From 0d08b38bef139169b9f09acda74a03572b56d3c0 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 26 Sep 2016 12:43:30 -0700 Subject: [PATCH 0001/2016] Moving all bigquery files into subdirectory. Done via: $ mkdir -p bigquery/google/cloud $ cp google/__init__.py bigquery/google/__init__.py $ git add bigquery/google/__init__.py $ cp google/cloud/__init__.py bigquery/google/cloud/__init__.py $ git add bigquery/google/cloud/__init__.py $ git mv google/cloud/bigquery bigquery/google/cloud/bigquery $ git mv unit_tests/bigquery bigquery/unit_tests --- .../google-cloud-bigquery/google/__init__.py | 20 + .../google/cloud/__init__.py | 20 + .../google/cloud/bigquery/__init__.py | 34 + .../google/cloud/bigquery/_helpers.py | 229 ++ .../google/cloud/bigquery/client.py | 336 +++ .../google/cloud/bigquery/connection.py | 34 + .../google/cloud/bigquery/dataset.py | 587 +++++ .../google/cloud/bigquery/job.py | 1103 ++++++++++ .../google/cloud/bigquery/query.py | 405 ++++ .../google/cloud/bigquery/schema.py | 52 + .../google/cloud/bigquery/table.py | 1093 ++++++++++ .../unit_tests/__init__.py | 13 + .../unit_tests/test__helpers.py | 496 +++++ .../unit_tests/test_client.py | 492 +++++ .../unit_tests/test_connection.py | 47 + .../unit_tests/test_dataset.py | 786 +++++++ .../unit_tests/test_job.py | 1688 ++++++++++++++ .../unit_tests/test_query.py | 458 ++++ .../unit_tests/test_schema.py | 110 + .../unit_tests/test_table.py | 1942 +++++++++++++++++ 20 files changed, 9945 insertions(+) create mode 100644 packages/google-cloud-bigquery/google/__init__.py create mode 100644 packages/google-cloud-bigquery/google/cloud/__init__.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/client.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/connection.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/job.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/query.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/schema.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/table.py create mode 100644 packages/google-cloud-bigquery/unit_tests/__init__.py create mode 100644 packages/google-cloud-bigquery/unit_tests/test__helpers.py create mode 100644 packages/google-cloud-bigquery/unit_tests/test_client.py create mode 100644 packages/google-cloud-bigquery/unit_tests/test_connection.py create mode 100644 packages/google-cloud-bigquery/unit_tests/test_dataset.py create mode 100644 packages/google-cloud-bigquery/unit_tests/test_job.py create mode 100644 packages/google-cloud-bigquery/unit_tests/test_query.py create mode 100644 packages/google-cloud-bigquery/unit_tests/test_schema.py create mode 100644 packages/google-cloud-bigquery/unit_tests/test_table.py diff --git a/packages/google-cloud-bigquery/google/__init__.py b/packages/google-cloud-bigquery/google/__init__.py new file mode 100644 index 000000000000..b2b833373882 --- /dev/null +++ b/packages/google-cloud-bigquery/google/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import pkg_resources + pkg_resources.declare_namespace(__name__) +except ImportError: + import pkgutil + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/packages/google-cloud-bigquery/google/cloud/__init__.py b/packages/google-cloud-bigquery/google/cloud/__init__.py new file mode 100644 index 000000000000..8ac7b74af136 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2014 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import pkg_resources + pkg_resources.declare_namespace(__name__) +except ImportError: + import pkgutil + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py new file mode 100644 index 000000000000..4abf94d86962 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -0,0 +1,34 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Google Cloud BigQuery API wrapper. + +The main concepts with this API are: + +- :class:`~google.cloud.bigquery.dataset.Dataset` represents a + collection of tables. + +- :class:`~google.cloud.bigquery.table.Table` represents a single "relation". +""" + + +from google.cloud.bigquery.client import Client +from google.cloud.bigquery.connection import Connection +from google.cloud.bigquery.dataset import AccessGrant +from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.table import Table + + +SCOPE = Connection.SCOPE diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py new file mode 100644 index 000000000000..a486fba978b1 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -0,0 +1,229 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared helper functions for BigQuery API classes.""" + +from google.cloud._helpers import _datetime_from_microseconds +from google.cloud._helpers import _date_from_iso8601_date + + +def _not_null(value, field): + """Check whether 'value' should be coerced to 'field' type.""" + return value is not None or field.mode != 'NULLABLE' + + +def _int_from_json(value, field): + """Coerce 'value' to an int, if set or not nullable.""" + if _not_null(value, field): + return int(value) + + +def _float_from_json(value, field): + """Coerce 'value' to a float, if set or not nullable.""" + if _not_null(value, field): + return float(value) + + +def _bool_from_json(value, field): + """Coerce 'value' to a bool, if set or not nullable.""" + if _not_null(value, field): + return value.lower() in ['t', 'true', '1'] + + +def _datetime_from_json(value, field): + """Coerce 'value' to a datetime, if set or not nullable.""" + if _not_null(value, field): + # value will be a float in seconds, to microsecond precision, in UTC. + return _datetime_from_microseconds(1e6 * float(value)) + + +def _date_from_json(value, field): + """Coerce 'value' to a datetime date, if set or not nullable""" + if _not_null(value, field): + return _date_from_iso8601_date(value) + + +def _record_from_json(value, field): + """Coerce 'value' to a mapping, if set or not nullable.""" + if _not_null(value, field): + record = {} + for subfield, cell in zip(field.fields, value['f']): + converter = _CELLDATA_FROM_JSON[subfield.field_type] + if field.mode == 'REPEATED': + value = [converter(item, subfield) for item in cell['v']] + else: + value = converter(cell['v'], subfield) + record[subfield.name] = value + return record + + +def _string_from_json(value, _): + """NOOP string -> string coercion""" + return value + + +_CELLDATA_FROM_JSON = { + 'INTEGER': _int_from_json, + 'INT64': _int_from_json, + 'FLOAT': _float_from_json, + 'FLOAT64': _float_from_json, + 'BOOLEAN': _bool_from_json, + 'TIMESTAMP': _datetime_from_json, + 'DATE': _date_from_json, + 'RECORD': _record_from_json, + 'STRING': _string_from_json, +} + + +def _rows_from_json(rows, schema): + """Convert JSON row data to rows w/ appropriate types.""" + rows_data = [] + for row in rows: + row_data = [] + for field, cell in zip(schema, row['f']): + converter = _CELLDATA_FROM_JSON[field.field_type] + if field.mode == 'REPEATED': + row_data.append([converter(item, field) + for item in cell['v']]) + else: + row_data.append(converter(cell['v'], field)) + rows_data.append(tuple(row_data)) + return rows_data + + +class _ConfigurationProperty(object): + """Base property implementation. + + Values will be stored on a `_configuration` helper attribute of the + property's job instance. + + :type name: string + :param name: name of the property + """ + + def __init__(self, name): + self.name = name + self._backing_name = '_%s' % (self.name,) + + def __get__(self, instance, owner): + """Descriptor protocal: accesstor""" + if instance is None: + return self + return getattr(instance._configuration, self._backing_name) + + def _validate(self, value): + """Subclasses override to impose validation policy.""" + pass + + def __set__(self, instance, value): + """Descriptor protocal: mutator""" + self._validate(value) + setattr(instance._configuration, self._backing_name, value) + + def __delete__(self, instance): + """Descriptor protocal: deleter""" + delattr(instance._configuration, self._backing_name) + + +class _TypedProperty(_ConfigurationProperty): + """Property implementation: validates based on value type. + + :type name: string + :param name: name of the property + + :type property_type: type or sequence of types + :param property_type: type to be validated + """ + def __init__(self, name, property_type): + super(_TypedProperty, self).__init__(name) + self.property_type = property_type + + def _validate(self, value): + """Ensure that 'value' is of the appropriate type. + + :raises: ValueError on a type mismatch. + """ + if not isinstance(value, self.property_type): + raise ValueError('Required type: %s' % (self.property_type,)) + + +class _EnumProperty(_ConfigurationProperty): + """Pseudo-enumeration class. + + Subclasses must define ``ALLOWED`` as a class-level constant: it must + be a sequence of strings. + + :type name: string + :param name: name of the property. + """ + def _validate(self, value): + """Check that ``value`` is one of the allowed values. + + :raises: ValueError if value is not allowed. + """ + if value not in self.ALLOWED: + raise ValueError('Pass one of: %s' ', '.join(self.ALLOWED)) + + +class UDFResource(object): + """Describe a single user-defined function (UDF) resource. + + :type udf_type: str + :param udf_type: the type of the resource ('inlineCode' or 'resourceUri') + + :type value: str + :param value: the inline code or resource URI. + + See + https://cloud.google.com/bigquery/user-defined-functions#api + """ + def __init__(self, udf_type, value): + self.udf_type = udf_type + self.value = value + + def __eq__(self, other): + return( + self.udf_type == other.udf_type and + self.value == other.value) + + +class UDFResourcesProperty(object): + """Custom property type, holding :class:`UDFResource` instances.""" + + def __get__(self, instance, owner): + """Descriptor protocol: accessor""" + if instance is None: + return self + return list(instance._udf_resources) + + def __set__(self, instance, value): + """Descriptor protocol: mutator""" + if not all(isinstance(u, UDFResource) for u in value): + raise ValueError("udf items must be UDFResource") + instance._udf_resources = tuple(value) + + +def _build_udf_resources(resources): + """ + :type resources: sequence of :class:`UDFResource` + :param resources: fields to be appended. + + :rtype: mapping + :returns: a mapping describing userDefinedFunctionResources for the query. + """ + udfs = [] + for resource in resources: + udf = {resource.udf_type: resource.value} + udfs.append(udf) + return udfs diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py new file mode 100644 index 000000000000..df4fd244a7d9 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -0,0 +1,336 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Client for interacting with the Google BigQuery API.""" + + +from google.cloud.client import JSONClient +from google.cloud.bigquery.connection import Connection +from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.job import CopyJob +from google.cloud.bigquery.job import ExtractTableToStorageJob +from google.cloud.bigquery.job import LoadTableFromStorageJob +from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.query import QueryResults + + +class Project(object): + """Wrapper for resource describing a BigQuery project. + + :type project_id: str + :param project_id: Opaque ID of the project + + :type numeric_id: int + :param numeric_id: Numeric ID of the project + + :type friendly_name: str + :param friendly_name: Display name of the project + """ + def __init__(self, project_id, numeric_id, friendly_name): + self.project_id = project_id + self.numeric_id = numeric_id + self.friendly_name = friendly_name + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct an instance from a resource dict.""" + return cls( + resource['id'], resource['numericId'], resource['friendlyName']) + + +class Client(JSONClient): + """Client to bundle configuration needed for API requests. + + :type project: str + :param project: the project which the client acts on behalf of. Will be + passed when creating a dataset / job. If not passed, + falls back to the default inferred from the environment. + + :type credentials: :class:`oauth2client.client.OAuth2Credentials` or + :class:`NoneType` + :param credentials: The OAuth2 Credentials to use for the connection + owned by this client. If not passed (and if no ``http`` + object is passed), falls back to the default inferred + from the environment. + + :type http: :class:`httplib2.Http` or class that defines ``request()``. + :param http: An optional HTTP object to make requests. If not passed, an + ``http`` object is created that is bound to the + ``credentials`` for the current object. + """ + + _connection_class = Connection + + def list_projects(self, max_results=None, page_token=None): + """List projects for the project associated with this client. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/projects/list + + :type max_results: int + :param max_results: maximum number of projects to return, If not + passed, defaults to a value set by the API. + + :type page_token: str + :param page_token: opaque marker for the next "page" of projects. If + not passed, the API will return the first page of + projects. + + :rtype: tuple, (list, str) + :returns: list of :class:`~google.cloud.bigquery.client.Project`, + plus a "next page token" string: if the token is not None, + indicates that more projects can be retrieved with another + call (pass that value as ``page_token``). + """ + params = {} + + if max_results is not None: + params['maxResults'] = max_results + + if page_token is not None: + params['pageToken'] = page_token + + path = '/projects' + resp = self.connection.api_request(method='GET', path=path, + query_params=params) + projects = [Project.from_api_repr(resource) + for resource in resp.get('projects', ())] + return projects, resp.get('nextPageToken') + + def list_datasets(self, include_all=False, max_results=None, + page_token=None): + """List datasets for the project associated with this client. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/datasets/list + + :type include_all: boolean + :param include_all: True if results include hidden datasets. + + :type max_results: int + :param max_results: maximum number of datasets to return, If not + passed, defaults to a value set by the API. + + :type page_token: str + :param page_token: opaque marker for the next "page" of datasets. If + not passed, the API will return the first page of + datasets. + + :rtype: tuple, (list, str) + :returns: list of :class:`~google.cloud.bigquery.dataset.Dataset`, + plus a "next page token" string: if the token is not None, + indicates that more datasets can be retrieved with another + call (pass that value as ``page_token``). + """ + params = {} + + if include_all: + params['all'] = True + + if max_results is not None: + params['maxResults'] = max_results + + if page_token is not None: + params['pageToken'] = page_token + + path = '/projects/%s/datasets' % (self.project,) + resp = self.connection.api_request(method='GET', path=path, + query_params=params) + datasets = [Dataset.from_api_repr(resource, self) + for resource in resp.get('datasets', ())] + return datasets, resp.get('nextPageToken') + + def dataset(self, dataset_name): + """Construct a dataset bound to this client. + + :type dataset_name: str + :param dataset_name: Name of the dataset. + + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` + :returns: a new ``Dataset`` instance + """ + return Dataset(dataset_name, client=self) + + def job_from_resource(self, resource): + """Detect correct job type from resource and instantiate. + + :type resource: dict + :param resource: one job resource from API response + + :rtype: One of: + :class:`google.cloud.bigquery.job.LoadTableFromStorageJob`, + :class:`google.cloud.bigquery.job.CopyJob`, + :class:`google.cloud.bigquery.job.ExtractTableToStorageJob`, + :class:`google.cloud.bigquery.job.QueryJob`, + :class:`google.cloud.bigquery.job.RunSyncQueryJob` + :returns: the job instance, constructed via the resource + """ + config = resource['configuration'] + if 'load' in config: + return LoadTableFromStorageJob.from_api_repr(resource, self) + elif 'copy' in config: + return CopyJob.from_api_repr(resource, self) + elif 'extract' in config: + return ExtractTableToStorageJob.from_api_repr(resource, self) + elif 'query' in config: + return QueryJob.from_api_repr(resource, self) + raise ValueError('Cannot parse job resource') + + def list_jobs(self, max_results=None, page_token=None, all_users=None, + state_filter=None): + """List jobs for the project associated with this client. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/list + + :type max_results: int + :param max_results: maximum number of jobs to return, If not + passed, defaults to a value set by the API. + + :type page_token: str + :param page_token: opaque marker for the next "page" of jobs. If + not passed, the API will return the first page of + jobs. + + :type all_users: boolean + :param all_users: if true, include jobs owned by all users in the + project. + + :type state_filter: str + :param state_filter: if passed, include only jobs matching the given + state. One of + + * ``"done"`` + * ``"pending"`` + * ``"running"`` + + :rtype: tuple, (list, str) + :returns: list of job instances, plus a "next page token" string: + if the token is not ``None``, indicates that more jobs can be + retrieved with another call, passing that value as + ``page_token``). + """ + params = {'projection': 'full'} + + if max_results is not None: + params['maxResults'] = max_results + + if page_token is not None: + params['pageToken'] = page_token + + if all_users is not None: + params['allUsers'] = all_users + + if state_filter is not None: + params['stateFilter'] = state_filter + + path = '/projects/%s/jobs' % (self.project,) + resp = self.connection.api_request(method='GET', path=path, + query_params=params) + jobs = [self.job_from_resource(resource) + for resource in resp.get('jobs', ())] + return jobs, resp.get('nextPageToken') + + def load_table_from_storage(self, job_name, destination, *source_uris): + """Construct a job for loading data into a table from CloudStorage. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load + + :type job_name: str + :param job_name: Name of the job. + + :type destination: :class:`google.cloud.bigquery.table.Table` + :param destination: Table into which data is to be loaded. + + :type source_uris: sequence of string + :param source_uris: URIs of data files to be loaded; in format + ``gs:///``. + + :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob` + :returns: a new ``LoadTableFromStorageJob`` instance + """ + return LoadTableFromStorageJob(job_name, destination, source_uris, + client=self) + + def copy_table(self, job_name, destination, *sources): + """Construct a job for copying one or more tables into another table. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy + + :type job_name: str + :param job_name: Name of the job. + + :type destination: :class:`google.cloud.bigquery.table.Table` + :param destination: Table into which data is to be copied. + + :type sources: sequence of :class:`google.cloud.bigquery.table.Table` + :param sources: tables to be copied. + + :rtype: :class:`google.cloud.bigquery.job.CopyJob` + :returns: a new ``CopyJob`` instance + """ + return CopyJob(job_name, destination, sources, client=self) + + def extract_table_to_storage(self, job_name, source, *destination_uris): + """Construct a job for extracting a table into Cloud Storage files. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract + + :type job_name: str + :param job_name: Name of the job. + + :type source: :class:`google.cloud.bigquery.table.Table` + :param source: table to be extracted. + + :type destination_uris: sequence of string + :param destination_uris: URIs of CloudStorage file(s) into which + table data is to be extracted; in format + ``gs:///``. + + :rtype: :class:`google.cloud.bigquery.job.ExtractTableToStorageJob` + :returns: a new ``ExtractTableToStorageJob`` instance + """ + return ExtractTableToStorageJob(job_name, source, destination_uris, + client=self) + + def run_async_query(self, job_name, query): + """Construct a job for running a SQL query asynchronously. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query + + :type job_name: str + :param job_name: Name of the job. + + :type query: str + :param query: SQL query to be executed + + :rtype: :class:`google.cloud.bigquery.job.QueryJob` + :returns: a new ``QueryJob`` instance + """ + return QueryJob(job_name, query, client=self) + + def run_sync_query(self, query): + """Run a SQL query synchronously. + + :type query: str + :param query: SQL query to be executed + + :rtype: :class:`google.cloud.bigquery.query.QueryResults` + :returns: a new ``QueryResults`` instance + """ + return QueryResults(query, client=self) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/connection.py new file mode 100644 index 000000000000..b5f43c07bf5a --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/connection.py @@ -0,0 +1,34 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Create / interact with Google Cloud BigQuery connections.""" + +from google.cloud import connection as base_connection + + +class Connection(base_connection.JSONConnection): + """A connection to Google Cloud BigQuery via the JSON REST API.""" + + API_BASE_URL = 'https://www.googleapis.com' + """The base of the API call URL.""" + + API_VERSION = 'v2' + """The version of the API, used in building the API call's URL.""" + + API_URL_TEMPLATE = '{api_base_url}/bigquery/{api_version}{path}' + """A template for the URL of a particular API call.""" + + SCOPE = ('https://www.googleapis.com/auth/bigquery', + 'https://www.googleapis.com/auth/cloud-platform') + """The scopes required for authenticating as a Cloud BigQuery consumer.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py new file mode 100644 index 000000000000..ce30fea3e4d3 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -0,0 +1,587 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Define API Datasets.""" +import six + +from google.cloud._helpers import _datetime_from_microseconds +from google.cloud.exceptions import NotFound +from google.cloud.bigquery.table import Table + + +class AccessGrant(object): + """Represent grant of an access role to an entity. + + Every entry in the access list will have exactly one of + ``userByEmail``, ``groupByEmail``, ``domain``, ``specialGroup`` or + ``view`` set. And if anything but ``view`` is set, it'll also have a + ``role`` specified. ``role`` is omitted for a ``view``, since + ``view`` s are always read-only. + + See https://cloud.google.com/bigquery/docs/reference/v2/datasets. + + :type role: string + :param role: Role granted to the entity. One of + + * ``'OWNER'`` + * ``'WRITER'`` + * ``'READER'`` + + May also be ``None`` if the ``entity_type`` is ``view``. + + :type entity_type: string + :param entity_type: Type of entity being granted the role. One of + :attr:`ENTITY_TYPES`. + + :type entity_id: string + :param entity_id: ID of entity being granted the role. + + :raises: :class:`ValueError` if the ``entity_type`` is not among + :attr:`ENTITY_TYPES`, or if a ``view`` has ``role`` set or + a non ``view`` **does not** have a ``role`` set. + """ + + ENTITY_TYPES = frozenset(['userByEmail', 'groupByEmail', 'domain', + 'specialGroup', 'view']) + """Allowed entity types.""" + + def __init__(self, role, entity_type, entity_id): + if entity_type not in self.ENTITY_TYPES: + message = 'Entity type %r not among: %s' % ( + entity_type, ', '.join(self.ENTITY_TYPES)) + raise ValueError(message) + if entity_type == 'view': + if role is not None: + raise ValueError('Role must be None for a view. Received ' + 'role: %r' % (role,)) + else: + if role is None: + raise ValueError('Role must be set for entity ' + 'type %r' % (entity_type,)) + + self.role = role + self.entity_type = entity_type + self.entity_id = entity_id + + def __eq__(self, other): + return ( + self.role == other.role and + self.entity_type == other.entity_type and + self.entity_id == other.entity_id) + + def __repr__(self): + return '' % ( + self.role, self.entity_type, self.entity_id) + + +class Dataset(object): + """Datasets are containers for tables. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/datasets + + :type name: string + :param name: the name of the dataset + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: A client which holds credentials and project configuration + for the dataset (which requires a project). + + :type access_grants: list of :class:`AccessGrant` + :param access_grants: roles granted to entities for this dataset + """ + + _access_grants = None + + def __init__(self, name, client, access_grants=()): + self.name = name + self._client = client + self._properties = {} + # Let the @property do validation. + self.access_grants = access_grants + + @property + def project(self): + """Project bound to the dataset. + + :rtype: string + :returns: the project (derived from the client). + """ + return self._client.project + + @property + def path(self): + """URL path for the dataset's APIs. + + :rtype: string + :returns: the path based on project and dataste name. + """ + return '/projects/%s/datasets/%s' % (self.project, self.name) + + @property + def access_grants(self): + """Dataset's access grants. + + :rtype: list of :class:`AccessGrant` + :returns: roles granted to entities for this dataset + """ + return list(self._access_grants) + + @access_grants.setter + def access_grants(self, value): + """Update dataset's access grants + + :type value: list of :class:`AccessGrant` + :param value: roles granted to entities for this dataset + + :raises: TypeError if 'value' is not a sequence, or ValueError if + any item in the sequence is not an AccessGrant + """ + if not all(isinstance(field, AccessGrant) for field in value): + raise ValueError('Values must be AccessGrant instances') + self._access_grants = tuple(value) + + @property + def created(self): + """Datetime at which the dataset was created. + + :rtype: ``datetime.datetime``, or ``NoneType`` + :returns: the creation time (None until set from the server). + """ + creation_time = self._properties.get('creationTime') + if creation_time is not None: + # creation_time will be in milliseconds. + return _datetime_from_microseconds(1000.0 * creation_time) + + @property + def dataset_id(self): + """ID for the dataset resource. + + :rtype: string, or ``NoneType`` + :returns: the ID (None until set from the server). + """ + return self._properties.get('id') + + @property + def etag(self): + """ETag for the dataset resource. + + :rtype: string, or ``NoneType`` + :returns: the ETag (None until set from the server). + """ + return self._properties.get('etag') + + @property + def modified(self): + """Datetime at which the dataset was last modified. + + :rtype: ``datetime.datetime``, or ``NoneType`` + :returns: the modification time (None until set from the server). + """ + modified_time = self._properties.get('lastModifiedTime') + if modified_time is not None: + # modified_time will be in milliseconds. + return _datetime_from_microseconds(1000.0 * modified_time) + + @property + def self_link(self): + """URL for the dataset resource. + + :rtype: string, or ``NoneType`` + :returns: the URL (None until set from the server). + """ + return self._properties.get('selfLink') + + @property + def default_table_expiration_ms(self): + """Default expiration time for tables in the dataset. + + :rtype: integer, or ``NoneType`` + :returns: The time in milliseconds, or None (the default). + """ + return self._properties.get('defaultTableExpirationMs') + + @default_table_expiration_ms.setter + def default_table_expiration_ms(self, value): + """Update default expiration time for tables in the dataset. + + :type value: integer, or ``NoneType`` + :param value: new default time, in milliseconds + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, six.integer_types) and value is not None: + raise ValueError("Pass an integer, or None") + self._properties['defaultTableExpirationMs'] = value + + @property + def description(self): + """Description of the dataset. + + :rtype: string, or ``NoneType`` + :returns: The description as set by the user, or None (the default). + """ + return self._properties.get('description') + + @description.setter + def description(self, value): + """Update description of the dataset. + + :type value: string, or ``NoneType`` + :param value: new description + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, six.string_types) and value is not None: + raise ValueError("Pass a string, or None") + self._properties['description'] = value + + @property + def friendly_name(self): + """Title of the dataset. + + :rtype: string, or ``NoneType`` + :returns: The name as set by the user, or None (the default). + """ + return self._properties.get('friendlyName') + + @friendly_name.setter + def friendly_name(self, value): + """Update title of the dataset. + + :type value: string, or ``NoneType`` + :param value: new title + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, six.string_types) and value is not None: + raise ValueError("Pass a string, or None") + self._properties['friendlyName'] = value + + @property + def location(self): + """Location in which the dataset is hosted. + + :rtype: string, or ``NoneType`` + :returns: The location as set by the user, or None (the default). + """ + return self._properties.get('location') + + @location.setter + def location(self, value): + """Update location in which the dataset is hosted. + + :type value: string, or ``NoneType`` + :param value: new location + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, six.string_types) and value is not None: + raise ValueError("Pass a string, or None") + self._properties['location'] = value + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a dataset given its API representation + + :type resource: dict + :param resource: dataset resource representation returned from the API + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: Client which holds credentials and project + configuration for the dataset. + + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` + :returns: Dataset parsed from ``resource``. + """ + if ('datasetReference' not in resource or + 'datasetId' not in resource['datasetReference']): + raise KeyError('Resource lacks required identity information:' + '["datasetReference"]["datasetId"]') + name = resource['datasetReference']['datasetId'] + dataset = cls(name, client=client) + dataset._set_properties(resource) + return dataset + + def _require_client(self, client): + """Check client or verify over-ride. + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :rtype: :class:`google.cloud.bigquery.client.Client` + :returns: The client passed in or the currently bound client. + """ + if client is None: + client = self._client + return client + + @staticmethod + def _parse_access_grants(access): + """Parse a resource fragment into a set of access grants. + + ``role`` augments the entity type and present **unless** the entity + type is ``view``. + + :type access: list of mappings + :param access: each mapping represents a single access grant. + + :rtype: list of :class:`AccessGrant` + :returns: a list of parsed grants. + :raises: :class:`ValueError` if a grant in ``access`` has more keys + than ``role`` and one additional key. + """ + result = [] + for grant in access: + grant = grant.copy() + role = grant.pop('role', None) + entity_type, entity_id = grant.popitem() + if len(grant) != 0: + raise ValueError('Grant has unexpected keys remaining.', grant) + result.append( + AccessGrant(role, entity_type, entity_id)) + return result + + def _set_properties(self, api_response): + """Update properties from resource in body of ``api_response`` + + :type api_response: httplib2.Response + :param api_response: response returned from an API call. + """ + self._properties.clear() + cleaned = api_response.copy() + access = cleaned.pop('access', ()) + self.access_grants = self._parse_access_grants(access) + if 'creationTime' in cleaned: + cleaned['creationTime'] = float(cleaned['creationTime']) + if 'lastModifiedTime' in cleaned: + cleaned['lastModifiedTime'] = float(cleaned['lastModifiedTime']) + if 'defaultTableExpirationMs' in cleaned: + cleaned['defaultTableExpirationMs'] = int( + cleaned['defaultTableExpirationMs']) + self._properties.update(cleaned) + + def _build_access_resource(self): + """Generate a resource fragment for dataset's access grants.""" + result = [] + for grant in self.access_grants: + info = {grant.entity_type: grant.entity_id} + if grant.role is not None: + info['role'] = grant.role + result.append(info) + return result + + def _build_resource(self): + """Generate a resource for ``create`` or ``update``.""" + resource = { + 'datasetReference': { + 'projectId': self.project, 'datasetId': self.name}, + } + if self.default_table_expiration_ms is not None: + value = self.default_table_expiration_ms + resource['defaultTableExpirationMs'] = value + + if self.description is not None: + resource['description'] = self.description + + if self.friendly_name is not None: + resource['friendlyName'] = self.friendly_name + + if self.location is not None: + resource['location'] = self.location + + if len(self.access_grants) > 0: + resource['access'] = self._build_access_resource() + + return resource + + def create(self, client=None): + """API call: create the dataset via a PUT request. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/tables/insert + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + """ + client = self._require_client(client) + path = '/projects/%s/datasets' % (self.project,) + api_response = client.connection.api_request( + method='POST', path=path, data=self._build_resource()) + self._set_properties(api_response) + + def exists(self, client=None): + """API call: test for the existence of the dataset via a GET request + + See + https://cloud.google.com/bigquery/docs/reference/v2/datasets/get + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :rtype: bool + :returns: Boolean indicating existence of the dataset. + """ + client = self._require_client(client) + + try: + client.connection.api_request(method='GET', path=self.path, + query_params={'fields': 'id'}) + except NotFound: + return False + else: + return True + + def reload(self, client=None): + """API call: refresh dataset properties via a GET request. + + See + https://cloud.google.com/bigquery/docs/reference/v2/datasets/get + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + """ + client = self._require_client(client) + + api_response = client.connection.api_request( + method='GET', path=self.path) + self._set_properties(api_response) + + def patch(self, client=None, **kw): + """API call: update individual dataset properties via a PATCH request. + + See + https://cloud.google.com/bigquery/docs/reference/v2/datasets/patch + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :type kw: ``dict`` + :param kw: properties to be patched. + + :raises: ValueError for invalid value types. + """ + client = self._require_client(client) + + partial = {} + + if 'default_table_expiration_ms' in kw: + value = kw['default_table_expiration_ms'] + if not isinstance(value, six.integer_types) and value is not None: + raise ValueError("Pass an integer, or None") + partial['defaultTableExpirationMs'] = value + + if 'description' in kw: + partial['description'] = kw['description'] + + if 'friendly_name' in kw: + partial['friendlyName'] = kw['friendly_name'] + + if 'location' in kw: + partial['location'] = kw['location'] + + api_response = client.connection.api_request( + method='PATCH', path=self.path, data=partial) + self._set_properties(api_response) + + def update(self, client=None): + """API call: update dataset properties via a PUT request. + + See + https://cloud.google.com/bigquery/docs/reference/v2/datasets/update + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + """ + client = self._require_client(client) + api_response = client.connection.api_request( + method='PUT', path=self.path, data=self._build_resource()) + self._set_properties(api_response) + + def delete(self, client=None): + """API call: delete the dataset via a DELETE request. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/tables/delete + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + """ + client = self._require_client(client) + client.connection.api_request(method='DELETE', path=self.path) + + def list_tables(self, max_results=None, page_token=None): + """List tables for the project associated with this client. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/tables/list + + :type max_results: int + :param max_results: maximum number of tables to return, If not + passed, defaults to a value set by the API. + + :type page_token: string + :param page_token: opaque marker for the next "page" of datasets. If + not passed, the API will return the first page of + datasets. + + :rtype: tuple, (list, str) + :returns: list of :class:`google.cloud.bigquery.table.Table`, plus a + "next page token" string: if not ``None``, indicates that + more tables can be retrieved with another call (pass that + value as ``page_token``). + """ + params = {} + + if max_results is not None: + params['maxResults'] = max_results + + if page_token is not None: + params['pageToken'] = page_token + + path = '/projects/%s/datasets/%s/tables' % (self.project, self.name) + connection = self._client.connection + resp = connection.api_request(method='GET', path=path, + query_params=params) + tables = [Table.from_api_repr(resource, self) + for resource in resp.get('tables', ())] + return tables, resp.get('nextPageToken') + + def table(self, name, schema=()): + """Construct a table bound to this dataset. + + :type name: string + :param name: Name of the table. + + :type schema: list of :class:`google.cloud.bigquery.table.SchemaField` + :param schema: The table's schema + + :rtype: :class:`google.cloud.bigquery.table.Table` + :returns: a new ``Table`` instance + """ + return Table(name, dataset=self, schema=schema) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py new file mode 100644 index 000000000000..b86f51c3fe18 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -0,0 +1,1103 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Define API Jobs.""" + +import six + +from google.cloud.exceptions import NotFound +from google.cloud._helpers import _datetime_from_microseconds +from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import _build_schema_resource +from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery._helpers import UDFResourcesProperty +from google.cloud.bigquery._helpers import _EnumProperty +from google.cloud.bigquery._helpers import _TypedProperty +from google.cloud.bigquery._helpers import _build_udf_resources + + +class Compression(_EnumProperty): + """Pseudo-enum for ``compression`` properties.""" + GZIP = 'GZIP' + NONE = 'NONE' + ALLOWED = (GZIP, NONE) + + +class CreateDisposition(_EnumProperty): + """Pseudo-enum for ``create_disposition`` properties.""" + CREATE_IF_NEEDED = 'CREATE_IF_NEEDED' + CREATE_NEVER = 'CREATE_NEVER' + ALLOWED = (CREATE_IF_NEEDED, CREATE_NEVER) + + +class DestinationFormat(_EnumProperty): + """Pseudo-enum for ``destination_format`` properties.""" + CSV = 'CSV' + NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' + AVRO = 'AVRO' + ALLOWED = (CSV, NEWLINE_DELIMITED_JSON, AVRO) + + +class Encoding(_EnumProperty): + """Pseudo-enum for ``encoding`` properties.""" + UTF_8 = 'UTF-8' + ISO_8559_1 = 'ISO-8559-1' + ALLOWED = (UTF_8, ISO_8559_1) + + +class QueryPriority(_EnumProperty): + """Pseudo-enum for ``QueryJob.priority`` property.""" + INTERACTIVE = 'INTERACTIVE' + BATCH = 'BATCH' + ALLOWED = (INTERACTIVE, BATCH) + + +class SourceFormat(_EnumProperty): + """Pseudo-enum for ``source_format`` properties.""" + CSV = 'CSV' + DATASTORE_BACKUP = 'DATASTORE_BACKUP' + NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' + ALLOWED = (CSV, DATASTORE_BACKUP, NEWLINE_DELIMITED_JSON) + + +class WriteDisposition(_EnumProperty): + """Pseudo-enum for ``write_disposition`` properties.""" + WRITE_APPEND = 'WRITE_APPEND' + WRITE_TRUNCATE = 'WRITE_TRUNCATE' + WRITE_EMPTY = 'WRITE_EMPTY' + ALLOWED = (WRITE_APPEND, WRITE_TRUNCATE, WRITE_EMPTY) + + +class _BaseJob(object): + """Base class for jobs. + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: A client which holds credentials and project configuration + for the dataset (which requires a project). + """ + def __init__(self, client): + self._client = client + self._properties = {} + + @property + def project(self): + """Project bound to the job. + + :rtype: string + :returns: the project (derived from the client). + """ + return self._client.project + + def _require_client(self, client): + """Check client or verify over-ride. + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :rtype: :class:`google.cloud.bigquery.client.Client` + :returns: The client passed in or the currently bound client. + """ + if client is None: + client = self._client + return client + + +class _AsyncJob(_BaseJob): + """Base class for asynchronous jobs. + + :type name: string + :param name: the name of the job + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: A client which holds credentials and project configuration + for the dataset (which requires a project). + """ + def __init__(self, name, client): + super(_AsyncJob, self).__init__(client) + self.name = name + + @property + def job_type(self): + """Type of job + + :rtype: string + :returns: one of 'load', 'copy', 'extract', 'query' + """ + return self._JOB_TYPE + + @property + def path(self): + """URL path for the job's APIs. + + :rtype: string + :returns: the path based on project and job name. + """ + return '/projects/%s/jobs/%s' % (self.project, self.name) + + @property + def etag(self): + """ETag for the job resource. + + :rtype: string, or ``NoneType`` + :returns: the ETag (None until set from the server). + """ + return self._properties.get('etag') + + @property + def self_link(self): + """URL for the job resource. + + :rtype: string, or ``NoneType`` + :returns: the URL (None until set from the server). + """ + return self._properties.get('selfLink') + + @property + def user_email(self): + """E-mail address of user who submitted the job. + + :rtype: string, or ``NoneType`` + :returns: the URL (None until set from the server). + """ + return self._properties.get('user_email') + + @property + def created(self): + """Datetime at which the job was created. + + :rtype: ``datetime.datetime``, or ``NoneType`` + :returns: the creation time (None until set from the server). + """ + statistics = self._properties.get('statistics') + if statistics is not None: + millis = statistics.get('creationTime') + if millis is not None: + return _datetime_from_microseconds(millis * 1000.0) + + @property + def started(self): + """Datetime at which the job was started. + + :rtype: ``datetime.datetime``, or ``NoneType`` + :returns: the start time (None until set from the server). + """ + statistics = self._properties.get('statistics') + if statistics is not None: + millis = statistics.get('startTime') + if millis is not None: + return _datetime_from_microseconds(millis * 1000.0) + + @property + def ended(self): + """Datetime at which the job finished. + + :rtype: ``datetime.datetime``, or ``NoneType`` + :returns: the end time (None until set from the server). + """ + statistics = self._properties.get('statistics') + if statistics is not None: + millis = statistics.get('endTime') + if millis is not None: + return _datetime_from_microseconds(millis * 1000.0) + + @property + def error_result(self): + """Error information about the job as a whole. + + :rtype: mapping, or ``NoneType`` + :returns: the error information (None until set from the server). + """ + status = self._properties.get('status') + if status is not None: + return status.get('errorResult') + + @property + def errors(self): + """Information about individual errors generated by the job. + + :rtype: list of mappings, or ``NoneType`` + :returns: the error information (None until set from the server). + """ + status = self._properties.get('status') + if status is not None: + return status.get('errors') + + @property + def state(self): + """Status of the job. + + :rtype: string, or ``NoneType`` + :returns: the state (None until set from the server). + """ + status = self._properties.get('status') + if status is not None: + return status.get('state') + + def _scrub_local_properties(self, cleaned): + """Helper: handle subclass properties in cleaned.""" + pass + + def _set_properties(self, api_response): + """Update properties from resource in body of ``api_response`` + + :type api_response: httplib2.Response + :param api_response: response returned from an API call + """ + cleaned = api_response.copy() + self._scrub_local_properties(cleaned) + + statistics = cleaned.get('statistics', {}) + if 'creationTime' in statistics: + statistics['creationTime'] = float(statistics['creationTime']) + if 'startTime' in statistics: + statistics['startTime'] = float(statistics['startTime']) + if 'endTime' in statistics: + statistics['endTime'] = float(statistics['endTime']) + + self._properties.clear() + self._properties.update(cleaned) + + @classmethod + def _get_resource_config(cls, resource): + """Helper for :meth:`from_api_repr` + + :type resource: dict + :param resource: resource for the job + + :rtype: dict + :returns: tuple (string, dict), where the first element is the + job name and the second contains job-specific configuration. + :raises: :class:`KeyError` if the resource has no identifier, or + is missing the appropriate configuration. + """ + if ('jobReference' not in resource or + 'jobId' not in resource['jobReference']): + raise KeyError('Resource lacks required identity information: ' + '["jobReference"]["jobId"]') + name = resource['jobReference']['jobId'] + if ('configuration' not in resource or + cls._JOB_TYPE not in resource['configuration']): + raise KeyError('Resource lacks required configuration: ' + '["configuration"]["%s"]' % cls._JOB_TYPE) + config = resource['configuration'][cls._JOB_TYPE] + return name, config + + def begin(self, client=None): + """API call: begin the job via a POST request + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/insert + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :raises: :exc:`ValueError` if the job has already begin. + """ + if self.state is not None: + raise ValueError("Job already begun.") + + client = self._require_client(client) + path = '/projects/%s/jobs' % (self.project,) + api_response = client.connection.api_request( + method='POST', path=path, data=self._build_resource()) + self._set_properties(api_response) + + def exists(self, client=None): + """API call: test for the existence of the job via a GET request + + See + https://cloud.google.com/bigquery/docs/reference/v2/jobs/get + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :rtype: bool + :returns: Boolean indicating existence of the job. + """ + client = self._require_client(client) + + try: + client.connection.api_request(method='GET', path=self.path, + query_params={'fields': 'id'}) + except NotFound: + return False + else: + return True + + def reload(self, client=None): + """API call: refresh job properties via a GET request + + See + https://cloud.google.com/bigquery/docs/reference/v2/jobs/get + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + """ + client = self._require_client(client) + + api_response = client.connection.api_request( + method='GET', path=self.path) + self._set_properties(api_response) + + def cancel(self, client=None): + """API call: cancel job via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/v2/jobs/cancel + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + """ + client = self._require_client(client) + + api_response = client.connection.api_request( + method='POST', path='%s/cancel' % (self.path,)) + self._set_properties(api_response['job']) + + +class _LoadConfiguration(object): + """User-settable configuration options for load jobs. + + Values which are ``None`` -> server defaults. + """ + _allow_jagged_rows = None + _allow_quoted_newlines = None + _create_disposition = None + _encoding = None + _field_delimiter = None + _ignore_unknown_values = None + _max_bad_records = None + _quote_character = None + _skip_leading_rows = None + _source_format = None + _write_disposition = None + + +class LoadTableFromStorageJob(_AsyncJob): + """Asynchronous job for loading data into a table from CloudStorage. + + :type name: string + :param name: the name of the job + + :type destination: :class:`google.cloud.bigquery.table.Table` + :param destination: Table into which data is to be loaded. + + :type source_uris: sequence of string + :param source_uris: URIs of one or more data files to be loaded, in + format ``gs:///``. + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: A client which holds credentials and project configuration + for the dataset (which requires a project). + + :type schema: list of :class:`google.cloud.bigquery.table.SchemaField` + :param schema: The job's schema + """ + + _schema = None + _JOB_TYPE = 'load' + + def __init__(self, name, destination, source_uris, client, schema=()): + super(LoadTableFromStorageJob, self).__init__(name, client) + self.destination = destination + self.source_uris = source_uris + # Let the @property do validation. + self.schema = schema + self._configuration = _LoadConfiguration() + + @property + def schema(self): + """Table's schema. + + :rtype: list of :class:`SchemaField` + :returns: fields describing the schema + """ + return list(self._schema) + + @schema.setter + def schema(self, value): + """Update table's schema + + :type value: list of :class:`SchemaField` + :param value: fields describing the schema + + :raises: TypeError if 'value' is not a sequence, or ValueError if + any item in the sequence is not a SchemaField + """ + if not all(isinstance(field, SchemaField) for field in value): + raise ValueError('Schema items must be fields') + self._schema = tuple(value) + + @property + def input_file_bytes(self): + """Count of bytes loaded from source files. + + :rtype: integer, or ``NoneType`` + :returns: the count (None until set from the server). + """ + statistics = self._properties.get('statistics') + if statistics is not None: + return int(statistics['load']['inputFileBytes']) + + @property + def input_files(self): + """Count of source files. + + :rtype: integer, or ``NoneType`` + :returns: the count (None until set from the server). + """ + statistics = self._properties.get('statistics') + if statistics is not None: + return int(statistics['load']['inputFiles']) + + @property + def output_bytes(self): + """Count of bytes saved to destination table. + + :rtype: integer, or ``NoneType`` + :returns: the count (None until set from the server). + """ + statistics = self._properties.get('statistics') + if statistics is not None: + return int(statistics['load']['outputBytes']) + + @property + def output_rows(self): + """Count of rows saved to destination table. + + :rtype: integer, or ``NoneType`` + :returns: the count (None until set from the server). + """ + statistics = self._properties.get('statistics') + if statistics is not None: + return int(statistics['load']['outputRows']) + + allow_jagged_rows = _TypedProperty('allow_jagged_rows', bool) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.allowJaggedRows + """ + + allow_quoted_newlines = _TypedProperty('allow_quoted_newlines', bool) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.allowQuotedNewlines + """ + + create_disposition = CreateDisposition('create_disposition') + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.createDisposition + """ + + encoding = Encoding('encoding') + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding + """ + + field_delimiter = _TypedProperty('field_delimiter', six.string_types) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.fieldDelimiter + """ + + ignore_unknown_values = _TypedProperty('ignore_unknown_values', bool) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.ignoreUnknownValues + """ + + max_bad_records = _TypedProperty('max_bad_records', six.integer_types) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.maxBadRecords + """ + + quote_character = _TypedProperty('quote_character', six.string_types) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.quote + """ + + skip_leading_rows = _TypedProperty('skip_leading_rows', six.integer_types) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.skipLeadingRows + """ + + source_format = SourceFormat('source_format') + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.sourceFormat + """ + + write_disposition = WriteDisposition('write_disposition') + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.writeDisposition + """ + + def _populate_config_resource(self, configuration): + """Helper for _build_resource: copy config properties to resource""" + if self.allow_jagged_rows is not None: + configuration['allowJaggedRows'] = self.allow_jagged_rows + if self.allow_quoted_newlines is not None: + configuration['allowQuotedNewlines'] = self.allow_quoted_newlines + if self.create_disposition is not None: + configuration['createDisposition'] = self.create_disposition + if self.encoding is not None: + configuration['encoding'] = self.encoding + if self.field_delimiter is not None: + configuration['fieldDelimiter'] = self.field_delimiter + if self.ignore_unknown_values is not None: + configuration['ignoreUnknownValues'] = self.ignore_unknown_values + if self.max_bad_records is not None: + configuration['maxBadRecords'] = self.max_bad_records + if self.quote_character is not None: + configuration['quote'] = self.quote_character + if self.skip_leading_rows is not None: + configuration['skipLeadingRows'] = self.skip_leading_rows + if self.source_format is not None: + configuration['sourceFormat'] = self.source_format + if self.write_disposition is not None: + configuration['writeDisposition'] = self.write_disposition + + def _build_resource(self): + """Generate a resource for :meth:`begin`.""" + resource = { + 'jobReference': { + 'projectId': self.project, + 'jobId': self.name, + }, + 'configuration': { + self._JOB_TYPE: { + 'sourceUris': self.source_uris, + 'destinationTable': { + 'projectId': self.destination.project, + 'datasetId': self.destination.dataset_name, + 'tableId': self.destination.name, + }, + }, + }, + } + configuration = resource['configuration'][self._JOB_TYPE] + self._populate_config_resource(configuration) + + if len(self.schema) > 0: + configuration['schema'] = { + 'fields': _build_schema_resource(self.schema)} + + return resource + + def _scrub_local_properties(self, cleaned): + """Helper: handle subclass properties in cleaned.""" + schema = cleaned.pop('schema', {'fields': ()}) + self.schema = _parse_schema_resource(schema) + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + .. note: + + This method assumes that the project found in the resource matches + the client's project. + + :type resource: dict + :param resource: dataset job representation returned from the API + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: Client which holds credentials and project + configuration for the dataset. + + :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob` + :returns: Job parsed from ``resource``. + """ + name, config = cls._get_resource_config(resource) + dest_config = config['destinationTable'] + dataset = Dataset(dest_config['datasetId'], client) + destination = Table(dest_config['tableId'], dataset) + source_urls = config.get('sourceUris', ()) + job = cls(name, destination, source_urls, client=client) + job._set_properties(resource) + return job + + +class _CopyConfiguration(object): + """User-settable configuration options for copy jobs. + + Values which are ``None`` -> server defaults. + """ + _create_disposition = None + _write_disposition = None + + +class CopyJob(_AsyncJob): + """Asynchronous job: copy data into a table from other tables. + + :type name: string + :param name: the name of the job + + :type destination: :class:`google.cloud.bigquery.table.Table` + :param destination: Table into which data is to be loaded. + + :type sources: list of :class:`google.cloud.bigquery.table.Table` + :param sources: Table into which data is to be loaded. + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: A client which holds credentials and project configuration + for the dataset (which requires a project). + """ + + _JOB_TYPE = 'copy' + + def __init__(self, name, destination, sources, client): + super(CopyJob, self).__init__(name, client) + self.destination = destination + self.sources = sources + self._configuration = _CopyConfiguration() + + create_disposition = CreateDisposition('create_disposition') + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy.createDisposition + """ + + write_disposition = WriteDisposition('write_disposition') + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy.writeDisposition + """ + + def _populate_config_resource(self, configuration): + """Helper for _build_resource: copy config properties to resource""" + if self.create_disposition is not None: + configuration['createDisposition'] = self.create_disposition + if self.write_disposition is not None: + configuration['writeDisposition'] = self.write_disposition + + def _build_resource(self): + """Generate a resource for :meth:`begin`.""" + + source_refs = [{ + 'projectId': table.project, + 'datasetId': table.dataset_name, + 'tableId': table.name, + } for table in self.sources] + + resource = { + 'jobReference': { + 'projectId': self.project, + 'jobId': self.name, + }, + 'configuration': { + self._JOB_TYPE: { + 'sourceTables': source_refs, + 'destinationTable': { + 'projectId': self.destination.project, + 'datasetId': self.destination.dataset_name, + 'tableId': self.destination.name, + }, + }, + }, + } + configuration = resource['configuration'][self._JOB_TYPE] + self._populate_config_resource(configuration) + + return resource + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + .. note: + + This method assumes that the project found in the resource matches + the client's project. + + :type resource: dict + :param resource: dataset job representation returned from the API + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: Client which holds credentials and project + configuration for the dataset. + + :rtype: :class:`google.cloud.bigquery.job.CopyJob` + :returns: Job parsed from ``resource``. + """ + name, config = cls._get_resource_config(resource) + dest_config = config['destinationTable'] + dataset = Dataset(dest_config['datasetId'], client) + destination = Table(dest_config['tableId'], dataset) + sources = [] + for source_config in config['sourceTables']: + dataset = Dataset(source_config['datasetId'], client) + sources.append(Table(source_config['tableId'], dataset)) + job = cls(name, destination, sources, client=client) + job._set_properties(resource) + return job + + +class _ExtractConfiguration(object): + """User-settable configuration options for extract jobs. + + Values which are ``None`` -> server defaults. + """ + _compression = None + _destination_format = None + _field_delimiter = None + _print_header = None + + +class ExtractTableToStorageJob(_AsyncJob): + """Asynchronous job: extract data from a table into Cloud Storage. + + :type name: string + :param name: the name of the job + + :type source: :class:`google.cloud.bigquery.table.Table` + :param source: Table into which data is to be loaded. + + :type destination_uris: list of string + :param destination_uris: URIs describing Cloud Storage blobs into which + extracted data will be written, in format + ``gs:///``. + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: A client which holds credentials and project configuration + for the dataset (which requires a project). + """ + _JOB_TYPE = 'extract' + + def __init__(self, name, source, destination_uris, client): + super(ExtractTableToStorageJob, self).__init__(name, client) + self.source = source + self.destination_uris = destination_uris + self._configuration = _ExtractConfiguration() + + compression = Compression('compression') + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extracted.compression + """ + + destination_format = DestinationFormat('destination_format') + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extracted.destinationFormat + """ + + field_delimiter = _TypedProperty('field_delimiter', six.string_types) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extracted.fieldDelimiter + """ + + print_header = _TypedProperty('print_header', bool) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extracted.printHeader + """ + + def _populate_config_resource(self, configuration): + """Helper for _build_resource: copy config properties to resource""" + if self.compression is not None: + configuration['compression'] = self.compression + if self.destination_format is not None: + configuration['destinationFormat'] = self.destination_format + if self.field_delimiter is not None: + configuration['fieldDelimiter'] = self.field_delimiter + if self.print_header is not None: + configuration['printHeader'] = self.print_header + + def _build_resource(self): + """Generate a resource for :meth:`begin`.""" + + source_ref = { + 'projectId': self.source.project, + 'datasetId': self.source.dataset_name, + 'tableId': self.source.name, + } + + resource = { + 'jobReference': { + 'projectId': self.project, + 'jobId': self.name, + }, + 'configuration': { + self._JOB_TYPE: { + 'sourceTable': source_ref, + 'destinationUris': self.destination_uris, + }, + }, + } + configuration = resource['configuration'][self._JOB_TYPE] + self._populate_config_resource(configuration) + + return resource + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + .. note: + + This method assumes that the project found in the resource matches + the client's project. + + :type resource: dict + :param resource: dataset job representation returned from the API + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: Client which holds credentials and project + configuration for the dataset. + + :rtype: :class:`google.cloud.bigquery.job.ExtractTableToStorageJob` + :returns: Job parsed from ``resource``. + """ + name, config = cls._get_resource_config(resource) + source_config = config['sourceTable'] + dataset = Dataset(source_config['datasetId'], client) + source = Table(source_config['tableId'], dataset) + destination_uris = config['destinationUris'] + job = cls(name, source, destination_uris, client=client) + job._set_properties(resource) + return job + + +class _AsyncQueryConfiguration(object): + """User-settable configuration options for asynchronous query jobs. + + Values which are ``None`` -> server defaults. + """ + _allow_large_results = None + _create_disposition = None + _default_dataset = None + _destination = None + _flatten_results = None + _priority = None + _use_query_cache = None + _use_legacy_sql = None + _dry_run = None + _write_disposition = None + _maximum_billing_tier = None + _maximum_bytes_billed = None + + +class QueryJob(_AsyncJob): + """Asynchronous job: query tables. + + :type name: string + :param name: the name of the job + + :type query: string + :param query: SQL query string + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: A client which holds credentials and project configuration + for the dataset (which requires a project). + + :type udf_resources: tuple + :param udf_resources: An iterable of + :class:`google.cloud.bigquery._helpers.UDFResource` + (empty by default) + """ + _JOB_TYPE = 'query' + _UDF_KEY = 'userDefinedFunctionResources' + + def __init__(self, name, query, client, udf_resources=()): + super(QueryJob, self).__init__(name, client) + self.query = query + self.udf_resources = udf_resources + self._configuration = _AsyncQueryConfiguration() + + allow_large_results = _TypedProperty('allow_large_results', bool) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.allowLargeResults + """ + + create_disposition = CreateDisposition('create_disposition') + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.createDisposition + """ + + default_dataset = _TypedProperty('default_dataset', Dataset) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset + """ + + destination = _TypedProperty('destination', Table) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.destinationTable + """ + + flatten_results = _TypedProperty('flatten_results', bool) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.flattenResults + """ + + priority = QueryPriority('priority') + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.priority + """ + + udf_resources = UDFResourcesProperty() + + use_query_cache = _TypedProperty('use_query_cache', bool) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.useQueryCache + """ + + use_legacy_sql = _TypedProperty('use_legacy_sql', bool) + """See: + https://cloud.google.com/bigquery/docs/\ + reference/v2/jobs#configuration.query.useLegacySql + """ + + dry_run = _TypedProperty('dry_run', bool) + """See: + https://cloud.google.com/bigquery/docs/\ + reference/v2/jobs#configuration.query.dryRun + """ + + write_disposition = WriteDisposition('write_disposition') + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.writeDisposition + """ + + maximum_billing_tier = _TypedProperty('maximum_billing_tier', int) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.maximumBillingTier + """ + + maximum_bytes_billed = _TypedProperty('maximum_bytes_billed', int) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.maximumBytesBilled + """ + + def _destination_table_resource(self): + """Create a JSON resource for the destination table. + + Helper for :meth:`_populate_config_resource` and + :meth:`_scrub_local_properties` + """ + if self.destination is not None: + return { + 'projectId': self.destination.project, + 'datasetId': self.destination.dataset_name, + 'tableId': self.destination.name, + } + + def _populate_config_resource_booleans(self, configuration): + """Helper for _populate_config_resource.""" + if self.allow_large_results is not None: + configuration['allowLargeResults'] = self.allow_large_results + if self.flatten_results is not None: + configuration['flattenResults'] = self.flatten_results + if self.use_query_cache is not None: + configuration['useQueryCache'] = self.use_query_cache + if self.use_legacy_sql is not None: + configuration['useLegacySql'] = self.use_legacy_sql + if self.dry_run is not None: + configuration['dryRun'] = self.dry_run + + def _populate_config_resource(self, configuration): + """Helper for _build_resource: copy config properties to resource""" + self._populate_config_resource_booleans(configuration) + + if self.create_disposition is not None: + configuration['createDisposition'] = self.create_disposition + if self.default_dataset is not None: + configuration['defaultDataset'] = { + 'projectId': self.default_dataset.project, + 'datasetId': self.default_dataset.name, + } + if self.destination is not None: + table_res = self._destination_table_resource() + configuration['destinationTable'] = table_res + if self.priority is not None: + configuration['priority'] = self.priority + if self.write_disposition is not None: + configuration['writeDisposition'] = self.write_disposition + if self.maximum_billing_tier is not None: + configuration['maximumBillingTier'] = self.maximum_billing_tier + if self.maximum_bytes_billed is not None: + configuration['maximumBytesBilled'] = self.maximum_bytes_billed + if len(self._udf_resources) > 0: + configuration[self._UDF_KEY] = _build_udf_resources( + self._udf_resources) + + def _build_resource(self): + """Generate a resource for :meth:`begin`.""" + + resource = { + 'jobReference': { + 'projectId': self.project, + 'jobId': self.name, + }, + 'configuration': { + self._JOB_TYPE: { + 'query': self.query, + }, + }, + } + configuration = resource['configuration'][self._JOB_TYPE] + self._populate_config_resource(configuration) + + return resource + + def _scrub_local_properties(self, cleaned): + """Helper: handle subclass properties in cleaned. + + .. note: + + This method assumes that the project found in the resource matches + the client's project. + """ + configuration = cleaned['configuration']['query'] + dest_remote = configuration.get('destinationTable') + + if dest_remote is None: + if self.destination is not None: + del self.destination + else: + dest_local = self._destination_table_resource() + if dest_remote != dest_local: + dataset = self._client.dataset(dest_remote['datasetId']) + self.destination = dataset.table(dest_remote['tableId']) + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + :type resource: dict + :param resource: dataset job representation returned from the API + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: Client which holds credentials and project + configuration for the dataset. + + :rtype: :class:`google.cloud.bigquery.job.RunAsyncQueryJob` + :returns: Job parsed from ``resource``. + """ + name, config = cls._get_resource_config(resource) + query = config['query'] + job = cls(name, query, client=client) + job._set_properties(resource) + return job + + def results(self): + """Construct a QueryResults instance, bound to this job. + + :rtype: :class:`~google.cloud.bigquery.query.QueryResults` + :returns: results instance + """ + from google.cloud.bigquery.query import QueryResults + return QueryResults.from_query_job(self) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py new file mode 100644 index 000000000000..6146c0431657 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -0,0 +1,405 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Define API Queries.""" + +import six + +from google.cloud.bigquery._helpers import _TypedProperty +from google.cloud.bigquery._helpers import _rows_from_json +from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery._helpers import _build_udf_resources +from google.cloud.bigquery._helpers import UDFResourcesProperty + + +class _SyncQueryConfiguration(object): + """User-settable configuration options for synchronous query jobs. + + Values which are ``None`` -> server defaults. + """ + _default_dataset = None + _dry_run = None + _max_results = None + _timeout_ms = None + _preserve_nulls = None + _use_query_cache = None + _use_legacy_sql = None + + +class QueryResults(object): + """Synchronous job: query tables. + + :type query: string + :param query: SQL query string + + :type client: :class:`google.cloud.bigquery.client.Client` + :param client: A client which holds credentials and project configuration + for the dataset (which requires a project). + + :type udf_resources: tuple + :param udf_resources: An iterable of + :class:`google.cloud.bigquery.job.UDFResource` + (empty by default) + """ + + _UDF_KEY = 'userDefinedFunctionResources' + + def __init__(self, query, client, udf_resources=()): + self._client = client + self._properties = {} + self.query = query + self._configuration = _SyncQueryConfiguration() + self.udf_resources = udf_resources + self._job = None + + @classmethod + def from_query_job(cls, job): + """Factory: construct from an existing job. + + :type job: :class:`~google.cloud.bigquery.job.QueryJob` + :param job: existing job + + :rtype: :class:`QueryResults` + :returns: the instance, bound to the job + """ + instance = cls(job.query, job._client, job.udf_resources) + instance._job = job + job_ref = instance._properties.setdefault('jobReference', {}) + job_ref['jobId'] = job.name + if job.default_dataset is not None: + instance.default_dataset = job.default_dataset + if job.use_query_cache is not None: + instance.use_query_cache = job.use_query_cache + if job.use_legacy_sql is not None: + instance.use_legacy_sql = job.use_legacy_sql + return instance + + @property + def project(self): + """Project bound to the job. + + :rtype: string + :returns: the project (derived from the client). + """ + return self._client.project + + def _require_client(self, client): + """Check client or verify over-ride. + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :rtype: :class:`google.cloud.bigquery.client.Client` + :returns: The client passed in or the currently bound client. + """ + if client is None: + client = self._client + return client + + @property + def cache_hit(self): + """Query results served from cache. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#cacheHit + + :rtype: boolean or ``NoneType`` + :returns: True if the query results were served from cache (None + until set by the server). + """ + return self._properties.get('cacheHit') + + @property + def complete(self): + """Server completed query. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#jobComplete + + :rtype: boolean or ``NoneType`` + :returns: True if the query completed on the server (None + until set by the server). + """ + return self._properties.get('jobComplete') + + @property + def errors(self): + """Errors generated by the query. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#errors + + :rtype: list of mapping, or ``NoneType`` + :returns: Mappings describing errors generated on the server (None + until set by the server). + """ + return self._properties.get('errors') + + @property + def name(self): + """Job name, generated by the back-end. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#jobReference + + :rtype: list of mapping, or ``NoneType`` + :returns: Mappings describing errors generated on the server (None + until set by the server). + """ + return self._properties.get('jobReference', {}).get('jobId') + + @property + def job(self): + """Job instance used to run the query. + + :rtype: :class:`google.cloud.bigquery.job.QueryJob`, or ``NoneType`` + :returns: Job instance used to run the query (None until + ``jobReference`` property is set by the server). + """ + if self._job is None: + job_ref = self._properties.get('jobReference') + if job_ref is not None: + self._job = QueryJob(job_ref['jobId'], self.query, + self._client) + return self._job + + @property + def page_token(self): + """Token for fetching next bach of results. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#pageToken + + :rtype: string, or ``NoneType`` + :returns: Token generated on the server (None until set by the server). + """ + return self._properties.get('pageToken') + + @property + def total_rows(self): + """Total number of rows returned by the query. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalRows + + :rtype: integer, or ``NoneType`` + :returns: Count generated on the server (None until set by the server). + """ + return self._properties.get('totalRows') + + @property + def total_bytes_processed(self): + """Total number of bytes processed by the query. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalBytesProcessed + + :rtype: integer, or ``NoneType`` + :returns: Count generated on the server (None until set by the server). + """ + return self._properties.get('totalBytesProcessed') + + @property + def rows(self): + """Query results. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#rows + + :rtype: list of tuples of row values, or ``NoneType`` + :returns: fields describing the schema (None until set by the server). + """ + return _rows_from_json(self._properties.get('rows', ()), self.schema) + + @property + def schema(self): + """Schema for query results. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#schema + + :rtype: list of :class:`SchemaField`, or ``NoneType`` + :returns: fields describing the schema (None until set by the server). + """ + return _parse_schema_resource(self._properties.get('schema', {})) + + default_dataset = _TypedProperty('default_dataset', Dataset) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#defaultDataset + """ + + dry_run = _TypedProperty('dry_run', bool) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#dryRun + """ + + max_results = _TypedProperty('max_results', six.integer_types) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#maxResults + """ + + preserve_nulls = _TypedProperty('preserve_nulls', bool) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#preserveNulls + """ + + timeout_ms = _TypedProperty('timeout_ms', six.integer_types) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#timeoutMs + """ + + udf_resources = UDFResourcesProperty() + + use_query_cache = _TypedProperty('use_query_cache', bool) + """See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#useQueryCache + """ + + use_legacy_sql = _TypedProperty('use_legacy_sql', bool) + """See: + https://cloud.google.com/bigquery/docs/\ + reference/v2/jobs/query#useLegacySql + """ + + def _set_properties(self, api_response): + """Update properties from resource in body of ``api_response`` + + :type api_response: httplib2.Response + :param api_response: response returned from an API call + """ + self._properties.clear() + self._properties.update(api_response) + + def _build_resource(self): + """Generate a resource for :meth:`begin`.""" + resource = {'query': self.query} + + if self.default_dataset is not None: + resource['defaultDataset'] = { + 'projectId': self.project, + 'datasetId': self.default_dataset.name, + } + + if self.max_results is not None: + resource['maxResults'] = self.max_results + + if self.preserve_nulls is not None: + resource['preserveNulls'] = self.preserve_nulls + + if self.timeout_ms is not None: + resource['timeoutMs'] = self.timeout_ms + + if self.use_query_cache is not None: + resource['useQueryCache'] = self.use_query_cache + + if self.use_legacy_sql is not None: + resource['useLegacySql'] = self.use_legacy_sql + + if self.dry_run is not None: + resource['dryRun'] = self.dry_run + + if len(self._udf_resources) > 0: + resource[self._UDF_KEY] = _build_udf_resources(self._udf_resources) + + return resource + + def run(self, client=None): + """API call: run the query via a POST request + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/query + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + """ + if self._job is not None: + raise ValueError("Query job is already running.") + + client = self._require_client(client) + path = '/projects/%s/queries' % (self.project,) + api_response = client.connection.api_request( + method='POST', path=path, data=self._build_resource()) + self._set_properties(api_response) + + def fetch_data(self, max_results=None, page_token=None, start_index=None, + timeout_ms=None, client=None): + """API call: fetch a page of query result data via a GET request + + See: + https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults + + :type max_results: integer or ``NoneType`` + :param max_results: maximum number of rows to return. + + :type page_token: string or ``NoneType`` + :param page_token: token representing a cursor into the table's rows. + + :type start_index: integer or ``NoneType`` + :param start_index: zero-based index of starting row + + :type timeout_ms: integer or ``NoneType`` + :param timeout_ms: timeout, in milliseconds, to wait for query to + complete + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :rtype: tuple + :returns: ``(row_data, total_rows, page_token)``, where ``row_data`` + is a list of tuples, one per result row, containing only + the values; ``total_rows`` is a count of the total number + of rows in the table; and ``page_token`` is an opaque + string which can be used to fetch the next batch of rows + (``None`` if no further batches can be fetched). + :raises: ValueError if the query has not yet been executed. + """ + if self.name is None: + raise ValueError("Query not yet executed: call 'run()'") + + client = self._require_client(client) + params = {} + + if max_results is not None: + params['maxResults'] = max_results + + if page_token is not None: + params['pageToken'] = page_token + + if start_index is not None: + params['startIndex'] = start_index + + if timeout_ms is not None: + params['timeoutMs'] = timeout_ms + + path = '/projects/%s/queries/%s' % (self.project, self.name) + response = client.connection.api_request(method='GET', + path=path, + query_params=params) + self._set_properties(response) + + total_rows = response.get('totalRows') + if total_rows is not None: + total_rows = int(total_rows) + page_token = response.get('pageToken') + rows_data = _rows_from_json(response.get('rows', ()), self.schema) + + return rows_data, total_rows, page_token diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py new file mode 100644 index 000000000000..a987454cc9a7 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -0,0 +1,52 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Scheamas for BigQuery tables / queries.""" + + +class SchemaField(object): + """Describe a single field within a table schema. + + :type name: str + :param name: the name of the field. + + :type field_type: str + :param field_type: the type of the field (one of 'STRING', 'INTEGER', + 'FLOAT', 'BOOLEAN', 'TIMESTAMP' or 'RECORD'). + + :type mode: str + :param mode: the type of the field (one of 'NULLABLE', 'REQUIRED', + or 'REPEATED'). + + :type description: str + :param description: optional description for the field. + + :type fields: list of :class:`SchemaField`, or None + :param fields: subfields (requires ``field_type`` of 'RECORD'). + """ + def __init__(self, name, field_type, mode='NULLABLE', description=None, + fields=None): + self.name = name + self.field_type = field_type + self.mode = mode + self.description = description + self.fields = fields + + def __eq__(self, other): + return ( + self.name == other.name and + self.field_type.lower() == other.field_type.lower() and + self.mode == other.mode and + self.description == other.description and + self.fields == other.fields) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py new file mode 100644 index 000000000000..56f7f7124d5a --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -0,0 +1,1093 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Define API Datasets.""" + +import datetime +import json +import os + +import httplib2 +import six + +from google.cloud._helpers import _datetime_from_microseconds +from google.cloud._helpers import _microseconds_from_datetime +from google.cloud._helpers import _millis_from_datetime +from google.cloud.exceptions import NotFound +from google.cloud.exceptions import make_exception +from google.cloud.streaming.exceptions import HttpError +from google.cloud.streaming.http_wrapper import Request +from google.cloud.streaming.http_wrapper import make_api_request +from google.cloud.streaming.transfer import RESUMABLE_UPLOAD +from google.cloud.streaming.transfer import Upload +from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery._helpers import _rows_from_json + + +_MARKER = object() + + +class Table(object): + """Tables represent a set of rows whose values correspond to a schema. + + See: + https://cloud.google.com/bigquery/docs/reference/v2/tables + + :type name: str + :param name: the name of the table + + :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` + :param dataset: The dataset which contains the table. + + :type schema: list of :class:`SchemaField` + :param schema: The table's schema + """ + + _schema = None + + def __init__(self, name, dataset, schema=()): + self.name = name + self._dataset = dataset + self._properties = {} + # Let the @property do validation. + self.schema = schema + + @property + def project(self): + """Project bound to the table. + + :rtype: str + :returns: the project (derived from the dataset). + """ + return self._dataset.project + + @property + def dataset_name(self): + """Name of dataset containing the table. + + :rtype: str + :returns: the ID (derived from the dataset). + """ + return self._dataset.name + + @property + def path(self): + """URL path for the table's APIs. + + :rtype: str + :returns: the path based on project and dataste name. + """ + return '%s/tables/%s' % (self._dataset.path, self.name) + + @property + def schema(self): + """Table's schema. + + :rtype: list of :class:`SchemaField` + :returns: fields describing the schema + """ + return list(self._schema) + + @schema.setter + def schema(self, value): + """Update table's schema + + :type value: list of :class:`SchemaField` + :param value: fields describing the schema + + :raises: TypeError if 'value' is not a sequence, or ValueError if + any item in the sequence is not a SchemaField + """ + if not all(isinstance(field, SchemaField) for field in value): + raise ValueError('Schema items must be fields') + self._schema = tuple(value) + + @property + def created(self): + """Datetime at which the table was created. + + :rtype: ``datetime.datetime``, or ``NoneType`` + :returns: the creation time (None until set from the server). + """ + creation_time = self._properties.get('creationTime') + if creation_time is not None: + # creation_time will be in milliseconds. + return _datetime_from_microseconds(1000.0 * creation_time) + + @property + def etag(self): + """ETag for the table resource. + + :rtype: str, or ``NoneType`` + :returns: the ETag (None until set from the server). + """ + return self._properties.get('etag') + + @property + def modified(self): + """Datetime at which the table was last modified. + + :rtype: ``datetime.datetime``, or ``NoneType`` + :returns: the modification time (None until set from the server). + """ + modified_time = self._properties.get('lastModifiedTime') + if modified_time is not None: + # modified_time will be in milliseconds. + return _datetime_from_microseconds(1000.0 * modified_time) + + @property + def num_bytes(self): + """The size of the table in bytes. + + :rtype: integer, or ``NoneType`` + :returns: the byte count (None until set from the server). + """ + num_bytes_as_str = self._properties.get('numBytes') + if num_bytes_as_str is not None: + return int(num_bytes_as_str) + + @property + def num_rows(self): + """The number of rows in the table. + + :rtype: integer, or ``NoneType`` + :returns: the row count (None until set from the server). + """ + num_rows_as_str = self._properties.get('numRows') + if num_rows_as_str is not None: + return int(num_rows_as_str) + + @property + def self_link(self): + """URL for the table resource. + + :rtype: str, or ``NoneType`` + :returns: the URL (None until set from the server). + """ + return self._properties.get('selfLink') + + @property + def table_id(self): + """ID for the table resource. + + :rtype: str, or ``NoneType`` + :returns: the ID (None until set from the server). + """ + return self._properties.get('id') + + @property + def table_type(self): + """The type of the table. + + Possible values are "TABLE" or "VIEW". + + :rtype: str, or ``NoneType`` + :returns: the URL (None until set from the server). + """ + return self._properties.get('type') + + @property + def partitioning_type(self): + """Time partitioning of the table. + :rtype: str, or ``NoneType`` + :returns: Returns type if the table is partitioned, None otherwise. + """ + return self._properties.get('timePartitioning', {}).get('type') + + @partitioning_type.setter + def partitioning_type(self, value): + """Update the partitioning type of the table + + :type value: str + :param value: partitioning type only "DAY" is currently supported + """ + if value not in ('DAY', None): + raise ValueError("value must be one of ['DAY', None]") + + if value is None: + self._properties.pop('timePartitioning', None) + else: + time_part = self._properties.setdefault('timePartitioning', {}) + time_part['type'] = value.upper() + + @property + def partition_expiration(self): + """Expiration time in ms for a partition + :rtype: int, or ``NoneType`` + :returns: Returns the time in ms for partition expiration + """ + return self._properties.get('timePartitioning', {}).get('expirationMs') + + @partition_expiration.setter + def partition_expiration(self, value): + """Update the experation time in ms for a partition + + :type value: int + :param value: partition experiation time in ms + """ + if not isinstance(value, (int, type(None))): + raise ValueError( + "must be an integer representing millisseconds or None") + + if value is None: + if 'timePartitioning' in self._properties: + self._properties['timePartitioning'].pop('expirationMs') + else: + try: + self._properties['timePartitioning']['expirationMs'] = value + except KeyError: + self._properties['timePartitioning'] = {'type': 'DAY'} + self._properties['timePartitioning']['expirationMs'] = value + + @property + def description(self): + """Description of the table. + + :rtype: str, or ``NoneType`` + :returns: The description as set by the user, or None (the default). + """ + return self._properties.get('description') + + @description.setter + def description(self, value): + """Update description of the table. + + :type value: str, or ``NoneType`` + :param value: new description + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, six.string_types) and value is not None: + raise ValueError("Pass a string, or None") + self._properties['description'] = value + + @property + def expires(self): + """Datetime at which the table will be removed. + + :rtype: ``datetime.datetime``, or ``NoneType`` + :returns: the expiration time, or None + """ + expiration_time = self._properties.get('expirationTime') + if expiration_time is not None: + # expiration_time will be in milliseconds. + return _datetime_from_microseconds(1000.0 * expiration_time) + + @expires.setter + def expires(self, value): + """Update datetime at which the table will be removed. + + :type value: ``datetime.datetime``, or ``NoneType`` + :param value: the new expiration time, or None + """ + if not isinstance(value, datetime.datetime) and value is not None: + raise ValueError("Pass a datetime, or None") + self._properties['expirationTime'] = _millis_from_datetime(value) + + @property + def friendly_name(self): + """Title of the table. + + :rtype: str, or ``NoneType`` + :returns: The name as set by the user, or None (the default). + """ + return self._properties.get('friendlyName') + + @friendly_name.setter + def friendly_name(self, value): + """Update title of the table. + + :type value: str, or ``NoneType`` + :param value: new title + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, six.string_types) and value is not None: + raise ValueError("Pass a string, or None") + self._properties['friendlyName'] = value + + @property + def location(self): + """Location in which the table is hosted. + + :rtype: str, or ``NoneType`` + :returns: The location as set by the user, or None (the default). + """ + return self._properties.get('location') + + @location.setter + def location(self, value): + """Update location in which the table is hosted. + + :type value: str, or ``NoneType`` + :param value: new location + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, six.string_types) and value is not None: + raise ValueError("Pass a string, or None") + self._properties['location'] = value + + @property + def view_query(self): + """SQL query defining the table as a view. + + :rtype: str, or ``NoneType`` + :returns: The query as set by the user, or None (the default). + """ + view = self._properties.get('view') + if view is not None: + return view.get('query') + + @view_query.setter + def view_query(self, value): + """Update SQL query defining the table as a view. + + :type value: str + :param value: new query + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, six.string_types): + raise ValueError("Pass a string") + self._properties['view'] = {'query': value} + + @view_query.deleter + def view_query(self): + """Delete SQL query defining the table as a view.""" + self._properties.pop('view', None) + + def list_partitions(self, client=None): + """List the partitions in a table. + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :rtype: list + :returns: a list of time partitions + """ + query = self._require_client(client).run_sync_query( + 'SELECT partition_id from [%s.%s$__PARTITIONS_SUMMARY__]' % + (self.dataset_name, self.name)) + query.run() + return [row[0] for row in query.rows] + + @classmethod + def from_api_repr(cls, resource, dataset): + """Factory: construct a table given its API representation + + :type resource: dict + :param resource: table resource representation returned from the API + + :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` + :param dataset: The dataset containing the table. + + :rtype: :class:`google.cloud.bigquery.table.Table` + :returns: Table parsed from ``resource``. + """ + if ('tableReference' not in resource or + 'tableId' not in resource['tableReference']): + raise KeyError('Resource lacks required identity information:' + '["tableReference"]["tableId"]') + table_name = resource['tableReference']['tableId'] + table = cls(table_name, dataset=dataset) + table._set_properties(resource) + return table + + def _require_client(self, client): + """Check client or verify over-ride. + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :rtype: :class:`google.cloud.bigquery.client.Client` + :returns: The client passed in or the currently bound client. + """ + if client is None: + client = self._dataset._client + return client + + def _set_properties(self, api_response): + """Update properties from resource in body of ``api_response`` + + :type api_response: httplib2.Response + :param api_response: response returned from an API call + """ + self._properties.clear() + cleaned = api_response.copy() + schema = cleaned.pop('schema', {'fields': ()}) + self.schema = _parse_schema_resource(schema) + if 'creationTime' in cleaned: + cleaned['creationTime'] = float(cleaned['creationTime']) + if 'lastModifiedTime' in cleaned: + cleaned['lastModifiedTime'] = float(cleaned['lastModifiedTime']) + if 'expirationTime' in cleaned: + cleaned['expirationTime'] = float(cleaned['expirationTime']) + self._properties.update(cleaned) + + def _build_resource(self): + """Generate a resource for ``create`` or ``update``.""" + resource = { + 'tableReference': { + 'projectId': self._dataset.project, + 'datasetId': self._dataset.name, + 'tableId': self.name}, + } + if self.description is not None: + resource['description'] = self.description + + if self.expires is not None: + value = _millis_from_datetime(self.expires) + resource['expirationTime'] = value + + if self.friendly_name is not None: + resource['friendlyName'] = self.friendly_name + + if self.location is not None: + resource['location'] = self.location + + if self.partitioning_type is not None: + resource['timePartitioning'] = self._properties['timePartitioning'] + + if self.view_query is not None: + view = resource['view'] = {} + view['query'] = self.view_query + elif self._schema: + resource['schema'] = { + 'fields': _build_schema_resource(self._schema) + } + else: + raise ValueError("Set either 'view_query' or 'schema'.") + + return resource + + def create(self, client=None): + """API call: create the dataset via a PUT request + + See: + https://cloud.google.com/bigquery/docs/reference/v2/tables/insert + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + """ + client = self._require_client(client) + path = '/projects/%s/datasets/%s/tables' % ( + self._dataset.project, self._dataset.name) + api_response = client.connection.api_request( + method='POST', path=path, data=self._build_resource()) + self._set_properties(api_response) + + def exists(self, client=None): + """API call: test for the existence of the table via a GET request + + See + https://cloud.google.com/bigquery/docs/reference/v2/tables/get + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :rtype: bool + :returns: Boolean indicating existence of the table. + """ + client = self._require_client(client) + + try: + client.connection.api_request(method='GET', path=self.path, + query_params={'fields': 'id'}) + except NotFound: + return False + else: + return True + + def reload(self, client=None): + """API call: refresh table properties via a GET request + + See + https://cloud.google.com/bigquery/docs/reference/v2/tables/get + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + """ + client = self._require_client(client) + + api_response = client.connection.api_request( + method='GET', path=self.path) + self._set_properties(api_response) + + def patch(self, + client=None, + friendly_name=_MARKER, + description=_MARKER, + location=_MARKER, + expires=_MARKER, + view_query=_MARKER, + schema=_MARKER): + """API call: update individual table properties via a PATCH request + + See + https://cloud.google.com/bigquery/docs/reference/v2/tables/patch + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :type friendly_name: str or ``NoneType`` + :param friendly_name: point in time at which the table expires. + + :type description: str or ``NoneType`` + :param description: point in time at which the table expires. + + :type location: str or ``NoneType`` + :param location: point in time at which the table expires. + + :type expires: :class:`datetime.datetime` or ``NoneType`` + :param expires: point in time at which the table expires. + + :type view_query: str + :param view_query: SQL query defining the table as a view + + :type schema: list of :class:`SchemaField` + :param schema: fields describing the schema + + :raises: ValueError for invalid value types. + """ + client = self._require_client(client) + + partial = {} + + if expires is not _MARKER: + if (not isinstance(expires, datetime.datetime) and + expires is not None): + raise ValueError("Pass a datetime, or None") + partial['expirationTime'] = _millis_from_datetime(expires) + + if description is not _MARKER: + partial['description'] = description + + if friendly_name is not _MARKER: + partial['friendlyName'] = friendly_name + + if location is not _MARKER: + partial['location'] = location + + if view_query is not _MARKER: + if view_query is None: + partial['view'] = None + else: + partial['view'] = {'query': view_query} + + if schema is not _MARKER: + if schema is None: + partial['schema'] = None + else: + partial['schema'] = { + 'fields': _build_schema_resource(schema)} + + api_response = client.connection.api_request( + method='PATCH', path=self.path, data=partial) + self._set_properties(api_response) + + def update(self, client=None): + """API call: update table properties via a PUT request + + See + https://cloud.google.com/bigquery/docs/reference/v2/tables/update + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + """ + client = self._require_client(client) + api_response = client.connection.api_request( + method='PUT', path=self.path, data=self._build_resource()) + self._set_properties(api_response) + + def delete(self, client=None): + """API call: delete the table via a DELETE request + + See: + https://cloud.google.com/bigquery/docs/reference/v2/tables/delete + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + """ + client = self._require_client(client) + client.connection.api_request(method='DELETE', path=self.path) + + def fetch_data(self, max_results=None, page_token=None, client=None): + """API call: fetch the table data via a GET request + + See: + https://cloud.google.com/bigquery/docs/reference/v2/tabledata/list + + .. note:: + + This method assumes that its instance's ``schema`` attribute is + up-to-date with the schema as defined on the back-end: if the + two schemas are not identical, the values returned may be + incomplete. To ensure that the local copy of the schema is + up-to-date, call the table's ``reload`` method. + + :type max_results: integer or ``NoneType`` + :param max_results: maximum number of rows to return. + + :type page_token: str or ``NoneType`` + :param page_token: token representing a cursor into the table's rows. + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :rtype: tuple + :returns: ``(row_data, total_rows, page_token)``, where ``row_data`` + is a list of tuples, one per result row, containing only + the values; ``total_rows`` is a count of the total number + of rows in the table; and ``page_token`` is an opaque + string which can be used to fetch the next batch of rows + (``None`` if no further batches can be fetched). + """ + client = self._require_client(client) + params = {} + + if max_results is not None: + params['maxResults'] = max_results + + if page_token is not None: + params['pageToken'] = page_token + + response = client.connection.api_request(method='GET', + path='%s/data' % self.path, + query_params=params) + total_rows = response.get('totalRows') + if total_rows is not None: + total_rows = int(total_rows) + page_token = response.get('pageToken') + rows_data = _rows_from_json(response.get('rows', ()), self._schema) + + return rows_data, total_rows, page_token + + def insert_data(self, + rows, + row_ids=None, + skip_invalid_rows=None, + ignore_unknown_values=None, + template_suffix=None, + client=None): + """API call: insert table data via a POST request + + See: + https://cloud.google.com/bigquery/docs/reference/v2/tabledata/insertAll + + :type rows: list of tuples + :param rows: Row data to be inserted. Each tuple should contain data + for each schema field on the current table and in the + same order as the schema fields. + + :type row_ids: list of string + :param row_ids: Unique ids, one per row being inserted. If not + passed, no de-duplication occurs. + + :type skip_invalid_rows: boolean or ``NoneType`` + :param skip_invalid_rows: skip rows w/ invalid data? + + :type ignore_unknown_values: boolean or ``NoneType`` + :param ignore_unknown_values: ignore columns beyond schema? + + :type template_suffix: str or ``NoneType`` + :param template_suffix: treat ``name`` as a template table and provide + a suffix. BigQuery will create the table + `` + `` based on the + schema of the template table. See: + https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables + + :type client: :class:`~google.cloud.bigquery.client.Client` or + ``NoneType`` + :param client: the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + :rtype: list of mappings + :returns: One mapping per row with insert errors: the "index" key + identifies the row, and the "errors" key contains a list + of the mappings describing one or more problems with the + row. + """ + client = self._require_client(client) + rows_info = [] + data = {'rows': rows_info} + + for index, row in enumerate(rows): + row_info = {} + + for field, value in zip(self._schema, row): + if field.field_type == 'TIMESTAMP' and value is not None: + # BigQuery stores TIMESTAMP data internally as a + # UNIX timestamp with microsecond precision. + # Specifies the number of seconds since the epoch. + value = _microseconds_from_datetime(value) * 1e-6 + row_info[field.name] = value + + info = {'json': row_info} + if row_ids is not None: + info['insertId'] = row_ids[index] + + rows_info.append(info) + + if skip_invalid_rows is not None: + data['skipInvalidRows'] = skip_invalid_rows + + if ignore_unknown_values is not None: + data['ignoreUnknownValues'] = ignore_unknown_values + + if template_suffix is not None: + data['templateSuffix'] = template_suffix + + response = client.connection.api_request( + method='POST', + path='%s/insertAll' % self.path, + data=data) + errors = [] + + for error in response.get('insertErrors', ()): + errors.append({'index': int(error['index']), + 'errors': error['errors']}) + + return errors + + @staticmethod + def _check_response_error(request, http_response): + """Helper for :meth:`upload_from_file`.""" + info = http_response.info + status = int(info['status']) + if not 200 <= status < 300: + faux_response = httplib2.Response({'status': status}) + raise make_exception(faux_response, http_response.content, + error_info=request.url) + + # pylint: disable=too-many-arguments,too-many-locals + def upload_from_file(self, + file_obj, + source_format, + rewind=False, + size=None, + num_retries=6, + allow_jagged_rows=None, + allow_quoted_newlines=None, + create_disposition=None, + encoding=None, + field_delimiter=None, + ignore_unknown_values=None, + max_bad_records=None, + quote_character=None, + skip_leading_rows=None, + write_disposition=None, + client=None): + """Upload the contents of this table from a file-like object. + + The content type of the upload will either be + - The value passed in to the function (if any) + - ``text/csv``. + + :type file_obj: file + :param file_obj: A file handle opened in binary mode for reading. + + :type source_format: str + :param source_format: one of 'CSV' or 'NEWLINE_DELIMITED_JSON'. + job configuration option; see + :meth:`google.cloud.bigquery.job.LoadJob` + + :type rewind: boolean + :param rewind: If True, seek to the beginning of the file handle before + writing the file to Cloud Storage. + + :type size: int + :param size: The number of bytes to read from the file handle. + If not provided, we'll try to guess the size using + :func:`os.fstat`. (If the file handle is not from the + filesystem this won't be possible.) + + :type num_retries: integer + :param num_retries: Number of upload retries. Defaults to 6. + + :type allow_jagged_rows: boolean + :param allow_jagged_rows: job configuration option; see + :meth:`google.cloud.bigquery.job.LoadJob`. + + :type allow_quoted_newlines: boolean + :param allow_quoted_newlines: job configuration option; see + :meth:`google.cloud.bigquery.job.LoadJob`. + + :type create_disposition: str + :param create_disposition: job configuration option; see + :meth:`google.cloud.bigquery.job.LoadJob`. + + :type encoding: str + :param encoding: job configuration option; see + :meth:`google.cloud.bigquery.job.LoadJob`. + + :type field_delimiter: str + :param field_delimiter: job configuration option; see + :meth:`google.cloud.bigquery.job.LoadJob`. + + :type ignore_unknown_values: boolean + :param ignore_unknown_values: job configuration option; see + :meth:`google.cloud.bigquery.job.LoadJob`. + + :type max_bad_records: integer + :param max_bad_records: job configuration option; see + :meth:`google.cloud.bigquery.job.LoadJob`. + + :type quote_character: str + :param quote_character: job configuration option; see + :meth:`google.cloud.bigquery.job.LoadJob`. + + :type skip_leading_rows: integer + :param skip_leading_rows: job configuration option; see + :meth:`google.cloud.bigquery.job.LoadJob`. + + :type write_disposition: str + :param write_disposition: job configuration option; see + :meth:`google.cloud.bigquery.job.LoadJob`. + + :type client: :class:`~google.cloud.storage.client.Client` or + ``NoneType`` + :param client: Optional. The client to use. If not passed, falls back + to the ``client`` stored on the current dataset. + + :rtype: :class:`google.cloud.bigquery.jobs.LoadTableFromStorageJob` + :returns: the job instance used to load the data (e.g., for + querying status). Note that the job is already started: + do not call ``job.begin()``. + :raises: :class:`ValueError` if ``size`` is not passed in and can not + be determined, or if the ``file_obj`` can be detected to be + a file opened in text mode. + """ + client = self._require_client(client) + connection = client.connection + content_type = 'application/octet-stream' + + # Rewind the file if desired. + if rewind: + file_obj.seek(0, os.SEEK_SET) + + mode = getattr(file_obj, 'mode', None) + + if mode is not None and mode not in ('rb', 'r+b', 'rb+'): + raise ValueError( + "Cannot upload files opened in text mode: use " + "open(filename, mode='rb') or open(filename, mode='r+b')") + + # Get the basic stats about the file. + total_bytes = size + if total_bytes is None: + if hasattr(file_obj, 'fileno'): + total_bytes = os.fstat(file_obj.fileno()).st_size + else: + raise ValueError('total bytes could not be determined. Please ' + 'pass an explicit size.') + headers = { + 'Accept': 'application/json', + 'Accept-Encoding': 'gzip, deflate', + 'User-Agent': connection.USER_AGENT, + 'content-type': 'application/json', + } + + metadata = { + 'configuration': { + 'load': { + 'sourceFormat': source_format, + 'schema': { + 'fields': _build_schema_resource(self._schema), + }, + 'destinationTable': { + 'projectId': self._dataset.project, + 'datasetId': self._dataset.name, + 'tableId': self.name, + } + } + } + } + + _configure_job_metadata(metadata, allow_jagged_rows, + allow_quoted_newlines, create_disposition, + encoding, field_delimiter, + ignore_unknown_values, max_bad_records, + quote_character, skip_leading_rows, + write_disposition) + + upload = Upload(file_obj, content_type, total_bytes, + auto_transfer=False) + + url_builder = _UrlBuilder() + upload_config = _UploadConfig() + + # Base URL may change once we know simple vs. resumable. + base_url = connection.API_BASE_URL + '/upload' + path = '/projects/%s/jobs' % (self._dataset.project,) + upload_url = connection.build_api_url(api_base_url=base_url, path=path) + + # Use apitools 'Upload' facility. + request = Request(upload_url, 'POST', headers, + body=json.dumps(metadata)) + + upload.configure_request(upload_config, request, url_builder) + query_params = url_builder.query_params + base_url = connection.API_BASE_URL + '/upload' + request.url = connection.build_api_url(api_base_url=base_url, + path=path, + query_params=query_params) + try: + upload.initialize_upload(request, connection.http) + except HttpError as err_response: + faux_response = httplib2.Response(err_response.response) + raise make_exception(faux_response, err_response.content, + error_info=request.url) + + if upload.strategy == RESUMABLE_UPLOAD: + http_response = upload.stream_file(use_chunks=True) + else: + http_response = make_api_request(connection.http, request, + retries=num_retries) + + self._check_response_error(request, http_response) + + response_content = http_response.content + if not isinstance(response_content, + six.string_types): # pragma: NO COVER Python3 + response_content = response_content.decode('utf-8') + return client.job_from_resource(json.loads(response_content)) + # pylint: enable=too-many-arguments,too-many-locals + + +def _configure_job_metadata(metadata, # pylint: disable=too-many-arguments + allow_jagged_rows, + allow_quoted_newlines, + create_disposition, + encoding, + field_delimiter, + ignore_unknown_values, + max_bad_records, + quote_character, + skip_leading_rows, + write_disposition): + """Helper for :meth:`Table.upload_from_file`.""" + load_config = metadata['configuration']['load'] + + if allow_jagged_rows is not None: + load_config['allowJaggedRows'] = allow_jagged_rows + + if allow_quoted_newlines is not None: + load_config['allowQuotedNewlines'] = allow_quoted_newlines + + if create_disposition is not None: + load_config['createDisposition'] = create_disposition + + if encoding is not None: + load_config['encoding'] = encoding + + if field_delimiter is not None: + load_config['fieldDelimiter'] = field_delimiter + + if ignore_unknown_values is not None: + load_config['ignoreUnknownValues'] = ignore_unknown_values + + if max_bad_records is not None: + load_config['maxBadRecords'] = max_bad_records + + if quote_character is not None: + load_config['quote'] = quote_character + + if skip_leading_rows is not None: + load_config['skipLeadingRows'] = skip_leading_rows + + if write_disposition is not None: + load_config['writeDisposition'] = write_disposition + + +def _parse_schema_resource(info): + """Parse a resource fragment into a schema field. + + :type info: mapping + :param info: should contain a "fields" key to be parsed + + :rtype: list of :class:`SchemaField`, or ``NoneType`` + :returns: a list of parsed fields, or ``None`` if no "fields" key is + present in ``info``. + """ + if 'fields' not in info: + return None + + schema = [] + for r_field in info['fields']: + name = r_field['name'] + field_type = r_field['type'] + mode = r_field.get('mode', 'NULLABLE') + description = r_field.get('description') + sub_fields = _parse_schema_resource(r_field) + schema.append( + SchemaField(name, field_type, mode, description, sub_fields)) + return schema + + +def _build_schema_resource(fields): + """Generate a resource fragment for a schema. + + :type fields: sequence of :class:`SchemaField` + :param fields: schema to be dumped + + :rtype: mapping + :returns: a mapping describing the schema of the supplied fields. + """ + infos = [] + for field in fields: + info = {'name': field.name, + 'type': field.field_type, + 'mode': field.mode} + if field.description is not None: + info['description'] = field.description + if field.fields is not None: + info['fields'] = _build_schema_resource(field.fields) + infos.append(info) + return infos + + +class _UploadConfig(object): + """Faux message FBO apitools' 'configure_request'.""" + accept = ['*/*'] + max_size = None + resumable_multipart = True + resumable_path = u'/upload/bigquery/v2/projects/{project}/jobs' + simple_multipart = True + simple_path = u'/upload/bigquery/v2/projects/{project}/jobs' + + +class _UrlBuilder(object): + """Faux builder FBO apitools' 'configure_request'""" + def __init__(self): + self.query_params = {} + self._relative_path = '' diff --git a/packages/google-cloud-bigquery/unit_tests/__init__.py b/packages/google-cloud-bigquery/unit_tests/__init__.py new file mode 100644 index 000000000000..58e0d9153632 --- /dev/null +++ b/packages/google-cloud-bigquery/unit_tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py new file mode 100644 index 000000000000..5631abe6c184 --- /dev/null +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -0,0 +1,496 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + + +class Test_not_null(unittest.TestCase): + + def _callFUT(self, value, field): + from google.cloud.bigquery._helpers import _not_null + return _not_null(value, field) + + def test_w_none_nullable(self): + self.assertFalse(self._callFUT(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + self.assertTrue(self._callFUT(None, _Field('REQUIRED'))) + + def test_w_value(self): + self.assertTrue(self._callFUT(object(), object())) + + +class Test_int_from_json(unittest.TestCase): + + def _callFUT(self, value, field): + from google.cloud.bigquery._helpers import _int_from_json + return _int_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._callFUT(None, _Field('REQUIRED')) + + def test_w_string_value(self): + coerced = self._callFUT('42', object()) + self.assertEqual(coerced, 42) + + def test_w_float_value(self): + coerced = self._callFUT(42, object()) + self.assertEqual(coerced, 42) + + +class Test_float_from_json(unittest.TestCase): + + def _callFUT(self, value, field): + from google.cloud.bigquery._helpers import _float_from_json + return _float_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._callFUT(None, _Field('REQUIRED')) + + def test_w_string_value(self): + coerced = self._callFUT('3.1415', object()) + self.assertEqual(coerced, 3.1415) + + def test_w_float_value(self): + coerced = self._callFUT(3.1415, object()) + self.assertEqual(coerced, 3.1415) + + +class Test_bool_from_json(unittest.TestCase): + + def _callFUT(self, value, field): + from google.cloud.bigquery._helpers import _bool_from_json + return _bool_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + with self.assertRaises(AttributeError): + self._callFUT(None, _Field('REQUIRED')) + + def test_w_value_t(self): + coerced = self._callFUT('T', object()) + self.assertTrue(coerced) + + def test_w_value_true(self): + coerced = self._callFUT('True', object()) + self.assertTrue(coerced) + + def test_w_value_1(self): + coerced = self._callFUT('1', object()) + self.assertTrue(coerced) + + def test_w_value_other(self): + coerced = self._callFUT('f', object()) + self.assertFalse(coerced) + + +class Test_datetime_from_json(unittest.TestCase): + + def _callFUT(self, value, field): + from google.cloud.bigquery._helpers import _datetime_from_json + return _datetime_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._callFUT(None, _Field('REQUIRED')) + + def test_w_string_value(self): + import datetime + from google.cloud._helpers import _EPOCH + coerced = self._callFUT('1.234567', object()) + self.assertEqual( + coerced, + _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) + + def test_w_float_value(self): + import datetime + from google.cloud._helpers import _EPOCH + coerced = self._callFUT(1.234567, object()) + self.assertEqual( + coerced, + _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) + + +class Test_date_from_json(unittest.TestCase): + + def _callFUT(self, value, field): + from google.cloud.bigquery._helpers import _date_from_json + return _date_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._callFUT(None, _Field('REQUIRED')) + + def test_w_string_value(self): + import datetime + coerced = self._callFUT('1987-09-22', object()) + self.assertEqual( + coerced, + datetime.date(1987, 9, 22)) + + +class Test_record_from_json(unittest.TestCase): + + def _callFUT(self, value, field): + from google.cloud.bigquery._helpers import _record_from_json + return _record_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._callFUT(None, _Field('REQUIRED')) + + def test_w_nullable_subfield_none(self): + subfield = _Field('NULLABLE', 'age', 'INTEGER') + field = _Field('REQUIRED', fields=[subfield]) + value = {'f': [{'v': None}]} + coerced = self._callFUT(value, field) + self.assertEqual(coerced, {'age': None}) + + def test_w_scalar_subfield(self): + subfield = _Field('REQUIRED', 'age', 'INTEGER') + field = _Field('REQUIRED', fields=[subfield]) + value = {'f': [{'v': 42}]} + coerced = self._callFUT(value, field) + self.assertEqual(coerced, {'age': 42}) + + def test_w_repeated_subfield(self): + subfield = _Field('REPEATED', 'color', 'STRING') + field = _Field('REQUIRED', fields=[subfield]) + value = {'f': [{'v': ['red', 'yellow', 'blue']}]} + coerced = self._callFUT(value, field) + self.assertEqual(coerced, {'color': ['red', 'yellow', 'blue']}) + + def test_w_record_subfield(self): + full_name = _Field('REQUIRED', 'full_name', 'STRING') + area_code = _Field('REQUIRED', 'area_code', 'STRING') + local_number = _Field('REQUIRED', 'local_number', 'STRING') + rank = _Field('REQUIRED', 'rank', 'INTEGER') + phone = _Field('NULLABLE', 'phone', 'RECORD', + fields=[area_code, local_number, rank]) + person = _Field('REQUIRED', 'person', 'RECORD', + fields=[full_name, phone]) + value = { + 'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, + ], + } + expected = { + 'full_name': 'Phred Phlyntstone', + 'phone': { + 'area_code': '800', + 'local_number': '555-1212', + 'rank': 1, + } + } + coerced = self._callFUT(value, person) + self.assertEqual(coerced, expected) + + +class Test_string_from_json(unittest.TestCase): + + def _callFUT(self, value, field): + from google.cloud.bigquery._helpers import _string_from_json + return _string_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + self.assertIsNone(self._callFUT(None, _Field('RECORD'))) + + def test_w_string_value(self): + coerced = self._callFUT('Wonderful!', object()) + self.assertEqual(coerced, 'Wonderful!') + + +class Test_rows_from_json(unittest.TestCase): + + def _callFUT(self, value, field): + from google.cloud.bigquery._helpers import _rows_from_json + return _rows_from_json(value, field) + + def test_w_record_subfield(self): + full_name = _Field('REQUIRED', 'full_name', 'STRING') + area_code = _Field('REQUIRED', 'area_code', 'STRING') + local_number = _Field('REQUIRED', 'local_number', 'STRING') + rank = _Field('REQUIRED', 'rank', 'INTEGER') + phone = _Field('NULLABLE', 'phone', 'RECORD', + fields=[area_code, local_number, rank]) + color = _Field('REPEATED', 'color', 'STRING') + schema = [full_name, phone, color] + rows = [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, + {'v': ['orange', 'black']}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}}, + {'v': ['brown']}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': None}, + {'v': []}, + ]}, + ] + phred_phone = { + 'area_code': '800', + 'local_number': '555-1212', + 'rank': 1, + } + bharney_phone = { + 'area_code': '877', + 'local_number': '768-5309', + 'rank': 2, + } + expected = [ + ('Phred Phlyntstone', phred_phone, ['orange', 'black']), + ('Bharney Rhubble', bharney_phone, ['brown']), + ('Wylma Phlyntstone', None, []), + ] + coerced = self._callFUT(rows, schema) + self.assertEqual(coerced, expected) + + def test_w_int64_float64(self): + # "Standard" SQL dialect uses 'INT64', 'FLOAT64'. + candidate = _Field('REQUIRED', 'candidate', 'STRING') + votes = _Field('REQUIRED', 'votes', 'INT64') + percentage = _Field('REQUIRED', 'percentage', 'FLOAT64') + schema = [candidate, votes, percentage] + rows = [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': 8}, + {'v': 0.25}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': 4}, + {'v': 0.125}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': 20}, + {'v': 0.625}, + ]}, + ] + expected = [ + ('Phred Phlyntstone', 8, 0.25), + ('Bharney Rhubble', 4, 0.125), + ('Wylma Phlyntstone', 20, 0.625), + ] + coerced = self._callFUT(rows, schema) + self.assertEqual(coerced, expected) + + +class Test_ConfigurationProperty(unittest.TestCase): + + def _getTargetClass(self): + from google.cloud.bigquery._helpers import _ConfigurationProperty + return _ConfigurationProperty + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def test_it(self): + + class Configuration(object): + _attr = None + + class Wrapper(object): + attr = self._makeOne('attr') + + def __init__(self): + self._configuration = Configuration() + + self.assertEqual(Wrapper.attr.name, 'attr') + + wrapper = Wrapper() + self.assertIsNone(wrapper.attr) + + value = object() + wrapper.attr = value + self.assertIs(wrapper.attr, value) + self.assertIs(wrapper._configuration._attr, value) + + del wrapper.attr + self.assertIsNone(wrapper.attr) + self.assertIsNone(wrapper._configuration._attr) + + +class Test_TypedProperty(unittest.TestCase): + + def _getTargetClass(self): + from google.cloud.bigquery._helpers import _TypedProperty + return _TypedProperty + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def test_it(self): + + class Configuration(object): + _attr = None + + class Wrapper(object): + attr = self._makeOne('attr', int) + + def __init__(self): + self._configuration = Configuration() + + wrapper = Wrapper() + with self.assertRaises(ValueError): + wrapper.attr = 'BOGUS' + + wrapper.attr = 42 + self.assertEqual(wrapper.attr, 42) + self.assertEqual(wrapper._configuration._attr, 42) + + del wrapper.attr + self.assertIsNone(wrapper.attr) + self.assertIsNone(wrapper._configuration._attr) + + +class Test_EnumProperty(unittest.TestCase): + + def _getTargetClass(self): + from google.cloud.bigquery._helpers import _EnumProperty + return _EnumProperty + + def test_it(self): + + class Sub(self._getTargetClass()): + ALLOWED = ('FOO', 'BAR', 'BAZ') + + class Configuration(object): + _attr = None + + class Wrapper(object): + attr = Sub('attr') + + def __init__(self): + self._configuration = Configuration() + + wrapper = Wrapper() + with self.assertRaises(ValueError): + wrapper.attr = 'BOGUS' + + wrapper.attr = 'FOO' + self.assertEqual(wrapper.attr, 'FOO') + self.assertEqual(wrapper._configuration._attr, 'FOO') + + del wrapper.attr + self.assertIsNone(wrapper.attr) + self.assertIsNone(wrapper._configuration._attr) + + +class Test_UDFResourcesProperty(unittest.TestCase): + + def _getTargetClass(self): + from google.cloud.bigquery._helpers import UDFResourcesProperty + return UDFResourcesProperty + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def _descriptor_and_klass(self): + descriptor = self._makeOne() + + class _Test(object): + _udf_resources = () + udf_resources = descriptor + + return descriptor, _Test + + def test_class_getter(self): + descriptor, klass = self._descriptor_and_klass() + self.assertIs(klass.udf_resources, descriptor) + + def test_instance_getter_empty(self): + _, klass = self._descriptor_and_klass() + instance = klass() + self.assertEqual(instance.udf_resources, []) + + def test_instance_getter_w_non_empty_list(self): + from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] + _, klass = self._descriptor_and_klass() + instance = klass() + instance._udf_resources = tuple(udf_resources) + + self.assertEqual(instance.udf_resources, udf_resources) + + def test_instance_setter_w_empty_list(self): + from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] + _, klass = self._descriptor_and_klass() + instance = klass() + instance._udf_resources = udf_resources + + instance.udf_resources = [] + + self.assertEqual(instance.udf_resources, []) + + def test_instance_setter_w_valid_udf(self): + from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] + _, klass = self._descriptor_and_klass() + instance = klass() + + instance.udf_resources = udf_resources + + self.assertEqual(instance.udf_resources, udf_resources) + + def test_instance_setter_w_bad_udfs(self): + _, klass = self._descriptor_and_klass() + instance = klass() + + with self.assertRaises(ValueError): + instance.udf_resources = ["foo"] + + self.assertEqual(instance.udf_resources, []) + + +class _Field(object): + + def __init__(self, mode, name='unknown', field_type='UNKNOWN', fields=()): + self.mode = mode + self.name = name + self.field_type = field_type + self.fields = fields diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py new file mode 100644 index 000000000000..19892ce77e72 --- /dev/null +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -0,0 +1,492 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + + +class TestClient(unittest.TestCase): + + def _getTargetClass(self): + from google.cloud.bigquery.client import Client + return Client + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def test_ctor(self): + from google.cloud.bigquery.connection import Connection + PROJECT = 'PROJECT' + creds = _Credentials() + http = object() + client = self._makeOne(project=PROJECT, credentials=creds, http=http) + self.assertIsInstance(client.connection, Connection) + self.assertIs(client.connection.credentials, creds) + self.assertIs(client.connection.http, http) + + def test_list_projects_defaults(self): + from google.cloud.bigquery.client import Project + PROJECT_1 = 'PROJECT_ONE' + PROJECT_2 = 'PROJECT_TWO' + PATH = 'projects' + TOKEN = 'TOKEN' + DATA = { + 'nextPageToken': TOKEN, + 'projects': [ + {'kind': 'bigquery#project', + 'id': PROJECT_1, + 'numericId': 1, + 'projectReference': {'projectId': PROJECT_1}, + 'friendlyName': 'One'}, + {'kind': 'bigquery#project', + 'id': PROJECT_2, + 'numericId': 2, + 'projectReference': {'projectId': PROJECT_2}, + 'friendlyName': 'Two'}, + ] + } + creds = _Credentials() + client = self._makeOne(PROJECT_1, creds) + conn = client.connection = _Connection(DATA) + + projects, token = client.list_projects() + + self.assertEqual(len(projects), len(DATA['projects'])) + for found, expected in zip(projects, DATA['projects']): + self.assertIsInstance(found, Project) + self.assertEqual(found.project_id, expected['id']) + self.assertEqual(found.numeric_id, expected['numericId']) + self.assertEqual(found.friendly_name, expected['friendlyName']) + self.assertEqual(token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_projects_explicit_response_missing_projects_key(self): + PROJECT = 'PROJECT' + PATH = 'projects' + TOKEN = 'TOKEN' + DATA = {} + creds = _Credentials() + client = self._makeOne(PROJECT, creds) + conn = client.connection = _Connection(DATA) + + projects, token = client.list_projects(max_results=3, page_token=TOKEN) + + self.assertEqual(len(projects), 0) + self.assertIsNone(token) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], + {'maxResults': 3, 'pageToken': TOKEN}) + + def test_list_datasets_defaults(self): + from google.cloud.bigquery.dataset import Dataset + PROJECT = 'PROJECT' + DATASET_1 = 'dataset_one' + DATASET_2 = 'dataset_two' + PATH = 'projects/%s/datasets' % PROJECT + TOKEN = 'TOKEN' + DATA = { + 'nextPageToken': TOKEN, + 'datasets': [ + {'kind': 'bigquery#dataset', + 'id': '%s:%s' % (PROJECT, DATASET_1), + 'datasetReference': {'datasetId': DATASET_1, + 'projectId': PROJECT}, + 'friendlyName': None}, + {'kind': 'bigquery#dataset', + 'id': '%s:%s' % (PROJECT, DATASET_2), + 'datasetReference': {'datasetId': DATASET_2, + 'projectId': PROJECT}, + 'friendlyName': 'Two'}, + ] + } + creds = _Credentials() + client = self._makeOne(PROJECT, creds) + conn = client.connection = _Connection(DATA) + + datasets, token = client.list_datasets() + + self.assertEqual(len(datasets), len(DATA['datasets'])) + for found, expected in zip(datasets, DATA['datasets']): + self.assertIsInstance(found, Dataset) + self.assertEqual(found.dataset_id, expected['id']) + self.assertEqual(found.friendly_name, expected['friendlyName']) + self.assertEqual(token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_datasets_explicit_response_missing_datasets_key(self): + PROJECT = 'PROJECT' + PATH = 'projects/%s/datasets' % PROJECT + TOKEN = 'TOKEN' + DATA = {} + creds = _Credentials() + client = self._makeOne(PROJECT, creds) + conn = client.connection = _Connection(DATA) + + datasets, token = client.list_datasets( + include_all=True, max_results=3, page_token=TOKEN) + + self.assertEqual(len(datasets), 0) + self.assertIsNone(token) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], + {'all': True, 'maxResults': 3, 'pageToken': TOKEN}) + + def test_dataset(self): + from google.cloud.bigquery.dataset import Dataset + PROJECT = 'PROJECT' + DATASET = 'dataset_name' + creds = _Credentials() + http = object() + client = self._makeOne(project=PROJECT, credentials=creds, http=http) + dataset = client.dataset(DATASET) + self.assertIsInstance(dataset, Dataset) + self.assertEqual(dataset.name, DATASET) + self.assertIs(dataset._client, client) + + def test_job_from_resource_unknown_type(self): + PROJECT = 'PROJECT' + creds = _Credentials() + client = self._makeOne(PROJECT, creds) + with self.assertRaises(ValueError): + client.job_from_resource({'configuration': {'nonesuch': {}}}) + + def test_list_jobs_defaults(self): + from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import CopyJob + from google.cloud.bigquery.job import ExtractTableToStorageJob + from google.cloud.bigquery.job import QueryJob + PROJECT = 'PROJECT' + DATASET = 'test_dataset' + SOURCE_TABLE = 'source_table' + DESTINATION_TABLE = 'destination_table' + QUERY_DESTINATION_TABLE = 'query_destination_table' + SOURCE_URI = 'gs://test_bucket/src_object*' + DESTINATION_URI = 'gs://test_bucket/dst_object*' + JOB_TYPES = { + 'load_job': LoadTableFromStorageJob, + 'copy_job': CopyJob, + 'extract_job': ExtractTableToStorageJob, + 'query_job': QueryJob, + } + PATH = 'projects/%s/jobs' % PROJECT + TOKEN = 'TOKEN' + QUERY = 'SELECT * from test_dataset:test_table' + ASYNC_QUERY_DATA = { + 'id': '%s:%s' % (PROJECT, 'query_job'), + 'jobReference': { + 'projectId': PROJECT, + 'jobId': 'query_job', + }, + 'state': 'DONE', + 'configuration': { + 'query': { + 'query': QUERY, + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': QUERY_DESTINATION_TABLE, + }, + 'createDisposition': 'CREATE_IF_NEEDED', + 'writeDisposition': 'WRITE_TRUNCATE', + } + }, + } + EXTRACT_DATA = { + 'id': '%s:%s' % (PROJECT, 'extract_job'), + 'jobReference': { + 'projectId': PROJECT, + 'jobId': 'extract_job', + }, + 'state': 'DONE', + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': SOURCE_TABLE, + }, + 'destinationUris': [DESTINATION_URI], + } + }, + } + COPY_DATA = { + 'id': '%s:%s' % (PROJECT, 'copy_job'), + 'jobReference': { + 'projectId': PROJECT, + 'jobId': 'copy_job', + }, + 'state': 'DONE', + 'configuration': { + 'copy': { + 'sourceTables': [{ + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': SOURCE_TABLE, + }], + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': DESTINATION_TABLE, + }, + } + }, + } + LOAD_DATA = { + 'id': '%s:%s' % (PROJECT, 'load_job'), + 'jobReference': { + 'projectId': PROJECT, + 'jobId': 'load_job', + }, + 'state': 'DONE', + 'configuration': { + 'load': { + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': SOURCE_TABLE, + }, + 'sourceUris': [SOURCE_URI], + } + }, + } + DATA = { + 'nextPageToken': TOKEN, + 'jobs': [ + ASYNC_QUERY_DATA, + EXTRACT_DATA, + COPY_DATA, + LOAD_DATA, + ] + } + creds = _Credentials() + client = self._makeOne(PROJECT, creds) + conn = client.connection = _Connection(DATA) + + jobs, token = client.list_jobs() + + self.assertEqual(len(jobs), len(DATA['jobs'])) + for found, expected in zip(jobs, DATA['jobs']): + name = expected['jobReference']['jobId'] + self.assertIsInstance(found, JOB_TYPES[name]) + self.assertEqual(found.name, name) + self.assertEqual(token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'projection': 'full'}) + + def test_list_jobs_load_job_wo_sourceUris(self): + from google.cloud.bigquery.job import LoadTableFromStorageJob + PROJECT = 'PROJECT' + DATASET = 'test_dataset' + SOURCE_TABLE = 'source_table' + JOB_TYPES = { + 'load_job': LoadTableFromStorageJob, + } + PATH = 'projects/%s/jobs' % PROJECT + TOKEN = 'TOKEN' + LOAD_DATA = { + 'id': '%s:%s' % (PROJECT, 'load_job'), + 'jobReference': { + 'projectId': PROJECT, + 'jobId': 'load_job', + }, + 'state': 'DONE', + 'configuration': { + 'load': { + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': SOURCE_TABLE, + }, + } + }, + } + DATA = { + 'nextPageToken': TOKEN, + 'jobs': [ + LOAD_DATA, + ] + } + creds = _Credentials() + client = self._makeOne(PROJECT, creds) + conn = client.connection = _Connection(DATA) + + jobs, token = client.list_jobs() + + self.assertEqual(len(jobs), len(DATA['jobs'])) + for found, expected in zip(jobs, DATA['jobs']): + name = expected['jobReference']['jobId'] + self.assertIsInstance(found, JOB_TYPES[name]) + self.assertEqual(found.name, name) + self.assertEqual(token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'projection': 'full'}) + + def test_list_jobs_explicit_missing(self): + PROJECT = 'PROJECT' + PATH = 'projects/%s/jobs' % PROJECT + DATA = {} + TOKEN = 'TOKEN' + creds = _Credentials() + client = self._makeOne(PROJECT, creds) + conn = client.connection = _Connection(DATA) + + jobs, token = client.list_jobs(max_results=1000, page_token=TOKEN, + all_users=True, state_filter='done') + + self.assertEqual(len(jobs), 0) + self.assertIsNone(token) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], + {'projection': 'full', + 'maxResults': 1000, + 'pageToken': TOKEN, + 'allUsers': True, + 'stateFilter': 'done'}) + + def test_load_table_from_storage(self): + from google.cloud.bigquery.job import LoadTableFromStorageJob + PROJECT = 'PROJECT' + JOB = 'job_name' + DATASET = 'dataset_name' + DESTINATION = 'destination_table' + SOURCE_URI = 'http://example.com/source.csv' + creds = _Credentials() + http = object() + client = self._makeOne(project=PROJECT, credentials=creds, http=http) + dataset = client.dataset(DATASET) + destination = dataset.table(DESTINATION) + job = client.load_table_from_storage(JOB, destination, SOURCE_URI) + self.assertIsInstance(job, LoadTableFromStorageJob) + self.assertIs(job._client, client) + self.assertEqual(job.name, JOB) + self.assertEqual(list(job.source_uris), [SOURCE_URI]) + self.assertIs(job.destination, destination) + + def test_copy_table(self): + from google.cloud.bigquery.job import CopyJob + PROJECT = 'PROJECT' + JOB = 'job_name' + DATASET = 'dataset_name' + SOURCE = 'source_table' + DESTINATION = 'destination_table' + creds = _Credentials() + http = object() + client = self._makeOne(project=PROJECT, credentials=creds, http=http) + dataset = client.dataset(DATASET) + source = dataset.table(SOURCE) + destination = dataset.table(DESTINATION) + job = client.copy_table(JOB, destination, source) + self.assertIsInstance(job, CopyJob) + self.assertIs(job._client, client) + self.assertEqual(job.name, JOB) + self.assertEqual(list(job.sources), [source]) + self.assertIs(job.destination, destination) + + def test_extract_table_to_storage(self): + from google.cloud.bigquery.job import ExtractTableToStorageJob + PROJECT = 'PROJECT' + JOB = 'job_name' + DATASET = 'dataset_name' + SOURCE = 'source_table' + DESTINATION = 'gs://bucket_name/object_name' + creds = _Credentials() + http = object() + client = self._makeOne(project=PROJECT, credentials=creds, http=http) + dataset = client.dataset(DATASET) + source = dataset.table(SOURCE) + job = client.extract_table_to_storage(JOB, source, DESTINATION) + self.assertIsInstance(job, ExtractTableToStorageJob) + self.assertIs(job._client, client) + self.assertEqual(job.name, JOB) + self.assertEqual(job.source, source) + self.assertEqual(list(job.destination_uris), [DESTINATION]) + + def test_run_async_query(self): + from google.cloud.bigquery.job import QueryJob + PROJECT = 'PROJECT' + JOB = 'job_name' + QUERY = 'select count(*) from persons' + creds = _Credentials() + http = object() + client = self._makeOne(project=PROJECT, credentials=creds, http=http) + job = client.run_async_query(JOB, QUERY) + self.assertIsInstance(job, QueryJob) + self.assertIs(job._client, client) + self.assertEqual(job.name, JOB) + self.assertEqual(job.query, QUERY) + + def test_run_sync_query(self): + from google.cloud.bigquery.query import QueryResults + PROJECT = 'PROJECT' + QUERY = 'select count(*) from persons' + creds = _Credentials() + http = object() + client = self._makeOne(project=PROJECT, credentials=creds, http=http) + job = client.run_sync_query(QUERY) + self.assertIsInstance(job, QueryResults) + self.assertIs(job._client, client) + self.assertIsNone(job.name) + self.assertEqual(job.query, QUERY) + + +class _Credentials(object): + + _scopes = None + + @staticmethod + def create_scoped_required(): + return True + + def create_scoped(self, scope): + self._scopes = scope + return self + + +class _Connection(object): + + def __init__(self, *responses): + self._responses = responses + self._requested = [] + + def api_request(self, **kw): + self._requested.append(kw) + response, self._responses = self._responses[0], self._responses[1:] + return response diff --git a/packages/google-cloud-bigquery/unit_tests/test_connection.py b/packages/google-cloud-bigquery/unit_tests/test_connection.py new file mode 100644 index 000000000000..35ec0276e86b --- /dev/null +++ b/packages/google-cloud-bigquery/unit_tests/test_connection.py @@ -0,0 +1,47 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + + +class TestConnection(unittest.TestCase): + + def _getTargetClass(self): + from google.cloud.bigquery.connection import Connection + return Connection + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def test_build_api_url_no_extra_query_params(self): + conn = self._makeOne() + URI = '/'.join([ + conn.API_BASE_URL, + 'bigquery', + conn.API_VERSION, + 'foo', + ]) + self.assertEqual(conn.build_api_url('/foo'), URI) + + def test_build_api_url_w_extra_query_params(self): + from six.moves.urllib.parse import parse_qsl + from six.moves.urllib.parse import urlsplit + conn = self._makeOne() + uri = conn.build_api_url('/foo', {'bar': 'baz'}) + scheme, netloc, path, qs, _ = urlsplit(uri) + self.assertEqual('%s://%s' % (scheme, netloc), conn.API_BASE_URL) + self.assertEqual(path, + '/'.join(['', 'bigquery', conn.API_VERSION, 'foo'])) + parms = dict(parse_qsl(qs)) + self.assertEqual(parms['bar'], 'baz') diff --git a/packages/google-cloud-bigquery/unit_tests/test_dataset.py b/packages/google-cloud-bigquery/unit_tests/test_dataset.py new file mode 100644 index 000000000000..d9b85cf5fad3 --- /dev/null +++ b/packages/google-cloud-bigquery/unit_tests/test_dataset.py @@ -0,0 +1,786 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + + +class TestAccessGrant(unittest.TestCase): + + def _getTargetClass(self): + from google.cloud.bigquery.dataset import AccessGrant + return AccessGrant + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def test_ctor_defaults(self): + grant = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') + self.assertEqual(grant.role, 'OWNER') + self.assertEqual(grant.entity_type, 'userByEmail') + self.assertEqual(grant.entity_id, 'phred@example.com') + + def test_ctor_bad_entity_type(self): + with self.assertRaises(ValueError): + self._makeOne(None, 'unknown', None) + + def test_ctor_view_with_role(self): + role = 'READER' + entity_type = 'view' + with self.assertRaises(ValueError): + self._makeOne(role, entity_type, None) + + def test_ctor_view_success(self): + role = None + entity_type = 'view' + entity_id = object() + grant = self._makeOne(role, entity_type, entity_id) + self.assertEqual(grant.role, role) + self.assertEqual(grant.entity_type, entity_type) + self.assertEqual(grant.entity_id, entity_id) + + def test_ctor_nonview_without_role(self): + role = None + entity_type = 'userByEmail' + with self.assertRaises(ValueError): + self._makeOne(role, entity_type, None) + + def test___eq___role_mismatch(self): + grant = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') + other = self._makeOne('WRITER', 'userByEmail', 'phred@example.com') + self.assertNotEqual(grant, other) + + def test___eq___entity_type_mismatch(self): + grant = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') + other = self._makeOne('OWNER', 'groupByEmail', 'phred@example.com') + self.assertNotEqual(grant, other) + + def test___eq___entity_id_mismatch(self): + grant = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') + other = self._makeOne('OWNER', 'userByEmail', 'bharney@example.com') + self.assertNotEqual(grant, other) + + def test___eq___hit(self): + grant = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') + other = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') + self.assertEqual(grant, other) + + +class TestDataset(unittest.TestCase): + PROJECT = 'project' + DS_NAME = 'dataset-name' + + def _getTargetClass(self): + from google.cloud.bigquery.dataset import Dataset + return Dataset + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def _setUpConstants(self): + import datetime + from google.cloud._helpers import UTC + + self.WHEN_TS = 1437767599.006 + self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( + tzinfo=UTC) + self.ETAG = 'ETAG' + self.DS_ID = '%s:%s' % (self.PROJECT, self.DS_NAME) + self.RESOURCE_URL = 'http://example.com/path/to/resource' + + def _makeResource(self): + self._setUpConstants() + USER_EMAIL = 'phred@example.com' + GROUP_EMAIL = 'group-name@lists.example.com' + return { + 'creationTime': self.WHEN_TS * 1000, + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + 'etag': self.ETAG, + 'id': self.DS_ID, + 'lastModifiedTime': self.WHEN_TS * 1000, + 'location': 'US', + 'selfLink': self.RESOURCE_URL, + 'access': [ + {'role': 'OWNER', 'userByEmail': USER_EMAIL}, + {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL}, + {'role': 'WRITER', 'specialGroup': 'projectWriters'}, + {'role': 'READER', 'specialGroup': 'projectReaders'}], + } + + def _verifyAccessGrants(self, access_grants, resource): + r_grants = [] + for r_grant in resource['access']: + role = r_grant.pop('role') + for entity_type, entity_id in sorted(r_grant.items()): + r_grants.append({'role': role, + 'entity_type': entity_type, + 'entity_id': entity_id}) + + self.assertEqual(len(access_grants), len(r_grants)) + for a_grant, r_grant in zip(access_grants, r_grants): + self.assertEqual(a_grant.role, r_grant['role']) + self.assertEqual(a_grant.entity_type, r_grant['entity_type']) + self.assertEqual(a_grant.entity_id, r_grant['entity_id']) + + def _verifyReadonlyResourceProperties(self, dataset, resource): + + self.assertEqual(dataset.dataset_id, self.DS_ID) + + if 'creationTime' in resource: + self.assertEqual(dataset.created, self.WHEN) + else: + self.assertIsNone(dataset.created) + if 'etag' in resource: + self.assertEqual(dataset.etag, self.ETAG) + else: + self.assertIsNone(dataset.etag) + if 'lastModifiedTime' in resource: + self.assertEqual(dataset.modified, self.WHEN) + else: + self.assertIsNone(dataset.modified) + if 'selfLink' in resource: + self.assertEqual(dataset.self_link, self.RESOURCE_URL) + else: + self.assertIsNone(dataset.self_link) + + def _verifyResourceProperties(self, dataset, resource): + + self._verifyReadonlyResourceProperties(dataset, resource) + + if 'defaultTableExpirationMs' in resource: + self.assertEqual(dataset.default_table_expiration_ms, + int(resource.get('defaultTableExpirationMs'))) + else: + self.assertIsNone(dataset.default_table_expiration_ms) + self.assertEqual(dataset.description, resource.get('description')) + self.assertEqual(dataset.friendly_name, resource.get('friendlyName')) + self.assertEqual(dataset.location, resource.get('location')) + + if 'access' in resource: + self._verifyAccessGrants(dataset.access_grants, resource) + else: + self.assertEqual(dataset.access_grants, []) + + def test_ctor(self): + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + self.assertEqual(dataset.name, self.DS_NAME) + self.assertIs(dataset._client, client) + self.assertEqual(dataset.project, client.project) + self.assertEqual( + dataset.path, + '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME)) + self.assertEqual(dataset.access_grants, []) + + self.assertIsNone(dataset.created) + self.assertIsNone(dataset.dataset_id) + self.assertIsNone(dataset.etag) + self.assertIsNone(dataset.modified) + self.assertIsNone(dataset.self_link) + + self.assertIsNone(dataset.default_table_expiration_ms) + self.assertIsNone(dataset.description) + self.assertIsNone(dataset.friendly_name) + self.assertIsNone(dataset.location) + + def test_access_roles_setter_non_list(self): + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + with self.assertRaises(TypeError): + dataset.access_grants = object() + + def test_access_roles_setter_invalid_field(self): + from google.cloud.bigquery.dataset import AccessGrant + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') + with self.assertRaises(ValueError): + dataset.access_grants = [phred, object()] + + def test_access_roles_setter(self): + from google.cloud.bigquery.dataset import AccessGrant + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') + bharney = AccessGrant('OWNER', 'userByEmail', 'bharney@example.com') + dataset.access_grants = [phred, bharney] + self.assertEqual(dataset.access_grants, [phred, bharney]) + + def test_default_table_expiration_ms_setter_bad_value(self): + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + with self.assertRaises(ValueError): + dataset.default_table_expiration_ms = 'bogus' + + def test_default_table_expiration_ms_setter(self): + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + dataset.default_table_expiration_ms = 12345 + self.assertEqual(dataset.default_table_expiration_ms, 12345) + + def test_description_setter_bad_value(self): + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + with self.assertRaises(ValueError): + dataset.description = 12345 + + def test_description_setter(self): + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + dataset.description = 'DESCRIPTION' + self.assertEqual(dataset.description, 'DESCRIPTION') + + def test_friendly_name_setter_bad_value(self): + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + with self.assertRaises(ValueError): + dataset.friendly_name = 12345 + + def test_friendly_name_setter(self): + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + dataset.friendly_name = 'FRIENDLY' + self.assertEqual(dataset.friendly_name, 'FRIENDLY') + + def test_location_setter_bad_value(self): + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + with self.assertRaises(ValueError): + dataset.location = 12345 + + def test_location_setter(self): + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client) + dataset.location = 'LOCATION' + self.assertEqual(dataset.location, 'LOCATION') + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = {} + klass = self._getTargetClass() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = { + 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + } + } + klass = self._getTargetClass() + dataset = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(dataset._client, client) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_from_api_repr_w_properties(self): + client = _Client(self.PROJECT) + RESOURCE = self._makeResource() + klass = self._getTargetClass() + dataset = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(dataset._client, client) + self._verifyResourceProperties(dataset, RESOURCE) + + def test__parse_access_grants_w_unknown_entity_type(self): + ACCESS = [ + {'role': 'READER', 'unknown': 'UNKNOWN'}, + ] + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client=client) + with self.assertRaises(ValueError): + dataset._parse_access_grants(ACCESS) + + def test__parse_access_grants_w_extra_keys(self): + USER_EMAIL = 'phred@example.com' + ACCESS = [ + { + 'role': 'READER', + 'specialGroup': 'projectReaders', + 'userByEmail': USER_EMAIL, + }, + ] + client = _Client(self.PROJECT) + dataset = self._makeOne(self.DS_NAME, client=client) + with self.assertRaises(ValueError): + dataset._parse_access_grants(ACCESS) + + def test_create_w_bound_client(self): + PATH = 'projects/%s/datasets' % self.PROJECT + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + + dataset.create() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_create_w_alternate_client(self): + from google.cloud.bigquery.dataset import AccessGrant + PATH = 'projects/%s/datasets' % self.PROJECT + USER_EMAIL = 'phred@example.com' + GROUP_EMAIL = 'group-name@lists.example.com' + DESCRIPTION = 'DESCRIPTION' + TITLE = 'TITLE' + RESOURCE = self._makeResource() + RESOURCE['description'] = DESCRIPTION + RESOURCE['friendlyName'] = TITLE + conn1 = _Connection() + CLIENT1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + CLIENT2 = _Client(project=self.PROJECT, connection=conn2) + dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + dataset.friendly_name = TITLE + dataset.description = DESCRIPTION + VIEW = { + 'projectId': 'my-proj', + 'datasetId': 'starry-skies', + 'tableId': 'northern-hemisphere', + } + dataset.access_grants = [ + AccessGrant('OWNER', 'userByEmail', USER_EMAIL), + AccessGrant('OWNER', 'groupByEmail', GROUP_EMAIL), + AccessGrant('READER', 'domain', 'foo.com'), + AccessGrant('READER', 'specialGroup', 'projectReaders'), + AccessGrant('WRITER', 'specialGroup', 'projectWriters'), + AccessGrant(None, 'view', VIEW), + ] + + dataset.create(client=CLIENT2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + }, + 'description': DESCRIPTION, + 'friendlyName': TITLE, + 'access': [ + {'role': 'OWNER', 'userByEmail': USER_EMAIL}, + {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL}, + {'role': 'READER', 'domain': 'foo.com'}, + {'role': 'READER', 'specialGroup': 'projectReaders'}, + {'role': 'WRITER', 'specialGroup': 'projectWriters'}, + {'view': VIEW}, + ], + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_create_w_missing_output_properties(self): + # In the wild, the resource returned from 'dataset.create' sometimes + # lacks 'creationTime' / 'lastModifiedTime' + PATH = 'projects/%s/datasets' % (self.PROJECT,) + RESOURCE = self._makeResource() + del RESOURCE['creationTime'] + del RESOURCE['lastModifiedTime'] + self.WHEN = None + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + + dataset.create() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + + self.assertFalse(dataset.exists()) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_exists_hit_w_alternate_client(self): + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + conn1 = _Connection() + CLIENT1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection({}) + CLIENT2 = _Client(project=self.PROJECT, connection=conn2) + dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + + self.assertTrue(dataset.exists(client=CLIENT2)) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_reload_w_bound_client(self): + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + + dataset.reload() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + RESOURCE = self._makeResource() + conn1 = _Connection() + CLIENT1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + CLIENT2 = _Client(project=self.PROJECT, connection=conn2) + dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + + dataset.reload(client=CLIENT2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_patch_w_invalid_expiration(self): + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + + with self.assertRaises(ValueError): + dataset.patch(default_table_expiration_ms='BOGUS') + + def test_patch_w_bound_client(self): + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + DESCRIPTION = 'DESCRIPTION' + TITLE = 'TITLE' + RESOURCE = self._makeResource() + RESOURCE['description'] = DESCRIPTION + RESOURCE['friendlyName'] = TITLE + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + + dataset.patch(description=DESCRIPTION, friendly_name=TITLE) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + SENT = { + 'description': DESCRIPTION, + 'friendlyName': TITLE, + } + self.assertEqual(req['data'], SENT) + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_patch_w_alternate_client(self): + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + DEF_TABLE_EXP = 12345 + LOCATION = 'EU' + RESOURCE = self._makeResource() + RESOURCE['defaultTableExpirationMs'] = str(DEF_TABLE_EXP) + RESOURCE['location'] = LOCATION + conn1 = _Connection() + CLIENT1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + CLIENT2 = _Client(project=self.PROJECT, connection=conn2) + dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + + dataset.patch(client=CLIENT2, + default_table_expiration_ms=DEF_TABLE_EXP, + location=LOCATION) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'defaultTableExpirationMs': DEF_TABLE_EXP, + 'location': LOCATION, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_update_w_bound_client(self): + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + DESCRIPTION = 'DESCRIPTION' + TITLE = 'TITLE' + RESOURCE = self._makeResource() + RESOURCE['description'] = DESCRIPTION + RESOURCE['friendlyName'] = TITLE + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + dataset.description = DESCRIPTION + dataset.friendly_name = TITLE + + dataset.update() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PUT') + SENT = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + 'description': DESCRIPTION, + 'friendlyName': TITLE, + } + self.assertEqual(req['data'], SENT) + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_update_w_alternate_client(self): + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + DEF_TABLE_EXP = 12345 + LOCATION = 'EU' + RESOURCE = self._makeResource() + RESOURCE['defaultTableExpirationMs'] = 12345 + RESOURCE['location'] = LOCATION + conn1 = _Connection() + CLIENT1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + CLIENT2 = _Client(project=self.PROJECT, connection=conn2) + dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + dataset.default_table_expiration_ms = DEF_TABLE_EXP + dataset.location = LOCATION + + dataset.update(client=CLIENT2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'PUT') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + 'defaultTableExpirationMs': 12345, + 'location': 'EU', + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_delete_w_bound_client(self): + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + conn = _Connection({}) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + + dataset.delete() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'DELETE') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_delete_w_alternate_client(self): + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + conn1 = _Connection() + CLIENT1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection({}) + CLIENT2 = _Client(project=self.PROJECT, connection=conn2) + dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + + dataset.delete(client=CLIENT2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'DELETE') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_tables_empty(self): + conn = _Connection({}) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + tables, token = dataset.list_tables() + self.assertEqual(tables, []) + self.assertIsNone(token) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_tables_defaults(self): + from google.cloud.bigquery.table import Table + + TABLE_1 = 'table_one' + TABLE_2 = 'table_two' + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + TOKEN = 'TOKEN' + DATA = { + 'nextPageToken': TOKEN, + 'tables': [ + {'kind': 'bigquery#table', + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_1), + 'tableReference': {'tableId': TABLE_1, + 'datasetId': self.DS_NAME, + 'projectId': self.PROJECT}, + 'type': 'TABLE'}, + {'kind': 'bigquery#table', + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_2), + 'tableReference': {'tableId': TABLE_2, + 'datasetId': self.DS_NAME, + 'projectId': self.PROJECT}, + 'type': 'TABLE'}, + ] + } + + conn = _Connection(DATA) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + + tables, token = dataset.list_tables() + + self.assertEqual(len(tables), len(DATA['tables'])) + for found, expected in zip(tables, DATA['tables']): + self.assertIsInstance(found, Table) + self.assertEqual(found.table_id, expected['id']) + self.assertEqual(found.table_type, expected['type']) + self.assertEqual(token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_tables_explicit(self): + from google.cloud.bigquery.table import Table + + TABLE_1 = 'table_one' + TABLE_2 = 'table_two' + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + TOKEN = 'TOKEN' + DATA = { + 'tables': [ + {'kind': 'bigquery#dataset', + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_1), + 'tableReference': {'tableId': TABLE_1, + 'datasetId': self.DS_NAME, + 'projectId': self.PROJECT}, + 'type': 'TABLE'}, + {'kind': 'bigquery#dataset', + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_2), + 'tableReference': {'tableId': TABLE_2, + 'datasetId': self.DS_NAME, + 'projectId': self.PROJECT}, + 'type': 'TABLE'}, + ] + } + + conn = _Connection(DATA) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + + tables, token = dataset.list_tables(max_results=3, page_token=TOKEN) + + self.assertEqual(len(tables), len(DATA['tables'])) + for found, expected in zip(tables, DATA['tables']): + self.assertIsInstance(found, Table) + self.assertEqual(found.table_id, expected['id']) + self.assertEqual(found.table_type, expected['type']) + self.assertIsNone(token) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], + {'maxResults': 3, 'pageToken': TOKEN}) + + def test_table_wo_schema(self): + from google.cloud.bigquery.table import Table + conn = _Connection({}) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + table = dataset.table('table_name') + self.assertIsInstance(table, Table) + self.assertEqual(table.name, 'table_name') + self.assertIs(table._dataset, dataset) + self.assertEqual(table.schema, []) + + def test_table_w_schema(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + conn = _Connection({}) + client = _Client(project=self.PROJECT, connection=conn) + dataset = self._makeOne(self.DS_NAME, client=client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = dataset.table('table_name', schema=[full_name, age]) + self.assertIsInstance(table, Table) + self.assertEqual(table.name, 'table_name') + self.assertIs(table._dataset, dataset) + self.assertEqual(table.schema, [full_name, age]) + + +class _Client(object): + + def __init__(self, project='project', connection=None): + self.project = project + self.connection = connection + + +class _Connection(object): + + def __init__(self, *responses): + self._responses = responses + self._requested = [] + + def api_request(self, **kw): + from google.cloud.exceptions import NotFound + self._requested.append(kw) + + try: + response, self._responses = self._responses[0], self._responses[1:] + except: + raise NotFound('miss') + else: + return response diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py new file mode 100644 index 000000000000..2ac135c0e739 --- /dev/null +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -0,0 +1,1688 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + + +class _Base(object): + PROJECT = 'project' + SOURCE1 = 'http://example.com/source1.csv' + DS_NAME = 'datset_name' + TABLE_NAME = 'table_name' + JOB_NAME = 'job_name' + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def _setUpConstants(self): + import datetime + from google.cloud._helpers import UTC + + self.WHEN_TS = 1437767599.006 + self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( + tzinfo=UTC) + self.ETAG = 'ETAG' + self.JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_NAME) + self.RESOURCE_URL = 'http://example.com/path/to/resource' + self.USER_EMAIL = 'phred@example.com' + + def _makeResource(self, started=False, ended=False): + self._setUpConstants() + resource = { + 'configuration': { + self.JOB_TYPE: { + }, + }, + 'statistics': { + 'creationTime': self.WHEN_TS * 1000, + self.JOB_TYPE: { + } + }, + 'etag': self.ETAG, + 'id': self.JOB_ID, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'selfLink': self.RESOURCE_URL, + 'user_email': self.USER_EMAIL, + } + + if started or ended: + resource['statistics']['startTime'] = self.WHEN_TS * 1000 + + if ended: + resource['statistics']['endTime'] = (self.WHEN_TS + 1000) * 1000 + + return resource + + def _verifyInitialReadonlyProperties(self, job): + # root elements of resource + self.assertIsNone(job.etag) + self.assertIsNone(job.self_link) + self.assertIsNone(job.user_email) + + # derived from resource['statistics'] + self.assertIsNone(job.created) + self.assertIsNone(job.started) + self.assertIsNone(job.ended) + + # derived from resource['status'] + self.assertIsNone(job.error_result) + self.assertIsNone(job.errors) + self.assertIsNone(job.state) + + def _verifyReadonlyResourceProperties(self, job, resource): + from datetime import timedelta + + statistics = resource.get('statistics', {}) + + if 'creationTime' in statistics: + self.assertEqual(job.created, self.WHEN) + else: + self.assertIsNone(job.created) + + if 'startTime' in statistics: + self.assertEqual(job.started, self.WHEN) + else: + self.assertIsNone(job.started) + + if 'endTime' in statistics: + self.assertEqual(job.ended, self.WHEN + timedelta(seconds=1000)) + else: + self.assertIsNone(job.ended) + + if 'etag' in resource: + self.assertEqual(job.etag, self.ETAG) + else: + self.assertIsNone(job.etag) + + if 'selfLink' in resource: + self.assertEqual(job.self_link, self.RESOURCE_URL) + else: + self.assertIsNone(job.self_link) + + if 'user_email' in resource: + self.assertEqual(job.user_email, self.USER_EMAIL) + else: + self.assertIsNone(job.user_email) + + +class TestLoadTableFromStorageJob(unittest.TestCase, _Base): + JOB_TYPE = 'load' + + def _getTargetClass(self): + from google.cloud.bigquery.job import LoadTableFromStorageJob + return LoadTableFromStorageJob + + def _setUpConstants(self): + super(TestLoadTableFromStorageJob, self)._setUpConstants() + self.INPUT_FILES = 2 + self.INPUT_BYTES = 12345 + self.OUTPUT_BYTES = 23456 + self.OUTPUT_ROWS = 345 + + def _makeResource(self, started=False, ended=False): + resource = super(TestLoadTableFromStorageJob, self)._makeResource( + started, ended) + config = resource['configuration']['load'] + config['sourceUris'] = [self.SOURCE1] + config['destinationTable'] = { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME, + } + + if ended: + resource['statistics']['load']['inputFiles'] = self.INPUT_FILES + resource['statistics']['load']['inputFileBytes'] = self.INPUT_BYTES + resource['statistics']['load']['outputBytes'] = self.OUTPUT_BYTES + resource['statistics']['load']['outputRows'] = self.OUTPUT_ROWS + + return resource + + def _verifyBooleanConfigProperties(self, job, config): + if 'allowJaggedRows' in config: + self.assertEqual(job.allow_jagged_rows, + config['allowJaggedRows']) + else: + self.assertIsNone(job.allow_jagged_rows) + if 'allowQuotedNewlines' in config: + self.assertEqual(job.allow_quoted_newlines, + config['allowQuotedNewlines']) + else: + self.assertIsNone(job.allow_quoted_newlines) + if 'ignoreUnknownValues' in config: + self.assertEqual(job.ignore_unknown_values, + config['ignoreUnknownValues']) + else: + self.assertIsNone(job.ignore_unknown_values) + + def _verifyEnumConfigProperties(self, job, config): + if 'createDisposition' in config: + self.assertEqual(job.create_disposition, + config['createDisposition']) + else: + self.assertIsNone(job.create_disposition) + if 'encoding' in config: + self.assertEqual(job.encoding, + config['encoding']) + else: + self.assertIsNone(job.encoding) + if 'sourceFormat' in config: + self.assertEqual(job.source_format, + config['sourceFormat']) + else: + self.assertIsNone(job.source_format) + if 'writeDisposition' in config: + self.assertEqual(job.write_disposition, + config['writeDisposition']) + else: + self.assertIsNone(job.write_disposition) + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get('configuration', {}).get('load') + + self._verifyBooleanConfigProperties(job, config) + self._verifyEnumConfigProperties(job, config) + + self.assertEqual(job.source_uris, config['sourceUris']) + + table_ref = config['destinationTable'] + self.assertEqual(job.destination.project, table_ref['projectId']) + self.assertEqual(job.destination.dataset_name, table_ref['datasetId']) + self.assertEqual(job.destination.name, table_ref['tableId']) + + if 'fieldDelimiter' in config: + self.assertEqual(job.field_delimiter, + config['fieldDelimiter']) + else: + self.assertIsNone(job.field_delimiter) + if 'maxBadRecords' in config: + self.assertEqual(job.max_bad_records, + config['maxBadRecords']) + else: + self.assertIsNone(job.max_bad_records) + if 'quote' in config: + self.assertEqual(job.quote_character, + config['quote']) + else: + self.assertIsNone(job.quote_character) + if 'skipLeadingRows' in config: + self.assertEqual(job.skip_leading_rows, + config['skipLeadingRows']) + else: + self.assertIsNone(job.skip_leading_rows) + + def test_ctor(self): + client = _Client(self.PROJECT) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + self.assertIs(job.destination, table) + self.assertEqual(list(job.source_uris), [self.SOURCE1]) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual( + job.path, + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + self.assertEqual(job.schema, []) + + self._verifyInitialReadonlyProperties(job) + + # derived from resource['statistics']['load'] + self.assertIsNone(job.input_file_bytes) + self.assertIsNone(job.input_files) + self.assertIsNone(job.output_bytes) + self.assertIsNone(job.output_rows) + + # set/read from resource['configuration']['load'] + self.assertIsNone(job.allow_jagged_rows) + self.assertIsNone(job.allow_quoted_newlines) + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.encoding) + self.assertIsNone(job.field_delimiter) + self.assertIsNone(job.ignore_unknown_values) + self.assertIsNone(job.max_bad_records) + self.assertIsNone(job.quote_character) + self.assertIsNone(job.skip_leading_rows) + self.assertIsNone(job.source_format) + self.assertIsNone(job.write_disposition) + + def test_ctor_w_schema(self): + from google.cloud.bigquery.schema import SchemaField + client = _Client(self.PROJECT) + table = _Table() + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client, + schema=[full_name, age]) + self.assertEqual(job.schema, [full_name, age]) + + def test_schema_setter_non_list(self): + client = _Client(self.PROJECT) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + with self.assertRaises(TypeError): + job.schema = object() + + def test_schema_setter_invalid_field(self): + from google.cloud.bigquery.schema import SchemaField + client = _Client(self.PROJECT) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + with self.assertRaises(ValueError): + job.schema = [full_name, object()] + + def test_schema_setter(self): + from google.cloud.bigquery.schema import SchemaField + client = _Client(self.PROJECT) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + job.schema = [full_name, age] + self.assertEqual(job.schema, [full_name, age]) + + def test_props_set_by_server(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + + CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC) + STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC) + ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC) + JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_NAME) + URL = 'http://example.com/projects/%s/jobs/%s' % ( + self.PROJECT, self.JOB_NAME) + EMAIL = 'phred@example.com' + ERROR_RESULT = {'debugInfo': 'DEBUG', + 'location': 'LOCATION', + 'message': 'MESSAGE', + 'reason': 'REASON'} + + client = _Client(self.PROJECT) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job._properties['etag'] = 'ETAG' + job._properties['id'] = JOB_ID + job._properties['selfLink'] = URL + job._properties['user_email'] = EMAIL + + statistics = job._properties['statistics'] = {} + statistics['creationTime'] = _millis(CREATED) + statistics['startTime'] = _millis(STARTED) + statistics['endTime'] = _millis(ENDED) + load_stats = statistics['load'] = {} + load_stats['inputFileBytes'] = 12345 + load_stats['inputFiles'] = 1 + load_stats['outputBytes'] = 23456 + load_stats['outputRows'] = 345 + + self.assertEqual(job.etag, 'ETAG') + self.assertEqual(job.self_link, URL) + self.assertEqual(job.user_email, EMAIL) + + self.assertEqual(job.created, CREATED) + self.assertEqual(job.started, STARTED) + self.assertEqual(job.ended, ENDED) + + self.assertEqual(job.input_file_bytes, 12345) + self.assertEqual(job.input_files, 1) + self.assertEqual(job.output_bytes, 23456) + self.assertEqual(job.output_rows, 345) + + status = job._properties['status'] = {} + + self.assertIsNone(job.error_result) + self.assertIsNone(job.errors) + self.assertIsNone(job.state) + + status['errorResult'] = ERROR_RESULT + status['errors'] = [ERROR_RESULT] + status['state'] = 'STATE' + + self.assertEqual(job.error_result, ERROR_RESULT) + self.assertEqual(job.errors, [ERROR_RESULT]) + self.assertEqual(job.state, 'STATE') + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = {} + klass = self._getTargetClass() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = { + 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + } + } + klass = self._getTargetClass() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = { + 'id': self.JOB_ID, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'load': { + 'sourceUris': [self.SOURCE1], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME, + }, + } + }, + } + klass = self._getTargetClass() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + client = _Client(self.PROJECT) + RESOURCE = self._makeResource() + klass = self._getTargetClass() + dataset = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(dataset._client, client) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_begin_w_already_running(self): + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job._properties['status'] = {'state': 'RUNNING'} + + with self.assertRaises(ValueError): + job.begin() + + def test_begin_w_bound_client(self): + PATH = 'projects/%s/jobs' % self.PROJECT + RESOURCE = self._makeResource() + # Ensure None for missing server-set props + del RESOURCE['statistics']['creationTime'] + del RESOURCE['etag'] + del RESOURCE['selfLink'] + del RESOURCE['user_email'] + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + + job.begin() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'load': { + 'sourceUris': [self.SOURCE1], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME, + }, + }, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.schema import SchemaField + PATH = 'projects/%s/jobs' % self.PROJECT + RESOURCE = self._makeResource(ended=True) + LOAD_CONFIGURATION = { + 'sourceUris': [self.SOURCE1], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME, + }, + 'allowJaggedRows': True, + 'allowQuotedNewlines': True, + 'createDisposition': 'CREATE_NEVER', + 'encoding': 'ISO-8559-1', + 'fieldDelimiter': '|', + 'ignoreUnknownValues': True, + 'maxBadRecords': 100, + 'quote': "'", + 'skipLeadingRows': 1, + 'sourceFormat': 'CSV', + 'writeDisposition': 'WRITE_TRUNCATE', + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}, + ]} + } + RESOURCE['configuration']['load'] = LOAD_CONFIGURATION + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + table = _Table() + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client1, + schema=[full_name, age]) + + job.allow_jagged_rows = True + job.allow_quoted_newlines = True + job.create_disposition = 'CREATE_NEVER' + job.encoding = 'ISO-8559-1' + job.field_delimiter = '|' + job.ignore_unknown_values = True + job.max_bad_records = 100 + job.quote_character = "'" + job.skip_leading_rows = 1 + job.source_format = 'CSV' + job.write_disposition = 'WRITE_TRUNCATE' + + job.begin(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'load': LOAD_CONFIGURATION, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + + self.assertFalse(job.exists()) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_exists_hit_w_alternate_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection({}) + client2 = _Client(project=self.PROJECT, connection=conn2) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client1) + + self.assertTrue(job.exists(client=client2)) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_reload_w_bound_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + + job.reload() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + RESOURCE = self._makeResource() + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client1) + + job.reload(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(job, RESOURCE) + + def test_cancel_w_bound_client(self): + PATH = 'projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_NAME) + RESOURCE = self._makeResource(ended=True) + RESPONSE = {'job': RESOURCE} + conn = _Connection(RESPONSE) + client = _Client(project=self.PROJECT, connection=conn) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + + job.cancel() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(job, RESOURCE) + + def test_cancel_w_alternate_client(self): + PATH = 'projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_NAME) + RESOURCE = self._makeResource(ended=True) + RESPONSE = {'job': RESOURCE} + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESPONSE) + client2 = _Client(project=self.PROJECT, connection=conn2) + table = _Table() + job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client1) + + job.cancel(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(job, RESOURCE) + + +class TestCopyJob(unittest.TestCase, _Base): + JOB_TYPE = 'copy' + SOURCE_TABLE = 'source_table' + DESTINATION_TABLE = 'destination_table' + + def _getTargetClass(self): + from google.cloud.bigquery.job import CopyJob + return CopyJob + + def _makeResource(self, started=False, ended=False): + resource = super(TestCopyJob, self)._makeResource( + started, ended) + config = resource['configuration']['copy'] + config['sourceTables'] = [{ + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.SOURCE_TABLE, + }] + config['destinationTable'] = { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.DESTINATION_TABLE, + } + + return resource + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get('configuration', {}).get('copy') + + table_ref = config['destinationTable'] + self.assertEqual(job.destination.project, table_ref['projectId']) + self.assertEqual(job.destination.dataset_name, table_ref['datasetId']) + self.assertEqual(job.destination.name, table_ref['tableId']) + + sources = config['sourceTables'] + self.assertEqual(len(sources), len(job.sources)) + for table_ref, table in zip(sources, job.sources): + self.assertEqual(table.project, table_ref['projectId']) + self.assertEqual(table.dataset_name, table_ref['datasetId']) + self.assertEqual(table.name, table_ref['tableId']) + + if 'createDisposition' in config: + self.assertEqual(job.create_disposition, + config['createDisposition']) + else: + self.assertIsNone(job.create_disposition) + + if 'writeDisposition' in config: + self.assertEqual(job.write_disposition, + config['writeDisposition']) + else: + self.assertIsNone(job.write_disposition) + + def test_ctor(self): + client = _Client(self.PROJECT) + source = _Table(self.SOURCE_TABLE) + destination = _Table(self.DESTINATION_TABLE) + job = self._makeOne(self.JOB_NAME, destination, [source], client) + self.assertIs(job.destination, destination) + self.assertEqual(job.sources, [source]) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual( + job.path, + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + + self._verifyInitialReadonlyProperties(job) + + # set/read from resource['configuration']['copy'] + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.write_disposition) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = {} + klass = self._getTargetClass() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = { + 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + } + } + klass = self._getTargetClass() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = { + 'id': self.JOB_ID, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'copy': { + 'sourceTables': [{ + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.SOURCE_TABLE, + }], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.DESTINATION_TABLE, + }, + } + }, + } + klass = self._getTargetClass() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + client = _Client(self.PROJECT) + RESOURCE = self._makeResource() + klass = self._getTargetClass() + dataset = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(dataset._client, client) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_begin_w_bound_client(self): + PATH = 'projects/%s/jobs' % self.PROJECT + RESOURCE = self._makeResource() + # Ensure None for missing server-set props + del RESOURCE['statistics']['creationTime'] + del RESOURCE['etag'] + del RESOURCE['selfLink'] + del RESOURCE['user_email'] + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + source = _Table(self.SOURCE_TABLE) + destination = _Table(self.DESTINATION_TABLE) + job = self._makeOne(self.JOB_NAME, destination, [source], client) + + job.begin() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'copy': { + 'sourceTables': [{ + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.SOURCE_TABLE + }], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.DESTINATION_TABLE, + }, + }, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + PATH = 'projects/%s/jobs' % self.PROJECT + RESOURCE = self._makeResource(ended=True) + COPY_CONFIGURATION = { + 'sourceTables': [{ + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.SOURCE_TABLE, + }], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.DESTINATION_TABLE, + }, + 'createDisposition': 'CREATE_NEVER', + 'writeDisposition': 'WRITE_TRUNCATE', + } + RESOURCE['configuration']['copy'] = COPY_CONFIGURATION + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + source = _Table(self.SOURCE_TABLE) + destination = _Table(self.DESTINATION_TABLE) + job = self._makeOne(self.JOB_NAME, destination, [source], client1) + + job.create_disposition = 'CREATE_NEVER' + job.write_disposition = 'WRITE_TRUNCATE' + + job.begin(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'copy': COPY_CONFIGURATION, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + source = _Table(self.SOURCE_TABLE) + destination = _Table(self.DESTINATION_TABLE) + job = self._makeOne(self.JOB_NAME, destination, [source], client) + + self.assertFalse(job.exists()) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_exists_hit_w_alternate_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection({}) + client2 = _Client(project=self.PROJECT, connection=conn2) + source = _Table(self.SOURCE_TABLE) + destination = _Table(self.DESTINATION_TABLE) + job = self._makeOne(self.JOB_NAME, destination, [source], client1) + + self.assertTrue(job.exists(client=client2)) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_reload_w_bound_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + source = _Table(self.SOURCE_TABLE) + destination = _Table(self.DESTINATION_TABLE) + job = self._makeOne(self.JOB_NAME, destination, [source], client) + + job.reload() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + RESOURCE = self._makeResource() + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + source = _Table(self.SOURCE_TABLE) + destination = _Table(self.DESTINATION_TABLE) + job = self._makeOne(self.JOB_NAME, destination, [source], client1) + + job.reload(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(job, RESOURCE) + + +class TestExtractTableToStorageJob(unittest.TestCase, _Base): + JOB_TYPE = 'extract' + SOURCE_TABLE = 'source_table' + DESTINATION_URI = 'gs://bucket_name/object_name' + + def _getTargetClass(self): + from google.cloud.bigquery.job import ExtractTableToStorageJob + return ExtractTableToStorageJob + + def _makeResource(self, started=False, ended=False): + resource = super(TestExtractTableToStorageJob, self)._makeResource( + started, ended) + config = resource['configuration']['extract'] + config['sourceTable'] = { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.SOURCE_TABLE, + } + config['destinationUris'] = [self.DESTINATION_URI] + return resource + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get('configuration', {}).get('extract') + + self.assertEqual(job.destination_uris, config['destinationUris']) + + table_ref = config['sourceTable'] + self.assertEqual(job.source.project, table_ref['projectId']) + self.assertEqual(job.source.dataset_name, table_ref['datasetId']) + self.assertEqual(job.source.name, table_ref['tableId']) + + if 'compression' in config: + self.assertEqual(job.compression, + config['compression']) + else: + self.assertIsNone(job.compression) + + if 'destinationFormat' in config: + self.assertEqual(job.destination_format, + config['destinationFormat']) + else: + self.assertIsNone(job.destination_format) + + if 'fieldDelimiter' in config: + self.assertEqual(job.field_delimiter, + config['fieldDelimiter']) + else: + self.assertIsNone(job.field_delimiter) + + if 'printHeader' in config: + self.assertEqual(job.print_header, + config['printHeader']) + else: + self.assertIsNone(job.print_header) + + def test_ctor(self): + client = _Client(self.PROJECT) + source = _Table(self.SOURCE_TABLE) + job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + client) + self.assertEqual(job.source, source) + self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual( + job.path, + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + + self._verifyInitialReadonlyProperties(job) + + # set/read from resource['configuration']['copy'] + self.assertIsNone(job.compression) + self.assertIsNone(job.destination_format) + self.assertIsNone(job.field_delimiter) + self.assertIsNone(job.print_header) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = {} + klass = self._getTargetClass() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = { + 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + } + } + klass = self._getTargetClass() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = { + 'id': self.JOB_ID, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.SOURCE_TABLE, + }, + 'destinationUris': [self.DESTINATION_URI], + } + }, + } + klass = self._getTargetClass() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + client = _Client(self.PROJECT) + RESOURCE = self._makeResource() + klass = self._getTargetClass() + dataset = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(dataset._client, client) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_begin_w_bound_client(self): + PATH = 'projects/%s/jobs' % self.PROJECT + RESOURCE = self._makeResource() + # Ensure None for missing server-set props + del RESOURCE['statistics']['creationTime'] + del RESOURCE['etag'] + del RESOURCE['selfLink'] + del RESOURCE['user_email'] + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + source = _Table(self.SOURCE_TABLE) + job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + client) + + job.begin() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.SOURCE_TABLE + }, + 'destinationUris': [self.DESTINATION_URI], + }, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + PATH = 'projects/%s/jobs' % self.PROJECT + RESOURCE = self._makeResource(ended=True) + EXTRACT_CONFIGURATION = { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.SOURCE_TABLE, + }, + 'destinationUris': [self.DESTINATION_URI], + 'compression': 'GZIP', + 'destinationFormat': 'NEWLINE_DELIMITED_JSON', + 'fieldDelimiter': '|', + 'printHeader': False, + } + RESOURCE['configuration']['extract'] = EXTRACT_CONFIGURATION + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + source = _Table(self.SOURCE_TABLE) + job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + client1) + + job.compression = 'GZIP' + job.destination_format = 'NEWLINE_DELIMITED_JSON' + job.field_delimiter = '|' + job.print_header = False + + job.begin(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'extract': EXTRACT_CONFIGURATION, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + source = _Table(self.SOURCE_TABLE) + job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + client) + + self.assertFalse(job.exists()) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_exists_hit_w_alternate_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection({}) + client2 = _Client(project=self.PROJECT, connection=conn2) + source = _Table(self.SOURCE_TABLE) + job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + client1) + + self.assertTrue(job.exists(client=client2)) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_reload_w_bound_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + source = _Table(self.SOURCE_TABLE) + job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + client) + + job.reload() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + RESOURCE = self._makeResource() + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + source = _Table(self.SOURCE_TABLE) + job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + client1) + + job.reload(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(job, RESOURCE) + + +class TestQueryJob(unittest.TestCase, _Base): + JOB_TYPE = 'query' + QUERY = 'select count(*) from persons' + DESTINATION_TABLE = 'destination_table' + + def _getTargetClass(self): + from google.cloud.bigquery.job import QueryJob + return QueryJob + + def _makeResource(self, started=False, ended=False): + resource = super(TestQueryJob, self)._makeResource( + started, ended) + config = resource['configuration']['query'] + config['query'] = self.QUERY + return resource + + def _verifyBooleanResourceProperties(self, job, config): + + if 'allowLargeResults' in config: + self.assertEqual(job.allow_large_results, + config['allowLargeResults']) + else: + self.assertIsNone(job.allow_large_results) + if 'flattenResults' in config: + self.assertEqual(job.flatten_results, + config['flattenResults']) + else: + self.assertIsNone(job.flatten_results) + if 'useQueryCache' in config: + self.assertEqual(job.use_query_cache, + config['useQueryCache']) + else: + self.assertIsNone(job.use_query_cache) + if 'useLegacySql' in config: + self.assertEqual(job.use_legacy_sql, + config['useLegacySql']) + else: + self.assertIsNone(job.use_legacy_sql) + if 'dryRun' in config: + self.assertEqual(job.dry_run, + config['dryRun']) + else: + self.assertIsNone(job.dry_run) + + def _verifyIntegerResourceProperties(self, job, config): + if 'maximumBillingTier' in config: + self.assertEqual(job.maximum_billing_tier, + config['maximumBillingTier']) + else: + self.assertIsNone(job.maximum_billing_tier) + if 'maximumBytesBilled' in config: + self.assertEqual(job.maximum_bytes_billed, + config['maximumBytesBilled']) + else: + self.assertIsNone(job.maximum_bytes_billed) + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get('configuration', {}).get('query') + self._verifyBooleanResourceProperties(job, config) + self._verifyIntegerResourceProperties(job, config) + + if 'createDisposition' in config: + self.assertEqual(job.create_disposition, + config['createDisposition']) + else: + self.assertIsNone(job.create_disposition) + if 'defaultDataset' in config: + dataset = job.default_dataset + ds_ref = { + 'projectId': dataset.project, + 'datasetId': dataset.name, + } + self.assertEqual(ds_ref, config['defaultDataset']) + else: + self.assertIsNone(job.default_dataset) + if 'destinationTable' in config: + table = job.destination + tb_ref = { + 'projectId': table.project, + 'datasetId': table.dataset_name, + 'tableId': table.name + } + self.assertEqual(tb_ref, config['destinationTable']) + else: + self.assertIsNone(job.destination) + if 'priority' in config: + self.assertEqual(job.priority, + config['priority']) + else: + self.assertIsNone(job.priority) + if 'writeDisposition' in config: + self.assertEqual(job.write_disposition, + config['writeDisposition']) + else: + self.assertIsNone(job.write_disposition) + + def test_ctor(self): + client = _Client(self.PROJECT) + job = self._makeOne(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.query, self.QUERY) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual( + job.path, + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + + self._verifyInitialReadonlyProperties(job) + + # set/read from resource['configuration']['copy'] + self.assertIsNone(job.allow_large_results) + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.default_dataset) + self.assertIsNone(job.destination) + self.assertIsNone(job.flatten_results) + self.assertIsNone(job.priority) + self.assertIsNone(job.use_query_cache) + self.assertIsNone(job.use_legacy_sql) + self.assertIsNone(job.dry_run) + self.assertIsNone(job.write_disposition) + self.assertIsNone(job.maximum_billing_tier) + self.assertIsNone(job.maximum_bytes_billed) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = {} + klass = self._getTargetClass() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = { + 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + } + } + klass = self._getTargetClass() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = { + 'id': self.JOB_ID, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'query': {'query': self.QUERY} + }, + } + klass = self._getTargetClass() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + client = _Client(self.PROJECT) + RESOURCE = self._makeResource() + RESOURCE['configuration']['query']['destinationTable'] = { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.DESTINATION_TABLE, + } + klass = self._getTargetClass() + dataset = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(dataset._client, client) + self._verifyResourceProperties(dataset, RESOURCE) + + def test_results(self): + from google.cloud.bigquery.query import QueryResults + client = _Client(self.PROJECT) + job = self._makeOne(self.JOB_NAME, self.QUERY, client) + results = job.results() + self.assertIsInstance(results, QueryResults) + self.assertIs(results._job, job) + + def test_begin_w_bound_client(self): + PATH = 'projects/%s/jobs' % self.PROJECT + RESOURCE = self._makeResource() + # Ensure None for missing server-set props + del RESOURCE['statistics']['creationTime'] + del RESOURCE['etag'] + del RESOURCE['selfLink'] + del RESOURCE['user_email'] + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + job = self._makeOne(self.JOB_NAME, self.QUERY, client) + + job.begin() + self.assertEqual(job.udf_resources, []) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'query': { + 'query': self.QUERY + }, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import Table + PATH = 'projects/%s/jobs' % self.PROJECT + TABLE = 'TABLE' + DS_NAME = 'DATASET' + RESOURCE = self._makeResource(ended=True) + QUERY_CONFIGURATION = { + 'query': self.QUERY, + 'allowLargeResults': True, + 'createDisposition': 'CREATE_NEVER', + 'defaultDataset': { + 'projectId': self.PROJECT, + 'datasetId': DS_NAME, + }, + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': DS_NAME, + 'tableId': TABLE, + }, + 'flattenResults': True, + 'priority': 'INTERACTIVE', + 'useQueryCache': True, + 'useLegacySql': True, + 'dryRun': True, + 'writeDisposition': 'WRITE_TRUNCATE', + 'maximumBillingTier': 4, + 'maximumBytesBilled': 123456 + } + RESOURCE['configuration']['query'] = QUERY_CONFIGURATION + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + job = self._makeOne(self.JOB_NAME, self.QUERY, client1) + + dataset = Dataset(DS_NAME, client1) + table = Table(TABLE, dataset) + + job.allow_large_results = True + job.create_disposition = 'CREATE_NEVER' + job.default_dataset = dataset + job.destination = table + job.flatten_results = True + job.priority = 'INTERACTIVE' + job.use_query_cache = True + job.use_legacy_sql = True + job.dry_run = True + job.write_disposition = 'WRITE_TRUNCATE' + job.maximum_billing_tier = 4 + job.maximum_bytes_billed = 123456 + + job.begin(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'query': QUERY_CONFIGURATION, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_bound_client_and_udf(self): + from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + PATH = 'projects/%s/jobs' % self.PROJECT + RESOURCE = self._makeResource() + # Ensure None for missing server-set props + del RESOURCE['statistics']['creationTime'] + del RESOURCE['etag'] + del RESOURCE['selfLink'] + del RESOURCE['user_email'] + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + job = self._makeOne(self.JOB_NAME, self.QUERY, client, + udf_resources=[ + UDFResource("resourceUri", RESOURCE_URI) + ]) + + job.begin() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(job.udf_resources, + [UDFResource("resourceUri", RESOURCE_URI)]) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'query': { + 'query': self.QUERY, + 'userDefinedFunctionResources': + [{'resourceUri': RESOURCE_URI}] + }, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + job = self._makeOne(self.JOB_NAME, self.QUERY, client) + + self.assertFalse(job.exists()) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_exists_hit_w_alternate_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection({}) + client2 = _Client(project=self.PROJECT, connection=conn2) + job = self._makeOne(self.JOB_NAME, self.QUERY, client1) + + self.assertTrue(job.exists(client=client2)) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_reload_w_bound_client(self): + from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import Table + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + DS_NAME = 'DATASET' + DEST_TABLE = 'dest_table' + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + job = self._makeOne(self.JOB_NAME, self.QUERY, client) + + dataset = Dataset(DS_NAME, client) + table = Table(DEST_TABLE, dataset) + job.destination = table + + job.reload() + + self.assertIsNone(job.destination) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + DS_NAME = 'DATASET' + DEST_TABLE = 'dest_table' + RESOURCE = self._makeResource() + q_config = RESOURCE['configuration']['query'] + q_config['destinationTable'] = { + 'projectId': self.PROJECT, + 'datasetId': DS_NAME, + 'tableId': DEST_TABLE, + } + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + job = self._makeOne(self.JOB_NAME, self.QUERY, client1) + + job.reload(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(job, RESOURCE) + + +class _Client(object): + + def __init__(self, project='project', connection=None): + self.project = project + self.connection = connection + + def dataset(self, name): + from google.cloud.bigquery.dataset import Dataset + return Dataset(name, client=self) + + +class _Table(object): + + def __init__(self, name=None): + self._name = name + + @property + def name(self): + if self._name is not None: + return self._name + return TestLoadTableFromStorageJob.TABLE_NAME + + @property + def project(self): + return TestLoadTableFromStorageJob.PROJECT + + @property + def dataset_name(self): + return TestLoadTableFromStorageJob.DS_NAME + + +class _Connection(object): + + def __init__(self, *responses): + self._responses = responses + self._requested = [] + + def api_request(self, **kw): + from google.cloud.exceptions import NotFound + self._requested.append(kw) + + try: + response, self._responses = self._responses[0], self._responses[1:] + except: + raise NotFound('miss') + else: + return response diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/unit_tests/test_query.py new file mode 100644 index 000000000000..9bcb865df5a7 --- /dev/null +++ b/packages/google-cloud-bigquery/unit_tests/test_query.py @@ -0,0 +1,458 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + + +class TestQueryResults(unittest.TestCase): + PROJECT = 'project' + JOB_NAME = 'job_name' + JOB_NAME = 'test-synchronous-query' + JOB_TYPE = 'query' + QUERY = 'select count(*) from persons' + TOKEN = 'TOKEN' + + def _getTargetClass(self): + from google.cloud.bigquery.query import QueryResults + return QueryResults + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def _makeResource(self, complete=False): + resource = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'jobComplete': complete, + 'errors': [], + 'schema': { + 'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, + ], + }, + } + + if complete: + resource['totalRows'] = '1000' + resource['rows'] = [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': 32}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': 33}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': 29}, + ]}, + {'f': [ + {'v': 'Bhettye Rhubble'}, + {'v': 27}, + ]}, + ] + resource['pageToken'] = self.TOKEN + resource['totalBytesProcessed'] = 100000 + resource['cacheHit'] = False + + return resource + + def _verifySchema(self, query, resource): + from google.cloud.bigquery.schema import SchemaField + if 'schema' in resource: + fields = resource['schema']['fields'] + self.assertEqual(len(query.schema), len(fields)) + for found, expected in zip(query.schema, fields): + self.assertIsInstance(found, SchemaField) + self.assertEqual(found.name, expected['name']) + self.assertEqual(found.field_type, expected['type']) + self.assertEqual(found.mode, expected['mode']) + self.assertEqual(found.description, + expected.get('description')) + self.assertEqual(found.fields, expected.get('fields')) + else: + self.assertIsNone(query.schema) + + def _verifyRows(self, query, resource): + expected = resource.get('rows') + if expected is None: + self.assertEqual(query.rows, []) + else: + found = query.rows + self.assertEqual(len(found), len(expected)) + for f_row, e_row in zip(found, expected): + self.assertEqual(f_row, + tuple([cell['v'] for cell in e_row['f']])) + + def _verifyResourceProperties(self, query, resource): + self.assertEqual(query.cache_hit, resource.get('cacheHit')) + self.assertEqual(query.complete, resource.get('jobComplete')) + self.assertEqual(query.errors, resource.get('errors')) + self.assertEqual(query.page_token, resource.get('pageToken')) + self.assertEqual(query.total_rows, resource.get('totalRows')) + self.assertEqual(query.total_bytes_processed, + resource.get('totalBytesProcessed')) + + if 'jobReference' in resource: + self.assertEqual(query.name, resource['jobReference']['jobId']) + else: + self.assertIsNone(query.name) + + self._verifySchema(query, resource) + self._verifyRows(query, resource) + + def test_ctor(self): + client = _Client(self.PROJECT) + query = self._makeOne(self.QUERY, client) + self.assertEqual(query.query, self.QUERY) + self.assertIs(query._client, client) + + self.assertIsNone(query.cache_hit) + self.assertIsNone(query.complete) + self.assertIsNone(query.errors) + self.assertIsNone(query.name) + self.assertIsNone(query.page_token) + self.assertEqual(query.rows, []) + self.assertIsNone(query.schema) + self.assertIsNone(query.total_rows) + self.assertIsNone(query.total_bytes_processed) + + self.assertIsNone(query.default_dataset) + self.assertIsNone(query.max_results) + self.assertIsNone(query.preserve_nulls) + self.assertIsNone(query.use_query_cache) + self.assertIsNone(query.use_legacy_sql) + + def test_from_query_job(self): + from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery._helpers import UDFResource + DS_NAME = 'DATASET' + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + client = _Client(self.PROJECT) + job = QueryJob( + self.JOB_NAME, self.QUERY, client, + udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) + dataset = job.default_dataset = Dataset(DS_NAME, client) + job.use_query_cache = True + job.use_legacy_sql = True + klass = self._getTargetClass() + + query = klass.from_query_job(job) + + self.assertEqual(query.name, self.JOB_NAME) + self.assertEqual(query.query, self.QUERY) + self.assertIs(query._client, client) + self.assertIs(query._job, job) + self.assertEqual(query.udf_resources, job.udf_resources) + self.assertIs(query.default_dataset, dataset) + self.assertTrue(query.use_query_cache) + self.assertTrue(query.use_legacy_sql) + + def test_from_query_job_wo_default_dataset(self): + from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + client = _Client(self.PROJECT) + job = QueryJob( + self.JOB_NAME, self.QUERY, client, + udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) + klass = self._getTargetClass() + + query = klass.from_query_job(job) + + self.assertEqual(query.query, self.QUERY) + self.assertIs(query._client, client) + self.assertIs(query._job, job) + self.assertEqual(query.udf_resources, job.udf_resources) + self.assertIsNone(query.default_dataset) + self.assertIsNone(query.use_query_cache) + self.assertIsNone(query.use_legacy_sql) + + def test_job_wo_jobid(self): + client = _Client(self.PROJECT) + query = self._makeOne(self.QUERY, client) + self.assertIsNone(query.job) + + def test_job_w_jobid(self): + from google.cloud.bigquery.job import QueryJob + SERVER_GENERATED = 'SERVER_GENERATED' + client = _Client(self.PROJECT) + query = self._makeOne(self.QUERY, client) + query._properties['jobReference'] = { + 'projectId': self.PROJECT, + 'jobId': SERVER_GENERATED, + } + job = query.job + self.assertIsInstance(job, QueryJob) + self.assertEqual(job.query, self.QUERY) + self.assertIs(job._client, client) + self.assertEqual(job.name, SERVER_GENERATED) + fetched_later = query.job + self.assertIs(fetched_later, job) + + def test_schema(self): + client = _Client(self.PROJECT) + query = self._makeOne(self.QUERY, client) + self._verifyResourceProperties(query, {}) + resource = { + 'schema': { + 'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, + ], + }, + } + query._set_properties(resource) + self._verifyResourceProperties(query, resource) + + def test_run_w_already_has_job(self): + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + query = self._makeOne(self.QUERY, client) + query._job = object() # simulate already running + with self.assertRaises(ValueError): + query.run() + + def test_run_w_bound_client(self): + PATH = 'projects/%s/queries' % self.PROJECT + RESOURCE = self._makeResource(complete=False) + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + query = self._makeOne(self.QUERY, client) + self.assertEqual(query.udf_resources, []) + query.run() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = {'query': self.QUERY} + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(query, RESOURCE) + + def test_run_w_alternate_client(self): + PATH = 'projects/%s/queries' % self.PROJECT + RESOURCE = self._makeResource(complete=True) + DATASET = 'test_dataset' + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + query = self._makeOne(self.QUERY, client1) + + query.default_dataset = client2.dataset(DATASET) + query.max_results = 100 + query.preserve_nulls = True + query.timeout_ms = 20000 + query.use_query_cache = False + query.use_legacy_sql = True + query.dry_run = True + + query.run(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'query': self.QUERY, + 'defaultDataset': { + 'projectId': self.PROJECT, + 'datasetId': DATASET, + }, + 'dryRun': True, + 'maxResults': 100, + 'preserveNulls': True, + 'timeoutMs': 20000, + 'useQueryCache': False, + 'useLegacySql': True, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(query, RESOURCE) + + def test_run_w_inline_udf(self): + from google.cloud.bigquery._helpers import UDFResource + INLINE_UDF_CODE = 'var someCode = "here";' + PATH = 'projects/%s/queries' % self.PROJECT + RESOURCE = self._makeResource(complete=False) + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + query = self._makeOne(self.QUERY, client) + query.udf_resources = [UDFResource("inlineCode", INLINE_UDF_CODE)] + + query.run() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = {'query': self.QUERY, + 'userDefinedFunctionResources': + [{'inlineCode': INLINE_UDF_CODE}]} + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(query, RESOURCE) + + def test_run_w_udf_resource_uri(self): + from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + PATH = 'projects/%s/queries' % self.PROJECT + RESOURCE = self._makeResource(complete=False) + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + query = self._makeOne(self.QUERY, client) + query.udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] + + query.run() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = {'query': self.QUERY, + 'userDefinedFunctionResources': + [{'resourceUri': RESOURCE_URI}]} + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(query, RESOURCE) + + def test_run_w_mixed_udfs(self): + from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + INLINE_UDF_CODE = 'var someCode = "here";' + PATH = 'projects/%s/queries' % self.PROJECT + RESOURCE = self._makeResource(complete=False) + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + query = self._makeOne(self.QUERY, client) + query.udf_resources = [UDFResource("resourceUri", RESOURCE_URI), + UDFResource("inlineCode", INLINE_UDF_CODE)] + + query.run() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(query.udf_resources, + [UDFResource("resourceUri", RESOURCE_URI), + UDFResource("inlineCode", INLINE_UDF_CODE)]) + SENT = {'query': self.QUERY, + 'userDefinedFunctionResources': [ + {'resourceUri': RESOURCE_URI}, + {"inlineCode": INLINE_UDF_CODE}]} + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(query, RESOURCE) + + def test_fetch_data_query_not_yet_run(self): + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + query = self._makeOne(self.QUERY, client) + self.assertRaises(ValueError, query.fetch_data) + + def test_fetch_data_w_bound_client(self): + PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_NAME) + BEFORE = self._makeResource(complete=False) + AFTER = self._makeResource(complete=True) + del AFTER['totalRows'] + + conn = _Connection(AFTER) + client = _Client(project=self.PROJECT, connection=conn) + query = self._makeOne(self.QUERY, client) + query._set_properties(BEFORE) + self.assertFalse(query.complete) + + rows, total_rows, page_token = query.fetch_data() + + self.assertTrue(query.complete) + self.assertEqual(len(rows), 4) + self.assertEqual(rows[0], ('Phred Phlyntstone', 32)) + self.assertEqual(rows[1], ('Bharney Rhubble', 33)) + self.assertEqual(rows[2], ('Wylma Phlyntstone', 29)) + self.assertEqual(rows[3], ('Bhettye Rhubble', 27)) + self.assertIsNone(total_rows) + self.assertEqual(page_token, AFTER['pageToken']) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_fetch_data_w_alternate_client(self): + PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_NAME) + MAX = 10 + TOKEN = 'TOKEN' + START = 2257 + TIMEOUT = 20000 + BEFORE = self._makeResource(complete=False) + AFTER = self._makeResource(complete=True) + + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(AFTER) + client2 = _Client(project=self.PROJECT, connection=conn2) + query = self._makeOne(self.QUERY, client1) + query._set_properties(BEFORE) + self.assertFalse(query.complete) + + rows, total_rows, page_token = query.fetch_data( + client=client2, max_results=MAX, page_token=TOKEN, + start_index=START, timeout_ms=TIMEOUT) + + self.assertTrue(query.complete) + self.assertEqual(len(rows), 4) + self.assertEqual(rows[0], ('Phred Phlyntstone', 32)) + self.assertEqual(rows[1], ('Bharney Rhubble', 33)) + self.assertEqual(rows[2], ('Wylma Phlyntstone', 29)) + self.assertEqual(rows[3], ('Bhettye Rhubble', 27)) + self.assertEqual(total_rows, int(AFTER['totalRows'])) + self.assertEqual(page_token, AFTER['pageToken']) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], + {'maxResults': MAX, + 'pageToken': TOKEN, + 'startIndex': START, + 'timeoutMs': TIMEOUT}) + + +class _Client(object): + + def __init__(self, project='project', connection=None): + self.project = project + self.connection = connection + + def dataset(self, name): + from google.cloud.bigquery.dataset import Dataset + return Dataset(name, client=self) + + +class _Connection(object): + + def __init__(self, *responses): + self._responses = responses + self._requested = [] + + def api_request(self, **kw): + self._requested.append(kw) + response, self._responses = self._responses[0], self._responses[1:] + return response diff --git a/packages/google-cloud-bigquery/unit_tests/test_schema.py b/packages/google-cloud-bigquery/unit_tests/test_schema.py new file mode 100644 index 000000000000..a8272728b742 --- /dev/null +++ b/packages/google-cloud-bigquery/unit_tests/test_schema.py @@ -0,0 +1,110 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + + +class TestSchemaField(unittest.TestCase): + + def _getTargetClass(self): + from google.cloud.bigquery.schema import SchemaField + return SchemaField + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def test_ctor_defaults(self): + field = self._makeOne('test', 'STRING') + self.assertEqual(field.name, 'test') + self.assertEqual(field.field_type, 'STRING') + self.assertEqual(field.mode, 'NULLABLE') + self.assertIsNone(field.description) + self.assertIsNone(field.fields) + + def test_ctor_explicit(self): + field = self._makeOne('test', 'STRING', mode='REQUIRED', + description='Testing') + self.assertEqual(field.name, 'test') + self.assertEqual(field.field_type, 'STRING') + self.assertEqual(field.mode, 'REQUIRED') + self.assertEqual(field.description, 'Testing') + self.assertIsNone(field.fields) + + def test_ctor_subfields(self): + field = self._makeOne('phone_number', 'RECORD', + fields=[self._makeOne('area_code', 'STRING'), + self._makeOne('local_number', 'STRING')]) + self.assertEqual(field.name, 'phone_number') + self.assertEqual(field.field_type, 'RECORD') + self.assertEqual(field.mode, 'NULLABLE') + self.assertIsNone(field.description) + self.assertEqual(len(field.fields), 2) + self.assertEqual(field.fields[0].name, 'area_code') + self.assertEqual(field.fields[0].field_type, 'STRING') + self.assertEqual(field.fields[0].mode, 'NULLABLE') + self.assertIsNone(field.fields[0].description) + self.assertIsNone(field.fields[0].fields) + self.assertEqual(field.fields[1].name, 'local_number') + self.assertEqual(field.fields[1].field_type, 'STRING') + self.assertEqual(field.fields[1].mode, 'NULLABLE') + self.assertIsNone(field.fields[1].description) + self.assertIsNone(field.fields[1].fields) + + def test___eq___name_mismatch(self): + field = self._makeOne('test', 'STRING') + other = self._makeOne('other', 'STRING') + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._makeOne('test', 'STRING') + other = self._makeOne('test', 'INTEGER') + self.assertNotEqual(field, other) + + def test___eq___mode_mismatch(self): + field = self._makeOne('test', 'STRING', mode='REQUIRED') + other = self._makeOne('test', 'STRING', mode='NULLABLE') + self.assertNotEqual(field, other) + + def test___eq___description_mismatch(self): + field = self._makeOne('test', 'STRING', description='Testing') + other = self._makeOne('test', 'STRING', description='Other') + self.assertNotEqual(field, other) + + def test___eq___fields_mismatch(self): + sub1 = self._makeOne('sub1', 'STRING') + sub2 = self._makeOne('sub2', 'STRING') + field = self._makeOne('test', 'RECORD', fields=[sub1]) + other = self._makeOne('test', 'RECORD', fields=[sub2]) + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._makeOne('test', 'STRING', mode='REQUIRED', + description='Testing') + other = self._makeOne('test', 'STRING', mode='REQUIRED', + description='Testing') + self.assertEqual(field, other) + + def test___eq___hit_case_diff_on_type(self): + field = self._makeOne('test', 'STRING', mode='REQUIRED', + description='Testing') + other = self._makeOne('test', 'string', mode='REQUIRED', + description='Testing') + self.assertEqual(field, other) + + def test___eq___hit_w_fields(self): + sub1 = self._makeOne('sub1', 'STRING') + sub2 = self._makeOne('sub2', 'STRING') + field = self._makeOne('test', 'RECORD', fields=[sub1, sub2]) + other = self._makeOne('test', 'RECORD', fields=[sub1, sub2]) + self.assertEqual(field, other) diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py new file mode 100644 index 000000000000..6dc985d89af9 --- /dev/null +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -0,0 +1,1942 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + + +class _SchemaBase(object): + + def _verify_field(self, field, r_field): + self.assertEqual(field.name, r_field['name']) + self.assertEqual(field.field_type, r_field['type']) + self.assertEqual(field.mode, r_field.get('mode', 'NULLABLE')) + + def _verifySchema(self, schema, resource): + r_fields = resource['schema']['fields'] + self.assertEqual(len(schema), len(r_fields)) + + for field, r_field in zip(schema, r_fields): + self._verify_field(field, r_field) + + +class TestTable(unittest.TestCase, _SchemaBase): + PROJECT = 'project' + DS_NAME = 'dataset-name' + TABLE_NAME = 'table-name' + + def _getTargetClass(self): + from google.cloud.bigquery.table import Table + return Table + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def _setUpConstants(self): + import datetime + from google.cloud._helpers import UTC + + self.WHEN_TS = 1437767599.006 + self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( + tzinfo=UTC) + self.ETAG = 'ETAG' + self.TABLE_ID = '%s:%s:%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.RESOURCE_URL = 'http://example.com/path/to/resource' + self.NUM_BYTES = 12345 + self.NUM_ROWS = 67 + + def _makeResource(self): + self._setUpConstants() + return { + 'creationTime': self.WHEN_TS * 1000, + 'tableReference': + {'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME}, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, + 'etag': 'ETAG', + 'id': self.TABLE_ID, + 'lastModifiedTime': self.WHEN_TS * 1000, + 'location': 'US', + 'selfLink': self.RESOURCE_URL, + 'numRows': self.NUM_ROWS, + 'numBytes': self.NUM_BYTES, + 'type': 'TABLE', + } + + def _verifyReadonlyResourceProperties(self, table, resource): + if 'creationTime' in resource: + self.assertEqual(table.created, self.WHEN) + else: + self.assertIsNone(table.created) + + if 'etag' in resource: + self.assertEqual(table.etag, self.ETAG) + else: + self.assertIsNone(table.etag) + + if 'numRows' in resource: + self.assertEqual(table.num_rows, self.NUM_ROWS) + else: + self.assertIsNone(table.num_rows) + + if 'numBytes' in resource: + self.assertEqual(table.num_bytes, self.NUM_BYTES) + else: + self.assertIsNone(table.num_bytes) + + if 'selfLink' in resource: + self.assertEqual(table.self_link, self.RESOURCE_URL) + else: + self.assertIsNone(table.self_link) + + self.assertEqual(table.table_id, self.TABLE_ID) + self.assertEqual(table.table_type, + 'TABLE' if 'view' not in resource else 'VIEW') + + def _verifyResourceProperties(self, table, resource): + + self._verifyReadonlyResourceProperties(table, resource) + + if 'expirationTime' in resource: + self.assertEqual(table.expires, self.EXP_TIME) + else: + self.assertIsNone(table.expires) + + self.assertEqual(table.description, resource.get('description')) + self.assertEqual(table.friendly_name, resource.get('friendlyName')) + self.assertEqual(table.location, resource.get('location')) + + if 'view' in resource: + self.assertEqual(table.view_query, resource['view']['query']) + else: + self.assertIsNone(table.view_query) + + if 'schema' in resource: + self._verifySchema(table.schema, resource) + else: + self.assertEqual(table.schema, []) + + def test_ctor(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + self.assertEqual(table.name, self.TABLE_NAME) + self.assertIs(table._dataset, dataset) + self.assertEqual(table.project, self.PROJECT) + self.assertEqual(table.dataset_name, self.DS_NAME) + self.assertEqual( + table.path, + '/projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME)) + self.assertEqual(table.schema, []) + + self.assertIsNone(table.created) + self.assertIsNone(table.etag) + self.assertIsNone(table.modified) + self.assertIsNone(table.num_bytes) + self.assertIsNone(table.num_rows) + self.assertIsNone(table.self_link) + self.assertIsNone(table.table_id) + self.assertIsNone(table.table_type) + + self.assertIsNone(table.description) + self.assertIsNone(table.expires) + self.assertIsNone(table.friendly_name) + self.assertIsNone(table.location) + self.assertIsNone(table.view_query) + + def test_ctor_w_schema(self): + from google.cloud.bigquery.table import SchemaField + client = _Client(self.PROJECT) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + self.assertEqual(table.schema, [full_name, age]) + + def test_num_bytes_getter(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + + # Check with no value set. + self.assertIsNone(table.num_bytes) + + num_bytes = 1337 + # Check with integer value set. + table._properties = {'numBytes': num_bytes} + self.assertEqual(table.num_bytes, num_bytes) + + # Check with a string value set. + table._properties = {'numBytes': str(num_bytes)} + self.assertEqual(table.num_bytes, num_bytes) + + # Check with invalid int value. + table._properties = {'numBytes': 'x'} + with self.assertRaises(ValueError): + getattr(table, 'num_bytes') + + def test_num_rows_getter(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + + # Check with no value set. + self.assertIsNone(table.num_rows) + + num_rows = 42 + # Check with integer value set. + table._properties = {'numRows': num_rows} + self.assertEqual(table.num_rows, num_rows) + + # Check with a string value set. + table._properties = {'numRows': str(num_rows)} + self.assertEqual(table.num_rows, num_rows) + + # Check with invalid int value. + table._properties = {'numRows': 'x'} + with self.assertRaises(ValueError): + getattr(table, 'num_rows') + + def test_schema_setter_non_list(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + with self.assertRaises(TypeError): + table.schema = object() + + def test_schema_setter_invalid_field(self): + from google.cloud.bigquery.table import SchemaField + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + with self.assertRaises(ValueError): + table.schema = [full_name, object()] + + def test_schema_setter(self): + from google.cloud.bigquery.table import SchemaField + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table.schema = [full_name, age] + self.assertEqual(table.schema, [full_name, age]) + + def test_props_set_by_server(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + + CREATED = datetime.datetime(2015, 7, 29, 12, 13, 22, tzinfo=UTC) + MODIFIED = datetime.datetime(2015, 7, 29, 14, 47, 15, tzinfo=UTC) + TABLE_ID = '%s:%s:%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + table._properties['creationTime'] = _millis(CREATED) + table._properties['etag'] = 'ETAG' + table._properties['lastModifiedTime'] = _millis(MODIFIED) + table._properties['numBytes'] = 12345 + table._properties['numRows'] = 66 + table._properties['selfLink'] = URL + table._properties['id'] = TABLE_ID + table._properties['type'] = 'TABLE' + + self.assertEqual(table.created, CREATED) + self.assertEqual(table.etag, 'ETAG') + self.assertEqual(table.modified, MODIFIED) + self.assertEqual(table.num_bytes, 12345) + self.assertEqual(table.num_rows, 66) + self.assertEqual(table.self_link, URL) + self.assertEqual(table.table_id, TABLE_ID) + self.assertEqual(table.table_type, 'TABLE') + + def test_description_setter_bad_value(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + with self.assertRaises(ValueError): + table.description = 12345 + + def test_description_setter(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + table.description = 'DESCRIPTION' + self.assertEqual(table.description, 'DESCRIPTION') + + def test_expires_setter_bad_value(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + with self.assertRaises(ValueError): + table.expires = object() + + def test_expires_setter(self): + import datetime + from google.cloud._helpers import UTC + + WHEN = datetime.datetime(2015, 7, 28, 16, 39, tzinfo=UTC) + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + table.expires = WHEN + self.assertEqual(table.expires, WHEN) + + def test_friendly_name_setter_bad_value(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + with self.assertRaises(ValueError): + table.friendly_name = 12345 + + def test_friendly_name_setter(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + table.friendly_name = 'FRIENDLY' + self.assertEqual(table.friendly_name, 'FRIENDLY') + + def test_location_setter_bad_value(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + with self.assertRaises(ValueError): + table.location = 12345 + + def test_location_setter(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + table.location = 'LOCATION' + self.assertEqual(table.location, 'LOCATION') + + def test_view_query_setter_bad_value(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + with self.assertRaises(ValueError): + table.view_query = 12345 + + def test_view_query_setter(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + table.view_query = 'select * from foo' + self.assertEqual(table.view_query, 'select * from foo') + + def test_view_query_deleter(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + table.view_query = 'select * from foo' + del table.view_query + self.assertIsNone(table.view_query) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _Client(self.PROJECT) + dataset = _Dataset(client) + RESOURCE = {} + klass = self._getTargetClass() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, dataset) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _Client(self.PROJECT) + dataset = _Dataset(client) + RESOURCE = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_NAME, self.TABLE_NAME), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME, + }, + 'type': 'TABLE', + } + klass = self._getTargetClass() + table = klass.from_api_repr(RESOURCE, dataset) + self.assertEqual(table.name, self.TABLE_NAME) + self.assertIs(table._dataset, dataset) + self._verifyResourceProperties(table, RESOURCE) + + def test_from_api_repr_w_properties(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + RESOURCE = self._makeResource() + klass = self._getTargetClass() + table = klass.from_api_repr(RESOURCE, dataset) + self.assertIs(table._dataset._client, client) + self._verifyResourceProperties(table, RESOURCE) + + def test_create_no_view_query_no_schema(self): + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + + with self.assertRaises(ValueError): + table.create() + + def test_create_w_bound_client(self): + from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + + table.create() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME}, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(table, RESOURCE) + + def test_create_w_partition_no_expire(self): + from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + + self.assertIsNone(table.partitioning_type) + table.partitioning_type = "DAY" + self.assertEqual(table.partitioning_type, "DAY") + table.create() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME}, + 'timePartitioning': {'type': 'DAY'}, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(table, RESOURCE) + + def test_create_w_partition_and_expire(self): + from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + self.assertIsNone(table.partition_expiration) + table.partition_expiration = 100 + self.assertEqual(table.partitioning_type, "DAY") + self.assertEqual(table.partition_expiration, 100) + table.create() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME}, + 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(table, RESOURCE) + + def test_partition_type_setter_bad_type(self): + from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + with self.assertRaises(ValueError): + table.partitioning_type = 123 + + def test_partition_type_setter_unknown_value(self): + from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + with self.assertRaises(ValueError): + table.partitioning_type = "HASH" + + def test_partition_type_setter_w_known_value(self): + from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + self.assertIsNone(table.partitioning_type) + table.partitioning_type = 'DAY' + self.assertEqual(table.partitioning_type, 'DAY') + + def test_partition_type_setter_w_none(self): + from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + table._properties['timePartitioning'] = {'type': 'DAY'} + table.partitioning_type = None + self.assertIsNone(table.partitioning_type) + self.assertFalse('timePartitioning' in table._properties) + + def test_partition_experation_bad_type(self): + from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + with self.assertRaises(ValueError): + table.partition_expiration = "NEVER" + + def test_partition_expiration_w_integer(self): + from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + self.assertIsNone(table.partition_expiration) + table.partition_expiration = 100 + self.assertEqual(table.partitioning_type, "DAY") + self.assertEqual(table.partition_expiration, 100) + + def test_partition_expiration_w_none(self): + from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + self.assertIsNone(table.partition_expiration) + table._properties['timePartitioning'] = { + 'type': 'DAY', + 'expirationMs': 100, + } + table.partition_expiration = None + self.assertEqual(table.partitioning_type, "DAY") + self.assertIsNone(table.partition_expiration) + + def test_partition_expiration_w_none_no_partition_set(self): + from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + self.assertIsNone(table.partition_expiration) + table.partition_expiration = None + self.assertIsNone(table.partitioning_type) + self.assertIsNone(table.partition_expiration) + + def test_list_partitions(self): + from google.cloud.bigquery.table import SchemaField + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + client._query_results = [(20160804, None), (20160805, None)] + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + self.assertEqual(table.list_partitions(), [20160804, 20160805]) + + def test_create_w_alternate_client(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + from google.cloud.bigquery.table import SchemaField + + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + DESCRIPTION = 'DESCRIPTION' + TITLE = 'TITLE' + QUERY = 'select fullname, age from person_ages' + RESOURCE = self._makeResource() + RESOURCE['description'] = DESCRIPTION + RESOURCE['friendlyName'] = TITLE + self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, + tzinfo=UTC) + RESOURCE['expirationTime'] = _millis(self.EXP_TIME) + RESOURCE['view'] = {} + RESOURCE['view']['query'] = QUERY + RESOURCE['type'] = 'VIEW' + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + dataset = _Dataset(client=client1) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset=dataset, + schema=[full_name, age]) + table.friendly_name = TITLE + table.description = DESCRIPTION + table.view_query = QUERY + + table.create(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME}, + 'description': DESCRIPTION, + 'friendlyName': TITLE, + 'view': {'query': QUERY}, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(table, RESOURCE) + + def test_create_w_missing_output_properties(self): + # In the wild, the resource returned from 'dataset.create' sometimes + # lacks 'creationTime' / 'lastModifiedTime' + from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + RESOURCE = self._makeResource() + del RESOURCE['creationTime'] + del RESOURCE['lastModifiedTime'] + self.WHEN = None + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + + table.create() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME}, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(table, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + + self.assertFalse(table.exists()) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_exists_hit_w_alternate_client(self): + PATH = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection({}) + client2 = _Client(project=self.PROJECT, connection=conn2) + dataset = _Dataset(client1) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + + self.assertTrue(table.exists(client=client2)) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'fields': 'id'}) + + def test_reload_w_bound_client(self): + PATH = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + + table.reload() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(table, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + RESOURCE = self._makeResource() + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + dataset = _Dataset(client1) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + + table.reload(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(table, RESOURCE) + + def test_patch_w_invalid_expiration(self): + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + + with self.assertRaises(ValueError): + table.patch(expires='BOGUS') + + def test_patch_w_bound_client(self): + PATH = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + DESCRIPTION = 'DESCRIPTION' + TITLE = 'TITLE' + RESOURCE = self._makeResource() + RESOURCE['description'] = DESCRIPTION + RESOURCE['friendlyName'] = TITLE + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + + table.patch(description=DESCRIPTION, + friendly_name=TITLE, + view_query=None) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + SENT = { + 'description': DESCRIPTION, + 'friendlyName': TITLE, + 'view': None, + } + self.assertEqual(req['data'], SENT) + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(table, RESOURCE) + + def test_patch_w_alternate_client(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + from google.cloud.bigquery.table import SchemaField + + PATH = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + QUERY = 'select fullname, age from person_ages' + LOCATION = 'EU' + RESOURCE = self._makeResource() + RESOURCE['view'] = {'query': QUERY} + RESOURCE['type'] = 'VIEW' + RESOURCE['location'] = LOCATION + self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, + tzinfo=UTC) + RESOURCE['expirationTime'] = _millis(self.EXP_TIME) + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + dataset = _Dataset(client1) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='NULLABLE') + + table.patch(client=client2, view_query=QUERY, location=LOCATION, + expires=self.EXP_TIME, schema=[full_name, age]) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'view': {'query': QUERY}, + 'location': LOCATION, + 'expirationTime': _millis(self.EXP_TIME), + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(table, RESOURCE) + + def test_patch_w_schema_None(self): + # Simulate deleting schema: not sure if back-end will actually + # allow this operation, but the spec says it is optional. + PATH = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + DESCRIPTION = 'DESCRIPTION' + TITLE = 'TITLE' + RESOURCE = self._makeResource() + RESOURCE['description'] = DESCRIPTION + RESOURCE['friendlyName'] = TITLE + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + + table.patch(schema=None) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + SENT = {'schema': None} + self.assertEqual(req['data'], SENT) + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(table, RESOURCE) + + def test_update_w_bound_client(self): + from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + DESCRIPTION = 'DESCRIPTION' + TITLE = 'TITLE' + RESOURCE = self._makeResource() + RESOURCE['description'] = DESCRIPTION + RESOURCE['friendlyName'] = TITLE + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset=dataset, + schema=[full_name, age]) + table.description = DESCRIPTION + table.friendly_name = TITLE + + table.update() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PUT') + SENT = { + 'tableReference': + {'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME}, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, + 'description': DESCRIPTION, + 'friendlyName': TITLE, + } + self.assertEqual(req['data'], SENT) + self.assertEqual(req['path'], '/%s' % PATH) + self._verifyResourceProperties(table, RESOURCE) + + def test_update_w_alternate_client(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + + PATH = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + DEF_TABLE_EXP = 12345 + LOCATION = 'EU' + QUERY = 'select fullname, age from person_ages' + RESOURCE = self._makeResource() + RESOURCE['defaultTableExpirationMs'] = 12345 + RESOURCE['location'] = LOCATION + self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, + tzinfo=UTC) + RESOURCE['expirationTime'] = _millis(self.EXP_TIME) + RESOURCE['view'] = {'query': QUERY} + RESOURCE['type'] = 'VIEW' + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESOURCE) + client2 = _Client(project=self.PROJECT, connection=conn2) + dataset = _Dataset(client1) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table.default_table_expiration_ms = DEF_TABLE_EXP + table.location = LOCATION + table.expires = self.EXP_TIME + table.view_query = QUERY + + table.update(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'PUT') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'tableReference': + {'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME}, + 'expirationTime': _millis(self.EXP_TIME), + 'location': 'EU', + 'view': {'query': QUERY}, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(table, RESOURCE) + + def test_delete_w_bound_client(self): + PATH = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + conn = _Connection({}) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + + table.delete() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'DELETE') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_delete_w_alternate_client(self): + PATH = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection({}) + client2 = _Client(project=self.PROJECT, connection=conn2) + dataset = _Dataset(client1) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + + table.delete(client=client2) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'DELETE') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_fetch_data_w_bound_client(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import SchemaField + + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + WHEN_1 = WHEN + datetime.timedelta(seconds=1) + WHEN_2 = WHEN + datetime.timedelta(seconds=2) + ROWS = 1234 + TOKEN = 'TOKEN' + + def _bigquery_timestamp_float_repr(ts_float): + # Preserve microsecond precision for E+09 timestamps + return '%0.15E' % (ts_float,) + + DATA = { + 'totalRows': str(ROWS), + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': '32'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS)}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': '33'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS + 1)}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': '29'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS + 2)}, + ]}, + {'f': [ + {'v': 'Bhettye Rhubble'}, + {'v': None}, + {'v': None}, + ]}, + ] + } + + conn = _Connection(DATA) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='NULLABLE') + joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') + table = self._makeOne(self.TABLE_NAME, dataset=dataset, + schema=[full_name, age, joined]) + + rows, total_rows, page_token = table.fetch_data() + + self.assertEqual(len(rows), 4) + self.assertEqual(rows[0], ('Phred Phlyntstone', 32, WHEN)) + self.assertEqual(rows[1], ('Bharney Rhubble', 33, WHEN_1)) + self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, WHEN_2)) + self.assertEqual(rows[3], ('Bhettye Rhubble', None, None)) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_fetch_data_w_alternate_client(self): + from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + MAX = 10 + TOKEN = 'TOKEN' + DATA = { + 'rows': [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': '32'}, + {'v': 'true'}, + {'v': '3.1415926'}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': '33'}, + {'v': 'false'}, + {'v': '1.414'}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': '29'}, + {'v': 'true'}, + {'v': '2.71828'}, + ]}, + {'f': [ + {'v': 'Bhettye Rhubble'}, + {'v': '27'}, + {'v': None}, + {'v': None}, + ]}, + ] + } + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(DATA) + client2 = _Client(project=self.PROJECT, connection=conn2) + dataset = _Dataset(client1) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') + score = SchemaField('score', 'FLOAT', mode='NULLABLE') + table = self._makeOne(self.TABLE_NAME, dataset=dataset, + schema=[full_name, age, voter, score]) + + rows, total_rows, page_token = table.fetch_data(client=client2, + max_results=MAX, + page_token=TOKEN) + + self.assertEqual(len(rows), 4) + self.assertEqual(rows[0], ('Phred Phlyntstone', 32, True, 3.1415926)) + self.assertEqual(rows[1], ('Bharney Rhubble', 33, False, 1.414)) + self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, True, 2.71828)) + self.assertEqual(rows[3], ('Bhettye Rhubble', 27, None, None)) + self.assertIsNone(total_rows) + self.assertIsNone(page_token) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], + {'maxResults': MAX, 'pageToken': TOKEN}) + + def test_fetch_data_w_repeated_fields(self): + from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + ROWS = 1234 + TOKEN = 'TOKEN' + DATA = { + 'totalRows': ROWS, + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': ['red', 'green']}, + {'v': [{'f': [{'v': ['1', '2']}, + {'v': ['3.1415', '1.414']}]}]}, + ]}, + ] + } + conn = _Connection(DATA) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('color', 'STRING', mode='REPEATED') + index = SchemaField('index', 'INTEGER', 'REPEATED') + score = SchemaField('score', 'FLOAT', 'REPEATED') + struct = SchemaField('struct', 'RECORD', mode='REPEATED', + fields=[index, score]) + table = self._makeOne(self.TABLE_NAME, dataset=dataset, + schema=[full_name, struct]) + + rows, total_rows, page_token = table.fetch_data() + + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0][0], ['red', 'green']) + self.assertEqual(rows[0][1], [{'index': [1, 2], + 'score': [3.1415, 1.414]}]) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_fetch_data_w_record_schema(self): + from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + ROWS = 1234 + TOKEN = 'TOKEN' + DATA = { + 'totalRows': ROWS, + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': None}, + ]}, + ] + } + conn = _Connection(DATA) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + area_code = SchemaField('area_code', 'STRING', 'REQUIRED') + local_number = SchemaField('local_number', 'STRING', 'REQUIRED') + rank = SchemaField('rank', 'INTEGER', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='NULLABLE', + fields=[area_code, local_number, rank]) + table = self._makeOne(self.TABLE_NAME, dataset=dataset, + schema=[full_name, phone]) + + rows, total_rows, page_token = table.fetch_data() + + self.assertEqual(len(rows), 3) + self.assertEqual(rows[0][0], 'Phred Phlyntstone') + self.assertEqual(rows[0][1], {'area_code': '800', + 'local_number': '555-1212', + 'rank': 1}) + self.assertEqual(rows[1][0], 'Bharney Rhubble') + self.assertEqual(rows[1][1], {'area_code': '877', + 'local_number': '768-5309', + 'rank': 2}) + self.assertEqual(rows[2][0], 'Wylma Phlyntstone') + self.assertIsNone(rows[2][1]) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_insert_data_w_bound_client(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _microseconds_from_datetime + from google.cloud.bigquery.table import SchemaField + + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + conn = _Connection({}) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') + table = self._makeOne(self.TABLE_NAME, dataset=dataset, + schema=[full_name, age, joined]) + ROWS = [ + ('Phred Phlyntstone', 32, WHEN), + ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), + ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)), + ('Bhettye Rhubble', 27, None), + ] + + def _row_data(row): + joined = None + if row[2] is not None: + joined = _microseconds_from_datetime(row[2]) * 1e-6 + return {'full_name': row[0], + 'age': row[1], + 'joined': joined} + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = table.insert_data(ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_insert_data_w_alternate_client(self): + from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + RESPONSE = { + 'insertErrors': [ + {'index': 1, + 'errors': [ + {'reason': 'REASON', + 'location': 'LOCATION', + 'debugInfo': 'INFO', + 'message': 'MESSAGE'} + ]}, + ]} + conn1 = _Connection() + client1 = _Client(project=self.PROJECT, connection=conn1) + conn2 = _Connection(RESPONSE) + client2 = _Client(project=self.PROJECT, connection=conn2) + dataset = _Dataset(client1) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') + table = self._makeOne(self.TABLE_NAME, dataset=dataset, + schema=[full_name, age, voter]) + ROWS = [ + ('Phred Phlyntstone', 32, True), + ('Bharney Rhubble', 33, False), + ('Wylma Phlyntstone', 29, True), + ('Bhettye Rhubble', 27, True), + ] + + def _row_data(row): + return {'full_name': row[0], 'age': row[1], 'voter': row[2]} + + SENT = { + 'skipInvalidRows': True, + 'ignoreUnknownValues': True, + 'templateSuffix': '20160303', + 'rows': [{'insertId': index, 'json': _row_data(row)} + for index, row in enumerate(ROWS)], + } + + errors = table.insert_data( + client=client2, + rows=ROWS, + row_ids=[index for index, _ in enumerate(ROWS)], + skip_invalid_rows=True, + ignore_unknown_values=True, + template_suffix='20160303', + ) + + self.assertEqual(len(errors), 1) + self.assertEqual(errors[0]['index'], 1) + self.assertEqual(len(errors[0]['errors']), 1) + self.assertEqual(errors[0]['errors'][0], + RESPONSE['insertErrors'][0]['errors'][0]) + + self.assertEqual(len(conn1._requested), 0) + self.assertEqual(len(conn2._requested), 1) + req = conn2._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_insert_data_w_repeated_fields(self): + from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + conn = _Connection({}) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('color', 'STRING', mode='REPEATED') + index = SchemaField('index', 'INTEGER', 'REPEATED') + score = SchemaField('score', 'FLOAT', 'REPEATED') + struct = SchemaField('struct', 'RECORD', mode='REPEATED', + fields=[index, score]) + table = self._makeOne(self.TABLE_NAME, dataset=dataset, + schema=[full_name, struct]) + ROWS = [ + (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), + ] + + def _row_data(row): + return {'color': row[0], + 'struct': row[1]} + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = table.insert_data(ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_insert_data_w_record_schema(self): + from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + self.PROJECT, self.DS_NAME, self.TABLE_NAME) + conn = _Connection({}) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + area_code = SchemaField('area_code', 'STRING', 'REQUIRED') + local_number = SchemaField('local_number', 'STRING', 'REQUIRED') + rank = SchemaField('rank', 'INTEGER', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='NULLABLE', + fields=[area_code, local_number, rank]) + table = self._makeOne(self.TABLE_NAME, dataset=dataset, + schema=[full_name, phone]) + ROWS = [ + ('Phred Phlyntstone', {'area_code': '800', + 'local_number': '555-1212', + 'rank': 1}), + ('Bharney Rhubble', {'area_code': '877', + 'local_number': '768-5309', + 'rank': 2}), + ('Wylma Phlyntstone', None), + ] + + def _row_data(row): + return {'full_name': row[0], + 'phone': row[1]} + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = table.insert_data(ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_upload_from_file_text_mode_file_failure(self): + + class TextModeFile(object): + mode = 'r' + + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + file_obj = TextModeFile() + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + with self.assertRaises(ValueError): + table.upload_from_file(file_obj, 'CSV', size=1234) + + def test_upload_from_file_binary_mode_no_failure(self): + self._upload_from_file_helper(input_file_mode='r+b') + + def test_upload_from_file_size_failure(self): + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + file_obj = object() + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + with self.assertRaises(ValueError): + table.upload_from_file(file_obj, 'CSV', size=None) + + def test_upload_from_file_multipart_w_400(self): + import csv + import datetime + from six.moves.http_client import BAD_REQUEST + from google.cloud._testing import _NamedTemporaryFile + from google.cloud._helpers import UTC + from google.cloud.exceptions import BadRequest + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + response = {'status': BAD_REQUEST} + conn = _Connection( + (response, b'{}'), + ) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as file_obj: + writer = csv.writer(file_obj) + writer.writerow(('full_name', 'age', 'joined')) + writer.writerow(('Phred Phlyntstone', 32, WHEN)) + + with open(temp.name, 'rb') as file_obj: + with self.assertRaises(BadRequest): + table.upload_from_file( + file_obj, 'CSV', rewind=True) + + def _upload_from_file_helper(self, **kw): + import csv + import datetime + from six.moves.http_client import OK + from google.cloud._helpers import UTC + from google.cloud._testing import _NamedTemporaryFile + from google.cloud.bigquery.table import SchemaField + + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + PATH = 'projects/%s/jobs' % (self.PROJECT,) + response = {'status': OK} + conn = _Connection( + (response, b'{}'), + ) + client = _Client(project=self.PROJECT, connection=conn) + expected_job = object() + if 'client' in kw: + kw['client']._job = expected_job + else: + client._job = expected_job + input_file_mode = kw.pop('input_file_mode', 'rb') + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') + table = self._makeOne(self.TABLE_NAME, dataset=dataset, + schema=[full_name, age, joined]) + ROWS = [ + ('Phred Phlyntstone', 32, WHEN), + ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), + ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)), + ('Bhettye Rhubble', 27, None), + ] + + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as file_obj: + writer = csv.writer(file_obj) + writer.writerow(('full_name', 'age', 'joined')) + writer.writerows(ROWS) + + with open(temp.name, input_file_mode) as file_obj: + BODY = file_obj.read() + explicit_size = kw.pop('_explicit_size', False) + if explicit_size: + kw['size'] = len(BODY) + job = table.upload_from_file( + file_obj, 'CSV', rewind=True, **kw) + + self.assertIs(job, expected_job) + return conn.http._requested, PATH, BODY + + def test_upload_from_file_w_bound_client_multipart(self): + import json + from six.moves.urllib.parse import parse_qsl + from six.moves.urllib.parse import urlsplit + from google.cloud._helpers import _to_bytes + + requested, PATH, BODY = self._upload_from_file_helper() + parse_chunk = _email_chunk_parser() + + self.assertEqual(len(requested), 1) + req = requested[0] + self.assertEqual(req['method'], 'POST') + uri = req['uri'] + scheme, netloc, path, qs, _ = urlsplit(uri) + self.assertEqual(scheme, 'http') + self.assertEqual(netloc, 'example.com') + self.assertEqual(path, '/%s' % PATH) + self.assertEqual(dict(parse_qsl(qs)), + {'uploadType': 'multipart'}) + + ctype, boundary = [x.strip() + for x in req['headers']['content-type'].split(';')] + self.assertEqual(ctype, 'multipart/related') + self.assertTrue(boundary.startswith('boundary="==')) + self.assertTrue(boundary.endswith('=="')) + + divider = b'--' + _to_bytes(boundary[len('boundary="'):-1]) + chunks = req['body'].split(divider)[1:-1] # discard prolog / epilog + self.assertEqual(len(chunks), 2) + + text_msg = parse_chunk(chunks[0].strip()) + self.assertEqual(dict(text_msg._headers), + {'Content-Type': 'application/json', + 'MIME-Version': '1.0'}) + metadata = json.loads(text_msg._payload) + load_config = metadata['configuration']['load'] + DESTINATION_TABLE = { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME, + } + self.assertEqual(load_config['destinationTable'], DESTINATION_TABLE) + self.assertEqual(load_config['sourceFormat'], 'CSV') + + app_msg = parse_chunk(chunks[1].strip()) + self.assertEqual(dict(app_msg._headers), + {'Content-Type': 'application/octet-stream', + 'Content-Transfer-Encoding': 'binary', + 'MIME-Version': '1.0'}) + body = BODY.decode('ascii').rstrip() + body_lines = [line.strip() for line in body.splitlines()] + payload_lines = app_msg._payload.rstrip().splitlines() + self.assertEqual(payload_lines, body_lines) + + def test_upload_from_file_resumable_with_400(self): + import csv + import datetime + from six.moves.http_client import BAD_REQUEST + from google.cloud.bigquery import table as MUT + from google.cloud.exceptions import BadRequest + from google.cloud._helpers import UTC + from google.cloud._testing import _Monkey + from google.cloud._testing import _NamedTemporaryFile + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + initial_response = {'status': BAD_REQUEST} + conn = _Connection( + (initial_response, b'{}'), + ) + client = _Client(project=self.PROJECT, connection=conn) + + class _UploadConfig(object): + accept = ['*/*'] + max_size = None + resumable_multipart = True + resumable_path = u'/upload/bigquery/v2/projects/{project}/jobs' + simple_multipart = True + simple_path = u'' # force resumable + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + + with _Monkey(MUT, _UploadConfig=_UploadConfig): + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as file_obj: + writer = csv.writer(file_obj) + writer.writerow(('full_name', 'age', 'joined')) + writer.writerow(('Phred Phlyntstone', 32, WHEN)) + + with open(temp.name, 'rb') as file_obj: + with self.assertRaises(BadRequest): + table.upload_from_file( + file_obj, 'CSV', rewind=True) + + # pylint: disable=too-many-statements + def test_upload_from_file_w_explicit_client_resumable(self): + import json + from six.moves.http_client import OK + from six.moves.urllib.parse import parse_qsl + from six.moves.urllib.parse import urlsplit + from google.cloud._testing import _Monkey + from google.cloud.bigquery import table as MUT + + UPLOAD_PATH = 'https://example.com/upload/test' + initial_response = {'status': OK, 'location': UPLOAD_PATH} + upload_response = {'status': OK} + conn = _Connection( + (initial_response, b'{}'), + (upload_response, b'{}'), + ) + client = _Client(project=self.PROJECT, connection=conn) + + class _UploadConfig(object): + accept = ['*/*'] + max_size = None + resumable_multipart = True + resumable_path = u'/upload/bigquery/v2/projects/{project}/jobs' + simple_multipart = True + simple_path = u'' # force resumable + + with _Monkey(MUT, _UploadConfig=_UploadConfig): + orig_requested, PATH, BODY = self._upload_from_file_helper( + allow_jagged_rows=False, + allow_quoted_newlines=False, + create_disposition='CREATE_IF_NEEDED', + encoding='utf8', + field_delimiter=',', + ignore_unknown_values=False, + max_bad_records=0, + quote_character='"', + skip_leading_rows=1, + write_disposition='WRITE_APPEND', + client=client, + _explicit_size=True) + + self.assertEqual(len(orig_requested), 0) + + requested = conn.http._requested + self.assertEqual(len(requested), 2) + req = requested[0] + self.assertEqual(req['method'], 'POST') + uri = req['uri'] + scheme, netloc, path, qs, _ = urlsplit(uri) + self.assertEqual(scheme, 'http') + self.assertEqual(netloc, 'example.com') + self.assertEqual(path, '/%s' % PATH) + self.assertEqual(dict(parse_qsl(qs)), + {'uploadType': 'resumable'}) + + self.assertEqual(req['headers']['content-type'], 'application/json') + metadata = json.loads(req['body']) + load_config = metadata['configuration']['load'] + DESTINATION_TABLE = { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME, + } + self.assertEqual(load_config['destinationTable'], DESTINATION_TABLE) + self.assertEqual(load_config['sourceFormat'], 'CSV') + self.assertEqual(load_config['allowJaggedRows'], False) + self.assertEqual(load_config['allowQuotedNewlines'], False) + self.assertEqual(load_config['createDisposition'], 'CREATE_IF_NEEDED') + self.assertEqual(load_config['encoding'], 'utf8') + self.assertEqual(load_config['fieldDelimiter'], ',') + self.assertEqual(load_config['ignoreUnknownValues'], False) + self.assertEqual(load_config['maxBadRecords'], 0) + self.assertEqual(load_config['quote'], '"') + self.assertEqual(load_config['skipLeadingRows'], 1) + self.assertEqual(load_config['writeDisposition'], 'WRITE_APPEND') + + req = requested[1] + self.assertEqual(req['method'], 'PUT') + self.assertEqual(req['uri'], UPLOAD_PATH) + headers = req['headers'] + length = len(BODY) + self.assertEqual(headers['Content-Type'], 'application/octet-stream') + self.assertEqual(headers['Content-Range'], + 'bytes 0-%d/%d' % (length - 1, length)) + self.assertEqual(headers['content-length'], '%d' % (length,)) + self.assertEqual(req['body'], BODY) + # pylint: enable=too-many-statements + + +class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): + + def _callFUT(self, resource): + from google.cloud.bigquery.table import _parse_schema_resource + return _parse_schema_resource(resource) + + def _makeResource(self): + return { + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}, + ]}, + } + + def test__parse_schema_resource_defaults(self): + RESOURCE = self._makeResource() + schema = self._callFUT(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + def test__parse_schema_resource_subfields(self): + RESOURCE = self._makeResource() + RESOURCE['schema']['fields'].append( + {'name': 'phone', + 'type': 'RECORD', + 'mode': 'REPEATABLE', + 'fields': [{'name': 'type', + 'type': 'STRING', + 'mode': 'REQUIRED'}, + {'name': 'number', + 'type': 'STRING', + 'mode': 'REQUIRED'}]}) + schema = self._callFUT(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + def test__parse_schema_resource_fields_without_mode(self): + RESOURCE = self._makeResource() + RESOURCE['schema']['fields'].append( + {'name': 'phone', + 'type': 'STRING'}) + + schema = self._callFUT(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + +class Test_build_schema_resource(unittest.TestCase, _SchemaBase): + + def _callFUT(self, resource): + from google.cloud.bigquery.table import _build_schema_resource + return _build_schema_resource(resource) + + def test_defaults(self): + from google.cloud.bigquery.table import SchemaField + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + resource = self._callFUT([full_name, age]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED'}) + self.assertEqual(resource[1], + {'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED'}) + + def test_w_description(self): + from google.cloud.bigquery.table import SchemaField + DESCRIPTION = 'DESCRIPTION' + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED', + description=DESCRIPTION) + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + resource = self._callFUT([full_name, age]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': DESCRIPTION}) + self.assertEqual(resource[1], + {'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED'}) + + def test_w_subfields(self): + from google.cloud.bigquery.table import SchemaField + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + ph_type = SchemaField('type', 'STRING', 'REQUIRED') + ph_num = SchemaField('number', 'STRING', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='REPEATABLE', + fields=[ph_type, ph_num]) + resource = self._callFUT([full_name, phone]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED'}) + self.assertEqual(resource[1], + {'name': 'phone', + 'type': 'RECORD', + 'mode': 'REPEATABLE', + 'fields': [{'name': 'type', + 'type': 'STRING', + 'mode': 'REQUIRED'}, + {'name': 'number', + 'type': 'STRING', + 'mode': 'REQUIRED'}]}) + + +class _Client(object): + + _query_results = () + + def __init__(self, project='project', connection=None): + self.project = project + self.connection = connection + + def job_from_resource(self, resource): # pylint: disable=unused-argument + return self._job + + def run_sync_query(self, query): + return _Query(query, self) + + +class _Query(object): + + def __init__(self, query, client): + self.query = query + self.rows = [] + self.client = client + + def run(self): + self.rows = self.client._query_results + + +class _Dataset(object): + + def __init__(self, client, name=TestTable.DS_NAME): + self._client = client + self.name = name + + @property + def path(self): + return '/projects/%s/datasets/%s' % ( + self._client.project, self.name) + + @property + def project(self): + return self._client.project + + +class _Responder(object): + + def __init__(self, *responses): + self._responses = responses[:] + self._requested = [] + + def _respond(self, **kw): + self._requested.append(kw) + response, self._responses = self._responses[0], self._responses[1:] + return response + + +class _HTTP(_Responder): + + connections = {} # For google-apitools debugging. + + def request(self, uri, method, headers, body, **kw): + if hasattr(body, 'read'): + body = body.read() + return self._respond(uri=uri, method=method, headers=headers, + body=body, **kw) + + +class _Connection(_Responder): + + API_BASE_URL = 'http://example.com' + USER_AGENT = 'testing 1.2.3' + + def __init__(self, *responses): + super(_Connection, self).__init__(*responses) + self.http = _HTTP(*responses) + + def api_request(self, **kw): + from google.cloud.exceptions import NotFound + self._requested.append(kw) + + try: + response, self._responses = self._responses[0], self._responses[1:] + except: + raise NotFound('miss') + else: + return response + + def build_api_url(self, path, query_params=None, + api_base_url=API_BASE_URL): + from six.moves.urllib.parse import urlencode + from six.moves.urllib.parse import urlsplit + from six.moves.urllib.parse import urlunsplit + # Mimic the build_api_url interface. + qs = urlencode(query_params or {}) + scheme, netloc, _, _, _ = urlsplit(api_base_url) + return urlunsplit((scheme, netloc, path, qs, '')) + + +def _email_chunk_parser(): + import six + if six.PY3: # pragma: NO COVER Python3 + from email.parser import BytesParser + parser = BytesParser() + return parser.parsebytes + else: + from email.parser import Parser + parser = Parser() + return parser.parsestr From 8afdd62376af120adb04962f8199782b9c254deb Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 26 Sep 2016 12:43:30 -0700 Subject: [PATCH 0002/2016] Making bigquery subpackage into a proper package. - Adding README, setup.py, MANIFEST.in, .coveragerc and tox.ini - Adding google-cloud-bigquery as a dependency to the umbrella package - Adding the bigquery subdirectory into the list of packages for verifying the docs - Incorporating the bigquery subdirectory into the umbrella coverage report - Adding the bigquery only tox tests to the Travis config - Adding {toxinidir}/../core as a dependency for the bigquery tox config --- packages/google-cloud-bigquery/.coveragerc | 11 +++ packages/google-cloud-bigquery/MANIFEST.in | 4 + packages/google-cloud-bigquery/README.rst | 89 ++++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 68 +++++++++++++++++ packages/google-cloud-bigquery/tox.ini | 30 ++++++++ 5 files changed, 202 insertions(+) create mode 100644 packages/google-cloud-bigquery/.coveragerc create mode 100644 packages/google-cloud-bigquery/MANIFEST.in create mode 100644 packages/google-cloud-bigquery/README.rst create mode 100644 packages/google-cloud-bigquery/setup.py create mode 100644 packages/google-cloud-bigquery/tox.ini diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc new file mode 100644 index 000000000000..a54b99aa14b7 --- /dev/null +++ b/packages/google-cloud-bigquery/.coveragerc @@ -0,0 +1,11 @@ +[run] +branch = True + +[report] +fail_under = 100 +show_missing = True +exclude_lines = + # Re-enable the standard pragma + pragma: NO COVER + # Ignore debug-only repr + def __repr__ diff --git a/packages/google-cloud-bigquery/MANIFEST.in b/packages/google-cloud-bigquery/MANIFEST.in new file mode 100644 index 000000000000..cb3a2b9ef4fa --- /dev/null +++ b/packages/google-cloud-bigquery/MANIFEST.in @@ -0,0 +1,4 @@ +include README.rst +graft google +graft unit_tests +global-exclude *.pyc diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst new file mode 100644 index 000000000000..202cf807e1ca --- /dev/null +++ b/packages/google-cloud-bigquery/README.rst @@ -0,0 +1,89 @@ +Python Client for Google Cloud BigQuery +======================================= + + Python idiomatic client for `Google Cloud BigQuery`_ + +.. _Google Cloud BigQuery: https://cloud.google.com/bigquery/what-is-bigquery + +- `Homepage`_ +- `API Documentation`_ + +.. _Homepage: https://googlecloudplatform.github.io/google-cloud-python/ +.. _API Documentation: http://googlecloudplatform.github.io/google-cloud-python/ + +Quick Start +----------- + +:: + + $ pip install --upgrade google-cloud-bigquery + +Authentication +-------------- + +With ``google-cloud-python`` we try to make authentication as painless as +possible. Check out the `Authentication section`_ in our documentation to +learn more. You may also find the `authentication document`_ shared by all +the ``google-cloud-*`` libraries to be helpful. + +.. _Authentication section: http://google-cloud-python.readthedocs.io/en/latest/google-cloud-auth.html +.. _authentication document: https://github.com/GoogleCloudPlatform/gcloud-common/tree/master/authentication + +Using the API +------------- + +Querying massive datasets can be time consuming and expensive without the +right hardware and infrastructure. Google `BigQuery`_ (`BigQuery API docs`_) +solves this problem by enabling super-fast, SQL-like queries against +append-only tables, using the processing power of Google's infrastructure. + +.. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery +.. _BigQuery API docs: https://cloud.google.com/bigquery/docs/reference/v2/ + +Load data from CSV +~~~~~~~~~~~~~~~~~~ + +.. code:: python + + import csv + + from google.cloud import bigquery + from google.cloud.bigquery import SchemaField + + client = bigquery.Client() + + dataset = client.dataset('dataset_name') + dataset.create() # API request + + SCHEMA = [ + SchemaField('full_name', 'STRING', mode='required'), + SchemaField('age', 'INTEGER', mode='required'), + ] + table = dataset.table('table_name', SCHEMA) + table.create() + + with open('csv_file', 'rb') as readable: + table.upload_from_file( + readable, source_format='CSV', skip_leading_rows=1) + +Perform a synchronous query +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: python + + # Perform a synchronous query. + QUERY = ( + 'SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] ' + 'WHERE state = "TX"') + query = client.run_sync_query('%s LIMIT 100' % QUERY) + query.timeout_ms = TIMEOUT_MS + query.run() + + for row in query.rows: + print(row) + + +See the ``google-cloud-python`` API `BigQuery documentation`_ to learn how +to connect to BigQuery using this Client Library. + +.. _BigQuery documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery-usage.html diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py new file mode 100644 index 000000000000..2e94a44776e6 --- /dev/null +++ b/packages/google-cloud-bigquery/setup.py @@ -0,0 +1,68 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from setuptools import find_packages +from setuptools import setup + + +PACKAGE_ROOT = os.path.abspath(os.path.dirname(__file__)) + +with open(os.path.join(PACKAGE_ROOT, 'README.rst')) as file_obj: + README = file_obj.read() + +# NOTE: This is duplicated throughout and we should try to +# consolidate. +SETUP_BASE = { + 'author': 'Google Cloud Platform', + 'author_email': 'jjg+google-cloud-python@google.com', + 'scripts': [], + 'url': 'https://github.com/GoogleCloudPlatform/google-cloud-python', + 'license': 'Apache 2.0', + 'platforms': 'Posix; MacOS X; Windows', + 'include_package_data': True, + 'zip_safe': False, + 'classifiers': [ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Topic :: Internet', + ], +} + + +REQUIREMENTS = [ + 'google-cloud-core', +] + +setup( + name='google-cloud-bigquery', + version='0.20.0dev', + description='Python Client for Google BigQuery', + long_description=README, + namespace_packages=[ + 'google', + 'google.cloud', + ], + packages=find_packages(), + install_requires=REQUIREMENTS, + **SETUP_BASE +) diff --git a/packages/google-cloud-bigquery/tox.ini b/packages/google-cloud-bigquery/tox.ini new file mode 100644 index 000000000000..4a5000739647 --- /dev/null +++ b/packages/google-cloud-bigquery/tox.ini @@ -0,0 +1,30 @@ +[tox] +envlist = + py27,py34,py35,cover + +[testing] +deps = + {toxinidir}/../core + pytest +covercmd = + py.test --quiet \ + --cov=google.cloud.bigquery \ + --cov=unit_tests \ + --cov-config {toxinidir}/.coveragerc \ + unit_tests + +[testenv] +commands = + py.test --quiet {posargs} unit_tests +deps = + {[testing]deps} + +[testenv:cover] +basepython = + python2.7 +commands = + {[testing]covercmd} +deps = + {[testenv]deps} + coverage + pytest-cov From c3d2fc580559be0a0e8ce34c9801554bb0b1b98c Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 27 Sep 2016 08:53:24 -0700 Subject: [PATCH 0003/2016] Preparing for a release of all packages. Towards #2441. - Updating umbrella README to point at all packages - Putting upper bounds on grpcio in dependencies - Putting lower bounds on all google-cloud-* packages listed as dependencies - Adding `setup.cfg` for universal wheels --- packages/google-cloud-bigquery/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 2e94a44776e6..a54bfd655f48 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -50,12 +50,12 @@ REQUIREMENTS = [ - 'google-cloud-core', + 'google-cloud-core >= 0.20.0', ] setup( name='google-cloud-bigquery', - version='0.20.0dev', + version='0.20.0', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From 436a411c9803c89739ae87f05316661a603fba37 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 27 Sep 2016 08:57:43 -0700 Subject: [PATCH 0004/2016] Adding setup.cfg to all packages. --- packages/google-cloud-bigquery/setup.cfg | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 packages/google-cloud-bigquery/setup.cfg diff --git a/packages/google-cloud-bigquery/setup.cfg b/packages/google-cloud-bigquery/setup.cfg new file mode 100644 index 000000000000..2a9acf13daa9 --- /dev/null +++ b/packages/google-cloud-bigquery/setup.cfg @@ -0,0 +1,2 @@ +[bdist_wheel] +universal = 1 From 7df15c7f0b0ee5e6d13fa5c50c9d165db2713f48 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 27 Sep 2016 09:10:26 -0700 Subject: [PATCH 0005/2016] Changing all "Cloud BigQuery" mentions to "BigQuery". --- packages/google-cloud-bigquery/README.rst | 8 ++++---- .../google/cloud/bigquery/__init__.py | 2 +- .../google/cloud/bigquery/connection.py | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 202cf807e1ca..5e395bf2496f 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -1,9 +1,9 @@ -Python Client for Google Cloud BigQuery -======================================= +Python Client for Google BigQuery +================================= - Python idiomatic client for `Google Cloud BigQuery`_ + Python idiomatic client for `Google BigQuery`_ -.. _Google Cloud BigQuery: https://cloud.google.com/bigquery/what-is-bigquery +.. _Google BigQuery: https://cloud.google.com/bigquery/what-is-bigquery - `Homepage`_ - `API Documentation`_ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 4abf94d86962..fec0eee614cb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Google Cloud BigQuery API wrapper. +"""Google BigQuery API wrapper. The main concepts with this API are: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/connection.py index b5f43c07bf5a..85a25d643d1f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/connection.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/connection.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Create / interact with Google Cloud BigQuery connections.""" +"""Create / interact with Google BigQuery connections.""" from google.cloud import connection as base_connection class Connection(base_connection.JSONConnection): - """A connection to Google Cloud BigQuery via the JSON REST API.""" + """A connection to Google BigQuery via the JSON REST API.""" API_BASE_URL = 'https://www.googleapis.com' """The base of the API call URL.""" @@ -31,4 +31,4 @@ class Connection(base_connection.JSONConnection): SCOPE = ('https://www.googleapis.com/auth/bigquery', 'https://www.googleapis.com/auth/cloud-platform') - """The scopes required for authenticating as a Cloud BigQuery consumer.""" + """The scopes required for authenticating as a BigQuery consumer.""" From aa86dd8a111d5814871da14e8107779882fc6ed2 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 29 Sep 2016 16:17:02 -0700 Subject: [PATCH 0006/2016] General clean-up after rename. - Removing "graft google" from MANIFEST for umbrella package. It isn't needed since the umbrella package has no source - Updating license year on copy-pasted namespace package __init__.py files. Done via: https://gist.github.com/dhermes/a0e88f891ffffc3ecea5c9bb2f13e4f5 - Removing unused HTML context from docs/conf.py - Setting GH_OWNER AND GH_PROJECT_NAME (which together make the REPO_SLUG) manually in the docs update scripts. This way the env. variables don't need to be set in the Travis UI / CLI. Also updating tox.ini to stop passing those variables through - Removing the root package from `verify_included_modules.py` since it no longer has any source - Updated a docstring reference to a moved class in the Bigtable system test - Removing redundant `GOOGLE_CLOUD_*` in `tox` system test `passenv` (already covered by `GOOGLE_*`) --- packages/google-cloud-bigquery/google/cloud/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/__init__.py b/packages/google-cloud-bigquery/google/cloud/__init__.py index 8ac7b74af136..b2b833373882 100644 --- a/packages/google-cloud-bigquery/google/cloud/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2014 Google Inc. +# Copyright 2016 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 41cb3879244ff3c37b9494e1d8b48be839eb4130 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Sun, 2 Oct 2016 10:55:33 -0400 Subject: [PATCH 0007/2016] Raise from 'Table.insert_data' if schema not set. Closes #2472. --- .../google/cloud/bigquery/table.py | 5 +++++ .../unit_tests/test_table.py | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 56f7f7124d5a..a2d7a9a761e8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -35,6 +35,7 @@ from google.cloud.bigquery._helpers import _rows_from_json +_TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'table.reload()'" _MARKER = object() @@ -736,7 +737,11 @@ def insert_data(self, identifies the row, and the "errors" key contains a list of the mappings describing one or more problems with the row. + :raises: ValueError if table's schema is not set """ + if len(self._schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + client = self._require_client(client) rows_info = [] data = {'rows': rows_info} diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 6dc985d89af9..8ddcafab556b 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -1248,6 +1248,23 @@ def test_fetch_data_w_record_schema(self): self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % PATH) + def test_insert_data_wo_schema(self): + from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA + client = _Client(project=self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset=dataset) + ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + + with self.assertRaises(ValueError) as exc: + table.insert_data(ROWS) + + self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) + def test_insert_data_w_bound_client(self): import datetime from google.cloud._helpers import UTC From 9553f9f3ff702032566602a0c30f418d4facd83e Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 3 Oct 2016 21:32:48 -0700 Subject: [PATCH 0008/2016] Updating package README's with more useful doc links. Also removing duplicate "Homepage" links (duplicate of "API Documentation" links). --- packages/google-cloud-bigquery/README.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 5e395bf2496f..dfd585fc88cd 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -5,11 +5,9 @@ Python Client for Google BigQuery .. _Google BigQuery: https://cloud.google.com/bigquery/what-is-bigquery -- `Homepage`_ -- `API Documentation`_ +- `Documentation`_ -.. _Homepage: https://googlecloudplatform.github.io/google-cloud-python/ -.. _API Documentation: http://googlecloudplatform.github.io/google-cloud-python/ +.. _Documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery-usage.html Quick Start ----------- From 7ded7505803fdbe3ec6b79a68c80e818c4f23977 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 12 Oct 2016 13:05:14 -0400 Subject: [PATCH 0009/2016] Capture 'query' attr when loading 'QueryJob' from JSON resource. Closes: #2511. --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 2 ++ packages/google-cloud-bigquery/unit_tests/test_job.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index b86f51c3fe18..d0155ca8b32e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1062,6 +1062,8 @@ def _scrub_local_properties(self, cleaned): the client's project. """ configuration = cleaned['configuration']['query'] + + self.query = configuration['query'] dest_remote = configuration.get('destinationTable') if dest_remote is None: diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index 2ac135c0e739..592d81910f59 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -1290,6 +1290,7 @@ def _verifyResourceProperties(self, job, resource): self._verifyBooleanResourceProperties(job, config) self._verifyIntegerResourceProperties(job, config) + self.assertEqual(job.query, config['query']) if 'createDisposition' in config: self.assertEqual(job.create_disposition, config['createDisposition']) @@ -1596,7 +1597,7 @@ def test_reload_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._makeOne(self.JOB_NAME, self.QUERY, client) + job = self._makeOne(self.JOB_NAME, None, client) dataset = Dataset(DS_NAME, client) table = Table(DEST_TABLE, dataset) From 594e795bbe39b1f9a59252414b0b8259e587d797 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 18 Oct 2016 10:22:42 -0700 Subject: [PATCH 0010/2016] Updating Client.list_* methods in BigQuery to use Iterators. --- .../google/cloud/bigquery/client.py | 130 ++++++++++-------- .../unit_tests/test_client.py | 37 +++-- 2 files changed, 101 insertions(+), 66 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index df4fd244a7d9..62ffc4eef4a7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -23,6 +23,7 @@ from google.cloud.bigquery.job import LoadTableFromStorageJob from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.query import QueryResults +from google.cloud.iterator import Iterator class Project(object): @@ -87,26 +88,13 @@ def list_projects(self, max_results=None, page_token=None): not passed, the API will return the first page of projects. - :rtype: tuple, (list, str) - :returns: list of :class:`~google.cloud.bigquery.client.Project`, - plus a "next page token" string: if the token is not None, - indicates that more projects can be retrieved with another - call (pass that value as ``page_token``). + :rtype: :class:`~google.cloud.iterator.Iterator` + :returns: Iterator of :class:`~google.cloud.bigquery.client.Project` + accessible to the current client. """ - params = {} - - if max_results is not None: - params['maxResults'] = max_results - - if page_token is not None: - params['pageToken'] = page_token - - path = '/projects' - resp = self.connection.api_request(method='GET', path=path, - query_params=params) - projects = [Project.from_api_repr(resource) - for resource in resp.get('projects', ())] - return projects, resp.get('nextPageToken') + return Iterator(client=self, path='/projects', + items_key='projects', item_to_value=_item_to_project, + page_token=page_token, max_results=max_results) def list_datasets(self, include_all=False, max_results=None, page_token=None): @@ -127,29 +115,18 @@ def list_datasets(self, include_all=False, max_results=None, not passed, the API will return the first page of datasets. - :rtype: tuple, (list, str) - :returns: list of :class:`~google.cloud.bigquery.dataset.Dataset`, - plus a "next page token" string: if the token is not None, - indicates that more datasets can be retrieved with another - call (pass that value as ``page_token``). + :rtype: :class:`~google.cloud.iterator.Iterator` + :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`. + accessible to the current client. """ - params = {} - + extra_params = {} if include_all: - params['all'] = True - - if max_results is not None: - params['maxResults'] = max_results - - if page_token is not None: - params['pageToken'] = page_token - + extra_params['all'] = True path = '/projects/%s/datasets' % (self.project,) - resp = self.connection.api_request(method='GET', path=path, - query_params=params) - datasets = [Dataset.from_api_repr(resource, self) - for resource in resp.get('datasets', ())] - return datasets, resp.get('nextPageToken') + return Iterator( + client=self, path=path, items_key='datasets', + item_to_value=_item_to_dataset, page_token=page_token, + max_results=max_results, extra_params=extra_params) def dataset(self, dataset_name): """Construct a dataset bound to this client. @@ -215,32 +192,22 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, * ``"pending"`` * ``"running"`` - :rtype: tuple, (list, str) - :returns: list of job instances, plus a "next page token" string: - if the token is not ``None``, indicates that more jobs can be - retrieved with another call, passing that value as - ``page_token``). + :rtype: :class:`~google.cloud.iterator.Iterator` + :returns: Iterable of job instances. """ - params = {'projection': 'full'} - - if max_results is not None: - params['maxResults'] = max_results - - if page_token is not None: - params['pageToken'] = page_token + extra_params = {'projection': 'full'} if all_users is not None: - params['allUsers'] = all_users + extra_params['allUsers'] = all_users if state_filter is not None: - params['stateFilter'] = state_filter + extra_params['stateFilter'] = state_filter path = '/projects/%s/jobs' % (self.project,) - resp = self.connection.api_request(method='GET', path=path, - query_params=params) - jobs = [self.job_from_resource(resource) - for resource in resp.get('jobs', ())] - return jobs, resp.get('nextPageToken') + return Iterator( + client=self, path=path, items_key='jobs', + item_to_value=_item_to_job, page_token=page_token, + max_results=max_results, extra_params=extra_params) def load_table_from_storage(self, job_name, destination, *source_uris): """Construct a job for loading data into a table from CloudStorage. @@ -334,3 +301,50 @@ def run_sync_query(self, query): :returns: a new ``QueryResults`` instance """ return QueryResults(query, client=self) + + +# pylint: disable=unused-argument +def _item_to_project(iterator, resource): + """Convert a JSON project to the native object. + + :type iterator: :class:`~google.cloud.iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type resource: dict + :param resource: An item to be converted to a project. + + :rtype: :class:`.Project` + :returns: The next project in the page. + """ + return Project.from_api_repr(resource) +# pylint: enable=unused-argument + + +def _item_to_dataset(iterator, resource): + """Convert a JSON dataset to the native object. + + :type iterator: :class:`~google.cloud.iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type resource: dict + :param resource: An item to be converted to a dataset. + + :rtype: :class:`.Dataset` + :returns: The next dataset in the page. + """ + return Dataset.from_api_repr(resource, iterator.client) + + +def _item_to_job(iterator, resource): + """Convert a JSON job to the native object. + + :type iterator: :class:`~google.cloud.iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type resource: dict + :param resource: An item to be converted to a job. + + :rtype: job instance. + :returns: The next job in the page. + """ + return iterator.client.job_from_resource(resource) diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index 19892ce77e72..77c01a09aa63 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -59,7 +59,10 @@ def test_list_projects_defaults(self): client = self._makeOne(PROJECT_1, creds) conn = client.connection = _Connection(DATA) - projects, token = client.list_projects() + iterator = client.list_projects() + iterator.update_page() + projects = list(iterator.page) + token = iterator.next_page_token self.assertEqual(len(projects), len(DATA['projects'])) for found, expected in zip(projects, DATA['projects']): @@ -83,7 +86,10 @@ def test_list_projects_explicit_response_missing_projects_key(self): client = self._makeOne(PROJECT, creds) conn = client.connection = _Connection(DATA) - projects, token = client.list_projects(max_results=3, page_token=TOKEN) + iterator = client.list_projects(max_results=3, page_token=TOKEN) + iterator.update_page() + projects = list(iterator.page) + token = iterator.next_page_token self.assertEqual(len(projects), 0) self.assertIsNone(token) @@ -121,7 +127,10 @@ def test_list_datasets_defaults(self): client = self._makeOne(PROJECT, creds) conn = client.connection = _Connection(DATA) - datasets, token = client.list_datasets() + iterator = client.list_datasets() + iterator.update_page() + datasets = list(iterator.page) + token = iterator.next_page_token self.assertEqual(len(datasets), len(DATA['datasets'])) for found, expected in zip(datasets, DATA['datasets']): @@ -144,8 +153,11 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): client = self._makeOne(PROJECT, creds) conn = client.connection = _Connection(DATA) - datasets, token = client.list_datasets( + iterator = client.list_datasets( include_all=True, max_results=3, page_token=TOKEN) + iterator.update_page() + datasets = list(iterator.page) + token = iterator.next_page_token self.assertEqual(len(datasets), 0) self.assertIsNone(token) @@ -288,7 +300,10 @@ def test_list_jobs_defaults(self): client = self._makeOne(PROJECT, creds) conn = client.connection = _Connection(DATA) - jobs, token = client.list_jobs() + iterator = client.list_jobs() + iterator.update_page() + jobs = list(iterator.page) + token = iterator.next_page_token self.assertEqual(len(jobs), len(DATA['jobs'])) for found, expected in zip(jobs, DATA['jobs']): @@ -340,7 +355,10 @@ def test_list_jobs_load_job_wo_sourceUris(self): client = self._makeOne(PROJECT, creds) conn = client.connection = _Connection(DATA) - jobs, token = client.list_jobs() + iterator = client.list_jobs() + iterator.update_page() + jobs = list(iterator.page) + token = iterator.next_page_token self.assertEqual(len(jobs), len(DATA['jobs'])) for found, expected in zip(jobs, DATA['jobs']): @@ -364,8 +382,11 @@ def test_list_jobs_explicit_missing(self): client = self._makeOne(PROJECT, creds) conn = client.connection = _Connection(DATA) - jobs, token = client.list_jobs(max_results=1000, page_token=TOKEN, - all_users=True, state_filter='done') + iterator = client.list_jobs(max_results=1000, page_token=TOKEN, + all_users=True, state_filter='done') + iterator.update_page() + jobs = list(iterator.page) + token = iterator.next_page_token self.assertEqual(len(jobs), 0) self.assertIsNone(token) From 65799048ce72ca3e914e761db5a91ce6665e9762 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 20 Oct 2016 15:34:03 -0700 Subject: [PATCH 0011/2016] Replace string with str in rtypes. Used the command: ag -l 'rtype: string' | xargs sed -i .bak 's/rtype: string/rtype: str/g' Based on this comment: https://github.com/GoogleCloudPlatform/google-cloud-python/pull/2485#discussion_r83267163 `str` is a type, `string` is a module. --- .../google/cloud/bigquery/dataset.py | 16 ++++++++-------- .../google/cloud/bigquery/job.py | 14 +++++++------- .../google/cloud/bigquery/query.py | 4 ++-- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index ce30fea3e4d3..122f99da6d3d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -115,7 +115,7 @@ def __init__(self, name, client, access_grants=()): def project(self): """Project bound to the dataset. - :rtype: string + :rtype: str :returns: the project (derived from the client). """ return self._client.project @@ -124,7 +124,7 @@ def project(self): def path(self): """URL path for the dataset's APIs. - :rtype: string + :rtype: str :returns: the path based on project and dataste name. """ return '/projects/%s/datasets/%s' % (self.project, self.name) @@ -168,7 +168,7 @@ def created(self): def dataset_id(self): """ID for the dataset resource. - :rtype: string, or ``NoneType`` + :rtype: str, or ``NoneType`` :returns: the ID (None until set from the server). """ return self._properties.get('id') @@ -177,7 +177,7 @@ def dataset_id(self): def etag(self): """ETag for the dataset resource. - :rtype: string, or ``NoneType`` + :rtype: str, or ``NoneType`` :returns: the ETag (None until set from the server). """ return self._properties.get('etag') @@ -198,7 +198,7 @@ def modified(self): def self_link(self): """URL for the dataset resource. - :rtype: string, or ``NoneType`` + :rtype: str, or ``NoneType`` :returns: the URL (None until set from the server). """ return self._properties.get('selfLink') @@ -229,7 +229,7 @@ def default_table_expiration_ms(self, value): def description(self): """Description of the dataset. - :rtype: string, or ``NoneType`` + :rtype: str, or ``NoneType`` :returns: The description as set by the user, or None (the default). """ return self._properties.get('description') @@ -251,7 +251,7 @@ def description(self, value): def friendly_name(self): """Title of the dataset. - :rtype: string, or ``NoneType`` + :rtype: str, or ``NoneType`` :returns: The name as set by the user, or None (the default). """ return self._properties.get('friendlyName') @@ -273,7 +273,7 @@ def friendly_name(self, value): def location(self): """Location in which the dataset is hosted. - :rtype: string, or ``NoneType`` + :rtype: str, or ``NoneType`` :returns: The location as set by the user, or None (the default). """ return self._properties.get('location') diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index d0155ca8b32e..9e039a4943ed 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -96,7 +96,7 @@ def __init__(self, client): def project(self): """Project bound to the job. - :rtype: string + :rtype: str :returns: the project (derived from the client). """ return self._client.project @@ -135,7 +135,7 @@ def __init__(self, name, client): def job_type(self): """Type of job - :rtype: string + :rtype: str :returns: one of 'load', 'copy', 'extract', 'query' """ return self._JOB_TYPE @@ -144,7 +144,7 @@ def job_type(self): def path(self): """URL path for the job's APIs. - :rtype: string + :rtype: str :returns: the path based on project and job name. """ return '/projects/%s/jobs/%s' % (self.project, self.name) @@ -153,7 +153,7 @@ def path(self): def etag(self): """ETag for the job resource. - :rtype: string, or ``NoneType`` + :rtype: str, or ``NoneType`` :returns: the ETag (None until set from the server). """ return self._properties.get('etag') @@ -162,7 +162,7 @@ def etag(self): def self_link(self): """URL for the job resource. - :rtype: string, or ``NoneType`` + :rtype: str, or ``NoneType`` :returns: the URL (None until set from the server). """ return self._properties.get('selfLink') @@ -171,7 +171,7 @@ def self_link(self): def user_email(self): """E-mail address of user who submitted the job. - :rtype: string, or ``NoneType`` + :rtype: str, or ``NoneType`` :returns: the URL (None until set from the server). """ return self._properties.get('user_email') @@ -241,7 +241,7 @@ def errors(self): def state(self): """Status of the job. - :rtype: string, or ``NoneType`` + :rtype: str, or ``NoneType`` :returns: the state (None until set from the server). """ status = self._properties.get('status') diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 6146c0431657..f2d1979dbf58 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -91,7 +91,7 @@ def from_query_job(cls, job): def project(self): """Project bound to the job. - :rtype: string + :rtype: str :returns: the project (derived from the client). """ return self._client.project @@ -185,7 +185,7 @@ def page_token(self): See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#pageToken - :rtype: string, or ``NoneType`` + :rtype: str, or ``NoneType`` :returns: Token generated on the server (None until set by the server). """ return self._properties.get('pageToken') From 2443140baac30c9de6a75b0ef6c39bd739172c77 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 20 Oct 2016 15:50:55 -0700 Subject: [PATCH 0012/2016] Replace types string with str. Uses command: ag -l 'type ([^:]+): string' | \ xargs gsed -r -i.bak -e 's/type ([^:]+): string/type \1: str/g' Note: [-r for gsed (GNU sed) is needed for group matching](http://superuser.com/a/336819/125262). --- .../google/cloud/bigquery/_helpers.py | 6 +++--- .../google/cloud/bigquery/dataset.py | 18 +++++++++--------- .../google/cloud/bigquery/job.py | 12 ++++++------ .../google/cloud/bigquery/query.py | 4 ++-- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index a486fba978b1..d22e1592a68e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -108,7 +108,7 @@ class _ConfigurationProperty(object): Values will be stored on a `_configuration` helper attribute of the property's job instance. - :type name: string + :type name: str :param name: name of the property """ @@ -139,7 +139,7 @@ def __delete__(self, instance): class _TypedProperty(_ConfigurationProperty): """Property implementation: validates based on value type. - :type name: string + :type name: str :param name: name of the property :type property_type: type or sequence of types @@ -164,7 +164,7 @@ class _EnumProperty(_ConfigurationProperty): Subclasses must define ``ALLOWED`` as a class-level constant: it must be a sequence of strings. - :type name: string + :type name: str :param name: name of the property. """ def _validate(self, value): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 122f99da6d3d..20ed30346276 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -31,7 +31,7 @@ class AccessGrant(object): See https://cloud.google.com/bigquery/docs/reference/v2/datasets. - :type role: string + :type role: str :param role: Role granted to the entity. One of * ``'OWNER'`` @@ -40,11 +40,11 @@ class AccessGrant(object): May also be ``None`` if the ``entity_type`` is ``view``. - :type entity_type: string + :type entity_type: str :param entity_type: Type of entity being granted the role. One of :attr:`ENTITY_TYPES`. - :type entity_id: string + :type entity_id: str :param entity_id: ID of entity being granted the role. :raises: :class:`ValueError` if the ``entity_type`` is not among @@ -91,7 +91,7 @@ class Dataset(object): See: https://cloud.google.com/bigquery/docs/reference/v2/datasets - :type name: string + :type name: str :param name: the name of the dataset :type client: :class:`google.cloud.bigquery.client.Client` @@ -238,7 +238,7 @@ def description(self): def description(self, value): """Update description of the dataset. - :type value: string, or ``NoneType`` + :type value: str, or ``NoneType`` :param value: new description :raises: ValueError for invalid value types. @@ -260,7 +260,7 @@ def friendly_name(self): def friendly_name(self, value): """Update title of the dataset. - :type value: string, or ``NoneType`` + :type value: str, or ``NoneType`` :param value: new title :raises: ValueError for invalid value types. @@ -282,7 +282,7 @@ def location(self): def location(self, value): """Update location in which the dataset is hosted. - :type value: string, or ``NoneType`` + :type value: str, or ``NoneType`` :param value: new location :raises: ValueError for invalid value types. @@ -545,7 +545,7 @@ def list_tables(self, max_results=None, page_token=None): :param max_results: maximum number of tables to return, If not passed, defaults to a value set by the API. - :type page_token: string + :type page_token: str :param page_token: opaque marker for the next "page" of datasets. If not passed, the API will return the first page of datasets. @@ -575,7 +575,7 @@ def list_tables(self, max_results=None, page_token=None): def table(self, name, schema=()): """Construct a table bound to this dataset. - :type name: string + :type name: str :param name: Name of the table. :type schema: list of :class:`google.cloud.bigquery.table.SchemaField` diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 9e039a4943ed..3e0031515a96 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -120,7 +120,7 @@ def _require_client(self, client): class _AsyncJob(_BaseJob): """Base class for asynchronous jobs. - :type name: string + :type name: str :param name: the name of the job :type client: :class:`google.cloud.bigquery.client.Client` @@ -399,7 +399,7 @@ class _LoadConfiguration(object): class LoadTableFromStorageJob(_AsyncJob): """Asynchronous job for loading data into a table from CloudStorage. - :type name: string + :type name: str :param name: the name of the job :type destination: :class:`google.cloud.bigquery.table.Table` @@ -648,7 +648,7 @@ class _CopyConfiguration(object): class CopyJob(_AsyncJob): """Asynchronous job: copy data into a table from other tables. - :type name: string + :type name: str :param name: the name of the job :type destination: :class:`google.cloud.bigquery.table.Table` @@ -763,7 +763,7 @@ class _ExtractConfiguration(object): class ExtractTableToStorageJob(_AsyncJob): """Asynchronous job: extract data from a table into Cloud Storage. - :type name: string + :type name: str :param name: the name of the job :type source: :class:`google.cloud.bigquery.table.Table` @@ -894,10 +894,10 @@ class _AsyncQueryConfiguration(object): class QueryJob(_AsyncJob): """Asynchronous job: query tables. - :type name: string + :type name: str :param name: the name of the job - :type query: string + :type query: str :param query: SQL query string :type client: :class:`google.cloud.bigquery.client.Client` diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index f2d1979dbf58..7f474625e3b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -42,7 +42,7 @@ class _SyncQueryConfiguration(object): class QueryResults(object): """Synchronous job: query tables. - :type query: string + :type query: str :param query: SQL query string :type client: :class:`google.cloud.bigquery.client.Client` @@ -348,7 +348,7 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, :type max_results: integer or ``NoneType`` :param max_results: maximum number of rows to return. - :type page_token: string or ``NoneType`` + :type page_token: str or ``NoneType`` :param page_token: token representing a cursor into the table's rows. :type start_index: integer or ``NoneType`` From bd58bb7c73178c7fa2d89320e33012616b60b757 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 20 Oct 2016 16:02:02 -0700 Subject: [PATCH 0013/2016] Replace types boolean with bool. Uses the command: ag -l 'type ([^:]+): boolean' | \ xargs gsed -r -i.bak -e 's/type ([^:]+): boolean/type \1: bool/g' --- .../google/cloud/bigquery/client.py | 4 ++-- .../google/cloud/bigquery/table.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 62ffc4eef4a7..7548681c448b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -103,7 +103,7 @@ def list_datasets(self, include_all=False, max_results=None, See: https://cloud.google.com/bigquery/docs/reference/v2/datasets/list - :type include_all: boolean + :type include_all: bool :param include_all: True if results include hidden datasets. :type max_results: int @@ -180,7 +180,7 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, not passed, the API will return the first page of jobs. - :type all_users: boolean + :type all_users: bool :param all_users: if true, include jobs owned by all users in the project. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a2d7a9a761e8..d9b21c1fec09 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -714,10 +714,10 @@ def insert_data(self, :param row_ids: Unique ids, one per row being inserted. If not passed, no de-duplication occurs. - :type skip_invalid_rows: boolean or ``NoneType`` + :type skip_invalid_rows: bool or ``NoneType`` :param skip_invalid_rows: skip rows w/ invalid data? - :type ignore_unknown_values: boolean or ``NoneType`` + :type ignore_unknown_values: bool or ``NoneType`` :param ignore_unknown_values: ignore columns beyond schema? :type template_suffix: str or ``NoneType`` @@ -826,7 +826,7 @@ def upload_from_file(self, job configuration option; see :meth:`google.cloud.bigquery.job.LoadJob` - :type rewind: boolean + :type rewind: bool :param rewind: If True, seek to the beginning of the file handle before writing the file to Cloud Storage. @@ -839,11 +839,11 @@ def upload_from_file(self, :type num_retries: integer :param num_retries: Number of upload retries. Defaults to 6. - :type allow_jagged_rows: boolean + :type allow_jagged_rows: bool :param allow_jagged_rows: job configuration option; see :meth:`google.cloud.bigquery.job.LoadJob`. - :type allow_quoted_newlines: boolean + :type allow_quoted_newlines: bool :param allow_quoted_newlines: job configuration option; see :meth:`google.cloud.bigquery.job.LoadJob`. @@ -859,7 +859,7 @@ def upload_from_file(self, :param field_delimiter: job configuration option; see :meth:`google.cloud.bigquery.job.LoadJob`. - :type ignore_unknown_values: boolean + :type ignore_unknown_values: bool :param ignore_unknown_values: job configuration option; see :meth:`google.cloud.bigquery.job.LoadJob`. From fb0cbf52a285df30bb137191a4d1dba6f2d1e12c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 20 Oct 2016 16:04:23 -0700 Subject: [PATCH 0014/2016] Replace rtypes boolean with bool. Uses the command: ag -l 'rtype: boolean' | xargs sed -i .bak 's/rtype: boolean/rtype: bool/g' --- packages/google-cloud-bigquery/google/cloud/bigquery/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 7f474625e3b1..42e6be380b0a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -118,7 +118,7 @@ def cache_hit(self): See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#cacheHit - :rtype: boolean or ``NoneType`` + :rtype: bool or ``NoneType`` :returns: True if the query results were served from cache (None until set by the server). """ @@ -131,7 +131,7 @@ def complete(self): See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#jobComplete - :rtype: boolean or ``NoneType`` + :rtype: bool or ``NoneType`` :returns: True if the query completed on the server (None until set by the server). """ From 01794e1a28aa78c3697be47abea5308517217034 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 20 Oct 2016 16:20:20 -0700 Subject: [PATCH 0015/2016] Replace integer with int in types. Uses the command: ag -l 'type ([^:]+): integer' | \ xargs gsed -r -i.bak -e 's/type ([^:]+): integer/type \1: int/g' --- .../google/cloud/bigquery/dataset.py | 2 +- .../google-cloud-bigquery/google/cloud/bigquery/query.py | 6 +++--- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index ce30fea3e4d3..3f32d1765286 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -216,7 +216,7 @@ def default_table_expiration_ms(self): def default_table_expiration_ms(self, value): """Update default expiration time for tables in the dataset. - :type value: integer, or ``NoneType`` + :type value: int, or ``NoneType`` :param value: new default time, in milliseconds :raises: ValueError for invalid value types. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 6146c0431657..fe9417ba379d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -345,16 +345,16 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults - :type max_results: integer or ``NoneType`` + :type max_results: int or ``NoneType`` :param max_results: maximum number of rows to return. :type page_token: string or ``NoneType`` :param page_token: token representing a cursor into the table's rows. - :type start_index: integer or ``NoneType`` + :type start_index: int or ``NoneType`` :param start_index: zero-based index of starting row - :type timeout_ms: integer or ``NoneType`` + :type timeout_ms: int or ``NoneType`` :param timeout_ms: timeout, in milliseconds, to wait for query to complete diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a2d7a9a761e8..d352d5edfeb3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -654,7 +654,7 @@ def fetch_data(self, max_results=None, page_token=None, client=None): incomplete. To ensure that the local copy of the schema is up-to-date, call the table's ``reload`` method. - :type max_results: integer or ``NoneType`` + :type max_results: int or ``NoneType`` :param max_results: maximum number of rows to return. :type page_token: str or ``NoneType`` @@ -836,7 +836,7 @@ def upload_from_file(self, :func:`os.fstat`. (If the file handle is not from the filesystem this won't be possible.) - :type num_retries: integer + :type num_retries: int :param num_retries: Number of upload retries. Defaults to 6. :type allow_jagged_rows: boolean @@ -863,7 +863,7 @@ def upload_from_file(self, :param ignore_unknown_values: job configuration option; see :meth:`google.cloud.bigquery.job.LoadJob`. - :type max_bad_records: integer + :type max_bad_records: int :param max_bad_records: job configuration option; see :meth:`google.cloud.bigquery.job.LoadJob`. @@ -871,7 +871,7 @@ def upload_from_file(self, :param quote_character: job configuration option; see :meth:`google.cloud.bigquery.job.LoadJob`. - :type skip_leading_rows: integer + :type skip_leading_rows: int :param skip_leading_rows: job configuration option; see :meth:`google.cloud.bigquery.job.LoadJob`. From 75b2e2ebc842e211ff25bef838d28ca61e2e81fe Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 20 Oct 2016 16:24:00 -0700 Subject: [PATCH 0016/2016] Replaces integer with int in rtypes. Uses the command: ag -l 'rtype: integer' | xargs sed -i .bak 's/rtype: integer/rtype: int/g' --- .../google/cloud/bigquery/dataset.py | 2 +- .../google-cloud-bigquery/google/cloud/bigquery/job.py | 8 ++++---- .../google-cloud-bigquery/google/cloud/bigquery/query.py | 4 ++-- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 3f32d1765286..3f7d340bd20c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -207,7 +207,7 @@ def self_link(self): def default_table_expiration_ms(self): """Default expiration time for tables in the dataset. - :rtype: integer, or ``NoneType`` + :rtype: int, or ``NoneType`` :returns: The time in milliseconds, or None (the default). """ return self._properties.get('defaultTableExpirationMs') diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index d0155ca8b32e..f32a00544291 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -455,7 +455,7 @@ def schema(self, value): def input_file_bytes(self): """Count of bytes loaded from source files. - :rtype: integer, or ``NoneType`` + :rtype: int, or ``NoneType`` :returns: the count (None until set from the server). """ statistics = self._properties.get('statistics') @@ -466,7 +466,7 @@ def input_file_bytes(self): def input_files(self): """Count of source files. - :rtype: integer, or ``NoneType`` + :rtype: int, or ``NoneType`` :returns: the count (None until set from the server). """ statistics = self._properties.get('statistics') @@ -477,7 +477,7 @@ def input_files(self): def output_bytes(self): """Count of bytes saved to destination table. - :rtype: integer, or ``NoneType`` + :rtype: int, or ``NoneType`` :returns: the count (None until set from the server). """ statistics = self._properties.get('statistics') @@ -488,7 +488,7 @@ def output_bytes(self): def output_rows(self): """Count of rows saved to destination table. - :rtype: integer, or ``NoneType`` + :rtype: int, or ``NoneType`` :returns: the count (None until set from the server). """ statistics = self._properties.get('statistics') diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index fe9417ba379d..3192659a2095 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -197,7 +197,7 @@ def total_rows(self): See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalRows - :rtype: integer, or ``NoneType`` + :rtype: int, or ``NoneType`` :returns: Count generated on the server (None until set by the server). """ return self._properties.get('totalRows') @@ -209,7 +209,7 @@ def total_bytes_processed(self): See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalBytesProcessed - :rtype: integer, or ``NoneType`` + :rtype: int, or ``NoneType`` :returns: Count generated on the server (None until set by the server). """ return self._properties.get('totalBytesProcessed') diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d352d5edfeb3..8bc68b03914c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -151,7 +151,7 @@ def modified(self): def num_bytes(self): """The size of the table in bytes. - :rtype: integer, or ``NoneType`` + :rtype: int, or ``NoneType`` :returns: the byte count (None until set from the server). """ num_bytes_as_str = self._properties.get('numBytes') @@ -162,7 +162,7 @@ def num_bytes(self): def num_rows(self): """The number of rows in the table. - :rtype: integer, or ``NoneType`` + :rtype: int, or ``NoneType`` :returns: the row count (None until set from the server). """ num_rows_as_str = self._properties.get('numRows') From d1abdc3ab00c149dc314f1058ad509e044a9c6c6 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 20 Oct 2016 17:04:00 -0700 Subject: [PATCH 0017/2016] Replace :: with `.. code-block:: console`. Towards #2404. --- packages/google-cloud-bigquery/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index dfd585fc88cd..d4059a156c34 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -12,7 +12,7 @@ Python Client for Google BigQuery Quick Start ----------- -:: +.. code-block:: console $ pip install --upgrade google-cloud-bigquery From 96dfe3bf7cf4fbd0e3b8d54b497aa100ad3273c2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 21 Oct 2016 09:51:22 -0700 Subject: [PATCH 0018/2016] Remove None from param types and add (Optional). This runs a script to remove None from the types for parameters, and added (Optional) to the description. Does not pass lint due to some too-long lines. I will clean those up manually. See: https://github.com/GoogleCloudPlatform/google-cloud-python/pull/2580#pullrequestreview-5178193 --- .../google/cloud/bigquery/dataset.py | 16 ++--- .../google/cloud/bigquery/query.py | 16 ++--- .../google/cloud/bigquery/table.py | 63 ++++++++++--------- 3 files changed, 48 insertions(+), 47 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index af0740d1feb0..423484c68d51 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -216,8 +216,8 @@ def default_table_expiration_ms(self): def default_table_expiration_ms(self, value): """Update default expiration time for tables in the dataset. - :type value: int, or ``NoneType`` - :param value: new default time, in milliseconds + :type value: int + :param value: (Optional) new default time, in milliseconds :raises: ValueError for invalid value types. """ @@ -238,8 +238,8 @@ def description(self): def description(self, value): """Update description of the dataset. - :type value: str, or ``NoneType`` - :param value: new description + :type value: str + :param value: (Optional) new description :raises: ValueError for invalid value types. """ @@ -260,8 +260,8 @@ def friendly_name(self): def friendly_name(self, value): """Update title of the dataset. - :type value: str, or ``NoneType`` - :param value: new title + :type value: str + :param value: (Optional) new title :raises: ValueError for invalid value types. """ @@ -282,8 +282,8 @@ def location(self): def location(self, value): """Update location in which the dataset is hosted. - :type value: str, or ``NoneType`` - :param value: new location + :type value: str + :param value: (Optional) new location :raises: ValueError for invalid value types. """ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 97e89c7cb5d8..3fe75b290b0c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -345,17 +345,17 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults - :type max_results: int or ``NoneType`` - :param max_results: maximum number of rows to return. + :type max_results: int + :param max_results: (Optional) maximum number of rows to return. - :type page_token: str or ``NoneType`` - :param page_token: token representing a cursor into the table's rows. + :type page_token: str + :param page_token: (Optional) token representing a cursor into the table's rows. - :type start_index: int or ``NoneType`` - :param start_index: zero-based index of starting row + :type start_index: int + :param start_index: (Optional) zero-based index of starting row - :type timeout_ms: int or ``NoneType`` - :param timeout_ms: timeout, in milliseconds, to wait for query to + :type timeout_ms: int + :param timeout_ms: (Optional) timeout, in milliseconds, to wait for query to complete :type client: :class:`~google.cloud.bigquery.client.Client` or diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 6afea05f9f26..ce04fc036b7d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -264,8 +264,8 @@ def description(self): def description(self, value): """Update description of the table. - :type value: str, or ``NoneType`` - :param value: new description + :type value: str + :param value: (Optional) new description :raises: ValueError for invalid value types. """ @@ -289,8 +289,8 @@ def expires(self): def expires(self, value): """Update datetime at which the table will be removed. - :type value: ``datetime.datetime``, or ``NoneType`` - :param value: the new expiration time, or None + :type value: ``datetime.datetime`` + :param value: (Optional) the new expiration time, or None """ if not isinstance(value, datetime.datetime) and value is not None: raise ValueError("Pass a datetime, or None") @@ -309,8 +309,8 @@ def friendly_name(self): def friendly_name(self, value): """Update title of the table. - :type value: str, or ``NoneType`` - :param value: new title + :type value: str + :param value: (Optional) new title :raises: ValueError for invalid value types. """ @@ -331,8 +331,8 @@ def location(self): def location(self, value): """Update location in which the table is hosted. - :type value: str, or ``NoneType`` - :param value: new location + :type value: str + :param value: (Optional) new location :raises: ValueError for invalid value types. """ @@ -554,17 +554,17 @@ def patch(self, :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. - :type friendly_name: str or ``NoneType`` - :param friendly_name: point in time at which the table expires. + :type friendly_name: str + :param friendly_name: (Optional) point in time at which the table expires. - :type description: str or ``NoneType`` - :param description: point in time at which the table expires. + :type description: str + :param description: (Optional) point in time at which the table expires. - :type location: str or ``NoneType`` - :param location: point in time at which the table expires. + :type location: str + :param location: (Optional) point in time at which the table expires. - :type expires: :class:`datetime.datetime` or ``NoneType`` - :param expires: point in time at which the table expires. + :type expires: :class:`datetime.datetime` + :param expires: (Optional) point in time at which the table expires. :type view_query: str :param view_query: SQL query defining the table as a view @@ -654,11 +654,12 @@ def fetch_data(self, max_results=None, page_token=None, client=None): incomplete. To ensure that the local copy of the schema is up-to-date, call the table's ``reload`` method. - :type max_results: int or ``NoneType`` - :param max_results: maximum number of rows to return. + :type max_results: int + :param max_results: (Optional) maximum number of rows to return. - :type page_token: str or ``NoneType`` - :param page_token: token representing a cursor into the table's rows. + :type page_token: str + :param page_token: + (Optional) token representing a cursor into the table's rows. :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -714,18 +715,18 @@ def insert_data(self, :param row_ids: Unique ids, one per row being inserted. If not passed, no de-duplication occurs. - :type skip_invalid_rows: bool or ``NoneType`` - :param skip_invalid_rows: skip rows w/ invalid data? + :type skip_invalid_rows: bool + :param skip_invalid_rows: (Optional) skip rows w/ invalid data? - :type ignore_unknown_values: bool or ``NoneType`` - :param ignore_unknown_values: ignore columns beyond schema? - - :type template_suffix: str or ``NoneType`` - :param template_suffix: treat ``name`` as a template table and provide - a suffix. BigQuery will create the table - `` + `` based on the - schema of the template table. See: - https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables + :type ignore_unknown_values: bool + :param ignore_unknown_values: (Optional) ignore columns beyond schema? + + :type template_suffix: str + :param template_suffix: + (Optional) treat ``name`` as a template table and provide a suffix. + BigQuery will create the table `` + `` based + on the schema of the template table. See: + https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` From 88e6f80d26f985b3166a5b9f3b1d80ffae84c95e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 21 Oct 2016 10:34:39 -0700 Subject: [PATCH 0019/2016] Fix bigquery table patch parameter docstrings It looks like there was a mistake in copy-pasting parameter documentation. All the descriptions were for the expiration time. --- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index ce04fc036b7d..8467c8f23210 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -555,13 +555,14 @@ def patch(self, ``client`` stored on the current dataset. :type friendly_name: str - :param friendly_name: (Optional) point in time at which the table expires. + :param friendly_name: (Optional) a descriptive name for this table. :type description: str - :param description: (Optional) point in time at which the table expires. + :param description: (Optional) a description of this table. :type location: str - :param location: (Optional) point in time at which the table expires. + :param location: + (Optional) the geographic location where the table resides. :type expires: :class:`datetime.datetime` :param expires: (Optional) point in time at which the table expires. From dca61f5ee521e08a2fa54352c7a534a834ab9bbf Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 21 Oct 2016 11:21:42 -0700 Subject: [PATCH 0020/2016] Fix lint errors caused by addition of (Optional). Mostly, lines that were too long. --- .../google-cloud-bigquery/google/cloud/bigquery/query.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 3fe75b290b0c..5c7c3681a539 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -349,14 +349,15 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, :param max_results: (Optional) maximum number of rows to return. :type page_token: str - :param page_token: (Optional) token representing a cursor into the table's rows. + :param page_token: + (Optional) token representing a cursor into the table's rows. :type start_index: int :param start_index: (Optional) zero-based index of starting row :type timeout_ms: int - :param timeout_ms: (Optional) timeout, in milliseconds, to wait for query to - complete + :param timeout_ms: + (Optional) timeout, in milliseconds, to wait for query to complete :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` From f8907490e7460c287d85e886088976dda8a44e86 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 24 Oct 2016 00:11:51 -0700 Subject: [PATCH 0021/2016] Fixing unit tests for Iterator dependent packages. --- .../unit_tests/test_client.py | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index 77c01a09aa63..e2f0618a10da 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -35,6 +35,7 @@ def test_ctor(self): self.assertIs(client.connection.http, http) def test_list_projects_defaults(self): + import six from google.cloud.bigquery.client import Project PROJECT_1 = 'PROJECT_ONE' PROJECT_2 = 'PROJECT_TWO' @@ -60,8 +61,8 @@ def test_list_projects_defaults(self): conn = client.connection = _Connection(DATA) iterator = client.list_projects() - iterator.update_page() - projects = list(iterator.page) + page = six.next(iterator.pages) + projects = list(page) token = iterator.next_page_token self.assertEqual(len(projects), len(DATA['projects'])) @@ -78,6 +79,8 @@ def test_list_projects_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_projects_explicit_response_missing_projects_key(self): + import six + PROJECT = 'PROJECT' PATH = 'projects' TOKEN = 'TOKEN' @@ -87,8 +90,8 @@ def test_list_projects_explicit_response_missing_projects_key(self): conn = client.connection = _Connection(DATA) iterator = client.list_projects(max_results=3, page_token=TOKEN) - iterator.update_page() - projects = list(iterator.page) + page = six.next(iterator.pages) + projects = list(page) token = iterator.next_page_token self.assertEqual(len(projects), 0) @@ -102,6 +105,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): {'maxResults': 3, 'pageToken': TOKEN}) def test_list_datasets_defaults(self): + import six from google.cloud.bigquery.dataset import Dataset PROJECT = 'PROJECT' DATASET_1 = 'dataset_one' @@ -128,8 +132,8 @@ def test_list_datasets_defaults(self): conn = client.connection = _Connection(DATA) iterator = client.list_datasets() - iterator.update_page() - datasets = list(iterator.page) + page = six.next(iterator.pages) + datasets = list(page) token = iterator.next_page_token self.assertEqual(len(datasets), len(DATA['datasets'])) @@ -145,6 +149,8 @@ def test_list_datasets_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_datasets_explicit_response_missing_datasets_key(self): + import six + PROJECT = 'PROJECT' PATH = 'projects/%s/datasets' % PROJECT TOKEN = 'TOKEN' @@ -155,8 +161,8 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): iterator = client.list_datasets( include_all=True, max_results=3, page_token=TOKEN) - iterator.update_page() - datasets = list(iterator.page) + page = six.next(iterator.pages) + datasets = list(page) token = iterator.next_page_token self.assertEqual(len(datasets), 0) @@ -189,6 +195,7 @@ def test_job_from_resource_unknown_type(self): client.job_from_resource({'configuration': {'nonesuch': {}}}) def test_list_jobs_defaults(self): + import six from google.cloud.bigquery.job import LoadTableFromStorageJob from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractTableToStorageJob @@ -301,8 +308,8 @@ def test_list_jobs_defaults(self): conn = client.connection = _Connection(DATA) iterator = client.list_jobs() - iterator.update_page() - jobs = list(iterator.page) + page = six.next(iterator.pages) + jobs = list(page) token = iterator.next_page_token self.assertEqual(len(jobs), len(DATA['jobs'])) @@ -319,6 +326,7 @@ def test_list_jobs_defaults(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_load_job_wo_sourceUris(self): + import six from google.cloud.bigquery.job import LoadTableFromStorageJob PROJECT = 'PROJECT' DATASET = 'test_dataset' @@ -356,8 +364,8 @@ def test_list_jobs_load_job_wo_sourceUris(self): conn = client.connection = _Connection(DATA) iterator = client.list_jobs() - iterator.update_page() - jobs = list(iterator.page) + page = six.next(iterator.pages) + jobs = list(page) token = iterator.next_page_token self.assertEqual(len(jobs), len(DATA['jobs'])) @@ -374,6 +382,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_explicit_missing(self): + import six PROJECT = 'PROJECT' PATH = 'projects/%s/jobs' % PROJECT DATA = {} @@ -384,8 +393,8 @@ def test_list_jobs_explicit_missing(self): iterator = client.list_jobs(max_results=1000, page_token=TOKEN, all_users=True, state_filter='done') - iterator.update_page() - jobs = list(iterator.page) + page = six.next(iterator.pages) + jobs = list(page) token = iterator.next_page_token self.assertEqual(len(jobs), 0) From 518b5685dd170bade67fb72a7f9f16a40712b9f6 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 25 Oct 2016 12:44:53 -0700 Subject: [PATCH 0022/2016] Renaming Iterator to HTTPIterator. --- .../google/cloud/bigquery/client.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 7548681c448b..785cc842e56f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -23,7 +23,7 @@ from google.cloud.bigquery.job import LoadTableFromStorageJob from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.query import QueryResults -from google.cloud.iterator import Iterator +from google.cloud.iterator import HTTPIterator class Project(object): @@ -92,9 +92,10 @@ def list_projects(self, max_results=None, page_token=None): :returns: Iterator of :class:`~google.cloud.bigquery.client.Project` accessible to the current client. """ - return Iterator(client=self, path='/projects', - items_key='projects', item_to_value=_item_to_project, - page_token=page_token, max_results=max_results) + return HTTPIterator( + client=self, path='/projects', item_to_value=_item_to_project, + items_key='projects', page_token=page_token, + max_results=max_results) def list_datasets(self, include_all=False, max_results=None, page_token=None): @@ -123,9 +124,9 @@ def list_datasets(self, include_all=False, max_results=None, if include_all: extra_params['all'] = True path = '/projects/%s/datasets' % (self.project,) - return Iterator( - client=self, path=path, items_key='datasets', - item_to_value=_item_to_dataset, page_token=page_token, + return HTTPIterator( + client=self, path=path, item_to_value=_item_to_dataset, + items_key='datasets', page_token=page_token, max_results=max_results, extra_params=extra_params) def dataset(self, dataset_name): @@ -204,9 +205,9 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, extra_params['stateFilter'] = state_filter path = '/projects/%s/jobs' % (self.project,) - return Iterator( - client=self, path=path, items_key='jobs', - item_to_value=_item_to_job, page_token=page_token, + return HTTPIterator( + client=self, path=path, item_to_value=_item_to_job, + items_key='jobs', page_token=page_token, max_results=max_results, extra_params=extra_params) def load_table_from_storage(self, job_name, destination, *source_uris): From 722b74df22b7b32277d13d60e01a436e4c1f5d93 Mon Sep 17 00:00:00 2001 From: Jeff Kramer Date: Wed, 26 Oct 2016 16:44:57 -0500 Subject: [PATCH 0023/2016] Add AVRO as a valid BigQuery SourceFormat See GoogleCloudPlatform/google-cloud-python#2627 --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5eba2721da04..7766c120c5cd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -70,7 +70,8 @@ class SourceFormat(_EnumProperty): CSV = 'CSV' DATASTORE_BACKUP = 'DATASTORE_BACKUP' NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' - ALLOWED = (CSV, DATASTORE_BACKUP, NEWLINE_DELIMITED_JSON) + AVRO = 'AVRO' + ALLOWED = (CSV, DATASTORE_BACKUP, NEWLINE_DELIMITED_JSON, AVRO) class WriteDisposition(_EnumProperty): From 7002e2f7ba8c04171ac415cab24409f0e9f9c639 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 18 Oct 2016 16:56:09 -0700 Subject: [PATCH 0024/2016] Making BigQuery dataset.list_tables() into an iterator. --- .../google/cloud/bigquery/dataset.py | 55 ++++++++++--------- .../unit_tests/test_dataset.py | 20 ++++++- 2 files changed, 47 insertions(+), 28 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 423484c68d51..a9a7aecdc31c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -18,6 +18,7 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud.exceptions import NotFound from google.cloud.bigquery.table import Table +from google.cloud.iterator import Iterator class AccessGrant(object): @@ -542,35 +543,24 @@ def list_tables(self, max_results=None, page_token=None): https://cloud.google.com/bigquery/docs/reference/v2/tables/list :type max_results: int - :param max_results: maximum number of tables to return, If not - passed, defaults to a value set by the API. + :param max_results: (Optional) Maximum number of tables to return. + If not passed, defaults to a value set by the API. :type page_token: str - :param page_token: opaque marker for the next "page" of datasets. If - not passed, the API will return the first page of - datasets. - - :rtype: tuple, (list, str) - :returns: list of :class:`google.cloud.bigquery.table.Table`, plus a - "next page token" string: if not ``None``, indicates that - more tables can be retrieved with another call (pass that - value as ``page_token``). - """ - params = {} - - if max_results is not None: - params['maxResults'] = max_results - - if page_token is not None: - params['pageToken'] = page_token + :param page_token: (Optional) Opaque marker for the next "page" of + datasets. If not passed, the API will return the + first page of datasets. + :rtype: :class:`~google.cloud.iterator.Iterator` + :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` + contained within the current dataset. + """ path = '/projects/%s/datasets/%s/tables' % (self.project, self.name) - connection = self._client.connection - resp = connection.api_request(method='GET', path=path, - query_params=params) - tables = [Table.from_api_repr(resource, self) - for resource in resp.get('tables', ())] - return tables, resp.get('nextPageToken') + result = Iterator(client=self._client, path=path, + item_to_value=_item_to_table, items_key='tables', + page_token=page_token, max_results=max_results) + result.dataset = self + return result def table(self, name, schema=()): """Construct a table bound to this dataset. @@ -585,3 +575,18 @@ def table(self, name, schema=()): :returns: a new ``Table`` instance """ return Table(name, dataset=self, schema=schema) + + +def _item_to_table(iterator, resource): + """Convert a JSON table to the native object. + + :type iterator: :class:`~google.cloud.iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type resource: dict + :param resource: An item to be converted to a table. + + :rtype: :class:`~google.cloud.bigquery.table.Table` + :returns: The next table in the page. + """ + return Table.from_api_repr(resource, iterator.dataset) diff --git a/packages/google-cloud-bigquery/unit_tests/test_dataset.py b/packages/google-cloud-bigquery/unit_tests/test_dataset.py index d9b85cf5fad3..7252349e0d03 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_dataset.py +++ b/packages/google-cloud-bigquery/unit_tests/test_dataset.py @@ -639,7 +639,13 @@ def test_list_tables_empty(self): conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = self._makeOne(self.DS_NAME, client=client) - tables, token = dataset.list_tables() + + iterator = dataset.list_tables() + self.assertIs(iterator.dataset, dataset) + iterator.update_page() + tables = list(iterator.page) + token = iterator.next_page_token + self.assertEqual(tables, []) self.assertIsNone(token) self.assertEqual(len(conn._requested), 1) @@ -677,7 +683,11 @@ def test_list_tables_defaults(self): client = _Client(project=self.PROJECT, connection=conn) dataset = self._makeOne(self.DS_NAME, client=client) - tables, token = dataset.list_tables() + iterator = dataset.list_tables() + self.assertIs(iterator.dataset, dataset) + iterator.update_page() + tables = list(iterator.page) + token = iterator.next_page_token self.assertEqual(len(tables), len(DATA['tables'])) for found, expected in zip(tables, DATA['tables']): @@ -719,7 +729,11 @@ def test_list_tables_explicit(self): client = _Client(project=self.PROJECT, connection=conn) dataset = self._makeOne(self.DS_NAME, client=client) - tables, token = dataset.list_tables(max_results=3, page_token=TOKEN) + iterator = dataset.list_tables(max_results=3, page_token=TOKEN) + self.assertIs(iterator.dataset, dataset) + iterator.update_page() + tables = list(iterator.page) + token = iterator.next_page_token self.assertEqual(len(tables), len(DATA['tables'])) for found, expected in zip(tables, DATA['tables']): From 48761de011ece7be42c73c4feac7e1357b12ac34 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 18 Oct 2016 16:57:02 -0700 Subject: [PATCH 0025/2016] Refactoring _rows_from_json BigQuery helper. In particular, isolating the logic useful to work on a single row. --- .../google/cloud/bigquery/_helpers.py | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index d22e1592a68e..6d31eea3c03e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -86,20 +86,34 @@ def _string_from_json(value, _): } +def _row_from_json(row, schema): + """Convert JSON row data to row w/ appropriate types. + + :type row: dict + :param row: + + :type schema: tuple + :param schema: A tuple of + :class:`~google.cloud.bigquery.schema.SchemaField`. + + :rtype: tuple + :returns: A tuple of data converted to native types. + """ + row_data = [] + for field, cell in zip(schema, row['f']): + converter = _CELLDATA_FROM_JSON[field.field_type] + if field.mode == 'REPEATED': + row_data.append([converter(item, field) + for item in cell['v']]) + else: + row_data.append(converter(cell['v'], field)) + + return tuple(row_data) + + def _rows_from_json(rows, schema): """Convert JSON row data to rows w/ appropriate types.""" - rows_data = [] - for row in rows: - row_data = [] - for field, cell in zip(schema, row['f']): - converter = _CELLDATA_FROM_JSON[field.field_type] - if field.mode == 'REPEATED': - row_data.append([converter(item, field) - for item in cell['v']]) - else: - row_data.append(converter(cell['v'], field)) - rows_data.append(tuple(row_data)) - return rows_data + return [_row_from_json(row, schema) for row in rows] class _ConfigurationProperty(object): From c9f542552ebaf021e51b6f7cd6ad91f8d780e5c9 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 18 Oct 2016 17:21:42 -0700 Subject: [PATCH 0026/2016] Making BigQuery table.fetch_data() into an iterator. --- .../google/cloud/bigquery/table.py | 102 +++++++++++------- .../unit_tests/test_table.py | 27 +++-- 2 files changed, 87 insertions(+), 42 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 8467c8f23210..6d73e538cf1d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -32,7 +32,8 @@ from google.cloud.streaming.transfer import RESUMABLE_UPLOAD from google.cloud.streaming.transfer import Upload from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery._helpers import _rows_from_json +from google.cloud.bigquery._helpers import _row_from_json +from google.cloud.iterator import Iterator _TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'table.reload()'" @@ -653,47 +654,36 @@ def fetch_data(self, max_results=None, page_token=None, client=None): up-to-date with the schema as defined on the back-end: if the two schemas are not identical, the values returned may be incomplete. To ensure that the local copy of the schema is - up-to-date, call the table's ``reload`` method. + up-to-date, call :meth:`reload`. :type max_results: int - :param max_results: (Optional) maximum number of rows to return. + :param max_results: (Optional) Maximum number of rows to return. :type page_token: str - :param page_token: - (Optional) token representing a cursor into the table's rows. - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: tuple - :returns: ``(row_data, total_rows, page_token)``, where ``row_data`` - is a list of tuples, one per result row, containing only - the values; ``total_rows`` is a count of the total number - of rows in the table; and ``page_token`` is an opaque - string which can be used to fetch the next batch of rows - (``None`` if no further batches can be fetched). + :param page_token: (Optional) Token representing a cursor into the + table's rows. + + :type client: :class:`~google.cloud.bigquery.client.Client` + :param client: (Optional) The client to use. If not passed, falls + back to the ``client`` stored on the current dataset. + + :rtype: :class:`~google.cloud.iterator.Iterator` + :returns: Iterator of row data :class:`tuple`s. Each page in the + iterator will have the ``total_rows`` attribute set, + which counts the total number of rows **in the table** + (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). """ client = self._require_client(client) - params = {} - - if max_results is not None: - params['maxResults'] = max_results - - if page_token is not None: - params['pageToken'] = page_token - - response = client.connection.api_request(method='GET', - path='%s/data' % self.path, - query_params=params) - total_rows = response.get('totalRows') - if total_rows is not None: - total_rows = int(total_rows) - page_token = response.get('pageToken') - rows_data = _rows_from_json(response.get('rows', ()), self._schema) - - return rows_data, total_rows, page_token + path = '%s/data' % (self.path,) + iterator = Iterator(client=client, path=path, + item_to_value=_item_to_row, items_key='rows', + page_token=page_token, max_results=max_results, + page_start=_rows_page_start) + iterator.schema = self._schema + # Over-ride the key used to retrieve the next page token. + iterator._NEXT_TOKEN = 'pageToken' + return iterator def insert_data(self, rows, @@ -1083,6 +1073,46 @@ def _build_schema_resource(fields): return infos +def _item_to_row(iterator, resource): + """Convert a JSON row to the native object. + + .. note:: + + This assumes that the ``schema`` attribute has been + added to the iterator after being created, which + should be done by the caller. + + :type iterator: :class:`~google.cloud.iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type resource: dict + :param resource: An item to be converted to a row. + + :rtype: tuple + :returns: The next row in the page. + """ + return _row_from_json(resource, iterator.schema) + + +# pylint: disable=unused-argument +def _rows_page_start(iterator, page, response): + """Grab total rows after a :class:`~google.cloud.iterator.Page` started. + + :type iterator: :class:`~google.cloud.iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type page: :class:`~google.cloud.iterator.Page` + :param page: The page that was just created. + + :type response: dict + :param response: The JSON API response for a page of rows in a table. + """ + total_rows = response.get('totalRows') + if total_rows is not None: + page.total_rows = int(total_rows) +# pylint: enable=unused-argument + + class _UploadConfig(object): """Faux message FBO apitools' 'configure_request'.""" accept = ['*/*'] diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 8ddcafab556b..48741172f732 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -1068,7 +1068,11 @@ def _bigquery_timestamp_float_repr(ts_float): table = self._makeOne(self.TABLE_NAME, dataset=dataset, schema=[full_name, age, joined]) - rows, total_rows, page_token = table.fetch_data() + iterator = table.fetch_data() + iterator.update_page() + rows = list(iterator.page) + total_rows = iterator.page.total_rows + page_token = iterator.next_page_token self.assertEqual(len(rows), 4) self.assertEqual(rows[0], ('Phred Phlyntstone', 32, WHEN)) @@ -1129,9 +1133,12 @@ def test_fetch_data_w_alternate_client(self): table = self._makeOne(self.TABLE_NAME, dataset=dataset, schema=[full_name, age, voter, score]) - rows, total_rows, page_token = table.fetch_data(client=client2, - max_results=MAX, - page_token=TOKEN) + iterator = table.fetch_data( + client=client2, max_results=MAX, page_token=TOKEN) + iterator.update_page() + rows = list(iterator.page) + total_rows = getattr(iterator.page, 'total_rows', None) + page_token = iterator.next_page_token self.assertEqual(len(rows), 4) self.assertEqual(rows[0], ('Phred Phlyntstone', 32, True, 3.1415926)) @@ -1177,7 +1184,11 @@ def test_fetch_data_w_repeated_fields(self): table = self._makeOne(self.TABLE_NAME, dataset=dataset, schema=[full_name, struct]) - rows, total_rows, page_token = table.fetch_data() + iterator = table.fetch_data() + iterator.update_page() + rows = list(iterator.page) + total_rows = iterator.page.total_rows + page_token = iterator.next_page_token self.assertEqual(len(rows), 1) self.assertEqual(rows[0][0], ['red', 'green']) @@ -1227,7 +1238,11 @@ def test_fetch_data_w_record_schema(self): table = self._makeOne(self.TABLE_NAME, dataset=dataset, schema=[full_name, phone]) - rows, total_rows, page_token = table.fetch_data() + iterator = table.fetch_data() + iterator.update_page() + rows = list(iterator.page) + total_rows = iterator.page.total_rows + page_token = iterator.next_page_token self.assertEqual(len(rows), 3) self.assertEqual(rows[0][0], 'Phred Phlyntstone') From cb58d347a74c07463269e3b209747803d2bbb0a4 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 31 Oct 2016 22:53:05 -0700 Subject: [PATCH 0027/2016] Rebase fixes. --- .../google/cloud/bigquery/dataset.py | 8 ++--- .../google/cloud/bigquery/table.py | 10 +++--- .../unit_tests/test_dataset.py | 16 ++++++---- .../unit_tests/test_table.py | 31 ++++++++++++------- 4 files changed, 38 insertions(+), 27 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index a9a7aecdc31c..f29fdbc8a243 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -18,7 +18,7 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud.exceptions import NotFound from google.cloud.bigquery.table import Table -from google.cloud.iterator import Iterator +from google.cloud.iterator import HTTPIterator class AccessGrant(object): @@ -556,9 +556,9 @@ def list_tables(self, max_results=None, page_token=None): contained within the current dataset. """ path = '/projects/%s/datasets/%s/tables' % (self.project, self.name) - result = Iterator(client=self._client, path=path, - item_to_value=_item_to_table, items_key='tables', - page_token=page_token, max_results=max_results) + result = HTTPIterator(client=self._client, path=path, + item_to_value=_item_to_table, items_key='tables', + page_token=page_token, max_results=max_results) result.dataset = self return result diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 6d73e538cf1d..26bb80584be6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -33,7 +33,7 @@ from google.cloud.streaming.transfer import Upload from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery._helpers import _row_from_json -from google.cloud.iterator import Iterator +from google.cloud.iterator import HTTPIterator _TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'table.reload()'" @@ -676,10 +676,10 @@ def fetch_data(self, max_results=None, page_token=None, client=None): """ client = self._require_client(client) path = '%s/data' % (self.path,) - iterator = Iterator(client=client, path=path, - item_to_value=_item_to_row, items_key='rows', - page_token=page_token, max_results=max_results, - page_start=_rows_page_start) + iterator = HTTPIterator(client=client, path=path, + item_to_value=_item_to_row, items_key='rows', + page_token=page_token, max_results=max_results, + page_start=_rows_page_start) iterator.schema = self._schema # Over-ride the key used to retrieve the next page token. iterator._NEXT_TOKEN = 'pageToken' diff --git a/packages/google-cloud-bigquery/unit_tests/test_dataset.py b/packages/google-cloud-bigquery/unit_tests/test_dataset.py index 7252349e0d03..24c270b5005b 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_dataset.py +++ b/packages/google-cloud-bigquery/unit_tests/test_dataset.py @@ -636,14 +636,16 @@ def test_delete_w_alternate_client(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_tables_empty(self): + import six + conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = self._makeOne(self.DS_NAME, client=client) iterator = dataset.list_tables() self.assertIs(iterator.dataset, dataset) - iterator.update_page() - tables = list(iterator.page) + page = six.next(iterator.pages) + tables = list(page) token = iterator.next_page_token self.assertEqual(tables, []) @@ -655,6 +657,7 @@ def test_list_tables_empty(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_tables_defaults(self): + import six from google.cloud.bigquery.table import Table TABLE_1 = 'table_one' @@ -685,8 +688,8 @@ def test_list_tables_defaults(self): iterator = dataset.list_tables() self.assertIs(iterator.dataset, dataset) - iterator.update_page() - tables = list(iterator.page) + page = six.next(iterator.pages) + tables = list(page) token = iterator.next_page_token self.assertEqual(len(tables), len(DATA['tables'])) @@ -702,6 +705,7 @@ def test_list_tables_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_tables_explicit(self): + import six from google.cloud.bigquery.table import Table TABLE_1 = 'table_one' @@ -731,8 +735,8 @@ def test_list_tables_explicit(self): iterator = dataset.list_tables(max_results=3, page_token=TOKEN) self.assertIs(iterator.dataset, dataset) - iterator.update_page() - tables = list(iterator.page) + page = six.next(iterator.pages) + tables = list(page) token = iterator.next_page_token self.assertEqual(len(tables), len(DATA['tables'])) diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 48741172f732..354821e4a149 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -1015,6 +1015,7 @@ def test_delete_w_alternate_client(self): def test_fetch_data_w_bound_client(self): import datetime + import six from google.cloud._helpers import UTC from google.cloud.bigquery.table import SchemaField @@ -1069,9 +1070,9 @@ def _bigquery_timestamp_float_repr(ts_float): schema=[full_name, age, joined]) iterator = table.fetch_data() - iterator.update_page() - rows = list(iterator.page) - total_rows = iterator.page.total_rows + page = six.next(iterator.pages) + rows = list(page) + total_rows = page.total_rows page_token = iterator.next_page_token self.assertEqual(len(rows), 4) @@ -1088,7 +1089,9 @@ def _bigquery_timestamp_float_repr(ts_float): self.assertEqual(req['path'], '/%s' % PATH) def test_fetch_data_w_alternate_client(self): + import six from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( self.PROJECT, self.DS_NAME, self.TABLE_NAME) MAX = 10 @@ -1135,9 +1138,9 @@ def test_fetch_data_w_alternate_client(self): iterator = table.fetch_data( client=client2, max_results=MAX, page_token=TOKEN) - iterator.update_page() - rows = list(iterator.page) - total_rows = getattr(iterator.page, 'total_rows', None) + page = six.next(iterator.pages) + rows = list(page) + total_rows = getattr(page, 'total_rows', None) page_token = iterator.next_page_token self.assertEqual(len(rows), 4) @@ -1157,7 +1160,9 @@ def test_fetch_data_w_alternate_client(self): {'maxResults': MAX, 'pageToken': TOKEN}) def test_fetch_data_w_repeated_fields(self): + import six from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( self.PROJECT, self.DS_NAME, self.TABLE_NAME) ROWS = 1234 @@ -1185,9 +1190,9 @@ def test_fetch_data_w_repeated_fields(self): schema=[full_name, struct]) iterator = table.fetch_data() - iterator.update_page() - rows = list(iterator.page) - total_rows = iterator.page.total_rows + page = six.next(iterator.pages) + rows = list(page) + total_rows = page.total_rows page_token = iterator.next_page_token self.assertEqual(len(rows), 1) @@ -1203,7 +1208,9 @@ def test_fetch_data_w_repeated_fields(self): self.assertEqual(req['path'], '/%s' % PATH) def test_fetch_data_w_record_schema(self): + import six from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( self.PROJECT, self.DS_NAME, self.TABLE_NAME) ROWS = 1234 @@ -1239,9 +1246,9 @@ def test_fetch_data_w_record_schema(self): schema=[full_name, phone]) iterator = table.fetch_data() - iterator.update_page() - rows = list(iterator.page) - total_rows = iterator.page.total_rows + page = six.next(iterator.pages) + rows = list(page) + total_rows = page.total_rows page_token = iterator.next_page_token self.assertEqual(len(rows), 3) From 17313bb7ccbf7552150ca1a5403e087c2ae12699 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 31 Oct 2016 23:12:46 -0700 Subject: [PATCH 0028/2016] Review feedback: moving total_rows from Page to Iterator. Also fixing BigQuery system test in the process. --- .../google/cloud/bigquery/_helpers.py | 6 +++--- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 5 +++-- packages/google-cloud-bigquery/unit_tests/test_table.py | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 6d31eea3c03e..89eb390993c6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -87,10 +87,10 @@ def _string_from_json(value, _): def _row_from_json(row, schema): - """Convert JSON row data to row w/ appropriate types. + """Convert JSON row data to row with appropriate types. :type row: dict - :param row: + :param row: A JSON response row to be converted. :type schema: tuple :param schema: A tuple of @@ -112,7 +112,7 @@ def _row_from_json(row, schema): def _rows_from_json(rows, schema): - """Convert JSON row data to rows w/ appropriate types.""" + """Convert JSON row data to rows with appropriate types.""" return [_row_from_json(row, schema) for row in rows] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 26bb80584be6..870d8520159e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -668,7 +668,7 @@ def fetch_data(self, max_results=None, page_token=None, client=None): back to the ``client`` stored on the current dataset. :rtype: :class:`~google.cloud.iterator.Iterator` - :returns: Iterator of row data :class:`tuple`s. Each page in the + :returns: Iterator of row data :class:`tuple`s. During each page, the iterator will have the ``total_rows`` attribute set, which counts the total number of rows **in the table** (this is distinct from the total number of rows in the @@ -1109,7 +1109,8 @@ def _rows_page_start(iterator, page, response): """ total_rows = response.get('totalRows') if total_rows is not None: - page.total_rows = int(total_rows) + total_rows = int(total_rows) + iterator.total_rows = total_rows # pylint: enable=unused-argument diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 354821e4a149..b46e3a2974df 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -1072,7 +1072,7 @@ def _bigquery_timestamp_float_repr(ts_float): iterator = table.fetch_data() page = six.next(iterator.pages) rows = list(page) - total_rows = page.total_rows + total_rows = iterator.total_rows page_token = iterator.next_page_token self.assertEqual(len(rows), 4) @@ -1140,7 +1140,7 @@ def test_fetch_data_w_alternate_client(self): client=client2, max_results=MAX, page_token=TOKEN) page = six.next(iterator.pages) rows = list(page) - total_rows = getattr(page, 'total_rows', None) + total_rows = iterator.total_rows page_token = iterator.next_page_token self.assertEqual(len(rows), 4) @@ -1192,7 +1192,7 @@ def test_fetch_data_w_repeated_fields(self): iterator = table.fetch_data() page = six.next(iterator.pages) rows = list(page) - total_rows = page.total_rows + total_rows = iterator.total_rows page_token = iterator.next_page_token self.assertEqual(len(rows), 1) @@ -1248,7 +1248,7 @@ def test_fetch_data_w_record_schema(self): iterator = table.fetch_data() page = six.next(iterator.pages) rows = list(page) - total_rows = page.total_rows + total_rows = iterator.total_rows page_token = iterator.next_page_token self.assertEqual(len(rows), 3) From 918e698ee1df2e80721d278aa681825d8c5daeb7 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Fri, 4 Nov 2016 10:12:12 -0700 Subject: [PATCH 0029/2016] Adding PyPI badges to package READMEs. --- packages/google-cloud-bigquery/README.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index d4059a156c34..1dcea16e0cc5 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -5,6 +5,8 @@ Python Client for Google BigQuery .. _Google BigQuery: https://cloud.google.com/bigquery/what-is-bigquery +|pypi| |versions| + - `Documentation`_ .. _Documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery-usage.html @@ -85,3 +87,8 @@ See the ``google-cloud-python`` API `BigQuery documentation`_ to learn how to connect to BigQuery using this Client Library. .. _BigQuery documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery-usage.html + +.. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg + :target: https://pypi.python.org/pypi/google-cloud-bigquery +.. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg + :target: https://pypi.python.org/pypi/google-cloud-bigquery From 97a2723de3a1947bbc7ee1e445a7615e10f9c23a Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Fri, 4 Nov 2016 12:49:02 -0700 Subject: [PATCH 0030/2016] Renaming connection module as _http in 5 packages. The packages are BigQuery, Datastore, Logging, Pub/Sub and Storage. The rename is in advance of a larger re-factor. But so long as the connections are not public, the re-factor can happen without user-facing implications. --- .../google/cloud/bigquery/{connection.py => _http.py} | 0 .../unit_tests/{test_connection.py => test__http.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename packages/google-cloud-bigquery/google/cloud/bigquery/{connection.py => _http.py} (100%) rename packages/google-cloud-bigquery/unit_tests/{test_connection.py => test__http.py} (100%) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py similarity index 100% rename from packages/google-cloud-bigquery/google/cloud/bigquery/connection.py rename to packages/google-cloud-bigquery/google/cloud/bigquery/_http.py diff --git a/packages/google-cloud-bigquery/unit_tests/test_connection.py b/packages/google-cloud-bigquery/unit_tests/test__http.py similarity index 100% rename from packages/google-cloud-bigquery/unit_tests/test_connection.py rename to packages/google-cloud-bigquery/unit_tests/test__http.py From a94b7a17ab767c6b47463eac9d4feb3b0d20b0b8 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Fri, 4 Nov 2016 13:22:29 -0700 Subject: [PATCH 0031/2016] Updating imports to reflect connection->_http module rename. --- .../google-cloud-bigquery/google/cloud/bigquery/__init__.py | 4 ---- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 2 +- packages/google-cloud-bigquery/unit_tests/test__http.py | 2 +- packages/google-cloud-bigquery/unit_tests/test_client.py | 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index fec0eee614cb..cde9432d83e2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -24,11 +24,7 @@ from google.cloud.bigquery.client import Client -from google.cloud.bigquery.connection import Connection from google.cloud.bigquery.dataset import AccessGrant from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table - - -SCOPE = Connection.SCOPE diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 785cc842e56f..d16a9d9349d2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -16,7 +16,7 @@ from google.cloud.client import JSONClient -from google.cloud.bigquery.connection import Connection +from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractTableToStorageJob diff --git a/packages/google-cloud-bigquery/unit_tests/test__http.py b/packages/google-cloud-bigquery/unit_tests/test__http.py index 35ec0276e86b..a2564ee278b8 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__http.py +++ b/packages/google-cloud-bigquery/unit_tests/test__http.py @@ -18,7 +18,7 @@ class TestConnection(unittest.TestCase): def _getTargetClass(self): - from google.cloud.bigquery.connection import Connection + from google.cloud.bigquery._http import Connection return Connection def _makeOne(self, *args, **kw): diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index e2f0618a10da..cd2198078f74 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -25,7 +25,7 @@ def _makeOne(self, *args, **kw): return self._getTargetClass()(*args, **kw) def test_ctor(self): - from google.cloud.bigquery.connection import Connection + from google.cloud.bigquery._http import Connection PROJECT = 'PROJECT' creds = _Credentials() http = object() From f0ac96e41f4b82ddbd89c44218d3b34ddf8aeb3a Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 7 Nov 2016 21:26:07 -0800 Subject: [PATCH 0032/2016] Avoiding using filesystem deps in package tox.ini configs. --- packages/google-cloud-bigquery/tox.ini | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tox.ini b/packages/google-cloud-bigquery/tox.ini index 4a5000739647..001444516350 100644 --- a/packages/google-cloud-bigquery/tox.ini +++ b/packages/google-cloud-bigquery/tox.ini @@ -3,8 +3,9 @@ envlist = py27,py34,py35,cover [testing] +localdeps = + pip install --upgrade {toxinidir}/../core deps = - {toxinidir}/../core pytest covercmd = py.test --quiet \ @@ -15,6 +16,7 @@ covercmd = [testenv] commands = + {[testing]localdeps} py.test --quiet {posargs} unit_tests deps = {[testing]deps} @@ -23,6 +25,7 @@ deps = basepython = python2.7 commands = + {[testing]localdeps} {[testing]covercmd} deps = {[testenv]deps} From 1e9c566112443757fb945026221753a2108ddf8c Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 8 Nov 2016 20:20:59 -0800 Subject: [PATCH 0033/2016] Renaming _getTargetClass to _get_target_class. Done via: $ git grep -l 'def _getTargetClass(self)' | \ > xargs sed -i s/'def _getTargetClass(self)'/'@staticmethod\n def _get_target_class()'/g --- .../unit_tests/test__helpers.py | 12 ++++++++---- .../google-cloud-bigquery/unit_tests/test__http.py | 3 ++- .../google-cloud-bigquery/unit_tests/test_client.py | 3 ++- .../google-cloud-bigquery/unit_tests/test_dataset.py | 6 ++++-- .../google-cloud-bigquery/unit_tests/test_job.py | 12 ++++++++---- .../google-cloud-bigquery/unit_tests/test_query.py | 3 ++- .../google-cloud-bigquery/unit_tests/test_schema.py | 3 ++- .../google-cloud-bigquery/unit_tests/test_table.py | 3 ++- 8 files changed, 30 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 5631abe6c184..e4400e159767 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -318,7 +318,8 @@ def test_w_int64_float64(self): class Test_ConfigurationProperty(unittest.TestCase): - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery._helpers import _ConfigurationProperty return _ConfigurationProperty @@ -353,7 +354,8 @@ def __init__(self): class Test_TypedProperty(unittest.TestCase): - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery._helpers import _TypedProperty return _TypedProperty @@ -386,7 +388,8 @@ def __init__(self): class Test_EnumProperty(unittest.TestCase): - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery._helpers import _EnumProperty return _EnumProperty @@ -419,7 +422,8 @@ def __init__(self): class Test_UDFResourcesProperty(unittest.TestCase): - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery._helpers import UDFResourcesProperty return UDFResourcesProperty diff --git a/packages/google-cloud-bigquery/unit_tests/test__http.py b/packages/google-cloud-bigquery/unit_tests/test__http.py index a2564ee278b8..ca3f9c08cc4d 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__http.py +++ b/packages/google-cloud-bigquery/unit_tests/test__http.py @@ -17,7 +17,8 @@ class TestConnection(unittest.TestCase): - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery._http import Connection return Connection diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index cd2198078f74..16cc0387d14b 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -17,7 +17,8 @@ class TestClient(unittest.TestCase): - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery.client import Client return Client diff --git a/packages/google-cloud-bigquery/unit_tests/test_dataset.py b/packages/google-cloud-bigquery/unit_tests/test_dataset.py index 24c270b5005b..e8afd5521d38 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_dataset.py +++ b/packages/google-cloud-bigquery/unit_tests/test_dataset.py @@ -17,7 +17,8 @@ class TestAccessGrant(unittest.TestCase): - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery.dataset import AccessGrant return AccessGrant @@ -80,7 +81,8 @@ class TestDataset(unittest.TestCase): PROJECT = 'project' DS_NAME = 'dataset-name' - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery.dataset import Dataset return Dataset diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index 592d81910f59..0edcb2abcb6a 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -122,7 +122,8 @@ def _verifyReadonlyResourceProperties(self, job, resource): class TestLoadTableFromStorageJob(unittest.TestCase, _Base): JOB_TYPE = 'load' - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery.job import LoadTableFromStorageJob return LoadTableFromStorageJob @@ -642,7 +643,8 @@ class TestCopyJob(unittest.TestCase, _Base): SOURCE_TABLE = 'source_table' DESTINATION_TABLE = 'destination_table' - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery.job import CopyJob return CopyJob @@ -939,7 +941,8 @@ class TestExtractTableToStorageJob(unittest.TestCase, _Base): SOURCE_TABLE = 'source_table' DESTINATION_URI = 'gs://bucket_name/object_name' - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery.job import ExtractTableToStorageJob return ExtractTableToStorageJob @@ -1232,7 +1235,8 @@ class TestQueryJob(unittest.TestCase, _Base): QUERY = 'select count(*) from persons' DESTINATION_TABLE = 'destination_table' - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery.job import QueryJob return QueryJob diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/unit_tests/test_query.py index 9bcb865df5a7..d744dd922c1b 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_query.py +++ b/packages/google-cloud-bigquery/unit_tests/test_query.py @@ -23,7 +23,8 @@ class TestQueryResults(unittest.TestCase): QUERY = 'select count(*) from persons' TOKEN = 'TOKEN' - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery.query import QueryResults return QueryResults diff --git a/packages/google-cloud-bigquery/unit_tests/test_schema.py b/packages/google-cloud-bigquery/unit_tests/test_schema.py index a8272728b742..f3604efcdfe4 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_schema.py +++ b/packages/google-cloud-bigquery/unit_tests/test_schema.py @@ -17,7 +17,8 @@ class TestSchemaField(unittest.TestCase): - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery.schema import SchemaField return SchemaField diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index b46e3a2974df..493397697b92 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -35,7 +35,8 @@ class TestTable(unittest.TestCase, _SchemaBase): DS_NAME = 'dataset-name' TABLE_NAME = 'table-name' - def _getTargetClass(self): + @staticmethod + def _get_target_class(): from google.cloud.bigquery.table import Table return Table From 9afeaa35593271f62cd865166b48a24bb5687c22 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 8 Nov 2016 20:22:12 -0800 Subject: [PATCH 0034/2016] Changing uses of _getTargetClass to _get_target_class. Done via: $ git grep -l _getTargetClass | \ > xargs sed -i s/_getTargetClass/_get_target_class/g --- .../unit_tests/test__helpers.py | 8 ++--- .../unit_tests/test__http.py | 2 +- .../unit_tests/test_client.py | 2 +- .../unit_tests/test_dataset.py | 10 +++--- .../unit_tests/test_job.py | 34 +++++++++---------- .../unit_tests/test_query.py | 6 ++-- .../unit_tests/test_schema.py | 2 +- .../unit_tests/test_table.py | 8 ++--- 8 files changed, 36 insertions(+), 36 deletions(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index e4400e159767..37f10ec7f20b 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -324,7 +324,7 @@ def _get_target_class(): return _ConfigurationProperty def _makeOne(self, *args, **kw): - return self._getTargetClass()(*args, **kw) + return self._get_target_class()(*args, **kw) def test_it(self): @@ -360,7 +360,7 @@ def _get_target_class(): return _TypedProperty def _makeOne(self, *args, **kw): - return self._getTargetClass()(*args, **kw) + return self._get_target_class()(*args, **kw) def test_it(self): @@ -395,7 +395,7 @@ def _get_target_class(): def test_it(self): - class Sub(self._getTargetClass()): + class Sub(self._get_target_class()): ALLOWED = ('FOO', 'BAR', 'BAZ') class Configuration(object): @@ -428,7 +428,7 @@ def _get_target_class(): return UDFResourcesProperty def _makeOne(self, *args, **kw): - return self._getTargetClass()(*args, **kw) + return self._get_target_class()(*args, **kw) def _descriptor_and_klass(self): descriptor = self._makeOne() diff --git a/packages/google-cloud-bigquery/unit_tests/test__http.py b/packages/google-cloud-bigquery/unit_tests/test__http.py index ca3f9c08cc4d..fe3ccb1491c6 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__http.py +++ b/packages/google-cloud-bigquery/unit_tests/test__http.py @@ -23,7 +23,7 @@ def _get_target_class(): return Connection def _makeOne(self, *args, **kw): - return self._getTargetClass()(*args, **kw) + return self._get_target_class()(*args, **kw) def test_build_api_url_no_extra_query_params(self): conn = self._makeOne() diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index 16cc0387d14b..cd726f53fc71 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -23,7 +23,7 @@ def _get_target_class(): return Client def _makeOne(self, *args, **kw): - return self._getTargetClass()(*args, **kw) + return self._get_target_class()(*args, **kw) def test_ctor(self): from google.cloud.bigquery._http import Connection diff --git a/packages/google-cloud-bigquery/unit_tests/test_dataset.py b/packages/google-cloud-bigquery/unit_tests/test_dataset.py index e8afd5521d38..08d698b031b5 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_dataset.py +++ b/packages/google-cloud-bigquery/unit_tests/test_dataset.py @@ -23,7 +23,7 @@ def _get_target_class(): return AccessGrant def _makeOne(self, *args, **kw): - return self._getTargetClass()(*args, **kw) + return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): grant = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') @@ -87,7 +87,7 @@ def _get_target_class(): return Dataset def _makeOne(self, *args, **kw): - return self._getTargetClass()(*args, **kw) + return self._get_target_class()(*args, **kw) def _setUpConstants(self): import datetime @@ -271,7 +271,7 @@ def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = {} - klass = self._getTargetClass() + klass = self._get_target_class() with self.assertRaises(KeyError): klass.from_api_repr(RESOURCE, client=client) @@ -285,7 +285,7 @@ def test_from_api_repr_bare(self): 'datasetId': self.DS_NAME, } } - klass = self._getTargetClass() + klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE, client=client) self.assertIs(dataset._client, client) self._verifyResourceProperties(dataset, RESOURCE) @@ -293,7 +293,7 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() - klass = self._getTargetClass() + klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE, client=client) self.assertIs(dataset._client, client) self._verifyResourceProperties(dataset, RESOURCE) diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index 0edcb2abcb6a..22bdd0715315 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -23,7 +23,7 @@ class _Base(object): JOB_NAME = 'job_name' def _makeOne(self, *args, **kw): - return self._getTargetClass()(*args, **kw) + return self._get_target_class()(*args, **kw) def _setUpConstants(self): import datetime @@ -364,7 +364,7 @@ def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = {} - klass = self._getTargetClass() + klass = self._get_target_class() with self.assertRaises(KeyError): klass.from_api_repr(RESOURCE, client=client) @@ -378,7 +378,7 @@ def test_from_api_repr_missing_config(self): 'jobId': self.JOB_NAME, } } - klass = self._getTargetClass() + klass = self._get_target_class() with self.assertRaises(KeyError): klass.from_api_repr(RESOURCE, client=client) @@ -402,7 +402,7 @@ def test_from_api_repr_bare(self): } }, } - klass = self._getTargetClass() + klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) @@ -410,7 +410,7 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() - klass = self._getTargetClass() + klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE, client=client) self.assertIs(dataset._client, client) self._verifyResourceProperties(dataset, RESOURCE) @@ -717,7 +717,7 @@ def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = {} - klass = self._getTargetClass() + klass = self._get_target_class() with self.assertRaises(KeyError): klass.from_api_repr(RESOURCE, client=client) @@ -731,7 +731,7 @@ def test_from_api_repr_missing_config(self): 'jobId': self.JOB_NAME, } } - klass = self._getTargetClass() + klass = self._get_target_class() with self.assertRaises(KeyError): klass.from_api_repr(RESOURCE, client=client) @@ -759,7 +759,7 @@ def test_from_api_repr_bare(self): } }, } - klass = self._getTargetClass() + klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) @@ -767,7 +767,7 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() - klass = self._getTargetClass() + klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE, client=client) self.assertIs(dataset._client, client) self._verifyResourceProperties(dataset, RESOURCE) @@ -1019,7 +1019,7 @@ def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = {} - klass = self._getTargetClass() + klass = self._get_target_class() with self.assertRaises(KeyError): klass.from_api_repr(RESOURCE, client=client) @@ -1033,7 +1033,7 @@ def test_from_api_repr_missing_config(self): 'jobId': self.JOB_NAME, } } - klass = self._getTargetClass() + klass = self._get_target_class() with self.assertRaises(KeyError): klass.from_api_repr(RESOURCE, client=client) @@ -1057,7 +1057,7 @@ def test_from_api_repr_bare(self): } }, } - klass = self._getTargetClass() + klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) @@ -1065,7 +1065,7 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() - klass = self._getTargetClass() + klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE, client=client) self.assertIs(dataset._client, client) self._verifyResourceProperties(dataset, RESOURCE) @@ -1360,7 +1360,7 @@ def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = {} - klass = self._getTargetClass() + klass = self._get_target_class() with self.assertRaises(KeyError): klass.from_api_repr(RESOURCE, client=client) @@ -1374,7 +1374,7 @@ def test_from_api_repr_missing_config(self): 'jobId': self.JOB_NAME, } } - klass = self._getTargetClass() + klass = self._get_target_class() with self.assertRaises(KeyError): klass.from_api_repr(RESOURCE, client=client) @@ -1391,7 +1391,7 @@ def test_from_api_repr_bare(self): 'query': {'query': self.QUERY} }, } - klass = self._getTargetClass() + klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) @@ -1404,7 +1404,7 @@ def test_from_api_repr_w_properties(self): 'datasetId': self.DS_NAME, 'tableId': self.DESTINATION_TABLE, } - klass = self._getTargetClass() + klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE, client=client) self.assertIs(dataset._client, client) self._verifyResourceProperties(dataset, RESOURCE) diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/unit_tests/test_query.py index d744dd922c1b..91e3b921ef22 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_query.py +++ b/packages/google-cloud-bigquery/unit_tests/test_query.py @@ -29,7 +29,7 @@ def _get_target_class(): return QueryResults def _makeOne(self, *args, **kw): - return self._getTargetClass()(*args, **kw) + return self._get_target_class()(*args, **kw) def _makeResource(self, complete=False): resource = { @@ -152,7 +152,7 @@ def test_from_query_job(self): dataset = job.default_dataset = Dataset(DS_NAME, client) job.use_query_cache = True job.use_legacy_sql = True - klass = self._getTargetClass() + klass = self._get_target_class() query = klass.from_query_job(job) @@ -173,7 +173,7 @@ def test_from_query_job_wo_default_dataset(self): job = QueryJob( self.JOB_NAME, self.QUERY, client, udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - klass = self._getTargetClass() + klass = self._get_target_class() query = klass.from_query_job(job) diff --git a/packages/google-cloud-bigquery/unit_tests/test_schema.py b/packages/google-cloud-bigquery/unit_tests/test_schema.py index f3604efcdfe4..43577a5fe8c2 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_schema.py +++ b/packages/google-cloud-bigquery/unit_tests/test_schema.py @@ -23,7 +23,7 @@ def _get_target_class(): return SchemaField def _makeOne(self, *args, **kw): - return self._getTargetClass()(*args, **kw) + return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): field = self._makeOne('test', 'STRING') diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 493397697b92..3812de5624c9 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -41,7 +41,7 @@ def _get_target_class(): return Table def _makeOne(self, *args, **kw): - return self._getTargetClass()(*args, **kw) + return self._get_target_class()(*args, **kw) def _setUpConstants(self): import datetime @@ -359,7 +359,7 @@ def test_from_api_repr_missing_identity(self): client = _Client(self.PROJECT) dataset = _Dataset(client) RESOURCE = {} - klass = self._getTargetClass() + klass = self._get_target_class() with self.assertRaises(KeyError): klass.from_api_repr(RESOURCE, dataset) @@ -376,7 +376,7 @@ def test_from_api_repr_bare(self): }, 'type': 'TABLE', } - klass = self._getTargetClass() + klass = self._get_target_class() table = klass.from_api_repr(RESOURCE, dataset) self.assertEqual(table.name, self.TABLE_NAME) self.assertIs(table._dataset, dataset) @@ -386,7 +386,7 @@ def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) dataset = _Dataset(client) RESOURCE = self._makeResource() - klass = self._getTargetClass() + klass = self._get_target_class() table = klass.from_api_repr(RESOURCE, dataset) self.assertIs(table._dataset._client, client) self._verifyResourceProperties(table, RESOURCE) From c50933c686f8ad3d5202dc6776bb268c385a35aa Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 10 Nov 2016 11:05:35 -0800 Subject: [PATCH 0035/2016] Changing all instances of _makeOne to _make_one. Done via: $ git grep -l _makeOne | \ > xargs sed -i s/_makeOne/_make_one/g --- .../unit_tests/test__helpers.py | 12 +- .../unit_tests/test__http.py | 6 +- .../unit_tests/test_client.py | 32 ++--- .../unit_tests/test_dataset.py | 96 +++++++------- .../unit_tests/test_job.py | 78 +++++------ .../unit_tests/test_query.py | 28 ++-- .../unit_tests/test_schema.py | 52 ++++---- .../unit_tests/test_table.py | 122 +++++++++--------- 8 files changed, 213 insertions(+), 213 deletions(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 37f10ec7f20b..45c8bb89bc67 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -323,7 +323,7 @@ def _get_target_class(): from google.cloud.bigquery._helpers import _ConfigurationProperty return _ConfigurationProperty - def _makeOne(self, *args, **kw): + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_it(self): @@ -332,7 +332,7 @@ class Configuration(object): _attr = None class Wrapper(object): - attr = self._makeOne('attr') + attr = self._make_one('attr') def __init__(self): self._configuration = Configuration() @@ -359,7 +359,7 @@ def _get_target_class(): from google.cloud.bigquery._helpers import _TypedProperty return _TypedProperty - def _makeOne(self, *args, **kw): + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_it(self): @@ -368,7 +368,7 @@ class Configuration(object): _attr = None class Wrapper(object): - attr = self._makeOne('attr', int) + attr = self._make_one('attr', int) def __init__(self): self._configuration = Configuration() @@ -427,11 +427,11 @@ def _get_target_class(): from google.cloud.bigquery._helpers import UDFResourcesProperty return UDFResourcesProperty - def _makeOne(self, *args, **kw): + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _descriptor_and_klass(self): - descriptor = self._makeOne() + descriptor = self._make_one() class _Test(object): _udf_resources = () diff --git a/packages/google-cloud-bigquery/unit_tests/test__http.py b/packages/google-cloud-bigquery/unit_tests/test__http.py index fe3ccb1491c6..0592b98178cd 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__http.py +++ b/packages/google-cloud-bigquery/unit_tests/test__http.py @@ -22,11 +22,11 @@ def _get_target_class(): from google.cloud.bigquery._http import Connection return Connection - def _makeOne(self, *args, **kw): + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_build_api_url_no_extra_query_params(self): - conn = self._makeOne() + conn = self._make_one() URI = '/'.join([ conn.API_BASE_URL, 'bigquery', @@ -38,7 +38,7 @@ def test_build_api_url_no_extra_query_params(self): def test_build_api_url_w_extra_query_params(self): from six.moves.urllib.parse import parse_qsl from six.moves.urllib.parse import urlsplit - conn = self._makeOne() + conn = self._make_one() uri = conn.build_api_url('/foo', {'bar': 'baz'}) scheme, netloc, path, qs, _ = urlsplit(uri) self.assertEqual('%s://%s' % (scheme, netloc), conn.API_BASE_URL) diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index cd726f53fc71..9034658692d9 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -22,7 +22,7 @@ def _get_target_class(): from google.cloud.bigquery.client import Client return Client - def _makeOne(self, *args, **kw): + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor(self): @@ -30,7 +30,7 @@ def test_ctor(self): PROJECT = 'PROJECT' creds = _Credentials() http = object() - client = self._makeOne(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, http=http) self.assertIsInstance(client.connection, Connection) self.assertIs(client.connection.credentials, creds) self.assertIs(client.connection.http, http) @@ -58,7 +58,7 @@ def test_list_projects_defaults(self): ] } creds = _Credentials() - client = self._makeOne(PROJECT_1, creds) + client = self._make_one(PROJECT_1, creds) conn = client.connection = _Connection(DATA) iterator = client.list_projects() @@ -87,7 +87,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): TOKEN = 'TOKEN' DATA = {} creds = _Credentials() - client = self._makeOne(PROJECT, creds) + client = self._make_one(PROJECT, creds) conn = client.connection = _Connection(DATA) iterator = client.list_projects(max_results=3, page_token=TOKEN) @@ -129,7 +129,7 @@ def test_list_datasets_defaults(self): ] } creds = _Credentials() - client = self._makeOne(PROJECT, creds) + client = self._make_one(PROJECT, creds) conn = client.connection = _Connection(DATA) iterator = client.list_datasets() @@ -157,7 +157,7 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): TOKEN = 'TOKEN' DATA = {} creds = _Credentials() - client = self._makeOne(PROJECT, creds) + client = self._make_one(PROJECT, creds) conn = client.connection = _Connection(DATA) iterator = client.list_datasets( @@ -182,7 +182,7 @@ def test_dataset(self): DATASET = 'dataset_name' creds = _Credentials() http = object() - client = self._makeOne(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) self.assertIsInstance(dataset, Dataset) self.assertEqual(dataset.name, DATASET) @@ -191,7 +191,7 @@ def test_dataset(self): def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _Credentials() - client = self._makeOne(PROJECT, creds) + client = self._make_one(PROJECT, creds) with self.assertRaises(ValueError): client.job_from_resource({'configuration': {'nonesuch': {}}}) @@ -305,7 +305,7 @@ def test_list_jobs_defaults(self): ] } creds = _Credentials() - client = self._makeOne(PROJECT, creds) + client = self._make_one(PROJECT, creds) conn = client.connection = _Connection(DATA) iterator = client.list_jobs() @@ -361,7 +361,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): ] } creds = _Credentials() - client = self._makeOne(PROJECT, creds) + client = self._make_one(PROJECT, creds) conn = client.connection = _Connection(DATA) iterator = client.list_jobs() @@ -389,7 +389,7 @@ def test_list_jobs_explicit_missing(self): DATA = {} TOKEN = 'TOKEN' creds = _Credentials() - client = self._makeOne(PROJECT, creds) + client = self._make_one(PROJECT, creds) conn = client.connection = _Connection(DATA) iterator = client.list_jobs(max_results=1000, page_token=TOKEN, @@ -421,7 +421,7 @@ def test_load_table_from_storage(self): SOURCE_URI = 'http://example.com/source.csv' creds = _Credentials() http = object() - client = self._makeOne(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) destination = dataset.table(DESTINATION) job = client.load_table_from_storage(JOB, destination, SOURCE_URI) @@ -440,7 +440,7 @@ def test_copy_table(self): DESTINATION = 'destination_table' creds = _Credentials() http = object() - client = self._makeOne(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) @@ -460,7 +460,7 @@ def test_extract_table_to_storage(self): DESTINATION = 'gs://bucket_name/object_name' creds = _Credentials() http = object() - client = self._makeOne(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) source = dataset.table(SOURCE) job = client.extract_table_to_storage(JOB, source, DESTINATION) @@ -477,7 +477,7 @@ def test_run_async_query(self): QUERY = 'select count(*) from persons' creds = _Credentials() http = object() - client = self._makeOne(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, http=http) job = client.run_async_query(JOB, QUERY) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) @@ -490,7 +490,7 @@ def test_run_sync_query(self): QUERY = 'select count(*) from persons' creds = _Credentials() http = object() - client = self._makeOne(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, http=http) job = client.run_sync_query(QUERY) self.assertIsInstance(job, QueryResults) self.assertIs(job._client, client) diff --git a/packages/google-cloud-bigquery/unit_tests/test_dataset.py b/packages/google-cloud-bigquery/unit_tests/test_dataset.py index 08d698b031b5..1493d266d44f 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_dataset.py +++ b/packages/google-cloud-bigquery/unit_tests/test_dataset.py @@ -22,30 +22,30 @@ def _get_target_class(): from google.cloud.bigquery.dataset import AccessGrant return AccessGrant - def _makeOne(self, *args, **kw): + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - grant = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') + grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') self.assertEqual(grant.role, 'OWNER') self.assertEqual(grant.entity_type, 'userByEmail') self.assertEqual(grant.entity_id, 'phred@example.com') def test_ctor_bad_entity_type(self): with self.assertRaises(ValueError): - self._makeOne(None, 'unknown', None) + self._make_one(None, 'unknown', None) def test_ctor_view_with_role(self): role = 'READER' entity_type = 'view' with self.assertRaises(ValueError): - self._makeOne(role, entity_type, None) + self._make_one(role, entity_type, None) def test_ctor_view_success(self): role = None entity_type = 'view' entity_id = object() - grant = self._makeOne(role, entity_type, entity_id) + grant = self._make_one(role, entity_type, entity_id) self.assertEqual(grant.role, role) self.assertEqual(grant.entity_type, entity_type) self.assertEqual(grant.entity_id, entity_id) @@ -54,26 +54,26 @@ def test_ctor_nonview_without_role(self): role = None entity_type = 'userByEmail' with self.assertRaises(ValueError): - self._makeOne(role, entity_type, None) + self._make_one(role, entity_type, None) def test___eq___role_mismatch(self): - grant = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') - other = self._makeOne('WRITER', 'userByEmail', 'phred@example.com') + grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + other = self._make_one('WRITER', 'userByEmail', 'phred@example.com') self.assertNotEqual(grant, other) def test___eq___entity_type_mismatch(self): - grant = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') - other = self._makeOne('OWNER', 'groupByEmail', 'phred@example.com') + grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + other = self._make_one('OWNER', 'groupByEmail', 'phred@example.com') self.assertNotEqual(grant, other) def test___eq___entity_id_mismatch(self): - grant = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') - other = self._makeOne('OWNER', 'userByEmail', 'bharney@example.com') + grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + other = self._make_one('OWNER', 'userByEmail', 'bharney@example.com') self.assertNotEqual(grant, other) def test___eq___hit(self): - grant = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') - other = self._makeOne('OWNER', 'userByEmail', 'phred@example.com') + grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + other = self._make_one('OWNER', 'userByEmail', 'phred@example.com') self.assertEqual(grant, other) @@ -86,7 +86,7 @@ def _get_target_class(): from google.cloud.bigquery.dataset import Dataset return Dataset - def _makeOne(self, *args, **kw): + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _setUpConstants(self): @@ -176,7 +176,7 @@ def _verifyResourceProperties(self, dataset, resource): def test_ctor(self): client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) self.assertEqual(dataset.name, self.DS_NAME) self.assertIs(dataset._client, client) self.assertEqual(dataset.project, client.project) @@ -198,14 +198,14 @@ def test_ctor(self): def test_access_roles_setter_non_list(self): client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) with self.assertRaises(TypeError): dataset.access_grants = object() def test_access_roles_setter_invalid_field(self): from google.cloud.bigquery.dataset import AccessGrant client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') with self.assertRaises(ValueError): dataset.access_grants = [phred, object()] @@ -213,7 +213,7 @@ def test_access_roles_setter_invalid_field(self): def test_access_roles_setter(self): from google.cloud.bigquery.dataset import AccessGrant client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') bharney = AccessGrant('OWNER', 'userByEmail', 'bharney@example.com') dataset.access_grants = [phred, bharney] @@ -221,49 +221,49 @@ def test_access_roles_setter(self): def test_default_table_expiration_ms_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) with self.assertRaises(ValueError): dataset.default_table_expiration_ms = 'bogus' def test_default_table_expiration_ms_setter(self): client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) dataset.default_table_expiration_ms = 12345 self.assertEqual(dataset.default_table_expiration_ms, 12345) def test_description_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) with self.assertRaises(ValueError): dataset.description = 12345 def test_description_setter(self): client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) dataset.description = 'DESCRIPTION' self.assertEqual(dataset.description, 'DESCRIPTION') def test_friendly_name_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) with self.assertRaises(ValueError): dataset.friendly_name = 12345 def test_friendly_name_setter(self): client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) dataset.friendly_name = 'FRIENDLY' self.assertEqual(dataset.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) with self.assertRaises(ValueError): dataset.location = 12345 def test_location_setter(self): client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client) + dataset = self._make_one(self.DS_NAME, client) dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') @@ -303,7 +303,7 @@ def test__parse_access_grants_w_unknown_entity_type(self): {'role': 'READER', 'unknown': 'UNKNOWN'}, ] client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) with self.assertRaises(ValueError): dataset._parse_access_grants(ACCESS) @@ -317,7 +317,7 @@ def test__parse_access_grants_w_extra_keys(self): }, ] client = _Client(self.PROJECT) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) with self.assertRaises(ValueError): dataset._parse_access_grants(ACCESS) @@ -326,7 +326,7 @@ def test_create_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) dataset.create() @@ -355,7 +355,7 @@ def test_create_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_NAME, client=CLIENT1) dataset.friendly_name = TITLE dataset.description = DESCRIPTION VIEW = { @@ -408,7 +408,7 @@ def test_create_w_missing_output_properties(self): self.WHEN = None conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) dataset.create() @@ -427,7 +427,7 @@ def test_exists_miss_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) self.assertFalse(dataset.exists()) @@ -443,7 +443,7 @@ def test_exists_hit_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_NAME, client=CLIENT1) self.assertTrue(dataset.exists(client=CLIENT2)) @@ -459,7 +459,7 @@ def test_reload_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) dataset.reload() @@ -476,7 +476,7 @@ def test_reload_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_NAME, client=CLIENT1) dataset.reload(client=CLIENT2) @@ -491,7 +491,7 @@ def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) with self.assertRaises(ValueError): dataset.patch(default_table_expiration_ms='BOGUS') @@ -505,7 +505,7 @@ def test_patch_w_bound_client(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) dataset.patch(description=DESCRIPTION, friendly_name=TITLE) @@ -531,7 +531,7 @@ def test_patch_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_NAME, client=CLIENT1) dataset.patch(client=CLIENT2, default_table_expiration_ms=DEF_TABLE_EXP, @@ -558,7 +558,7 @@ def test_update_w_bound_client(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) dataset.description = DESCRIPTION dataset.friendly_name = TITLE @@ -588,7 +588,7 @@ def test_update_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_NAME, client=CLIENT1) dataset.default_table_expiration_ms = DEF_TABLE_EXP dataset.location = LOCATION @@ -612,7 +612,7 @@ def test_delete_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) dataset.delete() @@ -627,7 +627,7 @@ def test_delete_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._makeOne(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_NAME, client=CLIENT1) dataset.delete(client=CLIENT2) @@ -642,7 +642,7 @@ def test_list_tables_empty(self): conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) iterator = dataset.list_tables() self.assertIs(iterator.dataset, dataset) @@ -686,7 +686,7 @@ def test_list_tables_defaults(self): conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) iterator = dataset.list_tables() self.assertIs(iterator.dataset, dataset) @@ -733,7 +733,7 @@ def test_list_tables_explicit(self): conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) iterator = dataset.list_tables(max_results=3, page_token=TOKEN) self.assertIs(iterator.dataset, dataset) @@ -759,7 +759,7 @@ def test_table_wo_schema(self): from google.cloud.bigquery.table import Table conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) table = dataset.table('table_name') self.assertIsInstance(table, Table) self.assertEqual(table.name, 'table_name') @@ -771,7 +771,7 @@ def test_table_w_schema(self): from google.cloud.bigquery.table import Table conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._makeOne(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_NAME, client=client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = dataset.table('table_name', schema=[full_name, age]) diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index 22bdd0715315..14f5a6ed20a0 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -22,7 +22,7 @@ class _Base(object): TABLE_NAME = 'table_name' JOB_NAME = 'job_name' - def _makeOne(self, *args, **kw): + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _setUpConstants(self): @@ -231,7 +231,7 @@ def _verifyResourceProperties(self, job, resource): def test_ctor(self): client = _Client(self.PROJECT) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) self.assertIs(job.destination, table) self.assertEqual(list(job.source_uris), [self.SOURCE1]) self.assertIs(job._client, client) @@ -268,14 +268,14 @@ def test_ctor_w_schema(self): table = _Table() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client, + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client, schema=[full_name, age]) self.assertEqual(job.schema, [full_name, age]) def test_schema_setter_non_list(self): client = _Client(self.PROJECT) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) with self.assertRaises(TypeError): job.schema = object() @@ -283,7 +283,7 @@ def test_schema_setter_invalid_field(self): from google.cloud.bigquery.schema import SchemaField client = _Client(self.PROJECT) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') with self.assertRaises(ValueError): job.schema = [full_name, object()] @@ -292,7 +292,7 @@ def test_schema_setter(self): from google.cloud.bigquery.schema import SchemaField client = _Client(self.PROJECT) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') job.schema = [full_name, age] @@ -317,7 +317,7 @@ def test_props_set_by_server(self): client = _Client(self.PROJECT) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) job._properties['etag'] = 'ETAG' job._properties['id'] = JOB_ID job._properties['selfLink'] = URL @@ -419,7 +419,7 @@ def test_begin_w_already_running(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) job._properties['status'] = {'state': 'RUNNING'} with self.assertRaises(ValueError): @@ -436,7 +436,7 @@ def test_begin_w_bound_client(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) job.begin() @@ -498,7 +498,7 @@ def test_begin_w_alternate_client(self): table = _Table() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client1, + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1, schema=[full_name, age]) job.allow_jagged_rows = True @@ -537,7 +537,7 @@ def test_exists_miss_w_bound_client(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) self.assertFalse(job.exists()) @@ -554,7 +554,7 @@ def test_exists_hit_w_alternate_client(self): conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) self.assertTrue(job.exists(client=client2)) @@ -571,7 +571,7 @@ def test_reload_w_bound_client(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) job.reload() @@ -589,7 +589,7 @@ def test_reload_w_alternate_client(self): conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) job.reload(client=client2) @@ -607,7 +607,7 @@ def test_cancel_w_bound_client(self): conn = _Connection(RESPONSE) client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) job.cancel() @@ -626,7 +626,7 @@ def test_cancel_w_alternate_client(self): conn2 = _Connection(RESPONSE) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._makeOne(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) job.cancel(client=client2) @@ -698,7 +698,7 @@ def test_ctor(self): client = _Client(self.PROJECT) source = _Table(self.SOURCE_TABLE) destination = _Table(self.DESTINATION_TABLE) - job = self._makeOne(self.JOB_NAME, destination, [source], client) + job = self._make_one(self.JOB_NAME, destination, [source], client) self.assertIs(job.destination, destination) self.assertEqual(job.sources, [source]) self.assertIs(job._client, client) @@ -784,7 +784,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) destination = _Table(self.DESTINATION_TABLE) - job = self._makeOne(self.JOB_NAME, destination, [source], client) + job = self._make_one(self.JOB_NAME, destination, [source], client) job.begin() @@ -839,7 +839,7 @@ def test_begin_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) destination = _Table(self.DESTINATION_TABLE) - job = self._makeOne(self.JOB_NAME, destination, [source], client1) + job = self._make_one(self.JOB_NAME, destination, [source], client1) job.create_disposition = 'CREATE_NEVER' job.write_disposition = 'WRITE_TRUNCATE' @@ -869,7 +869,7 @@ def test_exists_miss_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) destination = _Table(self.DESTINATION_TABLE) - job = self._makeOne(self.JOB_NAME, destination, [source], client) + job = self._make_one(self.JOB_NAME, destination, [source], client) self.assertFalse(job.exists()) @@ -887,7 +887,7 @@ def test_exists_hit_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) destination = _Table(self.DESTINATION_TABLE) - job = self._makeOne(self.JOB_NAME, destination, [source], client1) + job = self._make_one(self.JOB_NAME, destination, [source], client1) self.assertTrue(job.exists(client=client2)) @@ -905,7 +905,7 @@ def test_reload_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) destination = _Table(self.DESTINATION_TABLE) - job = self._makeOne(self.JOB_NAME, destination, [source], client) + job = self._make_one(self.JOB_NAME, destination, [source], client) job.reload() @@ -924,7 +924,7 @@ def test_reload_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) destination = _Table(self.DESTINATION_TABLE) - job = self._makeOne(self.JOB_NAME, destination, [source], client1) + job = self._make_one(self.JOB_NAME, destination, [source], client1) job.reload(client=client2) @@ -997,7 +997,7 @@ def _verifyResourceProperties(self, job, resource): def test_ctor(self): client = _Client(self.PROJECT) source = _Table(self.SOURCE_TABLE) - job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client) self.assertEqual(job.source, source) self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) @@ -1081,7 +1081,7 @@ def test_begin_w_bound_client(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) - job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client) job.begin() @@ -1130,7 +1130,7 @@ def test_begin_w_alternate_client(self): conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) - job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client1) job.compression = 'GZIP' @@ -1162,7 +1162,7 @@ def test_exists_miss_w_bound_client(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) - job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client) self.assertFalse(job.exists()) @@ -1180,7 +1180,7 @@ def test_exists_hit_w_alternate_client(self): conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) - job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client1) self.assertTrue(job.exists(client=client2)) @@ -1198,7 +1198,7 @@ def test_reload_w_bound_client(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) - job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client) job.reload() @@ -1217,7 +1217,7 @@ def test_reload_w_alternate_client(self): conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) - job = self._makeOne(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client1) job.reload(client=client2) @@ -1332,7 +1332,7 @@ def _verifyResourceProperties(self, job, resource): def test_ctor(self): client = _Client(self.PROJECT) - job = self._makeOne(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_NAME, self.QUERY, client) self.assertEqual(job.query, self.QUERY) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) @@ -1412,7 +1412,7 @@ def test_from_api_repr_w_properties(self): def test_results(self): from google.cloud.bigquery.query import QueryResults client = _Client(self.PROJECT) - job = self._makeOne(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_NAME, self.QUERY, client) results = job.results() self.assertIsInstance(results, QueryResults) self.assertIs(results._job, job) @@ -1427,7 +1427,7 @@ def test_begin_w_bound_client(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._makeOne(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_NAME, self.QUERY, client) job.begin() self.assertEqual(job.udf_resources, []) @@ -1483,7 +1483,7 @@ def test_begin_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._makeOne(self.JOB_NAME, self.QUERY, client1) + job = self._make_one(self.JOB_NAME, self.QUERY, client1) dataset = Dataset(DS_NAME, client1) table = Table(TABLE, dataset) @@ -1532,7 +1532,7 @@ def test_begin_w_bound_client_and_udf(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._makeOne(self.JOB_NAME, self.QUERY, client, + job = self._make_one(self.JOB_NAME, self.QUERY, client, udf_resources=[ UDFResource("resourceUri", RESOURCE_URI) ]) @@ -1565,7 +1565,7 @@ def test_exists_miss_w_bound_client(self): PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - job = self._makeOne(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_NAME, self.QUERY, client) self.assertFalse(job.exists()) @@ -1581,7 +1581,7 @@ def test_exists_hit_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._makeOne(self.JOB_NAME, self.QUERY, client1) + job = self._make_one(self.JOB_NAME, self.QUERY, client1) self.assertTrue(job.exists(client=client2)) @@ -1601,7 +1601,7 @@ def test_reload_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._makeOne(self.JOB_NAME, None, client) + job = self._make_one(self.JOB_NAME, None, client) dataset = Dataset(DS_NAME, client) table = Table(DEST_TABLE, dataset) @@ -1632,7 +1632,7 @@ def test_reload_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._makeOne(self.JOB_NAME, self.QUERY, client1) + job = self._make_one(self.JOB_NAME, self.QUERY, client1) job.reload(client=client2) diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/unit_tests/test_query.py index 91e3b921ef22..58bc8ced8e1b 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_query.py +++ b/packages/google-cloud-bigquery/unit_tests/test_query.py @@ -28,7 +28,7 @@ def _get_target_class(): from google.cloud.bigquery.query import QueryResults return QueryResults - def _makeOne(self, *args, **kw): + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _makeResource(self, complete=False): @@ -119,7 +119,7 @@ def _verifyResourceProperties(self, query, resource): def test_ctor(self): client = _Client(self.PROJECT) - query = self._makeOne(self.QUERY, client) + query = self._make_one(self.QUERY, client) self.assertEqual(query.query, self.QUERY) self.assertIs(query._client, client) @@ -187,14 +187,14 @@ def test_from_query_job_wo_default_dataset(self): def test_job_wo_jobid(self): client = _Client(self.PROJECT) - query = self._makeOne(self.QUERY, client) + query = self._make_one(self.QUERY, client) self.assertIsNone(query.job) def test_job_w_jobid(self): from google.cloud.bigquery.job import QueryJob SERVER_GENERATED = 'SERVER_GENERATED' client = _Client(self.PROJECT) - query = self._makeOne(self.QUERY, client) + query = self._make_one(self.QUERY, client) query._properties['jobReference'] = { 'projectId': self.PROJECT, 'jobId': SERVER_GENERATED, @@ -209,7 +209,7 @@ def test_job_w_jobid(self): def test_schema(self): client = _Client(self.PROJECT) - query = self._makeOne(self.QUERY, client) + query = self._make_one(self.QUERY, client) self._verifyResourceProperties(query, {}) resource = { 'schema': { @@ -225,7 +225,7 @@ def test_schema(self): def test_run_w_already_has_job(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - query = self._makeOne(self.QUERY, client) + query = self._make_one(self.QUERY, client) query._job = object() # simulate already running with self.assertRaises(ValueError): query.run() @@ -235,7 +235,7 @@ def test_run_w_bound_client(self): RESOURCE = self._makeResource(complete=False) conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - query = self._makeOne(self.QUERY, client) + query = self._make_one(self.QUERY, client) self.assertEqual(query.udf_resources, []) query.run() @@ -255,7 +255,7 @@ def test_run_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - query = self._makeOne(self.QUERY, client1) + query = self._make_one(self.QUERY, client1) query.default_dataset = client2.dataset(DATASET) query.max_results = 100 @@ -295,7 +295,7 @@ def test_run_w_inline_udf(self): RESOURCE = self._makeResource(complete=False) conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - query = self._makeOne(self.QUERY, client) + query = self._make_one(self.QUERY, client) query.udf_resources = [UDFResource("inlineCode", INLINE_UDF_CODE)] query.run() @@ -317,7 +317,7 @@ def test_run_w_udf_resource_uri(self): RESOURCE = self._makeResource(complete=False) conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - query = self._makeOne(self.QUERY, client) + query = self._make_one(self.QUERY, client) query.udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] query.run() @@ -340,7 +340,7 @@ def test_run_w_mixed_udfs(self): RESOURCE = self._makeResource(complete=False) conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - query = self._makeOne(self.QUERY, client) + query = self._make_one(self.QUERY, client) query.udf_resources = [UDFResource("resourceUri", RESOURCE_URI), UDFResource("inlineCode", INLINE_UDF_CODE)] @@ -363,7 +363,7 @@ def test_run_w_mixed_udfs(self): def test_fetch_data_query_not_yet_run(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - query = self._makeOne(self.QUERY, client) + query = self._make_one(self.QUERY, client) self.assertRaises(ValueError, query.fetch_data) def test_fetch_data_w_bound_client(self): @@ -374,7 +374,7 @@ def test_fetch_data_w_bound_client(self): conn = _Connection(AFTER) client = _Client(project=self.PROJECT, connection=conn) - query = self._makeOne(self.QUERY, client) + query = self._make_one(self.QUERY, client) query._set_properties(BEFORE) self.assertFalse(query.complete) @@ -407,7 +407,7 @@ def test_fetch_data_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(AFTER) client2 = _Client(project=self.PROJECT, connection=conn2) - query = self._makeOne(self.QUERY, client1) + query = self._make_one(self.QUERY, client1) query._set_properties(BEFORE) self.assertFalse(query.complete) diff --git a/packages/google-cloud-bigquery/unit_tests/test_schema.py b/packages/google-cloud-bigquery/unit_tests/test_schema.py index 43577a5fe8c2..12ad43aad607 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_schema.py +++ b/packages/google-cloud-bigquery/unit_tests/test_schema.py @@ -22,11 +22,11 @@ def _get_target_class(): from google.cloud.bigquery.schema import SchemaField return SchemaField - def _makeOne(self, *args, **kw): + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - field = self._makeOne('test', 'STRING') + field = self._make_one('test', 'STRING') self.assertEqual(field.name, 'test') self.assertEqual(field.field_type, 'STRING') self.assertEqual(field.mode, 'NULLABLE') @@ -34,7 +34,7 @@ def test_ctor_defaults(self): self.assertIsNone(field.fields) def test_ctor_explicit(self): - field = self._makeOne('test', 'STRING', mode='REQUIRED', + field = self._make_one('test', 'STRING', mode='REQUIRED', description='Testing') self.assertEqual(field.name, 'test') self.assertEqual(field.field_type, 'STRING') @@ -43,9 +43,9 @@ def test_ctor_explicit(self): self.assertIsNone(field.fields) def test_ctor_subfields(self): - field = self._makeOne('phone_number', 'RECORD', - fields=[self._makeOne('area_code', 'STRING'), - self._makeOne('local_number', 'STRING')]) + field = self._make_one('phone_number', 'RECORD', + fields=[self._make_one('area_code', 'STRING'), + self._make_one('local_number', 'STRING')]) self.assertEqual(field.name, 'phone_number') self.assertEqual(field.field_type, 'RECORD') self.assertEqual(field.mode, 'NULLABLE') @@ -63,49 +63,49 @@ def test_ctor_subfields(self): self.assertIsNone(field.fields[1].fields) def test___eq___name_mismatch(self): - field = self._makeOne('test', 'STRING') - other = self._makeOne('other', 'STRING') + field = self._make_one('test', 'STRING') + other = self._make_one('other', 'STRING') self.assertNotEqual(field, other) def test___eq___field_type_mismatch(self): - field = self._makeOne('test', 'STRING') - other = self._makeOne('test', 'INTEGER') + field = self._make_one('test', 'STRING') + other = self._make_one('test', 'INTEGER') self.assertNotEqual(field, other) def test___eq___mode_mismatch(self): - field = self._makeOne('test', 'STRING', mode='REQUIRED') - other = self._makeOne('test', 'STRING', mode='NULLABLE') + field = self._make_one('test', 'STRING', mode='REQUIRED') + other = self._make_one('test', 'STRING', mode='NULLABLE') self.assertNotEqual(field, other) def test___eq___description_mismatch(self): - field = self._makeOne('test', 'STRING', description='Testing') - other = self._makeOne('test', 'STRING', description='Other') + field = self._make_one('test', 'STRING', description='Testing') + other = self._make_one('test', 'STRING', description='Other') self.assertNotEqual(field, other) def test___eq___fields_mismatch(self): - sub1 = self._makeOne('sub1', 'STRING') - sub2 = self._makeOne('sub2', 'STRING') - field = self._makeOne('test', 'RECORD', fields=[sub1]) - other = self._makeOne('test', 'RECORD', fields=[sub2]) + sub1 = self._make_one('sub1', 'STRING') + sub2 = self._make_one('sub2', 'STRING') + field = self._make_one('test', 'RECORD', fields=[sub1]) + other = self._make_one('test', 'RECORD', fields=[sub2]) self.assertNotEqual(field, other) def test___eq___hit(self): - field = self._makeOne('test', 'STRING', mode='REQUIRED', + field = self._make_one('test', 'STRING', mode='REQUIRED', description='Testing') - other = self._makeOne('test', 'STRING', mode='REQUIRED', + other = self._make_one('test', 'STRING', mode='REQUIRED', description='Testing') self.assertEqual(field, other) def test___eq___hit_case_diff_on_type(self): - field = self._makeOne('test', 'STRING', mode='REQUIRED', + field = self._make_one('test', 'STRING', mode='REQUIRED', description='Testing') - other = self._makeOne('test', 'string', mode='REQUIRED', + other = self._make_one('test', 'string', mode='REQUIRED', description='Testing') self.assertEqual(field, other) def test___eq___hit_w_fields(self): - sub1 = self._makeOne('sub1', 'STRING') - sub2 = self._makeOne('sub2', 'STRING') - field = self._makeOne('test', 'RECORD', fields=[sub1, sub2]) - other = self._makeOne('test', 'RECORD', fields=[sub1, sub2]) + sub1 = self._make_one('sub1', 'STRING') + sub2 = self._make_one('sub2', 'STRING') + field = self._make_one('test', 'RECORD', fields=[sub1, sub2]) + other = self._make_one('test', 'RECORD', fields=[sub1, sub2]) self.assertEqual(field, other) diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 3812de5624c9..5fda9a52ee9c 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -40,7 +40,7 @@ def _get_target_class(): from google.cloud.bigquery.table import Table return Table - def _makeOne(self, *args, **kw): + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _setUpConstants(self): @@ -134,7 +134,7 @@ def _verifyResourceProperties(self, table, resource): def test_ctor(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) self.assertEqual(table.name, self.TABLE_NAME) self.assertIs(table._dataset, dataset) self.assertEqual(table.project, self.PROJECT) @@ -166,14 +166,14 @@ def test_ctor_w_schema(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) self.assertEqual(table.schema, [full_name, age]) def test_num_bytes_getter(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) # Check with no value set. self.assertIsNone(table.num_bytes) @@ -195,7 +195,7 @@ def test_num_bytes_getter(self): def test_num_rows_getter(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) # Check with no value set. self.assertIsNone(table.num_rows) @@ -217,7 +217,7 @@ def test_num_rows_getter(self): def test_schema_setter_non_list(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) with self.assertRaises(TypeError): table.schema = object() @@ -225,7 +225,7 @@ def test_schema_setter_invalid_field(self): from google.cloud.bigquery.table import SchemaField client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') with self.assertRaises(ValueError): table.schema = [full_name, object()] @@ -234,7 +234,7 @@ def test_schema_setter(self): from google.cloud.bigquery.table import SchemaField client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table.schema = [full_name, age] @@ -253,7 +253,7 @@ def test_props_set_by_server(self): self.PROJECT, self.DS_NAME, self.TABLE_NAME) client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) table._properties['creationTime'] = _millis(CREATED) table._properties['etag'] = 'ETAG' table._properties['lastModifiedTime'] = _millis(MODIFIED) @@ -275,21 +275,21 @@ def test_props_set_by_server(self): def test_description_setter_bad_value(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) with self.assertRaises(ValueError): table.description = 12345 def test_description_setter(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) table.description = 'DESCRIPTION' self.assertEqual(table.description, 'DESCRIPTION') def test_expires_setter_bad_value(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) with self.assertRaises(ValueError): table.expires = object() @@ -300,56 +300,56 @@ def test_expires_setter(self): WHEN = datetime.datetime(2015, 7, 28, 16, 39, tzinfo=UTC) client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) table.expires = WHEN self.assertEqual(table.expires, WHEN) def test_friendly_name_setter_bad_value(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) with self.assertRaises(ValueError): table.friendly_name = 12345 def test_friendly_name_setter(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) table.friendly_name = 'FRIENDLY' self.assertEqual(table.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) with self.assertRaises(ValueError): table.location = 12345 def test_location_setter(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) table.location = 'LOCATION' self.assertEqual(table.location, 'LOCATION') def test_view_query_setter_bad_value(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) with self.assertRaises(ValueError): table.view_query = 12345 def test_view_query_setter(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) table.view_query = 'select * from foo' self.assertEqual(table.view_query, 'select * from foo') def test_view_query_deleter(self): client = _Client(self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) table.view_query = 'select * from foo' del table.view_query self.assertIsNone(table.view_query) @@ -395,7 +395,7 @@ def test_create_no_view_query_no_schema(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset) + table = self._make_one(self.TABLE_NAME, dataset) with self.assertRaises(ValueError): table.create() @@ -409,7 +409,7 @@ def test_create_w_bound_client(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) table.create() @@ -439,7 +439,7 @@ def test_create_w_partition_no_expire(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) self.assertIsNone(table.partitioning_type) @@ -473,7 +473,7 @@ def test_create_w_partition_and_expire(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 @@ -506,7 +506,7 @@ def test_partition_type_setter_bad_type(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) with self.assertRaises(ValueError): table.partitioning_type = 123 @@ -519,7 +519,7 @@ def test_partition_type_setter_unknown_value(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) with self.assertRaises(ValueError): table.partitioning_type = "HASH" @@ -532,7 +532,7 @@ def test_partition_type_setter_w_known_value(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) self.assertIsNone(table.partitioning_type) table.partitioning_type = 'DAY' @@ -546,7 +546,7 @@ def test_partition_type_setter_w_none(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) table._properties['timePartitioning'] = {'type': 'DAY'} table.partitioning_type = None @@ -561,7 +561,7 @@ def test_partition_experation_bad_type(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) with self.assertRaises(ValueError): table.partition_expiration = "NEVER" @@ -574,7 +574,7 @@ def test_partition_expiration_w_integer(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 @@ -589,7 +589,7 @@ def test_partition_expiration_w_none(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table._properties['timePartitioning'] = { @@ -608,7 +608,7 @@ def test_partition_expiration_w_none_no_partition_set(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = None @@ -623,7 +623,7 @@ def test_list_partitions(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) self.assertEqual(table.list_partitions(), [20160804, 20160805]) @@ -653,7 +653,7 @@ def test_create_w_alternate_client(self): dataset = _Dataset(client=client1) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset=dataset, + table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, age]) table.friendly_name = TITLE table.description = DESCRIPTION @@ -692,7 +692,7 @@ def test_create_w_missing_output_properties(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset, + table = self._make_one(self.TABLE_NAME, dataset, schema=[full_name, age]) table.create() @@ -719,7 +719,7 @@ def test_exists_miss_w_bound_client(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) self.assertFalse(table.exists()) @@ -737,7 +737,7 @@ def test_exists_hit_w_alternate_client(self): conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) dataset = _Dataset(client1) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) self.assertTrue(table.exists(client=client2)) @@ -755,7 +755,7 @@ def test_reload_w_bound_client(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) table.reload() @@ -774,7 +774,7 @@ def test_reload_w_alternate_client(self): conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) dataset = _Dataset(client1) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) table.reload(client=client2) @@ -790,7 +790,7 @@ def test_patch_w_invalid_expiration(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) with self.assertRaises(ValueError): table.patch(expires='BOGUS') @@ -806,7 +806,7 @@ def test_patch_w_bound_client(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) table.patch(description=DESCRIPTION, friendly_name=TITLE, @@ -846,7 +846,7 @@ def test_patch_w_alternate_client(self): conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) dataset = _Dataset(client1) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='NULLABLE') @@ -882,7 +882,7 @@ def test_patch_w_schema_None(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) table.patch(schema=None) @@ -908,7 +908,7 @@ def test_update_w_bound_client(self): dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._makeOne(self.TABLE_NAME, dataset=dataset, + table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, age]) table.description = DESCRIPTION table.friendly_name = TITLE @@ -956,7 +956,7 @@ def test_update_w_alternate_client(self): conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) dataset = _Dataset(client1) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) table.default_table_expiration_ms = DEF_TABLE_EXP table.location = LOCATION table.expires = self.EXP_TIME @@ -987,7 +987,7 @@ def test_delete_w_bound_client(self): conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) table.delete() @@ -1004,7 +1004,7 @@ def test_delete_w_alternate_client(self): conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) dataset = _Dataset(client1) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) table.delete(client=client2) @@ -1067,7 +1067,7 @@ def _bigquery_timestamp_float_repr(ts_float): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='NULLABLE') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._makeOne(self.TABLE_NAME, dataset=dataset, + table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, age, joined]) iterator = table.fetch_data() @@ -1134,7 +1134,7 @@ def test_fetch_data_w_alternate_client(self): age = SchemaField('age', 'INTEGER', mode='REQUIRED') voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') score = SchemaField('score', 'FLOAT', mode='NULLABLE') - table = self._makeOne(self.TABLE_NAME, dataset=dataset, + table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, age, voter, score]) iterator = table.fetch_data( @@ -1187,7 +1187,7 @@ def test_fetch_data_w_repeated_fields(self): score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) - table = self._makeOne(self.TABLE_NAME, dataset=dataset, + table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, struct]) iterator = table.fetch_data() @@ -1243,7 +1243,7 @@ def test_fetch_data_w_record_schema(self): rank = SchemaField('rank', 'INTEGER', 'REQUIRED') phone = SchemaField('phone', 'RECORD', mode='NULLABLE', fields=[area_code, local_number, rank]) - table = self._makeOne(self.TABLE_NAME, dataset=dataset, + table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, phone]) iterator = table.fetch_data() @@ -1275,7 +1275,7 @@ def test_insert_data_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA client = _Client(project=self.PROJECT) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) ROWS = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), @@ -1305,7 +1305,7 @@ def test_insert_data_w_bound_client(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._makeOne(self.TABLE_NAME, dataset=dataset, + table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, age, joined]) ROWS = [ ('Phred Phlyntstone', 32, WHEN), @@ -1357,7 +1357,7 @@ def test_insert_data_w_alternate_client(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') - table = self._makeOne(self.TABLE_NAME, dataset=dataset, + table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, age, voter]) ROWS = [ ('Phred Phlyntstone', 32, True), @@ -1411,7 +1411,7 @@ def test_insert_data_w_repeated_fields(self): score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) - table = self._makeOne(self.TABLE_NAME, dataset=dataset, + table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, struct]) ROWS = [ (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), @@ -1447,7 +1447,7 @@ def test_insert_data_w_record_schema(self): rank = SchemaField('rank', 'INTEGER', 'REQUIRED') phone = SchemaField('phone', 'RECORD', mode='NULLABLE', fields=[area_code, local_number, rank]) - table = self._makeOne(self.TABLE_NAME, dataset=dataset, + table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, phone]) ROWS = [ ('Phred Phlyntstone', {'area_code': '800', @@ -1485,7 +1485,7 @@ class TextModeFile(object): client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) file_obj = TextModeFile() - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) with self.assertRaises(ValueError): table.upload_from_file(file_obj, 'CSV', size=1234) @@ -1497,7 +1497,7 @@ def test_upload_from_file_size_failure(self): client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) file_obj = object() - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) with self.assertRaises(ValueError): table.upload_from_file(file_obj, 'CSV', size=None) @@ -1517,7 +1517,7 @@ def test_upload_from_file_multipart_w_400(self): ) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) with _NamedTemporaryFile() as temp: with open(temp.name, 'w') as file_obj: @@ -1557,7 +1557,7 @@ def _upload_from_file_helper(self, **kw): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._makeOne(self.TABLE_NAME, dataset=dataset, + table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, age, joined]) ROWS = [ ('Phred Phlyntstone', 32, WHEN), @@ -1663,7 +1663,7 @@ class _UploadConfig(object): simple_multipart = True simple_path = u'' # force resumable dataset = _Dataset(client) - table = self._makeOne(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset=dataset) with _Monkey(MUT, _UploadConfig=_UploadConfig): with _NamedTemporaryFile() as temp: From 00ecfac03e2e7c2014b3306bf59692c7d9279c7b Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 10 Nov 2016 11:06:21 -0800 Subject: [PATCH 0036/2016] Changing all instances of _callFUT to _call_fut. Done via: $ git grep -l _callFUT | \ > xargs sed -i s/_callFUT/_call_fut/g --- .../unit_tests/test__helpers.py | 88 +++++++++---------- .../unit_tests/test_table.py | 16 ++-- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 45c8bb89bc67..46c58c8ea405 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -17,111 +17,111 @@ class Test_not_null(unittest.TestCase): - def _callFUT(self, value, field): + def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _not_null return _not_null(value, field) def test_w_none_nullable(self): - self.assertFalse(self._callFUT(None, _Field('NULLABLE'))) + self.assertFalse(self._call_fut(None, _Field('NULLABLE'))) def test_w_none_required(self): - self.assertTrue(self._callFUT(None, _Field('REQUIRED'))) + self.assertTrue(self._call_fut(None, _Field('REQUIRED'))) def test_w_value(self): - self.assertTrue(self._callFUT(object(), object())) + self.assertTrue(self._call_fut(object(), object())) class Test_int_from_json(unittest.TestCase): - def _callFUT(self, value, field): + def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _int_from_json return _int_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._callFUT(None, _Field('REQUIRED')) + self._call_fut(None, _Field('REQUIRED')) def test_w_string_value(self): - coerced = self._callFUT('42', object()) + coerced = self._call_fut('42', object()) self.assertEqual(coerced, 42) def test_w_float_value(self): - coerced = self._callFUT(42, object()) + coerced = self._call_fut(42, object()) self.assertEqual(coerced, 42) class Test_float_from_json(unittest.TestCase): - def _callFUT(self, value, field): + def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _float_from_json return _float_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._callFUT(None, _Field('REQUIRED')) + self._call_fut(None, _Field('REQUIRED')) def test_w_string_value(self): - coerced = self._callFUT('3.1415', object()) + coerced = self._call_fut('3.1415', object()) self.assertEqual(coerced, 3.1415) def test_w_float_value(self): - coerced = self._callFUT(3.1415, object()) + coerced = self._call_fut(3.1415, object()) self.assertEqual(coerced, 3.1415) class Test_bool_from_json(unittest.TestCase): - def _callFUT(self, value, field): + def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _bool_from_json return _bool_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) def test_w_none_required(self): with self.assertRaises(AttributeError): - self._callFUT(None, _Field('REQUIRED')) + self._call_fut(None, _Field('REQUIRED')) def test_w_value_t(self): - coerced = self._callFUT('T', object()) + coerced = self._call_fut('T', object()) self.assertTrue(coerced) def test_w_value_true(self): - coerced = self._callFUT('True', object()) + coerced = self._call_fut('True', object()) self.assertTrue(coerced) def test_w_value_1(self): - coerced = self._callFUT('1', object()) + coerced = self._call_fut('1', object()) self.assertTrue(coerced) def test_w_value_other(self): - coerced = self._callFUT('f', object()) + coerced = self._call_fut('f', object()) self.assertFalse(coerced) class Test_datetime_from_json(unittest.TestCase): - def _callFUT(self, value, field): + def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _datetime_from_json return _datetime_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._callFUT(None, _Field('REQUIRED')) + self._call_fut(None, _Field('REQUIRED')) def test_w_string_value(self): import datetime from google.cloud._helpers import _EPOCH - coerced = self._callFUT('1.234567', object()) + coerced = self._call_fut('1.234567', object()) self.assertEqual( coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) @@ -129,7 +129,7 @@ def test_w_string_value(self): def test_w_float_value(self): import datetime from google.cloud._helpers import _EPOCH - coerced = self._callFUT(1.234567, object()) + coerced = self._call_fut(1.234567, object()) self.assertEqual( coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) @@ -137,20 +137,20 @@ def test_w_float_value(self): class Test_date_from_json(unittest.TestCase): - def _callFUT(self, value, field): + def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _date_from_json return _date_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._callFUT(None, _Field('REQUIRED')) + self._call_fut(None, _Field('REQUIRED')) def test_w_string_value(self): import datetime - coerced = self._callFUT('1987-09-22', object()) + coerced = self._call_fut('1987-09-22', object()) self.assertEqual( coerced, datetime.date(1987, 9, 22)) @@ -158,36 +158,36 @@ def test_w_string_value(self): class Test_record_from_json(unittest.TestCase): - def _callFUT(self, value, field): + def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _record_from_json return _record_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._callFUT(None, _Field('REQUIRED')) + self._call_fut(None, _Field('REQUIRED')) def test_w_nullable_subfield_none(self): subfield = _Field('NULLABLE', 'age', 'INTEGER') field = _Field('REQUIRED', fields=[subfield]) value = {'f': [{'v': None}]} - coerced = self._callFUT(value, field) + coerced = self._call_fut(value, field) self.assertEqual(coerced, {'age': None}) def test_w_scalar_subfield(self): subfield = _Field('REQUIRED', 'age', 'INTEGER') field = _Field('REQUIRED', fields=[subfield]) value = {'f': [{'v': 42}]} - coerced = self._callFUT(value, field) + coerced = self._call_fut(value, field) self.assertEqual(coerced, {'age': 42}) def test_w_repeated_subfield(self): subfield = _Field('REPEATED', 'color', 'STRING') field = _Field('REQUIRED', fields=[subfield]) value = {'f': [{'v': ['red', 'yellow', 'blue']}]} - coerced = self._callFUT(value, field) + coerced = self._call_fut(value, field) self.assertEqual(coerced, {'color': ['red', 'yellow', 'blue']}) def test_w_record_subfield(self): @@ -213,30 +213,30 @@ def test_w_record_subfield(self): 'rank': 1, } } - coerced = self._callFUT(value, person) + coerced = self._call_fut(value, person) self.assertEqual(coerced, expected) class Test_string_from_json(unittest.TestCase): - def _callFUT(self, value, field): + def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _string_from_json return _string_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._callFUT(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) def test_w_none_required(self): - self.assertIsNone(self._callFUT(None, _Field('RECORD'))) + self.assertIsNone(self._call_fut(None, _Field('RECORD'))) def test_w_string_value(self): - coerced = self._callFUT('Wonderful!', object()) + coerced = self._call_fut('Wonderful!', object()) self.assertEqual(coerced, 'Wonderful!') class Test_rows_from_json(unittest.TestCase): - def _callFUT(self, value, field): + def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _rows_from_json return _rows_from_json(value, field) @@ -281,7 +281,7 @@ def test_w_record_subfield(self): ('Bharney Rhubble', bharney_phone, ['brown']), ('Wylma Phlyntstone', None, []), ] - coerced = self._callFUT(rows, schema) + coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) def test_w_int64_float64(self): @@ -312,7 +312,7 @@ def test_w_int64_float64(self): ('Bharney Rhubble', 4, 0.125), ('Wylma Phlyntstone', 20, 0.625), ] - coerced = self._callFUT(rows, schema) + coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 5fda9a52ee9c..0cc3912152c3 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -1768,7 +1768,7 @@ class _UploadConfig(object): class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): - def _callFUT(self, resource): + def _call_fut(self, resource): from google.cloud.bigquery.table import _parse_schema_resource return _parse_schema_resource(resource) @@ -1782,7 +1782,7 @@ def _makeResource(self): def test__parse_schema_resource_defaults(self): RESOURCE = self._makeResource() - schema = self._callFUT(RESOURCE['schema']) + schema = self._call_fut(RESOURCE['schema']) self._verifySchema(schema, RESOURCE) def test__parse_schema_resource_subfields(self): @@ -1797,7 +1797,7 @@ def test__parse_schema_resource_subfields(self): {'name': 'number', 'type': 'STRING', 'mode': 'REQUIRED'}]}) - schema = self._callFUT(RESOURCE['schema']) + schema = self._call_fut(RESOURCE['schema']) self._verifySchema(schema, RESOURCE) def test__parse_schema_resource_fields_without_mode(self): @@ -1806,13 +1806,13 @@ def test__parse_schema_resource_fields_without_mode(self): {'name': 'phone', 'type': 'STRING'}) - schema = self._callFUT(RESOURCE['schema']) + schema = self._call_fut(RESOURCE['schema']) self._verifySchema(schema, RESOURCE) class Test_build_schema_resource(unittest.TestCase, _SchemaBase): - def _callFUT(self, resource): + def _call_fut(self, resource): from google.cloud.bigquery.table import _build_schema_resource return _build_schema_resource(resource) @@ -1820,7 +1820,7 @@ def test_defaults(self): from google.cloud.bigquery.table import SchemaField full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - resource = self._callFUT([full_name, age]) + resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual(resource[0], {'name': 'full_name', @@ -1837,7 +1837,7 @@ def test_w_description(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED', description=DESCRIPTION) age = SchemaField('age', 'INTEGER', mode='REQUIRED') - resource = self._callFUT([full_name, age]) + resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual(resource[0], {'name': 'full_name', @@ -1856,7 +1856,7 @@ def test_w_subfields(self): ph_num = SchemaField('number', 'STRING', 'REQUIRED') phone = SchemaField('phone', 'RECORD', mode='REPEATABLE', fields=[ph_type, ph_num]) - resource = self._callFUT([full_name, phone]) + resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual(resource[0], {'name': 'full_name', From ebdd51ead28833447265672294e2528f7f99d36a Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 8 Nov 2016 21:02:17 -0800 Subject: [PATCH 0037/2016] Manually fixing up bad indents / long lines after renames. --- .../unit_tests/test_job.py | 24 ++++----- .../unit_tests/test_schema.py | 17 ++++--- .../unit_tests/test_table.py | 50 +++++++++---------- 3 files changed, 46 insertions(+), 45 deletions(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index 14f5a6ed20a0..67b1477f9732 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -269,7 +269,7 @@ def test_ctor_w_schema(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client, - schema=[full_name, age]) + schema=[full_name, age]) self.assertEqual(job.schema, [full_name, age]) def test_schema_setter_non_list(self): @@ -499,7 +499,7 @@ def test_begin_w_alternate_client(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1, - schema=[full_name, age]) + schema=[full_name, age]) job.allow_jagged_rows = True job.allow_quoted_newlines = True @@ -998,7 +998,7 @@ def test_ctor(self): client = _Client(self.PROJECT) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], - client) + client) self.assertEqual(job.source, source) self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) self.assertIs(job._client, client) @@ -1082,7 +1082,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], - client) + client) job.begin() @@ -1131,7 +1131,7 @@ def test_begin_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], - client1) + client1) job.compression = 'GZIP' job.destination_format = 'NEWLINE_DELIMITED_JSON' @@ -1163,7 +1163,7 @@ def test_exists_miss_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], - client) + client) self.assertFalse(job.exists()) @@ -1181,7 +1181,7 @@ def test_exists_hit_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], - client1) + client1) self.assertTrue(job.exists(client=client2)) @@ -1199,7 +1199,7 @@ def test_reload_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], - client) + client) job.reload() @@ -1218,7 +1218,7 @@ def test_reload_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], - client1) + client1) job.reload(client=client2) @@ -1533,9 +1533,9 @@ def test_begin_w_bound_client_and_udf(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client, - udf_resources=[ - UDFResource("resourceUri", RESOURCE_URI) - ]) + udf_resources=[ + UDFResource("resourceUri", RESOURCE_URI) + ]) job.begin() diff --git a/packages/google-cloud-bigquery/unit_tests/test_schema.py b/packages/google-cloud-bigquery/unit_tests/test_schema.py index 12ad43aad607..8b49bff5b29a 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_schema.py +++ b/packages/google-cloud-bigquery/unit_tests/test_schema.py @@ -35,7 +35,7 @@ def test_ctor_defaults(self): def test_ctor_explicit(self): field = self._make_one('test', 'STRING', mode='REQUIRED', - description='Testing') + description='Testing') self.assertEqual(field.name, 'test') self.assertEqual(field.field_type, 'STRING') self.assertEqual(field.mode, 'REQUIRED') @@ -43,9 +43,10 @@ def test_ctor_explicit(self): self.assertIsNone(field.fields) def test_ctor_subfields(self): - field = self._make_one('phone_number', 'RECORD', - fields=[self._make_one('area_code', 'STRING'), - self._make_one('local_number', 'STRING')]) + field = self._make_one( + 'phone_number', 'RECORD', + fields=[self._make_one('area_code', 'STRING'), + self._make_one('local_number', 'STRING')]) self.assertEqual(field.name, 'phone_number') self.assertEqual(field.field_type, 'RECORD') self.assertEqual(field.mode, 'NULLABLE') @@ -91,16 +92,16 @@ def test___eq___fields_mismatch(self): def test___eq___hit(self): field = self._make_one('test', 'STRING', mode='REQUIRED', - description='Testing') + description='Testing') other = self._make_one('test', 'STRING', mode='REQUIRED', - description='Testing') + description='Testing') self.assertEqual(field, other) def test___eq___hit_case_diff_on_type(self): field = self._make_one('test', 'STRING', mode='REQUIRED', - description='Testing') + description='Testing') other = self._make_one('test', 'string', mode='REQUIRED', - description='Testing') + description='Testing') self.assertEqual(field, other) def test___eq___hit_w_fields(self): diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 0cc3912152c3..45b784e84437 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -167,7 +167,7 @@ def test_ctor_w_schema(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) self.assertEqual(table.schema, [full_name, age]) def test_num_bytes_getter(self): @@ -410,7 +410,7 @@ def test_create_w_bound_client(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) table.create() @@ -440,7 +440,7 @@ def test_create_w_partition_no_expire(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) self.assertIsNone(table.partitioning_type) table.partitioning_type = "DAY" @@ -474,7 +474,7 @@ def test_create_w_partition_and_expire(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 self.assertEqual(table.partitioning_type, "DAY") @@ -507,7 +507,7 @@ def test_partition_type_setter_bad_type(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) with self.assertRaises(ValueError): table.partitioning_type = 123 @@ -520,7 +520,7 @@ def test_partition_type_setter_unknown_value(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) with self.assertRaises(ValueError): table.partitioning_type = "HASH" @@ -533,7 +533,7 @@ def test_partition_type_setter_w_known_value(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) self.assertIsNone(table.partitioning_type) table.partitioning_type = 'DAY' self.assertEqual(table.partitioning_type, 'DAY') @@ -547,7 +547,7 @@ def test_partition_type_setter_w_none(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) table._properties['timePartitioning'] = {'type': 'DAY'} table.partitioning_type = None self.assertIsNone(table.partitioning_type) @@ -562,7 +562,7 @@ def test_partition_experation_bad_type(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) with self.assertRaises(ValueError): table.partition_expiration = "NEVER" @@ -575,7 +575,7 @@ def test_partition_expiration_w_integer(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 self.assertEqual(table.partitioning_type, "DAY") @@ -590,7 +590,7 @@ def test_partition_expiration_w_none(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table._properties['timePartitioning'] = { 'type': 'DAY', @@ -609,7 +609,7 @@ def test_partition_expiration_w_none_no_partition_set(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = None self.assertIsNone(table.partitioning_type) @@ -624,7 +624,7 @@ def test_list_partitions(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) self.assertEqual(table.list_partitions(), [20160804, 20160805]) def test_create_w_alternate_client(self): @@ -654,7 +654,7 @@ def test_create_w_alternate_client(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age]) + schema=[full_name, age]) table.friendly_name = TITLE table.description = DESCRIPTION table.view_query = QUERY @@ -693,7 +693,7 @@ def test_create_w_missing_output_properties(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + schema=[full_name, age]) table.create() @@ -909,7 +909,7 @@ def test_update_w_bound_client(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age]) + schema=[full_name, age]) table.description = DESCRIPTION table.friendly_name = TITLE @@ -1068,7 +1068,7 @@ def _bigquery_timestamp_float_repr(ts_float): age = SchemaField('age', 'INTEGER', mode='NULLABLE') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, joined]) + schema=[full_name, age, joined]) iterator = table.fetch_data() page = six.next(iterator.pages) @@ -1135,7 +1135,7 @@ def test_fetch_data_w_alternate_client(self): voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') score = SchemaField('score', 'FLOAT', mode='NULLABLE') table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, voter, score]) + schema=[full_name, age, voter, score]) iterator = table.fetch_data( client=client2, max_results=MAX, page_token=TOKEN) @@ -1188,7 +1188,7 @@ def test_fetch_data_w_repeated_fields(self): struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, struct]) + schema=[full_name, struct]) iterator = table.fetch_data() page = six.next(iterator.pages) @@ -1244,7 +1244,7 @@ def test_fetch_data_w_record_schema(self): phone = SchemaField('phone', 'RECORD', mode='NULLABLE', fields=[area_code, local_number, rank]) table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, phone]) + schema=[full_name, phone]) iterator = table.fetch_data() page = six.next(iterator.pages) @@ -1306,7 +1306,7 @@ def test_insert_data_w_bound_client(self): age = SchemaField('age', 'INTEGER', mode='REQUIRED') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, joined]) + schema=[full_name, age, joined]) ROWS = [ ('Phred Phlyntstone', 32, WHEN), ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), @@ -1358,7 +1358,7 @@ def test_insert_data_w_alternate_client(self): age = SchemaField('age', 'INTEGER', mode='REQUIRED') voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, voter]) + schema=[full_name, age, voter]) ROWS = [ ('Phred Phlyntstone', 32, True), ('Bharney Rhubble', 33, False), @@ -1412,7 +1412,7 @@ def test_insert_data_w_repeated_fields(self): struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, struct]) + schema=[full_name, struct]) ROWS = [ (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), ] @@ -1448,7 +1448,7 @@ def test_insert_data_w_record_schema(self): phone = SchemaField('phone', 'RECORD', mode='NULLABLE', fields=[area_code, local_number, rank]) table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, phone]) + schema=[full_name, phone]) ROWS = [ ('Phred Phlyntstone', {'area_code': '800', 'local_number': '555-1212', @@ -1558,7 +1558,7 @@ def _upload_from_file_helper(self, **kw): age = SchemaField('age', 'INTEGER', mode='REQUIRED') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, joined]) + schema=[full_name, age, joined]) ROWS = [ ('Phred Phlyntstone', 32, WHEN), ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), From 2756acde9da80819bb4f1ce26693e429a511490b Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 10 Nov 2016 21:17:51 -0800 Subject: [PATCH 0038/2016] Adding quiet flag to pip command for local deps. --- packages/google-cloud-bigquery/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tox.ini b/packages/google-cloud-bigquery/tox.ini index 001444516350..d53f4d83210d 100644 --- a/packages/google-cloud-bigquery/tox.ini +++ b/packages/google-cloud-bigquery/tox.ini @@ -4,7 +4,7 @@ envlist = [testing] localdeps = - pip install --upgrade {toxinidir}/../core + pip install --quiet --upgrade {toxinidir}/../core deps = pytest covercmd = From e7c19c9a43e45ae54b1541b1d90dfdccc2f44efa Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 8 Nov 2016 22:04:09 -0800 Subject: [PATCH 0039/2016] Dropping usage of _Monkey in favor of mock.patch. This was done only in bigquery, datastore and storage packages. Still needs updates in bigtable, core, logging, monitoring, pubsub and speech. --- packages/google-cloud-bigquery/tox.ini | 1 + .../google-cloud-bigquery/unit_tests/test_table.py | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/tox.ini b/packages/google-cloud-bigquery/tox.ini index d53f4d83210d..2966a8546dec 100644 --- a/packages/google-cloud-bigquery/tox.ini +++ b/packages/google-cloud-bigquery/tox.ini @@ -7,6 +7,7 @@ localdeps = pip install --quiet --upgrade {toxinidir}/../core deps = pytest + mock covercmd = py.test --quiet \ --cov=google.cloud.bigquery \ diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 45b784e84437..d049107a0769 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -1640,11 +1640,10 @@ def test_upload_from_file_w_bound_client_multipart(self): def test_upload_from_file_resumable_with_400(self): import csv import datetime + import mock from six.moves.http_client import BAD_REQUEST - from google.cloud.bigquery import table as MUT from google.cloud.exceptions import BadRequest from google.cloud._helpers import UTC - from google.cloud._testing import _Monkey from google.cloud._testing import _NamedTemporaryFile WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( @@ -1665,7 +1664,8 @@ class _UploadConfig(object): dataset = _Dataset(client) table = self._make_one(self.TABLE_NAME, dataset=dataset) - with _Monkey(MUT, _UploadConfig=_UploadConfig): + with mock.patch('google.cloud.bigquery.table._UploadConfig', + new=_UploadConfig): with _NamedTemporaryFile() as temp: with open(temp.name, 'w') as file_obj: writer = csv.writer(file_obj) @@ -1680,11 +1680,10 @@ class _UploadConfig(object): # pylint: disable=too-many-statements def test_upload_from_file_w_explicit_client_resumable(self): import json + import mock from six.moves.http_client import OK from six.moves.urllib.parse import parse_qsl from six.moves.urllib.parse import urlsplit - from google.cloud._testing import _Monkey - from google.cloud.bigquery import table as MUT UPLOAD_PATH = 'https://example.com/upload/test' initial_response = {'status': OK, 'location': UPLOAD_PATH} @@ -1703,7 +1702,8 @@ class _UploadConfig(object): simple_multipart = True simple_path = u'' # force resumable - with _Monkey(MUT, _UploadConfig=_UploadConfig): + with mock.patch('google.cloud.bigquery.table._UploadConfig', + new=_UploadConfig): orig_requested, PATH, BODY = self._upload_from_file_helper( allow_jagged_rows=False, allow_quoted_newlines=False, From ff473145c92fbd1a84ade54f1d5e24a7f0fc1a2a Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 7 Nov 2016 18:25:50 -0800 Subject: [PATCH 0040/2016] Updating connection -> _connection attribute in some packages. In particular: bigquery, bigtable and datastore. (The only change in bigtable was an import, and that attribute should probably go elsewhere.) --- .../google/cloud/bigquery/_http.py | 4 ++-- .../google/cloud/bigquery/dataset.py | 14 ++++++------- .../google/cloud/bigquery/job.py | 10 +++++----- .../google/cloud/bigquery/query.py | 8 ++++---- .../google/cloud/bigquery/table.py | 18 ++++++++--------- .../unit_tests/test_client.py | 20 +++++++++---------- .../unit_tests/test_dataset.py | 2 +- .../unit_tests/test_job.py | 2 +- .../unit_tests/test_query.py | 2 +- .../unit_tests/test_table.py | 2 +- 10 files changed, 41 insertions(+), 41 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index 85a25d643d1f..fd5bb3cb8b23 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -14,10 +14,10 @@ """Create / interact with Google BigQuery connections.""" -from google.cloud import connection as base_connection +from google.cloud import _http -class Connection(base_connection.JSONConnection): +class Connection(_http.JSONConnection): """A connection to Google BigQuery via the JSON REST API.""" API_BASE_URL = 'https://www.googleapis.com' diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index f29fdbc8a243..e209433b5e10 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -422,7 +422,7 @@ def create(self, client=None): """ client = self._require_client(client) path = '/projects/%s/datasets' % (self.project,) - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) @@ -443,8 +443,8 @@ def exists(self, client=None): client = self._require_client(client) try: - client.connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) + client._connection.api_request(method='GET', path=self.path, + query_params={'fields': 'id'}) except NotFound: return False else: @@ -463,7 +463,7 @@ def reload(self, client=None): """ client = self._require_client(client) - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='GET', path=self.path) self._set_properties(api_response) @@ -502,7 +502,7 @@ def patch(self, client=None, **kw): if 'location' in kw: partial['location'] = kw['location'] - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='PATCH', path=self.path, data=partial) self._set_properties(api_response) @@ -518,7 +518,7 @@ def update(self, client=None): ``client`` stored on the current dataset. """ client = self._require_client(client) - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='PUT', path=self.path, data=self._build_resource()) self._set_properties(api_response) @@ -534,7 +534,7 @@ def delete(self, client=None): ``client`` stored on the current dataset. """ client = self._require_client(client) - client.connection.api_request(method='DELETE', path=self.path) + client._connection.api_request(method='DELETE', path=self.path) def list_tables(self, max_results=None, page_token=None): """List tables for the project associated with this client. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 7766c120c5cd..203dd2df6dd0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -316,7 +316,7 @@ def begin(self, client=None): client = self._require_client(client) path = '/projects/%s/jobs' % (self.project,) - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) @@ -337,8 +337,8 @@ def exists(self, client=None): client = self._require_client(client) try: - client.connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) + client._connection.api_request(method='GET', path=self.path, + query_params={'fields': 'id'}) except NotFound: return False else: @@ -357,7 +357,7 @@ def reload(self, client=None): """ client = self._require_client(client) - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='GET', path=self.path) self._set_properties(api_response) @@ -374,7 +374,7 @@ def cancel(self, client=None): """ client = self._require_client(client) - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='POST', path='%s/cancel' % (self.path,)) self._set_properties(api_response['job']) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 5c7c3681a539..fa1b1da63883 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -334,7 +334,7 @@ def run(self, client=None): client = self._require_client(client) path = '/projects/%s/queries' % (self.project,) - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) @@ -392,9 +392,9 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, params['timeoutMs'] = timeout_ms path = '/projects/%s/queries/%s' % (self.project, self.name) - response = client.connection.api_request(method='GET', - path=path, - query_params=params) + response = client._connection.api_request(method='GET', + path=path, + query_params=params) self._set_properties(response) total_rows = response.get('totalRows') diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 870d8520159e..f8b52f772f87 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -492,7 +492,7 @@ def create(self, client=None): client = self._require_client(client) path = '/projects/%s/datasets/%s/tables' % ( self._dataset.project, self._dataset.name) - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) @@ -513,8 +513,8 @@ def exists(self, client=None): client = self._require_client(client) try: - client.connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) + client._connection.api_request(method='GET', path=self.path, + query_params={'fields': 'id'}) except NotFound: return False else: @@ -533,7 +533,7 @@ def reload(self, client=None): """ client = self._require_client(client) - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='GET', path=self.path) self._set_properties(api_response) @@ -608,7 +608,7 @@ def patch(self, partial['schema'] = { 'fields': _build_schema_resource(schema)} - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='PATCH', path=self.path, data=partial) self._set_properties(api_response) @@ -624,7 +624,7 @@ def update(self, client=None): ``client`` stored on the current dataset. """ client = self._require_client(client) - api_response = client.connection.api_request( + api_response = client._connection.api_request( method='PUT', path=self.path, data=self._build_resource()) self._set_properties(api_response) @@ -640,7 +640,7 @@ def delete(self, client=None): ``client`` stored on the current dataset. """ client = self._require_client(client) - client.connection.api_request(method='DELETE', path=self.path) + client._connection.api_request(method='DELETE', path=self.path) def fetch_data(self, max_results=None, page_token=None, client=None): """API call: fetch the table data via a GET request @@ -764,7 +764,7 @@ def insert_data(self, if template_suffix is not None: data['templateSuffix'] = template_suffix - response = client.connection.api_request( + response = client._connection.api_request( method='POST', path='%s/insertAll' % self.path, data=data) @@ -885,7 +885,7 @@ def upload_from_file(self, a file opened in text mode. """ client = self._require_client(client) - connection = client.connection + connection = client._connection content_type = 'application/octet-stream' # Rewind the file if desired. diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index 9034658692d9..61ad81227aee 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -31,9 +31,9 @@ def test_ctor(self): creds = _Credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) - self.assertIsInstance(client.connection, Connection) - self.assertIs(client.connection.credentials, creds) - self.assertIs(client.connection.http, http) + self.assertIsInstance(client._connection, Connection) + self.assertIs(client._connection.credentials, creds) + self.assertIs(client._connection.http, http) def test_list_projects_defaults(self): import six @@ -59,7 +59,7 @@ def test_list_projects_defaults(self): } creds = _Credentials() client = self._make_one(PROJECT_1, creds) - conn = client.connection = _Connection(DATA) + conn = client._connection = _Connection(DATA) iterator = client.list_projects() page = six.next(iterator.pages) @@ -88,7 +88,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): DATA = {} creds = _Credentials() client = self._make_one(PROJECT, creds) - conn = client.connection = _Connection(DATA) + conn = client._connection = _Connection(DATA) iterator = client.list_projects(max_results=3, page_token=TOKEN) page = six.next(iterator.pages) @@ -130,7 +130,7 @@ def test_list_datasets_defaults(self): } creds = _Credentials() client = self._make_one(PROJECT, creds) - conn = client.connection = _Connection(DATA) + conn = client._connection = _Connection(DATA) iterator = client.list_datasets() page = six.next(iterator.pages) @@ -158,7 +158,7 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): DATA = {} creds = _Credentials() client = self._make_one(PROJECT, creds) - conn = client.connection = _Connection(DATA) + conn = client._connection = _Connection(DATA) iterator = client.list_datasets( include_all=True, max_results=3, page_token=TOKEN) @@ -306,7 +306,7 @@ def test_list_jobs_defaults(self): } creds = _Credentials() client = self._make_one(PROJECT, creds) - conn = client.connection = _Connection(DATA) + conn = client._connection = _Connection(DATA) iterator = client.list_jobs() page = six.next(iterator.pages) @@ -362,7 +362,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): } creds = _Credentials() client = self._make_one(PROJECT, creds) - conn = client.connection = _Connection(DATA) + conn = client._connection = _Connection(DATA) iterator = client.list_jobs() page = six.next(iterator.pages) @@ -390,7 +390,7 @@ def test_list_jobs_explicit_missing(self): TOKEN = 'TOKEN' creds = _Credentials() client = self._make_one(PROJECT, creds) - conn = client.connection = _Connection(DATA) + conn = client._connection = _Connection(DATA) iterator = client.list_jobs(max_results=1000, page_token=TOKEN, all_users=True, state_filter='done') diff --git a/packages/google-cloud-bigquery/unit_tests/test_dataset.py b/packages/google-cloud-bigquery/unit_tests/test_dataset.py index 1493d266d44f..ec7c56722368 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_dataset.py +++ b/packages/google-cloud-bigquery/unit_tests/test_dataset.py @@ -785,7 +785,7 @@ class _Client(object): def __init__(self, project='project', connection=None): self.project = project - self.connection = connection + self._connection = connection class _Connection(object): diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index 67b1477f9732..c73715262ba4 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -1648,7 +1648,7 @@ class _Client(object): def __init__(self, project='project', connection=None): self.project = project - self.connection = connection + self._connection = connection def dataset(self, name): from google.cloud.bigquery.dataset import Dataset diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/unit_tests/test_query.py index 58bc8ced8e1b..43aedf334ce0 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_query.py +++ b/packages/google-cloud-bigquery/unit_tests/test_query.py @@ -440,7 +440,7 @@ class _Client(object): def __init__(self, project='project', connection=None): self.project = project - self.connection = connection + self._connection = connection def dataset(self, name): from google.cloud.bigquery.dataset import Dataset diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index d049107a0769..73fd84cec5e3 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -1880,7 +1880,7 @@ class _Client(object): def __init__(self, project='project', connection=None): self.project = project - self.connection = connection + self._connection = connection def job_from_resource(self, resource): # pylint: disable=unused-argument return self._job From 5d140a34cf1905eece9d3c972a3058b01fdae0d9 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 14 Nov 2016 12:44:19 -0800 Subject: [PATCH 0041/2016] Upgrading core to version to 0.21.0. As a result, also upgrading the umbrella package and all packages to 0.21.0 (since they all depend on core). --- packages/google-cloud-bigquery/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index a54bfd655f48..82a9e42d3458 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -50,12 +50,12 @@ REQUIREMENTS = [ - 'google-cloud-core >= 0.20.0', + 'google-cloud-core >= 0.21.0', ] setup( name='google-cloud-bigquery', - version='0.20.0', + version='0.21.0', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From a78aed74aa0b12e25f71c4e0d6a2a7a687d77cb0 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 14 Nov 2016 14:11:34 -0800 Subject: [PATCH 0042/2016] Need to install from local deps first. The `pip install --upgrade` still is needed to ensure freshness but by removing the filesystem paths from deps we made the initial install grab from PyPI (by mistake). This way, all local package deps are grabbed from the local filesystem. --- packages/google-cloud-bigquery/tox.ini | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/tox.ini b/packages/google-cloud-bigquery/tox.ini index 2966a8546dec..51d238d48c1c 100644 --- a/packages/google-cloud-bigquery/tox.ini +++ b/packages/google-cloud-bigquery/tox.ini @@ -6,8 +6,9 @@ envlist = localdeps = pip install --quiet --upgrade {toxinidir}/../core deps = - pytest + {toxinidir}/../core mock + pytest covercmd = py.test --quiet \ --cov=google.cloud.bigquery \ @@ -17,7 +18,6 @@ covercmd = [testenv] commands = - {[testing]localdeps} py.test --quiet {posargs} unit_tests deps = {[testing]deps} @@ -26,7 +26,6 @@ deps = basepython = python2.7 commands = - {[testing]localdeps} {[testing]covercmd} deps = {[testenv]deps} From 6e1dbd5f694761b1728ce99b554d0b897f622761 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 14 Nov 2016 14:58:42 -0800 Subject: [PATCH 0043/2016] Fixing accidental removal of {localdeps} Also - adding RTD dependency for runtimeconfig. - adding local paths to umbrella tox config "deps" as was done in #2733. --- packages/google-cloud-bigquery/tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/google-cloud-bigquery/tox.ini b/packages/google-cloud-bigquery/tox.ini index 51d238d48c1c..2d142ba71df6 100644 --- a/packages/google-cloud-bigquery/tox.ini +++ b/packages/google-cloud-bigquery/tox.ini @@ -18,6 +18,7 @@ covercmd = [testenv] commands = + {[testing]localdeps} py.test --quiet {posargs} unit_tests deps = {[testing]deps} @@ -26,6 +27,7 @@ deps = basepython = python2.7 commands = + {[testing]localdeps} {[testing]covercmd} deps = {[testenv]deps} From 5d28cce83aebeef18163fd625e893496f31e7b53 Mon Sep 17 00:00:00 2001 From: Thomas Schultz Date: Wed, 16 Nov 2016 11:09:27 -0500 Subject: [PATCH 0044/2016] Set core version compatible specifier to packages. --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 82a9e42d3458..36b000fec6d2 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -50,7 +50,7 @@ REQUIREMENTS = [ - 'google-cloud-core >= 0.21.0', + 'google-cloud-core >= 0.21.0, < 0.22dev', ] setup( From d745ae14e0c729451ab60fc0f2d8993a322229ba Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 28 Nov 2016 16:02:20 -0500 Subject: [PATCH 0045/2016] Correctly model JSON repr of complex nested records. Closes #2354. --- .../google/cloud/bigquery/_helpers.py | 9 +- .../unit_tests/test__helpers.py | 97 ++++++++++++++++++- .../unit_tests/test_table.py | 15 ++- 3 files changed, 109 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 89eb390993c6..a92ca9f9b143 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -58,10 +58,11 @@ def _record_from_json(value, field): """Coerce 'value' to a mapping, if set or not nullable.""" if _not_null(value, field): record = {} - for subfield, cell in zip(field.fields, value['f']): + record_iter = zip(field.fields, value['f']) + for subfield, cell in record_iter: converter = _CELLDATA_FROM_JSON[subfield.field_type] - if field.mode == 'REPEATED': - value = [converter(item, subfield) for item in cell['v']] + if subfield.mode == 'REPEATED': + value = [converter(item['v'], subfield) for item in cell['v']] else: value = converter(cell['v'], subfield) record[subfield.name] = value @@ -103,7 +104,7 @@ def _row_from_json(row, schema): for field, cell in zip(schema, row['f']): converter = _CELLDATA_FROM_JSON[field.field_type] if field.mode == 'REPEATED': - row_data.append([converter(item, field) + row_data.append([converter(item['v'], field) for item in cell['v']]) else: row_data.append(converter(cell['v'], field)) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 46c58c8ea405..c3bae86e76ae 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -186,7 +186,7 @@ def test_w_scalar_subfield(self): def test_w_repeated_subfield(self): subfield = _Field('REPEATED', 'color', 'STRING') field = _Field('REQUIRED', fields=[subfield]) - value = {'f': [{'v': ['red', 'yellow', 'blue']}]} + value = {'f': [{'v': [{'v': 'red'}, {'v': 'yellow'}, {'v': 'blue'}]}]} coerced = self._call_fut(value, field) self.assertEqual(coerced, {'color': ['red', 'yellow', 'blue']}) @@ -234,6 +234,97 @@ def test_w_string_value(self): self.assertEqual(coerced, 'Wonderful!') +class Test_row_from_json(unittest.TestCase): + + def _call_fut(self, row, schema): + from google.cloud.bigquery._helpers import _row_from_json + return _row_from_json(row, schema) + + def test_w_single_scalar_column(self): + # SELECT 1 AS col + col = _Field('REQUIRED', 'col', 'INTEGER') + row = {u'f': [{u'v': u'1'}]} + self.assertEqual(self._call_fut(row, schema=[col]), (1,)) + + def test_w_single_struct_column(self): + # SELECT (1, 2) AS col + sub_1 = _Field('REQUIRED', 'sub_1', 'INTEGER') + sub_2 = _Field('REQUIRED', 'sub_2', 'INTEGER') + col = _Field('REQUIRED', 'col', 'RECORD', fields=[sub_1, sub_2]) + row = {u'f': [{u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}]}}]} + self.assertEqual(self._call_fut(row, schema=[col]), + ({'sub_1': 1, 'sub_2': 2},)) + + def test_w_single_array_column(self): + # SELECT [1, 2, 3] as col + col = _Field('REPEATED', 'col', 'INTEGER') + row = {u'f': [{u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}]} + self.assertEqual(self._call_fut(row, schema=[col]), + ([1, 2, 3],)) + + def test_w_struct_w_nested_array_column(self): + # SELECT ([1, 2], 3, [4, 5]) as col + first = _Field('REPEATED', 'first', 'INTEGER') + second = _Field('REQUIRED', 'second', 'INTEGER') + third = _Field('REPEATED', 'third', 'INTEGER') + col = _Field('REQUIRED', 'col', 'RECORD', + fields=[first, second, third]) + row = { + u'f': [ + {u'v': { + u'f': [ + {u'v': [{u'v': u'1'}, {u'v': u'2'}]}, + {u'v': u'3'}, + {u'v': [{u'v': u'4'}, {u'v': u'5'}]} + ] + }}, + ] + } + self.assertEqual( + self._call_fut(row, schema=[col]), + ({u'first': [1, 2], u'second': 3, u'third': [4, 5]},)) + + def test_w_array_of_struct(self): + # SELECT [(1, 2, 3), (4, 5, 6)] as col + first = _Field('REQUIRED', 'first', 'INTEGER') + second = _Field('REQUIRED', 'second', 'INTEGER') + third = _Field('REQUIRED', 'third', 'INTEGER') + col = _Field('REPEATED', 'col', 'RECORD', + fields=[first, second, third]) + row = {u'f': [{u'v': [ + {u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}}, + {u'v': {u'f': [{u'v': u'4'}, {u'v': u'5'}, {u'v': u'6'}]}}, + ]}]} + self.assertEqual( + self._call_fut(row, schema=[col]), + ([ + {u'first': 1, u'second': 2, u'third': 3}, + {u'first': 4, u'second': 5, u'third': 6}, + ],)) + + def test_w_array_of_struct_w_array(self): + # SELECT [([1, 2, 3], 4), ([5, 6], 7)] + first = _Field('REPEATED', 'first', 'INTEGER') + second = _Field('REQUIRED', 'second', 'INTEGER') + col = _Field('REPEATED', 'col', 'RECORD', fields=[first, second]) + row = {u'f': [{u'v': [ + {u'v': {u'f': [ + {u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}, + {u'v': u'4'} + ]}}, + {u'v': {u'f': [ + {u'v': [{u'v': u'5'}, {u'v': u'6'}]}, + {u'v': u'7'} + ]}} + ]}]} + self.assertEqual( + self._call_fut(row, schema=[col]), + ([ + {u'first': [1, 2, 3], u'second': 4}, + {u'first': [5, 6], u'second': 7}, + ],)) + + class Test_rows_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -253,12 +344,12 @@ def test_w_record_subfield(self): {'f': [ {'v': 'Phred Phlyntstone'}, {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, - {'v': ['orange', 'black']}, + {'v': [{'v': 'orange'}, {'v': 'black'}]}, ]}, {'f': [ {'v': 'Bharney Rhubble'}, {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}}, - {'v': ['brown']}, + {'v': [{'v': 'brown'}]}, ]}, {'f': [ {'v': 'Wylma Phlyntstone'}, diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 73fd84cec5e3..9fcea12d2dce 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -1173,22 +1173,27 @@ def test_fetch_data_w_repeated_fields(self): 'pageToken': TOKEN, 'rows': [ {'f': [ - {'v': ['red', 'green']}, - {'v': [{'f': [{'v': ['1', '2']}, - {'v': ['3.1415', '1.414']}]}]}, + {'v': [{'v': 'red'}, {'v': 'green'}]}, + {'v': [{ + 'v': { + 'f': [ + {'v': [{'v': '1'}, {'v': '2'}]}, + {'v': [{'v': '3.1415'}, {'v': '1.414'}]}, + ]} + }]}, ]}, ] } conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) - full_name = SchemaField('color', 'STRING', mode='REPEATED') + color = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, struct]) + schema=[color, struct]) iterator = table.fetch_data() page = six.next(iterator.pages) From c27ad7ed617707c45e5b982d9a185eff9e1fc495 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 1 Dec 2016 11:57:05 -0500 Subject: [PATCH 0046/2016] Add support for standard SQL 'BOOL' type. Alias for 'BOOLEAN' in legacy SQL. --- .../google/cloud/bigquery/_helpers.py | 1 + .../unit_tests/test__helpers.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 89eb390993c6..fc6683af916a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -79,6 +79,7 @@ def _string_from_json(value, _): 'FLOAT': _float_from_json, 'FLOAT64': _float_from_json, 'BOOLEAN': _bool_from_json, + 'BOOL': _bool_from_json, 'TIMESTAMP': _datetime_from_json, 'DATE': _date_from_json, 'RECORD': _record_from_json, diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 46c58c8ea405..c42ac56f680e 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -284,33 +284,37 @@ def test_w_record_subfield(self): coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) - def test_w_int64_float64(self): - # "Standard" SQL dialect uses 'INT64', 'FLOAT64'. + def test_w_int64_float64_bool(self): + # "Standard" SQL dialect uses 'INT64', 'FLOAT64', 'BOOL'. candidate = _Field('REQUIRED', 'candidate', 'STRING') votes = _Field('REQUIRED', 'votes', 'INT64') percentage = _Field('REQUIRED', 'percentage', 'FLOAT64') - schema = [candidate, votes, percentage] + incumbent = _Field('REQUIRED', 'incumbent', 'BOOL') + schema = [candidate, votes, percentage, incumbent] rows = [ {'f': [ {'v': 'Phred Phlyntstone'}, {'v': 8}, {'v': 0.25}, + {'v': 'true'}, ]}, {'f': [ {'v': 'Bharney Rhubble'}, {'v': 4}, {'v': 0.125}, + {'v': 'false'}, ]}, {'f': [ {'v': 'Wylma Phlyntstone'}, {'v': 20}, {'v': 0.625}, + {'v': 'false'}, ]}, ] expected = [ - ('Phred Phlyntstone', 8, 0.25), - ('Bharney Rhubble', 4, 0.125), - ('Wylma Phlyntstone', 20, 0.625), + ('Phred Phlyntstone', 8, 0.25, True), + ('Bharney Rhubble', 4, 0.125, False), + ('Wylma Phlyntstone', 20, 0.625, False), ] coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) From 7534d2097ab21a8425f4711a01a13ab33087f887 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 2 Dec 2016 17:52:42 -0500 Subject: [PATCH 0047/2016] Add support for query parameters (#2776) * Add 'ScalarQueryParameter' class. Holds name, type, and value for scalar query parameters, and handles marshalling them to / from JSON representation mandated by the BigQuery API. * Factor out 'AbstractQueryParameter. * Add 'ArrayQueryParameter' class. Holds name, type, and value for array query parameters, and handles marshalling them to / from JSON representation mandated by the BigQuery API. * Add 'StructQueryParameter' class. Holds name, types, and values for Struct query parameters, and handles marshalling them to / from JSON representation mandated by the BigQuery API. * Add 'QueryParametersProperty' descriptor class. * Add 'query_parameters' property to 'QueryResults' and 'QueryJob'. * Plumb 'udf_resources'/'query_parameters' through client query factories. * Expose concrete query parameter classes as package APIs. Closes #2551. --- .../google/cloud/bigquery/__init__.py | 3 + .../google/cloud/bigquery/_helpers.py | 285 +++++++++++++- .../google/cloud/bigquery/client.py | 35 +- .../google/cloud/bigquery/job.py | 30 +- .../google/cloud/bigquery/query.py | 28 +- .../unit_tests/test__helpers.py | 357 ++++++++++++++++++ .../unit_tests/test_client.py | 89 ++++- .../unit_tests/test_job.py | 271 +++++++++---- .../unit_tests/test_query.py | 118 +++++- 9 files changed, 1125 insertions(+), 91 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index cde9432d83e2..9b5809af6cf7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -23,6 +23,9 @@ """ +from google.cloud.bigquery._helpers import ArrayQueryParameter +from google.cloud.bigquery._helpers import ScalarQueryParameter +from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessGrant from google.cloud.bigquery.dataset import Dataset diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index f68018e706cc..202a39ac8447 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -14,6 +14,8 @@ """Shared helper functions for BigQuery API classes.""" +from collections import OrderedDict + from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _date_from_iso8601_date @@ -230,16 +232,279 @@ def __set__(self, instance, value): instance._udf_resources = tuple(value) -def _build_udf_resources(resources): +class AbstractQueryParameter(object): + """Base class for named / positional query parameters. """ - :type resources: sequence of :class:`UDFResource` - :param resources: fields to be appended. + @classmethod + def from_api_repr(cls, resource): + """Factory: construct paramter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`ScalarQueryParameter` + """ + raise NotImplementedError + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + """ + raise NotImplementedError + - :rtype: mapping - :returns: a mapping describing userDefinedFunctionResources for the query. +class ScalarQueryParameter(AbstractQueryParameter): + """Named / positional query parameters for scalar values. + + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + paramter can only be addressed via position (``?``). + + :type type_: str + :param type_: name of parameter type. One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + + :type value: str, int, float, bool, :class:`datetime.datetime`, or + :class:`datetime.date`. + :param value: the scalar parameter value. """ - udfs = [] - for resource in resources: - udf = {resource.udf_type: resource.value} - udfs.append(udf) - return udfs + def __init__(self, name, type_, value): + self.name = name + self.type_ = type_ + self.value = value + + @classmethod + def positional(cls, type_, value): + """Factory for positional paramters. + + :type type_: str + :param type_: name of paramter type. One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + + :type value: str, int, float, bool, :class:`datetime.datetime`, or + :class:`datetime.date`. + :param value: the scalar parameter value. + + :rtype: :class:`ScalarQueryParameter` + :returns: instance without name + """ + return cls(None, type_, value) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct paramter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`ScalarQueryParameter` + :returns: instance + """ + name = resource.get('name') + type_ = resource['parameterType']['type'] + value = resource['parameterValue']['value'] + converted = _CELLDATA_FROM_JSON[type_](value, None) + return cls(name, type_, converted) + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + :returns: JSON mapping + """ + resource = { + 'parameterType': { + 'type': self.type_, + }, + 'parameterValue': { + 'value': self.value, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + +class ArrayQueryParameter(AbstractQueryParameter): + """Named / positional query parameters for array values. + + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + paramter can only be addressed via position (``?``). + + :type array_type: str + :param array_type: + name of type of array elements. One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + + :type values: list of appropriate scalar type. + :param values: the parameter array values. + """ + def __init__(self, name, array_type, values): + self.name = name + self.array_type = array_type + self.values = values + + @classmethod + def positional(cls, array_type, values): + """Factory for positional paramters. + + :type array_type: str + :param array_type: + name of type of array elements. One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + + :type values: list of appropriate scalar type + :param values: the parameter array values. + + :rtype: :class:`ArrayQueryParameter` + :returns: instance without name + """ + return cls(None, array_type, values) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct paramter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`ArrayQueryParameter` + :returns: instance + """ + name = resource.get('name') + array_type = resource['parameterType']['arrayType'] + values = resource['parameterValue']['arrayValues'] + converted = [ + _CELLDATA_FROM_JSON[array_type](value, None) for value in values] + return cls(name, array_type, converted) + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + :returns: JSON mapping + """ + resource = { + 'parameterType': { + 'arrayType': self.array_type, + }, + 'parameterValue': { + 'arrayValues': self.values, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + +class StructQueryParameter(AbstractQueryParameter): + """Named / positional query parameters for struct values. + + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + paramter can only be addressed via position (``?``). + + :type sub_params: tuple of :class:`ScalarQueryParameter` + :param sub_params: the sub-parameters for the struct + """ + def __init__(self, name, *sub_params): + self.name = name + self.struct_types = OrderedDict( + (sub.name, sub.type_) for sub in sub_params) + self.struct_values = {sub.name: sub.value for sub in sub_params} + + @classmethod + def positional(cls, *sub_params): + """Factory for positional paramters. + + :type sub_params: tuple of :class:`ScalarQueryParameter` + :param sub_params: the sub-parameters for the struct + + :rtype: :class:`StructQueryParameter` + :returns: instance without name + """ + return cls(None, *sub_params) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct paramter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`StructQueryParameter` + :returns: instance + """ + name = resource.get('name') + instance = cls(name) + types = instance.struct_types + for item in resource['parameterType']['structTypes']: + types[item['name']] = item['type'] + struct_values = resource['parameterValue']['structValues'] + for key, value in struct_values.items(): + converted = _CELLDATA_FROM_JSON[types[key]](value, None) + instance.struct_values[key] = converted + return instance + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + :returns: JSON mapping + """ + types = [ + {'name': key, 'type': value} + for key, value in self.struct_types.items() + ] + resource = { + 'parameterType': { + 'structTypes': types, + }, + 'parameterValue': { + 'structValues': self.struct_values, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + +class QueryParametersProperty(object): + """Custom property type, holding query parameter instances.""" + + def __get__(self, instance, owner): + """Descriptor protocol: accessor + + :type instance: :class:`QueryParametersProperty` + :param instance: instance owning the property (None if accessed via + the class). + + :type owner: type + :param owner: the class owning the property. + + :rtype: list of instances of classes derived from + :class:`AbstractQueryParameter`. + :returns: the descriptor, if accessed via the class, or the instance's + query paramters. + """ + if instance is None: + return self + return list(instance._query_parameters) + + def __set__(self, instance, value): + """Descriptor protocol: mutator + + :type instance: :class:`QueryParametersProperty` + :param instance: instance owning the property (None if accessed via + the class). + + :type value: list of instances of classes derived from + :class:`AbstractQueryParameter`. + :param value: new query parameters for the instance. + """ + if not all(isinstance(u, AbstractQueryParameter) for u in value): + raise ValueError( + "query parameters must be derived from AbstractQueryParameter") + instance._query_parameters = tuple(value) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index d16a9d9349d2..4af5b8fc4910 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -275,7 +275,8 @@ def extract_table_to_storage(self, job_name, source, *destination_uris): return ExtractTableToStorageJob(job_name, source, destination_uris, client=self) - def run_async_query(self, job_name, query): + def run_async_query(self, job_name, query, + udf_resources=(), query_parameters=()): """Construct a job for running a SQL query asynchronously. See: @@ -287,21 +288,47 @@ def run_async_query(self, job_name, query): :type query: str :param query: SQL query to be executed + :type udf_resources: tuple + :param udf_resources: An iterable of + :class:`google.cloud.bigquery._helpers.UDFResource` + (empty by default) + + :type query_parameters: tuple + :param query_parameters: + An iterable of + :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` + (empty by default) + :rtype: :class:`google.cloud.bigquery.job.QueryJob` :returns: a new ``QueryJob`` instance """ - return QueryJob(job_name, query, client=self) + return QueryJob(job_name, query, client=self, + udf_resources=udf_resources, + query_parameters=query_parameters) - def run_sync_query(self, query): + def run_sync_query(self, query, udf_resources=(), query_parameters=()): """Run a SQL query synchronously. :type query: str :param query: SQL query to be executed + :type udf_resources: tuple + :param udf_resources: An iterable of + :class:`google.cloud.bigquery._helpers.UDFResource` + (empty by default) + + :type query_parameters: tuple + :param query_parameters: + An iterable of + :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` + (empty by default) + :rtype: :class:`google.cloud.bigquery.query.QueryResults` :returns: a new ``QueryResults`` instance """ - return QueryResults(query, client=self) + return QueryResults(query, client=self, + udf_resources=udf_resources, + query_parameters=query_parameters) # pylint: disable=unused-argument diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 203dd2df6dd0..5eff2b74ef90 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -23,10 +23,10 @@ from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery._helpers import QueryParametersProperty from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _EnumProperty from google.cloud.bigquery._helpers import _TypedProperty -from google.cloud.bigquery._helpers import _build_udf_resources class Compression(_EnumProperty): @@ -909,14 +909,23 @@ class QueryJob(_AsyncJob): :param udf_resources: An iterable of :class:`google.cloud.bigquery._helpers.UDFResource` (empty by default) + + :type query_parameters: tuple + :param query_parameters: + An iterable of + :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` + (empty by default) """ _JOB_TYPE = 'query' _UDF_KEY = 'userDefinedFunctionResources' + _QUERY_PARAMETERS_KEY = 'queryParameters' - def __init__(self, name, query, client, udf_resources=()): + def __init__(self, name, query, client, + udf_resources=(), query_parameters=()): super(QueryJob, self).__init__(name, client) self.query = query self.udf_resources = udf_resources + self.query_parameters = query_parameters self._configuration = _AsyncQueryConfiguration() allow_large_results = _TypedProperty('allow_large_results', bool) @@ -949,6 +958,8 @@ def __init__(self, name, query, client, udf_resources=()): https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.priority """ + query_parameters = QueryParametersProperty() + udf_resources = UDFResourcesProperty() use_query_cache = _TypedProperty('use_query_cache', bool) @@ -1032,8 +1043,19 @@ def _populate_config_resource(self, configuration): if self.maximum_bytes_billed is not None: configuration['maximumBytesBilled'] = self.maximum_bytes_billed if len(self._udf_resources) > 0: - configuration[self._UDF_KEY] = _build_udf_resources( - self._udf_resources) + configuration[self._UDF_KEY] = [ + {udf_resource.udf_type: udf_resource.value} + for udf_resource in self._udf_resources + ] + if len(self._query_parameters) > 0: + configuration[self._QUERY_PARAMETERS_KEY] = [ + query_parameter.to_api_repr() + for query_parameter in self._query_parameters + ] + if self._query_parameters[0].name is None: + configuration['parameterMode'] = 'POSITIONAL' + else: + configuration['parameterMode'] = 'NAMED' def _build_resource(self): """Generate a resource for :meth:`begin`.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index fa1b1da63883..95d2eabdbdbe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -21,7 +21,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery._helpers import _build_udf_resources +from google.cloud.bigquery._helpers import QueryParametersProperty from google.cloud.bigquery._helpers import UDFResourcesProperty @@ -53,16 +53,24 @@ class QueryResults(object): :param udf_resources: An iterable of :class:`google.cloud.bigquery.job.UDFResource` (empty by default) + + :type query_parameters: tuple + :param query_parameters: + An iterable of + :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` + (empty by default) """ _UDF_KEY = 'userDefinedFunctionResources' + _QUERY_PARAMETERS_KEY = 'queryParameters' - def __init__(self, query, client, udf_resources=()): + def __init__(self, query, client, udf_resources=(), query_parameters=()): self._client = client self._properties = {} self.query = query self._configuration = _SyncQueryConfiguration() self.udf_resources = udf_resources + self.query_parameters = query_parameters self._job = None @classmethod @@ -258,6 +266,8 @@ def schema(self): https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#preserveNulls """ + query_parameters = QueryParametersProperty() + timeout_ms = _TypedProperty('timeout_ms', six.integer_types) """See: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#timeoutMs @@ -314,7 +324,19 @@ def _build_resource(self): resource['dryRun'] = self.dry_run if len(self._udf_resources) > 0: - resource[self._UDF_KEY] = _build_udf_resources(self._udf_resources) + resource[self._UDF_KEY] = [ + {udf_resource.udf_type: udf_resource.value} + for udf_resource in self._udf_resources + ] + if len(self._query_parameters) > 0: + resource[self._QUERY_PARAMETERS_KEY] = [ + query_parameter.to_api_repr() + for query_parameter in self._query_parameters + ] + if self._query_parameters[0].name is None: + resource['parameterMode'] = 'POSITIONAL' + else: + resource['parameterMode'] = 'NAMED' return resource diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 14dd39f62e53..b133e95d45a7 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -586,6 +586,363 @@ def test_instance_setter_w_bad_udfs(self): self.assertEqual(instance.udf_resources, []) +class Test_AbstractQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery._helpers import AbstractQueryParameter + return AbstractQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_virtual(self): + klass = self._get_target_class() + with self.assertRaises(NotImplementedError): + klass.from_api_repr({}) + + def test_to_api_virtual(self): + param = self._make_one() + with self.assertRaises(NotImplementedError): + param.to_api_repr() + + +class Test_ScalarQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery._helpers import ScalarQueryParameter + return ScalarQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_positional(self): + klass = self._get_target_class() + param = klass.positional(type_='INT64', value=123) + self.assertEqual(param.name, None) + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_from_api_repr_w_name(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': 123, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': '123', + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': 123, + }, + } + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': 123, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='INT64', value=123) + self.assertEqual(param.to_api_repr(), EXPECTED) + + +class Test_ArrayQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery._helpers import ArrayQueryParameter + return ArrayQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_positional(self): + klass = self._get_target_class() + param = klass.positional(array_type='INT64', values=[1, 2]) + self.assertEqual(param.name, None) + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_from_api_repr_w_name(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'arrayType': 'INT64', + }, + 'parameterValue': { + 'arrayValues': ['1', '2'], + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'arrayType': 'INT64', + }, + 'parameterValue': { + 'arrayValues': ['1', '2'], + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'arrayType': 'INT64', + }, + 'parameterValue': { + 'arrayValues': [1, 2], + }, + } + param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'arrayType': 'INT64', + }, + 'parameterValue': { + 'arrayValues': [1, 2], + }, + } + klass = self._get_target_class() + param = klass.positional(array_type='INT64', values=[1, 2]) + self.assertEqual(param.to_api_repr(), EXPECTED) + + +class Test_StructQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery._helpers import StructQueryParameter + return StructQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + @staticmethod + def _make_subparam(name, type_, value): + from google.cloud.bigquery._helpers import ScalarQueryParameter + return ScalarQueryParameter(name, type_, value) + + def test_ctor(self): + sub_1 = self._make_subparam('bar', 'INT64', 123) + sub_2 = self._make_subparam('baz', 'STRING', 'abc') + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_positional(self): + sub_1 = self._make_subparam('bar', 'INT64', 123) + sub_2 = self._make_subparam('baz', 'STRING', 'abc') + klass = self._get_target_class() + param = klass.positional(sub_1, sub_2) + self.assertEqual(param.name, None) + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_from_api_repr_w_name(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'structTypes': [ + {'name': 'bar', 'type': 'INT64'}, + {'name': 'baz', 'type': 'STRING'}, + ], + }, + 'parameterValue': { + 'structValues': {'bar': 123, 'baz': 'abc'}, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'structTypes': [ + {'name': 'bar', 'type': 'INT64'}, + {'name': 'baz', 'type': 'STRING'}, + ], + }, + 'parameterValue': { + 'structValues': {'bar': 123, 'baz': 'abc'}, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'structTypes': [ + {'name': 'bar', 'type': 'INT64'}, + {'name': 'baz', 'type': 'STRING'}, + ], + }, + 'parameterValue': { + 'structValues': {'bar': 123, 'baz': 'abc'}, + }, + } + sub_1 = self._make_subparam('bar', 'INT64', 123) + sub_2 = self._make_subparam('baz', 'STRING', 'abc') + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'structTypes': [ + {'name': 'bar', 'type': 'INT64'}, + {'name': 'baz', 'type': 'STRING'}, + ], + }, + 'parameterValue': { + 'structValues': {'bar': 123, 'baz': 'abc'}, + }, + } + sub_1 = self._make_subparam('bar', 'INT64', 123) + sub_2 = self._make_subparam('baz', 'STRING', 'abc') + klass = self._get_target_class() + param = klass.positional(sub_1, sub_2) + self.assertEqual(param.to_api_repr(), EXPECTED) + + +class Test_QueryParametersProperty(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery._helpers import QueryParametersProperty + return QueryParametersProperty + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def _descriptor_and_klass(self): + descriptor = self._make_one() + + class _Test(object): + _query_parameters = () + query_parameters = descriptor + + return descriptor, _Test + + def test_class_getter(self): + descriptor, klass = self._descriptor_and_klass() + self.assertIs(klass.query_parameters, descriptor) + + def test_instance_getter_empty(self): + _, klass = self._descriptor_and_klass() + instance = klass() + self.assertEqual(instance.query_parameters, []) + + def test_instance_getter_w_non_empty_list(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] + _, klass = self._descriptor_and_klass() + instance = klass() + instance._query_parameters = tuple(query_parameters) + + self.assertEqual(instance.query_parameters, query_parameters) + + def test_instance_setter_w_empty_list(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] + _, klass = self._descriptor_and_klass() + instance = klass() + instance._query_parameters = query_parameters + + instance.query_parameters = [] + + self.assertEqual(instance.query_parameters, []) + + def test_instance_setter_w_valid_udf(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] + _, klass = self._descriptor_and_klass() + instance = klass() + + instance.query_parameters = query_parameters + + self.assertEqual(instance.query_parameters, query_parameters) + + def test_instance_setter_w_bad_udfs(self): + _, klass = self._descriptor_and_klass() + instance = klass() + + with self.assertRaises(ValueError): + instance.query_parameters = ["foo"] + + self.assertEqual(instance.query_parameters, []) + + class _Field(object): def __init__(self, mode, name='unknown', field_type='UNKNOWN', fields=()): diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index 61ad81227aee..45f49b0f831e 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -470,7 +470,7 @@ def test_extract_table_to_storage(self): self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) - def test_run_async_query(self): + def test_run_async_query_defaults(self): from google.cloud.bigquery.job import QueryJob PROJECT = 'PROJECT' JOB = 'job_name' @@ -483,19 +483,96 @@ def test_run_async_query(self): self.assertIs(job._client, client) self.assertEqual(job.name, JOB) self.assertEqual(job.query, QUERY) + self.assertEqual(job.udf_resources, []) + self.assertEqual(job.query_parameters, []) - def test_run_sync_query(self): - from google.cloud.bigquery.query import QueryResults + def test_run_async_w_udf_resources(self): + from google.cloud.bigquery._helpers import UDFResource + from google.cloud.bigquery.job import QueryJob + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + PROJECT = 'PROJECT' + JOB = 'job_name' + QUERY = 'select count(*) from persons' + creds = _Credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, http=http) + udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] + job = client.run_async_query(JOB, QUERY, udf_resources=udf_resources) + self.assertIsInstance(job, QueryJob) + self.assertIs(job._client, client) + self.assertEqual(job.name, JOB) + self.assertEqual(job.query, QUERY) + self.assertEqual(job.udf_resources, udf_resources) + self.assertEqual(job.query_parameters, []) + + def test_run_async_w_query_parameters(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.job import QueryJob PROJECT = 'PROJECT' + JOB = 'job_name' QUERY = 'select count(*) from persons' creds = _Credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) - job = client.run_sync_query(QUERY) - self.assertIsInstance(job, QueryResults) + query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] + job = client.run_async_query(JOB, QUERY, + query_parameters=query_parameters) + self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) - self.assertIsNone(job.name) + self.assertEqual(job.name, JOB) self.assertEqual(job.query, QUERY) + self.assertEqual(job.udf_resources, []) + self.assertEqual(job.query_parameters, query_parameters) + + def test_run_sync_query_defaults(self): + from google.cloud.bigquery.query import QueryResults + PROJECT = 'PROJECT' + QUERY = 'select count(*) from persons' + creds = _Credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, http=http) + query = client.run_sync_query(QUERY) + self.assertIsInstance(query, QueryResults) + self.assertIs(query._client, client) + self.assertIsNone(query.name) + self.assertEqual(query.query, QUERY) + self.assertEqual(query.udf_resources, []) + self.assertEqual(query.query_parameters, []) + + def test_run_sync_query_w_udf_resources(self): + from google.cloud.bigquery._helpers import UDFResource + from google.cloud.bigquery.query import QueryResults + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + PROJECT = 'PROJECT' + QUERY = 'select count(*) from persons' + creds = _Credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, http=http) + udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] + query = client.run_sync_query(QUERY, udf_resources=udf_resources) + self.assertIsInstance(query, QueryResults) + self.assertIs(query._client, client) + self.assertIsNone(query.name) + self.assertEqual(query.query, QUERY) + self.assertEqual(query.udf_resources, udf_resources) + self.assertEqual(query.query_parameters, []) + + def test_run_sync_query_w_query_parameters(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import QueryResults + PROJECT = 'PROJECT' + QUERY = 'select count(*) from persons' + creds = _Credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, http=http) + query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] + query = client.run_sync_query(QUERY, query_parameters=query_parameters) + self.assertIsInstance(query, QueryResults) + self.assertIs(query._client, client) + self.assertIsNone(query.name) + self.assertEqual(query.query, QUERY) + self.assertEqual(query.udf_resources, []) + self.assertEqual(query.query_parameters, query_parameters) class _Credentials(object): diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index c73715262ba4..84ee25418491 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -426,7 +426,7 @@ def test_begin_w_already_running(self): job.begin() def test_begin_w_bound_client(self): - PATH = 'projects/%s/jobs' % self.PROJECT + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] @@ -443,7 +443,7 @@ def test_begin_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) SENT = { 'jobReference': { 'projectId': self.PROJECT, @@ -465,7 +465,7 @@ def test_begin_w_bound_client(self): def test_begin_w_alternate_client(self): from google.cloud.bigquery.schema import SchemaField - PATH = 'projects/%s/jobs' % self.PROJECT + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource(ended=True) LOAD_CONFIGURATION = { 'sourceUris': [self.SOURCE1], @@ -519,7 +519,7 @@ def test_begin_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) SENT = { 'jobReference': { 'projectId': self.PROJECT, @@ -533,7 +533,7 @@ def test_begin_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) table = _Table() @@ -544,11 +544,11 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) @@ -562,11 +562,11 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -578,11 +578,11 @@ def test_reload_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) RESOURCE = self._makeResource() conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) @@ -597,11 +597,11 @@ def test_reload_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_bound_client(self): - PATH = 'projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_NAME) RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn = _Connection(RESPONSE) @@ -614,11 +614,11 @@ def test_cancel_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_alternate_client(self): - PATH = 'projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_NAME) RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn1 = _Connection() @@ -634,7 +634,7 @@ def test_cancel_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) @@ -773,7 +773,7 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(dataset, RESOURCE) def test_begin_w_bound_client(self): - PATH = 'projects/%s/jobs' % self.PROJECT + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] @@ -791,7 +791,7 @@ def test_begin_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) SENT = { 'jobReference': { 'projectId': self.PROJECT, @@ -816,7 +816,7 @@ def test_begin_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): - PATH = 'projects/%s/jobs' % self.PROJECT + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource(ended=True) COPY_CONFIGURATION = { 'sourceTables': [{ @@ -850,7 +850,7 @@ def test_begin_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) SENT = { 'jobReference': { 'projectId': self.PROJECT, @@ -864,7 +864,7 @@ def test_begin_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) @@ -876,11 +876,11 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) @@ -895,11 +895,11 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -912,11 +912,11 @@ def test_reload_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) RESOURCE = self._makeResource() conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) @@ -932,7 +932,7 @@ def test_reload_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) @@ -1071,7 +1071,7 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(dataset, RESOURCE) def test_begin_w_bound_client(self): - PATH = 'projects/%s/jobs' % self.PROJECT + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] @@ -1089,7 +1089,7 @@ def test_begin_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) SENT = { 'jobReference': { 'projectId': self.PROJECT, @@ -1110,7 +1110,7 @@ def test_begin_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): - PATH = 'projects/%s/jobs' % self.PROJECT + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource(ended=True) EXTRACT_CONFIGURATION = { 'sourceTable': { @@ -1144,7 +1144,7 @@ def test_begin_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) SENT = { 'jobReference': { 'projectId': self.PROJECT, @@ -1158,7 +1158,7 @@ def test_begin_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) @@ -1170,11 +1170,11 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) @@ -1189,11 +1189,11 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -1206,11 +1206,11 @@ def test_reload_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) RESOURCE = self._makeResource() conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) @@ -1226,7 +1226,7 @@ def test_reload_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) @@ -1287,12 +1287,31 @@ def _verifyIntegerResourceProperties(self, job, config): else: self.assertIsNone(job.maximum_bytes_billed) + def _verify_udf_resources(self, job, config): + udf_resources = config.get('userDefinedFunctionResources', ()) + self.assertEqual(len(job.udf_resources), len(udf_resources)) + for found, expected in zip(job.udf_resources, udf_resources): + if 'resourceUri' in expected: + self.assertEqual(found.udf_type, 'resourceUri') + self.assertEqual(found.value, expected['resourceUri']) + else: + self.assertEqual(found.udf_type, 'inlineCode') + self.assertEqual(found.value, expected['inlineCode']) + + def _verifyQueryParameters(self, job, config): + query_parameters = config.get('queryParameters', ()) + self.assertEqual(len(job.query_parameters), len(query_parameters)) + for found, expected in zip(job.query_parameters, query_parameters): + self.assertEqual(found.to_api_repr(), expected) + def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) config = resource.get('configuration', {}).get('query') self._verifyBooleanResourceProperties(job, config) self._verifyIntegerResourceProperties(job, config) + self._verify_udf_resources(job, config) + self._verifyQueryParameters(job, config) self.assertEqual(job.query, config['query']) if 'createDisposition' in config: @@ -1330,7 +1349,7 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.write_disposition) - def test_ctor(self): + def test_ctor_defaults(self): client = _Client(self.PROJECT) job = self._make_one(self.JOB_NAME, self.QUERY, client) self.assertEqual(job.query, self.QUERY) @@ -1356,6 +1375,23 @@ def test_ctor(self): self.assertIsNone(job.maximum_billing_tier) self.assertIsNone(job.maximum_bytes_billed) + def test_ctor_w_udf_resources(self): + from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client, + udf_resources=udf_resources) + self.assertEqual(job.udf_resources, udf_resources) + + def test_ctor_w_query_parameters(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client, + query_parameters=query_parameters) + self.assertEqual(job.query_parameters, query_parameters) + def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) @@ -1418,7 +1454,7 @@ def test_results(self): self.assertIs(results._job, job) def test_begin_w_bound_client(self): - PATH = 'projects/%s/jobs' % self.PROJECT + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] @@ -1434,7 +1470,7 @@ def test_begin_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) SENT = { 'jobReference': { 'projectId': self.PROJECT, @@ -1452,7 +1488,7 @@ def test_begin_w_bound_client(self): def test_begin_w_alternate_client(self): from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import Table - PATH = 'projects/%s/jobs' % self.PROJECT + PATH = '/projects/%s/jobs' % (self.PROJECT,) TABLE = 'TABLE' DS_NAME = 'DATASET' RESOURCE = self._makeResource(ended=True) @@ -1507,7 +1543,7 @@ def test_begin_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) SENT = { 'jobReference': { 'projectId': self.PROJECT, @@ -1520,31 +1556,140 @@ def test_begin_w_alternate_client(self): self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) - def test_begin_w_bound_client_and_udf(self): + def test_begin_w_udf(self): from google.cloud.bigquery._helpers import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' - PATH = 'projects/%s/jobs' % self.PROJECT + INLINE_UDF_CODE = 'var someCode = "here";' + PATH = '/projects/%s/jobs' % (self.PROJECT,) + RESOURCE = self._makeResource() + # Ensure None for missing server-set props + del RESOURCE['statistics']['creationTime'] + del RESOURCE['etag'] + del RESOURCE['selfLink'] + del RESOURCE['user_email'] + RESOURCE['configuration']['query']['userDefinedFunctionResources'] = [ + {'resourceUri': RESOURCE_URI}, + {'inlineCode': INLINE_UDF_CODE}, + ] + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + udf_resources = [ + UDFResource("resourceUri", RESOURCE_URI), + UDFResource("inlineCode", INLINE_UDF_CODE), + ] + job = self._make_one(self.JOB_NAME, self.QUERY, client, + udf_resources=udf_resources) + + job.begin() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], PATH) + self.assertEqual(job.udf_resources, udf_resources) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'query': { + 'query': self.QUERY, + 'userDefinedFunctionResources': [ + {'resourceUri': RESOURCE_URI}, + {'inlineCode': INLINE_UDF_CODE}, + ] + }, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_named_query_parameter(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] del RESOURCE['etag'] del RESOURCE['selfLink'] del RESOURCE['user_email'] + config = RESOURCE['configuration']['query'] + config['parameterMode'] = 'NAMED' + config['queryParameters'] = [ + { + 'name': 'foo', + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': 123, + }, + }, + ] + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_NAME, self.QUERY, client, + query_parameters=query_parameters) + + job.begin() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], PATH) + self.assertEqual(job.query_parameters, query_parameters) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'query': { + 'query': self.QUERY, + 'parameterMode': 'NAMED', + 'queryParameters': config['queryParameters'], + }, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_positional_query_parameter(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter.positional('INT64', 123)] + PATH = '/projects/%s/jobs' % (self.PROJECT,) + RESOURCE = self._makeResource() + # Ensure None for missing server-set props + del RESOURCE['statistics']['creationTime'] + del RESOURCE['etag'] + del RESOURCE['selfLink'] + del RESOURCE['user_email'] + config = RESOURCE['configuration']['query'] + config['parameterMode'] = 'POSITIONAL' + config['queryParameters'] = [ + { + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': 123, + }, + }, + ] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client, - udf_resources=[ - UDFResource("resourceUri", RESOURCE_URI) - ]) + query_parameters=query_parameters) job.begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(job.udf_resources, - [UDFResource("resourceUri", RESOURCE_URI)]) + self.assertEqual(req['path'], PATH) + self.assertEqual(job.query_parameters, query_parameters) SENT = { 'jobReference': { 'projectId': self.PROJECT, @@ -1553,8 +1698,8 @@ def test_begin_w_bound_client_and_udf(self): 'configuration': { 'query': { 'query': self.QUERY, - 'userDefinedFunctionResources': - [{'resourceUri': RESOURCE_URI}] + 'parameterMode': 'POSITIONAL', + 'queryParameters': config['queryParameters'], }, }, } @@ -1562,7 +1707,7 @@ def test_begin_w_bound_client_and_udf(self): self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client) @@ -1572,11 +1717,11 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) @@ -1589,13 +1734,13 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import Table - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) DS_NAME = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() @@ -1614,11 +1759,11 @@ def test_reload_w_bound_client(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = 'projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) DS_NAME = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() @@ -1640,7 +1785,7 @@ def test_reload_w_alternate_client(self): self.assertEqual(len(conn2._requested), 1) req = conn2._requested[0] self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/unit_tests/test_query.py index 43aedf334ce0..3dfc795a6072 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_query.py +++ b/packages/google-cloud-bigquery/unit_tests/test_query.py @@ -100,6 +100,23 @@ def _verifyRows(self, query, resource): self.assertEqual(f_row, tuple([cell['v'] for cell in e_row['f']])) + def _verify_udf_resources(self, query, resource): + udf_resources = resource.get('userDefinedFunctionResources', ()) + self.assertEqual(len(query.udf_resources), len(udf_resources)) + for found, expected in zip(query.udf_resources, udf_resources): + if 'resourceUri' in expected: + self.assertEqual(found.udf_type, 'resourceUri') + self.assertEqual(found.value, expected['resourceUri']) + else: + self.assertEqual(found.udf_type, 'inlineCode') + self.assertEqual(found.value, expected['inlineCode']) + + def _verifyQueryParameters(self, query, resource): + query_parameters = resource.get('queryParameters', ()) + self.assertEqual(len(query.query_parameters), len(query_parameters)) + for found, expected in zip(query.query_parameters, query_parameters): + self.assertEqual(found.to_api_repr(), expected) + def _verifyResourceProperties(self, query, resource): self.assertEqual(query.cache_hit, resource.get('cacheHit')) self.assertEqual(query.complete, resource.get('jobComplete')) @@ -114,10 +131,12 @@ def _verifyResourceProperties(self, query, resource): else: self.assertIsNone(query.name) + self._verify_udf_resources(query, resource) + self._verifyQueryParameters(query, resource) self._verifySchema(query, resource) self._verifyRows(query, resource) - def test_ctor(self): + def test_ctor_defaults(self): client = _Client(self.PROJECT) query = self._make_one(self.QUERY, client) self.assertEqual(query.query, self.QUERY) @@ -128,10 +147,12 @@ def test_ctor(self): self.assertIsNone(query.errors) self.assertIsNone(query.name) self.assertIsNone(query.page_token) + self.assertEqual(query.query_parameters, []) self.assertEqual(query.rows, []) self.assertIsNone(query.schema) self.assertIsNone(query.total_rows) self.assertIsNone(query.total_bytes_processed) + self.assertEqual(query.udf_resources, []) self.assertIsNone(query.default_dataset) self.assertIsNone(query.max_results) @@ -139,6 +160,22 @@ def test_ctor(self): self.assertIsNone(query.use_query_cache) self.assertIsNone(query.use_legacy_sql) + def test_ctor_w_udf_resources(self): + from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' + udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client, udf_resources=udf_resources) + self.assertEqual(query.udf_resources, udf_resources) + + def test_ctor_w_query_parameters(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client, + query_parameters=query_parameters) + self.assertEqual(query.query_parameters, query_parameters) + def test_from_query_job(self): from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.job import QueryJob @@ -293,6 +330,9 @@ def test_run_w_inline_udf(self): INLINE_UDF_CODE = 'var someCode = "here";' PATH = 'projects/%s/queries' % self.PROJECT RESOURCE = self._makeResource(complete=False) + RESOURCE['userDefinedFunctionResources'] = [ + {'inlineCode': INLINE_UDF_CODE}, + ] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) query = self._make_one(self.QUERY, client) @@ -315,6 +355,9 @@ def test_run_w_udf_resource_uri(self): RESOURCE_URI = 'gs://some-bucket/js/lib.js' PATH = 'projects/%s/queries' % self.PROJECT RESOURCE = self._makeResource(complete=False) + RESOURCE['userDefinedFunctionResources'] = [ + {'resourceUri': RESOURCE_URI}, + ] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) query = self._make_one(self.QUERY, client) @@ -338,6 +381,10 @@ def test_run_w_mixed_udfs(self): INLINE_UDF_CODE = 'var someCode = "here";' PATH = 'projects/%s/queries' % self.PROJECT RESOURCE = self._makeResource(complete=False) + RESOURCE['userDefinedFunctionResources'] = [ + {'resourceUri': RESOURCE_URI}, + {'inlineCode': INLINE_UDF_CODE}, + ] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) query = self._make_one(self.QUERY, client) @@ -360,6 +407,75 @@ def test_run_w_mixed_udfs(self): self.assertEqual(req['data'], SENT) self._verifyResourceProperties(query, RESOURCE) + def test_run_w_named_query_paramter(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + PATH = 'projects/%s/queries' % self.PROJECT + RESOURCE = self._makeResource(complete=False) + RESOURCE['parameterMode'] = 'NAMED' + RESOURCE['queryParameters'] = [ + { + 'name': 'foo', + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': 123, + }, + }, + ] + query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + query = self._make_one(self.QUERY, client, + query_parameters=query_parameters) + query.run() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'query': self.QUERY, + 'parameterMode': 'NAMED', + 'queryParameters': RESOURCE['queryParameters'], + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(query, RESOURCE) + + def test_run_w_positional_query_paramter(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + PATH = 'projects/%s/queries' % self.PROJECT + RESOURCE = self._makeResource(complete=False) + RESOURCE['parameterMode'] = 'POSITIONAL' + RESOURCE['queryParameters'] = [ + { + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': 123, + }, + }, + ] + query_parameters = [ScalarQueryParameter.positional('INT64', 123)] + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + query = self._make_one(self.QUERY, client, + query_parameters=query_parameters) + query.run() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'query': self.QUERY, + 'parameterMode': 'POSITIONAL', + 'queryParameters': RESOURCE['queryParameters'], + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(query, RESOURCE) + def test_fetch_data_query_not_yet_run(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) From cc6605425b975b33d283369f6a7b7fbf0e2ca8b1 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 2 Dec 2016 15:02:25 -0800 Subject: [PATCH 0048/2016] Switch from oauth2client to google-auth (#2726) * Removes all use of oauth2client from every package and tests. * Updates core to use google-auth's default credentials, project ID, and scoping logic. * Updates bigtable to use google-auth's scoping logic. --- .../unit_tests/test_client.py | 43 +++++++------------ 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index 45f49b0f831e..97edcf05117e 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -28,7 +28,7 @@ def _make_one(self, *args, **kw): def test_ctor(self): from google.cloud.bigquery._http import Connection PROJECT = 'PROJECT' - creds = _Credentials() + creds = object() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) self.assertIsInstance(client._connection, Connection) @@ -57,7 +57,7 @@ def test_list_projects_defaults(self): 'friendlyName': 'Two'}, ] } - creds = _Credentials() + creds = object() client = self._make_one(PROJECT_1, creds) conn = client._connection = _Connection(DATA) @@ -86,7 +86,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): PATH = 'projects' TOKEN = 'TOKEN' DATA = {} - creds = _Credentials() + creds = object() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -128,7 +128,7 @@ def test_list_datasets_defaults(self): 'friendlyName': 'Two'}, ] } - creds = _Credentials() + creds = object() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -156,7 +156,7 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): PATH = 'projects/%s/datasets' % PROJECT TOKEN = 'TOKEN' DATA = {} - creds = _Credentials() + creds = object() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -180,7 +180,7 @@ def test_dataset(self): from google.cloud.bigquery.dataset import Dataset PROJECT = 'PROJECT' DATASET = 'dataset_name' - creds = _Credentials() + creds = object() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) @@ -190,7 +190,7 @@ def test_dataset(self): def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' - creds = _Credentials() + creds = object() client = self._make_one(PROJECT, creds) with self.assertRaises(ValueError): client.job_from_resource({'configuration': {'nonesuch': {}}}) @@ -304,7 +304,7 @@ def test_list_jobs_defaults(self): LOAD_DATA, ] } - creds = _Credentials() + creds = object() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -360,7 +360,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): LOAD_DATA, ] } - creds = _Credentials() + creds = object() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -388,7 +388,7 @@ def test_list_jobs_explicit_missing(self): PATH = 'projects/%s/jobs' % PROJECT DATA = {} TOKEN = 'TOKEN' - creds = _Credentials() + creds = object() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -419,7 +419,7 @@ def test_load_table_from_storage(self): DATASET = 'dataset_name' DESTINATION = 'destination_table' SOURCE_URI = 'http://example.com/source.csv' - creds = _Credentials() + creds = object() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) @@ -438,7 +438,7 @@ def test_copy_table(self): DATASET = 'dataset_name' SOURCE = 'source_table' DESTINATION = 'destination_table' - creds = _Credentials() + creds = object() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) @@ -458,7 +458,7 @@ def test_extract_table_to_storage(self): DATASET = 'dataset_name' SOURCE = 'source_table' DESTINATION = 'gs://bucket_name/object_name' - creds = _Credentials() + creds = object() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) @@ -475,7 +475,7 @@ def test_run_async_query_defaults(self): PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' - creds = _Credentials() + creds = object() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) job = client.run_async_query(JOB, QUERY) @@ -511,7 +511,7 @@ def test_run_async_w_query_parameters(self): PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' - creds = _Credentials() + creds = object() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] @@ -575,19 +575,6 @@ def test_run_sync_query_w_query_parameters(self): self.assertEqual(query.query_parameters, query_parameters) -class _Credentials(object): - - _scopes = None - - @staticmethod - def create_scoped_required(): - return True - - def create_scoped(self, scope): - self._scopes = scope - return self - - class _Connection(object): def __init__(self, *responses): From fce58e170112458cef31ee8755657f524cfde796 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 2 Dec 2016 15:58:22 -0800 Subject: [PATCH 0049/2016] Remove instances of _Credentials (#2802) --- packages/google-cloud-bigquery/unit_tests/test_client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index 97edcf05117e..8c76897b3f6d 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -493,7 +493,7 @@ def test_run_async_w_udf_resources(self): PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' - creds = _Credentials() + creds = object() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] @@ -528,7 +528,7 @@ def test_run_sync_query_defaults(self): from google.cloud.bigquery.query import QueryResults PROJECT = 'PROJECT' QUERY = 'select count(*) from persons' - creds = _Credentials() + creds = object() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) query = client.run_sync_query(QUERY) @@ -545,7 +545,7 @@ def test_run_sync_query_w_udf_resources(self): RESOURCE_URI = 'gs://some-bucket/js/lib.js' PROJECT = 'PROJECT' QUERY = 'select count(*) from persons' - creds = _Credentials() + creds = object() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] @@ -562,7 +562,7 @@ def test_run_sync_query_w_query_parameters(self): from google.cloud.bigquery.query import QueryResults PROJECT = 'PROJECT' QUERY = 'select count(*) from persons' - creds = _Credentials() + creds = object() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] From 339bed34a28c2cc984715f99ba60599401145dd8 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 2 Dec 2016 19:16:12 -0500 Subject: [PATCH 0050/2016] Coerce query paramter values from native types to JSON-appropriate types. Also, distingush parsing 'DATETIME' vs. 'TIMESTAMP' properly when processing row data. Closes #2799. --- .../google/cloud/bigquery/_helpers.py | 100 +++++++++-- .../unit_tests/test__helpers.py | 163 +++++++++++++++++- .../unit_tests/test_job.py | 4 +- .../unit_tests/test_query.py | 4 +- 4 files changed, 248 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 202a39ac8447..bcfc877cd0c5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -15,9 +15,12 @@ """Shared helper functions for BigQuery API classes.""" from collections import OrderedDict +import datetime -from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _date_from_iso8601_date +from google.cloud._helpers import _datetime_from_microseconds +from google.cloud._helpers import _datetime_to_rfc3339 +from google.cloud._helpers import _microseconds_from_datetime def _not_null(value, field): @@ -43,13 +46,20 @@ def _bool_from_json(value, field): return value.lower() in ['t', 'true', '1'] -def _datetime_from_json(value, field): +def _timestamp_from_json(value, field): """Coerce 'value' to a datetime, if set or not nullable.""" if _not_null(value, field): # value will be a float in seconds, to microsecond precision, in UTC. return _datetime_from_microseconds(1e6 * float(value)) +def _datetime_from_json(value, field): + """Coerce 'value' to a datetime, if set or not nullable.""" + if _not_null(value, field): + # value will be a string, in YYYY-MM-DDTHH:MM:SS form. + return datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S') + + def _date_from_json(value, field): """Coerce 'value' to a datetime date, if set or not nullable""" if _not_null(value, field): @@ -83,13 +93,67 @@ def _string_from_json(value, _): 'FLOAT64': _float_from_json, 'BOOLEAN': _bool_from_json, 'BOOL': _bool_from_json, - 'TIMESTAMP': _datetime_from_json, + 'TIMESTAMP': _timestamp_from_json, + 'DATETIME': _datetime_from_json, 'DATE': _date_from_json, 'RECORD': _record_from_json, 'STRING': _string_from_json, } +def _int_to_json(value): + """Coerce 'value' to an JSON-compatible representation.""" + if isinstance(value, int): + value = str(value) + return value + + +def _float_to_json(value): + """Coerce 'value' to an JSON-compatible representation.""" + return value + + +def _bool_to_json(value): + """Coerce 'value' to an JSON-compatible representation.""" + if isinstance(value, bool): + value = 'true' if value else 'false' + return value + + +def _timestamp_to_json(value): + """Coerce 'value' to an JSON-compatible representation.""" + if isinstance(value, datetime.datetime): + value = _microseconds_from_datetime(value) / 1.0e6 + return value + + +def _datetime_to_json(value): + """Coerce 'value' to an JSON-compatible representation.""" + if isinstance(value, datetime.datetime): + value = _datetime_to_rfc3339(value) + return value + + +def _date_to_json(value): + """Coerce 'value' to an JSON-compatible representation.""" + if isinstance(value, datetime.date): + value = value.isoformat() + return value + + +_SCALAR_VALUE_TO_JSON = { + 'INTEGER': _int_to_json, + 'INT64': _int_to_json, + 'FLOAT': _float_to_json, + 'FLOAT64': _float_to_json, + 'BOOLEAN': _bool_to_json, + 'BOOL': _bool_to_json, + 'TIMESTAMP': _timestamp_to_json, + 'DATETIME': _datetime_to_json, + 'DATE': _date_to_json, +} + + def _row_from_json(row, schema): """Convert JSON row data to row with appropriate types. @@ -262,8 +326,8 @@ class ScalarQueryParameter(AbstractQueryParameter): paramter can only be addressed via position (``?``). :type type_: str - :param type_: name of parameter type. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + :param type_: name of parameter type. One of 'STRING', 'INT64', + 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. :type value: str, int, float, bool, :class:`datetime.datetime`, or :class:`datetime.date`. @@ -279,8 +343,9 @@ def positional(cls, type_, value): """Factory for positional paramters. :type type_: str - :param type_: name of paramter type. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + :param type_: + name of paramter type. One of 'STRING', 'INT64', + 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. :type value: str, int, float, bool, :class:`datetime.datetime`, or :class:`datetime.date`. @@ -313,12 +378,16 @@ def to_api_repr(self): :rtype: dict :returns: JSON mapping """ + value = self.value + converter = _SCALAR_VALUE_TO_JSON.get(self.type_) + if converter is not None: + value = converter(value) resource = { 'parameterType': { 'type': self.type_, }, 'parameterValue': { - 'value': self.value, + 'value': value, }, } if self.name is not None: @@ -386,12 +455,16 @@ def to_api_repr(self): :rtype: dict :returns: JSON mapping """ + values = self.values + converter = _SCALAR_VALUE_TO_JSON.get(self.array_type) + if converter is not None: + values = [converter(value) for value in values] resource = { 'parameterType': { 'arrayType': self.array_type, }, 'parameterValue': { - 'arrayValues': self.values, + 'arrayValues': values, }, } if self.name is not None: @@ -458,12 +531,19 @@ def to_api_repr(self): {'name': key, 'type': value} for key, value in self.struct_types.items() ] + values = {} + for name, value in self.struct_values.items(): + converter = _SCALAR_VALUE_TO_JSON.get(self.struct_types[name]) + if converter is not None: + value = converter(value) + values[name] = value + resource = { 'parameterType': { 'structTypes': types, }, 'parameterValue': { - 'structValues': self.struct_values, + 'structValues': values, }, } if self.name is not None: diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index b133e95d45a7..35c256e4ce43 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -105,11 +105,11 @@ def test_w_value_other(self): self.assertFalse(coerced) -class Test_datetime_from_json(unittest.TestCase): +class Test_timestamp_from_json(unittest.TestCase): def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _datetime_from_json - return _datetime_from_json(value, field) + from google.cloud.bigquery._helpers import _timestamp_from_json + return _timestamp_from_json(value, field) def test_w_none_nullable(self): self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) @@ -135,6 +135,27 @@ def test_w_float_value(self): _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) +class Test_datetime_from_json(unittest.TestCase): + + def _call_fut(self, value, field): + from google.cloud.bigquery._helpers import _datetime_from_json + return _datetime_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._call_fut(None, _Field('REQUIRED')) + + def test_w_string_value(self): + import datetime + coerced = self._call_fut('2016-12-02T18:51:33', object()) + self.assertEqual( + coerced, + datetime.datetime(2016, 12, 2, 18, 51, 33)) + + class Test_date_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -668,7 +689,7 @@ def test_to_api_repr_w_name(self): 'type': 'INT64', }, 'parameterValue': { - 'value': 123, + 'value': '123', }, } param = self._make_one(name='foo', type_='INT64', value=123) @@ -680,13 +701,137 @@ def test_to_api_repr_wo_name(self): 'type': 'INT64', }, 'parameterValue': { - 'value': 123, + 'value': '123', }, } klass = self._get_target_class() param = klass.positional(type_='INT64', value=123) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_float(self): + EXPECTED = { + 'parameterType': { + 'type': 'FLOAT64', + }, + 'parameterValue': { + 'value': 12.345, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='FLOAT64', value=12.345) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_bool(self): + EXPECTED = { + 'parameterType': { + 'type': 'BOOL', + }, + 'parameterValue': { + 'value': 'false', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='BOOL', value=False) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_datetime(self): + import datetime + from google.cloud._helpers import _microseconds_from_datetime + now = datetime.datetime.utcnow() + seconds = _microseconds_from_datetime(now) / 1.0e6 + EXPECTED = { + 'parameterType': { + 'type': 'TIMESTAMP', + }, + 'parameterValue': { + 'value': seconds, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='TIMESTAMP', value=now) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_micros(self): + import datetime + from google.cloud._helpers import _microseconds_from_datetime + now = datetime.datetime.utcnow() + seconds = _microseconds_from_datetime(now) / 1.0e6 + EXPECTED = { + 'parameterType': { + 'type': 'TIMESTAMP', + }, + 'parameterValue': { + 'value': seconds, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='TIMESTAMP', value=seconds) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_datetime(self): + import datetime + from google.cloud._helpers import _datetime_to_rfc3339 + now = datetime.datetime.utcnow() + EXPECTED = { + 'parameterType': { + 'type': 'DATETIME', + }, + 'parameterValue': { + 'value': _datetime_to_rfc3339(now), + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATETIME', value=now) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_string(self): + import datetime + from google.cloud._helpers import _datetime_to_rfc3339 + now = datetime.datetime.utcnow() + now_str = _datetime_to_rfc3339(now) + EXPECTED = { + 'parameterType': { + 'type': 'DATETIME', + }, + 'parameterValue': { + 'value': now_str, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATETIME', value=now_str) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_date_date(self): + import datetime + today = datetime.date.today() + EXPECTED = { + 'parameterType': { + 'type': 'DATE', + }, + 'parameterValue': { + 'value': today.isoformat(), + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATE', value=today) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_date_string(self): + import datetime + today = datetime.date.today() + today_str = today.isoformat(), + EXPECTED = { + 'parameterType': { + 'type': 'DATE', + }, + 'parameterValue': { + 'value': today_str, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATE', value=today_str) + self.assertEqual(param.to_api_repr(), EXPECTED) + class Test_ArrayQueryParameter(unittest.TestCase): @@ -749,7 +894,7 @@ def test_to_api_repr_w_name(self): 'arrayType': 'INT64', }, 'parameterValue': { - 'arrayValues': [1, 2], + 'arrayValues': ['1', '2'], }, } param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) @@ -761,7 +906,7 @@ def test_to_api_repr_wo_name(self): 'arrayType': 'INT64', }, 'parameterValue': { - 'arrayValues': [1, 2], + 'arrayValues': ['1', '2'], }, } klass = self._get_target_class() @@ -848,7 +993,7 @@ def test_to_api_repr_w_name(self): ], }, 'parameterValue': { - 'structValues': {'bar': 123, 'baz': 'abc'}, + 'structValues': {'bar': '123', 'baz': 'abc'}, }, } sub_1 = self._make_subparam('bar', 'INT64', 123) @@ -865,7 +1010,7 @@ def test_to_api_repr_wo_name(self): ], }, 'parameterValue': { - 'structValues': {'bar': 123, 'baz': 'abc'}, + 'structValues': {'bar': '123', 'baz': 'abc'}, }, } sub_1 = self._make_subparam('bar', 'INT64', 123) diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index 84ee25418491..a451e0040931 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -1624,7 +1624,7 @@ def test_begin_w_named_query_parameter(self): 'type': 'INT64', }, 'parameterValue': { - 'value': 123, + 'value': '123', }, }, ] @@ -1674,7 +1674,7 @@ def test_begin_w_positional_query_parameter(self): 'type': 'INT64', }, 'parameterValue': { - 'value': 123, + 'value': '123', }, }, ] diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/unit_tests/test_query.py index 3dfc795a6072..fa8dd401e435 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_query.py +++ b/packages/google-cloud-bigquery/unit_tests/test_query.py @@ -419,7 +419,7 @@ def test_run_w_named_query_paramter(self): 'type': 'INT64', }, 'parameterValue': { - 'value': 123, + 'value': '123', }, }, ] @@ -453,7 +453,7 @@ def test_run_w_positional_query_paramter(self): 'type': 'INT64', }, 'parameterValue': { - 'value': 123, + 'value': '123', }, }, ] From 4b1599ab3895350f4b9aa0b0e46dccb3045d19b2 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Sat, 3 Dec 2016 14:20:52 -0500 Subject: [PATCH 0051/2016] Coverage. --- .../unit_tests/test__helpers.py | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 35c256e4ce43..b3ccb1d715f5 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -432,6 +432,96 @@ def test_w_int64_float64_bool(self): self.assertEqual(coerced, expected) +class Test_int_to_json(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _int_to_json + return _int_to_json(value) + + def test_w_int(self): + self.assertEqual(self._call_fut(123), '123') + + def test_w_string(self): + self.assertEqual(self._call_fut('123'), '123') + + +class Test_float_to_json(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _float_to_json + return _float_to_json(value) + + def test_w_float(self): + self.assertEqual(self._call_fut(1.23), 1.23) + + +class Test_bool_to_json(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _bool_to_json + return _bool_to_json(value) + + def test_w_true(self): + self.assertEqual(self._call_fut(True), 'true') + + def test_w_false(self): + self.assertEqual(self._call_fut(False), 'false') + + def test_w_string(self): + self.assertEqual(self._call_fut('false'), 'false') + + +class Test_timestamp_to_json(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _timestamp_to_json + return _timestamp_to_json(value) + + def test_w_float(self): + self.assertEqual(self._call_fut(1.234567), 1.234567) + + def test_w_datetime(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _microseconds_from_datetime + when = datetime.datetime(2016, 12, 3, 14, 11, 27, tzinfo=UTC) + self.assertEqual(self._call_fut(when), + _microseconds_from_datetime(when) / 1e6) + + +class Test_datetime_to_json(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _datetime_to_json + return _datetime_to_json(value) + + def test_w_string(self): + RFC3339 = '2016-12-03T14:14:51Z' + self.assertEqual(self._call_fut(RFC3339), RFC3339) + + def test_w_datetime(self): + import datetime + from google.cloud._helpers import UTC + when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC) + self.assertEqual(self._call_fut(when), '2016-12-03T14:11:27.123456Z') + + +class Test_date_to_json(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _date_to_json + return _date_to_json(value) + + def test_w_string(self): + RFC3339 = '2016-12-03' + self.assertEqual(self._call_fut(RFC3339), RFC3339) + + def test_w_datetime(self): + import datetime + when = datetime.date(2016, 12, 3) + self.assertEqual(self._call_fut(when), '2016-12-03') + + class Test_ConfigurationProperty(unittest.TestCase): @staticmethod @@ -832,6 +922,19 @@ def test_to_api_repr_w_date_string(self): param = klass.positional(type_='DATE', value=today_str) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_unknown_type(self): + EXPECTED = { + 'parameterType': { + 'type': 'UNKNOWN', + }, + 'parameterValue': { + 'value': 'unknown', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='UNKNOWN', value='unknown') + self.assertEqual(param.to_api_repr(), EXPECTED) + class Test_ArrayQueryParameter(unittest.TestCase): @@ -913,6 +1016,19 @@ def test_to_api_repr_wo_name(self): param = klass.positional(array_type='INT64', values=[1, 2]) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_unknown_type(self): + EXPECTED = { + 'parameterType': { + 'arrayType': 'UNKNOWN', + }, + 'parameterValue': { + 'arrayValues': ['unknown'], + }, + } + klass = self._get_target_class() + param = klass.positional(array_type='UNKNOWN', values=['unknown']) + self.assertEqual(param.to_api_repr(), EXPECTED) + class Test_StructQueryParameter(unittest.TestCase): From 61922932747737d2e948dd302d424c1de21517cd Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Sat, 3 Dec 2016 14:24:48 -0500 Subject: [PATCH 0052/2016] Use named strptime template from 'google.cloud._helpers'. Addresses: https://github.com/GoogleCloudPlatform/google-cloud-python/pull/2803#pullrequestreview-11265971 --- .../google-cloud-bigquery/google/cloud/bigquery/_helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index bcfc877cd0c5..8cebe9fbec01 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -21,6 +21,7 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _microseconds_from_datetime +from google.cloud._helpers import _RFC3339_NO_FRACTION def _not_null(value, field): @@ -57,7 +58,7 @@ def _datetime_from_json(value, field): """Coerce 'value' to a datetime, if set or not nullable.""" if _not_null(value, field): # value will be a string, in YYYY-MM-DDTHH:MM:SS form. - return datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S') + return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION) def _date_from_json(value, field): From 6079be616b9d58e7f5abbfa135e778ccb0a29711 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 5 Dec 2016 11:15:01 -0500 Subject: [PATCH 0053/2016] Allow timestamp strings in 'Table.insert_data'. Closes #1382. --- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 9 +++++++-- packages/google-cloud-bigquery/unit_tests/test_table.py | 9 +++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f8b52f772f87..f9c6a970fa18 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -738,15 +738,20 @@ def insert_data(self, rows_info = [] data = {'rows': rows_info} + def _convert_timestamp(value): + if isinstance(value, datetime.datetime): + value = _microseconds_from_datetime(value) * 1e-6 + return value + for index, row in enumerate(rows): row_info = {} for field, value in zip(self._schema, row): - if field.field_type == 'TIMESTAMP' and value is not None: + if field.field_type == 'TIMESTAMP': # BigQuery stores TIMESTAMP data internally as a # UNIX timestamp with microsecond precision. # Specifies the number of seconds since the epoch. - value = _microseconds_from_datetime(value) * 1e-6 + value = _convert_timestamp(value) row_info[field.name] = value info = {'json': row_info} diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 9fcea12d2dce..0b8453103082 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -1296,6 +1296,7 @@ def test_insert_data_wo_schema(self): def test_insert_data_w_bound_client(self): import datetime from google.cloud._helpers import UTC + from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _microseconds_from_datetime from google.cloud.bigquery.table import SchemaField @@ -1313,16 +1314,16 @@ def test_insert_data_w_bound_client(self): table = self._make_one(self.TABLE_NAME, dataset=dataset, schema=[full_name, age, joined]) ROWS = [ - ('Phred Phlyntstone', 32, WHEN), + ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)), ('Bhettye Rhubble', 27, None), ] def _row_data(row): - joined = None - if row[2] is not None: - joined = _microseconds_from_datetime(row[2]) * 1e-6 + joined = row[2] + if isinstance(row[2], datetime.datetime): + joined = _microseconds_from_datetime(joined) * 1e-6 return {'full_name': row[0], 'age': row[1], 'joined': joined} From 8da9f509bbfc1cd7d060baf07012421effdd3dc7 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 5 Dec 2016 11:43:26 -0500 Subject: [PATCH 0054/2016] Reorder to match docs. --- .../google/cloud/bigquery/_helpers.py | 12 +++---- .../unit_tests/test__helpers.py | 34 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 8cebe9fbec01..7d2fbc504c4f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -47,6 +47,11 @@ def _bool_from_json(value, field): return value.lower() in ['t', 'true', '1'] +def _string_from_json(value, _): + """NOOP string -> string coercion""" + return value + + def _timestamp_from_json(value, field): """Coerce 'value' to a datetime, if set or not nullable.""" if _not_null(value, field): @@ -82,11 +87,6 @@ def _record_from_json(value, field): return record -def _string_from_json(value, _): - """NOOP string -> string coercion""" - return value - - _CELLDATA_FROM_JSON = { 'INTEGER': _int_from_json, 'INT64': _int_from_json, @@ -94,11 +94,11 @@ def _string_from_json(value, _): 'FLOAT64': _float_from_json, 'BOOLEAN': _bool_from_json, 'BOOL': _bool_from_json, + 'STRING': _string_from_json, 'TIMESTAMP': _timestamp_from_json, 'DATETIME': _datetime_from_json, 'DATE': _date_from_json, 'RECORD': _record_from_json, - 'STRING': _string_from_json, } diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index b3ccb1d715f5..60720dfce56c 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -105,6 +105,23 @@ def test_w_value_other(self): self.assertFalse(coerced) +class Test_string_from_json(unittest.TestCase): + + def _call_fut(self, value, field): + from google.cloud.bigquery._helpers import _string_from_json + return _string_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + self.assertIsNone(self._call_fut(None, _Field('RECORD'))) + + def test_w_string_value(self): + coerced = self._call_fut('Wonderful!', object()) + self.assertEqual(coerced, 'Wonderful!') + + class Test_timestamp_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -238,23 +255,6 @@ def test_w_record_subfield(self): self.assertEqual(coerced, expected) -class Test_string_from_json(unittest.TestCase): - - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _string_from_json - return _string_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) - - def test_w_none_required(self): - self.assertIsNone(self._call_fut(None, _Field('RECORD'))) - - def test_w_string_value(self): - coerced = self._call_fut('Wonderful!', object()) - self.assertEqual(coerced, 'Wonderful!') - - class Test_row_from_json(unittest.TestCase): def _call_fut(self, row, schema): From a0daf1411aaedf7b63d27d3268474ceb31d2b3d8 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 5 Dec 2016 11:51:11 -0500 Subject: [PATCH 0055/2016] Fix field mode for test. --- packages/google-cloud-bigquery/unit_tests/test__helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 60720dfce56c..f1cd7b6deb32 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -115,7 +115,7 @@ def test_w_none_nullable(self): self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) def test_w_none_required(self): - self.assertIsNone(self._call_fut(None, _Field('RECORD'))) + self.assertIsNone(self._call_fut(None, _Field('REQUIRED'))) def test_w_string_value(self): coerced = self._call_fut('Wonderful!', object()) From 0c83fc4bc0bbd339e545d0cbd1a6d9d6f3622756 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 5 Dec 2016 11:59:03 -0500 Subject: [PATCH 0056/2016] Add support for 'BYTES' columns / parameters. --- .../google/cloud/bigquery/_helpers.py | 16 ++++++++ .../unit_tests/test__helpers.py | 38 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 7d2fbc504c4f..b162b4ddad28 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -14,6 +14,7 @@ """Shared helper functions for BigQuery API classes.""" +import base64 from collections import OrderedDict import datetime @@ -52,6 +53,12 @@ def _string_from_json(value, _): return value +def _bytes_from_json(value, field): + """Base64-decode value""" + if _not_null(value, field): + return base64.decodestring(value) + + def _timestamp_from_json(value, field): """Coerce 'value' to a datetime, if set or not nullable.""" if _not_null(value, field): @@ -95,6 +102,7 @@ def _record_from_json(value, field): 'BOOLEAN': _bool_from_json, 'BOOL': _bool_from_json, 'STRING': _string_from_json, + 'BYTES': _bytes_from_json, 'TIMESTAMP': _timestamp_from_json, 'DATETIME': _datetime_from_json, 'DATE': _date_from_json, @@ -121,6 +129,13 @@ def _bool_to_json(value): return value +def _bytes_to_json(value): + """Coerce 'value' to an JSON-compatible representation.""" + if isinstance(value, bytes): + value = base64.encodestring(value) + return value + + def _timestamp_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): @@ -149,6 +164,7 @@ def _date_to_json(value): 'FLOAT64': _float_to_json, 'BOOLEAN': _bool_to_json, 'BOOL': _bool_to_json, + 'BYTES': _bytes_to_json, 'TIMESTAMP': _timestamp_to_json, 'DATETIME': _datetime_to_json, 'DATE': _date_to_json, diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index f1cd7b6deb32..c60ea7a41d27 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -122,6 +122,27 @@ def test_w_string_value(self): self.assertEqual(coerced, 'Wonderful!') +class Test_bytes_from_json(unittest.TestCase): + + def _call_fut(self, value, field): + from google.cloud.bigquery._helpers import _bytes_from_json + return _bytes_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._call_fut(None, _Field('REQUIRED')) + + def test_w_base64_encoded_value(self): + import base64 + expected = 'Wonderful!' + encoded = base64.encodestring(expected) + coerced = self._call_fut(encoded, object()) + self.assertEqual(coerced, expected) + + class Test_timestamp_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -471,6 +492,23 @@ def test_w_string(self): self.assertEqual(self._call_fut('false'), 'false') +class Test_bytes_to_json(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _bytes_to_json + return _bytes_to_json(value) + + def test_w_non_bytes(self): + non_bytes = object() + self.assertIs(self._call_fut(non_bytes), non_bytes) + + def test_w_bytes(self): + import base64 + source = b'source' + expected = base64.encodestring(source) + self.assertEqual(self._call_fut(source), expected) + + class Test_timestamp_to_json(unittest.TestCase): def _call_fut(self, value): From d4d11d8a88e8fc16836560b52467450c98c12807 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 5 Dec 2016 12:24:06 -0500 Subject: [PATCH 0057/2016] Add support for 'TIME' columns / parameters. --- .../google/cloud/bigquery/_helpers.py | 18 +++++++++ .../unit_tests/test__helpers.py | 37 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index b162b4ddad28..44183a3a3f2b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -23,6 +23,7 @@ from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _microseconds_from_datetime from google.cloud._helpers import _RFC3339_NO_FRACTION +from google.cloud._helpers import _time_from_iso8601_time_naive def _not_null(value, field): @@ -76,9 +77,17 @@ def _datetime_from_json(value, field): def _date_from_json(value, field): """Coerce 'value' to a datetime date, if set or not nullable""" if _not_null(value, field): + # value will be a string, in YYYY-MM-DD form. return _date_from_iso8601_date(value) +def _time_from_json(value, field): + """Coerce 'value' to a datetime date, if set or not nullable""" + if _not_null(value, field): + # value will be a string, in HH:MM:SS form. + return _time_from_iso8601_time_naive(value) + + def _record_from_json(value, field): """Coerce 'value' to a mapping, if set or not nullable.""" if _not_null(value, field): @@ -106,6 +115,7 @@ def _record_from_json(value, field): 'TIMESTAMP': _timestamp_from_json, 'DATETIME': _datetime_from_json, 'DATE': _date_from_json, + 'TIME': _time_from_json, 'RECORD': _record_from_json, } @@ -157,6 +167,13 @@ def _date_to_json(value): return value +def _time_to_json(value): + """Coerce 'value' to an JSON-compatible representation.""" + if isinstance(value, datetime.time): + value = value.isoformat() + return value + + _SCALAR_VALUE_TO_JSON = { 'INTEGER': _int_to_json, 'INT64': _int_to_json, @@ -168,6 +185,7 @@ def _date_to_json(value): 'TIMESTAMP': _timestamp_to_json, 'DATETIME': _datetime_to_json, 'DATE': _date_to_json, + 'TIME': _time_to_json, } diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index c60ea7a41d27..5dc27c1c06ac 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -215,6 +215,27 @@ def test_w_string_value(self): datetime.date(1987, 9, 22)) +class Test_time_from_json(unittest.TestCase): + + def _call_fut(self, value, field): + from google.cloud.bigquery._helpers import _time_from_json + return _time_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._call_fut(None, _Field('REQUIRED')) + + def test_w_string_value(self): + import datetime + coerced = self._call_fut('12:12:27', object()) + self.assertEqual( + coerced, + datetime.time(12, 12, 27)) + + class Test_record_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -560,6 +581,22 @@ def test_w_datetime(self): self.assertEqual(self._call_fut(when), '2016-12-03') +class Test_time_to_json(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _time_to_json + return _time_to_json(value) + + def test_w_string(self): + RFC3339 = '12:13:41' + self.assertEqual(self._call_fut(RFC3339), RFC3339) + + def test_w_datetime(self): + import datetime + when = datetime.time(12, 13, 41) + self.assertEqual(self._call_fut(when), '12:13:41') + + class Test_ConfigurationProperty(unittest.TestCase): @staticmethod From ea9cae7f94f35a694e7711b3aa059b2cc9f58251 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 5 Dec 2016 12:52:59 -0500 Subject: [PATCH 0058/2016] Move '_convert_timestamp' helper out to module scope. Addresses: https://github.com/GoogleCloudPlatform/google-cloud-python/pull/2805#discussion_r90912170 --- .../google/cloud/bigquery/table.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f9c6a970fa18..18f83c596f7d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -738,11 +738,6 @@ def insert_data(self, rows_info = [] data = {'rows': rows_info} - def _convert_timestamp(value): - if isinstance(value, datetime.datetime): - value = _microseconds_from_datetime(value) * 1e-6 - return value - for index, row in enumerate(rows): row_info = {} @@ -1134,3 +1129,10 @@ class _UrlBuilder(object): def __init__(self): self.query_params = {} self._relative_path = '' + + +def _convert_timestamp(value): + """Helper for :meth:`Table.insert_data`.""" + if isinstance(value, datetime.datetime): + value = _microseconds_from_datetime(value) * 1e-6 + return value From 8c969ff276e5f4e28324d4d970ef5e6c823fa7f3 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 5 Dec 2016 13:23:55 -0500 Subject: [PATCH 0059/2016] Fix base64 encodeing of test value on Py3k. --- packages/google-cloud-bigquery/unit_tests/test__helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 5dc27c1c06ac..76eacc9a04a8 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -137,7 +137,7 @@ def test_w_none_required(self): def test_w_base64_encoded_value(self): import base64 - expected = 'Wonderful!' + expected = b'Wonderful!' encoded = base64.encodestring(expected) coerced = self._call_fut(encoded, object()) self.assertEqual(coerced, expected) From c001c0fd510327a57a8a220cfb93bf521deb905e Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 5 Dec 2016 13:46:53 -0500 Subject: [PATCH 0060/2016] Allow overriding dataset's project during construction. Closes #2118. --- .../google/cloud/bigquery/client.py | 8 +++- .../google/cloud/bigquery/dataset.py | 9 ++++- .../unit_tests/test_client.py | 17 ++++++++- .../unit_tests/test_dataset.py | 37 +++++++++++++++++-- 4 files changed, 62 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4af5b8fc4910..0c01578f5ffe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -129,16 +129,20 @@ def list_datasets(self, include_all=False, max_results=None, items_key='datasets', page_token=page_token, max_results=max_results, extra_params=extra_params) - def dataset(self, dataset_name): + def dataset(self, dataset_name, project=None): """Construct a dataset bound to this client. :type dataset_name: str :param dataset_name: Name of the dataset. + :type project: str + :param project: (Optional) project ID for the dataset (defaults to + the project of the client). + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: a new ``Dataset`` instance """ - return Dataset(dataset_name, client=self) + return Dataset(dataset_name, client=self, project=project) def job_from_resource(self, resource): """Detect correct job type from resource and instantiate. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index e209433b5e10..6f28bd791b05 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -101,16 +101,21 @@ class Dataset(object): :type access_grants: list of :class:`AccessGrant` :param access_grants: roles granted to entities for this dataset + + :type project: str + :param project: (Optional) project ID for the dataset (defaults to + the project of the client). """ _access_grants = None - def __init__(self, name, client, access_grants=()): + def __init__(self, name, client, access_grants=(), project=None): self.name = name self._client = client self._properties = {} # Let the @property do validation. self.access_grants = access_grants + self._project = project @property def project(self): @@ -119,7 +124,7 @@ def project(self): :rtype: str :returns: the project (derived from the client). """ - return self._client.project + return self._project or self._client.project @property def path(self): diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index 8c76897b3f6d..152714839ebd 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -176,7 +176,7 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): self.assertEqual(req['query_params'], {'all': True, 'maxResults': 3, 'pageToken': TOKEN}) - def test_dataset(self): + def test_dataset_defaults(self): from google.cloud.bigquery.dataset import Dataset PROJECT = 'PROJECT' DATASET = 'dataset_name' @@ -187,6 +187,21 @@ def test_dataset(self): self.assertIsInstance(dataset, Dataset) self.assertEqual(dataset.name, DATASET) self.assertIs(dataset._client, client) + self.assertEqual(dataset.project, PROJECT) + + def test_dataset_explicit(self): + from google.cloud.bigquery.dataset import Dataset + PROJECT = 'my-project-123' + OTHER_PROJECT = 'other-project-456' + DATASET = 'dataset_name' + creds = object() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, http=http) + dataset = client.dataset(DATASET, project=OTHER_PROJECT) + self.assertIsInstance(dataset, Dataset) + self.assertEqual(dataset.name, DATASET) + self.assertIs(dataset._client, client) + self.assertEqual(dataset.project, OTHER_PROJECT) def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' diff --git a/packages/google-cloud-bigquery/unit_tests/test_dataset.py b/packages/google-cloud-bigquery/unit_tests/test_dataset.py index ec7c56722368..015b160687d9 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_dataset.py +++ b/packages/google-cloud-bigquery/unit_tests/test_dataset.py @@ -174,7 +174,7 @@ def _verifyResourceProperties(self, dataset, resource): else: self.assertEqual(dataset.access_grants, []) - def test_ctor(self): + def test_ctor_defaults(self): client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) self.assertEqual(dataset.name, self.DS_NAME) @@ -196,13 +196,42 @@ def test_ctor(self): self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) - def test_access_roles_setter_non_list(self): + def test_ctor_explicit(self): + from google.cloud.bigquery.dataset import AccessGrant + phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') + bharney = AccessGrant('OWNER', 'userByEmail', 'bharney@example.com') + grants = [phred, bharney] + OTHER_PROJECT = 'foo-bar-123' + client = _Client(self.PROJECT) + dataset = self._make_one(self.DS_NAME, client, + access_grants=grants, + project=OTHER_PROJECT) + self.assertEqual(dataset.name, self.DS_NAME) + self.assertIs(dataset._client, client) + self.assertEqual(dataset.project, OTHER_PROJECT) + self.assertEqual( + dataset.path, + '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_NAME)) + self.assertEqual(dataset.access_grants, grants) + + self.assertIsNone(dataset.created) + self.assertIsNone(dataset.dataset_id) + self.assertIsNone(dataset.etag) + self.assertIsNone(dataset.modified) + self.assertIsNone(dataset.self_link) + + self.assertIsNone(dataset.default_table_expiration_ms) + self.assertIsNone(dataset.description) + self.assertIsNone(dataset.friendly_name) + self.assertIsNone(dataset.location) + + def test_access_grants_setter_non_list(self): client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) with self.assertRaises(TypeError): dataset.access_grants = object() - def test_access_roles_setter_invalid_field(self): + def test_access_grants_setter_invalid_field(self): from google.cloud.bigquery.dataset import AccessGrant client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) @@ -210,7 +239,7 @@ def test_access_roles_setter_invalid_field(self): with self.assertRaises(ValueError): dataset.access_grants = [phred, object()] - def test_access_roles_setter(self): + def test_access_grants_setter(self): from google.cloud.bigquery.dataset import AccessGrant client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) From 26144c54245dbea3bd1a3d897fa5c40ab5dcd821 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 5 Dec 2016 14:55:45 -0500 Subject: [PATCH 0061/2016] Freeze project selection in dataset ctor. Addresses: https://travis-ci.org/GoogleCloudPlatform/google-cloud-python/builds/181434795#L1784-L1785 --- .../google-cloud-bigquery/google/cloud/bigquery/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 6f28bd791b05..109f259ae100 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -115,7 +115,7 @@ def __init__(self, name, client, access_grants=(), project=None): self._properties = {} # Let the @property do validation. self.access_grants = access_grants - self._project = project + self._project = project or client.project @property def project(self): @@ -124,7 +124,7 @@ def project(self): :rtype: str :returns: the project (derived from the client). """ - return self._project or self._client.project + return self._project @property def path(self): From 0410b0d301da187edc01545ac937d79c5891a1cd Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 5 Dec 2016 13:50:25 -0800 Subject: [PATCH 0062/2016] Cutting new releases for BigQuery, Language and Core. --- packages/google-cloud-bigquery/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 36b000fec6d2..c3102d15401d 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -50,12 +50,12 @@ REQUIREMENTS = [ - 'google-cloud-core >= 0.21.0, < 0.22dev', + 'google-cloud-core >= 0.22.0, < 0.23dev', ] setup( name='google-cloud-bigquery', - version='0.21.0', + version='0.22.0', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From 6524ebe645c46606bf7aab83671356c6b1feda84 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 5 Dec 2016 18:18:31 -0500 Subject: [PATCH 0063/2016] Fix 'BYTES' field handling on Py3k. JSON decodes the base64-encoded bits as text, which cannot be passed to 'base64.decodestring' on Py3k. --- .../google/cloud/bigquery/_helpers.py | 6 +++++- .../google-cloud-bigquery/unit_tests/test__helpers.py | 9 ++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 44183a3a3f2b..a8baa2206455 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -18,12 +18,15 @@ from collections import OrderedDict import datetime +import six + from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _microseconds_from_datetime from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _time_from_iso8601_time_naive +from google.cloud._helpers import _to_bytes def _not_null(value, field): @@ -57,7 +60,8 @@ def _string_from_json(value, _): def _bytes_from_json(value, field): """Base64-decode value""" if _not_null(value, field): - return base64.decodestring(value) + return base64.decodestring( + _to_bytes(value) if isinstance(value, six.text_type) else value) def _timestamp_from_json(value, field): diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 76eacc9a04a8..0e508aba2da0 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -135,13 +135,20 @@ def test_w_none_required(self): with self.assertRaises(TypeError): self._call_fut(None, _Field('REQUIRED')) - def test_w_base64_encoded_value(self): + def test_w_base64_encoded_bytes(self): import base64 expected = b'Wonderful!' encoded = base64.encodestring(expected) coerced = self._call_fut(encoded, object()) self.assertEqual(coerced, expected) + def test_w_base64_encoded_text(self): + import base64 + expected = b'Wonderful!' + encoded = base64.encodestring(expected).decode('ascii') + coerced = self._call_fut(encoded, object()) + self.assertEqual(coerced, expected) + class Test_timestamp_from_json(unittest.TestCase): From 07bbe6f6f7567c54c0235316f715529205d801b7 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 6 Dec 2016 08:54:33 -0800 Subject: [PATCH 0064/2016] Follow-up to #2818, use _to_bytes as intended in BigQuery. --- .../google-cloud-bigquery/google/cloud/bigquery/_helpers.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index a8baa2206455..dcea7b237a07 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -18,8 +18,6 @@ from collections import OrderedDict import datetime -import six - from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _datetime_to_rfc3339 @@ -60,8 +58,7 @@ def _string_from_json(value, _): def _bytes_from_json(value, field): """Base64-decode value""" if _not_null(value, field): - return base64.decodestring( - _to_bytes(value) if isinstance(value, six.text_type) else value) + return base64.decodestring(_to_bytes(value)) def _timestamp_from_json(value, field): From 42c96e77c580f7f50911f95d0dba976f7cc44222 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Wed, 7 Dec 2016 16:00:24 -0800 Subject: [PATCH 0065/2016] Raise ValueError if credentials are not from google-auth (#2828) --- .../unit_tests/test_client.py | 62 ++++++++----------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index 152714839ebd..7442d7ad8b5d 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -14,6 +14,13 @@ import unittest +import mock + + +def _make_credentials(): + import google.auth.credentials + return mock.Mock(spec=google.auth.credentials.Credentials) + class TestClient(unittest.TestCase): @@ -28,7 +35,7 @@ def _make_one(self, *args, **kw): def test_ctor(self): from google.cloud.bigquery._http import Connection PROJECT = 'PROJECT' - creds = object() + creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) self.assertIsInstance(client._connection, Connection) @@ -57,7 +64,7 @@ def test_list_projects_defaults(self): 'friendlyName': 'Two'}, ] } - creds = object() + creds = _make_credentials() client = self._make_one(PROJECT_1, creds) conn = client._connection = _Connection(DATA) @@ -86,7 +93,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): PATH = 'projects' TOKEN = 'TOKEN' DATA = {} - creds = object() + creds = _make_credentials() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -128,7 +135,7 @@ def test_list_datasets_defaults(self): 'friendlyName': 'Two'}, ] } - creds = object() + creds = _make_credentials() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -156,7 +163,7 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): PATH = 'projects/%s/datasets' % PROJECT TOKEN = 'TOKEN' DATA = {} - creds = object() + creds = _make_credentials() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -176,36 +183,21 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): self.assertEqual(req['query_params'], {'all': True, 'maxResults': 3, 'pageToken': TOKEN}) - def test_dataset_defaults(self): + def test_dataset(self): from google.cloud.bigquery.dataset import Dataset PROJECT = 'PROJECT' DATASET = 'dataset_name' - creds = object() + creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) self.assertIsInstance(dataset, Dataset) self.assertEqual(dataset.name, DATASET) self.assertIs(dataset._client, client) - self.assertEqual(dataset.project, PROJECT) - - def test_dataset_explicit(self): - from google.cloud.bigquery.dataset import Dataset - PROJECT = 'my-project-123' - OTHER_PROJECT = 'other-project-456' - DATASET = 'dataset_name' - creds = object() - http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) - dataset = client.dataset(DATASET, project=OTHER_PROJECT) - self.assertIsInstance(dataset, Dataset) - self.assertEqual(dataset.name, DATASET) - self.assertIs(dataset._client, client) - self.assertEqual(dataset.project, OTHER_PROJECT) def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' - creds = object() + creds = _make_credentials() client = self._make_one(PROJECT, creds) with self.assertRaises(ValueError): client.job_from_resource({'configuration': {'nonesuch': {}}}) @@ -319,7 +311,7 @@ def test_list_jobs_defaults(self): LOAD_DATA, ] } - creds = object() + creds = _make_credentials() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -375,7 +367,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): LOAD_DATA, ] } - creds = object() + creds = _make_credentials() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -403,7 +395,7 @@ def test_list_jobs_explicit_missing(self): PATH = 'projects/%s/jobs' % PROJECT DATA = {} TOKEN = 'TOKEN' - creds = object() + creds = _make_credentials() client = self._make_one(PROJECT, creds) conn = client._connection = _Connection(DATA) @@ -434,7 +426,7 @@ def test_load_table_from_storage(self): DATASET = 'dataset_name' DESTINATION = 'destination_table' SOURCE_URI = 'http://example.com/source.csv' - creds = object() + creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) @@ -453,7 +445,7 @@ def test_copy_table(self): DATASET = 'dataset_name' SOURCE = 'source_table' DESTINATION = 'destination_table' - creds = object() + creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) @@ -473,7 +465,7 @@ def test_extract_table_to_storage(self): DATASET = 'dataset_name' SOURCE = 'source_table' DESTINATION = 'gs://bucket_name/object_name' - creds = object() + creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) dataset = client.dataset(DATASET) @@ -490,7 +482,7 @@ def test_run_async_query_defaults(self): PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' - creds = object() + creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) job = client.run_async_query(JOB, QUERY) @@ -508,7 +500,7 @@ def test_run_async_w_udf_resources(self): PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' - creds = object() + creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] @@ -526,7 +518,7 @@ def test_run_async_w_query_parameters(self): PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' - creds = object() + creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] @@ -543,7 +535,7 @@ def test_run_sync_query_defaults(self): from google.cloud.bigquery.query import QueryResults PROJECT = 'PROJECT' QUERY = 'select count(*) from persons' - creds = object() + creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) query = client.run_sync_query(QUERY) @@ -560,7 +552,7 @@ def test_run_sync_query_w_udf_resources(self): RESOURCE_URI = 'gs://some-bucket/js/lib.js' PROJECT = 'PROJECT' QUERY = 'select count(*) from persons' - creds = object() + creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] @@ -577,7 +569,7 @@ def test_run_sync_query_w_query_parameters(self): from google.cloud.bigquery.query import QueryResults PROJECT = 'PROJECT' QUERY = 'select count(*) from persons' - creds = object() + creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, http=http) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] From 213dfbaff0d32fa04db1cfb21efa6b308a4aa369 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 8 Dec 2016 15:17:03 -0800 Subject: [PATCH 0066/2016] Update versions for mega-release. We want to update - `google-cloud-bigquery` - `google-cloud-datastore` - `google-cloud-logging` - `google-cloud-storage` - `google-cloud-core` And then update `google-cloud` to re-wrap the latest versions of each. However, to avoid having packages in `google-cloud` with conflicting versions of `google-cloud-core`, we must release all packages. --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index c3102d15401d..acb2a03bb322 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -50,7 +50,7 @@ REQUIREMENTS = [ - 'google-cloud-core >= 0.22.0, < 0.23dev', + 'google-cloud-core >= 0.22.1, < 0.23dev', ] setup( From 8ae0af57a3629e5e3fed955ac9455ed0716454c3 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 8 Dec 2016 15:44:04 -0800 Subject: [PATCH 0067/2016] Making sub-minor updates to libraries already on 0.22.x --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index acb2a03bb322..98aea8dbe15b 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -55,7 +55,7 @@ setup( name='google-cloud-bigquery', - version='0.22.0', + version='0.22.1', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From 318bcf65e802135340e49d5045d0cc63afd50a61 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 13 Dec 2016 13:23:50 -0800 Subject: [PATCH 0068/2016] bigquery: fix array handling in parameterized queries --- .../google/cloud/bigquery/_helpers.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index dcea7b237a07..0db3c9fe9653 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -497,10 +497,13 @@ def to_api_repr(self): values = [converter(value) for value in values] resource = { 'parameterType': { - 'arrayType': self.array_type, + 'type': 'ARRAY', + 'arrayType': { + 'type': self.array_type, + }, }, 'parameterValue': { - 'arrayValues': values, + 'arrayValues': [{'value': value} for value in values], }, } if self.name is not None: From 429994219ae7cce52e0e70f0647fd6115ba1a926 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Wed, 14 Dec 2016 23:43:05 -0800 Subject: [PATCH 0069/2016] Manually creating Client._connection in subclasses. --- .../google/cloud/bigquery/client.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 0c01578f5ffe..e98f390ff616 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -58,20 +58,25 @@ class Client(JSONClient): passed when creating a dataset / job. If not passed, falls back to the default inferred from the environment. - :type credentials: :class:`oauth2client.client.OAuth2Credentials` or - :class:`NoneType` - :param credentials: The OAuth2 Credentials to use for the connection - owned by this client. If not passed (and if no ``http`` - object is passed), falls back to the default inferred - from the environment. - - :type http: :class:`httplib2.Http` or class that defines ``request()``. - :param http: An optional HTTP object to make requests. If not passed, an + :type credentials: :class:`~google.auth.credentials.Credentials` + :param credentials: (Optional) The OAuth2 Credentials to use for this + client. If not passed (and if no ``http`` object is + passed), falls back to the default inferred from the + environment. + + :type http: :class:`~httplib2.Http` + :param http: (Optional) HTTP object to make requests. Can be any object + that defines ``request()`` with the same interface as + :meth:`~httplib2.Http.request`. If not passed, an ``http`` object is created that is bound to the ``credentials`` for the current object. """ - _connection_class = Connection + def __init__(self, project=None, credentials=None, http=None): + super(Client, self).__init__( + project=project, credentials=credentials, http=http) + self._connection = Connection( + credentials=self._credentials, http=self._http) def list_projects(self, max_results=None, page_token=None): """List projects for the project associated with this client. From c3b93c27c79d2cb33271b41972e9a27d44c3392d Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 19 Dec 2016 12:53:53 -0500 Subject: [PATCH 0070/2016] Accept copy jobs with single 'sourceTable' config. Such jobs would be created via another client: we map that configuration onto a sequence of tables containing only the one item. Closes: #2882. --- .../google/cloud/bigquery/job.py | 5 ++- .../unit_tests/test_job.py | 33 ++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5eff2b74ef90..8bc2941b365d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -742,7 +742,10 @@ def from_api_repr(cls, resource, client): dataset = Dataset(dest_config['datasetId'], client) destination = Table(dest_config['tableId'], dataset) sources = [] - for source_config in config['sourceTables']: + source_configs = config.get('sourceTables') + if source_configs is None: + source_configs = [config['sourceTable']] + for source_config in source_configs: dataset = Dataset(source_config['datasetId'], client) sources.append(Table(source_config['tableId'], dataset)) job = cls(name, destination, sources, client=client) diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index a451e0040931..9b5e6b4e57a7 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -675,7 +675,9 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.destination.dataset_name, table_ref['datasetId']) self.assertEqual(job.destination.name, table_ref['tableId']) - sources = config['sourceTables'] + sources = config.get('sourceTables') + if sources is None: + sources = [config['sourceTable']] self.assertEqual(len(sources), len(job.sources)) for table_ref, table in zip(sources, job.sources): self.assertEqual(table.project, table_ref['projectId']) @@ -764,6 +766,35 @@ def test_from_api_repr_bare(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_sourcetable(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = { + 'id': self.JOB_ID, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'copy': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.SOURCE_TABLE, + }, + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.DESTINATION_TABLE, + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() From a2f8cd2685758dd4128a34697283d1b994cb40da Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 20 Dec 2016 14:03:34 -0500 Subject: [PATCH 0071/2016] Send TIMESTAMP query parameters as string. - *Not* the float-time-since-epoch-in-seconds which Bigquery uses for all other TIMESTAMP values. :( - *Not* RFC3339, but the SQL-mandated format with an embedded space. :( Closes: #2886. --- .../google/cloud/bigquery/_helpers.py | 8 +++- .../unit_tests/test__helpers.py | 42 ++++++++++++++----- 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index dcea7b237a07..b72b55950b71 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -18,10 +18,10 @@ from collections import OrderedDict import datetime +from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _datetime_to_rfc3339 -from google.cloud._helpers import _microseconds_from_datetime from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _time_from_iso8601_time_naive from google.cloud._helpers import _to_bytes @@ -150,7 +150,11 @@ def _bytes_to_json(value): def _timestamp_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): - value = _microseconds_from_datetime(value) / 1.0e6 + if value.tzinfo not in (None, UTC): + # Convert to UTC and remove the time zone info. + value = value.replace(tzinfo=None) - value.utcoffset() + value = '%s %s+00:00' % ( + value.date().isoformat(), value.time().isoformat()) return value diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 0e508aba2da0..affd52294fdd 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -546,13 +546,35 @@ def _call_fut(self, value): def test_w_float(self): self.assertEqual(self._call_fut(1.234567), 1.234567) - def test_w_datetime(self): + def test_w_string(self): + ZULU = '2016-12-20 15:58:27.339328+00:00' + self.assertEqual(self._call_fut(ZULU), ZULU) + + def test_w_datetime_wo_zone(self): + import datetime + ZULU = '2016-12-20 15:58:27.339328+00:00' + when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) + self.assertEqual(self._call_fut(when), ZULU) + + def test_w_datetime_w_non_utc_zone(self): + import datetime + + class _Zone(datetime.tzinfo): + + def utcoffset(self, _): + return datetime.timedelta(minutes=-240) + + ZULU = '2016-12-20 19:58:27.339328+00:00' + when = datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=_Zone()) + self.assertEqual(self._call_fut(when), ZULU) + + def test_w_datetime_w_utc_zone(self): import datetime from google.cloud._helpers import UTC - from google.cloud._helpers import _microseconds_from_datetime - when = datetime.datetime(2016, 12, 3, 14, 11, 27, tzinfo=UTC) - self.assertEqual(self._call_fut(when), - _microseconds_from_datetime(when) / 1e6) + ZULU = '2016-12-20 15:58:27.339328+00:00' + when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + self.assertEqual(self._call_fut(when), ZULU) class Test_datetime_to_json(unittest.TestCase): @@ -907,20 +929,20 @@ def test_to_api_repr_w_bool(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_timestamp_datetime(self): + from google.cloud._helpers import UTC import datetime - from google.cloud._helpers import _microseconds_from_datetime - now = datetime.datetime.utcnow() - seconds = _microseconds_from_datetime(now) / 1.0e6 + STAMP = '2016-12-20 15:58:27.339328+00:00' + when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) EXPECTED = { 'parameterType': { 'type': 'TIMESTAMP', }, 'parameterValue': { - 'value': seconds, + 'value': STAMP, }, } klass = self._get_target_class() - param = klass.positional(type_='TIMESTAMP', value=now) + param = klass.positional(type_='TIMESTAMP', value=when) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_timestamp_micros(self): From fdd5d5acf71b1521162f0d4feaa8aaa56ce58d03 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 20 Dec 2016 15:06:21 -0500 Subject: [PATCH 0072/2016] Conform to expected wire format for struct query parameters. Closes: #2887. --- .../google/cloud/bigquery/_helpers.py | 12 +++-- .../unit_tests/test__helpers.py | 44 ++++++++++++++----- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index b72b55950b71..8b83acc6d53f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -501,6 +501,7 @@ def to_api_repr(self): values = [converter(value) for value in values] resource = { 'parameterType': { + 'type': 'ARRAY', 'arrayType': self.array_type, }, 'parameterValue': { @@ -554,10 +555,12 @@ def from_api_repr(cls, resource): instance = cls(name) types = instance.struct_types for item in resource['parameterType']['structTypes']: - types[item['name']] = item['type'] + types[item['name']] = item['type']['type'] struct_values = resource['parameterValue']['structValues'] for key, value in struct_values.items(): - converted = _CELLDATA_FROM_JSON[types[key]](value, None) + type_ = types[key] + value = value['value'] + converted = _CELLDATA_FROM_JSON[type_](value, None) instance.struct_values[key] = converted return instance @@ -568,7 +571,7 @@ def to_api_repr(self): :returns: JSON mapping """ types = [ - {'name': key, 'type': value} + {'name': key, 'type': {'type': value}} for key, value in self.struct_types.items() ] values = {} @@ -576,10 +579,11 @@ def to_api_repr(self): converter = _SCALAR_VALUE_TO_JSON.get(self.struct_types[name]) if converter is not None: value = converter(value) - values[name] = value + values[name] = {'value': value} resource = { 'parameterType': { + 'type': 'STRUCT', 'structTypes': types, }, 'parameterValue': { diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index affd52294fdd..465e5defc143 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -1082,6 +1082,7 @@ def test_from_api_repr_w_name(self): def test_from_api_repr_wo_name(self): RESOURCE = { 'parameterType': { + 'type': 'ARRAY', 'arrayType': 'INT64', }, 'parameterValue': { @@ -1098,6 +1099,7 @@ def test_to_api_repr_w_name(self): EXPECTED = { 'name': 'foo', 'parameterType': { + 'type': 'ARRAY', 'arrayType': 'INT64', }, 'parameterValue': { @@ -1110,6 +1112,7 @@ def test_to_api_repr_w_name(self): def test_to_api_repr_wo_name(self): EXPECTED = { 'parameterType': { + 'type': 'ARRAY', 'arrayType': 'INT64', }, 'parameterValue': { @@ -1123,6 +1126,7 @@ def test_to_api_repr_wo_name(self): def test_to_api_repr_w_unknown_type(self): EXPECTED = { 'parameterType': { + 'type': 'ARRAY', 'arrayType': 'UNKNOWN', }, 'parameterValue': { @@ -1170,13 +1174,17 @@ def test_from_api_repr_w_name(self): RESOURCE = { 'name': 'foo', 'parameterType': { + 'type': 'STRUTCT', 'structTypes': [ - {'name': 'bar', 'type': 'INT64'}, - {'name': 'baz', 'type': 'STRING'}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, ], }, 'parameterValue': { - 'structValues': {'bar': 123, 'baz': 'abc'}, + 'structValues': { + 'bar': {'value': 123}, + 'baz': {'value': 'abc'}, + }, }, } klass = self._get_target_class() @@ -1188,13 +1196,17 @@ def test_from_api_repr_w_name(self): def test_from_api_repr_wo_name(self): RESOURCE = { 'parameterType': { + 'type': 'STRUTCT', 'structTypes': [ - {'name': 'bar', 'type': 'INT64'}, - {'name': 'baz', 'type': 'STRING'}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, ], }, 'parameterValue': { - 'structValues': {'bar': 123, 'baz': 'abc'}, + 'structValues': { + 'bar': {'value': 123}, + 'baz': {'value': 'abc'}, + }, }, } klass = self._get_target_class() @@ -1207,13 +1219,17 @@ def test_to_api_repr_w_name(self): EXPECTED = { 'name': 'foo', 'parameterType': { + 'type': 'STRUCT', 'structTypes': [ - {'name': 'bar', 'type': 'INT64'}, - {'name': 'baz', 'type': 'STRING'}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, ], }, 'parameterValue': { - 'structValues': {'bar': '123', 'baz': 'abc'}, + 'structValues': { + 'bar': {'value': '123'}, + 'baz': {'value': 'abc'}, + }, }, } sub_1 = self._make_subparam('bar', 'INT64', 123) @@ -1224,13 +1240,17 @@ def test_to_api_repr_w_name(self): def test_to_api_repr_wo_name(self): EXPECTED = { 'parameterType': { + 'type': 'STRUCT', 'structTypes': [ - {'name': 'bar', 'type': 'INT64'}, - {'name': 'baz', 'type': 'STRING'}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, ], }, 'parameterValue': { - 'structValues': {'bar': '123', 'baz': 'abc'}, + 'structValues': { + 'bar': {'value': '123'}, + 'baz': {'value': 'abc'}, + }, }, } sub_1 = self._make_subparam('bar', 'INT64', 123) From d248407ce8af5d913404e421132d71fda40dde75 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 20 Dec 2016 17:46:33 -0500 Subject: [PATCH 0073/2016] Typo. --- packages/google-cloud-bigquery/unit_tests/test__helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 465e5defc143..cc2df7b19006 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -1174,7 +1174,7 @@ def test_from_api_repr_w_name(self): RESOURCE = { 'name': 'foo', 'parameterType': { - 'type': 'STRUTCT', + 'type': 'STRUCT', 'structTypes': [ {'name': 'bar', 'type': {'type': 'INT64'}}, {'name': 'baz', 'type': {'type': 'STRING'}}, @@ -1196,7 +1196,7 @@ def test_from_api_repr_w_name(self): def test_from_api_repr_wo_name(self): RESOURCE = { 'parameterType': { - 'type': 'STRUTCT', + 'type': 'STRUCT', 'structTypes': [ {'name': 'bar', 'type': {'type': 'INT64'}}, {'name': 'baz', 'type': {'type': 'STRING'}}, From 64b434f8cdb8a10470bb5c3594d06d4fb7da9a45 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 21 Dec 2016 13:17:33 -0500 Subject: [PATCH 0074/2016] Raise explicit KeyError for copy job resource w/o source table(s). Addresses: https://github.com/GoogleCloudPlatform/google-cloud-python/pull/2884#discussion_r93360090 --- .../google/cloud/bigquery/job.py | 6 ++++- .../unit_tests/test_job.py | 23 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 8bc2941b365d..d5152fcee25b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -744,7 +744,11 @@ def from_api_repr(cls, resource, client): sources = [] source_configs = config.get('sourceTables') if source_configs is None: - source_configs = [config['sourceTable']] + single = config.get('sourceTable') + if single is None: + raise KeyError( + "Resource missing 'sourceTables' / 'sourceTable'") + source_configs = [single] for source_config in source_configs: dataset = Dataset(source_config['datasetId'], client) sources.append(Table(source_config['tableId'], dataset)) diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index 9b5e6b4e57a7..57b588a5801c 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -795,6 +795,29 @@ def test_from_api_repr_w_sourcetable(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_wo_sources(self): + self._setUpConstants() + client = _Client(self.PROJECT) + RESOURCE = { + 'id': self.JOB_ID, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'copy': { + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.DESTINATION_TABLE, + }, + } + }, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() From 5be66ce412dbb05803856f7a9d54b2159c70231a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 27 Dec 2016 13:03:22 -0800 Subject: [PATCH 0075/2016] bigquery: Add tests for array query parameters. --- .../google/cloud/bigquery/_helpers.py | 7 ++- .../unit_tests/test__helpers.py | 63 ++++++++++++++++--- 2 files changed, 58 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 4e8004d716db..bbcbae1674c3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -483,8 +483,11 @@ def from_api_repr(cls, resource): :returns: instance """ name = resource.get('name') - array_type = resource['parameterType']['arrayType'] - values = resource['parameterValue']['arrayValues'] + array_type = resource['parameterType']['arrayType']['type'] + values = [ + value['value'] + for value + in resource['parameterValue']['arrayValues']] converted = [ _CELLDATA_FROM_JSON[array_type](value, None) for value in values] return cls(name, array_type, converted) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index cc2df7b19006..ec84ccbf6950 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -1067,10 +1067,20 @@ def test_from_api_repr_w_name(self): RESOURCE = { 'name': 'foo', 'parameterType': { - 'arrayType': 'INT64', + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, }, 'parameterValue': { - 'arrayValues': ['1', '2'], + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], }, } klass = self._get_target_class() @@ -1083,10 +1093,19 @@ def test_from_api_repr_wo_name(self): RESOURCE = { 'parameterType': { 'type': 'ARRAY', - 'arrayType': 'INT64', + 'arrayType': { + 'type': 'INT64', + }, }, 'parameterValue': { - 'arrayValues': ['1', '2'], + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], }, } klass = self._get_target_class() @@ -1100,10 +1119,19 @@ def test_to_api_repr_w_name(self): 'name': 'foo', 'parameterType': { 'type': 'ARRAY', - 'arrayType': 'INT64', + 'arrayType': { + 'type': 'INT64', + }, }, 'parameterValue': { - 'arrayValues': ['1', '2'], + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], }, } param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) @@ -1113,10 +1141,19 @@ def test_to_api_repr_wo_name(self): EXPECTED = { 'parameterType': { 'type': 'ARRAY', - 'arrayType': 'INT64', + 'arrayType': { + 'type': 'INT64', + }, }, 'parameterValue': { - 'arrayValues': ['1', '2'], + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], }, } klass = self._get_target_class() @@ -1127,10 +1164,16 @@ def test_to_api_repr_w_unknown_type(self): EXPECTED = { 'parameterType': { 'type': 'ARRAY', - 'arrayType': 'UNKNOWN', + 'arrayType': { + 'type': 'UNKNOWN', + }, }, 'parameterValue': { - 'arrayValues': ['unknown'], + 'arrayValues': [ + { + 'value': 'unknown', + } + ], }, } klass = self._get_target_class() From 6fc10307ba755de11a19b5a14cf35620d49d365a Mon Sep 17 00:00:00 2001 From: Thomas Schultz Date: Wed, 18 Jan 2017 13:14:03 -0500 Subject: [PATCH 0076/2016] Update import spacing part 2. --- .../unit_tests/test__helpers.py | 55 +++++++++++++++++++ .../unit_tests/test__http.py | 2 + .../unit_tests/test_client.py | 18 ++++++ .../unit_tests/test_dataset.py | 9 +++ .../unit_tests/test_job.py | 18 ++++++ .../unit_tests/test_query.py | 13 +++++ .../unit_tests/test_schema.py | 1 + .../unit_tests/test_table.py | 33 +++++++++++ 8 files changed, 149 insertions(+) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index ec84ccbf6950..e42d8207b471 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -19,6 +19,7 @@ class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _not_null + return _not_null(value, field) def test_w_none_nullable(self): @@ -35,6 +36,7 @@ class Test_int_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _int_from_json + return _int_from_json(value, field) def test_w_none_nullable(self): @@ -57,6 +59,7 @@ class Test_float_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _float_from_json + return _float_from_json(value, field) def test_w_none_nullable(self): @@ -79,6 +82,7 @@ class Test_bool_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _bool_from_json + return _bool_from_json(value, field) def test_w_none_nullable(self): @@ -109,6 +113,7 @@ class Test_string_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _string_from_json + return _string_from_json(value, field) def test_w_none_nullable(self): @@ -126,6 +131,7 @@ class Test_bytes_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _bytes_from_json + return _bytes_from_json(value, field) def test_w_none_nullable(self): @@ -137,6 +143,7 @@ def test_w_none_required(self): def test_w_base64_encoded_bytes(self): import base64 + expected = b'Wonderful!' encoded = base64.encodestring(expected) coerced = self._call_fut(encoded, object()) @@ -144,6 +151,7 @@ def test_w_base64_encoded_bytes(self): def test_w_base64_encoded_text(self): import base64 + expected = b'Wonderful!' encoded = base64.encodestring(expected).decode('ascii') coerced = self._call_fut(encoded, object()) @@ -154,6 +162,7 @@ class Test_timestamp_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _timestamp_from_json + return _timestamp_from_json(value, field) def test_w_none_nullable(self): @@ -166,6 +175,7 @@ def test_w_none_required(self): def test_w_string_value(self): import datetime from google.cloud._helpers import _EPOCH + coerced = self._call_fut('1.234567', object()) self.assertEqual( coerced, @@ -174,6 +184,7 @@ def test_w_string_value(self): def test_w_float_value(self): import datetime from google.cloud._helpers import _EPOCH + coerced = self._call_fut(1.234567, object()) self.assertEqual( coerced, @@ -184,6 +195,7 @@ class Test_datetime_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _datetime_from_json + return _datetime_from_json(value, field) def test_w_none_nullable(self): @@ -195,6 +207,7 @@ def test_w_none_required(self): def test_w_string_value(self): import datetime + coerced = self._call_fut('2016-12-02T18:51:33', object()) self.assertEqual( coerced, @@ -205,6 +218,7 @@ class Test_date_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _date_from_json + return _date_from_json(value, field) def test_w_none_nullable(self): @@ -216,6 +230,7 @@ def test_w_none_required(self): def test_w_string_value(self): import datetime + coerced = self._call_fut('1987-09-22', object()) self.assertEqual( coerced, @@ -226,6 +241,7 @@ class Test_time_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _time_from_json + return _time_from_json(value, field) def test_w_none_nullable(self): @@ -237,6 +253,7 @@ def test_w_none_required(self): def test_w_string_value(self): import datetime + coerced = self._call_fut('12:12:27', object()) self.assertEqual( coerced, @@ -247,6 +264,7 @@ class Test_record_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _record_from_json + return _record_from_json(value, field) def test_w_none_nullable(self): @@ -308,6 +326,7 @@ class Test_row_from_json(unittest.TestCase): def _call_fut(self, row, schema): from google.cloud.bigquery._helpers import _row_from_json + return _row_from_json(row, schema) def test_w_single_scalar_column(self): @@ -399,6 +418,7 @@ class Test_rows_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _rows_from_json + return _rows_from_json(value, field) def test_w_record_subfield(self): @@ -485,6 +505,7 @@ class Test_int_to_json(unittest.TestCase): def _call_fut(self, value): from google.cloud.bigquery._helpers import _int_to_json + return _int_to_json(value) def test_w_int(self): @@ -498,6 +519,7 @@ class Test_float_to_json(unittest.TestCase): def _call_fut(self, value): from google.cloud.bigquery._helpers import _float_to_json + return _float_to_json(value) def test_w_float(self): @@ -508,6 +530,7 @@ class Test_bool_to_json(unittest.TestCase): def _call_fut(self, value): from google.cloud.bigquery._helpers import _bool_to_json + return _bool_to_json(value) def test_w_true(self): @@ -524,6 +547,7 @@ class Test_bytes_to_json(unittest.TestCase): def _call_fut(self, value): from google.cloud.bigquery._helpers import _bytes_to_json + return _bytes_to_json(value) def test_w_non_bytes(self): @@ -541,6 +565,7 @@ class Test_timestamp_to_json(unittest.TestCase): def _call_fut(self, value): from google.cloud.bigquery._helpers import _timestamp_to_json + return _timestamp_to_json(value) def test_w_float(self): @@ -552,6 +577,7 @@ def test_w_string(self): def test_w_datetime_wo_zone(self): import datetime + ZULU = '2016-12-20 15:58:27.339328+00:00' when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) self.assertEqual(self._call_fut(when), ZULU) @@ -572,6 +598,7 @@ def utcoffset(self, _): def test_w_datetime_w_utc_zone(self): import datetime from google.cloud._helpers import UTC + ZULU = '2016-12-20 15:58:27.339328+00:00' when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) self.assertEqual(self._call_fut(when), ZULU) @@ -581,6 +608,7 @@ class Test_datetime_to_json(unittest.TestCase): def _call_fut(self, value): from google.cloud.bigquery._helpers import _datetime_to_json + return _datetime_to_json(value) def test_w_string(self): @@ -590,6 +618,7 @@ def test_w_string(self): def test_w_datetime(self): import datetime from google.cloud._helpers import UTC + when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC) self.assertEqual(self._call_fut(when), '2016-12-03T14:11:27.123456Z') @@ -598,6 +627,7 @@ class Test_date_to_json(unittest.TestCase): def _call_fut(self, value): from google.cloud.bigquery._helpers import _date_to_json + return _date_to_json(value) def test_w_string(self): @@ -606,6 +636,7 @@ def test_w_string(self): def test_w_datetime(self): import datetime + when = datetime.date(2016, 12, 3) self.assertEqual(self._call_fut(when), '2016-12-03') @@ -614,6 +645,7 @@ class Test_time_to_json(unittest.TestCase): def _call_fut(self, value): from google.cloud.bigquery._helpers import _time_to_json + return _time_to_json(value) def test_w_string(self): @@ -622,6 +654,7 @@ def test_w_string(self): def test_w_datetime(self): import datetime + when = datetime.time(12, 13, 41) self.assertEqual(self._call_fut(when), '12:13:41') @@ -631,6 +664,7 @@ class Test_ConfigurationProperty(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery._helpers import _ConfigurationProperty + return _ConfigurationProperty def _make_one(self, *args, **kw): @@ -667,6 +701,7 @@ class Test_TypedProperty(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery._helpers import _TypedProperty + return _TypedProperty def _make_one(self, *args, **kw): @@ -701,6 +736,7 @@ class Test_EnumProperty(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery._helpers import _EnumProperty + return _EnumProperty def test_it(self): @@ -735,6 +771,7 @@ class Test_UDFResourcesProperty(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery._helpers import UDFResourcesProperty + return UDFResourcesProperty def _make_one(self, *args, **kw): @@ -760,6 +797,7 @@ def test_instance_getter_empty(self): def test_instance_getter_w_non_empty_list(self): from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] _, klass = self._descriptor_and_klass() @@ -770,6 +808,7 @@ def test_instance_getter_w_non_empty_list(self): def test_instance_setter_w_empty_list(self): from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] _, klass = self._descriptor_and_klass() @@ -782,6 +821,7 @@ def test_instance_setter_w_empty_list(self): def test_instance_setter_w_valid_udf(self): from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] _, klass = self._descriptor_and_klass() @@ -806,6 +846,7 @@ class Test_AbstractQueryParameter(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery._helpers import AbstractQueryParameter + return AbstractQueryParameter def _make_one(self, *args, **kw): @@ -827,6 +868,7 @@ class Test_ScalarQueryParameter(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery._helpers import ScalarQueryParameter + return ScalarQueryParameter def _make_one(self, *args, **kw): @@ -931,6 +973,7 @@ def test_to_api_repr_w_bool(self): def test_to_api_repr_w_timestamp_datetime(self): from google.cloud._helpers import UTC import datetime + STAMP = '2016-12-20 15:58:27.339328+00:00' when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) EXPECTED = { @@ -948,6 +991,7 @@ def test_to_api_repr_w_timestamp_datetime(self): def test_to_api_repr_w_timestamp_micros(self): import datetime from google.cloud._helpers import _microseconds_from_datetime + now = datetime.datetime.utcnow() seconds = _microseconds_from_datetime(now) / 1.0e6 EXPECTED = { @@ -965,6 +1009,7 @@ def test_to_api_repr_w_timestamp_micros(self): def test_to_api_repr_w_datetime_datetime(self): import datetime from google.cloud._helpers import _datetime_to_rfc3339 + now = datetime.datetime.utcnow() EXPECTED = { 'parameterType': { @@ -981,6 +1026,7 @@ def test_to_api_repr_w_datetime_datetime(self): def test_to_api_repr_w_datetime_string(self): import datetime from google.cloud._helpers import _datetime_to_rfc3339 + now = datetime.datetime.utcnow() now_str = _datetime_to_rfc3339(now) EXPECTED = { @@ -997,6 +1043,7 @@ def test_to_api_repr_w_datetime_string(self): def test_to_api_repr_w_date_date(self): import datetime + today = datetime.date.today() EXPECTED = { 'parameterType': { @@ -1012,6 +1059,7 @@ def test_to_api_repr_w_date_date(self): def test_to_api_repr_w_date_string(self): import datetime + today = datetime.date.today() today_str = today.isoformat(), EXPECTED = { @@ -1045,6 +1093,7 @@ class Test_ArrayQueryParameter(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery._helpers import ArrayQueryParameter + return ArrayQueryParameter def _make_one(self, *args, **kw): @@ -1186,6 +1235,7 @@ class Test_StructQueryParameter(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery._helpers import StructQueryParameter + return StructQueryParameter def _make_one(self, *args, **kw): @@ -1194,6 +1244,7 @@ def _make_one(self, *args, **kw): @staticmethod def _make_subparam(name, type_, value): from google.cloud.bigquery._helpers import ScalarQueryParameter + return ScalarQueryParameter(name, type_, value) def test_ctor(self): @@ -1308,6 +1359,7 @@ class Test_QueryParametersProperty(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery._helpers import QueryParametersProperty + return QueryParametersProperty def _make_one(self, *args, **kw): @@ -1333,6 +1385,7 @@ def test_instance_getter_empty(self): def test_instance_getter_w_non_empty_list(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() instance = klass() @@ -1342,6 +1395,7 @@ def test_instance_getter_w_non_empty_list(self): def test_instance_setter_w_empty_list(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() instance = klass() @@ -1353,6 +1407,7 @@ def test_instance_setter_w_empty_list(self): def test_instance_setter_w_valid_udf(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() instance = klass() diff --git a/packages/google-cloud-bigquery/unit_tests/test__http.py b/packages/google-cloud-bigquery/unit_tests/test__http.py index 0592b98178cd..6beaa3a8cc4d 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__http.py +++ b/packages/google-cloud-bigquery/unit_tests/test__http.py @@ -20,6 +20,7 @@ class TestConnection(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery._http import Connection + return Connection def _make_one(self, *args, **kw): @@ -38,6 +39,7 @@ def test_build_api_url_no_extra_query_params(self): def test_build_api_url_w_extra_query_params(self): from six.moves.urllib.parse import parse_qsl from six.moves.urllib.parse import urlsplit + conn = self._make_one() uri = conn.build_api_url('/foo', {'bar': 'baz'}) scheme, netloc, path, qs, _ = urlsplit(uri) diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/unit_tests/test_client.py index 7442d7ad8b5d..3056c1e2f39d 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_client.py +++ b/packages/google-cloud-bigquery/unit_tests/test_client.py @@ -19,6 +19,7 @@ def _make_credentials(): import google.auth.credentials + return mock.Mock(spec=google.auth.credentials.Credentials) @@ -27,6 +28,7 @@ class TestClient(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery.client import Client + return Client def _make_one(self, *args, **kw): @@ -34,6 +36,7 @@ def _make_one(self, *args, **kw): def test_ctor(self): from google.cloud.bigquery._http import Connection + PROJECT = 'PROJECT' creds = _make_credentials() http = object() @@ -45,6 +48,7 @@ def test_ctor(self): def test_list_projects_defaults(self): import six from google.cloud.bigquery.client import Project + PROJECT_1 = 'PROJECT_ONE' PROJECT_2 = 'PROJECT_TWO' PATH = 'projects' @@ -115,6 +119,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): def test_list_datasets_defaults(self): import six from google.cloud.bigquery.dataset import Dataset + PROJECT = 'PROJECT' DATASET_1 = 'dataset_one' DATASET_2 = 'dataset_two' @@ -185,6 +190,7 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): def test_dataset(self): from google.cloud.bigquery.dataset import Dataset + PROJECT = 'PROJECT' DATASET = 'dataset_name' creds = _make_credentials() @@ -208,6 +214,7 @@ def test_list_jobs_defaults(self): from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractTableToStorageJob from google.cloud.bigquery.job import QueryJob + PROJECT = 'PROJECT' DATASET = 'test_dataset' SOURCE_TABLE = 'source_table' @@ -336,6 +343,7 @@ def test_list_jobs_defaults(self): def test_list_jobs_load_job_wo_sourceUris(self): import six from google.cloud.bigquery.job import LoadTableFromStorageJob + PROJECT = 'PROJECT' DATASET = 'test_dataset' SOURCE_TABLE = 'source_table' @@ -391,6 +399,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): def test_list_jobs_explicit_missing(self): import six + PROJECT = 'PROJECT' PATH = 'projects/%s/jobs' % PROJECT DATA = {} @@ -421,6 +430,7 @@ def test_list_jobs_explicit_missing(self): def test_load_table_from_storage(self): from google.cloud.bigquery.job import LoadTableFromStorageJob + PROJECT = 'PROJECT' JOB = 'job_name' DATASET = 'dataset_name' @@ -440,6 +450,7 @@ def test_load_table_from_storage(self): def test_copy_table(self): from google.cloud.bigquery.job import CopyJob + PROJECT = 'PROJECT' JOB = 'job_name' DATASET = 'dataset_name' @@ -460,6 +471,7 @@ def test_copy_table(self): def test_extract_table_to_storage(self): from google.cloud.bigquery.job import ExtractTableToStorageJob + PROJECT = 'PROJECT' JOB = 'job_name' DATASET = 'dataset_name' @@ -479,6 +491,7 @@ def test_extract_table_to_storage(self): def test_run_async_query_defaults(self): from google.cloud.bigquery.job import QueryJob + PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' @@ -496,6 +509,7 @@ def test_run_async_query_defaults(self): def test_run_async_w_udf_resources(self): from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.job import QueryJob + RESOURCE_URI = 'gs://some-bucket/js/lib.js' PROJECT = 'PROJECT' JOB = 'job_name' @@ -515,6 +529,7 @@ def test_run_async_w_udf_resources(self): def test_run_async_w_query_parameters(self): from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.job import QueryJob + PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' @@ -533,6 +548,7 @@ def test_run_async_w_query_parameters(self): def test_run_sync_query_defaults(self): from google.cloud.bigquery.query import QueryResults + PROJECT = 'PROJECT' QUERY = 'select count(*) from persons' creds = _make_credentials() @@ -549,6 +565,7 @@ def test_run_sync_query_defaults(self): def test_run_sync_query_w_udf_resources(self): from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.query import QueryResults + RESOURCE_URI = 'gs://some-bucket/js/lib.js' PROJECT = 'PROJECT' QUERY = 'select count(*) from persons' @@ -567,6 +584,7 @@ def test_run_sync_query_w_udf_resources(self): def test_run_sync_query_w_query_parameters(self): from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.query import QueryResults + PROJECT = 'PROJECT' QUERY = 'select count(*) from persons' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/unit_tests/test_dataset.py b/packages/google-cloud-bigquery/unit_tests/test_dataset.py index 015b160687d9..ad510dded5ea 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_dataset.py +++ b/packages/google-cloud-bigquery/unit_tests/test_dataset.py @@ -20,6 +20,7 @@ class TestAccessGrant(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery.dataset import AccessGrant + return AccessGrant def _make_one(self, *args, **kw): @@ -84,6 +85,7 @@ class TestDataset(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery.dataset import Dataset + return Dataset def _make_one(self, *args, **kw): @@ -198,6 +200,7 @@ def test_ctor_defaults(self): def test_ctor_explicit(self): from google.cloud.bigquery.dataset import AccessGrant + phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') bharney = AccessGrant('OWNER', 'userByEmail', 'bharney@example.com') grants = [phred, bharney] @@ -233,6 +236,7 @@ def test_access_grants_setter_non_list(self): def test_access_grants_setter_invalid_field(self): from google.cloud.bigquery.dataset import AccessGrant + client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') @@ -241,6 +245,7 @@ def test_access_grants_setter_invalid_field(self): def test_access_grants_setter(self): from google.cloud.bigquery.dataset import AccessGrant + client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') @@ -372,6 +377,7 @@ def test_create_w_bound_client(self): def test_create_w_alternate_client(self): from google.cloud.bigquery.dataset import AccessGrant + PATH = 'projects/%s/datasets' % self.PROJECT USER_EMAIL = 'phred@example.com' GROUP_EMAIL = 'group-name@lists.example.com' @@ -786,6 +792,7 @@ def test_list_tables_explicit(self): def test_table_wo_schema(self): from google.cloud.bigquery.table import Table + conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = self._make_one(self.DS_NAME, client=client) @@ -798,6 +805,7 @@ def test_table_wo_schema(self): def test_table_w_schema(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table + conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = self._make_one(self.DS_NAME, client=client) @@ -825,6 +833,7 @@ def __init__(self, *responses): def api_request(self, **kw): from google.cloud.exceptions import NotFound + self._requested.append(kw) try: diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index 57b588a5801c..e0176d6a456b 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -125,6 +125,7 @@ class TestLoadTableFromStorageJob(unittest.TestCase, _Base): @staticmethod def _get_target_class(): from google.cloud.bigquery.job import LoadTableFromStorageJob + return LoadTableFromStorageJob def _setUpConstants(self): @@ -264,6 +265,7 @@ def test_ctor(self): def test_ctor_w_schema(self): from google.cloud.bigquery.schema import SchemaField + client = _Client(self.PROJECT) table = _Table() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') @@ -281,6 +283,7 @@ def test_schema_setter_non_list(self): def test_schema_setter_invalid_field(self): from google.cloud.bigquery.schema import SchemaField + client = _Client(self.PROJECT) table = _Table() job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) @@ -290,6 +293,7 @@ def test_schema_setter_invalid_field(self): def test_schema_setter(self): from google.cloud.bigquery.schema import SchemaField + client = _Client(self.PROJECT) table = _Table() job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) @@ -465,6 +469,7 @@ def test_begin_w_bound_client(self): def test_begin_w_alternate_client(self): from google.cloud.bigquery.schema import SchemaField + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource(ended=True) LOAD_CONFIGURATION = { @@ -646,6 +651,7 @@ class TestCopyJob(unittest.TestCase, _Base): @staticmethod def _get_target_class(): from google.cloud.bigquery.job import CopyJob + return CopyJob def _makeResource(self, started=False, ended=False): @@ -998,6 +1004,7 @@ class TestExtractTableToStorageJob(unittest.TestCase, _Base): @staticmethod def _get_target_class(): from google.cloud.bigquery.job import ExtractTableToStorageJob + return ExtractTableToStorageJob def _makeResource(self, started=False, ended=False): @@ -1292,6 +1299,7 @@ class TestQueryJob(unittest.TestCase, _Base): @staticmethod def _get_target_class(): from google.cloud.bigquery.job import QueryJob + return QueryJob def _makeResource(self, started=False, ended=False): @@ -1431,6 +1439,7 @@ def test_ctor_defaults(self): def test_ctor_w_udf_resources(self): from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] client = _Client(self.PROJECT) @@ -1440,6 +1449,7 @@ def test_ctor_w_udf_resources(self): def test_ctor_w_query_parameters(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] client = _Client(self.PROJECT) job = self._make_one(self.JOB_NAME, self.QUERY, client, @@ -1501,6 +1511,7 @@ def test_from_api_repr_w_properties(self): def test_results(self): from google.cloud.bigquery.query import QueryResults + client = _Client(self.PROJECT) job = self._make_one(self.JOB_NAME, self.QUERY, client) results = job.results() @@ -1542,6 +1553,7 @@ def test_begin_w_bound_client(self): def test_begin_w_alternate_client(self): from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import Table + PATH = '/projects/%s/jobs' % (self.PROJECT,) TABLE = 'TABLE' DS_NAME = 'DATASET' @@ -1612,6 +1624,7 @@ def test_begin_w_alternate_client(self): def test_begin_w_udf(self): from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' INLINE_UDF_CODE = 'var someCode = "here";' PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -1661,6 +1674,7 @@ def test_begin_w_udf(self): def test_begin_w_named_query_parameter(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource() @@ -1712,6 +1726,7 @@ def test_begin_w_named_query_parameter(self): def test_begin_w_positional_query_parameter(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter.positional('INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource() @@ -1794,6 +1809,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import Table + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) DS_NAME = 'DATASET' DEST_TABLE = 'dest_table' @@ -1851,6 +1867,7 @@ def __init__(self, project='project', connection=None): def dataset(self, name): from google.cloud.bigquery.dataset import Dataset + return Dataset(name, client=self) @@ -1882,6 +1899,7 @@ def __init__(self, *responses): def api_request(self, **kw): from google.cloud.exceptions import NotFound + self._requested.append(kw) try: diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/unit_tests/test_query.py index fa8dd401e435..0e388c3cd0d9 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_query.py +++ b/packages/google-cloud-bigquery/unit_tests/test_query.py @@ -26,6 +26,7 @@ class TestQueryResults(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery.query import QueryResults + return QueryResults def _make_one(self, *args, **kw): @@ -75,6 +76,7 @@ def _makeResource(self, complete=False): def _verifySchema(self, query, resource): from google.cloud.bigquery.schema import SchemaField + if 'schema' in resource: fields = resource['schema']['fields'] self.assertEqual(len(query.schema), len(fields)) @@ -162,6 +164,7 @@ def test_ctor_defaults(self): def test_ctor_w_udf_resources(self): from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] client = _Client(self.PROJECT) @@ -170,6 +173,7 @@ def test_ctor_w_udf_resources(self): def test_ctor_w_query_parameters(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] client = _Client(self.PROJECT) query = self._make_one(self.QUERY, client, @@ -180,6 +184,7 @@ def test_from_query_job(self): from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery._helpers import UDFResource + DS_NAME = 'DATASET' RESOURCE_URI = 'gs://some-bucket/js/lib.js' client = _Client(self.PROJECT) @@ -205,6 +210,7 @@ def test_from_query_job(self): def test_from_query_job_wo_default_dataset(self): from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' client = _Client(self.PROJECT) job = QueryJob( @@ -229,6 +235,7 @@ def test_job_wo_jobid(self): def test_job_w_jobid(self): from google.cloud.bigquery.job import QueryJob + SERVER_GENERATED = 'SERVER_GENERATED' client = _Client(self.PROJECT) query = self._make_one(self.QUERY, client) @@ -327,6 +334,7 @@ def test_run_w_alternate_client(self): def test_run_w_inline_udf(self): from google.cloud.bigquery._helpers import UDFResource + INLINE_UDF_CODE = 'var someCode = "here";' PATH = 'projects/%s/queries' % self.PROJECT RESOURCE = self._makeResource(complete=False) @@ -352,6 +360,7 @@ def test_run_w_inline_udf(self): def test_run_w_udf_resource_uri(self): from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' PATH = 'projects/%s/queries' % self.PROJECT RESOURCE = self._makeResource(complete=False) @@ -377,6 +386,7 @@ def test_run_w_udf_resource_uri(self): def test_run_w_mixed_udfs(self): from google.cloud.bigquery._helpers import UDFResource + RESOURCE_URI = 'gs://some-bucket/js/lib.js' INLINE_UDF_CODE = 'var someCode = "here";' PATH = 'projects/%s/queries' % self.PROJECT @@ -409,6 +419,7 @@ def test_run_w_mixed_udfs(self): def test_run_w_named_query_paramter(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + PATH = 'projects/%s/queries' % self.PROJECT RESOURCE = self._makeResource(complete=False) RESOURCE['parameterMode'] = 'NAMED' @@ -444,6 +455,7 @@ def test_run_w_named_query_paramter(self): def test_run_w_positional_query_paramter(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + PATH = 'projects/%s/queries' % self.PROJECT RESOURCE = self._makeResource(complete=False) RESOURCE['parameterMode'] = 'POSITIONAL' @@ -560,6 +572,7 @@ def __init__(self, project='project', connection=None): def dataset(self, name): from google.cloud.bigquery.dataset import Dataset + return Dataset(name, client=self) diff --git a/packages/google-cloud-bigquery/unit_tests/test_schema.py b/packages/google-cloud-bigquery/unit_tests/test_schema.py index 8b49bff5b29a..8081fcd6f4e0 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_schema.py +++ b/packages/google-cloud-bigquery/unit_tests/test_schema.py @@ -20,6 +20,7 @@ class TestSchemaField(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigquery.schema import SchemaField + return SchemaField def _make_one(self, *args, **kw): diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 0b8453103082..57fcb4a4800c 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -38,6 +38,7 @@ class TestTable(unittest.TestCase, _SchemaBase): @staticmethod def _get_target_class(): from google.cloud.bigquery.table import Table + return Table def _make_one(self, *args, **kw): @@ -162,6 +163,7 @@ def test_ctor(self): def test_ctor_w_schema(self): from google.cloud.bigquery.table import SchemaField + client = _Client(self.PROJECT) dataset = _Dataset(client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') @@ -223,6 +225,7 @@ def test_schema_setter_non_list(self): def test_schema_setter_invalid_field(self): from google.cloud.bigquery.table import SchemaField + client = _Client(self.PROJECT) dataset = _Dataset(client) table = self._make_one(self.TABLE_NAME, dataset) @@ -232,6 +235,7 @@ def test_schema_setter_invalid_field(self): def test_schema_setter(self): from google.cloud.bigquery.table import SchemaField + client = _Client(self.PROJECT) dataset = _Dataset(client) table = self._make_one(self.TABLE_NAME, dataset) @@ -402,6 +406,7 @@ def test_create_no_view_query_no_schema(self): def test_create_w_bound_client(self): from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) @@ -432,6 +437,7 @@ def test_create_w_bound_client(self): def test_create_w_partition_no_expire(self): from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) @@ -466,6 +472,7 @@ def test_create_w_partition_no_expire(self): def test_create_w_partition_and_expire(self): from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) @@ -500,6 +507,7 @@ def test_create_w_partition_and_expire(self): def test_partition_type_setter_bad_type(self): from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -513,6 +521,7 @@ def test_partition_type_setter_bad_type(self): def test_partition_type_setter_unknown_value(self): from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -526,6 +535,7 @@ def test_partition_type_setter_unknown_value(self): def test_partition_type_setter_w_known_value(self): from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -540,6 +550,7 @@ def test_partition_type_setter_w_known_value(self): def test_partition_type_setter_w_none(self): from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -555,6 +566,7 @@ def test_partition_type_setter_w_none(self): def test_partition_experation_bad_type(self): from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -568,6 +580,7 @@ def test_partition_experation_bad_type(self): def test_partition_expiration_w_integer(self): from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -583,6 +596,7 @@ def test_partition_expiration_w_integer(self): def test_partition_expiration_w_none(self): from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -602,6 +616,7 @@ def test_partition_expiration_w_none(self): def test_partition_expiration_w_none_no_partition_set(self): from google.cloud.bigquery.table import SchemaField + RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -617,6 +632,7 @@ def test_partition_expiration_w_none_no_partition_set(self): def test_list_partitions(self): from google.cloud.bigquery.table import SchemaField + conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) client._query_results = [(20160804, None), (20160805, None)] @@ -682,6 +698,7 @@ def test_create_w_missing_output_properties(self): # In the wild, the resource returned from 'dataset.create' sometimes # lacks 'creationTime' / 'lastModifiedTime' from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) RESOURCE = self._makeResource() del RESOURCE['creationTime'] @@ -896,6 +913,7 @@ def test_patch_w_schema_None(self): def test_update_w_bound_client(self): from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_NAME, self.TABLE_NAME) DESCRIPTION = 'DESCRIPTION' @@ -1278,6 +1296,7 @@ def test_fetch_data_w_record_schema(self): def test_insert_data_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA + client = _Client(project=self.PROJECT) dataset = _Dataset(client) table = self._make_one(self.TABLE_NAME, dataset=dataset) @@ -1343,6 +1362,7 @@ def _row_data(row): def test_insert_data_w_alternate_client(self): from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( self.PROJECT, self.DS_NAME, self.TABLE_NAME) RESPONSE = { @@ -1407,6 +1427,7 @@ def _row_data(row): def test_insert_data_w_repeated_fields(self): from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( self.PROJECT, self.DS_NAME, self.TABLE_NAME) conn = _Connection({}) @@ -1442,6 +1463,7 @@ def _row_data(row): def test_insert_data_w_record_schema(self): from google.cloud.bigquery.table import SchemaField + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( self.PROJECT, self.DS_NAME, self.TABLE_NAME) conn = _Connection({}) @@ -1651,6 +1673,7 @@ def test_upload_from_file_resumable_with_400(self): from google.cloud.exceptions import BadRequest from google.cloud._helpers import UTC from google.cloud._testing import _NamedTemporaryFile + WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) @@ -1776,6 +1799,7 @@ class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): def _call_fut(self, resource): from google.cloud.bigquery.table import _parse_schema_resource + return _parse_schema_resource(resource) def _makeResource(self): @@ -1820,10 +1844,12 @@ class Test_build_schema_resource(unittest.TestCase, _SchemaBase): def _call_fut(self, resource): from google.cloud.bigquery.table import _build_schema_resource + return _build_schema_resource(resource) def test_defaults(self): from google.cloud.bigquery.table import SchemaField + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') resource = self._call_fut([full_name, age]) @@ -1839,6 +1865,7 @@ def test_defaults(self): def test_w_description(self): from google.cloud.bigquery.table import SchemaField + DESCRIPTION = 'DESCRIPTION' full_name = SchemaField('full_name', 'STRING', mode='REQUIRED', description=DESCRIPTION) @@ -1857,6 +1884,7 @@ def test_w_description(self): def test_w_subfields(self): from google.cloud.bigquery.table import SchemaField + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') ph_type = SchemaField('type', 'STRING', 'REQUIRED') ph_num = SchemaField('number', 'STRING', 'REQUIRED') @@ -1956,6 +1984,7 @@ def __init__(self, *responses): def api_request(self, **kw): from google.cloud.exceptions import NotFound + self._requested.append(kw) try: @@ -1970,6 +1999,7 @@ def build_api_url(self, path, query_params=None, from six.moves.urllib.parse import urlencode from six.moves.urllib.parse import urlsplit from six.moves.urllib.parse import urlunsplit + # Mimic the build_api_url interface. qs = urlencode(query_params or {}) scheme, netloc, _, _, _ = urlsplit(api_base_url) @@ -1978,11 +2008,14 @@ def build_api_url(self, path, query_params=None, def _email_chunk_parser(): import six + if six.PY3: # pragma: NO COVER Python3 from email.parser import BytesParser + parser = BytesParser() return parser.parsebytes else: from email.parser import Parser + parser = Parser() return parser.parsestr From 8457b04e5132e1d06af010c7ec3233984ddab662 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 26 Jan 2017 13:13:02 -0800 Subject: [PATCH 0077/2016] Renaming JSONClient -> ClientWithProject. Done via: $ git grep -l JSONClient | xargs sed -i s/JSONClient/ClientWithProject/g Also fixing test b0rken by previous commit. --- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e98f390ff616..a00a1a28abaa 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -15,7 +15,7 @@ """Client for interacting with the Google BigQuery API.""" -from google.cloud.client import JSONClient +from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.job import CopyJob @@ -50,7 +50,7 @@ def from_api_repr(cls, resource): resource['id'], resource['numericId'], resource['friendlyName']) -class Client(JSONClient): +class Client(ClientWithProject): """Client to bundle configuration needed for API requests. :type project: str From 8c118edcbc5abf4ff29c056aac8de132bb98b4a3 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 26 Jan 2017 16:09:19 -0800 Subject: [PATCH 0078/2016] Changing BigQuery Connection to only accept client. --- .../google-cloud-bigquery/google/cloud/bigquery/_http.py | 4 ---- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 7 +++++-- packages/google-cloud-bigquery/unit_tests/test__http.py | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index fd5bb3cb8b23..bdc5c023e1ab 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -28,7 +28,3 @@ class Connection(_http.JSONConnection): API_URL_TEMPLATE = '{api_base_url}/bigquery/{api_version}{path}' """A template for the URL of a particular API call.""" - - SCOPE = ('https://www.googleapis.com/auth/bigquery', - 'https://www.googleapis.com/auth/cloud-platform') - """The scopes required for authenticating as a BigQuery consumer.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a00a1a28abaa..a85cb10eede6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -72,11 +72,14 @@ class Client(ClientWithProject): ``credentials`` for the current object. """ + SCOPE = ('https://www.googleapis.com/auth/bigquery', + 'https://www.googleapis.com/auth/cloud-platform') + """The scopes required for authenticating as a BigQuery consumer.""" + def __init__(self, project=None, credentials=None, http=None): super(Client, self).__init__( project=project, credentials=credentials, http=http) - self._connection = Connection( - credentials=self._credentials, http=self._http) + self._connection = Connection(self) def list_projects(self, max_results=None, page_token=None): """List projects for the project associated with this client. diff --git a/packages/google-cloud-bigquery/unit_tests/test__http.py b/packages/google-cloud-bigquery/unit_tests/test__http.py index 6beaa3a8cc4d..4fd8de8017fc 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__http.py +++ b/packages/google-cloud-bigquery/unit_tests/test__http.py @@ -27,7 +27,7 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_build_api_url_no_extra_query_params(self): - conn = self._make_one() + conn = self._make_one(object()) URI = '/'.join([ conn.API_BASE_URL, 'bigquery', @@ -40,7 +40,7 @@ def test_build_api_url_w_extra_query_params(self): from six.moves.urllib.parse import parse_qsl from six.moves.urllib.parse import urlsplit - conn = self._make_one() + conn = self._make_one(object()) uri = conn.build_api_url('/foo', {'bar': 'baz'}) scheme, netloc, path, qs, _ = urlsplit(uri) self.assertEqual('%s://%s' % (scheme, netloc), conn.API_BASE_URL) From 989a4babc6c3dc39442f94bbcb98aaca31f922c6 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 26 Jan 2017 20:08:51 -0800 Subject: [PATCH 0079/2016] Fix-up docstrings after Connection() signature change. --- .../google-cloud-bigquery/google/cloud/bigquery/_http.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index bdc5c023e1ab..8e2c218c1cc9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -18,7 +18,11 @@ class Connection(_http.JSONConnection): - """A connection to Google BigQuery via the JSON REST API.""" + """A connection to Google BigQuery via the JSON REST API. + + :type client: :class:`~google.cloud.bigquery.client.Client` + :param client: The client that owns the current connection. + """ API_BASE_URL = 'https://www.googleapis.com' """The base of the API call URL.""" From 24e1fc41bfef7be879ca2187b53522a56d4b9083 Mon Sep 17 00:00:00 2001 From: Thomas Schultz Date: Tue, 31 Jan 2017 09:17:12 -0500 Subject: [PATCH 0080/2016] Updates for pycodestyle. (#2973) --- packages/google-cloud-bigquery/unit_tests/test_dataset.py | 2 +- packages/google-cloud-bigquery/unit_tests/test_job.py | 2 +- packages/google-cloud-bigquery/unit_tests/test_table.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/unit_tests/test_dataset.py b/packages/google-cloud-bigquery/unit_tests/test_dataset.py index ad510dded5ea..97721554f1b6 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_dataset.py +++ b/packages/google-cloud-bigquery/unit_tests/test_dataset.py @@ -838,7 +838,7 @@ def api_request(self, **kw): try: response, self._responses = self._responses[0], self._responses[1:] - except: + except IndexError: raise NotFound('miss') else: return response diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index e0176d6a456b..0e43712d0d15 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -1904,7 +1904,7 @@ def api_request(self, **kw): try: response, self._responses = self._responses[0], self._responses[1:] - except: + except IndexError: raise NotFound('miss') else: return response diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 57fcb4a4800c..09fd37730199 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -1989,7 +1989,7 @@ def api_request(self, **kw): try: response, self._responses = self._responses[0], self._responses[1:] - except: + except IndexError: raise NotFound('miss') else: return response From 2012604ed892194b8be01d911877ad81f7211b21 Mon Sep 17 00:00:00 2001 From: Thomas Schultz Date: Mon, 6 Feb 2017 13:24:21 -0500 Subject: [PATCH 0081/2016] Move dryRun property to from query to configuration level. (#2984) * Move dryRun property to from query to configuration level. --- .../google/cloud/bigquery/job.py | 8 +- .../unit_tests/test_job.py | 85 ++++++++++++++----- 2 files changed, 68 insertions(+), 25 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index d5152fcee25b..51929bc63803 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -983,7 +983,7 @@ def __init__(self, name, query, client, dry_run = _TypedProperty('dry_run', bool) """See: https://cloud.google.com/bigquery/docs/\ - reference/v2/jobs#configuration.query.dryRun + reference/rest/v2/jobs#configuration.dryRun """ write_disposition = WriteDisposition('write_disposition') @@ -1024,8 +1024,6 @@ def _populate_config_resource_booleans(self, configuration): configuration['useQueryCache'] = self.use_query_cache if self.use_legacy_sql is not None: configuration['useLegacySql'] = self.use_legacy_sql - if self.dry_run is not None: - configuration['dryRun'] = self.dry_run def _populate_config_resource(self, configuration): """Helper for _build_resource: copy config properties to resource""" @@ -1078,6 +1076,10 @@ def _build_resource(self): }, }, } + + if self.dry_run is not None: + resource['configuration']['dryRun'] = self.dry_run + configuration = resource['configuration'][self._JOB_TYPE] self._populate_config_resource(configuration) diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/unit_tests/test_job.py index 0e43712d0d15..57d96bf8ae15 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_job.py +++ b/packages/google-cloud-bigquery/unit_tests/test_job.py @@ -1331,11 +1331,6 @@ def _verifyBooleanResourceProperties(self, job, config): config['useLegacySql']) else: self.assertIsNone(job.use_legacy_sql) - if 'dryRun' in config: - self.assertEqual(job.dry_run, - config['dryRun']) - else: - self.assertIsNone(job.dry_run) def _verifyIntegerResourceProperties(self, job, config): if 'maximumBillingTier' in config: @@ -1366,48 +1361,58 @@ def _verifyQueryParameters(self, job, config): for found, expected in zip(job.query_parameters, query_parameters): self.assertEqual(found.to_api_repr(), expected) + def _verify_configuration_properties(self, job, configuration): + if 'dryRun' in configuration: + self.assertEqual(job.dry_run, + configuration['dryRun']) + else: + self.assertIsNone(job.dry_run) + def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) - config = resource.get('configuration', {}).get('query') - self._verifyBooleanResourceProperties(job, config) - self._verifyIntegerResourceProperties(job, config) - self._verify_udf_resources(job, config) - self._verifyQueryParameters(job, config) + configuration = resource.get('configuration', {}) + self._verify_configuration_properties(job, configuration) - self.assertEqual(job.query, config['query']) - if 'createDisposition' in config: + query_config = resource.get('configuration', {}).get('query') + self._verifyBooleanResourceProperties(job, query_config) + self._verifyIntegerResourceProperties(job, query_config) + self._verify_udf_resources(job, query_config) + self._verifyQueryParameters(job, query_config) + + self.assertEqual(job.query, query_config['query']) + if 'createDisposition' in query_config: self.assertEqual(job.create_disposition, - config['createDisposition']) + query_config['createDisposition']) else: self.assertIsNone(job.create_disposition) - if 'defaultDataset' in config: + if 'defaultDataset' in query_config: dataset = job.default_dataset ds_ref = { 'projectId': dataset.project, 'datasetId': dataset.name, } - self.assertEqual(ds_ref, config['defaultDataset']) + self.assertEqual(ds_ref, query_config['defaultDataset']) else: self.assertIsNone(job.default_dataset) - if 'destinationTable' in config: + if 'destinationTable' in query_config: table = job.destination tb_ref = { 'projectId': table.project, 'datasetId': table.dataset_name, 'tableId': table.name } - self.assertEqual(tb_ref, config['destinationTable']) + self.assertEqual(tb_ref, query_config['destinationTable']) else: self.assertIsNone(job.destination) - if 'priority' in config: + if 'priority' in query_config: self.assertEqual(job.priority, - config['priority']) + query_config['priority']) else: self.assertIsNone(job.priority) - if 'writeDisposition' in config: + if 'writeDisposition' in query_config: self.assertEqual(job.write_disposition, - config['writeDisposition']) + query_config['writeDisposition']) else: self.assertIsNone(job.write_disposition) @@ -1575,7 +1580,6 @@ def test_begin_w_alternate_client(self): 'priority': 'INTERACTIVE', 'useQueryCache': True, 'useLegacySql': True, - 'dryRun': True, 'writeDisposition': 'WRITE_TRUNCATE', 'maximumBillingTier': 4, 'maximumBytesBilled': 123456 @@ -1599,6 +1603,7 @@ def test_begin_w_alternate_client(self): job.use_query_cache = True job.use_legacy_sql = True job.dry_run = True + RESOURCE['configuration']['dryRun'] = True job.write_disposition = 'WRITE_TRUNCATE' job.maximum_billing_tier = 4 job.maximum_bytes_billed = 123456 @@ -1616,6 +1621,7 @@ def test_begin_w_alternate_client(self): 'jobId': self.JOB_NAME, }, 'configuration': { + 'dryRun': True, 'query': QUERY_CONFIGURATION, }, } @@ -1775,6 +1781,41 @@ def test_begin_w_positional_query_parameter(self): self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + def test_dry_run_query(self): + PATH = '/projects/%s/jobs' % (self.PROJECT,) + RESOURCE = self._makeResource() + # Ensure None for missing server-set props + del RESOURCE['statistics']['creationTime'] + del RESOURCE['etag'] + del RESOURCE['selfLink'] + del RESOURCE['user_email'] + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + job.dry_run = True + RESOURCE['configuration']['dryRun'] = True + + job.begin() + self.assertEqual(job.udf_resources, []) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'query': { + 'query': self.QUERY + }, + 'dryRun': True, + }, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(job, RESOURCE) + def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn = _Connection() From e8f1713fef77334654235c51884589125cf7a47c Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 13 Feb 2017 14:57:38 -0500 Subject: [PATCH 0082/2016] Check running status via 'job' property. If the '_job' attribute is not set, but we have a 'jobId' in our properties, we still don't want to re-run the query. Closes #3003. --- .../google-cloud-bigquery/google/cloud/bigquery/query.py | 2 +- packages/google-cloud-bigquery/unit_tests/test_query.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 95d2eabdbdbe..7eb47b141191 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -351,7 +351,7 @@ def run(self, client=None): :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. """ - if self._job is not None: + if self.job is not None: raise ValueError("Query job is already running.") client = self._require_client(client) diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/unit_tests/test_query.py index 0e388c3cd0d9..096aa93ca35b 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_query.py +++ b/packages/google-cloud-bigquery/unit_tests/test_query.py @@ -274,6 +274,15 @@ def test_run_w_already_has_job(self): with self.assertRaises(ValueError): query.run() + def test_run_w_already_has_job_in_properties(self): + JOB_ID = 'JOB_ID' + conn = _Connection() + client = _Client(project=self.PROJECT, connection=conn) + query = self._make_one(self.QUERY, client) + query._properties['jobReference'] = {'jobId': JOB_ID} + with self.assertRaises(ValueError): + query.run() + def test_run_w_bound_client(self): PATH = 'projects/%s/queries' % self.PROJECT RESOURCE = self._makeResource(complete=False) From 96fd4fb4ea22ca8adbce63213d8830b5937c93df Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 13 Feb 2017 15:42:15 -0500 Subject: [PATCH 0083/2016] Add explict tests for Query's scalar properties. --- .../unit_tests/test_query.py | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/unit_tests/test_query.py index 0e388c3cd0d9..42ce2d7e36a5 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_query.py +++ b/packages/google-cloud-bigquery/unit_tests/test_query.py @@ -251,6 +251,104 @@ def test_job_w_jobid(self): fetched_later = query.job self.assertIs(fetched_later, job) + def test_cache_hit_missing(self): + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + self.assertIsNone(query.cache_hit) + + def test_cache_hit_present(self): + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'cacheHit': True} + query._set_properties(resource) + self.assertTrue(query.cache_hit) + + def test_complete_missing(self): + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + self.assertIsNone(query.complete) + + def test_complete_present(self): + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'jobComplete': True} + query._set_properties(resource) + self.assertTrue(query.complete) + + def test_errors_missing(self): + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + self.assertIsNone(query.errors) + + def test_errors_present(self): + ERRORS = [ + {'reason': 'testing'}, + ] + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'errors': ERRORS} + query._set_properties(resource) + self.assertEqual(query.errors, ERRORS) + + def test_name_missing(self): + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + self.assertIsNone(query.name) + + def test_name_broken_job_reference(self): + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'jobReference': {'bogus': 'BOGUS'}} + query._set_properties(resource) + self.assertIsNone(query.name) + + def test_name_present(self): + JOB_ID = 'JOB_ID' + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'jobReference': {'jobId': JOB_ID}} + query._set_properties(resource) + self.assertEqual(query.name, JOB_ID) + + def test_page_token_missing(self): + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + self.assertIsNone(query.page_token) + + def test_page_token_present(self): + TOKEN = 'TOKEN' + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'pageToken': TOKEN} + query._set_properties(resource) + self.assertEqual(query.page_token, TOKEN) + + def test_total_rows_missing(self): + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + self.assertIsNone(query.total_rows) + + def test_total_rows_present_integer(self): + TOTAL_ROWS = 42 + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'totalRows': TOTAL_ROWS} + query._set_properties(resource) + self.assertEqual(query.total_rows, TOTAL_ROWS) + + def test_total_bytes_processed_missing(self): + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + self.assertIsNone(query.total_bytes_processed) + + def test_total_bytes_processed_present_integer(self): + TOTAL_BYTES_PROCESSED = 123456 + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'totalBytesProcessed': TOTAL_BYTES_PROCESSED} + query._set_properties(resource) + self.assertEqual(query.total_bytes_processed, TOTAL_BYTES_PROCESSED) + def test_schema(self): client = _Client(self.PROJECT) query = self._make_one(self.QUERY, client) From 0d114a04d0f2c2d8b2cad8a68f008e188977194e Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 13 Feb 2017 15:48:56 -0500 Subject: [PATCH 0084/2016] Return int from 'total_rows'/'total_bytes_processed', if present. Closes #3004. --- .../google/cloud/bigquery/query.py | 8 ++++-- .../unit_tests/test_query.py | 28 +++++++++++++++++-- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 95d2eabdbdbe..edc313176f0d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -208,7 +208,9 @@ def total_rows(self): :rtype: int, or ``NoneType`` :returns: Count generated on the server (None until set by the server). """ - return self._properties.get('totalRows') + total_rows = self._properties.get('totalRows') + if total_rows is not None: + return int(total_rows) @property def total_bytes_processed(self): @@ -220,7 +222,9 @@ def total_bytes_processed(self): :rtype: int, or ``NoneType`` :returns: Count generated on the server (None until set by the server). """ - return self._properties.get('totalBytesProcessed') + total_bytes_processed = self._properties.get('totalBytesProcessed') + if total_bytes_processed is not None: + return int(total_bytes_processed) @property def rows(self): diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/unit_tests/test_query.py index 42ce2d7e36a5..466018086047 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_query.py +++ b/packages/google-cloud-bigquery/unit_tests/test_query.py @@ -124,9 +124,15 @@ def _verifyResourceProperties(self, query, resource): self.assertEqual(query.complete, resource.get('jobComplete')) self.assertEqual(query.errors, resource.get('errors')) self.assertEqual(query.page_token, resource.get('pageToken')) - self.assertEqual(query.total_rows, resource.get('totalRows')) - self.assertEqual(query.total_bytes_processed, - resource.get('totalBytesProcessed')) + if 'totalRows' in resource: + self.assertEqual(query.total_rows, int(resource['totalRows'])) + else: + self.assertIsNone(query.total_rows) + if 'totalBytesProcessed' in resource: + self.assertEqual(query.total_bytes_processed, + int(resource['totalBytesProcessed'])) + else: + self.assertIsNone(query.total_bytes_processed) if 'jobReference' in resource: self.assertEqual(query.name, resource['jobReference']['jobId']) @@ -336,6 +342,14 @@ def test_total_rows_present_integer(self): query._set_properties(resource) self.assertEqual(query.total_rows, TOTAL_ROWS) + def test_total_rows_present_string(self): + TOTAL_ROWS = 42 + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'totalRows': str(TOTAL_ROWS)} + query._set_properties(resource) + self.assertEqual(query.total_rows, TOTAL_ROWS) + def test_total_bytes_processed_missing(self): client = _Client(self.PROJECT) query = self._make_one(self.QUERY, client) @@ -349,6 +363,14 @@ def test_total_bytes_processed_present_integer(self): query._set_properties(resource) self.assertEqual(query.total_bytes_processed, TOTAL_BYTES_PROCESSED) + def test_total_bytes_processed_present_string(self): + TOTAL_BYTES_PROCESSED = 123456 + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'totalBytesProcessed': str(TOTAL_BYTES_PROCESSED)} + query._set_properties(resource) + self.assertEqual(query.total_bytes_processed, TOTAL_BYTES_PROCESSED) + def test_schema(self): client = _Client(self.PROJECT) query = self._make_one(self.QUERY, client) From 8af9de6e81aa39dd06687fe7380ec5601551f1ba Mon Sep 17 00:00:00 2001 From: Louis Tiao Date: Wed, 15 Feb 2017 17:04:11 +1100 Subject: [PATCH 0085/2016] Fixed typo --- packages/google-cloud-bigquery/google/cloud/bigquery/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index a987454cc9a7..6d4a437a809f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Scheamas for BigQuery tables / queries.""" +"""Schemas for BigQuery tables / queries.""" class SchemaField(object): From 5b4a538221ed272e908cd0d494c06c5885f68027 Mon Sep 17 00:00:00 2001 From: Thomas Schultz Date: Thu, 16 Feb 2017 14:19:53 -0500 Subject: [PATCH 0086/2016] Update core dependency to google-cloud-core >= 0.23.0, < 0.24dev. (#3028) * Update core dependency to google-cloud-core >= 0.23.0, < 0.24dev. --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 98aea8dbe15b..21feb33b6888 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -50,7 +50,7 @@ REQUIREMENTS = [ - 'google-cloud-core >= 0.22.1, < 0.23dev', + 'google-cloud-core >= 0.23.0, < 0.24dev', ] setup( From 7134df7ef57e7ab7d43c0a63ce2548f911da682a Mon Sep 17 00:00:00 2001 From: Thomas Schultz Date: Fri, 17 Feb 2017 08:43:08 -0500 Subject: [PATCH 0087/2016] BigQuery formatting. --- packages/google-cloud-bigquery/unit_tests/test__helpers.py | 1 + packages/google-cloud-bigquery/unit_tests/test_table.py | 1 + 2 files changed, 2 insertions(+) diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index e42d8207b471..d989da592ba1 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -556,6 +556,7 @@ def test_w_non_bytes(self): def test_w_bytes(self): import base64 + source = b'source' expected = base64.encodestring(source) self.assertEqual(self._call_fut(source), expected) diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/unit_tests/test_table.py index 09fd37730199..6a496ba69e95 100644 --- a/packages/google-cloud-bigquery/unit_tests/test_table.py +++ b/packages/google-cloud-bigquery/unit_tests/test_table.py @@ -1536,6 +1536,7 @@ def test_upload_from_file_multipart_w_400(self): from google.cloud._testing import _NamedTemporaryFile from google.cloud._helpers import UTC from google.cloud.exceptions import BadRequest + WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) From 24db880163be36d0b88c72c06b8cf8347c383e4f Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Wed, 22 Feb 2017 07:40:10 -0800 Subject: [PATCH 0088/2016] Adding GCCL header for HTTP APIs. (#3046) --- .../google/cloud/bigquery/__init__.py | 3 ++ .../google/cloud/bigquery/_http.py | 9 ++++++ .../unit_tests/test__http.py | 32 +++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 9b5809af6cf7..615f7cfa1b6b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -23,6 +23,9 @@ """ +from pkg_resources import get_distribution +__version__ = get_distribution('google-cloud-bigquery').version + from google.cloud.bigquery._helpers import ArrayQueryParameter from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index 8e2c218c1cc9..75fac77e5eae 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -16,6 +16,11 @@ from google.cloud import _http +from google.cloud.bigquery import __version__ + + +_CLIENT_INFO = _http.CLIENT_INFO_TEMPLATE.format(__version__) + class Connection(_http.JSONConnection): """A connection to Google BigQuery via the JSON REST API. @@ -32,3 +37,7 @@ class Connection(_http.JSONConnection): API_URL_TEMPLATE = '{api_base_url}/bigquery/{api_version}{path}' """A template for the URL of a particular API call.""" + + _EXTRA_HEADERS = { + _http.CLIENT_INFO_HEADER: _CLIENT_INFO, + } diff --git a/packages/google-cloud-bigquery/unit_tests/test__http.py b/packages/google-cloud-bigquery/unit_tests/test__http.py index 4fd8de8017fc..9972e9859313 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__http.py +++ b/packages/google-cloud-bigquery/unit_tests/test__http.py @@ -14,6 +14,8 @@ import unittest +import mock + class TestConnection(unittest.TestCase): @@ -48,3 +50,33 @@ def test_build_api_url_w_extra_query_params(self): '/'.join(['', 'bigquery', conn.API_VERSION, 'foo'])) parms = dict(parse_qsl(qs)) self.assertEqual(parms['bar'], 'baz') + + def test_extra_headers(self): + from google.cloud import _http as base_http + from google.cloud.bigquery import _http as MUT + + http = mock.Mock(spec=['request']) + response = mock.Mock(status=200, spec=['status']) + data = b'brent-spiner' + http.request.return_value = response, data + client = mock.Mock(_http=http, spec=['_http']) + + conn = self._make_one(client) + req_data = 'req-data-boring' + result = conn.api_request( + 'GET', '/rainbow', data=req_data, expect_json=False) + self.assertEqual(result, data) + + expected_headers = { + 'Content-Length': str(len(req_data)), + 'Accept-Encoding': 'gzip', + base_http.CLIENT_INFO_HEADER: MUT._CLIENT_INFO, + 'User-Agent': conn.USER_AGENT, + } + expected_uri = conn.build_api_url('/rainbow') + http.request.assert_called_once_with( + body=req_data, + headers=expected_headers, + method='GET', + uri=expected_uri, + ) From e7ebaf20cc17a86e59352c12a05634188937a895 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Fri, 24 Feb 2017 11:30:18 -0800 Subject: [PATCH 0089/2016] Upgrading all versions for umbrella release. --- packages/google-cloud-bigquery/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 21feb33b6888..1c7267d1f523 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -50,12 +50,12 @@ REQUIREMENTS = [ - 'google-cloud-core >= 0.23.0, < 0.24dev', + 'google-cloud-core >= 0.23.1, < 0.24dev', ] setup( name='google-cloud-bigquery', - version='0.22.1', + version='0.23.0', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From bc1897e57b4a0d8902b9ad37b836f98b82655454 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 21 Mar 2017 12:48:44 -0400 Subject: [PATCH 0090/2016] Add system tests for all scalar query parameter types. (#3173) --- .../google/cloud/bigquery/_helpers.py | 9 +++++---- .../google-cloud-bigquery/unit_tests/test__helpers.py | 10 +++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index bbcbae1674c3..f381f6733f9c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -21,11 +21,12 @@ from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds -from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _time_from_iso8601_time_naive from google.cloud._helpers import _to_bytes +_RFC3339_MICROS_NO_ZULU = '%Y-%m-%dT%H:%M:%S.%f' + def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" @@ -58,7 +59,7 @@ def _string_from_json(value, _): def _bytes_from_json(value, field): """Base64-decode value""" if _not_null(value, field): - return base64.decodestring(_to_bytes(value)) + return base64.standard_b64decode(_to_bytes(value)) def _timestamp_from_json(value, field): @@ -143,7 +144,7 @@ def _bool_to_json(value): def _bytes_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, bytes): - value = base64.encodestring(value) + value = base64.standard_b64encode(value) return value @@ -161,7 +162,7 @@ def _timestamp_to_json(value): def _datetime_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): - value = _datetime_to_rfc3339(value) + value = value.strftime(_RFC3339_MICROS_NO_ZULU) return value diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index d989da592ba1..39ad6cbd62a7 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -145,7 +145,7 @@ def test_w_base64_encoded_bytes(self): import base64 expected = b'Wonderful!' - encoded = base64.encodestring(expected) + encoded = base64.standard_b64encode(expected) coerced = self._call_fut(encoded, object()) self.assertEqual(coerced, expected) @@ -153,7 +153,7 @@ def test_w_base64_encoded_text(self): import base64 expected = b'Wonderful!' - encoded = base64.encodestring(expected).decode('ascii') + encoded = base64.standard_b64encode(expected).decode('ascii') coerced = self._call_fut(encoded, object()) self.assertEqual(coerced, expected) @@ -558,7 +558,7 @@ def test_w_bytes(self): import base64 source = b'source' - expected = base64.encodestring(source) + expected = base64.standard_b64encode(source) self.assertEqual(self._call_fut(source), expected) @@ -621,7 +621,7 @@ def test_w_datetime(self): from google.cloud._helpers import UTC when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC) - self.assertEqual(self._call_fut(when), '2016-12-03T14:11:27.123456Z') + self.assertEqual(self._call_fut(when), '2016-12-03T14:11:27.123456') class Test_date_to_json(unittest.TestCase): @@ -1017,7 +1017,7 @@ def test_to_api_repr_w_datetime_datetime(self): 'type': 'DATETIME', }, 'parameterValue': { - 'value': _datetime_to_rfc3339(now), + 'value': _datetime_to_rfc3339(now)[:-1], # strip trailing 'Z' }, } klass = self._get_target_class() From a36cc5974abbe45a07c4b08de8f80fb6d107de2c Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 21 Mar 2017 15:45:00 -0400 Subject: [PATCH 0091/2016] Allow array query parameters w/ nested structs. Add a system test to verify the feature. Closes #2906. Toward #3029. --- .../google/cloud/bigquery/_helpers.py | 19 +++--- .../unit_tests/test__helpers.py | 58 ++++++++++++++----- 2 files changed, 56 insertions(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index f381f6733f9c..0c29fc7f1336 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -500,18 +500,23 @@ def to_api_repr(self): :returns: JSON mapping """ values = self.values - converter = _SCALAR_VALUE_TO_JSON.get(self.array_type) - if converter is not None: - values = [converter(value) for value in values] + if self.array_type == 'RECORD': + reprs = [value.to_api_repr() for value in values] + a_type = reprs[0]['parameterType'] + a_values = [repr_['parameterValue'] for repr_ in reprs] + else: + a_type = {'type': self.array_type} + converter = _SCALAR_VALUE_TO_JSON.get(self.array_type) + if converter is not None: + values = [converter(value) for value in values] + a_values = [{'value': value} for value in values] resource = { 'parameterType': { 'type': 'ARRAY', - 'arrayType': { - 'type': self.array_type, - }, + 'arrayType': a_type, }, 'parameterValue': { - 'arrayValues': [{'value': value} for value in values], + 'arrayValues': a_values, }, } if self.name is not None: diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 39ad6cbd62a7..1f885ed4fd4e 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -1089,6 +1089,12 @@ def test_to_api_repr_w_unknown_type(self): self.assertEqual(param.to_api_repr(), EXPECTED) +def _make_subparam(name, type_, value): + from google.cloud.bigquery._helpers import ScalarQueryParameter + + return ScalarQueryParameter(name, type_, value) + + class Test_ArrayQueryParameter(unittest.TestCase): @staticmethod @@ -1230,6 +1236,36 @@ def test_to_api_repr_w_unknown_type(self): param = klass.positional(array_type='UNKNOWN', values=['unknown']) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_record_type(self): + from google.cloud.bigquery._helpers import StructQueryParameter + + EXPECTED = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'foo', 'type': {'type': 'STRING'}}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + ], + }, + }, + 'parameterValue': { + 'arrayValues': [{ + 'structValues': { + 'foo': {'value': 'Foo'}, + 'bar': {'value': '123'}, + } + }] + }, + } + one = _make_subparam('foo', 'STRING', 'Foo') + another = _make_subparam('bar', 'INT64', 123) + struct = StructQueryParameter.positional(one, another) + klass = self._get_target_class() + param = klass.positional(array_type='RECORD', values=[struct]) + self.assertEqual(param.to_api_repr(), EXPECTED) + class Test_StructQueryParameter(unittest.TestCase): @@ -1242,23 +1278,17 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - @staticmethod - def _make_subparam(name, type_, value): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - return ScalarQueryParameter(name, type_, value) - def test_ctor(self): - sub_1 = self._make_subparam('bar', 'INT64', 123) - sub_2 = self._make_subparam('baz', 'STRING', 'abc') + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') param = self._make_one('foo', sub_1, sub_2) self.assertEqual(param.name, 'foo') self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) def test_positional(self): - sub_1 = self._make_subparam('bar', 'INT64', 123) - sub_2 = self._make_subparam('baz', 'STRING', 'abc') + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') klass = self._get_target_class() param = klass.positional(sub_1, sub_2) self.assertEqual(param.name, None) @@ -1327,8 +1357,8 @@ def test_to_api_repr_w_name(self): }, }, } - sub_1 = self._make_subparam('bar', 'INT64', 123) - sub_2 = self._make_subparam('baz', 'STRING', 'abc') + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') param = self._make_one('foo', sub_1, sub_2) self.assertEqual(param.to_api_repr(), EXPECTED) @@ -1348,8 +1378,8 @@ def test_to_api_repr_wo_name(self): }, }, } - sub_1 = self._make_subparam('bar', 'INT64', 123) - sub_2 = self._make_subparam('baz', 'STRING', 'abc') + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') klass = self._get_target_class() param = klass.positional(sub_1, sub_2) self.assertEqual(param.to_api_repr(), EXPECTED) From c41f15d8913130cee0b12ae31e5925003b9ad21b Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 21 Mar 2017 16:46:45 -0400 Subject: [PATCH 0092/2016] Allow struct query parameters w/ nested structs. Add a system test to verify the feature. Toward #3029. --- .../google/cloud/bigquery/_helpers.py | 34 ++++++++++++------- .../unit_tests/test__helpers.py | 33 ++++++++++++++++++ 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 0c29fc7f1336..dea2d0fc4dc4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -536,9 +536,15 @@ class StructQueryParameter(AbstractQueryParameter): """ def __init__(self, name, *sub_params): self.name = name - self.struct_types = OrderedDict( - (sub.name, sub.type_) for sub in sub_params) - self.struct_values = {sub.name: sub.value for sub in sub_params} + types = self.struct_types = OrderedDict() + values = self.struct_values = {} + for sub in sub_params: + if isinstance(sub, self.__class__): + types[sub.name] = 'STRUCT' + values[sub.name] = sub + else: + types[sub.name] = sub.type_ + values[sub.name] = sub.value @classmethod def positional(cls, *sub_params): @@ -581,21 +587,25 @@ def to_api_repr(self): :rtype: dict :returns: JSON mapping """ - types = [ - {'name': key, 'type': {'type': value}} - for key, value in self.struct_types.items() - ] + s_types = {} values = {} for name, value in self.struct_values.items(): - converter = _SCALAR_VALUE_TO_JSON.get(self.struct_types[name]) - if converter is not None: - value = converter(value) - values[name] = {'value': value} + type_ = self.struct_types[name] + if type_ == 'STRUCT': + repr_ = value.to_api_repr() + s_types[name] = {'name': name, 'type': repr_['parameterType']} + values[name] = repr_['parameterValue'] + else: + s_types[name] = {'name': name, 'type': {'type': type_}} + converter = _SCALAR_VALUE_TO_JSON.get(type_) + if converter is not None: + value = converter(value) + values[name] = {'value': value} resource = { 'parameterType': { 'type': 'STRUCT', - 'structTypes': types, + 'structTypes': [s_types[key] for key in self.struct_types], }, 'parameterValue': { 'structValues': values, diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 1f885ed4fd4e..6c595e2a5b2d 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -1384,6 +1384,39 @@ def test_to_api_repr_wo_name(self): param = klass.positional(sub_1, sub_2) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_nested_struct(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'qux', 'type': {'type': 'INT64'}}, + {'name': 'spam', 'type': {'type': 'BOOL'}}, + ], + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'structValues': { + 'qux': {'value': '123'}, + 'spam': {'value': 'true'}, + }}, + }, + }, + } + scalar_1 = _make_subparam('bar', 'STRING', 'abc') + scalar_2 = _make_subparam('qux', 'INT64', 123) + scalar_3 = _make_subparam('spam', 'BOOL', True) + sub = self._make_one('baz', scalar_2, scalar_3) + param = self._make_one('foo', scalar_1, sub) + self.assertEqual(param.to_api_repr(), EXPECTED) + class Test_QueryParametersProperty(unittest.TestCase): From f97681c246023bb9cf3c65aa379a50f3f81e2b68 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 21 Mar 2017 17:10:55 -0400 Subject: [PATCH 0093/2016] Allow struct query parameters w/ nested arrays. Add a system test to verify the feature. Toward #3029. --- .../google/cloud/bigquery/_helpers.py | 5 +++- .../unit_tests/test__helpers.py | 30 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index dea2d0fc4dc4..d509231c19d2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -542,6 +542,9 @@ def __init__(self, name, *sub_params): if isinstance(sub, self.__class__): types[sub.name] = 'STRUCT' values[sub.name] = sub + elif isinstance(sub, ArrayQueryParameter): + types[sub.name] = 'ARRAY' + values[sub.name] = sub else: types[sub.name] = sub.type_ values[sub.name] = sub.value @@ -591,7 +594,7 @@ def to_api_repr(self): values = {} for name, value in self.struct_values.items(): type_ = self.struct_types[name] - if type_ == 'STRUCT': + if type_ in ('STRUCT', 'ARRAY'): repr_ = value.to_api_repr() s_types[name] = {'name': name, 'type': repr_['parameterType']} values[name] = repr_['parameterValue'] diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 6c595e2a5b2d..83879a4d3f55 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -1384,6 +1384,36 @@ def test_to_api_repr_wo_name(self): param = klass.positional(sub_1, sub_2) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_nested_array(self): + from google.cloud.bigquery._helpers import ArrayQueryParameter + + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'arrayValues': [ + {'value': '123'}, + {'value': '456'}, + ]}, + }, + }, + } + scalar = _make_subparam('bar', 'STRING', 'abc') + array = ArrayQueryParameter('baz', 'INT64', [123, 456]) + param = self._make_one('foo', scalar, array) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_nested_struct(self): EXPECTED = { 'name': 'foo', From 609f13776fe68bd83fef57c5ab7ed20144a8d119 Mon Sep 17 00:00:00 2001 From: daspecster Date: Thu, 23 Mar 2017 14:41:14 -0400 Subject: [PATCH 0094/2016] Update redirects and references for BigQuery docs. --- .../google/cloud/bigquery/client.py | 14 ++--- .../google/cloud/bigquery/dataset.py | 18 +++--- .../google/cloud/bigquery/job.py | 62 +++++++++---------- .../google/cloud/bigquery/query.py | 34 +++++----- .../google/cloud/bigquery/table.py | 18 +++--- 5 files changed, 73 insertions(+), 73 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a85cb10eede6..6f73dab58fce 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -85,7 +85,7 @@ def list_projects(self, max_results=None, page_token=None): """List projects for the project associated with this client. See: - https://cloud.google.com/bigquery/docs/reference/v2/projects/list + https://cloud.google.com/bigquery/docs/reference/rest/v2/projects/list :type max_results: int :param max_results: maximum number of projects to return, If not @@ -110,7 +110,7 @@ def list_datasets(self, include_all=False, max_results=None, """List datasets for the project associated with this client. See: - https://cloud.google.com/bigquery/docs/reference/v2/datasets/list + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list :type include_all: bool :param include_all: True if results include hidden datasets. @@ -182,7 +182,7 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, """List jobs for the project associated with this client. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/list + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/list :type max_results: int :param max_results: maximum number of jobs to return, If not @@ -226,7 +226,7 @@ def load_table_from_storage(self, job_name, destination, *source_uris): """Construct a job for loading data into a table from CloudStorage. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load :type job_name: str :param job_name: Name of the job. @@ -248,7 +248,7 @@ def copy_table(self, job_name, destination, *sources): """Construct a job for copying one or more tables into another table. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy :type job_name: str :param job_name: Name of the job. @@ -268,7 +268,7 @@ def extract_table_to_storage(self, job_name, source, *destination_uris): """Construct a job for extracting a table into Cloud Storage files. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract :type job_name: str :param job_name: Name of the job. @@ -292,7 +292,7 @@ def run_async_query(self, job_name, query, """Construct a job for running a SQL query asynchronously. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query :type job_name: str :param job_name: Name of the job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 109f259ae100..f98bb95b1098 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -30,7 +30,7 @@ class AccessGrant(object): ``role`` specified. ``role`` is omitted for a ``view``, since ``view`` s are always read-only. - See https://cloud.google.com/bigquery/docs/reference/v2/datasets. + See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets. :type role: str :param role: Role granted to the entity. One of @@ -90,7 +90,7 @@ class Dataset(object): """Datasets are containers for tables. See: - https://cloud.google.com/bigquery/docs/reference/v2/datasets + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets :type name: str :param name: the name of the dataset @@ -418,7 +418,7 @@ def create(self, client=None): """API call: create the dataset via a PUT request. See: - https://cloud.google.com/bigquery/docs/reference/v2/tables/insert + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -435,7 +435,7 @@ def exists(self, client=None): """API call: test for the existence of the dataset via a GET request See - https://cloud.google.com/bigquery/docs/reference/v2/datasets/get + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/get :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -459,7 +459,7 @@ def reload(self, client=None): """API call: refresh dataset properties via a GET request. See - https://cloud.google.com/bigquery/docs/reference/v2/datasets/get + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/get :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -476,7 +476,7 @@ def patch(self, client=None, **kw): """API call: update individual dataset properties via a PATCH request. See - https://cloud.google.com/bigquery/docs/reference/v2/datasets/patch + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/patch :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -515,7 +515,7 @@ def update(self, client=None): """API call: update dataset properties via a PUT request. See - https://cloud.google.com/bigquery/docs/reference/v2/datasets/update + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/update :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -531,7 +531,7 @@ def delete(self, client=None): """API call: delete the dataset via a DELETE request. See: - https://cloud.google.com/bigquery/docs/reference/v2/tables/delete + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -545,7 +545,7 @@ def list_tables(self, max_results=None, page_token=None): """List tables for the project associated with this client. See: - https://cloud.google.com/bigquery/docs/reference/v2/tables/list + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list :type max_results: int :param max_results: (Optional) Maximum number of tables to return. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 51929bc63803..c6ee642dfc7c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -302,7 +302,7 @@ def begin(self, client=None): """API call: begin the job via a POST request See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/insert + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -324,7 +324,7 @@ def exists(self, client=None): """API call: test for the existence of the job via a GET request See - https://cloud.google.com/bigquery/docs/reference/v2/jobs/get + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -348,7 +348,7 @@ def reload(self, client=None): """API call: refresh job properties via a GET request See - https://cloud.google.com/bigquery/docs/reference/v2/jobs/get + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -365,7 +365,7 @@ def cancel(self, client=None): """API call: cancel job via a POST request See - https://cloud.google.com/bigquery/docs/reference/v2/jobs/cancel + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -498,57 +498,57 @@ def output_rows(self): allow_jagged_rows = _TypedProperty('allow_jagged_rows', bool) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.allowJaggedRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows """ allow_quoted_newlines = _TypedProperty('allow_quoted_newlines', bool) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.allowQuotedNewlines + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines """ create_disposition = CreateDisposition('create_disposition') """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.createDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition """ encoding = Encoding('encoding') """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding """ field_delimiter = _TypedProperty('field_delimiter', six.string_types) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.fieldDelimiter + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter """ ignore_unknown_values = _TypedProperty('ignore_unknown_values', bool) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.ignoreUnknownValues + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues """ max_bad_records = _TypedProperty('max_bad_records', six.integer_types) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.maxBadRecords + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords """ quote_character = _TypedProperty('quote_character', six.string_types) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.quote + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote """ skip_leading_rows = _TypedProperty('skip_leading_rows', six.integer_types) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows """ source_format = SourceFormat('source_format') """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.sourceFormat + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat """ write_disposition = WriteDisposition('write_disposition') """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.writeDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition """ def _populate_config_resource(self, configuration): @@ -673,12 +673,12 @@ def __init__(self, name, destination, sources, client): create_disposition = CreateDisposition('create_disposition') """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy.createDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition """ write_disposition = WriteDisposition('write_disposition') """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy.writeDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition """ def _populate_config_resource(self, configuration): @@ -796,22 +796,22 @@ def __init__(self, name, source, destination_uris, client): compression = Compression('compression') """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extracted.compression + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression """ destination_format = DestinationFormat('destination_format') """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extracted.destinationFormat + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat """ field_delimiter = _TypedProperty('field_delimiter', six.string_types) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extracted.fieldDelimiter + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter """ print_header = _TypedProperty('print_header', bool) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extracted.printHeader + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader """ def _populate_config_resource(self, configuration): @@ -937,32 +937,32 @@ def __init__(self, name, query, client, allow_large_results = _TypedProperty('allow_large_results', bool) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.allowLargeResults + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults """ create_disposition = CreateDisposition('create_disposition') """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.createDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition """ default_dataset = _TypedProperty('default_dataset', Dataset) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.defaultDataset """ destination = _TypedProperty('destination', Table) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.destinationTable + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable """ flatten_results = _TypedProperty('flatten_results', bool) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.flattenResults + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults """ priority = QueryPriority('priority') """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.priority + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority """ query_parameters = QueryParametersProperty() @@ -971,7 +971,7 @@ def __init__(self, name, query, client, use_query_cache = _TypedProperty('use_query_cache', bool) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.useQueryCache + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache """ use_legacy_sql = _TypedProperty('use_legacy_sql', bool) @@ -988,17 +988,17 @@ def __init__(self, name, query, client, write_disposition = WriteDisposition('write_disposition') """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.writeDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition """ maximum_billing_tier = _TypedProperty('maximum_billing_tier', int) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.maximumBillingTier + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier """ maximum_bytes_billed = _TypedProperty('maximum_bytes_billed', int) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.maximumBytesBilled + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled """ def _destination_table_resource(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 5780c94d8df3..3c2e398e5f41 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -124,7 +124,7 @@ def cache_hit(self): """Query results served from cache. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#cacheHit + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#cacheHit :rtype: bool or ``NoneType`` :returns: True if the query results were served from cache (None @@ -137,7 +137,7 @@ def complete(self): """Server completed query. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#jobComplete + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobComplete :rtype: bool or ``NoneType`` :returns: True if the query completed on the server (None @@ -150,7 +150,7 @@ def errors(self): """Errors generated by the query. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#errors + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#errors :rtype: list of mapping, or ``NoneType`` :returns: Mappings describing errors generated on the server (None @@ -163,7 +163,7 @@ def name(self): """Job name, generated by the back-end. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#jobReference + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobReference :rtype: list of mapping, or ``NoneType`` :returns: Mappings describing errors generated on the server (None @@ -191,7 +191,7 @@ def page_token(self): """Token for fetching next bach of results. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#pageToken + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#pageToken :rtype: str, or ``NoneType`` :returns: Token generated on the server (None until set by the server). @@ -203,7 +203,7 @@ def total_rows(self): """Total number of rows returned by the query. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalRows :rtype: int, or ``NoneType`` :returns: Count generated on the server (None until set by the server). @@ -217,7 +217,7 @@ def total_bytes_processed(self): """Total number of bytes processed by the query. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalBytesProcessed + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalBytesProcessed :rtype: int, or ``NoneType`` :returns: Count generated on the server (None until set by the server). @@ -231,7 +231,7 @@ def rows(self): """Query results. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#rows + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#rows :rtype: list of tuples of row values, or ``NoneType`` :returns: fields describing the schema (None until set by the server). @@ -243,7 +243,7 @@ def schema(self): """Schema for query results. See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#schema + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#schema :rtype: list of :class:`SchemaField`, or ``NoneType`` :returns: fields describing the schema (None until set by the server). @@ -252,36 +252,36 @@ def schema(self): default_dataset = _TypedProperty('default_dataset', Dataset) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#defaultDataset + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#defaultDataset """ dry_run = _TypedProperty('dry_run', bool) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#dryRun + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#dryRun """ max_results = _TypedProperty('max_results', six.integer_types) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#maxResults + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#maxResults """ preserve_nulls = _TypedProperty('preserve_nulls', bool) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#preserveNulls + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#preserveNulls """ query_parameters = QueryParametersProperty() timeout_ms = _TypedProperty('timeout_ms', six.integer_types) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#timeoutMs + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#timeoutMs """ udf_resources = UDFResourcesProperty() use_query_cache = _TypedProperty('use_query_cache', bool) """See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#useQueryCache + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#useQueryCache """ use_legacy_sql = _TypedProperty('use_legacy_sql', bool) @@ -348,7 +348,7 @@ def run(self, client=None): """API call: run the query via a POST request See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/query + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -369,7 +369,7 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, """API call: fetch a page of query result data via a GET request See: - https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults :type max_results: int :param max_results: (Optional) maximum number of rows to return. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 18f83c596f7d..3b0346f60cdf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -44,7 +44,7 @@ class Table(object): """Tables represent a set of rows whose values correspond to a schema. See: - https://cloud.google.com/bigquery/docs/reference/v2/tables + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables :type name: str :param name: the name of the table @@ -482,7 +482,7 @@ def create(self, client=None): """API call: create the dataset via a PUT request See: - https://cloud.google.com/bigquery/docs/reference/v2/tables/insert + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -500,7 +500,7 @@ def exists(self, client=None): """API call: test for the existence of the table via a GET request See - https://cloud.google.com/bigquery/docs/reference/v2/tables/get + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/get :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -524,7 +524,7 @@ def reload(self, client=None): """API call: refresh table properties via a GET request See - https://cloud.google.com/bigquery/docs/reference/v2/tables/get + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/get :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -548,7 +548,7 @@ def patch(self, """API call: update individual table properties via a PATCH request See - https://cloud.google.com/bigquery/docs/reference/v2/tables/patch + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/patch :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -616,7 +616,7 @@ def update(self, client=None): """API call: update table properties via a PUT request See - https://cloud.google.com/bigquery/docs/reference/v2/tables/update + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -632,7 +632,7 @@ def delete(self, client=None): """API call: delete the table via a DELETE request See: - https://cloud.google.com/bigquery/docs/reference/v2/tables/delete + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` @@ -646,7 +646,7 @@ def fetch_data(self, max_results=None, page_token=None, client=None): """API call: fetch the table data via a GET request See: - https://cloud.google.com/bigquery/docs/reference/v2/tabledata/list + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list .. note:: @@ -695,7 +695,7 @@ def insert_data(self, """API call: insert table data via a POST request See: - https://cloud.google.com/bigquery/docs/reference/v2/tabledata/insertAll + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll :type rows: list of tuples :param rows: Row data to be inserted. Each tuple should contain data From 1b81c9937d96fe415be96fa1f59281d3a3004ae0 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 23 Mar 2017 14:47:58 -0400 Subject: [PATCH 0095/2016] Ensure that base64-encoded bytes get decoded to text for JSON. (#3195) Closes #3193. --- .../google/cloud/bigquery/_helpers.py | 2 +- packages/google-cloud-bigquery/unit_tests/test__helpers.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index d509231c19d2..4b1f9418c9d8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -144,7 +144,7 @@ def _bool_to_json(value): def _bytes_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, bytes): - value = base64.standard_b64encode(value) + value = base64.standard_b64encode(value).decode('ascii') return value diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/unit_tests/test__helpers.py index 83879a4d3f55..bcc0ed7eed16 100644 --- a/packages/google-cloud-bigquery/unit_tests/test__helpers.py +++ b/packages/google-cloud-bigquery/unit_tests/test__helpers.py @@ -555,11 +555,10 @@ def test_w_non_bytes(self): self.assertIs(self._call_fut(non_bytes), non_bytes) def test_w_bytes(self): - import base64 - source = b'source' - expected = base64.standard_b64encode(source) - self.assertEqual(self._call_fut(source), expected) + expected = u'c291cmNl' + converted = self._call_fut(source) + self.assertEqual(converted, expected) class Test_timestamp_to_json(unittest.TestCase): From 5aeffb076b13ef4a389e6abae3e4e8b6e636b056 Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Thu, 23 Mar 2017 14:49:26 -0700 Subject: [PATCH 0096/2016] CI Rehash (#3146) --- packages/google-cloud-bigquery/.flake8 | 6 + packages/google-cloud-bigquery/LICENSE | 202 ++++ packages/google-cloud-bigquery/MANIFEST.in | 7 +- .../google/cloud/bigquery/__init__.py | 6 + packages/google-cloud-bigquery/nox.py | 88 ++ packages/google-cloud-bigquery/setup.py | 2 +- .../google-cloud-bigquery/tests/__init__.py | 0 .../tests/data/characters.json | 66 ++ .../tests/data/characters.jsonl | 3 + .../tests/data/schema.json | 83 ++ .../google-cloud-bigquery/tests/system.py | 887 ++++++++++++++++++ .../{unit_tests => tests/unit}/__init__.py | 0 .../unit}/test__helpers.py | 0 .../{unit_tests => tests/unit}/test__http.py | 0 .../{unit_tests => tests/unit}/test_client.py | 0 .../unit}/test_dataset.py | 0 .../{unit_tests => tests/unit}/test_job.py | 0 .../{unit_tests => tests/unit}/test_query.py | 0 .../{unit_tests => tests/unit}/test_schema.py | 0 .../{unit_tests => tests/unit}/test_table.py | 0 packages/google-cloud-bigquery/tox.ini | 35 - 21 files changed, 1345 insertions(+), 40 deletions(-) create mode 100644 packages/google-cloud-bigquery/.flake8 create mode 100644 packages/google-cloud-bigquery/LICENSE create mode 100644 packages/google-cloud-bigquery/nox.py create mode 100644 packages/google-cloud-bigquery/tests/__init__.py create mode 100644 packages/google-cloud-bigquery/tests/data/characters.json create mode 100644 packages/google-cloud-bigquery/tests/data/characters.jsonl create mode 100644 packages/google-cloud-bigquery/tests/data/schema.json create mode 100644 packages/google-cloud-bigquery/tests/system.py rename packages/google-cloud-bigquery/{unit_tests => tests/unit}/__init__.py (100%) rename packages/google-cloud-bigquery/{unit_tests => tests/unit}/test__helpers.py (100%) rename packages/google-cloud-bigquery/{unit_tests => tests/unit}/test__http.py (100%) rename packages/google-cloud-bigquery/{unit_tests => tests/unit}/test_client.py (100%) rename packages/google-cloud-bigquery/{unit_tests => tests/unit}/test_dataset.py (100%) rename packages/google-cloud-bigquery/{unit_tests => tests/unit}/test_job.py (100%) rename packages/google-cloud-bigquery/{unit_tests => tests/unit}/test_query.py (100%) rename packages/google-cloud-bigquery/{unit_tests => tests/unit}/test_schema.py (100%) rename packages/google-cloud-bigquery/{unit_tests => tests/unit}/test_table.py (100%) delete mode 100644 packages/google-cloud-bigquery/tox.ini diff --git a/packages/google-cloud-bigquery/.flake8 b/packages/google-cloud-bigquery/.flake8 new file mode 100644 index 000000000000..25168dc87605 --- /dev/null +++ b/packages/google-cloud-bigquery/.flake8 @@ -0,0 +1,6 @@ +[flake8] +exclude = + __pycache__, + .git, + *.pyc, + conf.py diff --git a/packages/google-cloud-bigquery/LICENSE b/packages/google-cloud-bigquery/LICENSE new file mode 100644 index 000000000000..d64569567334 --- /dev/null +++ b/packages/google-cloud-bigquery/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/packages/google-cloud-bigquery/MANIFEST.in b/packages/google-cloud-bigquery/MANIFEST.in index cb3a2b9ef4fa..24aa72fb370b 100644 --- a/packages/google-cloud-bigquery/MANIFEST.in +++ b/packages/google-cloud-bigquery/MANIFEST.in @@ -1,4 +1,3 @@ -include README.rst -graft google -graft unit_tests -global-exclude *.pyc +include README.rst LICENSE +recursive-include unit_tests * +global-exclude *.pyc __pycache__ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 615f7cfa1b6b..e2eb29e866a3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -34,3 +34,9 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table + +__all__ = [ + '__version__', 'AccessGrant', 'ArrayQueryParameter', 'Client', + 'Dataset', 'ScalarQueryParameter', 'SchemaField', 'StructQueryParameter', + 'Table', +] diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py new file mode 100644 index 000000000000..74a7856d415e --- /dev/null +++ b/packages/google-cloud-bigquery/nox.py @@ -0,0 +1,88 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import os + +import nox + + +@nox.session +@nox.parametrize('python_version', ['2.7', '3.4', '3.5', '3.6']) +def unit_tests(session, python_version): + """Run the unit test suite.""" + + # Run unit tests against all supported versions of Python. + session.interpreter = 'python%s' % python_version + + # Install all test dependencies, then install this package in-place. + session.install('mock', 'pytest', 'pytest-cov', '../core/') + session.install('-e', '.') + + # Run py.test against the unit tests. + session.run('py.test', '--quiet', + '--cov=google.cloud.bigquery', '--cov=tests.unit', '--cov-append', + '--cov-config=.coveragerc', '--cov-report=', '--cov-fail-under=97', + 'tests/unit', + ) + + +@nox.session +@nox.parametrize('python_version', ['2.7', '3.6']) +def system_tests(session, python_version): + """Run the system test suite.""" + + # Sanity check: Only run system tests if the environment variable is set. + if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): + return + + # Run the system tests against latest Python 2 and Python 3 only. + session.interpreter = 'python%s' % python_version + + # Install all test dependencies, then install this package into the + # virutalenv's dist-packages. + session.install('mock', 'pytest', + '../core/', '../test_utils/', + '../storage/') + session.install('.') + + # Run py.test against the system tests. + session.run('py.test', '--quiet', 'tests/system.py') + + +@nox.session +def lint(session): + """Run flake8. + + Returns a failure if flake8 finds linting errors or sufficiently + serious code quality issues. + """ + session.interpreter = 'python3.6' + session.install('flake8') + session.install('.') + session.run('flake8', 'google/cloud/bigquery') + + +@nox.session +def cover(session): + """Run the final coverage report. + + This outputs the coverage report aggregating coverage from the unit + test runs (not system test runs), and then erases coverage data. + """ + session.interpreter = 'python3.6' + session.install('coverage', 'pytest-cov') + session.run('coverage', 'report', '--show-missing', '--fail-under=100') + session.run('coverage', 'erase') diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 1c7267d1f523..dd5ab25af9a1 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -62,7 +62,7 @@ 'google', 'google.cloud', ], - packages=find_packages(), + packages=find_packages(exclude=('unit_tests*',)), install_requires=REQUIREMENTS, **SETUP_BASE ) diff --git a/packages/google-cloud-bigquery/tests/__init__.py b/packages/google-cloud-bigquery/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/tests/data/characters.json b/packages/google-cloud-bigquery/tests/data/characters.json new file mode 100644 index 000000000000..ac854fb812d4 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/data/characters.json @@ -0,0 +1,66 @@ +[ + { + "Age" : "111", + "Spells" : [], + "Name" : "Bilbo", + "Weight" : 67.2, + "TeaTime" : "10:00:00", + "NextVacation" : "2017-09-22", + "FavoriteTime" : "2031-04-01T05:09:27", + "IsMagic" : false + }, + { + "Age" : "1000", + "Name" : "Gandalf", + "Spells" : [ + { + "Name" : "Skydragon", + "Properties" : [ + { + "Power" : 1, + "Name" : "Flying" + }, + { + "Name" : "Creature", + "Power" : 1 + }, + { + "Power" : 11, + "Name" : "Explodey" + } + ], + "LastUsed" : "2015-10-31 23:59:56 UTC", + "Icon" : "iVBORw0KGgoAAAANSUhEUgAAAB4AAAAgCAYAAAAFQMh/AAAAAXNSR0IArs4c6QAAA9lJREFUSA21lk9OVEEQxvsRDImoiMG9mLjjCG5mEg7gEfQGsIcF7p0EDsBBSJiNO7ZsFRZqosb/QkSj7fer7ur33sw8GDFUUq+7q6vqq6qu7pkQzqG4EeI521e7FePVgM9cGPYwhCi6UO8qFOK+YY+Br66ujsmmxb84Yzwp6zCsxjJfWVkxnMsEMGuWHZ9Wcz11cM48hkq0vLwc1tbW4mAwqDpcdIqnMmgF0JMv2CiGnZ2dcHR0FA4PD8Pe3t5U/tx6bCSlb+JT8XfxT3HsUek0Li0tRdjWl+z6iRF+FNA1hXPDQ/IMNyRg3s8bD/OaZS+VP+9cOLSa64cA34oXZWagDkRzAaJxXaE+ufc4rCN7LrazZ2+8+STtpAL8WYDvpTaHKlkB2iQARMvb2+H27m4YaL7zaDtUw1BZAASi6T8T2UZnPZV2pvnJfCH5p8bewcGB6TrIfz8wBZgHQ83kjpuj6RBYQpuo09Tvmpd7TPe+ktZN8cKwS92KWXGuaqWowlYEwthtMcWOZUNJc8at+zuF/Xkqo69baS7P+AvWjYwJ4jyHXXsEnd74ZO/Pq+uXUuv6WNlso6cvnDsZB1V/unJab3D1/KrJDw9NCM9wHf2FK2ejTKMejnBHfGtfH7LGGCdQDqaqJgfgzWjXK1nYV4jRbPGnxUT7cqUaZfJrVZeOm9QmB21L6xXgbu/ScsYusJFMoU0x2fsamRJOd6kOYDRLUxv94ENZe8+0gM+0dyz+KgU7X8rLHHCIOZyrna4y6ykIu0YCs02TBXmk3PZssmEgaTxTo83xjCIjoE21h0Yah3MrV4+9kR8MaabGze+9NEILGAFE5nMOiiA32KnAr/sb7tED3nzlzC4dB38WMC+EjaqHfqvUKHi2gJPdWQ6AbH8hgyQ7QY6jvjj3QZWvX6pUAtduTX5Dss96Q7NI9RQRJeeKvRFbt0v2gb1Gx/PooJsztn1c1DqpAU3Hde2dB2aEHBhjgOFjMeDvxLafjQ3YZQSgOcHJZX611H45sGLHWvYTz9hiURlpNoBZvxb/Ft9lAQ1DmBfUiR+j1hAPkMBTE9L9+zLva1QvGFHurRBaZ5xLVitoBviiRkD/sIMDztKA5FA0b9/0OclzO2/XAQymJ0TcghZwEo9/AX8gMeAJMOvIsWWt5bwCoiFhVSllrdH0t5Q1JHAFlKJNkvTVdn2GHb9KdmacMT+d/Os05imJUccRX2YuZ93Sxf0Ilc4DPDeAq5SAvFEAY94cQc6BA26dzb4HWAJI4DPmQE5KCVUyvb2FcDZem7JdT2ggKUP3xX6n9XNq1DpzSf4Cy4ZqSlmM8d8AAAAASUVORK5CYII=", + "DiscoveredBy" : "Firebreather" + } + ], + "NextVacation" : "2666-06-06", + "TeaTime" : "15:00:00", + "Weight" : 198.6, + "FavoriteTime" : "2001-12-19T23:59:59", + "IsMagic" : true + }, + { + "Weight" : 128.3, + "TeaTime" : "12:00:00", + "NextVacation" : "2017-03-14", + "IsMagic" : true, + "FavoriteTime" : "2000-10-31T23:27:46", + "Age" : "17", + "Spells" : [ + { + "LastUsed" : "2017-02-14 12:07:23 UTC", + "Properties" : [ + { + "Name" : "Makes you look crazy", + "Power" : 1 + } + ], + "Icon" : "iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAAAXNSR0IArs4c6QAAABxpRE9UAAAAAgAAAAAAAAAgAAAAKAAAACAAAAAgAAABxj2CfowAAAGSSURBVHgB7Jc9TsNAEIX3JDkCPUV6KlpKFHEGlD4nyA04ACUXQKTgCEipUnKGNEbP0otentayicZ24SlWs7tjO/N9u/5J2b2+NUtuZcnwYE8BuQPyGZAPwXwLLPk5kG+BJa9+fgfkh1B+CeancL4F8i2Q/wWm/S/w+XFoTseftn0dvhu0OXfhpM+AGvzcEiYVAFisPqE9zrETJhHAlXfg2lglMK9z0f3RBfB+ZyRUV3x+erzsEIjjOBqc1xtNAIrvguybV3A9lkVHxlEE6GrrPb/ZvAySwlUnfCmlPQ+R8JCExvGtcRQBLFwj4FGkznX1VYDKPG/f2/MjwCksXACgdNUxJjwK9xwl4JihOwTFR0kIF+CABEPRnvsvPFctMoYKqAFSAFaMwB4pp3Y+bodIYL9WmIAaIOHxo7W8wiHvAjTvhUeNwwSgeAeAABbqOewC5hBdwFD4+9+7puzXV9fS6/b1wwT4tsaYAhwOOQdUQch5vgZCeAhAv3ZM31yYAAUgvApQQQ6n5w6FB/RVe1jdJOAPAAD//1eMQwoAAAGQSURBVO1UMU4DQQy8X9AgWopIUINEkS4VlJQo4gvwAV7AD3gEH4iSgidESpWSXyyZExP5lr0c7K5PsXBhec/2+jzjuWtent9CLdtu1mG5+gjz+WNr7IsY7eH+tvO+xfuqk4vz7CH91edFaF5v9nb6dBKm13edvrL+0Lk5lMzJkQDeJSkkgHF6mR8CHwMHCQR/NAQQGD0BAlwK4FCefQiefq+A2Vn29tG7igLAfmwcnJu/nJy3BMQkMN9HEPr8AL3bfBv7Bp+7/SoExMDjZwKEJwmyhnnmQIQEBIlz2x0iKoAvJkAC6TsTIH6MqRrEWUMSZF2zAwqT4Eu/e6pzFAIkmNSZ4OFT+VYBIIF//UqbJwnF/4DU0GwOn8r/JQYCpPGufEfJuZiA37ycQw/5uFeqPq4pfR6FADmkBCXjfWdZj3NfXW58dAJyB9W65wRoMWulryvAyqa05nQFaDFrpa8rwMqmtOZ0BWgxa6WvK8DKprTmdAVoMWulryvAyqa05nQFaDFrpa8rwMqmtOb89wr4AtQ4aPoL6yVpAAAAAElFTkSuQmCC", + "Name" : "Talking cats", + "DiscoveredBy" : "Salem" + } + ], + "Name" : "Sabrina" + } +] diff --git a/packages/google-cloud-bigquery/tests/data/characters.jsonl b/packages/google-cloud-bigquery/tests/data/characters.jsonl new file mode 100644 index 000000000000..1da3f2309cae --- /dev/null +++ b/packages/google-cloud-bigquery/tests/data/characters.jsonl @@ -0,0 +1,3 @@ +{"Name":"Bilbo","Age":"111","Weight":67.2,"IsMagic":false,"Spells":[],"TeaTime":"10:00:00","NextVacation":"2017-09-22","FavoriteTime":"2031-04-01T05:09:27"} +{"Name":"Gandalf","Age":"1000","Weight":198.6,"IsMagic":true,"Spells":[{"Name": "Skydragon", "Icon":"iVBORw0KGgoAAAANSUhEUgAAAB4AAAAgCAYAAAAFQMh/AAAAAXNSR0IArs4c6QAAA9lJREFUSA21lk9OVEEQxvsRDImoiMG9mLjjCG5mEg7gEfQGsIcF7p0EDsBBSJiNO7ZsFRZqosb/QkSj7fer7ur33sw8GDFUUq+7q6vqq6qu7pkQzqG4EeI521e7FePVgM9cGPYwhCi6UO8qFOK+YY+Br66ujsmmxb84Yzwp6zCsxjJfWVkxnMsEMGuWHZ9Wcz11cM48hkq0vLwc1tbW4mAwqDpcdIqnMmgF0JMv2CiGnZ2dcHR0FA4PD8Pe3t5U/tx6bCSlb+JT8XfxT3HsUek0Li0tRdjWl+z6iRF+FNA1hXPDQ/IMNyRg3s8bD/OaZS+VP+9cOLSa64cA34oXZWagDkRzAaJxXaE+ufc4rCN7LrazZ2+8+STtpAL8WYDvpTaHKlkB2iQARMvb2+H27m4YaL7zaDtUw1BZAASi6T8T2UZnPZV2pvnJfCH5p8bewcGB6TrIfz8wBZgHQ83kjpuj6RBYQpuo09Tvmpd7TPe+ktZN8cKwS92KWXGuaqWowlYEwthtMcWOZUNJc8at+zuF/Xkqo69baS7P+AvWjYwJ4jyHXXsEnd74ZO/Pq+uXUuv6WNlso6cvnDsZB1V/unJab3D1/KrJDw9NCM9wHf2FK2ejTKMejnBHfGtfH7LGGCdQDqaqJgfgzWjXK1nYV4jRbPGnxUT7cqUaZfJrVZeOm9QmB21L6xXgbu/ScsYusJFMoU0x2fsamRJOd6kOYDRLUxv94ENZe8+0gM+0dyz+KgU7X8rLHHCIOZyrna4y6ykIu0YCs02TBXmk3PZssmEgaTxTo83xjCIjoE21h0Yah3MrV4+9kR8MaabGze+9NEILGAFE5nMOiiA32KnAr/sb7tED3nzlzC4dB38WMC+EjaqHfqvUKHi2gJPdWQ6AbH8hgyQ7QY6jvjj3QZWvX6pUAtduTX5Dss96Q7NI9RQRJeeKvRFbt0v2gb1Gx/PooJsztn1c1DqpAU3Hde2dB2aEHBhjgOFjMeDvxLafjQ3YZQSgOcHJZX611H45sGLHWvYTz9hiURlpNoBZvxb/Ft9lAQ1DmBfUiR+j1hAPkMBTE9L9+zLva1QvGFHurRBaZ5xLVitoBviiRkD/sIMDztKA5FA0b9/0OclzO2/XAQymJ0TcghZwEo9/AX8gMeAJMOvIsWWt5bwCoiFhVSllrdH0t5Q1JHAFlKJNkvTVdn2GHb9KdmacMT+d/Os05imJUccRX2YuZ93Sxf0Ilc4DPDeAq5SAvFEAY94cQc6BA26dzb4HWAJI4DPmQE5KCVUyvb2FcDZem7JdT2ggKUP3xX6n9XNq1DpzSf4Cy4ZqSlmM8d8AAAAASUVORK5CYII=","DiscoveredBy":"Firebreather","Properties":[{"Name":"Flying","Power":1},{"Name":"Creature","Power":1},{"Name":"Explodey","Power":11}],"LastUsed":"2015-10-31 23:59:56 UTC"}],"TeaTime":"15:00:00","NextVacation":"2666-06-06","FavoriteTime":"2001-12-19T23:59:59"} +{"Name":"Sabrina","Age":"17","Weight":128.3,"IsMagic":true,"Spells":[{"Name": "Talking cats", "Icon":"iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAAAXNSR0IArs4c6QAAABxpRE9UAAAAAgAAAAAAAAAgAAAAKAAAACAAAAAgAAABxj2CfowAAAGSSURBVHgB7Jc9TsNAEIX3JDkCPUV6KlpKFHEGlD4nyA04ACUXQKTgCEipUnKGNEbP0otentayicZ24SlWs7tjO/N9u/5J2b2+NUtuZcnwYE8BuQPyGZAPwXwLLPk5kG+BJa9+fgfkh1B+CeancL4F8i2Q/wWm/S/w+XFoTseftn0dvhu0OXfhpM+AGvzcEiYVAFisPqE9zrETJhHAlXfg2lglMK9z0f3RBfB+ZyRUV3x+erzsEIjjOBqc1xtNAIrvguybV3A9lkVHxlEE6GrrPb/ZvAySwlUnfCmlPQ+R8JCExvGtcRQBLFwj4FGkznX1VYDKPG/f2/MjwCksXACgdNUxJjwK9xwl4JihOwTFR0kIF+CABEPRnvsvPFctMoYKqAFSAFaMwB4pp3Y+bodIYL9WmIAaIOHxo7W8wiHvAjTvhUeNwwSgeAeAABbqOewC5hBdwFD4+9+7puzXV9fS6/b1wwT4tsaYAhwOOQdUQch5vgZCeAhAv3ZM31yYAAUgvApQQQ6n5w6FB/RVe1jdJOAPAAD//1eMQwoAAAGQSURBVO1UMU4DQQy8X9AgWopIUINEkS4VlJQo4gvwAV7AD3gEH4iSgidESpWSXyyZExP5lr0c7K5PsXBhec/2+jzjuWtent9CLdtu1mG5+gjz+WNr7IsY7eH+tvO+xfuqk4vz7CH91edFaF5v9nb6dBKm13edvrL+0Lk5lMzJkQDeJSkkgHF6mR8CHwMHCQR/NAQQGD0BAlwK4FCefQiefq+A2Vn29tG7igLAfmwcnJu/nJy3BMQkMN9HEPr8AL3bfBv7Bp+7/SoExMDjZwKEJwmyhnnmQIQEBIlz2x0iKoAvJkAC6TsTIH6MqRrEWUMSZF2zAwqT4Eu/e6pzFAIkmNSZ4OFT+VYBIIF//UqbJwnF/4DU0GwOn8r/JQYCpPGufEfJuZiA37ycQw/5uFeqPq4pfR6FADmkBCXjfWdZj3NfXW58dAJyB9W65wRoMWulryvAyqa05nQFaDFrpa8rwMqmtOZ0BWgxa6WvK8DKprTmdAVoMWulryvAyqa05nQFaDFrpa8rwMqmtOb89wr4AtQ4aPoL6yVpAAAAAElFTkSuQmCC","DiscoveredBy":"Salem","Properties":[{"Name":"Makes you look crazy","Power":1}],"LastUsed":"2017-02-14 12:07:23 UTC"}],"TeaTime":"12:00:00","NextVacation":"2017-03-14","FavoriteTime":"2000-10-31T23:27:46"} diff --git a/packages/google-cloud-bigquery/tests/data/schema.json b/packages/google-cloud-bigquery/tests/data/schema.json new file mode 100644 index 000000000000..303076123dd9 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/data/schema.json @@ -0,0 +1,83 @@ +{ + "fields" : [ + { + "type" : "STRING", + "name" : "Name", + "mode" : "NULLABLE" + }, + { + "name" : "Age", + "mode" : "NULLABLE", + "type" : "INTEGER" + }, + { + "type" : "FLOAT", + "name" : "Weight", + "mode" : "NULLABLE" + }, + { + "mode" : "NULLABLE", + "name" : "IsMagic", + "type" : "BOOLEAN" + }, + { + "name" : "Spells", + "fields" : [ + { + "mode" : "NULLABLE", + "name" : "Name", + "type" : "STRING" + }, + { + "mode" : "NULLABLE", + "name" : "LastUsed", + "type" : "TIMESTAMP" + }, + { + "type" : "STRING", + "mode" : "NULLABLE", + "name" : "DiscoveredBy" + }, + { + "name" : "Properties", + "fields" : [ + { + "name" : "Name", + "mode" : "NULLABLE", + "type" : "STRING" + }, + { + "type" : "FLOAT", + "name" : "Power", + "mode" : "NULLABLE" + } + ], + "mode" : "REPEATED", + "type" : "RECORD" + }, + { + "mode" : "NULLABLE", + "name" : "Icon", + "type" : "BYTES" + } + ], + "mode" : "REPEATED", + "type" : "RECORD" + }, + { + "type" : "TIME", + "mode" : "NULLABLE", + "name" : "TeaTime" + }, + { + "type" : "DATE", + "name" : "NextVacation", + "mode" : "NULLABLE" + }, + { + "mode" : "NULLABLE", + "name" : "FavoriteTime", + "type" : "DATETIME" + } + ] +} diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py new file mode 100644 index 000000000000..a9e003fe968c --- /dev/null +++ b/packages/google-cloud-bigquery/tests/system.py @@ -0,0 +1,887 @@ +# Copyright 2015 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import datetime +import json +import operator +import os +import time +import unittest + +from google.cloud import bigquery +from google.cloud._helpers import UTC +from google.cloud.exceptions import Forbidden + +from test_utils.retry import RetryErrors +from test_utils.retry import RetryInstanceState +from test_utils.retry import RetryResult +from test_utils.system import unique_resource_id + + +WHERE = os.path.abspath(os.path.dirname(__file__)) + + +def _has_rows(result): + return len(result) > 0 + + +def _make_dataset_name(prefix): + return '%s%s' % (prefix, unique_resource_id()) + + +def _load_json_schema(filename='data/schema.json'): + from google.cloud.bigquery.table import _parse_schema_resource + + json_filename = os.path.join(WHERE, filename) + + with open(json_filename, 'r') as schema_file: + return _parse_schema_resource(json.load(schema_file)) + + +def _rate_limit_exceeded(forbidden): + """Predicate: pass only exceptions with 'rateLimitExceeded' as reason.""" + return any(error['reason'] == 'rateLimitExceeded' + for error in forbidden._errors) + + +# We need to wait to stay within the rate limits. +# The alternative outcome is a 403 Forbidden response from upstream, which +# they return instead of the more appropriate 429. +# See: https://cloud.google.com/bigquery/quota-policy +retry_403 = RetryErrors(Forbidden, error_predicate=_rate_limit_exceeded) + + +class Config(object): + """Run-time configuration to be modified at set-up. + + This is a mutable stand-in to allow test set-up to modify + global state. + """ + CLIENT = None + + +def setUpModule(): + Config.CLIENT = bigquery.Client() + + +class TestBigQuery(unittest.TestCase): + + def setUp(self): + self.to_delete = [] + + def tearDown(self): + from google.cloud.bigquery.dataset import Dataset + from google.cloud.storage import Bucket + from google.cloud.exceptions import BadRequest + from google.cloud.exceptions import Conflict + + def _still_in_use(bad_request): + return any(error['reason'] == 'resourceInUse' + for error in bad_request._errors) + + retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use) + retry_409 = RetryErrors(Conflict) + for doomed in self.to_delete: + if isinstance(doomed, Bucket): + retry_409(doomed.delete)(force=True) + elif isinstance(doomed, Dataset): + retry_in_use(doomed.delete)() + else: + doomed.delete() + + def test_create_dataset(self): + DATASET_NAME = _make_dataset_name('create_dataset') + dataset = Config.CLIENT.dataset(DATASET_NAME) + self.assertFalse(dataset.exists()) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + self.assertTrue(dataset.exists()) + self.assertEqual(dataset.name, DATASET_NAME) + + def test_reload_dataset(self): + DATASET_NAME = _make_dataset_name('reload_dataset') + dataset = Config.CLIENT.dataset(DATASET_NAME) + dataset.friendly_name = 'Friendly' + dataset.description = 'Description' + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + other = Config.CLIENT.dataset(DATASET_NAME) + other.reload() + self.assertEqual(other.friendly_name, 'Friendly') + self.assertEqual(other.description, 'Description') + + def test_patch_dataset(self): + dataset = Config.CLIENT.dataset(_make_dataset_name('patch_dataset')) + self.assertFalse(dataset.exists()) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + self.assertTrue(dataset.exists()) + self.assertIsNone(dataset.friendly_name) + self.assertIsNone(dataset.description) + dataset.patch(friendly_name='Friendly', description='Description') + self.assertEqual(dataset.friendly_name, 'Friendly') + self.assertEqual(dataset.description, 'Description') + + def test_update_dataset(self): + dataset = Config.CLIENT.dataset(_make_dataset_name('update_dataset')) + self.assertFalse(dataset.exists()) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + self.assertTrue(dataset.exists()) + after = [grant for grant in dataset.access_grants + if grant.entity_id != 'projectWriters'] + dataset.access_grants = after + + retry_403(dataset.update)() + + self.assertEqual(len(dataset.access_grants), len(after)) + for found, expected in zip(dataset.access_grants, after): + self.assertEqual(found.role, expected.role) + self.assertEqual(found.entity_type, expected.entity_type) + self.assertEqual(found.entity_id, expected.entity_id) + + def test_list_datasets(self): + datasets_to_create = [ + 'new' + unique_resource_id(), + 'newer' + unique_resource_id(), + 'newest' + unique_resource_id(), + ] + for dataset_name in datasets_to_create: + dataset = Config.CLIENT.dataset(dataset_name) + retry_403(dataset.create)() + self.to_delete.append(dataset) + + # Retrieve the datasets. + iterator = Config.CLIENT.list_datasets() + all_datasets = list(iterator) + self.assertIsNone(iterator.next_page_token) + created = [dataset for dataset in all_datasets + if dataset.name in datasets_to_create and + dataset.project == Config.CLIENT.project] + self.assertEqual(len(created), len(datasets_to_create)) + + def test_create_table(self): + dataset = Config.CLIENT.dataset(_make_dataset_name('create_table')) + self.assertFalse(dataset.exists()) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + TABLE_NAME = 'test_table' + full_name = bigquery.SchemaField('full_name', 'STRING', + mode='REQUIRED') + age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + table = dataset.table(TABLE_NAME, schema=[full_name, age]) + self.assertFalse(table.exists()) + table.create() + self.to_delete.insert(0, table) + self.assertTrue(table.exists()) + self.assertEqual(table.name, TABLE_NAME) + + def test_list_tables(self): + DATASET_NAME = _make_dataset_name('list_tables') + dataset = Config.CLIENT.dataset(DATASET_NAME) + self.assertFalse(dataset.exists()) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + # Retrieve tables before any are created for the dataset. + iterator = dataset.list_tables() + all_tables = list(iterator) + self.assertEqual(all_tables, []) + self.assertIsNone(iterator.next_page_token) + + # Insert some tables to be listed. + tables_to_create = [ + 'new' + unique_resource_id(), + 'newer' + unique_resource_id(), + 'newest' + unique_resource_id(), + ] + full_name = bigquery.SchemaField('full_name', 'STRING', + mode='REQUIRED') + age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + for table_name in tables_to_create: + table = dataset.table(table_name, schema=[full_name, age]) + table.create() + self.to_delete.insert(0, table) + + # Retrieve the tables. + iterator = dataset.list_tables() + all_tables = list(iterator) + self.assertIsNone(iterator.next_page_token) + created = [table for table in all_tables + if (table.name in tables_to_create and + table.dataset_name == DATASET_NAME)] + self.assertEqual(len(created), len(tables_to_create)) + + def test_patch_table(self): + dataset = Config.CLIENT.dataset(_make_dataset_name('patch_table')) + self.assertFalse(dataset.exists()) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + TABLE_NAME = 'test_table' + full_name = bigquery.SchemaField('full_name', 'STRING', + mode='REQUIRED') + age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + table = dataset.table(TABLE_NAME, schema=[full_name, age]) + self.assertFalse(table.exists()) + table.create() + self.to_delete.insert(0, table) + self.assertTrue(table.exists()) + self.assertIsNone(table.friendly_name) + self.assertIsNone(table.description) + table.patch(friendly_name='Friendly', description='Description') + self.assertEqual(table.friendly_name, 'Friendly') + self.assertEqual(table.description, 'Description') + + def test_update_table(self): + dataset = Config.CLIENT.dataset(_make_dataset_name('update_table')) + self.assertFalse(dataset.exists()) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + TABLE_NAME = 'test_table' + full_name = bigquery.SchemaField('full_name', 'STRING', + mode='REQUIRED') + age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + table = dataset.table(TABLE_NAME, schema=[full_name, age]) + self.assertFalse(table.exists()) + table.create() + self.to_delete.insert(0, table) + self.assertTrue(table.exists()) + voter = bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE') + schema = table.schema + schema.append(voter) + table.schema = schema + table.update() + self.assertEqual(len(table.schema), len(schema)) + for found, expected in zip(table.schema, schema): + self.assertEqual(found.name, expected.name) + self.assertEqual(found.field_type, expected.field_type) + self.assertEqual(found.mode, expected.mode) + + @staticmethod + def _fetch_single_page(table): + import six + + iterator = table.fetch_data() + page = six.next(iterator.pages) + return list(page) + + def test_insert_data_then_dump_table(self): + NOW_SECONDS = 1448911495.484366 + NOW = datetime.datetime.utcfromtimestamp( + NOW_SECONDS).replace(tzinfo=UTC) + ROWS = [ + ('Phred Phlyntstone', 32, NOW), + ('Bharney Rhubble', 33, NOW + datetime.timedelta(seconds=10)), + ('Wylma Phlyntstone', 29, NOW + datetime.timedelta(seconds=20)), + ('Bhettye Rhubble', 27, None), + ] + ROW_IDS = range(len(ROWS)) + dataset = Config.CLIENT.dataset( + _make_dataset_name('insert_data_then_dump')) + self.assertFalse(dataset.exists()) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + TABLE_NAME = 'test_table' + full_name = bigquery.SchemaField('full_name', 'STRING', + mode='REQUIRED') + age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + now = bigquery.SchemaField('now', 'TIMESTAMP') + table = dataset.table(TABLE_NAME, schema=[full_name, age, now]) + self.assertFalse(table.exists()) + table.create() + self.to_delete.insert(0, table) + self.assertTrue(table.exists()) + + errors = table.insert_data(ROWS, ROW_IDS) + self.assertEqual(len(errors), 0) + + rows = () + + # Allow for "warm up" before rows visible. See: + # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds + retry = RetryResult(_has_rows, max_tries=8) + rows = retry(self._fetch_single_page)(table) + + by_age = operator.itemgetter(1) + self.assertEqual(sorted(rows, key=by_age), + sorted(ROWS, key=by_age)) + + def test_load_table_from_local_file_then_dump_table(self): + import csv + import tempfile + ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + TABLE_NAME = 'test_table' + + dataset = Config.CLIENT.dataset( + _make_dataset_name('load_local_then_dump')) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + full_name = bigquery.SchemaField('full_name', 'STRING', + mode='REQUIRED') + age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table.create() + self.to_delete.insert(0, table) + + with tempfile.NamedTemporaryFile(mode='w+') as csv_file: + writer = csv.writer(csv_file) + writer.writerow(('Full Name', 'Age')) + writer.writerows(ROWS) + csv_file.flush() + + with open(csv_file.name, 'rb') as csv_read: + job = table.upload_from_file( + csv_read, + source_format='CSV', + skip_leading_rows=1, + create_disposition='CREATE_NEVER', + write_disposition='WRITE_EMPTY', + ) + + def _job_done(instance): + return instance.state.lower() == 'done' + + # Retry until done. + retry = RetryInstanceState(_job_done, max_tries=8) + retry(job.reload)() + + self.assertEqual(job.output_rows, len(ROWS)) + + rows = self._fetch_single_page(table) + by_age = operator.itemgetter(1) + self.assertEqual(sorted(rows, key=by_age), + sorted(ROWS, key=by_age)) + + def test_load_table_from_storage_then_dump_table(self): + import csv + import tempfile + from google.cloud.storage import Client as StorageClient + local_id = unique_resource_id() + BUCKET_NAME = 'bq_load_test' + local_id + BLOB_NAME = 'person_ages.csv' + GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME) + ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + TABLE_NAME = 'test_table' + + s_client = StorageClient() + + # In the **very** rare case the bucket name is reserved, this + # fails with a ConnectionError. + bucket = s_client.create_bucket(BUCKET_NAME) + self.to_delete.append(bucket) + + blob = bucket.blob(BLOB_NAME) + + with tempfile.TemporaryFile(mode='w+') as csv_file: + writer = csv.writer(csv_file) + writer.writerow(('Full Name', 'Age')) + writer.writerows(ROWS) + blob.upload_from_file( + csv_file, rewind=True, content_type='text/csv') + + self.to_delete.insert(0, blob) + + dataset = Config.CLIENT.dataset( + _make_dataset_name('load_gcs_then_dump')) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + full_name = bigquery.SchemaField('full_name', 'STRING', + mode='REQUIRED') + age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table.create() + self.to_delete.insert(0, table) + + job = Config.CLIENT.load_table_from_storage( + 'bq_load_storage_test_' + local_id, table, GS_URL) + job.create_disposition = 'CREATE_NEVER' + job.skip_leading_rows = 1 + job.source_format = 'CSV' + job.write_disposition = 'WRITE_EMPTY' + + job.begin() + + def _job_done(instance): + return instance.state in ('DONE', 'done') + + # Allow for 90 seconds of "warm up" before rows visible. See: + # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds + retry = RetryInstanceState(_job_done, max_tries=8) + retry(job.reload)() + + rows = self._fetch_single_page(table) + by_age = operator.itemgetter(1) + self.assertEqual(sorted(rows, key=by_age), + sorted(ROWS, key=by_age)) + + def test_job_cancel(self): + DATASET_NAME = _make_dataset_name('job_cancel') + JOB_NAME = 'fetch_' + DATASET_NAME + TABLE_NAME = 'test_table' + QUERY = 'SELECT * FROM %s.%s' % (DATASET_NAME, TABLE_NAME) + + dataset = Config.CLIENT.dataset(DATASET_NAME) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + full_name = bigquery.SchemaField('full_name', 'STRING', + mode='REQUIRED') + age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table.create() + self.to_delete.insert(0, table) + + job = Config.CLIENT.run_async_query(JOB_NAME, QUERY) + job.begin() + job.cancel() + + def _job_done(instance): + return instance.state in ('DONE', 'done') + + retry = RetryInstanceState(_job_done, max_tries=8) + retry(job.reload)() + + # The `cancel` API doesn't leave any reliable traces on + # the status of the job resource, so we can't really assert for + # them here. The best we can do is not that the API call didn't + # raise an error, and that the job completed (in the `retry()` + # above). + + def test_sync_query_w_legacy_sql_types(self): + naive = datetime.datetime(2016, 12, 5, 12, 41, 9) + stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) + zoned = naive.replace(tzinfo=UTC) + EXAMPLES = [ + { + 'sql': 'SELECT 1', + 'expected': 1, + }, + { + 'sql': 'SELECT 1.3', + 'expected': 1.3, + }, + { + 'sql': 'SELECT TRUE', + 'expected': True, + }, + { + 'sql': 'SELECT "ABC"', + 'expected': 'ABC', + }, + { + 'sql': 'SELECT CAST("foo" AS BYTES)', + 'expected': b'foo', + }, + { + 'sql': 'SELECT CAST("%s" AS TIMESTAMP)' % (stamp,), + 'expected': zoned, + }, + ] + for example in EXAMPLES: + query = Config.CLIENT.run_sync_query(example['sql']) + query.use_legacy_sql = True + query.run() + self.assertEqual(len(query.rows), 1) + self.assertEqual(len(query.rows[0]), 1) + self.assertEqual(query.rows[0][0], example['expected']) + + def test_sync_query_w_standard_sql_types(self): + naive = datetime.datetime(2016, 12, 5, 12, 41, 9) + stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) + zoned = naive.replace(tzinfo=UTC) + EXAMPLES = [ + { + 'sql': 'SELECT 1', + 'expected': 1, + }, + { + 'sql': 'SELECT 1.3', + 'expected': 1.3, + }, + { + 'sql': 'SELECT TRUE', + 'expected': True, + }, + { + 'sql': 'SELECT "ABC"', + 'expected': 'ABC', + }, + { + 'sql': 'SELECT CAST("foo" AS BYTES)', + 'expected': b'foo', + }, + { + 'sql': 'SELECT TIMESTAMP "%s"' % (stamp,), + 'expected': zoned, + }, + { + 'sql': 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp,), + 'expected': naive, + }, + { + 'sql': 'SELECT DATE(TIMESTAMP "%s")' % (stamp,), + 'expected': naive.date(), + }, + { + 'sql': 'SELECT TIME(TIMESTAMP "%s")' % (stamp,), + 'expected': naive.time(), + }, + { + 'sql': 'SELECT (1, 2)', + 'expected': {'_field_1': 1, '_field_2': 2}, + }, + { + 'sql': 'SELECT ((1, 2), (3, 4), 5)', + 'expected': { + '_field_1': {'_field_1': 1, '_field_2': 2}, + '_field_2': {'_field_1': 3, '_field_2': 4}, + '_field_3': 5, + }, + }, + { + 'sql': 'SELECT [1, 2, 3]', + 'expected': [1, 2, 3], + }, + { + 'sql': 'SELECT ([1, 2], 3, [4, 5])', + 'expected': + {'_field_1': [1, 2], '_field_2': 3, '_field_3': [4, 5]}, + }, + { + 'sql': 'SELECT [(1, 2, 3), (4, 5, 6)]', + 'expected': [ + {'_field_1': 1, '_field_2': 2, '_field_3': 3}, + {'_field_1': 4, '_field_2': 5, '_field_3': 6}, + ], + }, + { + 'sql': 'SELECT [([1, 2, 3], 4), ([5, 6], 7)]', + 'expected': [ + {u'_field_1': [1, 2, 3], u'_field_2': 4}, + {u'_field_1': [5, 6], u'_field_2': 7}, + ], + }, + { + 'sql': 'SELECT ARRAY(SELECT STRUCT([1, 2]))', + 'expected': [{u'_field_1': [1, 2]}], + }, + ] + for example in EXAMPLES: + query = Config.CLIENT.run_sync_query(example['sql']) + query.use_legacy_sql = False + query.run() + self.assertEqual(len(query.rows), 1) + self.assertEqual(len(query.rows[0]), 1) + self.assertEqual(query.rows[0][0], example['expected']) + + def test_sync_query_w_query_params(self): + from google.cloud.bigquery._helpers import ArrayQueryParameter + from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery._helpers import StructQueryParameter + question = 'What is the answer to life, the universe, and everything?' + question_param = ScalarQueryParameter( + name='question', type_='STRING', value=question) + answer = 42 + answer_param = ScalarQueryParameter( + name='answer', type_='INT64', value=answer) + pi = 3.1415926 + pi_param = ScalarQueryParameter( + name='pi', type_='FLOAT64', value=pi) + truthy = True + truthy_param = ScalarQueryParameter( + name='truthy', type_='BOOL', value=truthy) + beef = b'DEADBEEF' + beef_param = ScalarQueryParameter( + name='beef', type_='BYTES', value=beef) + naive = datetime.datetime(2016, 12, 5, 12, 41, 9) + naive_param = ScalarQueryParameter( + name='naive', type_='DATETIME', value=naive) + naive_date_param = ScalarQueryParameter( + name='naive_date', type_='DATE', value=naive.date()) + naive_time_param = ScalarQueryParameter( + name='naive_time', type_='TIME', value=naive.time()) + zoned = naive.replace(tzinfo=UTC) + zoned_param = ScalarQueryParameter( + name='zoned', type_='TIMESTAMP', value=zoned) + array_param = ArrayQueryParameter( + name='array_param', array_type='INT64', values=[1, 2]) + struct_param = StructQueryParameter( + 'hitchhiker', question_param, answer_param) + phred_name = 'Phred Phlyntstone' + phred_name_param = ScalarQueryParameter( + name='name', type_='STRING', value=phred_name) + phred_age = 32 + phred_age_param = ScalarQueryParameter( + name='age', type_='INT64', value=phred_age) + phred_param = StructQueryParameter( + None, phred_name_param, phred_age_param) + bharney_name = 'Bharney Rhubbyl' + bharney_name_param = ScalarQueryParameter( + name='name', type_='STRING', value=bharney_name) + bharney_age = 31 + bharney_age_param = ScalarQueryParameter( + name='age', type_='INT64', value=bharney_age) + bharney_param = StructQueryParameter( + None, bharney_name_param, bharney_age_param) + characters_param = ArrayQueryParameter( + name=None, array_type='RECORD', + values=[phred_param, bharney_param]) + hero_param = StructQueryParameter( + 'hero', phred_name_param, phred_age_param) + sidekick_param = StructQueryParameter( + 'sidekick', bharney_name_param, bharney_age_param) + roles_param = StructQueryParameter( + 'roles', hero_param, sidekick_param) + friends_param = ArrayQueryParameter( + name='friends', array_type='STRING', + values=[phred_name, bharney_name]) + with_friends_param = StructQueryParameter(None, friends_param) + EXAMPLES = [ + { + 'sql': 'SELECT @question', + 'expected': question, + 'query_parameters': [question_param], + }, + { + 'sql': 'SELECT @answer', + 'expected': answer, + 'query_parameters': [answer_param], + }, + { + 'sql': 'SELECT @pi', + 'expected': pi, + 'query_parameters': [pi_param], + }, + { + 'sql': 'SELECT @truthy', + 'expected': truthy, + 'query_parameters': [truthy_param], + }, + { + 'sql': 'SELECT @beef', + 'expected': beef, + 'query_parameters': [beef_param], + }, + { + 'sql': 'SELECT @naive', + 'expected': naive, + 'query_parameters': [naive_param], + }, + { + 'sql': 'SELECT @naive_date', + 'expected': naive.date(), + 'query_parameters': [naive_date_param], + }, + { + 'sql': 'SELECT @naive_time', + 'expected': naive.time(), + 'query_parameters': [naive_time_param], + }, + { + 'sql': 'SELECT @zoned', + 'expected': zoned, + 'query_parameters': [zoned_param], + }, + { + 'sql': 'SELECT @array_param', + 'expected': [1, 2], + 'query_parameters': [array_param], + }, + { + 'sql': 'SELECT (@hitchhiker.question, @hitchhiker.answer)', + 'expected': ({'_field_1': question, '_field_2': answer}), + 'query_parameters': [struct_param], + }, + { + 'sql': 'SELECT ?', + 'expected': [ + {'name': phred_name, 'age': phred_age}, + {'name': bharney_name, 'age': bharney_age}, + ], + 'query_parameters': [characters_param], + }, + { + 'sql': 'SELECT @roles', + 'expected': { + 'hero': {'name': phred_name, 'age': phred_age}, + 'sidekick': {'name': bharney_name, 'age': bharney_age}, + }, + 'query_parameters': [roles_param], + }, + { + 'sql': 'SELECT ?', + 'expected': { + 'friends': [phred_name, bharney_name], + }, + 'query_parameters': [with_friends_param], + }, + ] + for example in EXAMPLES: + query = Config.CLIENT.run_sync_query( + example['sql'], + query_parameters=example['query_parameters']) + query.use_legacy_sql = False + query.run() + self.assertEqual(len(query.rows), 1) + self.assertEqual(len(query.rows[0]), 1) + self.assertEqual(query.rows[0][0], example['expected']) + + def test_dump_table_w_public_data(self): + PUBLIC = 'bigquery-public-data' + DATASET_NAME = 'samples' + TABLE_NAME = 'natality' + + dataset = Config.CLIENT.dataset(DATASET_NAME, project=PUBLIC) + table = dataset.table(TABLE_NAME) + self._fetch_single_page(table) + + def test_insert_nested_nested(self): + # See #2951 + SF = bigquery.SchemaField + schema = [ + SF('string_col', 'STRING', mode='NULLABLE'), + SF('record_col', 'RECORD', mode='NULLABLE', fields=[ + SF('nested_string', 'STRING', mode='NULLABLE'), + SF('nested_repeated', 'INTEGER', mode='REPEATED'), + SF('nested_record', 'RECORD', mode='NULLABLE', fields=[ + SF('nested_nested_string', 'STRING', mode='NULLABLE'), + ]), + ]), + ] + record = { + 'nested_string': 'another string value', + 'nested_repeated': [0, 1, 2], + 'nested_record': {'nested_nested_string': 'some deep insight'}, + } + to_insert = [ + ('Some value', record) + ] + table_name = 'test_table' + dataset = Config.CLIENT.dataset( + _make_dataset_name('issue_2951')) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + table = dataset.table(table_name, schema=schema) + table.create() + self.to_delete.insert(0, table) + + table.insert_data(to_insert) + + retry = RetryResult(_has_rows, max_tries=8) + rows = retry(self._fetch_single_page)(table) + + self.assertEqual(rows, to_insert) + + def test_create_table_insert_fetch_nested_schema(self): + + table_name = 'test_table' + dataset = Config.CLIENT.dataset( + _make_dataset_name('create_table_nested_schema')) + self.assertFalse(dataset.exists()) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + schema = _load_json_schema() + table = dataset.table(table_name, schema=schema) + table.create() + self.to_delete.insert(0, table) + self.assertTrue(table.exists()) + self.assertEqual(table.name, table_name) + + to_insert = [] + # Data is in "JSON Lines" format, see http://jsonlines.org/ + json_filename = os.path.join(WHERE, 'data', 'characters.jsonl') + with open(json_filename) as rows_file: + for line in rows_file: + mapping = json.loads(line) + to_insert.append( + tuple(mapping[field.name] for field in schema)) + + errors = table.insert_data(to_insert) + self.assertEqual(len(errors), 0) + + retry = RetryResult(_has_rows, max_tries=8) + fetched = retry(self._fetch_single_page)(table) + self.assertEqual(len(fetched), len(to_insert)) + + for found, expected in zip(sorted(fetched), sorted(to_insert)): + self.assertEqual(found[0], expected[0]) # Name + self.assertEqual(found[1], int(expected[1])) # Age + self.assertEqual(found[2], expected[2]) # Weight + self.assertEqual(found[3], expected[3]) # IsMagic + + self.assertEqual(len(found[4]), len(expected[4])) # Spells + for f_spell, e_spell in zip(found[4], expected[4]): + self.assertEqual(f_spell['Name'], e_spell['Name']) + parts = time.strptime( + e_spell['LastUsed'], '%Y-%m-%d %H:%M:%S UTC') + e_used = datetime.datetime(*parts[0:6], tzinfo=UTC) + self.assertEqual(f_spell['LastUsed'], e_used) + self.assertEqual(f_spell['DiscoveredBy'], + e_spell['DiscoveredBy']) + self.assertEqual(f_spell['Properties'], e_spell['Properties']) + + e_icon = base64.standard_b64decode( + e_spell['Icon'].encode('ascii')) + self.assertEqual(f_spell['Icon'], e_icon) + + parts = time.strptime(expected[5], '%H:%M:%S') + e_teatime = datetime.time(*parts[3:6]) + self.assertEqual(found[5], e_teatime) # TeaTime + + parts = time.strptime(expected[6], '%Y-%m-%d') + e_nextvac = datetime.date(*parts[0:3]) + self.assertEqual(found[6], e_nextvac) # NextVacation + + parts = time.strptime(expected[7], '%Y-%m-%dT%H:%M:%S') + e_favtime = datetime.datetime(*parts[0:6]) + self.assertEqual(found[7], e_favtime) # FavoriteTime diff --git a/packages/google-cloud-bigquery/unit_tests/__init__.py b/packages/google-cloud-bigquery/tests/unit/__init__.py similarity index 100% rename from packages/google-cloud-bigquery/unit_tests/__init__.py rename to packages/google-cloud-bigquery/tests/unit/__init__.py diff --git a/packages/google-cloud-bigquery/unit_tests/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py similarity index 100% rename from packages/google-cloud-bigquery/unit_tests/test__helpers.py rename to packages/google-cloud-bigquery/tests/unit/test__helpers.py diff --git a/packages/google-cloud-bigquery/unit_tests/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py similarity index 100% rename from packages/google-cloud-bigquery/unit_tests/test__http.py rename to packages/google-cloud-bigquery/tests/unit/test__http.py diff --git a/packages/google-cloud-bigquery/unit_tests/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py similarity index 100% rename from packages/google-cloud-bigquery/unit_tests/test_client.py rename to packages/google-cloud-bigquery/tests/unit/test_client.py diff --git a/packages/google-cloud-bigquery/unit_tests/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py similarity index 100% rename from packages/google-cloud-bigquery/unit_tests/test_dataset.py rename to packages/google-cloud-bigquery/tests/unit/test_dataset.py diff --git a/packages/google-cloud-bigquery/unit_tests/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py similarity index 100% rename from packages/google-cloud-bigquery/unit_tests/test_job.py rename to packages/google-cloud-bigquery/tests/unit/test_job.py diff --git a/packages/google-cloud-bigquery/unit_tests/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py similarity index 100% rename from packages/google-cloud-bigquery/unit_tests/test_query.py rename to packages/google-cloud-bigquery/tests/unit/test_query.py diff --git a/packages/google-cloud-bigquery/unit_tests/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py similarity index 100% rename from packages/google-cloud-bigquery/unit_tests/test_schema.py rename to packages/google-cloud-bigquery/tests/unit/test_schema.py diff --git a/packages/google-cloud-bigquery/unit_tests/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py similarity index 100% rename from packages/google-cloud-bigquery/unit_tests/test_table.py rename to packages/google-cloud-bigquery/tests/unit/test_table.py diff --git a/packages/google-cloud-bigquery/tox.ini b/packages/google-cloud-bigquery/tox.ini deleted file mode 100644 index 2d142ba71df6..000000000000 --- a/packages/google-cloud-bigquery/tox.ini +++ /dev/null @@ -1,35 +0,0 @@ -[tox] -envlist = - py27,py34,py35,cover - -[testing] -localdeps = - pip install --quiet --upgrade {toxinidir}/../core -deps = - {toxinidir}/../core - mock - pytest -covercmd = - py.test --quiet \ - --cov=google.cloud.bigquery \ - --cov=unit_tests \ - --cov-config {toxinidir}/.coveragerc \ - unit_tests - -[testenv] -commands = - {[testing]localdeps} - py.test --quiet {posargs} unit_tests -deps = - {[testing]deps} - -[testenv:cover] -basepython = - python2.7 -commands = - {[testing]localdeps} - {[testing]covercmd} -deps = - {[testenv]deps} - coverage - pytest-cov From 27695f57cad75eeb2dcac0dfb23dbad38256b1c6 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 27 Mar 2017 10:20:16 -0700 Subject: [PATCH 0097/2016] Fixing up some format strings in nox configs. Using `STRING_TEMPLATE % VARIABLE` can introduce hard-to-find bugs if `VARIABLE` is expected to be a string but ends up being a tuple. Instead of using percent formatting, just using `.format`. Also making tweaks to `get_target_packages` to make some path manipulation / checks OS-independent. --- packages/google-cloud-bigquery/nox.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 74a7856d415e..9a20d7e8877f 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -25,7 +25,7 @@ def unit_tests(session, python_version): """Run the unit test suite.""" # Run unit tests against all supported versions of Python. - session.interpreter = 'python%s' % python_version + session.interpreter = 'python{}'.format(python_version) # Install all test dependencies, then install this package in-place. session.install('mock', 'pytest', 'pytest-cov', '../core/') @@ -49,7 +49,7 @@ def system_tests(session, python_version): return # Run the system tests against latest Python 2 and Python 3 only. - session.interpreter = 'python%s' % python_version + session.interpreter = 'python{}'.format(python_version) # Install all test dependencies, then install this package into the # virutalenv's dist-packages. From 66565dd45d0cb771212f8e0715e3a87d4c2f66c2 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 30 Mar 2017 08:43:22 -0700 Subject: [PATCH 0098/2016] Renaming http argument(s) as _http. (#3235) --- .../google/cloud/bigquery/client.py | 20 +++++++++-------- .../tests/unit/test_client.py | 22 +++++++++---------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 6f73dab58fce..bf0b0a31dcc0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -60,25 +60,27 @@ class Client(ClientWithProject): :type credentials: :class:`~google.auth.credentials.Credentials` :param credentials: (Optional) The OAuth2 Credentials to use for this - client. If not passed (and if no ``http`` object is + client. If not passed (and if no ``_http`` object is passed), falls back to the default inferred from the environment. - :type http: :class:`~httplib2.Http` - :param http: (Optional) HTTP object to make requests. Can be any object - that defines ``request()`` with the same interface as - :meth:`~httplib2.Http.request`. If not passed, an - ``http`` object is created that is bound to the - ``credentials`` for the current object. + :type _http: :class:`~httplib2.Http` + :param _http: (Optional) HTTP object to make requests. Can be any object + that defines ``request()`` with the same interface as + :meth:`~httplib2.Http.request`. If not passed, an + ``_http`` object is created that is bound to the + ``credentials`` for the current object. + This parameter should be considered private, and could + change in the future. """ SCOPE = ('https://www.googleapis.com/auth/bigquery', 'https://www.googleapis.com/auth/cloud-platform') """The scopes required for authenticating as a BigQuery consumer.""" - def __init__(self, project=None, credentials=None, http=None): + def __init__(self, project=None, credentials=None, _http=None): super(Client, self).__init__( - project=project, credentials=credentials, http=http) + project=project, credentials=credentials, _http=_http) self._connection = Connection(self) def list_projects(self, max_results=None, page_token=None): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 3056c1e2f39d..e71f3b99fbe0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -40,7 +40,7 @@ def test_ctor(self): PROJECT = 'PROJECT' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, _http=http) self.assertIsInstance(client._connection, Connection) self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) @@ -195,7 +195,7 @@ def test_dataset(self): DATASET = 'dataset_name' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, _http=http) dataset = client.dataset(DATASET) self.assertIsInstance(dataset, Dataset) self.assertEqual(dataset.name, DATASET) @@ -438,7 +438,7 @@ def test_load_table_from_storage(self): SOURCE_URI = 'http://example.com/source.csv' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, _http=http) dataset = client.dataset(DATASET) destination = dataset.table(DESTINATION) job = client.load_table_from_storage(JOB, destination, SOURCE_URI) @@ -458,7 +458,7 @@ def test_copy_table(self): DESTINATION = 'destination_table' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, _http=http) dataset = client.dataset(DATASET) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) @@ -479,7 +479,7 @@ def test_extract_table_to_storage(self): DESTINATION = 'gs://bucket_name/object_name' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, _http=http) dataset = client.dataset(DATASET) source = dataset.table(SOURCE) job = client.extract_table_to_storage(JOB, source, DESTINATION) @@ -497,7 +497,7 @@ def test_run_async_query_defaults(self): QUERY = 'select count(*) from persons' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, _http=http) job = client.run_async_query(JOB, QUERY) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) @@ -516,7 +516,7 @@ def test_run_async_w_udf_resources(self): QUERY = 'select count(*) from persons' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, _http=http) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] job = client.run_async_query(JOB, QUERY, udf_resources=udf_resources) self.assertIsInstance(job, QueryJob) @@ -535,7 +535,7 @@ def test_run_async_w_query_parameters(self): QUERY = 'select count(*) from persons' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, _http=http) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] job = client.run_async_query(JOB, QUERY, query_parameters=query_parameters) @@ -553,7 +553,7 @@ def test_run_sync_query_defaults(self): QUERY = 'select count(*) from persons' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, _http=http) query = client.run_sync_query(QUERY) self.assertIsInstance(query, QueryResults) self.assertIs(query._client, client) @@ -571,7 +571,7 @@ def test_run_sync_query_w_udf_resources(self): QUERY = 'select count(*) from persons' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, _http=http) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] query = client.run_sync_query(QUERY, udf_resources=udf_resources) self.assertIsInstance(query, QueryResults) @@ -589,7 +589,7 @@ def test_run_sync_query_w_query_parameters(self): QUERY = 'select count(*) from persons' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, http=http) + client = self._make_one(project=PROJECT, credentials=creds, _http=http) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] query = client.run_sync_query(QUERY, query_parameters=query_parameters) self.assertIsInstance(query, QueryResults) From 18e795ec855862c19192a61fe000e186c8085b31 Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Thu, 30 Mar 2017 14:45:10 -0700 Subject: [PATCH 0099/2016] GA and Beta Promotions (#3245) * Make clients explicitly unpickleable. Closes #3211. * Make clients explicitly unpickleable. Closes #3211. * Add GA designator, add 1.0 version numbers. * Version changes. Eep. * Oops, Speech is still alpha. * 0.24.0, not 0.24.1 * Remove double __getstate__ goof. * Version changes. Eep. * Oops, Speech is still alpha. * Remove double __getstate__ goof. * Adding 3.6 classifier where missing and fixing bad versions. Done via "git grep '0\.24'" and "git grep '0\.23'". * Fix Noxfiles forlocal packages. * Fixing copy-pasta issue in error reporting nox config. Also fixing bad indent in same file. * Depend on stable logging in error reporting package. * Fixing lint errors in error_reporting. These were masked because error_reporting's lint nox session was linting the datastore codebase. This also means that the error reporting package has gained __all__. * Fixing a syntax error in nox config for logging. Also fixing an indent error while I was in there. * Revert "Add docs for 'result_index' usage and a system test." This reverts commit b5742aa160f604ec7cd81873ad24ac9aa75e548d. * Fixing docs nox session for umbrella package. Two issues: - error_reporting came BEFORE logging (which means it would try to pull in a logging dep from PyPI that doesn't exist) - dns was NOT in the list of local packages * Updating upper bound on logging in error_reporting. * Un-revert typo fix. --- packages/google-cloud-bigquery/nox.py | 12 +++++++----- packages/google-cloud-bigquery/setup.py | 5 +++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 9a20d7e8877f..58b16e23dd86 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -19,6 +19,9 @@ import nox +LOCAL_DEPS = ('../core/',) + + @nox.session @nox.parametrize('python_version', ['2.7', '3.4', '3.5', '3.6']) def unit_tests(session, python_version): @@ -28,7 +31,7 @@ def unit_tests(session, python_version): session.interpreter = 'python{}'.format(python_version) # Install all test dependencies, then install this package in-place. - session.install('mock', 'pytest', 'pytest-cov', '../core/') + session.install('mock', 'pytest', 'pytest-cov', *LOCAL_DEPS) session.install('-e', '.') # Run py.test against the unit tests. @@ -53,9 +56,8 @@ def system_tests(session, python_version): # Install all test dependencies, then install this package into the # virutalenv's dist-packages. - session.install('mock', 'pytest', - '../core/', '../test_utils/', - '../storage/') + session.install('mock', 'pytest', *LOCAL_DEPS) + session.install('../storage/', '../test_utils/') session.install('.') # Run py.test against the system tests. @@ -70,7 +72,7 @@ def lint(session): serious code quality issues. """ session.interpreter = 'python3.6' - session.install('flake8') + session.install('flake8', *LOCAL_DEPS) session.install('.') session.run('flake8', 'google/cloud/bigquery') diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index dd5ab25af9a1..0040dd2c16a2 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -44,18 +44,19 @@ 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', 'Topic :: Internet', ], } REQUIREMENTS = [ - 'google-cloud-core >= 0.23.1, < 0.24dev', + 'google-cloud-core >= 0.24.0, < 0.25dev', ] setup( name='google-cloud-bigquery', - version='0.23.0', + version='0.24.0', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From 085ba84e3e1a759cbbb7ca22b58bc1af998118be Mon Sep 17 00:00:00 2001 From: Ken Kinder Date: Mon, 3 Apr 2017 12:15:33 -0700 Subject: [PATCH 0100/2016] Fix repeated spelling error (#3260) * Fix repeated spelling error s/paramter/parameter * Fix mistaken typo --- .../google/cloud/bigquery/_helpers.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 4b1f9418c9d8..d6699dbfc537 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -342,7 +342,7 @@ class AbstractQueryParameter(object): """ @classmethod def from_api_repr(cls, resource): - """Factory: construct paramter from JSON resource. + """Factory: construct parameter from JSON resource. :type resource: dict :param resource: JSON mapping of parameter @@ -364,7 +364,7 @@ class ScalarQueryParameter(AbstractQueryParameter): :type name: str or None :param name: Parameter name, used via ``@foo`` syntax. If None, the - paramter can only be addressed via position (``?``). + parameter can only be addressed via position (``?``). :type type_: str :param type_: name of parameter type. One of 'STRING', 'INT64', @@ -381,11 +381,11 @@ def __init__(self, name, type_, value): @classmethod def positional(cls, type_, value): - """Factory for positional paramters. + """Factory for positional paramater. :type type_: str :param type_: - name of paramter type. One of 'STRING', 'INT64', + name of parameter type. One of 'STRING', 'INT64', 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. :type value: str, int, float, bool, :class:`datetime.datetime`, or @@ -399,7 +399,7 @@ def positional(cls, type_, value): @classmethod def from_api_repr(cls, resource): - """Factory: construct paramter from JSON resource. + """Factory: construct parameter from JSON resource. :type resource: dict :param resource: JSON mapping of parameter @@ -441,7 +441,7 @@ class ArrayQueryParameter(AbstractQueryParameter): :type name: str or None :param name: Parameter name, used via ``@foo`` syntax. If None, the - paramter can only be addressed via position (``?``). + parameter can only be addressed via position (``?``). :type array_type: str :param array_type: @@ -458,7 +458,7 @@ def __init__(self, name, array_type, values): @classmethod def positional(cls, array_type, values): - """Factory for positional paramters. + """Factory for positional parameters. :type array_type: str :param array_type: @@ -475,7 +475,7 @@ def positional(cls, array_type, values): @classmethod def from_api_repr(cls, resource): - """Factory: construct paramter from JSON resource. + """Factory: construct parameter from JSON resource. :type resource: dict :param resource: JSON mapping of parameter @@ -529,7 +529,7 @@ class StructQueryParameter(AbstractQueryParameter): :type name: str or None :param name: Parameter name, used via ``@foo`` syntax. If None, the - paramter can only be addressed via position (``?``). + parameter can only be addressed via position (``?``). :type sub_params: tuple of :class:`ScalarQueryParameter` :param sub_params: the sub-parameters for the struct @@ -551,7 +551,7 @@ def __init__(self, name, *sub_params): @classmethod def positional(cls, *sub_params): - """Factory for positional paramters. + """Factory for positional parameters. :type sub_params: tuple of :class:`ScalarQueryParameter` :param sub_params: the sub-parameters for the struct @@ -563,7 +563,7 @@ def positional(cls, *sub_params): @classmethod def from_api_repr(cls, resource): - """Factory: construct paramter from JSON resource. + """Factory: construct parameter from JSON resource. :type resource: dict :param resource: JSON mapping of parameter @@ -635,7 +635,7 @@ def __get__(self, instance, owner): :rtype: list of instances of classes derived from :class:`AbstractQueryParameter`. :returns: the descriptor, if accessed via the class, or the instance's - query paramters. + query parameters. """ if instance is None: return self From de5f324574cbffc50500a102404e12dbc4c52935 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 20 Apr 2017 13:00:32 -0700 Subject: [PATCH 0101/2016] Adding check that **all** setup.py README's are valid RST. (#3318) * Adding check that **all** setup.py README's are valid RST. Follow up to #3316. Fixes #2446. * Fixing duplicate reference in Logging README. * Fixing duplicate reference in Monitoring README. --- packages/google-cloud-bigquery/nox.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 58b16e23dd86..27bfb7f87ac4 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -77,6 +77,15 @@ def lint(session): session.run('flake8', 'google/cloud/bigquery') +@nox.session +def lint_setup_py(session): + """Verify that setup.py is valid (including RST check).""" + session.interpreter = 'python3.6' + session.install('docutils', 'Pygments') + session.run( + 'python', 'setup.py', 'check', '--restructuredtext', '--strict') + + @nox.session def cover(session): """Run the final coverage report. From c7eae565e232a434f31b95c19e1b43e41fe204f8 Mon Sep 17 00:00:00 2001 From: Roscoe Pyell Date: Thu, 20 Apr 2017 23:29:30 -0700 Subject: [PATCH 0102/2016] Update BQ Query parameter reference Update BigQuery Query parameter reference to better indicate that timeout_ms sets timeout on the request, not on the query. --- packages/google-cloud-bigquery/google/cloud/bigquery/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 3c2e398e5f41..401179447fa5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -383,7 +383,7 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, :type timeout_ms: int :param timeout_ms: - (Optional) timeout, in milliseconds, to wait for query to complete + (Optional) timeout, in milliseconds, to wait for request to complete :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` From 37b8d93bf3b47646ab02ff93e35a32d660f84a5c Mon Sep 17 00:00:00 2001 From: Roscoe Pyell Date: Thu, 20 Apr 2017 23:43:20 -0700 Subject: [PATCH 0103/2016] Fix line length Fix line length issue on timeout_ms parameter. --- packages/google-cloud-bigquery/google/cloud/bigquery/query.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 401179447fa5..8bf7457fe0a2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -383,7 +383,8 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, :type timeout_ms: int :param timeout_ms: - (Optional) timeout, in milliseconds, to wait for request to complete + (Optional) timeout, in milliseconds, to wait for http request + to complete :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` From 3129c3b651057320d69e4aa984fff4b42d73cfb0 Mon Sep 17 00:00:00 2001 From: Roscoe Pyell Date: Thu, 20 Apr 2017 23:59:56 -0700 Subject: [PATCH 0104/2016] Fix trailing whitespace This is my punishment for not using a proper editor. Removed trailing whitespace. --- packages/google-cloud-bigquery/google/cloud/bigquery/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 8bf7457fe0a2..70282840cc8b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -383,8 +383,8 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, :type timeout_ms: int :param timeout_ms: - (Optional) timeout, in milliseconds, to wait for http request - to complete + (Optional) timeout, in milliseconds, to wait for http request + to complete. :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` From 149a4a69f982882991252ed8dd4bec2bc337762f Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Fri, 21 Apr 2017 10:03:56 -0700 Subject: [PATCH 0105/2016] Ignore tests (rather than unit_tests) in setup.py files. (#3319) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 0040dd2c16a2..ffd62619efe3 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -63,7 +63,7 @@ 'google', 'google.cloud', ], - packages=find_packages(exclude=('unit_tests*',)), + packages=find_packages(exclude=('tests*',)), install_requires=REQUIREMENTS, **SETUP_BASE ) From a18418ea93633fcddf4da84b8ad89489a2a19a68 Mon Sep 17 00:00:00 2001 From: Roscoe Pyell Date: Mon, 24 Apr 2017 12:26:04 -0700 Subject: [PATCH 0106/2016] Include full API reference Makes the param ref a little long but there won't be any confusion until the API docs change and this will need updating. --- .../google-cloud-bigquery/google/cloud/bigquery/query.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 70282840cc8b..31531ad0eddf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -383,8 +383,13 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, :type timeout_ms: int :param timeout_ms: - (Optional) timeout, in milliseconds, to wait for http request - to complete. + (Optional) How long to wait for the query to complete, in + milliseconds, before the request times out and returns. Note that + this is only a timeout for the request, not the query. If the query + takes longer to run than the timeout value, the call returns without + any results and with the 'jobComplete' flag set to false. You can + call GetQueryResults() to wait for the query to complete and read + the results. The default value is 10000 milliseconds (10 seconds). :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` From cf388600ba599526448ba88ccc534fa0bef5139b Mon Sep 17 00:00:00 2001 From: Roscoe Pyell Date: Mon, 24 Apr 2017 14:57:53 -0700 Subject: [PATCH 0107/2016] Fix line length I keep forgetting it's 79 and not 80. --- .../google-cloud-bigquery/google/cloud/bigquery/query.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 31531ad0eddf..6b764f3c664d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -386,10 +386,11 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, (Optional) How long to wait for the query to complete, in milliseconds, before the request times out and returns. Note that this is only a timeout for the request, not the query. If the query - takes longer to run than the timeout value, the call returns without - any results and with the 'jobComplete' flag set to false. You can - call GetQueryResults() to wait for the query to complete and read - the results. The default value is 10000 milliseconds (10 seconds). + takes longer to run than the timeout value, the call returns + without any results and with the 'jobComplete' flag set to false. + You can call GetQueryResults() to wait for the query to complete + and read the results. The default value is 10000 milliseconds (10 + seconds). :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` From 9115d0132197cd004edf67d226fb52b362a36a34 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 4 May 2017 12:40:39 -0700 Subject: [PATCH 0108/2016] Fixing paramter typo in a few places. --- packages/google-cloud-bigquery/tests/unit/test_query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 57a05d3c9b0d..a15833af347d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -546,7 +546,7 @@ def test_run_w_mixed_udfs(self): self.assertEqual(req['data'], SENT) self._verifyResourceProperties(query, RESOURCE) - def test_run_w_named_query_paramter(self): + def test_run_w_named_query_parameter(self): from google.cloud.bigquery._helpers import ScalarQueryParameter PATH = 'projects/%s/queries' % self.PROJECT @@ -582,7 +582,7 @@ def test_run_w_named_query_paramter(self): self.assertEqual(req['data'], SENT) self._verifyResourceProperties(query, RESOURCE) - def test_run_w_positional_query_paramter(self): + def test_run_w_positional_query_parameter(self): from google.cloud.bigquery._helpers import ScalarQueryParameter PATH = 'projects/%s/queries' % self.PROJECT From fe2a675d63be2c05c399152152520e7831b0db46 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Fri, 5 May 2017 15:42:41 -0700 Subject: [PATCH 0109/2016] Using binary streams for uploads in BigQuery system tests. (#3374) Fixes #3371. --- .../google-cloud-bigquery/tests/system.py | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index a9e003fe968c..44e508428fe6 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -338,7 +338,8 @@ def test_insert_data_then_dump_table(self): def test_load_table_from_local_file_then_dump_table(self): import csv - import tempfile + from google.cloud._testing import _NamedTemporaryFile + ROWS = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), @@ -360,13 +361,13 @@ def test_load_table_from_local_file_then_dump_table(self): table.create() self.to_delete.insert(0, table) - with tempfile.NamedTemporaryFile(mode='w+') as csv_file: - writer = csv.writer(csv_file) - writer.writerow(('Full Name', 'Age')) - writer.writerows(ROWS) - csv_file.flush() + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as csv_write: + writer = csv.writer(csv_write) + writer.writerow(('Full Name', 'Age')) + writer.writerows(ROWS) - with open(csv_file.name, 'rb') as csv_read: + with open(temp.name, 'rb') as csv_read: job = table.upload_from_file( csv_read, source_format='CSV', @@ -391,8 +392,9 @@ def _job_done(instance): def test_load_table_from_storage_then_dump_table(self): import csv - import tempfile + from google.cloud._testing import _NamedTemporaryFile from google.cloud.storage import Client as StorageClient + local_id = unique_resource_id() BUCKET_NAME = 'bq_load_test' + local_id BLOB_NAME = 'person_ages.csv' @@ -414,12 +416,14 @@ def test_load_table_from_storage_then_dump_table(self): blob = bucket.blob(BLOB_NAME) - with tempfile.TemporaryFile(mode='w+') as csv_file: - writer = csv.writer(csv_file) - writer.writerow(('Full Name', 'Age')) - writer.writerows(ROWS) - blob.upload_from_file( - csv_file, rewind=True, content_type='text/csv') + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as csv_write: + writer = csv.writer(csv_write) + writer.writerow(('Full Name', 'Age')) + writer.writerows(ROWS) + + with open(temp.name, 'rb') as csv_read: + blob.upload_from_file(csv_read, content_type='text/csv') self.to_delete.insert(0, blob) From 611b0aeddb1ee792dc9557a5701eca5a731ecc8a Mon Sep 17 00:00:00 2001 From: Rich Kadel Date: Sat, 6 May 2017 10:52:50 -0700 Subject: [PATCH 0110/2016] Incorrect error creating empty partitioned table [This is a re-do of the pull request #3366 to fix the author email address to use my google.com email] I moved the partitioning_type check into the if/elif block to avoid a ValueError that should not have been thrown. See: https://cloud.google.com/bigquery/docs/creating-partitioned-tables#creat ing_a_partitioned_table and the "API" example. Before my change, the following code failed with the ValueError thrown by this function, but with my change, I was able to successfully create the partitioned table: destination_table = dataset.table(table_shortname) destination_partition = dataset.table(table_shortname+'$'+partition_date) if not destination_table.exists(): destination_table.partitioning_type = 'DAY' destination_table.create() --- .../google/cloud/bigquery/table.py | 5 ++-- .../tests/unit/test_table.py | 26 ++++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 3b0346f60cdf..fc17c5b9a009 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -473,8 +473,9 @@ def _build_resource(self): resource['schema'] = { 'fields': _build_schema_resource(self._schema) } - else: - raise ValueError("Set either 'view_query' or 'schema'.") + elif self.partitioning_type is None: + raise ValueError( + "Set either 'view_query' or 'schema' or 'partitioning_type'.") return resource diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 6a496ba69e95..e79612e848f1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -395,7 +395,7 @@ def test_from_api_repr_w_properties(self): self.assertIs(table._dataset._client, client) self._verifyResourceProperties(table, RESOURCE) - def test_create_no_view_query_no_schema(self): + def test_create_no_view_query_no_schema_no_partitioning(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) @@ -404,6 +404,30 @@ def test_create_no_view_query_no_schema(self): with self.assertRaises(ValueError): table.create() + def test_create_new_day_partitioned_table(self): + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + RESOURCE = self._makeResource() + conn = _Connection(RESOURCE) + client = _Client(project=self.PROJECT, connection=conn) + dataset = _Dataset(client) + table = self._make_one(self.TABLE_NAME, dataset) + table.partitioning_type = 'DAY' + table.create() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME}, + 'timePartitioning': {'type': 'DAY'}, + } + self.assertEqual(req['data'], SENT) + self._verifyResourceProperties(table, RESOURCE) + def test_create_w_bound_client(self): from google.cloud.bigquery.table import SchemaField From f806b99e8dbce66691ea6ad89fae2f3314df8d44 Mon Sep 17 00:00:00 2001 From: Rich Kadel Date: Mon, 8 May 2017 17:34:11 -0700 Subject: [PATCH 0111/2016] Remove client-side create table validation As discussed with team, I removed the ValueError and any assumptions about the legitimate combinations of values in the request. ValueError unit test was removed, and the unused schema in `test_create_w_alternate_client` that was suppressed due to the `elif` in `_build_resource`, was removed so the expected value still matches the remaining inputs. --- .../google/cloud/bigquery/table.py | 6 ++---- .../tests/unit/test_table.py | 15 +-------------- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index fc17c5b9a009..1861ad993241 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -469,13 +469,11 @@ def _build_resource(self): if self.view_query is not None: view = resource['view'] = {} view['query'] = self.view_query - elif self._schema: + + if self._schema: resource['schema'] = { 'fields': _build_schema_resource(self._schema) } - elif self.partitioning_type is None: - raise ValueError( - "Set either 'view_query' or 'schema' or 'partitioning_type'.") return resource diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index e79612e848f1..a974f218270f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -395,15 +395,6 @@ def test_from_api_repr_w_properties(self): self.assertIs(table._dataset._client, client) self._verifyResourceProperties(table, RESOURCE) - def test_create_no_view_query_no_schema_no_partitioning(self): - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) - - with self.assertRaises(ValueError): - table.create() - def test_create_new_day_partitioned_table(self): PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) RESOURCE = self._makeResource() @@ -671,7 +662,6 @@ def test_create_w_alternate_client(self): import datetime from google.cloud._helpers import UTC from google.cloud._helpers import _millis - from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) DESCRIPTION = 'DESCRIPTION' @@ -691,10 +681,7 @@ def test_create_w_alternate_client(self): conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) dataset = _Dataset(client=client1) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age]) + table = self._make_one(self.TABLE_NAME, dataset=dataset) table.friendly_name = TITLE table.description = DESCRIPTION table.view_query = QUERY From 14270a23f715c11defe50ecf2b1ba51a5c275c8d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 May 2017 12:42:08 -0700 Subject: [PATCH 0112/2016] Add test avro data. Generated with `avro-tools` $ avro-tools fromjson \ --schema-file tests/data/colors.avroschema \ tests/data/colors.json > tests/data/colors.avro See: http://www.michael-noll.com/blog/2013/03/17/reading-and-writing-avro-files-from-the-command-line/ --- .../google-cloud-bigquery/tests/data/colors.avro | Bin 0 -> 308 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/data/colors.avro diff --git a/packages/google-cloud-bigquery/tests/data/colors.avro b/packages/google-cloud-bigquery/tests/data/colors.avro new file mode 100644 index 0000000000000000000000000000000000000000..e0133fd027f49093d638b1f2d82e70948306b2dd GIT binary patch literal 308 zcmZ9Hu}T9$5Qa6#9`=kDIcBjhE-Qo%Q{6Rdmy z@gZz&M6ePI_d;S}`Op8)_s<+x7u!4gymeH1F$2Tj2MZRu8rXz65ac8)nB}I|hlJ$T z($`${GeM?=D{g9pveN3Z0oTQdBoWOM5rH{PbDoQS*XMy(2+^NvKK7iiaMJ() literal 0 HcmV?d00001 From 6733ffac129860c4211b4b45fd722b75d011a54a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 May 2017 13:14:20 -0700 Subject: [PATCH 0113/2016] Add test to reproduce #3416. --- .../google-cloud-bigquery/tests/system.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 44e508428fe6..3ed2bf1391f2 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -390,6 +390,47 @@ def _job_done(instance): self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age)) + def test_load_table_from_local_avro_file_then_dump_table(self): + TABLE_NAME = 'test_table_avro' + ROWS = [ + ("violet", 400), + ("indigo", 445), + ("blue", 475), + ("green", 510), + ("yellow", 570), + ("orange", 590), + ("red", 650)] + + dataset = Config.CLIENT.dataset( + _make_dataset_name('load_local_then_dump')) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + table = dataset.table(TABLE_NAME) + self.to_delete.insert(0, table) + + with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof: + job = table.upload_from_file( + avrof, + source_format='AVRO', + write_disposition='WRITE_TRUNCATE' + ) + + def _job_done(instance): + return instance.state.lower() == 'done' + + # Retry until done. + retry = RetryInstanceState(_job_done, max_tries=8) + retry(job.reload)() + + self.assertEqual(job.output_rows, len(ROWS)) + + rows = self._fetch_single_page(table) + by_wavelength = operator.itemgetter(1) + self.assertEqual(sorted(rows, key=by_wavelength), + sorted(ROWS, key=by_wavelength)) + def test_load_table_from_storage_then_dump_table(self): import csv from google.cloud._testing import _NamedTemporaryFile From 553692eacd645b56919c794e888b68847fe0019d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 May 2017 13:58:01 -0700 Subject: [PATCH 0114/2016] Don't set schema in load job if none is provided. --- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 7 ++++--- packages/google-cloud-bigquery/tests/system.py | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 1861ad993241..2888b8ad5889 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -917,9 +917,6 @@ def upload_from_file(self, 'configuration': { 'load': { 'sourceFormat': source_format, - 'schema': { - 'fields': _build_schema_resource(self._schema), - }, 'destinationTable': { 'projectId': self._dataset.project, 'datasetId': self._dataset.name, @@ -929,6 +926,10 @@ def upload_from_file(self, } } + if len(self._schema) > 0: + load_config = metadata['configuration']['load'] + load_config['schema'] = _build_schema_resource(self._schema) + _configure_job_metadata(metadata, allow_jagged_rows, allow_quoted_newlines, create_disposition, encoding, field_delimiter, diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 3ed2bf1391f2..baad4a240507 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -426,6 +426,8 @@ def _job_done(instance): self.assertEqual(job.output_rows, len(ROWS)) + # Reload table to get the schema before fetching the rows. + table.reload() rows = self._fetch_single_page(table) by_wavelength = operator.itemgetter(1) self.assertEqual(sorted(rows, key=by_wavelength), From 48bd41f6e9cf13980187042191ab4d2fd72b96df Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 May 2017 14:46:05 -0700 Subject: [PATCH 0115/2016] Set fields property of schema resource. --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 2888b8ad5889..1a28d84bf843 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -928,7 +928,9 @@ def upload_from_file(self, if len(self._schema) > 0: load_config = metadata['configuration']['load'] - load_config['schema'] = _build_schema_resource(self._schema) + load_config['schema'] = { + 'fields': _build_schema_resource(self._schema) + } _configure_job_metadata(metadata, allow_jagged_rows, allow_quoted_newlines, create_disposition, From 5afdbc11cb5d06dd2085d1344eeaed0ea6c72990 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 18 May 2017 14:59:56 -0400 Subject: [PATCH 0116/2016] Marshal row data correctly in 'Table.insert_data()' (#3426) * Move '_row{,s}_from_json' next to scalar '_from_json' helpers. * Add converter helpers for row data scalars. * Convert row data using helpers. Closes #2957. --- .../google/cloud/bigquery/_helpers.py | 92 ++++++++++++------- .../google/cloud/bigquery/table.py | 22 ++--- .../tests/unit/test__helpers.py | 29 +++++- .../tests/unit/test_table.py | 26 +++++- 4 files changed, 115 insertions(+), 54 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index d6699dbfc537..201a9c76e555 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -21,6 +21,7 @@ from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds +from google.cloud._helpers import _microseconds_from_datetime from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _time_from_iso8601_time_naive from google.cloud._helpers import _to_bytes @@ -122,6 +123,38 @@ def _record_from_json(value, field): } +def _row_from_json(row, schema): + """Convert JSON row data to row with appropriate types. + + Note: ``row['f']`` and ``schema`` are presumed to be of the same length. + + :type row: dict + :param row: A JSON response row to be converted. + + :type schema: tuple + :param schema: A tuple of + :class:`~google.cloud.bigquery.schema.SchemaField`. + + :rtype: tuple + :returns: A tuple of data converted to native types. + """ + row_data = [] + for field, cell in zip(schema, row['f']): + converter = _CELLDATA_FROM_JSON[field.field_type] + if field.mode == 'REPEATED': + row_data.append([converter(item['v'], field) + for item in cell['v']]) + else: + row_data.append(converter(cell['v'], field)) + + return tuple(row_data) + + +def _rows_from_json(rows, schema): + """Convert JSON row data to rows with appropriate types.""" + return [_row_from_json(row, schema) for row in rows] + + def _int_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, int): @@ -148,8 +181,11 @@ def _bytes_to_json(value): return value -def _timestamp_to_json(value): - """Coerce 'value' to an JSON-compatible representation.""" +def _timestamp_to_json_parameter(value): + """Coerce 'value' to an JSON-compatible representation. + + This version returns the string representation used in query parameters. + """ if isinstance(value, datetime.datetime): if value.tzinfo not in (None, UTC): # Convert to UTC and remove the time zone info. @@ -159,6 +195,16 @@ def _timestamp_to_json(value): return value +def _timestamp_to_json_row(value): + """Coerce 'value' to an JSON-compatible representation. + + This version returns floating-point seconds value used in row data. + """ + if isinstance(value, datetime.datetime): + value = _microseconds_from_datetime(value) * 1e-6 + return value + + def _datetime_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): @@ -180,7 +226,8 @@ def _time_to_json(value): return value -_SCALAR_VALUE_TO_JSON = { +# Converters used for scalar values marshalled as row data. +_SCALAR_VALUE_TO_JSON_ROW = { 'INTEGER': _int_to_json, 'INT64': _int_to_json, 'FLOAT': _float_to_json, @@ -188,41 +235,16 @@ def _time_to_json(value): 'BOOLEAN': _bool_to_json, 'BOOL': _bool_to_json, 'BYTES': _bytes_to_json, - 'TIMESTAMP': _timestamp_to_json, + 'TIMESTAMP': _timestamp_to_json_row, 'DATETIME': _datetime_to_json, 'DATE': _date_to_json, 'TIME': _time_to_json, } -def _row_from_json(row, schema): - """Convert JSON row data to row with appropriate types. - - :type row: dict - :param row: A JSON response row to be converted. - - :type schema: tuple - :param schema: A tuple of - :class:`~google.cloud.bigquery.schema.SchemaField`. - - :rtype: tuple - :returns: A tuple of data converted to native types. - """ - row_data = [] - for field, cell in zip(schema, row['f']): - converter = _CELLDATA_FROM_JSON[field.field_type] - if field.mode == 'REPEATED': - row_data.append([converter(item['v'], field) - for item in cell['v']]) - else: - row_data.append(converter(cell['v'], field)) - - return tuple(row_data) - - -def _rows_from_json(rows, schema): - """Convert JSON row data to rows with appropriate types.""" - return [_row_from_json(row, schema) for row in rows] +# Converters used for scalar values marshalled as query parameters. +_SCALAR_VALUE_TO_JSON_PARAM = _SCALAR_VALUE_TO_JSON_ROW.copy() +_SCALAR_VALUE_TO_JSON_PARAM['TIMESTAMP'] = _timestamp_to_json_parameter class _ConfigurationProperty(object): @@ -420,7 +442,7 @@ def to_api_repr(self): :returns: JSON mapping """ value = self.value - converter = _SCALAR_VALUE_TO_JSON.get(self.type_) + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) if converter is not None: value = converter(value) resource = { @@ -506,7 +528,7 @@ def to_api_repr(self): a_values = [repr_['parameterValue'] for repr_ in reprs] else: a_type = {'type': self.array_type} - converter = _SCALAR_VALUE_TO_JSON.get(self.array_type) + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) if converter is not None: values = [converter(value) for value in values] a_values = [{'value': value} for value in values] @@ -600,7 +622,7 @@ def to_api_repr(self): values[name] = repr_['parameterValue'] else: s_types[name] = {'name': name, 'type': {'type': type_}} - converter = _SCALAR_VALUE_TO_JSON.get(type_) + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) if converter is not None: value = converter(value) values[name] = {'value': value} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 1a28d84bf843..d7b80dc25773 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -22,10 +22,10 @@ import six from google.cloud._helpers import _datetime_from_microseconds -from google.cloud._helpers import _microseconds_from_datetime from google.cloud._helpers import _millis_from_datetime from google.cloud.exceptions import NotFound from google.cloud.exceptions import make_exception +from google.cloud.iterator import HTTPIterator from google.cloud.streaming.exceptions import HttpError from google.cloud.streaming.http_wrapper import Request from google.cloud.streaming.http_wrapper import make_api_request @@ -33,7 +33,7 @@ from google.cloud.streaming.transfer import Upload from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery._helpers import _row_from_json -from google.cloud.iterator import HTTPIterator +from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW _TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'table.reload()'" @@ -673,6 +673,9 @@ def fetch_data(self, max_results=None, page_token=None, client=None): (this is distinct from the total number of rows in the current page: ``iterator.page.num_items``). """ + if len(self._schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + client = self._require_client(client) path = '%s/data' % (self.path,) iterator = HTTPIterator(client=client, path=path, @@ -741,11 +744,9 @@ def insert_data(self, row_info = {} for field, value in zip(self._schema, row): - if field.field_type == 'TIMESTAMP': - # BigQuery stores TIMESTAMP data internally as a - # UNIX timestamp with microsecond precision. - # Specifies the number of seconds since the epoch. - value = _convert_timestamp(value) + converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) + if converter is not None: # STRING doesn't need converting + value = converter(value) row_info[field.name] = value info = {'json': row_info} @@ -1131,10 +1132,3 @@ class _UrlBuilder(object): def __init__(self): self.query_params = {} self._relative_path = '' - - -def _convert_timestamp(value): - """Helper for :meth:`Table.insert_data`.""" - if isinstance(value, datetime.datetime): - value = _microseconds_from_datetime(value) * 1e-6 - return value diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index bcc0ed7eed16..a2b561e36e88 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -561,12 +561,12 @@ def test_w_bytes(self): self.assertEqual(converted, expected) -class Test_timestamp_to_json(unittest.TestCase): +class Test_timestamp_to_json_parameter(unittest.TestCase): def _call_fut(self, value): - from google.cloud.bigquery._helpers import _timestamp_to_json + from google.cloud.bigquery._helpers import _timestamp_to_json_parameter - return _timestamp_to_json(value) + return _timestamp_to_json_parameter(value) def test_w_float(self): self.assertEqual(self._call_fut(1.234567), 1.234567) @@ -604,6 +604,29 @@ def test_w_datetime_w_utc_zone(self): self.assertEqual(self._call_fut(when), ZULU) +class Test_timestamp_to_json_row(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _timestamp_to_json_row + + return _timestamp_to_json_row(value) + + def test_w_float(self): + self.assertEqual(self._call_fut(1.234567), 1.234567) + + def test_w_string(self): + ZULU = '2016-12-20 15:58:27.339328+00:00' + self.assertEqual(self._call_fut(ZULU), ZULU) + + def test_w_datetime(self): + import datetime + from google.cloud._helpers import _microseconds_from_datetime + + when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) + self.assertEqual( + self._call_fut(when), _microseconds_from_datetime(when) / 1e6) + + class Test_datetime_to_json(unittest.TestCase): def _call_fut(self, value): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a974f218270f..c940706c6b86 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1043,6 +1043,24 @@ def test_delete_w_alternate_client(self): self.assertEqual(req['method'], 'DELETE') self.assertEqual(req['path'], '/%s' % PATH) + def test_fetch_data_wo_schema(self): + from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA + + client = _Client(project=self.PROJECT) + dataset = _Dataset(client) + table = self._make_one(self.TABLE_NAME, dataset=dataset) + ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + + with self.assertRaises(ValueError) as exc: + table.fetch_data() + + self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) + def test_fetch_data_w_bound_client(self): import datetime import six @@ -1355,7 +1373,7 @@ def _row_data(row): if isinstance(row[2], datetime.datetime): joined = _microseconds_from_datetime(joined) * 1e-6 return {'full_name': row[0], - 'age': row[1], + 'age': str(row[1]), 'joined': joined} SENT = { @@ -1404,7 +1422,11 @@ def test_insert_data_w_alternate_client(self): ] def _row_data(row): - return {'full_name': row[0], 'age': row[1], 'voter': row[2]} + return { + 'full_name': row[0], + 'age': str(row[1]), + 'voter': row[2] and 'true' or 'false', + } SENT = { 'skipInvalidRows': True, From fbcbcb8754ce85aa0ec71b06f3565afe777cf07d Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 31 May 2017 13:54:09 -0400 Subject: [PATCH 0117/2016] Add 'Query.num_dml_affected_rows' property. (#3460) Read-only, set from servier-provided 'numDmlAffectedRows' field. Closes #2920. --- .../google/cloud/bigquery/query.py | 14 +++++++++ .../tests/unit/test_query.py | 30 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 6b764f3c664d..ee24d8397b73 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -226,6 +226,20 @@ def total_bytes_processed(self): if total_bytes_processed is not None: return int(total_bytes_processed) + @property + def num_dml_affected_rows(self): + """Total number of rows affected by a DML query. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#numDmlAffectedRows + + :rtype: int, or ``NoneType`` + :returns: Count generated on the server (None until set by the server). + """ + num_dml_affected_rows = self._properties.get('numDmlAffectedRows') + if num_dml_affected_rows is not None: + return int(num_dml_affected_rows) + @property def rows(self): """Query results. diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index a15833af347d..c2b3ce5496e1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -70,6 +70,7 @@ def _makeResource(self, complete=False): ] resource['pageToken'] = self.TOKEN resource['totalBytesProcessed'] = 100000 + resource['numDmlAffectedRows'] = 123 resource['cacheHit'] = False return resource @@ -124,10 +125,12 @@ def _verifyResourceProperties(self, query, resource): self.assertEqual(query.complete, resource.get('jobComplete')) self.assertEqual(query.errors, resource.get('errors')) self.assertEqual(query.page_token, resource.get('pageToken')) + if 'totalRows' in resource: self.assertEqual(query.total_rows, int(resource['totalRows'])) else: self.assertIsNone(query.total_rows) + if 'totalBytesProcessed' in resource: self.assertEqual(query.total_bytes_processed, int(resource['totalBytesProcessed'])) @@ -139,6 +142,12 @@ def _verifyResourceProperties(self, query, resource): else: self.assertIsNone(query.name) + if 'numDmlAffectedRows' in resource: + self.assertEqual(query.num_dml_affected_rows, + int(resource['numDmlAffectedRows'])) + else: + self.assertIsNone(query.num_dml_affected_rows) + self._verify_udf_resources(query, resource) self._verifyQueryParameters(query, resource) self._verifySchema(query, resource) @@ -371,6 +380,27 @@ def test_total_bytes_processed_present_string(self): query._set_properties(resource) self.assertEqual(query.total_bytes_processed, TOTAL_BYTES_PROCESSED) + def test_num_dml_affected_rows_missing(self): + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + self.assertIsNone(query.num_dml_affected_rows) + + def test_num_dml_affected_rows_present_integer(self): + DML_AFFECTED_ROWS = 123456 + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'numDmlAffectedRows': DML_AFFECTED_ROWS} + query._set_properties(resource) + self.assertEqual(query.num_dml_affected_rows, DML_AFFECTED_ROWS) + + def test_num_dml_affected_rows_present_string(self): + DML_AFFECTED_ROWS = 123456 + client = _Client(self.PROJECT) + query = self._make_one(self.QUERY, client) + resource = {'numDmlAffectedRows': str(DML_AFFECTED_ROWS)} + query._set_properties(resource) + self.assertEqual(query.num_dml_affected_rows, DML_AFFECTED_ROWS) + def test_schema(self): client = _Client(self.PROJECT) query = self._make_one(self.QUERY, client) From 6d81b6b650c8dbf0edcd703eed7755eb1c506763 Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Fri, 2 Jun 2017 14:36:29 -0700 Subject: [PATCH 0118/2016] Vision semi-GAPIC (#3373) --- .../google/cloud/bigquery/client.py | 14 ++--- .../google/cloud/bigquery/dataset.py | 8 +-- .../google/cloud/bigquery/job.py | 60 +++++++++---------- .../google/cloud/bigquery/query.py | 38 ++++++------ .../google/cloud/bigquery/table.py | 12 ++-- .../google-cloud-bigquery/tests/system.py | 6 +- 6 files changed, 69 insertions(+), 69 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index bf0b0a31dcc0..5f0101f35de5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -86,7 +86,7 @@ def __init__(self, project=None, credentials=None, _http=None): def list_projects(self, max_results=None, page_token=None): """List projects for the project associated with this client. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/projects/list :type max_results: int @@ -111,7 +111,7 @@ def list_datasets(self, include_all=False, max_results=None, page_token=None): """List datasets for the project associated with this client. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list :type include_all: bool @@ -183,7 +183,7 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, state_filter=None): """List jobs for the project associated with this client. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/list :type max_results: int @@ -227,7 +227,7 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, def load_table_from_storage(self, job_name, destination, *source_uris): """Construct a job for loading data into a table from CloudStorage. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load :type job_name: str @@ -249,7 +249,7 @@ def load_table_from_storage(self, job_name, destination, *source_uris): def copy_table(self, job_name, destination, *sources): """Construct a job for copying one or more tables into another table. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy :type job_name: str @@ -269,7 +269,7 @@ def copy_table(self, job_name, destination, *sources): def extract_table_to_storage(self, job_name, source, *destination_uris): """Construct a job for extracting a table into Cloud Storage files. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract :type job_name: str @@ -293,7 +293,7 @@ def run_async_query(self, job_name, query, udf_resources=(), query_parameters=()): """Construct a job for running a SQL query asynchronously. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query :type job_name: str diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index f98bb95b1098..bce74ca9f366 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -89,7 +89,7 @@ def __repr__(self): class Dataset(object): """Datasets are containers for tables. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets :type name: str @@ -417,7 +417,7 @@ def _build_resource(self): def create(self, client=None): """API call: create the dataset via a PUT request. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert :type client: :class:`~google.cloud.bigquery.client.Client` or @@ -530,7 +530,7 @@ def update(self, client=None): def delete(self, client=None): """API call: delete the dataset via a DELETE request. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete :type client: :class:`~google.cloud.bigquery.client.Client` or @@ -544,7 +544,7 @@ def delete(self, client=None): def list_tables(self, max_results=None, page_token=None): """List tables for the project associated with this client. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list :type max_results: int diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index c6ee642dfc7c..4f791bdbea0c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -301,7 +301,7 @@ def _get_resource_config(cls, resource): def begin(self, client=None): """API call: begin the job via a POST request - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert :type client: :class:`~google.cloud.bigquery.client.Client` or @@ -497,57 +497,57 @@ def output_rows(self): return int(statistics['load']['outputRows']) allow_jagged_rows = _TypedProperty('allow_jagged_rows', bool) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows """ allow_quoted_newlines = _TypedProperty('allow_quoted_newlines', bool) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines """ create_disposition = CreateDisposition('create_disposition') - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition """ encoding = Encoding('encoding') - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding """ field_delimiter = _TypedProperty('field_delimiter', six.string_types) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter """ ignore_unknown_values = _TypedProperty('ignore_unknown_values', bool) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues """ max_bad_records = _TypedProperty('max_bad_records', six.integer_types) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords """ quote_character = _TypedProperty('quote_character', six.string_types) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote """ skip_leading_rows = _TypedProperty('skip_leading_rows', six.integer_types) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows """ source_format = SourceFormat('source_format') - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat """ write_disposition = WriteDisposition('write_disposition') - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition """ @@ -672,12 +672,12 @@ def __init__(self, name, destination, sources, client): self._configuration = _CopyConfiguration() create_disposition = CreateDisposition('create_disposition') - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition """ write_disposition = WriteDisposition('write_disposition') - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition """ @@ -795,22 +795,22 @@ def __init__(self, name, source, destination_uris, client): self._configuration = _ExtractConfiguration() compression = Compression('compression') - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression """ destination_format = DestinationFormat('destination_format') - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat """ field_delimiter = _TypedProperty('field_delimiter', six.string_types) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter """ print_header = _TypedProperty('print_header', bool) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader """ @@ -936,32 +936,32 @@ def __init__(self, name, query, client, self._configuration = _AsyncQueryConfiguration() allow_large_results = _TypedProperty('allow_large_results', bool) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults """ create_disposition = CreateDisposition('create_disposition') - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition """ default_dataset = _TypedProperty('default_dataset', Dataset) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.defaultDataset """ destination = _TypedProperty('destination', Table) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable """ flatten_results = _TypedProperty('flatten_results', bool) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults """ priority = QueryPriority('priority') - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority """ @@ -970,34 +970,34 @@ def __init__(self, name, query, client, udf_resources = UDFResourcesProperty() use_query_cache = _TypedProperty('use_query_cache', bool) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache """ use_legacy_sql = _TypedProperty('use_legacy_sql', bool) - """See: + """See https://cloud.google.com/bigquery/docs/\ reference/v2/jobs#configuration.query.useLegacySql """ dry_run = _TypedProperty('dry_run', bool) - """See: + """See https://cloud.google.com/bigquery/docs/\ reference/rest/v2/jobs#configuration.dryRun """ write_disposition = WriteDisposition('write_disposition') - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition """ maximum_billing_tier = _TypedProperty('maximum_billing_tier', int) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier """ maximum_bytes_billed = _TypedProperty('maximum_bytes_billed', int) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled """ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index ee24d8397b73..ea704bf4a8e5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -123,7 +123,7 @@ def _require_client(self, client): def cache_hit(self): """Query results served from cache. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#cacheHit :rtype: bool or ``NoneType`` @@ -136,7 +136,7 @@ def cache_hit(self): def complete(self): """Server completed query. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobComplete :rtype: bool or ``NoneType`` @@ -149,7 +149,7 @@ def complete(self): def errors(self): """Errors generated by the query. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#errors :rtype: list of mapping, or ``NoneType`` @@ -162,7 +162,7 @@ def errors(self): def name(self): """Job name, generated by the back-end. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobReference :rtype: list of mapping, or ``NoneType`` @@ -190,7 +190,7 @@ def job(self): def page_token(self): """Token for fetching next bach of results. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#pageToken :rtype: str, or ``NoneType`` @@ -202,7 +202,7 @@ def page_token(self): def total_rows(self): """Total number of rows returned by the query. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalRows :rtype: int, or ``NoneType`` @@ -216,7 +216,7 @@ def total_rows(self): def total_bytes_processed(self): """Total number of bytes processed by the query. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalBytesProcessed :rtype: int, or ``NoneType`` @@ -230,7 +230,7 @@ def total_bytes_processed(self): def num_dml_affected_rows(self): """Total number of rows affected by a DML query. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#numDmlAffectedRows :rtype: int, or ``NoneType`` @@ -244,7 +244,7 @@ def num_dml_affected_rows(self): def rows(self): """Query results. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#rows :rtype: list of tuples of row values, or ``NoneType`` @@ -256,7 +256,7 @@ def rows(self): def schema(self): """Schema for query results. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#schema :rtype: list of :class:`SchemaField`, or ``NoneType`` @@ -265,41 +265,41 @@ def schema(self): return _parse_schema_resource(self._properties.get('schema', {})) default_dataset = _TypedProperty('default_dataset', Dataset) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#defaultDataset """ dry_run = _TypedProperty('dry_run', bool) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#dryRun """ max_results = _TypedProperty('max_results', six.integer_types) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#maxResults """ preserve_nulls = _TypedProperty('preserve_nulls', bool) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#preserveNulls """ query_parameters = QueryParametersProperty() timeout_ms = _TypedProperty('timeout_ms', six.integer_types) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#timeoutMs """ udf_resources = UDFResourcesProperty() use_query_cache = _TypedProperty('use_query_cache', bool) - """See: + """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#useQueryCache """ use_legacy_sql = _TypedProperty('use_legacy_sql', bool) - """See: + """See https://cloud.google.com/bigquery/docs/\ reference/v2/jobs/query#useLegacySql """ @@ -361,7 +361,7 @@ def _build_resource(self): def run(self, client=None): """API call: run the query via a POST request - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query :type client: :class:`~google.cloud.bigquery.client.Client` or @@ -382,7 +382,7 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, timeout_ms=None, client=None): """API call: fetch a page of query result data via a GET request - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults :type max_results: int diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d7b80dc25773..92ebfebb2d6e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -43,7 +43,7 @@ class Table(object): """Tables represent a set of rows whose values correspond to a schema. - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables :type name: str @@ -480,7 +480,7 @@ def _build_resource(self): def create(self, client=None): """API call: create the dataset via a PUT request - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert :type client: :class:`~google.cloud.bigquery.client.Client` or @@ -630,7 +630,7 @@ def update(self, client=None): def delete(self, client=None): """API call: delete the table via a DELETE request - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete :type client: :class:`~google.cloud.bigquery.client.Client` or @@ -644,7 +644,7 @@ def delete(self, client=None): def fetch_data(self, max_results=None, page_token=None, client=None): """API call: fetch the table data via a GET request - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list .. note:: @@ -696,7 +696,7 @@ def insert_data(self, client=None): """API call: insert table data via a POST request - See: + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll :type rows: list of tuples @@ -718,7 +718,7 @@ def insert_data(self, :param template_suffix: (Optional) treat ``name`` as a template table and provide a suffix. BigQuery will create the table `` + `` based - on the schema of the template table. See: + on the schema of the template table. See https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables :type client: :class:`~google.cloud.bigquery.client.Client` or diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index baad4a240507..86e376a2ccb1 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -59,7 +59,7 @@ def _rate_limit_exceeded(forbidden): # We need to wait to stay within the rate limits. # The alternative outcome is a 403 Forbidden response from upstream, which # they return instead of the more appropriate 429. -# See: https://cloud.google.com/bigquery/quota-policy +# See https://cloud.google.com/bigquery/quota-policy retry_403 = RetryErrors(Forbidden, error_predicate=_rate_limit_exceeded) @@ -326,7 +326,7 @@ def test_insert_data_then_dump_table(self): rows = () - # Allow for "warm up" before rows visible. See: + # Allow for "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds retry = RetryResult(_has_rows, max_tries=8) @@ -495,7 +495,7 @@ def test_load_table_from_storage_then_dump_table(self): def _job_done(instance): return instance.state in ('DONE', 'done') - # Allow for 90 seconds of "warm up" before rows visible. See: + # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds retry = RetryInstanceState(_job_done, max_tries=8) From 04f3f5caba2a1b9be7b621b058f7dd1f33041997 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Fri, 2 Jun 2017 14:44:17 -0700 Subject: [PATCH 0119/2016] Reloading BigQuery table in system test before fetching data. (#3468) e.g. https://circleci.com/gh/GoogleCloudPlatform/google-cloud-python/2065 Error introduced in #3426. --- packages/google-cloud-bigquery/tests/system.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 86e376a2ccb1..210951305b44 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -826,6 +826,8 @@ def test_dump_table_w_public_data(self): dataset = Config.CLIENT.dataset(DATASET_NAME, project=PUBLIC) table = dataset.table(TABLE_NAME) + # Reload table to get the schema before fetching the rows. + table.reload() self._fetch_single_page(table) def test_insert_nested_nested(self): From ed458054ddddb2b16112746bea41a30152ee0a8b Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 8 Jun 2017 22:08:17 -0400 Subject: [PATCH 0120/2016] Make 'QueryResponse.fetch_data' return an iterator. (#3484) Add a system test which exercises it. Update snippets to match the new usage. Closes #2840. --- .../google/cloud/bigquery/_helpers.py | 41 +++++++++++++++ .../google/cloud/bigquery/query.py | 51 +++++++++++++------ .../google/cloud/bigquery/table.py | 44 +--------------- .../google-cloud-bigquery/tests/system.py | 17 +++++++ .../tests/unit/test_query.py | 16 +++++- 5 files changed, 109 insertions(+), 60 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 201a9c76e555..7557111d100e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -678,3 +678,44 @@ def __set__(self, instance, value): raise ValueError( "query parameters must be derived from AbstractQueryParameter") instance._query_parameters = tuple(value) + + +def _item_to_row(iterator, resource): + """Convert a JSON row to the native object. + + .. note:: + + This assumes that the ``schema`` attribute has been + added to the iterator after being created, which + should be done by the caller. + + :type iterator: :class:`~google.cloud.iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type resource: dict + :param resource: An item to be converted to a row. + + :rtype: tuple + :returns: The next row in the page. + """ + return _row_from_json(resource, iterator.schema) + + +# pylint: disable=unused-argument +def _rows_page_start(iterator, page, response): + """Grab total rows when :class:`~google.cloud.iterator.Page` starts. + + :type iterator: :class:`~google.cloud.iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type page: :class:`~google.cloud.iterator.Page` + :param page: The page that was just created. + + :type response: dict + :param response: The JSON API response for a page of rows in a table. + """ + total_rows = response.get('totalRows') + if total_rows is not None: + total_rows = int(total_rows) + iterator.total_rows = total_rows +# pylint: enable=unused-argument diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index ea704bf4a8e5..6db2742bbe01 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -16,6 +16,7 @@ import six +from google.cloud.iterator import HTTPIterator from google.cloud.bigquery._helpers import _TypedProperty from google.cloud.bigquery._helpers import _rows_from_json from google.cloud.bigquery.dataset import Dataset @@ -23,6 +24,8 @@ from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import QueryParametersProperty from google.cloud.bigquery._helpers import UDFResourcesProperty +from google.cloud.bigquery._helpers import _item_to_row +from google.cloud.bigquery._helpers import _rows_page_start class _SyncQueryConfiguration(object): @@ -426,12 +429,6 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, client = self._require_client(client) params = {} - if max_results is not None: - params['maxResults'] = max_results - - if page_token is not None: - params['pageToken'] = page_token - if start_index is not None: params['startIndex'] = start_index @@ -439,15 +436,37 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, params['timeoutMs'] = timeout_ms path = '/projects/%s/queries/%s' % (self.project, self.name) - response = client._connection.api_request(method='GET', - path=path, - query_params=params) - self._set_properties(response) + iterator = HTTPIterator(client=client, path=path, + item_to_value=_item_to_row, + items_key='rows', + page_token=page_token, + max_results=max_results, + page_start=_rows_page_start_query, + extra_params=params) + iterator.query_result = self + # Over-ride the key used to retrieve the next page token. + iterator._NEXT_TOKEN = 'pageToken' + return iterator - total_rows = response.get('totalRows') - if total_rows is not None: - total_rows = int(total_rows) - page_token = response.get('pageToken') - rows_data = _rows_from_json(response.get('rows', ()), self.schema) - return rows_data, total_rows, page_token +def _rows_page_start_query(iterator, page, response): + """Update query response when :class:`~google.cloud.iterator.Page` starts. + + .. note:: + + This assumes that the ``query_response`` attribute has been + added to the iterator after being created, which + should be done by the caller. + + :type iterator: :class:`~google.cloud.iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type page: :class:`~google.cloud.iterator.Page` + :param page: The page that was just created. + + :type response: dict + :param response: The JSON API response for a page of rows in a table. + """ + iterator.query_result._set_properties(response) + iterator.schema = iterator.query_result.schema + _rows_page_start(iterator, page, response) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 92ebfebb2d6e..662cc670d541 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -32,7 +32,8 @@ from google.cloud.streaming.transfer import RESUMABLE_UPLOAD from google.cloud.streaming.transfer import Upload from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery._helpers import _row_from_json +from google.cloud.bigquery._helpers import _item_to_row +from google.cloud.bigquery._helpers import _rows_page_start from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW @@ -1076,47 +1077,6 @@ def _build_schema_resource(fields): return infos -def _item_to_row(iterator, resource): - """Convert a JSON row to the native object. - - .. note:: - - This assumes that the ``schema`` attribute has been - added to the iterator after being created, which - should be done by the caller. - - :type iterator: :class:`~google.cloud.iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type resource: dict - :param resource: An item to be converted to a row. - - :rtype: tuple - :returns: The next row in the page. - """ - return _row_from_json(resource, iterator.schema) - - -# pylint: disable=unused-argument -def _rows_page_start(iterator, page, response): - """Grab total rows after a :class:`~google.cloud.iterator.Page` started. - - :type iterator: :class:`~google.cloud.iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type page: :class:`~google.cloud.iterator.Page` - :param page: The page that was just created. - - :type response: dict - :param response: The JSON API response for a page of rows in a table. - """ - total_rows = response.get('totalRows') - if total_rows is not None: - total_rows = int(total_rows) - iterator.total_rows = total_rows -# pylint: enable=unused-argument - - class _UploadConfig(object): """Faux message FBO apitools' 'configure_request'.""" accept = ['*/*'] diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 210951305b44..456953194a53 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -830,6 +830,23 @@ def test_dump_table_w_public_data(self): table.reload() self._fetch_single_page(table) + def test_large_query_w_public_data(self): + PUBLIC = 'bigquery-public-data' + DATASET_NAME = 'samples' + TABLE_NAME = 'natality' + LIMIT = 1000 + SQL = 'SELECT * from `{}.{}.{}` LIMIT {}'.format( + PUBLIC, DATASET_NAME, TABLE_NAME, LIMIT) + + dataset = Config.CLIENT.dataset(DATASET_NAME, project=PUBLIC) + query = Config.CLIENT.run_sync_query(SQL) + query.use_legacy_sql = False + query.run() + + iterator = query.fetch_data() + rows = list(iterator) + self.assertEqual(len(rows), LIMIT) + def test_insert_nested_nested(self): # See #2951 SF = bigquery.SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index c2b3ce5496e1..d7977a4e7d0c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -654,6 +654,8 @@ def test_fetch_data_query_not_yet_run(self): self.assertRaises(ValueError, query.fetch_data) def test_fetch_data_w_bound_client(self): + import six + PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_NAME) BEFORE = self._makeResource(complete=False) AFTER = self._makeResource(complete=True) @@ -665,7 +667,11 @@ def test_fetch_data_w_bound_client(self): query._set_properties(BEFORE) self.assertFalse(query.complete) - rows, total_rows, page_token = query.fetch_data() + iterator = query.fetch_data() + page = six.next(iterator.pages) + rows = list(page) + total_rows = iterator.total_rows + page_token = iterator.next_page_token self.assertTrue(query.complete) self.assertEqual(len(rows), 4) @@ -682,6 +688,8 @@ def test_fetch_data_w_bound_client(self): self.assertEqual(req['path'], '/%s' % PATH) def test_fetch_data_w_alternate_client(self): + import six + PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_NAME) MAX = 10 TOKEN = 'TOKEN' @@ -698,9 +706,13 @@ def test_fetch_data_w_alternate_client(self): query._set_properties(BEFORE) self.assertFalse(query.complete) - rows, total_rows, page_token = query.fetch_data( + iterator = query.fetch_data( client=client2, max_results=MAX, page_token=TOKEN, start_index=START, timeout_ms=TIMEOUT) + page = six.next(iterator.pages) + rows = list(page) + total_rows = iterator.total_rows + page_token = iterator.next_page_token self.assertTrue(query.complete) self.assertEqual(len(rows), 4) From 56126d5fe6ef5d9789d7a7032c5b774feef75664 Mon Sep 17 00:00:00 2001 From: smasue Date: Fri, 23 Jun 2017 18:28:31 +0200 Subject: [PATCH 0121/2016] BigQuery _EnumProperty ValueError messages are not displayed properly (#3520) --- .../google-cloud-bigquery/google/cloud/bigquery/_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 7557111d100e..6641fbe01b42 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -318,7 +318,7 @@ def _validate(self, value): :raises: ValueError if value is not allowed. """ if value not in self.ALLOWED: - raise ValueError('Pass one of: %s' ', '.join(self.ALLOWED)) + raise ValueError('Pass one of: %s' % ', '.join(self.ALLOWED)) class UDFResource(object): From af0e45143189b1dbc82a4992eb59e521b04007eb Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 23 Jun 2017 15:08:10 -0700 Subject: [PATCH 0122/2016] Re-enable pylint in info-only mode for all packages (#3519) --- packages/google-cloud-bigquery/nox.py | 13 +++++++--- .../google-cloud-bigquery/pylint.config.py | 25 +++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 packages/google-cloud-bigquery/pylint.config.py diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 27bfb7f87ac4..a08e9fb307c2 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -66,15 +66,22 @@ def system_tests(session, python_version): @nox.session def lint(session): - """Run flake8. + """Run linters. - Returns a failure if flake8 finds linting errors or sufficiently + Returns a failure if the linters find linting errors or sufficiently serious code quality issues. """ session.interpreter = 'python3.6' - session.install('flake8', *LOCAL_DEPS) + session.install('flake8', 'pylint', 'gcp-devrel-py-tools', *LOCAL_DEPS) session.install('.') session.run('flake8', 'google/cloud/bigquery') + session.run( + 'gcp-devrel-py-tools', 'run-pylint', + '--config', 'pylint.config.py', + '--library-filesets', 'google', + '--test-filesets', 'tests', + # Temporarily allow this to fail. + success_codes=range(0, 100)) @nox.session diff --git a/packages/google-cloud-bigquery/pylint.config.py b/packages/google-cloud-bigquery/pylint.config.py new file mode 100644 index 000000000000..d8ca7b92e85e --- /dev/null +++ b/packages/google-cloud-bigquery/pylint.config.py @@ -0,0 +1,25 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module is used to configure gcp-devrel-py-tools run-pylint.""" + +# Library configuration + +# library_additions = {} +# library_replacements = {} + +# Test configuration + +# test_additions = copy.deepcopy(library_additions) +# test_replacements = copy.deepcopy(library_replacements) From d78519c9952dd2b35963c327159d5b3513a86744 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 26 Jun 2017 18:37:36 -0400 Subject: [PATCH 0123/2016] Prep bigquery-0.25.0 release. (#3530) --- packages/google-cloud-bigquery/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index ffd62619efe3..4efe79c19e1a 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -51,12 +51,12 @@ REQUIREMENTS = [ - 'google-cloud-core >= 0.24.0, < 0.25dev', + 'google-cloud-core >= 0.25.0, < 0.26dev', ] setup( name='google-cloud-bigquery', - version='0.24.0', + version='0.25.0', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From aabe7bf39f6dd4f9dde451935279a2d040aed13d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 27 Jun 2017 10:30:57 -0700 Subject: [PATCH 0124/2016] BQ: cleanup flake8 errors in tests (#3551) (google-cloud-python-2) $ flake8 tests tests/system.py:178:32: F812 list comprehension redefines 'dataset' from line 170 tests/system.py:233:30: F812 list comprehension redefines 'table' from line 225 tests/system.py:841:9: F841 local variable 'dataset' is assigned to but never used tests/unit/test_table.py:406:23: W291 trailing whitespace tests/unit/test_table.py:1052:9: F841 local variable 'ROWS' is assigned to but never used --- packages/google-cloud-bigquery/nox.py | 1 + packages/google-cloud-bigquery/tests/system.py | 13 ++++++------- .../google-cloud-bigquery/tests/unit/test_table.py | 8 +------- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index a08e9fb307c2..a0211fba9b3d 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -75,6 +75,7 @@ def lint(session): session.install('flake8', 'pylint', 'gcp-devrel-py-tools', *LOCAL_DEPS) session.install('.') session.run('flake8', 'google/cloud/bigquery') + session.run('flake8', 'tests') session.run( 'gcp-devrel-py-tools', 'run-pylint', '--config', 'pylint.config.py', diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 456953194a53..cfd2c4856c80 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -167,9 +167,9 @@ def test_list_datasets(self): 'newest' + unique_resource_id(), ] for dataset_name in datasets_to_create: - dataset = Config.CLIENT.dataset(dataset_name) - retry_403(dataset.create)() - self.to_delete.append(dataset) + created_dataset = Config.CLIENT.dataset(dataset_name) + retry_403(created_dataset.create)() + self.to_delete.append(created_dataset) # Retrieve the datasets. iterator = Config.CLIENT.list_datasets() @@ -222,9 +222,9 @@ def test_list_tables(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') for table_name in tables_to_create: - table = dataset.table(table_name, schema=[full_name, age]) - table.create() - self.to_delete.insert(0, table) + created_table = dataset.table(table_name, schema=[full_name, age]) + created_table.create() + self.to_delete.insert(0, created_table) # Retrieve the tables. iterator = dataset.list_tables() @@ -838,7 +838,6 @@ def test_large_query_w_public_data(self): SQL = 'SELECT * from `{}.{}.{}` LIMIT {}'.format( PUBLIC, DATASET_NAME, TABLE_NAME, LIMIT) - dataset = Config.CLIENT.dataset(DATASET_NAME, project=PUBLIC) query = Config.CLIENT.run_sync_query(SQL) query.use_legacy_sql = False query.run() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index c940706c6b86..5a3c70112564 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -403,7 +403,7 @@ def test_create_new_day_partitioned_table(self): dataset = _Dataset(client) table = self._make_one(self.TABLE_NAME, dataset) table.partitioning_type = 'DAY' - table.create() + table.create() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -1049,12 +1049,6 @@ def test_fetch_data_wo_schema(self): client = _Client(project=self.PROJECT) dataset = _Dataset(client) table = self._make_one(self.TABLE_NAME, dataset=dataset) - ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] with self.assertRaises(ValueError) as exc: table.fetch_data() From bba027bd0f386718f2002d99093c6ba158af24aa Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 27 Jun 2017 10:32:30 -0700 Subject: [PATCH 0125/2016] Fix inclusion of tests in manifest.in (#3552) --- packages/google-cloud-bigquery/MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/MANIFEST.in b/packages/google-cloud-bigquery/MANIFEST.in index 24aa72fb370b..1fbc0d0b321e 100644 --- a/packages/google-cloud-bigquery/MANIFEST.in +++ b/packages/google-cloud-bigquery/MANIFEST.in @@ -1,3 +1,3 @@ include README.rst LICENSE -recursive-include unit_tests * +recursive-include tests * global-exclude *.pyc __pycache__ From 12bb565e1a3cb0ccf5490ada573662d838527efa Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Wed, 28 Jun 2017 14:07:25 -0700 Subject: [PATCH 0126/2016] Making all LICENSE headers "uniform". (#3563) --- packages/google-cloud-bigquery/pylint.config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/pylint.config.py b/packages/google-cloud-bigquery/pylint.config.py index d8ca7b92e85e..b618319b8b61 100644 --- a/packages/google-cloud-bigquery/pylint.config.py +++ b/packages/google-cloud-bigquery/pylint.config.py @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, From 3d6351e12717e9b5f254bd5feadf7f7fbef777a6 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 29 Jun 2017 10:56:09 -0700 Subject: [PATCH 0127/2016] Skipping system tests when credentials env. var is unset. (#3475) --- packages/google-cloud-bigquery/nox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index a0211fba9b3d..9851f53d188d 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -49,7 +49,7 @@ def system_tests(session, python_version): # Sanity check: Only run system tests if the environment variable is set. if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): - return + session.skip('Credentials must be set via environment variable.') # Run the system tests against latest Python 2 and Python 3 only. session.interpreter = 'python{}'.format(python_version) From e7900ea726ada581ac671de311993a6bf9cdfe77 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 5 Jul 2017 14:02:42 -0400 Subject: [PATCH 0128/2016] Update 'QueryResponse.fetch_data' docstring to match changes from #3484. (#3580) Closes #3576. --- .../google/cloud/bigquery/query.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 6db2742bbe01..d596deadfb40 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -414,13 +414,12 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. - :rtype: tuple - :returns: ``(row_data, total_rows, page_token)``, where ``row_data`` - is a list of tuples, one per result row, containing only - the values; ``total_rows`` is a count of the total number - of rows in the table; and ``page_token`` is an opaque - string which can be used to fetch the next batch of rows - (``None`` if no further batches can be fetched). + :rtype: :class:`~google.cloud.iterator.Iterator` + :returns: Iterator of row data :class:`tuple`s. During each page, the + iterator will have the ``total_rows`` attribute set, + which counts the total number of rows **in the result + set** (this is distinct from the total number of rows in + the current page: ``iterator.page.num_items``). :raises: ValueError if the query has not yet been executed. """ if self.name is None: From 10f22d0599b025103b672a91a2f2d755b0db7daa Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 6 Jul 2017 16:41:31 -0400 Subject: [PATCH 0129/2016] Shorten nox virtualenv names to avoid hashing. (#3585) --- packages/google-cloud-bigquery/nox.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 9851f53d188d..19a8f5761701 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -30,6 +30,9 @@ def unit_tests(session, python_version): # Run unit tests against all supported versions of Python. session.interpreter = 'python{}'.format(python_version) + # Set the virtualenv dirname. + session.virtualenv_dirname = 'unit-' + python_version + # Install all test dependencies, then install this package in-place. session.install('mock', 'pytest', 'pytest-cov', *LOCAL_DEPS) session.install('-e', '.') @@ -54,6 +57,9 @@ def system_tests(session, python_version): # Run the system tests against latest Python 2 and Python 3 only. session.interpreter = 'python{}'.format(python_version) + # Set the virtualenv dirname. + session.virtualenv_dirname = 'sys-' + python_version + # Install all test dependencies, then install this package into the # virutalenv's dist-packages. session.install('mock', 'pytest', *LOCAL_DEPS) @@ -72,6 +78,7 @@ def lint(session): serious code quality issues. """ session.interpreter = 'python3.6' + session.install('flake8', 'pylint', 'gcp-devrel-py-tools', *LOCAL_DEPS) session.install('.') session.run('flake8', 'google/cloud/bigquery') @@ -89,6 +96,10 @@ def lint(session): def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" session.interpreter = 'python3.6' + + # Set the virtualenv dirname. + session.virtualenv_dirname = 'setup' + session.install('docutils', 'Pygments') session.run( 'python', 'setup.py', 'check', '--restructuredtext', '--strict') @@ -102,6 +113,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. """ session.interpreter = 'python3.6' + session.install('coverage', 'pytest-cov') session.run('coverage', 'report', '--show-missing', '--fail-under=100') session.run('coverage', 'erase') From a737368d9c5fea82f0b73e4f96394e271e167bb5 Mon Sep 17 00:00:00 2001 From: Rich Kadel Date: Thu, 6 Jul 2017 14:22:59 -0700 Subject: [PATCH 0130/2016] Add support for creating a view with 'useLegacySql = False' (#3514) --- .../google/cloud/bigquery/table.py | 44 +++++++++++++++++-- .../tests/unit/test_table.py | 24 +++++++++- 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 662cc670d541..37dc1159cc8e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -193,7 +193,7 @@ def table_id(self): def table_type(self): """The type of the table. - Possible values are "TABLE" or "VIEW". + Possible values are "TABLE", "VIEW", or "EXTERNAL". :rtype: str, or ``NoneType`` :returns: the URL (None until set from the server). @@ -364,13 +364,49 @@ def view_query(self, value): """ if not isinstance(value, six.string_types): raise ValueError("Pass a string") - self._properties['view'] = {'query': value} + if self._properties.get('view') is None: + self._properties['view'] = {} + self._properties['view']['query'] = value @view_query.deleter def view_query(self): """Delete SQL query defining the table as a view.""" self._properties.pop('view', None) + @property + def view_use_legacy_sql(self): + """Specifies whether to execute the view with legacy or standard SQL. + + If not set, None is returned. BigQuery's default mode is equivalent to + useLegacySql = True. + + :rtype: bool, or ``NoneType`` + :returns: The boolean for view.useLegacySql as set by the user, or + None (the default). + """ + view = self._properties.get('view') + if view is not None: + return view.get('useLegacySql') + + @view_use_legacy_sql.setter + def view_use_legacy_sql(self, value): + """Update the view sub-property 'useLegacySql'. + + This boolean specifies whether to execute the view with legacy SQL + (True) or standard SQL (False). The default, if not specified, is + 'True'. + + :type value: bool + :param value: The boolean for view.useLegacySql + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, bool): + raise ValueError("Pass a boolean") + if self._properties.get('view') is None: + self._properties['view'] = {} + self._properties['view']['useLegacySql'] = value + def list_partitions(self, client=None): """List the partitions in a table. @@ -470,6 +506,8 @@ def _build_resource(self): if self.view_query is not None: view = resource['view'] = {} view['query'] = self.view_query + if self.view_use_legacy_sql is not None: + view['useLegacySql'] = self.view_use_legacy_sql if self._schema: resource['schema'] = { @@ -479,7 +517,7 @@ def _build_resource(self): return resource def create(self, client=None): - """API call: create the dataset via a PUT request + """API call: create the table via a PUT request See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 5a3c70112564..0e987462da22 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -124,8 +124,10 @@ def _verifyResourceProperties(self, table, resource): if 'view' in resource: self.assertEqual(table.view_query, resource['view']['query']) + self.assertEqual(table.view_use_legacy_sql, resource['view'].get('useLegacySql')) else: self.assertIsNone(table.view_query) + self.assertIsNone(table.view_use_legacy_sql) if 'schema' in resource: self._verifySchema(table.schema, resource) @@ -160,6 +162,7 @@ def test_ctor(self): self.assertIsNone(table.friendly_name) self.assertIsNone(table.location) self.assertIsNone(table.view_query) + self.assertIsNone(table.view_use_legacy_sql) def test_ctor_w_schema(self): from google.cloud.bigquery.table import SchemaField @@ -358,6 +361,22 @@ def test_view_query_deleter(self): del table.view_query self.assertIsNone(table.view_query) + def test_view_use_legacy_sql_setter_bad_value(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._make_one(self.TABLE_NAME, dataset) + with self.assertRaises(ValueError): + table.view_use_legacy_sql = 12345 + + def test_view_use_legacy_sql_setter(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._make_one(self.TABLE_NAME, dataset) + table.view_use_legacy_sql = False + table.view_query = 'select * from foo' + self.assertEqual(table.view_use_legacy_sql, False) + self.assertEqual(table.view_query, 'select * from foo') + def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) @@ -978,7 +997,7 @@ def test_update_w_alternate_client(self): self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) RESOURCE['expirationTime'] = _millis(self.EXP_TIME) - RESOURCE['view'] = {'query': QUERY} + RESOURCE['view'] = {'query': QUERY, 'useLegacySql': True} RESOURCE['type'] = 'VIEW' conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) @@ -990,6 +1009,7 @@ def test_update_w_alternate_client(self): table.location = LOCATION table.expires = self.EXP_TIME table.view_query = QUERY + table.view_use_legacy_sql = True table.update(client=client2) @@ -1005,7 +1025,7 @@ def test_update_w_alternate_client(self): 'tableId': self.TABLE_NAME}, 'expirationTime': _millis(self.EXP_TIME), 'location': 'EU', - 'view': {'query': QUERY}, + 'view': {'query': QUERY, 'useLegacySql': True}, } self.assertEqual(req['data'], SENT) self._verifyResourceProperties(table, RESOURCE) From 7c5bf7e9aa70abdbfb0d11f4eb94f02b41d9be71 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 10 Jul 2017 10:31:26 -0700 Subject: [PATCH 0131/2016] Fixing "long line" lint violation in BigQuery unit tests. (#3596) --- packages/google-cloud-bigquery/tests/unit/test_table.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 0e987462da22..b27736fb896e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -124,7 +124,9 @@ def _verifyResourceProperties(self, table, resource): if 'view' in resource: self.assertEqual(table.view_query, resource['view']['query']) - self.assertEqual(table.view_use_legacy_sql, resource['view'].get('useLegacySql')) + self.assertEqual( + table.view_use_legacy_sql, + resource['view'].get('useLegacySql')) else: self.assertIsNone(table.view_query) self.assertIsNone(table.view_use_legacy_sql) From 9a2003fcfeca86c400bb7c0db22986b110d4d748 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 11 Jul 2017 10:51:40 -0700 Subject: [PATCH 0132/2016] Updating author_email in all setup.py. (#3598) Done via: $ git grep -l author_email | \ > xargs sed -i s/jjg+google-cloud-python@google.com/googleapis-publisher@google.com/g and manually editing `videointelligence/setup.py` and `vision/setup.py`. --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 4efe79c19e1a..6d61064c88ba 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -27,7 +27,7 @@ # consolidate. SETUP_BASE = { 'author': 'Google Cloud Platform', - 'author_email': 'jjg+google-cloud-python@google.com', + 'author_email': 'googleapis-publisher@google.com', 'scripts': [], 'url': 'https://github.com/GoogleCloudPlatform/google-cloud-python', 'license': 'Apache 2.0', From 9c25eb01573d9c037d3418d698e31be3db3b0898 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 12 Jul 2017 10:04:48 -0700 Subject: [PATCH 0133/2016] Implementation of DB-API for BigQuery. (#2921) The `google.cloud.bigquery.dbapi` package covers all of the required implementation details in the PEP-249 DB-API specification. --- .../google/cloud/bigquery/dbapi/__init__.py | 70 ++++ .../google/cloud/bigquery/dbapi/_helpers.py | 129 +++++++ .../google/cloud/bigquery/dbapi/connection.py | 58 ++++ .../google/cloud/bigquery/dbapi/cursor.py | 327 ++++++++++++++++++ .../google/cloud/bigquery/dbapi/exceptions.py | 58 ++++ .../google/cloud/bigquery/dbapi/types.py | 84 +++++ .../google-cloud-bigquery/tests/system.py | 209 ++++++++++- .../tests/unit/test_dbapi__helpers.py | 134 +++++++ .../tests/unit/test_dbapi_connection.py | 73 ++++ .../tests/unit/test_dbapi_cursor.py | 269 ++++++++++++++ .../tests/unit/test_dbapi_types.py | 40 +++ 11 files changed, 1432 insertions(+), 19 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/exceptions.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py new file mode 100644 index 000000000000..4e9c9a810da4 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py @@ -0,0 +1,70 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Google BigQuery implementation of the Database API Specification v2.0. + +This module implements the `Python Database API Specification v2.0 (DB-API)`_ +for Google BigQuery. + +.. _Python Database API Specification v2.0 (DB-API): + https://www.python.org/dev/peps/pep-0249/ + +.. warning:: + The ``dbapi`` module is **alpha**. The implementation is not complete. It + might be changed in backward-incompatible ways and is not subject to any SLA + or deprecation policy. +""" + +from google.cloud.bigquery.dbapi.connection import connect +from google.cloud.bigquery.dbapi.connection import Connection +from google.cloud.bigquery.dbapi.cursor import Cursor +from google.cloud.bigquery.dbapi.exceptions import Warning +from google.cloud.bigquery.dbapi.exceptions import Error +from google.cloud.bigquery.dbapi.exceptions import InterfaceError +from google.cloud.bigquery.dbapi.exceptions import DatabaseError +from google.cloud.bigquery.dbapi.exceptions import DataError +from google.cloud.bigquery.dbapi.exceptions import OperationalError +from google.cloud.bigquery.dbapi.exceptions import IntegrityError +from google.cloud.bigquery.dbapi.exceptions import InternalError +from google.cloud.bigquery.dbapi.exceptions import ProgrammingError +from google.cloud.bigquery.dbapi.exceptions import NotSupportedError +from google.cloud.bigquery.dbapi.types import Binary +from google.cloud.bigquery.dbapi.types import Date +from google.cloud.bigquery.dbapi.types import DateFromTicks +from google.cloud.bigquery.dbapi.types import Time +from google.cloud.bigquery.dbapi.types import TimeFromTicks +from google.cloud.bigquery.dbapi.types import Timestamp +from google.cloud.bigquery.dbapi.types import TimestampFromTicks +from google.cloud.bigquery.dbapi.types import BINARY +from google.cloud.bigquery.dbapi.types import DATETIME +from google.cloud.bigquery.dbapi.types import NUMBER +from google.cloud.bigquery.dbapi.types import ROWID +from google.cloud.bigquery.dbapi.types import STRING + + +apilevel = '2.0' + +# Threads may share the module, but not connections. +threadsafety = 1 + +paramstyle = 'pyformat' + +__all__ = [ + 'apilevel', 'threadsafety', 'paramstyle', 'connect', 'Connection', + 'Cursor', 'Warning', 'Error', 'InterfaceError', 'DatabaseError', + 'DataError', 'OperationalError', 'IntegrityError', 'InternalError', + 'ProgrammingError', 'NotSupportedError', 'Binary', 'Date', 'DateFromTicks', + 'Time', 'TimeFromTicks', 'Timestamp', 'TimestampFromTicks', 'BINARY', + 'DATETIME', 'NUMBER', 'ROWID', 'STRING', +] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py new file mode 100644 index 000000000000..1a9a02fd7cc7 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -0,0 +1,129 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import datetime +import numbers +import time + +import six + +from google.cloud import bigquery +from google.cloud.bigquery.dbapi import exceptions + + +def wait_for_job(job): + """Waits for a job to complete by polling until the state is `DONE`. + + Sleeps 1 second between calls to the BigQuery API. + + :type job: :class:`~google.cloud.bigquery.job._AsyncJob` + :param job: Wait for this job to finish. + + :raises: :class:`~google.cloud.bigquery.dbapi.exceptions.DatabaseError` + if the job fails. + """ + while True: + job.reload() + if job.state == 'DONE': + if job.error_result: + raise exceptions.DatabaseError(job.errors) + return + time.sleep(1) + + +def scalar_to_query_parameter(value, name=None): + """Convert a scalar value into a query parameter. + + :type value: any + :param value: A scalar value to convert into a query parameter. + + :type name: str + :param name: (Optional) Name of the query parameter. + + :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter` + :returns: + A query parameter corresponding with the type and value of the plain + Python object. + :raises: :class:`~google.cloud.bigquery.dbapi.exceptions.ProgrammingError` + if the type cannot be determined. + """ + parameter_type = None + + if isinstance(value, bool): + parameter_type = 'BOOL' + elif isinstance(value, numbers.Integral): + parameter_type = 'INT64' + elif isinstance(value, numbers.Real): + parameter_type = 'FLOAT64' + elif isinstance(value, six.text_type): + parameter_type = 'STRING' + elif isinstance(value, six.binary_type): + parameter_type = 'BYTES' + elif isinstance(value, datetime.datetime): + parameter_type = 'DATETIME' if value.tzinfo is None else 'TIMESTAMP' + elif isinstance(value, datetime.date): + parameter_type = 'DATE' + elif isinstance(value, datetime.time): + parameter_type = 'TIME' + else: + raise exceptions.ProgrammingError( + 'encountered parameter {} with value {} of unexpected type'.format( + name, value)) + return bigquery.ScalarQueryParameter(name, parameter_type, value) + + +def to_query_parameters_list(parameters): + """Converts a sequence of parameter values into query parameters. + + :type parameters: Sequence[Any] + :param parameters: Sequence of query parameter values. + + :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :returns: A list of query parameters. + """ + return [scalar_to_query_parameter(value) for value in parameters] + + +def to_query_parameters_dict(parameters): + """Converts a dictionary of parameter values into query parameters. + + :type parameters: Mapping[str, Any] + :param parameters: Dictionary of query parameter values. + + :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :returns: A list of named query parameters. + """ + return [ + scalar_to_query_parameter(value, name=name) + for name, value + in six.iteritems(parameters)] + + +def to_query_parameters(parameters): + """Converts DB-API parameter values into query parameters. + + :type parameters: Mapping[str, Any] or Sequence[Any] + :param parameters: A dictionary or sequence of query parameter values. + + :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :returns: A list of query parameters. + """ + if parameters is None: + return [] + + if isinstance(parameters, collections.Mapping): + return to_query_parameters_dict(parameters) + + return to_query_parameters_list(parameters) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py new file mode 100644 index 000000000000..66aa0929b97e --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py @@ -0,0 +1,58 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Connection for the Google BigQuery DB-API.""" + +from google.cloud import bigquery +from google.cloud.bigquery.dbapi import cursor + + +class Connection(object): + """DB-API Connection to Google BigQuery. + + :type client: :class:`~google.cloud.bigquery.Client` + :param client: A client used to connect to BigQuery. + """ + def __init__(self, client): + self._client = client + + def close(self): + """No-op.""" + + def commit(self): + """No-op.""" + + def cursor(self): + """Return a new cursor object. + + :rtype: :class:`~google.cloud.bigquery.dbapi.Cursor` + :returns: A DB-API cursor that uses this connection. + """ + return cursor.Cursor(self) + + +def connect(client=None): + """Construct a DB-API connection to Google BigQuery. + + :type client: :class:`~google.cloud.bigquery.Client` + :param client: + (Optional) A client used to connect to BigQuery. If not passed, a + client is created using default options inferred from the environment. + + :rtype: :class:`~google.cloud.bigquery.dbapi.Connection` + :returns: A new DB-API connection to BigQuery. + """ + if client is None: + client = bigquery.Client() + return Connection(client) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py new file mode 100644 index 000000000000..4398eec20b88 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -0,0 +1,327 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Cursor for the Google BigQuery DB-API.""" + +import collections +import uuid + +import six + +from google.cloud.bigquery.dbapi import _helpers +from google.cloud.bigquery.dbapi import exceptions + + +# Per PEP 249: A 7-item sequence containing information describing one result +# column. The first two items (name and type_code) are mandatory, the other +# five are optional and are set to None if no meaningful values can be +# provided. +Column = collections.namedtuple( + 'Column', + [ + 'name', 'type_code', 'display_size', 'internal_size', 'precision', + 'scale', 'null_ok', + ]) + + +class Cursor(object): + """DB-API Cursor to Google BigQuery. + + :type connection: :class:`~google.cloud.bigquery.dbapi.Connection` + :param connection: A DB-API connection to Google BigQuery. + """ + def __init__(self, connection): + self.connection = connection + self.description = None + # Per PEP 249: The attribute is -1 in case no .execute*() has been + # performed on the cursor or the rowcount of the last operation + # cannot be determined by the interface. + self.rowcount = -1 + # Per PEP 249: The arraysize attribute defaults to 1, meaning to fetch + # a single row at a time. + self.arraysize = 1 + self._query_data = None + self._page_token = None + self._has_fetched_all_rows = True + + def close(self): + """No-op.""" + + def _set_description(self, schema): + """Set description from schema. + + :type schema: Sequence[google.cloud.bigquery.schema.SchemaField] + :param schema: A description of fields in the schema. + """ + if schema is None: + self.description = None + return + + self.description = tuple([ + Column( + name=field.name, + type_code=field.field_type, + display_size=None, + internal_size=None, + precision=None, + scale=None, + null_ok=field.mode == 'NULLABLE') + for field in schema]) + + def _set_rowcount(self, query_results): + """Set the rowcount from query results. + + Normally, this sets rowcount to the number of rows returned by the + query, but if it was a DML statement, it sets rowcount to the number + of modified rows. + + :type query_results: + :class:`~google.cloud.bigquery.query.QueryResults` + :param query_results: results of a query + """ + total_rows = 0 + num_dml_affected_rows = query_results.num_dml_affected_rows + + if (query_results.total_rows is not None + and query_results.total_rows > 0): + total_rows = query_results.total_rows + if num_dml_affected_rows is not None and num_dml_affected_rows > 0: + total_rows = num_dml_affected_rows + self.rowcount = total_rows + + def execute(self, operation, parameters=None): + """Prepare and execute a database operation. + + .. note:: + When setting query parameters, values which are "text" + (``unicode`` in Python2, ``str`` in Python3) will use + the 'STRING' BigQuery type. Values which are "bytes" (``str`` in + Python2, ``bytes`` in Python3), will use using the 'BYTES' type. + + A `~datetime.datetime` parameter without timezone information uses + the 'DATETIME' BigQuery type (example: Global Pi Day Celebration + March 14, 2017 at 1:59pm). A `~datetime.datetime` parameter with + timezone information uses the 'TIMESTAMP' BigQuery type (example: + a wedding on April 29, 2011 at 11am, British Summer Time). + + For more information about BigQuery data types, see: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types + + ``STRUCT``/``RECORD`` and ``REPEATED`` query parameters are not + yet supported. See: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3524 + + :type operation: str + :param operation: A Google BigQuery query string. + + :type parameters: Mapping[str, Any] or Sequence[Any] + :param parameters: + (Optional) dictionary or sequence of parameter values. + """ + self._query_results = None + self._page_token = None + self._has_fetched_all_rows = False + client = self.connection._client + job_id = str(uuid.uuid4()) + + # The DB-API uses the pyformat formatting, since the way BigQuery does + # query parameters was not one of the standard options. Convert both + # the query and the parameters to the format expected by the client + # libraries. + formatted_operation = _format_operation( + operation, parameters=parameters) + query_parameters = _helpers.to_query_parameters(parameters) + + query_job = client.run_async_query( + job_id, + formatted_operation, + query_parameters=query_parameters) + query_job.use_legacy_sql = False + query_job.begin() + _helpers.wait_for_job(query_job) + query_results = query_job.results() + + # Force the iterator to run because the query_results doesn't + # have the total_rows populated. See: + # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3506 + query_iterator = query_results.fetch_data() + try: + six.next(iter(query_iterator)) + except StopIteration: + pass + + self._query_data = iter( + query_results.fetch_data(max_results=self.arraysize)) + self._set_rowcount(query_results) + self._set_description(query_results.schema) + + def executemany(self, operation, seq_of_parameters): + """Prepare and execute a database operation multiple times. + + :type operation: str + :param operation: A Google BigQuery query string. + + :type seq_of_parameters: Sequence[Mapping[str, Any] or Sequence[Any]] + :param parameters: Sequence of many sets of parameter values. + """ + for parameters in seq_of_parameters: + self.execute(operation, parameters) + + def fetchone(self): + """Fetch a single row from the results of the last ``execute*()`` call. + + :rtype: tuple + :returns: + A tuple representing a row or ``None`` if no more data is + available. + :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError` + if called before ``execute()``. + """ + if self._query_data is None: + raise exceptions.InterfaceError( + 'No query results: execute() must be called before fetch.') + + try: + return six.next(self._query_data) + except StopIteration: + return None + + def fetchmany(self, size=None): + """Fetch multiple results from the last ``execute*()`` call. + + .. note:: + The size parameter is not used for the request/response size. + Set the ``arraysize`` attribute before calling ``execute()`` to + set the batch size. + + :type size: int + :param size: + (Optional) Maximum number of rows to return. Defaults to the + ``arraysize`` property value. + + :rtype: List[tuple] + :returns: A list of rows. + :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError` + if called before ``execute()``. + """ + if self._query_data is None: + raise exceptions.InterfaceError( + 'No query results: execute() must be called before fetch.') + if size is None: + size = self.arraysize + + rows = [] + for row in self._query_data: + rows.append(row) + if len(rows) >= size: + break + return rows + + def fetchall(self): + """Fetch all remaining results from the last ``execute*()`` call. + + :rtype: List[tuple] + :returns: A list of all the rows in the results. + :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError` + if called before ``execute()``. + """ + if self._query_data is None: + raise exceptions.InterfaceError( + 'No query results: execute() must be called before fetch.') + return [row for row in self._query_data] + + def setinputsizes(self, sizes): + """No-op.""" + + def setoutputsize(self, size, column=None): + """No-op.""" + + +def _format_operation_list(operation, parameters): + """Formats parameters in operation in the way BigQuery expects. + + The input operation will be a query like ``SELECT %s`` and the output + will be a query like ``SELECT ?``. + + :type operation: str + :param operation: A Google BigQuery query string. + + :type parameters: Sequence[Any] + :param parameters: Sequence of parameter values. + + :rtype: str + :returns: A formatted query string. + :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError` + if a parameter used in the operation is not found in the + ``parameters`` argument. + """ + formatted_params = ['?' for _ in parameters] + + try: + return operation % tuple(formatted_params) + except TypeError as exc: + raise exceptions.ProgrammingError(exc) + + +def _format_operation_dict(operation, parameters): + """Formats parameters in operation in the way BigQuery expects. + + The input operation will be a query like ``SELECT %(namedparam)s`` and + the output will be a query like ``SELECT @namedparam``. + + :type operation: str + :param operation: A Google BigQuery query string. + + :type parameters: Mapping[str, Any] + :param parameters: Dictionary of parameter values. + + :rtype: str + :returns: A formatted query string. + :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError` + if a parameter used in the operation is not found in the + ``parameters`` argument. + """ + formatted_params = {} + for name in parameters: + escaped_name = name.replace('`', r'\`') + formatted_params[name] = '@`{}`'.format(escaped_name) + + try: + return operation % formatted_params + except KeyError as exc: + raise exceptions.ProgrammingError(exc) + + +def _format_operation(operation, parameters=None): + """Formats parameters in operation in way BigQuery expects. + + :type: str + :param operation: A Google BigQuery query string. + + :type: Mapping[str, Any] or Sequence[Any] + :param parameters: Optional parameter values. + + :rtype: str + :returns: A formatted query string. + :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError` + if a parameter used in the operation is not found in the + ``parameters`` argument. + """ + if parameters is None: + return operation + + if isinstance(parameters, collections.Mapping): + return _format_operation_dict(operation, parameters) + + return _format_operation_list(operation, parameters) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/exceptions.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/exceptions.py new file mode 100644 index 000000000000..77494e5ff1e1 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/exceptions.py @@ -0,0 +1,58 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Exceptions used in the Google BigQuery DB-API.""" + + +class Warning(Exception): + """Exception raised for important DB-API warnings.""" + + +class Error(Exception): + """Exception representing all non-warning DB-API errors.""" + + +class InterfaceError(Error): + """DB-API error related to the database interface.""" + + +class DatabaseError(Error): + """DB-API error related to the database.""" + + +class DataError(DatabaseError): + """DB-API error due to problems with the processed data.""" + + +class OperationalError(DatabaseError): + """DB-API error related to the database operation. + + These errors are not necessarily under the control of the programmer. + """ + + +class IntegrityError(DatabaseError): + """DB-API error when integrity of the database is affected.""" + + +class InternalError(DatabaseError): + """DB-API error when the database encounters an internal error.""" + + +class ProgrammingError(DatabaseError): + """DB-API exception raised for programming errors.""" + + +class NotSupportedError(DatabaseError): + """DB-API error for operations not supported by the database or API.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py new file mode 100644 index 000000000000..2d06f260e360 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py @@ -0,0 +1,84 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Types used in the Google BigQuery DB-API. + +See `PEP-249`_ for details. + +.. _PEP-249: + https://www.python.org/dev/peps/pep-0249/#type-objects-and-constructors +""" + +import datetime + + +Date = datetime.date +Time = datetime.time +Timestamp = datetime.datetime +DateFromTicks = datetime.date.fromtimestamp +TimestampFromTicks = datetime.datetime.fromtimestamp + + +def Binary(string): + """Contruct a DB-API binary value. + + :type string: str + :param string: A string to encode as a binary value. + + :rtype: bytes + :returns: The UTF-8 encoded bytes representing the string. + """ + return string.encode('utf-8') + + +def TimeFromTicks(ticks, tz=None): + """Construct a DB-API time value from the given ticks value. + + :type ticks: float + :param ticks: + a number of seconds since the epoch; see the documentation of the + standard Python time module for details. + + :type tz: :class:`datetime.tzinfo` + :param tz: (Optional) time zone to use for conversion + + :rtype: :class:`datetime.time` + :returns: time represented by ticks. + """ + dt = datetime.datetime.fromtimestamp(ticks, tz=tz) + return dt.timetz() + + +class _DBAPITypeObject(object): + """DB-API type object which compares equal to many different strings. + + See `PEP-249`_ for details. + + .. _PEP-249: + https://www.python.org/dev/peps/pep-0249/#implementation-hints-for-module-authors + """ + + def __init__(self, *values): + self.values = values + + def __eq__(self, other): + return other in self.values + + +STRING = 'STRING' +BINARY = _DBAPITypeObject('BYTES', 'RECORD', 'STRUCT') +NUMBER = _DBAPITypeObject( + 'INTEGER', 'INT64', 'FLOAT', 'FLOAT64', 'BOOLEAN', 'BOOL') +DATETIME = _DBAPITypeObject('TIMESTAMP', 'DATE', 'TIME', 'DATETIME') +ROWID = 'ROWID' diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index cfd2c4856c80..3391ec2bd2d8 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -22,6 +22,7 @@ from google.cloud import bigquery from google.cloud._helpers import UTC +from google.cloud.bigquery import dbapi from google.cloud.exceptions import Forbidden from test_utils.retry import RetryErrors @@ -70,10 +71,12 @@ class Config(object): global state. """ CLIENT = None + CURSOR = None def setUpModule(): Config.CLIENT = bigquery.Client() + Config.CURSOR = dbapi.connect(Config.CLIENT).cursor() class TestBigQuery(unittest.TestCase): @@ -376,9 +379,6 @@ def test_load_table_from_local_file_then_dump_table(self): write_disposition='WRITE_EMPTY', ) - def _job_done(instance): - return instance.state.lower() == 'done' - # Retry until done. retry = RetryInstanceState(_job_done, max_tries=8) retry(job.reload)() @@ -417,9 +417,6 @@ def test_load_table_from_local_avro_file_then_dump_table(self): write_disposition='WRITE_TRUNCATE' ) - def _job_done(instance): - return instance.state.lower() == 'done' - # Retry until done. retry = RetryInstanceState(_job_done, max_tries=8) retry(job.reload)() @@ -492,9 +489,6 @@ def test_load_table_from_storage_then_dump_table(self): job.begin() - def _job_done(instance): - return instance.state in ('DONE', 'done') - # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds @@ -528,9 +522,6 @@ def test_job_cancel(self): job.begin() job.cancel() - def _job_done(instance): - return instance.state in ('DONE', 'done') - retry = RetryInstanceState(_job_done, max_tries=8) retry(job.reload)() @@ -544,7 +535,7 @@ def test_sync_query_w_legacy_sql_types(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) zoned = naive.replace(tzinfo=UTC) - EXAMPLES = [ + examples = [ { 'sql': 'SELECT 1', 'expected': 1, @@ -570,7 +561,7 @@ def test_sync_query_w_legacy_sql_types(self): 'expected': zoned, }, ] - for example in EXAMPLES: + for example in examples: query = Config.CLIENT.run_sync_query(example['sql']) query.use_legacy_sql = True query.run() @@ -578,11 +569,11 @@ def test_sync_query_w_legacy_sql_types(self): self.assertEqual(len(query.rows[0]), 1) self.assertEqual(query.rows[0][0], example['expected']) - def test_sync_query_w_standard_sql_types(self): + def _generate_standard_sql_types_examples(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) zoned = naive.replace(tzinfo=UTC) - EXAMPLES = [ + return [ { 'sql': 'SELECT 1', 'expected': 1, @@ -659,7 +650,10 @@ def test_sync_query_w_standard_sql_types(self): 'expected': [{u'_field_1': [1, 2]}], }, ] - for example in EXAMPLES: + + def test_sync_query_w_standard_sql_types(self): + examples = self._generate_standard_sql_types_examples() + for example in examples: query = Config.CLIENT.run_sync_query(example['sql']) query.use_legacy_sql = False query.run() @@ -667,6 +661,80 @@ def test_sync_query_w_standard_sql_types(self): self.assertEqual(len(query.rows[0]), 1) self.assertEqual(query.rows[0][0], example['expected']) + def test_dbapi_w_standard_sql_types(self): + examples = self._generate_standard_sql_types_examples() + for example in examples: + Config.CURSOR.execute(example['sql']) + self.assertEqual(Config.CURSOR.rowcount, 1) + row = Config.CURSOR.fetchone() + self.assertEqual(len(row), 1) + self.assertEqual(row[0], example['expected']) + row = Config.CURSOR.fetchone() + self.assertIsNone(row) + + def _load_table_for_dml(self, rows, dataset_name, table_name): + import csv + from google.cloud._testing import _NamedTemporaryFile + + dataset = Config.CLIENT.dataset(dataset_name) + retry_403(dataset.create)() + self.to_delete.append(dataset) + + greeting = bigquery.SchemaField( + 'greeting', 'STRING', mode='NULLABLE') + table = dataset.table(table_name, schema=[greeting]) + table.create() + self.to_delete.insert(0, table) + + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as csv_write: + writer = csv.writer(csv_write) + writer.writerow(('Greeting',)) + writer.writerows(rows) + + with open(temp.name, 'rb') as csv_read: + job = table.upload_from_file( + csv_read, + source_format='CSV', + skip_leading_rows=1, + create_disposition='CREATE_NEVER', + write_disposition='WRITE_EMPTY', + ) + + # Retry until done. + retry = RetryInstanceState(_job_done, max_tries=8) + retry(job.reload)() + self._fetch_single_page(table) + + def test_sync_query_w_dml(self): + dataset_name = _make_dataset_name('dml_tests') + table_name = 'test_table' + self._load_table_for_dml([('Hello World',)], dataset_name, table_name) + query_template = """UPDATE {}.{} + SET greeting = 'Guten Tag' + WHERE greeting = 'Hello World' + """ + + query = Config.CLIENT.run_sync_query( + query_template.format(dataset_name, table_name)) + query.use_legacy_sql = False + query.run() + + self.assertEqual(query.num_dml_affected_rows, 1) + + def test_dbapi_w_dml(self): + dataset_name = _make_dataset_name('dml_tests') + table_name = 'test_table' + self._load_table_for_dml([('Hello World',)], dataset_name, table_name) + query_template = """UPDATE {}.{} + SET greeting = 'Guten Tag' + WHERE greeting = 'Hello World' + """ + + Config.CURSOR.execute(query_template.format(dataset_name, table_name)) + self.assertEqual(Config.CURSOR.rowcount, 1) + self.assertIsNone(Config.CURSOR.fetchone()) + def test_sync_query_w_query_params(self): from google.cloud.bigquery._helpers import ArrayQueryParameter from google.cloud.bigquery._helpers import ScalarQueryParameter @@ -729,7 +797,7 @@ def test_sync_query_w_query_params(self): name='friends', array_type='STRING', values=[phred_name, bharney_name]) with_friends_param = StructQueryParameter(None, friends_param) - EXAMPLES = [ + examples = [ { 'sql': 'SELECT @question', 'expected': question, @@ -809,7 +877,7 @@ def test_sync_query_w_query_params(self): 'query_parameters': [with_friends_param], }, ] - for example in EXAMPLES: + for example in examples: query = Config.CLIENT.run_sync_query( example['sql'], query_parameters=example['query_parameters']) @@ -819,6 +887,105 @@ def test_sync_query_w_query_params(self): self.assertEqual(len(query.rows[0]), 1) self.assertEqual(query.rows[0][0], example['expected']) + def test_dbapi_w_query_parameters(self): + examples = [ + { + 'sql': 'SELECT %(boolval)s', + 'expected': True, + 'query_parameters': { + 'boolval': True, + }, + }, + { + 'sql': 'SELECT %(a "very" weird `name`)s', + 'expected': True, + 'query_parameters': { + 'a "very" weird `name`': True, + }, + }, + { + 'sql': 'SELECT %(select)s', + 'expected': True, + 'query_parameters': { + 'select': True, # this name is a keyword + }, + }, + { + 'sql': 'SELECT %s', + 'expected': False, + 'query_parameters': [False], + }, + { + 'sql': 'SELECT %(intval)s', + 'expected': 123, + 'query_parameters': { + 'intval': 123, + }, + }, + { + 'sql': 'SELECT %s', + 'expected': -123456789, + 'query_parameters': [-123456789], + }, + { + 'sql': 'SELECT %(floatval)s', + 'expected': 1.25, + 'query_parameters': { + 'floatval': 1.25, + }, + }, + { + 'sql': 'SELECT LOWER(%(strval)s)', + 'query_parameters': { + 'strval': 'I Am A String', + }, + 'expected': 'i am a string', + }, + { + 'sql': 'SELECT DATE_SUB(%(dateval)s, INTERVAL 1 DAY)', + 'query_parameters': { + 'dateval': datetime.date(2017, 4, 2), + }, + 'expected': datetime.date(2017, 4, 1), + }, + { + 'sql': 'SELECT TIME_ADD(%(timeval)s, INTERVAL 4 SECOND)', + 'query_parameters': { + 'timeval': datetime.time(12, 34, 56), + }, + 'expected': datetime.time(12, 35, 0), + }, + { + 'sql': ( + 'SELECT DATETIME_ADD(%(datetimeval)s, INTERVAL 53 SECOND)' + ), + 'query_parameters': { + 'datetimeval': datetime.datetime(2012, 3, 4, 5, 6, 7), + }, + 'expected': datetime.datetime(2012, 3, 4, 5, 7, 0), + }, + { + 'sql': 'SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)', + 'query_parameters': { + 'zoned': datetime.datetime( + 2012, 3, 4, 5, 6, 7, tzinfo=UTC), + }, + 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC), + }, + ] + for example in examples: + msg = 'sql: {} query_parameters: {}'.format( + example['sql'], example['query_parameters']) + + Config.CURSOR.execute(example['sql'], example['query_parameters']) + + self.assertEqual(Config.CURSOR.rowcount, 1, msg=msg) + row = Config.CURSOR.fetchone() + self.assertEqual(len(row), 1, msg=msg) + self.assertEqual(row[0], example['expected'], msg=msg) + row = Config.CURSOR.fetchone() + self.assertIsNone(row, msg=msg) + def test_dump_table_w_public_data(self): PUBLIC = 'bigquery-public-data' DATASET_NAME = 'samples' @@ -950,3 +1117,7 @@ def test_create_table_insert_fetch_nested_schema(self): parts = time.strptime(expected[7], '%Y-%m-%dT%H:%M:%S') e_favtime = datetime.datetime(*parts[0:6]) self.assertEqual(found[7], e_favtime) # FavoriteTime + + +def _job_done(instance): + return instance.state.lower() == 'done' diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py new file mode 100644 index 000000000000..e030ed49df0c --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -0,0 +1,134 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import math +import unittest + +import mock + +import google.cloud._helpers +from google.cloud.bigquery.dbapi import _helpers +from google.cloud.bigquery.dbapi import exceptions + + +class Test_wait_for_job(unittest.TestCase): + + def _mock_job(self): + from google.cloud.bigquery import job + mock_job = mock.create_autospec(job.QueryJob) + mock_job.state = 'RUNNING' + mock_job._mocked_iterations = 0 + + def mock_reload(): + mock_job._mocked_iterations += 1 + if mock_job._mocked_iterations >= 2: + mock_job.state = 'DONE' + + mock_job.reload.side_effect = mock_reload + return mock_job + + def _call_fut(self, job): + from google.cloud.bigquery.dbapi._helpers import wait_for_job + with mock.patch('time.sleep'): + wait_for_job(job) + + def test_wo_error(self): + mock_job = self._mock_job() + mock_job.error_result = None + self._call_fut(mock_job) + self.assertEqual('DONE', mock_job.state) + + def test_w_error(self): + from google.cloud.bigquery.dbapi import exceptions + mock_job = self._mock_job() + mock_job.error_result = {'reason': 'invalidQuery'} + self.assertRaises(exceptions.DatabaseError, self._call_fut, mock_job) + self.assertEqual('DONE', mock_job.state) + + +class TestQueryParameters(unittest.TestCase): + + def test_scalar_to_query_parameter(self): + expected_types = [ + (True, 'BOOL'), + (False, 'BOOL'), + (123, 'INT64'), + (-123456789, 'INT64'), + (1.25, 'FLOAT64'), + (b'I am some bytes', 'BYTES'), + (u'I am a string', 'STRING'), + (datetime.date(2017, 4, 1), 'DATE'), + (datetime.time(12, 34, 56), 'TIME'), + (datetime.datetime(2012, 3, 4, 5, 6, 7), 'DATETIME'), + ( + datetime.datetime( + 2012, 3, 4, 5, 6, 7, tzinfo=google.cloud._helpers.UTC), + 'TIMESTAMP', + ), + ] + for value, expected_type in expected_types: + msg = 'value: {} expected_type: {}'.format(value, expected_type) + parameter = _helpers.scalar_to_query_parameter(value) + self.assertIsNone(parameter.name, msg=msg) + self.assertEqual(parameter.type_, expected_type, msg=msg) + self.assertEqual(parameter.value, value, msg=msg) + named_parameter = _helpers.scalar_to_query_parameter( + value, name='myvar') + self.assertEqual(named_parameter.name, 'myvar', msg=msg) + self.assertEqual(named_parameter.type_, expected_type, msg=msg) + self.assertEqual(named_parameter.value, value, msg=msg) + + def test_scalar_to_query_parameter_w_unexpected_type(self): + with self.assertRaises(exceptions.ProgrammingError): + _helpers.scalar_to_query_parameter(value={'a': 'dictionary'}) + + def test_scalar_to_query_parameter_w_special_floats(self): + nan_parameter = _helpers.scalar_to_query_parameter(float('nan')) + self.assertTrue(math.isnan(nan_parameter.value)) + self.assertEqual(nan_parameter.type_, 'FLOAT64') + inf_parameter = _helpers.scalar_to_query_parameter(float('inf')) + self.assertTrue(math.isinf(inf_parameter.value)) + self.assertEqual(inf_parameter.type_, 'FLOAT64') + + def test_to_query_parameters_w_dict(self): + parameters = { + 'somebool': True, + 'somestring': u'a-string-value', + } + query_parameters = _helpers.to_query_parameters(parameters) + query_parameter_tuples = [] + for param in query_parameters: + query_parameter_tuples.append( + (param.name, param.type_, param.value)) + self.assertSequenceEqual( + sorted(query_parameter_tuples), + sorted([ + ('somebool', 'BOOL', True), + ('somestring', 'STRING', u'a-string-value'), + ])) + + def test_to_query_parameters_w_list(self): + parameters = [True, u'a-string-value'] + query_parameters = _helpers.to_query_parameters(parameters) + query_parameter_tuples = [] + for param in query_parameters: + query_parameter_tuples.append( + (param.name, param.type_, param.value)) + self.assertSequenceEqual( + sorted(query_parameter_tuples), + sorted([ + (None, 'BOOL', True), + (None, 'STRING', u'a-string-value'), + ])) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py new file mode 100644 index 000000000000..d30852377852 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -0,0 +1,73 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import mock + + +class TestConnection(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.dbapi import Connection + return Connection + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def _mock_client(self, rows=None, schema=None): + from google.cloud.bigquery import client + mock_client = mock.create_autospec(client.Client) + return mock_client + + def test_ctor(self): + from google.cloud.bigquery.dbapi import Connection + mock_client = self._mock_client() + connection = self._make_one(client=mock_client) + self.assertIsInstance(connection, Connection) + self.assertIs(connection._client, mock_client) + + @mock.patch('google.cloud.bigquery.Client', autospec=True) + def test_connect_wo_client(self, mock_client): + from google.cloud.bigquery.dbapi import connect + from google.cloud.bigquery.dbapi import Connection + connection = connect() + self.assertIsInstance(connection, Connection) + self.assertIsNotNone(connection._client) + + def test_connect_w_client(self): + from google.cloud.bigquery.dbapi import connect + from google.cloud.bigquery.dbapi import Connection + mock_client = self._mock_client() + connection = connect(client=mock_client) + self.assertIsInstance(connection, Connection) + self.assertIs(connection._client, mock_client) + + def test_close(self): + connection = self._make_one(client=self._mock_client()) + # close() is a no-op, there is nothing to test. + connection.close() + + def test_commit(self): + connection = self._make_one(client=self._mock_client()) + # commit() is a no-op, there is nothing to test. + connection.commit() + + def test_cursor(self): + from google.cloud.bigquery.dbapi import Cursor + connection = self._make_one(client=self._mock_client()) + cursor = connection.cursor() + self.assertIsInstance(cursor, Cursor) + self.assertIs(cursor.connection, connection) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py new file mode 100644 index 000000000000..901d2f176785 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -0,0 +1,269 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import mock + + +class TestCursor(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.dbapi import Cursor + return Cursor + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def _mock_client( + self, rows=None, schema=None, num_dml_affected_rows=None): + from google.cloud.bigquery import client + mock_client = mock.create_autospec(client.Client) + mock_client.run_async_query.return_value = self._mock_job( + rows=rows, schema=schema, + num_dml_affected_rows=num_dml_affected_rows) + return mock_client + + def _mock_job( + self, rows=None, schema=None, num_dml_affected_rows=None): + from google.cloud.bigquery import job + mock_job = mock.create_autospec(job.QueryJob) + mock_job.error_result = None + mock_job.state = 'DONE' + mock_job.results.return_value = self._mock_results( + rows=rows, schema=schema, + num_dml_affected_rows=num_dml_affected_rows) + return mock_job + + def _mock_results( + self, rows=None, schema=None, num_dml_affected_rows=None): + from google.cloud.bigquery import query + mock_results = mock.create_autospec(query.QueryResults) + mock_results.schema = schema + mock_results.num_dml_affected_rows = num_dml_affected_rows + + if rows is None: + mock_results.total_rows = 0 + else: + mock_results.total_rows = len(rows) + + mock_results.fetch_data.return_value = rows + return mock_results + + def test_ctor(self): + from google.cloud.bigquery.dbapi import connect + from google.cloud.bigquery.dbapi import Cursor + connection = connect(self._mock_client()) + cursor = self._make_one(connection) + self.assertIsInstance(cursor, Cursor) + self.assertIs(cursor.connection, connection) + + def test_close(self): + from google.cloud.bigquery.dbapi import connect + connection = connect(self._mock_client()) + cursor = connection.cursor() + # close() is a no-op, there is nothing to test. + cursor.close() + + def test_fetchone_wo_execute_raises_error(self): + from google.cloud.bigquery import dbapi + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + self.assertRaises(dbapi.Error, cursor.fetchone) + + def test_fetchone_w_row(self): + from google.cloud.bigquery import dbapi + connection = dbapi.connect( + self._mock_client(rows=[(1,)])) + cursor = connection.cursor() + cursor.execute('SELECT 1;') + row = cursor.fetchone() + self.assertEquals(row, (1,)) + self.assertIsNone(cursor.fetchone()) + + def test_fetchmany_wo_execute_raises_error(self): + from google.cloud.bigquery import dbapi + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + self.assertRaises(dbapi.Error, cursor.fetchmany) + + def test_fetchmany_w_row(self): + from google.cloud.bigquery import dbapi + connection = dbapi.connect( + self._mock_client(rows=[(1,)])) + cursor = connection.cursor() + cursor.execute('SELECT 1;') + rows = cursor.fetchmany() + self.assertEquals(len(rows), 1) + self.assertEquals(rows[0], (1,)) + + def test_fetchmany_w_size(self): + from google.cloud.bigquery import dbapi + connection = dbapi.connect( + self._mock_client( + rows=[ + (1, 2, 3), + (4, 5, 6), + (7, 8, 9), + ])) + cursor = connection.cursor() + cursor.execute('SELECT a, b, c;') + rows = cursor.fetchmany(size=2) + self.assertEquals(len(rows), 2) + self.assertEquals(rows[0], (1, 2, 3)) + self.assertEquals(rows[1], (4, 5, 6)) + second_page = cursor.fetchmany(size=2) + self.assertEquals(len(second_page), 1) + self.assertEquals(second_page[0], (7, 8, 9)) + third_page = cursor.fetchmany(size=2) + self.assertEquals(third_page, []) + + def test_fetchmany_w_arraysize(self): + from google.cloud.bigquery import dbapi + connection = dbapi.connect( + self._mock_client( + rows=[ + (1, 2, 3), + (4, 5, 6), + (7, 8, 9), + ])) + cursor = connection.cursor() + cursor.arraysize = 2 + cursor.execute('SELECT a, b, c;') + rows = cursor.fetchmany() + self.assertEquals(len(rows), 2) + self.assertEquals(rows[0], (1, 2, 3)) + self.assertEquals(rows[1], (4, 5, 6)) + second_page = cursor.fetchmany() + self.assertEquals(len(second_page), 1) + self.assertEquals(second_page[0], (7, 8, 9)) + third_page = cursor.fetchmany() + self.assertEquals(third_page, []) + + def test_fetchall_wo_execute_raises_error(self): + from google.cloud.bigquery import dbapi + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + self.assertRaises(dbapi.Error, cursor.fetchall) + + def test_fetchall_w_row(self): + from google.cloud.bigquery import dbapi + connection = dbapi.connect( + self._mock_client(rows=[(1,)])) + cursor = connection.cursor() + cursor.execute('SELECT 1;') + self.assertIsNone(cursor.description) + self.assertEquals(cursor.rowcount, 1) + rows = cursor.fetchall() + self.assertEquals(len(rows), 1) + self.assertEquals(rows[0], (1,)) + + def test_execute_w_dml(self): + from google.cloud.bigquery.dbapi import connect + connection = connect( + self._mock_client(rows=[], num_dml_affected_rows=12)) + cursor = connection.cursor() + cursor.execute('DELETE FROM UserSessions WHERE user_id = \'test\';') + self.assertIsNone(cursor.description) + self.assertEquals(cursor.rowcount, 12) + + def test_execute_w_query(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery import dbapi + + connection = dbapi.connect(self._mock_client( + rows=[('hello', 'world', 1), ('howdy', 'y\'all', 2)], + schema=[ + SchemaField('a', 'STRING', mode='NULLABLE'), + SchemaField('b', 'STRING', mode='REQUIRED'), + SchemaField('c', 'INTEGER', mode='NULLABLE')])) + cursor = connection.cursor() + cursor.execute('SELECT a, b, c FROM hello_world WHERE d > 3;') + + # Verify the description. + self.assertEquals(len(cursor.description), 3) + a_name, a_type, _, _, _, _, a_null_ok = cursor.description[0] + self.assertEquals(a_name, 'a') + self.assertEquals(a_type, 'STRING') + self.assertEquals(a_type, dbapi.STRING) + self.assertTrue(a_null_ok) + b_name, b_type, _, _, _, _, b_null_ok = cursor.description[1] + self.assertEquals(b_name, 'b') + self.assertEquals(b_type, 'STRING') + self.assertEquals(b_type, dbapi.STRING) + self.assertFalse(b_null_ok) + c_name, c_type, _, _, _, _, c_null_ok = cursor.description[2] + self.assertEquals(c_name, 'c') + self.assertEquals(c_type, 'INTEGER') + self.assertEquals(c_type, dbapi.NUMBER) + self.assertTrue(c_null_ok) + + # Verify the results. + self.assertEquals(cursor.rowcount, 2) + row = cursor.fetchone() + self.assertEquals(row, ('hello', 'world', 1)) + row = cursor.fetchone() + self.assertEquals(row, ('howdy', 'y\'all', 2)) + row = cursor.fetchone() + self.assertIsNone(row) + + def test_executemany_w_dml(self): + from google.cloud.bigquery.dbapi import connect + connection = connect( + self._mock_client(rows=[], num_dml_affected_rows=12)) + cursor = connection.cursor() + cursor.executemany( + 'DELETE FROM UserSessions WHERE user_id = %s;', + (('test',), ('anothertest',))) + self.assertIsNone(cursor.description) + self.assertEquals(cursor.rowcount, 12) + + def test__format_operation_w_dict(self): + from google.cloud.bigquery.dbapi import cursor + formatted_operation = cursor._format_operation( + 'SELECT %(somevalue)s, %(a `weird` one)s;', + { + 'somevalue': 'hi', + 'a `weird` one': 'world', + }) + self.assertEquals( + formatted_operation, 'SELECT @`somevalue`, @`a \\`weird\\` one`;') + + def test__format_operation_w_wrong_dict(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + 'SELECT %(somevalue)s, %(othervalue)s;', + { + 'somevalue-not-here': 'hi', + 'othervalue': 'world', + }) + + def test__format_operation_w_sequence(self): + from google.cloud.bigquery.dbapi import cursor + formatted_operation = cursor._format_operation( + 'SELECT %s, %s;', ('hello', 'world')) + self.assertEquals(formatted_operation, 'SELECT ?, ?;') + + def test__format_operation_w_too_short_sequence(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + 'SELECT %s, %s;', + ('hello',)) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py new file mode 100644 index 000000000000..afd45b259263 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py @@ -0,0 +1,40 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import unittest + +import google.cloud._helpers +from google.cloud.bigquery.dbapi import types + + +class TestTypes(unittest.TestCase): + def test_binary_type(self): + self.assertEqual('BYTES', types.BINARY) + self.assertEqual('RECORD', types.BINARY) + self.assertEqual('STRUCT', types.BINARY) + self.assertNotEqual('STRING', types.BINARY) + + def test_binary_constructor(self): + self.assertEqual(types.Binary(u'hello'), b'hello') + self.assertEqual(types.Binary(u'\u1f60'), u'\u1f60'.encode('utf-8')) + + def test_timefromticks(self): + somedatetime = datetime.datetime( + 2017, 2, 18, 12, 47, 26, tzinfo=google.cloud._helpers.UTC) + epoch = datetime.datetime(1970, 1, 1, tzinfo=google.cloud._helpers.UTC) + ticks = (somedatetime - epoch).total_seconds() + self.assertEqual( + types.TimeFromTicks(ticks, google.cloud._helpers.UTC), + datetime.time(12, 47, 26, tzinfo=google.cloud._helpers.UTC)) From 82eaacae3edb22d6ee0ae6b0cb0c6163a0ee1a33 Mon Sep 17 00:00:00 2001 From: David Raleigh Date: Sun, 16 Jul 2017 09:09:26 -0700 Subject: [PATCH 0134/2016] fix big query documentation broken link (#3611) closes issue https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3610 --- packages/google-cloud-bigquery/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 1dcea16e0cc5..97a94366a49a 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -9,7 +9,7 @@ Python Client for Google BigQuery - `Documentation`_ -.. _Documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery-usage.html +.. _Documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery/usage.html Quick Start ----------- From 6bb3fd15ac076123dae9a041c6f2eefcece25f57 Mon Sep 17 00:00:00 2001 From: Argyris Zymnis Date: Mon, 17 Jul 2017 10:54:45 -0700 Subject: [PATCH 0135/2016] Add a __hash__ implementation to SchemaField (#3601) * Add a __hash__ implementation to SchemaField * Modify default list of subfields to be the empty tuple * Making SchemaField immutable. * Adding SchemaField.__ne__. --- .../google/cloud/bigquery/schema.py | 92 ++++++++++-- .../google/cloud/bigquery/table.py | 4 +- .../tests/unit/test_query.py | 6 +- .../tests/unit/test_schema.py | 136 +++++++++++++----- 4 files changed, 186 insertions(+), 52 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 6d4a437a809f..faec69f616da 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -26,27 +26,89 @@ class SchemaField(object): 'FLOAT', 'BOOLEAN', 'TIMESTAMP' or 'RECORD'). :type mode: str - :param mode: the type of the field (one of 'NULLABLE', 'REQUIRED', + :param mode: the mode of the field (one of 'NULLABLE', 'REQUIRED', or 'REPEATED'). :type description: str :param description: optional description for the field. - :type fields: list of :class:`SchemaField`, or None + :type fields: tuple of :class:`SchemaField` :param fields: subfields (requires ``field_type`` of 'RECORD'). """ - def __init__(self, name, field_type, mode='NULLABLE', description=None, - fields=None): - self.name = name - self.field_type = field_type - self.mode = mode - self.description = description - self.fields = fields + def __init__(self, name, field_type, mode='NULLABLE', + description=None, fields=()): + self._name = name + self._field_type = field_type + self._mode = mode + self._description = description + self._fields = tuple(fields) - def __eq__(self, other): + @property + def name(self): + """str: The name of the field.""" + return self._name + + @property + def field_type(self): + """str: The type of the field. + + Will be one of 'STRING', 'INTEGER', 'FLOAT', 'BOOLEAN', + 'TIMESTAMP' or 'RECORD'. + """ + return self._field_type + + @property + def mode(self): + """str: The mode of the field. + + Will be one of 'NULLABLE', 'REQUIRED', or 'REPEATED'. + """ + return self._mode + + @property + def description(self): + """Optional[str]: Description for the field.""" + return self._description + + @property + def fields(self): + """tuple: Subfields contained in this field. + + If ``field_type`` is not 'RECORD', this property must be + empty / unset. + """ + return self._fields + + def _key(self): + """A tuple key that unique-ly describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`SchemaField`. + """ return ( - self.name == other.name and - self.field_type.lower() == other.field_type.lower() and - self.mode == other.mode and - self.description == other.description and - self.fields == other.fields) + self._name, + self._field_type.lower(), + self._mode, + self._description, + self._fields, + ) + + def __eq__(self, other): + if isinstance(other, SchemaField): + return self._key() == other._key() + else: + return NotImplemented + + def __ne__(self, other): + if isinstance(other, SchemaField): + return self._key() != other._key() + else: + return NotImplemented + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + return 'SchemaField{}'.format(self._key()) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 37dc1159cc8e..2c4064e83e8f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1079,7 +1079,7 @@ def _parse_schema_resource(info): present in ``info``. """ if 'fields' not in info: - return None + return () schema = [] for r_field in info['fields']: @@ -1109,7 +1109,7 @@ def _build_schema_resource(fields): 'mode': field.mode} if field.description is not None: info['description'] = field.description - if field.fields is not None: + if field.fields: info['fields'] = _build_schema_resource(field.fields) infos.append(info) return infos diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index d7977a4e7d0c..76d5057f6450 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -88,9 +88,9 @@ def _verifySchema(self, query, resource): self.assertEqual(found.mode, expected['mode']) self.assertEqual(found.description, expected.get('description')) - self.assertEqual(found.fields, expected.get('fields')) + self.assertEqual(found.fields, expected.get('fields', ())) else: - self.assertIsNone(query.schema) + self.assertEqual(query.schema, ()) def _verifyRows(self, query, resource): expected = resource.get('rows') @@ -166,7 +166,7 @@ def test_ctor_defaults(self): self.assertIsNone(query.page_token) self.assertEqual(query.query_parameters, []) self.assertEqual(query.rows, []) - self.assertIsNone(query.schema) + self.assertEqual(query.schema, ()) self.assertIsNone(query.total_rows) self.assertIsNone(query.total_bytes_processed) self.assertEqual(query.udf_resources, []) diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 8081fcd6f4e0..018736d31bc1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -26,43 +26,72 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - def test_ctor_defaults(self): + def test_constructor_defaults(self): field = self._make_one('test', 'STRING') - self.assertEqual(field.name, 'test') - self.assertEqual(field.field_type, 'STRING') - self.assertEqual(field.mode, 'NULLABLE') - self.assertIsNone(field.description) - self.assertIsNone(field.fields) + self.assertEqual(field._name, 'test') + self.assertEqual(field._field_type, 'STRING') + self.assertEqual(field._mode, 'NULLABLE') + self.assertIsNone(field._description) + self.assertEqual(field._fields, ()) - def test_ctor_explicit(self): + def test_constructor_explicit(self): field = self._make_one('test', 'STRING', mode='REQUIRED', description='Testing') - self.assertEqual(field.name, 'test') - self.assertEqual(field.field_type, 'STRING') - self.assertEqual(field.mode, 'REQUIRED') - self.assertEqual(field.description, 'Testing') - self.assertIsNone(field.fields) - - def test_ctor_subfields(self): + self.assertEqual(field._name, 'test') + self.assertEqual(field._field_type, 'STRING') + self.assertEqual(field._mode, 'REQUIRED') + self.assertEqual(field._description, 'Testing') + self.assertEqual(field._fields, ()) + + def test_constructor_subfields(self): + sub_field1 = self._make_one('area_code', 'STRING') + sub_field2 = self._make_one('local_number', 'STRING') field = self._make_one( - 'phone_number', 'RECORD', - fields=[self._make_one('area_code', 'STRING'), - self._make_one('local_number', 'STRING')]) - self.assertEqual(field.name, 'phone_number') - self.assertEqual(field.field_type, 'RECORD') - self.assertEqual(field.mode, 'NULLABLE') - self.assertIsNone(field.description) - self.assertEqual(len(field.fields), 2) - self.assertEqual(field.fields[0].name, 'area_code') - self.assertEqual(field.fields[0].field_type, 'STRING') - self.assertEqual(field.fields[0].mode, 'NULLABLE') - self.assertIsNone(field.fields[0].description) - self.assertIsNone(field.fields[0].fields) - self.assertEqual(field.fields[1].name, 'local_number') - self.assertEqual(field.fields[1].field_type, 'STRING') - self.assertEqual(field.fields[1].mode, 'NULLABLE') - self.assertIsNone(field.fields[1].description) - self.assertIsNone(field.fields[1].fields) + 'phone_number', + 'RECORD', + fields=[sub_field1, sub_field2], + ) + self.assertEqual(field._name, 'phone_number') + self.assertEqual(field._field_type, 'RECORD') + self.assertEqual(field._mode, 'NULLABLE') + self.assertIsNone(field._description) + self.assertEqual(len(field._fields), 2) + self.assertIs(field._fields[0], sub_field1) + self.assertIs(field._fields[1], sub_field2) + + def test_name_property(self): + name = 'lemon-ness' + schema_field = self._make_one(name, 'INTEGER') + self.assertIs(schema_field.name, name) + + def test_field_type_property(self): + field_type = 'BOOLEAN' + schema_field = self._make_one('whether', field_type) + self.assertIs(schema_field.field_type, field_type) + + def test_mode_property(self): + mode = 'REPEATED' + schema_field = self._make_one('again', 'FLOAT', mode=mode) + self.assertIs(schema_field.mode, mode) + + def test_description_property(self): + description = 'It holds some data.' + schema_field = self._make_one( + 'do', 'TIMESTAMP', description=description) + self.assertIs(schema_field.description, description) + + def test_fields_property(self): + sub_field1 = self._make_one('one', 'STRING') + sub_field2 = self._make_one('fish', 'INTEGER') + fields = (sub_field1, sub_field2) + schema_field = self._make_one('boat', 'RECORD', fields=fields) + self.assertIs(schema_field.fields, fields) + + def test___eq___wrong_type(self): + field = self._make_one('test', 'STRING') + other = object() + self.assertNotEqual(field, other) + self.assertIs(field.__eq__(other), NotImplemented) def test___eq___name_mismatch(self): field = self._make_one('test', 'STRING') @@ -111,3 +140,46 @@ def test___eq___hit_w_fields(self): field = self._make_one('test', 'RECORD', fields=[sub1, sub2]) other = self._make_one('test', 'RECORD', fields=[sub1, sub2]) self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one('toast', 'INTEGER') + other = object() + self.assertNotEqual(field, other) + self.assertIs(field.__ne__(other), NotImplemented) + + def test___ne___same_value(self): + field1 = self._make_one('test', 'TIMESTAMP', mode='REPEATED') + field2 = self._make_one('test', 'TIMESTAMP', mode='REPEATED') + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one( + 'test1', 'FLOAT', mode='REPEATED', description='Not same') + field2 = self._make_one( + 'test2', 'FLOAT', mode='NULLABLE', description='Knot saym') + self.assertNotEqual(field1, field2) + + def test___hash__set_equality(self): + sub1 = self._make_one('sub1', 'STRING') + sub2 = self._make_one('sub2', 'STRING') + field1 = self._make_one('test', 'RECORD', fields=[sub1]) + field2 = self._make_one('test', 'RECORD', fields=[sub2]) + set_one = {field1, field2} + set_two = {field1, field2} + self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + sub1 = self._make_one('sub1', 'STRING') + sub2 = self._make_one('sub2', 'STRING') + field1 = self._make_one('test', 'RECORD', fields=[sub1]) + field2 = self._make_one('test', 'RECORD', fields=[sub2]) + set_one = {field1} + set_two = {field2} + self.assertNotEqual(set_one, set_two) + + def test___repr__(self): + field1 = self._make_one('field1', 'STRING') + expected = "SchemaField('field1', 'string', 'NULLABLE', None, ())" + self.assertEqual(repr(field1), expected) From 7594531724c8bbfed0382c299cf56faec4cc78b4 Mon Sep 17 00:00:00 2001 From: Evawere Ogbe Date: Mon, 17 Jul 2017 12:29:46 -0700 Subject: [PATCH 0136/2016] Add bigquery jobid to table (#3605) --- .../google/cloud/bigquery/table.py | 15 ++++++++++++--- .../tests/unit/test_table.py | 16 ++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 2c4064e83e8f..7e21e35d1fb0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -842,7 +842,8 @@ def upload_from_file(self, quote_character=None, skip_leading_rows=None, write_disposition=None, - client=None): + client=None, + job_name=None): """Upload the contents of this table from a file-like object. The content type of the upload will either be @@ -915,6 +916,10 @@ def upload_from_file(self, :param client: Optional. The client to use. If not passed, falls back to the ``client`` stored on the current dataset. + :type job_name: str + :param job_name: Optional. The id of the job. Generated if not + explicitly passed in. + :rtype: :class:`google.cloud.bigquery.jobs.LoadTableFromStorageJob` :returns: the job instance used to load the data (e.g., for querying status). Note that the job is already started: @@ -977,7 +982,7 @@ def upload_from_file(self, encoding, field_delimiter, ignore_unknown_values, max_bad_records, quote_character, skip_leading_rows, - write_disposition) + write_disposition, job_name) upload = Upload(file_obj, content_type, total_bytes, auto_transfer=False) @@ -1033,7 +1038,8 @@ def _configure_job_metadata(metadata, # pylint: disable=too-many-arguments max_bad_records, quote_character, skip_leading_rows, - write_disposition): + write_disposition, + job_name): """Helper for :meth:`Table.upload_from_file`.""" load_config = metadata['configuration']['load'] @@ -1067,6 +1073,9 @@ def _configure_job_metadata(metadata, # pylint: disable=too-many-arguments if write_disposition is not None: load_config['writeDisposition'] = write_disposition + if job_name is not None: + load_config['jobReference'] = {'jobId': job_name} + def _parse_schema_resource(info): """Parse a resource fragment into a schema field. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index b27736fb896e..f535e8799628 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1844,6 +1844,22 @@ class _UploadConfig(object): self.assertEqual(req['body'], BODY) # pylint: enable=too-many-statements + def test_upload_from_file_w_jobid(self): + import json + from google.cloud._helpers import _to_bytes + + requested, PATH, BODY = self._upload_from_file_helper(job_name='foo') + parse_chunk = _email_chunk_parser() + req = requested[0] + ctype, boundary = [x.strip() + for x in req['headers']['content-type'].split(';')] + divider = b'--' + _to_bytes(boundary[len('boundary="'):-1]) + chunks = req['body'].split(divider)[1:-1] # discard prolog / epilog + text_msg = parse_chunk(chunks[0].strip()) + metadata = json.loads(text_msg._payload) + load_config = metadata['configuration']['load'] + self.assertEqual(load_config['jobReference'], {'jobId': 'foo'}) + class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): From bd99f2486fc4ae4bb912d78898e1105577e03548 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 18 Jul 2017 11:46:05 -0700 Subject: [PATCH 0137/2016] Using assertEqual instead of assertEquals. (#3619) `assertEquals` is deprecated (but still is a synonym). --- .../tests/unit/test_dbapi_cursor.py | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 901d2f176785..9671a27b8f8f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -90,7 +90,7 @@ def test_fetchone_w_row(self): cursor = connection.cursor() cursor.execute('SELECT 1;') row = cursor.fetchone() - self.assertEquals(row, (1,)) + self.assertEqual(row, (1,)) self.assertIsNone(cursor.fetchone()) def test_fetchmany_wo_execute_raises_error(self): @@ -106,8 +106,8 @@ def test_fetchmany_w_row(self): cursor = connection.cursor() cursor.execute('SELECT 1;') rows = cursor.fetchmany() - self.assertEquals(len(rows), 1) - self.assertEquals(rows[0], (1,)) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0], (1,)) def test_fetchmany_w_size(self): from google.cloud.bigquery import dbapi @@ -121,14 +121,14 @@ def test_fetchmany_w_size(self): cursor = connection.cursor() cursor.execute('SELECT a, b, c;') rows = cursor.fetchmany(size=2) - self.assertEquals(len(rows), 2) - self.assertEquals(rows[0], (1, 2, 3)) - self.assertEquals(rows[1], (4, 5, 6)) + self.assertEqual(len(rows), 2) + self.assertEqual(rows[0], (1, 2, 3)) + self.assertEqual(rows[1], (4, 5, 6)) second_page = cursor.fetchmany(size=2) - self.assertEquals(len(second_page), 1) - self.assertEquals(second_page[0], (7, 8, 9)) + self.assertEqual(len(second_page), 1) + self.assertEqual(second_page[0], (7, 8, 9)) third_page = cursor.fetchmany(size=2) - self.assertEquals(third_page, []) + self.assertEqual(third_page, []) def test_fetchmany_w_arraysize(self): from google.cloud.bigquery import dbapi @@ -143,14 +143,14 @@ def test_fetchmany_w_arraysize(self): cursor.arraysize = 2 cursor.execute('SELECT a, b, c;') rows = cursor.fetchmany() - self.assertEquals(len(rows), 2) - self.assertEquals(rows[0], (1, 2, 3)) - self.assertEquals(rows[1], (4, 5, 6)) + self.assertEqual(len(rows), 2) + self.assertEqual(rows[0], (1, 2, 3)) + self.assertEqual(rows[1], (4, 5, 6)) second_page = cursor.fetchmany() - self.assertEquals(len(second_page), 1) - self.assertEquals(second_page[0], (7, 8, 9)) + self.assertEqual(len(second_page), 1) + self.assertEqual(second_page[0], (7, 8, 9)) third_page = cursor.fetchmany() - self.assertEquals(third_page, []) + self.assertEqual(third_page, []) def test_fetchall_wo_execute_raises_error(self): from google.cloud.bigquery import dbapi @@ -165,10 +165,10 @@ def test_fetchall_w_row(self): cursor = connection.cursor() cursor.execute('SELECT 1;') self.assertIsNone(cursor.description) - self.assertEquals(cursor.rowcount, 1) + self.assertEqual(cursor.rowcount, 1) rows = cursor.fetchall() - self.assertEquals(len(rows), 1) - self.assertEquals(rows[0], (1,)) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0], (1,)) def test_execute_w_dml(self): from google.cloud.bigquery.dbapi import connect @@ -177,7 +177,7 @@ def test_execute_w_dml(self): cursor = connection.cursor() cursor.execute('DELETE FROM UserSessions WHERE user_id = \'test\';') self.assertIsNone(cursor.description) - self.assertEquals(cursor.rowcount, 12) + self.assertEqual(cursor.rowcount, 12) def test_execute_w_query(self): from google.cloud.bigquery.schema import SchemaField @@ -193,29 +193,29 @@ def test_execute_w_query(self): cursor.execute('SELECT a, b, c FROM hello_world WHERE d > 3;') # Verify the description. - self.assertEquals(len(cursor.description), 3) + self.assertEqual(len(cursor.description), 3) a_name, a_type, _, _, _, _, a_null_ok = cursor.description[0] - self.assertEquals(a_name, 'a') - self.assertEquals(a_type, 'STRING') - self.assertEquals(a_type, dbapi.STRING) + self.assertEqual(a_name, 'a') + self.assertEqual(a_type, 'STRING') + self.assertEqual(a_type, dbapi.STRING) self.assertTrue(a_null_ok) b_name, b_type, _, _, _, _, b_null_ok = cursor.description[1] - self.assertEquals(b_name, 'b') - self.assertEquals(b_type, 'STRING') - self.assertEquals(b_type, dbapi.STRING) + self.assertEqual(b_name, 'b') + self.assertEqual(b_type, 'STRING') + self.assertEqual(b_type, dbapi.STRING) self.assertFalse(b_null_ok) c_name, c_type, _, _, _, _, c_null_ok = cursor.description[2] - self.assertEquals(c_name, 'c') - self.assertEquals(c_type, 'INTEGER') - self.assertEquals(c_type, dbapi.NUMBER) + self.assertEqual(c_name, 'c') + self.assertEqual(c_type, 'INTEGER') + self.assertEqual(c_type, dbapi.NUMBER) self.assertTrue(c_null_ok) # Verify the results. - self.assertEquals(cursor.rowcount, 2) + self.assertEqual(cursor.rowcount, 2) row = cursor.fetchone() - self.assertEquals(row, ('hello', 'world', 1)) + self.assertEqual(row, ('hello', 'world', 1)) row = cursor.fetchone() - self.assertEquals(row, ('howdy', 'y\'all', 2)) + self.assertEqual(row, ('howdy', 'y\'all', 2)) row = cursor.fetchone() self.assertIsNone(row) @@ -228,7 +228,7 @@ def test_executemany_w_dml(self): 'DELETE FROM UserSessions WHERE user_id = %s;', (('test',), ('anothertest',))) self.assertIsNone(cursor.description) - self.assertEquals(cursor.rowcount, 12) + self.assertEqual(cursor.rowcount, 12) def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor @@ -238,7 +238,7 @@ def test__format_operation_w_dict(self): 'somevalue': 'hi', 'a `weird` one': 'world', }) - self.assertEquals( + self.assertEqual( formatted_operation, 'SELECT @`somevalue`, @`a \\`weird\\` one`;') def test__format_operation_w_wrong_dict(self): @@ -257,7 +257,7 @@ def test__format_operation_w_sequence(self): from google.cloud.bigquery.dbapi import cursor formatted_operation = cursor._format_operation( 'SELECT %s, %s;', ('hello', 'world')) - self.assertEquals(formatted_operation, 'SELECT ?, ?;') + self.assertEqual(formatted_operation, 'SELECT ?, ?;') def test__format_operation_w_too_short_sequence(self): from google.cloud.bigquery import dbapi From d1afb5e21ec523bfc7fa9deab8b62115d8064dc0 Mon Sep 17 00:00:00 2001 From: Son CHU Date: Tue, 18 Jul 2017 23:16:30 +0200 Subject: [PATCH 0138/2016] Add `is_nullable` method to check for `NULLABLE` mode (#3620) Resolves: #3548 --- .../google/cloud/bigquery/dbapi/cursor.py | 2 +- .../google/cloud/bigquery/schema.py | 5 +++++ .../google-cloud-bigquery/tests/unit/test_schema.py | 10 ++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 4398eec20b88..bcbb19cfd066 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -76,7 +76,7 @@ def _set_description(self, schema): internal_size=None, precision=None, scale=None, - null_ok=field.mode == 'NULLABLE') + null_ok=field.is_nullable) for field in schema]) def _set_rowcount(self, query_results): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index faec69f616da..edd8dd68f3bd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -65,6 +65,11 @@ def mode(self): """ return self._mode + @property + def is_nullable(self): + """Check whether 'mode' is 'nullable'.""" + return self._mode == 'NULLABLE' + @property def description(self): """Optional[str]: Description for the field.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 018736d31bc1..bf3cf2e025d1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -74,6 +74,16 @@ def test_mode_property(self): schema_field = self._make_one('again', 'FLOAT', mode=mode) self.assertIs(schema_field.mode, mode) + def test_is_nullable(self): + mode = 'NULLABLE' + schema_field = self._make_one('test', 'FLOAT', mode=mode) + self.assertTrue(schema_field.is_nullable) + + def test_is_not_nullable(self): + mode = 'REPEATED' + schema_field = self._make_one('test', 'FLOAT', mode=mode) + self.assertFalse(schema_field.is_nullable) + def test_description_property(self): description = 'It holds some data.' schema_field = self._make_one( From 0d2f02410175f7f3521e02881e3443e6732890a1 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Wed, 19 Jul 2017 14:58:17 -0700 Subject: [PATCH 0139/2016] Fixing references to "dead" docs links. (#3631) * Fixing references to "dead" docs links. Done via: $ git grep -l 'google-cloud-auth.html' | \ > xargs sed -i s/'google-cloud-auth.html'/'core\/auth.html'/g $ git grep -l 'http\:\/\/google-cloud-python.readthedocs.io' | \ > xargs sed -i s/'http\:\/\/google-cloud-python.readthedocs.io'/\ > 'https\:\/\/google-cloud-python.readthedocs.io'/g Fixes #3531. * Fixing up other docs that were moved in #3459. --- packages/google-cloud-bigquery/README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 97a94366a49a..bf5bc55f1fa4 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -26,7 +26,7 @@ possible. Check out the `Authentication section`_ in our documentation to learn more. You may also find the `authentication document`_ shared by all the ``google-cloud-*`` libraries to be helpful. -.. _Authentication section: http://google-cloud-python.readthedocs.io/en/latest/google-cloud-auth.html +.. _Authentication section: https://google-cloud-python.readthedocs.io/en/latest/core/auth.html .. _authentication document: https://github.com/GoogleCloudPlatform/gcloud-common/tree/master/authentication Using the API @@ -86,7 +86,7 @@ Perform a synchronous query See the ``google-cloud-python`` API `BigQuery documentation`_ to learn how to connect to BigQuery using this Client Library. -.. _BigQuery documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery-usage.html +.. _BigQuery documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery/usage.html .. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg :target: https://pypi.python.org/pypi/google-cloud-bigquery From 4d2e025fa5b510f30aa6ae0d7c12823ea9cfa2c9 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 20 Jul 2017 09:33:21 -0700 Subject: [PATCH 0140/2016] Changing all pypi.python.org links to warehouse links. (#3641) Done via $ export OLD='https\:\/\/pypi.python.org\/pypi\/' $ export NEW='https\:\/\/pypi.org\/project\/' $ git grep -l ${OLD} | xargs sed -i s/${OLD}/${NEW}/g Then manually going through and adding a trailing slash to all warehouse links. (Though I did undo changes to `docs/json/`.) --- packages/google-cloud-bigquery/README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index bf5bc55f1fa4..7e4f0cb72dae 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -89,6 +89,6 @@ to connect to BigQuery using this Client Library. .. _BigQuery documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery/usage.html .. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg - :target: https://pypi.python.org/pypi/google-cloud-bigquery + :target: https://pypi.org/project/google-cloud-bigquery/ .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg - :target: https://pypi.python.org/pypi/google-cloud-bigquery + :target: https://pypi.org/project/google-cloud-bigquery/ From 9f3d9065ae16dd091bea473581a8cfba43b99286 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Fri, 21 Jul 2017 14:41:13 -0700 Subject: [PATCH 0141/2016] Switched to google-resumable-media in BigQuery. (#3555) * Switched to google-resumable-media in BigQuery. * Upgrading google-resumable-media dependency to 0.2.1. --- .../google/cloud/bigquery/table.py | 432 ++++++--- packages/google-cloud-bigquery/nox.py | 33 +- packages/google-cloud-bigquery/setup.py | 3 + .../tests/unit/test_table.py | 872 +++++++++++------- 4 files changed, 861 insertions(+), 479 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 7e21e35d1fb0..f7752bb8fc36 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -15,22 +15,21 @@ """Define API Datasets.""" import datetime -import json import os import httplib2 import six +import google.auth.transport.requests +from google import resumable_media +from google.resumable_media.requests import MultipartUpload +from google.resumable_media.requests import ResumableUpload + from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime from google.cloud.exceptions import NotFound from google.cloud.exceptions import make_exception from google.cloud.iterator import HTTPIterator -from google.cloud.streaming.exceptions import HttpError -from google.cloud.streaming.http_wrapper import Request -from google.cloud.streaming.http_wrapper import make_api_request -from google.cloud.streaming.transfer import RESUMABLE_UPLOAD -from google.cloud.streaming.transfer import Upload from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start @@ -39,6 +38,17 @@ _TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'table.reload()'" _MARKER = object() +_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB +_BASE_UPLOAD_TEMPLATE = ( + u'https://www.googleapis.com/upload/bigquery/v2/projects/' + u'{project}/jobs?uploadType=') +_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'multipart' +_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'resumable' +_GENERIC_CONTENT_TYPE = u'*/*' +_READ_LESS_THAN_SIZE = ( + 'Size {:d} was specified but the file-like object only had ' + '{:d} bytes remaining.') +_DEFAULT_NUM_RETRIES = 6 class Table(object): @@ -815,15 +825,177 @@ def insert_data(self, return errors - @staticmethod - def _check_response_error(request, http_response): - """Helper for :meth:`upload_from_file`.""" - info = http_response.info - status = int(info['status']) - if not 200 <= status < 300: - faux_response = httplib2.Response({'status': status}) - raise make_exception(faux_response, http_response.content, - error_info=request.url) + def _make_transport(self, client): + """Make an authenticated transport with a client's credentials. + + :type client: :class:`~google.cloud.bigquery.client.Client` + :param client: The client to use. + + :rtype transport: + :class:`~google.auth.transport.requests.AuthorizedSession` + :returns: The transport (with credentials) that will + make authenticated requests. + """ + # Create a ``requests`` transport with the client's credentials. + transport = google.auth.transport.requests.AuthorizedSession( + client._credentials) + return transport + + def _initiate_resumable_upload(self, client, stream, + metadata, num_retries): + """Initiate a resumable upload. + + :type client: :class:`~google.cloud.bigquery.client.Client` + :param client: The client to use. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: tuple + :returns: + Pair of + + * The :class:`~google.resumable_media.requests.ResumableUpload` + that was created + * The ``transport`` used to initiate the upload. + """ + chunk_size = _DEFAULT_CHUNKSIZE + transport = self._make_transport(client) + headers = _get_upload_headers(client._connection.USER_AGENT) + upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) + upload = ResumableUpload(upload_url, chunk_size, headers=headers) + + if num_retries is not None: + upload._retry_strategy = resumable_media.RetryStrategy( + max_retries=num_retries) + + upload.initiate( + transport, stream, metadata, _GENERIC_CONTENT_TYPE, + stream_final=False) + + return upload, transport + + def _do_resumable_upload(self, client, stream, metadata, num_retries): + """Perform a resumable upload. + + :type client: :class:`~google.cloud.bigquery.client.Client` + :param client: The client to use. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: :class:`~requests.Response` + :returns: The "200 OK" response object returned after the final chunk + is uploaded. + """ + upload, transport = self._initiate_resumable_upload( + client, stream, metadata, num_retries) + + while not upload.finished: + response = upload.transmit_next_chunk(transport) + + return response + + def _do_multipart_upload(self, client, stream, metadata, + size, num_retries): + """Perform a multipart upload. + + :type client: :class:`~google.cloud.bigquery.client.Client` + :param client: The client to use. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type size: int + :param size: The number of bytes to be uploaded (which will be read + from ``stream``). If not provided, the upload will be + concluded once ``stream`` is exhausted (or :data:`None`). + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: :class:`~requests.Response` + :returns: The "200 OK" response object returned after the multipart + upload request. + :raises: :exc:`ValueError` if the ``stream`` has fewer than ``size`` + bytes remaining. + """ + data = stream.read(size) + if len(data) < size: + msg = _READ_LESS_THAN_SIZE.format(size, len(data)) + raise ValueError(msg) + + transport = self._make_transport(client) + headers = _get_upload_headers(client._connection.USER_AGENT) + + upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project) + upload = MultipartUpload(upload_url, headers=headers) + + if num_retries is not None: + upload._retry_strategy = resumable_media.RetryStrategy( + max_retries=num_retries) + + response = upload.transmit( + transport, data, metadata, _GENERIC_CONTENT_TYPE) + + return response + + def _do_upload(self, client, stream, metadata, size, num_retries): + """Determine an upload strategy and then perform the upload. + + If ``size`` is :data:`None`, then a resumable upload will be used, + otherwise the content and the metadata will be uploaded + in a single multipart upload request. + + :type client: :class:`~google.cloud.bigquery.client.Client` + :param client: The client to use. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type size: int + :param size: The number of bytes to be uploaded (which will be read + from ``stream``). If not provided, the upload will be + concluded once ``stream`` is exhausted (or :data:`None`). + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: dict + :returns: The parsed JSON from the "200 OK" response. This will be the + **only** response in the multipart case and it will be the + **final** response in the resumable case. + """ + if size is None: + response = self._do_resumable_upload( + client, stream, metadata, num_retries) + else: + response = self._do_multipart_upload( + client, stream, metadata, size, num_retries) + + return response.json() # pylint: disable=too-many-arguments,too-many-locals def upload_from_file(self, @@ -831,7 +1003,7 @@ def upload_from_file(self, source_format, rewind=False, size=None, - num_retries=6, + num_retries=_DEFAULT_NUM_RETRIES, allow_jagged_rows=None, allow_quoted_newlines=None, create_disposition=None, @@ -846,10 +1018,6 @@ def upload_from_file(self, job_name=None): """Upload the contents of this table from a file-like object. - The content type of the upload will either be - - The value passed in to the function (if any) - - ``text/csv``. - :type file_obj: file :param file_obj: A file handle opened in binary mode for reading. @@ -860,7 +1028,7 @@ def upload_from_file(self, :type rewind: bool :param rewind: If True, seek to the beginning of the file handle before - writing the file to Cloud Storage. + writing the file. :type size: int :param size: The number of bytes to read from the file handle. @@ -911,16 +1079,16 @@ def upload_from_file(self, :param write_disposition: job configuration option; see :meth:`google.cloud.bigquery.job.LoadJob`. - :type client: :class:`~google.cloud.storage.client.Client` or - ``NoneType`` - :param client: Optional. The client to use. If not passed, falls back - to the ``client`` stored on the current dataset. + :type client: :class:`~google.cloud.bigquery.client.Client` + :param client: (Optional) The client to use. If not passed, falls back + to the ``client`` stored on the current table. :type job_name: str :param job_name: Optional. The id of the job. Generated if not explicitly passed in. - :rtype: :class:`google.cloud.bigquery.jobs.LoadTableFromStorageJob` + :rtype: :class:`~google.cloud.bigquery.jobs.LoadTableFromStorageJob` + :returns: the job instance used to load the data (e.g., for querying status). Note that the job is already started: do not call ``job.begin()``. @@ -929,54 +1097,10 @@ def upload_from_file(self, a file opened in text mode. """ client = self._require_client(client) - connection = client._connection - content_type = 'application/octet-stream' - - # Rewind the file if desired. - if rewind: - file_obj.seek(0, os.SEEK_SET) - - mode = getattr(file_obj, 'mode', None) - - if mode is not None and mode not in ('rb', 'r+b', 'rb+'): - raise ValueError( - "Cannot upload files opened in text mode: use " - "open(filename, mode='rb') or open(filename, mode='r+b')") - - # Get the basic stats about the file. - total_bytes = size - if total_bytes is None: - if hasattr(file_obj, 'fileno'): - total_bytes = os.fstat(file_obj.fileno()).st_size - else: - raise ValueError('total bytes could not be determined. Please ' - 'pass an explicit size.') - headers = { - 'Accept': 'application/json', - 'Accept-Encoding': 'gzip, deflate', - 'User-Agent': connection.USER_AGENT, - 'content-type': 'application/json', - } - - metadata = { - 'configuration': { - 'load': { - 'sourceFormat': source_format, - 'destinationTable': { - 'projectId': self._dataset.project, - 'datasetId': self._dataset.name, - 'tableId': self.name, - } - } - } - } - - if len(self._schema) > 0: - load_config = metadata['configuration']['load'] - load_config['schema'] = { - 'fields': _build_schema_resource(self._schema) - } - + _maybe_rewind(file_obj, rewind=rewind) + _check_mode(file_obj) + metadata = _get_upload_metadata( + source_format, self._schema, self._dataset, self.name) _configure_job_metadata(metadata, allow_jagged_rows, allow_quoted_newlines, create_disposition, encoding, field_delimiter, @@ -984,47 +1108,12 @@ def upload_from_file(self, quote_character, skip_leading_rows, write_disposition, job_name) - upload = Upload(file_obj, content_type, total_bytes, - auto_transfer=False) - - url_builder = _UrlBuilder() - upload_config = _UploadConfig() - - # Base URL may change once we know simple vs. resumable. - base_url = connection.API_BASE_URL + '/upload' - path = '/projects/%s/jobs' % (self._dataset.project,) - upload_url = connection.build_api_url(api_base_url=base_url, path=path) - - # Use apitools 'Upload' facility. - request = Request(upload_url, 'POST', headers, - body=json.dumps(metadata)) - - upload.configure_request(upload_config, request, url_builder) - query_params = url_builder.query_params - base_url = connection.API_BASE_URL + '/upload' - request.url = connection.build_api_url(api_base_url=base_url, - path=path, - query_params=query_params) try: - upload.initialize_upload(request, connection.http) - except HttpError as err_response: - faux_response = httplib2.Response(err_response.response) - raise make_exception(faux_response, err_response.content, - error_info=request.url) - - if upload.strategy == RESUMABLE_UPLOAD: - http_response = upload.stream_file(use_chunks=True) - else: - http_response = make_api_request(connection.http, request, - retries=num_retries) - - self._check_response_error(request, http_response) - - response_content = http_response.content - if not isinstance(response_content, - six.string_types): # pragma: NO COVER Python3 - response_content = response_content.decode('utf-8') - return client.job_from_resource(json.loads(response_content)) + created_json = self._do_upload( + client, file_obj, metadata, size, num_retries) + return client.job_from_resource(created_json) + except resumable_media.InvalidResponse as exc: + _raise_from_invalid_response(exc) # pylint: enable=too-many-arguments,too-many-locals @@ -1122,20 +1211,109 @@ def _build_schema_resource(fields): info['fields'] = _build_schema_resource(field.fields) infos.append(info) return infos +# pylint: enable=unused-argument + +def _maybe_rewind(stream, rewind=False): + """Rewind the stream if desired. -class _UploadConfig(object): - """Faux message FBO apitools' 'configure_request'.""" - accept = ['*/*'] - max_size = None - resumable_multipart = True - resumable_path = u'/upload/bigquery/v2/projects/{project}/jobs' - simple_multipart = True - simple_path = u'/upload/bigquery/v2/projects/{project}/jobs' + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + :type rewind: bool + :param rewind: Indicates if we should seek to the beginning of the stream. + """ + if rewind: + stream.seek(0, os.SEEK_SET) -class _UrlBuilder(object): - """Faux builder FBO apitools' 'configure_request'""" - def __init__(self): - self.query_params = {} - self._relative_path = '' + +def _check_mode(stream): + """Check that a stream was opened in read-binary mode. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :raises: :exc:`ValueError` if the ``stream.mode`` is a valid attribute + and is not among ``rb``, ``r+b`` or ``rb+``. + """ + mode = getattr(stream, 'mode', None) + + if mode is not None and mode not in ('rb', 'r+b', 'rb+'): + raise ValueError( + "Cannot upload files opened in text mode: use " + "open(filename, mode='rb') or open(filename, mode='r+b')") + + +def _get_upload_headers(user_agent): + """Get the headers for an upload request. + + :type user_agent: str + :param user_agent: The user-agent for requests. + + :rtype: dict + :returns: The headers to be used for the request. + """ + return { + 'Accept': 'application/json', + 'Accept-Encoding': 'gzip, deflate', + 'User-Agent': user_agent, + 'content-type': 'application/json', + } + + +def _get_upload_metadata(source_format, schema, dataset, name): + """Get base metadata for creating a table. + + :type source_format: str + :param source_format: one of 'CSV' or 'NEWLINE_DELIMITED_JSON'. + job configuration option. + + :type schema: list + :param schema: List of :class:`SchemaField` associated with a table. + + :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset` + :param dataset: A dataset which contains a table. + + :type name: str + :param name: The name of the table. + + :rtype: dict + :returns: The metadata dictionary. + """ + load_config = { + 'sourceFormat': source_format, + 'destinationTable': { + 'projectId': dataset.project, + 'datasetId': dataset.name, + 'tableId': name, + }, + } + if schema: + load_config['schema'] = { + 'fields': _build_schema_resource(schema), + } + + return { + 'configuration': { + 'load': load_config, + }, + } + + +def _raise_from_invalid_response(error, error_info=None): + """Re-wrap and raise an ``InvalidResponse`` exception. + + :type error: :exc:`google.resumable_media.InvalidResponse` + :param error: A caught exception from the ``google-resumable-media`` + library. + + :type error_info: str + :param error_info: (Optional) Extra information about the failed request. + + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` corresponding + to the failed status code + """ + response = error.response + faux_response = httplib2.Response({'status': response.status_code}) + raise make_exception(faux_response, response.content, + error_info=error_info, use_json=False) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 19a8f5761701..989965443159 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -19,7 +19,9 @@ import nox -LOCAL_DEPS = ('../core/',) +LOCAL_DEPS = ( + os.path.join('..', 'core'), +) @nox.session @@ -38,10 +40,17 @@ def unit_tests(session, python_version): session.install('-e', '.') # Run py.test against the unit tests. - session.run('py.test', '--quiet', - '--cov=google.cloud.bigquery', '--cov=tests.unit', '--cov-append', - '--cov-config=.coveragerc', '--cov-report=', '--cov-fail-under=97', - 'tests/unit', + session.run( + 'py.test', + '--quiet', + '--cov=google.cloud.bigquery', + '--cov=tests.unit', + '--cov-append', + '--cov-config=.coveragerc', + '--cov-report=', + '--cov-fail-under=97', + os.path.join('tests', 'unit'), + *session.posargs ) @@ -63,11 +72,19 @@ def system_tests(session, python_version): # Install all test dependencies, then install this package into the # virutalenv's dist-packages. session.install('mock', 'pytest', *LOCAL_DEPS) - session.install('../storage/', '../test_utils/') + session.install( + os.path.join('..', 'storage'), + os.path.join('..', 'test_utils'), + ) session.install('.') # Run py.test against the system tests. - session.run('py.test', '--quiet', 'tests/system.py') + session.run( + 'py.test', + '--quiet', + os.path.join('tests', 'system.py'), + *session.posargs + ) @nox.session @@ -81,7 +98,7 @@ def lint(session): session.install('flake8', 'pylint', 'gcp-devrel-py-tools', *LOCAL_DEPS) session.install('.') - session.run('flake8', 'google/cloud/bigquery') + session.run('flake8', os.path.join('google', 'cloud', 'bigquery')) session.run('flake8', 'tests') session.run( 'gcp-devrel-py-tools', 'run-pylint', diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 6d61064c88ba..eeb2d90549d8 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -52,6 +52,9 @@ REQUIREMENTS = [ 'google-cloud-core >= 0.25.0, < 0.26dev', + 'google-auth >= 1.0.0', + 'google-resumable-media >= 0.2.1', + 'requests >= 2.0.0', ] setup( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index f535e8799628..502c0495f9c9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -12,8 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import email +import io +import json import unittest +import mock +from six.moves import http_client +import pytest + class _SchemaBase(object): @@ -31,7 +38,8 @@ def _verifySchema(self, schema, resource): class TestTable(unittest.TestCase, _SchemaBase): - PROJECT = 'project' + + PROJECT = 'prahj-ekt' DS_NAME = 'dataset-name' TABLE_NAME = 'table-name' @@ -1553,312 +1561,476 @@ def _row_data(row): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['data'], SENT) - def test_upload_from_file_text_mode_file_failure(self): + @mock.patch('google.auth.transport.requests.AuthorizedSession') + def test__make_transport(self, session_factory): + client = mock.Mock(spec=[u'_credentials']) + table = self._make_one(self.TABLE_NAME, None) + transport = table._make_transport(client) - class TextModeFile(object): - mode = 'r' + self.assertIs(transport, session_factory.return_value) + session_factory.assert_called_once_with(client._credentials) - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + @staticmethod + def _mock_requests_response(status_code, headers, content=b''): + return mock.Mock( + content=content, headers=headers, status_code=status_code, + spec=['content', 'headers', 'status_code']) + + def _mock_transport(self, status_code, headers, content=b''): + fake_transport = mock.Mock(spec=['request']) + fake_response = self._mock_requests_response( + status_code, headers, content=content) + fake_transport.request.return_value = fake_response + return fake_transport + + def _initiate_resumable_upload_helper(self, num_retries=None): + from google.resumable_media.requests import ResumableUpload + from google.cloud.bigquery.table import _DEFAULT_CHUNKSIZE + from google.cloud.bigquery.table import _GENERIC_CONTENT_TYPE + from google.cloud.bigquery.table import _get_upload_headers + from google.cloud.bigquery.table import _get_upload_metadata + + connection = _Connection() + client = _Client(self.PROJECT, connection=connection) dataset = _Dataset(client) - file_obj = TextModeFile() - table = self._make_one(self.TABLE_NAME, dataset=dataset) - with self.assertRaises(ValueError): - table.upload_from_file(file_obj, 'CSV', size=1234) + table = self._make_one(self.TABLE_NAME, dataset) - def test_upload_from_file_binary_mode_no_failure(self): - self._upload_from_file_helper(input_file_mode='r+b') + # Create mocks to be checked for doing transport. + resumable_url = 'http://test.invalid?upload_id=hey-you' + response_headers = {'location': resumable_url} + fake_transport = self._mock_transport( + http_client.OK, response_headers) + table._make_transport = mock.Mock( + return_value=fake_transport, spec=[]) + + # Create some mock arguments and call the method under test. + data = b'goodbye gudbi gootbee' + stream = io.BytesIO(data) + metadata = _get_upload_metadata( + 'CSV', table._schema, table._dataset, table.name) + upload, transport = table._initiate_resumable_upload( + client, stream, metadata, num_retries) + + # Check the returned values. + self.assertIsInstance(upload, ResumableUpload) + upload_url = ( + 'https://www.googleapis.com/upload/bigquery/v2/projects/' + + self.PROJECT + + '/jobs?uploadType=resumable') + self.assertEqual(upload.upload_url, upload_url) + expected_headers = _get_upload_headers(connection.USER_AGENT) + self.assertEqual(upload._headers, expected_headers) + self.assertFalse(upload.finished) + self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE) + self.assertIs(upload._stream, stream) + self.assertIsNone(upload._total_bytes) + self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE) + self.assertEqual(upload.resumable_url, resumable_url) + + retry_strategy = upload._retry_strategy + self.assertEqual(retry_strategy.max_sleep, 64.0) + if num_retries is None: + self.assertEqual(retry_strategy.max_cumulative_retry, 600.0) + self.assertIsNone(retry_strategy.max_retries) + else: + self.assertIsNone(retry_strategy.max_cumulative_retry) + self.assertEqual(retry_strategy.max_retries, num_retries) + self.assertIs(transport, fake_transport) + # Make sure we never read from the stream. + self.assertEqual(stream.tell(), 0) + + # Check the mocks. + table._make_transport.assert_called_once_with(client) + request_headers = expected_headers.copy() + request_headers['x-upload-content-type'] = _GENERIC_CONTENT_TYPE + fake_transport.request.assert_called_once_with( + 'POST', + upload_url, + data=json.dumps(metadata).encode('utf-8'), + headers=request_headers, + ) - def test_upload_from_file_size_failure(self): - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - file_obj = object() - table = self._make_one(self.TABLE_NAME, dataset=dataset) - with self.assertRaises(ValueError): - table.upload_from_file(file_obj, 'CSV', size=None) + def test__initiate_resumable_upload(self): + self._initiate_resumable_upload_helper() - def test_upload_from_file_multipart_w_400(self): - import csv - import datetime - from six.moves.http_client import BAD_REQUEST - from google.cloud._testing import _NamedTemporaryFile - from google.cloud._helpers import UTC - from google.cloud.exceptions import BadRequest + def test__initiate_resumable_upload_with_retry(self): + self._initiate_resumable_upload_helper(num_retries=11) - WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) - response = {'status': BAD_REQUEST} - conn = _Connection( - (response, b'{}'), - ) - client = _Client(project=self.PROJECT, connection=conn) + def _do_multipart_upload_success_helper( + self, get_boundary, num_retries=None): + from google.cloud.bigquery.table import _get_upload_headers + from google.cloud.bigquery.table import _get_upload_metadata + + connection = _Connection() + client = _Client(self.PROJECT, connection=connection) dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + table = self._make_one(self.TABLE_NAME, dataset) - with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as file_obj: - writer = csv.writer(file_obj) - writer.writerow(('full_name', 'age', 'joined')) - writer.writerow(('Phred Phlyntstone', 32, WHEN)) + # Create mocks to be checked for doing transport. + fake_transport = self._mock_transport(http_client.OK, {}) + table._make_transport = mock.Mock(return_value=fake_transport, spec=[]) + + # Create some mock arguments. + data = b'Bzzzz-zap \x00\x01\xf4' + stream = io.BytesIO(data) + metadata = _get_upload_metadata( + 'CSV', table._schema, table._dataset, table.name) + size = len(data) + response = table._do_multipart_upload( + client, stream, metadata, size, num_retries) + + # Check the mocks and the returned value. + self.assertIs(response, fake_transport.request.return_value) + self.assertEqual(stream.tell(), size) + table._make_transport.assert_called_once_with(client) + get_boundary.assert_called_once_with() + + upload_url = ( + 'https://www.googleapis.com/upload/bigquery/v2/projects/' + + self.PROJECT + + '/jobs?uploadType=multipart') + payload = ( + b'--==0==\r\n' + + b'content-type: application/json; charset=UTF-8\r\n\r\n' + + json.dumps(metadata).encode('utf-8') + b'\r\n' + + b'--==0==\r\n' + + b'content-type: */*\r\n\r\n' + + data + b'\r\n' + + b'--==0==--') + headers = _get_upload_headers(connection.USER_AGENT) + headers['content-type'] = b'multipart/related; boundary="==0=="' + fake_transport.request.assert_called_once_with( + 'POST', + upload_url, + data=payload, + headers=headers, + ) - with open(temp.name, 'rb') as file_obj: - with self.assertRaises(BadRequest): - table.upload_from_file( - file_obj, 'CSV', rewind=True) + @mock.patch(u'google.resumable_media._upload.get_boundary', + return_value=b'==0==') + def test__do_multipart_upload(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary) - def _upload_from_file_helper(self, **kw): - import csv - import datetime - from six.moves.http_client import OK - from google.cloud._helpers import UTC - from google.cloud._testing import _NamedTemporaryFile - from google.cloud.bigquery.table import SchemaField + @mock.patch(u'google.resumable_media._upload.get_boundary', + return_value=b'==0==') + def test__do_multipart_upload_with_retry(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary, num_retries=8) - WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) - PATH = 'projects/%s/jobs' % (self.PROJECT,) - response = {'status': OK} - conn = _Connection( - (response, b'{}'), - ) - client = _Client(project=self.PROJECT, connection=conn) - expected_job = object() - if 'client' in kw: - kw['client']._job = expected_job - else: - client._job = expected_job - input_file_mode = kw.pop('input_file_mode', 'rb') - dataset = _Dataset(client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, joined]) - ROWS = [ - ('Phred Phlyntstone', 32, WHEN), - ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), - ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)), - ('Bhettye Rhubble', 27, None), - ] - with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as file_obj: - writer = csv.writer(file_obj) - writer.writerow(('full_name', 'age', 'joined')) - writer.writerows(ROWS) - - with open(temp.name, input_file_mode) as file_obj: - BODY = file_obj.read() - explicit_size = kw.pop('_explicit_size', False) - if explicit_size: - kw['size'] = len(BODY) - job = table.upload_from_file( - file_obj, 'CSV', rewind=True, **kw) - - self.assertIs(job, expected_job) - return conn.http._requested, PATH, BODY - - def test_upload_from_file_w_bound_client_multipart(self): - import json - from six.moves.urllib.parse import parse_qsl - from six.moves.urllib.parse import urlsplit - from google.cloud._helpers import _to_bytes - - requested, PATH, BODY = self._upload_from_file_helper() - parse_chunk = _email_chunk_parser() - - self.assertEqual(len(requested), 1) - req = requested[0] - self.assertEqual(req['method'], 'POST') - uri = req['uri'] - scheme, netloc, path, qs, _ = urlsplit(uri) - self.assertEqual(scheme, 'http') - self.assertEqual(netloc, 'example.com') - self.assertEqual(path, '/%s' % PATH) - self.assertEqual(dict(parse_qsl(qs)), - {'uploadType': 'multipart'}) - - ctype, boundary = [x.strip() - for x in req['headers']['content-type'].split(';')] - self.assertEqual(ctype, 'multipart/related') - self.assertTrue(boundary.startswith('boundary="==')) - self.assertTrue(boundary.endswith('=="')) - - divider = b'--' + _to_bytes(boundary[len('boundary="'):-1]) - chunks = req['body'].split(divider)[1:-1] # discard prolog / epilog - self.assertEqual(len(chunks), 2) - - text_msg = parse_chunk(chunks[0].strip()) - self.assertEqual(dict(text_msg._headers), - {'Content-Type': 'application/json', - 'MIME-Version': '1.0'}) - metadata = json.loads(text_msg._payload) - load_config = metadata['configuration']['load'] - DESTINATION_TABLE = { - 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME, - } - self.assertEqual(load_config['destinationTable'], DESTINATION_TABLE) - self.assertEqual(load_config['sourceFormat'], 'CSV') - - app_msg = parse_chunk(chunks[1].strip()) - self.assertEqual(dict(app_msg._headers), - {'Content-Type': 'application/octet-stream', - 'Content-Transfer-Encoding': 'binary', - 'MIME-Version': '1.0'}) - body = BODY.decode('ascii').rstrip() - body_lines = [line.strip() for line in body.splitlines()] - payload_lines = app_msg._payload.rstrip().splitlines() - self.assertEqual(payload_lines, body_lines) - - def test_upload_from_file_resumable_with_400(self): - import csv - import datetime - import mock - from six.moves.http_client import BAD_REQUEST - from google.cloud.exceptions import BadRequest - from google.cloud._helpers import UTC - from google.cloud._testing import _NamedTemporaryFile +class TestTableUpload(object): + # NOTE: This is a "partner" to `TestTable` meant to test some of the + # "upload" portions of `Table`. It also uses `pytest`-style tests + # rather than `unittest`-style. - WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) - initial_response = {'status': BAD_REQUEST} - conn = _Connection( - (initial_response, b'{}'), - ) - client = _Client(project=self.PROJECT, connection=conn) + @staticmethod + def _make_table(): + from google.cloud.bigquery import _http + from google.cloud.bigquery import client + from google.cloud.bigquery import dataset + from google.cloud.bigquery import table - class _UploadConfig(object): - accept = ['*/*'] - max_size = None - resumable_multipart = True - resumable_path = u'/upload/bigquery/v2/projects/{project}/jobs' - simple_multipart = True - simple_path = u'' # force resumable - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + connection = mock.create_autospec(_http.Connection, instance=True) + client = mock.create_autospec(client.Client, instance=True) + client._connection = connection + client._credentials = mock.sentinel.credentials + client.project = 'project_id' - with mock.patch('google.cloud.bigquery.table._UploadConfig', - new=_UploadConfig): - with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as file_obj: - writer = csv.writer(file_obj) - writer.writerow(('full_name', 'age', 'joined')) - writer.writerow(('Phred Phlyntstone', 32, WHEN)) - - with open(temp.name, 'rb') as file_obj: - with self.assertRaises(BadRequest): - table.upload_from_file( - file_obj, 'CSV', rewind=True) - - # pylint: disable=too-many-statements - def test_upload_from_file_w_explicit_client_resumable(self): - import json - import mock - from six.moves.http_client import OK - from six.moves.urllib.parse import parse_qsl - from six.moves.urllib.parse import urlsplit - - UPLOAD_PATH = 'https://example.com/upload/test' - initial_response = {'status': OK, 'location': UPLOAD_PATH} - upload_response = {'status': OK} - conn = _Connection( - (initial_response, b'{}'), - (upload_response, b'{}'), - ) - client = _Client(project=self.PROJECT, connection=conn) + dataset = dataset.Dataset('test_dataset', client) + table = table.Table('test_table', dataset) - class _UploadConfig(object): - accept = ['*/*'] - max_size = None - resumable_multipart = True - resumable_path = u'/upload/bigquery/v2/projects/{project}/jobs' - simple_multipart = True - simple_path = u'' # force resumable - - with mock.patch('google.cloud.bigquery.table._UploadConfig', - new=_UploadConfig): - orig_requested, PATH, BODY = self._upload_from_file_helper( - allow_jagged_rows=False, - allow_quoted_newlines=False, - create_disposition='CREATE_IF_NEEDED', - encoding='utf8', - field_delimiter=',', - ignore_unknown_values=False, - max_bad_records=0, - quote_character='"', - skip_leading_rows=1, - write_disposition='WRITE_APPEND', - client=client, - _explicit_size=True) - - self.assertEqual(len(orig_requested), 0) - - requested = conn.http._requested - self.assertEqual(len(requested), 2) - req = requested[0] - self.assertEqual(req['method'], 'POST') - uri = req['uri'] - scheme, netloc, path, qs, _ = urlsplit(uri) - self.assertEqual(scheme, 'http') - self.assertEqual(netloc, 'example.com') - self.assertEqual(path, '/%s' % PATH) - self.assertEqual(dict(parse_qsl(qs)), - {'uploadType': 'resumable'}) - - self.assertEqual(req['headers']['content-type'], 'application/json') - metadata = json.loads(req['body']) - load_config = metadata['configuration']['load'] - DESTINATION_TABLE = { - 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME, + return table + + @staticmethod + def _make_response(status_code, content='', headers={}): + """Make a mock HTTP response.""" + import requests + response = mock.create_autospec(requests.Response, instance=True) + response.content = content.encode('utf-8') + response.headers = headers + response.status_code = status_code + return response + + @classmethod + def _make_do_upload_patch(cls, table, method, side_effect=None): + """Patches the low-level upload helpers.""" + if side_effect is None: + side_effect = [cls._make_response( + http_client.OK, + json.dumps({}), + {'Content-Type': 'application/json'})] + return mock.patch.object( + table, method, side_effect=side_effect, autospec=True) + + EXPECTED_CONFIGURATION = { + 'configuration': { + 'load': { + 'sourceFormat': 'CSV', + 'destinationTable': { + 'projectId': 'project_id', + 'datasetId': 'test_dataset', + 'tableId': 'test_table' + } + } + } + } + + @staticmethod + def _make_file_obj(): + return io.BytesIO(b'hello, is it me you\'re looking for?') + + # High-level tests + + def test_upload_from_file_resumable(self): + import google.cloud.bigquery.table + + table = self._make_table() + file_obj = self._make_file_obj() + + do_upload_patch = self._make_do_upload_patch( + table, '_do_resumable_upload') + with do_upload_patch as do_upload: + table.upload_from_file(file_obj, source_format='CSV') + + do_upload.assert_called_once_with( + table._dataset._client, + file_obj, + self.EXPECTED_CONFIGURATION, + google.cloud.bigquery.table._DEFAULT_NUM_RETRIES) + + def test_upload_file_resumable_metadata(self): + table = self._make_table() + file_obj = self._make_file_obj() + + config_args = { + 'source_format': 'CSV', + 'allow_jagged_rows': False, + 'allow_quoted_newlines': False, + 'create_disposition': 'CREATE_IF_NEEDED', + 'encoding': 'utf8', + 'field_delimiter': ',', + 'ignore_unknown_values': False, + 'max_bad_records': 0, + 'quote_character': '"', + 'skip_leading_rows': 1, + 'write_disposition': 'WRITE_APPEND', + 'job_name': 'oddjob' } - self.assertEqual(load_config['destinationTable'], DESTINATION_TABLE) - self.assertEqual(load_config['sourceFormat'], 'CSV') - self.assertEqual(load_config['allowJaggedRows'], False) - self.assertEqual(load_config['allowQuotedNewlines'], False) - self.assertEqual(load_config['createDisposition'], 'CREATE_IF_NEEDED') - self.assertEqual(load_config['encoding'], 'utf8') - self.assertEqual(load_config['fieldDelimiter'], ',') - self.assertEqual(load_config['ignoreUnknownValues'], False) - self.assertEqual(load_config['maxBadRecords'], 0) - self.assertEqual(load_config['quote'], '"') - self.assertEqual(load_config['skipLeadingRows'], 1) - self.assertEqual(load_config['writeDisposition'], 'WRITE_APPEND') - - req = requested[1] - self.assertEqual(req['method'], 'PUT') - self.assertEqual(req['uri'], UPLOAD_PATH) - headers = req['headers'] - length = len(BODY) - self.assertEqual(headers['Content-Type'], 'application/octet-stream') - self.assertEqual(headers['Content-Range'], - 'bytes 0-%d/%d' % (length - 1, length)) - self.assertEqual(headers['content-length'], '%d' % (length,)) - self.assertEqual(req['body'], BODY) - # pylint: enable=too-many-statements - - def test_upload_from_file_w_jobid(self): - import json - from google.cloud._helpers import _to_bytes - - requested, PATH, BODY = self._upload_from_file_helper(job_name='foo') - parse_chunk = _email_chunk_parser() - req = requested[0] - ctype, boundary = [x.strip() - for x in req['headers']['content-type'].split(';')] - divider = b'--' + _to_bytes(boundary[len('boundary="'):-1]) - chunks = req['body'].split(divider)[1:-1] # discard prolog / epilog - text_msg = parse_chunk(chunks[0].strip()) - metadata = json.loads(text_msg._payload) - load_config = metadata['configuration']['load'] - self.assertEqual(load_config['jobReference'], {'jobId': 'foo'}) + + expected_config = { + 'configuration': { + 'load': { + 'sourceFormat': config_args['source_format'], + 'destinationTable': { + 'projectId': table._dataset._client.project, + 'datasetId': table.dataset_name, + 'tableId': table.name + }, + 'allowJaggedRows': config_args['allow_jagged_rows'], + 'allowQuotedNewlines': + config_args['allow_quoted_newlines'], + 'createDisposition': config_args['create_disposition'], + 'encoding': config_args['encoding'], + 'fieldDelimiter': config_args['field_delimiter'], + 'ignoreUnknownValues': + config_args['ignore_unknown_values'], + 'maxBadRecords': config_args['max_bad_records'], + 'quote': config_args['quote_character'], + 'skipLeadingRows': config_args['skip_leading_rows'], + 'writeDisposition': config_args['write_disposition'], + 'jobReference': {'jobId': config_args['job_name']} + } + } + } + + do_upload_patch = self._make_do_upload_patch( + table, '_do_resumable_upload') + with do_upload_patch as do_upload: + table.upload_from_file( + file_obj, **config_args) + + do_upload.assert_called_once_with( + table._dataset._client, + file_obj, + expected_config, + mock.ANY) + + def test_upload_from_file_multipart(self): + import google.cloud.bigquery.table + + table = self._make_table() + file_obj = self._make_file_obj() + file_obj_size = 10 + + do_upload_patch = self._make_do_upload_patch( + table, '_do_multipart_upload') + with do_upload_patch as do_upload: + table.upload_from_file( + file_obj, source_format='CSV', size=file_obj_size) + + do_upload.assert_called_once_with( + table._dataset._client, + file_obj, + self.EXPECTED_CONFIGURATION, + file_obj_size, + google.cloud.bigquery.table._DEFAULT_NUM_RETRIES) + + def test_upload_from_file_with_retries(self): + table = self._make_table() + file_obj = self._make_file_obj() + num_retries = 20 + + do_upload_patch = self._make_do_upload_patch( + table, '_do_resumable_upload') + with do_upload_patch as do_upload: + table.upload_from_file( + file_obj, source_format='CSV', num_retries=num_retries) + + do_upload.assert_called_once_with( + table._dataset._client, + file_obj, + self.EXPECTED_CONFIGURATION, + num_retries) + + def test_upload_from_file_with_rewind(self): + table = self._make_table() + file_obj = self._make_file_obj() + file_obj.seek(2) + + with self._make_do_upload_patch(table, '_do_resumable_upload'): + table.upload_from_file( + file_obj, source_format='CSV', rewind=True) + + assert file_obj.tell() == 0 + + def test_upload_from_file_failure(self): + from google.resumable_media import InvalidResponse + from google.cloud import exceptions + + table = self._make_table() + file_obj = self._make_file_obj() + + response = self._make_response( + content='Someone is already in this spot.', + status_code=http_client.CONFLICT) + + do_upload_patch = self._make_do_upload_patch( + table, '_do_resumable_upload', + side_effect=InvalidResponse(response)) + + with do_upload_patch, pytest.raises(exceptions.Conflict) as exc_info: + table.upload_from_file( + file_obj, source_format='CSV', rewind=True) + + assert exc_info.value.message == response.content.decode('utf-8') + assert exc_info.value.errors == [] + + def test_upload_from_file_bad_mode(self): + table = self._make_table() + file_obj = mock.Mock(spec=['mode']) + file_obj.mode = 'x' + + with pytest.raises(ValueError): + table.upload_from_file( + file_obj, source_format='CSV',) + + # Low-level tests + + @classmethod + def _make_resumable_upload_responses(cls, size): + """Make a series of responses for a successful resumable upload.""" + from google import resumable_media + + resumable_url = 'http://test.invalid?upload_id=and-then-there-was-1' + initial_response = cls._make_response( + http_client.OK, '', {'location': resumable_url}) + data_response = cls._make_response( + resumable_media.PERMANENT_REDIRECT, + '', {'range': 'bytes=0-{:d}'.format(size - 1)}) + final_response = cls._make_response( + http_client.OK, + json.dumps({'size': size}), + {'Content-Type': 'application/json'}) + return [initial_response, data_response, final_response] + + @staticmethod + def _make_transport_patch(table, responses=None): + """Patch a table's _make_transport method to return given responses.""" + import google.auth.transport.requests + + transport = mock.create_autospec( + google.auth.transport.requests.AuthorizedSession, instance=True) + transport.request.side_effect = responses + return mock.patch.object( + table, '_make_transport', return_value=transport, autospec=True) + + def test__do_resumable_upload(self): + table = self._make_table() + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + responses = self._make_resumable_upload_responses(file_obj_len) + + with self._make_transport_patch(table, responses) as transport: + result = table._do_resumable_upload( + table._dataset._client, + file_obj, + self.EXPECTED_CONFIGURATION, + None) + + content = result.content.decode('utf-8') + assert json.loads(content) == {'size': file_obj_len} + + # Verify that configuration data was passed in with the initial + # request. + transport.return_value.request.assert_any_call( + 'POST', + mock.ANY, + data=json.dumps(self.EXPECTED_CONFIGURATION).encode('utf-8'), + headers=mock.ANY) + + def test__do_multipart_upload(self): + table = self._make_table() + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + responses = [self._make_response(http_client.OK)] + + with self._make_transport_patch(table, responses) as transport: + table._do_multipart_upload( + table._dataset._client, + file_obj, + self.EXPECTED_CONFIGURATION, + file_obj_len, + None) + + # Verify that configuration data was passed in with the initial + # request. + request_args = transport.return_value.request.mock_calls[0][2] + request_data = request_args['data'].decode('utf-8') + request_headers = request_args['headers'] + + request_content = email.message_from_string( + 'Content-Type: {}\r\n{}'.format( + request_headers['content-type'].decode('utf-8'), + request_data)) + + # There should be two payloads: the configuration and the binary daya. + configuration_data = request_content.get_payload(0).get_payload() + binary_data = request_content.get_payload(1).get_payload() + + assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION + assert binary_data.encode('utf-8') == file_obj.getvalue() + + def test__do_multipart_upload_wrong_size(self): + table = self._make_table() + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + with pytest.raises(ValueError): + table._do_multipart_upload( + table._dataset._client, + file_obj, + {}, + file_obj_len+1, + None) class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): @@ -1974,6 +2146,70 @@ def test_w_subfields(self): 'mode': 'REQUIRED'}]}) +class Test__get_upload_metadata(unittest.TestCase): + + @staticmethod + def _call_fut(source_format, schema, dataset, name): + from google.cloud.bigquery.table import _get_upload_metadata + + return _get_upload_metadata(source_format, schema, dataset, name) + + def test_empty_schema(self): + source_format = 'AVRO' + dataset = mock.Mock(project='prediction', spec=['name', 'project']) + dataset.name = 'market' # mock.Mock() treats `name` specially. + table_name = 'chairs' + metadata = self._call_fut(source_format, [], dataset, table_name) + + expected = { + 'configuration': { + 'load': { + 'sourceFormat': source_format, + 'destinationTable': { + 'projectId': dataset.project, + 'datasetId': dataset.name, + 'tableId': table_name, + }, + }, + }, + } + self.assertEqual(metadata, expected) + + def test_with_schema(self): + from google.cloud.bigquery.table import SchemaField + + source_format = 'CSV' + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + dataset = mock.Mock(project='blind', spec=['name', 'project']) + dataset.name = 'movie' # mock.Mock() treats `name` specially. + table_name = 'teebull-neem' + metadata = self._call_fut( + source_format, [full_name], dataset, table_name) + + expected = { + 'configuration': { + 'load': { + 'sourceFormat': source_format, + 'destinationTable': { + 'projectId': dataset.project, + 'datasetId': dataset.name, + 'tableId': table_name, + }, + 'schema': { + 'fields': [ + { + 'name': full_name.name, + 'type': full_name.field_type, + 'mode': full_name.mode, + }, + ], + }, + }, + }, + } + self.assertEqual(metadata, expected) + + class _Client(object): _query_results = () @@ -1982,9 +2218,6 @@ def __init__(self, project='project', connection=None): self.project = project self._connection = connection - def job_from_resource(self, resource): # pylint: disable=unused-argument - return self._job - def run_sync_query(self, query): return _Query(query, self) @@ -2016,37 +2249,14 @@ def project(self): return self._client.project -class _Responder(object): - - def __init__(self, *responses): - self._responses = responses[:] - self._requested = [] - - def _respond(self, **kw): - self._requested.append(kw) - response, self._responses = self._responses[0], self._responses[1:] - return response - - -class _HTTP(_Responder): - - connections = {} # For google-apitools debugging. - - def request(self, uri, method, headers, body, **kw): - if hasattr(body, 'read'): - body = body.read() - return self._respond(uri=uri, method=method, headers=headers, - body=body, **kw) - - -class _Connection(_Responder): +class _Connection(object): API_BASE_URL = 'http://example.com' USER_AGENT = 'testing 1.2.3' def __init__(self, *responses): - super(_Connection, self).__init__(*responses) - self.http = _HTTP(*responses) + self._responses = responses[:] + self._requested = [] def api_request(self, **kw): from google.cloud.exceptions import NotFound @@ -2059,29 +2269,3 @@ def api_request(self, **kw): raise NotFound('miss') else: return response - - def build_api_url(self, path, query_params=None, - api_base_url=API_BASE_URL): - from six.moves.urllib.parse import urlencode - from six.moves.urllib.parse import urlsplit - from six.moves.urllib.parse import urlunsplit - - # Mimic the build_api_url interface. - qs = urlencode(query_params or {}) - scheme, netloc, _, _, _ = urlsplit(api_base_url) - return urlunsplit((scheme, netloc, path, qs, '')) - - -def _email_chunk_parser(): - import six - - if six.PY3: # pragma: NO COVER Python3 - from email.parser import BytesParser - - parser = BytesParser() - return parser.parsebytes - else: - from email.parser import Parser - - parser = Parser() - return parser.parsestr From 59e59cbd34be473a4493211a9f76b0dc79edd89b Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 21 Jul 2017 15:42:38 -0700 Subject: [PATCH 0142/2016] Add Future interface to BigQuery jobs (#3626) * Add future interface to bigquery Jobs. * Make QueryJob return QueryResults from result() * Deprecate QueryJob.results() --- .../google/cloud/bigquery/job.py | 201 ++++++++++++++++-- .../google-cloud-bigquery/tests/system.py | 10 + .../tests/unit/test_job.py | 102 ++++++++- 3 files changed, 291 insertions(+), 22 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 4f791bdbea0c..35a423b755b9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -14,8 +14,14 @@ """Define API Jobs.""" +import collections +import threading +import warnings + import six +from six.moves import http_client +from google.cloud import exceptions from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds from google.cloud.bigquery.dataset import Dataset @@ -27,6 +33,60 @@ from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _EnumProperty from google.cloud.bigquery._helpers import _TypedProperty +import google.cloud.future.base + +_DONE_STATE = 'DONE' +_STOPPED_REASON = 'stopped' + +_ERROR_REASON_TO_EXCEPTION = { + 'accessDenied': http_client.FORBIDDEN, + 'backendError': http_client.INTERNAL_SERVER_ERROR, + 'billingNotEnabled': http_client.FORBIDDEN, + 'billingTierLimitExceeded': http_client.BAD_REQUEST, + 'blocked': http_client.FORBIDDEN, + 'duplicate': http_client.CONFLICT, + 'internalError': http_client.INTERNAL_SERVER_ERROR, + 'invalid': http_client.BAD_REQUEST, + 'invalidQuery': http_client.BAD_REQUEST, + 'notFound': http_client.NOT_FOUND, + 'notImplemented': http_client.NOT_IMPLEMENTED, + 'quotaExceeded': http_client.FORBIDDEN, + 'rateLimitExceeded': http_client.FORBIDDEN, + 'resourceInUse': http_client.BAD_REQUEST, + 'resourcesExceeded': http_client.BAD_REQUEST, + 'responseTooLarge': http_client.FORBIDDEN, + 'stopped': http_client.OK, + 'tableUnavailable': http_client.BAD_REQUEST, +} + +_FakeResponse = collections.namedtuple('_FakeResponse', ['status']) + + +def _error_result_to_exception(error_result): + """Maps BigQuery error reasons to an exception. + + The reasons and their matching HTTP status codes are documented on + the `troubleshooting errors`_ page. + + .. _troubleshooting errors: https://cloud.google.com/bigquery\ + /troubleshooting-errors + + :type error_result: Mapping[str, str] + :param error_result: The error result from BigQuery. + + :rtype google.cloud.exceptions.GoogleCloudError: + :returns: The mapped exception. + """ + reason = error_result.get('reason') + status_code = _ERROR_REASON_TO_EXCEPTION.get( + reason, http_client.INTERNAL_SERVER_ERROR) + # make_exception expects an httplib2 response object. + fake_response = _FakeResponse(status=status_code) + return exceptions.make_exception( + fake_response, + error_result.get('message', ''), + error_info=error_result, + use_json=False) class Compression(_EnumProperty): @@ -82,16 +142,23 @@ class WriteDisposition(_EnumProperty): ALLOWED = (WRITE_APPEND, WRITE_TRUNCATE, WRITE_EMPTY) -class _BaseJob(object): - """Base class for jobs. +class _AsyncJob(google.cloud.future.base.PollingFuture): + """Base class for asynchronous jobs. + + :type name: str + :param name: the name of the job :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). """ - def __init__(self, client): + def __init__(self, name, client): + super(_AsyncJob, self).__init__() + self.name = name self._client = client self._properties = {} + self._result_set = False + self._completion_lock = threading.Lock() @property def project(self): @@ -117,21 +184,6 @@ def _require_client(self, client): client = self._client return client - -class _AsyncJob(_BaseJob): - """Base class for asynchronous jobs. - - :type name: str - :param name: the name of the job - - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). - """ - def __init__(self, name, client): - super(_AsyncJob, self).__init__(client) - self.name = name - @property def job_type(self): """Type of job @@ -273,6 +325,9 @@ def _set_properties(self, api_response): self._properties.clear() self._properties.update(cleaned) + # For Future interface + self._set_future_result() + @classmethod def _get_resource_config(cls, resource): """Helper for :meth:`from_api_repr` @@ -345,7 +400,7 @@ def exists(self, client=None): return True def reload(self, client=None): - """API call: refresh job properties via a GET request + """API call: refresh job properties via a GET request. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get @@ -371,12 +426,85 @@ def cancel(self, client=None): ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + + :rtype: bool + :returns: Boolean indicating that the cancel request was sent. """ client = self._require_client(client) api_response = client._connection.api_request( method='POST', path='%s/cancel' % (self.path,)) self._set_properties(api_response['job']) + # The Future interface requires that we return True if the *attempt* + # to cancel was successful. + return True + + # The following methods implement the PollingFuture interface. Note that + # the methods above are from the pre-Future interface and are left for + # compatibility. The only "overloaded" method is :meth:`cancel`, which + # satisfies both interfaces. + + def _set_future_result(self): + """Set the result or exception from the job if it is complete.""" + # This must be done in a lock to prevent the polling thread + # and main thread from both executing the completion logic + # at the same time. + with self._completion_lock: + # If the operation isn't complete or if the result has already been + # set, do not call set_result/set_exception again. + # Note: self._result_set is set to True in set_result and + # set_exception, in case those methods are invoked directly. + if self.state != _DONE_STATE or self._result_set: + return + + if self.error_result is not None: + exception = _error_result_to_exception(self.error_result) + self.set_exception(exception) + else: + self.set_result(self) + + def done(self): + """Refresh the job and checks if it is complete. + + :rtype: bool + :returns: True if the job is complete, False otherwise. + """ + # Do not refresh is the state is already done, as the job will not + # change once complete. + if self.state != _DONE_STATE: + self.reload() + return self.state == _DONE_STATE + + def result(self, timeout=None): + """Start the job and wait for it to complete and get the result. + + :type timeout: int + :param timeout: How long to wait for job to complete before raising + a :class:`TimeoutError`. + + :rtype: _AsyncJob + :returns: This instance. + + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job + failed or :class:`TimeoutError` if the job did not complete in the + given timeout. + """ + if self.state is None: + self.begin() + return super(_AsyncJob, self).result(timeout=timeout) + + def cancelled(self): + """Check if the job has been cancelled. + + This always returns False. It's not possible to check if a job was + cancelled in the API. This method is here to satisfy the interface + for :class:`google.cloud.future.Future`. + + :rtype: bool + :returns: False + """ + return (self.error_result is not None + and self.error_result.get('reason') == _STOPPED_REASON) class _LoadConfiguration(object): @@ -1127,7 +1255,7 @@ def from_api_repr(cls, resource, client): job._set_properties(resource) return job - def results(self): + def query_results(self): """Construct a QueryResults instance, bound to this job. :rtype: :class:`~google.cloud.bigquery.query.QueryResults` @@ -1135,3 +1263,36 @@ def results(self): """ from google.cloud.bigquery.query import QueryResults return QueryResults.from_query_job(self) + + def results(self): + """DEPRECATED. + + This method is deprecated. Use :meth:`query_results` or :meth:`result`. + + Construct a QueryResults instance, bound to this job. + + :rtype: :class:`~google.cloud.bigquery.query.QueryResults` + :returns: The query results. + """ + warnings.warn( + 'QueryJob.results() is deprecated. Please use query_results() or ' + 'result().', DeprecationWarning) + return self.query_results() + + def result(self, timeout=None): + """Start the job and wait for it to complete and get the result. + + :type timeout: int + :param timeout: How long to wait for job to complete before raising + a :class:`TimeoutError`. + + :rtype: :class:`~google.cloud.bigquery.query.QueryResults` + :returns: The query results. + + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job + failed or :class:`TimeoutError` if the job did not complete in the + given timeout. + """ + super(QueryJob, self).result(timeout=timeout) + # Return a QueryResults instance instead of returning the job. + return self.query_results() diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 3391ec2bd2d8..1d3da3d2a83d 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -19,6 +19,7 @@ import os import time import unittest +import uuid from google.cloud import bigquery from google.cloud._helpers import UTC @@ -1013,6 +1014,15 @@ def test_large_query_w_public_data(self): rows = list(iterator) self.assertEqual(len(rows), LIMIT) + def test_async_query_future(self): + query_job = Config.CLIENT.run_async_query( + str(uuid.uuid4()), 'SELECT 1') + query_job.use_legacy_sql = False + + iterator = query_job.result().fetch_data() + rows = list(iterator) + self.assertEqual(rows, [(1,)]) + def test_insert_nested_nested(self): # See #2951 SF = bigquery.SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 57d96bf8ae15..8b9d079df148 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -12,9 +12,34 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy +import warnings + +from six.moves import http_client import unittest +class Test__error_result_to_exception(unittest.TestCase): + def _call_fut(self, *args, **kwargs): + from google.cloud.bigquery import job + return job._error_result_to_exception(*args, **kwargs) + + def test_simple(self): + error_result = { + 'reason': 'invalid', + 'message': 'bad request' + } + exception = self._call_fut(error_result) + self.assertEqual(exception.code, http_client.BAD_REQUEST) + self.assertTrue(exception.message.startswith('bad request')) + self.assertIn("'reason': 'invalid'", exception.message) + + def test_missing_reason(self): + error_result = {} + exception = self._call_fut(error_result) + self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR) + + class _Base(object): PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' @@ -1514,15 +1539,88 @@ def test_from_api_repr_w_properties(self): self.assertIs(dataset._client, client) self._verifyResourceProperties(dataset, RESOURCE) - def test_results(self): + def test_cancelled(self): + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + job._properties['status'] = { + 'state': 'DONE', + 'errorResult': { + 'reason': 'stopped' + } + } + + self.assertTrue(job.cancelled()) + + def test_query_results(self): from google.cloud.bigquery.query import QueryResults client = _Client(self.PROJECT) job = self._make_one(self.JOB_NAME, self.QUERY, client) - results = job.results() + results = job.query_results() self.assertIsInstance(results, QueryResults) self.assertIs(results._job, job) + def test_results_is_deprecated(self): + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + + with warnings.catch_warnings(record=True) as warned: + warnings.simplefilter('always') + job.results() + self.assertEqual(len(warned), 1) + self.assertIn('deprecated', str(warned[0])) + + def test_result(self): + from google.cloud.bigquery.query import QueryResults + + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + job._properties['status'] = {'state': 'DONE'} + + result = job.result() + + self.assertIsInstance(result, QueryResults) + self.assertIs(result._job, job) + + def test_result_invokes_begins(self): + begun_resource = self._makeResource() + done_resource = copy.deepcopy(begun_resource) + done_resource['status'] = {'state': 'DONE'} + connection = _Connection(begun_resource, done_resource) + client = _Client(self.PROJECT, connection=connection) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + + job.result() + + self.assertEqual(len(connection._requested), 2) + begin_request, reload_request = connection._requested + self.assertEqual(begin_request['method'], 'POST') + self.assertEqual(reload_request['method'], 'GET') + + def test_result_error(self): + from google.cloud import exceptions + + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + error_result = { + 'debugInfo': 'DEBUG', + 'location': 'LOCATION', + 'message': 'MESSAGE', + 'reason': 'invalid' + } + job._properties['status'] = { + 'errorResult': error_result, + 'errors': [error_result], + 'state': 'DONE' + } + job._set_future_result() + + with self.assertRaises(exceptions.GoogleCloudError) as exc_info: + job.result() + + self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) + self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource() From 4c4602520e8e02490cb2e984b8902e657560b0ac Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 24 Jul 2017 14:17:14 -0700 Subject: [PATCH 0143/2016] Remove QueryJob.results() (#3661) --- .../google/cloud/bigquery/dbapi/_helpers.py | 21 ----------- .../google/cloud/bigquery/dbapi/cursor.py | 10 +++-- .../google/cloud/bigquery/job.py | 16 -------- .../tests/unit/test_dbapi__helpers.py | 37 ------------------- .../tests/unit/test_dbapi_cursor.py | 20 +++++++++- .../tests/unit/test_job.py | 11 ------ 6 files changed, 25 insertions(+), 90 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 1a9a02fd7cc7..a9a358cbf0f5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -15,7 +15,6 @@ import collections import datetime import numbers -import time import six @@ -23,26 +22,6 @@ from google.cloud.bigquery.dbapi import exceptions -def wait_for_job(job): - """Waits for a job to complete by polling until the state is `DONE`. - - Sleeps 1 second between calls to the BigQuery API. - - :type job: :class:`~google.cloud.bigquery.job._AsyncJob` - :param job: Wait for this job to finish. - - :raises: :class:`~google.cloud.bigquery.dbapi.exceptions.DatabaseError` - if the job fails. - """ - while True: - job.reload() - if job.state == 'DONE': - if job.error_result: - raise exceptions.DatabaseError(job.errors) - return - time.sleep(1) - - def scalar_to_query_parameter(value, name=None): """Convert a scalar value into a query parameter. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index bcbb19cfd066..7519c762ae1e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -21,7 +21,7 @@ from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions - +import google.cloud.exceptions # Per PEP 249: A 7-item sequence containing information describing one result # column. The first two items (name and type_code) are mandatory, the other @@ -148,9 +148,11 @@ def execute(self, operation, parameters=None): formatted_operation, query_parameters=query_parameters) query_job.use_legacy_sql = False - query_job.begin() - _helpers.wait_for_job(query_job) - query_results = query_job.results() + + try: + query_results = query_job.result() + except google.cloud.exceptions.GoogleCloudError: + raise exceptions.DatabaseError(query_job.errors) # Force the iterator to run because the query_results doesn't # have the total_rows populated. See: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 35a423b755b9..3e6a9f93418b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -16,7 +16,6 @@ import collections import threading -import warnings import six from six.moves import http_client @@ -1264,21 +1263,6 @@ def query_results(self): from google.cloud.bigquery.query import QueryResults return QueryResults.from_query_job(self) - def results(self): - """DEPRECATED. - - This method is deprecated. Use :meth:`query_results` or :meth:`result`. - - Construct a QueryResults instance, bound to this job. - - :rtype: :class:`~google.cloud.bigquery.query.QueryResults` - :returns: The query results. - """ - warnings.warn( - 'QueryJob.results() is deprecated. Please use query_results() or ' - 'result().', DeprecationWarning) - return self.query_results() - def result(self, timeout=None): """Start the job and wait for it to complete and get the result. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index e030ed49df0c..48bca5ae9a59 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -16,48 +16,11 @@ import math import unittest -import mock - import google.cloud._helpers from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions -class Test_wait_for_job(unittest.TestCase): - - def _mock_job(self): - from google.cloud.bigquery import job - mock_job = mock.create_autospec(job.QueryJob) - mock_job.state = 'RUNNING' - mock_job._mocked_iterations = 0 - - def mock_reload(): - mock_job._mocked_iterations += 1 - if mock_job._mocked_iterations >= 2: - mock_job.state = 'DONE' - - mock_job.reload.side_effect = mock_reload - return mock_job - - def _call_fut(self, job): - from google.cloud.bigquery.dbapi._helpers import wait_for_job - with mock.patch('time.sleep'): - wait_for_job(job) - - def test_wo_error(self): - mock_job = self._mock_job() - mock_job.error_result = None - self._call_fut(mock_job) - self.assertEqual('DONE', mock_job.state) - - def test_w_error(self): - from google.cloud.bigquery.dbapi import exceptions - mock_job = self._mock_job() - mock_job.error_result = {'reason': 'invalidQuery'} - self.assertRaises(exceptions.DatabaseError, self._call_fut, mock_job) - self.assertEqual('DONE', mock_job.state) - - class TestQueryParameters(unittest.TestCase): def test_scalar_to_query_parameter(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 9671a27b8f8f..2a2ccfd989a6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -42,7 +42,7 @@ def _mock_job( mock_job = mock.create_autospec(job.QueryJob) mock_job.error_result = None mock_job.state = 'DONE' - mock_job.results.return_value = self._mock_results( + mock_job.result.return_value = self._mock_results( rows=rows, schema=schema, num_dml_affected_rows=num_dml_affected_rows) return mock_job @@ -219,6 +219,24 @@ def test_execute_w_query(self): row = cursor.fetchone() self.assertIsNone(row) + def test_execute_raises_if_result_raises(self): + import google.cloud.exceptions + + from google.cloud.bigquery import client + from google.cloud.bigquery import job + from google.cloud.bigquery.dbapi import connect + from google.cloud.bigquery.dbapi import exceptions + + job = mock.create_autospec(job.QueryJob) + job.result.side_effect = google.cloud.exceptions.GoogleCloudError('') + client = mock.create_autospec(client.Client) + client.run_async_query.return_value = job + connection = connect(client) + cursor = connection.cursor() + + with self.assertRaises(exceptions.DatabaseError): + cursor.execute('SELECT 1') + def test_executemany_w_dml(self): from google.cloud.bigquery.dbapi import connect connection = connect( diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 8b9d079df148..fcb518d9c502 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -13,7 +13,6 @@ # limitations under the License. import copy -import warnings from six.moves import http_client import unittest @@ -1560,16 +1559,6 @@ def test_query_results(self): self.assertIsInstance(results, QueryResults) self.assertIs(results._job, job) - def test_results_is_deprecated(self): - client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) - - with warnings.catch_warnings(record=True) as warned: - warnings.simplefilter('always') - job.results() - self.assertEqual(len(warned), 1) - self.assertIn('deprecated', str(warned[0])) - def test_result(self): from google.cloud.bigquery.query import QueryResults From bc833d8f19b4b055c1afc9264a81f6f5e8e00ebe Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 24 Jul 2017 15:29:38 -0700 Subject: [PATCH 0144/2016] Split polling future into its own module (#3662) --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 3e6a9f93418b..ef5353f9ff14 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -32,7 +32,7 @@ from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _EnumProperty from google.cloud.bigquery._helpers import _TypedProperty -import google.cloud.future.base +import google.cloud.future.polling _DONE_STATE = 'DONE' _STOPPED_REASON = 'stopped' @@ -141,7 +141,7 @@ class WriteDisposition(_EnumProperty): ALLOWED = (WRITE_APPEND, WRITE_TRUNCATE, WRITE_EMPTY) -class _AsyncJob(google.cloud.future.base.PollingFuture): +class _AsyncJob(google.cloud.future.polling.PollingFuture): """Base class for asynchronous jobs. :type name: str From 69a81a0794b43633e3cf7952fb670697552fb5a4 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Thu, 27 Jul 2017 11:21:30 -0700 Subject: [PATCH 0145/2016] Remove httplib2, replace with Requests (#3674) * Core: remove httplib2, replace with Requests Additionally remove make_exception in favor of from_http_status and from_http_response. * Datastore: replace httplib2 with Requests * DNS: replace httplib2 with Requests * Error Reporting: replace httplib2 with requests * Language: replace httplib2 with Requests * Logging: replace httplib2 with requests * Monitoring: replace httplib2 with Requests * Pubsub: replace httplib2 with Requests * Resource Manager: replace httplib2 with Requests * Runtimeconfig: replace httplib2 with Requests * Speech: replace httplib2 with Requests * Storage: replace httplib2 with Requests * BigQuery: replace httplib2 with Requests * Translate: replace httplib2 with Requests * Vision: replace httplib2 with Requests --- .../google/cloud/bigquery/client.py | 4 +-- .../google/cloud/bigquery/dataset.py | 2 +- .../google/cloud/bigquery/job.py | 14 ++------- .../google/cloud/bigquery/query.py | 2 +- .../google/cloud/bigquery/table.py | 29 +++---------------- .../tests/unit/test__http.py | 14 +++++---- .../tests/unit/test_job.py | 2 +- .../tests/unit/test_table.py | 10 ++++--- 8 files changed, 26 insertions(+), 51 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5f0101f35de5..f36d80978efd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -64,10 +64,10 @@ class Client(ClientWithProject): passed), falls back to the default inferred from the environment. - :type _http: :class:`~httplib2.Http` + :type _http: :class:`~requests.Session` :param _http: (Optional) HTTP object to make requests. Can be any object that defines ``request()`` with the same interface as - :meth:`~httplib2.Http.request`. If not passed, an + :meth:`requests.Session.request`. If not passed, an ``_http`` object is created that is bound to the ``credentials`` for the current object. This parameter should be considered private, and could diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index bce74ca9f366..8fb986cb848d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -364,7 +364,7 @@ def _parse_access_grants(access): def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` - :type api_response: httplib2.Response + :type api_response: dict :param api_response: response returned from an API call. """ self._properties.clear() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index ef5353f9ff14..c2d1feee7120 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -14,7 +14,6 @@ """Define API Jobs.""" -import collections import threading import six @@ -58,8 +57,6 @@ 'tableUnavailable': http_client.BAD_REQUEST, } -_FakeResponse = collections.namedtuple('_FakeResponse', ['status']) - def _error_result_to_exception(error_result): """Maps BigQuery error reasons to an exception. @@ -79,13 +76,8 @@ def _error_result_to_exception(error_result): reason = error_result.get('reason') status_code = _ERROR_REASON_TO_EXCEPTION.get( reason, http_client.INTERNAL_SERVER_ERROR) - # make_exception expects an httplib2 response object. - fake_response = _FakeResponse(status=status_code) - return exceptions.make_exception( - fake_response, - error_result.get('message', ''), - error_info=error_result, - use_json=False) + return exceptions.from_http_status( + status_code, error_result.get('message', ''), errors=[error_result]) class Compression(_EnumProperty): @@ -307,7 +299,7 @@ def _scrub_local_properties(self, cleaned): def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` - :type api_response: httplib2.Response + :type api_response: dict :param api_response: response returned from an API call """ cleaned = api_response.copy() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index d596deadfb40..502953b2c828 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -310,7 +310,7 @@ def schema(self): def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` - :type api_response: httplib2.Response + :type api_response: dict :param api_response: response returned from an API call """ self._properties.clear() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f7752bb8fc36..c32832a926ce 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -17,7 +17,6 @@ import datetime import os -import httplib2 import six import google.auth.transport.requests @@ -25,10 +24,9 @@ from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload +from google.cloud import exceptions from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime -from google.cloud.exceptions import NotFound -from google.cloud.exceptions import make_exception from google.cloud.iterator import HTTPIterator from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery._helpers import _item_to_row @@ -474,7 +472,7 @@ def _require_client(self, client): def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` - :type api_response: httplib2.Response + :type api_response: dict :param api_response: response returned from an API call """ self._properties.clear() @@ -563,7 +561,7 @@ def exists(self, client=None): try: client._connection.api_request(method='GET', path=self.path, query_params={'fields': 'id'}) - except NotFound: + except exceptions.NotFound: return False else: return True @@ -1113,7 +1111,7 @@ def upload_from_file(self, client, file_obj, metadata, size, num_retries) return client.job_from_resource(created_json) except resumable_media.InvalidResponse as exc: - _raise_from_invalid_response(exc) + raise exceptions.from_http_response(exc.response) # pylint: enable=too-many-arguments,too-many-locals @@ -1298,22 +1296,3 @@ def _get_upload_metadata(source_format, schema, dataset, name): 'load': load_config, }, } - - -def _raise_from_invalid_response(error, error_info=None): - """Re-wrap and raise an ``InvalidResponse`` exception. - - :type error: :exc:`google.resumable_media.InvalidResponse` - :param error: A caught exception from the ``google-resumable-media`` - library. - - :type error_info: str - :param error_info: (Optional) Extra information about the failed request. - - :raises: :class:`~google.cloud.exceptions.GoogleCloudError` corresponding - to the failed status code - """ - response = error.response - faux_response = httplib2.Response({'status': response.status_code}) - raise make_exception(faux_response, response.content, - error_info=error_info, use_json=False) diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index 9972e9859313..b8af254d3614 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -15,6 +15,7 @@ import unittest import mock +import requests class TestConnection(unittest.TestCase): @@ -55,10 +56,12 @@ def test_extra_headers(self): from google.cloud import _http as base_http from google.cloud.bigquery import _http as MUT - http = mock.Mock(spec=['request']) - response = mock.Mock(status=200, spec=['status']) + http = mock.create_autospec(requests.Session, instance=True) + response = requests.Response() + response.status_code = 200 data = b'brent-spiner' - http.request.return_value = response, data + response._content = data + http.request.return_value = response client = mock.Mock(_http=http, spec=['_http']) conn = self._make_one(client) @@ -68,15 +71,14 @@ def test_extra_headers(self): self.assertEqual(result, data) expected_headers = { - 'Content-Length': str(len(req_data)), 'Accept-Encoding': 'gzip', base_http.CLIENT_INFO_HEADER: MUT._CLIENT_INFO, 'User-Agent': conn.USER_AGENT, } expected_uri = conn.build_api_url('/rainbow') http.request.assert_called_once_with( - body=req_data, + data=req_data, headers=expected_headers, method='GET', - uri=expected_uri, + url=expected_uri, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index fcb518d9c502..d2ec7027d5e6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -31,7 +31,7 @@ def test_simple(self): exception = self._call_fut(error_result) self.assertEqual(exception.code, http_client.BAD_REQUEST) self.assertTrue(exception.message.startswith('bad request')) - self.assertIn("'reason': 'invalid'", exception.message) + self.assertIn(error_result, exception.errors) def test_missing_reason(self): error_result = {} diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 502c0495f9c9..eebb40a2e736 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1744,9 +1744,11 @@ def _make_table(): def _make_response(status_code, content='', headers={}): """Make a mock HTTP response.""" import requests - response = mock.create_autospec(requests.Response, instance=True) - response.content = content.encode('utf-8') - response.headers = headers + response = requests.Response() + response.request = requests.Request( + 'POST', 'http://example.com').prepare() + response._content = content.encode('utf-8') + response.headers.update(headers) response.status_code = status_code return response @@ -1921,7 +1923,7 @@ def test_upload_from_file_failure(self): table.upload_from_file( file_obj, source_format='CSV', rewind=True) - assert exc_info.value.message == response.content.decode('utf-8') + assert response.text in exc_info.value.message assert exc_info.value.errors == [] def test_upload_from_file_bad_mode(self): From 47b0b07c9ec91d64c51d3e3a47a46f286dcd0ae2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 27 Jul 2017 15:56:22 -0700 Subject: [PATCH 0146/2016] Increment BQ DB-API thread safety. (#3693) Increment to 2 per https://www.python.org/dev/peps/pep-0249/#threadsafety. The cursor object includes some state for paging through results and other things which are not protected by locs. Closes #3522. --- .../google/cloud/bigquery/dbapi/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py index 4e9c9a810da4..4786ef8ef5fa 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py @@ -55,8 +55,8 @@ apilevel = '2.0' -# Threads may share the module, but not connections. -threadsafety = 1 +# Threads may share the module and connections, but not cursors. +threadsafety = 2 paramstyle = 'pyformat' From 49036f888b41a4d68a681a90f91bbda221002b34 Mon Sep 17 00:00:00 2001 From: Willian Fuks Date: Fri, 28 Jul 2017 14:08:25 -0300 Subject: [PATCH 0147/2016] Added support for schema auto-detection feature in `LoadTableFromStorageJob` (#3648) --- .../google/cloud/bigquery/job.py | 45 ++++++++-- .../google-cloud-bigquery/tests/system.py | 81 ++++++++++++++++-- .../tests/unit/test_job.py | 82 +++++++++++++++++++ 3 files changed, 194 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index c2d1feee7120..953a2c265580 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -80,6 +80,20 @@ def _error_result_to_exception(error_result): status_code, error_result.get('message', ''), errors=[error_result]) +class AutoDetectSchema(_TypedProperty): + """Typed Property for ``autodetect`` properties. + + :raises ValueError: on ``set`` operation if ``instance.schema`` + is already defined. + """ + def __set__(self, instance, value): + self._validate(value) + if instance.schema: + raise ValueError('A schema should not be already defined ' + 'when using schema auto-detection') + setattr(instance._configuration, self._backing_name, value) + + class Compression(_EnumProperty): """Pseudo-enum for ``compression`` properties.""" GZIP = 'GZIP' @@ -505,6 +519,7 @@ class _LoadConfiguration(object): """ _allow_jagged_rows = None _allow_quoted_newlines = None + _autodetect = None _create_disposition = None _encoding = None _field_delimiter = None @@ -544,9 +559,10 @@ def __init__(self, name, destination, source_uris, client, schema=()): super(LoadTableFromStorageJob, self).__init__(name, client) self.destination = destination self.source_uris = source_uris - # Let the @property do validation. - self.schema = schema self._configuration = _LoadConfiguration() + # Let the @property do validation. This must occur after all other + # attributes have been set. + self.schema = schema @property def schema(self): @@ -564,12 +580,20 @@ def schema(self, value): :type value: list of :class:`SchemaField` :param value: fields describing the schema - :raises: TypeError if 'value' is not a sequence, or ValueError if - any item in the sequence is not a SchemaField + :raises TypeError: If ``value`is not a sequence. + :raises ValueError: If any item in the sequence is not + a ``SchemaField``. """ - if not all(isinstance(field, SchemaField) for field in value): - raise ValueError('Schema items must be fields') - self._schema = tuple(value) + if not value: + self._schema = () + else: + if not all(isinstance(field, SchemaField) for field in value): + raise ValueError('Schema items must be fields') + if self.autodetect: + raise ValueError( + 'Schema can not be set if `autodetect` property is True') + + self._schema = tuple(value) @property def input_file_bytes(self): @@ -625,6 +649,11 @@ def output_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines """ + autodetect = AutoDetectSchema('autodetect', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect + """ + create_disposition = CreateDisposition('create_disposition') """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition @@ -676,6 +705,8 @@ def _populate_config_resource(self, configuration): configuration['allowJaggedRows'] = self.allow_jagged_rows if self.allow_quoted_newlines is not None: configuration['allowQuotedNewlines'] = self.allow_quoted_newlines + if self.autodetect is not None: + configuration['autodetect'] = self.autodetect if self.create_disposition is not None: configuration['createDisposition'] = self.create_disposition if self.encoding is not None: diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1d3da3d2a83d..9d3bb7794256 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -13,6 +13,7 @@ # limitations under the License. import base64 +import csv import datetime import json import operator @@ -21,6 +22,8 @@ import unittest import uuid +import six + from google.cloud import bigquery from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi @@ -290,8 +293,6 @@ def test_update_table(self): @staticmethod def _fetch_single_page(table): - import six - iterator = table.fetch_data() page = six.next(iterator.pages) return list(page) @@ -341,7 +342,6 @@ def test_insert_data_then_dump_table(self): sorted(ROWS, key=by_age)) def test_load_table_from_local_file_then_dump_table(self): - import csv from google.cloud._testing import _NamedTemporaryFile ROWS = [ @@ -432,7 +432,6 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(ROWS, key=by_wavelength)) def test_load_table_from_storage_then_dump_table(self): - import csv from google.cloud._testing import _NamedTemporaryFile from google.cloud.storage import Client as StorageClient @@ -448,11 +447,11 @@ def test_load_table_from_storage_then_dump_table(self): ] TABLE_NAME = 'test_table' - s_client = StorageClient() + storage_client = StorageClient() # In the **very** rare case the bucket name is reserved, this # fails with a ConnectionError. - bucket = s_client.create_bucket(BUCKET_NAME) + bucket = storage_client.create_bucket(BUCKET_NAME) self.to_delete.append(bucket) blob = bucket.blob(BLOB_NAME) @@ -501,6 +500,75 @@ def test_load_table_from_storage_then_dump_table(self): self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age)) + def test_load_table_from_storage_w_autodetect_schema(self): + from google.cloud._testing import _NamedTemporaryFile + from google.cloud.storage import Client as StorageClient + from google.cloud.bigquery import SchemaField + + local_id = unique_resource_id() + bucket_name = 'bq_load_test' + local_id + blob_name = 'person_ages.csv' + gs_url = 'gs://{}/{}'.format(bucket_name, blob_name) + rows = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] * 100 # BigQuery internally uses the first 100 rows to detect schema + table_name = 'test_table' + + storage_client = StorageClient() + + # In the **very** rare case the bucket name is reserved, this + # fails with a ConnectionError. + bucket = storage_client.create_bucket(bucket_name) + self.to_delete.append(bucket) + + blob = bucket.blob(blob_name) + + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as csv_write: + writer = csv.writer(csv_write) + writer.writerow(('Full Name', 'Age')) + writer.writerows(rows) + + with open(temp.name, 'rb') as csv_read: + blob.upload_from_file(csv_read, content_type='text/csv') + + self.to_delete.insert(0, blob) + + dataset = Config.CLIENT.dataset( + _make_dataset_name('load_gcs_then_dump')) + + retry_403(dataset.create)() + self.to_delete.append(dataset) + + table = dataset.table(table_name) + self.to_delete.insert(0, table) + + job = Config.CLIENT.load_table_from_storage( + 'bq_load_storage_test_' + local_id, table, gs_url) + job.autodetect = True + + job.begin() + + # Allow for 90 seconds of "warm up" before rows visible. See + # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds + retry = RetryInstanceState(_job_done, max_tries=8) + retry(job.reload)() + + table.reload() + field_name = SchemaField( + u'Full_Name', u'string', u'NULLABLE', None, ()) + field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ()) + self.assertEqual(table.schema, [field_name, field_age]) + + actual_rows = self._fetch_single_page(table) + by_age = operator.itemgetter(1) + self.assertEqual( + sorted(actual_rows, key=by_age), sorted(rows, key=by_age)) + def test_job_cancel(self): DATASET_NAME = _make_dataset_name('job_cancel') JOB_NAME = 'fetch_' + DATASET_NAME @@ -674,7 +742,6 @@ def test_dbapi_w_standard_sql_types(self): self.assertIsNone(row) def _load_table_for_dml(self, rows, dataset_name, table_name): - import csv from google.cloud._testing import _NamedTemporaryFile dataset = Config.CLIENT.dataset(dataset_name) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index d2ec7027d5e6..46326441a5e1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -189,6 +189,11 @@ def _verifyBooleanConfigProperties(self, job, config): config['allowQuotedNewlines']) else: self.assertIsNone(job.allow_quoted_newlines) + if 'autodetect' in config: + self.assertEqual( + job.autodetect, config['autodetect']) + else: + self.assertIsNone(job.autodetect) if 'ignoreUnknownValues' in config: self.assertEqual(job.ignore_unknown_values, config['ignoreUnknownValues']) @@ -277,6 +282,7 @@ def test_ctor(self): # set/read from resource['configuration']['load'] self.assertIsNone(job.allow_jagged_rows) self.assertIsNone(job.allow_quoted_newlines) + self.assertIsNone(job.autodetect) self.assertIsNone(job.create_disposition) self.assertIsNone(job.encoding) self.assertIsNone(job.field_delimiter) @@ -326,6 +332,41 @@ def test_schema_setter(self): job.schema = [full_name, age] self.assertEqual(job.schema, [full_name, age]) + def test_schema_setter_w_autodetect(self): + from google.cloud.bigquery.schema import SchemaField + + client = _Client(self.PROJECT) + table = _Table() + full_name = SchemaField('full_name', 'STRING') + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job.autodetect = False + job.schema = [full_name] + self.assertEqual(job.schema, [full_name]) + + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job.autodetect = True + with self.assertRaises(ValueError): + job.schema = [full_name] + + def test_autodetect_setter_w_schema(self): + from google.cloud.bigquery.schema import SchemaField + + client = _Client(self.PROJECT) + table = _Table() + full_name = SchemaField('full_name', 'STRING') + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + + job.autodetect = True + job.schema = [] + self.assertEqual(job.schema, []) + + job.autodetect = False + job.schema = [full_name] + self.assertEqual(job.autodetect, False) + + with self.assertRaises(ValueError): + job.autodetect = True + def test_props_set_by_server(self): import datetime from google.cloud._helpers import UTC @@ -491,6 +532,47 @@ def test_begin_w_bound_client(self): self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + def test_begin_w_autodetect(self): + path = '/projects/{}/jobs'.format(self.PROJECT) + resource = self._makeResource() + resource['configuration']['load']['autodetect'] = True + # Ensure None for missing server-set props + del resource['statistics']['creationTime'] + del resource['etag'] + del resource['selfLink'] + del resource['user_email'] + conn = _Connection(resource) + client = _Client(project=self.PROJECT, connection=conn) + table = _Table() + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job.autodetect = True + job.begin() + + sent = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + 'configuration': { + 'load': { + 'sourceUris': [self.SOURCE1], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_NAME, + 'tableId': self.TABLE_NAME, + }, + 'autodetect': True + }, + }, + } + expected_request = { + 'method': 'POST', + 'path': path, + 'data': sent, + } + self.assertEqual(conn._requested, [expected_request]) + self._verifyResourceProperties(job, resource) + def test_begin_w_alternate_client(self): from google.cloud.bigquery.schema import SchemaField From 6e69b63260ad8919a0a1a22caec68da3e2e25e11 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 31 Jul 2017 12:29:57 -0700 Subject: [PATCH 0148/2016] BigQuery & Storage: use client http for resumable media (#3705) * BigQuery: Use client transport for resumable media * Storage: Use client transport for resumable media --- .../google/cloud/bigquery/table.py | 14 ++-- .../tests/unit/test_table.py | 65 +++++++++---------- 2 files changed, 35 insertions(+), 44 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index c32832a926ce..9960b560624d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -19,7 +19,6 @@ import six -import google.auth.transport.requests from google import resumable_media from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload @@ -823,8 +822,8 @@ def insert_data(self, return errors - def _make_transport(self, client): - """Make an authenticated transport with a client's credentials. + def _get_transport(self, client): + """Return the client's transport. :type client: :class:`~google.cloud.bigquery.client.Client` :param client: The client to use. @@ -834,10 +833,7 @@ def _make_transport(self, client): :returns: The transport (with credentials) that will make authenticated requests. """ - # Create a ``requests`` transport with the client's credentials. - transport = google.auth.transport.requests.AuthorizedSession( - client._credentials) - return transport + return client._http def _initiate_resumable_upload(self, client, stream, metadata, num_retries): @@ -865,7 +861,7 @@ def _initiate_resumable_upload(self, client, stream, * The ``transport`` used to initiate the upload. """ chunk_size = _DEFAULT_CHUNKSIZE - transport = self._make_transport(client) + transport = self._get_transport(client) headers = _get_upload_headers(client._connection.USER_AGENT) upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) upload = ResumableUpload(upload_url, chunk_size, headers=headers) @@ -941,7 +937,7 @@ def _do_multipart_upload(self, client, stream, metadata, msg = _READ_LESS_THAN_SIZE.format(size, len(data)) raise ValueError(msg) - transport = self._make_transport(client) + transport = self._get_transport(client) headers = _get_upload_headers(client._connection.USER_AGENT) upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index eebb40a2e736..3bab58b6c8f8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1561,14 +1561,14 @@ def _row_data(row): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['data'], SENT) - @mock.patch('google.auth.transport.requests.AuthorizedSession') - def test__make_transport(self, session_factory): - client = mock.Mock(spec=[u'_credentials']) + def test__get_transport(self): + client = mock.Mock(spec=[u'_credentials', '_http']) + client._http = mock.sentinel.http table = self._make_one(self.TABLE_NAME, None) - transport = table._make_transport(client) - self.assertIs(transport, session_factory.return_value) - session_factory.assert_called_once_with(client._credentials) + transport = table._get_transport(client) + + self.assertIs(transport, mock.sentinel.http) @staticmethod def _mock_requests_response(status_code, headers, content=b''): @@ -1600,8 +1600,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): response_headers = {'location': resumable_url} fake_transport = self._mock_transport( http_client.OK, response_headers) - table._make_transport = mock.Mock( - return_value=fake_transport, spec=[]) + client._http = fake_transport # Create some mock arguments and call the method under test. data = b'goodbye gudbi gootbee' @@ -1640,7 +1639,6 @@ def _initiate_resumable_upload_helper(self, num_retries=None): self.assertEqual(stream.tell(), 0) # Check the mocks. - table._make_transport.assert_called_once_with(client) request_headers = expected_headers.copy() request_headers['x-upload-content-type'] = _GENERIC_CONTENT_TYPE fake_transport.request.assert_called_once_with( @@ -1668,7 +1666,7 @@ def _do_multipart_upload_success_helper( # Create mocks to be checked for doing transport. fake_transport = self._mock_transport(http_client.OK, {}) - table._make_transport = mock.Mock(return_value=fake_transport, spec=[]) + client._http = fake_transport # Create some mock arguments. data = b'Bzzzz-zap \x00\x01\xf4' @@ -1682,7 +1680,6 @@ def _do_multipart_upload_success_helper( # Check the mocks and the returned value. self.assertIs(response, fake_transport.request.return_value) self.assertEqual(stream.tell(), size) - table._make_transport.assert_called_once_with(client) get_boundary.assert_called_once_with() upload_url = ( @@ -1723,7 +1720,7 @@ class TestTableUpload(object): # rather than `unittest`-style. @staticmethod - def _make_table(): + def _make_table(transport=None): from google.cloud.bigquery import _http from google.cloud.bigquery import client from google.cloud.bigquery import dataset @@ -1733,6 +1730,7 @@ def _make_table(): client = mock.create_autospec(client.Client, instance=True) client._connection = connection client._credentials = mock.sentinel.credentials + client._http = transport client.project = 'project_id' dataset = dataset.Dataset('test_dataset', client) @@ -1955,57 +1953,54 @@ def _make_resumable_upload_responses(cls, size): return [initial_response, data_response, final_response] @staticmethod - def _make_transport_patch(table, responses=None): - """Patch a table's _make_transport method to return given responses.""" + def _make_transport(responses=None): import google.auth.transport.requests transport = mock.create_autospec( google.auth.transport.requests.AuthorizedSession, instance=True) transport.request.side_effect = responses - return mock.patch.object( - table, '_make_transport', return_value=transport, autospec=True) + return transport def test__do_resumable_upload(self): - table = self._make_table() file_obj = self._make_file_obj() file_obj_len = len(file_obj.getvalue()) - responses = self._make_resumable_upload_responses(file_obj_len) + transport = self._make_transport( + self._make_resumable_upload_responses(file_obj_len)) + table = self._make_table(transport) - with self._make_transport_patch(table, responses) as transport: - result = table._do_resumable_upload( - table._dataset._client, - file_obj, - self.EXPECTED_CONFIGURATION, - None) + result = table._do_resumable_upload( + table._dataset._client, + file_obj, + self.EXPECTED_CONFIGURATION, + None) content = result.content.decode('utf-8') assert json.loads(content) == {'size': file_obj_len} # Verify that configuration data was passed in with the initial # request. - transport.return_value.request.assert_any_call( + transport.request.assert_any_call( 'POST', mock.ANY, data=json.dumps(self.EXPECTED_CONFIGURATION).encode('utf-8'), headers=mock.ANY) def test__do_multipart_upload(self): - table = self._make_table() + transport = self._make_transport([self._make_response(http_client.OK)]) + table = self._make_table(transport) file_obj = self._make_file_obj() file_obj_len = len(file_obj.getvalue()) - responses = [self._make_response(http_client.OK)] - with self._make_transport_patch(table, responses) as transport: - table._do_multipart_upload( - table._dataset._client, - file_obj, - self.EXPECTED_CONFIGURATION, - file_obj_len, - None) + table._do_multipart_upload( + table._dataset._client, + file_obj, + self.EXPECTED_CONFIGURATION, + file_obj_len, + None) # Verify that configuration data was passed in with the initial # request. - request_args = transport.return_value.request.mock_calls[0][2] + request_args = transport.request.mock_calls[0][2] request_data = request_args['data'].decode('utf-8') request_headers = request_args['headers'] From 2cd95de6d9dea5398bf67eea8f8ad62468289851 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Fri, 4 Aug 2017 16:45:43 -0700 Subject: [PATCH 0149/2016] Updating all affected packages after google-cloud-core update. (#3730) * Updating all affected packages after google-cloud-core update. * Moving 'pip install .' **after** subpackages in nox docs. @lukesneeringer still hasn't explained why it was moved. In it's current location, the depencencies are first retrieved from PyPI (which fails here for the unreleased versions), e.g. https://circleci.com/gh/GoogleCloudPlatform/google-cloud-python/2716 --- packages/google-cloud-bigquery/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index eeb2d90549d8..69fbb9cc5eb6 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -51,7 +51,7 @@ REQUIREMENTS = [ - 'google-cloud-core >= 0.25.0, < 0.26dev', + 'google-cloud-core >= 0.26.0, < 0.27dev', 'google-auth >= 1.0.0', 'google-resumable-media >= 0.2.1', 'requests >= 2.0.0', @@ -59,7 +59,7 @@ setup( name='google-cloud-bigquery', - version='0.25.0', + version='0.26.0', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From 5c069545fb4cdae5996fe8568cea556f3ae32464 Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Mon, 7 Aug 2017 09:16:23 -0700 Subject: [PATCH 0150/2016] BigQuery: Remove client-side enum validation. (#3735) --- .../google/cloud/bigquery/_helpers.py | 10 ---------- .../google-cloud-bigquery/google/cloud/bigquery/job.py | 7 ------- .../google-cloud-bigquery/tests/unit/test__helpers.py | 5 +---- 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 6641fbe01b42..4da9be9f0723 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -306,19 +306,9 @@ def _validate(self, value): class _EnumProperty(_ConfigurationProperty): """Pseudo-enumeration class. - Subclasses must define ``ALLOWED`` as a class-level constant: it must - be a sequence of strings. - :type name: str :param name: name of the property. """ - def _validate(self, value): - """Check that ``value`` is one of the allowed values. - - :raises: ValueError if value is not allowed. - """ - if value not in self.ALLOWED: - raise ValueError('Pass one of: %s' % ', '.join(self.ALLOWED)) class UDFResource(object): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 953a2c265580..1519e2a0cf6e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -98,14 +98,12 @@ class Compression(_EnumProperty): """Pseudo-enum for ``compression`` properties.""" GZIP = 'GZIP' NONE = 'NONE' - ALLOWED = (GZIP, NONE) class CreateDisposition(_EnumProperty): """Pseudo-enum for ``create_disposition`` properties.""" CREATE_IF_NEEDED = 'CREATE_IF_NEEDED' CREATE_NEVER = 'CREATE_NEVER' - ALLOWED = (CREATE_IF_NEEDED, CREATE_NEVER) class DestinationFormat(_EnumProperty): @@ -113,21 +111,18 @@ class DestinationFormat(_EnumProperty): CSV = 'CSV' NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' AVRO = 'AVRO' - ALLOWED = (CSV, NEWLINE_DELIMITED_JSON, AVRO) class Encoding(_EnumProperty): """Pseudo-enum for ``encoding`` properties.""" UTF_8 = 'UTF-8' ISO_8559_1 = 'ISO-8559-1' - ALLOWED = (UTF_8, ISO_8559_1) class QueryPriority(_EnumProperty): """Pseudo-enum for ``QueryJob.priority`` property.""" INTERACTIVE = 'INTERACTIVE' BATCH = 'BATCH' - ALLOWED = (INTERACTIVE, BATCH) class SourceFormat(_EnumProperty): @@ -136,7 +131,6 @@ class SourceFormat(_EnumProperty): DATASTORE_BACKUP = 'DATASTORE_BACKUP' NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' AVRO = 'AVRO' - ALLOWED = (CSV, DATASTORE_BACKUP, NEWLINE_DELIMITED_JSON, AVRO) class WriteDisposition(_EnumProperty): @@ -144,7 +138,6 @@ class WriteDisposition(_EnumProperty): WRITE_APPEND = 'WRITE_APPEND' WRITE_TRUNCATE = 'WRITE_TRUNCATE' WRITE_EMPTY = 'WRITE_EMPTY' - ALLOWED = (WRITE_APPEND, WRITE_TRUNCATE, WRITE_EMPTY) class _AsyncJob(google.cloud.future.polling.PollingFuture): diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index a2b561e36e88..7648ed5bee18 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -765,7 +765,7 @@ def _get_target_class(): def test_it(self): class Sub(self._get_target_class()): - ALLOWED = ('FOO', 'BAR', 'BAZ') + pass class Configuration(object): _attr = None @@ -777,9 +777,6 @@ def __init__(self): self._configuration = Configuration() wrapper = Wrapper() - with self.assertRaises(ValueError): - wrapper.attr = 'BOGUS' - wrapper.attr = 'FOO' self.assertEqual(wrapper.attr, 'FOO') self.assertEqual(wrapper._configuration._attr, 'FOO') From bcd2c696cc016204085d2ac702701ceca40f3678 Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Mon, 7 Aug 2017 14:49:41 -0700 Subject: [PATCH 0151/2016] Bump requests minimum bound to 2.18.0 (#3748) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 69fbb9cc5eb6..98dc37c8cc7a 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -54,7 +54,7 @@ 'google-cloud-core >= 0.26.0, < 0.27dev', 'google-auth >= 1.0.0', 'google-resumable-media >= 0.2.1', - 'requests >= 2.0.0', + 'requests >= 2.18.0', ] setup( From 8bc1b05c603ddb3e95ab5331e6f3b5a141a458e3 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 7 Aug 2017 17:50:10 -0400 Subject: [PATCH 0152/2016] Add 'Table.row_from_mapping' helper. (#3425) --- .../google/cloud/bigquery/table.py | 29 +++++++++ .../tests/unit/test_table.py | 61 ++++++++++++++++++- 2 files changed, 87 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 9960b560624d..ffbd47ca6c4c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -733,6 +733,35 @@ def fetch_data(self, max_results=None, page_token=None, client=None): iterator._NEXT_TOKEN = 'pageToken' return iterator + def row_from_mapping(self, mapping): + """Convert a mapping to a row tuple using the schema. + + :type mapping: dict + :param mapping: Mapping of row data: must contain keys for all + required fields in the schema. Keys which do not correspond + to a field in the schema are ignored. + + :rtype: tuple + :returns: Tuple whose elements are ordered according to the table's + schema. + :raises: ValueError if table's schema is not set + """ + if len(self._schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + + row = [] + for field in self.schema: + if field.mode == 'REQUIRED': + row.append(mapping[field.name]) + elif field.mode == 'REPEATED': + row.append(mapping.get(field.name, ())) + elif field.mode == 'NULLABLE': + row.append(mapping.get(field.name)) + else: + raise ValueError( + "Unknown field mode: {}".format(field.mode)) + return tuple(row) + def insert_data(self, rows, row_ids=None, diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 3bab58b6c8f8..125114b6f3ac 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1347,6 +1347,61 @@ def test_fetch_data_w_record_schema(self): self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % PATH) + def test_row_from_mapping_wo_schema(self): + from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA + MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} + client = _Client(project=self.PROJECT) + dataset = _Dataset(client) + table = self._make_one(self.TABLE_NAME, dataset=dataset) + + with self.assertRaises(ValueError) as exc: + table.row_from_mapping(MAPPING) + + self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) + + def test_row_from_mapping_w_invalid_schema(self): + from google.cloud.bigquery.table import SchemaField + MAPPING = { + 'full_name': 'Phred Phlyntstone', + 'age': 32, + 'colors': ['red', 'green'], + 'bogus': 'WHATEVER', + } + client = _Client(project=self.PROJECT) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + colors = SchemaField('colors', 'DATETIME', mode='REPEATED') + bogus = SchemaField('joined', 'STRING', mode='BOGUS') + table = self._make_one(self.TABLE_NAME, dataset=dataset, + schema=[full_name, age, colors, bogus]) + + with self.assertRaises(ValueError) as exc: + table.row_from_mapping(MAPPING) + + self.assertIn('Unknown field mode: BOGUS', str(exc.exception)) + + def test_row_from_mapping_w_schema(self): + from google.cloud.bigquery.table import SchemaField + MAPPING = { + 'full_name': 'Phred Phlyntstone', + 'age': 32, + 'colors': ['red', 'green'], + 'extra': 'IGNORED', + } + client = _Client(project=self.PROJECT) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + colors = SchemaField('colors', 'DATETIME', mode='REPEATED') + joined = SchemaField('joined', 'STRING', mode='NULLABLE') + table = self._make_one(self.TABLE_NAME, dataset=dataset, + schema=[full_name, age, colors, joined]) + + self.assertEqual( + table.row_from_mapping(MAPPING), + ('Phred Phlyntstone', 32, ['red', 'green'], None)) + def test_insert_data_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA @@ -2055,7 +2110,7 @@ def test__parse_schema_resource_subfields(self): RESOURCE['schema']['fields'].append( {'name': 'phone', 'type': 'RECORD', - 'mode': 'REPEATABLE', + 'mode': 'REPEATED', 'fields': [{'name': 'type', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -2123,7 +2178,7 @@ def test_w_subfields(self): full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') ph_type = SchemaField('type', 'STRING', 'REQUIRED') ph_num = SchemaField('number', 'STRING', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='REPEATABLE', + phone = SchemaField('phone', 'RECORD', mode='REPEATED', fields=[ph_type, ph_num]) resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) @@ -2134,7 +2189,7 @@ def test_w_subfields(self): self.assertEqual(resource[1], {'name': 'phone', 'type': 'RECORD', - 'mode': 'REPEATABLE', + 'mode': 'REPEATED', 'fields': [{'name': 'type', 'type': 'STRING', 'mode': 'REQUIRED'}, From 6eb65415ed1311645105e785946279ec54ad78dc Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Tue, 8 Aug 2017 12:36:20 -0700 Subject: [PATCH 0153/2016] Reference valid input formats in API docs. (#3758) --- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index ffbd47ca6c4c..c6bf5db893ab 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1045,9 +1045,10 @@ def upload_from_file(self, :param file_obj: A file handle opened in binary mode for reading. :type source_format: str - :param source_format: one of 'CSV' or 'NEWLINE_DELIMITED_JSON'. - job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob` + :param source_format: Any supported format. The full list of supported + formats is documented under the + ``configuration.extract.destinationFormat`` property on this page: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs :type rewind: bool :param rewind: If True, seek to the beginning of the file handle before From c748e3aa7c41bdcdca6384b958baaf23042340e8 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 8 Aug 2017 14:03:04 -0700 Subject: [PATCH 0154/2016] Move google.cloud.future to google.api.core (#3764) --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 1519e2a0cf6e..43d7fd8f23c3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -19,6 +19,7 @@ import six from six.moves import http_client +import google.api.core.future.polling from google.cloud import exceptions from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds @@ -31,7 +32,6 @@ from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _EnumProperty from google.cloud.bigquery._helpers import _TypedProperty -import google.cloud.future.polling _DONE_STATE = 'DONE' _STOPPED_REASON = 'stopped' @@ -140,7 +140,7 @@ class WriteDisposition(_EnumProperty): WRITE_EMPTY = 'WRITE_EMPTY' -class _AsyncJob(google.cloud.future.polling.PollingFuture): +class _AsyncJob(google.api.core.future.polling.PollingFuture): """Base class for asynchronous jobs. :type name: str @@ -496,7 +496,7 @@ def cancelled(self): This always returns False. It's not possible to check if a job was cancelled in the API. This method is here to satisfy the interface - for :class:`google.cloud.future.Future`. + for :class:`google.api.core.future.Future`. :rtype: bool :returns: False From 89736533e8b7fb79873db6250073119a7f33c96e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 8 Aug 2017 14:50:31 -0700 Subject: [PATCH 0155/2016] Use latest/ directory for docs instead of stable/ (#3766) See also https://github.com/GoogleCloudPlatform/google-cloud-python/pull/3763 $ sed -i '' 's/googlecloudplatform.github.io\/google-cloud-python\/stable\//googlecloudplatform.github.io\/google-cloud-python\/latest\//g' **/*.rst --- packages/google-cloud-bigquery/README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 7e4f0cb72dae..c25b84c6bebd 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -9,7 +9,7 @@ Python Client for Google BigQuery - `Documentation`_ -.. _Documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery/usage.html +.. _Documentation: https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/usage.html Quick Start ----------- @@ -86,7 +86,7 @@ Perform a synchronous query See the ``google-cloud-python`` API `BigQuery documentation`_ to learn how to connect to BigQuery using this Client Library. -.. _BigQuery documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery/usage.html +.. _BigQuery documentation: https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/usage.html .. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ From 77a35decaab571aed565dfaddd5fb5af0e4dc052 Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Tue, 8 Aug 2017 14:51:50 -0700 Subject: [PATCH 0156/2016] Fix __eq__ and __ne__. (#3765) --- .../google/cloud/bigquery/_helpers.py | 5 +++++ .../google/cloud/bigquery/dataset.py | 5 +++++ .../google/cloud/bigquery/schema.py | 10 +++------- .../tests/unit/test__helpers.py | 14 ++++++++++++++ .../tests/unit/test_dataset.py | 7 +++++++ .../tests/unit/test_schema.py | 6 ++++-- 6 files changed, 38 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 4da9be9f0723..deb83516b9d7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -328,10 +328,15 @@ def __init__(self, udf_type, value): self.value = value def __eq__(self, other): + if not isinstance(other, UDFResource): + return NotImplemented return( self.udf_type == other.udf_type and self.value == other.value) + def __ne__(self, other): + return not self == other + class UDFResourcesProperty(object): """Custom property type, holding :class:`UDFResource` instances.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 8fb986cb848d..1304d5028873 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -76,11 +76,16 @@ def __init__(self, role, entity_type, entity_id): self.entity_id = entity_id def __eq__(self, other): + if not isinstance(other, AccessGrant): + return NotImplemented return ( self.role == other.role and self.entity_type == other.entity_type and self.entity_id == other.entity_id) + def __ne__(self, other): + return not self == other + def __repr__(self): return '' % ( self.role, self.entity_type, self.entity_id) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index edd8dd68f3bd..e98d67c30fb6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -101,16 +101,12 @@ def _key(self): ) def __eq__(self, other): - if isinstance(other, SchemaField): - return self._key() == other._key() - else: + if not isinstance(other, SchemaField): return NotImplemented + return self._key() == other._key() def __ne__(self, other): - if isinstance(other, SchemaField): - return self._key() != other._key() - else: - return NotImplemented + return not self == other def __hash__(self): return hash(self._key()) diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 7648ed5bee18..581b4b9a42fc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -14,6 +14,8 @@ import unittest +import mock + class Test_not_null(unittest.TestCase): @@ -815,6 +817,18 @@ def test_instance_getter_empty(self): instance = klass() self.assertEqual(instance.udf_resources, []) + def test_resource_equality(self): + from google.cloud.bigquery._helpers import UDFResource + + resource1a = UDFResource('resourceUri', 'gs://bucket/file.js') + resource1b = UDFResource('resourceUri', 'gs://bucket/file.js') + resource2 = UDFResource('resourceUri', 'gs://bucket/other.js') + + self.assertEqual(resource1a, resource1b) + self.assertNotEqual(resource1a, resource2) + self.assertNotEqual(resource1a, object()) + self.assertEqual(resource1a, mock.ANY) + def test_instance_getter_w_non_empty_list(self): from google.cloud.bigquery._helpers import UDFResource diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 97721554f1b6..164f9ed0a2b4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -14,6 +14,8 @@ import unittest +import mock + class TestAccessGrant(unittest.TestCase): @@ -77,6 +79,11 @@ def test___eq___hit(self): other = self._make_one('OWNER', 'userByEmail', 'phred@example.com') self.assertEqual(grant, other) + def test__eq___type_mismatch(self): + grant = self._make_one('OWNER', 'userByEmail', 'silly@example.com') + self.assertNotEqual(grant, object()) + self.assertEqual(grant, mock.ANY) + class TestDataset(unittest.TestCase): PROJECT = 'project' diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index bf3cf2e025d1..84f910a10d8e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -14,6 +14,8 @@ import unittest +import mock + class TestSchemaField(unittest.TestCase): @@ -101,7 +103,7 @@ def test___eq___wrong_type(self): field = self._make_one('test', 'STRING') other = object() self.assertNotEqual(field, other) - self.assertIs(field.__eq__(other), NotImplemented) + self.assertEqual(field, mock.ANY) def test___eq___name_mismatch(self): field = self._make_one('test', 'STRING') @@ -155,7 +157,7 @@ def test___ne___wrong_type(self): field = self._make_one('toast', 'INTEGER') other = object() self.assertNotEqual(field, other) - self.assertIs(field.__ne__(other), NotImplemented) + self.assertEqual(field, mock.ANY) def test___ne___same_value(self): field1 = self._make_one('test', 'TIMESTAMP', mode='REPEATED') From d614284d4bb2a36a9b327fc289d0265556e0ae83 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Wed, 9 Aug 2017 10:02:05 -0700 Subject: [PATCH 0157/2016] Move google.cloud.iterator to google.api.core.page_iterator (#3770) * Move google.cloud.iterator to google.api.core.page_iterator * Re-write tests to pytest style. * Make GAXIterator private- it will soon be removed. * Pass api_request into HTTPIterator to avoid accessing private members * BigQuery: use google.api.core.page_iterator * DNS: use google.api.core.page_iterator * Logging: use google.api.core.page_iterator * PubSub: use google.api.core.page_iterator * Resource manager: use google.api.core.page_iterator * Runtimeconfig: use google.api.core.page_iterator * logging: use google.api.core._GAXIterator * Storage: use google.api.core.page_iterator * Pubsub: use google.api.core._GAXIterator * Trace: use google.api.core._GAXIterator * Spanner: use google.api.core._GAXIterator --- .../google/cloud/bigquery/_helpers.py | 4 +- .../google/cloud/bigquery/client.py | 51 ++++++++++++------- .../google/cloud/bigquery/dataset.py | 17 ++++--- .../google/cloud/bigquery/query.py | 23 +++++---- .../google/cloud/bigquery/table.py | 17 ++++--- 5 files changed, 69 insertions(+), 43 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index deb83516b9d7..9358229e630a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -684,7 +684,7 @@ def _item_to_row(iterator, resource): added to the iterator after being created, which should be done by the caller. - :type iterator: :class:`~google.cloud.iterator.Iterator` + :type iterator: :class:`~google.api.core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type resource: dict @@ -700,7 +700,7 @@ def _item_to_row(iterator, resource): def _rows_page_start(iterator, page, response): """Grab total rows when :class:`~google.cloud.iterator.Page` starts. - :type iterator: :class:`~google.cloud.iterator.Iterator` + :type iterator: :class:`~google.api.core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type page: :class:`~google.cloud.iterator.Page` diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f36d80978efd..d9ff17d71720 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -14,7 +14,7 @@ """Client for interacting with the Google BigQuery API.""" - +from google.api.core import page_iterator from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset @@ -23,7 +23,6 @@ from google.cloud.bigquery.job import LoadTableFromStorageJob from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.query import QueryResults -from google.cloud.iterator import HTTPIterator class Project(object): @@ -98,13 +97,17 @@ def list_projects(self, max_results=None, page_token=None): not passed, the API will return the first page of projects. - :rtype: :class:`~google.cloud.iterator.Iterator` + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.client.Project` accessible to the current client. """ - return HTTPIterator( - client=self, path='/projects', item_to_value=_item_to_project, - items_key='projects', page_token=page_token, + return page_iterator.HTTPIterator( + client=self, + api_request=self._connection.api_request, + path='/projects', + item_to_value=_item_to_project, + items_key='projects', + page_token=page_token, max_results=max_results) def list_datasets(self, include_all=False, max_results=None, @@ -126,7 +129,7 @@ def list_datasets(self, include_all=False, max_results=None, not passed, the API will return the first page of datasets. - :rtype: :class:`~google.cloud.iterator.Iterator` + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`. accessible to the current client. """ @@ -134,10 +137,15 @@ def list_datasets(self, include_all=False, max_results=None, if include_all: extra_params['all'] = True path = '/projects/%s/datasets' % (self.project,) - return HTTPIterator( - client=self, path=path, item_to_value=_item_to_dataset, - items_key='datasets', page_token=page_token, - max_results=max_results, extra_params=extra_params) + return page_iterator.HTTPIterator( + client=self, + api_request=self._connection.api_request, + path=path, + item_to_value=_item_to_dataset, + items_key='datasets', + page_token=page_token, + max_results=max_results, + extra_params=extra_params) def dataset(self, dataset_name, project=None): """Construct a dataset bound to this client. @@ -207,7 +215,7 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, * ``"pending"`` * ``"running"`` - :rtype: :class:`~google.cloud.iterator.Iterator` + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterable of job instances. """ extra_params = {'projection': 'full'} @@ -219,10 +227,15 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, extra_params['stateFilter'] = state_filter path = '/projects/%s/jobs' % (self.project,) - return HTTPIterator( - client=self, path=path, item_to_value=_item_to_job, - items_key='jobs', page_token=page_token, - max_results=max_results, extra_params=extra_params) + return page_iterator.HTTPIterator( + client=self, + api_request=self._connection.api_request, + path=path, + item_to_value=_item_to_job, + items_key='jobs', + page_token=page_token, + max_results=max_results, + extra_params=extra_params) def load_table_from_storage(self, job_name, destination, *source_uris): """Construct a job for loading data into a table from CloudStorage. @@ -349,7 +362,7 @@ def run_sync_query(self, query, udf_resources=(), query_parameters=()): def _item_to_project(iterator, resource): """Convert a JSON project to the native object. - :type iterator: :class:`~google.cloud.iterator.Iterator` + :type iterator: :class:`~google.api.core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type resource: dict @@ -365,7 +378,7 @@ def _item_to_project(iterator, resource): def _item_to_dataset(iterator, resource): """Convert a JSON dataset to the native object. - :type iterator: :class:`~google.cloud.iterator.Iterator` + :type iterator: :class:`~google.api.core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type resource: dict @@ -380,7 +393,7 @@ def _item_to_dataset(iterator, resource): def _item_to_job(iterator, resource): """Convert a JSON job to the native object. - :type iterator: :class:`~google.cloud.iterator.Iterator` + :type iterator: :class:`~google.api.core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type resource: dict diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 1304d5028873..d25f6747285f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -15,10 +15,10 @@ """Define API Datasets.""" import six +from google.api.core import page_iterator from google.cloud._helpers import _datetime_from_microseconds from google.cloud.exceptions import NotFound from google.cloud.bigquery.table import Table -from google.cloud.iterator import HTTPIterator class AccessGrant(object): @@ -561,14 +561,19 @@ def list_tables(self, max_results=None, page_token=None): datasets. If not passed, the API will return the first page of datasets. - :rtype: :class:`~google.cloud.iterator.Iterator` + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` contained within the current dataset. """ path = '/projects/%s/datasets/%s/tables' % (self.project, self.name) - result = HTTPIterator(client=self._client, path=path, - item_to_value=_item_to_table, items_key='tables', - page_token=page_token, max_results=max_results) + result = page_iterator.HTTPIterator( + client=self._client, + api_request=self._client._connection.api_request, + path=path, + item_to_value=_item_to_table, + items_key='tables', + page_token=page_token, + max_results=max_results) result.dataset = self return result @@ -590,7 +595,7 @@ def table(self, name, schema=()): def _item_to_table(iterator, resource): """Convert a JSON table to the native object. - :type iterator: :class:`~google.cloud.iterator.Iterator` + :type iterator: :class:`~google.api.core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type resource: dict diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 502953b2c828..dfa0a422a68a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -16,7 +16,7 @@ import six -from google.cloud.iterator import HTTPIterator +from google.api.core import page_iterator from google.cloud.bigquery._helpers import _TypedProperty from google.cloud.bigquery._helpers import _rows_from_json from google.cloud.bigquery.dataset import Dataset @@ -414,7 +414,7 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. - :rtype: :class:`~google.cloud.iterator.Iterator` + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of row data :class:`tuple`s. During each page, the iterator will have the ``total_rows`` attribute set, which counts the total number of rows **in the result @@ -435,13 +435,16 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, params['timeoutMs'] = timeout_ms path = '/projects/%s/queries/%s' % (self.project, self.name) - iterator = HTTPIterator(client=client, path=path, - item_to_value=_item_to_row, - items_key='rows', - page_token=page_token, - max_results=max_results, - page_start=_rows_page_start_query, - extra_params=params) + iterator = page_iterator.HTTPIterator( + client=client, + api_request=client._connection.api_request, + path=path, + item_to_value=_item_to_row, + items_key='rows', + page_token=page_token, + max_results=max_results, + page_start=_rows_page_start_query, + extra_params=params) iterator.query_result = self # Over-ride the key used to retrieve the next page token. iterator._NEXT_TOKEN = 'pageToken' @@ -457,7 +460,7 @@ def _rows_page_start_query(iterator, page, response): added to the iterator after being created, which should be done by the caller. - :type iterator: :class:`~google.cloud.iterator.Iterator` + :type iterator: :class:`~google.api.core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type page: :class:`~google.cloud.iterator.Page` diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index c6bf5db893ab..87cff2980c7e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -23,10 +23,10 @@ from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload +from google.api.core import page_iterator from google.cloud import exceptions from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime -from google.cloud.iterator import HTTPIterator from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start @@ -712,7 +712,7 @@ def fetch_data(self, max_results=None, page_token=None, client=None): :param client: (Optional) The client to use. If not passed, falls back to the ``client`` stored on the current dataset. - :rtype: :class:`~google.cloud.iterator.Iterator` + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of row data :class:`tuple`s. During each page, the iterator will have the ``total_rows`` attribute set, which counts the total number of rows **in the table** @@ -724,10 +724,15 @@ def fetch_data(self, max_results=None, page_token=None, client=None): client = self._require_client(client) path = '%s/data' % (self.path,) - iterator = HTTPIterator(client=client, path=path, - item_to_value=_item_to_row, items_key='rows', - page_token=page_token, max_results=max_results, - page_start=_rows_page_start) + iterator = page_iterator.HTTPIterator( + client=client, + api_request=client._connection.api_request, + path=path, + item_to_value=_item_to_row, + items_key='rows', + page_token=page_token, + max_results=max_results, + page_start=_rows_page_start) iterator.schema = self._schema # Over-ride the key used to retrieve the next page token. iterator._NEXT_TOKEN = 'pageToken' From fd6d5e7b8dc3605d5bcdcb0a85214fb6d6b975dd Mon Sep 17 00:00:00 2001 From: Leon de Almeida Date: Wed, 9 Aug 2017 17:28:44 -0300 Subject: [PATCH 0158/2016] nullMarker support for BigQuery Load Jobs (#3449) (#3777) --- .../google-cloud-bigquery/google/cloud/bigquery/job.py | 8 ++++++++ packages/google-cloud-bigquery/tests/unit/test_job.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 43d7fd8f23c3..48d440063fa3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -518,6 +518,7 @@ class _LoadConfiguration(object): _field_delimiter = None _ignore_unknown_values = None _max_bad_records = None + _null_marker = None _quote_character = None _skip_leading_rows = None _source_format = None @@ -672,6 +673,11 @@ def output_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords """ + null_marker = _TypedProperty('null_marker', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker + """ + quote_character = _TypedProperty('quote_character', six.string_types) """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote @@ -710,6 +716,8 @@ def _populate_config_resource(self, configuration): configuration['ignoreUnknownValues'] = self.ignore_unknown_values if self.max_bad_records is not None: configuration['maxBadRecords'] = self.max_bad_records + if self.null_marker is not None: + configuration['nullMarker'] = self.null_marker if self.quote_character is not None: configuration['quote'] = self.quote_character if self.skip_leading_rows is not None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 46326441a5e1..81d07b122eb0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -247,6 +247,11 @@ def _verifyResourceProperties(self, job, resource): config['maxBadRecords']) else: self.assertIsNone(job.max_bad_records) + if 'nullMarker' in config: + self.assertEqual(job.null_marker, + config['nullMarker']) + else: + self.assertIsNone(job.null_marker) if 'quote' in config: self.assertEqual(job.quote_character, config['quote']) @@ -288,6 +293,7 @@ def test_ctor(self): self.assertIsNone(job.field_delimiter) self.assertIsNone(job.ignore_unknown_values) self.assertIsNone(job.max_bad_records) + self.assertIsNone(job.null_marker) self.assertIsNone(job.quote_character) self.assertIsNone(job.skip_leading_rows) self.assertIsNone(job.source_format) @@ -592,6 +598,7 @@ def test_begin_w_alternate_client(self): 'fieldDelimiter': '|', 'ignoreUnknownValues': True, 'maxBadRecords': 100, + 'nullMarker': r'\N', 'quote': "'", 'skipLeadingRows': 1, 'sourceFormat': 'CSV', @@ -619,6 +626,7 @@ def test_begin_w_alternate_client(self): job.field_delimiter = '|' job.ignore_unknown_values = True job.max_bad_records = 100 + job.null_marker = r'\N' job.quote_character = "'" job.skip_leading_rows = 1 job.source_format = 'CSV' From 75dd908dc60b7a07ca70253b9a4a28ab564c9cbf Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Wed, 9 Aug 2017 13:29:07 -0700 Subject: [PATCH 0159/2016] Allow job_id to be explicitly specified. (#3779) --- .../google/cloud/bigquery/dbapi/cursor.py | 9 +++++++-- .../tests/unit/test_dbapi_cursor.py | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 7519c762ae1e..167afb45e285 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -100,7 +100,7 @@ def _set_rowcount(self, query_results): total_rows = num_dml_affected_rows self.rowcount = total_rows - def execute(self, operation, parameters=None): + def execute(self, operation, parameters=None, job_id=None): """Prepare and execute a database operation. .. note:: @@ -128,12 +128,17 @@ def execute(self, operation, parameters=None): :type parameters: Mapping[str, Any] or Sequence[Any] :param parameters: (Optional) dictionary or sequence of parameter values. + + :type job_id: str + :param job_id: (Optional) The job_id to use. If not set, a job ID + is generated at random. """ self._query_results = None self._page_token = None self._has_fetched_all_rows = False client = self.connection._client - job_id = str(uuid.uuid4()) + if job_id is None: + job_id = str(uuid.uuid4()) # The DB-API uses the pyformat formatting, since the way BigQuery does # query parameters was not one of the standard options. Convert both diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 2a2ccfd989a6..49a332999f7e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -170,6 +170,14 @@ def test_fetchall_w_row(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) + def test_execute_custom_job_id(self): + from google.cloud.bigquery.dbapi import connect + client = self._mock_client(rows=[], num_dml_affected_rows=0) + connection = connect(client) + cursor = connection.cursor() + cursor.execute('SELECT 1;', job_id='foo') + self.assertEqual(client.run_async_query.mock_calls[0][1][0], 'foo') + def test_execute_w_dml(self): from google.cloud.bigquery.dbapi import connect connection = connect( From 67e73a1edad192c650beca14d85703ce73145eea Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Thu, 10 Aug 2017 09:30:34 -0700 Subject: [PATCH 0160/2016] Add support for a custom null marker. (#3776) --- .../google/cloud/bigquery/table.py | 14 +++++++++++--- .../google-cloud-bigquery/tests/unit/test_table.py | 12 +++++++----- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 87cff2980c7e..b26125ec9ef4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1043,7 +1043,8 @@ def upload_from_file(self, skip_leading_rows=None, write_disposition=None, client=None, - job_name=None): + job_name=None, + null_marker=None): """Upload the contents of this table from a file-like object. :type file_obj: file @@ -1116,6 +1117,9 @@ def upload_from_file(self, :param job_name: Optional. The id of the job. Generated if not explicitly passed in. + :type null_marker: str + :param null_marker: Optional. A custom null marker (example: "\\N") + :rtype: :class:`~google.cloud.bigquery.jobs.LoadTableFromStorageJob` :returns: the job instance used to load the data (e.g., for @@ -1135,7 +1139,7 @@ def upload_from_file(self, encoding, field_delimiter, ignore_unknown_values, max_bad_records, quote_character, skip_leading_rows, - write_disposition, job_name) + write_disposition, job_name, null_marker) try: created_json = self._do_upload( @@ -1157,7 +1161,8 @@ def _configure_job_metadata(metadata, # pylint: disable=too-many-arguments quote_character, skip_leading_rows, write_disposition, - job_name): + job_name, + null_marker): """Helper for :meth:`Table.upload_from_file`.""" load_config = metadata['configuration']['load'] @@ -1194,6 +1199,9 @@ def _configure_job_metadata(metadata, # pylint: disable=too-many-arguments if job_name is not None: load_config['jobReference'] = {'jobId': job_name} + if null_marker is not None: + load_config['nullMarker'] = null_marker + def _parse_schema_resource(info): """Parse a resource fragment into a schema field. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 125114b6f3ac..aa9e00670655 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1868,7 +1868,8 @@ def test_upload_file_resumable_metadata(self): 'quote_character': '"', 'skip_leading_rows': 1, 'write_disposition': 'WRITE_APPEND', - 'job_name': 'oddjob' + 'job_name': 'oddjob', + 'null_marker': r'\N', } expected_config = { @@ -1878,7 +1879,7 @@ def test_upload_file_resumable_metadata(self): 'destinationTable': { 'projectId': table._dataset._client.project, 'datasetId': table.dataset_name, - 'tableId': table.name + 'tableId': table.name, }, 'allowJaggedRows': config_args['allow_jagged_rows'], 'allowQuotedNewlines': @@ -1892,9 +1893,10 @@ def test_upload_file_resumable_metadata(self): 'quote': config_args['quote_character'], 'skipLeadingRows': config_args['skip_leading_rows'], 'writeDisposition': config_args['write_disposition'], - 'jobReference': {'jobId': config_args['job_name']} - } - } + 'jobReference': {'jobId': config_args['job_name']}, + 'nullMarker': config_args['null_marker'], + }, + }, } do_upload_patch = self._make_do_upload_patch( From a66c1c0cde365869b36018146fb078ed849de14d Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Thu, 10 Aug 2017 11:34:25 -0700 Subject: [PATCH 0161/2016] Add SchemaField serialization and deserialization. (#3786) --- .../google/cloud/bigquery/schema.py | 41 +++++++++++++++++++ .../tests/unit/test_schema.py | 41 +++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index e98d67c30fb6..4aea34ac22e0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -43,6 +43,25 @@ def __init__(self, name, field_type, mode='NULLABLE', self._description = description self._fields = tuple(fields) + @classmethod + def from_api_repr(cls, api_repr): + """Return a ``SchemaField`` object deserialized from a dictionary. + + Args: + api_repr (Mapping[str, str]): The serialized representation + of the SchemaField, such as what is output by + :meth:`to_api_repr`. + + Returns: + SchemaField: The ``SchemaField`` object. + """ + return cls( + field_type=api_repr['type'].upper(), + fields=[cls.from_api_repr(f) for f in api_repr.get('fields', ())], + mode=api_repr['mode'].upper(), + name=api_repr['name'], + ) + @property def name(self): """str: The name of the field.""" @@ -84,6 +103,28 @@ def fields(self): """ return self._fields + def to_api_repr(self): + """Return a dictionary representing this schema field. + + Returns: + dict: A dictionary representing the SchemaField in a serialized + form. + """ + # Put together the basic representation. See http://bit.ly/2hOAT5u. + answer = { + 'mode': self.mode.lower(), + 'name': self.name, + 'type': self.field_type.lower(), + } + + # If this is a RECORD type, then sub-fields are also included, + # add this to the serialized representation. + if self.field_type.upper() == 'RECORD': + answer['fields'] = [f.to_api_repr() for f in self.fields] + + # Done; return the serialized dictionary. + return answer + def _key(self): """A tuple key that unique-ly describes this field. diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 84f910a10d8e..d08e7757063e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -61,6 +61,47 @@ def test_constructor_subfields(self): self.assertIs(field._fields[0], sub_field1) self.assertIs(field._fields[1], sub_field2) + def test_to_api_repr(self): + field = self._make_one('foo', 'INTEGER', 'NULLABLE') + self.assertEqual(field.to_api_repr(), { + 'mode': 'nullable', + 'name': 'foo', + 'type': 'integer', + }) + + def test_to_api_repr_with_subfield(self): + subfield = self._make_one('bar', 'INTEGER', 'NULLABLE') + field = self._make_one('foo', 'RECORD', 'REQUIRED', fields=(subfield,)) + self.assertEqual(field.to_api_repr(), { + 'fields': [{ + 'mode': 'nullable', + 'name': 'bar', + 'type': 'integer', + }], + 'mode': 'required', + 'name': 'foo', + 'type': 'record', + }) + + def test_from_api_repr(self): + field = self._get_target_class().from_api_repr({ + 'fields': [{ + 'mode': 'nullable', + 'name': 'bar', + 'type': 'integer', + }], + 'mode': 'required', + 'name': 'foo', + 'type': 'record', + }) + self.assertEqual(field.name, 'foo') + self.assertEqual(field.field_type, 'RECORD') + self.assertEqual(field.mode, 'REQUIRED') + self.assertEqual(len(field.fields), 1) + self.assertEqual(field.fields[0].name, 'bar') + self.assertEqual(field.fields[0].field_type, 'INTEGER') + self.assertEqual(field.fields[0].mode, 'NULLABLE') + def test_name_property(self): name = 'lemon-ness' schema_field = self._make_one(name, 'INTEGER') From 50c79a9b0cddf9be68687635d5dff69cec338aea Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 10 Aug 2017 19:34:10 -0700 Subject: [PATCH 0162/2016] Wait for load jobs to complete in system tests. (#3782) --- packages/google-cloud-bigquery/tests/system.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 9d3bb7794256..5d0b38ffac41 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -35,6 +35,7 @@ from test_utils.system import unique_resource_id +JOB_TIMEOUT = 120 # 2 minutes WHERE = os.path.abspath(os.path.dirname(__file__)) @@ -381,8 +382,7 @@ def test_load_table_from_local_file_then_dump_table(self): ) # Retry until done. - retry = RetryInstanceState(_job_done, max_tries=8) - retry(job.reload)() + job.result(timeout=JOB_TIMEOUT) self.assertEqual(job.output_rows, len(ROWS)) @@ -419,8 +419,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ) # Retry until done. - retry = RetryInstanceState(_job_done, max_tries=8) - retry(job.reload)() + job.result(timeout=JOB_TIMEOUT) self.assertEqual(job.output_rows, len(ROWS)) @@ -770,8 +769,7 @@ def _load_table_for_dml(self, rows, dataset_name, table_name): ) # Retry until done. - retry = RetryInstanceState(_job_done, max_tries=8) - retry(job.reload)() + job.result(timeout=JOB_TIMEOUT) self._fetch_single_page(table) def test_sync_query_w_dml(self): @@ -799,7 +797,9 @@ def test_dbapi_w_dml(self): WHERE greeting = 'Hello World' """ - Config.CURSOR.execute(query_template.format(dataset_name, table_name)) + Config.CURSOR.execute( + query_template.format(dataset_name, table_name), + job_id='test_dbapi_w_dml_{}'.format(unique_resource_id())) self.assertEqual(Config.CURSOR.rowcount, 1) self.assertIsNone(Config.CURSOR.fetchone()) @@ -1086,7 +1086,7 @@ def test_async_query_future(self): str(uuid.uuid4()), 'SELECT 1') query_job.use_legacy_sql = False - iterator = query_job.result().fetch_data() + iterator = query_job.result(timeout=JOB_TIMEOUT).fetch_data() rows = list(iterator) self.assertEqual(rows, [(1,)]) From 82363f63e607a9458d02128e9ee44083d1a54ec0 Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Fri, 18 Aug 2017 09:32:13 -0700 Subject: [PATCH 0163/2016] Fix documentation link. (#3825) --- packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index d25f6747285f..645a68deada4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -536,7 +536,7 @@ def delete(self, client=None): """API call: delete the dataset via a DELETE request. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` From dac2a2f2482a546b4635ea2e33d2e29cad84a934 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 18 Aug 2017 14:37:32 -0700 Subject: [PATCH 0164/2016] BigQuery - add get_query_results method. (#3838) This method calls the getQueryResults API directly and returns a QueryResults object. Note: the response from this API does not include the query, so I modified the constructor to make query optional in this case. --- .../google/cloud/bigquery/client.py | 35 +++++++++++ .../google/cloud/bigquery/query.py | 6 ++ .../google-cloud-bigquery/tests/system.py | 7 +++ .../tests/unit/test_client.py | 63 +++++++++++++++++++ 4 files changed, 111 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index d9ff17d71720..52c462240097 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -162,6 +162,41 @@ def dataset(self, dataset_name, project=None): """ return Dataset(dataset_name, client=self, project=project) + def get_query_results(self, job_id, project=None, timeout_ms=None): + """Get the query results object for a query job. + + :type job_id: str + :param job_id: Name of the query job. + + :type project: str + :param project: + (Optional) project ID for the query job (defaults to the project of + the client). + + :type timeout_ms: int + :param timeout_ms: + (Optional) number of milliseconds the the API call should wait for + the query to complete before the request times out. + + :rtype: :class:`google.cloud.bigquery.query.QueryResults` + :returns: a new ``QueryResults`` instance + """ + + extra_params = {'maxResults': 0} + + if project is None: + project = self.project + + if timeout_ms is not None: + extra_params['timeoutMs'] = timeout_ms + + path = '/projects/{}/queries/{}'.format(project, job_id) + + resource = self._connection.api_request( + method='GET', path=path, query_params=extra_params) + + return QueryResults.from_api_repr(resource, self) + def job_from_resource(self, resource): """Detect correct job type from resource and instantiate. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index dfa0a422a68a..c01017af0d30 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -76,6 +76,12 @@ def __init__(self, query, client, udf_resources=(), query_parameters=()): self.query_parameters = query_parameters self._job = None + @classmethod + def from_api_repr(cls, api_response, client): + instance = cls(None, client) + instance._set_properties(api_response) + return instance + @classmethod def from_query_job(cls, job): """Factory: construct from an existing job. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 5d0b38ffac41..3cff1b001731 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -599,6 +599,13 @@ def test_job_cancel(self): # raise an error, and that the job completed (in the `retry()` # above). + def test_get_query_results(self): + job_id = 'test-get-query-results-' + str(uuid.uuid4()) + query_job = Config.CLIENT.run_async_query(job_id, 'SELECT 1') + query_job.begin() + results = Config.CLIENT.get_query_results(job_id) + self.assertEqual(results.total_rows, 1) + def test_sync_query_w_legacy_sql_types(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index e71f3b99fbe0..33cd59513efc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -45,6 +45,64 @@ def test_ctor(self): self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) + def test_get_job_miss_w_explicit_project_and_timeout(self): + from google.cloud.exceptions import NotFound + + project = 'PROJECT' + creds = _make_credentials() + client = self._make_one(project, creds) + conn = client._connection = _Connection() + + with self.assertRaises(NotFound): + client.get_query_results( + 'nothere', project='other-project', timeout_ms=500) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual( + req['path'], '/projects/other-project/queries/nothere') + self.assertEqual( + req['query_params'], {'maxResults': 0, 'timeoutMs': 500}) + + def test_get_query_results_hit(self): + project = 'PROJECT' + job_id = 'query_job' + data = { + 'kind': 'bigquery#getQueryResultsResponse', + 'etag': 'some-tag', + 'schema': { + 'fields': [ + { + 'name': 'title', + 'type': 'STRING', + 'mode': 'NULLABLE' + }, + { + 'name': 'unique_words', + 'type': 'INTEGER', + 'mode': 'NULLABLE' + } + ] + }, + 'jobReference': { + 'projectId': project, + 'jobId': job_id, + }, + 'totalRows': '10', + 'totalBytesProcessed': '2464625', + 'jobComplete': True, + 'cacheHit': False, + } + + creds = _make_credentials() + client = self._make_one(project, creds) + client._connection = _Connection(data) + query_results = client.get_query_results(job_id) + + self.assertEqual(query_results.total_rows, 10) + self.assertTrue(query_results.complete) + def test_list_projects_defaults(self): import six from google.cloud.bigquery.client import Project @@ -607,6 +665,11 @@ def __init__(self, *responses): self._requested = [] def api_request(self, **kw): + from google.cloud.exceptions import NotFound self._requested.append(kw) + + if len(self._responses) == 0: + raise NotFound('miss') + response, self._responses = self._responses[0], self._responses[1:] return response From 9cd229a13f643b615da78cdec70dbf0eebf41c22 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 21 Aug 2017 09:21:53 -0700 Subject: [PATCH 0165/2016] BigQuery: Poll via getQueryResults method. (#3844) This modifies the QueryJob's Futures interface implementation to poll using getQueryResults instead of jobs.get. This was recommended by BigQuery engineers because getQueryResults does HTTP long-polling for closer to realtime results. --- .../google/cloud/bigquery/dbapi/cursor.py | 13 +-- .../google/cloud/bigquery/job.py | 35 ++++---- .../google-cloud-bigquery/tests/system.py | 3 +- .../tests/unit/test_dbapi_cursor.py | 3 +- .../tests/unit/test_job.py | 80 ++++++++++++++++--- 5 files changed, 97 insertions(+), 37 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 167afb45e285..a5f04e15c674 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -154,20 +154,13 @@ def execute(self, operation, parameters=None, job_id=None): query_parameters=query_parameters) query_job.use_legacy_sql = False + # Wait for the query to finish. try: - query_results = query_job.result() + query_job = query_job.result() except google.cloud.exceptions.GoogleCloudError: raise exceptions.DatabaseError(query_job.errors) - # Force the iterator to run because the query_results doesn't - # have the total_rows populated. See: - # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3506 - query_iterator = query_results.fetch_data() - try: - six.next(iter(query_iterator)) - except StopIteration: - pass - + query_results = query_job.query_results() self._query_data = iter( query_results.fetch_data(max_results=self.arraysize)) self._set_rowcount(query_results) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 48d440063fa3..a43aeecbb931 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1085,6 +1085,7 @@ def __init__(self, name, query, client, self.udf_resources = udf_resources self.query_parameters = query_parameters self._configuration = _AsyncQueryConfiguration() + self._query_results = None allow_large_results = _TypedProperty('allow_large_results', bool) """See @@ -1284,23 +1285,25 @@ def query_results(self): :rtype: :class:`~google.cloud.bigquery.query.QueryResults` :returns: results instance """ - from google.cloud.bigquery.query import QueryResults - return QueryResults.from_query_job(self) + if not self._query_results: + self._query_results = self._client.get_query_results(self.name) + return self._query_results - def result(self, timeout=None): - """Start the job and wait for it to complete and get the result. + def done(self): + """Refresh the job and checks if it is complete. - :type timeout: int - :param timeout: How long to wait for job to complete before raising - a :class:`TimeoutError`. + :rtype: bool + :returns: True if the job is complete, False otherwise. + """ + # Do not refresh is the state is already done, as the job will not + # change once complete. + if self.state != _DONE_STATE: + self._query_results = self._client.get_query_results(self.name) - :rtype: :class:`~google.cloud.bigquery.query.QueryResults` - :returns: The query results. + # Only reload the job once we know the query is complete. + # This will ensure that fields such as the destination table are + # correctly populated. + if self._query_results.complete: + self.reload() - :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job - failed or :class:`TimeoutError` if the job did not complete in the - given timeout. - """ - super(QueryJob, self).result(timeout=timeout) - # Return a QueryResults instance instead of returning the job. - return self.query_results() + return self.state == _DONE_STATE diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 3cff1b001731..fab7d4b175bd 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1093,7 +1093,8 @@ def test_async_query_future(self): str(uuid.uuid4()), 'SELECT 1') query_job.use_legacy_sql = False - iterator = query_job.result(timeout=JOB_TIMEOUT).fetch_data() + query_job = query_job.result(timeout=JOB_TIMEOUT) + iterator = query_job.query_results().fetch_data() rows = list(iterator) self.assertEqual(rows, [(1,)]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 49a332999f7e..7351db8f670b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -42,7 +42,8 @@ def _mock_job( mock_job = mock.create_autospec(job.QueryJob) mock_job.error_result = None mock_job.state = 'DONE' - mock_job.result.return_value = self._mock_results( + mock_job.result.return_value = mock_job + mock_job.query_results.return_value = self._mock_results( rows=rows, schema=schema, num_dml_affected_rows=num_dml_affected_rows) return mock_job diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 81d07b122eb0..2a324b3ee347 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -171,6 +171,7 @@ def _makeResource(self, started=False, ended=False): } if ended: + resource['status'] = {'state': 'DONE'} resource['statistics']['load']['inputFiles'] = self.INPUT_FILES resource['statistics']['load']['inputFileBytes'] = self.INPUT_BYTES resource['statistics']['load']['outputBytes'] = self.OUTPUT_BYTES @@ -310,6 +311,37 @@ def test_ctor_w_schema(self): schema=[full_name, age]) self.assertEqual(job.schema, [full_name, age]) + def test_done(self): + client = _Client(self.PROJECT) + resource = self._makeResource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + self.assertTrue(job.done()) + + def test_result(self): + client = _Client(self.PROJECT) + resource = self._makeResource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + result = job.result() + + self.assertIs(result, job) + + def test_result_invokes_begins(self): + begun_resource = self._makeResource() + done_resource = copy.deepcopy(begun_resource) + done_resource['status'] = {'state': 'DONE'} + connection = _Connection(begun_resource, done_resource) + client = _Client(self.PROJECT, connection=connection) + table = _Table() + job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + + job.result() + + self.assertEqual(len(connection._requested), 2) + begin_request, reload_request = connection._requested + self.assertEqual(begin_request['method'], 'POST') + self.assertEqual(reload_request['method'], 'GET') + def test_schema_setter_non_list(self): client = _Client(self.PROJECT) table = _Table() @@ -1421,6 +1453,10 @@ def _makeResource(self, started=False, ended=False): started, ended) config = resource['configuration']['query'] config['query'] = self.QUERY + + if ended: + resource['status'] = {'state': 'DONE'} + return resource def _verifyBooleanResourceProperties(self, job, config): @@ -1640,40 +1676,60 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) + def test_done(self): + client = _Client(self.PROJECT) + resource = self._makeResource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + self.assertTrue(job.done()) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults - client = _Client(self.PROJECT) + query_resource = {'jobComplete': True} + connection = _Connection(query_resource) + client = _Client(self.PROJECT, connection=connection) job = self._make_one(self.JOB_NAME, self.QUERY, client) results = job.query_results() self.assertIsInstance(results, QueryResults) - self.assertIs(results._job, job) - def test_result(self): + def test_query_results_w_cached_value(self): from google.cloud.bigquery.query import QueryResults client = _Client(self.PROJECT) job = self._make_one(self.JOB_NAME, self.QUERY, client) - job._properties['status'] = {'state': 'DONE'} + query_results = QueryResults(None, client) + job._query_results = query_results + + results = job.query_results() + + self.assertIs(results, query_results) + + def test_result(self): + client = _Client(self.PROJECT) + resource = self._makeResource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) result = job.result() - self.assertIsInstance(result, QueryResults) - self.assertIs(result._job, job) + self.assertIs(result, job) def test_result_invokes_begins(self): begun_resource = self._makeResource() + incomplete_resource = {'jobComplete': False} + query_resource = {'jobComplete': True} done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} - connection = _Connection(begun_resource, done_resource) + connection = _Connection( + begun_resource, incomplete_resource, query_resource, done_resource) client = _Client(self.PROJECT, connection=connection) job = self._make_one(self.JOB_NAME, self.QUERY, client) job.result() - self.assertEqual(len(connection._requested), 2) - begin_request, reload_request = connection._requested + self.assertEqual(len(connection._requested), 4) + begin_request, _, query_request, reload_request = connection._requested self.assertEqual(begin_request['method'], 'POST') + self.assertEqual(query_request['method'], 'GET') self.assertEqual(reload_request['method'], 'GET') def test_result_error(self): @@ -2088,6 +2144,12 @@ def dataset(self, name): return Dataset(name, client=self) + def get_query_results(self, job_id): + from google.cloud.bigquery.query import QueryResults + + resource = self._connection.api_request(method='GET') + return QueryResults.from_api_repr(resource, self) + class _Table(object): From 66fb5ef2b73029827519014a088267b28c06feda Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 21 Aug 2017 12:11:12 -0700 Subject: [PATCH 0166/2016] Allow fetching more than the first page when max_results is set. (#3845) * BigQuery: reproduce error fetching multiple results with DB-API. Add a system test to call `fetchall()` when multiple rows are expected. * BigQuery: system test to reproduce error of only fetching first page. This error applies to all BigQuery iterators, not just DB-API. * BigQuery: allow arraysize to be set after execute() It was allowed before, but it didn't result in the correct behavior. * max_results in BigQuery API had a different meaning from HTTPIterator. In BigQuery it means the page size, but the HTTPIterator it meant "don't fetch any more pages once you have these many rows." * Fix lint errors --- .../google/cloud/bigquery/dbapi/cursor.py | 40 +++++++++++-------- .../google/cloud/bigquery/query.py | 7 ++-- .../google/cloud/bigquery/table.py | 12 ++++-- .../google-cloud-bigquery/tests/system.py | 12 +++++- .../tests/unit/test_dbapi_cursor.py | 2 +- 5 files changed, 48 insertions(+), 25 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index a5f04e15c674..0c56d87231fe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -52,8 +52,7 @@ def __init__(self, connection): # a single row at a time. self.arraysize = 1 self._query_data = None - self._page_token = None - self._has_fetched_all_rows = True + self._query_results = None def close(self): """No-op.""" @@ -133,9 +132,8 @@ def execute(self, operation, parameters=None, job_id=None): :param job_id: (Optional) The job_id to use. If not set, a job ID is generated at random. """ + self._query_data = None self._query_results = None - self._page_token = None - self._has_fetched_all_rows = False client = self.connection._client if job_id is None: job_id = str(uuid.uuid4()) @@ -161,8 +159,7 @@ def execute(self, operation, parameters=None, job_id=None): raise exceptions.DatabaseError(query_job.errors) query_results = query_job.query_results() - self._query_data = iter( - query_results.fetch_data(max_results=self.arraysize)) + self._query_results = query_results self._set_rowcount(query_results) self._set_description(query_results.schema) @@ -178,6 +175,22 @@ def executemany(self, operation, seq_of_parameters): for parameters in seq_of_parameters: self.execute(operation, parameters) + def _try_fetch(self, size=None): + """Try to start fetching data, if not yet started. + + Mutates self to indicate that iteration has started. + """ + if self._query_results is None: + raise exceptions.InterfaceError( + 'No query results: execute() must be called before fetch.') + + if size is None: + size = self.arraysize + + if self._query_data is None: + self._query_data = iter( + self._query_results.fetch_data(max_results=size)) + def fetchone(self): """Fetch a single row from the results of the last ``execute*()`` call. @@ -188,10 +201,7 @@ def fetchone(self): :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError` if called before ``execute()``. """ - if self._query_data is None: - raise exceptions.InterfaceError( - 'No query results: execute() must be called before fetch.') - + self._try_fetch() try: return six.next(self._query_data) except StopIteration: @@ -215,17 +225,17 @@ def fetchmany(self, size=None): :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError` if called before ``execute()``. """ - if self._query_data is None: - raise exceptions.InterfaceError( - 'No query results: execute() must be called before fetch.') if size is None: size = self.arraysize + self._try_fetch(size=size) rows = [] + for row in self._query_data: rows.append(row) if len(rows) >= size: break + return rows def fetchall(self): @@ -236,9 +246,7 @@ def fetchall(self): :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError` if called before ``execute()``. """ - if self._query_data is None: - raise exceptions.InterfaceError( - 'No query results: execute() must be called before fetch.') + self._try_fetch() return [row for row in self._query_data] def setinputsizes(self, sizes): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index c01017af0d30..185b68deb104 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -440,6 +440,9 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, if timeout_ms is not None: params['timeoutMs'] = timeout_ms + if max_results is not None: + params['maxResults'] = max_results + path = '/projects/%s/queries/%s' % (self.project, self.name) iterator = page_iterator.HTTPIterator( client=client, @@ -448,12 +451,10 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, item_to_value=_item_to_row, items_key='rows', page_token=page_token, - max_results=max_results, page_start=_rows_page_start_query, + next_token='pageToken', extra_params=params) iterator.query_result = self - # Over-ride the key used to retrieve the next page token. - iterator._NEXT_TOKEN = 'pageToken' return iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index b26125ec9ef4..9d100c06c711 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -722,6 +722,11 @@ def fetch_data(self, max_results=None, page_token=None, client=None): if len(self._schema) == 0: raise ValueError(_TABLE_HAS_NO_SCHEMA) + params = {} + + if max_results is not None: + params['maxResults'] = max_results + client = self._require_client(client) path = '%s/data' % (self.path,) iterator = page_iterator.HTTPIterator( @@ -731,11 +736,10 @@ def fetch_data(self, max_results=None, page_token=None, client=None): item_to_value=_item_to_row, items_key='rows', page_token=page_token, - max_results=max_results, - page_start=_rows_page_start) + page_start=_rows_page_start, + next_token='pageToken', + extra_params=params) iterator.schema = self._schema - # Over-ride the key used to retrieve the next page token. - iterator._NEXT_TOKEN = 'pageToken' return iterator def row_from_mapping(self, mapping): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index fab7d4b175bd..701da91659db 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -747,6 +747,16 @@ def test_dbapi_w_standard_sql_types(self): row = Config.CURSOR.fetchone() self.assertIsNone(row) + def test_dbapi_fetchall(self): + query = 'SELECT * FROM UNNEST([(1, 2), (3, 4), (5, 6)])' + + for arraysize in range(1, 5): + Config.CURSOR.execute(query) + self.assertEqual(Config.CURSOR.rowcount, 3, "expected 3 rows") + Config.CURSOR.arraysize = arraysize + rows = Config.CURSOR.fetchall() + self.assertEqual(rows, [(1, 2), (3, 4), (5, 6)]) + def _load_table_for_dml(self, rows, dataset_name, table_name): from google.cloud._testing import _NamedTemporaryFile @@ -1084,7 +1094,7 @@ def test_large_query_w_public_data(self): query.use_legacy_sql = False query.run() - iterator = query.fetch_data() + iterator = query.fetch_data(max_results=100) rows = list(iterator) self.assertEqual(len(rows), LIMIT) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 7351db8f670b..be327a8962a2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -141,8 +141,8 @@ def test_fetchmany_w_arraysize(self): (7, 8, 9), ])) cursor = connection.cursor() - cursor.arraysize = 2 cursor.execute('SELECT a, b, c;') + cursor.arraysize = 2 rows = cursor.fetchmany() self.assertEqual(len(rows), 2) self.assertEqual(rows[0], (1, 2, 3)) From 623d0cb80504112342debafdabf3dcc2b103641f Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Thu, 24 Aug 2017 13:28:07 -0700 Subject: [PATCH 0167/2016] Bump core version number (#3864) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 98dc37c8cc7a..912c1bf9cf8a 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -51,7 +51,7 @@ REQUIREMENTS = [ - 'google-cloud-core >= 0.26.0, < 0.27dev', + 'google-cloud-core >= 0.27.0, < 0.28dev', 'google-auth >= 1.0.0', 'google-resumable-media >= 0.2.1', 'requests >= 2.18.0', From 6732ce96c7bbfae86a612502c91590f04563233b Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Thu, 24 Aug 2017 15:20:22 -0700 Subject: [PATCH 0168/2016] Bump BigQuery to 0.27.0 (#3874) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 912c1bf9cf8a..ea25589bdc8b 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -59,7 +59,7 @@ setup( name='google-cloud-bigquery', - version='0.26.0', + version='0.27.0', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From bb324c81245e9aec79bbf435c5e622e95052921d Mon Sep 17 00:00:00 2001 From: Scott Page Date: Fri, 1 Sep 2017 11:24:29 -0600 Subject: [PATCH 0169/2016] BigQuery Support for Sub Second Datetimes (#3901) --- .../google/cloud/bigquery/_helpers.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 9358229e630a..83046352a421 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -73,8 +73,12 @@ def _timestamp_from_json(value, field): def _datetime_from_json(value, field): """Coerce 'value' to a datetime, if set or not nullable.""" if _not_null(value, field): - # value will be a string, in YYYY-MM-DDTHH:MM:SS form. - return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION) + # value will be a string + # YYYY-MM-DDTHH:MM:SS or YYYY-MM-DDTHH:MM:SS.0000 + try: + return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU) + except ValueError: + return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION) def _date_from_json(value, field): From d6ce9d9b43101981995ba60311701e56ef3d9211 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 5 Sep 2017 12:30:50 -0700 Subject: [PATCH 0170/2016] Use boolean instead of exception for BigQuery flow control. (#3915) * Use boolean instead of exception for BigQuery flow control. This is a follow-up to #3901. Also made sure to return an explicit None in the "field is null" branch. This follows the pattern of "all returns should be explicit or implicit, but don't mix". * Adding unit test for BigQuery datetime with microseconds. Also moving standard library imports in `test__helpers` to import time rather than run time. --- .../google/cloud/bigquery/_helpers.py | 22 ++++++++--- .../tests/unit/test__helpers.py | 39 ++++--------------- 2 files changed, 25 insertions(+), 36 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 83046352a421..e8f859681826 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -71,14 +71,26 @@ def _timestamp_from_json(value, field): def _datetime_from_json(value, field): - """Coerce 'value' to a datetime, if set or not nullable.""" + """Coerce 'value' to a datetime, if set or not nullable. + + Args: + value (str): The timestamp. + field (.SchemaField): The field corresponding to the value. + + Returns: + Optional[datetime.datetime]: The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). + """ if _not_null(value, field): - # value will be a string - # YYYY-MM-DDTHH:MM:SS or YYYY-MM-DDTHH:MM:SS.0000 - try: + if '.' in value: + # YYYY-MM-DDTHH:MM:SS.ffffff return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU) - except ValueError: + else: + # YYYY-MM-DDTHH:MM:SS return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION) + else: + return None def _date_from_json(value, field): diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 581b4b9a42fc..6470e30e5acd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import base64 +import datetime import unittest import mock @@ -144,16 +146,12 @@ def test_w_none_required(self): self._call_fut(None, _Field('REQUIRED')) def test_w_base64_encoded_bytes(self): - import base64 - expected = b'Wonderful!' encoded = base64.standard_b64encode(expected) coerced = self._call_fut(encoded, object()) self.assertEqual(coerced, expected) def test_w_base64_encoded_text(self): - import base64 - expected = b'Wonderful!' encoded = base64.standard_b64encode(expected).decode('ascii') coerced = self._call_fut(encoded, object()) @@ -175,7 +173,6 @@ def test_w_none_required(self): self._call_fut(None, _Field('REQUIRED')) def test_w_string_value(self): - import datetime from google.cloud._helpers import _EPOCH coerced = self._call_fut('1.234567', object()) @@ -184,7 +181,6 @@ def test_w_string_value(self): _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) def test_w_float_value(self): - import datetime from google.cloud._helpers import _EPOCH coerced = self._call_fut(1.234567, object()) @@ -208,13 +204,17 @@ def test_w_none_required(self): self._call_fut(None, _Field('REQUIRED')) def test_w_string_value(self): - import datetime - coerced = self._call_fut('2016-12-02T18:51:33', object()) self.assertEqual( coerced, datetime.datetime(2016, 12, 2, 18, 51, 33)) + def test_w_microseconds(self): + coerced = self._call_fut('2015-05-22T10:11:12.987654', object()) + self.assertEqual( + coerced, + datetime.datetime(2015, 5, 22, 10, 11, 12, 987654)) + class Test_date_from_json(unittest.TestCase): @@ -231,8 +231,6 @@ def test_w_none_required(self): self._call_fut(None, _Field('REQUIRED')) def test_w_string_value(self): - import datetime - coerced = self._call_fut('1987-09-22', object()) self.assertEqual( coerced, @@ -254,8 +252,6 @@ def test_w_none_required(self): self._call_fut(None, _Field('REQUIRED')) def test_w_string_value(self): - import datetime - coerced = self._call_fut('12:12:27', object()) self.assertEqual( coerced, @@ -578,15 +574,11 @@ def test_w_string(self): self.assertEqual(self._call_fut(ZULU), ZULU) def test_w_datetime_wo_zone(self): - import datetime - ZULU = '2016-12-20 15:58:27.339328+00:00' when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) self.assertEqual(self._call_fut(when), ZULU) def test_w_datetime_w_non_utc_zone(self): - import datetime - class _Zone(datetime.tzinfo): def utcoffset(self, _): @@ -598,7 +590,6 @@ def utcoffset(self, _): self.assertEqual(self._call_fut(when), ZULU) def test_w_datetime_w_utc_zone(self): - import datetime from google.cloud._helpers import UTC ZULU = '2016-12-20 15:58:27.339328+00:00' @@ -621,7 +612,6 @@ def test_w_string(self): self.assertEqual(self._call_fut(ZULU), ZULU) def test_w_datetime(self): - import datetime from google.cloud._helpers import _microseconds_from_datetime when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) @@ -641,7 +631,6 @@ def test_w_string(self): self.assertEqual(self._call_fut(RFC3339), RFC3339) def test_w_datetime(self): - import datetime from google.cloud._helpers import UTC when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC) @@ -660,8 +649,6 @@ def test_w_string(self): self.assertEqual(self._call_fut(RFC3339), RFC3339) def test_w_datetime(self): - import datetime - when = datetime.date(2016, 12, 3) self.assertEqual(self._call_fut(when), '2016-12-03') @@ -678,8 +665,6 @@ def test_w_string(self): self.assertEqual(self._call_fut(RFC3339), RFC3339) def test_w_datetime(self): - import datetime - when = datetime.time(12, 13, 41) self.assertEqual(self._call_fut(when), '12:13:41') @@ -1006,7 +991,6 @@ def test_to_api_repr_w_bool(self): def test_to_api_repr_w_timestamp_datetime(self): from google.cloud._helpers import UTC - import datetime STAMP = '2016-12-20 15:58:27.339328+00:00' when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) @@ -1023,7 +1007,6 @@ def test_to_api_repr_w_timestamp_datetime(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_timestamp_micros(self): - import datetime from google.cloud._helpers import _microseconds_from_datetime now = datetime.datetime.utcnow() @@ -1041,7 +1024,6 @@ def test_to_api_repr_w_timestamp_micros(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_datetime(self): - import datetime from google.cloud._helpers import _datetime_to_rfc3339 now = datetime.datetime.utcnow() @@ -1058,7 +1040,6 @@ def test_to_api_repr_w_datetime_datetime(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_string(self): - import datetime from google.cloud._helpers import _datetime_to_rfc3339 now = datetime.datetime.utcnow() @@ -1076,8 +1057,6 @@ def test_to_api_repr_w_datetime_string(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_date_date(self): - import datetime - today = datetime.date.today() EXPECTED = { 'parameterType': { @@ -1092,8 +1071,6 @@ def test_to_api_repr_w_date_date(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_date_string(self): - import datetime - today = datetime.date.today() today_str = today.isoformat(), EXPECTED = { From 9ec5b246a3498d20cb7c17ed7d4bf11cec9e0f87 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 6 Sep 2017 13:20:39 -0700 Subject: [PATCH 0171/2016] BigQuery: Add system tests for microsecond-level precision datetimes (#3926) * BigQuery: add test for handling microsecond precision timestamps. * Forgot microseconds in actual query part of system test. * Fix lint error. --- packages/google-cloud-bigquery/tests/system.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 701da91659db..4ca4728b7404 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -646,8 +646,11 @@ def test_sync_query_w_legacy_sql_types(self): def _generate_standard_sql_types_examples(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) + naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000) stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) + stamp_microseconds = stamp + '.250000' zoned = naive.replace(tzinfo=UTC) + zoned_microseconds = naive_microseconds.replace(tzinfo=UTC) return [ { 'sql': 'SELECT 1', @@ -673,10 +676,19 @@ def _generate_standard_sql_types_examples(self): 'sql': 'SELECT TIMESTAMP "%s"' % (stamp,), 'expected': zoned, }, + { + 'sql': 'SELECT TIMESTAMP "%s"' % (stamp_microseconds,), + 'expected': zoned_microseconds, + }, { 'sql': 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp,), 'expected': naive, }, + { + 'sql': 'SELECT DATETIME(TIMESTAMP "%s")' % ( + stamp_microseconds,), + 'expected': naive_microseconds, + }, { 'sql': 'SELECT DATE(TIMESTAMP "%s")' % (stamp,), 'expected': naive.date(), From 6cc5653958bbd703efc2d9a0bc5d5bf09bd7a2a1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 8 Sep 2017 12:25:41 -0700 Subject: [PATCH 0172/2016] BigQuery: make get_query_results private. Return rows for QueryJob.result() (#3883) get_query_results is too low level to expose directly to users. Based on feedback from others in the redesign plan, the result() object should just return the rows as an iterator. This simplifies the proposed query interface to: job = client.query('MY QUERY') rows = job.result() No need for separate fetch_data() call after calling result(). --- .../google/cloud/bigquery/client.py | 2 +- .../google/cloud/bigquery/dbapi/cursor.py | 2 +- .../google/cloud/bigquery/job.py | 28 ++++++++++++++-- .../google-cloud-bigquery/tests/system.py | 10 +----- .../tests/unit/test_client.py | 8 ++--- .../tests/unit/test_job.py | 33 +++++++++++++++---- 6 files changed, 60 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 52c462240097..2ed2c15e13a9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -162,7 +162,7 @@ def dataset(self, dataset_name, project=None): """ return Dataset(dataset_name, client=self, project=project) - def get_query_results(self, job_id, project=None, timeout_ms=None): + def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. :type job_id: str diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 0c56d87231fe..c1683c16db79 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -154,7 +154,7 @@ def execute(self, operation, parameters=None, job_id=None): # Wait for the query to finish. try: - query_job = query_job.result() + query_job.result() except google.cloud.exceptions.GoogleCloudError: raise exceptions.DatabaseError(query_job.errors) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index a43aeecbb931..3eada05d2ea0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1286,7 +1286,7 @@ def query_results(self): :returns: results instance """ if not self._query_results: - self._query_results = self._client.get_query_results(self.name) + self._query_results = self._client._get_query_results(self.name) return self._query_results def done(self): @@ -1298,7 +1298,7 @@ def done(self): # Do not refresh is the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: - self._query_results = self._client.get_query_results(self.name) + self._query_results = self._client._get_query_results(self.name) # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are @@ -1307,3 +1307,27 @@ def done(self): self.reload() return self.state == _DONE_STATE + + def result(self, timeout=None): + """Start the job and wait for it to complete and get the result. + + :type timeout: int + :param timeout: + How long to wait for job to complete before raising a + :class:`TimeoutError`. + + :rtype: :class:`~google.api.core.page_iterator.Iterator` + :returns: + Iterator of row data :class:`tuple`s. During each page, the + iterator will have the ``total_rows`` attribute set, which counts + the total number of rows **in the result set** (this is distinct + from the total number of rows in the current page: + ``iterator.page.num_items``). + + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job + failed or :class:`TimeoutError` if the job did not complete in the + given timeout. + """ + super(QueryJob, self).result(timeout=timeout) + # Return an iterator instead of returning the job. + return self.query_results().fetch_data() diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 4ca4728b7404..a2da0b0383eb 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -599,13 +599,6 @@ def test_job_cancel(self): # raise an error, and that the job completed (in the `retry()` # above). - def test_get_query_results(self): - job_id = 'test-get-query-results-' + str(uuid.uuid4()) - query_job = Config.CLIENT.run_async_query(job_id, 'SELECT 1') - query_job.begin() - results = Config.CLIENT.get_query_results(job_id) - self.assertEqual(results.total_rows, 1) - def test_sync_query_w_legacy_sql_types(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) @@ -1115,8 +1108,7 @@ def test_async_query_future(self): str(uuid.uuid4()), 'SELECT 1') query_job.use_legacy_sql = False - query_job = query_job.result(timeout=JOB_TIMEOUT) - iterator = query_job.query_results().fetch_data() + iterator = query_job.result(timeout=JOB_TIMEOUT) rows = list(iterator) self.assertEqual(rows, [(1,)]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 33cd59513efc..dc998926d434 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -45,7 +45,7 @@ def test_ctor(self): self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) - def test_get_job_miss_w_explicit_project_and_timeout(self): + def test__get_query_results_miss_w_explicit_project_and_timeout(self): from google.cloud.exceptions import NotFound project = 'PROJECT' @@ -54,7 +54,7 @@ def test_get_job_miss_w_explicit_project_and_timeout(self): conn = client._connection = _Connection() with self.assertRaises(NotFound): - client.get_query_results( + client._get_query_results( 'nothere', project='other-project', timeout_ms=500) self.assertEqual(len(conn._requested), 1) @@ -65,7 +65,7 @@ def test_get_job_miss_w_explicit_project_and_timeout(self): self.assertEqual( req['query_params'], {'maxResults': 0, 'timeoutMs': 500}) - def test_get_query_results_hit(self): + def test__get_query_results_hit(self): project = 'PROJECT' job_id = 'query_job' data = { @@ -98,7 +98,7 @@ def test_get_query_results_hit(self): creds = _make_credentials() client = self._make_one(project, creds) client._connection = _Connection(data) - query_results = client.get_query_results(job_id) + query_results = client._get_query_results(job_id) self.assertEqual(query_results.total_rows, 10) self.assertTrue(query_results.complete) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 2a324b3ee347..ab08701d352a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1685,7 +1685,13 @@ def test_done(self): def test_query_results(self): from google.cloud.bigquery.query import QueryResults - query_resource = {'jobComplete': True} + query_resource = { + 'jobComplete': True, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + } connection = _Connection(query_resource) client = _Client(self.PROJECT, connection=connection) job = self._make_one(self.JOB_NAME, self.QUERY, client) @@ -1705,22 +1711,37 @@ def test_query_results_w_cached_value(self): self.assertIs(results, query_results) def test_result(self): - client = _Client(self.PROJECT) + query_resource = { + 'jobComplete': True, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + } + connection = _Connection(query_resource, query_resource) + client = _Client(self.PROJECT, connection=connection) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) result = job.result() - self.assertIs(result, job) + self.assertEqual(list(result), []) def test_result_invokes_begins(self): begun_resource = self._makeResource() incomplete_resource = {'jobComplete': False} - query_resource = {'jobComplete': True} + query_resource = { + 'jobComplete': True, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + } done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} connection = _Connection( - begun_resource, incomplete_resource, query_resource, done_resource) + begun_resource, incomplete_resource, query_resource, done_resource, + query_resource) client = _Client(self.PROJECT, connection=connection) job = self._make_one(self.JOB_NAME, self.QUERY, client) @@ -2144,7 +2165,7 @@ def dataset(self, name): return Dataset(name, client=self) - def get_query_results(self, job_id): + def _get_query_results(self, job_id): from google.cloud.bigquery.query import QueryResults resource = self._connection.api_request(method='GET') From a46da7511749197d15c1bb7a442046bf279ee9de Mon Sep 17 00:00:00 2001 From: Willian Fuks Date: Fri, 15 Sep 2017 16:00:20 +0200 Subject: [PATCH 0173/2016] updated docstrings for method ``table.insert_data`` (#3960) --- .../google/cloud/bigquery/table.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 9d100c06c711..03e557248637 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -793,10 +793,18 @@ def insert_data(self, passed, no de-duplication occurs. :type skip_invalid_rows: bool - :param skip_invalid_rows: (Optional) skip rows w/ invalid data? + :param skip_invalid_rows: (Optional) Insert all valid rows of a + request, even if invalid rows exist. + The default value is False, which causes + the entire request to fail if any invalid + rows exist. :type ignore_unknown_values: bool - :param ignore_unknown_values: (Optional) ignore columns beyond schema? + :param ignore_unknown_values: (Optional) Accept rows that contain + values that do not match the schema. + The unknown values are ignored. Default + is False, which treats unknown values as + errors. :type template_suffix: str :param template_suffix: From d579a4a8d79c532ca9c36634581324365206441f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 22 Sep 2017 13:53:22 -0700 Subject: [PATCH 0174/2016] BigQuery: fix parsing for array parameter with struct type. (#4040) Adds special cases for loading an array query parameter resource that contains structs. Similarly, adds special cases for loading a struct query parameter when it contains nested structs or arrays. --- .../google/cloud/bigquery/_helpers.py | 140 ++++++++- .../google-cloud-bigquery/tests/system.py | 18 ++ .../tests/unit/test__helpers.py | 291 ++++++++++++++++++ 3 files changed, 438 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index e8f859681826..ac6e9759c084 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -16,6 +16,7 @@ import base64 from collections import OrderedDict +import copy import datetime from google.cloud._helpers import UTC @@ -468,6 +469,31 @@ def to_api_repr(self): resource['name'] = self.name return resource + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`ScalarQueryParameter`. + """ + return ( + self.name, + self.type_.upper(), + self.value, + ) + + def __eq__(self, other): + if not isinstance(other, ScalarQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'ScalarQueryParameter{}'.format(self._key()) + class ArrayQueryParameter(AbstractQueryParameter): """Named / positional query parameters for array values. @@ -507,15 +533,24 @@ def positional(cls, array_type, values): return cls(None, array_type, values) @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. - - :type resource: dict - :param resource: JSON mapping of parameter + def _from_api_repr_struct(cls, resource): + name = resource.get('name') + converted = [] + # We need to flatten the array to use the StructQueryParameter + # parse code. + resource_template = { + # The arrayType includes all the types of the fields of the STRUCT + 'parameterType': resource['parameterType']['arrayType'] + } + for array_value in resource['parameterValue']['arrayValues']: + struct_resource = copy.deepcopy(resource_template) + struct_resource['parameterValue'] = array_value + struct_value = StructQueryParameter.from_api_repr(struct_resource) + converted.append(struct_value) + return cls(name, 'STRUCT', converted) - :rtype: :class:`ArrayQueryParameter` - :returns: instance - """ + @classmethod + def _from_api_repr_scalar(cls, resource): name = resource.get('name') array_type = resource['parameterType']['arrayType']['type'] values = [ @@ -526,6 +561,21 @@ def from_api_repr(cls, resource): _CELLDATA_FROM_JSON[array_type](value, None) for value in values] return cls(name, array_type, converted) + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`ArrayQueryParameter` + :returns: instance + """ + array_type = resource['parameterType']['arrayType']['type'] + if array_type == 'STRUCT': + return cls._from_api_repr_struct(resource) + return cls._from_api_repr_scalar(resource) + def to_api_repr(self): """Construct JSON API representation for the parameter. @@ -533,7 +583,7 @@ def to_api_repr(self): :returns: JSON mapping """ values = self.values - if self.array_type == 'RECORD': + if self.array_type == 'RECORD' or self.array_type == 'STRUCT': reprs = [value.to_api_repr() for value in values] a_type = reprs[0]['parameterType'] a_values = [repr_['parameterValue'] for repr_ in reprs] @@ -556,6 +606,31 @@ def to_api_repr(self): resource['name'] = self.name return resource + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`ArrayQueryParameter`. + """ + return ( + self.name, + self.array_type.upper(), + self.values, + ) + + def __eq__(self, other): + if not isinstance(other, ArrayQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'ArrayQueryParameter{}'.format(self._key()) + class StructQueryParameter(AbstractQueryParameter): """Named / positional query parameters for struct values. @@ -606,14 +681,32 @@ def from_api_repr(cls, resource): """ name = resource.get('name') instance = cls(name) + type_resources = {} types = instance.struct_types for item in resource['parameterType']['structTypes']: types[item['name']] = item['type']['type'] + type_resources[item['name']] = item['type'] struct_values = resource['parameterValue']['structValues'] for key, value in struct_values.items(): type_ = types[key] - value = value['value'] - converted = _CELLDATA_FROM_JSON[type_](value, None) + converted = None + if type_ == 'STRUCT': + struct_resource = { + 'name': key, + 'parameterType': type_resources[key], + 'parameterValue': value, + } + converted = StructQueryParameter.from_api_repr(struct_resource) + elif type_ == 'ARRAY': + struct_resource = { + 'name': key, + 'parameterType': type_resources[key], + 'parameterValue': value, + } + converted = ArrayQueryParameter.from_api_repr(struct_resource) + else: + value = value['value'] + converted = _CELLDATA_FROM_JSON[type_](value, None) instance.struct_values[key] = converted return instance @@ -651,6 +744,31 @@ def to_api_repr(self): resource['name'] = self.name return resource + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`ArrayQueryParameter`. + """ + return ( + self.name, + self.struct_types, + self.struct_values, + ) + + def __eq__(self, other): + if not isinstance(other, StructQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'StructQueryParameter{}'.format(self._key()) + class QueryParametersProperty(object): """Custom property type, holding query parameter instances.""" diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index a2da0b0383eb..0d5b9918fef3 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -887,6 +887,16 @@ def test_sync_query_w_query_params(self): name='friends', array_type='STRING', values=[phred_name, bharney_name]) with_friends_param = StructQueryParameter(None, friends_param) + top_left_param = StructQueryParameter( + 'top_left', + ScalarQueryParameter('x', 'INT64', 12), + ScalarQueryParameter('y', 'INT64', 102)) + bottom_right_param = StructQueryParameter( + 'bottom_right', + ScalarQueryParameter('x', 'INT64', 22), + ScalarQueryParameter('y', 'INT64', 92)) + rectangle_param = StructQueryParameter( + 'rectangle', top_left_param, bottom_right_param) examples = [ { 'sql': 'SELECT @question', @@ -943,6 +953,14 @@ def test_sync_query_w_query_params(self): 'expected': ({'_field_1': question, '_field_2': answer}), 'query_parameters': [struct_param], }, + { + 'sql': + 'SELECT ' + '((@rectangle.bottom_right.x - @rectangle.top_left.x) ' + '* (@rectangle.top_left.y - @rectangle.bottom_right.y))', + 'expected': 100, + 'query_parameters': [rectangle_param], + }, { 'sql': 'SELECT ?', 'expected': [ diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 6470e30e5acd..9dc14f6e3a47 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -1098,6 +1098,55 @@ def test_to_api_repr_w_unknown_type(self): param = klass.positional(type_='UNKNOWN', value='unknown') self.assertEqual(param.to_api_repr(), EXPECTED) + def test___eq___wrong_type(self): + field = self._make_one('test', 'STRING', 'value') + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one('test', 'STRING', 'value') + other = self._make_one('other', 'STRING', 'value') + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one('test', 'STRING', None) + other = self._make_one('test', 'INT64', None) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one('test', 'STRING', 'hello') + other = self._make_one('test', 'STRING', 'world') + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one('test', 'STRING', 'gotcha') + other = self._make_one('test', 'STRING', 'gotcha') + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one('toast', 'INT64', 13) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one('test', 'INT64', 12) + field2 = self._make_one('test', 'INT64', 12) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one('test', 'INT64', 11) + field2 = self._make_one('test', 'INT64', 12) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one('field1', 'STRING', 'value') + expected = "ScalarQueryParameter('field1', 'STRING', 'value')" + self.assertEqual(repr(field1), expected) + def _make_subparam(name, type_, value): from google.cloud.bigquery._helpers import ScalarQueryParameter @@ -1180,6 +1229,58 @@ def test_from_api_repr_wo_name(self): self.assertEqual(param.array_type, 'INT64') self.assertEqual(param.values, [1, 2]) + def test_from_api_repr_w_struct_type(self): + from google.cloud.bigquery._helpers import StructQueryParameter + + RESOURCE = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'STRUCT', + 'structTypes': [ + { + 'name': 'name', + 'type': {'type': 'STRING'}, + }, + { + 'name': 'age', + 'type': {'type': 'INT64'}, + }, + ], + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'structValues': { + 'name': {'value': 'Phred Phlyntstone'}, + 'age': {'value': '32'}, + }, + }, + { + 'structValues': { + 'name': { + 'value': 'Bharney Rhubbyl', + }, + 'age': {'value': '31'}, + }, + }, + ], + }, + } + + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + + phred = StructQueryParameter.positional( + _make_subparam('name', 'STRING', 'Phred Phlyntstone'), + _make_subparam('age', 'INT64', 32)) + bharney = StructQueryParameter.positional( + _make_subparam('name', 'STRING', 'Bharney Rhubbyl'), + _make_subparam('age', 'INT64', 31)) + self.assertEqual(param.array_type, 'STRUCT') + self.assertEqual(param.values, [phred, bharney]) + def test_to_api_repr_w_name(self): EXPECTED = { 'name': 'foo', @@ -1276,6 +1377,55 @@ def test_to_api_repr_w_record_type(self): param = klass.positional(array_type='RECORD', values=[struct]) self.assertEqual(param.to_api_repr(), EXPECTED) + def test___eq___wrong_type(self): + field = self._make_one('test', 'STRING', ['value']) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one('field', 'STRING', ['value']) + other = self._make_one('other', 'STRING', ['value']) + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one('test', 'STRING', []) + other = self._make_one('test', 'INT64', []) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one('test', 'STRING', ['hello']) + other = self._make_one('test', 'STRING', ['hello', 'world']) + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one('test', 'STRING', ['gotcha']) + other = self._make_one('test', 'STRING', ['gotcha']) + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one('toast', 'INT64', [13]) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one('test', 'INT64', [12]) + field2 = self._make_one('test', 'INT64', [12]) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one('test', 'INT64', [11]) + field2 = self._make_one('test', 'INT64', [12]) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one('field1', 'STRING', ['value']) + expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" + self.assertEqual(repr(field1), expected) + class Test_StructQueryParameter(unittest.TestCase): @@ -1350,6 +1500,81 @@ def test_from_api_repr_wo_name(self): self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + def test_from_api_repr_w_nested_array(self): + from google.cloud.bigquery._helpers import ArrayQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'arrayValues': [ + {'value': '123'}, + {'value': '456'}, + ]}, + }, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual( + param, + self._make_one( + 'foo', + _make_subparam('bar', 'STRING', 'abc'), + ArrayQueryParameter('baz', 'INT64', [123, 456]))) + + def test_from_api_repr_w_nested_struct(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'qux', 'type': {'type': 'INT64'}}, + {'name': 'spam', 'type': {'type': 'BOOL'}}, + ], + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'structValues': { + 'qux': {'value': '123'}, + 'spam': {'value': 'true'}, + }}, + }, + }, + } + + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + + expected = self._make_one( + 'foo', + _make_subparam('bar', 'STRING', 'abc'), + self._make_one( + 'baz', + _make_subparam('qux', 'INT64', 123), + _make_subparam('spam', 'BOOL', True))) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, expected.struct_types) + self.assertEqual(param.struct_values, expected.struct_values) + def test_to_api_repr_w_name(self): EXPECTED = { 'name': 'foo', @@ -1457,6 +1682,72 @@ def test_to_api_repr_w_nested_struct(self): param = self._make_one('foo', scalar_1, sub) self.assertEqual(param.to_api_repr(), EXPECTED) + def test___eq___wrong_type(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'abc')) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'abc')) + other = self._make_one( + 'other ', _make_subparam('bar', 'STRING', 'abc')) + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', None)) + other = self._make_one( + 'test', _make_subparam('bar', 'INT64', None)) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + other = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'world')) + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'gotcha')) + other = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'gotcha')) + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + field2 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + field2 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'world')) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one( + 'test', _make_subparam('field1', 'STRING', 'hello')) + got = repr(field1) + self.assertIn('StructQueryParameter', got) + self.assertIn("'field1', 'STRING'", got) + self.assertIn("'field1': 'hello'", got) + class Test_QueryParametersProperty(unittest.TestCase): From c0a4c2b68cbd53d7e821073af37b1982e9d5cddf Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 3 Oct 2017 13:02:49 -0700 Subject: [PATCH 0175/2016] Fixing virutal->virtual typo. (#4108) Done via: $ git grep -l virutal | xargs sed -i s/virutal/virtual/g --- packages/google-cloud-bigquery/nox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 989965443159..23b2771bd523 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -70,7 +70,7 @@ def system_tests(session, python_version): session.virtualenv_dirname = 'sys-' + python_version # Install all test dependencies, then install this package into the - # virutalenv's dist-packages. + # virtualenv's dist-packages. session.install('mock', 'pytest', *LOCAL_DEPS) session.install( os.path.join('..', 'storage'), From 0b3cd9ea92937e188a276221482b64ecce5a2393 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 18 Aug 2017 12:38:13 -0400 Subject: [PATCH 0176/2016] Rename job classes (#3797) * Rename class: 'jobs.LoadTableFromStorageJob' -> 'jobs.LoadJob'. * Rename class: 'jobs.ExtractTableToStorageJob' -> 'jobs.ExtractJob'. --- .../google/cloud/bigquery/client.py | 26 +++++++++---------- .../google/cloud/bigquery/job.py | 20 +++++++------- .../google/cloud/bigquery/table.py | 2 +- .../tests/unit/test_client.py | 20 +++++++------- .../tests/unit/test_job.py | 24 ++++++++--------- 5 files changed, 46 insertions(+), 46 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2ed2c15e13a9..5578064c1cc7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -19,8 +19,8 @@ from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.job import CopyJob -from google.cloud.bigquery.job import ExtractTableToStorageJob -from google.cloud.bigquery.job import LoadTableFromStorageJob +from google.cloud.bigquery.job import ExtractJob +from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.query import QueryResults @@ -204,20 +204,20 @@ def job_from_resource(self, resource): :param resource: one job resource from API response :rtype: One of: - :class:`google.cloud.bigquery.job.LoadTableFromStorageJob`, + :class:`google.cloud.bigquery.job.LoadJob`, :class:`google.cloud.bigquery.job.CopyJob`, - :class:`google.cloud.bigquery.job.ExtractTableToStorageJob`, + :class:`google.cloud.bigquery.job.ExtractJob`, :class:`google.cloud.bigquery.job.QueryJob`, :class:`google.cloud.bigquery.job.RunSyncQueryJob` :returns: the job instance, constructed via the resource """ config = resource['configuration'] if 'load' in config: - return LoadTableFromStorageJob.from_api_repr(resource, self) + return LoadJob.from_api_repr(resource, self) elif 'copy' in config: return CopyJob.from_api_repr(resource, self) elif 'extract' in config: - return ExtractTableToStorageJob.from_api_repr(resource, self) + return ExtractJob.from_api_repr(resource, self) elif 'query' in config: return QueryJob.from_api_repr(resource, self) raise ValueError('Cannot parse job resource') @@ -288,11 +288,10 @@ def load_table_from_storage(self, job_name, destination, *source_uris): :param source_uris: URIs of data files to be loaded; in format ``gs:///``. - :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob` - :returns: a new ``LoadTableFromStorageJob`` instance + :rtype: :class:`google.cloud.bigquery.job.LoadJob` + :returns: a new ``LoadJob`` instance """ - return LoadTableFromStorageJob(job_name, destination, source_uris, - client=self) + return LoadJob(job_name, destination, source_uris, client=self) def copy_table(self, job_name, destination, *sources): """Construct a job for copying one or more tables into another table. @@ -331,11 +330,10 @@ def extract_table_to_storage(self, job_name, source, *destination_uris): table data is to be extracted; in format ``gs:///``. - :rtype: :class:`google.cloud.bigquery.job.ExtractTableToStorageJob` - :returns: a new ``ExtractTableToStorageJob`` instance + :rtype: :class:`google.cloud.bigquery.job.ExtractJob` + :returns: a new ``ExtractJob`` instance """ - return ExtractTableToStorageJob(job_name, source, destination_uris, - client=self) + return ExtractJob(job_name, source, destination_uris, client=self) def run_async_query(self, job_name, query, udf_resources=(), query_parameters=()): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 3eada05d2ea0..5d37bef36160 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -525,8 +525,8 @@ class _LoadConfiguration(object): _write_disposition = None -class LoadTableFromStorageJob(_AsyncJob): - """Asynchronous job for loading data into a table from CloudStorage. +class LoadJob(_AsyncJob): + """Asynchronous job for loading data into a table from remote URI. :type name: str :param name: the name of the job @@ -535,8 +535,10 @@ class LoadTableFromStorageJob(_AsyncJob): :param destination: Table into which data is to be loaded. :type source_uris: sequence of string - :param source_uris: URIs of one or more data files to be loaded, in - format ``gs:///``. + :param source_uris: + URIs of one or more data files to be loaded. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris + for supported URI formats. :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration @@ -550,7 +552,7 @@ class LoadTableFromStorageJob(_AsyncJob): _JOB_TYPE = 'load' def __init__(self, name, destination, source_uris, client, schema=()): - super(LoadTableFromStorageJob, self).__init__(name, client) + super(LoadJob, self).__init__(name, client) self.destination = destination self.source_uris = source_uris self._configuration = _LoadConfiguration() @@ -775,7 +777,7 @@ def from_api_repr(cls, resource, client): :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob` + :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: Job parsed from ``resource``. """ name, config = cls._get_resource_config(resource) @@ -919,7 +921,7 @@ class _ExtractConfiguration(object): _print_header = None -class ExtractTableToStorageJob(_AsyncJob): +class ExtractJob(_AsyncJob): """Asynchronous job: extract data from a table into Cloud Storage. :type name: str @@ -940,7 +942,7 @@ class ExtractTableToStorageJob(_AsyncJob): _JOB_TYPE = 'extract' def __init__(self, name, source, destination_uris, client): - super(ExtractTableToStorageJob, self).__init__(name, client) + super(ExtractJob, self).__init__(name, client) self.source = source self.destination_uris = destination_uris self._configuration = _ExtractConfiguration() @@ -1018,7 +1020,7 @@ def from_api_repr(cls, resource, client): :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.ExtractTableToStorageJob` + :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: Job parsed from ``resource``. """ name, config = cls._get_resource_config(resource) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 03e557248637..2121730d3f79 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1132,7 +1132,7 @@ def upload_from_file(self, :type null_marker: str :param null_marker: Optional. A custom null marker (example: "\\N") - :rtype: :class:`~google.cloud.bigquery.jobs.LoadTableFromStorageJob` + :rtype: :class:`~google.cloud.bigquery.jobs.LoadJob` :returns: the job instance used to load the data (e.g., for querying status). Note that the job is already started: diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index dc998926d434..6a9a98f2952e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -268,9 +268,9 @@ def test_job_from_resource_unknown_type(self): def test_list_jobs_defaults(self): import six - from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import CopyJob - from google.cloud.bigquery.job import ExtractTableToStorageJob + from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import QueryJob PROJECT = 'PROJECT' @@ -281,9 +281,9 @@ def test_list_jobs_defaults(self): SOURCE_URI = 'gs://test_bucket/src_object*' DESTINATION_URI = 'gs://test_bucket/dst_object*' JOB_TYPES = { - 'load_job': LoadTableFromStorageJob, + 'load_job': LoadJob, 'copy_job': CopyJob, - 'extract_job': ExtractTableToStorageJob, + 'extract_job': ExtractJob, 'query_job': QueryJob, } PATH = 'projects/%s/jobs' % PROJECT @@ -400,13 +400,13 @@ def test_list_jobs_defaults(self): def test_list_jobs_load_job_wo_sourceUris(self): import six - from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import LoadJob PROJECT = 'PROJECT' DATASET = 'test_dataset' SOURCE_TABLE = 'source_table' JOB_TYPES = { - 'load_job': LoadTableFromStorageJob, + 'load_job': LoadJob, } PATH = 'projects/%s/jobs' % PROJECT TOKEN = 'TOKEN' @@ -487,7 +487,7 @@ def test_list_jobs_explicit_missing(self): 'stateFilter': 'done'}) def test_load_table_from_storage(self): - from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import LoadJob PROJECT = 'PROJECT' JOB = 'job_name' @@ -500,7 +500,7 @@ def test_load_table_from_storage(self): dataset = client.dataset(DATASET) destination = dataset.table(DESTINATION) job = client.load_table_from_storage(JOB, destination, SOURCE_URI) - self.assertIsInstance(job, LoadTableFromStorageJob) + self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) self.assertEqual(job.name, JOB) self.assertEqual(list(job.source_uris), [SOURCE_URI]) @@ -528,7 +528,7 @@ def test_copy_table(self): self.assertIs(job.destination, destination) def test_extract_table_to_storage(self): - from google.cloud.bigquery.job import ExtractTableToStorageJob + from google.cloud.bigquery.job import ExtractJob PROJECT = 'PROJECT' JOB = 'job_name' @@ -541,7 +541,7 @@ def test_extract_table_to_storage(self): dataset = client.dataset(DATASET) source = dataset.table(SOURCE) job = client.extract_table_to_storage(JOB, source, DESTINATION) - self.assertIsInstance(job, ExtractTableToStorageJob) + self.assertIsInstance(job, ExtractJob) self.assertIs(job._client, client) self.assertEqual(job.name, JOB) self.assertEqual(job.source, source) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ab08701d352a..c9928732203e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -143,24 +143,24 @@ def _verifyReadonlyResourceProperties(self, job, resource): self.assertIsNone(job.user_email) -class TestLoadTableFromStorageJob(unittest.TestCase, _Base): +class TestLoadJob(unittest.TestCase, _Base): JOB_TYPE = 'load' @staticmethod def _get_target_class(): - from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import LoadJob - return LoadTableFromStorageJob + return LoadJob def _setUpConstants(self): - super(TestLoadTableFromStorageJob, self)._setUpConstants() + super(TestLoadJob, self)._setUpConstants() self.INPUT_FILES = 2 self.INPUT_BYTES = 12345 self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 def _makeResource(self, started=False, ended=False): - resource = super(TestLoadTableFromStorageJob, self)._makeResource( + resource = super(TestLoadJob, self)._makeResource( started, ended) config = resource['configuration']['load'] config['sourceUris'] = [self.SOURCE1] @@ -1142,19 +1142,19 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) -class TestExtractTableToStorageJob(unittest.TestCase, _Base): +class TestExtractJob(unittest.TestCase, _Base): JOB_TYPE = 'extract' SOURCE_TABLE = 'source_table' DESTINATION_URI = 'gs://bucket_name/object_name' @staticmethod def _get_target_class(): - from google.cloud.bigquery.job import ExtractTableToStorageJob + from google.cloud.bigquery.job import ExtractJob - return ExtractTableToStorageJob + return ExtractJob def _makeResource(self, started=False, ended=False): - resource = super(TestExtractTableToStorageJob, self)._makeResource( + resource = super(TestExtractJob, self)._makeResource( started, ended) config = resource['configuration']['extract'] config['sourceTable'] = { @@ -2181,15 +2181,15 @@ def __init__(self, name=None): def name(self): if self._name is not None: return self._name - return TestLoadTableFromStorageJob.TABLE_NAME + return TestLoadJob.TABLE_NAME @property def project(self): - return TestLoadTableFromStorageJob.PROJECT + return TestLoadJob.PROJECT @property def dataset_name(self): - return TestLoadTableFromStorageJob.DS_NAME + return TestLoadJob.DS_NAME class _Connection(object): From 05849504009e15e25cf3ce067f678aed01011707 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 18 Aug 2017 13:02:34 -0400 Subject: [PATCH 0177/2016] Rename class: 'dataset.AccessGrant' -> 'dataset.AccessEntry'. (#3798) * Rename class: 'dataset.AccessGrant' -> 'dataset.AccessEntry'. * PEP8 names for unit test helpers. * Rename 'Dataset.access_grants' -> 'Dataaset.access_entries'. --- .../google/cloud/bigquery/__init__.py | 12 +- .../google/cloud/bigquery/dataset.py | 80 ++++----- .../tests/unit/test_dataset.py | 165 +++++++++--------- 3 files changed, 132 insertions(+), 125 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index e2eb29e866a3..00fa4445b0d0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -30,13 +30,19 @@ from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery.client import Client -from google.cloud.bigquery.dataset import AccessGrant +from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table __all__ = [ - '__version__', 'AccessGrant', 'ArrayQueryParameter', 'Client', - 'Dataset', 'ScalarQueryParameter', 'SchemaField', 'StructQueryParameter', + '__version__', + 'AccessEntry', + 'ArrayQueryParameter', + 'Client', + 'Dataset', + 'ScalarQueryParameter', + 'SchemaField', + 'StructQueryParameter', 'Table', ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 645a68deada4..a688cb3b560b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -21,7 +21,7 @@ from google.cloud.bigquery.table import Table -class AccessGrant(object): +class AccessEntry(object): """Represent grant of an access role to an entity. Every entry in the access list will have exactly one of @@ -76,7 +76,7 @@ def __init__(self, role, entity_type, entity_id): self.entity_id = entity_id def __eq__(self, other): - if not isinstance(other, AccessGrant): + if not isinstance(other, AccessEntry): return NotImplemented return ( self.role == other.role and @@ -87,7 +87,7 @@ def __ne__(self, other): return not self == other def __repr__(self): - return '' % ( + return '' % ( self.role, self.entity_type, self.entity_id) @@ -104,22 +104,22 @@ class Dataset(object): :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - :type access_grants: list of :class:`AccessGrant` - :param access_grants: roles granted to entities for this dataset + :type access_entries: list of :class:`AccessEntry` + :param access_entries: roles granted to entities for this dataset :type project: str :param project: (Optional) project ID for the dataset (defaults to the project of the client). """ - _access_grants = None + _access_entries = None - def __init__(self, name, client, access_grants=(), project=None): + def __init__(self, name, client, access_entries=(), project=None): self.name = name self._client = client self._properties = {} # Let the @property do validation. - self.access_grants = access_grants + self.access_entries = access_entries self._project = project or client.project @property @@ -141,27 +141,27 @@ def path(self): return '/projects/%s/datasets/%s' % (self.project, self.name) @property - def access_grants(self): - """Dataset's access grants. + def access_entries(self): + """Dataset's access entries. - :rtype: list of :class:`AccessGrant` + :rtype: list of :class:`AccessEntry` :returns: roles granted to entities for this dataset """ - return list(self._access_grants) + return list(self._access_entries) - @access_grants.setter - def access_grants(self, value): - """Update dataset's access grants + @access_entries.setter + def access_entries(self, value): + """Update dataset's access entries - :type value: list of :class:`AccessGrant` + :type value: list of :class:`AccessEntry` :param value: roles granted to entities for this dataset :raises: TypeError if 'value' is not a sequence, or ValueError if - any item in the sequence is not an AccessGrant + any item in the sequence is not an AccessEntry """ - if not all(isinstance(field, AccessGrant) for field in value): - raise ValueError('Values must be AccessGrant instances') - self._access_grants = tuple(value) + if not all(isinstance(field, AccessEntry) for field in value): + raise ValueError('Values must be AccessEntry instances') + self._access_entries = tuple(value) @property def created(self): @@ -341,29 +341,29 @@ def _require_client(self, client): return client @staticmethod - def _parse_access_grants(access): - """Parse a resource fragment into a set of access grants. + def _parse_access_entries(access): + """Parse a resource fragment into a set of access entries. ``role`` augments the entity type and present **unless** the entity type is ``view``. :type access: list of mappings - :param access: each mapping represents a single access grant. + :param access: each mapping represents a single access entry. - :rtype: list of :class:`AccessGrant` - :returns: a list of parsed grants. - :raises: :class:`ValueError` if a grant in ``access`` has more keys + :rtype: list of :class:`AccessEntry` + :returns: a list of parsed entries. + :raises: :class:`ValueError` if a entry in ``access`` has more keys than ``role`` and one additional key. """ result = [] - for grant in access: - grant = grant.copy() - role = grant.pop('role', None) - entity_type, entity_id = grant.popitem() - if len(grant) != 0: - raise ValueError('Grant has unexpected keys remaining.', grant) + for entry in access: + entry = entry.copy() + role = entry.pop('role', None) + entity_type, entity_id = entry.popitem() + if len(entry) != 0: + raise ValueError('Entry has unexpected keys remaining.', entry) result.append( - AccessGrant(role, entity_type, entity_id)) + AccessEntry(role, entity_type, entity_id)) return result def _set_properties(self, api_response): @@ -375,7 +375,7 @@ def _set_properties(self, api_response): self._properties.clear() cleaned = api_response.copy() access = cleaned.pop('access', ()) - self.access_grants = self._parse_access_grants(access) + self.access_entries = self._parse_access_entries(access) if 'creationTime' in cleaned: cleaned['creationTime'] = float(cleaned['creationTime']) if 'lastModifiedTime' in cleaned: @@ -386,12 +386,12 @@ def _set_properties(self, api_response): self._properties.update(cleaned) def _build_access_resource(self): - """Generate a resource fragment for dataset's access grants.""" + """Generate a resource fragment for dataset's access entries.""" result = [] - for grant in self.access_grants: - info = {grant.entity_type: grant.entity_id} - if grant.role is not None: - info['role'] = grant.role + for entry in self.access_entries: + info = {entry.entity_type: entry.entity_id} + if entry.role is not None: + info['role'] = entry.role result.append(info) return result @@ -414,7 +414,7 @@ def _build_resource(self): if self.location is not None: resource['location'] = self.location - if len(self.access_grants) > 0: + if len(self.access_entries) > 0: resource['access'] = self._build_access_resource() return resource diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 164f9ed0a2b4..10b832d7abe4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -17,22 +17,22 @@ import mock -class TestAccessGrant(unittest.TestCase): +class TestAccessEntry(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery.dataset import AccessGrant + from google.cloud.bigquery.dataset import AccessEntry - return AccessGrant + return AccessEntry def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') - self.assertEqual(grant.role, 'OWNER') - self.assertEqual(grant.entity_type, 'userByEmail') - self.assertEqual(grant.entity_id, 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + self.assertEqual(entry.role, 'OWNER') + self.assertEqual(entry.entity_type, 'userByEmail') + self.assertEqual(entry.entity_id, 'phred@example.com') def test_ctor_bad_entity_type(self): with self.assertRaises(ValueError): @@ -48,10 +48,10 @@ def test_ctor_view_success(self): role = None entity_type = 'view' entity_id = object() - grant = self._make_one(role, entity_type, entity_id) - self.assertEqual(grant.role, role) - self.assertEqual(grant.entity_type, entity_type) - self.assertEqual(grant.entity_id, entity_id) + entry = self._make_one(role, entity_type, entity_id) + self.assertEqual(entry.role, role) + self.assertEqual(entry.entity_type, entity_type) + self.assertEqual(entry.entity_id, entity_id) def test_ctor_nonview_without_role(self): role = None @@ -60,29 +60,29 @@ def test_ctor_nonview_without_role(self): self._make_one(role, entity_type, None) def test___eq___role_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('WRITER', 'userByEmail', 'phred@example.com') - self.assertNotEqual(grant, other) + self.assertNotEqual(entry, other) def test___eq___entity_type_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('OWNER', 'groupByEmail', 'phred@example.com') - self.assertNotEqual(grant, other) + self.assertNotEqual(entry, other) def test___eq___entity_id_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('OWNER', 'userByEmail', 'bharney@example.com') - self.assertNotEqual(grant, other) + self.assertNotEqual(entry, other) def test___eq___hit(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('OWNER', 'userByEmail', 'phred@example.com') - self.assertEqual(grant, other) + self.assertEqual(entry, other) def test__eq___type_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'silly@example.com') - self.assertNotEqual(grant, object()) - self.assertEqual(grant, mock.ANY) + entry = self._make_one('OWNER', 'userByEmail', 'silly@example.com') + self.assertNotEqual(entry, object()) + self.assertEqual(entry, mock.ANY) class TestDataset(unittest.TestCase): @@ -129,22 +129,23 @@ def _makeResource(self): {'role': 'READER', 'specialGroup': 'projectReaders'}], } - def _verifyAccessGrants(self, access_grants, resource): - r_grants = [] - for r_grant in resource['access']: - role = r_grant.pop('role') - for entity_type, entity_id in sorted(r_grant.items()): - r_grants.append({'role': role, - 'entity_type': entity_type, - 'entity_id': entity_id}) + def _verify_access_entry(self, access_entries, resource): + r_entries = [] + for r_entry in resource['access']: + role = r_entry.pop('role') + for entity_type, entity_id in sorted(r_entry.items()): + r_entries.append({ + 'role': role, + 'entity_type': entity_type, + 'entity_id': entity_id}) - self.assertEqual(len(access_grants), len(r_grants)) - for a_grant, r_grant in zip(access_grants, r_grants): - self.assertEqual(a_grant.role, r_grant['role']) - self.assertEqual(a_grant.entity_type, r_grant['entity_type']) - self.assertEqual(a_grant.entity_id, r_grant['entity_id']) + self.assertEqual(len(access_entries), len(r_entries)) + for a_entry, r_entry in zip(access_entries, r_entries): + self.assertEqual(a_entry.role, r_entry['role']) + self.assertEqual(a_entry.entity_type, r_entry['entity_type']) + self.assertEqual(a_entry.entity_id, r_entry['entity_id']) - def _verifyReadonlyResourceProperties(self, dataset, resource): + def _verify_readonly_resource_properties(self, dataset, resource): self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -165,9 +166,9 @@ def _verifyReadonlyResourceProperties(self, dataset, resource): else: self.assertIsNone(dataset.self_link) - def _verifyResourceProperties(self, dataset, resource): + def _verify_resource_properties(self, dataset, resource): - self._verifyReadonlyResourceProperties(dataset, resource) + self._verify_readonly_resource_properties(dataset, resource) if 'defaultTableExpirationMs' in resource: self.assertEqual(dataset.default_table_expiration_ms, @@ -179,9 +180,9 @@ def _verifyResourceProperties(self, dataset, resource): self.assertEqual(dataset.location, resource.get('location')) if 'access' in resource: - self._verifyAccessGrants(dataset.access_grants, resource) + self._verify_access_entry(dataset.access_entries, resource) else: - self.assertEqual(dataset.access_grants, []) + self.assertEqual(dataset.access_entries, []) def test_ctor_defaults(self): client = _Client(self.PROJECT) @@ -192,7 +193,7 @@ def test_ctor_defaults(self): self.assertEqual( dataset.path, '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME)) - self.assertEqual(dataset.access_grants, []) + self.assertEqual(dataset.access_entries, []) self.assertIsNone(dataset.created) self.assertIsNone(dataset.dataset_id) @@ -206,15 +207,15 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.location) def test_ctor_explicit(self): - from google.cloud.bigquery.dataset import AccessGrant + from google.cloud.bigquery.dataset import AccessEntry - phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') - bharney = AccessGrant('OWNER', 'userByEmail', 'bharney@example.com') - grants = [phred, bharney] + phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') + bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') + entries = [phred, bharney] OTHER_PROJECT = 'foo-bar-123' client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client, - access_grants=grants, + access_entries=entries, project=OTHER_PROJECT) self.assertEqual(dataset.name, self.DS_NAME) self.assertIs(dataset._client, client) @@ -222,7 +223,7 @@ def test_ctor_explicit(self): self.assertEqual( dataset.path, '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_NAME)) - self.assertEqual(dataset.access_grants, grants) + self.assertEqual(dataset.access_entries, entries) self.assertIsNone(dataset.created) self.assertIsNone(dataset.dataset_id) @@ -235,30 +236,30 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) - def test_access_grants_setter_non_list(self): + def test_access_entries_setter_non_list(self): client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) with self.assertRaises(TypeError): - dataset.access_grants = object() + dataset.access_entries = object() - def test_access_grants_setter_invalid_field(self): - from google.cloud.bigquery.dataset import AccessGrant + def test_access_entries_setter_invalid_field(self): + from google.cloud.bigquery.dataset import AccessEntry client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) - phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') + phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') with self.assertRaises(ValueError): - dataset.access_grants = [phred, object()] + dataset.access_entries = [phred, object()] - def test_access_grants_setter(self): - from google.cloud.bigquery.dataset import AccessGrant + def test_access_entries_setter(self): + from google.cloud.bigquery.dataset import AccessEntry client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) - phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') - bharney = AccessGrant('OWNER', 'userByEmail', 'bharney@example.com') - dataset.access_grants = [phred, bharney] - self.assertEqual(dataset.access_grants, [phred, bharney]) + phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') + bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') + dataset.access_entries = [phred, bharney] + self.assertEqual(dataset.access_entries, [phred, bharney]) def test_default_table_expiration_ms_setter_bad_value(self): client = _Client(self.PROJECT) @@ -329,7 +330,7 @@ def test_from_api_repr_bare(self): klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE, client=client) self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) @@ -337,18 +338,18 @@ def test_from_api_repr_w_properties(self): klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE, client=client) self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) - def test__parse_access_grants_w_unknown_entity_type(self): + def test__parse_access_entries_w_unknown_entity_type(self): ACCESS = [ {'role': 'READER', 'unknown': 'UNKNOWN'}, ] client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client=client) with self.assertRaises(ValueError): - dataset._parse_access_grants(ACCESS) + dataset._parse_access_entries(ACCESS) - def test__parse_access_grants_w_extra_keys(self): + def test__parse_access_entries_w_extra_keys(self): USER_EMAIL = 'phred@example.com' ACCESS = [ { @@ -360,7 +361,7 @@ def test__parse_access_grants_w_extra_keys(self): client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client=client) with self.assertRaises(ValueError): - dataset._parse_access_grants(ACCESS) + dataset._parse_access_entries(ACCESS) def test_create_w_bound_client(self): PATH = 'projects/%s/datasets' % self.PROJECT @@ -380,10 +381,10 @@ def test_create_w_bound_client(self): {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, } self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_create_w_alternate_client(self): - from google.cloud.bigquery.dataset import AccessGrant + from google.cloud.bigquery.dataset import AccessEntry PATH = 'projects/%s/datasets' % self.PROJECT USER_EMAIL = 'phred@example.com' @@ -405,13 +406,13 @@ def test_create_w_alternate_client(self): 'datasetId': 'starry-skies', 'tableId': 'northern-hemisphere', } - dataset.access_grants = [ - AccessGrant('OWNER', 'userByEmail', USER_EMAIL), - AccessGrant('OWNER', 'groupByEmail', GROUP_EMAIL), - AccessGrant('READER', 'domain', 'foo.com'), - AccessGrant('READER', 'specialGroup', 'projectReaders'), - AccessGrant('WRITER', 'specialGroup', 'projectWriters'), - AccessGrant(None, 'view', VIEW), + dataset.access_entries = [ + AccessEntry('OWNER', 'userByEmail', USER_EMAIL), + AccessEntry('OWNER', 'groupByEmail', GROUP_EMAIL), + AccessEntry('READER', 'domain', 'foo.com'), + AccessEntry('READER', 'specialGroup', 'projectReaders'), + AccessEntry('WRITER', 'specialGroup', 'projectWriters'), + AccessEntry(None, 'view', VIEW), ] dataset.create(client=CLIENT2) @@ -438,7 +439,7 @@ def test_create_w_alternate_client(self): ], } self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_create_w_missing_output_properties(self): # In the wild, the resource returned from 'dataset.create' sometimes @@ -463,7 +464,7 @@ def test_create_w_missing_output_properties(self): {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, } self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_exists_miss_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) @@ -509,7 +510,7 @@ def test_reload_w_bound_client(self): req = conn._requested[0] self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_reload_w_alternate_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) @@ -527,7 +528,7 @@ def test_reload_w_alternate_client(self): req = conn2._requested[0] self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() @@ -560,7 +561,7 @@ def test_patch_w_bound_client(self): } self.assertEqual(req['data'], SENT) self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_patch_w_alternate_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) @@ -589,7 +590,7 @@ def test_patch_w_alternate_client(self): 'location': LOCATION, } self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_update_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) @@ -617,7 +618,7 @@ def test_update_w_bound_client(self): } self.assertEqual(req['data'], SENT) self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_update_w_alternate_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) @@ -648,7 +649,7 @@ def test_update_w_alternate_client(self): 'location': 'EU', } self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_delete_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) From ed074a91935fa2c19625cdd0be141b64a5c55c64 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 7 Sep 2017 15:05:03 -0400 Subject: [PATCH 0178/2016] Add 'QueryJob.query_plan' property. (#3799) --- .../google/cloud/bigquery/job.py | 165 ++++++++++++ .../tests/unit/test_job.py | 241 ++++++++++++++++++ 2 files changed, 406 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5d37bef36160..75182a22e909 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -266,6 +266,11 @@ def ended(self): if millis is not None: return _datetime_from_microseconds(millis * 1000.0) + def _job_statistics(self): + """Helper for job-type specific statistics-based properties.""" + statistics = self._properties.get('statistics', {}) + return statistics.get(self._JOB_TYPE, {}) + @property def error_result(self): """Error information about the job as a whole. @@ -1281,6 +1286,20 @@ def from_api_repr(cls, resource, client): job._set_properties(resource) return job + @property + def query_plan(self): + """Return query plan from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.queryPlan + + :rtype: list of :class:`QueryPlanEntry` + :returns: mappings describing the query plan, or an empty list + if the query has not yet completed. + """ + plan_entries = self._job_statistics().get('queryPlan', ()) + return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] + def query_results(self): """Construct a QueryResults instance, bound to this job. @@ -1333,3 +1352,149 @@ def result(self, timeout=None): super(QueryJob, self).result(timeout=timeout) # Return an iterator instead of returning the job. return self.query_results().fetch_data() + + +class QueryPlanEntryStep(object): + """Map a single step in a query plan entry. + + :type kind: str + :param kind: step type + + :type substeps: + :param substeps: names of substeps + """ + def __init__(self, kind, substeps): + self.kind = kind + self.substeps = list(substeps) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + :type resource: dict + :param resource: JSON representation of the entry + + :rtype: :class:`QueryPlanEntryStep` + :return: new instance built from the resource + """ + return cls( + kind=resource.get('kind'), + substeps=resource.get('substeps', ()), + ) + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self.kind == other.kind and self.substeps == other.substeps + + +class QueryPlanEntry(object): + """Map a single entry in a query plan. + + :type name: str + :param name: name of the entry + + :type entry_id: int + :param entry_id: ID of the entry + + :type wait_ratio_avg: float + :param wait_ratio_avg: average wait ratio + + :type wait_ratio_max: float + :param wait_ratio_avg: maximum wait ratio + + :type read_ratio_avg: float + :param read_ratio_avg: average read ratio + + :type read_ratio_max: float + :param read_ratio_avg: maximum read ratio + + :type copute_ratio_avg: float + :param copute_ratio_avg: average copute ratio + + :type copute_ratio_max: float + :param copute_ratio_avg: maximum copute ratio + + :type write_ratio_avg: float + :param write_ratio_avg: average write ratio + + :type write_ratio_max: float + :param write_ratio_avg: maximum write ratio + + :type records_read: int + :param records_read: number of records read + + :type records_written: int + :param records_written: number of records written + + :type status: str + :param status: entry status + + :type steps: List(QueryPlanEntryStep) + :param steps: steps in the entry + """ + def __init__(self, + name, + entry_id, + wait_ratio_avg, + wait_ratio_max, + read_ratio_avg, + read_ratio_max, + compute_ratio_avg, + compute_ratio_max, + write_ratio_avg, + write_ratio_max, + records_read, + records_written, + status, + steps): + self.name = name + self.entry_id = entry_id + self.wait_ratio_avg = wait_ratio_avg + self.wait_ratio_max = wait_ratio_max + self.read_ratio_avg = read_ratio_avg + self.read_ratio_max = read_ratio_max + self.compute_ratio_avg = compute_ratio_avg + self.compute_ratio_max = compute_ratio_max + self.write_ratio_avg = write_ratio_avg + self.write_ratio_max = write_ratio_max + self.records_read = records_read + self.records_written = records_written + self.status = status + self.steps = steps + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + :type resource: dict + :param resource: JSON representation of the entry + + :rtype: :class:`QueryPlanEntry` + :return: new instance built from the resource + """ + records_read = resource.get('recordsRead') + if records_read is not None: + records_read = int(records_read) + + records_written = resource.get('recordsWritten') + if records_written is not None: + records_written = int(records_written) + + return cls( + name=resource.get('name'), + entry_id=resource.get('id'), + wait_ratio_avg=resource.get('waitRatioAvg'), + wait_ratio_max=resource.get('waitRatioMax'), + read_ratio_avg=resource.get('readRatioAvg'), + read_ratio_max=resource.get('readRatioMax'), + compute_ratio_avg=resource.get('computeRatioAvg'), + compute_ratio_max=resource.get('computeRatioMax'), + write_ratio_avg=resource.get('writeRatioAvg'), + write_ratio_max=resource.get('writeRatioMax'), + records_read=records_read, + records_written=records_written, + status=resource.get('status'), + steps=[QueryPlanEntryStep.from_api_repr(step) + for step in resource.get('steps', ())], + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index c9928732203e..2c8da8ddc89a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1682,6 +1682,68 @@ def test_done(self): job = self._get_target_class().from_api_repr(resource, client) self.assertTrue(job.done()) + def test_query_plan(self): + from google.cloud.bigquery.job import QueryPlanEntry + from google.cloud.bigquery.job import QueryPlanEntryStep + + plan_entries = [{ + 'name': 'NAME', + 'id': 1234, + 'waitRatioAvg': 2.71828, + 'waitRatioMax': 3.14159, + 'readRatioAvg': 1.41421, + 'readRatioMax': 1.73205, + 'computeRatioAvg': 0.69315, + 'computeRatioMax': 1.09861, + 'writeRatioAvg': 3.32193, + 'writeRatioMax': 2.30258, + 'recordsRead': '100', + 'recordsWritten': '1', + 'status': 'STATUS', + 'steps': [{ + 'kind': 'KIND', + 'substeps': ['SUBSTEP1', 'SUBSTEP2'], + }], + }] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.query_plan, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.query_plan, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.query_plan, []) + + query_stats['queryPlan'] = plan_entries + + self.assertEqual(len(job.query_plan), len(plan_entries)) + for found, expected in zip(job.query_plan, plan_entries): + self.assertIsInstance(found, QueryPlanEntry) + self.assertEqual(found.name, expected['name']) + self.assertEqual(found.entry_id, expected['id']) + self.assertEqual(found.wait_ratio_avg, expected['waitRatioAvg']) + self.assertEqual(found.wait_ratio_max, expected['waitRatioMax']) + self.assertEqual(found.read_ratio_avg, expected['readRatioAvg']) + self.assertEqual(found.read_ratio_max, expected['readRatioMax']) + self.assertEqual( + found.compute_ratio_avg, expected['computeRatioAvg']) + self.assertEqual( + found.compute_ratio_max, expected['computeRatioMax']) + self.assertEqual(found.write_ratio_avg, expected['writeRatioAvg']) + self.assertEqual(found.write_ratio_max, expected['writeRatioMax']) + self.assertEqual( + found.records_read, int(expected['recordsRead'])) + self.assertEqual( + found.records_written, int(expected['recordsWritten'])) + self.assertEqual(found.status, expected['status']) + + self.assertEqual(len(found.steps), len(expected['steps'])) + for f_step, e_step in zip(found.steps, expected['steps']): + self.assertIsInstance(f_step, QueryPlanEntryStep) + self.assertEqual(f_step.kind, e_step['kind']) + self.assertEqual(f_step.substeps, e_step['substeps']) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults @@ -2154,6 +2216,185 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) +class TestQueryPlanEntryStep(unittest.TestCase, _Base): + KIND = 'KIND' + SUBSTEPS = ('SUB1', 'SUB2') + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntryStep + + return QueryPlanEntryStep + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step.kind, self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + step = klass.from_api_repr({}) + self.assertIsNone(step.kind) + self.assertEqual(step.substeps, []) + + def test_from_api_repr_normal(self): + resource = { + 'kind': self.KIND, + 'substeps': self.SUBSTEPS, + } + klass = self._get_target_class() + step = klass.from_api_repr(resource) + self.assertEqual(step.kind, self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test___eq___mismatched_type(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertNotEqual(step, object()) + + def test___eq___mismatch_kind(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one('OTHER', self.SUBSTEPS) + self.assertNotEqual(step, other) + + def test___eq___mismatch_substeps(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, ()) + self.assertNotEqual(step, other) + + def test___eq___hit(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step, other) + + +class TestQueryPlanEntry(unittest.TestCase, _Base): + NAME = 'NAME' + ENTRY_ID = 1234 + WAIT_RATIO_AVG = 2.71828 + WAIT_RATIO_MAX = 3.14159 + READ_RATIO_AVG = 1.41421 + READ_RATIO_MAX = 1.73205 + COMPUTE_RATIO_AVG = 0.69315 + COMPUTE_RATIO_MAX = 1.09861 + WRITE_RATIO_AVG = 3.32193 + WRITE_RATIO_MAX = 2.30258 + RECORDS_READ = 100 + RECORDS_WRITTEN = 1 + STATUS = 'STATUS' + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntry + + return QueryPlanEntry + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + from google.cloud.bigquery.job import QueryPlanEntryStep + + steps = [QueryPlanEntryStep( + kind=TestQueryPlanEntryStep.KIND, + substeps=TestQueryPlanEntryStep.SUBSTEPS)] + entry = self._make_one( + name=self.NAME, + entry_id=self.ENTRY_ID, + wait_ratio_avg=self.WAIT_RATIO_AVG, + wait_ratio_max=self.WAIT_RATIO_MAX, + read_ratio_avg=self.READ_RATIO_AVG, + read_ratio_max=self.READ_RATIO_MAX, + compute_ratio_avg=self.COMPUTE_RATIO_AVG, + compute_ratio_max=self.COMPUTE_RATIO_MAX, + write_ratio_avg=self.WRITE_RATIO_AVG, + write_ratio_max=self.WRITE_RATIO_MAX, + records_read=self.RECORDS_READ, + records_written=self.RECORDS_WRITTEN, + status=self.STATUS, + steps=steps, + ) + self.assertEqual(entry.name, self.NAME) + self.assertEqual(entry.entry_id, self.ENTRY_ID) + self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) + self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) + self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) + self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) + self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) + self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) + self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) + self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) + self.assertEqual(entry.records_read, self.RECORDS_READ) + self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) + self.assertEqual(entry.status, self.STATUS) + self.assertEqual(entry.steps, steps) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + + self.assertIsNone(entry.name) + self.assertIsNone(entry.entry_id) + self.assertIsNone(entry.wait_ratio_avg) + self.assertIsNone(entry.wait_ratio_max) + self.assertIsNone(entry.read_ratio_avg) + self.assertIsNone(entry.read_ratio_max) + self.assertIsNone(entry.compute_ratio_avg) + self.assertIsNone(entry.compute_ratio_max) + self.assertIsNone(entry.write_ratio_avg) + self.assertIsNone(entry.write_ratio_max) + self.assertIsNone(entry.records_read) + self.assertIsNone(entry.records_written) + self.assertIsNone(entry.status) + self.assertEqual(entry.steps, []) + + def test_from_api_repr_normal(self): + from google.cloud.bigquery.job import QueryPlanEntryStep + + steps = [QueryPlanEntryStep( + kind=TestQueryPlanEntryStep.KIND, + substeps=TestQueryPlanEntryStep.SUBSTEPS)] + resource = { + 'name': self.NAME, + 'id': self.ENTRY_ID, + 'waitRatioAvg': self.WAIT_RATIO_AVG, + 'waitRatioMax': self.WAIT_RATIO_MAX, + 'readRatioAvg': self.READ_RATIO_AVG, + 'readRatioMax': self.READ_RATIO_MAX, + 'computeRatioAvg': self.COMPUTE_RATIO_AVG, + 'computeRatioMax': self.COMPUTE_RATIO_MAX, + 'writeRatioAvg': self.WRITE_RATIO_AVG, + 'writeRatioMax': self.WRITE_RATIO_MAX, + 'recordsRead': str(self.RECORDS_READ), + 'recordsWritten': str(self.RECORDS_WRITTEN), + 'status': self.STATUS, + 'steps': [{ + 'kind': TestQueryPlanEntryStep.KIND, + 'substeps': TestQueryPlanEntryStep.SUBSTEPS, + }] + } + klass = self._get_target_class() + + entry = klass.from_api_repr(resource) + self.assertEqual(entry.name, self.NAME) + self.assertEqual(entry.entry_id, self.ENTRY_ID) + self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) + self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) + self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) + self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) + self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) + self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) + self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) + self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) + self.assertEqual(entry.records_read, self.RECORDS_READ) + self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) + self.assertEqual(entry.status, self.STATUS) + self.assertEqual(entry.steps, steps) + + class _Client(object): def __init__(self, project='project', connection=None): From 51d0f6283e9aa77bfaa6afe2443f5dfceb89a141 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 7 Sep 2017 17:45:54 -0400 Subject: [PATCH 0179/2016] Add new scalar statistics properties to 'QueryJob' (#3800) * Add 'QueryJob.total_bytes_processed' property. * Add 'QueryJob.total_bytes_billed' property. * Add 'QueryJob.billing_tier' property. * Add 'QueryJob.cache_hit' property. * Add 'QueryJob.num_dml_affected_rows' property. * Add 'QueryJob.statement_type' property. --- .../google/cloud/bigquery/job.py | 87 ++++++++++++++++++ .../tests/unit/test_job.py | 89 +++++++++++++++++++ 2 files changed, 176 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 75182a22e909..32ee5e535bf9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1300,6 +1300,93 @@ def query_plan(self): plan_entries = self._job_statistics().get('queryPlan', ()) return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] + @property + def total_bytes_processed(self): + """Return total bytes processed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesProcessed + + :rtype: int or None + :returns: total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('totalBytesProcessed') + if result is not None: + result = int(result) + return result + + @property + def total_bytes_billed(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesBilled + + :rtype: int or None + :returns: total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('totalBytesBilled') + if result is not None: + result = int(result) + return result + + @property + def billing_tier(self): + """Return billing tier from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.billingTier + + :rtype: int or None + :returns: billing tier used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get('billingTier') + + @property + def cache_hit(self): + """Return billing tier from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.cacheHit + + :rtype: bool or None + :returns: whether the query results were returned from cache, or None + if job is not yet complete. + """ + return self._job_statistics().get('cacheHit') + + @property + def num_dml_affected_rows(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.numDmlAffectedRows + + :rtype: int or None + :returns: number of DML rows affectd by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('numDmlAffectedRows') + if result is not None: + result = int(result) + return result + + @property + def statement_type(self): + """Return statement type from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.statementType + + :rtype: str or None + :returns: type of statement used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get('statementType') + def query_results(self): """Construct a QueryResults instance, bound to this job. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 2c8da8ddc89a..ecfcec83443c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1744,6 +1744,95 @@ def test_query_plan(self): self.assertEqual(f_step.kind, e_step['kind']) self.assertEqual(f_step.substeps, e_step['substeps']) + def test_total_bytes_processed(self): + total_bytes = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.total_bytes_processed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats['totalBytesProcessed'] = str(total_bytes) + self.assertEqual(job.total_bytes_processed, total_bytes) + + def test_total_bytes_billed(self): + total_bytes = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.total_bytes_billed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats['totalBytesBilled'] = str(total_bytes) + self.assertEqual(job.total_bytes_billed, total_bytes) + + def test_billing_tier(self): + billing_tier = 1 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.billing_tier) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.billing_tier) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.billing_tier) + + query_stats['billingTier'] = billing_tier + self.assertEqual(job.billing_tier, billing_tier) + + def test_cache_hit(self): + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.cache_hit) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.cache_hit) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.cache_hit) + + query_stats['cacheHit'] = True + self.assertTrue(job.cache_hit) + + def test_num_dml_affected_rows(self): + num_rows = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.num_dml_affected_rows) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats['numDmlAffectedRows'] = str(num_rows) + self.assertEqual(job.num_dml_affected_rows, num_rows) + + def test_statement_type(self): + statement_type = 'SELECT' + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.statement_type) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.statement_type) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.statement_type) + + query_stats['statementType'] = statement_type + self.assertEqual(job.statement_type, statement_type) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From 61df9fd45a8598764bb3de1fad8e0ec41be2fc54 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 8 Sep 2017 07:49:02 -0400 Subject: [PATCH 0180/2016] Add 'QueryJob.referenced_tables' property. (#3801) --- .../google/cloud/bigquery/job.py | 30 ++++++++++ .../tests/unit/test_job.py | 58 ++++++++++++++++++- 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 32ee5e535bf9..6299b0821c58 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1387,6 +1387,36 @@ def statement_type(self): """ return self._job_statistics().get('statementType') + @property + def referenced_tables(self): + """Return referenced tables from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.referencedTables + + :rtype: list of dict + :returns: mappings describing the query plan, or an empty list + if the query has not yet completed. + """ + tables = [] + client = self._require_client(None) + datasets_by_project_name = {} + + for table in self._job_statistics().get('referencedTables', ()): + + t_project = table['projectId'] + + ds_name = table['datasetId'] + t_dataset = datasets_by_project_name.get((t_project, ds_name)) + if t_dataset is None: + t_dataset = client.dataset(ds_name, project=t_project) + datasets_by_project_name[(t_project, ds_name)] = t_dataset + + t_name = table['tableId'] + tables.append(t_dataset.table(t_name)) + + return tables + def query_results(self): """Construct a QueryResults instance, bound to this job. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ecfcec83443c..886539a9d55b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1833,6 +1833,60 @@ def test_statement_type(self): query_stats['statementType'] = statement_type self.assertEqual(job.statement_type, statement_type) + def test_referenced_tables(self): + from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.table import Table + + ref_tables_resource = [{ + 'projectId': self.PROJECT, + 'datasetId': 'dataset', + 'tableId': 'local1', + }, { + + 'projectId': self.PROJECT, + 'datasetId': 'dataset', + 'tableId': 'local2', + }, { + + 'projectId': 'other-project-123', + 'datasetId': 'other-dataset', + 'tableId': 'other-table', + }] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.referenced_tables, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats['referencedTables'] = ref_tables_resource + + local1, local2, remote = job.referenced_tables + + self.assertIsInstance(local1, Table) + self.assertEqual(local1.name, 'local1') + self.assertIsInstance(local1._dataset, Dataset) + self.assertEqual(local1.dataset_name, 'dataset') + self.assertEqual(local1.project, self.PROJECT) + self.assertIs(local1._dataset._client, client) + + self.assertIsInstance(local2, Table) + self.assertEqual(local2.name, 'local2') + self.assertIsInstance(local2._dataset, Dataset) + self.assertEqual(local2.dataset_name, 'dataset') + self.assertEqual(local2.project, self.PROJECT) + self.assertIs(local2._dataset._client, client) + + self.assertIsInstance(remote, Table) + self.assertEqual(remote.name, 'other-table') + self.assertIsInstance(remote._dataset, Dataset) + self.assertEqual(remote.dataset_name, 'other-dataset') + self.assertEqual(remote.project, 'other-project-123') + self.assertIs(remote._dataset._client, client) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults @@ -2490,10 +2544,10 @@ def __init__(self, project='project', connection=None): self.project = project self._connection = connection - def dataset(self, name): + def dataset(self, name, project=None): from google.cloud.bigquery.dataset import Dataset - return Dataset(name, client=self) + return Dataset(name, client=self, project=project) def _get_query_results(self, job_id): from google.cloud.bigquery.query import QueryResults From 28d8c44508520fad865331a2f72c3d44c6bb81dc Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 8 Sep 2017 07:57:46 -0400 Subject: [PATCH 0181/2016] Add 'QueryJob.undeclared_query_parameters' property. (#3802) --- .../google/cloud/bigquery/job.py | 33 ++++++++ .../tests/unit/test_job.py | 75 +++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 6299b0821c58..17f75f145c8a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -28,7 +28,10 @@ from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery._helpers import ArrayQueryParameter from google.cloud.bigquery._helpers import QueryParametersProperty +from google.cloud.bigquery._helpers import ScalarQueryParameter +from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _EnumProperty from google.cloud.bigquery._helpers import _TypedProperty @@ -1417,6 +1420,36 @@ def referenced_tables(self): return tables + @property + def undeclared_query_paramters(self): + """Return undeclared query parameters from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.undeclaredQueryParamters + + :rtype: + list of + :class:`~google.cloud.bigquery._helpers.AbstractQueryParameter` + :returns: undeclared parameters, or an empty list if the query has + not yet completed. + """ + parameters = [] + undeclared = self._job_statistics().get('undeclaredQueryParamters', ()) + + for parameter in undeclared: + p_type = parameter['parameterType'] + + if 'arrayType' in p_type: + klass = ArrayQueryParameter + elif 'structTypes' in p_type: + klass = StructQueryParameter + else: + klass = ScalarQueryParameter + + parameters.append(klass.from_api_repr(parameter)) + + return parameters + def query_results(self): """Construct a QueryResults instance, bound to this job. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 886539a9d55b..ccb101b184f1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1887,6 +1887,81 @@ def test_referenced_tables(self): self.assertEqual(remote.project, 'other-project-123') self.assertIs(remote._dataset._client, client) + def test_undeclared_query_paramters(self): + from google.cloud.bigquery._helpers import ArrayQueryParameter + from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery._helpers import StructQueryParameter + + undeclared = [{ + 'name': 'my_scalar', + 'parameterType': { + 'type': 'STRING', + }, + 'parameterValue': { + 'value': 'value', + }, + }, { + 'name': 'my_array', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + {'value': '1066'}, + {'value': '1745'}, + ], + }, + }, { + 'name': 'my_struct', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [{ + 'name': 'count', + 'type': { + 'type': 'INT64', + } + }], + }, + 'parameterValue': { + 'structValues': { + 'count': { + 'value': '123', + }, + } + }, + }] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.undeclared_query_paramters, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.undeclared_query_paramters, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.undeclared_query_paramters, []) + + query_stats['undeclaredQueryParamters'] = undeclared + + scalar, array, struct = job.undeclared_query_paramters + + self.assertIsInstance(scalar, ScalarQueryParameter) + self.assertEqual(scalar.name, 'my_scalar') + self.assertEqual(scalar.type_, 'STRING') + self.assertEqual(scalar.value, 'value') + + self.assertIsInstance(array, ArrayQueryParameter) + self.assertEqual(array.name, 'my_array') + self.assertEqual(array.array_type, 'INT64') + self.assertEqual(array.values, [1066, 1745]) + + self.assertIsInstance(struct, StructQueryParameter) + self.assertEqual(struct.name, 'my_struct') + self.assertEqual(struct.struct_types, {'count': 'INT64'}) + self.assertEqual(struct.struct_values, {'count': 123}) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From 99cb6cf2cc09cb4582d1068fb32cb4151f55ef1a Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 8 Sep 2017 10:25:23 -0400 Subject: [PATCH 0182/2016] Fix system test broken by PR #3798. (#3936) --- packages/google-cloud-bigquery/tests/system.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 0d5b9918fef3..f5188118918e 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -156,14 +156,14 @@ def test_update_dataset(self): self.to_delete.append(dataset) self.assertTrue(dataset.exists()) - after = [grant for grant in dataset.access_grants - if grant.entity_id != 'projectWriters'] - dataset.access_grants = after + after = [entry for entry in dataset.access_entries + if entry.entity_id != 'projectWriters'] + dataset.access_entries = after retry_403(dataset.update)() - self.assertEqual(len(dataset.access_grants), len(after)) - for found, expected in zip(dataset.access_grants, after): + self.assertEqual(len(dataset.access_entries), len(after)) + for found, expected in zip(dataset.access_entries, after): self.assertEqual(found.role, expected.role) self.assertEqual(found.entity_type, expected.entity_type) self.assertEqual(found.entity_id, expected.entity_id) From 11b5e8756342c28b78d3b9d6d1287fdd792432fd Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 11 Sep 2017 12:32:05 -0400 Subject: [PATCH 0183/2016] Add 'Client.get_job' API wrapper. (#3804) * Allow assigning 'None' to '_TypedProperty' properties. * Ensure that configuration properties are copied when (re)loading jobs. --- packages/google-cloud-bigquery/.coveragerc | 2 + .../google/cloud/bigquery/_helpers.py | 38 ++++++ .../google/cloud/bigquery/client.py | 61 ++++++--- .../google/cloud/bigquery/job.py | 106 ++++++++++++++- .../tests/unit/test__helpers.py | 122 ++++++++++++++++++ .../tests/unit/test_client.py | 64 +++++++++ .../tests/unit/test_job.py | 105 +++++++++++---- 7 files changed, 457 insertions(+), 41 deletions(-) diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index a54b99aa14b7..d097511c3124 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -9,3 +9,5 @@ exclude_lines = pragma: NO COVER # Ignore debug-only repr def __repr__ + # Ignore abstract methods + raise NotImplementedError diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index ac6e9759c084..5f6edf67dca5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -316,6 +316,8 @@ def _validate(self, value): :raises: ValueError on a type mismatch. """ + if value is None: + return if not isinstance(value, self.property_type): raise ValueError('Required type: %s' % (self.property_type,)) @@ -413,6 +415,14 @@ def __init__(self, name, type_, value): self.type_ = type_ self.value = value + def __eq__(self, other): + if not isinstance(other, ScalarQueryParameter): + return NotImplemented + return( + self.name == other.name and + self.type_ == other.type_ and + self.value == other.value) + @classmethod def positional(cls, type_, value): """Factory for positional paramater. @@ -515,6 +525,14 @@ def __init__(self, name, array_type, values): self.array_type = array_type self.values = values + def __eq__(self, other): + if not isinstance(other, ArrayQueryParameter): + return NotImplemented + return( + self.name == other.name and + self.array_type == other.array_type and + self.values == other.values) + @classmethod def positional(cls, array_type, values): """Factory for positional parameters. @@ -657,6 +675,14 @@ def __init__(self, name, *sub_params): types[sub.name] = sub.type_ values[sub.name] = sub.value + def __eq__(self, other): + if not isinstance(other, StructQueryParameter): + return NotImplemented + return( + self.name == other.name and + self.struct_types == other.struct_types and + self.struct_values == other.struct_values) + @classmethod def positional(cls, *sub_params): """Factory for positional parameters. @@ -770,6 +796,18 @@ def __repr__(self): return 'StructQueryParameter{}'.format(self._key()) +def _query_param_from_api_repr(resource): + """Helper: construct concrete query parameter from JSON resource.""" + qp_type = resource['parameterType'] + if 'arrayType' in qp_type: + klass = ArrayQueryParameter + elif 'structTypes' in qp_type: + klass = StructQueryParameter + else: + klass = ScalarQueryParameter + return klass.from_api_repr(resource) + + class QueryParametersProperty(object): """Custom property type, holding query parameter instances.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5578064c1cc7..2ae577a51708 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -222,6 +222,35 @@ def job_from_resource(self, resource): return QueryJob.from_api_repr(resource, self) raise ValueError('Cannot parse job resource') + def get_job(self, job_id, project=None): + """Fetch a job for the project associated with this client. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get + + :type job_id: str + :param job_id: Name of the job. + + :type project: str + :param project: + project ID owning the job (defaults to the client's project) + + :rtype: :class:`~google.cloud.bigquery.job._AsyncJob` + :returns: + Concrete job instance, based on the resource returned by the API. + """ + extra_params = {'projection': 'full'} + + if project is None: + project = self.project + + path = '/projects/{}/jobs/{}'.format(project, job_id) + + resource = self._connection.api_request( + method='GET', path=path, query_params=extra_params) + + return self.job_from_resource(resource) + def list_jobs(self, max_results=None, page_token=None, all_users=None, state_filter=None): """List jobs for the project associated with this client. @@ -272,14 +301,14 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, max_results=max_results, extra_params=extra_params) - def load_table_from_storage(self, job_name, destination, *source_uris): + def load_table_from_storage(self, job_id, destination, *source_uris): """Construct a job for loading data into a table from CloudStorage. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load - :type job_name: str - :param job_name: Name of the job. + :type job_id: str + :param job_id: Name of the job. :type destination: :class:`google.cloud.bigquery.table.Table` :param destination: Table into which data is to be loaded. @@ -291,16 +320,16 @@ def load_table_from_storage(self, job_name, destination, *source_uris): :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: a new ``LoadJob`` instance """ - return LoadJob(job_name, destination, source_uris, client=self) + return LoadJob(job_id, destination, source_uris, client=self) - def copy_table(self, job_name, destination, *sources): + def copy_table(self, job_id, destination, *sources): """Construct a job for copying one or more tables into another table. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy - :type job_name: str - :param job_name: Name of the job. + :type job_id: str + :param job_id: Name of the job. :type destination: :class:`google.cloud.bigquery.table.Table` :param destination: Table into which data is to be copied. @@ -311,16 +340,16 @@ def copy_table(self, job_name, destination, *sources): :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: a new ``CopyJob`` instance """ - return CopyJob(job_name, destination, sources, client=self) + return CopyJob(job_id, destination, sources, client=self) - def extract_table_to_storage(self, job_name, source, *destination_uris): + def extract_table_to_storage(self, job_id, source, *destination_uris): """Construct a job for extracting a table into Cloud Storage files. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract - :type job_name: str - :param job_name: Name of the job. + :type job_id: str + :param job_id: Name of the job. :type source: :class:`google.cloud.bigquery.table.Table` :param source: table to be extracted. @@ -333,17 +362,17 @@ def extract_table_to_storage(self, job_name, source, *destination_uris): :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: a new ``ExtractJob`` instance """ - return ExtractJob(job_name, source, destination_uris, client=self) + return ExtractJob(job_id, source, destination_uris, client=self) - def run_async_query(self, job_name, query, + def run_async_query(self, job_id, query, udf_resources=(), query_parameters=()): """Construct a job for running a SQL query asynchronously. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query - :type job_name: str - :param job_name: Name of the job. + :type job_id: str + :param job_id: Name of the job. :type query: str :param query: SQL query to be executed @@ -362,7 +391,7 @@ def run_async_query(self, job_name, query, :rtype: :class:`google.cloud.bigquery.job.QueryJob` :returns: a new ``QueryJob`` instance """ - return QueryJob(job_name, query, client=self, + return QueryJob(job_id, query, client=self, udf_resources=udf_resources, query_parameters=query_parameters) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 17f75f145c8a..014eb2ee2740 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -32,8 +32,10 @@ from google.cloud.bigquery._helpers import QueryParametersProperty from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter +from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _EnumProperty +from google.cloud.bigquery._helpers import _query_param_from_api_repr from google.cloud.bigquery._helpers import _TypedProperty _DONE_STATE = 'DONE' @@ -61,6 +63,22 @@ } +def _bool_or_none(value): + """Helper: deserialize boolean value from JSON string.""" + if isinstance(value, bool): + return value + if value is not None: + return value.lower() in ['t', 'true', '1'] + + +def _int_or_none(value): + """Helper: deserialize int value from JSON string.""" + if isinstance(value, int): + return value + if value is not None: + return int(value) + + def _error_result_to_exception(error_result): """Maps BigQuery error reasons to an exception. @@ -311,6 +329,10 @@ def _scrub_local_properties(self, cleaned): """Helper: handle subclass properties in cleaned.""" pass + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + raise NotImplementedError("Abstract") + def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -330,6 +352,8 @@ def _set_properties(self, api_response): self._properties.clear() self._properties.update(cleaned) + configuration = cleaned['configuration'][self._JOB_TYPE] + self._copy_configuration_properties(configuration) # For Future interface self._set_future_result() @@ -731,7 +755,7 @@ def _populate_config_resource(self, configuration): if self.quote_character is not None: configuration['quote'] = self.quote_character if self.skip_leading_rows is not None: - configuration['skipLeadingRows'] = self.skip_leading_rows + configuration['skipLeadingRows'] = str(self.skip_leading_rows) if self.source_format is not None: configuration['sourceFormat'] = self.source_format if self.write_disposition is not None: @@ -769,6 +793,28 @@ def _scrub_local_properties(self, cleaned): schema = cleaned.pop('schema', {'fields': ()}) self.schema = _parse_schema_resource(schema) + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + self.allow_jagged_rows = _bool_or_none( + configuration.get('allowJaggedRows')) + self.allow_quoted_newlines = _bool_or_none( + configuration.get('allowQuotedNewlines')) + self.autodetect = _bool_or_none( + configuration.get('autodetect')) + self.create_disposition = configuration.get('createDisposition') + self.encoding = configuration.get('encoding') + self.field_delimiter = configuration.get('fieldDelimiter') + self.ignore_unknown_values = _bool_or_none( + configuration.get('ignoreUnknownValues')) + self.max_bad_records = _int_or_none( + configuration.get('maxBadRecords')) + self.null_marker = configuration.get('nullMarker') + self.quote_character = configuration.get('quote') + self.skip_leading_rows = _int_or_none( + configuration.get('skipLeadingRows')) + self.source_format = configuration.get('sourceFormat') + self.write_disposition = configuration.get('writeDisposition') + @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -879,6 +925,11 @@ def _build_resource(self): return resource + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + self.create_disposition = configuration.get('createDisposition') + self.write_disposition = configuration.get('writeDisposition') + @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -1012,6 +1063,14 @@ def _build_resource(self): return resource + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + self.compression = configuration.get('compression') + self.destination_format = configuration.get('destinationFormat') + self.field_delimiter = configuration.get('fieldDelimiter') + self.print_header = _bool_or_none( + configuration.get('printHeader')) + @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -1208,7 +1267,8 @@ def _populate_config_resource(self, configuration): if self.maximum_billing_tier is not None: configuration['maximumBillingTier'] = self.maximum_billing_tier if self.maximum_bytes_billed is not None: - configuration['maximumBytesBilled'] = self.maximum_bytes_billed + configuration['maximumBytesBilled'] = str( + self.maximum_bytes_billed) if len(self._udf_resources) > 0: configuration[self._UDF_KEY] = [ {udf_resource.udf_type: udf_resource.value} @@ -1258,6 +1318,25 @@ def _scrub_local_properties(self, cleaned): configuration = cleaned['configuration']['query'] self.query = configuration['query'] + + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + self.allow_large_results = _bool_or_none( + configuration.get('allowLargeResults')) + self.flatten_results = _bool_or_none( + configuration.get('flattenResults')) + self.use_query_cache = _bool_or_none( + configuration.get('useQueryCache')) + self.use_legacy_sql = _bool_or_none( + configuration.get('useLegacySql')) + + self.create_disposition = configuration.get('createDisposition') + self.priority = configuration.get('priority') + self.write_disposition = configuration.get('writeDisposition') + self.maximum_billing_tier = configuration.get('maximumBillingTier') + self.maximum_bytes_billed = _int_or_none( + configuration.get('maximumBytesBilled')) + dest_remote = configuration.get('destinationTable') if dest_remote is None: @@ -1266,9 +1345,30 @@ def _scrub_local_properties(self, cleaned): else: dest_local = self._destination_table_resource() if dest_remote != dest_local: - dataset = self._client.dataset(dest_remote['datasetId']) + project = dest_remote['projectId'] + dataset = self._client.dataset( + dest_remote['datasetId'], project=project) self.destination = dataset.table(dest_remote['tableId']) + def_ds = configuration.get('defaultDataset') + if def_ds is None: + if self.default_dataset is not None: + del self.default_dataset + else: + project = def_ds['projectId'] + self.default_dataset = self._client.dataset(def_ds['datasetId']) + + udf_resources = [] + for udf_mapping in configuration.get(self._UDF_KEY, ()): + key_val, = udf_mapping.items() + udf_resources.append(UDFResource(key_val[0], key_val[1])) + self._udf_resources = udf_resources + + self._query_parameters = [ + _query_param_from_api_repr(mapping) + for mapping in configuration.get(self._QUERY_PARAMETERS_KEY, ()) + ] + @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 9dc14f6e3a47..c43f7b6d1ae3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -736,6 +736,14 @@ def __init__(self): self.assertEqual(wrapper.attr, 42) self.assertEqual(wrapper._configuration._attr, 42) + wrapper.attr = None + self.assertIsNone(wrapper.attr) + self.assertIsNone(wrapper._configuration._attr) + + wrapper.attr = 23 + self.assertEqual(wrapper.attr, 23) + self.assertEqual(wrapper._configuration._attr, 23) + del wrapper.attr self.assertIsNone(wrapper.attr) self.assertIsNone(wrapper._configuration._attr) @@ -899,6 +907,17 @@ def test_ctor(self): self.assertEqual(param.type_, 'INT64') self.assertEqual(param.value, 123) + def test___eq__(self): + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one(name='bar', type_='INT64', value=123) + self.assertNotEqual(param, alias) + wrong_type = self._make_one(name='foo', type_='FLOAT64', value=123.0) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one(name='foo', type_='INT64', value=234) + self.assertNotEqual(param, wrong_val) + def test_positional(self): klass = self._get_target_class() param = klass.positional(type_='INT64', value=123) @@ -1171,6 +1190,19 @@ def test_ctor(self): self.assertEqual(param.array_type, 'INT64') self.assertEqual(param.values, [1, 2]) + def test___eq__(self): + param = self._make_one(name='foo', array_type='INT64', values=[123]) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one(name='bar', array_type='INT64', values=[123]) + self.assertNotEqual(param, alias) + wrong_type = self._make_one( + name='foo', array_type='FLOAT64', values=[123.0]) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one( + name='foo', array_type='INT64', values=[234]) + self.assertNotEqual(param, wrong_val) + def test_positional(self): klass = self._get_target_class() param = klass.positional(array_type='INT64', values=[1, 2]) @@ -1446,6 +1478,21 @@ def test_ctor(self): self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + def test___eq__(self): + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + sub_3 = _make_subparam('baz', 'STRING', 'def') + sub_1_float = _make_subparam('bar', 'FLOAT64', 123.0) + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one('bar', sub_1, sub_2) + self.assertNotEqual(param, alias) + wrong_type = self._make_one('foo', sub_1_float, sub_2) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one('foo', sub_2, sub_3) + self.assertNotEqual(param, wrong_val) + def test_positional(self): sub_1 = _make_subparam('bar', 'INT64', 123) sub_2 = _make_subparam('baz', 'STRING', 'abc') @@ -1749,6 +1796,81 @@ def test___repr__(self): self.assertIn("'field1': 'hello'", got) +class Test__query_param_from_api_repr(unittest.TestCase): + + @staticmethod + def _call_fut(resource): + from google.cloud.bigquery._helpers import _query_param_from_api_repr + + return _query_param_from_api_repr(resource) + + def test_w_scalar(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': {'type': 'INT64'}, + 'parameterValue': {'value': '123'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual(parameter.type_, 'INT64') + self.assertEqual(parameter.value, 123) + + def test_w_array(self): + from google.cloud.bigquery._helpers import ArrayQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }, + 'parameterValue': { + 'arrayValues': [ + {'value': '123'}, + ]}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ArrayQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual(parameter.array_type, 'INT64') + self.assertEqual(parameter.values, [123]) + + def test_w_struct(self): + from google.cloud.bigquery._helpers import StructQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'foo', 'type': {'type': 'STRING'}}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'foo': {'value': 'Foo'}, + 'bar': {'value': '123'}, + } + }, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, StructQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual( + parameter.struct_types, {'foo': 'STRING', 'bar': 'INT64'}) + self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123}) + + class Test_QueryParametersProperty(unittest.TestCase): @staticmethod diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 6a9a98f2952e..3cd4a24ceb43 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -266,6 +266,70 @@ def test_job_from_resource_unknown_type(self): with self.assertRaises(ValueError): client.job_from_resource({'configuration': {'nonesuch': {}}}) + def test_get_job_miss_w_explict_project(self): + from google.cloud.exceptions import NotFound + + PROJECT = 'PROJECT' + OTHER_PROJECT = 'OTHER_PROJECT' + JOB_ID = 'NONESUCH' + creds = _make_credentials() + client = self._make_one(PROJECT, creds) + conn = client._connection = _Connection() + + with self.assertRaises(NotFound): + client.get_job(JOB_ID, project=OTHER_PROJECT) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/projects/OTHER_PROJECT/jobs/NONESUCH') + self.assertEqual(req['query_params'], {'projection': 'full'}) + + def test_get_job_hit(self): + from google.cloud.bigquery.job import QueryJob + + PROJECT = 'PROJECT' + JOB_ID = 'query_job' + DATASET = 'test_dataset' + QUERY_DESTINATION_TABLE = 'query_destination_table' + QUERY = 'SELECT * from test_dataset:test_table' + ASYNC_QUERY_DATA = { + 'id': '{}:{}'.format(PROJECT, JOB_ID), + 'jobReference': { + 'projectId': PROJECT, + 'jobId': 'query_job', + }, + 'state': 'DONE', + 'configuration': { + 'query': { + 'query': QUERY, + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': QUERY_DESTINATION_TABLE, + }, + 'createDisposition': 'CREATE_IF_NEEDED', + 'writeDisposition': 'WRITE_TRUNCATE', + } + }, + } + creds = _make_credentials() + client = self._make_one(PROJECT, creds) + conn = client._connection = _Connection(ASYNC_QUERY_DATA) + + job = client.get_job(JOB_ID) + + self.assertIsInstance(job, QueryJob) + self.assertEqual(job.name, JOB_ID) + self.assertEqual(job.create_disposition, 'CREATE_IF_NEEDED') + self.assertEqual(job.write_disposition, 'WRITE_TRUNCATE') + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/projects/PROJECT/jobs/query_job') + self.assertEqual(req['query_params'], {'projection': 'full'}) + def test_list_jobs_defaults(self): import six from google.cloud.bigquery.job import LoadJob diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ccb101b184f1..9e684faf4f8b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -18,9 +18,49 @@ import unittest +class Test__bool_or_none(unittest.TestCase): + + def _call_fut(self, *args, **kwargs): + from google.cloud.bigquery import job + + return job._bool_or_none(*args, **kwargs) + + def test_w_bool(self): + self.assertTrue(self._call_fut(True)) + self.assertFalse(self._call_fut(False)) + + def test_w_none(self): + self.assertIsNone(self._call_fut(None)) + + def test_w_str(self): + self.assertTrue(self._call_fut('1')) + self.assertTrue(self._call_fut('t')) + self.assertTrue(self._call_fut('true')) + self.assertFalse(self._call_fut('anything else')) + + +class Test__int_or_none(unittest.TestCase): + + def _call_fut(self, *args, **kwargs): + from google.cloud.bigquery import job + + return job._int_or_none(*args, **kwargs) + + def test_w_int(self): + self.assertEqual(self._call_fut(13), 13) + + def test_w_none(self): + self.assertIsNone(self._call_fut(None)) + + def test_w_str(self): + self.assertEqual(self._call_fut('13'), 13) + + class Test__error_result_to_exception(unittest.TestCase): + def _call_fut(self, *args, **kwargs): from google.cloud.bigquery import job + return job._error_result_to_exception(*args, **kwargs) def test_simple(self): @@ -259,7 +299,7 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.quote_character) if 'skipLeadingRows' in config: - self.assertEqual(job.skip_leading_rows, + self.assertEqual(str(job.skip_leading_rows), config['skipLeadingRows']) else: self.assertIsNone(job.skip_leading_rows) @@ -517,10 +557,12 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() + load_config = RESOURCE['configuration']['load'] + load_config['createDisposition'] = 'CREATE_IF_NEEDED' klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_begin_w_already_running(self): conn = _Connection() @@ -632,7 +674,7 @@ def test_begin_w_alternate_client(self): 'maxBadRecords': 100, 'nullMarker': r'\N', 'quote': "'", - 'skipLeadingRows': 1, + 'skipLeadingRows': '1', 'sourceFormat': 'CSV', 'writeDisposition': 'WRITE_TRUNCATE', 'schema': {'fields': [ @@ -973,10 +1015,12 @@ def test_from_api_repr_wo_sources(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() + copy_config = RESOURCE['configuration']['copy'] + copy_config['createDisposition'] = 'CREATE_IF_NEEDED' klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -1272,10 +1316,12 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() + extract_config = RESOURCE['configuration']['extract'] + extract_config['compression'] = 'GZIP' klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -1484,13 +1530,14 @@ def _verifyBooleanResourceProperties(self, job, config): def _verifyIntegerResourceProperties(self, job, config): if 'maximumBillingTier' in config: - self.assertEqual(job.maximum_billing_tier, - config['maximumBillingTier']) + self.assertEqual( + job.maximum_billing_tier, config['maximumBillingTier']) else: self.assertIsNone(job.maximum_billing_tier) if 'maximumBytesBilled' in config: - self.assertEqual(job.maximum_bytes_billed, - config['maximumBytesBilled']) + self.assertEqual( + str(job.maximum_bytes_billed), config['maximumBytesBilled']) + self.assertIsInstance(job.maximum_bytes_billed, int) else: self.assertIsNone(job.maximum_bytes_billed) @@ -1643,7 +1690,7 @@ def test_from_api_repr_bare(self): 'jobId': self.JOB_NAME, }, 'configuration': { - 'query': {'query': self.QUERY} + 'query': {'query': self.QUERY}, }, } klass = self._get_target_class() @@ -1654,15 +1701,18 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() - RESOURCE['configuration']['query']['destinationTable'] = { + query_config = RESOURCE['configuration']['query'] + query_config['createDisposition'] = 'CREATE_IF_NEEDED' + query_config['writeDisposition'] = 'WRITE_TRUNCATE' + query_config['destinationTable'] = { 'projectId': self.PROJECT, 'datasetId': self.DS_NAME, 'tableId': self.DESTINATION_TABLE, } klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_cancelled(self): client = _Client(self.PROJECT) @@ -2058,7 +2108,10 @@ def test_result_error(self): self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) def test_begin_w_bound_client(self): + from google.cloud.bigquery.dataset import Dataset + PATH = '/projects/%s/jobs' % (self.PROJECT,) + DS_NAME = 'DATASET' RESOURCE = self._makeResource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] @@ -2067,9 +2120,13 @@ def test_begin_w_bound_client(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + job.default_dataset = Dataset(DS_NAME, client) job.begin() + + self.assertIsNone(job.default_dataset) self.assertEqual(job.udf_resources, []) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -2082,7 +2139,11 @@ def test_begin_w_bound_client(self): }, 'configuration': { 'query': { - 'query': self.QUERY + 'query': self.QUERY, + 'defaultDataset': { + 'projectId': self.PROJECT, + 'datasetId': DS_NAME, + }, }, }, } @@ -2116,7 +2177,7 @@ def test_begin_w_alternate_client(self): 'useLegacySql': True, 'writeDisposition': 'WRITE_TRUNCATE', 'maximumBillingTier': 4, - 'maximumBytesBilled': 123456 + 'maximumBytesBilled': '123456' } RESOURCE['configuration']['query'] = QUERY_CONFIGURATION conn1 = _Connection() From 4b9bb3d386bb75d9d4a38fbd6ac6483383580d14 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 11 Sep 2017 12:38:03 -0400 Subject: [PATCH 0184/2016] Add 'ExtractTableStorageJob.destination_uri_file_counts' property. (#3803) --- .../google/cloud/bigquery/job.py | 16 ++++++++++++++++ .../tests/unit/test_job.py | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 014eb2ee2740..f060ba1bc2f9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1026,6 +1026,22 @@ def __init__(self, name, source, destination_uris, client): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader """ + @property + def destination_uri_file_counts(self): + """Return file counts from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.extract.destinationUriFileCounts + + :rtype: int or None + :returns: number of DML rows affectd by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('destinationUriFileCounts') + if result is not None: + result = int(result) + return result + def _populate_config_resource(self, configuration): """Helper for _build_resource: copy config properties to resource""" if self.compression is not None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 9e684faf4f8b..23fb95eea123 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1266,6 +1266,23 @@ def test_ctor(self): self.assertIsNone(job.field_delimiter) self.assertIsNone(job.print_header) + def test_destination_uri_file_counts(self): + file_counts = 23 + client = _Client(self.PROJECT) + source = _Table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + client) + self.assertIsNone(job.destination_uri_file_counts) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats = statistics['extract'] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats['destinationUriFileCounts'] = str(file_counts) + self.assertEqual(job.destination_uri_file_counts, file_counts) + def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) From 64ba4e80b98344558dacafd150b1650705512553 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 11 Sep 2017 12:08:09 -0700 Subject: [PATCH 0185/2016] bigquery add DatasetReference class and tests (#3938) --- .../google/cloud/bigquery/dataset.py | 36 +++++++++++++++++++ .../tests/unit/test_dataset.py | 17 +++++++++ 2 files changed, 53 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index a688cb3b560b..f4e6fd519e38 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -91,6 +91,42 @@ def __repr__(self): self.role, self.entity_type, self.entity_id) +class DatasetReference(object): + """DatasetReferences are pointers to datasets. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets + + :type project_id: str + :param project_id: the ID of the project + + :type dataset_id: str + :param dataset_id: the ID of the dataset + """ + + def __init__(self, project_id, dataset_id): + self._project_id = project_id + self._dataset_id = dataset_id + + @property + def project_id(self): + """Project ID of the dataset. + + :rtype: str + :returns: the project ID. + """ + return self._project_id + + @property + def dataset_id(self): + """Dataset ID. + + :rtype: str + :returns: the dataset ID. + """ + return self._dataset_id + + class Dataset(object): """Datasets are containers for tables. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 10b832d7abe4..4da2ada5de66 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -85,6 +85,23 @@ def test__eq___type_mismatch(self): self.assertEqual(entry, mock.ANY) +class TestDatasetReference(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.dataset import DatasetReference + + return DatasetReference + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + dataset_ref = self._make_one('some-project-1', 'dataset_1') + self.assertEqual(dataset_ref.project_id, 'some-project-1') + self.assertEqual(dataset_ref.dataset_id, 'dataset_1') + + class TestDataset(unittest.TestCase): PROJECT = 'project' DS_NAME = 'dataset-name' From abdb7ae26b198fa23c3c6003d6b956f93867d243 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 12 Sep 2017 17:24:21 -0700 Subject: [PATCH 0186/2016] BigQuery: Add TestReference class. Add table function to DatasetReference (#3942) --- .../google/cloud/bigquery/dataset.py | 9 +++++ .../google/cloud/bigquery/table.py | 36 +++++++++++++++++++ .../tests/unit/test_dataset.py | 6 ++++ .../tests/unit/test_table.py | 20 +++++++++++ 4 files changed, 71 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index f4e6fd519e38..e31b4a2a93b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -19,6 +19,7 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud.exceptions import NotFound from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference class AccessEntry(object): @@ -126,6 +127,14 @@ def dataset_id(self): """ return self._dataset_id + def table(self, table_id): + """Constructs a TableReference. + + :rtype: :class:`google.cloud.bigquery.table.TableReference` + :returns: a TableReference for a table in this dataset. + """ + return TableReference(self, table_id) + class Dataset(object): """Datasets are containers for tables. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 2121730d3f79..0d651aa6b5f1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -48,6 +48,42 @@ _DEFAULT_NUM_RETRIES = 6 +class TableReference(object): + """TableReferences are pointers to tables. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables + + :type dataset_ref: :class:`google.cloud.bigquery.dataset.DatasetReference` + :param dataset_ref: a pointer to the dataset + + :type table_id: str + :param table_id: the ID of the table + """ + + def __init__(self, dataset_ref, table_id): + self._dataset_ref = dataset_ref + self._table_id = table_id + + @property + def dataset_ref(self): + """Pointer to the dataset. + + :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference` + :returns: a pointer to the dataset. + """ + return self._dataset_ref + + @property + def table_id(self): + """Table ID. + + :rtype: str + :returns: the table ID. + """ + return self._table_id + + class Table(object): """Tables represent a set of rows whose values correspond to a schema. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 4da2ada5de66..c509be6838a1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -101,6 +101,12 @@ def test_ctor_defaults(self): self.assertEqual(dataset_ref.project_id, 'some-project-1') self.assertEqual(dataset_ref.dataset_id, 'dataset_1') + def test_table(self): + dataset_ref = self._make_one('some-project-1', 'dataset_1') + table_ref = dataset_ref.table('table_1') + self.assertIs(table_ref.dataset_ref, dataset_ref) + self.assertEqual(table_ref.table_id, 'table_1') + class TestDataset(unittest.TestCase): PROJECT = 'project' diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index aa9e00670655..2629f824e0b2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -37,6 +37,26 @@ def _verifySchema(self, schema, resource): self._verify_field(field, r_field) +class TestTableReference(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import TableReference + + return TableReference + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + + table_ref = self._make_one(dataset_ref, 'table_1') + self.assertIs(table_ref.dataset_ref, dataset_ref) + self.assertEqual(table_ref.table_id, 'table_1') + + class TestTable(unittest.TestCase, _SchemaBase): PROJECT = 'prahj-ekt' From 80101a92d3bd3bf33f8d074c6c0cc18aba3b0a81 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 13 Sep 2017 12:30:28 -0700 Subject: [PATCH 0187/2016] BigQuery: Modify client.dataset() to return DatasetReference instead of Dataset. (#3944) * BigQuery: Add TestReference class. Add table function to DatasetReference * BigQuery: Modify client.dataset() to return DatasetReference instead of Dataset. * Bigquery: client.dataset() uses default project if not specified --- .../google/cloud/bigquery/client.py | 12 ++-- .../google/cloud/bigquery/job.py | 8 +-- .../google-cloud-bigquery/tests/system.py | 55 ++++++++++--------- .../tests/unit/test_client.py | 23 ++++++-- .../tests/unit/test_job.py | 5 -- 5 files changed, 58 insertions(+), 45 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2ae577a51708..5ab8ff820764 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -18,6 +18,7 @@ from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob @@ -148,7 +149,7 @@ def list_datasets(self, include_all=False, max_results=None, extra_params=extra_params) def dataset(self, dataset_name, project=None): - """Construct a dataset bound to this client. + """Construct a reference to a dataset. :type dataset_name: str :param dataset_name: Name of the dataset. @@ -157,10 +158,13 @@ def dataset(self, dataset_name, project=None): :param project: (Optional) project ID for the dataset (defaults to the project of the client). - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` - :returns: a new ``Dataset`` instance + :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference` + :returns: a new ``DatasetReference`` instance """ - return Dataset(dataset_name, client=self, project=project) + if project is None: + project = self.project + + return DatasetReference(project, dataset_name) def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index f060ba1bc2f9..6f5c2c294a0c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1362,8 +1362,8 @@ def _copy_configuration_properties(self, configuration): dest_local = self._destination_table_resource() if dest_remote != dest_local: project = dest_remote['projectId'] - dataset = self._client.dataset( - dest_remote['datasetId'], project=project) + dataset = Dataset( + dest_remote['datasetId'], self._client, project=project) self.destination = dataset.table(dest_remote['tableId']) def_ds = configuration.get('defaultDataset') @@ -1372,7 +1372,7 @@ def _copy_configuration_properties(self, configuration): del self.default_dataset else: project = def_ds['projectId'] - self.default_dataset = self._client.dataset(def_ds['datasetId']) + self.default_dataset = Dataset(def_ds['datasetId'], self._client) udf_resources = [] for udf_mapping in configuration.get(self._UDF_KEY, ()): @@ -1528,7 +1528,7 @@ def referenced_tables(self): ds_name = table['datasetId'] t_dataset = datasets_by_project_name.get((t_project, ds_name)) if t_dataset is None: - t_dataset = client.dataset(ds_name, project=t_project) + t_dataset = Dataset(ds_name, client, project=t_project) datasets_by_project_name[(t_project, ds_name)] = t_dataset t_name = table['tableId'] diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index f5188118918e..3f1817706672 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -25,6 +25,7 @@ import six from google.cloud import bigquery +from google.cloud.bigquery.dataset import Dataset from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi from google.cloud.exceptions import Forbidden @@ -111,7 +112,7 @@ def _still_in_use(bad_request): def test_create_dataset(self): DATASET_NAME = _make_dataset_name('create_dataset') - dataset = Config.CLIENT.dataset(DATASET_NAME) + dataset = Dataset(DATASET_NAME, Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -122,20 +123,20 @@ def test_create_dataset(self): def test_reload_dataset(self): DATASET_NAME = _make_dataset_name('reload_dataset') - dataset = Config.CLIENT.dataset(DATASET_NAME) + dataset = Dataset(DATASET_NAME, Config.CLIENT) dataset.friendly_name = 'Friendly' dataset.description = 'Description' retry_403(dataset.create)() self.to_delete.append(dataset) - other = Config.CLIENT.dataset(DATASET_NAME) + other = Dataset(DATASET_NAME, Config.CLIENT) other.reload() self.assertEqual(other.friendly_name, 'Friendly') self.assertEqual(other.description, 'Description') def test_patch_dataset(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('patch_dataset')) + dataset = Dataset(_make_dataset_name('patch_dataset'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -149,7 +150,7 @@ def test_patch_dataset(self): self.assertEqual(dataset.description, 'Description') def test_update_dataset(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('update_dataset')) + dataset = Dataset(_make_dataset_name('update_dataset'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -175,7 +176,7 @@ def test_list_datasets(self): 'newest' + unique_resource_id(), ] for dataset_name in datasets_to_create: - created_dataset = Config.CLIENT.dataset(dataset_name) + created_dataset = Dataset(dataset_name, Config.CLIENT) retry_403(created_dataset.create)() self.to_delete.append(created_dataset) @@ -189,7 +190,7 @@ def test_list_datasets(self): self.assertEqual(len(created), len(datasets_to_create)) def test_create_table(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('create_table')) + dataset = Dataset(_make_dataset_name('create_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -208,7 +209,7 @@ def test_create_table(self): def test_list_tables(self): DATASET_NAME = _make_dataset_name('list_tables') - dataset = Config.CLIENT.dataset(DATASET_NAME) + dataset = Dataset(DATASET_NAME, Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -244,7 +245,7 @@ def test_list_tables(self): self.assertEqual(len(created), len(tables_to_create)) def test_patch_table(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('patch_table')) + dataset = Dataset(_make_dataset_name('patch_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -266,7 +267,7 @@ def test_patch_table(self): self.assertEqual(table.description, 'Description') def test_update_table(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('update_table')) + dataset = Dataset(_make_dataset_name('update_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -309,8 +310,8 @@ def test_insert_data_then_dump_table(self): ('Bhettye Rhubble', 27, None), ] ROW_IDS = range(len(ROWS)) - dataset = Config.CLIENT.dataset( - _make_dataset_name('insert_data_then_dump')) + dataset = Dataset( + _make_dataset_name('insert_data_then_dump'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -353,8 +354,8 @@ def test_load_table_from_local_file_then_dump_table(self): ] TABLE_NAME = 'test_table' - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_local_then_dump')) + dataset = Dataset( + _make_dataset_name('load_local_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -402,8 +403,8 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("orange", 590), ("red", 650)] - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_local_then_dump')) + dataset = Dataset( + _make_dataset_name('load_local_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -466,8 +467,8 @@ def test_load_table_from_storage_then_dump_table(self): self.to_delete.insert(0, blob) - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_gcs_then_dump')) + dataset = Dataset( + _make_dataset_name('load_gcs_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -536,8 +537,8 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.to_delete.insert(0, blob) - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_gcs_then_dump')) + dataset = Dataset( + _make_dataset_name('load_gcs_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -574,7 +575,7 @@ def test_job_cancel(self): TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_NAME, TABLE_NAME) - dataset = Config.CLIENT.dataset(DATASET_NAME) + dataset = Dataset(DATASET_NAME, Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -765,7 +766,7 @@ def test_dbapi_fetchall(self): def _load_table_for_dml(self, rows, dataset_name, table_name): from google.cloud._testing import _NamedTemporaryFile - dataset = Config.CLIENT.dataset(dataset_name) + dataset = Dataset(dataset_name, Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -1099,7 +1100,7 @@ def test_dump_table_w_public_data(self): DATASET_NAME = 'samples' TABLE_NAME = 'natality' - dataset = Config.CLIENT.dataset(DATASET_NAME, project=PUBLIC) + dataset = Dataset(DATASET_NAME, Config.CLIENT, project=PUBLIC) table = dataset.table(TABLE_NAME) # Reload table to get the schema before fetching the rows. table.reload() @@ -1152,8 +1153,8 @@ def test_insert_nested_nested(self): ('Some value', record) ] table_name = 'test_table' - dataset = Config.CLIENT.dataset( - _make_dataset_name('issue_2951')) + dataset = Dataset( + _make_dataset_name('issue_2951'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -1172,8 +1173,8 @@ def test_insert_nested_nested(self): def test_create_table_insert_fetch_nested_schema(self): table_name = 'test_table' - dataset = Config.CLIENT.dataset( - _make_dataset_name('create_table_nested_schema')) + dataset = Dataset( + _make_dataset_name('create_table_nested_schema'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 3cd4a24ceb43..70e1f1eea7c7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -246,8 +246,21 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): self.assertEqual(req['query_params'], {'all': True, 'maxResults': 3, 'pageToken': TOKEN}) - def test_dataset(self): - from google.cloud.bigquery.dataset import Dataset + def test_dataset_with_specified_project(self): + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DATASET = 'dataset_name' + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + dataset = client.dataset(DATASET, PROJECT) + self.assertIsInstance(dataset, DatasetReference) + self.assertEqual(dataset.dataset_id, DATASET) + self.assertEqual(dataset.project_id, PROJECT) + + def test_dataset_with_default_project(self): + from google.cloud.bigquery.dataset import DatasetReference PROJECT = 'PROJECT' DATASET = 'dataset_name' @@ -255,9 +268,9 @@ def test_dataset(self): http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) dataset = client.dataset(DATASET) - self.assertIsInstance(dataset, Dataset) - self.assertEqual(dataset.name, DATASET) - self.assertIs(dataset._client, client) + self.assertIsInstance(dataset, DatasetReference) + self.assertEqual(dataset.dataset_id, DATASET) + self.assertEqual(dataset.project_id, PROJECT) def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 23fb95eea123..a4b96470c2e7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2697,11 +2697,6 @@ def __init__(self, project='project', connection=None): self.project = project self._connection = connection - def dataset(self, name, project=None): - from google.cloud.bigquery.dataset import Dataset - - return Dataset(name, client=self, project=project) - def _get_query_results(self, job_id): from google.cloud.bigquery.query import QueryResults From 35fa0e585dcc5e1d52c458a1f4150d218058cfbe Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 13 Sep 2017 18:00:53 -0400 Subject: [PATCH 0188/2016] bigquery: rename TableReference.dataset_ref (#3953) * bigquery: rename TableReference.dataset_ref Rename to dataset to be consistent with Client.dataset. Both methods actually return a DatasetReference. * fix broken tests --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_dataset.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_table.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 0d651aa6b5f1..69d99ab4450f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -66,7 +66,7 @@ def __init__(self, dataset_ref, table_id): self._table_id = table_id @property - def dataset_ref(self): + def dataset(self): """Pointer to the dataset. :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference` diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index c509be6838a1..09fdbbe034ce 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -104,7 +104,7 @@ def test_ctor_defaults(self): def test_table(self): dataset_ref = self._make_one('some-project-1', 'dataset_1') table_ref = dataset_ref.table('table_1') - self.assertIs(table_ref.dataset_ref, dataset_ref) + self.assertIs(table_ref.dataset, dataset_ref) self.assertEqual(table_ref.table_id, 'table_1') diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 2629f824e0b2..9e0db94bc6cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -53,7 +53,7 @@ def test_ctor_defaults(self): dataset_ref = DatasetReference('project_1', 'dataset_1') table_ref = self._make_one(dataset_ref, 'table_1') - self.assertIs(table_ref.dataset_ref, dataset_ref) + self.assertIs(table_ref.dataset, dataset_ref) self.assertEqual(table_ref.table_id, 'table_1') From 45bba69ce4fc10f5450506d8ab59dca09fade5f2 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 14 Sep 2017 14:43:08 -0400 Subject: [PATCH 0189/2016] bigquery: rename name field of Dataset to dataset_id (#3955) * bigquery: rename name field of Dataset to dataset_id Rename the former dataset_id property to full_dataset_id. Also rename Table.dataset_name to Table.dataset_id. Perform other renamings (of various variables and constants). These names match usage better. The API's Dataset.id field is "project:dataset_id", which is confusing and basically useless, so it's a mistake to call that dataset_id. * fix long line * fix long line --- .../google/cloud/bigquery/dataset.py | 25 +-- .../google/cloud/bigquery/job.py | 12 +- .../google/cloud/bigquery/query.py | 2 +- .../google/cloud/bigquery/table.py | 14 +- .../google-cloud-bigquery/tests/system.py | 70 ++++----- .../tests/unit/test_client.py | 2 +- .../tests/unit/test_dataset.py | 144 +++++++++--------- .../tests/unit/test_job.py | 96 ++++++------ .../tests/unit/test_table.py | 105 ++++++------- 9 files changed, 236 insertions(+), 234 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index e31b4a2a93b1..cd31f737e693 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -142,8 +142,8 @@ class Dataset(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets - :type name: str - :param name: the name of the dataset + :type dataset_id: str + :param dataset_id: the ID of the dataset :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration @@ -159,8 +159,8 @@ class Dataset(object): _access_entries = None - def __init__(self, name, client, access_entries=(), project=None): - self.name = name + def __init__(self, dataset_id, client, access_entries=(), project=None): + self.dataset_id = dataset_id self._client = client self._properties = {} # Let the @property do validation. @@ -181,9 +181,9 @@ def path(self): """URL path for the dataset's APIs. :rtype: str - :returns: the path based on project and dataste name. + :returns: the path based on project and dataset ID. """ - return '/projects/%s/datasets/%s' % (self.project, self.name) + return '/projects/%s/datasets/%s' % (self.project, self.dataset_id) @property def access_entries(self): @@ -221,8 +221,8 @@ def created(self): return _datetime_from_microseconds(1000.0 * creation_time) @property - def dataset_id(self): - """ID for the dataset resource. + def full_dataset_id(self): + """ID for the dataset resource, in the form "project_id:dataset_id". :rtype: str, or ``NoneType`` :returns: the ID (None until set from the server). @@ -365,8 +365,8 @@ def from_api_repr(cls, resource, client): 'datasetId' not in resource['datasetReference']): raise KeyError('Resource lacks required identity information:' '["datasetReference"]["datasetId"]') - name = resource['datasetReference']['datasetId'] - dataset = cls(name, client=client) + dataset_id = resource['datasetReference']['datasetId'] + dataset = cls(dataset_id, client=client) dataset._set_properties(resource) return dataset @@ -444,7 +444,7 @@ def _build_resource(self): """Generate a resource for ``create`` or ``update``.""" resource = { 'datasetReference': { - 'projectId': self.project, 'datasetId': self.name}, + 'projectId': self.project, 'datasetId': self.dataset_id}, } if self.default_table_expiration_ms is not None: value = self.default_table_expiration_ms @@ -610,7 +610,8 @@ def list_tables(self, max_results=None, page_token=None): :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` contained within the current dataset. """ - path = '/projects/%s/datasets/%s/tables' % (self.project, self.name) + path = '/projects/%s/datasets/%s/tables' % ( + self.project, self.dataset_id) result = page_iterator.HTTPIterator( client=self._client, api_request=self._client._connection.api_request, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 6f5c2c294a0c..f513a98d23cd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -773,7 +773,7 @@ def _build_resource(self): 'sourceUris': self.source_uris, 'destinationTable': { 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_name, + 'datasetId': self.destination.dataset_id, 'tableId': self.destination.name, }, }, @@ -900,7 +900,7 @@ def _build_resource(self): source_refs = [{ 'projectId': table.project, - 'datasetId': table.dataset_name, + 'datasetId': table.dataset_id, 'tableId': table.name, } for table in self.sources] @@ -914,7 +914,7 @@ def _build_resource(self): 'sourceTables': source_refs, 'destinationTable': { 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_name, + 'datasetId': self.destination.dataset_id, 'tableId': self.destination.name, }, }, @@ -1058,7 +1058,7 @@ def _build_resource(self): source_ref = { 'projectId': self.source.project, - 'datasetId': self.source.dataset_name, + 'datasetId': self.source.dataset_id, 'tableId': self.source.name, } @@ -1247,7 +1247,7 @@ def _destination_table_resource(self): if self.destination is not None: return { 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_name, + 'datasetId': self.destination.dataset_id, 'tableId': self.destination.name, } @@ -1271,7 +1271,7 @@ def _populate_config_resource(self, configuration): if self.default_dataset is not None: configuration['defaultDataset'] = { 'projectId': self.default_dataset.project, - 'datasetId': self.default_dataset.name, + 'datasetId': self.default_dataset.dataset_id, } if self.destination is not None: table_res = self._destination_table_resource() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 185b68deb104..fa03d373674d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -329,7 +329,7 @@ def _build_resource(self): if self.default_dataset is not None: resource['defaultDataset'] = { 'projectId': self.project, - 'datasetId': self.default_dataset.name, + 'datasetId': self.default_dataset.dataset_id, } if self.max_results is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 69d99ab4450f..e06e79271d0a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -119,13 +119,13 @@ def project(self): return self._dataset.project @property - def dataset_name(self): - """Name of dataset containing the table. + def dataset_id(self): + """ID of dataset containing the table. :rtype: str :returns: the ID (derived from the dataset). """ - return self._dataset.name + return self._dataset.dataset_id @property def path(self): @@ -463,7 +463,7 @@ def list_partitions(self, client=None): """ query = self._require_client(client).run_sync_query( 'SELECT partition_id from [%s.%s$__PARTITIONS_SUMMARY__]' % - (self.dataset_name, self.name)) + (self.dataset_id, self.name)) query.run() return [row[0] for row in query.rows] @@ -527,7 +527,7 @@ def _build_resource(self): resource = { 'tableReference': { 'projectId': self._dataset.project, - 'datasetId': self._dataset.name, + 'datasetId': self._dataset.dataset_id, 'tableId': self.name}, } if self.description is not None: @@ -572,7 +572,7 @@ def create(self, client=None): """ client = self._require_client(client) path = '/projects/%s/datasets/%s/tables' % ( - self._dataset.project, self._dataset.name) + self._dataset.project, self._dataset.dataset_id) api_response = client._connection.api_request( method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) @@ -1369,7 +1369,7 @@ def _get_upload_metadata(source_format, schema, dataset, name): 'sourceFormat': source_format, 'destinationTable': { 'projectId': dataset.project, - 'datasetId': dataset.name, + 'datasetId': dataset.dataset_id, 'tableId': name, }, } diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 3f1817706672..ad93ac2c954e 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -44,7 +44,7 @@ def _has_rows(result): return len(result) > 0 -def _make_dataset_name(prefix): +def _make_dataset_id(prefix): return '%s%s' % (prefix, unique_resource_id()) @@ -111,32 +111,32 @@ def _still_in_use(bad_request): doomed.delete() def test_create_dataset(self): - DATASET_NAME = _make_dataset_name('create_dataset') - dataset = Dataset(DATASET_NAME, Config.CLIENT) + DATASET_ID = _make_dataset_id('create_dataset') + dataset = Dataset(DATASET_ID, Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() self.to_delete.append(dataset) self.assertTrue(dataset.exists()) - self.assertEqual(dataset.name, DATASET_NAME) + self.assertEqual(dataset.dataset_id, DATASET_ID) def test_reload_dataset(self): - DATASET_NAME = _make_dataset_name('reload_dataset') - dataset = Dataset(DATASET_NAME, Config.CLIENT) + DATASET_ID = _make_dataset_id('reload_dataset') + dataset = Dataset(DATASET_ID, Config.CLIENT) dataset.friendly_name = 'Friendly' dataset.description = 'Description' retry_403(dataset.create)() self.to_delete.append(dataset) - other = Dataset(DATASET_NAME, Config.CLIENT) + other = Dataset(DATASET_ID, Config.CLIENT) other.reload() self.assertEqual(other.friendly_name, 'Friendly') self.assertEqual(other.description, 'Description') def test_patch_dataset(self): - dataset = Dataset(_make_dataset_name('patch_dataset'), Config.CLIENT) + dataset = Dataset(_make_dataset_id('patch_dataset'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -150,7 +150,7 @@ def test_patch_dataset(self): self.assertEqual(dataset.description, 'Description') def test_update_dataset(self): - dataset = Dataset(_make_dataset_name('update_dataset'), Config.CLIENT) + dataset = Dataset(_make_dataset_id('update_dataset'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -175,8 +175,8 @@ def test_list_datasets(self): 'newer' + unique_resource_id(), 'newest' + unique_resource_id(), ] - for dataset_name in datasets_to_create: - created_dataset = Dataset(dataset_name, Config.CLIENT) + for dataset_id in datasets_to_create: + created_dataset = Dataset(dataset_id, Config.CLIENT) retry_403(created_dataset.create)() self.to_delete.append(created_dataset) @@ -185,12 +185,12 @@ def test_list_datasets(self): all_datasets = list(iterator) self.assertIsNone(iterator.next_page_token) created = [dataset for dataset in all_datasets - if dataset.name in datasets_to_create and + if dataset.dataset_id in datasets_to_create and dataset.project == Config.CLIENT.project] self.assertEqual(len(created), len(datasets_to_create)) def test_create_table(self): - dataset = Dataset(_make_dataset_name('create_table'), Config.CLIENT) + dataset = Dataset(_make_dataset_id('create_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -208,8 +208,8 @@ def test_create_table(self): self.assertEqual(table.name, TABLE_NAME) def test_list_tables(self): - DATASET_NAME = _make_dataset_name('list_tables') - dataset = Dataset(DATASET_NAME, Config.CLIENT) + DATASET_ID = _make_dataset_id('list_tables') + dataset = Dataset(DATASET_ID, Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -241,11 +241,11 @@ def test_list_tables(self): self.assertIsNone(iterator.next_page_token) created = [table for table in all_tables if (table.name in tables_to_create and - table.dataset_name == DATASET_NAME)] + table.dataset_id == DATASET_ID)] self.assertEqual(len(created), len(tables_to_create)) def test_patch_table(self): - dataset = Dataset(_make_dataset_name('patch_table'), Config.CLIENT) + dataset = Dataset(_make_dataset_id('patch_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -267,7 +267,7 @@ def test_patch_table(self): self.assertEqual(table.description, 'Description') def test_update_table(self): - dataset = Dataset(_make_dataset_name('update_table'), Config.CLIENT) + dataset = Dataset(_make_dataset_id('update_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -311,7 +311,7 @@ def test_insert_data_then_dump_table(self): ] ROW_IDS = range(len(ROWS)) dataset = Dataset( - _make_dataset_name('insert_data_then_dump'), Config.CLIENT) + _make_dataset_id('insert_data_then_dump'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -355,7 +355,7 @@ def test_load_table_from_local_file_then_dump_table(self): TABLE_NAME = 'test_table' dataset = Dataset( - _make_dataset_name('load_local_then_dump'), Config.CLIENT) + _make_dataset_id('load_local_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -404,7 +404,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("red", 650)] dataset = Dataset( - _make_dataset_name('load_local_then_dump'), Config.CLIENT) + _make_dataset_id('load_local_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -468,7 +468,7 @@ def test_load_table_from_storage_then_dump_table(self): self.to_delete.insert(0, blob) dataset = Dataset( - _make_dataset_name('load_gcs_then_dump'), Config.CLIENT) + _make_dataset_id('load_gcs_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -538,7 +538,7 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.to_delete.insert(0, blob) dataset = Dataset( - _make_dataset_name('load_gcs_then_dump'), Config.CLIENT) + _make_dataset_id('load_gcs_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -570,12 +570,12 @@ def test_load_table_from_storage_w_autodetect_schema(self): sorted(actual_rows, key=by_age), sorted(rows, key=by_age)) def test_job_cancel(self): - DATASET_NAME = _make_dataset_name('job_cancel') - JOB_NAME = 'fetch_' + DATASET_NAME + DATASET_ID = _make_dataset_id('job_cancel') + JOB_NAME = 'fetch_' + DATASET_ID TABLE_NAME = 'test_table' - QUERY = 'SELECT * FROM %s.%s' % (DATASET_NAME, TABLE_NAME) + QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) - dataset = Dataset(DATASET_NAME, Config.CLIENT) + dataset = Dataset(DATASET_ID, Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -796,7 +796,7 @@ def _load_table_for_dml(self, rows, dataset_name, table_name): self._fetch_single_page(table) def test_sync_query_w_dml(self): - dataset_name = _make_dataset_name('dml_tests') + dataset_name = _make_dataset_id('dml_tests') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) query_template = """UPDATE {}.{} @@ -812,7 +812,7 @@ def test_sync_query_w_dml(self): self.assertEqual(query.num_dml_affected_rows, 1) def test_dbapi_w_dml(self): - dataset_name = _make_dataset_name('dml_tests') + dataset_name = _make_dataset_id('dml_tests') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) query_template = """UPDATE {}.{} @@ -1097,10 +1097,10 @@ def test_dbapi_w_query_parameters(self): def test_dump_table_w_public_data(self): PUBLIC = 'bigquery-public-data' - DATASET_NAME = 'samples' + DATASET_ID = 'samples' TABLE_NAME = 'natality' - dataset = Dataset(DATASET_NAME, Config.CLIENT, project=PUBLIC) + dataset = Dataset(DATASET_ID, Config.CLIENT, project=PUBLIC) table = dataset.table(TABLE_NAME) # Reload table to get the schema before fetching the rows. table.reload() @@ -1108,11 +1108,11 @@ def test_dump_table_w_public_data(self): def test_large_query_w_public_data(self): PUBLIC = 'bigquery-public-data' - DATASET_NAME = 'samples' + DATASET_ID = 'samples' TABLE_NAME = 'natality' LIMIT = 1000 SQL = 'SELECT * from `{}.{}.{}` LIMIT {}'.format( - PUBLIC, DATASET_NAME, TABLE_NAME, LIMIT) + PUBLIC, DATASET_ID, TABLE_NAME, LIMIT) query = Config.CLIENT.run_sync_query(SQL) query.use_legacy_sql = False @@ -1154,7 +1154,7 @@ def test_insert_nested_nested(self): ] table_name = 'test_table' dataset = Dataset( - _make_dataset_name('issue_2951'), Config.CLIENT) + _make_dataset_id('issue_2951'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -1174,7 +1174,7 @@ def test_create_table_insert_fetch_nested_schema(self): table_name = 'test_table' dataset = Dataset( - _make_dataset_name('create_table_nested_schema'), Config.CLIENT) + _make_dataset_id('create_table_nested_schema'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 70e1f1eea7c7..fffffb9b2b25 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -210,7 +210,7 @@ def test_list_datasets_defaults(self): self.assertEqual(len(datasets), len(DATA['datasets'])) for found, expected in zip(datasets, DATA['datasets']): self.assertIsInstance(found, Dataset) - self.assertEqual(found.dataset_id, expected['id']) + self.assertEqual(found.full_dataset_id, expected['id']) self.assertEqual(found.friendly_name, expected['friendlyName']) self.assertEqual(token, TOKEN) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 09fdbbe034ce..e1db93a973e4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -110,7 +110,7 @@ def test_table(self): class TestDataset(unittest.TestCase): PROJECT = 'project' - DS_NAME = 'dataset-name' + DS_ID = 'dataset-id' @staticmethod def _get_target_class(): @@ -129,7 +129,7 @@ def _setUpConstants(self): self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( tzinfo=UTC) self.ETAG = 'ETAG' - self.DS_ID = '%s:%s' % (self.PROJECT, self.DS_NAME) + self.DS_FULL_ID = '%s:%s' % (self.PROJECT, self.DS_ID) self.RESOURCE_URL = 'http://example.com/path/to/resource' def _makeResource(self): @@ -139,9 +139,9 @@ def _makeResource(self): return { 'creationTime': self.WHEN_TS * 1000, 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'etag': self.ETAG, - 'id': self.DS_ID, + 'id': self.DS_FULL_ID, 'lastModifiedTime': self.WHEN_TS * 1000, 'location': 'US', 'selfLink': self.RESOURCE_URL, @@ -209,17 +209,17 @@ def _verify_resource_properties(self, dataset, resource): def test_ctor_defaults(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) - self.assertEqual(dataset.name, self.DS_NAME) + dataset = self._make_one(self.DS_ID, client) + self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertIs(dataset._client, client) self.assertEqual(dataset.project, client.project) self.assertEqual( dataset.path, - '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME)) + '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)) self.assertEqual(dataset.access_entries, []) self.assertIsNone(dataset.created) - self.assertIsNone(dataset.dataset_id) + self.assertIsNone(dataset.full_dataset_id) self.assertIsNone(dataset.etag) self.assertIsNone(dataset.modified) self.assertIsNone(dataset.self_link) @@ -237,19 +237,19 @@ def test_ctor_explicit(self): entries = [phred, bharney] OTHER_PROJECT = 'foo-bar-123' client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client, + dataset = self._make_one(self.DS_ID, client, access_entries=entries, project=OTHER_PROJECT) - self.assertEqual(dataset.name, self.DS_NAME) + self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertIs(dataset._client, client) self.assertEqual(dataset.project, OTHER_PROJECT) self.assertEqual( dataset.path, - '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_NAME)) + '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_ID)) self.assertEqual(dataset.access_entries, entries) self.assertIsNone(dataset.created) - self.assertIsNone(dataset.dataset_id) + self.assertIsNone(dataset.full_dataset_id) self.assertIsNone(dataset.etag) self.assertIsNone(dataset.modified) self.assertIsNone(dataset.self_link) @@ -261,7 +261,7 @@ def test_ctor_explicit(self): def test_access_entries_setter_non_list(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) with self.assertRaises(TypeError): dataset.access_entries = object() @@ -269,7 +269,7 @@ def test_access_entries_setter_invalid_field(self): from google.cloud.bigquery.dataset import AccessEntry client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') with self.assertRaises(ValueError): dataset.access_entries = [phred, object()] @@ -278,7 +278,7 @@ def test_access_entries_setter(self): from google.cloud.bigquery.dataset import AccessEntry client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') dataset.access_entries = [phred, bharney] @@ -286,49 +286,49 @@ def test_access_entries_setter(self): def test_default_table_expiration_ms_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) with self.assertRaises(ValueError): dataset.default_table_expiration_ms = 'bogus' def test_default_table_expiration_ms_setter(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) dataset.default_table_expiration_ms = 12345 self.assertEqual(dataset.default_table_expiration_ms, 12345) def test_description_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) with self.assertRaises(ValueError): dataset.description = 12345 def test_description_setter(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) dataset.description = 'DESCRIPTION' self.assertEqual(dataset.description, 'DESCRIPTION') def test_friendly_name_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) with self.assertRaises(ValueError): dataset.friendly_name = 12345 def test_friendly_name_setter(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) dataset.friendly_name = 'FRIENDLY' self.assertEqual(dataset.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) with self.assertRaises(ValueError): dataset.location = 12345 def test_location_setter(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') @@ -344,10 +344,10 @@ def test_from_api_repr_bare(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'datasetReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, } } klass = self._get_target_class() @@ -368,7 +368,7 @@ def test__parse_access_entries_w_unknown_entity_type(self): {'role': 'READER', 'unknown': 'UNKNOWN'}, ] client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) @@ -382,7 +382,7 @@ def test__parse_access_entries_w_extra_keys(self): }, ] client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) @@ -391,7 +391,7 @@ def test_create_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.create() @@ -401,7 +401,7 @@ def test_create_w_bound_client(self): self.assertEqual(req['path'], '/%s' % PATH) SENT = { 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, } self.assertEqual(req['data'], SENT) self._verify_resource_properties(dataset, RESOURCE) @@ -421,7 +421,7 @@ def test_create_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) dataset.friendly_name = TITLE dataset.description = DESCRIPTION VIEW = { @@ -448,7 +448,7 @@ def test_create_w_alternate_client(self): SENT = { 'datasetReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, }, 'description': DESCRIPTION, 'friendlyName': TITLE, @@ -474,7 +474,7 @@ def test_create_w_missing_output_properties(self): self.WHEN = None conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.create() @@ -484,16 +484,16 @@ def test_create_w_missing_output_properties(self): self.assertEqual(req['path'], '/%s' % PATH) SENT = { 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, } self.assertEqual(req['data'], SENT) self._verify_resource_properties(dataset, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) self.assertFalse(dataset.exists()) @@ -504,12 +504,12 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn1 = _Connection() CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) self.assertTrue(dataset.exists(client=CLIENT2)) @@ -521,11 +521,11 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.reload() @@ -536,13 +536,13 @@ def test_reload_w_bound_client(self): self._verify_resource_properties(dataset, RESOURCE) def test_reload_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn1 = _Connection() CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) dataset.reload(client=CLIENT2) @@ -557,13 +557,13 @@ def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) with self.assertRaises(ValueError): dataset.patch(default_table_expiration_ms='BOGUS') def test_patch_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' RESOURCE = self._makeResource() @@ -571,7 +571,7 @@ def test_patch_w_bound_client(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.patch(description=DESCRIPTION, friendly_name=TITLE) @@ -587,7 +587,7 @@ def test_patch_w_bound_client(self): self._verify_resource_properties(dataset, RESOURCE) def test_patch_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DEF_TABLE_EXP = 12345 LOCATION = 'EU' RESOURCE = self._makeResource() @@ -597,7 +597,7 @@ def test_patch_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) dataset.patch(client=CLIENT2, default_table_expiration_ms=DEF_TABLE_EXP, @@ -616,7 +616,7 @@ def test_patch_w_alternate_client(self): self._verify_resource_properties(dataset, RESOURCE) def test_update_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' RESOURCE = self._makeResource() @@ -624,7 +624,7 @@ def test_update_w_bound_client(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.description = DESCRIPTION dataset.friendly_name = TITLE @@ -635,7 +635,7 @@ def test_update_w_bound_client(self): self.assertEqual(req['method'], 'PUT') SENT = { 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'description': DESCRIPTION, 'friendlyName': TITLE, } @@ -644,7 +644,7 @@ def test_update_w_bound_client(self): self._verify_resource_properties(dataset, RESOURCE) def test_update_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DEF_TABLE_EXP = 12345 LOCATION = 'EU' RESOURCE = self._makeResource() @@ -654,7 +654,7 @@ def test_update_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) dataset.default_table_expiration_ms = DEF_TABLE_EXP dataset.location = LOCATION @@ -667,7 +667,7 @@ def test_update_w_alternate_client(self): self.assertEqual(req['path'], '/%s' % PATH) SENT = { 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'defaultTableExpirationMs': 12345, 'location': 'EU', } @@ -675,10 +675,10 @@ def test_update_w_alternate_client(self): self._verify_resource_properties(dataset, RESOURCE) def test_delete_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.delete() @@ -688,12 +688,12 @@ def test_delete_w_bound_client(self): self.assertEqual(req['path'], '/%s' % PATH) def test_delete_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn1 = _Connection() CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) dataset.delete(client=CLIENT2) @@ -708,7 +708,7 @@ def test_list_tables_empty(self): conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) iterator = dataset.list_tables() self.assertIs(iterator.dataset, dataset) @@ -721,7 +721,7 @@ def test_list_tables_empty(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) self.assertEqual(req['path'], '/%s' % PATH) def test_list_tables_defaults(self): @@ -730,21 +730,21 @@ def test_list_tables_defaults(self): TABLE_1 = 'table_one' TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) TOKEN = 'TOKEN' DATA = { 'nextPageToken': TOKEN, 'tables': [ {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_1), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'projectId': self.PROJECT}, 'type': 'TABLE'}, {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_2), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'projectId': self.PROJECT}, 'type': 'TABLE'}, ] @@ -752,7 +752,7 @@ def test_list_tables_defaults(self): conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) iterator = dataset.list_tables() self.assertIs(iterator.dataset, dataset) @@ -778,20 +778,20 @@ def test_list_tables_explicit(self): TABLE_1 = 'table_one' TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) TOKEN = 'TOKEN' DATA = { 'tables': [ {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_1), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'projectId': self.PROJECT}, 'type': 'TABLE'}, {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_2), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'projectId': self.PROJECT}, 'type': 'TABLE'}, ] @@ -799,7 +799,7 @@ def test_list_tables_explicit(self): conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) iterator = dataset.list_tables(max_results=3, page_token=TOKEN) self.assertIs(iterator.dataset, dataset) @@ -826,7 +826,7 @@ def test_table_wo_schema(self): conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) table = dataset.table('table_name') self.assertIsInstance(table, Table) self.assertEqual(table.name, 'table_name') @@ -839,7 +839,7 @@ def test_table_w_schema(self): conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = dataset.table('table_name', schema=[full_name, age]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a4b96470c2e7..09b57d7b7457 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -82,7 +82,7 @@ def test_missing_reason(self): class _Base(object): PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' - DS_NAME = 'datset_name' + DS_ID = 'datset_id' TABLE_NAME = 'table_name' JOB_NAME = 'job_name' @@ -206,7 +206,7 @@ def _makeResource(self, started=False, ended=False): config['sourceUris'] = [self.SOURCE1] config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, } @@ -275,7 +275,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['destinationTable'] self.assertEqual(job.destination.project, table_ref['projectId']) - self.assertEqual(job.destination.dataset_name, table_ref['datasetId']) + self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) self.assertEqual(job.destination.name, table_ref['tableId']) if 'fieldDelimiter' in config: @@ -519,7 +519,7 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, 'jobId': self.JOB_NAME, @@ -543,7 +543,7 @@ def test_from_api_repr_bare(self): 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, }, } @@ -603,7 +603,7 @@ def test_begin_w_bound_client(self): 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, }, }, @@ -638,7 +638,7 @@ def test_begin_w_autodetect(self): 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, }, 'autodetect': True @@ -662,7 +662,7 @@ def test_begin_w_alternate_client(self): 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, }, 'allowJaggedRows': True, @@ -848,12 +848,12 @@ def _makeResource(self, started=False, ended=False): config = resource['configuration']['copy'] config['sourceTables'] = [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }] config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, } @@ -866,7 +866,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['destinationTable'] self.assertEqual(job.destination.project, table_ref['projectId']) - self.assertEqual(job.destination.dataset_name, table_ref['datasetId']) + self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) self.assertEqual(job.destination.name, table_ref['tableId']) sources = config.get('sourceTables') @@ -875,7 +875,7 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(len(sources), len(job.sources)) for table_ref, table in zip(sources, job.sources): self.assertEqual(table.project, table_ref['projectId']) - self.assertEqual(table.dataset_name, table_ref['datasetId']) + self.assertEqual(table.dataset_id, table_ref['datasetId']) self.assertEqual(table.name, table_ref['tableId']) if 'createDisposition' in config: @@ -921,7 +921,7 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, 'jobId': self.JOB_NAME, @@ -944,12 +944,12 @@ def test_from_api_repr_bare(self): 'copy': { 'sourceTables': [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, } @@ -973,12 +973,12 @@ def test_from_api_repr_w_sourcetable(self): 'copy': { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }, 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, } @@ -1002,7 +1002,7 @@ def test_from_api_repr_wo_sources(self): 'copy': { 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, } @@ -1051,12 +1051,12 @@ def test_begin_w_bound_client(self): 'copy': { 'sourceTables': [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE }], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, }, @@ -1071,12 +1071,12 @@ def test_begin_w_alternate_client(self): COPY_CONFIGURATION = { 'sourceTables': [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, 'createDisposition': 'CREATE_NEVER', @@ -1203,7 +1203,7 @@ def _makeResource(self, started=False, ended=False): config = resource['configuration']['extract'] config['sourceTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, } config['destinationUris'] = [self.DESTINATION_URI] @@ -1218,7 +1218,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['sourceTable'] self.assertEqual(job.source.project, table_ref['projectId']) - self.assertEqual(job.source.dataset_name, table_ref['datasetId']) + self.assertEqual(job.source.dataset_id, table_ref['datasetId']) self.assertEqual(job.source.name, table_ref['tableId']) if 'compression' in config: @@ -1295,7 +1295,7 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, 'jobId': self.JOB_NAME, @@ -1318,7 +1318,7 @@ def test_from_api_repr_bare(self): 'extract': { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }, 'destinationUris': [self.DESTINATION_URI], @@ -1369,7 +1369,7 @@ def test_begin_w_bound_client(self): 'extract': { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE }, 'destinationUris': [self.DESTINATION_URI], @@ -1385,7 +1385,7 @@ def test_begin_w_alternate_client(self): EXTRACT_CONFIGURATION = { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }, 'destinationUris': [self.DESTINATION_URI], @@ -1604,7 +1604,7 @@ def _verifyResourceProperties(self, job, resource): dataset = job.default_dataset ds_ref = { 'projectId': dataset.project, - 'datasetId': dataset.name, + 'datasetId': dataset.dataset_id, } self.assertEqual(ds_ref, query_config['defaultDataset']) else: @@ -1613,7 +1613,7 @@ def _verifyResourceProperties(self, job, resource): table = job.destination tb_ref = { 'projectId': table.project, - 'datasetId': table.dataset_name, + 'datasetId': table.dataset_id, 'tableId': table.name } self.assertEqual(tb_ref, query_config['destinationTable']) @@ -1687,7 +1687,7 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, 'jobId': self.JOB_NAME, @@ -1723,7 +1723,7 @@ def test_from_api_repr_w_properties(self): query_config['writeDisposition'] = 'WRITE_TRUNCATE' query_config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, } klass = self._get_target_class() @@ -1936,21 +1936,21 @@ def test_referenced_tables(self): self.assertIsInstance(local1, Table) self.assertEqual(local1.name, 'local1') self.assertIsInstance(local1._dataset, Dataset) - self.assertEqual(local1.dataset_name, 'dataset') + self.assertEqual(local1.dataset_id, 'dataset') self.assertEqual(local1.project, self.PROJECT) self.assertIs(local1._dataset._client, client) self.assertIsInstance(local2, Table) self.assertEqual(local2.name, 'local2') self.assertIsInstance(local2._dataset, Dataset) - self.assertEqual(local2.dataset_name, 'dataset') + self.assertEqual(local2.dataset_id, 'dataset') self.assertEqual(local2.project, self.PROJECT) self.assertIs(local2._dataset._client, client) self.assertIsInstance(remote, Table) self.assertEqual(remote.name, 'other-table') self.assertIsInstance(remote._dataset, Dataset) - self.assertEqual(remote.dataset_name, 'other-dataset') + self.assertEqual(remote.dataset_id, 'other-dataset') self.assertEqual(remote.project, 'other-project-123') self.assertIs(remote._dataset._client, client) @@ -2128,7 +2128,7 @@ def test_begin_w_bound_client(self): from google.cloud.bigquery.dataset import Dataset PATH = '/projects/%s/jobs' % (self.PROJECT,) - DS_NAME = 'DATASET' + DS_ID = 'DATASET' RESOURCE = self._makeResource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] @@ -2139,7 +2139,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.default_dataset = Dataset(DS_NAME, client) + job.default_dataset = Dataset(DS_ID, client) job.begin() @@ -2159,7 +2159,7 @@ def test_begin_w_bound_client(self): 'query': self.QUERY, 'defaultDataset': { 'projectId': self.PROJECT, - 'datasetId': DS_NAME, + 'datasetId': DS_ID, }, }, }, @@ -2173,7 +2173,7 @@ def test_begin_w_alternate_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) TABLE = 'TABLE' - DS_NAME = 'DATASET' + DS_ID = 'DATASET' RESOURCE = self._makeResource(ended=True) QUERY_CONFIGURATION = { 'query': self.QUERY, @@ -2181,11 +2181,11 @@ def test_begin_w_alternate_client(self): 'createDisposition': 'CREATE_NEVER', 'defaultDataset': { 'projectId': self.PROJECT, - 'datasetId': DS_NAME, + 'datasetId': DS_ID, }, 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': DS_NAME, + 'datasetId': DS_ID, 'tableId': TABLE, }, 'flattenResults': True, @@ -2203,7 +2203,7 @@ def test_begin_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_NAME, self.QUERY, client1) - dataset = Dataset(DS_NAME, client1) + dataset = Dataset(DS_ID, client1) table = Table(TABLE, dataset) job.allow_large_results = True @@ -2464,14 +2464,14 @@ def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import Table PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) - DS_NAME = 'DATASET' + DS_ID = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, None, client) - dataset = Dataset(DS_NAME, client) + dataset = Dataset(DS_ID, client) table = Table(DEST_TABLE, dataset) job.destination = table @@ -2487,13 +2487,13 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) - DS_NAME = 'DATASET' + DS_ID = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() q_config = RESOURCE['configuration']['query'] q_config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': DS_NAME, + 'datasetId': DS_ID, 'tableId': DEST_TABLE, } conn1 = _Connection() @@ -2720,8 +2720,8 @@ def project(self): return TestLoadJob.PROJECT @property - def dataset_name(self): - return TestLoadJob.DS_NAME + def dataset_id(self): + return TestLoadJob.DS_ID class _Connection(object): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 9e0db94bc6cb..cb481eac1932 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -60,7 +60,7 @@ def test_ctor_defaults(self): class TestTable(unittest.TestCase, _SchemaBase): PROJECT = 'prahj-ekt' - DS_NAME = 'dataset-name' + DS_ID = 'dataset-name' TABLE_NAME = 'table-name' @staticmethod @@ -81,7 +81,7 @@ def _setUpConstants(self): tzinfo=UTC) self.ETAG = 'ETAG' self.TABLE_ID = '%s:%s:%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) self.RESOURCE_URL = 'http://example.com/path/to/resource' self.NUM_BYTES = 12345 self.NUM_ROWS = 67 @@ -92,7 +92,7 @@ def _makeResource(self): 'creationTime': self.WHEN_TS * 1000, 'tableReference': {'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -171,11 +171,11 @@ def test_ctor(self): self.assertEqual(table.name, self.TABLE_NAME) self.assertIs(table._dataset, dataset) self.assertEqual(table.project, self.PROJECT) - self.assertEqual(table.dataset_name, self.DS_NAME) + self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual( table.path, '/projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME)) + self.PROJECT, self.DS_ID, self.TABLE_NAME)) self.assertEqual(table.schema, []) self.assertIsNone(table.created) @@ -285,9 +285,9 @@ def test_props_set_by_server(self): CREATED = datetime.datetime(2015, 7, 29, 12, 13, 22, tzinfo=UTC) MODIFIED = datetime.datetime(2015, 7, 29, 14, 47, 15, tzinfo=UTC) TABLE_ID = '%s:%s:%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) client = _Client(self.PROJECT) dataset = _Dataset(client) table = self._make_one(self.TABLE_NAME, dataset) @@ -421,10 +421,10 @@ def test_from_api_repr_bare(self): client = _Client(self.PROJECT) dataset = _Dataset(client) RESOURCE = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_NAME, self.TABLE_NAME), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, }, 'type': 'TABLE', @@ -445,7 +445,7 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(table, RESOURCE) def test_create_new_day_partitioned_table(self): - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -461,7 +461,7 @@ def test_create_new_day_partitioned_table(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'timePartitioning': {'type': 'DAY'}, } @@ -471,7 +471,7 @@ def test_create_new_day_partitioned_table(self): def test_create_w_bound_client(self): from google.cloud.bigquery.table import SchemaField - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -490,7 +490,7 @@ def test_create_w_bound_client(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -502,7 +502,7 @@ def test_create_w_bound_client(self): def test_create_w_partition_no_expire(self): from google.cloud.bigquery.table import SchemaField - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -524,7 +524,7 @@ def test_create_w_partition_no_expire(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'timePartitioning': {'type': 'DAY'}, 'schema': {'fields': [ @@ -537,7 +537,7 @@ def test_create_w_partition_no_expire(self): def test_create_w_partition_and_expire(self): from google.cloud.bigquery.table import SchemaField - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -559,7 +559,7 @@ def test_create_w_partition_and_expire(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, 'schema': {'fields': [ @@ -712,7 +712,7 @@ def test_create_w_alternate_client(self): from google.cloud._helpers import UTC from google.cloud._helpers import _millis - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' QUERY = 'select fullname, age from person_ages' @@ -745,7 +745,7 @@ def test_create_w_alternate_client(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'description': DESCRIPTION, 'friendlyName': TITLE, @@ -759,7 +759,7 @@ def test_create_w_missing_output_properties(self): # lacks 'creationTime' / 'lastModifiedTime' from google.cloud.bigquery.table import SchemaField - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() del RESOURCE['creationTime'] del RESOURCE['lastModifiedTime'] @@ -781,7 +781,7 @@ def test_create_w_missing_output_properties(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -792,7 +792,7 @@ def test_create_w_missing_output_properties(self): def test_exists_miss_w_bound_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) @@ -808,7 +808,7 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) @@ -827,7 +827,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -844,7 +844,7 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) RESOURCE = self._makeResource() conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) @@ -874,7 +874,7 @@ def test_patch_w_invalid_expiration(self): def test_patch_w_bound_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' RESOURCE = self._makeResource() @@ -908,7 +908,7 @@ def test_patch_w_alternate_client(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) QUERY = 'select fullname, age from person_ages' LOCATION = 'EU' RESOURCE = self._makeResource() @@ -950,7 +950,7 @@ def test_patch_w_schema_None(self): # Simulate deleting schema: not sure if back-end will actually # allow this operation, but the spec says it is optional. PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' RESOURCE = self._makeResource() @@ -975,7 +975,7 @@ def test_update_w_bound_client(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' RESOURCE = self._makeResource() @@ -999,7 +999,7 @@ def test_update_w_bound_client(self): SENT = { 'tableReference': {'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -1017,7 +1017,7 @@ def test_update_w_alternate_client(self): from google.cloud._helpers import _millis PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) DEF_TABLE_EXP = 12345 LOCATION = 'EU' QUERY = 'select fullname, age from person_ages' @@ -1051,7 +1051,7 @@ def test_update_w_alternate_client(self): SENT = { 'tableReference': {'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'expirationTime': _millis(self.EXP_TIME), 'location': 'EU', @@ -1062,7 +1062,7 @@ def test_update_w_alternate_client(self): def test_delete_w_bound_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) @@ -1077,7 +1077,7 @@ def test_delete_w_bound_client(self): def test_delete_w_alternate_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) @@ -1112,7 +1112,7 @@ def test_fetch_data_w_bound_client(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) @@ -1185,7 +1185,7 @@ def test_fetch_data_w_alternate_client(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) MAX = 10 TOKEN = 'TOKEN' DATA = { @@ -1256,7 +1256,7 @@ def test_fetch_data_w_repeated_fields(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) ROWS = 1234 TOKEN = 'TOKEN' DATA = { @@ -1309,7 +1309,7 @@ def test_fetch_data_w_record_schema(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) ROWS = 1234 TOKEN = 'TOKEN' DATA = { @@ -1451,7 +1451,7 @@ def test_insert_data_w_bound_client(self): WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) @@ -1492,7 +1492,7 @@ def test_insert_data_w_alternate_client(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) RESPONSE = { 'insertErrors': [ {'index': 1, @@ -1561,7 +1561,7 @@ def test_insert_data_w_repeated_fields(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) @@ -1597,7 +1597,7 @@ def test_insert_data_w_record_schema(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) @@ -1898,7 +1898,7 @@ def test_upload_file_resumable_metadata(self): 'sourceFormat': config_args['source_format'], 'destinationTable': { 'projectId': table._dataset._client.project, - 'datasetId': table.dataset_name, + 'datasetId': table.dataset_id, 'tableId': table.name, }, 'allowJaggedRows': config_args['allow_jagged_rows'], @@ -2230,8 +2230,9 @@ def _call_fut(source_format, schema, dataset, name): def test_empty_schema(self): source_format = 'AVRO' - dataset = mock.Mock(project='prediction', spec=['name', 'project']) - dataset.name = 'market' # mock.Mock() treats `name` specially. + dataset = mock.Mock(project='prediction', + spec=['dataset_id', 'project']) + dataset.dataset_id = 'market' # mock.Mock() treats `name` specially. table_name = 'chairs' metadata = self._call_fut(source_format, [], dataset, table_name) @@ -2241,7 +2242,7 @@ def test_empty_schema(self): 'sourceFormat': source_format, 'destinationTable': { 'projectId': dataset.project, - 'datasetId': dataset.name, + 'datasetId': dataset.dataset_id, 'tableId': table_name, }, }, @@ -2254,8 +2255,8 @@ def test_with_schema(self): source_format = 'CSV' full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - dataset = mock.Mock(project='blind', spec=['name', 'project']) - dataset.name = 'movie' # mock.Mock() treats `name` specially. + dataset = mock.Mock(project='blind', spec=['dataset_id', 'project']) + dataset.dataset_id = 'movie' # mock.Mock() treats `name` specially. table_name = 'teebull-neem' metadata = self._call_fut( source_format, [full_name], dataset, table_name) @@ -2266,7 +2267,7 @@ def test_with_schema(self): 'sourceFormat': source_format, 'destinationTable': { 'projectId': dataset.project, - 'datasetId': dataset.name, + 'datasetId': dataset.dataset_id, 'tableId': table_name, }, 'schema': { @@ -2309,14 +2310,14 @@ def run(self): class _Dataset(object): - def __init__(self, client, name=TestTable.DS_NAME): + def __init__(self, client, dataset_id=TestTable.DS_ID): self._client = client - self.name = name + self.dataset_id = dataset_id @property def path(self): return '/projects/%s/datasets/%s' % ( - self._client.project, self.name) + self._client.project, self.dataset_id) @property def project(self): From 5c6652eb724385898bb049c21fc26883b21c00e9 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 14 Sep 2017 16:34:02 -0400 Subject: [PATCH 0190/2016] bigquery: rename name field of Table to table_id (#3959) * bigquery: rename name field of Table to table_id Also rename table_id to full_table_id. * fix lint errors * fix doc --- .../google/cloud/bigquery/job.py | 10 ++--- .../google/cloud/bigquery/table.py | 45 +++++++++++-------- .../tests/unit/test_dataset.py | 12 ++--- .../tests/unit/test_job.py | 40 ++++++++--------- .../tests/unit/test_table.py | 24 +++++----- 5 files changed, 70 insertions(+), 61 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index f513a98d23cd..da6962daec1b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -774,7 +774,7 @@ def _build_resource(self): 'destinationTable': { 'projectId': self.destination.project, 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.name, + 'tableId': self.destination.table_id, }, }, }, @@ -901,7 +901,7 @@ def _build_resource(self): source_refs = [{ 'projectId': table.project, 'datasetId': table.dataset_id, - 'tableId': table.name, + 'tableId': table.table_id, } for table in self.sources] resource = { @@ -915,7 +915,7 @@ def _build_resource(self): 'destinationTable': { 'projectId': self.destination.project, 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.name, + 'tableId': self.destination.table_id, }, }, }, @@ -1059,7 +1059,7 @@ def _build_resource(self): source_ref = { 'projectId': self.source.project, 'datasetId': self.source.dataset_id, - 'tableId': self.source.name, + 'tableId': self.source.table_id, } resource = { @@ -1248,7 +1248,7 @@ def _destination_table_resource(self): return { 'projectId': self.destination.project, 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.name, + 'tableId': self.destination.table_id, } def _populate_config_resource_booleans(self, configuration): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index e06e79271d0a..f9c07b1e8ee6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -90,8 +90,8 @@ class Table(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables - :type name: str - :param name: the name of the table + :type table_id: str + :param table_id: the ID of the table :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` :param dataset: The dataset which contains the table. @@ -102,8 +102,8 @@ class Table(object): _schema = None - def __init__(self, name, dataset, schema=()): - self.name = name + def __init__(self, table_id, dataset, schema=()): + self._table_id = table_id self._dataset = dataset self._properties = {} # Let the @property do validation. @@ -127,14 +127,23 @@ def dataset_id(self): """ return self._dataset.dataset_id + @property + def table_id(self): + """ID of the table. + + :rtype: str + :returns: the table ID. + """ + return self._table_id + @property def path(self): """URL path for the table's APIs. :rtype: str - :returns: the path based on project and dataste name. + :returns: the path based on project, dataset and table IDs. """ - return '%s/tables/%s' % (self._dataset.path, self.name) + return '%s/tables/%s' % (self._dataset.path, self.table_id) @property def schema(self): @@ -224,11 +233,11 @@ def self_link(self): return self._properties.get('selfLink') @property - def table_id(self): - """ID for the table resource. + def full_table_id(self): + """ID for the table, in the form ``project_id:dataset_id:table_id``. :rtype: str, or ``NoneType`` - :returns: the ID (None until set from the server). + :returns: the full ID (None until set from the server). """ return self._properties.get('id') @@ -463,7 +472,7 @@ def list_partitions(self, client=None): """ query = self._require_client(client).run_sync_query( 'SELECT partition_id from [%s.%s$__PARTITIONS_SUMMARY__]' % - (self.dataset_id, self.name)) + (self.dataset_id, self.table_id)) query.run() return [row[0] for row in query.rows] @@ -484,8 +493,8 @@ def from_api_repr(cls, resource, dataset): 'tableId' not in resource['tableReference']): raise KeyError('Resource lacks required identity information:' '["tableReference"]["tableId"]') - table_name = resource['tableReference']['tableId'] - table = cls(table_name, dataset=dataset) + table_id = resource['tableReference']['tableId'] + table = cls(table_id, dataset=dataset) table._set_properties(resource) return table @@ -528,7 +537,7 @@ def _build_resource(self): 'tableReference': { 'projectId': self._dataset.project, 'datasetId': self._dataset.dataset_id, - 'tableId': self.name}, + 'tableId': self.table_id}, } if self.description is not None: resource['description'] = self.description @@ -1181,7 +1190,7 @@ def upload_from_file(self, _maybe_rewind(file_obj, rewind=rewind) _check_mode(file_obj) metadata = _get_upload_metadata( - source_format, self._schema, self._dataset, self.name) + source_format, self._schema, self._dataset, self.table_id) _configure_job_metadata(metadata, allow_jagged_rows, allow_quoted_newlines, create_disposition, encoding, field_delimiter, @@ -1346,7 +1355,7 @@ def _get_upload_headers(user_agent): } -def _get_upload_metadata(source_format, schema, dataset, name): +def _get_upload_metadata(source_format, schema, dataset, table_id): """Get base metadata for creating a table. :type source_format: str @@ -1359,8 +1368,8 @@ def _get_upload_metadata(source_format, schema, dataset, name): :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset` :param dataset: A dataset which contains a table. - :type name: str - :param name: The name of the table. + :type table_id: str + :param table_id: The table_id of the table. :rtype: dict :returns: The metadata dictionary. @@ -1370,7 +1379,7 @@ def _get_upload_metadata(source_format, schema, dataset, name): 'destinationTable': { 'projectId': dataset.project, 'datasetId': dataset.dataset_id, - 'tableId': name, + 'tableId': table_id, }, } if schema: diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index e1db93a973e4..0689e93b0f20 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -763,7 +763,7 @@ def test_list_tables_defaults(self): self.assertEqual(len(tables), len(DATA['tables'])) for found, expected in zip(tables, DATA['tables']): self.assertIsInstance(found, Table) - self.assertEqual(found.table_id, expected['id']) + self.assertEqual(found.full_table_id, expected['id']) self.assertEqual(found.table_type, expected['type']) self.assertEqual(token, TOKEN) @@ -810,7 +810,7 @@ def test_list_tables_explicit(self): self.assertEqual(len(tables), len(DATA['tables'])) for found, expected in zip(tables, DATA['tables']): self.assertIsInstance(found, Table) - self.assertEqual(found.table_id, expected['id']) + self.assertEqual(found.full_table_id, expected['id']) self.assertEqual(found.table_type, expected['type']) self.assertIsNone(token) @@ -827,9 +827,9 @@ def test_table_wo_schema(self): conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = self._make_one(self.DS_ID, client=client) - table = dataset.table('table_name') + table = dataset.table('table_id') self.assertIsInstance(table, Table) - self.assertEqual(table.name, 'table_name') + self.assertEqual(table.table_id, 'table_id') self.assertIs(table._dataset, dataset) self.assertEqual(table.schema, []) @@ -842,9 +842,9 @@ def test_table_w_schema(self): dataset = self._make_one(self.DS_ID, client=client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table('table_name', schema=[full_name, age]) + table = dataset.table('table_id', schema=[full_name, age]) self.assertIsInstance(table, Table) - self.assertEqual(table.name, 'table_name') + self.assertEqual(table.table_id, 'table_id') self.assertIs(table._dataset, dataset) self.assertEqual(table.schema, [full_name, age]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 09b57d7b7457..7c9a84f4503a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -83,7 +83,7 @@ class _Base(object): PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' DS_ID = 'datset_id' - TABLE_NAME = 'table_name' + TABLE_ID = 'table_id' JOB_NAME = 'job_name' def _make_one(self, *args, **kw): @@ -207,7 +207,7 @@ def _makeResource(self, started=False, ended=False): config['destinationTable'] = { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + 'tableId': self.TABLE_ID, } if ended: @@ -276,7 +276,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['destinationTable'] self.assertEqual(job.destination.project, table_ref['projectId']) self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) - self.assertEqual(job.destination.name, table_ref['tableId']) + self.assertEqual(job.destination.table_id, table_ref['tableId']) if 'fieldDelimiter' in config: self.assertEqual(job.field_delimiter, @@ -544,7 +544,7 @@ def test_from_api_repr_bare(self): 'destinationTable': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + 'tableId': self.TABLE_ID, }, } }, @@ -604,7 +604,7 @@ def test_begin_w_bound_client(self): 'destinationTable': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + 'tableId': self.TABLE_ID, }, }, }, @@ -639,7 +639,7 @@ def test_begin_w_autodetect(self): 'destinationTable': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + 'tableId': self.TABLE_ID, }, 'autodetect': True }, @@ -663,7 +663,7 @@ def test_begin_w_alternate_client(self): 'destinationTable': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + 'tableId': self.TABLE_ID, }, 'allowJaggedRows': True, 'allowQuotedNewlines': True, @@ -867,7 +867,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['destinationTable'] self.assertEqual(job.destination.project, table_ref['projectId']) self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) - self.assertEqual(job.destination.name, table_ref['tableId']) + self.assertEqual(job.destination.table_id, table_ref['tableId']) sources = config.get('sourceTables') if sources is None: @@ -876,7 +876,7 @@ def _verifyResourceProperties(self, job, resource): for table_ref, table in zip(sources, job.sources): self.assertEqual(table.project, table_ref['projectId']) self.assertEqual(table.dataset_id, table_ref['datasetId']) - self.assertEqual(table.name, table_ref['tableId']) + self.assertEqual(table.table_id, table_ref['tableId']) if 'createDisposition' in config: self.assertEqual(job.create_disposition, @@ -1219,7 +1219,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['sourceTable'] self.assertEqual(job.source.project, table_ref['projectId']) self.assertEqual(job.source.dataset_id, table_ref['datasetId']) - self.assertEqual(job.source.name, table_ref['tableId']) + self.assertEqual(job.source.table_id, table_ref['tableId']) if 'compression' in config: self.assertEqual(job.compression, @@ -1614,7 +1614,7 @@ def _verifyResourceProperties(self, job, resource): tb_ref = { 'projectId': table.project, 'datasetId': table.dataset_id, - 'tableId': table.name + 'tableId': table.table_id } self.assertEqual(tb_ref, query_config['destinationTable']) else: @@ -1934,21 +1934,21 @@ def test_referenced_tables(self): local1, local2, remote = job.referenced_tables self.assertIsInstance(local1, Table) - self.assertEqual(local1.name, 'local1') + self.assertEqual(local1.table_id, 'local1') self.assertIsInstance(local1._dataset, Dataset) self.assertEqual(local1.dataset_id, 'dataset') self.assertEqual(local1.project, self.PROJECT) self.assertIs(local1._dataset._client, client) self.assertIsInstance(local2, Table) - self.assertEqual(local2.name, 'local2') + self.assertEqual(local2.table_id, 'local2') self.assertIsInstance(local2._dataset, Dataset) self.assertEqual(local2.dataset_id, 'dataset') self.assertEqual(local2.project, self.PROJECT) self.assertIs(local2._dataset._client, client) self.assertIsInstance(remote, Table) - self.assertEqual(remote.name, 'other-table') + self.assertEqual(remote.table_id, 'other-table') self.assertIsInstance(remote._dataset, Dataset) self.assertEqual(remote.dataset_id, 'other-dataset') self.assertEqual(remote.project, 'other-project-123') @@ -2706,14 +2706,14 @@ def _get_query_results(self, job_id): class _Table(object): - def __init__(self, name=None): - self._name = name + def __init__(self, table_id=None): + self._table_id = table_id @property - def name(self): - if self._name is not None: - return self._name - return TestLoadJob.TABLE_NAME + def table_id(self): + if self._table_id is not None: + return self._table_id + return TestLoadJob.TABLE_ID @property def project(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index cb481eac1932..f076c6d39938 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -80,7 +80,7 @@ def _setUpConstants(self): self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( tzinfo=UTC) self.ETAG = 'ETAG' - self.TABLE_ID = '%s:%s:%s' % ( + self.TABLE_FULL_ID = '%s:%s:%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) self.RESOURCE_URL = 'http://example.com/path/to/resource' self.NUM_BYTES = 12345 @@ -98,7 +98,7 @@ def _makeResource(self): {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, 'etag': 'ETAG', - 'id': self.TABLE_ID, + 'id': self.TABLE_FULL_ID, 'lastModifiedTime': self.WHEN_TS * 1000, 'location': 'US', 'selfLink': self.RESOURCE_URL, @@ -133,7 +133,7 @@ def _verifyReadonlyResourceProperties(self, table, resource): else: self.assertIsNone(table.self_link) - self.assertEqual(table.table_id, self.TABLE_ID) + self.assertEqual(table.full_table_id, self.TABLE_FULL_ID) self.assertEqual(table.table_type, 'TABLE' if 'view' not in resource else 'VIEW') @@ -168,7 +168,7 @@ def test_ctor(self): client = _Client(self.PROJECT) dataset = _Dataset(client) table = self._make_one(self.TABLE_NAME, dataset) - self.assertEqual(table.name, self.TABLE_NAME) + self.assertEqual(table.table_id, self.TABLE_NAME) self.assertIs(table._dataset, dataset) self.assertEqual(table.project, self.PROJECT) self.assertEqual(table.dataset_id, self.DS_ID) @@ -184,7 +184,7 @@ def test_ctor(self): self.assertIsNone(table.num_bytes) self.assertIsNone(table.num_rows) self.assertIsNone(table.self_link) - self.assertIsNone(table.table_id) + self.assertIsNone(table.full_table_id) self.assertIsNone(table.table_type) self.assertIsNone(table.description) @@ -284,7 +284,7 @@ def test_props_set_by_server(self): CREATED = datetime.datetime(2015, 7, 29, 12, 13, 22, tzinfo=UTC) MODIFIED = datetime.datetime(2015, 7, 29, 14, 47, 15, tzinfo=UTC) - TABLE_ID = '%s:%s:%s' % ( + TABLE_FULL_ID = '%s:%s:%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) @@ -297,7 +297,7 @@ def test_props_set_by_server(self): table._properties['numBytes'] = 12345 table._properties['numRows'] = 66 table._properties['selfLink'] = URL - table._properties['id'] = TABLE_ID + table._properties['id'] = TABLE_FULL_ID table._properties['type'] = 'TABLE' self.assertEqual(table.created, CREATED) @@ -306,7 +306,7 @@ def test_props_set_by_server(self): self.assertEqual(table.num_bytes, 12345) self.assertEqual(table.num_rows, 66) self.assertEqual(table.self_link, URL) - self.assertEqual(table.table_id, TABLE_ID) + self.assertEqual(table.full_table_id, TABLE_FULL_ID) self.assertEqual(table.table_type, 'TABLE') def test_description_setter_bad_value(self): @@ -431,7 +431,7 @@ def test_from_api_repr_bare(self): } klass = self._get_target_class() table = klass.from_api_repr(RESOURCE, dataset) - self.assertEqual(table.name, self.TABLE_NAME) + self.assertEqual(table.table_id, self.TABLE_NAME) self.assertIs(table._dataset, dataset) self._verifyResourceProperties(table, RESOURCE) @@ -1681,7 +1681,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): data = b'goodbye gudbi gootbee' stream = io.BytesIO(data) metadata = _get_upload_metadata( - 'CSV', table._schema, table._dataset, table.name) + 'CSV', table._schema, table._dataset, table.table_id) upload, transport = table._initiate_resumable_upload( client, stream, metadata, num_retries) @@ -1747,7 +1747,7 @@ def _do_multipart_upload_success_helper( data = b'Bzzzz-zap \x00\x01\xf4' stream = io.BytesIO(data) metadata = _get_upload_metadata( - 'CSV', table._schema, table._dataset, table.name) + 'CSV', table._schema, table._dataset, table.table_id) size = len(data) response = table._do_multipart_upload( client, stream, metadata, size, num_retries) @@ -1899,7 +1899,7 @@ def test_upload_file_resumable_metadata(self): 'destinationTable': { 'projectId': table._dataset._client.project, 'datasetId': table.dataset_id, - 'tableId': table.name, + 'tableId': table.table_id, }, 'allowJaggedRows': config_args['allow_jagged_rows'], 'allowQuotedNewlines': From 6561dea352b6c6c3cd350c373b25a6d7bf38566e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 15 Sep 2017 11:19:37 -0700 Subject: [PATCH 0191/2016] BQ: rename XJob.name to XJob.job_id. (#3962) * BQ: rename XJob.name to XJob.job_id. * BQ: Remove references to table.name --- .../google/cloud/bigquery/job.py | 78 +++++++++---------- .../google/cloud/bigquery/query.py | 2 +- .../google-cloud-bigquery/tests/system.py | 6 +- .../tests/unit/test_client.py | 18 ++--- .../tests/unit/test_query.py | 2 +- 5 files changed, 53 insertions(+), 53 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index da6962daec1b..76a7d476cf6b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -164,16 +164,15 @@ class WriteDisposition(_EnumProperty): class _AsyncJob(google.api.core.future.polling.PollingFuture): """Base class for asynchronous jobs. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: the job's ID in the project associated with the client. :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + :param client: A client which holds credentials and project configuration. """ - def __init__(self, name, client): + def __init__(self, job_id, client): super(_AsyncJob, self).__init__() - self.name = name + self.job_id = job_id self._client = client self._properties = {} self._result_set = False @@ -217,9 +216,9 @@ def path(self): """URL path for the job's APIs. :rtype: str - :returns: the path based on project and job name. + :returns: the path based on project and job ID. """ - return '/projects/%s/jobs/%s' % (self.project, self.name) + return '/projects/%s/jobs/%s' % (self.project, self.job_id) @property def etag(self): @@ -367,7 +366,7 @@ def _get_resource_config(cls, resource): :rtype: dict :returns: tuple (string, dict), where the first element is the - job name and the second contains job-specific configuration. + job ID and the second contains job-specific configuration. :raises: :class:`KeyError` if the resource has no identifier, or is missing the appropriate configuration. """ @@ -375,13 +374,13 @@ def _get_resource_config(cls, resource): 'jobId' not in resource['jobReference']): raise KeyError('Resource lacks required identity information: ' '["jobReference"]["jobId"]') - name = resource['jobReference']['jobId'] + job_id = resource['jobReference']['jobId'] if ('configuration' not in resource or cls._JOB_TYPE not in resource['configuration']): raise KeyError('Resource lacks required configuration: ' '["configuration"]["%s"]' % cls._JOB_TYPE) config = resource['configuration'][cls._JOB_TYPE] - return name, config + return job_id, config def begin(self, client=None): """API call: begin the job via a POST request @@ -560,8 +559,9 @@ class _LoadConfiguration(object): class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table from remote URI. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: + The job's ID, belonging to the project associated with the client. :type destination: :class:`google.cloud.bigquery.table.Table` :param destination: Table into which data is to be loaded. @@ -766,7 +766,7 @@ def _build_resource(self): resource = { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { self._JOB_TYPE: { @@ -834,12 +834,12 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: Job parsed from ``resource``. """ - name, config = cls._get_resource_config(resource) + job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] dataset = Dataset(dest_config['datasetId'], client) destination = Table(dest_config['tableId'], dataset) source_urls = config.get('sourceUris', ()) - job = cls(name, destination, source_urls, client=client) + job = cls(job_id, destination, source_urls, client=client) job._set_properties(resource) return job @@ -856,8 +856,8 @@ class _CopyConfiguration(object): class CopyJob(_AsyncJob): """Asynchronous job: copy data into a table from other tables. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: the job's ID, within the project belonging to ``client``. :type destination: :class:`google.cloud.bigquery.table.Table` :param destination: Table into which data is to be loaded. @@ -872,8 +872,8 @@ class CopyJob(_AsyncJob): _JOB_TYPE = 'copy' - def __init__(self, name, destination, sources, client): - super(CopyJob, self).__init__(name, client) + def __init__(self, job_id, destination, sources, client): + super(CopyJob, self).__init__(job_id, client) self.destination = destination self.sources = sources self._configuration = _CopyConfiguration() @@ -907,7 +907,7 @@ def _build_resource(self): resource = { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { self._JOB_TYPE: { @@ -949,7 +949,7 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: Job parsed from ``resource``. """ - name, config = cls._get_resource_config(resource) + job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] dataset = Dataset(dest_config['datasetId'], client) destination = Table(dest_config['tableId'], dataset) @@ -964,7 +964,7 @@ def from_api_repr(cls, resource, client): for source_config in source_configs: dataset = Dataset(source_config['datasetId'], client) sources.append(Table(source_config['tableId'], dataset)) - job = cls(name, destination, sources, client=client) + job = cls(job_id, destination, sources, client=client) job._set_properties(resource) return job @@ -983,8 +983,8 @@ class _ExtractConfiguration(object): class ExtractJob(_AsyncJob): """Asynchronous job: extract data from a table into Cloud Storage. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: the job's ID, within the project belonging to ``client``. :type source: :class:`google.cloud.bigquery.table.Table` :param source: Table into which data is to be loaded. @@ -1000,8 +1000,8 @@ class ExtractJob(_AsyncJob): """ _JOB_TYPE = 'extract' - def __init__(self, name, source, destination_uris, client): - super(ExtractJob, self).__init__(name, client) + def __init__(self, job_id, source, destination_uris, client): + super(ExtractJob, self).__init__(job_id, client) self.source = source self.destination_uris = destination_uris self._configuration = _ExtractConfiguration() @@ -1065,7 +1065,7 @@ def _build_resource(self): resource = { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { self._JOB_TYPE: { @@ -1106,12 +1106,12 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: Job parsed from ``resource``. """ - name, config = cls._get_resource_config(resource) + job_id, config = cls._get_resource_config(resource) source_config = config['sourceTable'] dataset = Dataset(source_config['datasetId'], client) source = Table(source_config['tableId'], dataset) destination_uris = config['destinationUris'] - job = cls(name, source, destination_uris, client=client) + job = cls(job_id, source, destination_uris, client=client) job._set_properties(resource) return job @@ -1138,8 +1138,8 @@ class _AsyncQueryConfiguration(object): class QueryJob(_AsyncJob): """Asynchronous job: query tables. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: the job's ID, within the project belonging to ``client``. :type query: str :param query: SQL query string @@ -1163,9 +1163,9 @@ class QueryJob(_AsyncJob): _UDF_KEY = 'userDefinedFunctionResources' _QUERY_PARAMETERS_KEY = 'queryParameters' - def __init__(self, name, query, client, + def __init__(self, job_id, query, client, udf_resources=(), query_parameters=()): - super(QueryJob, self).__init__(name, client) + super(QueryJob, self).__init__(job_id, client) self.query = query self.udf_resources = udf_resources self.query_parameters = query_parameters @@ -1306,7 +1306,7 @@ def _build_resource(self): resource = { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { self._JOB_TYPE: { @@ -1399,9 +1399,9 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.RunAsyncQueryJob` :returns: Job parsed from ``resource``. """ - name, config = cls._get_resource_config(resource) + job_id, config = cls._get_resource_config(resource) query = config['query'] - job = cls(name, query, client=client) + job = cls(job_id, query, client=client) job._set_properties(resource) return job @@ -1573,7 +1573,7 @@ def query_results(self): :returns: results instance """ if not self._query_results: - self._query_results = self._client._get_query_results(self.name) + self._query_results = self._client._get_query_results(self.job_id) return self._query_results def done(self): @@ -1585,7 +1585,7 @@ def done(self): # Do not refresh is the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: - self._query_results = self._client._get_query_results(self.name) + self._query_results = self._client._get_query_results(self.job_id) # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index fa03d373674d..7abbbec76b9b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -95,7 +95,7 @@ def from_query_job(cls, job): instance = cls(job.query, job._client, job.udf_resources) instance._job = job job_ref = instance._properties.setdefault('jobReference', {}) - job_ref['jobId'] = job.name + job_ref['jobId'] = job.job_id if job.default_dataset is not None: instance.default_dataset = job.default_dataset if job.use_query_cache is not None: diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index ad93ac2c954e..ab955b64eacf 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -205,7 +205,7 @@ def test_create_table(self): table.create() self.to_delete.insert(0, table) self.assertTrue(table.exists()) - self.assertEqual(table.name, TABLE_NAME) + self.assertEqual(table.table_id, TABLE_NAME) def test_list_tables(self): DATASET_ID = _make_dataset_id('list_tables') @@ -240,7 +240,7 @@ def test_list_tables(self): all_tables = list(iterator) self.assertIsNone(iterator.next_page_token) created = [table for table in all_tables - if (table.name in tables_to_create and + if (table.table_id in tables_to_create and table.dataset_id == DATASET_ID)] self.assertEqual(len(created), len(tables_to_create)) @@ -1185,7 +1185,7 @@ def test_create_table_insert_fetch_nested_schema(self): table.create() self.to_delete.insert(0, table) self.assertTrue(table.exists()) - self.assertEqual(table.name, table_name) + self.assertEqual(table.table_id, table_name) to_insert = [] # Data is in "JSON Lines" format, see http://jsonlines.org/ diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index fffffb9b2b25..cb2e476e3f99 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -333,7 +333,7 @@ def test_get_job_hit(self): job = client.get_job(JOB_ID) self.assertIsInstance(job, QueryJob) - self.assertEqual(job.name, JOB_ID) + self.assertEqual(job.job_id, JOB_ID) self.assertEqual(job.create_disposition, 'CREATE_IF_NEEDED') self.assertEqual(job.write_disposition, 'WRITE_TRUNCATE') @@ -466,7 +466,7 @@ def test_list_jobs_defaults(self): for found, expected in zip(jobs, DATA['jobs']): name = expected['jobReference']['jobId'] self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.name, name) + self.assertEqual(found.job_id, name) self.assertEqual(token, TOKEN) self.assertEqual(len(conn._requested), 1) @@ -523,7 +523,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): for found, expected in zip(jobs, DATA['jobs']): name = expected['jobReference']['jobId'] self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.name, name) + self.assertEqual(found.job_id, name) self.assertEqual(token, TOKEN) self.assertEqual(len(conn._requested), 1) @@ -579,7 +579,7 @@ def test_load_table_from_storage(self): job = client.load_table_from_storage(JOB, destination, SOURCE_URI) self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.source_uris), [SOURCE_URI]) self.assertIs(job.destination, destination) @@ -600,7 +600,7 @@ def test_copy_table(self): job = client.copy_table(JOB, destination, source) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.sources), [source]) self.assertIs(job.destination, destination) @@ -620,7 +620,7 @@ def test_extract_table_to_storage(self): job = client.extract_table_to_storage(JOB, source, DESTINATION) self.assertIsInstance(job, ExtractJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) @@ -636,7 +636,7 @@ def test_run_async_query_defaults(self): job = client.run_async_query(JOB, QUERY) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, []) @@ -656,7 +656,7 @@ def test_run_async_w_udf_resources(self): job = client.run_async_query(JOB, QUERY, udf_resources=udf_resources) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, udf_resources) self.assertEqual(job.query_parameters, []) @@ -676,7 +676,7 @@ def test_run_async_w_query_parameters(self): query_parameters=query_parameters) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, query_parameters) diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 76d5057f6450..0bf0c17c3102 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -262,7 +262,7 @@ def test_job_w_jobid(self): self.assertIsInstance(job, QueryJob) self.assertEqual(job.query, self.QUERY) self.assertIs(job._client, client) - self.assertEqual(job.name, SERVER_GENERATED) + self.assertEqual(job.job_id, SERVER_GENERATED) fetched_later = query.job self.assertIs(fetched_later, job) From dd799331d88f6660f4f7a9023c53e71b1af30faa Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 15 Sep 2017 12:37:30 -0700 Subject: [PATCH 0192/2016] Parse timestamps in query parameters using canonical format (#3945) * Parse timestamps in query parameters according to BigQuery canonical timestamp format. The timestamp format in query parameters follows the canonical format specified at https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type This fixes a system test error which was happening in the bigquery-b2 branch. * Support more possible timestamp formats. Any of these formats may be returned from the BigQuery API. * Chop and string-replace timestamps into a canonical format. * BQ: fix lint errors. Remove references to table.name --- .../google/cloud/bigquery/_helpers.py | 44 ++++++++- .../google-cloud-bigquery/tests/system.py | 8 ++ .../tests/unit/test__helpers.py | 95 +++++++++++++++++++ 3 files changed, 144 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 5f6edf67dca5..e3d5c0f2f871 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -71,6 +71,39 @@ def _timestamp_from_json(value, field): return _datetime_from_microseconds(1e6 * float(value)) +def _timestamp_query_param_from_json(value, field): + """Coerce 'value' to a datetime, if set or not nullable. + + Args: + value (str): The timestamp. + field (.SchemaField): The field corresponding to the value. + + Returns: + Optional[datetime.datetime]: The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). + """ + if _not_null(value, field): + # Canonical formats for timestamps in BigQuery are flexible. See: + # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type + # The separator between the date and time can be 'T' or ' '. + value = value.replace(' ', 'T', 1) + # The UTC timezone may be formatted as Z or +00:00. + value = value.replace('Z', '') + value = value.replace('+00:00', '') + + if '.' in value: + # YYYY-MM-DDTHH:MM:SS.ffffff + return datetime.datetime.strptime( + value, _RFC3339_MICROS_NO_ZULU).replace(tzinfo=UTC) + else: + # YYYY-MM-DDTHH:MM:SS + return datetime.datetime.strptime( + value, _RFC3339_NO_FRACTION).replace(tzinfo=UTC) + else: + return None + + def _datetime_from_json(value, field): """Coerce 'value' to a datetime, if set or not nullable. @@ -139,6 +172,9 @@ def _record_from_json(value, field): 'RECORD': _record_from_json, } +_QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) +_QUERY_PARAMS_FROM_JSON['TIMESTAMP'] = _timestamp_query_param_from_json + def _row_from_json(row, schema): """Convert JSON row data to row with appropriate types. @@ -454,7 +490,7 @@ def from_api_repr(cls, resource): name = resource.get('name') type_ = resource['parameterType']['type'] value = resource['parameterValue']['value'] - converted = _CELLDATA_FROM_JSON[type_](value, None) + converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) return cls(name, type_, converted) def to_api_repr(self): @@ -576,7 +612,9 @@ def _from_api_repr_scalar(cls, resource): for value in resource['parameterValue']['arrayValues']] converted = [ - _CELLDATA_FROM_JSON[array_type](value, None) for value in values] + _QUERY_PARAMS_FROM_JSON[array_type](value, None) + for value in values + ] return cls(name, array_type, converted) @classmethod @@ -732,7 +770,7 @@ def from_api_repr(cls, resource): converted = ArrayQueryParameter.from_api_repr(struct_resource) else: value = value['value'] - converted = _CELLDATA_FROM_JSON[type_](value, None) + converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) instance.struct_values[key] = converted return instance diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index ab955b64eacf..48d66c7be2e0 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1081,6 +1081,14 @@ def test_dbapi_w_query_parameters(self): }, 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC), }, + { + 'sql': 'SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)', + 'query_parameters': { + 'zoned': datetime.datetime( + 2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC), + }, + 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC), + }, ] for example in examples: msg = 'sql: {} query_parameters: {}'.format( diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index c43f7b6d1ae3..091df8f7355a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -158,6 +158,63 @@ def test_w_base64_encoded_text(self): self.assertEqual(coerced, expected) +class Test_timestamp_query_param_from_json(unittest.TestCase): + + def _call_fut(self, value, field): + from google.cloud.bigquery import _helpers + + return _helpers._timestamp_query_param_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + + def test_w_timestamp_valid(self): + from google.cloud._helpers import UTC + + samples = [ + ( + '2016-12-20 15:58:27.339328+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20 15:58:27+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27.339328+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ( + '2016-12-20 15:58:27.339328Z', + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20 15:58:27Z', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27.339328Z', + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27Z', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ] + for timestamp_str, expected_result in samples: + self.assertEqual( + self._call_fut(timestamp_str, _Field('NULLABLE')), + expected_result) + + def test_w_timestamp_invalid(self): + with self.assertRaises(ValueError): + self._call_fut('definitely-not-a-timestamp', _Field('NULLABLE')) + + class Test_timestamp_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -1820,6 +1877,44 @@ def test_w_scalar(self): self.assertEqual(parameter.type_, 'INT64') self.assertEqual(parameter.value, 123) + def test_w_scalar_timestamp(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud._helpers import UTC + + RESOURCE = { + 'name': 'zoned', + 'parameterType': {'type': 'TIMESTAMP'}, + 'parameterValue': {'value': '2012-03-04 05:06:07+00:00'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'zoned') + self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual( + parameter.value, + datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC)) + + def test_w_scalar_timestamp_micros(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud._helpers import UTC + + RESOURCE = { + 'name': 'zoned', + 'parameterType': {'type': 'TIMESTAMP'}, + 'parameterValue': {'value': '2012-03-04 05:06:07.250000+00:00'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'zoned') + self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual( + parameter.value, + datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC)) + def test_w_array(self): from google.cloud.bigquery._helpers import ArrayQueryParameter From 9bebddb77d6ae6655c140092eb710a688c1d0c68 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 15 Sep 2017 16:07:48 -0700 Subject: [PATCH 0193/2016] BigQuery: Adds client.get_dataset() and removes dataset.reload() (#3973) * BigQuery: Adds client.get_dataset() and removes dataset.reload() * BigQuery: changes dataset.name to dataset.dataset_id in test * fixes client.get_dataset() docstring and removes unnecessary test variable --- .../google/cloud/bigquery/client.py | 14 ++++++++ .../google/cloud/bigquery/dataset.py | 26 +++++---------- .../google-cloud-bigquery/tests/system.py | 17 +++++----- .../tests/unit/test_client.py | 25 ++++++++++++++ .../tests/unit/test_dataset.py | 33 ------------------- 5 files changed, 57 insertions(+), 58 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5ab8ff820764..f9a393c0bbb6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -166,6 +166,20 @@ def dataset(self, dataset_name, project=None): return DatasetReference(project, dataset_name) + def get_dataset(self, dataset_ref): + """Fetch the dataset referenced by ``dataset_ref`` + + :type dataset_ref: + :class:`google.cloud.bigquery.dataset.DatasetReference` + :param dataset_ref: the dataset to use. + + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` + :returns: a ``Dataset`` instance + """ + api_response = self._connection.api_request( + method='GET', path=dataset_ref.path) + return Dataset.from_api_repr(api_response, self) + def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index cd31f737e693..8166e4fbec5e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -127,6 +127,15 @@ def dataset_id(self): """ return self._dataset_id + @property + def path(self): + """URL path for the dataset's APIs. + + :rtype: str + :returns: the path based on project and dataset name. + """ + return '/projects/%s/datasets/%s' % (self.project_id, self.dataset_id) + def table(self, table_id): """Constructs a TableReference. @@ -505,23 +514,6 @@ def exists(self, client=None): else: return True - def reload(self, client=None): - """API call: refresh dataset properties via a GET request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - - api_response = client._connection.api_request( - method='GET', path=self.path) - self._set_properties(api_response) - def patch(self, client=None, **kw): """API call: update individual dataset properties via a PATCH request. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 48d66c7be2e0..432dfa18b84d 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -121,19 +121,20 @@ def test_create_dataset(self): self.assertTrue(dataset.exists()) self.assertEqual(dataset.dataset_id, DATASET_ID) - def test_reload_dataset(self): - DATASET_ID = _make_dataset_id('reload_dataset') - dataset = Dataset(DATASET_ID, Config.CLIENT) + def test_get_dataset(self): + DATASET_ID = _make_dataset_id('get_dataset') + client = Config.CLIENT + dataset = Dataset(DATASET_ID, client) dataset.friendly_name = 'Friendly' dataset.description = 'Description' - retry_403(dataset.create)() self.to_delete.append(dataset) + dataset_ref = client.dataset(DATASET_ID) + + got = client.get_dataset(dataset_ref) - other = Dataset(DATASET_ID, Config.CLIENT) - other.reload() - self.assertEqual(other.friendly_name, 'Friendly') - self.assertEqual(other.description, 'Description') + self.assertEqual(got.friendly_name, 'Friendly') + self.assertEqual(got.description, 'Description') def test_patch_dataset(self): dataset = Dataset(_make_dataset_id('patch_dataset'), Config.CLIENT) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index cb2e476e3f99..a011c59fec2a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -272,6 +272,31 @@ def test_dataset_with_default_project(self): self.assertEqual(dataset.dataset_id, DATASET) self.assertEqual(dataset.project_id, PROJECT) + def test_get_dataset(self): + project = 'PROJECT' + dataset_id = 'dataset_id' + path = 'projects/%s/datasets/%s' % (project, dataset_id) + creds = _make_credentials() + http = object() + client = self._make_one(project=project, credentials=creds, _http=http) + resource = { + 'id': '%s:%s' % (project, dataset_id), + 'datasetReference': { + 'projectId': project, + 'datasetId': dataset_id, + }, + } + conn = client._connection = _Connection(resource) + dataset_ref = client.dataset(dataset_id) + + dataset = client.get_dataset(dataset_ref) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % path) + self.assertEqual(dataset.dataset_id, dataset_id) + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 0689e93b0f20..673fa69731cd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -520,39 +520,6 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) - def test_reload_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - dataset.reload() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verify_resource_properties(dataset, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - - dataset.reload(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verify_resource_properties(dataset, RESOURCE) - def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) From f10618e35dde9b0cc7bb20dae3ba0ad508488e31 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 18 Sep 2017 13:57:15 -0700 Subject: [PATCH 0194/2016] BigQuery: Changes DatasetReference project_id property to project to match Dataset (#3993) --- .../google/cloud/bigquery/dataset.py | 14 +++++++------- .../tests/unit/test_client.py | 4 ++-- .../tests/unit/test_dataset.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 8166e4fbec5e..0aba2c0928b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -98,25 +98,25 @@ class DatasetReference(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets - :type project_id: str - :param project_id: the ID of the project + :type project: str + :param project: the ID of the project :type dataset_id: str :param dataset_id: the ID of the dataset """ - def __init__(self, project_id, dataset_id): - self._project_id = project_id + def __init__(self, project, dataset_id): + self._project = project self._dataset_id = dataset_id @property - def project_id(self): + def project(self): """Project ID of the dataset. :rtype: str :returns: the project ID. """ - return self._project_id + return self._project @property def dataset_id(self): @@ -134,7 +134,7 @@ def path(self): :rtype: str :returns: the path based on project and dataset name. """ - return '/projects/%s/datasets/%s' % (self.project_id, self.dataset_id) + return '/projects/%s/datasets/%s' % (self.project, self.dataset_id) def table(self, table_id): """Constructs a TableReference. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index a011c59fec2a..24236a93f497 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -257,7 +257,7 @@ def test_dataset_with_specified_project(self): dataset = client.dataset(DATASET, PROJECT) self.assertIsInstance(dataset, DatasetReference) self.assertEqual(dataset.dataset_id, DATASET) - self.assertEqual(dataset.project_id, PROJECT) + self.assertEqual(dataset.project, PROJECT) def test_dataset_with_default_project(self): from google.cloud.bigquery.dataset import DatasetReference @@ -270,7 +270,7 @@ def test_dataset_with_default_project(self): dataset = client.dataset(DATASET) self.assertIsInstance(dataset, DatasetReference) self.assertEqual(dataset.dataset_id, DATASET) - self.assertEqual(dataset.project_id, PROJECT) + self.assertEqual(dataset.project, PROJECT) def test_get_dataset(self): project = 'PROJECT' diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 673fa69731cd..a3ee600565ba 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -98,7 +98,7 @@ def _make_one(self, *args, **kw): def test_ctor_defaults(self): dataset_ref = self._make_one('some-project-1', 'dataset_1') - self.assertEqual(dataset_ref.project_id, 'some-project-1') + self.assertEqual(dataset_ref.project, 'some-project-1') self.assertEqual(dataset_ref.dataset_id, 'dataset_1') def test_table(self): From 08c2958813c5b75c12e7321cf25a530650908cc8 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 18 Sep 2017 17:39:35 -0400 Subject: [PATCH 0195/2016] bigquery: add client.create_dataset; remove dataset.create (#3982) * bigquery: add client.create_dataset; remove dataset.create * fix lint * increase coverage to 100% * really fix coverage * fix lint --- .../google/cloud/bigquery/client.py | 31 ++++- .../google/cloud/bigquery/dataset.py | 40 +++---- .../google-cloud-bigquery/tests/system.py | 106 ++++++------------ .../tests/unit/test_client.py | 85 ++++++++++++++ .../tests/unit/test_dataset.py | 103 ----------------- 5 files changed, 166 insertions(+), 199 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f9a393c0bbb6..f17f43deaf5c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -148,11 +148,11 @@ def list_datasets(self, include_all=False, max_results=None, max_results=max_results, extra_params=extra_params) - def dataset(self, dataset_name, project=None): + def dataset(self, dataset_id, project=None): """Construct a reference to a dataset. - :type dataset_name: str - :param dataset_name: Name of the dataset. + :type dataset_id: str + :param dataset_id: ID of the dataset. :type project: str :param project: (Optional) project ID for the dataset (defaults to @@ -164,7 +164,30 @@ def dataset(self, dataset_name, project=None): if project is None: project = self.project - return DatasetReference(project, dataset_name) + return DatasetReference(project, dataset_id) + + def create_dataset(self, dataset): + """API call: create the dataset via a PUT request. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert + + :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset` + :param dataset: A ``Dataset`` populated with the desired initial state. + If project is missing, it defaults to the project of + the client. + + :rtype: ":class:`~google.cloud.bigquery.dataset.Dataset`" + :returns: a new ``Dataset`` returned from the service. + """ + if dataset.project is None: + dataset._project = self.project + path = '/projects/%s/datasets' % (dataset.project,) + api_response = self._connection.api_request( + method='POST', path=path, data=dataset._build_resource()) + ds = Dataset(dataset.dataset_id, project=dataset.project, client=self) + ds._set_properties(api_response) + return ds def get_dataset(self, dataset_ref): """Fetch the dataset referenced by ``dataset_ref`` diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 0aba2c0928b6..5a592adc4c29 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -155,8 +155,8 @@ class Dataset(object): :param dataset_id: the ID of the dataset :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + :param client: (Optional) A client which holds credentials and project + configuration for the dataset (which requires a project). :type access_entries: list of :class:`AccessEntry` :param access_entries: roles granted to entities for this dataset @@ -168,13 +168,17 @@ class Dataset(object): _access_entries = None - def __init__(self, dataset_id, client, access_entries=(), project=None): - self.dataset_id = dataset_id + def __init__(self, + dataset_id, + client=None, + access_entries=(), + project=None): + self._dataset_id = dataset_id self._client = client self._properties = {} # Let the @property do validation. self.access_entries = access_entries - self._project = project or client.project + self._project = project or (client and client.project) @property def project(self): @@ -229,6 +233,15 @@ def created(self): # creation_time will be in milliseconds. return _datetime_from_microseconds(1000.0 * creation_time) + @property + def dataset_id(self): + """Dataset ID. + + :rtype: str + :returns: the dataset ID. + """ + return self._dataset_id + @property def full_dataset_id(self): """ID for the dataset resource, in the form "project_id:dataset_id". @@ -473,23 +486,6 @@ def _build_resource(self): return resource - def create(self, client=None): - """API call: create the dataset via a PUT request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - path = '/projects/%s/datasets' % (self.project,) - api_response = client._connection.api_request( - method='POST', path=path, data=self._build_resource()) - self._set_properties(api_response) - def exists(self, client=None): """API call: test for the existence of the dataset via a GET request diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 432dfa18b84d..f91f53c24996 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -112,10 +112,7 @@ def _still_in_use(bad_request): def test_create_dataset(self): DATASET_ID = _make_dataset_id('create_dataset') - dataset = Dataset(DATASET_ID, Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) self.to_delete.append(dataset) self.assertTrue(dataset.exists()) @@ -124,10 +121,10 @@ def test_create_dataset(self): def test_get_dataset(self): DATASET_ID = _make_dataset_id('get_dataset') client = Config.CLIENT - dataset = Dataset(DATASET_ID, client) - dataset.friendly_name = 'Friendly' - dataset.description = 'Description' - retry_403(dataset.create)() + dataset_arg = Dataset(DATASET_ID) + dataset_arg.friendly_name = 'Friendly' + dataset_arg.description = 'Description' + dataset = retry_403(client.create_dataset)(dataset_arg) self.to_delete.append(dataset) dataset_ref = client.dataset(DATASET_ID) @@ -137,10 +134,8 @@ def test_get_dataset(self): self.assertEqual(got.description, 'Description') def test_patch_dataset(self): - dataset = Dataset(_make_dataset_id('patch_dataset'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('patch_dataset'))) self.to_delete.append(dataset) self.assertTrue(dataset.exists()) @@ -151,10 +146,8 @@ def test_patch_dataset(self): self.assertEqual(dataset.description, 'Description') def test_update_dataset(self): - dataset = Dataset(_make_dataset_id('update_dataset'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('update_dataset'))) self.to_delete.append(dataset) self.assertTrue(dataset.exists()) @@ -177,8 +170,8 @@ def test_list_datasets(self): 'newest' + unique_resource_id(), ] for dataset_id in datasets_to_create: - created_dataset = Dataset(dataset_id, Config.CLIENT) - retry_403(created_dataset.create)() + created_dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(dataset_id)) self.to_delete.append(created_dataset) # Retrieve the datasets. @@ -191,10 +184,8 @@ def test_list_datasets(self): self.assertEqual(len(created), len(datasets_to_create)) def test_create_table(self): - dataset = Dataset(_make_dataset_id('create_table'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('create_table'))) self.to_delete.append(dataset) TABLE_NAME = 'test_table' @@ -210,10 +201,7 @@ def test_create_table(self): def test_list_tables(self): DATASET_ID = _make_dataset_id('list_tables') - dataset = Dataset(DATASET_ID, Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) self.to_delete.append(dataset) # Retrieve tables before any are created for the dataset. @@ -246,10 +234,8 @@ def test_list_tables(self): self.assertEqual(len(created), len(tables_to_create)) def test_patch_table(self): - dataset = Dataset(_make_dataset_id('patch_table'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('patch_table'))) self.to_delete.append(dataset) TABLE_NAME = 'test_table' @@ -268,10 +254,8 @@ def test_patch_table(self): self.assertEqual(table.description, 'Description') def test_update_table(self): - dataset = Dataset(_make_dataset_id('update_table'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('update_table'))) self.to_delete.append(dataset) TABLE_NAME = 'test_table' @@ -311,11 +295,9 @@ def test_insert_data_then_dump_table(self): ('Bhettye Rhubble', 27, None), ] ROW_IDS = range(len(ROWS)) - dataset = Dataset( - _make_dataset_id('insert_data_then_dump'), Config.CLIENT) - self.assertFalse(dataset.exists()) - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('insert_data_then_dump'))) self.to_delete.append(dataset) TABLE_NAME = 'test_table' @@ -355,10 +337,8 @@ def test_load_table_from_local_file_then_dump_table(self): ] TABLE_NAME = 'test_table' - dataset = Dataset( - _make_dataset_id('load_local_then_dump'), Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('load_local_then_dump'))) self.to_delete.append(dataset) full_name = bigquery.SchemaField('full_name', 'STRING', @@ -404,10 +384,8 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("orange", 590), ("red", 650)] - dataset = Dataset( - _make_dataset_id('load_local_then_dump'), Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('load_local_then_dump'))) self.to_delete.append(dataset) table = dataset.table(TABLE_NAME) @@ -468,10 +446,8 @@ def test_load_table_from_storage_then_dump_table(self): self.to_delete.insert(0, blob) - dataset = Dataset( - _make_dataset_id('load_gcs_then_dump'), Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('load_gcs_then_dump'))) self.to_delete.append(dataset) full_name = bigquery.SchemaField('full_name', 'STRING', @@ -538,10 +514,8 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.to_delete.insert(0, blob) - dataset = Dataset( - _make_dataset_id('load_gcs_then_dump'), Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('load_gcs_then_dump'))) self.to_delete.append(dataset) table = dataset.table(table_name) @@ -576,9 +550,7 @@ def test_job_cancel(self): TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) - dataset = Dataset(DATASET_ID, Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) self.to_delete.append(dataset) full_name = bigquery.SchemaField('full_name', 'STRING', @@ -764,16 +736,15 @@ def test_dbapi_fetchall(self): rows = Config.CURSOR.fetchall() self.assertEqual(rows, [(1, 2), (3, 4), (5, 6)]) - def _load_table_for_dml(self, rows, dataset_name, table_name): + def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile - dataset = Dataset(dataset_name, Config.CLIENT) - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_id)) self.to_delete.append(dataset) greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') - table = dataset.table(table_name, schema=[greeting]) + table = dataset.table(table_id, schema=[greeting]) table.create() self.to_delete.insert(0, table) @@ -1162,10 +1133,8 @@ def test_insert_nested_nested(self): ('Some value', record) ] table_name = 'test_table' - dataset = Dataset( - _make_dataset_id('issue_2951'), Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('issue_2951'))) self.to_delete.append(dataset) table = dataset.table(table_name, schema=schema) @@ -1182,11 +1151,8 @@ def test_insert_nested_nested(self): def test_create_table_insert_fetch_nested_schema(self): table_name = 'test_table' - dataset = Dataset( - _make_dataset_id('create_table_nested_schema'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('create_table_nested_schema'))) self.to_delete.append(dataset) schema = _load_json_schema() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 24236a93f497..12282e47d931 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -297,6 +297,91 @@ def test_get_dataset(self): self.assertEqual(req['path'], '/%s' % path) self.assertEqual(dataset.dataset_id, dataset_id) + def test_create_dataset_minimal(self): + from google.cloud.bigquery.dataset import Dataset + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + PATH = 'projects/%s/datasets' % PROJECT + RESOURCE = { + 'datasetReference': + {'projectId': PROJECT, 'datasetId': DS_ID}, + 'etag': "etag", + 'id': "%s:%s" % (PROJECT, DS_ID), + } + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection(RESOURCE) + ds = client.create_dataset(Dataset(DS_ID)) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'datasetReference': + {'projectId': PROJECT, 'datasetId': DS_ID}, + } + self.assertEqual(req['data'], SENT) + self.assertEqual(ds.dataset_id, DS_ID) + self.assertEqual(ds.project, PROJECT) + self.assertEqual(ds.etag, RESOURCE['etag']) + self.assertEqual(ds.full_dataset_id, RESOURCE['id']) + + def test_create_dataset_w_attrs(self): + from google.cloud.bigquery.dataset import Dataset, AccessEntry + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + PATH = 'projects/%s/datasets' % PROJECT + DESCRIPTION = 'DESC' + FRIENDLY_NAME = 'FN' + USER_EMAIL = 'phred@example.com' + VIEW = { + 'projectId': 'my-proj', + 'datasetId': 'starry-skies', + 'tableId': 'northern-hemisphere', + } + RESOURCE = { + 'datasetReference': + {'projectId': PROJECT, 'datasetId': DS_ID}, + 'etag': "etag", + 'id': "%s:%s" % (PROJECT, DS_ID), + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'access': [ + {'role': 'OWNER', 'userByEmail': USER_EMAIL}, + {'view': VIEW}], + } + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection(RESOURCE) + entries = [AccessEntry('OWNER', 'userByEmail', USER_EMAIL), + AccessEntry(None, 'view', VIEW)] + ds_arg = Dataset(DS_ID, project=PROJECT, access_entries=entries) + ds_arg.description = DESCRIPTION + ds_arg.friendly_name = FRIENDLY_NAME + ds = client.create_dataset(ds_arg) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'datasetReference': + {'projectId': PROJECT, 'datasetId': DS_ID}, + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'access': [ + {'role': 'OWNER', 'userByEmail': USER_EMAIL}, + {'view': VIEW}], + } + self.assertEqual(req['data'], SENT) + self.assertEqual(ds.dataset_id, DS_ID) + self.assertEqual(ds.project, PROJECT) + self.assertEqual(ds.etag, RESOURCE['etag']) + self.assertEqual(ds.full_dataset_id, RESOURCE['id']) + self.assertEqual(ds.description, DESCRIPTION) + self.assertEqual(ds.friendly_name, FRIENDLY_NAME) + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index a3ee600565ba..8fcc6a87a613 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -386,109 +386,6 @@ def test__parse_access_entries_w_extra_keys(self): with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) - def test_create_w_bound_client(self): - PATH = 'projects/%s/datasets' % self.PROJECT - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - dataset.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - } - self.assertEqual(req['data'], SENT) - self._verify_resource_properties(dataset, RESOURCE) - - def test_create_w_alternate_client(self): - from google.cloud.bigquery.dataset import AccessEntry - - PATH = 'projects/%s/datasets' % self.PROJECT - USER_EMAIL = 'phred@example.com' - GROUP_EMAIL = 'group-name@lists.example.com' - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - dataset.friendly_name = TITLE - dataset.description = DESCRIPTION - VIEW = { - 'projectId': 'my-proj', - 'datasetId': 'starry-skies', - 'tableId': 'northern-hemisphere', - } - dataset.access_entries = [ - AccessEntry('OWNER', 'userByEmail', USER_EMAIL), - AccessEntry('OWNER', 'groupByEmail', GROUP_EMAIL), - AccessEntry('READER', 'domain', 'foo.com'), - AccessEntry('READER', 'specialGroup', 'projectReaders'), - AccessEntry('WRITER', 'specialGroup', 'projectWriters'), - AccessEntry(None, 'view', VIEW), - ] - - dataset.create(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - }, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - 'access': [ - {'role': 'OWNER', 'userByEmail': USER_EMAIL}, - {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL}, - {'role': 'READER', 'domain': 'foo.com'}, - {'role': 'READER', 'specialGroup': 'projectReaders'}, - {'role': 'WRITER', 'specialGroup': 'projectWriters'}, - {'view': VIEW}, - ], - } - self.assertEqual(req['data'], SENT) - self._verify_resource_properties(dataset, RESOURCE) - - def test_create_w_missing_output_properties(self): - # In the wild, the resource returned from 'dataset.create' sometimes - # lacks 'creationTime' / 'lastModifiedTime' - PATH = 'projects/%s/datasets' % (self.PROJECT,) - RESOURCE = self._makeResource() - del RESOURCE['creationTime'] - del RESOURCE['lastModifiedTime'] - self.WHEN = None - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - dataset.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - } - self.assertEqual(req['data'], SENT) - self._verify_resource_properties(dataset, RESOURCE) - def test_exists_miss_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn = _Connection() From b0c908a3e7b655afca6c99fd877c5c4b7f49bfca Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 18 Sep 2017 19:40:31 -0400 Subject: [PATCH 0196/2016] bigquery: remove dataset.exists (#3996) * bigquery: remove dataset.exists Dataset won't be able to support this method when we remove its client. Don't add client.dataset_exists; the user can use client.get_dataset and catch NotFound. * fix lint * fix lint agian * fix more lint --- .../google/cloud/bigquery/dataset.py | 25 ----------- .../google-cloud-bigquery/tests/system.py | 19 ++++++--- .../tests/unit/test_dataset.py | 42 +------------------ 3 files changed, 15 insertions(+), 71 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 5a592adc4c29..dcb52d20eacb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -17,7 +17,6 @@ from google.api.core import page_iterator from google.cloud._helpers import _datetime_from_microseconds -from google.cloud.exceptions import NotFound from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference @@ -486,30 +485,6 @@ def _build_resource(self): return resource - def exists(self, client=None): - """API call: test for the existence of the dataset via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: bool - :returns: Boolean indicating existence of the dataset. - """ - client = self._require_client(client) - - try: - client._connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) - except NotFound: - return False - else: - return True - def patch(self, client=None, **kw): """API call: update individual dataset properties via a PATCH request. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index f91f53c24996..d40688104aba 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -25,10 +25,10 @@ import six from google.cloud import bigquery -from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import Dataset, DatasetReference from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi -from google.cloud.exceptions import Forbidden +from google.cloud.exceptions import Forbidden, NotFound from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState @@ -91,7 +91,6 @@ def setUp(self): self.to_delete = [] def tearDown(self): - from google.cloud.bigquery.dataset import Dataset from google.cloud.storage import Bucket from google.cloud.exceptions import BadRequest from google.cloud.exceptions import Conflict @@ -115,7 +114,7 @@ def test_create_dataset(self): dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) self.to_delete.append(dataset) - self.assertTrue(dataset.exists()) + self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.dataset_id, DATASET_ID) def test_get_dataset(self): @@ -138,7 +137,7 @@ def test_patch_dataset(self): Dataset(_make_dataset_id('patch_dataset'))) self.to_delete.append(dataset) - self.assertTrue(dataset.exists()) + self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) dataset.patch(friendly_name='Friendly', description='Description') @@ -150,7 +149,7 @@ def test_update_dataset(self): Dataset(_make_dataset_id('update_dataset'))) self.to_delete.append(dataset) - self.assertTrue(dataset.exists()) + self.assertTrue(_dataset_exists(dataset)) after = [entry for entry in dataset.access_entries if entry.entity_id != 'projectWriters'] dataset.access_entries = after @@ -1214,3 +1213,11 @@ def test_create_table_insert_fetch_nested_schema(self): def _job_done(instance): return instance.state.lower() == 'done' + + +def _dataset_exists(ds): + try: + Config.CLIENT.get_dataset(DatasetReference(ds.project, ds.dataset_id)) + return True + except NotFound: + return False diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 8fcc6a87a613..4c5cfb57265e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -386,37 +386,6 @@ def test__parse_access_entries_w_extra_keys(self): with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) - def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - self.assertFalse(dataset.exists()) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) - - def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - - self.assertTrue(dataset.exists(client=CLIENT2)) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) - def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) @@ -727,13 +696,6 @@ def __init__(self, *responses): self._requested = [] def api_request(self, **kw): - from google.cloud.exceptions import NotFound - self._requested.append(kw) - - try: - response, self._responses = self._responses[0], self._responses[1:] - except IndexError: - raise NotFound('miss') - else: - return response + response, self._responses = self._responses[0], self._responses[1:] + return response From 99553c6feba604526b0b7c84e22dcafcb5d7f0bf Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 19 Sep 2017 09:59:05 -0700 Subject: [PATCH 0197/2016] BigQuery: Updates Table constructor to use TableReference as parameter (#3997) * wip update Table contructor * BigQuery: Updates Table constructor to use TableReference as parameter * fixes circular import error with Python 2.7 --- .../google/cloud/bigquery/client.py | 2 + .../google/cloud/bigquery/dataset.py | 6 +- .../google/cloud/bigquery/job.py | 13 +- .../google/cloud/bigquery/table.py | 28 +- .../tests/unit/test_job.py | 14 +- .../tests/unit/test_table.py | 426 ++++++++++-------- 6 files changed, 282 insertions(+), 207 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f17f43deaf5c..1b9e9a522a15 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -14,6 +14,8 @@ """Client for interacting with the Google BigQuery API.""" +from __future__ import absolute_import + from google.api.core import page_iterator from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index dcb52d20eacb..29dc3af19458 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -13,6 +13,9 @@ # limitations under the License. """Define API Datasets.""" + +from __future__ import absolute_import + import six from google.api.core import page_iterator @@ -598,7 +601,8 @@ def table(self, name, schema=()): :rtype: :class:`google.cloud.bigquery.table.Table` :returns: a new ``Table`` instance """ - return Table(name, dataset=self, schema=schema) + table_ref = TableReference(self, name) + return Table(table_ref, schema=schema, client=self._client) def _item_to_table(iterator, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 76a7d476cf6b..5807fcd25e0b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -26,6 +26,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import ArrayQueryParameter @@ -837,7 +838,8 @@ def from_api_repr(cls, resource, client): job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] dataset = Dataset(dest_config['datasetId'], client) - destination = Table(dest_config['tableId'], dataset) + table_ref = TableReference(dataset, dest_config['tableId']) + destination = Table(table_ref, client=client) source_urls = config.get('sourceUris', ()) job = cls(job_id, destination, source_urls, client=client) job._set_properties(resource) @@ -952,7 +954,8 @@ def from_api_repr(cls, resource, client): job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] dataset = Dataset(dest_config['datasetId'], client) - destination = Table(dest_config['tableId'], dataset) + table_ref = TableReference(dataset, dest_config['tableId']) + destination = Table(table_ref, client=client) sources = [] source_configs = config.get('sourceTables') if source_configs is None: @@ -963,7 +966,8 @@ def from_api_repr(cls, resource, client): source_configs = [single] for source_config in source_configs: dataset = Dataset(source_config['datasetId'], client) - sources.append(Table(source_config['tableId'], dataset)) + table_ref = TableReference(dataset, source_config['tableId']) + sources.append(Table(table_ref, client=client)) job = cls(job_id, destination, sources, client=client) job._set_properties(resource) return job @@ -1109,7 +1113,8 @@ def from_api_repr(cls, resource, client): job_id, config = cls._get_resource_config(resource) source_config = config['sourceTable'] dataset = Dataset(source_config['datasetId'], client) - source = Table(source_config['tableId'], dataset) + table_ref = TableReference(dataset, source_config['tableId']) + source = Table(table_ref, client=client) destination_uris = config['destinationUris'] job = cls(job_id, source, destination_uris, client=client) job._set_properties(resource) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f9c07b1e8ee6..fe1a9d3b4ec5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -14,6 +14,8 @@ """Define API Datasets.""" +from __future__ import absolute_import + import datetime import os @@ -90,11 +92,8 @@ class Table(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables - :type table_id: str - :param table_id: the ID of the table - - :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` - :param dataset: The dataset which contains the table. + :type table_ref: :class:`google.cloud.bigquery.table.TableReference` + :param table_ref: a pointer to a table :type schema: list of :class:`SchemaField` :param schema: The table's schema @@ -102,12 +101,13 @@ class Table(object): _schema = None - def __init__(self, table_id, dataset, schema=()): - self._table_id = table_id - self._dataset = dataset + def __init__(self, table_ref, schema=(), client=None): + self._table_id = table_ref.table_id + self._dataset = table_ref.dataset self._properties = {} # Let the @property do validation. self.schema = schema + self._client = client @property def project(self): @@ -477,7 +477,7 @@ def list_partitions(self, client=None): return [row[0] for row in query.rows] @classmethod - def from_api_repr(cls, resource, dataset): + def from_api_repr(cls, resource, client): """Factory: construct a table given its API representation :type resource: dict @@ -489,12 +489,18 @@ def from_api_repr(cls, resource, dataset): :rtype: :class:`google.cloud.bigquery.table.Table` :returns: Table parsed from ``resource``. """ + from google.cloud.bigquery import dataset + if ('tableReference' not in resource or 'tableId' not in resource['tableReference']): raise KeyError('Resource lacks required identity information:' '["tableReference"]["tableId"]') + project_id = resource['tableReference']['projectId'] table_id = resource['tableReference']['tableId'] - table = cls(table_id, dataset=dataset) + dataset_id = resource['tableReference']['datasetId'] + dataset_ref = dataset.DatasetReference(project_id, dataset_id) + + table = cls(dataset_ref.table(table_id), client=client) table._set_properties(resource) return table @@ -510,7 +516,7 @@ def _require_client(self, client): :returns: The client passed in or the currently bound client. """ if client is None: - client = self._dataset._client + client = self._client return client def _set_properties(self, api_response): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 7c9a84f4503a..11f4dec9870c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2169,6 +2169,7 @@ def test_begin_w_bound_client(self): def test_begin_w_alternate_client(self): from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.dataset import Table PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2203,8 +2204,10 @@ def test_begin_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_NAME, self.QUERY, client1) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) dataset = Dataset(DS_ID, client1) - table = Table(TABLE, dataset) + table_ref = dataset_ref.table(TABLE) + table = Table(table_ref, client=client1) job.allow_large_results = True job.create_disposition = 'CREATE_NEVER' @@ -2460,8 +2463,8 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - from google.cloud.bigquery.dataset import Dataset - from google.cloud.bigquery.dataset import Table + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) DS_ID = 'DATASET' @@ -2471,8 +2474,9 @@ def test_reload_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, None, client) - dataset = Dataset(DS_ID, client) - table = Table(DEST_TABLE, dataset) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(DEST_TABLE) + table = Table(table_ref, client=client) job.destination = table job.reload() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index f076c6d39938..7cc7bffe7080 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -21,6 +21,8 @@ from six.moves import http_client import pytest +from google.cloud.bigquery.dataset import DatasetReference + class _SchemaBase(object): @@ -166,8 +168,10 @@ def _verifyResourceProperties(self, table, resource): def test_ctor(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) + self.assertEqual(table.table_id, self.TABLE_NAME) self.assertIs(table._dataset, dataset) self.assertEqual(table.project, self.PROJECT) @@ -198,17 +202,20 @@ def test_ctor_w_schema(self): from google.cloud.bigquery.table import SchemaField client = _Client(self.PROJECT) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) + self.assertEqual(table.schema, [full_name, age]) def test_num_bytes_getter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) # Check with no value set. self.assertIsNone(table.num_bytes) @@ -229,8 +236,9 @@ def test_num_bytes_getter(self): def test_num_rows_getter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) # Check with no value set. self.assertIsNone(table.num_rows) @@ -251,8 +259,9 @@ def test_num_rows_getter(self): def test_schema_setter_non_list(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(TypeError): table.schema = object() @@ -260,8 +269,9 @@ def test_schema_setter_invalid_field(self): from google.cloud.bigquery.table import SchemaField client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') with self.assertRaises(ValueError): table.schema = [full_name, object()] @@ -270,8 +280,9 @@ def test_schema_setter(self): from google.cloud.bigquery.table import SchemaField client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table.schema = [full_name, age] @@ -289,8 +300,9 @@ def test_props_set_by_server(self): URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table._properties['creationTime'] = _millis(CREATED) table._properties['etag'] = 'ETAG' table._properties['lastModifiedTime'] = _millis(MODIFIED) @@ -311,22 +323,25 @@ def test_props_set_by_server(self): def test_description_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.description = 12345 def test_description_setter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.description = 'DESCRIPTION' self.assertEqual(table.description, 'DESCRIPTION') def test_expires_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.expires = object() @@ -336,72 +351,82 @@ def test_expires_setter(self): WHEN = datetime.datetime(2015, 7, 28, 16, 39, tzinfo=UTC) client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.expires = WHEN self.assertEqual(table.expires, WHEN) def test_friendly_name_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.friendly_name = 12345 def test_friendly_name_setter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.friendly_name = 'FRIENDLY' self.assertEqual(table.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.location = 12345 def test_location_setter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.location = 'LOCATION' self.assertEqual(table.location, 'LOCATION') def test_view_query_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.view_query = 12345 def test_view_query_setter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.view_query = 'select * from foo' self.assertEqual(table.view_query, 'select * from foo') def test_view_query_deleter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.view_query = 'select * from foo' del table.view_query self.assertIsNone(table.view_query) def test_view_use_legacy_sql_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.view_use_legacy_sql = 12345 def test_view_use_legacy_sql_setter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.view_use_legacy_sql = False table.view_query = 'select * from foo' self.assertEqual(table.view_use_legacy_sql, False) @@ -410,16 +435,14 @@ def test_view_use_legacy_sql_setter(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) - dataset = _Dataset(client) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, dataset) + klass.from_api_repr(RESOURCE, client) def test_from_api_repr_bare(self): self._setUpConstants() client = _Client(self.PROJECT) - dataset = _Dataset(client) RESOURCE = { 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), 'tableReference': { @@ -430,18 +453,17 @@ def test_from_api_repr_bare(self): 'type': 'TABLE', } klass = self._get_target_class() - table = klass.from_api_repr(RESOURCE, dataset) + table = klass.from_api_repr(RESOURCE, client) self.assertEqual(table.table_id, self.TABLE_NAME) - self.assertIs(table._dataset, dataset) + self.assertIs(table._client, client) self._verifyResourceProperties(table, RESOURCE) def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) RESOURCE = self._makeResource() klass = self._get_target_class() - table = klass.from_api_repr(RESOURCE, dataset) - self.assertIs(table._dataset._client, client) + table = klass.from_api_repr(RESOURCE, client) + self.assertIs(table._client, client) self._verifyResourceProperties(table, RESOURCE) def test_create_new_day_partitioned_table(self): @@ -449,8 +471,9 @@ def test_create_new_day_partitioned_table(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.partitioning_type = 'DAY' table.create() @@ -475,11 +498,12 @@ def test_create_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) table.create() @@ -506,11 +530,12 @@ def test_create_w_partition_no_expire(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partitioning_type) table.partitioning_type = "DAY" @@ -541,11 +566,12 @@ def test_create_w_partition_and_expire(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 self.assertEqual(table.partitioning_type, "DAY") @@ -575,11 +601,12 @@ def test_partition_type_setter_bad_type(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) with self.assertRaises(ValueError): table.partitioning_type = 123 @@ -589,11 +616,12 @@ def test_partition_type_setter_unknown_value(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) with self.assertRaises(ValueError): table.partitioning_type = "HASH" @@ -603,11 +631,12 @@ def test_partition_type_setter_w_known_value(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partitioning_type) table.partitioning_type = 'DAY' self.assertEqual(table.partitioning_type, 'DAY') @@ -618,11 +647,12 @@ def test_partition_type_setter_w_none(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) table._properties['timePartitioning'] = {'type': 'DAY'} table.partitioning_type = None self.assertIsNone(table.partitioning_type) @@ -634,11 +664,12 @@ def test_partition_experation_bad_type(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) with self.assertRaises(ValueError): table.partition_expiration = "NEVER" @@ -648,11 +679,12 @@ def test_partition_expiration_w_integer(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 self.assertEqual(table.partitioning_type, "DAY") @@ -664,11 +696,12 @@ def test_partition_expiration_w_none(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partition_expiration) table._properties['timePartitioning'] = { 'type': 'DAY', @@ -684,11 +717,12 @@ def test_partition_expiration_w_none_no_partition_set(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partition_expiration) table.partition_expiration = None self.assertIsNone(table.partitioning_type) @@ -700,11 +734,12 @@ def test_list_partitions(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) client._query_results = [(20160804, None), (20160805, None)] - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertEqual(table.list_partitions(), [20160804, 20160805]) def test_create_w_alternate_client(self): @@ -729,8 +764,9 @@ def test_create_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client=client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) table.friendly_name = TITLE table.description = DESCRIPTION table.view_query = QUERY @@ -766,11 +802,12 @@ def test_create_w_missing_output_properties(self): self.WHEN = None conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) table.create() @@ -795,8 +832,9 @@ def test_exists_miss_w_bound_client(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) self.assertFalse(table.exists()) @@ -813,8 +851,9 @@ def test_exists_hit_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) self.assertTrue(table.exists(client=client2)) @@ -831,8 +870,9 @@ def test_reload_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.reload() @@ -850,8 +890,9 @@ def test_reload_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) table.reload(client=client2) @@ -866,8 +907,9 @@ def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.patch(expires='BOGUS') @@ -882,8 +924,9 @@ def test_patch_w_bound_client(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.patch(description=DESCRIPTION, friendly_name=TITLE, @@ -922,8 +965,9 @@ def test_patch_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='NULLABLE') @@ -958,8 +1002,9 @@ def test_patch_w_schema_None(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.patch(schema=None) @@ -983,11 +1028,12 @@ def test_update_w_bound_client(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) table.description = DESCRIPTION table.friendly_name = TITLE @@ -1033,8 +1079,9 @@ def test_update_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) table.default_table_expiration_ms = DEF_TABLE_EXP table.location = LOCATION table.expires = self.EXP_TIME @@ -1065,8 +1112,9 @@ def test_delete_w_bound_client(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.delete() @@ -1082,8 +1130,9 @@ def test_delete_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) table.delete(client=client2) @@ -1097,8 +1146,9 @@ def test_fetch_data_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA client = _Client(project=self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError) as exc: table.fetch_data() @@ -1154,12 +1204,13 @@ def _bigquery_timestamp_float_repr(ts_float): conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='NULLABLE') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, joined]) + table = self._make_one(table_ref, schema=[full_name, age, joined], + client=client) iterator = table.fetch_data() page = six.next(iterator.pages) @@ -1220,13 +1271,15 @@ def test_fetch_data_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(DATA) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') score = SchemaField('score', 'FLOAT', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, voter, score]) + table = self._make_one(table_ref, + schema=[full_name, age, voter, score], + client=client1) iterator = table.fetch_data( client=client2, max_results=MAX, page_token=TOKEN) @@ -1277,14 +1330,15 @@ def test_fetch_data_w_repeated_fields(self): } conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) color = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[color, struct]) + table = self._make_one(table_ref, schema=[color, struct], + client=client) iterator = table.fetch_data() page = six.next(iterator.pages) @@ -1332,15 +1386,16 @@ def test_fetch_data_w_record_schema(self): } conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') area_code = SchemaField('area_code', 'STRING', 'REQUIRED') local_number = SchemaField('local_number', 'STRING', 'REQUIRED') rank = SchemaField('rank', 'INTEGER', 'REQUIRED') phone = SchemaField('phone', 'RECORD', mode='NULLABLE', fields=[area_code, local_number, rank]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, phone]) + table = self._make_one(table_ref, schema=[full_name, phone], + client=client) iterator = table.fetch_data() page = six.next(iterator.pages) @@ -1371,8 +1426,9 @@ def test_row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} client = _Client(project=self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError) as exc: table.row_from_mapping(MAPPING) @@ -1388,13 +1444,15 @@ def test_row_from_mapping_w_invalid_schema(self): 'bogus': 'WHATEVER', } client = _Client(project=self.PROJECT) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') bogus = SchemaField('joined', 'STRING', mode='BOGUS') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, colors, bogus]) + table = self._make_one(table_ref, + schema=[full_name, age, colors, bogus], + client=client) with self.assertRaises(ValueError) as exc: table.row_from_mapping(MAPPING) @@ -1410,13 +1468,15 @@ def test_row_from_mapping_w_schema(self): 'extra': 'IGNORED', } client = _Client(project=self.PROJECT) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') joined = SchemaField('joined', 'STRING', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, colors, joined]) + table = self._make_one(table_ref, + schema=[full_name, age, colors, joined], + client=client) self.assertEqual( table.row_from_mapping(MAPPING), @@ -1426,8 +1486,9 @@ def test_insert_data_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA client = _Client(project=self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) ROWS = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), @@ -1454,12 +1515,13 @@ def test_insert_data_w_bound_client(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, joined]) + table = self._make_one(table_ref, schema=[full_name, age, joined], + client=client) ROWS = [ ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), @@ -1507,12 +1569,13 @@ def test_insert_data_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESPONSE) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, voter]) + table = self._make_one(table_ref, schema=[full_name, age, voter], + client=client1) ROWS = [ ('Phred Phlyntstone', 32, True), ('Bharney Rhubble', 33, False), @@ -1564,14 +1627,15 @@ def test_insert_data_w_repeated_fields(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, struct]) + table = self._make_one(table_ref, schema=[full_name, struct], + client=client) ROWS = [ (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), ] @@ -1600,15 +1664,16 @@ def test_insert_data_w_record_schema(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') area_code = SchemaField('area_code', 'STRING', 'REQUIRED') local_number = SchemaField('local_number', 'STRING', 'REQUIRED') rank = SchemaField('rank', 'INTEGER', 'REQUIRED') phone = SchemaField('phone', 'RECORD', mode='NULLABLE', fields=[area_code, local_number, rank]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, phone]) + table = self._make_one(table_ref, schema=[full_name, phone], + client=client) ROWS = [ ('Phred Phlyntstone', {'area_code': '800', 'local_number': '555-1212', @@ -1639,7 +1704,9 @@ def _row_data(row): def test__get_transport(self): client = mock.Mock(spec=[u'_credentials', '_http']) client._http = mock.sentinel.http - table = self._make_one(self.TABLE_NAME, None) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) transport = table._get_transport(client) @@ -1667,8 +1734,9 @@ def _initiate_resumable_upload_helper(self, num_retries=None): connection = _Connection() client = _Client(self.PROJECT, connection=connection) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) # Create mocks to be checked for doing transport. resumable_url = 'http://test.invalid?upload_id=hey-you' @@ -1736,8 +1804,9 @@ def _do_multipart_upload_success_helper( connection = _Connection() client = _Client(self.PROJECT, connection=connection) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) # Create mocks to be checked for doing transport. fake_transport = self._mock_transport(http_client.OK, {}) @@ -1808,8 +1877,9 @@ def _make_table(transport=None): client._http = transport client.project = 'project_id' - dataset = dataset.Dataset('test_dataset', client) - table = table.Table('test_table', dataset) + dataset_ref = dataset.DatasetReference('project_id', 'test_dataset') + table_ref = dataset_ref.table('test_table') + table = table.Table(table_ref, client=client) return table @@ -1867,7 +1937,7 @@ def test_upload_from_file_resumable(self): table.upload_from_file(file_obj, source_format='CSV') do_upload.assert_called_once_with( - table._dataset._client, + table._client, file_obj, self.EXPECTED_CONFIGURATION, google.cloud.bigquery.table._DEFAULT_NUM_RETRIES) @@ -1897,7 +1967,7 @@ def test_upload_file_resumable_metadata(self): 'load': { 'sourceFormat': config_args['source_format'], 'destinationTable': { - 'projectId': table._dataset._client.project, + 'projectId': table._dataset.project, 'datasetId': table.dataset_id, 'tableId': table.table_id, }, @@ -1926,7 +1996,7 @@ def test_upload_file_resumable_metadata(self): file_obj, **config_args) do_upload.assert_called_once_with( - table._dataset._client, + table._client, file_obj, expected_config, mock.ANY) @@ -1945,7 +2015,7 @@ def test_upload_from_file_multipart(self): file_obj, source_format='CSV', size=file_obj_size) do_upload.assert_called_once_with( - table._dataset._client, + table._client, file_obj, self.EXPECTED_CONFIGURATION, file_obj_size, @@ -1963,7 +2033,7 @@ def test_upload_from_file_with_retries(self): file_obj, source_format='CSV', num_retries=num_retries) do_upload.assert_called_once_with( - table._dataset._client, + table._client, file_obj, self.EXPECTED_CONFIGURATION, num_retries) @@ -2046,7 +2116,7 @@ def test__do_resumable_upload(self): table = self._make_table(transport) result = table._do_resumable_upload( - table._dataset._client, + table._client, file_obj, self.EXPECTED_CONFIGURATION, None) @@ -2069,7 +2139,7 @@ def test__do_multipart_upload(self): file_obj_len = len(file_obj.getvalue()) table._do_multipart_upload( - table._dataset._client, + table._client, file_obj, self.EXPECTED_CONFIGURATION, file_obj_len, @@ -2100,7 +2170,7 @@ def test__do_multipart_upload_wrong_size(self): with pytest.raises(ValueError): table._do_multipart_upload( - table._dataset._client, + table._client, file_obj, {}, file_obj_len+1, @@ -2308,22 +2378,6 @@ def run(self): self.rows = self.client._query_results -class _Dataset(object): - - def __init__(self, client, dataset_id=TestTable.DS_ID): - self._client = client - self.dataset_id = dataset_id - - @property - def path(self): - return '/projects/%s/datasets/%s' % ( - self._client.project, self.dataset_id) - - @property - def project(self): - return self._client.project - - class _Connection(object): API_BASE_URL = 'http://example.com' From d77ed23e59f7fd494f6221e86611406046902337 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 19 Sep 2017 14:02:02 -0700 Subject: [PATCH 0198/2016] BQ: client.extract_table starts extract job (#3991) * BQ: client.extract_table starts extract job Add system tests for extract_table. * BigQuery: client.extract_table use `**kwargs` for Python 2.7. * BQ: extract_table. Use dict.get for kwargs. job_id instead of job_name. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/_helpers.py | 76 +++++++ .../google/cloud/bigquery/client.py | 39 +++- .../google/cloud/bigquery/job.py | 186 ++++++++++++------ .../google-cloud-bigquery/tests/system.py | 101 ++++++++++ .../tests/unit/test__helpers.py | 43 ++++ .../tests/unit/test_client.py | 88 ++++++++- .../tests/unit/test_job.py | 49 +++-- 8 files changed, 484 insertions(+), 100 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 00fa4445b0d0..333854035376 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -32,6 +32,7 @@ from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -41,6 +42,7 @@ 'ArrayQueryParameter', 'Client', 'Dataset', + 'ExtractJobConfig', 'ScalarQueryParameter', 'SchemaField', 'StructQueryParameter', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index e3d5c0f2f871..abe7a8934c96 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -300,6 +300,82 @@ def _time_to_json(value): _SCALAR_VALUE_TO_JSON_PARAM['TIMESTAMP'] = _timestamp_to_json_parameter +class _ApiResourceProperty(object): + """Base property implementation. + + Values will be stored on a `_properties` helper attribute of the + property's job instance. + + :type name: str + :param name: name of the property + + :type resource_name: str + :param resource_name: name of the property in the resource dictionary + """ + + def __init__(self, name, resource_name): + self.name = name + self.resource_name = resource_name + + def __get__(self, instance, owner): + """Descriptor protocol: accessor""" + if instance is None: + return self + return instance._properties.get(self.resource_name) + + def _validate(self, value): + """Subclasses override to impose validation policy.""" + pass + + def __set__(self, instance, value): + """Descriptor protocol: mutator""" + self._validate(value) + instance._properties[self.resource_name] = value + + def __delete__(self, instance): + """Descriptor protocol: deleter""" + del instance._properties[self.resource_name] + + +class _TypedApiResourceProperty(_ApiResourceProperty): + """Property implementation: validates based on value type. + + :type name: str + :param name: name of the property + + :type resource_name: str + :param resource_name: name of the property in the resource dictionary + + :type property_type: type or sequence of types + :param property_type: type to be validated + """ + def __init__(self, name, resource_name, property_type): + super(_TypedApiResourceProperty, self).__init__( + name, resource_name) + self.property_type = property_type + + def _validate(self, value): + """Ensure that 'value' is of the appropriate type. + + :raises: ValueError on a type mismatch. + """ + if value is None: + return + if not isinstance(value, self.property_type): + raise ValueError('Required type: %s' % (self.property_type,)) + + +class _EnumApiResourceProperty(_ApiResourceProperty): + """Pseudo-enumeration class. + + :type name: str + :param name: name of the property. + + :type resource_name: str + :param resource_name: name of the property in the resource dictionary + """ + + class _ConfigurationProperty(object): """Base property implementation. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1b9e9a522a15..05be0da8123d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -16,6 +16,8 @@ from __future__ import absolute_import +import uuid + from google.api.core import page_iterator from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection @@ -385,27 +387,44 @@ def copy_table(self, job_id, destination, *sources): """ return CopyJob(job_id, destination, sources, client=self) - def extract_table_to_storage(self, job_id, source, *destination_uris): - """Construct a job for extracting a table into Cloud Storage files. + def extract_table(self, source, *destination_uris, **kwargs): + """Start a job to extract a table into Cloud Storage files. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract - :type job_id: str - :param job_id: Name of the job. - - :type source: :class:`google.cloud.bigquery.table.Table` + :type source: :class:`google.cloud.bigquery.table.TableReference` :param source: table to be extracted. :type destination_uris: sequence of string - :param destination_uris: URIs of CloudStorage file(s) into which - table data is to be extracted; in format - ``gs:///``. + :param destination_uris: + URIs of Cloud Storage file(s) into which table data is to be + extracted; in format ``gs:///``. + + :type kwargs: dict + :param kwargs: Additional keyword arguments. + + :Keyword Arguments: + * *job_config* + (:class:`google.cloud.bigquery.job.ExtractJobConfig`) -- + (Optional) Extra configuration options for the extract job. + * *job_id* (``str``) -- + Additional content + (Optional) The ID of the job. :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: a new ``ExtractJob`` instance """ - return ExtractJob(job_id, source, destination_uris, client=self) + job_config = kwargs.get('job_config') + job_id = kwargs.get('job_id') + if job_id is None: + job_id = str(uuid.uuid4()) + + job = ExtractJob( + job_id, source, list(destination_uris), client=self, + job_config=job_config) + job.begin() + return job def run_async_query(self, job_id, query, udf_resources=(), query_parameters=()): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5807fcd25e0b..cfc861266355 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -14,6 +14,7 @@ """Define API Jobs.""" +import copy import threading import six @@ -24,6 +25,7 @@ from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference @@ -35,8 +37,10 @@ from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery._helpers import UDFResourcesProperty +from google.cloud.bigquery._helpers import _EnumApiResourceProperty from google.cloud.bigquery._helpers import _EnumProperty from google.cloud.bigquery._helpers import _query_param_from_api_repr +from google.cloud.bigquery._helpers import _TypedApiResourceProperty from google.cloud.bigquery._helpers import _TypedProperty _DONE_STATE = 'DONE' @@ -116,7 +120,7 @@ def __set__(self, instance, value): setattr(instance._configuration, self._backing_name, value) -class Compression(_EnumProperty): +class Compression(_EnumApiResourceProperty): """Pseudo-enum for ``compression`` properties.""" GZIP = 'GZIP' NONE = 'NONE' @@ -128,7 +132,7 @@ class CreateDisposition(_EnumProperty): CREATE_NEVER = 'CREATE_NEVER' -class DestinationFormat(_EnumProperty): +class DestinationFormat(_EnumApiResourceProperty): """Pseudo-enum for ``destination_format`` properties.""" CSV = 'CSV' NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' @@ -401,6 +405,7 @@ def begin(self, client=None): client = self._require_client(client) path = '/projects/%s/jobs' % (self.project,) + api_response = client._connection.api_request( method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) @@ -973,62 +978,126 @@ def from_api_repr(cls, resource, client): return job -class _ExtractConfiguration(object): - """User-settable configuration options for extract jobs. +class ExtractJobConfig(object): + """Configuration options for extract jobs. - Values which are ``None`` -> server defaults. + All properties in this class are optional. Values which are ``None`` -> + server defaults. """ - _compression = None - _destination_format = None - _field_delimiter = None - _print_header = None + + def __init__(self): + self._properties = {} + + compression = Compression('compression', 'compression') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression + """ + + destination_format = DestinationFormat( + 'destination_format', 'destinationFormat') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat + """ + + field_delimiter = _TypedApiResourceProperty( + 'field_delimiter', 'fieldDelimiter', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter + """ + + print_header = _TypedApiResourceProperty( + 'print_header', 'printHeader', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader + """ + + def to_api_repr(self): + """Build an API representation of the extact job config. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config class ExtractJob(_AsyncJob): """Asynchronous job: extract data from a table into Cloud Storage. :type job_id: str - :param job_id: the job's ID, within the project belonging to ``client``. + :param job_id: the job's ID - :type source: :class:`google.cloud.bigquery.table.Table` + :type source: :class:`google.cloud.bigquery.table.TableReference` :param source: Table into which data is to be loaded. :type destination_uris: list of string - :param destination_uris: URIs describing Cloud Storage blobs into which - extracted data will be written, in format - ``gs:///``. + :param destination_uris: + URIs describing where the extracted data will be written in Cloud + Storage, using the format ``gs:///``. :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + :param client: + A client which holds credentials and project configuration. + + :type job_config: :class:`~google.cloud.bigquery.job.ExtractJobConfig` + :param job_config: + (Optional) Extra configuration options for the extract job. """ _JOB_TYPE = 'extract' - def __init__(self, job_id, source, destination_uris, client): + def __init__( + self, job_id, source, destination_uris, client, job_config=None): super(ExtractJob, self).__init__(job_id, client) + + if job_config is None: + job_config = ExtractJobConfig() + self.source = source self.destination_uris = destination_uris - self._configuration = _ExtractConfiguration() + self._configuration = job_config - compression = Compression('compression') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression - """ + @property + def compression(self): + """See + :class:`~google.cloud.bigquery.job.ExtractJobConfig.compression`. + """ + return self._configuration.compression - destination_format = DestinationFormat('destination_format') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat - """ + @property + def destination_format(self): + """See + :class:`~google.cloud.bigquery.job.ExtractJobConfig.destination_format`. + """ + return self._configuration.destination_format - field_delimiter = _TypedProperty('field_delimiter', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter - """ + @property + def field_delimiter(self): + """See + :class:`~google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. + """ + return self._configuration.field_delimiter - print_header = _TypedProperty('print_header', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader - """ + @property + def print_header(self): + """See + :class:`~google.cloud.bigquery.job.ExtractJobConfig.print_header`. + """ + return self._configuration.print_header @property def destination_uri_file_counts(self): @@ -1046,50 +1115,34 @@ def destination_uri_file_counts(self): result = int(result) return result - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - if self.compression is not None: - configuration['compression'] = self.compression - if self.destination_format is not None: - configuration['destinationFormat'] = self.destination_format - if self.field_delimiter is not None: - configuration['fieldDelimiter'] = self.field_delimiter - if self.print_header is not None: - configuration['printHeader'] = self.print_header - def _build_resource(self): """Generate a resource for :meth:`begin`.""" source_ref = { - 'projectId': self.source.project, - 'datasetId': self.source.dataset_id, + 'projectId': self.source.dataset.project, + 'datasetId': self.source.dataset.dataset_id, 'tableId': self.source.table_id, } + configuration = self._configuration.to_api_repr() + configuration['sourceTable'] = source_ref + configuration['destinationUris'] = self.destination_uris + resource = { 'jobReference': { 'projectId': self.project, 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'sourceTable': source_ref, - 'destinationUris': self.destination_uris, - }, + self._JOB_TYPE: configuration, }, } - configuration = resource['configuration'][self._JOB_TYPE] - self._populate_config_resource(configuration) return resource def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" - self.compression = configuration.get('compression') - self.destination_format = configuration.get('destinationFormat') - self.field_delimiter = configuration.get('fieldDelimiter') - self.print_header = _bool_or_none( - configuration.get('printHeader')) + self._configuration._properties = copy.deepcopy(configuration) @classmethod def from_api_repr(cls, resource, client): @@ -1110,13 +1163,16 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: Job parsed from ``resource``. """ - job_id, config = cls._get_resource_config(resource) - source_config = config['sourceTable'] - dataset = Dataset(source_config['datasetId'], client) - table_ref = TableReference(dataset, source_config['tableId']) - source = Table(table_ref, client=client) - destination_uris = config['destinationUris'] - job = cls(job_id, source, destination_uris, client=client) + job_id, config_resource = cls._get_resource_config(resource) + config = ExtractJobConfig.from_api_repr(config_resource) + source_config = config_resource['sourceTable'] + dataset = DatasetReference( + source_config['projectId'], source_config['datasetId']) + source = dataset.table(source_config['tableId']) + destination_uris = config_resource['destinationUris'] + + job = cls( + job_id, source, destination_uris, client=client, job_config=config) job._set_properties(resource) return job diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index d40688104aba..91ddca876853 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -543,6 +543,107 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.assertEqual( sorted(actual_rows, key=by_age), sorted(rows, key=by_age)) + def _load_table_for_extract_table( + self, storage_client, rows, bucket_name, blob_name, table): + from google.cloud._testing import _NamedTemporaryFile + + local_id = unique_resource_id() + gs_url = 'gs://{}/{}'.format(bucket_name, blob_name) + + # In the **very** rare case the bucket name is reserved, this + # fails with a ConnectionError. + bucket = storage_client.create_bucket(bucket_name) + self.to_delete.append(bucket) + blob = bucket.blob(blob_name) + + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as csv_write: + writer = csv.writer(csv_write) + writer.writerow(('Full Name', 'Age')) + writer.writerows(rows) + + with open(temp.name, 'rb') as csv_read: + blob.upload_from_file(csv_read, content_type='text/csv') + self.to_delete.insert(0, blob) + + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(table.dataset.dataset_id)) + self.to_delete.append(dataset) + table = dataset.table(table.table_id) + self.to_delete.insert(0, table) + job = Config.CLIENT.load_table_from_storage( + 'bq_extract_storage_test_' + local_id, table, gs_url) + job.autodetect = True + job.begin() + # Allow for 90 seconds of "warm up" before rows visible. See + # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds + retry = RetryInstanceState(_job_done, max_tries=8) + retry(job.reload)() + + def test_extract_table(self): + from google.cloud.storage import Client as StorageClient + + storage_client = StorageClient() + local_id = unique_resource_id() + bucket_name = 'bq_extract_test' + local_id + blob_name = 'person_ages.csv' + dataset_id = _make_dataset_id('load_gcs_then_extract') + table_id = 'test_table' + table = Config.CLIENT.dataset(dataset_id).table(table_id) + rows = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + self._load_table_for_extract_table( + storage_client, rows, bucket_name, blob_name, table) + bucket = storage_client.bucket(bucket_name) + destination_blob_name = 'person_ages_out.csv' + destination = bucket.blob(destination_blob_name) + destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) + + job = Config.CLIENT.extract_table(table, destination_uri) + job.result() + + self.to_delete.insert(0, destination) + got = destination.download_as_string().decode('utf-8') + self.assertIn('Bharney Rhubble', got) + + def test_extract_table_w_job_config(self): + from google.cloud.storage import Client as StorageClient + + storage_client = StorageClient() + local_id = unique_resource_id() + bucket_name = 'bq_extract_test' + local_id + blob_name = 'person_ages.csv' + dataset_id = _make_dataset_id('load_gcs_then_extract') + table_id = 'test_table' + table = Config.CLIENT.dataset(dataset_id).table(table_id) + rows = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + self._load_table_for_extract_table( + storage_client, rows, bucket_name, blob_name, table) + bucket = storage_client.bucket(bucket_name) + destination_blob_name = 'person_ages_out.csv' + destination = bucket.blob(destination_blob_name) + destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) + + job_config = bigquery.ExtractJobConfig() + job_config.destination_format = 'NEWLINE_DELIMITED_JSON' + job = Config.CLIENT.extract_table( + table, destination_uri, job_config=job_config) + job.result() + + self.to_delete.insert(0, destination) + got = destination.download_as_string().decode('utf-8') + self.assertIn('"Bharney Rhubble"', got) + def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') JOB_NAME = 'fetch_' + DATASET_ID diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 091df8f7355a..3d40f38a5799 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -763,6 +763,49 @@ def __init__(self): self.assertIsNone(wrapper._configuration._attr) +class Test_TypedApiResourceProperty(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery._helpers import _TypedApiResourceProperty + + return _TypedApiResourceProperty + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_it(self): + + class Wrapper(object): + attr = self._make_one('attr', 'back', int) + + def __init__(self): + self._properties = {} + + self.assertIsNotNone(Wrapper.attr) + + wrapper = Wrapper() + with self.assertRaises(ValueError): + wrapper.attr = 'BOGUS' + + wrapper.attr = 42 + self.assertEqual(wrapper.attr, 42) + self.assertEqual(wrapper._properties['back'], 42) + + wrapper.attr = None + self.assertIsNone(wrapper.attr) + self.assertIsNone(wrapper._properties['back']) + + wrapper.attr = 23 + self.assertEqual(wrapper.attr, 23) + self.assertEqual(wrapper._properties['back'], 23) + + del wrapper.attr + self.assertIsNone(wrapper.attr) + with self.assertRaises(KeyError): + wrapper._properties['back'] + + class Test_TypedProperty(unittest.TestCase): @staticmethod diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 12282e47d931..d49d8ba4391c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -15,6 +15,7 @@ import unittest import mock +import six def _make_credentials(): @@ -714,26 +715,105 @@ def test_copy_table(self): self.assertEqual(list(job.sources), [source]) self.assertIs(job.destination, destination) - def test_extract_table_to_storage(self): + def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob PROJECT = 'PROJECT' - JOB = 'job_name' - DATASET = 'dataset_name' + JOB = 'job_id' + DATASET = 'dataset_id' SOURCE = 'source_table' DESTINATION = 'gs://bucket_name/object_name' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': SOURCE, + }, + 'destinationUris': [DESTINATION], + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESOURCE) dataset = client.dataset(DATASET) source = dataset.table(SOURCE) - job = client.extract_table_to_storage(JOB, source, DESTINATION) + + job = client.extract_table(source, DESTINATION, job_id=JOB) + + # Check that extract_table actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + + # Check the job resource. self.assertIsInstance(job, ExtractJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) + def test_extract_table_generated_job_id(self): + from google.cloud.bigquery.job import ExtractJob + from google.cloud.bigquery.job import ExtractJobConfig + from google.cloud.bigquery.job import DestinationFormat + + PROJECT = 'PROJECT' + JOB = 'job_id' + DATASET = 'dataset_id' + SOURCE = 'source_table' + DESTINATION = 'gs://bucket_name/object_name' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': SOURCE, + }, + 'destinationUris': [DESTINATION], + 'destinationFormat': 'NEWLINE_DELIMITED_JSON', + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESOURCE) + dataset = client.dataset(DATASET) + source = dataset.table(SOURCE) + job_config = ExtractJobConfig() + job_config.destination_format = ( + DestinationFormat.NEWLINE_DELIMITED_JSON) + + job = client.extract_table(source, DESTINATION, job_config=job_config) + + # Check that extract_table actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertIsInstance( + req['data']['jobReference']['jobId'], six.string_types) + + # Check the job resource. + self.assertIsInstance(job, ExtractJob) + self.assertIs(job._client, client) + self.assertEqual(job.source, source) + self.assertEqual(list(job.destination_uris), [DESTINATION]) + def test_run_async_query_defaults(self): from google.cloud.bigquery.job import QueryJob diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 11f4dec9870c..1da83260f06f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -17,6 +17,9 @@ from six.moves import http_client import unittest +from google.cloud.bigquery.job import ExtractJobConfig +from google.cloud.bigquery.dataset import DatasetReference + class Test__bool_or_none(unittest.TestCase): @@ -1217,31 +1220,31 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.destination_uris, config['destinationUris']) table_ref = config['sourceTable'] - self.assertEqual(job.source.project, table_ref['projectId']) - self.assertEqual(job.source.dataset_id, table_ref['datasetId']) + self.assertEqual(job.source.dataset.project, table_ref['projectId']) + self.assertEqual(job.source.dataset.dataset_id, table_ref['datasetId']) self.assertEqual(job.source.table_id, table_ref['tableId']) if 'compression' in config: - self.assertEqual(job.compression, - config['compression']) + self.assertEqual( + job.compression, config['compression']) else: self.assertIsNone(job.compression) if 'destinationFormat' in config: - self.assertEqual(job.destination_format, - config['destinationFormat']) + self.assertEqual( + job.destination_format, config['destinationFormat']) else: self.assertIsNone(job.destination_format) if 'fieldDelimiter' in config: - self.assertEqual(job.field_delimiter, - config['fieldDelimiter']) + self.assertEqual( + job.field_delimiter, config['fieldDelimiter']) else: self.assertIsNone(job.field_delimiter) if 'printHeader' in config: - self.assertEqual(job.print_header, - config['printHeader']) + self.assertEqual( + job.print_header, config['printHeader']) else: self.assertIsNone(job.print_header) @@ -1260,7 +1263,7 @@ def test_ctor(self): self._verifyInitialReadonlyProperties(job) - # set/read from resource['configuration']['copy'] + # set/read from resource['configuration']['extract'] self.assertIsNone(job.compression) self.assertIsNone(job.destination_format) self.assertIsNone(job.field_delimiter) @@ -1350,7 +1353,8 @@ def test_begin_w_bound_client(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client) @@ -1399,14 +1403,15 @@ def test_begin_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job_config = ExtractJobConfig() + job_config.compression = 'GZIP' + job_config.destination_format = 'NEWLINE_DELIMITED_JSON' + job_config.field_delimiter = '|' + job_config.print_header = False job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], - client1) - - job.compression = 'GZIP' - job.destination_format = 'NEWLINE_DELIMITED_JSON' - job.field_delimiter = '|' - job.print_header = False + client1, job_config) job.begin(client=client2) @@ -1467,7 +1472,8 @@ def test_reload_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client) @@ -1486,7 +1492,8 @@ def test_reload_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client1) From 4b8a4c8b62a53c79693ebd03ab4b61f143dd14bd Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 20 Sep 2017 09:37:55 -0700 Subject: [PATCH 0199/2016] BigQuery: Adds client.get_table() and removes table.reload() (#4004) * WIP adds client.get_table() * BigQuery: Adds client.get_table() and removes table.reload() * removes unnecessary variable * adds system test for client.get_table() --- .../google/cloud/bigquery/client.py | 15 +++++++ .../google/cloud/bigquery/table.py | 30 ++++++-------- .../google-cloud-bigquery/tests/system.py | 25 +++++++++--- .../tests/unit/test_client.py | 28 +++++++++++++ .../tests/unit/test_table.py | 39 ------------------- 5 files changed, 73 insertions(+), 64 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 05be0da8123d..9c0bce5eba7e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -23,6 +23,7 @@ from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.dataset import Table from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob @@ -207,6 +208,20 @@ def get_dataset(self, dataset_ref): method='GET', path=dataset_ref.path) return Dataset.from_api_repr(api_response, self) + def get_table(self, table_ref): + """Fetch the table referenced by ``table_ref`` + + :type table_ref: + :class:`google.cloud.bigquery.table.TableReference` + :param table_ref: the table to use. + + :rtype: :class:`google.cloud.bigquery.table.Table` + :returns: a ``Table`` instance + """ + api_response = self._connection.api_request( + method='GET', path=table_ref.path) + return Table.from_api_repr(api_response, self) + def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index fe1a9d3b4ec5..08ca377dc102 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -35,7 +35,7 @@ from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW -_TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'table.reload()'" +_TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'" _MARKER = object() _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB _BASE_UPLOAD_TEMPLATE = ( @@ -85,6 +85,15 @@ def table_id(self): """ return self._table_id + @property + def path(self): + """URL path for the table's APIs. + + :rtype: str + :returns: the path based on project, dataset and table IDs. + """ + return '%s/tables/%s' % (self._dataset_ref.path, self._table_id) + class Table(object): """Tables represent a set of rows whose values correspond to a schema. @@ -616,23 +625,6 @@ def exists(self, client=None): else: return True - def reload(self, client=None): - """API call: refresh table properties via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - - api_response = client._connection.api_request( - method='GET', path=self.path) - self._set_properties(api_response) - def patch(self, client=None, friendly_name=_MARKER, @@ -750,7 +742,7 @@ def fetch_data(self, max_results=None, page_token=None, client=None): up-to-date with the schema as defined on the back-end: if the two schemas are not identical, the values returned may be incomplete. To ensure that the local copy of the schema is - up-to-date, call :meth:`reload`. + up-to-date, call ``client.get_table``. :type max_results: int :param max_results: (Optional) Maximum number of rows to return. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 91ddca876853..f04f99703dbc 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -198,6 +198,21 @@ def test_create_table(self): self.assertTrue(table.exists()) self.assertEqual(table.table_id, TABLE_NAME) + def test_get_table_w_public_dataset(self): + PUBLIC = 'bigquery-public-data' + DATASET_ID = 'samples' + TABLE_ID = 'shakespeare' + table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_ID) + + table = Config.CLIENT.get_table(table_ref) + + self.assertEqual(table.table_id, TABLE_ID) + self.assertEqual(table.dataset_id, DATASET_ID) + self.assertEqual(table.project, PUBLIC) + schema_names = [field.name for field in table.schema] + self.assertEqual( + schema_names, ['word', 'word_count', 'corpus', 'corpus_date']) + def test_list_tables(self): DATASET_ID = _make_dataset_id('list_tables') dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) @@ -402,8 +417,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): self.assertEqual(job.output_rows, len(ROWS)) - # Reload table to get the schema before fetching the rows. - table.reload() + table = Config.CLIENT.get_table(table) rows = self._fetch_single_page(table) by_wavelength = operator.itemgetter(1) self.assertEqual(sorted(rows, key=by_wavelength), @@ -532,7 +546,7 @@ def test_load_table_from_storage_w_autodetect_schema(self): retry = RetryInstanceState(_job_done, max_tries=8) retry(job.reload)() - table.reload() + table = Config.CLIENT.get_table(table) field_name = SchemaField( u'Full_Name', u'string', u'NULLABLE', None, ()) field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ()) @@ -1181,9 +1195,8 @@ def test_dump_table_w_public_data(self): TABLE_NAME = 'natality' dataset = Dataset(DATASET_ID, Config.CLIENT, project=PUBLIC) - table = dataset.table(TABLE_NAME) - # Reload table to get the schema before fetching the rows. - table.reload() + table_ref = dataset.table(TABLE_NAME) + table = Config.CLIENT.get_table(table_ref) self._fetch_single_page(table) def test_large_query_w_public_data(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d49d8ba4391c..90b06af2d022 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -383,6 +383,34 @@ def test_create_dataset_w_attrs(self): self.assertEqual(ds.description, DESCRIPTION) self.assertEqual(ds.friendly_name, FRIENDLY_NAME) + def test_get_table(self): + project = 'PROJECT' + dataset_id = 'dataset_id' + table_id = 'table-id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + creds = _make_credentials() + http = object() + client = self._make_one(project=project, credentials=creds, _http=http) + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + } + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + + table = client.get_table(table_ref) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % path) + self.assertEqual(table.table_id, table_id) + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 7cc7bffe7080..a36a4b216c04 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -864,45 +864,6 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) - def test_reload_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - table.reload() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - RESOURCE = self._makeResource() - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - - table.reload(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) From 5a00b7d1bf2857928cbf37a1803bca6e1f43f321 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 20 Sep 2017 13:54:36 -0400 Subject: [PATCH 0200/2016] bigquery: add Client.update_dataset (#4003) * bigquery: add Client.update_dataset Remove Dataset.patch and Dataset.update. * improve cover * more coverage * update system tests * more coverage * add creds to client * small changes * . * convert Python field name to API field name --- .../google/cloud/bigquery/client.py | 50 ++++++- .../google/cloud/bigquery/dataset.py | 55 -------- .../google-cloud-bigquery/tests/system.py | 31 ++--- .../tests/unit/test_client.py | 69 ++++++++++ .../tests/unit/test_dataset.py | 122 +----------------- 5 files changed, 126 insertions(+), 201 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 9c0bce5eba7e..1f5620f66ca8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -190,9 +190,7 @@ def create_dataset(self, dataset): path = '/projects/%s/datasets' % (dataset.project,) api_response = self._connection.api_request( method='POST', path=path, data=dataset._build_resource()) - ds = Dataset(dataset.dataset_id, project=dataset.project, client=self) - ds._set_properties(api_response) - return ds + return Dataset.from_api_repr(api_response, self) def get_dataset(self, dataset_ref): """Fetch the dataset referenced by ``dataset_ref`` @@ -222,6 +220,52 @@ def get_table(self, table_ref): method='GET', path=table_ref.path) return Table.from_api_repr(api_response, self) + def update_dataset(self, dataset, fields): + """Change some fields of a dataset. + + Use ``fields`` to specify which fields to update. At least one field + must be provided. If a field is listed in ``fields`` and is ``None`` in + ``dataset``, it will be deleted. + + If ``dataset.etag`` is not ``None``, the update will only + succeed if the dataset on the server has the same ETag. Thus + reading a dataset with ``get_dataset``, changing its fields, + and then passing it ``update_dataset`` will ensure that the changes + will only be saved if no modifications to the dataset occurred + since the read. + + :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` + :param dataset: the dataset to update. + + :type fields: sequence of string + :param fields: the fields of ``dataset`` to change, spelled as the + Dataset properties (e.g. "friendly_name"). + + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` + :returns: the modified ``Dataset`` instance + :raises: ValueError for fields that cannot be updated. + + """ + if dataset.project is None: + dataset._project = self.project + path = '/projects/%s/datasets/%s' % (dataset.project, + dataset.dataset_id) + partial = {} + for f in fields: + if not hasattr(dataset, f): + raise ValueError('No Dataset field %s' % f) + # snake case to camel case + words = f.split('_') + api_field = words[0] + ''.join(map(str.capitalize, words[1:])) + partial[api_field] = getattr(dataset, f) + if dataset.etag is not None: + headers = {'If-Match': dataset.etag} + else: + headers = None + api_response = self._connection.api_request( + method='PATCH', path=path, data=partial, headers=headers) + return Dataset.from_api_repr(api_response, self) + def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 29dc3af19458..dabd0a129ec1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -488,61 +488,6 @@ def _build_resource(self): return resource - def patch(self, client=None, **kw): - """API call: update individual dataset properties via a PATCH request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/patch - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :type kw: ``dict`` - :param kw: properties to be patched. - - :raises: ValueError for invalid value types. - """ - client = self._require_client(client) - - partial = {} - - if 'default_table_expiration_ms' in kw: - value = kw['default_table_expiration_ms'] - if not isinstance(value, six.integer_types) and value is not None: - raise ValueError("Pass an integer, or None") - partial['defaultTableExpirationMs'] = value - - if 'description' in kw: - partial['description'] = kw['description'] - - if 'friendly_name' in kw: - partial['friendlyName'] = kw['friendly_name'] - - if 'location' in kw: - partial['location'] = kw['location'] - - api_response = client._connection.api_request( - method='PATCH', path=self.path, data=partial) - self._set_properties(api_response) - - def update(self, client=None): - """API call: update dataset properties via a PUT request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/update - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - api_response = client._connection.api_request( - method='PUT', path=self.path, data=self._build_resource()) - self._set_properties(api_response) - def delete(self, client=None): """API call: delete the dataset via a DELETE request. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index f04f99703dbc..badde5e412a5 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -132,7 +132,7 @@ def test_get_dataset(self): self.assertEqual(got.friendly_name, 'Friendly') self.assertEqual(got.description, 'Description') - def test_patch_dataset(self): + def test_update_dataset(self): dataset = retry_403(Config.CLIENT.create_dataset)( Dataset(_make_dataset_id('patch_dataset'))) self.to_delete.append(dataset) @@ -140,27 +140,14 @@ def test_patch_dataset(self): self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) - dataset.patch(friendly_name='Friendly', description='Description') - self.assertEqual(dataset.friendly_name, 'Friendly') - self.assertEqual(dataset.description, 'Description') - - def test_update_dataset(self): - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('update_dataset'))) - self.to_delete.append(dataset) - - self.assertTrue(_dataset_exists(dataset)) - after = [entry for entry in dataset.access_entries - if entry.entity_id != 'projectWriters'] - dataset.access_entries = after - - retry_403(dataset.update)() - - self.assertEqual(len(dataset.access_entries), len(after)) - for found, expected in zip(dataset.access_entries, after): - self.assertEqual(found.role, expected.role) - self.assertEqual(found.entity_type, expected.entity_type) - self.assertEqual(found.entity_id, expected.entity_id) + dataset.friendly_name = 'Friendly' + dataset.description = 'Description' + ds2 = Config.CLIENT.update_dataset(dataset, + ['friendly_name', 'description']) + self.assertEqual(ds2.friendly_name, 'Friendly') + self.assertEqual(ds2.description, 'Description') + + # TODO(jba): test that read-modify-write with ETag works. def test_list_datasets(self): datasets_to_create = [ diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 90b06af2d022..173f059374da 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -336,6 +336,7 @@ def test_create_dataset_w_attrs(self): PATH = 'projects/%s/datasets' % PROJECT DESCRIPTION = 'DESC' FRIENDLY_NAME = 'FN' + LOCATION = 'US' USER_EMAIL = 'phred@example.com' VIEW = { 'projectId': 'my-proj', @@ -349,6 +350,8 @@ def test_create_dataset_w_attrs(self): 'id': "%s:%s" % (PROJECT, DS_ID), 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'defaultTableExpirationMs': 3600, 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'view': VIEW}], @@ -361,6 +364,8 @@ def test_create_dataset_w_attrs(self): ds_arg = Dataset(DS_ID, project=PROJECT, access_entries=entries) ds_arg.description = DESCRIPTION ds_arg.friendly_name = FRIENDLY_NAME + ds_arg.default_table_expiration_ms = 3600 + ds_arg.location = LOCATION ds = client.create_dataset(ds_arg) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -371,6 +376,8 @@ def test_create_dataset_w_attrs(self): {'projectId': PROJECT, 'datasetId': DS_ID}, 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'defaultTableExpirationMs': 3600, 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'view': VIEW}], @@ -382,6 +389,8 @@ def test_create_dataset_w_attrs(self): self.assertEqual(ds.full_dataset_id, RESOURCE['id']) self.assertEqual(ds.description, DESCRIPTION) self.assertEqual(ds.friendly_name, FRIENDLY_NAME) + self.assertEqual(ds.location, LOCATION) + self.assertEqual(ds.default_table_expiration_ms, 3600) def test_get_table(self): project = 'PROJECT' @@ -411,6 +420,66 @@ def test_get_table(self): self.assertEqual(req['path'], '/%s' % path) self.assertEqual(table.table_id, table_id) + def test_update_dataset_w_invalid_field(self): + from google.cloud.bigquery.dataset import Dataset + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + with self.assertRaises(ValueError): + client.update_dataset(Dataset(DS_ID), ["foo"]) + + def test_update_dataset(self): + from google.cloud.bigquery.dataset import Dataset + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + PATH = 'projects/%s/datasets/%s' % (PROJECT, DS_ID) + DESCRIPTION = 'DESCRIPTION' + FRIENDLY_NAME = 'TITLE' + LOCATION = 'loc' + EXP = 17 + RESOURCE = { + 'datasetReference': + {'projectId': PROJECT, 'datasetId': DS_ID}, + 'etag': "etag", + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'defaultTableExpirationMs': EXP, + } + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection(RESOURCE, RESOURCE) + ds = Dataset(DS_ID, project=PROJECT) + ds.description = DESCRIPTION + ds.friendly_name = FRIENDLY_NAME + ds.location = LOCATION + ds.default_table_expiration_ms = EXP + ds2 = client.update_dataset( + ds, ['description', 'friendly_name', 'location']) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + SENT = { + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + } + self.assertEqual(req['data'], SENT) + self.assertEqual(req['path'], '/' + PATH) + self.assertIsNone(req['headers']) + self.assertEqual(ds2.description, ds.description) + self.assertEqual(ds2.friendly_name, ds.friendly_name) + self.assertEqual(ds2.location, ds.location) + + # ETag becomes If-Match header. + ds._properties['etag'] = 'etag' + client.update_dataset(ds, []) + req = conn._requested[1] + self.assertEqual(req['headers']['If-Match'], 'etag') + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 4c5cfb57265e..3f2a809955fb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -145,6 +145,7 @@ def _makeResource(self): 'lastModifiedTime': self.WHEN_TS * 1000, 'location': 'US', 'selfLink': self.RESOURCE_URL, + 'defaultTableExpirationMs': 3600, 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL}, @@ -386,127 +387,6 @@ def test__parse_access_entries_w_extra_keys(self): with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) - def test_patch_w_invalid_expiration(self): - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - with self.assertRaises(ValueError): - dataset.patch(default_table_expiration_ms='BOGUS') - - def test_patch_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - dataset.patch(description=DESCRIPTION, friendly_name=TITLE) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - SENT = { - 'description': DESCRIPTION, - 'friendlyName': TITLE, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verify_resource_properties(dataset, RESOURCE) - - def test_patch_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - DEF_TABLE_EXP = 12345 - LOCATION = 'EU' - RESOURCE = self._makeResource() - RESOURCE['defaultTableExpirationMs'] = str(DEF_TABLE_EXP) - RESOURCE['location'] = LOCATION - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - - dataset.patch(client=CLIENT2, - default_table_expiration_ms=DEF_TABLE_EXP, - location=LOCATION) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PATCH') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'defaultTableExpirationMs': DEF_TABLE_EXP, - 'location': LOCATION, - } - self.assertEqual(req['data'], SENT) - self._verify_resource_properties(dataset, RESOURCE) - - def test_update_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - dataset.description = DESCRIPTION - dataset.friendly_name = TITLE - - dataset.update() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PUT') - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verify_resource_properties(dataset, RESOURCE) - - def test_update_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - DEF_TABLE_EXP = 12345 - LOCATION = 'EU' - RESOURCE = self._makeResource() - RESOURCE['defaultTableExpirationMs'] = 12345 - RESOURCE['location'] = LOCATION - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - dataset.default_table_expiration_ms = DEF_TABLE_EXP - dataset.location = LOCATION - - dataset.update(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PUT') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'defaultTableExpirationMs': 12345, - 'location': 'EU', - } - self.assertEqual(req['data'], SENT) - self._verify_resource_properties(dataset, RESOURCE) - def test_delete_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn = _Connection({}) From ff57b5ce66ea8002138f2fe49158af6aa5168dc1 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 20 Sep 2017 11:18:25 -0700 Subject: [PATCH 0201/2016] BigQuery: Remove dataset property from TableReference and add project/dataset_id properties (#4011) * adds dataset_id and project properties to TableReference * Remove dataset property from Table and TableReference --- .../google/cloud/bigquery/job.py | 4 +- .../google/cloud/bigquery/table.py | 57 ++++++++++++------- .../google-cloud-bigquery/tests/system.py | 2 +- .../tests/unit/test_dataset.py | 9 ++- .../tests/unit/test_job.py | 14 ++--- .../tests/unit/test_table.py | 22 ++++--- 6 files changed, 64 insertions(+), 44 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index cfc861266355..025b839bc59c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1119,8 +1119,8 @@ def _build_resource(self): """Generate a resource for :meth:`begin`.""" source_ref = { - 'projectId': self.source.dataset.project, - 'datasetId': self.source.dataset.dataset_id, + 'projectId': self.source.project, + 'datasetId': self.source.dataset_id, 'tableId': self.source.table_id, } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 08ca377dc102..37e5bddbd7aa 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -64,17 +64,27 @@ class TableReference(object): """ def __init__(self, dataset_ref, table_id): - self._dataset_ref = dataset_ref + self._project = dataset_ref.project + self._dataset_id = dataset_ref.dataset_id self._table_id = table_id @property - def dataset(self): - """Pointer to the dataset. + def project(self): + """Project bound to the table. - :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference` - :returns: a pointer to the dataset. + :rtype: str + :returns: the project (derived from the dataset reference). """ - return self._dataset_ref + return self._project + + @property + def dataset_id(self): + """ID of dataset containing the table. + + :rtype: str + :returns: the ID (derived from the dataset reference). + """ + return self._dataset_id @property def table_id(self): @@ -92,7 +102,8 @@ def path(self): :rtype: str :returns: the path based on project, dataset and table IDs. """ - return '%s/tables/%s' % (self._dataset_ref.path, self._table_id) + return '/projects/%s/datasets/%s/tables/%s' % ( + self._project, self._dataset_id, self._table_id) class Table(object): @@ -111,8 +122,9 @@ class Table(object): _schema = None def __init__(self, table_ref, schema=(), client=None): + self._project = table_ref.project self._table_id = table_ref.table_id - self._dataset = table_ref.dataset + self._dataset_id = table_ref.dataset_id self._properties = {} # Let the @property do validation. self.schema = schema @@ -125,7 +137,7 @@ def project(self): :rtype: str :returns: the project (derived from the dataset). """ - return self._dataset.project + return self._project @property def dataset_id(self): @@ -134,7 +146,7 @@ def dataset_id(self): :rtype: str :returns: the ID (derived from the dataset). """ - return self._dataset.dataset_id + return self._dataset_id @property def table_id(self): @@ -152,7 +164,8 @@ def path(self): :rtype: str :returns: the path based on project, dataset and table IDs. """ - return '%s/tables/%s' % (self._dataset.path, self.table_id) + return '/projects/%s/datasets/%s/tables/%s' % ( + self._project, self._dataset_id, self._table_id) @property def schema(self): @@ -550,8 +563,8 @@ def _build_resource(self): """Generate a resource for ``create`` or ``update``.""" resource = { 'tableReference': { - 'projectId': self._dataset.project, - 'datasetId': self._dataset.dataset_id, + 'projectId': self._project, + 'datasetId': self._dataset_id, 'tableId': self.table_id}, } if self.description is not None: @@ -596,7 +609,7 @@ def create(self, client=None): """ client = self._require_client(client) path = '/projects/%s/datasets/%s/tables' % ( - self._dataset.project, self._dataset.dataset_id) + self._project, self._dataset_id) api_response = client._connection.api_request( method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) @@ -1188,7 +1201,8 @@ def upload_from_file(self, _maybe_rewind(file_obj, rewind=rewind) _check_mode(file_obj) metadata = _get_upload_metadata( - source_format, self._schema, self._dataset, self.table_id) + source_format, self._schema, self._project, + self._dataset_id, self.table_id) _configure_job_metadata(metadata, allow_jagged_rows, allow_quoted_newlines, create_disposition, encoding, field_delimiter, @@ -1353,7 +1367,7 @@ def _get_upload_headers(user_agent): } -def _get_upload_metadata(source_format, schema, dataset, table_id): +def _get_upload_metadata(source_format, schema, project, dataset_id, table_id): """Get base metadata for creating a table. :type source_format: str @@ -1363,8 +1377,11 @@ def _get_upload_metadata(source_format, schema, dataset, table_id): :type schema: list :param schema: List of :class:`SchemaField` associated with a table. - :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset` - :param dataset: A dataset which contains a table. + :type project: str + :param table_id: The project bound to the table. + + :type dataset_id: str + :param table_id: The dataset_id of the dataset. :type table_id: str :param table_id: The table_id of the table. @@ -1375,8 +1392,8 @@ def _get_upload_metadata(source_format, schema, dataset, table_id): load_config = { 'sourceFormat': source_format, 'destinationTable': { - 'projectId': dataset.project, - 'datasetId': dataset.dataset_id, + 'projectId': project, + 'datasetId': dataset_id, 'tableId': table_id, }, } diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index badde5e412a5..1e9a723631a3 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -568,7 +568,7 @@ def _load_table_for_extract_table( self.to_delete.insert(0, blob) dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(table.dataset.dataset_id)) + Dataset(table.dataset_id)) self.to_delete.append(dataset) table = dataset.table(table.table_id) self.to_delete.insert(0, table) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 3f2a809955fb..28c6790b793a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -104,7 +104,8 @@ def test_ctor_defaults(self): def test_table(self): dataset_ref = self._make_one('some-project-1', 'dataset_1') table_ref = dataset_ref.table('table_1') - self.assertIs(table_ref.dataset, dataset_ref) + self.assertEqual(table_ref.dataset_id, 'dataset_1') + self.assertEqual(table_ref.project, 'some-project-1') self.assertEqual(table_ref.table_id, 'table_1') @@ -543,7 +544,8 @@ def test_table_wo_schema(self): table = dataset.table('table_id') self.assertIsInstance(table, Table) self.assertEqual(table.table_id, 'table_id') - self.assertIs(table._dataset, dataset) + self.assertEqual(table.dataset_id, self.DS_ID) + self.assertEqual(table.project, self.PROJECT) self.assertEqual(table.schema, []) def test_table_w_schema(self): @@ -558,7 +560,8 @@ def test_table_w_schema(self): table = dataset.table('table_id', schema=[full_name, age]) self.assertIsInstance(table, Table) self.assertEqual(table.table_id, 'table_id') - self.assertIs(table._dataset, dataset) + self.assertEqual(table.dataset_id, self.DS_ID) + self.assertEqual(table.project, self.PROJECT) self.assertEqual(table.schema, [full_name, age]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 1da83260f06f..e53eb08970fc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1220,8 +1220,8 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.destination_uris, config['destinationUris']) table_ref = config['sourceTable'] - self.assertEqual(job.source.dataset.project, table_ref['projectId']) - self.assertEqual(job.source.dataset.dataset_id, table_ref['datasetId']) + self.assertEqual(job.source.project, table_ref['projectId']) + self.assertEqual(job.source.dataset_id, table_ref['datasetId']) self.assertEqual(job.source.table_id, table_ref['tableId']) if 'compression' in config: @@ -1908,7 +1908,6 @@ def test_statement_type(self): self.assertEqual(job.statement_type, statement_type) def test_referenced_tables(self): - from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.table import Table ref_tables_resource = [{ @@ -1942,24 +1941,21 @@ def test_referenced_tables(self): self.assertIsInstance(local1, Table) self.assertEqual(local1.table_id, 'local1') - self.assertIsInstance(local1._dataset, Dataset) self.assertEqual(local1.dataset_id, 'dataset') self.assertEqual(local1.project, self.PROJECT) - self.assertIs(local1._dataset._client, client) + self.assertIs(local1._client, client) self.assertIsInstance(local2, Table) self.assertEqual(local2.table_id, 'local2') - self.assertIsInstance(local2._dataset, Dataset) self.assertEqual(local2.dataset_id, 'dataset') self.assertEqual(local2.project, self.PROJECT) - self.assertIs(local2._dataset._client, client) + self.assertIs(local2._client, client) self.assertIsInstance(remote, Table) self.assertEqual(remote.table_id, 'other-table') - self.assertIsInstance(remote._dataset, Dataset) self.assertEqual(remote.dataset_id, 'other-dataset') self.assertEqual(remote.project, 'other-project-123') - self.assertIs(remote._dataset._client, client) + self.assertIs(remote._client, client) def test_undeclared_query_paramters(self): from google.cloud.bigquery._helpers import ArrayQueryParameter diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a36a4b216c04..2327d11b1ed3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -55,7 +55,7 @@ def test_ctor_defaults(self): dataset_ref = DatasetReference('project_1', 'dataset_1') table_ref = self._make_one(dataset_ref, 'table_1') - self.assertIs(table_ref.dataset, dataset_ref) + self.assertEqual(table_ref.dataset_id, dataset_ref.dataset_id) self.assertEqual(table_ref.table_id, 'table_1') @@ -173,7 +173,6 @@ def test_ctor(self): table = self._make_one(table_ref, client=client) self.assertEqual(table.table_id, self.TABLE_NAME) - self.assertIs(table._dataset, dataset) self.assertEqual(table.project, self.PROJECT) self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual( @@ -1710,7 +1709,8 @@ def _initiate_resumable_upload_helper(self, num_retries=None): data = b'goodbye gudbi gootbee' stream = io.BytesIO(data) metadata = _get_upload_metadata( - 'CSV', table._schema, table._dataset, table.table_id) + 'CSV', table._schema, table.project, + table.dataset_id, table.table_id) upload, transport = table._initiate_resumable_upload( client, stream, metadata, num_retries) @@ -1777,7 +1777,8 @@ def _do_multipart_upload_success_helper( data = b'Bzzzz-zap \x00\x01\xf4' stream = io.BytesIO(data) metadata = _get_upload_metadata( - 'CSV', table._schema, table._dataset, table.table_id) + 'CSV', table._schema, table.project, + table.dataset_id, table.table_id) size = len(data) response = table._do_multipart_upload( client, stream, metadata, size, num_retries) @@ -1928,7 +1929,7 @@ def test_upload_file_resumable_metadata(self): 'load': { 'sourceFormat': config_args['source_format'], 'destinationTable': { - 'projectId': table._dataset.project, + 'projectId': table.project, 'datasetId': table.dataset_id, 'tableId': table.table_id, }, @@ -2254,10 +2255,11 @@ def test_w_subfields(self): class Test__get_upload_metadata(unittest.TestCase): @staticmethod - def _call_fut(source_format, schema, dataset, name): + def _call_fut(source_format, schema, project, dataset_id, name): from google.cloud.bigquery.table import _get_upload_metadata - return _get_upload_metadata(source_format, schema, dataset, name) + return _get_upload_metadata( + source_format, schema, project, dataset_id, name) def test_empty_schema(self): source_format = 'AVRO' @@ -2265,7 +2267,8 @@ def test_empty_schema(self): spec=['dataset_id', 'project']) dataset.dataset_id = 'market' # mock.Mock() treats `name` specially. table_name = 'chairs' - metadata = self._call_fut(source_format, [], dataset, table_name) + metadata = self._call_fut(source_format, [], dataset.project, + dataset.dataset_id, table_name) expected = { 'configuration': { @@ -2290,7 +2293,8 @@ def test_with_schema(self): dataset.dataset_id = 'movie' # mock.Mock() treats `name` specially. table_name = 'teebull-neem' metadata = self._call_fut( - source_format, [full_name], dataset, table_name) + source_format, [full_name], dataset.project, + dataset.dataset_id, table_name) expected = { 'configuration': { From 71d43ee69f9591e950e6fa3352dc4bfc07278e16 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 20 Sep 2017 17:38:14 -0400 Subject: [PATCH 0202/2016] bigquery: add client.delete_dataset (#4012) * bigquery: add client.delete_dataset * support Dataset as well as DatasetReference * fix lint --- .../google/cloud/bigquery/client.py | 18 ++++++++++-- .../google/cloud/bigquery/dataset.py | 29 ------------------- .../google-cloud-bigquery/tests/system.py | 2 +- .../tests/unit/test_client.py | 23 +++++++++++++++ .../tests/unit/test_dataset.py | 29 ------------------- 5 files changed, 40 insertions(+), 61 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1f5620f66ca8..940b016645f3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -243,8 +243,6 @@ def update_dataset(self, dataset, fields): :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: the modified ``Dataset`` instance - :raises: ValueError for fields that cannot be updated. - """ if dataset.project is None: dataset._project = self.project @@ -266,6 +264,22 @@ def update_dataset(self, dataset, fields): method='PATCH', path=path, data=partial, headers=headers) return Dataset.from_api_repr(api_response, self) + def delete_dataset(self, dataset): + """Delete a dataset. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete + + :type dataset: One of: + :class:`~google.cloud.bigquery.dataset.Dataset` + :class:`~google.cloud.bigquery.dataset.DatasetReference` + + :param dataset: the dataset to delete, or a reference to it. + """ + if not isinstance(dataset, (Dataset, DatasetReference)): + raise TypeError('dataset must be a Dataset or a DatasetReference') + self._connection.api_request(method='DELETE', path=dataset.path) + def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index dabd0a129ec1..0d9198809595 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -394,21 +394,6 @@ def from_api_repr(cls, resource, client): dataset._set_properties(resource) return dataset - def _require_client(self, client): - """Check client or verify over-ride. - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: :class:`google.cloud.bigquery.client.Client` - :returns: The client passed in or the currently bound client. - """ - if client is None: - client = self._client - return client - @staticmethod def _parse_access_entries(access): """Parse a resource fragment into a set of access entries. @@ -488,20 +473,6 @@ def _build_resource(self): return resource - def delete(self, client=None): - """API call: delete the dataset via a DELETE request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - client._connection.api_request(method='DELETE', path=self.path) - def list_tables(self, max_results=None, page_token=None): """List tables for the project associated with this client. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1e9a723631a3..31cd0fb9dfdd 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -105,7 +105,7 @@ def _still_in_use(bad_request): if isinstance(doomed, Bucket): retry_409(doomed.delete)(force=True) elif isinstance(doomed, Dataset): - retry_in_use(doomed.delete)() + retry_in_use(Config.CLIENT.delete_dataset)(doomed) else: doomed.delete() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 173f059374da..ceb530d5a134 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -480,6 +480,29 @@ def test_update_dataset(self): req = conn._requested[1] self.assertEqual(req['headers']['If-Match'], 'etag') + def test_delete_dataset(self): + from google.cloud.bigquery.dataset import Dataset + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + PATH = 'projects/%s/datasets/%s' % (PROJECT, DS_ID) + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection({}, {}) + for arg in (client.dataset(DS_ID), Dataset(DS_ID, project=PROJECT)): + client.delete_dataset(arg) + req = conn._requested[0] + self.assertEqual(req['method'], 'DELETE') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_delete_dataset_wrong_type(self): + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + with self.assertRaises(TypeError): + client.delete_dataset(client.dataset(DS_ID).table("foo")) + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 28c6790b793a..ac863d5052d5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -388,35 +388,6 @@ def test__parse_access_entries_w_extra_keys(self): with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) - def test_delete_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - dataset.delete() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_delete_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - - dataset.delete(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - def test_list_tables_empty(self): import six From a7a7fc400ac27c7fc11040dc8e57a128a92292c8 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 20 Sep 2017 17:14:00 -0700 Subject: [PATCH 0203/2016] updates dataset.table() to return a TableReference instead of a Table (#4014) --- .../google/cloud/bigquery/dataset.py | 21 +++--- .../google/cloud/bigquery/job.py | 2 +- .../google-cloud-bigquery/tests/system.py | 65 ++++++++++++------- .../tests/unit/test_dataset.py | 23 +------ .../tests/unit/test_job.py | 19 ++---- 5 files changed, 60 insertions(+), 70 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 0d9198809595..3f5bb060c60b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -141,6 +141,9 @@ def path(self): def table(self, table_id): """Constructs a TableReference. + :type table_id: str + :param table_id: the ID of the table. + :rtype: :class:`google.cloud.bigquery.table.TableReference` :returns: a TableReference for a table in this dataset. """ @@ -505,20 +508,16 @@ def list_tables(self, max_results=None, page_token=None): result.dataset = self return result - def table(self, name, schema=()): - """Construct a table bound to this dataset. - - :type name: str - :param name: Name of the table. + def table(self, table_id): + """Constructs a TableReference. - :type schema: list of :class:`google.cloud.bigquery.table.SchemaField` - :param schema: The table's schema + :type table_id: str + :param table_id: the ID of the table. - :rtype: :class:`google.cloud.bigquery.table.Table` - :returns: a new ``Table`` instance + :rtype: :class:`google.cloud.bigquery.table.TableReference` + :returns: a TableReference for a table in this dataset. """ - table_ref = TableReference(self, name) - return Table(table_ref, schema=schema, client=self._client) + return TableReference(self, table_id) def _item_to_table(iterator, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 025b839bc59c..1e0959565074 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1248,7 +1248,7 @@ def __init__(self, job_id, query, client, https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.defaultDataset """ - destination = _TypedProperty('destination', Table) + destination = _TypedProperty('destination', TableReference) """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable """ diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 31cd0fb9dfdd..1adc30d787b5 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -26,6 +26,7 @@ from google.cloud import bigquery from google.cloud.bigquery.dataset import Dataset, DatasetReference +from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi from google.cloud.exceptions import Forbidden, NotFound @@ -178,7 +179,8 @@ def test_create_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) self.assertFalse(table.exists()) table.create() self.to_delete.insert(0, table) @@ -221,7 +223,9 @@ def test_list_tables(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') for table_name in tables_to_create: - created_table = dataset.table(table_name, schema=[full_name, age]) + created_table = Table(dataset.table(table_name), + schema=[full_name, age], + client=Config.CLIENT) created_table.create() self.to_delete.insert(0, created_table) @@ -243,7 +247,8 @@ def test_patch_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) self.assertFalse(table.exists()) table.create() self.to_delete.insert(0, table) @@ -263,7 +268,8 @@ def test_update_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) self.assertFalse(table.exists()) table.create() self.to_delete.insert(0, table) @@ -306,7 +312,8 @@ def test_insert_data_then_dump_table(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') now = bigquery.SchemaField('now', 'TIMESTAMP') - table = dataset.table(TABLE_NAME, schema=[full_name, age, now]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age, now], + client=Config.CLIENT) self.assertFalse(table.exists()) table.create() self.to_delete.insert(0, table) @@ -345,7 +352,8 @@ def test_load_table_from_local_file_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) @@ -389,7 +397,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): Dataset(_make_dataset_id('load_local_then_dump'))) self.to_delete.append(dataset) - table = dataset.table(TABLE_NAME) + table = Table(dataset.table(TABLE_NAME), client=Config.CLIENT) self.to_delete.insert(0, table) with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof: @@ -453,7 +461,8 @@ def test_load_table_from_storage_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) @@ -518,11 +527,10 @@ def test_load_table_from_storage_w_autodetect_schema(self): Dataset(_make_dataset_id('load_gcs_then_dump'))) self.to_delete.append(dataset) - table = dataset.table(table_name) - self.to_delete.insert(0, table) + table_ref = dataset.table(table_name) job = Config.CLIENT.load_table_from_storage( - 'bq_load_storage_test_' + local_id, table, gs_url) + 'bq_load_storage_test_' + local_id, table_ref, gs_url) job.autodetect = True job.begin() @@ -533,7 +541,8 @@ def test_load_table_from_storage_w_autodetect_schema(self): retry = RetryInstanceState(_job_done, max_tries=8) retry(job.reload)() - table = Config.CLIENT.get_table(table) + table = Config.CLIENT.get_table(table_ref) + self.to_delete.insert(0, table) field_name = SchemaField( u'Full_Name', u'string', u'NULLABLE', None, ()) field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ()) @@ -570,10 +579,9 @@ def _load_table_for_extract_table( dataset = retry_403(Config.CLIENT.create_dataset)( Dataset(table.dataset_id)) self.to_delete.append(dataset) - table = dataset.table(table.table_id) - self.to_delete.insert(0, table) + table_ref = dataset.table(table.table_id) job = Config.CLIENT.load_table_from_storage( - 'bq_extract_storage_test_' + local_id, table, gs_url) + 'bq_extract_storage_test_' + local_id, table_ref, gs_url) job.autodetect = True job.begin() # Allow for 90 seconds of "warm up" before rows visible. See @@ -591,7 +599,9 @@ def test_extract_table(self): blob_name = 'person_ages.csv' dataset_id = _make_dataset_id('load_gcs_then_extract') table_id = 'test_table' - table = Config.CLIENT.dataset(dataset_id).table(table_id) + table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=Config.CLIENT) + self.to_delete.insert(0, table) rows = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), @@ -599,13 +609,13 @@ def test_extract_table(self): ('Bhettye Rhubble', 27), ] self._load_table_for_extract_table( - storage_client, rows, bucket_name, blob_name, table) + storage_client, rows, bucket_name, blob_name, table_ref) bucket = storage_client.bucket(bucket_name) destination_blob_name = 'person_ages_out.csv' destination = bucket.blob(destination_blob_name) destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) - job = Config.CLIENT.extract_table(table, destination_uri) + job = Config.CLIENT.extract_table(table_ref, destination_uri) job.result() self.to_delete.insert(0, destination) @@ -621,7 +631,9 @@ def test_extract_table_w_job_config(self): blob_name = 'person_ages.csv' dataset_id = _make_dataset_id('load_gcs_then_extract') table_id = 'test_table' - table = Config.CLIENT.dataset(dataset_id).table(table_id) + table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=Config.CLIENT) + self.to_delete.insert(0, table) rows = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), @@ -629,7 +641,7 @@ def test_extract_table_w_job_config(self): ('Bhettye Rhubble', 27), ] self._load_table_for_extract_table( - storage_client, rows, bucket_name, blob_name, table) + storage_client, rows, bucket_name, blob_name, table_ref) bucket = storage_client.bucket(bucket_name) destination_blob_name = 'person_ages_out.csv' destination = bucket.blob(destination_blob_name) @@ -657,7 +669,8 @@ def test_job_cancel(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) @@ -845,7 +858,8 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') - table = dataset.table(table_id, schema=[greeting]) + table = Table(dataset.table(table_id), schema=[greeting], + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) @@ -1237,7 +1251,8 @@ def test_insert_nested_nested(self): Dataset(_make_dataset_id('issue_2951'))) self.to_delete.append(dataset) - table = dataset.table(table_name, schema=schema) + table = Table(dataset.table(table_name), schema=schema, + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) @@ -1249,14 +1264,14 @@ def test_insert_nested_nested(self): self.assertEqual(rows, to_insert) def test_create_table_insert_fetch_nested_schema(self): - table_name = 'test_table' dataset = retry_403(Config.CLIENT.create_dataset)( Dataset(_make_dataset_id('create_table_nested_schema'))) self.to_delete.append(dataset) schema = _load_json_schema() - table = dataset.table(table_name, schema=schema) + table = Table(dataset.table(table_name), schema=schema, + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) self.assertTrue(table.exists()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index ac863d5052d5..c6c62652d366 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -506,34 +506,17 @@ def test_list_tables_explicit(self): self.assertEqual(req['query_params'], {'maxResults': 3, 'pageToken': TOKEN}) - def test_table_wo_schema(self): - from google.cloud.bigquery.table import Table + def test_table(self): + from google.cloud.bigquery.table import TableReference conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = self._make_one(self.DS_ID, client=client) table = dataset.table('table_id') - self.assertIsInstance(table, Table) - self.assertEqual(table.table_id, 'table_id') - self.assertEqual(table.dataset_id, self.DS_ID) - self.assertEqual(table.project, self.PROJECT) - self.assertEqual(table.schema, []) - - def test_table_w_schema(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table - - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table('table_id', schema=[full_name, age]) - self.assertIsInstance(table, Table) + self.assertIsInstance(table, TableReference) self.assertEqual(table.table_id, 'table_id') self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual(table.project, self.PROJECT) - self.assertEqual(table.schema, [full_name, age]) class _Client(object): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index e53eb08970fc..7c662d01d8c6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1908,7 +1908,7 @@ def test_statement_type(self): self.assertEqual(job.statement_type, statement_type) def test_referenced_tables(self): - from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TableReference ref_tables_resource = [{ 'projectId': self.PROJECT, @@ -1939,23 +1939,20 @@ def test_referenced_tables(self): local1, local2, remote = job.referenced_tables - self.assertIsInstance(local1, Table) + self.assertIsInstance(local1, TableReference) self.assertEqual(local1.table_id, 'local1') self.assertEqual(local1.dataset_id, 'dataset') self.assertEqual(local1.project, self.PROJECT) - self.assertIs(local1._client, client) - self.assertIsInstance(local2, Table) + self.assertIsInstance(local2, TableReference) self.assertEqual(local2.table_id, 'local2') self.assertEqual(local2.dataset_id, 'dataset') self.assertEqual(local2.project, self.PROJECT) - self.assertIs(local2._client, client) - self.assertIsInstance(remote, Table) + self.assertIsInstance(remote, TableReference) self.assertEqual(remote.table_id, 'other-table') self.assertEqual(remote.dataset_id, 'other-dataset') self.assertEqual(remote.project, 'other-project-123') - self.assertIs(remote._client, client) def test_undeclared_query_paramters(self): from google.cloud.bigquery._helpers import ArrayQueryParameter @@ -2173,7 +2170,6 @@ def test_begin_w_bound_client(self): def test_begin_w_alternate_client(self): from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.dataset import Table PATH = '/projects/%s/jobs' % (self.PROJECT,) TABLE = 'TABLE' @@ -2210,12 +2206,11 @@ def test_begin_w_alternate_client(self): dataset_ref = DatasetReference(self.PROJECT, DS_ID) dataset = Dataset(DS_ID, client1) table_ref = dataset_ref.table(TABLE) - table = Table(table_ref, client=client1) job.allow_large_results = True job.create_disposition = 'CREATE_NEVER' job.default_dataset = dataset - job.destination = table + job.destination = table_ref job.flatten_results = True job.priority = 'INTERACTIVE' job.use_query_cache = True @@ -2467,7 +2462,6 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.table import Table PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) DS_ID = 'DATASET' @@ -2479,8 +2473,7 @@ def test_reload_w_bound_client(self): dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) - table = Table(table_ref, client=client) - job.destination = table + job.destination = table_ref job.reload() From 947cc3cbebdf16cd4680b1961d63ec238d03f025 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 21 Sep 2017 12:38:09 -0400 Subject: [PATCH 0204/2016] bigquery: add client.list_dataset_tables (#4013) Remove Dataset.list_tables --- .../google/cloud/bigquery/client.py | 55 ++++++- .../google/cloud/bigquery/dataset.py | 49 ------- .../google-cloud-bigquery/tests/system.py | 6 +- .../tests/unit/test_client.py | 136 ++++++++++++++++++ .../tests/unit/test_dataset.py | 136 +----------------- 5 files changed, 195 insertions(+), 187 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 940b016645f3..3c02a8e10df2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -23,7 +23,7 @@ from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.dataset import Table +from google.cloud.bigquery.table import Table from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob @@ -264,6 +264,44 @@ def update_dataset(self, dataset, fields): method='PATCH', path=path, data=partial, headers=headers) return Dataset.from_api_repr(api_response, self) + def list_dataset_tables(self, dataset, max_results=None, page_token=None): + """List tables in the dataset. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list + + :type dataset: One of: + :class:`~google.cloud.bigquery.dataset.Dataset` + :class:`~google.cloud.bigquery.dataset.DatasetReference` + :param dataset: the dataset whose tables to list, or a reference to it. + + :type max_results: int + :param max_results: (Optional) Maximum number of tables to return. + If not passed, defaults to a value set by the API. + + :type page_token: str + :param page_token: (Optional) Opaque marker for the next "page" of + datasets. If not passed, the API will return the + first page of datasets. + + :rtype: :class:`~google.api.core.page_iterator.Iterator` + :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` + contained within the current dataset. + """ + if not isinstance(dataset, (Dataset, DatasetReference)): + raise TypeError('dataset must be a Dataset or a DatasetReference') + path = '%s/tables' % dataset.path + result = page_iterator.HTTPIterator( + client=self, + api_request=self._connection.api_request, + path=path, + item_to_value=_item_to_table, + items_key='tables', + page_token=page_token, + max_results=max_results) + result.dataset = dataset + return result + def delete_dataset(self, dataset): """Delete a dataset. @@ -600,3 +638,18 @@ def _item_to_job(iterator, resource): :returns: The next job in the page. """ return iterator.client.job_from_resource(resource) + + +def _item_to_table(iterator, resource): + """Convert a JSON table to the native object. + + :type iterator: :class:`~google.api.core.page_iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type resource: dict + :param resource: An item to be converted to a table. + + :rtype: :class:`~google.cloud.bigquery.table.Table` + :returns: The next table in the page. + """ + return Table.from_api_repr(resource, iterator.client) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 3f5bb060c60b..fb41ee2e8a95 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -18,9 +18,7 @@ import six -from google.api.core import page_iterator from google.cloud._helpers import _datetime_from_microseconds -from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference @@ -476,38 +474,6 @@ def _build_resource(self): return resource - def list_tables(self, max_results=None, page_token=None): - """List tables for the project associated with this client. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list - - :type max_results: int - :param max_results: (Optional) Maximum number of tables to return. - If not passed, defaults to a value set by the API. - - :type page_token: str - :param page_token: (Optional) Opaque marker for the next "page" of - datasets. If not passed, the API will return the - first page of datasets. - - :rtype: :class:`~google.api.core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` - contained within the current dataset. - """ - path = '/projects/%s/datasets/%s/tables' % ( - self.project, self.dataset_id) - result = page_iterator.HTTPIterator( - client=self._client, - api_request=self._client._connection.api_request, - path=path, - item_to_value=_item_to_table, - items_key='tables', - page_token=page_token, - max_results=max_results) - result.dataset = self - return result - def table(self, table_id): """Constructs a TableReference. @@ -518,18 +484,3 @@ def table(self, table_id): :returns: a TableReference for a table in this dataset. """ return TableReference(self, table_id) - - -def _item_to_table(iterator, resource): - """Convert a JSON table to the native object. - - :type iterator: :class:`~google.api.core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type resource: dict - :param resource: An item to be converted to a table. - - :rtype: :class:`~google.cloud.bigquery.table.Table` - :returns: The next table in the page. - """ - return Table.from_api_repr(resource, iterator.dataset) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1adc30d787b5..82763d89724b 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -202,13 +202,13 @@ def test_get_table_w_public_dataset(self): self.assertEqual( schema_names, ['word', 'word_count', 'corpus', 'corpus_date']) - def test_list_tables(self): + def test_list_dataset_tables(self): DATASET_ID = _make_dataset_id('list_tables') dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) self.to_delete.append(dataset) # Retrieve tables before any are created for the dataset. - iterator = dataset.list_tables() + iterator = Config.CLIENT.list_dataset_tables(dataset) all_tables = list(iterator) self.assertEqual(all_tables, []) self.assertIsNone(iterator.next_page_token) @@ -230,7 +230,7 @@ def test_list_tables(self): self.to_delete.insert(0, created_table) # Retrieve the tables. - iterator = dataset.list_tables() + iterator = Config.CLIENT.list_dataset_tables(dataset) all_tables = list(iterator) self.assertIsNone(iterator.next_page_token) created = [table for table in all_tables diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ceb530d5a134..8916e9682ac1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -480,6 +480,142 @@ def test_update_dataset(self): req = conn._requested[1] self.assertEqual(req['headers']['If-Match'], 'etag') + def test_list_dataset_tables_empty(self): + import six + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection({}) + + dataset = client.dataset(DS_ID) + iterator = client.list_dataset_tables(dataset) + self.assertIs(iterator.dataset, dataset) + page = six.next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + self.assertEqual(tables, []) + self.assertIsNone(token) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_dataset_tables_defaults(self): + import six + from google.cloud.bigquery.table import Table + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + TABLE_1 = 'table_one' + TABLE_2 = 'table_two' + PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + TOKEN = 'TOKEN' + DATA = { + 'nextPageToken': TOKEN, + 'tables': [ + {'kind': 'bigquery#table', + 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_1), + 'tableReference': {'tableId': TABLE_1, + 'datasetId': DS_ID, + 'projectId': PROJECT}, + 'type': 'TABLE'}, + {'kind': 'bigquery#table', + 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_2), + 'tableReference': {'tableId': TABLE_2, + 'datasetId': DS_ID, + 'projectId': PROJECT}, + 'type': 'TABLE'}, + ] + } + + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection(DATA) + dataset = client.dataset(DS_ID) + + iterator = client.list_dataset_tables(dataset) + self.assertIs(iterator.dataset, dataset) + page = six.next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + self.assertEqual(len(tables), len(DATA['tables'])) + for found, expected in zip(tables, DATA['tables']): + self.assertIsInstance(found, Table) + self.assertEqual(found.full_table_id, expected['id']) + self.assertEqual(found.table_type, expected['type']) + self.assertEqual(token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_dataset_tables_explicit(self): + import six + from google.cloud.bigquery.table import Table + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + TABLE_1 = 'table_one' + TABLE_2 = 'table_two' + PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + TOKEN = 'TOKEN' + DATA = { + 'tables': [ + {'kind': 'bigquery#dataset', + 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_1), + 'tableReference': {'tableId': TABLE_1, + 'datasetId': DS_ID, + 'projectId': PROJECT}, + 'type': 'TABLE'}, + {'kind': 'bigquery#dataset', + 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_2), + 'tableReference': {'tableId': TABLE_2, + 'datasetId': DS_ID, + 'projectId': PROJECT}, + 'type': 'TABLE'}, + ] + } + + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection(DATA) + dataset = client.dataset(DS_ID) + + iterator = client.list_dataset_tables( + dataset, max_results=3, page_token=TOKEN) + self.assertIs(iterator.dataset, dataset) + page = six.next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + self.assertEqual(len(tables), len(DATA['tables'])) + for found, expected in zip(tables, DATA['tables']): + self.assertIsInstance(found, Table) + self.assertEqual(found.full_table_id, expected['id']) + self.assertEqual(found.table_type, expected['type']) + self.assertIsNone(token) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], + {'maxResults': 3, 'pageToken': TOKEN}) + + def test_list_dataset_tables_wrong_type(self): + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + with self.assertRaises(TypeError): + client.list_dataset_tables(client.dataset(DS_ID).table("foo")) + def test_delete_dataset(self): from google.cloud.bigquery.dataset import Dataset diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index c6c62652d366..c2fa2a024f17 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -388,129 +388,10 @@ def test__parse_access_entries_w_extra_keys(self): with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) - def test_list_tables_empty(self): - import six - - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - iterator = dataset.list_tables() - self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(tables, []) - self.assertIsNone(token) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - self.assertEqual(req['path'], '/%s' % PATH) - - def test_list_tables_defaults(self): - import six - from google.cloud.bigquery.table import Table - - TABLE_1 = 'table_one' - TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - TOKEN = 'TOKEN' - DATA = { - 'nextPageToken': TOKEN, - 'tables': [ - {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), - 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), - 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - ] - } - - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - iterator = dataset.list_tables() - self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(len(tables), len(DATA['tables'])) - for found, expected in zip(tables, DATA['tables']): - self.assertIsInstance(found, Table) - self.assertEqual(found.full_table_id, expected['id']) - self.assertEqual(found.table_type, expected['type']) - self.assertEqual(token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_list_tables_explicit(self): - import six - from google.cloud.bigquery.table import Table - - TABLE_1 = 'table_one' - TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - TOKEN = 'TOKEN' - DATA = { - 'tables': [ - {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), - 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), - 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - ] - } - - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - iterator = dataset.list_tables(max_results=3, page_token=TOKEN) - self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(len(tables), len(DATA['tables'])) - for found, expected in zip(tables, DATA['tables']): - self.assertIsInstance(found, Table) - self.assertEqual(found.full_table_id, expected['id']) - self.assertEqual(found.table_type, expected['type']) - self.assertIsNone(token) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'maxResults': 3, 'pageToken': TOKEN}) - def test_table(self): from google.cloud.bigquery.table import TableReference - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) + client = _Client(project=self.PROJECT) dataset = self._make_one(self.DS_ID, client=client) table = dataset.table('table_id') self.assertIsInstance(table, TableReference) @@ -521,18 +402,5 @@ def test_table(self): class _Client(object): - def __init__(self, project='project', connection=None): + def __init__(self, project='project'): self.project = project - self._connection = connection - - -class _Connection(object): - - def __init__(self, *responses): - self._responses = responses - self._requested = [] - - def api_request(self, **kw): - self._requested.append(kw) - response, self._responses = self._responses[0], self._responses[1:] - return response From 9b12a14c4d76fbb54a6b2f5aba09231395561e4a Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 21 Sep 2017 16:26:23 -0400 Subject: [PATCH 0205/2016] bigquery: remove client from Dataset (#4018) --- .../google/cloud/bigquery/client.py | 8 +-- .../google/cloud/bigquery/dataset.py | 27 +++----- .../google/cloud/bigquery/job.py | 19 ++--- .../google-cloud-bigquery/tests/system.py | 5 +- .../tests/unit/test_dataset.py | 69 ++++++------------- .../tests/unit/test_job.py | 4 +- .../tests/unit/test_query.py | 4 +- 7 files changed, 49 insertions(+), 87 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 3c02a8e10df2..f40904c2057e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -190,7 +190,7 @@ def create_dataset(self, dataset): path = '/projects/%s/datasets' % (dataset.project,) api_response = self._connection.api_request( method='POST', path=path, data=dataset._build_resource()) - return Dataset.from_api_repr(api_response, self) + return Dataset.from_api_repr(api_response) def get_dataset(self, dataset_ref): """Fetch the dataset referenced by ``dataset_ref`` @@ -204,7 +204,7 @@ def get_dataset(self, dataset_ref): """ api_response = self._connection.api_request( method='GET', path=dataset_ref.path) - return Dataset.from_api_repr(api_response, self) + return Dataset.from_api_repr(api_response) def get_table(self, table_ref): """Fetch the table referenced by ``table_ref`` @@ -262,7 +262,7 @@ def update_dataset(self, dataset, fields): headers = None api_response = self._connection.api_request( method='PATCH', path=path, data=partial, headers=headers) - return Dataset.from_api_repr(api_response, self) + return Dataset.from_api_repr(api_response) def list_dataset_tables(self, dataset, max_results=None, page_token=None): """List tables in the dataset. @@ -622,7 +622,7 @@ def _item_to_dataset(iterator, resource): :rtype: :class:`.Dataset` :returns: The next dataset in the page. """ - return Dataset.from_api_repr(resource, iterator.client) + return Dataset.from_api_repr(resource) def _item_to_job(iterator, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index fb41ee2e8a95..c5bed721bab4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -157,38 +157,31 @@ class Dataset(object): :type dataset_id: str :param dataset_id: the ID of the dataset - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: (Optional) A client which holds credentials and project - configuration for the dataset (which requires a project). - :type access_entries: list of :class:`AccessEntry` :param access_entries: roles granted to entities for this dataset :type project: str - :param project: (Optional) project ID for the dataset (defaults to - the project of the client). + :param project: (Optional) project ID for the dataset. """ _access_entries = None def __init__(self, dataset_id, - client=None, access_entries=(), project=None): self._dataset_id = dataset_id - self._client = client self._properties = {} # Let the @property do validation. self.access_entries = access_entries - self._project = project or (client and client.project) + self._project = project @property def project(self): """Project bound to the dataset. :rtype: str - :returns: the project (derived from the client). + :returns: the project. """ return self._project @@ -373,25 +366,21 @@ def location(self, value): self._properties['location'] = value @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource): """Factory: construct a dataset given its API representation :type resource: dict :param resource: dataset resource representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: Client which holds credentials and project - configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: Dataset parsed from ``resource``. """ - if ('datasetReference' not in resource or - 'datasetId' not in resource['datasetReference']): + dsr = resource.get('datasetReference') + if dsr is None or 'datasetId' not in dsr: raise KeyError('Resource lacks required identity information:' '["datasetReference"]["datasetId"]') - dataset_id = resource['datasetReference']['datasetId'] - dataset = cls(dataset_id, client=client) + dataset_id = dsr['datasetId'] + dataset = cls(dataset_id, project=dsr['projectId']) dataset._set_properties(resource) return dataset diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 1e0959565074..42f12ac39838 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -842,7 +842,8 @@ def from_api_repr(cls, resource, client): """ job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], client) + dataset = Dataset(dest_config['datasetId'], + project=dest_config['projectId']) table_ref = TableReference(dataset, dest_config['tableId']) destination = Table(table_ref, client=client) source_urls = config.get('sourceUris', ()) @@ -958,7 +959,8 @@ def from_api_repr(cls, resource, client): """ job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], client) + dataset = Dataset(dest_config['datasetId'], + project=dest_config['projectId']) table_ref = TableReference(dataset, dest_config['tableId']) destination = Table(table_ref, client=client) sources = [] @@ -970,7 +972,8 @@ def from_api_repr(cls, resource, client): "Resource missing 'sourceTables' / 'sourceTable'") source_configs = [single] for source_config in source_configs: - dataset = Dataset(source_config['datasetId'], client) + dataset = Dataset(source_config['datasetId'], + project=source_config['projectId']) table_ref = TableReference(dataset, source_config['tableId']) sources.append(Table(table_ref, client=client)) job = cls(job_id, destination, sources, client=client) @@ -1423,8 +1426,7 @@ def _copy_configuration_properties(self, configuration): dest_local = self._destination_table_resource() if dest_remote != dest_local: project = dest_remote['projectId'] - dataset = Dataset( - dest_remote['datasetId'], self._client, project=project) + dataset = Dataset(dest_remote['datasetId'], project=project) self.destination = dataset.table(dest_remote['tableId']) def_ds = configuration.get('defaultDataset') @@ -1432,8 +1434,8 @@ def _copy_configuration_properties(self, configuration): if self.default_dataset is not None: del self.default_dataset else: - project = def_ds['projectId'] - self.default_dataset = Dataset(def_ds['datasetId'], self._client) + self.default_dataset = Dataset(def_ds['datasetId'], + project=def_ds['projectId']) udf_resources = [] for udf_mapping in configuration.get(self._UDF_KEY, ()): @@ -1579,7 +1581,6 @@ def referenced_tables(self): if the query has not yet completed. """ tables = [] - client = self._require_client(None) datasets_by_project_name = {} for table in self._job_statistics().get('referencedTables', ()): @@ -1589,7 +1590,7 @@ def referenced_tables(self): ds_name = table['datasetId'] t_dataset = datasets_by_project_name.get((t_project, ds_name)) if t_dataset is None: - t_dataset = Dataset(ds_name, client, project=t_project) + t_dataset = Dataset(ds_name, project=t_project) datasets_by_project_name[(t_project, ds_name)] = t_dataset t_name = table['tableId'] diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 82763d89724b..d621a661f06d 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -117,11 +117,12 @@ def test_create_dataset(self): self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.dataset_id, DATASET_ID) + self.assertEqual(dataset.project, Config.CLIENT.project) def test_get_dataset(self): DATASET_ID = _make_dataset_id('get_dataset') client = Config.CLIENT - dataset_arg = Dataset(DATASET_ID) + dataset_arg = Dataset(DATASET_ID, project=client.project) dataset_arg.friendly_name = 'Friendly' dataset_arg.description = 'Description' dataset = retry_403(client.create_dataset)(dataset_arg) @@ -1195,7 +1196,7 @@ def test_dump_table_w_public_data(self): DATASET_ID = 'samples' TABLE_NAME = 'natality' - dataset = Dataset(DATASET_ID, Config.CLIENT, project=PUBLIC) + dataset = Dataset(DATASET_ID, project=PUBLIC) table_ref = dataset.table(TABLE_NAME) table = Config.CLIENT.get_table(table_ref) self._fetch_single_page(table) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index c2fa2a024f17..89114196d828 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -210,11 +210,9 @@ def _verify_resource_properties(self, dataset, resource): self.assertEqual(dataset.access_entries, []) def test_ctor_defaults(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID, project=self.PROJECT) self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertIs(dataset._client, client) - self.assertEqual(dataset.project, client.project) + self.assertEqual(dataset.project, self.PROJECT) self.assertEqual( dataset.path, '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)) @@ -238,12 +236,10 @@ def test_ctor_explicit(self): bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') entries = [phred, bharney] OTHER_PROJECT = 'foo-bar-123' - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client, + dataset = self._make_one(self.DS_ID, access_entries=entries, project=OTHER_PROJECT) self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertIs(dataset._client, client) self.assertEqual(dataset.project, OTHER_PROJECT) self.assertEqual( dataset.path, @@ -262,16 +258,14 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.location) def test_access_entries_setter_non_list(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(TypeError): dataset.access_entries = object() def test_access_entries_setter_invalid_field(self): from google.cloud.bigquery.dataset import AccessEntry - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') with self.assertRaises(ValueError): dataset.access_entries = [phred, object()] @@ -279,72 +273,61 @@ def test_access_entries_setter_invalid_field(self): def test_access_entries_setter(self): from google.cloud.bigquery.dataset import AccessEntry - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') dataset.access_entries = [phred, bharney] self.assertEqual(dataset.access_entries, [phred, bharney]) def test_default_table_expiration_ms_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.default_table_expiration_ms = 'bogus' def test_default_table_expiration_ms_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.default_table_expiration_ms = 12345 self.assertEqual(dataset.default_table_expiration_ms, 12345) def test_description_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.description = 12345 def test_description_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.description = 'DESCRIPTION' self.assertEqual(dataset.description, 'DESCRIPTION') def test_friendly_name_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.friendly_name = 12345 def test_friendly_name_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.friendly_name = 'FRIENDLY' self.assertEqual(dataset.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.location = 12345 def test_location_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) + klass.from_api_repr(RESOURCE) def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = { 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'datasetReference': { @@ -353,24 +336,20 @@ def test_from_api_repr_bare(self): } } klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) + dataset = klass.from_api_repr(RESOURCE) self._verify_resource_properties(dataset, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) RESOURCE = self._makeResource() klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) + dataset = klass.from_api_repr(RESOURCE) self._verify_resource_properties(dataset, RESOURCE) def test__parse_access_entries_w_unknown_entity_type(self): ACCESS = [ {'role': 'READER', 'unknown': 'UNKNOWN'}, ] - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client=client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) @@ -383,24 +362,16 @@ def test__parse_access_entries_w_extra_keys(self): 'userByEmail': USER_EMAIL, }, ] - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client=client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) def test_table(self): from google.cloud.bigquery.table import TableReference - client = _Client(project=self.PROJECT) - dataset = self._make_one(self.DS_ID, client=client) + dataset = self._make_one(self.DS_ID, project=self.PROJECT) table = dataset.table('table_id') self.assertIsInstance(table, TableReference) self.assertEqual(table.table_id, 'table_id') self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual(table.project, self.PROJECT) - - -class _Client(object): - - def __init__(self, project='project'): - self.project = project diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 7c662d01d8c6..470e802d1150 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2139,7 +2139,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.default_dataset = Dataset(DS_ID, client) + job.default_dataset = Dataset(DS_ID, project=self.PROJECT) job.begin() @@ -2204,7 +2204,7 @@ def test_begin_w_alternate_client(self): job = self._make_one(self.JOB_NAME, self.QUERY, client1) dataset_ref = DatasetReference(self.PROJECT, DS_ID) - dataset = Dataset(DS_ID, client1) + dataset = Dataset(DS_ID, project=self.PROJECT) table_ref = dataset_ref.table(TABLE) job.allow_large_results = True diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 0bf0c17c3102..ee2783744c94 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -206,7 +206,7 @@ def test_from_query_job(self): job = QueryJob( self.JOB_NAME, self.QUERY, client, udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - dataset = job.default_dataset = Dataset(DS_NAME, client) + dataset = job.default_dataset = Dataset(DS_NAME) job.use_query_cache = True job.use_legacy_sql = True klass = self._get_target_class() @@ -744,7 +744,7 @@ def __init__(self, project='project', connection=None): def dataset(self, name): from google.cloud.bigquery.dataset import Dataset - return Dataset(name, client=self) + return Dataset(name) class _Connection(object): From 0a96869e74f20cf7e1f03b58e17a80a6896e819d Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Fri, 22 Sep 2017 14:37:57 -0400 Subject: [PATCH 0206/2016] bigquery: dataset labels (#4026) Add a labels property to Dataset, gettable and settable. --- .../google/cloud/bigquery/dataset.py | 32 ++++++++++++++++++- .../google-cloud-bigquery/tests/system.py | 20 +++++++++--- .../tests/unit/test_client.py | 13 +++++++- .../tests/unit/test_dataset.py | 10 ++++++ 4 files changed, 69 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index c5bed721bab4..105772261449 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -171,7 +171,7 @@ def __init__(self, access_entries=(), project=None): self._dataset_id = dataset_id - self._properties = {} + self._properties = {'labels': {}} # Let the @property do validation. self.access_entries = access_entries self._project = project @@ -365,6 +365,32 @@ def location(self, value): raise ValueError("Pass a string, or None") self._properties['location'] = value + @property + def labels(self): + """Labels for the dataset. + + This method always returns a dict. To change a dataset's labels, + modify the dict, then call ``Client.update_dataset``. To delete a + label, set its value to ``None`` before updating. + + :rtype: dict, {str -> str} + :returns: A dict of the the dataset's labels. + """ + return self._properties['labels'] + + @labels.setter + def labels(self, value): + """Update labels for the dataset. + + :type value: dict, {str -> str} + :param value: new labels + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, dict): + raise ValueError("Pass a dict") + self._properties['labels'] = value + @classmethod def from_api_repr(cls, resource): """Factory: construct a dataset given its API representation @@ -427,6 +453,8 @@ def _set_properties(self, api_response): if 'defaultTableExpirationMs' in cleaned: cleaned['defaultTableExpirationMs'] = int( cleaned['defaultTableExpirationMs']) + if 'labels' not in cleaned: + cleaned['labels'] = {} self._properties.update(cleaned) def _build_access_resource(self): @@ -461,6 +489,8 @@ def _build_resource(self): if len(self.access_entries) > 0: resource['access'] = self._build_access_resource() + resource['labels'] = self.labels # labels is never None + return resource def table(self, table_id): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index d621a661f06d..e1fd218c8580 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -136,21 +136,33 @@ def test_get_dataset(self): def test_update_dataset(self): dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('patch_dataset'))) + Dataset(_make_dataset_id('update_dataset'))) self.to_delete.append(dataset) self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) + self.assertEquals(dataset.labels, {}) + dataset.friendly_name = 'Friendly' dataset.description = 'Description' - ds2 = Config.CLIENT.update_dataset(dataset, - ['friendly_name', 'description']) + dataset.labels = {'priority': 'high', 'color': 'blue'} + ds2 = Config.CLIENT.update_dataset( + dataset, + ('friendly_name', 'description', 'labels')) self.assertEqual(ds2.friendly_name, 'Friendly') self.assertEqual(ds2.description, 'Description') + self.assertEqual(ds2.labels, {'priority': 'high', 'color': 'blue'}) - # TODO(jba): test that read-modify-write with ETag works. + ds2.labels = { + 'color': 'green', # change + 'shape': 'circle', # add + 'priority': None, # delete + } + ds3 = Config.CLIENT.update_dataset(ds2, ['labels']) + self.assertEqual(ds3.labels, {'color': 'green', 'shape': 'circle'}) + # TODO(jba): test that read-modify-write with ETag works. def test_list_datasets(self): datasets_to_create = [ 'new' + unique_resource_id(), diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 8916e9682ac1..ec12be72efae 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -321,6 +321,7 @@ def test_create_dataset_minimal(self): SENT = { 'datasetReference': {'projectId': PROJECT, 'datasetId': DS_ID}, + 'labels': {}, } self.assertEqual(req['data'], SENT) self.assertEqual(ds.dataset_id, DS_ID) @@ -338,6 +339,7 @@ def test_create_dataset_w_attrs(self): FRIENDLY_NAME = 'FN' LOCATION = 'US' USER_EMAIL = 'phred@example.com' + LABELS = {'color': 'red'} VIEW = { 'projectId': 'my-proj', 'datasetId': 'starry-skies', @@ -352,6 +354,7 @@ def test_create_dataset_w_attrs(self): 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, 'defaultTableExpirationMs': 3600, + 'labels': LABELS, 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'view': VIEW}], @@ -366,6 +369,7 @@ def test_create_dataset_w_attrs(self): ds_arg.friendly_name = FRIENDLY_NAME ds_arg.default_table_expiration_ms = 3600 ds_arg.location = LOCATION + ds_arg.labels = LABELS ds = client.create_dataset(ds_arg) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -381,6 +385,7 @@ def test_create_dataset_w_attrs(self): 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'view': VIEW}], + 'labels': LABELS, } self.assertEqual(req['data'], SENT) self.assertEqual(ds.dataset_id, DS_ID) @@ -391,6 +396,7 @@ def test_create_dataset_w_attrs(self): self.assertEqual(ds.friendly_name, FRIENDLY_NAME) self.assertEqual(ds.location, LOCATION) self.assertEqual(ds.default_table_expiration_ms, 3600) + self.assertEqual(ds.labels, LABELS) def test_get_table(self): project = 'PROJECT' @@ -439,6 +445,7 @@ def test_update_dataset(self): DESCRIPTION = 'DESCRIPTION' FRIENDLY_NAME = 'TITLE' LOCATION = 'loc' + LABELS = {'priority': 'high'} EXP = 17 RESOURCE = { 'datasetReference': @@ -448,6 +455,7 @@ def test_update_dataset(self): 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, 'defaultTableExpirationMs': EXP, + 'labels': LABELS, } creds = _make_credentials() client = self._make_one(project=PROJECT, credentials=creds) @@ -457,8 +465,9 @@ def test_update_dataset(self): ds.friendly_name = FRIENDLY_NAME ds.location = LOCATION ds.default_table_expiration_ms = EXP + ds.labels = LABELS ds2 = client.update_dataset( - ds, ['description', 'friendly_name', 'location']) + ds, ['description', 'friendly_name', 'location', 'labels']) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'PATCH') @@ -466,6 +475,7 @@ def test_update_dataset(self): 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, + 'labels': LABELS, } self.assertEqual(req['data'], SENT) self.assertEqual(req['path'], '/' + PATH) @@ -473,6 +483,7 @@ def test_update_dataset(self): self.assertEqual(ds2.description, ds.description) self.assertEqual(ds2.friendly_name, ds.friendly_name) self.assertEqual(ds2.location, ds.location) + self.assertEqual(ds2.labels, ds.labels) # ETag becomes If-Match header. ds._properties['etag'] = 'etag' diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 89114196d828..9d13ebb9bc4b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -319,6 +319,16 @@ def test_location_setter(self): dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') + def test_labels_setter(self): + dataset = self._make_one(self.DS_ID) + dataset.labels = {'color': 'green'} + self.assertEqual(dataset.labels, {'color': 'green'}) + + def test_labels_setter_bad_value(self): + dataset = self._make_one(self.DS_ID) + with self.assertRaises(ValueError): + dataset.labels = None + def test_from_api_repr_missing_identity(self): self._setUpConstants() RESOURCE = {} From de259e8d993f9cc15c154712c57c36248eb1179e Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Fri, 22 Sep 2017 15:45:52 -0400 Subject: [PATCH 0207/2016] bigquery: Dataset constructor takes a DatasetReference (#4036) --- .../google/cloud/bigquery/client.py | 4 - .../google/cloud/bigquery/dataset.py | 30 +++---- .../google/cloud/bigquery/job.py | 32 +++---- .../google-cloud-bigquery/tests/system.py | 86 ++++++------------- .../tests/unit/test_client.py | 12 +-- .../tests/unit/test_dataset.py | 50 ++++++----- .../tests/unit/test_job.py | 4 +- .../tests/unit/test_query.py | 13 +-- 8 files changed, 100 insertions(+), 131 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f40904c2057e..1bd46e407968 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -185,8 +185,6 @@ def create_dataset(self, dataset): :rtype: ":class:`~google.cloud.bigquery.dataset.Dataset`" :returns: a new ``Dataset`` returned from the service. """ - if dataset.project is None: - dataset._project = self.project path = '/projects/%s/datasets' % (dataset.project,) api_response = self._connection.api_request( method='POST', path=path, data=dataset._build_resource()) @@ -244,8 +242,6 @@ def update_dataset(self, dataset, fields): :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: the modified ``Dataset`` instance """ - if dataset.project is None: - dataset._project = self.project path = '/projects/%s/datasets/%s' % (dataset.project, dataset.dataset_id) partial = {} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 105772261449..fc641c3a4d58 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -106,6 +106,10 @@ class DatasetReference(object): """ def __init__(self, project, dataset_id): + if not isinstance(project, six.string_types): + raise ValueError("Pass a string for project") + if not isinstance(dataset_id, six.string_types): + raise ValueError("Pass a string for dataset_id") self._project = project self._dataset_id = dataset_id @@ -154,27 +158,15 @@ class Dataset(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets - :type dataset_id: str - :param dataset_id: the ID of the dataset - - :type access_entries: list of :class:`AccessEntry` - :param access_entries: roles granted to entities for this dataset - - :type project: str - :param project: (Optional) project ID for the dataset. + :type dataset_ref: :class:`~google.cloud.bigquery.dataset.DatasetReference` + :param dataset_ref: a pointer to a dataset """ - _access_entries = None - - def __init__(self, - dataset_id, - access_entries=(), - project=None): - self._dataset_id = dataset_id + def __init__(self, dataset_ref): + self._project = dataset_ref.project + self._dataset_id = dataset_ref.dataset_id self._properties = {'labels': {}} - # Let the @property do validation. - self.access_entries = access_entries - self._project = project + self._access_entries = () @property def project(self): @@ -406,7 +398,7 @@ def from_api_repr(cls, resource): raise KeyError('Resource lacks required identity information:' '["datasetReference"]["datasetId"]') dataset_id = dsr['datasetId'] - dataset = cls(dataset_id, project=dsr['projectId']) + dataset = cls(DatasetReference(dsr['projectId'], dataset_id)) dataset._set_properties(resource) return dataset diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 42f12ac39838..84cca80e22a0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -842,8 +842,9 @@ def from_api_repr(cls, resource, client): """ job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], - project=dest_config['projectId']) + ds_ref = DatasetReference(dest_config['projectId'], + dest_config['datasetId'],) + dataset = Dataset(ds_ref) table_ref = TableReference(dataset, dest_config['tableId']) destination = Table(table_ref, client=client) source_urls = config.get('sourceUris', ()) @@ -959,8 +960,9 @@ def from_api_repr(cls, resource, client): """ job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], - project=dest_config['projectId']) + ds_ref = DatasetReference(dest_config['projectId'], + dest_config['datasetId'],) + dataset = Dataset(ds_ref) table_ref = TableReference(dataset, dest_config['tableId']) destination = Table(table_ref, client=client) sources = [] @@ -972,9 +974,9 @@ def from_api_repr(cls, resource, client): "Resource missing 'sourceTables' / 'sourceTable'") source_configs = [single] for source_config in source_configs: - dataset = Dataset(source_config['datasetId'], - project=source_config['projectId']) - table_ref = TableReference(dataset, source_config['tableId']) + ds_ref = DatasetReference(source_config['projectId'], + source_config['datasetId']) + table_ref = ds_ref.table(source_config['tableId']) sources.append(Table(table_ref, client=client)) job = cls(job_id, destination, sources, client=client) job._set_properties(resource) @@ -1426,7 +1428,8 @@ def _copy_configuration_properties(self, configuration): dest_local = self._destination_table_resource() if dest_remote != dest_local: project = dest_remote['projectId'] - dataset = Dataset(dest_remote['datasetId'], project=project) + dataset = Dataset(DatasetReference(project, + dest_remote['datasetId'])) self.destination = dataset.table(dest_remote['tableId']) def_ds = configuration.get('defaultDataset') @@ -1434,9 +1437,8 @@ def _copy_configuration_properties(self, configuration): if self.default_dataset is not None: del self.default_dataset else: - self.default_dataset = Dataset(def_ds['datasetId'], - project=def_ds['projectId']) - + self.default_dataset = Dataset( + DatasetReference(def_ds['projectId'], def_ds['datasetId'])) udf_resources = [] for udf_mapping in configuration.get(self._UDF_KEY, ()): key_val, = udf_mapping.items() @@ -1587,11 +1589,11 @@ def referenced_tables(self): t_project = table['projectId'] - ds_name = table['datasetId'] - t_dataset = datasets_by_project_name.get((t_project, ds_name)) + ds_id = table['datasetId'] + t_dataset = datasets_by_project_name.get((t_project, ds_id)) if t_dataset is None: - t_dataset = Dataset(ds_name, project=t_project) - datasets_by_project_name[(t_project, ds_name)] = t_dataset + t_dataset = DatasetReference(t_project, ds_id) + datasets_by_project_name[(t_project, ds_id)] = t_dataset t_name = table['tableId'] tables.append(t_dataset.table(t_name)) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index e1fd218c8580..de19a7da01c5 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -112,8 +112,7 @@ def _still_in_use(bad_request): def test_create_dataset(self): DATASET_ID = _make_dataset_id('create_dataset') - dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) - self.to_delete.append(dataset) + dataset = self.temp_dataset(DATASET_ID) self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.dataset_id, DATASET_ID) @@ -122,7 +121,7 @@ def test_create_dataset(self): def test_get_dataset(self): DATASET_ID = _make_dataset_id('get_dataset') client = Config.CLIENT - dataset_arg = Dataset(DATASET_ID, project=client.project) + dataset_arg = Dataset(client.dataset(DATASET_ID)) dataset_arg.friendly_name = 'Friendly' dataset_arg.description = 'Description' dataset = retry_403(client.create_dataset)(dataset_arg) @@ -135,10 +134,7 @@ def test_get_dataset(self): self.assertEqual(got.description, 'Description') def test_update_dataset(self): - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('update_dataset'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('update_dataset')) self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) @@ -163,6 +159,7 @@ def test_update_dataset(self): self.assertEqual(ds3.labels, {'color': 'green', 'shape': 'circle'}) # TODO(jba): test that read-modify-write with ETag works. + def test_list_datasets(self): datasets_to_create = [ 'new' + unique_resource_id(), @@ -170,9 +167,7 @@ def test_list_datasets(self): 'newest' + unique_resource_id(), ] for dataset_id in datasets_to_create: - created_dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(dataset_id)) - self.to_delete.append(created_dataset) + self.temp_dataset(dataset_id) # Retrieve the datasets. iterator = Config.CLIENT.list_datasets() @@ -184,9 +179,7 @@ def test_list_datasets(self): self.assertEqual(len(created), len(datasets_to_create)) def test_create_table(self): - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('create_table'))) - self.to_delete.append(dataset) + dataset = self.temp_dataset(_make_dataset_id('create_table')) TABLE_NAME = 'test_table' full_name = bigquery.SchemaField('full_name', 'STRING', @@ -217,9 +210,7 @@ def test_get_table_w_public_dataset(self): def test_list_dataset_tables(self): DATASET_ID = _make_dataset_id('list_tables') - dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(DATASET_ID) # Retrieve tables before any are created for the dataset. iterator = Config.CLIENT.list_dataset_tables(dataset) all_tables = list(iterator) @@ -252,9 +243,7 @@ def test_list_dataset_tables(self): self.assertEqual(len(created), len(tables_to_create)) def test_patch_table(self): - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('patch_table'))) - self.to_delete.append(dataset) + dataset = self.temp_dataset(_make_dataset_id('patch_table')) TABLE_NAME = 'test_table' full_name = bigquery.SchemaField('full_name', 'STRING', @@ -273,9 +262,7 @@ def test_patch_table(self): self.assertEqual(table.description, 'Description') def test_update_table(self): - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('update_table'))) - self.to_delete.append(dataset) + dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' full_name = bigquery.SchemaField('full_name', 'STRING', @@ -316,10 +303,7 @@ def test_insert_data_then_dump_table(self): ] ROW_IDS = range(len(ROWS)) - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('insert_data_then_dump'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('insert_data_then_dump')) TABLE_NAME = 'test_table' full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') @@ -358,10 +342,7 @@ def test_load_table_from_local_file_then_dump_table(self): ] TABLE_NAME = 'test_table' - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('load_local_then_dump'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') @@ -406,10 +387,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("orange", 590), ("red", 650)] - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('load_local_then_dump'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) table = Table(dataset.table(TABLE_NAME), client=Config.CLIENT) self.to_delete.insert(0, table) @@ -467,9 +445,7 @@ def test_load_table_from_storage_then_dump_table(self): self.to_delete.insert(0, blob) - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('load_gcs_then_dump'))) - self.to_delete.append(dataset) + dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') @@ -536,10 +512,7 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.to_delete.insert(0, blob) - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('load_gcs_then_dump'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) table_ref = dataset.table(table_name) job = Config.CLIENT.load_table_from_storage( @@ -589,9 +562,7 @@ def _load_table_for_extract_table( blob.upload_from_file(csv_read, content_type='text/csv') self.to_delete.insert(0, blob) - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(table.dataset_id)) - self.to_delete.append(dataset) + dataset = self.temp_dataset(table.dataset_id) table_ref = dataset.table(table.table_id) job = Config.CLIENT.load_table_from_storage( 'bq_extract_storage_test_' + local_id, table_ref, gs_url) @@ -676,8 +647,7 @@ def test_job_cancel(self): TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) - dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) - self.to_delete.append(dataset) + dataset = self.temp_dataset(DATASET_ID) full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') @@ -866,9 +836,7 @@ def test_dbapi_fetchall(self): def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile - dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_id)) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(dataset_id) greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') table = Table(dataset.table(table_id), schema=[greeting], @@ -1208,8 +1176,7 @@ def test_dump_table_w_public_data(self): DATASET_ID = 'samples' TABLE_NAME = 'natality' - dataset = Dataset(DATASET_ID, project=PUBLIC) - table_ref = dataset.table(TABLE_NAME) + table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME) table = Config.CLIENT.get_table(table_ref) self._fetch_single_page(table) @@ -1260,10 +1227,7 @@ def test_insert_nested_nested(self): ('Some value', record) ] table_name = 'test_table' - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('issue_2951'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('issue_2951')) table = Table(dataset.table(table_name), schema=schema, client=Config.CLIENT) table.create() @@ -1278,10 +1242,8 @@ def test_insert_nested_nested(self): def test_create_table_insert_fetch_nested_schema(self): table_name = 'test_table' - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('create_table_nested_schema'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset( + _make_dataset_id('create_table_nested_schema')) schema = _load_json_schema() table = Table(dataset.table(table_name), schema=schema, client=Config.CLIENT) @@ -1339,6 +1301,12 @@ def test_create_table_insert_fetch_nested_schema(self): e_favtime = datetime.datetime(*parts[0:6]) self.assertEqual(found[7], e_favtime) # FavoriteTime + def temp_dataset(self, dataset_id): + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(Config.CLIENT.dataset(dataset_id))) + self.to_delete.append(dataset) + return dataset + def _job_done(instance): return instance.state.lower() == 'done' diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ec12be72efae..50c324ebfc32 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -313,7 +313,7 @@ def test_create_dataset_minimal(self): creds = _make_credentials() client = self._make_one(project=PROJECT, credentials=creds) conn = client._connection = _Connection(RESOURCE) - ds = client.create_dataset(Dataset(DS_ID)) + ds = client.create_dataset(Dataset(client.dataset(DS_ID))) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') @@ -364,7 +364,8 @@ def test_create_dataset_w_attrs(self): conn = client._connection = _Connection(RESOURCE) entries = [AccessEntry('OWNER', 'userByEmail', USER_EMAIL), AccessEntry(None, 'view', VIEW)] - ds_arg = Dataset(DS_ID, project=PROJECT, access_entries=entries) + ds_arg = Dataset(client.dataset(DS_ID)) + ds_arg.access_entries = entries ds_arg.description = DESCRIPTION ds_arg.friendly_name = FRIENDLY_NAME ds_arg.default_table_expiration_ms = 3600 @@ -434,7 +435,7 @@ def test_update_dataset_w_invalid_field(self): creds = _make_credentials() client = self._make_one(project=PROJECT, credentials=creds) with self.assertRaises(ValueError): - client.update_dataset(Dataset(DS_ID), ["foo"]) + client.update_dataset(Dataset(client.dataset(DS_ID)), ["foo"]) def test_update_dataset(self): from google.cloud.bigquery.dataset import Dataset @@ -460,7 +461,7 @@ def test_update_dataset(self): creds = _make_credentials() client = self._make_one(project=PROJECT, credentials=creds) conn = client._connection = _Connection(RESOURCE, RESOURCE) - ds = Dataset(DS_ID, project=PROJECT) + ds = Dataset(client.dataset(DS_ID)) ds.description = DESCRIPTION ds.friendly_name = FRIENDLY_NAME ds.location = LOCATION @@ -636,7 +637,8 @@ def test_delete_dataset(self): creds = _make_credentials() client = self._make_one(project=PROJECT, credentials=creds) conn = client._connection = _Connection({}, {}) - for arg in (client.dataset(DS_ID), Dataset(DS_ID, project=PROJECT)): + ds_ref = client.dataset(DS_ID) + for arg in (ds_ref, Dataset(ds_ref)): client.delete_dataset(arg) req = conn._requested[0] self.assertEqual(req['method'], 'DELETE') diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 9d13ebb9bc4b..ced77990a65d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -101,6 +101,12 @@ def test_ctor_defaults(self): self.assertEqual(dataset_ref.project, 'some-project-1') self.assertEqual(dataset_ref.dataset_id, 'dataset_1') + def test_ctor_bad_args(self): + with self.assertRaises(ValueError): + self._make_one(1, 'd') + with self.assertRaises(ValueError): + self._make_one('p', 2) + def test_table(self): dataset_ref = self._make_one('some-project-1', 'dataset_1') table_ref = dataset_ref.table('table_1') @@ -110,8 +116,11 @@ def test_table(self): class TestDataset(unittest.TestCase): + from google.cloud.bigquery.dataset import DatasetReference + PROJECT = 'project' DS_ID = 'dataset-id' + DS_REF = DatasetReference(PROJECT, DS_ID) @staticmethod def _get_target_class(): @@ -210,7 +219,7 @@ def _verify_resource_properties(self, dataset, resource): self.assertEqual(dataset.access_entries, []) def test_ctor_defaults(self): - dataset = self._make_one(self.DS_ID, project=self.PROJECT) + dataset = self._make_one(self.DS_REF) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) self.assertEqual( @@ -230,15 +239,14 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.location) def test_ctor_explicit(self): - from google.cloud.bigquery.dataset import AccessEntry + from google.cloud.bigquery.dataset import DatasetReference, AccessEntry phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') entries = [phred, bharney] OTHER_PROJECT = 'foo-bar-123' - dataset = self._make_one(self.DS_ID, - access_entries=entries, - project=OTHER_PROJECT) + dataset = self._make_one(DatasetReference(OTHER_PROJECT, self.DS_ID)) + dataset.access_entries = entries self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, OTHER_PROJECT) self.assertEqual( @@ -258,14 +266,14 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.location) def test_access_entries_setter_non_list(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(TypeError): dataset.access_entries = object() def test_access_entries_setter_invalid_field(self): from google.cloud.bigquery.dataset import AccessEntry - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') with self.assertRaises(ValueError): dataset.access_entries = [phred, object()] @@ -273,59 +281,59 @@ def test_access_entries_setter_invalid_field(self): def test_access_entries_setter(self): from google.cloud.bigquery.dataset import AccessEntry - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') dataset.access_entries = [phred, bharney] self.assertEqual(dataset.access_entries, [phred, bharney]) def test_default_table_expiration_ms_setter_bad_value(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.default_table_expiration_ms = 'bogus' def test_default_table_expiration_ms_setter(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) dataset.default_table_expiration_ms = 12345 self.assertEqual(dataset.default_table_expiration_ms, 12345) def test_description_setter_bad_value(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.description = 12345 def test_description_setter(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) dataset.description = 'DESCRIPTION' self.assertEqual(dataset.description, 'DESCRIPTION') def test_friendly_name_setter_bad_value(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.friendly_name = 12345 def test_friendly_name_setter(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) dataset.friendly_name = 'FRIENDLY' self.assertEqual(dataset.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.location = 12345 def test_location_setter(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') def test_labels_setter(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) dataset.labels = {'color': 'green'} self.assertEqual(dataset.labels, {'color': 'green'}) def test_labels_setter_bad_value(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.labels = None @@ -359,7 +367,7 @@ def test__parse_access_entries_w_unknown_entity_type(self): ACCESS = [ {'role': 'READER', 'unknown': 'UNKNOWN'}, ] - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) @@ -372,14 +380,14 @@ def test__parse_access_entries_w_extra_keys(self): 'userByEmail': USER_EMAIL, }, ] - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) def test_table(self): from google.cloud.bigquery.table import TableReference - dataset = self._make_one(self.DS_ID, project=self.PROJECT) + dataset = self._make_one(self.DS_REF) table = dataset.table('table_id') self.assertIsInstance(table, TableReference) self.assertEqual(table.table_id, 'table_id') diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 470e802d1150..029db44cd534 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2139,7 +2139,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.default_dataset = Dataset(DS_ID, project=self.PROJECT) + job.default_dataset = Dataset(DatasetReference(self.PROJECT, DS_ID)) job.begin() @@ -2204,7 +2204,7 @@ def test_begin_w_alternate_client(self): job = self._make_one(self.JOB_NAME, self.QUERY, client1) dataset_ref = DatasetReference(self.PROJECT, DS_ID) - dataset = Dataset(DS_ID, project=self.PROJECT) + dataset = Dataset(dataset_ref) table_ref = dataset_ref.table(TABLE) job.allow_large_results = True diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index ee2783744c94..73f23cb1bf6a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -196,17 +196,18 @@ def test_ctor_w_query_parameters(self): self.assertEqual(query.query_parameters, query_parameters) def test_from_query_job(self): - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import Dataset, DatasetReference from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery._helpers import UDFResource - DS_NAME = 'DATASET' + DS_ID = 'DATASET' RESOURCE_URI = 'gs://some-bucket/js/lib.js' client = _Client(self.PROJECT) job = QueryJob( self.JOB_NAME, self.QUERY, client, udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - dataset = job.default_dataset = Dataset(DS_NAME) + dataset = Dataset(DatasetReference(self.PROJECT, DS_ID)) + job.default_dataset = dataset job.use_query_cache = True job.use_legacy_sql = True klass = self._get_target_class() @@ -741,10 +742,10 @@ def __init__(self, project='project', connection=None): self.project = project self._connection = connection - def dataset(self, name): - from google.cloud.bigquery.dataset import Dataset + def dataset(self, dataset_id): + from google.cloud.bigquery.dataset import Dataset, DatasetReference - return Dataset(name) + return Dataset(DatasetReference(self.project, dataset_id)) class _Connection(object): From da174d5f1b97520a956815bc20e7ef8e1fa1b5f9 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Fri, 22 Sep 2017 16:52:21 -0400 Subject: [PATCH 0208/2016] bigquery: Job and Query default_dataset is DatasetReference (#4037) Change the type of the Job.default_dataset and Query.default_dataset properties to DatasetReference. --- .../google/cloud/bigquery/job.py | 6 +++--- .../google/cloud/bigquery/query.py | 4 ++-- .../google-cloud-bigquery/tests/unit/test_job.py | 14 ++++++-------- .../google-cloud-bigquery/tests/unit/test_query.py | 12 ++++++------ 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 84cca80e22a0..a79fc8e53d20 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1248,7 +1248,7 @@ def __init__(self, job_id, query, client, https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition """ - default_dataset = _TypedProperty('default_dataset', Dataset) + default_dataset = _TypedProperty('default_dataset', DatasetReference) """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.defaultDataset """ @@ -1437,8 +1437,8 @@ def _copy_configuration_properties(self, configuration): if self.default_dataset is not None: del self.default_dataset else: - self.default_dataset = Dataset( - DatasetReference(def_ds['projectId'], def_ds['datasetId'])) + self.default_dataset = DatasetReference( + def_ds['projectId'], def_ds['datasetId']) udf_resources = [] for udf_mapping in configuration.get(self._UDF_KEY, ()): key_val, = udf_mapping.items() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 7abbbec76b9b..38400659bdaf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -19,7 +19,7 @@ from google.api.core import page_iterator from google.cloud.bigquery._helpers import _TypedProperty from google.cloud.bigquery._helpers import _rows_from_json -from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import QueryParametersProperty @@ -273,7 +273,7 @@ def schema(self): """ return _parse_schema_resource(self._properties.get('schema', {})) - default_dataset = _TypedProperty('default_dataset', Dataset) + default_dataset = _TypedProperty('default_dataset', DatasetReference) """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#defaultDataset """ diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 029db44cd534..ca348704127c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1608,10 +1608,10 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.create_disposition) if 'defaultDataset' in query_config: - dataset = job.default_dataset + ds_ref = job.default_dataset ds_ref = { - 'projectId': dataset.project, - 'datasetId': dataset.dataset_id, + 'projectId': ds_ref.project, + 'datasetId': ds_ref.dataset_id, } self.assertEqual(ds_ref, query_config['defaultDataset']) else: @@ -2125,7 +2125,7 @@ def test_result_error(self): self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) def test_begin_w_bound_client(self): - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import DatasetReference PATH = '/projects/%s/jobs' % (self.PROJECT,) DS_ID = 'DATASET' @@ -2139,7 +2139,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.default_dataset = Dataset(DatasetReference(self.PROJECT, DS_ID)) + job.default_dataset = DatasetReference(self.PROJECT, DS_ID) job.begin() @@ -2168,7 +2168,6 @@ def test_begin_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): - from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2204,12 +2203,11 @@ def test_begin_w_alternate_client(self): job = self._make_one(self.JOB_NAME, self.QUERY, client1) dataset_ref = DatasetReference(self.PROJECT, DS_ID) - dataset = Dataset(dataset_ref) table_ref = dataset_ref.table(TABLE) job.allow_large_results = True job.create_disposition = 'CREATE_NEVER' - job.default_dataset = dataset + job.default_dataset = dataset_ref job.destination = table_ref job.flatten_results = True job.priority = 'INTERACTIVE' diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 73f23cb1bf6a..9340689315a7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -196,7 +196,7 @@ def test_ctor_w_query_parameters(self): self.assertEqual(query.query_parameters, query_parameters) def test_from_query_job(self): - from google.cloud.bigquery.dataset import Dataset, DatasetReference + from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery._helpers import UDFResource @@ -206,8 +206,8 @@ def test_from_query_job(self): job = QueryJob( self.JOB_NAME, self.QUERY, client, udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - dataset = Dataset(DatasetReference(self.PROJECT, DS_ID)) - job.default_dataset = dataset + ds_ref = DatasetReference(self.PROJECT, DS_ID) + job.default_dataset = ds_ref job.use_query_cache = True job.use_legacy_sql = True klass = self._get_target_class() @@ -219,7 +219,7 @@ def test_from_query_job(self): self.assertIs(query._client, client) self.assertIs(query._job, job) self.assertEqual(query.udf_resources, job.udf_resources) - self.assertIs(query.default_dataset, dataset) + self.assertIs(query.default_dataset, ds_ref) self.assertTrue(query.use_query_cache) self.assertTrue(query.use_legacy_sql) @@ -743,9 +743,9 @@ def __init__(self, project='project', connection=None): self._connection = connection def dataset(self, dataset_id): - from google.cloud.bigquery.dataset import Dataset, DatasetReference + from google.cloud.bigquery.dataset import DatasetReference - return Dataset(DatasetReference(self.project, dataset_id)) + return DatasetReference(self.project, dataset_id) class _Connection(object): From b4ae97d094eacdb9047a53267ec191f3e64ef073 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 22 Sep 2017 14:27:07 -0700 Subject: [PATCH 0209/2016] BigQuery: add to/from API representation for Table & Dataset references. (#4020) * BigQuery: add to/from API representation for Table & Dataset references. Also, implement equality and hashing for Table & Dataset references. This will make it easier to use the TableReference and DatasetReference classes as typed properties in the QueryJob and other job classes. * Fix lint errors. * Replace unique-ly with uniquely. --- .../google/cloud/bigquery/dataset.py | 39 ++++++++ .../google/cloud/bigquery/schema.py | 2 +- .../google/cloud/bigquery/table.py | 57 ++++++++++++ .../tests/unit/test_dataset.py | 64 +++++++++++++ .../tests/unit/test_table.py | 92 +++++++++++++++++++ 5 files changed, 253 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index fc641c3a4d58..e464fcfb93bd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -151,6 +151,45 @@ def table(self, table_id): """ return TableReference(self, table_id) + @classmethod + def from_api_repr(cls, resource): + project = resource['projectId'] + dataset_id = resource['datasetId'] + return cls(project, dataset_id) + + def to_api_repr(self): + return { + 'projectId': self._project, + 'datasetId': self._dataset_id, + } + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`DatasetReference`. + """ + return ( + self._project, + self._dataset_id, + ) + + def __eq__(self, other): + if not isinstance(other, DatasetReference): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + return 'DatasetReference{}'.format(self._key()) + class Dataset(object): """Datasets are containers for tables. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 4aea34ac22e0..535c445a3726 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -126,7 +126,7 @@ def to_api_repr(self): return answer def _key(self): - """A tuple key that unique-ly describes this field. + """A tuple key that uniquely describes this field. Used to compute this instance's hashcode and evaluate equality. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 37e5bddbd7aa..7173e8603a8e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -105,6 +105,63 @@ def path(self): return '/projects/%s/datasets/%s/tables/%s' % ( self._project, self._dataset_id, self._table_id) + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a table reference given its API representation + + :type resource: dict + :param resource: table reference representation returned from the API + + :rtype: :class:`google.cloud.bigquery.table.TableReference` + :returns: Table reference parsed from ``resource``. + """ + from google.cloud.bigquery.dataset import DatasetReference + + project = resource['projectId'] + dataset_id = resource['datasetId'] + table_id = resource['tableId'] + return cls(DatasetReference(project, dataset_id), table_id) + + def to_api_repr(self): + """Construct the API resource representation of this table reference. + + :rtype: dict + :returns: Table reference as represented as an API resource + """ + return { + 'projectId': self._project, + 'datasetId': self._dataset_id, + 'tableId': self._table_id, + } + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`DatasetReference`. + """ + return ( + self._project, + self._dataset_id, + self._table_id, + ) + + def __eq__(self, other): + if not isinstance(other, TableReference): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + return 'TableReference{}'.format(self._key()) + class Table(object): """Tables represent a set of rows whose values correspond to a schema. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index ced77990a65d..c04d154b52da 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -114,6 +114,70 @@ def test_table(self): self.assertEqual(table_ref.project, 'some-project-1') self.assertEqual(table_ref.table_id, 'table_1') + def test_to_api_repr(self): + dataset = self._make_one('project_1', 'dataset_1') + + resource = dataset.to_api_repr() + + self.assertEqual( + resource, + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + }) + + def test_from_api_repr(self): + from google.cloud.bigquery.dataset import DatasetReference + expected = self._make_one('project_1', 'dataset_1') + + got = DatasetReference.from_api_repr( + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + }) + + self.assertEqual(expected, got) + + def test___eq___wrong_type(self): + dataset = self._make_one('project_1', 'dataset_1') + other = object() + self.assertNotEqual(dataset, other) + self.assertEqual(dataset, mock.ANY) + + def test___eq___project_mismatch(self): + dataset = self._make_one('project_1', 'dataset_1') + other = self._make_one('project_2', 'dataset_1') + self.assertNotEqual(dataset, other) + + def test___eq___dataset_mismatch(self): + dataset = self._make_one('project_1', 'dataset_1') + other = self._make_one('project_1', 'dataset_2') + self.assertNotEqual(dataset, other) + + def test___eq___equality(self): + dataset = self._make_one('project_1', 'dataset_1') + other = self._make_one('project_1', 'dataset_1') + self.assertEqual(dataset, other) + + def test___hash__set_equality(self): + dataset1 = self._make_one('project_1', 'dataset_1') + dataset2 = self._make_one('project_1', 'dataset_2') + set_one = {dataset1, dataset2} + set_two = {dataset1, dataset2} + self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + dataset1 = self._make_one('project_1', 'dataset_1') + dataset2 = self._make_one('project_1', 'dataset_2') + set_one = {dataset1} + set_two = {dataset2} + self.assertNotEqual(set_one, set_two) + + def test___repr__(self): + dataset = self._make_one('project1', 'dataset1') + expected = "DatasetReference('project1', 'dataset1')" + self.assertEqual(repr(dataset), expected) + class TestDataset(unittest.TestCase): from google.cloud.bigquery.dataset import DatasetReference diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 2327d11b1ed3..f2c2297d244b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -58,6 +58,98 @@ def test_ctor_defaults(self): self.assertEqual(table_ref.dataset_id, dataset_ref.dataset_id) self.assertEqual(table_ref.table_id, 'table_1') + def test_to_api_repr(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + table_ref = self._make_one(dataset_ref, 'table_1') + + resource = table_ref.to_api_repr() + + self.assertEqual( + resource, + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + 'tableId': 'table_1', + }) + + def test_from_api_repr(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import TableReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + expected = self._make_one(dataset_ref, 'table_1') + + got = TableReference.from_api_repr( + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + 'tableId': 'table_1', + }) + + self.assertEqual(expected, got) + + def test___eq___wrong_type(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + table = self._make_one(dataset_ref, 'table_1') + other = object() + self.assertNotEqual(table, other) + self.assertEqual(table, mock.ANY) + + def test___eq___project_mismatch(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + other_dataset = DatasetReference('project_2', 'dataset_1') + table = self._make_one(dataset, 'table_1') + other = self._make_one(other_dataset, 'table_1') + self.assertNotEqual(table, other) + + def test___eq___dataset_mismatch(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + other_dataset = DatasetReference('project_1', 'dataset_2') + table = self._make_one(dataset, 'table_1') + other = self._make_one(other_dataset, 'table_1') + self.assertNotEqual(table, other) + + def test___eq___table_mismatch(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table = self._make_one(dataset, 'table_1') + other = self._make_one(dataset, 'table_2') + self.assertNotEqual(table, other) + + def test___eq___equality(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table = self._make_one(dataset, 'table_1') + other = self._make_one(dataset, 'table_1') + self.assertEqual(table, other) + + def test___hash__set_equality(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table1 = self._make_one(dataset, 'table1') + table2 = self._make_one(dataset, 'table2') + set_one = {table1, table2} + set_two = {table1, table2} + self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table1 = self._make_one(dataset, 'table1') + table2 = self._make_one(dataset, 'table2') + set_one = {table1} + set_two = {table2} + self.assertNotEqual(set_one, set_two) + + def test___repr__(self): + dataset = DatasetReference('project1', 'dataset1') + table1 = self._make_one(dataset, 'table1') + expected = "TableReference('project1', 'dataset1', 'table1')" + self.assertEqual(repr(table1), expected) + class TestTable(unittest.TestCase, _SchemaBase): From 8774fe573dc1c8192d6c8f3a336281fad62715e7 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 22 Sep 2017 15:25:22 -0700 Subject: [PATCH 0210/2016] BigQuery: replaces table.create() with client.create_table() (#4038) * adds client.create_table() * removes table.create() * passes system tests * fixes rebase conflicts * fixes coverage --- .../google/cloud/bigquery/client.py | 18 ++ .../google/cloud/bigquery/table.py | 18 -- .../google-cloud-bigquery/tests/system.py | 83 +++---- .../tests/unit/test_client.py | 145 ++++++++++++ .../tests/unit/test_table.py | 214 ------------------ 5 files changed, 205 insertions(+), 273 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1bd46e407968..bbeac294680d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -190,6 +190,24 @@ def create_dataset(self, dataset): method='POST', path=path, data=dataset._build_resource()) return Dataset.from_api_repr(api_response) + def create_table(self, table): + """API call: create a table via a PUT request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert + + :type table: :class:`~google.cloud.bigquery.table.Table` + :param table: A ``Table`` populated with the desired initial state. + + :rtype: ":class:`~google.cloud.bigquery.table.Table`" + :returns: a new ``Table`` returned from the service. + """ + path = '/projects/%s/datasets/%s/tables' % ( + table.project, table.dataset_id) + api_response = self._connection.api_request( + method='POST', path=path, data=table._build_resource()) + return Table.from_api_repr(api_response, self) + def get_dataset(self, dataset_ref): """Fetch the dataset referenced by ``dataset_ref`` diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 7173e8603a8e..03214e52a7be 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -653,24 +653,6 @@ def _build_resource(self): return resource - def create(self, client=None): - """API call: create the table via a PUT request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - path = '/projects/%s/datasets/%s/tables' % ( - self._project, self._dataset_id) - api_response = client._connection.api_request( - method='POST', path=path, data=self._build_resource()) - self._set_properties(api_response) - def exists(self, client=None): """API call: test for the existence of the table via a GET request diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index de19a7da01c5..81b3219275b2 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -180,18 +180,19 @@ def test_list_datasets(self): def test_create_table(self): dataset = self.temp_dataset(_make_dataset_id('create_table')) - - TABLE_NAME = 'test_table' + table_id = 'test_table' full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - self.assertFalse(table.exists()) - table.create() + table_arg = Table(dataset.table(table_id), schema=[full_name, age], + client=Config.CLIENT) + self.assertFalse(table_arg.exists()) + + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) + self.assertTrue(table.exists()) - self.assertEqual(table.table_id, TABLE_NAME) + self.assertEqual(table.table_id, table_id) def test_get_table_w_public_dataset(self): PUBLIC = 'bigquery-public-data' @@ -227,10 +228,10 @@ def test_list_dataset_tables(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') for table_name in tables_to_create: - created_table = Table(dataset.table(table_name), - schema=[full_name, age], - client=Config.CLIENT) - created_table.create() + table = Table(dataset.table(table_name), + schema=[full_name, age], + client=Config.CLIENT) + created_table = retry_403(Config.CLIENT.create_table)(table) self.to_delete.insert(0, created_table) # Retrieve the tables. @@ -249,10 +250,10 @@ def test_patch_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - self.assertFalse(table.exists()) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) + self.assertFalse(table_arg.exists()) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(table.exists()) self.assertIsNone(table.friendly_name) @@ -268,10 +269,10 @@ def test_update_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - self.assertFalse(table.exists()) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) + self.assertFalse(table_arg.exists()) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(table.exists()) voter = bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE') @@ -309,10 +310,10 @@ def test_insert_data_then_dump_table(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') now = bigquery.SchemaField('now', 'TIMESTAMP') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age, now], - client=Config.CLIENT) - self.assertFalse(table.exists()) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), + schema=[full_name, age, now], client=Config.CLIENT) + self.assertFalse(table_arg.exists()) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(table.exists()) @@ -346,9 +347,9 @@ def test_load_table_from_local_file_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: @@ -450,9 +451,9 @@ def test_load_table_from_storage_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) job = Config.CLIENT.load_table_from_storage( @@ -652,9 +653,9 @@ def test_job_cancel(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) job = Config.CLIENT.run_async_query(JOB_NAME, QUERY) @@ -839,9 +840,9 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): dataset = self.temp_dataset(dataset_id) greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') - table = Table(dataset.table(table_id), schema=[greeting], - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(table_id), schema=[greeting], + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: @@ -1228,9 +1229,9 @@ def test_insert_nested_nested(self): ] table_name = 'test_table' dataset = self.temp_dataset(_make_dataset_id('issue_2951')) - table = Table(dataset.table(table_name), schema=schema, - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(table_name), schema=schema, + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) table.insert_data(to_insert) @@ -1245,9 +1246,9 @@ def test_create_table_insert_fetch_nested_schema(self): dataset = self.temp_dataset( _make_dataset_id('create_table_nested_schema')) schema = _load_json_schema() - table = Table(dataset.table(table_name), schema=schema, - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(table_name), schema=schema, + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(table.exists()) self.assertEqual(table.table_id, table_name) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 50c324ebfc32..96a9dd240132 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -399,6 +399,151 @@ def test_create_dataset_w_attrs(self): self.assertEqual(ds.default_table_expiration_ms, 3600) self.assertEqual(ds.labels, LABELS) + def test_create_table_w_day_partition(self): + from google.cloud.bigquery.table import Table + + project = 'PROJECT' + dataset_id = 'dataset_id' + table_id = 'table-id' + path = 'projects/%s/datasets/%s/tables' % ( + project, dataset_id) + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + } + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=client) + table.partitioning_type = 'DAY' + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'timePartitioning': {'type': 'DAY'}, + } + self.assertEqual(req['data'], sent) + self.assertEqual(table.partitioning_type, "DAY") + self.assertEqual(got.table_id, table_id) + + def test_create_table_w_day_partition_and_expire(self): + from google.cloud.bigquery.table import Table + + project = 'PROJECT' + dataset_id = 'dataset_id' + table_id = 'table-id' + path = 'projects/%s/datasets/%s/tables' % ( + project, dataset_id) + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + } + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=client) + table.partitioning_type = 'DAY' + table.partition_expiration = 100 + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, + } + self.assertEqual(req['data'], sent) + self.assertEqual(table.partitioning_type, "DAY") + self.assertEqual(table.partition_expiration, 100) + self.assertEqual(got.table_id, table_id) + + def test_create_table_w_schema_and_query(self): + from google.cloud.bigquery.table import Table, SchemaField + + project = 'PROJECT' + dataset_id = 'dataset_id' + table_id = 'table-id' + path = 'projects/%s/datasets/%s/tables' % ( + project, dataset_id) + query = 'SELECT * from %s:%s' % (dataset_id, table_id) + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] + }, + 'view': { + 'query': query, + 'useLegacySql': True + }, + } + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, schema=schema, client=client) + table.view_query = query + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] + }, + 'view': {'query': query}, + } + self.assertEqual(req['data'], sent) + self.assertEqual(got.table_id, table_id) + self.assertEqual(got.project, project) + self.assertEqual(got.dataset_id, dataset_id) + self.assertEqual(got.schema, schema) + self.assertEqual(got.view_query, query) + def test_get_table(self): project = 'PROJECT' dataset_id = 'dataset_id' diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index f2c2297d244b..22dce9012188 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -557,135 +557,6 @@ def test_from_api_repr_w_properties(self): self.assertIs(table._client, client) self._verifyResourceProperties(table, RESOURCE) - def test_create_new_day_partitioned_table(self): - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - table.partitioning_type = 'DAY' - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'timePartitioning': {'type': 'DAY'}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_w_bound_client(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_w_partition_no_expire(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - - self.assertIsNone(table.partitioning_type) - table.partitioning_type = "DAY" - self.assertEqual(table.partitioning_type, "DAY") - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'timePartitioning': {'type': 'DAY'}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_w_partition_and_expire(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - self.assertIsNone(table.partition_expiration) - table.partition_expiration = 100 - self.assertEqual(table.partitioning_type, "DAY") - self.assertEqual(table.partition_expiration, 100) - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - def test_partition_type_setter_bad_type(self): from google.cloud.bigquery.table import SchemaField @@ -833,91 +704,6 @@ def test_list_partitions(self): client=client) self.assertEqual(table.list_partitions(), [20160804, 20160805]) - def test_create_w_alternate_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - QUERY = 'select fullname, age from person_ages' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, - tzinfo=UTC) - RESOURCE['expirationTime'] = _millis(self.EXP_TIME) - RESOURCE['view'] = {} - RESOURCE['view']['query'] = QUERY - RESOURCE['type'] = 'VIEW' - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - table.friendly_name = TITLE - table.description = DESCRIPTION - table.view_query = QUERY - - table.create(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - 'view': {'query': QUERY}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_w_missing_output_properties(self): - # In the wild, the resource returned from 'dataset.create' sometimes - # lacks 'creationTime' / 'lastModifiedTime' - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - del RESOURCE['creationTime'] - del RESOURCE['lastModifiedTime'] - self.WHEN = None - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - def test_exists_miss_w_bound_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) From e2ba936022752a17ed6bc6cdaa036d9516533754 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 22 Sep 2017 17:18:43 -0700 Subject: [PATCH 0211/2016] BigQuery: Remove unnecessary line from client.create_table() test (#4043) --- packages/google-cloud-bigquery/tests/unit/test_client.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 96a9dd240132..9cfa61234fcc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -505,10 +505,7 @@ def test_create_table_w_schema_and_query(self): {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] }, - 'view': { - 'query': query, - 'useLegacySql': True - }, + 'view': {'query': query}, } schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), From 8ef47b06ca52b9bb596ac6f9cf89c0b4938ace6c Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 25 Sep 2017 12:35:20 -0400 Subject: [PATCH 0212/2016] bigquery: add system test for dataset update with etag (#4052) --- packages/google-cloud-bigquery/tests/system.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 81b3219275b2..a50ae06ee5d8 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -24,6 +24,7 @@ import six +from google.api.core.exceptions import PreconditionFailed from google.cloud import bigquery from google.cloud.bigquery.dataset import Dataset, DatasetReference from google.cloud.bigquery.table import Table @@ -158,7 +159,11 @@ def test_update_dataset(self): ds3 = Config.CLIENT.update_dataset(ds2, ['labels']) self.assertEqual(ds3.labels, {'color': 'green', 'shape': 'circle'}) - # TODO(jba): test that read-modify-write with ETag works. + # If we try to update using d2 again, it will fail because the + # previous update changed the ETag. + ds2.description = 'no good' + with self.assertRaises(PreconditionFailed): + Config.CLIENT.update_dataset(ds2, ['description']) def test_list_datasets(self): datasets_to_create = [ From a9dc0fea377a1be9d87f9f04aaf78d6a46c6a44b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 25 Sep 2017 12:21:17 -0700 Subject: [PATCH 0213/2016] BQ: remove redundant __eq__ from Query Parameters. (#4055) Got duplicate definitions after the rebase. --- .../google/cloud/bigquery/_helpers.py | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index abe7a8934c96..51000148fb0b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -527,14 +527,6 @@ def __init__(self, name, type_, value): self.type_ = type_ self.value = value - def __eq__(self, other): - if not isinstance(other, ScalarQueryParameter): - return NotImplemented - return( - self.name == other.name and - self.type_ == other.type_ and - self.value == other.value) - @classmethod def positional(cls, type_, value): """Factory for positional paramater. @@ -637,14 +629,6 @@ def __init__(self, name, array_type, values): self.array_type = array_type self.values = values - def __eq__(self, other): - if not isinstance(other, ArrayQueryParameter): - return NotImplemented - return( - self.name == other.name and - self.array_type == other.array_type and - self.values == other.values) - @classmethod def positional(cls, array_type, values): """Factory for positional parameters. @@ -789,14 +773,6 @@ def __init__(self, name, *sub_params): types[sub.name] = sub.type_ values[sub.name] = sub.value - def __eq__(self, other): - if not isinstance(other, StructQueryParameter): - return NotImplemented - return( - self.name == other.name and - self.struct_types == other.struct_types and - self.struct_values == other.struct_values) - @classmethod def positional(cls, *sub_params): """Factory for positional parameters. From f2487eba7dcfa381ffde8336add286fcb15a8b5d Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 25 Sep 2017 15:55:00 -0400 Subject: [PATCH 0214/2016] bigquery: modify CopyJob (#4051) Update CopyJob and CopyJobConfig to conform to the new design for jobs. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/client.py | 49 ++++-- .../google/cloud/bigquery/job.py | 164 +++++++++++------- .../google-cloud-bigquery/tests/system.py | 32 ++++ .../tests/unit/test_client.py | 40 ++++- .../tests/unit/test_job.py | 63 ++++--- 6 files changed, 247 insertions(+), 103 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 333854035376..ec92e7c40128 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -32,6 +32,7 @@ from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -42,6 +43,7 @@ 'ArrayQueryParameter', 'Client', 'Dataset', + 'CopyJobConfig', 'ExtractJobConfig', 'ScalarQueryParameter', 'SchemaField', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index bbeac294680d..972f00b317f8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -16,6 +16,7 @@ from __future__ import absolute_import +import collections import uuid from google.api.core import page_iterator @@ -492,25 +493,39 @@ def load_table_from_storage(self, job_id, destination, *source_uris): """ return LoadJob(job_id, destination, source_uris, client=self) - def copy_table(self, job_id, destination, *sources): - """Construct a job for copying one or more tables into another table. + def copy_table(self, sources, destination, job_id=None, job_config=None): + """Start a job for copying one or more tables into another table. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy - :type job_id: str - :param job_id: Name of the job. + :type sources: One of: + :class:`~google.cloud.bigquery.table.TableReference` + sequence of + :class:`~google.cloud.bigquery.table.TableReference` + :param sources: Table or tables to be copied. - :type destination: :class:`google.cloud.bigquery.table.Table` + + :type destination: :class:`google.cloud.bigquery.table.TableReference` :param destination: Table into which data is to be copied. - :type sources: sequence of :class:`google.cloud.bigquery.table.Table` - :param sources: tables to be copied. + :type job_id: str + :param job_id: (Optional) The ID of the job. + + :type job_config: :class:`google.cloud.bigquery.job.CopyJobConfig` + :param job_config: (Optional) Extra configuration options for the job. :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: a new ``CopyJob`` instance """ - return CopyJob(job_id, destination, sources, client=self) + job_id = _make_job_id(job_id) + + if not isinstance(sources, collections.Sequence): + sources = [sources] + job = CopyJob(job_id, sources, destination, client=self, + job_config=job_config) + job.begin() + return job def extract_table(self, source, *destination_uris, **kwargs): """Start a job to extract a table into Cloud Storage files. @@ -541,9 +556,7 @@ def extract_table(self, source, *destination_uris, **kwargs): :returns: a new ``ExtractJob`` instance """ job_config = kwargs.get('job_config') - job_id = kwargs.get('job_id') - if job_id is None: - job_id = str(uuid.uuid4()) + job_id = _make_job_id(kwargs.get('job_id')) job = ExtractJob( job_id, source, list(destination_uris), client=self, @@ -667,3 +680,17 @@ def _item_to_table(iterator, resource): :returns: The next table in the page. """ return Table.from_api_repr(resource, iterator.client) + + +def _make_job_id(job_id): + """Construct an ID for a new job. + + :type job_id: str or ``NoneType`` + :param job_id: the user-provided job ID + + :rtype: str + :returns: A job ID + """ + if job_id is None: + return str(uuid.uuid4()) + return job_id diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index a79fc8e53d20..11b8beee2b7b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -126,7 +126,7 @@ class Compression(_EnumApiResourceProperty): NONE = 'NONE' -class CreateDisposition(_EnumProperty): +class CreateDisposition(_EnumApiResourceProperty): """Pseudo-enum for ``create_disposition`` properties.""" CREATE_IF_NEEDED = 'CREATE_IF_NEEDED' CREATE_NEVER = 'CREATE_NEVER' @@ -159,7 +159,7 @@ class SourceFormat(_EnumProperty): AVRO = 'AVRO' -class WriteDisposition(_EnumProperty): +class WriteDisposition(_EnumApiResourceProperty): """Pseudo-enum for ``write_disposition`` properties.""" WRITE_APPEND = 'WRITE_APPEND' WRITE_TRUNCATE = 'WRITE_TRUNCATE' @@ -688,7 +688,8 @@ def output_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect """ - create_disposition = CreateDisposition('create_disposition') + create_disposition = CreateDisposition('create_disposition', + 'createDisposition') """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition """ @@ -733,7 +734,8 @@ def output_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat """ - write_disposition = WriteDisposition('write_disposition') + write_disposition = WriteDisposition('write_disposition', + 'writeDisposition') """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition """ @@ -853,13 +855,51 @@ def from_api_repr(cls, resource, client): return job -class _CopyConfiguration(object): - """User-settable configuration options for copy jobs. +class CopyJobConfig(object): + """Configuration options for copy jobs. - Values which are ``None`` -> server defaults. + All properties in this class are optional. Values which are ``None`` -> + server defaults. """ - _create_disposition = None - _write_disposition = None + + def __init__(self): + self._properties = {} + + create_disposition = CreateDisposition('create_disposition', + 'createDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition + """ + + write_disposition = WriteDisposition('write_disposition', + 'writeDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition + """ + + def to_api_repr(self): + """Build an API representation of the copy job config. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config class CopyJob(_AsyncJob): @@ -868,41 +908,45 @@ class CopyJob(_AsyncJob): :type job_id: str :param job_id: the job's ID, within the project belonging to ``client``. - :type destination: :class:`google.cloud.bigquery.table.Table` - :param destination: Table into which data is to be loaded. - - :type sources: list of :class:`google.cloud.bigquery.table.Table` + :type sources: list of :class:`google.cloud.bigquery.table.TableReference` :param sources: Table into which data is to be loaded. + :type destination: :class:`google.cloud.bigquery.table.TableReference` + :param destination: Table into which data is to be loaded. + :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - """ + :type job_config: :class:`~google.cloud.bigquery.job.CopyJobConfig` + :param job_config: + (Optional) Extra configuration options for the copy job. + """ _JOB_TYPE = 'copy' - def __init__(self, job_id, destination, sources, client): + def __init__(self, job_id, sources, destination, client, job_config=None): super(CopyJob, self).__init__(job_id, client) + + if job_config is None: + job_config = CopyJobConfig() + self.destination = destination self.sources = sources - self._configuration = _CopyConfiguration() - - create_disposition = CreateDisposition('create_disposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition - """ + self._configuration = job_config - write_disposition = WriteDisposition('write_disposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition - """ + @property + def create_disposition(self): + """See + :class:`~google.cloud.bigquery.job.CopyJobConfig.create_disposition`. + """ + return self._configuration.create_disposition - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - if self.create_disposition is not None: - configuration['createDisposition'] = self.create_disposition - if self.write_disposition is not None: - configuration['writeDisposition'] = self.write_disposition + @property + def write_disposition(self): + """See + :class:`~google.cloud.bigquery.job.CopyJobConfig.write_disposition`. + """ + return self._configuration.write_disposition def _build_resource(self): """Generate a resource for :meth:`begin`.""" @@ -913,31 +957,27 @@ def _build_resource(self): 'tableId': table.table_id, } for table in self.sources] - resource = { + configuration = self._configuration.to_api_repr() + configuration['sourceTables'] = source_refs + configuration['destinationTable'] = { + 'projectId': self.destination.project, + 'datasetId': self.destination.dataset_id, + 'tableId': self.destination.table_id, + } + + return { 'jobReference': { 'projectId': self.project, 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'sourceTables': source_refs, - 'destinationTable': { - 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.table_id, - }, - }, + self._JOB_TYPE: configuration, }, } - configuration = resource['configuration'][self._JOB_TYPE] - self._populate_config_resource(configuration) - - return resource def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" - self.create_disposition = configuration.get('createDisposition') - self.write_disposition = configuration.get('writeDisposition') + self._configuration._properties = copy.deepcopy(configuration) @classmethod def from_api_repr(cls, resource, client): @@ -958,27 +998,23 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: Job parsed from ``resource``. """ - job_id, config = cls._get_resource_config(resource) - dest_config = config['destinationTable'] - ds_ref = DatasetReference(dest_config['projectId'], - dest_config['datasetId'],) - dataset = Dataset(ds_ref) - table_ref = TableReference(dataset, dest_config['tableId']) - destination = Table(table_ref, client=client) + job_id, config_resource = cls._get_resource_config(resource) + config = CopyJobConfig.from_api_repr(config_resource) + destination = TableReference.from_api_repr( + config_resource['destinationTable']) sources = [] - source_configs = config.get('sourceTables') + source_configs = config_resource.get('sourceTables') if source_configs is None: - single = config.get('sourceTable') + single = config_resource.get('sourceTable') if single is None: raise KeyError( "Resource missing 'sourceTables' / 'sourceTable'") source_configs = [single] for source_config in source_configs: - ds_ref = DatasetReference(source_config['projectId'], - source_config['datasetId']) - table_ref = ds_ref.table(source_config['tableId']) - sources.append(Table(table_ref, client=client)) - job = cls(job_id, destination, sources, client=client) + table_ref = TableReference.from_api_repr(source_config) + sources.append(table_ref) + job = cls( + job_id, sources, destination, client=client, job_config=config) job._set_properties(resource) return job @@ -1017,7 +1053,7 @@ def __init__(self): """ def to_api_repr(self): - """Build an API representation of the extact job config. + """Build an API representation of the extract job config. :rtype: dict :returns: A dictionary in the format used by the BigQuery API. @@ -1243,7 +1279,8 @@ def __init__(self, job_id, query, client, https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults """ - create_disposition = CreateDisposition('create_disposition') + create_disposition = CreateDisposition('create_disposition', + 'createDisposition') """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition """ @@ -1289,7 +1326,8 @@ def __init__(self, job_id, query, client, reference/rest/v2/jobs#configuration.dryRun """ - write_disposition = WriteDisposition('write_disposition') + write_disposition = WriteDisposition('write_disposition', + 'writeDisposition') """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition """ diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index a50ae06ee5d8..1f0e917d34fd 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -647,6 +647,38 @@ def test_extract_table_w_job_config(self): got = destination.download_as_string().decode('utf-8') self.assertIn('"Bharney Rhubble"', got) + def test_copy_table(self): + dataset = self.temp_dataset(_make_dataset_id('copy_table')) + schema = ( + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + ) + source_ref = dataset.table('source_table') + source_arg = Table(source_ref, schema=schema, client=Config.CLIENT) + source_table = retry_403(Config.CLIENT.create_table)(source_arg) + self.to_delete.insert(0, source_table) + rows = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + errors = source_table.insert_data(rows) + self.assertEqual(len(errors), 0) + + destination_ref = dataset.table('destination_table') + job_config = bigquery.CopyJobConfig() + job = Config.CLIENT.copy_table( + source_ref, destination_ref, job_config=job_config) + job.result() + + destination_table = Config.CLIENT.get_table(destination_ref) + self.to_delete.insert(0, destination_table) + got_rows = self._fetch_single_page(destination_table) + by_age = operator.itemgetter(1) + self.assertEqual(sorted(got_rows, key=by_age), + sorted(rows, key=by_age)) + def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') JOB_NAME = 'fetch_' + DATASET_ID diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 9cfa61234fcc..9f9354cdd2cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1113,19 +1113,57 @@ def test_copy_table(self): DATASET = 'dataset_name' SOURCE = 'source_table' DESTINATION = 'destination_table' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'copy': { + 'sourceTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': SOURCE, + }, + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': DESTINATION, + }, + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESOURCE) dataset = client.dataset(DATASET) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) - job = client.copy_table(JOB, destination, source) + + job = client.copy_table(source, destination, job_id=JOB) + + # Check that copy_table actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/%s/jobs' % PROJECT) + self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.sources), [source]) self.assertIs(job.destination, destination) + conn = client._connection = _Connection(RESOURCE) + source2 = dataset.table(SOURCE + '2') + job = client.copy_table([source, source2], destination, job_id=JOB) + self.assertIsInstance(job, CopyJob) + self.assertIs(job._client, client) + self.assertEqual(job.job_id, JOB) + self.assertEqual(list(job.sources), [source, source2]) + self.assertIs(job.destination, destination) + def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ca348704127c..d0a654c0c15d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -17,7 +17,7 @@ from six.moves import http_client import unittest -from google.cloud.bigquery.job import ExtractJobConfig +from google.cloud.bigquery.job import ExtractJobConfig, CopyJobConfig from google.cloud.bigquery.dataset import DatasetReference @@ -83,9 +83,12 @@ def test_missing_reason(self): class _Base(object): + from google.cloud.bigquery.dataset import DatasetReference + PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' DS_ID = 'datset_id' + DS_REF = DatasetReference(PROJECT, DS_ID) TABLE_ID = 'table_id' JOB_NAME = 'job_name' @@ -104,6 +107,11 @@ def _setUpConstants(self): self.RESOURCE_URL = 'http://example.com/path/to/resource' self.USER_EMAIL = 'phred@example.com' + def _table_ref(self, table_id): + from google.cloud.bigquery.table import TableReference + + return TableReference(self.DS_REF, table_id) + def _makeResource(self, started=False, ended=False): self._setUpConstants() resource = { @@ -895,9 +903,9 @@ def _verifyResourceProperties(self, job, resource): def test_ctor(self): client = _Client(self.PROJECT) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client) self.assertIs(job.destination, destination) self.assertEqual(job.sources, [source]) self.assertIs(job._client, client) @@ -1035,9 +1043,9 @@ def test_begin_w_bound_client(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client) job.begin() @@ -1090,13 +1098,13 @@ def test_begin_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client1) - - job.create_disposition = 'CREATE_NEVER' - job.write_disposition = 'WRITE_TRUNCATE' - + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + config = CopyJobConfig() + config.create_disposition = 'CREATE_NEVER' + config.write_disposition = 'WRITE_TRUNCATE' + job = self._make_one(self.JOB_NAME, [source], destination, client1, + config) job.begin(client=client2) self.assertEqual(len(conn1._requested), 0) @@ -1120,9 +1128,10 @@ def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client) self.assertFalse(job.exists()) @@ -1138,9 +1147,9 @@ def test_exists_hit_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client1) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client1) self.assertTrue(job.exists(client=client2)) @@ -1156,9 +1165,9 @@ def test_reload_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client) job.reload() @@ -1175,9 +1184,9 @@ def test_reload_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client1) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client1) job.reload(client=client2) @@ -2709,8 +2718,6 @@ def __init__(self, table_id=None): @property def table_id(self): - if self._table_id is not None: - return self._table_id return TestLoadJob.TABLE_ID @property From 4f582d87480000fd04beeb70db062d973871c7b3 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Tue, 26 Sep 2017 12:18:14 -0400 Subject: [PATCH 0215/2016] bigquery: fix copy job system test (#4059) --- .../google-cloud-bigquery/tests/system.py | 41 +++++++------------ 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1f0e917d34fd..478ac065baa0 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -648,36 +648,25 @@ def test_extract_table_w_job_config(self): self.assertIn('"Bharney Rhubble"', got) def test_copy_table(self): - dataset = self.temp_dataset(_make_dataset_id('copy_table')) - schema = ( - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), - ) - source_ref = dataset.table('source_table') - source_arg = Table(source_ref, schema=schema, client=Config.CLIENT) - source_table = retry_403(Config.CLIENT.create_table)(source_arg) - self.to_delete.insert(0, source_table) - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] - errors = source_table.insert_data(rows) - self.assertEqual(len(errors), 0) - - destination_ref = dataset.table('destination_table') + # If we create a new table to copy from, the test won't work + # because the new rows will be stored in the streaming buffer, + # and copy jobs don't read the streaming buffer. + # We could wait for the streaming buffer to empty, but that could + # take minutes. Instead we copy a small public table. + source_dataset = DatasetReference('bigquery-public-data', 'samples') + source_ref = source_dataset.table('shakespeare') + dest_dataset = self.temp_dataset(_make_dataset_id('copy_table')) + dest_ref = dest_dataset.table('destination_table') job_config = bigquery.CopyJobConfig() job = Config.CLIENT.copy_table( - source_ref, destination_ref, job_config=job_config) + source_ref, dest_ref, job_config=job_config) job.result() - destination_table = Config.CLIENT.get_table(destination_ref) - self.to_delete.insert(0, destination_table) - got_rows = self._fetch_single_page(destination_table) - by_age = operator.itemgetter(1) - self.assertEqual(sorted(got_rows, key=by_age), - sorted(rows, key=by_age)) + dest_table = Config.CLIENT.get_table(dest_ref) + self.to_delete.insert(0, dest_table) + # Just check that we got some rows. + got_rows = self._fetch_single_page(dest_table) + self.assertTrue(len(got_rows) > 0) def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') From a9d4b180a6ab7b96c992f60cfc1d86f01a7968e2 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 27 Sep 2017 11:32:46 -0700 Subject: [PATCH 0216/2016] BigQuery: replaces table.delete() with client.delete_table() (#4066) --- .../google/cloud/bigquery/client.py | 17 +++++++++ .../google/cloud/bigquery/table.py | 14 -------- .../google-cloud-bigquery/tests/system.py | 2 ++ .../tests/unit/test_client.py | 28 +++++++++++++++ .../tests/unit/test_table.py | 35 ------------------- 5 files changed, 47 insertions(+), 49 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 972f00b317f8..bae5613b629a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -25,6 +25,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob @@ -333,6 +334,22 @@ def delete_dataset(self, dataset): raise TypeError('dataset must be a Dataset or a DatasetReference') self._connection.api_request(method='DELETE', path=dataset.path) + def delete_table(self, table): + """Delete a table + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete + + :type table: One of: + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` + + :param table: the table to delete, or a reference to it. + """ + if not isinstance(table, (Table, TableReference)): + raise TypeError('table must be a Table or a TableReference') + self._connection.api_request(method='DELETE', path=table.path) + def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 03214e52a7be..a4f1933303a0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -768,20 +768,6 @@ def update(self, client=None): method='PUT', path=self.path, data=self._build_resource()) self._set_properties(api_response) - def delete(self, client=None): - """API call: delete the table via a DELETE request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - client._connection.api_request(method='DELETE', path=self.path) - def fetch_data(self, max_results=None, page_token=None, client=None): """API call: fetch the table data via a GET request diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 478ac065baa0..2785f21cb2a3 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -108,6 +108,8 @@ def _still_in_use(bad_request): retry_409(doomed.delete)(force=True) elif isinstance(doomed, Dataset): retry_in_use(Config.CLIENT.delete_dataset)(doomed) + elif isinstance(doomed, Table): + retry_in_use(Config.CLIENT.delete_table)(doomed) else: doomed.delete() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 9f9354cdd2cb..bdab1d36c2cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -794,6 +794,34 @@ def test_delete_dataset_wrong_type(self): with self.assertRaises(TypeError): client.delete_dataset(client.dataset(DS_ID).table("foo")) + def test_delete_table(self): + from google.cloud.bigquery.table import Table + + project = 'PROJECT' + dataset_id = 'dataset_id' + table_id = 'table-id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + creds = _make_credentials() + http = object() + client = self._make_one(project=project, credentials=creds, _http=http) + conn = client._connection = _Connection({}, {}) + table_ref = client.dataset(dataset_id).table(table_id) + + for arg in (table_ref, Table(table_ref)): + client.delete_table(arg) + req = conn._requested[0] + self.assertEqual(req['method'], 'DELETE') + self.assertEqual(req['path'], '/%s' % path) + + def test_delete_table_w_wrong_type(self): + project = 'PROJECT' + dataset_id = 'DATASET_ID' + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + with self.assertRaises(TypeError): + client.delete_table(client.dataset(dataset_id)) + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 22dce9012188..c86c21880bda 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -945,41 +945,6 @@ def test_update_w_alternate_client(self): self.assertEqual(req['data'], SENT) self._verifyResourceProperties(table, RESOURCE) - def test_delete_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - table.delete() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_delete_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - - table.delete(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - def test_fetch_data_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA From c8691f48dc663c69ab0dbf871a98b7a6388a085b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 27 Sep 2017 16:29:59 -0700 Subject: [PATCH 0217/2016] BigQuery: add client.query_rows(), remove client.run_sync_query(). (#4065) * BigQuery: add client.query_rows(), remove client.run_sync_query(). The query_rows() method will be the new way to run a query synchronously. It starts a query job, then waits for the results, returning the rows as results. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/client.py | 50 +++-- .../google/cloud/bigquery/job.py | 163 +++++++++++----- .../google/cloud/bigquery/query.py | 1 + .../google-cloud-bigquery/tests/system.py | 67 ++++--- .../tests/unit/test_client.py | 184 ++++++++++++++---- 6 files changed, 337 insertions(+), 130 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index ec92e7c40128..7bbcc7782ee2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -34,6 +34,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import ExtractJobConfig +from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -45,6 +46,7 @@ 'Dataset', 'CopyJobConfig', 'ExtractJobConfig', + 'QueryJobConfig', 'ScalarQueryParameter', 'SchemaField', 'StructQueryParameter', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index bae5613b629a..7ceed4fc1e41 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -30,6 +30,7 @@ from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import QueryResults @@ -612,29 +613,46 @@ def run_async_query(self, job_id, query, udf_resources=udf_resources, query_parameters=query_parameters) - def run_sync_query(self, query, udf_resources=(), query_parameters=()): - """Run a SQL query synchronously. + def query_rows(self, query, job_config=None, job_id=None, timeout=None): + """Start a query job and wait for the results. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query :type query: str :param query: SQL query to be executed - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery._helpers.UDFResource` - (empty by default) + :type job_id: str + :param job_id: (Optional) ID to use for the query job. - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) + :type timeout: int + :param timeout: + (Optional) How long to wait for job to complete before raising a + :class:`TimeoutError`. - :rtype: :class:`google.cloud.bigquery.query.QueryResults` - :returns: a new ``QueryResults`` instance + :rtype: :class:`~google.api.core.page_iterator.Iterator` + :returns: + Iterator of row data :class:`tuple`s. During each page, the + iterator will have the ``total_rows`` attribute set, which counts + the total number of rows **in the result set** (this is distinct + from the total number of rows in the current page: + ``iterator.page.num_items``). + + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job + failed or :class:`TimeoutError` if the job did not complete in the + given timeout. """ - return QueryResults(query, client=self, - udf_resources=udf_resources, - query_parameters=query_parameters) + job_id = _make_job_id(job_id) + + # TODO(swast): move standard SQL default to QueryJobConfig class. + if job_config is None: + job_config = QueryJobConfig() + if job_config.use_legacy_sql is None: + job_config.use_legacy_sql = False + + job = QueryJob(job_id, query, client=self, job_config=job_config) + job.begin() + return job.result(timeout=timeout) # pylint: disable=unused-argument diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 11b8beee2b7b..812dde4b32a3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1218,11 +1218,52 @@ def from_api_repr(cls, resource, client): return job -class _AsyncQueryConfiguration(object): - """User-settable configuration options for asynchronous query jobs. +class QueryJobConfig(object): + """Configuration options for query jobs. - Values which are ``None`` -> server defaults. + All properties in this class are optional. Values which are ``None`` -> + server defaults. """ + + def __init__(self): + self._properties = {} + + def to_api_repr(self): + """Build an API representation of the copy job config. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + use_legacy_sql = _TypedApiResourceProperty( + 'use_legacy_sql', 'useLegacySql', bool) + """See + https://cloud.google.com/bigquery/docs/\ + reference/v2/jobs#configuration.query.useLegacySql + """ + + dry_run = _TypedApiResourceProperty('dry_run', 'dryRun', bool) + """See + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun + """ + _allow_large_results = None _create_disposition = None _default_dataset = None @@ -1231,7 +1272,6 @@ class _AsyncQueryConfiguration(object): _priority = None _use_query_cache = None _use_legacy_sql = None - _dry_run = None _write_disposition = None _maximum_billing_tier = None _maximum_bytes_billed = None @@ -1260,20 +1300,60 @@ class QueryJob(_AsyncJob): An iterable of :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` (empty by default) + + :type job_config: :class:`~google.cloud.bigquery.job.QueryJobConfig` + :param job_config: + (Optional) Extra configuration options for the query job. """ _JOB_TYPE = 'query' _UDF_KEY = 'userDefinedFunctionResources' _QUERY_PARAMETERS_KEY = 'queryParameters' def __init__(self, job_id, query, client, - udf_resources=(), query_parameters=()): + udf_resources=(), query_parameters=(), job_config=None): super(QueryJob, self).__init__(job_id, client) + + if job_config is None: + job_config = QueryJobConfig() + self.query = query self.udf_resources = udf_resources self.query_parameters = query_parameters - self._configuration = _AsyncQueryConfiguration() + self._configuration = job_config self._query_results = None + @property + def use_legacy_sql(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + """ + return self._configuration.use_legacy_sql + + @use_legacy_sql.setter + def use_legacy_sql(self, value): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + """ + # TODO(swast): remove this method and only allow setting use_legacy_sql + # on QueryJobConfig objects. + self._configuration.use_legacy_sql = value + + @property + def dry_run(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.dry_run`. + """ + return self._configuration.dry_run + + @dry_run.setter + def dry_run(self, value): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.dry_run`. + """ + # TODO(swast): remove this method and only allow setting dry_run + # on QueryJobConfig objects. + self._configuration.dry_run = value + allow_large_results = _TypedProperty('allow_large_results', bool) """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults @@ -1314,20 +1394,8 @@ def __init__(self, job_id, query, client, https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache """ - use_legacy_sql = _TypedProperty('use_legacy_sql', bool) - """See - https://cloud.google.com/bigquery/docs/\ - reference/v2/jobs#configuration.query.useLegacySql - """ - - dry_run = _TypedProperty('dry_run', bool) - """See - https://cloud.google.com/bigquery/docs/\ - reference/rest/v2/jobs#configuration.dryRun - """ - - write_disposition = WriteDisposition('write_disposition', - 'writeDisposition') + write_disposition = WriteDisposition( + 'write_disposition', 'writeDisposition') """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition """ @@ -1363,8 +1431,6 @@ def _populate_config_resource_booleans(self, configuration): configuration['flattenResults'] = self.flatten_results if self.use_query_cache is not None: configuration['useQueryCache'] = self.use_query_cache - if self.use_legacy_sql is not None: - configuration['useLegacySql'] = self.use_legacy_sql def _populate_config_resource(self, configuration): """Helper for _build_resource: copy config properties to resource""" @@ -1377,8 +1443,8 @@ def _populate_config_resource(self, configuration): 'projectId': self.default_dataset.project, 'datasetId': self.default_dataset.dataset_id, } - if self.destination is not None: - table_res = self._destination_table_resource() + table_res = self._destination_table_resource() + if table_res is not None: configuration['destinationTable'] = table_res if self.priority is not None: configuration['priority'] = self.priority @@ -1406,6 +1472,7 @@ def _populate_config_resource(self, configuration): def _build_resource(self): """Generate a resource for :meth:`begin`.""" + configuration = self._configuration.to_api_repr() resource = { 'jobReference': { @@ -1413,16 +1480,18 @@ def _build_resource(self): 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'query': self.query, - }, + self._JOB_TYPE: configuration, }, } - if self.dry_run is not None: - resource['configuration']['dryRun'] = self.dry_run + # The dryRun property only applies to query jobs, but it is defined at + # a level higher up. We need to remove it from the query config. + if 'dryRun' in configuration: + dry_run = configuration['dryRun'] + del configuration['dryRun'] + resource['configuration']['dryRun'] = dry_run - configuration = resource['configuration'][self._JOB_TYPE] + configuration['query'] = self.query self._populate_config_resource(configuration) return resource @@ -1436,19 +1505,28 @@ def _scrub_local_properties(self, cleaned): the client's project. """ configuration = cleaned['configuration']['query'] - self.query = configuration['query'] + # The dryRun property only applies to query jobs, but it is defined at + # a level higher up. We need to copy it to the query config. + self._configuration.dry_run = cleaned['configuration'].get('dryRun') + def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" + # The dryRun property only applies to query jobs, but it is defined at + # a level higher up. We need to copy it to the query config. + # It should already be correctly set by the _scrub_local_properties() + # method. + dry_run = self.dry_run + self._configuration = QueryJobConfig.from_api_repr(configuration) + self._configuration.dry_run = dry_run + self.allow_large_results = _bool_or_none( configuration.get('allowLargeResults')) self.flatten_results = _bool_or_none( configuration.get('flattenResults')) self.use_query_cache = _bool_or_none( configuration.get('useQueryCache')) - self.use_legacy_sql = _bool_or_none( - configuration.get('useLegacySql')) self.create_disposition = configuration.get('createDisposition') self.priority = configuration.get('priority') @@ -1459,22 +1537,13 @@ def _copy_configuration_properties(self, configuration): dest_remote = configuration.get('destinationTable') - if dest_remote is None: - if self.destination is not None: - del self.destination - else: - dest_local = self._destination_table_resource() - if dest_remote != dest_local: - project = dest_remote['projectId'] - dataset = Dataset(DatasetReference(project, - dest_remote['datasetId'])) - self.destination = dataset.table(dest_remote['tableId']) + if dest_remote is not None: + dataset = DatasetReference( + dest_remote['projectId'], dest_remote['datasetId']) + self.destination = dataset.table(dest_remote['tableId']) def_ds = configuration.get('defaultDataset') - if def_ds is None: - if self.default_dataset is not None: - del self.default_dataset - else: + if def_ds is not None: self.default_dataset = DatasetReference( def_ds['projectId'], def_ds['datasetId']) udf_resources = [] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 38400659bdaf..57199556ed84 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -455,6 +455,7 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, next_token='pageToken', extra_params=params) iterator.query_result = self + iterator.job = self.job return iterator diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 2785f21cb2a3..2fd43f7951c4 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -699,7 +699,7 @@ def test_job_cancel(self): # raise an error, and that the job completed (in the `retry()` # above). - def test_sync_query_w_legacy_sql_types(self): + def test_query_rows_w_legacy_sql_types(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) zoned = naive.replace(tzinfo=UTC) @@ -730,12 +730,13 @@ def test_sync_query_w_legacy_sql_types(self): }, ] for example in examples: - query = Config.CLIENT.run_sync_query(example['sql']) - query.use_legacy_sql = True - query.run() - self.assertEqual(len(query.rows), 1) - self.assertEqual(len(query.rows[0]), 1) - self.assertEqual(query.rows[0][0], example['expected']) + job_config = bigquery.QueryJobConfig() + job_config.use_legacy_sql = True + rows = list(Config.CLIENT.query_rows( + example['sql'], job_config=job_config)) + self.assertEqual(len(rows), 1) + self.assertEqual(len(rows[0]), 1) + self.assertEqual(rows[0][0], example['expected']) def _generate_standard_sql_types_examples(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) @@ -831,15 +832,20 @@ def _generate_standard_sql_types_examples(self): }, ] - def test_sync_query_w_standard_sql_types(self): + def test_query_rows_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() for example in examples: - query = Config.CLIENT.run_sync_query(example['sql']) - query.use_legacy_sql = False - query.run() - self.assertEqual(len(query.rows), 1) - self.assertEqual(len(query.rows[0]), 1) - self.assertEqual(query.rows[0][0], example['expected']) + rows = list(Config.CLIENT.query_rows(example['sql'])) + self.assertEqual(len(rows), 1) + self.assertEqual(len(rows[0]), 1) + self.assertEqual(rows[0][0], example['expected']) + + def test_query_rows_w_failed_query(self): + from google.api.core.exceptions import BadRequest + + with self.assertRaises(BadRequest): + Config.CLIENT.query_rows('invalid syntax;') + # TODO(swast): Ensure that job ID is surfaced in the exception. def test_dbapi_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() @@ -892,7 +898,7 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): job.result(timeout=JOB_TIMEOUT) self._fetch_single_page(table) - def test_sync_query_w_dml(self): + def test_query_w_dml(self): dataset_name = _make_dataset_id('dml_tests') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) @@ -901,12 +907,14 @@ def test_sync_query_w_dml(self): WHERE greeting = 'Hello World' """ - query = Config.CLIENT.run_sync_query( + query_job = Config.CLIENT.run_async_query( + 'test_query_w_dml_{}'.format(unique_resource_id()), query_template.format(dataset_name, table_name)) - query.use_legacy_sql = False - query.run() + query_job.use_legacy_sql = False + query_job.begin() + query_job.result() - self.assertEqual(query.num_dml_affected_rows, 1) + self.assertEqual(query_job.num_dml_affected_rows, 1) def test_dbapi_w_dml(self): dataset_name = _make_dataset_id('dml_tests') @@ -923,7 +931,7 @@ def test_dbapi_w_dml(self): self.assertEqual(Config.CURSOR.rowcount, 1) self.assertIsNone(Config.CURSOR.fetchone()) - def test_sync_query_w_query_params(self): + def test_query_w_query_params(self): from google.cloud.bigquery._helpers import ArrayQueryParameter from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter @@ -1084,14 +1092,16 @@ def test_sync_query_w_query_params(self): }, ] for example in examples: - query = Config.CLIENT.run_sync_query( + query_job = Config.CLIENT.run_async_query( + 'test_query_w_query_params{}'.format(unique_resource_id()), example['sql'], query_parameters=example['query_parameters']) - query.use_legacy_sql = False - query.run() - self.assertEqual(len(query.rows), 1) - self.assertEqual(len(query.rows[0]), 1) - self.assertEqual(query.rows[0][0], example['expected']) + query_job.use_legacy_sql = False + query_job.begin() + rows = [row for row in query_job.result()] + self.assertEqual(len(rows), 1) + self.assertEqual(len(rows[0]), 1) + self.assertEqual(rows[0][0], example['expected']) def test_dbapi_w_query_parameters(self): examples = [ @@ -1217,11 +1227,8 @@ def test_large_query_w_public_data(self): SQL = 'SELECT * from `{}.{}.{}` LIMIT {}'.format( PUBLIC, DATASET_ID, TABLE_NAME, LIMIT) - query = Config.CLIENT.run_sync_query(SQL) - query.use_legacy_sql = False - query.run() + iterator = Config.CLIENT.query_rows(SQL) - iterator = query.fetch_data(max_results=100) rows = list(iterator) self.assertEqual(len(rows), LIMIT) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index bdab1d36c2cb..d1a6d1218ae8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import unittest import mock @@ -1348,58 +1349,167 @@ def test_run_async_w_query_parameters(self): self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, query_parameters) - def test_run_sync_query_defaults(self): - from google.cloud.bigquery.query import QueryResults + def test_query_rows_defaults(self): + from google.api.core.page_iterator import HTTPIterator + JOB = 'job-id' PROJECT = 'PROJECT' - QUERY = 'select count(*) from persons' + QUERY = 'SELECT COUNT(*) FROM persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + }, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, + ] + }, + 'totalRows': '3', + 'pageToken': 'next-page', + } + FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE) + FIRST_PAGE['rows'] = [ + {'f': [{'v': '1'}]}, + {'f': [{'v': '2'}]}, + ] + LAST_PAGE = copy.deepcopy(RESULTS_RESOURCE) + LAST_PAGE['rows'] = [ + {'f': [{'v': '3'}]}, + ] + del LAST_PAGE['pageToken'] creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - query = client.run_sync_query(QUERY) - self.assertIsInstance(query, QueryResults) - self.assertIs(query._client, client) - self.assertIsNone(query.name) - self.assertEqual(query.query, QUERY) - self.assertEqual(query.udf_resources, []) - self.assertEqual(query.query_parameters, []) - - def test_run_sync_query_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource - from google.cloud.bigquery.query import QueryResults + conn = client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, FIRST_PAGE, LAST_PAGE) - RESOURCE_URI = 'gs://some-bucket/js/lib.js' + rows_iter = client.query_rows(QUERY) + rows = list(rows_iter) + + self.assertEqual(rows, [(1,), (2,), (3,)]) + self.assertIs(rows_iter.client, client) + self.assertIsInstance(rows_iter, HTTPIterator) + self.assertEqual(len(conn._requested), 4) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertIsInstance( + req['data']['jobReference']['jobId'], six.string_types) + + def test_query_rows_w_job_id(self): + from google.api.core.page_iterator import HTTPIterator + + JOB = 'job-id' PROJECT = 'PROJECT' - QUERY = 'select count(*) from persons' + QUERY = 'SELECT COUNT(*) FROM persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + }, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, + ] + }, + 'totalRows': '0', + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - query = client.run_sync_query(QUERY, udf_resources=udf_resources) - self.assertIsInstance(query, QueryResults) - self.assertIs(query._client, client) - self.assertIsNone(query.name) - self.assertEqual(query.query, QUERY) - self.assertEqual(query.udf_resources, udf_resources) - self.assertEqual(query.query_parameters, []) - - def test_run_sync_query_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud.bigquery.query import QueryResults + conn = client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) + + rows_iter = client.query_rows(QUERY, job_id=JOB) + rows = [row for row in rows_iter] + self.assertEqual(rows, []) + self.assertIs(rows_iter.client, client) + self.assertIsInstance(rows_iter, HTTPIterator) + self.assertEqual(len(conn._requested), 3) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertEqual(req['data']['jobReference']['jobId'], JOB) + + def test_query_rows_w_job_config(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.api.core.page_iterator import HTTPIterator + + JOB = 'job-id' PROJECT = 'PROJECT' - QUERY = 'select count(*) from persons' + QUERY = 'SELECT COUNT(*) FROM persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': True, + }, + 'dryRun': True, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, + ] + }, + 'totalRows': '0', + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] - query = client.run_sync_query(QUERY, query_parameters=query_parameters) - self.assertIsInstance(query, QueryResults) - self.assertIs(query._client, client) - self.assertIsNone(query.name) - self.assertEqual(query.query, QUERY) - self.assertEqual(query.udf_resources, []) - self.assertEqual(query.query_parameters, query_parameters) + conn = client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) + + job_config = QueryJobConfig() + job_config.use_legacy_sql = True + job_config.dry_run = True + rows_iter = client.query_rows(QUERY, job_id=JOB, job_config=job_config) + + self.assertIsInstance(rows_iter, HTTPIterator) + self.assertEqual(len(conn._requested), 2) + req = conn._requested[0] + configuration = req['data']['configuration'] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertEqual(req['data']['jobReference']['jobId'], JOB) + self.assertEqual(configuration['query']['useLegacySql'], True) + self.assertEqual(configuration['dryRun'], True) class _Connection(object): From f7bb04d8e994bd5b34ebcfa672ab2388e5c7f8fd Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 4 Oct 2017 14:32:42 -0400 Subject: [PATCH 0218/2016] bigquery: modify LoadJob (#4103) This PR handles loading from GCS. Loading from a local file will be done separately. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/client.py | 31 +- .../google/cloud/bigquery/job.py | 407 ++++++++---------- .../google-cloud-bigquery/tests/system.py | 34 +- .../tests/unit/test_client.py | 41 +- .../tests/unit/test_job.py | 231 +++++----- 6 files changed, 376 insertions(+), 370 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 7bbcc7782ee2..3a7cc2be7a69 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -35,6 +35,7 @@ from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import QueryJobConfig +from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -47,6 +48,7 @@ 'CopyJobConfig', 'ExtractJobConfig', 'QueryJobConfig', + 'LoadJobConfig', 'ScalarQueryParameter', 'SchemaField', 'StructQueryParameter', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 7ceed4fc1e41..da69decd03c8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -19,6 +19,8 @@ import collections import uuid +import six + from google.api.core import page_iterator from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection @@ -490,26 +492,37 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, max_results=max_results, extra_params=extra_params) - def load_table_from_storage(self, job_id, destination, *source_uris): - """Construct a job for loading data into a table from CloudStorage. + def load_table_from_storage(self, source_uris, destination, + job_id=None, job_config=None): + """Starts a job for loading data into a table from CloudStorage. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load - :type job_id: str - :param job_id: Name of the job. + :type source_uris: One of: + str + sequence of string + :param source_uris: URIs of data files to be loaded; in format + ``gs:///``. - :type destination: :class:`google.cloud.bigquery.table.Table` + :type destination: :class:`google.cloud.bigquery.table.TableReference` :param destination: Table into which data is to be loaded. - :type source_uris: sequence of string - :param source_uris: URIs of data files to be loaded; in format - ``gs:///``. + :type job_id: str + :param job_id: Name of the job. + + :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` + :param job_config: (Optional) Extra configuration options for the job. :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: a new ``LoadJob`` instance """ - return LoadJob(job_id, destination, source_uris, client=self) + job_id = _make_job_id(job_id) + if isinstance(source_uris, six.string_types): + source_uris = [source_uris] + job = LoadJob(job_id, source_uris, destination, self, job_config) + job.begin() + return job def copy_table(self, sources, destination, job_id=None, job_config=None): """Start a job for copying one or more tables into another table. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 812dde4b32a3..fd427c647a55 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -24,10 +24,8 @@ from google.cloud import exceptions from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds -from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource @@ -106,20 +104,6 @@ def _error_result_to_exception(error_result): status_code, error_result.get('message', ''), errors=[error_result]) -class AutoDetectSchema(_TypedProperty): - """Typed Property for ``autodetect`` properties. - - :raises ValueError: on ``set`` operation if ``instance.schema`` - is already defined. - """ - def __set__(self, instance, value): - self._validate(value) - if instance.schema: - raise ValueError('A schema should not be already defined ' - 'when using schema auto-detection') - setattr(instance._configuration, self._backing_name, value) - - class Compression(_EnumApiResourceProperty): """Pseudo-enum for ``compression`` properties.""" GZIP = 'GZIP' @@ -139,7 +123,7 @@ class DestinationFormat(_EnumApiResourceProperty): AVRO = 'AVRO' -class Encoding(_EnumProperty): +class Encoding(_EnumApiResourceProperty): """Pseudo-enum for ``encoding`` properties.""" UTF_8 = 'UTF-8' ISO_8559_1 = 'ISO-8559-1' @@ -151,7 +135,7 @@ class QueryPriority(_EnumProperty): BATCH = 'BATCH' -class SourceFormat(_EnumProperty): +class SourceFormat(_EnumApiResourceProperty): """Pseudo-enum for ``source_format`` properties.""" CSV = 'CSV' DATASTORE_BACKUP = 'DATASTORE_BACKUP' @@ -166,6 +150,20 @@ class WriteDisposition(_EnumApiResourceProperty): WRITE_EMPTY = 'WRITE_EMPTY' +class AutoDetectSchema(_TypedApiResourceProperty): + """Property for ``autodetect`` properties. + + :raises ValueError: on ``set`` operation if ``instance.schema`` + is already defined. + """ + def __set__(self, instance, value): + self._validate(value) + if instance.schema: + raise ValueError('A schema should not be already defined ' + 'when using schema auto-detection') + instance._properties[self.resource_name] = value + + class _AsyncJob(google.api.core.future.polling.PollingFuture): """Base class for asynchronous jobs. @@ -542,35 +540,151 @@ def cancelled(self): and self.error_result.get('reason') == _STOPPED_REASON) -class _LoadConfiguration(object): - """User-settable configuration options for load jobs. +class LoadJobConfig(object): + """Configuration options for load jobs. - Values which are ``None`` -> server defaults. + All properties in this class are optional. Values which are ``None`` -> + server defaults. """ - _allow_jagged_rows = None - _allow_quoted_newlines = None - _autodetect = None - _create_disposition = None - _encoding = None - _field_delimiter = None - _ignore_unknown_values = None - _max_bad_records = None - _null_marker = None - _quote_character = None - _skip_leading_rows = None - _source_format = None - _write_disposition = None + + def __init__(self): + self._properties = {} + self._schema = () + + allow_jagged_rows = _TypedApiResourceProperty( + 'allow_jagged_rows', 'allowJaggedRows', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows + """ + + allow_quoted_newlines = _TypedApiResourceProperty( + 'allow_quoted_newlines', 'allowQuotedNewlines', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines + """ + + autodetect = AutoDetectSchema('autodetect', 'autodetect', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect + """ + + create_disposition = CreateDisposition('create_disposition', + 'createDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition + """ + + encoding = Encoding('encoding', 'encoding') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding + """ + + field_delimiter = _TypedApiResourceProperty( + 'field_delimiter', 'fieldDelimiter', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter + """ + + ignore_unknown_values = _TypedApiResourceProperty( + 'ignore_unknown_values', 'ignoreUnknownValues', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues + """ + + max_bad_records = _TypedApiResourceProperty( + 'max_bad_records', 'maxBadRecords', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords + """ + + null_marker = _TypedApiResourceProperty( + 'null_marker', 'nullMarker', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker + """ + + quote_character = _TypedApiResourceProperty( + 'quote_character', 'quote', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote + """ + + skip_leading_rows = _TypedApiResourceProperty( + 'skip_leading_rows', 'skipLeadingRows', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows + """ + + source_format = SourceFormat('source_format', 'sourceFormat') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat + """ + + write_disposition = WriteDisposition('write_disposition', + 'writeDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition + """ + + @property + def schema(self): + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema + """ + return list(self._schema) + + @schema.setter + def schema(self, value): + if not all(isinstance(field, SchemaField) for field in value): + raise ValueError('Schema items must be fields') + if self.autodetect: + raise ValueError( + 'Schema can not be set if `autodetect` property is True') + self._schema = tuple(value) + + def to_api_repr(self): + """Build an API representation of the load job config. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + if len(self.schema) > 0: + config['schema'] = {'fields': _build_schema_resource(self.schema)} + # skipLeadingRows is a string because it's defined as an int64, which + # can't be represented as a JSON number. + slr = config.get('skipLeadingRows') + if slr is not None: + config['skipLeadingRows'] = str(slr) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. + """ + schema = resource.pop('schema', {'fields': ()}) + slr = resource.pop('skipLeadingRows', None) + config = cls() + config._properties = copy.deepcopy(resource) + config.schema = _parse_schema_resource(schema) + config.skip_leading_rows = _int_or_none(slr) class LoadJob(_AsyncJob): - """Asynchronous job for loading data into a table from remote URI. + """Asynchronous job for loading data into a table. - :type job_id: str - :param job_id: - The job's ID, belonging to the project associated with the client. + Can load from Google Cloud Storage URIs or from a file. - :type destination: :class:`google.cloud.bigquery.table.Table` - :param destination: Table into which data is to be loaded. + :type job_id: str + :param job_id: the job's ID :type source_uris: sequence of string :param source_uris: @@ -578,56 +692,34 @@ class LoadJob(_AsyncJob): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris for supported URI formats. + :type destination: :class:`google.cloud.bigquery.table.TableReference` + :param destination: reference to table into which data is to be loaded. + :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - - :type schema: list of :class:`google.cloud.bigquery.table.SchemaField` - :param schema: The job's schema """ - _schema = None _JOB_TYPE = 'load' - def __init__(self, name, destination, source_uris, client, schema=()): - super(LoadJob, self).__init__(name, client) - self.destination = destination - self.source_uris = source_uris - self._configuration = _LoadConfiguration() - # Let the @property do validation. This must occur after all other - # attributes have been set. - self.schema = schema + def __init__(self, job_id, source_uris, destination, client, + job_config=None): + super(LoadJob, self).__init__(job_id, client) - @property - def schema(self): - """Table's schema. - - :rtype: list of :class:`SchemaField` - :returns: fields describing the schema - """ - return list(self._schema) + if job_config is None: + job_config = LoadJobConfig() - @schema.setter - def schema(self, value): - """Update table's schema + self.source_uris = source_uris + self.destination = destination + self._configuration = job_config - :type value: list of :class:`SchemaField` - :param value: fields describing the schema + @property + def configuration(self): + """Configuration for this job. - :raises TypeError: If ``value`is not a sequence. - :raises ValueError: If any item in the sequence is not - a ``SchemaField``. + :rtype: :class:`~google.cloud.bigquery.job.LoadJobConfig` """ - if not value: - self._schema = () - else: - if not all(isinstance(field, SchemaField) for field in value): - raise ValueError('Schema items must be fields') - if self.autodetect: - raise ValueError( - 'Schema can not be set if `autodetect` property is True') - - self._schema = tuple(value) + return self._configuration @property def input_file_bytes(self): @@ -673,155 +765,25 @@ def output_rows(self): if statistics is not None: return int(statistics['load']['outputRows']) - allow_jagged_rows = _TypedProperty('allow_jagged_rows', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows - """ - - allow_quoted_newlines = _TypedProperty('allow_quoted_newlines', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines - """ - - autodetect = AutoDetectSchema('autodetect', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect - """ - - create_disposition = CreateDisposition('create_disposition', - 'createDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition - """ - - encoding = Encoding('encoding') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding - """ - - field_delimiter = _TypedProperty('field_delimiter', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter - """ - - ignore_unknown_values = _TypedProperty('ignore_unknown_values', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues - """ - - max_bad_records = _TypedProperty('max_bad_records', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords - """ - - null_marker = _TypedProperty('null_marker', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker - """ - - quote_character = _TypedProperty('quote_character', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote - """ - - skip_leading_rows = _TypedProperty('skip_leading_rows', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows - """ - - source_format = SourceFormat('source_format') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat - """ - - write_disposition = WriteDisposition('write_disposition', - 'writeDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition - """ - - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - if self.allow_jagged_rows is not None: - configuration['allowJaggedRows'] = self.allow_jagged_rows - if self.allow_quoted_newlines is not None: - configuration['allowQuotedNewlines'] = self.allow_quoted_newlines - if self.autodetect is not None: - configuration['autodetect'] = self.autodetect - if self.create_disposition is not None: - configuration['createDisposition'] = self.create_disposition - if self.encoding is not None: - configuration['encoding'] = self.encoding - if self.field_delimiter is not None: - configuration['fieldDelimiter'] = self.field_delimiter - if self.ignore_unknown_values is not None: - configuration['ignoreUnknownValues'] = self.ignore_unknown_values - if self.max_bad_records is not None: - configuration['maxBadRecords'] = self.max_bad_records - if self.null_marker is not None: - configuration['nullMarker'] = self.null_marker - if self.quote_character is not None: - configuration['quote'] = self.quote_character - if self.skip_leading_rows is not None: - configuration['skipLeadingRows'] = str(self.skip_leading_rows) - if self.source_format is not None: - configuration['sourceFormat'] = self.source_format - if self.write_disposition is not None: - configuration['writeDisposition'] = self.write_disposition - def _build_resource(self): """Generate a resource for :meth:`begin`.""" - resource = { + configuration = self._configuration.to_api_repr() + configuration['sourceUris'] = self.source_uris + configuration['destinationTable'] = self.destination.to_api_repr() + + return { 'jobReference': { 'projectId': self.project, 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'sourceUris': self.source_uris, - 'destinationTable': { - 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.table_id, - }, - }, + self._JOB_TYPE: configuration, }, } - configuration = resource['configuration'][self._JOB_TYPE] - self._populate_config_resource(configuration) - - if len(self.schema) > 0: - configuration['schema'] = { - 'fields': _build_schema_resource(self.schema)} - - return resource - - def _scrub_local_properties(self, cleaned): - """Helper: handle subclass properties in cleaned.""" - schema = cleaned.pop('schema', {'fields': ()}) - self.schema = _parse_schema_resource(schema) def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" - self.allow_jagged_rows = _bool_or_none( - configuration.get('allowJaggedRows')) - self.allow_quoted_newlines = _bool_or_none( - configuration.get('allowQuotedNewlines')) - self.autodetect = _bool_or_none( - configuration.get('autodetect')) - self.create_disposition = configuration.get('createDisposition') - self.encoding = configuration.get('encoding') - self.field_delimiter = configuration.get('fieldDelimiter') - self.ignore_unknown_values = _bool_or_none( - configuration.get('ignoreUnknownValues')) - self.max_bad_records = _int_or_none( - configuration.get('maxBadRecords')) - self.null_marker = configuration.get('nullMarker') - self.quote_character = configuration.get('quote') - self.skip_leading_rows = _int_or_none( - configuration.get('skipLeadingRows')) - self.source_format = configuration.get('sourceFormat') - self.write_disposition = configuration.get('writeDisposition') + self._configuration._properties = copy.deepcopy(configuration) @classmethod def from_api_repr(cls, resource, client): @@ -842,15 +804,16 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: Job parsed from ``resource``. """ - job_id, config = cls._get_resource_config(resource) - dest_config = config['destinationTable'] + job_id, config_resource = cls._get_resource_config(resource) + config = LoadJobConfig.from_api_repr(config_resource) + dest_config = config_resource['destinationTable'] ds_ref = DatasetReference(dest_config['projectId'], dest_config['datasetId'],) - dataset = Dataset(ds_ref) - table_ref = TableReference(dataset, dest_config['tableId']) - destination = Table(table_ref, client=client) - source_urls = config.get('sourceUris', ()) - job = cls(job_id, destination, source_urls, client=client) + destination = TableReference(ds_ref, dest_config['tableId']) + # TODO(jba): sourceUris should not be absent if there are no LoadJobs + # for file uploads. + source_uris = config_resource.get('sourceUris') + job = cls(job_id, source_uris, destination, client, config) job._set_properties(resource) return job diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 2fd43f7951c4..ada6d92b5050 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -463,14 +463,13 @@ def test_load_table_from_storage_then_dump_table(self): table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) + config = bigquery.LoadJobConfig() + config.create_disposition = 'CREATE_NEVER' + config.skip_leading_rows = 1 + config.source_format = 'CSV' + config.write_disposition = 'WRITE_EMPTY' job = Config.CLIENT.load_table_from_storage( - 'bq_load_storage_test_' + local_id, table, GS_URL) - job.create_disposition = 'CREATE_NEVER' - job.skip_leading_rows = 1 - job.source_format = 'CSV' - job.write_disposition = 'WRITE_EMPTY' - - job.begin() + GS_URL, dataset.table(TABLE_NAME), job_config=config) # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability @@ -523,11 +522,10 @@ def test_load_table_from_storage_w_autodetect_schema(self): dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) table_ref = dataset.table(table_name) - job = Config.CLIENT.load_table_from_storage( - 'bq_load_storage_test_' + local_id, table_ref, gs_url) - job.autodetect = True - - job.begin() + config = bigquery.LoadJobConfig() + config.autodetect = True + job = Config.CLIENT.load_table_from_storage(gs_url, table_ref, + job_config=config) # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability @@ -551,7 +549,6 @@ def _load_table_for_extract_table( self, storage_client, rows, bucket_name, blob_name, table): from google.cloud._testing import _NamedTemporaryFile - local_id = unique_resource_id() gs_url = 'gs://{}/{}'.format(bucket_name, blob_name) # In the **very** rare case the bucket name is reserved, this @@ -572,10 +569,11 @@ def _load_table_for_extract_table( dataset = self.temp_dataset(table.dataset_id) table_ref = dataset.table(table.table_id) - job = Config.CLIENT.load_table_from_storage( - 'bq_extract_storage_test_' + local_id, table_ref, gs_url) - job.autodetect = True - job.begin() + config = bigquery.LoadJobConfig() + config.autodetect = True + job = Config.CLIENT.load_table_from_storage(gs_url, table_ref, + job_config=config) + # TODO(jba): do we need this retry now that we have job.result()? # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds @@ -608,7 +606,7 @@ def test_extract_table(self): destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) job = Config.CLIENT.extract_table(table_ref, destination_uri) - job.result() + job.result(timeout=100) self.to_delete.insert(0, destination) got = destination.download_as_string().decode('utf-8') diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d1a6d1218ae8..3f667039f497 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1122,12 +1122,47 @@ def test_load_table_from_storage(self): DATASET = 'dataset_name' DESTINATION = 'destination_table' SOURCE_URI = 'http://example.com/source.csv' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'load': { + 'sourceUris': [SOURCE_URI], + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': DESTINATION, + }, + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - dataset = client.dataset(DATASET) - destination = dataset.table(DESTINATION) - job = client.load_table_from_storage(JOB, destination, SOURCE_URI) + conn = client._connection = _Connection(RESOURCE) + destination = client.dataset(DATASET).table(DESTINATION) + + job = client.load_table_from_storage(SOURCE_URI, destination, + job_id=JOB) + + # Check that load_table_from_storage actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/%s/jobs' % PROJECT) + + self.assertIsInstance(job, LoadJob) + self.assertIs(job._client, client) + self.assertEqual(job.job_id, JOB) + self.assertEqual(list(job.source_uris), [SOURCE_URI]) + self.assertIs(job.destination, destination) + + conn = client._connection = _Connection(RESOURCE) + + job = client.load_table_from_storage([SOURCE_URI], destination, + job_id=JOB) self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index d0a654c0c15d..e6b903bfebaf 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -18,6 +18,7 @@ import unittest from google.cloud.bigquery.job import ExtractJobConfig, CopyJobConfig +from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.dataset import DatasetReference @@ -84,12 +85,14 @@ def test_missing_reason(self): class _Base(object): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import TableReference PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' DS_ID = 'datset_id' DS_REF = DatasetReference(PROJECT, DS_ID) TABLE_ID = 'table_id' + TABLE_REF = TableReference(DS_REF, TABLE_ID) JOB_NAME = 'job_name' def _make_one(self, *args, **kw): @@ -231,50 +234,53 @@ def _makeResource(self, started=False, ended=False): return resource def _verifyBooleanConfigProperties(self, job, config): + jconfig = job.configuration if 'allowJaggedRows' in config: - self.assertEqual(job.allow_jagged_rows, + self.assertEqual(jconfig.allow_jagged_rows, config['allowJaggedRows']) else: - self.assertIsNone(job.allow_jagged_rows) + self.assertIsNone(jconfig.allow_jagged_rows) if 'allowQuotedNewlines' in config: - self.assertEqual(job.allow_quoted_newlines, + self.assertEqual(jconfig.allow_quoted_newlines, config['allowQuotedNewlines']) else: - self.assertIsNone(job.allow_quoted_newlines) + self.assertIsNone(jconfig.allow_quoted_newlines) if 'autodetect' in config: self.assertEqual( - job.autodetect, config['autodetect']) + jconfig.autodetect, config['autodetect']) else: - self.assertIsNone(job.autodetect) + self.assertIsNone(jconfig.autodetect) if 'ignoreUnknownValues' in config: - self.assertEqual(job.ignore_unknown_values, + self.assertEqual(jconfig.ignore_unknown_values, config['ignoreUnknownValues']) else: - self.assertIsNone(job.ignore_unknown_values) + self.assertIsNone(jconfig.ignore_unknown_values) def _verifyEnumConfigProperties(self, job, config): + jconfig = job.configuration if 'createDisposition' in config: - self.assertEqual(job.create_disposition, + self.assertEqual(jconfig.create_disposition, config['createDisposition']) else: - self.assertIsNone(job.create_disposition) + self.assertIsNone(jconfig.create_disposition) if 'encoding' in config: - self.assertEqual(job.encoding, + self.assertEqual(jconfig.encoding, config['encoding']) else: - self.assertIsNone(job.encoding) + self.assertIsNone(jconfig.encoding) if 'sourceFormat' in config: - self.assertEqual(job.source_format, + self.assertEqual(jconfig.source_format, config['sourceFormat']) else: - self.assertIsNone(job.source_format) + self.assertIsNone(jconfig.source_format) if 'writeDisposition' in config: - self.assertEqual(job.write_disposition, + self.assertEqual(jconfig.write_disposition, config['writeDisposition']) else: - self.assertIsNone(job.write_disposition) + self.assertIsNone(jconfig.write_disposition) def _verifyResourceProperties(self, job, resource): + jconfig = job.configuration self._verifyReadonlyResourceProperties(job, resource) config = resource.get('configuration', {}).get('load') @@ -290,43 +296,43 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.destination.table_id, table_ref['tableId']) if 'fieldDelimiter' in config: - self.assertEqual(job.field_delimiter, + self.assertEqual(jconfig.field_delimiter, config['fieldDelimiter']) else: - self.assertIsNone(job.field_delimiter) + self.assertIsNone(jconfig.field_delimiter) if 'maxBadRecords' in config: - self.assertEqual(job.max_bad_records, + self.assertEqual(jconfig.max_bad_records, config['maxBadRecords']) else: - self.assertIsNone(job.max_bad_records) + self.assertIsNone(jconfig.max_bad_records) if 'nullMarker' in config: - self.assertEqual(job.null_marker, + self.assertEqual(jconfig.null_marker, config['nullMarker']) else: - self.assertIsNone(job.null_marker) + self.assertIsNone(jconfig.null_marker) if 'quote' in config: - self.assertEqual(job.quote_character, + self.assertEqual(jconfig.quote_character, config['quote']) else: - self.assertIsNone(job.quote_character) + self.assertIsNone(jconfig.quote_character) if 'skipLeadingRows' in config: - self.assertEqual(str(job.skip_leading_rows), + self.assertEqual(str(jconfig.skip_leading_rows), config['skipLeadingRows']) else: - self.assertIsNone(job.skip_leading_rows) + self.assertIsNone(jconfig.skip_leading_rows) def test_ctor(self): client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - self.assertIs(job.destination, table) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client) + self.assertIs(job.destination, self.TABLE_REF) self.assertEqual(list(job.source_uris), [self.SOURCE1]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) - self.assertEqual(job.schema, []) + self.assertEqual(job.configuration.schema, []) self._verifyInitialReadonlyProperties(job) @@ -337,30 +343,32 @@ def test_ctor(self): self.assertIsNone(job.output_rows) # set/read from resource['configuration']['load'] - self.assertIsNone(job.allow_jagged_rows) - self.assertIsNone(job.allow_quoted_newlines) - self.assertIsNone(job.autodetect) - self.assertIsNone(job.create_disposition) - self.assertIsNone(job.encoding) - self.assertIsNone(job.field_delimiter) - self.assertIsNone(job.ignore_unknown_values) - self.assertIsNone(job.max_bad_records) - self.assertIsNone(job.null_marker) - self.assertIsNone(job.quote_character) - self.assertIsNone(job.skip_leading_rows) - self.assertIsNone(job.source_format) - self.assertIsNone(job.write_disposition) - - def test_ctor_w_schema(self): + jconfig = job.configuration + self.assertIsNone(jconfig.allow_jagged_rows) + self.assertIsNone(jconfig.allow_quoted_newlines) + self.assertIsNone(jconfig.autodetect) + self.assertIsNone(jconfig.create_disposition) + self.assertIsNone(jconfig.encoding) + self.assertIsNone(jconfig.field_delimiter) + self.assertIsNone(jconfig.ignore_unknown_values) + self.assertIsNone(jconfig.max_bad_records) + self.assertIsNone(jconfig.null_marker) + self.assertIsNone(jconfig.quote_character) + self.assertIsNone(jconfig.skip_leading_rows) + self.assertIsNone(jconfig.source_format) + self.assertIsNone(jconfig.write_disposition) + + def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField client = _Client(self.PROJECT) - table = _Table() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client, - schema=[full_name, age]) - self.assertEqual(job.schema, [full_name, age]) + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client, config) + self.assertEqual(job.configuration.schema, [full_name, age]) def test_done(self): client = _Client(self.PROJECT) @@ -377,15 +385,15 @@ def test_result(self): self.assertIs(result, job) - def test_result_invokes_begins(self): + def test_result_invokes_begin(self): begun_resource = self._makeResource() done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} connection = _Connection(begun_resource, done_resource) client = _Client(self.PROJECT, connection=connection) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client) job.result() self.assertEqual(len(connection._requested), 2) @@ -394,67 +402,52 @@ def test_result_invokes_begins(self): self.assertEqual(reload_request['method'], 'GET') def test_schema_setter_non_list(self): - client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + config = LoadJobConfig() with self.assertRaises(TypeError): - job.schema = object() + config.schema = object() def test_schema_setter_invalid_field(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + config = LoadJobConfig() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') with self.assertRaises(ValueError): - job.schema = [full_name, object()] + config.schema = [full_name, object()] def test_schema_setter(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + config = LoadJobConfig() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - job.schema = [full_name, age] - self.assertEqual(job.schema, [full_name, age]) + config.schema = [full_name, age] + self.assertEqual(config.schema, [full_name, age]) def test_schema_setter_w_autodetect(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) - table = _Table() - full_name = SchemaField('full_name', 'STRING') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - job.autodetect = False - job.schema = [full_name] - self.assertEqual(job.schema, [full_name]) - - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - job.autodetect = True + config = LoadJobConfig() + schema = [SchemaField('full_name', 'STRING')] + config.autodetect = False + config.schema = schema + self.assertEqual(config.schema, schema) + + config.schema = [] + config.autodetect = True with self.assertRaises(ValueError): - job.schema = [full_name] + config.schema = schema def test_autodetect_setter_w_schema(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) - table = _Table() - full_name = SchemaField('full_name', 'STRING') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - - job.autodetect = True - job.schema = [] - self.assertEqual(job.schema, []) + config = LoadJobConfig() - job.autodetect = False - job.schema = [full_name] - self.assertEqual(job.autodetect, False) + config.autodetect = False + config.schema = [SchemaField('full_name', 'STRING')] + self.assertEqual(config.autodetect, False) with self.assertRaises(ValueError): - job.autodetect = True + config.autodetect = True def test_props_set_by_server(self): import datetime @@ -475,7 +468,7 @@ def test_props_set_by_server(self): client = _Client(self.PROJECT) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) job._properties['etag'] = 'ETAG' job._properties['id'] = JOB_ID job._properties['selfLink'] = URL @@ -578,8 +571,8 @@ def test_from_api_repr_w_properties(self): def test_begin_w_already_running(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client) job._properties['status'] = {'state': 'RUNNING'} with self.assertRaises(ValueError): @@ -595,8 +588,8 @@ def test_begin_w_bound_client(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client) job.begin() @@ -634,9 +627,10 @@ def test_begin_w_autodetect(self): del resource['user_email'] conn = _Connection(resource) client = _Client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - job.autodetect = True + config = LoadJobConfig() + config.autodetect = True + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client, config) job.begin() sent = { @@ -698,24 +692,24 @@ def test_begin_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - table = _Table() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1, - schema=[full_name, age]) - - job.allow_jagged_rows = True - job.allow_quoted_newlines = True - job.create_disposition = 'CREATE_NEVER' - job.encoding = 'ISO-8559-1' - job.field_delimiter = '|' - job.ignore_unknown_values = True - job.max_bad_records = 100 - job.null_marker = r'\N' - job.quote_character = "'" - job.skip_leading_rows = 1 - job.source_format = 'CSV' - job.write_disposition = 'WRITE_TRUNCATE' + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client1, config) + config.allow_jagged_rows = True + config.allow_quoted_newlines = True + config.create_disposition = 'CREATE_NEVER' + config.encoding = 'ISO-8559-1' + config.field_delimiter = '|' + config.ignore_unknown_values = True + config.max_bad_records = 100 + config.null_marker = r'\N' + config.quote_character = "'" + config.skip_leading_rows = 1 + config.source_format = 'CSV' + config.write_disposition = 'WRITE_TRUNCATE' job.begin(client=client2) @@ -733,6 +727,7 @@ def test_begin_w_alternate_client(self): 'load': LOAD_CONFIGURATION, }, } + self.maxDiff = None self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) @@ -741,7 +736,7 @@ def test_exists_miss_w_bound_client(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) self.assertFalse(job.exists()) @@ -758,7 +753,7 @@ def test_exists_hit_w_alternate_client(self): conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) self.assertTrue(job.exists(client=client2)) @@ -775,7 +770,7 @@ def test_reload_w_bound_client(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) job.reload() @@ -793,7 +788,7 @@ def test_reload_w_alternate_client(self): conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) job.reload(client=client2) @@ -811,7 +806,7 @@ def test_cancel_w_bound_client(self): conn = _Connection(RESPONSE) client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) job.cancel() @@ -830,7 +825,7 @@ def test_cancel_w_alternate_client(self): conn2 = _Connection(RESPONSE) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) job.cancel(client=client2) From ceea3497dd847d56a2da6b8ae7f151e562cab53d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 6 Oct 2017 12:13:22 -0700 Subject: [PATCH 0219/2016] Make QueryResults read-only. (#4094) Removes any QueryJob-related properties from QueryResults. Now the QueryResults class more closely reflects the backend resource. --- .../google/cloud/bigquery/query.py | 230 +----- .../tests/unit/test_client.py | 2 +- .../tests/unit/test_job.py | 15 +- .../tests/unit/test_query.py | 718 +++++------------- 4 files changed, 232 insertions(+), 733 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 57199556ed84..888ce5853050 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -12,97 +12,37 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Define API Queries.""" +"""BigQuery query processing.""" -import six +import copy from google.api.core import page_iterator -from google.cloud.bigquery._helpers import _TypedProperty from google.cloud.bigquery._helpers import _rows_from_json -from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery._helpers import QueryParametersProperty -from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start -class _SyncQueryConfiguration(object): - """User-settable configuration options for synchronous query jobs. - - Values which are ``None`` -> server defaults. - """ - _default_dataset = None - _dry_run = None - _max_results = None - _timeout_ms = None - _preserve_nulls = None - _use_query_cache = None - _use_legacy_sql = None - - class QueryResults(object): - """Synchronous job: query tables. + """Results of a query. - :type query: str - :param query: SQL query string + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs/getQueryResults :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery.job.UDFResource` - (empty by default) - - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) """ - _UDF_KEY = 'userDefinedFunctionResources' - _QUERY_PARAMETERS_KEY = 'queryParameters' - - def __init__(self, query, client, udf_resources=(), query_parameters=()): + def __init__(self, client, properties): self._client = client self._properties = {} - self.query = query - self._configuration = _SyncQueryConfiguration() - self.udf_resources = udf_resources - self.query_parameters = query_parameters self._job = None + self._set_properties(properties) @classmethod def from_api_repr(cls, api_response, client): - instance = cls(None, client) - instance._set_properties(api_response) - return instance - - @classmethod - def from_query_job(cls, job): - """Factory: construct from an existing job. - - :type job: :class:`~google.cloud.bigquery.job.QueryJob` - :param job: existing job - - :rtype: :class:`QueryResults` - :returns: the instance, bound to the job - """ - instance = cls(job.query, job._client, job.udf_resources) - instance._job = job - job_ref = instance._properties.setdefault('jobReference', {}) - job_ref['jobId'] = job.job_id - if job.default_dataset is not None: - instance.default_dataset = job.default_dataset - if job.use_query_cache is not None: - instance.use_query_cache = job.use_query_cache - if job.use_legacy_sql is not None: - instance.use_legacy_sql = job.use_legacy_sql - return instance + return cls(client, api_response) @property def project(self): @@ -111,7 +51,7 @@ def project(self): :rtype: str :returns: the project (derived from the client). """ - return self._client.project + return self._properties.get('jobReference', {}).get('projectId') def _require_client(self, client): """Check client or verify over-ride. @@ -168,33 +108,17 @@ def errors(self): return self._properties.get('errors') @property - def name(self): - """Job name, generated by the back-end. + def job_id(self): + """Job ID of the query job these results are from. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobReference - :rtype: list of mapping, or ``NoneType`` - :returns: Mappings describing errors generated on the server (None - until set by the server). + :rtype: string + :returns: Job ID of the query job. """ return self._properties.get('jobReference', {}).get('jobId') - @property - def job(self): - """Job instance used to run the query. - - :rtype: :class:`google.cloud.bigquery.job.QueryJob`, or ``NoneType`` - :returns: Job instance used to run the query (None until - ``jobReference`` property is set by the server). - """ - if self._job is None: - job_ref = self._properties.get('jobReference') - if job_ref is not None: - self._job = QueryJob(job_ref['jobId'], self.query, - self._client) - return self._job - @property def page_token(self): """Token for fetching next bach of results. @@ -273,119 +197,35 @@ def schema(self): """ return _parse_schema_resource(self._properties.get('schema', {})) - default_dataset = _TypedProperty('default_dataset', DatasetReference) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#defaultDataset - """ - - dry_run = _TypedProperty('dry_run', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#dryRun - """ - - max_results = _TypedProperty('max_results', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#maxResults - """ - - preserve_nulls = _TypedProperty('preserve_nulls', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#preserveNulls - """ - - query_parameters = QueryParametersProperty() - - timeout_ms = _TypedProperty('timeout_ms', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#timeoutMs - """ - - udf_resources = UDFResourcesProperty() - - use_query_cache = _TypedProperty('use_query_cache', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#useQueryCache - """ - - use_legacy_sql = _TypedProperty('use_legacy_sql', bool) - """See - https://cloud.google.com/bigquery/docs/\ - reference/v2/jobs/query#useLegacySql - """ - def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` :type api_response: dict :param api_response: response returned from an API call """ - self._properties.clear() - self._properties.update(api_response) - - def _build_resource(self): - """Generate a resource for :meth:`begin`.""" - resource = {'query': self.query} - - if self.default_dataset is not None: - resource['defaultDataset'] = { - 'projectId': self.project, - 'datasetId': self.default_dataset.dataset_id, - } - - if self.max_results is not None: - resource['maxResults'] = self.max_results - - if self.preserve_nulls is not None: - resource['preserveNulls'] = self.preserve_nulls - - if self.timeout_ms is not None: - resource['timeoutMs'] = self.timeout_ms - - if self.use_query_cache is not None: - resource['useQueryCache'] = self.use_query_cache - - if self.use_legacy_sql is not None: - resource['useLegacySql'] = self.use_legacy_sql - - if self.dry_run is not None: - resource['dryRun'] = self.dry_run + job_id_present = ( + 'jobReference' in api_response + and 'jobId' in api_response['jobReference'] + and 'projectId' in api_response['jobReference']) + if not job_id_present: + raise ValueError('QueryResult requires a job reference') - if len(self._udf_resources) > 0: - resource[self._UDF_KEY] = [ - {udf_resource.udf_type: udf_resource.value} - for udf_resource in self._udf_resources - ] - if len(self._query_parameters) > 0: - resource[self._QUERY_PARAMETERS_KEY] = [ - query_parameter.to_api_repr() - for query_parameter in self._query_parameters - ] - if self._query_parameters[0].name is None: - resource['parameterMode'] = 'POSITIONAL' - else: - resource['parameterMode'] = 'NAMED' - - return resource - - def run(self, client=None): - """API call: run the query via a POST request + self._properties.clear() + self._properties.update(copy.deepcopy(api_response)) - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + def job(self): + """Job instance used to run the query. - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. + :rtype: :class:`google.cloud.bigquery.job.QueryJob`, or ``NoneType`` + :returns: Job instance used to run the query (None until + ``jobReference`` property is set by the server). """ - if self.job is not None: - raise ValueError("Query job is already running.") + if self._job is None: + job_ref = self._properties['jobReference'] + self._job = self._client.get_job( + job_ref['jobId'], project=job_ref['projectId']) - client = self._require_client(client) - path = '/projects/%s/queries' % (self.project,) - api_response = client._connection.api_request( - method='POST', path=path, data=self._build_resource()) - self._set_properties(api_response) + return self._job def fetch_data(self, max_results=None, page_token=None, start_index=None, timeout_ms=None, client=None): @@ -428,9 +268,6 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, the current page: ``iterator.page.num_items``). :raises: ValueError if the query has not yet been executed. """ - if self.name is None: - raise ValueError("Query not yet executed: call 'run()'") - client = self._require_client(client) params = {} @@ -443,7 +280,7 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, if max_results is not None: params['maxResults'] = max_results - path = '/projects/%s/queries/%s' % (self.project, self.name) + path = '/projects/%s/queries/%s' % (self.project, self.job_id) iterator = page_iterator.HTTPIterator( client=client, api_request=client._connection.api_request, @@ -455,7 +292,8 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, next_token='pageToken', extra_params=params) iterator.query_result = self - iterator.job = self.job + iterator.project = self.project + iterator.job_id = self.job_id return iterator diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 3f667039f497..89e2ebef1b70 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1481,7 +1481,7 @@ def test_query_rows_w_job_id(self): RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) rows_iter = client.query_rows(QUERY, job_id=JOB) - rows = [row for row in rows_iter] + rows = list(rows_iter) self.assertEqual(rows, []) self.assertIs(rows_iter.client, client) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index e6b903bfebaf..ebe9ed49ddda 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2054,7 +2054,13 @@ def test_query_results_w_cached_value(self): client = _Client(self.PROJECT) job = self._make_one(self.JOB_NAME, self.QUERY, client) - query_results = QueryResults(None, client) + resource = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + } + query_results = QueryResults(client, resource) job._query_results = query_results results = job.query_results() @@ -2080,14 +2086,15 @@ def test_result(self): def test_result_invokes_begins(self): begun_resource = self._makeResource() - incomplete_resource = {'jobComplete': False} - query_resource = { - 'jobComplete': True, + incomplete_resource = { + 'jobComplete': False, 'jobReference': { 'projectId': self.PROJECT, 'jobId': self.JOB_NAME, }, } + query_resource = copy.deepcopy(incomplete_resource) + query_resource['jobComplete'] = True done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} connection = _Connection( diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 9340689315a7..d2eae2ad77fb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -14,13 +14,20 @@ import unittest +import mock + +from google.cloud.bigquery import Client + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + class TestQueryResults(unittest.TestCase): PROJECT = 'project' - JOB_NAME = 'job_name' - JOB_NAME = 'test-synchronous-query' - JOB_TYPE = 'query' - QUERY = 'select count(*) from persons' + JOB_ID = 'test-synchronous-query' TOKEN = 'TOKEN' @staticmethod @@ -32,49 +39,14 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - def _makeResource(self, complete=False): - resource = { + def _makeResource(self): + return { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, - }, - 'jobComplete': complete, - 'errors': [], - 'schema': { - 'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, - ], + 'jobId': self.JOB_ID, }, } - if complete: - resource['totalRows'] = '1000' - resource['rows'] = [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': 32}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': 33}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': 29}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': 27}, - ]}, - ] - resource['pageToken'] = self.TOKEN - resource['totalBytesProcessed'] = 100000 - resource['numDmlAffectedRows'] = 123 - resource['cacheHit'] = False - - return resource - def _verifySchema(self, query, resource): from google.cloud.bigquery.schema import SchemaField @@ -92,580 +64,230 @@ def _verifySchema(self, query, resource): else: self.assertEqual(query.schema, ()) - def _verifyRows(self, query, resource): - expected = resource.get('rows') - if expected is None: - self.assertEqual(query.rows, []) - else: - found = query.rows - self.assertEqual(len(found), len(expected)) - for f_row, e_row in zip(found, expected): - self.assertEqual(f_row, - tuple([cell['v'] for cell in e_row['f']])) - - def _verify_udf_resources(self, query, resource): - udf_resources = resource.get('userDefinedFunctionResources', ()) - self.assertEqual(len(query.udf_resources), len(udf_resources)) - for found, expected in zip(query.udf_resources, udf_resources): - if 'resourceUri' in expected: - self.assertEqual(found.udf_type, 'resourceUri') - self.assertEqual(found.value, expected['resourceUri']) - else: - self.assertEqual(found.udf_type, 'inlineCode') - self.assertEqual(found.value, expected['inlineCode']) - - def _verifyQueryParameters(self, query, resource): - query_parameters = resource.get('queryParameters', ()) - self.assertEqual(len(query.query_parameters), len(query_parameters)) - for found, expected in zip(query.query_parameters, query_parameters): - self.assertEqual(found.to_api_repr(), expected) - - def _verifyResourceProperties(self, query, resource): - self.assertEqual(query.cache_hit, resource.get('cacheHit')) - self.assertEqual(query.complete, resource.get('jobComplete')) - self.assertEqual(query.errors, resource.get('errors')) - self.assertEqual(query.page_token, resource.get('pageToken')) - - if 'totalRows' in resource: - self.assertEqual(query.total_rows, int(resource['totalRows'])) - else: - self.assertIsNone(query.total_rows) - - if 'totalBytesProcessed' in resource: - self.assertEqual(query.total_bytes_processed, - int(resource['totalBytesProcessed'])) - else: - self.assertIsNone(query.total_bytes_processed) - - if 'jobReference' in resource: - self.assertEqual(query.name, resource['jobReference']['jobId']) - else: - self.assertIsNone(query.name) - - if 'numDmlAffectedRows' in resource: - self.assertEqual(query.num_dml_affected_rows, - int(resource['numDmlAffectedRows'])) - else: - self.assertIsNone(query.num_dml_affected_rows) - - self._verify_udf_resources(query, resource) - self._verifyQueryParameters(query, resource) - self._verifySchema(query, resource) - self._verifyRows(query, resource) - def test_ctor_defaults(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertEqual(query.query, self.QUERY) + query = self._make_one(client, self._makeResource()) self.assertIs(query._client, client) - self.assertIsNone(query.cache_hit) self.assertIsNone(query.complete) self.assertIsNone(query.errors) - self.assertIsNone(query.name) self.assertIsNone(query.page_token) - self.assertEqual(query.query_parameters, []) + self.assertEqual(query.project, self.PROJECT) self.assertEqual(query.rows, []) self.assertEqual(query.schema, ()) self.assertIsNone(query.total_rows) self.assertIsNone(query.total_bytes_processed) - self.assertEqual(query.udf_resources, []) - - self.assertIsNone(query.default_dataset) - self.assertIsNone(query.max_results) - self.assertIsNone(query.preserve_nulls) - self.assertIsNone(query.use_query_cache) - self.assertIsNone(query.use_legacy_sql) - - def test_ctor_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client, udf_resources=udf_resources) - self.assertEqual(query.udf_resources, udf_resources) - - def test_ctor_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client, - query_parameters=query_parameters) - self.assertEqual(query.query_parameters, query_parameters) - - def test_from_query_job(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import QueryJob - from google.cloud.bigquery._helpers import UDFResource - - DS_ID = 'DATASET' - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - client = _Client(self.PROJECT) - job = QueryJob( - self.JOB_NAME, self.QUERY, client, - udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - ds_ref = DatasetReference(self.PROJECT, DS_ID) - job.default_dataset = ds_ref - job.use_query_cache = True - job.use_legacy_sql = True - klass = self._get_target_class() - - query = klass.from_query_job(job) - - self.assertEqual(query.name, self.JOB_NAME) - self.assertEqual(query.query, self.QUERY) - self.assertIs(query._client, client) - self.assertIs(query._job, job) - self.assertEqual(query.udf_resources, job.udf_resources) - self.assertIs(query.default_dataset, ds_ref) - self.assertTrue(query.use_query_cache) - self.assertTrue(query.use_legacy_sql) - - def test_from_query_job_wo_default_dataset(self): - from google.cloud.bigquery.job import QueryJob - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - client = _Client(self.PROJECT) - job = QueryJob( - self.JOB_NAME, self.QUERY, client, - udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - klass = self._get_target_class() - - query = klass.from_query_job(job) - - self.assertEqual(query.query, self.QUERY) - self.assertIs(query._client, client) - self.assertIs(query._job, job) - self.assertEqual(query.udf_resources, job.udf_resources) - self.assertIsNone(query.default_dataset) - self.assertIsNone(query.use_query_cache) - self.assertIsNone(query.use_legacy_sql) - - def test_job_wo_jobid(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertIsNone(query.job) def test_job_w_jobid(self): from google.cloud.bigquery.job import QueryJob SERVER_GENERATED = 'SERVER_GENERATED' - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - query._properties['jobReference'] = { - 'projectId': self.PROJECT, - 'jobId': SERVER_GENERATED, + job_resource = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': SERVER_GENERATED, + }, + 'configuration': {'query': {'query': 'SELECT 1'}}, + } + query_resource = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': SERVER_GENERATED, + }, } - job = query.job + conn = _Connection(job_resource) + client = _Client(self.PROJECT, conn) + query = self._make_one(client, query_resource) + job = query.job() self.assertIsInstance(job, QueryJob) - self.assertEqual(job.query, self.QUERY) self.assertIs(job._client, client) self.assertEqual(job.job_id, SERVER_GENERATED) - fetched_later = query.job + fetched_later = query.job() self.assertIs(fetched_later, job) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual( + req['path'], + '/projects/{}/jobs/{}'.format(self.PROJECT, SERVER_GENERATED)) def test_cache_hit_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.cache_hit) def test_cache_hit_present(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'cacheHit': True} - query._set_properties(resource) + resource = self._makeResource() + resource['cacheHit'] = True + query = self._make_one(client, resource) self.assertTrue(query.cache_hit) def test_complete_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.complete) def test_complete_present(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'jobComplete': True} - query._set_properties(resource) + resource = self._makeResource() + resource['jobComplete'] = True + query = self._make_one(client, resource) self.assertTrue(query.complete) def test_errors_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.errors) def test_errors_present(self): ERRORS = [ {'reason': 'testing'}, ] + resource = self._makeResource() + resource['errors'] = ERRORS client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'errors': ERRORS} - query._set_properties(resource) + query = self._make_one(client, resource) self.assertEqual(query.errors, ERRORS) - def test_name_missing(self): + def test_job_id_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertIsNone(query.name) + with self.assertRaises(ValueError): + self._make_one(client, {}) - def test_name_broken_job_reference(self): + def test_job_id_broken_job_reference(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) resource = {'jobReference': {'bogus': 'BOGUS'}} - query._set_properties(resource) - self.assertIsNone(query.name) + with self.assertRaises(ValueError): + self._make_one(client, resource) - def test_name_present(self): - JOB_ID = 'JOB_ID' + def test_job_id_present(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'jobReference': {'jobId': JOB_ID}} - query._set_properties(resource) - self.assertEqual(query.name, JOB_ID) + resource = self._makeResource() + resource['jobReference']['jobId'] = 'custom-job' + query = self._make_one(client, resource) + self.assertEqual(query.job_id, 'custom-job') def test_page_token_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.page_token) def test_page_token_present(self): - TOKEN = 'TOKEN' client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'pageToken': TOKEN} - query._set_properties(resource) - self.assertEqual(query.page_token, TOKEN) - - def test_total_rows_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertIsNone(query.total_rows) + resource = self._makeResource() + resource['pageToken'] = 'TOKEN' + query = self._make_one(client, resource) + self.assertEqual(query.page_token, 'TOKEN') def test_total_rows_present_integer(self): - TOTAL_ROWS = 42 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalRows': TOTAL_ROWS} - query._set_properties(resource) - self.assertEqual(query.total_rows, TOTAL_ROWS) + resource = self._makeResource() + resource['totalRows'] = 42 + query = self._make_one(client, resource) + self.assertEqual(query.total_rows, 42) def test_total_rows_present_string(self): - TOTAL_ROWS = 42 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalRows': str(TOTAL_ROWS)} - query._set_properties(resource) - self.assertEqual(query.total_rows, TOTAL_ROWS) + resource = self._makeResource() + resource['totalRows'] = '42' + query = self._make_one(client, resource) + self.assertEqual(query.total_rows, 42) def test_total_bytes_processed_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.total_bytes_processed) def test_total_bytes_processed_present_integer(self): - TOTAL_BYTES_PROCESSED = 123456 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalBytesProcessed': TOTAL_BYTES_PROCESSED} - query._set_properties(resource) - self.assertEqual(query.total_bytes_processed, TOTAL_BYTES_PROCESSED) + resource = self._makeResource() + resource['totalBytesProcessed'] = 123456 + query = self._make_one(client, resource) + self.assertEqual(query.total_bytes_processed, 123456) def test_total_bytes_processed_present_string(self): - TOTAL_BYTES_PROCESSED = 123456 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalBytesProcessed': str(TOTAL_BYTES_PROCESSED)} - query._set_properties(resource) - self.assertEqual(query.total_bytes_processed, TOTAL_BYTES_PROCESSED) + resource = self._makeResource() + resource['totalBytesProcessed'] = '123456' + query = self._make_one(client, resource) + self.assertEqual(query.total_bytes_processed, 123456) def test_num_dml_affected_rows_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.num_dml_affected_rows) def test_num_dml_affected_rows_present_integer(self): - DML_AFFECTED_ROWS = 123456 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'numDmlAffectedRows': DML_AFFECTED_ROWS} - query._set_properties(resource) - self.assertEqual(query.num_dml_affected_rows, DML_AFFECTED_ROWS) + resource = self._makeResource() + resource['numDmlAffectedRows'] = 123456 + query = self._make_one(client, resource) + self.assertEqual(query.num_dml_affected_rows, 123456) def test_num_dml_affected_rows_present_string(self): - DML_AFFECTED_ROWS = 123456 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'numDmlAffectedRows': str(DML_AFFECTED_ROWS)} - query._set_properties(resource) - self.assertEqual(query.num_dml_affected_rows, DML_AFFECTED_ROWS) + resource = self._makeResource() + resource['numDmlAffectedRows'] = '123456' + query = self._make_one(client, resource) + self.assertEqual(query.num_dml_affected_rows, 123456) def test_schema(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self._verifyResourceProperties(query, {}) - resource = { - 'schema': { - 'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, - ], - }, + query = self._make_one(client, self._makeResource()) + self._verifySchema(query, self._makeResource()) + resource = self._makeResource() + resource['schema'] = { + 'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, + ], } query._set_properties(resource) - self._verifyResourceProperties(query, resource) - - def test_run_w_already_has_job(self): - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query._job = object() # simulate already running - with self.assertRaises(ValueError): - query.run() - - def test_run_w_already_has_job_in_properties(self): - JOB_ID = 'JOB_ID' - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query._properties['jobReference'] = {'jobId': JOB_ID} - with self.assertRaises(ValueError): - query.run() - - def test_run_w_bound_client(self): - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - self.assertEqual(query.udf_resources, []) - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = {'query': self.QUERY} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_alternate_client(self): - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=True) - DATASET = 'test_dataset' - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - query = self._make_one(self.QUERY, client1) - - query.default_dataset = client2.dataset(DATASET) - query.max_results = 100 - query.preserve_nulls = True - query.timeout_ms = 20000 - query.use_query_cache = False - query.use_legacy_sql = True - query.dry_run = True - - query.run(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'query': self.QUERY, - 'defaultDataset': { - 'projectId': self.PROJECT, - 'datasetId': DATASET, - }, - 'dryRun': True, - 'maxResults': 100, - 'preserveNulls': True, - 'timeoutMs': 20000, - 'useQueryCache': False, - 'useLegacySql': True, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_inline_udf(self): - from google.cloud.bigquery._helpers import UDFResource - - INLINE_UDF_CODE = 'var someCode = "here";' - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['userDefinedFunctionResources'] = [ - {'inlineCode': INLINE_UDF_CODE}, - ] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query.udf_resources = [UDFResource("inlineCode", INLINE_UDF_CODE)] - - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = {'query': self.QUERY, - 'userDefinedFunctionResources': - [{'inlineCode': INLINE_UDF_CODE}]} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_udf_resource_uri(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['userDefinedFunctionResources'] = [ - {'resourceUri': RESOURCE_URI}, - ] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query.udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = {'query': self.QUERY, - 'userDefinedFunctionResources': - [{'resourceUri': RESOURCE_URI}]} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_mixed_udfs(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - INLINE_UDF_CODE = 'var someCode = "here";' - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['userDefinedFunctionResources'] = [ - {'resourceUri': RESOURCE_URI}, - {'inlineCode': INLINE_UDF_CODE}, - ] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query.udf_resources = [UDFResource("resourceUri", RESOURCE_URI), - UDFResource("inlineCode", INLINE_UDF_CODE)] - - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(query.udf_resources, - [UDFResource("resourceUri", RESOURCE_URI), - UDFResource("inlineCode", INLINE_UDF_CODE)]) - SENT = {'query': self.QUERY, - 'userDefinedFunctionResources': [ - {'resourceUri': RESOURCE_URI}, - {"inlineCode": INLINE_UDF_CODE}]} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_named_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['parameterMode'] = 'NAMED' - RESOURCE['queryParameters'] = [ - { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - }, - ] - query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client, - query_parameters=query_parameters) - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'query': self.QUERY, - 'parameterMode': 'NAMED', - 'queryParameters': RESOURCE['queryParameters'], - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_positional_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['parameterMode'] = 'POSITIONAL' - RESOURCE['queryParameters'] = [ - { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - }, - ] - query_parameters = [ScalarQueryParameter.positional('INT64', 123)] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client, - query_parameters=query_parameters) - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'query': self.QUERY, - 'parameterMode': 'POSITIONAL', - 'queryParameters': RESOURCE['queryParameters'], - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_fetch_data_query_not_yet_run(self): - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - self.assertRaises(ValueError, query.fetch_data) + self._verifySchema(query, resource) def test_fetch_data_w_bound_client(self): import six - PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_NAME) - BEFORE = self._makeResource(complete=False) - AFTER = self._makeResource(complete=True) - del AFTER['totalRows'] + PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_ID) + schema = { + 'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, + ], + } + BEFORE = self._makeResource() + BEFORE['jobComplete'] = False + BEFORE['schema'] = schema + AFTER = self._makeResource() + AFTER['rows'] = [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': 32}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': 33}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': 29}, + ]}, + {'f': [ + {'v': 'Bhettye Rhubble'}, + {'v': 27}, + ]}, + ] + AFTER['cacheHit'] = False + AFTER['jobComplete'] = True + AFTER['numDmlAffectedRows'] = 123 + AFTER['pageToken'] = self.TOKEN + AFTER['schema'] = schema + AFTER['totalBytesProcessed'] = 100000 conn = _Connection(AFTER) client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query._set_properties(BEFORE) + query = self._make_one(client, BEFORE) self.assertFalse(query.complete) iterator = query.fetch_data() @@ -691,24 +313,55 @@ def test_fetch_data_w_bound_client(self): def test_fetch_data_w_alternate_client(self): import six - PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_NAME) + PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_ID) MAX = 10 - TOKEN = 'TOKEN' START = 2257 TIMEOUT = 20000 - BEFORE = self._makeResource(complete=False) - AFTER = self._makeResource(complete=True) + + schema = { + 'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, + ], + } + BEFORE = self._makeResource() + BEFORE['jobComplete'] = False + BEFORE['schema'] = schema + AFTER = self._makeResource() + AFTER['rows'] = [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': 32}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': 33}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': 29}, + ]}, + {'f': [ + {'v': 'Bhettye Rhubble'}, + {'v': 27}, + ]}, + ] + AFTER['cacheHit'] = False + AFTER['jobComplete'] = True + AFTER['numDmlAffectedRows'] = 123 + AFTER['pageToken'] = self.TOKEN + AFTER['schema'] = schema + AFTER['totalBytesProcessed'] = 100000 conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(AFTER) client2 = _Client(project=self.PROJECT, connection=conn2) - query = self._make_one(self.QUERY, client1) - query._set_properties(BEFORE) + query = self._make_one(client1, BEFORE) self.assertFalse(query.complete) iterator = query.fetch_data( - client=client2, max_results=MAX, page_token=TOKEN, + client=client2, max_results=MAX, page_token=self.TOKEN, start_index=START, timeout_ms=TIMEOUT) page = six.next(iterator.pages) rows = list(page) @@ -721,7 +374,7 @@ def test_fetch_data_w_alternate_client(self): self.assertEqual(rows[1], ('Bharney Rhubble', 33)) self.assertEqual(rows[2], ('Wylma Phlyntstone', 29)) self.assertEqual(rows[3], ('Bhettye Rhubble', 27)) - self.assertEqual(total_rows, int(AFTER['totalRows'])) + self.assertIsNone(total_rows) self.assertEqual(page_token, AFTER['pageToken']) self.assertEqual(len(conn1._requested), 0) @@ -731,21 +384,22 @@ def test_fetch_data_w_alternate_client(self): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['query_params'], {'maxResults': MAX, - 'pageToken': TOKEN, + 'pageToken': self.TOKEN, 'startIndex': START, 'timeoutMs': TIMEOUT}) -class _Client(object): +class _Client(Client): def __init__(self, project='project', connection=None): - self.project = project - self._connection = connection + creds = _make_credentials() + http = object() + super(_Client, self).__init__( + project=project, credentials=creds, _http=http) - def dataset(self, dataset_id): - from google.cloud.bigquery.dataset import DatasetReference - - return DatasetReference(self.project, dataset_id) + if connection is None: + connection = _Connection() + self._connection = connection class _Connection(object): From cd482469ac5dc2951e0ba48cb4482d5612e550b5 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Fri, 6 Oct 2017 15:39:54 -0400 Subject: [PATCH 0220/2016] bigquery: add Client.list_rows, remove Table.fetch_data (#4119) --- .../google/cloud/bigquery/client.py | 79 ++++- .../google/cloud/bigquery/table.py | 58 ---- .../google-cloud-bigquery/tests/system.py | 2 +- .../tests/unit/test_client.py | 269 ++++++++++++++++- .../tests/unit/test_table.py | 280 ------------------ 5 files changed, 333 insertions(+), 355 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index da69decd03c8..db1a4b0f2138 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -26,7 +26,7 @@ from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob @@ -34,6 +34,8 @@ from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import QueryResults +from google.cloud.bigquery._helpers import _item_to_row +from google.cloud.bigquery._helpers import _rows_page_start class Project(object): @@ -346,7 +348,6 @@ def delete_table(self, table): :type table: One of: :class:`~google.cloud.bigquery.table.Table` :class:`~google.cloud.bigquery.table.TableReference` - :param table: the table to delete, or a reference to it. """ if not isinstance(table, (Table, TableReference)): @@ -667,6 +668,80 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None): job.begin() return job.result(timeout=timeout) + def list_rows(self, table, selected_fields=None, max_results=None, + page_token=None, start_index=None): + """List the rows of the table. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list + + .. note:: + + This method assumes that the provided schema is up-to-date with the + schema as defined on the back-end: if the two schemas are not + identical, the values returned may be incomplete. To ensure that the + local copy of the schema is up-to-date, call ``client.get_table``. + + :type table: One of: + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` + :param table: the table to list, or a reference to it. + + :type selected_fields: list of :class:`SchemaField` + :param selected_fields: + The fields to return. Required if ``table`` is a + :class:`~google.cloud.bigquery.table.TableReference`. + + :type max_results: int + :param max_results: maximum number of rows to return. + + :type page_token: str + :param page_token: (Optional) Token representing a cursor into the + table's rows. + + :type start_index: int + :param page_token: (Optional) The zero-based index of the starting + row to read. + + :rtype: :class:`~google.api.core.page_iterator.Iterator` + :returns: Iterator of row data :class:`tuple`s. During each page, the + iterator will have the ``total_rows`` attribute set, + which counts the total number of rows **in the table** + (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). + + """ + if selected_fields is not None: + schema = selected_fields + elif isinstance(table, TableReference): + raise ValueError('need selected_fields with TableReference') + elif isinstance(table, Table): + if len(table._schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + schema = table.schema + else: + raise TypeError('table should be Table or TableReference') + + params = {} + if selected_fields is not None: + params['selectedFields'] = [f.name for f in selected_fields] + if start_index is not None: + params['startIndex'] = start_index + + iterator = page_iterator.HTTPIterator( + client=self, + api_request=self._connection.api_request, + path='%s/data' % (table.path,), + item_to_value=_item_to_row, + items_key='rows', + page_token=page_token, + next_token='pageToken', + max_results=max_results, + page_start=_rows_page_start, + extra_params=params) + iterator.schema = schema + return iterator + # pylint: disable=unused-argument def _item_to_project(iterator, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a4f1933303a0..dfc31be29745 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -25,13 +25,10 @@ from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload -from google.api.core import page_iterator from google.cloud import exceptions from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery._helpers import _item_to_row -from google.cloud.bigquery._helpers import _rows_page_start from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW @@ -768,61 +765,6 @@ def update(self, client=None): method='PUT', path=self.path, data=self._build_resource()) self._set_properties(api_response) - def fetch_data(self, max_results=None, page_token=None, client=None): - """API call: fetch the table data via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list - - .. note:: - - This method assumes that its instance's ``schema`` attribute is - up-to-date with the schema as defined on the back-end: if the - two schemas are not identical, the values returned may be - incomplete. To ensure that the local copy of the schema is - up-to-date, call ``client.get_table``. - - :type max_results: int - :param max_results: (Optional) Maximum number of rows to return. - - :type page_token: str - :param page_token: (Optional) Token representing a cursor into the - table's rows. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: (Optional) The client to use. If not passed, falls - back to the ``client`` stored on the current dataset. - - :rtype: :class:`~google.api.core.page_iterator.Iterator` - :returns: Iterator of row data :class:`tuple`s. During each page, the - iterator will have the ``total_rows`` attribute set, - which counts the total number of rows **in the table** - (this is distinct from the total number of rows in the - current page: ``iterator.page.num_items``). - """ - if len(self._schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - - params = {} - - if max_results is not None: - params['maxResults'] = max_results - - client = self._require_client(client) - path = '%s/data' % (self.path,) - iterator = page_iterator.HTTPIterator( - client=client, - api_request=client._connection.api_request, - path=path, - item_to_value=_item_to_row, - items_key='rows', - page_token=page_token, - page_start=_rows_page_start, - next_token='pageToken', - extra_params=params) - iterator.schema = self._schema - return iterator - def row_from_mapping(self, mapping): """Convert a mapping to a row tuple using the schema. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index ada6d92b5050..1936fc435e57 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -295,7 +295,7 @@ def test_update_table(self): @staticmethod def _fetch_single_page(table): - iterator = table.fetch_data() + iterator = Config.CLIENT.list_rows(table) page = six.next(iterator.pages) return list(page) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 89e2ebef1b70..cd0c3f6d71b0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -106,7 +106,6 @@ def test__get_query_results_hit(self): self.assertTrue(query_results.complete) def test_list_projects_defaults(self): - import six from google.cloud.bigquery.client import Project PROJECT_1 = 'PROJECT_ONE' @@ -151,8 +150,6 @@ def test_list_projects_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_projects_explicit_response_missing_projects_key(self): - import six - PROJECT = 'PROJECT' PATH = 'projects' TOKEN = 'TOKEN' @@ -177,7 +174,6 @@ def test_list_projects_explicit_response_missing_projects_key(self): {'maxResults': 3, 'pageToken': TOKEN}) def test_list_datasets_defaults(self): - import six from google.cloud.bigquery.dataset import Dataset PROJECT = 'PROJECT' @@ -222,8 +218,6 @@ def test_list_datasets_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_datasets_explicit_response_missing_datasets_key(self): - import six - PROJECT = 'PROJECT' PATH = 'projects/%s/datasets' % PROJECT TOKEN = 'TOKEN' @@ -636,8 +630,6 @@ def test_update_dataset(self): self.assertEqual(req['headers']['If-Match'], 'etag') def test_list_dataset_tables_empty(self): - import six - PROJECT = 'PROJECT' DS_ID = 'DATASET_ID' creds = _make_credentials() @@ -660,7 +652,6 @@ def test_list_dataset_tables_empty(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_dataset_tables_defaults(self): - import six from google.cloud.bigquery.table import Table PROJECT = 'PROJECT' @@ -711,7 +702,6 @@ def test_list_dataset_tables_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_dataset_tables_explicit(self): - import six from google.cloud.bigquery.table import Table PROJECT = 'PROJECT' @@ -895,7 +885,6 @@ def test_get_job_hit(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_defaults(self): - import six from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob @@ -1027,7 +1016,6 @@ def test_list_jobs_defaults(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_load_job_wo_sourceUris(self): - import six from google.cloud.bigquery.job import LoadJob PROJECT = 'PROJECT' @@ -1084,8 +1072,6 @@ def test_list_jobs_load_job_wo_sourceUris(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_explicit_missing(self): - import six - PROJECT = 'PROJECT' PATH = 'projects/%s/jobs' % PROJECT DATA = {} @@ -1546,6 +1532,261 @@ def test_query_rows_w_job_config(self): self.assertEqual(configuration['query']['useLegacySql'], True) self.assertEqual(configuration['dryRun'], True) + def test_list_rows(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table, SchemaField + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + PROJECT, DS_ID, TABLE_ID) + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + WHEN_1 = WHEN + datetime.timedelta(seconds=1) + WHEN_2 = WHEN + datetime.timedelta(seconds=2) + ROWS = 1234 + TOKEN = 'TOKEN' + + def _bigquery_timestamp_float_repr(ts_float): + # Preserve microsecond precision for E+09 timestamps + return '%0.15E' % (ts_float,) + + DATA = { + 'totalRows': str(ROWS), + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': '32'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS)}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': '33'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS + 1)}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': '29'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS + 2)}, + ]}, + {'f': [ + {'v': 'Bhettye Rhubble'}, + {'v': None}, + {'v': None}, + ]}, + ] + } + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(DATA, DATA) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='NULLABLE') + joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') + table = Table(table_ref, schema=[full_name, age, joined]) + + iterator = client.list_rows(table) + page = six.next(iterator.pages) + rows = list(page) + total_rows = iterator.total_rows + page_token = iterator.next_page_token + + self.assertEqual(len(rows), 4) + self.assertEqual(rows[0], ('Phred Phlyntstone', 32, WHEN)) + self.assertEqual(rows[1], ('Bharney Rhubble', 33, WHEN_1)) + self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, WHEN_2)) + self.assertEqual(rows[3], ('Bhettye Rhubble', None, None)) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {}) + + def test_list_rows_query_params(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table, SchemaField + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + table = Table(table_ref, + schema=[SchemaField('age', 'INTEGER', mode='NULLABLE')]) + tests = [ + ({}, {}), + ({'start_index': 1}, {'startIndex': 1}), + ({'max_results': 2}, {'maxResults': 2}), + ({'start_index': 1, 'max_results': 2}, + {'startIndex': 1, 'maxResults': 2}), + ] + conn = client._connection = _Connection(*len(tests)*[{}]) + for i, test in enumerate(tests): + iterator = client.list_rows(table, **test[0]) + six.next(iterator.pages) + req = conn._requested[i] + self.assertEqual(req['query_params'], test[1], + 'for kwargs %s' % test[0]) + + def test_list_rows_repeated_fields(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import SchemaField + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + PROJECT, DS_ID, TABLE_ID) + ROWS = 1234 + TOKEN = 'TOKEN' + DATA = { + 'totalRows': ROWS, + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': [{'v': 'red'}, {'v': 'green'}]}, + {'v': [{ + 'v': { + 'f': [ + {'v': [{'v': '1'}, {'v': '2'}]}, + {'v': [{'v': '3.1415'}, {'v': '1.414'}]}, + ]} + }]}, + ]}, + ] + } + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(DATA) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + color = SchemaField('color', 'STRING', mode='REPEATED') + index = SchemaField('index', 'INTEGER', 'REPEATED') + score = SchemaField('score', 'FLOAT', 'REPEATED') + struct = SchemaField('struct', 'RECORD', mode='REPEATED', + fields=[index, score]) + + iterator = client.list_rows(table_ref, selected_fields=[color, struct]) + page = six.next(iterator.pages) + rows = list(page) + total_rows = iterator.total_rows + page_token = iterator.next_page_token + + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0][0], ['red', 'green']) + self.assertEqual(rows[0][1], [{'index': [1, 2], + 'score': [3.1415, 1.414]}]) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_rows_w_record_schema(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table, SchemaField + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + PROJECT, DS_ID, TABLE_ID) + ROWS = 1234 + TOKEN = 'TOKEN' + DATA = { + 'totalRows': ROWS, + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': None}, + ]}, + ] + } + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(DATA) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + area_code = SchemaField('area_code', 'STRING', 'REQUIRED') + local_number = SchemaField('local_number', 'STRING', 'REQUIRED') + rank = SchemaField('rank', 'INTEGER', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='NULLABLE', + fields=[area_code, local_number, rank]) + table = Table(table_ref, schema=[full_name, phone]) + + iterator = client.list_rows(table) + page = six.next(iterator.pages) + rows = list(page) + total_rows = iterator.total_rows + page_token = iterator.next_page_token + + self.assertEqual(len(rows), 3) + self.assertEqual(rows[0][0], 'Phred Phlyntstone') + self.assertEqual(rows[0][1], {'area_code': '800', + 'local_number': '555-1212', + 'rank': 1}) + self.assertEqual(rows[1][0], 'Bharney Rhubble') + self.assertEqual(rows[1][1], {'area_code': '877', + 'local_number': '768-5309', + 'rank': 2}) + self.assertEqual(rows[2][0], 'Wylma Phlyntstone') + self.assertIsNone(rows[2][1]) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_rows_errors(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + + # table ref with no selected fields + with self.assertRaises(ValueError): + client.list_rows(table_ref) + + # table with no schema + with self.assertRaises(ValueError): + client.list_rows(Table(table_ref)) + + # neither Table nor tableReference + with self.assertRaises(TypeError): + client.list_rows(1) + class _Connection(object): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index c86c21880bda..fc0ff3370974 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -945,286 +945,6 @@ def test_update_w_alternate_client(self): self.assertEqual(req['data'], SENT) self._verifyResourceProperties(table, RESOURCE) - def test_fetch_data_wo_schema(self): - from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA - - client = _Client(project=self.PROJECT) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - with self.assertRaises(ValueError) as exc: - table.fetch_data() - - self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) - - def test_fetch_data_w_bound_client(self): - import datetime - import six - from google.cloud._helpers import UTC - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) - WHEN_1 = WHEN + datetime.timedelta(seconds=1) - WHEN_2 = WHEN + datetime.timedelta(seconds=2) - ROWS = 1234 - TOKEN = 'TOKEN' - - def _bigquery_timestamp_float_repr(ts_float): - # Preserve microsecond precision for E+09 timestamps - return '%0.15E' % (ts_float,) - - DATA = { - 'totalRows': str(ROWS), - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': '32'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS)}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': '33'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS + 1)}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': '29'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS + 2)}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': None}, - {'v': None}, - ]}, - ] - } - - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='NULLABLE') - joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._make_one(table_ref, schema=[full_name, age, joined], - client=client) - - iterator = table.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32, WHEN)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33, WHEN_1)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, WHEN_2)) - self.assertEqual(rows[3], ('Bhettye Rhubble', None, None)) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_fetch_data_w_alternate_client(self): - import six - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - MAX = 10 - TOKEN = 'TOKEN' - DATA = { - 'rows': [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': '32'}, - {'v': 'true'}, - {'v': '3.1415926'}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': '33'}, - {'v': 'false'}, - {'v': '1.414'}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': '29'}, - {'v': 'true'}, - {'v': '2.71828'}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': '27'}, - {'v': None}, - {'v': None}, - ]}, - ] - } - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(DATA) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') - score = SchemaField('score', 'FLOAT', mode='NULLABLE') - table = self._make_one(table_ref, - schema=[full_name, age, voter, score], - client=client1) - - iterator = table.fetch_data( - client=client2, max_results=MAX, page_token=TOKEN) - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32, True, 3.1415926)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33, False, 1.414)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, True, 2.71828)) - self.assertEqual(rows[3], ('Bhettye Rhubble', 27, None, None)) - self.assertIsNone(total_rows) - self.assertIsNone(page_token) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'maxResults': MAX, 'pageToken': TOKEN}) - - def test_fetch_data_w_repeated_fields(self): - import six - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - ROWS = 1234 - TOKEN = 'TOKEN' - DATA = { - 'totalRows': ROWS, - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': [{'v': 'red'}, {'v': 'green'}]}, - {'v': [{ - 'v': { - 'f': [ - {'v': [{'v': '1'}, {'v': '2'}]}, - {'v': [{'v': '3.1415'}, {'v': '1.414'}]}, - ]} - }]}, - ]}, - ] - } - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - color = SchemaField('color', 'STRING', mode='REPEATED') - index = SchemaField('index', 'INTEGER', 'REPEATED') - score = SchemaField('score', 'FLOAT', 'REPEATED') - struct = SchemaField('struct', 'RECORD', mode='REPEATED', - fields=[index, score]) - table = self._make_one(table_ref, schema=[color, struct], - client=client) - - iterator = table.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertEqual(len(rows), 1) - self.assertEqual(rows[0][0], ['red', 'green']) - self.assertEqual(rows[0][1], [{'index': [1, 2], - 'score': [3.1415, 1.414]}]) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_fetch_data_w_record_schema(self): - import six - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - ROWS = 1234 - TOKEN = 'TOKEN' - DATA = { - 'totalRows': ROWS, - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': None}, - ]}, - ] - } - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - area_code = SchemaField('area_code', 'STRING', 'REQUIRED') - local_number = SchemaField('local_number', 'STRING', 'REQUIRED') - rank = SchemaField('rank', 'INTEGER', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='NULLABLE', - fields=[area_code, local_number, rank]) - table = self._make_one(table_ref, schema=[full_name, phone], - client=client) - - iterator = table.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertEqual(len(rows), 3) - self.assertEqual(rows[0][0], 'Phred Phlyntstone') - self.assertEqual(rows[0][1], {'area_code': '800', - 'local_number': '555-1212', - 'rank': 1}) - self.assertEqual(rows[1][0], 'Bharney Rhubble') - self.assertEqual(rows[1][1], {'area_code': '877', - 'local_number': '768-5309', - 'rank': 2}) - self.assertEqual(rows[2][0], 'Wylma Phlyntstone') - self.assertIsNone(rows[2][1]) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - def test_row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} From 8c36d8aec0fd9c1d0721ff18e1801b9fddc0a7b1 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 9 Oct 2017 10:36:37 -0700 Subject: [PATCH 0221/2016] BigQuery: replaces table.update() and table.patch() with client.update_table() (#4076) * adds client.update_table() * removes table.update() and table.patch() * adds coverage for _verifyResourceProperties() * adds test for deleting property and refactors table resource creation * fixes update_table tests * Fixes logic in _build_resource() --- .../google/cloud/bigquery/client.py | 28 +- .../google/cloud/bigquery/table.py | 177 ++++------- .../google-cloud-bigquery/tests/system.py | 53 ++-- .../tests/unit/test_client.py | 289 +++++++++++++++++- .../tests/unit/test_table.py | 228 ++------------ 5 files changed, 431 insertions(+), 344 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index db1a4b0f2138..50db75f94560 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -211,8 +211,12 @@ def create_table(self, table): """ path = '/projects/%s/datasets/%s/tables' % ( table.project, table.dataset_id) + resource = table._build_resource(Table.all_fields) + doomed = [field for field in resource if resource[field] is None] + for field in doomed: + del resource[field] api_response = self._connection.api_request( - method='POST', path=path, data=table._build_resource()) + method='POST', path=path, data=resource) return Table.from_api_repr(api_response, self) def get_dataset(self, dataset_ref): @@ -285,6 +289,28 @@ def update_dataset(self, dataset, fields): method='PATCH', path=path, data=partial, headers=headers) return Dataset.from_api_repr(api_response) + def update_table(self, table, properties): + """API call: update table properties via a PUT request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update + + :type table: + :class:`google.cloud.bigquery.table.Table` + :param table_ref: the table to update. + + :rtype: :class:`google.cloud.bigquery.table.Table` + :returns: a ``Table`` instance + """ + partial = table._build_resource(properties) + if table.etag is not None: + headers = {'If-Match': table.etag} + else: + headers = None + api_response = self._connection.api_request( + method='PATCH', path=table.path, data=partial, headers=headers) + return Table.from_api_repr(api_response, client=self) + def list_dataset_tables(self, dataset, max_results=None, page_token=None): """List tables in the dataset. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index dfc31be29745..e4814ae16c8e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -175,6 +175,11 @@ class Table(object): _schema = None + all_fields = [ + 'description', 'friendly_name', 'expires', 'location', + 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema' + ] + def __init__(self, table_ref, schema=(), client=None): self._project = table_ref.project self._table_id = table_ref.table_id @@ -240,9 +245,12 @@ def schema(self, value): :raises: TypeError if 'value' is not a sequence, or ValueError if any item in the sequence is not a SchemaField """ - if not all(isinstance(field, SchemaField) for field in value): + if value is None: + self._schema = () + elif not all(isinstance(field, SchemaField) for field in value): raise ValueError('Schema items must be fields') - self._schema = tuple(value) + else: + self._schema = tuple(value) @property def created(self): @@ -613,41 +621,59 @@ def _set_properties(self, api_response): cleaned['expirationTime'] = float(cleaned['expirationTime']) self._properties.update(cleaned) - def _build_resource(self): - """Generate a resource for ``create`` or ``update``.""" - resource = { - 'tableReference': { - 'projectId': self._project, - 'datasetId': self._dataset_id, - 'tableId': self.table_id}, - } - if self.description is not None: - resource['description'] = self.description - - if self.expires is not None: - value = _millis_from_datetime(self.expires) - resource['expirationTime'] = value - - if self.friendly_name is not None: - resource['friendlyName'] = self.friendly_name + def _populate_expires_resource(self, resource): + resource['expirationTime'] = _millis_from_datetime(self.expires) - if self.location is not None: - resource['location'] = self.location + def _populate_partitioning_type_resource(self, resource): + resource['timePartitioning'] = self._properties.get('timePartitioning') - if self.partitioning_type is not None: - resource['timePartitioning'] = self._properties['timePartitioning'] + def _populate_view_use_legacy_sql_resource(self, resource): + if 'view' not in resource: + resource['view'] = {} + resource['view']['useLegacySql'] = self.view_use_legacy_sql - if self.view_query is not None: - view = resource['view'] = {} - view['query'] = self.view_query - if self.view_use_legacy_sql is not None: - view['useLegacySql'] = self.view_use_legacy_sql + def _populate_view_query_resource(self, resource): + if self.view_query is None: + resource['view'] = None + return + if 'view' not in resource: + resource['view'] = {} + resource['view']['query'] = self.view_query - if self._schema: + def _populate_schema_resource(self, resource): + if not self._schema: + resource['schema'] = None + else: resource['schema'] = { - 'fields': _build_schema_resource(self._schema) + 'fields': _build_schema_resource(self._schema), } + custom_resource_fields = { + 'expires': _populate_expires_resource, + 'partitioning_type': _populate_partitioning_type_resource, + 'view_query': _populate_view_query_resource, + 'view_use_legacy_sql': _populate_view_use_legacy_sql_resource, + 'schema': _populate_schema_resource + } + + def _build_resource(self, filter_fields): + """Generate a resource for ``create`` or ``update``.""" + resource = { + 'tableReference': { + 'projectId': self._project, + 'datasetId': self._dataset_id, + 'tableId': self.table_id}, + } + for f in filter_fields: + if f in self.custom_resource_fields: + self.custom_resource_fields[f](self, resource) + else: + # TODO(alixh) refactor to use in both Table and Dataset + # snake case to camel case + words = f.split('_') + api_field = words[0] + ''.join( + map(str.capitalize, words[1:])) + resource[api_field] = getattr(self, f) return resource def exists(self, client=None): @@ -674,97 +700,6 @@ def exists(self, client=None): else: return True - def patch(self, - client=None, - friendly_name=_MARKER, - description=_MARKER, - location=_MARKER, - expires=_MARKER, - view_query=_MARKER, - schema=_MARKER): - """API call: update individual table properties via a PATCH request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/patch - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :type friendly_name: str - :param friendly_name: (Optional) a descriptive name for this table. - - :type description: str - :param description: (Optional) a description of this table. - - :type location: str - :param location: - (Optional) the geographic location where the table resides. - - :type expires: :class:`datetime.datetime` - :param expires: (Optional) point in time at which the table expires. - - :type view_query: str - :param view_query: SQL query defining the table as a view - - :type schema: list of :class:`SchemaField` - :param schema: fields describing the schema - - :raises: ValueError for invalid value types. - """ - client = self._require_client(client) - - partial = {} - - if expires is not _MARKER: - if (not isinstance(expires, datetime.datetime) and - expires is not None): - raise ValueError("Pass a datetime, or None") - partial['expirationTime'] = _millis_from_datetime(expires) - - if description is not _MARKER: - partial['description'] = description - - if friendly_name is not _MARKER: - partial['friendlyName'] = friendly_name - - if location is not _MARKER: - partial['location'] = location - - if view_query is not _MARKER: - if view_query is None: - partial['view'] = None - else: - partial['view'] = {'query': view_query} - - if schema is not _MARKER: - if schema is None: - partial['schema'] = None - else: - partial['schema'] = { - 'fields': _build_schema_resource(schema)} - - api_response = client._connection.api_request( - method='PATCH', path=self.path, data=partial) - self._set_properties(api_response) - - def update(self, client=None): - """API call: update table properties via a PUT request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - api_response = client._connection.api_request( - method='PUT', path=self.path, data=self._build_resource()) - self._set_properties(api_response) - def row_from_mapping(self, mapping): """Convert a mapping to a row tuple using the schema. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1936fc435e57..83d08c7598a4 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -250,14 +250,15 @@ def test_list_dataset_tables(self): table.dataset_id == DATASET_ID)] self.assertEqual(len(created), len(tables_to_create)) - def test_patch_table(self): - dataset = self.temp_dataset(_make_dataset_id('patch_table')) + def test_update_table(self): + dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + table_arg = Table(dataset.table(TABLE_NAME), schema=schema, client=Config.CLIENT) self.assertFalse(table_arg.exists()) table = retry_403(Config.CLIENT.create_table)(table_arg) @@ -265,18 +266,34 @@ def test_patch_table(self): self.assertTrue(table.exists()) self.assertIsNone(table.friendly_name) self.assertIsNone(table.description) - table.patch(friendly_name='Friendly', description='Description') - self.assertEqual(table.friendly_name, 'Friendly') - self.assertEqual(table.description, 'Description') + table.friendly_name = 'Friendly' + table.description = 'Description' - def test_update_table(self): + table2 = Config.CLIENT.update_table( + table, ['friendly_name', 'description']) + + self.assertEqual(table2.friendly_name, 'Friendly') + self.assertEqual(table2.description, 'Description') + + table2.description = None + table3 = Config.CLIENT.update_table(table2, ['description']) + self.assertIsNone(table3.description) + + # If we try to update using table2 again, it will fail because the + # previous update changed the ETag. + table2.description = 'no good' + with self.assertRaises(PreconditionFailed): + Config.CLIENT.update_table(table2, ['description']) + + def test_update_table_schema(self): dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + table_arg = Table(dataset.table(TABLE_NAME), schema=schema, client=Config.CLIENT) self.assertFalse(table_arg.exists()) table = retry_403(Config.CLIENT.create_table)(table_arg) @@ -286,9 +303,11 @@ def test_update_table(self): schema = table.schema schema.append(voter) table.schema = schema - table.update() - self.assertEqual(len(table.schema), len(schema)) - for found, expected in zip(table.schema, schema): + + updated_table = Config.CLIENT.update_table(table, ['schema']) + + self.assertEqual(len(updated_table.schema), len(schema)) + for found, expected in zip(updated_table.schema, schema): self.assertEqual(found.name, expected.name) self.assertEqual(found.field_type, expected.field_type) self.assertEqual(found.mode, expected.mode) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index cd0c3f6d71b0..aad64d980df1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -527,7 +527,8 @@ def test_create_table_w_schema_and_query(self): {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] }, - 'view': {'query': query}, + # TODO(alixh) default to Standard SQL + 'view': {'query': query, 'useLegacySql': None}, } self.assertEqual(req['data'], sent) self.assertEqual(got.table_id, table_id) @@ -629,6 +630,292 @@ def test_update_dataset(self): req = conn._requested[1] self.assertEqual(req['headers']['If-Match'], 'etag') + def test_update_table(self): + from google.cloud.bigquery.table import Table, SchemaField + + project = 'PROJECT' + dataset_id = 'DATASET_ID' + table_id = 'table_id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + description = 'description' + title = 'title' + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] + }, + 'etag': 'etag', + 'description': description, + 'friendlyName': title, + } + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + conn = client._connection = _Connection(resource, resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, schema=schema, client=client) + table.description = description + table.friendly_name = title + + updated_table = client.update_table( + table, ['schema', 'description', 'friendly_name']) + + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, + 'description': description, + 'friendlyName': title, + } + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['data'], sent) + self.assertEqual(req['path'], '/' + path) + self.assertIsNone(req['headers']) + self.assertEqual(updated_table.description, table.description) + self.assertEqual(updated_table.friendly_name, table.friendly_name) + self.assertEqual(updated_table.schema, table.schema) + + # ETag becomes If-Match header. + table._properties['etag'] = 'etag' + client.update_table(table, []) + req = conn._requested[1] + self.assertEqual(req['headers']['If-Match'], 'etag') + + def test_update_table_only_use_legacy_sql(self): + from google.cloud.bigquery.table import Table + + project = 'PROJECT' + dataset_id = 'DATASET_ID' + table_id = 'table_id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'view': {'useLegacySql': True} + } + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=client) + table.view_use_legacy_sql = True + + updated_table = client.update_table(table, ['view_use_legacy_sql']) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'view': {'useLegacySql': True} + } + self.assertEqual(req['data'], sent) + self.assertEqual( + updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + + def test_update_table_w_query(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + from google.cloud.bigquery.table import Table, SchemaField + + project = 'PROJECT' + dataset_id = 'DATASET_ID' + table_id = 'table_id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + query = 'select fullname, age from person_ages' + location = 'EU' + exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + schema_resource = {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]} + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': schema_resource, + 'view': {'query': query, 'useLegacySql': True}, + 'location': location, + 'expirationTime': _millis(exp_time) + } + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, schema=schema, client=client) + table.location = location + table.expires = exp_time + table.view_query = query + table.view_use_legacy_sql = True + updated_properties = ['schema', 'view_query', 'location', + 'expires', 'view_use_legacy_sql'] + + updated_table = client.update_table(table, updated_properties) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'view': {'query': query, 'useLegacySql': True}, + 'location': location, + 'expirationTime': _millis(exp_time), + 'schema': schema_resource, + } + self.assertEqual(req['data'], sent) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.view_query, table.view_query) + self.assertEqual(updated_table.location, table.location) + self.assertEqual(updated_table.expires, table.expires) + self.assertEqual( + updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + + def test_update_table_w_schema_None(self): + # Simulate deleting schema: not sure if back-end will actually + # allow this operation, but the spec says it is optional. + project = 'PROJECT' + dataset_id = 'DATASET_ID' + table_id = 'table_id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + resource1 = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id}, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]} + } + resource2 = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id}, + 'schema': {'fields': []}, + } + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + conn = client._connection = _Connection(resource1, resource2) + table_ref = client.dataset(dataset_id).table(table_id) + table = client.get_table(table_ref) + table.schema = None + + updated_table = client.update_table(table, ['schema']) + + self.assertEqual(len(conn._requested), 2) + req = conn._requested[1] + self.assertEqual(req['method'], 'PATCH') + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': None + } + self.assertEqual(req['data'], sent) + self.assertEqual(req['path'], '/%s' % path) + self.assertEqual(updated_table.schema, table.schema) + + def test_update_table_delete_property(self): + from google.cloud.bigquery.table import Table + + project = 'PROJECT' + dataset_id = 'DATASET_ID' + table_id = 'table_id' + description = 'description' + title = 'title' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + resource1 = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'description': description, + 'friendlyName': title, + } + resource2 = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'description': None, + } + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + conn = client._connection = _Connection(resource1, resource2) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=client) + table.description = description + table.friendly_name = title + table2 = client.update_table(table, ['description', 'friendly_name']) + self.assertEqual(table2.description, table.description) + table2.description = None + + table3 = client.update_table(table2, ['description']) + self.assertEqual(len(conn._requested), 2) + req = conn._requested[1] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'description': None, + } + self.assertEqual(req['data'], sent) + self.assertIsNone(table3.description) + def test_list_dataset_tables_empty(self): PROJECT = 'PROJECT' DS_ID = 'DATASET_ID' diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index fc0ff3370974..6e00bd73c9c6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -550,8 +550,17 @@ def test_from_api_repr_bare(self): self._verifyResourceProperties(table, RESOURCE) def test_from_api_repr_w_properties(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + client = _Client(self.PROJECT) RESOURCE = self._makeResource() + RESOURCE['view'] = {'query': 'select fullname, age from person_ages'} + RESOURCE['type'] = 'VIEW' + RESOURCE['location'] = 'EU' + self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + RESOURCE['expirationTime'] = _millis(self.EXP_TIME) klass = self._get_target_class() table = klass.from_api_repr(RESOURCE, client) self.assertIs(table._client, client) @@ -741,210 +750,6 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) - def test_patch_w_invalid_expiration(self): - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - with self.assertRaises(ValueError): - table.patch(expires='BOGUS') - - def test_patch_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - table.patch(description=DESCRIPTION, - friendly_name=TITLE, - view_query=None) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - SENT = { - 'description': DESCRIPTION, - 'friendlyName': TITLE, - 'view': None, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_patch_w_alternate_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - QUERY = 'select fullname, age from person_ages' - LOCATION = 'EU' - RESOURCE = self._makeResource() - RESOURCE['view'] = {'query': QUERY} - RESOURCE['type'] = 'VIEW' - RESOURCE['location'] = LOCATION - self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, - tzinfo=UTC) - RESOURCE['expirationTime'] = _millis(self.EXP_TIME) - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='NULLABLE') - - table.patch(client=client2, view_query=QUERY, location=LOCATION, - expires=self.EXP_TIME, schema=[full_name, age]) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PATCH') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'view': {'query': QUERY}, - 'location': LOCATION, - 'expirationTime': _millis(self.EXP_TIME), - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_patch_w_schema_None(self): - # Simulate deleting schema: not sure if back-end will actually - # allow this operation, but the spec says it is optional. - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - table.patch(schema=None) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - SENT = {'schema': None} - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_update_w_bound_client(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - table.description = DESCRIPTION - table.friendly_name = TITLE - - table.update() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PUT') - SENT = { - 'tableReference': - {'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_update_w_alternate_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - DEF_TABLE_EXP = 12345 - LOCATION = 'EU' - QUERY = 'select fullname, age from person_ages' - RESOURCE = self._makeResource() - RESOURCE['defaultTableExpirationMs'] = 12345 - RESOURCE['location'] = LOCATION - self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, - tzinfo=UTC) - RESOURCE['expirationTime'] = _millis(self.EXP_TIME) - RESOURCE['view'] = {'query': QUERY, 'useLegacySql': True} - RESOURCE['type'] = 'VIEW' - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - table.default_table_expiration_ms = DEF_TABLE_EXP - table.location = LOCATION - table.expires = self.EXP_TIME - table.view_query = QUERY - table.view_use_legacy_sql = True - - table.update(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PUT') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': - {'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'expirationTime': _millis(self.EXP_TIME), - 'location': 'EU', - 'view': {'query': QUERY, 'useLegacySql': True}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - def test_row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} @@ -1224,6 +1029,21 @@ def _row_data(row): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['data'], SENT) + def test__populate_view_use_legacy_sql_resource_w_existing_view(self): + query = 'select * from foo' + resource = {'view': {'query': query}} + client = mock.Mock(spec=[u'_credentials', '_http']) + client._http = mock.sentinel.http + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table = self._make_one(dataset.table(self.TABLE_NAME), client=client) + table.view_use_legacy_sql = True + + table._populate_view_use_legacy_sql_resource(resource) + + self.assertEqual( + resource['view']['useLegacySql'], table.view_use_legacy_sql) + self.assertEqual(resource['view']['query'], query) + def test__get_transport(self): client = mock.Mock(spec=[u'_credentials', '_http']) client._http = mock.sentinel.http From 9975f322a514ad38d0f3beb175f65bcd6c90d301 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 9 Oct 2017 14:38:16 -0400 Subject: [PATCH 0222/2016] bigquery: add config getters to LoadJob (#4137) --- .../google/cloud/bigquery/job.py | 98 ++++++++++++++++++- .../tests/unit/test_job.py | 86 ++++++++-------- 2 files changed, 135 insertions(+), 49 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index fd427c647a55..0ec20c8c291c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -714,12 +714,102 @@ def __init__(self, job_id, source_uris, destination, client, self._configuration = job_config @property - def configuration(self): - """Configuration for this job. + def allow_jagged_rows(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`. + """ + return self._configuration.allow_jagged_rows + + @property + def allow_quoted_newlines(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`. + """ + return self._configuration.allow_quoted_newlines + + @property + def autodetect(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.autodetect`. + """ + return self._configuration.autodetect + + @property + def create_disposition(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.create_disposition`. + """ + return self._configuration.create_disposition + + @property + def encoding(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.encoding`. + """ + return self._configuration.encoding + + @property + def field_delimiter(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.field_delimiter`. + """ + return self._configuration.field_delimiter + + @property + def ignore_unknown_values(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`. + """ + return self._configuration.ignore_unknown_values - :rtype: :class:`~google.cloud.bigquery.job.LoadJobConfig` + @property + def max_bad_records(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.max_bad_records`. + """ + return self._configuration.max_bad_records + + @property + def null_marker(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.null_marker`. + """ + return self._configuration.null_marker + + @property + def quote_character(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.quote_character`. + """ + return self._configuration.quote_character + + @property + def skip_leading_rows(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`. + """ + return self._configuration.skip_leading_rows + + @property + def source_format(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.source_format`. + """ + return self._configuration.source_format + + @property + def write_disposition(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.write_disposition`. + """ + return self._configuration.write_disposition + + @property + def schema(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.schema`. """ - return self._configuration + return self._configuration.schema @property def input_file_bytes(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ebe9ed49ddda..a49ea1b6fab7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -234,53 +234,50 @@ def _makeResource(self, started=False, ended=False): return resource def _verifyBooleanConfigProperties(self, job, config): - jconfig = job.configuration if 'allowJaggedRows' in config: - self.assertEqual(jconfig.allow_jagged_rows, + self.assertEqual(job.allow_jagged_rows, config['allowJaggedRows']) else: - self.assertIsNone(jconfig.allow_jagged_rows) + self.assertIsNone(job.allow_jagged_rows) if 'allowQuotedNewlines' in config: - self.assertEqual(jconfig.allow_quoted_newlines, + self.assertEqual(job.allow_quoted_newlines, config['allowQuotedNewlines']) else: - self.assertIsNone(jconfig.allow_quoted_newlines) + self.assertIsNone(job.allow_quoted_newlines) if 'autodetect' in config: self.assertEqual( - jconfig.autodetect, config['autodetect']) + job.autodetect, config['autodetect']) else: - self.assertIsNone(jconfig.autodetect) + self.assertIsNone(job.autodetect) if 'ignoreUnknownValues' in config: - self.assertEqual(jconfig.ignore_unknown_values, + self.assertEqual(job.ignore_unknown_values, config['ignoreUnknownValues']) else: - self.assertIsNone(jconfig.ignore_unknown_values) + self.assertIsNone(job.ignore_unknown_values) def _verifyEnumConfigProperties(self, job, config): - jconfig = job.configuration if 'createDisposition' in config: - self.assertEqual(jconfig.create_disposition, + self.assertEqual(job.create_disposition, config['createDisposition']) else: - self.assertIsNone(jconfig.create_disposition) + self.assertIsNone(job.create_disposition) if 'encoding' in config: - self.assertEqual(jconfig.encoding, + self.assertEqual(job.encoding, config['encoding']) else: - self.assertIsNone(jconfig.encoding) + self.assertIsNone(job.encoding) if 'sourceFormat' in config: - self.assertEqual(jconfig.source_format, + self.assertEqual(job.source_format, config['sourceFormat']) else: - self.assertIsNone(jconfig.source_format) + self.assertIsNone(job.source_format) if 'writeDisposition' in config: - self.assertEqual(jconfig.write_disposition, + self.assertEqual(job.write_disposition, config['writeDisposition']) else: - self.assertIsNone(jconfig.write_disposition) + self.assertIsNone(job.write_disposition) def _verifyResourceProperties(self, job, resource): - jconfig = job.configuration self._verifyReadonlyResourceProperties(job, resource) config = resource.get('configuration', {}).get('load') @@ -296,30 +293,30 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.destination.table_id, table_ref['tableId']) if 'fieldDelimiter' in config: - self.assertEqual(jconfig.field_delimiter, + self.assertEqual(job.field_delimiter, config['fieldDelimiter']) else: - self.assertIsNone(jconfig.field_delimiter) + self.assertIsNone(job.field_delimiter) if 'maxBadRecords' in config: - self.assertEqual(jconfig.max_bad_records, + self.assertEqual(job.max_bad_records, config['maxBadRecords']) else: - self.assertIsNone(jconfig.max_bad_records) + self.assertIsNone(job.max_bad_records) if 'nullMarker' in config: - self.assertEqual(jconfig.null_marker, + self.assertEqual(job.null_marker, config['nullMarker']) else: - self.assertIsNone(jconfig.null_marker) + self.assertIsNone(job.null_marker) if 'quote' in config: - self.assertEqual(jconfig.quote_character, + self.assertEqual(job.quote_character, config['quote']) else: - self.assertIsNone(jconfig.quote_character) + self.assertIsNone(job.quote_character) if 'skipLeadingRows' in config: - self.assertEqual(str(jconfig.skip_leading_rows), + self.assertEqual(str(job.skip_leading_rows), config['skipLeadingRows']) else: - self.assertIsNone(jconfig.skip_leading_rows) + self.assertIsNone(job.skip_leading_rows) def test_ctor(self): client = _Client(self.PROJECT) @@ -332,7 +329,7 @@ def test_ctor(self): self.assertEqual( job.path, '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) - self.assertEqual(job.configuration.schema, []) + self.assertEqual(job.schema, []) self._verifyInitialReadonlyProperties(job) @@ -343,20 +340,19 @@ def test_ctor(self): self.assertIsNone(job.output_rows) # set/read from resource['configuration']['load'] - jconfig = job.configuration - self.assertIsNone(jconfig.allow_jagged_rows) - self.assertIsNone(jconfig.allow_quoted_newlines) - self.assertIsNone(jconfig.autodetect) - self.assertIsNone(jconfig.create_disposition) - self.assertIsNone(jconfig.encoding) - self.assertIsNone(jconfig.field_delimiter) - self.assertIsNone(jconfig.ignore_unknown_values) - self.assertIsNone(jconfig.max_bad_records) - self.assertIsNone(jconfig.null_marker) - self.assertIsNone(jconfig.quote_character) - self.assertIsNone(jconfig.skip_leading_rows) - self.assertIsNone(jconfig.source_format) - self.assertIsNone(jconfig.write_disposition) + self.assertIsNone(job.allow_jagged_rows) + self.assertIsNone(job.allow_quoted_newlines) + self.assertIsNone(job.autodetect) + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.encoding) + self.assertIsNone(job.field_delimiter) + self.assertIsNone(job.ignore_unknown_values) + self.assertIsNone(job.max_bad_records) + self.assertIsNone(job.null_marker) + self.assertIsNone(job.quote_character) + self.assertIsNone(job.skip_leading_rows) + self.assertIsNone(job.source_format) + self.assertIsNone(job.write_disposition) def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField @@ -368,7 +364,7 @@ def test_ctor_w_config(self): config.schema = [full_name, age] job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, client, config) - self.assertEqual(job.configuration.schema, [full_name, age]) + self.assertEqual(job.schema, [full_name, age]) def test_done(self): client = _Client(self.PROJECT) From 389a9a67949adb7ac6b30c7e51e43ba70b7c6c70 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 9 Oct 2017 14:02:39 -0700 Subject: [PATCH 0223/2016] BQ: remove client.run_async_query, add client.query (#4130) - Add a QueryJobConfig class. - Move configuration properties from QueryJob to QueryJobConfig. - Make standard SQL dialect the default. - Make query_rows use new query method --- .../google/cloud/bigquery/__init__.py | 6 + .../google/cloud/bigquery/_helpers.py | 93 ++-- .../google/cloud/bigquery/client.py | 51 +-- .../google/cloud/bigquery/dbapi/cursor.py | 14 +- .../google/cloud/bigquery/job.py | 428 ++++++++++-------- .../google-cloud-bigquery/tests/system.py | 35 +- .../tests/unit/test__helpers.py | 142 ++---- .../tests/unit/test_client.py | 123 ++++- .../tests/unit/test_dbapi_cursor.py | 8 +- .../tests/unit/test_job.py | 389 ++++++++++------ 10 files changed, 728 insertions(+), 561 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 3a7cc2be7a69..d6c210e9843b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -29,15 +29,18 @@ from google.cloud.bigquery._helpers import ArrayQueryParameter from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter +from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference __all__ = [ '__version__', @@ -45,6 +48,7 @@ 'ArrayQueryParameter', 'Client', 'Dataset', + 'DatasetReference', 'CopyJobConfig', 'ExtractJobConfig', 'QueryJobConfig', @@ -53,4 +57,6 @@ 'SchemaField', 'StructQueryParameter', 'Table', + 'TableReference', + 'UDFResource', ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 51000148fb0b..dad87fde88bb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -365,6 +365,44 @@ def _validate(self, value): raise ValueError('Required type: %s' % (self.property_type,)) +class _ListApiResourceProperty(_ApiResourceProperty): + """Property implementation: validates based on value type. + + :type name: str + :param name: name of the property + + :type resource_name: str + :param resource_name: name of the property in the resource dictionary + + :type property_type: type or sequence of types + :param property_type: type to be validated + """ + def __init__(self, name, resource_name, property_type): + super(_ListApiResourceProperty, self).__init__( + name, resource_name) + self.property_type = property_type + + def __get__(self, instance, owner): + """Descriptor protocol: accessor""" + if instance is None: + return self + return instance._properties.get(self.resource_name, []) + + def _validate(self, value): + """Ensure that 'value' is of the appropriate type. + + :raises: ValueError on a type mismatch. + """ + if value is None: + raise ValueError(( + 'Required type: list of {}. ' + 'To unset, use del or set to empty list').format( + self.property_type,)) + if not all(isinstance(item, self.property_type) for item in value): + raise ValueError( + 'Required type: list of %s' % (self.property_type,)) + + class _EnumApiResourceProperty(_ApiResourceProperty): """Pseudo-enumeration class. @@ -469,22 +507,6 @@ def __ne__(self, other): return not self == other -class UDFResourcesProperty(object): - """Custom property type, holding :class:`UDFResource` instances.""" - - def __get__(self, instance, owner): - """Descriptor protocol: accessor""" - if instance is None: - return self - return list(instance._udf_resources) - - def __set__(self, instance, value): - """Descriptor protocol: mutator""" - if not all(isinstance(u, UDFResource) for u in value): - raise ValueError("udf items must be UDFResource") - instance._udf_resources = tuple(value) - - class AbstractQueryParameter(object): """Base class for named / positional query parameters. """ @@ -898,45 +920,6 @@ def _query_param_from_api_repr(resource): return klass.from_api_repr(resource) -class QueryParametersProperty(object): - """Custom property type, holding query parameter instances.""" - - def __get__(self, instance, owner): - """Descriptor protocol: accessor - - :type instance: :class:`QueryParametersProperty` - :param instance: instance owning the property (None if accessed via - the class). - - :type owner: type - :param owner: the class owning the property. - - :rtype: list of instances of classes derived from - :class:`AbstractQueryParameter`. - :returns: the descriptor, if accessed via the class, or the instance's - query parameters. - """ - if instance is None: - return self - return list(instance._query_parameters) - - def __set__(self, instance, value): - """Descriptor protocol: mutator - - :type instance: :class:`QueryParametersProperty` - :param instance: instance owning the property (None if accessed via - the class). - - :type value: list of instances of classes derived from - :class:`AbstractQueryParameter`. - :param value: new query parameters for the instance. - """ - if not all(isinstance(u, AbstractQueryParameter) for u in value): - raise ValueError( - "query parameters must be derived from AbstractQueryParameter") - instance._query_parameters = tuple(value) - - def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 50db75f94560..f460202a3631 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -32,7 +32,6 @@ from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import QueryJob -from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import QueryResults from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start @@ -622,36 +621,30 @@ def extract_table(self, source, *destination_uris, **kwargs): job.begin() return job - def run_async_query(self, job_id, query, - udf_resources=(), query_parameters=()): - """Construct a job for running a SQL query asynchronously. + def query(self, query, job_config=None, job_id=None): + """Start a job that runs a SQL query. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query - :type job_id: str - :param job_id: Name of the job. - :type query: str - :param query: SQL query to be executed + :param query: + SQL query to be executed. Defaults to the standard SQL dialect. + Use the ``job_config`` parameter to change dialects. - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery._helpers.UDFResource` - (empty by default) + :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig` + :param job_config: (Optional) Extra configuration options for the job. - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) + :type job_id: str + :param job_id: (Optional) ID to use for the query job. :rtype: :class:`google.cloud.bigquery.job.QueryJob` :returns: a new ``QueryJob`` instance """ - return QueryJob(job_id, query, client=self, - udf_resources=udf_resources, - query_parameters=query_parameters) + job_id = _make_job_id(job_id) + job = QueryJob(job_id, query, client=self, job_config=job_config) + job.begin() + return job def query_rows(self, query, job_config=None, job_id=None, timeout=None): """Start a query job and wait for the results. @@ -660,7 +653,12 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query :type query: str - :param query: SQL query to be executed + :param query: + SQL query to be executed. Defaults to the standard SQL dialect. + Use the ``job_config`` parameter to change dialects. + + :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig` + :param job_config: (Optional) Extra configuration options for the job. :type job_id: str :param job_id: (Optional) ID to use for the query job. @@ -682,16 +680,7 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None): failed or :class:`TimeoutError` if the job did not complete in the given timeout. """ - job_id = _make_job_id(job_id) - - # TODO(swast): move standard SQL default to QueryJobConfig class. - if job_config is None: - job_config = QueryJobConfig() - if job_config.use_legacy_sql is None: - job_config.use_legacy_sql = False - - job = QueryJob(job_id, query, client=self, job_config=job_config) - job.begin() + job = self.query(query, job_config=job_config, job_id=job_id) return job.result(timeout=timeout) def list_rows(self, table, selected_fields=None, max_results=None, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index c1683c16db79..b5a05de6d90b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -15,10 +15,10 @@ """Cursor for the Google BigQuery DB-API.""" import collections -import uuid import six +from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions import google.cloud.exceptions @@ -135,8 +135,6 @@ def execute(self, operation, parameters=None, job_id=None): self._query_data = None self._query_results = None client = self.connection._client - if job_id is None: - job_id = str(uuid.uuid4()) # The DB-API uses the pyformat formatting, since the way BigQuery does # query parameters was not one of the standard options. Convert both @@ -146,11 +144,11 @@ def execute(self, operation, parameters=None, job_id=None): operation, parameters=parameters) query_parameters = _helpers.to_query_parameters(parameters) - query_job = client.run_async_query( - job_id, - formatted_operation, - query_parameters=query_parameters) - query_job.use_legacy_sql = False + config = job.QueryJobConfig() + config.query_parameters = query_parameters + config.use_legacy_sql = False + query_job = client.query( + formatted_operation, job_config=config, job_id=job_id) # Wait for the query to finish. try: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 0ec20c8c291c..af3b1997f177 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -29,17 +29,15 @@ from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery._helpers import AbstractQueryParameter from google.cloud.bigquery._helpers import ArrayQueryParameter -from google.cloud.bigquery._helpers import QueryParametersProperty from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery._helpers import UDFResource -from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _EnumApiResourceProperty -from google.cloud.bigquery._helpers import _EnumProperty +from google.cloud.bigquery._helpers import _ListApiResourceProperty from google.cloud.bigquery._helpers import _query_param_from_api_repr from google.cloud.bigquery._helpers import _TypedApiResourceProperty -from google.cloud.bigquery._helpers import _TypedProperty _DONE_STATE = 'DONE' _STOPPED_REASON = 'stopped' @@ -129,7 +127,7 @@ class Encoding(_EnumApiResourceProperty): ISO_8559_1 = 'ISO-8559-1' -class QueryPriority(_EnumProperty): +class QueryPriority(_EnumApiResourceProperty): """Pseudo-enum for ``QueryJob.priority`` property.""" INTERACTIVE = 'INTERACTIVE' BATCH = 'BATCH' @@ -1271,6 +1269,35 @@ def from_api_repr(cls, resource, client): return job +def _from_api_repr_query_parameters(resource): + return [ + _query_param_from_api_repr(mapping) + for mapping in resource + ] + + +def _to_api_repr_query_parameters(value): + return [ + query_parameter.to_api_repr() + for query_parameter in value + ] + + +def _from_api_repr_udf_resources(resource): + udf_resources = [] + for udf_mapping in resource: + for udf_type, udf_value in udf_mapping.items(): + udf_resources.append(UDFResource(udf_type, udf_value)) + return udf_resources + + +def _to_api_repr_udf_resources(value): + return [ + {udf_resource.udf_type: udf_resource.value} + for udf_resource in value + ] + + class QueryJobConfig(object): """Configuration options for query jobs. @@ -1278,6 +1305,9 @@ class QueryJobConfig(object): server defaults. """ + _QUERY_PARAMETERS_KEY = 'queryParameters' + _UDF_RESOURCES_KEY = 'userDefinedFunctionResources' + def __init__(self): self._properties = {} @@ -1287,7 +1317,24 @@ def to_api_repr(self): :rtype: dict :returns: A dictionary in the format used by the BigQuery API. """ - return copy.deepcopy(self._properties) + resource = copy.deepcopy(self._properties) + + # Query parameters have an addition property associated with them + # to indicate if the query is using named or positional parameters. + query_parameters = resource.get(self._QUERY_PARAMETERS_KEY) + if query_parameters: + if query_parameters[0].name is None: + resource['parameterMode'] = 'POSITIONAL' + else: + resource['parameterMode'] = 'NAMED' + + for prop, convert in self._NESTED_PROPERTIES.items(): + _, to_resource = convert + nested_resource = resource.get(prop) + if nested_resource is not None: + resource[prop] = to_resource(nested_resource) + + return resource @classmethod def from_api_repr(cls, resource): @@ -1303,13 +1350,37 @@ def from_api_repr(cls, resource): """ config = cls() config._properties = copy.deepcopy(resource) + + for prop, convert in cls._NESTED_PROPERTIES.items(): + from_resource, _ = convert + nested_resource = resource.get(prop) + if nested_resource is not None: + config._properties[prop] = from_resource(nested_resource) + return config - use_legacy_sql = _TypedApiResourceProperty( - 'use_legacy_sql', 'useLegacySql', bool) + allow_large_results = _TypedApiResourceProperty( + 'allow_large_results', 'allowLargeResults', bool) + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults + """ + + create_disposition = CreateDisposition( + 'create_disposition', 'createDisposition') + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition + """ + + default_dataset = _TypedApiResourceProperty( + 'default_dataset', 'defaultDataset', DatasetReference) + """See + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset + """ + + destination = _TypedApiResourceProperty( + 'destination', 'destinationTable', TableReference) """See - https://cloud.google.com/bigquery/docs/\ - reference/v2/jobs#configuration.query.useLegacySql + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable """ dry_run = _TypedApiResourceProperty('dry_run', 'dryRun', bool) @@ -1317,18 +1388,83 @@ def from_api_repr(cls, resource): https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun """ - _allow_large_results = None - _create_disposition = None - _default_dataset = None - _destination = None - _flatten_results = None - _priority = None - _use_query_cache = None - _use_legacy_sql = None - _write_disposition = None + flatten_results = _TypedApiResourceProperty( + 'flatten_results', 'flattenResults', bool) + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults + """ + + maximum_billing_tier = _TypedApiResourceProperty( + 'maximum_billing_tier', 'maximumBillingTier', int) + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier + """ + + maximum_bytes_billed = _TypedApiResourceProperty( + 'maximum_bytes_billed', 'maximumBytesBilled', int) + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled + """ + + priority = QueryPriority('priority', 'priority') + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority + """ + + query_parameters = _ListApiResourceProperty( + 'query_parameters', _QUERY_PARAMETERS_KEY, AbstractQueryParameter) + """ + An list of + :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` + (empty by default) + + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.queryParameters + """ + + udf_resources = _ListApiResourceProperty( + 'udf_resources', _UDF_RESOURCES_KEY, UDFResource) + """ + A list of :class:`google.cloud.bigquery._helpers.UDFResource` (empty + by default) + + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.userDefinedFunctionResources + """ + + use_legacy_sql = _TypedApiResourceProperty( + 'use_legacy_sql', 'useLegacySql', bool) + """See + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.useLegacySql + """ + + use_query_cache = _TypedApiResourceProperty( + 'use_query_cache', 'useQueryCache', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache + """ + + write_disposition = WriteDisposition( + 'write_disposition', 'writeDisposition') + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition + """ + _maximum_billing_tier = None _maximum_bytes_billed = None + _NESTED_PROPERTIES = { + 'defaultDataset': ( + DatasetReference.from_api_repr, DatasetReference.to_api_repr), + 'destinationTable': ( + TableReference.from_api_repr, TableReference.to_api_repr), + 'maximumBytesBilled': (int, str), + _QUERY_PARAMETERS_KEY: ( + _from_api_repr_query_parameters, _to_api_repr_query_parameters), + _UDF_RESOURCES_KEY: ( + _from_api_repr_udf_resources, _to_api_repr_udf_resources), + } + class QueryJob(_AsyncJob): """Asynchronous job: query tables. @@ -1343,53 +1479,52 @@ class QueryJob(_AsyncJob): :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery._helpers.UDFResource` - (empty by default) - - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) - :type job_config: :class:`~google.cloud.bigquery.job.QueryJobConfig` :param job_config: (Optional) Extra configuration options for the query job. """ _JOB_TYPE = 'query' _UDF_KEY = 'userDefinedFunctionResources' - _QUERY_PARAMETERS_KEY = 'queryParameters' - def __init__(self, job_id, query, client, - udf_resources=(), query_parameters=(), job_config=None): + def __init__(self, job_id, query, client, job_config=None): super(QueryJob, self).__init__(job_id, client) if job_config is None: job_config = QueryJobConfig() + if job_config.use_legacy_sql is None: + job_config.use_legacy_sql = False self.query = query - self.udf_resources = udf_resources - self.query_parameters = query_parameters self._configuration = job_config self._query_results = None @property - def use_legacy_sql(self): + def allow_large_results(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + :class:`~google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. """ - return self._configuration.use_legacy_sql + return self._configuration.allow_large_results - @use_legacy_sql.setter - def use_legacy_sql(self, value): + @property + def create_disposition(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + :class:`~google.cloud.bigquery.job.QueryJobConfig.create_disposition`. """ - # TODO(swast): remove this method and only allow setting use_legacy_sql - # on QueryJobConfig objects. - self._configuration.use_legacy_sql = value + return self._configuration.create_disposition + + @property + def default_dataset(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.default_dataset`. + """ + return self._configuration.default_dataset + + @property + def destination(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.destination`. + """ + return self._configuration.destination @property def dry_run(self): @@ -1398,130 +1533,68 @@ def dry_run(self): """ return self._configuration.dry_run - @dry_run.setter - def dry_run(self, value): + @property + def flatten_results(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.dry_run`. + :class:`~google.cloud.bigquery.job.QueryJobConfig.flatten_results`. """ - # TODO(swast): remove this method and only allow setting dry_run - # on QueryJobConfig objects. - self._configuration.dry_run = value - - allow_large_results = _TypedProperty('allow_large_results', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults - """ - - create_disposition = CreateDisposition('create_disposition', - 'createDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition - """ - - default_dataset = _TypedProperty('default_dataset', DatasetReference) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.defaultDataset - """ - - destination = _TypedProperty('destination', TableReference) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable - """ - - flatten_results = _TypedProperty('flatten_results', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults - """ + return self._configuration.flatten_results - priority = QueryPriority('priority') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority - """ + @property + def priority(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.priority`. + """ + return self._configuration.priority - query_parameters = QueryParametersProperty() + @property + def query_parameters(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.query_parameters`. + """ + return self._configuration.query_parameters - udf_resources = UDFResourcesProperty() + @property + def udf_resources(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.udf_resources`. + """ + return self._configuration.udf_resources - use_query_cache = _TypedProperty('use_query_cache', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache - """ + @property + def use_legacy_sql(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + """ + return self._configuration.use_legacy_sql - write_disposition = WriteDisposition( - 'write_disposition', 'writeDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition - """ + @property + def use_query_cache(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. + """ + return self._configuration.use_query_cache - maximum_billing_tier = _TypedProperty('maximum_billing_tier', int) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier - """ + @property + def write_disposition(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.write_disposition`. + """ + return self._configuration.write_disposition - maximum_bytes_billed = _TypedProperty('maximum_bytes_billed', int) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled - """ + @property + def maximum_billing_tier(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. + """ + return self._configuration.maximum_billing_tier - def _destination_table_resource(self): - """Create a JSON resource for the destination table. - - Helper for :meth:`_populate_config_resource` and - :meth:`_scrub_local_properties` - """ - if self.destination is not None: - return { - 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.table_id, - } - - def _populate_config_resource_booleans(self, configuration): - """Helper for _populate_config_resource.""" - if self.allow_large_results is not None: - configuration['allowLargeResults'] = self.allow_large_results - if self.flatten_results is not None: - configuration['flattenResults'] = self.flatten_results - if self.use_query_cache is not None: - configuration['useQueryCache'] = self.use_query_cache - - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - self._populate_config_resource_booleans(configuration) - - if self.create_disposition is not None: - configuration['createDisposition'] = self.create_disposition - if self.default_dataset is not None: - configuration['defaultDataset'] = { - 'projectId': self.default_dataset.project, - 'datasetId': self.default_dataset.dataset_id, - } - table_res = self._destination_table_resource() - if table_res is not None: - configuration['destinationTable'] = table_res - if self.priority is not None: - configuration['priority'] = self.priority - if self.write_disposition is not None: - configuration['writeDisposition'] = self.write_disposition - if self.maximum_billing_tier is not None: - configuration['maximumBillingTier'] = self.maximum_billing_tier - if self.maximum_bytes_billed is not None: - configuration['maximumBytesBilled'] = str( - self.maximum_bytes_billed) - if len(self._udf_resources) > 0: - configuration[self._UDF_KEY] = [ - {udf_resource.udf_type: udf_resource.value} - for udf_resource in self._udf_resources - ] - if len(self._query_parameters) > 0: - configuration[self._QUERY_PARAMETERS_KEY] = [ - query_parameter.to_api_repr() - for query_parameter in self._query_parameters - ] - if self._query_parameters[0].name is None: - configuration['parameterMode'] = 'POSITIONAL' - else: - configuration['parameterMode'] = 'NAMED' + @property + def maximum_bytes_billed(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. + """ + return self._configuration.maximum_bytes_billed def _build_resource(self): """Generate a resource for :meth:`begin`.""" @@ -1545,7 +1618,6 @@ def _build_resource(self): resource['configuration']['dryRun'] = dry_run configuration['query'] = self.query - self._populate_config_resource(configuration) return resource @@ -1574,42 +1646,6 @@ def _copy_configuration_properties(self, configuration): self._configuration = QueryJobConfig.from_api_repr(configuration) self._configuration.dry_run = dry_run - self.allow_large_results = _bool_or_none( - configuration.get('allowLargeResults')) - self.flatten_results = _bool_or_none( - configuration.get('flattenResults')) - self.use_query_cache = _bool_or_none( - configuration.get('useQueryCache')) - - self.create_disposition = configuration.get('createDisposition') - self.priority = configuration.get('priority') - self.write_disposition = configuration.get('writeDisposition') - self.maximum_billing_tier = configuration.get('maximumBillingTier') - self.maximum_bytes_billed = _int_or_none( - configuration.get('maximumBytesBilled')) - - dest_remote = configuration.get('destinationTable') - - if dest_remote is not None: - dataset = DatasetReference( - dest_remote['projectId'], dest_remote['datasetId']) - self.destination = dataset.table(dest_remote['tableId']) - - def_ds = configuration.get('defaultDataset') - if def_ds is not None: - self.default_dataset = DatasetReference( - def_ds['projectId'], def_ds['datasetId']) - udf_resources = [] - for udf_mapping in configuration.get(self._UDF_KEY, ()): - key_val, = udf_mapping.items() - udf_resources.append(UDFResource(key_val[0], key_val[1])) - self._udf_resources = udf_resources - - self._query_parameters = [ - _query_param_from_api_repr(mapping) - for mapping in configuration.get(self._QUERY_PARAMETERS_KEY, ()) - ] - @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -1798,6 +1834,7 @@ def query_results(self): """ if not self._query_results: self._query_results = self._client._get_query_results(self.job_id) + self._query_results._job = self return self._query_results def done(self): @@ -1810,6 +1847,7 @@ def done(self): # change once complete. if self.state != _DONE_STATE: self._query_results = self._client._get_query_results(self.job_id) + self._query_results._job = self # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 83d08c7598a4..ce49e88177e7 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -20,7 +20,6 @@ import os import time import unittest -import uuid import six @@ -689,7 +688,7 @@ def test_copy_table(self): def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') - JOB_NAME = 'fetch_' + DATASET_ID + JOB_ID = 'fetch_' + DATASET_ID TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) @@ -703,8 +702,7 @@ def test_job_cancel(self): table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - job = Config.CLIENT.run_async_query(JOB_NAME, QUERY) - job.begin() + job = Config.CLIENT.query(QUERY, job_id=JOB_ID) job.cancel() retry = RetryInstanceState(_job_done, max_tries=8) @@ -924,11 +922,9 @@ def test_query_w_dml(self): WHERE greeting = 'Hello World' """ - query_job = Config.CLIENT.run_async_query( - 'test_query_w_dml_{}'.format(unique_resource_id()), - query_template.format(dataset_name, table_name)) - query_job.use_legacy_sql = False - query_job.begin() + query_job = Config.CLIENT.query( + query_template.format(dataset_name, table_name), + job_id='test_query_w_dml_{}'.format(unique_resource_id())) query_job.result() self.assertEqual(query_job.num_dml_affected_rows, 1) @@ -952,6 +948,7 @@ def test_query_w_query_params(self): from google.cloud.bigquery._helpers import ArrayQueryParameter from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter + from google.cloud.bigquery.job import QueryJobConfig question = 'What is the answer to life, the universe, and everything?' question_param = ScalarQueryParameter( name='question', type_='STRING', value=question) @@ -1109,13 +1106,14 @@ def test_query_w_query_params(self): }, ] for example in examples: - query_job = Config.CLIENT.run_async_query( - 'test_query_w_query_params{}'.format(unique_resource_id()), + jconfig = QueryJobConfig() + jconfig.query_parameters = example['query_parameters'] + query_job = Config.CLIENT.query( example['sql'], - query_parameters=example['query_parameters']) - query_job.use_legacy_sql = False - query_job.begin() - rows = [row for row in query_job.result()] + job_config=jconfig, + job_id='test_query_w_query_params{}'.format( + unique_resource_id())) + rows = list(query_job.result()) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) self.assertEqual(rows[0][0], example['expected']) @@ -1249,11 +1247,8 @@ def test_large_query_w_public_data(self): rows = list(iterator) self.assertEqual(len(rows), LIMIT) - def test_async_query_future(self): - query_job = Config.CLIENT.run_async_query( - str(uuid.uuid4()), 'SELECT 1') - query_job.use_legacy_sql = False - + def test_query_future(self): + query_job = Config.CLIENT.query('SELECT 1') iterator = query_job.result(timeout=JOB_TIMEOUT) rows = list(iterator) self.assertEqual(rows, [(1,)]) diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 3d40f38a5799..2254f6b01d89 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -881,93 +881,6 @@ def __init__(self): self.assertIsNone(wrapper._configuration._attr) -class Test_UDFResourcesProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import UDFResourcesProperty - - return UDFResourcesProperty - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def _descriptor_and_klass(self): - descriptor = self._make_one() - - class _Test(object): - _udf_resources = () - udf_resources = descriptor - - return descriptor, _Test - - def test_class_getter(self): - descriptor, klass = self._descriptor_and_klass() - self.assertIs(klass.udf_resources, descriptor) - - def test_instance_getter_empty(self): - _, klass = self._descriptor_and_klass() - instance = klass() - self.assertEqual(instance.udf_resources, []) - - def test_resource_equality(self): - from google.cloud.bigquery._helpers import UDFResource - - resource1a = UDFResource('resourceUri', 'gs://bucket/file.js') - resource1b = UDFResource('resourceUri', 'gs://bucket/file.js') - resource2 = UDFResource('resourceUri', 'gs://bucket/other.js') - - self.assertEqual(resource1a, resource1b) - self.assertNotEqual(resource1a, resource2) - self.assertNotEqual(resource1a, object()) - self.assertEqual(resource1a, mock.ANY) - - def test_instance_getter_w_non_empty_list(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - _, klass = self._descriptor_and_klass() - instance = klass() - instance._udf_resources = tuple(udf_resources) - - self.assertEqual(instance.udf_resources, udf_resources) - - def test_instance_setter_w_empty_list(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - _, klass = self._descriptor_and_klass() - instance = klass() - instance._udf_resources = udf_resources - - instance.udf_resources = [] - - self.assertEqual(instance.udf_resources, []) - - def test_instance_setter_w_valid_udf(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - _, klass = self._descriptor_and_klass() - instance = klass() - - instance.udf_resources = udf_resources - - self.assertEqual(instance.udf_resources, udf_resources) - - def test_instance_setter_w_bad_udfs(self): - _, klass = self._descriptor_and_klass() - instance = klass() - - with self.assertRaises(ValueError): - instance.udf_resources = ["foo"] - - self.assertEqual(instance.udf_resources, []) - - class Test_AbstractQueryParameter(unittest.TestCase): @staticmethod @@ -2009,22 +1922,54 @@ def test_w_struct(self): self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123}) -class Test_QueryParametersProperty(unittest.TestCase): +class Test_UDFResource(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery._helpers import UDFResource + + return UDFResource + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') + self.assertEqual(udf.udf_type, 'resourceUri') + self.assertEqual(udf.value, 'gs://some_bucket/some_file') + + def test___eq__(self): + udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') + self.assertEqual(udf, udf) + self.assertNotEqual(udf, object()) + wrong_val = self._make_one( + 'resourceUri', 'gs://some_bucket/other_file') + self.assertNotEqual(udf, wrong_val) + wrong_type = self._make_one('inlineCode', udf.value) + self.assertNotEqual(udf, wrong_type) + + +class Test_ListApiResourceProperty(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery._helpers import QueryParametersProperty + from google.cloud.bigquery._helpers import _ListApiResourceProperty - return QueryParametersProperty + return _ListApiResourceProperty def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _descriptor_and_klass(self): - descriptor = self._make_one() + from google.cloud.bigquery._helpers import AbstractQueryParameter + + descriptor = self._make_one( + 'query_parameters', 'queryParameters', AbstractQueryParameter) class _Test(object): - _query_parameters = () + def __init__(self): + self._properties = {} + query_parameters = descriptor return descriptor, _Test @@ -2044,7 +1989,7 @@ def test_instance_getter_w_non_empty_list(self): query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() instance = klass() - instance._query_parameters = tuple(query_parameters) + instance._properties['queryParameters'] = query_parameters self.assertEqual(instance.query_parameters, query_parameters) @@ -2060,6 +2005,17 @@ def test_instance_setter_w_empty_list(self): self.assertEqual(instance.query_parameters, []) + def test_instance_setter_w_none(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] + _, klass = self._descriptor_and_klass() + instance = klass() + instance._query_parameters = query_parameters + + with self.assertRaises(ValueError): + instance.query_parameters = None + def test_instance_setter_w_valid_udf(self): from google.cloud.bigquery._helpers import ScalarQueryParameter diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index aad64d980df1..4f509da53f1c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1600,36 +1600,84 @@ def test_extract_table_generated_job_id(self): self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) - def test_run_async_query_defaults(self): + def test_query_defaults(self): from google.cloud.bigquery.job import QueryJob PROJECT = 'PROJECT' - JOB = 'job_name' QUERY = 'select count(*) from persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': 'some-random-id', + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': False, + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - job = client.run_async_query(JOB, QUERY) + conn = client._connection = _Connection(RESOURCE) + + job = client.query(QUERY) + self.assertIsInstance(job, QueryJob) + self.assertIsInstance(job.job_id, six.string_types) self.assertIs(job._client, client) - self.assertEqual(job.job_id, JOB) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, []) - def test_run_async_w_udf_resources(self): + # Check that query actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + sent = req['data'] + self.assertIsInstance( + sent['jobReference']['jobId'], six.string_types) + sent_config = sent['configuration']['query'] + self.assertEqual(sent_config['query'], QUERY) + self.assertFalse(sent_config['useLegacySql']) + + def test_query_w_udf_resources(self): from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import QueryJobConfig RESOURCE_URI = 'gs://some-bucket/js/lib.js' PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': True, + 'userDefinedFunctionResources': [ + {'resourceUri': RESOURCE_URI}, + ], + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESOURCE) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - job = client.run_async_query(JOB, QUERY, udf_resources=udf_resources) + config = QueryJobConfig() + config.udf_resources = udf_resources + config.use_legacy_sql = True + + job = client.query(QUERY, job_config=config, job_id=JOB) + self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) @@ -1637,19 +1685,58 @@ def test_run_async_w_udf_resources(self): self.assertEqual(job.udf_resources, udf_resources) self.assertEqual(job.query_parameters, []) - def test_run_async_w_query_parameters(self): + # Check that query actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + sent = req['data'] + self.assertIsInstance( + sent['jobReference']['jobId'], six.string_types) + sent_config = sent['configuration']['query'] + self.assertEqual(sent_config['query'], QUERY) + self.assertTrue(sent_config['useLegacySql']) + self.assertEqual( + sent_config['userDefinedFunctionResources'][0], + {'resourceUri': RESOURCE_URI}) + + def test_query_w_query_parameters(self): from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import QueryJobConfig PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': False, + 'queryParameters': [ + { + 'name': 'foo', + 'parameterType': {'type': 'INT64'}, + 'parameterValue': {'value': '123'} + }, + ], + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESOURCE) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] - job = client.run_async_query(JOB, QUERY, - query_parameters=query_parameters) + config = QueryJobConfig() + config.query_parameters = query_parameters + + job = client.query(QUERY, job_config=config, job_id=JOB) + self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) @@ -1657,6 +1744,24 @@ def test_run_async_w_query_parameters(self): self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, query_parameters) + # Check that query actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + sent = req['data'] + self.assertEqual(sent['jobReference']['jobId'], JOB) + sent_config = sent['configuration']['query'] + self.assertEqual(sent_config['query'], QUERY) + self.assertFalse(sent_config['useLegacySql']) + self.assertEqual( + sent_config['queryParameters'][0], + { + 'name': 'foo', + 'parameterType': {'type': 'INT64'}, + 'parameterValue': {'value': '123'} + }) + def test_query_rows_defaults(self): from google.api.core.page_iterator import HTTPIterator diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index be327a8962a2..7562acd13239 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -31,7 +31,7 @@ def _mock_client( self, rows=None, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import client mock_client = mock.create_autospec(client.Client) - mock_client.run_async_query.return_value = self._mock_job( + mock_client.query.return_value = self._mock_job( rows=rows, schema=schema, num_dml_affected_rows=num_dml_affected_rows) return mock_client @@ -177,7 +177,9 @@ def test_execute_custom_job_id(self): connection = connect(client) cursor = connection.cursor() cursor.execute('SELECT 1;', job_id='foo') - self.assertEqual(client.run_async_query.mock_calls[0][1][0], 'foo') + args, kwargs = client.query.call_args + self.assertEqual(args[0], 'SELECT 1;') + self.assertEqual(kwargs['job_id'], 'foo') def test_execute_w_dml(self): from google.cloud.bigquery.dbapi import connect @@ -239,7 +241,7 @@ def test_execute_raises_if_result_raises(self): job = mock.create_autospec(job.QueryJob) job.result.side_effect = google.cloud.exceptions.GoogleCloudError('') client = mock.create_autospec(client.Client) - client.run_async_query.return_value = job + client.query.return_value = job connection = connect(client) cursor = connection.cursor() diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a49ea1b6fab7..ad8cf965cd79 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -93,7 +93,7 @@ class _Base(object): DS_REF = DatasetReference(PROJECT, DS_ID) TABLE_ID = 'table_id' TABLE_REF = TableReference(DS_REF, TABLE_ID) - JOB_NAME = 'job_name' + JOB_ID = 'JOB_ID' def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) @@ -106,7 +106,7 @@ def _setUpConstants(self): self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( tzinfo=UTC) self.ETAG = 'ETAG' - self.JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_NAME) + self.FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID) self.RESOURCE_URL = 'http://example.com/path/to/resource' self.USER_EMAIL = 'phred@example.com' @@ -128,10 +128,10 @@ def _makeResource(self, started=False, ended=False): } }, 'etag': self.ETAG, - 'id': self.JOB_ID, + 'id': self.FULL_JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'selfLink': self.RESOURCE_URL, 'user_email': self.USER_EMAIL, @@ -320,7 +320,7 @@ def _verifyResourceProperties(self, job, resource): def test_ctor(self): client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) self.assertIs(job.destination, self.TABLE_REF) self.assertEqual(list(job.source_uris), [self.SOURCE1]) @@ -328,7 +328,7 @@ def test_ctor(self): self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self.assertEqual(job.schema, []) self._verifyInitialReadonlyProperties(job) @@ -362,7 +362,7 @@ def test_ctor_w_config(self): age = SchemaField('age', 'INTEGER', mode='REQUIRED') config = LoadJobConfig() config.schema = [full_name, age] - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config) self.assertEqual(job.schema, [full_name, age]) @@ -388,7 +388,7 @@ def test_result_invokes_begin(self): connection = _Connection(begun_resource, done_resource) client = _Client(self.PROJECT, connection=connection) - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job.result() @@ -453,9 +453,9 @@ def test_props_set_by_server(self): CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC) STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC) ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC) - JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_NAME) + FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID) URL = 'http://example.com/projects/%s/jobs/%s' % ( - self.PROJECT, self.JOB_NAME) + self.PROJECT, self.JOB_ID) EMAIL = 'phred@example.com' ERROR_RESULT = {'debugInfo': 'DEBUG', 'location': 'LOCATION', @@ -464,9 +464,9 @@ def test_props_set_by_server(self): client = _Client(self.PROJECT) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job._properties['etag'] = 'ETAG' - job._properties['id'] = JOB_ID + job._properties['id'] = FULL_JOB_ID job._properties['selfLink'] = URL job._properties['user_email'] = EMAIL @@ -519,10 +519,10 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_ID), + 'id': '%s:%s' % (self.PROJECT, self.JOB_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -533,10 +533,10 @@ def test_from_api_repr_bare(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': self.JOB_ID, + 'id': self.FULL_JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': { @@ -567,7 +567,7 @@ def test_from_api_repr_w_properties(self): def test_begin_w_already_running(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job._properties['status'] = {'state': 'RUNNING'} @@ -584,7 +584,7 @@ def test_begin_w_bound_client(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job.begin() @@ -596,7 +596,7 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': { @@ -625,14 +625,14 @@ def test_begin_w_autodetect(self): client = _Client(project=self.PROJECT, connection=conn) config = LoadJobConfig() config.autodetect = True - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config) job.begin() sent = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': { @@ -692,7 +692,7 @@ def test_begin_w_alternate_client(self): age = SchemaField('age', 'INTEGER', mode='REQUIRED') config = LoadJobConfig() config.schema = [full_name, age] - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1, config) config.allow_jagged_rows = True config.allow_quoted_newlines = True @@ -717,7 +717,7 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': LOAD_CONFIGURATION, @@ -728,11 +728,11 @@ def test_begin_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) self.assertFalse(job.exists()) @@ -743,13 +743,13 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) self.assertTrue(job.exists(client=client2)) @@ -761,12 +761,12 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job.reload() @@ -777,14 +777,14 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) job.reload(client=client2) @@ -796,13 +796,13 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_bound_client(self): - PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn = _Connection(RESPONSE) client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job.cancel() @@ -813,7 +813,7 @@ def test_cancel_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn1 = _Connection() @@ -821,7 +821,7 @@ def test_cancel_w_alternate_client(self): conn2 = _Connection(RESPONSE) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) job.cancel(client=client2) @@ -896,14 +896,14 @@ def test_ctor(self): client = _Client(self.PROJECT) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client) + job = self._make_one(self.JOB_ID, [source], destination, client) self.assertIs(job.destination, destination) self.assertEqual(job.sources, [source]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) @@ -926,7 +926,7 @@ def test_from_api_repr_missing_config(self): 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -940,7 +940,7 @@ def test_from_api_repr_bare(self): 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': { @@ -969,7 +969,7 @@ def test_from_api_repr_w_sourcetable(self): 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': { @@ -998,7 +998,7 @@ def test_from_api_repr_wo_sources(self): 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': { @@ -1036,7 +1036,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client) + job = self._make_one(self.JOB_ID, [source], destination, client) job.begin() @@ -1047,7 +1047,7 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': { @@ -1094,7 +1094,7 @@ def test_begin_w_alternate_client(self): config = CopyJobConfig() config.create_disposition = 'CREATE_NEVER' config.write_disposition = 'WRITE_TRUNCATE' - job = self._make_one(self.JOB_NAME, [source], destination, client1, + job = self._make_one(self.JOB_ID, [source], destination, client1, config) job.begin(client=client2) @@ -1106,7 +1106,7 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': COPY_CONFIGURATION, @@ -1116,13 +1116,13 @@ def test_begin_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client) + job = self._make_one(self.JOB_ID, [source], destination, client) self.assertFalse(job.exists()) @@ -1133,14 +1133,14 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client1) + job = self._make_one(self.JOB_ID, [source], destination, client1) self.assertTrue(job.exists(client=client2)) @@ -1152,13 +1152,13 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client) + job = self._make_one(self.JOB_ID, [source], destination, client) job.reload() @@ -1169,7 +1169,7 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) @@ -1177,7 +1177,7 @@ def test_reload_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client1) + job = self._make_one(self.JOB_ID, [source], destination, client1) job.reload(client=client2) @@ -1251,7 +1251,7 @@ def _verifyResourceProperties(self, job, resource): def test_ctor(self): client = _Client(self.PROJECT) source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) self.assertEqual(job.source, source) self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) @@ -1259,7 +1259,7 @@ def test_ctor(self): self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) @@ -1273,7 +1273,7 @@ def test_destination_uri_file_counts(self): file_counts = 23 client = _Client(self.PROJECT) source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) self.assertIsNone(job.destination_uri_file_counts) @@ -1301,7 +1301,7 @@ def test_from_api_repr_missing_config(self): 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -1315,7 +1315,7 @@ def test_from_api_repr_bare(self): 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'extract': { @@ -1355,7 +1355,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) job.begin() @@ -1367,7 +1367,7 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'extract': { @@ -1410,7 +1410,7 @@ def test_begin_w_alternate_client(self): job_config.destination_format = 'NEWLINE_DELIMITED_JSON' job_config.field_delimiter = '|' job_config.print_header = False - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1, job_config) job.begin(client=client2) @@ -1423,7 +1423,7 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'extract': EXTRACT_CONFIGURATION, @@ -1433,11 +1433,11 @@ def test_begin_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) self.assertFalse(job.exists()) @@ -1449,13 +1449,13 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) self.assertTrue(job.exists(client=client2)) @@ -1468,13 +1468,13 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) job.reload() @@ -1486,7 +1486,7 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) @@ -1494,7 +1494,7 @@ def test_reload_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) job.reload(client=client2) @@ -1507,6 +1507,69 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) +class TestQueryJobConfig(unittest.TestCase, _Base): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryJobConfig + + return QueryJobConfig + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + config = self._make_one() + self.assertEqual(config._properties, {}) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + config = klass.from_api_repr({}) + self.assertIsNone(config.dry_run) + self.assertIsNone(config.use_legacy_sql) + self.assertIsNone(config.default_dataset) + + def test_from_api_repr_normal(self): + resource = { + 'useLegacySql': True, + 'query': 'no property for me', + 'defaultDataset': { + 'projectId': 'someproject', + 'datasetId': 'somedataset', + }, + 'someNewProperty': 'I should be saved, too.', + } + klass = self._get_target_class() + + config = klass.from_api_repr(resource) + + self.assertTrue(config.use_legacy_sql) + self.assertEqual( + config.default_dataset, + DatasetReference('someproject', 'somedataset')) + # Make sure unknown properties propagate. + self.assertEqual(config._properties['query'], 'no property for me') + self.assertEqual( + config._properties['someNewProperty'], 'I should be saved, too.') + + def test_to_api_repr_normal(self): + config = self._make_one() + config.use_legacy_sql = True + config.default_dataset = DatasetReference( + 'someproject', 'somedataset') + config._properties['someNewProperty'] = 'Woohoo, alpha stuff.' + + resource = config.to_api_repr() + + self.assertTrue(resource['useLegacySql']) + self.assertEqual( + resource['defaultDataset']['projectId'], 'someproject') + self.assertEqual( + resource['defaultDataset']['datasetId'], 'somedataset') + # Make sure unknown properties propagate. + self.assertEqual( + config._properties['someNewProperty'], 'Woohoo, alpha stuff.') + + class TestQueryJob(unittest.TestCase, _Base): JOB_TYPE = 'query' QUERY = 'select count(*) from persons' @@ -1639,17 +1702,19 @@ def _verifyResourceProperties(self, job, resource): def test_ctor_defaults(self): client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.query, self.QUERY) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) - # set/read from resource['configuration']['copy'] + self.assertFalse(job.use_legacy_sql) + + # set/read from resource['configuration']['query'] self.assertIsNone(job.allow_large_results) self.assertIsNone(job.create_disposition) self.assertIsNone(job.default_dataset) @@ -1657,7 +1722,6 @@ def test_ctor_defaults(self): self.assertIsNone(job.flatten_results) self.assertIsNone(job.priority) self.assertIsNone(job.use_query_cache) - self.assertIsNone(job.use_legacy_sql) self.assertIsNone(job.dry_run) self.assertIsNone(job.write_disposition) self.assertIsNone(job.maximum_billing_tier) @@ -1665,21 +1729,27 @@ def test_ctor_defaults(self): def test_ctor_w_udf_resources(self): from google.cloud.bigquery._helpers import UDFResource + from google.cloud.bigquery.job import QueryJobConfig RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - udf_resources=udf_resources) + config = QueryJobConfig() + config.udf_resources = udf_resources + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) self.assertEqual(job.udf_resources, udf_resources) def test_ctor_w_query_parameters(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.job import QueryJobConfig query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - query_parameters=query_parameters) + config = QueryJobConfig() + config.query_parameters = query_parameters + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) self.assertEqual(job.query_parameters, query_parameters) def test_from_api_repr_missing_identity(self): @@ -1697,7 +1767,7 @@ def test_from_api_repr_missing_config(self): 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -1711,7 +1781,7 @@ def test_from_api_repr_bare(self): 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': {'query': self.QUERY}, @@ -1740,7 +1810,7 @@ def test_from_api_repr_w_properties(self): def test_cancelled(self): client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) job._properties['status'] = { 'state': 'DONE', 'errorResult': { @@ -1780,7 +1850,7 @@ def test_query_plan(self): }], }] client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.query_plan, []) statistics = job._properties['statistics'] = {} @@ -1821,7 +1891,7 @@ def test_query_plan(self): def test_total_bytes_processed(self): total_bytes = 1234 client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.total_bytes_processed) statistics = job._properties['statistics'] = {} @@ -1836,7 +1906,7 @@ def test_total_bytes_processed(self): def test_total_bytes_billed(self): total_bytes = 1234 client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.total_bytes_billed) statistics = job._properties['statistics'] = {} @@ -1851,7 +1921,7 @@ def test_total_bytes_billed(self): def test_billing_tier(self): billing_tier = 1 client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.billing_tier) statistics = job._properties['statistics'] = {} @@ -1865,7 +1935,7 @@ def test_billing_tier(self): def test_cache_hit(self): client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.cache_hit) statistics = job._properties['statistics'] = {} @@ -1880,7 +1950,7 @@ def test_cache_hit(self): def test_num_dml_affected_rows(self): num_rows = 1234 client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.num_dml_affected_rows) statistics = job._properties['statistics'] = {} @@ -1895,7 +1965,7 @@ def test_num_dml_affected_rows(self): def test_statement_type(self): statement_type = 'SELECT' client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.statement_type) statistics = job._properties['statistics'] = {} @@ -1926,7 +1996,7 @@ def test_referenced_tables(self): 'tableId': 'other-table', }] client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.referenced_tables, []) statistics = job._properties['statistics'] = {} @@ -2001,7 +2071,7 @@ def test_undeclared_query_paramters(self): }, }] client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.undeclared_query_paramters, []) statistics = job._properties['statistics'] = {} @@ -2036,12 +2106,12 @@ def test_query_results(self): 'jobComplete': True, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, } connection = _Connection(query_resource) client = _Client(self.PROJECT, connection=connection) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) results = job.query_results() self.assertIsInstance(results, QueryResults) @@ -2049,11 +2119,11 @@ def test_query_results_w_cached_value(self): from google.cloud.bigquery.query import QueryResults client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) resource = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, } query_results = QueryResults(client, resource) @@ -2068,7 +2138,7 @@ def test_result(self): 'jobComplete': True, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, } connection = _Connection(query_resource, query_resource) @@ -2086,7 +2156,7 @@ def test_result_invokes_begins(self): 'jobComplete': False, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, } query_resource = copy.deepcopy(incomplete_resource) @@ -2097,7 +2167,7 @@ def test_result_invokes_begins(self): begun_resource, incomplete_resource, query_resource, done_resource, query_resource) client = _Client(self.PROJECT, connection=connection) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) job.result() @@ -2111,7 +2181,7 @@ def test_result_error(self): from google.cloud import exceptions client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) error_result = { 'debugInfo': 'DEBUG', 'location': 'LOCATION', @@ -2133,6 +2203,7 @@ def test_result_error(self): def test_begin_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig PATH = '/projects/%s/jobs' % (self.PROJECT,) DS_ID = 'DATASET' @@ -2145,8 +2216,10 @@ def test_begin_w_bound_client(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.default_dataset = DatasetReference(self.PROJECT, DS_ID) + config = QueryJobConfig() + config.default_dataset = DatasetReference(self.PROJECT, DS_ID) + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) job.begin() @@ -2159,11 +2232,12 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { 'query': self.QUERY, + 'useLegacySql': False, 'defaultDataset': { 'projectId': self.PROJECT, 'datasetId': DS_ID, @@ -2171,11 +2245,12 @@ def test_begin_w_bound_client(self): }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_alternate_client(self): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig PATH = '/projects/%s/jobs' % (self.PROJECT,) TABLE = 'TABLE' @@ -2203,28 +2278,29 @@ def test_begin_w_alternate_client(self): 'maximumBytesBilled': '123456' } RESOURCE['configuration']['query'] = QUERY_CONFIGURATION + RESOURCE['configuration']['dryRun'] = True conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_NAME, self.QUERY, client1) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(TABLE) - job.allow_large_results = True - job.create_disposition = 'CREATE_NEVER' - job.default_dataset = dataset_ref - job.destination = table_ref - job.flatten_results = True - job.priority = 'INTERACTIVE' - job.use_query_cache = True - job.use_legacy_sql = True - job.dry_run = True - RESOURCE['configuration']['dryRun'] = True - job.write_disposition = 'WRITE_TRUNCATE' - job.maximum_billing_tier = 4 - job.maximum_bytes_billed = 123456 + config = QueryJobConfig() + config.allow_large_results = True + config.create_disposition = 'CREATE_NEVER' + config.default_dataset = dataset_ref + config.destination = table_ref + config.dry_run = True + config.flatten_results = True + config.maximum_billing_tier = 4 + config.priority = 'INTERACTIVE' + config.use_legacy_sql = True + config.use_query_cache = True + config.write_disposition = 'WRITE_TRUNCATE' + config.maximum_bytes_billed = 123456 + job = self._make_one( + self.JOB_ID, self.QUERY, client1, job_config=config) job.begin(client=client2) @@ -2236,18 +2312,19 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'dryRun': True, 'query': QUERY_CONFIGURATION, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_udf(self): from google.cloud.bigquery._helpers import UDFResource + from google.cloud.bigquery.job import QueryJobConfig RESOURCE_URI = 'gs://some-bucket/js/lib.js' INLINE_UDF_CODE = 'var someCode = "here";' @@ -2268,8 +2345,11 @@ def test_begin_w_udf(self): UDFResource("resourceUri", RESOURCE_URI), UDFResource("inlineCode", INLINE_UDF_CODE), ] - job = self._make_one(self.JOB_NAME, self.QUERY, client, - udf_resources=udf_resources) + config = QueryJobConfig() + config.udf_resources = udf_resources + config.use_legacy_sql = True + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) job.begin() @@ -2281,11 +2361,12 @@ def test_begin_w_udf(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { 'query': self.QUERY, + 'useLegacySql': True, 'userDefinedFunctionResources': [ {'resourceUri': RESOURCE_URI}, {'inlineCode': INLINE_UDF_CODE}, @@ -2293,11 +2374,12 @@ def test_begin_w_udf(self): }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_named_query_parameter(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.job import QueryJobConfig query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2322,8 +2404,10 @@ def test_begin_w_named_query_parameter(self): ] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - query_parameters=query_parameters) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=jconfig) job.begin() @@ -2335,21 +2419,23 @@ def test_begin_w_named_query_parameter(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { 'query': self.QUERY, + 'useLegacySql': False, 'parameterMode': 'NAMED', 'queryParameters': config['queryParameters'], }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_positional_query_parameter(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.job import QueryJobConfig query_parameters = [ScalarQueryParameter.positional('INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2373,8 +2459,10 @@ def test_begin_w_positional_query_parameter(self): ] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - query_parameters=query_parameters) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=jconfig) job.begin() @@ -2386,20 +2474,23 @@ def test_begin_w_positional_query_parameter(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { 'query': self.QUERY, + 'useLegacySql': False, 'parameterMode': 'POSITIONAL', 'queryParameters': config['queryParameters'], }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_dry_run_query(self): + from google.cloud.bigquery.job import QueryJobConfig + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource() # Ensure None for missing server-set props @@ -2407,11 +2498,13 @@ def test_dry_run_query(self): del RESOURCE['etag'] del RESOURCE['selfLink'] del RESOURCE['user_email'] + RESOURCE['configuration']['dryRun'] = True conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.dry_run = True - RESOURCE['configuration']['dryRun'] = True + config = QueryJobConfig() + config.dry_run = True + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) job.begin() self.assertEqual(job.udf_resources, []) @@ -2422,23 +2515,24 @@ def test_dry_run_query(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { - 'query': self.QUERY + 'query': self.QUERY, + 'useLegacySql': False, }, 'dryRun': True, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertFalse(job.exists()) @@ -2449,12 +2543,12 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_NAME, self.QUERY, client1) + job = self._make_one(self.JOB_ID, self.QUERY, client1) self.assertTrue(job.exists(client=client2)) @@ -2467,18 +2561,19 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) DS_ID = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, None, client) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) - job.destination = table_ref + config = QueryJobConfig() + config.destination = table_ref + job = self._make_one(self.JOB_ID, None, client, job_config=config) job.reload() @@ -2491,7 +2586,7 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) DS_ID = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() @@ -2505,7 +2600,7 @@ def test_reload_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_NAME, self.QUERY, client1) + job = self._make_one(self.JOB_ID, self.QUERY, client1) job.reload(client=client2) From 03bc3484a84bf7a4839efa6abe1633c832be1fd9 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 9 Oct 2017 17:10:08 -0400 Subject: [PATCH 0224/2016] bigquery: add Client.load_table_from_file (#4136) Move the method from Table to Client. --- .../google/cloud/bigquery/client.py | 221 +++++++- .../google/cloud/bigquery/job.py | 10 +- .../google/cloud/bigquery/table.py | 410 --------------- .../google-cloud-bigquery/tests/system.py | 50 +- .../tests/unit/test_client.py | 475 +++++++++++++++++ .../tests/unit/test_table.py | 497 ------------------ 6 files changed, 726 insertions(+), 937 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f460202a3631..ce41824996bd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -17,11 +17,17 @@ from __future__ import absolute_import import collections +import os import uuid import six +from google import resumable_media +from google.resumable_media.requests import MultipartUpload +from google.resumable_media.requests import ResumableUpload + from google.api.core import page_iterator +from google.cloud import exceptions from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset @@ -37,6 +43,20 @@ from google.cloud.bigquery._helpers import _rows_page_start +_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB +_MAX_MULTIPART_SIZE = 5 * 1024 * 1024 +_DEFAULT_NUM_RETRIES = 6 +_BASE_UPLOAD_TEMPLATE = ( + u'https://www.googleapis.com/upload/bigquery/v2/projects/' + u'{project}/jobs?uploadType=') +_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'multipart' +_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'resumable' +_GENERIC_CONTENT_TYPE = u'*/*' +_READ_LESS_THAN_SIZE = ( + 'Size {:d} was specified but the file-like object only had ' + '{:d} bytes remaining.') + + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -535,7 +555,7 @@ def load_table_from_storage(self, source_uris, destination, :param destination: Table into which data is to be loaded. :type job_id: str - :param job_id: Name of the job. + :param job_id: (Optional) Name of the job. :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. @@ -550,6 +570,171 @@ def load_table_from_storage(self, source_uris, destination, job.begin() return job + def load_table_from_file(self, file_obj, destination, + rewind=False, + size=None, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=None, job_config=None): + """Upload the contents of this table from a file-like object. + + Like load_table_from_storage, this creates, starts and returns + a ``LoadJob``. + + :type file_obj: file + :param file_obj: A file handle opened in binary mode for reading. + + :type destination: :class:`google.cloud.bigquery.table.TableReference` + :param destination: Table into which data is to be loaded. + + :type rewind: bool + :param rewind: If True, seek to the beginning of the file handle before + reading the file. + + :type size: int + :param size: The number of bytes to read from the file handle. + If size is ``None`` or large, resumable upload will be + used. Otherwise, multipart upload will be used. + + :type num_retries: int + :param num_retries: Number of upload retries. Defaults to 6. + + :type job_id: str + :param job_id: (Optional) Name of the job. + + :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` + :param job_config: (Optional) Extra configuration options for the job. + + :rtype: :class:`~google.cloud.bigquery.jobs.LoadJob` + + :returns: the job instance used to load the data (e.g., for + querying status). Note that the job is already started: + do not call ``job.begin()``. + :raises: :class:`ValueError` if ``size`` is not passed in and can not + be determined, or if the ``file_obj`` can be detected to be + a file opened in text mode. + """ + job_id = _make_job_id(job_id) + job = LoadJob(job_id, None, destination, self, job_config) + job_resource = job._build_resource() + if rewind: + file_obj.seek(0, os.SEEK_SET) + _check_mode(file_obj) + try: + if size is None or size >= _MAX_MULTIPART_SIZE: + response = self._do_resumable_upload( + file_obj, job_resource, num_retries) + else: + response = self._do_multipart_upload( + file_obj, job_resource, size, num_retries) + except resumable_media.InvalidResponse as exc: + raise exceptions.from_http_response(exc.response) + return self.job_from_resource(response.json()) + + def _do_resumable_upload(self, stream, metadata, num_retries): + """Perform a resumable upload. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: :class:`~requests.Response` + :returns: The "200 OK" response object returned after the final chunk + is uploaded. + """ + upload, transport = self._initiate_resumable_upload( + stream, metadata, num_retries) + + while not upload.finished: + response = upload.transmit_next_chunk(transport) + + return response + + def _initiate_resumable_upload(self, stream, metadata, num_retries): + """Initiate a resumable upload. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: tuple + :returns: + Pair of + + * The :class:`~google.resumable_media.requests.ResumableUpload` + that was created + * The ``transport`` used to initiate the upload. + """ + chunk_size = _DEFAULT_CHUNKSIZE + transport = self._http + headers = _get_upload_headers(self._connection.USER_AGENT) + upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) + upload = ResumableUpload(upload_url, chunk_size, headers=headers) + + if num_retries is not None: + upload._retry_strategy = resumable_media.RetryStrategy( + max_retries=num_retries) + + upload.initiate( + transport, stream, metadata, _GENERIC_CONTENT_TYPE, + stream_final=False) + + return upload, transport + + def _do_multipart_upload(self, stream, metadata, size, num_retries): + """Perform a multipart upload. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type size: int + :param size: The number of bytes to be uploaded (which will be read + from ``stream``). If not provided, the upload will be + concluded once ``stream`` is exhausted (or :data:`None`). + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: :class:`~requests.Response` + :returns: The "200 OK" response object returned after the multipart + upload request. + :raises: :exc:`ValueError` if the ``stream`` has fewer than ``size`` + bytes remaining. + """ + data = stream.read(size) + if len(data) < size: + msg = _READ_LESS_THAN_SIZE.format(size, len(data)) + raise ValueError(msg) + + headers = _get_upload_headers(self._connection.USER_AGENT) + + upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project) + upload = MultipartUpload(upload_url, headers=headers) + + if num_retries is not None: + upload._retry_strategy = resumable_media.RetryStrategy( + max_retries=num_retries) + + response = upload.transmit( + self._http, data, metadata, _GENERIC_CONTENT_TYPE) + + return response + def copy_table(self, sources, destination, job_id=None, job_config=None): """Start a job for copying one or more tables into another table. @@ -832,3 +1017,37 @@ def _make_job_id(job_id): if job_id is None: return str(uuid.uuid4()) return job_id + + +def _check_mode(stream): + """Check that a stream was opened in read-binary mode. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :raises: :exc:`ValueError` if the ``stream.mode`` is a valid attribute + and is not among ``rb``, ``r+b`` or ``rb+``. + """ + mode = getattr(stream, 'mode', None) + + if mode is not None and mode not in ('rb', 'r+b', 'rb+'): + raise ValueError( + "Cannot upload files opened in text mode: use " + "open(filename, mode='rb') or open(filename, mode='r+b')") + + +def _get_upload_headers(user_agent): + """Get the headers for an upload request. + + :type user_agent: str + :param user_agent: The user-agent for requests. + + :rtype: dict + :returns: The headers to be used for the request. + """ + return { + 'Accept': 'application/json', + 'Accept-Encoding': 'gzip, deflate', + 'User-Agent': user_agent, + 'content-type': 'application/json', + } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index af3b1997f177..4f9c005a883e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -684,11 +684,11 @@ class LoadJob(_AsyncJob): :type job_id: str :param job_id: the job's ID - :type source_uris: sequence of string + :type source_uris: sequence of string or ``NoneType`` :param source_uris: URIs of one or more data files to be loaded. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris - for supported URI formats. + for supported URI formats. Pass None for jobs that load from a file. :type destination: :class:`google.cloud.bigquery.table.TableReference` :param destination: reference to table into which data is to be loaded. @@ -856,7 +856,8 @@ def output_rows(self): def _build_resource(self): """Generate a resource for :meth:`begin`.""" configuration = self._configuration.to_api_repr() - configuration['sourceUris'] = self.source_uris + if self.source_uris is not None: + configuration['sourceUris'] = self.source_uris configuration['destinationTable'] = self.destination.to_api_repr() return { @@ -898,8 +899,7 @@ def from_api_repr(cls, resource, client): ds_ref = DatasetReference(dest_config['projectId'], dest_config['datasetId'],) destination = TableReference(ds_ref, dest_config['tableId']) - # TODO(jba): sourceUris should not be absent if there are no LoadJobs - # for file uploads. + # sourceUris will be absent if this is a file upload. source_uris = config_resource.get('sourceUris') job = cls(job_id, source_uris, destination, client, config) job._set_properties(resource) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index e4814ae16c8e..8f56dffd18bf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -17,14 +17,9 @@ from __future__ import absolute_import import datetime -import os import six -from google import resumable_media -from google.resumable_media.requests import MultipartUpload -from google.resumable_media.requests import ResumableUpload - from google.cloud import exceptions from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime @@ -34,17 +29,6 @@ _TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'" _MARKER = object() -_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB -_BASE_UPLOAD_TEMPLATE = ( - u'https://www.googleapis.com/upload/bigquery/v2/projects/' - u'{project}/jobs?uploadType=') -_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'multipart' -_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'resumable' -_GENERIC_CONTENT_TYPE = u'*/*' -_READ_LESS_THAN_SIZE = ( - 'Size {:d} was specified but the file-like object only had ' - '{:d} bytes remaining.') -_DEFAULT_NUM_RETRIES = 6 class TableReference(object): @@ -826,353 +810,6 @@ def insert_data(self, return errors - def _get_transport(self, client): - """Return the client's transport. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :rtype transport: - :class:`~google.auth.transport.requests.AuthorizedSession` - :returns: The transport (with credentials) that will - make authenticated requests. - """ - return client._http - - def _initiate_resumable_upload(self, client, stream, - metadata, num_retries): - """Initiate a resumable upload. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) - - :rtype: tuple - :returns: - Pair of - - * The :class:`~google.resumable_media.requests.ResumableUpload` - that was created - * The ``transport`` used to initiate the upload. - """ - chunk_size = _DEFAULT_CHUNKSIZE - transport = self._get_transport(client) - headers = _get_upload_headers(client._connection.USER_AGENT) - upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) - upload = ResumableUpload(upload_url, chunk_size, headers=headers) - - if num_retries is not None: - upload._retry_strategy = resumable_media.RetryStrategy( - max_retries=num_retries) - - upload.initiate( - transport, stream, metadata, _GENERIC_CONTENT_TYPE, - stream_final=False) - - return upload, transport - - def _do_resumable_upload(self, client, stream, metadata, num_retries): - """Perform a resumable upload. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) - - :rtype: :class:`~requests.Response` - :returns: The "200 OK" response object returned after the final chunk - is uploaded. - """ - upload, transport = self._initiate_resumable_upload( - client, stream, metadata, num_retries) - - while not upload.finished: - response = upload.transmit_next_chunk(transport) - - return response - - def _do_multipart_upload(self, client, stream, metadata, - size, num_retries): - """Perform a multipart upload. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type size: int - :param size: The number of bytes to be uploaded (which will be read - from ``stream``). If not provided, the upload will be - concluded once ``stream`` is exhausted (or :data:`None`). - - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) - - :rtype: :class:`~requests.Response` - :returns: The "200 OK" response object returned after the multipart - upload request. - :raises: :exc:`ValueError` if the ``stream`` has fewer than ``size`` - bytes remaining. - """ - data = stream.read(size) - if len(data) < size: - msg = _READ_LESS_THAN_SIZE.format(size, len(data)) - raise ValueError(msg) - - transport = self._get_transport(client) - headers = _get_upload_headers(client._connection.USER_AGENT) - - upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project) - upload = MultipartUpload(upload_url, headers=headers) - - if num_retries is not None: - upload._retry_strategy = resumable_media.RetryStrategy( - max_retries=num_retries) - - response = upload.transmit( - transport, data, metadata, _GENERIC_CONTENT_TYPE) - - return response - - def _do_upload(self, client, stream, metadata, size, num_retries): - """Determine an upload strategy and then perform the upload. - - If ``size`` is :data:`None`, then a resumable upload will be used, - otherwise the content and the metadata will be uploaded - in a single multipart upload request. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type size: int - :param size: The number of bytes to be uploaded (which will be read - from ``stream``). If not provided, the upload will be - concluded once ``stream`` is exhausted (or :data:`None`). - - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) - - :rtype: dict - :returns: The parsed JSON from the "200 OK" response. This will be the - **only** response in the multipart case and it will be the - **final** response in the resumable case. - """ - if size is None: - response = self._do_resumable_upload( - client, stream, metadata, num_retries) - else: - response = self._do_multipart_upload( - client, stream, metadata, size, num_retries) - - return response.json() - - # pylint: disable=too-many-arguments,too-many-locals - def upload_from_file(self, - file_obj, - source_format, - rewind=False, - size=None, - num_retries=_DEFAULT_NUM_RETRIES, - allow_jagged_rows=None, - allow_quoted_newlines=None, - create_disposition=None, - encoding=None, - field_delimiter=None, - ignore_unknown_values=None, - max_bad_records=None, - quote_character=None, - skip_leading_rows=None, - write_disposition=None, - client=None, - job_name=None, - null_marker=None): - """Upload the contents of this table from a file-like object. - - :type file_obj: file - :param file_obj: A file handle opened in binary mode for reading. - - :type source_format: str - :param source_format: Any supported format. The full list of supported - formats is documented under the - ``configuration.extract.destinationFormat`` property on this page: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs - - :type rewind: bool - :param rewind: If True, seek to the beginning of the file handle before - writing the file. - - :type size: int - :param size: The number of bytes to read from the file handle. - If not provided, we'll try to guess the size using - :func:`os.fstat`. (If the file handle is not from the - filesystem this won't be possible.) - - :type num_retries: int - :param num_retries: Number of upload retries. Defaults to 6. - - :type allow_jagged_rows: bool - :param allow_jagged_rows: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type allow_quoted_newlines: bool - :param allow_quoted_newlines: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type create_disposition: str - :param create_disposition: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type encoding: str - :param encoding: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type field_delimiter: str - :param field_delimiter: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type ignore_unknown_values: bool - :param ignore_unknown_values: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type max_bad_records: int - :param max_bad_records: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type quote_character: str - :param quote_character: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type skip_leading_rows: int - :param skip_leading_rows: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type write_disposition: str - :param write_disposition: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: (Optional) The client to use. If not passed, falls back - to the ``client`` stored on the current table. - - :type job_name: str - :param job_name: Optional. The id of the job. Generated if not - explicitly passed in. - - :type null_marker: str - :param null_marker: Optional. A custom null marker (example: "\\N") - - :rtype: :class:`~google.cloud.bigquery.jobs.LoadJob` - - :returns: the job instance used to load the data (e.g., for - querying status). Note that the job is already started: - do not call ``job.begin()``. - :raises: :class:`ValueError` if ``size`` is not passed in and can not - be determined, or if the ``file_obj`` can be detected to be - a file opened in text mode. - """ - client = self._require_client(client) - _maybe_rewind(file_obj, rewind=rewind) - _check_mode(file_obj) - metadata = _get_upload_metadata( - source_format, self._schema, self._project, - self._dataset_id, self.table_id) - _configure_job_metadata(metadata, allow_jagged_rows, - allow_quoted_newlines, create_disposition, - encoding, field_delimiter, - ignore_unknown_values, max_bad_records, - quote_character, skip_leading_rows, - write_disposition, job_name, null_marker) - - try: - created_json = self._do_upload( - client, file_obj, metadata, size, num_retries) - return client.job_from_resource(created_json) - except resumable_media.InvalidResponse as exc: - raise exceptions.from_http_response(exc.response) - # pylint: enable=too-many-arguments,too-many-locals - - -def _configure_job_metadata(metadata, # pylint: disable=too-many-arguments - allow_jagged_rows, - allow_quoted_newlines, - create_disposition, - encoding, - field_delimiter, - ignore_unknown_values, - max_bad_records, - quote_character, - skip_leading_rows, - write_disposition, - job_name, - null_marker): - """Helper for :meth:`Table.upload_from_file`.""" - load_config = metadata['configuration']['load'] - - if allow_jagged_rows is not None: - load_config['allowJaggedRows'] = allow_jagged_rows - - if allow_quoted_newlines is not None: - load_config['allowQuotedNewlines'] = allow_quoted_newlines - - if create_disposition is not None: - load_config['createDisposition'] = create_disposition - - if encoding is not None: - load_config['encoding'] = encoding - - if field_delimiter is not None: - load_config['fieldDelimiter'] = field_delimiter - - if ignore_unknown_values is not None: - load_config['ignoreUnknownValues'] = ignore_unknown_values - - if max_bad_records is not None: - load_config['maxBadRecords'] = max_bad_records - - if quote_character is not None: - load_config['quote'] = quote_character - - if skip_leading_rows is not None: - load_config['skipLeadingRows'] = skip_leading_rows - - if write_disposition is not None: - load_config['writeDisposition'] = write_disposition - - if job_name is not None: - load_config['jobReference'] = {'jobId': job_name} - - if null_marker is not None: - load_config['nullMarker'] = null_marker - def _parse_schema_resource(info): """Parse a resource fragment into a schema field. @@ -1222,53 +859,6 @@ def _build_schema_resource(fields): # pylint: enable=unused-argument -def _maybe_rewind(stream, rewind=False): - """Rewind the stream if desired. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type rewind: bool - :param rewind: Indicates if we should seek to the beginning of the stream. - """ - if rewind: - stream.seek(0, os.SEEK_SET) - - -def _check_mode(stream): - """Check that a stream was opened in read-binary mode. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :raises: :exc:`ValueError` if the ``stream.mode`` is a valid attribute - and is not among ``rb``, ``r+b`` or ``rb+``. - """ - mode = getattr(stream, 'mode', None) - - if mode is not None and mode not in ('rb', 'r+b', 'rb+'): - raise ValueError( - "Cannot upload files opened in text mode: use " - "open(filename, mode='rb') or open(filename, mode='r+b')") - - -def _get_upload_headers(user_agent): - """Get the headers for an upload request. - - :type user_agent: str - :param user_agent: The user-agent for requests. - - :rtype: dict - :returns: The headers to be used for the request. - """ - return { - 'Accept': 'application/json', - 'Accept-Encoding': 'gzip, deflate', - 'User-Agent': user_agent, - 'content-type': 'application/json', - } - - def _get_upload_metadata(source_format, schema, project, dataset_id, table_id): """Get base metadata for creating a table. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index ce49e88177e7..1bf00a9b57ef 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -372,7 +372,8 @@ def test_load_table_from_local_file_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + table_ref = dataset.table(TABLE_NAME) + table_arg = Table(table_ref, schema=[full_name, age], client=Config.CLIENT) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -384,13 +385,14 @@ def test_load_table_from_local_file_then_dump_table(self): writer.writerows(ROWS) with open(temp.name, 'rb') as csv_read: - job = table.upload_from_file( - csv_read, - source_format='CSV', - skip_leading_rows=1, - create_disposition='CREATE_NEVER', - write_disposition='WRITE_EMPTY', - ) + config = bigquery.LoadJobConfig() + config.source_format = 'CSV' + config.skip_leading_rows = 1 + config.create_disposition = 'CREATE_NEVER' + config.write_disposition = 'WRITE_EMPTY' + config.schema = table.schema + job = Config.CLIENT.load_table_from_file( + csv_read, table_ref, job_config=config) # Retry until done. job.result(timeout=JOB_TIMEOUT) @@ -414,16 +416,16 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("red", 650)] dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) - table = Table(dataset.table(TABLE_NAME), client=Config.CLIENT) + table_ref = dataset.table(TABLE_NAME) + table = Table(table_ref, client=Config.CLIENT) self.to_delete.insert(0, table) with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof: - job = table.upload_from_file( - avrof, - source_format='AVRO', - write_disposition='WRITE_TRUNCATE' - ) - + config = bigquery.LoadJobConfig() + config.source_format = 'AVRO' + config.write_disposition = 'WRITE_TRUNCATE' + job = Config.CLIENT.load_table_from_file( + avrof, table_ref, job_config=config) # Retry until done. job.result(timeout=JOB_TIMEOUT) @@ -889,8 +891,8 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): dataset = self.temp_dataset(dataset_id) greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') - table_arg = Table(dataset.table(table_id), schema=[greeting], - client=Config.CLIENT) + table_ref = dataset.table(table_id) + table_arg = Table(table_ref, schema=[greeting], client=Config.CLIENT) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -901,13 +903,13 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): writer.writerows(rows) with open(temp.name, 'rb') as csv_read: - job = table.upload_from_file( - csv_read, - source_format='CSV', - skip_leading_rows=1, - create_disposition='CREATE_NEVER', - write_disposition='WRITE_EMPTY', - ) + config = bigquery.LoadJobConfig() + config.source_format = 'CSV' + config.skip_leading_rows = 1 + config.create_disposition = 'CREATE_NEVER' + config.write_disposition = 'WRITE_EMPTY' + job = Config.CLIENT.load_table_from_file( + csv_read, table_ref, job_config=config) # Retry until done. job.result(timeout=JOB_TIMEOUT) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 4f509da53f1c..f4537f3fba8b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -13,10 +13,17 @@ # limitations under the License. import copy +import email +import io +import json import unittest import mock import six +from six.moves import http_client +import pytest + +from google.cloud.bigquery.dataset import DatasetReference def _make_credentials(): @@ -1442,6 +1449,154 @@ def test_load_table_from_storage(self): self.assertEqual(list(job.source_uris), [SOURCE_URI]) self.assertIs(job.destination, destination) + @staticmethod + def _mock_requests_response(status_code, headers, content=b''): + return mock.Mock( + content=content, headers=headers, status_code=status_code, + spec=['content', 'headers', 'status_code']) + + def _mock_transport(self, status_code, headers, content=b''): + fake_transport = mock.Mock(spec=['request']) + fake_response = self._mock_requests_response( + status_code, headers, content=content) + fake_transport.request.return_value = fake_response + return fake_transport + + def _initiate_resumable_upload_helper(self, num_retries=None): + from google.resumable_media.requests import ResumableUpload + from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE + from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob, LoadJobConfig + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + TABLE_ID = 'TABLE_ID' + + # Create mocks to be checked for doing transport. + resumable_url = 'http://test.invalid?upload_id=hey-you' + response_headers = {'location': resumable_url} + fake_transport = self._mock_transport( + http_client.OK, response_headers) + client = self._make_one(project=PROJECT, _http=fake_transport) + conn = client._connection = _Connection() + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + + # Create some mock arguments and call the method under test. + data = b'goodbye gudbi gootbee' + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = 'CSV' + job = LoadJob(None, None, table_ref, client, job_config=config) + metadata = job._build_resource() + upload, transport = client._initiate_resumable_upload( + stream, metadata, num_retries) + + # Check the returned values. + self.assertIsInstance(upload, ResumableUpload) + upload_url = ( + 'https://www.googleapis.com/upload/bigquery/v2/projects/' + + PROJECT + + '/jobs?uploadType=resumable') + self.assertEqual(upload.upload_url, upload_url) + expected_headers = _get_upload_headers(conn.USER_AGENT) + self.assertEqual(upload._headers, expected_headers) + self.assertFalse(upload.finished) + self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE) + self.assertIs(upload._stream, stream) + self.assertIsNone(upload._total_bytes) + self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE) + self.assertEqual(upload.resumable_url, resumable_url) + + retry_strategy = upload._retry_strategy + self.assertEqual(retry_strategy.max_sleep, 64.0) + if num_retries is None: + self.assertEqual(retry_strategy.max_cumulative_retry, 600.0) + self.assertIsNone(retry_strategy.max_retries) + else: + self.assertIsNone(retry_strategy.max_cumulative_retry) + self.assertEqual(retry_strategy.max_retries, num_retries) + self.assertIs(transport, fake_transport) + # Make sure we never read from the stream. + self.assertEqual(stream.tell(), 0) + + # Check the mocks. + request_headers = expected_headers.copy() + request_headers['x-upload-content-type'] = _GENERIC_CONTENT_TYPE + fake_transport.request.assert_called_once_with( + 'POST', + upload_url, + data=json.dumps(metadata).encode('utf-8'), + headers=request_headers, + ) + + def test__initiate_resumable_upload(self): + self._initiate_resumable_upload_helper() + + def test__initiate_resumable_upload_with_retry(self): + self._initiate_resumable_upload_helper(num_retries=11) + + def _do_multipart_upload_success_helper( + self, get_boundary, num_retries=None): + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob, LoadJobConfig + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + TABLE_ID = 'TABLE_ID' + + fake_transport = self._mock_transport(http_client.OK, {}) + client = self._make_one(project=PROJECT, _http=fake_transport) + conn = client._connection = _Connection() + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + + # Create some mock arguments. + data = b'Bzzzz-zap \x00\x01\xf4' + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = 'CSV' + job = LoadJob(None, None, table_ref, client, job_config=config) + metadata = job._build_resource() + size = len(data) + response = client._do_multipart_upload( + stream, metadata, size, num_retries) + + # Check the mocks and the returned value. + self.assertIs(response, fake_transport.request.return_value) + self.assertEqual(stream.tell(), size) + get_boundary.assert_called_once_with() + + upload_url = ( + 'https://www.googleapis.com/upload/bigquery/v2/projects/' + + PROJECT + + '/jobs?uploadType=multipart') + payload = ( + b'--==0==\r\n' + + b'content-type: application/json; charset=UTF-8\r\n\r\n' + + json.dumps(metadata).encode('utf-8') + b'\r\n' + + b'--==0==\r\n' + + b'content-type: */*\r\n\r\n' + + data + b'\r\n' + + b'--==0==--') + headers = _get_upload_headers(conn.USER_AGENT) + headers['content-type'] = b'multipart/related; boundary="==0=="' + fake_transport.request.assert_called_once_with( + 'POST', + upload_url, + data=payload, + headers=headers, + ) + + @mock.patch(u'google.resumable_media._upload.get_boundary', + return_value=b'==0==') + def test__do_multipart_upload(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary) + + @mock.patch(u'google.resumable_media._upload.get_boundary', + return_value=b'==0==') + def test__do_multipart_upload_with_retry(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary, num_retries=8) + def test_copy_table(self): from google.cloud.bigquery.job import CopyJob @@ -2180,8 +2335,328 @@ def test_list_rows_errors(self): client.list_rows(1) +class TestClientUpload(object): + # NOTE: This is a "partner" to `TestClient` meant to test some of the + # "load_table_from_file" portions of `Client`. It also uses + # `pytest`-style tests rather than `unittest`-style. + + TABLE_REF = DatasetReference( + 'project_id', 'test_dataset').table('test_table') + + @staticmethod + def _make_client(transport=None): + from google.cloud.bigquery import _http + from google.cloud.bigquery import client + + cl = client.Client(project='project_id', + credentials=_make_credentials(), + _http=transport) + cl._connection = mock.create_autospec(_http.Connection, instance=True) + return cl + + @staticmethod + def _make_response(status_code, content='', headers={}): + """Make a mock HTTP response.""" + import requests + response = requests.Response() + response.request = requests.Request( + 'POST', 'http://example.com').prepare() + response._content = content.encode('utf-8') + response.headers.update(headers) + response.status_code = status_code + return response + + @classmethod + def _make_do_upload_patch(cls, client, method, + resource={}, side_effect=None): + """Patches the low-level upload helpers.""" + if side_effect is None: + side_effect = [cls._make_response( + http_client.OK, + json.dumps(resource), + {'Content-Type': 'application/json'})] + return mock.patch.object( + client, method, side_effect=side_effect, autospec=True) + + EXPECTED_CONFIGURATION = { + 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'}, + 'configuration': { + 'load': { + 'sourceFormat': 'CSV', + 'destinationTable': { + 'projectId': 'project_id', + 'datasetId': 'test_dataset', + 'tableId': 'test_table' + } + } + } + } + + @staticmethod + def _make_file_obj(): + return io.BytesIO(b'hello, is it me you\'re looking for?') + + @staticmethod + def _make_config(): + from google.cloud.bigquery.job import LoadJobConfig + + config = LoadJobConfig() + config.source_format = 'CSV' + return config + + # High-level tests + + def test_load_table_from_file_resumable(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file(file_obj, self.TABLE_REF, + job_id='job_id', + job_config=self._make_config()) + + do_upload.assert_called_once_with( + file_obj, + self.EXPECTED_CONFIGURATION, + _DEFAULT_NUM_RETRIES) + + def test_load_table_from_file_resumable_metadata(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + + config = self._make_config() + config.allow_jagged_rows = False + config.allow_quoted_newlines = False + config.create_disposition = 'CREATE_IF_NEEDED' + config.encoding = 'utf8' + config.field_delimiter = ',' + config.ignore_unknown_values = False + config.max_bad_records = 0 + config.quote_character = '"' + config.skip_leading_rows = 1 + config.write_disposition = 'WRITE_APPEND' + config.null_marker = r'\N' + + expected_config = { + 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'}, + 'configuration': { + 'load': { + 'destinationTable': { + 'projectId': self.TABLE_REF.project, + 'datasetId': self.TABLE_REF.dataset_id, + 'tableId': self.TABLE_REF.table_id, + }, + 'sourceFormat': config.source_format, + 'allowJaggedRows': config.allow_jagged_rows, + 'allowQuotedNewlines': config.allow_quoted_newlines, + 'createDisposition': config.create_disposition, + 'encoding': config.encoding, + 'fieldDelimiter': config.field_delimiter, + 'ignoreUnknownValues': config.ignore_unknown_values, + 'maxBadRecords': config.max_bad_records, + 'quote': config.quote_character, + 'skipLeadingRows': str(config.skip_leading_rows), + 'writeDisposition': config.write_disposition, + 'nullMarker': config.null_marker, + }, + }, + } + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', expected_config) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, self.TABLE_REF, job_id='job_id', job_config=config) + + do_upload.assert_called_once_with( + file_obj, + expected_config, + _DEFAULT_NUM_RETRIES) + + def test_load_table_from_file_multipart(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + file_obj_size = 10 + config = self._make_config() + + do_upload_patch = self._make_do_upload_patch( + client, '_do_multipart_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, self.TABLE_REF, job_id='job_id', job_config=config, + size=file_obj_size) + + do_upload.assert_called_once_with( + file_obj, + self.EXPECTED_CONFIGURATION, + file_obj_size, + _DEFAULT_NUM_RETRIES) + + def test_load_table_from_file_with_retries(self): + client = self._make_client() + file_obj = self._make_file_obj() + num_retries = 20 + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, self.TABLE_REF, num_retries=num_retries, + job_id='job_id', job_config=self._make_config()) + + do_upload.assert_called_once_with( + file_obj, + self.EXPECTED_CONFIGURATION, + num_retries) + + def test_load_table_from_file_with_rewind(self): + client = self._make_client() + file_obj = self._make_file_obj() + file_obj.seek(2) + + with self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION): + client.load_table_from_file( + file_obj, self.TABLE_REF, rewind=True) + + assert file_obj.tell() == 0 + + def test_load_table_from_file_failure(self): + from google.resumable_media import InvalidResponse + from google.cloud import exceptions + + client = self._make_client() + file_obj = self._make_file_obj() + + response = self._make_response( + content='Someone is already in this spot.', + status_code=http_client.CONFLICT) + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', + side_effect=InvalidResponse(response)) + + with do_upload_patch, pytest.raises(exceptions.Conflict) as exc_info: + client.load_table_from_file( + file_obj, self.TABLE_REF, rewind=True) + + assert response.text in exc_info.value.message + assert exc_info.value.errors == [] + + def test_load_table_from_file_bad_mode(self): + client = self._make_client() + file_obj = mock.Mock(spec=['mode']) + file_obj.mode = 'x' + + with pytest.raises(ValueError): + client.load_table_from_file(file_obj, self.TABLE_REF) + + # Low-level tests + + @classmethod + def _make_resumable_upload_responses(cls, size): + """Make a series of responses for a successful resumable upload.""" + from google import resumable_media + + resumable_url = 'http://test.invalid?upload_id=and-then-there-was-1' + initial_response = cls._make_response( + http_client.OK, '', {'location': resumable_url}) + data_response = cls._make_response( + resumable_media.PERMANENT_REDIRECT, + '', {'range': 'bytes=0-{:d}'.format(size - 1)}) + final_response = cls._make_response( + http_client.OK, + json.dumps({'size': size}), + {'Content-Type': 'application/json'}) + return [initial_response, data_response, final_response] + + @staticmethod + def _make_transport(responses=None): + import google.auth.transport.requests + + transport = mock.create_autospec( + google.auth.transport.requests.AuthorizedSession, instance=True) + transport.request.side_effect = responses + return transport + + def test__do_resumable_upload(self): + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = self._make_transport( + self._make_resumable_upload_responses(file_obj_len)) + client = self._make_client(transport) + + result = client._do_resumable_upload( + file_obj, + self.EXPECTED_CONFIGURATION, + None) + + content = result.content.decode('utf-8') + assert json.loads(content) == {'size': file_obj_len} + + # Verify that configuration data was passed in with the initial + # request. + transport.request.assert_any_call( + 'POST', + mock.ANY, + data=json.dumps(self.EXPECTED_CONFIGURATION).encode('utf-8'), + headers=mock.ANY) + + def test__do_multipart_upload(self): + transport = self._make_transport([self._make_response(http_client.OK)]) + client = self._make_client(transport) + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + client._do_multipart_upload( + file_obj, + self.EXPECTED_CONFIGURATION, + file_obj_len, + None) + + # Verify that configuration data was passed in with the initial + # request. + request_args = transport.request.mock_calls[0][2] + request_data = request_args['data'].decode('utf-8') + request_headers = request_args['headers'] + + request_content = email.message_from_string( + 'Content-Type: {}\r\n{}'.format( + request_headers['content-type'].decode('utf-8'), + request_data)) + + # There should be two payloads: the configuration and the binary daya. + configuration_data = request_content.get_payload(0).get_payload() + binary_data = request_content.get_payload(1).get_payload() + + assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION + assert binary_data.encode('utf-8') == file_obj.getvalue() + + def test__do_multipart_upload_wrong_size(self): + client = self._make_client() + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + with pytest.raises(ValueError): + client._do_multipart_upload( + file_obj, + {}, + file_obj_len+1, + None) + + class _Connection(object): + USER_AGENT = 'testing 1.2.3' + def __init__(self, *responses): self._responses = responses self._requested = [] diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 6e00bd73c9c6..9661a449c4fb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -12,14 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import email -import io -import json import unittest import mock -from six.moves import http_client -import pytest from google.cloud.bigquery.dataset import DatasetReference @@ -1029,498 +1024,6 @@ def _row_data(row): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['data'], SENT) - def test__populate_view_use_legacy_sql_resource_w_existing_view(self): - query = 'select * from foo' - resource = {'view': {'query': query}} - client = mock.Mock(spec=[u'_credentials', '_http']) - client._http = mock.sentinel.http - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table = self._make_one(dataset.table(self.TABLE_NAME), client=client) - table.view_use_legacy_sql = True - - table._populate_view_use_legacy_sql_resource(resource) - - self.assertEqual( - resource['view']['useLegacySql'], table.view_use_legacy_sql) - self.assertEqual(resource['view']['query'], query) - - def test__get_transport(self): - client = mock.Mock(spec=[u'_credentials', '_http']) - client._http = mock.sentinel.http - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - transport = table._get_transport(client) - - self.assertIs(transport, mock.sentinel.http) - - @staticmethod - def _mock_requests_response(status_code, headers, content=b''): - return mock.Mock( - content=content, headers=headers, status_code=status_code, - spec=['content', 'headers', 'status_code']) - - def _mock_transport(self, status_code, headers, content=b''): - fake_transport = mock.Mock(spec=['request']) - fake_response = self._mock_requests_response( - status_code, headers, content=content) - fake_transport.request.return_value = fake_response - return fake_transport - - def _initiate_resumable_upload_helper(self, num_retries=None): - from google.resumable_media.requests import ResumableUpload - from google.cloud.bigquery.table import _DEFAULT_CHUNKSIZE - from google.cloud.bigquery.table import _GENERIC_CONTENT_TYPE - from google.cloud.bigquery.table import _get_upload_headers - from google.cloud.bigquery.table import _get_upload_metadata - - connection = _Connection() - client = _Client(self.PROJECT, connection=connection) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - # Create mocks to be checked for doing transport. - resumable_url = 'http://test.invalid?upload_id=hey-you' - response_headers = {'location': resumable_url} - fake_transport = self._mock_transport( - http_client.OK, response_headers) - client._http = fake_transport - - # Create some mock arguments and call the method under test. - data = b'goodbye gudbi gootbee' - stream = io.BytesIO(data) - metadata = _get_upload_metadata( - 'CSV', table._schema, table.project, - table.dataset_id, table.table_id) - upload, transport = table._initiate_resumable_upload( - client, stream, metadata, num_retries) - - # Check the returned values. - self.assertIsInstance(upload, ResumableUpload) - upload_url = ( - 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - self.PROJECT + - '/jobs?uploadType=resumable') - self.assertEqual(upload.upload_url, upload_url) - expected_headers = _get_upload_headers(connection.USER_AGENT) - self.assertEqual(upload._headers, expected_headers) - self.assertFalse(upload.finished) - self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE) - self.assertIs(upload._stream, stream) - self.assertIsNone(upload._total_bytes) - self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE) - self.assertEqual(upload.resumable_url, resumable_url) - - retry_strategy = upload._retry_strategy - self.assertEqual(retry_strategy.max_sleep, 64.0) - if num_retries is None: - self.assertEqual(retry_strategy.max_cumulative_retry, 600.0) - self.assertIsNone(retry_strategy.max_retries) - else: - self.assertIsNone(retry_strategy.max_cumulative_retry) - self.assertEqual(retry_strategy.max_retries, num_retries) - self.assertIs(transport, fake_transport) - # Make sure we never read from the stream. - self.assertEqual(stream.tell(), 0) - - # Check the mocks. - request_headers = expected_headers.copy() - request_headers['x-upload-content-type'] = _GENERIC_CONTENT_TYPE - fake_transport.request.assert_called_once_with( - 'POST', - upload_url, - data=json.dumps(metadata).encode('utf-8'), - headers=request_headers, - ) - - def test__initiate_resumable_upload(self): - self._initiate_resumable_upload_helper() - - def test__initiate_resumable_upload_with_retry(self): - self._initiate_resumable_upload_helper(num_retries=11) - - def _do_multipart_upload_success_helper( - self, get_boundary, num_retries=None): - from google.cloud.bigquery.table import _get_upload_headers - from google.cloud.bigquery.table import _get_upload_metadata - - connection = _Connection() - client = _Client(self.PROJECT, connection=connection) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - # Create mocks to be checked for doing transport. - fake_transport = self._mock_transport(http_client.OK, {}) - client._http = fake_transport - - # Create some mock arguments. - data = b'Bzzzz-zap \x00\x01\xf4' - stream = io.BytesIO(data) - metadata = _get_upload_metadata( - 'CSV', table._schema, table.project, - table.dataset_id, table.table_id) - size = len(data) - response = table._do_multipart_upload( - client, stream, metadata, size, num_retries) - - # Check the mocks and the returned value. - self.assertIs(response, fake_transport.request.return_value) - self.assertEqual(stream.tell(), size) - get_boundary.assert_called_once_with() - - upload_url = ( - 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - self.PROJECT + - '/jobs?uploadType=multipart') - payload = ( - b'--==0==\r\n' + - b'content-type: application/json; charset=UTF-8\r\n\r\n' + - json.dumps(metadata).encode('utf-8') + b'\r\n' + - b'--==0==\r\n' + - b'content-type: */*\r\n\r\n' + - data + b'\r\n' + - b'--==0==--') - headers = _get_upload_headers(connection.USER_AGENT) - headers['content-type'] = b'multipart/related; boundary="==0=="' - fake_transport.request.assert_called_once_with( - 'POST', - upload_url, - data=payload, - headers=headers, - ) - - @mock.patch(u'google.resumable_media._upload.get_boundary', - return_value=b'==0==') - def test__do_multipart_upload(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary) - - @mock.patch(u'google.resumable_media._upload.get_boundary', - return_value=b'==0==') - def test__do_multipart_upload_with_retry(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, num_retries=8) - - -class TestTableUpload(object): - # NOTE: This is a "partner" to `TestTable` meant to test some of the - # "upload" portions of `Table`. It also uses `pytest`-style tests - # rather than `unittest`-style. - - @staticmethod - def _make_table(transport=None): - from google.cloud.bigquery import _http - from google.cloud.bigquery import client - from google.cloud.bigquery import dataset - from google.cloud.bigquery import table - - connection = mock.create_autospec(_http.Connection, instance=True) - client = mock.create_autospec(client.Client, instance=True) - client._connection = connection - client._credentials = mock.sentinel.credentials - client._http = transport - client.project = 'project_id' - - dataset_ref = dataset.DatasetReference('project_id', 'test_dataset') - table_ref = dataset_ref.table('test_table') - table = table.Table(table_ref, client=client) - - return table - - @staticmethod - def _make_response(status_code, content='', headers={}): - """Make a mock HTTP response.""" - import requests - response = requests.Response() - response.request = requests.Request( - 'POST', 'http://example.com').prepare() - response._content = content.encode('utf-8') - response.headers.update(headers) - response.status_code = status_code - return response - - @classmethod - def _make_do_upload_patch(cls, table, method, side_effect=None): - """Patches the low-level upload helpers.""" - if side_effect is None: - side_effect = [cls._make_response( - http_client.OK, - json.dumps({}), - {'Content-Type': 'application/json'})] - return mock.patch.object( - table, method, side_effect=side_effect, autospec=True) - - EXPECTED_CONFIGURATION = { - 'configuration': { - 'load': { - 'sourceFormat': 'CSV', - 'destinationTable': { - 'projectId': 'project_id', - 'datasetId': 'test_dataset', - 'tableId': 'test_table' - } - } - } - } - - @staticmethod - def _make_file_obj(): - return io.BytesIO(b'hello, is it me you\'re looking for?') - - # High-level tests - - def test_upload_from_file_resumable(self): - import google.cloud.bigquery.table - - table = self._make_table() - file_obj = self._make_file_obj() - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload') - with do_upload_patch as do_upload: - table.upload_from_file(file_obj, source_format='CSV') - - do_upload.assert_called_once_with( - table._client, - file_obj, - self.EXPECTED_CONFIGURATION, - google.cloud.bigquery.table._DEFAULT_NUM_RETRIES) - - def test_upload_file_resumable_metadata(self): - table = self._make_table() - file_obj = self._make_file_obj() - - config_args = { - 'source_format': 'CSV', - 'allow_jagged_rows': False, - 'allow_quoted_newlines': False, - 'create_disposition': 'CREATE_IF_NEEDED', - 'encoding': 'utf8', - 'field_delimiter': ',', - 'ignore_unknown_values': False, - 'max_bad_records': 0, - 'quote_character': '"', - 'skip_leading_rows': 1, - 'write_disposition': 'WRITE_APPEND', - 'job_name': 'oddjob', - 'null_marker': r'\N', - } - - expected_config = { - 'configuration': { - 'load': { - 'sourceFormat': config_args['source_format'], - 'destinationTable': { - 'projectId': table.project, - 'datasetId': table.dataset_id, - 'tableId': table.table_id, - }, - 'allowJaggedRows': config_args['allow_jagged_rows'], - 'allowQuotedNewlines': - config_args['allow_quoted_newlines'], - 'createDisposition': config_args['create_disposition'], - 'encoding': config_args['encoding'], - 'fieldDelimiter': config_args['field_delimiter'], - 'ignoreUnknownValues': - config_args['ignore_unknown_values'], - 'maxBadRecords': config_args['max_bad_records'], - 'quote': config_args['quote_character'], - 'skipLeadingRows': config_args['skip_leading_rows'], - 'writeDisposition': config_args['write_disposition'], - 'jobReference': {'jobId': config_args['job_name']}, - 'nullMarker': config_args['null_marker'], - }, - }, - } - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload') - with do_upload_patch as do_upload: - table.upload_from_file( - file_obj, **config_args) - - do_upload.assert_called_once_with( - table._client, - file_obj, - expected_config, - mock.ANY) - - def test_upload_from_file_multipart(self): - import google.cloud.bigquery.table - - table = self._make_table() - file_obj = self._make_file_obj() - file_obj_size = 10 - - do_upload_patch = self._make_do_upload_patch( - table, '_do_multipart_upload') - with do_upload_patch as do_upload: - table.upload_from_file( - file_obj, source_format='CSV', size=file_obj_size) - - do_upload.assert_called_once_with( - table._client, - file_obj, - self.EXPECTED_CONFIGURATION, - file_obj_size, - google.cloud.bigquery.table._DEFAULT_NUM_RETRIES) - - def test_upload_from_file_with_retries(self): - table = self._make_table() - file_obj = self._make_file_obj() - num_retries = 20 - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload') - with do_upload_patch as do_upload: - table.upload_from_file( - file_obj, source_format='CSV', num_retries=num_retries) - - do_upload.assert_called_once_with( - table._client, - file_obj, - self.EXPECTED_CONFIGURATION, - num_retries) - - def test_upload_from_file_with_rewind(self): - table = self._make_table() - file_obj = self._make_file_obj() - file_obj.seek(2) - - with self._make_do_upload_patch(table, '_do_resumable_upload'): - table.upload_from_file( - file_obj, source_format='CSV', rewind=True) - - assert file_obj.tell() == 0 - - def test_upload_from_file_failure(self): - from google.resumable_media import InvalidResponse - from google.cloud import exceptions - - table = self._make_table() - file_obj = self._make_file_obj() - - response = self._make_response( - content='Someone is already in this spot.', - status_code=http_client.CONFLICT) - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload', - side_effect=InvalidResponse(response)) - - with do_upload_patch, pytest.raises(exceptions.Conflict) as exc_info: - table.upload_from_file( - file_obj, source_format='CSV', rewind=True) - - assert response.text in exc_info.value.message - assert exc_info.value.errors == [] - - def test_upload_from_file_bad_mode(self): - table = self._make_table() - file_obj = mock.Mock(spec=['mode']) - file_obj.mode = 'x' - - with pytest.raises(ValueError): - table.upload_from_file( - file_obj, source_format='CSV',) - - # Low-level tests - - @classmethod - def _make_resumable_upload_responses(cls, size): - """Make a series of responses for a successful resumable upload.""" - from google import resumable_media - - resumable_url = 'http://test.invalid?upload_id=and-then-there-was-1' - initial_response = cls._make_response( - http_client.OK, '', {'location': resumable_url}) - data_response = cls._make_response( - resumable_media.PERMANENT_REDIRECT, - '', {'range': 'bytes=0-{:d}'.format(size - 1)}) - final_response = cls._make_response( - http_client.OK, - json.dumps({'size': size}), - {'Content-Type': 'application/json'}) - return [initial_response, data_response, final_response] - - @staticmethod - def _make_transport(responses=None): - import google.auth.transport.requests - - transport = mock.create_autospec( - google.auth.transport.requests.AuthorizedSession, instance=True) - transport.request.side_effect = responses - return transport - - def test__do_resumable_upload(self): - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len)) - table = self._make_table(transport) - - result = table._do_resumable_upload( - table._client, - file_obj, - self.EXPECTED_CONFIGURATION, - None) - - content = result.content.decode('utf-8') - assert json.loads(content) == {'size': file_obj_len} - - # Verify that configuration data was passed in with the initial - # request. - transport.request.assert_any_call( - 'POST', - mock.ANY, - data=json.dumps(self.EXPECTED_CONFIGURATION).encode('utf-8'), - headers=mock.ANY) - - def test__do_multipart_upload(self): - transport = self._make_transport([self._make_response(http_client.OK)]) - table = self._make_table(transport) - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - table._do_multipart_upload( - table._client, - file_obj, - self.EXPECTED_CONFIGURATION, - file_obj_len, - None) - - # Verify that configuration data was passed in with the initial - # request. - request_args = transport.request.mock_calls[0][2] - request_data = request_args['data'].decode('utf-8') - request_headers = request_args['headers'] - - request_content = email.message_from_string( - 'Content-Type: {}\r\n{}'.format( - request_headers['content-type'].decode('utf-8'), - request_data)) - - # There should be two payloads: the configuration and the binary daya. - configuration_data = request_content.get_payload(0).get_payload() - binary_data = request_content.get_payload(1).get_payload() - - assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION - assert binary_data.encode('utf-8') == file_obj.getvalue() - - def test__do_multipart_upload_wrong_size(self): - table = self._make_table() - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - with pytest.raises(ValueError): - table._do_multipart_upload( - table._client, - file_obj, - {}, - file_obj_len+1, - None) - class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): From b5701b93e766e1693f43ac51e412131d1807bde1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 9 Oct 2017 14:20:03 -0700 Subject: [PATCH 0225/2016] BQ: Pass selected_fields as a string to tabledata.list. (#4143) BigQuery was only returning the first column when passing in a list instead of a comma-separated string. --- .../google/cloud/bigquery/client.py | 3 ++- .../google-cloud-bigquery/tests/system.py | 22 +++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index ce41824996bd..756e2bb7d41d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -924,7 +924,8 @@ def list_rows(self, table, selected_fields=None, max_results=None, params = {} if selected_fields is not None: - params['selectedFields'] = [f.name for f in selected_fields] + params['selectedFields'] = ','.join( + [f.name for f in selected_fields]) if start_index is not None: params['startIndex'] = start_index diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1bf00a9b57ef..e59747578bb3 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -312,8 +312,9 @@ def test_update_table_schema(self): self.assertEqual(found.mode, expected.mode) @staticmethod - def _fetch_single_page(table): - iterator = Config.CLIENT.list_rows(table) + def _fetch_single_page(table, selected_fields=None): + iterator = Config.CLIENT.list_rows( + table, selected_fields=selected_fields) page = six.next(iterator.pages) return list(page) @@ -1236,6 +1237,23 @@ def test_dump_table_w_public_data(self): table = Config.CLIENT.get_table(table_ref) self._fetch_single_page(table) + def test_dump_table_w_public_data_selected_fields(self): + PUBLIC = 'bigquery-public-data' + DATASET_ID = 'samples' + TABLE_NAME = 'natality' + selected_fields = [ + bigquery.SchemaField('year', 'INTEGER', mode='NULLABLE'), + bigquery.SchemaField('month', 'INTEGER', mode='NULLABLE'), + bigquery.SchemaField('day', 'INTEGER', mode='NULLABLE'), + ] + table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME) + + rows = self._fetch_single_page( + table_ref, selected_fields=selected_fields) + + self.assertGreater(len(rows), 0) + self.assertEqual(len(rows[0]), 3) + def test_large_query_w_public_data(self): PUBLIC = 'bigquery-public-data' DATASET_ID = 'samples' From 1975e3fd02b1f2da0538248f4c5692613b5cdb33 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 9 Oct 2017 19:25:04 -0400 Subject: [PATCH 0226/2016] bigquery: remove Table.exists (#4145) --- .../google/cloud/bigquery/table.py | 25 ---------- .../google-cloud-bigquery/tests/system.py | 27 +++++++---- .../tests/unit/test_table.py | 48 +------------------ 3 files changed, 20 insertions(+), 80 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 8f56dffd18bf..3464ddacaf91 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -20,7 +20,6 @@ import six -from google.cloud import exceptions from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime from google.cloud.bigquery.schema import SchemaField @@ -660,30 +659,6 @@ def _build_resource(self, filter_fields): resource[api_field] = getattr(self, f) return resource - def exists(self, client=None): - """API call: test for the existence of the table via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: bool - :returns: Boolean indicating existence of the table. - """ - client = self._require_client(client) - - try: - client._connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) - except exceptions.NotFound: - return False - else: - return True - def row_from_mapping(self, mapping): """Convert a mapping to a row tuple using the schema. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index e59747578bb3..9ef835a9bb2a 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -192,12 +192,12 @@ def test_create_table(self): age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') table_arg = Table(dataset.table(table_id), schema=[full_name, age], client=Config.CLIENT) - self.assertFalse(table_arg.exists()) + self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_id) def test_get_table_w_public_dataset(self): @@ -259,10 +259,10 @@ def test_update_table(self): ] table_arg = Table(dataset.table(TABLE_NAME), schema=schema, client=Config.CLIENT) - self.assertFalse(table_arg.exists()) + self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) self.assertIsNone(table.friendly_name) self.assertIsNone(table.description) table.friendly_name = 'Friendly' @@ -294,10 +294,10 @@ def test_update_table_schema(self): ] table_arg = Table(dataset.table(TABLE_NAME), schema=schema, client=Config.CLIENT) - self.assertFalse(table_arg.exists()) + self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) voter = bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE') schema = table.schema schema.append(voter) @@ -338,10 +338,10 @@ def test_insert_data_then_dump_table(self): now = bigquery.SchemaField('now', 'TIMESTAMP') table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age, now], client=Config.CLIENT) - self.assertFalse(table_arg.exists()) + self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) errors = table.insert_data(ROWS, ROW_IDS) self.assertEqual(len(errors), 0) @@ -1317,7 +1317,7 @@ def test_create_table_insert_fetch_nested_schema(self): client=Config.CLIENT) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_name) to_insert = [] @@ -1386,3 +1386,12 @@ def _dataset_exists(ds): return True except NotFound: return False + + +def _table_exists(t): + try: + tr = DatasetReference(t.project, t.dataset_id).table(t.table_id) + Config.CLIENT.get_table(tr) + return True + except NotFound: + return False diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 9661a449c4fb..951042020748 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -708,43 +708,6 @@ def test_list_partitions(self): client=client) self.assertEqual(table.list_partitions(), [20160804, 20160805]) - def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - self.assertFalse(table.exists()) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) - - def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - - self.assertTrue(table.exists(client=client2)) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) - def test_row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} @@ -1239,13 +1202,6 @@ def __init__(self, *responses): self._requested = [] def api_request(self, **kw): - from google.cloud.exceptions import NotFound - self._requested.append(kw) - - try: - response, self._responses = self._responses[0], self._responses[1:] - except IndexError: - raise NotFound('miss') - else: - return response + response, self._responses = self._responses[0], self._responses[1:] + return response From a55d95ffc5e44b68aa45e2f0cbf8c9417d59d6cc Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 9 Oct 2017 20:31:56 -0400 Subject: [PATCH 0227/2016] bigquery: remove unused function (#4147) --- .../google/cloud/bigquery/table.py | 43 ------------ .../tests/unit/test_table.py | 68 ------------------- 2 files changed, 111 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 3464ddacaf91..89df28de28d5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -831,46 +831,3 @@ def _build_schema_resource(fields): info['fields'] = _build_schema_resource(field.fields) infos.append(info) return infos -# pylint: enable=unused-argument - - -def _get_upload_metadata(source_format, schema, project, dataset_id, table_id): - """Get base metadata for creating a table. - - :type source_format: str - :param source_format: one of 'CSV' or 'NEWLINE_DELIMITED_JSON'. - job configuration option. - - :type schema: list - :param schema: List of :class:`SchemaField` associated with a table. - - :type project: str - :param table_id: The project bound to the table. - - :type dataset_id: str - :param table_id: The dataset_id of the dataset. - - :type table_id: str - :param table_id: The table_id of the table. - - :rtype: dict - :returns: The metadata dictionary. - """ - load_config = { - 'sourceFormat': source_format, - 'destinationTable': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id, - }, - } - if schema: - load_config['schema'] = { - 'fields': _build_schema_resource(schema), - } - - return { - 'configuration': { - 'load': load_config, - }, - } diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 951042020748..85a6f3a73b2d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1101,74 +1101,6 @@ def test_w_subfields(self): 'mode': 'REQUIRED'}]}) -class Test__get_upload_metadata(unittest.TestCase): - - @staticmethod - def _call_fut(source_format, schema, project, dataset_id, name): - from google.cloud.bigquery.table import _get_upload_metadata - - return _get_upload_metadata( - source_format, schema, project, dataset_id, name) - - def test_empty_schema(self): - source_format = 'AVRO' - dataset = mock.Mock(project='prediction', - spec=['dataset_id', 'project']) - dataset.dataset_id = 'market' # mock.Mock() treats `name` specially. - table_name = 'chairs' - metadata = self._call_fut(source_format, [], dataset.project, - dataset.dataset_id, table_name) - - expected = { - 'configuration': { - 'load': { - 'sourceFormat': source_format, - 'destinationTable': { - 'projectId': dataset.project, - 'datasetId': dataset.dataset_id, - 'tableId': table_name, - }, - }, - }, - } - self.assertEqual(metadata, expected) - - def test_with_schema(self): - from google.cloud.bigquery.table import SchemaField - - source_format = 'CSV' - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - dataset = mock.Mock(project='blind', spec=['dataset_id', 'project']) - dataset.dataset_id = 'movie' # mock.Mock() treats `name` specially. - table_name = 'teebull-neem' - metadata = self._call_fut( - source_format, [full_name], dataset.project, - dataset.dataset_id, table_name) - - expected = { - 'configuration': { - 'load': { - 'sourceFormat': source_format, - 'destinationTable': { - 'projectId': dataset.project, - 'datasetId': dataset.dataset_id, - 'tableId': table_name, - }, - 'schema': { - 'fields': [ - { - 'name': full_name.name, - 'type': full_name.field_type, - 'mode': full_name.mode, - }, - ], - }, - }, - }, - } - self.assertEqual(metadata, expected) - - class _Client(object): _query_results = () From 7eb87aa84770d7525afaf16abb310acc6dee215f Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 9 Oct 2017 20:32:11 -0400 Subject: [PATCH 0228/2016] bigquery: add Client.list_partitions (#4146) Remove from Table. --- .../google/cloud/bigquery/client.py | 21 ++++++++- .../google/cloud/bigquery/table.py | 17 -------- .../tests/unit/test_client.py | 43 +++++++++++++++++++ .../tests/unit/test_table.py | 30 ------------- 4 files changed, 63 insertions(+), 48 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 756e2bb7d41d..e8642f8afb79 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -37,7 +37,7 @@ from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob -from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.job import QueryJob, QueryJobConfig from google.cloud.bigquery.query import QueryResults from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start @@ -943,6 +943,25 @@ def list_rows(self, table, selected_fields=None, max_results=None, iterator.schema = schema return iterator + def list_partitions(self, table): + """List the partitions in a table. + + :type table: One of: + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` + :param table: the table to list, or a reference to it. + + :rtype: list + :returns: a list of time partitions + """ + config = QueryJobConfig() + config.use_legacy_sql = True # required for '$' syntax + rows = self.query_rows( + 'SELECT partition_id from [%s:%s.%s$__PARTITIONS_SUMMARY__]' % + (table.project, table.dataset_id, table.table_id), + job_config=config) + return [row[0] for row in rows] + # pylint: disable=unused-argument def _item_to_project(iterator, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 89df28de28d5..9630f1495290 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -526,23 +526,6 @@ def view_use_legacy_sql(self, value): self._properties['view'] = {} self._properties['view']['useLegacySql'] = value - def list_partitions(self, client=None): - """List the partitions in a table. - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: list - :returns: a list of time partitions - """ - query = self._require_client(client).run_sync_query( - 'SELECT partition_id from [%s.%s$__PARTITIONS_SUMMARY__]' % - (self.dataset_id, self.table_id)) - query.run() - return [row[0] for row in query.rows] - @classmethod def from_api_repr(cls, resource, client): """Factory: construct a table given its API representation diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f4537f3fba8b..23757933d747 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2334,6 +2334,49 @@ def test_list_rows_errors(self): with self.assertRaises(TypeError): client.list_rows(1) + def test_list_partitions(self): + PROJECT = 'PROJECT' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': 'JOB_ID', + }, + 'configuration': { + 'query': { + 'query': 'q', + }, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'partition_id', 'type': 'INTEGER', + 'mode': 'REQUIRED'}, + ] + }, + 'totalRows': '2', + 'pageToken': 'next-page', + } + FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE) + FIRST_PAGE['rows'] = [ + {'f': [{'v': 20160804}]}, + {'f': [{'v': 20160805}]}, + ] + del FIRST_PAGE['pageToken'] + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, FIRST_PAGE) + table_ref = DatasetReference(PROJECT, 'DS_ID').table('TABLE_ID') + self.assertEqual(client.list_partitions(table_ref), + [20160804, 20160805]) + class TestClientUpload(object): # NOTE: This is a "partner" to `TestClient` meant to test some of the diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 85a6f3a73b2d..f9fe1ddab2d1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -694,20 +694,6 @@ def test_partition_expiration_w_none_no_partition_set(self): self.assertIsNone(table.partitioning_type) self.assertIsNone(table.partition_expiration) - def test_list_partitions(self): - from google.cloud.bigquery.table import SchemaField - - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - client._query_results = [(20160804, None), (20160805, None)] - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - self.assertEqual(table.list_partitions(), [20160804, 20160805]) - def test_row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} @@ -1103,26 +1089,10 @@ def test_w_subfields(self): class _Client(object): - _query_results = () - def __init__(self, project='project', connection=None): self.project = project self._connection = connection - def run_sync_query(self, query): - return _Query(query, self) - - -class _Query(object): - - def __init__(self, query, client): - self.query = query - self.rows = [] - self.client = client - - def run(self): - self.rows = self.client._query_results - class _Connection(object): From a41621427071b42534e2c9ea2d1909354b51223b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 10 Oct 2017 11:26:09 -0700 Subject: [PATCH 0229/2016] BQ: remove fetch_data and client references from QueryResults (#4144) Now QueryResults is just a container for the getQueryResults response. --- .../google/cloud/bigquery/client.py | 5 +- .../google/cloud/bigquery/dbapi/cursor.py | 33 ++- .../google/cloud/bigquery/job.py | 6 +- .../google/cloud/bigquery/query.py | 140 +-------- .../tests/unit/test_client.py | 15 + .../tests/unit/test_dbapi_cursor.py | 32 +- .../tests/unit/test_job.py | 227 ++++++++------- .../tests/unit/test_query.py | 274 ++---------------- 8 files changed, 209 insertions(+), 523 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e8642f8afb79..1825190f1043 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -432,7 +432,7 @@ def _get_query_results(self, job_id, project=None, timeout_ms=None): resource = self._connection.api_request( method='GET', path=path, query_params=extra_params) - return QueryResults.from_api_repr(resource, self) + return QueryResults.from_api_repr(resource) def job_from_resource(self, resource): """Detect correct job type from resource and instantiate. @@ -925,7 +925,8 @@ def list_rows(self, table, selected_fields=None, max_results=None, params = {} if selected_fields is not None: params['selectedFields'] = ','.join( - [f.name for f in selected_fields]) + field.name for field in selected_fields) + if start_index is not None: params['startIndex'] = start_index diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index b5a05de6d90b..914d2e07c553 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -52,7 +52,7 @@ def __init__(self, connection): # a single row at a time. self.arraysize = 1 self._query_data = None - self._query_results = None + self._query_job = None def close(self): """No-op.""" @@ -133,7 +133,7 @@ def execute(self, operation, parameters=None, job_id=None): is generated at random. """ self._query_data = None - self._query_results = None + self._query_job = None client = self.connection._client # The DB-API uses the pyformat formatting, since the way BigQuery does @@ -147,17 +147,16 @@ def execute(self, operation, parameters=None, job_id=None): config = job.QueryJobConfig() config.query_parameters = query_parameters config.use_legacy_sql = False - query_job = client.query( + self._query_job = client.query( formatted_operation, job_config=config, job_id=job_id) # Wait for the query to finish. try: - query_job.result() + self._query_job.result() except google.cloud.exceptions.GoogleCloudError: - raise exceptions.DatabaseError(query_job.errors) + raise exceptions.DatabaseError(self._query_job.errors) - query_results = query_job.query_results() - self._query_results = query_results + query_results = self._query_job.query_results() self._set_rowcount(query_results) self._set_description(query_results.schema) @@ -178,16 +177,24 @@ def _try_fetch(self, size=None): Mutates self to indicate that iteration has started. """ - if self._query_results is None: + if self._query_job is None: raise exceptions.InterfaceError( 'No query results: execute() must be called before fetch.') - if size is None: - size = self.arraysize + is_dml = ( + self._query_job.statement_type + and self._query_job.statement_type.upper() != 'SELECT') + if is_dml: + self._query_data = iter([]) + return if self._query_data is None: - self._query_data = iter( - self._query_results.fetch_data(max_results=size)) + client = self.connection._client + # TODO(tswast): pass in page size to list_rows based on arraysize + rows_iter = client.list_rows( + self._query_job.destination, + selected_fields=self._query_job.query_results().schema) + self._query_data = iter(rows_iter) def fetchone(self): """Fetch a single row from the results of the last ``execute*()`` call. @@ -245,7 +252,7 @@ def fetchall(self): if called before ``execute()``. """ self._try_fetch() - return [row for row in self._query_data] + return list(self._query_data) def setinputsizes(self, sizes): """No-op.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 4f9c005a883e..137072f04ddb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1834,7 +1834,6 @@ def query_results(self): """ if not self._query_results: self._query_results = self._client._get_query_results(self.job_id) - self._query_results._job = self return self._query_results def done(self): @@ -1847,7 +1846,6 @@ def done(self): # change once complete. if self.state != _DONE_STATE: self._query_results = self._client._get_query_results(self.job_id) - self._query_results._job = self # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are @@ -1879,7 +1877,9 @@ def result(self, timeout=None): """ super(QueryJob, self).result(timeout=timeout) # Return an iterator instead of returning the job. - return self.query_results().fetch_data() + schema = self.query_results().schema + dest_table = self.destination + return self._client.list_rows(dest_table, selected_fields=schema) class QueryPlanEntryStep(object): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 888ce5853050..5d6beca77ea6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -16,11 +16,8 @@ import copy -from google.api.core import page_iterator from google.cloud.bigquery._helpers import _rows_from_json from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery._helpers import _item_to_row -from google.cloud.bigquery._helpers import _rows_page_start class QueryResults(object): @@ -28,46 +25,25 @@ class QueryResults(object): See: https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs/getQueryResults - - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). """ - def __init__(self, client, properties): - self._client = client + def __init__(self, properties): self._properties = {} - self._job = None self._set_properties(properties) @classmethod - def from_api_repr(cls, api_response, client): - return cls(client, api_response) + def from_api_repr(cls, api_response): + return cls(api_response) @property def project(self): - """Project bound to the job. + """Project bound to the query job. :rtype: str - :returns: the project (derived from the client). + :returns: the project that the query job is associated with. """ return self._properties.get('jobReference', {}).get('projectId') - def _require_client(self, client): - """Check client or verify over-ride. - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: :class:`google.cloud.bigquery.client.Client` - :returns: The client passed in or the currently bound client. - """ - if client is None: - client = self._client - return client - @property def cache_hit(self): """Query results served from cache. @@ -212,109 +188,3 @@ def _set_properties(self, api_response): self._properties.clear() self._properties.update(copy.deepcopy(api_response)) - - def job(self): - """Job instance used to run the query. - - :rtype: :class:`google.cloud.bigquery.job.QueryJob`, or ``NoneType`` - :returns: Job instance used to run the query (None until - ``jobReference`` property is set by the server). - """ - if self._job is None: - job_ref = self._properties['jobReference'] - self._job = self._client.get_job( - job_ref['jobId'], project=job_ref['projectId']) - - return self._job - - def fetch_data(self, max_results=None, page_token=None, start_index=None, - timeout_ms=None, client=None): - """API call: fetch a page of query result data via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults - - :type max_results: int - :param max_results: (Optional) maximum number of rows to return. - - :type page_token: str - :param page_token: - (Optional) token representing a cursor into the table's rows. - - :type start_index: int - :param start_index: (Optional) zero-based index of starting row - - :type timeout_ms: int - :param timeout_ms: - (Optional) How long to wait for the query to complete, in - milliseconds, before the request times out and returns. Note that - this is only a timeout for the request, not the query. If the query - takes longer to run than the timeout value, the call returns - without any results and with the 'jobComplete' flag set to false. - You can call GetQueryResults() to wait for the query to complete - and read the results. The default value is 10000 milliseconds (10 - seconds). - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: :class:`~google.api.core.page_iterator.Iterator` - :returns: Iterator of row data :class:`tuple`s. During each page, the - iterator will have the ``total_rows`` attribute set, - which counts the total number of rows **in the result - set** (this is distinct from the total number of rows in - the current page: ``iterator.page.num_items``). - :raises: ValueError if the query has not yet been executed. - """ - client = self._require_client(client) - params = {} - - if start_index is not None: - params['startIndex'] = start_index - - if timeout_ms is not None: - params['timeoutMs'] = timeout_ms - - if max_results is not None: - params['maxResults'] = max_results - - path = '/projects/%s/queries/%s' % (self.project, self.job_id) - iterator = page_iterator.HTTPIterator( - client=client, - api_request=client._connection.api_request, - path=path, - item_to_value=_item_to_row, - items_key='rows', - page_token=page_token, - page_start=_rows_page_start_query, - next_token='pageToken', - extra_params=params) - iterator.query_result = self - iterator.project = self.project - iterator.job_id = self.job_id - return iterator - - -def _rows_page_start_query(iterator, page, response): - """Update query response when :class:`~google.cloud.iterator.Page` starts. - - .. note:: - - This assumes that the ``query_response`` attribute has been - added to the iterator after being created, which - should be done by the caller. - - :type iterator: :class:`~google.api.core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type page: :class:`~google.cloud.iterator.Page` - :param page: The page that was just created. - - :type response: dict - :param response: The JSON API response for a page of rows in a table. - """ - iterator.query_result._set_properties(response) - iterator.schema = iterator.query_result.schema - _rows_page_start(iterator, page, response) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 23757933d747..54b4ac460d84 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1931,6 +1931,11 @@ def test_query_rows_defaults(self): 'configuration': { 'query': { 'query': QUERY, + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + }, }, }, 'status': { @@ -1991,6 +1996,11 @@ def test_query_rows_w_job_id(self): 'configuration': { 'query': { 'query': QUERY, + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + }, }, }, 'status': { @@ -2041,6 +2051,11 @@ def test_query_rows_w_job_config(self): 'query': { 'query': QUERY, 'useLegacySql': True, + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + }, }, 'dryRun': True, }, diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 7562acd13239..a16b7b47ee3f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -30,37 +30,45 @@ def _make_one(self, *args, **kw): def _mock_client( self, rows=None, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import client + + if rows is None: + total_rows = 0 + else: + total_rows = len(rows) + mock_client = mock.create_autospec(client.Client) mock_client.query.return_value = self._mock_job( - rows=rows, schema=schema, + total_rows=total_rows, + schema=schema, num_dml_affected_rows=num_dml_affected_rows) + mock_client.list_rows.return_value = rows return mock_client def _mock_job( - self, rows=None, schema=None, num_dml_affected_rows=None): + self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import job mock_job = mock.create_autospec(job.QueryJob) mock_job.error_result = None mock_job.state = 'DONE' mock_job.result.return_value = mock_job + + if num_dml_affected_rows is None: + mock_job.statement_type = None # API sends back None for SELECT + else: + mock_job.statement_type = 'UPDATE' + mock_job.query_results.return_value = self._mock_results( - rows=rows, schema=schema, + total_rows=total_rows, schema=schema, num_dml_affected_rows=num_dml_affected_rows) return mock_job def _mock_results( - self, rows=None, schema=None, num_dml_affected_rows=None): + self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import query mock_results = mock.create_autospec(query.QueryResults) mock_results.schema = schema mock_results.num_dml_affected_rows = num_dml_affected_rows - - if rows is None: - mock_results.total_rows = 0 - else: - mock_results.total_rows = len(rows) - - mock_results.fetch_data.return_value = rows + mock_results.total_rows = total_rows return mock_results def test_ctor(self): @@ -187,8 +195,10 @@ def test_execute_w_dml(self): self._mock_client(rows=[], num_dml_affected_rows=12)) cursor = connection.cursor() cursor.execute('DELETE FROM UserSessions WHERE user_id = \'test\';') + rows = cursor.fetchall() self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) + self.assertEqual(rows, []) def test_execute_w_query(self): from google.cloud.bigquery.schema import SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ad8cf965cd79..a9da40ee1d5e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -21,6 +21,26 @@ from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.dataset import DatasetReference +import mock + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(project='test-project', connection=None): + from google.cloud.bigquery.client import Client + + if connection is None: + connection = _Connection() + + client = Client( + project=project, credentials=_make_credentials(), _http=object()) + client._connection = connection + return client + class Test__bool_or_none(unittest.TestCase): @@ -143,6 +163,13 @@ def _makeResource(self, started=False, ended=False): if ended: resource['statistics']['endTime'] = (self.WHEN_TS + 1000) * 1000 + if self.JOB_TYPE == 'query': + resource['configuration']['query']['destinationTable'] = { + 'projectId': self.PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + } + return resource def _verifyInitialReadonlyProperties(self, job): @@ -319,7 +346,7 @@ def _verifyResourceProperties(self, job, resource): self.assertIsNone(job.skip_leading_rows) def test_ctor(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) self.assertIs(job.destination, self.TABLE_REF) @@ -357,7 +384,7 @@ def test_ctor(self): def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') config = LoadJobConfig() @@ -367,13 +394,13 @@ def test_ctor_w_config(self): self.assertEqual(job.schema, [full_name, age]) def test_done(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) self.assertTrue(job.done()) def test_result(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -386,7 +413,8 @@ def test_result_invokes_begin(self): done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} connection = _Connection(begun_resource, done_resource) - client = _Client(self.PROJECT, connection=connection) + client = _make_client(self.PROJECT) + client._connection = connection job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) @@ -462,7 +490,7 @@ def test_props_set_by_server(self): 'message': 'MESSAGE', 'reason': 'REASON'} - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job._properties['etag'] = 'ETAG' @@ -509,7 +537,7 @@ def test_props_set_by_server(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -517,7 +545,7 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': '%s:%s' % (self.PROJECT, self.JOB_ID), 'jobReference': { @@ -531,7 +559,7 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.FULL_JOB_ID, 'jobReference': { @@ -555,7 +583,7 @@ def test_from_api_repr_bare(self): self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() load_config = RESOURCE['configuration']['load'] load_config['createDisposition'] = 'CREATE_IF_NEEDED' @@ -566,7 +594,7 @@ def test_from_api_repr_w_properties(self): def test_begin_w_already_running(self): conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job._properties['status'] = {'state': 'RUNNING'} @@ -583,7 +611,7 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) @@ -622,7 +650,7 @@ def test_begin_w_autodetect(self): del resource['selfLink'] del resource['user_email'] conn = _Connection(resource) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) config = LoadJobConfig() config.autodetect = True job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, @@ -685,9 +713,9 @@ def test_begin_w_alternate_client(self): } RESOURCE['configuration']['load'] = LOAD_CONFIGURATION conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') config = LoadJobConfig() @@ -730,7 +758,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) @@ -745,9 +773,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) @@ -764,7 +792,7 @@ def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) @@ -780,9 +808,9 @@ def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) @@ -800,7 +828,7 @@ def test_cancel_w_bound_client(self): RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn = _Connection(RESPONSE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) @@ -817,9 +845,9 @@ def test_cancel_w_alternate_client(self): RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESPONSE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) @@ -893,7 +921,7 @@ def _verifyResourceProperties(self, job, resource): self.assertIsNone(job.write_disposition) def test_ctor(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) @@ -913,7 +941,7 @@ def test_ctor(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -921,7 +949,7 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { @@ -935,7 +963,7 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { @@ -964,7 +992,7 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_sourcetable(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { @@ -993,7 +1021,7 @@ def test_from_api_repr_w_sourcetable(self): def test_from_api_repr_wo_sources(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { @@ -1015,7 +1043,7 @@ def test_from_api_repr_wo_sources(self): klass.from_api_repr(RESOURCE, client=client) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() copy_config = RESOURCE['configuration']['copy'] copy_config['createDisposition'] = 'CREATE_IF_NEEDED' @@ -1033,7 +1061,7 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) @@ -1086,9 +1114,9 @@ def test_begin_w_alternate_client(self): } RESOURCE['configuration']['copy'] = COPY_CONFIGURATION conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) config = CopyJobConfig() @@ -1118,7 +1146,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -1135,9 +1163,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client1) @@ -1155,7 +1183,7 @@ def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) @@ -1172,9 +1200,9 @@ def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client1) @@ -1249,7 +1277,7 @@ def _verifyResourceProperties(self, job, resource): self.assertIsNone(job.print_header) def test_ctor(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) @@ -1271,7 +1299,7 @@ def test_ctor(self): def test_destination_uri_file_counts(self): file_counts = 23 - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) @@ -1288,7 +1316,7 @@ def test_destination_uri_file_counts(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -1296,7 +1324,7 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { @@ -1310,7 +1338,7 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { @@ -1334,7 +1362,7 @@ def test_from_api_repr_bare(self): self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() extract_config = RESOURCE['configuration']['extract'] extract_config['compression'] = 'GZIP' @@ -1352,7 +1380,7 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], @@ -1400,9 +1428,9 @@ def test_begin_w_alternate_client(self): } RESOURCE['configuration']['extract'] = EXTRACT_CONFIGURATION conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) job_config = ExtractJobConfig() @@ -1435,7 +1463,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) @@ -1451,9 +1479,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) @@ -1471,7 +1499,7 @@ def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], @@ -1489,9 +1517,9 @@ def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], @@ -1701,7 +1729,7 @@ def _verifyResourceProperties(self, job, resource): self.assertIsNone(job.write_disposition) def test_ctor_defaults(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.query, self.QUERY) self.assertIs(job._client, client) @@ -1733,7 +1761,7 @@ def test_ctor_w_udf_resources(self): RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) config = QueryJobConfig() config.udf_resources = udf_resources job = self._make_one( @@ -1745,7 +1773,7 @@ def test_ctor_w_query_parameters(self): from google.cloud.bigquery.job import QueryJobConfig query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) config = QueryJobConfig() config.query_parameters = query_parameters job = self._make_one( @@ -1754,7 +1782,7 @@ def test_ctor_w_query_parameters(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -1762,7 +1790,7 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { @@ -1776,7 +1804,7 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { @@ -1793,7 +1821,7 @@ def test_from_api_repr_bare(self): self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() query_config = RESOURCE['configuration']['query'] query_config['createDisposition'] = 'CREATE_IF_NEEDED' @@ -1809,7 +1837,7 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(job, RESOURCE) def test_cancelled(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) job._properties['status'] = { 'state': 'DONE', @@ -1821,7 +1849,7 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) def test_done(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) self.assertTrue(job.done()) @@ -1849,7 +1877,7 @@ def test_query_plan(self): 'substeps': ['SUBSTEP1', 'SUBSTEP2'], }], }] - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.query_plan, []) @@ -1890,7 +1918,7 @@ def test_query_plan(self): def test_total_bytes_processed(self): total_bytes = 1234 - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.total_bytes_processed) @@ -1905,7 +1933,7 @@ def test_total_bytes_processed(self): def test_total_bytes_billed(self): total_bytes = 1234 - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.total_bytes_billed) @@ -1920,7 +1948,7 @@ def test_total_bytes_billed(self): def test_billing_tier(self): billing_tier = 1 - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.billing_tier) @@ -1934,7 +1962,7 @@ def test_billing_tier(self): self.assertEqual(job.billing_tier, billing_tier) def test_cache_hit(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.cache_hit) @@ -1949,7 +1977,7 @@ def test_cache_hit(self): def test_num_dml_affected_rows(self): num_rows = 1234 - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.num_dml_affected_rows) @@ -1964,7 +1992,7 @@ def test_num_dml_affected_rows(self): def test_statement_type(self): statement_type = 'SELECT' - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.statement_type) @@ -1995,7 +2023,7 @@ def test_referenced_tables(self): 'datasetId': 'other-dataset', 'tableId': 'other-table', }] - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.referenced_tables, []) @@ -2070,7 +2098,7 @@ def test_undeclared_query_paramters(self): } }, }] - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.undeclared_query_paramters, []) @@ -2110,7 +2138,7 @@ def test_query_results(self): }, } connection = _Connection(query_resource) - client = _Client(self.PROJECT, connection=connection) + client = _make_client(self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) results = job.query_results() self.assertIsInstance(results, QueryResults) @@ -2118,7 +2146,7 @@ def test_query_results(self): def test_query_results_w_cached_value(self): from google.cloud.bigquery.query import QueryResults - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) resource = { 'jobReference': { @@ -2126,7 +2154,7 @@ def test_query_results_w_cached_value(self): 'jobId': self.JOB_ID, }, } - query_results = QueryResults(client, resource) + query_results = QueryResults(resource) job._query_results = query_results results = job.query_results() @@ -2142,7 +2170,7 @@ def test_result(self): }, } connection = _Connection(query_resource, query_resource) - client = _Client(self.PROJECT, connection=connection) + client = _make_client(self.PROJECT, connection=connection) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -2166,7 +2194,7 @@ def test_result_invokes_begins(self): connection = _Connection( begun_resource, incomplete_resource, query_resource, done_resource, query_resource) - client = _Client(self.PROJECT, connection=connection) + client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) job.result() @@ -2180,7 +2208,7 @@ def test_result_invokes_begins(self): def test_result_error(self): from google.cloud import exceptions - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) error_result = { 'debugInfo': 'DEBUG', @@ -2214,7 +2242,7 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.default_dataset = DatasetReference(self.PROJECT, DS_ID) @@ -2280,9 +2308,9 @@ def test_begin_w_alternate_client(self): RESOURCE['configuration']['query'] = QUERY_CONFIGURATION RESOURCE['configuration']['dryRun'] = True conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(TABLE) @@ -2340,7 +2368,7 @@ def test_begin_w_udf(self): {'inlineCode': INLINE_UDF_CODE}, ] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) udf_resources = [ UDFResource("resourceUri", RESOURCE_URI), UDFResource("inlineCode", INLINE_UDF_CODE), @@ -2403,7 +2431,7 @@ def test_begin_w_named_query_parameter(self): }, ] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters job = self._make_one( @@ -2458,7 +2486,7 @@ def test_begin_w_positional_query_parameter(self): }, ] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters job = self._make_one( @@ -2500,7 +2528,7 @@ def test_dry_run_query(self): del RESOURCE['user_email'] RESOURCE['configuration']['dryRun'] = True conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.dry_run = True job = self._make_one( @@ -2531,7 +2559,7 @@ def test_dry_run_query(self): def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertFalse(job.exists()) @@ -2545,9 +2573,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) self.assertTrue(job.exists(client=client2)) @@ -2568,7 +2596,7 @@ def test_reload_w_bound_client(self): DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) config = QueryJobConfig() @@ -2577,7 +2605,7 @@ def test_reload_w_bound_client(self): job.reload() - self.assertIsNone(job.destination) + self.assertNotEqual(job.destination, table_ref) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -2597,9 +2625,9 @@ def test_reload_w_alternate_client(self): 'tableId': DEST_TABLE, } conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) job.reload(client=client2) @@ -2791,19 +2819,6 @@ def test_from_api_repr_normal(self): self.assertEqual(entry.steps, steps) -class _Client(object): - - def __init__(self, project='project', connection=None): - self.project = project - self._connection = connection - - def _get_query_results(self, job_id): - from google.cloud.bigquery.query import QueryResults - - resource = self._connection.api_request(method='GET') - return QueryResults.from_api_repr(resource, self) - - class _Table(object): def __init__(self, table_id=None): diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index d2eae2ad77fb..d2322886daa5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -14,16 +14,6 @@ import unittest -import mock - -from google.cloud.bigquery import Client - - -def _make_credentials(): - import google.auth.credentials - - return mock.Mock(spec=google.auth.credentials.Credentials) - class TestQueryResults(unittest.TestCase): PROJECT = 'project' @@ -65,9 +55,7 @@ def _verifySchema(self, query, resource): self.assertEqual(query.schema, ()) def test_ctor_defaults(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) - self.assertIs(query._client, client) + query = self._make_one(self._makeResource()) self.assertIsNone(query.cache_hit) self.assertIsNone(query.complete) self.assertIsNone(query.errors) @@ -78,66 +66,28 @@ def test_ctor_defaults(self): self.assertIsNone(query.total_rows) self.assertIsNone(query.total_bytes_processed) - def test_job_w_jobid(self): - from google.cloud.bigquery.job import QueryJob - - SERVER_GENERATED = 'SERVER_GENERATED' - job_resource = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': SERVER_GENERATED, - }, - 'configuration': {'query': {'query': 'SELECT 1'}}, - } - query_resource = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': SERVER_GENERATED, - }, - } - conn = _Connection(job_resource) - client = _Client(self.PROJECT, conn) - query = self._make_one(client, query_resource) - job = query.job() - self.assertIsInstance(job, QueryJob) - self.assertIs(job._client, client) - self.assertEqual(job.job_id, SERVER_GENERATED) - fetched_later = query.job() - self.assertIs(fetched_later, job) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual( - req['path'], - '/projects/{}/jobs/{}'.format(self.PROJECT, SERVER_GENERATED)) - def test_cache_hit_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.cache_hit) def test_cache_hit_present(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['cacheHit'] = True - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertTrue(query.cache_hit) def test_complete_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.complete) def test_complete_present(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['jobComplete'] = True - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertTrue(query.complete) def test_errors_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.errors) def test_errors_present(self): @@ -146,95 +96,80 @@ def test_errors_present(self): ] resource = self._makeResource() resource['errors'] = ERRORS - client = _Client(self.PROJECT) - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.errors, ERRORS) def test_job_id_missing(self): - client = _Client(self.PROJECT) with self.assertRaises(ValueError): - self._make_one(client, {}) + self._make_one({}) def test_job_id_broken_job_reference(self): - client = _Client(self.PROJECT) resource = {'jobReference': {'bogus': 'BOGUS'}} with self.assertRaises(ValueError): - self._make_one(client, resource) + self._make_one(resource) def test_job_id_present(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['jobReference']['jobId'] = 'custom-job' - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.job_id, 'custom-job') def test_page_token_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.page_token) def test_page_token_present(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['pageToken'] = 'TOKEN' - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.page_token, 'TOKEN') def test_total_rows_present_integer(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['totalRows'] = 42 - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.total_rows, 42) def test_total_rows_present_string(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['totalRows'] = '42' - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.total_rows, 42) def test_total_bytes_processed_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.total_bytes_processed) def test_total_bytes_processed_present_integer(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['totalBytesProcessed'] = 123456 - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) def test_total_bytes_processed_present_string(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['totalBytesProcessed'] = '123456' - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) def test_num_dml_affected_rows_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.num_dml_affected_rows) def test_num_dml_affected_rows_present_integer(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['numDmlAffectedRows'] = 123456 - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.num_dml_affected_rows, 123456) def test_num_dml_affected_rows_present_string(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['numDmlAffectedRows'] = '123456' - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.num_dml_affected_rows, 123456) def test_schema(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self._verifySchema(query, self._makeResource()) resource = self._makeResource() resource['schema'] = { @@ -245,170 +180,3 @@ def test_schema(self): } query._set_properties(resource) self._verifySchema(query, resource) - - def test_fetch_data_w_bound_client(self): - import six - - PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_ID) - schema = { - 'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, - ], - } - BEFORE = self._makeResource() - BEFORE['jobComplete'] = False - BEFORE['schema'] = schema - AFTER = self._makeResource() - AFTER['rows'] = [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': 32}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': 33}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': 29}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': 27}, - ]}, - ] - AFTER['cacheHit'] = False - AFTER['jobComplete'] = True - AFTER['numDmlAffectedRows'] = 123 - AFTER['pageToken'] = self.TOKEN - AFTER['schema'] = schema - AFTER['totalBytesProcessed'] = 100000 - - conn = _Connection(AFTER) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(client, BEFORE) - self.assertFalse(query.complete) - - iterator = query.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertTrue(query.complete) - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29)) - self.assertEqual(rows[3], ('Bhettye Rhubble', 27)) - self.assertIsNone(total_rows) - self.assertEqual(page_token, AFTER['pageToken']) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_fetch_data_w_alternate_client(self): - import six - - PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_ID) - MAX = 10 - START = 2257 - TIMEOUT = 20000 - - schema = { - 'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, - ], - } - BEFORE = self._makeResource() - BEFORE['jobComplete'] = False - BEFORE['schema'] = schema - AFTER = self._makeResource() - AFTER['rows'] = [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': 32}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': 33}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': 29}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': 27}, - ]}, - ] - AFTER['cacheHit'] = False - AFTER['jobComplete'] = True - AFTER['numDmlAffectedRows'] = 123 - AFTER['pageToken'] = self.TOKEN - AFTER['schema'] = schema - AFTER['totalBytesProcessed'] = 100000 - - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(AFTER) - client2 = _Client(project=self.PROJECT, connection=conn2) - query = self._make_one(client1, BEFORE) - self.assertFalse(query.complete) - - iterator = query.fetch_data( - client=client2, max_results=MAX, page_token=self.TOKEN, - start_index=START, timeout_ms=TIMEOUT) - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertTrue(query.complete) - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29)) - self.assertEqual(rows[3], ('Bhettye Rhubble', 27)) - self.assertIsNone(total_rows) - self.assertEqual(page_token, AFTER['pageToken']) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'maxResults': MAX, - 'pageToken': self.TOKEN, - 'startIndex': START, - 'timeoutMs': TIMEOUT}) - - -class _Client(Client): - - def __init__(self, project='project', connection=None): - creds = _make_credentials() - http = object() - super(_Client, self).__init__( - project=project, credentials=creds, _http=http) - - if connection is None: - connection = _Connection() - self._connection = connection - - -class _Connection(object): - - def __init__(self, *responses): - self._responses = responses - self._requested = [] - - def api_request(self, **kw): - self._requested.append(kw) - response, self._responses = self._responses[0], self._responses[1:] - return response From 059acadd45de855b16dbb9ea050a885aee22824c Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Tue, 10 Oct 2017 16:41:10 -0400 Subject: [PATCH 0230/2016] bigquery: add Row class (#4149) Rows are represented by a new class, Row, which allows access by position or field name. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/_helpers.py | 79 +++++++++++++++++-- .../google/cloud/bigquery/client.py | 2 + .../google/cloud/bigquery/query.py | 4 +- .../google-cloud-bigquery/tests/system.py | 31 +++++--- .../tests/unit/test__helpers.py | 47 ++++++++--- .../tests/unit/test_client.py | 18 +++-- 7 files changed, 148 insertions(+), 35 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index d6c210e9843b..f075dfab4dcf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -30,6 +30,7 @@ from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery._helpers import UDFResource +from google.cloud.bigquery._helpers import Row from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset @@ -52,6 +53,7 @@ 'CopyJobConfig', 'ExtractJobConfig', 'QueryJobConfig', + 'Row', 'LoadJobConfig', 'ScalarQueryParameter', 'SchemaField', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index dad87fde88bb..710e096ccc2b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -18,6 +18,9 @@ from collections import OrderedDict import copy import datetime +import operator + +import six from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -176,7 +179,70 @@ def _record_from_json(value, field): _QUERY_PARAMS_FROM_JSON['TIMESTAMP'] = _timestamp_query_param_from_json -def _row_from_json(row, schema): +class Row(object): + """A BigQuery row. + + Values can be accessed by position (index), by key like a dict, + or as properties. + + :type values: tuple + :param values: the row values + + :type field_to_index: dict + :param field_to_index: a mapping from schema field names to indexes + """ + + # Choose unusual field names to try to avoid conflict with schema fields. + __slots__ = ('_xxx_values', '_xxx_field_to_index') + + def __init__(self, values, field_to_index): + self._xxx_values = values + self._xxx_field_to_index = field_to_index + + def values(self): + return self._xxx_values + + def __getattr__(self, name): + i = self._xxx_field_to_index.get(name) + if i is None: + raise AttributeError('no row field "%s"' % name) + return self._xxx_values[i] + + def __len__(self): + return len(self._xxx_values) + + def __getitem__(self, key): + if isinstance(key, six.string_types): + i = self._xxx_field_to_index.get(key) + if i is None: + raise KeyError('no row field "%s"' % key) + key = i + return self._xxx_values[key] + + def __eq__(self, other): + if not isinstance(other, Row): + return NotImplemented + return( + self._xxx_values == other._xxx_values and + self._xxx_field_to_index == other._xxx_field_to_index) + + def __ne__(self, other): + return not self == other + + def __repr__(self): + # sort field dict by value, for determinism + items = sorted(self._xxx_field_to_index.items(), + key=operator.itemgetter(1)) + f2i = '{' + ', '.join('%r: %d' % i for i in items) + '}' + return 'Row({}, {})'.format(self._xxx_values, f2i) + + +def _field_to_index_mapping(schema): + """Create a mapping from schema field name to index of field.""" + return {f.name: i for i, f in enumerate(schema)} + + +def _row_tuple_from_json(row, schema): """Convert JSON row data to row with appropriate types. Note: ``row['f']`` and ``schema`` are presumed to be of the same length. @@ -203,9 +269,11 @@ def _row_from_json(row, schema): return tuple(row_data) -def _rows_from_json(rows, schema): +def _rows_from_json(values, schema): """Convert JSON row data to rows with appropriate types.""" - return [_row_from_json(row, schema) for row in rows] + field_to_index = _field_to_index_mapping(schema) + return [Row(_row_tuple_from_json(r, schema), field_to_index) + for r in values] def _int_to_json(value): @@ -935,10 +1003,11 @@ def _item_to_row(iterator, resource): :type resource: dict :param resource: An item to be converted to a row. - :rtype: tuple + :rtype: :class:`Row` :returns: The next row in the page. """ - return _row_from_json(resource, iterator.schema) + return Row(_row_tuple_from_json(resource, iterator.schema), + iterator._field_to_index) # pylint: disable=unused-argument diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1825190f1043..a493e3dcd426 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -41,6 +41,7 @@ from google.cloud.bigquery.query import QueryResults from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start +from google.cloud.bigquery._helpers import _field_to_index_mapping _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB @@ -942,6 +943,7 @@ def list_rows(self, table, selected_fields=None, max_results=None, page_start=_rows_page_start, extra_params=params) iterator.schema = schema + iterator._field_to_index = _field_to_index_mapping(schema) return iterator def list_partitions(self, table): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 5d6beca77ea6..5524ac1670b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -16,8 +16,8 @@ import copy -from google.cloud.bigquery._helpers import _rows_from_json from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery._helpers import _rows_from_json class QueryResults(object): @@ -156,7 +156,7 @@ def rows(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#rows - :rtype: list of tuples of row values, or ``NoneType`` + :rtype: list of :class:`~google.cloud.bigquery.Row` :returns: fields describing the schema (None until set by the server). """ return _rows_from_json(self._properties.get('rows', ()), self.schema) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 9ef835a9bb2a..8a78701a3808 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -353,9 +353,9 @@ def test_insert_data_then_dump_table(self): # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds retry = RetryResult(_has_rows, max_tries=8) rows = retry(self._fetch_single_page)(table) - + row_tuples = [r.values() for r in rows] by_age = operator.itemgetter(1) - self.assertEqual(sorted(rows, key=by_age), + self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) def test_load_table_from_local_file_then_dump_table(self): @@ -401,8 +401,9 @@ def test_load_table_from_local_file_then_dump_table(self): self.assertEqual(job.output_rows, len(ROWS)) rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] by_age = operator.itemgetter(1) - self.assertEqual(sorted(rows, key=by_age), + self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) def test_load_table_from_local_avro_file_then_dump_table(self): @@ -434,8 +435,9 @@ def test_load_table_from_local_avro_file_then_dump_table(self): table = Config.CLIENT.get_table(table) rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] by_wavelength = operator.itemgetter(1) - self.assertEqual(sorted(rows, key=by_wavelength), + self.assertEqual(sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength)) def test_load_table_from_storage_then_dump_table(self): @@ -499,8 +501,9 @@ def test_load_table_from_storage_then_dump_table(self): retry(job.reload)() rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] by_age = operator.itemgetter(1) - self.assertEqual(sorted(rows, key=by_age), + self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) def test_load_table_from_storage_w_autodetect_schema(self): @@ -562,9 +565,10 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.assertEqual(table.schema, [field_name, field_age]) actual_rows = self._fetch_single_page(table) + actual_row_tuples = [r.values() for r in actual_rows] by_age = operator.itemgetter(1) self.assertEqual( - sorted(actual_rows, key=by_age), sorted(rows, key=by_age)) + sorted(actual_row_tuples, key=by_age), sorted(rows, key=by_age)) def _load_table_for_extract_table( self, storage_client, rows, bucket_name, blob_name, table): @@ -884,7 +888,8 @@ def test_dbapi_fetchall(self): self.assertEqual(Config.CURSOR.rowcount, 3, "expected 3 rows") Config.CURSOR.arraysize = arraysize rows = Config.CURSOR.fetchall() - self.assertEqual(rows, [(1, 2), (3, 4), (5, 6)]) + row_tuples = [r.values() for r in rows] + self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)]) def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile @@ -1270,8 +1275,8 @@ def test_large_query_w_public_data(self): def test_query_future(self): query_job = Config.CLIENT.query('SELECT 1') iterator = query_job.result(timeout=JOB_TIMEOUT) - rows = list(iterator) - self.assertEqual(rows, [(1,)]) + row_tuples = [r.values() for r in iterator] + self.assertEqual(row_tuples, [(1,)]) def test_insert_nested_nested(self): # See #2951 @@ -1305,8 +1310,8 @@ def test_insert_nested_nested(self): retry = RetryResult(_has_rows, max_tries=8) rows = retry(self._fetch_single_page)(table) - - self.assertEqual(rows, to_insert) + row_tuples = [r.values() for r in rows] + self.assertEqual(row_tuples, to_insert) def test_create_table_insert_fetch_nested_schema(self): table_name = 'test_table' @@ -1334,9 +1339,11 @@ def test_create_table_insert_fetch_nested_schema(self): retry = RetryResult(_has_rows, max_tries=8) fetched = retry(self._fetch_single_page)(table) + fetched_tuples = [f.values() for f in fetched] + self.assertEqual(len(fetched), len(to_insert)) - for found, expected in zip(sorted(fetched), sorted(to_insert)): + for found, expected in zip(sorted(fetched_tuples), sorted(to_insert)): self.assertEqual(found[0], expected[0]) # Name self.assertEqual(found[1], int(expected[1])) # Age self.assertEqual(found[2], expected[2]) # Weight diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 2254f6b01d89..2cf3f0f1f8e4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -377,12 +377,12 @@ def test_w_record_subfield(self): self.assertEqual(coerced, expected) -class Test_row_from_json(unittest.TestCase): +class Test_row_tuple_from_json(unittest.TestCase): def _call_fut(self, row, schema): - from google.cloud.bigquery._helpers import _row_from_json + from google.cloud.bigquery._helpers import _row_tuple_from_json - return _row_from_json(row, schema) + return _row_tuple_from_json(row, schema) def test_w_single_scalar_column(self): # SELECT 1 AS col @@ -468,15 +468,36 @@ def test_w_array_of_struct_w_array(self): {u'first': [5, 6], u'second': 7}, ],)) + def test_row(self): + from google.cloud.bigquery._helpers import Row + + VALUES = (1, 2, 3) + r = Row(VALUES, {'a': 0, 'b': 1, 'c': 2}) + self.assertEqual(r.a, 1) + self.assertEqual(r[1], 2) + self.assertEqual(r['c'], 3) + self.assertEqual(len(r), 3) + self.assertEqual(r.values(), VALUES) + self.assertEqual(repr(r), + "Row((1, 2, 3), {'a': 0, 'b': 1, 'c': 2})") + self.assertFalse(r != r) + self.assertFalse(r == 3) + with self.assertRaises(AttributeError): + r.z + with self.assertRaises(KeyError): + r['z'] + class Test_rows_from_json(unittest.TestCase): - def _call_fut(self, value, field): + def _call_fut(self, rows, schema): from google.cloud.bigquery._helpers import _rows_from_json - return _rows_from_json(value, field) + return _rows_from_json(rows, schema) def test_w_record_subfield(self): + from google.cloud.bigquery._helpers import Row + full_name = _Field('REQUIRED', 'full_name', 'STRING') area_code = _Field('REQUIRED', 'area_code', 'STRING') local_number = _Field('REQUIRED', 'local_number', 'STRING') @@ -512,15 +533,18 @@ def test_w_record_subfield(self): 'local_number': '768-5309', 'rank': 2, } + f2i = {'full_name': 0, 'phone': 1, 'color': 2} expected = [ - ('Phred Phlyntstone', phred_phone, ['orange', 'black']), - ('Bharney Rhubble', bharney_phone, ['brown']), - ('Wylma Phlyntstone', None, []), + Row(('Phred Phlyntstone', phred_phone, ['orange', 'black']), f2i), + Row(('Bharney Rhubble', bharney_phone, ['brown']), f2i), + Row(('Wylma Phlyntstone', None, []), f2i), ] coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) def test_w_int64_float64_bool(self): + from google.cloud.bigquery._helpers import Row + # "Standard" SQL dialect uses 'INT64', 'FLOAT64', 'BOOL'. candidate = _Field('REQUIRED', 'candidate', 'STRING') votes = _Field('REQUIRED', 'votes', 'INT64') @@ -547,10 +571,11 @@ def test_w_int64_float64_bool(self): {'v': 'false'}, ]}, ] + f2i = {'candidate': 0, 'votes': 1, 'percentage': 2, 'incumbent': 3} expected = [ - ('Phred Phlyntstone', 8, 0.25, True), - ('Bharney Rhubble', 4, 0.125, False), - ('Wylma Phlyntstone', 20, 0.625, False), + Row(('Phred Phlyntstone', 8, 0.25, True), f2i), + Row(('Bharney Rhubble', 4, 0.125, False), f2i), + Row(('Wylma Phlyntstone', 20, 0.625, False), f2i), ] coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 54b4ac460d84..d34c192c3fac 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1919,6 +1919,7 @@ def test_query_w_query_parameters(self): def test_query_rows_defaults(self): from google.api.core.page_iterator import HTTPIterator + from google.cloud.bigquery._helpers import Row JOB = 'job-id' PROJECT = 'PROJECT' @@ -1972,7 +1973,7 @@ def test_query_rows_defaults(self): rows_iter = client.query_rows(QUERY) rows = list(rows_iter) - self.assertEqual(rows, [(1,), (2,), (3,)]) + self.assertEqual(rows, [Row((i,), {'field0': 0}) for i in (1, 2, 3)]) self.assertIs(rows_iter.client, client) self.assertIsInstance(rows_iter, HTTPIterator) self.assertEqual(len(conn._requested), 4) @@ -2099,6 +2100,7 @@ def test_list_rows(self): from google.cloud._helpers import UTC from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery._helpers import Row PROJECT = 'PROJECT' DS_ID = 'DS_ID' @@ -2159,11 +2161,12 @@ def _bigquery_timestamp_float_repr(ts_float): total_rows = iterator.total_rows page_token = iterator.next_page_token + f2i = {'full_name': 0, 'age': 1, 'joined': 2} self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32, WHEN)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33, WHEN_1)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, WHEN_2)) - self.assertEqual(rows[3], ('Bhettye Rhubble', None, None)) + self.assertEqual(rows[0], Row(('Phred Phlyntstone', 32, WHEN), f2i)) + self.assertEqual(rows[1], Row(('Bharney Rhubble', 33, WHEN_1), f2i)) + self.assertEqual(rows[2], Row(('Wylma Phlyntstone', 29, WHEN_2), f2i)) + self.assertEqual(rows[3], Row(('Bhettye Rhubble', None, None), f2i)) self.assertEqual(total_rows, ROWS) self.assertEqual(page_token, TOKEN) @@ -2359,6 +2362,11 @@ def test_list_partitions(self): 'configuration': { 'query': { 'query': 'q', + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': 'DS_ID', + 'tableId': 'TABLE_ID', + }, }, }, 'status': { From f29832cbd32811e2bd126ef8d3813a870c2ab337 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Oct 2017 12:19:17 -0700 Subject: [PATCH 0231/2016] BQ: move *QueryParameter and UDFResource classes to query. (#4156) These classes are expected to be constructed by users, so moving them to a public module. --- .../google/cloud/bigquery/__init__.py | 8 +- .../google/cloud/bigquery/_helpers.py | 442 ------- .../google/cloud/bigquery/dbapi/_helpers.py | 6 +- .../google/cloud/bigquery/job.py | 26 +- .../google/cloud/bigquery/query.py | 443 +++++++ .../google-cloud-bigquery/tests/system.py | 6 +- .../tests/unit/test__helpers.py | 1082 +---------------- .../tests/unit/test_client.py | 4 +- .../tests/unit/test_job.py | 16 +- .../tests/unit/test_query.py | 1071 ++++++++++++++++ 10 files changed, 1555 insertions(+), 1549 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index f075dfab4dcf..2682ca8ddb6d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -26,10 +26,6 @@ from pkg_resources import get_distribution __version__ = get_distribution('google-cloud-bigquery').version -from google.cloud.bigquery._helpers import ArrayQueryParameter -from google.cloud.bigquery._helpers import ScalarQueryParameter -from google.cloud.bigquery._helpers import StructQueryParameter -from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery._helpers import Row from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry @@ -39,6 +35,10 @@ from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import LoadJobConfig +from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 710e096ccc2b..ae7f1186fe40 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -15,8 +15,6 @@ """Shared helper functions for BigQuery API classes.""" import base64 -from collections import OrderedDict -import copy import datetime import operator @@ -548,446 +546,6 @@ class _EnumProperty(_ConfigurationProperty): """ -class UDFResource(object): - """Describe a single user-defined function (UDF) resource. - - :type udf_type: str - :param udf_type: the type of the resource ('inlineCode' or 'resourceUri') - - :type value: str - :param value: the inline code or resource URI. - - See - https://cloud.google.com/bigquery/user-defined-functions#api - """ - def __init__(self, udf_type, value): - self.udf_type = udf_type - self.value = value - - def __eq__(self, other): - if not isinstance(other, UDFResource): - return NotImplemented - return( - self.udf_type == other.udf_type and - self.value == other.value) - - def __ne__(self, other): - return not self == other - - -class AbstractQueryParameter(object): - """Base class for named / positional query parameters. - """ - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. - - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`ScalarQueryParameter` - """ - raise NotImplementedError - - def to_api_repr(self): - """Construct JSON API representation for the parameter. - - :rtype: dict - """ - raise NotImplementedError - - -class ScalarQueryParameter(AbstractQueryParameter): - """Named / positional query parameters for scalar values. - - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). - - :type type_: str - :param type_: name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - - :type value: str, int, float, bool, :class:`datetime.datetime`, or - :class:`datetime.date`. - :param value: the scalar parameter value. - """ - def __init__(self, name, type_, value): - self.name = name - self.type_ = type_ - self.value = value - - @classmethod - def positional(cls, type_, value): - """Factory for positional paramater. - - :type type_: str - :param type_: - name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - - :type value: str, int, float, bool, :class:`datetime.datetime`, or - :class:`datetime.date`. - :param value: the scalar parameter value. - - :rtype: :class:`ScalarQueryParameter` - :returns: instance without name - """ - return cls(None, type_, value) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. - - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`ScalarQueryParameter` - :returns: instance - """ - name = resource.get('name') - type_ = resource['parameterType']['type'] - value = resource['parameterValue']['value'] - converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) - return cls(name, type_, converted) - - def to_api_repr(self): - """Construct JSON API representation for the parameter. - - :rtype: dict - :returns: JSON mapping - """ - value = self.value - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) - if converter is not None: - value = converter(value) - resource = { - 'parameterType': { - 'type': self.type_, - }, - 'parameterValue': { - 'value': value, - }, - } - if self.name is not None: - resource['name'] = self.name - return resource - - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - tuple: The contents of this :class:`ScalarQueryParameter`. - """ - return ( - self.name, - self.type_.upper(), - self.value, - ) - - def __eq__(self, other): - if not isinstance(other, ScalarQueryParameter): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __repr__(self): - return 'ScalarQueryParameter{}'.format(self._key()) - - -class ArrayQueryParameter(AbstractQueryParameter): - """Named / positional query parameters for array values. - - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). - - :type array_type: str - :param array_type: - name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. - - :type values: list of appropriate scalar type. - :param values: the parameter array values. - """ - def __init__(self, name, array_type, values): - self.name = name - self.array_type = array_type - self.values = values - - @classmethod - def positional(cls, array_type, values): - """Factory for positional parameters. - - :type array_type: str - :param array_type: - name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. - - :type values: list of appropriate scalar type - :param values: the parameter array values. - - :rtype: :class:`ArrayQueryParameter` - :returns: instance without name - """ - return cls(None, array_type, values) - - @classmethod - def _from_api_repr_struct(cls, resource): - name = resource.get('name') - converted = [] - # We need to flatten the array to use the StructQueryParameter - # parse code. - resource_template = { - # The arrayType includes all the types of the fields of the STRUCT - 'parameterType': resource['parameterType']['arrayType'] - } - for array_value in resource['parameterValue']['arrayValues']: - struct_resource = copy.deepcopy(resource_template) - struct_resource['parameterValue'] = array_value - struct_value = StructQueryParameter.from_api_repr(struct_resource) - converted.append(struct_value) - return cls(name, 'STRUCT', converted) - - @classmethod - def _from_api_repr_scalar(cls, resource): - name = resource.get('name') - array_type = resource['parameterType']['arrayType']['type'] - values = [ - value['value'] - for value - in resource['parameterValue']['arrayValues']] - converted = [ - _QUERY_PARAMS_FROM_JSON[array_type](value, None) - for value in values - ] - return cls(name, array_type, converted) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. - - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`ArrayQueryParameter` - :returns: instance - """ - array_type = resource['parameterType']['arrayType']['type'] - if array_type == 'STRUCT': - return cls._from_api_repr_struct(resource) - return cls._from_api_repr_scalar(resource) - - def to_api_repr(self): - """Construct JSON API representation for the parameter. - - :rtype: dict - :returns: JSON mapping - """ - values = self.values - if self.array_type == 'RECORD' or self.array_type == 'STRUCT': - reprs = [value.to_api_repr() for value in values] - a_type = reprs[0]['parameterType'] - a_values = [repr_['parameterValue'] for repr_ in reprs] - else: - a_type = {'type': self.array_type} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) - if converter is not None: - values = [converter(value) for value in values] - a_values = [{'value': value} for value in values] - resource = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': a_type, - }, - 'parameterValue': { - 'arrayValues': a_values, - }, - } - if self.name is not None: - resource['name'] = self.name - return resource - - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - tuple: The contents of this :class:`ArrayQueryParameter`. - """ - return ( - self.name, - self.array_type.upper(), - self.values, - ) - - def __eq__(self, other): - if not isinstance(other, ArrayQueryParameter): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __repr__(self): - return 'ArrayQueryParameter{}'.format(self._key()) - - -class StructQueryParameter(AbstractQueryParameter): - """Named / positional query parameters for struct values. - - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). - - :type sub_params: tuple of :class:`ScalarQueryParameter` - :param sub_params: the sub-parameters for the struct - """ - def __init__(self, name, *sub_params): - self.name = name - types = self.struct_types = OrderedDict() - values = self.struct_values = {} - for sub in sub_params: - if isinstance(sub, self.__class__): - types[sub.name] = 'STRUCT' - values[sub.name] = sub - elif isinstance(sub, ArrayQueryParameter): - types[sub.name] = 'ARRAY' - values[sub.name] = sub - else: - types[sub.name] = sub.type_ - values[sub.name] = sub.value - - @classmethod - def positional(cls, *sub_params): - """Factory for positional parameters. - - :type sub_params: tuple of :class:`ScalarQueryParameter` - :param sub_params: the sub-parameters for the struct - - :rtype: :class:`StructQueryParameter` - :returns: instance without name - """ - return cls(None, *sub_params) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. - - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`StructQueryParameter` - :returns: instance - """ - name = resource.get('name') - instance = cls(name) - type_resources = {} - types = instance.struct_types - for item in resource['parameterType']['structTypes']: - types[item['name']] = item['type']['type'] - type_resources[item['name']] = item['type'] - struct_values = resource['parameterValue']['structValues'] - for key, value in struct_values.items(): - type_ = types[key] - converted = None - if type_ == 'STRUCT': - struct_resource = { - 'name': key, - 'parameterType': type_resources[key], - 'parameterValue': value, - } - converted = StructQueryParameter.from_api_repr(struct_resource) - elif type_ == 'ARRAY': - struct_resource = { - 'name': key, - 'parameterType': type_resources[key], - 'parameterValue': value, - } - converted = ArrayQueryParameter.from_api_repr(struct_resource) - else: - value = value['value'] - converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) - instance.struct_values[key] = converted - return instance - - def to_api_repr(self): - """Construct JSON API representation for the parameter. - - :rtype: dict - :returns: JSON mapping - """ - s_types = {} - values = {} - for name, value in self.struct_values.items(): - type_ = self.struct_types[name] - if type_ in ('STRUCT', 'ARRAY'): - repr_ = value.to_api_repr() - s_types[name] = {'name': name, 'type': repr_['parameterType']} - values[name] = repr_['parameterValue'] - else: - s_types[name] = {'name': name, 'type': {'type': type_}} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) - if converter is not None: - value = converter(value) - values[name] = {'value': value} - - resource = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [s_types[key] for key in self.struct_types], - }, - 'parameterValue': { - 'structValues': values, - }, - } - if self.name is not None: - resource['name'] = self.name - return resource - - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - tuple: The contents of this :class:`ArrayQueryParameter`. - """ - return ( - self.name, - self.struct_types, - self.struct_values, - ) - - def __eq__(self, other): - if not isinstance(other, StructQueryParameter): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __repr__(self): - return 'StructQueryParameter{}'.format(self._key()) - - -def _query_param_from_api_repr(resource): - """Helper: construct concrete query parameter from JSON resource.""" - qp_type = resource['parameterType'] - if 'arrayType' in qp_type: - klass = ArrayQueryParameter - elif 'structTypes' in qp_type: - klass = StructQueryParameter - else: - klass = ScalarQueryParameter - return klass.from_api_repr(resource) - - def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index a9a358cbf0f5..a2cee9c5272b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -69,7 +69,7 @@ def to_query_parameters_list(parameters): :type parameters: Sequence[Any] :param parameters: Sequence of query parameter values. - :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] :returns: A list of query parameters. """ return [scalar_to_query_parameter(value) for value in parameters] @@ -81,7 +81,7 @@ def to_query_parameters_dict(parameters): :type parameters: Mapping[str, Any] :param parameters: Dictionary of query parameter values. - :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] :returns: A list of named query parameters. """ return [ @@ -96,7 +96,7 @@ def to_query_parameters(parameters): :type parameters: Mapping[str, Any] or Sequence[Any] :param parameters: A dictionary or sequence of query parameter values. - :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] :returns: A list of query parameters. """ if parameters is None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 137072f04ddb..593b14e41fa1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -25,18 +25,18 @@ from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.query import _AbstractQueryParameter +from google.cloud.bigquery.query import _query_param_from_api_repr +from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery._helpers import AbstractQueryParameter -from google.cloud.bigquery._helpers import ArrayQueryParameter -from google.cloud.bigquery._helpers import ScalarQueryParameter -from google.cloud.bigquery._helpers import StructQueryParameter -from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery._helpers import _EnumApiResourceProperty from google.cloud.bigquery._helpers import _ListApiResourceProperty -from google.cloud.bigquery._helpers import _query_param_from_api_repr from google.cloud.bigquery._helpers import _TypedApiResourceProperty _DONE_STATE = 'DONE' @@ -1412,10 +1412,12 @@ def from_api_repr(cls, resource): """ query_parameters = _ListApiResourceProperty( - 'query_parameters', _QUERY_PARAMETERS_KEY, AbstractQueryParameter) + 'query_parameters', _QUERY_PARAMETERS_KEY, _AbstractQueryParameter) """ - An list of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` + A list of + :class:`google.cloud.bigquery.query.ArrayQueryParameter`, + :class:`google.cloud.bigquery.query.ScalarQueryParameter`, or + :class:`google.cloud.bigquery.query.StructQueryParameter` (empty by default) See: @@ -1425,7 +1427,7 @@ def from_api_repr(cls, resource): udf_resources = _ListApiResourceProperty( 'udf_resources', _UDF_RESOURCES_KEY, UDFResource) """ - A list of :class:`google.cloud.bigquery._helpers.UDFResource` (empty + A list of :class:`google.cloud.bigquery.query.UDFResource` (empty by default) See: @@ -1805,7 +1807,9 @@ def undeclared_query_paramters(self): :rtype: list of - :class:`~google.cloud.bigquery._helpers.AbstractQueryParameter` + :class:`~google.cloud.bigquery.query.ArrayQueryParameter`, + :class:`~google.cloud.bigquery.query.ScalarQueryParameter`, or + :class:`~google.cloud.bigquery.query.StructQueryParameter` :returns: undeclared parameters, or an empty list if the query has not yet completed. """ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 5524ac1670b6..9577fa57cc5d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -14,10 +14,441 @@ """BigQuery query processing.""" +from collections import OrderedDict import copy from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json +from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON +from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM + + +class UDFResource(object): + """Describe a single user-defined function (UDF) resource. + + :type udf_type: str + :param udf_type: the type of the resource ('inlineCode' or 'resourceUri') + + :type value: str + :param value: the inline code or resource URI. + + See + https://cloud.google.com/bigquery/user-defined-functions#api + """ + def __init__(self, udf_type, value): + self.udf_type = udf_type + self.value = value + + def __eq__(self, other): + if not isinstance(other, UDFResource): + return NotImplemented + return( + self.udf_type == other.udf_type and + self.value == other.value) + + def __ne__(self, other): + return not self == other + + +class _AbstractQueryParameter(object): + """Base class for named / positional query parameters. + """ + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`ScalarQueryParameter` + """ + raise NotImplementedError + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + """ + raise NotImplementedError + + +class ScalarQueryParameter(_AbstractQueryParameter): + """Named / positional query parameters for scalar values. + + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). + + :type type_: str + :param type_: name of parameter type. One of 'STRING', 'INT64', + 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. + + :type value: str, int, float, bool, :class:`datetime.datetime`, or + :class:`datetime.date`. + :param value: the scalar parameter value. + """ + def __init__(self, name, type_, value): + self.name = name + self.type_ = type_ + self.value = value + + @classmethod + def positional(cls, type_, value): + """Factory for positional paramater. + + :type type_: str + :param type_: + name of parameter type. One of 'STRING', 'INT64', + 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. + + :type value: str, int, float, bool, :class:`datetime.datetime`, or + :class:`datetime.date`. + :param value: the scalar parameter value. + + :rtype: :class:`ScalarQueryParameter` + :returns: instance without name + """ + return cls(None, type_, value) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`ScalarQueryParameter` + :returns: instance + """ + name = resource.get('name') + type_ = resource['parameterType']['type'] + value = resource['parameterValue']['value'] + converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + return cls(name, type_, converted) + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + :returns: JSON mapping + """ + value = self.value + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) + if converter is not None: + value = converter(value) + resource = { + 'parameterType': { + 'type': self.type_, + }, + 'parameterValue': { + 'value': value, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`ScalarQueryParameter`. + """ + return ( + self.name, + self.type_.upper(), + self.value, + ) + + def __eq__(self, other): + if not isinstance(other, ScalarQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'ScalarQueryParameter{}'.format(self._key()) + + +class ArrayQueryParameter(_AbstractQueryParameter): + """Named / positional query parameters for array values. + + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). + + :type array_type: str + :param array_type: + name of type of array elements. One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + + :type values: list of appropriate scalar type. + :param values: the parameter array values. + """ + def __init__(self, name, array_type, values): + self.name = name + self.array_type = array_type + self.values = values + + @classmethod + def positional(cls, array_type, values): + """Factory for positional parameters. + + :type array_type: str + :param array_type: + name of type of array elements. One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + + :type values: list of appropriate scalar type + :param values: the parameter array values. + + :rtype: :class:`ArrayQueryParameter` + :returns: instance without name + """ + return cls(None, array_type, values) + + @classmethod + def _from_api_repr_struct(cls, resource): + name = resource.get('name') + converted = [] + # We need to flatten the array to use the StructQueryParameter + # parse code. + resource_template = { + # The arrayType includes all the types of the fields of the STRUCT + 'parameterType': resource['parameterType']['arrayType'] + } + for array_value in resource['parameterValue']['arrayValues']: + struct_resource = copy.deepcopy(resource_template) + struct_resource['parameterValue'] = array_value + struct_value = StructQueryParameter.from_api_repr(struct_resource) + converted.append(struct_value) + return cls(name, 'STRUCT', converted) + + @classmethod + def _from_api_repr_scalar(cls, resource): + name = resource.get('name') + array_type = resource['parameterType']['arrayType']['type'] + values = [ + value['value'] + for value + in resource['parameterValue']['arrayValues']] + converted = [ + _QUERY_PARAMS_FROM_JSON[array_type](value, None) + for value in values + ] + return cls(name, array_type, converted) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`ArrayQueryParameter` + :returns: instance + """ + array_type = resource['parameterType']['arrayType']['type'] + if array_type == 'STRUCT': + return cls._from_api_repr_struct(resource) + return cls._from_api_repr_scalar(resource) + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + :returns: JSON mapping + """ + values = self.values + if self.array_type == 'RECORD' or self.array_type == 'STRUCT': + reprs = [value.to_api_repr() for value in values] + a_type = reprs[0]['parameterType'] + a_values = [repr_['parameterValue'] for repr_ in reprs] + else: + a_type = {'type': self.array_type} + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) + if converter is not None: + values = [converter(value) for value in values] + a_values = [{'value': value} for value in values] + resource = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': a_type, + }, + 'parameterValue': { + 'arrayValues': a_values, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`ArrayQueryParameter`. + """ + return ( + self.name, + self.array_type.upper(), + self.values, + ) + + def __eq__(self, other): + if not isinstance(other, ArrayQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'ArrayQueryParameter{}'.format(self._key()) + + +class StructQueryParameter(_AbstractQueryParameter): + """Named / positional query parameters for struct values. + + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). + + :type sub_params: tuple of :class:`ScalarQueryParameter` + :param sub_params: the sub-parameters for the struct + """ + def __init__(self, name, *sub_params): + self.name = name + types = self.struct_types = OrderedDict() + values = self.struct_values = {} + for sub in sub_params: + if isinstance(sub, self.__class__): + types[sub.name] = 'STRUCT' + values[sub.name] = sub + elif isinstance(sub, ArrayQueryParameter): + types[sub.name] = 'ARRAY' + values[sub.name] = sub + else: + types[sub.name] = sub.type_ + values[sub.name] = sub.value + + @classmethod + def positional(cls, *sub_params): + """Factory for positional parameters. + + :type sub_params: tuple of :class:`ScalarQueryParameter` + :param sub_params: the sub-parameters for the struct + + :rtype: :class:`StructQueryParameter` + :returns: instance without name + """ + return cls(None, *sub_params) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`StructQueryParameter` + :returns: instance + """ + name = resource.get('name') + instance = cls(name) + type_resources = {} + types = instance.struct_types + for item in resource['parameterType']['structTypes']: + types[item['name']] = item['type']['type'] + type_resources[item['name']] = item['type'] + struct_values = resource['parameterValue']['structValues'] + for key, value in struct_values.items(): + type_ = types[key] + converted = None + if type_ == 'STRUCT': + struct_resource = { + 'name': key, + 'parameterType': type_resources[key], + 'parameterValue': value, + } + converted = StructQueryParameter.from_api_repr(struct_resource) + elif type_ == 'ARRAY': + struct_resource = { + 'name': key, + 'parameterType': type_resources[key], + 'parameterValue': value, + } + converted = ArrayQueryParameter.from_api_repr(struct_resource) + else: + value = value['value'] + converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + instance.struct_values[key] = converted + return instance + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + :returns: JSON mapping + """ + s_types = {} + values = {} + for name, value in self.struct_values.items(): + type_ = self.struct_types[name] + if type_ in ('STRUCT', 'ARRAY'): + repr_ = value.to_api_repr() + s_types[name] = {'name': name, 'type': repr_['parameterType']} + values[name] = repr_['parameterValue'] + else: + s_types[name] = {'name': name, 'type': {'type': type_}} + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) + if converter is not None: + value = converter(value) + values[name] = {'value': value} + + resource = { + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [s_types[key] for key in self.struct_types], + }, + 'parameterValue': { + 'structValues': values, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`ArrayQueryParameter`. + """ + return ( + self.name, + self.struct_types, + self.struct_values, + ) + + def __eq__(self, other): + if not isinstance(other, StructQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'StructQueryParameter{}'.format(self._key()) class QueryResults(object): @@ -188,3 +619,15 @@ def _set_properties(self, api_response): self._properties.clear() self._properties.update(copy.deepcopy(api_response)) + + +def _query_param_from_api_repr(resource): + """Helper: construct concrete query parameter from JSON resource.""" + qp_type = resource['parameterType'] + if 'arrayType' in qp_type: + klass = ArrayQueryParameter + elif 'structTypes' in qp_type: + klass = StructQueryParameter + else: + klass = ScalarQueryParameter + return klass.from_api_repr(resource) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 8a78701a3808..7fa3ff758897 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -953,10 +953,10 @@ def test_dbapi_w_dml(self): self.assertIsNone(Config.CURSOR.fetchone()) def test_query_w_query_params(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ArrayQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import StructQueryParameter question = 'What is the answer to life, the universe, and everything?' question_param = ScalarQueryParameter( name='question', type_='STRING', value=question) diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 2cf3f0f1f8e4..6d2a43fffb11 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -16,8 +16,6 @@ import datetime import unittest -import mock - class Test_not_null(unittest.TestCase): @@ -906,1074 +904,6 @@ def __init__(self): self.assertIsNone(wrapper._configuration._attr) -class Test_AbstractQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import AbstractQueryParameter - - return AbstractQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_from_api_virtual(self): - klass = self._get_target_class() - with self.assertRaises(NotImplementedError): - klass.from_api_repr({}) - - def test_to_api_virtual(self): - param = self._make_one() - with self.assertRaises(NotImplementedError): - param.to_api_repr() - - -class Test_ScalarQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - return ScalarQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - param = self._make_one(name='foo', type_='INT64', value=123) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test___eq__(self): - param = self._make_one(name='foo', type_='INT64', value=123) - self.assertEqual(param, param) - self.assertNotEqual(param, object()) - alias = self._make_one(name='bar', type_='INT64', value=123) - self.assertNotEqual(param, alias) - wrong_type = self._make_one(name='foo', type_='FLOAT64', value=123.0) - self.assertNotEqual(param, wrong_type) - wrong_val = self._make_one(name='foo', type_='INT64', value=234) - self.assertNotEqual(param, wrong_val) - - def test_positional(self): - klass = self._get_target_class() - param = klass.positional(type_='INT64', value=123) - self.assertEqual(param.name, None) - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test_from_api_repr_w_name(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': 123, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test_from_api_repr_wo_name(self): - RESOURCE = { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, None) - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test_to_api_repr_w_name(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - } - param = self._make_one(name='foo', type_='INT64', value=123) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_wo_name(self): - EXPECTED = { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - } - klass = self._get_target_class() - param = klass.positional(type_='INT64', value=123) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_float(self): - EXPECTED = { - 'parameterType': { - 'type': 'FLOAT64', - }, - 'parameterValue': { - 'value': 12.345, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='FLOAT64', value=12.345) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_bool(self): - EXPECTED = { - 'parameterType': { - 'type': 'BOOL', - }, - 'parameterValue': { - 'value': 'false', - }, - } - klass = self._get_target_class() - param = klass.positional(type_='BOOL', value=False) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_timestamp_datetime(self): - from google.cloud._helpers import UTC - - STAMP = '2016-12-20 15:58:27.339328+00:00' - when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) - EXPECTED = { - 'parameterType': { - 'type': 'TIMESTAMP', - }, - 'parameterValue': { - 'value': STAMP, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='TIMESTAMP', value=when) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_timestamp_micros(self): - from google.cloud._helpers import _microseconds_from_datetime - - now = datetime.datetime.utcnow() - seconds = _microseconds_from_datetime(now) / 1.0e6 - EXPECTED = { - 'parameterType': { - 'type': 'TIMESTAMP', - }, - 'parameterValue': { - 'value': seconds, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='TIMESTAMP', value=seconds) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_datetime_datetime(self): - from google.cloud._helpers import _datetime_to_rfc3339 - - now = datetime.datetime.utcnow() - EXPECTED = { - 'parameterType': { - 'type': 'DATETIME', - }, - 'parameterValue': { - 'value': _datetime_to_rfc3339(now)[:-1], # strip trailing 'Z' - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATETIME', value=now) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_datetime_string(self): - from google.cloud._helpers import _datetime_to_rfc3339 - - now = datetime.datetime.utcnow() - now_str = _datetime_to_rfc3339(now) - EXPECTED = { - 'parameterType': { - 'type': 'DATETIME', - }, - 'parameterValue': { - 'value': now_str, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATETIME', value=now_str) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_date_date(self): - today = datetime.date.today() - EXPECTED = { - 'parameterType': { - 'type': 'DATE', - }, - 'parameterValue': { - 'value': today.isoformat(), - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATE', value=today) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_date_string(self): - today = datetime.date.today() - today_str = today.isoformat(), - EXPECTED = { - 'parameterType': { - 'type': 'DATE', - }, - 'parameterValue': { - 'value': today_str, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATE', value=today_str) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_unknown_type(self): - EXPECTED = { - 'parameterType': { - 'type': 'UNKNOWN', - }, - 'parameterValue': { - 'value': 'unknown', - }, - } - klass = self._get_target_class() - param = klass.positional(type_='UNKNOWN', value='unknown') - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test___eq___wrong_type(self): - field = self._make_one('test', 'STRING', 'value') - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___eq___name_mismatch(self): - field = self._make_one('test', 'STRING', 'value') - other = self._make_one('other', 'STRING', 'value') - self.assertNotEqual(field, other) - - def test___eq___field_type_mismatch(self): - field = self._make_one('test', 'STRING', None) - other = self._make_one('test', 'INT64', None) - self.assertNotEqual(field, other) - - def test___eq___value_mismatch(self): - field = self._make_one('test', 'STRING', 'hello') - other = self._make_one('test', 'STRING', 'world') - self.assertNotEqual(field, other) - - def test___eq___hit(self): - field = self._make_one('test', 'STRING', 'gotcha') - other = self._make_one('test', 'STRING', 'gotcha') - self.assertEqual(field, other) - - def test___ne___wrong_type(self): - field = self._make_one('toast', 'INT64', 13) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___ne___same_value(self): - field1 = self._make_one('test', 'INT64', 12) - field2 = self._make_one('test', 'INT64', 12) - # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (field1 != field2) - self.assertFalse(comparison_val) - - def test___ne___different_values(self): - field1 = self._make_one('test', 'INT64', 11) - field2 = self._make_one('test', 'INT64', 12) - self.assertNotEqual(field1, field2) - - def test___repr__(self): - field1 = self._make_one('field1', 'STRING', 'value') - expected = "ScalarQueryParameter('field1', 'STRING', 'value')" - self.assertEqual(repr(field1), expected) - - -def _make_subparam(name, type_, value): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - return ScalarQueryParameter(name, type_, value) - - -class Test_ArrayQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import ArrayQueryParameter - - return ArrayQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test___eq__(self): - param = self._make_one(name='foo', array_type='INT64', values=[123]) - self.assertEqual(param, param) - self.assertNotEqual(param, object()) - alias = self._make_one(name='bar', array_type='INT64', values=[123]) - self.assertNotEqual(param, alias) - wrong_type = self._make_one( - name='foo', array_type='FLOAT64', values=[123.0]) - self.assertNotEqual(param, wrong_type) - wrong_val = self._make_one( - name='foo', array_type='INT64', values=[234]) - self.assertNotEqual(param, wrong_val) - - def test_positional(self): - klass = self._get_target_class() - param = klass.positional(array_type='INT64', values=[1, 2]) - self.assertEqual(param.name, None) - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test_from_api_repr_w_name(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test_from_api_repr_wo_name(self): - RESOURCE = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, None) - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test_from_api_repr_w_struct_type(self): - from google.cloud.bigquery._helpers import StructQueryParameter - - RESOURCE = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'STRUCT', - 'structTypes': [ - { - 'name': 'name', - 'type': {'type': 'STRING'}, - }, - { - 'name': 'age', - 'type': {'type': 'INT64'}, - }, - ], - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'structValues': { - 'name': {'value': 'Phred Phlyntstone'}, - 'age': {'value': '32'}, - }, - }, - { - 'structValues': { - 'name': { - 'value': 'Bharney Rhubbyl', - }, - 'age': {'value': '31'}, - }, - }, - ], - }, - } - - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - - phred = StructQueryParameter.positional( - _make_subparam('name', 'STRING', 'Phred Phlyntstone'), - _make_subparam('age', 'INT64', 32)) - bharney = StructQueryParameter.positional( - _make_subparam('name', 'STRING', 'Bharney Rhubbyl'), - _make_subparam('age', 'INT64', 31)) - self.assertEqual(param.array_type, 'STRUCT') - self.assertEqual(param.values, [phred, bharney]) - - def test_to_api_repr_w_name(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, - } - param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_wo_name(self): - EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, - } - klass = self._get_target_class() - param = klass.positional(array_type='INT64', values=[1, 2]) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_unknown_type(self): - EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'UNKNOWN', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': 'unknown', - } - ], - }, - } - klass = self._get_target_class() - param = klass.positional(array_type='UNKNOWN', values=['unknown']) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_record_type(self): - from google.cloud.bigquery._helpers import StructQueryParameter - - EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'foo', 'type': {'type': 'STRING'}}, - {'name': 'bar', 'type': {'type': 'INT64'}}, - ], - }, - }, - 'parameterValue': { - 'arrayValues': [{ - 'structValues': { - 'foo': {'value': 'Foo'}, - 'bar': {'value': '123'}, - } - }] - }, - } - one = _make_subparam('foo', 'STRING', 'Foo') - another = _make_subparam('bar', 'INT64', 123) - struct = StructQueryParameter.positional(one, another) - klass = self._get_target_class() - param = klass.positional(array_type='RECORD', values=[struct]) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test___eq___wrong_type(self): - field = self._make_one('test', 'STRING', ['value']) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___eq___name_mismatch(self): - field = self._make_one('field', 'STRING', ['value']) - other = self._make_one('other', 'STRING', ['value']) - self.assertNotEqual(field, other) - - def test___eq___field_type_mismatch(self): - field = self._make_one('test', 'STRING', []) - other = self._make_one('test', 'INT64', []) - self.assertNotEqual(field, other) - - def test___eq___value_mismatch(self): - field = self._make_one('test', 'STRING', ['hello']) - other = self._make_one('test', 'STRING', ['hello', 'world']) - self.assertNotEqual(field, other) - - def test___eq___hit(self): - field = self._make_one('test', 'STRING', ['gotcha']) - other = self._make_one('test', 'STRING', ['gotcha']) - self.assertEqual(field, other) - - def test___ne___wrong_type(self): - field = self._make_one('toast', 'INT64', [13]) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___ne___same_value(self): - field1 = self._make_one('test', 'INT64', [12]) - field2 = self._make_one('test', 'INT64', [12]) - # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (field1 != field2) - self.assertFalse(comparison_val) - - def test___ne___different_values(self): - field1 = self._make_one('test', 'INT64', [11]) - field2 = self._make_one('test', 'INT64', [12]) - self.assertNotEqual(field1, field2) - - def test___repr__(self): - field1 = self._make_one('field1', 'STRING', ['value']) - expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" - self.assertEqual(repr(field1), expected) - - -class Test_StructQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import StructQueryParameter - - return StructQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - param = self._make_one('foo', sub_1, sub_2) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test___eq__(self): - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - sub_3 = _make_subparam('baz', 'STRING', 'def') - sub_1_float = _make_subparam('bar', 'FLOAT64', 123.0) - param = self._make_one('foo', sub_1, sub_2) - self.assertEqual(param, param) - self.assertNotEqual(param, object()) - alias = self._make_one('bar', sub_1, sub_2) - self.assertNotEqual(param, alias) - wrong_type = self._make_one('foo', sub_1_float, sub_2) - self.assertNotEqual(param, wrong_type) - wrong_val = self._make_one('foo', sub_2, sub_3) - self.assertNotEqual(param, wrong_val) - - def test_positional(self): - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - klass = self._get_target_class() - param = klass.positional(sub_1, sub_2) - self.assertEqual(param.name, None) - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test_from_api_repr_w_name(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 123}, - 'baz': {'value': 'abc'}, - }, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test_from_api_repr_wo_name(self): - RESOURCE = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 123}, - 'baz': {'value': 'abc'}, - }, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, None) - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test_from_api_repr_w_nested_array(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'ARRAY', - 'arrayType': {'type': 'INT64'}, - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'arrayValues': [ - {'value': '123'}, - {'value': '456'}, - ]}, - }, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual( - param, - self._make_one( - 'foo', - _make_subparam('bar', 'STRING', 'abc'), - ArrayQueryParameter('baz', 'INT64', [123, 456]))) - - def test_from_api_repr_w_nested_struct(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'qux', 'type': {'type': 'INT64'}}, - {'name': 'spam', 'type': {'type': 'BOOL'}}, - ], - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'structValues': { - 'qux': {'value': '123'}, - 'spam': {'value': 'true'}, - }}, - }, - }, - } - - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - - expected = self._make_one( - 'foo', - _make_subparam('bar', 'STRING', 'abc'), - self._make_one( - 'baz', - _make_subparam('qux', 'INT64', 123), - _make_subparam('spam', 'BOOL', True))) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.struct_types, expected.struct_types) - self.assertEqual(param.struct_values, expected.struct_values) - - def test_to_api_repr_w_name(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': '123'}, - 'baz': {'value': 'abc'}, - }, - }, - } - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - param = self._make_one('foo', sub_1, sub_2) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_wo_name(self): - EXPECTED = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': '123'}, - 'baz': {'value': 'abc'}, - }, - }, - } - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - klass = self._get_target_class() - param = klass.positional(sub_1, sub_2) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_nested_array(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'ARRAY', - 'arrayType': {'type': 'INT64'}, - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'arrayValues': [ - {'value': '123'}, - {'value': '456'}, - ]}, - }, - }, - } - scalar = _make_subparam('bar', 'STRING', 'abc') - array = ArrayQueryParameter('baz', 'INT64', [123, 456]) - param = self._make_one('foo', scalar, array) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_nested_struct(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'qux', 'type': {'type': 'INT64'}}, - {'name': 'spam', 'type': {'type': 'BOOL'}}, - ], - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'structValues': { - 'qux': {'value': '123'}, - 'spam': {'value': 'true'}, - }}, - }, - }, - } - scalar_1 = _make_subparam('bar', 'STRING', 'abc') - scalar_2 = _make_subparam('qux', 'INT64', 123) - scalar_3 = _make_subparam('spam', 'BOOL', True) - sub = self._make_one('baz', scalar_2, scalar_3) - param = self._make_one('foo', scalar_1, sub) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test___eq___wrong_type(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'abc')) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___eq___name_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'abc')) - other = self._make_one( - 'other ', _make_subparam('bar', 'STRING', 'abc')) - self.assertNotEqual(field, other) - - def test___eq___field_type_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', None)) - other = self._make_one( - 'test', _make_subparam('bar', 'INT64', None)) - self.assertNotEqual(field, other) - - def test___eq___value_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - other = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'world')) - self.assertNotEqual(field, other) - - def test___eq___hit(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'gotcha')) - other = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'gotcha')) - self.assertEqual(field, other) - - def test___ne___wrong_type(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___ne___same_value(self): - field1 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - field2 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (field1 != field2) - self.assertFalse(comparison_val) - - def test___ne___different_values(self): - field1 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - field2 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'world')) - self.assertNotEqual(field1, field2) - - def test___repr__(self): - field1 = self._make_one( - 'test', _make_subparam('field1', 'STRING', 'hello')) - got = repr(field1) - self.assertIn('StructQueryParameter', got) - self.assertIn("'field1', 'STRING'", got) - self.assertIn("'field1': 'hello'", got) - - -class Test__query_param_from_api_repr(unittest.TestCase): - - @staticmethod - def _call_fut(resource): - from google.cloud.bigquery._helpers import _query_param_from_api_repr - - return _query_param_from_api_repr(resource) - - def test_w_scalar(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - RESOURCE = { - 'name': 'foo', - 'parameterType': {'type': 'INT64'}, - 'parameterValue': {'value': '123'}, - } - - parameter = self._call_fut(RESOURCE) - - self.assertIsInstance(parameter, ScalarQueryParameter) - self.assertEqual(parameter.name, 'foo') - self.assertEqual(parameter.type_, 'INT64') - self.assertEqual(parameter.value, 123) - - def test_w_scalar_timestamp(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud._helpers import UTC - - RESOURCE = { - 'name': 'zoned', - 'parameterType': {'type': 'TIMESTAMP'}, - 'parameterValue': {'value': '2012-03-04 05:06:07+00:00'}, - } - - parameter = self._call_fut(RESOURCE) - - self.assertIsInstance(parameter, ScalarQueryParameter) - self.assertEqual(parameter.name, 'zoned') - self.assertEqual(parameter.type_, 'TIMESTAMP') - self.assertEqual( - parameter.value, - datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC)) - - def test_w_scalar_timestamp_micros(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud._helpers import UTC - - RESOURCE = { - 'name': 'zoned', - 'parameterType': {'type': 'TIMESTAMP'}, - 'parameterValue': {'value': '2012-03-04 05:06:07.250000+00:00'}, - } - - parameter = self._call_fut(RESOURCE) - - self.assertIsInstance(parameter, ScalarQueryParameter) - self.assertEqual(parameter.name, 'zoned') - self.assertEqual(parameter.type_, 'TIMESTAMP') - self.assertEqual( - parameter.value, - datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC)) - - def test_w_array(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': {'type': 'INT64'}, - }, - 'parameterValue': { - 'arrayValues': [ - {'value': '123'}, - ]}, - } - - parameter = self._call_fut(RESOURCE) - - self.assertIsInstance(parameter, ArrayQueryParameter) - self.assertEqual(parameter.name, 'foo') - self.assertEqual(parameter.array_type, 'INT64') - self.assertEqual(parameter.values, [123]) - - def test_w_struct(self): - from google.cloud.bigquery._helpers import StructQueryParameter - - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'foo', 'type': {'type': 'STRING'}}, - {'name': 'bar', 'type': {'type': 'INT64'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'foo': {'value': 'Foo'}, - 'bar': {'value': '123'}, - } - }, - } - - parameter = self._call_fut(RESOURCE) - - self.assertIsInstance(parameter, StructQueryParameter) - self.assertEqual(parameter.name, 'foo') - self.assertEqual( - parameter.struct_types, {'foo': 'STRING', 'bar': 'INT64'}) - self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123}) - - -class Test_UDFResource(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import UDFResource - - return UDFResource - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') - self.assertEqual(udf.udf_type, 'resourceUri') - self.assertEqual(udf.value, 'gs://some_bucket/some_file') - - def test___eq__(self): - udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') - self.assertEqual(udf, udf) - self.assertNotEqual(udf, object()) - wrong_val = self._make_one( - 'resourceUri', 'gs://some_bucket/other_file') - self.assertNotEqual(udf, wrong_val) - wrong_type = self._make_one('inlineCode', udf.value) - self.assertNotEqual(udf, wrong_type) - - class Test_ListApiResourceProperty(unittest.TestCase): @staticmethod @@ -1986,10 +916,10 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _descriptor_and_klass(self): - from google.cloud.bigquery._helpers import AbstractQueryParameter + from google.cloud.bigquery.query import _AbstractQueryParameter descriptor = self._make_one( - 'query_parameters', 'queryParameters', AbstractQueryParameter) + 'query_parameters', 'queryParameters', _AbstractQueryParameter) class _Test(object): def __init__(self): @@ -2009,7 +939,7 @@ def test_instance_getter_empty(self): self.assertEqual(instance.query_parameters, []) def test_instance_getter_w_non_empty_list(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() @@ -2019,7 +949,7 @@ def test_instance_getter_w_non_empty_list(self): self.assertEqual(instance.query_parameters, query_parameters) def test_instance_setter_w_empty_list(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() @@ -2031,7 +961,7 @@ def test_instance_setter_w_empty_list(self): self.assertEqual(instance.query_parameters, []) def test_instance_setter_w_none(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() @@ -2042,7 +972,7 @@ def test_instance_setter_w_none(self): instance.query_parameters = None def test_instance_setter_w_valid_udf(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d34c192c3fac..9cdf7129c9c3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1799,9 +1799,9 @@ def test_query_defaults(self): self.assertFalse(sent_config['useLegacySql']) def test_query_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' PROJECT = 'PROJECT' @@ -1856,9 +1856,9 @@ def test_query_w_udf_resources(self): {'resourceUri': RESOURCE_URI}) def test_query_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter PROJECT = 'PROJECT' JOB = 'job_name' diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a9da40ee1d5e..c1c190328968 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1756,8 +1756,8 @@ def test_ctor_defaults(self): self.assertIsNone(job.maximum_bytes_billed) def test_ctor_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] @@ -1769,8 +1769,8 @@ def test_ctor_w_udf_resources(self): self.assertEqual(job.udf_resources, udf_resources) def test_ctor_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] client = _make_client(project=self.PROJECT) @@ -2053,9 +2053,9 @@ def test_referenced_tables(self): self.assertEqual(remote.project, 'other-project-123') def test_undeclared_query_paramters(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud.bigquery._helpers import StructQueryParameter + from google.cloud.bigquery.query import ArrayQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import StructQueryParameter undeclared = [{ 'name': 'my_scalar', @@ -2351,8 +2351,8 @@ def test_begin_w_alternate_client(self): self.assertEqual(req['data'], SENT) def test_begin_w_udf(self): - from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' INLINE_UDF_CODE = 'var someCode = "here";' @@ -2406,8 +2406,8 @@ def test_begin_w_udf(self): self.assertEqual(req['data'], SENT) def test_begin_w_named_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2462,8 +2462,8 @@ def test_begin_w_named_query_parameter(self): self.assertEqual(req['data'], SENT) def test_begin_w_positional_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter.positional('INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index d2322886daa5..e5c78caf3b0b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -12,8 +12,966 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime import unittest +import mock + + +class Test_UDFResource(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import UDFResource + + return UDFResource + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') + self.assertEqual(udf.udf_type, 'resourceUri') + self.assertEqual(udf.value, 'gs://some_bucket/some_file') + + def test___eq__(self): + udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') + self.assertEqual(udf, udf) + self.assertNotEqual(udf, object()) + wrong_val = self._make_one( + 'resourceUri', 'gs://some_bucket/other_file') + self.assertNotEqual(udf, wrong_val) + wrong_type = self._make_one('inlineCode', udf.value) + self.assertNotEqual(udf, wrong_type) + + +class Test__AbstractQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import _AbstractQueryParameter + + return _AbstractQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_virtual(self): + klass = self._get_target_class() + with self.assertRaises(NotImplementedError): + klass.from_api_repr({}) + + def test_to_api_virtual(self): + param = self._make_one() + with self.assertRaises(NotImplementedError): + param.to_api_repr() + + +class Test_ScalarQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ScalarQueryParameter + + return ScalarQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test___eq__(self): + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one(name='bar', type_='INT64', value=123) + self.assertNotEqual(param, alias) + wrong_type = self._make_one(name='foo', type_='FLOAT64', value=123.0) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one(name='foo', type_='INT64', value=234) + self.assertNotEqual(param, wrong_val) + + def test_positional(self): + klass = self._get_target_class() + param = klass.positional(type_='INT64', value=123) + self.assertEqual(param.name, None) + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_from_api_repr_w_name(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': 123, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': '123', + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': '123', + }, + } + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': '123', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='INT64', value=123) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_float(self): + EXPECTED = { + 'parameterType': { + 'type': 'FLOAT64', + }, + 'parameterValue': { + 'value': 12.345, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='FLOAT64', value=12.345) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_bool(self): + EXPECTED = { + 'parameterType': { + 'type': 'BOOL', + }, + 'parameterValue': { + 'value': 'false', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='BOOL', value=False) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_datetime(self): + from google.cloud._helpers import UTC + + STAMP = '2016-12-20 15:58:27.339328+00:00' + when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + EXPECTED = { + 'parameterType': { + 'type': 'TIMESTAMP', + }, + 'parameterValue': { + 'value': STAMP, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='TIMESTAMP', value=when) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_micros(self): + from google.cloud._helpers import _microseconds_from_datetime + + now = datetime.datetime.utcnow() + seconds = _microseconds_from_datetime(now) / 1.0e6 + EXPECTED = { + 'parameterType': { + 'type': 'TIMESTAMP', + }, + 'parameterValue': { + 'value': seconds, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='TIMESTAMP', value=seconds) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_datetime(self): + from google.cloud._helpers import _datetime_to_rfc3339 + + now = datetime.datetime.utcnow() + EXPECTED = { + 'parameterType': { + 'type': 'DATETIME', + }, + 'parameterValue': { + 'value': _datetime_to_rfc3339(now)[:-1], # strip trailing 'Z' + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATETIME', value=now) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_string(self): + from google.cloud._helpers import _datetime_to_rfc3339 + + now = datetime.datetime.utcnow() + now_str = _datetime_to_rfc3339(now) + EXPECTED = { + 'parameterType': { + 'type': 'DATETIME', + }, + 'parameterValue': { + 'value': now_str, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATETIME', value=now_str) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_date_date(self): + today = datetime.date.today() + EXPECTED = { + 'parameterType': { + 'type': 'DATE', + }, + 'parameterValue': { + 'value': today.isoformat(), + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATE', value=today) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_date_string(self): + today = datetime.date.today() + today_str = today.isoformat(), + EXPECTED = { + 'parameterType': { + 'type': 'DATE', + }, + 'parameterValue': { + 'value': today_str, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATE', value=today_str) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_unknown_type(self): + EXPECTED = { + 'parameterType': { + 'type': 'UNKNOWN', + }, + 'parameterValue': { + 'value': 'unknown', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='UNKNOWN', value='unknown') + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test___eq___wrong_type(self): + field = self._make_one('test', 'STRING', 'value') + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one('test', 'STRING', 'value') + other = self._make_one('other', 'STRING', 'value') + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one('test', 'STRING', None) + other = self._make_one('test', 'INT64', None) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one('test', 'STRING', 'hello') + other = self._make_one('test', 'STRING', 'world') + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one('test', 'STRING', 'gotcha') + other = self._make_one('test', 'STRING', 'gotcha') + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one('toast', 'INT64', 13) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one('test', 'INT64', 12) + field2 = self._make_one('test', 'INT64', 12) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one('test', 'INT64', 11) + field2 = self._make_one('test', 'INT64', 12) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one('field1', 'STRING', 'value') + expected = "ScalarQueryParameter('field1', 'STRING', 'value')" + self.assertEqual(repr(field1), expected) + + +def _make_subparam(name, type_, value): + from google.cloud.bigquery.query import ScalarQueryParameter + + return ScalarQueryParameter(name, type_, value) + + +class Test_ArrayQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ArrayQueryParameter + + return ArrayQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test___eq__(self): + param = self._make_one(name='foo', array_type='INT64', values=[123]) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one(name='bar', array_type='INT64', values=[123]) + self.assertNotEqual(param, alias) + wrong_type = self._make_one( + name='foo', array_type='FLOAT64', values=[123.0]) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one( + name='foo', array_type='INT64', values=[234]) + self.assertNotEqual(param, wrong_val) + + def test_positional(self): + klass = self._get_target_class() + param = klass.positional(array_type='INT64', values=[1, 2]) + self.assertEqual(param.name, None) + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_from_api_repr_w_name(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_from_api_repr_w_struct_type(self): + from google.cloud.bigquery.query import StructQueryParameter + + RESOURCE = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'STRUCT', + 'structTypes': [ + { + 'name': 'name', + 'type': {'type': 'STRING'}, + }, + { + 'name': 'age', + 'type': {'type': 'INT64'}, + }, + ], + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'structValues': { + 'name': {'value': 'Phred Phlyntstone'}, + 'age': {'value': '32'}, + }, + }, + { + 'structValues': { + 'name': { + 'value': 'Bharney Rhubbyl', + }, + 'age': {'value': '31'}, + }, + }, + ], + }, + } + + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + + phred = StructQueryParameter.positional( + _make_subparam('name', 'STRING', 'Phred Phlyntstone'), + _make_subparam('age', 'INT64', 32)) + bharney = StructQueryParameter.positional( + _make_subparam('name', 'STRING', 'Bharney Rhubbyl'), + _make_subparam('age', 'INT64', 31)) + self.assertEqual(param.array_type, 'STRUCT') + self.assertEqual(param.values, [phred, bharney]) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + klass = self._get_target_class() + param = klass.positional(array_type='INT64', values=[1, 2]) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_unknown_type(self): + EXPECTED = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'UNKNOWN', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': 'unknown', + } + ], + }, + } + klass = self._get_target_class() + param = klass.positional(array_type='UNKNOWN', values=['unknown']) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_record_type(self): + from google.cloud.bigquery.query import StructQueryParameter + + EXPECTED = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'foo', 'type': {'type': 'STRING'}}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + ], + }, + }, + 'parameterValue': { + 'arrayValues': [{ + 'structValues': { + 'foo': {'value': 'Foo'}, + 'bar': {'value': '123'}, + } + }] + }, + } + one = _make_subparam('foo', 'STRING', 'Foo') + another = _make_subparam('bar', 'INT64', 123) + struct = StructQueryParameter.positional(one, another) + klass = self._get_target_class() + param = klass.positional(array_type='RECORD', values=[struct]) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test___eq___wrong_type(self): + field = self._make_one('test', 'STRING', ['value']) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one('field', 'STRING', ['value']) + other = self._make_one('other', 'STRING', ['value']) + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one('test', 'STRING', []) + other = self._make_one('test', 'INT64', []) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one('test', 'STRING', ['hello']) + other = self._make_one('test', 'STRING', ['hello', 'world']) + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one('test', 'STRING', ['gotcha']) + other = self._make_one('test', 'STRING', ['gotcha']) + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one('toast', 'INT64', [13]) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one('test', 'INT64', [12]) + field2 = self._make_one('test', 'INT64', [12]) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one('test', 'INT64', [11]) + field2 = self._make_one('test', 'INT64', [12]) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one('field1', 'STRING', ['value']) + expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" + self.assertEqual(repr(field1), expected) + + +class Test_StructQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import StructQueryParameter + + return StructQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test___eq__(self): + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + sub_3 = _make_subparam('baz', 'STRING', 'def') + sub_1_float = _make_subparam('bar', 'FLOAT64', 123.0) + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one('bar', sub_1, sub_2) + self.assertNotEqual(param, alias) + wrong_type = self._make_one('foo', sub_1_float, sub_2) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one('foo', sub_2, sub_3) + self.assertNotEqual(param, wrong_val) + + def test_positional(self): + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + klass = self._get_target_class() + param = klass.positional(sub_1, sub_2) + self.assertEqual(param.name, None) + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_from_api_repr_w_name(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 123}, + 'baz': {'value': 'abc'}, + }, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 123}, + 'baz': {'value': 'abc'}, + }, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_from_api_repr_w_nested_array(self): + from google.cloud.bigquery.query import ArrayQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'arrayValues': [ + {'value': '123'}, + {'value': '456'}, + ]}, + }, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual( + param, + self._make_one( + 'foo', + _make_subparam('bar', 'STRING', 'abc'), + ArrayQueryParameter('baz', 'INT64', [123, 456]))) + + def test_from_api_repr_w_nested_struct(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'qux', 'type': {'type': 'INT64'}}, + {'name': 'spam', 'type': {'type': 'BOOL'}}, + ], + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'structValues': { + 'qux': {'value': '123'}, + 'spam': {'value': 'true'}, + }}, + }, + }, + } + + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + + expected = self._make_one( + 'foo', + _make_subparam('bar', 'STRING', 'abc'), + self._make_one( + 'baz', + _make_subparam('qux', 'INT64', 123), + _make_subparam('spam', 'BOOL', True))) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, expected.struct_types) + self.assertEqual(param.struct_values, expected.struct_values) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': '123'}, + 'baz': {'value': 'abc'}, + }, + }, + } + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': '123'}, + 'baz': {'value': 'abc'}, + }, + }, + } + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + klass = self._get_target_class() + param = klass.positional(sub_1, sub_2) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_nested_array(self): + from google.cloud.bigquery.query import ArrayQueryParameter + + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'arrayValues': [ + {'value': '123'}, + {'value': '456'}, + ]}, + }, + }, + } + scalar = _make_subparam('bar', 'STRING', 'abc') + array = ArrayQueryParameter('baz', 'INT64', [123, 456]) + param = self._make_one('foo', scalar, array) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_nested_struct(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'qux', 'type': {'type': 'INT64'}}, + {'name': 'spam', 'type': {'type': 'BOOL'}}, + ], + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'structValues': { + 'qux': {'value': '123'}, + 'spam': {'value': 'true'}, + }}, + }, + }, + } + scalar_1 = _make_subparam('bar', 'STRING', 'abc') + scalar_2 = _make_subparam('qux', 'INT64', 123) + scalar_3 = _make_subparam('spam', 'BOOL', True) + sub = self._make_one('baz', scalar_2, scalar_3) + param = self._make_one('foo', scalar_1, sub) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test___eq___wrong_type(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'abc')) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'abc')) + other = self._make_one( + 'other ', _make_subparam('bar', 'STRING', 'abc')) + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', None)) + other = self._make_one( + 'test', _make_subparam('bar', 'INT64', None)) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + other = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'world')) + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'gotcha')) + other = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'gotcha')) + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + field2 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + field2 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'world')) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one( + 'test', _make_subparam('field1', 'STRING', 'hello')) + got = repr(field1) + self.assertIn('StructQueryParameter', got) + self.assertIn("'field1', 'STRING'", got) + self.assertIn("'field1': 'hello'", got) + class TestQueryResults(unittest.TestCase): PROJECT = 'project' @@ -180,3 +1138,116 @@ def test_schema(self): } query._set_properties(resource) self._verifySchema(query, resource) + + +class Test__query_param_from_api_repr(unittest.TestCase): + + @staticmethod + def _call_fut(resource): + from google.cloud.bigquery.query import _query_param_from_api_repr + + return _query_param_from_api_repr(resource) + + def test_w_scalar(self): + from google.cloud.bigquery.query import ScalarQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': {'type': 'INT64'}, + 'parameterValue': {'value': '123'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual(parameter.type_, 'INT64') + self.assertEqual(parameter.value, 123) + + def test_w_scalar_timestamp(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.query import ScalarQueryParameter + + RESOURCE = { + 'name': 'zoned', + 'parameterType': {'type': 'TIMESTAMP'}, + 'parameterValue': {'value': '2012-03-04 05:06:07+00:00'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'zoned') + self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual( + parameter.value, + datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC)) + + def test_w_scalar_timestamp_micros(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.query import ScalarQueryParameter + + RESOURCE = { + 'name': 'zoned', + 'parameterType': {'type': 'TIMESTAMP'}, + 'parameterValue': {'value': '2012-03-04 05:06:07.250000+00:00'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'zoned') + self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual( + parameter.value, + datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC)) + + def test_w_array(self): + from google.cloud.bigquery.query import ArrayQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }, + 'parameterValue': { + 'arrayValues': [ + {'value': '123'}, + ]}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ArrayQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual(parameter.array_type, 'INT64') + self.assertEqual(parameter.values, [123]) + + def test_w_struct(self): + from google.cloud.bigquery.query import StructQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'foo', 'type': {'type': 'STRING'}}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'foo': {'value': 'Foo'}, + 'bar': {'value': '123'}, + } + }, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, StructQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual( + parameter.struct_types, {'foo': 'STRING', 'bar': 'INT64'}) + self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123}) From 48e8566bab6b7b9c4032916e561dfd9691d595a4 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 11 Oct 2017 12:24:17 -0700 Subject: [PATCH 0232/2016] BigQuery: Replace table.insert_data() with client.create_rows() (#4151) * replaces table.insert_data() with client.create_rows() * client.create_rows() accepts list of dicts as rows parameter * adds system test for rows given as list of dictionaries to create_rows() * adds test for create_rows() with list of Rows * removes unused test function * client.create_rows() accepts TableReference --- .../google/cloud/bigquery/client.py | 115 ++++++ .../google/cloud/bigquery/table.py | 172 ++------ .../google-cloud-bigquery/tests/system.py | 69 +++- .../tests/unit/test_client.py | 379 ++++++++++++++++++ .../tests/unit/test_table.py | 268 ++----------- 5 files changed, 606 insertions(+), 397 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a493e3dcd426..c55a36ec1994 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -34,6 +34,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import _row_from_mapping from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob @@ -42,6 +43,7 @@ from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start from google.cloud.bigquery._helpers import _field_to_index_mapping +from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB @@ -832,6 +834,119 @@ def query(self, query, job_config=None, job_id=None): job.begin() return job + def create_rows(self, table, rows, row_ids=None, selected_fields=None, + skip_invalid_rows=None, ignore_unknown_values=None, + template_suffix=None): + """API call: insert table data via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll + + :type table: One of: + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` + :param table: the destination table for the row data, or a reference + to it. + + :type rows: One of: + list of tuples + list of dictionaries + :param rows: Row data to be inserted. If a list of tuples is given, + each tuple should contain data for each schema field on + the current table and in the same order as the schema + fields. If a list of dictionaries is given, the keys must + include all required fields in the schema. Keys which do + not correspond to a field in the schema are ignored. + + :type row_ids: list of string + :param row_ids: (Optional) Unique ids, one per row being inserted. + If not passed, no de-duplication occurs. + + :type selected_fields: list of :class:`SchemaField` + :param selected_fields: + The fields to return. Required if ``table`` is a + :class:`~google.cloud.bigquery.table.TableReference`. + + :type skip_invalid_rows: bool + :param skip_invalid_rows: (Optional) Insert all valid rows of a + request, even if invalid rows exist. + The default value is False, which causes + the entire request to fail if any invalid + rows exist. + + :type ignore_unknown_values: bool + :param ignore_unknown_values: (Optional) Accept rows that contain + values that do not match the schema. + The unknown values are ignored. Default + is False, which treats unknown values as + errors. + + :type template_suffix: str + :param template_suffix: + (Optional) treat ``name`` as a template table and provide a suffix. + BigQuery will create the table `` + `` based + on the schema of the template table. See + https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables + + :rtype: list of mappings + :returns: One mapping per row with insert errors: the "index" key + identifies the row, and the "errors" key contains a list + of the mappings describing one or more problems with the + row. + :raises: ValueError if table's schema is not set + """ + if selected_fields is not None: + schema = selected_fields + elif isinstance(table, TableReference): + raise ValueError('need selected_fields with TableReference') + elif isinstance(table, Table): + if len(table._schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + schema = table.schema + else: + raise TypeError('table should be Table or TableReference') + + rows_info = [] + data = {'rows': rows_info} + + for index, row in enumerate(rows): + if isinstance(row, dict): + row = _row_from_mapping(row, schema) + row_info = {} + + for field, value in zip(schema, row): + converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) + if converter is not None: # STRING doesn't need converting + value = converter(value) + row_info[field.name] = value + + info = {'json': row_info} + if row_ids is not None: + info['insertId'] = row_ids[index] + + rows_info.append(info) + + if skip_invalid_rows is not None: + data['skipInvalidRows'] = skip_invalid_rows + + if ignore_unknown_values is not None: + data['ignoreUnknownValues'] = ignore_unknown_values + + if template_suffix is not None: + data['templateSuffix'] = template_suffix + + response = self._connection.api_request( + method='POST', + path='%s/insertAll' % table.path, + data=data) + errors = [] + + for error in response.get('insertErrors', ()): + errors.append({'index': int(error['index']), + 'errors': error['errors']}) + + return errors + def query_rows(self, query, job_config=None, job_id=None, timeout=None): """Start a query job and wait for the results. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 9630f1495290..74146b49385d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -23,7 +23,6 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW _TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'" @@ -554,21 +553,6 @@ def from_api_repr(cls, resource, client): table._set_properties(resource) return table - def _require_client(self, client): - """Check client or verify over-ride. - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: :class:`google.cloud.bigquery.client.Client` - :returns: The client passed in or the currently bound client. - """ - if client is None: - client = self._client - return client - def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -642,131 +626,37 @@ def _build_resource(self, filter_fields): resource[api_field] = getattr(self, f) return resource - def row_from_mapping(self, mapping): - """Convert a mapping to a row tuple using the schema. - - :type mapping: dict - :param mapping: Mapping of row data: must contain keys for all - required fields in the schema. Keys which do not correspond - to a field in the schema are ignored. - - :rtype: tuple - :returns: Tuple whose elements are ordered according to the table's - schema. - :raises: ValueError if table's schema is not set - """ - if len(self._schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - - row = [] - for field in self.schema: - if field.mode == 'REQUIRED': - row.append(mapping[field.name]) - elif field.mode == 'REPEATED': - row.append(mapping.get(field.name, ())) - elif field.mode == 'NULLABLE': - row.append(mapping.get(field.name)) - else: - raise ValueError( - "Unknown field mode: {}".format(field.mode)) - return tuple(row) - - def insert_data(self, - rows, - row_ids=None, - skip_invalid_rows=None, - ignore_unknown_values=None, - template_suffix=None, - client=None): - """API call: insert table data via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll - - :type rows: list of tuples - :param rows: Row data to be inserted. Each tuple should contain data - for each schema field on the current table and in the - same order as the schema fields. - - :type row_ids: list of string - :param row_ids: Unique ids, one per row being inserted. If not - passed, no de-duplication occurs. - - :type skip_invalid_rows: bool - :param skip_invalid_rows: (Optional) Insert all valid rows of a - request, even if invalid rows exist. - The default value is False, which causes - the entire request to fail if any invalid - rows exist. - - :type ignore_unknown_values: bool - :param ignore_unknown_values: (Optional) Accept rows that contain - values that do not match the schema. - The unknown values are ignored. Default - is False, which treats unknown values as - errors. - - :type template_suffix: str - :param template_suffix: - (Optional) treat ``name`` as a template table and provide a suffix. - BigQuery will create the table `` + `` based - on the schema of the template table. See - https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: list of mappings - :returns: One mapping per row with insert errors: the "index" key - identifies the row, and the "errors" key contains a list - of the mappings describing one or more problems with the - row. - :raises: ValueError if table's schema is not set - """ - if len(self._schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - - client = self._require_client(client) - rows_info = [] - data = {'rows': rows_info} - - for index, row in enumerate(rows): - row_info = {} - - for field, value in zip(self._schema, row): - converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) - if converter is not None: # STRING doesn't need converting - value = converter(value) - row_info[field.name] = value - - info = {'json': row_info} - if row_ids is not None: - info['insertId'] = row_ids[index] - - rows_info.append(info) - - if skip_invalid_rows is not None: - data['skipInvalidRows'] = skip_invalid_rows - - if ignore_unknown_values is not None: - data['ignoreUnknownValues'] = ignore_unknown_values - - if template_suffix is not None: - data['templateSuffix'] = template_suffix - - response = client._connection.api_request( - method='POST', - path='%s/insertAll' % self.path, - data=data) - errors = [] - - for error in response.get('insertErrors', ()): - errors.append({'index': int(error['index']), - 'errors': error['errors']}) - - return errors + +def _row_from_mapping(mapping, schema): + """Convert a mapping to a row tuple using the schema. + + :type mapping: dict + :param mapping: Mapping of row data: must contain keys for all + required fields in the schema. Keys which do not correspond + to a field in the schema are ignored. + + :type schema: list of :class:`SchemaField` + :param schema: The schema of the table destination for the rows + + :rtype: tuple + :returns: Tuple whose elements are ordered according to the schema. + :raises: ValueError if schema is empty + """ + if len(schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + + row = [] + for field in schema: + if field.mode == 'REQUIRED': + row.append(mapping[field.name]) + elif field.mode == 'REPEATED': + row.append(mapping.get(field.name, ())) + elif field.mode == 'NULLABLE': + row.append(mapping.get(field.name)) + else: + raise ValueError( + "Unknown field mode: {}".format(field.mode)) + return tuple(row) def _parse_schema_resource(info): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 7fa3ff758897..39db9a69c3fd 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -318,7 +318,7 @@ def _fetch_single_page(table, selected_fields=None): page = six.next(iterator.pages) return list(page) - def test_insert_data_then_dump_table(self): + def test_create_rows_then_dump_table(self): NOW_SECONDS = 1448911495.484366 NOW = datetime.datetime.utcfromtimestamp( NOW_SECONDS).replace(tzinfo=UTC) @@ -330,20 +330,21 @@ def test_insert_data_then_dump_table(self): ] ROW_IDS = range(len(ROWS)) - dataset = self.temp_dataset(_make_dataset_id('insert_data_then_dump')) - TABLE_NAME = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - now = bigquery.SchemaField('now', 'TIMESTAMP') - table_arg = Table(dataset.table(TABLE_NAME), - schema=[full_name, age, now], client=Config.CLIENT) + dataset = self.temp_dataset(_make_dataset_id('create_rows_then_dump')) + TABLE_ID = 'test_table' + schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField('now', 'TIMESTAMP'), + ] + table_arg = Table(dataset.table(TABLE_ID), schema=schema, + client=Config.CLIENT) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) - errors = table.insert_data(ROWS, ROW_IDS) + errors = Config.CLIENT.create_rows(table, ROWS, ROW_IDS) self.assertEqual(len(errors), 0) rows = () @@ -1278,7 +1279,7 @@ def test_query_future(self): row_tuples = [r.values() for r in iterator] self.assertEqual(row_tuples, [(1,)]) - def test_insert_nested_nested(self): + def test_create_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField schema = [ @@ -1299,21 +1300,57 @@ def test_insert_nested_nested(self): to_insert = [ ('Some value', record) ] - table_name = 'test_table' + table_id = 'test_table' dataset = self.temp_dataset(_make_dataset_id('issue_2951')) - table_arg = Table(dataset.table(table_name), schema=schema, + table_arg = Table(dataset.table(table_id), schema=schema, client=Config.CLIENT) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - table.insert_data(to_insert) + Config.CLIENT.create_rows(table, to_insert) retry = RetryResult(_has_rows, max_tries=8) rows = retry(self._fetch_single_page)(table) row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, to_insert) - def test_create_table_insert_fetch_nested_schema(self): + def test_create_rows_nested_nested_dictionary(self): + # See #2951 + SF = bigquery.SchemaField + schema = [ + SF('string_col', 'STRING', mode='NULLABLE'), + SF('record_col', 'RECORD', mode='NULLABLE', fields=[ + SF('nested_string', 'STRING', mode='NULLABLE'), + SF('nested_repeated', 'INTEGER', mode='REPEATED'), + SF('nested_record', 'RECORD', mode='NULLABLE', fields=[ + SF('nested_nested_string', 'STRING', mode='NULLABLE'), + ]), + ]), + ] + record = { + 'nested_string': 'another string value', + 'nested_repeated': [0, 1, 2], + 'nested_record': {'nested_nested_string': 'some deep insight'}, + } + to_insert = [ + {'string_col': 'Some value', 'record_col': record} + ] + table_id = 'test_table' + dataset = self.temp_dataset(_make_dataset_id('issue_2951')) + table_arg = Table(dataset.table(table_id), schema=schema, + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + Config.CLIENT.create_rows(table, to_insert) + + retry = RetryResult(_has_rows, max_tries=8) + rows = retry(self._fetch_single_page)(table) + row_tuples = [r.values() for r in rows] + expected_rows = [('Some value', record)] + self.assertEqual(row_tuples, expected_rows) + + def test_create_table_rows_fetch_nested_schema(self): table_name = 'test_table' dataset = self.temp_dataset( _make_dataset_id('create_table_nested_schema')) @@ -1334,7 +1371,7 @@ def test_create_table_insert_fetch_nested_schema(self): to_insert.append( tuple(mapping[field.name] for field in schema)) - errors = table.insert_data(to_insert) + errors = Config.CLIENT.create_rows(table, to_insert) self.assertEqual(len(errors), 0) retry = RetryResult(_has_rows, max_tries=8) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 9cdf7129c9c3..49030463f78b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1917,6 +1917,385 @@ def test_query_w_query_parameters(self): 'parameterValue': {'value': '123'} }) + def test_create_rows_wo_schema(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + table = Table(table_ref) + ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + + with self.assertRaises(ValueError) as exc: + client.create_rows(table, ROWS) + + self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) + + def test_create_rows_w_schema(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import _microseconds_from_datetime + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + ] + table = Table(table_ref, schema=schema) + ROWS = [ + ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), + ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), + ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)), + ('Bhettye Rhubble', 27, None), + ] + + def _row_data(row): + joined = row[2] + if isinstance(row[2], datetime.datetime): + joined = _microseconds_from_datetime(joined) * 1e-6 + return {'full_name': row[0], + 'age': str(row[1]), + 'joined': joined} + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_list_of_dictionaries(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import _microseconds_from_datetime + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + ] + table = Table(table_ref, schema=schema) + ROWS = [ + { + 'full_name': 'Phred Phlyntstone', 'age': 32, + 'joined': _datetime_to_rfc3339(WHEN) + }, + { + 'full_name': 'Bharney Rhubble', 'age': 33, + 'joined': WHEN + datetime.timedelta(seconds=1) + }, + { + 'full_name': 'Wylma Phlyntstone', 'age': 29, + 'joined': WHEN + datetime.timedelta(seconds=2) + }, + { + 'full_name': 'Bhettye Rhubble', 'age': 27, 'joined': None + }, + ] + + def _row_data(row): + joined = row['joined'] + if isinstance(joined, datetime.datetime): + row['joined'] = _microseconds_from_datetime(joined) * 1e-6 + row['age'] = str(row['age']) + return row + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_list_of_Rows(self): + from google.cloud.bigquery._helpers import Row + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + ] + table = Table(table_ref, schema=schema) + f2i = {'full_name': 0, 'age': 1} + ROWS = [ + Row(('Phred Phlyntstone', 32), f2i), + Row(('Bharney Rhubble', 33), f2i), + Row(('Wylma Phlyntstone', 29), f2i), + Row(('Bhettye Rhubble', 27), f2i), + ] + + def _row_data(row): + return {'full_name': row[0], 'age': str(row[1])} + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_skip_invalid_and_ignore_unknown(self): + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + RESPONSE = { + 'insertErrors': [ + {'index': 1, + 'errors': [ + {'reason': 'REASON', + 'location': 'LOCATION', + 'debugInfo': 'INFO', + 'message': 'MESSAGE'} + ]}, + ]} + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESPONSE) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('voter', 'BOOLEAN', mode='NULLABLE'), + ] + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + table = Table(table_ref, schema=schema) + ROWS = [ + ('Phred Phlyntstone', 32, True), + ('Bharney Rhubble', 33, False), + ('Wylma Phlyntstone', 29, True), + ('Bhettye Rhubble', 27, True), + ] + + def _row_data(row): + return { + 'full_name': row[0], + 'age': str(row[1]), + 'voter': row[2] and 'true' or 'false', + } + + SENT = { + 'skipInvalidRows': True, + 'ignoreUnknownValues': True, + 'templateSuffix': '20160303', + 'rows': [{'insertId': index, 'json': _row_data(row)} + for index, row in enumerate(ROWS)], + } + + errors = client.create_rows( + table, + ROWS, + row_ids=[index for index, _ in enumerate(ROWS)], + skip_invalid_rows=True, + ignore_unknown_values=True, + template_suffix='20160303', + ) + + self.assertEqual(len(errors), 1) + self.assertEqual(errors[0]['index'], 1) + self.assertEqual(len(errors[0]['errors']), 1) + self.assertEqual(errors[0]['errors'][0], + RESPONSE['insertErrors'][0]['errors'][0]) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_repeated_fields(self): + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + full_name = SchemaField('color', 'STRING', mode='REPEATED') + index = SchemaField('index', 'INTEGER', 'REPEATED') + score = SchemaField('score', 'FLOAT', 'REPEATED') + struct = SchemaField('struct', 'RECORD', mode='REPEATED', + fields=[index, score]) + table = Table(table_ref, schema=[full_name, struct]) + ROWS = [ + (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), + ] + + def _row_data(row): + return {'color': row[0], + 'struct': row[1]} + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_record_schema(self): + from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + area_code = SchemaField('area_code', 'STRING', 'REQUIRED') + local_number = SchemaField('local_number', 'STRING', 'REQUIRED') + rank = SchemaField('rank', 'INTEGER', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='NULLABLE', + fields=[area_code, local_number, rank]) + ROWS = [ + ('Phred Phlyntstone', {'area_code': '800', + 'local_number': '555-1212', + 'rank': 1}), + ('Bharney Rhubble', {'area_code': '877', + 'local_number': '768-5309', + 'rank': 2}), + ('Wylma Phlyntstone', None), + ] + + def _row_data(row): + return {'full_name': row[0], + 'phone': row[1]} + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = client.create_rows(table_ref, ROWS, + selected_fields=[full_name, phone]) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_errors(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + ROWS = [ + ('Phred Phlyntstone', 32, True), + ('Bharney Rhubble', 33, False), + ('Wylma Phlyntstone', 29, True), + ('Bhettye Rhubble', 27, True), + ] + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + + # table ref with no selected fields + with self.assertRaises(ValueError): + client.create_rows(table_ref, ROWS) + + # table with no schema + with self.assertRaises(ValueError): + client.create_rows(Table(table_ref), ROWS) + + # neither Table nor tableReference + with self.assertRaises(TypeError): + client.create_rows(1, ROWS) + def test_query_rows_defaults(self): from google.api.core.page_iterator import HTTPIterator from google.cloud.bigquery._helpers import Row diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index f9fe1ddab2d1..73c1c9aab894 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -694,21 +694,33 @@ def test_partition_expiration_w_none_no_partition_set(self): self.assertIsNone(table.partitioning_type) self.assertIsNone(table.partition_expiration) - def test_row_from_mapping_wo_schema(self): - from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA + +class Test_row_from_mapping(unittest.TestCase, _SchemaBase): + + PROJECT = 'prahj-ekt' + DS_ID = 'dataset-name' + TABLE_NAME = 'table-name' + + def _call_fut(self, mapping, schema): + from google.cloud.bigquery.table import _row_from_mapping + + return _row_from_mapping(mapping, schema) + + def test__row_from_mapping_wo_schema(self): + from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} client = _Client(project=self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = Table(table_ref, client=client) with self.assertRaises(ValueError) as exc: - table.row_from_mapping(MAPPING) + self._call_fut(MAPPING, table.schema) self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) - def test_row_from_mapping_w_invalid_schema(self): - from google.cloud.bigquery.table import SchemaField + def test__row_from_mapping_w_invalid_schema(self): + from google.cloud.bigquery.table import Table, SchemaField MAPPING = { 'full_name': 'Phred Phlyntstone', 'age': 32, @@ -722,17 +734,17 @@ def test_row_from_mapping_w_invalid_schema(self): age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') bogus = SchemaField('joined', 'STRING', mode='BOGUS') - table = self._make_one(table_ref, - schema=[full_name, age, colors, bogus], - client=client) + table = Table(table_ref, + schema=[full_name, age, colors, bogus], + client=client) with self.assertRaises(ValueError) as exc: - table.row_from_mapping(MAPPING) + self._call_fut(MAPPING, table.schema) self.assertIn('Unknown field mode: BOGUS', str(exc.exception)) - def test_row_from_mapping_w_schema(self): - from google.cloud.bigquery.table import SchemaField + def test__row_from_mapping_w_schema(self): + from google.cloud.bigquery.table import Table, SchemaField MAPPING = { 'full_name': 'Phred Phlyntstone', 'age': 32, @@ -746,233 +758,14 @@ def test_row_from_mapping_w_schema(self): age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') joined = SchemaField('joined', 'STRING', mode='NULLABLE') - table = self._make_one(table_ref, - schema=[full_name, age, colors, joined], - client=client) + table = Table(table_ref, + schema=[full_name, age, colors, joined], + client=client) self.assertEqual( - table.row_from_mapping(MAPPING), + self._call_fut(MAPPING, table.schema), ('Phred Phlyntstone', 32, ['red', 'green'], None)) - def test_insert_data_wo_schema(self): - from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA - - client = _Client(project=self.PROJECT) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] - - with self.assertRaises(ValueError) as exc: - table.insert_data(ROWS) - - self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) - - def test_insert_data_w_bound_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _datetime_to_rfc3339 - from google.cloud._helpers import _microseconds_from_datetime - from google.cloud.bigquery.table import SchemaField - - WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._make_one(table_ref, schema=[full_name, age, joined], - client=client) - ROWS = [ - ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), - ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), - ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)), - ('Bhettye Rhubble', 27, None), - ] - - def _row_data(row): - joined = row[2] - if isinstance(row[2], datetime.datetime): - joined = _microseconds_from_datetime(joined) * 1e-6 - return {'full_name': row[0], - 'age': str(row[1]), - 'joined': joined} - - SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], - } - - errors = table.insert_data(ROWS) - - self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - - def test_insert_data_w_alternate_client(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - RESPONSE = { - 'insertErrors': [ - {'index': 1, - 'errors': [ - {'reason': 'REASON', - 'location': 'LOCATION', - 'debugInfo': 'INFO', - 'message': 'MESSAGE'} - ]}, - ]} - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESPONSE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') - table = self._make_one(table_ref, schema=[full_name, age, voter], - client=client1) - ROWS = [ - ('Phred Phlyntstone', 32, True), - ('Bharney Rhubble', 33, False), - ('Wylma Phlyntstone', 29, True), - ('Bhettye Rhubble', 27, True), - ] - - def _row_data(row): - return { - 'full_name': row[0], - 'age': str(row[1]), - 'voter': row[2] and 'true' or 'false', - } - - SENT = { - 'skipInvalidRows': True, - 'ignoreUnknownValues': True, - 'templateSuffix': '20160303', - 'rows': [{'insertId': index, 'json': _row_data(row)} - for index, row in enumerate(ROWS)], - } - - errors = table.insert_data( - client=client2, - rows=ROWS, - row_ids=[index for index, _ in enumerate(ROWS)], - skip_invalid_rows=True, - ignore_unknown_values=True, - template_suffix='20160303', - ) - - self.assertEqual(len(errors), 1) - self.assertEqual(errors[0]['index'], 1) - self.assertEqual(len(errors[0]['errors']), 1) - self.assertEqual(errors[0]['errors'][0], - RESPONSE['insertErrors'][0]['errors'][0]) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - - def test_insert_data_w_repeated_fields(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('color', 'STRING', mode='REPEATED') - index = SchemaField('index', 'INTEGER', 'REPEATED') - score = SchemaField('score', 'FLOAT', 'REPEATED') - struct = SchemaField('struct', 'RECORD', mode='REPEATED', - fields=[index, score]) - table = self._make_one(table_ref, schema=[full_name, struct], - client=client) - ROWS = [ - (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), - ] - - def _row_data(row): - return {'color': row[0], - 'struct': row[1]} - - SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], - } - - errors = table.insert_data(ROWS) - - self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - - def test_insert_data_w_record_schema(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - area_code = SchemaField('area_code', 'STRING', 'REQUIRED') - local_number = SchemaField('local_number', 'STRING', 'REQUIRED') - rank = SchemaField('rank', 'INTEGER', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='NULLABLE', - fields=[area_code, local_number, rank]) - table = self._make_one(table_ref, schema=[full_name, phone], - client=client) - ROWS = [ - ('Phred Phlyntstone', {'area_code': '800', - 'local_number': '555-1212', - 'rank': 1}), - ('Bharney Rhubble', {'area_code': '877', - 'local_number': '768-5309', - 'rank': 2}), - ('Wylma Phlyntstone', None), - ] - - def _row_data(row): - return {'full_name': row[0], - 'phone': row[1]} - - SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], - } - - errors = table.insert_data(ROWS) - - self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): @@ -1102,8 +895,3 @@ class _Connection(object): def __init__(self, *responses): self._responses = responses[:] self._requested = [] - - def api_request(self, **kw): - self._requested.append(kw) - response, self._responses = self._responses[0], self._responses[1:] - return response From 2625ccbcae47ba7d2d08d1b8c02e0dc7510bf46c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Oct 2017 12:29:19 -0700 Subject: [PATCH 0233/2016] BQ: remove unused ConfigurationProperty classes. (#4157) --- .../google/cloud/bigquery/_helpers.py | 66 ----------- .../tests/unit/test__helpers.py | 112 ------------------ 2 files changed, 178 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index ae7f1186fe40..0ee7a9c01c6a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -480,72 +480,6 @@ class _EnumApiResourceProperty(_ApiResourceProperty): """ -class _ConfigurationProperty(object): - """Base property implementation. - - Values will be stored on a `_configuration` helper attribute of the - property's job instance. - - :type name: str - :param name: name of the property - """ - - def __init__(self, name): - self.name = name - self._backing_name = '_%s' % (self.name,) - - def __get__(self, instance, owner): - """Descriptor protocal: accesstor""" - if instance is None: - return self - return getattr(instance._configuration, self._backing_name) - - def _validate(self, value): - """Subclasses override to impose validation policy.""" - pass - - def __set__(self, instance, value): - """Descriptor protocal: mutator""" - self._validate(value) - setattr(instance._configuration, self._backing_name, value) - - def __delete__(self, instance): - """Descriptor protocal: deleter""" - delattr(instance._configuration, self._backing_name) - - -class _TypedProperty(_ConfigurationProperty): - """Property implementation: validates based on value type. - - :type name: str - :param name: name of the property - - :type property_type: type or sequence of types - :param property_type: type to be validated - """ - def __init__(self, name, property_type): - super(_TypedProperty, self).__init__(name) - self.property_type = property_type - - def _validate(self, value): - """Ensure that 'value' is of the appropriate type. - - :raises: ValueError on a type mismatch. - """ - if value is None: - return - if not isinstance(value, self.property_type): - raise ValueError('Required type: %s' % (self.property_type,)) - - -class _EnumProperty(_ConfigurationProperty): - """Pseudo-enumeration class. - - :type name: str - :param name: name of the property. - """ - - def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 6d2a43fffb11..f37d39a4f823 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -749,43 +749,6 @@ def test_w_datetime(self): self.assertEqual(self._call_fut(when), '12:13:41') -class Test_ConfigurationProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import _ConfigurationProperty - - return _ConfigurationProperty - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_it(self): - - class Configuration(object): - _attr = None - - class Wrapper(object): - attr = self._make_one('attr') - - def __init__(self): - self._configuration = Configuration() - - self.assertEqual(Wrapper.attr.name, 'attr') - - wrapper = Wrapper() - self.assertIsNone(wrapper.attr) - - value = object() - wrapper.attr = value - self.assertIs(wrapper.attr, value) - self.assertIs(wrapper._configuration._attr, value) - - del wrapper.attr - self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._configuration._attr) - - class Test_TypedApiResourceProperty(unittest.TestCase): @staticmethod @@ -829,81 +792,6 @@ def __init__(self): wrapper._properties['back'] -class Test_TypedProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import _TypedProperty - - return _TypedProperty - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_it(self): - - class Configuration(object): - _attr = None - - class Wrapper(object): - attr = self._make_one('attr', int) - - def __init__(self): - self._configuration = Configuration() - - wrapper = Wrapper() - with self.assertRaises(ValueError): - wrapper.attr = 'BOGUS' - - wrapper.attr = 42 - self.assertEqual(wrapper.attr, 42) - self.assertEqual(wrapper._configuration._attr, 42) - - wrapper.attr = None - self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._configuration._attr) - - wrapper.attr = 23 - self.assertEqual(wrapper.attr, 23) - self.assertEqual(wrapper._configuration._attr, 23) - - del wrapper.attr - self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._configuration._attr) - - -class Test_EnumProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import _EnumProperty - - return _EnumProperty - - def test_it(self): - - class Sub(self._get_target_class()): - pass - - class Configuration(object): - _attr = None - - class Wrapper(object): - attr = Sub('attr') - - def __init__(self): - self._configuration = Configuration() - - wrapper = Wrapper() - wrapper.attr = 'FOO' - self.assertEqual(wrapper.attr, 'FOO') - self.assertEqual(wrapper._configuration._attr, 'FOO') - - del wrapper.attr - self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._configuration._attr) - - class Test_ListApiResourceProperty(unittest.TestCase): @staticmethod From 155ba79c8bbe3566e5a7894e269a7c1b70b30145 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 11 Oct 2017 15:03:30 -0700 Subject: [PATCH 0234/2016] BigQuery: removes Client from Table class (#4159) --- .../google/cloud/bigquery/client.py | 8 +- .../google/cloud/bigquery/table.py | 7 +- .../google-cloud-bigquery/tests/system.py | 42 ++--- .../tests/unit/test_client.py | 14 +- .../tests/unit/test_table.py | 153 ++++-------------- 5 files changed, 64 insertions(+), 160 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index c55a36ec1994..488b409ff77c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -239,7 +239,7 @@ def create_table(self, table): del resource[field] api_response = self._connection.api_request( method='POST', path=path, data=resource) - return Table.from_api_repr(api_response, self) + return Table.from_api_repr(api_response) def get_dataset(self, dataset_ref): """Fetch the dataset referenced by ``dataset_ref`` @@ -267,7 +267,7 @@ def get_table(self, table_ref): """ api_response = self._connection.api_request( method='GET', path=table_ref.path) - return Table.from_api_repr(api_response, self) + return Table.from_api_repr(api_response) def update_dataset(self, dataset, fields): """Change some fields of a dataset. @@ -331,7 +331,7 @@ def update_table(self, table, properties): headers = None api_response = self._connection.api_request( method='PATCH', path=table.path, data=partial, headers=headers) - return Table.from_api_repr(api_response, client=self) + return Table.from_api_repr(api_response) def list_dataset_tables(self, dataset, max_results=None, page_token=None): """List tables in the dataset. @@ -1140,7 +1140,7 @@ def _item_to_table(iterator, resource): :rtype: :class:`~google.cloud.bigquery.table.Table` :returns: The next table in the page. """ - return Table.from_api_repr(resource, iterator.client) + return Table.from_api_repr(resource) def _make_job_id(job_id): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 74146b49385d..9df62dcf66d0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -162,14 +162,13 @@ class Table(object): 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema' ] - def __init__(self, table_ref, schema=(), client=None): + def __init__(self, table_ref, schema=()): self._project = table_ref.project self._table_id = table_ref.table_id self._dataset_id = table_ref.dataset_id self._properties = {} # Let the @property do validation. self.schema = schema - self._client = client @property def project(self): @@ -526,7 +525,7 @@ def view_use_legacy_sql(self, value): self._properties['view']['useLegacySql'] = value @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource): """Factory: construct a table given its API representation :type resource: dict @@ -549,7 +548,7 @@ def from_api_repr(cls, resource, client): dataset_id = resource['tableReference']['datasetId'] dataset_ref = dataset.DatasetReference(project_id, dataset_id) - table = cls(dataset_ref.table(table_id), client=client) + table = cls(dataset_ref.table(table_id)) table._set_properties(resource) return table diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 39db9a69c3fd..61397d3d80cf 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -190,8 +190,7 @@ def test_create_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(table_id), schema=[full_name, age], - client=Config.CLIENT) + table_arg = Table(dataset.table(table_id), schema=[full_name, age]) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) @@ -234,9 +233,7 @@ def test_list_dataset_tables(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') for table_name in tables_to_create: - table = Table(dataset.table(table_name), - schema=[full_name, age], - client=Config.CLIENT) + table = Table(dataset.table(table_name), schema=[full_name, age]) created_table = retry_403(Config.CLIENT.create_table)(table) self.to_delete.insert(0, created_table) @@ -257,8 +254,7 @@ def test_update_table(self): bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') ] - table_arg = Table(dataset.table(TABLE_NAME), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(TABLE_NAME), schema=schema) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -292,8 +288,7 @@ def test_update_table_schema(self): bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') ] - table_arg = Table(dataset.table(TABLE_NAME), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(TABLE_NAME), schema=schema) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -337,8 +332,7 @@ def test_create_rows_then_dump_table(self): bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), bigquery.SchemaField('now', 'TIMESTAMP'), ] - table_arg = Table(dataset.table(TABLE_ID), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(TABLE_ID), schema=schema) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -375,8 +369,7 @@ def test_load_table_from_local_file_then_dump_table(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') table_ref = dataset.table(TABLE_NAME) - table_arg = Table(table_ref, schema=[full_name, age], - client=Config.CLIENT) + table_arg = Table(table_ref, schema=[full_name, age]) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -420,7 +413,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) table_ref = dataset.table(TABLE_NAME) - table = Table(table_ref, client=Config.CLIENT) + table = Table(table_ref) self.to_delete.insert(0, table) with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof: @@ -482,8 +475,7 @@ def test_load_table_from_storage_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age]) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -616,7 +608,7 @@ def test_extract_table(self): dataset_id = _make_dataset_id('load_gcs_then_extract') table_id = 'test_table' table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=Config.CLIENT) + table = Table(table_ref) self.to_delete.insert(0, table) rows = [ ('Phred Phlyntstone', 32), @@ -648,7 +640,7 @@ def test_extract_table_w_job_config(self): dataset_id = _make_dataset_id('load_gcs_then_extract') table_id = 'test_table' table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=Config.CLIENT) + table = Table(table_ref) self.to_delete.insert(0, table) rows = [ ('Phred Phlyntstone', 32), @@ -705,8 +697,7 @@ def test_job_cancel(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age]) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -899,7 +890,7 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') table_ref = dataset.table(table_id) - table_arg = Table(table_ref, schema=[greeting], client=Config.CLIENT) + table_arg = Table(table_ref, schema=[greeting]) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -1302,8 +1293,7 @@ def test_create_rows_nested_nested(self): ] table_id = 'test_table' dataset = self.temp_dataset(_make_dataset_id('issue_2951')) - table_arg = Table(dataset.table(table_id), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(table_id), schema=schema) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -1337,8 +1327,7 @@ def test_create_rows_nested_nested_dictionary(self): ] table_id = 'test_table' dataset = self.temp_dataset(_make_dataset_id('issue_2951')) - table_arg = Table(dataset.table(table_id), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(table_id), schema=schema) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -1355,8 +1344,7 @@ def test_create_table_rows_fetch_nested_schema(self): dataset = self.temp_dataset( _make_dataset_id('create_table_nested_schema')) schema = _load_json_schema() - table_arg = Table(dataset.table(table_name), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(table_name), schema=schema) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 49030463f78b..02277f4c095a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -421,7 +421,7 @@ def test_create_table_w_day_partition(self): } conn = client._connection = _Connection(resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=client) + table = Table(table_ref) table.partitioning_type = 'DAY' got = client.create_table(table) @@ -462,7 +462,7 @@ def test_create_table_w_day_partition_and_expire(self): } conn = client._connection = _Connection(resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=client) + table = Table(table_ref) table.partitioning_type = 'DAY' table.partition_expiration = 100 @@ -515,7 +515,7 @@ def test_create_table_w_schema_and_query(self): ] conn = client._connection = _Connection(resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema, client=client) + table = Table(table_ref, schema=schema) table.view_query = query got = client.create_table(table) @@ -670,7 +670,7 @@ def test_update_table(self): client = self._make_one(project=project, credentials=creds) conn = client._connection = _Connection(resource, resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema, client=client) + table = Table(table_ref, schema=schema) table.description = description table.friendly_name = title @@ -726,7 +726,7 @@ def test_update_table_only_use_legacy_sql(self): client = self._make_one(project=project, credentials=creds) conn = client._connection = _Connection(resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=client) + table = Table(table_ref) table.view_use_legacy_sql = True updated_table = client.update_table(table, ['view_use_legacy_sql']) @@ -784,7 +784,7 @@ def test_update_table_w_query(self): client = self._make_one(project=project, credentials=creds) conn = client._connection = _Connection(resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema, client=client) + table = Table(table_ref, schema=schema) table.location = location table.expires = exp_time table.view_query = query @@ -900,7 +900,7 @@ def test_update_table_delete_property(self): client = self._make_one(project=project, credentials=creds) conn = client._connection = _Connection(resource1, resource2) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=client) + table = Table(table_ref) table.description = description table.friendly_name = title table2 = client.update_table(table, ['description', 'friendly_name']) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 73c1c9aab894..0d598864e3c1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -254,10 +254,9 @@ def _verifyResourceProperties(self, table, resource): self.assertEqual(table.schema, []) def test_ctor(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) self.assertEqual(table.table_id, self.TABLE_NAME) self.assertEqual(table.project, self.PROJECT) @@ -287,21 +286,18 @@ def test_ctor(self): def test_ctor_w_schema(self): from google.cloud.bigquery.table import SchemaField - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertEqual(table.schema, [full_name, age]) def test_num_bytes_getter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) # Check with no value set. self.assertIsNone(table.num_bytes) @@ -321,10 +317,9 @@ def test_num_bytes_getter(self): getattr(table, 'num_bytes') def test_num_rows_getter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) # Check with no value set. self.assertIsNone(table.num_rows) @@ -344,20 +339,18 @@ def test_num_rows_getter(self): getattr(table, 'num_rows') def test_schema_setter_non_list(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(TypeError): table.schema = object() def test_schema_setter_invalid_field(self): from google.cloud.bigquery.table import SchemaField - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') with self.assertRaises(ValueError): table.schema = [full_name, object()] @@ -365,10 +358,9 @@ def test_schema_setter_invalid_field(self): def test_schema_setter(self): from google.cloud.bigquery.table import SchemaField - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table.schema = [full_name, age] @@ -385,10 +377,9 @@ def test_props_set_by_server(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table._properties['creationTime'] = _millis(CREATED) table._properties['etag'] = 'ETAG' table._properties['lastModifiedTime'] = _millis(MODIFIED) @@ -408,26 +399,23 @@ def test_props_set_by_server(self): self.assertEqual(table.table_type, 'TABLE') def test_description_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.description = 12345 def test_description_setter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.description = 'DESCRIPTION' self.assertEqual(table.description, 'DESCRIPTION') def test_expires_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.expires = object() @@ -436,83 +424,73 @@ def test_expires_setter(self): from google.cloud._helpers import UTC WHEN = datetime.datetime(2015, 7, 28, 16, 39, tzinfo=UTC) - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.expires = WHEN self.assertEqual(table.expires, WHEN) def test_friendly_name_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.friendly_name = 12345 def test_friendly_name_setter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.friendly_name = 'FRIENDLY' self.assertEqual(table.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.location = 12345 def test_location_setter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.location = 'LOCATION' self.assertEqual(table.location, 'LOCATION') def test_view_query_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.view_query = 12345 def test_view_query_setter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.view_query = 'select * from foo' self.assertEqual(table.view_query, 'select * from foo') def test_view_query_deleter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.view_query = 'select * from foo' del table.view_query self.assertIsNone(table.view_query) def test_view_use_legacy_sql_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.view_use_legacy_sql = 12345 def test_view_use_legacy_sql_setter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.view_use_legacy_sql = False table.view_query = 'select * from foo' self.assertEqual(table.view_use_legacy_sql, False) @@ -520,15 +498,13 @@ def test_view_use_legacy_sql_setter(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client) + klass.from_api_repr(RESOURCE) def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = { 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), 'tableReference': { @@ -539,9 +515,8 @@ def test_from_api_repr_bare(self): 'type': 'TABLE', } klass = self._get_target_class() - table = klass.from_api_repr(RESOURCE, client) + table = klass.from_api_repr(RESOURCE) self.assertEqual(table.table_id, self.TABLE_NAME) - self.assertIs(table._client, client) self._verifyResourceProperties(table, RESOURCE) def test_from_api_repr_w_properties(self): @@ -549,7 +524,6 @@ def test_from_api_repr_w_properties(self): from google.cloud._helpers import UTC from google.cloud._helpers import _millis - client = _Client(self.PROJECT) RESOURCE = self._makeResource() RESOURCE['view'] = {'query': 'select fullname, age from person_ages'} RESOURCE['type'] = 'VIEW' @@ -557,52 +531,39 @@ def test_from_api_repr_w_properties(self): self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) RESOURCE['expirationTime'] = _millis(self.EXP_TIME) klass = self._get_target_class() - table = klass.from_api_repr(RESOURCE, client) - self.assertIs(table._client, client) + table = klass.from_api_repr(RESOURCE) self._verifyResourceProperties(table, RESOURCE) def test_partition_type_setter_bad_type(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) with self.assertRaises(ValueError): table.partitioning_type = 123 def test_partition_type_setter_unknown_value(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) with self.assertRaises(ValueError): table.partitioning_type = "HASH" def test_partition_type_setter_w_known_value(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partitioning_type) table.partitioning_type = 'DAY' self.assertEqual(table.partitioning_type, 'DAY') @@ -610,15 +571,11 @@ def test_partition_type_setter_w_known_value(self): def test_partition_type_setter_w_none(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) table._properties['timePartitioning'] = {'type': 'DAY'} table.partitioning_type = None self.assertIsNone(table.partitioning_type) @@ -627,30 +584,22 @@ def test_partition_type_setter_w_none(self): def test_partition_experation_bad_type(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) with self.assertRaises(ValueError): table.partition_expiration = "NEVER" def test_partition_expiration_w_integer(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 self.assertEqual(table.partitioning_type, "DAY") @@ -659,15 +608,11 @@ def test_partition_expiration_w_integer(self): def test_partition_expiration_w_none(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table._properties['timePartitioning'] = { 'type': 'DAY', @@ -680,15 +625,11 @@ def test_partition_expiration_w_none(self): def test_partition_expiration_w_none_no_partition_set(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = None self.assertIsNone(table.partitioning_type) @@ -709,10 +650,9 @@ def _call_fut(self, mapping, schema): def test__row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} - client = _Client(project=self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = Table(table_ref, client=client) + table = Table(table_ref) with self.assertRaises(ValueError) as exc: self._call_fut(MAPPING, table.schema) @@ -727,16 +667,13 @@ def test__row_from_mapping_w_invalid_schema(self): 'colors': ['red', 'green'], 'bogus': 'WHATEVER', } - client = _Client(project=self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') bogus = SchemaField('joined', 'STRING', mode='BOGUS') - table = Table(table_ref, - schema=[full_name, age, colors, bogus], - client=client) + table = Table(table_ref, schema=[full_name, age, colors, bogus]) with self.assertRaises(ValueError) as exc: self._call_fut(MAPPING, table.schema) @@ -751,16 +688,13 @@ def test__row_from_mapping_w_schema(self): 'colors': ['red', 'green'], 'extra': 'IGNORED', } - client = _Client(project=self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') joined = SchemaField('joined', 'STRING', mode='NULLABLE') - table = Table(table_ref, - schema=[full_name, age, colors, joined], - client=client) + table = Table(table_ref, schema=[full_name, age, colors, joined]) self.assertEqual( self._call_fut(MAPPING, table.schema), @@ -878,20 +812,3 @@ def test_w_subfields(self): {'name': 'number', 'type': 'STRING', 'mode': 'REQUIRED'}]}) - - -class _Client(object): - - def __init__(self, project='project', connection=None): - self.project = project - self._connection = connection - - -class _Connection(object): - - API_BASE_URL = 'http://example.com' - USER_AGENT = 'testing 1.2.3' - - def __init__(self, *responses): - self._responses = responses[:] - self._requested = [] From 317b8980749e4c58adbcfcffabbe4232a411083d Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 11 Oct 2017 20:07:22 -0400 Subject: [PATCH 0235/2016] bigquery: add streaming buffer info (#4161) Unfortunately there's no good way to write a system test for this, since you can never be sure that one gets created. But I informally verified that the code works by running create_rows a lot until I got a streaming buffer. --- .../google/cloud/bigquery/table.py | 23 +++++++++++++++++++ .../tests/unit/test_table.py | 16 +++++++++++++ 2 files changed, 39 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 9df62dcf66d0..990349336433 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -524,6 +524,12 @@ def view_use_legacy_sql(self, value): self._properties['view'] = {} self._properties['view']['useLegacySql'] = value + @property + def streaming_buffer(self): + sb = self._properties.get('streamingBuffer') + if sb is not None: + return StreamingBuffer(sb) + @classmethod def from_api_repr(cls, resource): """Factory: construct a table given its API representation @@ -658,6 +664,23 @@ def _row_from_mapping(mapping, schema): return tuple(row) +class StreamingBuffer(object): + """Information about a table's streaming buffer. + + See https://cloud.google.com/bigquery/streaming-data-into-bigquery. + + :type resource: dict + :param resource: streaming buffer representation returned from the API + """ + + def __init__(self, resource): + self.estimated_bytes = int(resource['estimatedBytes']) + self.estimated_rows = int(resource['estimatedRows']) + # time is in milliseconds since the epoch. + self.oldest_entry_time = _datetime_from_microseconds( + 1000.0 * int(resource['oldestEntryTime'])) + + def _parse_schema_resource(info): """Parse a resource fragment into a schema field. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 0d598864e3c1..12b2ec98c4d4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -174,6 +174,8 @@ def _setUpConstants(self): self.RESOURCE_URL = 'http://example.com/path/to/resource' self.NUM_BYTES = 12345 self.NUM_ROWS = 67 + self.NUM_EST_BYTES = 1234 + self.NUM_EST_ROWS = 23 def _makeResource(self): self._setUpConstants() @@ -194,6 +196,10 @@ def _makeResource(self): 'numRows': self.NUM_ROWS, 'numBytes': self.NUM_BYTES, 'type': 'TABLE', + 'streamingBuffer': { + 'estimatedRows': str(self.NUM_EST_ROWS), + 'estimatedBytes': str(self.NUM_EST_BYTES), + 'oldestEntryTime': self.WHEN_TS * 1000}, } def _verifyReadonlyResourceProperties(self, table, resource): @@ -222,6 +228,16 @@ def _verifyReadonlyResourceProperties(self, table, resource): else: self.assertIsNone(table.self_link) + if 'streamingBuffer' in resource: + self.assertEqual(table.streaming_buffer.estimated_rows, + self.NUM_EST_ROWS) + self.assertEqual(table.streaming_buffer.estimated_bytes, + self.NUM_EST_BYTES) + self.assertEqual(table.streaming_buffer.oldest_entry_time, + self.WHEN) + else: + self.assertIsNone(table.streaming_buffer) + self.assertEqual(table.full_table_id, self.TABLE_FULL_ID) self.assertEqual(table.table_type, 'TABLE' if 'view' not in resource else 'VIEW') From dde622779dd1926c4dbe46bad6e67b198b1407a0 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 12 Oct 2017 12:55:29 -0400 Subject: [PATCH 0236/2016] bigquery: factor out common values in test_client.py (#4162) --- .../tests/unit/test_client.py | 824 ++++++++---------- 1 file changed, 342 insertions(+), 482 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 02277f4c095a..22df27c6358c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -34,6 +34,11 @@ def _make_credentials(): class TestClient(unittest.TestCase): + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + TABLE_ID = 'TABLE_ID' + TABLE_REF = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + @staticmethod def _get_target_class(): from google.cloud.bigquery.client import Client @@ -46,10 +51,10 @@ def _make_one(self, *args, **kw): def test_ctor(self): from google.cloud.bigquery._http import Connection - PROJECT = 'PROJECT' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) self.assertIsInstance(client._connection, Connection) self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) @@ -57,9 +62,8 @@ def test_ctor(self): def test__get_query_results_miss_w_explicit_project_and_timeout(self): from google.cloud.exceptions import NotFound - project = 'PROJECT' creds = _make_credentials() - client = self._make_one(project, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection() with self.assertRaises(NotFound): @@ -75,7 +79,6 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): req['query_params'], {'maxResults': 0, 'timeoutMs': 500}) def test__get_query_results_hit(self): - project = 'PROJECT' job_id = 'query_job' data = { 'kind': 'bigquery#getQueryResultsResponse', @@ -95,7 +98,7 @@ def test__get_query_results_hit(self): ] }, 'jobReference': { - 'projectId': project, + 'projectId': self.PROJECT, 'jobId': job_id, }, 'totalRows': '10', @@ -105,7 +108,7 @@ def test__get_query_results_hit(self): } creds = _make_credentials() - client = self._make_one(project, creds) + client = self._make_one(self.PROJECT, creds) client._connection = _Connection(data) query_results = client._get_query_results(job_id) @@ -157,12 +160,11 @@ def test_list_projects_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_projects_explicit_response_missing_projects_key(self): - PROJECT = 'PROJECT' PATH = 'projects' TOKEN = 'TOKEN' DATA = {} creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_projects(max_results=3, page_token=TOKEN) @@ -183,28 +185,27 @@ def test_list_projects_explicit_response_missing_projects_key(self): def test_list_datasets_defaults(self): from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' DATASET_1 = 'dataset_one' DATASET_2 = 'dataset_two' - PATH = 'projects/%s/datasets' % PROJECT + PATH = 'projects/%s/datasets' % self.PROJECT TOKEN = 'TOKEN' DATA = { 'nextPageToken': TOKEN, 'datasets': [ {'kind': 'bigquery#dataset', - 'id': '%s:%s' % (PROJECT, DATASET_1), + 'id': '%s:%s' % (self.PROJECT, DATASET_1), 'datasetReference': {'datasetId': DATASET_1, - 'projectId': PROJECT}, + 'projectId': self.PROJECT}, 'friendlyName': None}, {'kind': 'bigquery#dataset', - 'id': '%s:%s' % (PROJECT, DATASET_2), + 'id': '%s:%s' % (self.PROJECT, DATASET_2), 'datasetReference': {'datasetId': DATASET_2, - 'projectId': PROJECT}, + 'projectId': self.PROJECT}, 'friendlyName': 'Two'}, ] } creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_datasets() @@ -225,12 +226,11 @@ def test_list_datasets_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_datasets_explicit_response_missing_datasets_key(self): - PROJECT = 'PROJECT' - PATH = 'projects/%s/datasets' % PROJECT + PATH = 'projects/%s/datasets' % self.PROJECT TOKEN = 'TOKEN' DATA = {} creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_datasets( @@ -252,45 +252,42 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DATASET = 'dataset_name' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - dataset = client.dataset(DATASET, PROJECT) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + dataset = client.dataset(self.DS_ID, self.PROJECT) self.assertIsInstance(dataset, DatasetReference) - self.assertEqual(dataset.dataset_id, DATASET) - self.assertEqual(dataset.project, PROJECT) + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) def test_dataset_with_default_project(self): from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DATASET = 'dataset_name' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - dataset = client.dataset(DATASET) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + dataset = client.dataset(self.DS_ID) self.assertIsInstance(dataset, DatasetReference) - self.assertEqual(dataset.dataset_id, DATASET) - self.assertEqual(dataset.project, PROJECT) + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) def test_get_dataset(self): - project = 'PROJECT' - dataset_id = 'dataset_id' - path = 'projects/%s/datasets/%s' % (project, dataset_id) + path = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) creds = _make_credentials() http = object() - client = self._make_one(project=project, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) resource = { - 'id': '%s:%s' % (project, dataset_id), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'datasetReference': { - 'projectId': project, - 'datasetId': dataset_id, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, }, } conn = client._connection = _Connection(resource) - dataset_ref = client.dataset(dataset_id) + dataset_ref = client.dataset(self.DS_ID) dataset = client.get_dataset(dataset_ref) @@ -298,45 +295,41 @@ def test_get_dataset(self): req = conn._requested[0] self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % path) - self.assertEqual(dataset.dataset_id, dataset_id) + self.assertEqual(dataset.dataset_id, self.DS_ID) def test_create_dataset_minimal(self): from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - PATH = 'projects/%s/datasets' % PROJECT + PATH = 'projects/%s/datasets' % self.PROJECT RESOURCE = { 'datasetReference': - {'projectId': PROJECT, 'datasetId': DS_ID}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'etag': "etag", - 'id': "%s:%s" % (PROJECT, DS_ID), + 'id': "%s:%s" % (self.PROJECT, self.DS_ID), } creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(RESOURCE) - ds = client.create_dataset(Dataset(client.dataset(DS_ID))) + ds = client.create_dataset(Dataset(client.dataset(self.DS_ID))) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') self.assertEqual(req['path'], '/%s' % PATH) SENT = { 'datasetReference': - {'projectId': PROJECT, 'datasetId': DS_ID}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'labels': {}, } self.assertEqual(req['data'], SENT) - self.assertEqual(ds.dataset_id, DS_ID) - self.assertEqual(ds.project, PROJECT) + self.assertEqual(ds.dataset_id, self.DS_ID) + self.assertEqual(ds.project, self.PROJECT) self.assertEqual(ds.etag, RESOURCE['etag']) self.assertEqual(ds.full_dataset_id, RESOURCE['id']) def test_create_dataset_w_attrs(self): from google.cloud.bigquery.dataset import Dataset, AccessEntry - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - PATH = 'projects/%s/datasets' % PROJECT + PATH = 'projects/%s/datasets' % self.PROJECT DESCRIPTION = 'DESC' FRIENDLY_NAME = 'FN' LOCATION = 'US' @@ -349,9 +342,9 @@ def test_create_dataset_w_attrs(self): } RESOURCE = { 'datasetReference': - {'projectId': PROJECT, 'datasetId': DS_ID}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'etag': "etag", - 'id': "%s:%s" % (PROJECT, DS_ID), + 'id': "%s:%s" % (self.PROJECT, self.DS_ID), 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, @@ -362,11 +355,11 @@ def test_create_dataset_w_attrs(self): {'view': VIEW}], } creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(RESOURCE) entries = [AccessEntry('OWNER', 'userByEmail', USER_EMAIL), AccessEntry(None, 'view', VIEW)] - ds_arg = Dataset(client.dataset(DS_ID)) + ds_arg = Dataset(client.dataset(self.DS_ID)) ds_arg.access_entries = entries ds_arg.description = DESCRIPTION ds_arg.friendly_name = FRIENDLY_NAME @@ -380,7 +373,7 @@ def test_create_dataset_w_attrs(self): self.assertEqual(req['path'], '/%s' % PATH) SENT = { 'datasetReference': - {'projectId': PROJECT, 'datasetId': DS_ID}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, @@ -391,8 +384,8 @@ def test_create_dataset_w_attrs(self): 'labels': LABELS, } self.assertEqual(req['data'], SENT) - self.assertEqual(ds.dataset_id, DS_ID) - self.assertEqual(ds.project, PROJECT) + self.assertEqual(ds.dataset_id, self.DS_ID) + self.assertEqual(ds.project, self.PROJECT) self.assertEqual(ds.etag, RESOURCE['etag']) self.assertEqual(ds.full_dataset_id, RESOURCE['id']) self.assertEqual(ds.description, DESCRIPTION) @@ -404,24 +397,20 @@ def test_create_dataset_w_attrs(self): def test_create_table_w_day_partition(self): from google.cloud.bigquery.table import Table - project = 'PROJECT' - dataset_id = 'dataset_id' - table_id = 'table-id' path = 'projects/%s/datasets/%s/tables' % ( - project, dataset_id) + self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, } conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref) + table = Table(self.TABLE_REF) table.partitioning_type = 'DAY' got = client.create_table(table) @@ -432,37 +421,33 @@ def test_create_table_w_day_partition(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'timePartitioning': {'type': 'DAY'}, } self.assertEqual(req['data'], sent) self.assertEqual(table.partitioning_type, "DAY") - self.assertEqual(got.table_id, table_id) + self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_day_partition_and_expire(self): from google.cloud.bigquery.table import Table - project = 'PROJECT' - dataset_id = 'dataset_id' - table_id = 'table-id' path = 'projects/%s/datasets/%s/tables' % ( - project, dataset_id) + self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, } conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref) + table = Table(self.TABLE_REF) table.partitioning_type = 'DAY' table.partition_expiration = 100 @@ -474,34 +459,31 @@ def test_create_table_w_day_partition_and_expire(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, } self.assertEqual(req['data'], sent) self.assertEqual(table.partitioning_type, "DAY") self.assertEqual(table.partition_expiration, 100) - self.assertEqual(got.table_id, table_id) + self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_schema_and_query(self): from google.cloud.bigquery.table import Table, SchemaField - project = 'PROJECT' - dataset_id = 'dataset_id' - table_id = 'table-id' path = 'projects/%s/datasets/%s/tables' % ( - project, dataset_id) - query = 'SELECT * from %s:%s' % (dataset_id, table_id) + self.PROJECT, self.DS_ID) + query = 'SELECT * from %s:%s' % (self.DS_ID, self.TABLE_ID) creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -514,8 +496,7 @@ def test_create_table_w_schema_and_query(self): SchemaField('age', 'INTEGER', mode='REQUIRED') ] conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) table.view_query = query got = client.create_table(table) @@ -526,9 +507,9 @@ def test_create_table_w_schema_and_query(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, }, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -538,56 +519,48 @@ def test_create_table_w_schema_and_query(self): 'view': {'query': query, 'useLegacySql': None}, } self.assertEqual(req['data'], sent) - self.assertEqual(got.table_id, table_id) - self.assertEqual(got.project, project) - self.assertEqual(got.dataset_id, dataset_id) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) self.assertEqual(got.schema, schema) self.assertEqual(got.view_query, query) def test_get_table(self): - project = 'PROJECT' - dataset_id = 'dataset_id' - table_id = 'table-id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=project, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, }, } conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - - table = client.get_table(table_ref) + table = client.get_table(self.TABLE_REF) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % path) - self.assertEqual(table.table_id, table_id) + self.assertEqual(table.table_id, self.TABLE_ID) def test_update_dataset_w_invalid_field(self): from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(ValueError): - client.update_dataset(Dataset(client.dataset(DS_ID)), ["foo"]) + client.update_dataset(Dataset(client.dataset(self.DS_ID)), ["foo"]) def test_update_dataset(self): from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - PATH = 'projects/%s/datasets/%s' % (PROJECT, DS_ID) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DESCRIPTION = 'DESCRIPTION' FRIENDLY_NAME = 'TITLE' LOCATION = 'loc' @@ -595,7 +568,7 @@ def test_update_dataset(self): EXP = 17 RESOURCE = { 'datasetReference': - {'projectId': PROJECT, 'datasetId': DS_ID}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'etag': "etag", 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, @@ -604,9 +577,9 @@ def test_update_dataset(self): 'labels': LABELS, } creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(RESOURCE, RESOURCE) - ds = Dataset(client.dataset(DS_ID)) + ds = Dataset(client.dataset(self.DS_ID)) ds.description = DESCRIPTION ds.friendly_name = FRIENDLY_NAME ds.location = LOCATION @@ -640,19 +613,16 @@ def test_update_dataset(self): def test_update_table(self): from google.cloud.bigquery.table import Table, SchemaField - project = 'PROJECT' - dataset_id = 'DATASET_ID' - table_id = 'table_id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) description = 'description' title = 'title' resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -667,10 +637,9 @@ def test_update_table(self): SchemaField('age', 'INTEGER', mode='REQUIRED') ] creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(resource, resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) table.description = description table.friendly_name = title @@ -679,9 +648,9 @@ def test_update_table(self): sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -708,25 +677,21 @@ def test_update_table(self): def test_update_table_only_use_legacy_sql(self): from google.cloud.bigquery.table import Table - project = 'PROJECT' - dataset_id = 'DATASET_ID' - table_id = 'table_id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'view': {'useLegacySql': True} } creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref) + table = Table(self.TABLE_REF) table.view_use_legacy_sql = True updated_table = client.update_table(table, ['view_use_legacy_sql']) @@ -737,9 +702,9 @@ def test_update_table_only_use_legacy_sql(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'view': {'useLegacySql': True} } @@ -753,11 +718,8 @@ def test_update_table_w_query(self): from google.cloud._helpers import _millis from google.cloud.bigquery.table import Table, SchemaField - project = 'PROJECT' - dataset_id = 'DATASET_ID' - table_id = 'table_id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) query = 'select fullname, age from person_ages' location = 'EU' exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) @@ -769,11 +731,11 @@ def test_update_table_w_query(self): SchemaField('age', 'INTEGER', mode='REQUIRED') ] resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'schema': schema_resource, 'view': {'query': query, 'useLegacySql': True}, @@ -781,10 +743,9 @@ def test_update_table_w_query(self): 'expirationTime': _millis(exp_time) } creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) table.location = location table.expires = exp_time table.view_query = query @@ -800,9 +761,9 @@ def test_update_table_w_query(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'view': {'query': query, 'useLegacySql': True}, 'location': location, @@ -820,34 +781,30 @@ def test_update_table_w_query(self): def test_update_table_w_schema_None(self): # Simulate deleting schema: not sure if back-end will actually # allow this operation, but the spec says it is optional. - project = 'PROJECT' - dataset_id = 'DATASET_ID' - table_id = 'table_id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) resource1 = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id}, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID}, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]} } resource2 = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id}, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID}, 'schema': {'fields': []}, } creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(resource1, resource2) - table_ref = client.dataset(dataset_id).table(table_id) - table = client.get_table(table_ref) + table = client.get_table(self.TABLE_REF) table.schema = None updated_table = client.update_table(table, ['schema']) @@ -857,9 +814,9 @@ def test_update_table_w_schema_None(self): self.assertEqual(req['method'], 'PATCH') sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'schema': None } @@ -870,37 +827,33 @@ def test_update_table_w_schema_None(self): def test_update_table_delete_property(self): from google.cloud.bigquery.table import Table - project = 'PROJECT' - dataset_id = 'DATASET_ID' - table_id = 'table_id' description = 'description' title = 'title' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) resource1 = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'description': description, 'friendlyName': title, } resource2 = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'description': None, } creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(resource1, resource2) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref) + table = Table(self.TABLE_REF) table.description = description table.friendly_name = title table2 = client.update_table(table, ['description', 'friendly_name']) @@ -914,9 +867,9 @@ def test_update_table_delete_property(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'description': None, } @@ -924,13 +877,11 @@ def test_update_table_delete_property(self): self.assertIsNone(table3.description) def test_list_dataset_tables_empty(self): - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection({}) - dataset = client.dataset(DS_ID) + dataset = client.dataset(self.DS_ID) iterator = client.list_dataset_tables(dataset) self.assertIs(iterator.dataset, dataset) page = six.next(iterator.pages) @@ -942,40 +893,38 @@ def test_list_dataset_tables_empty(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) self.assertEqual(req['path'], '/%s' % PATH) def test_list_dataset_tables_defaults(self): from google.cloud.bigquery.table import Table - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' TABLE_1 = 'table_one' TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) TOKEN = 'TOKEN' DATA = { 'nextPageToken': TOKEN, 'tables': [ {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_1), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), 'tableReference': {'tableId': TABLE_1, - 'datasetId': DS_ID, - 'projectId': PROJECT}, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, 'type': 'TABLE'}, {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_2), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), 'tableReference': {'tableId': TABLE_2, - 'datasetId': DS_ID, - 'projectId': PROJECT}, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, 'type': 'TABLE'}, ] } creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(DATA) - dataset = client.dataset(DS_ID) + dataset = client.dataset(self.DS_ID) iterator = client.list_dataset_tables(dataset) self.assertIs(iterator.dataset, dataset) @@ -998,33 +947,31 @@ def test_list_dataset_tables_defaults(self): def test_list_dataset_tables_explicit(self): from google.cloud.bigquery.table import Table - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' TABLE_1 = 'table_one' TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) TOKEN = 'TOKEN' DATA = { 'tables': [ {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_1), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), 'tableReference': {'tableId': TABLE_1, - 'datasetId': DS_ID, - 'projectId': PROJECT}, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, 'type': 'TABLE'}, {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_2), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), 'tableReference': {'tableId': TABLE_2, - 'datasetId': DS_ID, - 'projectId': PROJECT}, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, 'type': 'TABLE'}, ] } creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(DATA) - dataset = client.dataset(DS_ID) + dataset = client.dataset(self.DS_ID) iterator = client.list_dataset_tables( dataset, max_results=3, page_token=TOKEN) @@ -1048,23 +995,19 @@ def test_list_dataset_tables_explicit(self): {'maxResults': 3, 'pageToken': TOKEN}) def test_list_dataset_tables_wrong_type(self): - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.list_dataset_tables(client.dataset(DS_ID).table("foo")) + client.list_dataset_tables(client.dataset(self.DS_ID).table("foo")) def test_delete_dataset(self): from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - PATH = 'projects/%s/datasets/%s' % (PROJECT, DS_ID) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection({}, {}) - ds_ref = client.dataset(DS_ID) + ds_ref = client.dataset(self.DS_ID) for arg in (ds_ref, Dataset(ds_ref)): client.delete_dataset(arg) req = conn._requested[0] @@ -1072,56 +1015,47 @@ def test_delete_dataset(self): self.assertEqual(req['path'], '/%s' % PATH) def test_delete_dataset_wrong_type(self): - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_dataset(client.dataset(DS_ID).table("foo")) + client.delete_dataset(client.dataset(self.DS_ID).table("foo")) def test_delete_table(self): from google.cloud.bigquery.table import Table - project = 'PROJECT' - dataset_id = 'dataset_id' - table_id = 'table-id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=project, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}, {}) - table_ref = client.dataset(dataset_id).table(table_id) - for arg in (table_ref, Table(table_ref)): + for arg in (self.TABLE_REF, Table(self.TABLE_REF)): client.delete_table(arg) req = conn._requested[0] self.assertEqual(req['method'], 'DELETE') self.assertEqual(req['path'], '/%s' % path) def test_delete_table_w_wrong_type(self): - project = 'PROJECT' - dataset_id = 'DATASET_ID' creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_table(client.dataset(dataset_id)) + client.delete_table(client.dataset(self.DS_ID)) def test_job_from_resource_unknown_type(self): - PROJECT = 'PROJECT' creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) with self.assertRaises(ValueError): client.job_from_resource({'configuration': {'nonesuch': {}}}) def test_get_job_miss_w_explict_project(self): from google.cloud.exceptions import NotFound - PROJECT = 'PROJECT' OTHER_PROJECT = 'OTHER_PROJECT' JOB_ID = 'NONESUCH' creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection() with self.assertRaises(NotFound): @@ -1136,15 +1070,13 @@ def test_get_job_miss_w_explict_project(self): def test_get_job_hit(self): from google.cloud.bigquery.job import QueryJob - PROJECT = 'PROJECT' JOB_ID = 'query_job' - DATASET = 'test_dataset' QUERY_DESTINATION_TABLE = 'query_destination_table' QUERY = 'SELECT * from test_dataset:test_table' ASYNC_QUERY_DATA = { - 'id': '{}:{}'.format(PROJECT, JOB_ID), + 'id': '{}:{}'.format(self.PROJECT, JOB_ID), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'query_job', }, 'state': 'DONE', @@ -1152,8 +1084,8 @@ def test_get_job_hit(self): 'query': { 'query': QUERY, 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': QUERY_DESTINATION_TABLE, }, 'createDisposition': 'CREATE_IF_NEEDED', @@ -1162,7 +1094,7 @@ def test_get_job_hit(self): }, } creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(ASYNC_QUERY_DATA) job = client.get_job(JOB_ID) @@ -1184,8 +1116,6 @@ def test_list_jobs_defaults(self): from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import QueryJob - PROJECT = 'PROJECT' - DATASET = 'test_dataset' SOURCE_TABLE = 'source_table' DESTINATION_TABLE = 'destination_table' QUERY_DESTINATION_TABLE = 'query_destination_table' @@ -1197,13 +1127,13 @@ def test_list_jobs_defaults(self): 'extract_job': ExtractJob, 'query_job': QueryJob, } - PATH = 'projects/%s/jobs' % PROJECT + PATH = 'projects/%s/jobs' % self.PROJECT TOKEN = 'TOKEN' QUERY = 'SELECT * from test_dataset:test_table' ASYNC_QUERY_DATA = { - 'id': '%s:%s' % (PROJECT, 'query_job'), + 'id': '%s:%s' % (self.PROJECT, 'query_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'query_job', }, 'state': 'DONE', @@ -1211,8 +1141,8 @@ def test_list_jobs_defaults(self): 'query': { 'query': QUERY, 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': QUERY_DESTINATION_TABLE, }, 'createDisposition': 'CREATE_IF_NEEDED', @@ -1221,17 +1151,17 @@ def test_list_jobs_defaults(self): }, } EXTRACT_DATA = { - 'id': '%s:%s' % (PROJECT, 'extract_job'), + 'id': '%s:%s' % (self.PROJECT, 'extract_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'extract_job', }, 'state': 'DONE', 'configuration': { 'extract': { 'sourceTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }, 'destinationUris': [DESTINATION_URI], @@ -1239,39 +1169,39 @@ def test_list_jobs_defaults(self): }, } COPY_DATA = { - 'id': '%s:%s' % (PROJECT, 'copy_job'), + 'id': '%s:%s' % (self.PROJECT, 'copy_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'copy_job', }, 'state': 'DONE', 'configuration': { 'copy': { 'sourceTables': [{ - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }], 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': DESTINATION_TABLE, }, } }, } LOAD_DATA = { - 'id': '%s:%s' % (PROJECT, 'load_job'), + 'id': '%s:%s' % (self.PROJECT, 'load_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'load_job', }, 'state': 'DONE', 'configuration': { 'load': { 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }, 'sourceUris': [SOURCE_URI], @@ -1288,7 +1218,7 @@ def test_list_jobs_defaults(self): ] } creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_jobs() @@ -1312,26 +1242,24 @@ def test_list_jobs_defaults(self): def test_list_jobs_load_job_wo_sourceUris(self): from google.cloud.bigquery.job import LoadJob - PROJECT = 'PROJECT' - DATASET = 'test_dataset' SOURCE_TABLE = 'source_table' JOB_TYPES = { 'load_job': LoadJob, } - PATH = 'projects/%s/jobs' % PROJECT + PATH = 'projects/%s/jobs' % self.PROJECT TOKEN = 'TOKEN' LOAD_DATA = { - 'id': '%s:%s' % (PROJECT, 'load_job'), + 'id': '%s:%s' % (self.PROJECT, 'load_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'load_job', }, 'state': 'DONE', 'configuration': { 'load': { 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }, } @@ -1344,7 +1272,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): ] } creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_jobs() @@ -1366,12 +1294,11 @@ def test_list_jobs_load_job_wo_sourceUris(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_explicit_missing(self): - PROJECT = 'PROJECT' - PATH = 'projects/%s/jobs' % PROJECT + PATH = 'projects/%s/jobs' % self.PROJECT DATA = {} TOKEN = 'TOKEN' creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_jobs(max_results=1000, page_token=TOKEN, @@ -1397,22 +1324,20 @@ def test_list_jobs_explicit_missing(self): def test_load_table_from_storage(self): from google.cloud.bigquery.job import LoadJob - PROJECT = 'PROJECT' JOB = 'job_name' - DATASET = 'dataset_name' DESTINATION = 'destination_table' SOURCE_URI = 'http://example.com/source.csv' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'load': { 'sourceUris': [SOURCE_URI], 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': DESTINATION, }, }, @@ -1420,9 +1345,10 @@ def test_load_table_from_storage(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) - destination = client.dataset(DATASET).table(DESTINATION) + destination = client.dataset(self.DS_ID).table(DESTINATION) job = client.load_table_from_storage(SOURCE_URI, destination, job_id=JOB) @@ -1431,7 +1357,7 @@ def test_load_table_from_storage(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/%s/jobs' % PROJECT) + self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT) self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) @@ -1469,25 +1395,20 @@ def _initiate_resumable_upload_helper(self, num_retries=None): from google.cloud.bigquery.client import _get_upload_headers from google.cloud.bigquery.job import LoadJob, LoadJobConfig - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - TABLE_ID = 'TABLE_ID' - # Create mocks to be checked for doing transport. resumable_url = 'http://test.invalid?upload_id=hey-you' response_headers = {'location': resumable_url} fake_transport = self._mock_transport( http_client.OK, response_headers) - client = self._make_one(project=PROJECT, _http=fake_transport) + client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = _Connection() - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) # Create some mock arguments and call the method under test. data = b'goodbye gudbi gootbee' stream = io.BytesIO(data) config = LoadJobConfig() config.source_format = 'CSV' - job = LoadJob(None, None, table_ref, client, job_config=config) + job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job._build_resource() upload, transport = client._initiate_resumable_upload( stream, metadata, num_retries) @@ -1496,7 +1417,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): self.assertIsInstance(upload, ResumableUpload) upload_url = ( 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - PROJECT + + self.PROJECT + '/jobs?uploadType=resumable') self.assertEqual(upload.upload_url, upload_url) expected_headers = _get_upload_headers(conn.USER_AGENT) @@ -1541,21 +1462,16 @@ def _do_multipart_upload_success_helper( from google.cloud.bigquery.client import _get_upload_headers from google.cloud.bigquery.job import LoadJob, LoadJobConfig - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - TABLE_ID = 'TABLE_ID' - fake_transport = self._mock_transport(http_client.OK, {}) - client = self._make_one(project=PROJECT, _http=fake_transport) + client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = _Connection() - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) # Create some mock arguments. data = b'Bzzzz-zap \x00\x01\xf4' stream = io.BytesIO(data) config = LoadJobConfig() config.source_format = 'CSV' - job = LoadJob(None, None, table_ref, client, job_config=config) + job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job._build_resource() size = len(data) response = client._do_multipart_upload( @@ -1568,7 +1484,7 @@ def _do_multipart_upload_success_helper( upload_url = ( 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - PROJECT + + self.PROJECT + '/jobs?uploadType=multipart') payload = ( b'--==0==\r\n' + @@ -1600,26 +1516,24 @@ def test__do_multipart_upload_with_retry(self, get_boundary): def test_copy_table(self): from google.cloud.bigquery.job import CopyJob - PROJECT = 'PROJECT' JOB = 'job_name' - DATASET = 'dataset_name' SOURCE = 'source_table' DESTINATION = 'destination_table' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'copy': { 'sourceTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE, }, 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': DESTINATION, }, }, @@ -1627,9 +1541,10 @@ def test_copy_table(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) - dataset = client.dataset(DATASET) + dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) @@ -1639,7 +1554,7 @@ def test_copy_table(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/%s/jobs' % PROJECT) + self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) @@ -1659,21 +1574,19 @@ def test_copy_table(self): def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob - PROJECT = 'PROJECT' JOB = 'job_id' - DATASET = 'dataset_id' SOURCE = 'source_table' DESTINATION = 'gs://bucket_name/object_name' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'extract': { 'sourceTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE, }, 'destinationUris': [DESTINATION], @@ -1682,9 +1595,10 @@ def test_extract_table(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) - dataset = client.dataset(DATASET) + dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) job = client.extract_table(source, DESTINATION, job_id=JOB) @@ -1707,21 +1621,19 @@ def test_extract_table_generated_job_id(self): from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import DestinationFormat - PROJECT = 'PROJECT' JOB = 'job_id' - DATASET = 'dataset_id' SOURCE = 'source_table' DESTINATION = 'gs://bucket_name/object_name' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'extract': { 'sourceTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE, }, 'destinationUris': [DESTINATION], @@ -1731,9 +1643,10 @@ def test_extract_table_generated_job_id(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) - dataset = client.dataset(DATASET) + dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) job_config = ExtractJobConfig() job_config.destination_format = ( @@ -1758,11 +1671,10 @@ def test_extract_table_generated_job_id(self): def test_query_defaults(self): from google.cloud.bigquery.job import QueryJob - PROJECT = 'PROJECT' QUERY = 'select count(*) from persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'some-random-id', }, 'configuration': { @@ -1774,7 +1686,8 @@ def test_query_defaults(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) job = client.query(QUERY) @@ -1804,12 +1717,11 @@ def test_query_w_udf_resources(self): from google.cloud.bigquery.query import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' - PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { @@ -1824,7 +1736,8 @@ def test_query_w_udf_resources(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] config = QueryJobConfig() @@ -1860,12 +1773,11 @@ def test_query_w_query_parameters(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import ScalarQueryParameter - PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { @@ -1884,7 +1796,8 @@ def test_query_w_query_parameters(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] config = QueryJobConfig() @@ -1918,17 +1831,13 @@ def test_query_w_query_parameters(self): }) def test_create_rows_wo_schema(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) - table = Table(table_ref) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + table = Table(self.TABLE_REF) ROWS = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), @@ -1947,27 +1856,23 @@ def test_create_rows_w_schema(self): from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _microseconds_from_datetime from google.cloud.bigquery.table import Table, SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), ] - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) ROWS = [ ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), @@ -2002,27 +1907,23 @@ def test_create_rows_w_list_of_dictionaries(self): from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _microseconds_from_datetime from google.cloud.bigquery.table import Table, SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), ] - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) ROWS = [ { 'full_name': 'Phred Phlyntstone', 'age': 32, @@ -2064,23 +1965,19 @@ def _row_data(row): def test_create_rows_w_list_of_Rows(self): from google.cloud.bigquery._helpers import Row from google.cloud.bigquery.table import Table, SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), ] - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) f2i = {'full_name': 0, 'age': 1} ROWS = [ Row(('Phred Phlyntstone', 32), f2i), @@ -2107,13 +2004,9 @@ def _row_data(row): def test_create_rows_w_skip_invalid_and_ignore_unknown(self): from google.cloud.bigquery.table import Table, SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) RESPONSE = { 'insertErrors': [ {'index': 1, @@ -2126,15 +2019,15 @@ def test_create_rows_w_skip_invalid_and_ignore_unknown(self): ]} creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESPONSE) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), SchemaField('voter', 'BOOLEAN', mode='NULLABLE'), ] - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) ROWS = [ ('Phred Phlyntstone', 32, True), ('Bharney Rhubble', 33, False), @@ -2179,24 +2072,20 @@ def _row_data(row): def test_create_rows_w_repeated_fields(self): from google.cloud.bigquery.table import Table, SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) full_name = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) - table = Table(table_ref, schema=[full_name, struct]) + table = Table(self.TABLE_REF, schema=[full_name, struct]) ROWS = [ (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), ] @@ -2220,18 +2109,14 @@ def _row_data(row): def test_create_rows_w_record_schema(self): from google.cloud.bigquery.table import SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') area_code = SchemaField('area_code', 'STRING', 'REQUIRED') local_number = SchemaField('local_number', 'STRING', 'REQUIRED') @@ -2256,7 +2141,7 @@ def _row_data(row): 'rows': [{'json': _row_data(row)} for row in ROWS], } - errors = client.create_rows(table_ref, ROWS, + errors = client.create_rows(self.TABLE_REF, ROWS, selected_fields=[full_name, phone]) self.assertEqual(len(errors), 0) @@ -2267,12 +2152,8 @@ def _row_data(row): self.assertEqual(req['data'], SENT) def test_create_rows_errors(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' ROWS = [ ('Phred Phlyntstone', 32, True), ('Bharney Rhubble', 33, False), @@ -2281,16 +2162,16 @@ def test_create_rows_errors(self): ] creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) # table ref with no selected fields with self.assertRaises(ValueError): - client.create_rows(table_ref, ROWS) + client.create_rows(self.TABLE_REF, ROWS) # table with no schema with self.assertRaises(ValueError): - client.create_rows(Table(table_ref), ROWS) + client.create_rows(Table(self.TABLE_REF), ROWS) # neither Table nor tableReference with self.assertRaises(TypeError): @@ -2301,18 +2182,17 @@ def test_query_rows_defaults(self): from google.cloud.bigquery._helpers import Row JOB = 'job-id' - PROJECT = 'PROJECT' QUERY = 'SELECT COUNT(*) FROM persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'query': { 'query': QUERY, 'destinationTable': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'datasetId': '_temp_dataset', 'tableId': '_temp_table', }, @@ -2345,7 +2225,8 @@ def test_query_rows_defaults(self): del LAST_PAGE['pageToken'] creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection( RESOURCE, RESULTS_RESOURCE, FIRST_PAGE, LAST_PAGE) @@ -2366,18 +2247,17 @@ def test_query_rows_w_job_id(self): from google.api.core.page_iterator import HTTPIterator JOB = 'job-id' - PROJECT = 'PROJECT' QUERY = 'SELECT COUNT(*) FROM persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'query': { 'query': QUERY, 'destinationTable': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'datasetId': '_temp_dataset', 'tableId': '_temp_table', }, @@ -2399,7 +2279,8 @@ def test_query_rows_w_job_id(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection( RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) @@ -2420,11 +2301,10 @@ def test_query_rows_w_job_config(self): from google.api.core.page_iterator import HTTPIterator JOB = 'job-id' - PROJECT = 'PROJECT' QUERY = 'SELECT COUNT(*) FROM persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { @@ -2432,7 +2312,7 @@ def test_query_rows_w_job_config(self): 'query': QUERY, 'useLegacySql': True, 'destinationTable': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'datasetId': '_temp_dataset', 'tableId': '_temp_table', }, @@ -2455,7 +2335,8 @@ def test_query_rows_w_job_config(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection( RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) @@ -2477,15 +2358,11 @@ def test_query_rows_w_job_config(self): def test_list_rows(self): import datetime from google.cloud._helpers import UTC - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, SchemaField from google.cloud.bigquery._helpers import Row - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) @@ -2526,13 +2403,13 @@ def _bigquery_timestamp_float_repr(ts_float): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(DATA, DATA) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='NULLABLE') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = Table(table_ref, schema=[full_name, age, joined]) + table = Table(self.TABLE_REF, schema=[full_name, age, joined]) iterator = client.list_rows(table) page = six.next(iterator.pages) @@ -2556,17 +2433,13 @@ def _bigquery_timestamp_float_repr(ts_float): self.assertEqual(req['query_params'], {}) def test_list_rows_query_params(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, SchemaField - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) - table = Table(table_ref, + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + table = Table(self.TABLE_REF, schema=[SchemaField('age', 'INTEGER', mode='NULLABLE')]) tests = [ ({}, {}), @@ -2584,14 +2457,10 @@ def test_list_rows_query_params(self): 'for kwargs %s' % test[0]) def test_list_rows_repeated_fields(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import SchemaField - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) ROWS = 1234 TOKEN = 'TOKEN' DATA = { @@ -2612,16 +2481,17 @@ def test_list_rows_repeated_fields(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(DATA) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) color = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) - iterator = client.list_rows(table_ref, selected_fields=[color, struct]) + iterator = client.list_rows(self.TABLE_REF, + selected_fields=[color, struct]) page = six.next(iterator.pages) rows = list(page) total_rows = iterator.total_rows @@ -2640,14 +2510,10 @@ def test_list_rows_repeated_fields(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_rows_w_record_schema(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, SchemaField - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) ROWS = 1234 TOKEN = 'TOKEN' DATA = { @@ -2670,16 +2536,16 @@ def test_list_rows_w_record_schema(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(DATA) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') area_code = SchemaField('area_code', 'STRING', 'REQUIRED') local_number = SchemaField('local_number', 'STRING', 'REQUIRED') rank = SchemaField('rank', 'INTEGER', 'REQUIRED') phone = SchemaField('phone', 'RECORD', mode='NULLABLE', fields=[area_code, local_number, rank]) - table = Table(table_ref, schema=[full_name, phone]) + table = Table(self.TABLE_REF, schema=[full_name, phone]) iterator = client.list_rows(table) page = six.next(iterator.pages) @@ -2707,42 +2573,36 @@ def test_list_rows_w_record_schema(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_rows_errors(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' - creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) # table ref with no selected fields with self.assertRaises(ValueError): - client.list_rows(table_ref) + client.list_rows(self.TABLE_REF) # table with no schema with self.assertRaises(ValueError): - client.list_rows(Table(table_ref)) + client.list_rows(Table(self.TABLE_REF)) # neither Table nor tableReference with self.assertRaises(TypeError): client.list_rows(1) def test_list_partitions(self): - PROJECT = 'PROJECT' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'JOB_ID', }, 'configuration': { 'query': { 'query': 'q', 'destinationTable': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'datasetId': 'DS_ID', 'tableId': 'TABLE_ID', }, @@ -2772,11 +2632,11 @@ def test_list_partitions(self): del FIRST_PAGE['pageToken'] creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) client._connection = _Connection( RESOURCE, RESULTS_RESOURCE, FIRST_PAGE) - table_ref = DatasetReference(PROJECT, 'DS_ID').table('TABLE_ID') - self.assertEqual(client.list_partitions(table_ref), + self.assertEqual(client.list_partitions(self.TABLE_REF), [20160804, 20160805]) From 9e1fec7b064afc640355edf636b7063bec38efff Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 12 Oct 2017 10:13:40 -0700 Subject: [PATCH 0237/2016] BQ: use random job ID for system tests. (#4163) --- packages/google-cloud-bigquery/tests/system.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 61397d3d80cf..2c62e5efa9b0 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -20,6 +20,7 @@ import os import time import unittest +import uuid import six @@ -688,7 +689,7 @@ def test_copy_table(self): def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') - JOB_ID = 'fetch_' + DATASET_ID + JOB_ID = 'fetch_' + DATASET_ID + str(uuid.uuid4()) TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) @@ -924,7 +925,7 @@ def test_query_w_dml(self): query_job = Config.CLIENT.query( query_template.format(dataset_name, table_name), - job_id='test_query_w_dml_{}'.format(unique_resource_id())) + job_id='test_query_w_dml_{}'.format(str(uuid.uuid4()))) query_job.result() self.assertEqual(query_job.num_dml_affected_rows, 1) @@ -940,7 +941,7 @@ def test_dbapi_w_dml(self): Config.CURSOR.execute( query_template.format(dataset_name, table_name), - job_id='test_dbapi_w_dml_{}'.format(unique_resource_id())) + job_id='test_dbapi_w_dml_{}'.format(str(uuid.uuid4()))) self.assertEqual(Config.CURSOR.rowcount, 1) self.assertIsNone(Config.CURSOR.fetchone()) @@ -1112,7 +1113,7 @@ def test_query_w_query_params(self): example['sql'], job_config=jconfig, job_id='test_query_w_query_params{}'.format( - unique_resource_id())) + str(uuid.uuid4()))) rows = list(query_job.result()) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) From 91ddfd9b69bfceb6cb107520ed9919fcec8e0e4a Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 12 Oct 2017 14:31:04 -0400 Subject: [PATCH 0238/2016] bigquery: generate row IDs in create_rows (#4173) If the user doesn't provide row IDs, create unique IDs for them. --- .../google/cloud/bigquery/client.py | 6 ++- .../tests/unit/test_client.py | 42 ++++++++++++++----- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 488b409ff77c..6a312bff4514 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -860,7 +860,7 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, :type row_ids: list of string :param row_ids: (Optional) Unique ids, one per row being inserted. - If not passed, no de-duplication occurs. + If omitted, unique IDs are created. :type selected_fields: list of :class:`SchemaField` :param selected_fields: @@ -923,7 +923,8 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, info = {'json': row_info} if row_ids is not None: info['insertId'] = row_ids[index] - + else: + info['insertId'] = str(uuid.uuid4()) rows_info.append(info) if skip_invalid_rows is not None: @@ -935,6 +936,7 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, if template_suffix is not None: data['templateSuffix'] = template_suffix + # TODO(jba): use self._call_api here after #4148 is merged. response = self._connection.api_request( method='POST', path='%s/insertAll' % table.path, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 22df27c6358c..bb5517207ffc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1889,10 +1889,14 @@ def _row_data(row): 'joined': joined} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -1950,10 +1954,14 @@ def _row_data(row): return row SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -1990,10 +1998,14 @@ def _row_data(row): return {'full_name': row[0], 'age': str(row[1])} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -2095,10 +2107,14 @@ def _row_data(row): 'struct': row[1]} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -2138,11 +2154,15 @@ def _row_data(row): 'phone': row[1]} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(self.TABLE_REF, ROWS, - selected_fields=[full_name, phone]) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(self.TABLE_REF, ROWS, + selected_fields=[full_name, phone]) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) From 7978e9b63fba698a94516c6b30dd80ef17468157 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 12 Oct 2017 17:20:39 -0400 Subject: [PATCH 0239/2016] bigquery: retry idempotent RPCs (#4148) Add retry logic to every RPC for which it makes sense. Following the BigQuery team, we ignore the error code and use the "reason" field of the error to determine whether to retry. Outstanding issues: - Resumable upload consists of an initial call to get a URL, followed by posts to that URL. Getting the retry right on that initial call requires modifying the ResumableUpload class. At the same time, the num_retries argument should be removed. - Users can't modify the retry behavior of Job.result(), because PollingFuture.result() does not accept a retry argument. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/_helpers.py | 26 +++ .../google/cloud/bigquery/client.py | 161 +++++++++++++----- .../google/cloud/bigquery/job.py | 60 +++++-- .../tests/unit/test_client.py | 41 ++++- 5 files changed, 228 insertions(+), 62 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 2682ca8ddb6d..545e0cde265a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -27,6 +27,7 @@ __version__ = get_distribution('google-cloud-bigquery').version from google.cloud.bigquery._helpers import Row +from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset @@ -61,4 +62,5 @@ 'Table', 'TableReference', 'UDFResource', + 'DEFAULT_RETRY', ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 0ee7a9c01c6a..d4230f9ff4f6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -20,6 +20,7 @@ import six +from google.api.core import retry from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds @@ -520,3 +521,28 @@ def _rows_page_start(iterator, page, response): total_rows = int(total_rows) iterator.total_rows = total_rows # pylint: enable=unused-argument + + +def _should_retry(exc): + """Predicate for determining when to retry. + + We retry if and only if the 'reason' is 'backendError' + or 'rateLimitExceeded'. + """ + if not hasattr(exc, 'errors'): + return False + if len(exc.errors) == 0: + return False + reason = exc.errors[0]['reason'] + return reason == 'backendError' or reason == 'rateLimitExceeded' + + +DEFAULT_RETRY = retry.Retry(predicate=_should_retry) +"""The default retry object. + +Any method with a ``retry`` parameter will be retried automatically, +with reasonable defaults. To disable retry, pass ``retry=None``. +To modify the default retry behavior, call a ``with_XXX`` method +on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, +pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. +""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 6a312bff4514..c52fbbda1ba8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -17,6 +17,7 @@ from __future__ import absolute_import import collections +import functools import os import uuid @@ -27,6 +28,7 @@ from google.resumable_media.requests import ResumableUpload from google.api.core import page_iterator + from google.cloud import exceptions from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection @@ -44,6 +46,7 @@ from google.cloud.bigquery._helpers import _rows_page_start from google.cloud.bigquery._helpers import _field_to_index_mapping from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW +from google.cloud.bigquery._helpers import DEFAULT_RETRY _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB @@ -117,7 +120,8 @@ def __init__(self, project=None, credentials=None, _http=None): project=project, credentials=credentials, _http=_http) self._connection = Connection(self) - def list_projects(self, max_results=None, page_token=None): + def list_projects(self, max_results=None, page_token=None, + retry=DEFAULT_RETRY): """List projects for the project associated with this client. See @@ -132,13 +136,16 @@ def list_projects(self, max_results=None, page_token=None): not passed, the API will return the first page of projects. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.client.Project` accessible to the current client. """ return page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path='/projects', item_to_value=_item_to_project, items_key='projects', @@ -146,7 +153,7 @@ def list_projects(self, max_results=None, page_token=None): max_results=max_results) def list_datasets(self, include_all=False, max_results=None, - page_token=None): + page_token=None, retry=DEFAULT_RETRY): """List datasets for the project associated with this client. See @@ -164,6 +171,9 @@ def list_datasets(self, include_all=False, max_results=None, not passed, the API will return the first page of datasets. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`. accessible to the current client. @@ -174,7 +184,7 @@ def list_datasets(self, include_all=False, max_results=None, path = '/projects/%s/datasets' % (self.project,) return page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path=path, item_to_value=_item_to_dataset, items_key='datasets', @@ -241,35 +251,47 @@ def create_table(self, table): method='POST', path=path, data=resource) return Table.from_api_repr(api_response) - def get_dataset(self, dataset_ref): + def _call_api(self, retry, **kwargs): + call = functools.partial(self._connection.api_request, **kwargs) + if retry: + call = retry(call) + return call() + + def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): """Fetch the dataset referenced by ``dataset_ref`` :type dataset_ref: :class:`google.cloud.bigquery.dataset.DatasetReference` :param dataset_ref: the dataset to use. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: a ``Dataset`` instance """ - api_response = self._connection.api_request( - method='GET', path=dataset_ref.path) + api_response = self._call_api(retry, + method='GET', + path=dataset_ref.path) return Dataset.from_api_repr(api_response) - def get_table(self, table_ref): + def get_table(self, table_ref, retry=DEFAULT_RETRY): """Fetch the table referenced by ``table_ref`` :type table_ref: :class:`google.cloud.bigquery.table.TableReference` :param table_ref: the table to use. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.table.Table` :returns: a ``Table`` instance """ - api_response = self._connection.api_request( - method='GET', path=table_ref.path) + api_response = self._call_api(retry, method='GET', path=table_ref.path) return Table.from_api_repr(api_response) - def update_dataset(self, dataset, fields): + def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): """Change some fields of a dataset. Use ``fields`` to specify which fields to update. At least one field @@ -290,6 +312,9 @@ def update_dataset(self, dataset, fields): :param fields: the fields of ``dataset`` to change, spelled as the Dataset properties (e.g. "friendly_name"). + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: the modified ``Dataset`` instance """ @@ -307,11 +332,11 @@ def update_dataset(self, dataset, fields): headers = {'If-Match': dataset.etag} else: headers = None - api_response = self._connection.api_request( - method='PATCH', path=path, data=partial, headers=headers) + api_response = self._call_api( + retry, method='PATCH', path=path, data=partial, headers=headers) return Dataset.from_api_repr(api_response) - def update_table(self, table, properties): + def update_table(self, table, properties, retry=DEFAULT_RETRY): """API call: update table properties via a PUT request See @@ -321,6 +346,9 @@ def update_table(self, table, properties): :class:`google.cloud.bigquery.table.Table` :param table_ref: the table to update. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.table.Table` :returns: a ``Table`` instance """ @@ -329,11 +357,13 @@ def update_table(self, table, properties): headers = {'If-Match': table.etag} else: headers = None - api_response = self._connection.api_request( + api_response = self._call_api( + retry, method='PATCH', path=table.path, data=partial, headers=headers) return Table.from_api_repr(api_response) - def list_dataset_tables(self, dataset, max_results=None, page_token=None): + def list_dataset_tables(self, dataset, max_results=None, page_token=None, + retry=DEFAULT_RETRY): """List tables in the dataset. See @@ -353,6 +383,9 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None): datasets. If not passed, the API will return the first page of datasets. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` contained within the current dataset. @@ -362,7 +395,7 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None): path = '%s/tables' % dataset.path result = page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path=path, item_to_value=_item_to_table, items_key='tables', @@ -371,7 +404,7 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None): result.dataset = dataset return result - def delete_dataset(self, dataset): + def delete_dataset(self, dataset, retry=DEFAULT_RETRY): """Delete a dataset. See @@ -381,13 +414,16 @@ def delete_dataset(self, dataset): :class:`~google.cloud.bigquery.dataset.Dataset` :class:`~google.cloud.bigquery.dataset.DatasetReference` + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :param dataset: the dataset to delete, or a reference to it. """ if not isinstance(dataset, (Dataset, DatasetReference)): raise TypeError('dataset must be a Dataset or a DatasetReference') - self._connection.api_request(method='DELETE', path=dataset.path) + self._call_api(retry, method='DELETE', path=dataset.path) - def delete_table(self, table): + def delete_table(self, table, retry=DEFAULT_RETRY): """Delete a table See @@ -397,17 +433,23 @@ def delete_table(self, table): :class:`~google.cloud.bigquery.table.Table` :class:`~google.cloud.bigquery.table.TableReference` :param table: the table to delete, or a reference to it. + + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. """ if not isinstance(table, (Table, TableReference)): raise TypeError('table must be a Table or a TableReference') - self._connection.api_request(method='DELETE', path=table.path) + self._call_api(retry, method='DELETE', path=table.path) - def _get_query_results(self, job_id, project=None, timeout_ms=None): + def _get_query_results(self, job_id, retry, project=None, timeout_ms=None): """Get the query results object for a query job. :type job_id: str :param job_id: Name of the query job. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :type project: str :param project: (Optional) project ID for the query job (defaults to the project of @@ -432,9 +474,11 @@ def _get_query_results(self, job_id, project=None, timeout_ms=None): path = '/projects/{}/queries/{}'.format(project, job_id) - resource = self._connection.api_request( - method='GET', path=path, query_params=extra_params) - + # This call is typically made in a polling loop that checks whether the + # job is complete (from QueryJob.done(), called ultimately from + # QueryJob.result()). So we don't need to poll here. + resource = self._call_api( + retry, method='GET', path=path, query_params=extra_params) return QueryResults.from_api_repr(resource) def job_from_resource(self, resource): @@ -462,7 +506,7 @@ def job_from_resource(self, resource): return QueryJob.from_api_repr(resource, self) raise ValueError('Cannot parse job resource') - def get_job(self, job_id, project=None): + def get_job(self, job_id, project=None, retry=DEFAULT_RETRY): """Fetch a job for the project associated with this client. See @@ -475,6 +519,9 @@ def get_job(self, job_id, project=None): :param project: project ID owning the job (defaults to the client's project) + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.cloud.bigquery.job._AsyncJob` :returns: Concrete job instance, based on the resource returned by the API. @@ -486,13 +533,13 @@ def get_job(self, job_id, project=None): path = '/projects/{}/jobs/{}'.format(project, job_id) - resource = self._connection.api_request( - method='GET', path=path, query_params=extra_params) + resource = self._call_api( + retry, method='GET', path=path, query_params=extra_params) return self.job_from_resource(resource) def list_jobs(self, max_results=None, page_token=None, all_users=None, - state_filter=None): + state_filter=None, retry=DEFAULT_RETRY): """List jobs for the project associated with this client. See @@ -519,6 +566,9 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, * ``"pending"`` * ``"running"`` + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterable of job instances. """ @@ -533,7 +583,7 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, path = '/projects/%s/jobs' % (self.project,) return page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path=path, item_to_value=_item_to_job, items_key='jobs', @@ -542,7 +592,8 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, extra_params=extra_params) def load_table_from_storage(self, source_uris, destination, - job_id=None, job_config=None): + job_id=None, job_config=None, + retry=DEFAULT_RETRY): """Starts a job for loading data into a table from CloudStorage. See @@ -563,6 +614,9 @@ def load_table_from_storage(self, source_uris, destination, :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: a new ``LoadJob`` instance """ @@ -570,7 +624,7 @@ def load_table_from_storage(self, source_uris, destination, if isinstance(source_uris, six.string_types): source_uris = [source_uris] job = LoadJob(job_id, source_uris, destination, self, job_config) - job.begin() + job.begin(retry=retry) return job def load_table_from_file(self, file_obj, destination, @@ -683,6 +737,8 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries): transport = self._http headers = _get_upload_headers(self._connection.USER_AGENT) upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) + # TODO: modify ResumableUpload to take a retry.Retry object + # that it can use for the initial RPC. upload = ResumableUpload(upload_url, chunk_size, headers=headers) if num_retries is not None: @@ -738,7 +794,8 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries): return response - def copy_table(self, sources, destination, job_id=None, job_config=None): + def copy_table(self, sources, destination, job_id=None, job_config=None, + retry=DEFAULT_RETRY): """Start a job for copying one or more tables into another table. See @@ -760,6 +817,9 @@ def copy_table(self, sources, destination, job_id=None, job_config=None): :type job_config: :class:`google.cloud.bigquery.job.CopyJobConfig` :param job_config: (Optional) Extra configuration options for the job. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: a new ``CopyJob`` instance """ @@ -769,7 +829,7 @@ def copy_table(self, sources, destination, job_id=None, job_config=None): sources = [sources] job = CopyJob(job_id, sources, destination, client=self, job_config=job_config) - job.begin() + job.begin(retry=retry) return job def extract_table(self, source, *destination_uris, **kwargs): @@ -796,20 +856,23 @@ def extract_table(self, source, *destination_uris, **kwargs): * *job_id* (``str``) -- Additional content (Optional) The ID of the job. + * *retry* (:class:`google.api.core.retry.Retry`) + (Optional) How to retry the RPC. :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: a new ``ExtractJob`` instance """ job_config = kwargs.get('job_config') job_id = _make_job_id(kwargs.get('job_id')) + retry = kwargs.get('retry', DEFAULT_RETRY) job = ExtractJob( job_id, source, list(destination_uris), client=self, job_config=job_config) - job.begin() + job.begin(retry=retry) return job - def query(self, query, job_config=None, job_id=None): + def query(self, query, job_config=None, job_id=None, retry=DEFAULT_RETRY): """Start a job that runs a SQL query. See @@ -826,12 +889,15 @@ def query(self, query, job_config=None, job_id=None): :type job_id: str :param job_id: (Optional) ID to use for the query job. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.job.QueryJob` :returns: a new ``QueryJob`` instance """ job_id = _make_job_id(job_id) job = QueryJob(job_id, query, client=self, job_config=job_config) - job.begin() + job.begin(retry=retry) return job def create_rows(self, table, rows, row_ids=None, selected_fields=None, @@ -949,7 +1015,8 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, return errors - def query_rows(self, query, job_config=None, job_id=None, timeout=None): + def query_rows(self, query, job_config=None, job_id=None, timeout=None, + retry=DEFAULT_RETRY): """Start a query job and wait for the results. See @@ -983,11 +1050,12 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None): failed or :class:`TimeoutError` if the job did not complete in the given timeout. """ - job = self.query(query, job_config=job_config, job_id=job_id) + job = self.query( + query, job_config=job_config, job_id=job_id, retry=retry) return job.result(timeout=timeout) def list_rows(self, table, selected_fields=None, max_results=None, - page_token=None, start_index=None): + page_token=None, start_index=None, retry=DEFAULT_RETRY): """List the rows of the table. See @@ -1021,6 +1089,9 @@ def list_rows(self, table, selected_fields=None, max_results=None, :param page_token: (Optional) The zero-based index of the starting row to read. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of row data :class:`tuple`s. During each page, the iterator will have the ``total_rows`` attribute set, @@ -1050,7 +1121,7 @@ def list_rows(self, table, selected_fields=None, max_results=None, iterator = page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path='%s/data' % (table.path,), item_to_value=_item_to_row, items_key='rows', @@ -1063,7 +1134,7 @@ def list_rows(self, table, selected_fields=None, max_results=None, iterator._field_to_index = _field_to_index_mapping(schema) return iterator - def list_partitions(self, table): + def list_partitions(self, table, retry=DEFAULT_RETRY): """List the partitions in a table. :type table: One of: @@ -1071,6 +1142,9 @@ def list_partitions(self, table): :class:`~google.cloud.bigquery.table.TableReference` :param table: the table to list, or a reference to it. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: list :returns: a list of time partitions """ @@ -1079,7 +1153,8 @@ def list_partitions(self, table): rows = self.query_rows( 'SELECT partition_id from [%s:%s.%s$__PARTITIONS_SUMMARY__]' % (table.project, table.dataset_id, table.table_id), - job_config=config) + job_config=config, + retry=retry) return [row[0] for row in rows] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 593b14e41fa1..65da69956369 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -38,6 +38,7 @@ from google.cloud.bigquery._helpers import _EnumApiResourceProperty from google.cloud.bigquery._helpers import _ListApiResourceProperty from google.cloud.bigquery._helpers import _TypedApiResourceProperty +from google.cloud.bigquery._helpers import DEFAULT_RETRY _DONE_STATE = 'DONE' _STOPPED_REASON = 'stopped' @@ -383,7 +384,7 @@ def _get_resource_config(cls, resource): config = resource['configuration'][cls._JOB_TYPE] return job_id, config - def begin(self, client=None): + def begin(self, client=None, retry=DEFAULT_RETRY): """API call: begin the job via a POST request See @@ -394,6 +395,9 @@ def begin(self, client=None): :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :raises: :exc:`ValueError` if the job has already begin. """ if self.state is not None: @@ -402,11 +406,14 @@ def begin(self, client=None): client = self._require_client(client) path = '/projects/%s/jobs' % (self.project,) - api_response = client._connection.api_request( + # jobs.insert is idempotent because we ensure that every new + # job has an ID. + api_response = client._call_api( + retry, method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) - def exists(self, client=None): + def exists(self, client=None, retry=DEFAULT_RETRY): """API call: test for the existence of the job via a GET request See @@ -417,20 +424,24 @@ def exists(self, client=None): :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: bool :returns: Boolean indicating existence of the job. """ client = self._require_client(client) try: - client._connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) + client._call_api(retry, + method='GET', path=self.path, + query_params={'fields': 'id'}) except NotFound: return False else: return True - def reload(self, client=None): + def reload(self, client=None, retry=DEFAULT_RETRY): """API call: refresh job properties via a GET request. See @@ -440,11 +451,13 @@ def reload(self, client=None): ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. """ client = self._require_client(client) - api_response = client._connection.api_request( - method='GET', path=self.path) + api_response = client._call_api(retry, method='GET', path=self.path) self._set_properties(api_response) def cancel(self, client=None): @@ -494,16 +507,19 @@ def _set_future_result(self): else: self.set_result(self) - def done(self): + def done(self, retry=DEFAULT_RETRY): """Refresh the job and checks if it is complete. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: bool :returns: True if the job is complete, False otherwise. """ # Do not refresh is the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: - self.reload() + self.reload(retry=retry) return self.state == _DONE_STATE def result(self, timeout=None): @@ -522,6 +538,7 @@ def result(self, timeout=None): """ if self.state is None: self.begin() + # TODO: modify PollingFuture so it can pass a retry argument to done(). return super(_AsyncJob, self).result(timeout=timeout) def cancelled(self): @@ -1830,17 +1847,21 @@ def undeclared_query_paramters(self): return parameters - def query_results(self): + def query_results(self, retry=DEFAULT_RETRY): """Construct a QueryResults instance, bound to this job. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.cloud.bigquery.query.QueryResults` :returns: results instance """ if not self._query_results: - self._query_results = self._client._get_query_results(self.job_id) + self._query_results = self._client._get_query_results( + self.job_id, retry) return self._query_results - def done(self): + def done(self, retry=DEFAULT_RETRY): """Refresh the job and checks if it is complete. :rtype: bool @@ -1849,17 +1870,18 @@ def done(self): # Do not refresh is the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: - self._query_results = self._client._get_query_results(self.job_id) + self._query_results = self._client._get_query_results( + self.job_id, retry) # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are # correctly populated. if self._query_results.complete: - self.reload() + self.reload(retry=retry) return self.state == _DONE_STATE - def result(self, timeout=None): + def result(self, timeout=None, retry=DEFAULT_RETRY): """Start the job and wait for it to complete and get the result. :type timeout: int @@ -1867,6 +1889,9 @@ def result(self, timeout=None): How long to wait for job to complete before raising a :class:`TimeoutError`. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the call that retrieves rows. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of row data :class:`tuple`s. During each page, the @@ -1883,7 +1908,8 @@ def result(self, timeout=None): # Return an iterator instead of returning the job. schema = self.query_results().schema dest_table = self.destination - return self._client.list_rows(dest_table, selected_fields=schema) + return self._client.list_rows(dest_table, selected_fields=schema, + retry=retry) class QueryPlanEntryStep(object): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index bb5517207ffc..6a40a65b390a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -68,7 +68,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): with self.assertRaises(NotFound): client._get_query_results( - 'nothere', project='other-project', timeout_ms=500) + 'nothere', None, project='other-project', timeout_ms=500) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -110,7 +110,7 @@ def test__get_query_results_hit(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) client._connection = _Connection(data) - query_results = client._get_query_results(job_id) + query_results = client._get_query_results(job_id, None) self.assertEqual(query_results.total_rows, 10) self.assertTrue(query_results.complete) @@ -274,6 +274,8 @@ def test_dataset_with_default_project(self): self.assertEqual(dataset.project, self.PROJECT) def test_get_dataset(self): + from google.cloud.exceptions import ServerError + path = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) creds = _make_credentials() http = object() @@ -297,6 +299,39 @@ def test_get_dataset(self): self.assertEqual(req['path'], '/%s' % path) self.assertEqual(dataset.dataset_id, self.DS_ID) + # Test retry. + + # Not a cloud API exception (missing 'errors' field). + client._connection = _Connection(Exception(''), resource) + with self.assertRaises(Exception): + client.get_dataset(dataset_ref) + + # Zero-length errors field. + client._connection = _Connection(ServerError(''), resource) + with self.assertRaises(ServerError): + client.get_dataset(dataset_ref) + + # Non-retryable reason. + client._connection = _Connection( + ServerError('', errors=[{'reason': 'serious'}]), + resource) + with self.assertRaises(ServerError): + client.get_dataset(dataset_ref) + + # Retryable reason, but retry is disabled. + client._connection = _Connection( + ServerError('', errors=[{'reason': 'backendError'}]), + resource) + with self.assertRaises(ServerError): + client.get_dataset(dataset_ref, retry=None) + + # Retryable reason, default retry: success. + client._connection = _Connection( + ServerError('', errors=[{'reason': 'backendError'}]), + resource) + dataset = client.get_dataset(dataset_ref) + self.assertEqual(dataset.dataset_id, self.DS_ID) + def test_create_dataset_minimal(self): from google.cloud.bigquery.dataset import Dataset @@ -2994,4 +3029,6 @@ def api_request(self, **kw): raise NotFound('miss') response, self._responses = self._responses[0], self._responses[1:] + if isinstance(response, Exception): + raise response return response From 4d543bc1b4e9d771b58de42396f852f32a6bee6d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 12 Oct 2017 15:29:35 -0700 Subject: [PATCH 0240/2016] BQ: use a string or list of string for client.extract_table. (#4177) This will match behavior of copy_table and remove the need for special formatting of the keyword arguments. --- .../google/cloud/bigquery/client.py | 34 +++++++------ .../tests/unit/test_client.py | 51 +++++++++++++++++++ 2 files changed, 70 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index c52fbbda1ba8..d0ec7953ae1f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -832,7 +832,9 @@ def copy_table(self, sources, destination, job_id=None, job_config=None, job.begin(retry=retry) return job - def extract_table(self, source, *destination_uris, **kwargs): + def extract_table( + self, source, destination_uris, job_config=None, job_id=None, + retry=DEFAULT_RETRY): """Start a job to extract a table into Cloud Storage files. See @@ -841,7 +843,9 @@ def extract_table(self, source, *destination_uris, **kwargs): :type source: :class:`google.cloud.bigquery.table.TableReference` :param source: table to be extracted. - :type destination_uris: sequence of string + :type destination_uris: One of: + str or + sequence of str :param destination_uris: URIs of Cloud Storage file(s) into which table data is to be extracted; in format ``gs:///``. @@ -849,25 +853,25 @@ def extract_table(self, source, *destination_uris, **kwargs): :type kwargs: dict :param kwargs: Additional keyword arguments. - :Keyword Arguments: - * *job_config* - (:class:`google.cloud.bigquery.job.ExtractJobConfig`) -- - (Optional) Extra configuration options for the extract job. - * *job_id* (``str``) -- - Additional content - (Optional) The ID of the job. - * *retry* (:class:`google.api.core.retry.Retry`) - (Optional) How to retry the RPC. + :type job_id: str + :param job_id: (Optional) The ID of the job. + + :type job_config: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :param job_config: (Optional) Extra configuration options for the job. + + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: a new ``ExtractJob`` instance """ - job_config = kwargs.get('job_config') - job_id = _make_job_id(kwargs.get('job_id')) - retry = kwargs.get('retry', DEFAULT_RETRY) + job_id = _make_job_id(job_id) + + if isinstance(destination_uris, six.string_types): + destination_uris = [destination_uris] job = ExtractJob( - job_id, source, list(destination_uris), client=self, + job_id, source, destination_uris, client=self, job_config=job_config) job.begin(retry=retry) return job diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 6a40a65b390a..243a892fdb86 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1703,6 +1703,57 @@ def test_extract_table_generated_job_id(self): self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) + def test_extract_table_w_destination_uris(self): + from google.cloud.bigquery.job import ExtractJob + + JOB = 'job_id' + SOURCE = 'source_table' + DESTINATION1 = 'gs://bucket_name/object_one' + DESTINATION2 = 'gs://bucket_name/object_two' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': SOURCE, + }, + 'destinationUris': [ + DESTINATION1, + DESTINATION2, + ], + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(RESOURCE) + dataset = client.dataset(self.DS_ID) + source = dataset.table(SOURCE) + + job = client.extract_table( + source, [DESTINATION1, DESTINATION2], job_id=JOB) + + # Check that extract_table actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + + # Check the job resource. + self.assertIsInstance(job, ExtractJob) + self.assertIs(job._client, client) + self.assertEqual(job.job_id, JOB) + self.assertEqual(job.source, source) + self.assertEqual( + list(job.destination_uris), [DESTINATION1, DESTINATION2]) + def test_query_defaults(self): from google.cloud.bigquery.job import QueryJob From 3f35a4254c3d7f6c0ef6a0a7ca93b33647d89598 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 12 Oct 2017 17:13:19 -0700 Subject: [PATCH 0241/2016] s/gcloud-common/google-cloud-common/g (#4180) The gcloud-common repo moved to https://github.com/GoogleCloudPlatform/google-cloud-common --- packages/google-cloud-bigquery/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index c25b84c6bebd..dad5783e973a 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -27,7 +27,7 @@ learn more. You may also find the `authentication document`_ shared by all the ``google-cloud-*`` libraries to be helpful. .. _Authentication section: https://google-cloud-python.readthedocs.io/en/latest/core/auth.html -.. _authentication document: https://github.com/GoogleCloudPlatform/gcloud-common/tree/master/authentication +.. _authentication document: https://github.com/GoogleCloudPlatform/google-cloud-common/tree/master/authentication Using the API ------------- From 226fc81eb99fd0b7028f7882b35e7a9348e3bf90 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Fri, 13 Oct 2017 16:35:08 -0400 Subject: [PATCH 0242/2016] bigquery: retry Client.create_rows (#4178) --- .../google/cloud/bigquery/client.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index d0ec7953ae1f..2d74c93d7568 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -906,7 +906,7 @@ def query(self, query, job_config=None, job_id=None, retry=DEFAULT_RETRY): def create_rows(self, table, rows, row_ids=None, selected_fields=None, skip_invalid_rows=None, ignore_unknown_values=None, - template_suffix=None): + template_suffix=None, retry=DEFAULT_RETRY): """API call: insert table data via a POST request See @@ -958,6 +958,9 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, on the schema of the template table. See https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: list of mappings :returns: One mapping per row with insert errors: the "index" key identifies the row, and the "errors" key contains a list @@ -1006,8 +1009,9 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, if template_suffix is not None: data['templateSuffix'] = template_suffix - # TODO(jba): use self._call_api here after #4148 is merged. - response = self._connection.api_request( + # We can always retry, because every row has an insert ID. + response = self._call_api( + retry, method='POST', path='%s/insertAll' % table.path, data=data) From 6f6b748c14ac30f94bbbd73edde861eaa1fbe59b Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Fri, 13 Oct 2017 13:46:24 -0700 Subject: [PATCH 0243/2016] Update Docs with Python Setup Guide (#4187) --- packages/google-cloud-bigquery/README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index dad5783e973a..721ba18a27b2 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -18,6 +18,10 @@ Quick Start $ pip install --upgrade google-cloud-bigquery +Fore more information on setting up your Python development environment, such as installing ``pip`` and on your system, please refer to `Python Development Environment Setup Guide`_ for Google Cloud Platform. + +.. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup + Authentication -------------- From a4d08ece0b71ab6d9f53ced08787a63a790b0278 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Sat, 14 Oct 2017 07:10:15 -0400 Subject: [PATCH 0244/2016] bigquery: classes to support external table config (#4182) * bigquery: classes to support external table config These classes will be used to configure external table definitions for queries (transient external tables) as well as external data configurations for tables (permanent external tables). This PR just establishes the classes. Subsequent PRs will wire them into query jobs and tables. --- .../google/cloud/bigquery/__init__.py | 12 + .../google/cloud/bigquery/external_config.py | 491 ++++++++++++++++++ .../tests/unit/test_external_config.py | 228 ++++++++ 3 files changed, 731 insertions(+) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_external_config.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 545e0cde265a..4c3fcd7b3be0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -43,6 +43,12 @@ from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery.external_config import BigtableOptions +from google.cloud.bigquery.external_config import BigtableColumnFamily +from google.cloud.bigquery.external_config import BigtableColumn +from google.cloud.bigquery.external_config import CSVOptions +from google.cloud.bigquery.external_config import GoogleSheetsOptions __all__ = [ '__version__', @@ -63,4 +69,10 @@ 'TableReference', 'UDFResource', 'DEFAULT_RETRY', + 'ExternalConfig', + 'BigtableOptions', + 'BigtableColumnFamily', + 'BigtableColumn', + 'CSVOptions', + 'GoogleSheetsOptions', ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py new file mode 100644 index 000000000000..9177595da67c --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -0,0 +1,491 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Define classes that describe external data sources. + + These are used for both Table.externalDataConfiguration and + Job.configuration.query.tableDefinitions. +""" + +from __future__ import absolute_import + +import base64 +import copy + +import six + +from google.cloud.bigquery._helpers import _to_bytes +from google.cloud.bigquery._helpers import _bytes_to_json +from google.cloud.bigquery._helpers import _TypedApiResourceProperty +from google.cloud.bigquery._helpers import _ListApiResourceProperty +from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.table import _build_schema_resource +from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery.job import _int_or_none + + +class ExternalConfig(object): + """Description of an external data source. + + :type source_format: str + :param source_format: the format of the external data. See + the ``source_format`` property on this class. + """ + + def __init__(self, source_format): + self._properties = {'sourceFormat': source_format} + self._options = None + + @property + def source_format(self): + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat + """ + return self._properties['sourceFormat'] + + autodetect = _TypedApiResourceProperty( + 'autodetect', 'autodetect', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect + """ + + compression = _TypedApiResourceProperty( + 'compression', 'compression', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression + """ + + ignore_unknown_values = _TypedApiResourceProperty( + 'ignore_unknown_values', 'ignoreUnknownValues', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues + """ + + max_bad_records = _TypedApiResourceProperty( + 'max_bad_records', 'maxBadRecords', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords + """ + + source_uris = _ListApiResourceProperty( + 'source_uris', 'sourceUris', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris + """ + + schema = _ListApiResourceProperty('schema', 'schema', SchemaField) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema + """ + + @property + def options(self): + """Source-specific options. A subclass of ExternalConfigOptions.""" + return self._options + + @options.setter + def options(self, value): + if self.source_format != value._SOURCE_FORMAT: + raise ValueError( + 'source format %s does not match option type %s' % ( + self.source_format, value.__class__.__name__)) + self._options = value + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + if self.schema: + config['schema'] = {'fields': _build_schema_resource(self.schema)} + if self.options is not None: + config[self.options._RESOURCE_NAME] = self.options.to_api_repr() + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a CSVOptions given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` + :returns: Configuration parsed from ``resource``. + """ + config = cls(resource['sourceFormat']) + schema = resource.pop('schema', None) + for optcls in (BigtableOptions, CSVOptions, GoogleSheetsOptions): + opts = resource.pop(optcls._RESOURCE_NAME, None) + if opts is not None: + config.options = optcls.from_api_repr(opts) + break + config._properties = copy.deepcopy(resource) + if schema: + config.schema = _parse_schema_resource(schema) + return config + + +class BigtableColumn(object): + """Options for a Bigtable column.""" + + def __init__(self): + self._properties = {} + + encoding = _TypedApiResourceProperty( + 'encoding', 'encoding', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.encoding + """ + + field_name = _TypedApiResourceProperty( + 'field_name', 'fieldName', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.field_name + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.field_name + """ + + only_read_latest = _TypedApiResourceProperty( + 'only_read_latest', 'onlyReadLatest', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.only_read_latest + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.only_read_latest + """ + + qualifier_encoded = _TypedApiResourceProperty( + 'qualifier_encoded', 'qualifierEncoded', six.binary_type) + """The qualifier encoded in binary. The type is ``str`` (Python 2.x) or + ``bytes`` (Python 3.x). The module will handle base64 encoding for you. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_encoded + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_encoded + """ + + qualifier_string = _TypedApiResourceProperty( + 'qualifier_string', 'qualifierString', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_string + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_string + """ + + type_ = _TypedApiResourceProperty('type_', 'type', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.type + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + qe = config.get('qualifierEncoded') + if qe is not None: + config['qualifierEncoded'] = _bytes_to_json(qe) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a BigtableColumn given its API representation + + :type resource: dict + :param resource: + A column in the same representation as is returned from the API. + + :rtype: :class:`google.cloud.bigquery.external_config.BigtableColumn` + :returns: Configuration parsed from ``resource``. + """ + qe = resource.pop('qualifierEncoded', None) + config = cls() + config._properties = copy.deepcopy(resource) + if qe: + config.qualifier_encoded = base64.standard_b64decode(_to_bytes(qe)) + return config + + +class BigtableColumnFamily(object): + """Options for a Bigtable column family.""" + + def __init__(self): + self._properties = {} + + encoding = _TypedApiResourceProperty( + 'encoding', 'encoding', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.encoding + """ + + family_id = _TypedApiResourceProperty( + 'family_id', 'familyId', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.familyId + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.familyId + """ + + only_read_latest = _TypedApiResourceProperty( + 'only_read_latest', 'onlyReadLatest', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.onlyReadLatest + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.onlyReadLatest + """ + + type_ = _TypedApiResourceProperty('type_', 'type', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.type + """ + + columns = _ListApiResourceProperty( + 'columns', 'columns', BigtableColumn) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + config['columns'] = [c.to_api_repr() for c in config['columns']] + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a BigtableColumnFamily given its + API representation + + :type resource: dict + :param resource: + A column family in the same representation as is returned + from the API. + + :rtype: + :class:`google.cloud.bigquery.external_config.BigtableColumnFamily` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + config.columns = [BigtableColumn.from_api_repr(c) + for c in resource['columns']] + return config + + +class BigtableOptions(object): + """Options that describe how to treat Bigtable tables + as BigQuery tables.""" + + _SOURCE_FORMAT = 'BIGTABLE' + _RESOURCE_NAME = 'bigtableOptions' + + def __init__(self): + self._properties = {} + + ignore_unspecified_column_families = _TypedApiResourceProperty( + 'ignore_unspecified_column_families', + 'ignoreUnspecifiedColumnFamilies', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.ignoreUnspecifiedColumnFamilies + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.ignoreUnspecifiedColumnFamilies + """ + + read_rowkey_as_string = _TypedApiResourceProperty( + 'read_rowkey_as_string', 'readRowkeyAsString', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.readRowkeyAsString + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.readRowkeyAsString + """ + + column_families = _ListApiResourceProperty( + 'column_families', 'columnFamilies', BigtableColumnFamily) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + config['columnFamilies'] = [cf.to_api_repr() + for cf in config['columnFamilies']] + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a BigtableOptions given its API representation + + :type resource: dict + :param resource: + A BigtableOptions in the same representation as is returned + from the API. + + :rtype: :class:`google.cloud.bigquery.external_config.BigtableOptions` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + config.column_families = [BigtableColumnFamily.from_api_repr(cf) + for cf in resource['columnFamilies']] + return config + + +class CSVOptions(object): + """Options that describe how to treat CSV files as BigQuery tables.""" + + _SOURCE_FORMAT = 'CSV' + _RESOURCE_NAME = 'csvOptions' + + def __init__(self): + self._properties = {} + + allow_jagged_rows = _TypedApiResourceProperty( + 'allow_jagged_rows', 'allowJaggedRows', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowJaggedRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowJaggedRows + """ + + allow_quoted_newlines = _TypedApiResourceProperty( + 'allow_quoted_newlines', 'allowQuotedNewlines', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowQuotedNewlines + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowQuotedNewlines + """ + + encoding = _TypedApiResourceProperty( + 'encoding', 'encoding', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding + """ + + field_delimiter = _TypedApiResourceProperty( + 'field_delimiter', 'fieldDelimiter', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.fieldDelimiter + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.fieldDelimiter + """ + + quote_character = _TypedApiResourceProperty( + 'quote_character', 'quote', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.quote + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.quote + """ + + skip_leading_rows = _TypedApiResourceProperty( + 'skip_leading_rows', 'skipLeadingRows', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.skipLeadingRows + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + slr = config.pop('skipLeadingRows', None) + if slr is not None: + config['skipLeadingRows'] = str(slr) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a CSVOptions given its API representation + + :type resource: dict + :param resource: + A CSVOptions in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` + :returns: Configuration parsed from ``resource``. + """ + slr = resource.pop('skipLeadingRows', None) + config = cls() + config._properties = copy.deepcopy(resource) + config.skip_leading_rows = _int_or_none(slr) + return config + + +class GoogleSheetsOptions(object): + """Options that describe how to treat Google Sheets as BigQuery tables.""" + + _SOURCE_FORMAT = 'GOOGLE_SHEETS' + _RESOURCE_NAME = 'googleSheetsOptions' + + def __init__(self): + self._properties = {} + + skip_leading_rows = _TypedApiResourceProperty( + 'skip_leading_rows', 'skipLeadingRows', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).googleSheetsOptions.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.googleSheetsOptions.skipLeadingRows + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + slr = config.pop('skipLeadingRows', None) + if slr is not None: + config['skipLeadingRows'] = str(slr) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a GoogleSheetsOptions given its API representation + + :type resource: dict + :param resource: + An GoogleSheetsOptions in the same representation as is + returned from the API. + + :rtype: + :class:`google.cloud.bigquery.external_config.GoogleSheetsOptions` + :returns: Configuration parsed from ``resource``. + """ + slr = resource.pop('skipLeadingRows', None) + config = cls() + config._properties = copy.deepcopy(resource) + config.skip_leading_rows = _int_or_none(slr) + return config diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py new file mode 100644 index 000000000000..6768093ed0b3 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -0,0 +1,228 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import copy +import unittest + +from google.cloud.bigquery.external_config import ExternalConfig + + +class TestExternalConfig(unittest.TestCase): + + SOURCE_URIS = ['gs://foo', 'gs://bar'] + + BASE_RESOURCE = { + 'sourceFormat': '', + 'sourceUris': SOURCE_URIS, + 'maxBadRecords': 17, + 'autodetect': True, + 'ignoreUnknownValues': False, + 'compression': 'compression', + } + + def test_api_repr_base(self): + from google.cloud.bigquery.schema import SchemaField + + resource = copy.deepcopy(self.BASE_RESOURCE) + ec = ExternalConfig.from_api_repr(resource) + self._verify_base(ec) + self.assertEqual(ec.schema, []) + self.assertIsNone(ec.options) + + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, self.BASE_RESOURCE) + + resource = _copy_and_update(self.BASE_RESOURCE, { + 'schema': { + 'fields': [ + { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + }, + ], + }, + }) + want_resource = copy.deepcopy(resource) + ec = ExternalConfig.from_api_repr(resource) + self._verify_base(ec) + self.assertEqual(ec.schema, + [SchemaField('full_name', 'STRING', mode='REQUIRED')]) + self.assertIsNone(ec.options) + + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, want_resource) + + def _verify_base(self, ec): + self.assertEqual(ec.autodetect, True) + self.assertEqual(ec.compression, 'compression') + self.assertEqual(ec.ignore_unknown_values, False) + self.assertEqual(ec.max_bad_records, 17) + self.assertEqual(ec.source_uris, self.SOURCE_URIS) + + def test_to_api_repr_source_format(self): + ec = ExternalConfig('CSV') + got = ec.to_api_repr() + want = {'sourceFormat': 'CSV'} + self.assertEqual(got, want) + + def test_api_repr_sheets(self): + from google.cloud.bigquery.external_config import GoogleSheetsOptions + + resource = _copy_and_update(self.BASE_RESOURCE, { + 'sourceFormat': 'GOOGLE_SHEETS', + 'googleSheetsOptions': {'skipLeadingRows': '123'}, + }) + want_resource = copy.deepcopy(resource) + + ec = ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, 'GOOGLE_SHEETS') + self.assertIsInstance(ec.options, GoogleSheetsOptions) + self.assertEqual(ec.options.skip_leading_rows, 123) + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, want_resource) + + del want_resource['googleSheetsOptions']['skipLeadingRows'] + ec = ExternalConfig.from_api_repr(copy.deepcopy(want_resource)) + self.assertIsNone(ec.options.skip_leading_rows) + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, want_resource) + + def test_api_repr_csv(self): + from google.cloud.bigquery.external_config import CSVOptions + + resource = _copy_and_update(self.BASE_RESOURCE, { + 'sourceFormat': 'CSV', + 'csvOptions': { + 'fieldDelimiter': 'fieldDelimiter', + 'skipLeadingRows': '123', + 'quote': 'quote', + 'allowQuotedNewlines': True, + 'allowJaggedRows': False, + 'encoding': 'encoding', + }, + }) + want_resource = copy.deepcopy(resource) + + ec = ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, 'CSV') + self.assertIsInstance(ec.options, CSVOptions) + self.assertEqual(ec.options.field_delimiter, 'fieldDelimiter') + self.assertEqual(ec.options.skip_leading_rows, 123) + self.assertEqual(ec.options.quote_character, 'quote') + self.assertEqual(ec.options.allow_quoted_newlines, True) + self.assertEqual(ec.options.allow_jagged_rows, False) + self.assertEqual(ec.options.encoding, 'encoding') + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, want_resource) + + del want_resource['csvOptions']['skipLeadingRows'] + ec = ExternalConfig.from_api_repr(copy.deepcopy(want_resource)) + self.assertIsNone(ec.options.skip_leading_rows) + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, want_resource) + + def test_api_repr_bigtable(self): + from google.cloud.bigquery.external_config import BigtableOptions + from google.cloud.bigquery.external_config import BigtableColumnFamily + + QUALIFIER_ENCODED = base64.standard_b64encode(b'q').decode('ascii') + resource = _copy_and_update(self.BASE_RESOURCE, { + 'sourceFormat': 'BIGTABLE', + 'bigtableOptions': { + 'ignoreUnspecifiedColumnFamilies': True, + 'readRowkeyAsString': False, + 'columnFamilies': [ + { + 'familyId': 'familyId', + 'type': 'type', + 'encoding': 'encoding', + 'columns': [ + { + 'qualifierString': 'q', + 'fieldName': 'fieldName1', + 'type': 'type1', + 'encoding': 'encoding1', + 'onlyReadLatest': True, + }, + { + 'qualifierEncoded': QUALIFIER_ENCODED, + 'fieldName': 'fieldName2', + 'type': 'type2', + 'encoding': 'encoding2', + }, + + ], + 'onlyReadLatest': False, + } + ], + }, + }) + want_resource = copy.deepcopy(resource) + + ec = ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, 'BIGTABLE') + self.assertIsInstance(ec.options, BigtableOptions) + self.assertEqual(ec.options.ignore_unspecified_column_families, True) + self.assertEqual(ec.options.read_rowkey_as_string, False) + self.assertEqual(len(ec.options.column_families), 1) + fam1 = ec.options.column_families[0] + self.assertIsInstance(fam1, BigtableColumnFamily) + self.assertEqual(fam1.family_id, 'familyId') + self.assertEqual(fam1.type_, 'type') + self.assertEqual(fam1.encoding, 'encoding') + self.assertEqual(len(fam1.columns), 2) + col1 = fam1.columns[0] + self.assertEqual(col1.qualifier_string, 'q') + self.assertEqual(col1.field_name, 'fieldName1') + self.assertEqual(col1.type_, 'type1') + self.assertEqual(col1.encoding, 'encoding1') + col2 = ec.options.column_families[0].columns[1] + self.assertEqual(col2.qualifier_encoded, b'q') + self.assertEqual(col2.field_name, 'fieldName2') + self.assertEqual(col2.type_, 'type2') + self.assertEqual(col2.encoding, 'encoding2') + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, want_resource) + + def test_option_mismatch(self): + from google.cloud.bigquery.external_config import CSVOptions + from google.cloud.bigquery.external_config import BigtableOptions + from google.cloud.bigquery.external_config import GoogleSheetsOptions + + for source_format, opts in (('BIGTABLE', CSVOptions()), + ('CSV', GoogleSheetsOptions()), + ('GOOGLE_SHEETS', BigtableOptions())): + ec = ExternalConfig(source_format) + with self.assertRaises(ValueError): + ec.options = opts + + +def _copy_and_update(d, u): + d = copy.deepcopy(d) + d.update(u) + return d From 8ec182c4844ddadbec6f73060405753c5a6b3106 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 16 Oct 2017 09:08:31 -0700 Subject: [PATCH 0245/2016] BigQuery: adds client.create_rows_json (#4189) Addresses client library requirement to "provide an option for JSON as argument" --- .../google/cloud/bigquery/client.py | 96 +++++++++++++------ .../google-cloud-bigquery/tests/system.py | 35 ++++--- .../tests/unit/test_client.py | 55 +++++++++++ 3 files changed, 137 insertions(+), 49 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2d74c93d7568..da3b8dcaaea5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -904,9 +904,7 @@ def query(self, query, job_config=None, job_id=None, retry=DEFAULT_RETRY): job.begin(retry=retry) return job - def create_rows(self, table, rows, row_ids=None, selected_fields=None, - skip_invalid_rows=None, ignore_unknown_values=None, - template_suffix=None, retry=DEFAULT_RETRY): + def create_rows(self, table, rows, selected_fields=None, **kwargs): """API call: insert table data via a POST request See @@ -928,15 +926,73 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, include all required fields in the schema. Keys which do not correspond to a field in the schema are ignored. - :type row_ids: list of string - :param row_ids: (Optional) Unique ids, one per row being inserted. - If omitted, unique IDs are created. - :type selected_fields: list of :class:`SchemaField` :param selected_fields: The fields to return. Required if ``table`` is a :class:`~google.cloud.bigquery.table.TableReference`. + :type kwargs: dict + :param kwargs: Keyword arguments to + `~google.cloud.bigquery.client.Client.create_rows_json` + + :rtype: list of mappings + :returns: One mapping per row with insert errors: the "index" key + identifies the row, and the "errors" key contains a list + of the mappings describing one or more problems with the + row. + :raises: ValueError if table's schema is not set + """ + if selected_fields is not None: + schema = selected_fields + elif isinstance(table, TableReference): + raise ValueError('need selected_fields with TableReference') + elif isinstance(table, Table): + if len(table._schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + schema = table.schema + else: + raise TypeError('table should be Table or TableReference') + + json_rows = [] + + for index, row in enumerate(rows): + if isinstance(row, dict): + row = _row_from_mapping(row, schema) + json_row = {} + + for field, value in zip(schema, row): + converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) + if converter is not None: # STRING doesn't need converting + value = converter(value) + json_row[field.name] = value + + json_rows.append(json_row) + + return self.create_rows_json(table, json_rows, **kwargs) + + def create_rows_json(self, table, json_rows, row_ids=None, + skip_invalid_rows=None, ignore_unknown_values=None, + template_suffix=None, retry=DEFAULT_RETRY): + """API call: insert table data via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll + + :type table: One of: + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` + :param table: the destination table for the row data, or a reference + to it. + + :type json_rows: list of dictionaries + :param json_rows: Row data to be inserted. Keys must match the table + schema fields and values must be JSON-compatible + representations. + + :type row_ids: list of string + :param row_ids: (Optional) Unique ids, one per row being inserted. + If omitted, unique IDs are created. + :type skip_invalid_rows: bool :param skip_invalid_rows: (Optional) Insert all valid rows of a request, even if invalid rows exist. @@ -966,34 +1022,12 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, identifies the row, and the "errors" key contains a list of the mappings describing one or more problems with the row. - :raises: ValueError if table's schema is not set """ - if selected_fields is not None: - schema = selected_fields - elif isinstance(table, TableReference): - raise ValueError('need selected_fields with TableReference') - elif isinstance(table, Table): - if len(table._schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - schema = table.schema - else: - raise TypeError('table should be Table or TableReference') - rows_info = [] data = {'rows': rows_info} - for index, row in enumerate(rows): - if isinstance(row, dict): - row = _row_from_mapping(row, schema) - row_info = {} - - for field, value in zip(schema, row): - converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) - if converter is not None: # STRING doesn't need converting - value = converter(value) - row_info[field.name] = value - - info = {'json': row_info} + for index, row in enumerate(json_rows): + info = {'json': row} if row_ids is not None: info['insertId'] = row_ids[index] else: diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 2c62e5efa9b0..4cb97c6601f4 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -339,7 +339,7 @@ def test_create_rows_then_dump_table(self): self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) - errors = Config.CLIENT.create_rows(table, ROWS, ROW_IDS) + errors = Config.CLIENT.create_rows(table, ROWS, row_ids=ROW_IDS) self.assertEqual(len(errors), 0) rows = () @@ -1356,11 +1356,9 @@ def test_create_table_rows_fetch_nested_schema(self): json_filename = os.path.join(WHERE, 'data', 'characters.jsonl') with open(json_filename) as rows_file: for line in rows_file: - mapping = json.loads(line) - to_insert.append( - tuple(mapping[field.name] for field in schema)) + to_insert.append(json.loads(line)) - errors = Config.CLIENT.create_rows(table, to_insert) + errors = Config.CLIENT.create_rows_json(table, to_insert) self.assertEqual(len(errors), 0) retry = RetryResult(_has_rows, max_tries=8) @@ -1369,14 +1367,14 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(len(fetched), len(to_insert)) - for found, expected in zip(sorted(fetched_tuples), sorted(to_insert)): - self.assertEqual(found[0], expected[0]) # Name - self.assertEqual(found[1], int(expected[1])) # Age - self.assertEqual(found[2], expected[2]) # Weight - self.assertEqual(found[3], expected[3]) # IsMagic + for found, expected in zip(sorted(fetched_tuples), to_insert): + self.assertEqual(found[0], expected['Name']) + self.assertEqual(found[1], int(expected['Age'])) + self.assertEqual(found[2], expected['Weight']) + self.assertEqual(found[3], expected['IsMagic']) - self.assertEqual(len(found[4]), len(expected[4])) # Spells - for f_spell, e_spell in zip(found[4], expected[4]): + self.assertEqual(len(found[4]), len(expected['Spells'])) + for f_spell, e_spell in zip(found[4], expected['Spells']): self.assertEqual(f_spell['Name'], e_spell['Name']) parts = time.strptime( e_spell['LastUsed'], '%Y-%m-%d %H:%M:%S UTC') @@ -1390,17 +1388,18 @@ def test_create_table_rows_fetch_nested_schema(self): e_spell['Icon'].encode('ascii')) self.assertEqual(f_spell['Icon'], e_icon) - parts = time.strptime(expected[5], '%H:%M:%S') + parts = time.strptime(expected['TeaTime'], '%H:%M:%S') e_teatime = datetime.time(*parts[3:6]) - self.assertEqual(found[5], e_teatime) # TeaTime + self.assertEqual(found[5], e_teatime) - parts = time.strptime(expected[6], '%Y-%m-%d') + parts = time.strptime(expected['NextVacation'], '%Y-%m-%d') e_nextvac = datetime.date(*parts[0:3]) - self.assertEqual(found[6], e_nextvac) # NextVacation + self.assertEqual(found[6], e_nextvac) - parts = time.strptime(expected[7], '%Y-%m-%dT%H:%M:%S') + parts = time.strptime(expected['FavoriteTime'], + '%Y-%m-%dT%H:%M:%S') e_favtime = datetime.datetime(*parts[0:6]) - self.assertEqual(found[7], e_favtime) # FavoriteTime + self.assertEqual(found[7], e_favtime) def temp_dataset(self, dataset_id): dataset = retry_403(Config.CLIENT.create_dataset)( diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 243a892fdb86..1ce9c2158a59 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2283,6 +2283,61 @@ def test_create_rows_errors(self): with self.assertRaises(TypeError): client.create_rows(1, ROWS) + def test_create_rows_json(self): + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + ] + table = Table(table_ref, schema=schema) + ROWS = [ + { + 'full_name': 'Phred Phlyntstone', 'age': '32', + 'joined': '2015-07-24T19:53:19.006000Z' + }, + { + 'full_name': 'Bharney Rhubble', 'age': '33', + 'joined': 1437767600.006 + }, + { + 'full_name': 'Wylma Phlyntstone', 'age': '29', + 'joined': 1437767601.006 + }, + { + 'full_name': 'Bhettye Rhubble', 'age': '27', 'joined': None + }, + ] + + SENT = { + 'rows': [{ + 'json': row, + 'insertId': str(i), + } for i, row in enumerate(ROWS)], + } + + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows_json(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + def test_query_rows_defaults(self): from google.api.core.page_iterator import HTTPIterator from google.cloud.bigquery._helpers import Row From 8cfd145bcfb32e1400988be5cb40880a6aba22c4 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 13:03:38 -0400 Subject: [PATCH 0246/2016] bigquery: make views default to standard SQL (#4192) --- .../google/cloud/bigquery/table.py | 37 ++++++++++++------- .../tests/unit/test_client.py | 3 +- .../tests/unit/test_table.py | 11 ++++-- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 990349336433..d4c6977c8755 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -463,6 +463,9 @@ def location(self, value): def view_query(self): """SQL query defining the table as a view. + By default, the query is treated as Standard SQL. To use Legacy + SQL, set view_use_legacy_sql to True. + :rtype: str, or ``NoneType`` :returns: The query as set by the user, or None (the default). """ @@ -481,9 +484,14 @@ def view_query(self, value): """ if not isinstance(value, six.string_types): raise ValueError("Pass a string") - if self._properties.get('view') is None: - self._properties['view'] = {} - self._properties['view']['query'] = value + view = self._properties.get('view') + if view is None: + view = self._properties['view'] = {} + view['query'] = value + # The service defaults useLegacySql to True, but this + # client uses Standard SQL by default. + if view.get('useLegacySql') is None: + view['useLegacySql'] = False @view_query.deleter def view_query(self): @@ -492,26 +500,29 @@ def view_query(self): @property def view_use_legacy_sql(self): - """Specifies whether to execute the view with legacy or standard SQL. + """Specifies whether to execute the view with Legacy or Standard SQL. - If not set, None is returned. BigQuery's default mode is equivalent to - useLegacySql = True. + The default is False for views (use Standard SQL). + If this table is not a view, None is returned. - :rtype: bool, or ``NoneType`` - :returns: The boolean for view.useLegacySql as set by the user, or - None (the default). + :rtype: bool or ``NoneType`` + :returns: The boolean for view.useLegacySql, or None if not a view. """ view = self._properties.get('view') if view is not None: - return view.get('useLegacySql') + # useLegacySql is never missing from the view dict if this table + # was created client-side, because the view_query setter populates + # it. So a missing or None can only come from the server, whose + # default is True. + return view.get('useLegacySql', True) @view_use_legacy_sql.setter def view_use_legacy_sql(self, value): """Update the view sub-property 'useLegacySql'. - This boolean specifies whether to execute the view with legacy SQL - (True) or standard SQL (False). The default, if not specified, is - 'True'. + This boolean specifies whether to execute the view with Legacy SQL + (True) or Standard SQL (False). The default, if not specified, is + 'False'. :type value: bool :param value: The boolean for view.useLegacySql diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 1ce9c2158a59..9dfa311e806c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -550,8 +550,7 @@ def test_create_table_w_schema_and_query(self): {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] }, - # TODO(alixh) default to Standard SQL - 'view': {'query': query, 'useLegacySql': None}, + 'view': {'query': query, 'useLegacySql': False}, } self.assertEqual(req['data'], sent) self.assertEqual(got.table_id, self.TABLE_ID) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 12b2ec98c4d4..63dafb8ac5ec 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -259,7 +259,7 @@ def _verifyResourceProperties(self, table, resource): self.assertEqual(table.view_query, resource['view']['query']) self.assertEqual( table.view_use_legacy_sql, - resource['view'].get('useLegacySql')) + resource['view'].get('useLegacySql', True)) else: self.assertIsNone(table.view_query) self.assertIsNone(table.view_use_legacy_sql) @@ -487,6 +487,10 @@ def test_view_query_setter(self): table = self._make_one(table_ref) table.view_query = 'select * from foo' self.assertEqual(table.view_query, 'select * from foo') + self.assertEqual(table.view_use_legacy_sql, False) + + table.view_use_legacy_sql = True + self.assertEqual(table.view_use_legacy_sql, True) def test_view_query_deleter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -495,6 +499,7 @@ def test_view_query_deleter(self): table.view_query = 'select * from foo' del table.view_query self.assertIsNone(table.view_query) + self.assertIsNone(table.view_use_legacy_sql) def test_view_use_legacy_sql_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -507,9 +512,9 @@ def test_view_use_legacy_sql_setter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - table.view_use_legacy_sql = False + table.view_use_legacy_sql = True table.view_query = 'select * from foo' - self.assertEqual(table.view_use_legacy_sql, False) + self.assertEqual(table.view_use_legacy_sql, True) self.assertEqual(table.view_query, 'select * from foo') def test_from_api_repr_missing_identity(self): From cc80dddac54feb1569a42b50a067a06fa4e8f237 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 16 Oct 2017 10:26:47 -0700 Subject: [PATCH 0247/2016] BQ: update samples in README for beta 2 changes. (#4179) * BQ: update samples in README for beta 2 changes. * Fix typos. Split dataset create into its own sample. --- packages/google-cloud-bigquery/README.rst | 61 +++++++++++++++-------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index c25b84c6bebd..ed299af347da 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -34,12 +34,27 @@ Using the API Querying massive datasets can be time consuming and expensive without the right hardware and infrastructure. Google `BigQuery`_ (`BigQuery API docs`_) -solves this problem by enabling super-fast, SQL-like queries against -append-only tables, using the processing power of Google's infrastructure. +solves this problem by enabling super-fast, SQL queries against +append-mostly tables, using the processing power of Google's infrastructure. .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery .. _BigQuery API docs: https://cloud.google.com/bigquery/docs/reference/v2/ +Create a dataset +~~~~~~~~~~~~~~~~ + +.. code:: python + + from google.cloud import bigquery + from google.cloud.bigquery import Dataset + + client = bigquery.Client() + + dataset_ref = client.dataset('dataset_name') + dataset = Dataset(dataset_ref) + dataset.description = 'my dataset' + dataset = client.create_dataset(dataset) # API request + Load data from CSV ~~~~~~~~~~~~~~~~~~ @@ -48,39 +63,43 @@ Load data from CSV import csv from google.cloud import bigquery + from google.cloud.bigquery import LoadJobConfig from google.cloud.bigquery import SchemaField client = bigquery.Client() - dataset = client.dataset('dataset_name') - dataset.create() # API request - SCHEMA = [ SchemaField('full_name', 'STRING', mode='required'), SchemaField('age', 'INTEGER', mode='required'), ] - table = dataset.table('table_name', SCHEMA) - table.create() + table_ref = client.dataset('dataset_name').table('table_name') + + load_config = LoadJobConfig() + load_config.skip_leading_rows = 1 + load_config.schema = SCHEMA - with open('csv_file', 'rb') as readable: - table.upload_from_file( - readable, source_format='CSV', skip_leading_rows=1) + # Contents of csv_file.csv: + # Name,Age + # Tim,99 + with open('csv_file.csv', 'rb') as readable: + client.load_table_from_file( + readable, table_ref, job_config=load_config) # API request -Perform a synchronous query -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Perform a query +~~~~~~~~~~~~~~~ .. code:: python - # Perform a synchronous query. + # Perform a query. QUERY = ( - 'SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] ' - 'WHERE state = "TX"') - query = client.run_sync_query('%s LIMIT 100' % QUERY) - query.timeout_ms = TIMEOUT_MS - query.run() - - for row in query.rows: - print(row) + 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' + 'WHERE state = "TX" ' + 'LIMIT 100') + query_job = client.query(QUERY) # API request + rows = query_job.result() # Waits for query to finish + + for row in rows: + print(row.name) See the ``google-cloud-python`` API `BigQuery documentation`_ to learn how From 01813f4181813879a4468e0aa6ef0b5153c87874 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 14:32:52 -0400 Subject: [PATCH 0248/2016] bigquery: support external table definitions for query jobs (#4191) Also, set ExternalConfig.options based on source_format, and make read-only. Also, change from_api_repr functions in external_config.py so that they don't modify their resource argument. This simplifies tests. --- .../google/cloud/bigquery/_helpers.py | 8 + .../google/cloud/bigquery/external_config.py | 233 +++++++++--------- .../google/cloud/bigquery/job.py | 45 ++-- packages/google-cloud-bigquery/nox.py | 2 +- .../google-cloud-bigquery/tests/system.py | 126 +++++----- .../tests/unit/test_external_config.py | 36 +-- .../tests/unit/test_job.py | 121 +++++++-- 7 files changed, 332 insertions(+), 239 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index d4230f9ff4f6..2d763109e745 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -546,3 +546,11 @@ def _should_retry(exc): on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ + + +def _int_or_none(value): + """Helper: deserialize int value from JSON string.""" + if isinstance(value, int): + return value + if value is not None: + return int(value) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 9177595da67c..e3560224008c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -29,122 +29,10 @@ from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _TypedApiResourceProperty from google.cloud.bigquery._helpers import _ListApiResourceProperty +from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery.job import _int_or_none - - -class ExternalConfig(object): - """Description of an external data source. - - :type source_format: str - :param source_format: the format of the external data. See - the ``source_format`` property on this class. - """ - - def __init__(self, source_format): - self._properties = {'sourceFormat': source_format} - self._options = None - - @property - def source_format(self): - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat - """ - return self._properties['sourceFormat'] - - autodetect = _TypedApiResourceProperty( - 'autodetect', 'autodetect', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect - """ - - compression = _TypedApiResourceProperty( - 'compression', 'compression', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression - """ - - ignore_unknown_values = _TypedApiResourceProperty( - 'ignore_unknown_values', 'ignoreUnknownValues', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues - """ - - max_bad_records = _TypedApiResourceProperty( - 'max_bad_records', 'maxBadRecords', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords - """ - - source_uris = _ListApiResourceProperty( - 'source_uris', 'sourceUris', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris - """ - - schema = _ListApiResourceProperty('schema', 'schema', SchemaField) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema - """ - - @property - def options(self): - """Source-specific options. A subclass of ExternalConfigOptions.""" - return self._options - - @options.setter - def options(self, value): - if self.source_format != value._SOURCE_FORMAT: - raise ValueError( - 'source format %s does not match option type %s' % ( - self.source_format, value.__class__.__name__)) - self._options = value - - def to_api_repr(self): - """Build an API representation of this object. - - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. - """ - config = copy.deepcopy(self._properties) - if self.schema: - config['schema'] = {'fields': _build_schema_resource(self.schema)} - if self.options is not None: - config[self.options._RESOURCE_NAME] = self.options.to_api_repr() - return config - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a CSVOptions given its API representation - - :type resource: dict - :param resource: - An extract job configuration in the same representation as is - returned from the API. - - :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` - :returns: Configuration parsed from ``resource``. - """ - config = cls(resource['sourceFormat']) - schema = resource.pop('schema', None) - for optcls in (BigtableOptions, CSVOptions, GoogleSheetsOptions): - opts = resource.pop(optcls._RESOURCE_NAME, None) - if opts is not None: - config.options = optcls.from_api_repr(opts) - break - config._properties = copy.deepcopy(resource) - if schema: - config.schema = _parse_schema_resource(schema) - return config class BigtableColumn(object): @@ -220,9 +108,9 @@ def from_api_repr(cls, resource): :rtype: :class:`google.cloud.bigquery.external_config.BigtableColumn` :returns: Configuration parsed from ``resource``. """ - qe = resource.pop('qualifierEncoded', None) config = cls() config._properties = copy.deepcopy(resource) + qe = resource.get('qualifierEncoded') if qe: config.qualifier_encoded = base64.standard_b64decode(_to_bytes(qe)) return config @@ -436,7 +324,7 @@ def from_api_repr(cls, resource): :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` :returns: Configuration parsed from ``resource``. """ - slr = resource.pop('skipLeadingRows', None) + slr = resource.get('skipLeadingRows') config = cls() config._properties = copy.deepcopy(resource) config.skip_leading_rows = _int_or_none(slr) @@ -484,8 +372,121 @@ def from_api_repr(cls, resource): :class:`google.cloud.bigquery.external_config.GoogleSheetsOptions` :returns: Configuration parsed from ``resource``. """ - slr = resource.pop('skipLeadingRows', None) + slr = resource.get('skipLeadingRows') config = cls() config._properties = copy.deepcopy(resource) config.skip_leading_rows = _int_or_none(slr) return config + + +_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions) + + +class ExternalConfig(object): + """Description of an external data source. + + :type source_format: str + :param source_format: the format of the external data. See + the ``source_format`` property on this class. + """ + + def __init__(self, source_format): + self._properties = {'sourceFormat': source_format} + self._options = None + for optcls in _OPTION_CLASSES: + if source_format == optcls._SOURCE_FORMAT: + self._options = optcls() + break + + @property + def source_format(self): + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat + """ + return self._properties['sourceFormat'] + + @property + def options(self): + """Source-specific options.""" + return self._options + + autodetect = _TypedApiResourceProperty( + 'autodetect', 'autodetect', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect + """ + + compression = _TypedApiResourceProperty( + 'compression', 'compression', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression + """ + + ignore_unknown_values = _TypedApiResourceProperty( + 'ignore_unknown_values', 'ignoreUnknownValues', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues + """ + + max_bad_records = _TypedApiResourceProperty( + 'max_bad_records', 'maxBadRecords', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords + """ + + source_uris = _ListApiResourceProperty( + 'source_uris', 'sourceUris', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris + """ + + schema = _ListApiResourceProperty('schema', 'schema', SchemaField) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + if self.schema: + config['schema'] = {'fields': _build_schema_resource(self.schema)} + if self.options is not None: + r = self.options.to_api_repr() + if r != {}: + config[self.options._RESOURCE_NAME] = r + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a CSVOptions given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` + :returns: Configuration parsed from ``resource``. + """ + config = cls(resource['sourceFormat']) + schema = resource.get('schema') + for optcls in _OPTION_CLASSES: + opts = resource.get(optcls._RESOURCE_NAME) + if opts is not None: + config._options = optcls.from_api_repr(opts) + break + config._properties = copy.deepcopy(resource) + if schema: + config.schema = _parse_schema_resource(schema) + return config diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 65da69956369..350ad7ce579b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -25,6 +25,7 @@ from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.query import _AbstractQueryParameter from google.cloud.bigquery.query import _query_param_from_api_repr from google.cloud.bigquery.query import ArrayQueryParameter @@ -39,6 +40,7 @@ from google.cloud.bigquery._helpers import _ListApiResourceProperty from google.cloud.bigquery._helpers import _TypedApiResourceProperty from google.cloud.bigquery._helpers import DEFAULT_RETRY +from google.cloud.bigquery._helpers import _int_or_none _DONE_STATE = 'DONE' _STOPPED_REASON = 'stopped' @@ -65,22 +67,6 @@ } -def _bool_or_none(value): - """Helper: deserialize boolean value from JSON string.""" - if isinstance(value, bool): - return value - if value is not None: - return value.lower() in ['t', 'true', '1'] - - -def _int_or_none(value): - """Helper: deserialize int value from JSON string.""" - if isinstance(value, int): - return value - if value is not None: - return int(value) - - def _error_result_to_exception(error_result): """Maps BigQuery error reasons to an exception. @@ -1315,6 +1301,14 @@ def _to_api_repr_udf_resources(value): ] +def _from_api_repr_table_defs(resource): + return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()} + + +def _to_api_repr_table_defs(value): + return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} + + class QueryJobConfig(object): """Configuration options for query jobs. @@ -1469,6 +1463,16 @@ def from_api_repr(cls, resource): https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition """ + table_definitions = _TypedApiResourceProperty( + 'table_definitions', 'tableDefinitions', dict) + """ + Definitions for external tables. A dictionary from table names (strings) + to :class:`google.cloud.bigquery.external_config.ExternalConfig`. + + See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions + """ + _maximum_billing_tier = None _maximum_bytes_billed = None @@ -1478,6 +1482,8 @@ def from_api_repr(cls, resource): 'destinationTable': ( TableReference.from_api_repr, TableReference.to_api_repr), 'maximumBytesBilled': (int, str), + 'tableDefinitions': (_from_api_repr_table_defs, + _to_api_repr_table_defs), _QUERY_PARAMETERS_KEY: ( _from_api_repr_query_parameters, _to_api_repr_query_parameters), _UDF_RESOURCES_KEY: ( @@ -1615,6 +1621,13 @@ def maximum_bytes_billed(self): """ return self._configuration.maximum_bytes_billed + @property + def table_definitions(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.table_definitions`. + """ + return self._configuration.table_definitions + def _build_resource(self): """Generate a resource for :meth:`begin`.""" configuration = self._configuration.to_api_repr() diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 23b2771bd523..2dd4fb431e66 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -76,7 +76,7 @@ def system_tests(session, python_version): os.path.join('..', 'storage'), os.path.join('..', 'test_utils'), ) - session.install('.') + session.install('-e', '.') # Run py.test against the system tests. session.run( diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 4cb97c6601f4..a25524a2a7dc 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -436,47 +436,23 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(ROWS, key=by_wavelength)) def test_load_table_from_storage_then_dump_table(self): - from google.cloud._testing import _NamedTemporaryFile - from google.cloud.storage import Client as StorageClient - - local_id = unique_resource_id() - BUCKET_NAME = 'bq_load_test' + local_id - BLOB_NAME = 'person_ages.csv' - GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME) + TABLE_ID = 'test_table' ROWS = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), ('Wylma Phlyntstone', 29), ('Bhettye Rhubble', 27), ] - TABLE_NAME = 'test_table' - - storage_client = StorageClient() - - # In the **very** rare case the bucket name is reserved, this - # fails with a ConnectionError. - bucket = storage_client.create_bucket(BUCKET_NAME) - self.to_delete.append(bucket) - - blob = bucket.blob(BLOB_NAME) - - with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as csv_write: - writer = csv.writer(csv_write) - writer.writerow(('Full Name', 'Age')) - writer.writerows(ROWS) - - with open(temp.name, 'rb') as csv_read: - blob.upload_from_file(csv_read, content_type='text/csv') - - self.to_delete.insert(0, blob) + GS_URL = self._write_csv_to_storage( + 'bq_load_test' + unique_resource_id(), 'person_ages.csv', + ('Full Name', 'Age'), ROWS) dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age]) + table_arg = Table(dataset.table(TABLE_ID), schema=[full_name, age]) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -486,7 +462,7 @@ def test_load_table_from_storage_then_dump_table(self): config.source_format = 'CSV' config.write_disposition = 'WRITE_EMPTY' job = Config.CLIENT.load_table_from_storage( - GS_URL, dataset.table(TABLE_NAME), job_config=config) + GS_URL, dataset.table(TABLE_ID), job_config=config) # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability @@ -501,44 +477,19 @@ def test_load_table_from_storage_then_dump_table(self): sorted(ROWS, key=by_age)) def test_load_table_from_storage_w_autodetect_schema(self): - from google.cloud._testing import _NamedTemporaryFile - from google.cloud.storage import Client as StorageClient from google.cloud.bigquery import SchemaField - local_id = unique_resource_id() - bucket_name = 'bq_load_test' + local_id - blob_name = 'person_ages.csv' - gs_url = 'gs://{}/{}'.format(bucket_name, blob_name) rows = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), ('Wylma Phlyntstone', 29), ('Bhettye Rhubble', 27), ] * 100 # BigQuery internally uses the first 100 rows to detect schema - table_name = 'test_table' - - storage_client = StorageClient() - - # In the **very** rare case the bucket name is reserved, this - # fails with a ConnectionError. - bucket = storage_client.create_bucket(bucket_name) - self.to_delete.append(bucket) - - blob = bucket.blob(blob_name) - - with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as csv_write: - writer = csv.writer(csv_write) - writer.writerow(('Full Name', 'Age')) - writer.writerows(rows) - - with open(temp.name, 'rb') as csv_read: - blob.upload_from_file(csv_read, content_type='text/csv') - - self.to_delete.insert(0, blob) - + gs_url = self._write_csv_to_storage( + 'bq_load_test' + unique_resource_id(), 'person_ages.csv', + ('Full Name', 'Age'), rows) dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) - table_ref = dataset.table(table_name) + table_ref = dataset.table('test_table') config = bigquery.LoadJobConfig() config.autodetect = True @@ -564,6 +515,33 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.assertEqual( sorted(actual_row_tuples, key=by_age), sorted(rows, key=by_age)) + def _write_csv_to_storage(self, bucket_name, blob_name, header_row, + data_rows): + from google.cloud._testing import _NamedTemporaryFile + from google.cloud.storage import Client as StorageClient + + storage_client = StorageClient() + + # In the **very** rare case the bucket name is reserved, this + # fails with a ConnectionError. + bucket = storage_client.create_bucket(bucket_name) + self.to_delete.append(bucket) + + blob = bucket.blob(blob_name) + + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as csv_write: + writer = csv.writer(csv_write) + writer.writerow(header_row) + writer.writerows(data_rows) + + with open(temp.name, 'rb') as csv_read: + blob.upload_from_file(csv_read, content_type='text/csv') + + self.to_delete.insert(0, blob) + + return 'gs://{}/{}'.format(bucket_name, blob_name) + def _load_table_for_extract_table( self, storage_client, rows, bucket_name, blob_name, table): from google.cloud._testing import _NamedTemporaryFile @@ -1271,6 +1249,36 @@ def test_query_future(self): row_tuples = [r.values() for r in iterator] self.assertEqual(row_tuples, [(1,)]) + def test_query_table_def(self): + rows = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + gs_url = self._write_csv_to_storage( + 'bq_load_test' + unique_resource_id(), 'person_ages.csv', + ('Full Name', 'Age'), rows) + + job_config = bigquery.QueryJobConfig() + table_id = 'flintstones' + ec = bigquery.ExternalConfig('CSV') + ec.source_uris = [gs_url] + ec.schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + ] + ec.options.skip_leading_rows = 1 # skip the header row + job_config.table_definitions = {table_id: ec} + sql = 'SELECT * from %s' % table_id + + got_rows = Config.CLIENT.query_rows(sql, job_config=job_config) + + row_tuples = [r.values() for r in got_rows] + by_age = operator.itemgetter(1) + self.assertEqual(sorted(row_tuples, key=by_age), + sorted(rows, key=by_age)) + def test_create_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 6768093ed0b3..b7887428606d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -55,7 +55,6 @@ def test_api_repr_base(self): ], }, }) - want_resource = copy.deepcopy(resource) ec = ExternalConfig.from_api_repr(resource) self._verify_base(ec) self.assertEqual(ec.schema, @@ -63,7 +62,7 @@ def test_api_repr_base(self): self.assertIsNone(ec.options) got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) + self.assertEqual(got_resource, resource) def _verify_base(self, ec): self.assertEqual(ec.autodetect, True) @@ -85,7 +84,6 @@ def test_api_repr_sheets(self): 'sourceFormat': 'GOOGLE_SHEETS', 'googleSheetsOptions': {'skipLeadingRows': '123'}, }) - want_resource = copy.deepcopy(resource) ec = ExternalConfig.from_api_repr(resource) @@ -96,13 +94,13 @@ def test_api_repr_sheets(self): got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) + self.assertEqual(got_resource, resource) - del want_resource['googleSheetsOptions']['skipLeadingRows'] - ec = ExternalConfig.from_api_repr(copy.deepcopy(want_resource)) + del resource['googleSheetsOptions']['skipLeadingRows'] + ec = ExternalConfig.from_api_repr(resource) self.assertIsNone(ec.options.skip_leading_rows) got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) + self.assertEqual(got_resource, resource) def test_api_repr_csv(self): from google.cloud.bigquery.external_config import CSVOptions @@ -118,7 +116,6 @@ def test_api_repr_csv(self): 'encoding': 'encoding', }, }) - want_resource = copy.deepcopy(resource) ec = ExternalConfig.from_api_repr(resource) @@ -134,13 +131,13 @@ def test_api_repr_csv(self): got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) + self.assertEqual(got_resource, resource) - del want_resource['csvOptions']['skipLeadingRows'] - ec = ExternalConfig.from_api_repr(copy.deepcopy(want_resource)) + del resource['csvOptions']['skipLeadingRows'] + ec = ExternalConfig.from_api_repr(resource) self.assertIsNone(ec.options.skip_leading_rows) got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) + self.assertEqual(got_resource, resource) def test_api_repr_bigtable(self): from google.cloud.bigquery.external_config import BigtableOptions @@ -178,7 +175,6 @@ def test_api_repr_bigtable(self): ], }, }) - want_resource = copy.deepcopy(resource) ec = ExternalConfig.from_api_repr(resource) @@ -207,19 +203,7 @@ def test_api_repr_bigtable(self): got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) - - def test_option_mismatch(self): - from google.cloud.bigquery.external_config import CSVOptions - from google.cloud.bigquery.external_config import BigtableOptions - from google.cloud.bigquery.external_config import GoogleSheetsOptions - - for source_format, opts in (('BIGTABLE', CSVOptions()), - ('CSV', GoogleSheetsOptions()), - ('GOOGLE_SHEETS', BigtableOptions())): - ec = ExternalConfig(source_format) - with self.assertRaises(ValueError): - ec.options = opts + self.assertEqual(got_resource, resource) def _copy_and_update(d, u): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index c1c190328968..0e0b667e704d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -42,27 +42,6 @@ def _make_client(project='test-project', connection=None): return client -class Test__bool_or_none(unittest.TestCase): - - def _call_fut(self, *args, **kwargs): - from google.cloud.bigquery import job - - return job._bool_or_none(*args, **kwargs) - - def test_w_bool(self): - self.assertTrue(self._call_fut(True)) - self.assertFalse(self._call_fut(False)) - - def test_w_none(self): - self.assertIsNone(self._call_fut(None)) - - def test_w_str(self): - self.assertTrue(self._call_fut('1')) - self.assertTrue(self._call_fut('t')) - self.assertTrue(self._call_fut('true')) - self.assertFalse(self._call_fut('anything else')) - - class Test__int_or_none(unittest.TestCase): def _call_fut(self, *args, **kwargs): @@ -1673,6 +1652,17 @@ def _verifyQueryParameters(self, job, config): for found, expected in zip(job.query_parameters, query_parameters): self.assertEqual(found.to_api_repr(), expected) + def _verify_table_definitions(self, job, config): + table_defs = config.get('tableDefinitions') + if job.table_definitions is None: + self.assertIsNone(table_defs) + else: + self.assertEqual(len(job.table_definitions), len(table_defs)) + for found_key, found_ec in job.table_definitions.items(): + expected_ec = table_defs.get(found_key) + self.assertIsNotNone(expected_ec) + self.assertEqual(found_ec.to_api_repr(), expected_ec) + def _verify_configuration_properties(self, job, configuration): if 'dryRun' in configuration: self.assertEqual(job.dry_run, @@ -1691,6 +1681,7 @@ def _verifyResourceProperties(self, job, resource): self._verifyIntegerResourceProperties(job, query_config) self._verify_udf_resources(job, query_config) self._verifyQueryParameters(job, query_config) + self._verify_table_definitions(job, query_config) self.assertEqual(job.query, query_config['query']) if 'createDisposition' in query_config: @@ -1754,6 +1745,7 @@ def test_ctor_defaults(self): self.assertIsNone(job.write_disposition) self.assertIsNone(job.maximum_billing_tier) self.assertIsNone(job.maximum_bytes_billed) + self.assertIsNone(job.table_definitions) def test_ctor_w_udf_resources(self): from google.cloud.bigquery.job import QueryJobConfig @@ -2516,6 +2508,93 @@ def test_begin_w_positional_query_parameter(self): self._verifyResourceProperties(job, RESOURCE) self.assertEqual(req['data'], SENT) + def test_begin_w_table_defs(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.external_config import ExternalConfig + from google.cloud.bigquery.external_config import BigtableColumn + from google.cloud.bigquery.external_config import BigtableColumnFamily + + PATH = '/projects/%s/jobs' % (self.PROJECT,) + RESOURCE = self._makeResource() + # Ensure None for missing server-set props + del RESOURCE['statistics']['creationTime'] + del RESOURCE['etag'] + del RESOURCE['selfLink'] + del RESOURCE['user_email'] + + bt_config = ExternalConfig('BIGTABLE') + bt_config.ignore_unknown_values = True + bt_config.options.read_rowkey_as_string = True + cf = BigtableColumnFamily() + cf.family_id = 'cf' + col = BigtableColumn() + col.field_name = 'fn' + cf.columns = [col] + bt_config.options.column_families = [cf] + BT_CONFIG_RESOURCE = { + 'sourceFormat': 'BIGTABLE', + 'ignoreUnknownValues': True, + 'bigtableOptions': { + 'readRowkeyAsString': True, + 'columnFamilies': [{ + 'familyId': 'cf', + 'columns': [{'fieldName': 'fn'}], + }], + }, + } + CSV_CONFIG_RESOURCE = { + 'sourceFormat': 'CSV', + 'maxBadRecords': 8, + 'csvOptions': { + 'allowJaggedRows': True, + }, + } + csv_config = ExternalConfig('CSV') + csv_config.max_bad_records = 8 + csv_config.options.allow_jagged_rows = True + bt_table = 'bigtable-table' + csv_table = 'csv-table' + RESOURCE['configuration']['query']['tableDefinitions'] = { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + } + want_resource = copy.deepcopy(RESOURCE) + conn = _Connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + config = QueryJobConfig() + config.table_definitions = { + bt_table: bt_config, + csv_table: csv_config, + } + config.use_legacy_sql = True + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) + + job.begin() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'query': { + 'query': self.QUERY, + 'useLegacySql': True, + 'tableDefinitions': { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + }, + }, + }, + } + self._verifyResourceProperties(job, want_resource) + self.assertEqual(req['data'], SENT) + def test_dry_run_query(self): from google.cloud.bigquery.job import QueryJobConfig From 78ea685c74bbd93bc2fa7e728a9c86f315543b07 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 16 Oct 2017 13:04:16 -0700 Subject: [PATCH 0249/2016] BQ: Make snippets.py run as system tests. (#4188) * BQ: Make snippets.py run as system tests. Converts snippet function names to pytest conventions (shouldn't affect the docs, as the docs use region tags to grab sub-sections to include. I had to make some minor changes to ensure that the samples work on both Python 2 and Python 3, which is why I didn't try to bulk update the snippets for all the other products. * BigQuery: Use in-place development install for system and snippet tests. This will allow the use of the `-r` parameter for nox to work properly. --- packages/google-cloud-bigquery/nox.py | 37 ++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 23b2771bd523..2d5772882fbe 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -76,7 +76,7 @@ def system_tests(session, python_version): os.path.join('..', 'storage'), os.path.join('..', 'test_utils'), ) - session.install('.') + session.install('-e', '.') # Run py.test against the system tests. session.run( @@ -87,6 +87,39 @@ def system_tests(session, python_version): ) +@nox.session +@nox.parametrize('python_version', ['2.7', '3.6']) +def snippets_tests(session, python_version): + """Run the system test suite.""" + + # Sanity check: Only run system tests if the environment variable is set. + if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): + session.skip('Credentials must be set via environment variable.') + + # Run the system tests against latest Python 2 and Python 3 only. + session.interpreter = 'python{}'.format(python_version) + + # Set the virtualenv dirname. + session.virtualenv_dirname = 'snip-' + python_version + + # Install all test dependencies, then install this package into the + # virtualenv's dist-packages. + session.install('mock', 'pytest', *LOCAL_DEPS) + session.install( + os.path.join('..', 'storage'), + os.path.join('..', 'test_utils'), + ) + session.install('-e', '.') + + # Run py.test against the system tests. + session.run( + 'py.test', + '--quiet', + os.path.join(os.pardir, 'docs', 'bigquery', 'snippets.py'), + *session.posargs + ) + + @nox.session def lint(session): """Run linters. @@ -100,6 +133,8 @@ def lint(session): session.install('.') session.run('flake8', os.path.join('google', 'cloud', 'bigquery')) session.run('flake8', 'tests') + session.run( + 'flake8', os.path.join(os.pardir, 'docs', 'bigquery', 'snippets.py')) session.run( 'gcp-devrel-py-tools', 'run-pylint', '--config', 'pylint.config.py', From 6a2636ed33a8a3d89b50a948c3cba208693ffb2b Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 16:22:17 -0400 Subject: [PATCH 0250/2016] bigquery: support external data definition for tables (#4193) --- .../google/cloud/bigquery/schema.py | 47 +++++++ .../google/cloud/bigquery/table.py | 94 ++++++------- .../google-cloud-bigquery/tests/system.py | 37 ++++- .../tests/unit/test_client.py | 50 +++++++ .../tests/unit/test_schema.py | 129 +++++++++++++++++ .../tests/unit/test_table.py | 130 +++--------------- 6 files changed, 323 insertions(+), 164 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 535c445a3726..1aa95271c70d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -154,3 +154,50 @@ def __hash__(self): def __repr__(self): return 'SchemaField{}'.format(self._key()) + + +def _parse_schema_resource(info): + """Parse a resource fragment into a schema field. + + :type info: mapping + :param info: should contain a "fields" key to be parsed + + :rtype: list of :class:`SchemaField`, or ``NoneType`` + :returns: a list of parsed fields, or ``None`` if no "fields" key is + present in ``info``. + """ + if 'fields' not in info: + return () + + schema = [] + for r_field in info['fields']: + name = r_field['name'] + field_type = r_field['type'] + mode = r_field.get('mode', 'NULLABLE') + description = r_field.get('description') + sub_fields = _parse_schema_resource(r_field) + schema.append( + SchemaField(name, field_type, mode, description, sub_fields)) + return schema + + +def _build_schema_resource(fields): + """Generate a resource fragment for a schema. + + :type fields: sequence of :class:`SchemaField` + :param fields: schema to be dumped + + :rtype: mapping + :returns: a mapping describing the schema of the supplied fields. + """ + infos = [] + for field in fields: + info = {'name': field.name, + 'type': field.field_type, + 'mode': field.mode} + if field.description is not None: + info['description'] = field.description + if field.fields: + info['fields'] = _build_schema_resource(field.fields) + infos.append(info) + return infos diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d4c6977c8755..592d1ad9def6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -23,6 +23,9 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import _build_schema_resource +from google.cloud.bigquery.schema import _parse_schema_resource +from google.cloud.bigquery.external_config import ExternalConfig _TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'" @@ -159,13 +162,15 @@ class Table(object): all_fields = [ 'description', 'friendly_name', 'expires', 'location', - 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema' + 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema', + 'external_data_configuration', ] def __init__(self, table_ref, schema=()): self._project = table_ref.project self._table_id = table_ref.table_id self._dataset_id = table_ref.dataset_id + self._external_config = None self._properties = {} # Let the @property do validation. self.schema = schema @@ -537,10 +542,37 @@ def view_use_legacy_sql(self, value): @property def streaming_buffer(self): + """Information about a table's streaming buffer. + + :rtype: :class:`StreamingBuffer` + :returns: Streaming buffer information, returned from get_table. + """ sb = self._properties.get('streamingBuffer') if sb is not None: return StreamingBuffer(sb) + @property + def external_data_configuration(self): + """Configuration for an external data source. + + If not set, None is returned. + + :rtype: :class:`ExternalConfig`, or ``NoneType`` + :returns: The external configuration, or None (the default). + """ + return self._external_config + + @external_data_configuration.setter + def external_data_configuration(self, value): + """Sets the configuration for an external data source. + + :type value: :class:`ExternalConfig`, or ``NoneType`` + :param value: The ExternalConfig, or None to unset. + """ + if not (value is None or isinstance(value, ExternalConfig)): + raise ValueError("Pass an ExternalConfig or None") + self._external_config = value + @classmethod def from_api_repr(cls, resource): """Factory: construct a table given its API representation @@ -579,6 +611,9 @@ def _set_properties(self, api_response): cleaned = api_response.copy() schema = cleaned.pop('schema', {'fields': ()}) self.schema = _parse_schema_resource(schema) + ec = cleaned.pop('externalDataConfiguration', None) + if ec: + self.external_data_configuration = ExternalConfig.from_api_repr(ec) if 'creationTime' in cleaned: cleaned['creationTime'] = float(cleaned['creationTime']) if 'lastModifiedTime' in cleaned: @@ -614,12 +649,20 @@ def _populate_schema_resource(self, resource): 'fields': _build_schema_resource(self._schema), } + def _populate_external_config(self, resource): + if not self.external_data_configuration: + resource['externalDataConfiguration'] = None + else: + resource['externalDataConfiguration'] = ExternalConfig.to_api_repr( + self.external_data_configuration) + custom_resource_fields = { 'expires': _populate_expires_resource, 'partitioning_type': _populate_partitioning_type_resource, 'view_query': _populate_view_query_resource, 'view_use_legacy_sql': _populate_view_use_legacy_sql_resource, - 'schema': _populate_schema_resource + 'schema': _populate_schema_resource, + 'external_data_configuration': _populate_external_config, } def _build_resource(self, filter_fields): @@ -690,50 +733,3 @@ def __init__(self, resource): # time is in milliseconds since the epoch. self.oldest_entry_time = _datetime_from_microseconds( 1000.0 * int(resource['oldestEntryTime'])) - - -def _parse_schema_resource(info): - """Parse a resource fragment into a schema field. - - :type info: mapping - :param info: should contain a "fields" key to be parsed - - :rtype: list of :class:`SchemaField`, or ``NoneType`` - :returns: a list of parsed fields, or ``None`` if no "fields" key is - present in ``info``. - """ - if 'fields' not in info: - return () - - schema = [] - for r_field in info['fields']: - name = r_field['name'] - field_type = r_field['type'] - mode = r_field.get('mode', 'NULLABLE') - description = r_field.get('description') - sub_fields = _parse_schema_resource(r_field) - schema.append( - SchemaField(name, field_type, mode, description, sub_fields)) - return schema - - -def _build_schema_resource(fields): - """Generate a resource fragment for a schema. - - :type fields: sequence of :class:`SchemaField` - :param fields: schema to be dumped - - :rtype: mapping - :returns: a mapping describing the schema of the supplied fields. - """ - infos = [] - for field in fields: - info = {'name': field.name, - 'type': field.field_type, - 'mode': field.mode} - if field.description is not None: - info['description'] = field.description - if field.fields: - info['fields'] = _build_schema_resource(field.fields) - infos.append(info) - return infos diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index a25524a2a7dc..d745be032b2c 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1257,7 +1257,7 @@ def test_query_table_def(self): ('Bhettye Rhubble', 27), ] gs_url = self._write_csv_to_storage( - 'bq_load_test' + unique_resource_id(), 'person_ages.csv', + 'bq_external_test' + unique_resource_id(), 'person_ages.csv', ('Full Name', 'Age'), rows) job_config = bigquery.QueryJobConfig() @@ -1270,7 +1270,7 @@ def test_query_table_def(self): ] ec.options.skip_leading_rows = 1 # skip the header row job_config.table_definitions = {table_id: ec} - sql = 'SELECT * from %s' % table_id + sql = 'SELECT * FROM %s' % table_id got_rows = Config.CLIENT.query_rows(sql, job_config=job_config) @@ -1279,6 +1279,39 @@ def test_query_table_def(self): self.assertEqual(sorted(row_tuples, key=by_age), sorted(rows, key=by_age)) + def test_query_external_table(self): + rows = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + gs_url = self._write_csv_to_storage( + 'bq_external_test' + unique_resource_id(), 'person_ages.csv', + ('Full Name', 'Age'), rows) + dataset_id = _make_dataset_id('query_external_table') + dataset = self.temp_dataset(dataset_id) + table_id = 'flintstones' + full_name = bigquery.SchemaField('full_name', 'STRING', + mode='REQUIRED') + age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + table_arg = Table(dataset.table(table_id), schema=[full_name, age]) + ec = bigquery.ExternalConfig('CSV') + ec.source_uris = [gs_url] + ec.options.skip_leading_rows = 1 # skip the header row + table_arg.external_data_configuration = ec + table = Config.CLIENT.create_table(table_arg) + self.to_delete.insert(0, table) + + sql = 'SELECT * FROM %s.%s' % (dataset_id, table_id) + + got_rows = Config.CLIENT.query_rows(sql) + + row_tuples = [r.values() for r in got_rows] + by_age = operator.itemgetter(1) + self.assertEqual(sorted(row_tuples, key=by_age), + sorted(rows, key=by_age)) + def test_create_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 9dfa311e806c..5bbdbc3121fe 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -559,6 +559,56 @@ def test_create_table_w_schema_and_query(self): self.assertEqual(got.schema, schema) self.assertEqual(got.view_query, query) + def test_create_table_w_external(self): + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.external_config import ExternalConfig + + path = 'projects/%s/datasets/%s/tables' % ( + self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'externalDataConfiguration': { + 'sourceFormat': 'CSV', + 'autodetect': True, + }, + } + conn = client._connection = _Connection(resource) + table = Table(self.TABLE_REF) + ec = ExternalConfig('CSV') + ec.autodetect = True + table.external_data_configuration = ec + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, + }, + 'externalDataConfiguration': { + 'sourceFormat': 'CSV', + 'autodetect': True, + } + } + self.assertEqual(req['data'], sent) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual(got.external_data_configuration.source_format, 'CSV') + self.assertEqual(got.external_data_configuration.autodetect, True) + def test_get_table(self): path = 'projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_ID) diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index d08e7757063e..84e5d306c348 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -236,3 +236,132 @@ def test___repr__(self): field1 = self._make_one('field1', 'STRING') expected = "SchemaField('field1', 'string', 'NULLABLE', None, ())" self.assertEqual(repr(field1), expected) + + +# TODO: dedup with the same class in test_table.py. +class _SchemaBase(object): + + def _verify_field(self, field, r_field): + self.assertEqual(field.name, r_field['name']) + self.assertEqual(field.field_type, r_field['type']) + self.assertEqual(field.mode, r_field.get('mode', 'NULLABLE')) + + def _verifySchema(self, schema, resource): + r_fields = resource['schema']['fields'] + self.assertEqual(len(schema), len(r_fields)) + + for field, r_field in zip(schema, r_fields): + self._verify_field(field, r_field) + + +class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): + + def _call_fut(self, resource): + from google.cloud.bigquery.schema import _parse_schema_resource + + return _parse_schema_resource(resource) + + def _makeResource(self): + return { + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}, + ]}, + } + + def test__parse_schema_resource_defaults(self): + RESOURCE = self._makeResource() + schema = self._call_fut(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + def test__parse_schema_resource_subfields(self): + RESOURCE = self._makeResource() + RESOURCE['schema']['fields'].append( + {'name': 'phone', + 'type': 'RECORD', + 'mode': 'REPEATED', + 'fields': [{'name': 'type', + 'type': 'STRING', + 'mode': 'REQUIRED'}, + {'name': 'number', + 'type': 'STRING', + 'mode': 'REQUIRED'}]}) + schema = self._call_fut(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + def test__parse_schema_resource_fields_without_mode(self): + RESOURCE = self._makeResource() + RESOURCE['schema']['fields'].append( + {'name': 'phone', + 'type': 'STRING'}) + + schema = self._call_fut(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + +class Test_build_schema_resource(unittest.TestCase, _SchemaBase): + + def _call_fut(self, resource): + from google.cloud.bigquery.schema import _build_schema_resource + + return _build_schema_resource(resource) + + def test_defaults(self): + from google.cloud.bigquery.schema import SchemaField + + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + resource = self._call_fut([full_name, age]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED'}) + self.assertEqual(resource[1], + {'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED'}) + + def test_w_description(self): + from google.cloud.bigquery.schema import SchemaField + + DESCRIPTION = 'DESCRIPTION' + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED', + description=DESCRIPTION) + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + resource = self._call_fut([full_name, age]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': DESCRIPTION}) + self.assertEqual(resource[1], + {'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED'}) + + def test_w_subfields(self): + from google.cloud.bigquery.schema import SchemaField + + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + ph_type = SchemaField('type', 'STRING', 'REQUIRED') + ph_num = SchemaField('number', 'STRING', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='REPEATED', + fields=[ph_type, ph_num]) + resource = self._call_fut([full_name, phone]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED'}) + self.assertEqual(resource[1], + {'name': 'phone', + 'type': 'RECORD', + 'mode': 'REPEATED', + 'fields': [{'name': 'type', + 'type': 'STRING', + 'mode': 'REQUIRED'}, + {'name': 'number', + 'type': 'STRING', + 'mode': 'REQUIRED'}]}) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 63dafb8ac5ec..5216b1d8c7ba 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -200,6 +200,11 @@ def _makeResource(self): 'estimatedRows': str(self.NUM_EST_ROWS), 'estimatedBytes': str(self.NUM_EST_BYTES), 'oldestEntryTime': self.WHEN_TS * 1000}, + 'externalDataConfiguration': { + 'sourceFormat': 'CSV', + 'csvOptions': { + 'allowJaggedRows': True, + 'encoding': 'encoding'}}, } def _verifyReadonlyResourceProperties(self, table, resource): @@ -269,6 +274,11 @@ def _verifyResourceProperties(self, table, resource): else: self.assertEqual(table.schema, []) + if 'externalDataConfiguration' in resource: + edc = table.external_data_configuration + self.assertEqual(edc.source_format, 'CSV') + self.assertEqual(edc.options.allow_jagged_rows, True) + def test_ctor(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -517,6 +527,13 @@ def test_view_use_legacy_sql_setter(self): self.assertEqual(table.view_use_legacy_sql, True) self.assertEqual(table.view_query, 'select * from foo') + def test_external_data_configuration_setter_bad_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + with self.assertRaises(ValueError): + table.external_data_configuration = 12345 + def test_from_api_repr_missing_identity(self): self._setUpConstants() RESOURCE = {} @@ -720,116 +737,3 @@ def test__row_from_mapping_w_schema(self): self.assertEqual( self._call_fut(MAPPING, table.schema), ('Phred Phlyntstone', 32, ['red', 'green'], None)) - - -class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): - - def _call_fut(self, resource): - from google.cloud.bigquery.table import _parse_schema_resource - - return _parse_schema_resource(resource) - - def _makeResource(self): - return { - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}, - ]}, - } - - def test__parse_schema_resource_defaults(self): - RESOURCE = self._makeResource() - schema = self._call_fut(RESOURCE['schema']) - self._verifySchema(schema, RESOURCE) - - def test__parse_schema_resource_subfields(self): - RESOURCE = self._makeResource() - RESOURCE['schema']['fields'].append( - {'name': 'phone', - 'type': 'RECORD', - 'mode': 'REPEATED', - 'fields': [{'name': 'type', - 'type': 'STRING', - 'mode': 'REQUIRED'}, - {'name': 'number', - 'type': 'STRING', - 'mode': 'REQUIRED'}]}) - schema = self._call_fut(RESOURCE['schema']) - self._verifySchema(schema, RESOURCE) - - def test__parse_schema_resource_fields_without_mode(self): - RESOURCE = self._makeResource() - RESOURCE['schema']['fields'].append( - {'name': 'phone', - 'type': 'STRING'}) - - schema = self._call_fut(RESOURCE['schema']) - self._verifySchema(schema, RESOURCE) - - -class Test_build_schema_resource(unittest.TestCase, _SchemaBase): - - def _call_fut(self, resource): - from google.cloud.bigquery.table import _build_schema_resource - - return _build_schema_resource(resource) - - def test_defaults(self): - from google.cloud.bigquery.table import SchemaField - - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED'}) - self.assertEqual(resource[1], - {'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED'}) - - def test_w_description(self): - from google.cloud.bigquery.table import SchemaField - - DESCRIPTION = 'DESCRIPTION' - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED', - description=DESCRIPTION) - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': DESCRIPTION}) - self.assertEqual(resource[1], - {'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED'}) - - def test_w_subfields(self): - from google.cloud.bigquery.table import SchemaField - - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - ph_type = SchemaField('type', 'STRING', 'REQUIRED') - ph_num = SchemaField('number', 'STRING', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='REPEATED', - fields=[ph_type, ph_num]) - resource = self._call_fut([full_name, phone]) - self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED'}) - self.assertEqual(resource[1], - {'name': 'phone', - 'type': 'RECORD', - 'mode': 'REPEATED', - 'fields': [{'name': 'type', - 'type': 'STRING', - 'mode': 'REQUIRED'}, - {'name': 'number', - 'type': 'STRING', - 'mode': 'REQUIRED'}]}) From 8a97a1adb626ac0d9d23d1e5c79f0f1d3642d5cd Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 16:28:29 -0400 Subject: [PATCH 0251/2016] bigquery: support update of dataset access entries (#4197) --- .../google/cloud/bigquery/client.py | 15 +++++++++++---- .../tests/unit/test_client.py | 13 +++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index da3b8dcaaea5..e3509deb388e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -324,10 +324,17 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): for f in fields: if not hasattr(dataset, f): raise ValueError('No Dataset field %s' % f) - # snake case to camel case - words = f.split('_') - api_field = words[0] + ''.join(map(str.capitalize, words[1:])) - partial[api_field] = getattr(dataset, f) + # All dataset attributes are trivially convertible to JSON except + # for access entries. + if f == 'access_entries': + attr = dataset._build_access_resource() + api_field = 'access' + else: + attr = getattr(dataset, f) + # snake case to camel case + words = f.split('_') + api_field = words[0] + ''.join(map(str.capitalize, words[1:])) + partial[api_field] = attr if dataset.etag is not None: headers = {'If-Match': dataset.etag} else: diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 5bbdbc3121fe..aca778ce3a92 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -642,13 +642,16 @@ def test_update_dataset_w_invalid_field(self): client.update_dataset(Dataset(client.dataset(self.DS_ID)), ["foo"]) def test_update_dataset(self): - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import Dataset, AccessEntry PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DESCRIPTION = 'DESCRIPTION' FRIENDLY_NAME = 'TITLE' LOCATION = 'loc' LABELS = {'priority': 'high'} + ACCESS = [ + {'role': 'OWNER', 'userByEmail': 'phred@example.com'}, + ] EXP = 17 RESOURCE = { 'datasetReference': @@ -659,6 +662,7 @@ def test_update_dataset(self): 'location': LOCATION, 'defaultTableExpirationMs': EXP, 'labels': LABELS, + 'access': ACCESS, } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -669,8 +673,11 @@ def test_update_dataset(self): ds.location = LOCATION ds.default_table_expiration_ms = EXP ds.labels = LABELS + ds.access_entries = [ + AccessEntry('OWNER', 'userByEmail', 'phred@example.com')] ds2 = client.update_dataset( - ds, ['description', 'friendly_name', 'location', 'labels']) + ds, ['description', 'friendly_name', 'location', 'labels', + 'access_entries']) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'PATCH') @@ -679,6 +686,7 @@ def test_update_dataset(self): 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, 'labels': LABELS, + 'access': ACCESS, } self.assertEqual(req['data'], SENT) self.assertEqual(req['path'], '/' + PATH) @@ -687,6 +695,7 @@ def test_update_dataset(self): self.assertEqual(ds2.friendly_name, ds.friendly_name) self.assertEqual(ds2.location, ds.location) self.assertEqual(ds2.labels, ds.labels) + self.assertEqual(ds2.access_entries, ds.access_entries) # ETag becomes If-Match header. ds._properties['etag'] = 'etag' From 205b7c2c68e629f00e6d67390d1514b51430b7e8 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 17:54:35 -0400 Subject: [PATCH 0252/2016] biqquery: factor out common values in system.py (#4194) --- .../google-cloud-bigquery/tests/system.py | 125 +++++------------- 1 file changed, 35 insertions(+), 90 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index d745be032b2c..390c88309c1c 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -41,6 +41,19 @@ JOB_TIMEOUT = 120 # 2 minutes WHERE = os.path.abspath(os.path.dirname(__file__)) +# Common table data used for many tests. +ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), +] +HEADER_ROW = ('Full Name', 'Age') +SCHEMA = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), +] + def _has_rows(result): return len(result) > 0 @@ -188,10 +201,7 @@ def test_list_datasets(self): def test_create_table(self): dataset = self.temp_dataset(_make_dataset_id('create_table')) table_id = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(table_id), schema=[full_name, age]) + table_arg = Table(dataset.table(table_id), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) @@ -230,11 +240,8 @@ def test_list_dataset_tables(self): 'newer' + unique_resource_id(), 'newest' + unique_resource_id(), ] - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') for table_name in tables_to_create: - table = Table(dataset.table(table_name), schema=[full_name, age]) + table = Table(dataset.table(table_name), schema=SCHEMA) created_table = retry_403(Config.CLIENT.create_table)(table) self.to_delete.insert(0, created_table) @@ -251,11 +258,7 @@ def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' - schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - ] - table_arg = Table(dataset.table(TABLE_NAME), schema=schema) + table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -285,11 +288,7 @@ def test_update_table_schema(self): dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' - schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - ] - table_arg = Table(dataset.table(TABLE_NAME), schema=schema) + table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -357,27 +356,18 @@ def test_create_rows_then_dump_table(self): def test_load_table_from_local_file_then_dump_table(self): from google.cloud._testing import _NamedTemporaryFile - ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] TABLE_NAME = 'test_table' dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') table_ref = dataset.table(TABLE_NAME) - table_arg = Table(table_ref, schema=[full_name, age]) + table_arg = Table(table_ref, schema=SCHEMA) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: with open(temp.name, 'w') as csv_write: writer = csv.writer(csv_write) - writer.writerow(('Full Name', 'Age')) + writer.writerow(HEADER_ROW) writer.writerows(ROWS) with open(temp.name, 'rb') as csv_read: @@ -437,22 +427,13 @@ def test_load_table_from_local_avro_file_then_dump_table(self): def test_load_table_from_storage_then_dump_table(self): TABLE_ID = 'test_table' - ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] GS_URL = self._write_csv_to_storage( 'bq_load_test' + unique_resource_id(), 'person_ages.csv', - ('Full Name', 'Age'), ROWS) + HEADER_ROW, ROWS) dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_ID), schema=[full_name, age]) + table_arg = Table(dataset.table(TABLE_ID), schema=SCHEMA) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -479,15 +460,12 @@ def test_load_table_from_storage_then_dump_table(self): def test_load_table_from_storage_w_autodetect_schema(self): from google.cloud.bigquery import SchemaField - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] * 100 # BigQuery internally uses the first 100 rows to detect schema + rows = ROWS * 100 + # BigQuery internally uses the first 100 rows to detect schema + gs_url = self._write_csv_to_storage( 'bq_load_test' + unique_resource_id(), 'person_ages.csv', - ('Full Name', 'Age'), rows) + HEADER_ROW, rows) dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) table_ref = dataset.table('test_table') @@ -557,7 +535,7 @@ def _load_table_for_extract_table( with _NamedTemporaryFile() as temp: with open(temp.name, 'w') as csv_write: writer = csv.writer(csv_write) - writer.writerow(('Full Name', 'Age')) + writer.writerow(HEADER_ROW) writer.writerows(rows) with open(temp.name, 'rb') as csv_read: @@ -589,14 +567,8 @@ def test_extract_table(self): table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) table = Table(table_ref) self.to_delete.insert(0, table) - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] self._load_table_for_extract_table( - storage_client, rows, bucket_name, blob_name, table_ref) + storage_client, ROWS, bucket_name, blob_name, table_ref) bucket = storage_client.bucket(bucket_name) destination_blob_name = 'person_ages_out.csv' destination = bucket.blob(destination_blob_name) @@ -621,14 +593,8 @@ def test_extract_table_w_job_config(self): table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) table = Table(table_ref) self.to_delete.insert(0, table) - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] self._load_table_for_extract_table( - storage_client, rows, bucket_name, blob_name, table_ref) + storage_client, ROWS, bucket_name, blob_name, table_ref) bucket = storage_client.bucket(bucket_name) destination_blob_name = 'person_ages_out.csv' destination = bucket.blob(destination_blob_name) @@ -673,10 +639,7 @@ def test_job_cancel(self): dataset = self.temp_dataset(DATASET_ID) - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age]) + table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -1250,24 +1213,15 @@ def test_query_future(self): self.assertEqual(row_tuples, [(1,)]) def test_query_table_def(self): - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] gs_url = self._write_csv_to_storage( 'bq_external_test' + unique_resource_id(), 'person_ages.csv', - ('Full Name', 'Age'), rows) + HEADER_ROW, ROWS) job_config = bigquery.QueryJobConfig() table_id = 'flintstones' ec = bigquery.ExternalConfig('CSV') ec.source_uris = [gs_url] - ec.schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), - ] + ec.schema = SCHEMA ec.options.skip_leading_rows = 1 # skip the header row job_config.table_definitions = {table_id: ec} sql = 'SELECT * FROM %s' % table_id @@ -1277,25 +1231,16 @@ def test_query_table_def(self): row_tuples = [r.values() for r in got_rows] by_age = operator.itemgetter(1) self.assertEqual(sorted(row_tuples, key=by_age), - sorted(rows, key=by_age)) + sorted(ROWS, key=by_age)) def test_query_external_table(self): - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] gs_url = self._write_csv_to_storage( 'bq_external_test' + unique_resource_id(), 'person_ages.csv', - ('Full Name', 'Age'), rows) + HEADER_ROW, ROWS) dataset_id = _make_dataset_id('query_external_table') dataset = self.temp_dataset(dataset_id) table_id = 'flintstones' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(table_id), schema=[full_name, age]) + table_arg = Table(dataset.table(table_id), schema=SCHEMA) ec = bigquery.ExternalConfig('CSV') ec.source_uris = [gs_url] ec.options.skip_leading_rows = 1 # skip the header row @@ -1310,7 +1255,7 @@ def test_query_external_table(self): row_tuples = [r.values() for r in got_rows] by_age = operator.itemgetter(1) self.assertEqual(sorted(row_tuples, key=by_age), - sorted(rows, key=by_age)) + sorted(ROWS, key=by_age)) def test_create_rows_nested_nested(self): # See #2951 From 0b8f5a1b3f5d449f1bf40f2a2f12b282eb86cd78 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 16 Oct 2017 14:55:41 -0700 Subject: [PATCH 0253/2016] BigQuery: option for job ID generation with user-supplied prefix (#4198) adds job_id_prefix to _make_job_id() --- .../google/cloud/bigquery/client.py | 62 ++++++++++++++----- .../google-cloud-bigquery/tests/system.py | 9 ++- .../tests/unit/test_client.py | 29 +++++++++ 3 files changed, 80 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e3509deb388e..ce318ed91dc9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -599,8 +599,8 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, extra_params=extra_params) def load_table_from_storage(self, source_uris, destination, - job_id=None, job_config=None, - retry=DEFAULT_RETRY): + job_id=None, job_id_prefix=None, + job_config=None, retry=DEFAULT_RETRY): """Starts a job for loading data into a table from CloudStorage. See @@ -618,6 +618,11 @@ def load_table_from_storage(self, source_uris, destination, :type job_id: str :param job_id: (Optional) Name of the job. + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. @@ -627,7 +632,7 @@ def load_table_from_storage(self, source_uris, destination, :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: a new ``LoadJob`` instance """ - job_id = _make_job_id(job_id) + job_id = _make_job_id(job_id, job_id_prefix) if isinstance(source_uris, six.string_types): source_uris = [source_uris] job = LoadJob(job_id, source_uris, destination, self, job_config) @@ -638,7 +643,7 @@ def load_table_from_file(self, file_obj, destination, rewind=False, size=None, num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, job_config=None): + job_id=None, job_id_prefix=None, job_config=None): """Upload the contents of this table from a file-like object. Like load_table_from_storage, this creates, starts and returns @@ -665,6 +670,11 @@ def load_table_from_file(self, file_obj, destination, :type job_id: str :param job_id: (Optional) Name of the job. + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. @@ -677,7 +687,7 @@ def load_table_from_file(self, file_obj, destination, be determined, or if the ``file_obj`` can be detected to be a file opened in text mode. """ - job_id = _make_job_id(job_id) + job_id = _make_job_id(job_id, job_id_prefix) job = LoadJob(job_id, None, destination, self, job_config) job_resource = job._build_resource() if rewind: @@ -801,8 +811,8 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries): return response - def copy_table(self, sources, destination, job_id=None, job_config=None, - retry=DEFAULT_RETRY): + def copy_table(self, sources, destination, job_id=None, job_id_prefix=None, + job_config=None, retry=DEFAULT_RETRY): """Start a job for copying one or more tables into another table. See @@ -821,6 +831,11 @@ def copy_table(self, sources, destination, job_id=None, job_config=None, :type job_id: str :param job_id: (Optional) The ID of the job. + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + :type job_config: :class:`google.cloud.bigquery.job.CopyJobConfig` :param job_config: (Optional) Extra configuration options for the job. @@ -830,7 +845,7 @@ def copy_table(self, sources, destination, job_id=None, job_config=None, :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: a new ``CopyJob`` instance """ - job_id = _make_job_id(job_id) + job_id = _make_job_id(job_id, job_id_prefix) if not isinstance(sources, collections.Sequence): sources = [sources] @@ -841,7 +856,7 @@ def copy_table(self, sources, destination, job_id=None, job_config=None, def extract_table( self, source, destination_uris, job_config=None, job_id=None, - retry=DEFAULT_RETRY): + job_id_prefix=None, retry=DEFAULT_RETRY): """Start a job to extract a table into Cloud Storage files. See @@ -863,6 +878,11 @@ def extract_table( :type job_id: str :param job_id: (Optional) The ID of the job. + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + :type job_config: :class:`google.cloud.bigquery.job.ExtractJobConfig` :param job_config: (Optional) Extra configuration options for the job. @@ -872,7 +892,7 @@ def extract_table( :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: a new ``ExtractJob`` instance """ - job_id = _make_job_id(job_id) + job_id = _make_job_id(job_id, job_id_prefix) if isinstance(destination_uris, six.string_types): destination_uris = [destination_uris] @@ -883,7 +903,8 @@ def extract_table( job.begin(retry=retry) return job - def query(self, query, job_config=None, job_id=None, retry=DEFAULT_RETRY): + def query(self, query, job_config=None, job_id=None, job_id_prefix=None, + retry=DEFAULT_RETRY): """Start a job that runs a SQL query. See @@ -900,13 +921,18 @@ def query(self, query, job_config=None, job_id=None, retry=DEFAULT_RETRY): :type job_id: str :param job_id: (Optional) ID to use for the query job. + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + :type retry: :class:`google.api.core.retry.Retry` :param retry: (Optional) How to retry the RPC. :rtype: :class:`google.cloud.bigquery.job.QueryJob` :returns: a new ``QueryJob`` instance """ - job_id = _make_job_id(job_id) + job_id = _make_job_id(job_id, job_id_prefix) job = QueryJob(job_id, query, client=self, job_config=job_config) job.begin(retry=retry) return job @@ -1269,18 +1295,24 @@ def _item_to_table(iterator, resource): return Table.from_api_repr(resource) -def _make_job_id(job_id): +def _make_job_id(job_id, prefix=None): """Construct an ID for a new job. :type job_id: str or ``NoneType`` :param job_id: the user-provided job ID + :type prefix: str or ``NoneType`` + :param prefix: (Optional) the user-provided prefix for a job ID + :rtype: str :returns: A job ID """ - if job_id is None: + if job_id is not None: + return job_id + elif prefix is not None: + return str(prefix) + str(uuid.uuid4()) + else: return str(uuid.uuid4()) - return job_id def _check_mode(stream): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 390c88309c1c..c434efcf70ea 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -633,7 +633,7 @@ def test_copy_table(self): def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') - JOB_ID = 'fetch_' + DATASET_ID + str(uuid.uuid4()) + JOB_ID_PREFIX = 'fetch_' + DATASET_ID TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) @@ -643,7 +643,7 @@ def test_job_cancel(self): table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - job = Config.CLIENT.query(QUERY, job_id=JOB_ID) + job = Config.CLIENT.query(QUERY, job_id_prefix=JOB_ID_PREFIX) job.cancel() retry = RetryInstanceState(_job_done, max_tries=8) @@ -866,7 +866,7 @@ def test_query_w_dml(self): query_job = Config.CLIENT.query( query_template.format(dataset_name, table_name), - job_id='test_query_w_dml_{}'.format(str(uuid.uuid4()))) + job_id_prefix='test_query_w_dml_') query_job.result() self.assertEqual(query_job.num_dml_affected_rows, 1) @@ -1053,8 +1053,7 @@ def test_query_w_query_params(self): query_job = Config.CLIENT.query( example['sql'], job_config=jconfig, - job_id='test_query_w_query_params{}'.format( - str(uuid.uuid4()))) + job_id_prefix='test_query_w_query_params') rows = list(query_job.result()) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index aca778ce3a92..ed62a27f0d76 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2859,6 +2859,35 @@ def test_list_partitions(self): [20160804, 20160805]) +class Test_make_job_id(unittest.TestCase): + def _call_fut(self, job_id, prefix=None): + from google.cloud.bigquery.client import _make_job_id + + return _make_job_id(job_id, prefix=prefix) + + def test__make_job_id_wo_suffix(self): + job_id = self._call_fut('job_id') + + self.assertEqual(job_id, 'job_id') + + def test__make_job_id_w_suffix(self): + with mock.patch('uuid.uuid4', side_effect=['212345']): + job_id = self._call_fut(None, prefix='job_id') + + self.assertEqual(job_id, 'job_id212345') + + def test__make_random_job_id(self): + with mock.patch('uuid.uuid4', side_effect=['212345']): + job_id = self._call_fut(None) + + self.assertEqual(job_id, '212345') + + def test__make_job_id_w_job_id_overrides_prefix(self): + job_id = self._call_fut('job_id', prefix='unused_prefix') + + self.assertEqual(job_id, 'job_id') + + class TestClientUpload(object): # NOTE: This is a "partner" to `TestClient` meant to test some of the # "load_table_from_file" portions of `Client`. It also uses From 66fc4d59b84ee5da4abad0bc24943b8a52180dd8 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 18 Aug 2017 12:38:13 -0400 Subject: [PATCH 0254/2016] Rename job classes (#3797) * Rename class: 'jobs.LoadTableFromStorageJob' -> 'jobs.LoadJob'. * Rename class: 'jobs.ExtractTableToStorageJob' -> 'jobs.ExtractJob'. --- .../google/cloud/bigquery/client.py | 26 +++++++++---------- .../google/cloud/bigquery/job.py | 20 +++++++------- .../google/cloud/bigquery/table.py | 2 +- .../tests/unit/test_client.py | 20 +++++++------- .../tests/unit/test_job.py | 24 ++++++++--------- 5 files changed, 46 insertions(+), 46 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2ed2c15e13a9..5578064c1cc7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -19,8 +19,8 @@ from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.job import CopyJob -from google.cloud.bigquery.job import ExtractTableToStorageJob -from google.cloud.bigquery.job import LoadTableFromStorageJob +from google.cloud.bigquery.job import ExtractJob +from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.query import QueryResults @@ -204,20 +204,20 @@ def job_from_resource(self, resource): :param resource: one job resource from API response :rtype: One of: - :class:`google.cloud.bigquery.job.LoadTableFromStorageJob`, + :class:`google.cloud.bigquery.job.LoadJob`, :class:`google.cloud.bigquery.job.CopyJob`, - :class:`google.cloud.bigquery.job.ExtractTableToStorageJob`, + :class:`google.cloud.bigquery.job.ExtractJob`, :class:`google.cloud.bigquery.job.QueryJob`, :class:`google.cloud.bigquery.job.RunSyncQueryJob` :returns: the job instance, constructed via the resource """ config = resource['configuration'] if 'load' in config: - return LoadTableFromStorageJob.from_api_repr(resource, self) + return LoadJob.from_api_repr(resource, self) elif 'copy' in config: return CopyJob.from_api_repr(resource, self) elif 'extract' in config: - return ExtractTableToStorageJob.from_api_repr(resource, self) + return ExtractJob.from_api_repr(resource, self) elif 'query' in config: return QueryJob.from_api_repr(resource, self) raise ValueError('Cannot parse job resource') @@ -288,11 +288,10 @@ def load_table_from_storage(self, job_name, destination, *source_uris): :param source_uris: URIs of data files to be loaded; in format ``gs:///``. - :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob` - :returns: a new ``LoadTableFromStorageJob`` instance + :rtype: :class:`google.cloud.bigquery.job.LoadJob` + :returns: a new ``LoadJob`` instance """ - return LoadTableFromStorageJob(job_name, destination, source_uris, - client=self) + return LoadJob(job_name, destination, source_uris, client=self) def copy_table(self, job_name, destination, *sources): """Construct a job for copying one or more tables into another table. @@ -331,11 +330,10 @@ def extract_table_to_storage(self, job_name, source, *destination_uris): table data is to be extracted; in format ``gs:///``. - :rtype: :class:`google.cloud.bigquery.job.ExtractTableToStorageJob` - :returns: a new ``ExtractTableToStorageJob`` instance + :rtype: :class:`google.cloud.bigquery.job.ExtractJob` + :returns: a new ``ExtractJob`` instance """ - return ExtractTableToStorageJob(job_name, source, destination_uris, - client=self) + return ExtractJob(job_name, source, destination_uris, client=self) def run_async_query(self, job_name, query, udf_resources=(), query_parameters=()): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 3eada05d2ea0..5d37bef36160 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -525,8 +525,8 @@ class _LoadConfiguration(object): _write_disposition = None -class LoadTableFromStorageJob(_AsyncJob): - """Asynchronous job for loading data into a table from CloudStorage. +class LoadJob(_AsyncJob): + """Asynchronous job for loading data into a table from remote URI. :type name: str :param name: the name of the job @@ -535,8 +535,10 @@ class LoadTableFromStorageJob(_AsyncJob): :param destination: Table into which data is to be loaded. :type source_uris: sequence of string - :param source_uris: URIs of one or more data files to be loaded, in - format ``gs:///``. + :param source_uris: + URIs of one or more data files to be loaded. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris + for supported URI formats. :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration @@ -550,7 +552,7 @@ class LoadTableFromStorageJob(_AsyncJob): _JOB_TYPE = 'load' def __init__(self, name, destination, source_uris, client, schema=()): - super(LoadTableFromStorageJob, self).__init__(name, client) + super(LoadJob, self).__init__(name, client) self.destination = destination self.source_uris = source_uris self._configuration = _LoadConfiguration() @@ -775,7 +777,7 @@ def from_api_repr(cls, resource, client): :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob` + :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: Job parsed from ``resource``. """ name, config = cls._get_resource_config(resource) @@ -919,7 +921,7 @@ class _ExtractConfiguration(object): _print_header = None -class ExtractTableToStorageJob(_AsyncJob): +class ExtractJob(_AsyncJob): """Asynchronous job: extract data from a table into Cloud Storage. :type name: str @@ -940,7 +942,7 @@ class ExtractTableToStorageJob(_AsyncJob): _JOB_TYPE = 'extract' def __init__(self, name, source, destination_uris, client): - super(ExtractTableToStorageJob, self).__init__(name, client) + super(ExtractJob, self).__init__(name, client) self.source = source self.destination_uris = destination_uris self._configuration = _ExtractConfiguration() @@ -1018,7 +1020,7 @@ def from_api_repr(cls, resource, client): :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.ExtractTableToStorageJob` + :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: Job parsed from ``resource``. """ name, config = cls._get_resource_config(resource) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 03e557248637..2121730d3f79 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1132,7 +1132,7 @@ def upload_from_file(self, :type null_marker: str :param null_marker: Optional. A custom null marker (example: "\\N") - :rtype: :class:`~google.cloud.bigquery.jobs.LoadTableFromStorageJob` + :rtype: :class:`~google.cloud.bigquery.jobs.LoadJob` :returns: the job instance used to load the data (e.g., for querying status). Note that the job is already started: diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index dc998926d434..6a9a98f2952e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -268,9 +268,9 @@ def test_job_from_resource_unknown_type(self): def test_list_jobs_defaults(self): import six - from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import CopyJob - from google.cloud.bigquery.job import ExtractTableToStorageJob + from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import QueryJob PROJECT = 'PROJECT' @@ -281,9 +281,9 @@ def test_list_jobs_defaults(self): SOURCE_URI = 'gs://test_bucket/src_object*' DESTINATION_URI = 'gs://test_bucket/dst_object*' JOB_TYPES = { - 'load_job': LoadTableFromStorageJob, + 'load_job': LoadJob, 'copy_job': CopyJob, - 'extract_job': ExtractTableToStorageJob, + 'extract_job': ExtractJob, 'query_job': QueryJob, } PATH = 'projects/%s/jobs' % PROJECT @@ -400,13 +400,13 @@ def test_list_jobs_defaults(self): def test_list_jobs_load_job_wo_sourceUris(self): import six - from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import LoadJob PROJECT = 'PROJECT' DATASET = 'test_dataset' SOURCE_TABLE = 'source_table' JOB_TYPES = { - 'load_job': LoadTableFromStorageJob, + 'load_job': LoadJob, } PATH = 'projects/%s/jobs' % PROJECT TOKEN = 'TOKEN' @@ -487,7 +487,7 @@ def test_list_jobs_explicit_missing(self): 'stateFilter': 'done'}) def test_load_table_from_storage(self): - from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import LoadJob PROJECT = 'PROJECT' JOB = 'job_name' @@ -500,7 +500,7 @@ def test_load_table_from_storage(self): dataset = client.dataset(DATASET) destination = dataset.table(DESTINATION) job = client.load_table_from_storage(JOB, destination, SOURCE_URI) - self.assertIsInstance(job, LoadTableFromStorageJob) + self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) self.assertEqual(job.name, JOB) self.assertEqual(list(job.source_uris), [SOURCE_URI]) @@ -528,7 +528,7 @@ def test_copy_table(self): self.assertIs(job.destination, destination) def test_extract_table_to_storage(self): - from google.cloud.bigquery.job import ExtractTableToStorageJob + from google.cloud.bigquery.job import ExtractJob PROJECT = 'PROJECT' JOB = 'job_name' @@ -541,7 +541,7 @@ def test_extract_table_to_storage(self): dataset = client.dataset(DATASET) source = dataset.table(SOURCE) job = client.extract_table_to_storage(JOB, source, DESTINATION) - self.assertIsInstance(job, ExtractTableToStorageJob) + self.assertIsInstance(job, ExtractJob) self.assertIs(job._client, client) self.assertEqual(job.name, JOB) self.assertEqual(job.source, source) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ab08701d352a..c9928732203e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -143,24 +143,24 @@ def _verifyReadonlyResourceProperties(self, job, resource): self.assertIsNone(job.user_email) -class TestLoadTableFromStorageJob(unittest.TestCase, _Base): +class TestLoadJob(unittest.TestCase, _Base): JOB_TYPE = 'load' @staticmethod def _get_target_class(): - from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import LoadJob - return LoadTableFromStorageJob + return LoadJob def _setUpConstants(self): - super(TestLoadTableFromStorageJob, self)._setUpConstants() + super(TestLoadJob, self)._setUpConstants() self.INPUT_FILES = 2 self.INPUT_BYTES = 12345 self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 def _makeResource(self, started=False, ended=False): - resource = super(TestLoadTableFromStorageJob, self)._makeResource( + resource = super(TestLoadJob, self)._makeResource( started, ended) config = resource['configuration']['load'] config['sourceUris'] = [self.SOURCE1] @@ -1142,19 +1142,19 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) -class TestExtractTableToStorageJob(unittest.TestCase, _Base): +class TestExtractJob(unittest.TestCase, _Base): JOB_TYPE = 'extract' SOURCE_TABLE = 'source_table' DESTINATION_URI = 'gs://bucket_name/object_name' @staticmethod def _get_target_class(): - from google.cloud.bigquery.job import ExtractTableToStorageJob + from google.cloud.bigquery.job import ExtractJob - return ExtractTableToStorageJob + return ExtractJob def _makeResource(self, started=False, ended=False): - resource = super(TestExtractTableToStorageJob, self)._makeResource( + resource = super(TestExtractJob, self)._makeResource( started, ended) config = resource['configuration']['extract'] config['sourceTable'] = { @@ -2181,15 +2181,15 @@ def __init__(self, name=None): def name(self): if self._name is not None: return self._name - return TestLoadTableFromStorageJob.TABLE_NAME + return TestLoadJob.TABLE_NAME @property def project(self): - return TestLoadTableFromStorageJob.PROJECT + return TestLoadJob.PROJECT @property def dataset_name(self): - return TestLoadTableFromStorageJob.DS_NAME + return TestLoadJob.DS_NAME class _Connection(object): From 904e1caafc21b7f7c749858e229ea7198a223296 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 18 Aug 2017 13:02:34 -0400 Subject: [PATCH 0255/2016] Rename class: 'dataset.AccessGrant' -> 'dataset.AccessEntry'. (#3798) * Rename class: 'dataset.AccessGrant' -> 'dataset.AccessEntry'. * PEP8 names for unit test helpers. * Rename 'Dataset.access_grants' -> 'Dataaset.access_entries'. --- .../google/cloud/bigquery/__init__.py | 12 +- .../google/cloud/bigquery/dataset.py | 80 ++++----- .../tests/unit/test_dataset.py | 165 +++++++++--------- 3 files changed, 132 insertions(+), 125 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index e2eb29e866a3..00fa4445b0d0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -30,13 +30,19 @@ from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery.client import Client -from google.cloud.bigquery.dataset import AccessGrant +from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table __all__ = [ - '__version__', 'AccessGrant', 'ArrayQueryParameter', 'Client', - 'Dataset', 'ScalarQueryParameter', 'SchemaField', 'StructQueryParameter', + '__version__', + 'AccessEntry', + 'ArrayQueryParameter', + 'Client', + 'Dataset', + 'ScalarQueryParameter', + 'SchemaField', + 'StructQueryParameter', 'Table', ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 645a68deada4..a688cb3b560b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -21,7 +21,7 @@ from google.cloud.bigquery.table import Table -class AccessGrant(object): +class AccessEntry(object): """Represent grant of an access role to an entity. Every entry in the access list will have exactly one of @@ -76,7 +76,7 @@ def __init__(self, role, entity_type, entity_id): self.entity_id = entity_id def __eq__(self, other): - if not isinstance(other, AccessGrant): + if not isinstance(other, AccessEntry): return NotImplemented return ( self.role == other.role and @@ -87,7 +87,7 @@ def __ne__(self, other): return not self == other def __repr__(self): - return '' % ( + return '' % ( self.role, self.entity_type, self.entity_id) @@ -104,22 +104,22 @@ class Dataset(object): :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - :type access_grants: list of :class:`AccessGrant` - :param access_grants: roles granted to entities for this dataset + :type access_entries: list of :class:`AccessEntry` + :param access_entries: roles granted to entities for this dataset :type project: str :param project: (Optional) project ID for the dataset (defaults to the project of the client). """ - _access_grants = None + _access_entries = None - def __init__(self, name, client, access_grants=(), project=None): + def __init__(self, name, client, access_entries=(), project=None): self.name = name self._client = client self._properties = {} # Let the @property do validation. - self.access_grants = access_grants + self.access_entries = access_entries self._project = project or client.project @property @@ -141,27 +141,27 @@ def path(self): return '/projects/%s/datasets/%s' % (self.project, self.name) @property - def access_grants(self): - """Dataset's access grants. + def access_entries(self): + """Dataset's access entries. - :rtype: list of :class:`AccessGrant` + :rtype: list of :class:`AccessEntry` :returns: roles granted to entities for this dataset """ - return list(self._access_grants) + return list(self._access_entries) - @access_grants.setter - def access_grants(self, value): - """Update dataset's access grants + @access_entries.setter + def access_entries(self, value): + """Update dataset's access entries - :type value: list of :class:`AccessGrant` + :type value: list of :class:`AccessEntry` :param value: roles granted to entities for this dataset :raises: TypeError if 'value' is not a sequence, or ValueError if - any item in the sequence is not an AccessGrant + any item in the sequence is not an AccessEntry """ - if not all(isinstance(field, AccessGrant) for field in value): - raise ValueError('Values must be AccessGrant instances') - self._access_grants = tuple(value) + if not all(isinstance(field, AccessEntry) for field in value): + raise ValueError('Values must be AccessEntry instances') + self._access_entries = tuple(value) @property def created(self): @@ -341,29 +341,29 @@ def _require_client(self, client): return client @staticmethod - def _parse_access_grants(access): - """Parse a resource fragment into a set of access grants. + def _parse_access_entries(access): + """Parse a resource fragment into a set of access entries. ``role`` augments the entity type and present **unless** the entity type is ``view``. :type access: list of mappings - :param access: each mapping represents a single access grant. + :param access: each mapping represents a single access entry. - :rtype: list of :class:`AccessGrant` - :returns: a list of parsed grants. - :raises: :class:`ValueError` if a grant in ``access`` has more keys + :rtype: list of :class:`AccessEntry` + :returns: a list of parsed entries. + :raises: :class:`ValueError` if a entry in ``access`` has more keys than ``role`` and one additional key. """ result = [] - for grant in access: - grant = grant.copy() - role = grant.pop('role', None) - entity_type, entity_id = grant.popitem() - if len(grant) != 0: - raise ValueError('Grant has unexpected keys remaining.', grant) + for entry in access: + entry = entry.copy() + role = entry.pop('role', None) + entity_type, entity_id = entry.popitem() + if len(entry) != 0: + raise ValueError('Entry has unexpected keys remaining.', entry) result.append( - AccessGrant(role, entity_type, entity_id)) + AccessEntry(role, entity_type, entity_id)) return result def _set_properties(self, api_response): @@ -375,7 +375,7 @@ def _set_properties(self, api_response): self._properties.clear() cleaned = api_response.copy() access = cleaned.pop('access', ()) - self.access_grants = self._parse_access_grants(access) + self.access_entries = self._parse_access_entries(access) if 'creationTime' in cleaned: cleaned['creationTime'] = float(cleaned['creationTime']) if 'lastModifiedTime' in cleaned: @@ -386,12 +386,12 @@ def _set_properties(self, api_response): self._properties.update(cleaned) def _build_access_resource(self): - """Generate a resource fragment for dataset's access grants.""" + """Generate a resource fragment for dataset's access entries.""" result = [] - for grant in self.access_grants: - info = {grant.entity_type: grant.entity_id} - if grant.role is not None: - info['role'] = grant.role + for entry in self.access_entries: + info = {entry.entity_type: entry.entity_id} + if entry.role is not None: + info['role'] = entry.role result.append(info) return result @@ -414,7 +414,7 @@ def _build_resource(self): if self.location is not None: resource['location'] = self.location - if len(self.access_grants) > 0: + if len(self.access_entries) > 0: resource['access'] = self._build_access_resource() return resource diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 164f9ed0a2b4..10b832d7abe4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -17,22 +17,22 @@ import mock -class TestAccessGrant(unittest.TestCase): +class TestAccessEntry(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery.dataset import AccessGrant + from google.cloud.bigquery.dataset import AccessEntry - return AccessGrant + return AccessEntry def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') - self.assertEqual(grant.role, 'OWNER') - self.assertEqual(grant.entity_type, 'userByEmail') - self.assertEqual(grant.entity_id, 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + self.assertEqual(entry.role, 'OWNER') + self.assertEqual(entry.entity_type, 'userByEmail') + self.assertEqual(entry.entity_id, 'phred@example.com') def test_ctor_bad_entity_type(self): with self.assertRaises(ValueError): @@ -48,10 +48,10 @@ def test_ctor_view_success(self): role = None entity_type = 'view' entity_id = object() - grant = self._make_one(role, entity_type, entity_id) - self.assertEqual(grant.role, role) - self.assertEqual(grant.entity_type, entity_type) - self.assertEqual(grant.entity_id, entity_id) + entry = self._make_one(role, entity_type, entity_id) + self.assertEqual(entry.role, role) + self.assertEqual(entry.entity_type, entity_type) + self.assertEqual(entry.entity_id, entity_id) def test_ctor_nonview_without_role(self): role = None @@ -60,29 +60,29 @@ def test_ctor_nonview_without_role(self): self._make_one(role, entity_type, None) def test___eq___role_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('WRITER', 'userByEmail', 'phred@example.com') - self.assertNotEqual(grant, other) + self.assertNotEqual(entry, other) def test___eq___entity_type_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('OWNER', 'groupByEmail', 'phred@example.com') - self.assertNotEqual(grant, other) + self.assertNotEqual(entry, other) def test___eq___entity_id_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('OWNER', 'userByEmail', 'bharney@example.com') - self.assertNotEqual(grant, other) + self.assertNotEqual(entry, other) def test___eq___hit(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('OWNER', 'userByEmail', 'phred@example.com') - self.assertEqual(grant, other) + self.assertEqual(entry, other) def test__eq___type_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'silly@example.com') - self.assertNotEqual(grant, object()) - self.assertEqual(grant, mock.ANY) + entry = self._make_one('OWNER', 'userByEmail', 'silly@example.com') + self.assertNotEqual(entry, object()) + self.assertEqual(entry, mock.ANY) class TestDataset(unittest.TestCase): @@ -129,22 +129,23 @@ def _makeResource(self): {'role': 'READER', 'specialGroup': 'projectReaders'}], } - def _verifyAccessGrants(self, access_grants, resource): - r_grants = [] - for r_grant in resource['access']: - role = r_grant.pop('role') - for entity_type, entity_id in sorted(r_grant.items()): - r_grants.append({'role': role, - 'entity_type': entity_type, - 'entity_id': entity_id}) + def _verify_access_entry(self, access_entries, resource): + r_entries = [] + for r_entry in resource['access']: + role = r_entry.pop('role') + for entity_type, entity_id in sorted(r_entry.items()): + r_entries.append({ + 'role': role, + 'entity_type': entity_type, + 'entity_id': entity_id}) - self.assertEqual(len(access_grants), len(r_grants)) - for a_grant, r_grant in zip(access_grants, r_grants): - self.assertEqual(a_grant.role, r_grant['role']) - self.assertEqual(a_grant.entity_type, r_grant['entity_type']) - self.assertEqual(a_grant.entity_id, r_grant['entity_id']) + self.assertEqual(len(access_entries), len(r_entries)) + for a_entry, r_entry in zip(access_entries, r_entries): + self.assertEqual(a_entry.role, r_entry['role']) + self.assertEqual(a_entry.entity_type, r_entry['entity_type']) + self.assertEqual(a_entry.entity_id, r_entry['entity_id']) - def _verifyReadonlyResourceProperties(self, dataset, resource): + def _verify_readonly_resource_properties(self, dataset, resource): self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -165,9 +166,9 @@ def _verifyReadonlyResourceProperties(self, dataset, resource): else: self.assertIsNone(dataset.self_link) - def _verifyResourceProperties(self, dataset, resource): + def _verify_resource_properties(self, dataset, resource): - self._verifyReadonlyResourceProperties(dataset, resource) + self._verify_readonly_resource_properties(dataset, resource) if 'defaultTableExpirationMs' in resource: self.assertEqual(dataset.default_table_expiration_ms, @@ -179,9 +180,9 @@ def _verifyResourceProperties(self, dataset, resource): self.assertEqual(dataset.location, resource.get('location')) if 'access' in resource: - self._verifyAccessGrants(dataset.access_grants, resource) + self._verify_access_entry(dataset.access_entries, resource) else: - self.assertEqual(dataset.access_grants, []) + self.assertEqual(dataset.access_entries, []) def test_ctor_defaults(self): client = _Client(self.PROJECT) @@ -192,7 +193,7 @@ def test_ctor_defaults(self): self.assertEqual( dataset.path, '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME)) - self.assertEqual(dataset.access_grants, []) + self.assertEqual(dataset.access_entries, []) self.assertIsNone(dataset.created) self.assertIsNone(dataset.dataset_id) @@ -206,15 +207,15 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.location) def test_ctor_explicit(self): - from google.cloud.bigquery.dataset import AccessGrant + from google.cloud.bigquery.dataset import AccessEntry - phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') - bharney = AccessGrant('OWNER', 'userByEmail', 'bharney@example.com') - grants = [phred, bharney] + phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') + bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') + entries = [phred, bharney] OTHER_PROJECT = 'foo-bar-123' client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client, - access_grants=grants, + access_entries=entries, project=OTHER_PROJECT) self.assertEqual(dataset.name, self.DS_NAME) self.assertIs(dataset._client, client) @@ -222,7 +223,7 @@ def test_ctor_explicit(self): self.assertEqual( dataset.path, '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_NAME)) - self.assertEqual(dataset.access_grants, grants) + self.assertEqual(dataset.access_entries, entries) self.assertIsNone(dataset.created) self.assertIsNone(dataset.dataset_id) @@ -235,30 +236,30 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) - def test_access_grants_setter_non_list(self): + def test_access_entries_setter_non_list(self): client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) with self.assertRaises(TypeError): - dataset.access_grants = object() + dataset.access_entries = object() - def test_access_grants_setter_invalid_field(self): - from google.cloud.bigquery.dataset import AccessGrant + def test_access_entries_setter_invalid_field(self): + from google.cloud.bigquery.dataset import AccessEntry client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) - phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') + phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') with self.assertRaises(ValueError): - dataset.access_grants = [phred, object()] + dataset.access_entries = [phred, object()] - def test_access_grants_setter(self): - from google.cloud.bigquery.dataset import AccessGrant + def test_access_entries_setter(self): + from google.cloud.bigquery.dataset import AccessEntry client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client) - phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') - bharney = AccessGrant('OWNER', 'userByEmail', 'bharney@example.com') - dataset.access_grants = [phred, bharney] - self.assertEqual(dataset.access_grants, [phred, bharney]) + phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') + bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') + dataset.access_entries = [phred, bharney] + self.assertEqual(dataset.access_entries, [phred, bharney]) def test_default_table_expiration_ms_setter_bad_value(self): client = _Client(self.PROJECT) @@ -329,7 +330,7 @@ def test_from_api_repr_bare(self): klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE, client=client) self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) @@ -337,18 +338,18 @@ def test_from_api_repr_w_properties(self): klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE, client=client) self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) - def test__parse_access_grants_w_unknown_entity_type(self): + def test__parse_access_entries_w_unknown_entity_type(self): ACCESS = [ {'role': 'READER', 'unknown': 'UNKNOWN'}, ] client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client=client) with self.assertRaises(ValueError): - dataset._parse_access_grants(ACCESS) + dataset._parse_access_entries(ACCESS) - def test__parse_access_grants_w_extra_keys(self): + def test__parse_access_entries_w_extra_keys(self): USER_EMAIL = 'phred@example.com' ACCESS = [ { @@ -360,7 +361,7 @@ def test__parse_access_grants_w_extra_keys(self): client = _Client(self.PROJECT) dataset = self._make_one(self.DS_NAME, client=client) with self.assertRaises(ValueError): - dataset._parse_access_grants(ACCESS) + dataset._parse_access_entries(ACCESS) def test_create_w_bound_client(self): PATH = 'projects/%s/datasets' % self.PROJECT @@ -380,10 +381,10 @@ def test_create_w_bound_client(self): {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, } self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_create_w_alternate_client(self): - from google.cloud.bigquery.dataset import AccessGrant + from google.cloud.bigquery.dataset import AccessEntry PATH = 'projects/%s/datasets' % self.PROJECT USER_EMAIL = 'phred@example.com' @@ -405,13 +406,13 @@ def test_create_w_alternate_client(self): 'datasetId': 'starry-skies', 'tableId': 'northern-hemisphere', } - dataset.access_grants = [ - AccessGrant('OWNER', 'userByEmail', USER_EMAIL), - AccessGrant('OWNER', 'groupByEmail', GROUP_EMAIL), - AccessGrant('READER', 'domain', 'foo.com'), - AccessGrant('READER', 'specialGroup', 'projectReaders'), - AccessGrant('WRITER', 'specialGroup', 'projectWriters'), - AccessGrant(None, 'view', VIEW), + dataset.access_entries = [ + AccessEntry('OWNER', 'userByEmail', USER_EMAIL), + AccessEntry('OWNER', 'groupByEmail', GROUP_EMAIL), + AccessEntry('READER', 'domain', 'foo.com'), + AccessEntry('READER', 'specialGroup', 'projectReaders'), + AccessEntry('WRITER', 'specialGroup', 'projectWriters'), + AccessEntry(None, 'view', VIEW), ] dataset.create(client=CLIENT2) @@ -438,7 +439,7 @@ def test_create_w_alternate_client(self): ], } self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_create_w_missing_output_properties(self): # In the wild, the resource returned from 'dataset.create' sometimes @@ -463,7 +464,7 @@ def test_create_w_missing_output_properties(self): {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, } self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_exists_miss_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) @@ -509,7 +510,7 @@ def test_reload_w_bound_client(self): req = conn._requested[0] self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_reload_w_alternate_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) @@ -527,7 +528,7 @@ def test_reload_w_alternate_client(self): req = conn2._requested[0] self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() @@ -560,7 +561,7 @@ def test_patch_w_bound_client(self): } self.assertEqual(req['data'], SENT) self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_patch_w_alternate_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) @@ -589,7 +590,7 @@ def test_patch_w_alternate_client(self): 'location': LOCATION, } self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_update_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) @@ -617,7 +618,7 @@ def test_update_w_bound_client(self): } self.assertEqual(req['data'], SENT) self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_update_w_alternate_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) @@ -648,7 +649,7 @@ def test_update_w_alternate_client(self): 'location': 'EU', } self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_delete_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) From a65fe03ecf59d9856fa65270fa3c969f2a63dc84 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 7 Sep 2017 15:05:03 -0400 Subject: [PATCH 0256/2016] Add 'QueryJob.query_plan' property. (#3799) --- .../google/cloud/bigquery/job.py | 165 ++++++++++++ .../tests/unit/test_job.py | 241 ++++++++++++++++++ 2 files changed, 406 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5d37bef36160..75182a22e909 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -266,6 +266,11 @@ def ended(self): if millis is not None: return _datetime_from_microseconds(millis * 1000.0) + def _job_statistics(self): + """Helper for job-type specific statistics-based properties.""" + statistics = self._properties.get('statistics', {}) + return statistics.get(self._JOB_TYPE, {}) + @property def error_result(self): """Error information about the job as a whole. @@ -1281,6 +1286,20 @@ def from_api_repr(cls, resource, client): job._set_properties(resource) return job + @property + def query_plan(self): + """Return query plan from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.queryPlan + + :rtype: list of :class:`QueryPlanEntry` + :returns: mappings describing the query plan, or an empty list + if the query has not yet completed. + """ + plan_entries = self._job_statistics().get('queryPlan', ()) + return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] + def query_results(self): """Construct a QueryResults instance, bound to this job. @@ -1333,3 +1352,149 @@ def result(self, timeout=None): super(QueryJob, self).result(timeout=timeout) # Return an iterator instead of returning the job. return self.query_results().fetch_data() + + +class QueryPlanEntryStep(object): + """Map a single step in a query plan entry. + + :type kind: str + :param kind: step type + + :type substeps: + :param substeps: names of substeps + """ + def __init__(self, kind, substeps): + self.kind = kind + self.substeps = list(substeps) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + :type resource: dict + :param resource: JSON representation of the entry + + :rtype: :class:`QueryPlanEntryStep` + :return: new instance built from the resource + """ + return cls( + kind=resource.get('kind'), + substeps=resource.get('substeps', ()), + ) + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self.kind == other.kind and self.substeps == other.substeps + + +class QueryPlanEntry(object): + """Map a single entry in a query plan. + + :type name: str + :param name: name of the entry + + :type entry_id: int + :param entry_id: ID of the entry + + :type wait_ratio_avg: float + :param wait_ratio_avg: average wait ratio + + :type wait_ratio_max: float + :param wait_ratio_avg: maximum wait ratio + + :type read_ratio_avg: float + :param read_ratio_avg: average read ratio + + :type read_ratio_max: float + :param read_ratio_avg: maximum read ratio + + :type copute_ratio_avg: float + :param copute_ratio_avg: average copute ratio + + :type copute_ratio_max: float + :param copute_ratio_avg: maximum copute ratio + + :type write_ratio_avg: float + :param write_ratio_avg: average write ratio + + :type write_ratio_max: float + :param write_ratio_avg: maximum write ratio + + :type records_read: int + :param records_read: number of records read + + :type records_written: int + :param records_written: number of records written + + :type status: str + :param status: entry status + + :type steps: List(QueryPlanEntryStep) + :param steps: steps in the entry + """ + def __init__(self, + name, + entry_id, + wait_ratio_avg, + wait_ratio_max, + read_ratio_avg, + read_ratio_max, + compute_ratio_avg, + compute_ratio_max, + write_ratio_avg, + write_ratio_max, + records_read, + records_written, + status, + steps): + self.name = name + self.entry_id = entry_id + self.wait_ratio_avg = wait_ratio_avg + self.wait_ratio_max = wait_ratio_max + self.read_ratio_avg = read_ratio_avg + self.read_ratio_max = read_ratio_max + self.compute_ratio_avg = compute_ratio_avg + self.compute_ratio_max = compute_ratio_max + self.write_ratio_avg = write_ratio_avg + self.write_ratio_max = write_ratio_max + self.records_read = records_read + self.records_written = records_written + self.status = status + self.steps = steps + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + :type resource: dict + :param resource: JSON representation of the entry + + :rtype: :class:`QueryPlanEntry` + :return: new instance built from the resource + """ + records_read = resource.get('recordsRead') + if records_read is not None: + records_read = int(records_read) + + records_written = resource.get('recordsWritten') + if records_written is not None: + records_written = int(records_written) + + return cls( + name=resource.get('name'), + entry_id=resource.get('id'), + wait_ratio_avg=resource.get('waitRatioAvg'), + wait_ratio_max=resource.get('waitRatioMax'), + read_ratio_avg=resource.get('readRatioAvg'), + read_ratio_max=resource.get('readRatioMax'), + compute_ratio_avg=resource.get('computeRatioAvg'), + compute_ratio_max=resource.get('computeRatioMax'), + write_ratio_avg=resource.get('writeRatioAvg'), + write_ratio_max=resource.get('writeRatioMax'), + records_read=records_read, + records_written=records_written, + status=resource.get('status'), + steps=[QueryPlanEntryStep.from_api_repr(step) + for step in resource.get('steps', ())], + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index c9928732203e..2c8da8ddc89a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1682,6 +1682,68 @@ def test_done(self): job = self._get_target_class().from_api_repr(resource, client) self.assertTrue(job.done()) + def test_query_plan(self): + from google.cloud.bigquery.job import QueryPlanEntry + from google.cloud.bigquery.job import QueryPlanEntryStep + + plan_entries = [{ + 'name': 'NAME', + 'id': 1234, + 'waitRatioAvg': 2.71828, + 'waitRatioMax': 3.14159, + 'readRatioAvg': 1.41421, + 'readRatioMax': 1.73205, + 'computeRatioAvg': 0.69315, + 'computeRatioMax': 1.09861, + 'writeRatioAvg': 3.32193, + 'writeRatioMax': 2.30258, + 'recordsRead': '100', + 'recordsWritten': '1', + 'status': 'STATUS', + 'steps': [{ + 'kind': 'KIND', + 'substeps': ['SUBSTEP1', 'SUBSTEP2'], + }], + }] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.query_plan, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.query_plan, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.query_plan, []) + + query_stats['queryPlan'] = plan_entries + + self.assertEqual(len(job.query_plan), len(plan_entries)) + for found, expected in zip(job.query_plan, plan_entries): + self.assertIsInstance(found, QueryPlanEntry) + self.assertEqual(found.name, expected['name']) + self.assertEqual(found.entry_id, expected['id']) + self.assertEqual(found.wait_ratio_avg, expected['waitRatioAvg']) + self.assertEqual(found.wait_ratio_max, expected['waitRatioMax']) + self.assertEqual(found.read_ratio_avg, expected['readRatioAvg']) + self.assertEqual(found.read_ratio_max, expected['readRatioMax']) + self.assertEqual( + found.compute_ratio_avg, expected['computeRatioAvg']) + self.assertEqual( + found.compute_ratio_max, expected['computeRatioMax']) + self.assertEqual(found.write_ratio_avg, expected['writeRatioAvg']) + self.assertEqual(found.write_ratio_max, expected['writeRatioMax']) + self.assertEqual( + found.records_read, int(expected['recordsRead'])) + self.assertEqual( + found.records_written, int(expected['recordsWritten'])) + self.assertEqual(found.status, expected['status']) + + self.assertEqual(len(found.steps), len(expected['steps'])) + for f_step, e_step in zip(found.steps, expected['steps']): + self.assertIsInstance(f_step, QueryPlanEntryStep) + self.assertEqual(f_step.kind, e_step['kind']) + self.assertEqual(f_step.substeps, e_step['substeps']) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults @@ -2154,6 +2216,185 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) +class TestQueryPlanEntryStep(unittest.TestCase, _Base): + KIND = 'KIND' + SUBSTEPS = ('SUB1', 'SUB2') + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntryStep + + return QueryPlanEntryStep + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step.kind, self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + step = klass.from_api_repr({}) + self.assertIsNone(step.kind) + self.assertEqual(step.substeps, []) + + def test_from_api_repr_normal(self): + resource = { + 'kind': self.KIND, + 'substeps': self.SUBSTEPS, + } + klass = self._get_target_class() + step = klass.from_api_repr(resource) + self.assertEqual(step.kind, self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test___eq___mismatched_type(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertNotEqual(step, object()) + + def test___eq___mismatch_kind(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one('OTHER', self.SUBSTEPS) + self.assertNotEqual(step, other) + + def test___eq___mismatch_substeps(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, ()) + self.assertNotEqual(step, other) + + def test___eq___hit(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step, other) + + +class TestQueryPlanEntry(unittest.TestCase, _Base): + NAME = 'NAME' + ENTRY_ID = 1234 + WAIT_RATIO_AVG = 2.71828 + WAIT_RATIO_MAX = 3.14159 + READ_RATIO_AVG = 1.41421 + READ_RATIO_MAX = 1.73205 + COMPUTE_RATIO_AVG = 0.69315 + COMPUTE_RATIO_MAX = 1.09861 + WRITE_RATIO_AVG = 3.32193 + WRITE_RATIO_MAX = 2.30258 + RECORDS_READ = 100 + RECORDS_WRITTEN = 1 + STATUS = 'STATUS' + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntry + + return QueryPlanEntry + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + from google.cloud.bigquery.job import QueryPlanEntryStep + + steps = [QueryPlanEntryStep( + kind=TestQueryPlanEntryStep.KIND, + substeps=TestQueryPlanEntryStep.SUBSTEPS)] + entry = self._make_one( + name=self.NAME, + entry_id=self.ENTRY_ID, + wait_ratio_avg=self.WAIT_RATIO_AVG, + wait_ratio_max=self.WAIT_RATIO_MAX, + read_ratio_avg=self.READ_RATIO_AVG, + read_ratio_max=self.READ_RATIO_MAX, + compute_ratio_avg=self.COMPUTE_RATIO_AVG, + compute_ratio_max=self.COMPUTE_RATIO_MAX, + write_ratio_avg=self.WRITE_RATIO_AVG, + write_ratio_max=self.WRITE_RATIO_MAX, + records_read=self.RECORDS_READ, + records_written=self.RECORDS_WRITTEN, + status=self.STATUS, + steps=steps, + ) + self.assertEqual(entry.name, self.NAME) + self.assertEqual(entry.entry_id, self.ENTRY_ID) + self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) + self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) + self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) + self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) + self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) + self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) + self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) + self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) + self.assertEqual(entry.records_read, self.RECORDS_READ) + self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) + self.assertEqual(entry.status, self.STATUS) + self.assertEqual(entry.steps, steps) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + + self.assertIsNone(entry.name) + self.assertIsNone(entry.entry_id) + self.assertIsNone(entry.wait_ratio_avg) + self.assertIsNone(entry.wait_ratio_max) + self.assertIsNone(entry.read_ratio_avg) + self.assertIsNone(entry.read_ratio_max) + self.assertIsNone(entry.compute_ratio_avg) + self.assertIsNone(entry.compute_ratio_max) + self.assertIsNone(entry.write_ratio_avg) + self.assertIsNone(entry.write_ratio_max) + self.assertIsNone(entry.records_read) + self.assertIsNone(entry.records_written) + self.assertIsNone(entry.status) + self.assertEqual(entry.steps, []) + + def test_from_api_repr_normal(self): + from google.cloud.bigquery.job import QueryPlanEntryStep + + steps = [QueryPlanEntryStep( + kind=TestQueryPlanEntryStep.KIND, + substeps=TestQueryPlanEntryStep.SUBSTEPS)] + resource = { + 'name': self.NAME, + 'id': self.ENTRY_ID, + 'waitRatioAvg': self.WAIT_RATIO_AVG, + 'waitRatioMax': self.WAIT_RATIO_MAX, + 'readRatioAvg': self.READ_RATIO_AVG, + 'readRatioMax': self.READ_RATIO_MAX, + 'computeRatioAvg': self.COMPUTE_RATIO_AVG, + 'computeRatioMax': self.COMPUTE_RATIO_MAX, + 'writeRatioAvg': self.WRITE_RATIO_AVG, + 'writeRatioMax': self.WRITE_RATIO_MAX, + 'recordsRead': str(self.RECORDS_READ), + 'recordsWritten': str(self.RECORDS_WRITTEN), + 'status': self.STATUS, + 'steps': [{ + 'kind': TestQueryPlanEntryStep.KIND, + 'substeps': TestQueryPlanEntryStep.SUBSTEPS, + }] + } + klass = self._get_target_class() + + entry = klass.from_api_repr(resource) + self.assertEqual(entry.name, self.NAME) + self.assertEqual(entry.entry_id, self.ENTRY_ID) + self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) + self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) + self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) + self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) + self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) + self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) + self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) + self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) + self.assertEqual(entry.records_read, self.RECORDS_READ) + self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) + self.assertEqual(entry.status, self.STATUS) + self.assertEqual(entry.steps, steps) + + class _Client(object): def __init__(self, project='project', connection=None): From 8df52e2596e52e2cf49c17448ee943ac266a4d15 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 7 Sep 2017 17:45:54 -0400 Subject: [PATCH 0257/2016] Add new scalar statistics properties to 'QueryJob' (#3800) * Add 'QueryJob.total_bytes_processed' property. * Add 'QueryJob.total_bytes_billed' property. * Add 'QueryJob.billing_tier' property. * Add 'QueryJob.cache_hit' property. * Add 'QueryJob.num_dml_affected_rows' property. * Add 'QueryJob.statement_type' property. --- .../google/cloud/bigquery/job.py | 87 ++++++++++++++++++ .../tests/unit/test_job.py | 89 +++++++++++++++++++ 2 files changed, 176 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 75182a22e909..32ee5e535bf9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1300,6 +1300,93 @@ def query_plan(self): plan_entries = self._job_statistics().get('queryPlan', ()) return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] + @property + def total_bytes_processed(self): + """Return total bytes processed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesProcessed + + :rtype: int or None + :returns: total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('totalBytesProcessed') + if result is not None: + result = int(result) + return result + + @property + def total_bytes_billed(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesBilled + + :rtype: int or None + :returns: total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('totalBytesBilled') + if result is not None: + result = int(result) + return result + + @property + def billing_tier(self): + """Return billing tier from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.billingTier + + :rtype: int or None + :returns: billing tier used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get('billingTier') + + @property + def cache_hit(self): + """Return billing tier from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.cacheHit + + :rtype: bool or None + :returns: whether the query results were returned from cache, or None + if job is not yet complete. + """ + return self._job_statistics().get('cacheHit') + + @property + def num_dml_affected_rows(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.numDmlAffectedRows + + :rtype: int or None + :returns: number of DML rows affectd by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('numDmlAffectedRows') + if result is not None: + result = int(result) + return result + + @property + def statement_type(self): + """Return statement type from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.statementType + + :rtype: str or None + :returns: type of statement used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get('statementType') + def query_results(self): """Construct a QueryResults instance, bound to this job. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 2c8da8ddc89a..ecfcec83443c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1744,6 +1744,95 @@ def test_query_plan(self): self.assertEqual(f_step.kind, e_step['kind']) self.assertEqual(f_step.substeps, e_step['substeps']) + def test_total_bytes_processed(self): + total_bytes = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.total_bytes_processed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats['totalBytesProcessed'] = str(total_bytes) + self.assertEqual(job.total_bytes_processed, total_bytes) + + def test_total_bytes_billed(self): + total_bytes = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.total_bytes_billed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats['totalBytesBilled'] = str(total_bytes) + self.assertEqual(job.total_bytes_billed, total_bytes) + + def test_billing_tier(self): + billing_tier = 1 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.billing_tier) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.billing_tier) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.billing_tier) + + query_stats['billingTier'] = billing_tier + self.assertEqual(job.billing_tier, billing_tier) + + def test_cache_hit(self): + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.cache_hit) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.cache_hit) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.cache_hit) + + query_stats['cacheHit'] = True + self.assertTrue(job.cache_hit) + + def test_num_dml_affected_rows(self): + num_rows = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.num_dml_affected_rows) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats['numDmlAffectedRows'] = str(num_rows) + self.assertEqual(job.num_dml_affected_rows, num_rows) + + def test_statement_type(self): + statement_type = 'SELECT' + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.statement_type) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.statement_type) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.statement_type) + + query_stats['statementType'] = statement_type + self.assertEqual(job.statement_type, statement_type) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From fb715665b081bad19dd2a77ea04e5e492ec028eb Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 8 Sep 2017 07:49:02 -0400 Subject: [PATCH 0258/2016] Add 'QueryJob.referenced_tables' property. (#3801) --- .../google/cloud/bigquery/job.py | 30 ++++++++++ .../tests/unit/test_job.py | 58 ++++++++++++++++++- 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 32ee5e535bf9..6299b0821c58 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1387,6 +1387,36 @@ def statement_type(self): """ return self._job_statistics().get('statementType') + @property + def referenced_tables(self): + """Return referenced tables from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.referencedTables + + :rtype: list of dict + :returns: mappings describing the query plan, or an empty list + if the query has not yet completed. + """ + tables = [] + client = self._require_client(None) + datasets_by_project_name = {} + + for table in self._job_statistics().get('referencedTables', ()): + + t_project = table['projectId'] + + ds_name = table['datasetId'] + t_dataset = datasets_by_project_name.get((t_project, ds_name)) + if t_dataset is None: + t_dataset = client.dataset(ds_name, project=t_project) + datasets_by_project_name[(t_project, ds_name)] = t_dataset + + t_name = table['tableId'] + tables.append(t_dataset.table(t_name)) + + return tables + def query_results(self): """Construct a QueryResults instance, bound to this job. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ecfcec83443c..886539a9d55b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1833,6 +1833,60 @@ def test_statement_type(self): query_stats['statementType'] = statement_type self.assertEqual(job.statement_type, statement_type) + def test_referenced_tables(self): + from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.table import Table + + ref_tables_resource = [{ + 'projectId': self.PROJECT, + 'datasetId': 'dataset', + 'tableId': 'local1', + }, { + + 'projectId': self.PROJECT, + 'datasetId': 'dataset', + 'tableId': 'local2', + }, { + + 'projectId': 'other-project-123', + 'datasetId': 'other-dataset', + 'tableId': 'other-table', + }] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.referenced_tables, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats['referencedTables'] = ref_tables_resource + + local1, local2, remote = job.referenced_tables + + self.assertIsInstance(local1, Table) + self.assertEqual(local1.name, 'local1') + self.assertIsInstance(local1._dataset, Dataset) + self.assertEqual(local1.dataset_name, 'dataset') + self.assertEqual(local1.project, self.PROJECT) + self.assertIs(local1._dataset._client, client) + + self.assertIsInstance(local2, Table) + self.assertEqual(local2.name, 'local2') + self.assertIsInstance(local2._dataset, Dataset) + self.assertEqual(local2.dataset_name, 'dataset') + self.assertEqual(local2.project, self.PROJECT) + self.assertIs(local2._dataset._client, client) + + self.assertIsInstance(remote, Table) + self.assertEqual(remote.name, 'other-table') + self.assertIsInstance(remote._dataset, Dataset) + self.assertEqual(remote.dataset_name, 'other-dataset') + self.assertEqual(remote.project, 'other-project-123') + self.assertIs(remote._dataset._client, client) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults @@ -2490,10 +2544,10 @@ def __init__(self, project='project', connection=None): self.project = project self._connection = connection - def dataset(self, name): + def dataset(self, name, project=None): from google.cloud.bigquery.dataset import Dataset - return Dataset(name, client=self) + return Dataset(name, client=self, project=project) def _get_query_results(self, job_id): from google.cloud.bigquery.query import QueryResults From 86ca83402ff367c26071b498ad980358ee8642bb Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 8 Sep 2017 07:57:46 -0400 Subject: [PATCH 0259/2016] Add 'QueryJob.undeclared_query_parameters' property. (#3802) --- .../google/cloud/bigquery/job.py | 33 ++++++++ .../tests/unit/test_job.py | 75 +++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 6299b0821c58..17f75f145c8a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -28,7 +28,10 @@ from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery._helpers import ArrayQueryParameter from google.cloud.bigquery._helpers import QueryParametersProperty +from google.cloud.bigquery._helpers import ScalarQueryParameter +from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _EnumProperty from google.cloud.bigquery._helpers import _TypedProperty @@ -1417,6 +1420,36 @@ def referenced_tables(self): return tables + @property + def undeclared_query_paramters(self): + """Return undeclared query parameters from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.undeclaredQueryParamters + + :rtype: + list of + :class:`~google.cloud.bigquery._helpers.AbstractQueryParameter` + :returns: undeclared parameters, or an empty list if the query has + not yet completed. + """ + parameters = [] + undeclared = self._job_statistics().get('undeclaredQueryParamters', ()) + + for parameter in undeclared: + p_type = parameter['parameterType'] + + if 'arrayType' in p_type: + klass = ArrayQueryParameter + elif 'structTypes' in p_type: + klass = StructQueryParameter + else: + klass = ScalarQueryParameter + + parameters.append(klass.from_api_repr(parameter)) + + return parameters + def query_results(self): """Construct a QueryResults instance, bound to this job. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 886539a9d55b..ccb101b184f1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1887,6 +1887,81 @@ def test_referenced_tables(self): self.assertEqual(remote.project, 'other-project-123') self.assertIs(remote._dataset._client, client) + def test_undeclared_query_paramters(self): + from google.cloud.bigquery._helpers import ArrayQueryParameter + from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery._helpers import StructQueryParameter + + undeclared = [{ + 'name': 'my_scalar', + 'parameterType': { + 'type': 'STRING', + }, + 'parameterValue': { + 'value': 'value', + }, + }, { + 'name': 'my_array', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + {'value': '1066'}, + {'value': '1745'}, + ], + }, + }, { + 'name': 'my_struct', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [{ + 'name': 'count', + 'type': { + 'type': 'INT64', + } + }], + }, + 'parameterValue': { + 'structValues': { + 'count': { + 'value': '123', + }, + } + }, + }] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.undeclared_query_paramters, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.undeclared_query_paramters, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.undeclared_query_paramters, []) + + query_stats['undeclaredQueryParamters'] = undeclared + + scalar, array, struct = job.undeclared_query_paramters + + self.assertIsInstance(scalar, ScalarQueryParameter) + self.assertEqual(scalar.name, 'my_scalar') + self.assertEqual(scalar.type_, 'STRING') + self.assertEqual(scalar.value, 'value') + + self.assertIsInstance(array, ArrayQueryParameter) + self.assertEqual(array.name, 'my_array') + self.assertEqual(array.array_type, 'INT64') + self.assertEqual(array.values, [1066, 1745]) + + self.assertIsInstance(struct, StructQueryParameter) + self.assertEqual(struct.name, 'my_struct') + self.assertEqual(struct.struct_types, {'count': 'INT64'}) + self.assertEqual(struct.struct_values, {'count': 123}) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From d7de2a69bda2b3819e66391af769c6040c9b29f8 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 8 Sep 2017 10:25:23 -0400 Subject: [PATCH 0260/2016] Fix system test broken by PR #3798. (#3936) --- packages/google-cloud-bigquery/tests/system.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 0d5b9918fef3..f5188118918e 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -156,14 +156,14 @@ def test_update_dataset(self): self.to_delete.append(dataset) self.assertTrue(dataset.exists()) - after = [grant for grant in dataset.access_grants - if grant.entity_id != 'projectWriters'] - dataset.access_grants = after + after = [entry for entry in dataset.access_entries + if entry.entity_id != 'projectWriters'] + dataset.access_entries = after retry_403(dataset.update)() - self.assertEqual(len(dataset.access_grants), len(after)) - for found, expected in zip(dataset.access_grants, after): + self.assertEqual(len(dataset.access_entries), len(after)) + for found, expected in zip(dataset.access_entries, after): self.assertEqual(found.role, expected.role) self.assertEqual(found.entity_type, expected.entity_type) self.assertEqual(found.entity_id, expected.entity_id) From af1e2a1b9d30062f3ae747dd356ad70d8eefac2d Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 11 Sep 2017 12:32:05 -0400 Subject: [PATCH 0261/2016] Add 'Client.get_job' API wrapper. (#3804) * Allow assigning 'None' to '_TypedProperty' properties. * Ensure that configuration properties are copied when (re)loading jobs. --- packages/google-cloud-bigquery/.coveragerc | 2 + .../google/cloud/bigquery/_helpers.py | 38 ++++++ .../google/cloud/bigquery/client.py | 61 ++++++--- .../google/cloud/bigquery/job.py | 106 ++++++++++++++- .../tests/unit/test__helpers.py | 122 ++++++++++++++++++ .../tests/unit/test_client.py | 64 +++++++++ .../tests/unit/test_job.py | 105 +++++++++++---- 7 files changed, 457 insertions(+), 41 deletions(-) diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index a54b99aa14b7..d097511c3124 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -9,3 +9,5 @@ exclude_lines = pragma: NO COVER # Ignore debug-only repr def __repr__ + # Ignore abstract methods + raise NotImplementedError diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index ac6e9759c084..5f6edf67dca5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -316,6 +316,8 @@ def _validate(self, value): :raises: ValueError on a type mismatch. """ + if value is None: + return if not isinstance(value, self.property_type): raise ValueError('Required type: %s' % (self.property_type,)) @@ -413,6 +415,14 @@ def __init__(self, name, type_, value): self.type_ = type_ self.value = value + def __eq__(self, other): + if not isinstance(other, ScalarQueryParameter): + return NotImplemented + return( + self.name == other.name and + self.type_ == other.type_ and + self.value == other.value) + @classmethod def positional(cls, type_, value): """Factory for positional paramater. @@ -515,6 +525,14 @@ def __init__(self, name, array_type, values): self.array_type = array_type self.values = values + def __eq__(self, other): + if not isinstance(other, ArrayQueryParameter): + return NotImplemented + return( + self.name == other.name and + self.array_type == other.array_type and + self.values == other.values) + @classmethod def positional(cls, array_type, values): """Factory for positional parameters. @@ -657,6 +675,14 @@ def __init__(self, name, *sub_params): types[sub.name] = sub.type_ values[sub.name] = sub.value + def __eq__(self, other): + if not isinstance(other, StructQueryParameter): + return NotImplemented + return( + self.name == other.name and + self.struct_types == other.struct_types and + self.struct_values == other.struct_values) + @classmethod def positional(cls, *sub_params): """Factory for positional parameters. @@ -770,6 +796,18 @@ def __repr__(self): return 'StructQueryParameter{}'.format(self._key()) +def _query_param_from_api_repr(resource): + """Helper: construct concrete query parameter from JSON resource.""" + qp_type = resource['parameterType'] + if 'arrayType' in qp_type: + klass = ArrayQueryParameter + elif 'structTypes' in qp_type: + klass = StructQueryParameter + else: + klass = ScalarQueryParameter + return klass.from_api_repr(resource) + + class QueryParametersProperty(object): """Custom property type, holding query parameter instances.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5578064c1cc7..2ae577a51708 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -222,6 +222,35 @@ def job_from_resource(self, resource): return QueryJob.from_api_repr(resource, self) raise ValueError('Cannot parse job resource') + def get_job(self, job_id, project=None): + """Fetch a job for the project associated with this client. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get + + :type job_id: str + :param job_id: Name of the job. + + :type project: str + :param project: + project ID owning the job (defaults to the client's project) + + :rtype: :class:`~google.cloud.bigquery.job._AsyncJob` + :returns: + Concrete job instance, based on the resource returned by the API. + """ + extra_params = {'projection': 'full'} + + if project is None: + project = self.project + + path = '/projects/{}/jobs/{}'.format(project, job_id) + + resource = self._connection.api_request( + method='GET', path=path, query_params=extra_params) + + return self.job_from_resource(resource) + def list_jobs(self, max_results=None, page_token=None, all_users=None, state_filter=None): """List jobs for the project associated with this client. @@ -272,14 +301,14 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, max_results=max_results, extra_params=extra_params) - def load_table_from_storage(self, job_name, destination, *source_uris): + def load_table_from_storage(self, job_id, destination, *source_uris): """Construct a job for loading data into a table from CloudStorage. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load - :type job_name: str - :param job_name: Name of the job. + :type job_id: str + :param job_id: Name of the job. :type destination: :class:`google.cloud.bigquery.table.Table` :param destination: Table into which data is to be loaded. @@ -291,16 +320,16 @@ def load_table_from_storage(self, job_name, destination, *source_uris): :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: a new ``LoadJob`` instance """ - return LoadJob(job_name, destination, source_uris, client=self) + return LoadJob(job_id, destination, source_uris, client=self) - def copy_table(self, job_name, destination, *sources): + def copy_table(self, job_id, destination, *sources): """Construct a job for copying one or more tables into another table. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy - :type job_name: str - :param job_name: Name of the job. + :type job_id: str + :param job_id: Name of the job. :type destination: :class:`google.cloud.bigquery.table.Table` :param destination: Table into which data is to be copied. @@ -311,16 +340,16 @@ def copy_table(self, job_name, destination, *sources): :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: a new ``CopyJob`` instance """ - return CopyJob(job_name, destination, sources, client=self) + return CopyJob(job_id, destination, sources, client=self) - def extract_table_to_storage(self, job_name, source, *destination_uris): + def extract_table_to_storage(self, job_id, source, *destination_uris): """Construct a job for extracting a table into Cloud Storage files. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract - :type job_name: str - :param job_name: Name of the job. + :type job_id: str + :param job_id: Name of the job. :type source: :class:`google.cloud.bigquery.table.Table` :param source: table to be extracted. @@ -333,17 +362,17 @@ def extract_table_to_storage(self, job_name, source, *destination_uris): :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: a new ``ExtractJob`` instance """ - return ExtractJob(job_name, source, destination_uris, client=self) + return ExtractJob(job_id, source, destination_uris, client=self) - def run_async_query(self, job_name, query, + def run_async_query(self, job_id, query, udf_resources=(), query_parameters=()): """Construct a job for running a SQL query asynchronously. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query - :type job_name: str - :param job_name: Name of the job. + :type job_id: str + :param job_id: Name of the job. :type query: str :param query: SQL query to be executed @@ -362,7 +391,7 @@ def run_async_query(self, job_name, query, :rtype: :class:`google.cloud.bigquery.job.QueryJob` :returns: a new ``QueryJob`` instance """ - return QueryJob(job_name, query, client=self, + return QueryJob(job_id, query, client=self, udf_resources=udf_resources, query_parameters=query_parameters) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 17f75f145c8a..014eb2ee2740 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -32,8 +32,10 @@ from google.cloud.bigquery._helpers import QueryParametersProperty from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter +from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _EnumProperty +from google.cloud.bigquery._helpers import _query_param_from_api_repr from google.cloud.bigquery._helpers import _TypedProperty _DONE_STATE = 'DONE' @@ -61,6 +63,22 @@ } +def _bool_or_none(value): + """Helper: deserialize boolean value from JSON string.""" + if isinstance(value, bool): + return value + if value is not None: + return value.lower() in ['t', 'true', '1'] + + +def _int_or_none(value): + """Helper: deserialize int value from JSON string.""" + if isinstance(value, int): + return value + if value is not None: + return int(value) + + def _error_result_to_exception(error_result): """Maps BigQuery error reasons to an exception. @@ -311,6 +329,10 @@ def _scrub_local_properties(self, cleaned): """Helper: handle subclass properties in cleaned.""" pass + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + raise NotImplementedError("Abstract") + def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -330,6 +352,8 @@ def _set_properties(self, api_response): self._properties.clear() self._properties.update(cleaned) + configuration = cleaned['configuration'][self._JOB_TYPE] + self._copy_configuration_properties(configuration) # For Future interface self._set_future_result() @@ -731,7 +755,7 @@ def _populate_config_resource(self, configuration): if self.quote_character is not None: configuration['quote'] = self.quote_character if self.skip_leading_rows is not None: - configuration['skipLeadingRows'] = self.skip_leading_rows + configuration['skipLeadingRows'] = str(self.skip_leading_rows) if self.source_format is not None: configuration['sourceFormat'] = self.source_format if self.write_disposition is not None: @@ -769,6 +793,28 @@ def _scrub_local_properties(self, cleaned): schema = cleaned.pop('schema', {'fields': ()}) self.schema = _parse_schema_resource(schema) + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + self.allow_jagged_rows = _bool_or_none( + configuration.get('allowJaggedRows')) + self.allow_quoted_newlines = _bool_or_none( + configuration.get('allowQuotedNewlines')) + self.autodetect = _bool_or_none( + configuration.get('autodetect')) + self.create_disposition = configuration.get('createDisposition') + self.encoding = configuration.get('encoding') + self.field_delimiter = configuration.get('fieldDelimiter') + self.ignore_unknown_values = _bool_or_none( + configuration.get('ignoreUnknownValues')) + self.max_bad_records = _int_or_none( + configuration.get('maxBadRecords')) + self.null_marker = configuration.get('nullMarker') + self.quote_character = configuration.get('quote') + self.skip_leading_rows = _int_or_none( + configuration.get('skipLeadingRows')) + self.source_format = configuration.get('sourceFormat') + self.write_disposition = configuration.get('writeDisposition') + @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -879,6 +925,11 @@ def _build_resource(self): return resource + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + self.create_disposition = configuration.get('createDisposition') + self.write_disposition = configuration.get('writeDisposition') + @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -1012,6 +1063,14 @@ def _build_resource(self): return resource + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + self.compression = configuration.get('compression') + self.destination_format = configuration.get('destinationFormat') + self.field_delimiter = configuration.get('fieldDelimiter') + self.print_header = _bool_or_none( + configuration.get('printHeader')) + @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -1208,7 +1267,8 @@ def _populate_config_resource(self, configuration): if self.maximum_billing_tier is not None: configuration['maximumBillingTier'] = self.maximum_billing_tier if self.maximum_bytes_billed is not None: - configuration['maximumBytesBilled'] = self.maximum_bytes_billed + configuration['maximumBytesBilled'] = str( + self.maximum_bytes_billed) if len(self._udf_resources) > 0: configuration[self._UDF_KEY] = [ {udf_resource.udf_type: udf_resource.value} @@ -1258,6 +1318,25 @@ def _scrub_local_properties(self, cleaned): configuration = cleaned['configuration']['query'] self.query = configuration['query'] + + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + self.allow_large_results = _bool_or_none( + configuration.get('allowLargeResults')) + self.flatten_results = _bool_or_none( + configuration.get('flattenResults')) + self.use_query_cache = _bool_or_none( + configuration.get('useQueryCache')) + self.use_legacy_sql = _bool_or_none( + configuration.get('useLegacySql')) + + self.create_disposition = configuration.get('createDisposition') + self.priority = configuration.get('priority') + self.write_disposition = configuration.get('writeDisposition') + self.maximum_billing_tier = configuration.get('maximumBillingTier') + self.maximum_bytes_billed = _int_or_none( + configuration.get('maximumBytesBilled')) + dest_remote = configuration.get('destinationTable') if dest_remote is None: @@ -1266,9 +1345,30 @@ def _scrub_local_properties(self, cleaned): else: dest_local = self._destination_table_resource() if dest_remote != dest_local: - dataset = self._client.dataset(dest_remote['datasetId']) + project = dest_remote['projectId'] + dataset = self._client.dataset( + dest_remote['datasetId'], project=project) self.destination = dataset.table(dest_remote['tableId']) + def_ds = configuration.get('defaultDataset') + if def_ds is None: + if self.default_dataset is not None: + del self.default_dataset + else: + project = def_ds['projectId'] + self.default_dataset = self._client.dataset(def_ds['datasetId']) + + udf_resources = [] + for udf_mapping in configuration.get(self._UDF_KEY, ()): + key_val, = udf_mapping.items() + udf_resources.append(UDFResource(key_val[0], key_val[1])) + self._udf_resources = udf_resources + + self._query_parameters = [ + _query_param_from_api_repr(mapping) + for mapping in configuration.get(self._QUERY_PARAMETERS_KEY, ()) + ] + @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 9dc14f6e3a47..c43f7b6d1ae3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -736,6 +736,14 @@ def __init__(self): self.assertEqual(wrapper.attr, 42) self.assertEqual(wrapper._configuration._attr, 42) + wrapper.attr = None + self.assertIsNone(wrapper.attr) + self.assertIsNone(wrapper._configuration._attr) + + wrapper.attr = 23 + self.assertEqual(wrapper.attr, 23) + self.assertEqual(wrapper._configuration._attr, 23) + del wrapper.attr self.assertIsNone(wrapper.attr) self.assertIsNone(wrapper._configuration._attr) @@ -899,6 +907,17 @@ def test_ctor(self): self.assertEqual(param.type_, 'INT64') self.assertEqual(param.value, 123) + def test___eq__(self): + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one(name='bar', type_='INT64', value=123) + self.assertNotEqual(param, alias) + wrong_type = self._make_one(name='foo', type_='FLOAT64', value=123.0) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one(name='foo', type_='INT64', value=234) + self.assertNotEqual(param, wrong_val) + def test_positional(self): klass = self._get_target_class() param = klass.positional(type_='INT64', value=123) @@ -1171,6 +1190,19 @@ def test_ctor(self): self.assertEqual(param.array_type, 'INT64') self.assertEqual(param.values, [1, 2]) + def test___eq__(self): + param = self._make_one(name='foo', array_type='INT64', values=[123]) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one(name='bar', array_type='INT64', values=[123]) + self.assertNotEqual(param, alias) + wrong_type = self._make_one( + name='foo', array_type='FLOAT64', values=[123.0]) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one( + name='foo', array_type='INT64', values=[234]) + self.assertNotEqual(param, wrong_val) + def test_positional(self): klass = self._get_target_class() param = klass.positional(array_type='INT64', values=[1, 2]) @@ -1446,6 +1478,21 @@ def test_ctor(self): self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + def test___eq__(self): + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + sub_3 = _make_subparam('baz', 'STRING', 'def') + sub_1_float = _make_subparam('bar', 'FLOAT64', 123.0) + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one('bar', sub_1, sub_2) + self.assertNotEqual(param, alias) + wrong_type = self._make_one('foo', sub_1_float, sub_2) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one('foo', sub_2, sub_3) + self.assertNotEqual(param, wrong_val) + def test_positional(self): sub_1 = _make_subparam('bar', 'INT64', 123) sub_2 = _make_subparam('baz', 'STRING', 'abc') @@ -1749,6 +1796,81 @@ def test___repr__(self): self.assertIn("'field1': 'hello'", got) +class Test__query_param_from_api_repr(unittest.TestCase): + + @staticmethod + def _call_fut(resource): + from google.cloud.bigquery._helpers import _query_param_from_api_repr + + return _query_param_from_api_repr(resource) + + def test_w_scalar(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': {'type': 'INT64'}, + 'parameterValue': {'value': '123'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual(parameter.type_, 'INT64') + self.assertEqual(parameter.value, 123) + + def test_w_array(self): + from google.cloud.bigquery._helpers import ArrayQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }, + 'parameterValue': { + 'arrayValues': [ + {'value': '123'}, + ]}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ArrayQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual(parameter.array_type, 'INT64') + self.assertEqual(parameter.values, [123]) + + def test_w_struct(self): + from google.cloud.bigquery._helpers import StructQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'foo', 'type': {'type': 'STRING'}}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'foo': {'value': 'Foo'}, + 'bar': {'value': '123'}, + } + }, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, StructQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual( + parameter.struct_types, {'foo': 'STRING', 'bar': 'INT64'}) + self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123}) + + class Test_QueryParametersProperty(unittest.TestCase): @staticmethod diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 6a9a98f2952e..3cd4a24ceb43 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -266,6 +266,70 @@ def test_job_from_resource_unknown_type(self): with self.assertRaises(ValueError): client.job_from_resource({'configuration': {'nonesuch': {}}}) + def test_get_job_miss_w_explict_project(self): + from google.cloud.exceptions import NotFound + + PROJECT = 'PROJECT' + OTHER_PROJECT = 'OTHER_PROJECT' + JOB_ID = 'NONESUCH' + creds = _make_credentials() + client = self._make_one(PROJECT, creds) + conn = client._connection = _Connection() + + with self.assertRaises(NotFound): + client.get_job(JOB_ID, project=OTHER_PROJECT) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/projects/OTHER_PROJECT/jobs/NONESUCH') + self.assertEqual(req['query_params'], {'projection': 'full'}) + + def test_get_job_hit(self): + from google.cloud.bigquery.job import QueryJob + + PROJECT = 'PROJECT' + JOB_ID = 'query_job' + DATASET = 'test_dataset' + QUERY_DESTINATION_TABLE = 'query_destination_table' + QUERY = 'SELECT * from test_dataset:test_table' + ASYNC_QUERY_DATA = { + 'id': '{}:{}'.format(PROJECT, JOB_ID), + 'jobReference': { + 'projectId': PROJECT, + 'jobId': 'query_job', + }, + 'state': 'DONE', + 'configuration': { + 'query': { + 'query': QUERY, + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': QUERY_DESTINATION_TABLE, + }, + 'createDisposition': 'CREATE_IF_NEEDED', + 'writeDisposition': 'WRITE_TRUNCATE', + } + }, + } + creds = _make_credentials() + client = self._make_one(PROJECT, creds) + conn = client._connection = _Connection(ASYNC_QUERY_DATA) + + job = client.get_job(JOB_ID) + + self.assertIsInstance(job, QueryJob) + self.assertEqual(job.name, JOB_ID) + self.assertEqual(job.create_disposition, 'CREATE_IF_NEEDED') + self.assertEqual(job.write_disposition, 'WRITE_TRUNCATE') + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/projects/PROJECT/jobs/query_job') + self.assertEqual(req['query_params'], {'projection': 'full'}) + def test_list_jobs_defaults(self): import six from google.cloud.bigquery.job import LoadJob diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ccb101b184f1..9e684faf4f8b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -18,9 +18,49 @@ import unittest +class Test__bool_or_none(unittest.TestCase): + + def _call_fut(self, *args, **kwargs): + from google.cloud.bigquery import job + + return job._bool_or_none(*args, **kwargs) + + def test_w_bool(self): + self.assertTrue(self._call_fut(True)) + self.assertFalse(self._call_fut(False)) + + def test_w_none(self): + self.assertIsNone(self._call_fut(None)) + + def test_w_str(self): + self.assertTrue(self._call_fut('1')) + self.assertTrue(self._call_fut('t')) + self.assertTrue(self._call_fut('true')) + self.assertFalse(self._call_fut('anything else')) + + +class Test__int_or_none(unittest.TestCase): + + def _call_fut(self, *args, **kwargs): + from google.cloud.bigquery import job + + return job._int_or_none(*args, **kwargs) + + def test_w_int(self): + self.assertEqual(self._call_fut(13), 13) + + def test_w_none(self): + self.assertIsNone(self._call_fut(None)) + + def test_w_str(self): + self.assertEqual(self._call_fut('13'), 13) + + class Test__error_result_to_exception(unittest.TestCase): + def _call_fut(self, *args, **kwargs): from google.cloud.bigquery import job + return job._error_result_to_exception(*args, **kwargs) def test_simple(self): @@ -259,7 +299,7 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.quote_character) if 'skipLeadingRows' in config: - self.assertEqual(job.skip_leading_rows, + self.assertEqual(str(job.skip_leading_rows), config['skipLeadingRows']) else: self.assertIsNone(job.skip_leading_rows) @@ -517,10 +557,12 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() + load_config = RESOURCE['configuration']['load'] + load_config['createDisposition'] = 'CREATE_IF_NEEDED' klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_begin_w_already_running(self): conn = _Connection() @@ -632,7 +674,7 @@ def test_begin_w_alternate_client(self): 'maxBadRecords': 100, 'nullMarker': r'\N', 'quote': "'", - 'skipLeadingRows': 1, + 'skipLeadingRows': '1', 'sourceFormat': 'CSV', 'writeDisposition': 'WRITE_TRUNCATE', 'schema': {'fields': [ @@ -973,10 +1015,12 @@ def test_from_api_repr_wo_sources(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() + copy_config = RESOURCE['configuration']['copy'] + copy_config['createDisposition'] = 'CREATE_IF_NEEDED' klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -1272,10 +1316,12 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() + extract_config = RESOURCE['configuration']['extract'] + extract_config['compression'] = 'GZIP' klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -1484,13 +1530,14 @@ def _verifyBooleanResourceProperties(self, job, config): def _verifyIntegerResourceProperties(self, job, config): if 'maximumBillingTier' in config: - self.assertEqual(job.maximum_billing_tier, - config['maximumBillingTier']) + self.assertEqual( + job.maximum_billing_tier, config['maximumBillingTier']) else: self.assertIsNone(job.maximum_billing_tier) if 'maximumBytesBilled' in config: - self.assertEqual(job.maximum_bytes_billed, - config['maximumBytesBilled']) + self.assertEqual( + str(job.maximum_bytes_billed), config['maximumBytesBilled']) + self.assertIsInstance(job.maximum_bytes_billed, int) else: self.assertIsNone(job.maximum_bytes_billed) @@ -1643,7 +1690,7 @@ def test_from_api_repr_bare(self): 'jobId': self.JOB_NAME, }, 'configuration': { - 'query': {'query': self.QUERY} + 'query': {'query': self.QUERY}, }, } klass = self._get_target_class() @@ -1654,15 +1701,18 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) RESOURCE = self._makeResource() - RESOURCE['configuration']['query']['destinationTable'] = { + query_config = RESOURCE['configuration']['query'] + query_config['createDisposition'] = 'CREATE_IF_NEEDED' + query_config['writeDisposition'] = 'WRITE_TRUNCATE' + query_config['destinationTable'] = { 'projectId': self.PROJECT, 'datasetId': self.DS_NAME, 'tableId': self.DESTINATION_TABLE, } klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_cancelled(self): client = _Client(self.PROJECT) @@ -2058,7 +2108,10 @@ def test_result_error(self): self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) def test_begin_w_bound_client(self): + from google.cloud.bigquery.dataset import Dataset + PATH = '/projects/%s/jobs' % (self.PROJECT,) + DS_NAME = 'DATASET' RESOURCE = self._makeResource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] @@ -2067,9 +2120,13 @@ def test_begin_w_bound_client(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + job.default_dataset = Dataset(DS_NAME, client) job.begin() + + self.assertIsNone(job.default_dataset) self.assertEqual(job.udf_resources, []) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -2082,7 +2139,11 @@ def test_begin_w_bound_client(self): }, 'configuration': { 'query': { - 'query': self.QUERY + 'query': self.QUERY, + 'defaultDataset': { + 'projectId': self.PROJECT, + 'datasetId': DS_NAME, + }, }, }, } @@ -2116,7 +2177,7 @@ def test_begin_w_alternate_client(self): 'useLegacySql': True, 'writeDisposition': 'WRITE_TRUNCATE', 'maximumBillingTier': 4, - 'maximumBytesBilled': 123456 + 'maximumBytesBilled': '123456' } RESOURCE['configuration']['query'] = QUERY_CONFIGURATION conn1 = _Connection() From abb4321b8a93c2fc521b1d42db05e6f4a161ad8e Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 11 Sep 2017 12:38:03 -0400 Subject: [PATCH 0262/2016] Add 'ExtractTableStorageJob.destination_uri_file_counts' property. (#3803) --- .../google/cloud/bigquery/job.py | 16 ++++++++++++++++ .../tests/unit/test_job.py | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 014eb2ee2740..f060ba1bc2f9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1026,6 +1026,22 @@ def __init__(self, name, source, destination_uris, client): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader """ + @property + def destination_uri_file_counts(self): + """Return file counts from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.extract.destinationUriFileCounts + + :rtype: int or None + :returns: number of DML rows affectd by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('destinationUriFileCounts') + if result is not None: + result = int(result) + return result + def _populate_config_resource(self, configuration): """Helper for _build_resource: copy config properties to resource""" if self.compression is not None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 9e684faf4f8b..23fb95eea123 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1266,6 +1266,23 @@ def test_ctor(self): self.assertIsNone(job.field_delimiter) self.assertIsNone(job.print_header) + def test_destination_uri_file_counts(self): + file_counts = 23 + client = _Client(self.PROJECT) + source = _Table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + client) + self.assertIsNone(job.destination_uri_file_counts) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats = statistics['extract'] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats['destinationUriFileCounts'] = str(file_counts) + self.assertEqual(job.destination_uri_file_counts, file_counts) + def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) From 018944c6427d7ae27d1ee2ab09b1075c020e3bf5 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 11 Sep 2017 12:08:09 -0700 Subject: [PATCH 0263/2016] bigquery add DatasetReference class and tests (#3938) --- .../google/cloud/bigquery/dataset.py | 36 +++++++++++++++++++ .../tests/unit/test_dataset.py | 17 +++++++++ 2 files changed, 53 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index a688cb3b560b..f4e6fd519e38 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -91,6 +91,42 @@ def __repr__(self): self.role, self.entity_type, self.entity_id) +class DatasetReference(object): + """DatasetReferences are pointers to datasets. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets + + :type project_id: str + :param project_id: the ID of the project + + :type dataset_id: str + :param dataset_id: the ID of the dataset + """ + + def __init__(self, project_id, dataset_id): + self._project_id = project_id + self._dataset_id = dataset_id + + @property + def project_id(self): + """Project ID of the dataset. + + :rtype: str + :returns: the project ID. + """ + return self._project_id + + @property + def dataset_id(self): + """Dataset ID. + + :rtype: str + :returns: the dataset ID. + """ + return self._dataset_id + + class Dataset(object): """Datasets are containers for tables. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 10b832d7abe4..4da2ada5de66 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -85,6 +85,23 @@ def test__eq___type_mismatch(self): self.assertEqual(entry, mock.ANY) +class TestDatasetReference(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.dataset import DatasetReference + + return DatasetReference + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + dataset_ref = self._make_one('some-project-1', 'dataset_1') + self.assertEqual(dataset_ref.project_id, 'some-project-1') + self.assertEqual(dataset_ref.dataset_id, 'dataset_1') + + class TestDataset(unittest.TestCase): PROJECT = 'project' DS_NAME = 'dataset-name' From b72f80fbd07844a36cc47869bcc97523ebbc0b3f Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 12 Sep 2017 17:24:21 -0700 Subject: [PATCH 0264/2016] BigQuery: Add TestReference class. Add table function to DatasetReference (#3942) --- .../google/cloud/bigquery/dataset.py | 9 +++++ .../google/cloud/bigquery/table.py | 36 +++++++++++++++++++ .../tests/unit/test_dataset.py | 6 ++++ .../tests/unit/test_table.py | 20 +++++++++++ 4 files changed, 71 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index f4e6fd519e38..e31b4a2a93b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -19,6 +19,7 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud.exceptions import NotFound from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference class AccessEntry(object): @@ -126,6 +127,14 @@ def dataset_id(self): """ return self._dataset_id + def table(self, table_id): + """Constructs a TableReference. + + :rtype: :class:`google.cloud.bigquery.table.TableReference` + :returns: a TableReference for a table in this dataset. + """ + return TableReference(self, table_id) + class Dataset(object): """Datasets are containers for tables. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 2121730d3f79..0d651aa6b5f1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -48,6 +48,42 @@ _DEFAULT_NUM_RETRIES = 6 +class TableReference(object): + """TableReferences are pointers to tables. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables + + :type dataset_ref: :class:`google.cloud.bigquery.dataset.DatasetReference` + :param dataset_ref: a pointer to the dataset + + :type table_id: str + :param table_id: the ID of the table + """ + + def __init__(self, dataset_ref, table_id): + self._dataset_ref = dataset_ref + self._table_id = table_id + + @property + def dataset_ref(self): + """Pointer to the dataset. + + :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference` + :returns: a pointer to the dataset. + """ + return self._dataset_ref + + @property + def table_id(self): + """Table ID. + + :rtype: str + :returns: the table ID. + """ + return self._table_id + + class Table(object): """Tables represent a set of rows whose values correspond to a schema. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 4da2ada5de66..c509be6838a1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -101,6 +101,12 @@ def test_ctor_defaults(self): self.assertEqual(dataset_ref.project_id, 'some-project-1') self.assertEqual(dataset_ref.dataset_id, 'dataset_1') + def test_table(self): + dataset_ref = self._make_one('some-project-1', 'dataset_1') + table_ref = dataset_ref.table('table_1') + self.assertIs(table_ref.dataset_ref, dataset_ref) + self.assertEqual(table_ref.table_id, 'table_1') + class TestDataset(unittest.TestCase): PROJECT = 'project' diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index aa9e00670655..2629f824e0b2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -37,6 +37,26 @@ def _verifySchema(self, schema, resource): self._verify_field(field, r_field) +class TestTableReference(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import TableReference + + return TableReference + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + + table_ref = self._make_one(dataset_ref, 'table_1') + self.assertIs(table_ref.dataset_ref, dataset_ref) + self.assertEqual(table_ref.table_id, 'table_1') + + class TestTable(unittest.TestCase, _SchemaBase): PROJECT = 'prahj-ekt' From dc2622ae217298aafe3c3770eed44fdb7d885f0d Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 13 Sep 2017 12:30:28 -0700 Subject: [PATCH 0265/2016] BigQuery: Modify client.dataset() to return DatasetReference instead of Dataset. (#3944) * BigQuery: Add TestReference class. Add table function to DatasetReference * BigQuery: Modify client.dataset() to return DatasetReference instead of Dataset. * Bigquery: client.dataset() uses default project if not specified --- .../google/cloud/bigquery/client.py | 12 ++-- .../google/cloud/bigquery/job.py | 8 +-- .../google-cloud-bigquery/tests/system.py | 55 ++++++++++--------- .../tests/unit/test_client.py | 23 ++++++-- .../tests/unit/test_job.py | 5 -- 5 files changed, 58 insertions(+), 45 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2ae577a51708..5ab8ff820764 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -18,6 +18,7 @@ from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob @@ -148,7 +149,7 @@ def list_datasets(self, include_all=False, max_results=None, extra_params=extra_params) def dataset(self, dataset_name, project=None): - """Construct a dataset bound to this client. + """Construct a reference to a dataset. :type dataset_name: str :param dataset_name: Name of the dataset. @@ -157,10 +158,13 @@ def dataset(self, dataset_name, project=None): :param project: (Optional) project ID for the dataset (defaults to the project of the client). - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` - :returns: a new ``Dataset`` instance + :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference` + :returns: a new ``DatasetReference`` instance """ - return Dataset(dataset_name, client=self, project=project) + if project is None: + project = self.project + + return DatasetReference(project, dataset_name) def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index f060ba1bc2f9..6f5c2c294a0c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1362,8 +1362,8 @@ def _copy_configuration_properties(self, configuration): dest_local = self._destination_table_resource() if dest_remote != dest_local: project = dest_remote['projectId'] - dataset = self._client.dataset( - dest_remote['datasetId'], project=project) + dataset = Dataset( + dest_remote['datasetId'], self._client, project=project) self.destination = dataset.table(dest_remote['tableId']) def_ds = configuration.get('defaultDataset') @@ -1372,7 +1372,7 @@ def _copy_configuration_properties(self, configuration): del self.default_dataset else: project = def_ds['projectId'] - self.default_dataset = self._client.dataset(def_ds['datasetId']) + self.default_dataset = Dataset(def_ds['datasetId'], self._client) udf_resources = [] for udf_mapping in configuration.get(self._UDF_KEY, ()): @@ -1528,7 +1528,7 @@ def referenced_tables(self): ds_name = table['datasetId'] t_dataset = datasets_by_project_name.get((t_project, ds_name)) if t_dataset is None: - t_dataset = client.dataset(ds_name, project=t_project) + t_dataset = Dataset(ds_name, client, project=t_project) datasets_by_project_name[(t_project, ds_name)] = t_dataset t_name = table['tableId'] diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index f5188118918e..3f1817706672 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -25,6 +25,7 @@ import six from google.cloud import bigquery +from google.cloud.bigquery.dataset import Dataset from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi from google.cloud.exceptions import Forbidden @@ -111,7 +112,7 @@ def _still_in_use(bad_request): def test_create_dataset(self): DATASET_NAME = _make_dataset_name('create_dataset') - dataset = Config.CLIENT.dataset(DATASET_NAME) + dataset = Dataset(DATASET_NAME, Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -122,20 +123,20 @@ def test_create_dataset(self): def test_reload_dataset(self): DATASET_NAME = _make_dataset_name('reload_dataset') - dataset = Config.CLIENT.dataset(DATASET_NAME) + dataset = Dataset(DATASET_NAME, Config.CLIENT) dataset.friendly_name = 'Friendly' dataset.description = 'Description' retry_403(dataset.create)() self.to_delete.append(dataset) - other = Config.CLIENT.dataset(DATASET_NAME) + other = Dataset(DATASET_NAME, Config.CLIENT) other.reload() self.assertEqual(other.friendly_name, 'Friendly') self.assertEqual(other.description, 'Description') def test_patch_dataset(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('patch_dataset')) + dataset = Dataset(_make_dataset_name('patch_dataset'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -149,7 +150,7 @@ def test_patch_dataset(self): self.assertEqual(dataset.description, 'Description') def test_update_dataset(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('update_dataset')) + dataset = Dataset(_make_dataset_name('update_dataset'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -175,7 +176,7 @@ def test_list_datasets(self): 'newest' + unique_resource_id(), ] for dataset_name in datasets_to_create: - created_dataset = Config.CLIENT.dataset(dataset_name) + created_dataset = Dataset(dataset_name, Config.CLIENT) retry_403(created_dataset.create)() self.to_delete.append(created_dataset) @@ -189,7 +190,7 @@ def test_list_datasets(self): self.assertEqual(len(created), len(datasets_to_create)) def test_create_table(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('create_table')) + dataset = Dataset(_make_dataset_name('create_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -208,7 +209,7 @@ def test_create_table(self): def test_list_tables(self): DATASET_NAME = _make_dataset_name('list_tables') - dataset = Config.CLIENT.dataset(DATASET_NAME) + dataset = Dataset(DATASET_NAME, Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -244,7 +245,7 @@ def test_list_tables(self): self.assertEqual(len(created), len(tables_to_create)) def test_patch_table(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('patch_table')) + dataset = Dataset(_make_dataset_name('patch_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -266,7 +267,7 @@ def test_patch_table(self): self.assertEqual(table.description, 'Description') def test_update_table(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('update_table')) + dataset = Dataset(_make_dataset_name('update_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -309,8 +310,8 @@ def test_insert_data_then_dump_table(self): ('Bhettye Rhubble', 27, None), ] ROW_IDS = range(len(ROWS)) - dataset = Config.CLIENT.dataset( - _make_dataset_name('insert_data_then_dump')) + dataset = Dataset( + _make_dataset_name('insert_data_then_dump'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -353,8 +354,8 @@ def test_load_table_from_local_file_then_dump_table(self): ] TABLE_NAME = 'test_table' - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_local_then_dump')) + dataset = Dataset( + _make_dataset_name('load_local_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -402,8 +403,8 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("orange", 590), ("red", 650)] - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_local_then_dump')) + dataset = Dataset( + _make_dataset_name('load_local_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -466,8 +467,8 @@ def test_load_table_from_storage_then_dump_table(self): self.to_delete.insert(0, blob) - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_gcs_then_dump')) + dataset = Dataset( + _make_dataset_name('load_gcs_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -536,8 +537,8 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.to_delete.insert(0, blob) - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_gcs_then_dump')) + dataset = Dataset( + _make_dataset_name('load_gcs_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -574,7 +575,7 @@ def test_job_cancel(self): TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_NAME, TABLE_NAME) - dataset = Config.CLIENT.dataset(DATASET_NAME) + dataset = Dataset(DATASET_NAME, Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -765,7 +766,7 @@ def test_dbapi_fetchall(self): def _load_table_for_dml(self, rows, dataset_name, table_name): from google.cloud._testing import _NamedTemporaryFile - dataset = Config.CLIENT.dataset(dataset_name) + dataset = Dataset(dataset_name, Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -1099,7 +1100,7 @@ def test_dump_table_w_public_data(self): DATASET_NAME = 'samples' TABLE_NAME = 'natality' - dataset = Config.CLIENT.dataset(DATASET_NAME, project=PUBLIC) + dataset = Dataset(DATASET_NAME, Config.CLIENT, project=PUBLIC) table = dataset.table(TABLE_NAME) # Reload table to get the schema before fetching the rows. table.reload() @@ -1152,8 +1153,8 @@ def test_insert_nested_nested(self): ('Some value', record) ] table_name = 'test_table' - dataset = Config.CLIENT.dataset( - _make_dataset_name('issue_2951')) + dataset = Dataset( + _make_dataset_name('issue_2951'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -1172,8 +1173,8 @@ def test_insert_nested_nested(self): def test_create_table_insert_fetch_nested_schema(self): table_name = 'test_table' - dataset = Config.CLIENT.dataset( - _make_dataset_name('create_table_nested_schema')) + dataset = Dataset( + _make_dataset_name('create_table_nested_schema'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 3cd4a24ceb43..70e1f1eea7c7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -246,8 +246,21 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): self.assertEqual(req['query_params'], {'all': True, 'maxResults': 3, 'pageToken': TOKEN}) - def test_dataset(self): - from google.cloud.bigquery.dataset import Dataset + def test_dataset_with_specified_project(self): + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DATASET = 'dataset_name' + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + dataset = client.dataset(DATASET, PROJECT) + self.assertIsInstance(dataset, DatasetReference) + self.assertEqual(dataset.dataset_id, DATASET) + self.assertEqual(dataset.project_id, PROJECT) + + def test_dataset_with_default_project(self): + from google.cloud.bigquery.dataset import DatasetReference PROJECT = 'PROJECT' DATASET = 'dataset_name' @@ -255,9 +268,9 @@ def test_dataset(self): http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) dataset = client.dataset(DATASET) - self.assertIsInstance(dataset, Dataset) - self.assertEqual(dataset.name, DATASET) - self.assertIs(dataset._client, client) + self.assertIsInstance(dataset, DatasetReference) + self.assertEqual(dataset.dataset_id, DATASET) + self.assertEqual(dataset.project_id, PROJECT) def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 23fb95eea123..a4b96470c2e7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2697,11 +2697,6 @@ def __init__(self, project='project', connection=None): self.project = project self._connection = connection - def dataset(self, name, project=None): - from google.cloud.bigquery.dataset import Dataset - - return Dataset(name, client=self, project=project) - def _get_query_results(self, job_id): from google.cloud.bigquery.query import QueryResults From dff9972d16b3f0b81e9c75b7220368d1c4b0d575 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 13 Sep 2017 18:00:53 -0400 Subject: [PATCH 0266/2016] bigquery: rename TableReference.dataset_ref (#3953) * bigquery: rename TableReference.dataset_ref Rename to dataset to be consistent with Client.dataset. Both methods actually return a DatasetReference. * fix broken tests --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_dataset.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_table.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 0d651aa6b5f1..69d99ab4450f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -66,7 +66,7 @@ def __init__(self, dataset_ref, table_id): self._table_id = table_id @property - def dataset_ref(self): + def dataset(self): """Pointer to the dataset. :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference` diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index c509be6838a1..09fdbbe034ce 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -104,7 +104,7 @@ def test_ctor_defaults(self): def test_table(self): dataset_ref = self._make_one('some-project-1', 'dataset_1') table_ref = dataset_ref.table('table_1') - self.assertIs(table_ref.dataset_ref, dataset_ref) + self.assertIs(table_ref.dataset, dataset_ref) self.assertEqual(table_ref.table_id, 'table_1') diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 2629f824e0b2..9e0db94bc6cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -53,7 +53,7 @@ def test_ctor_defaults(self): dataset_ref = DatasetReference('project_1', 'dataset_1') table_ref = self._make_one(dataset_ref, 'table_1') - self.assertIs(table_ref.dataset_ref, dataset_ref) + self.assertIs(table_ref.dataset, dataset_ref) self.assertEqual(table_ref.table_id, 'table_1') From 2b8fb345ac25ced96bd3e08f088151b5fecc8518 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 14 Sep 2017 14:43:08 -0400 Subject: [PATCH 0267/2016] bigquery: rename name field of Dataset to dataset_id (#3955) * bigquery: rename name field of Dataset to dataset_id Rename the former dataset_id property to full_dataset_id. Also rename Table.dataset_name to Table.dataset_id. Perform other renamings (of various variables and constants). These names match usage better. The API's Dataset.id field is "project:dataset_id", which is confusing and basically useless, so it's a mistake to call that dataset_id. * fix long line * fix long line --- .../google/cloud/bigquery/dataset.py | 25 +-- .../google/cloud/bigquery/job.py | 12 +- .../google/cloud/bigquery/query.py | 2 +- .../google/cloud/bigquery/table.py | 14 +- .../google-cloud-bigquery/tests/system.py | 70 ++++----- .../tests/unit/test_client.py | 2 +- .../tests/unit/test_dataset.py | 144 +++++++++--------- .../tests/unit/test_job.py | 96 ++++++------ .../tests/unit/test_table.py | 105 ++++++------- 9 files changed, 236 insertions(+), 234 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index e31b4a2a93b1..cd31f737e693 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -142,8 +142,8 @@ class Dataset(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets - :type name: str - :param name: the name of the dataset + :type dataset_id: str + :param dataset_id: the ID of the dataset :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration @@ -159,8 +159,8 @@ class Dataset(object): _access_entries = None - def __init__(self, name, client, access_entries=(), project=None): - self.name = name + def __init__(self, dataset_id, client, access_entries=(), project=None): + self.dataset_id = dataset_id self._client = client self._properties = {} # Let the @property do validation. @@ -181,9 +181,9 @@ def path(self): """URL path for the dataset's APIs. :rtype: str - :returns: the path based on project and dataste name. + :returns: the path based on project and dataset ID. """ - return '/projects/%s/datasets/%s' % (self.project, self.name) + return '/projects/%s/datasets/%s' % (self.project, self.dataset_id) @property def access_entries(self): @@ -221,8 +221,8 @@ def created(self): return _datetime_from_microseconds(1000.0 * creation_time) @property - def dataset_id(self): - """ID for the dataset resource. + def full_dataset_id(self): + """ID for the dataset resource, in the form "project_id:dataset_id". :rtype: str, or ``NoneType`` :returns: the ID (None until set from the server). @@ -365,8 +365,8 @@ def from_api_repr(cls, resource, client): 'datasetId' not in resource['datasetReference']): raise KeyError('Resource lacks required identity information:' '["datasetReference"]["datasetId"]') - name = resource['datasetReference']['datasetId'] - dataset = cls(name, client=client) + dataset_id = resource['datasetReference']['datasetId'] + dataset = cls(dataset_id, client=client) dataset._set_properties(resource) return dataset @@ -444,7 +444,7 @@ def _build_resource(self): """Generate a resource for ``create`` or ``update``.""" resource = { 'datasetReference': { - 'projectId': self.project, 'datasetId': self.name}, + 'projectId': self.project, 'datasetId': self.dataset_id}, } if self.default_table_expiration_ms is not None: value = self.default_table_expiration_ms @@ -610,7 +610,8 @@ def list_tables(self, max_results=None, page_token=None): :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` contained within the current dataset. """ - path = '/projects/%s/datasets/%s/tables' % (self.project, self.name) + path = '/projects/%s/datasets/%s/tables' % ( + self.project, self.dataset_id) result = page_iterator.HTTPIterator( client=self._client, api_request=self._client._connection.api_request, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 6f5c2c294a0c..f513a98d23cd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -773,7 +773,7 @@ def _build_resource(self): 'sourceUris': self.source_uris, 'destinationTable': { 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_name, + 'datasetId': self.destination.dataset_id, 'tableId': self.destination.name, }, }, @@ -900,7 +900,7 @@ def _build_resource(self): source_refs = [{ 'projectId': table.project, - 'datasetId': table.dataset_name, + 'datasetId': table.dataset_id, 'tableId': table.name, } for table in self.sources] @@ -914,7 +914,7 @@ def _build_resource(self): 'sourceTables': source_refs, 'destinationTable': { 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_name, + 'datasetId': self.destination.dataset_id, 'tableId': self.destination.name, }, }, @@ -1058,7 +1058,7 @@ def _build_resource(self): source_ref = { 'projectId': self.source.project, - 'datasetId': self.source.dataset_name, + 'datasetId': self.source.dataset_id, 'tableId': self.source.name, } @@ -1247,7 +1247,7 @@ def _destination_table_resource(self): if self.destination is not None: return { 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_name, + 'datasetId': self.destination.dataset_id, 'tableId': self.destination.name, } @@ -1271,7 +1271,7 @@ def _populate_config_resource(self, configuration): if self.default_dataset is not None: configuration['defaultDataset'] = { 'projectId': self.default_dataset.project, - 'datasetId': self.default_dataset.name, + 'datasetId': self.default_dataset.dataset_id, } if self.destination is not None: table_res = self._destination_table_resource() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 185b68deb104..fa03d373674d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -329,7 +329,7 @@ def _build_resource(self): if self.default_dataset is not None: resource['defaultDataset'] = { 'projectId': self.project, - 'datasetId': self.default_dataset.name, + 'datasetId': self.default_dataset.dataset_id, } if self.max_results is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 69d99ab4450f..e06e79271d0a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -119,13 +119,13 @@ def project(self): return self._dataset.project @property - def dataset_name(self): - """Name of dataset containing the table. + def dataset_id(self): + """ID of dataset containing the table. :rtype: str :returns: the ID (derived from the dataset). """ - return self._dataset.name + return self._dataset.dataset_id @property def path(self): @@ -463,7 +463,7 @@ def list_partitions(self, client=None): """ query = self._require_client(client).run_sync_query( 'SELECT partition_id from [%s.%s$__PARTITIONS_SUMMARY__]' % - (self.dataset_name, self.name)) + (self.dataset_id, self.name)) query.run() return [row[0] for row in query.rows] @@ -527,7 +527,7 @@ def _build_resource(self): resource = { 'tableReference': { 'projectId': self._dataset.project, - 'datasetId': self._dataset.name, + 'datasetId': self._dataset.dataset_id, 'tableId': self.name}, } if self.description is not None: @@ -572,7 +572,7 @@ def create(self, client=None): """ client = self._require_client(client) path = '/projects/%s/datasets/%s/tables' % ( - self._dataset.project, self._dataset.name) + self._dataset.project, self._dataset.dataset_id) api_response = client._connection.api_request( method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) @@ -1369,7 +1369,7 @@ def _get_upload_metadata(source_format, schema, dataset, name): 'sourceFormat': source_format, 'destinationTable': { 'projectId': dataset.project, - 'datasetId': dataset.name, + 'datasetId': dataset.dataset_id, 'tableId': name, }, } diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 3f1817706672..ad93ac2c954e 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -44,7 +44,7 @@ def _has_rows(result): return len(result) > 0 -def _make_dataset_name(prefix): +def _make_dataset_id(prefix): return '%s%s' % (prefix, unique_resource_id()) @@ -111,32 +111,32 @@ def _still_in_use(bad_request): doomed.delete() def test_create_dataset(self): - DATASET_NAME = _make_dataset_name('create_dataset') - dataset = Dataset(DATASET_NAME, Config.CLIENT) + DATASET_ID = _make_dataset_id('create_dataset') + dataset = Dataset(DATASET_ID, Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() self.to_delete.append(dataset) self.assertTrue(dataset.exists()) - self.assertEqual(dataset.name, DATASET_NAME) + self.assertEqual(dataset.dataset_id, DATASET_ID) def test_reload_dataset(self): - DATASET_NAME = _make_dataset_name('reload_dataset') - dataset = Dataset(DATASET_NAME, Config.CLIENT) + DATASET_ID = _make_dataset_id('reload_dataset') + dataset = Dataset(DATASET_ID, Config.CLIENT) dataset.friendly_name = 'Friendly' dataset.description = 'Description' retry_403(dataset.create)() self.to_delete.append(dataset) - other = Dataset(DATASET_NAME, Config.CLIENT) + other = Dataset(DATASET_ID, Config.CLIENT) other.reload() self.assertEqual(other.friendly_name, 'Friendly') self.assertEqual(other.description, 'Description') def test_patch_dataset(self): - dataset = Dataset(_make_dataset_name('patch_dataset'), Config.CLIENT) + dataset = Dataset(_make_dataset_id('patch_dataset'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -150,7 +150,7 @@ def test_patch_dataset(self): self.assertEqual(dataset.description, 'Description') def test_update_dataset(self): - dataset = Dataset(_make_dataset_name('update_dataset'), Config.CLIENT) + dataset = Dataset(_make_dataset_id('update_dataset'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -175,8 +175,8 @@ def test_list_datasets(self): 'newer' + unique_resource_id(), 'newest' + unique_resource_id(), ] - for dataset_name in datasets_to_create: - created_dataset = Dataset(dataset_name, Config.CLIENT) + for dataset_id in datasets_to_create: + created_dataset = Dataset(dataset_id, Config.CLIENT) retry_403(created_dataset.create)() self.to_delete.append(created_dataset) @@ -185,12 +185,12 @@ def test_list_datasets(self): all_datasets = list(iterator) self.assertIsNone(iterator.next_page_token) created = [dataset for dataset in all_datasets - if dataset.name in datasets_to_create and + if dataset.dataset_id in datasets_to_create and dataset.project == Config.CLIENT.project] self.assertEqual(len(created), len(datasets_to_create)) def test_create_table(self): - dataset = Dataset(_make_dataset_name('create_table'), Config.CLIENT) + dataset = Dataset(_make_dataset_id('create_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -208,8 +208,8 @@ def test_create_table(self): self.assertEqual(table.name, TABLE_NAME) def test_list_tables(self): - DATASET_NAME = _make_dataset_name('list_tables') - dataset = Dataset(DATASET_NAME, Config.CLIENT) + DATASET_ID = _make_dataset_id('list_tables') + dataset = Dataset(DATASET_ID, Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -241,11 +241,11 @@ def test_list_tables(self): self.assertIsNone(iterator.next_page_token) created = [table for table in all_tables if (table.name in tables_to_create and - table.dataset_name == DATASET_NAME)] + table.dataset_id == DATASET_ID)] self.assertEqual(len(created), len(tables_to_create)) def test_patch_table(self): - dataset = Dataset(_make_dataset_name('patch_table'), Config.CLIENT) + dataset = Dataset(_make_dataset_id('patch_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -267,7 +267,7 @@ def test_patch_table(self): self.assertEqual(table.description, 'Description') def test_update_table(self): - dataset = Dataset(_make_dataset_name('update_table'), Config.CLIENT) + dataset = Dataset(_make_dataset_id('update_table'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -311,7 +311,7 @@ def test_insert_data_then_dump_table(self): ] ROW_IDS = range(len(ROWS)) dataset = Dataset( - _make_dataset_name('insert_data_then_dump'), Config.CLIENT) + _make_dataset_id('insert_data_then_dump'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() @@ -355,7 +355,7 @@ def test_load_table_from_local_file_then_dump_table(self): TABLE_NAME = 'test_table' dataset = Dataset( - _make_dataset_name('load_local_then_dump'), Config.CLIENT) + _make_dataset_id('load_local_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -404,7 +404,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("red", 650)] dataset = Dataset( - _make_dataset_name('load_local_then_dump'), Config.CLIENT) + _make_dataset_id('load_local_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -468,7 +468,7 @@ def test_load_table_from_storage_then_dump_table(self): self.to_delete.insert(0, blob) dataset = Dataset( - _make_dataset_name('load_gcs_then_dump'), Config.CLIENT) + _make_dataset_id('load_gcs_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -538,7 +538,7 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.to_delete.insert(0, blob) dataset = Dataset( - _make_dataset_name('load_gcs_then_dump'), Config.CLIENT) + _make_dataset_id('load_gcs_then_dump'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -570,12 +570,12 @@ def test_load_table_from_storage_w_autodetect_schema(self): sorted(actual_rows, key=by_age), sorted(rows, key=by_age)) def test_job_cancel(self): - DATASET_NAME = _make_dataset_name('job_cancel') - JOB_NAME = 'fetch_' + DATASET_NAME + DATASET_ID = _make_dataset_id('job_cancel') + JOB_NAME = 'fetch_' + DATASET_ID TABLE_NAME = 'test_table' - QUERY = 'SELECT * FROM %s.%s' % (DATASET_NAME, TABLE_NAME) + QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) - dataset = Dataset(DATASET_NAME, Config.CLIENT) + dataset = Dataset(DATASET_ID, Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -796,7 +796,7 @@ def _load_table_for_dml(self, rows, dataset_name, table_name): self._fetch_single_page(table) def test_sync_query_w_dml(self): - dataset_name = _make_dataset_name('dml_tests') + dataset_name = _make_dataset_id('dml_tests') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) query_template = """UPDATE {}.{} @@ -812,7 +812,7 @@ def test_sync_query_w_dml(self): self.assertEqual(query.num_dml_affected_rows, 1) def test_dbapi_w_dml(self): - dataset_name = _make_dataset_name('dml_tests') + dataset_name = _make_dataset_id('dml_tests') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) query_template = """UPDATE {}.{} @@ -1097,10 +1097,10 @@ def test_dbapi_w_query_parameters(self): def test_dump_table_w_public_data(self): PUBLIC = 'bigquery-public-data' - DATASET_NAME = 'samples' + DATASET_ID = 'samples' TABLE_NAME = 'natality' - dataset = Dataset(DATASET_NAME, Config.CLIENT, project=PUBLIC) + dataset = Dataset(DATASET_ID, Config.CLIENT, project=PUBLIC) table = dataset.table(TABLE_NAME) # Reload table to get the schema before fetching the rows. table.reload() @@ -1108,11 +1108,11 @@ def test_dump_table_w_public_data(self): def test_large_query_w_public_data(self): PUBLIC = 'bigquery-public-data' - DATASET_NAME = 'samples' + DATASET_ID = 'samples' TABLE_NAME = 'natality' LIMIT = 1000 SQL = 'SELECT * from `{}.{}.{}` LIMIT {}'.format( - PUBLIC, DATASET_NAME, TABLE_NAME, LIMIT) + PUBLIC, DATASET_ID, TABLE_NAME, LIMIT) query = Config.CLIENT.run_sync_query(SQL) query.use_legacy_sql = False @@ -1154,7 +1154,7 @@ def test_insert_nested_nested(self): ] table_name = 'test_table' dataset = Dataset( - _make_dataset_name('issue_2951'), Config.CLIENT) + _make_dataset_id('issue_2951'), Config.CLIENT) retry_403(dataset.create)() self.to_delete.append(dataset) @@ -1174,7 +1174,7 @@ def test_create_table_insert_fetch_nested_schema(self): table_name = 'test_table' dataset = Dataset( - _make_dataset_name('create_table_nested_schema'), Config.CLIENT) + _make_dataset_id('create_table_nested_schema'), Config.CLIENT) self.assertFalse(dataset.exists()) retry_403(dataset.create)() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 70e1f1eea7c7..fffffb9b2b25 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -210,7 +210,7 @@ def test_list_datasets_defaults(self): self.assertEqual(len(datasets), len(DATA['datasets'])) for found, expected in zip(datasets, DATA['datasets']): self.assertIsInstance(found, Dataset) - self.assertEqual(found.dataset_id, expected['id']) + self.assertEqual(found.full_dataset_id, expected['id']) self.assertEqual(found.friendly_name, expected['friendlyName']) self.assertEqual(token, TOKEN) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 09fdbbe034ce..e1db93a973e4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -110,7 +110,7 @@ def test_table(self): class TestDataset(unittest.TestCase): PROJECT = 'project' - DS_NAME = 'dataset-name' + DS_ID = 'dataset-id' @staticmethod def _get_target_class(): @@ -129,7 +129,7 @@ def _setUpConstants(self): self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( tzinfo=UTC) self.ETAG = 'ETAG' - self.DS_ID = '%s:%s' % (self.PROJECT, self.DS_NAME) + self.DS_FULL_ID = '%s:%s' % (self.PROJECT, self.DS_ID) self.RESOURCE_URL = 'http://example.com/path/to/resource' def _makeResource(self): @@ -139,9 +139,9 @@ def _makeResource(self): return { 'creationTime': self.WHEN_TS * 1000, 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'etag': self.ETAG, - 'id': self.DS_ID, + 'id': self.DS_FULL_ID, 'lastModifiedTime': self.WHEN_TS * 1000, 'location': 'US', 'selfLink': self.RESOURCE_URL, @@ -209,17 +209,17 @@ def _verify_resource_properties(self, dataset, resource): def test_ctor_defaults(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) - self.assertEqual(dataset.name, self.DS_NAME) + dataset = self._make_one(self.DS_ID, client) + self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertIs(dataset._client, client) self.assertEqual(dataset.project, client.project) self.assertEqual( dataset.path, - '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME)) + '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)) self.assertEqual(dataset.access_entries, []) self.assertIsNone(dataset.created) - self.assertIsNone(dataset.dataset_id) + self.assertIsNone(dataset.full_dataset_id) self.assertIsNone(dataset.etag) self.assertIsNone(dataset.modified) self.assertIsNone(dataset.self_link) @@ -237,19 +237,19 @@ def test_ctor_explicit(self): entries = [phred, bharney] OTHER_PROJECT = 'foo-bar-123' client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client, + dataset = self._make_one(self.DS_ID, client, access_entries=entries, project=OTHER_PROJECT) - self.assertEqual(dataset.name, self.DS_NAME) + self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertIs(dataset._client, client) self.assertEqual(dataset.project, OTHER_PROJECT) self.assertEqual( dataset.path, - '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_NAME)) + '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_ID)) self.assertEqual(dataset.access_entries, entries) self.assertIsNone(dataset.created) - self.assertIsNone(dataset.dataset_id) + self.assertIsNone(dataset.full_dataset_id) self.assertIsNone(dataset.etag) self.assertIsNone(dataset.modified) self.assertIsNone(dataset.self_link) @@ -261,7 +261,7 @@ def test_ctor_explicit(self): def test_access_entries_setter_non_list(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) with self.assertRaises(TypeError): dataset.access_entries = object() @@ -269,7 +269,7 @@ def test_access_entries_setter_invalid_field(self): from google.cloud.bigquery.dataset import AccessEntry client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') with self.assertRaises(ValueError): dataset.access_entries = [phred, object()] @@ -278,7 +278,7 @@ def test_access_entries_setter(self): from google.cloud.bigquery.dataset import AccessEntry client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') dataset.access_entries = [phred, bharney] @@ -286,49 +286,49 @@ def test_access_entries_setter(self): def test_default_table_expiration_ms_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) with self.assertRaises(ValueError): dataset.default_table_expiration_ms = 'bogus' def test_default_table_expiration_ms_setter(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) dataset.default_table_expiration_ms = 12345 self.assertEqual(dataset.default_table_expiration_ms, 12345) def test_description_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) with self.assertRaises(ValueError): dataset.description = 12345 def test_description_setter(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) dataset.description = 'DESCRIPTION' self.assertEqual(dataset.description, 'DESCRIPTION') def test_friendly_name_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) with self.assertRaises(ValueError): dataset.friendly_name = 12345 def test_friendly_name_setter(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) dataset.friendly_name = 'FRIENDLY' self.assertEqual(dataset.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) with self.assertRaises(ValueError): dataset.location = 12345 def test_location_setter(self): client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_ID, client) dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') @@ -344,10 +344,10 @@ def test_from_api_repr_bare(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'datasetReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, } } klass = self._get_target_class() @@ -368,7 +368,7 @@ def test__parse_access_entries_w_unknown_entity_type(self): {'role': 'READER', 'unknown': 'UNKNOWN'}, ] client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) @@ -382,7 +382,7 @@ def test__parse_access_entries_w_extra_keys(self): }, ] client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) @@ -391,7 +391,7 @@ def test_create_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.create() @@ -401,7 +401,7 @@ def test_create_w_bound_client(self): self.assertEqual(req['path'], '/%s' % PATH) SENT = { 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, } self.assertEqual(req['data'], SENT) self._verify_resource_properties(dataset, RESOURCE) @@ -421,7 +421,7 @@ def test_create_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) dataset.friendly_name = TITLE dataset.description = DESCRIPTION VIEW = { @@ -448,7 +448,7 @@ def test_create_w_alternate_client(self): SENT = { 'datasetReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, }, 'description': DESCRIPTION, 'friendlyName': TITLE, @@ -474,7 +474,7 @@ def test_create_w_missing_output_properties(self): self.WHEN = None conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.create() @@ -484,16 +484,16 @@ def test_create_w_missing_output_properties(self): self.assertEqual(req['path'], '/%s' % PATH) SENT = { 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, } self.assertEqual(req['data'], SENT) self._verify_resource_properties(dataset, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) self.assertFalse(dataset.exists()) @@ -504,12 +504,12 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn1 = _Connection() CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) self.assertTrue(dataset.exists(client=CLIENT2)) @@ -521,11 +521,11 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.reload() @@ -536,13 +536,13 @@ def test_reload_w_bound_client(self): self._verify_resource_properties(dataset, RESOURCE) def test_reload_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn1 = _Connection() CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) dataset.reload(client=CLIENT2) @@ -557,13 +557,13 @@ def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) with self.assertRaises(ValueError): dataset.patch(default_table_expiration_ms='BOGUS') def test_patch_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' RESOURCE = self._makeResource() @@ -571,7 +571,7 @@ def test_patch_w_bound_client(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.patch(description=DESCRIPTION, friendly_name=TITLE) @@ -587,7 +587,7 @@ def test_patch_w_bound_client(self): self._verify_resource_properties(dataset, RESOURCE) def test_patch_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DEF_TABLE_EXP = 12345 LOCATION = 'EU' RESOURCE = self._makeResource() @@ -597,7 +597,7 @@ def test_patch_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) dataset.patch(client=CLIENT2, default_table_expiration_ms=DEF_TABLE_EXP, @@ -616,7 +616,7 @@ def test_patch_w_alternate_client(self): self._verify_resource_properties(dataset, RESOURCE) def test_update_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' RESOURCE = self._makeResource() @@ -624,7 +624,7 @@ def test_update_w_bound_client(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.description = DESCRIPTION dataset.friendly_name = TITLE @@ -635,7 +635,7 @@ def test_update_w_bound_client(self): self.assertEqual(req['method'], 'PUT') SENT = { 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'description': DESCRIPTION, 'friendlyName': TITLE, } @@ -644,7 +644,7 @@ def test_update_w_bound_client(self): self._verify_resource_properties(dataset, RESOURCE) def test_update_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DEF_TABLE_EXP = 12345 LOCATION = 'EU' RESOURCE = self._makeResource() @@ -654,7 +654,7 @@ def test_update_w_alternate_client(self): CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) dataset.default_table_expiration_ms = DEF_TABLE_EXP dataset.location = LOCATION @@ -667,7 +667,7 @@ def test_update_w_alternate_client(self): self.assertEqual(req['path'], '/%s' % PATH) SENT = { 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'defaultTableExpirationMs': 12345, 'location': 'EU', } @@ -675,10 +675,10 @@ def test_update_w_alternate_client(self): self._verify_resource_properties(dataset, RESOURCE) def test_delete_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) dataset.delete() @@ -688,12 +688,12 @@ def test_delete_w_bound_client(self): self.assertEqual(req['path'], '/%s' % PATH) def test_delete_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn1 = _Connection() CLIENT1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) + dataset = self._make_one(self.DS_ID, client=CLIENT1) dataset.delete(client=CLIENT2) @@ -708,7 +708,7 @@ def test_list_tables_empty(self): conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) iterator = dataset.list_tables() self.assertIs(iterator.dataset, dataset) @@ -721,7 +721,7 @@ def test_list_tables_empty(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) self.assertEqual(req['path'], '/%s' % PATH) def test_list_tables_defaults(self): @@ -730,21 +730,21 @@ def test_list_tables_defaults(self): TABLE_1 = 'table_one' TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) TOKEN = 'TOKEN' DATA = { 'nextPageToken': TOKEN, 'tables': [ {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_1), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'projectId': self.PROJECT}, 'type': 'TABLE'}, {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_2), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'projectId': self.PROJECT}, 'type': 'TABLE'}, ] @@ -752,7 +752,7 @@ def test_list_tables_defaults(self): conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) iterator = dataset.list_tables() self.assertIs(iterator.dataset, dataset) @@ -778,20 +778,20 @@ def test_list_tables_explicit(self): TABLE_1 = 'table_one' TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) TOKEN = 'TOKEN' DATA = { 'tables': [ {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_1), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'projectId': self.PROJECT}, 'type': 'TABLE'}, {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_2), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'projectId': self.PROJECT}, 'type': 'TABLE'}, ] @@ -799,7 +799,7 @@ def test_list_tables_explicit(self): conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) iterator = dataset.list_tables(max_results=3, page_token=TOKEN) self.assertIs(iterator.dataset, dataset) @@ -826,7 +826,7 @@ def test_table_wo_schema(self): conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) table = dataset.table('table_name') self.assertIsInstance(table, Table) self.assertEqual(table.name, 'table_name') @@ -839,7 +839,7 @@ def test_table_w_schema(self): conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_ID, client=client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table = dataset.table('table_name', schema=[full_name, age]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a4b96470c2e7..09b57d7b7457 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -82,7 +82,7 @@ def test_missing_reason(self): class _Base(object): PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' - DS_NAME = 'datset_name' + DS_ID = 'datset_id' TABLE_NAME = 'table_name' JOB_NAME = 'job_name' @@ -206,7 +206,7 @@ def _makeResource(self, started=False, ended=False): config['sourceUris'] = [self.SOURCE1] config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, } @@ -275,7 +275,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['destinationTable'] self.assertEqual(job.destination.project, table_ref['projectId']) - self.assertEqual(job.destination.dataset_name, table_ref['datasetId']) + self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) self.assertEqual(job.destination.name, table_ref['tableId']) if 'fieldDelimiter' in config: @@ -519,7 +519,7 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, 'jobId': self.JOB_NAME, @@ -543,7 +543,7 @@ def test_from_api_repr_bare(self): 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, }, } @@ -603,7 +603,7 @@ def test_begin_w_bound_client(self): 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, }, }, @@ -638,7 +638,7 @@ def test_begin_w_autodetect(self): 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, }, 'autodetect': True @@ -662,7 +662,7 @@ def test_begin_w_alternate_client(self): 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, }, 'allowJaggedRows': True, @@ -848,12 +848,12 @@ def _makeResource(self, started=False, ended=False): config = resource['configuration']['copy'] config['sourceTables'] = [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }] config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, } @@ -866,7 +866,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['destinationTable'] self.assertEqual(job.destination.project, table_ref['projectId']) - self.assertEqual(job.destination.dataset_name, table_ref['datasetId']) + self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) self.assertEqual(job.destination.name, table_ref['tableId']) sources = config.get('sourceTables') @@ -875,7 +875,7 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(len(sources), len(job.sources)) for table_ref, table in zip(sources, job.sources): self.assertEqual(table.project, table_ref['projectId']) - self.assertEqual(table.dataset_name, table_ref['datasetId']) + self.assertEqual(table.dataset_id, table_ref['datasetId']) self.assertEqual(table.name, table_ref['tableId']) if 'createDisposition' in config: @@ -921,7 +921,7 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, 'jobId': self.JOB_NAME, @@ -944,12 +944,12 @@ def test_from_api_repr_bare(self): 'copy': { 'sourceTables': [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, } @@ -973,12 +973,12 @@ def test_from_api_repr_w_sourcetable(self): 'copy': { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }, 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, } @@ -1002,7 +1002,7 @@ def test_from_api_repr_wo_sources(self): 'copy': { 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, } @@ -1051,12 +1051,12 @@ def test_begin_w_bound_client(self): 'copy': { 'sourceTables': [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE }], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, }, @@ -1071,12 +1071,12 @@ def test_begin_w_alternate_client(self): COPY_CONFIGURATION = { 'sourceTables': [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, 'createDisposition': 'CREATE_NEVER', @@ -1203,7 +1203,7 @@ def _makeResource(self, started=False, ended=False): config = resource['configuration']['extract'] config['sourceTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, } config['destinationUris'] = [self.DESTINATION_URI] @@ -1218,7 +1218,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['sourceTable'] self.assertEqual(job.source.project, table_ref['projectId']) - self.assertEqual(job.source.dataset_name, table_ref['datasetId']) + self.assertEqual(job.source.dataset_id, table_ref['datasetId']) self.assertEqual(job.source.name, table_ref['tableId']) if 'compression' in config: @@ -1295,7 +1295,7 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, 'jobId': self.JOB_NAME, @@ -1318,7 +1318,7 @@ def test_from_api_repr_bare(self): 'extract': { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }, 'destinationUris': [self.DESTINATION_URI], @@ -1369,7 +1369,7 @@ def test_begin_w_bound_client(self): 'extract': { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE }, 'destinationUris': [self.DESTINATION_URI], @@ -1385,7 +1385,7 @@ def test_begin_w_alternate_client(self): EXTRACT_CONFIGURATION = { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }, 'destinationUris': [self.DESTINATION_URI], @@ -1604,7 +1604,7 @@ def _verifyResourceProperties(self, job, resource): dataset = job.default_dataset ds_ref = { 'projectId': dataset.project, - 'datasetId': dataset.name, + 'datasetId': dataset.dataset_id, } self.assertEqual(ds_ref, query_config['defaultDataset']) else: @@ -1613,7 +1613,7 @@ def _verifyResourceProperties(self, job, resource): table = job.destination tb_ref = { 'projectId': table.project, - 'datasetId': table.dataset_name, + 'datasetId': table.dataset_id, 'tableId': table.name } self.assertEqual(tb_ref, query_config['destinationTable']) @@ -1687,7 +1687,7 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, 'jobId': self.JOB_NAME, @@ -1723,7 +1723,7 @@ def test_from_api_repr_w_properties(self): query_config['writeDisposition'] = 'WRITE_TRUNCATE' query_config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, } klass = self._get_target_class() @@ -1936,21 +1936,21 @@ def test_referenced_tables(self): self.assertIsInstance(local1, Table) self.assertEqual(local1.name, 'local1') self.assertIsInstance(local1._dataset, Dataset) - self.assertEqual(local1.dataset_name, 'dataset') + self.assertEqual(local1.dataset_id, 'dataset') self.assertEqual(local1.project, self.PROJECT) self.assertIs(local1._dataset._client, client) self.assertIsInstance(local2, Table) self.assertEqual(local2.name, 'local2') self.assertIsInstance(local2._dataset, Dataset) - self.assertEqual(local2.dataset_name, 'dataset') + self.assertEqual(local2.dataset_id, 'dataset') self.assertEqual(local2.project, self.PROJECT) self.assertIs(local2._dataset._client, client) self.assertIsInstance(remote, Table) self.assertEqual(remote.name, 'other-table') self.assertIsInstance(remote._dataset, Dataset) - self.assertEqual(remote.dataset_name, 'other-dataset') + self.assertEqual(remote.dataset_id, 'other-dataset') self.assertEqual(remote.project, 'other-project-123') self.assertIs(remote._dataset._client, client) @@ -2128,7 +2128,7 @@ def test_begin_w_bound_client(self): from google.cloud.bigquery.dataset import Dataset PATH = '/projects/%s/jobs' % (self.PROJECT,) - DS_NAME = 'DATASET' + DS_ID = 'DATASET' RESOURCE = self._makeResource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] @@ -2139,7 +2139,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.default_dataset = Dataset(DS_NAME, client) + job.default_dataset = Dataset(DS_ID, client) job.begin() @@ -2159,7 +2159,7 @@ def test_begin_w_bound_client(self): 'query': self.QUERY, 'defaultDataset': { 'projectId': self.PROJECT, - 'datasetId': DS_NAME, + 'datasetId': DS_ID, }, }, }, @@ -2173,7 +2173,7 @@ def test_begin_w_alternate_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) TABLE = 'TABLE' - DS_NAME = 'DATASET' + DS_ID = 'DATASET' RESOURCE = self._makeResource(ended=True) QUERY_CONFIGURATION = { 'query': self.QUERY, @@ -2181,11 +2181,11 @@ def test_begin_w_alternate_client(self): 'createDisposition': 'CREATE_NEVER', 'defaultDataset': { 'projectId': self.PROJECT, - 'datasetId': DS_NAME, + 'datasetId': DS_ID, }, 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': DS_NAME, + 'datasetId': DS_ID, 'tableId': TABLE, }, 'flattenResults': True, @@ -2203,7 +2203,7 @@ def test_begin_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_NAME, self.QUERY, client1) - dataset = Dataset(DS_NAME, client1) + dataset = Dataset(DS_ID, client1) table = Table(TABLE, dataset) job.allow_large_results = True @@ -2464,14 +2464,14 @@ def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import Table PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) - DS_NAME = 'DATASET' + DS_ID = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, None, client) - dataset = Dataset(DS_NAME, client) + dataset = Dataset(DS_ID, client) table = Table(DEST_TABLE, dataset) job.destination = table @@ -2487,13 +2487,13 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) - DS_NAME = 'DATASET' + DS_ID = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() q_config = RESOURCE['configuration']['query'] q_config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': DS_NAME, + 'datasetId': DS_ID, 'tableId': DEST_TABLE, } conn1 = _Connection() @@ -2720,8 +2720,8 @@ def project(self): return TestLoadJob.PROJECT @property - def dataset_name(self): - return TestLoadJob.DS_NAME + def dataset_id(self): + return TestLoadJob.DS_ID class _Connection(object): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 9e0db94bc6cb..cb481eac1932 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -60,7 +60,7 @@ def test_ctor_defaults(self): class TestTable(unittest.TestCase, _SchemaBase): PROJECT = 'prahj-ekt' - DS_NAME = 'dataset-name' + DS_ID = 'dataset-name' TABLE_NAME = 'table-name' @staticmethod @@ -81,7 +81,7 @@ def _setUpConstants(self): tzinfo=UTC) self.ETAG = 'ETAG' self.TABLE_ID = '%s:%s:%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) self.RESOURCE_URL = 'http://example.com/path/to/resource' self.NUM_BYTES = 12345 self.NUM_ROWS = 67 @@ -92,7 +92,7 @@ def _makeResource(self): 'creationTime': self.WHEN_TS * 1000, 'tableReference': {'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -171,11 +171,11 @@ def test_ctor(self): self.assertEqual(table.name, self.TABLE_NAME) self.assertIs(table._dataset, dataset) self.assertEqual(table.project, self.PROJECT) - self.assertEqual(table.dataset_name, self.DS_NAME) + self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual( table.path, '/projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME)) + self.PROJECT, self.DS_ID, self.TABLE_NAME)) self.assertEqual(table.schema, []) self.assertIsNone(table.created) @@ -285,9 +285,9 @@ def test_props_set_by_server(self): CREATED = datetime.datetime(2015, 7, 29, 12, 13, 22, tzinfo=UTC) MODIFIED = datetime.datetime(2015, 7, 29, 14, 47, 15, tzinfo=UTC) TABLE_ID = '%s:%s:%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) client = _Client(self.PROJECT) dataset = _Dataset(client) table = self._make_one(self.TABLE_NAME, dataset) @@ -421,10 +421,10 @@ def test_from_api_repr_bare(self): client = _Client(self.PROJECT) dataset = _Dataset(client) RESOURCE = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_NAME, self.TABLE_NAME), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, }, 'type': 'TABLE', @@ -445,7 +445,7 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(table, RESOURCE) def test_create_new_day_partitioned_table(self): - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -461,7 +461,7 @@ def test_create_new_day_partitioned_table(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'timePartitioning': {'type': 'DAY'}, } @@ -471,7 +471,7 @@ def test_create_new_day_partitioned_table(self): def test_create_w_bound_client(self): from google.cloud.bigquery.table import SchemaField - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -490,7 +490,7 @@ def test_create_w_bound_client(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -502,7 +502,7 @@ def test_create_w_bound_client(self): def test_create_w_partition_no_expire(self): from google.cloud.bigquery.table import SchemaField - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -524,7 +524,7 @@ def test_create_w_partition_no_expire(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'timePartitioning': {'type': 'DAY'}, 'schema': {'fields': [ @@ -537,7 +537,7 @@ def test_create_w_partition_no_expire(self): def test_create_w_partition_and_expire(self): from google.cloud.bigquery.table import SchemaField - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -559,7 +559,7 @@ def test_create_w_partition_and_expire(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, 'schema': {'fields': [ @@ -712,7 +712,7 @@ def test_create_w_alternate_client(self): from google.cloud._helpers import UTC from google.cloud._helpers import _millis - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' QUERY = 'select fullname, age from person_ages' @@ -745,7 +745,7 @@ def test_create_w_alternate_client(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'description': DESCRIPTION, 'friendlyName': TITLE, @@ -759,7 +759,7 @@ def test_create_w_missing_output_properties(self): # lacks 'creationTime' / 'lastModifiedTime' from google.cloud.bigquery.table import SchemaField - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) RESOURCE = self._makeResource() del RESOURCE['creationTime'] del RESOURCE['lastModifiedTime'] @@ -781,7 +781,7 @@ def test_create_w_missing_output_properties(self): SENT = { 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -792,7 +792,7 @@ def test_create_w_missing_output_properties(self): def test_exists_miss_w_bound_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) @@ -808,7 +808,7 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) @@ -827,7 +827,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) @@ -844,7 +844,7 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) RESOURCE = self._makeResource() conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) @@ -874,7 +874,7 @@ def test_patch_w_invalid_expiration(self): def test_patch_w_bound_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' RESOURCE = self._makeResource() @@ -908,7 +908,7 @@ def test_patch_w_alternate_client(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) QUERY = 'select fullname, age from person_ages' LOCATION = 'EU' RESOURCE = self._makeResource() @@ -950,7 +950,7 @@ def test_patch_w_schema_None(self): # Simulate deleting schema: not sure if back-end will actually # allow this operation, but the spec says it is optional. PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' RESOURCE = self._makeResource() @@ -975,7 +975,7 @@ def test_update_w_bound_client(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) DESCRIPTION = 'DESCRIPTION' TITLE = 'TITLE' RESOURCE = self._makeResource() @@ -999,7 +999,7 @@ def test_update_w_bound_client(self): SENT = { 'tableReference': {'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -1017,7 +1017,7 @@ def test_update_w_alternate_client(self): from google.cloud._helpers import _millis PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) DEF_TABLE_EXP = 12345 LOCATION = 'EU' QUERY = 'select fullname, age from person_ages' @@ -1051,7 +1051,7 @@ def test_update_w_alternate_client(self): SENT = { 'tableReference': {'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'expirationTime': _millis(self.EXP_TIME), 'location': 'EU', @@ -1062,7 +1062,7 @@ def test_update_w_alternate_client(self): def test_delete_w_bound_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) @@ -1077,7 +1077,7 @@ def test_delete_w_bound_client(self): def test_delete_w_alternate_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) @@ -1112,7 +1112,7 @@ def test_fetch_data_w_bound_client(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) @@ -1185,7 +1185,7 @@ def test_fetch_data_w_alternate_client(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) MAX = 10 TOKEN = 'TOKEN' DATA = { @@ -1256,7 +1256,7 @@ def test_fetch_data_w_repeated_fields(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) ROWS = 1234 TOKEN = 'TOKEN' DATA = { @@ -1309,7 +1309,7 @@ def test_fetch_data_w_record_schema(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) ROWS = 1234 TOKEN = 'TOKEN' DATA = { @@ -1451,7 +1451,7 @@ def test_insert_data_w_bound_client(self): WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) @@ -1492,7 +1492,7 @@ def test_insert_data_w_alternate_client(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) RESPONSE = { 'insertErrors': [ {'index': 1, @@ -1561,7 +1561,7 @@ def test_insert_data_w_repeated_fields(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) @@ -1597,7 +1597,7 @@ def test_insert_data_w_record_schema(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) @@ -1898,7 +1898,7 @@ def test_upload_file_resumable_metadata(self): 'sourceFormat': config_args['source_format'], 'destinationTable': { 'projectId': table._dataset._client.project, - 'datasetId': table.dataset_name, + 'datasetId': table.dataset_id, 'tableId': table.name, }, 'allowJaggedRows': config_args['allow_jagged_rows'], @@ -2230,8 +2230,9 @@ def _call_fut(source_format, schema, dataset, name): def test_empty_schema(self): source_format = 'AVRO' - dataset = mock.Mock(project='prediction', spec=['name', 'project']) - dataset.name = 'market' # mock.Mock() treats `name` specially. + dataset = mock.Mock(project='prediction', + spec=['dataset_id', 'project']) + dataset.dataset_id = 'market' # mock.Mock() treats `name` specially. table_name = 'chairs' metadata = self._call_fut(source_format, [], dataset, table_name) @@ -2241,7 +2242,7 @@ def test_empty_schema(self): 'sourceFormat': source_format, 'destinationTable': { 'projectId': dataset.project, - 'datasetId': dataset.name, + 'datasetId': dataset.dataset_id, 'tableId': table_name, }, }, @@ -2254,8 +2255,8 @@ def test_with_schema(self): source_format = 'CSV' full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - dataset = mock.Mock(project='blind', spec=['name', 'project']) - dataset.name = 'movie' # mock.Mock() treats `name` specially. + dataset = mock.Mock(project='blind', spec=['dataset_id', 'project']) + dataset.dataset_id = 'movie' # mock.Mock() treats `name` specially. table_name = 'teebull-neem' metadata = self._call_fut( source_format, [full_name], dataset, table_name) @@ -2266,7 +2267,7 @@ def test_with_schema(self): 'sourceFormat': source_format, 'destinationTable': { 'projectId': dataset.project, - 'datasetId': dataset.name, + 'datasetId': dataset.dataset_id, 'tableId': table_name, }, 'schema': { @@ -2309,14 +2310,14 @@ def run(self): class _Dataset(object): - def __init__(self, client, name=TestTable.DS_NAME): + def __init__(self, client, dataset_id=TestTable.DS_ID): self._client = client - self.name = name + self.dataset_id = dataset_id @property def path(self): return '/projects/%s/datasets/%s' % ( - self._client.project, self.name) + self._client.project, self.dataset_id) @property def project(self): From 120fc6a57d4ba50c736e576c398cff6da04b893a Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 14 Sep 2017 16:34:02 -0400 Subject: [PATCH 0268/2016] bigquery: rename name field of Table to table_id (#3959) * bigquery: rename name field of Table to table_id Also rename table_id to full_table_id. * fix lint errors * fix doc --- .../google/cloud/bigquery/job.py | 10 ++--- .../google/cloud/bigquery/table.py | 45 +++++++++++-------- .../tests/unit/test_dataset.py | 12 ++--- .../tests/unit/test_job.py | 40 ++++++++--------- .../tests/unit/test_table.py | 24 +++++----- 5 files changed, 70 insertions(+), 61 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index f513a98d23cd..da6962daec1b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -774,7 +774,7 @@ def _build_resource(self): 'destinationTable': { 'projectId': self.destination.project, 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.name, + 'tableId': self.destination.table_id, }, }, }, @@ -901,7 +901,7 @@ def _build_resource(self): source_refs = [{ 'projectId': table.project, 'datasetId': table.dataset_id, - 'tableId': table.name, + 'tableId': table.table_id, } for table in self.sources] resource = { @@ -915,7 +915,7 @@ def _build_resource(self): 'destinationTable': { 'projectId': self.destination.project, 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.name, + 'tableId': self.destination.table_id, }, }, }, @@ -1059,7 +1059,7 @@ def _build_resource(self): source_ref = { 'projectId': self.source.project, 'datasetId': self.source.dataset_id, - 'tableId': self.source.name, + 'tableId': self.source.table_id, } resource = { @@ -1248,7 +1248,7 @@ def _destination_table_resource(self): return { 'projectId': self.destination.project, 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.name, + 'tableId': self.destination.table_id, } def _populate_config_resource_booleans(self, configuration): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index e06e79271d0a..f9c07b1e8ee6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -90,8 +90,8 @@ class Table(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables - :type name: str - :param name: the name of the table + :type table_id: str + :param table_id: the ID of the table :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` :param dataset: The dataset which contains the table. @@ -102,8 +102,8 @@ class Table(object): _schema = None - def __init__(self, name, dataset, schema=()): - self.name = name + def __init__(self, table_id, dataset, schema=()): + self._table_id = table_id self._dataset = dataset self._properties = {} # Let the @property do validation. @@ -127,14 +127,23 @@ def dataset_id(self): """ return self._dataset.dataset_id + @property + def table_id(self): + """ID of the table. + + :rtype: str + :returns: the table ID. + """ + return self._table_id + @property def path(self): """URL path for the table's APIs. :rtype: str - :returns: the path based on project and dataste name. + :returns: the path based on project, dataset and table IDs. """ - return '%s/tables/%s' % (self._dataset.path, self.name) + return '%s/tables/%s' % (self._dataset.path, self.table_id) @property def schema(self): @@ -224,11 +233,11 @@ def self_link(self): return self._properties.get('selfLink') @property - def table_id(self): - """ID for the table resource. + def full_table_id(self): + """ID for the table, in the form ``project_id:dataset_id:table_id``. :rtype: str, or ``NoneType`` - :returns: the ID (None until set from the server). + :returns: the full ID (None until set from the server). """ return self._properties.get('id') @@ -463,7 +472,7 @@ def list_partitions(self, client=None): """ query = self._require_client(client).run_sync_query( 'SELECT partition_id from [%s.%s$__PARTITIONS_SUMMARY__]' % - (self.dataset_id, self.name)) + (self.dataset_id, self.table_id)) query.run() return [row[0] for row in query.rows] @@ -484,8 +493,8 @@ def from_api_repr(cls, resource, dataset): 'tableId' not in resource['tableReference']): raise KeyError('Resource lacks required identity information:' '["tableReference"]["tableId"]') - table_name = resource['tableReference']['tableId'] - table = cls(table_name, dataset=dataset) + table_id = resource['tableReference']['tableId'] + table = cls(table_id, dataset=dataset) table._set_properties(resource) return table @@ -528,7 +537,7 @@ def _build_resource(self): 'tableReference': { 'projectId': self._dataset.project, 'datasetId': self._dataset.dataset_id, - 'tableId': self.name}, + 'tableId': self.table_id}, } if self.description is not None: resource['description'] = self.description @@ -1181,7 +1190,7 @@ def upload_from_file(self, _maybe_rewind(file_obj, rewind=rewind) _check_mode(file_obj) metadata = _get_upload_metadata( - source_format, self._schema, self._dataset, self.name) + source_format, self._schema, self._dataset, self.table_id) _configure_job_metadata(metadata, allow_jagged_rows, allow_quoted_newlines, create_disposition, encoding, field_delimiter, @@ -1346,7 +1355,7 @@ def _get_upload_headers(user_agent): } -def _get_upload_metadata(source_format, schema, dataset, name): +def _get_upload_metadata(source_format, schema, dataset, table_id): """Get base metadata for creating a table. :type source_format: str @@ -1359,8 +1368,8 @@ def _get_upload_metadata(source_format, schema, dataset, name): :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset` :param dataset: A dataset which contains a table. - :type name: str - :param name: The name of the table. + :type table_id: str + :param table_id: The table_id of the table. :rtype: dict :returns: The metadata dictionary. @@ -1370,7 +1379,7 @@ def _get_upload_metadata(source_format, schema, dataset, name): 'destinationTable': { 'projectId': dataset.project, 'datasetId': dataset.dataset_id, - 'tableId': name, + 'tableId': table_id, }, } if schema: diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index e1db93a973e4..0689e93b0f20 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -763,7 +763,7 @@ def test_list_tables_defaults(self): self.assertEqual(len(tables), len(DATA['tables'])) for found, expected in zip(tables, DATA['tables']): self.assertIsInstance(found, Table) - self.assertEqual(found.table_id, expected['id']) + self.assertEqual(found.full_table_id, expected['id']) self.assertEqual(found.table_type, expected['type']) self.assertEqual(token, TOKEN) @@ -810,7 +810,7 @@ def test_list_tables_explicit(self): self.assertEqual(len(tables), len(DATA['tables'])) for found, expected in zip(tables, DATA['tables']): self.assertIsInstance(found, Table) - self.assertEqual(found.table_id, expected['id']) + self.assertEqual(found.full_table_id, expected['id']) self.assertEqual(found.table_type, expected['type']) self.assertIsNone(token) @@ -827,9 +827,9 @@ def test_table_wo_schema(self): conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = self._make_one(self.DS_ID, client=client) - table = dataset.table('table_name') + table = dataset.table('table_id') self.assertIsInstance(table, Table) - self.assertEqual(table.name, 'table_name') + self.assertEqual(table.table_id, 'table_id') self.assertIs(table._dataset, dataset) self.assertEqual(table.schema, []) @@ -842,9 +842,9 @@ def test_table_w_schema(self): dataset = self._make_one(self.DS_ID, client=client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table('table_name', schema=[full_name, age]) + table = dataset.table('table_id', schema=[full_name, age]) self.assertIsInstance(table, Table) - self.assertEqual(table.name, 'table_name') + self.assertEqual(table.table_id, 'table_id') self.assertIs(table._dataset, dataset) self.assertEqual(table.schema, [full_name, age]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 09b57d7b7457..7c9a84f4503a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -83,7 +83,7 @@ class _Base(object): PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' DS_ID = 'datset_id' - TABLE_NAME = 'table_name' + TABLE_ID = 'table_id' JOB_NAME = 'job_name' def _make_one(self, *args, **kw): @@ -207,7 +207,7 @@ def _makeResource(self, started=False, ended=False): config['destinationTable'] = { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + 'tableId': self.TABLE_ID, } if ended: @@ -276,7 +276,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['destinationTable'] self.assertEqual(job.destination.project, table_ref['projectId']) self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) - self.assertEqual(job.destination.name, table_ref['tableId']) + self.assertEqual(job.destination.table_id, table_ref['tableId']) if 'fieldDelimiter' in config: self.assertEqual(job.field_delimiter, @@ -544,7 +544,7 @@ def test_from_api_repr_bare(self): 'destinationTable': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + 'tableId': self.TABLE_ID, }, } }, @@ -604,7 +604,7 @@ def test_begin_w_bound_client(self): 'destinationTable': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + 'tableId': self.TABLE_ID, }, }, }, @@ -639,7 +639,7 @@ def test_begin_w_autodetect(self): 'destinationTable': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + 'tableId': self.TABLE_ID, }, 'autodetect': True }, @@ -663,7 +663,7 @@ def test_begin_w_alternate_client(self): 'destinationTable': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + 'tableId': self.TABLE_ID, }, 'allowJaggedRows': True, 'allowQuotedNewlines': True, @@ -867,7 +867,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['destinationTable'] self.assertEqual(job.destination.project, table_ref['projectId']) self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) - self.assertEqual(job.destination.name, table_ref['tableId']) + self.assertEqual(job.destination.table_id, table_ref['tableId']) sources = config.get('sourceTables') if sources is None: @@ -876,7 +876,7 @@ def _verifyResourceProperties(self, job, resource): for table_ref, table in zip(sources, job.sources): self.assertEqual(table.project, table_ref['projectId']) self.assertEqual(table.dataset_id, table_ref['datasetId']) - self.assertEqual(table.name, table_ref['tableId']) + self.assertEqual(table.table_id, table_ref['tableId']) if 'createDisposition' in config: self.assertEqual(job.create_disposition, @@ -1219,7 +1219,7 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['sourceTable'] self.assertEqual(job.source.project, table_ref['projectId']) self.assertEqual(job.source.dataset_id, table_ref['datasetId']) - self.assertEqual(job.source.name, table_ref['tableId']) + self.assertEqual(job.source.table_id, table_ref['tableId']) if 'compression' in config: self.assertEqual(job.compression, @@ -1614,7 +1614,7 @@ def _verifyResourceProperties(self, job, resource): tb_ref = { 'projectId': table.project, 'datasetId': table.dataset_id, - 'tableId': table.name + 'tableId': table.table_id } self.assertEqual(tb_ref, query_config['destinationTable']) else: @@ -1934,21 +1934,21 @@ def test_referenced_tables(self): local1, local2, remote = job.referenced_tables self.assertIsInstance(local1, Table) - self.assertEqual(local1.name, 'local1') + self.assertEqual(local1.table_id, 'local1') self.assertIsInstance(local1._dataset, Dataset) self.assertEqual(local1.dataset_id, 'dataset') self.assertEqual(local1.project, self.PROJECT) self.assertIs(local1._dataset._client, client) self.assertIsInstance(local2, Table) - self.assertEqual(local2.name, 'local2') + self.assertEqual(local2.table_id, 'local2') self.assertIsInstance(local2._dataset, Dataset) self.assertEqual(local2.dataset_id, 'dataset') self.assertEqual(local2.project, self.PROJECT) self.assertIs(local2._dataset._client, client) self.assertIsInstance(remote, Table) - self.assertEqual(remote.name, 'other-table') + self.assertEqual(remote.table_id, 'other-table') self.assertIsInstance(remote._dataset, Dataset) self.assertEqual(remote.dataset_id, 'other-dataset') self.assertEqual(remote.project, 'other-project-123') @@ -2706,14 +2706,14 @@ def _get_query_results(self, job_id): class _Table(object): - def __init__(self, name=None): - self._name = name + def __init__(self, table_id=None): + self._table_id = table_id @property - def name(self): - if self._name is not None: - return self._name - return TestLoadJob.TABLE_NAME + def table_id(self): + if self._table_id is not None: + return self._table_id + return TestLoadJob.TABLE_ID @property def project(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index cb481eac1932..f076c6d39938 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -80,7 +80,7 @@ def _setUpConstants(self): self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( tzinfo=UTC) self.ETAG = 'ETAG' - self.TABLE_ID = '%s:%s:%s' % ( + self.TABLE_FULL_ID = '%s:%s:%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) self.RESOURCE_URL = 'http://example.com/path/to/resource' self.NUM_BYTES = 12345 @@ -98,7 +98,7 @@ def _makeResource(self): {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, 'etag': 'ETAG', - 'id': self.TABLE_ID, + 'id': self.TABLE_FULL_ID, 'lastModifiedTime': self.WHEN_TS * 1000, 'location': 'US', 'selfLink': self.RESOURCE_URL, @@ -133,7 +133,7 @@ def _verifyReadonlyResourceProperties(self, table, resource): else: self.assertIsNone(table.self_link) - self.assertEqual(table.table_id, self.TABLE_ID) + self.assertEqual(table.full_table_id, self.TABLE_FULL_ID) self.assertEqual(table.table_type, 'TABLE' if 'view' not in resource else 'VIEW') @@ -168,7 +168,7 @@ def test_ctor(self): client = _Client(self.PROJECT) dataset = _Dataset(client) table = self._make_one(self.TABLE_NAME, dataset) - self.assertEqual(table.name, self.TABLE_NAME) + self.assertEqual(table.table_id, self.TABLE_NAME) self.assertIs(table._dataset, dataset) self.assertEqual(table.project, self.PROJECT) self.assertEqual(table.dataset_id, self.DS_ID) @@ -184,7 +184,7 @@ def test_ctor(self): self.assertIsNone(table.num_bytes) self.assertIsNone(table.num_rows) self.assertIsNone(table.self_link) - self.assertIsNone(table.table_id) + self.assertIsNone(table.full_table_id) self.assertIsNone(table.table_type) self.assertIsNone(table.description) @@ -284,7 +284,7 @@ def test_props_set_by_server(self): CREATED = datetime.datetime(2015, 7, 29, 12, 13, 22, tzinfo=UTC) MODIFIED = datetime.datetime(2015, 7, 29, 14, 47, 15, tzinfo=UTC) - TABLE_ID = '%s:%s:%s' % ( + TABLE_FULL_ID = '%s:%s:%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) @@ -297,7 +297,7 @@ def test_props_set_by_server(self): table._properties['numBytes'] = 12345 table._properties['numRows'] = 66 table._properties['selfLink'] = URL - table._properties['id'] = TABLE_ID + table._properties['id'] = TABLE_FULL_ID table._properties['type'] = 'TABLE' self.assertEqual(table.created, CREATED) @@ -306,7 +306,7 @@ def test_props_set_by_server(self): self.assertEqual(table.num_bytes, 12345) self.assertEqual(table.num_rows, 66) self.assertEqual(table.self_link, URL) - self.assertEqual(table.table_id, TABLE_ID) + self.assertEqual(table.full_table_id, TABLE_FULL_ID) self.assertEqual(table.table_type, 'TABLE') def test_description_setter_bad_value(self): @@ -431,7 +431,7 @@ def test_from_api_repr_bare(self): } klass = self._get_target_class() table = klass.from_api_repr(RESOURCE, dataset) - self.assertEqual(table.name, self.TABLE_NAME) + self.assertEqual(table.table_id, self.TABLE_NAME) self.assertIs(table._dataset, dataset) self._verifyResourceProperties(table, RESOURCE) @@ -1681,7 +1681,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): data = b'goodbye gudbi gootbee' stream = io.BytesIO(data) metadata = _get_upload_metadata( - 'CSV', table._schema, table._dataset, table.name) + 'CSV', table._schema, table._dataset, table.table_id) upload, transport = table._initiate_resumable_upload( client, stream, metadata, num_retries) @@ -1747,7 +1747,7 @@ def _do_multipart_upload_success_helper( data = b'Bzzzz-zap \x00\x01\xf4' stream = io.BytesIO(data) metadata = _get_upload_metadata( - 'CSV', table._schema, table._dataset, table.name) + 'CSV', table._schema, table._dataset, table.table_id) size = len(data) response = table._do_multipart_upload( client, stream, metadata, size, num_retries) @@ -1899,7 +1899,7 @@ def test_upload_file_resumable_metadata(self): 'destinationTable': { 'projectId': table._dataset._client.project, 'datasetId': table.dataset_id, - 'tableId': table.name, + 'tableId': table.table_id, }, 'allowJaggedRows': config_args['allow_jagged_rows'], 'allowQuotedNewlines': From acaa8922fa21c9b9d589d78fd4b5075ae06c4d07 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 15 Sep 2017 11:19:37 -0700 Subject: [PATCH 0269/2016] BQ: rename XJob.name to XJob.job_id. (#3962) * BQ: rename XJob.name to XJob.job_id. * BQ: Remove references to table.name --- .../google/cloud/bigquery/job.py | 78 +++++++++---------- .../google/cloud/bigquery/query.py | 2 +- .../google-cloud-bigquery/tests/system.py | 6 +- .../tests/unit/test_client.py | 18 ++--- .../tests/unit/test_query.py | 2 +- 5 files changed, 53 insertions(+), 53 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index da6962daec1b..76a7d476cf6b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -164,16 +164,15 @@ class WriteDisposition(_EnumProperty): class _AsyncJob(google.api.core.future.polling.PollingFuture): """Base class for asynchronous jobs. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: the job's ID in the project associated with the client. :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + :param client: A client which holds credentials and project configuration. """ - def __init__(self, name, client): + def __init__(self, job_id, client): super(_AsyncJob, self).__init__() - self.name = name + self.job_id = job_id self._client = client self._properties = {} self._result_set = False @@ -217,9 +216,9 @@ def path(self): """URL path for the job's APIs. :rtype: str - :returns: the path based on project and job name. + :returns: the path based on project and job ID. """ - return '/projects/%s/jobs/%s' % (self.project, self.name) + return '/projects/%s/jobs/%s' % (self.project, self.job_id) @property def etag(self): @@ -367,7 +366,7 @@ def _get_resource_config(cls, resource): :rtype: dict :returns: tuple (string, dict), where the first element is the - job name and the second contains job-specific configuration. + job ID and the second contains job-specific configuration. :raises: :class:`KeyError` if the resource has no identifier, or is missing the appropriate configuration. """ @@ -375,13 +374,13 @@ def _get_resource_config(cls, resource): 'jobId' not in resource['jobReference']): raise KeyError('Resource lacks required identity information: ' '["jobReference"]["jobId"]') - name = resource['jobReference']['jobId'] + job_id = resource['jobReference']['jobId'] if ('configuration' not in resource or cls._JOB_TYPE not in resource['configuration']): raise KeyError('Resource lacks required configuration: ' '["configuration"]["%s"]' % cls._JOB_TYPE) config = resource['configuration'][cls._JOB_TYPE] - return name, config + return job_id, config def begin(self, client=None): """API call: begin the job via a POST request @@ -560,8 +559,9 @@ class _LoadConfiguration(object): class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table from remote URI. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: + The job's ID, belonging to the project associated with the client. :type destination: :class:`google.cloud.bigquery.table.Table` :param destination: Table into which data is to be loaded. @@ -766,7 +766,7 @@ def _build_resource(self): resource = { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { self._JOB_TYPE: { @@ -834,12 +834,12 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: Job parsed from ``resource``. """ - name, config = cls._get_resource_config(resource) + job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] dataset = Dataset(dest_config['datasetId'], client) destination = Table(dest_config['tableId'], dataset) source_urls = config.get('sourceUris', ()) - job = cls(name, destination, source_urls, client=client) + job = cls(job_id, destination, source_urls, client=client) job._set_properties(resource) return job @@ -856,8 +856,8 @@ class _CopyConfiguration(object): class CopyJob(_AsyncJob): """Asynchronous job: copy data into a table from other tables. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: the job's ID, within the project belonging to ``client``. :type destination: :class:`google.cloud.bigquery.table.Table` :param destination: Table into which data is to be loaded. @@ -872,8 +872,8 @@ class CopyJob(_AsyncJob): _JOB_TYPE = 'copy' - def __init__(self, name, destination, sources, client): - super(CopyJob, self).__init__(name, client) + def __init__(self, job_id, destination, sources, client): + super(CopyJob, self).__init__(job_id, client) self.destination = destination self.sources = sources self._configuration = _CopyConfiguration() @@ -907,7 +907,7 @@ def _build_resource(self): resource = { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { self._JOB_TYPE: { @@ -949,7 +949,7 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: Job parsed from ``resource``. """ - name, config = cls._get_resource_config(resource) + job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] dataset = Dataset(dest_config['datasetId'], client) destination = Table(dest_config['tableId'], dataset) @@ -964,7 +964,7 @@ def from_api_repr(cls, resource, client): for source_config in source_configs: dataset = Dataset(source_config['datasetId'], client) sources.append(Table(source_config['tableId'], dataset)) - job = cls(name, destination, sources, client=client) + job = cls(job_id, destination, sources, client=client) job._set_properties(resource) return job @@ -983,8 +983,8 @@ class _ExtractConfiguration(object): class ExtractJob(_AsyncJob): """Asynchronous job: extract data from a table into Cloud Storage. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: the job's ID, within the project belonging to ``client``. :type source: :class:`google.cloud.bigquery.table.Table` :param source: Table into which data is to be loaded. @@ -1000,8 +1000,8 @@ class ExtractJob(_AsyncJob): """ _JOB_TYPE = 'extract' - def __init__(self, name, source, destination_uris, client): - super(ExtractJob, self).__init__(name, client) + def __init__(self, job_id, source, destination_uris, client): + super(ExtractJob, self).__init__(job_id, client) self.source = source self.destination_uris = destination_uris self._configuration = _ExtractConfiguration() @@ -1065,7 +1065,7 @@ def _build_resource(self): resource = { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { self._JOB_TYPE: { @@ -1106,12 +1106,12 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: Job parsed from ``resource``. """ - name, config = cls._get_resource_config(resource) + job_id, config = cls._get_resource_config(resource) source_config = config['sourceTable'] dataset = Dataset(source_config['datasetId'], client) source = Table(source_config['tableId'], dataset) destination_uris = config['destinationUris'] - job = cls(name, source, destination_uris, client=client) + job = cls(job_id, source, destination_uris, client=client) job._set_properties(resource) return job @@ -1138,8 +1138,8 @@ class _AsyncQueryConfiguration(object): class QueryJob(_AsyncJob): """Asynchronous job: query tables. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: the job's ID, within the project belonging to ``client``. :type query: str :param query: SQL query string @@ -1163,9 +1163,9 @@ class QueryJob(_AsyncJob): _UDF_KEY = 'userDefinedFunctionResources' _QUERY_PARAMETERS_KEY = 'queryParameters' - def __init__(self, name, query, client, + def __init__(self, job_id, query, client, udf_resources=(), query_parameters=()): - super(QueryJob, self).__init__(name, client) + super(QueryJob, self).__init__(job_id, client) self.query = query self.udf_resources = udf_resources self.query_parameters = query_parameters @@ -1306,7 +1306,7 @@ def _build_resource(self): resource = { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { self._JOB_TYPE: { @@ -1399,9 +1399,9 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.RunAsyncQueryJob` :returns: Job parsed from ``resource``. """ - name, config = cls._get_resource_config(resource) + job_id, config = cls._get_resource_config(resource) query = config['query'] - job = cls(name, query, client=client) + job = cls(job_id, query, client=client) job._set_properties(resource) return job @@ -1573,7 +1573,7 @@ def query_results(self): :returns: results instance """ if not self._query_results: - self._query_results = self._client._get_query_results(self.name) + self._query_results = self._client._get_query_results(self.job_id) return self._query_results def done(self): @@ -1585,7 +1585,7 @@ def done(self): # Do not refresh is the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: - self._query_results = self._client._get_query_results(self.name) + self._query_results = self._client._get_query_results(self.job_id) # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index fa03d373674d..7abbbec76b9b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -95,7 +95,7 @@ def from_query_job(cls, job): instance = cls(job.query, job._client, job.udf_resources) instance._job = job job_ref = instance._properties.setdefault('jobReference', {}) - job_ref['jobId'] = job.name + job_ref['jobId'] = job.job_id if job.default_dataset is not None: instance.default_dataset = job.default_dataset if job.use_query_cache is not None: diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index ad93ac2c954e..ab955b64eacf 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -205,7 +205,7 @@ def test_create_table(self): table.create() self.to_delete.insert(0, table) self.assertTrue(table.exists()) - self.assertEqual(table.name, TABLE_NAME) + self.assertEqual(table.table_id, TABLE_NAME) def test_list_tables(self): DATASET_ID = _make_dataset_id('list_tables') @@ -240,7 +240,7 @@ def test_list_tables(self): all_tables = list(iterator) self.assertIsNone(iterator.next_page_token) created = [table for table in all_tables - if (table.name in tables_to_create and + if (table.table_id in tables_to_create and table.dataset_id == DATASET_ID)] self.assertEqual(len(created), len(tables_to_create)) @@ -1185,7 +1185,7 @@ def test_create_table_insert_fetch_nested_schema(self): table.create() self.to_delete.insert(0, table) self.assertTrue(table.exists()) - self.assertEqual(table.name, table_name) + self.assertEqual(table.table_id, table_name) to_insert = [] # Data is in "JSON Lines" format, see http://jsonlines.org/ diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index fffffb9b2b25..cb2e476e3f99 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -333,7 +333,7 @@ def test_get_job_hit(self): job = client.get_job(JOB_ID) self.assertIsInstance(job, QueryJob) - self.assertEqual(job.name, JOB_ID) + self.assertEqual(job.job_id, JOB_ID) self.assertEqual(job.create_disposition, 'CREATE_IF_NEEDED') self.assertEqual(job.write_disposition, 'WRITE_TRUNCATE') @@ -466,7 +466,7 @@ def test_list_jobs_defaults(self): for found, expected in zip(jobs, DATA['jobs']): name = expected['jobReference']['jobId'] self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.name, name) + self.assertEqual(found.job_id, name) self.assertEqual(token, TOKEN) self.assertEqual(len(conn._requested), 1) @@ -523,7 +523,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): for found, expected in zip(jobs, DATA['jobs']): name = expected['jobReference']['jobId'] self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.name, name) + self.assertEqual(found.job_id, name) self.assertEqual(token, TOKEN) self.assertEqual(len(conn._requested), 1) @@ -579,7 +579,7 @@ def test_load_table_from_storage(self): job = client.load_table_from_storage(JOB, destination, SOURCE_URI) self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.source_uris), [SOURCE_URI]) self.assertIs(job.destination, destination) @@ -600,7 +600,7 @@ def test_copy_table(self): job = client.copy_table(JOB, destination, source) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.sources), [source]) self.assertIs(job.destination, destination) @@ -620,7 +620,7 @@ def test_extract_table_to_storage(self): job = client.extract_table_to_storage(JOB, source, DESTINATION) self.assertIsInstance(job, ExtractJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) @@ -636,7 +636,7 @@ def test_run_async_query_defaults(self): job = client.run_async_query(JOB, QUERY) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, []) @@ -656,7 +656,7 @@ def test_run_async_w_udf_resources(self): job = client.run_async_query(JOB, QUERY, udf_resources=udf_resources) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, udf_resources) self.assertEqual(job.query_parameters, []) @@ -676,7 +676,7 @@ def test_run_async_w_query_parameters(self): query_parameters=query_parameters) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, query_parameters) diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 76d5057f6450..0bf0c17c3102 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -262,7 +262,7 @@ def test_job_w_jobid(self): self.assertIsInstance(job, QueryJob) self.assertEqual(job.query, self.QUERY) self.assertIs(job._client, client) - self.assertEqual(job.name, SERVER_GENERATED) + self.assertEqual(job.job_id, SERVER_GENERATED) fetched_later = query.job self.assertIs(fetched_later, job) From cfa7f9c43571f515e19c4ffa76a98eb6ad90ad0e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 15 Sep 2017 12:37:30 -0700 Subject: [PATCH 0270/2016] Parse timestamps in query parameters using canonical format (#3945) * Parse timestamps in query parameters according to BigQuery canonical timestamp format. The timestamp format in query parameters follows the canonical format specified at https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type This fixes a system test error which was happening in the bigquery-b2 branch. * Support more possible timestamp formats. Any of these formats may be returned from the BigQuery API. * Chop and string-replace timestamps into a canonical format. * BQ: fix lint errors. Remove references to table.name --- .../google/cloud/bigquery/_helpers.py | 44 ++++++++- .../google-cloud-bigquery/tests/system.py | 8 ++ .../tests/unit/test__helpers.py | 95 +++++++++++++++++++ 3 files changed, 144 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 5f6edf67dca5..e3d5c0f2f871 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -71,6 +71,39 @@ def _timestamp_from_json(value, field): return _datetime_from_microseconds(1e6 * float(value)) +def _timestamp_query_param_from_json(value, field): + """Coerce 'value' to a datetime, if set or not nullable. + + Args: + value (str): The timestamp. + field (.SchemaField): The field corresponding to the value. + + Returns: + Optional[datetime.datetime]: The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). + """ + if _not_null(value, field): + # Canonical formats for timestamps in BigQuery are flexible. See: + # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type + # The separator between the date and time can be 'T' or ' '. + value = value.replace(' ', 'T', 1) + # The UTC timezone may be formatted as Z or +00:00. + value = value.replace('Z', '') + value = value.replace('+00:00', '') + + if '.' in value: + # YYYY-MM-DDTHH:MM:SS.ffffff + return datetime.datetime.strptime( + value, _RFC3339_MICROS_NO_ZULU).replace(tzinfo=UTC) + else: + # YYYY-MM-DDTHH:MM:SS + return datetime.datetime.strptime( + value, _RFC3339_NO_FRACTION).replace(tzinfo=UTC) + else: + return None + + def _datetime_from_json(value, field): """Coerce 'value' to a datetime, if set or not nullable. @@ -139,6 +172,9 @@ def _record_from_json(value, field): 'RECORD': _record_from_json, } +_QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) +_QUERY_PARAMS_FROM_JSON['TIMESTAMP'] = _timestamp_query_param_from_json + def _row_from_json(row, schema): """Convert JSON row data to row with appropriate types. @@ -454,7 +490,7 @@ def from_api_repr(cls, resource): name = resource.get('name') type_ = resource['parameterType']['type'] value = resource['parameterValue']['value'] - converted = _CELLDATA_FROM_JSON[type_](value, None) + converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) return cls(name, type_, converted) def to_api_repr(self): @@ -576,7 +612,9 @@ def _from_api_repr_scalar(cls, resource): for value in resource['parameterValue']['arrayValues']] converted = [ - _CELLDATA_FROM_JSON[array_type](value, None) for value in values] + _QUERY_PARAMS_FROM_JSON[array_type](value, None) + for value in values + ] return cls(name, array_type, converted) @classmethod @@ -732,7 +770,7 @@ def from_api_repr(cls, resource): converted = ArrayQueryParameter.from_api_repr(struct_resource) else: value = value['value'] - converted = _CELLDATA_FROM_JSON[type_](value, None) + converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) instance.struct_values[key] = converted return instance diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index ab955b64eacf..48d66c7be2e0 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1081,6 +1081,14 @@ def test_dbapi_w_query_parameters(self): }, 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC), }, + { + 'sql': 'SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)', + 'query_parameters': { + 'zoned': datetime.datetime( + 2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC), + }, + 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC), + }, ] for example in examples: msg = 'sql: {} query_parameters: {}'.format( diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index c43f7b6d1ae3..091df8f7355a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -158,6 +158,63 @@ def test_w_base64_encoded_text(self): self.assertEqual(coerced, expected) +class Test_timestamp_query_param_from_json(unittest.TestCase): + + def _call_fut(self, value, field): + from google.cloud.bigquery import _helpers + + return _helpers._timestamp_query_param_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + + def test_w_timestamp_valid(self): + from google.cloud._helpers import UTC + + samples = [ + ( + '2016-12-20 15:58:27.339328+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20 15:58:27+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27.339328+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ( + '2016-12-20 15:58:27.339328Z', + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20 15:58:27Z', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27.339328Z', + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27Z', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ] + for timestamp_str, expected_result in samples: + self.assertEqual( + self._call_fut(timestamp_str, _Field('NULLABLE')), + expected_result) + + def test_w_timestamp_invalid(self): + with self.assertRaises(ValueError): + self._call_fut('definitely-not-a-timestamp', _Field('NULLABLE')) + + class Test_timestamp_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -1820,6 +1877,44 @@ def test_w_scalar(self): self.assertEqual(parameter.type_, 'INT64') self.assertEqual(parameter.value, 123) + def test_w_scalar_timestamp(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud._helpers import UTC + + RESOURCE = { + 'name': 'zoned', + 'parameterType': {'type': 'TIMESTAMP'}, + 'parameterValue': {'value': '2012-03-04 05:06:07+00:00'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'zoned') + self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual( + parameter.value, + datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC)) + + def test_w_scalar_timestamp_micros(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud._helpers import UTC + + RESOURCE = { + 'name': 'zoned', + 'parameterType': {'type': 'TIMESTAMP'}, + 'parameterValue': {'value': '2012-03-04 05:06:07.250000+00:00'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'zoned') + self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual( + parameter.value, + datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC)) + def test_w_array(self): from google.cloud.bigquery._helpers import ArrayQueryParameter From a716eb75a6d9b7237b6dd6adc6c67fc0fec34772 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 15 Sep 2017 16:07:48 -0700 Subject: [PATCH 0271/2016] BigQuery: Adds client.get_dataset() and removes dataset.reload() (#3973) * BigQuery: Adds client.get_dataset() and removes dataset.reload() * BigQuery: changes dataset.name to dataset.dataset_id in test * fixes client.get_dataset() docstring and removes unnecessary test variable --- .../google/cloud/bigquery/client.py | 14 ++++++++ .../google/cloud/bigquery/dataset.py | 26 +++++---------- .../google-cloud-bigquery/tests/system.py | 17 +++++----- .../tests/unit/test_client.py | 25 ++++++++++++++ .../tests/unit/test_dataset.py | 33 ------------------- 5 files changed, 57 insertions(+), 58 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5ab8ff820764..f9a393c0bbb6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -166,6 +166,20 @@ def dataset(self, dataset_name, project=None): return DatasetReference(project, dataset_name) + def get_dataset(self, dataset_ref): + """Fetch the dataset referenced by ``dataset_ref`` + + :type dataset_ref: + :class:`google.cloud.bigquery.dataset.DatasetReference` + :param dataset_ref: the dataset to use. + + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` + :returns: a ``Dataset`` instance + """ + api_response = self._connection.api_request( + method='GET', path=dataset_ref.path) + return Dataset.from_api_repr(api_response, self) + def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index cd31f737e693..8166e4fbec5e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -127,6 +127,15 @@ def dataset_id(self): """ return self._dataset_id + @property + def path(self): + """URL path for the dataset's APIs. + + :rtype: str + :returns: the path based on project and dataset name. + """ + return '/projects/%s/datasets/%s' % (self.project_id, self.dataset_id) + def table(self, table_id): """Constructs a TableReference. @@ -505,23 +514,6 @@ def exists(self, client=None): else: return True - def reload(self, client=None): - """API call: refresh dataset properties via a GET request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - - api_response = client._connection.api_request( - method='GET', path=self.path) - self._set_properties(api_response) - def patch(self, client=None, **kw): """API call: update individual dataset properties via a PATCH request. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 48d66c7be2e0..432dfa18b84d 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -121,19 +121,20 @@ def test_create_dataset(self): self.assertTrue(dataset.exists()) self.assertEqual(dataset.dataset_id, DATASET_ID) - def test_reload_dataset(self): - DATASET_ID = _make_dataset_id('reload_dataset') - dataset = Dataset(DATASET_ID, Config.CLIENT) + def test_get_dataset(self): + DATASET_ID = _make_dataset_id('get_dataset') + client = Config.CLIENT + dataset = Dataset(DATASET_ID, client) dataset.friendly_name = 'Friendly' dataset.description = 'Description' - retry_403(dataset.create)() self.to_delete.append(dataset) + dataset_ref = client.dataset(DATASET_ID) + + got = client.get_dataset(dataset_ref) - other = Dataset(DATASET_ID, Config.CLIENT) - other.reload() - self.assertEqual(other.friendly_name, 'Friendly') - self.assertEqual(other.description, 'Description') + self.assertEqual(got.friendly_name, 'Friendly') + self.assertEqual(got.description, 'Description') def test_patch_dataset(self): dataset = Dataset(_make_dataset_id('patch_dataset'), Config.CLIENT) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index cb2e476e3f99..a011c59fec2a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -272,6 +272,31 @@ def test_dataset_with_default_project(self): self.assertEqual(dataset.dataset_id, DATASET) self.assertEqual(dataset.project_id, PROJECT) + def test_get_dataset(self): + project = 'PROJECT' + dataset_id = 'dataset_id' + path = 'projects/%s/datasets/%s' % (project, dataset_id) + creds = _make_credentials() + http = object() + client = self._make_one(project=project, credentials=creds, _http=http) + resource = { + 'id': '%s:%s' % (project, dataset_id), + 'datasetReference': { + 'projectId': project, + 'datasetId': dataset_id, + }, + } + conn = client._connection = _Connection(resource) + dataset_ref = client.dataset(dataset_id) + + dataset = client.get_dataset(dataset_ref) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % path) + self.assertEqual(dataset.dataset_id, dataset_id) + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 0689e93b0f20..673fa69731cd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -520,39 +520,6 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) - def test_reload_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - dataset.reload() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verify_resource_properties(dataset, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - - dataset.reload(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verify_resource_properties(dataset, RESOURCE) - def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) From 36af46e6f1e73b7515a39843fd6a5237db571576 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 18 Sep 2017 13:57:15 -0700 Subject: [PATCH 0272/2016] BigQuery: Changes DatasetReference project_id property to project to match Dataset (#3993) --- .../google/cloud/bigquery/dataset.py | 14 +++++++------- .../tests/unit/test_client.py | 4 ++-- .../tests/unit/test_dataset.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 8166e4fbec5e..0aba2c0928b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -98,25 +98,25 @@ class DatasetReference(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets - :type project_id: str - :param project_id: the ID of the project + :type project: str + :param project: the ID of the project :type dataset_id: str :param dataset_id: the ID of the dataset """ - def __init__(self, project_id, dataset_id): - self._project_id = project_id + def __init__(self, project, dataset_id): + self._project = project self._dataset_id = dataset_id @property - def project_id(self): + def project(self): """Project ID of the dataset. :rtype: str :returns: the project ID. """ - return self._project_id + return self._project @property def dataset_id(self): @@ -134,7 +134,7 @@ def path(self): :rtype: str :returns: the path based on project and dataset name. """ - return '/projects/%s/datasets/%s' % (self.project_id, self.dataset_id) + return '/projects/%s/datasets/%s' % (self.project, self.dataset_id) def table(self, table_id): """Constructs a TableReference. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index a011c59fec2a..24236a93f497 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -257,7 +257,7 @@ def test_dataset_with_specified_project(self): dataset = client.dataset(DATASET, PROJECT) self.assertIsInstance(dataset, DatasetReference) self.assertEqual(dataset.dataset_id, DATASET) - self.assertEqual(dataset.project_id, PROJECT) + self.assertEqual(dataset.project, PROJECT) def test_dataset_with_default_project(self): from google.cloud.bigquery.dataset import DatasetReference @@ -270,7 +270,7 @@ def test_dataset_with_default_project(self): dataset = client.dataset(DATASET) self.assertIsInstance(dataset, DatasetReference) self.assertEqual(dataset.dataset_id, DATASET) - self.assertEqual(dataset.project_id, PROJECT) + self.assertEqual(dataset.project, PROJECT) def test_get_dataset(self): project = 'PROJECT' diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 673fa69731cd..a3ee600565ba 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -98,7 +98,7 @@ def _make_one(self, *args, **kw): def test_ctor_defaults(self): dataset_ref = self._make_one('some-project-1', 'dataset_1') - self.assertEqual(dataset_ref.project_id, 'some-project-1') + self.assertEqual(dataset_ref.project, 'some-project-1') self.assertEqual(dataset_ref.dataset_id, 'dataset_1') def test_table(self): From 435af668e98d0be1155436a32cb374b09a6c6162 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 18 Sep 2017 17:39:35 -0400 Subject: [PATCH 0273/2016] bigquery: add client.create_dataset; remove dataset.create (#3982) * bigquery: add client.create_dataset; remove dataset.create * fix lint * increase coverage to 100% * really fix coverage * fix lint --- .../google/cloud/bigquery/client.py | 31 ++++- .../google/cloud/bigquery/dataset.py | 40 +++---- .../google-cloud-bigquery/tests/system.py | 106 ++++++------------ .../tests/unit/test_client.py | 85 ++++++++++++++ .../tests/unit/test_dataset.py | 103 ----------------- 5 files changed, 166 insertions(+), 199 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f9a393c0bbb6..f17f43deaf5c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -148,11 +148,11 @@ def list_datasets(self, include_all=False, max_results=None, max_results=max_results, extra_params=extra_params) - def dataset(self, dataset_name, project=None): + def dataset(self, dataset_id, project=None): """Construct a reference to a dataset. - :type dataset_name: str - :param dataset_name: Name of the dataset. + :type dataset_id: str + :param dataset_id: ID of the dataset. :type project: str :param project: (Optional) project ID for the dataset (defaults to @@ -164,7 +164,30 @@ def dataset(self, dataset_name, project=None): if project is None: project = self.project - return DatasetReference(project, dataset_name) + return DatasetReference(project, dataset_id) + + def create_dataset(self, dataset): + """API call: create the dataset via a PUT request. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert + + :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset` + :param dataset: A ``Dataset`` populated with the desired initial state. + If project is missing, it defaults to the project of + the client. + + :rtype: ":class:`~google.cloud.bigquery.dataset.Dataset`" + :returns: a new ``Dataset`` returned from the service. + """ + if dataset.project is None: + dataset._project = self.project + path = '/projects/%s/datasets' % (dataset.project,) + api_response = self._connection.api_request( + method='POST', path=path, data=dataset._build_resource()) + ds = Dataset(dataset.dataset_id, project=dataset.project, client=self) + ds._set_properties(api_response) + return ds def get_dataset(self, dataset_ref): """Fetch the dataset referenced by ``dataset_ref`` diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 0aba2c0928b6..5a592adc4c29 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -155,8 +155,8 @@ class Dataset(object): :param dataset_id: the ID of the dataset :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + :param client: (Optional) A client which holds credentials and project + configuration for the dataset (which requires a project). :type access_entries: list of :class:`AccessEntry` :param access_entries: roles granted to entities for this dataset @@ -168,13 +168,17 @@ class Dataset(object): _access_entries = None - def __init__(self, dataset_id, client, access_entries=(), project=None): - self.dataset_id = dataset_id + def __init__(self, + dataset_id, + client=None, + access_entries=(), + project=None): + self._dataset_id = dataset_id self._client = client self._properties = {} # Let the @property do validation. self.access_entries = access_entries - self._project = project or client.project + self._project = project or (client and client.project) @property def project(self): @@ -229,6 +233,15 @@ def created(self): # creation_time will be in milliseconds. return _datetime_from_microseconds(1000.0 * creation_time) + @property + def dataset_id(self): + """Dataset ID. + + :rtype: str + :returns: the dataset ID. + """ + return self._dataset_id + @property def full_dataset_id(self): """ID for the dataset resource, in the form "project_id:dataset_id". @@ -473,23 +486,6 @@ def _build_resource(self): return resource - def create(self, client=None): - """API call: create the dataset via a PUT request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - path = '/projects/%s/datasets' % (self.project,) - api_response = client._connection.api_request( - method='POST', path=path, data=self._build_resource()) - self._set_properties(api_response) - def exists(self, client=None): """API call: test for the existence of the dataset via a GET request diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 432dfa18b84d..f91f53c24996 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -112,10 +112,7 @@ def _still_in_use(bad_request): def test_create_dataset(self): DATASET_ID = _make_dataset_id('create_dataset') - dataset = Dataset(DATASET_ID, Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) self.to_delete.append(dataset) self.assertTrue(dataset.exists()) @@ -124,10 +121,10 @@ def test_create_dataset(self): def test_get_dataset(self): DATASET_ID = _make_dataset_id('get_dataset') client = Config.CLIENT - dataset = Dataset(DATASET_ID, client) - dataset.friendly_name = 'Friendly' - dataset.description = 'Description' - retry_403(dataset.create)() + dataset_arg = Dataset(DATASET_ID) + dataset_arg.friendly_name = 'Friendly' + dataset_arg.description = 'Description' + dataset = retry_403(client.create_dataset)(dataset_arg) self.to_delete.append(dataset) dataset_ref = client.dataset(DATASET_ID) @@ -137,10 +134,8 @@ def test_get_dataset(self): self.assertEqual(got.description, 'Description') def test_patch_dataset(self): - dataset = Dataset(_make_dataset_id('patch_dataset'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('patch_dataset'))) self.to_delete.append(dataset) self.assertTrue(dataset.exists()) @@ -151,10 +146,8 @@ def test_patch_dataset(self): self.assertEqual(dataset.description, 'Description') def test_update_dataset(self): - dataset = Dataset(_make_dataset_id('update_dataset'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('update_dataset'))) self.to_delete.append(dataset) self.assertTrue(dataset.exists()) @@ -177,8 +170,8 @@ def test_list_datasets(self): 'newest' + unique_resource_id(), ] for dataset_id in datasets_to_create: - created_dataset = Dataset(dataset_id, Config.CLIENT) - retry_403(created_dataset.create)() + created_dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(dataset_id)) self.to_delete.append(created_dataset) # Retrieve the datasets. @@ -191,10 +184,8 @@ def test_list_datasets(self): self.assertEqual(len(created), len(datasets_to_create)) def test_create_table(self): - dataset = Dataset(_make_dataset_id('create_table'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('create_table'))) self.to_delete.append(dataset) TABLE_NAME = 'test_table' @@ -210,10 +201,7 @@ def test_create_table(self): def test_list_tables(self): DATASET_ID = _make_dataset_id('list_tables') - dataset = Dataset(DATASET_ID, Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) self.to_delete.append(dataset) # Retrieve tables before any are created for the dataset. @@ -246,10 +234,8 @@ def test_list_tables(self): self.assertEqual(len(created), len(tables_to_create)) def test_patch_table(self): - dataset = Dataset(_make_dataset_id('patch_table'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('patch_table'))) self.to_delete.append(dataset) TABLE_NAME = 'test_table' @@ -268,10 +254,8 @@ def test_patch_table(self): self.assertEqual(table.description, 'Description') def test_update_table(self): - dataset = Dataset(_make_dataset_id('update_table'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('update_table'))) self.to_delete.append(dataset) TABLE_NAME = 'test_table' @@ -311,11 +295,9 @@ def test_insert_data_then_dump_table(self): ('Bhettye Rhubble', 27, None), ] ROW_IDS = range(len(ROWS)) - dataset = Dataset( - _make_dataset_id('insert_data_then_dump'), Config.CLIENT) - self.assertFalse(dataset.exists()) - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('insert_data_then_dump'))) self.to_delete.append(dataset) TABLE_NAME = 'test_table' @@ -355,10 +337,8 @@ def test_load_table_from_local_file_then_dump_table(self): ] TABLE_NAME = 'test_table' - dataset = Dataset( - _make_dataset_id('load_local_then_dump'), Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('load_local_then_dump'))) self.to_delete.append(dataset) full_name = bigquery.SchemaField('full_name', 'STRING', @@ -404,10 +384,8 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("orange", 590), ("red", 650)] - dataset = Dataset( - _make_dataset_id('load_local_then_dump'), Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('load_local_then_dump'))) self.to_delete.append(dataset) table = dataset.table(TABLE_NAME) @@ -468,10 +446,8 @@ def test_load_table_from_storage_then_dump_table(self): self.to_delete.insert(0, blob) - dataset = Dataset( - _make_dataset_id('load_gcs_then_dump'), Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('load_gcs_then_dump'))) self.to_delete.append(dataset) full_name = bigquery.SchemaField('full_name', 'STRING', @@ -538,10 +514,8 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.to_delete.insert(0, blob) - dataset = Dataset( - _make_dataset_id('load_gcs_then_dump'), Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('load_gcs_then_dump'))) self.to_delete.append(dataset) table = dataset.table(table_name) @@ -576,9 +550,7 @@ def test_job_cancel(self): TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) - dataset = Dataset(DATASET_ID, Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) self.to_delete.append(dataset) full_name = bigquery.SchemaField('full_name', 'STRING', @@ -764,16 +736,15 @@ def test_dbapi_fetchall(self): rows = Config.CURSOR.fetchall() self.assertEqual(rows, [(1, 2), (3, 4), (5, 6)]) - def _load_table_for_dml(self, rows, dataset_name, table_name): + def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile - dataset = Dataset(dataset_name, Config.CLIENT) - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_id)) self.to_delete.append(dataset) greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') - table = dataset.table(table_name, schema=[greeting]) + table = dataset.table(table_id, schema=[greeting]) table.create() self.to_delete.insert(0, table) @@ -1162,10 +1133,8 @@ def test_insert_nested_nested(self): ('Some value', record) ] table_name = 'test_table' - dataset = Dataset( - _make_dataset_id('issue_2951'), Config.CLIENT) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('issue_2951'))) self.to_delete.append(dataset) table = dataset.table(table_name, schema=schema) @@ -1182,11 +1151,8 @@ def test_insert_nested_nested(self): def test_create_table_insert_fetch_nested_schema(self): table_name = 'test_table' - dataset = Dataset( - _make_dataset_id('create_table_nested_schema'), Config.CLIENT) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(_make_dataset_id('create_table_nested_schema'))) self.to_delete.append(dataset) schema = _load_json_schema() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 24236a93f497..12282e47d931 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -297,6 +297,91 @@ def test_get_dataset(self): self.assertEqual(req['path'], '/%s' % path) self.assertEqual(dataset.dataset_id, dataset_id) + def test_create_dataset_minimal(self): + from google.cloud.bigquery.dataset import Dataset + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + PATH = 'projects/%s/datasets' % PROJECT + RESOURCE = { + 'datasetReference': + {'projectId': PROJECT, 'datasetId': DS_ID}, + 'etag': "etag", + 'id': "%s:%s" % (PROJECT, DS_ID), + } + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection(RESOURCE) + ds = client.create_dataset(Dataset(DS_ID)) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'datasetReference': + {'projectId': PROJECT, 'datasetId': DS_ID}, + } + self.assertEqual(req['data'], SENT) + self.assertEqual(ds.dataset_id, DS_ID) + self.assertEqual(ds.project, PROJECT) + self.assertEqual(ds.etag, RESOURCE['etag']) + self.assertEqual(ds.full_dataset_id, RESOURCE['id']) + + def test_create_dataset_w_attrs(self): + from google.cloud.bigquery.dataset import Dataset, AccessEntry + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + PATH = 'projects/%s/datasets' % PROJECT + DESCRIPTION = 'DESC' + FRIENDLY_NAME = 'FN' + USER_EMAIL = 'phred@example.com' + VIEW = { + 'projectId': 'my-proj', + 'datasetId': 'starry-skies', + 'tableId': 'northern-hemisphere', + } + RESOURCE = { + 'datasetReference': + {'projectId': PROJECT, 'datasetId': DS_ID}, + 'etag': "etag", + 'id': "%s:%s" % (PROJECT, DS_ID), + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'access': [ + {'role': 'OWNER', 'userByEmail': USER_EMAIL}, + {'view': VIEW}], + } + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection(RESOURCE) + entries = [AccessEntry('OWNER', 'userByEmail', USER_EMAIL), + AccessEntry(None, 'view', VIEW)] + ds_arg = Dataset(DS_ID, project=PROJECT, access_entries=entries) + ds_arg.description = DESCRIPTION + ds_arg.friendly_name = FRIENDLY_NAME + ds = client.create_dataset(ds_arg) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'datasetReference': + {'projectId': PROJECT, 'datasetId': DS_ID}, + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'access': [ + {'role': 'OWNER', 'userByEmail': USER_EMAIL}, + {'view': VIEW}], + } + self.assertEqual(req['data'], SENT) + self.assertEqual(ds.dataset_id, DS_ID) + self.assertEqual(ds.project, PROJECT) + self.assertEqual(ds.etag, RESOURCE['etag']) + self.assertEqual(ds.full_dataset_id, RESOURCE['id']) + self.assertEqual(ds.description, DESCRIPTION) + self.assertEqual(ds.friendly_name, FRIENDLY_NAME) + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index a3ee600565ba..8fcc6a87a613 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -386,109 +386,6 @@ def test__parse_access_entries_w_extra_keys(self): with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) - def test_create_w_bound_client(self): - PATH = 'projects/%s/datasets' % self.PROJECT - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - dataset.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - } - self.assertEqual(req['data'], SENT) - self._verify_resource_properties(dataset, RESOURCE) - - def test_create_w_alternate_client(self): - from google.cloud.bigquery.dataset import AccessEntry - - PATH = 'projects/%s/datasets' % self.PROJECT - USER_EMAIL = 'phred@example.com' - GROUP_EMAIL = 'group-name@lists.example.com' - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - dataset.friendly_name = TITLE - dataset.description = DESCRIPTION - VIEW = { - 'projectId': 'my-proj', - 'datasetId': 'starry-skies', - 'tableId': 'northern-hemisphere', - } - dataset.access_entries = [ - AccessEntry('OWNER', 'userByEmail', USER_EMAIL), - AccessEntry('OWNER', 'groupByEmail', GROUP_EMAIL), - AccessEntry('READER', 'domain', 'foo.com'), - AccessEntry('READER', 'specialGroup', 'projectReaders'), - AccessEntry('WRITER', 'specialGroup', 'projectWriters'), - AccessEntry(None, 'view', VIEW), - ] - - dataset.create(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - }, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - 'access': [ - {'role': 'OWNER', 'userByEmail': USER_EMAIL}, - {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL}, - {'role': 'READER', 'domain': 'foo.com'}, - {'role': 'READER', 'specialGroup': 'projectReaders'}, - {'role': 'WRITER', 'specialGroup': 'projectWriters'}, - {'view': VIEW}, - ], - } - self.assertEqual(req['data'], SENT) - self._verify_resource_properties(dataset, RESOURCE) - - def test_create_w_missing_output_properties(self): - # In the wild, the resource returned from 'dataset.create' sometimes - # lacks 'creationTime' / 'lastModifiedTime' - PATH = 'projects/%s/datasets' % (self.PROJECT,) - RESOURCE = self._makeResource() - del RESOURCE['creationTime'] - del RESOURCE['lastModifiedTime'] - self.WHEN = None - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - dataset.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - } - self.assertEqual(req['data'], SENT) - self._verify_resource_properties(dataset, RESOURCE) - def test_exists_miss_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn = _Connection() From fcc9d864edadb67eec38dfc5cf576f1bedce80df Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 18 Sep 2017 19:40:31 -0400 Subject: [PATCH 0274/2016] bigquery: remove dataset.exists (#3996) * bigquery: remove dataset.exists Dataset won't be able to support this method when we remove its client. Don't add client.dataset_exists; the user can use client.get_dataset and catch NotFound. * fix lint * fix lint agian * fix more lint --- .../google/cloud/bigquery/dataset.py | 25 ----------- .../google-cloud-bigquery/tests/system.py | 19 ++++++--- .../tests/unit/test_dataset.py | 42 +------------------ 3 files changed, 15 insertions(+), 71 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 5a592adc4c29..dcb52d20eacb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -17,7 +17,6 @@ from google.api.core import page_iterator from google.cloud._helpers import _datetime_from_microseconds -from google.cloud.exceptions import NotFound from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference @@ -486,30 +485,6 @@ def _build_resource(self): return resource - def exists(self, client=None): - """API call: test for the existence of the dataset via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: bool - :returns: Boolean indicating existence of the dataset. - """ - client = self._require_client(client) - - try: - client._connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) - except NotFound: - return False - else: - return True - def patch(self, client=None, **kw): """API call: update individual dataset properties via a PATCH request. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index f91f53c24996..d40688104aba 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -25,10 +25,10 @@ import six from google.cloud import bigquery -from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import Dataset, DatasetReference from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi -from google.cloud.exceptions import Forbidden +from google.cloud.exceptions import Forbidden, NotFound from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState @@ -91,7 +91,6 @@ def setUp(self): self.to_delete = [] def tearDown(self): - from google.cloud.bigquery.dataset import Dataset from google.cloud.storage import Bucket from google.cloud.exceptions import BadRequest from google.cloud.exceptions import Conflict @@ -115,7 +114,7 @@ def test_create_dataset(self): dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) self.to_delete.append(dataset) - self.assertTrue(dataset.exists()) + self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.dataset_id, DATASET_ID) def test_get_dataset(self): @@ -138,7 +137,7 @@ def test_patch_dataset(self): Dataset(_make_dataset_id('patch_dataset'))) self.to_delete.append(dataset) - self.assertTrue(dataset.exists()) + self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) dataset.patch(friendly_name='Friendly', description='Description') @@ -150,7 +149,7 @@ def test_update_dataset(self): Dataset(_make_dataset_id('update_dataset'))) self.to_delete.append(dataset) - self.assertTrue(dataset.exists()) + self.assertTrue(_dataset_exists(dataset)) after = [entry for entry in dataset.access_entries if entry.entity_id != 'projectWriters'] dataset.access_entries = after @@ -1214,3 +1213,11 @@ def test_create_table_insert_fetch_nested_schema(self): def _job_done(instance): return instance.state.lower() == 'done' + + +def _dataset_exists(ds): + try: + Config.CLIENT.get_dataset(DatasetReference(ds.project, ds.dataset_id)) + return True + except NotFound: + return False diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 8fcc6a87a613..4c5cfb57265e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -386,37 +386,6 @@ def test__parse_access_entries_w_extra_keys(self): with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) - def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - self.assertFalse(dataset.exists()) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) - - def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - - self.assertTrue(dataset.exists(client=CLIENT2)) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) - def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) @@ -727,13 +696,6 @@ def __init__(self, *responses): self._requested = [] def api_request(self, **kw): - from google.cloud.exceptions import NotFound - self._requested.append(kw) - - try: - response, self._responses = self._responses[0], self._responses[1:] - except IndexError: - raise NotFound('miss') - else: - return response + response, self._responses = self._responses[0], self._responses[1:] + return response From d8280839be5e683b3df38136393c523f5f06e141 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 19 Sep 2017 09:59:05 -0700 Subject: [PATCH 0275/2016] BigQuery: Updates Table constructor to use TableReference as parameter (#3997) * wip update Table contructor * BigQuery: Updates Table constructor to use TableReference as parameter * fixes circular import error with Python 2.7 --- .../google/cloud/bigquery/client.py | 2 + .../google/cloud/bigquery/dataset.py | 6 +- .../google/cloud/bigquery/job.py | 13 +- .../google/cloud/bigquery/table.py | 28 +- .../tests/unit/test_job.py | 14 +- .../tests/unit/test_table.py | 426 ++++++++++-------- 6 files changed, 282 insertions(+), 207 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f17f43deaf5c..1b9e9a522a15 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -14,6 +14,8 @@ """Client for interacting with the Google BigQuery API.""" +from __future__ import absolute_import + from google.api.core import page_iterator from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index dcb52d20eacb..29dc3af19458 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -13,6 +13,9 @@ # limitations under the License. """Define API Datasets.""" + +from __future__ import absolute_import + import six from google.api.core import page_iterator @@ -598,7 +601,8 @@ def table(self, name, schema=()): :rtype: :class:`google.cloud.bigquery.table.Table` :returns: a new ``Table`` instance """ - return Table(name, dataset=self, schema=schema) + table_ref = TableReference(self, name) + return Table(table_ref, schema=schema, client=self._client) def _item_to_table(iterator, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 76a7d476cf6b..5807fcd25e0b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -26,6 +26,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import ArrayQueryParameter @@ -837,7 +838,8 @@ def from_api_repr(cls, resource, client): job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] dataset = Dataset(dest_config['datasetId'], client) - destination = Table(dest_config['tableId'], dataset) + table_ref = TableReference(dataset, dest_config['tableId']) + destination = Table(table_ref, client=client) source_urls = config.get('sourceUris', ()) job = cls(job_id, destination, source_urls, client=client) job._set_properties(resource) @@ -952,7 +954,8 @@ def from_api_repr(cls, resource, client): job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] dataset = Dataset(dest_config['datasetId'], client) - destination = Table(dest_config['tableId'], dataset) + table_ref = TableReference(dataset, dest_config['tableId']) + destination = Table(table_ref, client=client) sources = [] source_configs = config.get('sourceTables') if source_configs is None: @@ -963,7 +966,8 @@ def from_api_repr(cls, resource, client): source_configs = [single] for source_config in source_configs: dataset = Dataset(source_config['datasetId'], client) - sources.append(Table(source_config['tableId'], dataset)) + table_ref = TableReference(dataset, source_config['tableId']) + sources.append(Table(table_ref, client=client)) job = cls(job_id, destination, sources, client=client) job._set_properties(resource) return job @@ -1109,7 +1113,8 @@ def from_api_repr(cls, resource, client): job_id, config = cls._get_resource_config(resource) source_config = config['sourceTable'] dataset = Dataset(source_config['datasetId'], client) - source = Table(source_config['tableId'], dataset) + table_ref = TableReference(dataset, source_config['tableId']) + source = Table(table_ref, client=client) destination_uris = config['destinationUris'] job = cls(job_id, source, destination_uris, client=client) job._set_properties(resource) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f9c07b1e8ee6..fe1a9d3b4ec5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -14,6 +14,8 @@ """Define API Datasets.""" +from __future__ import absolute_import + import datetime import os @@ -90,11 +92,8 @@ class Table(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables - :type table_id: str - :param table_id: the ID of the table - - :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` - :param dataset: The dataset which contains the table. + :type table_ref: :class:`google.cloud.bigquery.table.TableReference` + :param table_ref: a pointer to a table :type schema: list of :class:`SchemaField` :param schema: The table's schema @@ -102,12 +101,13 @@ class Table(object): _schema = None - def __init__(self, table_id, dataset, schema=()): - self._table_id = table_id - self._dataset = dataset + def __init__(self, table_ref, schema=(), client=None): + self._table_id = table_ref.table_id + self._dataset = table_ref.dataset self._properties = {} # Let the @property do validation. self.schema = schema + self._client = client @property def project(self): @@ -477,7 +477,7 @@ def list_partitions(self, client=None): return [row[0] for row in query.rows] @classmethod - def from_api_repr(cls, resource, dataset): + def from_api_repr(cls, resource, client): """Factory: construct a table given its API representation :type resource: dict @@ -489,12 +489,18 @@ def from_api_repr(cls, resource, dataset): :rtype: :class:`google.cloud.bigquery.table.Table` :returns: Table parsed from ``resource``. """ + from google.cloud.bigquery import dataset + if ('tableReference' not in resource or 'tableId' not in resource['tableReference']): raise KeyError('Resource lacks required identity information:' '["tableReference"]["tableId"]') + project_id = resource['tableReference']['projectId'] table_id = resource['tableReference']['tableId'] - table = cls(table_id, dataset=dataset) + dataset_id = resource['tableReference']['datasetId'] + dataset_ref = dataset.DatasetReference(project_id, dataset_id) + + table = cls(dataset_ref.table(table_id), client=client) table._set_properties(resource) return table @@ -510,7 +516,7 @@ def _require_client(self, client): :returns: The client passed in or the currently bound client. """ if client is None: - client = self._dataset._client + client = self._client return client def _set_properties(self, api_response): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 7c9a84f4503a..11f4dec9870c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2169,6 +2169,7 @@ def test_begin_w_bound_client(self): def test_begin_w_alternate_client(self): from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.dataset import Table PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2203,8 +2204,10 @@ def test_begin_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_NAME, self.QUERY, client1) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) dataset = Dataset(DS_ID, client1) - table = Table(TABLE, dataset) + table_ref = dataset_ref.table(TABLE) + table = Table(table_ref, client=client1) job.allow_large_results = True job.create_disposition = 'CREATE_NEVER' @@ -2460,8 +2463,8 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - from google.cloud.bigquery.dataset import Dataset - from google.cloud.bigquery.dataset import Table + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) DS_ID = 'DATASET' @@ -2471,8 +2474,9 @@ def test_reload_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, None, client) - dataset = Dataset(DS_ID, client) - table = Table(DEST_TABLE, dataset) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(DEST_TABLE) + table = Table(table_ref, client=client) job.destination = table job.reload() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index f076c6d39938..7cc7bffe7080 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -21,6 +21,8 @@ from six.moves import http_client import pytest +from google.cloud.bigquery.dataset import DatasetReference + class _SchemaBase(object): @@ -166,8 +168,10 @@ def _verifyResourceProperties(self, table, resource): def test_ctor(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) + self.assertEqual(table.table_id, self.TABLE_NAME) self.assertIs(table._dataset, dataset) self.assertEqual(table.project, self.PROJECT) @@ -198,17 +202,20 @@ def test_ctor_w_schema(self): from google.cloud.bigquery.table import SchemaField client = _Client(self.PROJECT) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) + self.assertEqual(table.schema, [full_name, age]) def test_num_bytes_getter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) # Check with no value set. self.assertIsNone(table.num_bytes) @@ -229,8 +236,9 @@ def test_num_bytes_getter(self): def test_num_rows_getter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) # Check with no value set. self.assertIsNone(table.num_rows) @@ -251,8 +259,9 @@ def test_num_rows_getter(self): def test_schema_setter_non_list(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(TypeError): table.schema = object() @@ -260,8 +269,9 @@ def test_schema_setter_invalid_field(self): from google.cloud.bigquery.table import SchemaField client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') with self.assertRaises(ValueError): table.schema = [full_name, object()] @@ -270,8 +280,9 @@ def test_schema_setter(self): from google.cloud.bigquery.table import SchemaField client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table.schema = [full_name, age] @@ -289,8 +300,9 @@ def test_props_set_by_server(self): URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table._properties['creationTime'] = _millis(CREATED) table._properties['etag'] = 'ETAG' table._properties['lastModifiedTime'] = _millis(MODIFIED) @@ -311,22 +323,25 @@ def test_props_set_by_server(self): def test_description_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.description = 12345 def test_description_setter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.description = 'DESCRIPTION' self.assertEqual(table.description, 'DESCRIPTION') def test_expires_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.expires = object() @@ -336,72 +351,82 @@ def test_expires_setter(self): WHEN = datetime.datetime(2015, 7, 28, 16, 39, tzinfo=UTC) client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.expires = WHEN self.assertEqual(table.expires, WHEN) def test_friendly_name_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.friendly_name = 12345 def test_friendly_name_setter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.friendly_name = 'FRIENDLY' self.assertEqual(table.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.location = 12345 def test_location_setter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.location = 'LOCATION' self.assertEqual(table.location, 'LOCATION') def test_view_query_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.view_query = 12345 def test_view_query_setter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.view_query = 'select * from foo' self.assertEqual(table.view_query, 'select * from foo') def test_view_query_deleter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.view_query = 'select * from foo' del table.view_query self.assertIsNone(table.view_query) def test_view_use_legacy_sql_setter_bad_value(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.view_use_legacy_sql = 12345 def test_view_use_legacy_sql_setter(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.view_use_legacy_sql = False table.view_query = 'select * from foo' self.assertEqual(table.view_use_legacy_sql, False) @@ -410,16 +435,14 @@ def test_view_use_legacy_sql_setter(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) - dataset = _Dataset(client) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, dataset) + klass.from_api_repr(RESOURCE, client) def test_from_api_repr_bare(self): self._setUpConstants() client = _Client(self.PROJECT) - dataset = _Dataset(client) RESOURCE = { 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), 'tableReference': { @@ -430,18 +453,17 @@ def test_from_api_repr_bare(self): 'type': 'TABLE', } klass = self._get_target_class() - table = klass.from_api_repr(RESOURCE, dataset) + table = klass.from_api_repr(RESOURCE, client) self.assertEqual(table.table_id, self.TABLE_NAME) - self.assertIs(table._dataset, dataset) + self.assertIs(table._client, client) self._verifyResourceProperties(table, RESOURCE) def test_from_api_repr_w_properties(self): client = _Client(self.PROJECT) - dataset = _Dataset(client) RESOURCE = self._makeResource() klass = self._get_target_class() - table = klass.from_api_repr(RESOURCE, dataset) - self.assertIs(table._dataset._client, client) + table = klass.from_api_repr(RESOURCE, client) + self.assertIs(table._client, client) self._verifyResourceProperties(table, RESOURCE) def test_create_new_day_partitioned_table(self): @@ -449,8 +471,9 @@ def test_create_new_day_partitioned_table(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.partitioning_type = 'DAY' table.create() @@ -475,11 +498,12 @@ def test_create_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) table.create() @@ -506,11 +530,12 @@ def test_create_w_partition_no_expire(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partitioning_type) table.partitioning_type = "DAY" @@ -541,11 +566,12 @@ def test_create_w_partition_and_expire(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 self.assertEqual(table.partitioning_type, "DAY") @@ -575,11 +601,12 @@ def test_partition_type_setter_bad_type(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) with self.assertRaises(ValueError): table.partitioning_type = 123 @@ -589,11 +616,12 @@ def test_partition_type_setter_unknown_value(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) with self.assertRaises(ValueError): table.partitioning_type = "HASH" @@ -603,11 +631,12 @@ def test_partition_type_setter_w_known_value(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partitioning_type) table.partitioning_type = 'DAY' self.assertEqual(table.partitioning_type, 'DAY') @@ -618,11 +647,12 @@ def test_partition_type_setter_w_none(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) table._properties['timePartitioning'] = {'type': 'DAY'} table.partitioning_type = None self.assertIsNone(table.partitioning_type) @@ -634,11 +664,12 @@ def test_partition_experation_bad_type(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) with self.assertRaises(ValueError): table.partition_expiration = "NEVER" @@ -648,11 +679,12 @@ def test_partition_expiration_w_integer(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 self.assertEqual(table.partitioning_type, "DAY") @@ -664,11 +696,12 @@ def test_partition_expiration_w_none(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partition_expiration) table._properties['timePartitioning'] = { 'type': 'DAY', @@ -684,11 +717,12 @@ def test_partition_expiration_w_none_no_partition_set(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertIsNone(table.partition_expiration) table.partition_expiration = None self.assertIsNone(table.partitioning_type) @@ -700,11 +734,12 @@ def test_list_partitions(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) client._query_results = [(20160804, None), (20160805, None)] - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) self.assertEqual(table.list_partitions(), [20160804, 20160805]) def test_create_w_alternate_client(self): @@ -729,8 +764,9 @@ def test_create_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client=client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) table.friendly_name = TITLE table.description = DESCRIPTION table.view_query = QUERY @@ -766,11 +802,12 @@ def test_create_w_missing_output_properties(self): self.WHEN = None conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) table.create() @@ -795,8 +832,9 @@ def test_exists_miss_w_bound_client(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) self.assertFalse(table.exists()) @@ -813,8 +851,9 @@ def test_exists_hit_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) self.assertTrue(table.exists(client=client2)) @@ -831,8 +870,9 @@ def test_reload_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.reload() @@ -850,8 +890,9 @@ def test_reload_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) table.reload(client=client2) @@ -866,8 +907,9 @@ def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError): table.patch(expires='BOGUS') @@ -882,8 +924,9 @@ def test_patch_w_bound_client(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.patch(description=DESCRIPTION, friendly_name=TITLE, @@ -922,8 +965,9 @@ def test_patch_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='NULLABLE') @@ -958,8 +1002,9 @@ def test_patch_w_schema_None(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.patch(schema=None) @@ -983,11 +1028,12 @@ def test_update_w_bound_client(self): RESOURCE['friendlyName'] = TITLE conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age], + client=client) table.description = DESCRIPTION table.friendly_name = TITLE @@ -1033,8 +1079,9 @@ def test_update_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) table.default_table_expiration_ms = DEF_TABLE_EXP table.location = LOCATION table.expires = self.EXP_TIME @@ -1065,8 +1112,9 @@ def test_delete_w_bound_client(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) table.delete() @@ -1082,8 +1130,9 @@ def test_delete_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client1) table.delete(client=client2) @@ -1097,8 +1146,9 @@ def test_fetch_data_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA client = _Client(project=self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError) as exc: table.fetch_data() @@ -1154,12 +1204,13 @@ def _bigquery_timestamp_float_repr(ts_float): conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='NULLABLE') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, joined]) + table = self._make_one(table_ref, schema=[full_name, age, joined], + client=client) iterator = table.fetch_data() page = six.next(iterator.pages) @@ -1220,13 +1271,15 @@ def test_fetch_data_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(DATA) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') score = SchemaField('score', 'FLOAT', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, voter, score]) + table = self._make_one(table_ref, + schema=[full_name, age, voter, score], + client=client1) iterator = table.fetch_data( client=client2, max_results=MAX, page_token=TOKEN) @@ -1277,14 +1330,15 @@ def test_fetch_data_w_repeated_fields(self): } conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) color = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[color, struct]) + table = self._make_one(table_ref, schema=[color, struct], + client=client) iterator = table.fetch_data() page = six.next(iterator.pages) @@ -1332,15 +1386,16 @@ def test_fetch_data_w_record_schema(self): } conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') area_code = SchemaField('area_code', 'STRING', 'REQUIRED') local_number = SchemaField('local_number', 'STRING', 'REQUIRED') rank = SchemaField('rank', 'INTEGER', 'REQUIRED') phone = SchemaField('phone', 'RECORD', mode='NULLABLE', fields=[area_code, local_number, rank]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, phone]) + table = self._make_one(table_ref, schema=[full_name, phone], + client=client) iterator = table.fetch_data() page = six.next(iterator.pages) @@ -1371,8 +1426,9 @@ def test_row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} client = _Client(project=self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) with self.assertRaises(ValueError) as exc: table.row_from_mapping(MAPPING) @@ -1388,13 +1444,15 @@ def test_row_from_mapping_w_invalid_schema(self): 'bogus': 'WHATEVER', } client = _Client(project=self.PROJECT) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') bogus = SchemaField('joined', 'STRING', mode='BOGUS') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, colors, bogus]) + table = self._make_one(table_ref, + schema=[full_name, age, colors, bogus], + client=client) with self.assertRaises(ValueError) as exc: table.row_from_mapping(MAPPING) @@ -1410,13 +1468,15 @@ def test_row_from_mapping_w_schema(self): 'extra': 'IGNORED', } client = _Client(project=self.PROJECT) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') joined = SchemaField('joined', 'STRING', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, colors, joined]) + table = self._make_one(table_ref, + schema=[full_name, age, colors, joined], + client=client) self.assertEqual( table.row_from_mapping(MAPPING), @@ -1426,8 +1486,9 @@ def test_insert_data_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA client = _Client(project=self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) ROWS = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), @@ -1454,12 +1515,13 @@ def test_insert_data_w_bound_client(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, joined]) + table = self._make_one(table_ref, schema=[full_name, age, joined], + client=client) ROWS = [ ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), @@ -1507,12 +1569,13 @@ def test_insert_data_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESPONSE) client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, voter]) + table = self._make_one(table_ref, schema=[full_name, age, voter], + client=client1) ROWS = [ ('Phred Phlyntstone', 32, True), ('Bharney Rhubble', 33, False), @@ -1564,14 +1627,15 @@ def test_insert_data_w_repeated_fields(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, struct]) + table = self._make_one(table_ref, schema=[full_name, struct], + client=client) ROWS = [ (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), ] @@ -1600,15 +1664,16 @@ def test_insert_data_w_record_schema(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') area_code = SchemaField('area_code', 'STRING', 'REQUIRED') local_number = SchemaField('local_number', 'STRING', 'REQUIRED') rank = SchemaField('rank', 'INTEGER', 'REQUIRED') phone = SchemaField('phone', 'RECORD', mode='NULLABLE', fields=[area_code, local_number, rank]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, phone]) + table = self._make_one(table_ref, schema=[full_name, phone], + client=client) ROWS = [ ('Phred Phlyntstone', {'area_code': '800', 'local_number': '555-1212', @@ -1639,7 +1704,9 @@ def _row_data(row): def test__get_transport(self): client = mock.Mock(spec=[u'_credentials', '_http']) client._http = mock.sentinel.http - table = self._make_one(self.TABLE_NAME, None) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) transport = table._get_transport(client) @@ -1667,8 +1734,9 @@ def _initiate_resumable_upload_helper(self, num_retries=None): connection = _Connection() client = _Client(self.PROJECT, connection=connection) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) # Create mocks to be checked for doing transport. resumable_url = 'http://test.invalid?upload_id=hey-you' @@ -1736,8 +1804,9 @@ def _do_multipart_upload_success_helper( connection = _Connection() client = _Client(self.PROJECT, connection=connection) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref, client=client) # Create mocks to be checked for doing transport. fake_transport = self._mock_transport(http_client.OK, {}) @@ -1808,8 +1877,9 @@ def _make_table(transport=None): client._http = transport client.project = 'project_id' - dataset = dataset.Dataset('test_dataset', client) - table = table.Table('test_table', dataset) + dataset_ref = dataset.DatasetReference('project_id', 'test_dataset') + table_ref = dataset_ref.table('test_table') + table = table.Table(table_ref, client=client) return table @@ -1867,7 +1937,7 @@ def test_upload_from_file_resumable(self): table.upload_from_file(file_obj, source_format='CSV') do_upload.assert_called_once_with( - table._dataset._client, + table._client, file_obj, self.EXPECTED_CONFIGURATION, google.cloud.bigquery.table._DEFAULT_NUM_RETRIES) @@ -1897,7 +1967,7 @@ def test_upload_file_resumable_metadata(self): 'load': { 'sourceFormat': config_args['source_format'], 'destinationTable': { - 'projectId': table._dataset._client.project, + 'projectId': table._dataset.project, 'datasetId': table.dataset_id, 'tableId': table.table_id, }, @@ -1926,7 +1996,7 @@ def test_upload_file_resumable_metadata(self): file_obj, **config_args) do_upload.assert_called_once_with( - table._dataset._client, + table._client, file_obj, expected_config, mock.ANY) @@ -1945,7 +2015,7 @@ def test_upload_from_file_multipart(self): file_obj, source_format='CSV', size=file_obj_size) do_upload.assert_called_once_with( - table._dataset._client, + table._client, file_obj, self.EXPECTED_CONFIGURATION, file_obj_size, @@ -1963,7 +2033,7 @@ def test_upload_from_file_with_retries(self): file_obj, source_format='CSV', num_retries=num_retries) do_upload.assert_called_once_with( - table._dataset._client, + table._client, file_obj, self.EXPECTED_CONFIGURATION, num_retries) @@ -2046,7 +2116,7 @@ def test__do_resumable_upload(self): table = self._make_table(transport) result = table._do_resumable_upload( - table._dataset._client, + table._client, file_obj, self.EXPECTED_CONFIGURATION, None) @@ -2069,7 +2139,7 @@ def test__do_multipart_upload(self): file_obj_len = len(file_obj.getvalue()) table._do_multipart_upload( - table._dataset._client, + table._client, file_obj, self.EXPECTED_CONFIGURATION, file_obj_len, @@ -2100,7 +2170,7 @@ def test__do_multipart_upload_wrong_size(self): with pytest.raises(ValueError): table._do_multipart_upload( - table._dataset._client, + table._client, file_obj, {}, file_obj_len+1, @@ -2308,22 +2378,6 @@ def run(self): self.rows = self.client._query_results -class _Dataset(object): - - def __init__(self, client, dataset_id=TestTable.DS_ID): - self._client = client - self.dataset_id = dataset_id - - @property - def path(self): - return '/projects/%s/datasets/%s' % ( - self._client.project, self.dataset_id) - - @property - def project(self): - return self._client.project - - class _Connection(object): API_BASE_URL = 'http://example.com' From 020197ce8daa1f42fa78d7ced84c454921791f10 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 19 Sep 2017 14:02:02 -0700 Subject: [PATCH 0276/2016] BQ: client.extract_table starts extract job (#3991) * BQ: client.extract_table starts extract job Add system tests for extract_table. * BigQuery: client.extract_table use `**kwargs` for Python 2.7. * BQ: extract_table. Use dict.get for kwargs. job_id instead of job_name. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/_helpers.py | 76 +++++++ .../google/cloud/bigquery/client.py | 39 +++- .../google/cloud/bigquery/job.py | 186 ++++++++++++------ .../google-cloud-bigquery/tests/system.py | 101 ++++++++++ .../tests/unit/test__helpers.py | 43 ++++ .../tests/unit/test_client.py | 88 ++++++++- .../tests/unit/test_job.py | 49 +++-- 8 files changed, 484 insertions(+), 100 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 00fa4445b0d0..333854035376 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -32,6 +32,7 @@ from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -41,6 +42,7 @@ 'ArrayQueryParameter', 'Client', 'Dataset', + 'ExtractJobConfig', 'ScalarQueryParameter', 'SchemaField', 'StructQueryParameter', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index e3d5c0f2f871..abe7a8934c96 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -300,6 +300,82 @@ def _time_to_json(value): _SCALAR_VALUE_TO_JSON_PARAM['TIMESTAMP'] = _timestamp_to_json_parameter +class _ApiResourceProperty(object): + """Base property implementation. + + Values will be stored on a `_properties` helper attribute of the + property's job instance. + + :type name: str + :param name: name of the property + + :type resource_name: str + :param resource_name: name of the property in the resource dictionary + """ + + def __init__(self, name, resource_name): + self.name = name + self.resource_name = resource_name + + def __get__(self, instance, owner): + """Descriptor protocol: accessor""" + if instance is None: + return self + return instance._properties.get(self.resource_name) + + def _validate(self, value): + """Subclasses override to impose validation policy.""" + pass + + def __set__(self, instance, value): + """Descriptor protocol: mutator""" + self._validate(value) + instance._properties[self.resource_name] = value + + def __delete__(self, instance): + """Descriptor protocol: deleter""" + del instance._properties[self.resource_name] + + +class _TypedApiResourceProperty(_ApiResourceProperty): + """Property implementation: validates based on value type. + + :type name: str + :param name: name of the property + + :type resource_name: str + :param resource_name: name of the property in the resource dictionary + + :type property_type: type or sequence of types + :param property_type: type to be validated + """ + def __init__(self, name, resource_name, property_type): + super(_TypedApiResourceProperty, self).__init__( + name, resource_name) + self.property_type = property_type + + def _validate(self, value): + """Ensure that 'value' is of the appropriate type. + + :raises: ValueError on a type mismatch. + """ + if value is None: + return + if not isinstance(value, self.property_type): + raise ValueError('Required type: %s' % (self.property_type,)) + + +class _EnumApiResourceProperty(_ApiResourceProperty): + """Pseudo-enumeration class. + + :type name: str + :param name: name of the property. + + :type resource_name: str + :param resource_name: name of the property in the resource dictionary + """ + + class _ConfigurationProperty(object): """Base property implementation. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1b9e9a522a15..05be0da8123d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -16,6 +16,8 @@ from __future__ import absolute_import +import uuid + from google.api.core import page_iterator from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection @@ -385,27 +387,44 @@ def copy_table(self, job_id, destination, *sources): """ return CopyJob(job_id, destination, sources, client=self) - def extract_table_to_storage(self, job_id, source, *destination_uris): - """Construct a job for extracting a table into Cloud Storage files. + def extract_table(self, source, *destination_uris, **kwargs): + """Start a job to extract a table into Cloud Storage files. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract - :type job_id: str - :param job_id: Name of the job. - - :type source: :class:`google.cloud.bigquery.table.Table` + :type source: :class:`google.cloud.bigquery.table.TableReference` :param source: table to be extracted. :type destination_uris: sequence of string - :param destination_uris: URIs of CloudStorage file(s) into which - table data is to be extracted; in format - ``gs:///``. + :param destination_uris: + URIs of Cloud Storage file(s) into which table data is to be + extracted; in format ``gs:///``. + + :type kwargs: dict + :param kwargs: Additional keyword arguments. + + :Keyword Arguments: + * *job_config* + (:class:`google.cloud.bigquery.job.ExtractJobConfig`) -- + (Optional) Extra configuration options for the extract job. + * *job_id* (``str``) -- + Additional content + (Optional) The ID of the job. :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: a new ``ExtractJob`` instance """ - return ExtractJob(job_id, source, destination_uris, client=self) + job_config = kwargs.get('job_config') + job_id = kwargs.get('job_id') + if job_id is None: + job_id = str(uuid.uuid4()) + + job = ExtractJob( + job_id, source, list(destination_uris), client=self, + job_config=job_config) + job.begin() + return job def run_async_query(self, job_id, query, udf_resources=(), query_parameters=()): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5807fcd25e0b..cfc861266355 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -14,6 +14,7 @@ """Define API Jobs.""" +import copy import threading import six @@ -24,6 +25,7 @@ from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference @@ -35,8 +37,10 @@ from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery._helpers import UDFResourcesProperty +from google.cloud.bigquery._helpers import _EnumApiResourceProperty from google.cloud.bigquery._helpers import _EnumProperty from google.cloud.bigquery._helpers import _query_param_from_api_repr +from google.cloud.bigquery._helpers import _TypedApiResourceProperty from google.cloud.bigquery._helpers import _TypedProperty _DONE_STATE = 'DONE' @@ -116,7 +120,7 @@ def __set__(self, instance, value): setattr(instance._configuration, self._backing_name, value) -class Compression(_EnumProperty): +class Compression(_EnumApiResourceProperty): """Pseudo-enum for ``compression`` properties.""" GZIP = 'GZIP' NONE = 'NONE' @@ -128,7 +132,7 @@ class CreateDisposition(_EnumProperty): CREATE_NEVER = 'CREATE_NEVER' -class DestinationFormat(_EnumProperty): +class DestinationFormat(_EnumApiResourceProperty): """Pseudo-enum for ``destination_format`` properties.""" CSV = 'CSV' NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' @@ -401,6 +405,7 @@ def begin(self, client=None): client = self._require_client(client) path = '/projects/%s/jobs' % (self.project,) + api_response = client._connection.api_request( method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) @@ -973,62 +978,126 @@ def from_api_repr(cls, resource, client): return job -class _ExtractConfiguration(object): - """User-settable configuration options for extract jobs. +class ExtractJobConfig(object): + """Configuration options for extract jobs. - Values which are ``None`` -> server defaults. + All properties in this class are optional. Values which are ``None`` -> + server defaults. """ - _compression = None - _destination_format = None - _field_delimiter = None - _print_header = None + + def __init__(self): + self._properties = {} + + compression = Compression('compression', 'compression') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression + """ + + destination_format = DestinationFormat( + 'destination_format', 'destinationFormat') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat + """ + + field_delimiter = _TypedApiResourceProperty( + 'field_delimiter', 'fieldDelimiter', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter + """ + + print_header = _TypedApiResourceProperty( + 'print_header', 'printHeader', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader + """ + + def to_api_repr(self): + """Build an API representation of the extact job config. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config class ExtractJob(_AsyncJob): """Asynchronous job: extract data from a table into Cloud Storage. :type job_id: str - :param job_id: the job's ID, within the project belonging to ``client``. + :param job_id: the job's ID - :type source: :class:`google.cloud.bigquery.table.Table` + :type source: :class:`google.cloud.bigquery.table.TableReference` :param source: Table into which data is to be loaded. :type destination_uris: list of string - :param destination_uris: URIs describing Cloud Storage blobs into which - extracted data will be written, in format - ``gs:///``. + :param destination_uris: + URIs describing where the extracted data will be written in Cloud + Storage, using the format ``gs:///``. :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + :param client: + A client which holds credentials and project configuration. + + :type job_config: :class:`~google.cloud.bigquery.job.ExtractJobConfig` + :param job_config: + (Optional) Extra configuration options for the extract job. """ _JOB_TYPE = 'extract' - def __init__(self, job_id, source, destination_uris, client): + def __init__( + self, job_id, source, destination_uris, client, job_config=None): super(ExtractJob, self).__init__(job_id, client) + + if job_config is None: + job_config = ExtractJobConfig() + self.source = source self.destination_uris = destination_uris - self._configuration = _ExtractConfiguration() + self._configuration = job_config - compression = Compression('compression') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression - """ + @property + def compression(self): + """See + :class:`~google.cloud.bigquery.job.ExtractJobConfig.compression`. + """ + return self._configuration.compression - destination_format = DestinationFormat('destination_format') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat - """ + @property + def destination_format(self): + """See + :class:`~google.cloud.bigquery.job.ExtractJobConfig.destination_format`. + """ + return self._configuration.destination_format - field_delimiter = _TypedProperty('field_delimiter', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter - """ + @property + def field_delimiter(self): + """See + :class:`~google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. + """ + return self._configuration.field_delimiter - print_header = _TypedProperty('print_header', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader - """ + @property + def print_header(self): + """See + :class:`~google.cloud.bigquery.job.ExtractJobConfig.print_header`. + """ + return self._configuration.print_header @property def destination_uri_file_counts(self): @@ -1046,50 +1115,34 @@ def destination_uri_file_counts(self): result = int(result) return result - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - if self.compression is not None: - configuration['compression'] = self.compression - if self.destination_format is not None: - configuration['destinationFormat'] = self.destination_format - if self.field_delimiter is not None: - configuration['fieldDelimiter'] = self.field_delimiter - if self.print_header is not None: - configuration['printHeader'] = self.print_header - def _build_resource(self): """Generate a resource for :meth:`begin`.""" source_ref = { - 'projectId': self.source.project, - 'datasetId': self.source.dataset_id, + 'projectId': self.source.dataset.project, + 'datasetId': self.source.dataset.dataset_id, 'tableId': self.source.table_id, } + configuration = self._configuration.to_api_repr() + configuration['sourceTable'] = source_ref + configuration['destinationUris'] = self.destination_uris + resource = { 'jobReference': { 'projectId': self.project, 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'sourceTable': source_ref, - 'destinationUris': self.destination_uris, - }, + self._JOB_TYPE: configuration, }, } - configuration = resource['configuration'][self._JOB_TYPE] - self._populate_config_resource(configuration) return resource def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" - self.compression = configuration.get('compression') - self.destination_format = configuration.get('destinationFormat') - self.field_delimiter = configuration.get('fieldDelimiter') - self.print_header = _bool_or_none( - configuration.get('printHeader')) + self._configuration._properties = copy.deepcopy(configuration) @classmethod def from_api_repr(cls, resource, client): @@ -1110,13 +1163,16 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: Job parsed from ``resource``. """ - job_id, config = cls._get_resource_config(resource) - source_config = config['sourceTable'] - dataset = Dataset(source_config['datasetId'], client) - table_ref = TableReference(dataset, source_config['tableId']) - source = Table(table_ref, client=client) - destination_uris = config['destinationUris'] - job = cls(job_id, source, destination_uris, client=client) + job_id, config_resource = cls._get_resource_config(resource) + config = ExtractJobConfig.from_api_repr(config_resource) + source_config = config_resource['sourceTable'] + dataset = DatasetReference( + source_config['projectId'], source_config['datasetId']) + source = dataset.table(source_config['tableId']) + destination_uris = config_resource['destinationUris'] + + job = cls( + job_id, source, destination_uris, client=client, job_config=config) job._set_properties(resource) return job diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index d40688104aba..91ddca876853 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -543,6 +543,107 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.assertEqual( sorted(actual_rows, key=by_age), sorted(rows, key=by_age)) + def _load_table_for_extract_table( + self, storage_client, rows, bucket_name, blob_name, table): + from google.cloud._testing import _NamedTemporaryFile + + local_id = unique_resource_id() + gs_url = 'gs://{}/{}'.format(bucket_name, blob_name) + + # In the **very** rare case the bucket name is reserved, this + # fails with a ConnectionError. + bucket = storage_client.create_bucket(bucket_name) + self.to_delete.append(bucket) + blob = bucket.blob(blob_name) + + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as csv_write: + writer = csv.writer(csv_write) + writer.writerow(('Full Name', 'Age')) + writer.writerows(rows) + + with open(temp.name, 'rb') as csv_read: + blob.upload_from_file(csv_read, content_type='text/csv') + self.to_delete.insert(0, blob) + + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(table.dataset.dataset_id)) + self.to_delete.append(dataset) + table = dataset.table(table.table_id) + self.to_delete.insert(0, table) + job = Config.CLIENT.load_table_from_storage( + 'bq_extract_storage_test_' + local_id, table, gs_url) + job.autodetect = True + job.begin() + # Allow for 90 seconds of "warm up" before rows visible. See + # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds + retry = RetryInstanceState(_job_done, max_tries=8) + retry(job.reload)() + + def test_extract_table(self): + from google.cloud.storage import Client as StorageClient + + storage_client = StorageClient() + local_id = unique_resource_id() + bucket_name = 'bq_extract_test' + local_id + blob_name = 'person_ages.csv' + dataset_id = _make_dataset_id('load_gcs_then_extract') + table_id = 'test_table' + table = Config.CLIENT.dataset(dataset_id).table(table_id) + rows = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + self._load_table_for_extract_table( + storage_client, rows, bucket_name, blob_name, table) + bucket = storage_client.bucket(bucket_name) + destination_blob_name = 'person_ages_out.csv' + destination = bucket.blob(destination_blob_name) + destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) + + job = Config.CLIENT.extract_table(table, destination_uri) + job.result() + + self.to_delete.insert(0, destination) + got = destination.download_as_string().decode('utf-8') + self.assertIn('Bharney Rhubble', got) + + def test_extract_table_w_job_config(self): + from google.cloud.storage import Client as StorageClient + + storage_client = StorageClient() + local_id = unique_resource_id() + bucket_name = 'bq_extract_test' + local_id + blob_name = 'person_ages.csv' + dataset_id = _make_dataset_id('load_gcs_then_extract') + table_id = 'test_table' + table = Config.CLIENT.dataset(dataset_id).table(table_id) + rows = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + self._load_table_for_extract_table( + storage_client, rows, bucket_name, blob_name, table) + bucket = storage_client.bucket(bucket_name) + destination_blob_name = 'person_ages_out.csv' + destination = bucket.blob(destination_blob_name) + destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) + + job_config = bigquery.ExtractJobConfig() + job_config.destination_format = 'NEWLINE_DELIMITED_JSON' + job = Config.CLIENT.extract_table( + table, destination_uri, job_config=job_config) + job.result() + + self.to_delete.insert(0, destination) + got = destination.download_as_string().decode('utf-8') + self.assertIn('"Bharney Rhubble"', got) + def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') JOB_NAME = 'fetch_' + DATASET_ID diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 091df8f7355a..3d40f38a5799 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -763,6 +763,49 @@ def __init__(self): self.assertIsNone(wrapper._configuration._attr) +class Test_TypedApiResourceProperty(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery._helpers import _TypedApiResourceProperty + + return _TypedApiResourceProperty + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_it(self): + + class Wrapper(object): + attr = self._make_one('attr', 'back', int) + + def __init__(self): + self._properties = {} + + self.assertIsNotNone(Wrapper.attr) + + wrapper = Wrapper() + with self.assertRaises(ValueError): + wrapper.attr = 'BOGUS' + + wrapper.attr = 42 + self.assertEqual(wrapper.attr, 42) + self.assertEqual(wrapper._properties['back'], 42) + + wrapper.attr = None + self.assertIsNone(wrapper.attr) + self.assertIsNone(wrapper._properties['back']) + + wrapper.attr = 23 + self.assertEqual(wrapper.attr, 23) + self.assertEqual(wrapper._properties['back'], 23) + + del wrapper.attr + self.assertIsNone(wrapper.attr) + with self.assertRaises(KeyError): + wrapper._properties['back'] + + class Test_TypedProperty(unittest.TestCase): @staticmethod diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 12282e47d931..d49d8ba4391c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -15,6 +15,7 @@ import unittest import mock +import six def _make_credentials(): @@ -714,26 +715,105 @@ def test_copy_table(self): self.assertEqual(list(job.sources), [source]) self.assertIs(job.destination, destination) - def test_extract_table_to_storage(self): + def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob PROJECT = 'PROJECT' - JOB = 'job_name' - DATASET = 'dataset_name' + JOB = 'job_id' + DATASET = 'dataset_id' SOURCE = 'source_table' DESTINATION = 'gs://bucket_name/object_name' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': SOURCE, + }, + 'destinationUris': [DESTINATION], + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESOURCE) dataset = client.dataset(DATASET) source = dataset.table(SOURCE) - job = client.extract_table_to_storage(JOB, source, DESTINATION) + + job = client.extract_table(source, DESTINATION, job_id=JOB) + + # Check that extract_table actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + + # Check the job resource. self.assertIsInstance(job, ExtractJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) + def test_extract_table_generated_job_id(self): + from google.cloud.bigquery.job import ExtractJob + from google.cloud.bigquery.job import ExtractJobConfig + from google.cloud.bigquery.job import DestinationFormat + + PROJECT = 'PROJECT' + JOB = 'job_id' + DATASET = 'dataset_id' + SOURCE = 'source_table' + DESTINATION = 'gs://bucket_name/object_name' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': SOURCE, + }, + 'destinationUris': [DESTINATION], + 'destinationFormat': 'NEWLINE_DELIMITED_JSON', + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESOURCE) + dataset = client.dataset(DATASET) + source = dataset.table(SOURCE) + job_config = ExtractJobConfig() + job_config.destination_format = ( + DestinationFormat.NEWLINE_DELIMITED_JSON) + + job = client.extract_table(source, DESTINATION, job_config=job_config) + + # Check that extract_table actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertIsInstance( + req['data']['jobReference']['jobId'], six.string_types) + + # Check the job resource. + self.assertIsInstance(job, ExtractJob) + self.assertIs(job._client, client) + self.assertEqual(job.source, source) + self.assertEqual(list(job.destination_uris), [DESTINATION]) + def test_run_async_query_defaults(self): from google.cloud.bigquery.job import QueryJob diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 11f4dec9870c..1da83260f06f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -17,6 +17,9 @@ from six.moves import http_client import unittest +from google.cloud.bigquery.job import ExtractJobConfig +from google.cloud.bigquery.dataset import DatasetReference + class Test__bool_or_none(unittest.TestCase): @@ -1217,31 +1220,31 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.destination_uris, config['destinationUris']) table_ref = config['sourceTable'] - self.assertEqual(job.source.project, table_ref['projectId']) - self.assertEqual(job.source.dataset_id, table_ref['datasetId']) + self.assertEqual(job.source.dataset.project, table_ref['projectId']) + self.assertEqual(job.source.dataset.dataset_id, table_ref['datasetId']) self.assertEqual(job.source.table_id, table_ref['tableId']) if 'compression' in config: - self.assertEqual(job.compression, - config['compression']) + self.assertEqual( + job.compression, config['compression']) else: self.assertIsNone(job.compression) if 'destinationFormat' in config: - self.assertEqual(job.destination_format, - config['destinationFormat']) + self.assertEqual( + job.destination_format, config['destinationFormat']) else: self.assertIsNone(job.destination_format) if 'fieldDelimiter' in config: - self.assertEqual(job.field_delimiter, - config['fieldDelimiter']) + self.assertEqual( + job.field_delimiter, config['fieldDelimiter']) else: self.assertIsNone(job.field_delimiter) if 'printHeader' in config: - self.assertEqual(job.print_header, - config['printHeader']) + self.assertEqual( + job.print_header, config['printHeader']) else: self.assertIsNone(job.print_header) @@ -1260,7 +1263,7 @@ def test_ctor(self): self._verifyInitialReadonlyProperties(job) - # set/read from resource['configuration']['copy'] + # set/read from resource['configuration']['extract'] self.assertIsNone(job.compression) self.assertIsNone(job.destination_format) self.assertIsNone(job.field_delimiter) @@ -1350,7 +1353,8 @@ def test_begin_w_bound_client(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client) @@ -1399,14 +1403,15 @@ def test_begin_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job_config = ExtractJobConfig() + job_config.compression = 'GZIP' + job_config.destination_format = 'NEWLINE_DELIMITED_JSON' + job_config.field_delimiter = '|' + job_config.print_header = False job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], - client1) - - job.compression = 'GZIP' - job.destination_format = 'NEWLINE_DELIMITED_JSON' - job.field_delimiter = '|' - job.print_header = False + client1, job_config) job.begin(client=client2) @@ -1467,7 +1472,8 @@ def test_reload_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client) @@ -1486,7 +1492,8 @@ def test_reload_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], client1) From 14d9d56f037ac84719028220110830e8f1e2c2d0 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 20 Sep 2017 09:37:55 -0700 Subject: [PATCH 0277/2016] BigQuery: Adds client.get_table() and removes table.reload() (#4004) * WIP adds client.get_table() * BigQuery: Adds client.get_table() and removes table.reload() * removes unnecessary variable * adds system test for client.get_table() --- .../google/cloud/bigquery/client.py | 15 +++++++ .../google/cloud/bigquery/table.py | 30 ++++++-------- .../google-cloud-bigquery/tests/system.py | 25 +++++++++--- .../tests/unit/test_client.py | 28 +++++++++++++ .../tests/unit/test_table.py | 39 ------------------- 5 files changed, 73 insertions(+), 64 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 05be0da8123d..9c0bce5eba7e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -23,6 +23,7 @@ from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.dataset import Table from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob @@ -207,6 +208,20 @@ def get_dataset(self, dataset_ref): method='GET', path=dataset_ref.path) return Dataset.from_api_repr(api_response, self) + def get_table(self, table_ref): + """Fetch the table referenced by ``table_ref`` + + :type table_ref: + :class:`google.cloud.bigquery.table.TableReference` + :param table_ref: the table to use. + + :rtype: :class:`google.cloud.bigquery.table.Table` + :returns: a ``Table`` instance + """ + api_response = self._connection.api_request( + method='GET', path=table_ref.path) + return Table.from_api_repr(api_response, self) + def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index fe1a9d3b4ec5..08ca377dc102 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -35,7 +35,7 @@ from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW -_TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'table.reload()'" +_TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'" _MARKER = object() _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB _BASE_UPLOAD_TEMPLATE = ( @@ -85,6 +85,15 @@ def table_id(self): """ return self._table_id + @property + def path(self): + """URL path for the table's APIs. + + :rtype: str + :returns: the path based on project, dataset and table IDs. + """ + return '%s/tables/%s' % (self._dataset_ref.path, self._table_id) + class Table(object): """Tables represent a set of rows whose values correspond to a schema. @@ -616,23 +625,6 @@ def exists(self, client=None): else: return True - def reload(self, client=None): - """API call: refresh table properties via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - - api_response = client._connection.api_request( - method='GET', path=self.path) - self._set_properties(api_response) - def patch(self, client=None, friendly_name=_MARKER, @@ -750,7 +742,7 @@ def fetch_data(self, max_results=None, page_token=None, client=None): up-to-date with the schema as defined on the back-end: if the two schemas are not identical, the values returned may be incomplete. To ensure that the local copy of the schema is - up-to-date, call :meth:`reload`. + up-to-date, call ``client.get_table``. :type max_results: int :param max_results: (Optional) Maximum number of rows to return. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 91ddca876853..f04f99703dbc 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -198,6 +198,21 @@ def test_create_table(self): self.assertTrue(table.exists()) self.assertEqual(table.table_id, TABLE_NAME) + def test_get_table_w_public_dataset(self): + PUBLIC = 'bigquery-public-data' + DATASET_ID = 'samples' + TABLE_ID = 'shakespeare' + table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_ID) + + table = Config.CLIENT.get_table(table_ref) + + self.assertEqual(table.table_id, TABLE_ID) + self.assertEqual(table.dataset_id, DATASET_ID) + self.assertEqual(table.project, PUBLIC) + schema_names = [field.name for field in table.schema] + self.assertEqual( + schema_names, ['word', 'word_count', 'corpus', 'corpus_date']) + def test_list_tables(self): DATASET_ID = _make_dataset_id('list_tables') dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) @@ -402,8 +417,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): self.assertEqual(job.output_rows, len(ROWS)) - # Reload table to get the schema before fetching the rows. - table.reload() + table = Config.CLIENT.get_table(table) rows = self._fetch_single_page(table) by_wavelength = operator.itemgetter(1) self.assertEqual(sorted(rows, key=by_wavelength), @@ -532,7 +546,7 @@ def test_load_table_from_storage_w_autodetect_schema(self): retry = RetryInstanceState(_job_done, max_tries=8) retry(job.reload)() - table.reload() + table = Config.CLIENT.get_table(table) field_name = SchemaField( u'Full_Name', u'string', u'NULLABLE', None, ()) field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ()) @@ -1181,9 +1195,8 @@ def test_dump_table_w_public_data(self): TABLE_NAME = 'natality' dataset = Dataset(DATASET_ID, Config.CLIENT, project=PUBLIC) - table = dataset.table(TABLE_NAME) - # Reload table to get the schema before fetching the rows. - table.reload() + table_ref = dataset.table(TABLE_NAME) + table = Config.CLIENT.get_table(table_ref) self._fetch_single_page(table) def test_large_query_w_public_data(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d49d8ba4391c..90b06af2d022 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -383,6 +383,34 @@ def test_create_dataset_w_attrs(self): self.assertEqual(ds.description, DESCRIPTION) self.assertEqual(ds.friendly_name, FRIENDLY_NAME) + def test_get_table(self): + project = 'PROJECT' + dataset_id = 'dataset_id' + table_id = 'table-id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + creds = _make_credentials() + http = object() + client = self._make_one(project=project, credentials=creds, _http=http) + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + } + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + + table = client.get_table(table_ref) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % path) + self.assertEqual(table.table_id, table_id) + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 7cc7bffe7080..a36a4b216c04 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -864,45 +864,6 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) - def test_reload_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - table.reload() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - RESOURCE = self._makeResource() - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - - table.reload(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - def test_patch_w_invalid_expiration(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) From b6901151f5204da3a26f6a0794d588c5c7716bfa Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 20 Sep 2017 13:54:36 -0400 Subject: [PATCH 0278/2016] bigquery: add Client.update_dataset (#4003) * bigquery: add Client.update_dataset Remove Dataset.patch and Dataset.update. * improve cover * more coverage * update system tests * more coverage * add creds to client * small changes * . * convert Python field name to API field name --- .../google/cloud/bigquery/client.py | 50 ++++++- .../google/cloud/bigquery/dataset.py | 55 -------- .../google-cloud-bigquery/tests/system.py | 31 ++--- .../tests/unit/test_client.py | 69 ++++++++++ .../tests/unit/test_dataset.py | 122 +----------------- 5 files changed, 126 insertions(+), 201 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 9c0bce5eba7e..1f5620f66ca8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -190,9 +190,7 @@ def create_dataset(self, dataset): path = '/projects/%s/datasets' % (dataset.project,) api_response = self._connection.api_request( method='POST', path=path, data=dataset._build_resource()) - ds = Dataset(dataset.dataset_id, project=dataset.project, client=self) - ds._set_properties(api_response) - return ds + return Dataset.from_api_repr(api_response, self) def get_dataset(self, dataset_ref): """Fetch the dataset referenced by ``dataset_ref`` @@ -222,6 +220,52 @@ def get_table(self, table_ref): method='GET', path=table_ref.path) return Table.from_api_repr(api_response, self) + def update_dataset(self, dataset, fields): + """Change some fields of a dataset. + + Use ``fields`` to specify which fields to update. At least one field + must be provided. If a field is listed in ``fields`` and is ``None`` in + ``dataset``, it will be deleted. + + If ``dataset.etag`` is not ``None``, the update will only + succeed if the dataset on the server has the same ETag. Thus + reading a dataset with ``get_dataset``, changing its fields, + and then passing it ``update_dataset`` will ensure that the changes + will only be saved if no modifications to the dataset occurred + since the read. + + :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` + :param dataset: the dataset to update. + + :type fields: sequence of string + :param fields: the fields of ``dataset`` to change, spelled as the + Dataset properties (e.g. "friendly_name"). + + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` + :returns: the modified ``Dataset`` instance + :raises: ValueError for fields that cannot be updated. + + """ + if dataset.project is None: + dataset._project = self.project + path = '/projects/%s/datasets/%s' % (dataset.project, + dataset.dataset_id) + partial = {} + for f in fields: + if not hasattr(dataset, f): + raise ValueError('No Dataset field %s' % f) + # snake case to camel case + words = f.split('_') + api_field = words[0] + ''.join(map(str.capitalize, words[1:])) + partial[api_field] = getattr(dataset, f) + if dataset.etag is not None: + headers = {'If-Match': dataset.etag} + else: + headers = None + api_response = self._connection.api_request( + method='PATCH', path=path, data=partial, headers=headers) + return Dataset.from_api_repr(api_response, self) + def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 29dc3af19458..dabd0a129ec1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -488,61 +488,6 @@ def _build_resource(self): return resource - def patch(self, client=None, **kw): - """API call: update individual dataset properties via a PATCH request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/patch - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :type kw: ``dict`` - :param kw: properties to be patched. - - :raises: ValueError for invalid value types. - """ - client = self._require_client(client) - - partial = {} - - if 'default_table_expiration_ms' in kw: - value = kw['default_table_expiration_ms'] - if not isinstance(value, six.integer_types) and value is not None: - raise ValueError("Pass an integer, or None") - partial['defaultTableExpirationMs'] = value - - if 'description' in kw: - partial['description'] = kw['description'] - - if 'friendly_name' in kw: - partial['friendlyName'] = kw['friendly_name'] - - if 'location' in kw: - partial['location'] = kw['location'] - - api_response = client._connection.api_request( - method='PATCH', path=self.path, data=partial) - self._set_properties(api_response) - - def update(self, client=None): - """API call: update dataset properties via a PUT request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/update - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - api_response = client._connection.api_request( - method='PUT', path=self.path, data=self._build_resource()) - self._set_properties(api_response) - def delete(self, client=None): """API call: delete the dataset via a DELETE request. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index f04f99703dbc..badde5e412a5 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -132,7 +132,7 @@ def test_get_dataset(self): self.assertEqual(got.friendly_name, 'Friendly') self.assertEqual(got.description, 'Description') - def test_patch_dataset(self): + def test_update_dataset(self): dataset = retry_403(Config.CLIENT.create_dataset)( Dataset(_make_dataset_id('patch_dataset'))) self.to_delete.append(dataset) @@ -140,27 +140,14 @@ def test_patch_dataset(self): self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) - dataset.patch(friendly_name='Friendly', description='Description') - self.assertEqual(dataset.friendly_name, 'Friendly') - self.assertEqual(dataset.description, 'Description') - - def test_update_dataset(self): - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('update_dataset'))) - self.to_delete.append(dataset) - - self.assertTrue(_dataset_exists(dataset)) - after = [entry for entry in dataset.access_entries - if entry.entity_id != 'projectWriters'] - dataset.access_entries = after - - retry_403(dataset.update)() - - self.assertEqual(len(dataset.access_entries), len(after)) - for found, expected in zip(dataset.access_entries, after): - self.assertEqual(found.role, expected.role) - self.assertEqual(found.entity_type, expected.entity_type) - self.assertEqual(found.entity_id, expected.entity_id) + dataset.friendly_name = 'Friendly' + dataset.description = 'Description' + ds2 = Config.CLIENT.update_dataset(dataset, + ['friendly_name', 'description']) + self.assertEqual(ds2.friendly_name, 'Friendly') + self.assertEqual(ds2.description, 'Description') + + # TODO(jba): test that read-modify-write with ETag works. def test_list_datasets(self): datasets_to_create = [ diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 90b06af2d022..173f059374da 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -336,6 +336,7 @@ def test_create_dataset_w_attrs(self): PATH = 'projects/%s/datasets' % PROJECT DESCRIPTION = 'DESC' FRIENDLY_NAME = 'FN' + LOCATION = 'US' USER_EMAIL = 'phred@example.com' VIEW = { 'projectId': 'my-proj', @@ -349,6 +350,8 @@ def test_create_dataset_w_attrs(self): 'id': "%s:%s" % (PROJECT, DS_ID), 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'defaultTableExpirationMs': 3600, 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'view': VIEW}], @@ -361,6 +364,8 @@ def test_create_dataset_w_attrs(self): ds_arg = Dataset(DS_ID, project=PROJECT, access_entries=entries) ds_arg.description = DESCRIPTION ds_arg.friendly_name = FRIENDLY_NAME + ds_arg.default_table_expiration_ms = 3600 + ds_arg.location = LOCATION ds = client.create_dataset(ds_arg) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -371,6 +376,8 @@ def test_create_dataset_w_attrs(self): {'projectId': PROJECT, 'datasetId': DS_ID}, 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'defaultTableExpirationMs': 3600, 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'view': VIEW}], @@ -382,6 +389,8 @@ def test_create_dataset_w_attrs(self): self.assertEqual(ds.full_dataset_id, RESOURCE['id']) self.assertEqual(ds.description, DESCRIPTION) self.assertEqual(ds.friendly_name, FRIENDLY_NAME) + self.assertEqual(ds.location, LOCATION) + self.assertEqual(ds.default_table_expiration_ms, 3600) def test_get_table(self): project = 'PROJECT' @@ -411,6 +420,66 @@ def test_get_table(self): self.assertEqual(req['path'], '/%s' % path) self.assertEqual(table.table_id, table_id) + def test_update_dataset_w_invalid_field(self): + from google.cloud.bigquery.dataset import Dataset + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + with self.assertRaises(ValueError): + client.update_dataset(Dataset(DS_ID), ["foo"]) + + def test_update_dataset(self): + from google.cloud.bigquery.dataset import Dataset + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + PATH = 'projects/%s/datasets/%s' % (PROJECT, DS_ID) + DESCRIPTION = 'DESCRIPTION' + FRIENDLY_NAME = 'TITLE' + LOCATION = 'loc' + EXP = 17 + RESOURCE = { + 'datasetReference': + {'projectId': PROJECT, 'datasetId': DS_ID}, + 'etag': "etag", + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'defaultTableExpirationMs': EXP, + } + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection(RESOURCE, RESOURCE) + ds = Dataset(DS_ID, project=PROJECT) + ds.description = DESCRIPTION + ds.friendly_name = FRIENDLY_NAME + ds.location = LOCATION + ds.default_table_expiration_ms = EXP + ds2 = client.update_dataset( + ds, ['description', 'friendly_name', 'location']) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + SENT = { + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + } + self.assertEqual(req['data'], SENT) + self.assertEqual(req['path'], '/' + PATH) + self.assertIsNone(req['headers']) + self.assertEqual(ds2.description, ds.description) + self.assertEqual(ds2.friendly_name, ds.friendly_name) + self.assertEqual(ds2.location, ds.location) + + # ETag becomes If-Match header. + ds._properties['etag'] = 'etag' + client.update_dataset(ds, []) + req = conn._requested[1] + self.assertEqual(req['headers']['If-Match'], 'etag') + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 4c5cfb57265e..3f2a809955fb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -145,6 +145,7 @@ def _makeResource(self): 'lastModifiedTime': self.WHEN_TS * 1000, 'location': 'US', 'selfLink': self.RESOURCE_URL, + 'defaultTableExpirationMs': 3600, 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL}, @@ -386,127 +387,6 @@ def test__parse_access_entries_w_extra_keys(self): with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) - def test_patch_w_invalid_expiration(self): - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - with self.assertRaises(ValueError): - dataset.patch(default_table_expiration_ms='BOGUS') - - def test_patch_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - dataset.patch(description=DESCRIPTION, friendly_name=TITLE) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - SENT = { - 'description': DESCRIPTION, - 'friendlyName': TITLE, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verify_resource_properties(dataset, RESOURCE) - - def test_patch_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - DEF_TABLE_EXP = 12345 - LOCATION = 'EU' - RESOURCE = self._makeResource() - RESOURCE['defaultTableExpirationMs'] = str(DEF_TABLE_EXP) - RESOURCE['location'] = LOCATION - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - - dataset.patch(client=CLIENT2, - default_table_expiration_ms=DEF_TABLE_EXP, - location=LOCATION) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PATCH') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'defaultTableExpirationMs': DEF_TABLE_EXP, - 'location': LOCATION, - } - self.assertEqual(req['data'], SENT) - self._verify_resource_properties(dataset, RESOURCE) - - def test_update_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - dataset.description = DESCRIPTION - dataset.friendly_name = TITLE - - dataset.update() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PUT') - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verify_resource_properties(dataset, RESOURCE) - - def test_update_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - DEF_TABLE_EXP = 12345 - LOCATION = 'EU' - RESOURCE = self._makeResource() - RESOURCE['defaultTableExpirationMs'] = 12345 - RESOURCE['location'] = LOCATION - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - dataset.default_table_expiration_ms = DEF_TABLE_EXP - dataset.location = LOCATION - - dataset.update(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PUT') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'defaultTableExpirationMs': 12345, - 'location': 'EU', - } - self.assertEqual(req['data'], SENT) - self._verify_resource_properties(dataset, RESOURCE) - def test_delete_w_bound_client(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) conn = _Connection({}) From 2080e302cbca79ec1a8985f753f5a882bc8b509a Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 20 Sep 2017 11:18:25 -0700 Subject: [PATCH 0279/2016] BigQuery: Remove dataset property from TableReference and add project/dataset_id properties (#4011) * adds dataset_id and project properties to TableReference * Remove dataset property from Table and TableReference --- .../google/cloud/bigquery/job.py | 4 +- .../google/cloud/bigquery/table.py | 57 ++++++++++++------- .../google-cloud-bigquery/tests/system.py | 2 +- .../tests/unit/test_dataset.py | 9 ++- .../tests/unit/test_job.py | 14 ++--- .../tests/unit/test_table.py | 22 ++++--- 6 files changed, 64 insertions(+), 44 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index cfc861266355..025b839bc59c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1119,8 +1119,8 @@ def _build_resource(self): """Generate a resource for :meth:`begin`.""" source_ref = { - 'projectId': self.source.dataset.project, - 'datasetId': self.source.dataset.dataset_id, + 'projectId': self.source.project, + 'datasetId': self.source.dataset_id, 'tableId': self.source.table_id, } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 08ca377dc102..37e5bddbd7aa 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -64,17 +64,27 @@ class TableReference(object): """ def __init__(self, dataset_ref, table_id): - self._dataset_ref = dataset_ref + self._project = dataset_ref.project + self._dataset_id = dataset_ref.dataset_id self._table_id = table_id @property - def dataset(self): - """Pointer to the dataset. + def project(self): + """Project bound to the table. - :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference` - :returns: a pointer to the dataset. + :rtype: str + :returns: the project (derived from the dataset reference). """ - return self._dataset_ref + return self._project + + @property + def dataset_id(self): + """ID of dataset containing the table. + + :rtype: str + :returns: the ID (derived from the dataset reference). + """ + return self._dataset_id @property def table_id(self): @@ -92,7 +102,8 @@ def path(self): :rtype: str :returns: the path based on project, dataset and table IDs. """ - return '%s/tables/%s' % (self._dataset_ref.path, self._table_id) + return '/projects/%s/datasets/%s/tables/%s' % ( + self._project, self._dataset_id, self._table_id) class Table(object): @@ -111,8 +122,9 @@ class Table(object): _schema = None def __init__(self, table_ref, schema=(), client=None): + self._project = table_ref.project self._table_id = table_ref.table_id - self._dataset = table_ref.dataset + self._dataset_id = table_ref.dataset_id self._properties = {} # Let the @property do validation. self.schema = schema @@ -125,7 +137,7 @@ def project(self): :rtype: str :returns: the project (derived from the dataset). """ - return self._dataset.project + return self._project @property def dataset_id(self): @@ -134,7 +146,7 @@ def dataset_id(self): :rtype: str :returns: the ID (derived from the dataset). """ - return self._dataset.dataset_id + return self._dataset_id @property def table_id(self): @@ -152,7 +164,8 @@ def path(self): :rtype: str :returns: the path based on project, dataset and table IDs. """ - return '%s/tables/%s' % (self._dataset.path, self.table_id) + return '/projects/%s/datasets/%s/tables/%s' % ( + self._project, self._dataset_id, self._table_id) @property def schema(self): @@ -550,8 +563,8 @@ def _build_resource(self): """Generate a resource for ``create`` or ``update``.""" resource = { 'tableReference': { - 'projectId': self._dataset.project, - 'datasetId': self._dataset.dataset_id, + 'projectId': self._project, + 'datasetId': self._dataset_id, 'tableId': self.table_id}, } if self.description is not None: @@ -596,7 +609,7 @@ def create(self, client=None): """ client = self._require_client(client) path = '/projects/%s/datasets/%s/tables' % ( - self._dataset.project, self._dataset.dataset_id) + self._project, self._dataset_id) api_response = client._connection.api_request( method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) @@ -1188,7 +1201,8 @@ def upload_from_file(self, _maybe_rewind(file_obj, rewind=rewind) _check_mode(file_obj) metadata = _get_upload_metadata( - source_format, self._schema, self._dataset, self.table_id) + source_format, self._schema, self._project, + self._dataset_id, self.table_id) _configure_job_metadata(metadata, allow_jagged_rows, allow_quoted_newlines, create_disposition, encoding, field_delimiter, @@ -1353,7 +1367,7 @@ def _get_upload_headers(user_agent): } -def _get_upload_metadata(source_format, schema, dataset, table_id): +def _get_upload_metadata(source_format, schema, project, dataset_id, table_id): """Get base metadata for creating a table. :type source_format: str @@ -1363,8 +1377,11 @@ def _get_upload_metadata(source_format, schema, dataset, table_id): :type schema: list :param schema: List of :class:`SchemaField` associated with a table. - :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset` - :param dataset: A dataset which contains a table. + :type project: str + :param table_id: The project bound to the table. + + :type dataset_id: str + :param table_id: The dataset_id of the dataset. :type table_id: str :param table_id: The table_id of the table. @@ -1375,8 +1392,8 @@ def _get_upload_metadata(source_format, schema, dataset, table_id): load_config = { 'sourceFormat': source_format, 'destinationTable': { - 'projectId': dataset.project, - 'datasetId': dataset.dataset_id, + 'projectId': project, + 'datasetId': dataset_id, 'tableId': table_id, }, } diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index badde5e412a5..1e9a723631a3 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -568,7 +568,7 @@ def _load_table_for_extract_table( self.to_delete.insert(0, blob) dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(table.dataset.dataset_id)) + Dataset(table.dataset_id)) self.to_delete.append(dataset) table = dataset.table(table.table_id) self.to_delete.insert(0, table) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 3f2a809955fb..28c6790b793a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -104,7 +104,8 @@ def test_ctor_defaults(self): def test_table(self): dataset_ref = self._make_one('some-project-1', 'dataset_1') table_ref = dataset_ref.table('table_1') - self.assertIs(table_ref.dataset, dataset_ref) + self.assertEqual(table_ref.dataset_id, 'dataset_1') + self.assertEqual(table_ref.project, 'some-project-1') self.assertEqual(table_ref.table_id, 'table_1') @@ -543,7 +544,8 @@ def test_table_wo_schema(self): table = dataset.table('table_id') self.assertIsInstance(table, Table) self.assertEqual(table.table_id, 'table_id') - self.assertIs(table._dataset, dataset) + self.assertEqual(table.dataset_id, self.DS_ID) + self.assertEqual(table.project, self.PROJECT) self.assertEqual(table.schema, []) def test_table_w_schema(self): @@ -558,7 +560,8 @@ def test_table_w_schema(self): table = dataset.table('table_id', schema=[full_name, age]) self.assertIsInstance(table, Table) self.assertEqual(table.table_id, 'table_id') - self.assertIs(table._dataset, dataset) + self.assertEqual(table.dataset_id, self.DS_ID) + self.assertEqual(table.project, self.PROJECT) self.assertEqual(table.schema, [full_name, age]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 1da83260f06f..e53eb08970fc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1220,8 +1220,8 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.destination_uris, config['destinationUris']) table_ref = config['sourceTable'] - self.assertEqual(job.source.dataset.project, table_ref['projectId']) - self.assertEqual(job.source.dataset.dataset_id, table_ref['datasetId']) + self.assertEqual(job.source.project, table_ref['projectId']) + self.assertEqual(job.source.dataset_id, table_ref['datasetId']) self.assertEqual(job.source.table_id, table_ref['tableId']) if 'compression' in config: @@ -1908,7 +1908,6 @@ def test_statement_type(self): self.assertEqual(job.statement_type, statement_type) def test_referenced_tables(self): - from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.table import Table ref_tables_resource = [{ @@ -1942,24 +1941,21 @@ def test_referenced_tables(self): self.assertIsInstance(local1, Table) self.assertEqual(local1.table_id, 'local1') - self.assertIsInstance(local1._dataset, Dataset) self.assertEqual(local1.dataset_id, 'dataset') self.assertEqual(local1.project, self.PROJECT) - self.assertIs(local1._dataset._client, client) + self.assertIs(local1._client, client) self.assertIsInstance(local2, Table) self.assertEqual(local2.table_id, 'local2') - self.assertIsInstance(local2._dataset, Dataset) self.assertEqual(local2.dataset_id, 'dataset') self.assertEqual(local2.project, self.PROJECT) - self.assertIs(local2._dataset._client, client) + self.assertIs(local2._client, client) self.assertIsInstance(remote, Table) self.assertEqual(remote.table_id, 'other-table') - self.assertIsInstance(remote._dataset, Dataset) self.assertEqual(remote.dataset_id, 'other-dataset') self.assertEqual(remote.project, 'other-project-123') - self.assertIs(remote._dataset._client, client) + self.assertIs(remote._client, client) def test_undeclared_query_paramters(self): from google.cloud.bigquery._helpers import ArrayQueryParameter diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a36a4b216c04..2327d11b1ed3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -55,7 +55,7 @@ def test_ctor_defaults(self): dataset_ref = DatasetReference('project_1', 'dataset_1') table_ref = self._make_one(dataset_ref, 'table_1') - self.assertIs(table_ref.dataset, dataset_ref) + self.assertEqual(table_ref.dataset_id, dataset_ref.dataset_id) self.assertEqual(table_ref.table_id, 'table_1') @@ -173,7 +173,6 @@ def test_ctor(self): table = self._make_one(table_ref, client=client) self.assertEqual(table.table_id, self.TABLE_NAME) - self.assertIs(table._dataset, dataset) self.assertEqual(table.project, self.PROJECT) self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual( @@ -1710,7 +1709,8 @@ def _initiate_resumable_upload_helper(self, num_retries=None): data = b'goodbye gudbi gootbee' stream = io.BytesIO(data) metadata = _get_upload_metadata( - 'CSV', table._schema, table._dataset, table.table_id) + 'CSV', table._schema, table.project, + table.dataset_id, table.table_id) upload, transport = table._initiate_resumable_upload( client, stream, metadata, num_retries) @@ -1777,7 +1777,8 @@ def _do_multipart_upload_success_helper( data = b'Bzzzz-zap \x00\x01\xf4' stream = io.BytesIO(data) metadata = _get_upload_metadata( - 'CSV', table._schema, table._dataset, table.table_id) + 'CSV', table._schema, table.project, + table.dataset_id, table.table_id) size = len(data) response = table._do_multipart_upload( client, stream, metadata, size, num_retries) @@ -1928,7 +1929,7 @@ def test_upload_file_resumable_metadata(self): 'load': { 'sourceFormat': config_args['source_format'], 'destinationTable': { - 'projectId': table._dataset.project, + 'projectId': table.project, 'datasetId': table.dataset_id, 'tableId': table.table_id, }, @@ -2254,10 +2255,11 @@ def test_w_subfields(self): class Test__get_upload_metadata(unittest.TestCase): @staticmethod - def _call_fut(source_format, schema, dataset, name): + def _call_fut(source_format, schema, project, dataset_id, name): from google.cloud.bigquery.table import _get_upload_metadata - return _get_upload_metadata(source_format, schema, dataset, name) + return _get_upload_metadata( + source_format, schema, project, dataset_id, name) def test_empty_schema(self): source_format = 'AVRO' @@ -2265,7 +2267,8 @@ def test_empty_schema(self): spec=['dataset_id', 'project']) dataset.dataset_id = 'market' # mock.Mock() treats `name` specially. table_name = 'chairs' - metadata = self._call_fut(source_format, [], dataset, table_name) + metadata = self._call_fut(source_format, [], dataset.project, + dataset.dataset_id, table_name) expected = { 'configuration': { @@ -2290,7 +2293,8 @@ def test_with_schema(self): dataset.dataset_id = 'movie' # mock.Mock() treats `name` specially. table_name = 'teebull-neem' metadata = self._call_fut( - source_format, [full_name], dataset, table_name) + source_format, [full_name], dataset.project, + dataset.dataset_id, table_name) expected = { 'configuration': { From a078edf1069117b97a4c8b9a10a3197d960828b1 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 20 Sep 2017 17:38:14 -0400 Subject: [PATCH 0280/2016] bigquery: add client.delete_dataset (#4012) * bigquery: add client.delete_dataset * support Dataset as well as DatasetReference * fix lint --- .../google/cloud/bigquery/client.py | 18 ++++++++++-- .../google/cloud/bigquery/dataset.py | 29 ------------------- .../google-cloud-bigquery/tests/system.py | 2 +- .../tests/unit/test_client.py | 23 +++++++++++++++ .../tests/unit/test_dataset.py | 29 ------------------- 5 files changed, 40 insertions(+), 61 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1f5620f66ca8..940b016645f3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -243,8 +243,6 @@ def update_dataset(self, dataset, fields): :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: the modified ``Dataset`` instance - :raises: ValueError for fields that cannot be updated. - """ if dataset.project is None: dataset._project = self.project @@ -266,6 +264,22 @@ def update_dataset(self, dataset, fields): method='PATCH', path=path, data=partial, headers=headers) return Dataset.from_api_repr(api_response, self) + def delete_dataset(self, dataset): + """Delete a dataset. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete + + :type dataset: One of: + :class:`~google.cloud.bigquery.dataset.Dataset` + :class:`~google.cloud.bigquery.dataset.DatasetReference` + + :param dataset: the dataset to delete, or a reference to it. + """ + if not isinstance(dataset, (Dataset, DatasetReference)): + raise TypeError('dataset must be a Dataset or a DatasetReference') + self._connection.api_request(method='DELETE', path=dataset.path) + def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index dabd0a129ec1..0d9198809595 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -394,21 +394,6 @@ def from_api_repr(cls, resource, client): dataset._set_properties(resource) return dataset - def _require_client(self, client): - """Check client or verify over-ride. - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: :class:`google.cloud.bigquery.client.Client` - :returns: The client passed in or the currently bound client. - """ - if client is None: - client = self._client - return client - @staticmethod def _parse_access_entries(access): """Parse a resource fragment into a set of access entries. @@ -488,20 +473,6 @@ def _build_resource(self): return resource - def delete(self, client=None): - """API call: delete the dataset via a DELETE request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - client._connection.api_request(method='DELETE', path=self.path) - def list_tables(self, max_results=None, page_token=None): """List tables for the project associated with this client. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1e9a723631a3..31cd0fb9dfdd 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -105,7 +105,7 @@ def _still_in_use(bad_request): if isinstance(doomed, Bucket): retry_409(doomed.delete)(force=True) elif isinstance(doomed, Dataset): - retry_in_use(doomed.delete)() + retry_in_use(Config.CLIENT.delete_dataset)(doomed) else: doomed.delete() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 173f059374da..ceb530d5a134 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -480,6 +480,29 @@ def test_update_dataset(self): req = conn._requested[1] self.assertEqual(req['headers']['If-Match'], 'etag') + def test_delete_dataset(self): + from google.cloud.bigquery.dataset import Dataset + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + PATH = 'projects/%s/datasets/%s' % (PROJECT, DS_ID) + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection({}, {}) + for arg in (client.dataset(DS_ID), Dataset(DS_ID, project=PROJECT)): + client.delete_dataset(arg) + req = conn._requested[0] + self.assertEqual(req['method'], 'DELETE') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_delete_dataset_wrong_type(self): + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + with self.assertRaises(TypeError): + client.delete_dataset(client.dataset(DS_ID).table("foo")) + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 28c6790b793a..ac863d5052d5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -388,35 +388,6 @@ def test__parse_access_entries_w_extra_keys(self): with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) - def test_delete_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - dataset.delete() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_delete_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_ID, client=CLIENT1) - - dataset.delete(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - def test_list_tables_empty(self): import six From 88399e3fbd9a066de8d58adbf9663d51a78d6c25 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 20 Sep 2017 17:14:00 -0700 Subject: [PATCH 0281/2016] updates dataset.table() to return a TableReference instead of a Table (#4014) --- .../google/cloud/bigquery/dataset.py | 21 +++--- .../google/cloud/bigquery/job.py | 2 +- .../google-cloud-bigquery/tests/system.py | 65 ++++++++++++------- .../tests/unit/test_dataset.py | 23 +------ .../tests/unit/test_job.py | 19 ++---- 5 files changed, 60 insertions(+), 70 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 0d9198809595..3f5bb060c60b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -141,6 +141,9 @@ def path(self): def table(self, table_id): """Constructs a TableReference. + :type table_id: str + :param table_id: the ID of the table. + :rtype: :class:`google.cloud.bigquery.table.TableReference` :returns: a TableReference for a table in this dataset. """ @@ -505,20 +508,16 @@ def list_tables(self, max_results=None, page_token=None): result.dataset = self return result - def table(self, name, schema=()): - """Construct a table bound to this dataset. - - :type name: str - :param name: Name of the table. + def table(self, table_id): + """Constructs a TableReference. - :type schema: list of :class:`google.cloud.bigquery.table.SchemaField` - :param schema: The table's schema + :type table_id: str + :param table_id: the ID of the table. - :rtype: :class:`google.cloud.bigquery.table.Table` - :returns: a new ``Table`` instance + :rtype: :class:`google.cloud.bigquery.table.TableReference` + :returns: a TableReference for a table in this dataset. """ - table_ref = TableReference(self, name) - return Table(table_ref, schema=schema, client=self._client) + return TableReference(self, table_id) def _item_to_table(iterator, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 025b839bc59c..1e0959565074 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1248,7 +1248,7 @@ def __init__(self, job_id, query, client, https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.defaultDataset """ - destination = _TypedProperty('destination', Table) + destination = _TypedProperty('destination', TableReference) """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable """ diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 31cd0fb9dfdd..1adc30d787b5 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -26,6 +26,7 @@ from google.cloud import bigquery from google.cloud.bigquery.dataset import Dataset, DatasetReference +from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi from google.cloud.exceptions import Forbidden, NotFound @@ -178,7 +179,8 @@ def test_create_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) self.assertFalse(table.exists()) table.create() self.to_delete.insert(0, table) @@ -221,7 +223,9 @@ def test_list_tables(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') for table_name in tables_to_create: - created_table = dataset.table(table_name, schema=[full_name, age]) + created_table = Table(dataset.table(table_name), + schema=[full_name, age], + client=Config.CLIENT) created_table.create() self.to_delete.insert(0, created_table) @@ -243,7 +247,8 @@ def test_patch_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) self.assertFalse(table.exists()) table.create() self.to_delete.insert(0, table) @@ -263,7 +268,8 @@ def test_update_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) self.assertFalse(table.exists()) table.create() self.to_delete.insert(0, table) @@ -306,7 +312,8 @@ def test_insert_data_then_dump_table(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') now = bigquery.SchemaField('now', 'TIMESTAMP') - table = dataset.table(TABLE_NAME, schema=[full_name, age, now]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age, now], + client=Config.CLIENT) self.assertFalse(table.exists()) table.create() self.to_delete.insert(0, table) @@ -345,7 +352,8 @@ def test_load_table_from_local_file_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) @@ -389,7 +397,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): Dataset(_make_dataset_id('load_local_then_dump'))) self.to_delete.append(dataset) - table = dataset.table(TABLE_NAME) + table = Table(dataset.table(TABLE_NAME), client=Config.CLIENT) self.to_delete.insert(0, table) with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof: @@ -453,7 +461,8 @@ def test_load_table_from_storage_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) @@ -518,11 +527,10 @@ def test_load_table_from_storage_w_autodetect_schema(self): Dataset(_make_dataset_id('load_gcs_then_dump'))) self.to_delete.append(dataset) - table = dataset.table(table_name) - self.to_delete.insert(0, table) + table_ref = dataset.table(table_name) job = Config.CLIENT.load_table_from_storage( - 'bq_load_storage_test_' + local_id, table, gs_url) + 'bq_load_storage_test_' + local_id, table_ref, gs_url) job.autodetect = True job.begin() @@ -533,7 +541,8 @@ def test_load_table_from_storage_w_autodetect_schema(self): retry = RetryInstanceState(_job_done, max_tries=8) retry(job.reload)() - table = Config.CLIENT.get_table(table) + table = Config.CLIENT.get_table(table_ref) + self.to_delete.insert(0, table) field_name = SchemaField( u'Full_Name', u'string', u'NULLABLE', None, ()) field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ()) @@ -570,10 +579,9 @@ def _load_table_for_extract_table( dataset = retry_403(Config.CLIENT.create_dataset)( Dataset(table.dataset_id)) self.to_delete.append(dataset) - table = dataset.table(table.table_id) - self.to_delete.insert(0, table) + table_ref = dataset.table(table.table_id) job = Config.CLIENT.load_table_from_storage( - 'bq_extract_storage_test_' + local_id, table, gs_url) + 'bq_extract_storage_test_' + local_id, table_ref, gs_url) job.autodetect = True job.begin() # Allow for 90 seconds of "warm up" before rows visible. See @@ -591,7 +599,9 @@ def test_extract_table(self): blob_name = 'person_ages.csv' dataset_id = _make_dataset_id('load_gcs_then_extract') table_id = 'test_table' - table = Config.CLIENT.dataset(dataset_id).table(table_id) + table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=Config.CLIENT) + self.to_delete.insert(0, table) rows = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), @@ -599,13 +609,13 @@ def test_extract_table(self): ('Bhettye Rhubble', 27), ] self._load_table_for_extract_table( - storage_client, rows, bucket_name, blob_name, table) + storage_client, rows, bucket_name, blob_name, table_ref) bucket = storage_client.bucket(bucket_name) destination_blob_name = 'person_ages_out.csv' destination = bucket.blob(destination_blob_name) destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) - job = Config.CLIENT.extract_table(table, destination_uri) + job = Config.CLIENT.extract_table(table_ref, destination_uri) job.result() self.to_delete.insert(0, destination) @@ -621,7 +631,9 @@ def test_extract_table_w_job_config(self): blob_name = 'person_ages.csv' dataset_id = _make_dataset_id('load_gcs_then_extract') table_id = 'test_table' - table = Config.CLIENT.dataset(dataset_id).table(table_id) + table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=Config.CLIENT) + self.to_delete.insert(0, table) rows = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), @@ -629,7 +641,7 @@ def test_extract_table_w_job_config(self): ('Bhettye Rhubble', 27), ] self._load_table_for_extract_table( - storage_client, rows, bucket_name, blob_name, table) + storage_client, rows, bucket_name, blob_name, table_ref) bucket = storage_client.bucket(bucket_name) destination_blob_name = 'person_ages_out.csv' destination = bucket.blob(destination_blob_name) @@ -657,7 +669,8 @@ def test_job_cancel(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) @@ -845,7 +858,8 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') - table = dataset.table(table_id, schema=[greeting]) + table = Table(dataset.table(table_id), schema=[greeting], + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) @@ -1237,7 +1251,8 @@ def test_insert_nested_nested(self): Dataset(_make_dataset_id('issue_2951'))) self.to_delete.append(dataset) - table = dataset.table(table_name, schema=schema) + table = Table(dataset.table(table_name), schema=schema, + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) @@ -1249,14 +1264,14 @@ def test_insert_nested_nested(self): self.assertEqual(rows, to_insert) def test_create_table_insert_fetch_nested_schema(self): - table_name = 'test_table' dataset = retry_403(Config.CLIENT.create_dataset)( Dataset(_make_dataset_id('create_table_nested_schema'))) self.to_delete.append(dataset) schema = _load_json_schema() - table = dataset.table(table_name, schema=schema) + table = Table(dataset.table(table_name), schema=schema, + client=Config.CLIENT) table.create() self.to_delete.insert(0, table) self.assertTrue(table.exists()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index ac863d5052d5..c6c62652d366 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -506,34 +506,17 @@ def test_list_tables_explicit(self): self.assertEqual(req['query_params'], {'maxResults': 3, 'pageToken': TOKEN}) - def test_table_wo_schema(self): - from google.cloud.bigquery.table import Table + def test_table(self): + from google.cloud.bigquery.table import TableReference conn = _Connection({}) client = _Client(project=self.PROJECT, connection=conn) dataset = self._make_one(self.DS_ID, client=client) table = dataset.table('table_id') - self.assertIsInstance(table, Table) - self.assertEqual(table.table_id, 'table_id') - self.assertEqual(table.dataset_id, self.DS_ID) - self.assertEqual(table.project, self.PROJECT) - self.assertEqual(table.schema, []) - - def test_table_w_schema(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table - - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table('table_id', schema=[full_name, age]) - self.assertIsInstance(table, Table) + self.assertIsInstance(table, TableReference) self.assertEqual(table.table_id, 'table_id') self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual(table.project, self.PROJECT) - self.assertEqual(table.schema, [full_name, age]) class _Client(object): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index e53eb08970fc..7c662d01d8c6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1908,7 +1908,7 @@ def test_statement_type(self): self.assertEqual(job.statement_type, statement_type) def test_referenced_tables(self): - from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TableReference ref_tables_resource = [{ 'projectId': self.PROJECT, @@ -1939,23 +1939,20 @@ def test_referenced_tables(self): local1, local2, remote = job.referenced_tables - self.assertIsInstance(local1, Table) + self.assertIsInstance(local1, TableReference) self.assertEqual(local1.table_id, 'local1') self.assertEqual(local1.dataset_id, 'dataset') self.assertEqual(local1.project, self.PROJECT) - self.assertIs(local1._client, client) - self.assertIsInstance(local2, Table) + self.assertIsInstance(local2, TableReference) self.assertEqual(local2.table_id, 'local2') self.assertEqual(local2.dataset_id, 'dataset') self.assertEqual(local2.project, self.PROJECT) - self.assertIs(local2._client, client) - self.assertIsInstance(remote, Table) + self.assertIsInstance(remote, TableReference) self.assertEqual(remote.table_id, 'other-table') self.assertEqual(remote.dataset_id, 'other-dataset') self.assertEqual(remote.project, 'other-project-123') - self.assertIs(remote._client, client) def test_undeclared_query_paramters(self): from google.cloud.bigquery._helpers import ArrayQueryParameter @@ -2173,7 +2170,6 @@ def test_begin_w_bound_client(self): def test_begin_w_alternate_client(self): from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.dataset import Table PATH = '/projects/%s/jobs' % (self.PROJECT,) TABLE = 'TABLE' @@ -2210,12 +2206,11 @@ def test_begin_w_alternate_client(self): dataset_ref = DatasetReference(self.PROJECT, DS_ID) dataset = Dataset(DS_ID, client1) table_ref = dataset_ref.table(TABLE) - table = Table(table_ref, client=client1) job.allow_large_results = True job.create_disposition = 'CREATE_NEVER' job.default_dataset = dataset - job.destination = table + job.destination = table_ref job.flatten_results = True job.priority = 'INTERACTIVE' job.use_query_cache = True @@ -2467,7 +2462,6 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.table import Table PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) DS_ID = 'DATASET' @@ -2479,8 +2473,7 @@ def test_reload_w_bound_client(self): dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) - table = Table(table_ref, client=client) - job.destination = table + job.destination = table_ref job.reload() From 443e2d713d50c5a3a65469f033c16ae440a1fb60 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 21 Sep 2017 12:38:09 -0400 Subject: [PATCH 0282/2016] bigquery: add client.list_dataset_tables (#4013) Remove Dataset.list_tables --- .../google/cloud/bigquery/client.py | 55 ++++++- .../google/cloud/bigquery/dataset.py | 49 ------- .../google-cloud-bigquery/tests/system.py | 6 +- .../tests/unit/test_client.py | 136 ++++++++++++++++++ .../tests/unit/test_dataset.py | 136 +----------------- 5 files changed, 195 insertions(+), 187 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 940b016645f3..3c02a8e10df2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -23,7 +23,7 @@ from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.dataset import Table +from google.cloud.bigquery.table import Table from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob @@ -264,6 +264,44 @@ def update_dataset(self, dataset, fields): method='PATCH', path=path, data=partial, headers=headers) return Dataset.from_api_repr(api_response, self) + def list_dataset_tables(self, dataset, max_results=None, page_token=None): + """List tables in the dataset. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list + + :type dataset: One of: + :class:`~google.cloud.bigquery.dataset.Dataset` + :class:`~google.cloud.bigquery.dataset.DatasetReference` + :param dataset: the dataset whose tables to list, or a reference to it. + + :type max_results: int + :param max_results: (Optional) Maximum number of tables to return. + If not passed, defaults to a value set by the API. + + :type page_token: str + :param page_token: (Optional) Opaque marker for the next "page" of + datasets. If not passed, the API will return the + first page of datasets. + + :rtype: :class:`~google.api.core.page_iterator.Iterator` + :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` + contained within the current dataset. + """ + if not isinstance(dataset, (Dataset, DatasetReference)): + raise TypeError('dataset must be a Dataset or a DatasetReference') + path = '%s/tables' % dataset.path + result = page_iterator.HTTPIterator( + client=self, + api_request=self._connection.api_request, + path=path, + item_to_value=_item_to_table, + items_key='tables', + page_token=page_token, + max_results=max_results) + result.dataset = dataset + return result + def delete_dataset(self, dataset): """Delete a dataset. @@ -600,3 +638,18 @@ def _item_to_job(iterator, resource): :returns: The next job in the page. """ return iterator.client.job_from_resource(resource) + + +def _item_to_table(iterator, resource): + """Convert a JSON table to the native object. + + :type iterator: :class:`~google.api.core.page_iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type resource: dict + :param resource: An item to be converted to a table. + + :rtype: :class:`~google.cloud.bigquery.table.Table` + :returns: The next table in the page. + """ + return Table.from_api_repr(resource, iterator.client) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 3f5bb060c60b..fb41ee2e8a95 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -18,9 +18,7 @@ import six -from google.api.core import page_iterator from google.cloud._helpers import _datetime_from_microseconds -from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference @@ -476,38 +474,6 @@ def _build_resource(self): return resource - def list_tables(self, max_results=None, page_token=None): - """List tables for the project associated with this client. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list - - :type max_results: int - :param max_results: (Optional) Maximum number of tables to return. - If not passed, defaults to a value set by the API. - - :type page_token: str - :param page_token: (Optional) Opaque marker for the next "page" of - datasets. If not passed, the API will return the - first page of datasets. - - :rtype: :class:`~google.api.core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` - contained within the current dataset. - """ - path = '/projects/%s/datasets/%s/tables' % ( - self.project, self.dataset_id) - result = page_iterator.HTTPIterator( - client=self._client, - api_request=self._client._connection.api_request, - path=path, - item_to_value=_item_to_table, - items_key='tables', - page_token=page_token, - max_results=max_results) - result.dataset = self - return result - def table(self, table_id): """Constructs a TableReference. @@ -518,18 +484,3 @@ def table(self, table_id): :returns: a TableReference for a table in this dataset. """ return TableReference(self, table_id) - - -def _item_to_table(iterator, resource): - """Convert a JSON table to the native object. - - :type iterator: :class:`~google.api.core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type resource: dict - :param resource: An item to be converted to a table. - - :rtype: :class:`~google.cloud.bigquery.table.Table` - :returns: The next table in the page. - """ - return Table.from_api_repr(resource, iterator.dataset) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1adc30d787b5..82763d89724b 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -202,13 +202,13 @@ def test_get_table_w_public_dataset(self): self.assertEqual( schema_names, ['word', 'word_count', 'corpus', 'corpus_date']) - def test_list_tables(self): + def test_list_dataset_tables(self): DATASET_ID = _make_dataset_id('list_tables') dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) self.to_delete.append(dataset) # Retrieve tables before any are created for the dataset. - iterator = dataset.list_tables() + iterator = Config.CLIENT.list_dataset_tables(dataset) all_tables = list(iterator) self.assertEqual(all_tables, []) self.assertIsNone(iterator.next_page_token) @@ -230,7 +230,7 @@ def test_list_tables(self): self.to_delete.insert(0, created_table) # Retrieve the tables. - iterator = dataset.list_tables() + iterator = Config.CLIENT.list_dataset_tables(dataset) all_tables = list(iterator) self.assertIsNone(iterator.next_page_token) created = [table for table in all_tables diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ceb530d5a134..8916e9682ac1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -480,6 +480,142 @@ def test_update_dataset(self): req = conn._requested[1] self.assertEqual(req['headers']['If-Match'], 'etag') + def test_list_dataset_tables_empty(self): + import six + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection({}) + + dataset = client.dataset(DS_ID) + iterator = client.list_dataset_tables(dataset) + self.assertIs(iterator.dataset, dataset) + page = six.next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + self.assertEqual(tables, []) + self.assertIsNone(token) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_dataset_tables_defaults(self): + import six + from google.cloud.bigquery.table import Table + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + TABLE_1 = 'table_one' + TABLE_2 = 'table_two' + PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + TOKEN = 'TOKEN' + DATA = { + 'nextPageToken': TOKEN, + 'tables': [ + {'kind': 'bigquery#table', + 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_1), + 'tableReference': {'tableId': TABLE_1, + 'datasetId': DS_ID, + 'projectId': PROJECT}, + 'type': 'TABLE'}, + {'kind': 'bigquery#table', + 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_2), + 'tableReference': {'tableId': TABLE_2, + 'datasetId': DS_ID, + 'projectId': PROJECT}, + 'type': 'TABLE'}, + ] + } + + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection(DATA) + dataset = client.dataset(DS_ID) + + iterator = client.list_dataset_tables(dataset) + self.assertIs(iterator.dataset, dataset) + page = six.next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + self.assertEqual(len(tables), len(DATA['tables'])) + for found, expected in zip(tables, DATA['tables']): + self.assertIsInstance(found, Table) + self.assertEqual(found.full_table_id, expected['id']) + self.assertEqual(found.table_type, expected['type']) + self.assertEqual(token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_dataset_tables_explicit(self): + import six + from google.cloud.bigquery.table import Table + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + TABLE_1 = 'table_one' + TABLE_2 = 'table_two' + PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + TOKEN = 'TOKEN' + DATA = { + 'tables': [ + {'kind': 'bigquery#dataset', + 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_1), + 'tableReference': {'tableId': TABLE_1, + 'datasetId': DS_ID, + 'projectId': PROJECT}, + 'type': 'TABLE'}, + {'kind': 'bigquery#dataset', + 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_2), + 'tableReference': {'tableId': TABLE_2, + 'datasetId': DS_ID, + 'projectId': PROJECT}, + 'type': 'TABLE'}, + ] + } + + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + conn = client._connection = _Connection(DATA) + dataset = client.dataset(DS_ID) + + iterator = client.list_dataset_tables( + dataset, max_results=3, page_token=TOKEN) + self.assertIs(iterator.dataset, dataset) + page = six.next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + self.assertEqual(len(tables), len(DATA['tables'])) + for found, expected in zip(tables, DATA['tables']): + self.assertIsInstance(found, Table) + self.assertEqual(found.full_table_id, expected['id']) + self.assertEqual(found.table_type, expected['type']) + self.assertIsNone(token) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], + {'maxResults': 3, 'pageToken': TOKEN}) + + def test_list_dataset_tables_wrong_type(self): + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds) + with self.assertRaises(TypeError): + client.list_dataset_tables(client.dataset(DS_ID).table("foo")) + def test_delete_dataset(self): from google.cloud.bigquery.dataset import Dataset diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index c6c62652d366..c2fa2a024f17 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -388,129 +388,10 @@ def test__parse_access_entries_w_extra_keys(self): with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) - def test_list_tables_empty(self): - import six - - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - iterator = dataset.list_tables() - self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(tables, []) - self.assertIsNone(token) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - self.assertEqual(req['path'], '/%s' % PATH) - - def test_list_tables_defaults(self): - import six - from google.cloud.bigquery.table import Table - - TABLE_1 = 'table_one' - TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - TOKEN = 'TOKEN' - DATA = { - 'nextPageToken': TOKEN, - 'tables': [ - {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), - 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), - 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - ] - } - - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - iterator = dataset.list_tables() - self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(len(tables), len(DATA['tables'])) - for found, expected in zip(tables, DATA['tables']): - self.assertIsInstance(found, Table) - self.assertEqual(found.full_table_id, expected['id']) - self.assertEqual(found.table_type, expected['type']) - self.assertEqual(token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_list_tables_explicit(self): - import six - from google.cloud.bigquery.table import Table - - TABLE_1 = 'table_one' - TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - TOKEN = 'TOKEN' - DATA = { - 'tables': [ - {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), - 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), - 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - ] - } - - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_ID, client=client) - - iterator = dataset.list_tables(max_results=3, page_token=TOKEN) - self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(len(tables), len(DATA['tables'])) - for found, expected in zip(tables, DATA['tables']): - self.assertIsInstance(found, Table) - self.assertEqual(found.full_table_id, expected['id']) - self.assertEqual(found.table_type, expected['type']) - self.assertIsNone(token) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'maxResults': 3, 'pageToken': TOKEN}) - def test_table(self): from google.cloud.bigquery.table import TableReference - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) + client = _Client(project=self.PROJECT) dataset = self._make_one(self.DS_ID, client=client) table = dataset.table('table_id') self.assertIsInstance(table, TableReference) @@ -521,18 +402,5 @@ def test_table(self): class _Client(object): - def __init__(self, project='project', connection=None): + def __init__(self, project='project'): self.project = project - self._connection = connection - - -class _Connection(object): - - def __init__(self, *responses): - self._responses = responses - self._requested = [] - - def api_request(self, **kw): - self._requested.append(kw) - response, self._responses = self._responses[0], self._responses[1:] - return response From 16ab8a2a2163483f53dd22474867a038017b2b7c Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 21 Sep 2017 16:26:23 -0400 Subject: [PATCH 0283/2016] bigquery: remove client from Dataset (#4018) --- .../google/cloud/bigquery/client.py | 8 +-- .../google/cloud/bigquery/dataset.py | 27 +++----- .../google/cloud/bigquery/job.py | 19 ++--- .../google-cloud-bigquery/tests/system.py | 5 +- .../tests/unit/test_dataset.py | 69 ++++++------------- .../tests/unit/test_job.py | 4 +- .../tests/unit/test_query.py | 4 +- 7 files changed, 49 insertions(+), 87 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 3c02a8e10df2..f40904c2057e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -190,7 +190,7 @@ def create_dataset(self, dataset): path = '/projects/%s/datasets' % (dataset.project,) api_response = self._connection.api_request( method='POST', path=path, data=dataset._build_resource()) - return Dataset.from_api_repr(api_response, self) + return Dataset.from_api_repr(api_response) def get_dataset(self, dataset_ref): """Fetch the dataset referenced by ``dataset_ref`` @@ -204,7 +204,7 @@ def get_dataset(self, dataset_ref): """ api_response = self._connection.api_request( method='GET', path=dataset_ref.path) - return Dataset.from_api_repr(api_response, self) + return Dataset.from_api_repr(api_response) def get_table(self, table_ref): """Fetch the table referenced by ``table_ref`` @@ -262,7 +262,7 @@ def update_dataset(self, dataset, fields): headers = None api_response = self._connection.api_request( method='PATCH', path=path, data=partial, headers=headers) - return Dataset.from_api_repr(api_response, self) + return Dataset.from_api_repr(api_response) def list_dataset_tables(self, dataset, max_results=None, page_token=None): """List tables in the dataset. @@ -622,7 +622,7 @@ def _item_to_dataset(iterator, resource): :rtype: :class:`.Dataset` :returns: The next dataset in the page. """ - return Dataset.from_api_repr(resource, iterator.client) + return Dataset.from_api_repr(resource) def _item_to_job(iterator, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index fb41ee2e8a95..c5bed721bab4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -157,38 +157,31 @@ class Dataset(object): :type dataset_id: str :param dataset_id: the ID of the dataset - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: (Optional) A client which holds credentials and project - configuration for the dataset (which requires a project). - :type access_entries: list of :class:`AccessEntry` :param access_entries: roles granted to entities for this dataset :type project: str - :param project: (Optional) project ID for the dataset (defaults to - the project of the client). + :param project: (Optional) project ID for the dataset. """ _access_entries = None def __init__(self, dataset_id, - client=None, access_entries=(), project=None): self._dataset_id = dataset_id - self._client = client self._properties = {} # Let the @property do validation. self.access_entries = access_entries - self._project = project or (client and client.project) + self._project = project @property def project(self): """Project bound to the dataset. :rtype: str - :returns: the project (derived from the client). + :returns: the project. """ return self._project @@ -373,25 +366,21 @@ def location(self, value): self._properties['location'] = value @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource): """Factory: construct a dataset given its API representation :type resource: dict :param resource: dataset resource representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: Client which holds credentials and project - configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: Dataset parsed from ``resource``. """ - if ('datasetReference' not in resource or - 'datasetId' not in resource['datasetReference']): + dsr = resource.get('datasetReference') + if dsr is None or 'datasetId' not in dsr: raise KeyError('Resource lacks required identity information:' '["datasetReference"]["datasetId"]') - dataset_id = resource['datasetReference']['datasetId'] - dataset = cls(dataset_id, client=client) + dataset_id = dsr['datasetId'] + dataset = cls(dataset_id, project=dsr['projectId']) dataset._set_properties(resource) return dataset diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 1e0959565074..42f12ac39838 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -842,7 +842,8 @@ def from_api_repr(cls, resource, client): """ job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], client) + dataset = Dataset(dest_config['datasetId'], + project=dest_config['projectId']) table_ref = TableReference(dataset, dest_config['tableId']) destination = Table(table_ref, client=client) source_urls = config.get('sourceUris', ()) @@ -958,7 +959,8 @@ def from_api_repr(cls, resource, client): """ job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], client) + dataset = Dataset(dest_config['datasetId'], + project=dest_config['projectId']) table_ref = TableReference(dataset, dest_config['tableId']) destination = Table(table_ref, client=client) sources = [] @@ -970,7 +972,8 @@ def from_api_repr(cls, resource, client): "Resource missing 'sourceTables' / 'sourceTable'") source_configs = [single] for source_config in source_configs: - dataset = Dataset(source_config['datasetId'], client) + dataset = Dataset(source_config['datasetId'], + project=source_config['projectId']) table_ref = TableReference(dataset, source_config['tableId']) sources.append(Table(table_ref, client=client)) job = cls(job_id, destination, sources, client=client) @@ -1423,8 +1426,7 @@ def _copy_configuration_properties(self, configuration): dest_local = self._destination_table_resource() if dest_remote != dest_local: project = dest_remote['projectId'] - dataset = Dataset( - dest_remote['datasetId'], self._client, project=project) + dataset = Dataset(dest_remote['datasetId'], project=project) self.destination = dataset.table(dest_remote['tableId']) def_ds = configuration.get('defaultDataset') @@ -1432,8 +1434,8 @@ def _copy_configuration_properties(self, configuration): if self.default_dataset is not None: del self.default_dataset else: - project = def_ds['projectId'] - self.default_dataset = Dataset(def_ds['datasetId'], self._client) + self.default_dataset = Dataset(def_ds['datasetId'], + project=def_ds['projectId']) udf_resources = [] for udf_mapping in configuration.get(self._UDF_KEY, ()): @@ -1579,7 +1581,6 @@ def referenced_tables(self): if the query has not yet completed. """ tables = [] - client = self._require_client(None) datasets_by_project_name = {} for table in self._job_statistics().get('referencedTables', ()): @@ -1589,7 +1590,7 @@ def referenced_tables(self): ds_name = table['datasetId'] t_dataset = datasets_by_project_name.get((t_project, ds_name)) if t_dataset is None: - t_dataset = Dataset(ds_name, client, project=t_project) + t_dataset = Dataset(ds_name, project=t_project) datasets_by_project_name[(t_project, ds_name)] = t_dataset t_name = table['tableId'] diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 82763d89724b..d621a661f06d 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -117,11 +117,12 @@ def test_create_dataset(self): self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.dataset_id, DATASET_ID) + self.assertEqual(dataset.project, Config.CLIENT.project) def test_get_dataset(self): DATASET_ID = _make_dataset_id('get_dataset') client = Config.CLIENT - dataset_arg = Dataset(DATASET_ID) + dataset_arg = Dataset(DATASET_ID, project=client.project) dataset_arg.friendly_name = 'Friendly' dataset_arg.description = 'Description' dataset = retry_403(client.create_dataset)(dataset_arg) @@ -1195,7 +1196,7 @@ def test_dump_table_w_public_data(self): DATASET_ID = 'samples' TABLE_NAME = 'natality' - dataset = Dataset(DATASET_ID, Config.CLIENT, project=PUBLIC) + dataset = Dataset(DATASET_ID, project=PUBLIC) table_ref = dataset.table(TABLE_NAME) table = Config.CLIENT.get_table(table_ref) self._fetch_single_page(table) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index c2fa2a024f17..89114196d828 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -210,11 +210,9 @@ def _verify_resource_properties(self, dataset, resource): self.assertEqual(dataset.access_entries, []) def test_ctor_defaults(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID, project=self.PROJECT) self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertIs(dataset._client, client) - self.assertEqual(dataset.project, client.project) + self.assertEqual(dataset.project, self.PROJECT) self.assertEqual( dataset.path, '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)) @@ -238,12 +236,10 @@ def test_ctor_explicit(self): bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') entries = [phred, bharney] OTHER_PROJECT = 'foo-bar-123' - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client, + dataset = self._make_one(self.DS_ID, access_entries=entries, project=OTHER_PROJECT) self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertIs(dataset._client, client) self.assertEqual(dataset.project, OTHER_PROJECT) self.assertEqual( dataset.path, @@ -262,16 +258,14 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.location) def test_access_entries_setter_non_list(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(TypeError): dataset.access_entries = object() def test_access_entries_setter_invalid_field(self): from google.cloud.bigquery.dataset import AccessEntry - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') with self.assertRaises(ValueError): dataset.access_entries = [phred, object()] @@ -279,72 +273,61 @@ def test_access_entries_setter_invalid_field(self): def test_access_entries_setter(self): from google.cloud.bigquery.dataset import AccessEntry - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') dataset.access_entries = [phred, bharney] self.assertEqual(dataset.access_entries, [phred, bharney]) def test_default_table_expiration_ms_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.default_table_expiration_ms = 'bogus' def test_default_table_expiration_ms_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.default_table_expiration_ms = 12345 self.assertEqual(dataset.default_table_expiration_ms, 12345) def test_description_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.description = 12345 def test_description_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.description = 'DESCRIPTION' self.assertEqual(dataset.description, 'DESCRIPTION') def test_friendly_name_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.friendly_name = 12345 def test_friendly_name_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.friendly_name = 'FRIENDLY' self.assertEqual(dataset.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.location = 12345 def test_location_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) + klass.from_api_repr(RESOURCE) def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = { 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'datasetReference': { @@ -353,24 +336,20 @@ def test_from_api_repr_bare(self): } } klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) + dataset = klass.from_api_repr(RESOURCE) self._verify_resource_properties(dataset, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) RESOURCE = self._makeResource() klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) + dataset = klass.from_api_repr(RESOURCE) self._verify_resource_properties(dataset, RESOURCE) def test__parse_access_entries_w_unknown_entity_type(self): ACCESS = [ {'role': 'READER', 'unknown': 'UNKNOWN'}, ] - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client=client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) @@ -383,24 +362,16 @@ def test__parse_access_entries_w_extra_keys(self): 'userByEmail': USER_EMAIL, }, ] - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client=client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) def test_table(self): from google.cloud.bigquery.table import TableReference - client = _Client(project=self.PROJECT) - dataset = self._make_one(self.DS_ID, client=client) + dataset = self._make_one(self.DS_ID, project=self.PROJECT) table = dataset.table('table_id') self.assertIsInstance(table, TableReference) self.assertEqual(table.table_id, 'table_id') self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual(table.project, self.PROJECT) - - -class _Client(object): - - def __init__(self, project='project'): - self.project = project diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 7c662d01d8c6..470e802d1150 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2139,7 +2139,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.default_dataset = Dataset(DS_ID, client) + job.default_dataset = Dataset(DS_ID, project=self.PROJECT) job.begin() @@ -2204,7 +2204,7 @@ def test_begin_w_alternate_client(self): job = self._make_one(self.JOB_NAME, self.QUERY, client1) dataset_ref = DatasetReference(self.PROJECT, DS_ID) - dataset = Dataset(DS_ID, client1) + dataset = Dataset(DS_ID, project=self.PROJECT) table_ref = dataset_ref.table(TABLE) job.allow_large_results = True diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 0bf0c17c3102..ee2783744c94 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -206,7 +206,7 @@ def test_from_query_job(self): job = QueryJob( self.JOB_NAME, self.QUERY, client, udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - dataset = job.default_dataset = Dataset(DS_NAME, client) + dataset = job.default_dataset = Dataset(DS_NAME) job.use_query_cache = True job.use_legacy_sql = True klass = self._get_target_class() @@ -744,7 +744,7 @@ def __init__(self, project='project', connection=None): def dataset(self, name): from google.cloud.bigquery.dataset import Dataset - return Dataset(name, client=self) + return Dataset(name) class _Connection(object): From b478f17698bd391555c6824ca2c27e1ee27fbd35 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Fri, 22 Sep 2017 14:37:57 -0400 Subject: [PATCH 0284/2016] bigquery: dataset labels (#4026) Add a labels property to Dataset, gettable and settable. --- .../google/cloud/bigquery/dataset.py | 32 ++++++++++++++++++- .../google-cloud-bigquery/tests/system.py | 20 +++++++++--- .../tests/unit/test_client.py | 13 +++++++- .../tests/unit/test_dataset.py | 10 ++++++ 4 files changed, 69 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index c5bed721bab4..105772261449 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -171,7 +171,7 @@ def __init__(self, access_entries=(), project=None): self._dataset_id = dataset_id - self._properties = {} + self._properties = {'labels': {}} # Let the @property do validation. self.access_entries = access_entries self._project = project @@ -365,6 +365,32 @@ def location(self, value): raise ValueError("Pass a string, or None") self._properties['location'] = value + @property + def labels(self): + """Labels for the dataset. + + This method always returns a dict. To change a dataset's labels, + modify the dict, then call ``Client.update_dataset``. To delete a + label, set its value to ``None`` before updating. + + :rtype: dict, {str -> str} + :returns: A dict of the the dataset's labels. + """ + return self._properties['labels'] + + @labels.setter + def labels(self, value): + """Update labels for the dataset. + + :type value: dict, {str -> str} + :param value: new labels + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, dict): + raise ValueError("Pass a dict") + self._properties['labels'] = value + @classmethod def from_api_repr(cls, resource): """Factory: construct a dataset given its API representation @@ -427,6 +453,8 @@ def _set_properties(self, api_response): if 'defaultTableExpirationMs' in cleaned: cleaned['defaultTableExpirationMs'] = int( cleaned['defaultTableExpirationMs']) + if 'labels' not in cleaned: + cleaned['labels'] = {} self._properties.update(cleaned) def _build_access_resource(self): @@ -461,6 +489,8 @@ def _build_resource(self): if len(self.access_entries) > 0: resource['access'] = self._build_access_resource() + resource['labels'] = self.labels # labels is never None + return resource def table(self, table_id): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index d621a661f06d..e1fd218c8580 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -136,21 +136,33 @@ def test_get_dataset(self): def test_update_dataset(self): dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('patch_dataset'))) + Dataset(_make_dataset_id('update_dataset'))) self.to_delete.append(dataset) self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) + self.assertEquals(dataset.labels, {}) + dataset.friendly_name = 'Friendly' dataset.description = 'Description' - ds2 = Config.CLIENT.update_dataset(dataset, - ['friendly_name', 'description']) + dataset.labels = {'priority': 'high', 'color': 'blue'} + ds2 = Config.CLIENT.update_dataset( + dataset, + ('friendly_name', 'description', 'labels')) self.assertEqual(ds2.friendly_name, 'Friendly') self.assertEqual(ds2.description, 'Description') + self.assertEqual(ds2.labels, {'priority': 'high', 'color': 'blue'}) - # TODO(jba): test that read-modify-write with ETag works. + ds2.labels = { + 'color': 'green', # change + 'shape': 'circle', # add + 'priority': None, # delete + } + ds3 = Config.CLIENT.update_dataset(ds2, ['labels']) + self.assertEqual(ds3.labels, {'color': 'green', 'shape': 'circle'}) + # TODO(jba): test that read-modify-write with ETag works. def test_list_datasets(self): datasets_to_create = [ 'new' + unique_resource_id(), diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 8916e9682ac1..ec12be72efae 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -321,6 +321,7 @@ def test_create_dataset_minimal(self): SENT = { 'datasetReference': {'projectId': PROJECT, 'datasetId': DS_ID}, + 'labels': {}, } self.assertEqual(req['data'], SENT) self.assertEqual(ds.dataset_id, DS_ID) @@ -338,6 +339,7 @@ def test_create_dataset_w_attrs(self): FRIENDLY_NAME = 'FN' LOCATION = 'US' USER_EMAIL = 'phred@example.com' + LABELS = {'color': 'red'} VIEW = { 'projectId': 'my-proj', 'datasetId': 'starry-skies', @@ -352,6 +354,7 @@ def test_create_dataset_w_attrs(self): 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, 'defaultTableExpirationMs': 3600, + 'labels': LABELS, 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'view': VIEW}], @@ -366,6 +369,7 @@ def test_create_dataset_w_attrs(self): ds_arg.friendly_name = FRIENDLY_NAME ds_arg.default_table_expiration_ms = 3600 ds_arg.location = LOCATION + ds_arg.labels = LABELS ds = client.create_dataset(ds_arg) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -381,6 +385,7 @@ def test_create_dataset_w_attrs(self): 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'view': VIEW}], + 'labels': LABELS, } self.assertEqual(req['data'], SENT) self.assertEqual(ds.dataset_id, DS_ID) @@ -391,6 +396,7 @@ def test_create_dataset_w_attrs(self): self.assertEqual(ds.friendly_name, FRIENDLY_NAME) self.assertEqual(ds.location, LOCATION) self.assertEqual(ds.default_table_expiration_ms, 3600) + self.assertEqual(ds.labels, LABELS) def test_get_table(self): project = 'PROJECT' @@ -439,6 +445,7 @@ def test_update_dataset(self): DESCRIPTION = 'DESCRIPTION' FRIENDLY_NAME = 'TITLE' LOCATION = 'loc' + LABELS = {'priority': 'high'} EXP = 17 RESOURCE = { 'datasetReference': @@ -448,6 +455,7 @@ def test_update_dataset(self): 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, 'defaultTableExpirationMs': EXP, + 'labels': LABELS, } creds = _make_credentials() client = self._make_one(project=PROJECT, credentials=creds) @@ -457,8 +465,9 @@ def test_update_dataset(self): ds.friendly_name = FRIENDLY_NAME ds.location = LOCATION ds.default_table_expiration_ms = EXP + ds.labels = LABELS ds2 = client.update_dataset( - ds, ['description', 'friendly_name', 'location']) + ds, ['description', 'friendly_name', 'location', 'labels']) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'PATCH') @@ -466,6 +475,7 @@ def test_update_dataset(self): 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, + 'labels': LABELS, } self.assertEqual(req['data'], SENT) self.assertEqual(req['path'], '/' + PATH) @@ -473,6 +483,7 @@ def test_update_dataset(self): self.assertEqual(ds2.description, ds.description) self.assertEqual(ds2.friendly_name, ds.friendly_name) self.assertEqual(ds2.location, ds.location) + self.assertEqual(ds2.labels, ds.labels) # ETag becomes If-Match header. ds._properties['etag'] = 'etag' diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 89114196d828..9d13ebb9bc4b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -319,6 +319,16 @@ def test_location_setter(self): dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') + def test_labels_setter(self): + dataset = self._make_one(self.DS_ID) + dataset.labels = {'color': 'green'} + self.assertEqual(dataset.labels, {'color': 'green'}) + + def test_labels_setter_bad_value(self): + dataset = self._make_one(self.DS_ID) + with self.assertRaises(ValueError): + dataset.labels = None + def test_from_api_repr_missing_identity(self): self._setUpConstants() RESOURCE = {} From 004e66cb027306c9110cfa05d3d10d3271fe06a0 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Fri, 22 Sep 2017 15:45:52 -0400 Subject: [PATCH 0285/2016] bigquery: Dataset constructor takes a DatasetReference (#4036) --- .../google/cloud/bigquery/client.py | 4 - .../google/cloud/bigquery/dataset.py | 30 +++---- .../google/cloud/bigquery/job.py | 32 +++---- .../google-cloud-bigquery/tests/system.py | 86 ++++++------------- .../tests/unit/test_client.py | 12 +-- .../tests/unit/test_dataset.py | 50 ++++++----- .../tests/unit/test_job.py | 4 +- .../tests/unit/test_query.py | 13 +-- 8 files changed, 100 insertions(+), 131 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f40904c2057e..1bd46e407968 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -185,8 +185,6 @@ def create_dataset(self, dataset): :rtype: ":class:`~google.cloud.bigquery.dataset.Dataset`" :returns: a new ``Dataset`` returned from the service. """ - if dataset.project is None: - dataset._project = self.project path = '/projects/%s/datasets' % (dataset.project,) api_response = self._connection.api_request( method='POST', path=path, data=dataset._build_resource()) @@ -244,8 +242,6 @@ def update_dataset(self, dataset, fields): :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: the modified ``Dataset`` instance """ - if dataset.project is None: - dataset._project = self.project path = '/projects/%s/datasets/%s' % (dataset.project, dataset.dataset_id) partial = {} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 105772261449..fc641c3a4d58 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -106,6 +106,10 @@ class DatasetReference(object): """ def __init__(self, project, dataset_id): + if not isinstance(project, six.string_types): + raise ValueError("Pass a string for project") + if not isinstance(dataset_id, six.string_types): + raise ValueError("Pass a string for dataset_id") self._project = project self._dataset_id = dataset_id @@ -154,27 +158,15 @@ class Dataset(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets - :type dataset_id: str - :param dataset_id: the ID of the dataset - - :type access_entries: list of :class:`AccessEntry` - :param access_entries: roles granted to entities for this dataset - - :type project: str - :param project: (Optional) project ID for the dataset. + :type dataset_ref: :class:`~google.cloud.bigquery.dataset.DatasetReference` + :param dataset_ref: a pointer to a dataset """ - _access_entries = None - - def __init__(self, - dataset_id, - access_entries=(), - project=None): - self._dataset_id = dataset_id + def __init__(self, dataset_ref): + self._project = dataset_ref.project + self._dataset_id = dataset_ref.dataset_id self._properties = {'labels': {}} - # Let the @property do validation. - self.access_entries = access_entries - self._project = project + self._access_entries = () @property def project(self): @@ -406,7 +398,7 @@ def from_api_repr(cls, resource): raise KeyError('Resource lacks required identity information:' '["datasetReference"]["datasetId"]') dataset_id = dsr['datasetId'] - dataset = cls(dataset_id, project=dsr['projectId']) + dataset = cls(DatasetReference(dsr['projectId'], dataset_id)) dataset._set_properties(resource) return dataset diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 42f12ac39838..84cca80e22a0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -842,8 +842,9 @@ def from_api_repr(cls, resource, client): """ job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], - project=dest_config['projectId']) + ds_ref = DatasetReference(dest_config['projectId'], + dest_config['datasetId'],) + dataset = Dataset(ds_ref) table_ref = TableReference(dataset, dest_config['tableId']) destination = Table(table_ref, client=client) source_urls = config.get('sourceUris', ()) @@ -959,8 +960,9 @@ def from_api_repr(cls, resource, client): """ job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], - project=dest_config['projectId']) + ds_ref = DatasetReference(dest_config['projectId'], + dest_config['datasetId'],) + dataset = Dataset(ds_ref) table_ref = TableReference(dataset, dest_config['tableId']) destination = Table(table_ref, client=client) sources = [] @@ -972,9 +974,9 @@ def from_api_repr(cls, resource, client): "Resource missing 'sourceTables' / 'sourceTable'") source_configs = [single] for source_config in source_configs: - dataset = Dataset(source_config['datasetId'], - project=source_config['projectId']) - table_ref = TableReference(dataset, source_config['tableId']) + ds_ref = DatasetReference(source_config['projectId'], + source_config['datasetId']) + table_ref = ds_ref.table(source_config['tableId']) sources.append(Table(table_ref, client=client)) job = cls(job_id, destination, sources, client=client) job._set_properties(resource) @@ -1426,7 +1428,8 @@ def _copy_configuration_properties(self, configuration): dest_local = self._destination_table_resource() if dest_remote != dest_local: project = dest_remote['projectId'] - dataset = Dataset(dest_remote['datasetId'], project=project) + dataset = Dataset(DatasetReference(project, + dest_remote['datasetId'])) self.destination = dataset.table(dest_remote['tableId']) def_ds = configuration.get('defaultDataset') @@ -1434,9 +1437,8 @@ def _copy_configuration_properties(self, configuration): if self.default_dataset is not None: del self.default_dataset else: - self.default_dataset = Dataset(def_ds['datasetId'], - project=def_ds['projectId']) - + self.default_dataset = Dataset( + DatasetReference(def_ds['projectId'], def_ds['datasetId'])) udf_resources = [] for udf_mapping in configuration.get(self._UDF_KEY, ()): key_val, = udf_mapping.items() @@ -1587,11 +1589,11 @@ def referenced_tables(self): t_project = table['projectId'] - ds_name = table['datasetId'] - t_dataset = datasets_by_project_name.get((t_project, ds_name)) + ds_id = table['datasetId'] + t_dataset = datasets_by_project_name.get((t_project, ds_id)) if t_dataset is None: - t_dataset = Dataset(ds_name, project=t_project) - datasets_by_project_name[(t_project, ds_name)] = t_dataset + t_dataset = DatasetReference(t_project, ds_id) + datasets_by_project_name[(t_project, ds_id)] = t_dataset t_name = table['tableId'] tables.append(t_dataset.table(t_name)) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index e1fd218c8580..de19a7da01c5 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -112,8 +112,7 @@ def _still_in_use(bad_request): def test_create_dataset(self): DATASET_ID = _make_dataset_id('create_dataset') - dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) - self.to_delete.append(dataset) + dataset = self.temp_dataset(DATASET_ID) self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.dataset_id, DATASET_ID) @@ -122,7 +121,7 @@ def test_create_dataset(self): def test_get_dataset(self): DATASET_ID = _make_dataset_id('get_dataset') client = Config.CLIENT - dataset_arg = Dataset(DATASET_ID, project=client.project) + dataset_arg = Dataset(client.dataset(DATASET_ID)) dataset_arg.friendly_name = 'Friendly' dataset_arg.description = 'Description' dataset = retry_403(client.create_dataset)(dataset_arg) @@ -135,10 +134,7 @@ def test_get_dataset(self): self.assertEqual(got.description, 'Description') def test_update_dataset(self): - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('update_dataset'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('update_dataset')) self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) @@ -163,6 +159,7 @@ def test_update_dataset(self): self.assertEqual(ds3.labels, {'color': 'green', 'shape': 'circle'}) # TODO(jba): test that read-modify-write with ETag works. + def test_list_datasets(self): datasets_to_create = [ 'new' + unique_resource_id(), @@ -170,9 +167,7 @@ def test_list_datasets(self): 'newest' + unique_resource_id(), ] for dataset_id in datasets_to_create: - created_dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(dataset_id)) - self.to_delete.append(created_dataset) + self.temp_dataset(dataset_id) # Retrieve the datasets. iterator = Config.CLIENT.list_datasets() @@ -184,9 +179,7 @@ def test_list_datasets(self): self.assertEqual(len(created), len(datasets_to_create)) def test_create_table(self): - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('create_table'))) - self.to_delete.append(dataset) + dataset = self.temp_dataset(_make_dataset_id('create_table')) TABLE_NAME = 'test_table' full_name = bigquery.SchemaField('full_name', 'STRING', @@ -217,9 +210,7 @@ def test_get_table_w_public_dataset(self): def test_list_dataset_tables(self): DATASET_ID = _make_dataset_id('list_tables') - dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(DATASET_ID) # Retrieve tables before any are created for the dataset. iterator = Config.CLIENT.list_dataset_tables(dataset) all_tables = list(iterator) @@ -252,9 +243,7 @@ def test_list_dataset_tables(self): self.assertEqual(len(created), len(tables_to_create)) def test_patch_table(self): - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('patch_table'))) - self.to_delete.append(dataset) + dataset = self.temp_dataset(_make_dataset_id('patch_table')) TABLE_NAME = 'test_table' full_name = bigquery.SchemaField('full_name', 'STRING', @@ -273,9 +262,7 @@ def test_patch_table(self): self.assertEqual(table.description, 'Description') def test_update_table(self): - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('update_table'))) - self.to_delete.append(dataset) + dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' full_name = bigquery.SchemaField('full_name', 'STRING', @@ -316,10 +303,7 @@ def test_insert_data_then_dump_table(self): ] ROW_IDS = range(len(ROWS)) - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('insert_data_then_dump'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('insert_data_then_dump')) TABLE_NAME = 'test_table' full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') @@ -358,10 +342,7 @@ def test_load_table_from_local_file_then_dump_table(self): ] TABLE_NAME = 'test_table' - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('load_local_then_dump'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') @@ -406,10 +387,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("orange", 590), ("red", 650)] - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('load_local_then_dump'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) table = Table(dataset.table(TABLE_NAME), client=Config.CLIENT) self.to_delete.insert(0, table) @@ -467,9 +445,7 @@ def test_load_table_from_storage_then_dump_table(self): self.to_delete.insert(0, blob) - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('load_gcs_then_dump'))) - self.to_delete.append(dataset) + dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') @@ -536,10 +512,7 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.to_delete.insert(0, blob) - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('load_gcs_then_dump'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) table_ref = dataset.table(table_name) job = Config.CLIENT.load_table_from_storage( @@ -589,9 +562,7 @@ def _load_table_for_extract_table( blob.upload_from_file(csv_read, content_type='text/csv') self.to_delete.insert(0, blob) - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(table.dataset_id)) - self.to_delete.append(dataset) + dataset = self.temp_dataset(table.dataset_id) table_ref = dataset.table(table.table_id) job = Config.CLIENT.load_table_from_storage( 'bq_extract_storage_test_' + local_id, table_ref, gs_url) @@ -676,8 +647,7 @@ def test_job_cancel(self): TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) - dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID)) - self.to_delete.append(dataset) + dataset = self.temp_dataset(DATASET_ID) full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') @@ -866,9 +836,7 @@ def test_dbapi_fetchall(self): def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile - dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_id)) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(dataset_id) greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') table = Table(dataset.table(table_id), schema=[greeting], @@ -1208,8 +1176,7 @@ def test_dump_table_w_public_data(self): DATASET_ID = 'samples' TABLE_NAME = 'natality' - dataset = Dataset(DATASET_ID, project=PUBLIC) - table_ref = dataset.table(TABLE_NAME) + table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME) table = Config.CLIENT.get_table(table_ref) self._fetch_single_page(table) @@ -1260,10 +1227,7 @@ def test_insert_nested_nested(self): ('Some value', record) ] table_name = 'test_table' - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('issue_2951'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset(_make_dataset_id('issue_2951')) table = Table(dataset.table(table_name), schema=schema, client=Config.CLIENT) table.create() @@ -1278,10 +1242,8 @@ def test_insert_nested_nested(self): def test_create_table_insert_fetch_nested_schema(self): table_name = 'test_table' - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(_make_dataset_id('create_table_nested_schema'))) - self.to_delete.append(dataset) - + dataset = self.temp_dataset( + _make_dataset_id('create_table_nested_schema')) schema = _load_json_schema() table = Table(dataset.table(table_name), schema=schema, client=Config.CLIENT) @@ -1339,6 +1301,12 @@ def test_create_table_insert_fetch_nested_schema(self): e_favtime = datetime.datetime(*parts[0:6]) self.assertEqual(found[7], e_favtime) # FavoriteTime + def temp_dataset(self, dataset_id): + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(Config.CLIENT.dataset(dataset_id))) + self.to_delete.append(dataset) + return dataset + def _job_done(instance): return instance.state.lower() == 'done' diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ec12be72efae..50c324ebfc32 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -313,7 +313,7 @@ def test_create_dataset_minimal(self): creds = _make_credentials() client = self._make_one(project=PROJECT, credentials=creds) conn = client._connection = _Connection(RESOURCE) - ds = client.create_dataset(Dataset(DS_ID)) + ds = client.create_dataset(Dataset(client.dataset(DS_ID))) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') @@ -364,7 +364,8 @@ def test_create_dataset_w_attrs(self): conn = client._connection = _Connection(RESOURCE) entries = [AccessEntry('OWNER', 'userByEmail', USER_EMAIL), AccessEntry(None, 'view', VIEW)] - ds_arg = Dataset(DS_ID, project=PROJECT, access_entries=entries) + ds_arg = Dataset(client.dataset(DS_ID)) + ds_arg.access_entries = entries ds_arg.description = DESCRIPTION ds_arg.friendly_name = FRIENDLY_NAME ds_arg.default_table_expiration_ms = 3600 @@ -434,7 +435,7 @@ def test_update_dataset_w_invalid_field(self): creds = _make_credentials() client = self._make_one(project=PROJECT, credentials=creds) with self.assertRaises(ValueError): - client.update_dataset(Dataset(DS_ID), ["foo"]) + client.update_dataset(Dataset(client.dataset(DS_ID)), ["foo"]) def test_update_dataset(self): from google.cloud.bigquery.dataset import Dataset @@ -460,7 +461,7 @@ def test_update_dataset(self): creds = _make_credentials() client = self._make_one(project=PROJECT, credentials=creds) conn = client._connection = _Connection(RESOURCE, RESOURCE) - ds = Dataset(DS_ID, project=PROJECT) + ds = Dataset(client.dataset(DS_ID)) ds.description = DESCRIPTION ds.friendly_name = FRIENDLY_NAME ds.location = LOCATION @@ -636,7 +637,8 @@ def test_delete_dataset(self): creds = _make_credentials() client = self._make_one(project=PROJECT, credentials=creds) conn = client._connection = _Connection({}, {}) - for arg in (client.dataset(DS_ID), Dataset(DS_ID, project=PROJECT)): + ds_ref = client.dataset(DS_ID) + for arg in (ds_ref, Dataset(ds_ref)): client.delete_dataset(arg) req = conn._requested[0] self.assertEqual(req['method'], 'DELETE') diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 9d13ebb9bc4b..ced77990a65d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -101,6 +101,12 @@ def test_ctor_defaults(self): self.assertEqual(dataset_ref.project, 'some-project-1') self.assertEqual(dataset_ref.dataset_id, 'dataset_1') + def test_ctor_bad_args(self): + with self.assertRaises(ValueError): + self._make_one(1, 'd') + with self.assertRaises(ValueError): + self._make_one('p', 2) + def test_table(self): dataset_ref = self._make_one('some-project-1', 'dataset_1') table_ref = dataset_ref.table('table_1') @@ -110,8 +116,11 @@ def test_table(self): class TestDataset(unittest.TestCase): + from google.cloud.bigquery.dataset import DatasetReference + PROJECT = 'project' DS_ID = 'dataset-id' + DS_REF = DatasetReference(PROJECT, DS_ID) @staticmethod def _get_target_class(): @@ -210,7 +219,7 @@ def _verify_resource_properties(self, dataset, resource): self.assertEqual(dataset.access_entries, []) def test_ctor_defaults(self): - dataset = self._make_one(self.DS_ID, project=self.PROJECT) + dataset = self._make_one(self.DS_REF) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) self.assertEqual( @@ -230,15 +239,14 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.location) def test_ctor_explicit(self): - from google.cloud.bigquery.dataset import AccessEntry + from google.cloud.bigquery.dataset import DatasetReference, AccessEntry phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') entries = [phred, bharney] OTHER_PROJECT = 'foo-bar-123' - dataset = self._make_one(self.DS_ID, - access_entries=entries, - project=OTHER_PROJECT) + dataset = self._make_one(DatasetReference(OTHER_PROJECT, self.DS_ID)) + dataset.access_entries = entries self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, OTHER_PROJECT) self.assertEqual( @@ -258,14 +266,14 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.location) def test_access_entries_setter_non_list(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(TypeError): dataset.access_entries = object() def test_access_entries_setter_invalid_field(self): from google.cloud.bigquery.dataset import AccessEntry - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') with self.assertRaises(ValueError): dataset.access_entries = [phred, object()] @@ -273,59 +281,59 @@ def test_access_entries_setter_invalid_field(self): def test_access_entries_setter(self): from google.cloud.bigquery.dataset import AccessEntry - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') dataset.access_entries = [phred, bharney] self.assertEqual(dataset.access_entries, [phred, bharney]) def test_default_table_expiration_ms_setter_bad_value(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.default_table_expiration_ms = 'bogus' def test_default_table_expiration_ms_setter(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) dataset.default_table_expiration_ms = 12345 self.assertEqual(dataset.default_table_expiration_ms, 12345) def test_description_setter_bad_value(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.description = 12345 def test_description_setter(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) dataset.description = 'DESCRIPTION' self.assertEqual(dataset.description, 'DESCRIPTION') def test_friendly_name_setter_bad_value(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.friendly_name = 12345 def test_friendly_name_setter(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) dataset.friendly_name = 'FRIENDLY' self.assertEqual(dataset.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.location = 12345 def test_location_setter(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') def test_labels_setter(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) dataset.labels = {'color': 'green'} self.assertEqual(dataset.labels, {'color': 'green'}) def test_labels_setter_bad_value(self): - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.labels = None @@ -359,7 +367,7 @@ def test__parse_access_entries_w_unknown_entity_type(self): ACCESS = [ {'role': 'READER', 'unknown': 'UNKNOWN'}, ] - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) @@ -372,14 +380,14 @@ def test__parse_access_entries_w_extra_keys(self): 'userByEmail': USER_EMAIL, }, ] - dataset = self._make_one(self.DS_ID) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) def test_table(self): from google.cloud.bigquery.table import TableReference - dataset = self._make_one(self.DS_ID, project=self.PROJECT) + dataset = self._make_one(self.DS_REF) table = dataset.table('table_id') self.assertIsInstance(table, TableReference) self.assertEqual(table.table_id, 'table_id') diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 470e802d1150..029db44cd534 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2139,7 +2139,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.default_dataset = Dataset(DS_ID, project=self.PROJECT) + job.default_dataset = Dataset(DatasetReference(self.PROJECT, DS_ID)) job.begin() @@ -2204,7 +2204,7 @@ def test_begin_w_alternate_client(self): job = self._make_one(self.JOB_NAME, self.QUERY, client1) dataset_ref = DatasetReference(self.PROJECT, DS_ID) - dataset = Dataset(DS_ID, project=self.PROJECT) + dataset = Dataset(dataset_ref) table_ref = dataset_ref.table(TABLE) job.allow_large_results = True diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index ee2783744c94..73f23cb1bf6a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -196,17 +196,18 @@ def test_ctor_w_query_parameters(self): self.assertEqual(query.query_parameters, query_parameters) def test_from_query_job(self): - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import Dataset, DatasetReference from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery._helpers import UDFResource - DS_NAME = 'DATASET' + DS_ID = 'DATASET' RESOURCE_URI = 'gs://some-bucket/js/lib.js' client = _Client(self.PROJECT) job = QueryJob( self.JOB_NAME, self.QUERY, client, udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - dataset = job.default_dataset = Dataset(DS_NAME) + dataset = Dataset(DatasetReference(self.PROJECT, DS_ID)) + job.default_dataset = dataset job.use_query_cache = True job.use_legacy_sql = True klass = self._get_target_class() @@ -741,10 +742,10 @@ def __init__(self, project='project', connection=None): self.project = project self._connection = connection - def dataset(self, name): - from google.cloud.bigquery.dataset import Dataset + def dataset(self, dataset_id): + from google.cloud.bigquery.dataset import Dataset, DatasetReference - return Dataset(name) + return Dataset(DatasetReference(self.project, dataset_id)) class _Connection(object): From d3422f5f238a76ec14c263e8d5f49f03ba0a1611 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Fri, 22 Sep 2017 16:52:21 -0400 Subject: [PATCH 0286/2016] bigquery: Job and Query default_dataset is DatasetReference (#4037) Change the type of the Job.default_dataset and Query.default_dataset properties to DatasetReference. --- .../google/cloud/bigquery/job.py | 6 +++--- .../google/cloud/bigquery/query.py | 4 ++-- .../google-cloud-bigquery/tests/unit/test_job.py | 14 ++++++-------- .../google-cloud-bigquery/tests/unit/test_query.py | 12 ++++++------ 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 84cca80e22a0..a79fc8e53d20 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1248,7 +1248,7 @@ def __init__(self, job_id, query, client, https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition """ - default_dataset = _TypedProperty('default_dataset', Dataset) + default_dataset = _TypedProperty('default_dataset', DatasetReference) """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.defaultDataset """ @@ -1437,8 +1437,8 @@ def _copy_configuration_properties(self, configuration): if self.default_dataset is not None: del self.default_dataset else: - self.default_dataset = Dataset( - DatasetReference(def_ds['projectId'], def_ds['datasetId'])) + self.default_dataset = DatasetReference( + def_ds['projectId'], def_ds['datasetId']) udf_resources = [] for udf_mapping in configuration.get(self._UDF_KEY, ()): key_val, = udf_mapping.items() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 7abbbec76b9b..38400659bdaf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -19,7 +19,7 @@ from google.api.core import page_iterator from google.cloud.bigquery._helpers import _TypedProperty from google.cloud.bigquery._helpers import _rows_from_json -from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import QueryParametersProperty @@ -273,7 +273,7 @@ def schema(self): """ return _parse_schema_resource(self._properties.get('schema', {})) - default_dataset = _TypedProperty('default_dataset', Dataset) + default_dataset = _TypedProperty('default_dataset', DatasetReference) """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#defaultDataset """ diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 029db44cd534..ca348704127c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1608,10 +1608,10 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.create_disposition) if 'defaultDataset' in query_config: - dataset = job.default_dataset + ds_ref = job.default_dataset ds_ref = { - 'projectId': dataset.project, - 'datasetId': dataset.dataset_id, + 'projectId': ds_ref.project, + 'datasetId': ds_ref.dataset_id, } self.assertEqual(ds_ref, query_config['defaultDataset']) else: @@ -2125,7 +2125,7 @@ def test_result_error(self): self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) def test_begin_w_bound_client(self): - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import DatasetReference PATH = '/projects/%s/jobs' % (self.PROJECT,) DS_ID = 'DATASET' @@ -2139,7 +2139,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.default_dataset = Dataset(DatasetReference(self.PROJECT, DS_ID)) + job.default_dataset = DatasetReference(self.PROJECT, DS_ID) job.begin() @@ -2168,7 +2168,6 @@ def test_begin_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): - from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2204,12 +2203,11 @@ def test_begin_w_alternate_client(self): job = self._make_one(self.JOB_NAME, self.QUERY, client1) dataset_ref = DatasetReference(self.PROJECT, DS_ID) - dataset = Dataset(dataset_ref) table_ref = dataset_ref.table(TABLE) job.allow_large_results = True job.create_disposition = 'CREATE_NEVER' - job.default_dataset = dataset + job.default_dataset = dataset_ref job.destination = table_ref job.flatten_results = True job.priority = 'INTERACTIVE' diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 73f23cb1bf6a..9340689315a7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -196,7 +196,7 @@ def test_ctor_w_query_parameters(self): self.assertEqual(query.query_parameters, query_parameters) def test_from_query_job(self): - from google.cloud.bigquery.dataset import Dataset, DatasetReference + from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery._helpers import UDFResource @@ -206,8 +206,8 @@ def test_from_query_job(self): job = QueryJob( self.JOB_NAME, self.QUERY, client, udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - dataset = Dataset(DatasetReference(self.PROJECT, DS_ID)) - job.default_dataset = dataset + ds_ref = DatasetReference(self.PROJECT, DS_ID) + job.default_dataset = ds_ref job.use_query_cache = True job.use_legacy_sql = True klass = self._get_target_class() @@ -219,7 +219,7 @@ def test_from_query_job(self): self.assertIs(query._client, client) self.assertIs(query._job, job) self.assertEqual(query.udf_resources, job.udf_resources) - self.assertIs(query.default_dataset, dataset) + self.assertIs(query.default_dataset, ds_ref) self.assertTrue(query.use_query_cache) self.assertTrue(query.use_legacy_sql) @@ -743,9 +743,9 @@ def __init__(self, project='project', connection=None): self._connection = connection def dataset(self, dataset_id): - from google.cloud.bigquery.dataset import Dataset, DatasetReference + from google.cloud.bigquery.dataset import DatasetReference - return Dataset(DatasetReference(self.project, dataset_id)) + return DatasetReference(self.project, dataset_id) class _Connection(object): From 288978e73e04aadb229d7726032aa2f745ee94c1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 22 Sep 2017 14:27:07 -0700 Subject: [PATCH 0287/2016] BigQuery: add to/from API representation for Table & Dataset references. (#4020) * BigQuery: add to/from API representation for Table & Dataset references. Also, implement equality and hashing for Table & Dataset references. This will make it easier to use the TableReference and DatasetReference classes as typed properties in the QueryJob and other job classes. * Fix lint errors. * Replace unique-ly with uniquely. --- .../google/cloud/bigquery/dataset.py | 39 ++++++++ .../google/cloud/bigquery/schema.py | 2 +- .../google/cloud/bigquery/table.py | 57 ++++++++++++ .../tests/unit/test_dataset.py | 64 +++++++++++++ .../tests/unit/test_table.py | 92 +++++++++++++++++++ 5 files changed, 253 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index fc641c3a4d58..e464fcfb93bd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -151,6 +151,45 @@ def table(self, table_id): """ return TableReference(self, table_id) + @classmethod + def from_api_repr(cls, resource): + project = resource['projectId'] + dataset_id = resource['datasetId'] + return cls(project, dataset_id) + + def to_api_repr(self): + return { + 'projectId': self._project, + 'datasetId': self._dataset_id, + } + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`DatasetReference`. + """ + return ( + self._project, + self._dataset_id, + ) + + def __eq__(self, other): + if not isinstance(other, DatasetReference): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + return 'DatasetReference{}'.format(self._key()) + class Dataset(object): """Datasets are containers for tables. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 4aea34ac22e0..535c445a3726 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -126,7 +126,7 @@ def to_api_repr(self): return answer def _key(self): - """A tuple key that unique-ly describes this field. + """A tuple key that uniquely describes this field. Used to compute this instance's hashcode and evaluate equality. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 37e5bddbd7aa..7173e8603a8e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -105,6 +105,63 @@ def path(self): return '/projects/%s/datasets/%s/tables/%s' % ( self._project, self._dataset_id, self._table_id) + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a table reference given its API representation + + :type resource: dict + :param resource: table reference representation returned from the API + + :rtype: :class:`google.cloud.bigquery.table.TableReference` + :returns: Table reference parsed from ``resource``. + """ + from google.cloud.bigquery.dataset import DatasetReference + + project = resource['projectId'] + dataset_id = resource['datasetId'] + table_id = resource['tableId'] + return cls(DatasetReference(project, dataset_id), table_id) + + def to_api_repr(self): + """Construct the API resource representation of this table reference. + + :rtype: dict + :returns: Table reference as represented as an API resource + """ + return { + 'projectId': self._project, + 'datasetId': self._dataset_id, + 'tableId': self._table_id, + } + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`DatasetReference`. + """ + return ( + self._project, + self._dataset_id, + self._table_id, + ) + + def __eq__(self, other): + if not isinstance(other, TableReference): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + return 'TableReference{}'.format(self._key()) + class Table(object): """Tables represent a set of rows whose values correspond to a schema. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index ced77990a65d..c04d154b52da 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -114,6 +114,70 @@ def test_table(self): self.assertEqual(table_ref.project, 'some-project-1') self.assertEqual(table_ref.table_id, 'table_1') + def test_to_api_repr(self): + dataset = self._make_one('project_1', 'dataset_1') + + resource = dataset.to_api_repr() + + self.assertEqual( + resource, + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + }) + + def test_from_api_repr(self): + from google.cloud.bigquery.dataset import DatasetReference + expected = self._make_one('project_1', 'dataset_1') + + got = DatasetReference.from_api_repr( + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + }) + + self.assertEqual(expected, got) + + def test___eq___wrong_type(self): + dataset = self._make_one('project_1', 'dataset_1') + other = object() + self.assertNotEqual(dataset, other) + self.assertEqual(dataset, mock.ANY) + + def test___eq___project_mismatch(self): + dataset = self._make_one('project_1', 'dataset_1') + other = self._make_one('project_2', 'dataset_1') + self.assertNotEqual(dataset, other) + + def test___eq___dataset_mismatch(self): + dataset = self._make_one('project_1', 'dataset_1') + other = self._make_one('project_1', 'dataset_2') + self.assertNotEqual(dataset, other) + + def test___eq___equality(self): + dataset = self._make_one('project_1', 'dataset_1') + other = self._make_one('project_1', 'dataset_1') + self.assertEqual(dataset, other) + + def test___hash__set_equality(self): + dataset1 = self._make_one('project_1', 'dataset_1') + dataset2 = self._make_one('project_1', 'dataset_2') + set_one = {dataset1, dataset2} + set_two = {dataset1, dataset2} + self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + dataset1 = self._make_one('project_1', 'dataset_1') + dataset2 = self._make_one('project_1', 'dataset_2') + set_one = {dataset1} + set_two = {dataset2} + self.assertNotEqual(set_one, set_two) + + def test___repr__(self): + dataset = self._make_one('project1', 'dataset1') + expected = "DatasetReference('project1', 'dataset1')" + self.assertEqual(repr(dataset), expected) + class TestDataset(unittest.TestCase): from google.cloud.bigquery.dataset import DatasetReference diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 2327d11b1ed3..f2c2297d244b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -58,6 +58,98 @@ def test_ctor_defaults(self): self.assertEqual(table_ref.dataset_id, dataset_ref.dataset_id) self.assertEqual(table_ref.table_id, 'table_1') + def test_to_api_repr(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + table_ref = self._make_one(dataset_ref, 'table_1') + + resource = table_ref.to_api_repr() + + self.assertEqual( + resource, + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + 'tableId': 'table_1', + }) + + def test_from_api_repr(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import TableReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + expected = self._make_one(dataset_ref, 'table_1') + + got = TableReference.from_api_repr( + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + 'tableId': 'table_1', + }) + + self.assertEqual(expected, got) + + def test___eq___wrong_type(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + table = self._make_one(dataset_ref, 'table_1') + other = object() + self.assertNotEqual(table, other) + self.assertEqual(table, mock.ANY) + + def test___eq___project_mismatch(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + other_dataset = DatasetReference('project_2', 'dataset_1') + table = self._make_one(dataset, 'table_1') + other = self._make_one(other_dataset, 'table_1') + self.assertNotEqual(table, other) + + def test___eq___dataset_mismatch(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + other_dataset = DatasetReference('project_1', 'dataset_2') + table = self._make_one(dataset, 'table_1') + other = self._make_one(other_dataset, 'table_1') + self.assertNotEqual(table, other) + + def test___eq___table_mismatch(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table = self._make_one(dataset, 'table_1') + other = self._make_one(dataset, 'table_2') + self.assertNotEqual(table, other) + + def test___eq___equality(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table = self._make_one(dataset, 'table_1') + other = self._make_one(dataset, 'table_1') + self.assertEqual(table, other) + + def test___hash__set_equality(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table1 = self._make_one(dataset, 'table1') + table2 = self._make_one(dataset, 'table2') + set_one = {table1, table2} + set_two = {table1, table2} + self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table1 = self._make_one(dataset, 'table1') + table2 = self._make_one(dataset, 'table2') + set_one = {table1} + set_two = {table2} + self.assertNotEqual(set_one, set_two) + + def test___repr__(self): + dataset = DatasetReference('project1', 'dataset1') + table1 = self._make_one(dataset, 'table1') + expected = "TableReference('project1', 'dataset1', 'table1')" + self.assertEqual(repr(table1), expected) + class TestTable(unittest.TestCase, _SchemaBase): From cc73e3a1a5c401696e2047f18eec248dc7f1c2ba Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 22 Sep 2017 15:25:22 -0700 Subject: [PATCH 0288/2016] BigQuery: replaces table.create() with client.create_table() (#4038) * adds client.create_table() * removes table.create() * passes system tests * fixes rebase conflicts * fixes coverage --- .../google/cloud/bigquery/client.py | 18 ++ .../google/cloud/bigquery/table.py | 18 -- .../google-cloud-bigquery/tests/system.py | 83 +++---- .../tests/unit/test_client.py | 145 ++++++++++++ .../tests/unit/test_table.py | 214 ------------------ 5 files changed, 205 insertions(+), 273 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1bd46e407968..bbeac294680d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -190,6 +190,24 @@ def create_dataset(self, dataset): method='POST', path=path, data=dataset._build_resource()) return Dataset.from_api_repr(api_response) + def create_table(self, table): + """API call: create a table via a PUT request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert + + :type table: :class:`~google.cloud.bigquery.table.Table` + :param table: A ``Table`` populated with the desired initial state. + + :rtype: ":class:`~google.cloud.bigquery.table.Table`" + :returns: a new ``Table`` returned from the service. + """ + path = '/projects/%s/datasets/%s/tables' % ( + table.project, table.dataset_id) + api_response = self._connection.api_request( + method='POST', path=path, data=table._build_resource()) + return Table.from_api_repr(api_response, self) + def get_dataset(self, dataset_ref): """Fetch the dataset referenced by ``dataset_ref`` diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 7173e8603a8e..03214e52a7be 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -653,24 +653,6 @@ def _build_resource(self): return resource - def create(self, client=None): - """API call: create the table via a PUT request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - path = '/projects/%s/datasets/%s/tables' % ( - self._project, self._dataset_id) - api_response = client._connection.api_request( - method='POST', path=path, data=self._build_resource()) - self._set_properties(api_response) - def exists(self, client=None): """API call: test for the existence of the table via a GET request diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index de19a7da01c5..81b3219275b2 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -180,18 +180,19 @@ def test_list_datasets(self): def test_create_table(self): dataset = self.temp_dataset(_make_dataset_id('create_table')) - - TABLE_NAME = 'test_table' + table_id = 'test_table' full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - self.assertFalse(table.exists()) - table.create() + table_arg = Table(dataset.table(table_id), schema=[full_name, age], + client=Config.CLIENT) + self.assertFalse(table_arg.exists()) + + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) + self.assertTrue(table.exists()) - self.assertEqual(table.table_id, TABLE_NAME) + self.assertEqual(table.table_id, table_id) def test_get_table_w_public_dataset(self): PUBLIC = 'bigquery-public-data' @@ -227,10 +228,10 @@ def test_list_dataset_tables(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') for table_name in tables_to_create: - created_table = Table(dataset.table(table_name), - schema=[full_name, age], - client=Config.CLIENT) - created_table.create() + table = Table(dataset.table(table_name), + schema=[full_name, age], + client=Config.CLIENT) + created_table = retry_403(Config.CLIENT.create_table)(table) self.to_delete.insert(0, created_table) # Retrieve the tables. @@ -249,10 +250,10 @@ def test_patch_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - self.assertFalse(table.exists()) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) + self.assertFalse(table_arg.exists()) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(table.exists()) self.assertIsNone(table.friendly_name) @@ -268,10 +269,10 @@ def test_update_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - self.assertFalse(table.exists()) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) + self.assertFalse(table_arg.exists()) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(table.exists()) voter = bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE') @@ -309,10 +310,10 @@ def test_insert_data_then_dump_table(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') now = bigquery.SchemaField('now', 'TIMESTAMP') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age, now], - client=Config.CLIENT) - self.assertFalse(table.exists()) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), + schema=[full_name, age, now], client=Config.CLIENT) + self.assertFalse(table_arg.exists()) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(table.exists()) @@ -346,9 +347,9 @@ def test_load_table_from_local_file_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: @@ -450,9 +451,9 @@ def test_load_table_from_storage_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) job = Config.CLIENT.load_table_from_storage( @@ -652,9 +653,9 @@ def test_job_cancel(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) job = Config.CLIENT.run_async_query(JOB_NAME, QUERY) @@ -839,9 +840,9 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): dataset = self.temp_dataset(dataset_id) greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') - table = Table(dataset.table(table_id), schema=[greeting], - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(table_id), schema=[greeting], + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: @@ -1228,9 +1229,9 @@ def test_insert_nested_nested(self): ] table_name = 'test_table' dataset = self.temp_dataset(_make_dataset_id('issue_2951')) - table = Table(dataset.table(table_name), schema=schema, - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(table_name), schema=schema, + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) table.insert_data(to_insert) @@ -1245,9 +1246,9 @@ def test_create_table_insert_fetch_nested_schema(self): dataset = self.temp_dataset( _make_dataset_id('create_table_nested_schema')) schema = _load_json_schema() - table = Table(dataset.table(table_name), schema=schema, - client=Config.CLIENT) - table.create() + table_arg = Table(dataset.table(table_name), schema=schema, + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(table.exists()) self.assertEqual(table.table_id, table_name) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 50c324ebfc32..96a9dd240132 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -399,6 +399,151 @@ def test_create_dataset_w_attrs(self): self.assertEqual(ds.default_table_expiration_ms, 3600) self.assertEqual(ds.labels, LABELS) + def test_create_table_w_day_partition(self): + from google.cloud.bigquery.table import Table + + project = 'PROJECT' + dataset_id = 'dataset_id' + table_id = 'table-id' + path = 'projects/%s/datasets/%s/tables' % ( + project, dataset_id) + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + } + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=client) + table.partitioning_type = 'DAY' + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'timePartitioning': {'type': 'DAY'}, + } + self.assertEqual(req['data'], sent) + self.assertEqual(table.partitioning_type, "DAY") + self.assertEqual(got.table_id, table_id) + + def test_create_table_w_day_partition_and_expire(self): + from google.cloud.bigquery.table import Table + + project = 'PROJECT' + dataset_id = 'dataset_id' + table_id = 'table-id' + path = 'projects/%s/datasets/%s/tables' % ( + project, dataset_id) + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + } + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=client) + table.partitioning_type = 'DAY' + table.partition_expiration = 100 + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, + } + self.assertEqual(req['data'], sent) + self.assertEqual(table.partitioning_type, "DAY") + self.assertEqual(table.partition_expiration, 100) + self.assertEqual(got.table_id, table_id) + + def test_create_table_w_schema_and_query(self): + from google.cloud.bigquery.table import Table, SchemaField + + project = 'PROJECT' + dataset_id = 'dataset_id' + table_id = 'table-id' + path = 'projects/%s/datasets/%s/tables' % ( + project, dataset_id) + query = 'SELECT * from %s:%s' % (dataset_id, table_id) + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] + }, + 'view': { + 'query': query, + 'useLegacySql': True + }, + } + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, schema=schema, client=client) + table.view_query = query + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] + }, + 'view': {'query': query}, + } + self.assertEqual(req['data'], sent) + self.assertEqual(got.table_id, table_id) + self.assertEqual(got.project, project) + self.assertEqual(got.dataset_id, dataset_id) + self.assertEqual(got.schema, schema) + self.assertEqual(got.view_query, query) + def test_get_table(self): project = 'PROJECT' dataset_id = 'dataset_id' diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index f2c2297d244b..22dce9012188 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -557,135 +557,6 @@ def test_from_api_repr_w_properties(self): self.assertIs(table._client, client) self._verifyResourceProperties(table, RESOURCE) - def test_create_new_day_partitioned_table(self): - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - table.partitioning_type = 'DAY' - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'timePartitioning': {'type': 'DAY'}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_w_bound_client(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_w_partition_no_expire(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - - self.assertIsNone(table.partitioning_type) - table.partitioning_type = "DAY" - self.assertEqual(table.partitioning_type, "DAY") - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'timePartitioning': {'type': 'DAY'}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_w_partition_and_expire(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - self.assertIsNone(table.partition_expiration) - table.partition_expiration = 100 - self.assertEqual(table.partitioning_type, "DAY") - self.assertEqual(table.partition_expiration, 100) - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - def test_partition_type_setter_bad_type(self): from google.cloud.bigquery.table import SchemaField @@ -833,91 +704,6 @@ def test_list_partitions(self): client=client) self.assertEqual(table.list_partitions(), [20160804, 20160805]) - def test_create_w_alternate_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - QUERY = 'select fullname, age from person_ages' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, - tzinfo=UTC) - RESOURCE['expirationTime'] = _millis(self.EXP_TIME) - RESOURCE['view'] = {} - RESOURCE['view']['query'] = QUERY - RESOURCE['type'] = 'VIEW' - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - table.friendly_name = TITLE - table.description = DESCRIPTION - table.view_query = QUERY - - table.create(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - 'view': {'query': QUERY}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_w_missing_output_properties(self): - # In the wild, the resource returned from 'dataset.create' sometimes - # lacks 'creationTime' / 'lastModifiedTime' - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - RESOURCE = self._makeResource() - del RESOURCE['creationTime'] - del RESOURCE['lastModifiedTime'] - self.WHEN = None - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - def test_exists_miss_w_bound_client(self): PATH = 'projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) From 9b81419d227739dcda1eee1d353f2c74a659e322 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 22 Sep 2017 17:18:43 -0700 Subject: [PATCH 0289/2016] BigQuery: Remove unnecessary line from client.create_table() test (#4043) --- packages/google-cloud-bigquery/tests/unit/test_client.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 96a9dd240132..9cfa61234fcc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -505,10 +505,7 @@ def test_create_table_w_schema_and_query(self): {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] }, - 'view': { - 'query': query, - 'useLegacySql': True - }, + 'view': {'query': query}, } schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), From 1ffaaf652b37dea55180aeb47513aa5fe20c6b72 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 25 Sep 2017 12:35:20 -0400 Subject: [PATCH 0290/2016] bigquery: add system test for dataset update with etag (#4052) --- packages/google-cloud-bigquery/tests/system.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 81b3219275b2..a50ae06ee5d8 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -24,6 +24,7 @@ import six +from google.api.core.exceptions import PreconditionFailed from google.cloud import bigquery from google.cloud.bigquery.dataset import Dataset, DatasetReference from google.cloud.bigquery.table import Table @@ -158,7 +159,11 @@ def test_update_dataset(self): ds3 = Config.CLIENT.update_dataset(ds2, ['labels']) self.assertEqual(ds3.labels, {'color': 'green', 'shape': 'circle'}) - # TODO(jba): test that read-modify-write with ETag works. + # If we try to update using d2 again, it will fail because the + # previous update changed the ETag. + ds2.description = 'no good' + with self.assertRaises(PreconditionFailed): + Config.CLIENT.update_dataset(ds2, ['description']) def test_list_datasets(self): datasets_to_create = [ From aa5417bd8fe6177601253b0a997edb856fd71bfe Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 25 Sep 2017 12:21:17 -0700 Subject: [PATCH 0291/2016] BQ: remove redundant __eq__ from Query Parameters. (#4055) Got duplicate definitions after the rebase. --- .../google/cloud/bigquery/_helpers.py | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index abe7a8934c96..51000148fb0b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -527,14 +527,6 @@ def __init__(self, name, type_, value): self.type_ = type_ self.value = value - def __eq__(self, other): - if not isinstance(other, ScalarQueryParameter): - return NotImplemented - return( - self.name == other.name and - self.type_ == other.type_ and - self.value == other.value) - @classmethod def positional(cls, type_, value): """Factory for positional paramater. @@ -637,14 +629,6 @@ def __init__(self, name, array_type, values): self.array_type = array_type self.values = values - def __eq__(self, other): - if not isinstance(other, ArrayQueryParameter): - return NotImplemented - return( - self.name == other.name and - self.array_type == other.array_type and - self.values == other.values) - @classmethod def positional(cls, array_type, values): """Factory for positional parameters. @@ -789,14 +773,6 @@ def __init__(self, name, *sub_params): types[sub.name] = sub.type_ values[sub.name] = sub.value - def __eq__(self, other): - if not isinstance(other, StructQueryParameter): - return NotImplemented - return( - self.name == other.name and - self.struct_types == other.struct_types and - self.struct_values == other.struct_values) - @classmethod def positional(cls, *sub_params): """Factory for positional parameters. From 73796cb4fcba4ddf3872312ce6db2a5e9f8b4c87 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 25 Sep 2017 15:55:00 -0400 Subject: [PATCH 0292/2016] bigquery: modify CopyJob (#4051) Update CopyJob and CopyJobConfig to conform to the new design for jobs. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/client.py | 49 ++++-- .../google/cloud/bigquery/job.py | 164 +++++++++++------- .../google-cloud-bigquery/tests/system.py | 32 ++++ .../tests/unit/test_client.py | 40 ++++- .../tests/unit/test_job.py | 63 ++++--- 6 files changed, 247 insertions(+), 103 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 333854035376..ec92e7c40128 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -32,6 +32,7 @@ from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -42,6 +43,7 @@ 'ArrayQueryParameter', 'Client', 'Dataset', + 'CopyJobConfig', 'ExtractJobConfig', 'ScalarQueryParameter', 'SchemaField', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index bbeac294680d..972f00b317f8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -16,6 +16,7 @@ from __future__ import absolute_import +import collections import uuid from google.api.core import page_iterator @@ -492,25 +493,39 @@ def load_table_from_storage(self, job_id, destination, *source_uris): """ return LoadJob(job_id, destination, source_uris, client=self) - def copy_table(self, job_id, destination, *sources): - """Construct a job for copying one or more tables into another table. + def copy_table(self, sources, destination, job_id=None, job_config=None): + """Start a job for copying one or more tables into another table. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy - :type job_id: str - :param job_id: Name of the job. + :type sources: One of: + :class:`~google.cloud.bigquery.table.TableReference` + sequence of + :class:`~google.cloud.bigquery.table.TableReference` + :param sources: Table or tables to be copied. - :type destination: :class:`google.cloud.bigquery.table.Table` + + :type destination: :class:`google.cloud.bigquery.table.TableReference` :param destination: Table into which data is to be copied. - :type sources: sequence of :class:`google.cloud.bigquery.table.Table` - :param sources: tables to be copied. + :type job_id: str + :param job_id: (Optional) The ID of the job. + + :type job_config: :class:`google.cloud.bigquery.job.CopyJobConfig` + :param job_config: (Optional) Extra configuration options for the job. :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: a new ``CopyJob`` instance """ - return CopyJob(job_id, destination, sources, client=self) + job_id = _make_job_id(job_id) + + if not isinstance(sources, collections.Sequence): + sources = [sources] + job = CopyJob(job_id, sources, destination, client=self, + job_config=job_config) + job.begin() + return job def extract_table(self, source, *destination_uris, **kwargs): """Start a job to extract a table into Cloud Storage files. @@ -541,9 +556,7 @@ def extract_table(self, source, *destination_uris, **kwargs): :returns: a new ``ExtractJob`` instance """ job_config = kwargs.get('job_config') - job_id = kwargs.get('job_id') - if job_id is None: - job_id = str(uuid.uuid4()) + job_id = _make_job_id(kwargs.get('job_id')) job = ExtractJob( job_id, source, list(destination_uris), client=self, @@ -667,3 +680,17 @@ def _item_to_table(iterator, resource): :returns: The next table in the page. """ return Table.from_api_repr(resource, iterator.client) + + +def _make_job_id(job_id): + """Construct an ID for a new job. + + :type job_id: str or ``NoneType`` + :param job_id: the user-provided job ID + + :rtype: str + :returns: A job ID + """ + if job_id is None: + return str(uuid.uuid4()) + return job_id diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index a79fc8e53d20..11b8beee2b7b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -126,7 +126,7 @@ class Compression(_EnumApiResourceProperty): NONE = 'NONE' -class CreateDisposition(_EnumProperty): +class CreateDisposition(_EnumApiResourceProperty): """Pseudo-enum for ``create_disposition`` properties.""" CREATE_IF_NEEDED = 'CREATE_IF_NEEDED' CREATE_NEVER = 'CREATE_NEVER' @@ -159,7 +159,7 @@ class SourceFormat(_EnumProperty): AVRO = 'AVRO' -class WriteDisposition(_EnumProperty): +class WriteDisposition(_EnumApiResourceProperty): """Pseudo-enum for ``write_disposition`` properties.""" WRITE_APPEND = 'WRITE_APPEND' WRITE_TRUNCATE = 'WRITE_TRUNCATE' @@ -688,7 +688,8 @@ def output_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect """ - create_disposition = CreateDisposition('create_disposition') + create_disposition = CreateDisposition('create_disposition', + 'createDisposition') """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition """ @@ -733,7 +734,8 @@ def output_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat """ - write_disposition = WriteDisposition('write_disposition') + write_disposition = WriteDisposition('write_disposition', + 'writeDisposition') """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition """ @@ -853,13 +855,51 @@ def from_api_repr(cls, resource, client): return job -class _CopyConfiguration(object): - """User-settable configuration options for copy jobs. +class CopyJobConfig(object): + """Configuration options for copy jobs. - Values which are ``None`` -> server defaults. + All properties in this class are optional. Values which are ``None`` -> + server defaults. """ - _create_disposition = None - _write_disposition = None + + def __init__(self): + self._properties = {} + + create_disposition = CreateDisposition('create_disposition', + 'createDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition + """ + + write_disposition = WriteDisposition('write_disposition', + 'writeDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition + """ + + def to_api_repr(self): + """Build an API representation of the copy job config. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config class CopyJob(_AsyncJob): @@ -868,41 +908,45 @@ class CopyJob(_AsyncJob): :type job_id: str :param job_id: the job's ID, within the project belonging to ``client``. - :type destination: :class:`google.cloud.bigquery.table.Table` - :param destination: Table into which data is to be loaded. - - :type sources: list of :class:`google.cloud.bigquery.table.Table` + :type sources: list of :class:`google.cloud.bigquery.table.TableReference` :param sources: Table into which data is to be loaded. + :type destination: :class:`google.cloud.bigquery.table.TableReference` + :param destination: Table into which data is to be loaded. + :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - """ + :type job_config: :class:`~google.cloud.bigquery.job.CopyJobConfig` + :param job_config: + (Optional) Extra configuration options for the copy job. + """ _JOB_TYPE = 'copy' - def __init__(self, job_id, destination, sources, client): + def __init__(self, job_id, sources, destination, client, job_config=None): super(CopyJob, self).__init__(job_id, client) + + if job_config is None: + job_config = CopyJobConfig() + self.destination = destination self.sources = sources - self._configuration = _CopyConfiguration() - - create_disposition = CreateDisposition('create_disposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition - """ + self._configuration = job_config - write_disposition = WriteDisposition('write_disposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition - """ + @property + def create_disposition(self): + """See + :class:`~google.cloud.bigquery.job.CopyJobConfig.create_disposition`. + """ + return self._configuration.create_disposition - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - if self.create_disposition is not None: - configuration['createDisposition'] = self.create_disposition - if self.write_disposition is not None: - configuration['writeDisposition'] = self.write_disposition + @property + def write_disposition(self): + """See + :class:`~google.cloud.bigquery.job.CopyJobConfig.write_disposition`. + """ + return self._configuration.write_disposition def _build_resource(self): """Generate a resource for :meth:`begin`.""" @@ -913,31 +957,27 @@ def _build_resource(self): 'tableId': table.table_id, } for table in self.sources] - resource = { + configuration = self._configuration.to_api_repr() + configuration['sourceTables'] = source_refs + configuration['destinationTable'] = { + 'projectId': self.destination.project, + 'datasetId': self.destination.dataset_id, + 'tableId': self.destination.table_id, + } + + return { 'jobReference': { 'projectId': self.project, 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'sourceTables': source_refs, - 'destinationTable': { - 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.table_id, - }, - }, + self._JOB_TYPE: configuration, }, } - configuration = resource['configuration'][self._JOB_TYPE] - self._populate_config_resource(configuration) - - return resource def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" - self.create_disposition = configuration.get('createDisposition') - self.write_disposition = configuration.get('writeDisposition') + self._configuration._properties = copy.deepcopy(configuration) @classmethod def from_api_repr(cls, resource, client): @@ -958,27 +998,23 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: Job parsed from ``resource``. """ - job_id, config = cls._get_resource_config(resource) - dest_config = config['destinationTable'] - ds_ref = DatasetReference(dest_config['projectId'], - dest_config['datasetId'],) - dataset = Dataset(ds_ref) - table_ref = TableReference(dataset, dest_config['tableId']) - destination = Table(table_ref, client=client) + job_id, config_resource = cls._get_resource_config(resource) + config = CopyJobConfig.from_api_repr(config_resource) + destination = TableReference.from_api_repr( + config_resource['destinationTable']) sources = [] - source_configs = config.get('sourceTables') + source_configs = config_resource.get('sourceTables') if source_configs is None: - single = config.get('sourceTable') + single = config_resource.get('sourceTable') if single is None: raise KeyError( "Resource missing 'sourceTables' / 'sourceTable'") source_configs = [single] for source_config in source_configs: - ds_ref = DatasetReference(source_config['projectId'], - source_config['datasetId']) - table_ref = ds_ref.table(source_config['tableId']) - sources.append(Table(table_ref, client=client)) - job = cls(job_id, destination, sources, client=client) + table_ref = TableReference.from_api_repr(source_config) + sources.append(table_ref) + job = cls( + job_id, sources, destination, client=client, job_config=config) job._set_properties(resource) return job @@ -1017,7 +1053,7 @@ def __init__(self): """ def to_api_repr(self): - """Build an API representation of the extact job config. + """Build an API representation of the extract job config. :rtype: dict :returns: A dictionary in the format used by the BigQuery API. @@ -1243,7 +1279,8 @@ def __init__(self, job_id, query, client, https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults """ - create_disposition = CreateDisposition('create_disposition') + create_disposition = CreateDisposition('create_disposition', + 'createDisposition') """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition """ @@ -1289,7 +1326,8 @@ def __init__(self, job_id, query, client, reference/rest/v2/jobs#configuration.dryRun """ - write_disposition = WriteDisposition('write_disposition') + write_disposition = WriteDisposition('write_disposition', + 'writeDisposition') """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition """ diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index a50ae06ee5d8..1f0e917d34fd 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -647,6 +647,38 @@ def test_extract_table_w_job_config(self): got = destination.download_as_string().decode('utf-8') self.assertIn('"Bharney Rhubble"', got) + def test_copy_table(self): + dataset = self.temp_dataset(_make_dataset_id('copy_table')) + schema = ( + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + ) + source_ref = dataset.table('source_table') + source_arg = Table(source_ref, schema=schema, client=Config.CLIENT) + source_table = retry_403(Config.CLIENT.create_table)(source_arg) + self.to_delete.insert(0, source_table) + rows = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + errors = source_table.insert_data(rows) + self.assertEqual(len(errors), 0) + + destination_ref = dataset.table('destination_table') + job_config = bigquery.CopyJobConfig() + job = Config.CLIENT.copy_table( + source_ref, destination_ref, job_config=job_config) + job.result() + + destination_table = Config.CLIENT.get_table(destination_ref) + self.to_delete.insert(0, destination_table) + got_rows = self._fetch_single_page(destination_table) + by_age = operator.itemgetter(1) + self.assertEqual(sorted(got_rows, key=by_age), + sorted(rows, key=by_age)) + def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') JOB_NAME = 'fetch_' + DATASET_ID diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 9cfa61234fcc..9f9354cdd2cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1113,19 +1113,57 @@ def test_copy_table(self): DATASET = 'dataset_name' SOURCE = 'source_table' DESTINATION = 'destination_table' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'copy': { + 'sourceTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': SOURCE, + }, + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': DESTINATION, + }, + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESOURCE) dataset = client.dataset(DATASET) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) - job = client.copy_table(JOB, destination, source) + + job = client.copy_table(source, destination, job_id=JOB) + + # Check that copy_table actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/%s/jobs' % PROJECT) + self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.sources), [source]) self.assertIs(job.destination, destination) + conn = client._connection = _Connection(RESOURCE) + source2 = dataset.table(SOURCE + '2') + job = client.copy_table([source, source2], destination, job_id=JOB) + self.assertIsInstance(job, CopyJob) + self.assertIs(job._client, client) + self.assertEqual(job.job_id, JOB) + self.assertEqual(list(job.sources), [source, source2]) + self.assertIs(job.destination, destination) + def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ca348704127c..d0a654c0c15d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -17,7 +17,7 @@ from six.moves import http_client import unittest -from google.cloud.bigquery.job import ExtractJobConfig +from google.cloud.bigquery.job import ExtractJobConfig, CopyJobConfig from google.cloud.bigquery.dataset import DatasetReference @@ -83,9 +83,12 @@ def test_missing_reason(self): class _Base(object): + from google.cloud.bigquery.dataset import DatasetReference + PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' DS_ID = 'datset_id' + DS_REF = DatasetReference(PROJECT, DS_ID) TABLE_ID = 'table_id' JOB_NAME = 'job_name' @@ -104,6 +107,11 @@ def _setUpConstants(self): self.RESOURCE_URL = 'http://example.com/path/to/resource' self.USER_EMAIL = 'phred@example.com' + def _table_ref(self, table_id): + from google.cloud.bigquery.table import TableReference + + return TableReference(self.DS_REF, table_id) + def _makeResource(self, started=False, ended=False): self._setUpConstants() resource = { @@ -895,9 +903,9 @@ def _verifyResourceProperties(self, job, resource): def test_ctor(self): client = _Client(self.PROJECT) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client) self.assertIs(job.destination, destination) self.assertEqual(job.sources, [source]) self.assertIs(job._client, client) @@ -1035,9 +1043,9 @@ def test_begin_w_bound_client(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client) job.begin() @@ -1090,13 +1098,13 @@ def test_begin_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client1) - - job.create_disposition = 'CREATE_NEVER' - job.write_disposition = 'WRITE_TRUNCATE' - + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + config = CopyJobConfig() + config.create_disposition = 'CREATE_NEVER' + config.write_disposition = 'WRITE_TRUNCATE' + job = self._make_one(self.JOB_NAME, [source], destination, client1, + config) job.begin(client=client2) self.assertEqual(len(conn1._requested), 0) @@ -1120,9 +1128,10 @@ def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client) self.assertFalse(job.exists()) @@ -1138,9 +1147,9 @@ def test_exists_hit_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client1) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client1) self.assertTrue(job.exists(client=client2)) @@ -1156,9 +1165,9 @@ def test_reload_w_bound_client(self): RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client) job.reload() @@ -1175,9 +1184,9 @@ def test_reload_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client1) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_NAME, [source], destination, client1) job.reload(client=client2) @@ -2709,8 +2718,6 @@ def __init__(self, table_id=None): @property def table_id(self): - if self._table_id is not None: - return self._table_id return TestLoadJob.TABLE_ID @property From 8b4e6a96ada765df9a71025ed91e8bfbb35a558f Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Tue, 26 Sep 2017 12:18:14 -0400 Subject: [PATCH 0293/2016] bigquery: fix copy job system test (#4059) --- .../google-cloud-bigquery/tests/system.py | 41 +++++++------------ 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1f0e917d34fd..478ac065baa0 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -648,36 +648,25 @@ def test_extract_table_w_job_config(self): self.assertIn('"Bharney Rhubble"', got) def test_copy_table(self): - dataset = self.temp_dataset(_make_dataset_id('copy_table')) - schema = ( - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), - ) - source_ref = dataset.table('source_table') - source_arg = Table(source_ref, schema=schema, client=Config.CLIENT) - source_table = retry_403(Config.CLIENT.create_table)(source_arg) - self.to_delete.insert(0, source_table) - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] - errors = source_table.insert_data(rows) - self.assertEqual(len(errors), 0) - - destination_ref = dataset.table('destination_table') + # If we create a new table to copy from, the test won't work + # because the new rows will be stored in the streaming buffer, + # and copy jobs don't read the streaming buffer. + # We could wait for the streaming buffer to empty, but that could + # take minutes. Instead we copy a small public table. + source_dataset = DatasetReference('bigquery-public-data', 'samples') + source_ref = source_dataset.table('shakespeare') + dest_dataset = self.temp_dataset(_make_dataset_id('copy_table')) + dest_ref = dest_dataset.table('destination_table') job_config = bigquery.CopyJobConfig() job = Config.CLIENT.copy_table( - source_ref, destination_ref, job_config=job_config) + source_ref, dest_ref, job_config=job_config) job.result() - destination_table = Config.CLIENT.get_table(destination_ref) - self.to_delete.insert(0, destination_table) - got_rows = self._fetch_single_page(destination_table) - by_age = operator.itemgetter(1) - self.assertEqual(sorted(got_rows, key=by_age), - sorted(rows, key=by_age)) + dest_table = Config.CLIENT.get_table(dest_ref) + self.to_delete.insert(0, dest_table) + # Just check that we got some rows. + got_rows = self._fetch_single_page(dest_table) + self.assertTrue(len(got_rows) > 0) def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') From 0cc86457af873a51519bfdbb52d9d8acff44c1c5 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 27 Sep 2017 11:32:46 -0700 Subject: [PATCH 0294/2016] BigQuery: replaces table.delete() with client.delete_table() (#4066) --- .../google/cloud/bigquery/client.py | 17 +++++++++ .../google/cloud/bigquery/table.py | 14 -------- .../google-cloud-bigquery/tests/system.py | 2 ++ .../tests/unit/test_client.py | 28 +++++++++++++++ .../tests/unit/test_table.py | 35 ------------------- 5 files changed, 47 insertions(+), 49 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 972f00b317f8..bae5613b629a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -25,6 +25,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob @@ -333,6 +334,22 @@ def delete_dataset(self, dataset): raise TypeError('dataset must be a Dataset or a DatasetReference') self._connection.api_request(method='DELETE', path=dataset.path) + def delete_table(self, table): + """Delete a table + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete + + :type table: One of: + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` + + :param table: the table to delete, or a reference to it. + """ + if not isinstance(table, (Table, TableReference)): + raise TypeError('table must be a Table or a TableReference') + self._connection.api_request(method='DELETE', path=table.path) + def _get_query_results(self, job_id, project=None, timeout_ms=None): """Get the query results object for a query job. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 03214e52a7be..a4f1933303a0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -768,20 +768,6 @@ def update(self, client=None): method='PUT', path=self.path, data=self._build_resource()) self._set_properties(api_response) - def delete(self, client=None): - """API call: delete the table via a DELETE request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - client._connection.api_request(method='DELETE', path=self.path) - def fetch_data(self, max_results=None, page_token=None, client=None): """API call: fetch the table data via a GET request diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 478ac065baa0..2785f21cb2a3 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -108,6 +108,8 @@ def _still_in_use(bad_request): retry_409(doomed.delete)(force=True) elif isinstance(doomed, Dataset): retry_in_use(Config.CLIENT.delete_dataset)(doomed) + elif isinstance(doomed, Table): + retry_in_use(Config.CLIENT.delete_table)(doomed) else: doomed.delete() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 9f9354cdd2cb..bdab1d36c2cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -794,6 +794,34 @@ def test_delete_dataset_wrong_type(self): with self.assertRaises(TypeError): client.delete_dataset(client.dataset(DS_ID).table("foo")) + def test_delete_table(self): + from google.cloud.bigquery.table import Table + + project = 'PROJECT' + dataset_id = 'dataset_id' + table_id = 'table-id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + creds = _make_credentials() + http = object() + client = self._make_one(project=project, credentials=creds, _http=http) + conn = client._connection = _Connection({}, {}) + table_ref = client.dataset(dataset_id).table(table_id) + + for arg in (table_ref, Table(table_ref)): + client.delete_table(arg) + req = conn._requested[0] + self.assertEqual(req['method'], 'DELETE') + self.assertEqual(req['path'], '/%s' % path) + + def test_delete_table_w_wrong_type(self): + project = 'PROJECT' + dataset_id = 'DATASET_ID' + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + with self.assertRaises(TypeError): + client.delete_table(client.dataset(dataset_id)) + def test_job_from_resource_unknown_type(self): PROJECT = 'PROJECT' creds = _make_credentials() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 22dce9012188..c86c21880bda 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -945,41 +945,6 @@ def test_update_w_alternate_client(self): self.assertEqual(req['data'], SENT) self._verifyResourceProperties(table, RESOURCE) - def test_delete_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - table.delete() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_delete_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - - table.delete(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - def test_fetch_data_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA From 5a31804562e56adddd5d98e30ade59120a9a824c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 27 Sep 2017 16:29:59 -0700 Subject: [PATCH 0295/2016] BigQuery: add client.query_rows(), remove client.run_sync_query(). (#4065) * BigQuery: add client.query_rows(), remove client.run_sync_query(). The query_rows() method will be the new way to run a query synchronously. It starts a query job, then waits for the results, returning the rows as results. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/client.py | 50 +++-- .../google/cloud/bigquery/job.py | 163 +++++++++++----- .../google/cloud/bigquery/query.py | 1 + .../google-cloud-bigquery/tests/system.py | 67 ++++--- .../tests/unit/test_client.py | 184 ++++++++++++++---- 6 files changed, 337 insertions(+), 130 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index ec92e7c40128..7bbcc7782ee2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -34,6 +34,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import ExtractJobConfig +from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -45,6 +46,7 @@ 'Dataset', 'CopyJobConfig', 'ExtractJobConfig', + 'QueryJobConfig', 'ScalarQueryParameter', 'SchemaField', 'StructQueryParameter', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index bae5613b629a..7ceed4fc1e41 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -30,6 +30,7 @@ from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import QueryResults @@ -612,29 +613,46 @@ def run_async_query(self, job_id, query, udf_resources=udf_resources, query_parameters=query_parameters) - def run_sync_query(self, query, udf_resources=(), query_parameters=()): - """Run a SQL query synchronously. + def query_rows(self, query, job_config=None, job_id=None, timeout=None): + """Start a query job and wait for the results. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query :type query: str :param query: SQL query to be executed - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery._helpers.UDFResource` - (empty by default) + :type job_id: str + :param job_id: (Optional) ID to use for the query job. - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) + :type timeout: int + :param timeout: + (Optional) How long to wait for job to complete before raising a + :class:`TimeoutError`. - :rtype: :class:`google.cloud.bigquery.query.QueryResults` - :returns: a new ``QueryResults`` instance + :rtype: :class:`~google.api.core.page_iterator.Iterator` + :returns: + Iterator of row data :class:`tuple`s. During each page, the + iterator will have the ``total_rows`` attribute set, which counts + the total number of rows **in the result set** (this is distinct + from the total number of rows in the current page: + ``iterator.page.num_items``). + + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job + failed or :class:`TimeoutError` if the job did not complete in the + given timeout. """ - return QueryResults(query, client=self, - udf_resources=udf_resources, - query_parameters=query_parameters) + job_id = _make_job_id(job_id) + + # TODO(swast): move standard SQL default to QueryJobConfig class. + if job_config is None: + job_config = QueryJobConfig() + if job_config.use_legacy_sql is None: + job_config.use_legacy_sql = False + + job = QueryJob(job_id, query, client=self, job_config=job_config) + job.begin() + return job.result(timeout=timeout) # pylint: disable=unused-argument diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 11b8beee2b7b..812dde4b32a3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1218,11 +1218,52 @@ def from_api_repr(cls, resource, client): return job -class _AsyncQueryConfiguration(object): - """User-settable configuration options for asynchronous query jobs. +class QueryJobConfig(object): + """Configuration options for query jobs. - Values which are ``None`` -> server defaults. + All properties in this class are optional. Values which are ``None`` -> + server defaults. """ + + def __init__(self): + self._properties = {} + + def to_api_repr(self): + """Build an API representation of the copy job config. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + use_legacy_sql = _TypedApiResourceProperty( + 'use_legacy_sql', 'useLegacySql', bool) + """See + https://cloud.google.com/bigquery/docs/\ + reference/v2/jobs#configuration.query.useLegacySql + """ + + dry_run = _TypedApiResourceProperty('dry_run', 'dryRun', bool) + """See + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun + """ + _allow_large_results = None _create_disposition = None _default_dataset = None @@ -1231,7 +1272,6 @@ class _AsyncQueryConfiguration(object): _priority = None _use_query_cache = None _use_legacy_sql = None - _dry_run = None _write_disposition = None _maximum_billing_tier = None _maximum_bytes_billed = None @@ -1260,20 +1300,60 @@ class QueryJob(_AsyncJob): An iterable of :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` (empty by default) + + :type job_config: :class:`~google.cloud.bigquery.job.QueryJobConfig` + :param job_config: + (Optional) Extra configuration options for the query job. """ _JOB_TYPE = 'query' _UDF_KEY = 'userDefinedFunctionResources' _QUERY_PARAMETERS_KEY = 'queryParameters' def __init__(self, job_id, query, client, - udf_resources=(), query_parameters=()): + udf_resources=(), query_parameters=(), job_config=None): super(QueryJob, self).__init__(job_id, client) + + if job_config is None: + job_config = QueryJobConfig() + self.query = query self.udf_resources = udf_resources self.query_parameters = query_parameters - self._configuration = _AsyncQueryConfiguration() + self._configuration = job_config self._query_results = None + @property + def use_legacy_sql(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + """ + return self._configuration.use_legacy_sql + + @use_legacy_sql.setter + def use_legacy_sql(self, value): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + """ + # TODO(swast): remove this method and only allow setting use_legacy_sql + # on QueryJobConfig objects. + self._configuration.use_legacy_sql = value + + @property + def dry_run(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.dry_run`. + """ + return self._configuration.dry_run + + @dry_run.setter + def dry_run(self, value): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.dry_run`. + """ + # TODO(swast): remove this method and only allow setting dry_run + # on QueryJobConfig objects. + self._configuration.dry_run = value + allow_large_results = _TypedProperty('allow_large_results', bool) """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults @@ -1314,20 +1394,8 @@ def __init__(self, job_id, query, client, https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache """ - use_legacy_sql = _TypedProperty('use_legacy_sql', bool) - """See - https://cloud.google.com/bigquery/docs/\ - reference/v2/jobs#configuration.query.useLegacySql - """ - - dry_run = _TypedProperty('dry_run', bool) - """See - https://cloud.google.com/bigquery/docs/\ - reference/rest/v2/jobs#configuration.dryRun - """ - - write_disposition = WriteDisposition('write_disposition', - 'writeDisposition') + write_disposition = WriteDisposition( + 'write_disposition', 'writeDisposition') """See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition """ @@ -1363,8 +1431,6 @@ def _populate_config_resource_booleans(self, configuration): configuration['flattenResults'] = self.flatten_results if self.use_query_cache is not None: configuration['useQueryCache'] = self.use_query_cache - if self.use_legacy_sql is not None: - configuration['useLegacySql'] = self.use_legacy_sql def _populate_config_resource(self, configuration): """Helper for _build_resource: copy config properties to resource""" @@ -1377,8 +1443,8 @@ def _populate_config_resource(self, configuration): 'projectId': self.default_dataset.project, 'datasetId': self.default_dataset.dataset_id, } - if self.destination is not None: - table_res = self._destination_table_resource() + table_res = self._destination_table_resource() + if table_res is not None: configuration['destinationTable'] = table_res if self.priority is not None: configuration['priority'] = self.priority @@ -1406,6 +1472,7 @@ def _populate_config_resource(self, configuration): def _build_resource(self): """Generate a resource for :meth:`begin`.""" + configuration = self._configuration.to_api_repr() resource = { 'jobReference': { @@ -1413,16 +1480,18 @@ def _build_resource(self): 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'query': self.query, - }, + self._JOB_TYPE: configuration, }, } - if self.dry_run is not None: - resource['configuration']['dryRun'] = self.dry_run + # The dryRun property only applies to query jobs, but it is defined at + # a level higher up. We need to remove it from the query config. + if 'dryRun' in configuration: + dry_run = configuration['dryRun'] + del configuration['dryRun'] + resource['configuration']['dryRun'] = dry_run - configuration = resource['configuration'][self._JOB_TYPE] + configuration['query'] = self.query self._populate_config_resource(configuration) return resource @@ -1436,19 +1505,28 @@ def _scrub_local_properties(self, cleaned): the client's project. """ configuration = cleaned['configuration']['query'] - self.query = configuration['query'] + # The dryRun property only applies to query jobs, but it is defined at + # a level higher up. We need to copy it to the query config. + self._configuration.dry_run = cleaned['configuration'].get('dryRun') + def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" + # The dryRun property only applies to query jobs, but it is defined at + # a level higher up. We need to copy it to the query config. + # It should already be correctly set by the _scrub_local_properties() + # method. + dry_run = self.dry_run + self._configuration = QueryJobConfig.from_api_repr(configuration) + self._configuration.dry_run = dry_run + self.allow_large_results = _bool_or_none( configuration.get('allowLargeResults')) self.flatten_results = _bool_or_none( configuration.get('flattenResults')) self.use_query_cache = _bool_or_none( configuration.get('useQueryCache')) - self.use_legacy_sql = _bool_or_none( - configuration.get('useLegacySql')) self.create_disposition = configuration.get('createDisposition') self.priority = configuration.get('priority') @@ -1459,22 +1537,13 @@ def _copy_configuration_properties(self, configuration): dest_remote = configuration.get('destinationTable') - if dest_remote is None: - if self.destination is not None: - del self.destination - else: - dest_local = self._destination_table_resource() - if dest_remote != dest_local: - project = dest_remote['projectId'] - dataset = Dataset(DatasetReference(project, - dest_remote['datasetId'])) - self.destination = dataset.table(dest_remote['tableId']) + if dest_remote is not None: + dataset = DatasetReference( + dest_remote['projectId'], dest_remote['datasetId']) + self.destination = dataset.table(dest_remote['tableId']) def_ds = configuration.get('defaultDataset') - if def_ds is None: - if self.default_dataset is not None: - del self.default_dataset - else: + if def_ds is not None: self.default_dataset = DatasetReference( def_ds['projectId'], def_ds['datasetId']) udf_resources = [] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 38400659bdaf..57199556ed84 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -455,6 +455,7 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, next_token='pageToken', extra_params=params) iterator.query_result = self + iterator.job = self.job return iterator diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 2785f21cb2a3..2fd43f7951c4 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -699,7 +699,7 @@ def test_job_cancel(self): # raise an error, and that the job completed (in the `retry()` # above). - def test_sync_query_w_legacy_sql_types(self): + def test_query_rows_w_legacy_sql_types(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) zoned = naive.replace(tzinfo=UTC) @@ -730,12 +730,13 @@ def test_sync_query_w_legacy_sql_types(self): }, ] for example in examples: - query = Config.CLIENT.run_sync_query(example['sql']) - query.use_legacy_sql = True - query.run() - self.assertEqual(len(query.rows), 1) - self.assertEqual(len(query.rows[0]), 1) - self.assertEqual(query.rows[0][0], example['expected']) + job_config = bigquery.QueryJobConfig() + job_config.use_legacy_sql = True + rows = list(Config.CLIENT.query_rows( + example['sql'], job_config=job_config)) + self.assertEqual(len(rows), 1) + self.assertEqual(len(rows[0]), 1) + self.assertEqual(rows[0][0], example['expected']) def _generate_standard_sql_types_examples(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) @@ -831,15 +832,20 @@ def _generate_standard_sql_types_examples(self): }, ] - def test_sync_query_w_standard_sql_types(self): + def test_query_rows_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() for example in examples: - query = Config.CLIENT.run_sync_query(example['sql']) - query.use_legacy_sql = False - query.run() - self.assertEqual(len(query.rows), 1) - self.assertEqual(len(query.rows[0]), 1) - self.assertEqual(query.rows[0][0], example['expected']) + rows = list(Config.CLIENT.query_rows(example['sql'])) + self.assertEqual(len(rows), 1) + self.assertEqual(len(rows[0]), 1) + self.assertEqual(rows[0][0], example['expected']) + + def test_query_rows_w_failed_query(self): + from google.api.core.exceptions import BadRequest + + with self.assertRaises(BadRequest): + Config.CLIENT.query_rows('invalid syntax;') + # TODO(swast): Ensure that job ID is surfaced in the exception. def test_dbapi_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() @@ -892,7 +898,7 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): job.result(timeout=JOB_TIMEOUT) self._fetch_single_page(table) - def test_sync_query_w_dml(self): + def test_query_w_dml(self): dataset_name = _make_dataset_id('dml_tests') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) @@ -901,12 +907,14 @@ def test_sync_query_w_dml(self): WHERE greeting = 'Hello World' """ - query = Config.CLIENT.run_sync_query( + query_job = Config.CLIENT.run_async_query( + 'test_query_w_dml_{}'.format(unique_resource_id()), query_template.format(dataset_name, table_name)) - query.use_legacy_sql = False - query.run() + query_job.use_legacy_sql = False + query_job.begin() + query_job.result() - self.assertEqual(query.num_dml_affected_rows, 1) + self.assertEqual(query_job.num_dml_affected_rows, 1) def test_dbapi_w_dml(self): dataset_name = _make_dataset_id('dml_tests') @@ -923,7 +931,7 @@ def test_dbapi_w_dml(self): self.assertEqual(Config.CURSOR.rowcount, 1) self.assertIsNone(Config.CURSOR.fetchone()) - def test_sync_query_w_query_params(self): + def test_query_w_query_params(self): from google.cloud.bigquery._helpers import ArrayQueryParameter from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter @@ -1084,14 +1092,16 @@ def test_sync_query_w_query_params(self): }, ] for example in examples: - query = Config.CLIENT.run_sync_query( + query_job = Config.CLIENT.run_async_query( + 'test_query_w_query_params{}'.format(unique_resource_id()), example['sql'], query_parameters=example['query_parameters']) - query.use_legacy_sql = False - query.run() - self.assertEqual(len(query.rows), 1) - self.assertEqual(len(query.rows[0]), 1) - self.assertEqual(query.rows[0][0], example['expected']) + query_job.use_legacy_sql = False + query_job.begin() + rows = [row for row in query_job.result()] + self.assertEqual(len(rows), 1) + self.assertEqual(len(rows[0]), 1) + self.assertEqual(rows[0][0], example['expected']) def test_dbapi_w_query_parameters(self): examples = [ @@ -1217,11 +1227,8 @@ def test_large_query_w_public_data(self): SQL = 'SELECT * from `{}.{}.{}` LIMIT {}'.format( PUBLIC, DATASET_ID, TABLE_NAME, LIMIT) - query = Config.CLIENT.run_sync_query(SQL) - query.use_legacy_sql = False - query.run() + iterator = Config.CLIENT.query_rows(SQL) - iterator = query.fetch_data(max_results=100) rows = list(iterator) self.assertEqual(len(rows), LIMIT) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index bdab1d36c2cb..d1a6d1218ae8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import unittest import mock @@ -1348,58 +1349,167 @@ def test_run_async_w_query_parameters(self): self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, query_parameters) - def test_run_sync_query_defaults(self): - from google.cloud.bigquery.query import QueryResults + def test_query_rows_defaults(self): + from google.api.core.page_iterator import HTTPIterator + JOB = 'job-id' PROJECT = 'PROJECT' - QUERY = 'select count(*) from persons' + QUERY = 'SELECT COUNT(*) FROM persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + }, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, + ] + }, + 'totalRows': '3', + 'pageToken': 'next-page', + } + FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE) + FIRST_PAGE['rows'] = [ + {'f': [{'v': '1'}]}, + {'f': [{'v': '2'}]}, + ] + LAST_PAGE = copy.deepcopy(RESULTS_RESOURCE) + LAST_PAGE['rows'] = [ + {'f': [{'v': '3'}]}, + ] + del LAST_PAGE['pageToken'] creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - query = client.run_sync_query(QUERY) - self.assertIsInstance(query, QueryResults) - self.assertIs(query._client, client) - self.assertIsNone(query.name) - self.assertEqual(query.query, QUERY) - self.assertEqual(query.udf_resources, []) - self.assertEqual(query.query_parameters, []) - - def test_run_sync_query_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource - from google.cloud.bigquery.query import QueryResults + conn = client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, FIRST_PAGE, LAST_PAGE) - RESOURCE_URI = 'gs://some-bucket/js/lib.js' + rows_iter = client.query_rows(QUERY) + rows = list(rows_iter) + + self.assertEqual(rows, [(1,), (2,), (3,)]) + self.assertIs(rows_iter.client, client) + self.assertIsInstance(rows_iter, HTTPIterator) + self.assertEqual(len(conn._requested), 4) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertIsInstance( + req['data']['jobReference']['jobId'], six.string_types) + + def test_query_rows_w_job_id(self): + from google.api.core.page_iterator import HTTPIterator + + JOB = 'job-id' PROJECT = 'PROJECT' - QUERY = 'select count(*) from persons' + QUERY = 'SELECT COUNT(*) FROM persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + }, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, + ] + }, + 'totalRows': '0', + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - query = client.run_sync_query(QUERY, udf_resources=udf_resources) - self.assertIsInstance(query, QueryResults) - self.assertIs(query._client, client) - self.assertIsNone(query.name) - self.assertEqual(query.query, QUERY) - self.assertEqual(query.udf_resources, udf_resources) - self.assertEqual(query.query_parameters, []) - - def test_run_sync_query_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud.bigquery.query import QueryResults + conn = client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) + + rows_iter = client.query_rows(QUERY, job_id=JOB) + rows = [row for row in rows_iter] + self.assertEqual(rows, []) + self.assertIs(rows_iter.client, client) + self.assertIsInstance(rows_iter, HTTPIterator) + self.assertEqual(len(conn._requested), 3) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertEqual(req['data']['jobReference']['jobId'], JOB) + + def test_query_rows_w_job_config(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.api.core.page_iterator import HTTPIterator + + JOB = 'job-id' PROJECT = 'PROJECT' - QUERY = 'select count(*) from persons' + QUERY = 'SELECT COUNT(*) FROM persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': True, + }, + 'dryRun': True, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, + ] + }, + 'totalRows': '0', + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] - query = client.run_sync_query(QUERY, query_parameters=query_parameters) - self.assertIsInstance(query, QueryResults) - self.assertIs(query._client, client) - self.assertIsNone(query.name) - self.assertEqual(query.query, QUERY) - self.assertEqual(query.udf_resources, []) - self.assertEqual(query.query_parameters, query_parameters) + conn = client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) + + job_config = QueryJobConfig() + job_config.use_legacy_sql = True + job_config.dry_run = True + rows_iter = client.query_rows(QUERY, job_id=JOB, job_config=job_config) + + self.assertIsInstance(rows_iter, HTTPIterator) + self.assertEqual(len(conn._requested), 2) + req = conn._requested[0] + configuration = req['data']['configuration'] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertEqual(req['data']['jobReference']['jobId'], JOB) + self.assertEqual(configuration['query']['useLegacySql'], True) + self.assertEqual(configuration['dryRun'], True) class _Connection(object): From 4111da698fd0a5e2e6da9243031f7cc75f2d1372 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 4 Oct 2017 14:32:42 -0400 Subject: [PATCH 0296/2016] bigquery: modify LoadJob (#4103) This PR handles loading from GCS. Loading from a local file will be done separately. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/client.py | 31 +- .../google/cloud/bigquery/job.py | 407 ++++++++---------- .../google-cloud-bigquery/tests/system.py | 34 +- .../tests/unit/test_client.py | 41 +- .../tests/unit/test_job.py | 231 +++++----- 6 files changed, 376 insertions(+), 370 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 7bbcc7782ee2..3a7cc2be7a69 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -35,6 +35,7 @@ from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import QueryJobConfig +from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -47,6 +48,7 @@ 'CopyJobConfig', 'ExtractJobConfig', 'QueryJobConfig', + 'LoadJobConfig', 'ScalarQueryParameter', 'SchemaField', 'StructQueryParameter', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 7ceed4fc1e41..da69decd03c8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -19,6 +19,8 @@ import collections import uuid +import six + from google.api.core import page_iterator from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection @@ -490,26 +492,37 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, max_results=max_results, extra_params=extra_params) - def load_table_from_storage(self, job_id, destination, *source_uris): - """Construct a job for loading data into a table from CloudStorage. + def load_table_from_storage(self, source_uris, destination, + job_id=None, job_config=None): + """Starts a job for loading data into a table from CloudStorage. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load - :type job_id: str - :param job_id: Name of the job. + :type source_uris: One of: + str + sequence of string + :param source_uris: URIs of data files to be loaded; in format + ``gs:///``. - :type destination: :class:`google.cloud.bigquery.table.Table` + :type destination: :class:`google.cloud.bigquery.table.TableReference` :param destination: Table into which data is to be loaded. - :type source_uris: sequence of string - :param source_uris: URIs of data files to be loaded; in format - ``gs:///``. + :type job_id: str + :param job_id: Name of the job. + + :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` + :param job_config: (Optional) Extra configuration options for the job. :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: a new ``LoadJob`` instance """ - return LoadJob(job_id, destination, source_uris, client=self) + job_id = _make_job_id(job_id) + if isinstance(source_uris, six.string_types): + source_uris = [source_uris] + job = LoadJob(job_id, source_uris, destination, self, job_config) + job.begin() + return job def copy_table(self, sources, destination, job_id=None, job_config=None): """Start a job for copying one or more tables into another table. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 812dde4b32a3..fd427c647a55 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -24,10 +24,8 @@ from google.cloud import exceptions from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds -from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource @@ -106,20 +104,6 @@ def _error_result_to_exception(error_result): status_code, error_result.get('message', ''), errors=[error_result]) -class AutoDetectSchema(_TypedProperty): - """Typed Property for ``autodetect`` properties. - - :raises ValueError: on ``set`` operation if ``instance.schema`` - is already defined. - """ - def __set__(self, instance, value): - self._validate(value) - if instance.schema: - raise ValueError('A schema should not be already defined ' - 'when using schema auto-detection') - setattr(instance._configuration, self._backing_name, value) - - class Compression(_EnumApiResourceProperty): """Pseudo-enum for ``compression`` properties.""" GZIP = 'GZIP' @@ -139,7 +123,7 @@ class DestinationFormat(_EnumApiResourceProperty): AVRO = 'AVRO' -class Encoding(_EnumProperty): +class Encoding(_EnumApiResourceProperty): """Pseudo-enum for ``encoding`` properties.""" UTF_8 = 'UTF-8' ISO_8559_1 = 'ISO-8559-1' @@ -151,7 +135,7 @@ class QueryPriority(_EnumProperty): BATCH = 'BATCH' -class SourceFormat(_EnumProperty): +class SourceFormat(_EnumApiResourceProperty): """Pseudo-enum for ``source_format`` properties.""" CSV = 'CSV' DATASTORE_BACKUP = 'DATASTORE_BACKUP' @@ -166,6 +150,20 @@ class WriteDisposition(_EnumApiResourceProperty): WRITE_EMPTY = 'WRITE_EMPTY' +class AutoDetectSchema(_TypedApiResourceProperty): + """Property for ``autodetect`` properties. + + :raises ValueError: on ``set`` operation if ``instance.schema`` + is already defined. + """ + def __set__(self, instance, value): + self._validate(value) + if instance.schema: + raise ValueError('A schema should not be already defined ' + 'when using schema auto-detection') + instance._properties[self.resource_name] = value + + class _AsyncJob(google.api.core.future.polling.PollingFuture): """Base class for asynchronous jobs. @@ -542,35 +540,151 @@ def cancelled(self): and self.error_result.get('reason') == _STOPPED_REASON) -class _LoadConfiguration(object): - """User-settable configuration options for load jobs. +class LoadJobConfig(object): + """Configuration options for load jobs. - Values which are ``None`` -> server defaults. + All properties in this class are optional. Values which are ``None`` -> + server defaults. """ - _allow_jagged_rows = None - _allow_quoted_newlines = None - _autodetect = None - _create_disposition = None - _encoding = None - _field_delimiter = None - _ignore_unknown_values = None - _max_bad_records = None - _null_marker = None - _quote_character = None - _skip_leading_rows = None - _source_format = None - _write_disposition = None + + def __init__(self): + self._properties = {} + self._schema = () + + allow_jagged_rows = _TypedApiResourceProperty( + 'allow_jagged_rows', 'allowJaggedRows', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows + """ + + allow_quoted_newlines = _TypedApiResourceProperty( + 'allow_quoted_newlines', 'allowQuotedNewlines', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines + """ + + autodetect = AutoDetectSchema('autodetect', 'autodetect', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect + """ + + create_disposition = CreateDisposition('create_disposition', + 'createDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition + """ + + encoding = Encoding('encoding', 'encoding') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding + """ + + field_delimiter = _TypedApiResourceProperty( + 'field_delimiter', 'fieldDelimiter', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter + """ + + ignore_unknown_values = _TypedApiResourceProperty( + 'ignore_unknown_values', 'ignoreUnknownValues', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues + """ + + max_bad_records = _TypedApiResourceProperty( + 'max_bad_records', 'maxBadRecords', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords + """ + + null_marker = _TypedApiResourceProperty( + 'null_marker', 'nullMarker', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker + """ + + quote_character = _TypedApiResourceProperty( + 'quote_character', 'quote', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote + """ + + skip_leading_rows = _TypedApiResourceProperty( + 'skip_leading_rows', 'skipLeadingRows', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows + """ + + source_format = SourceFormat('source_format', 'sourceFormat') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat + """ + + write_disposition = WriteDisposition('write_disposition', + 'writeDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition + """ + + @property + def schema(self): + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema + """ + return list(self._schema) + + @schema.setter + def schema(self, value): + if not all(isinstance(field, SchemaField) for field in value): + raise ValueError('Schema items must be fields') + if self.autodetect: + raise ValueError( + 'Schema can not be set if `autodetect` property is True') + self._schema = tuple(value) + + def to_api_repr(self): + """Build an API representation of the load job config. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + if len(self.schema) > 0: + config['schema'] = {'fields': _build_schema_resource(self.schema)} + # skipLeadingRows is a string because it's defined as an int64, which + # can't be represented as a JSON number. + slr = config.get('skipLeadingRows') + if slr is not None: + config['skipLeadingRows'] = str(slr) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. + """ + schema = resource.pop('schema', {'fields': ()}) + slr = resource.pop('skipLeadingRows', None) + config = cls() + config._properties = copy.deepcopy(resource) + config.schema = _parse_schema_resource(schema) + config.skip_leading_rows = _int_or_none(slr) class LoadJob(_AsyncJob): - """Asynchronous job for loading data into a table from remote URI. + """Asynchronous job for loading data into a table. - :type job_id: str - :param job_id: - The job's ID, belonging to the project associated with the client. + Can load from Google Cloud Storage URIs or from a file. - :type destination: :class:`google.cloud.bigquery.table.Table` - :param destination: Table into which data is to be loaded. + :type job_id: str + :param job_id: the job's ID :type source_uris: sequence of string :param source_uris: @@ -578,56 +692,34 @@ class LoadJob(_AsyncJob): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris for supported URI formats. + :type destination: :class:`google.cloud.bigquery.table.TableReference` + :param destination: reference to table into which data is to be loaded. + :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - - :type schema: list of :class:`google.cloud.bigquery.table.SchemaField` - :param schema: The job's schema """ - _schema = None _JOB_TYPE = 'load' - def __init__(self, name, destination, source_uris, client, schema=()): - super(LoadJob, self).__init__(name, client) - self.destination = destination - self.source_uris = source_uris - self._configuration = _LoadConfiguration() - # Let the @property do validation. This must occur after all other - # attributes have been set. - self.schema = schema + def __init__(self, job_id, source_uris, destination, client, + job_config=None): + super(LoadJob, self).__init__(job_id, client) - @property - def schema(self): - """Table's schema. - - :rtype: list of :class:`SchemaField` - :returns: fields describing the schema - """ - return list(self._schema) + if job_config is None: + job_config = LoadJobConfig() - @schema.setter - def schema(self, value): - """Update table's schema + self.source_uris = source_uris + self.destination = destination + self._configuration = job_config - :type value: list of :class:`SchemaField` - :param value: fields describing the schema + @property + def configuration(self): + """Configuration for this job. - :raises TypeError: If ``value`is not a sequence. - :raises ValueError: If any item in the sequence is not - a ``SchemaField``. + :rtype: :class:`~google.cloud.bigquery.job.LoadJobConfig` """ - if not value: - self._schema = () - else: - if not all(isinstance(field, SchemaField) for field in value): - raise ValueError('Schema items must be fields') - if self.autodetect: - raise ValueError( - 'Schema can not be set if `autodetect` property is True') - - self._schema = tuple(value) + return self._configuration @property def input_file_bytes(self): @@ -673,155 +765,25 @@ def output_rows(self): if statistics is not None: return int(statistics['load']['outputRows']) - allow_jagged_rows = _TypedProperty('allow_jagged_rows', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows - """ - - allow_quoted_newlines = _TypedProperty('allow_quoted_newlines', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines - """ - - autodetect = AutoDetectSchema('autodetect', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect - """ - - create_disposition = CreateDisposition('create_disposition', - 'createDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition - """ - - encoding = Encoding('encoding') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding - """ - - field_delimiter = _TypedProperty('field_delimiter', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter - """ - - ignore_unknown_values = _TypedProperty('ignore_unknown_values', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues - """ - - max_bad_records = _TypedProperty('max_bad_records', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords - """ - - null_marker = _TypedProperty('null_marker', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker - """ - - quote_character = _TypedProperty('quote_character', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote - """ - - skip_leading_rows = _TypedProperty('skip_leading_rows', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows - """ - - source_format = SourceFormat('source_format') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat - """ - - write_disposition = WriteDisposition('write_disposition', - 'writeDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition - """ - - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - if self.allow_jagged_rows is not None: - configuration['allowJaggedRows'] = self.allow_jagged_rows - if self.allow_quoted_newlines is not None: - configuration['allowQuotedNewlines'] = self.allow_quoted_newlines - if self.autodetect is not None: - configuration['autodetect'] = self.autodetect - if self.create_disposition is not None: - configuration['createDisposition'] = self.create_disposition - if self.encoding is not None: - configuration['encoding'] = self.encoding - if self.field_delimiter is not None: - configuration['fieldDelimiter'] = self.field_delimiter - if self.ignore_unknown_values is not None: - configuration['ignoreUnknownValues'] = self.ignore_unknown_values - if self.max_bad_records is not None: - configuration['maxBadRecords'] = self.max_bad_records - if self.null_marker is not None: - configuration['nullMarker'] = self.null_marker - if self.quote_character is not None: - configuration['quote'] = self.quote_character - if self.skip_leading_rows is not None: - configuration['skipLeadingRows'] = str(self.skip_leading_rows) - if self.source_format is not None: - configuration['sourceFormat'] = self.source_format - if self.write_disposition is not None: - configuration['writeDisposition'] = self.write_disposition - def _build_resource(self): """Generate a resource for :meth:`begin`.""" - resource = { + configuration = self._configuration.to_api_repr() + configuration['sourceUris'] = self.source_uris + configuration['destinationTable'] = self.destination.to_api_repr() + + return { 'jobReference': { 'projectId': self.project, 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'sourceUris': self.source_uris, - 'destinationTable': { - 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.table_id, - }, - }, + self._JOB_TYPE: configuration, }, } - configuration = resource['configuration'][self._JOB_TYPE] - self._populate_config_resource(configuration) - - if len(self.schema) > 0: - configuration['schema'] = { - 'fields': _build_schema_resource(self.schema)} - - return resource - - def _scrub_local_properties(self, cleaned): - """Helper: handle subclass properties in cleaned.""" - schema = cleaned.pop('schema', {'fields': ()}) - self.schema = _parse_schema_resource(schema) def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" - self.allow_jagged_rows = _bool_or_none( - configuration.get('allowJaggedRows')) - self.allow_quoted_newlines = _bool_or_none( - configuration.get('allowQuotedNewlines')) - self.autodetect = _bool_or_none( - configuration.get('autodetect')) - self.create_disposition = configuration.get('createDisposition') - self.encoding = configuration.get('encoding') - self.field_delimiter = configuration.get('fieldDelimiter') - self.ignore_unknown_values = _bool_or_none( - configuration.get('ignoreUnknownValues')) - self.max_bad_records = _int_or_none( - configuration.get('maxBadRecords')) - self.null_marker = configuration.get('nullMarker') - self.quote_character = configuration.get('quote') - self.skip_leading_rows = _int_or_none( - configuration.get('skipLeadingRows')) - self.source_format = configuration.get('sourceFormat') - self.write_disposition = configuration.get('writeDisposition') + self._configuration._properties = copy.deepcopy(configuration) @classmethod def from_api_repr(cls, resource, client): @@ -842,15 +804,16 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: Job parsed from ``resource``. """ - job_id, config = cls._get_resource_config(resource) - dest_config = config['destinationTable'] + job_id, config_resource = cls._get_resource_config(resource) + config = LoadJobConfig.from_api_repr(config_resource) + dest_config = config_resource['destinationTable'] ds_ref = DatasetReference(dest_config['projectId'], dest_config['datasetId'],) - dataset = Dataset(ds_ref) - table_ref = TableReference(dataset, dest_config['tableId']) - destination = Table(table_ref, client=client) - source_urls = config.get('sourceUris', ()) - job = cls(job_id, destination, source_urls, client=client) + destination = TableReference(ds_ref, dest_config['tableId']) + # TODO(jba): sourceUris should not be absent if there are no LoadJobs + # for file uploads. + source_uris = config_resource.get('sourceUris') + job = cls(job_id, source_uris, destination, client, config) job._set_properties(resource) return job diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 2fd43f7951c4..ada6d92b5050 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -463,14 +463,13 @@ def test_load_table_from_storage_then_dump_table(self): table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) + config = bigquery.LoadJobConfig() + config.create_disposition = 'CREATE_NEVER' + config.skip_leading_rows = 1 + config.source_format = 'CSV' + config.write_disposition = 'WRITE_EMPTY' job = Config.CLIENT.load_table_from_storage( - 'bq_load_storage_test_' + local_id, table, GS_URL) - job.create_disposition = 'CREATE_NEVER' - job.skip_leading_rows = 1 - job.source_format = 'CSV' - job.write_disposition = 'WRITE_EMPTY' - - job.begin() + GS_URL, dataset.table(TABLE_NAME), job_config=config) # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability @@ -523,11 +522,10 @@ def test_load_table_from_storage_w_autodetect_schema(self): dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) table_ref = dataset.table(table_name) - job = Config.CLIENT.load_table_from_storage( - 'bq_load_storage_test_' + local_id, table_ref, gs_url) - job.autodetect = True - - job.begin() + config = bigquery.LoadJobConfig() + config.autodetect = True + job = Config.CLIENT.load_table_from_storage(gs_url, table_ref, + job_config=config) # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability @@ -551,7 +549,6 @@ def _load_table_for_extract_table( self, storage_client, rows, bucket_name, blob_name, table): from google.cloud._testing import _NamedTemporaryFile - local_id = unique_resource_id() gs_url = 'gs://{}/{}'.format(bucket_name, blob_name) # In the **very** rare case the bucket name is reserved, this @@ -572,10 +569,11 @@ def _load_table_for_extract_table( dataset = self.temp_dataset(table.dataset_id) table_ref = dataset.table(table.table_id) - job = Config.CLIENT.load_table_from_storage( - 'bq_extract_storage_test_' + local_id, table_ref, gs_url) - job.autodetect = True - job.begin() + config = bigquery.LoadJobConfig() + config.autodetect = True + job = Config.CLIENT.load_table_from_storage(gs_url, table_ref, + job_config=config) + # TODO(jba): do we need this retry now that we have job.result()? # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds @@ -608,7 +606,7 @@ def test_extract_table(self): destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) job = Config.CLIENT.extract_table(table_ref, destination_uri) - job.result() + job.result(timeout=100) self.to_delete.insert(0, destination) got = destination.download_as_string().decode('utf-8') diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d1a6d1218ae8..3f667039f497 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1122,12 +1122,47 @@ def test_load_table_from_storage(self): DATASET = 'dataset_name' DESTINATION = 'destination_table' SOURCE_URI = 'http://example.com/source.csv' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'load': { + 'sourceUris': [SOURCE_URI], + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': DATASET, + 'tableId': DESTINATION, + }, + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - dataset = client.dataset(DATASET) - destination = dataset.table(DESTINATION) - job = client.load_table_from_storage(JOB, destination, SOURCE_URI) + conn = client._connection = _Connection(RESOURCE) + destination = client.dataset(DATASET).table(DESTINATION) + + job = client.load_table_from_storage(SOURCE_URI, destination, + job_id=JOB) + + # Check that load_table_from_storage actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/%s/jobs' % PROJECT) + + self.assertIsInstance(job, LoadJob) + self.assertIs(job._client, client) + self.assertEqual(job.job_id, JOB) + self.assertEqual(list(job.source_uris), [SOURCE_URI]) + self.assertIs(job.destination, destination) + + conn = client._connection = _Connection(RESOURCE) + + job = client.load_table_from_storage([SOURCE_URI], destination, + job_id=JOB) self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index d0a654c0c15d..e6b903bfebaf 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -18,6 +18,7 @@ import unittest from google.cloud.bigquery.job import ExtractJobConfig, CopyJobConfig +from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.dataset import DatasetReference @@ -84,12 +85,14 @@ def test_missing_reason(self): class _Base(object): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import TableReference PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' DS_ID = 'datset_id' DS_REF = DatasetReference(PROJECT, DS_ID) TABLE_ID = 'table_id' + TABLE_REF = TableReference(DS_REF, TABLE_ID) JOB_NAME = 'job_name' def _make_one(self, *args, **kw): @@ -231,50 +234,53 @@ def _makeResource(self, started=False, ended=False): return resource def _verifyBooleanConfigProperties(self, job, config): + jconfig = job.configuration if 'allowJaggedRows' in config: - self.assertEqual(job.allow_jagged_rows, + self.assertEqual(jconfig.allow_jagged_rows, config['allowJaggedRows']) else: - self.assertIsNone(job.allow_jagged_rows) + self.assertIsNone(jconfig.allow_jagged_rows) if 'allowQuotedNewlines' in config: - self.assertEqual(job.allow_quoted_newlines, + self.assertEqual(jconfig.allow_quoted_newlines, config['allowQuotedNewlines']) else: - self.assertIsNone(job.allow_quoted_newlines) + self.assertIsNone(jconfig.allow_quoted_newlines) if 'autodetect' in config: self.assertEqual( - job.autodetect, config['autodetect']) + jconfig.autodetect, config['autodetect']) else: - self.assertIsNone(job.autodetect) + self.assertIsNone(jconfig.autodetect) if 'ignoreUnknownValues' in config: - self.assertEqual(job.ignore_unknown_values, + self.assertEqual(jconfig.ignore_unknown_values, config['ignoreUnknownValues']) else: - self.assertIsNone(job.ignore_unknown_values) + self.assertIsNone(jconfig.ignore_unknown_values) def _verifyEnumConfigProperties(self, job, config): + jconfig = job.configuration if 'createDisposition' in config: - self.assertEqual(job.create_disposition, + self.assertEqual(jconfig.create_disposition, config['createDisposition']) else: - self.assertIsNone(job.create_disposition) + self.assertIsNone(jconfig.create_disposition) if 'encoding' in config: - self.assertEqual(job.encoding, + self.assertEqual(jconfig.encoding, config['encoding']) else: - self.assertIsNone(job.encoding) + self.assertIsNone(jconfig.encoding) if 'sourceFormat' in config: - self.assertEqual(job.source_format, + self.assertEqual(jconfig.source_format, config['sourceFormat']) else: - self.assertIsNone(job.source_format) + self.assertIsNone(jconfig.source_format) if 'writeDisposition' in config: - self.assertEqual(job.write_disposition, + self.assertEqual(jconfig.write_disposition, config['writeDisposition']) else: - self.assertIsNone(job.write_disposition) + self.assertIsNone(jconfig.write_disposition) def _verifyResourceProperties(self, job, resource): + jconfig = job.configuration self._verifyReadonlyResourceProperties(job, resource) config = resource.get('configuration', {}).get('load') @@ -290,43 +296,43 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.destination.table_id, table_ref['tableId']) if 'fieldDelimiter' in config: - self.assertEqual(job.field_delimiter, + self.assertEqual(jconfig.field_delimiter, config['fieldDelimiter']) else: - self.assertIsNone(job.field_delimiter) + self.assertIsNone(jconfig.field_delimiter) if 'maxBadRecords' in config: - self.assertEqual(job.max_bad_records, + self.assertEqual(jconfig.max_bad_records, config['maxBadRecords']) else: - self.assertIsNone(job.max_bad_records) + self.assertIsNone(jconfig.max_bad_records) if 'nullMarker' in config: - self.assertEqual(job.null_marker, + self.assertEqual(jconfig.null_marker, config['nullMarker']) else: - self.assertIsNone(job.null_marker) + self.assertIsNone(jconfig.null_marker) if 'quote' in config: - self.assertEqual(job.quote_character, + self.assertEqual(jconfig.quote_character, config['quote']) else: - self.assertIsNone(job.quote_character) + self.assertIsNone(jconfig.quote_character) if 'skipLeadingRows' in config: - self.assertEqual(str(job.skip_leading_rows), + self.assertEqual(str(jconfig.skip_leading_rows), config['skipLeadingRows']) else: - self.assertIsNone(job.skip_leading_rows) + self.assertIsNone(jconfig.skip_leading_rows) def test_ctor(self): client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - self.assertIs(job.destination, table) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client) + self.assertIs(job.destination, self.TABLE_REF) self.assertEqual(list(job.source_uris), [self.SOURCE1]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) - self.assertEqual(job.schema, []) + self.assertEqual(job.configuration.schema, []) self._verifyInitialReadonlyProperties(job) @@ -337,30 +343,32 @@ def test_ctor(self): self.assertIsNone(job.output_rows) # set/read from resource['configuration']['load'] - self.assertIsNone(job.allow_jagged_rows) - self.assertIsNone(job.allow_quoted_newlines) - self.assertIsNone(job.autodetect) - self.assertIsNone(job.create_disposition) - self.assertIsNone(job.encoding) - self.assertIsNone(job.field_delimiter) - self.assertIsNone(job.ignore_unknown_values) - self.assertIsNone(job.max_bad_records) - self.assertIsNone(job.null_marker) - self.assertIsNone(job.quote_character) - self.assertIsNone(job.skip_leading_rows) - self.assertIsNone(job.source_format) - self.assertIsNone(job.write_disposition) - - def test_ctor_w_schema(self): + jconfig = job.configuration + self.assertIsNone(jconfig.allow_jagged_rows) + self.assertIsNone(jconfig.allow_quoted_newlines) + self.assertIsNone(jconfig.autodetect) + self.assertIsNone(jconfig.create_disposition) + self.assertIsNone(jconfig.encoding) + self.assertIsNone(jconfig.field_delimiter) + self.assertIsNone(jconfig.ignore_unknown_values) + self.assertIsNone(jconfig.max_bad_records) + self.assertIsNone(jconfig.null_marker) + self.assertIsNone(jconfig.quote_character) + self.assertIsNone(jconfig.skip_leading_rows) + self.assertIsNone(jconfig.source_format) + self.assertIsNone(jconfig.write_disposition) + + def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField client = _Client(self.PROJECT) - table = _Table() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client, - schema=[full_name, age]) - self.assertEqual(job.schema, [full_name, age]) + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client, config) + self.assertEqual(job.configuration.schema, [full_name, age]) def test_done(self): client = _Client(self.PROJECT) @@ -377,15 +385,15 @@ def test_result(self): self.assertIs(result, job) - def test_result_invokes_begins(self): + def test_result_invokes_begin(self): begun_resource = self._makeResource() done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} connection = _Connection(begun_resource, done_resource) client = _Client(self.PROJECT, connection=connection) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client) job.result() self.assertEqual(len(connection._requested), 2) @@ -394,67 +402,52 @@ def test_result_invokes_begins(self): self.assertEqual(reload_request['method'], 'GET') def test_schema_setter_non_list(self): - client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + config = LoadJobConfig() with self.assertRaises(TypeError): - job.schema = object() + config.schema = object() def test_schema_setter_invalid_field(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + config = LoadJobConfig() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') with self.assertRaises(ValueError): - job.schema = [full_name, object()] + config.schema = [full_name, object()] def test_schema_setter(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + config = LoadJobConfig() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - job.schema = [full_name, age] - self.assertEqual(job.schema, [full_name, age]) + config.schema = [full_name, age] + self.assertEqual(config.schema, [full_name, age]) def test_schema_setter_w_autodetect(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) - table = _Table() - full_name = SchemaField('full_name', 'STRING') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - job.autodetect = False - job.schema = [full_name] - self.assertEqual(job.schema, [full_name]) - - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - job.autodetect = True + config = LoadJobConfig() + schema = [SchemaField('full_name', 'STRING')] + config.autodetect = False + config.schema = schema + self.assertEqual(config.schema, schema) + + config.schema = [] + config.autodetect = True with self.assertRaises(ValueError): - job.schema = [full_name] + config.schema = schema def test_autodetect_setter_w_schema(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) - table = _Table() - full_name = SchemaField('full_name', 'STRING') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - - job.autodetect = True - job.schema = [] - self.assertEqual(job.schema, []) + config = LoadJobConfig() - job.autodetect = False - job.schema = [full_name] - self.assertEqual(job.autodetect, False) + config.autodetect = False + config.schema = [SchemaField('full_name', 'STRING')] + self.assertEqual(config.autodetect, False) with self.assertRaises(ValueError): - job.autodetect = True + config.autodetect = True def test_props_set_by_server(self): import datetime @@ -475,7 +468,7 @@ def test_props_set_by_server(self): client = _Client(self.PROJECT) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) job._properties['etag'] = 'ETAG' job._properties['id'] = JOB_ID job._properties['selfLink'] = URL @@ -578,8 +571,8 @@ def test_from_api_repr_w_properties(self): def test_begin_w_already_running(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client) job._properties['status'] = {'state': 'RUNNING'} with self.assertRaises(ValueError): @@ -595,8 +588,8 @@ def test_begin_w_bound_client(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client) job.begin() @@ -634,9 +627,10 @@ def test_begin_w_autodetect(self): del resource['user_email'] conn = _Connection(resource) client = _Client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - job.autodetect = True + config = LoadJobConfig() + config.autodetect = True + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client, config) job.begin() sent = { @@ -698,24 +692,24 @@ def test_begin_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - table = _Table() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1, - schema=[full_name, age]) - - job.allow_jagged_rows = True - job.allow_quoted_newlines = True - job.create_disposition = 'CREATE_NEVER' - job.encoding = 'ISO-8559-1' - job.field_delimiter = '|' - job.ignore_unknown_values = True - job.max_bad_records = 100 - job.null_marker = r'\N' - job.quote_character = "'" - job.skip_leading_rows = 1 - job.source_format = 'CSV' - job.write_disposition = 'WRITE_TRUNCATE' + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + client1, config) + config.allow_jagged_rows = True + config.allow_quoted_newlines = True + config.create_disposition = 'CREATE_NEVER' + config.encoding = 'ISO-8559-1' + config.field_delimiter = '|' + config.ignore_unknown_values = True + config.max_bad_records = 100 + config.null_marker = r'\N' + config.quote_character = "'" + config.skip_leading_rows = 1 + config.source_format = 'CSV' + config.write_disposition = 'WRITE_TRUNCATE' job.begin(client=client2) @@ -733,6 +727,7 @@ def test_begin_w_alternate_client(self): 'load': LOAD_CONFIGURATION, }, } + self.maxDiff = None self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) @@ -741,7 +736,7 @@ def test_exists_miss_w_bound_client(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) self.assertFalse(job.exists()) @@ -758,7 +753,7 @@ def test_exists_hit_w_alternate_client(self): conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) self.assertTrue(job.exists(client=client2)) @@ -775,7 +770,7 @@ def test_reload_w_bound_client(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) job.reload() @@ -793,7 +788,7 @@ def test_reload_w_alternate_client(self): conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) job.reload(client=client2) @@ -811,7 +806,7 @@ def test_cancel_w_bound_client(self): conn = _Connection(RESPONSE) client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) job.cancel() @@ -830,7 +825,7 @@ def test_cancel_w_alternate_client(self): conn2 = _Connection(RESPONSE) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) job.cancel(client=client2) From 2d1411c0b83359a50a6cb71efb4a06d0f5e9b6c5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 6 Oct 2017 12:13:22 -0700 Subject: [PATCH 0297/2016] Make QueryResults read-only. (#4094) Removes any QueryJob-related properties from QueryResults. Now the QueryResults class more closely reflects the backend resource. --- .../google/cloud/bigquery/query.py | 230 +----- .../tests/unit/test_client.py | 2 +- .../tests/unit/test_job.py | 15 +- .../tests/unit/test_query.py | 718 +++++------------- 4 files changed, 232 insertions(+), 733 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 57199556ed84..888ce5853050 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -12,97 +12,37 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Define API Queries.""" +"""BigQuery query processing.""" -import six +import copy from google.api.core import page_iterator -from google.cloud.bigquery._helpers import _TypedProperty from google.cloud.bigquery._helpers import _rows_from_json -from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery._helpers import QueryParametersProperty -from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start -class _SyncQueryConfiguration(object): - """User-settable configuration options for synchronous query jobs. - - Values which are ``None`` -> server defaults. - """ - _default_dataset = None - _dry_run = None - _max_results = None - _timeout_ms = None - _preserve_nulls = None - _use_query_cache = None - _use_legacy_sql = None - - class QueryResults(object): - """Synchronous job: query tables. + """Results of a query. - :type query: str - :param query: SQL query string + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs/getQueryResults :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery.job.UDFResource` - (empty by default) - - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) """ - _UDF_KEY = 'userDefinedFunctionResources' - _QUERY_PARAMETERS_KEY = 'queryParameters' - - def __init__(self, query, client, udf_resources=(), query_parameters=()): + def __init__(self, client, properties): self._client = client self._properties = {} - self.query = query - self._configuration = _SyncQueryConfiguration() - self.udf_resources = udf_resources - self.query_parameters = query_parameters self._job = None + self._set_properties(properties) @classmethod def from_api_repr(cls, api_response, client): - instance = cls(None, client) - instance._set_properties(api_response) - return instance - - @classmethod - def from_query_job(cls, job): - """Factory: construct from an existing job. - - :type job: :class:`~google.cloud.bigquery.job.QueryJob` - :param job: existing job - - :rtype: :class:`QueryResults` - :returns: the instance, bound to the job - """ - instance = cls(job.query, job._client, job.udf_resources) - instance._job = job - job_ref = instance._properties.setdefault('jobReference', {}) - job_ref['jobId'] = job.job_id - if job.default_dataset is not None: - instance.default_dataset = job.default_dataset - if job.use_query_cache is not None: - instance.use_query_cache = job.use_query_cache - if job.use_legacy_sql is not None: - instance.use_legacy_sql = job.use_legacy_sql - return instance + return cls(client, api_response) @property def project(self): @@ -111,7 +51,7 @@ def project(self): :rtype: str :returns: the project (derived from the client). """ - return self._client.project + return self._properties.get('jobReference', {}).get('projectId') def _require_client(self, client): """Check client or verify over-ride. @@ -168,33 +108,17 @@ def errors(self): return self._properties.get('errors') @property - def name(self): - """Job name, generated by the back-end. + def job_id(self): + """Job ID of the query job these results are from. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobReference - :rtype: list of mapping, or ``NoneType`` - :returns: Mappings describing errors generated on the server (None - until set by the server). + :rtype: string + :returns: Job ID of the query job. """ return self._properties.get('jobReference', {}).get('jobId') - @property - def job(self): - """Job instance used to run the query. - - :rtype: :class:`google.cloud.bigquery.job.QueryJob`, or ``NoneType`` - :returns: Job instance used to run the query (None until - ``jobReference`` property is set by the server). - """ - if self._job is None: - job_ref = self._properties.get('jobReference') - if job_ref is not None: - self._job = QueryJob(job_ref['jobId'], self.query, - self._client) - return self._job - @property def page_token(self): """Token for fetching next bach of results. @@ -273,119 +197,35 @@ def schema(self): """ return _parse_schema_resource(self._properties.get('schema', {})) - default_dataset = _TypedProperty('default_dataset', DatasetReference) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#defaultDataset - """ - - dry_run = _TypedProperty('dry_run', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#dryRun - """ - - max_results = _TypedProperty('max_results', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#maxResults - """ - - preserve_nulls = _TypedProperty('preserve_nulls', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#preserveNulls - """ - - query_parameters = QueryParametersProperty() - - timeout_ms = _TypedProperty('timeout_ms', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#timeoutMs - """ - - udf_resources = UDFResourcesProperty() - - use_query_cache = _TypedProperty('use_query_cache', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#useQueryCache - """ - - use_legacy_sql = _TypedProperty('use_legacy_sql', bool) - """See - https://cloud.google.com/bigquery/docs/\ - reference/v2/jobs/query#useLegacySql - """ - def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` :type api_response: dict :param api_response: response returned from an API call """ - self._properties.clear() - self._properties.update(api_response) - - def _build_resource(self): - """Generate a resource for :meth:`begin`.""" - resource = {'query': self.query} - - if self.default_dataset is not None: - resource['defaultDataset'] = { - 'projectId': self.project, - 'datasetId': self.default_dataset.dataset_id, - } - - if self.max_results is not None: - resource['maxResults'] = self.max_results - - if self.preserve_nulls is not None: - resource['preserveNulls'] = self.preserve_nulls - - if self.timeout_ms is not None: - resource['timeoutMs'] = self.timeout_ms - - if self.use_query_cache is not None: - resource['useQueryCache'] = self.use_query_cache - - if self.use_legacy_sql is not None: - resource['useLegacySql'] = self.use_legacy_sql - - if self.dry_run is not None: - resource['dryRun'] = self.dry_run + job_id_present = ( + 'jobReference' in api_response + and 'jobId' in api_response['jobReference'] + and 'projectId' in api_response['jobReference']) + if not job_id_present: + raise ValueError('QueryResult requires a job reference') - if len(self._udf_resources) > 0: - resource[self._UDF_KEY] = [ - {udf_resource.udf_type: udf_resource.value} - for udf_resource in self._udf_resources - ] - if len(self._query_parameters) > 0: - resource[self._QUERY_PARAMETERS_KEY] = [ - query_parameter.to_api_repr() - for query_parameter in self._query_parameters - ] - if self._query_parameters[0].name is None: - resource['parameterMode'] = 'POSITIONAL' - else: - resource['parameterMode'] = 'NAMED' - - return resource - - def run(self, client=None): - """API call: run the query via a POST request + self._properties.clear() + self._properties.update(copy.deepcopy(api_response)) - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + def job(self): + """Job instance used to run the query. - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. + :rtype: :class:`google.cloud.bigquery.job.QueryJob`, or ``NoneType`` + :returns: Job instance used to run the query (None until + ``jobReference`` property is set by the server). """ - if self.job is not None: - raise ValueError("Query job is already running.") + if self._job is None: + job_ref = self._properties['jobReference'] + self._job = self._client.get_job( + job_ref['jobId'], project=job_ref['projectId']) - client = self._require_client(client) - path = '/projects/%s/queries' % (self.project,) - api_response = client._connection.api_request( - method='POST', path=path, data=self._build_resource()) - self._set_properties(api_response) + return self._job def fetch_data(self, max_results=None, page_token=None, start_index=None, timeout_ms=None, client=None): @@ -428,9 +268,6 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, the current page: ``iterator.page.num_items``). :raises: ValueError if the query has not yet been executed. """ - if self.name is None: - raise ValueError("Query not yet executed: call 'run()'") - client = self._require_client(client) params = {} @@ -443,7 +280,7 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, if max_results is not None: params['maxResults'] = max_results - path = '/projects/%s/queries/%s' % (self.project, self.name) + path = '/projects/%s/queries/%s' % (self.project, self.job_id) iterator = page_iterator.HTTPIterator( client=client, api_request=client._connection.api_request, @@ -455,7 +292,8 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, next_token='pageToken', extra_params=params) iterator.query_result = self - iterator.job = self.job + iterator.project = self.project + iterator.job_id = self.job_id return iterator diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 3f667039f497..89e2ebef1b70 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1481,7 +1481,7 @@ def test_query_rows_w_job_id(self): RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) rows_iter = client.query_rows(QUERY, job_id=JOB) - rows = [row for row in rows_iter] + rows = list(rows_iter) self.assertEqual(rows, []) self.assertIs(rows_iter.client, client) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index e6b903bfebaf..ebe9ed49ddda 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2054,7 +2054,13 @@ def test_query_results_w_cached_value(self): client = _Client(self.PROJECT) job = self._make_one(self.JOB_NAME, self.QUERY, client) - query_results = QueryResults(None, client) + resource = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_NAME, + }, + } + query_results = QueryResults(client, resource) job._query_results = query_results results = job.query_results() @@ -2080,14 +2086,15 @@ def test_result(self): def test_result_invokes_begins(self): begun_resource = self._makeResource() - incomplete_resource = {'jobComplete': False} - query_resource = { - 'jobComplete': True, + incomplete_resource = { + 'jobComplete': False, 'jobReference': { 'projectId': self.PROJECT, 'jobId': self.JOB_NAME, }, } + query_resource = copy.deepcopy(incomplete_resource) + query_resource['jobComplete'] = True done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} connection = _Connection( diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 9340689315a7..d2eae2ad77fb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -14,13 +14,20 @@ import unittest +import mock + +from google.cloud.bigquery import Client + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + class TestQueryResults(unittest.TestCase): PROJECT = 'project' - JOB_NAME = 'job_name' - JOB_NAME = 'test-synchronous-query' - JOB_TYPE = 'query' - QUERY = 'select count(*) from persons' + JOB_ID = 'test-synchronous-query' TOKEN = 'TOKEN' @staticmethod @@ -32,49 +39,14 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - def _makeResource(self, complete=False): - resource = { + def _makeResource(self): + return { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, - }, - 'jobComplete': complete, - 'errors': [], - 'schema': { - 'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, - ], + 'jobId': self.JOB_ID, }, } - if complete: - resource['totalRows'] = '1000' - resource['rows'] = [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': 32}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': 33}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': 29}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': 27}, - ]}, - ] - resource['pageToken'] = self.TOKEN - resource['totalBytesProcessed'] = 100000 - resource['numDmlAffectedRows'] = 123 - resource['cacheHit'] = False - - return resource - def _verifySchema(self, query, resource): from google.cloud.bigquery.schema import SchemaField @@ -92,580 +64,230 @@ def _verifySchema(self, query, resource): else: self.assertEqual(query.schema, ()) - def _verifyRows(self, query, resource): - expected = resource.get('rows') - if expected is None: - self.assertEqual(query.rows, []) - else: - found = query.rows - self.assertEqual(len(found), len(expected)) - for f_row, e_row in zip(found, expected): - self.assertEqual(f_row, - tuple([cell['v'] for cell in e_row['f']])) - - def _verify_udf_resources(self, query, resource): - udf_resources = resource.get('userDefinedFunctionResources', ()) - self.assertEqual(len(query.udf_resources), len(udf_resources)) - for found, expected in zip(query.udf_resources, udf_resources): - if 'resourceUri' in expected: - self.assertEqual(found.udf_type, 'resourceUri') - self.assertEqual(found.value, expected['resourceUri']) - else: - self.assertEqual(found.udf_type, 'inlineCode') - self.assertEqual(found.value, expected['inlineCode']) - - def _verifyQueryParameters(self, query, resource): - query_parameters = resource.get('queryParameters', ()) - self.assertEqual(len(query.query_parameters), len(query_parameters)) - for found, expected in zip(query.query_parameters, query_parameters): - self.assertEqual(found.to_api_repr(), expected) - - def _verifyResourceProperties(self, query, resource): - self.assertEqual(query.cache_hit, resource.get('cacheHit')) - self.assertEqual(query.complete, resource.get('jobComplete')) - self.assertEqual(query.errors, resource.get('errors')) - self.assertEqual(query.page_token, resource.get('pageToken')) - - if 'totalRows' in resource: - self.assertEqual(query.total_rows, int(resource['totalRows'])) - else: - self.assertIsNone(query.total_rows) - - if 'totalBytesProcessed' in resource: - self.assertEqual(query.total_bytes_processed, - int(resource['totalBytesProcessed'])) - else: - self.assertIsNone(query.total_bytes_processed) - - if 'jobReference' in resource: - self.assertEqual(query.name, resource['jobReference']['jobId']) - else: - self.assertIsNone(query.name) - - if 'numDmlAffectedRows' in resource: - self.assertEqual(query.num_dml_affected_rows, - int(resource['numDmlAffectedRows'])) - else: - self.assertIsNone(query.num_dml_affected_rows) - - self._verify_udf_resources(query, resource) - self._verifyQueryParameters(query, resource) - self._verifySchema(query, resource) - self._verifyRows(query, resource) - def test_ctor_defaults(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertEqual(query.query, self.QUERY) + query = self._make_one(client, self._makeResource()) self.assertIs(query._client, client) - self.assertIsNone(query.cache_hit) self.assertIsNone(query.complete) self.assertIsNone(query.errors) - self.assertIsNone(query.name) self.assertIsNone(query.page_token) - self.assertEqual(query.query_parameters, []) + self.assertEqual(query.project, self.PROJECT) self.assertEqual(query.rows, []) self.assertEqual(query.schema, ()) self.assertIsNone(query.total_rows) self.assertIsNone(query.total_bytes_processed) - self.assertEqual(query.udf_resources, []) - - self.assertIsNone(query.default_dataset) - self.assertIsNone(query.max_results) - self.assertIsNone(query.preserve_nulls) - self.assertIsNone(query.use_query_cache) - self.assertIsNone(query.use_legacy_sql) - - def test_ctor_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client, udf_resources=udf_resources) - self.assertEqual(query.udf_resources, udf_resources) - - def test_ctor_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client, - query_parameters=query_parameters) - self.assertEqual(query.query_parameters, query_parameters) - - def test_from_query_job(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import QueryJob - from google.cloud.bigquery._helpers import UDFResource - - DS_ID = 'DATASET' - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - client = _Client(self.PROJECT) - job = QueryJob( - self.JOB_NAME, self.QUERY, client, - udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - ds_ref = DatasetReference(self.PROJECT, DS_ID) - job.default_dataset = ds_ref - job.use_query_cache = True - job.use_legacy_sql = True - klass = self._get_target_class() - - query = klass.from_query_job(job) - - self.assertEqual(query.name, self.JOB_NAME) - self.assertEqual(query.query, self.QUERY) - self.assertIs(query._client, client) - self.assertIs(query._job, job) - self.assertEqual(query.udf_resources, job.udf_resources) - self.assertIs(query.default_dataset, ds_ref) - self.assertTrue(query.use_query_cache) - self.assertTrue(query.use_legacy_sql) - - def test_from_query_job_wo_default_dataset(self): - from google.cloud.bigquery.job import QueryJob - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - client = _Client(self.PROJECT) - job = QueryJob( - self.JOB_NAME, self.QUERY, client, - udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - klass = self._get_target_class() - - query = klass.from_query_job(job) - - self.assertEqual(query.query, self.QUERY) - self.assertIs(query._client, client) - self.assertIs(query._job, job) - self.assertEqual(query.udf_resources, job.udf_resources) - self.assertIsNone(query.default_dataset) - self.assertIsNone(query.use_query_cache) - self.assertIsNone(query.use_legacy_sql) - - def test_job_wo_jobid(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertIsNone(query.job) def test_job_w_jobid(self): from google.cloud.bigquery.job import QueryJob SERVER_GENERATED = 'SERVER_GENERATED' - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - query._properties['jobReference'] = { - 'projectId': self.PROJECT, - 'jobId': SERVER_GENERATED, + job_resource = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': SERVER_GENERATED, + }, + 'configuration': {'query': {'query': 'SELECT 1'}}, + } + query_resource = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': SERVER_GENERATED, + }, } - job = query.job + conn = _Connection(job_resource) + client = _Client(self.PROJECT, conn) + query = self._make_one(client, query_resource) + job = query.job() self.assertIsInstance(job, QueryJob) - self.assertEqual(job.query, self.QUERY) self.assertIs(job._client, client) self.assertEqual(job.job_id, SERVER_GENERATED) - fetched_later = query.job + fetched_later = query.job() self.assertIs(fetched_later, job) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual( + req['path'], + '/projects/{}/jobs/{}'.format(self.PROJECT, SERVER_GENERATED)) def test_cache_hit_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.cache_hit) def test_cache_hit_present(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'cacheHit': True} - query._set_properties(resource) + resource = self._makeResource() + resource['cacheHit'] = True + query = self._make_one(client, resource) self.assertTrue(query.cache_hit) def test_complete_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.complete) def test_complete_present(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'jobComplete': True} - query._set_properties(resource) + resource = self._makeResource() + resource['jobComplete'] = True + query = self._make_one(client, resource) self.assertTrue(query.complete) def test_errors_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.errors) def test_errors_present(self): ERRORS = [ {'reason': 'testing'}, ] + resource = self._makeResource() + resource['errors'] = ERRORS client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'errors': ERRORS} - query._set_properties(resource) + query = self._make_one(client, resource) self.assertEqual(query.errors, ERRORS) - def test_name_missing(self): + def test_job_id_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertIsNone(query.name) + with self.assertRaises(ValueError): + self._make_one(client, {}) - def test_name_broken_job_reference(self): + def test_job_id_broken_job_reference(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) resource = {'jobReference': {'bogus': 'BOGUS'}} - query._set_properties(resource) - self.assertIsNone(query.name) + with self.assertRaises(ValueError): + self._make_one(client, resource) - def test_name_present(self): - JOB_ID = 'JOB_ID' + def test_job_id_present(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'jobReference': {'jobId': JOB_ID}} - query._set_properties(resource) - self.assertEqual(query.name, JOB_ID) + resource = self._makeResource() + resource['jobReference']['jobId'] = 'custom-job' + query = self._make_one(client, resource) + self.assertEqual(query.job_id, 'custom-job') def test_page_token_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.page_token) def test_page_token_present(self): - TOKEN = 'TOKEN' client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'pageToken': TOKEN} - query._set_properties(resource) - self.assertEqual(query.page_token, TOKEN) - - def test_total_rows_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertIsNone(query.total_rows) + resource = self._makeResource() + resource['pageToken'] = 'TOKEN' + query = self._make_one(client, resource) + self.assertEqual(query.page_token, 'TOKEN') def test_total_rows_present_integer(self): - TOTAL_ROWS = 42 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalRows': TOTAL_ROWS} - query._set_properties(resource) - self.assertEqual(query.total_rows, TOTAL_ROWS) + resource = self._makeResource() + resource['totalRows'] = 42 + query = self._make_one(client, resource) + self.assertEqual(query.total_rows, 42) def test_total_rows_present_string(self): - TOTAL_ROWS = 42 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalRows': str(TOTAL_ROWS)} - query._set_properties(resource) - self.assertEqual(query.total_rows, TOTAL_ROWS) + resource = self._makeResource() + resource['totalRows'] = '42' + query = self._make_one(client, resource) + self.assertEqual(query.total_rows, 42) def test_total_bytes_processed_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.total_bytes_processed) def test_total_bytes_processed_present_integer(self): - TOTAL_BYTES_PROCESSED = 123456 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalBytesProcessed': TOTAL_BYTES_PROCESSED} - query._set_properties(resource) - self.assertEqual(query.total_bytes_processed, TOTAL_BYTES_PROCESSED) + resource = self._makeResource() + resource['totalBytesProcessed'] = 123456 + query = self._make_one(client, resource) + self.assertEqual(query.total_bytes_processed, 123456) def test_total_bytes_processed_present_string(self): - TOTAL_BYTES_PROCESSED = 123456 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalBytesProcessed': str(TOTAL_BYTES_PROCESSED)} - query._set_properties(resource) - self.assertEqual(query.total_bytes_processed, TOTAL_BYTES_PROCESSED) + resource = self._makeResource() + resource['totalBytesProcessed'] = '123456' + query = self._make_one(client, resource) + self.assertEqual(query.total_bytes_processed, 123456) def test_num_dml_affected_rows_missing(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(client, self._makeResource()) self.assertIsNone(query.num_dml_affected_rows) def test_num_dml_affected_rows_present_integer(self): - DML_AFFECTED_ROWS = 123456 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'numDmlAffectedRows': DML_AFFECTED_ROWS} - query._set_properties(resource) - self.assertEqual(query.num_dml_affected_rows, DML_AFFECTED_ROWS) + resource = self._makeResource() + resource['numDmlAffectedRows'] = 123456 + query = self._make_one(client, resource) + self.assertEqual(query.num_dml_affected_rows, 123456) def test_num_dml_affected_rows_present_string(self): - DML_AFFECTED_ROWS = 123456 client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'numDmlAffectedRows': str(DML_AFFECTED_ROWS)} - query._set_properties(resource) - self.assertEqual(query.num_dml_affected_rows, DML_AFFECTED_ROWS) + resource = self._makeResource() + resource['numDmlAffectedRows'] = '123456' + query = self._make_one(client, resource) + self.assertEqual(query.num_dml_affected_rows, 123456) def test_schema(self): client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self._verifyResourceProperties(query, {}) - resource = { - 'schema': { - 'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, - ], - }, + query = self._make_one(client, self._makeResource()) + self._verifySchema(query, self._makeResource()) + resource = self._makeResource() + resource['schema'] = { + 'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, + ], } query._set_properties(resource) - self._verifyResourceProperties(query, resource) - - def test_run_w_already_has_job(self): - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query._job = object() # simulate already running - with self.assertRaises(ValueError): - query.run() - - def test_run_w_already_has_job_in_properties(self): - JOB_ID = 'JOB_ID' - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query._properties['jobReference'] = {'jobId': JOB_ID} - with self.assertRaises(ValueError): - query.run() - - def test_run_w_bound_client(self): - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - self.assertEqual(query.udf_resources, []) - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = {'query': self.QUERY} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_alternate_client(self): - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=True) - DATASET = 'test_dataset' - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - query = self._make_one(self.QUERY, client1) - - query.default_dataset = client2.dataset(DATASET) - query.max_results = 100 - query.preserve_nulls = True - query.timeout_ms = 20000 - query.use_query_cache = False - query.use_legacy_sql = True - query.dry_run = True - - query.run(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'query': self.QUERY, - 'defaultDataset': { - 'projectId': self.PROJECT, - 'datasetId': DATASET, - }, - 'dryRun': True, - 'maxResults': 100, - 'preserveNulls': True, - 'timeoutMs': 20000, - 'useQueryCache': False, - 'useLegacySql': True, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_inline_udf(self): - from google.cloud.bigquery._helpers import UDFResource - - INLINE_UDF_CODE = 'var someCode = "here";' - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['userDefinedFunctionResources'] = [ - {'inlineCode': INLINE_UDF_CODE}, - ] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query.udf_resources = [UDFResource("inlineCode", INLINE_UDF_CODE)] - - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = {'query': self.QUERY, - 'userDefinedFunctionResources': - [{'inlineCode': INLINE_UDF_CODE}]} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_udf_resource_uri(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['userDefinedFunctionResources'] = [ - {'resourceUri': RESOURCE_URI}, - ] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query.udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = {'query': self.QUERY, - 'userDefinedFunctionResources': - [{'resourceUri': RESOURCE_URI}]} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_mixed_udfs(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - INLINE_UDF_CODE = 'var someCode = "here";' - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['userDefinedFunctionResources'] = [ - {'resourceUri': RESOURCE_URI}, - {'inlineCode': INLINE_UDF_CODE}, - ] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query.udf_resources = [UDFResource("resourceUri", RESOURCE_URI), - UDFResource("inlineCode", INLINE_UDF_CODE)] - - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(query.udf_resources, - [UDFResource("resourceUri", RESOURCE_URI), - UDFResource("inlineCode", INLINE_UDF_CODE)]) - SENT = {'query': self.QUERY, - 'userDefinedFunctionResources': [ - {'resourceUri': RESOURCE_URI}, - {"inlineCode": INLINE_UDF_CODE}]} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_named_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['parameterMode'] = 'NAMED' - RESOURCE['queryParameters'] = [ - { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - }, - ] - query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client, - query_parameters=query_parameters) - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'query': self.QUERY, - 'parameterMode': 'NAMED', - 'queryParameters': RESOURCE['queryParameters'], - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_positional_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['parameterMode'] = 'POSITIONAL' - RESOURCE['queryParameters'] = [ - { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - }, - ] - query_parameters = [ScalarQueryParameter.positional('INT64', 123)] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client, - query_parameters=query_parameters) - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'query': self.QUERY, - 'parameterMode': 'POSITIONAL', - 'queryParameters': RESOURCE['queryParameters'], - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_fetch_data_query_not_yet_run(self): - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - self.assertRaises(ValueError, query.fetch_data) + self._verifySchema(query, resource) def test_fetch_data_w_bound_client(self): import six - PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_NAME) - BEFORE = self._makeResource(complete=False) - AFTER = self._makeResource(complete=True) - del AFTER['totalRows'] + PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_ID) + schema = { + 'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, + ], + } + BEFORE = self._makeResource() + BEFORE['jobComplete'] = False + BEFORE['schema'] = schema + AFTER = self._makeResource() + AFTER['rows'] = [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': 32}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': 33}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': 29}, + ]}, + {'f': [ + {'v': 'Bhettye Rhubble'}, + {'v': 27}, + ]}, + ] + AFTER['cacheHit'] = False + AFTER['jobComplete'] = True + AFTER['numDmlAffectedRows'] = 123 + AFTER['pageToken'] = self.TOKEN + AFTER['schema'] = schema + AFTER['totalBytesProcessed'] = 100000 conn = _Connection(AFTER) client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query._set_properties(BEFORE) + query = self._make_one(client, BEFORE) self.assertFalse(query.complete) iterator = query.fetch_data() @@ -691,24 +313,55 @@ def test_fetch_data_w_bound_client(self): def test_fetch_data_w_alternate_client(self): import six - PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_NAME) + PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_ID) MAX = 10 - TOKEN = 'TOKEN' START = 2257 TIMEOUT = 20000 - BEFORE = self._makeResource(complete=False) - AFTER = self._makeResource(complete=True) + + schema = { + 'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, + ], + } + BEFORE = self._makeResource() + BEFORE['jobComplete'] = False + BEFORE['schema'] = schema + AFTER = self._makeResource() + AFTER['rows'] = [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': 32}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': 33}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': 29}, + ]}, + {'f': [ + {'v': 'Bhettye Rhubble'}, + {'v': 27}, + ]}, + ] + AFTER['cacheHit'] = False + AFTER['jobComplete'] = True + AFTER['numDmlAffectedRows'] = 123 + AFTER['pageToken'] = self.TOKEN + AFTER['schema'] = schema + AFTER['totalBytesProcessed'] = 100000 conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(AFTER) client2 = _Client(project=self.PROJECT, connection=conn2) - query = self._make_one(self.QUERY, client1) - query._set_properties(BEFORE) + query = self._make_one(client1, BEFORE) self.assertFalse(query.complete) iterator = query.fetch_data( - client=client2, max_results=MAX, page_token=TOKEN, + client=client2, max_results=MAX, page_token=self.TOKEN, start_index=START, timeout_ms=TIMEOUT) page = six.next(iterator.pages) rows = list(page) @@ -721,7 +374,7 @@ def test_fetch_data_w_alternate_client(self): self.assertEqual(rows[1], ('Bharney Rhubble', 33)) self.assertEqual(rows[2], ('Wylma Phlyntstone', 29)) self.assertEqual(rows[3], ('Bhettye Rhubble', 27)) - self.assertEqual(total_rows, int(AFTER['totalRows'])) + self.assertIsNone(total_rows) self.assertEqual(page_token, AFTER['pageToken']) self.assertEqual(len(conn1._requested), 0) @@ -731,21 +384,22 @@ def test_fetch_data_w_alternate_client(self): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['query_params'], {'maxResults': MAX, - 'pageToken': TOKEN, + 'pageToken': self.TOKEN, 'startIndex': START, 'timeoutMs': TIMEOUT}) -class _Client(object): +class _Client(Client): def __init__(self, project='project', connection=None): - self.project = project - self._connection = connection + creds = _make_credentials() + http = object() + super(_Client, self).__init__( + project=project, credentials=creds, _http=http) - def dataset(self, dataset_id): - from google.cloud.bigquery.dataset import DatasetReference - - return DatasetReference(self.project, dataset_id) + if connection is None: + connection = _Connection() + self._connection = connection class _Connection(object): From 07adc705ff5be0340a5adc471a308022f8ac4ba3 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Fri, 6 Oct 2017 15:39:54 -0400 Subject: [PATCH 0298/2016] bigquery: add Client.list_rows, remove Table.fetch_data (#4119) --- .../google/cloud/bigquery/client.py | 79 ++++- .../google/cloud/bigquery/table.py | 58 ---- .../google-cloud-bigquery/tests/system.py | 2 +- .../tests/unit/test_client.py | 269 ++++++++++++++++- .../tests/unit/test_table.py | 280 ------------------ 5 files changed, 333 insertions(+), 355 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index da69decd03c8..db1a4b0f2138 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -26,7 +26,7 @@ from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob @@ -34,6 +34,8 @@ from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import QueryResults +from google.cloud.bigquery._helpers import _item_to_row +from google.cloud.bigquery._helpers import _rows_page_start class Project(object): @@ -346,7 +348,6 @@ def delete_table(self, table): :type table: One of: :class:`~google.cloud.bigquery.table.Table` :class:`~google.cloud.bigquery.table.TableReference` - :param table: the table to delete, or a reference to it. """ if not isinstance(table, (Table, TableReference)): @@ -667,6 +668,80 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None): job.begin() return job.result(timeout=timeout) + def list_rows(self, table, selected_fields=None, max_results=None, + page_token=None, start_index=None): + """List the rows of the table. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list + + .. note:: + + This method assumes that the provided schema is up-to-date with the + schema as defined on the back-end: if the two schemas are not + identical, the values returned may be incomplete. To ensure that the + local copy of the schema is up-to-date, call ``client.get_table``. + + :type table: One of: + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` + :param table: the table to list, or a reference to it. + + :type selected_fields: list of :class:`SchemaField` + :param selected_fields: + The fields to return. Required if ``table`` is a + :class:`~google.cloud.bigquery.table.TableReference`. + + :type max_results: int + :param max_results: maximum number of rows to return. + + :type page_token: str + :param page_token: (Optional) Token representing a cursor into the + table's rows. + + :type start_index: int + :param page_token: (Optional) The zero-based index of the starting + row to read. + + :rtype: :class:`~google.api.core.page_iterator.Iterator` + :returns: Iterator of row data :class:`tuple`s. During each page, the + iterator will have the ``total_rows`` attribute set, + which counts the total number of rows **in the table** + (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). + + """ + if selected_fields is not None: + schema = selected_fields + elif isinstance(table, TableReference): + raise ValueError('need selected_fields with TableReference') + elif isinstance(table, Table): + if len(table._schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + schema = table.schema + else: + raise TypeError('table should be Table or TableReference') + + params = {} + if selected_fields is not None: + params['selectedFields'] = [f.name for f in selected_fields] + if start_index is not None: + params['startIndex'] = start_index + + iterator = page_iterator.HTTPIterator( + client=self, + api_request=self._connection.api_request, + path='%s/data' % (table.path,), + item_to_value=_item_to_row, + items_key='rows', + page_token=page_token, + next_token='pageToken', + max_results=max_results, + page_start=_rows_page_start, + extra_params=params) + iterator.schema = schema + return iterator + # pylint: disable=unused-argument def _item_to_project(iterator, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a4f1933303a0..dfc31be29745 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -25,13 +25,10 @@ from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload -from google.api.core import page_iterator from google.cloud import exceptions from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery._helpers import _item_to_row -from google.cloud.bigquery._helpers import _rows_page_start from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW @@ -768,61 +765,6 @@ def update(self, client=None): method='PUT', path=self.path, data=self._build_resource()) self._set_properties(api_response) - def fetch_data(self, max_results=None, page_token=None, client=None): - """API call: fetch the table data via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list - - .. note:: - - This method assumes that its instance's ``schema`` attribute is - up-to-date with the schema as defined on the back-end: if the - two schemas are not identical, the values returned may be - incomplete. To ensure that the local copy of the schema is - up-to-date, call ``client.get_table``. - - :type max_results: int - :param max_results: (Optional) Maximum number of rows to return. - - :type page_token: str - :param page_token: (Optional) Token representing a cursor into the - table's rows. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: (Optional) The client to use. If not passed, falls - back to the ``client`` stored on the current dataset. - - :rtype: :class:`~google.api.core.page_iterator.Iterator` - :returns: Iterator of row data :class:`tuple`s. During each page, the - iterator will have the ``total_rows`` attribute set, - which counts the total number of rows **in the table** - (this is distinct from the total number of rows in the - current page: ``iterator.page.num_items``). - """ - if len(self._schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - - params = {} - - if max_results is not None: - params['maxResults'] = max_results - - client = self._require_client(client) - path = '%s/data' % (self.path,) - iterator = page_iterator.HTTPIterator( - client=client, - api_request=client._connection.api_request, - path=path, - item_to_value=_item_to_row, - items_key='rows', - page_token=page_token, - page_start=_rows_page_start, - next_token='pageToken', - extra_params=params) - iterator.schema = self._schema - return iterator - def row_from_mapping(self, mapping): """Convert a mapping to a row tuple using the schema. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index ada6d92b5050..1936fc435e57 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -295,7 +295,7 @@ def test_update_table(self): @staticmethod def _fetch_single_page(table): - iterator = table.fetch_data() + iterator = Config.CLIENT.list_rows(table) page = six.next(iterator.pages) return list(page) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 89e2ebef1b70..cd0c3f6d71b0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -106,7 +106,6 @@ def test__get_query_results_hit(self): self.assertTrue(query_results.complete) def test_list_projects_defaults(self): - import six from google.cloud.bigquery.client import Project PROJECT_1 = 'PROJECT_ONE' @@ -151,8 +150,6 @@ def test_list_projects_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_projects_explicit_response_missing_projects_key(self): - import six - PROJECT = 'PROJECT' PATH = 'projects' TOKEN = 'TOKEN' @@ -177,7 +174,6 @@ def test_list_projects_explicit_response_missing_projects_key(self): {'maxResults': 3, 'pageToken': TOKEN}) def test_list_datasets_defaults(self): - import six from google.cloud.bigquery.dataset import Dataset PROJECT = 'PROJECT' @@ -222,8 +218,6 @@ def test_list_datasets_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_datasets_explicit_response_missing_datasets_key(self): - import six - PROJECT = 'PROJECT' PATH = 'projects/%s/datasets' % PROJECT TOKEN = 'TOKEN' @@ -636,8 +630,6 @@ def test_update_dataset(self): self.assertEqual(req['headers']['If-Match'], 'etag') def test_list_dataset_tables_empty(self): - import six - PROJECT = 'PROJECT' DS_ID = 'DATASET_ID' creds = _make_credentials() @@ -660,7 +652,6 @@ def test_list_dataset_tables_empty(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_dataset_tables_defaults(self): - import six from google.cloud.bigquery.table import Table PROJECT = 'PROJECT' @@ -711,7 +702,6 @@ def test_list_dataset_tables_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_dataset_tables_explicit(self): - import six from google.cloud.bigquery.table import Table PROJECT = 'PROJECT' @@ -895,7 +885,6 @@ def test_get_job_hit(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_defaults(self): - import six from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob @@ -1027,7 +1016,6 @@ def test_list_jobs_defaults(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_load_job_wo_sourceUris(self): - import six from google.cloud.bigquery.job import LoadJob PROJECT = 'PROJECT' @@ -1084,8 +1072,6 @@ def test_list_jobs_load_job_wo_sourceUris(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_explicit_missing(self): - import six - PROJECT = 'PROJECT' PATH = 'projects/%s/jobs' % PROJECT DATA = {} @@ -1546,6 +1532,261 @@ def test_query_rows_w_job_config(self): self.assertEqual(configuration['query']['useLegacySql'], True) self.assertEqual(configuration['dryRun'], True) + def test_list_rows(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table, SchemaField + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + PROJECT, DS_ID, TABLE_ID) + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + WHEN_1 = WHEN + datetime.timedelta(seconds=1) + WHEN_2 = WHEN + datetime.timedelta(seconds=2) + ROWS = 1234 + TOKEN = 'TOKEN' + + def _bigquery_timestamp_float_repr(ts_float): + # Preserve microsecond precision for E+09 timestamps + return '%0.15E' % (ts_float,) + + DATA = { + 'totalRows': str(ROWS), + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': '32'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS)}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': '33'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS + 1)}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': '29'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS + 2)}, + ]}, + {'f': [ + {'v': 'Bhettye Rhubble'}, + {'v': None}, + {'v': None}, + ]}, + ] + } + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(DATA, DATA) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='NULLABLE') + joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') + table = Table(table_ref, schema=[full_name, age, joined]) + + iterator = client.list_rows(table) + page = six.next(iterator.pages) + rows = list(page) + total_rows = iterator.total_rows + page_token = iterator.next_page_token + + self.assertEqual(len(rows), 4) + self.assertEqual(rows[0], ('Phred Phlyntstone', 32, WHEN)) + self.assertEqual(rows[1], ('Bharney Rhubble', 33, WHEN_1)) + self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, WHEN_2)) + self.assertEqual(rows[3], ('Bhettye Rhubble', None, None)) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {}) + + def test_list_rows_query_params(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table, SchemaField + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + table = Table(table_ref, + schema=[SchemaField('age', 'INTEGER', mode='NULLABLE')]) + tests = [ + ({}, {}), + ({'start_index': 1}, {'startIndex': 1}), + ({'max_results': 2}, {'maxResults': 2}), + ({'start_index': 1, 'max_results': 2}, + {'startIndex': 1, 'maxResults': 2}), + ] + conn = client._connection = _Connection(*len(tests)*[{}]) + for i, test in enumerate(tests): + iterator = client.list_rows(table, **test[0]) + six.next(iterator.pages) + req = conn._requested[i] + self.assertEqual(req['query_params'], test[1], + 'for kwargs %s' % test[0]) + + def test_list_rows_repeated_fields(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import SchemaField + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + PROJECT, DS_ID, TABLE_ID) + ROWS = 1234 + TOKEN = 'TOKEN' + DATA = { + 'totalRows': ROWS, + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': [{'v': 'red'}, {'v': 'green'}]}, + {'v': [{ + 'v': { + 'f': [ + {'v': [{'v': '1'}, {'v': '2'}]}, + {'v': [{'v': '3.1415'}, {'v': '1.414'}]}, + ]} + }]}, + ]}, + ] + } + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(DATA) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + color = SchemaField('color', 'STRING', mode='REPEATED') + index = SchemaField('index', 'INTEGER', 'REPEATED') + score = SchemaField('score', 'FLOAT', 'REPEATED') + struct = SchemaField('struct', 'RECORD', mode='REPEATED', + fields=[index, score]) + + iterator = client.list_rows(table_ref, selected_fields=[color, struct]) + page = six.next(iterator.pages) + rows = list(page) + total_rows = iterator.total_rows + page_token = iterator.next_page_token + + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0][0], ['red', 'green']) + self.assertEqual(rows[0][1], [{'index': [1, 2], + 'score': [3.1415, 1.414]}]) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_rows_w_record_schema(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table, SchemaField + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + PROJECT, DS_ID, TABLE_ID) + ROWS = 1234 + TOKEN = 'TOKEN' + DATA = { + 'totalRows': ROWS, + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': None}, + ]}, + ] + } + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(DATA) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + area_code = SchemaField('area_code', 'STRING', 'REQUIRED') + local_number = SchemaField('local_number', 'STRING', 'REQUIRED') + rank = SchemaField('rank', 'INTEGER', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='NULLABLE', + fields=[area_code, local_number, rank]) + table = Table(table_ref, schema=[full_name, phone]) + + iterator = client.list_rows(table) + page = six.next(iterator.pages) + rows = list(page) + total_rows = iterator.total_rows + page_token = iterator.next_page_token + + self.assertEqual(len(rows), 3) + self.assertEqual(rows[0][0], 'Phred Phlyntstone') + self.assertEqual(rows[0][1], {'area_code': '800', + 'local_number': '555-1212', + 'rank': 1}) + self.assertEqual(rows[1][0], 'Bharney Rhubble') + self.assertEqual(rows[1][1], {'area_code': '877', + 'local_number': '768-5309', + 'rank': 2}) + self.assertEqual(rows[2][0], 'Wylma Phlyntstone') + self.assertIsNone(rows[2][1]) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_rows_errors(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + + # table ref with no selected fields + with self.assertRaises(ValueError): + client.list_rows(table_ref) + + # table with no schema + with self.assertRaises(ValueError): + client.list_rows(Table(table_ref)) + + # neither Table nor tableReference + with self.assertRaises(TypeError): + client.list_rows(1) + class _Connection(object): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index c86c21880bda..fc0ff3370974 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -945,286 +945,6 @@ def test_update_w_alternate_client(self): self.assertEqual(req['data'], SENT) self._verifyResourceProperties(table, RESOURCE) - def test_fetch_data_wo_schema(self): - from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA - - client = _Client(project=self.PROJECT) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - with self.assertRaises(ValueError) as exc: - table.fetch_data() - - self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) - - def test_fetch_data_w_bound_client(self): - import datetime - import six - from google.cloud._helpers import UTC - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) - WHEN_1 = WHEN + datetime.timedelta(seconds=1) - WHEN_2 = WHEN + datetime.timedelta(seconds=2) - ROWS = 1234 - TOKEN = 'TOKEN' - - def _bigquery_timestamp_float_repr(ts_float): - # Preserve microsecond precision for E+09 timestamps - return '%0.15E' % (ts_float,) - - DATA = { - 'totalRows': str(ROWS), - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': '32'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS)}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': '33'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS + 1)}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': '29'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS + 2)}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': None}, - {'v': None}, - ]}, - ] - } - - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='NULLABLE') - joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._make_one(table_ref, schema=[full_name, age, joined], - client=client) - - iterator = table.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32, WHEN)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33, WHEN_1)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, WHEN_2)) - self.assertEqual(rows[3], ('Bhettye Rhubble', None, None)) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_fetch_data_w_alternate_client(self): - import six - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - MAX = 10 - TOKEN = 'TOKEN' - DATA = { - 'rows': [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': '32'}, - {'v': 'true'}, - {'v': '3.1415926'}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': '33'}, - {'v': 'false'}, - {'v': '1.414'}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': '29'}, - {'v': 'true'}, - {'v': '2.71828'}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': '27'}, - {'v': None}, - {'v': None}, - ]}, - ] - } - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(DATA) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') - score = SchemaField('score', 'FLOAT', mode='NULLABLE') - table = self._make_one(table_ref, - schema=[full_name, age, voter, score], - client=client1) - - iterator = table.fetch_data( - client=client2, max_results=MAX, page_token=TOKEN) - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32, True, 3.1415926)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33, False, 1.414)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, True, 2.71828)) - self.assertEqual(rows[3], ('Bhettye Rhubble', 27, None, None)) - self.assertIsNone(total_rows) - self.assertIsNone(page_token) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'maxResults': MAX, 'pageToken': TOKEN}) - - def test_fetch_data_w_repeated_fields(self): - import six - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - ROWS = 1234 - TOKEN = 'TOKEN' - DATA = { - 'totalRows': ROWS, - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': [{'v': 'red'}, {'v': 'green'}]}, - {'v': [{ - 'v': { - 'f': [ - {'v': [{'v': '1'}, {'v': '2'}]}, - {'v': [{'v': '3.1415'}, {'v': '1.414'}]}, - ]} - }]}, - ]}, - ] - } - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - color = SchemaField('color', 'STRING', mode='REPEATED') - index = SchemaField('index', 'INTEGER', 'REPEATED') - score = SchemaField('score', 'FLOAT', 'REPEATED') - struct = SchemaField('struct', 'RECORD', mode='REPEATED', - fields=[index, score]) - table = self._make_one(table_ref, schema=[color, struct], - client=client) - - iterator = table.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertEqual(len(rows), 1) - self.assertEqual(rows[0][0], ['red', 'green']) - self.assertEqual(rows[0][1], [{'index': [1, 2], - 'score': [3.1415, 1.414]}]) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_fetch_data_w_record_schema(self): - import six - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - ROWS = 1234 - TOKEN = 'TOKEN' - DATA = { - 'totalRows': ROWS, - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': None}, - ]}, - ] - } - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - area_code = SchemaField('area_code', 'STRING', 'REQUIRED') - local_number = SchemaField('local_number', 'STRING', 'REQUIRED') - rank = SchemaField('rank', 'INTEGER', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='NULLABLE', - fields=[area_code, local_number, rank]) - table = self._make_one(table_ref, schema=[full_name, phone], - client=client) - - iterator = table.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertEqual(len(rows), 3) - self.assertEqual(rows[0][0], 'Phred Phlyntstone') - self.assertEqual(rows[0][1], {'area_code': '800', - 'local_number': '555-1212', - 'rank': 1}) - self.assertEqual(rows[1][0], 'Bharney Rhubble') - self.assertEqual(rows[1][1], {'area_code': '877', - 'local_number': '768-5309', - 'rank': 2}) - self.assertEqual(rows[2][0], 'Wylma Phlyntstone') - self.assertIsNone(rows[2][1]) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - def test_row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} From 831372fd070fb7d51eba5409bd40071e562b96b3 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 9 Oct 2017 10:36:37 -0700 Subject: [PATCH 0299/2016] BigQuery: replaces table.update() and table.patch() with client.update_table() (#4076) * adds client.update_table() * removes table.update() and table.patch() * adds coverage for _verifyResourceProperties() * adds test for deleting property and refactors table resource creation * fixes update_table tests * Fixes logic in _build_resource() --- .../google/cloud/bigquery/client.py | 28 +- .../google/cloud/bigquery/table.py | 177 ++++------- .../google-cloud-bigquery/tests/system.py | 53 ++-- .../tests/unit/test_client.py | 289 +++++++++++++++++- .../tests/unit/test_table.py | 228 ++------------ 5 files changed, 431 insertions(+), 344 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index db1a4b0f2138..50db75f94560 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -211,8 +211,12 @@ def create_table(self, table): """ path = '/projects/%s/datasets/%s/tables' % ( table.project, table.dataset_id) + resource = table._build_resource(Table.all_fields) + doomed = [field for field in resource if resource[field] is None] + for field in doomed: + del resource[field] api_response = self._connection.api_request( - method='POST', path=path, data=table._build_resource()) + method='POST', path=path, data=resource) return Table.from_api_repr(api_response, self) def get_dataset(self, dataset_ref): @@ -285,6 +289,28 @@ def update_dataset(self, dataset, fields): method='PATCH', path=path, data=partial, headers=headers) return Dataset.from_api_repr(api_response) + def update_table(self, table, properties): + """API call: update table properties via a PUT request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update + + :type table: + :class:`google.cloud.bigquery.table.Table` + :param table_ref: the table to update. + + :rtype: :class:`google.cloud.bigquery.table.Table` + :returns: a ``Table`` instance + """ + partial = table._build_resource(properties) + if table.etag is not None: + headers = {'If-Match': table.etag} + else: + headers = None + api_response = self._connection.api_request( + method='PATCH', path=table.path, data=partial, headers=headers) + return Table.from_api_repr(api_response, client=self) + def list_dataset_tables(self, dataset, max_results=None, page_token=None): """List tables in the dataset. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index dfc31be29745..e4814ae16c8e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -175,6 +175,11 @@ class Table(object): _schema = None + all_fields = [ + 'description', 'friendly_name', 'expires', 'location', + 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema' + ] + def __init__(self, table_ref, schema=(), client=None): self._project = table_ref.project self._table_id = table_ref.table_id @@ -240,9 +245,12 @@ def schema(self, value): :raises: TypeError if 'value' is not a sequence, or ValueError if any item in the sequence is not a SchemaField """ - if not all(isinstance(field, SchemaField) for field in value): + if value is None: + self._schema = () + elif not all(isinstance(field, SchemaField) for field in value): raise ValueError('Schema items must be fields') - self._schema = tuple(value) + else: + self._schema = tuple(value) @property def created(self): @@ -613,41 +621,59 @@ def _set_properties(self, api_response): cleaned['expirationTime'] = float(cleaned['expirationTime']) self._properties.update(cleaned) - def _build_resource(self): - """Generate a resource for ``create`` or ``update``.""" - resource = { - 'tableReference': { - 'projectId': self._project, - 'datasetId': self._dataset_id, - 'tableId': self.table_id}, - } - if self.description is not None: - resource['description'] = self.description - - if self.expires is not None: - value = _millis_from_datetime(self.expires) - resource['expirationTime'] = value - - if self.friendly_name is not None: - resource['friendlyName'] = self.friendly_name + def _populate_expires_resource(self, resource): + resource['expirationTime'] = _millis_from_datetime(self.expires) - if self.location is not None: - resource['location'] = self.location + def _populate_partitioning_type_resource(self, resource): + resource['timePartitioning'] = self._properties.get('timePartitioning') - if self.partitioning_type is not None: - resource['timePartitioning'] = self._properties['timePartitioning'] + def _populate_view_use_legacy_sql_resource(self, resource): + if 'view' not in resource: + resource['view'] = {} + resource['view']['useLegacySql'] = self.view_use_legacy_sql - if self.view_query is not None: - view = resource['view'] = {} - view['query'] = self.view_query - if self.view_use_legacy_sql is not None: - view['useLegacySql'] = self.view_use_legacy_sql + def _populate_view_query_resource(self, resource): + if self.view_query is None: + resource['view'] = None + return + if 'view' not in resource: + resource['view'] = {} + resource['view']['query'] = self.view_query - if self._schema: + def _populate_schema_resource(self, resource): + if not self._schema: + resource['schema'] = None + else: resource['schema'] = { - 'fields': _build_schema_resource(self._schema) + 'fields': _build_schema_resource(self._schema), } + custom_resource_fields = { + 'expires': _populate_expires_resource, + 'partitioning_type': _populate_partitioning_type_resource, + 'view_query': _populate_view_query_resource, + 'view_use_legacy_sql': _populate_view_use_legacy_sql_resource, + 'schema': _populate_schema_resource + } + + def _build_resource(self, filter_fields): + """Generate a resource for ``create`` or ``update``.""" + resource = { + 'tableReference': { + 'projectId': self._project, + 'datasetId': self._dataset_id, + 'tableId': self.table_id}, + } + for f in filter_fields: + if f in self.custom_resource_fields: + self.custom_resource_fields[f](self, resource) + else: + # TODO(alixh) refactor to use in both Table and Dataset + # snake case to camel case + words = f.split('_') + api_field = words[0] + ''.join( + map(str.capitalize, words[1:])) + resource[api_field] = getattr(self, f) return resource def exists(self, client=None): @@ -674,97 +700,6 @@ def exists(self, client=None): else: return True - def patch(self, - client=None, - friendly_name=_MARKER, - description=_MARKER, - location=_MARKER, - expires=_MARKER, - view_query=_MARKER, - schema=_MARKER): - """API call: update individual table properties via a PATCH request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/patch - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :type friendly_name: str - :param friendly_name: (Optional) a descriptive name for this table. - - :type description: str - :param description: (Optional) a description of this table. - - :type location: str - :param location: - (Optional) the geographic location where the table resides. - - :type expires: :class:`datetime.datetime` - :param expires: (Optional) point in time at which the table expires. - - :type view_query: str - :param view_query: SQL query defining the table as a view - - :type schema: list of :class:`SchemaField` - :param schema: fields describing the schema - - :raises: ValueError for invalid value types. - """ - client = self._require_client(client) - - partial = {} - - if expires is not _MARKER: - if (not isinstance(expires, datetime.datetime) and - expires is not None): - raise ValueError("Pass a datetime, or None") - partial['expirationTime'] = _millis_from_datetime(expires) - - if description is not _MARKER: - partial['description'] = description - - if friendly_name is not _MARKER: - partial['friendlyName'] = friendly_name - - if location is not _MARKER: - partial['location'] = location - - if view_query is not _MARKER: - if view_query is None: - partial['view'] = None - else: - partial['view'] = {'query': view_query} - - if schema is not _MARKER: - if schema is None: - partial['schema'] = None - else: - partial['schema'] = { - 'fields': _build_schema_resource(schema)} - - api_response = client._connection.api_request( - method='PATCH', path=self.path, data=partial) - self._set_properties(api_response) - - def update(self, client=None): - """API call: update table properties via a PUT request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - api_response = client._connection.api_request( - method='PUT', path=self.path, data=self._build_resource()) - self._set_properties(api_response) - def row_from_mapping(self, mapping): """Convert a mapping to a row tuple using the schema. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1936fc435e57..83d08c7598a4 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -250,14 +250,15 @@ def test_list_dataset_tables(self): table.dataset_id == DATASET_ID)] self.assertEqual(len(created), len(tables_to_create)) - def test_patch_table(self): - dataset = self.temp_dataset(_make_dataset_id('patch_table')) + def test_update_table(self): + dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + table_arg = Table(dataset.table(TABLE_NAME), schema=schema, client=Config.CLIENT) self.assertFalse(table_arg.exists()) table = retry_403(Config.CLIENT.create_table)(table_arg) @@ -265,18 +266,34 @@ def test_patch_table(self): self.assertTrue(table.exists()) self.assertIsNone(table.friendly_name) self.assertIsNone(table.description) - table.patch(friendly_name='Friendly', description='Description') - self.assertEqual(table.friendly_name, 'Friendly') - self.assertEqual(table.description, 'Description') + table.friendly_name = 'Friendly' + table.description = 'Description' - def test_update_table(self): + table2 = Config.CLIENT.update_table( + table, ['friendly_name', 'description']) + + self.assertEqual(table2.friendly_name, 'Friendly') + self.assertEqual(table2.description, 'Description') + + table2.description = None + table3 = Config.CLIENT.update_table(table2, ['description']) + self.assertIsNone(table3.description) + + # If we try to update using table2 again, it will fail because the + # previous update changed the ETag. + table2.description = 'no good' + with self.assertRaises(PreconditionFailed): + Config.CLIENT.update_table(table2, ['description']) + + def test_update_table_schema(self): dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + table_arg = Table(dataset.table(TABLE_NAME), schema=schema, client=Config.CLIENT) self.assertFalse(table_arg.exists()) table = retry_403(Config.CLIENT.create_table)(table_arg) @@ -286,9 +303,11 @@ def test_update_table(self): schema = table.schema schema.append(voter) table.schema = schema - table.update() - self.assertEqual(len(table.schema), len(schema)) - for found, expected in zip(table.schema, schema): + + updated_table = Config.CLIENT.update_table(table, ['schema']) + + self.assertEqual(len(updated_table.schema), len(schema)) + for found, expected in zip(updated_table.schema, schema): self.assertEqual(found.name, expected.name) self.assertEqual(found.field_type, expected.field_type) self.assertEqual(found.mode, expected.mode) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index cd0c3f6d71b0..aad64d980df1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -527,7 +527,8 @@ def test_create_table_w_schema_and_query(self): {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] }, - 'view': {'query': query}, + # TODO(alixh) default to Standard SQL + 'view': {'query': query, 'useLegacySql': None}, } self.assertEqual(req['data'], sent) self.assertEqual(got.table_id, table_id) @@ -629,6 +630,292 @@ def test_update_dataset(self): req = conn._requested[1] self.assertEqual(req['headers']['If-Match'], 'etag') + def test_update_table(self): + from google.cloud.bigquery.table import Table, SchemaField + + project = 'PROJECT' + dataset_id = 'DATASET_ID' + table_id = 'table_id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + description = 'description' + title = 'title' + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] + }, + 'etag': 'etag', + 'description': description, + 'friendlyName': title, + } + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + conn = client._connection = _Connection(resource, resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, schema=schema, client=client) + table.description = description + table.friendly_name = title + + updated_table = client.update_table( + table, ['schema', 'description', 'friendly_name']) + + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, + 'description': description, + 'friendlyName': title, + } + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['data'], sent) + self.assertEqual(req['path'], '/' + path) + self.assertIsNone(req['headers']) + self.assertEqual(updated_table.description, table.description) + self.assertEqual(updated_table.friendly_name, table.friendly_name) + self.assertEqual(updated_table.schema, table.schema) + + # ETag becomes If-Match header. + table._properties['etag'] = 'etag' + client.update_table(table, []) + req = conn._requested[1] + self.assertEqual(req['headers']['If-Match'], 'etag') + + def test_update_table_only_use_legacy_sql(self): + from google.cloud.bigquery.table import Table + + project = 'PROJECT' + dataset_id = 'DATASET_ID' + table_id = 'table_id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'view': {'useLegacySql': True} + } + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=client) + table.view_use_legacy_sql = True + + updated_table = client.update_table(table, ['view_use_legacy_sql']) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'view': {'useLegacySql': True} + } + self.assertEqual(req['data'], sent) + self.assertEqual( + updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + + def test_update_table_w_query(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + from google.cloud.bigquery.table import Table, SchemaField + + project = 'PROJECT' + dataset_id = 'DATASET_ID' + table_id = 'table_id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + query = 'select fullname, age from person_ages' + location = 'EU' + exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + schema_resource = {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]} + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + resource = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': schema_resource, + 'view': {'query': query, 'useLegacySql': True}, + 'location': location, + 'expirationTime': _millis(exp_time) + } + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + conn = client._connection = _Connection(resource) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, schema=schema, client=client) + table.location = location + table.expires = exp_time + table.view_query = query + table.view_use_legacy_sql = True + updated_properties = ['schema', 'view_query', 'location', + 'expires', 'view_use_legacy_sql'] + + updated_table = client.update_table(table, updated_properties) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'view': {'query': query, 'useLegacySql': True}, + 'location': location, + 'expirationTime': _millis(exp_time), + 'schema': schema_resource, + } + self.assertEqual(req['data'], sent) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.view_query, table.view_query) + self.assertEqual(updated_table.location, table.location) + self.assertEqual(updated_table.expires, table.expires) + self.assertEqual( + updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + + def test_update_table_w_schema_None(self): + # Simulate deleting schema: not sure if back-end will actually + # allow this operation, but the spec says it is optional. + project = 'PROJECT' + dataset_id = 'DATASET_ID' + table_id = 'table_id' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + resource1 = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id}, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]} + } + resource2 = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id}, + 'schema': {'fields': []}, + } + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + conn = client._connection = _Connection(resource1, resource2) + table_ref = client.dataset(dataset_id).table(table_id) + table = client.get_table(table_ref) + table.schema = None + + updated_table = client.update_table(table, ['schema']) + + self.assertEqual(len(conn._requested), 2) + req = conn._requested[1] + self.assertEqual(req['method'], 'PATCH') + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'schema': None + } + self.assertEqual(req['data'], sent) + self.assertEqual(req['path'], '/%s' % path) + self.assertEqual(updated_table.schema, table.schema) + + def test_update_table_delete_property(self): + from google.cloud.bigquery.table import Table + + project = 'PROJECT' + dataset_id = 'DATASET_ID' + table_id = 'table_id' + description = 'description' + title = 'title' + path = 'projects/%s/datasets/%s/tables/%s' % ( + project, dataset_id, table_id) + resource1 = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'description': description, + 'friendlyName': title, + } + resource2 = { + 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'description': None, + } + creds = _make_credentials() + client = self._make_one(project=project, credentials=creds) + conn = client._connection = _Connection(resource1, resource2) + table_ref = client.dataset(dataset_id).table(table_id) + table = Table(table_ref, client=client) + table.description = description + table.friendly_name = title + table2 = client.update_table(table, ['description', 'friendly_name']) + self.assertEqual(table2.description, table.description) + table2.description = None + + table3 = client.update_table(table2, ['description']) + self.assertEqual(len(conn._requested), 2) + req = conn._requested[1] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id + }, + 'description': None, + } + self.assertEqual(req['data'], sent) + self.assertIsNone(table3.description) + def test_list_dataset_tables_empty(self): PROJECT = 'PROJECT' DS_ID = 'DATASET_ID' diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index fc0ff3370974..6e00bd73c9c6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -550,8 +550,17 @@ def test_from_api_repr_bare(self): self._verifyResourceProperties(table, RESOURCE) def test_from_api_repr_w_properties(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + client = _Client(self.PROJECT) RESOURCE = self._makeResource() + RESOURCE['view'] = {'query': 'select fullname, age from person_ages'} + RESOURCE['type'] = 'VIEW' + RESOURCE['location'] = 'EU' + self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + RESOURCE['expirationTime'] = _millis(self.EXP_TIME) klass = self._get_target_class() table = klass.from_api_repr(RESOURCE, client) self.assertIs(table._client, client) @@ -741,210 +750,6 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['query_params'], {'fields': 'id'}) - def test_patch_w_invalid_expiration(self): - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - with self.assertRaises(ValueError): - table.patch(expires='BOGUS') - - def test_patch_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - table.patch(description=DESCRIPTION, - friendly_name=TITLE, - view_query=None) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - SENT = { - 'description': DESCRIPTION, - 'friendlyName': TITLE, - 'view': None, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_patch_w_alternate_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - QUERY = 'select fullname, age from person_ages' - LOCATION = 'EU' - RESOURCE = self._makeResource() - RESOURCE['view'] = {'query': QUERY} - RESOURCE['type'] = 'VIEW' - RESOURCE['location'] = LOCATION - self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, - tzinfo=UTC) - RESOURCE['expirationTime'] = _millis(self.EXP_TIME) - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='NULLABLE') - - table.patch(client=client2, view_query=QUERY, location=LOCATION, - expires=self.EXP_TIME, schema=[full_name, age]) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PATCH') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'view': {'query': QUERY}, - 'location': LOCATION, - 'expirationTime': _millis(self.EXP_TIME), - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_patch_w_schema_None(self): - # Simulate deleting schema: not sure if back-end will actually - # allow this operation, but the spec says it is optional. - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - table.patch(schema=None) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - SENT = {'schema': None} - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_update_w_bound_client(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - table.description = DESCRIPTION - table.friendly_name = TITLE - - table.update() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PUT') - SENT = { - 'tableReference': - {'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_update_w_alternate_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - DEF_TABLE_EXP = 12345 - LOCATION = 'EU' - QUERY = 'select fullname, age from person_ages' - RESOURCE = self._makeResource() - RESOURCE['defaultTableExpirationMs'] = 12345 - RESOURCE['location'] = LOCATION - self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, - tzinfo=UTC) - RESOURCE['expirationTime'] = _millis(self.EXP_TIME) - RESOURCE['view'] = {'query': QUERY, 'useLegacySql': True} - RESOURCE['type'] = 'VIEW' - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - table.default_table_expiration_ms = DEF_TABLE_EXP - table.location = LOCATION - table.expires = self.EXP_TIME - table.view_query = QUERY - table.view_use_legacy_sql = True - - table.update(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PUT') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': - {'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'expirationTime': _millis(self.EXP_TIME), - 'location': 'EU', - 'view': {'query': QUERY, 'useLegacySql': True}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - def test_row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} @@ -1224,6 +1029,21 @@ def _row_data(row): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['data'], SENT) + def test__populate_view_use_legacy_sql_resource_w_existing_view(self): + query = 'select * from foo' + resource = {'view': {'query': query}} + client = mock.Mock(spec=[u'_credentials', '_http']) + client._http = mock.sentinel.http + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table = self._make_one(dataset.table(self.TABLE_NAME), client=client) + table.view_use_legacy_sql = True + + table._populate_view_use_legacy_sql_resource(resource) + + self.assertEqual( + resource['view']['useLegacySql'], table.view_use_legacy_sql) + self.assertEqual(resource['view']['query'], query) + def test__get_transport(self): client = mock.Mock(spec=[u'_credentials', '_http']) client._http = mock.sentinel.http From 370b6916558bd8efcef206bbb4c577a622101486 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 9 Oct 2017 14:38:16 -0400 Subject: [PATCH 0300/2016] bigquery: add config getters to LoadJob (#4137) --- .../google/cloud/bigquery/job.py | 98 ++++++++++++++++++- .../tests/unit/test_job.py | 86 ++++++++-------- 2 files changed, 135 insertions(+), 49 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index fd427c647a55..0ec20c8c291c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -714,12 +714,102 @@ def __init__(self, job_id, source_uris, destination, client, self._configuration = job_config @property - def configuration(self): - """Configuration for this job. + def allow_jagged_rows(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`. + """ + return self._configuration.allow_jagged_rows + + @property + def allow_quoted_newlines(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`. + """ + return self._configuration.allow_quoted_newlines + + @property + def autodetect(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.autodetect`. + """ + return self._configuration.autodetect + + @property + def create_disposition(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.create_disposition`. + """ + return self._configuration.create_disposition + + @property + def encoding(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.encoding`. + """ + return self._configuration.encoding + + @property + def field_delimiter(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.field_delimiter`. + """ + return self._configuration.field_delimiter + + @property + def ignore_unknown_values(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`. + """ + return self._configuration.ignore_unknown_values - :rtype: :class:`~google.cloud.bigquery.job.LoadJobConfig` + @property + def max_bad_records(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.max_bad_records`. + """ + return self._configuration.max_bad_records + + @property + def null_marker(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.null_marker`. + """ + return self._configuration.null_marker + + @property + def quote_character(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.quote_character`. + """ + return self._configuration.quote_character + + @property + def skip_leading_rows(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`. + """ + return self._configuration.skip_leading_rows + + @property + def source_format(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.source_format`. + """ + return self._configuration.source_format + + @property + def write_disposition(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.write_disposition`. + """ + return self._configuration.write_disposition + + @property + def schema(self): + """See + :class:`~google.cloud.bigquery.job.LoadJobConfig.schema`. """ - return self._configuration + return self._configuration.schema @property def input_file_bytes(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ebe9ed49ddda..a49ea1b6fab7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -234,53 +234,50 @@ def _makeResource(self, started=False, ended=False): return resource def _verifyBooleanConfigProperties(self, job, config): - jconfig = job.configuration if 'allowJaggedRows' in config: - self.assertEqual(jconfig.allow_jagged_rows, + self.assertEqual(job.allow_jagged_rows, config['allowJaggedRows']) else: - self.assertIsNone(jconfig.allow_jagged_rows) + self.assertIsNone(job.allow_jagged_rows) if 'allowQuotedNewlines' in config: - self.assertEqual(jconfig.allow_quoted_newlines, + self.assertEqual(job.allow_quoted_newlines, config['allowQuotedNewlines']) else: - self.assertIsNone(jconfig.allow_quoted_newlines) + self.assertIsNone(job.allow_quoted_newlines) if 'autodetect' in config: self.assertEqual( - jconfig.autodetect, config['autodetect']) + job.autodetect, config['autodetect']) else: - self.assertIsNone(jconfig.autodetect) + self.assertIsNone(job.autodetect) if 'ignoreUnknownValues' in config: - self.assertEqual(jconfig.ignore_unknown_values, + self.assertEqual(job.ignore_unknown_values, config['ignoreUnknownValues']) else: - self.assertIsNone(jconfig.ignore_unknown_values) + self.assertIsNone(job.ignore_unknown_values) def _verifyEnumConfigProperties(self, job, config): - jconfig = job.configuration if 'createDisposition' in config: - self.assertEqual(jconfig.create_disposition, + self.assertEqual(job.create_disposition, config['createDisposition']) else: - self.assertIsNone(jconfig.create_disposition) + self.assertIsNone(job.create_disposition) if 'encoding' in config: - self.assertEqual(jconfig.encoding, + self.assertEqual(job.encoding, config['encoding']) else: - self.assertIsNone(jconfig.encoding) + self.assertIsNone(job.encoding) if 'sourceFormat' in config: - self.assertEqual(jconfig.source_format, + self.assertEqual(job.source_format, config['sourceFormat']) else: - self.assertIsNone(jconfig.source_format) + self.assertIsNone(job.source_format) if 'writeDisposition' in config: - self.assertEqual(jconfig.write_disposition, + self.assertEqual(job.write_disposition, config['writeDisposition']) else: - self.assertIsNone(jconfig.write_disposition) + self.assertIsNone(job.write_disposition) def _verifyResourceProperties(self, job, resource): - jconfig = job.configuration self._verifyReadonlyResourceProperties(job, resource) config = resource.get('configuration', {}).get('load') @@ -296,30 +293,30 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.destination.table_id, table_ref['tableId']) if 'fieldDelimiter' in config: - self.assertEqual(jconfig.field_delimiter, + self.assertEqual(job.field_delimiter, config['fieldDelimiter']) else: - self.assertIsNone(jconfig.field_delimiter) + self.assertIsNone(job.field_delimiter) if 'maxBadRecords' in config: - self.assertEqual(jconfig.max_bad_records, + self.assertEqual(job.max_bad_records, config['maxBadRecords']) else: - self.assertIsNone(jconfig.max_bad_records) + self.assertIsNone(job.max_bad_records) if 'nullMarker' in config: - self.assertEqual(jconfig.null_marker, + self.assertEqual(job.null_marker, config['nullMarker']) else: - self.assertIsNone(jconfig.null_marker) + self.assertIsNone(job.null_marker) if 'quote' in config: - self.assertEqual(jconfig.quote_character, + self.assertEqual(job.quote_character, config['quote']) else: - self.assertIsNone(jconfig.quote_character) + self.assertIsNone(job.quote_character) if 'skipLeadingRows' in config: - self.assertEqual(str(jconfig.skip_leading_rows), + self.assertEqual(str(job.skip_leading_rows), config['skipLeadingRows']) else: - self.assertIsNone(jconfig.skip_leading_rows) + self.assertIsNone(job.skip_leading_rows) def test_ctor(self): client = _Client(self.PROJECT) @@ -332,7 +329,7 @@ def test_ctor(self): self.assertEqual( job.path, '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) - self.assertEqual(job.configuration.schema, []) + self.assertEqual(job.schema, []) self._verifyInitialReadonlyProperties(job) @@ -343,20 +340,19 @@ def test_ctor(self): self.assertIsNone(job.output_rows) # set/read from resource['configuration']['load'] - jconfig = job.configuration - self.assertIsNone(jconfig.allow_jagged_rows) - self.assertIsNone(jconfig.allow_quoted_newlines) - self.assertIsNone(jconfig.autodetect) - self.assertIsNone(jconfig.create_disposition) - self.assertIsNone(jconfig.encoding) - self.assertIsNone(jconfig.field_delimiter) - self.assertIsNone(jconfig.ignore_unknown_values) - self.assertIsNone(jconfig.max_bad_records) - self.assertIsNone(jconfig.null_marker) - self.assertIsNone(jconfig.quote_character) - self.assertIsNone(jconfig.skip_leading_rows) - self.assertIsNone(jconfig.source_format) - self.assertIsNone(jconfig.write_disposition) + self.assertIsNone(job.allow_jagged_rows) + self.assertIsNone(job.allow_quoted_newlines) + self.assertIsNone(job.autodetect) + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.encoding) + self.assertIsNone(job.field_delimiter) + self.assertIsNone(job.ignore_unknown_values) + self.assertIsNone(job.max_bad_records) + self.assertIsNone(job.null_marker) + self.assertIsNone(job.quote_character) + self.assertIsNone(job.skip_leading_rows) + self.assertIsNone(job.source_format) + self.assertIsNone(job.write_disposition) def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField @@ -368,7 +364,7 @@ def test_ctor_w_config(self): config.schema = [full_name, age] job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, client, config) - self.assertEqual(job.configuration.schema, [full_name, age]) + self.assertEqual(job.schema, [full_name, age]) def test_done(self): client = _Client(self.PROJECT) From 1f4c9e1169a5242ed9b74abf96b45ff76f6653eb Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 9 Oct 2017 14:02:39 -0700 Subject: [PATCH 0301/2016] BQ: remove client.run_async_query, add client.query (#4130) - Add a QueryJobConfig class. - Move configuration properties from QueryJob to QueryJobConfig. - Make standard SQL dialect the default. - Make query_rows use new query method --- .../google/cloud/bigquery/__init__.py | 6 + .../google/cloud/bigquery/_helpers.py | 93 ++-- .../google/cloud/bigquery/client.py | 51 +-- .../google/cloud/bigquery/dbapi/cursor.py | 14 +- .../google/cloud/bigquery/job.py | 428 ++++++++++-------- .../google-cloud-bigquery/tests/system.py | 35 +- .../tests/unit/test__helpers.py | 142 ++---- .../tests/unit/test_client.py | 123 ++++- .../tests/unit/test_dbapi_cursor.py | 8 +- .../tests/unit/test_job.py | 389 ++++++++++------ 10 files changed, 728 insertions(+), 561 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 3a7cc2be7a69..d6c210e9843b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -29,15 +29,18 @@ from google.cloud.bigquery._helpers import ArrayQueryParameter from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter +from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference __all__ = [ '__version__', @@ -45,6 +48,7 @@ 'ArrayQueryParameter', 'Client', 'Dataset', + 'DatasetReference', 'CopyJobConfig', 'ExtractJobConfig', 'QueryJobConfig', @@ -53,4 +57,6 @@ 'SchemaField', 'StructQueryParameter', 'Table', + 'TableReference', + 'UDFResource', ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 51000148fb0b..dad87fde88bb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -365,6 +365,44 @@ def _validate(self, value): raise ValueError('Required type: %s' % (self.property_type,)) +class _ListApiResourceProperty(_ApiResourceProperty): + """Property implementation: validates based on value type. + + :type name: str + :param name: name of the property + + :type resource_name: str + :param resource_name: name of the property in the resource dictionary + + :type property_type: type or sequence of types + :param property_type: type to be validated + """ + def __init__(self, name, resource_name, property_type): + super(_ListApiResourceProperty, self).__init__( + name, resource_name) + self.property_type = property_type + + def __get__(self, instance, owner): + """Descriptor protocol: accessor""" + if instance is None: + return self + return instance._properties.get(self.resource_name, []) + + def _validate(self, value): + """Ensure that 'value' is of the appropriate type. + + :raises: ValueError on a type mismatch. + """ + if value is None: + raise ValueError(( + 'Required type: list of {}. ' + 'To unset, use del or set to empty list').format( + self.property_type,)) + if not all(isinstance(item, self.property_type) for item in value): + raise ValueError( + 'Required type: list of %s' % (self.property_type,)) + + class _EnumApiResourceProperty(_ApiResourceProperty): """Pseudo-enumeration class. @@ -469,22 +507,6 @@ def __ne__(self, other): return not self == other -class UDFResourcesProperty(object): - """Custom property type, holding :class:`UDFResource` instances.""" - - def __get__(self, instance, owner): - """Descriptor protocol: accessor""" - if instance is None: - return self - return list(instance._udf_resources) - - def __set__(self, instance, value): - """Descriptor protocol: mutator""" - if not all(isinstance(u, UDFResource) for u in value): - raise ValueError("udf items must be UDFResource") - instance._udf_resources = tuple(value) - - class AbstractQueryParameter(object): """Base class for named / positional query parameters. """ @@ -898,45 +920,6 @@ def _query_param_from_api_repr(resource): return klass.from_api_repr(resource) -class QueryParametersProperty(object): - """Custom property type, holding query parameter instances.""" - - def __get__(self, instance, owner): - """Descriptor protocol: accessor - - :type instance: :class:`QueryParametersProperty` - :param instance: instance owning the property (None if accessed via - the class). - - :type owner: type - :param owner: the class owning the property. - - :rtype: list of instances of classes derived from - :class:`AbstractQueryParameter`. - :returns: the descriptor, if accessed via the class, or the instance's - query parameters. - """ - if instance is None: - return self - return list(instance._query_parameters) - - def __set__(self, instance, value): - """Descriptor protocol: mutator - - :type instance: :class:`QueryParametersProperty` - :param instance: instance owning the property (None if accessed via - the class). - - :type value: list of instances of classes derived from - :class:`AbstractQueryParameter`. - :param value: new query parameters for the instance. - """ - if not all(isinstance(u, AbstractQueryParameter) for u in value): - raise ValueError( - "query parameters must be derived from AbstractQueryParameter") - instance._query_parameters = tuple(value) - - def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 50db75f94560..f460202a3631 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -32,7 +32,6 @@ from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import QueryJob -from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import QueryResults from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start @@ -622,36 +621,30 @@ def extract_table(self, source, *destination_uris, **kwargs): job.begin() return job - def run_async_query(self, job_id, query, - udf_resources=(), query_parameters=()): - """Construct a job for running a SQL query asynchronously. + def query(self, query, job_config=None, job_id=None): + """Start a job that runs a SQL query. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query - :type job_id: str - :param job_id: Name of the job. - :type query: str - :param query: SQL query to be executed + :param query: + SQL query to be executed. Defaults to the standard SQL dialect. + Use the ``job_config`` parameter to change dialects. - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery._helpers.UDFResource` - (empty by default) + :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig` + :param job_config: (Optional) Extra configuration options for the job. - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) + :type job_id: str + :param job_id: (Optional) ID to use for the query job. :rtype: :class:`google.cloud.bigquery.job.QueryJob` :returns: a new ``QueryJob`` instance """ - return QueryJob(job_id, query, client=self, - udf_resources=udf_resources, - query_parameters=query_parameters) + job_id = _make_job_id(job_id) + job = QueryJob(job_id, query, client=self, job_config=job_config) + job.begin() + return job def query_rows(self, query, job_config=None, job_id=None, timeout=None): """Start a query job and wait for the results. @@ -660,7 +653,12 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query :type query: str - :param query: SQL query to be executed + :param query: + SQL query to be executed. Defaults to the standard SQL dialect. + Use the ``job_config`` parameter to change dialects. + + :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig` + :param job_config: (Optional) Extra configuration options for the job. :type job_id: str :param job_id: (Optional) ID to use for the query job. @@ -682,16 +680,7 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None): failed or :class:`TimeoutError` if the job did not complete in the given timeout. """ - job_id = _make_job_id(job_id) - - # TODO(swast): move standard SQL default to QueryJobConfig class. - if job_config is None: - job_config = QueryJobConfig() - if job_config.use_legacy_sql is None: - job_config.use_legacy_sql = False - - job = QueryJob(job_id, query, client=self, job_config=job_config) - job.begin() + job = self.query(query, job_config=job_config, job_id=job_id) return job.result(timeout=timeout) def list_rows(self, table, selected_fields=None, max_results=None, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index c1683c16db79..b5a05de6d90b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -15,10 +15,10 @@ """Cursor for the Google BigQuery DB-API.""" import collections -import uuid import six +from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions import google.cloud.exceptions @@ -135,8 +135,6 @@ def execute(self, operation, parameters=None, job_id=None): self._query_data = None self._query_results = None client = self.connection._client - if job_id is None: - job_id = str(uuid.uuid4()) # The DB-API uses the pyformat formatting, since the way BigQuery does # query parameters was not one of the standard options. Convert both @@ -146,11 +144,11 @@ def execute(self, operation, parameters=None, job_id=None): operation, parameters=parameters) query_parameters = _helpers.to_query_parameters(parameters) - query_job = client.run_async_query( - job_id, - formatted_operation, - query_parameters=query_parameters) - query_job.use_legacy_sql = False + config = job.QueryJobConfig() + config.query_parameters = query_parameters + config.use_legacy_sql = False + query_job = client.query( + formatted_operation, job_config=config, job_id=job_id) # Wait for the query to finish. try: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 0ec20c8c291c..af3b1997f177 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -29,17 +29,15 @@ from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery._helpers import AbstractQueryParameter from google.cloud.bigquery._helpers import ArrayQueryParameter -from google.cloud.bigquery._helpers import QueryParametersProperty from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery._helpers import UDFResource -from google.cloud.bigquery._helpers import UDFResourcesProperty from google.cloud.bigquery._helpers import _EnumApiResourceProperty -from google.cloud.bigquery._helpers import _EnumProperty +from google.cloud.bigquery._helpers import _ListApiResourceProperty from google.cloud.bigquery._helpers import _query_param_from_api_repr from google.cloud.bigquery._helpers import _TypedApiResourceProperty -from google.cloud.bigquery._helpers import _TypedProperty _DONE_STATE = 'DONE' _STOPPED_REASON = 'stopped' @@ -129,7 +127,7 @@ class Encoding(_EnumApiResourceProperty): ISO_8559_1 = 'ISO-8559-1' -class QueryPriority(_EnumProperty): +class QueryPriority(_EnumApiResourceProperty): """Pseudo-enum for ``QueryJob.priority`` property.""" INTERACTIVE = 'INTERACTIVE' BATCH = 'BATCH' @@ -1271,6 +1269,35 @@ def from_api_repr(cls, resource, client): return job +def _from_api_repr_query_parameters(resource): + return [ + _query_param_from_api_repr(mapping) + for mapping in resource + ] + + +def _to_api_repr_query_parameters(value): + return [ + query_parameter.to_api_repr() + for query_parameter in value + ] + + +def _from_api_repr_udf_resources(resource): + udf_resources = [] + for udf_mapping in resource: + for udf_type, udf_value in udf_mapping.items(): + udf_resources.append(UDFResource(udf_type, udf_value)) + return udf_resources + + +def _to_api_repr_udf_resources(value): + return [ + {udf_resource.udf_type: udf_resource.value} + for udf_resource in value + ] + + class QueryJobConfig(object): """Configuration options for query jobs. @@ -1278,6 +1305,9 @@ class QueryJobConfig(object): server defaults. """ + _QUERY_PARAMETERS_KEY = 'queryParameters' + _UDF_RESOURCES_KEY = 'userDefinedFunctionResources' + def __init__(self): self._properties = {} @@ -1287,7 +1317,24 @@ def to_api_repr(self): :rtype: dict :returns: A dictionary in the format used by the BigQuery API. """ - return copy.deepcopy(self._properties) + resource = copy.deepcopy(self._properties) + + # Query parameters have an addition property associated with them + # to indicate if the query is using named or positional parameters. + query_parameters = resource.get(self._QUERY_PARAMETERS_KEY) + if query_parameters: + if query_parameters[0].name is None: + resource['parameterMode'] = 'POSITIONAL' + else: + resource['parameterMode'] = 'NAMED' + + for prop, convert in self._NESTED_PROPERTIES.items(): + _, to_resource = convert + nested_resource = resource.get(prop) + if nested_resource is not None: + resource[prop] = to_resource(nested_resource) + + return resource @classmethod def from_api_repr(cls, resource): @@ -1303,13 +1350,37 @@ def from_api_repr(cls, resource): """ config = cls() config._properties = copy.deepcopy(resource) + + for prop, convert in cls._NESTED_PROPERTIES.items(): + from_resource, _ = convert + nested_resource = resource.get(prop) + if nested_resource is not None: + config._properties[prop] = from_resource(nested_resource) + return config - use_legacy_sql = _TypedApiResourceProperty( - 'use_legacy_sql', 'useLegacySql', bool) + allow_large_results = _TypedApiResourceProperty( + 'allow_large_results', 'allowLargeResults', bool) + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults + """ + + create_disposition = CreateDisposition( + 'create_disposition', 'createDisposition') + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition + """ + + default_dataset = _TypedApiResourceProperty( + 'default_dataset', 'defaultDataset', DatasetReference) + """See + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset + """ + + destination = _TypedApiResourceProperty( + 'destination', 'destinationTable', TableReference) """See - https://cloud.google.com/bigquery/docs/\ - reference/v2/jobs#configuration.query.useLegacySql + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable """ dry_run = _TypedApiResourceProperty('dry_run', 'dryRun', bool) @@ -1317,18 +1388,83 @@ def from_api_repr(cls, resource): https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun """ - _allow_large_results = None - _create_disposition = None - _default_dataset = None - _destination = None - _flatten_results = None - _priority = None - _use_query_cache = None - _use_legacy_sql = None - _write_disposition = None + flatten_results = _TypedApiResourceProperty( + 'flatten_results', 'flattenResults', bool) + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults + """ + + maximum_billing_tier = _TypedApiResourceProperty( + 'maximum_billing_tier', 'maximumBillingTier', int) + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier + """ + + maximum_bytes_billed = _TypedApiResourceProperty( + 'maximum_bytes_billed', 'maximumBytesBilled', int) + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled + """ + + priority = QueryPriority('priority', 'priority') + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority + """ + + query_parameters = _ListApiResourceProperty( + 'query_parameters', _QUERY_PARAMETERS_KEY, AbstractQueryParameter) + """ + An list of + :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` + (empty by default) + + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.queryParameters + """ + + udf_resources = _ListApiResourceProperty( + 'udf_resources', _UDF_RESOURCES_KEY, UDFResource) + """ + A list of :class:`google.cloud.bigquery._helpers.UDFResource` (empty + by default) + + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.userDefinedFunctionResources + """ + + use_legacy_sql = _TypedApiResourceProperty( + 'use_legacy_sql', 'useLegacySql', bool) + """See + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.useLegacySql + """ + + use_query_cache = _TypedApiResourceProperty( + 'use_query_cache', 'useQueryCache', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache + """ + + write_disposition = WriteDisposition( + 'write_disposition', 'writeDisposition') + """See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition + """ + _maximum_billing_tier = None _maximum_bytes_billed = None + _NESTED_PROPERTIES = { + 'defaultDataset': ( + DatasetReference.from_api_repr, DatasetReference.to_api_repr), + 'destinationTable': ( + TableReference.from_api_repr, TableReference.to_api_repr), + 'maximumBytesBilled': (int, str), + _QUERY_PARAMETERS_KEY: ( + _from_api_repr_query_parameters, _to_api_repr_query_parameters), + _UDF_RESOURCES_KEY: ( + _from_api_repr_udf_resources, _to_api_repr_udf_resources), + } + class QueryJob(_AsyncJob): """Asynchronous job: query tables. @@ -1343,53 +1479,52 @@ class QueryJob(_AsyncJob): :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery._helpers.UDFResource` - (empty by default) - - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) - :type job_config: :class:`~google.cloud.bigquery.job.QueryJobConfig` :param job_config: (Optional) Extra configuration options for the query job. """ _JOB_TYPE = 'query' _UDF_KEY = 'userDefinedFunctionResources' - _QUERY_PARAMETERS_KEY = 'queryParameters' - def __init__(self, job_id, query, client, - udf_resources=(), query_parameters=(), job_config=None): + def __init__(self, job_id, query, client, job_config=None): super(QueryJob, self).__init__(job_id, client) if job_config is None: job_config = QueryJobConfig() + if job_config.use_legacy_sql is None: + job_config.use_legacy_sql = False self.query = query - self.udf_resources = udf_resources - self.query_parameters = query_parameters self._configuration = job_config self._query_results = None @property - def use_legacy_sql(self): + def allow_large_results(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + :class:`~google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. """ - return self._configuration.use_legacy_sql + return self._configuration.allow_large_results - @use_legacy_sql.setter - def use_legacy_sql(self, value): + @property + def create_disposition(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + :class:`~google.cloud.bigquery.job.QueryJobConfig.create_disposition`. """ - # TODO(swast): remove this method and only allow setting use_legacy_sql - # on QueryJobConfig objects. - self._configuration.use_legacy_sql = value + return self._configuration.create_disposition + + @property + def default_dataset(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.default_dataset`. + """ + return self._configuration.default_dataset + + @property + def destination(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.destination`. + """ + return self._configuration.destination @property def dry_run(self): @@ -1398,130 +1533,68 @@ def dry_run(self): """ return self._configuration.dry_run - @dry_run.setter - def dry_run(self, value): + @property + def flatten_results(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.dry_run`. + :class:`~google.cloud.bigquery.job.QueryJobConfig.flatten_results`. """ - # TODO(swast): remove this method and only allow setting dry_run - # on QueryJobConfig objects. - self._configuration.dry_run = value - - allow_large_results = _TypedProperty('allow_large_results', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults - """ - - create_disposition = CreateDisposition('create_disposition', - 'createDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition - """ - - default_dataset = _TypedProperty('default_dataset', DatasetReference) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.defaultDataset - """ - - destination = _TypedProperty('destination', TableReference) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable - """ - - flatten_results = _TypedProperty('flatten_results', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults - """ + return self._configuration.flatten_results - priority = QueryPriority('priority') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority - """ + @property + def priority(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.priority`. + """ + return self._configuration.priority - query_parameters = QueryParametersProperty() + @property + def query_parameters(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.query_parameters`. + """ + return self._configuration.query_parameters - udf_resources = UDFResourcesProperty() + @property + def udf_resources(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.udf_resources`. + """ + return self._configuration.udf_resources - use_query_cache = _TypedProperty('use_query_cache', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache - """ + @property + def use_legacy_sql(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + """ + return self._configuration.use_legacy_sql - write_disposition = WriteDisposition( - 'write_disposition', 'writeDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition - """ + @property + def use_query_cache(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. + """ + return self._configuration.use_query_cache - maximum_billing_tier = _TypedProperty('maximum_billing_tier', int) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier - """ + @property + def write_disposition(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.write_disposition`. + """ + return self._configuration.write_disposition - maximum_bytes_billed = _TypedProperty('maximum_bytes_billed', int) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled - """ + @property + def maximum_billing_tier(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. + """ + return self._configuration.maximum_billing_tier - def _destination_table_resource(self): - """Create a JSON resource for the destination table. - - Helper for :meth:`_populate_config_resource` and - :meth:`_scrub_local_properties` - """ - if self.destination is not None: - return { - 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.table_id, - } - - def _populate_config_resource_booleans(self, configuration): - """Helper for _populate_config_resource.""" - if self.allow_large_results is not None: - configuration['allowLargeResults'] = self.allow_large_results - if self.flatten_results is not None: - configuration['flattenResults'] = self.flatten_results - if self.use_query_cache is not None: - configuration['useQueryCache'] = self.use_query_cache - - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - self._populate_config_resource_booleans(configuration) - - if self.create_disposition is not None: - configuration['createDisposition'] = self.create_disposition - if self.default_dataset is not None: - configuration['defaultDataset'] = { - 'projectId': self.default_dataset.project, - 'datasetId': self.default_dataset.dataset_id, - } - table_res = self._destination_table_resource() - if table_res is not None: - configuration['destinationTable'] = table_res - if self.priority is not None: - configuration['priority'] = self.priority - if self.write_disposition is not None: - configuration['writeDisposition'] = self.write_disposition - if self.maximum_billing_tier is not None: - configuration['maximumBillingTier'] = self.maximum_billing_tier - if self.maximum_bytes_billed is not None: - configuration['maximumBytesBilled'] = str( - self.maximum_bytes_billed) - if len(self._udf_resources) > 0: - configuration[self._UDF_KEY] = [ - {udf_resource.udf_type: udf_resource.value} - for udf_resource in self._udf_resources - ] - if len(self._query_parameters) > 0: - configuration[self._QUERY_PARAMETERS_KEY] = [ - query_parameter.to_api_repr() - for query_parameter in self._query_parameters - ] - if self._query_parameters[0].name is None: - configuration['parameterMode'] = 'POSITIONAL' - else: - configuration['parameterMode'] = 'NAMED' + @property + def maximum_bytes_billed(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. + """ + return self._configuration.maximum_bytes_billed def _build_resource(self): """Generate a resource for :meth:`begin`.""" @@ -1545,7 +1618,6 @@ def _build_resource(self): resource['configuration']['dryRun'] = dry_run configuration['query'] = self.query - self._populate_config_resource(configuration) return resource @@ -1574,42 +1646,6 @@ def _copy_configuration_properties(self, configuration): self._configuration = QueryJobConfig.from_api_repr(configuration) self._configuration.dry_run = dry_run - self.allow_large_results = _bool_or_none( - configuration.get('allowLargeResults')) - self.flatten_results = _bool_or_none( - configuration.get('flattenResults')) - self.use_query_cache = _bool_or_none( - configuration.get('useQueryCache')) - - self.create_disposition = configuration.get('createDisposition') - self.priority = configuration.get('priority') - self.write_disposition = configuration.get('writeDisposition') - self.maximum_billing_tier = configuration.get('maximumBillingTier') - self.maximum_bytes_billed = _int_or_none( - configuration.get('maximumBytesBilled')) - - dest_remote = configuration.get('destinationTable') - - if dest_remote is not None: - dataset = DatasetReference( - dest_remote['projectId'], dest_remote['datasetId']) - self.destination = dataset.table(dest_remote['tableId']) - - def_ds = configuration.get('defaultDataset') - if def_ds is not None: - self.default_dataset = DatasetReference( - def_ds['projectId'], def_ds['datasetId']) - udf_resources = [] - for udf_mapping in configuration.get(self._UDF_KEY, ()): - key_val, = udf_mapping.items() - udf_resources.append(UDFResource(key_val[0], key_val[1])) - self._udf_resources = udf_resources - - self._query_parameters = [ - _query_param_from_api_repr(mapping) - for mapping in configuration.get(self._QUERY_PARAMETERS_KEY, ()) - ] - @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -1798,6 +1834,7 @@ def query_results(self): """ if not self._query_results: self._query_results = self._client._get_query_results(self.job_id) + self._query_results._job = self return self._query_results def done(self): @@ -1810,6 +1847,7 @@ def done(self): # change once complete. if self.state != _DONE_STATE: self._query_results = self._client._get_query_results(self.job_id) + self._query_results._job = self # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 83d08c7598a4..ce49e88177e7 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -20,7 +20,6 @@ import os import time import unittest -import uuid import six @@ -689,7 +688,7 @@ def test_copy_table(self): def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') - JOB_NAME = 'fetch_' + DATASET_ID + JOB_ID = 'fetch_' + DATASET_ID TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) @@ -703,8 +702,7 @@ def test_job_cancel(self): table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - job = Config.CLIENT.run_async_query(JOB_NAME, QUERY) - job.begin() + job = Config.CLIENT.query(QUERY, job_id=JOB_ID) job.cancel() retry = RetryInstanceState(_job_done, max_tries=8) @@ -924,11 +922,9 @@ def test_query_w_dml(self): WHERE greeting = 'Hello World' """ - query_job = Config.CLIENT.run_async_query( - 'test_query_w_dml_{}'.format(unique_resource_id()), - query_template.format(dataset_name, table_name)) - query_job.use_legacy_sql = False - query_job.begin() + query_job = Config.CLIENT.query( + query_template.format(dataset_name, table_name), + job_id='test_query_w_dml_{}'.format(unique_resource_id())) query_job.result() self.assertEqual(query_job.num_dml_affected_rows, 1) @@ -952,6 +948,7 @@ def test_query_w_query_params(self): from google.cloud.bigquery._helpers import ArrayQueryParameter from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter + from google.cloud.bigquery.job import QueryJobConfig question = 'What is the answer to life, the universe, and everything?' question_param = ScalarQueryParameter( name='question', type_='STRING', value=question) @@ -1109,13 +1106,14 @@ def test_query_w_query_params(self): }, ] for example in examples: - query_job = Config.CLIENT.run_async_query( - 'test_query_w_query_params{}'.format(unique_resource_id()), + jconfig = QueryJobConfig() + jconfig.query_parameters = example['query_parameters'] + query_job = Config.CLIENT.query( example['sql'], - query_parameters=example['query_parameters']) - query_job.use_legacy_sql = False - query_job.begin() - rows = [row for row in query_job.result()] + job_config=jconfig, + job_id='test_query_w_query_params{}'.format( + unique_resource_id())) + rows = list(query_job.result()) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) self.assertEqual(rows[0][0], example['expected']) @@ -1249,11 +1247,8 @@ def test_large_query_w_public_data(self): rows = list(iterator) self.assertEqual(len(rows), LIMIT) - def test_async_query_future(self): - query_job = Config.CLIENT.run_async_query( - str(uuid.uuid4()), 'SELECT 1') - query_job.use_legacy_sql = False - + def test_query_future(self): + query_job = Config.CLIENT.query('SELECT 1') iterator = query_job.result(timeout=JOB_TIMEOUT) rows = list(iterator) self.assertEqual(rows, [(1,)]) diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 3d40f38a5799..2254f6b01d89 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -881,93 +881,6 @@ def __init__(self): self.assertIsNone(wrapper._configuration._attr) -class Test_UDFResourcesProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import UDFResourcesProperty - - return UDFResourcesProperty - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def _descriptor_and_klass(self): - descriptor = self._make_one() - - class _Test(object): - _udf_resources = () - udf_resources = descriptor - - return descriptor, _Test - - def test_class_getter(self): - descriptor, klass = self._descriptor_and_klass() - self.assertIs(klass.udf_resources, descriptor) - - def test_instance_getter_empty(self): - _, klass = self._descriptor_and_klass() - instance = klass() - self.assertEqual(instance.udf_resources, []) - - def test_resource_equality(self): - from google.cloud.bigquery._helpers import UDFResource - - resource1a = UDFResource('resourceUri', 'gs://bucket/file.js') - resource1b = UDFResource('resourceUri', 'gs://bucket/file.js') - resource2 = UDFResource('resourceUri', 'gs://bucket/other.js') - - self.assertEqual(resource1a, resource1b) - self.assertNotEqual(resource1a, resource2) - self.assertNotEqual(resource1a, object()) - self.assertEqual(resource1a, mock.ANY) - - def test_instance_getter_w_non_empty_list(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - _, klass = self._descriptor_and_klass() - instance = klass() - instance._udf_resources = tuple(udf_resources) - - self.assertEqual(instance.udf_resources, udf_resources) - - def test_instance_setter_w_empty_list(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - _, klass = self._descriptor_and_klass() - instance = klass() - instance._udf_resources = udf_resources - - instance.udf_resources = [] - - self.assertEqual(instance.udf_resources, []) - - def test_instance_setter_w_valid_udf(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - _, klass = self._descriptor_and_klass() - instance = klass() - - instance.udf_resources = udf_resources - - self.assertEqual(instance.udf_resources, udf_resources) - - def test_instance_setter_w_bad_udfs(self): - _, klass = self._descriptor_and_klass() - instance = klass() - - with self.assertRaises(ValueError): - instance.udf_resources = ["foo"] - - self.assertEqual(instance.udf_resources, []) - - class Test_AbstractQueryParameter(unittest.TestCase): @staticmethod @@ -2009,22 +1922,54 @@ def test_w_struct(self): self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123}) -class Test_QueryParametersProperty(unittest.TestCase): +class Test_UDFResource(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery._helpers import UDFResource + + return UDFResource + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') + self.assertEqual(udf.udf_type, 'resourceUri') + self.assertEqual(udf.value, 'gs://some_bucket/some_file') + + def test___eq__(self): + udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') + self.assertEqual(udf, udf) + self.assertNotEqual(udf, object()) + wrong_val = self._make_one( + 'resourceUri', 'gs://some_bucket/other_file') + self.assertNotEqual(udf, wrong_val) + wrong_type = self._make_one('inlineCode', udf.value) + self.assertNotEqual(udf, wrong_type) + + +class Test_ListApiResourceProperty(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery._helpers import QueryParametersProperty + from google.cloud.bigquery._helpers import _ListApiResourceProperty - return QueryParametersProperty + return _ListApiResourceProperty def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _descriptor_and_klass(self): - descriptor = self._make_one() + from google.cloud.bigquery._helpers import AbstractQueryParameter + + descriptor = self._make_one( + 'query_parameters', 'queryParameters', AbstractQueryParameter) class _Test(object): - _query_parameters = () + def __init__(self): + self._properties = {} + query_parameters = descriptor return descriptor, _Test @@ -2044,7 +1989,7 @@ def test_instance_getter_w_non_empty_list(self): query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() instance = klass() - instance._query_parameters = tuple(query_parameters) + instance._properties['queryParameters'] = query_parameters self.assertEqual(instance.query_parameters, query_parameters) @@ -2060,6 +2005,17 @@ def test_instance_setter_w_empty_list(self): self.assertEqual(instance.query_parameters, []) + def test_instance_setter_w_none(self): + from google.cloud.bigquery._helpers import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] + _, klass = self._descriptor_and_klass() + instance = klass() + instance._query_parameters = query_parameters + + with self.assertRaises(ValueError): + instance.query_parameters = None + def test_instance_setter_w_valid_udf(self): from google.cloud.bigquery._helpers import ScalarQueryParameter diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index aad64d980df1..4f509da53f1c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1600,36 +1600,84 @@ def test_extract_table_generated_job_id(self): self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) - def test_run_async_query_defaults(self): + def test_query_defaults(self): from google.cloud.bigquery.job import QueryJob PROJECT = 'PROJECT' - JOB = 'job_name' QUERY = 'select count(*) from persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': 'some-random-id', + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': False, + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - job = client.run_async_query(JOB, QUERY) + conn = client._connection = _Connection(RESOURCE) + + job = client.query(QUERY) + self.assertIsInstance(job, QueryJob) + self.assertIsInstance(job.job_id, six.string_types) self.assertIs(job._client, client) - self.assertEqual(job.job_id, JOB) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, []) - def test_run_async_w_udf_resources(self): + # Check that query actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + sent = req['data'] + self.assertIsInstance( + sent['jobReference']['jobId'], six.string_types) + sent_config = sent['configuration']['query'] + self.assertEqual(sent_config['query'], QUERY) + self.assertFalse(sent_config['useLegacySql']) + + def test_query_w_udf_resources(self): from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import QueryJobConfig RESOURCE_URI = 'gs://some-bucket/js/lib.js' PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': True, + 'userDefinedFunctionResources': [ + {'resourceUri': RESOURCE_URI}, + ], + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESOURCE) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - job = client.run_async_query(JOB, QUERY, udf_resources=udf_resources) + config = QueryJobConfig() + config.udf_resources = udf_resources + config.use_legacy_sql = True + + job = client.query(QUERY, job_config=config, job_id=JOB) + self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) @@ -1637,19 +1685,58 @@ def test_run_async_w_udf_resources(self): self.assertEqual(job.udf_resources, udf_resources) self.assertEqual(job.query_parameters, []) - def test_run_async_w_query_parameters(self): + # Check that query actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + sent = req['data'] + self.assertIsInstance( + sent['jobReference']['jobId'], six.string_types) + sent_config = sent['configuration']['query'] + self.assertEqual(sent_config['query'], QUERY) + self.assertTrue(sent_config['useLegacySql']) + self.assertEqual( + sent_config['userDefinedFunctionResources'][0], + {'resourceUri': RESOURCE_URI}) + + def test_query_w_query_parameters(self): from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import QueryJobConfig PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': False, + 'queryParameters': [ + { + 'name': 'foo', + 'parameterType': {'type': 'INT64'}, + 'parameterValue': {'value': '123'} + }, + ], + }, + }, + } creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESOURCE) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] - job = client.run_async_query(JOB, QUERY, - query_parameters=query_parameters) + config = QueryJobConfig() + config.query_parameters = query_parameters + + job = client.query(QUERY, job_config=config, job_id=JOB) + self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) @@ -1657,6 +1744,24 @@ def test_run_async_w_query_parameters(self): self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, query_parameters) + # Check that query actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + sent = req['data'] + self.assertEqual(sent['jobReference']['jobId'], JOB) + sent_config = sent['configuration']['query'] + self.assertEqual(sent_config['query'], QUERY) + self.assertFalse(sent_config['useLegacySql']) + self.assertEqual( + sent_config['queryParameters'][0], + { + 'name': 'foo', + 'parameterType': {'type': 'INT64'}, + 'parameterValue': {'value': '123'} + }) + def test_query_rows_defaults(self): from google.api.core.page_iterator import HTTPIterator diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index be327a8962a2..7562acd13239 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -31,7 +31,7 @@ def _mock_client( self, rows=None, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import client mock_client = mock.create_autospec(client.Client) - mock_client.run_async_query.return_value = self._mock_job( + mock_client.query.return_value = self._mock_job( rows=rows, schema=schema, num_dml_affected_rows=num_dml_affected_rows) return mock_client @@ -177,7 +177,9 @@ def test_execute_custom_job_id(self): connection = connect(client) cursor = connection.cursor() cursor.execute('SELECT 1;', job_id='foo') - self.assertEqual(client.run_async_query.mock_calls[0][1][0], 'foo') + args, kwargs = client.query.call_args + self.assertEqual(args[0], 'SELECT 1;') + self.assertEqual(kwargs['job_id'], 'foo') def test_execute_w_dml(self): from google.cloud.bigquery.dbapi import connect @@ -239,7 +241,7 @@ def test_execute_raises_if_result_raises(self): job = mock.create_autospec(job.QueryJob) job.result.side_effect = google.cloud.exceptions.GoogleCloudError('') client = mock.create_autospec(client.Client) - client.run_async_query.return_value = job + client.query.return_value = job connection = connect(client) cursor = connection.cursor() diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a49ea1b6fab7..ad8cf965cd79 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -93,7 +93,7 @@ class _Base(object): DS_REF = DatasetReference(PROJECT, DS_ID) TABLE_ID = 'table_id' TABLE_REF = TableReference(DS_REF, TABLE_ID) - JOB_NAME = 'job_name' + JOB_ID = 'JOB_ID' def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) @@ -106,7 +106,7 @@ def _setUpConstants(self): self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( tzinfo=UTC) self.ETAG = 'ETAG' - self.JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_NAME) + self.FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID) self.RESOURCE_URL = 'http://example.com/path/to/resource' self.USER_EMAIL = 'phred@example.com' @@ -128,10 +128,10 @@ def _makeResource(self, started=False, ended=False): } }, 'etag': self.ETAG, - 'id': self.JOB_ID, + 'id': self.FULL_JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'selfLink': self.RESOURCE_URL, 'user_email': self.USER_EMAIL, @@ -320,7 +320,7 @@ def _verifyResourceProperties(self, job, resource): def test_ctor(self): client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) self.assertIs(job.destination, self.TABLE_REF) self.assertEqual(list(job.source_uris), [self.SOURCE1]) @@ -328,7 +328,7 @@ def test_ctor(self): self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self.assertEqual(job.schema, []) self._verifyInitialReadonlyProperties(job) @@ -362,7 +362,7 @@ def test_ctor_w_config(self): age = SchemaField('age', 'INTEGER', mode='REQUIRED') config = LoadJobConfig() config.schema = [full_name, age] - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config) self.assertEqual(job.schema, [full_name, age]) @@ -388,7 +388,7 @@ def test_result_invokes_begin(self): connection = _Connection(begun_resource, done_resource) client = _Client(self.PROJECT, connection=connection) - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job.result() @@ -453,9 +453,9 @@ def test_props_set_by_server(self): CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC) STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC) ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC) - JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_NAME) + FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID) URL = 'http://example.com/projects/%s/jobs/%s' % ( - self.PROJECT, self.JOB_NAME) + self.PROJECT, self.JOB_ID) EMAIL = 'phred@example.com' ERROR_RESULT = {'debugInfo': 'DEBUG', 'location': 'LOCATION', @@ -464,9 +464,9 @@ def test_props_set_by_server(self): client = _Client(self.PROJECT) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job._properties['etag'] = 'ETAG' - job._properties['id'] = JOB_ID + job._properties['id'] = FULL_JOB_ID job._properties['selfLink'] = URL job._properties['user_email'] = EMAIL @@ -519,10 +519,10 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_ID), + 'id': '%s:%s' % (self.PROJECT, self.JOB_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -533,10 +533,10 @@ def test_from_api_repr_bare(self): self._setUpConstants() client = _Client(self.PROJECT) RESOURCE = { - 'id': self.JOB_ID, + 'id': self.FULL_JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': { @@ -567,7 +567,7 @@ def test_from_api_repr_w_properties(self): def test_begin_w_already_running(self): conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job._properties['status'] = {'state': 'RUNNING'} @@ -584,7 +584,7 @@ def test_begin_w_bound_client(self): del RESOURCE['user_email'] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job.begin() @@ -596,7 +596,7 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': { @@ -625,14 +625,14 @@ def test_begin_w_autodetect(self): client = _Client(project=self.PROJECT, connection=conn) config = LoadJobConfig() config.autodetect = True - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config) job.begin() sent = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': { @@ -692,7 +692,7 @@ def test_begin_w_alternate_client(self): age = SchemaField('age', 'INTEGER', mode='REQUIRED') config = LoadJobConfig() config.schema = [full_name, age] - job = self._make_one(self.JOB_NAME, [self.SOURCE1], self.TABLE_REF, + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1, config) config.allow_jagged_rows = True config.allow_quoted_newlines = True @@ -717,7 +717,7 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': LOAD_CONFIGURATION, @@ -728,11 +728,11 @@ def test_begin_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) self.assertFalse(job.exists()) @@ -743,13 +743,13 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) self.assertTrue(job.exists(client=client2)) @@ -761,12 +761,12 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job.reload() @@ -777,14 +777,14 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) job.reload(client=client2) @@ -796,13 +796,13 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_bound_client(self): - PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn = _Connection(RESPONSE) client = _Client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job.cancel() @@ -813,7 +813,7 @@ def test_cancel_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn1 = _Connection() @@ -821,7 +821,7 @@ def test_cancel_w_alternate_client(self): conn2 = _Connection(RESPONSE) client2 = _Client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, [self.SOURCE1], table, client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) job.cancel(client=client2) @@ -896,14 +896,14 @@ def test_ctor(self): client = _Client(self.PROJECT) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client) + job = self._make_one(self.JOB_ID, [source], destination, client) self.assertIs(job.destination, destination) self.assertEqual(job.sources, [source]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) @@ -926,7 +926,7 @@ def test_from_api_repr_missing_config(self): 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -940,7 +940,7 @@ def test_from_api_repr_bare(self): 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': { @@ -969,7 +969,7 @@ def test_from_api_repr_w_sourcetable(self): 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': { @@ -998,7 +998,7 @@ def test_from_api_repr_wo_sources(self): 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': { @@ -1036,7 +1036,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client) + job = self._make_one(self.JOB_ID, [source], destination, client) job.begin() @@ -1047,7 +1047,7 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': { @@ -1094,7 +1094,7 @@ def test_begin_w_alternate_client(self): config = CopyJobConfig() config.create_disposition = 'CREATE_NEVER' config.write_disposition = 'WRITE_TRUNCATE' - job = self._make_one(self.JOB_NAME, [source], destination, client1, + job = self._make_one(self.JOB_ID, [source], destination, client1, config) job.begin(client=client2) @@ -1106,7 +1106,7 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': COPY_CONFIGURATION, @@ -1116,13 +1116,13 @@ def test_begin_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client) + job = self._make_one(self.JOB_ID, [source], destination, client) self.assertFalse(job.exists()) @@ -1133,14 +1133,14 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client1) + job = self._make_one(self.JOB_ID, [source], destination, client1) self.assertTrue(job.exists(client=client2)) @@ -1152,13 +1152,13 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client) + job = self._make_one(self.JOB_ID, [source], destination, client) job.reload() @@ -1169,7 +1169,7 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) @@ -1177,7 +1177,7 @@ def test_reload_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, [source], destination, client1) + job = self._make_one(self.JOB_ID, [source], destination, client1) job.reload(client=client2) @@ -1251,7 +1251,7 @@ def _verifyResourceProperties(self, job, resource): def test_ctor(self): client = _Client(self.PROJECT) source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) self.assertEqual(job.source, source) self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) @@ -1259,7 +1259,7 @@ def test_ctor(self): self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) @@ -1273,7 +1273,7 @@ def test_destination_uri_file_counts(self): file_counts = 23 client = _Client(self.PROJECT) source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) self.assertIsNone(job.destination_uri_file_counts) @@ -1301,7 +1301,7 @@ def test_from_api_repr_missing_config(self): 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -1315,7 +1315,7 @@ def test_from_api_repr_bare(self): 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'extract': { @@ -1355,7 +1355,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) job.begin() @@ -1367,7 +1367,7 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'extract': { @@ -1410,7 +1410,7 @@ def test_begin_w_alternate_client(self): job_config.destination_format = 'NEWLINE_DELIMITED_JSON' job_config.field_delimiter = '|' job_config.print_header = False - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1, job_config) job.begin(client=client2) @@ -1423,7 +1423,7 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'extract': EXTRACT_CONFIGURATION, @@ -1433,11 +1433,11 @@ def test_begin_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) self.assertFalse(job.exists()) @@ -1449,13 +1449,13 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) self.assertTrue(job.exists(client=client2)) @@ -1468,13 +1468,13 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) job.reload() @@ -1486,7 +1486,7 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) @@ -1494,7 +1494,7 @@ def test_reload_w_alternate_client(self): client2 = _Client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) job.reload(client=client2) @@ -1507,6 +1507,69 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) +class TestQueryJobConfig(unittest.TestCase, _Base): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryJobConfig + + return QueryJobConfig + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + config = self._make_one() + self.assertEqual(config._properties, {}) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + config = klass.from_api_repr({}) + self.assertIsNone(config.dry_run) + self.assertIsNone(config.use_legacy_sql) + self.assertIsNone(config.default_dataset) + + def test_from_api_repr_normal(self): + resource = { + 'useLegacySql': True, + 'query': 'no property for me', + 'defaultDataset': { + 'projectId': 'someproject', + 'datasetId': 'somedataset', + }, + 'someNewProperty': 'I should be saved, too.', + } + klass = self._get_target_class() + + config = klass.from_api_repr(resource) + + self.assertTrue(config.use_legacy_sql) + self.assertEqual( + config.default_dataset, + DatasetReference('someproject', 'somedataset')) + # Make sure unknown properties propagate. + self.assertEqual(config._properties['query'], 'no property for me') + self.assertEqual( + config._properties['someNewProperty'], 'I should be saved, too.') + + def test_to_api_repr_normal(self): + config = self._make_one() + config.use_legacy_sql = True + config.default_dataset = DatasetReference( + 'someproject', 'somedataset') + config._properties['someNewProperty'] = 'Woohoo, alpha stuff.' + + resource = config.to_api_repr() + + self.assertTrue(resource['useLegacySql']) + self.assertEqual( + resource['defaultDataset']['projectId'], 'someproject') + self.assertEqual( + resource['defaultDataset']['datasetId'], 'somedataset') + # Make sure unknown properties propagate. + self.assertEqual( + config._properties['someNewProperty'], 'Woohoo, alpha stuff.') + + class TestQueryJob(unittest.TestCase, _Base): JOB_TYPE = 'query' QUERY = 'select count(*) from persons' @@ -1639,17 +1702,19 @@ def _verifyResourceProperties(self, job, resource): def test_ctor_defaults(self): client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.query, self.QUERY) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) - # set/read from resource['configuration']['copy'] + self.assertFalse(job.use_legacy_sql) + + # set/read from resource['configuration']['query'] self.assertIsNone(job.allow_large_results) self.assertIsNone(job.create_disposition) self.assertIsNone(job.default_dataset) @@ -1657,7 +1722,6 @@ def test_ctor_defaults(self): self.assertIsNone(job.flatten_results) self.assertIsNone(job.priority) self.assertIsNone(job.use_query_cache) - self.assertIsNone(job.use_legacy_sql) self.assertIsNone(job.dry_run) self.assertIsNone(job.write_disposition) self.assertIsNone(job.maximum_billing_tier) @@ -1665,21 +1729,27 @@ def test_ctor_defaults(self): def test_ctor_w_udf_resources(self): from google.cloud.bigquery._helpers import UDFResource + from google.cloud.bigquery.job import QueryJobConfig RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - udf_resources=udf_resources) + config = QueryJobConfig() + config.udf_resources = udf_resources + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) self.assertEqual(job.udf_resources, udf_resources) def test_ctor_w_query_parameters(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.job import QueryJobConfig query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - query_parameters=query_parameters) + config = QueryJobConfig() + config.query_parameters = query_parameters + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) self.assertEqual(job.query_parameters, query_parameters) def test_from_api_repr_missing_identity(self): @@ -1697,7 +1767,7 @@ def test_from_api_repr_missing_config(self): 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -1711,7 +1781,7 @@ def test_from_api_repr_bare(self): 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': {'query': self.QUERY}, @@ -1740,7 +1810,7 @@ def test_from_api_repr_w_properties(self): def test_cancelled(self): client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) job._properties['status'] = { 'state': 'DONE', 'errorResult': { @@ -1780,7 +1850,7 @@ def test_query_plan(self): }], }] client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.query_plan, []) statistics = job._properties['statistics'] = {} @@ -1821,7 +1891,7 @@ def test_query_plan(self): def test_total_bytes_processed(self): total_bytes = 1234 client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.total_bytes_processed) statistics = job._properties['statistics'] = {} @@ -1836,7 +1906,7 @@ def test_total_bytes_processed(self): def test_total_bytes_billed(self): total_bytes = 1234 client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.total_bytes_billed) statistics = job._properties['statistics'] = {} @@ -1851,7 +1921,7 @@ def test_total_bytes_billed(self): def test_billing_tier(self): billing_tier = 1 client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.billing_tier) statistics = job._properties['statistics'] = {} @@ -1865,7 +1935,7 @@ def test_billing_tier(self): def test_cache_hit(self): client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.cache_hit) statistics = job._properties['statistics'] = {} @@ -1880,7 +1950,7 @@ def test_cache_hit(self): def test_num_dml_affected_rows(self): num_rows = 1234 client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.num_dml_affected_rows) statistics = job._properties['statistics'] = {} @@ -1895,7 +1965,7 @@ def test_num_dml_affected_rows(self): def test_statement_type(self): statement_type = 'SELECT' client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.statement_type) statistics = job._properties['statistics'] = {} @@ -1926,7 +1996,7 @@ def test_referenced_tables(self): 'tableId': 'other-table', }] client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.referenced_tables, []) statistics = job._properties['statistics'] = {} @@ -2001,7 +2071,7 @@ def test_undeclared_query_paramters(self): }, }] client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.undeclared_query_paramters, []) statistics = job._properties['statistics'] = {} @@ -2036,12 +2106,12 @@ def test_query_results(self): 'jobComplete': True, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, } connection = _Connection(query_resource) client = _Client(self.PROJECT, connection=connection) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) results = job.query_results() self.assertIsInstance(results, QueryResults) @@ -2049,11 +2119,11 @@ def test_query_results_w_cached_value(self): from google.cloud.bigquery.query import QueryResults client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) resource = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, } query_results = QueryResults(client, resource) @@ -2068,7 +2138,7 @@ def test_result(self): 'jobComplete': True, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, } connection = _Connection(query_resource, query_resource) @@ -2086,7 +2156,7 @@ def test_result_invokes_begins(self): 'jobComplete': False, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, } query_resource = copy.deepcopy(incomplete_resource) @@ -2097,7 +2167,7 @@ def test_result_invokes_begins(self): begun_resource, incomplete_resource, query_resource, done_resource, query_resource) client = _Client(self.PROJECT, connection=connection) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) job.result() @@ -2111,7 +2181,7 @@ def test_result_error(self): from google.cloud import exceptions client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) error_result = { 'debugInfo': 'DEBUG', 'location': 'LOCATION', @@ -2133,6 +2203,7 @@ def test_result_error(self): def test_begin_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig PATH = '/projects/%s/jobs' % (self.PROJECT,) DS_ID = 'DATASET' @@ -2145,8 +2216,10 @@ def test_begin_w_bound_client(self): conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.default_dataset = DatasetReference(self.PROJECT, DS_ID) + config = QueryJobConfig() + config.default_dataset = DatasetReference(self.PROJECT, DS_ID) + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) job.begin() @@ -2159,11 +2232,12 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { 'query': self.QUERY, + 'useLegacySql': False, 'defaultDataset': { 'projectId': self.PROJECT, 'datasetId': DS_ID, @@ -2171,11 +2245,12 @@ def test_begin_w_bound_client(self): }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_alternate_client(self): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig PATH = '/projects/%s/jobs' % (self.PROJECT,) TABLE = 'TABLE' @@ -2203,28 +2278,29 @@ def test_begin_w_alternate_client(self): 'maximumBytesBilled': '123456' } RESOURCE['configuration']['query'] = QUERY_CONFIGURATION + RESOURCE['configuration']['dryRun'] = True conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_NAME, self.QUERY, client1) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(TABLE) - job.allow_large_results = True - job.create_disposition = 'CREATE_NEVER' - job.default_dataset = dataset_ref - job.destination = table_ref - job.flatten_results = True - job.priority = 'INTERACTIVE' - job.use_query_cache = True - job.use_legacy_sql = True - job.dry_run = True - RESOURCE['configuration']['dryRun'] = True - job.write_disposition = 'WRITE_TRUNCATE' - job.maximum_billing_tier = 4 - job.maximum_bytes_billed = 123456 + config = QueryJobConfig() + config.allow_large_results = True + config.create_disposition = 'CREATE_NEVER' + config.default_dataset = dataset_ref + config.destination = table_ref + config.dry_run = True + config.flatten_results = True + config.maximum_billing_tier = 4 + config.priority = 'INTERACTIVE' + config.use_legacy_sql = True + config.use_query_cache = True + config.write_disposition = 'WRITE_TRUNCATE' + config.maximum_bytes_billed = 123456 + job = self._make_one( + self.JOB_ID, self.QUERY, client1, job_config=config) job.begin(client=client2) @@ -2236,18 +2312,19 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'dryRun': True, 'query': QUERY_CONFIGURATION, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_udf(self): from google.cloud.bigquery._helpers import UDFResource + from google.cloud.bigquery.job import QueryJobConfig RESOURCE_URI = 'gs://some-bucket/js/lib.js' INLINE_UDF_CODE = 'var someCode = "here";' @@ -2268,8 +2345,11 @@ def test_begin_w_udf(self): UDFResource("resourceUri", RESOURCE_URI), UDFResource("inlineCode", INLINE_UDF_CODE), ] - job = self._make_one(self.JOB_NAME, self.QUERY, client, - udf_resources=udf_resources) + config = QueryJobConfig() + config.udf_resources = udf_resources + config.use_legacy_sql = True + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) job.begin() @@ -2281,11 +2361,12 @@ def test_begin_w_udf(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { 'query': self.QUERY, + 'useLegacySql': True, 'userDefinedFunctionResources': [ {'resourceUri': RESOURCE_URI}, {'inlineCode': INLINE_UDF_CODE}, @@ -2293,11 +2374,12 @@ def test_begin_w_udf(self): }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_named_query_parameter(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.job import QueryJobConfig query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2322,8 +2404,10 @@ def test_begin_w_named_query_parameter(self): ] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - query_parameters=query_parameters) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=jconfig) job.begin() @@ -2335,21 +2419,23 @@ def test_begin_w_named_query_parameter(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { 'query': self.QUERY, + 'useLegacySql': False, 'parameterMode': 'NAMED', 'queryParameters': config['queryParameters'], }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_positional_query_parameter(self): from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.job import QueryJobConfig query_parameters = [ScalarQueryParameter.positional('INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2373,8 +2459,10 @@ def test_begin_w_positional_query_parameter(self): ] conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - query_parameters=query_parameters) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=jconfig) job.begin() @@ -2386,20 +2474,23 @@ def test_begin_w_positional_query_parameter(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { 'query': self.QUERY, + 'useLegacySql': False, 'parameterMode': 'POSITIONAL', 'queryParameters': config['queryParameters'], }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_dry_run_query(self): + from google.cloud.bigquery.job import QueryJobConfig + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource() # Ensure None for missing server-set props @@ -2407,11 +2498,13 @@ def test_dry_run_query(self): del RESOURCE['etag'] del RESOURCE['selfLink'] del RESOURCE['user_email'] + RESOURCE['configuration']['dryRun'] = True conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.dry_run = True - RESOURCE['configuration']['dryRun'] = True + config = QueryJobConfig() + config.dry_run = True + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) job.begin() self.assertEqual(job.udf_resources, []) @@ -2422,23 +2515,24 @@ def test_dry_run_query(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { - 'query': self.QUERY + 'query': self.QUERY, + 'useLegacySql': False, }, 'dryRun': True, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertFalse(job.exists()) @@ -2449,12 +2543,12 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_NAME, self.QUERY, client1) + job = self._make_one(self.JOB_ID, self.QUERY, client1) self.assertTrue(job.exists(client=client2)) @@ -2467,18 +2561,19 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) DS_ID = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() conn = _Connection(RESOURCE) client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, None, client) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) - job.destination = table_ref + config = QueryJobConfig() + config.destination = table_ref + job = self._make_one(self.JOB_ID, None, client, job_config=config) job.reload() @@ -2491,7 +2586,7 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) DS_ID = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() @@ -2505,7 +2600,7 @@ def test_reload_w_alternate_client(self): client1 = _Client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_NAME, self.QUERY, client1) + job = self._make_one(self.JOB_ID, self.QUERY, client1) job.reload(client=client2) From 75efa10ebc0ce1079a3c9f158afc23bc0ee18acd Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 9 Oct 2017 17:10:08 -0400 Subject: [PATCH 0302/2016] bigquery: add Client.load_table_from_file (#4136) Move the method from Table to Client. --- .../google/cloud/bigquery/client.py | 221 +++++++- .../google/cloud/bigquery/job.py | 10 +- .../google/cloud/bigquery/table.py | 410 --------------- .../google-cloud-bigquery/tests/system.py | 50 +- .../tests/unit/test_client.py | 475 +++++++++++++++++ .../tests/unit/test_table.py | 497 ------------------ 6 files changed, 726 insertions(+), 937 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f460202a3631..ce41824996bd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -17,11 +17,17 @@ from __future__ import absolute_import import collections +import os import uuid import six +from google import resumable_media +from google.resumable_media.requests import MultipartUpload +from google.resumable_media.requests import ResumableUpload + from google.api.core import page_iterator +from google.cloud import exceptions from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset @@ -37,6 +43,20 @@ from google.cloud.bigquery._helpers import _rows_page_start +_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB +_MAX_MULTIPART_SIZE = 5 * 1024 * 1024 +_DEFAULT_NUM_RETRIES = 6 +_BASE_UPLOAD_TEMPLATE = ( + u'https://www.googleapis.com/upload/bigquery/v2/projects/' + u'{project}/jobs?uploadType=') +_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'multipart' +_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'resumable' +_GENERIC_CONTENT_TYPE = u'*/*' +_READ_LESS_THAN_SIZE = ( + 'Size {:d} was specified but the file-like object only had ' + '{:d} bytes remaining.') + + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -535,7 +555,7 @@ def load_table_from_storage(self, source_uris, destination, :param destination: Table into which data is to be loaded. :type job_id: str - :param job_id: Name of the job. + :param job_id: (Optional) Name of the job. :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. @@ -550,6 +570,171 @@ def load_table_from_storage(self, source_uris, destination, job.begin() return job + def load_table_from_file(self, file_obj, destination, + rewind=False, + size=None, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=None, job_config=None): + """Upload the contents of this table from a file-like object. + + Like load_table_from_storage, this creates, starts and returns + a ``LoadJob``. + + :type file_obj: file + :param file_obj: A file handle opened in binary mode for reading. + + :type destination: :class:`google.cloud.bigquery.table.TableReference` + :param destination: Table into which data is to be loaded. + + :type rewind: bool + :param rewind: If True, seek to the beginning of the file handle before + reading the file. + + :type size: int + :param size: The number of bytes to read from the file handle. + If size is ``None`` or large, resumable upload will be + used. Otherwise, multipart upload will be used. + + :type num_retries: int + :param num_retries: Number of upload retries. Defaults to 6. + + :type job_id: str + :param job_id: (Optional) Name of the job. + + :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` + :param job_config: (Optional) Extra configuration options for the job. + + :rtype: :class:`~google.cloud.bigquery.jobs.LoadJob` + + :returns: the job instance used to load the data (e.g., for + querying status). Note that the job is already started: + do not call ``job.begin()``. + :raises: :class:`ValueError` if ``size`` is not passed in and can not + be determined, or if the ``file_obj`` can be detected to be + a file opened in text mode. + """ + job_id = _make_job_id(job_id) + job = LoadJob(job_id, None, destination, self, job_config) + job_resource = job._build_resource() + if rewind: + file_obj.seek(0, os.SEEK_SET) + _check_mode(file_obj) + try: + if size is None or size >= _MAX_MULTIPART_SIZE: + response = self._do_resumable_upload( + file_obj, job_resource, num_retries) + else: + response = self._do_multipart_upload( + file_obj, job_resource, size, num_retries) + except resumable_media.InvalidResponse as exc: + raise exceptions.from_http_response(exc.response) + return self.job_from_resource(response.json()) + + def _do_resumable_upload(self, stream, metadata, num_retries): + """Perform a resumable upload. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: :class:`~requests.Response` + :returns: The "200 OK" response object returned after the final chunk + is uploaded. + """ + upload, transport = self._initiate_resumable_upload( + stream, metadata, num_retries) + + while not upload.finished: + response = upload.transmit_next_chunk(transport) + + return response + + def _initiate_resumable_upload(self, stream, metadata, num_retries): + """Initiate a resumable upload. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: tuple + :returns: + Pair of + + * The :class:`~google.resumable_media.requests.ResumableUpload` + that was created + * The ``transport`` used to initiate the upload. + """ + chunk_size = _DEFAULT_CHUNKSIZE + transport = self._http + headers = _get_upload_headers(self._connection.USER_AGENT) + upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) + upload = ResumableUpload(upload_url, chunk_size, headers=headers) + + if num_retries is not None: + upload._retry_strategy = resumable_media.RetryStrategy( + max_retries=num_retries) + + upload.initiate( + transport, stream, metadata, _GENERIC_CONTENT_TYPE, + stream_final=False) + + return upload, transport + + def _do_multipart_upload(self, stream, metadata, size, num_retries): + """Perform a multipart upload. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type size: int + :param size: The number of bytes to be uploaded (which will be read + from ``stream``). If not provided, the upload will be + concluded once ``stream`` is exhausted (or :data:`None`). + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: :class:`~requests.Response` + :returns: The "200 OK" response object returned after the multipart + upload request. + :raises: :exc:`ValueError` if the ``stream`` has fewer than ``size`` + bytes remaining. + """ + data = stream.read(size) + if len(data) < size: + msg = _READ_LESS_THAN_SIZE.format(size, len(data)) + raise ValueError(msg) + + headers = _get_upload_headers(self._connection.USER_AGENT) + + upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project) + upload = MultipartUpload(upload_url, headers=headers) + + if num_retries is not None: + upload._retry_strategy = resumable_media.RetryStrategy( + max_retries=num_retries) + + response = upload.transmit( + self._http, data, metadata, _GENERIC_CONTENT_TYPE) + + return response + def copy_table(self, sources, destination, job_id=None, job_config=None): """Start a job for copying one or more tables into another table. @@ -832,3 +1017,37 @@ def _make_job_id(job_id): if job_id is None: return str(uuid.uuid4()) return job_id + + +def _check_mode(stream): + """Check that a stream was opened in read-binary mode. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :raises: :exc:`ValueError` if the ``stream.mode`` is a valid attribute + and is not among ``rb``, ``r+b`` or ``rb+``. + """ + mode = getattr(stream, 'mode', None) + + if mode is not None and mode not in ('rb', 'r+b', 'rb+'): + raise ValueError( + "Cannot upload files opened in text mode: use " + "open(filename, mode='rb') or open(filename, mode='r+b')") + + +def _get_upload_headers(user_agent): + """Get the headers for an upload request. + + :type user_agent: str + :param user_agent: The user-agent for requests. + + :rtype: dict + :returns: The headers to be used for the request. + """ + return { + 'Accept': 'application/json', + 'Accept-Encoding': 'gzip, deflate', + 'User-Agent': user_agent, + 'content-type': 'application/json', + } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index af3b1997f177..4f9c005a883e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -684,11 +684,11 @@ class LoadJob(_AsyncJob): :type job_id: str :param job_id: the job's ID - :type source_uris: sequence of string + :type source_uris: sequence of string or ``NoneType`` :param source_uris: URIs of one or more data files to be loaded. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris - for supported URI formats. + for supported URI formats. Pass None for jobs that load from a file. :type destination: :class:`google.cloud.bigquery.table.TableReference` :param destination: reference to table into which data is to be loaded. @@ -856,7 +856,8 @@ def output_rows(self): def _build_resource(self): """Generate a resource for :meth:`begin`.""" configuration = self._configuration.to_api_repr() - configuration['sourceUris'] = self.source_uris + if self.source_uris is not None: + configuration['sourceUris'] = self.source_uris configuration['destinationTable'] = self.destination.to_api_repr() return { @@ -898,8 +899,7 @@ def from_api_repr(cls, resource, client): ds_ref = DatasetReference(dest_config['projectId'], dest_config['datasetId'],) destination = TableReference(ds_ref, dest_config['tableId']) - # TODO(jba): sourceUris should not be absent if there are no LoadJobs - # for file uploads. + # sourceUris will be absent if this is a file upload. source_uris = config_resource.get('sourceUris') job = cls(job_id, source_uris, destination, client, config) job._set_properties(resource) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index e4814ae16c8e..8f56dffd18bf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -17,14 +17,9 @@ from __future__ import absolute_import import datetime -import os import six -from google import resumable_media -from google.resumable_media.requests import MultipartUpload -from google.resumable_media.requests import ResumableUpload - from google.cloud import exceptions from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime @@ -34,17 +29,6 @@ _TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'" _MARKER = object() -_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB -_BASE_UPLOAD_TEMPLATE = ( - u'https://www.googleapis.com/upload/bigquery/v2/projects/' - u'{project}/jobs?uploadType=') -_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'multipart' -_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'resumable' -_GENERIC_CONTENT_TYPE = u'*/*' -_READ_LESS_THAN_SIZE = ( - 'Size {:d} was specified but the file-like object only had ' - '{:d} bytes remaining.') -_DEFAULT_NUM_RETRIES = 6 class TableReference(object): @@ -826,353 +810,6 @@ def insert_data(self, return errors - def _get_transport(self, client): - """Return the client's transport. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :rtype transport: - :class:`~google.auth.transport.requests.AuthorizedSession` - :returns: The transport (with credentials) that will - make authenticated requests. - """ - return client._http - - def _initiate_resumable_upload(self, client, stream, - metadata, num_retries): - """Initiate a resumable upload. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) - - :rtype: tuple - :returns: - Pair of - - * The :class:`~google.resumable_media.requests.ResumableUpload` - that was created - * The ``transport`` used to initiate the upload. - """ - chunk_size = _DEFAULT_CHUNKSIZE - transport = self._get_transport(client) - headers = _get_upload_headers(client._connection.USER_AGENT) - upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) - upload = ResumableUpload(upload_url, chunk_size, headers=headers) - - if num_retries is not None: - upload._retry_strategy = resumable_media.RetryStrategy( - max_retries=num_retries) - - upload.initiate( - transport, stream, metadata, _GENERIC_CONTENT_TYPE, - stream_final=False) - - return upload, transport - - def _do_resumable_upload(self, client, stream, metadata, num_retries): - """Perform a resumable upload. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) - - :rtype: :class:`~requests.Response` - :returns: The "200 OK" response object returned after the final chunk - is uploaded. - """ - upload, transport = self._initiate_resumable_upload( - client, stream, metadata, num_retries) - - while not upload.finished: - response = upload.transmit_next_chunk(transport) - - return response - - def _do_multipart_upload(self, client, stream, metadata, - size, num_retries): - """Perform a multipart upload. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type size: int - :param size: The number of bytes to be uploaded (which will be read - from ``stream``). If not provided, the upload will be - concluded once ``stream`` is exhausted (or :data:`None`). - - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) - - :rtype: :class:`~requests.Response` - :returns: The "200 OK" response object returned after the multipart - upload request. - :raises: :exc:`ValueError` if the ``stream`` has fewer than ``size`` - bytes remaining. - """ - data = stream.read(size) - if len(data) < size: - msg = _READ_LESS_THAN_SIZE.format(size, len(data)) - raise ValueError(msg) - - transport = self._get_transport(client) - headers = _get_upload_headers(client._connection.USER_AGENT) - - upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project) - upload = MultipartUpload(upload_url, headers=headers) - - if num_retries is not None: - upload._retry_strategy = resumable_media.RetryStrategy( - max_retries=num_retries) - - response = upload.transmit( - transport, data, metadata, _GENERIC_CONTENT_TYPE) - - return response - - def _do_upload(self, client, stream, metadata, size, num_retries): - """Determine an upload strategy and then perform the upload. - - If ``size`` is :data:`None`, then a resumable upload will be used, - otherwise the content and the metadata will be uploaded - in a single multipart upload request. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type size: int - :param size: The number of bytes to be uploaded (which will be read - from ``stream``). If not provided, the upload will be - concluded once ``stream`` is exhausted (or :data:`None`). - - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) - - :rtype: dict - :returns: The parsed JSON from the "200 OK" response. This will be the - **only** response in the multipart case and it will be the - **final** response in the resumable case. - """ - if size is None: - response = self._do_resumable_upload( - client, stream, metadata, num_retries) - else: - response = self._do_multipart_upload( - client, stream, metadata, size, num_retries) - - return response.json() - - # pylint: disable=too-many-arguments,too-many-locals - def upload_from_file(self, - file_obj, - source_format, - rewind=False, - size=None, - num_retries=_DEFAULT_NUM_RETRIES, - allow_jagged_rows=None, - allow_quoted_newlines=None, - create_disposition=None, - encoding=None, - field_delimiter=None, - ignore_unknown_values=None, - max_bad_records=None, - quote_character=None, - skip_leading_rows=None, - write_disposition=None, - client=None, - job_name=None, - null_marker=None): - """Upload the contents of this table from a file-like object. - - :type file_obj: file - :param file_obj: A file handle opened in binary mode for reading. - - :type source_format: str - :param source_format: Any supported format. The full list of supported - formats is documented under the - ``configuration.extract.destinationFormat`` property on this page: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs - - :type rewind: bool - :param rewind: If True, seek to the beginning of the file handle before - writing the file. - - :type size: int - :param size: The number of bytes to read from the file handle. - If not provided, we'll try to guess the size using - :func:`os.fstat`. (If the file handle is not from the - filesystem this won't be possible.) - - :type num_retries: int - :param num_retries: Number of upload retries. Defaults to 6. - - :type allow_jagged_rows: bool - :param allow_jagged_rows: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type allow_quoted_newlines: bool - :param allow_quoted_newlines: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type create_disposition: str - :param create_disposition: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type encoding: str - :param encoding: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type field_delimiter: str - :param field_delimiter: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type ignore_unknown_values: bool - :param ignore_unknown_values: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type max_bad_records: int - :param max_bad_records: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type quote_character: str - :param quote_character: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type skip_leading_rows: int - :param skip_leading_rows: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type write_disposition: str - :param write_disposition: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: (Optional) The client to use. If not passed, falls back - to the ``client`` stored on the current table. - - :type job_name: str - :param job_name: Optional. The id of the job. Generated if not - explicitly passed in. - - :type null_marker: str - :param null_marker: Optional. A custom null marker (example: "\\N") - - :rtype: :class:`~google.cloud.bigquery.jobs.LoadJob` - - :returns: the job instance used to load the data (e.g., for - querying status). Note that the job is already started: - do not call ``job.begin()``. - :raises: :class:`ValueError` if ``size`` is not passed in and can not - be determined, or if the ``file_obj`` can be detected to be - a file opened in text mode. - """ - client = self._require_client(client) - _maybe_rewind(file_obj, rewind=rewind) - _check_mode(file_obj) - metadata = _get_upload_metadata( - source_format, self._schema, self._project, - self._dataset_id, self.table_id) - _configure_job_metadata(metadata, allow_jagged_rows, - allow_quoted_newlines, create_disposition, - encoding, field_delimiter, - ignore_unknown_values, max_bad_records, - quote_character, skip_leading_rows, - write_disposition, job_name, null_marker) - - try: - created_json = self._do_upload( - client, file_obj, metadata, size, num_retries) - return client.job_from_resource(created_json) - except resumable_media.InvalidResponse as exc: - raise exceptions.from_http_response(exc.response) - # pylint: enable=too-many-arguments,too-many-locals - - -def _configure_job_metadata(metadata, # pylint: disable=too-many-arguments - allow_jagged_rows, - allow_quoted_newlines, - create_disposition, - encoding, - field_delimiter, - ignore_unknown_values, - max_bad_records, - quote_character, - skip_leading_rows, - write_disposition, - job_name, - null_marker): - """Helper for :meth:`Table.upload_from_file`.""" - load_config = metadata['configuration']['load'] - - if allow_jagged_rows is not None: - load_config['allowJaggedRows'] = allow_jagged_rows - - if allow_quoted_newlines is not None: - load_config['allowQuotedNewlines'] = allow_quoted_newlines - - if create_disposition is not None: - load_config['createDisposition'] = create_disposition - - if encoding is not None: - load_config['encoding'] = encoding - - if field_delimiter is not None: - load_config['fieldDelimiter'] = field_delimiter - - if ignore_unknown_values is not None: - load_config['ignoreUnknownValues'] = ignore_unknown_values - - if max_bad_records is not None: - load_config['maxBadRecords'] = max_bad_records - - if quote_character is not None: - load_config['quote'] = quote_character - - if skip_leading_rows is not None: - load_config['skipLeadingRows'] = skip_leading_rows - - if write_disposition is not None: - load_config['writeDisposition'] = write_disposition - - if job_name is not None: - load_config['jobReference'] = {'jobId': job_name} - - if null_marker is not None: - load_config['nullMarker'] = null_marker - def _parse_schema_resource(info): """Parse a resource fragment into a schema field. @@ -1222,53 +859,6 @@ def _build_schema_resource(fields): # pylint: enable=unused-argument -def _maybe_rewind(stream, rewind=False): - """Rewind the stream if desired. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type rewind: bool - :param rewind: Indicates if we should seek to the beginning of the stream. - """ - if rewind: - stream.seek(0, os.SEEK_SET) - - -def _check_mode(stream): - """Check that a stream was opened in read-binary mode. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :raises: :exc:`ValueError` if the ``stream.mode`` is a valid attribute - and is not among ``rb``, ``r+b`` or ``rb+``. - """ - mode = getattr(stream, 'mode', None) - - if mode is not None and mode not in ('rb', 'r+b', 'rb+'): - raise ValueError( - "Cannot upload files opened in text mode: use " - "open(filename, mode='rb') or open(filename, mode='r+b')") - - -def _get_upload_headers(user_agent): - """Get the headers for an upload request. - - :type user_agent: str - :param user_agent: The user-agent for requests. - - :rtype: dict - :returns: The headers to be used for the request. - """ - return { - 'Accept': 'application/json', - 'Accept-Encoding': 'gzip, deflate', - 'User-Agent': user_agent, - 'content-type': 'application/json', - } - - def _get_upload_metadata(source_format, schema, project, dataset_id, table_id): """Get base metadata for creating a table. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index ce49e88177e7..1bf00a9b57ef 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -372,7 +372,8 @@ def test_load_table_from_local_file_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], + table_ref = dataset.table(TABLE_NAME) + table_arg = Table(table_ref, schema=[full_name, age], client=Config.CLIENT) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -384,13 +385,14 @@ def test_load_table_from_local_file_then_dump_table(self): writer.writerows(ROWS) with open(temp.name, 'rb') as csv_read: - job = table.upload_from_file( - csv_read, - source_format='CSV', - skip_leading_rows=1, - create_disposition='CREATE_NEVER', - write_disposition='WRITE_EMPTY', - ) + config = bigquery.LoadJobConfig() + config.source_format = 'CSV' + config.skip_leading_rows = 1 + config.create_disposition = 'CREATE_NEVER' + config.write_disposition = 'WRITE_EMPTY' + config.schema = table.schema + job = Config.CLIENT.load_table_from_file( + csv_read, table_ref, job_config=config) # Retry until done. job.result(timeout=JOB_TIMEOUT) @@ -414,16 +416,16 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("red", 650)] dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) - table = Table(dataset.table(TABLE_NAME), client=Config.CLIENT) + table_ref = dataset.table(TABLE_NAME) + table = Table(table_ref, client=Config.CLIENT) self.to_delete.insert(0, table) with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof: - job = table.upload_from_file( - avrof, - source_format='AVRO', - write_disposition='WRITE_TRUNCATE' - ) - + config = bigquery.LoadJobConfig() + config.source_format = 'AVRO' + config.write_disposition = 'WRITE_TRUNCATE' + job = Config.CLIENT.load_table_from_file( + avrof, table_ref, job_config=config) # Retry until done. job.result(timeout=JOB_TIMEOUT) @@ -889,8 +891,8 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): dataset = self.temp_dataset(dataset_id) greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') - table_arg = Table(dataset.table(table_id), schema=[greeting], - client=Config.CLIENT) + table_ref = dataset.table(table_id) + table_arg = Table(table_ref, schema=[greeting], client=Config.CLIENT) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -901,13 +903,13 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): writer.writerows(rows) with open(temp.name, 'rb') as csv_read: - job = table.upload_from_file( - csv_read, - source_format='CSV', - skip_leading_rows=1, - create_disposition='CREATE_NEVER', - write_disposition='WRITE_EMPTY', - ) + config = bigquery.LoadJobConfig() + config.source_format = 'CSV' + config.skip_leading_rows = 1 + config.create_disposition = 'CREATE_NEVER' + config.write_disposition = 'WRITE_EMPTY' + job = Config.CLIENT.load_table_from_file( + csv_read, table_ref, job_config=config) # Retry until done. job.result(timeout=JOB_TIMEOUT) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 4f509da53f1c..f4537f3fba8b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -13,10 +13,17 @@ # limitations under the License. import copy +import email +import io +import json import unittest import mock import six +from six.moves import http_client +import pytest + +from google.cloud.bigquery.dataset import DatasetReference def _make_credentials(): @@ -1442,6 +1449,154 @@ def test_load_table_from_storage(self): self.assertEqual(list(job.source_uris), [SOURCE_URI]) self.assertIs(job.destination, destination) + @staticmethod + def _mock_requests_response(status_code, headers, content=b''): + return mock.Mock( + content=content, headers=headers, status_code=status_code, + spec=['content', 'headers', 'status_code']) + + def _mock_transport(self, status_code, headers, content=b''): + fake_transport = mock.Mock(spec=['request']) + fake_response = self._mock_requests_response( + status_code, headers, content=content) + fake_transport.request.return_value = fake_response + return fake_transport + + def _initiate_resumable_upload_helper(self, num_retries=None): + from google.resumable_media.requests import ResumableUpload + from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE + from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob, LoadJobConfig + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + TABLE_ID = 'TABLE_ID' + + # Create mocks to be checked for doing transport. + resumable_url = 'http://test.invalid?upload_id=hey-you' + response_headers = {'location': resumable_url} + fake_transport = self._mock_transport( + http_client.OK, response_headers) + client = self._make_one(project=PROJECT, _http=fake_transport) + conn = client._connection = _Connection() + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + + # Create some mock arguments and call the method under test. + data = b'goodbye gudbi gootbee' + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = 'CSV' + job = LoadJob(None, None, table_ref, client, job_config=config) + metadata = job._build_resource() + upload, transport = client._initiate_resumable_upload( + stream, metadata, num_retries) + + # Check the returned values. + self.assertIsInstance(upload, ResumableUpload) + upload_url = ( + 'https://www.googleapis.com/upload/bigquery/v2/projects/' + + PROJECT + + '/jobs?uploadType=resumable') + self.assertEqual(upload.upload_url, upload_url) + expected_headers = _get_upload_headers(conn.USER_AGENT) + self.assertEqual(upload._headers, expected_headers) + self.assertFalse(upload.finished) + self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE) + self.assertIs(upload._stream, stream) + self.assertIsNone(upload._total_bytes) + self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE) + self.assertEqual(upload.resumable_url, resumable_url) + + retry_strategy = upload._retry_strategy + self.assertEqual(retry_strategy.max_sleep, 64.0) + if num_retries is None: + self.assertEqual(retry_strategy.max_cumulative_retry, 600.0) + self.assertIsNone(retry_strategy.max_retries) + else: + self.assertIsNone(retry_strategy.max_cumulative_retry) + self.assertEqual(retry_strategy.max_retries, num_retries) + self.assertIs(transport, fake_transport) + # Make sure we never read from the stream. + self.assertEqual(stream.tell(), 0) + + # Check the mocks. + request_headers = expected_headers.copy() + request_headers['x-upload-content-type'] = _GENERIC_CONTENT_TYPE + fake_transport.request.assert_called_once_with( + 'POST', + upload_url, + data=json.dumps(metadata).encode('utf-8'), + headers=request_headers, + ) + + def test__initiate_resumable_upload(self): + self._initiate_resumable_upload_helper() + + def test__initiate_resumable_upload_with_retry(self): + self._initiate_resumable_upload_helper(num_retries=11) + + def _do_multipart_upload_success_helper( + self, get_boundary, num_retries=None): + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob, LoadJobConfig + + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + TABLE_ID = 'TABLE_ID' + + fake_transport = self._mock_transport(http_client.OK, {}) + client = self._make_one(project=PROJECT, _http=fake_transport) + conn = client._connection = _Connection() + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + + # Create some mock arguments. + data = b'Bzzzz-zap \x00\x01\xf4' + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = 'CSV' + job = LoadJob(None, None, table_ref, client, job_config=config) + metadata = job._build_resource() + size = len(data) + response = client._do_multipart_upload( + stream, metadata, size, num_retries) + + # Check the mocks and the returned value. + self.assertIs(response, fake_transport.request.return_value) + self.assertEqual(stream.tell(), size) + get_boundary.assert_called_once_with() + + upload_url = ( + 'https://www.googleapis.com/upload/bigquery/v2/projects/' + + PROJECT + + '/jobs?uploadType=multipart') + payload = ( + b'--==0==\r\n' + + b'content-type: application/json; charset=UTF-8\r\n\r\n' + + json.dumps(metadata).encode('utf-8') + b'\r\n' + + b'--==0==\r\n' + + b'content-type: */*\r\n\r\n' + + data + b'\r\n' + + b'--==0==--') + headers = _get_upload_headers(conn.USER_AGENT) + headers['content-type'] = b'multipart/related; boundary="==0=="' + fake_transport.request.assert_called_once_with( + 'POST', + upload_url, + data=payload, + headers=headers, + ) + + @mock.patch(u'google.resumable_media._upload.get_boundary', + return_value=b'==0==') + def test__do_multipart_upload(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary) + + @mock.patch(u'google.resumable_media._upload.get_boundary', + return_value=b'==0==') + def test__do_multipart_upload_with_retry(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary, num_retries=8) + def test_copy_table(self): from google.cloud.bigquery.job import CopyJob @@ -2180,8 +2335,328 @@ def test_list_rows_errors(self): client.list_rows(1) +class TestClientUpload(object): + # NOTE: This is a "partner" to `TestClient` meant to test some of the + # "load_table_from_file" portions of `Client`. It also uses + # `pytest`-style tests rather than `unittest`-style. + + TABLE_REF = DatasetReference( + 'project_id', 'test_dataset').table('test_table') + + @staticmethod + def _make_client(transport=None): + from google.cloud.bigquery import _http + from google.cloud.bigquery import client + + cl = client.Client(project='project_id', + credentials=_make_credentials(), + _http=transport) + cl._connection = mock.create_autospec(_http.Connection, instance=True) + return cl + + @staticmethod + def _make_response(status_code, content='', headers={}): + """Make a mock HTTP response.""" + import requests + response = requests.Response() + response.request = requests.Request( + 'POST', 'http://example.com').prepare() + response._content = content.encode('utf-8') + response.headers.update(headers) + response.status_code = status_code + return response + + @classmethod + def _make_do_upload_patch(cls, client, method, + resource={}, side_effect=None): + """Patches the low-level upload helpers.""" + if side_effect is None: + side_effect = [cls._make_response( + http_client.OK, + json.dumps(resource), + {'Content-Type': 'application/json'})] + return mock.patch.object( + client, method, side_effect=side_effect, autospec=True) + + EXPECTED_CONFIGURATION = { + 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'}, + 'configuration': { + 'load': { + 'sourceFormat': 'CSV', + 'destinationTable': { + 'projectId': 'project_id', + 'datasetId': 'test_dataset', + 'tableId': 'test_table' + } + } + } + } + + @staticmethod + def _make_file_obj(): + return io.BytesIO(b'hello, is it me you\'re looking for?') + + @staticmethod + def _make_config(): + from google.cloud.bigquery.job import LoadJobConfig + + config = LoadJobConfig() + config.source_format = 'CSV' + return config + + # High-level tests + + def test_load_table_from_file_resumable(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file(file_obj, self.TABLE_REF, + job_id='job_id', + job_config=self._make_config()) + + do_upload.assert_called_once_with( + file_obj, + self.EXPECTED_CONFIGURATION, + _DEFAULT_NUM_RETRIES) + + def test_load_table_from_file_resumable_metadata(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + + config = self._make_config() + config.allow_jagged_rows = False + config.allow_quoted_newlines = False + config.create_disposition = 'CREATE_IF_NEEDED' + config.encoding = 'utf8' + config.field_delimiter = ',' + config.ignore_unknown_values = False + config.max_bad_records = 0 + config.quote_character = '"' + config.skip_leading_rows = 1 + config.write_disposition = 'WRITE_APPEND' + config.null_marker = r'\N' + + expected_config = { + 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'}, + 'configuration': { + 'load': { + 'destinationTable': { + 'projectId': self.TABLE_REF.project, + 'datasetId': self.TABLE_REF.dataset_id, + 'tableId': self.TABLE_REF.table_id, + }, + 'sourceFormat': config.source_format, + 'allowJaggedRows': config.allow_jagged_rows, + 'allowQuotedNewlines': config.allow_quoted_newlines, + 'createDisposition': config.create_disposition, + 'encoding': config.encoding, + 'fieldDelimiter': config.field_delimiter, + 'ignoreUnknownValues': config.ignore_unknown_values, + 'maxBadRecords': config.max_bad_records, + 'quote': config.quote_character, + 'skipLeadingRows': str(config.skip_leading_rows), + 'writeDisposition': config.write_disposition, + 'nullMarker': config.null_marker, + }, + }, + } + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', expected_config) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, self.TABLE_REF, job_id='job_id', job_config=config) + + do_upload.assert_called_once_with( + file_obj, + expected_config, + _DEFAULT_NUM_RETRIES) + + def test_load_table_from_file_multipart(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + file_obj_size = 10 + config = self._make_config() + + do_upload_patch = self._make_do_upload_patch( + client, '_do_multipart_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, self.TABLE_REF, job_id='job_id', job_config=config, + size=file_obj_size) + + do_upload.assert_called_once_with( + file_obj, + self.EXPECTED_CONFIGURATION, + file_obj_size, + _DEFAULT_NUM_RETRIES) + + def test_load_table_from_file_with_retries(self): + client = self._make_client() + file_obj = self._make_file_obj() + num_retries = 20 + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, self.TABLE_REF, num_retries=num_retries, + job_id='job_id', job_config=self._make_config()) + + do_upload.assert_called_once_with( + file_obj, + self.EXPECTED_CONFIGURATION, + num_retries) + + def test_load_table_from_file_with_rewind(self): + client = self._make_client() + file_obj = self._make_file_obj() + file_obj.seek(2) + + with self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION): + client.load_table_from_file( + file_obj, self.TABLE_REF, rewind=True) + + assert file_obj.tell() == 0 + + def test_load_table_from_file_failure(self): + from google.resumable_media import InvalidResponse + from google.cloud import exceptions + + client = self._make_client() + file_obj = self._make_file_obj() + + response = self._make_response( + content='Someone is already in this spot.', + status_code=http_client.CONFLICT) + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', + side_effect=InvalidResponse(response)) + + with do_upload_patch, pytest.raises(exceptions.Conflict) as exc_info: + client.load_table_from_file( + file_obj, self.TABLE_REF, rewind=True) + + assert response.text in exc_info.value.message + assert exc_info.value.errors == [] + + def test_load_table_from_file_bad_mode(self): + client = self._make_client() + file_obj = mock.Mock(spec=['mode']) + file_obj.mode = 'x' + + with pytest.raises(ValueError): + client.load_table_from_file(file_obj, self.TABLE_REF) + + # Low-level tests + + @classmethod + def _make_resumable_upload_responses(cls, size): + """Make a series of responses for a successful resumable upload.""" + from google import resumable_media + + resumable_url = 'http://test.invalid?upload_id=and-then-there-was-1' + initial_response = cls._make_response( + http_client.OK, '', {'location': resumable_url}) + data_response = cls._make_response( + resumable_media.PERMANENT_REDIRECT, + '', {'range': 'bytes=0-{:d}'.format(size - 1)}) + final_response = cls._make_response( + http_client.OK, + json.dumps({'size': size}), + {'Content-Type': 'application/json'}) + return [initial_response, data_response, final_response] + + @staticmethod + def _make_transport(responses=None): + import google.auth.transport.requests + + transport = mock.create_autospec( + google.auth.transport.requests.AuthorizedSession, instance=True) + transport.request.side_effect = responses + return transport + + def test__do_resumable_upload(self): + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = self._make_transport( + self._make_resumable_upload_responses(file_obj_len)) + client = self._make_client(transport) + + result = client._do_resumable_upload( + file_obj, + self.EXPECTED_CONFIGURATION, + None) + + content = result.content.decode('utf-8') + assert json.loads(content) == {'size': file_obj_len} + + # Verify that configuration data was passed in with the initial + # request. + transport.request.assert_any_call( + 'POST', + mock.ANY, + data=json.dumps(self.EXPECTED_CONFIGURATION).encode('utf-8'), + headers=mock.ANY) + + def test__do_multipart_upload(self): + transport = self._make_transport([self._make_response(http_client.OK)]) + client = self._make_client(transport) + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + client._do_multipart_upload( + file_obj, + self.EXPECTED_CONFIGURATION, + file_obj_len, + None) + + # Verify that configuration data was passed in with the initial + # request. + request_args = transport.request.mock_calls[0][2] + request_data = request_args['data'].decode('utf-8') + request_headers = request_args['headers'] + + request_content = email.message_from_string( + 'Content-Type: {}\r\n{}'.format( + request_headers['content-type'].decode('utf-8'), + request_data)) + + # There should be two payloads: the configuration and the binary daya. + configuration_data = request_content.get_payload(0).get_payload() + binary_data = request_content.get_payload(1).get_payload() + + assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION + assert binary_data.encode('utf-8') == file_obj.getvalue() + + def test__do_multipart_upload_wrong_size(self): + client = self._make_client() + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + with pytest.raises(ValueError): + client._do_multipart_upload( + file_obj, + {}, + file_obj_len+1, + None) + + class _Connection(object): + USER_AGENT = 'testing 1.2.3' + def __init__(self, *responses): self._responses = responses self._requested = [] diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 6e00bd73c9c6..9661a449c4fb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -12,14 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import email -import io -import json import unittest import mock -from six.moves import http_client -import pytest from google.cloud.bigquery.dataset import DatasetReference @@ -1029,498 +1024,6 @@ def _row_data(row): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['data'], SENT) - def test__populate_view_use_legacy_sql_resource_w_existing_view(self): - query = 'select * from foo' - resource = {'view': {'query': query}} - client = mock.Mock(spec=[u'_credentials', '_http']) - client._http = mock.sentinel.http - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table = self._make_one(dataset.table(self.TABLE_NAME), client=client) - table.view_use_legacy_sql = True - - table._populate_view_use_legacy_sql_resource(resource) - - self.assertEqual( - resource['view']['useLegacySql'], table.view_use_legacy_sql) - self.assertEqual(resource['view']['query'], query) - - def test__get_transport(self): - client = mock.Mock(spec=[u'_credentials', '_http']) - client._http = mock.sentinel.http - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - transport = table._get_transport(client) - - self.assertIs(transport, mock.sentinel.http) - - @staticmethod - def _mock_requests_response(status_code, headers, content=b''): - return mock.Mock( - content=content, headers=headers, status_code=status_code, - spec=['content', 'headers', 'status_code']) - - def _mock_transport(self, status_code, headers, content=b''): - fake_transport = mock.Mock(spec=['request']) - fake_response = self._mock_requests_response( - status_code, headers, content=content) - fake_transport.request.return_value = fake_response - return fake_transport - - def _initiate_resumable_upload_helper(self, num_retries=None): - from google.resumable_media.requests import ResumableUpload - from google.cloud.bigquery.table import _DEFAULT_CHUNKSIZE - from google.cloud.bigquery.table import _GENERIC_CONTENT_TYPE - from google.cloud.bigquery.table import _get_upload_headers - from google.cloud.bigquery.table import _get_upload_metadata - - connection = _Connection() - client = _Client(self.PROJECT, connection=connection) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - # Create mocks to be checked for doing transport. - resumable_url = 'http://test.invalid?upload_id=hey-you' - response_headers = {'location': resumable_url} - fake_transport = self._mock_transport( - http_client.OK, response_headers) - client._http = fake_transport - - # Create some mock arguments and call the method under test. - data = b'goodbye gudbi gootbee' - stream = io.BytesIO(data) - metadata = _get_upload_metadata( - 'CSV', table._schema, table.project, - table.dataset_id, table.table_id) - upload, transport = table._initiate_resumable_upload( - client, stream, metadata, num_retries) - - # Check the returned values. - self.assertIsInstance(upload, ResumableUpload) - upload_url = ( - 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - self.PROJECT + - '/jobs?uploadType=resumable') - self.assertEqual(upload.upload_url, upload_url) - expected_headers = _get_upload_headers(connection.USER_AGENT) - self.assertEqual(upload._headers, expected_headers) - self.assertFalse(upload.finished) - self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE) - self.assertIs(upload._stream, stream) - self.assertIsNone(upload._total_bytes) - self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE) - self.assertEqual(upload.resumable_url, resumable_url) - - retry_strategy = upload._retry_strategy - self.assertEqual(retry_strategy.max_sleep, 64.0) - if num_retries is None: - self.assertEqual(retry_strategy.max_cumulative_retry, 600.0) - self.assertIsNone(retry_strategy.max_retries) - else: - self.assertIsNone(retry_strategy.max_cumulative_retry) - self.assertEqual(retry_strategy.max_retries, num_retries) - self.assertIs(transport, fake_transport) - # Make sure we never read from the stream. - self.assertEqual(stream.tell(), 0) - - # Check the mocks. - request_headers = expected_headers.copy() - request_headers['x-upload-content-type'] = _GENERIC_CONTENT_TYPE - fake_transport.request.assert_called_once_with( - 'POST', - upload_url, - data=json.dumps(metadata).encode('utf-8'), - headers=request_headers, - ) - - def test__initiate_resumable_upload(self): - self._initiate_resumable_upload_helper() - - def test__initiate_resumable_upload_with_retry(self): - self._initiate_resumable_upload_helper(num_retries=11) - - def _do_multipart_upload_success_helper( - self, get_boundary, num_retries=None): - from google.cloud.bigquery.table import _get_upload_headers - from google.cloud.bigquery.table import _get_upload_metadata - - connection = _Connection() - client = _Client(self.PROJECT, connection=connection) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - # Create mocks to be checked for doing transport. - fake_transport = self._mock_transport(http_client.OK, {}) - client._http = fake_transport - - # Create some mock arguments. - data = b'Bzzzz-zap \x00\x01\xf4' - stream = io.BytesIO(data) - metadata = _get_upload_metadata( - 'CSV', table._schema, table.project, - table.dataset_id, table.table_id) - size = len(data) - response = table._do_multipart_upload( - client, stream, metadata, size, num_retries) - - # Check the mocks and the returned value. - self.assertIs(response, fake_transport.request.return_value) - self.assertEqual(stream.tell(), size) - get_boundary.assert_called_once_with() - - upload_url = ( - 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - self.PROJECT + - '/jobs?uploadType=multipart') - payload = ( - b'--==0==\r\n' + - b'content-type: application/json; charset=UTF-8\r\n\r\n' + - json.dumps(metadata).encode('utf-8') + b'\r\n' + - b'--==0==\r\n' + - b'content-type: */*\r\n\r\n' + - data + b'\r\n' + - b'--==0==--') - headers = _get_upload_headers(connection.USER_AGENT) - headers['content-type'] = b'multipart/related; boundary="==0=="' - fake_transport.request.assert_called_once_with( - 'POST', - upload_url, - data=payload, - headers=headers, - ) - - @mock.patch(u'google.resumable_media._upload.get_boundary', - return_value=b'==0==') - def test__do_multipart_upload(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary) - - @mock.patch(u'google.resumable_media._upload.get_boundary', - return_value=b'==0==') - def test__do_multipart_upload_with_retry(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, num_retries=8) - - -class TestTableUpload(object): - # NOTE: This is a "partner" to `TestTable` meant to test some of the - # "upload" portions of `Table`. It also uses `pytest`-style tests - # rather than `unittest`-style. - - @staticmethod - def _make_table(transport=None): - from google.cloud.bigquery import _http - from google.cloud.bigquery import client - from google.cloud.bigquery import dataset - from google.cloud.bigquery import table - - connection = mock.create_autospec(_http.Connection, instance=True) - client = mock.create_autospec(client.Client, instance=True) - client._connection = connection - client._credentials = mock.sentinel.credentials - client._http = transport - client.project = 'project_id' - - dataset_ref = dataset.DatasetReference('project_id', 'test_dataset') - table_ref = dataset_ref.table('test_table') - table = table.Table(table_ref, client=client) - - return table - - @staticmethod - def _make_response(status_code, content='', headers={}): - """Make a mock HTTP response.""" - import requests - response = requests.Response() - response.request = requests.Request( - 'POST', 'http://example.com').prepare() - response._content = content.encode('utf-8') - response.headers.update(headers) - response.status_code = status_code - return response - - @classmethod - def _make_do_upload_patch(cls, table, method, side_effect=None): - """Patches the low-level upload helpers.""" - if side_effect is None: - side_effect = [cls._make_response( - http_client.OK, - json.dumps({}), - {'Content-Type': 'application/json'})] - return mock.patch.object( - table, method, side_effect=side_effect, autospec=True) - - EXPECTED_CONFIGURATION = { - 'configuration': { - 'load': { - 'sourceFormat': 'CSV', - 'destinationTable': { - 'projectId': 'project_id', - 'datasetId': 'test_dataset', - 'tableId': 'test_table' - } - } - } - } - - @staticmethod - def _make_file_obj(): - return io.BytesIO(b'hello, is it me you\'re looking for?') - - # High-level tests - - def test_upload_from_file_resumable(self): - import google.cloud.bigquery.table - - table = self._make_table() - file_obj = self._make_file_obj() - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload') - with do_upload_patch as do_upload: - table.upload_from_file(file_obj, source_format='CSV') - - do_upload.assert_called_once_with( - table._client, - file_obj, - self.EXPECTED_CONFIGURATION, - google.cloud.bigquery.table._DEFAULT_NUM_RETRIES) - - def test_upload_file_resumable_metadata(self): - table = self._make_table() - file_obj = self._make_file_obj() - - config_args = { - 'source_format': 'CSV', - 'allow_jagged_rows': False, - 'allow_quoted_newlines': False, - 'create_disposition': 'CREATE_IF_NEEDED', - 'encoding': 'utf8', - 'field_delimiter': ',', - 'ignore_unknown_values': False, - 'max_bad_records': 0, - 'quote_character': '"', - 'skip_leading_rows': 1, - 'write_disposition': 'WRITE_APPEND', - 'job_name': 'oddjob', - 'null_marker': r'\N', - } - - expected_config = { - 'configuration': { - 'load': { - 'sourceFormat': config_args['source_format'], - 'destinationTable': { - 'projectId': table.project, - 'datasetId': table.dataset_id, - 'tableId': table.table_id, - }, - 'allowJaggedRows': config_args['allow_jagged_rows'], - 'allowQuotedNewlines': - config_args['allow_quoted_newlines'], - 'createDisposition': config_args['create_disposition'], - 'encoding': config_args['encoding'], - 'fieldDelimiter': config_args['field_delimiter'], - 'ignoreUnknownValues': - config_args['ignore_unknown_values'], - 'maxBadRecords': config_args['max_bad_records'], - 'quote': config_args['quote_character'], - 'skipLeadingRows': config_args['skip_leading_rows'], - 'writeDisposition': config_args['write_disposition'], - 'jobReference': {'jobId': config_args['job_name']}, - 'nullMarker': config_args['null_marker'], - }, - }, - } - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload') - with do_upload_patch as do_upload: - table.upload_from_file( - file_obj, **config_args) - - do_upload.assert_called_once_with( - table._client, - file_obj, - expected_config, - mock.ANY) - - def test_upload_from_file_multipart(self): - import google.cloud.bigquery.table - - table = self._make_table() - file_obj = self._make_file_obj() - file_obj_size = 10 - - do_upload_patch = self._make_do_upload_patch( - table, '_do_multipart_upload') - with do_upload_patch as do_upload: - table.upload_from_file( - file_obj, source_format='CSV', size=file_obj_size) - - do_upload.assert_called_once_with( - table._client, - file_obj, - self.EXPECTED_CONFIGURATION, - file_obj_size, - google.cloud.bigquery.table._DEFAULT_NUM_RETRIES) - - def test_upload_from_file_with_retries(self): - table = self._make_table() - file_obj = self._make_file_obj() - num_retries = 20 - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload') - with do_upload_patch as do_upload: - table.upload_from_file( - file_obj, source_format='CSV', num_retries=num_retries) - - do_upload.assert_called_once_with( - table._client, - file_obj, - self.EXPECTED_CONFIGURATION, - num_retries) - - def test_upload_from_file_with_rewind(self): - table = self._make_table() - file_obj = self._make_file_obj() - file_obj.seek(2) - - with self._make_do_upload_patch(table, '_do_resumable_upload'): - table.upload_from_file( - file_obj, source_format='CSV', rewind=True) - - assert file_obj.tell() == 0 - - def test_upload_from_file_failure(self): - from google.resumable_media import InvalidResponse - from google.cloud import exceptions - - table = self._make_table() - file_obj = self._make_file_obj() - - response = self._make_response( - content='Someone is already in this spot.', - status_code=http_client.CONFLICT) - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload', - side_effect=InvalidResponse(response)) - - with do_upload_patch, pytest.raises(exceptions.Conflict) as exc_info: - table.upload_from_file( - file_obj, source_format='CSV', rewind=True) - - assert response.text in exc_info.value.message - assert exc_info.value.errors == [] - - def test_upload_from_file_bad_mode(self): - table = self._make_table() - file_obj = mock.Mock(spec=['mode']) - file_obj.mode = 'x' - - with pytest.raises(ValueError): - table.upload_from_file( - file_obj, source_format='CSV',) - - # Low-level tests - - @classmethod - def _make_resumable_upload_responses(cls, size): - """Make a series of responses for a successful resumable upload.""" - from google import resumable_media - - resumable_url = 'http://test.invalid?upload_id=and-then-there-was-1' - initial_response = cls._make_response( - http_client.OK, '', {'location': resumable_url}) - data_response = cls._make_response( - resumable_media.PERMANENT_REDIRECT, - '', {'range': 'bytes=0-{:d}'.format(size - 1)}) - final_response = cls._make_response( - http_client.OK, - json.dumps({'size': size}), - {'Content-Type': 'application/json'}) - return [initial_response, data_response, final_response] - - @staticmethod - def _make_transport(responses=None): - import google.auth.transport.requests - - transport = mock.create_autospec( - google.auth.transport.requests.AuthorizedSession, instance=True) - transport.request.side_effect = responses - return transport - - def test__do_resumable_upload(self): - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len)) - table = self._make_table(transport) - - result = table._do_resumable_upload( - table._client, - file_obj, - self.EXPECTED_CONFIGURATION, - None) - - content = result.content.decode('utf-8') - assert json.loads(content) == {'size': file_obj_len} - - # Verify that configuration data was passed in with the initial - # request. - transport.request.assert_any_call( - 'POST', - mock.ANY, - data=json.dumps(self.EXPECTED_CONFIGURATION).encode('utf-8'), - headers=mock.ANY) - - def test__do_multipart_upload(self): - transport = self._make_transport([self._make_response(http_client.OK)]) - table = self._make_table(transport) - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - table._do_multipart_upload( - table._client, - file_obj, - self.EXPECTED_CONFIGURATION, - file_obj_len, - None) - - # Verify that configuration data was passed in with the initial - # request. - request_args = transport.request.mock_calls[0][2] - request_data = request_args['data'].decode('utf-8') - request_headers = request_args['headers'] - - request_content = email.message_from_string( - 'Content-Type: {}\r\n{}'.format( - request_headers['content-type'].decode('utf-8'), - request_data)) - - # There should be two payloads: the configuration and the binary daya. - configuration_data = request_content.get_payload(0).get_payload() - binary_data = request_content.get_payload(1).get_payload() - - assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION - assert binary_data.encode('utf-8') == file_obj.getvalue() - - def test__do_multipart_upload_wrong_size(self): - table = self._make_table() - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - with pytest.raises(ValueError): - table._do_multipart_upload( - table._client, - file_obj, - {}, - file_obj_len+1, - None) - class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): From d922a32ff41b5d649bd4caea0d838ef6bb6c06d5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 9 Oct 2017 14:20:03 -0700 Subject: [PATCH 0303/2016] BQ: Pass selected_fields as a string to tabledata.list. (#4143) BigQuery was only returning the first column when passing in a list instead of a comma-separated string. --- .../google/cloud/bigquery/client.py | 3 ++- .../google-cloud-bigquery/tests/system.py | 22 +++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index ce41824996bd..756e2bb7d41d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -924,7 +924,8 @@ def list_rows(self, table, selected_fields=None, max_results=None, params = {} if selected_fields is not None: - params['selectedFields'] = [f.name for f in selected_fields] + params['selectedFields'] = ','.join( + [f.name for f in selected_fields]) if start_index is not None: params['startIndex'] = start_index diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1bf00a9b57ef..e59747578bb3 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -312,8 +312,9 @@ def test_update_table_schema(self): self.assertEqual(found.mode, expected.mode) @staticmethod - def _fetch_single_page(table): - iterator = Config.CLIENT.list_rows(table) + def _fetch_single_page(table, selected_fields=None): + iterator = Config.CLIENT.list_rows( + table, selected_fields=selected_fields) page = six.next(iterator.pages) return list(page) @@ -1236,6 +1237,23 @@ def test_dump_table_w_public_data(self): table = Config.CLIENT.get_table(table_ref) self._fetch_single_page(table) + def test_dump_table_w_public_data_selected_fields(self): + PUBLIC = 'bigquery-public-data' + DATASET_ID = 'samples' + TABLE_NAME = 'natality' + selected_fields = [ + bigquery.SchemaField('year', 'INTEGER', mode='NULLABLE'), + bigquery.SchemaField('month', 'INTEGER', mode='NULLABLE'), + bigquery.SchemaField('day', 'INTEGER', mode='NULLABLE'), + ] + table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME) + + rows = self._fetch_single_page( + table_ref, selected_fields=selected_fields) + + self.assertGreater(len(rows), 0) + self.assertEqual(len(rows[0]), 3) + def test_large_query_w_public_data(self): PUBLIC = 'bigquery-public-data' DATASET_ID = 'samples' From 1578b4ade15f24e21f4aac4c688999027343b946 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 9 Oct 2017 19:25:04 -0400 Subject: [PATCH 0304/2016] bigquery: remove Table.exists (#4145) --- .../google/cloud/bigquery/table.py | 25 ---------- .../google-cloud-bigquery/tests/system.py | 27 +++++++---- .../tests/unit/test_table.py | 48 +------------------ 3 files changed, 20 insertions(+), 80 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 8f56dffd18bf..3464ddacaf91 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -20,7 +20,6 @@ import six -from google.cloud import exceptions from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime from google.cloud.bigquery.schema import SchemaField @@ -660,30 +659,6 @@ def _build_resource(self, filter_fields): resource[api_field] = getattr(self, f) return resource - def exists(self, client=None): - """API call: test for the existence of the table via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: bool - :returns: Boolean indicating existence of the table. - """ - client = self._require_client(client) - - try: - client._connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) - except exceptions.NotFound: - return False - else: - return True - def row_from_mapping(self, mapping): """Convert a mapping to a row tuple using the schema. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index e59747578bb3..9ef835a9bb2a 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -192,12 +192,12 @@ def test_create_table(self): age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') table_arg = Table(dataset.table(table_id), schema=[full_name, age], client=Config.CLIENT) - self.assertFalse(table_arg.exists()) + self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_id) def test_get_table_w_public_dataset(self): @@ -259,10 +259,10 @@ def test_update_table(self): ] table_arg = Table(dataset.table(TABLE_NAME), schema=schema, client=Config.CLIENT) - self.assertFalse(table_arg.exists()) + self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) self.assertIsNone(table.friendly_name) self.assertIsNone(table.description) table.friendly_name = 'Friendly' @@ -294,10 +294,10 @@ def test_update_table_schema(self): ] table_arg = Table(dataset.table(TABLE_NAME), schema=schema, client=Config.CLIENT) - self.assertFalse(table_arg.exists()) + self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) voter = bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE') schema = table.schema schema.append(voter) @@ -338,10 +338,10 @@ def test_insert_data_then_dump_table(self): now = bigquery.SchemaField('now', 'TIMESTAMP') table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age, now], client=Config.CLIENT) - self.assertFalse(table_arg.exists()) + self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) errors = table.insert_data(ROWS, ROW_IDS) self.assertEqual(len(errors), 0) @@ -1317,7 +1317,7 @@ def test_create_table_insert_fetch_nested_schema(self): client=Config.CLIENT) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_name) to_insert = [] @@ -1386,3 +1386,12 @@ def _dataset_exists(ds): return True except NotFound: return False + + +def _table_exists(t): + try: + tr = DatasetReference(t.project, t.dataset_id).table(t.table_id) + Config.CLIENT.get_table(tr) + return True + except NotFound: + return False diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 9661a449c4fb..951042020748 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -708,43 +708,6 @@ def test_list_partitions(self): client=client) self.assertEqual(table.list_partitions(), [20160804, 20160805]) - def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - - self.assertFalse(table.exists()) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) - - def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client1) - - self.assertTrue(table.exists(client=client2)) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) - def test_row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} @@ -1239,13 +1202,6 @@ def __init__(self, *responses): self._requested = [] def api_request(self, **kw): - from google.cloud.exceptions import NotFound - self._requested.append(kw) - - try: - response, self._responses = self._responses[0], self._responses[1:] - except IndexError: - raise NotFound('miss') - else: - return response + response, self._responses = self._responses[0], self._responses[1:] + return response From 9fa92a7c8e176c4078073500b69e3b4b8f05c017 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 9 Oct 2017 20:31:56 -0400 Subject: [PATCH 0305/2016] bigquery: remove unused function (#4147) --- .../google/cloud/bigquery/table.py | 43 ------------ .../tests/unit/test_table.py | 68 ------------------- 2 files changed, 111 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 3464ddacaf91..89df28de28d5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -831,46 +831,3 @@ def _build_schema_resource(fields): info['fields'] = _build_schema_resource(field.fields) infos.append(info) return infos -# pylint: enable=unused-argument - - -def _get_upload_metadata(source_format, schema, project, dataset_id, table_id): - """Get base metadata for creating a table. - - :type source_format: str - :param source_format: one of 'CSV' or 'NEWLINE_DELIMITED_JSON'. - job configuration option. - - :type schema: list - :param schema: List of :class:`SchemaField` associated with a table. - - :type project: str - :param table_id: The project bound to the table. - - :type dataset_id: str - :param table_id: The dataset_id of the dataset. - - :type table_id: str - :param table_id: The table_id of the table. - - :rtype: dict - :returns: The metadata dictionary. - """ - load_config = { - 'sourceFormat': source_format, - 'destinationTable': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id, - }, - } - if schema: - load_config['schema'] = { - 'fields': _build_schema_resource(schema), - } - - return { - 'configuration': { - 'load': load_config, - }, - } diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 951042020748..85a6f3a73b2d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1101,74 +1101,6 @@ def test_w_subfields(self): 'mode': 'REQUIRED'}]}) -class Test__get_upload_metadata(unittest.TestCase): - - @staticmethod - def _call_fut(source_format, schema, project, dataset_id, name): - from google.cloud.bigquery.table import _get_upload_metadata - - return _get_upload_metadata( - source_format, schema, project, dataset_id, name) - - def test_empty_schema(self): - source_format = 'AVRO' - dataset = mock.Mock(project='prediction', - spec=['dataset_id', 'project']) - dataset.dataset_id = 'market' # mock.Mock() treats `name` specially. - table_name = 'chairs' - metadata = self._call_fut(source_format, [], dataset.project, - dataset.dataset_id, table_name) - - expected = { - 'configuration': { - 'load': { - 'sourceFormat': source_format, - 'destinationTable': { - 'projectId': dataset.project, - 'datasetId': dataset.dataset_id, - 'tableId': table_name, - }, - }, - }, - } - self.assertEqual(metadata, expected) - - def test_with_schema(self): - from google.cloud.bigquery.table import SchemaField - - source_format = 'CSV' - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - dataset = mock.Mock(project='blind', spec=['dataset_id', 'project']) - dataset.dataset_id = 'movie' # mock.Mock() treats `name` specially. - table_name = 'teebull-neem' - metadata = self._call_fut( - source_format, [full_name], dataset.project, - dataset.dataset_id, table_name) - - expected = { - 'configuration': { - 'load': { - 'sourceFormat': source_format, - 'destinationTable': { - 'projectId': dataset.project, - 'datasetId': dataset.dataset_id, - 'tableId': table_name, - }, - 'schema': { - 'fields': [ - { - 'name': full_name.name, - 'type': full_name.field_type, - 'mode': full_name.mode, - }, - ], - }, - }, - }, - } - self.assertEqual(metadata, expected) - - class _Client(object): _query_results = () From bb56e16001deba373a078aed16b7863fbdd36139 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 9 Oct 2017 20:32:11 -0400 Subject: [PATCH 0306/2016] bigquery: add Client.list_partitions (#4146) Remove from Table. --- .../google/cloud/bigquery/client.py | 21 ++++++++- .../google/cloud/bigquery/table.py | 17 -------- .../tests/unit/test_client.py | 43 +++++++++++++++++++ .../tests/unit/test_table.py | 30 ------------- 4 files changed, 63 insertions(+), 48 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 756e2bb7d41d..e8642f8afb79 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -37,7 +37,7 @@ from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob -from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.job import QueryJob, QueryJobConfig from google.cloud.bigquery.query import QueryResults from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start @@ -943,6 +943,25 @@ def list_rows(self, table, selected_fields=None, max_results=None, iterator.schema = schema return iterator + def list_partitions(self, table): + """List the partitions in a table. + + :type table: One of: + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` + :param table: the table to list, or a reference to it. + + :rtype: list + :returns: a list of time partitions + """ + config = QueryJobConfig() + config.use_legacy_sql = True # required for '$' syntax + rows = self.query_rows( + 'SELECT partition_id from [%s:%s.%s$__PARTITIONS_SUMMARY__]' % + (table.project, table.dataset_id, table.table_id), + job_config=config) + return [row[0] for row in rows] + # pylint: disable=unused-argument def _item_to_project(iterator, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 89df28de28d5..9630f1495290 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -526,23 +526,6 @@ def view_use_legacy_sql(self, value): self._properties['view'] = {} self._properties['view']['useLegacySql'] = value - def list_partitions(self, client=None): - """List the partitions in a table. - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: list - :returns: a list of time partitions - """ - query = self._require_client(client).run_sync_query( - 'SELECT partition_id from [%s.%s$__PARTITIONS_SUMMARY__]' % - (self.dataset_id, self.table_id)) - query.run() - return [row[0] for row in query.rows] - @classmethod def from_api_repr(cls, resource, client): """Factory: construct a table given its API representation diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f4537f3fba8b..23757933d747 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2334,6 +2334,49 @@ def test_list_rows_errors(self): with self.assertRaises(TypeError): client.list_rows(1) + def test_list_partitions(self): + PROJECT = 'PROJECT' + RESOURCE = { + 'jobReference': { + 'projectId': PROJECT, + 'jobId': 'JOB_ID', + }, + 'configuration': { + 'query': { + 'query': 'q', + }, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'partition_id', 'type': 'INTEGER', + 'mode': 'REQUIRED'}, + ] + }, + 'totalRows': '2', + 'pageToken': 'next-page', + } + FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE) + FIRST_PAGE['rows'] = [ + {'f': [{'v': 20160804}]}, + {'f': [{'v': 20160805}]}, + ] + del FIRST_PAGE['pageToken'] + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, FIRST_PAGE) + table_ref = DatasetReference(PROJECT, 'DS_ID').table('TABLE_ID') + self.assertEqual(client.list_partitions(table_ref), + [20160804, 20160805]) + class TestClientUpload(object): # NOTE: This is a "partner" to `TestClient` meant to test some of the diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 85a6f3a73b2d..f9fe1ddab2d1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -694,20 +694,6 @@ def test_partition_expiration_w_none_no_partition_set(self): self.assertIsNone(table.partitioning_type) self.assertIsNone(table.partition_expiration) - def test_list_partitions(self): - from google.cloud.bigquery.table import SchemaField - - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - client._query_results = [(20160804, None), (20160805, None)] - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) - self.assertEqual(table.list_partitions(), [20160804, 20160805]) - def test_row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} @@ -1103,26 +1089,10 @@ def test_w_subfields(self): class _Client(object): - _query_results = () - def __init__(self, project='project', connection=None): self.project = project self._connection = connection - def run_sync_query(self, query): - return _Query(query, self) - - -class _Query(object): - - def __init__(self, query, client): - self.query = query - self.rows = [] - self.client = client - - def run(self): - self.rows = self.client._query_results - class _Connection(object): From c7ea8206853124a760c4b9460a17a1ba42ebfb94 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 10 Oct 2017 11:26:09 -0700 Subject: [PATCH 0307/2016] BQ: remove fetch_data and client references from QueryResults (#4144) Now QueryResults is just a container for the getQueryResults response. --- .../google/cloud/bigquery/client.py | 5 +- .../google/cloud/bigquery/dbapi/cursor.py | 33 ++- .../google/cloud/bigquery/job.py | 6 +- .../google/cloud/bigquery/query.py | 140 +-------- .../tests/unit/test_client.py | 15 + .../tests/unit/test_dbapi_cursor.py | 32 +- .../tests/unit/test_job.py | 227 ++++++++------- .../tests/unit/test_query.py | 274 ++---------------- 8 files changed, 209 insertions(+), 523 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e8642f8afb79..1825190f1043 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -432,7 +432,7 @@ def _get_query_results(self, job_id, project=None, timeout_ms=None): resource = self._connection.api_request( method='GET', path=path, query_params=extra_params) - return QueryResults.from_api_repr(resource, self) + return QueryResults.from_api_repr(resource) def job_from_resource(self, resource): """Detect correct job type from resource and instantiate. @@ -925,7 +925,8 @@ def list_rows(self, table, selected_fields=None, max_results=None, params = {} if selected_fields is not None: params['selectedFields'] = ','.join( - [f.name for f in selected_fields]) + field.name for field in selected_fields) + if start_index is not None: params['startIndex'] = start_index diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index b5a05de6d90b..914d2e07c553 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -52,7 +52,7 @@ def __init__(self, connection): # a single row at a time. self.arraysize = 1 self._query_data = None - self._query_results = None + self._query_job = None def close(self): """No-op.""" @@ -133,7 +133,7 @@ def execute(self, operation, parameters=None, job_id=None): is generated at random. """ self._query_data = None - self._query_results = None + self._query_job = None client = self.connection._client # The DB-API uses the pyformat formatting, since the way BigQuery does @@ -147,17 +147,16 @@ def execute(self, operation, parameters=None, job_id=None): config = job.QueryJobConfig() config.query_parameters = query_parameters config.use_legacy_sql = False - query_job = client.query( + self._query_job = client.query( formatted_operation, job_config=config, job_id=job_id) # Wait for the query to finish. try: - query_job.result() + self._query_job.result() except google.cloud.exceptions.GoogleCloudError: - raise exceptions.DatabaseError(query_job.errors) + raise exceptions.DatabaseError(self._query_job.errors) - query_results = query_job.query_results() - self._query_results = query_results + query_results = self._query_job.query_results() self._set_rowcount(query_results) self._set_description(query_results.schema) @@ -178,16 +177,24 @@ def _try_fetch(self, size=None): Mutates self to indicate that iteration has started. """ - if self._query_results is None: + if self._query_job is None: raise exceptions.InterfaceError( 'No query results: execute() must be called before fetch.') - if size is None: - size = self.arraysize + is_dml = ( + self._query_job.statement_type + and self._query_job.statement_type.upper() != 'SELECT') + if is_dml: + self._query_data = iter([]) + return if self._query_data is None: - self._query_data = iter( - self._query_results.fetch_data(max_results=size)) + client = self.connection._client + # TODO(tswast): pass in page size to list_rows based on arraysize + rows_iter = client.list_rows( + self._query_job.destination, + selected_fields=self._query_job.query_results().schema) + self._query_data = iter(rows_iter) def fetchone(self): """Fetch a single row from the results of the last ``execute*()`` call. @@ -245,7 +252,7 @@ def fetchall(self): if called before ``execute()``. """ self._try_fetch() - return [row for row in self._query_data] + return list(self._query_data) def setinputsizes(self, sizes): """No-op.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 4f9c005a883e..137072f04ddb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1834,7 +1834,6 @@ def query_results(self): """ if not self._query_results: self._query_results = self._client._get_query_results(self.job_id) - self._query_results._job = self return self._query_results def done(self): @@ -1847,7 +1846,6 @@ def done(self): # change once complete. if self.state != _DONE_STATE: self._query_results = self._client._get_query_results(self.job_id) - self._query_results._job = self # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are @@ -1879,7 +1877,9 @@ def result(self, timeout=None): """ super(QueryJob, self).result(timeout=timeout) # Return an iterator instead of returning the job. - return self.query_results().fetch_data() + schema = self.query_results().schema + dest_table = self.destination + return self._client.list_rows(dest_table, selected_fields=schema) class QueryPlanEntryStep(object): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 888ce5853050..5d6beca77ea6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -16,11 +16,8 @@ import copy -from google.api.core import page_iterator from google.cloud.bigquery._helpers import _rows_from_json from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery._helpers import _item_to_row -from google.cloud.bigquery._helpers import _rows_page_start class QueryResults(object): @@ -28,46 +25,25 @@ class QueryResults(object): See: https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs/getQueryResults - - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). """ - def __init__(self, client, properties): - self._client = client + def __init__(self, properties): self._properties = {} - self._job = None self._set_properties(properties) @classmethod - def from_api_repr(cls, api_response, client): - return cls(client, api_response) + def from_api_repr(cls, api_response): + return cls(api_response) @property def project(self): - """Project bound to the job. + """Project bound to the query job. :rtype: str - :returns: the project (derived from the client). + :returns: the project that the query job is associated with. """ return self._properties.get('jobReference', {}).get('projectId') - def _require_client(self, client): - """Check client or verify over-ride. - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: :class:`google.cloud.bigquery.client.Client` - :returns: The client passed in or the currently bound client. - """ - if client is None: - client = self._client - return client - @property def cache_hit(self): """Query results served from cache. @@ -212,109 +188,3 @@ def _set_properties(self, api_response): self._properties.clear() self._properties.update(copy.deepcopy(api_response)) - - def job(self): - """Job instance used to run the query. - - :rtype: :class:`google.cloud.bigquery.job.QueryJob`, or ``NoneType`` - :returns: Job instance used to run the query (None until - ``jobReference`` property is set by the server). - """ - if self._job is None: - job_ref = self._properties['jobReference'] - self._job = self._client.get_job( - job_ref['jobId'], project=job_ref['projectId']) - - return self._job - - def fetch_data(self, max_results=None, page_token=None, start_index=None, - timeout_ms=None, client=None): - """API call: fetch a page of query result data via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults - - :type max_results: int - :param max_results: (Optional) maximum number of rows to return. - - :type page_token: str - :param page_token: - (Optional) token representing a cursor into the table's rows. - - :type start_index: int - :param start_index: (Optional) zero-based index of starting row - - :type timeout_ms: int - :param timeout_ms: - (Optional) How long to wait for the query to complete, in - milliseconds, before the request times out and returns. Note that - this is only a timeout for the request, not the query. If the query - takes longer to run than the timeout value, the call returns - without any results and with the 'jobComplete' flag set to false. - You can call GetQueryResults() to wait for the query to complete - and read the results. The default value is 10000 milliseconds (10 - seconds). - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: :class:`~google.api.core.page_iterator.Iterator` - :returns: Iterator of row data :class:`tuple`s. During each page, the - iterator will have the ``total_rows`` attribute set, - which counts the total number of rows **in the result - set** (this is distinct from the total number of rows in - the current page: ``iterator.page.num_items``). - :raises: ValueError if the query has not yet been executed. - """ - client = self._require_client(client) - params = {} - - if start_index is not None: - params['startIndex'] = start_index - - if timeout_ms is not None: - params['timeoutMs'] = timeout_ms - - if max_results is not None: - params['maxResults'] = max_results - - path = '/projects/%s/queries/%s' % (self.project, self.job_id) - iterator = page_iterator.HTTPIterator( - client=client, - api_request=client._connection.api_request, - path=path, - item_to_value=_item_to_row, - items_key='rows', - page_token=page_token, - page_start=_rows_page_start_query, - next_token='pageToken', - extra_params=params) - iterator.query_result = self - iterator.project = self.project - iterator.job_id = self.job_id - return iterator - - -def _rows_page_start_query(iterator, page, response): - """Update query response when :class:`~google.cloud.iterator.Page` starts. - - .. note:: - - This assumes that the ``query_response`` attribute has been - added to the iterator after being created, which - should be done by the caller. - - :type iterator: :class:`~google.api.core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type page: :class:`~google.cloud.iterator.Page` - :param page: The page that was just created. - - :type response: dict - :param response: The JSON API response for a page of rows in a table. - """ - iterator.query_result._set_properties(response) - iterator.schema = iterator.query_result.schema - _rows_page_start(iterator, page, response) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 23757933d747..54b4ac460d84 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1931,6 +1931,11 @@ def test_query_rows_defaults(self): 'configuration': { 'query': { 'query': QUERY, + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + }, }, }, 'status': { @@ -1991,6 +1996,11 @@ def test_query_rows_w_job_id(self): 'configuration': { 'query': { 'query': QUERY, + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + }, }, }, 'status': { @@ -2041,6 +2051,11 @@ def test_query_rows_w_job_config(self): 'query': { 'query': QUERY, 'useLegacySql': True, + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + }, }, 'dryRun': True, }, diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 7562acd13239..a16b7b47ee3f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -30,37 +30,45 @@ def _make_one(self, *args, **kw): def _mock_client( self, rows=None, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import client + + if rows is None: + total_rows = 0 + else: + total_rows = len(rows) + mock_client = mock.create_autospec(client.Client) mock_client.query.return_value = self._mock_job( - rows=rows, schema=schema, + total_rows=total_rows, + schema=schema, num_dml_affected_rows=num_dml_affected_rows) + mock_client.list_rows.return_value = rows return mock_client def _mock_job( - self, rows=None, schema=None, num_dml_affected_rows=None): + self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import job mock_job = mock.create_autospec(job.QueryJob) mock_job.error_result = None mock_job.state = 'DONE' mock_job.result.return_value = mock_job + + if num_dml_affected_rows is None: + mock_job.statement_type = None # API sends back None for SELECT + else: + mock_job.statement_type = 'UPDATE' + mock_job.query_results.return_value = self._mock_results( - rows=rows, schema=schema, + total_rows=total_rows, schema=schema, num_dml_affected_rows=num_dml_affected_rows) return mock_job def _mock_results( - self, rows=None, schema=None, num_dml_affected_rows=None): + self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import query mock_results = mock.create_autospec(query.QueryResults) mock_results.schema = schema mock_results.num_dml_affected_rows = num_dml_affected_rows - - if rows is None: - mock_results.total_rows = 0 - else: - mock_results.total_rows = len(rows) - - mock_results.fetch_data.return_value = rows + mock_results.total_rows = total_rows return mock_results def test_ctor(self): @@ -187,8 +195,10 @@ def test_execute_w_dml(self): self._mock_client(rows=[], num_dml_affected_rows=12)) cursor = connection.cursor() cursor.execute('DELETE FROM UserSessions WHERE user_id = \'test\';') + rows = cursor.fetchall() self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) + self.assertEqual(rows, []) def test_execute_w_query(self): from google.cloud.bigquery.schema import SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index ad8cf965cd79..a9da40ee1d5e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -21,6 +21,26 @@ from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.dataset import DatasetReference +import mock + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(project='test-project', connection=None): + from google.cloud.bigquery.client import Client + + if connection is None: + connection = _Connection() + + client = Client( + project=project, credentials=_make_credentials(), _http=object()) + client._connection = connection + return client + class Test__bool_or_none(unittest.TestCase): @@ -143,6 +163,13 @@ def _makeResource(self, started=False, ended=False): if ended: resource['statistics']['endTime'] = (self.WHEN_TS + 1000) * 1000 + if self.JOB_TYPE == 'query': + resource['configuration']['query']['destinationTable'] = { + 'projectId': self.PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + } + return resource def _verifyInitialReadonlyProperties(self, job): @@ -319,7 +346,7 @@ def _verifyResourceProperties(self, job, resource): self.assertIsNone(job.skip_leading_rows) def test_ctor(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) self.assertIs(job.destination, self.TABLE_REF) @@ -357,7 +384,7 @@ def test_ctor(self): def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') config = LoadJobConfig() @@ -367,13 +394,13 @@ def test_ctor_w_config(self): self.assertEqual(job.schema, [full_name, age]) def test_done(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) self.assertTrue(job.done()) def test_result(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -386,7 +413,8 @@ def test_result_invokes_begin(self): done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} connection = _Connection(begun_resource, done_resource) - client = _Client(self.PROJECT, connection=connection) + client = _make_client(self.PROJECT) + client._connection = connection job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) @@ -462,7 +490,7 @@ def test_props_set_by_server(self): 'message': 'MESSAGE', 'reason': 'REASON'} - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job._properties['etag'] = 'ETAG' @@ -509,7 +537,7 @@ def test_props_set_by_server(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -517,7 +545,7 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': '%s:%s' % (self.PROJECT, self.JOB_ID), 'jobReference': { @@ -531,7 +559,7 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.FULL_JOB_ID, 'jobReference': { @@ -555,7 +583,7 @@ def test_from_api_repr_bare(self): self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() load_config = RESOURCE['configuration']['load'] load_config['createDisposition'] = 'CREATE_IF_NEEDED' @@ -566,7 +594,7 @@ def test_from_api_repr_w_properties(self): def test_begin_w_already_running(self): conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job._properties['status'] = {'state': 'RUNNING'} @@ -583,7 +611,7 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) @@ -622,7 +650,7 @@ def test_begin_w_autodetect(self): del resource['selfLink'] del resource['user_email'] conn = _Connection(resource) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) config = LoadJobConfig() config.autodetect = True job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, @@ -685,9 +713,9 @@ def test_begin_w_alternate_client(self): } RESOURCE['configuration']['load'] = LOAD_CONFIGURATION conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') config = LoadJobConfig() @@ -730,7 +758,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) @@ -745,9 +773,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) @@ -764,7 +792,7 @@ def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) @@ -780,9 +808,9 @@ def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) @@ -800,7 +828,7 @@ def test_cancel_w_bound_client(self): RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn = _Connection(RESPONSE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) @@ -817,9 +845,9 @@ def test_cancel_w_alternate_client(self): RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESPONSE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) @@ -893,7 +921,7 @@ def _verifyResourceProperties(self, job, resource): self.assertIsNone(job.write_disposition) def test_ctor(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) @@ -913,7 +941,7 @@ def test_ctor(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -921,7 +949,7 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { @@ -935,7 +963,7 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { @@ -964,7 +992,7 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_sourcetable(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { @@ -993,7 +1021,7 @@ def test_from_api_repr_w_sourcetable(self): def test_from_api_repr_wo_sources(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { @@ -1015,7 +1043,7 @@ def test_from_api_repr_wo_sources(self): klass.from_api_repr(RESOURCE, client=client) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() copy_config = RESOURCE['configuration']['copy'] copy_config['createDisposition'] = 'CREATE_IF_NEEDED' @@ -1033,7 +1061,7 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) @@ -1086,9 +1114,9 @@ def test_begin_w_alternate_client(self): } RESOURCE['configuration']['copy'] = COPY_CONFIGURATION conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) config = CopyJobConfig() @@ -1118,7 +1146,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -1135,9 +1163,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client1) @@ -1155,7 +1183,7 @@ def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) @@ -1172,9 +1200,9 @@ def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client1) @@ -1249,7 +1277,7 @@ def _verifyResourceProperties(self, job, resource): self.assertIsNone(job.print_header) def test_ctor(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) @@ -1271,7 +1299,7 @@ def test_ctor(self): def test_destination_uri_file_counts(self): file_counts = 23 - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) @@ -1288,7 +1316,7 @@ def test_destination_uri_file_counts(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -1296,7 +1324,7 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { @@ -1310,7 +1338,7 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { @@ -1334,7 +1362,7 @@ def test_from_api_repr_bare(self): self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() extract_config = RESOURCE['configuration']['extract'] extract_config['compression'] = 'GZIP' @@ -1352,7 +1380,7 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], @@ -1400,9 +1428,9 @@ def test_begin_w_alternate_client(self): } RESOURCE['configuration']['extract'] = EXTRACT_CONFIGURATION conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) job_config = ExtractJobConfig() @@ -1435,7 +1463,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) @@ -1451,9 +1479,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) @@ -1471,7 +1499,7 @@ def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], @@ -1489,9 +1517,9 @@ def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], @@ -1701,7 +1729,7 @@ def _verifyResourceProperties(self, job, resource): self.assertIsNone(job.write_disposition) def test_ctor_defaults(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.query, self.QUERY) self.assertIs(job._client, client) @@ -1733,7 +1761,7 @@ def test_ctor_w_udf_resources(self): RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) config = QueryJobConfig() config.udf_resources = udf_resources job = self._make_one( @@ -1745,7 +1773,7 @@ def test_ctor_w_query_parameters(self): from google.cloud.bigquery.job import QueryJobConfig query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) config = QueryJobConfig() config.query_parameters = query_parameters job = self._make_one( @@ -1754,7 +1782,7 @@ def test_ctor_w_query_parameters(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -1762,7 +1790,7 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { @@ -1776,7 +1804,7 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { @@ -1793,7 +1821,7 @@ def test_from_api_repr_bare(self): self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() query_config = RESOURCE['configuration']['query'] query_config['createDisposition'] = 'CREATE_IF_NEEDED' @@ -1809,7 +1837,7 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(job, RESOURCE) def test_cancelled(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) job._properties['status'] = { 'state': 'DONE', @@ -1821,7 +1849,7 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) def test_done(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) self.assertTrue(job.done()) @@ -1849,7 +1877,7 @@ def test_query_plan(self): 'substeps': ['SUBSTEP1', 'SUBSTEP2'], }], }] - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.query_plan, []) @@ -1890,7 +1918,7 @@ def test_query_plan(self): def test_total_bytes_processed(self): total_bytes = 1234 - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.total_bytes_processed) @@ -1905,7 +1933,7 @@ def test_total_bytes_processed(self): def test_total_bytes_billed(self): total_bytes = 1234 - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.total_bytes_billed) @@ -1920,7 +1948,7 @@ def test_total_bytes_billed(self): def test_billing_tier(self): billing_tier = 1 - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.billing_tier) @@ -1934,7 +1962,7 @@ def test_billing_tier(self): self.assertEqual(job.billing_tier, billing_tier) def test_cache_hit(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.cache_hit) @@ -1949,7 +1977,7 @@ def test_cache_hit(self): def test_num_dml_affected_rows(self): num_rows = 1234 - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.num_dml_affected_rows) @@ -1964,7 +1992,7 @@ def test_num_dml_affected_rows(self): def test_statement_type(self): statement_type = 'SELECT' - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.statement_type) @@ -1995,7 +2023,7 @@ def test_referenced_tables(self): 'datasetId': 'other-dataset', 'tableId': 'other-table', }] - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.referenced_tables, []) @@ -2070,7 +2098,7 @@ def test_undeclared_query_paramters(self): } }, }] - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.undeclared_query_paramters, []) @@ -2110,7 +2138,7 @@ def test_query_results(self): }, } connection = _Connection(query_resource) - client = _Client(self.PROJECT, connection=connection) + client = _make_client(self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) results = job.query_results() self.assertIsInstance(results, QueryResults) @@ -2118,7 +2146,7 @@ def test_query_results(self): def test_query_results_w_cached_value(self): from google.cloud.bigquery.query import QueryResults - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) resource = { 'jobReference': { @@ -2126,7 +2154,7 @@ def test_query_results_w_cached_value(self): 'jobId': self.JOB_ID, }, } - query_results = QueryResults(client, resource) + query_results = QueryResults(resource) job._query_results = query_results results = job.query_results() @@ -2142,7 +2170,7 @@ def test_result(self): }, } connection = _Connection(query_resource, query_resource) - client = _Client(self.PROJECT, connection=connection) + client = _make_client(self.PROJECT, connection=connection) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -2166,7 +2194,7 @@ def test_result_invokes_begins(self): connection = _Connection( begun_resource, incomplete_resource, query_resource, done_resource, query_resource) - client = _Client(self.PROJECT, connection=connection) + client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) job.result() @@ -2180,7 +2208,7 @@ def test_result_invokes_begins(self): def test_result_error(self): from google.cloud import exceptions - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) error_result = { 'debugInfo': 'DEBUG', @@ -2214,7 +2242,7 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.default_dataset = DatasetReference(self.PROJECT, DS_ID) @@ -2280,9 +2308,9 @@ def test_begin_w_alternate_client(self): RESOURCE['configuration']['query'] = QUERY_CONFIGURATION RESOURCE['configuration']['dryRun'] = True conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(TABLE) @@ -2340,7 +2368,7 @@ def test_begin_w_udf(self): {'inlineCode': INLINE_UDF_CODE}, ] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) udf_resources = [ UDFResource("resourceUri", RESOURCE_URI), UDFResource("inlineCode", INLINE_UDF_CODE), @@ -2403,7 +2431,7 @@ def test_begin_w_named_query_parameter(self): }, ] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters job = self._make_one( @@ -2458,7 +2486,7 @@ def test_begin_w_positional_query_parameter(self): }, ] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters job = self._make_one( @@ -2500,7 +2528,7 @@ def test_dry_run_query(self): del RESOURCE['user_email'] RESOURCE['configuration']['dryRun'] = True conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.dry_run = True job = self._make_one( @@ -2531,7 +2559,7 @@ def test_dry_run_query(self): def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertFalse(job.exists()) @@ -2545,9 +2573,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) self.assertTrue(job.exists(client=client2)) @@ -2568,7 +2596,7 @@ def test_reload_w_bound_client(self): DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) config = QueryJobConfig() @@ -2577,7 +2605,7 @@ def test_reload_w_bound_client(self): job.reload() - self.assertIsNone(job.destination) + self.assertNotEqual(job.destination, table_ref) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -2597,9 +2625,9 @@ def test_reload_w_alternate_client(self): 'tableId': DEST_TABLE, } conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) job.reload(client=client2) @@ -2791,19 +2819,6 @@ def test_from_api_repr_normal(self): self.assertEqual(entry.steps, steps) -class _Client(object): - - def __init__(self, project='project', connection=None): - self.project = project - self._connection = connection - - def _get_query_results(self, job_id): - from google.cloud.bigquery.query import QueryResults - - resource = self._connection.api_request(method='GET') - return QueryResults.from_api_repr(resource, self) - - class _Table(object): def __init__(self, table_id=None): diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index d2eae2ad77fb..d2322886daa5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -14,16 +14,6 @@ import unittest -import mock - -from google.cloud.bigquery import Client - - -def _make_credentials(): - import google.auth.credentials - - return mock.Mock(spec=google.auth.credentials.Credentials) - class TestQueryResults(unittest.TestCase): PROJECT = 'project' @@ -65,9 +55,7 @@ def _verifySchema(self, query, resource): self.assertEqual(query.schema, ()) def test_ctor_defaults(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) - self.assertIs(query._client, client) + query = self._make_one(self._makeResource()) self.assertIsNone(query.cache_hit) self.assertIsNone(query.complete) self.assertIsNone(query.errors) @@ -78,66 +66,28 @@ def test_ctor_defaults(self): self.assertIsNone(query.total_rows) self.assertIsNone(query.total_bytes_processed) - def test_job_w_jobid(self): - from google.cloud.bigquery.job import QueryJob - - SERVER_GENERATED = 'SERVER_GENERATED' - job_resource = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': SERVER_GENERATED, - }, - 'configuration': {'query': {'query': 'SELECT 1'}}, - } - query_resource = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': SERVER_GENERATED, - }, - } - conn = _Connection(job_resource) - client = _Client(self.PROJECT, conn) - query = self._make_one(client, query_resource) - job = query.job() - self.assertIsInstance(job, QueryJob) - self.assertIs(job._client, client) - self.assertEqual(job.job_id, SERVER_GENERATED) - fetched_later = query.job() - self.assertIs(fetched_later, job) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual( - req['path'], - '/projects/{}/jobs/{}'.format(self.PROJECT, SERVER_GENERATED)) - def test_cache_hit_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.cache_hit) def test_cache_hit_present(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['cacheHit'] = True - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertTrue(query.cache_hit) def test_complete_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.complete) def test_complete_present(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['jobComplete'] = True - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertTrue(query.complete) def test_errors_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.errors) def test_errors_present(self): @@ -146,95 +96,80 @@ def test_errors_present(self): ] resource = self._makeResource() resource['errors'] = ERRORS - client = _Client(self.PROJECT) - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.errors, ERRORS) def test_job_id_missing(self): - client = _Client(self.PROJECT) with self.assertRaises(ValueError): - self._make_one(client, {}) + self._make_one({}) def test_job_id_broken_job_reference(self): - client = _Client(self.PROJECT) resource = {'jobReference': {'bogus': 'BOGUS'}} with self.assertRaises(ValueError): - self._make_one(client, resource) + self._make_one(resource) def test_job_id_present(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['jobReference']['jobId'] = 'custom-job' - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.job_id, 'custom-job') def test_page_token_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.page_token) def test_page_token_present(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['pageToken'] = 'TOKEN' - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.page_token, 'TOKEN') def test_total_rows_present_integer(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['totalRows'] = 42 - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.total_rows, 42) def test_total_rows_present_string(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['totalRows'] = '42' - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.total_rows, 42) def test_total_bytes_processed_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.total_bytes_processed) def test_total_bytes_processed_present_integer(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['totalBytesProcessed'] = 123456 - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) def test_total_bytes_processed_present_string(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['totalBytesProcessed'] = '123456' - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) def test_num_dml_affected_rows_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self.assertIsNone(query.num_dml_affected_rows) def test_num_dml_affected_rows_present_integer(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['numDmlAffectedRows'] = 123456 - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.num_dml_affected_rows, 123456) def test_num_dml_affected_rows_present_string(self): - client = _Client(self.PROJECT) resource = self._makeResource() resource['numDmlAffectedRows'] = '123456' - query = self._make_one(client, resource) + query = self._make_one(resource) self.assertEqual(query.num_dml_affected_rows, 123456) def test_schema(self): - client = _Client(self.PROJECT) - query = self._make_one(client, self._makeResource()) + query = self._make_one(self._makeResource()) self._verifySchema(query, self._makeResource()) resource = self._makeResource() resource['schema'] = { @@ -245,170 +180,3 @@ def test_schema(self): } query._set_properties(resource) self._verifySchema(query, resource) - - def test_fetch_data_w_bound_client(self): - import six - - PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_ID) - schema = { - 'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, - ], - } - BEFORE = self._makeResource() - BEFORE['jobComplete'] = False - BEFORE['schema'] = schema - AFTER = self._makeResource() - AFTER['rows'] = [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': 32}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': 33}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': 29}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': 27}, - ]}, - ] - AFTER['cacheHit'] = False - AFTER['jobComplete'] = True - AFTER['numDmlAffectedRows'] = 123 - AFTER['pageToken'] = self.TOKEN - AFTER['schema'] = schema - AFTER['totalBytesProcessed'] = 100000 - - conn = _Connection(AFTER) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(client, BEFORE) - self.assertFalse(query.complete) - - iterator = query.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertTrue(query.complete) - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29)) - self.assertEqual(rows[3], ('Bhettye Rhubble', 27)) - self.assertIsNone(total_rows) - self.assertEqual(page_token, AFTER['pageToken']) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_fetch_data_w_alternate_client(self): - import six - - PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_ID) - MAX = 10 - START = 2257 - TIMEOUT = 20000 - - schema = { - 'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, - ], - } - BEFORE = self._makeResource() - BEFORE['jobComplete'] = False - BEFORE['schema'] = schema - AFTER = self._makeResource() - AFTER['rows'] = [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': 32}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': 33}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': 29}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': 27}, - ]}, - ] - AFTER['cacheHit'] = False - AFTER['jobComplete'] = True - AFTER['numDmlAffectedRows'] = 123 - AFTER['pageToken'] = self.TOKEN - AFTER['schema'] = schema - AFTER['totalBytesProcessed'] = 100000 - - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(AFTER) - client2 = _Client(project=self.PROJECT, connection=conn2) - query = self._make_one(client1, BEFORE) - self.assertFalse(query.complete) - - iterator = query.fetch_data( - client=client2, max_results=MAX, page_token=self.TOKEN, - start_index=START, timeout_ms=TIMEOUT) - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertTrue(query.complete) - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29)) - self.assertEqual(rows[3], ('Bhettye Rhubble', 27)) - self.assertIsNone(total_rows) - self.assertEqual(page_token, AFTER['pageToken']) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'maxResults': MAX, - 'pageToken': self.TOKEN, - 'startIndex': START, - 'timeoutMs': TIMEOUT}) - - -class _Client(Client): - - def __init__(self, project='project', connection=None): - creds = _make_credentials() - http = object() - super(_Client, self).__init__( - project=project, credentials=creds, _http=http) - - if connection is None: - connection = _Connection() - self._connection = connection - - -class _Connection(object): - - def __init__(self, *responses): - self._responses = responses - self._requested = [] - - def api_request(self, **kw): - self._requested.append(kw) - response, self._responses = self._responses[0], self._responses[1:] - return response From dce09d701f280d00562e539dc8df2042f4a0d7f0 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Tue, 10 Oct 2017 16:41:10 -0400 Subject: [PATCH 0308/2016] bigquery: add Row class (#4149) Rows are represented by a new class, Row, which allows access by position or field name. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/_helpers.py | 79 +++++++++++++++++-- .../google/cloud/bigquery/client.py | 2 + .../google/cloud/bigquery/query.py | 4 +- .../google-cloud-bigquery/tests/system.py | 31 +++++--- .../tests/unit/test__helpers.py | 47 ++++++++--- .../tests/unit/test_client.py | 18 +++-- 7 files changed, 148 insertions(+), 35 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index d6c210e9843b..f075dfab4dcf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -30,6 +30,7 @@ from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery._helpers import UDFResource +from google.cloud.bigquery._helpers import Row from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset @@ -52,6 +53,7 @@ 'CopyJobConfig', 'ExtractJobConfig', 'QueryJobConfig', + 'Row', 'LoadJobConfig', 'ScalarQueryParameter', 'SchemaField', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index dad87fde88bb..710e096ccc2b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -18,6 +18,9 @@ from collections import OrderedDict import copy import datetime +import operator + +import six from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -176,7 +179,70 @@ def _record_from_json(value, field): _QUERY_PARAMS_FROM_JSON['TIMESTAMP'] = _timestamp_query_param_from_json -def _row_from_json(row, schema): +class Row(object): + """A BigQuery row. + + Values can be accessed by position (index), by key like a dict, + or as properties. + + :type values: tuple + :param values: the row values + + :type field_to_index: dict + :param field_to_index: a mapping from schema field names to indexes + """ + + # Choose unusual field names to try to avoid conflict with schema fields. + __slots__ = ('_xxx_values', '_xxx_field_to_index') + + def __init__(self, values, field_to_index): + self._xxx_values = values + self._xxx_field_to_index = field_to_index + + def values(self): + return self._xxx_values + + def __getattr__(self, name): + i = self._xxx_field_to_index.get(name) + if i is None: + raise AttributeError('no row field "%s"' % name) + return self._xxx_values[i] + + def __len__(self): + return len(self._xxx_values) + + def __getitem__(self, key): + if isinstance(key, six.string_types): + i = self._xxx_field_to_index.get(key) + if i is None: + raise KeyError('no row field "%s"' % key) + key = i + return self._xxx_values[key] + + def __eq__(self, other): + if not isinstance(other, Row): + return NotImplemented + return( + self._xxx_values == other._xxx_values and + self._xxx_field_to_index == other._xxx_field_to_index) + + def __ne__(self, other): + return not self == other + + def __repr__(self): + # sort field dict by value, for determinism + items = sorted(self._xxx_field_to_index.items(), + key=operator.itemgetter(1)) + f2i = '{' + ', '.join('%r: %d' % i for i in items) + '}' + return 'Row({}, {})'.format(self._xxx_values, f2i) + + +def _field_to_index_mapping(schema): + """Create a mapping from schema field name to index of field.""" + return {f.name: i for i, f in enumerate(schema)} + + +def _row_tuple_from_json(row, schema): """Convert JSON row data to row with appropriate types. Note: ``row['f']`` and ``schema`` are presumed to be of the same length. @@ -203,9 +269,11 @@ def _row_from_json(row, schema): return tuple(row_data) -def _rows_from_json(rows, schema): +def _rows_from_json(values, schema): """Convert JSON row data to rows with appropriate types.""" - return [_row_from_json(row, schema) for row in rows] + field_to_index = _field_to_index_mapping(schema) + return [Row(_row_tuple_from_json(r, schema), field_to_index) + for r in values] def _int_to_json(value): @@ -935,10 +1003,11 @@ def _item_to_row(iterator, resource): :type resource: dict :param resource: An item to be converted to a row. - :rtype: tuple + :rtype: :class:`Row` :returns: The next row in the page. """ - return _row_from_json(resource, iterator.schema) + return Row(_row_tuple_from_json(resource, iterator.schema), + iterator._field_to_index) # pylint: disable=unused-argument diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1825190f1043..a493e3dcd426 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -41,6 +41,7 @@ from google.cloud.bigquery.query import QueryResults from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start +from google.cloud.bigquery._helpers import _field_to_index_mapping _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB @@ -942,6 +943,7 @@ def list_rows(self, table, selected_fields=None, max_results=None, page_start=_rows_page_start, extra_params=params) iterator.schema = schema + iterator._field_to_index = _field_to_index_mapping(schema) return iterator def list_partitions(self, table): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 5d6beca77ea6..5524ac1670b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -16,8 +16,8 @@ import copy -from google.cloud.bigquery._helpers import _rows_from_json from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery._helpers import _rows_from_json class QueryResults(object): @@ -156,7 +156,7 @@ def rows(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#rows - :rtype: list of tuples of row values, or ``NoneType`` + :rtype: list of :class:`~google.cloud.bigquery.Row` :returns: fields describing the schema (None until set by the server). """ return _rows_from_json(self._properties.get('rows', ()), self.schema) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 9ef835a9bb2a..8a78701a3808 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -353,9 +353,9 @@ def test_insert_data_then_dump_table(self): # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds retry = RetryResult(_has_rows, max_tries=8) rows = retry(self._fetch_single_page)(table) - + row_tuples = [r.values() for r in rows] by_age = operator.itemgetter(1) - self.assertEqual(sorted(rows, key=by_age), + self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) def test_load_table_from_local_file_then_dump_table(self): @@ -401,8 +401,9 @@ def test_load_table_from_local_file_then_dump_table(self): self.assertEqual(job.output_rows, len(ROWS)) rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] by_age = operator.itemgetter(1) - self.assertEqual(sorted(rows, key=by_age), + self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) def test_load_table_from_local_avro_file_then_dump_table(self): @@ -434,8 +435,9 @@ def test_load_table_from_local_avro_file_then_dump_table(self): table = Config.CLIENT.get_table(table) rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] by_wavelength = operator.itemgetter(1) - self.assertEqual(sorted(rows, key=by_wavelength), + self.assertEqual(sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength)) def test_load_table_from_storage_then_dump_table(self): @@ -499,8 +501,9 @@ def test_load_table_from_storage_then_dump_table(self): retry(job.reload)() rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] by_age = operator.itemgetter(1) - self.assertEqual(sorted(rows, key=by_age), + self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) def test_load_table_from_storage_w_autodetect_schema(self): @@ -562,9 +565,10 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.assertEqual(table.schema, [field_name, field_age]) actual_rows = self._fetch_single_page(table) + actual_row_tuples = [r.values() for r in actual_rows] by_age = operator.itemgetter(1) self.assertEqual( - sorted(actual_rows, key=by_age), sorted(rows, key=by_age)) + sorted(actual_row_tuples, key=by_age), sorted(rows, key=by_age)) def _load_table_for_extract_table( self, storage_client, rows, bucket_name, blob_name, table): @@ -884,7 +888,8 @@ def test_dbapi_fetchall(self): self.assertEqual(Config.CURSOR.rowcount, 3, "expected 3 rows") Config.CURSOR.arraysize = arraysize rows = Config.CURSOR.fetchall() - self.assertEqual(rows, [(1, 2), (3, 4), (5, 6)]) + row_tuples = [r.values() for r in rows] + self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)]) def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile @@ -1270,8 +1275,8 @@ def test_large_query_w_public_data(self): def test_query_future(self): query_job = Config.CLIENT.query('SELECT 1') iterator = query_job.result(timeout=JOB_TIMEOUT) - rows = list(iterator) - self.assertEqual(rows, [(1,)]) + row_tuples = [r.values() for r in iterator] + self.assertEqual(row_tuples, [(1,)]) def test_insert_nested_nested(self): # See #2951 @@ -1305,8 +1310,8 @@ def test_insert_nested_nested(self): retry = RetryResult(_has_rows, max_tries=8) rows = retry(self._fetch_single_page)(table) - - self.assertEqual(rows, to_insert) + row_tuples = [r.values() for r in rows] + self.assertEqual(row_tuples, to_insert) def test_create_table_insert_fetch_nested_schema(self): table_name = 'test_table' @@ -1334,9 +1339,11 @@ def test_create_table_insert_fetch_nested_schema(self): retry = RetryResult(_has_rows, max_tries=8) fetched = retry(self._fetch_single_page)(table) + fetched_tuples = [f.values() for f in fetched] + self.assertEqual(len(fetched), len(to_insert)) - for found, expected in zip(sorted(fetched), sorted(to_insert)): + for found, expected in zip(sorted(fetched_tuples), sorted(to_insert)): self.assertEqual(found[0], expected[0]) # Name self.assertEqual(found[1], int(expected[1])) # Age self.assertEqual(found[2], expected[2]) # Weight diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 2254f6b01d89..2cf3f0f1f8e4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -377,12 +377,12 @@ def test_w_record_subfield(self): self.assertEqual(coerced, expected) -class Test_row_from_json(unittest.TestCase): +class Test_row_tuple_from_json(unittest.TestCase): def _call_fut(self, row, schema): - from google.cloud.bigquery._helpers import _row_from_json + from google.cloud.bigquery._helpers import _row_tuple_from_json - return _row_from_json(row, schema) + return _row_tuple_from_json(row, schema) def test_w_single_scalar_column(self): # SELECT 1 AS col @@ -468,15 +468,36 @@ def test_w_array_of_struct_w_array(self): {u'first': [5, 6], u'second': 7}, ],)) + def test_row(self): + from google.cloud.bigquery._helpers import Row + + VALUES = (1, 2, 3) + r = Row(VALUES, {'a': 0, 'b': 1, 'c': 2}) + self.assertEqual(r.a, 1) + self.assertEqual(r[1], 2) + self.assertEqual(r['c'], 3) + self.assertEqual(len(r), 3) + self.assertEqual(r.values(), VALUES) + self.assertEqual(repr(r), + "Row((1, 2, 3), {'a': 0, 'b': 1, 'c': 2})") + self.assertFalse(r != r) + self.assertFalse(r == 3) + with self.assertRaises(AttributeError): + r.z + with self.assertRaises(KeyError): + r['z'] + class Test_rows_from_json(unittest.TestCase): - def _call_fut(self, value, field): + def _call_fut(self, rows, schema): from google.cloud.bigquery._helpers import _rows_from_json - return _rows_from_json(value, field) + return _rows_from_json(rows, schema) def test_w_record_subfield(self): + from google.cloud.bigquery._helpers import Row + full_name = _Field('REQUIRED', 'full_name', 'STRING') area_code = _Field('REQUIRED', 'area_code', 'STRING') local_number = _Field('REQUIRED', 'local_number', 'STRING') @@ -512,15 +533,18 @@ def test_w_record_subfield(self): 'local_number': '768-5309', 'rank': 2, } + f2i = {'full_name': 0, 'phone': 1, 'color': 2} expected = [ - ('Phred Phlyntstone', phred_phone, ['orange', 'black']), - ('Bharney Rhubble', bharney_phone, ['brown']), - ('Wylma Phlyntstone', None, []), + Row(('Phred Phlyntstone', phred_phone, ['orange', 'black']), f2i), + Row(('Bharney Rhubble', bharney_phone, ['brown']), f2i), + Row(('Wylma Phlyntstone', None, []), f2i), ] coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) def test_w_int64_float64_bool(self): + from google.cloud.bigquery._helpers import Row + # "Standard" SQL dialect uses 'INT64', 'FLOAT64', 'BOOL'. candidate = _Field('REQUIRED', 'candidate', 'STRING') votes = _Field('REQUIRED', 'votes', 'INT64') @@ -547,10 +571,11 @@ def test_w_int64_float64_bool(self): {'v': 'false'}, ]}, ] + f2i = {'candidate': 0, 'votes': 1, 'percentage': 2, 'incumbent': 3} expected = [ - ('Phred Phlyntstone', 8, 0.25, True), - ('Bharney Rhubble', 4, 0.125, False), - ('Wylma Phlyntstone', 20, 0.625, False), + Row(('Phred Phlyntstone', 8, 0.25, True), f2i), + Row(('Bharney Rhubble', 4, 0.125, False), f2i), + Row(('Wylma Phlyntstone', 20, 0.625, False), f2i), ] coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 54b4ac460d84..d34c192c3fac 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1919,6 +1919,7 @@ def test_query_w_query_parameters(self): def test_query_rows_defaults(self): from google.api.core.page_iterator import HTTPIterator + from google.cloud.bigquery._helpers import Row JOB = 'job-id' PROJECT = 'PROJECT' @@ -1972,7 +1973,7 @@ def test_query_rows_defaults(self): rows_iter = client.query_rows(QUERY) rows = list(rows_iter) - self.assertEqual(rows, [(1,), (2,), (3,)]) + self.assertEqual(rows, [Row((i,), {'field0': 0}) for i in (1, 2, 3)]) self.assertIs(rows_iter.client, client) self.assertIsInstance(rows_iter, HTTPIterator) self.assertEqual(len(conn._requested), 4) @@ -2099,6 +2100,7 @@ def test_list_rows(self): from google.cloud._helpers import UTC from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery._helpers import Row PROJECT = 'PROJECT' DS_ID = 'DS_ID' @@ -2159,11 +2161,12 @@ def _bigquery_timestamp_float_repr(ts_float): total_rows = iterator.total_rows page_token = iterator.next_page_token + f2i = {'full_name': 0, 'age': 1, 'joined': 2} self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32, WHEN)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33, WHEN_1)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, WHEN_2)) - self.assertEqual(rows[3], ('Bhettye Rhubble', None, None)) + self.assertEqual(rows[0], Row(('Phred Phlyntstone', 32, WHEN), f2i)) + self.assertEqual(rows[1], Row(('Bharney Rhubble', 33, WHEN_1), f2i)) + self.assertEqual(rows[2], Row(('Wylma Phlyntstone', 29, WHEN_2), f2i)) + self.assertEqual(rows[3], Row(('Bhettye Rhubble', None, None), f2i)) self.assertEqual(total_rows, ROWS) self.assertEqual(page_token, TOKEN) @@ -2359,6 +2362,11 @@ def test_list_partitions(self): 'configuration': { 'query': { 'query': 'q', + 'destinationTable': { + 'projectId': PROJECT, + 'datasetId': 'DS_ID', + 'tableId': 'TABLE_ID', + }, }, }, 'status': { From c309a5263f289b82c0b9857109ebdfd6c85a299c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Oct 2017 12:19:17 -0700 Subject: [PATCH 0309/2016] BQ: move *QueryParameter and UDFResource classes to query. (#4156) These classes are expected to be constructed by users, so moving them to a public module. --- .../google/cloud/bigquery/__init__.py | 8 +- .../google/cloud/bigquery/_helpers.py | 442 ------- .../google/cloud/bigquery/dbapi/_helpers.py | 6 +- .../google/cloud/bigquery/job.py | 26 +- .../google/cloud/bigquery/query.py | 443 +++++++ .../google-cloud-bigquery/tests/system.py | 6 +- .../tests/unit/test__helpers.py | 1082 +---------------- .../tests/unit/test_client.py | 4 +- .../tests/unit/test_job.py | 16 +- .../tests/unit/test_query.py | 1071 ++++++++++++++++ 10 files changed, 1555 insertions(+), 1549 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index f075dfab4dcf..2682ca8ddb6d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -26,10 +26,6 @@ from pkg_resources import get_distribution __version__ = get_distribution('google-cloud-bigquery').version -from google.cloud.bigquery._helpers import ArrayQueryParameter -from google.cloud.bigquery._helpers import ScalarQueryParameter -from google.cloud.bigquery._helpers import StructQueryParameter -from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery._helpers import Row from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry @@ -39,6 +35,10 @@ from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import LoadJobConfig +from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 710e096ccc2b..ae7f1186fe40 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -15,8 +15,6 @@ """Shared helper functions for BigQuery API classes.""" import base64 -from collections import OrderedDict -import copy import datetime import operator @@ -548,446 +546,6 @@ class _EnumProperty(_ConfigurationProperty): """ -class UDFResource(object): - """Describe a single user-defined function (UDF) resource. - - :type udf_type: str - :param udf_type: the type of the resource ('inlineCode' or 'resourceUri') - - :type value: str - :param value: the inline code or resource URI. - - See - https://cloud.google.com/bigquery/user-defined-functions#api - """ - def __init__(self, udf_type, value): - self.udf_type = udf_type - self.value = value - - def __eq__(self, other): - if not isinstance(other, UDFResource): - return NotImplemented - return( - self.udf_type == other.udf_type and - self.value == other.value) - - def __ne__(self, other): - return not self == other - - -class AbstractQueryParameter(object): - """Base class for named / positional query parameters. - """ - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. - - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`ScalarQueryParameter` - """ - raise NotImplementedError - - def to_api_repr(self): - """Construct JSON API representation for the parameter. - - :rtype: dict - """ - raise NotImplementedError - - -class ScalarQueryParameter(AbstractQueryParameter): - """Named / positional query parameters for scalar values. - - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). - - :type type_: str - :param type_: name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - - :type value: str, int, float, bool, :class:`datetime.datetime`, or - :class:`datetime.date`. - :param value: the scalar parameter value. - """ - def __init__(self, name, type_, value): - self.name = name - self.type_ = type_ - self.value = value - - @classmethod - def positional(cls, type_, value): - """Factory for positional paramater. - - :type type_: str - :param type_: - name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - - :type value: str, int, float, bool, :class:`datetime.datetime`, or - :class:`datetime.date`. - :param value: the scalar parameter value. - - :rtype: :class:`ScalarQueryParameter` - :returns: instance without name - """ - return cls(None, type_, value) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. - - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`ScalarQueryParameter` - :returns: instance - """ - name = resource.get('name') - type_ = resource['parameterType']['type'] - value = resource['parameterValue']['value'] - converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) - return cls(name, type_, converted) - - def to_api_repr(self): - """Construct JSON API representation for the parameter. - - :rtype: dict - :returns: JSON mapping - """ - value = self.value - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) - if converter is not None: - value = converter(value) - resource = { - 'parameterType': { - 'type': self.type_, - }, - 'parameterValue': { - 'value': value, - }, - } - if self.name is not None: - resource['name'] = self.name - return resource - - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - tuple: The contents of this :class:`ScalarQueryParameter`. - """ - return ( - self.name, - self.type_.upper(), - self.value, - ) - - def __eq__(self, other): - if not isinstance(other, ScalarQueryParameter): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __repr__(self): - return 'ScalarQueryParameter{}'.format(self._key()) - - -class ArrayQueryParameter(AbstractQueryParameter): - """Named / positional query parameters for array values. - - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). - - :type array_type: str - :param array_type: - name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. - - :type values: list of appropriate scalar type. - :param values: the parameter array values. - """ - def __init__(self, name, array_type, values): - self.name = name - self.array_type = array_type - self.values = values - - @classmethod - def positional(cls, array_type, values): - """Factory for positional parameters. - - :type array_type: str - :param array_type: - name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. - - :type values: list of appropriate scalar type - :param values: the parameter array values. - - :rtype: :class:`ArrayQueryParameter` - :returns: instance without name - """ - return cls(None, array_type, values) - - @classmethod - def _from_api_repr_struct(cls, resource): - name = resource.get('name') - converted = [] - # We need to flatten the array to use the StructQueryParameter - # parse code. - resource_template = { - # The arrayType includes all the types of the fields of the STRUCT - 'parameterType': resource['parameterType']['arrayType'] - } - for array_value in resource['parameterValue']['arrayValues']: - struct_resource = copy.deepcopy(resource_template) - struct_resource['parameterValue'] = array_value - struct_value = StructQueryParameter.from_api_repr(struct_resource) - converted.append(struct_value) - return cls(name, 'STRUCT', converted) - - @classmethod - def _from_api_repr_scalar(cls, resource): - name = resource.get('name') - array_type = resource['parameterType']['arrayType']['type'] - values = [ - value['value'] - for value - in resource['parameterValue']['arrayValues']] - converted = [ - _QUERY_PARAMS_FROM_JSON[array_type](value, None) - for value in values - ] - return cls(name, array_type, converted) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. - - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`ArrayQueryParameter` - :returns: instance - """ - array_type = resource['parameterType']['arrayType']['type'] - if array_type == 'STRUCT': - return cls._from_api_repr_struct(resource) - return cls._from_api_repr_scalar(resource) - - def to_api_repr(self): - """Construct JSON API representation for the parameter. - - :rtype: dict - :returns: JSON mapping - """ - values = self.values - if self.array_type == 'RECORD' or self.array_type == 'STRUCT': - reprs = [value.to_api_repr() for value in values] - a_type = reprs[0]['parameterType'] - a_values = [repr_['parameterValue'] for repr_ in reprs] - else: - a_type = {'type': self.array_type} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) - if converter is not None: - values = [converter(value) for value in values] - a_values = [{'value': value} for value in values] - resource = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': a_type, - }, - 'parameterValue': { - 'arrayValues': a_values, - }, - } - if self.name is not None: - resource['name'] = self.name - return resource - - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - tuple: The contents of this :class:`ArrayQueryParameter`. - """ - return ( - self.name, - self.array_type.upper(), - self.values, - ) - - def __eq__(self, other): - if not isinstance(other, ArrayQueryParameter): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __repr__(self): - return 'ArrayQueryParameter{}'.format(self._key()) - - -class StructQueryParameter(AbstractQueryParameter): - """Named / positional query parameters for struct values. - - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). - - :type sub_params: tuple of :class:`ScalarQueryParameter` - :param sub_params: the sub-parameters for the struct - """ - def __init__(self, name, *sub_params): - self.name = name - types = self.struct_types = OrderedDict() - values = self.struct_values = {} - for sub in sub_params: - if isinstance(sub, self.__class__): - types[sub.name] = 'STRUCT' - values[sub.name] = sub - elif isinstance(sub, ArrayQueryParameter): - types[sub.name] = 'ARRAY' - values[sub.name] = sub - else: - types[sub.name] = sub.type_ - values[sub.name] = sub.value - - @classmethod - def positional(cls, *sub_params): - """Factory for positional parameters. - - :type sub_params: tuple of :class:`ScalarQueryParameter` - :param sub_params: the sub-parameters for the struct - - :rtype: :class:`StructQueryParameter` - :returns: instance without name - """ - return cls(None, *sub_params) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. - - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`StructQueryParameter` - :returns: instance - """ - name = resource.get('name') - instance = cls(name) - type_resources = {} - types = instance.struct_types - for item in resource['parameterType']['structTypes']: - types[item['name']] = item['type']['type'] - type_resources[item['name']] = item['type'] - struct_values = resource['parameterValue']['structValues'] - for key, value in struct_values.items(): - type_ = types[key] - converted = None - if type_ == 'STRUCT': - struct_resource = { - 'name': key, - 'parameterType': type_resources[key], - 'parameterValue': value, - } - converted = StructQueryParameter.from_api_repr(struct_resource) - elif type_ == 'ARRAY': - struct_resource = { - 'name': key, - 'parameterType': type_resources[key], - 'parameterValue': value, - } - converted = ArrayQueryParameter.from_api_repr(struct_resource) - else: - value = value['value'] - converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) - instance.struct_values[key] = converted - return instance - - def to_api_repr(self): - """Construct JSON API representation for the parameter. - - :rtype: dict - :returns: JSON mapping - """ - s_types = {} - values = {} - for name, value in self.struct_values.items(): - type_ = self.struct_types[name] - if type_ in ('STRUCT', 'ARRAY'): - repr_ = value.to_api_repr() - s_types[name] = {'name': name, 'type': repr_['parameterType']} - values[name] = repr_['parameterValue'] - else: - s_types[name] = {'name': name, 'type': {'type': type_}} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) - if converter is not None: - value = converter(value) - values[name] = {'value': value} - - resource = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [s_types[key] for key in self.struct_types], - }, - 'parameterValue': { - 'structValues': values, - }, - } - if self.name is not None: - resource['name'] = self.name - return resource - - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - tuple: The contents of this :class:`ArrayQueryParameter`. - """ - return ( - self.name, - self.struct_types, - self.struct_values, - ) - - def __eq__(self, other): - if not isinstance(other, StructQueryParameter): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __repr__(self): - return 'StructQueryParameter{}'.format(self._key()) - - -def _query_param_from_api_repr(resource): - """Helper: construct concrete query parameter from JSON resource.""" - qp_type = resource['parameterType'] - if 'arrayType' in qp_type: - klass = ArrayQueryParameter - elif 'structTypes' in qp_type: - klass = StructQueryParameter - else: - klass = ScalarQueryParameter - return klass.from_api_repr(resource) - - def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index a9a358cbf0f5..a2cee9c5272b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -69,7 +69,7 @@ def to_query_parameters_list(parameters): :type parameters: Sequence[Any] :param parameters: Sequence of query parameter values. - :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] :returns: A list of query parameters. """ return [scalar_to_query_parameter(value) for value in parameters] @@ -81,7 +81,7 @@ def to_query_parameters_dict(parameters): :type parameters: Mapping[str, Any] :param parameters: Dictionary of query parameter values. - :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] :returns: A list of named query parameters. """ return [ @@ -96,7 +96,7 @@ def to_query_parameters(parameters): :type parameters: Mapping[str, Any] or Sequence[Any] :param parameters: A dictionary or sequence of query parameter values. - :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] :returns: A list of query parameters. """ if parameters is None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 137072f04ddb..593b14e41fa1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -25,18 +25,18 @@ from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.query import _AbstractQueryParameter +from google.cloud.bigquery.query import _query_param_from_api_repr +from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery._helpers import AbstractQueryParameter -from google.cloud.bigquery._helpers import ArrayQueryParameter -from google.cloud.bigquery._helpers import ScalarQueryParameter -from google.cloud.bigquery._helpers import StructQueryParameter -from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery._helpers import _EnumApiResourceProperty from google.cloud.bigquery._helpers import _ListApiResourceProperty -from google.cloud.bigquery._helpers import _query_param_from_api_repr from google.cloud.bigquery._helpers import _TypedApiResourceProperty _DONE_STATE = 'DONE' @@ -1412,10 +1412,12 @@ def from_api_repr(cls, resource): """ query_parameters = _ListApiResourceProperty( - 'query_parameters', _QUERY_PARAMETERS_KEY, AbstractQueryParameter) + 'query_parameters', _QUERY_PARAMETERS_KEY, _AbstractQueryParameter) """ - An list of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` + A list of + :class:`google.cloud.bigquery.query.ArrayQueryParameter`, + :class:`google.cloud.bigquery.query.ScalarQueryParameter`, or + :class:`google.cloud.bigquery.query.StructQueryParameter` (empty by default) See: @@ -1425,7 +1427,7 @@ def from_api_repr(cls, resource): udf_resources = _ListApiResourceProperty( 'udf_resources', _UDF_RESOURCES_KEY, UDFResource) """ - A list of :class:`google.cloud.bigquery._helpers.UDFResource` (empty + A list of :class:`google.cloud.bigquery.query.UDFResource` (empty by default) See: @@ -1805,7 +1807,9 @@ def undeclared_query_paramters(self): :rtype: list of - :class:`~google.cloud.bigquery._helpers.AbstractQueryParameter` + :class:`~google.cloud.bigquery.query.ArrayQueryParameter`, + :class:`~google.cloud.bigquery.query.ScalarQueryParameter`, or + :class:`~google.cloud.bigquery.query.StructQueryParameter` :returns: undeclared parameters, or an empty list if the query has not yet completed. """ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 5524ac1670b6..9577fa57cc5d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -14,10 +14,441 @@ """BigQuery query processing.""" +from collections import OrderedDict import copy from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json +from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON +from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM + + +class UDFResource(object): + """Describe a single user-defined function (UDF) resource. + + :type udf_type: str + :param udf_type: the type of the resource ('inlineCode' or 'resourceUri') + + :type value: str + :param value: the inline code or resource URI. + + See + https://cloud.google.com/bigquery/user-defined-functions#api + """ + def __init__(self, udf_type, value): + self.udf_type = udf_type + self.value = value + + def __eq__(self, other): + if not isinstance(other, UDFResource): + return NotImplemented + return( + self.udf_type == other.udf_type and + self.value == other.value) + + def __ne__(self, other): + return not self == other + + +class _AbstractQueryParameter(object): + """Base class for named / positional query parameters. + """ + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`ScalarQueryParameter` + """ + raise NotImplementedError + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + """ + raise NotImplementedError + + +class ScalarQueryParameter(_AbstractQueryParameter): + """Named / positional query parameters for scalar values. + + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). + + :type type_: str + :param type_: name of parameter type. One of 'STRING', 'INT64', + 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. + + :type value: str, int, float, bool, :class:`datetime.datetime`, or + :class:`datetime.date`. + :param value: the scalar parameter value. + """ + def __init__(self, name, type_, value): + self.name = name + self.type_ = type_ + self.value = value + + @classmethod + def positional(cls, type_, value): + """Factory for positional paramater. + + :type type_: str + :param type_: + name of parameter type. One of 'STRING', 'INT64', + 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. + + :type value: str, int, float, bool, :class:`datetime.datetime`, or + :class:`datetime.date`. + :param value: the scalar parameter value. + + :rtype: :class:`ScalarQueryParameter` + :returns: instance without name + """ + return cls(None, type_, value) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`ScalarQueryParameter` + :returns: instance + """ + name = resource.get('name') + type_ = resource['parameterType']['type'] + value = resource['parameterValue']['value'] + converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + return cls(name, type_, converted) + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + :returns: JSON mapping + """ + value = self.value + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) + if converter is not None: + value = converter(value) + resource = { + 'parameterType': { + 'type': self.type_, + }, + 'parameterValue': { + 'value': value, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`ScalarQueryParameter`. + """ + return ( + self.name, + self.type_.upper(), + self.value, + ) + + def __eq__(self, other): + if not isinstance(other, ScalarQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'ScalarQueryParameter{}'.format(self._key()) + + +class ArrayQueryParameter(_AbstractQueryParameter): + """Named / positional query parameters for array values. + + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). + + :type array_type: str + :param array_type: + name of type of array elements. One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + + :type values: list of appropriate scalar type. + :param values: the parameter array values. + """ + def __init__(self, name, array_type, values): + self.name = name + self.array_type = array_type + self.values = values + + @classmethod + def positional(cls, array_type, values): + """Factory for positional parameters. + + :type array_type: str + :param array_type: + name of type of array elements. One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + + :type values: list of appropriate scalar type + :param values: the parameter array values. + + :rtype: :class:`ArrayQueryParameter` + :returns: instance without name + """ + return cls(None, array_type, values) + + @classmethod + def _from_api_repr_struct(cls, resource): + name = resource.get('name') + converted = [] + # We need to flatten the array to use the StructQueryParameter + # parse code. + resource_template = { + # The arrayType includes all the types of the fields of the STRUCT + 'parameterType': resource['parameterType']['arrayType'] + } + for array_value in resource['parameterValue']['arrayValues']: + struct_resource = copy.deepcopy(resource_template) + struct_resource['parameterValue'] = array_value + struct_value = StructQueryParameter.from_api_repr(struct_resource) + converted.append(struct_value) + return cls(name, 'STRUCT', converted) + + @classmethod + def _from_api_repr_scalar(cls, resource): + name = resource.get('name') + array_type = resource['parameterType']['arrayType']['type'] + values = [ + value['value'] + for value + in resource['parameterValue']['arrayValues']] + converted = [ + _QUERY_PARAMS_FROM_JSON[array_type](value, None) + for value in values + ] + return cls(name, array_type, converted) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`ArrayQueryParameter` + :returns: instance + """ + array_type = resource['parameterType']['arrayType']['type'] + if array_type == 'STRUCT': + return cls._from_api_repr_struct(resource) + return cls._from_api_repr_scalar(resource) + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + :returns: JSON mapping + """ + values = self.values + if self.array_type == 'RECORD' or self.array_type == 'STRUCT': + reprs = [value.to_api_repr() for value in values] + a_type = reprs[0]['parameterType'] + a_values = [repr_['parameterValue'] for repr_ in reprs] + else: + a_type = {'type': self.array_type} + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) + if converter is not None: + values = [converter(value) for value in values] + a_values = [{'value': value} for value in values] + resource = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': a_type, + }, + 'parameterValue': { + 'arrayValues': a_values, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`ArrayQueryParameter`. + """ + return ( + self.name, + self.array_type.upper(), + self.values, + ) + + def __eq__(self, other): + if not isinstance(other, ArrayQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'ArrayQueryParameter{}'.format(self._key()) + + +class StructQueryParameter(_AbstractQueryParameter): + """Named / positional query parameters for struct values. + + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). + + :type sub_params: tuple of :class:`ScalarQueryParameter` + :param sub_params: the sub-parameters for the struct + """ + def __init__(self, name, *sub_params): + self.name = name + types = self.struct_types = OrderedDict() + values = self.struct_values = {} + for sub in sub_params: + if isinstance(sub, self.__class__): + types[sub.name] = 'STRUCT' + values[sub.name] = sub + elif isinstance(sub, ArrayQueryParameter): + types[sub.name] = 'ARRAY' + values[sub.name] = sub + else: + types[sub.name] = sub.type_ + values[sub.name] = sub.value + + @classmethod + def positional(cls, *sub_params): + """Factory for positional parameters. + + :type sub_params: tuple of :class:`ScalarQueryParameter` + :param sub_params: the sub-parameters for the struct + + :rtype: :class:`StructQueryParameter` + :returns: instance without name + """ + return cls(None, *sub_params) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`StructQueryParameter` + :returns: instance + """ + name = resource.get('name') + instance = cls(name) + type_resources = {} + types = instance.struct_types + for item in resource['parameterType']['structTypes']: + types[item['name']] = item['type']['type'] + type_resources[item['name']] = item['type'] + struct_values = resource['parameterValue']['structValues'] + for key, value in struct_values.items(): + type_ = types[key] + converted = None + if type_ == 'STRUCT': + struct_resource = { + 'name': key, + 'parameterType': type_resources[key], + 'parameterValue': value, + } + converted = StructQueryParameter.from_api_repr(struct_resource) + elif type_ == 'ARRAY': + struct_resource = { + 'name': key, + 'parameterType': type_resources[key], + 'parameterValue': value, + } + converted = ArrayQueryParameter.from_api_repr(struct_resource) + else: + value = value['value'] + converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + instance.struct_values[key] = converted + return instance + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + :returns: JSON mapping + """ + s_types = {} + values = {} + for name, value in self.struct_values.items(): + type_ = self.struct_types[name] + if type_ in ('STRUCT', 'ARRAY'): + repr_ = value.to_api_repr() + s_types[name] = {'name': name, 'type': repr_['parameterType']} + values[name] = repr_['parameterValue'] + else: + s_types[name] = {'name': name, 'type': {'type': type_}} + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) + if converter is not None: + value = converter(value) + values[name] = {'value': value} + + resource = { + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [s_types[key] for key in self.struct_types], + }, + 'parameterValue': { + 'structValues': values, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`ArrayQueryParameter`. + """ + return ( + self.name, + self.struct_types, + self.struct_values, + ) + + def __eq__(self, other): + if not isinstance(other, StructQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'StructQueryParameter{}'.format(self._key()) class QueryResults(object): @@ -188,3 +619,15 @@ def _set_properties(self, api_response): self._properties.clear() self._properties.update(copy.deepcopy(api_response)) + + +def _query_param_from_api_repr(resource): + """Helper: construct concrete query parameter from JSON resource.""" + qp_type = resource['parameterType'] + if 'arrayType' in qp_type: + klass = ArrayQueryParameter + elif 'structTypes' in qp_type: + klass = StructQueryParameter + else: + klass = ScalarQueryParameter + return klass.from_api_repr(resource) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 8a78701a3808..7fa3ff758897 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -953,10 +953,10 @@ def test_dbapi_w_dml(self): self.assertIsNone(Config.CURSOR.fetchone()) def test_query_w_query_params(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud.bigquery._helpers import StructQueryParameter from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ArrayQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import StructQueryParameter question = 'What is the answer to life, the universe, and everything?' question_param = ScalarQueryParameter( name='question', type_='STRING', value=question) diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 2cf3f0f1f8e4..6d2a43fffb11 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -16,8 +16,6 @@ import datetime import unittest -import mock - class Test_not_null(unittest.TestCase): @@ -906,1074 +904,6 @@ def __init__(self): self.assertIsNone(wrapper._configuration._attr) -class Test_AbstractQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import AbstractQueryParameter - - return AbstractQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_from_api_virtual(self): - klass = self._get_target_class() - with self.assertRaises(NotImplementedError): - klass.from_api_repr({}) - - def test_to_api_virtual(self): - param = self._make_one() - with self.assertRaises(NotImplementedError): - param.to_api_repr() - - -class Test_ScalarQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - return ScalarQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - param = self._make_one(name='foo', type_='INT64', value=123) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test___eq__(self): - param = self._make_one(name='foo', type_='INT64', value=123) - self.assertEqual(param, param) - self.assertNotEqual(param, object()) - alias = self._make_one(name='bar', type_='INT64', value=123) - self.assertNotEqual(param, alias) - wrong_type = self._make_one(name='foo', type_='FLOAT64', value=123.0) - self.assertNotEqual(param, wrong_type) - wrong_val = self._make_one(name='foo', type_='INT64', value=234) - self.assertNotEqual(param, wrong_val) - - def test_positional(self): - klass = self._get_target_class() - param = klass.positional(type_='INT64', value=123) - self.assertEqual(param.name, None) - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test_from_api_repr_w_name(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': 123, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test_from_api_repr_wo_name(self): - RESOURCE = { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, None) - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test_to_api_repr_w_name(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - } - param = self._make_one(name='foo', type_='INT64', value=123) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_wo_name(self): - EXPECTED = { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - } - klass = self._get_target_class() - param = klass.positional(type_='INT64', value=123) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_float(self): - EXPECTED = { - 'parameterType': { - 'type': 'FLOAT64', - }, - 'parameterValue': { - 'value': 12.345, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='FLOAT64', value=12.345) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_bool(self): - EXPECTED = { - 'parameterType': { - 'type': 'BOOL', - }, - 'parameterValue': { - 'value': 'false', - }, - } - klass = self._get_target_class() - param = klass.positional(type_='BOOL', value=False) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_timestamp_datetime(self): - from google.cloud._helpers import UTC - - STAMP = '2016-12-20 15:58:27.339328+00:00' - when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) - EXPECTED = { - 'parameterType': { - 'type': 'TIMESTAMP', - }, - 'parameterValue': { - 'value': STAMP, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='TIMESTAMP', value=when) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_timestamp_micros(self): - from google.cloud._helpers import _microseconds_from_datetime - - now = datetime.datetime.utcnow() - seconds = _microseconds_from_datetime(now) / 1.0e6 - EXPECTED = { - 'parameterType': { - 'type': 'TIMESTAMP', - }, - 'parameterValue': { - 'value': seconds, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='TIMESTAMP', value=seconds) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_datetime_datetime(self): - from google.cloud._helpers import _datetime_to_rfc3339 - - now = datetime.datetime.utcnow() - EXPECTED = { - 'parameterType': { - 'type': 'DATETIME', - }, - 'parameterValue': { - 'value': _datetime_to_rfc3339(now)[:-1], # strip trailing 'Z' - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATETIME', value=now) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_datetime_string(self): - from google.cloud._helpers import _datetime_to_rfc3339 - - now = datetime.datetime.utcnow() - now_str = _datetime_to_rfc3339(now) - EXPECTED = { - 'parameterType': { - 'type': 'DATETIME', - }, - 'parameterValue': { - 'value': now_str, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATETIME', value=now_str) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_date_date(self): - today = datetime.date.today() - EXPECTED = { - 'parameterType': { - 'type': 'DATE', - }, - 'parameterValue': { - 'value': today.isoformat(), - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATE', value=today) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_date_string(self): - today = datetime.date.today() - today_str = today.isoformat(), - EXPECTED = { - 'parameterType': { - 'type': 'DATE', - }, - 'parameterValue': { - 'value': today_str, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATE', value=today_str) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_unknown_type(self): - EXPECTED = { - 'parameterType': { - 'type': 'UNKNOWN', - }, - 'parameterValue': { - 'value': 'unknown', - }, - } - klass = self._get_target_class() - param = klass.positional(type_='UNKNOWN', value='unknown') - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test___eq___wrong_type(self): - field = self._make_one('test', 'STRING', 'value') - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___eq___name_mismatch(self): - field = self._make_one('test', 'STRING', 'value') - other = self._make_one('other', 'STRING', 'value') - self.assertNotEqual(field, other) - - def test___eq___field_type_mismatch(self): - field = self._make_one('test', 'STRING', None) - other = self._make_one('test', 'INT64', None) - self.assertNotEqual(field, other) - - def test___eq___value_mismatch(self): - field = self._make_one('test', 'STRING', 'hello') - other = self._make_one('test', 'STRING', 'world') - self.assertNotEqual(field, other) - - def test___eq___hit(self): - field = self._make_one('test', 'STRING', 'gotcha') - other = self._make_one('test', 'STRING', 'gotcha') - self.assertEqual(field, other) - - def test___ne___wrong_type(self): - field = self._make_one('toast', 'INT64', 13) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___ne___same_value(self): - field1 = self._make_one('test', 'INT64', 12) - field2 = self._make_one('test', 'INT64', 12) - # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (field1 != field2) - self.assertFalse(comparison_val) - - def test___ne___different_values(self): - field1 = self._make_one('test', 'INT64', 11) - field2 = self._make_one('test', 'INT64', 12) - self.assertNotEqual(field1, field2) - - def test___repr__(self): - field1 = self._make_one('field1', 'STRING', 'value') - expected = "ScalarQueryParameter('field1', 'STRING', 'value')" - self.assertEqual(repr(field1), expected) - - -def _make_subparam(name, type_, value): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - return ScalarQueryParameter(name, type_, value) - - -class Test_ArrayQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import ArrayQueryParameter - - return ArrayQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test___eq__(self): - param = self._make_one(name='foo', array_type='INT64', values=[123]) - self.assertEqual(param, param) - self.assertNotEqual(param, object()) - alias = self._make_one(name='bar', array_type='INT64', values=[123]) - self.assertNotEqual(param, alias) - wrong_type = self._make_one( - name='foo', array_type='FLOAT64', values=[123.0]) - self.assertNotEqual(param, wrong_type) - wrong_val = self._make_one( - name='foo', array_type='INT64', values=[234]) - self.assertNotEqual(param, wrong_val) - - def test_positional(self): - klass = self._get_target_class() - param = klass.positional(array_type='INT64', values=[1, 2]) - self.assertEqual(param.name, None) - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test_from_api_repr_w_name(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test_from_api_repr_wo_name(self): - RESOURCE = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, None) - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test_from_api_repr_w_struct_type(self): - from google.cloud.bigquery._helpers import StructQueryParameter - - RESOURCE = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'STRUCT', - 'structTypes': [ - { - 'name': 'name', - 'type': {'type': 'STRING'}, - }, - { - 'name': 'age', - 'type': {'type': 'INT64'}, - }, - ], - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'structValues': { - 'name': {'value': 'Phred Phlyntstone'}, - 'age': {'value': '32'}, - }, - }, - { - 'structValues': { - 'name': { - 'value': 'Bharney Rhubbyl', - }, - 'age': {'value': '31'}, - }, - }, - ], - }, - } - - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - - phred = StructQueryParameter.positional( - _make_subparam('name', 'STRING', 'Phred Phlyntstone'), - _make_subparam('age', 'INT64', 32)) - bharney = StructQueryParameter.positional( - _make_subparam('name', 'STRING', 'Bharney Rhubbyl'), - _make_subparam('age', 'INT64', 31)) - self.assertEqual(param.array_type, 'STRUCT') - self.assertEqual(param.values, [phred, bharney]) - - def test_to_api_repr_w_name(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, - } - param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_wo_name(self): - EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, - } - klass = self._get_target_class() - param = klass.positional(array_type='INT64', values=[1, 2]) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_unknown_type(self): - EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'UNKNOWN', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': 'unknown', - } - ], - }, - } - klass = self._get_target_class() - param = klass.positional(array_type='UNKNOWN', values=['unknown']) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_record_type(self): - from google.cloud.bigquery._helpers import StructQueryParameter - - EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'foo', 'type': {'type': 'STRING'}}, - {'name': 'bar', 'type': {'type': 'INT64'}}, - ], - }, - }, - 'parameterValue': { - 'arrayValues': [{ - 'structValues': { - 'foo': {'value': 'Foo'}, - 'bar': {'value': '123'}, - } - }] - }, - } - one = _make_subparam('foo', 'STRING', 'Foo') - another = _make_subparam('bar', 'INT64', 123) - struct = StructQueryParameter.positional(one, another) - klass = self._get_target_class() - param = klass.positional(array_type='RECORD', values=[struct]) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test___eq___wrong_type(self): - field = self._make_one('test', 'STRING', ['value']) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___eq___name_mismatch(self): - field = self._make_one('field', 'STRING', ['value']) - other = self._make_one('other', 'STRING', ['value']) - self.assertNotEqual(field, other) - - def test___eq___field_type_mismatch(self): - field = self._make_one('test', 'STRING', []) - other = self._make_one('test', 'INT64', []) - self.assertNotEqual(field, other) - - def test___eq___value_mismatch(self): - field = self._make_one('test', 'STRING', ['hello']) - other = self._make_one('test', 'STRING', ['hello', 'world']) - self.assertNotEqual(field, other) - - def test___eq___hit(self): - field = self._make_one('test', 'STRING', ['gotcha']) - other = self._make_one('test', 'STRING', ['gotcha']) - self.assertEqual(field, other) - - def test___ne___wrong_type(self): - field = self._make_one('toast', 'INT64', [13]) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___ne___same_value(self): - field1 = self._make_one('test', 'INT64', [12]) - field2 = self._make_one('test', 'INT64', [12]) - # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (field1 != field2) - self.assertFalse(comparison_val) - - def test___ne___different_values(self): - field1 = self._make_one('test', 'INT64', [11]) - field2 = self._make_one('test', 'INT64', [12]) - self.assertNotEqual(field1, field2) - - def test___repr__(self): - field1 = self._make_one('field1', 'STRING', ['value']) - expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" - self.assertEqual(repr(field1), expected) - - -class Test_StructQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import StructQueryParameter - - return StructQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - param = self._make_one('foo', sub_1, sub_2) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test___eq__(self): - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - sub_3 = _make_subparam('baz', 'STRING', 'def') - sub_1_float = _make_subparam('bar', 'FLOAT64', 123.0) - param = self._make_one('foo', sub_1, sub_2) - self.assertEqual(param, param) - self.assertNotEqual(param, object()) - alias = self._make_one('bar', sub_1, sub_2) - self.assertNotEqual(param, alias) - wrong_type = self._make_one('foo', sub_1_float, sub_2) - self.assertNotEqual(param, wrong_type) - wrong_val = self._make_one('foo', sub_2, sub_3) - self.assertNotEqual(param, wrong_val) - - def test_positional(self): - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - klass = self._get_target_class() - param = klass.positional(sub_1, sub_2) - self.assertEqual(param.name, None) - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test_from_api_repr_w_name(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 123}, - 'baz': {'value': 'abc'}, - }, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test_from_api_repr_wo_name(self): - RESOURCE = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 123}, - 'baz': {'value': 'abc'}, - }, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, None) - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test_from_api_repr_w_nested_array(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'ARRAY', - 'arrayType': {'type': 'INT64'}, - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'arrayValues': [ - {'value': '123'}, - {'value': '456'}, - ]}, - }, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual( - param, - self._make_one( - 'foo', - _make_subparam('bar', 'STRING', 'abc'), - ArrayQueryParameter('baz', 'INT64', [123, 456]))) - - def test_from_api_repr_w_nested_struct(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'qux', 'type': {'type': 'INT64'}}, - {'name': 'spam', 'type': {'type': 'BOOL'}}, - ], - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'structValues': { - 'qux': {'value': '123'}, - 'spam': {'value': 'true'}, - }}, - }, - }, - } - - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - - expected = self._make_one( - 'foo', - _make_subparam('bar', 'STRING', 'abc'), - self._make_one( - 'baz', - _make_subparam('qux', 'INT64', 123), - _make_subparam('spam', 'BOOL', True))) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.struct_types, expected.struct_types) - self.assertEqual(param.struct_values, expected.struct_values) - - def test_to_api_repr_w_name(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': '123'}, - 'baz': {'value': 'abc'}, - }, - }, - } - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - param = self._make_one('foo', sub_1, sub_2) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_wo_name(self): - EXPECTED = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': '123'}, - 'baz': {'value': 'abc'}, - }, - }, - } - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - klass = self._get_target_class() - param = klass.positional(sub_1, sub_2) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_nested_array(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'ARRAY', - 'arrayType': {'type': 'INT64'}, - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'arrayValues': [ - {'value': '123'}, - {'value': '456'}, - ]}, - }, - }, - } - scalar = _make_subparam('bar', 'STRING', 'abc') - array = ArrayQueryParameter('baz', 'INT64', [123, 456]) - param = self._make_one('foo', scalar, array) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_nested_struct(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'qux', 'type': {'type': 'INT64'}}, - {'name': 'spam', 'type': {'type': 'BOOL'}}, - ], - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'structValues': { - 'qux': {'value': '123'}, - 'spam': {'value': 'true'}, - }}, - }, - }, - } - scalar_1 = _make_subparam('bar', 'STRING', 'abc') - scalar_2 = _make_subparam('qux', 'INT64', 123) - scalar_3 = _make_subparam('spam', 'BOOL', True) - sub = self._make_one('baz', scalar_2, scalar_3) - param = self._make_one('foo', scalar_1, sub) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test___eq___wrong_type(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'abc')) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___eq___name_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'abc')) - other = self._make_one( - 'other ', _make_subparam('bar', 'STRING', 'abc')) - self.assertNotEqual(field, other) - - def test___eq___field_type_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', None)) - other = self._make_one( - 'test', _make_subparam('bar', 'INT64', None)) - self.assertNotEqual(field, other) - - def test___eq___value_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - other = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'world')) - self.assertNotEqual(field, other) - - def test___eq___hit(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'gotcha')) - other = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'gotcha')) - self.assertEqual(field, other) - - def test___ne___wrong_type(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___ne___same_value(self): - field1 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - field2 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (field1 != field2) - self.assertFalse(comparison_val) - - def test___ne___different_values(self): - field1 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - field2 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'world')) - self.assertNotEqual(field1, field2) - - def test___repr__(self): - field1 = self._make_one( - 'test', _make_subparam('field1', 'STRING', 'hello')) - got = repr(field1) - self.assertIn('StructQueryParameter', got) - self.assertIn("'field1', 'STRING'", got) - self.assertIn("'field1': 'hello'", got) - - -class Test__query_param_from_api_repr(unittest.TestCase): - - @staticmethod - def _call_fut(resource): - from google.cloud.bigquery._helpers import _query_param_from_api_repr - - return _query_param_from_api_repr(resource) - - def test_w_scalar(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - RESOURCE = { - 'name': 'foo', - 'parameterType': {'type': 'INT64'}, - 'parameterValue': {'value': '123'}, - } - - parameter = self._call_fut(RESOURCE) - - self.assertIsInstance(parameter, ScalarQueryParameter) - self.assertEqual(parameter.name, 'foo') - self.assertEqual(parameter.type_, 'INT64') - self.assertEqual(parameter.value, 123) - - def test_w_scalar_timestamp(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud._helpers import UTC - - RESOURCE = { - 'name': 'zoned', - 'parameterType': {'type': 'TIMESTAMP'}, - 'parameterValue': {'value': '2012-03-04 05:06:07+00:00'}, - } - - parameter = self._call_fut(RESOURCE) - - self.assertIsInstance(parameter, ScalarQueryParameter) - self.assertEqual(parameter.name, 'zoned') - self.assertEqual(parameter.type_, 'TIMESTAMP') - self.assertEqual( - parameter.value, - datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC)) - - def test_w_scalar_timestamp_micros(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud._helpers import UTC - - RESOURCE = { - 'name': 'zoned', - 'parameterType': {'type': 'TIMESTAMP'}, - 'parameterValue': {'value': '2012-03-04 05:06:07.250000+00:00'}, - } - - parameter = self._call_fut(RESOURCE) - - self.assertIsInstance(parameter, ScalarQueryParameter) - self.assertEqual(parameter.name, 'zoned') - self.assertEqual(parameter.type_, 'TIMESTAMP') - self.assertEqual( - parameter.value, - datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC)) - - def test_w_array(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': {'type': 'INT64'}, - }, - 'parameterValue': { - 'arrayValues': [ - {'value': '123'}, - ]}, - } - - parameter = self._call_fut(RESOURCE) - - self.assertIsInstance(parameter, ArrayQueryParameter) - self.assertEqual(parameter.name, 'foo') - self.assertEqual(parameter.array_type, 'INT64') - self.assertEqual(parameter.values, [123]) - - def test_w_struct(self): - from google.cloud.bigquery._helpers import StructQueryParameter - - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'foo', 'type': {'type': 'STRING'}}, - {'name': 'bar', 'type': {'type': 'INT64'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'foo': {'value': 'Foo'}, - 'bar': {'value': '123'}, - } - }, - } - - parameter = self._call_fut(RESOURCE) - - self.assertIsInstance(parameter, StructQueryParameter) - self.assertEqual(parameter.name, 'foo') - self.assertEqual( - parameter.struct_types, {'foo': 'STRING', 'bar': 'INT64'}) - self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123}) - - -class Test_UDFResource(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import UDFResource - - return UDFResource - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') - self.assertEqual(udf.udf_type, 'resourceUri') - self.assertEqual(udf.value, 'gs://some_bucket/some_file') - - def test___eq__(self): - udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') - self.assertEqual(udf, udf) - self.assertNotEqual(udf, object()) - wrong_val = self._make_one( - 'resourceUri', 'gs://some_bucket/other_file') - self.assertNotEqual(udf, wrong_val) - wrong_type = self._make_one('inlineCode', udf.value) - self.assertNotEqual(udf, wrong_type) - - class Test_ListApiResourceProperty(unittest.TestCase): @staticmethod @@ -1986,10 +916,10 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _descriptor_and_klass(self): - from google.cloud.bigquery._helpers import AbstractQueryParameter + from google.cloud.bigquery.query import _AbstractQueryParameter descriptor = self._make_one( - 'query_parameters', 'queryParameters', AbstractQueryParameter) + 'query_parameters', 'queryParameters', _AbstractQueryParameter) class _Test(object): def __init__(self): @@ -2009,7 +939,7 @@ def test_instance_getter_empty(self): self.assertEqual(instance.query_parameters, []) def test_instance_getter_w_non_empty_list(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() @@ -2019,7 +949,7 @@ def test_instance_getter_w_non_empty_list(self): self.assertEqual(instance.query_parameters, query_parameters) def test_instance_setter_w_empty_list(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() @@ -2031,7 +961,7 @@ def test_instance_setter_w_empty_list(self): self.assertEqual(instance.query_parameters, []) def test_instance_setter_w_none(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() @@ -2042,7 +972,7 @@ def test_instance_setter_w_none(self): instance.query_parameters = None def test_instance_setter_w_valid_udf(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d34c192c3fac..9cdf7129c9c3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1799,9 +1799,9 @@ def test_query_defaults(self): self.assertFalse(sent_config['useLegacySql']) def test_query_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' PROJECT = 'PROJECT' @@ -1856,9 +1856,9 @@ def test_query_w_udf_resources(self): {'resourceUri': RESOURCE_URI}) def test_query_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter PROJECT = 'PROJECT' JOB = 'job_name' diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a9da40ee1d5e..c1c190328968 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1756,8 +1756,8 @@ def test_ctor_defaults(self): self.assertIsNone(job.maximum_bytes_billed) def test_ctor_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] @@ -1769,8 +1769,8 @@ def test_ctor_w_udf_resources(self): self.assertEqual(job.udf_resources, udf_resources) def test_ctor_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] client = _make_client(project=self.PROJECT) @@ -2053,9 +2053,9 @@ def test_referenced_tables(self): self.assertEqual(remote.project, 'other-project-123') def test_undeclared_query_paramters(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud.bigquery._helpers import StructQueryParameter + from google.cloud.bigquery.query import ArrayQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import StructQueryParameter undeclared = [{ 'name': 'my_scalar', @@ -2351,8 +2351,8 @@ def test_begin_w_alternate_client(self): self.assertEqual(req['data'], SENT) def test_begin_w_udf(self): - from google.cloud.bigquery._helpers import UDFResource from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' INLINE_UDF_CODE = 'var someCode = "here";' @@ -2406,8 +2406,8 @@ def test_begin_w_udf(self): self.assertEqual(req['data'], SENT) def test_begin_w_named_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2462,8 +2462,8 @@ def test_begin_w_named_query_parameter(self): self.assertEqual(req['data'], SENT) def test_begin_w_positional_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter.positional('INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index d2322886daa5..e5c78caf3b0b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -12,8 +12,966 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime import unittest +import mock + + +class Test_UDFResource(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import UDFResource + + return UDFResource + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') + self.assertEqual(udf.udf_type, 'resourceUri') + self.assertEqual(udf.value, 'gs://some_bucket/some_file') + + def test___eq__(self): + udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') + self.assertEqual(udf, udf) + self.assertNotEqual(udf, object()) + wrong_val = self._make_one( + 'resourceUri', 'gs://some_bucket/other_file') + self.assertNotEqual(udf, wrong_val) + wrong_type = self._make_one('inlineCode', udf.value) + self.assertNotEqual(udf, wrong_type) + + +class Test__AbstractQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import _AbstractQueryParameter + + return _AbstractQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_virtual(self): + klass = self._get_target_class() + with self.assertRaises(NotImplementedError): + klass.from_api_repr({}) + + def test_to_api_virtual(self): + param = self._make_one() + with self.assertRaises(NotImplementedError): + param.to_api_repr() + + +class Test_ScalarQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ScalarQueryParameter + + return ScalarQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test___eq__(self): + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one(name='bar', type_='INT64', value=123) + self.assertNotEqual(param, alias) + wrong_type = self._make_one(name='foo', type_='FLOAT64', value=123.0) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one(name='foo', type_='INT64', value=234) + self.assertNotEqual(param, wrong_val) + + def test_positional(self): + klass = self._get_target_class() + param = klass.positional(type_='INT64', value=123) + self.assertEqual(param.name, None) + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_from_api_repr_w_name(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': 123, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': '123', + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': '123', + }, + } + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': '123', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='INT64', value=123) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_float(self): + EXPECTED = { + 'parameterType': { + 'type': 'FLOAT64', + }, + 'parameterValue': { + 'value': 12.345, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='FLOAT64', value=12.345) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_bool(self): + EXPECTED = { + 'parameterType': { + 'type': 'BOOL', + }, + 'parameterValue': { + 'value': 'false', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='BOOL', value=False) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_datetime(self): + from google.cloud._helpers import UTC + + STAMP = '2016-12-20 15:58:27.339328+00:00' + when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + EXPECTED = { + 'parameterType': { + 'type': 'TIMESTAMP', + }, + 'parameterValue': { + 'value': STAMP, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='TIMESTAMP', value=when) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_micros(self): + from google.cloud._helpers import _microseconds_from_datetime + + now = datetime.datetime.utcnow() + seconds = _microseconds_from_datetime(now) / 1.0e6 + EXPECTED = { + 'parameterType': { + 'type': 'TIMESTAMP', + }, + 'parameterValue': { + 'value': seconds, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='TIMESTAMP', value=seconds) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_datetime(self): + from google.cloud._helpers import _datetime_to_rfc3339 + + now = datetime.datetime.utcnow() + EXPECTED = { + 'parameterType': { + 'type': 'DATETIME', + }, + 'parameterValue': { + 'value': _datetime_to_rfc3339(now)[:-1], # strip trailing 'Z' + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATETIME', value=now) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_string(self): + from google.cloud._helpers import _datetime_to_rfc3339 + + now = datetime.datetime.utcnow() + now_str = _datetime_to_rfc3339(now) + EXPECTED = { + 'parameterType': { + 'type': 'DATETIME', + }, + 'parameterValue': { + 'value': now_str, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATETIME', value=now_str) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_date_date(self): + today = datetime.date.today() + EXPECTED = { + 'parameterType': { + 'type': 'DATE', + }, + 'parameterValue': { + 'value': today.isoformat(), + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATE', value=today) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_date_string(self): + today = datetime.date.today() + today_str = today.isoformat(), + EXPECTED = { + 'parameterType': { + 'type': 'DATE', + }, + 'parameterValue': { + 'value': today_str, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATE', value=today_str) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_unknown_type(self): + EXPECTED = { + 'parameterType': { + 'type': 'UNKNOWN', + }, + 'parameterValue': { + 'value': 'unknown', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='UNKNOWN', value='unknown') + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test___eq___wrong_type(self): + field = self._make_one('test', 'STRING', 'value') + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one('test', 'STRING', 'value') + other = self._make_one('other', 'STRING', 'value') + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one('test', 'STRING', None) + other = self._make_one('test', 'INT64', None) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one('test', 'STRING', 'hello') + other = self._make_one('test', 'STRING', 'world') + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one('test', 'STRING', 'gotcha') + other = self._make_one('test', 'STRING', 'gotcha') + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one('toast', 'INT64', 13) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one('test', 'INT64', 12) + field2 = self._make_one('test', 'INT64', 12) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one('test', 'INT64', 11) + field2 = self._make_one('test', 'INT64', 12) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one('field1', 'STRING', 'value') + expected = "ScalarQueryParameter('field1', 'STRING', 'value')" + self.assertEqual(repr(field1), expected) + + +def _make_subparam(name, type_, value): + from google.cloud.bigquery.query import ScalarQueryParameter + + return ScalarQueryParameter(name, type_, value) + + +class Test_ArrayQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ArrayQueryParameter + + return ArrayQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test___eq__(self): + param = self._make_one(name='foo', array_type='INT64', values=[123]) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one(name='bar', array_type='INT64', values=[123]) + self.assertNotEqual(param, alias) + wrong_type = self._make_one( + name='foo', array_type='FLOAT64', values=[123.0]) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one( + name='foo', array_type='INT64', values=[234]) + self.assertNotEqual(param, wrong_val) + + def test_positional(self): + klass = self._get_target_class() + param = klass.positional(array_type='INT64', values=[1, 2]) + self.assertEqual(param.name, None) + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_from_api_repr_w_name(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_from_api_repr_w_struct_type(self): + from google.cloud.bigquery.query import StructQueryParameter + + RESOURCE = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'STRUCT', + 'structTypes': [ + { + 'name': 'name', + 'type': {'type': 'STRING'}, + }, + { + 'name': 'age', + 'type': {'type': 'INT64'}, + }, + ], + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'structValues': { + 'name': {'value': 'Phred Phlyntstone'}, + 'age': {'value': '32'}, + }, + }, + { + 'structValues': { + 'name': { + 'value': 'Bharney Rhubbyl', + }, + 'age': {'value': '31'}, + }, + }, + ], + }, + } + + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + + phred = StructQueryParameter.positional( + _make_subparam('name', 'STRING', 'Phred Phlyntstone'), + _make_subparam('age', 'INT64', 32)) + bharney = StructQueryParameter.positional( + _make_subparam('name', 'STRING', 'Bharney Rhubbyl'), + _make_subparam('age', 'INT64', 31)) + self.assertEqual(param.array_type, 'STRUCT') + self.assertEqual(param.values, [phred, bharney]) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + klass = self._get_target_class() + param = klass.positional(array_type='INT64', values=[1, 2]) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_unknown_type(self): + EXPECTED = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'UNKNOWN', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': 'unknown', + } + ], + }, + } + klass = self._get_target_class() + param = klass.positional(array_type='UNKNOWN', values=['unknown']) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_record_type(self): + from google.cloud.bigquery.query import StructQueryParameter + + EXPECTED = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'foo', 'type': {'type': 'STRING'}}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + ], + }, + }, + 'parameterValue': { + 'arrayValues': [{ + 'structValues': { + 'foo': {'value': 'Foo'}, + 'bar': {'value': '123'}, + } + }] + }, + } + one = _make_subparam('foo', 'STRING', 'Foo') + another = _make_subparam('bar', 'INT64', 123) + struct = StructQueryParameter.positional(one, another) + klass = self._get_target_class() + param = klass.positional(array_type='RECORD', values=[struct]) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test___eq___wrong_type(self): + field = self._make_one('test', 'STRING', ['value']) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one('field', 'STRING', ['value']) + other = self._make_one('other', 'STRING', ['value']) + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one('test', 'STRING', []) + other = self._make_one('test', 'INT64', []) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one('test', 'STRING', ['hello']) + other = self._make_one('test', 'STRING', ['hello', 'world']) + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one('test', 'STRING', ['gotcha']) + other = self._make_one('test', 'STRING', ['gotcha']) + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one('toast', 'INT64', [13]) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one('test', 'INT64', [12]) + field2 = self._make_one('test', 'INT64', [12]) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one('test', 'INT64', [11]) + field2 = self._make_one('test', 'INT64', [12]) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one('field1', 'STRING', ['value']) + expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" + self.assertEqual(repr(field1), expected) + + +class Test_StructQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import StructQueryParameter + + return StructQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test___eq__(self): + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + sub_3 = _make_subparam('baz', 'STRING', 'def') + sub_1_float = _make_subparam('bar', 'FLOAT64', 123.0) + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one('bar', sub_1, sub_2) + self.assertNotEqual(param, alias) + wrong_type = self._make_one('foo', sub_1_float, sub_2) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one('foo', sub_2, sub_3) + self.assertNotEqual(param, wrong_val) + + def test_positional(self): + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + klass = self._get_target_class() + param = klass.positional(sub_1, sub_2) + self.assertEqual(param.name, None) + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_from_api_repr_w_name(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 123}, + 'baz': {'value': 'abc'}, + }, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 123}, + 'baz': {'value': 'abc'}, + }, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_from_api_repr_w_nested_array(self): + from google.cloud.bigquery.query import ArrayQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'arrayValues': [ + {'value': '123'}, + {'value': '456'}, + ]}, + }, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual( + param, + self._make_one( + 'foo', + _make_subparam('bar', 'STRING', 'abc'), + ArrayQueryParameter('baz', 'INT64', [123, 456]))) + + def test_from_api_repr_w_nested_struct(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'qux', 'type': {'type': 'INT64'}}, + {'name': 'spam', 'type': {'type': 'BOOL'}}, + ], + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'structValues': { + 'qux': {'value': '123'}, + 'spam': {'value': 'true'}, + }}, + }, + }, + } + + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + + expected = self._make_one( + 'foo', + _make_subparam('bar', 'STRING', 'abc'), + self._make_one( + 'baz', + _make_subparam('qux', 'INT64', 123), + _make_subparam('spam', 'BOOL', True))) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, expected.struct_types) + self.assertEqual(param.struct_values, expected.struct_values) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': '123'}, + 'baz': {'value': 'abc'}, + }, + }, + } + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': '123'}, + 'baz': {'value': 'abc'}, + }, + }, + } + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + klass = self._get_target_class() + param = klass.positional(sub_1, sub_2) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_nested_array(self): + from google.cloud.bigquery.query import ArrayQueryParameter + + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'arrayValues': [ + {'value': '123'}, + {'value': '456'}, + ]}, + }, + }, + } + scalar = _make_subparam('bar', 'STRING', 'abc') + array = ArrayQueryParameter('baz', 'INT64', [123, 456]) + param = self._make_one('foo', scalar, array) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_nested_struct(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'qux', 'type': {'type': 'INT64'}}, + {'name': 'spam', 'type': {'type': 'BOOL'}}, + ], + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'structValues': { + 'qux': {'value': '123'}, + 'spam': {'value': 'true'}, + }}, + }, + }, + } + scalar_1 = _make_subparam('bar', 'STRING', 'abc') + scalar_2 = _make_subparam('qux', 'INT64', 123) + scalar_3 = _make_subparam('spam', 'BOOL', True) + sub = self._make_one('baz', scalar_2, scalar_3) + param = self._make_one('foo', scalar_1, sub) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test___eq___wrong_type(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'abc')) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'abc')) + other = self._make_one( + 'other ', _make_subparam('bar', 'STRING', 'abc')) + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', None)) + other = self._make_one( + 'test', _make_subparam('bar', 'INT64', None)) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + other = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'world')) + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'gotcha')) + other = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'gotcha')) + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + field2 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + field2 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'world')) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one( + 'test', _make_subparam('field1', 'STRING', 'hello')) + got = repr(field1) + self.assertIn('StructQueryParameter', got) + self.assertIn("'field1', 'STRING'", got) + self.assertIn("'field1': 'hello'", got) + class TestQueryResults(unittest.TestCase): PROJECT = 'project' @@ -180,3 +1138,116 @@ def test_schema(self): } query._set_properties(resource) self._verifySchema(query, resource) + + +class Test__query_param_from_api_repr(unittest.TestCase): + + @staticmethod + def _call_fut(resource): + from google.cloud.bigquery.query import _query_param_from_api_repr + + return _query_param_from_api_repr(resource) + + def test_w_scalar(self): + from google.cloud.bigquery.query import ScalarQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': {'type': 'INT64'}, + 'parameterValue': {'value': '123'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual(parameter.type_, 'INT64') + self.assertEqual(parameter.value, 123) + + def test_w_scalar_timestamp(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.query import ScalarQueryParameter + + RESOURCE = { + 'name': 'zoned', + 'parameterType': {'type': 'TIMESTAMP'}, + 'parameterValue': {'value': '2012-03-04 05:06:07+00:00'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'zoned') + self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual( + parameter.value, + datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC)) + + def test_w_scalar_timestamp_micros(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.query import ScalarQueryParameter + + RESOURCE = { + 'name': 'zoned', + 'parameterType': {'type': 'TIMESTAMP'}, + 'parameterValue': {'value': '2012-03-04 05:06:07.250000+00:00'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'zoned') + self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual( + parameter.value, + datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC)) + + def test_w_array(self): + from google.cloud.bigquery.query import ArrayQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }, + 'parameterValue': { + 'arrayValues': [ + {'value': '123'}, + ]}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ArrayQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual(parameter.array_type, 'INT64') + self.assertEqual(parameter.values, [123]) + + def test_w_struct(self): + from google.cloud.bigquery.query import StructQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'foo', 'type': {'type': 'STRING'}}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'foo': {'value': 'Foo'}, + 'bar': {'value': '123'}, + } + }, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, StructQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual( + parameter.struct_types, {'foo': 'STRING', 'bar': 'INT64'}) + self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123}) From 8d9e8ba60dd26580b1c168f15a100d7a3ca6507c Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 11 Oct 2017 12:24:17 -0700 Subject: [PATCH 0310/2016] BigQuery: Replace table.insert_data() with client.create_rows() (#4151) * replaces table.insert_data() with client.create_rows() * client.create_rows() accepts list of dicts as rows parameter * adds system test for rows given as list of dictionaries to create_rows() * adds test for create_rows() with list of Rows * removes unused test function * client.create_rows() accepts TableReference --- .../google/cloud/bigquery/client.py | 115 ++++++ .../google/cloud/bigquery/table.py | 172 ++------ .../google-cloud-bigquery/tests/system.py | 69 +++- .../tests/unit/test_client.py | 379 ++++++++++++++++++ .../tests/unit/test_table.py | 268 ++----------- 5 files changed, 606 insertions(+), 397 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a493e3dcd426..c55a36ec1994 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -34,6 +34,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import _row_from_mapping from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob @@ -42,6 +43,7 @@ from google.cloud.bigquery._helpers import _item_to_row from google.cloud.bigquery._helpers import _rows_page_start from google.cloud.bigquery._helpers import _field_to_index_mapping +from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB @@ -832,6 +834,119 @@ def query(self, query, job_config=None, job_id=None): job.begin() return job + def create_rows(self, table, rows, row_ids=None, selected_fields=None, + skip_invalid_rows=None, ignore_unknown_values=None, + template_suffix=None): + """API call: insert table data via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll + + :type table: One of: + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` + :param table: the destination table for the row data, or a reference + to it. + + :type rows: One of: + list of tuples + list of dictionaries + :param rows: Row data to be inserted. If a list of tuples is given, + each tuple should contain data for each schema field on + the current table and in the same order as the schema + fields. If a list of dictionaries is given, the keys must + include all required fields in the schema. Keys which do + not correspond to a field in the schema are ignored. + + :type row_ids: list of string + :param row_ids: (Optional) Unique ids, one per row being inserted. + If not passed, no de-duplication occurs. + + :type selected_fields: list of :class:`SchemaField` + :param selected_fields: + The fields to return. Required if ``table`` is a + :class:`~google.cloud.bigquery.table.TableReference`. + + :type skip_invalid_rows: bool + :param skip_invalid_rows: (Optional) Insert all valid rows of a + request, even if invalid rows exist. + The default value is False, which causes + the entire request to fail if any invalid + rows exist. + + :type ignore_unknown_values: bool + :param ignore_unknown_values: (Optional) Accept rows that contain + values that do not match the schema. + The unknown values are ignored. Default + is False, which treats unknown values as + errors. + + :type template_suffix: str + :param template_suffix: + (Optional) treat ``name`` as a template table and provide a suffix. + BigQuery will create the table `` + `` based + on the schema of the template table. See + https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables + + :rtype: list of mappings + :returns: One mapping per row with insert errors: the "index" key + identifies the row, and the "errors" key contains a list + of the mappings describing one or more problems with the + row. + :raises: ValueError if table's schema is not set + """ + if selected_fields is not None: + schema = selected_fields + elif isinstance(table, TableReference): + raise ValueError('need selected_fields with TableReference') + elif isinstance(table, Table): + if len(table._schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + schema = table.schema + else: + raise TypeError('table should be Table or TableReference') + + rows_info = [] + data = {'rows': rows_info} + + for index, row in enumerate(rows): + if isinstance(row, dict): + row = _row_from_mapping(row, schema) + row_info = {} + + for field, value in zip(schema, row): + converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) + if converter is not None: # STRING doesn't need converting + value = converter(value) + row_info[field.name] = value + + info = {'json': row_info} + if row_ids is not None: + info['insertId'] = row_ids[index] + + rows_info.append(info) + + if skip_invalid_rows is not None: + data['skipInvalidRows'] = skip_invalid_rows + + if ignore_unknown_values is not None: + data['ignoreUnknownValues'] = ignore_unknown_values + + if template_suffix is not None: + data['templateSuffix'] = template_suffix + + response = self._connection.api_request( + method='POST', + path='%s/insertAll' % table.path, + data=data) + errors = [] + + for error in response.get('insertErrors', ()): + errors.append({'index': int(error['index']), + 'errors': error['errors']}) + + return errors + def query_rows(self, query, job_config=None, job_id=None, timeout=None): """Start a query job and wait for the results. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 9630f1495290..74146b49385d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -23,7 +23,6 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW _TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'" @@ -554,21 +553,6 @@ def from_api_repr(cls, resource, client): table._set_properties(resource) return table - def _require_client(self, client): - """Check client or verify over-ride. - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: :class:`google.cloud.bigquery.client.Client` - :returns: The client passed in or the currently bound client. - """ - if client is None: - client = self._client - return client - def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -642,131 +626,37 @@ def _build_resource(self, filter_fields): resource[api_field] = getattr(self, f) return resource - def row_from_mapping(self, mapping): - """Convert a mapping to a row tuple using the schema. - - :type mapping: dict - :param mapping: Mapping of row data: must contain keys for all - required fields in the schema. Keys which do not correspond - to a field in the schema are ignored. - - :rtype: tuple - :returns: Tuple whose elements are ordered according to the table's - schema. - :raises: ValueError if table's schema is not set - """ - if len(self._schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - - row = [] - for field in self.schema: - if field.mode == 'REQUIRED': - row.append(mapping[field.name]) - elif field.mode == 'REPEATED': - row.append(mapping.get(field.name, ())) - elif field.mode == 'NULLABLE': - row.append(mapping.get(field.name)) - else: - raise ValueError( - "Unknown field mode: {}".format(field.mode)) - return tuple(row) - - def insert_data(self, - rows, - row_ids=None, - skip_invalid_rows=None, - ignore_unknown_values=None, - template_suffix=None, - client=None): - """API call: insert table data via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll - - :type rows: list of tuples - :param rows: Row data to be inserted. Each tuple should contain data - for each schema field on the current table and in the - same order as the schema fields. - - :type row_ids: list of string - :param row_ids: Unique ids, one per row being inserted. If not - passed, no de-duplication occurs. - - :type skip_invalid_rows: bool - :param skip_invalid_rows: (Optional) Insert all valid rows of a - request, even if invalid rows exist. - The default value is False, which causes - the entire request to fail if any invalid - rows exist. - - :type ignore_unknown_values: bool - :param ignore_unknown_values: (Optional) Accept rows that contain - values that do not match the schema. - The unknown values are ignored. Default - is False, which treats unknown values as - errors. - - :type template_suffix: str - :param template_suffix: - (Optional) treat ``name`` as a template table and provide a suffix. - BigQuery will create the table `` + `` based - on the schema of the template table. See - https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: list of mappings - :returns: One mapping per row with insert errors: the "index" key - identifies the row, and the "errors" key contains a list - of the mappings describing one or more problems with the - row. - :raises: ValueError if table's schema is not set - """ - if len(self._schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - - client = self._require_client(client) - rows_info = [] - data = {'rows': rows_info} - - for index, row in enumerate(rows): - row_info = {} - - for field, value in zip(self._schema, row): - converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) - if converter is not None: # STRING doesn't need converting - value = converter(value) - row_info[field.name] = value - - info = {'json': row_info} - if row_ids is not None: - info['insertId'] = row_ids[index] - - rows_info.append(info) - - if skip_invalid_rows is not None: - data['skipInvalidRows'] = skip_invalid_rows - - if ignore_unknown_values is not None: - data['ignoreUnknownValues'] = ignore_unknown_values - - if template_suffix is not None: - data['templateSuffix'] = template_suffix - - response = client._connection.api_request( - method='POST', - path='%s/insertAll' % self.path, - data=data) - errors = [] - - for error in response.get('insertErrors', ()): - errors.append({'index': int(error['index']), - 'errors': error['errors']}) - - return errors + +def _row_from_mapping(mapping, schema): + """Convert a mapping to a row tuple using the schema. + + :type mapping: dict + :param mapping: Mapping of row data: must contain keys for all + required fields in the schema. Keys which do not correspond + to a field in the schema are ignored. + + :type schema: list of :class:`SchemaField` + :param schema: The schema of the table destination for the rows + + :rtype: tuple + :returns: Tuple whose elements are ordered according to the schema. + :raises: ValueError if schema is empty + """ + if len(schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + + row = [] + for field in schema: + if field.mode == 'REQUIRED': + row.append(mapping[field.name]) + elif field.mode == 'REPEATED': + row.append(mapping.get(field.name, ())) + elif field.mode == 'NULLABLE': + row.append(mapping.get(field.name)) + else: + raise ValueError( + "Unknown field mode: {}".format(field.mode)) + return tuple(row) def _parse_schema_resource(info): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 7fa3ff758897..39db9a69c3fd 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -318,7 +318,7 @@ def _fetch_single_page(table, selected_fields=None): page = six.next(iterator.pages) return list(page) - def test_insert_data_then_dump_table(self): + def test_create_rows_then_dump_table(self): NOW_SECONDS = 1448911495.484366 NOW = datetime.datetime.utcfromtimestamp( NOW_SECONDS).replace(tzinfo=UTC) @@ -330,20 +330,21 @@ def test_insert_data_then_dump_table(self): ] ROW_IDS = range(len(ROWS)) - dataset = self.temp_dataset(_make_dataset_id('insert_data_then_dump')) - TABLE_NAME = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - now = bigquery.SchemaField('now', 'TIMESTAMP') - table_arg = Table(dataset.table(TABLE_NAME), - schema=[full_name, age, now], client=Config.CLIENT) + dataset = self.temp_dataset(_make_dataset_id('create_rows_then_dump')) + TABLE_ID = 'test_table' + schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField('now', 'TIMESTAMP'), + ] + table_arg = Table(dataset.table(TABLE_ID), schema=schema, + client=Config.CLIENT) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) - errors = table.insert_data(ROWS, ROW_IDS) + errors = Config.CLIENT.create_rows(table, ROWS, ROW_IDS) self.assertEqual(len(errors), 0) rows = () @@ -1278,7 +1279,7 @@ def test_query_future(self): row_tuples = [r.values() for r in iterator] self.assertEqual(row_tuples, [(1,)]) - def test_insert_nested_nested(self): + def test_create_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField schema = [ @@ -1299,21 +1300,57 @@ def test_insert_nested_nested(self): to_insert = [ ('Some value', record) ] - table_name = 'test_table' + table_id = 'test_table' dataset = self.temp_dataset(_make_dataset_id('issue_2951')) - table_arg = Table(dataset.table(table_name), schema=schema, + table_arg = Table(dataset.table(table_id), schema=schema, client=Config.CLIENT) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - table.insert_data(to_insert) + Config.CLIENT.create_rows(table, to_insert) retry = RetryResult(_has_rows, max_tries=8) rows = retry(self._fetch_single_page)(table) row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, to_insert) - def test_create_table_insert_fetch_nested_schema(self): + def test_create_rows_nested_nested_dictionary(self): + # See #2951 + SF = bigquery.SchemaField + schema = [ + SF('string_col', 'STRING', mode='NULLABLE'), + SF('record_col', 'RECORD', mode='NULLABLE', fields=[ + SF('nested_string', 'STRING', mode='NULLABLE'), + SF('nested_repeated', 'INTEGER', mode='REPEATED'), + SF('nested_record', 'RECORD', mode='NULLABLE', fields=[ + SF('nested_nested_string', 'STRING', mode='NULLABLE'), + ]), + ]), + ] + record = { + 'nested_string': 'another string value', + 'nested_repeated': [0, 1, 2], + 'nested_record': {'nested_nested_string': 'some deep insight'}, + } + to_insert = [ + {'string_col': 'Some value', 'record_col': record} + ] + table_id = 'test_table' + dataset = self.temp_dataset(_make_dataset_id('issue_2951')) + table_arg = Table(dataset.table(table_id), schema=schema, + client=Config.CLIENT) + table = retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + Config.CLIENT.create_rows(table, to_insert) + + retry = RetryResult(_has_rows, max_tries=8) + rows = retry(self._fetch_single_page)(table) + row_tuples = [r.values() for r in rows] + expected_rows = [('Some value', record)] + self.assertEqual(row_tuples, expected_rows) + + def test_create_table_rows_fetch_nested_schema(self): table_name = 'test_table' dataset = self.temp_dataset( _make_dataset_id('create_table_nested_schema')) @@ -1334,7 +1371,7 @@ def test_create_table_insert_fetch_nested_schema(self): to_insert.append( tuple(mapping[field.name] for field in schema)) - errors = table.insert_data(to_insert) + errors = Config.CLIENT.create_rows(table, to_insert) self.assertEqual(len(errors), 0) retry = RetryResult(_has_rows, max_tries=8) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 9cdf7129c9c3..49030463f78b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1917,6 +1917,385 @@ def test_query_w_query_parameters(self): 'parameterValue': {'value': '123'} }) + def test_create_rows_wo_schema(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + table = Table(table_ref) + ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + + with self.assertRaises(ValueError) as exc: + client.create_rows(table, ROWS) + + self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) + + def test_create_rows_w_schema(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import _microseconds_from_datetime + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + ] + table = Table(table_ref, schema=schema) + ROWS = [ + ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), + ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), + ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)), + ('Bhettye Rhubble', 27, None), + ] + + def _row_data(row): + joined = row[2] + if isinstance(row[2], datetime.datetime): + joined = _microseconds_from_datetime(joined) * 1e-6 + return {'full_name': row[0], + 'age': str(row[1]), + 'joined': joined} + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_list_of_dictionaries(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import _microseconds_from_datetime + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + ] + table = Table(table_ref, schema=schema) + ROWS = [ + { + 'full_name': 'Phred Phlyntstone', 'age': 32, + 'joined': _datetime_to_rfc3339(WHEN) + }, + { + 'full_name': 'Bharney Rhubble', 'age': 33, + 'joined': WHEN + datetime.timedelta(seconds=1) + }, + { + 'full_name': 'Wylma Phlyntstone', 'age': 29, + 'joined': WHEN + datetime.timedelta(seconds=2) + }, + { + 'full_name': 'Bhettye Rhubble', 'age': 27, 'joined': None + }, + ] + + def _row_data(row): + joined = row['joined'] + if isinstance(joined, datetime.datetime): + row['joined'] = _microseconds_from_datetime(joined) * 1e-6 + row['age'] = str(row['age']) + return row + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_list_of_Rows(self): + from google.cloud.bigquery._helpers import Row + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + ] + table = Table(table_ref, schema=schema) + f2i = {'full_name': 0, 'age': 1} + ROWS = [ + Row(('Phred Phlyntstone', 32), f2i), + Row(('Bharney Rhubble', 33), f2i), + Row(('Wylma Phlyntstone', 29), f2i), + Row(('Bhettye Rhubble', 27), f2i), + ] + + def _row_data(row): + return {'full_name': row[0], 'age': str(row[1])} + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_skip_invalid_and_ignore_unknown(self): + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + RESPONSE = { + 'insertErrors': [ + {'index': 1, + 'errors': [ + {'reason': 'REASON', + 'location': 'LOCATION', + 'debugInfo': 'INFO', + 'message': 'MESSAGE'} + ]}, + ]} + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESPONSE) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('voter', 'BOOLEAN', mode='NULLABLE'), + ] + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + table = Table(table_ref, schema=schema) + ROWS = [ + ('Phred Phlyntstone', 32, True), + ('Bharney Rhubble', 33, False), + ('Wylma Phlyntstone', 29, True), + ('Bhettye Rhubble', 27, True), + ] + + def _row_data(row): + return { + 'full_name': row[0], + 'age': str(row[1]), + 'voter': row[2] and 'true' or 'false', + } + + SENT = { + 'skipInvalidRows': True, + 'ignoreUnknownValues': True, + 'templateSuffix': '20160303', + 'rows': [{'insertId': index, 'json': _row_data(row)} + for index, row in enumerate(ROWS)], + } + + errors = client.create_rows( + table, + ROWS, + row_ids=[index for index, _ in enumerate(ROWS)], + skip_invalid_rows=True, + ignore_unknown_values=True, + template_suffix='20160303', + ) + + self.assertEqual(len(errors), 1) + self.assertEqual(errors[0]['index'], 1) + self.assertEqual(len(errors[0]['errors']), 1) + self.assertEqual(errors[0]['errors'][0], + RESPONSE['insertErrors'][0]['errors'][0]) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_repeated_fields(self): + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + full_name = SchemaField('color', 'STRING', mode='REPEATED') + index = SchemaField('index', 'INTEGER', 'REPEATED') + score = SchemaField('score', 'FLOAT', 'REPEATED') + struct = SchemaField('struct', 'RECORD', mode='REPEATED', + fields=[index, score]) + table = Table(table_ref, schema=[full_name, struct]) + ROWS = [ + (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), + ] + + def _row_data(row): + return {'color': row[0], + 'struct': row[1]} + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_record_schema(self): + from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + area_code = SchemaField('area_code', 'STRING', 'REQUIRED') + local_number = SchemaField('local_number', 'STRING', 'REQUIRED') + rank = SchemaField('rank', 'INTEGER', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='NULLABLE', + fields=[area_code, local_number, rank]) + ROWS = [ + ('Phred Phlyntstone', {'area_code': '800', + 'local_number': '555-1212', + 'rank': 1}), + ('Bharney Rhubble', {'area_code': '877', + 'local_number': '768-5309', + 'rank': 2}), + ('Wylma Phlyntstone', None), + ] + + def _row_data(row): + return {'full_name': row[0], + 'phone': row[1]} + + SENT = { + 'rows': [{'json': _row_data(row)} for row in ROWS], + } + + errors = client.create_rows(table_ref, ROWS, + selected_fields=[full_name, phone]) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_errors(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import Table + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + ROWS = [ + ('Phred Phlyntstone', 32, True), + ('Bharney Rhubble', 33, False), + ('Wylma Phlyntstone', 29, True), + ('Bhettye Rhubble', 27, True), + ] + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + + # table ref with no selected fields + with self.assertRaises(ValueError): + client.create_rows(table_ref, ROWS) + + # table with no schema + with self.assertRaises(ValueError): + client.create_rows(Table(table_ref), ROWS) + + # neither Table nor tableReference + with self.assertRaises(TypeError): + client.create_rows(1, ROWS) + def test_query_rows_defaults(self): from google.api.core.page_iterator import HTTPIterator from google.cloud.bigquery._helpers import Row diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index f9fe1ddab2d1..73c1c9aab894 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -694,21 +694,33 @@ def test_partition_expiration_w_none_no_partition_set(self): self.assertIsNone(table.partitioning_type) self.assertIsNone(table.partition_expiration) - def test_row_from_mapping_wo_schema(self): - from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA + +class Test_row_from_mapping(unittest.TestCase, _SchemaBase): + + PROJECT = 'prahj-ekt' + DS_ID = 'dataset-name' + TABLE_NAME = 'table-name' + + def _call_fut(self, mapping, schema): + from google.cloud.bigquery.table import _row_from_mapping + + return _row_from_mapping(mapping, schema) + + def test__row_from_mapping_wo_schema(self): + from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} client = _Client(project=self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = Table(table_ref, client=client) with self.assertRaises(ValueError) as exc: - table.row_from_mapping(MAPPING) + self._call_fut(MAPPING, table.schema) self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) - def test_row_from_mapping_w_invalid_schema(self): - from google.cloud.bigquery.table import SchemaField + def test__row_from_mapping_w_invalid_schema(self): + from google.cloud.bigquery.table import Table, SchemaField MAPPING = { 'full_name': 'Phred Phlyntstone', 'age': 32, @@ -722,17 +734,17 @@ def test_row_from_mapping_w_invalid_schema(self): age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') bogus = SchemaField('joined', 'STRING', mode='BOGUS') - table = self._make_one(table_ref, - schema=[full_name, age, colors, bogus], - client=client) + table = Table(table_ref, + schema=[full_name, age, colors, bogus], + client=client) with self.assertRaises(ValueError) as exc: - table.row_from_mapping(MAPPING) + self._call_fut(MAPPING, table.schema) self.assertIn('Unknown field mode: BOGUS', str(exc.exception)) - def test_row_from_mapping_w_schema(self): - from google.cloud.bigquery.table import SchemaField + def test__row_from_mapping_w_schema(self): + from google.cloud.bigquery.table import Table, SchemaField MAPPING = { 'full_name': 'Phred Phlyntstone', 'age': 32, @@ -746,233 +758,14 @@ def test_row_from_mapping_w_schema(self): age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') joined = SchemaField('joined', 'STRING', mode='NULLABLE') - table = self._make_one(table_ref, - schema=[full_name, age, colors, joined], - client=client) + table = Table(table_ref, + schema=[full_name, age, colors, joined], + client=client) self.assertEqual( - table.row_from_mapping(MAPPING), + self._call_fut(MAPPING, table.schema), ('Phred Phlyntstone', 32, ['red', 'green'], None)) - def test_insert_data_wo_schema(self): - from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA - - client = _Client(project=self.PROJECT) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) - ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] - - with self.assertRaises(ValueError) as exc: - table.insert_data(ROWS) - - self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) - - def test_insert_data_w_bound_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _datetime_to_rfc3339 - from google.cloud._helpers import _microseconds_from_datetime - from google.cloud.bigquery.table import SchemaField - - WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._make_one(table_ref, schema=[full_name, age, joined], - client=client) - ROWS = [ - ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), - ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), - ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)), - ('Bhettye Rhubble', 27, None), - ] - - def _row_data(row): - joined = row[2] - if isinstance(row[2], datetime.datetime): - joined = _microseconds_from_datetime(joined) * 1e-6 - return {'full_name': row[0], - 'age': str(row[1]), - 'joined': joined} - - SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], - } - - errors = table.insert_data(ROWS) - - self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - - def test_insert_data_w_alternate_client(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - RESPONSE = { - 'insertErrors': [ - {'index': 1, - 'errors': [ - {'reason': 'REASON', - 'location': 'LOCATION', - 'debugInfo': 'INFO', - 'message': 'MESSAGE'} - ]}, - ]} - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESPONSE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') - table = self._make_one(table_ref, schema=[full_name, age, voter], - client=client1) - ROWS = [ - ('Phred Phlyntstone', 32, True), - ('Bharney Rhubble', 33, False), - ('Wylma Phlyntstone', 29, True), - ('Bhettye Rhubble', 27, True), - ] - - def _row_data(row): - return { - 'full_name': row[0], - 'age': str(row[1]), - 'voter': row[2] and 'true' or 'false', - } - - SENT = { - 'skipInvalidRows': True, - 'ignoreUnknownValues': True, - 'templateSuffix': '20160303', - 'rows': [{'insertId': index, 'json': _row_data(row)} - for index, row in enumerate(ROWS)], - } - - errors = table.insert_data( - client=client2, - rows=ROWS, - row_ids=[index for index, _ in enumerate(ROWS)], - skip_invalid_rows=True, - ignore_unknown_values=True, - template_suffix='20160303', - ) - - self.assertEqual(len(errors), 1) - self.assertEqual(errors[0]['index'], 1) - self.assertEqual(len(errors[0]['errors']), 1) - self.assertEqual(errors[0]['errors'][0], - RESPONSE['insertErrors'][0]['errors'][0]) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - - def test_insert_data_w_repeated_fields(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('color', 'STRING', mode='REPEATED') - index = SchemaField('index', 'INTEGER', 'REPEATED') - score = SchemaField('score', 'FLOAT', 'REPEATED') - struct = SchemaField('struct', 'RECORD', mode='REPEATED', - fields=[index, score]) - table = self._make_one(table_ref, schema=[full_name, struct], - client=client) - ROWS = [ - (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), - ] - - def _row_data(row): - return {'color': row[0], - 'struct': row[1]} - - SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], - } - - errors = table.insert_data(ROWS) - - self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - - def test_insert_data_w_record_schema(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - area_code = SchemaField('area_code', 'STRING', 'REQUIRED') - local_number = SchemaField('local_number', 'STRING', 'REQUIRED') - rank = SchemaField('rank', 'INTEGER', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='NULLABLE', - fields=[area_code, local_number, rank]) - table = self._make_one(table_ref, schema=[full_name, phone], - client=client) - ROWS = [ - ('Phred Phlyntstone', {'area_code': '800', - 'local_number': '555-1212', - 'rank': 1}), - ('Bharney Rhubble', {'area_code': '877', - 'local_number': '768-5309', - 'rank': 2}), - ('Wylma Phlyntstone', None), - ] - - def _row_data(row): - return {'full_name': row[0], - 'phone': row[1]} - - SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], - } - - errors = table.insert_data(ROWS) - - self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): @@ -1102,8 +895,3 @@ class _Connection(object): def __init__(self, *responses): self._responses = responses[:] self._requested = [] - - def api_request(self, **kw): - self._requested.append(kw) - response, self._responses = self._responses[0], self._responses[1:] - return response From 7dbc1cf9ef5fdfb4389b64b1a0b516cd45d180ca Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Oct 2017 12:29:19 -0700 Subject: [PATCH 0311/2016] BQ: remove unused ConfigurationProperty classes. (#4157) --- .../google/cloud/bigquery/_helpers.py | 66 ----------- .../tests/unit/test__helpers.py | 112 ------------------ 2 files changed, 178 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index ae7f1186fe40..0ee7a9c01c6a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -480,72 +480,6 @@ class _EnumApiResourceProperty(_ApiResourceProperty): """ -class _ConfigurationProperty(object): - """Base property implementation. - - Values will be stored on a `_configuration` helper attribute of the - property's job instance. - - :type name: str - :param name: name of the property - """ - - def __init__(self, name): - self.name = name - self._backing_name = '_%s' % (self.name,) - - def __get__(self, instance, owner): - """Descriptor protocal: accesstor""" - if instance is None: - return self - return getattr(instance._configuration, self._backing_name) - - def _validate(self, value): - """Subclasses override to impose validation policy.""" - pass - - def __set__(self, instance, value): - """Descriptor protocal: mutator""" - self._validate(value) - setattr(instance._configuration, self._backing_name, value) - - def __delete__(self, instance): - """Descriptor protocal: deleter""" - delattr(instance._configuration, self._backing_name) - - -class _TypedProperty(_ConfigurationProperty): - """Property implementation: validates based on value type. - - :type name: str - :param name: name of the property - - :type property_type: type or sequence of types - :param property_type: type to be validated - """ - def __init__(self, name, property_type): - super(_TypedProperty, self).__init__(name) - self.property_type = property_type - - def _validate(self, value): - """Ensure that 'value' is of the appropriate type. - - :raises: ValueError on a type mismatch. - """ - if value is None: - return - if not isinstance(value, self.property_type): - raise ValueError('Required type: %s' % (self.property_type,)) - - -class _EnumProperty(_ConfigurationProperty): - """Pseudo-enumeration class. - - :type name: str - :param name: name of the property. - """ - - def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 6d2a43fffb11..f37d39a4f823 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -749,43 +749,6 @@ def test_w_datetime(self): self.assertEqual(self._call_fut(when), '12:13:41') -class Test_ConfigurationProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import _ConfigurationProperty - - return _ConfigurationProperty - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_it(self): - - class Configuration(object): - _attr = None - - class Wrapper(object): - attr = self._make_one('attr') - - def __init__(self): - self._configuration = Configuration() - - self.assertEqual(Wrapper.attr.name, 'attr') - - wrapper = Wrapper() - self.assertIsNone(wrapper.attr) - - value = object() - wrapper.attr = value - self.assertIs(wrapper.attr, value) - self.assertIs(wrapper._configuration._attr, value) - - del wrapper.attr - self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._configuration._attr) - - class Test_TypedApiResourceProperty(unittest.TestCase): @staticmethod @@ -829,81 +792,6 @@ def __init__(self): wrapper._properties['back'] -class Test_TypedProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import _TypedProperty - - return _TypedProperty - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_it(self): - - class Configuration(object): - _attr = None - - class Wrapper(object): - attr = self._make_one('attr', int) - - def __init__(self): - self._configuration = Configuration() - - wrapper = Wrapper() - with self.assertRaises(ValueError): - wrapper.attr = 'BOGUS' - - wrapper.attr = 42 - self.assertEqual(wrapper.attr, 42) - self.assertEqual(wrapper._configuration._attr, 42) - - wrapper.attr = None - self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._configuration._attr) - - wrapper.attr = 23 - self.assertEqual(wrapper.attr, 23) - self.assertEqual(wrapper._configuration._attr, 23) - - del wrapper.attr - self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._configuration._attr) - - -class Test_EnumProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import _EnumProperty - - return _EnumProperty - - def test_it(self): - - class Sub(self._get_target_class()): - pass - - class Configuration(object): - _attr = None - - class Wrapper(object): - attr = Sub('attr') - - def __init__(self): - self._configuration = Configuration() - - wrapper = Wrapper() - wrapper.attr = 'FOO' - self.assertEqual(wrapper.attr, 'FOO') - self.assertEqual(wrapper._configuration._attr, 'FOO') - - del wrapper.attr - self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._configuration._attr) - - class Test_ListApiResourceProperty(unittest.TestCase): @staticmethod From 70b2153678d9da17c9fee8482d0bf320b7a907e9 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 11 Oct 2017 15:03:30 -0700 Subject: [PATCH 0312/2016] BigQuery: removes Client from Table class (#4159) --- .../google/cloud/bigquery/client.py | 8 +- .../google/cloud/bigquery/table.py | 7 +- .../google-cloud-bigquery/tests/system.py | 42 ++--- .../tests/unit/test_client.py | 14 +- .../tests/unit/test_table.py | 153 ++++-------------- 5 files changed, 64 insertions(+), 160 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index c55a36ec1994..488b409ff77c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -239,7 +239,7 @@ def create_table(self, table): del resource[field] api_response = self._connection.api_request( method='POST', path=path, data=resource) - return Table.from_api_repr(api_response, self) + return Table.from_api_repr(api_response) def get_dataset(self, dataset_ref): """Fetch the dataset referenced by ``dataset_ref`` @@ -267,7 +267,7 @@ def get_table(self, table_ref): """ api_response = self._connection.api_request( method='GET', path=table_ref.path) - return Table.from_api_repr(api_response, self) + return Table.from_api_repr(api_response) def update_dataset(self, dataset, fields): """Change some fields of a dataset. @@ -331,7 +331,7 @@ def update_table(self, table, properties): headers = None api_response = self._connection.api_request( method='PATCH', path=table.path, data=partial, headers=headers) - return Table.from_api_repr(api_response, client=self) + return Table.from_api_repr(api_response) def list_dataset_tables(self, dataset, max_results=None, page_token=None): """List tables in the dataset. @@ -1140,7 +1140,7 @@ def _item_to_table(iterator, resource): :rtype: :class:`~google.cloud.bigquery.table.Table` :returns: The next table in the page. """ - return Table.from_api_repr(resource, iterator.client) + return Table.from_api_repr(resource) def _make_job_id(job_id): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 74146b49385d..9df62dcf66d0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -162,14 +162,13 @@ class Table(object): 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema' ] - def __init__(self, table_ref, schema=(), client=None): + def __init__(self, table_ref, schema=()): self._project = table_ref.project self._table_id = table_ref.table_id self._dataset_id = table_ref.dataset_id self._properties = {} # Let the @property do validation. self.schema = schema - self._client = client @property def project(self): @@ -526,7 +525,7 @@ def view_use_legacy_sql(self, value): self._properties['view']['useLegacySql'] = value @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource): """Factory: construct a table given its API representation :type resource: dict @@ -549,7 +548,7 @@ def from_api_repr(cls, resource, client): dataset_id = resource['tableReference']['datasetId'] dataset_ref = dataset.DatasetReference(project_id, dataset_id) - table = cls(dataset_ref.table(table_id), client=client) + table = cls(dataset_ref.table(table_id)) table._set_properties(resource) return table diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 39db9a69c3fd..61397d3d80cf 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -190,8 +190,7 @@ def test_create_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(table_id), schema=[full_name, age], - client=Config.CLIENT) + table_arg = Table(dataset.table(table_id), schema=[full_name, age]) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) @@ -234,9 +233,7 @@ def test_list_dataset_tables(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') for table_name in tables_to_create: - table = Table(dataset.table(table_name), - schema=[full_name, age], - client=Config.CLIENT) + table = Table(dataset.table(table_name), schema=[full_name, age]) created_table = retry_403(Config.CLIENT.create_table)(table) self.to_delete.insert(0, created_table) @@ -257,8 +254,7 @@ def test_update_table(self): bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') ] - table_arg = Table(dataset.table(TABLE_NAME), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(TABLE_NAME), schema=schema) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -292,8 +288,7 @@ def test_update_table_schema(self): bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') ] - table_arg = Table(dataset.table(TABLE_NAME), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(TABLE_NAME), schema=schema) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -337,8 +332,7 @@ def test_create_rows_then_dump_table(self): bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), bigquery.SchemaField('now', 'TIMESTAMP'), ] - table_arg = Table(dataset.table(TABLE_ID), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(TABLE_ID), schema=schema) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -375,8 +369,7 @@ def test_load_table_from_local_file_then_dump_table(self): mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') table_ref = dataset.table(TABLE_NAME) - table_arg = Table(table_ref, schema=[full_name, age], - client=Config.CLIENT) + table_arg = Table(table_ref, schema=[full_name, age]) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -420,7 +413,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) table_ref = dataset.table(TABLE_NAME) - table = Table(table_ref, client=Config.CLIENT) + table = Table(table_ref) self.to_delete.insert(0, table) with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof: @@ -482,8 +475,7 @@ def test_load_table_from_storage_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age]) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -616,7 +608,7 @@ def test_extract_table(self): dataset_id = _make_dataset_id('load_gcs_then_extract') table_id = 'test_table' table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=Config.CLIENT) + table = Table(table_ref) self.to_delete.insert(0, table) rows = [ ('Phred Phlyntstone', 32), @@ -648,7 +640,7 @@ def test_extract_table_w_job_config(self): dataset_id = _make_dataset_id('load_gcs_then_extract') table_id = 'test_table' table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=Config.CLIENT) + table = Table(table_ref) self.to_delete.insert(0, table) rows = [ ('Phred Phlyntstone', 32), @@ -705,8 +697,7 @@ def test_job_cancel(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age], - client=Config.CLIENT) + table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age]) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -899,7 +890,7 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') table_ref = dataset.table(table_id) - table_arg = Table(table_ref, schema=[greeting], client=Config.CLIENT) + table_arg = Table(table_ref, schema=[greeting]) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -1302,8 +1293,7 @@ def test_create_rows_nested_nested(self): ] table_id = 'test_table' dataset = self.temp_dataset(_make_dataset_id('issue_2951')) - table_arg = Table(dataset.table(table_id), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(table_id), schema=schema) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -1337,8 +1327,7 @@ def test_create_rows_nested_nested_dictionary(self): ] table_id = 'test_table' dataset = self.temp_dataset(_make_dataset_id('issue_2951')) - table_arg = Table(dataset.table(table_id), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(table_id), schema=schema) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -1355,8 +1344,7 @@ def test_create_table_rows_fetch_nested_schema(self): dataset = self.temp_dataset( _make_dataset_id('create_table_nested_schema')) schema = _load_json_schema() - table_arg = Table(dataset.table(table_name), schema=schema, - client=Config.CLIENT) + table_arg = Table(dataset.table(table_name), schema=schema) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 49030463f78b..02277f4c095a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -421,7 +421,7 @@ def test_create_table_w_day_partition(self): } conn = client._connection = _Connection(resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=client) + table = Table(table_ref) table.partitioning_type = 'DAY' got = client.create_table(table) @@ -462,7 +462,7 @@ def test_create_table_w_day_partition_and_expire(self): } conn = client._connection = _Connection(resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=client) + table = Table(table_ref) table.partitioning_type = 'DAY' table.partition_expiration = 100 @@ -515,7 +515,7 @@ def test_create_table_w_schema_and_query(self): ] conn = client._connection = _Connection(resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema, client=client) + table = Table(table_ref, schema=schema) table.view_query = query got = client.create_table(table) @@ -670,7 +670,7 @@ def test_update_table(self): client = self._make_one(project=project, credentials=creds) conn = client._connection = _Connection(resource, resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema, client=client) + table = Table(table_ref, schema=schema) table.description = description table.friendly_name = title @@ -726,7 +726,7 @@ def test_update_table_only_use_legacy_sql(self): client = self._make_one(project=project, credentials=creds) conn = client._connection = _Connection(resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=client) + table = Table(table_ref) table.view_use_legacy_sql = True updated_table = client.update_table(table, ['view_use_legacy_sql']) @@ -784,7 +784,7 @@ def test_update_table_w_query(self): client = self._make_one(project=project, credentials=creds) conn = client._connection = _Connection(resource) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema, client=client) + table = Table(table_ref, schema=schema) table.location = location table.expires = exp_time table.view_query = query @@ -900,7 +900,7 @@ def test_update_table_delete_property(self): client = self._make_one(project=project, credentials=creds) conn = client._connection = _Connection(resource1, resource2) table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, client=client) + table = Table(table_ref) table.description = description table.friendly_name = title table2 = client.update_table(table, ['description', 'friendly_name']) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 73c1c9aab894..0d598864e3c1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -254,10 +254,9 @@ def _verifyResourceProperties(self, table, resource): self.assertEqual(table.schema, []) def test_ctor(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) self.assertEqual(table.table_id, self.TABLE_NAME) self.assertEqual(table.project, self.PROJECT) @@ -287,21 +286,18 @@ def test_ctor(self): def test_ctor_w_schema(self): from google.cloud.bigquery.table import SchemaField - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertEqual(table.schema, [full_name, age]) def test_num_bytes_getter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) # Check with no value set. self.assertIsNone(table.num_bytes) @@ -321,10 +317,9 @@ def test_num_bytes_getter(self): getattr(table, 'num_bytes') def test_num_rows_getter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) # Check with no value set. self.assertIsNone(table.num_rows) @@ -344,20 +339,18 @@ def test_num_rows_getter(self): getattr(table, 'num_rows') def test_schema_setter_non_list(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(TypeError): table.schema = object() def test_schema_setter_invalid_field(self): from google.cloud.bigquery.table import SchemaField - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') with self.assertRaises(ValueError): table.schema = [full_name, object()] @@ -365,10 +358,9 @@ def test_schema_setter_invalid_field(self): def test_schema_setter(self): from google.cloud.bigquery.table import SchemaField - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table.schema = [full_name, age] @@ -385,10 +377,9 @@ def test_props_set_by_server(self): self.PROJECT, self.DS_ID, self.TABLE_NAME) URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table._properties['creationTime'] = _millis(CREATED) table._properties['etag'] = 'ETAG' table._properties['lastModifiedTime'] = _millis(MODIFIED) @@ -408,26 +399,23 @@ def test_props_set_by_server(self): self.assertEqual(table.table_type, 'TABLE') def test_description_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.description = 12345 def test_description_setter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.description = 'DESCRIPTION' self.assertEqual(table.description, 'DESCRIPTION') def test_expires_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.expires = object() @@ -436,83 +424,73 @@ def test_expires_setter(self): from google.cloud._helpers import UTC WHEN = datetime.datetime(2015, 7, 28, 16, 39, tzinfo=UTC) - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.expires = WHEN self.assertEqual(table.expires, WHEN) def test_friendly_name_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.friendly_name = 12345 def test_friendly_name_setter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.friendly_name = 'FRIENDLY' self.assertEqual(table.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.location = 12345 def test_location_setter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.location = 'LOCATION' self.assertEqual(table.location, 'LOCATION') def test_view_query_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.view_query = 12345 def test_view_query_setter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.view_query = 'select * from foo' self.assertEqual(table.view_query, 'select * from foo') def test_view_query_deleter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.view_query = 'select * from foo' del table.view_query self.assertIsNone(table.view_query) def test_view_use_legacy_sql_setter_bad_value(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.view_use_legacy_sql = 12345 def test_view_use_legacy_sql_setter(self): - client = _Client(self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref, client=client) + table = self._make_one(table_ref) table.view_use_legacy_sql = False table.view_query = 'select * from foo' self.assertEqual(table.view_use_legacy_sql, False) @@ -520,15 +498,13 @@ def test_view_use_legacy_sql_setter(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client) + klass.from_api_repr(RESOURCE) def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = { 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), 'tableReference': { @@ -539,9 +515,8 @@ def test_from_api_repr_bare(self): 'type': 'TABLE', } klass = self._get_target_class() - table = klass.from_api_repr(RESOURCE, client) + table = klass.from_api_repr(RESOURCE) self.assertEqual(table.table_id, self.TABLE_NAME) - self.assertIs(table._client, client) self._verifyResourceProperties(table, RESOURCE) def test_from_api_repr_w_properties(self): @@ -549,7 +524,6 @@ def test_from_api_repr_w_properties(self): from google.cloud._helpers import UTC from google.cloud._helpers import _millis - client = _Client(self.PROJECT) RESOURCE = self._makeResource() RESOURCE['view'] = {'query': 'select fullname, age from person_ages'} RESOURCE['type'] = 'VIEW' @@ -557,52 +531,39 @@ def test_from_api_repr_w_properties(self): self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) RESOURCE['expirationTime'] = _millis(self.EXP_TIME) klass = self._get_target_class() - table = klass.from_api_repr(RESOURCE, client) - self.assertIs(table._client, client) + table = klass.from_api_repr(RESOURCE) self._verifyResourceProperties(table, RESOURCE) def test_partition_type_setter_bad_type(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) with self.assertRaises(ValueError): table.partitioning_type = 123 def test_partition_type_setter_unknown_value(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) with self.assertRaises(ValueError): table.partitioning_type = "HASH" def test_partition_type_setter_w_known_value(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partitioning_type) table.partitioning_type = 'DAY' self.assertEqual(table.partitioning_type, 'DAY') @@ -610,15 +571,11 @@ def test_partition_type_setter_w_known_value(self): def test_partition_type_setter_w_none(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) table._properties['timePartitioning'] = {'type': 'DAY'} table.partitioning_type = None self.assertIsNone(table.partitioning_type) @@ -627,30 +584,22 @@ def test_partition_type_setter_w_none(self): def test_partition_experation_bad_type(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) with self.assertRaises(ValueError): table.partition_expiration = "NEVER" def test_partition_expiration_w_integer(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 self.assertEqual(table.partitioning_type, "DAY") @@ -659,15 +608,11 @@ def test_partition_expiration_w_integer(self): def test_partition_expiration_w_none(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table._properties['timePartitioning'] = { 'type': 'DAY', @@ -680,15 +625,11 @@ def test_partition_expiration_w_none(self): def test_partition_expiration_w_none_no_partition_set(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age], - client=client) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = None self.assertIsNone(table.partitioning_type) @@ -709,10 +650,9 @@ def _call_fut(self, mapping, schema): def test__row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} - client = _Client(project=self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - table = Table(table_ref, client=client) + table = Table(table_ref) with self.assertRaises(ValueError) as exc: self._call_fut(MAPPING, table.schema) @@ -727,16 +667,13 @@ def test__row_from_mapping_w_invalid_schema(self): 'colors': ['red', 'green'], 'bogus': 'WHATEVER', } - client = _Client(project=self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') bogus = SchemaField('joined', 'STRING', mode='BOGUS') - table = Table(table_ref, - schema=[full_name, age, colors, bogus], - client=client) + table = Table(table_ref, schema=[full_name, age, colors, bogus]) with self.assertRaises(ValueError) as exc: self._call_fut(MAPPING, table.schema) @@ -751,16 +688,13 @@ def test__row_from_mapping_w_schema(self): 'colors': ['red', 'green'], 'extra': 'IGNORED', } - client = _Client(project=self.PROJECT) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') joined = SchemaField('joined', 'STRING', mode='NULLABLE') - table = Table(table_ref, - schema=[full_name, age, colors, joined], - client=client) + table = Table(table_ref, schema=[full_name, age, colors, joined]) self.assertEqual( self._call_fut(MAPPING, table.schema), @@ -878,20 +812,3 @@ def test_w_subfields(self): {'name': 'number', 'type': 'STRING', 'mode': 'REQUIRED'}]}) - - -class _Client(object): - - def __init__(self, project='project', connection=None): - self.project = project - self._connection = connection - - -class _Connection(object): - - API_BASE_URL = 'http://example.com' - USER_AGENT = 'testing 1.2.3' - - def __init__(self, *responses): - self._responses = responses[:] - self._requested = [] From d1a0eb8c039be043a737cf0b780011cc3a03b912 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 11 Oct 2017 20:07:22 -0400 Subject: [PATCH 0313/2016] bigquery: add streaming buffer info (#4161) Unfortunately there's no good way to write a system test for this, since you can never be sure that one gets created. But I informally verified that the code works by running create_rows a lot until I got a streaming buffer. --- .../google/cloud/bigquery/table.py | 23 +++++++++++++++++++ .../tests/unit/test_table.py | 16 +++++++++++++ 2 files changed, 39 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 9df62dcf66d0..990349336433 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -524,6 +524,12 @@ def view_use_legacy_sql(self, value): self._properties['view'] = {} self._properties['view']['useLegacySql'] = value + @property + def streaming_buffer(self): + sb = self._properties.get('streamingBuffer') + if sb is not None: + return StreamingBuffer(sb) + @classmethod def from_api_repr(cls, resource): """Factory: construct a table given its API representation @@ -658,6 +664,23 @@ def _row_from_mapping(mapping, schema): return tuple(row) +class StreamingBuffer(object): + """Information about a table's streaming buffer. + + See https://cloud.google.com/bigquery/streaming-data-into-bigquery. + + :type resource: dict + :param resource: streaming buffer representation returned from the API + """ + + def __init__(self, resource): + self.estimated_bytes = int(resource['estimatedBytes']) + self.estimated_rows = int(resource['estimatedRows']) + # time is in milliseconds since the epoch. + self.oldest_entry_time = _datetime_from_microseconds( + 1000.0 * int(resource['oldestEntryTime'])) + + def _parse_schema_resource(info): """Parse a resource fragment into a schema field. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 0d598864e3c1..12b2ec98c4d4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -174,6 +174,8 @@ def _setUpConstants(self): self.RESOURCE_URL = 'http://example.com/path/to/resource' self.NUM_BYTES = 12345 self.NUM_ROWS = 67 + self.NUM_EST_BYTES = 1234 + self.NUM_EST_ROWS = 23 def _makeResource(self): self._setUpConstants() @@ -194,6 +196,10 @@ def _makeResource(self): 'numRows': self.NUM_ROWS, 'numBytes': self.NUM_BYTES, 'type': 'TABLE', + 'streamingBuffer': { + 'estimatedRows': str(self.NUM_EST_ROWS), + 'estimatedBytes': str(self.NUM_EST_BYTES), + 'oldestEntryTime': self.WHEN_TS * 1000}, } def _verifyReadonlyResourceProperties(self, table, resource): @@ -222,6 +228,16 @@ def _verifyReadonlyResourceProperties(self, table, resource): else: self.assertIsNone(table.self_link) + if 'streamingBuffer' in resource: + self.assertEqual(table.streaming_buffer.estimated_rows, + self.NUM_EST_ROWS) + self.assertEqual(table.streaming_buffer.estimated_bytes, + self.NUM_EST_BYTES) + self.assertEqual(table.streaming_buffer.oldest_entry_time, + self.WHEN) + else: + self.assertIsNone(table.streaming_buffer) + self.assertEqual(table.full_table_id, self.TABLE_FULL_ID) self.assertEqual(table.table_type, 'TABLE' if 'view' not in resource else 'VIEW') From 48fa1f9113b3f0c8fe087a5e55974c53f287082f Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 12 Oct 2017 12:55:29 -0400 Subject: [PATCH 0314/2016] bigquery: factor out common values in test_client.py (#4162) --- .../tests/unit/test_client.py | 824 ++++++++---------- 1 file changed, 342 insertions(+), 482 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 02277f4c095a..22df27c6358c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -34,6 +34,11 @@ def _make_credentials(): class TestClient(unittest.TestCase): + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + TABLE_ID = 'TABLE_ID' + TABLE_REF = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + @staticmethod def _get_target_class(): from google.cloud.bigquery.client import Client @@ -46,10 +51,10 @@ def _make_one(self, *args, **kw): def test_ctor(self): from google.cloud.bigquery._http import Connection - PROJECT = 'PROJECT' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) self.assertIsInstance(client._connection, Connection) self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) @@ -57,9 +62,8 @@ def test_ctor(self): def test__get_query_results_miss_w_explicit_project_and_timeout(self): from google.cloud.exceptions import NotFound - project = 'PROJECT' creds = _make_credentials() - client = self._make_one(project, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection() with self.assertRaises(NotFound): @@ -75,7 +79,6 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): req['query_params'], {'maxResults': 0, 'timeoutMs': 500}) def test__get_query_results_hit(self): - project = 'PROJECT' job_id = 'query_job' data = { 'kind': 'bigquery#getQueryResultsResponse', @@ -95,7 +98,7 @@ def test__get_query_results_hit(self): ] }, 'jobReference': { - 'projectId': project, + 'projectId': self.PROJECT, 'jobId': job_id, }, 'totalRows': '10', @@ -105,7 +108,7 @@ def test__get_query_results_hit(self): } creds = _make_credentials() - client = self._make_one(project, creds) + client = self._make_one(self.PROJECT, creds) client._connection = _Connection(data) query_results = client._get_query_results(job_id) @@ -157,12 +160,11 @@ def test_list_projects_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_projects_explicit_response_missing_projects_key(self): - PROJECT = 'PROJECT' PATH = 'projects' TOKEN = 'TOKEN' DATA = {} creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_projects(max_results=3, page_token=TOKEN) @@ -183,28 +185,27 @@ def test_list_projects_explicit_response_missing_projects_key(self): def test_list_datasets_defaults(self): from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' DATASET_1 = 'dataset_one' DATASET_2 = 'dataset_two' - PATH = 'projects/%s/datasets' % PROJECT + PATH = 'projects/%s/datasets' % self.PROJECT TOKEN = 'TOKEN' DATA = { 'nextPageToken': TOKEN, 'datasets': [ {'kind': 'bigquery#dataset', - 'id': '%s:%s' % (PROJECT, DATASET_1), + 'id': '%s:%s' % (self.PROJECT, DATASET_1), 'datasetReference': {'datasetId': DATASET_1, - 'projectId': PROJECT}, + 'projectId': self.PROJECT}, 'friendlyName': None}, {'kind': 'bigquery#dataset', - 'id': '%s:%s' % (PROJECT, DATASET_2), + 'id': '%s:%s' % (self.PROJECT, DATASET_2), 'datasetReference': {'datasetId': DATASET_2, - 'projectId': PROJECT}, + 'projectId': self.PROJECT}, 'friendlyName': 'Two'}, ] } creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_datasets() @@ -225,12 +226,11 @@ def test_list_datasets_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_datasets_explicit_response_missing_datasets_key(self): - PROJECT = 'PROJECT' - PATH = 'projects/%s/datasets' % PROJECT + PATH = 'projects/%s/datasets' % self.PROJECT TOKEN = 'TOKEN' DATA = {} creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_datasets( @@ -252,45 +252,42 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DATASET = 'dataset_name' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - dataset = client.dataset(DATASET, PROJECT) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + dataset = client.dataset(self.DS_ID, self.PROJECT) self.assertIsInstance(dataset, DatasetReference) - self.assertEqual(dataset.dataset_id, DATASET) - self.assertEqual(dataset.project, PROJECT) + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) def test_dataset_with_default_project(self): from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DATASET = 'dataset_name' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - dataset = client.dataset(DATASET) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + dataset = client.dataset(self.DS_ID) self.assertIsInstance(dataset, DatasetReference) - self.assertEqual(dataset.dataset_id, DATASET) - self.assertEqual(dataset.project, PROJECT) + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) def test_get_dataset(self): - project = 'PROJECT' - dataset_id = 'dataset_id' - path = 'projects/%s/datasets/%s' % (project, dataset_id) + path = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) creds = _make_credentials() http = object() - client = self._make_one(project=project, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) resource = { - 'id': '%s:%s' % (project, dataset_id), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'datasetReference': { - 'projectId': project, - 'datasetId': dataset_id, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, }, } conn = client._connection = _Connection(resource) - dataset_ref = client.dataset(dataset_id) + dataset_ref = client.dataset(self.DS_ID) dataset = client.get_dataset(dataset_ref) @@ -298,45 +295,41 @@ def test_get_dataset(self): req = conn._requested[0] self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % path) - self.assertEqual(dataset.dataset_id, dataset_id) + self.assertEqual(dataset.dataset_id, self.DS_ID) def test_create_dataset_minimal(self): from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - PATH = 'projects/%s/datasets' % PROJECT + PATH = 'projects/%s/datasets' % self.PROJECT RESOURCE = { 'datasetReference': - {'projectId': PROJECT, 'datasetId': DS_ID}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'etag': "etag", - 'id': "%s:%s" % (PROJECT, DS_ID), + 'id': "%s:%s" % (self.PROJECT, self.DS_ID), } creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(RESOURCE) - ds = client.create_dataset(Dataset(client.dataset(DS_ID))) + ds = client.create_dataset(Dataset(client.dataset(self.DS_ID))) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') self.assertEqual(req['path'], '/%s' % PATH) SENT = { 'datasetReference': - {'projectId': PROJECT, 'datasetId': DS_ID}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'labels': {}, } self.assertEqual(req['data'], SENT) - self.assertEqual(ds.dataset_id, DS_ID) - self.assertEqual(ds.project, PROJECT) + self.assertEqual(ds.dataset_id, self.DS_ID) + self.assertEqual(ds.project, self.PROJECT) self.assertEqual(ds.etag, RESOURCE['etag']) self.assertEqual(ds.full_dataset_id, RESOURCE['id']) def test_create_dataset_w_attrs(self): from google.cloud.bigquery.dataset import Dataset, AccessEntry - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - PATH = 'projects/%s/datasets' % PROJECT + PATH = 'projects/%s/datasets' % self.PROJECT DESCRIPTION = 'DESC' FRIENDLY_NAME = 'FN' LOCATION = 'US' @@ -349,9 +342,9 @@ def test_create_dataset_w_attrs(self): } RESOURCE = { 'datasetReference': - {'projectId': PROJECT, 'datasetId': DS_ID}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'etag': "etag", - 'id': "%s:%s" % (PROJECT, DS_ID), + 'id': "%s:%s" % (self.PROJECT, self.DS_ID), 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, @@ -362,11 +355,11 @@ def test_create_dataset_w_attrs(self): {'view': VIEW}], } creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(RESOURCE) entries = [AccessEntry('OWNER', 'userByEmail', USER_EMAIL), AccessEntry(None, 'view', VIEW)] - ds_arg = Dataset(client.dataset(DS_ID)) + ds_arg = Dataset(client.dataset(self.DS_ID)) ds_arg.access_entries = entries ds_arg.description = DESCRIPTION ds_arg.friendly_name = FRIENDLY_NAME @@ -380,7 +373,7 @@ def test_create_dataset_w_attrs(self): self.assertEqual(req['path'], '/%s' % PATH) SENT = { 'datasetReference': - {'projectId': PROJECT, 'datasetId': DS_ID}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, @@ -391,8 +384,8 @@ def test_create_dataset_w_attrs(self): 'labels': LABELS, } self.assertEqual(req['data'], SENT) - self.assertEqual(ds.dataset_id, DS_ID) - self.assertEqual(ds.project, PROJECT) + self.assertEqual(ds.dataset_id, self.DS_ID) + self.assertEqual(ds.project, self.PROJECT) self.assertEqual(ds.etag, RESOURCE['etag']) self.assertEqual(ds.full_dataset_id, RESOURCE['id']) self.assertEqual(ds.description, DESCRIPTION) @@ -404,24 +397,20 @@ def test_create_dataset_w_attrs(self): def test_create_table_w_day_partition(self): from google.cloud.bigquery.table import Table - project = 'PROJECT' - dataset_id = 'dataset_id' - table_id = 'table-id' path = 'projects/%s/datasets/%s/tables' % ( - project, dataset_id) + self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, } conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref) + table = Table(self.TABLE_REF) table.partitioning_type = 'DAY' got = client.create_table(table) @@ -432,37 +421,33 @@ def test_create_table_w_day_partition(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'timePartitioning': {'type': 'DAY'}, } self.assertEqual(req['data'], sent) self.assertEqual(table.partitioning_type, "DAY") - self.assertEqual(got.table_id, table_id) + self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_day_partition_and_expire(self): from google.cloud.bigquery.table import Table - project = 'PROJECT' - dataset_id = 'dataset_id' - table_id = 'table-id' path = 'projects/%s/datasets/%s/tables' % ( - project, dataset_id) + self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, } conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref) + table = Table(self.TABLE_REF) table.partitioning_type = 'DAY' table.partition_expiration = 100 @@ -474,34 +459,31 @@ def test_create_table_w_day_partition_and_expire(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, } self.assertEqual(req['data'], sent) self.assertEqual(table.partitioning_type, "DAY") self.assertEqual(table.partition_expiration, 100) - self.assertEqual(got.table_id, table_id) + self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_schema_and_query(self): from google.cloud.bigquery.table import Table, SchemaField - project = 'PROJECT' - dataset_id = 'dataset_id' - table_id = 'table-id' path = 'projects/%s/datasets/%s/tables' % ( - project, dataset_id) - query = 'SELECT * from %s:%s' % (dataset_id, table_id) + self.PROJECT, self.DS_ID) + query = 'SELECT * from %s:%s' % (self.DS_ID, self.TABLE_ID) creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -514,8 +496,7 @@ def test_create_table_w_schema_and_query(self): SchemaField('age', 'INTEGER', mode='REQUIRED') ] conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) table.view_query = query got = client.create_table(table) @@ -526,9 +507,9 @@ def test_create_table_w_schema_and_query(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, }, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -538,56 +519,48 @@ def test_create_table_w_schema_and_query(self): 'view': {'query': query, 'useLegacySql': None}, } self.assertEqual(req['data'], sent) - self.assertEqual(got.table_id, table_id) - self.assertEqual(got.project, project) - self.assertEqual(got.dataset_id, dataset_id) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) self.assertEqual(got.schema, schema) self.assertEqual(got.view_query, query) def test_get_table(self): - project = 'PROJECT' - dataset_id = 'dataset_id' - table_id = 'table-id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=project, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, }, } conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - - table = client.get_table(table_ref) + table = client.get_table(self.TABLE_REF) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % path) - self.assertEqual(table.table_id, table_id) + self.assertEqual(table.table_id, self.TABLE_ID) def test_update_dataset_w_invalid_field(self): from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(ValueError): - client.update_dataset(Dataset(client.dataset(DS_ID)), ["foo"]) + client.update_dataset(Dataset(client.dataset(self.DS_ID)), ["foo"]) def test_update_dataset(self): from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - PATH = 'projects/%s/datasets/%s' % (PROJECT, DS_ID) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DESCRIPTION = 'DESCRIPTION' FRIENDLY_NAME = 'TITLE' LOCATION = 'loc' @@ -595,7 +568,7 @@ def test_update_dataset(self): EXP = 17 RESOURCE = { 'datasetReference': - {'projectId': PROJECT, 'datasetId': DS_ID}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'etag': "etag", 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, @@ -604,9 +577,9 @@ def test_update_dataset(self): 'labels': LABELS, } creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(RESOURCE, RESOURCE) - ds = Dataset(client.dataset(DS_ID)) + ds = Dataset(client.dataset(self.DS_ID)) ds.description = DESCRIPTION ds.friendly_name = FRIENDLY_NAME ds.location = LOCATION @@ -640,19 +613,16 @@ def test_update_dataset(self): def test_update_table(self): from google.cloud.bigquery.table import Table, SchemaField - project = 'PROJECT' - dataset_id = 'DATASET_ID' - table_id = 'table_id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) description = 'description' title = 'title' resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -667,10 +637,9 @@ def test_update_table(self): SchemaField('age', 'INTEGER', mode='REQUIRED') ] creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(resource, resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) table.description = description table.friendly_name = title @@ -679,9 +648,9 @@ def test_update_table(self): sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -708,25 +677,21 @@ def test_update_table(self): def test_update_table_only_use_legacy_sql(self): from google.cloud.bigquery.table import Table - project = 'PROJECT' - dataset_id = 'DATASET_ID' - table_id = 'table_id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'view': {'useLegacySql': True} } creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref) + table = Table(self.TABLE_REF) table.view_use_legacy_sql = True updated_table = client.update_table(table, ['view_use_legacy_sql']) @@ -737,9 +702,9 @@ def test_update_table_only_use_legacy_sql(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'view': {'useLegacySql': True} } @@ -753,11 +718,8 @@ def test_update_table_w_query(self): from google.cloud._helpers import _millis from google.cloud.bigquery.table import Table, SchemaField - project = 'PROJECT' - dataset_id = 'DATASET_ID' - table_id = 'table_id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) query = 'select fullname, age from person_ages' location = 'EU' exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) @@ -769,11 +731,11 @@ def test_update_table_w_query(self): SchemaField('age', 'INTEGER', mode='REQUIRED') ] resource = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'schema': schema_resource, 'view': {'query': query, 'useLegacySql': True}, @@ -781,10 +743,9 @@ def test_update_table_w_query(self): 'expirationTime': _millis(exp_time) } creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(resource) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) table.location = location table.expires = exp_time table.view_query = query @@ -800,9 +761,9 @@ def test_update_table_w_query(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'view': {'query': query, 'useLegacySql': True}, 'location': location, @@ -820,34 +781,30 @@ def test_update_table_w_query(self): def test_update_table_w_schema_None(self): # Simulate deleting schema: not sure if back-end will actually # allow this operation, but the spec says it is optional. - project = 'PROJECT' - dataset_id = 'DATASET_ID' - table_id = 'table_id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) resource1 = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id}, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID}, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]} } resource2 = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id}, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID}, 'schema': {'fields': []}, } creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(resource1, resource2) - table_ref = client.dataset(dataset_id).table(table_id) - table = client.get_table(table_ref) + table = client.get_table(self.TABLE_REF) table.schema = None updated_table = client.update_table(table, ['schema']) @@ -857,9 +814,9 @@ def test_update_table_w_schema_None(self): self.assertEqual(req['method'], 'PATCH') sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'schema': None } @@ -870,37 +827,33 @@ def test_update_table_w_schema_None(self): def test_update_table_delete_property(self): from google.cloud.bigquery.table import Table - project = 'PROJECT' - dataset_id = 'DATASET_ID' - table_id = 'table_id' description = 'description' title = 'title' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) resource1 = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'description': description, 'friendlyName': title, } resource2 = { - 'id': '%s:%s:%s' % (project, dataset_id, table_id), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'description': None, } creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(resource1, resource2) - table_ref = client.dataset(dataset_id).table(table_id) - table = Table(table_ref) + table = Table(self.TABLE_REF) table.description = description table.friendly_name = title table2 = client.update_table(table, ['description', 'friendly_name']) @@ -914,9 +867,9 @@ def test_update_table_delete_property(self): self.assertEqual(req['path'], '/%s' % path) sent = { 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID }, 'description': None, } @@ -924,13 +877,11 @@ def test_update_table_delete_property(self): self.assertIsNone(table3.description) def test_list_dataset_tables_empty(self): - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection({}) - dataset = client.dataset(DS_ID) + dataset = client.dataset(self.DS_ID) iterator = client.list_dataset_tables(dataset) self.assertIs(iterator.dataset, dataset) page = six.next(iterator.pages) @@ -942,40 +893,38 @@ def test_list_dataset_tables_empty(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'GET') - PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) self.assertEqual(req['path'], '/%s' % PATH) def test_list_dataset_tables_defaults(self): from google.cloud.bigquery.table import Table - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' TABLE_1 = 'table_one' TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) TOKEN = 'TOKEN' DATA = { 'nextPageToken': TOKEN, 'tables': [ {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_1), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), 'tableReference': {'tableId': TABLE_1, - 'datasetId': DS_ID, - 'projectId': PROJECT}, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, 'type': 'TABLE'}, {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_2), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), 'tableReference': {'tableId': TABLE_2, - 'datasetId': DS_ID, - 'projectId': PROJECT}, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, 'type': 'TABLE'}, ] } creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(DATA) - dataset = client.dataset(DS_ID) + dataset = client.dataset(self.DS_ID) iterator = client.list_dataset_tables(dataset) self.assertIs(iterator.dataset, dataset) @@ -998,33 +947,31 @@ def test_list_dataset_tables_defaults(self): def test_list_dataset_tables_explicit(self): from google.cloud.bigquery.table import Table - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' TABLE_1 = 'table_one' TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (PROJECT, DS_ID) + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) TOKEN = 'TOKEN' DATA = { 'tables': [ {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_1), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), 'tableReference': {'tableId': TABLE_1, - 'datasetId': DS_ID, - 'projectId': PROJECT}, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, 'type': 'TABLE'}, {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (PROJECT, DS_ID, TABLE_2), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), 'tableReference': {'tableId': TABLE_2, - 'datasetId': DS_ID, - 'projectId': PROJECT}, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, 'type': 'TABLE'}, ] } creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection(DATA) - dataset = client.dataset(DS_ID) + dataset = client.dataset(self.DS_ID) iterator = client.list_dataset_tables( dataset, max_results=3, page_token=TOKEN) @@ -1048,23 +995,19 @@ def test_list_dataset_tables_explicit(self): {'maxResults': 3, 'pageToken': TOKEN}) def test_list_dataset_tables_wrong_type(self): - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.list_dataset_tables(client.dataset(DS_ID).table("foo")) + client.list_dataset_tables(client.dataset(self.DS_ID).table("foo")) def test_delete_dataset(self): from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - PATH = 'projects/%s/datasets/%s' % (PROJECT, DS_ID) + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _Connection({}, {}) - ds_ref = client.dataset(DS_ID) + ds_ref = client.dataset(self.DS_ID) for arg in (ds_ref, Dataset(ds_ref)): client.delete_dataset(arg) req = conn._requested[0] @@ -1072,56 +1015,47 @@ def test_delete_dataset(self): self.assertEqual(req['path'], '/%s' % PATH) def test_delete_dataset_wrong_type(self): - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' creds = _make_credentials() - client = self._make_one(project=PROJECT, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_dataset(client.dataset(DS_ID).table("foo")) + client.delete_dataset(client.dataset(self.DS_ID).table("foo")) def test_delete_table(self): from google.cloud.bigquery.table import Table - project = 'PROJECT' - dataset_id = 'dataset_id' - table_id = 'table-id' path = 'projects/%s/datasets/%s/tables/%s' % ( - project, dataset_id, table_id) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=project, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}, {}) - table_ref = client.dataset(dataset_id).table(table_id) - for arg in (table_ref, Table(table_ref)): + for arg in (self.TABLE_REF, Table(self.TABLE_REF)): client.delete_table(arg) req = conn._requested[0] self.assertEqual(req['method'], 'DELETE') self.assertEqual(req['path'], '/%s' % path) def test_delete_table_w_wrong_type(self): - project = 'PROJECT' - dataset_id = 'DATASET_ID' creds = _make_credentials() - client = self._make_one(project=project, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_table(client.dataset(dataset_id)) + client.delete_table(client.dataset(self.DS_ID)) def test_job_from_resource_unknown_type(self): - PROJECT = 'PROJECT' creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) with self.assertRaises(ValueError): client.job_from_resource({'configuration': {'nonesuch': {}}}) def test_get_job_miss_w_explict_project(self): from google.cloud.exceptions import NotFound - PROJECT = 'PROJECT' OTHER_PROJECT = 'OTHER_PROJECT' JOB_ID = 'NONESUCH' creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection() with self.assertRaises(NotFound): @@ -1136,15 +1070,13 @@ def test_get_job_miss_w_explict_project(self): def test_get_job_hit(self): from google.cloud.bigquery.job import QueryJob - PROJECT = 'PROJECT' JOB_ID = 'query_job' - DATASET = 'test_dataset' QUERY_DESTINATION_TABLE = 'query_destination_table' QUERY = 'SELECT * from test_dataset:test_table' ASYNC_QUERY_DATA = { - 'id': '{}:{}'.format(PROJECT, JOB_ID), + 'id': '{}:{}'.format(self.PROJECT, JOB_ID), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'query_job', }, 'state': 'DONE', @@ -1152,8 +1084,8 @@ def test_get_job_hit(self): 'query': { 'query': QUERY, 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': QUERY_DESTINATION_TABLE, }, 'createDisposition': 'CREATE_IF_NEEDED', @@ -1162,7 +1094,7 @@ def test_get_job_hit(self): }, } creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(ASYNC_QUERY_DATA) job = client.get_job(JOB_ID) @@ -1184,8 +1116,6 @@ def test_list_jobs_defaults(self): from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import QueryJob - PROJECT = 'PROJECT' - DATASET = 'test_dataset' SOURCE_TABLE = 'source_table' DESTINATION_TABLE = 'destination_table' QUERY_DESTINATION_TABLE = 'query_destination_table' @@ -1197,13 +1127,13 @@ def test_list_jobs_defaults(self): 'extract_job': ExtractJob, 'query_job': QueryJob, } - PATH = 'projects/%s/jobs' % PROJECT + PATH = 'projects/%s/jobs' % self.PROJECT TOKEN = 'TOKEN' QUERY = 'SELECT * from test_dataset:test_table' ASYNC_QUERY_DATA = { - 'id': '%s:%s' % (PROJECT, 'query_job'), + 'id': '%s:%s' % (self.PROJECT, 'query_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'query_job', }, 'state': 'DONE', @@ -1211,8 +1141,8 @@ def test_list_jobs_defaults(self): 'query': { 'query': QUERY, 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': QUERY_DESTINATION_TABLE, }, 'createDisposition': 'CREATE_IF_NEEDED', @@ -1221,17 +1151,17 @@ def test_list_jobs_defaults(self): }, } EXTRACT_DATA = { - 'id': '%s:%s' % (PROJECT, 'extract_job'), + 'id': '%s:%s' % (self.PROJECT, 'extract_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'extract_job', }, 'state': 'DONE', 'configuration': { 'extract': { 'sourceTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }, 'destinationUris': [DESTINATION_URI], @@ -1239,39 +1169,39 @@ def test_list_jobs_defaults(self): }, } COPY_DATA = { - 'id': '%s:%s' % (PROJECT, 'copy_job'), + 'id': '%s:%s' % (self.PROJECT, 'copy_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'copy_job', }, 'state': 'DONE', 'configuration': { 'copy': { 'sourceTables': [{ - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }], 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': DESTINATION_TABLE, }, } }, } LOAD_DATA = { - 'id': '%s:%s' % (PROJECT, 'load_job'), + 'id': '%s:%s' % (self.PROJECT, 'load_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'load_job', }, 'state': 'DONE', 'configuration': { 'load': { 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }, 'sourceUris': [SOURCE_URI], @@ -1288,7 +1218,7 @@ def test_list_jobs_defaults(self): ] } creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_jobs() @@ -1312,26 +1242,24 @@ def test_list_jobs_defaults(self): def test_list_jobs_load_job_wo_sourceUris(self): from google.cloud.bigquery.job import LoadJob - PROJECT = 'PROJECT' - DATASET = 'test_dataset' SOURCE_TABLE = 'source_table' JOB_TYPES = { 'load_job': LoadJob, } - PATH = 'projects/%s/jobs' % PROJECT + PATH = 'projects/%s/jobs' % self.PROJECT TOKEN = 'TOKEN' LOAD_DATA = { - 'id': '%s:%s' % (PROJECT, 'load_job'), + 'id': '%s:%s' % (self.PROJECT, 'load_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'load_job', }, 'state': 'DONE', 'configuration': { 'load': { 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }, } @@ -1344,7 +1272,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): ] } creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_jobs() @@ -1366,12 +1294,11 @@ def test_list_jobs_load_job_wo_sourceUris(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_explicit_missing(self): - PROJECT = 'PROJECT' - PATH = 'projects/%s/jobs' % PROJECT + PATH = 'projects/%s/jobs' % self.PROJECT DATA = {} TOKEN = 'TOKEN' creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_jobs(max_results=1000, page_token=TOKEN, @@ -1397,22 +1324,20 @@ def test_list_jobs_explicit_missing(self): def test_load_table_from_storage(self): from google.cloud.bigquery.job import LoadJob - PROJECT = 'PROJECT' JOB = 'job_name' - DATASET = 'dataset_name' DESTINATION = 'destination_table' SOURCE_URI = 'http://example.com/source.csv' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'load': { 'sourceUris': [SOURCE_URI], 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': DESTINATION, }, }, @@ -1420,9 +1345,10 @@ def test_load_table_from_storage(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) - destination = client.dataset(DATASET).table(DESTINATION) + destination = client.dataset(self.DS_ID).table(DESTINATION) job = client.load_table_from_storage(SOURCE_URI, destination, job_id=JOB) @@ -1431,7 +1357,7 @@ def test_load_table_from_storage(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/%s/jobs' % PROJECT) + self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT) self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) @@ -1469,25 +1395,20 @@ def _initiate_resumable_upload_helper(self, num_retries=None): from google.cloud.bigquery.client import _get_upload_headers from google.cloud.bigquery.job import LoadJob, LoadJobConfig - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - TABLE_ID = 'TABLE_ID' - # Create mocks to be checked for doing transport. resumable_url = 'http://test.invalid?upload_id=hey-you' response_headers = {'location': resumable_url} fake_transport = self._mock_transport( http_client.OK, response_headers) - client = self._make_one(project=PROJECT, _http=fake_transport) + client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = _Connection() - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) # Create some mock arguments and call the method under test. data = b'goodbye gudbi gootbee' stream = io.BytesIO(data) config = LoadJobConfig() config.source_format = 'CSV' - job = LoadJob(None, None, table_ref, client, job_config=config) + job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job._build_resource() upload, transport = client._initiate_resumable_upload( stream, metadata, num_retries) @@ -1496,7 +1417,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): self.assertIsInstance(upload, ResumableUpload) upload_url = ( 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - PROJECT + + self.PROJECT + '/jobs?uploadType=resumable') self.assertEqual(upload.upload_url, upload_url) expected_headers = _get_upload_headers(conn.USER_AGENT) @@ -1541,21 +1462,16 @@ def _do_multipart_upload_success_helper( from google.cloud.bigquery.client import _get_upload_headers from google.cloud.bigquery.job import LoadJob, LoadJobConfig - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - TABLE_ID = 'TABLE_ID' - fake_transport = self._mock_transport(http_client.OK, {}) - client = self._make_one(project=PROJECT, _http=fake_transport) + client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = _Connection() - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) # Create some mock arguments. data = b'Bzzzz-zap \x00\x01\xf4' stream = io.BytesIO(data) config = LoadJobConfig() config.source_format = 'CSV' - job = LoadJob(None, None, table_ref, client, job_config=config) + job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job._build_resource() size = len(data) response = client._do_multipart_upload( @@ -1568,7 +1484,7 @@ def _do_multipart_upload_success_helper( upload_url = ( 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - PROJECT + + self.PROJECT + '/jobs?uploadType=multipart') payload = ( b'--==0==\r\n' + @@ -1600,26 +1516,24 @@ def test__do_multipart_upload_with_retry(self, get_boundary): def test_copy_table(self): from google.cloud.bigquery.job import CopyJob - PROJECT = 'PROJECT' JOB = 'job_name' - DATASET = 'dataset_name' SOURCE = 'source_table' DESTINATION = 'destination_table' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'copy': { 'sourceTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE, }, 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': DESTINATION, }, }, @@ -1627,9 +1541,10 @@ def test_copy_table(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) - dataset = client.dataset(DATASET) + dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) @@ -1639,7 +1554,7 @@ def test_copy_table(self): self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/%s/jobs' % PROJECT) + self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) @@ -1659,21 +1574,19 @@ def test_copy_table(self): def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob - PROJECT = 'PROJECT' JOB = 'job_id' - DATASET = 'dataset_id' SOURCE = 'source_table' DESTINATION = 'gs://bucket_name/object_name' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'extract': { 'sourceTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE, }, 'destinationUris': [DESTINATION], @@ -1682,9 +1595,10 @@ def test_extract_table(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) - dataset = client.dataset(DATASET) + dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) job = client.extract_table(source, DESTINATION, job_id=JOB) @@ -1707,21 +1621,19 @@ def test_extract_table_generated_job_id(self): from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import DestinationFormat - PROJECT = 'PROJECT' JOB = 'job_id' - DATASET = 'dataset_id' SOURCE = 'source_table' DESTINATION = 'gs://bucket_name/object_name' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'extract': { 'sourceTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE, }, 'destinationUris': [DESTINATION], @@ -1731,9 +1643,10 @@ def test_extract_table_generated_job_id(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) - dataset = client.dataset(DATASET) + dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) job_config = ExtractJobConfig() job_config.destination_format = ( @@ -1758,11 +1671,10 @@ def test_extract_table_generated_job_id(self): def test_query_defaults(self): from google.cloud.bigquery.job import QueryJob - PROJECT = 'PROJECT' QUERY = 'select count(*) from persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'some-random-id', }, 'configuration': { @@ -1774,7 +1686,8 @@ def test_query_defaults(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) job = client.query(QUERY) @@ -1804,12 +1717,11 @@ def test_query_w_udf_resources(self): from google.cloud.bigquery.query import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' - PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { @@ -1824,7 +1736,8 @@ def test_query_w_udf_resources(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] config = QueryJobConfig() @@ -1860,12 +1773,11 @@ def test_query_w_query_parameters(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import ScalarQueryParameter - PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { @@ -1884,7 +1796,8 @@ def test_query_w_query_parameters(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESOURCE) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] config = QueryJobConfig() @@ -1918,17 +1831,13 @@ def test_query_w_query_parameters(self): }) def test_create_rows_wo_schema(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) - table = Table(table_ref) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + table = Table(self.TABLE_REF) ROWS = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), @@ -1947,27 +1856,23 @@ def test_create_rows_w_schema(self): from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _microseconds_from_datetime from google.cloud.bigquery.table import Table, SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), ] - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) ROWS = [ ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), @@ -2002,27 +1907,23 @@ def test_create_rows_w_list_of_dictionaries(self): from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _microseconds_from_datetime from google.cloud.bigquery.table import Table, SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), ] - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) ROWS = [ { 'full_name': 'Phred Phlyntstone', 'age': 32, @@ -2064,23 +1965,19 @@ def _row_data(row): def test_create_rows_w_list_of_Rows(self): from google.cloud.bigquery._helpers import Row from google.cloud.bigquery.table import Table, SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), ] - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) f2i = {'full_name': 0, 'age': 1} ROWS = [ Row(('Phred Phlyntstone', 32), f2i), @@ -2107,13 +2004,9 @@ def _row_data(row): def test_create_rows_w_skip_invalid_and_ignore_unknown(self): from google.cloud.bigquery.table import Table, SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) RESPONSE = { 'insertErrors': [ {'index': 1, @@ -2126,15 +2019,15 @@ def test_create_rows_w_skip_invalid_and_ignore_unknown(self): ]} creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(RESPONSE) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), SchemaField('voter', 'BOOLEAN', mode='NULLABLE'), ] - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) - table = Table(table_ref, schema=schema) + table = Table(self.TABLE_REF, schema=schema) ROWS = [ ('Phred Phlyntstone', 32, True), ('Bharney Rhubble', 33, False), @@ -2179,24 +2072,20 @@ def _row_data(row): def test_create_rows_w_repeated_fields(self): from google.cloud.bigquery.table import Table, SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) full_name = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) - table = Table(table_ref, schema=[full_name, struct]) + table = Table(self.TABLE_REF, schema=[full_name, struct]) ROWS = [ (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), ] @@ -2220,18 +2109,14 @@ def _row_data(row): def test_create_rows_w_record_schema(self): from google.cloud.bigquery.table import SchemaField - from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection({}) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') area_code = SchemaField('area_code', 'STRING', 'REQUIRED') local_number = SchemaField('local_number', 'STRING', 'REQUIRED') @@ -2256,7 +2141,7 @@ def _row_data(row): 'rows': [{'json': _row_data(row)} for row in ROWS], } - errors = client.create_rows(table_ref, ROWS, + errors = client.create_rows(self.TABLE_REF, ROWS, selected_fields=[full_name, phone]) self.assertEqual(len(errors), 0) @@ -2267,12 +2152,8 @@ def _row_data(row): self.assertEqual(req['data'], SENT) def test_create_rows_errors(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' ROWS = [ ('Phred Phlyntstone', 32, True), ('Bharney Rhubble', 33, False), @@ -2281,16 +2162,16 @@ def test_create_rows_errors(self): ] creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) # table ref with no selected fields with self.assertRaises(ValueError): - client.create_rows(table_ref, ROWS) + client.create_rows(self.TABLE_REF, ROWS) # table with no schema with self.assertRaises(ValueError): - client.create_rows(Table(table_ref), ROWS) + client.create_rows(Table(self.TABLE_REF), ROWS) # neither Table nor tableReference with self.assertRaises(TypeError): @@ -2301,18 +2182,17 @@ def test_query_rows_defaults(self): from google.cloud.bigquery._helpers import Row JOB = 'job-id' - PROJECT = 'PROJECT' QUERY = 'SELECT COUNT(*) FROM persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'query': { 'query': QUERY, 'destinationTable': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'datasetId': '_temp_dataset', 'tableId': '_temp_table', }, @@ -2345,7 +2225,8 @@ def test_query_rows_defaults(self): del LAST_PAGE['pageToken'] creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection( RESOURCE, RESULTS_RESOURCE, FIRST_PAGE, LAST_PAGE) @@ -2366,18 +2247,17 @@ def test_query_rows_w_job_id(self): from google.api.core.page_iterator import HTTPIterator JOB = 'job-id' - PROJECT = 'PROJECT' QUERY = 'SELECT COUNT(*) FROM persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { 'query': { 'query': QUERY, 'destinationTable': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'datasetId': '_temp_dataset', 'tableId': '_temp_table', }, @@ -2399,7 +2279,8 @@ def test_query_rows_w_job_id(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection( RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) @@ -2420,11 +2301,10 @@ def test_query_rows_w_job_config(self): from google.api.core.page_iterator import HTTPIterator JOB = 'job-id' - PROJECT = 'PROJECT' QUERY = 'SELECT COUNT(*) FROM persons' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': JOB, }, 'configuration': { @@ -2432,7 +2312,7 @@ def test_query_rows_w_job_config(self): 'query': QUERY, 'useLegacySql': True, 'destinationTable': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'datasetId': '_temp_dataset', 'tableId': '_temp_table', }, @@ -2455,7 +2335,8 @@ def test_query_rows_w_job_config(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection( RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) @@ -2477,15 +2358,11 @@ def test_query_rows_w_job_config(self): def test_list_rows(self): import datetime from google.cloud._helpers import UTC - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, SchemaField from google.cloud.bigquery._helpers import Row - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( tzinfo=UTC) @@ -2526,13 +2403,13 @@ def _bigquery_timestamp_float_repr(ts_float): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(DATA, DATA) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='NULLABLE') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = Table(table_ref, schema=[full_name, age, joined]) + table = Table(self.TABLE_REF, schema=[full_name, age, joined]) iterator = client.list_rows(table) page = six.next(iterator.pages) @@ -2556,17 +2433,13 @@ def _bigquery_timestamp_float_repr(ts_float): self.assertEqual(req['query_params'], {}) def test_list_rows_query_params(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, SchemaField - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) - table = Table(table_ref, + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + table = Table(self.TABLE_REF, schema=[SchemaField('age', 'INTEGER', mode='NULLABLE')]) tests = [ ({}, {}), @@ -2584,14 +2457,10 @@ def test_list_rows_query_params(self): 'for kwargs %s' % test[0]) def test_list_rows_repeated_fields(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import SchemaField - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) ROWS = 1234 TOKEN = 'TOKEN' DATA = { @@ -2612,16 +2481,17 @@ def test_list_rows_repeated_fields(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(DATA) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) color = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) - iterator = client.list_rows(table_ref, selected_fields=[color, struct]) + iterator = client.list_rows(self.TABLE_REF, + selected_fields=[color, struct]) page = six.next(iterator.pages) rows = list(page) total_rows = iterator.total_rows @@ -2640,14 +2510,10 @@ def test_list_rows_repeated_fields(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_rows_w_record_schema(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table, SchemaField - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - PROJECT, DS_ID, TABLE_ID) + self.PROJECT, self.DS_ID, self.TABLE_ID) ROWS = 1234 TOKEN = 'TOKEN' DATA = { @@ -2670,16 +2536,16 @@ def test_list_rows_w_record_schema(self): } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) conn = client._connection = _Connection(DATA) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') area_code = SchemaField('area_code', 'STRING', 'REQUIRED') local_number = SchemaField('local_number', 'STRING', 'REQUIRED') rank = SchemaField('rank', 'INTEGER', 'REQUIRED') phone = SchemaField('phone', 'RECORD', mode='NULLABLE', fields=[area_code, local_number, rank]) - table = Table(table_ref, schema=[full_name, phone]) + table = Table(self.TABLE_REF, schema=[full_name, phone]) iterator = client.list_rows(table) page = six.next(iterator.pages) @@ -2707,42 +2573,36 @@ def test_list_rows_w_record_schema(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_rows_errors(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' - creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) # table ref with no selected fields with self.assertRaises(ValueError): - client.list_rows(table_ref) + client.list_rows(self.TABLE_REF) # table with no schema with self.assertRaises(ValueError): - client.list_rows(Table(table_ref)) + client.list_rows(Table(self.TABLE_REF)) # neither Table nor tableReference with self.assertRaises(TypeError): client.list_rows(1) def test_list_partitions(self): - PROJECT = 'PROJECT' RESOURCE = { 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'JOB_ID', }, 'configuration': { 'query': { 'query': 'q', 'destinationTable': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'datasetId': 'DS_ID', 'tableId': 'TABLE_ID', }, @@ -2772,11 +2632,11 @@ def test_list_partitions(self): del FIRST_PAGE['pageToken'] creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) client._connection = _Connection( RESOURCE, RESULTS_RESOURCE, FIRST_PAGE) - table_ref = DatasetReference(PROJECT, 'DS_ID').table('TABLE_ID') - self.assertEqual(client.list_partitions(table_ref), + self.assertEqual(client.list_partitions(self.TABLE_REF), [20160804, 20160805]) From 089726f3b8974265cf0d2a921c038d6e8795d11f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 12 Oct 2017 10:13:40 -0700 Subject: [PATCH 0315/2016] BQ: use random job ID for system tests. (#4163) --- packages/google-cloud-bigquery/tests/system.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 61397d3d80cf..2c62e5efa9b0 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -20,6 +20,7 @@ import os import time import unittest +import uuid import six @@ -688,7 +689,7 @@ def test_copy_table(self): def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') - JOB_ID = 'fetch_' + DATASET_ID + JOB_ID = 'fetch_' + DATASET_ID + str(uuid.uuid4()) TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) @@ -924,7 +925,7 @@ def test_query_w_dml(self): query_job = Config.CLIENT.query( query_template.format(dataset_name, table_name), - job_id='test_query_w_dml_{}'.format(unique_resource_id())) + job_id='test_query_w_dml_{}'.format(str(uuid.uuid4()))) query_job.result() self.assertEqual(query_job.num_dml_affected_rows, 1) @@ -940,7 +941,7 @@ def test_dbapi_w_dml(self): Config.CURSOR.execute( query_template.format(dataset_name, table_name), - job_id='test_dbapi_w_dml_{}'.format(unique_resource_id())) + job_id='test_dbapi_w_dml_{}'.format(str(uuid.uuid4()))) self.assertEqual(Config.CURSOR.rowcount, 1) self.assertIsNone(Config.CURSOR.fetchone()) @@ -1112,7 +1113,7 @@ def test_query_w_query_params(self): example['sql'], job_config=jconfig, job_id='test_query_w_query_params{}'.format( - unique_resource_id())) + str(uuid.uuid4()))) rows = list(query_job.result()) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) From 64e5e5eb50d2bf5c65b5bf3f483022fe28bf86b4 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 12 Oct 2017 14:31:04 -0400 Subject: [PATCH 0316/2016] bigquery: generate row IDs in create_rows (#4173) If the user doesn't provide row IDs, create unique IDs for them. --- .../google/cloud/bigquery/client.py | 6 ++- .../tests/unit/test_client.py | 42 ++++++++++++++----- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 488b409ff77c..6a312bff4514 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -860,7 +860,7 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, :type row_ids: list of string :param row_ids: (Optional) Unique ids, one per row being inserted. - If not passed, no de-duplication occurs. + If omitted, unique IDs are created. :type selected_fields: list of :class:`SchemaField` :param selected_fields: @@ -923,7 +923,8 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, info = {'json': row_info} if row_ids is not None: info['insertId'] = row_ids[index] - + else: + info['insertId'] = str(uuid.uuid4()) rows_info.append(info) if skip_invalid_rows is not None: @@ -935,6 +936,7 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, if template_suffix is not None: data['templateSuffix'] = template_suffix + # TODO(jba): use self._call_api here after #4148 is merged. response = self._connection.api_request( method='POST', path='%s/insertAll' % table.path, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 22df27c6358c..bb5517207ffc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1889,10 +1889,14 @@ def _row_data(row): 'joined': joined} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -1950,10 +1954,14 @@ def _row_data(row): return row SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -1990,10 +1998,14 @@ def _row_data(row): return {'full_name': row[0], 'age': str(row[1])} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -2095,10 +2107,14 @@ def _row_data(row): 'struct': row[1]} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -2138,11 +2154,15 @@ def _row_data(row): 'phone': row[1]} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(self.TABLE_REF, ROWS, - selected_fields=[full_name, phone]) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(self.TABLE_REF, ROWS, + selected_fields=[full_name, phone]) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) From caf1ae6ab21a22baee4b870a90fce17fc18990a7 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 12 Oct 2017 17:20:39 -0400 Subject: [PATCH 0317/2016] bigquery: retry idempotent RPCs (#4148) Add retry logic to every RPC for which it makes sense. Following the BigQuery team, we ignore the error code and use the "reason" field of the error to determine whether to retry. Outstanding issues: - Resumable upload consists of an initial call to get a URL, followed by posts to that URL. Getting the retry right on that initial call requires modifying the ResumableUpload class. At the same time, the num_retries argument should be removed. - Users can't modify the retry behavior of Job.result(), because PollingFuture.result() does not accept a retry argument. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/_helpers.py | 26 +++ .../google/cloud/bigquery/client.py | 161 +++++++++++++----- .../google/cloud/bigquery/job.py | 60 +++++-- .../tests/unit/test_client.py | 41 ++++- 5 files changed, 228 insertions(+), 62 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 2682ca8ddb6d..545e0cde265a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -27,6 +27,7 @@ __version__ = get_distribution('google-cloud-bigquery').version from google.cloud.bigquery._helpers import Row +from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset @@ -61,4 +62,5 @@ 'Table', 'TableReference', 'UDFResource', + 'DEFAULT_RETRY', ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 0ee7a9c01c6a..d4230f9ff4f6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -20,6 +20,7 @@ import six +from google.api.core import retry from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds @@ -520,3 +521,28 @@ def _rows_page_start(iterator, page, response): total_rows = int(total_rows) iterator.total_rows = total_rows # pylint: enable=unused-argument + + +def _should_retry(exc): + """Predicate for determining when to retry. + + We retry if and only if the 'reason' is 'backendError' + or 'rateLimitExceeded'. + """ + if not hasattr(exc, 'errors'): + return False + if len(exc.errors) == 0: + return False + reason = exc.errors[0]['reason'] + return reason == 'backendError' or reason == 'rateLimitExceeded' + + +DEFAULT_RETRY = retry.Retry(predicate=_should_retry) +"""The default retry object. + +Any method with a ``retry`` parameter will be retried automatically, +with reasonable defaults. To disable retry, pass ``retry=None``. +To modify the default retry behavior, call a ``with_XXX`` method +on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, +pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. +""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 6a312bff4514..c52fbbda1ba8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -17,6 +17,7 @@ from __future__ import absolute_import import collections +import functools import os import uuid @@ -27,6 +28,7 @@ from google.resumable_media.requests import ResumableUpload from google.api.core import page_iterator + from google.cloud import exceptions from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection @@ -44,6 +46,7 @@ from google.cloud.bigquery._helpers import _rows_page_start from google.cloud.bigquery._helpers import _field_to_index_mapping from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW +from google.cloud.bigquery._helpers import DEFAULT_RETRY _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB @@ -117,7 +120,8 @@ def __init__(self, project=None, credentials=None, _http=None): project=project, credentials=credentials, _http=_http) self._connection = Connection(self) - def list_projects(self, max_results=None, page_token=None): + def list_projects(self, max_results=None, page_token=None, + retry=DEFAULT_RETRY): """List projects for the project associated with this client. See @@ -132,13 +136,16 @@ def list_projects(self, max_results=None, page_token=None): not passed, the API will return the first page of projects. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.client.Project` accessible to the current client. """ return page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path='/projects', item_to_value=_item_to_project, items_key='projects', @@ -146,7 +153,7 @@ def list_projects(self, max_results=None, page_token=None): max_results=max_results) def list_datasets(self, include_all=False, max_results=None, - page_token=None): + page_token=None, retry=DEFAULT_RETRY): """List datasets for the project associated with this client. See @@ -164,6 +171,9 @@ def list_datasets(self, include_all=False, max_results=None, not passed, the API will return the first page of datasets. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`. accessible to the current client. @@ -174,7 +184,7 @@ def list_datasets(self, include_all=False, max_results=None, path = '/projects/%s/datasets' % (self.project,) return page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path=path, item_to_value=_item_to_dataset, items_key='datasets', @@ -241,35 +251,47 @@ def create_table(self, table): method='POST', path=path, data=resource) return Table.from_api_repr(api_response) - def get_dataset(self, dataset_ref): + def _call_api(self, retry, **kwargs): + call = functools.partial(self._connection.api_request, **kwargs) + if retry: + call = retry(call) + return call() + + def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): """Fetch the dataset referenced by ``dataset_ref`` :type dataset_ref: :class:`google.cloud.bigquery.dataset.DatasetReference` :param dataset_ref: the dataset to use. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: a ``Dataset`` instance """ - api_response = self._connection.api_request( - method='GET', path=dataset_ref.path) + api_response = self._call_api(retry, + method='GET', + path=dataset_ref.path) return Dataset.from_api_repr(api_response) - def get_table(self, table_ref): + def get_table(self, table_ref, retry=DEFAULT_RETRY): """Fetch the table referenced by ``table_ref`` :type table_ref: :class:`google.cloud.bigquery.table.TableReference` :param table_ref: the table to use. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.table.Table` :returns: a ``Table`` instance """ - api_response = self._connection.api_request( - method='GET', path=table_ref.path) + api_response = self._call_api(retry, method='GET', path=table_ref.path) return Table.from_api_repr(api_response) - def update_dataset(self, dataset, fields): + def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): """Change some fields of a dataset. Use ``fields`` to specify which fields to update. At least one field @@ -290,6 +312,9 @@ def update_dataset(self, dataset, fields): :param fields: the fields of ``dataset`` to change, spelled as the Dataset properties (e.g. "friendly_name"). + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: the modified ``Dataset`` instance """ @@ -307,11 +332,11 @@ def update_dataset(self, dataset, fields): headers = {'If-Match': dataset.etag} else: headers = None - api_response = self._connection.api_request( - method='PATCH', path=path, data=partial, headers=headers) + api_response = self._call_api( + retry, method='PATCH', path=path, data=partial, headers=headers) return Dataset.from_api_repr(api_response) - def update_table(self, table, properties): + def update_table(self, table, properties, retry=DEFAULT_RETRY): """API call: update table properties via a PUT request See @@ -321,6 +346,9 @@ def update_table(self, table, properties): :class:`google.cloud.bigquery.table.Table` :param table_ref: the table to update. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.table.Table` :returns: a ``Table`` instance """ @@ -329,11 +357,13 @@ def update_table(self, table, properties): headers = {'If-Match': table.etag} else: headers = None - api_response = self._connection.api_request( + api_response = self._call_api( + retry, method='PATCH', path=table.path, data=partial, headers=headers) return Table.from_api_repr(api_response) - def list_dataset_tables(self, dataset, max_results=None, page_token=None): + def list_dataset_tables(self, dataset, max_results=None, page_token=None, + retry=DEFAULT_RETRY): """List tables in the dataset. See @@ -353,6 +383,9 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None): datasets. If not passed, the API will return the first page of datasets. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` contained within the current dataset. @@ -362,7 +395,7 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None): path = '%s/tables' % dataset.path result = page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path=path, item_to_value=_item_to_table, items_key='tables', @@ -371,7 +404,7 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None): result.dataset = dataset return result - def delete_dataset(self, dataset): + def delete_dataset(self, dataset, retry=DEFAULT_RETRY): """Delete a dataset. See @@ -381,13 +414,16 @@ def delete_dataset(self, dataset): :class:`~google.cloud.bigquery.dataset.Dataset` :class:`~google.cloud.bigquery.dataset.DatasetReference` + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :param dataset: the dataset to delete, or a reference to it. """ if not isinstance(dataset, (Dataset, DatasetReference)): raise TypeError('dataset must be a Dataset or a DatasetReference') - self._connection.api_request(method='DELETE', path=dataset.path) + self._call_api(retry, method='DELETE', path=dataset.path) - def delete_table(self, table): + def delete_table(self, table, retry=DEFAULT_RETRY): """Delete a table See @@ -397,17 +433,23 @@ def delete_table(self, table): :class:`~google.cloud.bigquery.table.Table` :class:`~google.cloud.bigquery.table.TableReference` :param table: the table to delete, or a reference to it. + + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. """ if not isinstance(table, (Table, TableReference)): raise TypeError('table must be a Table or a TableReference') - self._connection.api_request(method='DELETE', path=table.path) + self._call_api(retry, method='DELETE', path=table.path) - def _get_query_results(self, job_id, project=None, timeout_ms=None): + def _get_query_results(self, job_id, retry, project=None, timeout_ms=None): """Get the query results object for a query job. :type job_id: str :param job_id: Name of the query job. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :type project: str :param project: (Optional) project ID for the query job (defaults to the project of @@ -432,9 +474,11 @@ def _get_query_results(self, job_id, project=None, timeout_ms=None): path = '/projects/{}/queries/{}'.format(project, job_id) - resource = self._connection.api_request( - method='GET', path=path, query_params=extra_params) - + # This call is typically made in a polling loop that checks whether the + # job is complete (from QueryJob.done(), called ultimately from + # QueryJob.result()). So we don't need to poll here. + resource = self._call_api( + retry, method='GET', path=path, query_params=extra_params) return QueryResults.from_api_repr(resource) def job_from_resource(self, resource): @@ -462,7 +506,7 @@ def job_from_resource(self, resource): return QueryJob.from_api_repr(resource, self) raise ValueError('Cannot parse job resource') - def get_job(self, job_id, project=None): + def get_job(self, job_id, project=None, retry=DEFAULT_RETRY): """Fetch a job for the project associated with this client. See @@ -475,6 +519,9 @@ def get_job(self, job_id, project=None): :param project: project ID owning the job (defaults to the client's project) + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.cloud.bigquery.job._AsyncJob` :returns: Concrete job instance, based on the resource returned by the API. @@ -486,13 +533,13 @@ def get_job(self, job_id, project=None): path = '/projects/{}/jobs/{}'.format(project, job_id) - resource = self._connection.api_request( - method='GET', path=path, query_params=extra_params) + resource = self._call_api( + retry, method='GET', path=path, query_params=extra_params) return self.job_from_resource(resource) def list_jobs(self, max_results=None, page_token=None, all_users=None, - state_filter=None): + state_filter=None, retry=DEFAULT_RETRY): """List jobs for the project associated with this client. See @@ -519,6 +566,9 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, * ``"pending"`` * ``"running"`` + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterable of job instances. """ @@ -533,7 +583,7 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, path = '/projects/%s/jobs' % (self.project,) return page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path=path, item_to_value=_item_to_job, items_key='jobs', @@ -542,7 +592,8 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, extra_params=extra_params) def load_table_from_storage(self, source_uris, destination, - job_id=None, job_config=None): + job_id=None, job_config=None, + retry=DEFAULT_RETRY): """Starts a job for loading data into a table from CloudStorage. See @@ -563,6 +614,9 @@ def load_table_from_storage(self, source_uris, destination, :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: a new ``LoadJob`` instance """ @@ -570,7 +624,7 @@ def load_table_from_storage(self, source_uris, destination, if isinstance(source_uris, six.string_types): source_uris = [source_uris] job = LoadJob(job_id, source_uris, destination, self, job_config) - job.begin() + job.begin(retry=retry) return job def load_table_from_file(self, file_obj, destination, @@ -683,6 +737,8 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries): transport = self._http headers = _get_upload_headers(self._connection.USER_AGENT) upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) + # TODO: modify ResumableUpload to take a retry.Retry object + # that it can use for the initial RPC. upload = ResumableUpload(upload_url, chunk_size, headers=headers) if num_retries is not None: @@ -738,7 +794,8 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries): return response - def copy_table(self, sources, destination, job_id=None, job_config=None): + def copy_table(self, sources, destination, job_id=None, job_config=None, + retry=DEFAULT_RETRY): """Start a job for copying one or more tables into another table. See @@ -760,6 +817,9 @@ def copy_table(self, sources, destination, job_id=None, job_config=None): :type job_config: :class:`google.cloud.bigquery.job.CopyJobConfig` :param job_config: (Optional) Extra configuration options for the job. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: a new ``CopyJob`` instance """ @@ -769,7 +829,7 @@ def copy_table(self, sources, destination, job_id=None, job_config=None): sources = [sources] job = CopyJob(job_id, sources, destination, client=self, job_config=job_config) - job.begin() + job.begin(retry=retry) return job def extract_table(self, source, *destination_uris, **kwargs): @@ -796,20 +856,23 @@ def extract_table(self, source, *destination_uris, **kwargs): * *job_id* (``str``) -- Additional content (Optional) The ID of the job. + * *retry* (:class:`google.api.core.retry.Retry`) + (Optional) How to retry the RPC. :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: a new ``ExtractJob`` instance """ job_config = kwargs.get('job_config') job_id = _make_job_id(kwargs.get('job_id')) + retry = kwargs.get('retry', DEFAULT_RETRY) job = ExtractJob( job_id, source, list(destination_uris), client=self, job_config=job_config) - job.begin() + job.begin(retry=retry) return job - def query(self, query, job_config=None, job_id=None): + def query(self, query, job_config=None, job_id=None, retry=DEFAULT_RETRY): """Start a job that runs a SQL query. See @@ -826,12 +889,15 @@ def query(self, query, job_config=None, job_id=None): :type job_id: str :param job_id: (Optional) ID to use for the query job. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`google.cloud.bigquery.job.QueryJob` :returns: a new ``QueryJob`` instance """ job_id = _make_job_id(job_id) job = QueryJob(job_id, query, client=self, job_config=job_config) - job.begin() + job.begin(retry=retry) return job def create_rows(self, table, rows, row_ids=None, selected_fields=None, @@ -949,7 +1015,8 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, return errors - def query_rows(self, query, job_config=None, job_id=None, timeout=None): + def query_rows(self, query, job_config=None, job_id=None, timeout=None, + retry=DEFAULT_RETRY): """Start a query job and wait for the results. See @@ -983,11 +1050,12 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None): failed or :class:`TimeoutError` if the job did not complete in the given timeout. """ - job = self.query(query, job_config=job_config, job_id=job_id) + job = self.query( + query, job_config=job_config, job_id=job_id, retry=retry) return job.result(timeout=timeout) def list_rows(self, table, selected_fields=None, max_results=None, - page_token=None, start_index=None): + page_token=None, start_index=None, retry=DEFAULT_RETRY): """List the rows of the table. See @@ -1021,6 +1089,9 @@ def list_rows(self, table, selected_fields=None, max_results=None, :param page_token: (Optional) The zero-based index of the starting row to read. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of row data :class:`tuple`s. During each page, the iterator will have the ``total_rows`` attribute set, @@ -1050,7 +1121,7 @@ def list_rows(self, table, selected_fields=None, max_results=None, iterator = page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path='%s/data' % (table.path,), item_to_value=_item_to_row, items_key='rows', @@ -1063,7 +1134,7 @@ def list_rows(self, table, selected_fields=None, max_results=None, iterator._field_to_index = _field_to_index_mapping(schema) return iterator - def list_partitions(self, table): + def list_partitions(self, table, retry=DEFAULT_RETRY): """List the partitions in a table. :type table: One of: @@ -1071,6 +1142,9 @@ def list_partitions(self, table): :class:`~google.cloud.bigquery.table.TableReference` :param table: the table to list, or a reference to it. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: list :returns: a list of time partitions """ @@ -1079,7 +1153,8 @@ def list_partitions(self, table): rows = self.query_rows( 'SELECT partition_id from [%s:%s.%s$__PARTITIONS_SUMMARY__]' % (table.project, table.dataset_id, table.table_id), - job_config=config) + job_config=config, + retry=retry) return [row[0] for row in rows] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 593b14e41fa1..65da69956369 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -38,6 +38,7 @@ from google.cloud.bigquery._helpers import _EnumApiResourceProperty from google.cloud.bigquery._helpers import _ListApiResourceProperty from google.cloud.bigquery._helpers import _TypedApiResourceProperty +from google.cloud.bigquery._helpers import DEFAULT_RETRY _DONE_STATE = 'DONE' _STOPPED_REASON = 'stopped' @@ -383,7 +384,7 @@ def _get_resource_config(cls, resource): config = resource['configuration'][cls._JOB_TYPE] return job_id, config - def begin(self, client=None): + def begin(self, client=None, retry=DEFAULT_RETRY): """API call: begin the job via a POST request See @@ -394,6 +395,9 @@ def begin(self, client=None): :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :raises: :exc:`ValueError` if the job has already begin. """ if self.state is not None: @@ -402,11 +406,14 @@ def begin(self, client=None): client = self._require_client(client) path = '/projects/%s/jobs' % (self.project,) - api_response = client._connection.api_request( + # jobs.insert is idempotent because we ensure that every new + # job has an ID. + api_response = client._call_api( + retry, method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) - def exists(self, client=None): + def exists(self, client=None, retry=DEFAULT_RETRY): """API call: test for the existence of the job via a GET request See @@ -417,20 +424,24 @@ def exists(self, client=None): :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: bool :returns: Boolean indicating existence of the job. """ client = self._require_client(client) try: - client._connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) + client._call_api(retry, + method='GET', path=self.path, + query_params={'fields': 'id'}) except NotFound: return False else: return True - def reload(self, client=None): + def reload(self, client=None, retry=DEFAULT_RETRY): """API call: refresh job properties via a GET request. See @@ -440,11 +451,13 @@ def reload(self, client=None): ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. """ client = self._require_client(client) - api_response = client._connection.api_request( - method='GET', path=self.path) + api_response = client._call_api(retry, method='GET', path=self.path) self._set_properties(api_response) def cancel(self, client=None): @@ -494,16 +507,19 @@ def _set_future_result(self): else: self.set_result(self) - def done(self): + def done(self, retry=DEFAULT_RETRY): """Refresh the job and checks if it is complete. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: bool :returns: True if the job is complete, False otherwise. """ # Do not refresh is the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: - self.reload() + self.reload(retry=retry) return self.state == _DONE_STATE def result(self, timeout=None): @@ -522,6 +538,7 @@ def result(self, timeout=None): """ if self.state is None: self.begin() + # TODO: modify PollingFuture so it can pass a retry argument to done(). return super(_AsyncJob, self).result(timeout=timeout) def cancelled(self): @@ -1830,17 +1847,21 @@ def undeclared_query_paramters(self): return parameters - def query_results(self): + def query_results(self, retry=DEFAULT_RETRY): """Construct a QueryResults instance, bound to this job. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.cloud.bigquery.query.QueryResults` :returns: results instance """ if not self._query_results: - self._query_results = self._client._get_query_results(self.job_id) + self._query_results = self._client._get_query_results( + self.job_id, retry) return self._query_results - def done(self): + def done(self, retry=DEFAULT_RETRY): """Refresh the job and checks if it is complete. :rtype: bool @@ -1849,17 +1870,18 @@ def done(self): # Do not refresh is the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: - self._query_results = self._client._get_query_results(self.job_id) + self._query_results = self._client._get_query_results( + self.job_id, retry) # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are # correctly populated. if self._query_results.complete: - self.reload() + self.reload(retry=retry) return self.state == _DONE_STATE - def result(self, timeout=None): + def result(self, timeout=None, retry=DEFAULT_RETRY): """Start the job and wait for it to complete and get the result. :type timeout: int @@ -1867,6 +1889,9 @@ def result(self, timeout=None): How long to wait for job to complete before raising a :class:`TimeoutError`. + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the call that retrieves rows. + :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: Iterator of row data :class:`tuple`s. During each page, the @@ -1883,7 +1908,8 @@ def result(self, timeout=None): # Return an iterator instead of returning the job. schema = self.query_results().schema dest_table = self.destination - return self._client.list_rows(dest_table, selected_fields=schema) + return self._client.list_rows(dest_table, selected_fields=schema, + retry=retry) class QueryPlanEntryStep(object): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index bb5517207ffc..6a40a65b390a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -68,7 +68,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): with self.assertRaises(NotFound): client._get_query_results( - 'nothere', project='other-project', timeout_ms=500) + 'nothere', None, project='other-project', timeout_ms=500) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -110,7 +110,7 @@ def test__get_query_results_hit(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) client._connection = _Connection(data) - query_results = client._get_query_results(job_id) + query_results = client._get_query_results(job_id, None) self.assertEqual(query_results.total_rows, 10) self.assertTrue(query_results.complete) @@ -274,6 +274,8 @@ def test_dataset_with_default_project(self): self.assertEqual(dataset.project, self.PROJECT) def test_get_dataset(self): + from google.cloud.exceptions import ServerError + path = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) creds = _make_credentials() http = object() @@ -297,6 +299,39 @@ def test_get_dataset(self): self.assertEqual(req['path'], '/%s' % path) self.assertEqual(dataset.dataset_id, self.DS_ID) + # Test retry. + + # Not a cloud API exception (missing 'errors' field). + client._connection = _Connection(Exception(''), resource) + with self.assertRaises(Exception): + client.get_dataset(dataset_ref) + + # Zero-length errors field. + client._connection = _Connection(ServerError(''), resource) + with self.assertRaises(ServerError): + client.get_dataset(dataset_ref) + + # Non-retryable reason. + client._connection = _Connection( + ServerError('', errors=[{'reason': 'serious'}]), + resource) + with self.assertRaises(ServerError): + client.get_dataset(dataset_ref) + + # Retryable reason, but retry is disabled. + client._connection = _Connection( + ServerError('', errors=[{'reason': 'backendError'}]), + resource) + with self.assertRaises(ServerError): + client.get_dataset(dataset_ref, retry=None) + + # Retryable reason, default retry: success. + client._connection = _Connection( + ServerError('', errors=[{'reason': 'backendError'}]), + resource) + dataset = client.get_dataset(dataset_ref) + self.assertEqual(dataset.dataset_id, self.DS_ID) + def test_create_dataset_minimal(self): from google.cloud.bigquery.dataset import Dataset @@ -2994,4 +3029,6 @@ def api_request(self, **kw): raise NotFound('miss') response, self._responses = self._responses[0], self._responses[1:] + if isinstance(response, Exception): + raise response return response From 38440cbca76ba899a9a3a893b17650eb99047ed5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 12 Oct 2017 15:29:35 -0700 Subject: [PATCH 0318/2016] BQ: use a string or list of string for client.extract_table. (#4177) This will match behavior of copy_table and remove the need for special formatting of the keyword arguments. --- .../google/cloud/bigquery/client.py | 34 +++++++------ .../tests/unit/test_client.py | 51 +++++++++++++++++++ 2 files changed, 70 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index c52fbbda1ba8..d0ec7953ae1f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -832,7 +832,9 @@ def copy_table(self, sources, destination, job_id=None, job_config=None, job.begin(retry=retry) return job - def extract_table(self, source, *destination_uris, **kwargs): + def extract_table( + self, source, destination_uris, job_config=None, job_id=None, + retry=DEFAULT_RETRY): """Start a job to extract a table into Cloud Storage files. See @@ -841,7 +843,9 @@ def extract_table(self, source, *destination_uris, **kwargs): :type source: :class:`google.cloud.bigquery.table.TableReference` :param source: table to be extracted. - :type destination_uris: sequence of string + :type destination_uris: One of: + str or + sequence of str :param destination_uris: URIs of Cloud Storage file(s) into which table data is to be extracted; in format ``gs:///``. @@ -849,25 +853,25 @@ def extract_table(self, source, *destination_uris, **kwargs): :type kwargs: dict :param kwargs: Additional keyword arguments. - :Keyword Arguments: - * *job_config* - (:class:`google.cloud.bigquery.job.ExtractJobConfig`) -- - (Optional) Extra configuration options for the extract job. - * *job_id* (``str``) -- - Additional content - (Optional) The ID of the job. - * *retry* (:class:`google.api.core.retry.Retry`) - (Optional) How to retry the RPC. + :type job_id: str + :param job_id: (Optional) The ID of the job. + + :type job_config: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :param job_config: (Optional) Extra configuration options for the job. + + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: a new ``ExtractJob`` instance """ - job_config = kwargs.get('job_config') - job_id = _make_job_id(kwargs.get('job_id')) - retry = kwargs.get('retry', DEFAULT_RETRY) + job_id = _make_job_id(job_id) + + if isinstance(destination_uris, six.string_types): + destination_uris = [destination_uris] job = ExtractJob( - job_id, source, list(destination_uris), client=self, + job_id, source, destination_uris, client=self, job_config=job_config) job.begin(retry=retry) return job diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 6a40a65b390a..243a892fdb86 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1703,6 +1703,57 @@ def test_extract_table_generated_job_id(self): self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) + def test_extract_table_w_destination_uris(self): + from google.cloud.bigquery.job import ExtractJob + + JOB = 'job_id' + SOURCE = 'source_table' + DESTINATION1 = 'gs://bucket_name/object_one' + DESTINATION2 = 'gs://bucket_name/object_two' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': SOURCE, + }, + 'destinationUris': [ + DESTINATION1, + DESTINATION2, + ], + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(RESOURCE) + dataset = client.dataset(self.DS_ID) + source = dataset.table(SOURCE) + + job = client.extract_table( + source, [DESTINATION1, DESTINATION2], job_id=JOB) + + # Check that extract_table actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + + # Check the job resource. + self.assertIsInstance(job, ExtractJob) + self.assertIs(job._client, client) + self.assertEqual(job.job_id, JOB) + self.assertEqual(job.source, source) + self.assertEqual( + list(job.destination_uris), [DESTINATION1, DESTINATION2]) + def test_query_defaults(self): from google.cloud.bigquery.job import QueryJob From 70e658be29fddeeccccf8378c804dd7bb90815af Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Fri, 13 Oct 2017 16:35:08 -0400 Subject: [PATCH 0319/2016] bigquery: retry Client.create_rows (#4178) --- .../google/cloud/bigquery/client.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index d0ec7953ae1f..2d74c93d7568 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -906,7 +906,7 @@ def query(self, query, job_config=None, job_id=None, retry=DEFAULT_RETRY): def create_rows(self, table, rows, row_ids=None, selected_fields=None, skip_invalid_rows=None, ignore_unknown_values=None, - template_suffix=None): + template_suffix=None, retry=DEFAULT_RETRY): """API call: insert table data via a POST request See @@ -958,6 +958,9 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, on the schema of the template table. See https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables + :type retry: :class:`google.api.core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: list of mappings :returns: One mapping per row with insert errors: the "index" key identifies the row, and the "errors" key contains a list @@ -1006,8 +1009,9 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, if template_suffix is not None: data['templateSuffix'] = template_suffix - # TODO(jba): use self._call_api here after #4148 is merged. - response = self._connection.api_request( + # We can always retry, because every row has an insert ID. + response = self._call_api( + retry, method='POST', path='%s/insertAll' % table.path, data=data) From a698c55f68ea5e31278b11d588f4b97d2b2019fc Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Sat, 14 Oct 2017 07:10:15 -0400 Subject: [PATCH 0320/2016] bigquery: classes to support external table config (#4182) * bigquery: classes to support external table config These classes will be used to configure external table definitions for queries (transient external tables) as well as external data configurations for tables (permanent external tables). This PR just establishes the classes. Subsequent PRs will wire them into query jobs and tables. --- .../google/cloud/bigquery/__init__.py | 12 + .../google/cloud/bigquery/external_config.py | 491 ++++++++++++++++++ .../tests/unit/test_external_config.py | 228 ++++++++ 3 files changed, 731 insertions(+) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_external_config.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 545e0cde265a..4c3fcd7b3be0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -43,6 +43,12 @@ from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery.external_config import BigtableOptions +from google.cloud.bigquery.external_config import BigtableColumnFamily +from google.cloud.bigquery.external_config import BigtableColumn +from google.cloud.bigquery.external_config import CSVOptions +from google.cloud.bigquery.external_config import GoogleSheetsOptions __all__ = [ '__version__', @@ -63,4 +69,10 @@ 'TableReference', 'UDFResource', 'DEFAULT_RETRY', + 'ExternalConfig', + 'BigtableOptions', + 'BigtableColumnFamily', + 'BigtableColumn', + 'CSVOptions', + 'GoogleSheetsOptions', ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py new file mode 100644 index 000000000000..9177595da67c --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -0,0 +1,491 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Define classes that describe external data sources. + + These are used for both Table.externalDataConfiguration and + Job.configuration.query.tableDefinitions. +""" + +from __future__ import absolute_import + +import base64 +import copy + +import six + +from google.cloud.bigquery._helpers import _to_bytes +from google.cloud.bigquery._helpers import _bytes_to_json +from google.cloud.bigquery._helpers import _TypedApiResourceProperty +from google.cloud.bigquery._helpers import _ListApiResourceProperty +from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.table import _build_schema_resource +from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery.job import _int_or_none + + +class ExternalConfig(object): + """Description of an external data source. + + :type source_format: str + :param source_format: the format of the external data. See + the ``source_format`` property on this class. + """ + + def __init__(self, source_format): + self._properties = {'sourceFormat': source_format} + self._options = None + + @property + def source_format(self): + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat + """ + return self._properties['sourceFormat'] + + autodetect = _TypedApiResourceProperty( + 'autodetect', 'autodetect', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect + """ + + compression = _TypedApiResourceProperty( + 'compression', 'compression', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression + """ + + ignore_unknown_values = _TypedApiResourceProperty( + 'ignore_unknown_values', 'ignoreUnknownValues', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues + """ + + max_bad_records = _TypedApiResourceProperty( + 'max_bad_records', 'maxBadRecords', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords + """ + + source_uris = _ListApiResourceProperty( + 'source_uris', 'sourceUris', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris + """ + + schema = _ListApiResourceProperty('schema', 'schema', SchemaField) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema + """ + + @property + def options(self): + """Source-specific options. A subclass of ExternalConfigOptions.""" + return self._options + + @options.setter + def options(self, value): + if self.source_format != value._SOURCE_FORMAT: + raise ValueError( + 'source format %s does not match option type %s' % ( + self.source_format, value.__class__.__name__)) + self._options = value + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + if self.schema: + config['schema'] = {'fields': _build_schema_resource(self.schema)} + if self.options is not None: + config[self.options._RESOURCE_NAME] = self.options.to_api_repr() + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a CSVOptions given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` + :returns: Configuration parsed from ``resource``. + """ + config = cls(resource['sourceFormat']) + schema = resource.pop('schema', None) + for optcls in (BigtableOptions, CSVOptions, GoogleSheetsOptions): + opts = resource.pop(optcls._RESOURCE_NAME, None) + if opts is not None: + config.options = optcls.from_api_repr(opts) + break + config._properties = copy.deepcopy(resource) + if schema: + config.schema = _parse_schema_resource(schema) + return config + + +class BigtableColumn(object): + """Options for a Bigtable column.""" + + def __init__(self): + self._properties = {} + + encoding = _TypedApiResourceProperty( + 'encoding', 'encoding', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.encoding + """ + + field_name = _TypedApiResourceProperty( + 'field_name', 'fieldName', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.field_name + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.field_name + """ + + only_read_latest = _TypedApiResourceProperty( + 'only_read_latest', 'onlyReadLatest', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.only_read_latest + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.only_read_latest + """ + + qualifier_encoded = _TypedApiResourceProperty( + 'qualifier_encoded', 'qualifierEncoded', six.binary_type) + """The qualifier encoded in binary. The type is ``str`` (Python 2.x) or + ``bytes`` (Python 3.x). The module will handle base64 encoding for you. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_encoded + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_encoded + """ + + qualifier_string = _TypedApiResourceProperty( + 'qualifier_string', 'qualifierString', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_string + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_string + """ + + type_ = _TypedApiResourceProperty('type_', 'type', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.type + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + qe = config.get('qualifierEncoded') + if qe is not None: + config['qualifierEncoded'] = _bytes_to_json(qe) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a BigtableColumn given its API representation + + :type resource: dict + :param resource: + A column in the same representation as is returned from the API. + + :rtype: :class:`google.cloud.bigquery.external_config.BigtableColumn` + :returns: Configuration parsed from ``resource``. + """ + qe = resource.pop('qualifierEncoded', None) + config = cls() + config._properties = copy.deepcopy(resource) + if qe: + config.qualifier_encoded = base64.standard_b64decode(_to_bytes(qe)) + return config + + +class BigtableColumnFamily(object): + """Options for a Bigtable column family.""" + + def __init__(self): + self._properties = {} + + encoding = _TypedApiResourceProperty( + 'encoding', 'encoding', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.encoding + """ + + family_id = _TypedApiResourceProperty( + 'family_id', 'familyId', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.familyId + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.familyId + """ + + only_read_latest = _TypedApiResourceProperty( + 'only_read_latest', 'onlyReadLatest', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.onlyReadLatest + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.onlyReadLatest + """ + + type_ = _TypedApiResourceProperty('type_', 'type', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.type + """ + + columns = _ListApiResourceProperty( + 'columns', 'columns', BigtableColumn) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + config['columns'] = [c.to_api_repr() for c in config['columns']] + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a BigtableColumnFamily given its + API representation + + :type resource: dict + :param resource: + A column family in the same representation as is returned + from the API. + + :rtype: + :class:`google.cloud.bigquery.external_config.BigtableColumnFamily` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + config.columns = [BigtableColumn.from_api_repr(c) + for c in resource['columns']] + return config + + +class BigtableOptions(object): + """Options that describe how to treat Bigtable tables + as BigQuery tables.""" + + _SOURCE_FORMAT = 'BIGTABLE' + _RESOURCE_NAME = 'bigtableOptions' + + def __init__(self): + self._properties = {} + + ignore_unspecified_column_families = _TypedApiResourceProperty( + 'ignore_unspecified_column_families', + 'ignoreUnspecifiedColumnFamilies', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.ignoreUnspecifiedColumnFamilies + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.ignoreUnspecifiedColumnFamilies + """ + + read_rowkey_as_string = _TypedApiResourceProperty( + 'read_rowkey_as_string', 'readRowkeyAsString', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.readRowkeyAsString + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.readRowkeyAsString + """ + + column_families = _ListApiResourceProperty( + 'column_families', 'columnFamilies', BigtableColumnFamily) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + config['columnFamilies'] = [cf.to_api_repr() + for cf in config['columnFamilies']] + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a BigtableOptions given its API representation + + :type resource: dict + :param resource: + A BigtableOptions in the same representation as is returned + from the API. + + :rtype: :class:`google.cloud.bigquery.external_config.BigtableOptions` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + config.column_families = [BigtableColumnFamily.from_api_repr(cf) + for cf in resource['columnFamilies']] + return config + + +class CSVOptions(object): + """Options that describe how to treat CSV files as BigQuery tables.""" + + _SOURCE_FORMAT = 'CSV' + _RESOURCE_NAME = 'csvOptions' + + def __init__(self): + self._properties = {} + + allow_jagged_rows = _TypedApiResourceProperty( + 'allow_jagged_rows', 'allowJaggedRows', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowJaggedRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowJaggedRows + """ + + allow_quoted_newlines = _TypedApiResourceProperty( + 'allow_quoted_newlines', 'allowQuotedNewlines', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowQuotedNewlines + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowQuotedNewlines + """ + + encoding = _TypedApiResourceProperty( + 'encoding', 'encoding', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding + """ + + field_delimiter = _TypedApiResourceProperty( + 'field_delimiter', 'fieldDelimiter', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.fieldDelimiter + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.fieldDelimiter + """ + + quote_character = _TypedApiResourceProperty( + 'quote_character', 'quote', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.quote + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.quote + """ + + skip_leading_rows = _TypedApiResourceProperty( + 'skip_leading_rows', 'skipLeadingRows', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.skipLeadingRows + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + slr = config.pop('skipLeadingRows', None) + if slr is not None: + config['skipLeadingRows'] = str(slr) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a CSVOptions given its API representation + + :type resource: dict + :param resource: + A CSVOptions in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` + :returns: Configuration parsed from ``resource``. + """ + slr = resource.pop('skipLeadingRows', None) + config = cls() + config._properties = copy.deepcopy(resource) + config.skip_leading_rows = _int_or_none(slr) + return config + + +class GoogleSheetsOptions(object): + """Options that describe how to treat Google Sheets as BigQuery tables.""" + + _SOURCE_FORMAT = 'GOOGLE_SHEETS' + _RESOURCE_NAME = 'googleSheetsOptions' + + def __init__(self): + self._properties = {} + + skip_leading_rows = _TypedApiResourceProperty( + 'skip_leading_rows', 'skipLeadingRows', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).googleSheetsOptions.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.googleSheetsOptions.skipLeadingRows + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + slr = config.pop('skipLeadingRows', None) + if slr is not None: + config['skipLeadingRows'] = str(slr) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a GoogleSheetsOptions given its API representation + + :type resource: dict + :param resource: + An GoogleSheetsOptions in the same representation as is + returned from the API. + + :rtype: + :class:`google.cloud.bigquery.external_config.GoogleSheetsOptions` + :returns: Configuration parsed from ``resource``. + """ + slr = resource.pop('skipLeadingRows', None) + config = cls() + config._properties = copy.deepcopy(resource) + config.skip_leading_rows = _int_or_none(slr) + return config diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py new file mode 100644 index 000000000000..6768093ed0b3 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -0,0 +1,228 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import copy +import unittest + +from google.cloud.bigquery.external_config import ExternalConfig + + +class TestExternalConfig(unittest.TestCase): + + SOURCE_URIS = ['gs://foo', 'gs://bar'] + + BASE_RESOURCE = { + 'sourceFormat': '', + 'sourceUris': SOURCE_URIS, + 'maxBadRecords': 17, + 'autodetect': True, + 'ignoreUnknownValues': False, + 'compression': 'compression', + } + + def test_api_repr_base(self): + from google.cloud.bigquery.schema import SchemaField + + resource = copy.deepcopy(self.BASE_RESOURCE) + ec = ExternalConfig.from_api_repr(resource) + self._verify_base(ec) + self.assertEqual(ec.schema, []) + self.assertIsNone(ec.options) + + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, self.BASE_RESOURCE) + + resource = _copy_and_update(self.BASE_RESOURCE, { + 'schema': { + 'fields': [ + { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + }, + ], + }, + }) + want_resource = copy.deepcopy(resource) + ec = ExternalConfig.from_api_repr(resource) + self._verify_base(ec) + self.assertEqual(ec.schema, + [SchemaField('full_name', 'STRING', mode='REQUIRED')]) + self.assertIsNone(ec.options) + + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, want_resource) + + def _verify_base(self, ec): + self.assertEqual(ec.autodetect, True) + self.assertEqual(ec.compression, 'compression') + self.assertEqual(ec.ignore_unknown_values, False) + self.assertEqual(ec.max_bad_records, 17) + self.assertEqual(ec.source_uris, self.SOURCE_URIS) + + def test_to_api_repr_source_format(self): + ec = ExternalConfig('CSV') + got = ec.to_api_repr() + want = {'sourceFormat': 'CSV'} + self.assertEqual(got, want) + + def test_api_repr_sheets(self): + from google.cloud.bigquery.external_config import GoogleSheetsOptions + + resource = _copy_and_update(self.BASE_RESOURCE, { + 'sourceFormat': 'GOOGLE_SHEETS', + 'googleSheetsOptions': {'skipLeadingRows': '123'}, + }) + want_resource = copy.deepcopy(resource) + + ec = ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, 'GOOGLE_SHEETS') + self.assertIsInstance(ec.options, GoogleSheetsOptions) + self.assertEqual(ec.options.skip_leading_rows, 123) + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, want_resource) + + del want_resource['googleSheetsOptions']['skipLeadingRows'] + ec = ExternalConfig.from_api_repr(copy.deepcopy(want_resource)) + self.assertIsNone(ec.options.skip_leading_rows) + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, want_resource) + + def test_api_repr_csv(self): + from google.cloud.bigquery.external_config import CSVOptions + + resource = _copy_and_update(self.BASE_RESOURCE, { + 'sourceFormat': 'CSV', + 'csvOptions': { + 'fieldDelimiter': 'fieldDelimiter', + 'skipLeadingRows': '123', + 'quote': 'quote', + 'allowQuotedNewlines': True, + 'allowJaggedRows': False, + 'encoding': 'encoding', + }, + }) + want_resource = copy.deepcopy(resource) + + ec = ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, 'CSV') + self.assertIsInstance(ec.options, CSVOptions) + self.assertEqual(ec.options.field_delimiter, 'fieldDelimiter') + self.assertEqual(ec.options.skip_leading_rows, 123) + self.assertEqual(ec.options.quote_character, 'quote') + self.assertEqual(ec.options.allow_quoted_newlines, True) + self.assertEqual(ec.options.allow_jagged_rows, False) + self.assertEqual(ec.options.encoding, 'encoding') + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, want_resource) + + del want_resource['csvOptions']['skipLeadingRows'] + ec = ExternalConfig.from_api_repr(copy.deepcopy(want_resource)) + self.assertIsNone(ec.options.skip_leading_rows) + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, want_resource) + + def test_api_repr_bigtable(self): + from google.cloud.bigquery.external_config import BigtableOptions + from google.cloud.bigquery.external_config import BigtableColumnFamily + + QUALIFIER_ENCODED = base64.standard_b64encode(b'q').decode('ascii') + resource = _copy_and_update(self.BASE_RESOURCE, { + 'sourceFormat': 'BIGTABLE', + 'bigtableOptions': { + 'ignoreUnspecifiedColumnFamilies': True, + 'readRowkeyAsString': False, + 'columnFamilies': [ + { + 'familyId': 'familyId', + 'type': 'type', + 'encoding': 'encoding', + 'columns': [ + { + 'qualifierString': 'q', + 'fieldName': 'fieldName1', + 'type': 'type1', + 'encoding': 'encoding1', + 'onlyReadLatest': True, + }, + { + 'qualifierEncoded': QUALIFIER_ENCODED, + 'fieldName': 'fieldName2', + 'type': 'type2', + 'encoding': 'encoding2', + }, + + ], + 'onlyReadLatest': False, + } + ], + }, + }) + want_resource = copy.deepcopy(resource) + + ec = ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, 'BIGTABLE') + self.assertIsInstance(ec.options, BigtableOptions) + self.assertEqual(ec.options.ignore_unspecified_column_families, True) + self.assertEqual(ec.options.read_rowkey_as_string, False) + self.assertEqual(len(ec.options.column_families), 1) + fam1 = ec.options.column_families[0] + self.assertIsInstance(fam1, BigtableColumnFamily) + self.assertEqual(fam1.family_id, 'familyId') + self.assertEqual(fam1.type_, 'type') + self.assertEqual(fam1.encoding, 'encoding') + self.assertEqual(len(fam1.columns), 2) + col1 = fam1.columns[0] + self.assertEqual(col1.qualifier_string, 'q') + self.assertEqual(col1.field_name, 'fieldName1') + self.assertEqual(col1.type_, 'type1') + self.assertEqual(col1.encoding, 'encoding1') + col2 = ec.options.column_families[0].columns[1] + self.assertEqual(col2.qualifier_encoded, b'q') + self.assertEqual(col2.field_name, 'fieldName2') + self.assertEqual(col2.type_, 'type2') + self.assertEqual(col2.encoding, 'encoding2') + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, want_resource) + + def test_option_mismatch(self): + from google.cloud.bigquery.external_config import CSVOptions + from google.cloud.bigquery.external_config import BigtableOptions + from google.cloud.bigquery.external_config import GoogleSheetsOptions + + for source_format, opts in (('BIGTABLE', CSVOptions()), + ('CSV', GoogleSheetsOptions()), + ('GOOGLE_SHEETS', BigtableOptions())): + ec = ExternalConfig(source_format) + with self.assertRaises(ValueError): + ec.options = opts + + +def _copy_and_update(d, u): + d = copy.deepcopy(d) + d.update(u) + return d From 32600d25b54f7500918263df0d6f50465068bd63 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 16 Oct 2017 09:08:31 -0700 Subject: [PATCH 0321/2016] BigQuery: adds client.create_rows_json (#4189) Addresses client library requirement to "provide an option for JSON as argument" --- .../google/cloud/bigquery/client.py | 96 +++++++++++++------ .../google-cloud-bigquery/tests/system.py | 35 ++++--- .../tests/unit/test_client.py | 55 +++++++++++ 3 files changed, 137 insertions(+), 49 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2d74c93d7568..da3b8dcaaea5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -904,9 +904,7 @@ def query(self, query, job_config=None, job_id=None, retry=DEFAULT_RETRY): job.begin(retry=retry) return job - def create_rows(self, table, rows, row_ids=None, selected_fields=None, - skip_invalid_rows=None, ignore_unknown_values=None, - template_suffix=None, retry=DEFAULT_RETRY): + def create_rows(self, table, rows, selected_fields=None, **kwargs): """API call: insert table data via a POST request See @@ -928,15 +926,73 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, include all required fields in the schema. Keys which do not correspond to a field in the schema are ignored. - :type row_ids: list of string - :param row_ids: (Optional) Unique ids, one per row being inserted. - If omitted, unique IDs are created. - :type selected_fields: list of :class:`SchemaField` :param selected_fields: The fields to return. Required if ``table`` is a :class:`~google.cloud.bigquery.table.TableReference`. + :type kwargs: dict + :param kwargs: Keyword arguments to + `~google.cloud.bigquery.client.Client.create_rows_json` + + :rtype: list of mappings + :returns: One mapping per row with insert errors: the "index" key + identifies the row, and the "errors" key contains a list + of the mappings describing one or more problems with the + row. + :raises: ValueError if table's schema is not set + """ + if selected_fields is not None: + schema = selected_fields + elif isinstance(table, TableReference): + raise ValueError('need selected_fields with TableReference') + elif isinstance(table, Table): + if len(table._schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + schema = table.schema + else: + raise TypeError('table should be Table or TableReference') + + json_rows = [] + + for index, row in enumerate(rows): + if isinstance(row, dict): + row = _row_from_mapping(row, schema) + json_row = {} + + for field, value in zip(schema, row): + converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) + if converter is not None: # STRING doesn't need converting + value = converter(value) + json_row[field.name] = value + + json_rows.append(json_row) + + return self.create_rows_json(table, json_rows, **kwargs) + + def create_rows_json(self, table, json_rows, row_ids=None, + skip_invalid_rows=None, ignore_unknown_values=None, + template_suffix=None, retry=DEFAULT_RETRY): + """API call: insert table data via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll + + :type table: One of: + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` + :param table: the destination table for the row data, or a reference + to it. + + :type json_rows: list of dictionaries + :param json_rows: Row data to be inserted. Keys must match the table + schema fields and values must be JSON-compatible + representations. + + :type row_ids: list of string + :param row_ids: (Optional) Unique ids, one per row being inserted. + If omitted, unique IDs are created. + :type skip_invalid_rows: bool :param skip_invalid_rows: (Optional) Insert all valid rows of a request, even if invalid rows exist. @@ -966,34 +1022,12 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, identifies the row, and the "errors" key contains a list of the mappings describing one or more problems with the row. - :raises: ValueError if table's schema is not set """ - if selected_fields is not None: - schema = selected_fields - elif isinstance(table, TableReference): - raise ValueError('need selected_fields with TableReference') - elif isinstance(table, Table): - if len(table._schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - schema = table.schema - else: - raise TypeError('table should be Table or TableReference') - rows_info = [] data = {'rows': rows_info} - for index, row in enumerate(rows): - if isinstance(row, dict): - row = _row_from_mapping(row, schema) - row_info = {} - - for field, value in zip(schema, row): - converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) - if converter is not None: # STRING doesn't need converting - value = converter(value) - row_info[field.name] = value - - info = {'json': row_info} + for index, row in enumerate(json_rows): + info = {'json': row} if row_ids is not None: info['insertId'] = row_ids[index] else: diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 2c62e5efa9b0..4cb97c6601f4 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -339,7 +339,7 @@ def test_create_rows_then_dump_table(self): self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) - errors = Config.CLIENT.create_rows(table, ROWS, ROW_IDS) + errors = Config.CLIENT.create_rows(table, ROWS, row_ids=ROW_IDS) self.assertEqual(len(errors), 0) rows = () @@ -1356,11 +1356,9 @@ def test_create_table_rows_fetch_nested_schema(self): json_filename = os.path.join(WHERE, 'data', 'characters.jsonl') with open(json_filename) as rows_file: for line in rows_file: - mapping = json.loads(line) - to_insert.append( - tuple(mapping[field.name] for field in schema)) + to_insert.append(json.loads(line)) - errors = Config.CLIENT.create_rows(table, to_insert) + errors = Config.CLIENT.create_rows_json(table, to_insert) self.assertEqual(len(errors), 0) retry = RetryResult(_has_rows, max_tries=8) @@ -1369,14 +1367,14 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(len(fetched), len(to_insert)) - for found, expected in zip(sorted(fetched_tuples), sorted(to_insert)): - self.assertEqual(found[0], expected[0]) # Name - self.assertEqual(found[1], int(expected[1])) # Age - self.assertEqual(found[2], expected[2]) # Weight - self.assertEqual(found[3], expected[3]) # IsMagic + for found, expected in zip(sorted(fetched_tuples), to_insert): + self.assertEqual(found[0], expected['Name']) + self.assertEqual(found[1], int(expected['Age'])) + self.assertEqual(found[2], expected['Weight']) + self.assertEqual(found[3], expected['IsMagic']) - self.assertEqual(len(found[4]), len(expected[4])) # Spells - for f_spell, e_spell in zip(found[4], expected[4]): + self.assertEqual(len(found[4]), len(expected['Spells'])) + for f_spell, e_spell in zip(found[4], expected['Spells']): self.assertEqual(f_spell['Name'], e_spell['Name']) parts = time.strptime( e_spell['LastUsed'], '%Y-%m-%d %H:%M:%S UTC') @@ -1390,17 +1388,18 @@ def test_create_table_rows_fetch_nested_schema(self): e_spell['Icon'].encode('ascii')) self.assertEqual(f_spell['Icon'], e_icon) - parts = time.strptime(expected[5], '%H:%M:%S') + parts = time.strptime(expected['TeaTime'], '%H:%M:%S') e_teatime = datetime.time(*parts[3:6]) - self.assertEqual(found[5], e_teatime) # TeaTime + self.assertEqual(found[5], e_teatime) - parts = time.strptime(expected[6], '%Y-%m-%d') + parts = time.strptime(expected['NextVacation'], '%Y-%m-%d') e_nextvac = datetime.date(*parts[0:3]) - self.assertEqual(found[6], e_nextvac) # NextVacation + self.assertEqual(found[6], e_nextvac) - parts = time.strptime(expected[7], '%Y-%m-%dT%H:%M:%S') + parts = time.strptime(expected['FavoriteTime'], + '%Y-%m-%dT%H:%M:%S') e_favtime = datetime.datetime(*parts[0:6]) - self.assertEqual(found[7], e_favtime) # FavoriteTime + self.assertEqual(found[7], e_favtime) def temp_dataset(self, dataset_id): dataset = retry_403(Config.CLIENT.create_dataset)( diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 243a892fdb86..1ce9c2158a59 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2283,6 +2283,61 @@ def test_create_rows_errors(self): with self.assertRaises(TypeError): client.create_rows(1, ROWS) + def test_create_rows_json(self): + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference + + PROJECT = 'PROJECT' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + ] + table = Table(table_ref, schema=schema) + ROWS = [ + { + 'full_name': 'Phred Phlyntstone', 'age': '32', + 'joined': '2015-07-24T19:53:19.006000Z' + }, + { + 'full_name': 'Bharney Rhubble', 'age': '33', + 'joined': 1437767600.006 + }, + { + 'full_name': 'Wylma Phlyntstone', 'age': '29', + 'joined': 1437767601.006 + }, + { + 'full_name': 'Bhettye Rhubble', 'age': '27', 'joined': None + }, + ] + + SENT = { + 'rows': [{ + 'json': row, + 'insertId': str(i), + } for i, row in enumerate(ROWS)], + } + + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows_json(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + def test_query_rows_defaults(self): from google.api.core.page_iterator import HTTPIterator from google.cloud.bigquery._helpers import Row From 0a1037d20c2e392e69c3741c7b5174877062c8c5 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 13:03:38 -0400 Subject: [PATCH 0322/2016] bigquery: make views default to standard SQL (#4192) --- .../google/cloud/bigquery/table.py | 37 ++++++++++++------- .../tests/unit/test_client.py | 3 +- .../tests/unit/test_table.py | 11 ++++-- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 990349336433..d4c6977c8755 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -463,6 +463,9 @@ def location(self, value): def view_query(self): """SQL query defining the table as a view. + By default, the query is treated as Standard SQL. To use Legacy + SQL, set view_use_legacy_sql to True. + :rtype: str, or ``NoneType`` :returns: The query as set by the user, or None (the default). """ @@ -481,9 +484,14 @@ def view_query(self, value): """ if not isinstance(value, six.string_types): raise ValueError("Pass a string") - if self._properties.get('view') is None: - self._properties['view'] = {} - self._properties['view']['query'] = value + view = self._properties.get('view') + if view is None: + view = self._properties['view'] = {} + view['query'] = value + # The service defaults useLegacySql to True, but this + # client uses Standard SQL by default. + if view.get('useLegacySql') is None: + view['useLegacySql'] = False @view_query.deleter def view_query(self): @@ -492,26 +500,29 @@ def view_query(self): @property def view_use_legacy_sql(self): - """Specifies whether to execute the view with legacy or standard SQL. + """Specifies whether to execute the view with Legacy or Standard SQL. - If not set, None is returned. BigQuery's default mode is equivalent to - useLegacySql = True. + The default is False for views (use Standard SQL). + If this table is not a view, None is returned. - :rtype: bool, or ``NoneType`` - :returns: The boolean for view.useLegacySql as set by the user, or - None (the default). + :rtype: bool or ``NoneType`` + :returns: The boolean for view.useLegacySql, or None if not a view. """ view = self._properties.get('view') if view is not None: - return view.get('useLegacySql') + # useLegacySql is never missing from the view dict if this table + # was created client-side, because the view_query setter populates + # it. So a missing or None can only come from the server, whose + # default is True. + return view.get('useLegacySql', True) @view_use_legacy_sql.setter def view_use_legacy_sql(self, value): """Update the view sub-property 'useLegacySql'. - This boolean specifies whether to execute the view with legacy SQL - (True) or standard SQL (False). The default, if not specified, is - 'True'. + This boolean specifies whether to execute the view with Legacy SQL + (True) or Standard SQL (False). The default, if not specified, is + 'False'. :type value: bool :param value: The boolean for view.useLegacySql diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 1ce9c2158a59..9dfa311e806c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -550,8 +550,7 @@ def test_create_table_w_schema_and_query(self): {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] }, - # TODO(alixh) default to Standard SQL - 'view': {'query': query, 'useLegacySql': None}, + 'view': {'query': query, 'useLegacySql': False}, } self.assertEqual(req['data'], sent) self.assertEqual(got.table_id, self.TABLE_ID) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 12b2ec98c4d4..63dafb8ac5ec 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -259,7 +259,7 @@ def _verifyResourceProperties(self, table, resource): self.assertEqual(table.view_query, resource['view']['query']) self.assertEqual( table.view_use_legacy_sql, - resource['view'].get('useLegacySql')) + resource['view'].get('useLegacySql', True)) else: self.assertIsNone(table.view_query) self.assertIsNone(table.view_use_legacy_sql) @@ -487,6 +487,10 @@ def test_view_query_setter(self): table = self._make_one(table_ref) table.view_query = 'select * from foo' self.assertEqual(table.view_query, 'select * from foo') + self.assertEqual(table.view_use_legacy_sql, False) + + table.view_use_legacy_sql = True + self.assertEqual(table.view_use_legacy_sql, True) def test_view_query_deleter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -495,6 +499,7 @@ def test_view_query_deleter(self): table.view_query = 'select * from foo' del table.view_query self.assertIsNone(table.view_query) + self.assertIsNone(table.view_use_legacy_sql) def test_view_use_legacy_sql_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -507,9 +512,9 @@ def test_view_use_legacy_sql_setter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - table.view_use_legacy_sql = False + table.view_use_legacy_sql = True table.view_query = 'select * from foo' - self.assertEqual(table.view_use_legacy_sql, False) + self.assertEqual(table.view_use_legacy_sql, True) self.assertEqual(table.view_query, 'select * from foo') def test_from_api_repr_missing_identity(self): From 43bbe6bfa6a6148ce459e23c7f44b9d0e83e30b3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 16 Oct 2017 10:26:47 -0700 Subject: [PATCH 0323/2016] BQ: update samples in README for beta 2 changes. (#4179) * BQ: update samples in README for beta 2 changes. * Fix typos. Split dataset create into its own sample. --- packages/google-cloud-bigquery/README.rst | 61 +++++++++++++++-------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 721ba18a27b2..01a1194c41fc 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -38,12 +38,27 @@ Using the API Querying massive datasets can be time consuming and expensive without the right hardware and infrastructure. Google `BigQuery`_ (`BigQuery API docs`_) -solves this problem by enabling super-fast, SQL-like queries against -append-only tables, using the processing power of Google's infrastructure. +solves this problem by enabling super-fast, SQL queries against +append-mostly tables, using the processing power of Google's infrastructure. .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery .. _BigQuery API docs: https://cloud.google.com/bigquery/docs/reference/v2/ +Create a dataset +~~~~~~~~~~~~~~~~ + +.. code:: python + + from google.cloud import bigquery + from google.cloud.bigquery import Dataset + + client = bigquery.Client() + + dataset_ref = client.dataset('dataset_name') + dataset = Dataset(dataset_ref) + dataset.description = 'my dataset' + dataset = client.create_dataset(dataset) # API request + Load data from CSV ~~~~~~~~~~~~~~~~~~ @@ -52,39 +67,43 @@ Load data from CSV import csv from google.cloud import bigquery + from google.cloud.bigquery import LoadJobConfig from google.cloud.bigquery import SchemaField client = bigquery.Client() - dataset = client.dataset('dataset_name') - dataset.create() # API request - SCHEMA = [ SchemaField('full_name', 'STRING', mode='required'), SchemaField('age', 'INTEGER', mode='required'), ] - table = dataset.table('table_name', SCHEMA) - table.create() + table_ref = client.dataset('dataset_name').table('table_name') + + load_config = LoadJobConfig() + load_config.skip_leading_rows = 1 + load_config.schema = SCHEMA - with open('csv_file', 'rb') as readable: - table.upload_from_file( - readable, source_format='CSV', skip_leading_rows=1) + # Contents of csv_file.csv: + # Name,Age + # Tim,99 + with open('csv_file.csv', 'rb') as readable: + client.load_table_from_file( + readable, table_ref, job_config=load_config) # API request -Perform a synchronous query -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Perform a query +~~~~~~~~~~~~~~~ .. code:: python - # Perform a synchronous query. + # Perform a query. QUERY = ( - 'SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] ' - 'WHERE state = "TX"') - query = client.run_sync_query('%s LIMIT 100' % QUERY) - query.timeout_ms = TIMEOUT_MS - query.run() - - for row in query.rows: - print(row) + 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' + 'WHERE state = "TX" ' + 'LIMIT 100') + query_job = client.query(QUERY) # API request + rows = query_job.result() # Waits for query to finish + + for row in rows: + print(row.name) See the ``google-cloud-python`` API `BigQuery documentation`_ to learn how From 198592961ea9c2ca76a6685a8d93e4c9b4593c37 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 14:32:52 -0400 Subject: [PATCH 0324/2016] bigquery: support external table definitions for query jobs (#4191) Also, set ExternalConfig.options based on source_format, and make read-only. Also, change from_api_repr functions in external_config.py so that they don't modify their resource argument. This simplifies tests. --- .../google/cloud/bigquery/_helpers.py | 8 + .../google/cloud/bigquery/external_config.py | 233 +++++++++--------- .../google/cloud/bigquery/job.py | 45 ++-- .../google-cloud-bigquery/tests/system.py | 126 +++++----- .../tests/unit/test_external_config.py | 36 +-- .../tests/unit/test_job.py | 121 +++++++-- 6 files changed, 331 insertions(+), 238 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index d4230f9ff4f6..2d763109e745 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -546,3 +546,11 @@ def _should_retry(exc): on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ + + +def _int_or_none(value): + """Helper: deserialize int value from JSON string.""" + if isinstance(value, int): + return value + if value is not None: + return int(value) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 9177595da67c..e3560224008c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -29,122 +29,10 @@ from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _TypedApiResourceProperty from google.cloud.bigquery._helpers import _ListApiResourceProperty +from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery.job import _int_or_none - - -class ExternalConfig(object): - """Description of an external data source. - - :type source_format: str - :param source_format: the format of the external data. See - the ``source_format`` property on this class. - """ - - def __init__(self, source_format): - self._properties = {'sourceFormat': source_format} - self._options = None - - @property - def source_format(self): - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat - """ - return self._properties['sourceFormat'] - - autodetect = _TypedApiResourceProperty( - 'autodetect', 'autodetect', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect - """ - - compression = _TypedApiResourceProperty( - 'compression', 'compression', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression - """ - - ignore_unknown_values = _TypedApiResourceProperty( - 'ignore_unknown_values', 'ignoreUnknownValues', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues - """ - - max_bad_records = _TypedApiResourceProperty( - 'max_bad_records', 'maxBadRecords', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords - """ - - source_uris = _ListApiResourceProperty( - 'source_uris', 'sourceUris', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris - """ - - schema = _ListApiResourceProperty('schema', 'schema', SchemaField) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema - """ - - @property - def options(self): - """Source-specific options. A subclass of ExternalConfigOptions.""" - return self._options - - @options.setter - def options(self, value): - if self.source_format != value._SOURCE_FORMAT: - raise ValueError( - 'source format %s does not match option type %s' % ( - self.source_format, value.__class__.__name__)) - self._options = value - - def to_api_repr(self): - """Build an API representation of this object. - - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. - """ - config = copy.deepcopy(self._properties) - if self.schema: - config['schema'] = {'fields': _build_schema_resource(self.schema)} - if self.options is not None: - config[self.options._RESOURCE_NAME] = self.options.to_api_repr() - return config - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a CSVOptions given its API representation - - :type resource: dict - :param resource: - An extract job configuration in the same representation as is - returned from the API. - - :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` - :returns: Configuration parsed from ``resource``. - """ - config = cls(resource['sourceFormat']) - schema = resource.pop('schema', None) - for optcls in (BigtableOptions, CSVOptions, GoogleSheetsOptions): - opts = resource.pop(optcls._RESOURCE_NAME, None) - if opts is not None: - config.options = optcls.from_api_repr(opts) - break - config._properties = copy.deepcopy(resource) - if schema: - config.schema = _parse_schema_resource(schema) - return config class BigtableColumn(object): @@ -220,9 +108,9 @@ def from_api_repr(cls, resource): :rtype: :class:`google.cloud.bigquery.external_config.BigtableColumn` :returns: Configuration parsed from ``resource``. """ - qe = resource.pop('qualifierEncoded', None) config = cls() config._properties = copy.deepcopy(resource) + qe = resource.get('qualifierEncoded') if qe: config.qualifier_encoded = base64.standard_b64decode(_to_bytes(qe)) return config @@ -436,7 +324,7 @@ def from_api_repr(cls, resource): :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` :returns: Configuration parsed from ``resource``. """ - slr = resource.pop('skipLeadingRows', None) + slr = resource.get('skipLeadingRows') config = cls() config._properties = copy.deepcopy(resource) config.skip_leading_rows = _int_or_none(slr) @@ -484,8 +372,121 @@ def from_api_repr(cls, resource): :class:`google.cloud.bigquery.external_config.GoogleSheetsOptions` :returns: Configuration parsed from ``resource``. """ - slr = resource.pop('skipLeadingRows', None) + slr = resource.get('skipLeadingRows') config = cls() config._properties = copy.deepcopy(resource) config.skip_leading_rows = _int_or_none(slr) return config + + +_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions) + + +class ExternalConfig(object): + """Description of an external data source. + + :type source_format: str + :param source_format: the format of the external data. See + the ``source_format`` property on this class. + """ + + def __init__(self, source_format): + self._properties = {'sourceFormat': source_format} + self._options = None + for optcls in _OPTION_CLASSES: + if source_format == optcls._SOURCE_FORMAT: + self._options = optcls() + break + + @property + def source_format(self): + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat + """ + return self._properties['sourceFormat'] + + @property + def options(self): + """Source-specific options.""" + return self._options + + autodetect = _TypedApiResourceProperty( + 'autodetect', 'autodetect', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect + """ + + compression = _TypedApiResourceProperty( + 'compression', 'compression', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression + """ + + ignore_unknown_values = _TypedApiResourceProperty( + 'ignore_unknown_values', 'ignoreUnknownValues', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues + """ + + max_bad_records = _TypedApiResourceProperty( + 'max_bad_records', 'maxBadRecords', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords + """ + + source_uris = _ListApiResourceProperty( + 'source_uris', 'sourceUris', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris + """ + + schema = _ListApiResourceProperty('schema', 'schema', SchemaField) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + if self.schema: + config['schema'] = {'fields': _build_schema_resource(self.schema)} + if self.options is not None: + r = self.options.to_api_repr() + if r != {}: + config[self.options._RESOURCE_NAME] = r + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a CSVOptions given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` + :returns: Configuration parsed from ``resource``. + """ + config = cls(resource['sourceFormat']) + schema = resource.get('schema') + for optcls in _OPTION_CLASSES: + opts = resource.get(optcls._RESOURCE_NAME) + if opts is not None: + config._options = optcls.from_api_repr(opts) + break + config._properties = copy.deepcopy(resource) + if schema: + config.schema = _parse_schema_resource(schema) + return config diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 65da69956369..350ad7ce579b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -25,6 +25,7 @@ from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.query import _AbstractQueryParameter from google.cloud.bigquery.query import _query_param_from_api_repr from google.cloud.bigquery.query import ArrayQueryParameter @@ -39,6 +40,7 @@ from google.cloud.bigquery._helpers import _ListApiResourceProperty from google.cloud.bigquery._helpers import _TypedApiResourceProperty from google.cloud.bigquery._helpers import DEFAULT_RETRY +from google.cloud.bigquery._helpers import _int_or_none _DONE_STATE = 'DONE' _STOPPED_REASON = 'stopped' @@ -65,22 +67,6 @@ } -def _bool_or_none(value): - """Helper: deserialize boolean value from JSON string.""" - if isinstance(value, bool): - return value - if value is not None: - return value.lower() in ['t', 'true', '1'] - - -def _int_or_none(value): - """Helper: deserialize int value from JSON string.""" - if isinstance(value, int): - return value - if value is not None: - return int(value) - - def _error_result_to_exception(error_result): """Maps BigQuery error reasons to an exception. @@ -1315,6 +1301,14 @@ def _to_api_repr_udf_resources(value): ] +def _from_api_repr_table_defs(resource): + return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()} + + +def _to_api_repr_table_defs(value): + return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} + + class QueryJobConfig(object): """Configuration options for query jobs. @@ -1469,6 +1463,16 @@ def from_api_repr(cls, resource): https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition """ + table_definitions = _TypedApiResourceProperty( + 'table_definitions', 'tableDefinitions', dict) + """ + Definitions for external tables. A dictionary from table names (strings) + to :class:`google.cloud.bigquery.external_config.ExternalConfig`. + + See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions + """ + _maximum_billing_tier = None _maximum_bytes_billed = None @@ -1478,6 +1482,8 @@ def from_api_repr(cls, resource): 'destinationTable': ( TableReference.from_api_repr, TableReference.to_api_repr), 'maximumBytesBilled': (int, str), + 'tableDefinitions': (_from_api_repr_table_defs, + _to_api_repr_table_defs), _QUERY_PARAMETERS_KEY: ( _from_api_repr_query_parameters, _to_api_repr_query_parameters), _UDF_RESOURCES_KEY: ( @@ -1615,6 +1621,13 @@ def maximum_bytes_billed(self): """ return self._configuration.maximum_bytes_billed + @property + def table_definitions(self): + """See + :class:`~google.cloud.bigquery.job.QueryJobConfig.table_definitions`. + """ + return self._configuration.table_definitions + def _build_resource(self): """Generate a resource for :meth:`begin`.""" configuration = self._configuration.to_api_repr() diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 4cb97c6601f4..a25524a2a7dc 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -436,47 +436,23 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(ROWS, key=by_wavelength)) def test_load_table_from_storage_then_dump_table(self): - from google.cloud._testing import _NamedTemporaryFile - from google.cloud.storage import Client as StorageClient - - local_id = unique_resource_id() - BUCKET_NAME = 'bq_load_test' + local_id - BLOB_NAME = 'person_ages.csv' - GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME) + TABLE_ID = 'test_table' ROWS = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), ('Wylma Phlyntstone', 29), ('Bhettye Rhubble', 27), ] - TABLE_NAME = 'test_table' - - storage_client = StorageClient() - - # In the **very** rare case the bucket name is reserved, this - # fails with a ConnectionError. - bucket = storage_client.create_bucket(BUCKET_NAME) - self.to_delete.append(bucket) - - blob = bucket.blob(BLOB_NAME) - - with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as csv_write: - writer = csv.writer(csv_write) - writer.writerow(('Full Name', 'Age')) - writer.writerows(ROWS) - - with open(temp.name, 'rb') as csv_read: - blob.upload_from_file(csv_read, content_type='text/csv') - - self.to_delete.insert(0, blob) + GS_URL = self._write_csv_to_storage( + 'bq_load_test' + unique_resource_id(), 'person_ages.csv', + ('Full Name', 'Age'), ROWS) dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age]) + table_arg = Table(dataset.table(TABLE_ID), schema=[full_name, age]) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -486,7 +462,7 @@ def test_load_table_from_storage_then_dump_table(self): config.source_format = 'CSV' config.write_disposition = 'WRITE_EMPTY' job = Config.CLIENT.load_table_from_storage( - GS_URL, dataset.table(TABLE_NAME), job_config=config) + GS_URL, dataset.table(TABLE_ID), job_config=config) # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability @@ -501,44 +477,19 @@ def test_load_table_from_storage_then_dump_table(self): sorted(ROWS, key=by_age)) def test_load_table_from_storage_w_autodetect_schema(self): - from google.cloud._testing import _NamedTemporaryFile - from google.cloud.storage import Client as StorageClient from google.cloud.bigquery import SchemaField - local_id = unique_resource_id() - bucket_name = 'bq_load_test' + local_id - blob_name = 'person_ages.csv' - gs_url = 'gs://{}/{}'.format(bucket_name, blob_name) rows = [ ('Phred Phlyntstone', 32), ('Bharney Rhubble', 33), ('Wylma Phlyntstone', 29), ('Bhettye Rhubble', 27), ] * 100 # BigQuery internally uses the first 100 rows to detect schema - table_name = 'test_table' - - storage_client = StorageClient() - - # In the **very** rare case the bucket name is reserved, this - # fails with a ConnectionError. - bucket = storage_client.create_bucket(bucket_name) - self.to_delete.append(bucket) - - blob = bucket.blob(blob_name) - - with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as csv_write: - writer = csv.writer(csv_write) - writer.writerow(('Full Name', 'Age')) - writer.writerows(rows) - - with open(temp.name, 'rb') as csv_read: - blob.upload_from_file(csv_read, content_type='text/csv') - - self.to_delete.insert(0, blob) - + gs_url = self._write_csv_to_storage( + 'bq_load_test' + unique_resource_id(), 'person_ages.csv', + ('Full Name', 'Age'), rows) dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) - table_ref = dataset.table(table_name) + table_ref = dataset.table('test_table') config = bigquery.LoadJobConfig() config.autodetect = True @@ -564,6 +515,33 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.assertEqual( sorted(actual_row_tuples, key=by_age), sorted(rows, key=by_age)) + def _write_csv_to_storage(self, bucket_name, blob_name, header_row, + data_rows): + from google.cloud._testing import _NamedTemporaryFile + from google.cloud.storage import Client as StorageClient + + storage_client = StorageClient() + + # In the **very** rare case the bucket name is reserved, this + # fails with a ConnectionError. + bucket = storage_client.create_bucket(bucket_name) + self.to_delete.append(bucket) + + blob = bucket.blob(blob_name) + + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as csv_write: + writer = csv.writer(csv_write) + writer.writerow(header_row) + writer.writerows(data_rows) + + with open(temp.name, 'rb') as csv_read: + blob.upload_from_file(csv_read, content_type='text/csv') + + self.to_delete.insert(0, blob) + + return 'gs://{}/{}'.format(bucket_name, blob_name) + def _load_table_for_extract_table( self, storage_client, rows, bucket_name, blob_name, table): from google.cloud._testing import _NamedTemporaryFile @@ -1271,6 +1249,36 @@ def test_query_future(self): row_tuples = [r.values() for r in iterator] self.assertEqual(row_tuples, [(1,)]) + def test_query_table_def(self): + rows = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + gs_url = self._write_csv_to_storage( + 'bq_load_test' + unique_resource_id(), 'person_ages.csv', + ('Full Name', 'Age'), rows) + + job_config = bigquery.QueryJobConfig() + table_id = 'flintstones' + ec = bigquery.ExternalConfig('CSV') + ec.source_uris = [gs_url] + ec.schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + ] + ec.options.skip_leading_rows = 1 # skip the header row + job_config.table_definitions = {table_id: ec} + sql = 'SELECT * from %s' % table_id + + got_rows = Config.CLIENT.query_rows(sql, job_config=job_config) + + row_tuples = [r.values() for r in got_rows] + by_age = operator.itemgetter(1) + self.assertEqual(sorted(row_tuples, key=by_age), + sorted(rows, key=by_age)) + def test_create_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 6768093ed0b3..b7887428606d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -55,7 +55,6 @@ def test_api_repr_base(self): ], }, }) - want_resource = copy.deepcopy(resource) ec = ExternalConfig.from_api_repr(resource) self._verify_base(ec) self.assertEqual(ec.schema, @@ -63,7 +62,7 @@ def test_api_repr_base(self): self.assertIsNone(ec.options) got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) + self.assertEqual(got_resource, resource) def _verify_base(self, ec): self.assertEqual(ec.autodetect, True) @@ -85,7 +84,6 @@ def test_api_repr_sheets(self): 'sourceFormat': 'GOOGLE_SHEETS', 'googleSheetsOptions': {'skipLeadingRows': '123'}, }) - want_resource = copy.deepcopy(resource) ec = ExternalConfig.from_api_repr(resource) @@ -96,13 +94,13 @@ def test_api_repr_sheets(self): got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) + self.assertEqual(got_resource, resource) - del want_resource['googleSheetsOptions']['skipLeadingRows'] - ec = ExternalConfig.from_api_repr(copy.deepcopy(want_resource)) + del resource['googleSheetsOptions']['skipLeadingRows'] + ec = ExternalConfig.from_api_repr(resource) self.assertIsNone(ec.options.skip_leading_rows) got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) + self.assertEqual(got_resource, resource) def test_api_repr_csv(self): from google.cloud.bigquery.external_config import CSVOptions @@ -118,7 +116,6 @@ def test_api_repr_csv(self): 'encoding': 'encoding', }, }) - want_resource = copy.deepcopy(resource) ec = ExternalConfig.from_api_repr(resource) @@ -134,13 +131,13 @@ def test_api_repr_csv(self): got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) + self.assertEqual(got_resource, resource) - del want_resource['csvOptions']['skipLeadingRows'] - ec = ExternalConfig.from_api_repr(copy.deepcopy(want_resource)) + del resource['csvOptions']['skipLeadingRows'] + ec = ExternalConfig.from_api_repr(resource) self.assertIsNone(ec.options.skip_leading_rows) got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) + self.assertEqual(got_resource, resource) def test_api_repr_bigtable(self): from google.cloud.bigquery.external_config import BigtableOptions @@ -178,7 +175,6 @@ def test_api_repr_bigtable(self): ], }, }) - want_resource = copy.deepcopy(resource) ec = ExternalConfig.from_api_repr(resource) @@ -207,19 +203,7 @@ def test_api_repr_bigtable(self): got_resource = ec.to_api_repr() - self.assertEqual(got_resource, want_resource) - - def test_option_mismatch(self): - from google.cloud.bigquery.external_config import CSVOptions - from google.cloud.bigquery.external_config import BigtableOptions - from google.cloud.bigquery.external_config import GoogleSheetsOptions - - for source_format, opts in (('BIGTABLE', CSVOptions()), - ('CSV', GoogleSheetsOptions()), - ('GOOGLE_SHEETS', BigtableOptions())): - ec = ExternalConfig(source_format) - with self.assertRaises(ValueError): - ec.options = opts + self.assertEqual(got_resource, resource) def _copy_and_update(d, u): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index c1c190328968..0e0b667e704d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -42,27 +42,6 @@ def _make_client(project='test-project', connection=None): return client -class Test__bool_or_none(unittest.TestCase): - - def _call_fut(self, *args, **kwargs): - from google.cloud.bigquery import job - - return job._bool_or_none(*args, **kwargs) - - def test_w_bool(self): - self.assertTrue(self._call_fut(True)) - self.assertFalse(self._call_fut(False)) - - def test_w_none(self): - self.assertIsNone(self._call_fut(None)) - - def test_w_str(self): - self.assertTrue(self._call_fut('1')) - self.assertTrue(self._call_fut('t')) - self.assertTrue(self._call_fut('true')) - self.assertFalse(self._call_fut('anything else')) - - class Test__int_or_none(unittest.TestCase): def _call_fut(self, *args, **kwargs): @@ -1673,6 +1652,17 @@ def _verifyQueryParameters(self, job, config): for found, expected in zip(job.query_parameters, query_parameters): self.assertEqual(found.to_api_repr(), expected) + def _verify_table_definitions(self, job, config): + table_defs = config.get('tableDefinitions') + if job.table_definitions is None: + self.assertIsNone(table_defs) + else: + self.assertEqual(len(job.table_definitions), len(table_defs)) + for found_key, found_ec in job.table_definitions.items(): + expected_ec = table_defs.get(found_key) + self.assertIsNotNone(expected_ec) + self.assertEqual(found_ec.to_api_repr(), expected_ec) + def _verify_configuration_properties(self, job, configuration): if 'dryRun' in configuration: self.assertEqual(job.dry_run, @@ -1691,6 +1681,7 @@ def _verifyResourceProperties(self, job, resource): self._verifyIntegerResourceProperties(job, query_config) self._verify_udf_resources(job, query_config) self._verifyQueryParameters(job, query_config) + self._verify_table_definitions(job, query_config) self.assertEqual(job.query, query_config['query']) if 'createDisposition' in query_config: @@ -1754,6 +1745,7 @@ def test_ctor_defaults(self): self.assertIsNone(job.write_disposition) self.assertIsNone(job.maximum_billing_tier) self.assertIsNone(job.maximum_bytes_billed) + self.assertIsNone(job.table_definitions) def test_ctor_w_udf_resources(self): from google.cloud.bigquery.job import QueryJobConfig @@ -2516,6 +2508,93 @@ def test_begin_w_positional_query_parameter(self): self._verifyResourceProperties(job, RESOURCE) self.assertEqual(req['data'], SENT) + def test_begin_w_table_defs(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.external_config import ExternalConfig + from google.cloud.bigquery.external_config import BigtableColumn + from google.cloud.bigquery.external_config import BigtableColumnFamily + + PATH = '/projects/%s/jobs' % (self.PROJECT,) + RESOURCE = self._makeResource() + # Ensure None for missing server-set props + del RESOURCE['statistics']['creationTime'] + del RESOURCE['etag'] + del RESOURCE['selfLink'] + del RESOURCE['user_email'] + + bt_config = ExternalConfig('BIGTABLE') + bt_config.ignore_unknown_values = True + bt_config.options.read_rowkey_as_string = True + cf = BigtableColumnFamily() + cf.family_id = 'cf' + col = BigtableColumn() + col.field_name = 'fn' + cf.columns = [col] + bt_config.options.column_families = [cf] + BT_CONFIG_RESOURCE = { + 'sourceFormat': 'BIGTABLE', + 'ignoreUnknownValues': True, + 'bigtableOptions': { + 'readRowkeyAsString': True, + 'columnFamilies': [{ + 'familyId': 'cf', + 'columns': [{'fieldName': 'fn'}], + }], + }, + } + CSV_CONFIG_RESOURCE = { + 'sourceFormat': 'CSV', + 'maxBadRecords': 8, + 'csvOptions': { + 'allowJaggedRows': True, + }, + } + csv_config = ExternalConfig('CSV') + csv_config.max_bad_records = 8 + csv_config.options.allow_jagged_rows = True + bt_table = 'bigtable-table' + csv_table = 'csv-table' + RESOURCE['configuration']['query']['tableDefinitions'] = { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + } + want_resource = copy.deepcopy(RESOURCE) + conn = _Connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + config = QueryJobConfig() + config.table_definitions = { + bt_table: bt_config, + csv_table: csv_config, + } + config.use_legacy_sql = True + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) + + job.begin() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'query': { + 'query': self.QUERY, + 'useLegacySql': True, + 'tableDefinitions': { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + }, + }, + }, + } + self._verifyResourceProperties(job, want_resource) + self.assertEqual(req['data'], SENT) + def test_dry_run_query(self): from google.cloud.bigquery.job import QueryJobConfig From fcae6b3b24e8f187d18979886c35cccba56ef0e9 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 16:22:17 -0400 Subject: [PATCH 0325/2016] bigquery: support external data definition for tables (#4193) --- .../google/cloud/bigquery/schema.py | 47 +++++++ .../google/cloud/bigquery/table.py | 94 ++++++------- .../google-cloud-bigquery/tests/system.py | 37 ++++- .../tests/unit/test_client.py | 50 +++++++ .../tests/unit/test_schema.py | 129 +++++++++++++++++ .../tests/unit/test_table.py | 130 +++--------------- 6 files changed, 323 insertions(+), 164 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 535c445a3726..1aa95271c70d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -154,3 +154,50 @@ def __hash__(self): def __repr__(self): return 'SchemaField{}'.format(self._key()) + + +def _parse_schema_resource(info): + """Parse a resource fragment into a schema field. + + :type info: mapping + :param info: should contain a "fields" key to be parsed + + :rtype: list of :class:`SchemaField`, or ``NoneType`` + :returns: a list of parsed fields, or ``None`` if no "fields" key is + present in ``info``. + """ + if 'fields' not in info: + return () + + schema = [] + for r_field in info['fields']: + name = r_field['name'] + field_type = r_field['type'] + mode = r_field.get('mode', 'NULLABLE') + description = r_field.get('description') + sub_fields = _parse_schema_resource(r_field) + schema.append( + SchemaField(name, field_type, mode, description, sub_fields)) + return schema + + +def _build_schema_resource(fields): + """Generate a resource fragment for a schema. + + :type fields: sequence of :class:`SchemaField` + :param fields: schema to be dumped + + :rtype: mapping + :returns: a mapping describing the schema of the supplied fields. + """ + infos = [] + for field in fields: + info = {'name': field.name, + 'type': field.field_type, + 'mode': field.mode} + if field.description is not None: + info['description'] = field.description + if field.fields: + info['fields'] = _build_schema_resource(field.fields) + infos.append(info) + return infos diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d4c6977c8755..592d1ad9def6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -23,6 +23,9 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import _build_schema_resource +from google.cloud.bigquery.schema import _parse_schema_resource +from google.cloud.bigquery.external_config import ExternalConfig _TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'" @@ -159,13 +162,15 @@ class Table(object): all_fields = [ 'description', 'friendly_name', 'expires', 'location', - 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema' + 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema', + 'external_data_configuration', ] def __init__(self, table_ref, schema=()): self._project = table_ref.project self._table_id = table_ref.table_id self._dataset_id = table_ref.dataset_id + self._external_config = None self._properties = {} # Let the @property do validation. self.schema = schema @@ -537,10 +542,37 @@ def view_use_legacy_sql(self, value): @property def streaming_buffer(self): + """Information about a table's streaming buffer. + + :rtype: :class:`StreamingBuffer` + :returns: Streaming buffer information, returned from get_table. + """ sb = self._properties.get('streamingBuffer') if sb is not None: return StreamingBuffer(sb) + @property + def external_data_configuration(self): + """Configuration for an external data source. + + If not set, None is returned. + + :rtype: :class:`ExternalConfig`, or ``NoneType`` + :returns: The external configuration, or None (the default). + """ + return self._external_config + + @external_data_configuration.setter + def external_data_configuration(self, value): + """Sets the configuration for an external data source. + + :type value: :class:`ExternalConfig`, or ``NoneType`` + :param value: The ExternalConfig, or None to unset. + """ + if not (value is None or isinstance(value, ExternalConfig)): + raise ValueError("Pass an ExternalConfig or None") + self._external_config = value + @classmethod def from_api_repr(cls, resource): """Factory: construct a table given its API representation @@ -579,6 +611,9 @@ def _set_properties(self, api_response): cleaned = api_response.copy() schema = cleaned.pop('schema', {'fields': ()}) self.schema = _parse_schema_resource(schema) + ec = cleaned.pop('externalDataConfiguration', None) + if ec: + self.external_data_configuration = ExternalConfig.from_api_repr(ec) if 'creationTime' in cleaned: cleaned['creationTime'] = float(cleaned['creationTime']) if 'lastModifiedTime' in cleaned: @@ -614,12 +649,20 @@ def _populate_schema_resource(self, resource): 'fields': _build_schema_resource(self._schema), } + def _populate_external_config(self, resource): + if not self.external_data_configuration: + resource['externalDataConfiguration'] = None + else: + resource['externalDataConfiguration'] = ExternalConfig.to_api_repr( + self.external_data_configuration) + custom_resource_fields = { 'expires': _populate_expires_resource, 'partitioning_type': _populate_partitioning_type_resource, 'view_query': _populate_view_query_resource, 'view_use_legacy_sql': _populate_view_use_legacy_sql_resource, - 'schema': _populate_schema_resource + 'schema': _populate_schema_resource, + 'external_data_configuration': _populate_external_config, } def _build_resource(self, filter_fields): @@ -690,50 +733,3 @@ def __init__(self, resource): # time is in milliseconds since the epoch. self.oldest_entry_time = _datetime_from_microseconds( 1000.0 * int(resource['oldestEntryTime'])) - - -def _parse_schema_resource(info): - """Parse a resource fragment into a schema field. - - :type info: mapping - :param info: should contain a "fields" key to be parsed - - :rtype: list of :class:`SchemaField`, or ``NoneType`` - :returns: a list of parsed fields, or ``None`` if no "fields" key is - present in ``info``. - """ - if 'fields' not in info: - return () - - schema = [] - for r_field in info['fields']: - name = r_field['name'] - field_type = r_field['type'] - mode = r_field.get('mode', 'NULLABLE') - description = r_field.get('description') - sub_fields = _parse_schema_resource(r_field) - schema.append( - SchemaField(name, field_type, mode, description, sub_fields)) - return schema - - -def _build_schema_resource(fields): - """Generate a resource fragment for a schema. - - :type fields: sequence of :class:`SchemaField` - :param fields: schema to be dumped - - :rtype: mapping - :returns: a mapping describing the schema of the supplied fields. - """ - infos = [] - for field in fields: - info = {'name': field.name, - 'type': field.field_type, - 'mode': field.mode} - if field.description is not None: - info['description'] = field.description - if field.fields: - info['fields'] = _build_schema_resource(field.fields) - infos.append(info) - return infos diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index a25524a2a7dc..d745be032b2c 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1257,7 +1257,7 @@ def test_query_table_def(self): ('Bhettye Rhubble', 27), ] gs_url = self._write_csv_to_storage( - 'bq_load_test' + unique_resource_id(), 'person_ages.csv', + 'bq_external_test' + unique_resource_id(), 'person_ages.csv', ('Full Name', 'Age'), rows) job_config = bigquery.QueryJobConfig() @@ -1270,7 +1270,7 @@ def test_query_table_def(self): ] ec.options.skip_leading_rows = 1 # skip the header row job_config.table_definitions = {table_id: ec} - sql = 'SELECT * from %s' % table_id + sql = 'SELECT * FROM %s' % table_id got_rows = Config.CLIENT.query_rows(sql, job_config=job_config) @@ -1279,6 +1279,39 @@ def test_query_table_def(self): self.assertEqual(sorted(row_tuples, key=by_age), sorted(rows, key=by_age)) + def test_query_external_table(self): + rows = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + gs_url = self._write_csv_to_storage( + 'bq_external_test' + unique_resource_id(), 'person_ages.csv', + ('Full Name', 'Age'), rows) + dataset_id = _make_dataset_id('query_external_table') + dataset = self.temp_dataset(dataset_id) + table_id = 'flintstones' + full_name = bigquery.SchemaField('full_name', 'STRING', + mode='REQUIRED') + age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') + table_arg = Table(dataset.table(table_id), schema=[full_name, age]) + ec = bigquery.ExternalConfig('CSV') + ec.source_uris = [gs_url] + ec.options.skip_leading_rows = 1 # skip the header row + table_arg.external_data_configuration = ec + table = Config.CLIENT.create_table(table_arg) + self.to_delete.insert(0, table) + + sql = 'SELECT * FROM %s.%s' % (dataset_id, table_id) + + got_rows = Config.CLIENT.query_rows(sql) + + row_tuples = [r.values() for r in got_rows] + by_age = operator.itemgetter(1) + self.assertEqual(sorted(row_tuples, key=by_age), + sorted(rows, key=by_age)) + def test_create_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 9dfa311e806c..5bbdbc3121fe 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -559,6 +559,56 @@ def test_create_table_w_schema_and_query(self): self.assertEqual(got.schema, schema) self.assertEqual(got.view_query, query) + def test_create_table_w_external(self): + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.external_config import ExternalConfig + + path = 'projects/%s/datasets/%s/tables' % ( + self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'externalDataConfiguration': { + 'sourceFormat': 'CSV', + 'autodetect': True, + }, + } + conn = client._connection = _Connection(resource) + table = Table(self.TABLE_REF) + ec = ExternalConfig('CSV') + ec.autodetect = True + table.external_data_configuration = ec + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, + }, + 'externalDataConfiguration': { + 'sourceFormat': 'CSV', + 'autodetect': True, + } + } + self.assertEqual(req['data'], sent) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual(got.external_data_configuration.source_format, 'CSV') + self.assertEqual(got.external_data_configuration.autodetect, True) + def test_get_table(self): path = 'projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_ID) diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index d08e7757063e..84e5d306c348 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -236,3 +236,132 @@ def test___repr__(self): field1 = self._make_one('field1', 'STRING') expected = "SchemaField('field1', 'string', 'NULLABLE', None, ())" self.assertEqual(repr(field1), expected) + + +# TODO: dedup with the same class in test_table.py. +class _SchemaBase(object): + + def _verify_field(self, field, r_field): + self.assertEqual(field.name, r_field['name']) + self.assertEqual(field.field_type, r_field['type']) + self.assertEqual(field.mode, r_field.get('mode', 'NULLABLE')) + + def _verifySchema(self, schema, resource): + r_fields = resource['schema']['fields'] + self.assertEqual(len(schema), len(r_fields)) + + for field, r_field in zip(schema, r_fields): + self._verify_field(field, r_field) + + +class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): + + def _call_fut(self, resource): + from google.cloud.bigquery.schema import _parse_schema_resource + + return _parse_schema_resource(resource) + + def _makeResource(self): + return { + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}, + ]}, + } + + def test__parse_schema_resource_defaults(self): + RESOURCE = self._makeResource() + schema = self._call_fut(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + def test__parse_schema_resource_subfields(self): + RESOURCE = self._makeResource() + RESOURCE['schema']['fields'].append( + {'name': 'phone', + 'type': 'RECORD', + 'mode': 'REPEATED', + 'fields': [{'name': 'type', + 'type': 'STRING', + 'mode': 'REQUIRED'}, + {'name': 'number', + 'type': 'STRING', + 'mode': 'REQUIRED'}]}) + schema = self._call_fut(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + def test__parse_schema_resource_fields_without_mode(self): + RESOURCE = self._makeResource() + RESOURCE['schema']['fields'].append( + {'name': 'phone', + 'type': 'STRING'}) + + schema = self._call_fut(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + +class Test_build_schema_resource(unittest.TestCase, _SchemaBase): + + def _call_fut(self, resource): + from google.cloud.bigquery.schema import _build_schema_resource + + return _build_schema_resource(resource) + + def test_defaults(self): + from google.cloud.bigquery.schema import SchemaField + + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + resource = self._call_fut([full_name, age]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED'}) + self.assertEqual(resource[1], + {'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED'}) + + def test_w_description(self): + from google.cloud.bigquery.schema import SchemaField + + DESCRIPTION = 'DESCRIPTION' + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED', + description=DESCRIPTION) + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + resource = self._call_fut([full_name, age]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': DESCRIPTION}) + self.assertEqual(resource[1], + {'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED'}) + + def test_w_subfields(self): + from google.cloud.bigquery.schema import SchemaField + + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + ph_type = SchemaField('type', 'STRING', 'REQUIRED') + ph_num = SchemaField('number', 'STRING', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='REPEATED', + fields=[ph_type, ph_num]) + resource = self._call_fut([full_name, phone]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED'}) + self.assertEqual(resource[1], + {'name': 'phone', + 'type': 'RECORD', + 'mode': 'REPEATED', + 'fields': [{'name': 'type', + 'type': 'STRING', + 'mode': 'REQUIRED'}, + {'name': 'number', + 'type': 'STRING', + 'mode': 'REQUIRED'}]}) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 63dafb8ac5ec..5216b1d8c7ba 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -200,6 +200,11 @@ def _makeResource(self): 'estimatedRows': str(self.NUM_EST_ROWS), 'estimatedBytes': str(self.NUM_EST_BYTES), 'oldestEntryTime': self.WHEN_TS * 1000}, + 'externalDataConfiguration': { + 'sourceFormat': 'CSV', + 'csvOptions': { + 'allowJaggedRows': True, + 'encoding': 'encoding'}}, } def _verifyReadonlyResourceProperties(self, table, resource): @@ -269,6 +274,11 @@ def _verifyResourceProperties(self, table, resource): else: self.assertEqual(table.schema, []) + if 'externalDataConfiguration' in resource: + edc = table.external_data_configuration + self.assertEqual(edc.source_format, 'CSV') + self.assertEqual(edc.options.allow_jagged_rows, True) + def test_ctor(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -517,6 +527,13 @@ def test_view_use_legacy_sql_setter(self): self.assertEqual(table.view_use_legacy_sql, True) self.assertEqual(table.view_query, 'select * from foo') + def test_external_data_configuration_setter_bad_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + with self.assertRaises(ValueError): + table.external_data_configuration = 12345 + def test_from_api_repr_missing_identity(self): self._setUpConstants() RESOURCE = {} @@ -720,116 +737,3 @@ def test__row_from_mapping_w_schema(self): self.assertEqual( self._call_fut(MAPPING, table.schema), ('Phred Phlyntstone', 32, ['red', 'green'], None)) - - -class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): - - def _call_fut(self, resource): - from google.cloud.bigquery.table import _parse_schema_resource - - return _parse_schema_resource(resource) - - def _makeResource(self): - return { - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}, - ]}, - } - - def test__parse_schema_resource_defaults(self): - RESOURCE = self._makeResource() - schema = self._call_fut(RESOURCE['schema']) - self._verifySchema(schema, RESOURCE) - - def test__parse_schema_resource_subfields(self): - RESOURCE = self._makeResource() - RESOURCE['schema']['fields'].append( - {'name': 'phone', - 'type': 'RECORD', - 'mode': 'REPEATED', - 'fields': [{'name': 'type', - 'type': 'STRING', - 'mode': 'REQUIRED'}, - {'name': 'number', - 'type': 'STRING', - 'mode': 'REQUIRED'}]}) - schema = self._call_fut(RESOURCE['schema']) - self._verifySchema(schema, RESOURCE) - - def test__parse_schema_resource_fields_without_mode(self): - RESOURCE = self._makeResource() - RESOURCE['schema']['fields'].append( - {'name': 'phone', - 'type': 'STRING'}) - - schema = self._call_fut(RESOURCE['schema']) - self._verifySchema(schema, RESOURCE) - - -class Test_build_schema_resource(unittest.TestCase, _SchemaBase): - - def _call_fut(self, resource): - from google.cloud.bigquery.table import _build_schema_resource - - return _build_schema_resource(resource) - - def test_defaults(self): - from google.cloud.bigquery.table import SchemaField - - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED'}) - self.assertEqual(resource[1], - {'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED'}) - - def test_w_description(self): - from google.cloud.bigquery.table import SchemaField - - DESCRIPTION = 'DESCRIPTION' - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED', - description=DESCRIPTION) - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': DESCRIPTION}) - self.assertEqual(resource[1], - {'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED'}) - - def test_w_subfields(self): - from google.cloud.bigquery.table import SchemaField - - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - ph_type = SchemaField('type', 'STRING', 'REQUIRED') - ph_num = SchemaField('number', 'STRING', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='REPEATED', - fields=[ph_type, ph_num]) - resource = self._call_fut([full_name, phone]) - self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED'}) - self.assertEqual(resource[1], - {'name': 'phone', - 'type': 'RECORD', - 'mode': 'REPEATED', - 'fields': [{'name': 'type', - 'type': 'STRING', - 'mode': 'REQUIRED'}, - {'name': 'number', - 'type': 'STRING', - 'mode': 'REQUIRED'}]}) From 7fa49254e806b5a4ebdc4878a69f6c956cb9d44a Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 16:28:29 -0400 Subject: [PATCH 0326/2016] bigquery: support update of dataset access entries (#4197) --- .../google/cloud/bigquery/client.py | 15 +++++++++++---- .../tests/unit/test_client.py | 13 +++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index da3b8dcaaea5..e3509deb388e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -324,10 +324,17 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): for f in fields: if not hasattr(dataset, f): raise ValueError('No Dataset field %s' % f) - # snake case to camel case - words = f.split('_') - api_field = words[0] + ''.join(map(str.capitalize, words[1:])) - partial[api_field] = getattr(dataset, f) + # All dataset attributes are trivially convertible to JSON except + # for access entries. + if f == 'access_entries': + attr = dataset._build_access_resource() + api_field = 'access' + else: + attr = getattr(dataset, f) + # snake case to camel case + words = f.split('_') + api_field = words[0] + ''.join(map(str.capitalize, words[1:])) + partial[api_field] = attr if dataset.etag is not None: headers = {'If-Match': dataset.etag} else: diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 5bbdbc3121fe..aca778ce3a92 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -642,13 +642,16 @@ def test_update_dataset_w_invalid_field(self): client.update_dataset(Dataset(client.dataset(self.DS_ID)), ["foo"]) def test_update_dataset(self): - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import Dataset, AccessEntry PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) DESCRIPTION = 'DESCRIPTION' FRIENDLY_NAME = 'TITLE' LOCATION = 'loc' LABELS = {'priority': 'high'} + ACCESS = [ + {'role': 'OWNER', 'userByEmail': 'phred@example.com'}, + ] EXP = 17 RESOURCE = { 'datasetReference': @@ -659,6 +662,7 @@ def test_update_dataset(self): 'location': LOCATION, 'defaultTableExpirationMs': EXP, 'labels': LABELS, + 'access': ACCESS, } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -669,8 +673,11 @@ def test_update_dataset(self): ds.location = LOCATION ds.default_table_expiration_ms = EXP ds.labels = LABELS + ds.access_entries = [ + AccessEntry('OWNER', 'userByEmail', 'phred@example.com')] ds2 = client.update_dataset( - ds, ['description', 'friendly_name', 'location', 'labels']) + ds, ['description', 'friendly_name', 'location', 'labels', + 'access_entries']) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'PATCH') @@ -679,6 +686,7 @@ def test_update_dataset(self): 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, 'labels': LABELS, + 'access': ACCESS, } self.assertEqual(req['data'], SENT) self.assertEqual(req['path'], '/' + PATH) @@ -687,6 +695,7 @@ def test_update_dataset(self): self.assertEqual(ds2.friendly_name, ds.friendly_name) self.assertEqual(ds2.location, ds.location) self.assertEqual(ds2.labels, ds.labels) + self.assertEqual(ds2.access_entries, ds.access_entries) # ETag becomes If-Match header. ds._properties['etag'] = 'etag' From 1185cd4b92acfe16113b6c4e3fa4604151fcc3f5 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 17:54:35 -0400 Subject: [PATCH 0327/2016] biqquery: factor out common values in system.py (#4194) --- .../google-cloud-bigquery/tests/system.py | 125 +++++------------- 1 file changed, 35 insertions(+), 90 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index d745be032b2c..390c88309c1c 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -41,6 +41,19 @@ JOB_TIMEOUT = 120 # 2 minutes WHERE = os.path.abspath(os.path.dirname(__file__)) +# Common table data used for many tests. +ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), +] +HEADER_ROW = ('Full Name', 'Age') +SCHEMA = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), +] + def _has_rows(result): return len(result) > 0 @@ -188,10 +201,7 @@ def test_list_datasets(self): def test_create_table(self): dataset = self.temp_dataset(_make_dataset_id('create_table')) table_id = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(table_id), schema=[full_name, age]) + table_arg = Table(dataset.table(table_id), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) @@ -230,11 +240,8 @@ def test_list_dataset_tables(self): 'newer' + unique_resource_id(), 'newest' + unique_resource_id(), ] - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') for table_name in tables_to_create: - table = Table(dataset.table(table_name), schema=[full_name, age]) + table = Table(dataset.table(table_name), schema=SCHEMA) created_table = retry_403(Config.CLIENT.create_table)(table) self.to_delete.insert(0, created_table) @@ -251,11 +258,7 @@ def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' - schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - ] - table_arg = Table(dataset.table(TABLE_NAME), schema=schema) + table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -285,11 +288,7 @@ def test_update_table_schema(self): dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' - schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - ] - table_arg = Table(dataset.table(TABLE_NAME), schema=schema) + table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -357,27 +356,18 @@ def test_create_rows_then_dump_table(self): def test_load_table_from_local_file_then_dump_table(self): from google.cloud._testing import _NamedTemporaryFile - ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] TABLE_NAME = 'test_table' dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') table_ref = dataset.table(TABLE_NAME) - table_arg = Table(table_ref, schema=[full_name, age]) + table_arg = Table(table_ref, schema=SCHEMA) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: with open(temp.name, 'w') as csv_write: writer = csv.writer(csv_write) - writer.writerow(('Full Name', 'Age')) + writer.writerow(HEADER_ROW) writer.writerows(ROWS) with open(temp.name, 'rb') as csv_read: @@ -437,22 +427,13 @@ def test_load_table_from_local_avro_file_then_dump_table(self): def test_load_table_from_storage_then_dump_table(self): TABLE_ID = 'test_table' - ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] GS_URL = self._write_csv_to_storage( 'bq_load_test' + unique_resource_id(), 'person_ages.csv', - ('Full Name', 'Age'), ROWS) + HEADER_ROW, ROWS) dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_ID), schema=[full_name, age]) + table_arg = Table(dataset.table(TABLE_ID), schema=SCHEMA) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -479,15 +460,12 @@ def test_load_table_from_storage_then_dump_table(self): def test_load_table_from_storage_w_autodetect_schema(self): from google.cloud.bigquery import SchemaField - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] * 100 # BigQuery internally uses the first 100 rows to detect schema + rows = ROWS * 100 + # BigQuery internally uses the first 100 rows to detect schema + gs_url = self._write_csv_to_storage( 'bq_load_test' + unique_resource_id(), 'person_ages.csv', - ('Full Name', 'Age'), rows) + HEADER_ROW, rows) dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) table_ref = dataset.table('test_table') @@ -557,7 +535,7 @@ def _load_table_for_extract_table( with _NamedTemporaryFile() as temp: with open(temp.name, 'w') as csv_write: writer = csv.writer(csv_write) - writer.writerow(('Full Name', 'Age')) + writer.writerow(HEADER_ROW) writer.writerows(rows) with open(temp.name, 'rb') as csv_read: @@ -589,14 +567,8 @@ def test_extract_table(self): table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) table = Table(table_ref) self.to_delete.insert(0, table) - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] self._load_table_for_extract_table( - storage_client, rows, bucket_name, blob_name, table_ref) + storage_client, ROWS, bucket_name, blob_name, table_ref) bucket = storage_client.bucket(bucket_name) destination_blob_name = 'person_ages_out.csv' destination = bucket.blob(destination_blob_name) @@ -621,14 +593,8 @@ def test_extract_table_w_job_config(self): table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) table = Table(table_ref) self.to_delete.insert(0, table) - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] self._load_table_for_extract_table( - storage_client, rows, bucket_name, blob_name, table_ref) + storage_client, ROWS, bucket_name, blob_name, table_ref) bucket = storage_client.bucket(bucket_name) destination_blob_name = 'person_ages_out.csv' destination = bucket.blob(destination_blob_name) @@ -673,10 +639,7 @@ def test_job_cancel(self): dataset = self.temp_dataset(DATASET_ID) - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(TABLE_NAME), schema=[full_name, age]) + table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -1250,24 +1213,15 @@ def test_query_future(self): self.assertEqual(row_tuples, [(1,)]) def test_query_table_def(self): - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] gs_url = self._write_csv_to_storage( 'bq_external_test' + unique_resource_id(), 'person_ages.csv', - ('Full Name', 'Age'), rows) + HEADER_ROW, ROWS) job_config = bigquery.QueryJobConfig() table_id = 'flintstones' ec = bigquery.ExternalConfig('CSV') ec.source_uris = [gs_url] - ec.schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), - ] + ec.schema = SCHEMA ec.options.skip_leading_rows = 1 # skip the header row job_config.table_definitions = {table_id: ec} sql = 'SELECT * FROM %s' % table_id @@ -1277,25 +1231,16 @@ def test_query_table_def(self): row_tuples = [r.values() for r in got_rows] by_age = operator.itemgetter(1) self.assertEqual(sorted(row_tuples, key=by_age), - sorted(rows, key=by_age)) + sorted(ROWS, key=by_age)) def test_query_external_table(self): - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] gs_url = self._write_csv_to_storage( 'bq_external_test' + unique_resource_id(), 'person_ages.csv', - ('Full Name', 'Age'), rows) + HEADER_ROW, ROWS) dataset_id = _make_dataset_id('query_external_table') dataset = self.temp_dataset(dataset_id) table_id = 'flintstones' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table_arg = Table(dataset.table(table_id), schema=[full_name, age]) + table_arg = Table(dataset.table(table_id), schema=SCHEMA) ec = bigquery.ExternalConfig('CSV') ec.source_uris = [gs_url] ec.options.skip_leading_rows = 1 # skip the header row @@ -1310,7 +1255,7 @@ def test_query_external_table(self): row_tuples = [r.values() for r in got_rows] by_age = operator.itemgetter(1) self.assertEqual(sorted(row_tuples, key=by_age), - sorted(rows, key=by_age)) + sorted(ROWS, key=by_age)) def test_create_rows_nested_nested(self): # See #2951 From c65988de9d53b29be294a8ad6b445b5dd39b839a Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 16 Oct 2017 14:55:41 -0700 Subject: [PATCH 0328/2016] BigQuery: option for job ID generation with user-supplied prefix (#4198) adds job_id_prefix to _make_job_id() --- .../google/cloud/bigquery/client.py | 62 ++++++++++++++----- .../google-cloud-bigquery/tests/system.py | 9 ++- .../tests/unit/test_client.py | 29 +++++++++ 3 files changed, 80 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e3509deb388e..ce318ed91dc9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -599,8 +599,8 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, extra_params=extra_params) def load_table_from_storage(self, source_uris, destination, - job_id=None, job_config=None, - retry=DEFAULT_RETRY): + job_id=None, job_id_prefix=None, + job_config=None, retry=DEFAULT_RETRY): """Starts a job for loading data into a table from CloudStorage. See @@ -618,6 +618,11 @@ def load_table_from_storage(self, source_uris, destination, :type job_id: str :param job_id: (Optional) Name of the job. + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. @@ -627,7 +632,7 @@ def load_table_from_storage(self, source_uris, destination, :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: a new ``LoadJob`` instance """ - job_id = _make_job_id(job_id) + job_id = _make_job_id(job_id, job_id_prefix) if isinstance(source_uris, six.string_types): source_uris = [source_uris] job = LoadJob(job_id, source_uris, destination, self, job_config) @@ -638,7 +643,7 @@ def load_table_from_file(self, file_obj, destination, rewind=False, size=None, num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, job_config=None): + job_id=None, job_id_prefix=None, job_config=None): """Upload the contents of this table from a file-like object. Like load_table_from_storage, this creates, starts and returns @@ -665,6 +670,11 @@ def load_table_from_file(self, file_obj, destination, :type job_id: str :param job_id: (Optional) Name of the job. + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. @@ -677,7 +687,7 @@ def load_table_from_file(self, file_obj, destination, be determined, or if the ``file_obj`` can be detected to be a file opened in text mode. """ - job_id = _make_job_id(job_id) + job_id = _make_job_id(job_id, job_id_prefix) job = LoadJob(job_id, None, destination, self, job_config) job_resource = job._build_resource() if rewind: @@ -801,8 +811,8 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries): return response - def copy_table(self, sources, destination, job_id=None, job_config=None, - retry=DEFAULT_RETRY): + def copy_table(self, sources, destination, job_id=None, job_id_prefix=None, + job_config=None, retry=DEFAULT_RETRY): """Start a job for copying one or more tables into another table. See @@ -821,6 +831,11 @@ def copy_table(self, sources, destination, job_id=None, job_config=None, :type job_id: str :param job_id: (Optional) The ID of the job. + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + :type job_config: :class:`google.cloud.bigquery.job.CopyJobConfig` :param job_config: (Optional) Extra configuration options for the job. @@ -830,7 +845,7 @@ def copy_table(self, sources, destination, job_id=None, job_config=None, :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: a new ``CopyJob`` instance """ - job_id = _make_job_id(job_id) + job_id = _make_job_id(job_id, job_id_prefix) if not isinstance(sources, collections.Sequence): sources = [sources] @@ -841,7 +856,7 @@ def copy_table(self, sources, destination, job_id=None, job_config=None, def extract_table( self, source, destination_uris, job_config=None, job_id=None, - retry=DEFAULT_RETRY): + job_id_prefix=None, retry=DEFAULT_RETRY): """Start a job to extract a table into Cloud Storage files. See @@ -863,6 +878,11 @@ def extract_table( :type job_id: str :param job_id: (Optional) The ID of the job. + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + :type job_config: :class:`google.cloud.bigquery.job.ExtractJobConfig` :param job_config: (Optional) Extra configuration options for the job. @@ -872,7 +892,7 @@ def extract_table( :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: a new ``ExtractJob`` instance """ - job_id = _make_job_id(job_id) + job_id = _make_job_id(job_id, job_id_prefix) if isinstance(destination_uris, six.string_types): destination_uris = [destination_uris] @@ -883,7 +903,8 @@ def extract_table( job.begin(retry=retry) return job - def query(self, query, job_config=None, job_id=None, retry=DEFAULT_RETRY): + def query(self, query, job_config=None, job_id=None, job_id_prefix=None, + retry=DEFAULT_RETRY): """Start a job that runs a SQL query. See @@ -900,13 +921,18 @@ def query(self, query, job_config=None, job_id=None, retry=DEFAULT_RETRY): :type job_id: str :param job_id: (Optional) ID to use for the query job. + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + :type retry: :class:`google.api.core.retry.Retry` :param retry: (Optional) How to retry the RPC. :rtype: :class:`google.cloud.bigquery.job.QueryJob` :returns: a new ``QueryJob`` instance """ - job_id = _make_job_id(job_id) + job_id = _make_job_id(job_id, job_id_prefix) job = QueryJob(job_id, query, client=self, job_config=job_config) job.begin(retry=retry) return job @@ -1269,18 +1295,24 @@ def _item_to_table(iterator, resource): return Table.from_api_repr(resource) -def _make_job_id(job_id): +def _make_job_id(job_id, prefix=None): """Construct an ID for a new job. :type job_id: str or ``NoneType`` :param job_id: the user-provided job ID + :type prefix: str or ``NoneType`` + :param prefix: (Optional) the user-provided prefix for a job ID + :rtype: str :returns: A job ID """ - if job_id is None: + if job_id is not None: + return job_id + elif prefix is not None: + return str(prefix) + str(uuid.uuid4()) + else: return str(uuid.uuid4()) - return job_id def _check_mode(stream): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 390c88309c1c..c434efcf70ea 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -633,7 +633,7 @@ def test_copy_table(self): def test_job_cancel(self): DATASET_ID = _make_dataset_id('job_cancel') - JOB_ID = 'fetch_' + DATASET_ID + str(uuid.uuid4()) + JOB_ID_PREFIX = 'fetch_' + DATASET_ID TABLE_NAME = 'test_table' QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) @@ -643,7 +643,7 @@ def test_job_cancel(self): table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - job = Config.CLIENT.query(QUERY, job_id=JOB_ID) + job = Config.CLIENT.query(QUERY, job_id_prefix=JOB_ID_PREFIX) job.cancel() retry = RetryInstanceState(_job_done, max_tries=8) @@ -866,7 +866,7 @@ def test_query_w_dml(self): query_job = Config.CLIENT.query( query_template.format(dataset_name, table_name), - job_id='test_query_w_dml_{}'.format(str(uuid.uuid4()))) + job_id_prefix='test_query_w_dml_') query_job.result() self.assertEqual(query_job.num_dml_affected_rows, 1) @@ -1053,8 +1053,7 @@ def test_query_w_query_params(self): query_job = Config.CLIENT.query( example['sql'], job_config=jconfig, - job_id='test_query_w_query_params{}'.format( - str(uuid.uuid4()))) + job_id_prefix='test_query_w_query_params') rows = list(query_job.result()) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index aca778ce3a92..ed62a27f0d76 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2859,6 +2859,35 @@ def test_list_partitions(self): [20160804, 20160805]) +class Test_make_job_id(unittest.TestCase): + def _call_fut(self, job_id, prefix=None): + from google.cloud.bigquery.client import _make_job_id + + return _make_job_id(job_id, prefix=prefix) + + def test__make_job_id_wo_suffix(self): + job_id = self._call_fut('job_id') + + self.assertEqual(job_id, 'job_id') + + def test__make_job_id_w_suffix(self): + with mock.patch('uuid.uuid4', side_effect=['212345']): + job_id = self._call_fut(None, prefix='job_id') + + self.assertEqual(job_id, 'job_id212345') + + def test__make_random_job_id(self): + with mock.patch('uuid.uuid4', side_effect=['212345']): + job_id = self._call_fut(None) + + self.assertEqual(job_id, '212345') + + def test__make_job_id_w_job_id_overrides_prefix(self): + job_id = self._call_fut('job_id', prefix='unused_prefix') + + self.assertEqual(job_id, 'job_id') + + class TestClientUpload(object): # NOTE: This is a "partner" to `TestClient` meant to test some of the # "load_table_from_file" portions of `Client`. It also uses From 41ec61b9bbbb4e5811d7fba10a79db85624c2dad Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 16 Oct 2017 15:13:01 -0700 Subject: [PATCH 0329/2016] BigQuery: skip snippets tests until they can be updated. --- packages/google-cloud-bigquery/nox.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 2d5772882fbe..d9f382fb2dfd 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -92,6 +92,8 @@ def system_tests(session, python_version): def snippets_tests(session, python_version): """Run the system test suite.""" + session.skip('Snippets need update for BigQuery Beta 2 changes.') + # Sanity check: Only run system tests if the environment variable is set. if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): session.skip('Credentials must be set via environment variable.') From d07ae644b473cd79229b29a5b021dcf1ea4d3556 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 19:27:44 -0400 Subject: [PATCH 0330/2016] bigquery: add filter to list_datasets (#4205) Support filtering datasets by label. --- .../google/cloud/bigquery/client.py | 11 ++++++++++- .../google-cloud-bigquery/tests/unit/test_client.py | 7 +++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index ce318ed91dc9..dedfe21135c1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -152,7 +152,7 @@ def list_projects(self, max_results=None, page_token=None, page_token=page_token, max_results=max_results) - def list_datasets(self, include_all=False, max_results=None, + def list_datasets(self, include_all=False, filter=None, max_results=None, page_token=None, retry=DEFAULT_RETRY): """List datasets for the project associated with this client. @@ -162,6 +162,11 @@ def list_datasets(self, include_all=False, max_results=None, :type include_all: bool :param include_all: True if results include hidden datasets. + :type filter: str + :param filter: an expression for filtering the results by label. + For syntax, see + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#filter. + :type max_results: int :param max_results: maximum number of datasets to return, If not passed, defaults to a value set by the API. @@ -181,6 +186,10 @@ def list_datasets(self, include_all=False, max_results=None, extra_params = {} if include_all: extra_params['all'] = True + if filter: + # TODO: consider supporting a dict of label -> value for filter, + # and converting it into a string here. + extra_params['filter'] = filter path = '/projects/%s/datasets' % (self.project,) return page_iterator.HTTPIterator( client=self, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ed62a27f0d76..1dd908e6507e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -228,13 +228,15 @@ def test_list_datasets_defaults(self): def test_list_datasets_explicit_response_missing_datasets_key(self): PATH = 'projects/%s/datasets' % self.PROJECT TOKEN = 'TOKEN' + FILTER = 'FILTER' DATA = {} creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_datasets( - include_all=True, max_results=3, page_token=TOKEN) + include_all=True, filter=FILTER, + max_results=3, page_token=TOKEN) page = six.next(iterator.pages) datasets = list(page) token = iterator.next_page_token @@ -247,7 +249,8 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['query_params'], - {'all': True, 'maxResults': 3, 'pageToken': TOKEN}) + {'all': True, 'filter': FILTER, + 'maxResults': 3, 'pageToken': TOKEN}) def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference From 76b666c77370e009db632a40020e2e9c4fd12d03 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Mon, 16 Oct 2017 19:39:13 -0400 Subject: [PATCH 0331/2016] bigquery: support table labels (#4207) --- .../google/cloud/bigquery/table.py | 32 +++++++++++++++++-- .../google-cloud-bigquery/tests/system.py | 13 ++++++-- .../tests/unit/test_client.py | 12 +++++-- .../tests/unit/test_table.py | 16 +++++++++- 4 files changed, 66 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 592d1ad9def6..b75af6802014 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -163,7 +163,7 @@ class Table(object): all_fields = [ 'description', 'friendly_name', 'expires', 'location', 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema', - 'external_data_configuration', + 'external_data_configuration', 'labels', ] def __init__(self, table_ref, schema=()): @@ -171,7 +171,7 @@ def __init__(self, table_ref, schema=()): self._table_id = table_ref.table_id self._dataset_id = table_ref.dataset_id self._external_config = None - self._properties = {} + self._properties = {'labels': {}} # Let the @property do validation. self.schema = schema @@ -238,6 +238,32 @@ def schema(self, value): else: self._schema = tuple(value) + @property + def labels(self): + """Labels for the table. + + This method always returns a dict. To change a table's labels, + modify the dict, then call ``Client.update_table``. To delete a + label, set its value to ``None`` before updating. + + :rtype: dict, {str -> str} + :returns: A dict of the the table's labels. + """ + return self._properties['labels'] + + @labels.setter + def labels(self, value): + """Update labels for the table. + + :type value: dict, {str -> str} + :param value: new labels + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, dict): + raise ValueError("Pass a dict") + self._properties['labels'] = value + @property def created(self): """Datetime at which the table was created. @@ -620,6 +646,8 @@ def _set_properties(self, api_response): cleaned['lastModifiedTime'] = float(cleaned['lastModifiedTime']) if 'expirationTime' in cleaned: cleaned['expirationTime'] = float(cleaned['expirationTime']) + if 'labels' not in cleaned: + cleaned['labels'] = {} self._properties.update(cleaned) def _populate_expires_resource(self, resource): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index c434efcf70ea..86082bf31982 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -265,18 +265,27 @@ def test_update_table(self): self.assertTrue(_table_exists(table)) self.assertIsNone(table.friendly_name) self.assertIsNone(table.description) + self.assertEquals(table.labels, {}) table.friendly_name = 'Friendly' table.description = 'Description' + table.labels = {'priority': 'high', 'color': 'blue'} table2 = Config.CLIENT.update_table( - table, ['friendly_name', 'description']) + table, ['friendly_name', 'description', 'labels']) self.assertEqual(table2.friendly_name, 'Friendly') self.assertEqual(table2.description, 'Description') + self.assertEqual(table2.labels, {'priority': 'high', 'color': 'blue'}) table2.description = None - table3 = Config.CLIENT.update_table(table2, ['description']) + table2.labels = { + 'color': 'green', # change + 'shape': 'circle', # add + 'priority': None, # delete + } + table3 = Config.CLIENT.update_table(table2, ['description', 'labels']) self.assertIsNone(table3.description) + self.assertEqual(table3.labels, {'color': 'green', 'shape': 'circle'}) # If we try to update using table2 again, it will fail because the # previous update changed the ETag. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 1dd908e6507e..a15f7a0cc1b1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -464,6 +464,7 @@ def test_create_table_w_day_partition(self): 'tableId': self.TABLE_ID }, 'timePartitioning': {'type': 'DAY'}, + 'labels': {}, } self.assertEqual(req['data'], sent) self.assertEqual(table.partitioning_type, "DAY") @@ -502,6 +503,7 @@ def test_create_table_w_day_partition_and_expire(self): 'tableId': self.TABLE_ID }, 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, + 'labels': {}, } self.assertEqual(req['data'], sent) self.assertEqual(table.partitioning_type, "DAY") @@ -554,6 +556,7 @@ def test_create_table_w_schema_and_query(self): {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] }, 'view': {'query': query, 'useLegacySql': False}, + 'labels': {}, } self.assertEqual(req['data'], sent) self.assertEqual(got.table_id, self.TABLE_ID) @@ -603,7 +606,8 @@ def test_create_table_w_external(self): 'externalDataConfiguration': { 'sourceFormat': 'CSV', 'autodetect': True, - } + }, + 'labels': {}, } self.assertEqual(req['data'], sent) self.assertEqual(got.table_id, self.TABLE_ID) @@ -727,6 +731,7 @@ def test_update_table(self): 'etag': 'etag', 'description': description, 'friendlyName': title, + 'labels': {'x': 'y'}, } schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), @@ -738,9 +743,10 @@ def test_update_table(self): table = Table(self.TABLE_REF, schema=schema) table.description = description table.friendly_name = title + table.labels = {'x': 'y'} updated_table = client.update_table( - table, ['schema', 'description', 'friendly_name']) + table, ['schema', 'description', 'friendly_name', 'labels']) sent = { 'tableReference': { @@ -753,6 +759,7 @@ def test_update_table(self): {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, 'description': description, 'friendlyName': title, + 'labels': {'x': 'y'}, } self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -763,6 +770,7 @@ def test_update_table(self): self.assertEqual(updated_table.description, table.description) self.assertEqual(updated_table.friendly_name, table.friendly_name) self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.labels, table.labels) # ETag becomes If-Match header. table._properties['etag'] = 'etag' diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 5216b1d8c7ba..a40ab160d970 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -205,6 +205,7 @@ def _makeResource(self): 'csvOptions': { 'allowJaggedRows': True, 'encoding': 'encoding'}}, + 'labels': {'x': 'y'}, } def _verifyReadonlyResourceProperties(self, table, resource): @@ -279,6 +280,11 @@ def _verifyResourceProperties(self, table, resource): self.assertEqual(edc.source_format, 'CSV') self.assertEqual(edc.options.allow_jagged_rows, True) + if 'labels' in resource: + self.assertEqual(table.labels, {'x': 'y'}) + else: + self.assertEqual(table.labels, {}) + def test_ctor(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -301,13 +307,14 @@ def test_ctor(self): self.assertIsNone(table.self_link) self.assertIsNone(table.full_table_id) self.assertIsNone(table.table_type) - self.assertIsNone(table.description) self.assertIsNone(table.expires) self.assertIsNone(table.friendly_name) self.assertIsNone(table.location) self.assertIsNone(table.view_query) self.assertIsNone(table.view_use_legacy_sql) + self.assertIsNone(table.external_data_configuration) + self.assertEquals(table.labels, {}) def test_ctor_w_schema(self): from google.cloud.bigquery.table import SchemaField @@ -534,6 +541,13 @@ def test_external_data_configuration_setter_bad_value(self): with self.assertRaises(ValueError): table.external_data_configuration = 12345 + def test_labels_setter_bad_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + with self.assertRaises(ValueError): + table.labels = 12345 + def test_from_api_repr_missing_identity(self): self._setUpConstants() RESOURCE = {} From ec80b4484c21f1f91793898f9d3db6203617d7ab Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 18 Oct 2017 10:10:43 -0700 Subject: [PATCH 0332/2016] BigQuery: removes LoadJob error for autodetect + schema (#4213) --- .../google/cloud/bigquery/job.py | 6 ----- .../google-cloud-bigquery/tests/system.py | 14 +++++++--- .../tests/unit/test_job.py | 26 ------------------- 3 files changed, 11 insertions(+), 35 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 350ad7ce579b..2472758b4a51 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -143,9 +143,6 @@ class AutoDetectSchema(_TypedApiResourceProperty): """ def __set__(self, instance, value): self._validate(value) - if instance.schema: - raise ValueError('A schema should not be already defined ' - 'when using schema auto-detection') instance._properties[self.resource_name] = value @@ -638,9 +635,6 @@ def schema(self): def schema(self, value): if not all(isinstance(field, SchemaField) for field in value): raise ValueError('Schema items must be fields') - if self.autodetect: - raise ValueError( - 'Schema can not be set if `autodetect` property is True') self._schema = tuple(value) def to_api_repr(self): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 86082bf31982..767dac96a433 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -466,8 +466,9 @@ def test_load_table_from_storage_then_dump_table(self): self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) - def test_load_table_from_storage_w_autodetect_schema(self): + def test_load_table_from_storage_w_autodetect_schema_then_get_job(self): from google.cloud.bigquery import SchemaField + from google.cloud.bigquery.job import LoadJob rows = ROWS * 100 # BigQuery internally uses the first 100 rows to detect schema @@ -477,11 +478,12 @@ def test_load_table_from_storage_w_autodetect_schema(self): HEADER_ROW, rows) dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) table_ref = dataset.table('test_table') + JOB_ID = 'load_table_w_autodetect_{}'.format(str(uuid.uuid4())) config = bigquery.LoadJobConfig() config.autodetect = True - job = Config.CLIENT.load_table_from_storage(gs_url, table_ref, - job_config=config) + job = Config.CLIENT.load_table_from_storage( + gs_url, table_ref, job_config=config, job_id=JOB_ID) # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability @@ -502,6 +504,12 @@ def test_load_table_from_storage_w_autodetect_schema(self): self.assertEqual( sorted(actual_row_tuples, key=by_age), sorted(rows, key=by_age)) + fetched_job = Config.CLIENT.get_job(JOB_ID) + + self.assertIsInstance(fetched_job, LoadJob) + self.assertEqual(fetched_job.job_id, JOB_ID) + self.assertEqual(fetched_job.autodetect, True) + def _write_csv_to_storage(self, bucket_name, blob_name, header_row, data_rows): from google.cloud._testing import _NamedTemporaryFile diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 0e0b667e704d..0ad4071f8fc6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -426,32 +426,6 @@ def test_schema_setter(self): config.schema = [full_name, age] self.assertEqual(config.schema, [full_name, age]) - def test_schema_setter_w_autodetect(self): - from google.cloud.bigquery.schema import SchemaField - - config = LoadJobConfig() - schema = [SchemaField('full_name', 'STRING')] - config.autodetect = False - config.schema = schema - self.assertEqual(config.schema, schema) - - config.schema = [] - config.autodetect = True - with self.assertRaises(ValueError): - config.schema = schema - - def test_autodetect_setter_w_schema(self): - from google.cloud.bigquery.schema import SchemaField - - config = LoadJobConfig() - - config.autodetect = False - config.schema = [SchemaField('full_name', 'STRING')] - self.assertEqual(config.autodetect, False) - - with self.assertRaises(ValueError): - config.autodetect = True - def test_props_set_by_server(self): import datetime from google.cloud._helpers import UTC From 41b10307815b4150f5ccc7407b0d8a30dc1cf82d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 18 Oct 2017 14:58:33 -0700 Subject: [PATCH 0333/2016] BigQuery: populate timeout parameter for getQueryResults (#4209) * BigQuery: populate timeout parameter for getQueryResults This will allow QueryJob to respect the timeout value for futures. * query_rows: Clarify that timeout is in seconds. * Wait until the end of calculations to convert to milliseconds. --- .../google/cloud/bigquery/client.py | 6 ++-- .../google/cloud/bigquery/job.py | 26 +++++++++++++++-- .../tests/unit/test_job.py | 28 +++++++++++++++++++ 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index dedfe21135c1..05925604664d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1117,10 +1117,10 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None, :type job_id: str :param job_id: (Optional) ID to use for the query job. - :type timeout: int + :type timeout: float :param timeout: - (Optional) How long to wait for job to complete before raising a - :class:`TimeoutError`. + (Optional) How long (in seconds) to wait for job to complete + before raising a :class:`TimeoutError`. :rtype: :class:`~google.api.core.page_iterator.Iterator` :returns: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 2472758b4a51..97f38fa624c1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -44,6 +44,7 @@ _DONE_STATE = 'DONE' _STOPPED_REASON = 'stopped' +_TIMEOUT_BUFFER_SECS = 0.1 _ERROR_REASON_TO_EXCEPTION = { 'accessDenied': http_client.FORBIDDEN, @@ -1516,6 +1517,7 @@ def __init__(self, job_id, query, client, job_config=None): self.query = query self._configuration = job_config self._query_results = None + self._done_timeout = None @property def allow_large_results(self): @@ -1865,7 +1867,7 @@ def query_results(self, retry=DEFAULT_RETRY): """ if not self._query_results: self._query_results = self._client._get_query_results( - self.job_id, retry) + self.job_id, retry, project=self.project) return self._query_results def done(self, retry=DEFAULT_RETRY): @@ -1874,11 +1876,25 @@ def done(self, retry=DEFAULT_RETRY): :rtype: bool :returns: True if the job is complete, False otherwise. """ + # Since the API to getQueryResults can hang up to the timeout value + # (default of 10 seconds), set the timeout parameter to ensure that + # the timeout from the futures API is respected. See: + # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 + timeout_ms = None + if self._done_timeout is not None: + # Subtract a buffer for context switching, network latency, etc. + timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS + timeout = max(min(timeout, 10), 0) + self._done_timeout -= timeout + self._done_timeout = max(0, self._done_timeout) + timeout_ms = int(timeout * 1000) + # Do not refresh is the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: self._query_results = self._client._get_query_results( - self.job_id, retry) + self.job_id, retry, + project=self.project, timeout_ms=timeout_ms) # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are @@ -1888,10 +1904,14 @@ def done(self, retry=DEFAULT_RETRY): return self.state == _DONE_STATE + def _blocking_poll(self, timeout=None): + self._done_timeout = timeout + super(QueryJob, self)._blocking_poll(timeout=timeout) + def result(self, timeout=None, retry=DEFAULT_RETRY): """Start the job and wait for it to complete and get the result. - :type timeout: int + :type timeout: float :param timeout: How long to wait for job to complete before raising a :class:`TimeoutError`. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 0ad4071f8fc6..34c5c92ef4d9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2171,6 +2171,34 @@ def test_result_invokes_begins(self): self.assertEqual(query_request['method'], 'GET') self.assertEqual(reload_request['method'], 'GET') + def test_result_w_timeout(self): + begun_resource = self._makeResource() + query_resource = { + 'jobComplete': True, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + } + done_resource = copy.deepcopy(begun_resource) + done_resource['status'] = {'state': 'DONE'} + connection = _Connection( + begun_resource, query_resource, done_resource) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + job.result(timeout=1.0) + + self.assertEqual(len(connection._requested), 3) + begin_request, query_request, reload_request = connection._requested + self.assertEqual(begin_request['method'], 'POST') + self.assertEqual(query_request['method'], 'GET') + self.assertEqual( + query_request['path'], + '/projects/{}/queries/{}'.format(self.PROJECT, self.JOB_ID)) + self.assertEqual(query_request['query_params']['timeoutMs'], 900) + self.assertEqual(reload_request['method'], 'GET') + def test_result_error(self): from google.cloud import exceptions From af870bace911e8b6a3033b7e254497821a849f30 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Wed, 18 Oct 2017 15:36:57 -0700 Subject: [PATCH 0334/2016] Replace usage of google.api.core with google.api_core (#4221) * Remove api.core packages from google.cloud.core, make google.cloud.core depend on api_core. * s/google.api.core/google.api_core/g and nox updates * Fixing core tests, addressing review feedback * Fix bigquery --- .../google/cloud/bigquery/_helpers.py | 6 +++--- .../google/cloud/bigquery/client.py | 14 +++++++------- .../google/cloud/bigquery/dataset.py | 6 +++--- .../google/cloud/bigquery/job.py | 8 ++++---- .../google/cloud/bigquery/query.py | 8 ++++---- .../google/cloud/bigquery/table.py | 4 ++-- packages/google-cloud-bigquery/nox.py | 1 + 7 files changed, 24 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index ac6e9759c084..aaabcec2a408 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -818,7 +818,7 @@ def _item_to_row(iterator, resource): added to the iterator after being created, which should be done by the caller. - :type iterator: :class:`~google.api.core.page_iterator.Iterator` + :type iterator: :class:`~google.api_core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type resource: dict @@ -834,10 +834,10 @@ def _item_to_row(iterator, resource): def _rows_page_start(iterator, page, response): """Grab total rows when :class:`~google.cloud.iterator.Page` starts. - :type iterator: :class:`~google.api.core.page_iterator.Iterator` + :type iterator: :class:`~google.api_core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. - :type page: :class:`~google.cloud.iterator.Page` + :type page: :class:`~google.api_core.page_iterator.Page` :param page: The page that was just created. :type response: dict diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2ed2c15e13a9..6d77649dcaa5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -14,7 +14,7 @@ """Client for interacting with the Google BigQuery API.""" -from google.api.core import page_iterator +from google.api_core import page_iterator from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset @@ -97,7 +97,7 @@ def list_projects(self, max_results=None, page_token=None): not passed, the API will return the first page of projects. - :rtype: :class:`~google.api.core.page_iterator.Iterator` + :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.client.Project` accessible to the current client. """ @@ -129,7 +129,7 @@ def list_datasets(self, include_all=False, max_results=None, not passed, the API will return the first page of datasets. - :rtype: :class:`~google.api.core.page_iterator.Iterator` + :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`. accessible to the current client. """ @@ -250,7 +250,7 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, * ``"pending"`` * ``"running"`` - :rtype: :class:`~google.api.core.page_iterator.Iterator` + :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: Iterable of job instances. """ extra_params = {'projection': 'full'} @@ -397,7 +397,7 @@ def run_sync_query(self, query, udf_resources=(), query_parameters=()): def _item_to_project(iterator, resource): """Convert a JSON project to the native object. - :type iterator: :class:`~google.api.core.page_iterator.Iterator` + :type iterator: :class:`~google.api_core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type resource: dict @@ -413,7 +413,7 @@ def _item_to_project(iterator, resource): def _item_to_dataset(iterator, resource): """Convert a JSON dataset to the native object. - :type iterator: :class:`~google.api.core.page_iterator.Iterator` + :type iterator: :class:`~google.api_core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type resource: dict @@ -428,7 +428,7 @@ def _item_to_dataset(iterator, resource): def _item_to_job(iterator, resource): """Convert a JSON job to the native object. - :type iterator: :class:`~google.api.core.page_iterator.Iterator` + :type iterator: :class:`~google.api_core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type resource: dict diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 645a68deada4..95d238e271fa 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -15,7 +15,7 @@ """Define API Datasets.""" import six -from google.api.core import page_iterator +from google.api_core import page_iterator from google.cloud._helpers import _datetime_from_microseconds from google.cloud.exceptions import NotFound from google.cloud.bigquery.table import Table @@ -561,7 +561,7 @@ def list_tables(self, max_results=None, page_token=None): datasets. If not passed, the API will return the first page of datasets. - :rtype: :class:`~google.api.core.page_iterator.Iterator` + :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` contained within the current dataset. """ @@ -595,7 +595,7 @@ def table(self, name, schema=()): def _item_to_table(iterator, resource): """Convert a JSON table to the native object. - :type iterator: :class:`~google.api.core.page_iterator.Iterator` + :type iterator: :class:`~google.api_core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. :type resource: dict diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 3eada05d2ea0..2ee535853277 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -19,7 +19,7 @@ import six from six.moves import http_client -import google.api.core.future.polling +import google.api_core.future.polling from google.cloud import exceptions from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds @@ -140,7 +140,7 @@ class WriteDisposition(_EnumProperty): WRITE_EMPTY = 'WRITE_EMPTY' -class _AsyncJob(google.api.core.future.polling.PollingFuture): +class _AsyncJob(google.api_core.future.polling.PollingFuture): """Base class for asynchronous jobs. :type name: str @@ -496,7 +496,7 @@ def cancelled(self): This always returns False. It's not possible to check if a job was cancelled in the API. This method is here to satisfy the interface - for :class:`google.api.core.future.Future`. + for :class:`google.api_core.future.Future`. :rtype: bool :returns: False @@ -1316,7 +1316,7 @@ def result(self, timeout=None): How long to wait for job to complete before raising a :class:`TimeoutError`. - :rtype: :class:`~google.api.core.page_iterator.Iterator` + :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: Iterator of row data :class:`tuple`s. During each page, the iterator will have the ``total_rows`` attribute set, which counts diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 185b68deb104..b99cb3c5630e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -16,7 +16,7 @@ import six -from google.api.core import page_iterator +from google.api_core import page_iterator from google.cloud.bigquery._helpers import _TypedProperty from google.cloud.bigquery._helpers import _rows_from_json from google.cloud.bigquery.dataset import Dataset @@ -420,7 +420,7 @@ def fetch_data(self, max_results=None, page_token=None, start_index=None, :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. - :rtype: :class:`~google.api.core.page_iterator.Iterator` + :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: Iterator of row data :class:`tuple`s. During each page, the iterator will have the ``total_rows`` attribute set, which counts the total number of rows **in the result @@ -467,10 +467,10 @@ def _rows_page_start_query(iterator, page, response): added to the iterator after being created, which should be done by the caller. - :type iterator: :class:`~google.api.core.page_iterator.Iterator` + :type iterator: :class:`~google.api_core.page_iterator.Iterator` :param iterator: The iterator that is currently in use. - :type page: :class:`~google.cloud.iterator.Page` + :type page: :class:`~google.api_core.page_iterator.Page` :param page: The page that was just created. :type response: dict diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 03e557248637..20075bc50bd5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -23,7 +23,7 @@ from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload -from google.api.core import page_iterator +from google.api_core import page_iterator from google.cloud import exceptions from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime @@ -712,7 +712,7 @@ def fetch_data(self, max_results=None, page_token=None, client=None): :param client: (Optional) The client to use. If not passed, falls back to the ``client`` stored on the current dataset. - :rtype: :class:`~google.api.core.page_iterator.Iterator` + :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: Iterator of row data :class:`tuple`s. During each page, the iterator will have the ``total_rows`` attribute set, which counts the total number of rows **in the table** diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 2d5772882fbe..7cce569c1a44 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -20,6 +20,7 @@ LOCAL_DEPS = ( + os.path.join('..', 'api_core'), os.path.join('..', 'core'), ) From b8d457fcadcb6e32ef607b16f91c51818b2a8a45 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 18 Oct 2017 16:09:37 -0700 Subject: [PATCH 0335/2016] BigQuery: Adds helper function for snake to camel case conversion (#4160) * adds helper function for snake to camel case conversion * adds unit test --- .../google/cloud/bigquery/_helpers.py | 6 ++++++ .../google/cloud/bigquery/client.py | 5 ++--- .../google/cloud/bigquery/table.py | 7 ++----- .../tests/unit/test__helpers.py | 14 ++++++++++++++ 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 2d763109e745..77964e233fc3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -367,6 +367,12 @@ def _time_to_json(value): _SCALAR_VALUE_TO_JSON_PARAM['TIMESTAMP'] = _timestamp_to_json_parameter +def _snake_to_camel_case(value): + """Convert snake case string to camel case.""" + words = value.split('_') + return words[0] + ''.join(map(str.capitalize, words[1:])) + + class _ApiResourceProperty(object): """Base property implementation. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 05925604664d..1d64b848e594 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -47,6 +47,7 @@ from google.cloud.bigquery._helpers import _field_to_index_mapping from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW from google.cloud.bigquery._helpers import DEFAULT_RETRY +from google.cloud.bigquery._helpers import _snake_to_camel_case _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB @@ -340,9 +341,7 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): api_field = 'access' else: attr = getattr(dataset, f) - # snake case to camel case - words = f.split('_') - api_field = words[0] + ''.join(map(str.capitalize, words[1:])) + api_field = _snake_to_camel_case(f) partial[api_field] = attr if dataset.etag is not None: headers = {'If-Match': dataset.etag} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index b75af6802014..2b9dea02d34e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -22,6 +22,7 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime +from google.cloud.bigquery._helpers import _snake_to_camel_case from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource @@ -705,11 +706,7 @@ def _build_resource(self, filter_fields): if f in self.custom_resource_fields: self.custom_resource_fields[f](self, resource) else: - # TODO(alixh) refactor to use in both Table and Dataset - # snake case to camel case - words = f.split('_') - api_field = words[0] + ''.join( - map(str.capitalize, words[1:])) + api_field = _snake_to_camel_case(f) resource[api_field] = getattr(self, f) return resource diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index f37d39a4f823..15a62107c645 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -749,6 +749,20 @@ def test_w_datetime(self): self.assertEqual(self._call_fut(when), '12:13:41') +class Test_snake_to_camel_case(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _snake_to_camel_case + + return _snake_to_camel_case(value) + + def test_w_snake_case_string(self): + self.assertEqual(self._call_fut('friendly_name'), 'friendlyName') + + def test_w_camel_case_string(self): + self.assertEqual(self._call_fut('friendlyName'), 'friendlyName') + + class Test_TypedApiResourceProperty(unittest.TestCase): @staticmethod From 780ffe4d6ecd7722bce8f13c85c7d32c9ac5bde5 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 20 Oct 2017 15:41:44 -0700 Subject: [PATCH 0336/2016] Renames client.load_table_from_storage() to client.load_table_from_uri() (#4235) --- .../google/cloud/bigquery/client.py | 8 ++++---- packages/google-cloud-bigquery/tests/system.py | 12 ++++++------ .../google-cloud-bigquery/tests/unit/test_client.py | 10 ++++------ 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1d64b848e594..4070c9b01285 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -606,9 +606,9 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, max_results=max_results, extra_params=extra_params) - def load_table_from_storage(self, source_uris, destination, - job_id=None, job_id_prefix=None, - job_config=None, retry=DEFAULT_RETRY): + def load_table_from_uri(self, source_uris, destination, + job_id=None, job_id_prefix=None, + job_config=None, retry=DEFAULT_RETRY): """Starts a job for loading data into a table from CloudStorage. See @@ -654,7 +654,7 @@ def load_table_from_file(self, file_obj, destination, job_id=None, job_id_prefix=None, job_config=None): """Upload the contents of this table from a file-like object. - Like load_table_from_storage, this creates, starts and returns + Like load_table_from_uri, this creates, starts and returns a ``LoadJob``. :type file_obj: file diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 767dac96a433..c98ad8f09b5f 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -434,7 +434,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): self.assertEqual(sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength)) - def test_load_table_from_storage_then_dump_table(self): + def test_load_table_from_uri_then_dump_table(self): TABLE_ID = 'test_table' GS_URL = self._write_csv_to_storage( 'bq_load_test' + unique_resource_id(), 'person_ages.csv', @@ -451,7 +451,7 @@ def test_load_table_from_storage_then_dump_table(self): config.skip_leading_rows = 1 config.source_format = 'CSV' config.write_disposition = 'WRITE_EMPTY' - job = Config.CLIENT.load_table_from_storage( + job = Config.CLIENT.load_table_from_uri( GS_URL, dataset.table(TABLE_ID), job_config=config) # Allow for 90 seconds of "warm up" before rows visible. See @@ -466,7 +466,7 @@ def test_load_table_from_storage_then_dump_table(self): self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) - def test_load_table_from_storage_w_autodetect_schema_then_get_job(self): + def test_load_table_from_uri_w_autodetect_schema_then_get_job(self): from google.cloud.bigquery import SchemaField from google.cloud.bigquery.job import LoadJob @@ -482,7 +482,7 @@ def test_load_table_from_storage_w_autodetect_schema_then_get_job(self): config = bigquery.LoadJobConfig() config.autodetect = True - job = Config.CLIENT.load_table_from_storage( + job = Config.CLIENT.load_table_from_uri( gs_url, table_ref, job_config=config, job_id=JOB_ID) # Allow for 90 seconds of "warm up" before rows visible. See @@ -563,8 +563,8 @@ def _load_table_for_extract_table( table_ref = dataset.table(table.table_id) config = bigquery.LoadJobConfig() config.autodetect = True - job = Config.CLIENT.load_table_from_storage(gs_url, table_ref, - job_config=config) + job = Config.CLIENT.load_table_from_uri(gs_url, table_ref, + job_config=config) # TODO(jba): do we need this retry now that we have job.result()? # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index a15f7a0cc1b1..ca02179b3b74 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1425,7 +1425,7 @@ def test_list_jobs_explicit_missing(self): 'allUsers': True, 'stateFilter': 'done'}) - def test_load_table_from_storage(self): + def test_load_table_from_uri(self): from google.cloud.bigquery.job import LoadJob JOB = 'job_name' @@ -1454,10 +1454,9 @@ def test_load_table_from_storage(self): conn = client._connection = _Connection(RESOURCE) destination = client.dataset(self.DS_ID).table(DESTINATION) - job = client.load_table_from_storage(SOURCE_URI, destination, - job_id=JOB) + job = client.load_table_from_uri(SOURCE_URI, destination, job_id=JOB) - # Check that load_table_from_storage actually starts the job. + # Check that load_table_from_uri actually starts the job. self.assertEqual(len(conn._requested), 1) req = conn._requested[0] self.assertEqual(req['method'], 'POST') @@ -1471,8 +1470,7 @@ def test_load_table_from_storage(self): conn = client._connection = _Connection(RESOURCE) - job = client.load_table_from_storage([SOURCE_URI], destination, - job_id=JOB) + job = client.load_table_from_uri([SOURCE_URI], destination, job_id=JOB) self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) From cc34fdaa5452df23276b300eb8f4671c87baaf7b Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 23 Oct 2017 12:24:39 -0700 Subject: [PATCH 0337/2016] BigQuery: Updates snippets for BigQuery Beta 2 changes (#4237) * Updates snippets for BigQuery Beta 2 changes * fixes flake8 issues * removes module imports * fixes snippets --- packages/google-cloud-bigquery/nox.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index d9f382fb2dfd..2d5772882fbe 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -92,8 +92,6 @@ def system_tests(session, python_version): def snippets_tests(session, python_version): """Run the system test suite.""" - session.skip('Snippets need update for BigQuery Beta 2 changes.') - # Sanity check: Only run system tests if the environment variable is set. if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): session.skip('Credentials must be set via environment variable.') From 5ff8276499e705aac755011838eb38ddfc5080d6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 23 Oct 2017 14:42:22 -0700 Subject: [PATCH 0338/2016] BigQuery: make docstrings use bigquery module, like the samples do. (#4236) * BigQuery: make docstrings use bigquery module, like the samples do. All the public classes we expect developers to use are included in the `google.cloud.bigquery` module, and it is this module that we use in code samples. Also, I found one error in the Bigtable docs where `Row` was not being used as a local reference and conflicted with the BigQuery Row. * Adjust heading underline. --- .../google/cloud/bigquery/__init__.py | 30 ++-- .../google/cloud/bigquery/_helpers.py | 2 +- .../google/cloud/bigquery/client.py | 117 +++++++------- .../google/cloud/bigquery/dataset.py | 14 +- .../google/cloud/bigquery/dbapi/__init__.py | 5 - .../google/cloud/bigquery/external_config.py | 12 +- .../google/cloud/bigquery/job.py | 148 +++++++++--------- .../google/cloud/bigquery/query.py | 33 ++-- .../google/cloud/bigquery/schema.py | 2 +- .../google/cloud/bigquery/table.py | 27 ++-- 10 files changed, 205 insertions(+), 185 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 4c3fcd7b3be0..cda5236d3c60 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -26,15 +26,18 @@ from pkg_resources import get_distribution __version__ = get_distribution('google-cloud-bigquery').version -from google.cloud.bigquery._helpers import Row from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import CopyJobConfig +from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import ExtractJobConfig +from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig +from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter @@ -52,27 +55,34 @@ __all__ = [ '__version__', - 'AccessEntry', - 'ArrayQueryParameter', 'Client', + # Queries + 'QueryJob', + 'QueryJobConfig', + 'ArrayQueryParameter', + 'ScalarQueryParameter', + 'StructQueryParameter', + # Datasets 'Dataset', 'DatasetReference', + 'AccessEntry', + # Tables + 'Table', + 'TableReference', + 'CopyJob', 'CopyJobConfig', + 'ExtractJob', 'ExtractJobConfig', - 'QueryJobConfig', - 'Row', + 'LoadJob', 'LoadJobConfig', - 'ScalarQueryParameter', + # Shared helpers 'SchemaField', - 'StructQueryParameter', - 'Table', - 'TableReference', 'UDFResource', - 'DEFAULT_RETRY', 'ExternalConfig', 'BigtableOptions', 'BigtableColumnFamily', 'BigtableColumn', 'CSVOptions', 'GoogleSheetsOptions', + 'DEFAULT_RETRY', ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 1ba9233dab71..8e321ee59866 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -502,7 +502,7 @@ def _item_to_row(iterator, resource): :type resource: dict :param resource: An item to be converted to a row. - :rtype: :class:`Row` + :rtype: :class:`~google.cloud.bigquery.Row` :returns: The next row in the page. """ return Row(_row_tuple_from_json(resource, iterator.schema), diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 712b21899d49..79970049c7d2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -181,7 +181,7 @@ def list_datasets(self, include_all=False, filter=None, max_results=None, :param retry: (Optional) How to retry the RPC. :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`. + :returns: Iterator of :class:`~google.cloud.bigquery.Dataset`. accessible to the current client. """ extra_params = {} @@ -212,7 +212,7 @@ def dataset(self, dataset_id, project=None): :param project: (Optional) project ID for the dataset (defaults to the project of the client). - :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference` + :rtype: :class:`google.cloud.bigquery.DatasetReference` :returns: a new ``DatasetReference`` instance """ if project is None: @@ -226,12 +226,12 @@ def create_dataset(self, dataset): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset` + :type dataset: :class:`~google.cloud.bigquery.Dataset` :param dataset: A ``Dataset`` populated with the desired initial state. If project is missing, it defaults to the project of the client. - :rtype: ":class:`~google.cloud.bigquery.dataset.Dataset`" + :rtype: ":class:`~google.cloud.bigquery.Dataset`" :returns: a new ``Dataset`` returned from the service. """ path = '/projects/%s/datasets' % (dataset.project,) @@ -245,10 +245,10 @@ def create_table(self, table): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - :type table: :class:`~google.cloud.bigquery.table.Table` + :type table: :class:`~google.cloud.bigquery.Table` :param table: A ``Table`` populated with the desired initial state. - :rtype: ":class:`~google.cloud.bigquery.table.Table`" + :rtype: ":class:`~google.cloud.bigquery.Table`" :returns: a new ``Table`` returned from the service. """ path = '/projects/%s/datasets/%s/tables' % ( @@ -271,13 +271,13 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): """Fetch the dataset referenced by ``dataset_ref`` :type dataset_ref: - :class:`google.cloud.bigquery.dataset.DatasetReference` + :class:`google.cloud.bigquery.DatasetReference` :param dataset_ref: the dataset to use. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` + :rtype: :class:`google.cloud.bigquery.Dataset` :returns: a ``Dataset`` instance """ api_response = self._call_api(retry, @@ -289,13 +289,13 @@ def get_table(self, table_ref, retry=DEFAULT_RETRY): """Fetch the table referenced by ``table_ref`` :type table_ref: - :class:`google.cloud.bigquery.table.TableReference` + :class:`google.cloud.bigquery.TableReference` :param table_ref: the table to use. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.table.Table` + :rtype: :class:`google.cloud.bigquery.Table` :returns: a ``Table`` instance """ api_response = self._call_api(retry, method='GET', path=table_ref.path) @@ -315,7 +315,7 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): will only be saved if no modifications to the dataset occurred since the read. - :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` + :type dataset: :class:`google.cloud.bigquery.Dataset` :param dataset: the dataset to update. :type fields: sequence of string @@ -325,7 +325,7 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` + :rtype: :class:`google.cloud.bigquery.Dataset` :returns: the modified ``Dataset`` instance """ path = '/projects/%s/datasets/%s' % (dataset.project, @@ -358,13 +358,13 @@ def update_table(self, table, properties, retry=DEFAULT_RETRY): https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update :type table: - :class:`google.cloud.bigquery.table.Table` + :class:`google.cloud.bigquery.Table` :param table_ref: the table to update. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.table.Table` + :rtype: :class:`google.cloud.bigquery.Table` :returns: a ``Table`` instance """ partial = table._build_resource(properties) @@ -385,8 +385,8 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None, https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list :type dataset: One of: - :class:`~google.cloud.bigquery.dataset.Dataset` - :class:`~google.cloud.bigquery.dataset.DatasetReference` + :class:`~google.cloud.bigquery.Dataset` + :class:`~google.cloud.bigquery.DatasetReference` :param dataset: the dataset whose tables to list, or a reference to it. :type max_results: int @@ -402,7 +402,7 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None, :param retry: (Optional) How to retry the RPC. :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` + :returns: Iterator of :class:`~google.cloud.bigquery.Table` contained within the current dataset. """ if not isinstance(dataset, (Dataset, DatasetReference)): @@ -426,8 +426,8 @@ def delete_dataset(self, dataset, retry=DEFAULT_RETRY): https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete :type dataset: One of: - :class:`~google.cloud.bigquery.dataset.Dataset` - :class:`~google.cloud.bigquery.dataset.DatasetReference` + :class:`~google.cloud.bigquery.Dataset` + :class:`~google.cloud.bigquery.DatasetReference` :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. @@ -445,8 +445,8 @@ def delete_table(self, table, retry=DEFAULT_RETRY): https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete :type table: One of: - :class:`~google.cloud.bigquery.table.Table` - :class:`~google.cloud.bigquery.table.TableReference` + :class:`~google.cloud.bigquery.Table` + :class:`~google.cloud.bigquery.TableReference` :param table: the table to delete, or a reference to it. :type retry: :class:`google.api_core.retry.Retry` @@ -475,7 +475,7 @@ def _get_query_results(self, job_id, retry, project=None, timeout_ms=None): (Optional) number of milliseconds the the API call should wait for the query to complete before the request times out. - :rtype: :class:`google.cloud.bigquery.query.QueryResults` + :rtype: :class:`google.cloud.bigquery.QueryResults` :returns: a new ``QueryResults`` instance """ @@ -503,11 +503,10 @@ def job_from_resource(self, resource): :param resource: one job resource from API response :rtype: One of: - :class:`google.cloud.bigquery.job.LoadJob`, - :class:`google.cloud.bigquery.job.CopyJob`, - :class:`google.cloud.bigquery.job.ExtractJob`, - :class:`google.cloud.bigquery.job.QueryJob`, - :class:`google.cloud.bigquery.job.RunSyncQueryJob` + :class:`google.cloud.bigquery.LoadJob`, + :class:`google.cloud.bigquery.CopyJob`, + :class:`google.cloud.bigquery.ExtractJob`, + :class:`google.cloud.bigquery.QueryJob` :returns: the job instance, constructed via the resource """ config = resource['configuration'] @@ -537,7 +536,11 @@ def get_job(self, job_id, project=None, retry=DEFAULT_RETRY): :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`~google.cloud.bigquery.job._AsyncJob` + :rtype: One of: + :class:`google.cloud.bigquery.LoadJob`, + :class:`google.cloud.bigquery.CopyJob`, + :class:`google.cloud.bigquery.ExtractJob`, + :class:`google.cloud.bigquery.QueryJob` :returns: Concrete job instance, based on the resource returned by the API. """ @@ -631,13 +634,13 @@ def load_table_from_uri(self, source_uris, destination, randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` + :type job_config: :class:`google.cloud.bigquery.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.job.LoadJob` + :rtype: :class:`google.cloud.bigquery.LoadJob` :returns: a new ``LoadJob`` instance """ job_id = _make_job_id(job_id, job_id_prefix) @@ -683,10 +686,10 @@ def load_table_from_file(self, file_obj, destination, randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` + :type job_config: :class:`google.cloud.bigquery.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. - :rtype: :class:`~google.cloud.bigquery.jobs.LoadJob` + :rtype: :class:`~google.cloud.bigquery.LoadJob` :returns: the job instance used to load the data (e.g., for querying status). Note that the job is already started: @@ -827,9 +830,9 @@ def copy_table(self, sources, destination, job_id=None, job_id_prefix=None, https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy :type sources: One of: - :class:`~google.cloud.bigquery.table.TableReference` + :class:`~google.cloud.bigquery.TableReference` sequence of - :class:`~google.cloud.bigquery.table.TableReference` + :class:`~google.cloud.bigquery.TableReference` :param sources: Table or tables to be copied. @@ -844,13 +847,13 @@ def copy_table(self, sources, destination, job_id=None, job_id_prefix=None, randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - :type job_config: :class:`google.cloud.bigquery.job.CopyJobConfig` + :type job_config: :class:`google.cloud.bigquery.CopyJobConfig` :param job_config: (Optional) Extra configuration options for the job. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.job.CopyJob` + :rtype: :class:`google.cloud.bigquery.CopyJob` :returns: a new ``CopyJob`` instance """ job_id = _make_job_id(job_id, job_id_prefix) @@ -870,7 +873,7 @@ def extract_table( See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract - :type source: :class:`google.cloud.bigquery.table.TableReference` + :type source: :class:`google.cloud.bigquery.TableReference` :param source: table to be extracted. :type destination_uris: One of: @@ -891,13 +894,13 @@ def extract_table( randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - :type job_config: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :type job_config: :class:`google.cloud.bigquery.ExtractJobConfig` :param job_config: (Optional) Extra configuration options for the job. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.job.ExtractJob` + :rtype: :class:`google.cloud.bigquery.ExtractJob` :returns: a new ``ExtractJob`` instance """ job_id = _make_job_id(job_id, job_id_prefix) @@ -923,7 +926,7 @@ def query(self, query, job_config=None, job_id=None, job_id_prefix=None, SQL query to be executed. Defaults to the standard SQL dialect. Use the ``job_config`` parameter to change dialects. - :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig` + :type job_config: :class:`google.cloud.bigquery.QueryJobConfig` :param job_config: (Optional) Extra configuration options for the job. :type job_id: str @@ -937,7 +940,7 @@ def query(self, query, job_config=None, job_id=None, job_id_prefix=None, :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.job.QueryJob` + :rtype: :class:`google.cloud.bigquery.QueryJob` :returns: a new ``QueryJob`` instance """ job_id = _make_job_id(job_id, job_id_prefix) @@ -952,8 +955,8 @@ def create_rows(self, table, rows, selected_fields=None, **kwargs): https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll :type table: One of: - :class:`~google.cloud.bigquery.table.Table` - :class:`~google.cloud.bigquery.table.TableReference` + :class:`~google.cloud.bigquery.Table` + :class:`~google.cloud.bigquery.TableReference` :param table: the destination table for the row data, or a reference to it. @@ -967,14 +970,15 @@ def create_rows(self, table, rows, selected_fields=None, **kwargs): include all required fields in the schema. Keys which do not correspond to a field in the schema are ignored. - :type selected_fields: list of :class:`SchemaField` + :type selected_fields: + list of :class:`~google.cloud.bigquery.SchemaField` :param selected_fields: The fields to return. Required if ``table`` is a - :class:`~google.cloud.bigquery.table.TableReference`. + :class:`~google.cloud.bigquery.TableReference`. :type kwargs: dict :param kwargs: Keyword arguments to - `~google.cloud.bigquery.client.Client.create_rows_json` + `~google.cloud.bigquery.Client.create_rows_json` :rtype: list of mappings :returns: One mapping per row with insert errors: the "index" key @@ -1020,8 +1024,8 @@ def create_rows_json(self, table, json_rows, row_ids=None, https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll :type table: One of: - :class:`~google.cloud.bigquery.table.Table` - :class:`~google.cloud.bigquery.table.TableReference` + :class:`~google.cloud.bigquery.Table` + :class:`~google.cloud.bigquery.TableReference` :param table: the destination table for the row data, or a reference to it. @@ -1110,7 +1114,7 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None, SQL query to be executed. Defaults to the standard SQL dialect. Use the ``job_config`` parameter to change dialects. - :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig` + :type job_config: :class:`google.cloud.bigquery.QueryJobConfig` :param job_config: (Optional) Extra configuration options for the job. :type job_id: str @@ -1152,14 +1156,15 @@ def list_rows(self, table, selected_fields=None, max_results=None, local copy of the schema is up-to-date, call ``client.get_table``. :type table: One of: - :class:`~google.cloud.bigquery.table.Table` - :class:`~google.cloud.bigquery.table.TableReference` + :class:`~google.cloud.bigquery.Table` + :class:`~google.cloud.bigquery.TableReference` :param table: the table to list, or a reference to it. - :type selected_fields: list of :class:`SchemaField` + :type selected_fields: + list of :class:`~google.cloud.bigquery.SchemaField` :param selected_fields: The fields to return. Required if ``table`` is a - :class:`~google.cloud.bigquery.table.TableReference`. + :class:`~google.cloud.bigquery.TableReference`. :type max_results: int :param max_results: maximum number of rows to return. @@ -1221,8 +1226,8 @@ def list_partitions(self, table, retry=DEFAULT_RETRY): """List the partitions in a table. :type table: One of: - :class:`~google.cloud.bigquery.table.Table` - :class:`~google.cloud.bigquery.table.TableReference` + :class:`~google.cloud.bigquery.Table` + :class:`~google.cloud.bigquery.TableReference` :param table: the table to list, or a reference to it. :type retry: :class:`google.api_core.retry.Retry` @@ -1297,7 +1302,7 @@ def _item_to_table(iterator, resource): :type resource: dict :param resource: An item to be converted to a table. - :rtype: :class:`~google.cloud.bigquery.table.Table` + :rtype: :class:`~google.cloud.bigquery.Table` :returns: The next table in the page. """ return Table.from_api_repr(resource) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index e464fcfb93bd..ef1b59f869c6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -146,7 +146,7 @@ def table(self, table_id): :type table_id: str :param table_id: the ID of the table. - :rtype: :class:`google.cloud.bigquery.table.TableReference` + :rtype: :class:`google.cloud.bigquery.TableReference` :returns: a TableReference for a table in this dataset. """ return TableReference(self, table_id) @@ -169,7 +169,7 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - tuple: The contents of this :class:`DatasetReference`. + tuple: The contents of this :class:`.DatasetReference`. """ return ( self._project, @@ -197,7 +197,7 @@ class Dataset(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets - :type dataset_ref: :class:`~google.cloud.bigquery.dataset.DatasetReference` + :type dataset_ref: :class:`~google.cloud.bigquery.DatasetReference` :param dataset_ref: a pointer to a dataset """ @@ -238,7 +238,7 @@ def access_entries(self): def access_entries(self, value): """Update dataset's access entries - :type value: list of :class:`AccessEntry` + :type value: list of :class:`~google.cloud.bigquery.AccessEntry` :param value: roles granted to entities for this dataset :raises: TypeError if 'value' is not a sequence, or ValueError if @@ -429,7 +429,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: dataset resource representation returned from the API - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` + :rtype: :class:`~google.cloud.bigquery.Dataset` :returns: Dataset parsed from ``resource``. """ dsr = resource.get('datasetReference') @@ -451,7 +451,7 @@ def _parse_access_entries(access): :type access: list of mappings :param access: each mapping represents a single access entry. - :rtype: list of :class:`AccessEntry` + :rtype: list of :class:`~google.cloud.bigquery.AccessEntry` :returns: a list of parsed entries. :raises: :class:`ValueError` if a entry in ``access`` has more keys than ``role`` and one additional key. @@ -530,7 +530,7 @@ def table(self, table_id): :type table_id: str :param table_id: the ID of the table. - :rtype: :class:`google.cloud.bigquery.table.TableReference` + :rtype: :class:`~google.cloud.bigquery.TableReference` :returns: a TableReference for a table in this dataset. """ return TableReference(self, table_id) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py index 4786ef8ef5fa..6d6f70f471d9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py @@ -19,11 +19,6 @@ .. _Python Database API Specification v2.0 (DB-API): https://www.python.org/dev/peps/pep-0249/ - -.. warning:: - The ``dbapi`` module is **alpha**. The implementation is not complete. It - might be changed in backward-incompatible ways and is not subject to any SLA - or deprecation policy. """ from google.cloud.bigquery.dbapi.connection import connect diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index e3560224008c..a40d873eea06 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -105,7 +105,7 @@ def from_api_repr(cls, resource): :param resource: A column in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.external_config.BigtableColumn` + :rtype: :class:`~google.cloud.bigquery.BigtableColumn` :returns: Configuration parsed from ``resource``. """ config = cls() @@ -177,7 +177,7 @@ def from_api_repr(cls, resource): from the API. :rtype: - :class:`google.cloud.bigquery.external_config.BigtableColumnFamily` + :class:`~google.cloud.bigquery.BigtableColumnFamily` :returns: Configuration parsed from ``resource``. """ config = cls() @@ -239,7 +239,7 @@ def from_api_repr(cls, resource): A BigtableOptions in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.external_config.BigtableOptions` + :rtype: :class:`~google.cloud.bigquery.BigtableOptions` :returns: Configuration parsed from ``resource``. """ config = cls() @@ -321,7 +321,7 @@ def from_api_repr(cls, resource): A CSVOptions in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` + :rtype: :class:`~google.cloud.bigquery.CSVOptions` :returns: Configuration parsed from ``resource``. """ slr = resource.get('skipLeadingRows') @@ -369,7 +369,7 @@ def from_api_repr(cls, resource): returned from the API. :rtype: - :class:`google.cloud.bigquery.external_config.GoogleSheetsOptions` + :class:`~google.cloud.bigquery.GoogleSheetsOptions` :returns: Configuration parsed from ``resource``. """ slr = resource.get('skipLeadingRows') @@ -476,7 +476,7 @@ def from_api_repr(cls, resource): An extract job configuration in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions` + :rtype: :class:`~google.cloud.bigquery.CSVOptions` :returns: Configuration parsed from ``resource``. """ config = cls(resource['sourceFormat']) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5c7ffd3ecb93..de21387bcf2d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -153,7 +153,7 @@ class _AsyncJob(google.api_core.future.polling.PollingFuture): :type job_id: str :param job_id: the job's ID in the project associated with the client. - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: A client which holds credentials and project configuration. """ def __init__(self, job_id, client): @@ -176,12 +176,12 @@ def project(self): def _require_client(self, client): """Check client or verify over-ride. - :type client: :class:`~google.cloud.bigquery.client.Client` or + :type client: :class:`~google.cloud.bigquery.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. - :rtype: :class:`google.cloud.bigquery.client.Client` + :rtype: :class:`google.cloud.bigquery.Client` :returns: The client passed in or the currently bound client. """ if client is None: @@ -374,7 +374,7 @@ def begin(self, client=None, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert - :type client: :class:`~google.cloud.bigquery.client.Client` or + :type client: :class:`~google.cloud.bigquery.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. @@ -403,7 +403,7 @@ def exists(self, client=None, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - :type client: :class:`~google.cloud.bigquery.client.Client` or + :type client: :class:`~google.cloud.bigquery.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. @@ -431,7 +431,7 @@ def reload(self, client=None, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - :type client: :class:`~google.cloud.bigquery.client.Client` or + :type client: :class:`~google.cloud.bigquery.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. @@ -450,7 +450,7 @@ def cancel(self, client=None): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel - :type client: :class:`~google.cloud.bigquery.client.Client` or + :type client: :class:`~google.cloud.bigquery.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. @@ -663,7 +663,7 @@ def from_api_repr(cls, resource): An extract job configuration in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` :returns: Configuration parsed from ``resource``. """ schema = resource.pop('schema', {'fields': ()}) @@ -688,10 +688,10 @@ class LoadJob(_AsyncJob): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris for supported URI formats. Pass None for jobs that load from a file. - :type destination: :class:`google.cloud.bigquery.table.TableReference` + :type destination: :class:`google.cloud.bigquery.TableReference` :param destination: reference to table into which data is to be loaded. - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). """ @@ -712,98 +712,98 @@ def __init__(self, job_id, source_uris, destination, client, @property def allow_jagged_rows(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`. + :class:`~google.cloud.bigquery.LoadJobConfig.allow_jagged_rows`. """ return self._configuration.allow_jagged_rows @property def allow_quoted_newlines(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`. + :class:`~google.cloud.bigquery.LoadJobConfig.allow_quoted_newlines`. """ return self._configuration.allow_quoted_newlines @property def autodetect(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.autodetect`. + :class:`~google.cloud.bigquery.LoadJobConfig.autodetect`. """ return self._configuration.autodetect @property def create_disposition(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.create_disposition`. + :class:`~google.cloud.bigquery.LoadJobConfig.create_disposition`. """ return self._configuration.create_disposition @property def encoding(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.encoding`. + :class:`~google.cloud.bigquery.LoadJobConfig.encoding`. """ return self._configuration.encoding @property def field_delimiter(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.field_delimiter`. + :class:`~google.cloud.bigquery.LoadJobConfig.field_delimiter`. """ return self._configuration.field_delimiter @property def ignore_unknown_values(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`. + :class:`~google.cloud.bigquery.LoadJobConfig.ignore_unknown_values`. """ return self._configuration.ignore_unknown_values @property def max_bad_records(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.max_bad_records`. + :class:`~google.cloud.bigquery.LoadJobConfig.max_bad_records`. """ return self._configuration.max_bad_records @property def null_marker(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.null_marker`. + :class:`~google.cloud.bigquery.LoadJobConfig.null_marker`. """ return self._configuration.null_marker @property def quote_character(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.quote_character`. + :class:`~google.cloud.bigquery.LoadJobConfig.quote_character`. """ return self._configuration.quote_character @property def skip_leading_rows(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`. + :class:`~google.cloud.bigquery.LoadJobConfig.skip_leading_rows`. """ return self._configuration.skip_leading_rows @property def source_format(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.source_format`. + :class:`~google.cloud.bigquery.LoadJobConfig.source_format`. """ return self._configuration.source_format @property def write_disposition(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.write_disposition`. + :class:`~google.cloud.bigquery.LoadJobConfig.write_disposition`. """ return self._configuration.write_disposition @property def schema(self): """See - :class:`~google.cloud.bigquery.job.LoadJobConfig.schema`. + :class:`~google.cloud.bigquery.LoadJobConfig.schema`. """ return self._configuration.schema @@ -884,11 +884,11 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.LoadJob` + :rtype: :class:`google.cloud.bigquery.LoadJob` :returns: Job parsed from ``resource``. """ job_id, config_resource = cls._get_resource_config(resource) @@ -943,7 +943,7 @@ def from_api_repr(cls, resource): An extract job configuration in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` :returns: Configuration parsed from ``resource``. """ config = cls() @@ -957,17 +957,17 @@ class CopyJob(_AsyncJob): :type job_id: str :param job_id: the job's ID, within the project belonging to ``client``. - :type sources: list of :class:`google.cloud.bigquery.table.TableReference` + :type sources: list of :class:`google.cloud.bigquery.TableReference` :param sources: Table into which data is to be loaded. - :type destination: :class:`google.cloud.bigquery.table.TableReference` + :type destination: :class:`google.cloud.bigquery.TableReference` :param destination: Table into which data is to be loaded. - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - :type job_config: :class:`~google.cloud.bigquery.job.CopyJobConfig` + :type job_config: :class:`~google.cloud.bigquery.CopyJobConfig` :param job_config: (Optional) Extra configuration options for the copy job. """ @@ -986,14 +986,14 @@ def __init__(self, job_id, sources, destination, client, job_config=None): @property def create_disposition(self): """See - :class:`~google.cloud.bigquery.job.CopyJobConfig.create_disposition`. + :class:`~google.cloud.bigquery.CopyJobConfig.create_disposition`. """ return self._configuration.create_disposition @property def write_disposition(self): """See - :class:`~google.cloud.bigquery.job.CopyJobConfig.write_disposition`. + :class:`~google.cloud.bigquery.CopyJobConfig.write_disposition`. """ return self._configuration.write_disposition @@ -1040,11 +1040,11 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.CopyJob` + :rtype: :class:`google.cloud.bigquery.CopyJob` :returns: Job parsed from ``resource``. """ job_id, config_resource = cls._get_resource_config(resource) @@ -1118,7 +1118,7 @@ def from_api_repr(cls, resource): An extract job configuration in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` :returns: Configuration parsed from ``resource``. """ config = cls() @@ -1132,7 +1132,7 @@ class ExtractJob(_AsyncJob): :type job_id: str :param job_id: the job's ID - :type source: :class:`google.cloud.bigquery.table.TableReference` + :type source: :class:`google.cloud.bigquery.TableReference` :param source: Table into which data is to be loaded. :type destination_uris: list of string @@ -1140,11 +1140,11 @@ class ExtractJob(_AsyncJob): URIs describing where the extracted data will be written in Cloud Storage, using the format ``gs:///``. - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: A client which holds credentials and project configuration. - :type job_config: :class:`~google.cloud.bigquery.job.ExtractJobConfig` + :type job_config: :class:`~google.cloud.bigquery.ExtractJobConfig` :param job_config: (Optional) Extra configuration options for the extract job. """ @@ -1164,28 +1164,28 @@ def __init__( @property def compression(self): """See - :class:`~google.cloud.bigquery.job.ExtractJobConfig.compression`. + :class:`~google.cloud.bigquery.ExtractJobConfig.compression`. """ return self._configuration.compression @property def destination_format(self): """See - :class:`~google.cloud.bigquery.job.ExtractJobConfig.destination_format`. + :class:`~google.cloud.bigquery.ExtractJobConfig.destination_format`. """ return self._configuration.destination_format @property def field_delimiter(self): """See - :class:`~google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. + :class:`~google.cloud.bigquery.ExtractJobConfig.field_delimiter`. """ return self._configuration.field_delimiter @property def print_header(self): """See - :class:`~google.cloud.bigquery.job.ExtractJobConfig.print_header`. + :class:`~google.cloud.bigquery.ExtractJobConfig.print_header`. """ return self._configuration.print_header @@ -1246,11 +1246,11 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.ExtractJob` + :rtype: :class:`google.cloud.bigquery.ExtractJob` :returns: Job parsed from ``resource``. """ job_id, config_resource = cls._get_resource_config(resource) @@ -1351,7 +1351,7 @@ def from_api_repr(cls, resource): An extract job configuration in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` + :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` :returns: Configuration parsed from ``resource``. """ config = cls() @@ -1421,9 +1421,9 @@ def from_api_repr(cls, resource): 'query_parameters', _QUERY_PARAMETERS_KEY, _AbstractQueryParameter) """ A list of - :class:`google.cloud.bigquery.query.ArrayQueryParameter`, - :class:`google.cloud.bigquery.query.ScalarQueryParameter`, or - :class:`google.cloud.bigquery.query.StructQueryParameter` + :class:`google.cloud.bigquery.ArrayQueryParameter`, + :class:`google.cloud.bigquery.ScalarQueryParameter`, or + :class:`google.cloud.bigquery.StructQueryParameter` (empty by default) See: @@ -1433,7 +1433,7 @@ def from_api_repr(cls, resource): udf_resources = _ListApiResourceProperty( 'udf_resources', _UDF_RESOURCES_KEY, UDFResource) """ - A list of :class:`google.cloud.bigquery.query.UDFResource` (empty + A list of :class:`google.cloud.bigquery.UDFResource` (empty by default) See: @@ -1462,7 +1462,7 @@ def from_api_repr(cls, resource): 'table_definitions', 'tableDefinitions', dict) """ Definitions for external tables. A dictionary from table names (strings) - to :class:`google.cloud.bigquery.external_config.ExternalConfig`. + to :class:`google.cloud.bigquery.ExternalConfig`. See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions @@ -1495,11 +1495,11 @@ class QueryJob(_AsyncJob): :type query: str :param query: SQL query string - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - :type job_config: :class:`~google.cloud.bigquery.job.QueryJobConfig` + :type job_config: :class:`~google.cloud.bigquery.QueryJobConfig` :param job_config: (Optional) Extra configuration options for the query job. """ @@ -1522,105 +1522,105 @@ def __init__(self, job_id, query, client, job_config=None): @property def allow_large_results(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. + :class:`~google.cloud.bigquery.QueryJobConfig.allow_large_results`. """ return self._configuration.allow_large_results @property def create_disposition(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.create_disposition`. + :class:`~google.cloud.bigquery.QueryJobConfig.create_disposition`. """ return self._configuration.create_disposition @property def default_dataset(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.default_dataset`. + :class:`~google.cloud.bigquery.QueryJobConfig.default_dataset`. """ return self._configuration.default_dataset @property def destination(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.destination`. + :class:`~google.cloud.bigquery.QueryJobConfig.destination`. """ return self._configuration.destination @property def dry_run(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.dry_run`. + :class:`~google.cloud.bigquery.QueryJobConfig.dry_run`. """ return self._configuration.dry_run @property def flatten_results(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.flatten_results`. + :class:`~google.cloud.bigquery.QueryJobConfig.flatten_results`. """ return self._configuration.flatten_results @property def priority(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.priority`. + :class:`~google.cloud.bigquery.QueryJobConfig.priority`. """ return self._configuration.priority @property def query_parameters(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.query_parameters`. + :class:`~google.cloud.bigquery.QueryJobConfig.query_parameters`. """ return self._configuration.query_parameters @property def udf_resources(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.udf_resources`. + :class:`~google.cloud.bigquery.QueryJobConfig.udf_resources`. """ return self._configuration.udf_resources @property def use_legacy_sql(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + :class:`~google.cloud.bigquery.QueryJobConfig.use_legacy_sql`. """ return self._configuration.use_legacy_sql @property def use_query_cache(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. + :class:`~google.cloud.bigquery.QueryJobConfig.use_query_cache`. """ return self._configuration.use_query_cache @property def write_disposition(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.write_disposition`. + :class:`~google.cloud.bigquery.QueryJobConfig.write_disposition`. """ return self._configuration.write_disposition @property def maximum_billing_tier(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. + :class:`~google.cloud.bigquery.QueryJobConfig.maximum_billing_tier`. """ return self._configuration.maximum_billing_tier @property def maximum_bytes_billed(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. + :class:`~google.cloud.bigquery.QueryJobConfig.maximum_bytes_billed`. """ return self._configuration.maximum_bytes_billed @property def table_definitions(self): """See - :class:`~google.cloud.bigquery.job.QueryJobConfig.table_definitions`. + :class:`~google.cloud.bigquery.QueryJobConfig.table_definitions`. """ return self._configuration.table_definitions @@ -1681,11 +1681,11 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.RunAsyncQueryJob` + :rtype: :class:`google.cloud.bigquery.RunAsyncQueryJob` :returns: Job parsed from ``resource``. """ job_id, config = cls._get_resource_config(resource) @@ -1833,9 +1833,9 @@ def undeclared_query_paramters(self): :rtype: list of - :class:`~google.cloud.bigquery.query.ArrayQueryParameter`, - :class:`~google.cloud.bigquery.query.ScalarQueryParameter`, or - :class:`~google.cloud.bigquery.query.StructQueryParameter` + :class:`~google.cloud.bigquery.ArrayQueryParameter`, + :class:`~google.cloud.bigquery.ScalarQueryParameter`, or + :class:`~google.cloud.bigquery.StructQueryParameter` :returns: undeclared parameters, or an empty list if the query has not yet completed. """ @@ -1862,7 +1862,7 @@ def query_results(self, retry=DEFAULT_RETRY): :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`~google.cloud.bigquery.query.QueryResults` + :rtype: :class:`~google.cloud.bigquery.QueryResults` :returns: results instance """ if not self._query_results: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 9577fa57cc5d..0b8808dd44a9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -60,7 +60,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: JSON mapping of parameter - :rtype: :class:`ScalarQueryParameter` + :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter` """ raise NotImplementedError @@ -105,7 +105,7 @@ def positional(cls, type_, value): :class:`datetime.date`. :param value: the scalar parameter value. - :rtype: :class:`ScalarQueryParameter` + :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter` :returns: instance without name """ return cls(None, type_, value) @@ -117,7 +117,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: JSON mapping of parameter - :rtype: :class:`ScalarQueryParameter` + :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter` :returns: instance """ name = resource.get('name') @@ -154,7 +154,8 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - tuple: The contents of this :class:`ScalarQueryParameter`. + tuple: The contents of this + :class:`~google.cloud.bigquery.ScalarQueryParameter`. """ return ( self.name, @@ -206,7 +207,7 @@ def positional(cls, array_type, values): :type values: list of appropriate scalar type :param values: the parameter array values. - :rtype: :class:`ArrayQueryParameter` + :rtype: :class:`~google.cloud.bigquery.ArrayQueryParameter` :returns: instance without name """ return cls(None, array_type, values) @@ -249,7 +250,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: JSON mapping of parameter - :rtype: :class:`ArrayQueryParameter` + :rtype: :class:`~google.cloud.bigquery.ArrayQueryParameter` :returns: instance """ array_type = resource['parameterType']['arrayType']['type'] @@ -293,7 +294,8 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - tuple: The contents of this :class:`ArrayQueryParameter`. + tuple: The contents of this + :class:`~google.cloud.bigquery.ArrayQueryParameter`. """ return ( self.name, @@ -320,7 +322,10 @@ class StructQueryParameter(_AbstractQueryParameter): :param name: Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - :type sub_params: tuple of :class:`ScalarQueryParameter` + :type sub_params: + tuple of :class:`~google.cloud.bigquery.ScalarQueryParameter`, + :class:`~google.cloud.bigquery.ArrayQueryParameter`, or + :class:`~google.cloud.bigquery.StructQueryParameter` :param sub_params: the sub-parameters for the struct """ def __init__(self, name, *sub_params): @@ -342,10 +347,13 @@ def __init__(self, name, *sub_params): def positional(cls, *sub_params): """Factory for positional parameters. - :type sub_params: tuple of :class:`ScalarQueryParameter` + :type sub_params: + tuple of :class:`~google.cloud.bigquery.ScalarQueryParameter`, + :class:`~google.cloud.bigquery.ArrayQueryParameter`, or + :class:`~google.cloud.bigquery.StructQueryParameter` :param sub_params: the sub-parameters for the struct - :rtype: :class:`StructQueryParameter` + :rtype: :class:`~google.cloud.bigquery.StructQueryParameter` :returns: instance without name """ return cls(None, *sub_params) @@ -357,7 +365,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: JSON mapping of parameter - :rtype: :class:`StructQueryParameter` + :rtype: :class:`~google.cloud.bigquery.StructQueryParameter` :returns: instance """ name = resource.get('name') @@ -431,7 +439,8 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - tuple: The contents of this :class:`ArrayQueryParameter`. + tuple: The contents of this + :class:`~google.cloud.biquery.ArrayQueryParameter`. """ return ( self.name, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 1aa95271c70d..a9dc7b2eac1f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -32,7 +32,7 @@ class SchemaField(object): :type description: str :param description: optional description for the field. - :type fields: tuple of :class:`SchemaField` + :type fields: tuple of :class:`~google.cloud.bigquery.SchemaField` :param fields: subfields (requires ``field_type`` of 'RECORD'). """ def __init__(self, name, field_type, mode='NULLABLE', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 2b9dea02d34e..238832ea23ba 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -39,7 +39,7 @@ class TableReference(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables - :type dataset_ref: :class:`google.cloud.bigquery.dataset.DatasetReference` + :type dataset_ref: :class:`google.cloud.bigquery.DatasetReference` :param dataset_ref: a pointer to the dataset :type table_id: str @@ -95,7 +95,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: table reference representation returned from the API - :rtype: :class:`google.cloud.bigquery.table.TableReference` + :rtype: :class:`google.cloud.bigquery.TableReference` :returns: Table reference parsed from ``resource``. """ from google.cloud.bigquery.dataset import DatasetReference @@ -152,10 +152,10 @@ class Table(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables - :type table_ref: :class:`google.cloud.bigquery.table.TableReference` + :type table_ref: :class:`google.cloud.bigquery.TableReference` :param table_ref: a pointer to a table - :type schema: list of :class:`SchemaField` + :type schema: list of :class:`~google.cloud.bigquery.SchemaField` :param schema: The table's schema """ @@ -217,7 +217,7 @@ def path(self): def schema(self): """Table's schema. - :rtype: list of :class:`SchemaField` + :rtype: list of :class:`~google.cloud.bigquery.SchemaField` :returns: fields describing the schema """ return list(self._schema) @@ -226,7 +226,7 @@ def schema(self): def schema(self, value): """Update table's schema - :type value: list of :class:`SchemaField` + :type value: list of :class:`~google.cloud.bigquery.SchemaField` :param value: fields describing the schema :raises: TypeError if 'value' is not a sequence, or ValueError if @@ -386,7 +386,7 @@ def partition_expiration(self, value): """Update the experation time in ms for a partition :type value: int - :param value: partition experiation time in ms + :param value: partition experiation time in milliseconds """ if not isinstance(value, (int, type(None))): raise ValueError( @@ -571,7 +571,7 @@ def view_use_legacy_sql(self, value): def streaming_buffer(self): """Information about a table's streaming buffer. - :rtype: :class:`StreamingBuffer` + :rtype: :class:`~google.cloud.bigquery.StreamingBuffer` :returns: Streaming buffer information, returned from get_table. """ sb = self._properties.get('streamingBuffer') @@ -584,7 +584,7 @@ def external_data_configuration(self): If not set, None is returned. - :rtype: :class:`ExternalConfig`, or ``NoneType`` + :rtype: :class:`~google.cloud.bigquery.ExternalConfig`, or ``NoneType`` :returns: The external configuration, or None (the default). """ return self._external_config @@ -593,7 +593,8 @@ def external_data_configuration(self): def external_data_configuration(self, value): """Sets the configuration for an external data source. - :type value: :class:`ExternalConfig`, or ``NoneType`` + :type value: + :class:`~google.cloud.bigquery.ExternalConfig`, or ``NoneType`` :param value: The ExternalConfig, or None to unset. """ if not (value is None or isinstance(value, ExternalConfig)): @@ -607,10 +608,10 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: table resource representation returned from the API - :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` + :type dataset: :class:`google.cloud.bigquery.Dataset` :param dataset: The dataset containing the table. - :rtype: :class:`google.cloud.bigquery.table.Table` + :rtype: :class:`google.cloud.bigquery.Table` :returns: Table parsed from ``resource``. """ from google.cloud.bigquery import dataset @@ -719,7 +720,7 @@ def _row_from_mapping(mapping, schema): required fields in the schema. Keys which do not correspond to a field in the schema are ignored. - :type schema: list of :class:`SchemaField` + :type schema: list of :class:`~google.cloud.bigquery.SchemaField` :param schema: The schema of the table destination for the rows :rtype: tuple From d500ba62de5ce7550c67f13fa769539c34640001 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 24 Oct 2017 09:23:27 -0700 Subject: [PATCH 0339/2016] BigQuery: Make job.begin() method private. (#4242) --- .../google/cloud/bigquery/client.py | 11 ++++--- .../google/cloud/bigquery/job.py | 4 +-- .../tests/unit/test_job.py | 30 +++++++++---------- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 79970049c7d2..1062dc279725 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -647,7 +647,7 @@ def load_table_from_uri(self, source_uris, destination, if isinstance(source_uris, six.string_types): source_uris = [source_uris] job = LoadJob(job_id, source_uris, destination, self, job_config) - job.begin(retry=retry) + job._begin(retry=retry) return job def load_table_from_file(self, file_obj, destination, @@ -692,8 +692,7 @@ def load_table_from_file(self, file_obj, destination, :rtype: :class:`~google.cloud.bigquery.LoadJob` :returns: the job instance used to load the data (e.g., for - querying status). Note that the job is already started: - do not call ``job.begin()``. + querying status). :raises: :class:`ValueError` if ``size`` is not passed in and can not be determined, or if the ``file_obj`` can be detected to be a file opened in text mode. @@ -862,7 +861,7 @@ def copy_table(self, sources, destination, job_id=None, job_id_prefix=None, sources = [sources] job = CopyJob(job_id, sources, destination, client=self, job_config=job_config) - job.begin(retry=retry) + job._begin(retry=retry) return job def extract_table( @@ -911,7 +910,7 @@ def extract_table( job = ExtractJob( job_id, source, destination_uris, client=self, job_config=job_config) - job.begin(retry=retry) + job._begin(retry=retry) return job def query(self, query, job_config=None, job_id=None, job_id_prefix=None, @@ -945,7 +944,7 @@ def query(self, query, job_config=None, job_id=None, job_id_prefix=None, """ job_id = _make_job_id(job_id, job_id_prefix) job = QueryJob(job_id, query, client=self, job_config=job_config) - job.begin(retry=retry) + job._begin(retry=retry) return job def create_rows(self, table, rows, selected_fields=None, **kwargs): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index de21387bcf2d..696086ac1ed2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -368,7 +368,7 @@ def _get_resource_config(cls, resource): config = resource['configuration'][cls._JOB_TYPE] return job_id, config - def begin(self, client=None, retry=DEFAULT_RETRY): + def _begin(self, client=None, retry=DEFAULT_RETRY): """API call: begin the job via a POST request See @@ -521,7 +521,7 @@ def result(self, timeout=None): given timeout. """ if self.state is None: - self.begin() + self._begin() # TODO: modify PollingFuture so it can pass a retry argument to done(). return super(_AsyncJob, self).result(timeout=timeout) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 34c5c92ef4d9..7c89a9ba7e01 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -553,7 +553,7 @@ def test_begin_w_already_running(self): job._properties['status'] = {'state': 'RUNNING'} with self.assertRaises(ValueError): - job.begin() + job._begin() def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -568,7 +568,7 @@ def test_begin_w_bound_client(self): job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -608,7 +608,7 @@ def test_begin_w_autodetect(self): config.autodetect = True job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config) - job.begin() + job._begin() sent = { 'jobReference': { @@ -688,7 +688,7 @@ def test_begin_w_alternate_client(self): config.source_format = 'CSV' config.write_disposition = 'WRITE_TRUNCATE' - job.begin(client=client2) + job._begin(client=client2) self.assertEqual(len(conn1._requested), 0) self.assertEqual(len(conn2._requested), 1) @@ -1019,7 +1019,7 @@ def test_begin_w_bound_client(self): destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -1077,7 +1077,7 @@ def test_begin_w_alternate_client(self): config.write_disposition = 'WRITE_TRUNCATE' job = self._make_one(self.JOB_ID, [source], destination, client1, config) - job.begin(client=client2) + job._begin(client=client2) self.assertEqual(len(conn1._requested), 0) self.assertEqual(len(conn2._requested), 1) @@ -1339,7 +1339,7 @@ def test_begin_w_bound_client(self): job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -1394,7 +1394,7 @@ def test_begin_w_alternate_client(self): job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1, job_config) - job.begin(client=client2) + job._begin(client=client2) self.assertEqual(len(conn1._requested), 0) self.assertEqual(len(conn2._requested), 1) @@ -2243,7 +2243,7 @@ def test_begin_w_bound_client(self): job = self._make_one( self.JOB_ID, self.QUERY, client, job_config=config) - job.begin() + job._begin() self.assertIsNone(job.default_dataset) self.assertEqual(job.udf_resources, []) @@ -2324,7 +2324,7 @@ def test_begin_w_alternate_client(self): job = self._make_one( self.JOB_ID, self.QUERY, client1, job_config=config) - job.begin(client=client2) + job._begin(client=client2) self.assertEqual(len(conn1._requested), 0) self.assertEqual(len(conn2._requested), 1) @@ -2373,7 +2373,7 @@ def test_begin_w_udf(self): job = self._make_one( self.JOB_ID, self.QUERY, client, job_config=config) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -2431,7 +2431,7 @@ def test_begin_w_named_query_parameter(self): job = self._make_one( self.JOB_ID, self.QUERY, client, job_config=jconfig) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -2486,7 +2486,7 @@ def test_begin_w_positional_query_parameter(self): job = self._make_one( self.JOB_ID, self.QUERY, client, job_config=jconfig) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -2572,7 +2572,7 @@ def test_begin_w_table_defs(self): job = self._make_one( self.JOB_ID, self.QUERY, client, job_config=config) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -2615,7 +2615,7 @@ def test_dry_run_query(self): job = self._make_one( self.JOB_ID, self.QUERY, client, job_config=config) - job.begin() + job._begin() self.assertEqual(job.udf_resources, []) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] From 62310639d824734b591d43bbb014b502e3367da9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 24 Oct 2017 16:49:54 -0700 Subject: [PATCH 0340/2016] BigQuery: cancel query on exception in query_rows. (#4248) * BigQuery: cancel query on exception in query_rows. This covers the primary reason one would want the job ID from an exception in query rows: to cancel the job. * Lint fixes. * Fix api_core package docstring references. --- .../google/cloud/bigquery/client.py | 83 +++++++++-- .../google/cloud/bigquery/job.py | 25 ++-- .../google-cloud-bigquery/tests/system.py | 9 +- .../tests/unit/test_client.py | 136 +++++++++++++++++- 4 files changed, 229 insertions(+), 24 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1062dc279725..0a3d1f79cfbf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -17,6 +17,7 @@ from __future__ import absolute_import import collections +import concurrent.futures import functools import os import uuid @@ -28,6 +29,8 @@ from google.resumable_media.requests import ResumableUpload from google.api_core import page_iterator +from google.api_core.exceptions import GoogleAPICallError +from google.api_core.exceptions import NotFound from google.cloud import exceptions from google.cloud.client import ClientWithProject @@ -506,7 +509,7 @@ def job_from_resource(self, resource): :class:`google.cloud.bigquery.LoadJob`, :class:`google.cloud.bigquery.CopyJob`, :class:`google.cloud.bigquery.ExtractJob`, - :class:`google.cloud.bigquery.QueryJob` + or :class:`google.cloud.bigquery.QueryJob` :returns: the job instance, constructed via the resource """ config = resource['configuration'] @@ -540,7 +543,7 @@ def get_job(self, job_id, project=None, retry=DEFAULT_RETRY): :class:`google.cloud.bigquery.LoadJob`, :class:`google.cloud.bigquery.CopyJob`, :class:`google.cloud.bigquery.ExtractJob`, - :class:`google.cloud.bigquery.QueryJob` + or :class:`google.cloud.bigquery.QueryJob` :returns: Concrete job instance, based on the resource returned by the API. """ @@ -556,6 +559,42 @@ def get_job(self, job_id, project=None, retry=DEFAULT_RETRY): return self.job_from_resource(resource) + def cancel_job(self, job_id, project=None, retry=DEFAULT_RETRY): + """Attempt to cancel a job from a job ID. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel + + :type job_id: str + :param job_id: Name of the job. + + :type project: str + :param project: + project ID owning the job (defaults to the client's project) + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :rtype: One of: + :class:`google.cloud.bigquery.job.LoadJob`, + :class:`google.cloud.bigquery.job.CopyJob`, + :class:`google.cloud.bigquery.job.ExtractJob`, + or :class:`google.cloud.bigquery.job.QueryJob` + :returns: + Concrete job instance, based on the resource returned by the API. + """ + extra_params = {'projection': 'full'} + + if project is None: + project = self.project + + path = '/projects/{}/jobs/{}/cancel'.format(project, job_id) + + resource = self._call_api( + retry, method='POST', path=path, query_params=extra_params) + + return self.job_from_resource(resource['job']) + def list_jobs(self, max_results=None, page_token=None, all_users=None, state_filter=None, retry=DEFAULT_RETRY): """List jobs for the project associated with this client. @@ -1101,8 +1140,9 @@ def create_rows_json(self, table, json_rows, row_ids=None, return errors - def query_rows(self, query, job_config=None, job_id=None, timeout=None, - retry=DEFAULT_RETRY): + def query_rows( + self, query, job_config=None, job_id=None, job_id_prefix=None, + timeout=None, retry=DEFAULT_RETRY): """Start a query job and wait for the results. See @@ -1119,10 +1159,15 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None, :type job_id: str :param job_id: (Optional) ID to use for the query job. + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + :type timeout: float :param timeout: (Optional) How long (in seconds) to wait for job to complete - before raising a :class:`TimeoutError`. + before raising a :class:`concurrent.futures.TimeoutError`. :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: @@ -1132,13 +1177,29 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None, from the total number of rows in the current page: ``iterator.page.num_items``). - :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job - failed or :class:`TimeoutError` if the job did not complete in the - given timeout. + :raises: + :class:`~google.api_core.exceptions.GoogleAPICallError` if the + job failed or :class:`concurrent.futures.TimeoutError` if the job + did not complete in the given timeout. + + When an exception happens, the query job will be cancelled on a + best-effort basis. """ - job = self.query( - query, job_config=job_config, job_id=job_id, retry=retry) - return job.result(timeout=timeout) + job_id = _make_job_id(job_id, job_id_prefix) + + try: + job = self.query( + query, job_config=job_config, job_id=job_id, retry=retry) + rows_iterator = job.result(timeout=timeout) + except (GoogleAPICallError, concurrent.futures.TimeoutError): + try: + self.cancel_job(job_id) + except NotFound: + # It's OK if couldn't cancel because job never got created. + pass + raise + + return rows_iterator def list_rows(self, table, selected_fields=None, max_results=None, page_token=None, start_index=None, retry=DEFAULT_RETRY): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 696086ac1ed2..b3c63a9c7305 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -509,16 +509,18 @@ def done(self, retry=DEFAULT_RETRY): def result(self, timeout=None): """Start the job and wait for it to complete and get the result. - :type timeout: int - :param timeout: How long to wait for job to complete before raising - a :class:`TimeoutError`. + :type timeout: float + :param timeout: + How long (in seconds) to wait for job to complete before raising + a :class:`concurrent.futures.TimeoutError`. :rtype: _AsyncJob :returns: This instance. - :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job - failed or :class:`TimeoutError` if the job did not complete in the - given timeout. + :raises: + :class:`~google.cloud.exceptions.GoogleCloudError` if the job + failed or :class:`concurrent.futures.TimeoutError` if the job did + not complete in the given timeout. """ if self.state is None: self._begin() @@ -1913,8 +1915,8 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): :type timeout: float :param timeout: - How long to wait for job to complete before raising a - :class:`TimeoutError`. + How long (in seconds) to wait for job to complete before raising + a :class:`concurrent.futures.TimeoutError`. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the call that retrieves rows. @@ -1927,9 +1929,10 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): from the total number of rows in the current page: ``iterator.page.num_items``). - :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job - failed or :class:`TimeoutError` if the job did not complete in the - given timeout. + :raises: + :class:`~google.cloud.exceptions.GoogleCloudError` if the job + failed or :class:`concurrent.futures.TimeoutError` if the job did + not complete in the given timeout. """ super(QueryJob, self).result(timeout=timeout) # Return an iterator instead of returning the job. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 335beda29863..40782ec1f90e 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -13,6 +13,7 @@ # limitations under the License. import base64 +import concurrent.futures import csv import datetime import json @@ -818,7 +819,13 @@ def test_query_rows_w_failed_query(self): with self.assertRaises(BadRequest): Config.CLIENT.query_rows('invalid syntax;') - # TODO(swast): Ensure that job ID is surfaced in the exception. + + def test_query_rows_w_timeout(self): + with self.assertRaises(concurrent.futures.TimeoutError): + Config.CLIENT.query_rows( + 'SELECT * FROM `bigquery-public-data.github_repos.commits`;', + job_id_prefix='test_query_rows_w_timeout_', + timeout=1) # 1 second is much too short for this query. def test_dbapi_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c71847e367e0..074567fd89ad 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import concurrent.futures import copy import email import io @@ -1214,6 +1215,63 @@ def test_get_job_hit(self): self.assertEqual(req['path'], '/projects/PROJECT/jobs/query_job') self.assertEqual(req['query_params'], {'projection': 'full'}) + def test_cancel_job_miss_w_explict_project(self): + from google.cloud.exceptions import NotFound + + OTHER_PROJECT = 'OTHER_PROJECT' + JOB_ID = 'NONESUCH' + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = _Connection() + + with self.assertRaises(NotFound): + client.cancel_job(JOB_ID, project=OTHER_PROJECT) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual( + req['path'], '/projects/OTHER_PROJECT/jobs/NONESUCH/cancel') + self.assertEqual(req['query_params'], {'projection': 'full'}) + + def test_cancel_job_hit(self): + from google.cloud.bigquery.job import QueryJob + + JOB_ID = 'query_job' + QUERY = 'SELECT * from test_dataset:test_table' + QUERY_JOB_RESOURCE = { + 'id': '{}:{}'.format(self.PROJECT, JOB_ID), + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': 'query_job', + }, + 'state': 'RUNNING', + 'configuration': { + 'query': { + 'query': QUERY, + } + }, + } + RESOURCE = { + 'job': QUERY_JOB_RESOURCE, + } + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = _Connection(RESOURCE) + + job = client.cancel_job(JOB_ID) + + self.assertIsInstance(job, QueryJob) + self.assertEqual(job.job_id, JOB_ID) + self.assertEqual(job.query, QUERY) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual( + req['path'], '/projects/PROJECT/jobs/query_job/cancel') + self.assertEqual(req['query_params'], {'projection': 'full'}) + def test_list_jobs_defaults(self): from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import CopyJob @@ -2583,6 +2641,82 @@ def test_query_rows_w_job_config(self): self.assertEqual(configuration['query']['useLegacySql'], True) self.assertEqual(configuration['dryRun'], True) + def test_query_rows_w_timeout_error(self): + JOB = 'job-id' + QUERY = 'SELECT COUNT(*) FROM persons' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + }, + }, + 'status': { + 'state': 'RUNNING', + }, + } + CANCEL_RESOURCE = {'job': RESOURCE} + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection(RESOURCE, CANCEL_RESOURCE) + + with mock.patch( + 'google.cloud.bigquery.job.QueryJob.result') as mock_result: + mock_result.side_effect = concurrent.futures.TimeoutError( + 'time is up') + + with self.assertRaises(concurrent.futures.TimeoutError): + client.query_rows( + QUERY, + job_id_prefix='test_query_rows_w_timeout_', + timeout=1) + + # Should attempt to create and cancel the job. + self.assertEqual(len(conn._requested), 2) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + cancelreq = conn._requested[1] + self.assertEqual(cancelreq['method'], 'POST') + self.assertIn( + '/projects/PROJECT/jobs/test_query_rows_w_timeout_', + cancelreq['path']) + self.assertIn('/cancel', cancelreq['path']) + + def test_query_rows_w_api_error(self): + from google.api_core.exceptions import NotFound + + QUERY = 'SELECT COUNT(*) FROM persons' + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _Connection() + + # Expect a 404 error since we didn't supply a job resource. + with self.assertRaises(NotFound): + client.query_rows( + QUERY, + job_id_prefix='test_query_rows_w_error_', + timeout=1) + + # Should attempt to create and cancel the job. + self.assertEqual(len(conn._requested), 2) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + cancelreq = conn._requested[1] + self.assertEqual(cancelreq['method'], 'POST') + self.assertIn( + '/projects/PROJECT/jobs/test_query_rows_w_error_', + cancelreq['path']) + self.assertIn('/cancel', cancelreq['path']) + def test_list_rows(self): import datetime from google.cloud._helpers import UTC @@ -3224,7 +3358,7 @@ def __init__(self, *responses): self._requested = [] def api_request(self, **kw): - from google.cloud.exceptions import NotFound + from google.api_core.exceptions import NotFound self._requested.append(kw) if len(self._responses) == 0: From 6e4a2fbf7122f6757e11f8a00de2994f312a64a9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 25 Oct 2017 14:39:38 -0700 Subject: [PATCH 0341/2016] BigQuery: use submodule aliases in docstrings. (#4255) This allows Sphinx to find the source files for documenting properties/attributes. --- .../google/cloud/bigquery/__init__.py | 5 + .../google/cloud/bigquery/client.py | 127 ++++++++-------- .../google/cloud/bigquery/dataset.py | 18 +-- .../google/cloud/bigquery/external_config.py | 11 +- .../google/cloud/bigquery/job.py | 136 +++++++++--------- .../google/cloud/bigquery/query.py | 31 ++-- .../google/cloud/bigquery/schema.py | 15 +- .../google/cloud/bigquery/table.py | 14 +- 8 files changed, 190 insertions(+), 167 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index cda5236d3c60..0407d5984f84 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -16,6 +16,11 @@ The main concepts with this API are: +- :class:`~google.cloud.bigquery.client.Client` manages connections to the + BigQuery API. Use the client methods to run jobs (such as a + :class:`~google.cloud.bigquery.job.QueryJob` via + :meth:`~google.cloud.bigquery.client.Client.query`) and manage resources. + - :class:`~google.cloud.bigquery.dataset.Dataset` represents a collection of tables. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 0a3d1f79cfbf..4148519c84f5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -184,7 +184,7 @@ def list_datasets(self, include_all=False, filter=None, max_results=None, :param retry: (Optional) How to retry the RPC. :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.Dataset`. + :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`. accessible to the current client. """ extra_params = {} @@ -215,7 +215,7 @@ def dataset(self, dataset_id, project=None): :param project: (Optional) project ID for the dataset (defaults to the project of the client). - :rtype: :class:`google.cloud.bigquery.DatasetReference` + :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference` :returns: a new ``DatasetReference`` instance """ if project is None: @@ -229,12 +229,12 @@ def create_dataset(self, dataset): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - :type dataset: :class:`~google.cloud.bigquery.Dataset` + :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset` :param dataset: A ``Dataset`` populated with the desired initial state. If project is missing, it defaults to the project of the client. - :rtype: ":class:`~google.cloud.bigquery.Dataset`" + :rtype: ":class:`~google.cloud.bigquery.dataset.Dataset`" :returns: a new ``Dataset`` returned from the service. """ path = '/projects/%s/datasets' % (dataset.project,) @@ -248,10 +248,10 @@ def create_table(self, table): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - :type table: :class:`~google.cloud.bigquery.Table` + :type table: :class:`~google.cloud.bigquery.table.Table` :param table: A ``Table`` populated with the desired initial state. - :rtype: ":class:`~google.cloud.bigquery.Table`" + :rtype: ":class:`~google.cloud.bigquery.table.Table`" :returns: a new ``Table`` returned from the service. """ path = '/projects/%s/datasets/%s/tables' % ( @@ -274,13 +274,13 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): """Fetch the dataset referenced by ``dataset_ref`` :type dataset_ref: - :class:`google.cloud.bigquery.DatasetReference` + :class:`google.cloud.bigquery.dataset.DatasetReference` :param dataset_ref: the dataset to use. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.Dataset` + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: a ``Dataset`` instance """ api_response = self._call_api(retry, @@ -292,13 +292,13 @@ def get_table(self, table_ref, retry=DEFAULT_RETRY): """Fetch the table referenced by ``table_ref`` :type table_ref: - :class:`google.cloud.bigquery.TableReference` + :class:`google.cloud.bigquery.table.TableReference` :param table_ref: the table to use. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.Table` + :rtype: :class:`google.cloud.bigquery.table.Table` :returns: a ``Table`` instance """ api_response = self._call_api(retry, method='GET', path=table_ref.path) @@ -318,7 +318,7 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): will only be saved if no modifications to the dataset occurred since the read. - :type dataset: :class:`google.cloud.bigquery.Dataset` + :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` :param dataset: the dataset to update. :type fields: sequence of string @@ -328,7 +328,7 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.Dataset` + :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: the modified ``Dataset`` instance """ path = '/projects/%s/datasets/%s' % (dataset.project, @@ -361,13 +361,13 @@ def update_table(self, table, properties, retry=DEFAULT_RETRY): https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update :type table: - :class:`google.cloud.bigquery.Table` + :class:`google.cloud.bigquery.table.Table` :param table_ref: the table to update. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.Table` + :rtype: :class:`google.cloud.bigquery.table.Table` :returns: a ``Table`` instance """ partial = table._build_resource(properties) @@ -388,8 +388,8 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None, https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list :type dataset: One of: - :class:`~google.cloud.bigquery.Dataset` - :class:`~google.cloud.bigquery.DatasetReference` + :class:`~google.cloud.bigquery.dataset.Dataset` + :class:`~google.cloud.bigquery.dataset.DatasetReference` :param dataset: the dataset whose tables to list, or a reference to it. :type max_results: int @@ -405,7 +405,7 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None, :param retry: (Optional) How to retry the RPC. :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.Table` + :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` contained within the current dataset. """ if not isinstance(dataset, (Dataset, DatasetReference)): @@ -429,8 +429,8 @@ def delete_dataset(self, dataset, retry=DEFAULT_RETRY): https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete :type dataset: One of: - :class:`~google.cloud.bigquery.Dataset` - :class:`~google.cloud.bigquery.DatasetReference` + :class:`~google.cloud.bigquery.dataset.Dataset` + :class:`~google.cloud.bigquery.dataset.DatasetReference` :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. @@ -448,8 +448,8 @@ def delete_table(self, table, retry=DEFAULT_RETRY): https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete :type table: One of: - :class:`~google.cloud.bigquery.Table` - :class:`~google.cloud.bigquery.TableReference` + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` :param table: the table to delete, or a reference to it. :type retry: :class:`google.api_core.retry.Retry` @@ -478,7 +478,7 @@ def _get_query_results(self, job_id, retry, project=None, timeout_ms=None): (Optional) number of milliseconds the the API call should wait for the query to complete before the request times out. - :rtype: :class:`google.cloud.bigquery.QueryResults` + :rtype: :class:`google.cloud.bigquery.query.QueryResults` :returns: a new ``QueryResults`` instance """ @@ -506,10 +506,10 @@ def job_from_resource(self, resource): :param resource: one job resource from API response :rtype: One of: - :class:`google.cloud.bigquery.LoadJob`, - :class:`google.cloud.bigquery.CopyJob`, - :class:`google.cloud.bigquery.ExtractJob`, - or :class:`google.cloud.bigquery.QueryJob` + :class:`google.cloud.bigquery.job.LoadJob`, + :class:`google.cloud.bigquery.job.CopyJob`, + :class:`google.cloud.bigquery.job.ExtractJob`, + or :class:`google.cloud.bigquery.job.QueryJob` :returns: the job instance, constructed via the resource """ config = resource['configuration'] @@ -540,10 +540,10 @@ def get_job(self, job_id, project=None, retry=DEFAULT_RETRY): :param retry: (Optional) How to retry the RPC. :rtype: One of: - :class:`google.cloud.bigquery.LoadJob`, - :class:`google.cloud.bigquery.CopyJob`, - :class:`google.cloud.bigquery.ExtractJob`, - or :class:`google.cloud.bigquery.QueryJob` + :class:`google.cloud.bigquery.job.LoadJob`, + :class:`google.cloud.bigquery.job.CopyJob`, + :class:`google.cloud.bigquery.job.ExtractJob`, + or :class:`google.cloud.bigquery.job.QueryJob` :returns: Concrete job instance, based on the resource returned by the API. """ @@ -673,14 +673,14 @@ def load_table_from_uri(self, source_uris, destination, randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - :type job_config: :class:`google.cloud.bigquery.LoadJobConfig` + :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.LoadJob` - :returns: a new ``LoadJob`` instance + :rtype: :class:`google.cloud.bigquery.job.LoadJob` + :returns: a new :class:`~google.cloud.bigquery.job.LoadJob` instance """ job_id = _make_job_id(job_id, job_id_prefix) if isinstance(source_uris, six.string_types): @@ -725,10 +725,10 @@ def load_table_from_file(self, file_obj, destination, randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - :type job_config: :class:`google.cloud.bigquery.LoadJobConfig` + :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` :param job_config: (Optional) Extra configuration options for the job. - :rtype: :class:`~google.cloud.bigquery.LoadJob` + :rtype: :class:`~google.cloud.bigquery.job.LoadJob` :returns: the job instance used to load the data (e.g., for querying status). @@ -868,9 +868,9 @@ def copy_table(self, sources, destination, job_id=None, job_id_prefix=None, https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy :type sources: One of: - :class:`~google.cloud.bigquery.TableReference` + :class:`~google.cloud.bigquery.table.TableReference` sequence of - :class:`~google.cloud.bigquery.TableReference` + :class:`~google.cloud.bigquery.table.TableReference` :param sources: Table or tables to be copied. @@ -885,14 +885,14 @@ def copy_table(self, sources, destination, job_id=None, job_id_prefix=None, randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - :type job_config: :class:`google.cloud.bigquery.CopyJobConfig` + :type job_config: :class:`google.cloud.bigquery.job.CopyJobConfig` :param job_config: (Optional) Extra configuration options for the job. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.CopyJob` - :returns: a new ``CopyJob`` instance + :rtype: :class:`google.cloud.bigquery.job.copyjob` + :returns: a new :class:`google.cloud.bigquery.job.copyjob` instance """ job_id = _make_job_id(job_id, job_id_prefix) @@ -911,7 +911,7 @@ def extract_table( See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract - :type source: :class:`google.cloud.bigquery.TableReference` + :type source: :class:`google.cloud.bigquery.table.TableReference` :param source: table to be extracted. :type destination_uris: One of: @@ -932,14 +932,14 @@ def extract_table( randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - :type job_config: :class:`google.cloud.bigquery.ExtractJobConfig` + :type job_config: :class:`google.cloud.bigquery.job.ExtractJobConfig` :param job_config: (Optional) Extra configuration options for the job. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.ExtractJob` - :returns: a new ``ExtractJob`` instance + :rtype: :class:`google.cloud.bigquery.job.ExtractJob` + :returns: a new :class:`google.cloud.bigquery.job.ExtractJob` instance """ job_id = _make_job_id(job_id, job_id_prefix) @@ -964,7 +964,7 @@ def query(self, query, job_config=None, job_id=None, job_id_prefix=None, SQL query to be executed. Defaults to the standard SQL dialect. Use the ``job_config`` parameter to change dialects. - :type job_config: :class:`google.cloud.bigquery.QueryJobConfig` + :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig` :param job_config: (Optional) Extra configuration options for the job. :type job_id: str @@ -978,8 +978,8 @@ def query(self, query, job_config=None, job_id=None, job_id_prefix=None, :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.QueryJob` - :returns: a new ``QueryJob`` instance + :rtype: :class:`google.cloud.bigquery.job.QueryJob` + :returns: a new :class:`google.cloud.bigquery.job.QueryJob` instance """ job_id = _make_job_id(job_id, job_id_prefix) job = QueryJob(job_id, query, client=self, job_config=job_config) @@ -993,8 +993,8 @@ def create_rows(self, table, rows, selected_fields=None, **kwargs): https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll :type table: One of: - :class:`~google.cloud.bigquery.Table` - :class:`~google.cloud.bigquery.TableReference` + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` :param table: the destination table for the row data, or a reference to it. @@ -1009,14 +1009,15 @@ def create_rows(self, table, rows, selected_fields=None, **kwargs): not correspond to a field in the schema are ignored. :type selected_fields: - list of :class:`~google.cloud.bigquery.SchemaField` + list of :class:`~google.cloud.bigquery.schema.SchemaField` :param selected_fields: The fields to return. Required if ``table`` is a - :class:`~google.cloud.bigquery.TableReference`. + :class:`~google.cloud.bigquery.table.TableReference`. :type kwargs: dict - :param kwargs: Keyword arguments to - `~google.cloud.bigquery.Client.create_rows_json` + :param kwargs: + Keyword arguments to + :meth:`~google.cloud.bigquery.client.Client.create_rows_json` :rtype: list of mappings :returns: One mapping per row with insert errors: the "index" key @@ -1062,8 +1063,8 @@ def create_rows_json(self, table, json_rows, row_ids=None, https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll :type table: One of: - :class:`~google.cloud.bigquery.Table` - :class:`~google.cloud.bigquery.TableReference` + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` :param table: the destination table for the row data, or a reference to it. @@ -1153,7 +1154,7 @@ def query_rows( SQL query to be executed. Defaults to the standard SQL dialect. Use the ``job_config`` parameter to change dialects. - :type job_config: :class:`google.cloud.bigquery.QueryJobConfig` + :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig` :param job_config: (Optional) Extra configuration options for the job. :type job_id: str @@ -1216,15 +1217,15 @@ def list_rows(self, table, selected_fields=None, max_results=None, local copy of the schema is up-to-date, call ``client.get_table``. :type table: One of: - :class:`~google.cloud.bigquery.Table` - :class:`~google.cloud.bigquery.TableReference` + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` :param table: the table to list, or a reference to it. :type selected_fields: - list of :class:`~google.cloud.bigquery.SchemaField` + list of :class:`~google.cloud.bigquery.schema.SchemaField` :param selected_fields: The fields to return. Required if ``table`` is a - :class:`~google.cloud.bigquery.TableReference`. + :class:`~google.cloud.bigquery.table.TableReference`. :type max_results: int :param max_results: maximum number of rows to return. @@ -1286,8 +1287,8 @@ def list_partitions(self, table, retry=DEFAULT_RETRY): """List the partitions in a table. :type table: One of: - :class:`~google.cloud.bigquery.Table` - :class:`~google.cloud.bigquery.TableReference` + :class:`~google.cloud.bigquery.table.Table` + :class:`~google.cloud.bigquery.table.TableReference` :param table: the table to list, or a reference to it. :type retry: :class:`google.api_core.retry.Retry` @@ -1362,7 +1363,7 @@ def _item_to_table(iterator, resource): :type resource: dict :param resource: An item to be converted to a table. - :rtype: :class:`~google.cloud.bigquery.Table` + :rtype: :class:`~google.cloud.bigquery.table.Table` :returns: The next table in the page. """ return Table.from_api_repr(resource) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index ef1b59f869c6..c29972d2728f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -146,7 +146,7 @@ def table(self, table_id): :type table_id: str :param table_id: the ID of the table. - :rtype: :class:`google.cloud.bigquery.TableReference` + :rtype: :class:`google.cloud.bigquery.table.TableReference` :returns: a TableReference for a table in this dataset. """ return TableReference(self, table_id) @@ -197,7 +197,7 @@ class Dataset(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets - :type dataset_ref: :class:`~google.cloud.bigquery.DatasetReference` + :type dataset_ref: :class:`~google.cloud.bigquery.dataset.DatasetReference` :param dataset_ref: a pointer to a dataset """ @@ -238,7 +238,8 @@ def access_entries(self): def access_entries(self, value): """Update dataset's access entries - :type value: list of :class:`~google.cloud.bigquery.AccessEntry` + :type value: + list of :class:`~google.cloud.bigquery.dataset.AccessEntry` :param value: roles granted to entities for this dataset :raises: TypeError if 'value' is not a sequence, or ValueError if @@ -401,8 +402,9 @@ def labels(self): """Labels for the dataset. This method always returns a dict. To change a dataset's labels, - modify the dict, then call ``Client.update_dataset``. To delete a - label, set its value to ``None`` before updating. + modify the dict, then call + :meth:`google.cloud.bigquery.client.Client.update_dataset`. To delete + a label, set its value to ``None`` before updating. :rtype: dict, {str -> str} :returns: A dict of the the dataset's labels. @@ -429,7 +431,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: dataset resource representation returned from the API - :rtype: :class:`~google.cloud.bigquery.Dataset` + :rtype: :class:`~google.cloud.bigquery.dataset.Dataset` :returns: Dataset parsed from ``resource``. """ dsr = resource.get('datasetReference') @@ -451,7 +453,7 @@ def _parse_access_entries(access): :type access: list of mappings :param access: each mapping represents a single access entry. - :rtype: list of :class:`~google.cloud.bigquery.AccessEntry` + :rtype: list of :class:`~google.cloud.bigquery.dataset.AccessEntry` :returns: a list of parsed entries. :raises: :class:`ValueError` if a entry in ``access`` has more keys than ``role`` and one additional key. @@ -530,7 +532,7 @@ def table(self, table_id): :type table_id: str :param table_id: the ID of the table. - :rtype: :class:`~google.cloud.bigquery.TableReference` + :rtype: :class:`~google.cloud.bigquery.table.TableReference` :returns: a TableReference for a table in this dataset. """ return TableReference(self, table_id) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index a40d873eea06..47c546baed9e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -177,7 +177,7 @@ def from_api_repr(cls, resource): from the API. :rtype: - :class:`~google.cloud.bigquery.BigtableColumnFamily` + :class:`~google.cloud.bigquery.external_config.BigtableColumnFamily` :returns: Configuration parsed from ``resource``. """ config = cls() @@ -239,7 +239,8 @@ def from_api_repr(cls, resource): A BigtableOptions in the same representation as is returned from the API. - :rtype: :class:`~google.cloud.bigquery.BigtableOptions` + :rtype: + :class:`~google.cloud.bigquery.external_config.BigtableOptions` :returns: Configuration parsed from ``resource``. """ config = cls() @@ -321,7 +322,7 @@ def from_api_repr(cls, resource): A CSVOptions in the same representation as is returned from the API. - :rtype: :class:`~google.cloud.bigquery.CSVOptions` + :rtype: :class:`~google.cloud.bigquery.external_config.CSVOptions` :returns: Configuration parsed from ``resource``. """ slr = resource.get('skipLeadingRows') @@ -369,7 +370,7 @@ def from_api_repr(cls, resource): returned from the API. :rtype: - :class:`~google.cloud.bigquery.GoogleSheetsOptions` + :class:`~google.cloud.bigquery.external_config.GoogleSheetsOptions` :returns: Configuration parsed from ``resource``. """ slr = resource.get('skipLeadingRows') @@ -476,7 +477,7 @@ def from_api_repr(cls, resource): An extract job configuration in the same representation as is returned from the API. - :rtype: :class:`~google.cloud.bigquery.CSVOptions` + :rtype: :class:`~google.cloud.bigquery.external_config.CSVOptions` :returns: Configuration parsed from ``resource``. """ config = cls(resource['sourceFormat']) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index b3c63a9c7305..bffea1e13229 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -153,7 +153,7 @@ class _AsyncJob(google.api_core.future.polling.PollingFuture): :type job_id: str :param job_id: the job's ID in the project associated with the client. - :type client: :class:`google.cloud.bigquery.Client` + :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration. """ def __init__(self, job_id, client): @@ -176,12 +176,12 @@ def project(self): def _require_client(self, client): """Check client or verify over-ride. - :type client: :class:`~google.cloud.bigquery.Client` or + :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. - :rtype: :class:`google.cloud.bigquery.Client` + :rtype: :class:`google.cloud.bigquery.client.Client` :returns: The client passed in or the currently bound client. """ if client is None: @@ -374,7 +374,7 @@ def _begin(self, client=None, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert - :type client: :class:`~google.cloud.bigquery.Client` or + :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. @@ -403,7 +403,7 @@ def exists(self, client=None, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - :type client: :class:`~google.cloud.bigquery.Client` or + :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. @@ -431,7 +431,7 @@ def reload(self, client=None, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - :type client: :class:`~google.cloud.bigquery.Client` or + :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. @@ -450,7 +450,7 @@ def cancel(self, client=None): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel - :type client: :class:`~google.cloud.bigquery.Client` or + :type client: :class:`~google.cloud.bigquery.client.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. @@ -665,7 +665,7 @@ def from_api_repr(cls, resource): An extract job configuration in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` + :rtype: :class:`google.cloud.bigquery.job.LoadJobConfig` :returns: Configuration parsed from ``resource``. """ schema = resource.pop('schema', {'fields': ()}) @@ -693,7 +693,7 @@ class LoadJob(_AsyncJob): :type destination: :class:`google.cloud.bigquery.TableReference` :param destination: reference to table into which data is to be loaded. - :type client: :class:`google.cloud.bigquery.Client` + :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). """ @@ -714,98 +714,98 @@ def __init__(self, job_id, source_uris, destination, client, @property def allow_jagged_rows(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.allow_jagged_rows`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`. """ return self._configuration.allow_jagged_rows @property def allow_quoted_newlines(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.allow_quoted_newlines`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`. """ return self._configuration.allow_quoted_newlines @property def autodetect(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.autodetect`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`. """ return self._configuration.autodetect @property def create_disposition(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.create_disposition`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`. """ return self._configuration.create_disposition @property def encoding(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.encoding`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`. """ return self._configuration.encoding @property def field_delimiter(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.field_delimiter`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`. """ return self._configuration.field_delimiter @property def ignore_unknown_values(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.ignore_unknown_values`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`. """ return self._configuration.ignore_unknown_values @property def max_bad_records(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.max_bad_records`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`. """ return self._configuration.max_bad_records @property def null_marker(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.null_marker`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`. """ return self._configuration.null_marker @property def quote_character(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.quote_character`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`. """ return self._configuration.quote_character @property def skip_leading_rows(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.skip_leading_rows`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`. """ return self._configuration.skip_leading_rows @property def source_format(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.source_format`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`. """ return self._configuration.source_format @property def write_disposition(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.write_disposition`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`. """ return self._configuration.write_disposition @property def schema(self): """See - :class:`~google.cloud.bigquery.LoadJobConfig.schema`. + :attr:`google.cloud.bigquery.job.LoadJobConfig.schema`. """ return self._configuration.schema @@ -886,11 +886,11 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.Client` + :type client: :class:`google.cloud.bigquery.client.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.LoadJob` + :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: Job parsed from ``resource``. """ job_id, config_resource = cls._get_resource_config(resource) @@ -945,7 +945,7 @@ def from_api_repr(cls, resource): An extract job configuration in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` + :rtype: :class:`google.cloud.bigquery.job.CopyJobConfig` :returns: Configuration parsed from ``resource``. """ config = cls() @@ -965,11 +965,11 @@ class CopyJob(_AsyncJob): :type destination: :class:`google.cloud.bigquery.TableReference` :param destination: Table into which data is to be loaded. - :type client: :class:`google.cloud.bigquery.Client` + :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - :type job_config: :class:`~google.cloud.bigquery.CopyJobConfig` + :type job_config: :class:`~google.cloud.bigquery.job.CopyJobConfig` :param job_config: (Optional) Extra configuration options for the copy job. """ @@ -988,14 +988,14 @@ def __init__(self, job_id, sources, destination, client, job_config=None): @property def create_disposition(self): """See - :class:`~google.cloud.bigquery.CopyJobConfig.create_disposition`. + :attr:`google.cloud.bigquery.job.CopyJobConfig.create_disposition`. """ return self._configuration.create_disposition @property def write_disposition(self): """See - :class:`~google.cloud.bigquery.CopyJobConfig.write_disposition`. + :attr:`google.cloud.bigquery.job.CopyJobConfig.write_disposition`. """ return self._configuration.write_disposition @@ -1042,11 +1042,11 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.Client` + :type client: :class:`google.cloud.bigquery.client.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.CopyJob` + :rtype: :class:`google.cloud.bigquery.job.CopyJob` :returns: Job parsed from ``resource``. """ job_id, config_resource = cls._get_resource_config(resource) @@ -1120,7 +1120,7 @@ def from_api_repr(cls, resource): An extract job configuration in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` + :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` :returns: Configuration parsed from ``resource``. """ config = cls() @@ -1142,11 +1142,11 @@ class ExtractJob(_AsyncJob): URIs describing where the extracted data will be written in Cloud Storage, using the format ``gs:///``. - :type client: :class:`google.cloud.bigquery.Client` + :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration. - :type job_config: :class:`~google.cloud.bigquery.ExtractJobConfig` + :type job_config: :class:`~google.cloud.bigquery.job.ExtractJobConfig` :param job_config: (Optional) Extra configuration options for the extract job. """ @@ -1166,28 +1166,28 @@ def __init__( @property def compression(self): """See - :class:`~google.cloud.bigquery.ExtractJobConfig.compression`. + :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`. """ return self._configuration.compression @property def destination_format(self): """See - :class:`~google.cloud.bigquery.ExtractJobConfig.destination_format`. + :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`. """ return self._configuration.destination_format @property def field_delimiter(self): """See - :class:`~google.cloud.bigquery.ExtractJobConfig.field_delimiter`. + :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. """ return self._configuration.field_delimiter @property def print_header(self): """See - :class:`~google.cloud.bigquery.ExtractJobConfig.print_header`. + :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`. """ return self._configuration.print_header @@ -1248,11 +1248,11 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.Client` + :type client: :class:`google.cloud.bigquery.client.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.ExtractJob` + :rtype: :class:`google.cloud.bigquery.job.ExtractJob` :returns: Job parsed from ``resource``. """ job_id, config_resource = cls._get_resource_config(resource) @@ -1353,7 +1353,7 @@ def from_api_repr(cls, resource): An extract job configuration in the same representation as is returned from the API. - :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` + :rtype: :class:`google.cloud.bigquery.job.QueryJobConfig` :returns: Configuration parsed from ``resource``. """ config = cls() @@ -1369,7 +1369,9 @@ def from_api_repr(cls, resource): allow_large_results = _TypedApiResourceProperty( 'allow_large_results', 'allowLargeResults', bool) - """See + """bool: Allow large query results tables (legacy SQL, only) + + See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults """ @@ -1387,12 +1389,18 @@ def from_api_repr(cls, resource): destination = _TypedApiResourceProperty( 'destination', 'destinationTable', TableReference) - """See + """ + google.cloud.bigquery.table.TableReference: table where results are written + + See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable """ dry_run = _TypedApiResourceProperty('dry_run', 'dryRun', bool) - """See + """ + bool: ``True`` if this query should be a dry run to estimate costs. + + See https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun """ @@ -1497,11 +1505,11 @@ class QueryJob(_AsyncJob): :type query: str :param query: SQL query string - :type client: :class:`google.cloud.bigquery.Client` + :type client: :class:`google.cloud.bigquery.client.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - :type job_config: :class:`~google.cloud.bigquery.QueryJobConfig` + :type job_config: :class:`~google.cloud.bigquery.job.QueryJobConfig` :param job_config: (Optional) Extra configuration options for the query job. """ @@ -1524,105 +1532,105 @@ def __init__(self, job_id, query, client, job_config=None): @property def allow_large_results(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.allow_large_results`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. """ return self._configuration.allow_large_results @property def create_disposition(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.create_disposition`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.create_disposition`. """ return self._configuration.create_disposition @property def default_dataset(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.default_dataset`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.default_dataset`. """ return self._configuration.default_dataset @property def destination(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.destination`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.destination`. """ return self._configuration.destination @property def dry_run(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.dry_run`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.dry_run`. """ return self._configuration.dry_run @property def flatten_results(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.flatten_results`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.flatten_results`. """ return self._configuration.flatten_results @property def priority(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.priority`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.priority`. """ return self._configuration.priority @property def query_parameters(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.query_parameters`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.query_parameters`. """ return self._configuration.query_parameters @property def udf_resources(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.udf_resources`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.udf_resources`. """ return self._configuration.udf_resources @property def use_legacy_sql(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.use_legacy_sql`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. """ return self._configuration.use_legacy_sql @property def use_query_cache(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.use_query_cache`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. """ return self._configuration.use_query_cache @property def write_disposition(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.write_disposition`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.write_disposition`. """ return self._configuration.write_disposition @property def maximum_billing_tier(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.maximum_billing_tier`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. """ return self._configuration.maximum_billing_tier @property def maximum_bytes_billed(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.maximum_bytes_billed`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. """ return self._configuration.maximum_bytes_billed @property def table_definitions(self): """See - :class:`~google.cloud.bigquery.QueryJobConfig.table_definitions`. + :attr:`google.cloud.bigquery.job.QueryJobConfig.table_definitions`. """ return self._configuration.table_definitions @@ -1683,11 +1691,11 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.Client` + :type client: :class:`google.cloud.bigquery.client.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.RunAsyncQueryJob` + :rtype: :class:`google.cloud.bigquery.job.QueryJob` :returns: Job parsed from ``resource``. """ job_id, config = cls._get_resource_config(resource) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 0b8808dd44a9..6405605582dc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -60,7 +60,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: JSON mapping of parameter - :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter` + :rtype: :class:`~google.cloud.bigquery.query.ScalarQueryParameter` """ raise NotImplementedError @@ -105,7 +105,7 @@ def positional(cls, type_, value): :class:`datetime.date`. :param value: the scalar parameter value. - :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter` + :rtype: :class:`~google.cloud.bigquery.query.ScalarQueryParameter` :returns: instance without name """ return cls(None, type_, value) @@ -117,7 +117,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: JSON mapping of parameter - :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter` + :rtype: :class:`~google.cloud.bigquery.query.ScalarQueryParameter` :returns: instance """ name = resource.get('name') @@ -155,7 +155,7 @@ def _key(self): Returns: tuple: The contents of this - :class:`~google.cloud.bigquery.ScalarQueryParameter`. + :class:`~google.cloud.bigquery.query.ScalarQueryParameter`. """ return ( self.name, @@ -207,7 +207,7 @@ def positional(cls, array_type, values): :type values: list of appropriate scalar type :param values: the parameter array values. - :rtype: :class:`~google.cloud.bigquery.ArrayQueryParameter` + :rtype: :class:`~google.cloud.bigquery.query.ArrayQueryParameter` :returns: instance without name """ return cls(None, array_type, values) @@ -250,7 +250,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: JSON mapping of parameter - :rtype: :class:`~google.cloud.bigquery.ArrayQueryParameter` + :rtype: :class:`~google.cloud.bigquery.query.ArrayQueryParameter` :returns: instance """ array_type = resource['parameterType']['arrayType']['type'] @@ -295,7 +295,7 @@ def _key(self): Returns: tuple: The contents of this - :class:`~google.cloud.bigquery.ArrayQueryParameter`. + :class:`~google.cloud.bigquery.query.ArrayQueryParameter`. """ return ( self.name, @@ -323,9 +323,9 @@ class StructQueryParameter(_AbstractQueryParameter): parameter can only be addressed via position (``?``). :type sub_params: - tuple of :class:`~google.cloud.bigquery.ScalarQueryParameter`, - :class:`~google.cloud.bigquery.ArrayQueryParameter`, or - :class:`~google.cloud.bigquery.StructQueryParameter` + tuple of :class:`~google.cloud.bigquery.query.ScalarQueryParameter`, + :class:`~google.cloud.bigquery.query.ArrayQueryParameter`, or + :class:`~google.cloud.bigquery.query.StructQueryParameter` :param sub_params: the sub-parameters for the struct """ def __init__(self, name, *sub_params): @@ -348,12 +348,13 @@ def positional(cls, *sub_params): """Factory for positional parameters. :type sub_params: - tuple of :class:`~google.cloud.bigquery.ScalarQueryParameter`, - :class:`~google.cloud.bigquery.ArrayQueryParameter`, or - :class:`~google.cloud.bigquery.StructQueryParameter` + tuple of + :class:`~google.cloud.bigquery.query.ScalarQueryParameter`, + :class:`~google.cloud.bigquery.query.ArrayQueryParameter`, or + :class:`~google.cloud.bigquery.query.StructQueryParameter` :param sub_params: the sub-parameters for the struct - :rtype: :class:`~google.cloud.bigquery.StructQueryParameter` + :rtype: :class:`~google.cloud.bigquery.query.StructQueryParameter` :returns: instance without name """ return cls(None, *sub_params) @@ -365,7 +366,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: JSON mapping of parameter - :rtype: :class:`~google.cloud.bigquery.StructQueryParameter` + :rtype: :class:`~google.cloud.bigquery.query.StructQueryParameter` :returns: instance """ name = resource.get('name') diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index a9dc7b2eac1f..cb0ce2a96b5a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -32,7 +32,7 @@ class SchemaField(object): :type description: str :param description: optional description for the field. - :type fields: tuple of :class:`~google.cloud.bigquery.SchemaField` + :type fields: tuple of :class:`~google.cloud.bigquery.schema.SchemaField` :param fields: subfields (requires ``field_type`` of 'RECORD'). """ def __init__(self, name, field_type, mode='NULLABLE', @@ -53,7 +53,8 @@ def from_api_repr(cls, api_repr): :meth:`to_api_repr`. Returns: - SchemaField: The ``SchemaField`` object. + google.cloud.biquery.schema.SchemaField: + The ``SchemaField`` object. """ return cls( field_type=api_repr['type'].upper(), @@ -131,7 +132,8 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - tuple: The contents of this :class:`SchemaField`. + tuple: The contents of this + :class:`~google.cloud.bigquery.schema.SchemaField`. """ return ( self._name, @@ -162,7 +164,9 @@ def _parse_schema_resource(info): :type info: mapping :param info: should contain a "fields" key to be parsed - :rtype: list of :class:`SchemaField`, or ``NoneType`` + :rtype: + list of :class:`google.cloud.bigquery.schema.SchemaField`, or + ``NoneType`` :returns: a list of parsed fields, or ``None`` if no "fields" key is present in ``info``. """ @@ -184,7 +188,8 @@ def _parse_schema_resource(info): def _build_schema_resource(fields): """Generate a resource fragment for a schema. - :type fields: sequence of :class:`SchemaField` + :type fields: + sequence of :class:`~google.cloud.bigquery.schema.SchemaField` :param fields: schema to be dumped :rtype: mapping diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 238832ea23ba..bc5c3044b55a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -95,7 +95,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: table reference representation returned from the API - :rtype: :class:`google.cloud.bigquery.TableReference` + :rtype: :class:`google.cloud.bigquery.table.TableReference` :returns: Table reference parsed from ``resource``. """ from google.cloud.bigquery.dataset import DatasetReference @@ -152,10 +152,10 @@ class Table(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables - :type table_ref: :class:`google.cloud.bigquery.TableReference` + :type table_ref: :class:`google.cloud.bigquery.table.TableReference` :param table_ref: a pointer to a table - :type schema: list of :class:`~google.cloud.bigquery.SchemaField` + :type schema: list of :class:`~google.cloud.bigquery.schema.SchemaField` :param schema: The table's schema """ @@ -217,7 +217,7 @@ def path(self): def schema(self): """Table's schema. - :rtype: list of :class:`~google.cloud.bigquery.SchemaField` + :rtype: list of :class:`~google.cloud.bigquery.schema.SchemaField` :returns: fields describing the schema """ return list(self._schema) @@ -226,7 +226,7 @@ def schema(self): def schema(self, value): """Update table's schema - :type value: list of :class:`~google.cloud.bigquery.SchemaField` + :type value: list of :class:`~google.cloud.bigquery.schema.SchemaField` :param value: fields describing the schema :raises: TypeError if 'value' is not a sequence, or ValueError if @@ -611,7 +611,7 @@ def from_api_repr(cls, resource): :type dataset: :class:`google.cloud.bigquery.Dataset` :param dataset: The dataset containing the table. - :rtype: :class:`google.cloud.bigquery.Table` + :rtype: :class:`google.cloud.bigquery.table.Table` :returns: Table parsed from ``resource``. """ from google.cloud.bigquery import dataset @@ -720,7 +720,7 @@ def _row_from_mapping(mapping, schema): required fields in the schema. Keys which do not correspond to a field in the schema are ignored. - :type schema: list of :class:`~google.cloud.bigquery.SchemaField` + :type schema: list of :class:`~google.cloud.bigquery.schema.SchemaField` :param schema: The schema of the table destination for the rows :rtype: tuple From 4cdaa9572c535b3710e9e0b6b829edd0c998c294 Mon Sep 17 00:00:00 2001 From: Michael Darakananda Date: Sat, 28 Oct 2017 03:14:59 +1100 Subject: [PATCH 0342/2016] bigquery: add simple benchmark (#4273) --- .../google-cloud-bigquery/benchmark/README.md | 8 +++++ .../benchmark/benchmark.py | 32 +++++++++++++++++++ .../benchmark/queries.json | 10 ++++++ 3 files changed, 50 insertions(+) create mode 100644 packages/google-cloud-bigquery/benchmark/README.md create mode 100644 packages/google-cloud-bigquery/benchmark/benchmark.py create mode 100644 packages/google-cloud-bigquery/benchmark/queries.json diff --git a/packages/google-cloud-bigquery/benchmark/README.md b/packages/google-cloud-bigquery/benchmark/README.md new file mode 100644 index 000000000000..435926acb045 --- /dev/null +++ b/packages/google-cloud-bigquery/benchmark/README.md @@ -0,0 +1,8 @@ +# BigQuery Benchmark +This directory contains benchmarks for BigQuery client. + +## Usage +`python benchmark.py queries.json` + +BigQuery service caches requests so the benchmark should be run +at least twice, disregarding the first result. diff --git a/packages/google-cloud-bigquery/benchmark/benchmark.py b/packages/google-cloud-bigquery/benchmark/benchmark.py new file mode 100644 index 000000000000..0281edbd1b6b --- /dev/null +++ b/packages/google-cloud-bigquery/benchmark/benchmark.py @@ -0,0 +1,32 @@ +from google.cloud import bigquery +from datetime import datetime +import json +import sys + +if len(sys.argv) < 2: + raise Exception('need query file, usage: python {0} '.format(sys.argv[0])) + +with open(sys.argv[1], 'r') as f: + queries = json.loads(f.read()) + +client = bigquery.Client() + +for query in queries: + start_time = datetime.now() + job = client.query(query) + rows = job.result() + + num_rows = 0 + num_cols = None + first_byte_time = None + + for row in rows: + if num_rows == 0: + num_cols = len(row) + first_byte_time = datetime.now() - start_time + elif num_cols != len(row): + raise Exception('found {0} columsn, expected {1}'.format(len(row), num_cols)) + num_rows += 1 + total_time = datetime.now() - start_time + print "query {0}: {1} rows, {2} cols, first byte {3} sec, total {4} sec"\ + .format(query, num_rows, num_cols, first_byte_time.total_seconds(), total_time.total_seconds()) diff --git a/packages/google-cloud-bigquery/benchmark/queries.json b/packages/google-cloud-bigquery/benchmark/queries.json new file mode 100644 index 000000000000..13fed38b52b3 --- /dev/null +++ b/packages/google-cloud-bigquery/benchmark/queries.json @@ -0,0 +1,10 @@ +[ + "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 10000", + "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 100000", + "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 1000000", + "SELECT title FROM `bigquery-public-data.samples.wikipedia` ORDER BY title LIMIT 1000", + "SELECT title, id, timestamp, contributor_ip FROM `bigquery-public-data.samples.wikipedia` WHERE title like 'Blo%' ORDER BY id", + "SELECT * FROM `bigquery-public-data.baseball.games_post_wide` ORDER BY gameId", + "SELECT * FROM `bigquery-public-data.samples.github_nested` WHERE repository.has_downloads ORDER BY repository.created_at LIMIT 10000", + "SELECT repo_name, path FROM `bigquery-public-data.github_repos.files` WHERE path LIKE '%.java' ORDER BY id LIMIT 1000000" +] From c60bc4337d355d994b49ea3e69739adb197b612e Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 30 Oct 2017 14:41:42 -0700 Subject: [PATCH 0343/2016] Cutting version 0.28.0 of `google-cloud-core`. (#4280) Also - updating all dependencies of `grpcio` to `>= 1.7.0`. This was due to an issue [1] with `1.6.0`. - updating the version of `google-api-core` (also to be released, This is required since the bounds on `grpcio` of `google-cloud-core==0.28.0` and `google-api-core==0.1.0` are mutually exclusive.) - Updating `google-api-core` CHANGELOG for release. - Updating packages to depend on `google-cloud-core>=0.28.0`. - Installing `nox -s lint` deps locally for vision. [1]: https://github.com/grpc/grpc/issues/12455 --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index ea25589bdc8b..53d79dd87bb1 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -51,7 +51,7 @@ REQUIREMENTS = [ - 'google-cloud-core >= 0.27.0, < 0.28dev', + 'google-cloud-core >= 0.28.0, < 0.29dev', 'google-auth >= 1.0.0', 'google-resumable-media >= 0.2.1', 'requests >= 2.18.0', From ffeca4b6778982572074db3107b91e8091f6464e Mon Sep 17 00:00:00 2001 From: Michael Darakananda Date: Tue, 31 Oct 2017 11:04:09 +1100 Subject: [PATCH 0344/2016] add license header to benchmark.py (#4284) --- .../google-cloud-bigquery/benchmark/benchmark.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/packages/google-cloud-bigquery/benchmark/benchmark.py b/packages/google-cloud-bigquery/benchmark/benchmark.py index 0281edbd1b6b..1bad6933156e 100644 --- a/packages/google-cloud-bigquery/benchmark/benchmark.py +++ b/packages/google-cloud-bigquery/benchmark/benchmark.py @@ -1,3 +1,17 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from google.cloud import bigquery from datetime import datetime import json From 1b78d6a0234a7836b48f127d9a0d2c76c94741e0 Mon Sep 17 00:00:00 2001 From: Luke Sneeringer Date: Tue, 31 Oct 2017 08:57:09 -0700 Subject: [PATCH 0345/2016] Switch copyright holder to "Google LLC" (#4287) --- packages/google-cloud-bigquery/benchmark/benchmark.py | 2 +- packages/google-cloud-bigquery/google/__init__.py | 2 +- packages/google-cloud-bigquery/google/cloud/__init__.py | 2 +- .../google-cloud-bigquery/google/cloud/bigquery/__init__.py | 2 +- .../google-cloud-bigquery/google/cloud/bigquery/_helpers.py | 2 +- packages/google-cloud-bigquery/google/cloud/bigquery/_http.py | 2 +- packages/google-cloud-bigquery/google/cloud/bigquery/client.py | 2 +- packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py | 2 +- .../google/cloud/bigquery/dbapi/__init__.py | 2 +- .../google/cloud/bigquery/dbapi/_helpers.py | 2 +- .../google/cloud/bigquery/dbapi/connection.py | 2 +- .../google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py | 2 +- .../google/cloud/bigquery/dbapi/exceptions.py | 2 +- .../google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py | 2 +- .../google/cloud/bigquery/external_config.py | 2 +- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 2 +- packages/google-cloud-bigquery/google/cloud/bigquery/query.py | 2 +- packages/google-cloud-bigquery/google/cloud/bigquery/schema.py | 2 +- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 2 +- packages/google-cloud-bigquery/nox.py | 2 +- packages/google-cloud-bigquery/pylint.config.py | 2 +- packages/google-cloud-bigquery/setup.py | 2 +- packages/google-cloud-bigquery/tests/system.py | 2 +- packages/google-cloud-bigquery/tests/unit/__init__.py | 2 +- packages/google-cloud-bigquery/tests/unit/test__helpers.py | 2 +- packages/google-cloud-bigquery/tests/unit/test__http.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_client.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_dataset.py | 2 +- .../google-cloud-bigquery/tests/unit/test_dbapi__helpers.py | 2 +- .../google-cloud-bigquery/tests/unit/test_dbapi_connection.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py | 2 +- .../google-cloud-bigquery/tests/unit/test_external_config.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_job.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_query.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_schema.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_table.py | 2 +- 37 files changed, 37 insertions(+), 37 deletions(-) diff --git a/packages/google-cloud-bigquery/benchmark/benchmark.py b/packages/google-cloud-bigquery/benchmark/benchmark.py index 1bad6933156e..9c71cb943271 100644 --- a/packages/google-cloud-bigquery/benchmark/benchmark.py +++ b/packages/google-cloud-bigquery/benchmark/benchmark.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/__init__.py b/packages/google-cloud-bigquery/google/__init__.py index b2b833373882..9ee9bf4342ab 100644 --- a/packages/google-cloud-bigquery/google/__init__.py +++ b/packages/google-cloud-bigquery/google/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2016 Google Inc. +# Copyright 2016 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/__init__.py b/packages/google-cloud-bigquery/google/cloud/__init__.py index b2b833373882..9ee9bf4342ab 100644 --- a/packages/google-cloud-bigquery/google/cloud/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2016 Google Inc. +# Copyright 2016 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 0407d5984f84..506038003cb7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 8e321ee59866..c0af22c5f578 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index 75fac77e5eae..c2698cd80bc2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4148519c84f5..07fd6045629a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index c29972d2728f..eb7372876c1f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py index 6d6f70f471d9..de34d5553315 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index a2cee9c5272b..a5a1add1f3a4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py index 66aa0929b97e..5f962df97412 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 914d2e07c553..f4dc32e7e101 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/exceptions.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/exceptions.py index 77494e5ff1e1..37f7129d0618 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/exceptions.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/exceptions.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py index 2d06f260e360..9636ce68bfc1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 47c546baed9e..8702126311b4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index bffea1e13229..1ecf35cd4d04 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 6405605582dc..67f0f71f2180 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index cb0ce2a96b5a..155ffe9a159a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index bc5c3044b55a..6af789c9ce9b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 7cce569c1a44..a6aa14316db5 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -1,4 +1,4 @@ -# Copyright 2016 Google Inc. +# Copyright 2016 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/pylint.config.py b/packages/google-cloud-bigquery/pylint.config.py index b618319b8b61..5d64b9d2f256 100644 --- a/packages/google-cloud-bigquery/pylint.config.py +++ b/packages/google-cloud-bigquery/pylint.config.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 53d79dd87bb1..3486dae483cb 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -1,4 +1,4 @@ -# Copyright 2016 Google Inc. +# Copyright 2016 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 40782ec1f90e..7376005a8de1 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/__init__.py b/packages/google-cloud-bigquery/tests/unit/__init__.py index 58e0d9153632..df379f1e9d88 100644 --- a/packages/google-cloud-bigquery/tests/unit/__init__.py +++ b/packages/google-cloud-bigquery/tests/unit/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2016 Google Inc. +# Copyright 2016 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 15a62107c645..56ceb78ea9ae 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index b8af254d3614..c1cd48ffdca8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 074567fd89ad..c42e880a0d47 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index c04d154b52da..1f8580ebbcfd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 48bca5ae9a59..78c5ea1ca18a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index d30852377852..176d5f989b41 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index a16b7b47ee3f..542b053fae1a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py index afd45b259263..1803ea6d3c10 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index b7887428606d..05a058ffea82 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 7c89a9ba7e01..4f94a1881a30 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index e5c78caf3b0b..1924d55c9959 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 84e5d306c348..e9c13f75ea3d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a40ab160d970..1829231328d9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google Inc. +# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 8e9f7dbd9746f5f92fc42b69e2c36dd6ac61dcf8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 31 Oct 2017 09:09:15 -0700 Subject: [PATCH 0346/2016] BigQuery: Increment version to 0.28.0 (#4258) - Increment version to 0.28.0 for beta 2 launch. - Add changelog/release notes for BigQuery. - Adding **explicit** dependency on `google-api-core` to BQ. --- packages/google-cloud-bigquery/CHANGELOG.md | 174 ++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 3 +- 2 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/CHANGELOG.md diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md new file mode 100644 index 000000000000..de580d1fe0f8 --- /dev/null +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -0,0 +1,174 @@ +# Changelog + +## v0.28.0 + +**v0.28.0 significantly changes the interface for this package.** For examples +of the differences between v0.28.0 and previous versions, see [Migrating to +the BigQuery Python client library +v0.28](https://cloud.google.com/bigquery/docs/python-client-migration). +These changes can be summarized as follows: + +- Query and view operations default to the standard SQL dialect. (#4192) +- Client functions related to + [jobs](https://cloud.google.com/bigquery/docs/jobs-overview), like running + queries, immediately start the job. +- Functions to create, get, update, delete datasets and tables moved to the + client class. + +### Fixes + +- Populate timeout parameter correctly for queries (#4209) +- Automatically retry idempotent RPCs (#4148, #4178) +- Parse timestamps in query parameters using canonical format (#3945) +- Parse array parameters that contain a struct type. (#4040) +- Support Sub Second Datetimes in row data (#3901, #3915, #3926), h/t @page1 + +### Interface changes / additions + +- Support external table configuration (#4182) in query jobs (#4191) and + tables (#4193). +- New `Row` class allows for access by integer index like a tuple, string + index like a dictionary, or attribute access like an object. (#4149) +- Add option for job ID generation with user-supplied prefix (#4198) +- Add support for update of dataset access entries (#4197) +- Add support for atomic read-modify-write of a dataset using etag (#4052) +- Add support for labels to `Dataset` (#4026) +- Add support for labels to `Table` (#4207) +- Add `Table.streaming_buffer` property (#4161) +- Add `TableReference` class (#3942) +- Add `DatasetReference` class (#3938, #3942, #3993) +- Add `ExtractJob.destination_uri_file_counts` property. (#3803) +- Add `client.create_rows_json()` to bypass conversions on streaming writes. + (#4189) +- Add `client.get_job()` to get arbitrary jobs. (#3804, #4213) +- Add filter to `client.list_datasets()` (#4205) +- Add `QueryJob.undeclared_query_parameters` property. (#3802) +- Add `QueryJob.referenced_tables` property. (#3801) +- Add new scalar statistics properties to `QueryJob` (#3800) +- Add `QueryJob.query_plan` property. (#3799) + +### Interface changes / breaking changes + +- Remove `client.run_async_query()`, use `client.query()` instead. (#4130) +- Remove `client.run_sync_query()`, use `client.query_rows()` instead. (#4065, #4248) +- Make `QueryResults` read-only. (#4094, #4144) +- Make `get_query_results` private. Return rows for `QueryJob.result()` (#3883) +- Move `*QueryParameter` and `UDFResource` classes to `query` module (also + exposed in `bigquery` module). (#4156) + +#### Changes to tables + +- Remove `client` from `Table` class (#4159) +- Remove `table.exists()` (#4145) +- Move `table.list_parations` to `client.list_partitions` (#4146) +- Move `table.upload_from_file` to `client.load_table_from_file` (#4136) +- Move `table.update()` and `table.patch()` to `client.update_table()` (#4076) +- Move `table.insert_data()` to `client.create_rows()`. Automatically + generates row IDs if not supplied. (#4151, #4173) +- Move `table.fetch_data()` to `client.list_rows()` (#4119, #4143) +- Move `table.delete()` to `client.delete_table()` (#4066) +- Move `table.create()` to `client.create_table()` (#4038, #4043) +- Move `table.reload()` to `client.get_table()` (#4004) +- Rename `Table.name` attribute to `Table.table_id` (#3959) +- `Table` constructor takes a `TableReference` as parameter (#3997) + +#### Changes to datasets + +- Remove `client` from `Dataset` class (#4018) +- Remove `dataset.exists()` (#3996) +- Move `dataset.list_tables()` to `client.list_dataset_tables()` (#4013) +- Move `dataset.delete()` to `client.delete_dataset()` (#4012) +- Move `dataset.patch()` and `dataset.update()` to `client.update_dataset()` (#4003) +- Move `dataset.create()` to `client.create_dataset()` (#3982) +- Move `dataset.reload()` to `client.get_dataset()` (#3973) +- Rename `Dataset.name` attribute to `Dataset.dataset_id` (#3955) +- `client.dataset()` returns a `DatasetReference` instead of `Dataset`. (#3944) +- Rename class: `dataset.AccessGrant -> dataset.AccessEntry`. (#3798) +- `dataset.table()` returns a `TableReference` instead of a `Table` (#4014) +- `Dataset` constructor takes a DatasetReference (#4036) + +#### Changes to jobs + +- Make `job.begin()` method private. (#4242) +- Add `LoadJobConfig` class and modify `LoadJob` (#4103, #4137) +- Add `CopyJobConfig` class and modify `CopyJob` (#4051, #4059) +- Type of Job's and Query's `default_dataset` changed from `Dataset` to + `DatasetReference` (#4037) +- Rename `client.load_table_from_storage()` to `client.load_table_from_uri()` + (#4235) +- Rename `client.extract_table_to_storage` to `client.extract_table()`. + Method starts the extract job immediately. (#3991, #4177) +- Rename `XJob.name` to `XJob.job_id`. (#3962) +- Rename job classes. `LoadTableFromStorageJob -> LoadJob` and + `ExtractTableToStorageJob -> jobs.ExtractJob` (#3797) + +### Dependencies + +- Updating to `google-cloud-core ~= 0.28`, in particular, the + `google-api-core` package has been moved out of `google-cloud-core`. (#4221) + +PyPI: https://pypi.org/project/google-cloud-bigquery/0.28.0/ + + +## v0.27.0 + +- Remove client-side enum validation. (#3735) +- Add `Table.row_from_mapping` helper. (#3425) +- Move `google.cloud.future` to `google.api.core` (#3764) +- Fix `__eq__` and `__ne__`. (#3765) +- Move `google.cloud.iterator` to `google.api.core.page_iterator` (#3770) +- `nullMarker` support for BigQuery Load Jobs (#3777), h/t @leondealmeida +- Allow `job_id` to be explicitly specified in DB-API. (#3779) +- Add support for a custom null marker. (#3776) +- Add `SchemaField` serialization and deserialization. (#3786) +- Add `get_query_results` method to the client. (#3838) +- Poll for query completion via `getQueryResults` method. (#3844) +- Allow fetching more than the first page when `max_results` is set. (#3845) + +PyPI: https://pypi.org/project/google-cloud-bigquery/0.27.0/ + +## 0.26.0 + +### Notable implementation changes + +- Using the `requests` transport attached to a Client for for resumable media + (i.e. downloads and uploads) (#3705) (this relates to the `httplib2` to + `requests` switch) + +### Interface changes / additions + +- Adding `autodetect` property on `LoadTableFromStorageJob` to enable schema + autodetection. (#3648) +- Implementing the Python Futures interface for Jobs. Call `job.result()` to + wait for jobs to complete instead of polling manually on the job status. + (#3626) +- Adding `is_nullable` property on `SchemaField`. Can be used to check if a + column is nullable. (#3620) +- `job_name` argument added to `Table.upload_from_file` for setting the job + ID. (#3605) +- Adding `google.cloud.bigquery.dbapi` package, which implements PEP-249 + DB-API specification. (#2921) +- Adding `Table.view_use_legacy_sql` property. Can be used to create views + with legacy or standard SQL. (#3514) + +### Interface changes / breaking changes + +- Removing `results()` method from the `QueryJob` class. Use + `query_results()` instead. (#3661) +- `SchemaField` is now immutable. It is also hashable so that it can be used + in sets. (#3601) + +### Dependencies + +- Updating to `google-cloud-core ~= 0.26`, in particular, the underlying HTTP + transport switched from `httplib2` to `requests` (#3654, #3674) +- Adding dependency on `google-resumable-media` for loading BigQuery tables + from local files. (#3555) + +### Packaging + +- Fix inclusion of `tests` (vs. `unit_tests`) in `MANIFEST.in` (#3552) +- Updating `author_email` in `setup.py` to `googleapis-publisher@google.com`. + (#3598) + +PyPI: https://pypi.org/project/google-cloud-bigquery/0.26.0/ diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 3486dae483cb..dc712f0ae22f 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -52,6 +52,7 @@ REQUIREMENTS = [ 'google-cloud-core >= 0.28.0, < 0.29dev', + 'google-api-core >= 0.1.1, < 0.2.0dev', 'google-auth >= 1.0.0', 'google-resumable-media >= 0.2.1', 'requests >= 2.18.0', @@ -59,7 +60,7 @@ setup( name='google-cloud-bigquery', - version='0.27.0', + version='0.28.0', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From 57faac08b87f44b932f30eb7c18912dbc7ab5b9c Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 31 Oct 2017 09:27:19 -0700 Subject: [PATCH 0347/2016] Explicitly marking `google-cloud-bigquery` as "dev". (#4290) This is to make it clear the code is between releases. Towards #4208. See: https://snarky.ca/how-i-manage-package-version-numbers/ --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index dc712f0ae22f..1dd3a9ff6036 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -60,7 +60,7 @@ setup( name='google-cloud-bigquery', - version='0.28.0', + version='0.28.1.dev1', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From 7861b4c9efc450ce04ebdb4e6a6bab05af55b4bd Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 31 Oct 2017 14:28:55 -0700 Subject: [PATCH 0348/2016] Making release for most packages. (#4296) * Making release for most packages. Every package except those that have already been released (`google-cloud-core`, `google-api-core`, `google-cloud-bigquery`): - `google-cloud` - `google-cloud-bigtable` - `google-cloud-datastore` - `google-cloud-dns` - `google-cloud-error-reporting` - `google-cloud-firestore` - `google-cloud-language` - `google-cloud-logging` - `google-cloud-monitoring` - `google-cloud-resource-manager` - `google-cloud-runtimeconfig` - `google-cloud-spanner` - `google-cloud-speech` - `google-cloud-storage` - `google-cloud-trace` - `google-cloud-translate` - `google-cloud-videointelligence` - `google-cloud-vision` * Adding changelog files for each package. --- packages/google-cloud-bigquery/CHANGELOG.md | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index de580d1fe0f8..04f299c94361 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -1,11 +1,14 @@ # Changelog -## v0.28.0 +[PyPI History][1] -**v0.28.0 significantly changes the interface for this package.** For examples -of the differences between v0.28.0 and previous versions, see [Migrating to -the BigQuery Python client library -v0.28](https://cloud.google.com/bigquery/docs/python-client-migration). +[1]: https://pypi.org/project/google-cloud-bigquery/#history + +## 0.28.0 + +**0.28.0 significantly changes the interface for this package.** For examples +of the differences between 0.28.0 and previous versions, see +[Migrating to the BigQuery Python client library 0.28][2]. These changes can be summarized as follows: - Query and view operations default to the standard SQL dialect. (#4192) @@ -15,6 +18,8 @@ These changes can be summarized as follows: - Functions to create, get, update, delete datasets and tables moved to the client class. +[2]: https://cloud.google.com/bigquery/docs/python-client-migration + ### Fixes - Populate timeout parameter correctly for queries (#4209) @@ -110,7 +115,7 @@ These changes can be summarized as follows: PyPI: https://pypi.org/project/google-cloud-bigquery/0.28.0/ -## v0.27.0 +## 0.27.0 - Remove client-side enum validation. (#3735) - Add `Table.row_from_mapping` helper. (#3425) From e166f848a0c03a9fc29a2b6fafe09e34d5b9b3cd Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Wed, 1 Nov 2017 12:43:23 -0700 Subject: [PATCH 0349/2016] Fixing "Fore" -> "For" typo in README docs. (#4317) Also obeying an 80-column limit for the content and adding a missing "``virtualenv``" in the phrase "``pip`` and ``virtualenv``" in some of the docs. --- packages/google-cloud-bigquery/README.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 01a1194c41fc..1a15304a1bef 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -18,7 +18,9 @@ Quick Start $ pip install --upgrade google-cloud-bigquery -Fore more information on setting up your Python development environment, such as installing ``pip`` and on your system, please refer to `Python Development Environment Setup Guide`_ for Google Cloud Platform. +For more information on setting up your Python development environment, +such as installing ``pip`` and ``virtualenv`` on your system, please refer +to `Python Development Environment Setup Guide`_ for Google Cloud Platform. .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup From cdb0454d1217215f83b42782388b3ca91bdc04de Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Wed, 1 Nov 2017 16:53:46 -0700 Subject: [PATCH 0350/2016] Closes #4319 - shorten test names (#4321) * Closes #4319 - shorten test names * #4319 update docs and config files --- packages/google-cloud-bigquery/nox.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index a6aa14316db5..d835f8a7de43 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -26,15 +26,15 @@ @nox.session -@nox.parametrize('python_version', ['2.7', '3.4', '3.5', '3.6']) -def unit_tests(session, python_version): +@nox.parametrize('py', ['2.7', '3.4', '3.5', '3.6']) +def unit(session, py): """Run the unit test suite.""" # Run unit tests against all supported versions of Python. - session.interpreter = 'python{}'.format(python_version) + session.interpreter = 'python{}'.format(py) # Set the virtualenv dirname. - session.virtualenv_dirname = 'unit-' + python_version + session.virtualenv_dirname = 'unit-' + py # Install all test dependencies, then install this package in-place. session.install('mock', 'pytest', 'pytest-cov', *LOCAL_DEPS) @@ -56,8 +56,8 @@ def unit_tests(session, python_version): @nox.session -@nox.parametrize('python_version', ['2.7', '3.6']) -def system_tests(session, python_version): +@nox.parametrize('py', ['2.7', '3.6']) +def system(session, py): """Run the system test suite.""" # Sanity check: Only run system tests if the environment variable is set. @@ -65,10 +65,10 @@ def system_tests(session, python_version): session.skip('Credentials must be set via environment variable.') # Run the system tests against latest Python 2 and Python 3 only. - session.interpreter = 'python{}'.format(python_version) + session.interpreter = 'python{}'.format(py) # Set the virtualenv dirname. - session.virtualenv_dirname = 'sys-' + python_version + session.virtualenv_dirname = 'sys-' + py # Install all test dependencies, then install this package into the # virtualenv's dist-packages. @@ -89,8 +89,8 @@ def system_tests(session, python_version): @nox.session -@nox.parametrize('python_version', ['2.7', '3.6']) -def snippets_tests(session, python_version): +@nox.parametrize('py', ['2.7', '3.6']) +def snippets_tests(session, py): """Run the system test suite.""" # Sanity check: Only run system tests if the environment variable is set. @@ -98,10 +98,10 @@ def snippets_tests(session, python_version): session.skip('Credentials must be set via environment variable.') # Run the system tests against latest Python 2 and Python 3 only. - session.interpreter = 'python{}'.format(python_version) + session.interpreter = 'python{}'.format(py) # Set the virtualenv dirname. - session.virtualenv_dirname = 'snip-' + python_version + session.virtualenv_dirname = 'snip-' + py # Install all test dependencies, then install this package into the # virtualenv's dist-packages. From 98a1157f53f6e2d904a8021b298277b10757747c Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Wed, 1 Nov 2017 21:47:55 -0700 Subject: [PATCH 0351/2016] Making a `nox -s default` session for all packages. (#4324) * Making a `nox -s default` session for all packages. * Using "default" `nox` session on AppVeyor. This was 32-bit or 64-bit Python can be used, depending on which is the active `python` / the active `nox.exe`. --- packages/google-cloud-bigquery/nox.py | 30 +++++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index d835f8a7de43..fa0936ce619e 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -26,16 +26,14 @@ @nox.session -@nox.parametrize('py', ['2.7', '3.4', '3.5', '3.6']) -def unit(session, py): - """Run the unit test suite.""" - - # Run unit tests against all supported versions of Python. - session.interpreter = 'python{}'.format(py) - - # Set the virtualenv dirname. - session.virtualenv_dirname = 'unit-' + py +def default(session): + """Default unit test session. + This is intended to be run **without** an interpreter set, so + that the current ``python`` (on the ``PATH``) or the version of + Python corresponding to the ``nox`` binary the ``PATH`` can + run the tests. + """ # Install all test dependencies, then install this package in-place. session.install('mock', 'pytest', 'pytest-cov', *LOCAL_DEPS) session.install('-e', '.') @@ -55,6 +53,20 @@ def unit(session, py): ) +@nox.session +@nox.parametrize('py', ['2.7', '3.4', '3.5', '3.6']) +def unit(session, py): + """Run the unit test suite.""" + + # Run unit tests against all supported versions of Python. + session.interpreter = 'python{}'.format(py) + + # Set the virtualenv dirname. + session.virtualenv_dirname = 'unit-' + py + + default(session) + + @nox.session @nox.parametrize('py', ['2.7', '3.6']) def system(session, py): From a5a3ac9a2550412aeb4d8060235a11c4c14ac4c0 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 3 Nov 2017 14:18:09 -0700 Subject: [PATCH 0352/2016] BigQuery: moves Row class out of helpers and updates docstrings (#4291) * moves Row class out of helpers and updates docstrings to specify Row as rtype * updates docstring references to classes to include the appropriate module * breaks up imports to different lines * renames single letter variables and fixes string formatting * adds todo to fix circular import * updates references to bigtable's Row class to differentiate from bigquery's Row class * Revert "adds todo to fix circular import" This reverts commit 1c2bbb01a5fa70d5ddbaf81576e84e760d8b9f6e. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/_helpers.py | 67 ++----------------- .../google/cloud/bigquery/client.py | 17 ++--- .../google/cloud/bigquery/job.py | 18 ++--- .../google/cloud/bigquery/query.py | 2 +- .../google/cloud/bigquery/table.py | 65 +++++++++++++++++- .../tests/unit/test__helpers.py | 23 +------ .../tests/unit/test_client.py | 12 ++-- .../tests/unit/test_table.py | 22 ++++++ 9 files changed, 119 insertions(+), 109 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 506038003cb7..ef40feb83586 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -51,6 +51,7 @@ from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import Row from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -74,6 +75,7 @@ # Tables 'Table', 'TableReference', + 'Row', 'CopyJob', 'CopyJobConfig', 'ExtractJob', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index c0af22c5f578..41b68db02c39 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -16,9 +16,6 @@ import base64 import datetime -import operator - -import six from google.api_core import retry from google.cloud._helpers import UTC @@ -178,64 +175,6 @@ def _record_from_json(value, field): _QUERY_PARAMS_FROM_JSON['TIMESTAMP'] = _timestamp_query_param_from_json -class Row(object): - """A BigQuery row. - - Values can be accessed by position (index), by key like a dict, - or as properties. - - :type values: tuple - :param values: the row values - - :type field_to_index: dict - :param field_to_index: a mapping from schema field names to indexes - """ - - # Choose unusual field names to try to avoid conflict with schema fields. - __slots__ = ('_xxx_values', '_xxx_field_to_index') - - def __init__(self, values, field_to_index): - self._xxx_values = values - self._xxx_field_to_index = field_to_index - - def values(self): - return self._xxx_values - - def __getattr__(self, name): - i = self._xxx_field_to_index.get(name) - if i is None: - raise AttributeError('no row field "%s"' % name) - return self._xxx_values[i] - - def __len__(self): - return len(self._xxx_values) - - def __getitem__(self, key): - if isinstance(key, six.string_types): - i = self._xxx_field_to_index.get(key) - if i is None: - raise KeyError('no row field "%s"' % key) - key = i - return self._xxx_values[key] - - def __eq__(self, other): - if not isinstance(other, Row): - return NotImplemented - return( - self._xxx_values == other._xxx_values and - self._xxx_field_to_index == other._xxx_field_to_index) - - def __ne__(self, other): - return not self == other - - def __repr__(self): - # sort field dict by value, for determinism - items = sorted(self._xxx_field_to_index.items(), - key=operator.itemgetter(1)) - f2i = '{' + ', '.join('%r: %d' % i for i in items) + '}' - return 'Row({}, {})'.format(self._xxx_values, f2i) - - def _field_to_index_mapping(schema): """Create a mapping from schema field name to index of field.""" return {f.name: i for i, f in enumerate(schema)} @@ -270,6 +209,8 @@ def _row_tuple_from_json(row, schema): def _rows_from_json(values, schema): """Convert JSON row data to rows with appropriate types.""" + from google.cloud.bigquery import Row + field_to_index = _field_to_index_mapping(schema) return [Row(_row_tuple_from_json(r, schema), field_to_index) for r in values] @@ -502,9 +443,11 @@ def _item_to_row(iterator, resource): :type resource: dict :param resource: An item to be converted to a row. - :rtype: :class:`~google.cloud.bigquery.Row` + :rtype: :class:`~google.cloud.bigquery.table.Row` :returns: The next row in the page. """ + from google.cloud.bigquery import Row + return Row(_row_tuple_from_json(resource, iterator.schema), iterator._field_to_index) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 07fd6045629a..18fb64c5e531 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1172,11 +1172,11 @@ def query_rows( :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: - Iterator of row data :class:`tuple`s. During each page, the - iterator will have the ``total_rows`` attribute set, which counts - the total number of rows **in the result set** (this is distinct - from the total number of rows in the current page: - ``iterator.page.num_items``). + Iterator of row data :class:`~google.cloud.bigquery.table.Row`-s. + During each page, the iterator will have the ``total_rows`` + attribute set, which counts the total number of rows **in the + result set** (this is distinct from the total number of rows in + the current page: ``iterator.page.num_items``). :raises: :class:`~google.api_core.exceptions.GoogleAPICallError` if the @@ -1242,9 +1242,10 @@ def list_rows(self, table, selected_fields=None, max_results=None, :param retry: (Optional) How to retry the RPC. :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of row data :class:`tuple`s. During each page, the - iterator will have the ``total_rows`` attribute set, - which counts the total number of rows **in the table** + :returns: Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. During each + page, the iterator will have the ``total_rows`` attribute + set, which counts the total number of rows **in the table** (this is distinct from the total number of rows in the current page: ``iterator.page.num_items``). diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 1ecf35cd4d04..0b77ef0e1cc8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -690,7 +690,7 @@ class LoadJob(_AsyncJob): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris for supported URI formats. Pass None for jobs that load from a file. - :type destination: :class:`google.cloud.bigquery.TableReference` + :type destination: :class:`google.cloud.bigquery.table.TableReference` :param destination: reference to table into which data is to be loaded. :type client: :class:`google.cloud.bigquery.client.Client` @@ -959,10 +959,10 @@ class CopyJob(_AsyncJob): :type job_id: str :param job_id: the job's ID, within the project belonging to ``client``. - :type sources: list of :class:`google.cloud.bigquery.TableReference` + :type sources: list of :class:`google.cloud.bigquery.table.TableReference` :param sources: Table into which data is to be loaded. - :type destination: :class:`google.cloud.bigquery.TableReference` + :type destination: :class:`google.cloud.bigquery.table.TableReference` :param destination: Table into which data is to be loaded. :type client: :class:`google.cloud.bigquery.client.Client` @@ -1134,7 +1134,7 @@ class ExtractJob(_AsyncJob): :type job_id: str :param job_id: the job's ID - :type source: :class:`google.cloud.bigquery.TableReference` + :type source: :class:`google.cloud.bigquery.table.TableReference` :param source: Table into which data is to be loaded. :type destination_uris: list of string @@ -1931,11 +1931,11 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: - Iterator of row data :class:`tuple`s. During each page, the - iterator will have the ``total_rows`` attribute set, which counts - the total number of rows **in the result set** (this is distinct - from the total number of rows in the current page: - ``iterator.page.num_items``). + Iterator of row data :class:`~google.cloud.bigquery.table.Row`-s. + During each page, the iterator will have the ``total_rows`` + attribute set, which counts the total number of rows **in the + result set** (this is distinct from the total number of rows in + the current page: ``iterator.page.num_items``). :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 67f0f71f2180..1d3009394c96 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -597,7 +597,7 @@ def rows(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#rows - :rtype: list of :class:`~google.cloud.bigquery.Row` + :rtype: list of :class:`~google.cloud.bigquery.table.Row` :returns: fields describing the schema (None until set by the server). """ return _rows_from_json(self._properties.get('rows', ()), self.schema) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 6af789c9ce9b..191eba2f20ce 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Define API Datasets.""" +"""Define API Tables.""" from __future__ import absolute_import import datetime +import operator import six @@ -39,7 +40,7 @@ class TableReference(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables - :type dataset_ref: :class:`google.cloud.bigquery.DatasetReference` + :type dataset_ref: :class:`google.cloud.bigquery.dataset.DatasetReference` :param dataset_ref: a pointer to the dataset :type table_id: str @@ -608,7 +609,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: table resource representation returned from the API - :type dataset: :class:`google.cloud.bigquery.Dataset` + :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` :param dataset: The dataset containing the table. :rtype: :class:`google.cloud.bigquery.table.Table` @@ -759,3 +760,61 @@ def __init__(self, resource): # time is in milliseconds since the epoch. self.oldest_entry_time = _datetime_from_microseconds( 1000.0 * int(resource['oldestEntryTime'])) + + +class Row(object): + """A BigQuery row. + + Values can be accessed by position (index), by key like a dict, + or as properties. + + :type values: tuple + :param values: the row values + + :type field_to_index: dict + :param field_to_index: a mapping from schema field names to indexes + """ + + # Choose unusual field names to try to avoid conflict with schema fields. + __slots__ = ('_xxx_values', '_xxx_field_to_index') + + def __init__(self, values, field_to_index): + self._xxx_values = values + self._xxx_field_to_index = field_to_index + + def values(self): + return self._xxx_values + + def __getattr__(self, name): + value = self._xxx_field_to_index.get(name) + if value is None: + raise AttributeError('no row field {!r}'.format(name)) + return self._xxx_values[value] + + def __len__(self): + return len(self._xxx_values) + + def __getitem__(self, key): + if isinstance(key, six.string_types): + value = self._xxx_field_to_index.get(key) + if value is None: + raise KeyError('no row field {!r}'.format(key)) + key = value + return self._xxx_values[key] + + def __eq__(self, other): + if not isinstance(other, Row): + return NotImplemented + return( + self._xxx_values == other._xxx_values and + self._xxx_field_to_index == other._xxx_field_to_index) + + def __ne__(self, other): + return not self == other + + def __repr__(self): + # sort field dict by value, for determinism + items = sorted(self._xxx_field_to_index.items(), + key=operator.itemgetter(1)) + f2i = '{' + ', '.join('%r: %d' % item for item in items) + '}' + return 'Row({}, {})'.format(self._xxx_values, f2i) diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 56ceb78ea9ae..9039ea120b06 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -466,25 +466,6 @@ def test_w_array_of_struct_w_array(self): {u'first': [5, 6], u'second': 7}, ],)) - def test_row(self): - from google.cloud.bigquery._helpers import Row - - VALUES = (1, 2, 3) - r = Row(VALUES, {'a': 0, 'b': 1, 'c': 2}) - self.assertEqual(r.a, 1) - self.assertEqual(r[1], 2) - self.assertEqual(r['c'], 3) - self.assertEqual(len(r), 3) - self.assertEqual(r.values(), VALUES) - self.assertEqual(repr(r), - "Row((1, 2, 3), {'a': 0, 'b': 1, 'c': 2})") - self.assertFalse(r != r) - self.assertFalse(r == 3) - with self.assertRaises(AttributeError): - r.z - with self.assertRaises(KeyError): - r['z'] - class Test_rows_from_json(unittest.TestCase): @@ -494,7 +475,7 @@ def _call_fut(self, rows, schema): return _rows_from_json(rows, schema) def test_w_record_subfield(self): - from google.cloud.bigquery._helpers import Row + from google.cloud.bigquery.table import Row full_name = _Field('REQUIRED', 'full_name', 'STRING') area_code = _Field('REQUIRED', 'area_code', 'STRING') @@ -541,7 +522,7 @@ def test_w_record_subfield(self): self.assertEqual(coerced, expected) def test_w_int64_float64_bool(self): - from google.cloud.bigquery._helpers import Row + from google.cloud.bigquery.table import Row # "Standard" SQL dialect uses 'INT64', 'FLOAT64', 'BOOL'. candidate = _Field('REQUIRED', 'candidate', 'STRING') diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c42e880a0d47..6efecd606766 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2182,8 +2182,9 @@ def _row_data(row): self.assertEqual(req['data'], SENT) def test_create_rows_w_list_of_Rows(self): - from google.cloud.bigquery._helpers import Row - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.table import Row PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( self.PROJECT, self.DS_ID, self.TABLE_ID) @@ -2465,7 +2466,7 @@ def test_create_rows_json(self): def test_query_rows_defaults(self): from google.api_core.page_iterator import HTTPIterator - from google.cloud.bigquery._helpers import Row + from google.cloud.bigquery.table import Row JOB = 'job-id' QUERY = 'SELECT COUNT(*) FROM persons' @@ -2720,8 +2721,9 @@ def test_query_rows_w_api_error(self): def test_list_rows(self): import datetime from google.cloud._helpers import UTC - from google.cloud.bigquery.table import Table, SchemaField - from google.cloud.bigquery._helpers import Row + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.table import Row PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( self.PROJECT, self.DS_ID, self.TABLE_ID) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 1829231328d9..9bdd62c7404e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -751,3 +751,25 @@ def test__row_from_mapping_w_schema(self): self.assertEqual( self._call_fut(MAPPING, table.schema), ('Phred Phlyntstone', 32, ['red', 'green'], None)) + + +class TestRow(unittest.TestCase): + + def test_row(self): + from google.cloud.bigquery.table import Row + + VALUES = (1, 2, 3) + row = Row(VALUES, {'a': 0, 'b': 1, 'c': 2}) + self.assertEqual(row.a, 1) + self.assertEqual(row[1], 2) + self.assertEqual(row['c'], 3) + self.assertEqual(len(row), 3) + self.assertEqual(row.values(), VALUES) + self.assertEqual(repr(row), + "Row((1, 2, 3), {'a': 0, 'b': 1, 'c': 2})") + self.assertFalse(row != row) + self.assertFalse(row == 3) + with self.assertRaises(AttributeError): + row.z + with self.assertRaises(KeyError): + row['z'] From 0e3524b0c05d300cfaf7bd497d7dce2f893a7cd1 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 7 Nov 2017 14:17:51 -0800 Subject: [PATCH 0353/2016] Renaming `makeResource` -> `make_resource`. (#4355) Done via: $ git grep -l makeResource | xargs sed -i s/makeResource/make_resource/g --- .../tests/unit/test_dataset.py | 4 +- .../tests/unit/test_job.py | 88 +++++++++---------- .../tests/unit/test_query.py | 44 +++++----- .../tests/unit/test_schema.py | 8 +- .../tests/unit/test_table.py | 4 +- 5 files changed, 74 insertions(+), 74 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 1f8580ebbcfd..8a34b82a460d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -206,7 +206,7 @@ def _setUpConstants(self): self.DS_FULL_ID = '%s:%s' % (self.PROJECT, self.DS_ID) self.RESOURCE_URL = 'http://example.com/path/to/resource' - def _makeResource(self): + def _make_resource(self): self._setUpConstants() USER_EMAIL = 'phred@example.com' GROUP_EMAIL = 'group-name@lists.example.com' @@ -422,7 +422,7 @@ def test_from_api_repr_bare(self): self._verify_resource_properties(dataset, RESOURCE) def test_from_api_repr_w_properties(self): - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE) self._verify_resource_properties(dataset, RESOURCE) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 4f94a1881a30..95000b24e8ea 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -114,7 +114,7 @@ def _table_ref(self, table_id): return TableReference(self.DS_REF, table_id) - def _makeResource(self, started=False, ended=False): + def _make_resource(self, started=False, ended=False): self._setUpConstants() resource = { 'configuration': { @@ -219,8 +219,8 @@ def _setUpConstants(self): self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 - def _makeResource(self, started=False, ended=False): - resource = super(TestLoadJob, self)._makeResource( + def _make_resource(self, started=False, ended=False): + resource = super(TestLoadJob, self)._make_resource( started, ended) config = resource['configuration']['load'] config['sourceUris'] = [self.SOURCE1] @@ -374,13 +374,13 @@ def test_ctor_w_config(self): def test_done(self): client = _make_client(project=self.PROJECT) - resource = self._makeResource(ended=True) + resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) self.assertTrue(job.done()) def test_result(self): client = _make_client(project=self.PROJECT) - resource = self._makeResource(ended=True) + resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) result = job.result() @@ -388,7 +388,7 @@ def test_result(self): self.assertIs(result, job) def test_result_invokes_begin(self): - begun_resource = self._makeResource() + begun_resource = self._make_resource() done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} connection = _Connection(begun_resource, done_resource) @@ -537,7 +537,7 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _make_client(project=self.PROJECT) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() load_config = RESOURCE['configuration']['load'] load_config['createDisposition'] = 'CREATE_IF_NEEDED' klass = self._get_target_class() @@ -557,7 +557,7 @@ def test_begin_w_already_running(self): def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] del RESOURCE['etag'] @@ -595,7 +595,7 @@ def test_begin_w_bound_client(self): def test_begin_w_autodetect(self): path = '/projects/{}/jobs'.format(self.PROJECT) - resource = self._makeResource() + resource = self._make_resource() resource['configuration']['load']['autodetect'] = True # Ensure None for missing server-set props del resource['statistics']['creationTime'] @@ -639,7 +639,7 @@ def test_begin_w_alternate_client(self): from google.cloud.bigquery.schema import SchemaField PATH = '/projects/%s/jobs' % (self.PROJECT,) - RESOURCE = self._makeResource(ended=True) + RESOURCE = self._make_resource(ended=True) LOAD_CONFIGURATION = { 'sourceUris': [self.SOURCE1], 'destinationTable': { @@ -743,7 +743,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() conn = _Connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) table = _Table() @@ -759,7 +759,7 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() conn1 = _Connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) @@ -778,7 +778,7 @@ def test_reload_w_alternate_client(self): def test_cancel_w_bound_client(self): PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) - RESOURCE = self._makeResource(ended=True) + RESOURCE = self._make_resource(ended=True) RESPONSE = {'job': RESOURCE} conn = _Connection(RESPONSE) client = _make_client(project=self.PROJECT, connection=conn) @@ -795,7 +795,7 @@ def test_cancel_w_bound_client(self): def test_cancel_w_alternate_client(self): PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) - RESOURCE = self._makeResource(ended=True) + RESOURCE = self._make_resource(ended=True) RESPONSE = {'job': RESOURCE} conn1 = _Connection() client1 = _make_client(project=self.PROJECT, connection=conn1) @@ -825,8 +825,8 @@ def _get_target_class(): return CopyJob - def _makeResource(self, started=False, ended=False): - resource = super(TestCopyJob, self)._makeResource( + def _make_resource(self, started=False, ended=False): + resource = super(TestCopyJob, self)._make_resource( started, ended) config = resource['configuration']['copy'] config['sourceTables'] = [{ @@ -997,7 +997,7 @@ def test_from_api_repr_wo_sources(self): def test_from_api_repr_w_properties(self): client = _make_client(project=self.PROJECT) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() copy_config = RESOURCE['configuration']['copy'] copy_config['createDisposition'] = 'CREATE_IF_NEEDED' klass = self._get_target_class() @@ -1007,7 +1007,7 @@ def test_from_api_repr_w_properties(self): def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] del RESOURCE['etag'] @@ -1050,7 +1050,7 @@ def test_begin_w_bound_client(self): def test_begin_w_alternate_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) - RESOURCE = self._makeResource(ended=True) + RESOURCE = self._make_resource(ended=True) COPY_CONFIGURATION = { 'sourceTables': [{ 'projectId': self.PROJECT, @@ -1134,7 +1134,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() conn = _Connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) @@ -1151,7 +1151,7 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() conn1 = _Connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) @@ -1181,8 +1181,8 @@ def _get_target_class(): return ExtractJob - def _makeResource(self, started=False, ended=False): - resource = super(TestExtractJob, self)._makeResource( + def _make_resource(self, started=False, ended=False): + resource = super(TestExtractJob, self)._make_resource( started, ended) config = resource['configuration']['extract'] config['sourceTable'] = { @@ -1316,7 +1316,7 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _make_client(project=self.PROJECT) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() extract_config = RESOURCE['configuration']['extract'] extract_config['compression'] = 'GZIP' klass = self._get_target_class() @@ -1326,7 +1326,7 @@ def test_from_api_repr_w_properties(self): def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] del RESOURCE['etag'] @@ -1366,7 +1366,7 @@ def test_begin_w_bound_client(self): def test_begin_w_alternate_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) - RESOURCE = self._makeResource(ended=True) + RESOURCE = self._make_resource(ended=True) EXTRACT_CONFIGURATION = { 'sourceTable': { 'projectId': self.PROJECT, @@ -1450,7 +1450,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() conn = _Connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -1468,7 +1468,7 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() conn1 = _Connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) @@ -1562,8 +1562,8 @@ def _get_target_class(): return QueryJob - def _makeResource(self, started=False, ended=False): - resource = super(TestQueryJob, self)._makeResource( + def _make_resource(self, started=False, ended=False): + resource = super(TestQueryJob, self)._make_resource( started, ended) config = resource['configuration']['query'] config['query'] = self.QUERY @@ -1788,7 +1788,7 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): client = _make_client(project=self.PROJECT) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() query_config = RESOURCE['configuration']['query'] query_config['createDisposition'] = 'CREATE_IF_NEEDED' query_config['writeDisposition'] = 'WRITE_TRUNCATE' @@ -1816,7 +1816,7 @@ def test_cancelled(self): def test_done(self): client = _make_client(project=self.PROJECT) - resource = self._makeResource(ended=True) + resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) self.assertTrue(job.done()) @@ -2137,7 +2137,7 @@ def test_result(self): } connection = _Connection(query_resource, query_resource) client = _make_client(self.PROJECT, connection=connection) - resource = self._makeResource(ended=True) + resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) result = job.result() @@ -2145,7 +2145,7 @@ def test_result(self): self.assertEqual(list(result), []) def test_result_invokes_begins(self): - begun_resource = self._makeResource() + begun_resource = self._make_resource() incomplete_resource = { 'jobComplete': False, 'jobReference': { @@ -2172,7 +2172,7 @@ def test_result_invokes_begins(self): self.assertEqual(reload_request['method'], 'GET') def test_result_w_timeout(self): - begun_resource = self._makeResource() + begun_resource = self._make_resource() query_resource = { 'jobComplete': True, 'jobReference': { @@ -2229,7 +2229,7 @@ def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) DS_ID = 'DATASET' - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] del RESOURCE['etag'] @@ -2277,7 +2277,7 @@ def test_begin_w_alternate_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) TABLE = 'TABLE' DS_ID = 'DATASET' - RESOURCE = self._makeResource(ended=True) + RESOURCE = self._make_resource(ended=True) QUERY_CONFIGURATION = { 'query': self.QUERY, 'allowLargeResults': True, @@ -2351,7 +2351,7 @@ def test_begin_w_udf(self): RESOURCE_URI = 'gs://some-bucket/js/lib.js' INLINE_UDF_CODE = 'var someCode = "here";' PATH = '/projects/%s/jobs' % (self.PROJECT,) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] del RESOURCE['etag'] @@ -2405,7 +2405,7 @@ def test_begin_w_named_query_parameter(self): query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] del RESOURCE['etag'] @@ -2461,7 +2461,7 @@ def test_begin_w_positional_query_parameter(self): query_parameters = [ScalarQueryParameter.positional('INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] del RESOURCE['etag'] @@ -2517,7 +2517,7 @@ def test_begin_w_table_defs(self): from google.cloud.bigquery.external_config import BigtableColumnFamily PATH = '/projects/%s/jobs' % (self.PROJECT,) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] del RESOURCE['etag'] @@ -2601,7 +2601,7 @@ def test_dry_run_query(self): from google.cloud.bigquery.job import QueryJobConfig PATH = '/projects/%s/jobs' % (self.PROJECT,) - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] del RESOURCE['etag'] @@ -2675,7 +2675,7 @@ def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) DS_ID = 'DATASET' DEST_TABLE = 'dest_table' - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() conn = _Connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) @@ -2698,7 +2698,7 @@ def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) DS_ID = 'DATASET' DEST_TABLE = 'dest_table' - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() q_config = RESOURCE['configuration']['query'] q_config['destinationTable'] = { 'projectId': self.PROJECT, diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 1924d55c9959..35def936946b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -987,7 +987,7 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - def _makeResource(self): + def _make_resource(self): return { 'jobReference': { 'projectId': self.PROJECT, @@ -1013,7 +1013,7 @@ def _verifySchema(self, query, resource): self.assertEqual(query.schema, ()) def test_ctor_defaults(self): - query = self._make_one(self._makeResource()) + query = self._make_one(self._make_resource()) self.assertIsNone(query.cache_hit) self.assertIsNone(query.complete) self.assertIsNone(query.errors) @@ -1025,34 +1025,34 @@ def test_ctor_defaults(self): self.assertIsNone(query.total_bytes_processed) def test_cache_hit_missing(self): - query = self._make_one(self._makeResource()) + query = self._make_one(self._make_resource()) self.assertIsNone(query.cache_hit) def test_cache_hit_present(self): - resource = self._makeResource() + resource = self._make_resource() resource['cacheHit'] = True query = self._make_one(resource) self.assertTrue(query.cache_hit) def test_complete_missing(self): - query = self._make_one(self._makeResource()) + query = self._make_one(self._make_resource()) self.assertIsNone(query.complete) def test_complete_present(self): - resource = self._makeResource() + resource = self._make_resource() resource['jobComplete'] = True query = self._make_one(resource) self.assertTrue(query.complete) def test_errors_missing(self): - query = self._make_one(self._makeResource()) + query = self._make_one(self._make_resource()) self.assertIsNone(query.errors) def test_errors_present(self): ERRORS = [ {'reason': 'testing'}, ] - resource = self._makeResource() + resource = self._make_resource() resource['errors'] = ERRORS query = self._make_one(resource) self.assertEqual(query.errors, ERRORS) @@ -1067,69 +1067,69 @@ def test_job_id_broken_job_reference(self): self._make_one(resource) def test_job_id_present(self): - resource = self._makeResource() + resource = self._make_resource() resource['jobReference']['jobId'] = 'custom-job' query = self._make_one(resource) self.assertEqual(query.job_id, 'custom-job') def test_page_token_missing(self): - query = self._make_one(self._makeResource()) + query = self._make_one(self._make_resource()) self.assertIsNone(query.page_token) def test_page_token_present(self): - resource = self._makeResource() + resource = self._make_resource() resource['pageToken'] = 'TOKEN' query = self._make_one(resource) self.assertEqual(query.page_token, 'TOKEN') def test_total_rows_present_integer(self): - resource = self._makeResource() + resource = self._make_resource() resource['totalRows'] = 42 query = self._make_one(resource) self.assertEqual(query.total_rows, 42) def test_total_rows_present_string(self): - resource = self._makeResource() + resource = self._make_resource() resource['totalRows'] = '42' query = self._make_one(resource) self.assertEqual(query.total_rows, 42) def test_total_bytes_processed_missing(self): - query = self._make_one(self._makeResource()) + query = self._make_one(self._make_resource()) self.assertIsNone(query.total_bytes_processed) def test_total_bytes_processed_present_integer(self): - resource = self._makeResource() + resource = self._make_resource() resource['totalBytesProcessed'] = 123456 query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) def test_total_bytes_processed_present_string(self): - resource = self._makeResource() + resource = self._make_resource() resource['totalBytesProcessed'] = '123456' query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) def test_num_dml_affected_rows_missing(self): - query = self._make_one(self._makeResource()) + query = self._make_one(self._make_resource()) self.assertIsNone(query.num_dml_affected_rows) def test_num_dml_affected_rows_present_integer(self): - resource = self._makeResource() + resource = self._make_resource() resource['numDmlAffectedRows'] = 123456 query = self._make_one(resource) self.assertEqual(query.num_dml_affected_rows, 123456) def test_num_dml_affected_rows_present_string(self): - resource = self._makeResource() + resource = self._make_resource() resource['numDmlAffectedRows'] = '123456' query = self._make_one(resource) self.assertEqual(query.num_dml_affected_rows, 123456) def test_schema(self): - query = self._make_one(self._makeResource()) - self._verifySchema(query, self._makeResource()) - resource = self._makeResource() + query = self._make_one(self._make_resource()) + self._verifySchema(query, self._make_resource()) + resource = self._make_resource() resource['schema'] = { 'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index e9c13f75ea3d..a5d5ecacd619 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -261,7 +261,7 @@ def _call_fut(self, resource): return _parse_schema_resource(resource) - def _makeResource(self): + def _make_resource(self): return { 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, @@ -270,12 +270,12 @@ def _makeResource(self): } def test__parse_schema_resource_defaults(self): - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() schema = self._call_fut(RESOURCE['schema']) self._verifySchema(schema, RESOURCE) def test__parse_schema_resource_subfields(self): - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() RESOURCE['schema']['fields'].append( {'name': 'phone', 'type': 'RECORD', @@ -290,7 +290,7 @@ def test__parse_schema_resource_subfields(self): self._verifySchema(schema, RESOURCE) def test__parse_schema_resource_fields_without_mode(self): - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() RESOURCE['schema']['fields'].append( {'name': 'phone', 'type': 'STRING'}) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 9bdd62c7404e..888974c3cdaf 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -177,7 +177,7 @@ def _setUpConstants(self): self.NUM_EST_BYTES = 1234 self.NUM_EST_ROWS = 23 - def _makeResource(self): + def _make_resource(self): self._setUpConstants() return { 'creationTime': self.WHEN_TS * 1000, @@ -576,7 +576,7 @@ def test_from_api_repr_w_properties(self): from google.cloud._helpers import UTC from google.cloud._helpers import _millis - RESOURCE = self._makeResource() + RESOURCE = self._make_resource() RESOURCE['view'] = {'query': 'select fullname, age from person_ages'} RESOURCE['type'] = 'VIEW' RESOURCE['location'] = 'EU' From f551ed5551b2b422b257af5ca57b8248303f02d0 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 13 Nov 2017 14:21:50 -0800 Subject: [PATCH 0354/2016] BigQuery: Can iterate over QueryJob results without explicitly calling result() (#4350) --- .../google/cloud/bigquery/job.py | 3 +++ .../google-cloud-bigquery/tests/system.py | 7 +++++++ .../tests/unit/test_job.py | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 0b77ef0e1cc8..91301b1ed8d2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1949,6 +1949,9 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): return self._client.list_rows(dest_table, selected_fields=schema, retry=retry) + def __iter__(self): + return iter(self.result()) + class QueryPlanEntryStep(object): """Map a single step in a query plan entry. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 7376005a8de1..05faf6fb71ee 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1235,6 +1235,13 @@ def test_query_future(self): row_tuples = [r.values() for r in iterator] self.assertEqual(row_tuples, [(1,)]) + def test_query_iter(self): + import types + query_job = Config.CLIENT.query('SELECT 1') + self.assertIsInstance(iter(query_job), types.GeneratorType) + row_tuples = [r.values() for r in query_job] + self.assertEqual(row_tuples, [(1,)]) + def test_query_table_def(self): gs_url = self._write_csv_to_storage( 'bq_external_test' + unique_resource_id(), 'person_ages.csv', diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 95000b24e8ea..1bd440c746c1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2720,6 +2720,25 @@ def test_reload_w_alternate_client(self): self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) + def test_iter(self): + import types + + begun_resource = self._makeResource() + query_resource = { + 'jobComplete': True, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + } + done_resource = copy.deepcopy(begun_resource) + done_resource['status'] = {'state': 'DONE'} + connection = _Connection(begun_resource, query_resource, done_resource) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + self.assertIsInstance(iter(job), types.GeneratorType) + class TestQueryPlanEntryStep(unittest.TestCase, _Base): KIND = 'KIND' From d63a3587554480c3688d3ad0b619aa77fd805d8f Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 13 Nov 2017 14:36:34 -0800 Subject: [PATCH 0355/2016] Post #4350 cleanup. (#4389) This happened because #4350 was sent before #4355 was merged. --- packages/google-cloud-bigquery/tests/unit/test_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 1bd440c746c1..470835533181 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2723,7 +2723,7 @@ def test_reload_w_alternate_client(self): def test_iter(self): import types - begun_resource = self._makeResource() + begun_resource = self._make_resource() query_resource = { 'jobComplete': True, 'jobReference': { From a9e66dc1bb0f2459c6b4ce8dda458c6b89f1eb43 Mon Sep 17 00:00:00 2001 From: Josef Barta Date: Fri, 17 Nov 2017 16:49:47 +0000 Subject: [PATCH 0356/2016] BigQuery: added methods for getting keys, items and dict (#4393) * added methods for getting keys, items and dict This change enables to retrieve the row as a dict and iterate the keys and/or items, like with a normal dict; in other words, making the Row object a dict-like object: >>> row.dict() {'name': 'Isabel', 'profession': 'bridge builder'} >>> for k, v in row.items(): >>> for k in row.keys(): * fixed row length * removed whitespace from blank lines * removed trailing whitespace * responding to failed cover check * added docstrings * update unit tests * Update test_table.py * .keys() and .items() into generators * get method for Row * get method for Row * get method for Row * Update table.py * Update test_table.py * Update test_table.py --- .../google/cloud/bigquery/table.py | 41 +++++++++++++++++++ .../tests/unit/test_table.py | 7 ++++ 2 files changed, 48 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 191eba2f20ce..ce68e18ef735 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -785,6 +785,47 @@ def __init__(self, values, field_to_index): def values(self): return self._xxx_values + def keys(self): + """ + Return keys as of a dict: + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).keys() + ['x', 'y'] + """ + keys = self._xxx_field_to_index.keys() + return keys + + def items(self): + """ + Return items as of a dict: + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).items() + [('x', 'a'), ('y', 'b')] + """ + items = [ + (k, self._xxx_values[i]) + for k, i + in self._xxx_field_to_index.items() + ] + return items + + def get(self, key, default=None): + """ + Return value under specified key + Defaults to None or specified default + if key does not exist: + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x') + 'a' + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') + None + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '') + '' + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', default = '') + '' + """ + index = self._xxx_field_to_index.get(key) + if index is None: + return default + return self._xxx_values[index] + def __getattr__(self, name): value = self._xxx_field_to_index.get(name) if value is None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 888974c3cdaf..49471f436be7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -765,6 +765,13 @@ def test_row(self): self.assertEqual(row['c'], 3) self.assertEqual(len(row), 3) self.assertEqual(row.values(), VALUES) + self.assertEqual(set(row.keys()), set({'a': 1, 'b': 2, 'c': 3}.keys())) + self.assertEqual(set(row.items()), + set({'a': 1, 'b': 2, 'c': 3}.items())) + self.assertEqual(row.get('a'), 1) + self.assertEqual(row.get('d'), None) + self.assertEqual(row.get('d', ''), '') + self.assertEqual(row.get('d', default=''), '') self.assertEqual(repr(row), "Row((1, 2, 3), {'a': 0, 'b': 1, 'c': 2})") self.assertFalse(row != row) From 3300db6dfbef387937249a0c2970cfb3d971e94a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 17 Nov 2017 10:36:44 -0800 Subject: [PATCH 0357/2016] BigQuery: docs for Row.keys, items, values, and get functions. (#4410) * BigQuery: docs for Row.keys, items, values, and get functions. - Refactors items, keys to return iterators. - Copies values to prevent mutating the original row object. * BQ: change Row.values() into a generator --- .../google/cloud/bigquery/table.py | 89 +++++++++++++------ .../tests/unit/test_table.py | 2 +- 2 files changed, 61 insertions(+), 30 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index ce68e18ef735..9d61a84ea2b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -16,6 +16,7 @@ from __future__ import absolute_import +import copy import datetime import operator @@ -783,43 +784,73 @@ def __init__(self, values, field_to_index): self._xxx_field_to_index = field_to_index def values(self): - return self._xxx_values + """Return the values included in this row. - def keys(self): + Returns: + Sequence[object]: A sequence of length ``len(row)``. """ - Return keys as of a dict: - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).keys() - ['x', 'y'] + for value in self._xxx_values: + yield copy.deepcopy(value) + + def keys(self): + """Return the keys for using a row as a dict. + + Returns: + Sequence[str]: The keys corresponding to the columns of a row + + Examples: + + >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).keys()) + ['x', 'y'] """ - keys = self._xxx_field_to_index.keys() - return keys + return six.iterkeys(self._xxx_field_to_index) def items(self): + """Return items as ``(key, value)`` pairs. + + Returns: + Sequence[Tuple[str, object]]: + The ``(key, value)`` pairs representing this row. + + Examples: + + >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).items()) + [('x', 'a'), ('y', 'b')] """ - Return items as of a dict: - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).items() - [('x', 'a'), ('y', 'b')] - """ - items = [ - (k, self._xxx_values[i]) - for k, i - in self._xxx_field_to_index.items() - ] - return items + for key, index in six.iteritems(self._xxx_field_to_index): + yield (key, copy.deepcopy(self._xxx_values[index])) def get(self, key, default=None): - """ - Return value under specified key - Defaults to None or specified default - if key does not exist: - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x') - 'a' - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') - None - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '') - '' - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', default = '') - '' + """Return a value for key, with a default value if it does not exist. + + Args: + key (str): The key of the column to access + default (object): + The default value to use if the key does not exist. (Defaults + to :data:`None`.) + + Returns: + object: + The value associated with the provided key, or a default value. + + Examples: + When the key exists, the value associated with it is returned. + + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x') + 'a' + + The default value is ``None`` when the key does not exist. + + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') + None + + The default value can be overrided with the ``default`` parameter. + + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '') + '' + + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', default = '') + '' """ index = self._xxx_field_to_index.get(key) if index is None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 49471f436be7..ffcf0b92ecaf 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -764,7 +764,7 @@ def test_row(self): self.assertEqual(row[1], 2) self.assertEqual(row['c'], 3) self.assertEqual(len(row), 3) - self.assertEqual(row.values(), VALUES) + self.assertEqual(tuple(row.values()), VALUES) self.assertEqual(set(row.keys()), set({'a': 1, 'b': 2, 'c': 3}.keys())) self.assertEqual(set(row.items()), set({'a': 1, 'b': 2, 'c': 3}.items())) From f8f006c93c417181bfb507e9b8ad18af94224343 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 17 Nov 2017 11:16:06 -0800 Subject: [PATCH 0358/2016] Revert "BigQuery: docs for Row.keys, items, values, and get functions. (#4410)" (#4412) This reverts commit dd1b038edc499b41693f97a0b46b6ef6ca2058ec. --- .../google/cloud/bigquery/table.py | 89 ++++++------------- .../tests/unit/test_table.py | 2 +- 2 files changed, 30 insertions(+), 61 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 9d61a84ea2b1..ce68e18ef735 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -16,7 +16,6 @@ from __future__ import absolute_import -import copy import datetime import operator @@ -784,73 +783,43 @@ def __init__(self, values, field_to_index): self._xxx_field_to_index = field_to_index def values(self): - """Return the values included in this row. - - Returns: - Sequence[object]: A sequence of length ``len(row)``. - """ - for value in self._xxx_values: - yield copy.deepcopy(value) + return self._xxx_values def keys(self): - """Return the keys for using a row as a dict. - - Returns: - Sequence[str]: The keys corresponding to the columns of a row - - Examples: - - >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).keys()) - ['x', 'y'] """ - return six.iterkeys(self._xxx_field_to_index) + Return keys as of a dict: + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).keys() + ['x', 'y'] + """ + keys = self._xxx_field_to_index.keys() + return keys def items(self): - """Return items as ``(key, value)`` pairs. - - Returns: - Sequence[Tuple[str, object]]: - The ``(key, value)`` pairs representing this row. - - Examples: - - >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).items()) - [('x', 'a'), ('y', 'b')] """ - for key, index in six.iteritems(self._xxx_field_to_index): - yield (key, copy.deepcopy(self._xxx_values[index])) + Return items as of a dict: + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).items() + [('x', 'a'), ('y', 'b')] + """ + items = [ + (k, self._xxx_values[i]) + for k, i + in self._xxx_field_to_index.items() + ] + return items def get(self, key, default=None): - """Return a value for key, with a default value if it does not exist. - - Args: - key (str): The key of the column to access - default (object): - The default value to use if the key does not exist. (Defaults - to :data:`None`.) - - Returns: - object: - The value associated with the provided key, or a default value. - - Examples: - When the key exists, the value associated with it is returned. - - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x') - 'a' - - The default value is ``None`` when the key does not exist. - - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') - None - - The default value can be overrided with the ``default`` parameter. - - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '') - '' - - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', default = '') - '' + """ + Return value under specified key + Defaults to None or specified default + if key does not exist: + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x') + 'a' + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') + None + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '') + '' + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', default = '') + '' """ index = self._xxx_field_to_index.get(key) if index is None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index ffcf0b92ecaf..49471f436be7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -764,7 +764,7 @@ def test_row(self): self.assertEqual(row[1], 2) self.assertEqual(row['c'], 3) self.assertEqual(len(row), 3) - self.assertEqual(tuple(row.values()), VALUES) + self.assertEqual(row.values(), VALUES) self.assertEqual(set(row.keys()), set({'a': 1, 'b': 2, 'c': 3}.keys())) self.assertEqual(set(row.items()), set({'a': 1, 'b': 2, 'c': 3}.items())) From 7a415473f755629c1e5c0436f36e63bc78726820 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 20 Nov 2017 15:00:50 -0800 Subject: [PATCH 0359/2016] BigQuery: Use TableListItem for table listing. (#4427) * BigQuery: Use TableListItem for table listing. The table list response only includes a subset of all table properties. This commit adds a new type to document explicitly which properties are included, but also make it clear that this object should not be used in place of a full Table object. * Get bigquery.client imports closer to accepted style. * Share property code between table and table list item for view_use_legacy_sql * Clarify TableListItem docs. * Fix link syntax. * Shrink property links. --- .../google/cloud/bigquery/client.py | 29 ++-- .../google/cloud/bigquery/table.py | 164 ++++++++++++++++-- .../tests/unit/test_client.py | 8 +- .../tests/unit/test_table.py | 81 +++++++++ 4 files changed, 248 insertions(+), 34 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 18fb64c5e531..c12979e98f30 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -31,26 +31,28 @@ from google.api_core import page_iterator from google.api_core.exceptions import GoogleAPICallError from google.api_core.exceptions import NotFound - from google.cloud import exceptions from google.cloud.client import ClientWithProject + +from google.cloud.bigquery._helpers import DEFAULT_RETRY +from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW +from google.cloud.bigquery._helpers import _field_to_index_mapping +from google.cloud.bigquery._helpers import _item_to_row +from google.cloud.bigquery._helpers import _rows_page_start +from google.cloud.bigquery._helpers import _snake_to_camel_case from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA -from google.cloud.bigquery.table import TableReference -from google.cloud.bigquery.table import _row_from_mapping from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import QueryJob, QueryJobConfig from google.cloud.bigquery.query import QueryResults -from google.cloud.bigquery._helpers import _item_to_row -from google.cloud.bigquery._helpers import _rows_page_start -from google.cloud.bigquery._helpers import _field_to_index_mapping -from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW -from google.cloud.bigquery._helpers import DEFAULT_RETRY -from google.cloud.bigquery._helpers import _snake_to_camel_case +from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableListItem +from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA +from google.cloud.bigquery.table import _row_from_mapping _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB @@ -405,8 +407,9 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None, :param retry: (Optional) How to retry the RPC. :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` - contained within the current dataset. + :returns: + Iterator of :class:`~google.cloud.bigquery.table.TableListItem` + contained within the current dataset. """ if not isinstance(dataset, (Dataset, DatasetReference)): raise TypeError('dataset must be a Dataset or a DatasetReference') @@ -1367,7 +1370,7 @@ def _item_to_table(iterator, resource): :rtype: :class:`~google.cloud.bigquery.table.Table` :returns: The next table in the page. """ - return Table.from_api_repr(resource) + return TableListItem(resource) def _make_job_id(job_id, prefix=None): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index ce68e18ef735..eda14a409b3e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -34,6 +34,25 @@ _MARKER = object() +def _view_use_legacy_sql_getter(table): + """Specifies whether to execute the view with Legacy or Standard SQL. + + If this table is not a view, None is returned. + + Returns: + bool: True if the view is using legacy SQL, or None if not a view + """ + view = table._properties.get('view') + if view is not None: + # The server-side default for useLegacySql is True. + return view.get('useLegacySql', True) + # In some cases, such as in a table list no view object is present, but the + # resource still represents a view. Use the type as a fallback. + if table.table_type == 'VIEW': + # The server-side default for useLegacySql is True. + return True + + class TableReference(object): """TableReferences are pointers to tables. @@ -531,23 +550,7 @@ def view_query(self): """Delete SQL query defining the table as a view.""" self._properties.pop('view', None) - @property - def view_use_legacy_sql(self): - """Specifies whether to execute the view with Legacy or Standard SQL. - - The default is False for views (use Standard SQL). - If this table is not a view, None is returned. - - :rtype: bool or ``NoneType`` - :returns: The boolean for view.useLegacySql, or None if not a view. - """ - view = self._properties.get('view') - if view is not None: - # useLegacySql is never missing from the view dict if this table - # was created client-side, because the view_query setter populates - # it. So a missing or None can only come from the server, whose - # default is True. - return view.get('useLegacySql', True) + view_use_legacy_sql = property(_view_use_legacy_sql_getter) @view_use_legacy_sql.setter def view_use_legacy_sql(self, value): @@ -713,6 +716,133 @@ def _build_resource(self, filter_fields): return resource +class TableListItem(object): + """A read-only table resource from a list operation. + + For performance reasons, the BigQuery API only includes some of the table + properties when listing tables. Notably, + :attr:`~google.cloud.bigquery.table.Table.schema` and + :attr:`~google.cloud.bigquery.table.Table.num_rows` are missing. + + For a full list of the properties that the BigQuery API returns, see the + `REST documentation for tables.list + `_. + + + Args: + resource (dict): + A table-like resource object from a table list response. + """ + + def __init__(self, resource): + self._properties = resource + + @property + def project(self): + """The project ID of the project this table belongs to. + + Returns: + str: the project ID of the table. + """ + return self._properties.get('tableReference', {}).get('projectId') + + @property + def dataset_id(self): + """The dataset ID of the dataset this table belongs to. + + Returns: + str: the dataset ID of the table. + """ + return self._properties.get('tableReference', {}).get('datasetId') + + @property + def table_id(self): + """The table ID. + + Returns: + str: the table ID. + """ + return self._properties.get('tableReference', {}).get('tableId') + + @property + def reference(self): + """A :class:`~google.cloud.bigquery.table.TableReference` pointing to + this table. + + Returns: + google.cloud.bigquery.table.TableReference: pointer to this table + """ + from google.cloud.bigquery import dataset + + dataset_ref = dataset.DatasetReference(self.project, self.dataset_id) + return TableReference(dataset_ref, self.table_id) + + @property + def labels(self): + """Labels for the table. + + This method always returns a dict. To change a table's labels, + modify the dict, then call ``Client.update_table``. To delete a + label, set its value to ``None`` before updating. + + Returns: + Map[str, str]: A dictionary of the the table's labels + """ + return self._properties.get('labels', {}) + + @property + def full_table_id(self): + """ID for the table, in the form ``project_id:dataset_id:table_id``. + + Returns: + str: The fully-qualified ID of the table + """ + return self._properties.get('id') + + @property + def table_type(self): + """The type of the table. + + Possible values are "TABLE", "VIEW", or "EXTERNAL". + + Returns: + str: The kind of table + """ + return self._properties.get('type') + + @property + def partitioning_type(self): + """Time partitioning of the table. + + Returns: + str: + Type of partitioning if the table is partitioned, None + otherwise. + """ + return self._properties.get('timePartitioning', {}).get('type') + + @property + def partition_expiration(self): + """Expiration time in ms for a partition + + Returns: + int: The time in ms for partition expiration + """ + return int( + self._properties.get('timePartitioning', {}).get('expirationMs')) + + @property + def friendly_name(self): + """Title of the table. + + Returns: + str: The name as set by the user, or None (the default) + """ + return self._properties.get('friendlyName') + + view_use_legacy_sql = property(_view_use_legacy_sql_getter) + + def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 6efecd606766..e6f04382d724 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1002,7 +1002,7 @@ def test_list_dataset_tables_empty(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_dataset_tables_defaults(self): - from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TableListItem TABLE_1 = 'table_one' TABLE_2 = 'table_two' @@ -1039,7 +1039,7 @@ def test_list_dataset_tables_defaults(self): self.assertEqual(len(tables), len(DATA['tables'])) for found, expected in zip(tables, DATA['tables']): - self.assertIsInstance(found, Table) + self.assertIsInstance(found, TableListItem) self.assertEqual(found.full_table_id, expected['id']) self.assertEqual(found.table_type, expected['type']) self.assertEqual(token, TOKEN) @@ -1050,7 +1050,7 @@ def test_list_dataset_tables_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_dataset_tables_explicit(self): - from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TableListItem TABLE_1 = 'table_one' TABLE_2 = 'table_two' @@ -1087,7 +1087,7 @@ def test_list_dataset_tables_explicit(self): self.assertEqual(len(tables), len(DATA['tables'])) for found, expected in zip(tables, DATA['tables']): - self.assertIsInstance(found, Table) + self.assertIsInstance(found, TableListItem) self.assertEqual(found.full_table_id, expected['id']) self.assertEqual(found.table_type, expected['type']) self.assertIsNone(token) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 49471f436be7..5f6c5d58561c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -753,6 +753,87 @@ def test__row_from_mapping_w_schema(self): ('Phred Phlyntstone', 32, ['red', 'green'], None)) +class TestTableListItem(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import TableListItem + + return TableListItem + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + project = 'test-project' + dataset_id = 'test_dataset' + table_id = 'coffee_table' + resource = { + 'kind': 'bigquery#table', + 'id': '{}:{}:{}'.format(project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id, + }, + 'friendlyName': 'Mahogany Coffee Table', + 'type': 'TABLE', + 'timePartitioning': { + 'type': 'DAY', + 'expirationMs': '10000', + }, + 'labels': { + 'some-stuff': 'this-is-a-label', + }, + } + + table = self._make_one(resource) + self.assertEqual(table.project, project) + self.assertEqual(table.dataset_id, dataset_id) + self.assertEqual(table.table_id, table_id) + self.assertEqual( + table.full_table_id, + '{}:{}:{}'.format(project, dataset_id, table_id)) + self.assertEqual(table.reference.project, project) + self.assertEqual(table.reference.dataset_id, dataset_id) + self.assertEqual(table.reference.table_id, table_id) + self.assertEqual(table.friendly_name, 'Mahogany Coffee Table') + self.assertEqual(table.table_type, 'TABLE') + self.assertEqual(table.partitioning_type, 'DAY') + self.assertEqual(table.partition_expiration, 10000) + self.assertEqual(table.labels['some-stuff'], 'this-is-a-label') + self.assertIsNone(table.view_use_legacy_sql) + + def test_ctor_view(self): + project = 'test-project' + dataset_id = 'test_dataset' + table_id = 'just_looking' + resource = { + 'kind': 'bigquery#table', + 'id': '{}:{}:{}'.format(project, dataset_id, table_id), + 'tableReference': { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': table_id, + }, + 'type': 'VIEW', + } + + table = self._make_one(resource) + self.assertEqual(table.project, project) + self.assertEqual(table.dataset_id, dataset_id) + self.assertEqual(table.table_id, table_id) + self.assertEqual( + table.full_table_id, + '{}:{}:{}'.format(project, dataset_id, table_id)) + self.assertEqual(table.reference.project, project) + self.assertEqual(table.reference.dataset_id, dataset_id) + self.assertEqual(table.reference.table_id, table_id) + self.assertEqual(table.table_type, 'VIEW') + # Server default for useLegacySql is True. + self.assertTrue(table.view_use_legacy_sql) + + class TestRow(unittest.TestCase): def test_row(self): From df8ca9e053d290b2e6b71a9c75233731c077da37 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Nov 2017 13:29:28 -0800 Subject: [PATCH 0360/2016] BigQuery: add reference property to Table and Dataset (#4405) * BigQuery: add reference property to Table and Dataset This makes it easier to call `get_table`, `get_dataset`, or other functions that want just a reference, not a full resource. * Use shared reference property between Table and TableListItem. --- .../google/cloud/bigquery/dataset.py | 11 ++++++++ .../google/cloud/bigquery/table.py | 28 +++++++++++-------- .../tests/unit/test_dataset.py | 3 ++ .../tests/unit/test_table.py | 3 ++ 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index eb7372876c1f..25e00405e2c8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -279,6 +279,17 @@ def full_dataset_id(self): """ return self._properties.get('id') + @property + def reference(self): + """A :class:`~google.cloud.bigquery.dataset.DatasetReference` pointing to + this dataset. + + Returns: + google.cloud.bigquery.dataset.DatasetReference: + A pointer to this dataset + """ + return DatasetReference(self.project, self.dataset_id) + @property def etag(self): """ETag for the dataset resource. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index eda14a409b3e..1433842ef61a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -34,6 +34,19 @@ _MARKER = object() +def _reference_getter(table): + """A :class:`~google.cloud.bigquery.table.TableReference` pointing to + this table. + + Returns: + google.cloud.bigquery.table.TableReference: pointer to this table + """ + from google.cloud.bigquery import dataset + + dataset_ref = dataset.DatasetReference(table.project, table.dataset_id) + return TableReference(dataset_ref, table.table_id) + + def _view_use_legacy_sql_getter(table): """Specifies whether to execute the view with Legacy or Standard SQL. @@ -223,6 +236,8 @@ def table_id(self): """ return self._table_id + reference = property(_reference_getter) + @property def path(self): """URL path for the table's APIs. @@ -764,18 +779,7 @@ def table_id(self): """ return self._properties.get('tableReference', {}).get('tableId') - @property - def reference(self): - """A :class:`~google.cloud.bigquery.table.TableReference` pointing to - this table. - - Returns: - google.cloud.bigquery.table.TableReference: pointer to this table - """ - from google.cloud.bigquery import dataset - - dataset_ref = dataset.DatasetReference(self.project, self.dataset_id) - return TableReference(dataset_ref, self.table_id) + reference = property(_reference_getter) @property def labels(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 8a34b82a460d..e9e2f0dec813 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -245,7 +245,10 @@ def _verify_access_entry(self, access_entries, resource): def _verify_readonly_resource_properties(self, dataset, resource): + self.assertEqual(dataset.project, self.PROJECT) self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.reference.project, self.PROJECT) + self.assertEqual(dataset.reference.dataset_id, self.DS_ID) if 'creationTime' in resource: self.assertEqual(dataset.created, self.WHEN) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 5f6c5d58561c..752239b7276b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -293,6 +293,9 @@ def test_ctor(self): self.assertEqual(table.table_id, self.TABLE_NAME) self.assertEqual(table.project, self.PROJECT) self.assertEqual(table.dataset_id, self.DS_ID) + self.assertEqual(table.reference.table_id, self.TABLE_NAME) + self.assertEqual(table.reference.project, self.PROJECT) + self.assertEqual(table.reference.dataset_id, self.DS_ID) self.assertEqual( table.path, '/projects/%s/datasets/%s/tables/%s' % ( From e0ba6c71040538c9ed73af3265e795711c3154c7 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 21 Nov 2017 14:14:45 -0800 Subject: [PATCH 0361/2016] BigQuery: removes Client.query_rows() (#4429) --- .../google/cloud/bigquery/client.py | 68 +---- .../google-cloud-bigquery/tests/system.py | 32 +-- .../tests/unit/test_client.py | 255 ------------------ 3 files changed, 19 insertions(+), 336 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index c12979e98f30..757fe6bfd229 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -17,7 +17,6 @@ from __future__ import absolute_import import collections -import concurrent.futures import functools import os import uuid @@ -29,8 +28,6 @@ from google.resumable_media.requests import ResumableUpload from google.api_core import page_iterator -from google.api_core.exceptions import GoogleAPICallError -from google.api_core.exceptions import NotFound from google.cloud import exceptions from google.cloud.client import ClientWithProject @@ -1144,67 +1141,6 @@ def create_rows_json(self, table, json_rows, row_ids=None, return errors - def query_rows( - self, query, job_config=None, job_id=None, job_id_prefix=None, - timeout=None, retry=DEFAULT_RETRY): - """Start a query job and wait for the results. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query - - :type query: str - :param query: - SQL query to be executed. Defaults to the standard SQL dialect. - Use the ``job_config`` parameter to change dialects. - - :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig` - :param job_config: (Optional) Extra configuration options for the job. - - :type job_id: str - :param job_id: (Optional) ID to use for the query job. - - :type job_id_prefix: str or ``NoneType`` - :param job_id_prefix: (Optional) the user-provided prefix for a - randomly generated job ID. This parameter will be - ignored if a ``job_id`` is also given. - - :type timeout: float - :param timeout: - (Optional) How long (in seconds) to wait for job to complete - before raising a :class:`concurrent.futures.TimeoutError`. - - :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: - Iterator of row data :class:`~google.cloud.bigquery.table.Row`-s. - During each page, the iterator will have the ``total_rows`` - attribute set, which counts the total number of rows **in the - result set** (this is distinct from the total number of rows in - the current page: ``iterator.page.num_items``). - - :raises: - :class:`~google.api_core.exceptions.GoogleAPICallError` if the - job failed or :class:`concurrent.futures.TimeoutError` if the job - did not complete in the given timeout. - - When an exception happens, the query job will be cancelled on a - best-effort basis. - """ - job_id = _make_job_id(job_id, job_id_prefix) - - try: - job = self.query( - query, job_config=job_config, job_id=job_id, retry=retry) - rows_iterator = job.result(timeout=timeout) - except (GoogleAPICallError, concurrent.futures.TimeoutError): - try: - self.cancel_job(job_id) - except NotFound: - # It's OK if couldn't cancel because job never got created. - pass - raise - - return rows_iterator - def list_rows(self, table, selected_fields=None, max_results=None, page_token=None, start_index=None, retry=DEFAULT_RETRY): """List the rows of the table. @@ -1303,12 +1239,12 @@ def list_partitions(self, table, retry=DEFAULT_RETRY): """ config = QueryJobConfig() config.use_legacy_sql = True # required for '$' syntax - rows = self.query_rows( + query_job = self.query( 'SELECT partition_id from [%s:%s.%s$__PARTITIONS_SUMMARY__]' % (table.project, table.dataset_id, table.table_id), job_config=config, retry=retry) - return [row[0] for row in rows] + return [row[0] for row in query_job] # pylint: disable=unused-argument diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 05faf6fb71ee..6a26922f3564 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -673,7 +673,7 @@ def test_job_cancel(self): # raise an error, and that the job completed (in the `retry()` # above). - def test_query_rows_w_legacy_sql_types(self): + def test_query_w_legacy_sql_types(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) zoned = naive.replace(tzinfo=UTC) @@ -706,7 +706,7 @@ def test_query_rows_w_legacy_sql_types(self): for example in examples: job_config = bigquery.QueryJobConfig() job_config.use_legacy_sql = True - rows = list(Config.CLIENT.query_rows( + rows = list(Config.CLIENT.query( example['sql'], job_config=job_config)) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) @@ -806,26 +806,28 @@ def _generate_standard_sql_types_examples(self): }, ] - def test_query_rows_w_standard_sql_types(self): + def test_query_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() for example in examples: - rows = list(Config.CLIENT.query_rows(example['sql'])) + rows = list(Config.CLIENT.query(example['sql'])) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) self.assertEqual(rows[0][0], example['expected']) - def test_query_rows_w_failed_query(self): + def test_query_w_failed_query(self): from google.api_core.exceptions import BadRequest with self.assertRaises(BadRequest): - Config.CLIENT.query_rows('invalid syntax;') + Config.CLIENT.query('invalid syntax;').result() + + def test_query_w_timeout(self): + query_job = Config.CLIENT.query( + 'SELECT * FROM `bigquery-public-data.github_repos.commits`;', + job_id_prefix='test_query_w_timeout_') - def test_query_rows_w_timeout(self): with self.assertRaises(concurrent.futures.TimeoutError): - Config.CLIENT.query_rows( - 'SELECT * FROM `bigquery-public-data.github_repos.commits`;', - job_id_prefix='test_query_rows_w_timeout_', - timeout=1) # 1 second is much too short for this query. + # 1 second is much too short for this query. + query_job.result(timeout=1) def test_dbapi_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() @@ -1224,9 +1226,9 @@ def test_large_query_w_public_data(self): SQL = 'SELECT * from `{}.{}.{}` LIMIT {}'.format( PUBLIC, DATASET_ID, TABLE_NAME, LIMIT) - iterator = Config.CLIENT.query_rows(SQL) + query_job = Config.CLIENT.query(SQL) - rows = list(iterator) + rows = list(query_job) self.assertEqual(len(rows), LIMIT) def test_query_future(self): @@ -1256,7 +1258,7 @@ def test_query_table_def(self): job_config.table_definitions = {table_id: ec} sql = 'SELECT * FROM %s' % table_id - got_rows = Config.CLIENT.query_rows(sql, job_config=job_config) + got_rows = Config.CLIENT.query(sql, job_config=job_config) row_tuples = [r.values() for r in got_rows] by_age = operator.itemgetter(1) @@ -1280,7 +1282,7 @@ def test_query_external_table(self): sql = 'SELECT * FROM %s.%s' % (dataset_id, table_id) - got_rows = Config.CLIENT.query_rows(sql) + got_rows = Config.CLIENT.query(sql) row_tuples = [r.values() for r in got_rows] by_age = operator.itemgetter(1) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index e6f04382d724..721181e3cc81 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import concurrent.futures import copy import email import io @@ -2464,260 +2463,6 @@ def test_create_rows_json(self): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['data'], SENT) - def test_query_rows_defaults(self): - from google.api_core.page_iterator import HTTPIterator - from google.cloud.bigquery.table import Row - - JOB = 'job-id' - QUERY = 'SELECT COUNT(*) FROM persons' - RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': JOB, - }, - 'configuration': { - 'query': { - 'query': QUERY, - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': '_temp_dataset', - 'tableId': '_temp_table', - }, - }, - }, - 'status': { - 'state': 'DONE', - }, - } - RESULTS_RESOURCE = { - 'jobReference': RESOURCE['jobReference'], - 'jobComplete': True, - 'schema': { - 'fields': [ - {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, - ] - }, - 'totalRows': '3', - 'pageToken': 'next-page', - } - FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE) - FIRST_PAGE['rows'] = [ - {'f': [{'v': '1'}]}, - {'f': [{'v': '2'}]}, - ] - LAST_PAGE = copy.deepcopy(RESULTS_RESOURCE) - LAST_PAGE['rows'] = [ - {'f': [{'v': '3'}]}, - ] - del LAST_PAGE['pageToken'] - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) - conn = client._connection = _Connection( - RESOURCE, RESULTS_RESOURCE, FIRST_PAGE, LAST_PAGE) - - rows_iter = client.query_rows(QUERY) - rows = list(rows_iter) - - self.assertEqual(rows, [Row((i,), {'field0': 0}) for i in (1, 2, 3)]) - self.assertIs(rows_iter.client, client) - self.assertIsInstance(rows_iter, HTTPIterator) - self.assertEqual(len(conn._requested), 4) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/PROJECT/jobs') - self.assertIsInstance( - req['data']['jobReference']['jobId'], six.string_types) - - def test_query_rows_w_job_id(self): - from google.api_core.page_iterator import HTTPIterator - - JOB = 'job-id' - QUERY = 'SELECT COUNT(*) FROM persons' - RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': JOB, - }, - 'configuration': { - 'query': { - 'query': QUERY, - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': '_temp_dataset', - 'tableId': '_temp_table', - }, - }, - }, - 'status': { - 'state': 'DONE', - }, - } - RESULTS_RESOURCE = { - 'jobReference': RESOURCE['jobReference'], - 'jobComplete': True, - 'schema': { - 'fields': [ - {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, - ] - }, - 'totalRows': '0', - } - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) - conn = client._connection = _Connection( - RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) - - rows_iter = client.query_rows(QUERY, job_id=JOB) - rows = list(rows_iter) - - self.assertEqual(rows, []) - self.assertIs(rows_iter.client, client) - self.assertIsInstance(rows_iter, HTTPIterator) - self.assertEqual(len(conn._requested), 3) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/PROJECT/jobs') - self.assertEqual(req['data']['jobReference']['jobId'], JOB) - - def test_query_rows_w_job_config(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.api_core.page_iterator import HTTPIterator - - JOB = 'job-id' - QUERY = 'SELECT COUNT(*) FROM persons' - RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': JOB, - }, - 'configuration': { - 'query': { - 'query': QUERY, - 'useLegacySql': True, - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': '_temp_dataset', - 'tableId': '_temp_table', - }, - }, - 'dryRun': True, - }, - 'status': { - 'state': 'DONE', - }, - } - RESULTS_RESOURCE = { - 'jobReference': RESOURCE['jobReference'], - 'jobComplete': True, - 'schema': { - 'fields': [ - {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, - ] - }, - 'totalRows': '0', - } - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) - conn = client._connection = _Connection( - RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) - - job_config = QueryJobConfig() - job_config.use_legacy_sql = True - job_config.dry_run = True - rows_iter = client.query_rows(QUERY, job_id=JOB, job_config=job_config) - - self.assertIsInstance(rows_iter, HTTPIterator) - self.assertEqual(len(conn._requested), 2) - req = conn._requested[0] - configuration = req['data']['configuration'] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/PROJECT/jobs') - self.assertEqual(req['data']['jobReference']['jobId'], JOB) - self.assertEqual(configuration['query']['useLegacySql'], True) - self.assertEqual(configuration['dryRun'], True) - - def test_query_rows_w_timeout_error(self): - JOB = 'job-id' - QUERY = 'SELECT COUNT(*) FROM persons' - RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': JOB, - }, - 'configuration': { - 'query': { - 'query': QUERY, - }, - }, - 'status': { - 'state': 'RUNNING', - }, - } - CANCEL_RESOURCE = {'job': RESOURCE} - creds = _make_credentials() - http = object() - client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(RESOURCE, CANCEL_RESOURCE) - - with mock.patch( - 'google.cloud.bigquery.job.QueryJob.result') as mock_result: - mock_result.side_effect = concurrent.futures.TimeoutError( - 'time is up') - - with self.assertRaises(concurrent.futures.TimeoutError): - client.query_rows( - QUERY, - job_id_prefix='test_query_rows_w_timeout_', - timeout=1) - - # Should attempt to create and cancel the job. - self.assertEqual(len(conn._requested), 2) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/PROJECT/jobs') - cancelreq = conn._requested[1] - self.assertEqual(cancelreq['method'], 'POST') - self.assertIn( - '/projects/PROJECT/jobs/test_query_rows_w_timeout_', - cancelreq['path']) - self.assertIn('/cancel', cancelreq['path']) - - def test_query_rows_w_api_error(self): - from google.api_core.exceptions import NotFound - - QUERY = 'SELECT COUNT(*) FROM persons' - creds = _make_credentials() - http = object() - client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection() - - # Expect a 404 error since we didn't supply a job resource. - with self.assertRaises(NotFound): - client.query_rows( - QUERY, - job_id_prefix='test_query_rows_w_error_', - timeout=1) - - # Should attempt to create and cancel the job. - self.assertEqual(len(conn._requested), 2) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/PROJECT/jobs') - cancelreq = conn._requested[1] - self.assertEqual(cancelreq['method'], 'POST') - self.assertIn( - '/projects/PROJECT/jobs/test_query_rows_w_error_', - cancelreq['path']) - self.assertIn('/cancel', cancelreq['path']) - def test_list_rows(self): import datetime from google.cloud._helpers import UTC From 9241e2a6037def06f12958e82af7e07150f6119d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Nov 2017 09:23:42 -0800 Subject: [PATCH 0362/2016] BigQuery: docs for Row.keys, items, values, and get functions. (#4413) - Refactors items, keys to return iterators. - Copies values to prevent mutating the original row object. --- .../google/cloud/bigquery/table.py | 88 +++++++++++++------ 1 file changed, 59 insertions(+), 29 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 1433842ef61a..79928230f08e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -16,6 +16,7 @@ from __future__ import absolute_import +import copy import datetime import operator @@ -917,43 +918,72 @@ def __init__(self, values, field_to_index): self._xxx_field_to_index = field_to_index def values(self): - return self._xxx_values + """Return the values included in this row. - def keys(self): + Returns: + Sequence[object]: A sequence of length ``len(row)``. """ - Return keys as of a dict: - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).keys() - ['x', 'y'] + return copy.deepcopy(self._xxx_values) + + def keys(self): + """Return the keys for using a row as a dict. + + Returns: + Sequence[str]: The keys corresponding to the columns of a row + + Examples: + + >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).keys()) + ['x', 'y'] """ - keys = self._xxx_field_to_index.keys() - return keys + return six.iterkeys(self._xxx_field_to_index) def items(self): + """Return items as ``(key, value)`` pairs. + + Returns: + Sequence[Tuple[str, object]]: + The ``(key, value)`` pairs representing this row. + + Examples: + + >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).items()) + [('x', 'a'), ('y', 'b')] """ - Return items as of a dict: - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).items() - [('x', 'a'), ('y', 'b')] - """ - items = [ - (k, self._xxx_values[i]) - for k, i - in self._xxx_field_to_index.items() - ] - return items + for key, index in six.iteritems(self._xxx_field_to_index): + yield (key, copy.deepcopy(self._xxx_values[index])) def get(self, key, default=None): - """ - Return value under specified key - Defaults to None or specified default - if key does not exist: - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x') - 'a' - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') - None - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '') - '' - >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', default = '') - '' + """Return a value for key, with a default value if it does not exist. + + Args: + key (str): The key of the column to access + default (object): + The default value to use if the key does not exist. (Defaults + to :data:`None`.) + + Returns: + object: + The value associated with the provided key, or a default value. + + Examples: + When the key exists, the value associated with it is returned. + + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x') + 'a' + + The default value is ``None`` when the key does not exist. + + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') + None + + The default value can be overrided with the ``default`` parameter. + + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '') + '' + + >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', default = '') + '' """ index = self._xxx_field_to_index.get(key) if index is None: From 371dc92350ac794756555b3d21b170a8731b972e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 29 Nov 2017 15:44:58 -0800 Subject: [PATCH 0363/2016] BigQuery: Rename `properties` arg to `fields` in `update_table` (#4453) * Rename properties arg to fields This makes `update_table` consistent with `update_dataset`. Also, documents the arguments to `update_table`. --- .../google/cloud/bigquery/client.py | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 757fe6bfd229..6f62291e550f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -313,7 +313,7 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): If ``dataset.etag`` is not ``None``, the update will only succeed if the dataset on the server has the same ETag. Thus reading a dataset with ``get_dataset``, changing its fields, - and then passing it ``update_dataset`` will ensure that the changes + and then passing it to ``update_dataset`` will ensure that the changes will only be saved if no modifications to the dataset occurred since the read. @@ -353,23 +353,32 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): retry, method='PATCH', path=path, data=partial, headers=headers) return Dataset.from_api_repr(api_response) - def update_table(self, table, properties, retry=DEFAULT_RETRY): - """API call: update table properties via a PUT request + def update_table(self, table, fields, retry=DEFAULT_RETRY): + """Change some fields of a table. - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update - - :type table: - :class:`google.cloud.bigquery.table.Table` - :param table_ref: the table to update. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. - - :rtype: :class:`google.cloud.bigquery.table.Table` - :returns: a ``Table`` instance + Use ``fields`` to specify which fields to update. At least one field + must be provided. If a field is listed in ``fields`` and is ``None`` + in ``table``, it will be deleted. + + If ``table.etag`` is not ``None``, the update will only succeed if + the table on the server has the same ETag. Thus reading a table with + ``get_table``, changing its fields, and then passing it to + ``update_table`` will ensure that the changes will only be saved if + no modifications to the table occurred since the read. + + Args: + table (google.cloud.bigquery.table.Table): The table to update. + fields (Sequence[str]): + The fields of ``table`` to change, spelled as the Table + properties (e.g. "friendly_name"). + retry (google.api_core.retry.Retry): + (Optional) A description of how to retry the API call. + + Returns: + google.cloud.bigquery.table.Table: + The table resource returned from the API call. """ - partial = table._build_resource(properties) + partial = table._build_resource(fields) if table.etag is not None: headers = {'If-Match': table.etag} else: From 65cc0dff27eac6bd4b8ac94e394c0003e0a41ed4 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 4 Dec 2017 14:41:25 -0800 Subject: [PATCH 0364/2016] BigQuery: Add ability to get query results as a Pandas dataframe. (#4354) --- .../google/cloud/bigquery/_helpers.py | 2 +- .../google/cloud/bigquery/client.py | 17 +- .../google/cloud/bigquery/job.py | 15 +- .../google/cloud/bigquery/table.py | 78 +++++++ packages/google-cloud-bigquery/nox.py | 4 +- packages/google-cloud-bigquery/setup.py | 5 + .../google-cloud-bigquery/tests/system.py | 76 ++++++ .../tests/unit/test_job.py | 39 ++++ .../tests/unit/test_table.py | 217 ++++++++++++++++++ 9 files changed, 437 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 41b68db02c39..e535642aa43d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -468,7 +468,7 @@ def _rows_page_start(iterator, page, response): total_rows = response.get('totalRows') if total_rows is not None: total_rows = int(total_rows) - iterator.total_rows = total_rows + iterator._total_rows = total_rows # pylint: enable=unused-argument diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 6f62291e550f..34fc9114ce92 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -33,9 +33,6 @@ from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW -from google.cloud.bigquery._helpers import _field_to_index_mapping -from google.cloud.bigquery._helpers import _item_to_row -from google.cloud.bigquery._helpers import _rows_page_start from google.cloud.bigquery._helpers import _snake_to_camel_case from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset @@ -48,6 +45,7 @@ from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableListItem from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA from google.cloud.bigquery.table import _row_from_mapping @@ -1189,7 +1187,7 @@ def list_rows(self, table, selected_fields=None, max_results=None, :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :rtype: :class:`~google.api_core.page_iterator.Iterator` + :rtype: :class:`~google.cloud.bigquery.table.RowIterator` :returns: Iterator of row data :class:`~google.cloud.bigquery.table.Row`-s. During each page, the iterator will have the ``total_rows`` attribute @@ -1217,20 +1215,15 @@ def list_rows(self, table, selected_fields=None, max_results=None, if start_index is not None: params['startIndex'] = start_index - iterator = page_iterator.HTTPIterator( + row_iterator = RowIterator( client=self, api_request=functools.partial(self._call_api, retry), path='%s/data' % (table.path,), - item_to_value=_item_to_row, - items_key='rows', + schema=schema, page_token=page_token, - next_token='pageToken', max_results=max_results, - page_start=_rows_page_start, extra_params=params) - iterator.schema = schema - iterator._field_to_index = _field_to_index_mapping(schema) - return iterator + return row_iterator def list_partitions(self, table, retry=DEFAULT_RETRY): """List the partitions in a table. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 91301b1ed8d2..9f66e3ec9ea0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1929,7 +1929,7 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the call that retrieves rows. - :rtype: :class:`~google.api_core.page_iterator.Iterator` + :rtype: :class:`~google.cloud.bigquery.table.RowIterator` :returns: Iterator of row data :class:`~google.cloud.bigquery.table.Row`-s. During each page, the iterator will have the ``total_rows`` @@ -1949,6 +1949,19 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): return self._client.list_rows(dest_table, selected_fields=schema, retry=retry) + def to_dataframe(self): + """Return a pandas DataFrame from a QueryJob + + Returns: + A :class:`~pandas.DataFrame` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: If the `pandas` library cannot be imported. + """ + return self.result().to_dataframe() + def __iter__(self): return iter(self.result()) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 79928230f08e..d7e4745fa8c8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -21,10 +21,19 @@ import operator import six +try: + import pandas +except ImportError: # pragma: NO COVER + pandas = None + +from google.api_core.page_iterator import HTTPIterator from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime +from google.cloud.bigquery._helpers import _item_to_row +from google.cloud.bigquery._helpers import _rows_page_start from google.cloud.bigquery._helpers import _snake_to_camel_case +from google.cloud.bigquery._helpers import _field_to_index_mapping from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource @@ -1023,3 +1032,72 @@ def __repr__(self): key=operator.itemgetter(1)) f2i = '{' + ', '.join('%r: %d' % item for item in items) + '}' return 'Row({}, {})'.format(self._xxx_values, f2i) + + +class RowIterator(HTTPIterator): + """A class for iterating through HTTP/JSON API row list responses. + + Args: + client (google.cloud.bigquery.Client): The API client. + api_request (Callable[google.cloud._http.JSONConnection.api_request]): + The function to use to make API requests. + path (str): The method path to query for the list of items. + page_token (str): A token identifying a page in a result set to start + fetching results from. + max_results (int): The maximum number of results to fetch. + extra_params (dict): Extra query string parameters for the API call. + + .. autoattribute:: pages + """ + + def __init__(self, client, api_request, path, schema, page_token=None, + max_results=None, extra_params=None): + super(RowIterator, self).__init__( + client, api_request, path, item_to_value=_item_to_row, + items_key='rows', page_token=page_token, max_results=max_results, + extra_params=extra_params, page_start=_rows_page_start, + next_token='pageToken') + self._schema = schema + self._field_to_index = _field_to_index_mapping(schema) + self._total_rows = None + + @property + def schema(self): + """Schema for the table containing the rows + + Returns: + list of :class:`~google.cloud.bigquery.schema.SchemaField`: + fields describing the schema + """ + return list(self._schema) + + @property + def total_rows(self): + """The total number of rows in the table. + + Returns: + int: the row count. + """ + return self._total_rows + + def to_dataframe(self): + """Create a pandas DataFrame from the query results. + + Returns: + A :class:`~pandas.DataFrame` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: If the `pandas` library cannot be imported. + + """ + if pandas is None: + raise ValueError('The pandas library is not installed, please ' + 'install pandas to use the to_dataframe() ' + 'function.') + + column_headers = [field.name for field in self.schema] + rows = [row.values() for row in iter(self)] + + return pandas.DataFrame(rows, columns=column_headers) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index fa0936ce619e..86de5658a803 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -36,7 +36,7 @@ def default(session): """ # Install all test dependencies, then install this package in-place. session.install('mock', 'pytest', 'pytest-cov', *LOCAL_DEPS) - session.install('-e', '.') + session.install('-e', '.[pandas]') # Run py.test against the unit tests. session.run( @@ -89,7 +89,7 @@ def system(session, py): os.path.join('..', 'storage'), os.path.join('..', 'test_utils'), ) - session.install('-e', '.') + session.install('-e', '.[pandas]') # Run py.test against the system tests. session.run( diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 1dd3a9ff6036..8d108c9c4dec 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -58,6 +58,10 @@ 'requests >= 2.18.0', ] +EXTRAS_REQUIREMENTS = { + 'pandas': ['pandas >= 0.17.1'], +} + setup( name='google-cloud-bigquery', version='0.28.1.dev1', @@ -69,5 +73,6 @@ ], packages=find_packages(exclude=('tests*',)), install_requires=REQUIREMENTS, + extras_require=EXTRAS_REQUIREMENTS, **SETUP_BASE ) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 6a26922f3564..629276cf46f2 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -24,6 +24,10 @@ import uuid import six +try: + import pandas +except ImportError: # pragma: NO COVER + pandas = None from google.api_core.exceptions import PreconditionFailed from google.cloud import bigquery @@ -1244,6 +1248,28 @@ def test_query_iter(self): row_tuples = [r.values() for r in query_job] self.assertEqual(row_tuples, [(1,)]) + @unittest.skipIf(pandas is None, 'Requires `pandas`') + def test_query_results_to_dataframe(self): + QUERY = """ + SELECT id, author, time_ts, dead + from `bigquery-public-data.hacker_news.comments` + LIMIT 10 + """ + + df = Config.CLIENT.query(QUERY).result().to_dataframe() + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 10) # verify the number of rows + column_names = ['id', 'author', 'time_ts', 'dead'] + self.assertEqual(list(df), column_names) # verify the column names + exp_datatypes = {'id': int, 'author': str, + 'time_ts': pandas.Timestamp, 'dead': bool} + for index, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + self.assertIsInstance(row[col], exp_datatypes[col]) + def test_query_table_def(self): gs_url = self._write_csv_to_storage( 'bq_external_test' + unique_resource_id(), 'person_ages.csv', @@ -1419,6 +1445,56 @@ def test_create_table_rows_fetch_nested_schema(self): e_favtime = datetime.datetime(*parts[0:6]) self.assertEqual(found[7], e_favtime) + def _fetch_dataframe(self, query): + return Config.CLIENT.query(query).result().to_dataframe() + + @unittest.skipIf(pandas is None, 'Requires `pandas`') + def test_nested_table_to_dataframe(self): + SF = bigquery.SchemaField + schema = [ + SF('string_col', 'STRING', mode='NULLABLE'), + SF('record_col', 'RECORD', mode='NULLABLE', fields=[ + SF('nested_string', 'STRING', mode='NULLABLE'), + SF('nested_repeated', 'INTEGER', mode='REPEATED'), + SF('nested_record', 'RECORD', mode='NULLABLE', fields=[ + SF('nested_nested_string', 'STRING', mode='NULLABLE'), + ]), + ]), + ] + record = { + 'nested_string': 'another string value', + 'nested_repeated': [0, 1, 2], + 'nested_record': {'nested_nested_string': 'some deep insight'}, + } + to_insert = [ + ('Some value', record) + ] + table_id = 'test_table' + dataset = self.temp_dataset(_make_dataset_id('nested_df')) + table_arg = Table(dataset.table(table_id), schema=schema) + table = retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + Config.CLIENT.create_rows(table, to_insert) + QUERY = 'SELECT * from `{}.{}.{}`'.format( + Config.CLIENT.project, dataset.dataset_id, table_id) + + retry = RetryResult(_has_rows, max_tries=8) + df = retry(self._fetch_dataframe)(QUERY) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + exp_columns = ['string_col', 'record_col'] + self.assertEqual(list(df), exp_columns) # verify the column names + row = df.iloc[0] + # verify the row content + self.assertEqual(row['string_col'], 'Some value') + self.assertEqual(row['record_col'], record) + # verify that nested data can be accessed with indices/keys + self.assertEqual(row['record_col']['nested_repeated'][0], 0) + self.assertEqual( + row['record_col']['nested_record']['nested_nested_string'], + 'some deep insight') + def temp_dataset(self, dataset_id): dataset = retry_403(Config.CLIENT.create_dataset)( Dataset(Config.CLIENT.dataset(dataset_id))) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 470835533181..2f141a4dc04d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -16,6 +16,10 @@ from six.moves import http_client import unittest +try: + import pandas +except ImportError: # pragma: NO COVER + pandas = None from google.cloud.bigquery.job import ExtractJobConfig, CopyJobConfig from google.cloud.bigquery.job import LoadJobConfig @@ -2720,6 +2724,41 @@ def test_reload_w_alternate_client(self): self.assertEqual(req['path'], PATH) self._verifyResourceProperties(job, RESOURCE) + @unittest.skipIf(pandas is None, 'Requires `pandas`') + def test_to_dataframe(self): + begun_resource = self._make_resource() + query_resource = { + 'jobComplete': True, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'schema': { + 'fields': [ + {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'}, + ], + }, + 'rows': [ + {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]}, + {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]}, + {'f': [{'v': 'Wylma Phlyntstone'}, {'v': '29'}]}, + {'f': [{'v': 'Bhettye Rhubble'}, {'v': '27'}]}, + ], + } + done_resource = copy.deepcopy(begun_resource) + done_resource['status'] = {'state': 'DONE'} + connection = _Connection( + begun_resource, query_resource, done_resource, query_resource) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + df = job.to_dataframe() + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 4) # verify the number of rows + self.assertEqual(list(df), ['name', 'age']) # verify the column names + def test_iter(self): import types diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 752239b7276b..dffa815511f1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -15,6 +15,11 @@ import unittest import mock +import six +try: + import pandas +except ImportError: # pragma: NO COVER + pandas = None from google.cloud.bigquery.dataset import DatasetReference @@ -864,3 +869,215 @@ def test_row(self): row.z with self.assertRaises(KeyError): row['z'] + + +class TestRowIterator(unittest.TestCase): + + def test_constructor(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery._helpers import _item_to_row + from google.cloud.bigquery._helpers import _rows_page_start + + client = mock.sentinel.client + api_request = mock.sentinel.api_request + path = '/foo' + schema = [] + iterator = RowIterator(client, api_request, path, schema) + + self.assertFalse(iterator._started) + self.assertIs(iterator.client, client) + self.assertEqual(iterator.path, path) + self.assertIs(iterator._item_to_value, _item_to_row) + self.assertEqual(iterator._items_key, 'rows') + self.assertIsNone(iterator.max_results) + self.assertEqual(iterator.extra_params, {}) + self.assertEqual(iterator._page_start, _rows_page_start) + # Changing attributes. + self.assertEqual(iterator.page_number, 0) + self.assertIsNone(iterator.next_page_token) + self.assertEqual(iterator.num_results, 0) + + def test_iterate(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField('name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + rows = [ + {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]}, + {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]}, + ] + path = '/foo' + api_request = mock.Mock(return_value={'rows': rows}) + row_iterator = RowIterator( + mock.sentinel.client, api_request, path, schema) + self.assertEqual(row_iterator.num_results, 0) + + rows_iter = iter(row_iterator) + + val1 = six.next(rows_iter) + print(val1) + self.assertEqual(val1.name, 'Phred Phlyntstone') + self.assertEqual(row_iterator.num_results, 1) + + val2 = six.next(rows_iter) + self.assertEqual(val2.name, 'Bharney Rhubble') + self.assertEqual(row_iterator.num_results, 2) + + with self.assertRaises(StopIteration): + six.next(rows_iter) + + api_request.assert_called_once_with( + method='GET', path=path, query_params={}) + + @unittest.skipIf(pandas is None, 'Requires `pandas`') + def test_to_dataframe(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField('name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + rows = [ + {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]}, + {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]}, + {'f': [{'v': 'Wylma Phlyntstone'}, {'v': '29'}]}, + {'f': [{'v': 'Bhettye Rhubble'}, {'v': '27'}]}, + ] + path = '/foo' + api_request = mock.Mock(return_value={'rows': rows}) + row_iterator = RowIterator( + mock.sentinel.client, api_request, path, schema) + + df = row_iterator.to_dataframe() + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 4) # verify the number of rows + self.assertEqual(list(df), ['name', 'age']) # verify the column names + self.assertEqual(df.name.dtype.name, 'object') + self.assertEqual(df.age.dtype.name, 'int64') + + @unittest.skipIf(pandas is None, 'Requires `pandas`') + def test_to_dataframe_w_empty_results(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField('name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + path = '/foo' + api_request = mock.Mock(return_value={'rows': []}) + row_iterator = RowIterator( + mock.sentinel.client, api_request, path, schema) + + df = row_iterator.to_dataframe() + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 0) # verify the number of rows + self.assertEqual(list(df), ['name', 'age']) # verify the column names + + @unittest.skipIf(pandas is None, 'Requires `pandas`') + def test_to_dataframe_w_various_types_nullable(self): + import datetime + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField('start_timestamp', 'TIMESTAMP'), + SchemaField('seconds', 'INT64'), + SchemaField('miles', 'FLOAT64'), + SchemaField('payment_type', 'STRING'), + SchemaField('complete', 'BOOL'), + SchemaField('date', 'DATE'), + ] + row_data = [ + [None, None, None, None, None, None], + ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'], + ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'], + ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'], + ] + rows = [{'f': [{'v': field} for field in row]} for row in row_data] + path = '/foo' + api_request = mock.Mock(return_value={'rows': rows}) + row_iterator = RowIterator( + mock.sentinel.client, api_request, path, schema) + + df = row_iterator.to_dataframe() + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 4) # verify the number of rows + exp_columns = [field.name for field in schema] + self.assertEqual(list(df), exp_columns) # verify the column names + + for index, row in df.iterrows(): + if index == 0: + self.assertTrue(row.isnull().all()) + else: + self.assertIsInstance(row.start_timestamp, pandas.Timestamp) + self.assertIsInstance(row.seconds, float) + self.assertIsInstance(row.payment_type, str) + self.assertIsInstance(row.complete, bool) + self.assertIsInstance(row.date, datetime.date) + + @unittest.skipIf(pandas is None, 'Requires `pandas`') + def test_to_dataframe_column_dtypes(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField('start_timestamp', 'TIMESTAMP'), + SchemaField('seconds', 'INT64'), + SchemaField('miles', 'FLOAT64'), + SchemaField('payment_type', 'STRING'), + SchemaField('complete', 'BOOL'), + SchemaField('date', 'DATE'), + ] + row_data = [ + ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'], + ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'], + ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'], + ] + rows = [{'f': [{'v': field} for field in row]} for row in row_data] + path = '/foo' + api_request = mock.Mock(return_value={'rows': rows}) + row_iterator = RowIterator( + mock.sentinel.client, api_request, path, schema) + + df = row_iterator.to_dataframe() + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + exp_columns = [field.name for field in schema] + self.assertEqual(list(df), exp_columns) # verify the column names + + self.assertEqual(df.start_timestamp.dtype.name, 'datetime64[ns, UTC]') + self.assertEqual(df.seconds.dtype.name, 'int64') + self.assertEqual(df.miles.dtype.name, 'float64') + self.assertEqual(df.payment_type.dtype.name, 'object') + self.assertEqual(df.complete.dtype.name, 'bool') + self.assertEqual(df.date.dtype.name, 'object') + + @mock.patch('google.cloud.bigquery.table.pandas', new=None) + def test_to_dataframe_error_if_pandas_is_none(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField('name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + rows = [ + {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]}, + {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]}, + ] + path = '/foo' + api_request = mock.Mock(return_value={'rows': rows}) + row_iterator = RowIterator( + mock.sentinel.client, api_request, path, schema) + + with self.assertRaises(ValueError): + row_iterator.to_dataframe() From 64fae8d38943eff96b7948ccfd2f8dd0dcecf200 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 4 Dec 2017 16:24:48 -0800 Subject: [PATCH 0365/2016] fixes python 2.7 system test error with PR 4354 (#4528) --- packages/google-cloud-bigquery/tests/system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 629276cf46f2..f6c27e3a8b50 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1262,7 +1262,7 @@ def test_query_results_to_dataframe(self): self.assertEqual(len(df), 10) # verify the number of rows column_names = ['id', 'author', 'time_ts', 'dead'] self.assertEqual(list(df), column_names) # verify the column names - exp_datatypes = {'id': int, 'author': str, + exp_datatypes = {'id': int, 'author': six.text_type, 'time_ts': pandas.Timestamp, 'dead': bool} for index, row in df.iterrows(): for col in column_names: From d0fe3391853e7b0883918c95e48f0ee571d8c068 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 5 Dec 2017 16:08:52 -0800 Subject: [PATCH 0366/2016] BigQuery: Use DatasetListItem for client.list_datasets (#4439) * BigQuery: Use DatasetListItem for client.list_datasets Listing datasets only includes a subset of the properties available on a dataset. The DatasetListItem class is used to explicitly document which features are available and to prevent confusion from trying to use the resulting object in other contexts, like updating. * Fix lint errors. * Make dataset & table reference required, labels optional. * Fix lint error --- .../google/cloud/bigquery/client.py | 11 +- .../google/cloud/bigquery/dataset.py | 108 +++++++++++++++++- .../google/cloud/bigquery/table.py | 36 ++++-- .../tests/unit/test_client.py | 4 +- .../tests/unit/test_dataset.py | 97 ++++++++++++++++ .../tests/unit/test_table.py | 54 +++++++++ 6 files changed, 292 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 34fc9114ce92..5d3a70416c22 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -36,6 +36,7 @@ from google.cloud.bigquery._helpers import _snake_to_camel_case from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob @@ -181,8 +182,10 @@ def list_datasets(self, include_all=False, filter=None, max_results=None, :param retry: (Optional) How to retry the RPC. :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`. - accessible to the current client. + :returns: + Iterator of + :class:`~google.cloud.bigquery.dataset.DatasetListItem`. + associated with the client's project. """ extra_params = {} if include_all: @@ -1275,10 +1278,10 @@ def _item_to_dataset(iterator, resource): :type resource: dict :param resource: An item to be converted to a dataset. - :rtype: :class:`.Dataset` + :rtype: :class:`.DatasetListItem` :returns: The next dataset in the page. """ - return Dataset.from_api_repr(resource) + return DatasetListItem(resource) def _item_to_job(iterator, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 25e00405e2c8..c8f588671ab5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -281,8 +281,7 @@ def full_dataset_id(self): @property def reference(self): - """A :class:`~google.cloud.bigquery.dataset.DatasetReference` pointing to - this dataset. + """A reference to this dataset. Returns: google.cloud.bigquery.dataset.DatasetReference: @@ -420,7 +419,7 @@ def labels(self): :rtype: dict, {str -> str} :returns: A dict of the the dataset's labels. """ - return self._properties['labels'] + return self._properties.get('labels', {}) @labels.setter def labels(self, value): @@ -546,4 +545,105 @@ def table(self, table_id): :rtype: :class:`~google.cloud.bigquery.table.TableReference` :returns: a TableReference for a table in this dataset. """ - return TableReference(self, table_id) + return TableReference(self.reference, table_id) + + +class DatasetListItem(object): + """A read-only dataset resource from a list operation. + + For performance reasons, the BigQuery API only includes some of the + dataset properties when listing datasets. Notably, + :attr:`~google.cloud.bigquery.dataset.Dataset.access_entries` is missing. + + For a full list of the properties that the BigQuery API returns, see the + `REST documentation for datasets.list + `_. + + + Args: + resource (dict): + A dataset-like resource object from a dataset list response. A + ``datasetReference`` property is required. + + Raises: + ValueError: + If ``datasetReference`` or one of its required members is missing + from ``resource``. + """ + + def __init__(self, resource): + if 'datasetReference' not in resource: + raise ValueError('resource must contain a datasetReference value') + if 'projectId' not in resource['datasetReference']: + raise ValueError( + "resource['datasetReference'] must contain a projectId value") + if 'datasetId' not in resource['datasetReference']: + raise ValueError( + "resource['datasetReference'] must contain a datasetId value") + self._properties = resource + + @property + def project(self): + """Project bound to the dataset. + + :rtype: str + :returns: the project. + """ + return self._properties['datasetReference']['projectId'] + + @property + def dataset_id(self): + """Dataset ID. + + :rtype: str + :returns: the dataset ID. + """ + return self._properties['datasetReference']['datasetId'] + + @property + def full_dataset_id(self): + """ID for the dataset resource, in the form "project_id:dataset_id". + + :rtype: str, or ``NoneType`` + :returns: the ID (None until set from the server). + """ + return self._properties.get('id') + + @property + def friendly_name(self): + """Title of the dataset. + + :rtype: str, or ``NoneType`` + :returns: The name as set by the user, or None (the default). + """ + return self._properties.get('friendlyName') + + @property + def labels(self): + """Labels for the dataset. + + :rtype: dict, {str -> str} + :returns: A dict of the the dataset's labels. + """ + return self._properties.get('labels', {}) + + @property + def reference(self): + """A reference to this dataset. + + Returns: + google.cloud.bigquery.dataset.DatasetReference: + A pointer to this dataset + """ + return DatasetReference(self.project, self.dataset_id) + + def table(self, table_id): + """Constructs a TableReference. + + :type table_id: str + :param table_id: the ID of the table. + + :rtype: :class:`~google.cloud.bigquery.table.TableReference` + :returns: a TableReference for a table in this dataset. + """ + return TableReference(self.reference, table_id) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d7e4745fa8c8..d240fa6d0910 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -49,7 +49,7 @@ def _reference_getter(table): this table. Returns: - google.cloud.bigquery.table.TableReference: pointer to this table + google.cloud.bigquery.table.TableReference: pointer to this table. """ from google.cloud.bigquery import dataset @@ -295,7 +295,7 @@ def labels(self): :rtype: dict, {str -> str} :returns: A dict of the the table's labels. """ - return self._properties['labels'] + return self._properties.get('labels', {}) @labels.setter def labels(self, value): @@ -756,10 +756,28 @@ class TableListItem(object): Args: resource (dict): - A table-like resource object from a table list response. + A table-like resource object from a table list response. A + ``tableReference`` property is required. + + Raises: + ValueError: + If ``tableReference`` or one of its required members is missing + from ``resource``. """ def __init__(self, resource): + if 'tableReference' not in resource: + raise ValueError('resource must contain a tableReference value') + if 'projectId' not in resource['tableReference']: + raise ValueError( + "resource['tableReference'] must contain a projectId value") + if 'datasetId' not in resource['tableReference']: + raise ValueError( + "resource['tableReference'] must contain a datasetId value") + if 'tableId' not in resource['tableReference']: + raise ValueError( + "resource['tableReference'] must contain a tableId value") + self._properties = resource @property @@ -769,7 +787,7 @@ def project(self): Returns: str: the project ID of the table. """ - return self._properties.get('tableReference', {}).get('projectId') + return self._properties['tableReference']['projectId'] @property def dataset_id(self): @@ -778,7 +796,7 @@ def dataset_id(self): Returns: str: the dataset ID of the table. """ - return self._properties.get('tableReference', {}).get('datasetId') + return self._properties['tableReference']['datasetId'] @property def table_id(self): @@ -787,7 +805,7 @@ def table_id(self): Returns: str: the table ID. """ - return self._properties.get('tableReference', {}).get('tableId') + return self._properties['tableReference']['tableId'] reference = property(_reference_getter) @@ -842,8 +860,10 @@ def partition_expiration(self): Returns: int: The time in ms for partition expiration """ - return int( - self._properties.get('timePartitioning', {}).get('expirationMs')) + expiration = self._properties.get( + 'timePartitioning', {}).get('expirationMs') + if expiration is not None: + return int(expiration) @property def friendly_name(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 721181e3cc81..f54feacdd93d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -183,7 +183,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): {'maxResults': 3, 'pageToken': TOKEN}) def test_list_datasets_defaults(self): - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import DatasetListItem DATASET_1 = 'dataset_one' DATASET_2 = 'dataset_two' @@ -215,7 +215,7 @@ def test_list_datasets_defaults(self): self.assertEqual(len(datasets), len(DATA['datasets'])) for found, expected in zip(datasets, DATA['datasets']): - self.assertIsInstance(found, Dataset) + self.assertIsInstance(found, DatasetListItem) self.assertEqual(found.full_dataset_id, expected['id']) self.assertEqual(found.friendly_name, expected['friendlyName']) self.assertEqual(token, TOKEN) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index e9e2f0dec813..33c38720beac 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -404,6 +404,10 @@ def test_labels_setter_bad_value(self): with self.assertRaises(ValueError): dataset.labels = None + def test_labels_getter_missing_value(self): + dataset = self._make_one(self.DS_REF) + self.assertEqual(dataset.labels, {}) + def test_from_api_repr_missing_identity(self): self._setUpConstants() RESOURCE = {} @@ -460,3 +464,96 @@ def test_table(self): self.assertEqual(table.table_id, 'table_id') self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual(table.project, self.PROJECT) + + +class TestDatasetListItem(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.dataset import DatasetListItem + + return DatasetListItem + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + project = 'test-project' + dataset_id = 'test_dataset' + resource = { + 'kind': 'bigquery#dataset', + 'id': '{}:{}'.format(project, dataset_id), + 'datasetReference': { + 'projectId': project, + 'datasetId': dataset_id, + }, + 'friendlyName': 'Data of the Test', + 'labels': { + 'some-stuff': 'this-is-a-label', + }, + } + + dataset = self._make_one(resource) + self.assertEqual(dataset.project, project) + self.assertEqual(dataset.dataset_id, dataset_id) + self.assertEqual( + dataset.full_dataset_id, + '{}:{}'.format(project, dataset_id)) + self.assertEqual(dataset.reference.project, project) + self.assertEqual(dataset.reference.dataset_id, dataset_id) + self.assertEqual(dataset.friendly_name, 'Data of the Test') + self.assertEqual(dataset.labels['some-stuff'], 'this-is-a-label') + + def test_ctor_missing_properties(self): + resource = { + 'datasetReference': { + 'projectId': 'testproject', + 'datasetId': 'testdataset', + }, + } + dataset = self._make_one(resource) + self.assertEqual(dataset.project, 'testproject') + self.assertEqual(dataset.dataset_id, 'testdataset') + self.assertIsNone(dataset.full_dataset_id) + self.assertIsNone(dataset.friendly_name) + self.assertEqual(dataset.labels, {}) + + def test_ctor_wo_project(self): + resource = { + 'datasetReference': { + 'datasetId': 'testdataset', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_dataset(self): + resource = { + 'datasetReference': { + 'projectId': 'testproject', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_reference(self): + with self.assertRaises(ValueError): + self._make_one({}) + + def test_table(self): + from google.cloud.bigquery.table import TableReference + + project = 'test-project' + dataset_id = 'test_dataset' + resource = { + 'datasetReference': { + 'projectId': project, + 'datasetId': dataset_id, + }, + } + dataset = self._make_one(resource) + table = dataset.table('table_id') + self.assertIsInstance(table, TableReference) + self.assertEqual(table.table_id, 'table_id') + self.assertEqual(table.dataset_id, dataset_id) + self.assertEqual(table.project, project) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index dffa815511f1..a97bbe19c7fe 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -841,6 +841,60 @@ def test_ctor_view(self): # Server default for useLegacySql is True. self.assertTrue(table.view_use_legacy_sql) + def test_ctor_missing_properties(self): + resource = { + 'tableReference': { + 'projectId': 'testproject', + 'datasetId': 'testdataset', + 'tableId': 'testtable', + }, + } + table = self._make_one(resource) + self.assertEqual(table.project, 'testproject') + self.assertEqual(table.dataset_id, 'testdataset') + self.assertEqual(table.table_id, 'testtable') + self.assertIsNone(table.full_table_id) + self.assertIsNone(table.friendly_name) + self.assertIsNone(table.table_type) + self.assertIsNone(table.partitioning_type) + self.assertIsNone(table.partition_expiration) + self.assertEqual(table.labels, {}) + self.assertIsNone(table.view_use_legacy_sql) + + def test_ctor_wo_project(self): + resource = { + 'tableReference': { + 'datasetId': 'testdataset', + 'tableId': 'testtable', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_dataset(self): + resource = { + 'tableReference': { + 'projectId': 'testproject', + 'tableId': 'testtable', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_table(self): + resource = { + 'tableReference': { + 'projectId': 'testproject', + 'datasetId': 'testdataset', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_reference(self): + with self.assertRaises(ValueError): + self._make_one({}) + class TestRow(unittest.TestCase): From 7115fbb7cf47a843adb8eb0547a8ca9902dfcb69 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Thu, 7 Dec 2017 12:43:13 -0800 Subject: [PATCH 0367/2016] fixes broken URLs and typos per #4548 (#4550) --- .../google/cloud/bigquery/external_config.py | 20 +++++++++---------- .../google/cloud/bigquery/job.py | 7 ++++--- .../tests/unit/test_job.py | 14 ++++++------- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 8702126311b4..17e78300b349 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -44,22 +44,22 @@ def __init__(self): encoding = _TypedApiResourceProperty( 'encoding', 'encoding', six.string_types) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.encoding https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.encoding """ field_name = _TypedApiResourceProperty( 'field_name', 'fieldName', six.string_types) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.field_name - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.field_name + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.fieldName + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.fieldName """ only_read_latest = _TypedApiResourceProperty( 'only_read_latest', 'onlyReadLatest', bool) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.only_read_latest - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.only_read_latest + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.onlyReadLatest + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.onlyReadLatest """ qualifier_encoded = _TypedApiResourceProperty( @@ -68,20 +68,20 @@ def __init__(self): ``bytes`` (Python 3.x). The module will handle base64 encoding for you. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_encoded - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_encoded + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.qualifierEncoded + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifierEncoded """ qualifier_string = _TypedApiResourceProperty( 'qualifier_string', 'qualifierString', six.string_types) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_string - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_string + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.qualifierString + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifierString """ type_ = _TypedApiResourceProperty('type_', 'type', six.string_types) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.type https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.type """ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 9f66e3ec9ea0..243dd32bea25 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1835,11 +1835,11 @@ def referenced_tables(self): return tables @property - def undeclared_query_paramters(self): + def undeclared_query_parameters(self): """Return undeclared query parameters from job statistics, if present. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.undeclaredQueryParamters + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.undeclaredQueryParameters :rtype: list of @@ -1850,7 +1850,8 @@ def undeclared_query_paramters(self): not yet completed. """ parameters = [] - undeclared = self._job_statistics().get('undeclaredQueryParamters', ()) + undeclared = self._job_statistics().get( + 'undeclaredQueryParameters', ()) for parameter in undeclared: p_type = parameter['parameterType'] diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 2f141a4dc04d..2436e71e77de 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -92,7 +92,7 @@ class _Base(object): PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' - DS_ID = 'datset_id' + DS_ID = 'dataset_id' DS_REF = DatasetReference(PROJECT, DS_ID) TABLE_ID = 'table_id' TABLE_REF = TableReference(DS_REF, TABLE_ID) @@ -2022,7 +2022,7 @@ def test_referenced_tables(self): self.assertEqual(remote.dataset_id, 'other-dataset') self.assertEqual(remote.project, 'other-project-123') - def test_undeclared_query_paramters(self): + def test_undeclared_query_parameters(self): from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter @@ -2070,17 +2070,17 @@ def test_undeclared_query_paramters(self): }] client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.undeclared_query_paramters, []) + self.assertEqual(job.undeclared_query_parameters, []) statistics = job._properties['statistics'] = {} - self.assertEqual(job.undeclared_query_paramters, []) + self.assertEqual(job.undeclared_query_parameters, []) query_stats = statistics['query'] = {} - self.assertEqual(job.undeclared_query_paramters, []) + self.assertEqual(job.undeclared_query_parameters, []) - query_stats['undeclaredQueryParamters'] = undeclared + query_stats['undeclaredQueryParameters'] = undeclared - scalar, array, struct = job.undeclared_query_paramters + scalar, array, struct = job.undeclared_query_parameters self.assertIsInstance(scalar, ScalarQueryParameter) self.assertEqual(scalar.name, 'my_scalar') From 89387271d8f1938522d14b05c39e048bd4905664 Mon Sep 17 00:00:00 2001 From: Ernest Landrito Date: Fri, 15 Dec 2017 11:16:58 -0800 Subject: [PATCH 0368/2016] BigQuery Data Transfer (#4585) --- packages/google-cloud-bigquery/tests/unit/test_job.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_table.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 2436e71e77de..e2065429ff57 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -18,7 +18,7 @@ import unittest try: import pandas -except ImportError: # pragma: NO COVER +except (ImportError, AttributeError): # pragma: NO COVER pandas = None from google.cloud.bigquery.job import ExtractJobConfig, CopyJobConfig diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a97bbe19c7fe..8770256236cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -18,7 +18,7 @@ import six try: import pandas -except ImportError: # pragma: NO COVER +except (ImportError, AttributeError): # pragma: NO COVER pandas = None from google.cloud.bigquery.dataset import DatasetReference From a7dfc8fc66fb236edaf99e6e54e1399965fc9a63 Mon Sep 17 00:00:00 2001 From: Matthew Wesley Date: Mon, 18 Dec 2017 15:04:18 -0800 Subject: [PATCH 0369/2016] Correct BigQuery job parameter annotations. (#4617) --- .../google/cloud/bigquery/job.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 243dd32bea25..583538299766 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2014,25 +2014,25 @@ class QueryPlanEntry(object): :param wait_ratio_avg: average wait ratio :type wait_ratio_max: float - :param wait_ratio_avg: maximum wait ratio + :param wait_ratio_max: maximum wait ratio :type read_ratio_avg: float :param read_ratio_avg: average read ratio :type read_ratio_max: float - :param read_ratio_avg: maximum read ratio + :param read_ratio_max: maximum read ratio - :type copute_ratio_avg: float - :param copute_ratio_avg: average copute ratio + :type compute_ratio_avg: float + :param compute_ratio_avg: average compute ratio - :type copute_ratio_max: float - :param copute_ratio_avg: maximum copute ratio + :type compute_ratio_max: float + :param compute_ratio_max: maximum compute ratio :type write_ratio_avg: float :param write_ratio_avg: average write ratio :type write_ratio_max: float - :param write_ratio_avg: maximum write ratio + :param write_ratio_max: maximum write ratio :type records_read: int :param records_read: number of records read From 10a51a508930f3a1e42ae9135afd2638f8d216c3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 21 Dec 2017 15:10:38 -0800 Subject: [PATCH 0370/2016] BQ: remove QueryJob.query_results(), make QueryResults private (#4652) The QueryResults object is not necessary to be used by external developers, but it is still needed internally for the getQueryResults API response. From BigQuery team's GA review of the library. --- .../google/cloud/bigquery/client.py | 8 ++--- .../google/cloud/bigquery/dbapi/cursor.py | 6 ++-- .../google/cloud/bigquery/job.py | 19 +++-------- .../google/cloud/bigquery/query.py | 2 +- .../tests/unit/test_dbapi_cursor.py | 8 ++--- .../tests/unit/test_job.py | 34 ------------------- .../tests/unit/test_query.py | 6 ++-- 7 files changed, 19 insertions(+), 64 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5d3a70416c22..d851d91f5e46 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -42,7 +42,7 @@ from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import QueryJob, QueryJobConfig -from google.cloud.bigquery.query import QueryResults +from google.cloud.bigquery.query import _QueryResults from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableListItem from google.cloud.bigquery.table import TableReference @@ -488,8 +488,8 @@ def _get_query_results(self, job_id, retry, project=None, timeout_ms=None): (Optional) number of milliseconds the the API call should wait for the query to complete before the request times out. - :rtype: :class:`google.cloud.bigquery.query.QueryResults` - :returns: a new ``QueryResults`` instance + :rtype: :class:`google.cloud.bigquery.query._QueryResults` + :returns: a new ``_QueryResults`` instance """ extra_params = {'maxResults': 0} @@ -507,7 +507,7 @@ def _get_query_results(self, job_id, retry, project=None, timeout_ms=None): # QueryJob.result()). So we don't need to poll here. resource = self._call_api( retry, method='GET', path=path, query_params=extra_params) - return QueryResults.from_api_repr(resource) + return _QueryResults.from_api_repr(resource) def job_from_resource(self, resource): """Detect correct job type from resource and instantiate. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index f4dc32e7e101..5df7117d5e8b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -86,7 +86,7 @@ def _set_rowcount(self, query_results): of modified rows. :type query_results: - :class:`~google.cloud.bigquery.query.QueryResults` + :class:`~google.cloud.bigquery.query._QueryResults` :param query_results: results of a query """ total_rows = 0 @@ -156,7 +156,7 @@ def execute(self, operation, parameters=None, job_id=None): except google.cloud.exceptions.GoogleCloudError: raise exceptions.DatabaseError(self._query_job.errors) - query_results = self._query_job.query_results() + query_results = self._query_job._query_results self._set_rowcount(query_results) self._set_description(query_results.schema) @@ -193,7 +193,7 @@ def _try_fetch(self, size=None): # TODO(tswast): pass in page size to list_rows based on arraysize rows_iter = client.list_rows( self._query_job.destination, - selected_fields=self._query_job.query_results().schema) + selected_fields=self._query_job._query_results.schema) self._query_data = iter(rows_iter) def fetchone(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 583538299766..9090f2b3c189 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1867,20 +1867,6 @@ def undeclared_query_parameters(self): return parameters - def query_results(self, retry=DEFAULT_RETRY): - """Construct a QueryResults instance, bound to this job. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. - - :rtype: :class:`~google.cloud.bigquery.QueryResults` - :returns: results instance - """ - if not self._query_results: - self._query_results = self._client._get_query_results( - self.job_id, retry, project=self.project) - return self._query_results - def done(self, retry=DEFAULT_RETRY): """Refresh the job and checks if it is complete. @@ -1945,7 +1931,10 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): """ super(QueryJob, self).result(timeout=timeout) # Return an iterator instead of returning the job. - schema = self.query_results().schema + if not self._query_results: + self._query_results = self._client._get_query_results( + self.job_id, retry, project=self.project) + schema = self._query_results.schema dest_table = self.destination return self._client.list_rows(dest_table, selected_fields=schema, retry=retry) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 1d3009394c96..e3bd5c196bec 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -461,7 +461,7 @@ def __repr__(self): return 'StructQueryParameter{}'.format(self._key()) -class QueryResults(object): +class _QueryResults(object): """Results of a query. See: diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 542b053fae1a..d0ad5401cf85 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -51,21 +51,21 @@ def _mock_job( mock_job.error_result = None mock_job.state = 'DONE' mock_job.result.return_value = mock_job + mock_job._query_results = self._mock_results( + total_rows=total_rows, schema=schema, + num_dml_affected_rows=num_dml_affected_rows) if num_dml_affected_rows is None: mock_job.statement_type = None # API sends back None for SELECT else: mock_job.statement_type = 'UPDATE' - mock_job.query_results.return_value = self._mock_results( - total_rows=total_rows, schema=schema, - num_dml_affected_rows=num_dml_affected_rows) return mock_job def _mock_results( self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import query - mock_results = mock.create_autospec(query.QueryResults) + mock_results = mock.create_autospec(query._QueryResults) mock_results.schema = schema mock_results.num_dml_affected_rows = num_dml_affected_rows mock_results.total_rows = total_rows diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index e2065429ff57..40903a4bf059 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2097,40 +2097,6 @@ def test_undeclared_query_parameters(self): self.assertEqual(struct.struct_types, {'count': 'INT64'}) self.assertEqual(struct.struct_values, {'count': 123}) - def test_query_results(self): - from google.cloud.bigquery.query import QueryResults - - query_resource = { - 'jobComplete': True, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - } - connection = _Connection(query_resource) - client = _make_client(self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - results = job.query_results() - self.assertIsInstance(results, QueryResults) - - def test_query_results_w_cached_value(self): - from google.cloud.bigquery.query import QueryResults - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - resource = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - } - query_results = QueryResults(resource) - job._query_results = query_results - - results = job.query_results() - - self.assertIs(results, query_results) - def test_result(self): query_resource = { 'jobComplete': True, diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 35def936946b..bce6d2cd726a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -973,16 +973,16 @@ def test___repr__(self): self.assertIn("'field1': 'hello'", got) -class TestQueryResults(unittest.TestCase): +class Test_QueryResults(unittest.TestCase): PROJECT = 'project' JOB_ID = 'test-synchronous-query' TOKEN = 'TOKEN' @staticmethod def _get_target_class(): - from google.cloud.bigquery.query import QueryResults + from google.cloud.bigquery.query import _QueryResults - return QueryResults + return _QueryResults def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) From a441233840cc57203780a1773c50369f0bc3a3e9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 22 Dec 2017 14:19:27 -0800 Subject: [PATCH 0371/2016] BigQuery: deprecate list_dataset_tables in favor of list_tables (#4653) * BigQuery: deprecate list_dataset_tables in favor of list_tables * Update changelog for this change and already-merged changes. * Make 0.29 changelog headers consistent with previous versions. --- packages/google-cloud-bigquery/CHANGELOG.md | 33 +++++++++++++++++++ .../google/cloud/bigquery/client.py | 15 +++++++-- .../google-cloud-bigquery/tests/system.py | 6 ++-- 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 04f299c94361..124fff893b72 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,39 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 0.29.0 (unreleased) + +## Interface changes / additions + +- Add `to_dataframe()` method to row iterators. When Pandas is installed this + method returns a `DataFrame` containing the query's or table's rows. + ([#4354](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4354)) +- Iterate over a `QueryJob` to wait for and get the query results. + ([#4350](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4350)) +- Add `Table.reference` and `Dataset.reference` properties to get the + `TableReference` or `DatasetReference` corresponding to that `Table` or + `Dataset`, respectively. + ([#4405](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4405)) +- Add `Row.keys()`, `Row.items()`, and `Row.get()`. This makes `Row` act + more like a built-in dictionary. + ([#4393](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4393), + [#4413](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4413)) + +## Interface changes / breaking changes + +- Add `Client.list_tables`, deprecate `Client.list_dataset_tables`. + ([#4653](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4653)) +- `Client.list_tables` returns an iterators of `TableListItem`. The API + only returns a subset of properties of a table when listing. + ([#4427](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4427)) +- Remove `QueryJob.query_results()`. Use `QueryJob.result()` instead. + ([#4652](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4652)) +- Remove `Client.query_rows()`. Use `Client.query()` instead. + ([#4429](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4429)) +- `Client.list_datasets` returns an iterator of `DatasetListItem`. The API + only returns a subset of properties of a dataset when listing. + ([#4439](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4439)) + ## 0.28.0 **0.28.0 significantly changes the interface for this package.** For examples diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index d851d91f5e46..84f38a26ae49 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -20,6 +20,7 @@ import functools import os import uuid +import warnings import six @@ -389,8 +390,8 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY): method='PATCH', path=table.path, data=partial, headers=headers) return Table.from_api_repr(api_response) - def list_dataset_tables(self, dataset, max_results=None, page_token=None, - retry=DEFAULT_RETRY): + def list_tables(self, dataset, max_results=None, page_token=None, + retry=DEFAULT_RETRY): """List tables in the dataset. See @@ -432,6 +433,16 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None, result.dataset = dataset return result + def list_dataset_tables(self, *args, **kwargs): + """DEPRECATED: List tables in the dataset. + + Use :func:`~google.cloud.bigquery.client.Client.list_tables` instead. + """ + warnings.warn( + 'list_dataset_tables is deprecated, use list_tables instead.', + DeprecationWarning) + return self.list_tables(*args, **kwargs) + def delete_dataset(self, dataset, retry=DEFAULT_RETRY): """Delete a dataset. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index f6c27e3a8b50..b49b39c80b48 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -230,11 +230,11 @@ def test_get_table_w_public_dataset(self): self.assertEqual( schema_names, ['word', 'word_count', 'corpus', 'corpus_date']) - def test_list_dataset_tables(self): + def test_list_tables(self): DATASET_ID = _make_dataset_id('list_tables') dataset = self.temp_dataset(DATASET_ID) # Retrieve tables before any are created for the dataset. - iterator = Config.CLIENT.list_dataset_tables(dataset) + iterator = Config.CLIENT.list_tables(dataset) all_tables = list(iterator) self.assertEqual(all_tables, []) self.assertIsNone(iterator.next_page_token) @@ -251,7 +251,7 @@ def test_list_dataset_tables(self): self.to_delete.insert(0, created_table) # Retrieve the tables. - iterator = Config.CLIENT.list_dataset_tables(dataset) + iterator = Config.CLIENT.list_tables(dataset) all_tables = list(iterator) self.assertIsNone(iterator.next_page_token) created = [table for table in all_tables From 527086efb41911d18dac38ded4e52a0428039885 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 2 Jan 2018 09:13:12 -0800 Subject: [PATCH 0372/2016] Skips pandas when running python 3.4 nox session (#4681) --- packages/google-cloud-bigquery/nox.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 86de5658a803..f459be3530d8 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -36,7 +36,10 @@ def default(session): """ # Install all test dependencies, then install this package in-place. session.install('mock', 'pytest', 'pytest-cov', *LOCAL_DEPS) - session.install('-e', '.[pandas]') + if session.interpreter == 'python3.4': + session.install('-e', '.') + else: + session.install('-e', '.[pandas]') # Run py.test against the unit tests. session.run( From 0a1bcdc51e440f4f9abb0a9167388beefcec9422 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 2 Jan 2018 11:15:25 -0800 Subject: [PATCH 0373/2016] BigQuery: Add Client.insert_rows, deprecate Client.create_rows (#4657) * BigQuery: Add Client.insert_rows, deprecate Client.create_rows `insert_rows` aligns better with API request (Tabledata.insertAll). Feedback from BQ GA review. --- packages/google-cloud-bigquery/CHANGELOG.md | 3 ++ .../google/cloud/bigquery/client.py | 31 ++++++++++--- .../google-cloud-bigquery/tests/system.py | 43 +++++++++++-------- 3 files changed, 53 insertions(+), 24 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 124fff893b72..9061e4ad353f 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -24,6 +24,9 @@ ## Interface changes / breaking changes +- Add `Client.insert_rows()` and `Client.insert_rows_json()`, deprecate + `Client.create_rows()` and `Client.create_rows_json()`. + ([#4657](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4657)) - Add `Client.list_tables`, deprecate `Client.list_dataset_tables`. ([#4653](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4653)) - `Client.list_tables` returns an iterators of `TableListItem`. The API diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 84f38a26ae49..54885c3e24ef 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1007,8 +1007,8 @@ def query(self, query, job_config=None, job_id=None, job_id_prefix=None, job._begin(retry=retry) return job - def create_rows(self, table, rows, selected_fields=None, **kwargs): - """API call: insert table data via a POST request + def insert_rows(self, table, rows, selected_fields=None, **kwargs): + """Insert rows into a table via the streaming API. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll @@ -1073,12 +1073,12 @@ def create_rows(self, table, rows, selected_fields=None, **kwargs): json_rows.append(json_row) - return self.create_rows_json(table, json_rows, **kwargs) + return self.insert_rows_json(table, json_rows, **kwargs) - def create_rows_json(self, table, json_rows, row_ids=None, + def insert_rows_json(self, table, json_rows, row_ids=None, skip_invalid_rows=None, ignore_unknown_values=None, template_suffix=None, retry=DEFAULT_RETRY): - """API call: insert table data via a POST request + """Insert rows into a table without applying local type conversions. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll @@ -1162,6 +1162,27 @@ def create_rows_json(self, table, json_rows, row_ids=None, return errors + def create_rows(self, *args, **kwargs): + """DEPRECATED: Insert rows into a table via the streaming API. + + Use :func:`~google.cloud.bigquery.client.Client.insert_rows` instead. + """ + warnings.warn( + 'create_rows is deprecated, use insert_rows instead.', + DeprecationWarning) + return self.insert_rows(*args, **kwargs) + + def create_rows_json(self, *args, **kwargs): + """DEPRECATED: Insert rows into a table without type conversions. + + Use :func:`~google.cloud.bigquery.client.Client.insert_rows_json` + instead. + """ + warnings.warn( + 'create_rows_json is deprecated, use insert_rows_json instead.', + DeprecationWarning) + return self.insert_rows_json(*args, **kwargs) + def list_rows(self, table, selected_fields=None, max_results=None, page_token=None, start_index=None, retry=DEFAULT_RETRY): """List the rows of the table. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index b49b39c80b48..ecb342ed0c1d 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -124,9 +124,9 @@ def _still_in_use(bad_request): for doomed in self.to_delete: if isinstance(doomed, Bucket): retry_409(doomed.delete)(force=True) - elif isinstance(doomed, Dataset): + elif isinstance(doomed, (Dataset, bigquery.DatasetReference)): retry_in_use(Config.CLIENT.delete_dataset)(doomed) - elif isinstance(doomed, Table): + elif isinstance(doomed, (Table, bigquery.TableReference)): retry_in_use(Config.CLIENT.delete_table)(doomed) else: doomed.delete() @@ -327,7 +327,7 @@ def _fetch_single_page(table, selected_fields=None): page = six.next(iterator.pages) return list(page) - def test_create_rows_then_dump_table(self): + def test_insert_rows_then_dump_table(self): NOW_SECONDS = 1448911495.484366 NOW = datetime.datetime.utcfromtimestamp( NOW_SECONDS).replace(tzinfo=UTC) @@ -339,7 +339,7 @@ def test_create_rows_then_dump_table(self): ] ROW_IDS = range(len(ROWS)) - dataset = self.temp_dataset(_make_dataset_id('create_rows_then_dump')) + dataset = self.temp_dataset(_make_dataset_id('insert_rows_then_dump')) TABLE_ID = 'test_table' schema = [ bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), @@ -352,7 +352,7 @@ def test_create_rows_then_dump_table(self): self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) - errors = Config.CLIENT.create_rows(table, ROWS, row_ids=ROW_IDS) + errors = Config.CLIENT.insert_rows(table, ROWS, row_ids=ROW_IDS) self.assertEqual(len(errors), 0) rows = () @@ -1315,7 +1315,7 @@ def test_query_external_table(self): self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) - def test_create_rows_nested_nested(self): + def test_insert_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField schema = [ @@ -1342,14 +1342,14 @@ def test_create_rows_nested_nested(self): table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - Config.CLIENT.create_rows(table, to_insert) + Config.CLIENT.insert_rows(table, to_insert) retry = RetryResult(_has_rows, max_tries=8) rows = retry(self._fetch_single_page)(table) row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, to_insert) - def test_create_rows_nested_nested_dictionary(self): + def test_insert_rows_nested_nested_dictionary(self): # See #2951 SF = bigquery.SchemaField schema = [ @@ -1376,7 +1376,7 @@ def test_create_rows_nested_nested_dictionary(self): table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - Config.CLIENT.create_rows(table, to_insert) + Config.CLIENT.insert_rows(table, to_insert) retry = RetryResult(_has_rows, max_tries=8) rows = retry(self._fetch_single_page)(table) @@ -1402,7 +1402,7 @@ def test_create_table_rows_fetch_nested_schema(self): for line in rows_file: to_insert.append(json.loads(line)) - errors = Config.CLIENT.create_rows_json(table, to_insert) + errors = Config.CLIENT.insert_rows_json(table, to_insert) self.assertEqual(len(errors), 0) retry = RetryResult(_has_rows, max_tries=8) @@ -1467,19 +1467,24 @@ def test_nested_table_to_dataframe(self): 'nested_record': {'nested_nested_string': 'some deep insight'}, } to_insert = [ - ('Some value', record) + {'string_col': 'Some value', 'record_col': record}, ] + rows = [json.dumps(row) for row in to_insert] + body = six.StringIO('{}\n'.format('\n'.join(rows))) table_id = 'test_table' dataset = self.temp_dataset(_make_dataset_id('nested_df')) - table_arg = Table(dataset.table(table_id), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = dataset.table(table_id) self.to_delete.insert(0, table) - Config.CLIENT.create_rows(table, to_insert) - QUERY = 'SELECT * from `{}.{}.{}`'.format( - Config.CLIENT.project, dataset.dataset_id, table_id) - - retry = RetryResult(_has_rows, max_tries=8) - df = retry(self._fetch_dataframe)(QUERY) + job_config = bigquery.LoadJobConfig() + job_config.write_disposition = 'WRITE_TRUNCATE' + job_config.source_format = 'NEWLINE_DELIMITED_JSON' + job_config.schema = schema + # Load a table using a local JSON file from memory. + Config.CLIENT.load_table_from_file( + body, table, job_config=job_config).result() + + df = Config.CLIENT.list_rows( + table, selected_fields=schema).to_dataframe() self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 1) # verify the number of rows From 717cabd1d0e918b6d061242cf41ee6cde95668a2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 3 Jan 2018 15:05:59 -0800 Subject: [PATCH 0374/2016] Documentation: DefaultCredentialsError could be raised if credentials not supplied (#4688) Document DefaultCredentialsError could be raised if credentials not supplied. Also, use Napoleon-style docs for the changed docs. --- .../google/cloud/bigquery/client.py | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 54885c3e24ef..e298468f0fbb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -93,25 +93,28 @@ def from_api_repr(cls, resource): class Client(ClientWithProject): """Client to bundle configuration needed for API requests. - :type project: str - :param project: the project which the client acts on behalf of. Will be - passed when creating a dataset / job. If not passed, - falls back to the default inferred from the environment. - - :type credentials: :class:`~google.auth.credentials.Credentials` - :param credentials: (Optional) The OAuth2 Credentials to use for this - client. If not passed (and if no ``_http`` object is - passed), falls back to the default inferred from the - environment. - - :type _http: :class:`~requests.Session` - :param _http: (Optional) HTTP object to make requests. Can be any object - that defines ``request()`` with the same interface as - :meth:`requests.Session.request`. If not passed, an - ``_http`` object is created that is bound to the - ``credentials`` for the current object. - This parameter should be considered private, and could - change in the future. + Args: + project (str): + Project ID for the project which the client acts on behalf of. + Will be passed when creating a dataset / job. If not passed, + falls back to the default inferred from the environment. + credentials (google.auth.credentials.Credentials): + (Optional) The OAuth2 Credentials to use for this client. If not + passed (and if no ``_http`` object is passed), falls back to the + default inferred from the environment. + _http (requests.Session): + (Optional) HTTP object to make requests. Can be any object that + defines ``request()`` with the same interface as + :meth:`requests.Session.request`. If not passed, an ``_http`` + object is created that is bound to the ``credentials`` for the + current object. + This parameter should be considered private, and could change in + the future. + + Raises: + google.auth.exceptions.DefaultCredentialsError: + Raised if ``credentials`` is not specified and the library fails + to acquire default credentials. """ SCOPE = ('https://www.googleapis.com/auth/bigquery', From c00c70706259c10d62f4a342270e3a4ac6348086 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 4 Jan 2018 13:01:48 -0500 Subject: [PATCH 0375/2016] Prep bigquery-0.29.0 release. (#4690) --- packages/google-cloud-bigquery/CHANGELOG.md | 6 +++--- packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 9061e4ad353f..dd01d5cce1bd 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,9 +4,9 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history -## 0.29.0 (unreleased) +## 0.29.0 -## Interface changes / additions +### Interface changes / additions - Add `to_dataframe()` method to row iterators. When Pandas is installed this method returns a `DataFrame` containing the query's or table's rows. @@ -22,7 +22,7 @@ ([#4393](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4393), [#4413](https://github.com/GoogleCloudPlatform/google-cloud-python/pull/4413)) -## Interface changes / breaking changes +### Interface changes / breaking changes - Add `Client.insert_rows()` and `Client.insert_rows_json()`, deprecate `Client.create_rows()` and `Client.create_rows_json()`. diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 8d108c9c4dec..c389b70f9898 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -64,7 +64,7 @@ setup( name='google-cloud-bigquery', - version='0.28.1.dev1', + version='0.29.0', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From 998a0eca3679b57261db384247efd459a83e0414 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 4 Jan 2018 13:34:14 -0500 Subject: [PATCH 0376/2016] Mark bigquery as back under dev after 0.29.0 release. (#4691) --- packages/google-cloud-bigquery/CHANGELOG.md | 4 ++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index dd01d5cce1bd..a4e113da65b3 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,10 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 0.29.1 (unreleased) + +- TBD + ## 0.29.0 ### Interface changes / additions diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index c389b70f9898..aaba8662de5d 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -64,7 +64,7 @@ setup( name='google-cloud-bigquery', - version='0.29.0', + version='0.29.1.dev1', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From bcbf455647f490897d7de8221e9fbaf3a9016f5a Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 5 Jan 2018 12:59:07 -0800 Subject: [PATCH 0377/2016] api_core: Make PageIterator.item_to_value public. (#4702) --- packages/google-cloud-bigquery/tests/unit/test_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 8770256236cb..faa7c3cb7a5c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -941,7 +941,7 @@ def test_constructor(self): self.assertFalse(iterator._started) self.assertIs(iterator.client, client) self.assertEqual(iterator.path, path) - self.assertIs(iterator._item_to_value, _item_to_row) + self.assertIs(iterator.item_to_value, _item_to_row) self.assertEqual(iterator._items_key, 'rows') self.assertIsNone(iterator.max_results) self.assertEqual(iterator.extra_params, {}) From 66fdccd01435b35053543aa5ac2abb280e96fc88 Mon Sep 17 00:00:00 2001 From: Ofek Lev Date: Tue, 9 Jan 2018 08:56:47 -0500 Subject: [PATCH 0378/2016] Fix copy-paste error in docs (#4718) --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 9090f2b3c189..9381f47b3ea6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1765,7 +1765,7 @@ def billing_tier(self): @property def cache_hit(self): - """Return billing tier from job statistics, if present. + """Return whether or not query results were served from cache. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.cacheHit From ebe5813feb9e00e3edb47c9c9a790d8b62e4904a Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Wed, 10 Jan 2018 12:55:21 -0800 Subject: [PATCH 0379/2016] BigQuery: expose delete contents (#4724) --- .../google/cloud/bigquery/client.py | 19 +++++++++++--- .../google-cloud-bigquery/tests/system.py | 25 +++++++++++++++++++ .../tests/unit/test_client.py | 16 ++++++++++++ 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e298468f0fbb..0ce1bb2debb6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -446,7 +446,8 @@ def list_dataset_tables(self, *args, **kwargs): DeprecationWarning) return self.list_tables(*args, **kwargs) - def delete_dataset(self, dataset, retry=DEFAULT_RETRY): + def delete_dataset(self, dataset, delete_contents=False, + retry=DEFAULT_RETRY): """Delete a dataset. See @@ -455,15 +456,27 @@ def delete_dataset(self, dataset, retry=DEFAULT_RETRY): :type dataset: One of: :class:`~google.cloud.bigquery.dataset.Dataset` :class:`~google.cloud.bigquery.dataset.DatasetReference` + :param dataset: the dataset to delete, or a reference to it. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. - :param dataset: the dataset to delete, or a reference to it. + :type delete_contents: boolean + :param delete_contents: (Optional) If True, delete all the tables + in the dataset. If False and the dataset contains tables, the + request will fail. Default is False """ if not isinstance(dataset, (Dataset, DatasetReference)): raise TypeError('dataset must be a Dataset or a DatasetReference') - self._call_api(retry, method='DELETE', path=dataset.path) + + params = {} + if delete_contents: + params['deleteContents'] = 'true' + + self._call_api(retry, + method='DELETE', + path=dataset.path, + query_params=params) def delete_table(self, table, retry=DEFAULT_RETRY): """Delete a table diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index ecb342ed0c1d..754c7e1508f3 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -215,6 +215,31 @@ def test_create_table(self): self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_id) + def test_delete_dataset_delete_contents_true(self): + dataset_id = _make_dataset_id('delete_table_true') + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(Config.CLIENT.dataset(dataset_id))) + + table_id = 'test_table' + table_arg = Table(dataset.table(table_id), schema=SCHEMA) + table = retry_403(Config.CLIENT.create_table)(table_arg) + Config.CLIENT.delete_dataset(dataset, delete_contents=True) + + self.assertFalse(_table_exists(table)) + + def test_delete_dataset_delete_contents_false(self): + from google.api_core import exceptions + dataset_id = _make_dataset_id('delete_table_false') + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(Config.CLIENT.dataset(dataset_id))) + + table_id = 'test_table' + table_arg = Table(dataset.table(table_id), schema=SCHEMA) + + retry_403(Config.CLIENT.create_table)(table_arg) + with self.assertRaises(exceptions.BadRequest): + Config.CLIENT.delete_dataset(dataset) + def test_get_table_w_public_dataset(self): PUBLIC = 'bigquery-public-data' DATASET_ID = 'samples' diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f54feacdd93d..2b1573bf4e90 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1117,6 +1117,22 @@ def test_delete_dataset(self): req = conn._requested[0] self.assertEqual(req['method'], 'DELETE') self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {}) + + def test_delete_dataset_delete_contents(self): + from google.cloud.bigquery.dataset import Dataset + + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection({}, {}) + ds_ref = client.dataset(self.DS_ID) + for arg in (ds_ref, Dataset(ds_ref)): + client.delete_dataset(arg, delete_contents=True) + req = conn._requested[0] + self.assertEqual(req['method'], 'DELETE') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {'deleteContents': 'true'}) def test_delete_dataset_wrong_type(self): creds = _make_credentials() From ea3ab5ebc656964c6968388535dee003626b9104 Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Wed, 10 Jan 2018 13:00:33 -0800 Subject: [PATCH 0380/2016] BigQuery: missing return value in LoadJobConfig.from_api_repr and LoadJobConfig Tests (#4727) * BigQuery: missing return job config * BigQuery: LoadJobConfig tests --- .../google/cloud/bigquery/job.py | 3 ++ .../tests/unit/test_job.py | 35 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 9381f47b3ea6..05fbb5c4a6e7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -674,6 +674,9 @@ def from_api_repr(cls, resource): config._properties = copy.deepcopy(resource) config.schema = _parse_schema_resource(schema) config.skip_leading_rows = _int_or_none(slr) + if config.skip_leading_rows is None: + del config.skip_leading_rows + return config class LoadJob(_AsyncJob): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 40903a4bf059..f958d0fe6bd7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -207,6 +207,41 @@ def _verifyReadonlyResourceProperties(self, job, resource): self.assertIsNone(job.user_email) +class TestLoadJobConfig(unittest.TestCase, _Base): + JOB_TYPE = 'load' + + def _make_resource(self, started=False, ended=False): + resource = super(TestLoadJobConfig, self)._make_resource( + started, ended) + config = resource['configuration']['load'] + config['sourceUris'] = [self.SOURCE1] + config['destinationTable'] = { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, + } + + return resource + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import LoadJobConfig + return LoadJobConfig + + def test_schema(self): + from google.cloud.bigquery.schema import SchemaField + config = self._get_target_class()() + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + config.schema = [full_name, age] + self.assertEqual(config.schema, [full_name, age]) + + def test_api_repr(self): + resource = self._make_resource() + config = self._get_target_class().from_api_repr(resource) + self.assertEqual(config.to_api_repr(), resource) + + class TestLoadJob(unittest.TestCase, _Base): JOB_TYPE = 'load' From a019be6cf64908b6d5b74cfab9dbf8270b4c6319 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Wed, 10 Jan 2018 16:54:48 -0800 Subject: [PATCH 0381/2016] Revert "api_core: Make PageIterator.item_to_value public. (#4702)" (#4731) --- packages/google-cloud-bigquery/tests/unit/test_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index faa7c3cb7a5c..8770256236cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -941,7 +941,7 @@ def test_constructor(self): self.assertFalse(iterator._started) self.assertIs(iterator.client, client) self.assertEqual(iterator.path, path) - self.assertIs(iterator.item_to_value, _item_to_row) + self.assertIs(iterator._item_to_value, _item_to_row) self.assertEqual(iterator._items_key, 'rows') self.assertIsNone(iterator.max_results) self.assertEqual(iterator.extra_params, {}) From c061bb88f30a2b688b3d61aef5cb1d8c28900ade Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Wed, 17 Jan 2018 18:10:11 -0600 Subject: [PATCH 0382/2016] Add handling of missing properties in SchemaField.from_api_repr() (#4754) --- .../google/cloud/bigquery/schema.py | 7 +++++-- .../google-cloud-bigquery/tests/unit/test_schema.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 155ffe9a159a..f619b1cd9f2c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -56,10 +56,13 @@ def from_api_repr(cls, api_repr): google.cloud.biquery.schema.SchemaField: The ``SchemaField`` object. """ + # Handle optional properties with default values + mode = api_repr.get('mode', 'NULLABLE') + fields = api_repr.get('fields', ()) return cls( field_type=api_repr['type'].upper(), - fields=[cls.from_api_repr(f) for f in api_repr.get('fields', ())], - mode=api_repr['mode'].upper(), + fields=[cls.from_api_repr(f) for f in fields], + mode=mode.upper(), name=api_repr['name'], ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index a5d5ecacd619..1613b4feb465 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -102,6 +102,16 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].field_type, 'INTEGER') self.assertEqual(field.fields[0].mode, 'NULLABLE') + def test_from_api_repr_defaults(self): + field = self._get_target_class().from_api_repr({ + 'name': 'foo', + 'type': 'record', + }) + self.assertEqual(field.name, 'foo') + self.assertEqual(field.field_type, 'RECORD') + self.assertEqual(field.mode, 'NULLABLE') + self.assertEqual(len(field.fields), 0) + def test_name_property(self): name = 'lemon-ness' schema_field = self._make_one(name, 'INTEGER') From 0a8bc268c0d637d11c0b7094db477b2f4a522a0f Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Wed, 24 Jan 2018 11:57:28 -0800 Subject: [PATCH 0383/2016] BigQuery: minor typo (#4782) --- packages/google-cloud-bigquery/google/cloud/bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 0ce1bb2debb6..ec0e47e9f790 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1232,7 +1232,7 @@ def list_rows(self, table, selected_fields=None, max_results=None, table's rows. :type start_index: int - :param page_token: (Optional) The zero-based index of the starting + :param start_index: (Optional) The zero-based index of the starting row to read. :type retry: :class:`google.api_core.retry.Retry` From 6423640e406201481bb5a8a892cf53a238c5f30f Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Wed, 24 Jan 2018 17:00:15 -0800 Subject: [PATCH 0384/2016] BigQuery: Documentation clarification for page_token (#4784) --- .../google/cloud/bigquery/client.py | 61 +++++++++++++------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index ec0e47e9f790..83df75d7caa1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -134,13 +134,17 @@ def list_projects(self, max_results=None, page_token=None, https://cloud.google.com/bigquery/docs/reference/rest/v2/projects/list :type max_results: int - :param max_results: maximum number of projects to return, If not - passed, defaults to a value set by the API. + :param max_results: (Optional) maximum number of projects to return, + If not passed, defaults to a value set by the API. :type page_token: str - :param page_token: opaque marker for the next "page" of projects. If - not passed, the API will return the first page of - projects. + :param page_token: + (Optional) Token representing a cursor into the projects. If + not passed, the API will return the first page of projects. + The token marks the beginning of the iterator to be returned + and the value of the ``page_token`` can be accessed at + ``next_page_token`` of the + :class:`~google.api_core.page_iterator.HTTPIterator`. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. @@ -169,18 +173,22 @@ def list_datasets(self, include_all=False, filter=None, max_results=None, :param include_all: True if results include hidden datasets. :type filter: str - :param filter: an expression for filtering the results by label. - For syntax, see + :param filter: (Optional) an expression for filtering the results by + label. For syntax, see https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#filter. :type max_results: int - :param max_results: maximum number of datasets to return, If not - passed, defaults to a value set by the API. + :param max_results: (Optional) maximum number of datasets to return, + if not passed, defaults to a value set by the API. :type page_token: str - :param page_token: opaque marker for the next "page" of datasets. If - not passed, the API will return the first page of - datasets. + :param page_token: + (Optional) Token representing a cursor into the datasets. If + not passed, the API will return the first page of datasets. + The token marks the beginning of the iterator to be returned + and the value of the ``page_token`` can be accessed at + ``next_page_token`` of the + :class:`~google.api_core.page_iterator.HTTPIterator`. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. @@ -410,9 +418,13 @@ def list_tables(self, dataset, max_results=None, page_token=None, If not passed, defaults to a value set by the API. :type page_token: str - :param page_token: (Optional) Opaque marker for the next "page" of - datasets. If not passed, the API will return the - first page of datasets. + :param page_token: + (Optional) Token representing a cursor into the tables. If not + passed, the API will return the first page of tables. The + token marks the beginning of the iterator to be returned and + the value of the ``page_token`` can be accessed at + ``next_page_token`` of the + :class:`~google.api_core.page_iterator.HTTPIterator`. :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. @@ -644,9 +656,13 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, passed, defaults to a value set by the API. :type page_token: str - :param page_token: opaque marker for the next "page" of jobs. If - not passed, the API will return the first page of - jobs. + :param page_token: + (Optional) Opaque marker for the next "page" of jobs. If not + passed, the API will return the first page of jobs. The token + marks the beginning of the iterator to be returned and the + value of the ``page_token`` can be accessed at + ``next_page_token`` of + :class:`~google.api_core.page_iterator.HTTPIterator`. :type all_users: bool :param all_users: if true, include jobs owned by all users in the @@ -1225,11 +1241,16 @@ def list_rows(self, table, selected_fields=None, max_results=None, :class:`~google.cloud.bigquery.table.TableReference`. :type max_results: int - :param max_results: maximum number of rows to return. + :param max_results: (Optional) maximum number of rows to return. :type page_token: str :param page_token: (Optional) Token representing a cursor into the - table's rows. + table's rows. If not passed, the API will return + the first page of the rows. The token marks the + beginning of the iterator to be returned and the + value of the ``page_token`` can be accessed at + ``next_page_token`` of the + :class:`~google.cloud.bigquery.table.RowIterator`. :type start_index: int :param start_index: (Optional) The zero-based index of the starting From 6bd1132ec795d1b6a78d4bd27ecb0365a92601b9 Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Thu, 1 Feb 2018 10:25:19 -0800 Subject: [PATCH 0385/2016] BigQuery: Confirm loading avro from gcs is ok (#4821) --- .../google-cloud-bigquery/tests/system.py | 52 +++++++++++++++++-- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 754c7e1508f3..f8991dcb527b 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -464,6 +464,41 @@ def test_load_table_from_local_avro_file_then_dump_table(self): self.assertEqual(sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength)) + def test_load_avro_from_uri_then_dump_table(self): + table_name = 'test_table' + rows = [ + ("violet", 400), + ("indigo", 445), + ("blue", 475), + ("green", 510), + ("yellow", 570), + ("orange", 590), + ("red", 650) + ] + with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as f: + GS_URL = self._write_avro_to_storage( + 'bq_load_test' + unique_resource_id(), 'colors.avro', f) + + dataset = self.temp_dataset(_make_dataset_id('bq_load_test')) + table_arg = dataset.table(table_name) + table = retry_403(Config.CLIENT.create_table)(Table(table_arg)) + self.to_delete.insert(0, table) + + config = bigquery.LoadJobConfig() + config.create_disposition = 'CREATE_NEVER' + config.source_format = 'AVRO' + config.write_disposition = 'WRITE_EMPTY' + job = Config.CLIENT.load_table_from_uri( + GS_URL, table_arg, job_config=config) + job.result(timeout=JOB_TIMEOUT) + self.assertEqual(job.output_rows, len(rows)) + + table = Config.CLIENT.get_table(table) + fetched = self._fetch_single_page(table) + row_tuples = [r.values() for r in fetched] + self.assertEqual(sorted(row_tuples, key=lambda x: x[1]), + sorted(rows, key=lambda x: x[1])) + def test_load_table_from_uri_then_dump_table(self): TABLE_ID = 'test_table' GS_URL = self._write_csv_to_storage( @@ -540,9 +575,7 @@ def test_load_table_from_uri_w_autodetect_schema_then_get_job(self): self.assertEqual(fetched_job.job_id, JOB_ID) self.assertEqual(fetched_job.autodetect, True) - def _write_csv_to_storage(self, bucket_name, blob_name, header_row, - data_rows): - from google.cloud._testing import _NamedTemporaryFile + def _create_storage(self, bucket_name, blob_name): from google.cloud.storage import Client as StorageClient storage_client = StorageClient() @@ -552,8 +585,13 @@ def _write_csv_to_storage(self, bucket_name, blob_name, header_row, bucket = storage_client.create_bucket(bucket_name) self.to_delete.append(bucket) - blob = bucket.blob(blob_name) + return bucket.blob(blob_name) + def _write_csv_to_storage(self, bucket_name, blob_name, header_row, + data_rows): + from google.cloud._testing import _NamedTemporaryFile + + blob = self._create_storage(bucket_name, blob_name) with _NamedTemporaryFile() as temp: with open(temp.name, 'w') as csv_write: writer = csv.writer(csv_write) @@ -564,7 +602,13 @@ def _write_csv_to_storage(self, bucket_name, blob_name, header_row, blob.upload_from_file(csv_read, content_type='text/csv') self.to_delete.insert(0, blob) + return 'gs://{}/{}'.format(bucket_name, blob_name) + def _write_avro_to_storage(self, bucket_name, blob_name, avro_file): + blob = self._create_storage(bucket_name, blob_name) + blob.upload_from_file(avro_file, + content_type='application/x-avro-binary') + self.to_delete.insert(0, blob) return 'gs://{}/{}'.format(bucket_name, blob_name) def _load_table_for_extract_table( From f39c0fd90da4be63c30697aab05ffbfc51772da5 Mon Sep 17 00:00:00 2001 From: Ofek Lev Date: Sun, 4 Feb 2018 13:02:02 -0500 Subject: [PATCH 0386/2016] fix docs (#4835) --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 05fbb5c4a6e7..35d651af7910 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1781,7 +1781,7 @@ def cache_hit(self): @property def num_dml_affected_rows(self): - """Return total bytes billed from job statistics, if present. + """Return the number of DML rows affectd by the job. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.numDmlAffectedRows From 04937a07ab6b8ac8e704718376cd1ed3c4cb9ff9 Mon Sep 17 00:00:00 2001 From: Ofek Lev Date: Sun, 4 Feb 2018 19:15:06 -0500 Subject: [PATCH 0387/2016] fix typos (#4836) --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 35d651af7910..10c109ce7393 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1781,13 +1781,13 @@ def cache_hit(self): @property def num_dml_affected_rows(self): - """Return the number of DML rows affectd by the job. + """Return the number of DML rows affected by the job. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.numDmlAffectedRows :rtype: int or None - :returns: number of DML rows affectd by the job, or None if job is not + :returns: number of DML rows affected by the job, or None if job is not yet complete. """ result = self._job_statistics().get('numDmlAffectedRows') From 8c8c722aa00845ec6deb3a6411bfe68e561b7b30 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Thu, 8 Feb 2018 08:40:44 -0800 Subject: [PATCH 0388/2016] Release BigQuery 0.30.0 (#4853) --- packages/google-cloud-bigquery/CHANGELOG.md | 17 +++++++++++++++-- packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index a4e113da65b3..7931a8a58ec4 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,9 +4,22 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history -## 0.29.1 (unreleased) +## 0.30.0 -- TBD +This is the release candidate for v1.0.0. + +### Interface changes / additions + +- Add `delete_contents` to `delete_dataset`. (#4724) + +### Bugfixes + +- Add handling of missing properties in `SchemaField.from_api_repr()`. (#4754) +- Fix missing return value in `LoadJobConfig.from_api_repr`. (#4727) + +### Documentation + +- Minor documentation and typo fixes. (#4782, #4718, #4784, #4835, #4836) ## 0.29.0 diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index aaba8662de5d..d84ed3d8cf2c 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -64,7 +64,7 @@ setup( name='google-cloud-bigquery', - version='0.29.1.dev1', + version='0.30.0', description='Python Client for Google BigQuery', long_description=README, namespace_packages=[ From d0ebf30d82c40d90308e6b766857fa54a98d8952 Mon Sep 17 00:00:00 2001 From: vitaliili <36169562+vitaliili@users.noreply.github.com> Date: Thu, 8 Feb 2018 15:40:54 -0800 Subject: [PATCH 0389/2016] Add support for EncryptionConfiguration (#4845) * Add support for EncryptionConfiguration * Fix docstrings. * Remove unnecessary docstring from kms_key_name setter. * Use Napoleon-style for EncryptionConfiguration docstrings. Include EncryptionConfiguration in BigQuery module. Use _properties for EncryptionConfiguration implementation. * Use Napoleon-style for destination_encryption_configuration docstrings. * Fix EncryptionConfiguration kms_key_name getter. Note: I use _properties and kwargs because we want to be able to explicitly set kmsKeyName to None to allow unsetting it. * Fix unit tests for EncryptionConfiguration. Also, fix links to job configs for destination_encryption_configurations. * Add setter for kms_key_name. * Allow setting encryption configuration to None. * Fix lint errors in job unit tests. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/job.py | 110 +++++++++ .../google/cloud/bigquery/table.py | 86 +++++++- .../tests/unit/test_client.py | 40 ++++ .../tests/unit/test_job.py | 208 ++++++++++++++++++ .../tests/unit/test_table.py | 86 ++++++++ 6 files changed, 531 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index ef40feb83586..6b988607fd25 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -49,6 +49,7 @@ from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import Row @@ -73,6 +74,7 @@ 'DatasetReference', 'AccessEntry', # Tables + 'EncryptionConfiguration', 'Table', 'TableReference', 'Row', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 10c109ce7393..bd6402e4e0e4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -33,6 +33,7 @@ from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource @@ -640,6 +641,29 @@ def schema(self, value): raise ValueError('Schema items must be fields') self._schema = tuple(value) + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.table.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + if using default encryption. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationEncryptionConfiguration + """ + prop = self._properties.get('destinationEncryptionConfiguration') + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @destination_encryption_configuration.setter + def destination_encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._properties['destinationEncryptionConfiguration'] = api_repr + def to_api_repr(self): """Build an API representation of the load job config. @@ -812,12 +836,26 @@ def schema(self): """ return self._configuration.schema + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.table.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) + or ``None`` if using default encryption. + + See + :attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`. + """ + return self._configuration.destination_encryption_configuration + @property def input_file_bytes(self): """Count of bytes loaded from source files. :rtype: int, or ``NoneType`` :returns: the count (None until set from the server). + :raises: ValueError for invalid value types. """ statistics = self._properties.get('statistics') if statistics is not None: @@ -931,6 +969,29 @@ def __init__(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition """ + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.table.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + if using default encryption. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.destinationEncryptionConfiguration + """ + prop = self._properties.get('destinationEncryptionConfiguration') + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @destination_encryption_configuration.setter + def destination_encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._properties['destinationEncryptionConfiguration'] = api_repr + def to_api_repr(self): """Build an API representation of the copy job config. @@ -1002,6 +1063,19 @@ def write_disposition(self): """ return self._configuration.write_disposition + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.table.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + if using default encryption. + + See + :attr:`google.cloud.bigquery.job.CopyJobConfig.destination_encryption_configuration`. + """ + return self._configuration.destination_encryption_configuration + def _build_resource(self): """Generate a resource for :meth:`begin`.""" @@ -1322,6 +1396,29 @@ class QueryJobConfig(object): def __init__(self): self._properties = {} + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.table.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + if using default encryption. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationEncryptionConfiguration + """ + prop = self._properties.get('destinationEncryptionConfiguration') + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @destination_encryption_configuration.setter + def destination_encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._properties['destinationEncryptionConfiguration'] = api_repr + def to_api_repr(self): """Build an API representation of the copy job config. @@ -1560,6 +1657,19 @@ def destination(self): """ return self._configuration.destination + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.table.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + if using default encryption. + + See + :attr:`google.cloud.bigquery.job.QueryJobConfig.destination_encryption_configuration`. + """ + return self._configuration.destination_encryption_configuration + @property def dry_run(self): """See diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d240fa6d0910..bc2ec4a931e2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -76,6 +76,56 @@ def _view_use_legacy_sql_getter(table): return True +class EncryptionConfiguration(object): + """Custom encryption configuration (e.g., Cloud KMS keys). + + Args: + kms_key_name (str): resource ID of Cloud KMS key used for encryption + """ + + def __init__(self, kms_key_name=None): + self._properties = {} + if kms_key_name is not None: + self._properties['kmsKeyName'] = kms_key_name + + @property + def kms_key_name(self): + """str: Resource ID of Cloud KMS key + + Resource ID of Cloud KMS key or ``None`` if using default encryption. + """ + return self._properties.get('kmsKeyName') + + @kms_key_name.setter + def kms_key_name(self, value): + self._properties['kmsKeyName'] = value + + @classmethod + def from_api_repr(cls, resource): + """Construct an encryption configuration from its API representation + + Args: + resource (dict): + An encryption configuration representation as returned from + the API. + + Returns: + google.cloud.bigquery.table.EncryptionConfiguration: + An encryption configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + def to_api_repr(self): + """Construct the API resource representation of this + + Returns: + dict: Encryption configuration as represented as an API resource + """ + return copy.deepcopy(self._properties) + + class TableReference(object): """TableReferences are pointers to tables. @@ -207,7 +257,7 @@ class Table(object): all_fields = [ 'description', 'friendly_name', 'expires', 'location', 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema', - 'external_data_configuration', 'labels', + 'external_data_configuration', 'labels', 'encryption_configuration' ] def __init__(self, table_ref, schema=()): @@ -310,6 +360,30 @@ def labels(self, value): raise ValueError("Pass a dict") self._properties['labels'] = value + @property + def encryption_configuration(self): + """google.cloud.bigquery.table.EncryptionConfiguration: Custom + encryption configuration for the table. + + Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + if using default encryption. + + See `protecting data with Cloud KMS keys + `_ + in the BigQuery documentation. + """ + prop = self._properties.get('encryptionConfiguration') + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @encryption_configuration.setter + def encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._properties['encryptionConfiguration'] = api_repr + @property def created(self): """Datetime at which the table was created. @@ -656,6 +730,7 @@ def from_api_repr(cls, resource): table = cls(dataset_ref.table(table_id)) table._set_properties(resource) + return table def _set_properties(self, api_response): @@ -715,6 +790,14 @@ def _populate_external_config(self, resource): resource['externalDataConfiguration'] = ExternalConfig.to_api_repr( self.external_data_configuration) + def _populate_encryption_configuration(self, resource): + if not self.encryption_configuration: + resource['encryptionConfiguration'] = None + else: + encryptionConfig = EncryptionConfiguration.to_api_repr( + self.encryption_configuration) + resource['encryptionConfiguration'] = encryptionConfig + custom_resource_fields = { 'expires': _populate_expires_resource, 'partitioning_type': _populate_partitioning_type_resource, @@ -722,6 +805,7 @@ def _populate_external_config(self, resource): 'view_use_legacy_sql': _populate_view_use_legacy_sql_resource, 'schema': _populate_schema_resource, 'external_data_configuration': _populate_external_config, + 'encryption_configuration': _populate_encryption_configuration } def _build_resource(self, filter_fields): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 2b1573bf4e90..63c47757727a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -38,6 +38,7 @@ class TestClient(unittest.TestCase): DS_ID = 'DATASET_ID' TABLE_ID = 'TABLE_ID' TABLE_REF = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + KMS_KEY_NAME = 'projects/1/locations/global/keyRings/1/cryptoKeys/1' @staticmethod def _get_target_class(): @@ -470,6 +471,45 @@ def test_create_table_w_day_partition(self): self.assertEqual(table.partitioning_type, "DAY") self.assertEqual(got.table_id, self.TABLE_ID) + def test_create_table_w_encryption_configuration(self): + from google.cloud.bigquery.table import EncryptionConfiguration + from google.cloud.bigquery.table import Table + + path = 'projects/%s/datasets/%s/tables' % ( + self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + } + conn = client._connection = _Connection(resource) + table = Table(self.TABLE_REF) + table.encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME) + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'labels': {}, + 'encryptionConfiguration': {'kmsKeyName': self.KMS_KEY_NAME}, + } + self.assertEqual(req['data'], sent) + self.assertEqual(got.table_id, self.TABLE_ID) + def test_create_table_w_day_partition_and_expire(self): from google.cloud.bigquery.table import Table diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index f958d0fe6bd7..a68ead91e5fd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -24,6 +24,7 @@ from google.cloud.bigquery.job import ExtractJobConfig, CopyJobConfig from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.table import EncryptionConfiguration import mock @@ -97,6 +98,7 @@ class _Base(object): TABLE_ID = 'table_id' TABLE_REF = TableReference(DS_REF, TABLE_ID) JOB_ID = 'JOB_ID' + KMS_KEY_NAME = 'projects/1/locations/global/keyRings/1/cryptoKeys/1' def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) @@ -220,6 +222,8 @@ def _make_resource(self, started=False, ended=False): 'datasetId': self.DS_ID, 'tableId': self.TABLE_ID, } + config['destinationEncryptionConfiguration'] = { + 'kmsKeyName': self.KMS_KEY_NAME} return resource @@ -241,6 +245,29 @@ def test_api_repr(self): config = self._get_target_class().from_api_repr(resource) self.assertEqual(config.to_api_repr(), resource) + def test_to_api_repr_with_encryption(self): + config = self._make_one() + config.destination_encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME) + resource = config.to_api_repr() + self.assertEqual( + resource, + { + 'destinationEncryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME, + }, + }) + + def test_to_api_repr_with_encryption_none(self): + config = self._make_one() + config.destination_encryption_configuration = None + resource = config.to_api_repr() + self.assertEqual( + resource, + { + 'destinationEncryptionConfiguration': None, + }) + class TestLoadJob(unittest.TestCase, _Base): JOB_TYPE = 'load' @@ -363,6 +390,14 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.skip_leading_rows) + if 'destinationEncryptionConfiguration' in config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + config['destinationEncryptionConfiguration']['kmsKeyName']) + else: + self.assertIsNone(job.destination_encryption_configuration) + def test_ctor(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, @@ -398,6 +433,7 @@ def test_ctor(self): self.assertIsNone(job.skip_leading_rows) self.assertIsNone(job.source_format) self.assertIsNone(job.write_disposition) + self.assertIsNone(job.destination_encryption_configuration) def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField @@ -574,6 +610,34 @@ def test_from_api_repr_bare(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + 'id': self.FULL_JOB_ID, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'load': { + 'sourceUris': [self.SOURCE1], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, + }, + 'destinationEncryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME + } + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): client = _make_client(project=self.PROJECT) RESOURCE = self._make_resource() @@ -853,6 +917,38 @@ def test_cancel_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) +class TestCopyJobConfig(unittest.TestCase, _Base): + JOB_TYPE = 'copy' + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import CopyJobConfig + return CopyJobConfig + + def test_to_api_repr_with_encryption(self): + config = self._make_one() + config.destination_encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME) + resource = config.to_api_repr() + self.assertEqual( + resource, + { + 'destinationEncryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME, + } + }) + + def test_to_api_repr_with_encryption_none(self): + config = self._make_one() + config.destination_encryption_configuration = None + resource = config.to_api_repr() + self.assertEqual( + resource, + { + 'destinationEncryptionConfiguration': None, + }) + + class TestCopyJob(unittest.TestCase, _Base): JOB_TYPE = 'copy' SOURCE_TABLE = 'source_table' @@ -912,6 +1008,14 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.write_disposition) + if 'destinationEncryptionConfiguration' in config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + config['destinationEncryptionConfiguration']['kmsKeyName']) + else: + self.assertIsNone(job.destination_encryption_configuration) + def test_ctor(self): client = _make_client(project=self.PROJECT) source = self._table_ref(self.SOURCE_TABLE) @@ -930,6 +1034,7 @@ def test_ctor(self): # set/read from resource['configuration']['copy'] self.assertIsNone(job.create_disposition) self.assertIsNone(job.write_disposition) + self.assertIsNone(job.destination_encryption_configuration) def test_from_api_repr_missing_identity(self): self._setUpConstants() @@ -982,6 +1087,38 @@ def test_from_api_repr_bare(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + 'id': self.JOB_ID, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'copy': { + 'sourceTables': [{ + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.SOURCE_TABLE, + }], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.DESTINATION_TABLE, + }, + 'destinationEncryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME + } + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_sourcetable(self): self._setUpConstants() client = _make_client(project=self.PROJECT) @@ -1547,6 +1684,7 @@ def test_from_api_repr_empty(self): self.assertIsNone(config.dry_run) self.assertIsNone(config.use_legacy_sql) self.assertIsNone(config.default_dataset) + self.assertIsNone(config.destination_encryption_configuration) def test_from_api_repr_normal(self): resource = { @@ -1589,6 +1727,40 @@ def test_to_api_repr_normal(self): self.assertEqual( config._properties['someNewProperty'], 'Woohoo, alpha stuff.') + def test_to_api_repr_with_encryption(self): + config = self._make_one() + config.destination_encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME) + resource = config.to_api_repr() + self.assertEqual( + resource, { + 'destinationEncryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME, + }, + }) + + def test_to_api_repr_with_encryption_none(self): + config = self._make_one() + config.destination_encryption_configuration = None + resource = config.to_api_repr() + self.assertEqual( + resource, + { + 'destinationEncryptionConfiguration': None, + }) + + def test_from_api_repr_with_encryption(self): + resource = { + 'destinationEncryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME + } + } + klass = self._get_target_class() + config = klass.from_api_repr(resource) + self.assertEqual( + config.destination_encryption_configuration.kms_key_name, + self.KMS_KEY_NAME) + class TestQueryJob(unittest.TestCase, _Base): JOB_TYPE = 'query' @@ -1731,6 +1903,14 @@ def _verifyResourceProperties(self, job, resource): query_config['writeDisposition']) else: self.assertIsNone(job.write_disposition) + if 'destinationEncryptionConfiguration' in query_config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + query_config['destinationEncryptionConfiguration'][ + 'kmsKeyName']) + else: + self.assertIsNone(job.destination_encryption_configuration) def test_ctor_defaults(self): client = _make_client(project=self.PROJECT) @@ -1759,6 +1939,7 @@ def test_ctor_defaults(self): self.assertIsNone(job.maximum_billing_tier) self.assertIsNone(job.maximum_bytes_billed) self.assertIsNone(job.table_definitions) + self.assertIsNone(job.destination_encryption_configuration) def test_ctor_w_udf_resources(self): from google.cloud.bigquery.job import QueryJobConfig @@ -1825,6 +2006,29 @@ def test_from_api_repr_bare(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + 'id': self.JOB_ID, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'query': { + 'query': self.QUERY, + 'destinationEncryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME + } + }, + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): client = _make_client(project=self.PROJECT) RESOURCE = self._make_resource() @@ -2833,6 +3037,10 @@ def test___eq___hit(self): other = self._make_one(self.KIND, self.SUBSTEPS) self.assertEqual(step, other) + def test___eq___wrong_type(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertFalse(step == 'hello') + class TestQueryPlanEntry(unittest.TestCase, _Base): NAME = 'NAME' diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 8770256236cb..c1444ec6c084 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -39,6 +39,52 @@ def _verifySchema(self, schema, resource): self._verify_field(field, r_field) +class TestEncryptionConfiguration(unittest.TestCase): + KMS_KEY_NAME = 'projects/1/locations/global/keyRings/1/cryptoKeys/1' + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import EncryptionConfiguration + + return EncryptionConfiguration + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + encryption_config = self._make_one() + self.assertIsNone(encryption_config.kms_key_name) + + def test_ctor_with_key(self): + encryption_config = self._make_one(kms_key_name=self.KMS_KEY_NAME) + self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) + + def test_kms_key_name_setter(self): + encryption_config = self._make_one() + self.assertIsNone(encryption_config.kms_key_name) + encryption_config.kms_key_name = self.KMS_KEY_NAME + self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) + encryption_config.kms_key_name = None + self.assertIsNone(encryption_config.kms_key_name) + + def test_from_api_repr(self): + RESOURCE = { + 'kmsKeyName': self.KMS_KEY_NAME, + } + klass = self._get_target_class() + encryption_config = klass.from_api_repr(RESOURCE) + self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) + + def test_to_api_repr(self): + encryption_config = self._make_one(kms_key_name=self.KMS_KEY_NAME) + resource = encryption_config.to_api_repr() + self.assertEqual( + resource, + { + 'kmsKeyName': self.KMS_KEY_NAME, + }) + + class TestTableReference(unittest.TestCase): @staticmethod @@ -156,6 +202,7 @@ class TestTable(unittest.TestCase, _SchemaBase): PROJECT = 'prahj-ekt' DS_ID = 'dataset-name' TABLE_NAME = 'table-name' + KMS_KEY_NAME = 'projects/1/locations/global/keyRings/1/cryptoKeys/1' @staticmethod def _get_target_class(): @@ -290,6 +337,13 @@ def _verifyResourceProperties(self, table, resource): else: self.assertEqual(table.labels, {}) + if 'encryptionConfiguration' in resource: + self.assertIsNotNone(table.encryption_configuration) + self.assertEqual(table.encryption_configuration.kms_key_name, + resource['encryptionConfiguration']['kmsKeyName']) + else: + self.assertIsNone(table.encryption_configuration) + def test_ctor(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -323,6 +377,7 @@ def test_ctor(self): self.assertIsNone(table.view_use_legacy_sql) self.assertIsNone(table.external_data_configuration) self.assertEquals(table.labels, {}) + self.assertIsNone(table.encryption_configuration) def test_ctor_w_schema(self): from google.cloud.bigquery.table import SchemaField @@ -594,6 +649,24 @@ def test_from_api_repr_w_properties(self): table = klass.from_api_repr(RESOURCE) self._verifyResourceProperties(table, RESOURCE) + def test_from_api_with_encryption(self): + self._setUpConstants() + RESOURCE = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_NAME, + }, + 'encryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME + }, + 'type': 'TABLE', + } + klass = self._get_target_class() + table = klass.from_api_repr(RESOURCE) + self._verifyResourceProperties(table, RESOURCE) + def test_partition_type_setter_bad_type(self): from google.cloud.bigquery.table import SchemaField @@ -695,6 +768,19 @@ def test_partition_expiration_w_none_no_partition_set(self): self.assertIsNone(table.partitioning_type) self.assertIsNone(table.partition_expiration) + def test_encryption_configuration_setter(self): + from google.cloud.bigquery.table import EncryptionConfiguration + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME) + table.encryption_configuration = encryption_configuration + self.assertEqual(table.encryption_configuration.kms_key_name, + self.KMS_KEY_NAME) + table.encryption_configuration = None + self.assertIsNone(table.encryption_configuration) + class Test_row_from_mapping(unittest.TestCase, _SchemaBase): From b82fb4edd90bb22cde0312162e84ed78a50948ef Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 12 Feb 2018 21:49:56 -0800 Subject: [PATCH 0390/2016] Allow listing/getting jobs even when there is an "invalid" job. (#4786) --- .../google/cloud/bigquery/job.py | 191 ++++++++++-------- .../google-cloud-bigquery/tests/system.py | 21 ++ .../tests/unit/test_job.py | 1 + 3 files changed, 126 insertions(+), 87 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index bd6402e4e0e4..5eace9dbcf4e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -26,7 +26,6 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.external_config import ExternalConfig -from google.cloud.bigquery.query import _AbstractQueryParameter from google.cloud.bigquery.query import _query_param_from_api_repr from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter @@ -38,7 +37,6 @@ from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _EnumApiResourceProperty -from google.cloud.bigquery._helpers import _ListApiResourceProperty from google.cloud.bigquery._helpers import _TypedApiResourceProperty from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery._helpers import _int_or_none @@ -1390,9 +1388,6 @@ class QueryJobConfig(object): server defaults. """ - _QUERY_PARAMETERS_KEY = 'queryParameters' - _UDF_RESOURCES_KEY = 'userDefinedFunctionResources' - def __init__(self): self._properties = {} @@ -1420,51 +1415,40 @@ def destination_encryption_configuration(self, value): self._properties['destinationEncryptionConfiguration'] = api_repr def to_api_repr(self): - """Build an API representation of the copy job config. + """Build an API representation of the query job config. - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. + Returns: + dict: A dictionary in the format used by the BigQuery API. """ resource = copy.deepcopy(self._properties) # Query parameters have an addition property associated with them # to indicate if the query is using named or positional parameters. - query_parameters = resource.get(self._QUERY_PARAMETERS_KEY) + query_parameters = resource.get('queryParameters') if query_parameters: - if query_parameters[0].name is None: + if query_parameters[0].get('name') is None: resource['parameterMode'] = 'POSITIONAL' else: resource['parameterMode'] = 'NAMED' - for prop, convert in self._NESTED_PROPERTIES.items(): - _, to_resource = convert - nested_resource = resource.get(prop) - if nested_resource is not None: - resource[prop] = to_resource(nested_resource) - return resource @classmethod def from_api_repr(cls, resource): """Factory: construct a job configuration given its API representation - :type resource: dict - :param resource: - An extract job configuration in the same representation as is - returned from the API. + Args: + resource (dict): + A query job configuration in the same representation as is + returned from the API. - :rtype: :class:`google.cloud.bigquery.job.QueryJobConfig` - :returns: Configuration parsed from ``resource``. + Returns: + ~google.cloud.bigquery.job.QueryJobConfig: + Configuration parsed from ``resource``. """ config = cls() config._properties = copy.deepcopy(resource) - for prop, convert in cls._NESTED_PROPERTIES.items(): - from_resource, _ = convert - nested_resource = resource.get(prop) - if nested_resource is not None: - config._properties[prop] = from_resource(nested_resource) - return config allow_large_results = _TypedApiResourceProperty( @@ -1481,20 +1465,39 @@ def from_api_repr(cls, resource): https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition """ - default_dataset = _TypedApiResourceProperty( - 'default_dataset', 'defaultDataset', DatasetReference) - """See - https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset - """ + @property + def default_dataset(self): + """google.cloud.bigquery.dataset.DatasetReference: the default dataset + to use for unqualified table names in the query or ``None`` if not set. - destination = _TypedApiResourceProperty( - 'destination', 'destinationTable', TableReference) - """ - google.cloud.bigquery.table.TableReference: table where results are written + See + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset + """ + prop = self._properties.get('defaultDataset') + if prop is not None: + prop = DatasetReference.from_api_repr(prop) + return prop - See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable - """ + @default_dataset.setter + def default_dataset(self, value): + self._properties['defaultDataset'] = value.to_api_repr() + + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: table where results are + written or ``None`` if not set. + + See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable + """ + prop = self._properties.get('destinationTable') + if prop is not None: + prop = TableReference.from_api_repr(prop) + return prop + + @destination.setter + def destination(self, value): + self._properties['destinationTable'] = value.to_api_repr() dry_run = _TypedApiResourceProperty('dry_run', 'dryRun', bool) """ @@ -1516,39 +1519,60 @@ def from_api_repr(cls, resource): https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier """ - maximum_bytes_billed = _TypedApiResourceProperty( - 'maximum_bytes_billed', 'maximumBytesBilled', int) - """See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled - """ + @property + def maximum_bytes_billed(self): + """int: Maximum bytes to be billed for this job or ``None`` if not set. + + See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled + """ + prop = self._properties.get('maximumBytesBilled') + if prop is not None: + prop = int(prop) + return prop + + @maximum_bytes_billed.setter + def maximum_bytes_billed(self, value): + self._properties['maximumBytesBilled'] = str(value) priority = QueryPriority('priority', 'priority') """See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority """ - query_parameters = _ListApiResourceProperty( - 'query_parameters', _QUERY_PARAMETERS_KEY, _AbstractQueryParameter) - """ - A list of - :class:`google.cloud.bigquery.ArrayQueryParameter`, - :class:`google.cloud.bigquery.ScalarQueryParameter`, or - :class:`google.cloud.bigquery.StructQueryParameter` - (empty by default) - - See: - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.queryParameters - """ + @property + def query_parameters(self): + """List[Union[google.cloud.bigquery.query.ArrayQueryParameter, \ + google.cloud.bigquery.query.ScalarQueryParameter, \ + google.cloud.bigquery.query.StructQueryParameter]]: list of parameters + for parameterized query (empty by default) - udf_resources = _ListApiResourceProperty( - 'udf_resources', _UDF_RESOURCES_KEY, UDFResource) - """ - A list of :class:`google.cloud.bigquery.UDFResource` (empty - by default) + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.queryParameters + """ + prop = self._properties.get('queryParameters', []) + return _from_api_repr_query_parameters(prop) - See: - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.userDefinedFunctionResources - """ + @query_parameters.setter + def query_parameters(self, values): + self._properties['queryParameters'] = _to_api_repr_query_parameters( + values) + + @property + def udf_resources(self): + """List[google.cloud.bigquery.query.UDFResource]: user + defined function resources (empty by default) + + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.userDefinedFunctionResources + """ + prop = self._properties.get('userDefinedFunctionResources', []) + return _from_api_repr_udf_resources(prop) + + @udf_resources.setter + def udf_resources(self, values): + self._properties['userDefinedFunctionResources'] = ( + _to_api_repr_udf_resources(values)) use_legacy_sql = _TypedApiResourceProperty( 'use_legacy_sql', 'useLegacySql', bool) @@ -1568,33 +1592,26 @@ def from_api_repr(cls, resource): https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition """ - table_definitions = _TypedApiResourceProperty( - 'table_definitions', 'tableDefinitions', dict) - """ - Definitions for external tables. A dictionary from table names (strings) - to :class:`google.cloud.bigquery.ExternalConfig`. + @property + def table_definitions(self): + """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: + Definitions for external tables or ``None`` if not set. - See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions - """ + See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions + """ + prop = self._properties.get('tableDefinitions') + if prop is not None: + prop = _from_api_repr_table_defs(prop) + return prop + + @table_definitions.setter + def table_definitions(self, values): + self._properties['tableDefinitions'] = _to_api_repr_table_defs(values) _maximum_billing_tier = None _maximum_bytes_billed = None - _NESTED_PROPERTIES = { - 'defaultDataset': ( - DatasetReference.from_api_repr, DatasetReference.to_api_repr), - 'destinationTable': ( - TableReference.from_api_repr, TableReference.to_api_repr), - 'maximumBytesBilled': (int, str), - 'tableDefinitions': (_from_api_repr_table_defs, - _to_api_repr_table_defs), - _QUERY_PARAMETERS_KEY: ( - _from_api_repr_query_parameters, _to_api_repr_query_parameters), - _UDF_RESOURCES_KEY: ( - _from_api_repr_udf_resources, _to_api_repr_udf_resources), - } - class QueryJob(_AsyncJob): """Asynchronous job: query tables. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index f8991dcb527b..f74e35a4bc55 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -746,6 +746,27 @@ def test_job_cancel(self): # raise an error, and that the job completed (in the `retry()` # above). + def test_get_failed_job(self): + # issue 4246 + from google.api_core.exceptions import BadRequest + + JOB_ID = 'invalid_{}'.format(str(uuid.uuid4())) + QUERY = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);' + PARAM = bigquery.ScalarQueryParameter( + 'ts_value', 'TIMESTAMP', 1.4810976E9) + + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = [PARAM] + + with self.assertRaises(BadRequest): + Config.CLIENT.query( + QUERY, job_id=JOB_ID, job_config=job_config).result() + + job = Config.CLIENT.get_job(JOB_ID) + + with self.assertRaises(ValueError): + job.query_parameters + def test_query_w_legacy_sql_types(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a68ead91e5fd..83d00f819a11 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1684,6 +1684,7 @@ def test_from_api_repr_empty(self): self.assertIsNone(config.dry_run) self.assertIsNone(config.use_legacy_sql) self.assertIsNone(config.default_dataset) + self.assertIsNone(config.destination) self.assertIsNone(config.destination_encryption_configuration) def test_from_api_repr_normal(self): From 426bad8662eefd57f62ba8f4b132272c4c48b724 Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Wed, 14 Feb 2018 11:14:14 -0800 Subject: [PATCH 0391/2016] Use constant strings for job properties in tests (#4833) --- .../google-cloud-bigquery/tests/system.py | 57 +++++++++----- .../tests/unit/test_client.py | 55 +++++++++----- .../tests/unit/test_job.py | 75 ++++++++++++------- 3 files changed, 119 insertions(+), 68 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index f74e35a4bc55..e96dd9458926 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -394,6 +394,9 @@ def test_insert_rows_then_dump_table(self): def test_load_table_from_local_file_then_dump_table(self): from google.cloud._testing import _NamedTemporaryFile + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition TABLE_NAME = 'test_table' @@ -411,10 +414,10 @@ def test_load_table_from_local_file_then_dump_table(self): with open(temp.name, 'rb') as csv_read: config = bigquery.LoadJobConfig() - config.source_format = 'CSV' + config.source_format = SourceFormat.CSV config.skip_leading_rows = 1 - config.create_disposition = 'CREATE_NEVER' - config.write_disposition = 'WRITE_EMPTY' + config.create_disposition = CreateDisposition.CREATE_NEVER + config.write_disposition = WriteDisposition.WRITE_EMPTY config.schema = table.schema job = Config.CLIENT.load_table_from_file( csv_read, table_ref, job_config=config) @@ -431,6 +434,8 @@ def test_load_table_from_local_file_then_dump_table(self): sorted(ROWS, key=by_age)) def test_load_table_from_local_avro_file_then_dump_table(self): + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition TABLE_NAME = 'test_table_avro' ROWS = [ ("violet", 400), @@ -448,8 +453,8 @@ def test_load_table_from_local_avro_file_then_dump_table(self): with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof: config = bigquery.LoadJobConfig() - config.source_format = 'AVRO' - config.write_disposition = 'WRITE_TRUNCATE' + config.source_format = SourceFormat.AVRO + config.write_disposition = WriteDisposition.WRITE_TRUNCATE job = Config.CLIENT.load_table_from_file( avrof, table_ref, job_config=config) # Retry until done. @@ -465,6 +470,9 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(ROWS, key=by_wavelength)) def test_load_avro_from_uri_then_dump_table(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition table_name = 'test_table' rows = [ ("violet", 400), @@ -485,9 +493,9 @@ def test_load_avro_from_uri_then_dump_table(self): self.to_delete.insert(0, table) config = bigquery.LoadJobConfig() - config.create_disposition = 'CREATE_NEVER' - config.source_format = 'AVRO' - config.write_disposition = 'WRITE_EMPTY' + config.create_disposition = CreateDisposition.CREATE_NEVER + config.source_format = SourceFormat.AVRO + config.write_disposition = WriteDisposition.WRITE_EMPTY job = Config.CLIENT.load_table_from_uri( GS_URL, table_arg, job_config=config) job.result(timeout=JOB_TIMEOUT) @@ -500,6 +508,10 @@ def test_load_avro_from_uri_then_dump_table(self): sorted(rows, key=lambda x: x[1])) def test_load_table_from_uri_then_dump_table(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition + TABLE_ID = 'test_table' GS_URL = self._write_csv_to_storage( 'bq_load_test' + unique_resource_id(), 'person_ages.csv', @@ -512,10 +524,10 @@ def test_load_table_from_uri_then_dump_table(self): self.to_delete.insert(0, table) config = bigquery.LoadJobConfig() - config.create_disposition = 'CREATE_NEVER' + config.create_disposition = CreateDisposition.CREATE_NEVER config.skip_leading_rows = 1 - config.source_format = 'CSV' - config.write_disposition = 'WRITE_EMPTY' + config.source_format = SourceFormat.CSV + config.write_disposition = WriteDisposition.WRITE_EMPTY job = Config.CLIENT.load_table_from_uri( GS_URL, dataset.table(TABLE_ID), job_config=config) @@ -674,6 +686,7 @@ def test_extract_table(self): def test_extract_table_w_job_config(self): from google.cloud.storage import Client as StorageClient + from google.cloud.bigquery.job import DestinationFormat storage_client = StorageClient() local_id = unique_resource_id() @@ -691,10 +704,10 @@ def test_extract_table_w_job_config(self): destination = bucket.blob(destination_blob_name) destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) - job_config = bigquery.ExtractJobConfig() - job_config.destination_format = 'NEWLINE_DELIMITED_JSON' + config = bigquery.ExtractJobConfig() + config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON job = Config.CLIENT.extract_table( - table, destination_uri, job_config=job_config) + table, destination_uri, job_config=config) job.result() self.to_delete.insert(0, destination) @@ -947,6 +960,9 @@ def test_dbapi_fetchall(self): def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition dataset = self.temp_dataset(dataset_id) greeting = bigquery.SchemaField( @@ -964,10 +980,10 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): with open(temp.name, 'rb') as csv_read: config = bigquery.LoadJobConfig() - config.source_format = 'CSV' + config.source_format = SourceFormat.CSV config.skip_leading_rows = 1 - config.create_disposition = 'CREATE_NEVER' - config.write_disposition = 'WRITE_EMPTY' + config.create_disposition = CreateDisposition.CREATE_NEVER + config.write_disposition = WriteDisposition.WRITE_EMPTY job = Config.CLIENT.load_table_from_file( csv_read, table_ref, job_config=config) @@ -1540,6 +1556,9 @@ def _fetch_dataframe(self, query): @unittest.skipIf(pandas is None, 'Requires `pandas`') def test_nested_table_to_dataframe(self): + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition + SF = bigquery.SchemaField schema = [ SF('string_col', 'STRING', mode='NULLABLE'), @@ -1566,8 +1585,8 @@ def test_nested_table_to_dataframe(self): table = dataset.table(table_id) self.to_delete.insert(0, table) job_config = bigquery.LoadJobConfig() - job_config.write_disposition = 'WRITE_TRUNCATE' - job_config.source_format = 'NEWLINE_DELIMITED_JSON' + job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE + job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON job_config.schema = schema # Load a table using a local JSON file from memory. Config.CLIENT.load_table_from_file( diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 63c47757727a..9a0b24ad4bfe 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -606,8 +606,9 @@ def test_create_table_w_schema_and_query(self): self.assertEqual(got.view_query, query) def test_create_table_w_external(self): - from google.cloud.bigquery.table import Table from google.cloud.bigquery.external_config import ExternalConfig + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.table import Table path = 'projects/%s/datasets/%s/tables' % ( self.PROJECT, self.DS_ID) @@ -621,7 +622,7 @@ def test_create_table_w_external(self): 'tableId': self.TABLE_ID }, 'externalDataConfiguration': { - 'sourceFormat': 'CSV', + 'sourceFormat': SourceFormat.CSV, 'autodetect': True, }, } @@ -644,7 +645,7 @@ def test_create_table_w_external(self): 'tableId': self.TABLE_ID, }, 'externalDataConfiguration': { - 'sourceFormat': 'CSV', + 'sourceFormat': SourceFormat.CSV, 'autodetect': True, }, 'labels': {}, @@ -653,7 +654,8 @@ def test_create_table_w_external(self): self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) self.assertEqual(got.dataset_id, self.DS_ID) - self.assertEqual(got.external_data_configuration.source_format, 'CSV') + self.assertEqual(got.external_data_configuration.source_format, + SourceFormat.CSV) self.assertEqual(got.external_data_configuration.autodetect, True) def test_get_table(self): @@ -1228,7 +1230,9 @@ def test_get_job_miss_w_explict_project(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_get_job_hit(self): + from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import WriteDisposition JOB_ID = 'query_job' QUERY_DESTINATION_TABLE = 'query_destination_table' @@ -1248,8 +1252,8 @@ def test_get_job_hit(self): 'datasetId': self.DS_ID, 'tableId': QUERY_DESTINATION_TABLE, }, - 'createDisposition': 'CREATE_IF_NEEDED', - 'writeDisposition': 'WRITE_TRUNCATE', + 'createDisposition': CreateDisposition.CREATE_IF_NEEDED, + 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, } }, } @@ -1261,8 +1265,10 @@ def test_get_job_hit(self): self.assertIsInstance(job, QueryJob) self.assertEqual(job.job_id, JOB_ID) - self.assertEqual(job.create_disposition, 'CREATE_IF_NEEDED') - self.assertEqual(job.write_disposition, 'WRITE_TRUNCATE') + self.assertEqual(job.create_disposition, + CreateDisposition.CREATE_IF_NEEDED) + self.assertEqual(job.write_disposition, + WriteDisposition.WRITE_TRUNCATE) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -1328,10 +1334,12 @@ def test_cancel_job_hit(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_defaults(self): - from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import CopyJob + from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import ExtractJob + from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import WriteDisposition SOURCE_TABLE = 'source_table' DESTINATION_TABLE = 'destination_table' @@ -1362,8 +1370,8 @@ def test_list_jobs_defaults(self): 'datasetId': self.DS_ID, 'tableId': QUERY_DESTINATION_TABLE, }, - 'createDisposition': 'CREATE_IF_NEEDED', - 'writeDisposition': 'WRITE_TRUNCATE', + 'createDisposition': CreateDisposition.CREATE_IF_NEEDED, + 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, } }, } @@ -1608,7 +1616,9 @@ def _initiate_resumable_upload_helper(self, num_retries=None): from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE from google.cloud.bigquery.client import _get_upload_headers - from google.cloud.bigquery.job import LoadJob, LoadJobConfig + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SourceFormat # Create mocks to be checked for doing transport. resumable_url = 'http://test.invalid?upload_id=hey-you' @@ -1622,7 +1632,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): data = b'goodbye gudbi gootbee' stream = io.BytesIO(data) config = LoadJobConfig() - config.source_format = 'CSV' + config.source_format = SourceFormat.CSV job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job._build_resource() upload, transport = client._initiate_resumable_upload( @@ -1675,7 +1685,9 @@ def test__initiate_resumable_upload_with_retry(self): def _do_multipart_upload_success_helper( self, get_boundary, num_retries=None): from google.cloud.bigquery.client import _get_upload_headers - from google.cloud.bigquery.job import LoadJob, LoadJobConfig + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SourceFormat fake_transport = self._mock_transport(http_client.OK, {}) client = self._make_one(project=self.PROJECT, _http=fake_transport) @@ -1685,7 +1697,7 @@ def _do_multipart_upload_success_helper( data = b'Bzzzz-zap \x00\x01\xf4' stream = io.BytesIO(data) config = LoadJobConfig() - config.source_format = 'CSV' + config.source_format = SourceFormat.CSV job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job._build_resource() size = len(data) @@ -2838,7 +2850,7 @@ class TestClientUpload(object): # NOTE: This is a "partner" to `TestClient` meant to test some of the # "load_table_from_file" portions of `Client`. It also uses # `pytest`-style tests rather than `unittest`-style. - + from google.cloud.bigquery.job import SourceFormat TABLE_REF = DatasetReference( 'project_id', 'test_dataset').table('test_table') @@ -2881,7 +2893,7 @@ def _make_do_upload_patch(cls, client, method, 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'}, 'configuration': { 'load': { - 'sourceFormat': 'CSV', + 'sourceFormat': SourceFormat.CSV, 'destinationTable': { 'projectId': 'project_id', 'datasetId': 'test_dataset', @@ -2898,9 +2910,10 @@ def _make_file_obj(): @staticmethod def _make_config(): from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SourceFormat config = LoadJobConfig() - config.source_format = 'CSV' + config.source_format = SourceFormat.CSV return config # High-level tests @@ -2925,6 +2938,8 @@ def test_load_table_from_file_resumable(self): def test_load_table_from_file_resumable_metadata(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import WriteDisposition client = self._make_client() file_obj = self._make_file_obj() @@ -2932,14 +2947,14 @@ def test_load_table_from_file_resumable_metadata(self): config = self._make_config() config.allow_jagged_rows = False config.allow_quoted_newlines = False - config.create_disposition = 'CREATE_IF_NEEDED' + config.create_disposition = CreateDisposition.CREATE_IF_NEEDED config.encoding = 'utf8' config.field_delimiter = ',' config.ignore_unknown_values = False config.max_bad_records = 0 config.quote_character = '"' config.skip_leading_rows = 1 - config.write_disposition = 'WRITE_APPEND' + config.write_disposition = WriteDisposition.WRITE_APPEND config.null_marker = r'\N' expected_config = { diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 83d00f819a11..2c63f94b5ab8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -20,8 +20,8 @@ import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None - -from google.cloud.bigquery.job import ExtractJobConfig, CopyJobConfig +from google.cloud.bigquery.job import CopyJobConfig +from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import EncryptionConfiguration @@ -639,10 +639,12 @@ def test_from_api_with_encryption(self): self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + client = _make_client(project=self.PROJECT) RESOURCE = self._make_resource() load_config = RESOURCE['configuration']['load'] - load_config['createDisposition'] = 'CREATE_IF_NEEDED' + load_config['createDisposition'] = CreateDisposition.CREATE_IF_NEEDED klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) @@ -739,6 +741,8 @@ def test_begin_w_autodetect(self): self._verifyResourceProperties(job, resource) def test_begin_w_alternate_client(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import WriteDisposition from google.cloud.bigquery.schema import SchemaField PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -752,7 +756,7 @@ def test_begin_w_alternate_client(self): }, 'allowJaggedRows': True, 'allowQuotedNewlines': True, - 'createDisposition': 'CREATE_NEVER', + 'createDisposition': CreateDisposition.CREATE_NEVER, 'encoding': 'ISO-8559-1', 'fieldDelimiter': '|', 'ignoreUnknownValues': True, @@ -761,7 +765,7 @@ def test_begin_w_alternate_client(self): 'quote': "'", 'skipLeadingRows': '1', 'sourceFormat': 'CSV', - 'writeDisposition': 'WRITE_TRUNCATE', + 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}, @@ -780,7 +784,7 @@ def test_begin_w_alternate_client(self): client1, config) config.allow_jagged_rows = True config.allow_quoted_newlines = True - config.create_disposition = 'CREATE_NEVER' + config.create_disposition = CreateDisposition.CREATE_NEVER config.encoding = 'ISO-8559-1' config.field_delimiter = '|' config.ignore_unknown_values = True @@ -789,7 +793,7 @@ def test_begin_w_alternate_client(self): config.quote_character = "'" config.skip_leading_rows = 1 config.source_format = 'CSV' - config.write_disposition = 'WRITE_TRUNCATE' + config.write_disposition = WriteDisposition.WRITE_TRUNCATE job._begin(client=client2) @@ -1172,10 +1176,11 @@ def test_from_api_repr_wo_sources(self): klass.from_api_repr(RESOURCE, client=client) def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition client = _make_client(project=self.PROJECT) RESOURCE = self._make_resource() copy_config = RESOURCE['configuration']['copy'] - copy_config['createDisposition'] = 'CREATE_IF_NEEDED' + copy_config['createDisposition'] = CreateDisposition.CREATE_IF_NEEDED klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) @@ -1225,6 +1230,8 @@ def test_begin_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import WriteDisposition PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._make_resource(ended=True) COPY_CONFIGURATION = { @@ -1238,8 +1245,8 @@ def test_begin_w_alternate_client(self): 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, - 'createDisposition': 'CREATE_NEVER', - 'writeDisposition': 'WRITE_TRUNCATE', + 'createDisposition': CreateDisposition.CREATE_NEVER, + 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, } RESOURCE['configuration']['copy'] = COPY_CONFIGURATION conn1 = _Connection() @@ -1249,8 +1256,8 @@ def test_begin_w_alternate_client(self): source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) config = CopyJobConfig() - config.create_disposition = 'CREATE_NEVER' - config.write_disposition = 'WRITE_TRUNCATE' + config.create_disposition = CreateDisposition.CREATE_NEVER + config.write_disposition = WriteDisposition.WRITE_TRUNCATE job = self._make_one(self.JOB_ID, [source], destination, client1, config) job._begin(client=client2) @@ -1491,10 +1498,11 @@ def test_from_api_repr_bare(self): self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import Compression client = _make_client(project=self.PROJECT) RESOURCE = self._make_resource() extract_config = RESOURCE['configuration']['extract'] - extract_config['compression'] = 'GZIP' + extract_config['compression'] = Compression.GZIP klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) @@ -1541,6 +1549,9 @@ def test_begin_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): + from google.cloud.bigquery.job import Compression + from google.cloud.bigquery.job import DestinationFormat + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._make_resource(ended=True) EXTRACT_CONFIGURATION = { @@ -1550,8 +1561,8 @@ def test_begin_w_alternate_client(self): 'tableId': self.SOURCE_TABLE, }, 'destinationUris': [self.DESTINATION_URI], - 'compression': 'GZIP', - 'destinationFormat': 'NEWLINE_DELIMITED_JSON', + 'compression': Compression.GZIP, + 'destinationFormat': DestinationFormat.NEWLINE_DELIMITED_JSON, 'fieldDelimiter': '|', 'printHeader': False, } @@ -1562,13 +1573,13 @@ def test_begin_w_alternate_client(self): client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) - job_config = ExtractJobConfig() - job_config.compression = 'GZIP' - job_config.destination_format = 'NEWLINE_DELIMITED_JSON' - job_config.field_delimiter = '|' - job_config.print_header = False + config = ExtractJobConfig() + config.compression = Compression.GZIP + config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON + config.field_delimiter = '|' + config.print_header = False job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], - client1, job_config) + client1, config) job._begin(client=client2) @@ -2031,11 +2042,14 @@ def test_from_api_repr_with_encryption(self): self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import WriteDisposition + client = _make_client(project=self.PROJECT) RESOURCE = self._make_resource() query_config = RESOURCE['configuration']['query'] - query_config['createDisposition'] = 'CREATE_IF_NEEDED' - query_config['writeDisposition'] = 'WRITE_TRUNCATE' + query_config['createDisposition'] = CreateDisposition.CREATE_IF_NEEDED + query_config['writeDisposition'] = WriteDisposition.WRITE_TRUNCATE query_config['destinationTable'] = { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, @@ -2482,7 +2496,10 @@ def test_begin_w_bound_client(self): def test_begin_w_alternate_client(self): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.job import QueryPriority + from google.cloud.bigquery.job import WriteDisposition PATH = '/projects/%s/jobs' % (self.PROJECT,) TABLE = 'TABLE' @@ -2491,7 +2508,7 @@ def test_begin_w_alternate_client(self): QUERY_CONFIGURATION = { 'query': self.QUERY, 'allowLargeResults': True, - 'createDisposition': 'CREATE_NEVER', + 'createDisposition': CreateDisposition.CREATE_NEVER, 'defaultDataset': { 'projectId': self.PROJECT, 'datasetId': DS_ID, @@ -2502,10 +2519,10 @@ def test_begin_w_alternate_client(self): 'tableId': TABLE, }, 'flattenResults': True, - 'priority': 'INTERACTIVE', + 'priority': QueryPriority.INTERACTIVE, 'useQueryCache': True, 'useLegacySql': True, - 'writeDisposition': 'WRITE_TRUNCATE', + 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, 'maximumBillingTier': 4, 'maximumBytesBilled': '123456' } @@ -2520,16 +2537,16 @@ def test_begin_w_alternate_client(self): config = QueryJobConfig() config.allow_large_results = True - config.create_disposition = 'CREATE_NEVER' + config.create_disposition = CreateDisposition.CREATE_NEVER config.default_dataset = dataset_ref config.destination = table_ref config.dry_run = True config.flatten_results = True config.maximum_billing_tier = 4 - config.priority = 'INTERACTIVE' + config.priority = QueryPriority.INTERACTIVE config.use_legacy_sql = True config.use_query_cache = True - config.write_disposition = 'WRITE_TRUNCATE' + config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.maximum_bytes_billed = 123456 job = self._make_one( self.JOB_ID, self.QUERY, client1, job_config=config) From 84c5968e2acc6ec464f17409fda86dd83849a242 Mon Sep 17 00:00:00 2001 From: Clayton Mellina Date: Tue, 20 Feb 2018 15:19:10 -0800 Subject: [PATCH 0392/2016] Update format in `Table.full_table_id` and `TableListItem.full_table_id` docstrings. (#4906) --- .../google/cloud/bigquery/table.py | 4 ++-- .../tests/unit/test_table.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index bc2ec4a931e2..cf7881f3af13 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -450,7 +450,7 @@ def self_link(self): @property def full_table_id(self): - """ID for the table, in the form ``project_id:dataset_id:table_id``. + """ID for the table, in the form ``project_id:dataset_id.table_id``. :rtype: str, or ``NoneType`` :returns: the full ID (None until set from the server). @@ -908,7 +908,7 @@ def labels(self): @property def full_table_id(self): - """ID for the table, in the form ``project_id:dataset_id:table_id``. + """ID for the table, in the form ``project_id:dataset_id.table_id``. Returns: str: The fully-qualified ID of the table diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index c1444ec6c084..4d5eb0f3a2a7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -221,7 +221,7 @@ def _setUpConstants(self): self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( tzinfo=UTC) self.ETAG = 'ETAG' - self.TABLE_FULL_ID = '%s:%s:%s' % ( + self.TABLE_FULL_ID = '%s:%s.%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) self.RESOURCE_URL = 'http://example.com/path/to/resource' self.NUM_BYTES = 12345 @@ -469,7 +469,7 @@ def test_props_set_by_server(self): CREATED = datetime.datetime(2015, 7, 29, 12, 13, 22, tzinfo=UTC) MODIFIED = datetime.datetime(2015, 7, 29, 14, 47, 15, tzinfo=UTC) - TABLE_FULL_ID = '%s:%s:%s' % ( + TABLE_FULL_ID = '%s:%s.%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_NAME) @@ -621,7 +621,7 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_bare(self): self._setUpConstants() RESOURCE = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), 'tableReference': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, @@ -652,7 +652,7 @@ def test_from_api_repr_w_properties(self): def test_from_api_with_encryption(self): self._setUpConstants() RESOURCE = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), 'tableReference': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, @@ -864,7 +864,7 @@ def test_ctor(self): table_id = 'coffee_table' resource = { 'kind': 'bigquery#table', - 'id': '{}:{}:{}'.format(project, dataset_id, table_id), + 'id': '{}:{}.{}'.format(project, dataset_id, table_id), 'tableReference': { 'projectId': project, 'datasetId': dataset_id, @@ -887,7 +887,7 @@ def test_ctor(self): self.assertEqual(table.table_id, table_id) self.assertEqual( table.full_table_id, - '{}:{}:{}'.format(project, dataset_id, table_id)) + '{}:{}.{}'.format(project, dataset_id, table_id)) self.assertEqual(table.reference.project, project) self.assertEqual(table.reference.dataset_id, dataset_id) self.assertEqual(table.reference.table_id, table_id) @@ -904,7 +904,7 @@ def test_ctor_view(self): table_id = 'just_looking' resource = { 'kind': 'bigquery#table', - 'id': '{}:{}:{}'.format(project, dataset_id, table_id), + 'id': '{}:{}.{}'.format(project, dataset_id, table_id), 'tableReference': { 'projectId': project, 'datasetId': dataset_id, @@ -919,7 +919,7 @@ def test_ctor_view(self): self.assertEqual(table.table_id, table_id) self.assertEqual( table.full_table_id, - '{}:{}:{}'.format(project, dataset_id, table_id)) + '{}:{}.{}'.format(project, dataset_id, table_id)) self.assertEqual(table.reference.project, project) self.assertEqual(table.reference.dataset_id, dataset_id) self.assertEqual(table.reference.table_id, table_id) From 42e3968132699dba02bb8842986262754e32f264 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 21 Feb 2018 09:17:42 -0800 Subject: [PATCH 0393/2016] Remove unnecessary debug print from tests (#4907) --- packages/google-cloud-bigquery/tests/unit/test_table.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 4d5eb0f3a2a7..76b8be291337 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1058,7 +1058,6 @@ def test_iterate(self): rows_iter = iter(row_iterator) val1 = six.next(rows_iter) - print(val1) self.assertEqual(val1.name, 'Phred Phlyntstone') self.assertEqual(row_iterator.num_results, 1) From 6724c37444d6ec86e37d6d4c951231633ba00881 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Thu, 22 Feb 2018 10:28:50 -0800 Subject: [PATCH 0394/2016] Normalize all setup.py files (#4909) --- packages/google-cloud-bigquery/setup.py | 109 +++++++++++++----------- 1 file changed, 61 insertions(+), 48 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index d84ed3d8cf2c..6a41c7a4c2eb 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -1,4 +1,4 @@ -# Copyright 2016 Google LLC +# Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,67 +12,80 @@ # See the License for the specific language governing permissions and # limitations under the License. +import io import os -from setuptools import find_packages -from setuptools import setup +import setuptools -PACKAGE_ROOT = os.path.abspath(os.path.dirname(__file__)) +# Package metadata. -with open(os.path.join(PACKAGE_ROOT, 'README.rst')) as file_obj: - README = file_obj.read() +name = 'google-cloud-bigquery' +description = 'Google BigQuery API client library' +version = '0.30.0' +# Should be one of: +# 'Development Status :: 3 - Alpha' +# 'Development Status :: 4 - Beta' +# 'Development Status :: 5 - Stable' +release_status = 'Development Status :: 4 - Beta' +dependencies = [ + 'google-cloud-core<0.29dev,>=0.28.0', + 'google-api-core<0.2.0dev,>=0.1.1', + 'google-resumable-media>=0.2.1', +] +extras = { + 'pandas': 'pandas>=0.17.1', +} + + +# Setup boilerplate below this line. + +package_root = os.path.abspath(os.path.dirname(__file__)) -# NOTE: This is duplicated throughout and we should try to -# consolidate. -SETUP_BASE = { - 'author': 'Google Cloud Platform', - 'author_email': 'googleapis-publisher@google.com', - 'scripts': [], - 'url': 'https://github.com/GoogleCloudPlatform/google-cloud-python', - 'license': 'Apache 2.0', - 'platforms': 'Posix; MacOS X; Windows', - 'include_package_data': True, - 'zip_safe': False, - 'classifiers': [ - 'Development Status :: 4 - Beta', +readme_filename = os.path.join(package_root, 'README.rst') +with io.open(readme_filename, encoding='utf-8') as readme_file: + readme = readme_file.read() + +# Only include packages under the 'google' namespace. Do not include tests, +# benchmarks, etc. +packages = [ + package for package in setuptools.find_packages() + if package.startswith('google')] + +# Determine which namespaces are needed. +namespaces = ['google'] +if 'google.cloud' in packages: + namespaces.append('google.cloud') + + +setuptools.setup( + name=name, + version=version, + description=description, + long_description=readme, + author='Google LLC', + author_email='googleapis-packages@google.com', + license='Apache 2.0', + url='https://github.com/GoogleCloudPlatform/google-cloud-python', + classifiers=[ + release_status, 'Intended Audience :: Developers', 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', + 'Programming Language :: Python', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Operating System :: OS Independent', 'Topic :: Internet', ], -} - - -REQUIREMENTS = [ - 'google-cloud-core >= 0.28.0, < 0.29dev', - 'google-api-core >= 0.1.1, < 0.2.0dev', - 'google-auth >= 1.0.0', - 'google-resumable-media >= 0.2.1', - 'requests >= 2.18.0', -] - -EXTRAS_REQUIREMENTS = { - 'pandas': ['pandas >= 0.17.1'], -} - -setup( - name='google-cloud-bigquery', - version='0.30.0', - description='Python Client for Google BigQuery', - long_description=README, - namespace_packages=[ - 'google', - 'google.cloud', - ], - packages=find_packages(exclude=('tests*',)), - install_requires=REQUIREMENTS, - extras_require=EXTRAS_REQUIREMENTS, - **SETUP_BASE + platforms='Posix; MacOS X; Windows', + packages=packages, + namespace_packages=namespaces, + install_requires=dependencies, + extras_require=extras, + include_package_data=True, + zip_safe=False, ) From 2d40ca2af2e84c7a993045f5fec6e2abcbc60d45 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 23 Feb 2018 16:34:37 -0800 Subject: [PATCH 0395/2016] Re-enable lint for tests, remove usage of pylint (#4921) --- packages/google-cloud-bigquery/.flake8 | 6 ++++++ packages/google-cloud-bigquery/nox.py | 9 +-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/.flake8 b/packages/google-cloud-bigquery/.flake8 index 25168dc87605..1f44a90f8195 100644 --- a/packages/google-cloud-bigquery/.flake8 +++ b/packages/google-cloud-bigquery/.flake8 @@ -1,5 +1,11 @@ [flake8] exclude = + # Exclude generated code. + **/proto/** + **/gapic/** + *_pb2.py + + # Standard linting exemptions. __pycache__, .git, *.pyc, diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index f459be3530d8..6ef96c4bdc35 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -145,19 +145,12 @@ def lint(session): """ session.interpreter = 'python3.6' - session.install('flake8', 'pylint', 'gcp-devrel-py-tools', *LOCAL_DEPS) + session.install('flake8') session.install('.') session.run('flake8', os.path.join('google', 'cloud', 'bigquery')) session.run('flake8', 'tests') session.run( 'flake8', os.path.join(os.pardir, 'docs', 'bigquery', 'snippets.py')) - session.run( - 'gcp-devrel-py-tools', 'run-pylint', - '--config', 'pylint.config.py', - '--library-filesets', 'google', - '--test-filesets', 'tests', - # Temporarily allow this to fail. - success_codes=range(0, 100)) @nox.session From 170b4f67d056d63ca315524f6d61668c13b3cf8b Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 26 Feb 2018 14:24:04 -0800 Subject: [PATCH 0396/2016] Install local dependencies when running lint (#4936) --- packages/google-cloud-bigquery/nox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 6ef96c4bdc35..1ca16293eff0 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -145,7 +145,7 @@ def lint(session): """ session.interpreter = 'python3.6' - session.install('flake8') + session.install('flake8', *LOCAL_DEPS) session.install('.') session.run('flake8', os.path.join('google', 'cloud', 'bigquery')) session.run('flake8', 'tests') From 7db29e1fa052093c9427d522004288b91197c9cf Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 27 Feb 2018 10:50:49 -0800 Subject: [PATCH 0397/2016] Make `api_core.page_iterator.PageIterator.item_to_value` public --- packages/google-cloud-bigquery/setup.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_table.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 6a41c7a4c2eb..89f305b42400 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -30,7 +30,7 @@ release_status = 'Development Status :: 4 - Beta' dependencies = [ 'google-cloud-core<0.29dev,>=0.28.0', - 'google-api-core<0.2.0dev,>=0.1.1', + 'google-api-core<0.2.0dev,>=0.1.5.dev1', 'google-resumable-media>=0.2.1', ] extras = { diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 76b8be291337..443b65c4910d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1027,7 +1027,7 @@ def test_constructor(self): self.assertFalse(iterator._started) self.assertIs(iterator.client, client) self.assertEqual(iterator.path, path) - self.assertIs(iterator._item_to_value, _item_to_row) + self.assertIs(iterator.item_to_value, _item_to_row) self.assertEqual(iterator._items_key, 'rows') self.assertIsNone(iterator.max_results) self.assertEqual(iterator.extra_params, {}) From da5c35770d8ee8ccfc0929dd4b1c08278c0482f6 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 27 Feb 2018 11:17:14 -0800 Subject: [PATCH 0398/2016] Update dependency range for api-core to include v1.0.0 releases (#4944) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 89f305b42400..47ab9f560461 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -30,7 +30,7 @@ release_status = 'Development Status :: 4 - Beta' dependencies = [ 'google-cloud-core<0.29dev,>=0.28.0', - 'google-api-core<0.2.0dev,>=0.1.5.dev1', + 'google-api-core<2.0.0dev,>=0.1.5.dev1', 'google-resumable-media>=0.2.1', ] extras = { From a4d8c998aac105ea577994db8c470d2db6610a35 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 27 Feb 2018 13:45:05 -0800 Subject: [PATCH 0399/2016] Update minimum api-core version to 1.0.0 for Datastore, BigQuery, Trace, Logging, and Spanner (#4946) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 47ab9f560461..aa5a804a0d3b 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -30,7 +30,7 @@ release_status = 'Development Status :: 4 - Beta' dependencies = [ 'google-cloud-core<0.29dev,>=0.28.0', - 'google-api-core<2.0.0dev,>=0.1.5.dev1', + 'google-api-core<2.0.0dev,>=1.0.0', 'google-resumable-media>=0.2.1', ] extras = { From 0055c95fdb065dd606d1afd0b005a83d4db26377 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Wed, 28 Feb 2018 09:04:52 -0800 Subject: [PATCH 0400/2016] Release bigquery 0.31.0 (#4948) --- packages/google-cloud-bigquery/CHANGELOG.md | 26 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 7931a8a58ec4..c832f39ada80 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,32 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 0.31.0 + +### Interface additions + +- Add support for `EncryptionConfiguration` (#4845) + +### Implementation changes + +- Allow listing/getting jobs even when there is an "invalid" job. (#4786) + +### Dependencies + +- The minimum version for `google-api-core` has been updated to version 1.0.0. This may cause some incompatibility with older google-cloud libraries, you will need to update those libraries if you have a dependency conflict. (#4944, #4946) + +### Documentation + +- Update format in `Table.full_table_id` and `TableListItem.full_table_id` docstrings. (#4906) + +### Testing and internal changes + +- Install local dependencies when running lint (#4936) +- Re-enable lint for tests, remove usage of pylint (#4921) +- Normalize all setup.py files (#4909) +- Remove unnecessary debug print from tests (#4907) +- Use constant strings for job properties in tests (#4833) + ## 0.30.0 This is the release candidate for v1.0.0. diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index aa5a804a0d3b..3386789262c4 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '0.30.0' +version = '0.31.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 8ba6fc993f8267e2d0f2573e83f8d95f209ad0e1 Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Mon, 5 Mar 2018 10:03:24 -0800 Subject: [PATCH 0401/2016] Bigquery: Job string constant parameters in init and snippets documentation (#4987) --- .../google/cloud/bigquery/__init__.py | 19 +++++- .../google/cloud/bigquery/job.py | 62 ++++++++++++++++--- 2 files changed, 72 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 6b988607fd25..5bfcefc9084e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -36,14 +36,21 @@ from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.job import Compression from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import CopyJobConfig +from google.cloud.bigquery.job import CreateDisposition +from google.cloud.bigquery.job import DestinationFormat +from google.cloud.bigquery.job import Encoding from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import ExtractJobConfig -from google.cloud.bigquery.job import QueryJob -from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import LoadJobConfig +from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.job import QueryJobConfig +from google.cloud.bigquery.job import QueryPriority +from google.cloud.bigquery.job import SourceFormat +from google.cloud.bigquery.job import WriteDisposition from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter @@ -94,4 +101,12 @@ 'CSVOptions', 'GoogleSheetsOptions', 'DEFAULT_RETRY', + # Enum Constants + 'Compression', + 'CreateDisposition', + 'DestinationFormat', + 'Encoding', + 'QueryPriority', + 'SourceFormat', + 'WriteDisposition' ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5eace9dbcf4e..df2b9f96ce4a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -90,38 +90,72 @@ def _error_result_to_exception(error_result): class Compression(_EnumApiResourceProperty): - """Pseudo-enum for ``compression`` properties.""" + """The compression type to use for exported files. + + Possible values include `GZIP` and `NONE`. The default value is `NONE`. + """ GZIP = 'GZIP' NONE = 'NONE' class CreateDisposition(_EnumApiResourceProperty): - """Pseudo-enum for ``create_disposition`` properties.""" + """Specifies whether the job is allowed to create new tables. + + The following values are supported: + `CREATE_IF_NEEDED`: If the table does not exist, BigQuery creates + the table. + `CREATE_NEVER`: The table must already exist. If it does not, + a 'notFound' error is returned in the job result. + The default value is `CREATE_IF_NEEDED`. + + Creation, truncation and append actions occur as one atomic update + upon job completion. + """ CREATE_IF_NEEDED = 'CREATE_IF_NEEDED' CREATE_NEVER = 'CREATE_NEVER' class DestinationFormat(_EnumApiResourceProperty): - """Pseudo-enum for ``destination_format`` properties.""" + """The exported file format. + + Possible values include `CSV`, `NEWLINE_DELIMITED_JSON` and `AVRO`. + The default value is `CSV`. Tables with nested or repeated fields + cannot be exported as CSV. + """ CSV = 'CSV' NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' AVRO = 'AVRO' class Encoding(_EnumApiResourceProperty): - """Pseudo-enum for ``encoding`` properties.""" + """The character encoding of the data. The supported values + are `UTF-8` or `ISO-8859-1`. The default value is `UTF-8`. + + BigQuery decodes the data after the raw, binary data has been + split using the values of the quote and fieldDelimiter properties. + """ UTF_8 = 'UTF-8' ISO_8559_1 = 'ISO-8559-1' class QueryPriority(_EnumApiResourceProperty): - """Pseudo-enum for ``QueryJob.priority`` property.""" + """Specifies a priority for the query. + + Possible values include `INTERACTIVE` and `BATCH`. The default value + is `INTERACTIVE`. + """ INTERACTIVE = 'INTERACTIVE' BATCH = 'BATCH' class SourceFormat(_EnumApiResourceProperty): - """Pseudo-enum for ``source_format`` properties.""" + """The format of the data files. + + For CSV files, specify `CSV`. For datastore backups, specify + `DATASTORE_BACKUP`. For newline-delimited json, specify + `NEWLINE_DELIMITED_JSON`. For Avro, specify `AVRO`. The default + value is `CSV`. + """ CSV = 'CSV' DATASTORE_BACKUP = 'DATASTORE_BACKUP' NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' @@ -129,7 +163,21 @@ class SourceFormat(_EnumApiResourceProperty): class WriteDisposition(_EnumApiResourceProperty): - """Pseudo-enum for ``write_disposition`` properties.""" + """Specifies the action that occurs if destination table already exists. + + The following values are supported: + `WRITE_TRUNCATE`: If the table already exists, BigQuery overwrites the + table data. + `WRITE_APPEND`: If the table already exists, BigQuery appends the data + to the table. + `WRITE_EMPTY`: If the table already exists and contains data, a 'duplicate' + error is returned in the job result. + The default value is `WRITE_APPEND`. + + Each action is atomic and only occurs if BigQuery is able to complete + the job successfully. Creation, truncation and append actions occur as one + atomic update upon job completion. + """ WRITE_APPEND = 'WRITE_APPEND' WRITE_TRUNCATE = 'WRITE_TRUNCATE' WRITE_EMPTY = 'WRITE_EMPTY' From 78de4bf003543f3029896a6798a46835c374f178 Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Mon, 5 Mar 2018 14:17:54 -0800 Subject: [PATCH 0402/2016] Fix encoding variable and strings UTF-8 and ISO-8859-1 difference documentation (#4990) --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index df2b9f96ce4a..15167941e110 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -129,7 +129,8 @@ class DestinationFormat(_EnumApiResourceProperty): class Encoding(_EnumApiResourceProperty): """The character encoding of the data. The supported values - are `UTF-8` or `ISO-8859-1`. The default value is `UTF-8`. + are `UTF_8` corresponding to `'UTF-8'` or `ISO_8859_1` corresponding to + `'ISO-8559-1'`. The default value is `UTF_8`. BigQuery decodes the data after the raw, binary data has been split using the values of the quote and fieldDelimiter properties. From 3823c67004ed4f85e41d45d1a5b89a1a5ae26b4c Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Thu, 8 Mar 2018 12:05:50 -0800 Subject: [PATCH 0403/2016] Bigquery: remove test_load_table_from_uri_w_autodetect_schema_then_get_job because of duplicate test in snippets (#5004) --- .../google-cloud-bigquery/tests/system.py | 44 ------------------- 1 file changed, 44 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index e96dd9458926..3ed62dad0287 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -543,50 +543,6 @@ def test_load_table_from_uri_then_dump_table(self): self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) - def test_load_table_from_uri_w_autodetect_schema_then_get_job(self): - from google.cloud.bigquery import SchemaField - from google.cloud.bigquery.job import LoadJob - - rows = ROWS * 100 - # BigQuery internally uses the first 100 rows to detect schema - - gs_url = self._write_csv_to_storage( - 'bq_load_test' + unique_resource_id(), 'person_ages.csv', - HEADER_ROW, rows) - dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) - table_ref = dataset.table('test_table') - JOB_ID = 'load_table_w_autodetect_{}'.format(str(uuid.uuid4())) - - config = bigquery.LoadJobConfig() - config.autodetect = True - job = Config.CLIENT.load_table_from_uri( - gs_url, table_ref, job_config=config, job_id=JOB_ID) - - # Allow for 90 seconds of "warm up" before rows visible. See - # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability - # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds - retry = RetryInstanceState(_job_done, max_tries=8) - retry(job.reload)() - - table = Config.CLIENT.get_table(table_ref) - self.to_delete.insert(0, table) - field_name = SchemaField( - u'Full_Name', u'string', u'NULLABLE', None, ()) - field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ()) - self.assertEqual(table.schema, [field_name, field_age]) - - actual_rows = self._fetch_single_page(table) - actual_row_tuples = [r.values() for r in actual_rows] - by_age = operator.itemgetter(1) - self.assertEqual( - sorted(actual_row_tuples, key=by_age), sorted(rows, key=by_age)) - - fetched_job = Config.CLIENT.get_job(JOB_ID) - - self.assertIsInstance(fetched_job, LoadJob) - self.assertEqual(fetched_job.job_id, JOB_ID) - self.assertEqual(fetched_job.autodetect, True) - def _create_storage(self, bucket_name, blob_name): from google.cloud.storage import Client as StorageClient From b02fee1c6a6d68f548b1fe4fb27e5aaf9cd69f0e Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 12 Mar 2018 11:10:43 -0700 Subject: [PATCH 0404/2016] Add IPython magics for running queries (#4983) --- .../google/cloud/bigquery/__init__.py | 8 + .../google/cloud/bigquery/magics.py | 280 ++++++++++++++++++ packages/google-cloud-bigquery/nox.py | 4 +- .../google-cloud-bigquery/tests/system.py | 58 ++++ .../tests/unit/test_magics.py | 261 ++++++++++++++++ 5 files changed, 609 insertions(+), 2 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/magics.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_magics.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 5bfcefc9084e..0068acea347b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -110,3 +110,11 @@ 'SourceFormat', 'WriteDisposition' ] + + +def load_ipython_extension(ipython): + """Called by IPython when this module is loaded as an IPython extension.""" + from google.cloud.bigquery.magics import _cell_magic + + ipython.register_magic_function( + _cell_magic, magic_kind='cell', magic_name='bigquery') diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py new file mode 100644 index 000000000000..ed54d9c04b59 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -0,0 +1,280 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""IPython Magics + +.. function:: %%bigquery + + IPython cell magic to run a query and display the result as a DataFrame + + .. code-block:: python + + %%bigquery [] [--project ] [--use_legacy_sql] + [--verbose] + + + Parameters: + + * ```` (optional, line argument): + variable to store the query results. + * ``--project `` (optional, line argument): + Project to use for running the query. Defaults to the context + :attr:`~google.cloud.bigquery.magics.Context.project`. + * ``--use_legacy_sql`` (optional, line argument): + Runs the query using Legacy SQL syntax. Defaults to Standard SQL if + this argument not used. + * ``--verbose`` (optional, line argument): + If this flag is used, information including the query job ID and the + amount of time for the query to complete will not be cleared after the + query is finished. By default, this information will be displayed but + will be cleared after the query is finished. + * ```` (required, cell argument): + SQL query to run. + + Returns: + A :class:`pandas.DataFrame` with the query results. + + .. note:: + All queries run using this magic will run using the context + :attr:`~google.cloud.bigquery.magics.Context.credentials`. + + Examples: + The following examples can be run in an IPython notebook after loading + the bigquery IPython extension (see ``In[1]``) and setting up + Application Default Credentials. + + .. code-block:: python + + In [1]: %load_ext google.cloud.bigquery + + In [2]: %%bigquery + ...: SELECT name, SUM(number) as count + ...: FROM `bigquery-public-data.usa_names.usa_1910_current` + ...: GROUP BY name + ...: ORDER BY count DESC + ...: LIMIT 3 + + Out[2]: name count + ...: ------------------- + ...: 0 James 4987296 + ...: 1 John 4866302 + ...: 2 Robert 4738204 + + In [3]: %%bigquery df --project my-alternate-project --verbose + ...: SELECT name, SUM(number) as count + ...: FROM `bigquery-public-data.usa_names.usa_1910_current` + ...: WHERE gender = 'F' + ...: GROUP BY name + ...: ORDER BY count DESC + ...: LIMIT 3 + Executing query with job ID: bf633912-af2c-4780-b568-5d868058632b + Query executing: 2.61s + Query complete after 2.92s + + Out[3]: name count + ...: ---------------------- + ...: 0 Mary 3736239 + ...: 1 Patricia 1568495 + ...: 2 Elizabeth 1519946 + + In [4]: df + + Out[4]: name count + ...: ---------------------- + ...: 0 Mary 3736239 + ...: 1 Patricia 1568495 + ...: 2 Elizabeth 1519946 + +""" + +from __future__ import print_function + +import time +from concurrent import futures + +try: + import IPython + from IPython import display + from IPython.core import magic_arguments +except ImportError: # pragma: NO COVER + raise ImportError('This module can only be loaded in IPython.') + +import google.auth +from google.cloud import bigquery + + +class Context(object): + """Storage for objects to be used throughout an IPython notebook session. + + A Context object is initialized when the ``magics`` module is imported, + and can be found at ``google.cloud.bigquery.magics.context``. + """ + def __init__(self): + self._credentials = None + self._project = None + + @property + def credentials(self): + """google.auth.credentials.Credentials: Credentials to use for queries + performed through IPython magics + + Note: + These credentials do not need to be explicitly defined if you are + using Application Default Credentials. If you are not using + Application Default Credentials, manually construct a + :class:`google.auth.credentials.Credentials` object and set it as + the context credentials as demonstrated in the example below. See + `auth docs`_ for more information on obtaining credentials. + + Example: + Manually setting the context credentials: + + >>> from google.cloud.bigquery import magics + >>> from google.oauth2 import service_account + >>> credentials = (service_account + ... .Credentials.from_service_account_file( + ... '/path/to/key.json')) + >>> magics.context.credentials = credentials + + + .. _auth docs: http://google-auth.readthedocs.io + /en/latest/user-guide.html#obtaining-credentials + """ + if self._credentials is None: + self._credentials, _ = google.auth.default() + return self._credentials + + @credentials.setter + def credentials(self, value): + self._credentials = value + + @property + def project(self): + """str: Default project to use for queries performed through IPython + magics + + Note: + The project does not need to be explicitly defined if you have an + environment default project set. If you do not have a default + project set in your environment, manually assign the project as + demonstrated in the example below. + + Example: + Manually setting the context project: + + >>> from google.cloud.bigquery import magics + >>> magics.context.project = 'my-project' + """ + if self._project is None: + _, self._project = google.auth.default() + return self._project + + @project.setter + def project(self, value): + self._project = value + + +context = Context() + + +def _run_query(client, query, job_config=None): + """Runs a query while printing status updates + + Args: + client (google.cloud.bigquery.client.Client): + Client to bundle configuration needed for API requests. + query (str): + SQL query to be executed. Defaults to the standard SQL dialect. + Use the ``job_config`` parameter to change dialects. + job_config (google.cloud.bigquery.job.QueryJobConfig, optional): + Extra configuration options for the job. + + Returns: + google.cloud.bigquery.job.QueryJob: the query job created + + Example: + >>> client = bigquery.Client() + >>> _run_query(client, "SELECT 17") + Executing query with job ID: bf633912-af2c-4780-b568-5d868058632b + Query executing: 1.66s + Query complete after 2.07s + 'bf633912-af2c-4780-b568-5d868058632b' + """ + start_time = time.time() + query_job = client.query(query, job_config=job_config) + print('Executing query with job ID: {}'.format(query_job.job_id)) + + while True: + print('\rQuery executing: {:0.2f}s'.format( + time.time() - start_time), end='') + try: + query_job.result(timeout=0.5) + break + except futures.TimeoutError: + continue + print('\nQuery complete after {:0.2f}s'.format(time.time() - start_time)) + return query_job + + +@magic_arguments.magic_arguments() +@magic_arguments.argument( + 'destination_var', + nargs='?', + help=('If provided, save the output to this variable in addition ' + 'to displaying it.')) +@magic_arguments.argument( + '--project', + type=str, + default=None, + help=('Project to use for executing this query. Defaults to the context ' + 'project.')) +@magic_arguments.argument( + '--use_legacy_sql', action='store_true', default=False, + help=('Sets query to use Legacy SQL instead of Standard SQL. Defaults to ' + 'Standard SQL if this argument is not used.')) +@magic_arguments.argument( + '--verbose', action='store_true', default=False, + help=('If set, print verbose output, including the query job ID and the ' + 'amount of time for the query to finish. By default, this ' + 'information will be displayed as the query runs, but will be ' + 'cleared after the query is finished.')) +def _cell_magic(line, query): + """Underlying function for bigquery cell magic + + Note: + This function contains the underlying logic for the 'bigquery' cell + magic. This function is not meant to be called directly. + + Args: + line (str): "%%bigquery" followed by arguments as required + query (str): SQL query to run + + Returns: + pandas.DataFrame: the query results. + """ + args = magic_arguments.parse_argstring(_cell_magic, line) + + project = args.project or context.project + client = bigquery.Client(project=project, credentials=context.credentials) + job_config = bigquery.job.QueryJobConfig() + job_config.use_legacy_sql = args.use_legacy_sql + query_job = _run_query(client, query, job_config) + + if not args.verbose: + display.clear_output() + + result = query_job.to_dataframe() + if args.destination_var: + IPython.get_ipython().push({args.destination_var: result}) + return result diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 1ca16293eff0..11b135df79ca 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -35,7 +35,7 @@ def default(session): run the tests. """ # Install all test dependencies, then install this package in-place. - session.install('mock', 'pytest', 'pytest-cov', *LOCAL_DEPS) + session.install('mock', 'pytest', 'pytest-cov', 'ipython', *LOCAL_DEPS) if session.interpreter == 'python3.4': session.install('-e', '.') else: @@ -87,7 +87,7 @@ def system(session, py): # Install all test dependencies, then install this package into the # virtualenv's dist-packages. - session.install('mock', 'pytest', *LOCAL_DEPS) + session.install('mock', 'pytest', 'ipython', *LOCAL_DEPS) session.install( os.path.join('..', 'storage'), os.path.join('..', 'test_utils'), diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 3ed62dad0287..515719f7ec77 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -22,12 +22,21 @@ import time import unittest import uuid +import re import six +import pytest try: import pandas except ImportError: # pragma: NO COVER pandas = None +try: + import IPython + from IPython.utils import io + from IPython.testing import tools + from IPython.terminal import interactiveshell +except ImportError: # pragma: NO COVER + IPython = None from google.api_core.exceptions import PreconditionFailed from google.cloud import bigquery @@ -1571,6 +1580,37 @@ def temp_dataset(self, dataset_id): self.to_delete.append(dataset) return dataset + @pytest.mark.skipif(pandas is None, reason='Requires `pandas`') + @pytest.mark.skipif(IPython is None, reason='Requires `ipython`') + @pytest.mark.usefixtures('ipython_interactive') + def test_bigquery_magic(self): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + sql = """ + SELECT + CONCAT( + 'https://stackoverflow.com/questions/', + CAST(id as STRING)) as url, + view_count + FROM `bigquery-public-data.stackoverflow.posts_questions` + WHERE tags like '%google-bigquery%' + ORDER BY view_count DESC + LIMIT 10 + """ + with io.capture_output() as captured: + result = ip.run_cell_magic('bigquery', '', sql) + + lines = re.split('\n|\r', captured.stdout) + # Removes blanks & terminal code (result of display clearing) + updates = list(filter(lambda x: bool(x) and x != '\x1b[2K', lines)) + assert re.match("Executing query with job ID: .*", updates[0]) + assert all(re.match("Query executing: .*s", line) + for line in updates[1:-1]) + assert re.match("Query complete after .*s", updates[-1]) + assert isinstance(result, pandas.DataFrame) + assert len(result) == 10 # verify row count + assert list(result) == ['url', 'view_count'] # verify column names + def _job_done(instance): return instance.state.lower() == 'done' @@ -1591,3 +1631,21 @@ def _table_exists(t): return True except NotFound: return False + + +@pytest.fixture(scope='session') +def ipython(): + config = tools.default_config() + config.TerminalInteractiveShell.simple_prompt = True + shell = interactiveshell.TerminalInteractiveShell.instance(config=config) + return shell + + +@pytest.fixture() +def ipython_interactive(request, ipython): + """Activate IPython's builtin hooks + + for the duration of the test scope. + """ + with ipython.builtin_trap: + yield ipython diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py new file mode 100644 index 000000000000..800edf2918bc --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -0,0 +1,261 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import mock +from concurrent import futures + +import pytest +try: + import pandas +except ImportError: # pragma: NO COVER + pandas = None +try: + import IPython + from IPython.utils import io + from IPython.testing import tools + from IPython.terminal import interactiveshell +except ImportError: # pragma: NO COVER + IPython = None + +import google.auth.credentials +from google.cloud.bigquery import table +from google.cloud.bigquery import magics + + +pytestmark = pytest.mark.skipif(IPython is None, reason='Requires `ipython`') + + +@pytest.fixture(scope='session') +def ipython(): + config = tools.default_config() + config.TerminalInteractiveShell.simple_prompt = True + shell = interactiveshell.TerminalInteractiveShell.instance(config=config) + return shell + + +@pytest.fixture() +def ipython_interactive(request, ipython): + """Activate IPython's builtin hooks + + for the duration of the test scope. + """ + with ipython.builtin_trap: + yield ipython + + +def test_context_credentials_auto_set_w_application_default_credentials(): + """When Application Default Credentials are set, the context credentials + will be created the first time it is called + """ + assert magics.context._credentials is None + assert magics.context._project is None + + project = 'prahj-ekt' + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + default_patch = mock.patch( + 'google.auth.default', return_value=(credentials_mock, project)) + with default_patch as default_mock: + assert magics.context.credentials is credentials_mock + assert magics.context.project == project + + assert default_mock.call_count == 2 + + +def test_context_credentials_and_project_can_be_set_explicitly(): + project1 = 'one-project-55564' + project2 = 'other-project-52569' + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + default_patch = mock.patch( + 'google.auth.default', return_value=(credentials_mock, project1)) + with default_patch as default_mock: + magics.context.credentials = credentials_mock + magics.context.project = project2 + + assert magics.context.project == project2 + assert magics.context.credentials is credentials_mock + # default should not be called if credentials & project are explicitly set + assert default_mock.call_count == 0 + + +def test__run_query(): + magics.context._credentials = None + + job_id = 'job_1234' + sql = 'SELECT 17' + responses = [ + futures.TimeoutError, + futures.TimeoutError, + [table.Row((17,), {'num': 0})] + ] + + client_patch = mock.patch( + 'google.cloud.bigquery.magics.bigquery.Client', autospec=True) + with client_patch as client_mock, io.capture_output() as captured: + client_mock().query(sql).result.side_effect = responses + client_mock().query(sql).job_id = job_id + + query_job = magics._run_query(client_mock(), sql) + + lines = re.split('\n|\r', captured.stdout) + # Removes blanks & terminal code (result of display clearing) + updates = list(filter(lambda x: bool(x) and x != '\x1b[2K', lines)) + + assert query_job.job_id == job_id + expected_first_line = "Executing query with job ID: {}".format(job_id) + assert updates[0] == expected_first_line + execution_updates = updates[1:-1] + assert len(execution_updates) == 3 # one update per API response + assert all(re.match("Query executing: .*s", line) + for line in execution_updates) + assert re.match("Query complete after .*s", updates[-1]) + + +@pytest.mark.usefixtures('ipython_interactive') +def test_extension_load(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + + # verify that the magic is registered and has the correct source + magic = ip.magics_manager.magics['cell'].get('bigquery') + assert magic.__module__ == 'google.cloud.bigquery.magics' + + +@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +def test_bigquery_magic_without_optional_arguments(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + + sql = 'SELECT 17 AS num' + result = pandas.DataFrame([17], columns=['num']) + run_query_patch = mock.patch( + 'google.cloud.bigquery.magics._run_query', autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + result = ip.run_cell_magic('bigquery', '', sql) + + assert isinstance(result, pandas.DataFrame) + assert len(result) == len(result) # verify row count + assert list(result) == list(result) # verify column names + + +@pytest.mark.usefixtures('ipython_interactive') +def test_bigquery_magic_with_legacy_sql(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + + run_query_patch = mock.patch( + 'google.cloud.bigquery.magics._run_query', autospec=True) + with run_query_patch as run_query_mock: + ip.run_cell_magic( + 'bigquery', '--use_legacy_sql', 'SELECT 17 AS num') + + job_config_used = run_query_mock.call_args_list[0][0][-1] + assert job_config_used.use_legacy_sql is True + + +@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +def test_bigquery_magic_with_result_saved_to_variable(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + + sql = 'SELECT 17 AS num' + result = pandas.DataFrame([17], columns=['num']) + assert 'myvariable' not in ip.user_ns + + run_query_patch = mock.patch( + 'google.cloud.bigquery.magics._run_query', autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + ip.run_cell_magic('bigquery', 'df', sql) + + assert 'df' in ip.user_ns # verify that variable exists + df = ip.user_ns['df'] + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names + + +@pytest.mark.usefixtures('ipython_interactive') +def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + + clear_patch = mock.patch( + 'google.cloud.bigquery.magics.display.clear_output', autospec=True) + run_query_patch = mock.patch( + 'google.cloud.bigquery.magics._run_query', autospec=True) + with clear_patch as clear_mock, run_query_patch: + ip.run_cell_magic('bigquery', '--verbose', 'SELECT 17 as num') + + assert clear_mock.call_count == 0 + + +@pytest.mark.usefixtures('ipython_interactive') +def test_bigquery_magic_clears_display_in_verbose_mode(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + + clear_patch = mock.patch( + 'google.cloud.bigquery.magics.display.clear_output', autospec=True) + run_query_patch = mock.patch( + 'google.cloud.bigquery.magics._run_query', autospec=True) + with clear_patch as clear_mock, run_query_patch: + ip.run_cell_magic('bigquery', '', 'SELECT 17 as num') + + assert clear_mock.call_count == 1 + + +@pytest.mark.usefixtures('ipython_interactive') +def test_bigquery_magic_with_project(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context._project = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + default_patch = mock.patch( + 'google.auth.default', + return_value=(credentials_mock, 'general-project')) + run_query_patch = mock.patch( + 'google.cloud.bigquery.magics._run_query', autospec=True) + with run_query_patch as run_query_mock, default_patch: + ip.run_cell_magic( + 'bigquery', '--project=specific-project', 'SELECT 17 as num') + + client_used = run_query_mock.call_args_list[0][0][0] + assert client_used.project == 'specific-project' + # context project should not change + assert magics.context.project == 'general-project' From 39a0552f737a322b01c861d4722aa54b1a8fa7bd Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Thu, 15 Mar 2018 08:52:22 -0700 Subject: [PATCH 0405/2016] Fix bad trove classifier --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 3386789262c4..fc3e3562d3f9 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -26,7 +26,7 @@ # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' -# 'Development Status :: 5 - Stable' +# 'Development Status :: 5 - Production/Stable' release_status = 'Development Status :: 4 - Beta' dependencies = [ 'google-cloud-core<0.29dev,>=0.28.0', From df16942dd432738e43281c87402d6c87c625eb27 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 16 Mar 2018 14:34:49 -0700 Subject: [PATCH 0406/2016] BigQuery: Adds Parquet SourceFormat and samples (#5057) * adds parquet and samples * changes StringIO to BytesIO in snippets --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 15167941e110..e955d34be14f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -154,13 +154,14 @@ class SourceFormat(_EnumApiResourceProperty): For CSV files, specify `CSV`. For datastore backups, specify `DATASTORE_BACKUP`. For newline-delimited json, specify - `NEWLINE_DELIMITED_JSON`. For Avro, specify `AVRO`. The default - value is `CSV`. + `NEWLINE_DELIMITED_JSON`. For Avro, specify `AVRO`. For Parquet, specify + `PARQUET`. The default value is `CSV`. """ CSV = 'CSV' DATASTORE_BACKUP = 'DATASTORE_BACKUP' NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' AVRO = 'AVRO' + PARQUET = 'PARQUET' class WriteDisposition(_EnumApiResourceProperty): From 55138184006346db2731e85d302911bb3fb1c1ff Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 19 Mar 2018 13:18:00 -0700 Subject: [PATCH 0407/2016] BigQuery: Use autospec for Connection in tests. (#5066) * Use autospec for Connection in job tests. Use more standard autospec mock for Connection mock class. * Use `assert_called_once_with()` for connection mocks. --- .../tests/unit/test_client.py | 802 ++++++++---------- .../tests/unit/test_job.py | 758 ++++++++--------- 2 files changed, 724 insertions(+), 836 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 9a0b24ad4bfe..ef3f94ab2998 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -32,6 +32,16 @@ def _make_credentials(): return mock.Mock(spec=google.auth.credentials.Credentials) +def _make_connection(*responses): + import google.cloud.bigquery._http + from google.cloud.exceptions import NotFound + + mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) + mock_conn.USER_AGENT = 'testing 1.2.3' + mock_conn.api_request.side_effect = list(responses) + [NotFound('miss')] + return mock_conn + + class TestClient(unittest.TestCase): PROJECT = 'PROJECT' @@ -65,19 +75,16 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _Connection() + conn = client._connection = _make_connection() with self.assertRaises(NotFound): client._get_query_results( 'nothere', None, project='other-project', timeout_ms=500) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual( - req['path'], '/projects/other-project/queries/nothere') - self.assertEqual( - req['query_params'], {'maxResults': 0, 'timeoutMs': 500}) + conn.api_request.assert_called_once_with( + method='GET', + path='/projects/other-project/queries/nothere', + query_params={'maxResults': 0, 'timeoutMs': 500}) def test__get_query_results_hit(self): job_id = 'query_job' @@ -110,7 +117,7 @@ def test__get_query_results_hit(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - client._connection = _Connection(data) + client._connection = _make_connection(data) query_results = client._get_query_results(job_id, None) self.assertEqual(query_results.total_rows, 10) @@ -121,7 +128,6 @@ def test_list_projects_defaults(self): PROJECT_1 = 'PROJECT_ONE' PROJECT_2 = 'PROJECT_TWO' - PATH = 'projects' TOKEN = 'TOKEN' DATA = { 'nextPageToken': TOKEN, @@ -140,7 +146,7 @@ def test_list_projects_defaults(self): } creds = _make_credentials() client = self._make_one(PROJECT_1, creds) - conn = client._connection = _Connection(DATA) + conn = client._connection = _make_connection(DATA) iterator = client.list_projects() page = six.next(iterator.pages) @@ -155,18 +161,15 @@ def test_list_projects_defaults(self): self.assertEqual(found.friendly_name, expected['friendlyName']) self.assertEqual(token, TOKEN) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + conn.api_request.assert_called_once_with( + method='GET', path='/projects', query_params={}) def test_list_projects_explicit_response_missing_projects_key(self): - PATH = 'projects' TOKEN = 'TOKEN' DATA = {} creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _Connection(DATA) + conn = client._connection = _make_connection(DATA) iterator = client.list_projects(max_results=3, page_token=TOKEN) page = six.next(iterator.pages) @@ -176,12 +179,10 @@ def test_list_projects_explicit_response_missing_projects_key(self): self.assertEqual(len(projects), 0) self.assertIsNone(token) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'maxResults': 3, 'pageToken': TOKEN}) + conn.api_request.assert_called_once_with( + method='GET', + path='/projects', + query_params={'maxResults': 3, 'pageToken': TOKEN}) def test_list_datasets_defaults(self): from google.cloud.bigquery.dataset import DatasetListItem @@ -207,7 +208,7 @@ def test_list_datasets_defaults(self): } creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _Connection(DATA) + conn = client._connection = _make_connection(DATA) iterator = client.list_datasets() page = six.next(iterator.pages) @@ -221,10 +222,8 @@ def test_list_datasets_defaults(self): self.assertEqual(found.friendly_name, expected['friendlyName']) self.assertEqual(token, TOKEN) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + conn.api_request.assert_called_once_with( + method='GET', path='/%s' % PATH, query_params={}) def test_list_datasets_explicit_response_missing_datasets_key(self): PATH = 'projects/%s/datasets' % self.PROJECT @@ -233,7 +232,7 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): DATA = {} creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _Connection(DATA) + conn = client._connection = _make_connection(DATA) iterator = client.list_datasets( include_all=True, filter=FILTER, @@ -245,13 +244,15 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): self.assertEqual(len(datasets), 0) self.assertIsNone(token) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'all': True, 'filter': FILTER, - 'maxResults': 3, 'pageToken': TOKEN}) + conn.api_request.assert_called_once_with( + method='GET', + path='/%s' % PATH, + query_params={ + 'all': True, + 'filter': FILTER, + 'maxResults': 3, + 'pageToken': TOKEN, + }) def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference @@ -292,45 +293,43 @@ def test_get_dataset(self): 'datasetId': self.DS_ID, }, } - conn = client._connection = _Connection(resource) + conn = client._connection = _make_connection(resource) dataset_ref = client.dataset(self.DS_ID) dataset = client.get_dataset(dataset_ref) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % path) + conn.api_request.assert_called_once_with( + method='GET', path='/%s' % path) self.assertEqual(dataset.dataset_id, self.DS_ID) # Test retry. # Not a cloud API exception (missing 'errors' field). - client._connection = _Connection(Exception(''), resource) + client._connection = _make_connection(Exception(''), resource) with self.assertRaises(Exception): client.get_dataset(dataset_ref) # Zero-length errors field. - client._connection = _Connection(ServerError(''), resource) + client._connection = _make_connection(ServerError(''), resource) with self.assertRaises(ServerError): client.get_dataset(dataset_ref) # Non-retryable reason. - client._connection = _Connection( + client._connection = _make_connection( ServerError('', errors=[{'reason': 'serious'}]), resource) with self.assertRaises(ServerError): client.get_dataset(dataset_ref) # Retryable reason, but retry is disabled. - client._connection = _Connection( + client._connection = _make_connection( ServerError('', errors=[{'reason': 'backendError'}]), resource) with self.assertRaises(ServerError): client.get_dataset(dataset_ref, retry=None) # Retryable reason, default retry: success. - client._connection = _Connection( + client._connection = _make_connection( ServerError('', errors=[{'reason': 'backendError'}]), resource) dataset = client.get_dataset(dataset_ref) @@ -348,18 +347,16 @@ def test_create_dataset_minimal(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) ds = client.create_dataset(Dataset(client.dataset(self.DS_ID))) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'labels': {}, - } - self.assertEqual(req['data'], SENT) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % PATH, + data={ + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'labels': {}, + }) self.assertEqual(ds.dataset_id, self.DS_ID) self.assertEqual(ds.project, self.PROJECT) self.assertEqual(ds.etag, RESOURCE['etag']) @@ -395,7 +392,7 @@ def test_create_dataset_w_attrs(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) entries = [AccessEntry('OWNER', 'userByEmail', USER_EMAIL), AccessEntry(None, 'view', VIEW)] ds_arg = Dataset(client.dataset(self.DS_ID)) @@ -406,23 +403,21 @@ def test_create_dataset_w_attrs(self): ds_arg.location = LOCATION ds_arg.labels = LABELS ds = client.create_dataset(ds_arg) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'description': DESCRIPTION, - 'friendlyName': FRIENDLY_NAME, - 'location': LOCATION, - 'defaultTableExpirationMs': 3600, - 'access': [ - {'role': 'OWNER', 'userByEmail': USER_EMAIL}, - {'view': VIEW}], - 'labels': LABELS, - } - self.assertEqual(req['data'], SENT) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % PATH, + data={ + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'defaultTableExpirationMs': 3600, + 'access': [ + {'role': 'OWNER', 'userByEmail': USER_EMAIL}, + {'view': VIEW}], + 'labels': LABELS, + }) self.assertEqual(ds.dataset_id, self.DS_ID) self.assertEqual(ds.project, self.PROJECT) self.assertEqual(ds.etag, RESOURCE['etag']) @@ -448,26 +443,24 @@ def test_create_table_w_day_partition(self): 'tableId': self.TABLE_ID }, } - conn = client._connection = _Connection(resource) + conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) table.partitioning_type = 'DAY' got = client.create_table(table) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % path) - sent = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID - }, - 'timePartitioning': {'type': 'DAY'}, - 'labels': {}, - } - self.assertEqual(req['data'], sent) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % path, + data={ + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'timePartitioning': {'type': 'DAY'}, + 'labels': {}, + }) self.assertEqual(table.partitioning_type, "DAY") self.assertEqual(got.table_id, self.TABLE_ID) @@ -487,27 +480,25 @@ def test_create_table_w_encryption_configuration(self): 'tableId': self.TABLE_ID }, } - conn = client._connection = _Connection(resource) + conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) table.encryption_configuration = EncryptionConfiguration( kms_key_name=self.KMS_KEY_NAME) got = client.create_table(table) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % path) - sent = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID - }, - 'labels': {}, - 'encryptionConfiguration': {'kmsKeyName': self.KMS_KEY_NAME}, - } - self.assertEqual(req['data'], sent) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % path, + data={ + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'labels': {}, + 'encryptionConfiguration': {'kmsKeyName': self.KMS_KEY_NAME}, + }) self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_day_partition_and_expire(self): @@ -525,27 +516,25 @@ def test_create_table_w_day_partition_and_expire(self): 'tableId': self.TABLE_ID }, } - conn = client._connection = _Connection(resource) + conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) table.partitioning_type = 'DAY' table.partition_expiration = 100 got = client.create_table(table) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % path) - sent = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID - }, - 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, - 'labels': {}, - } - self.assertEqual(req['data'], sent) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % path, + data={ + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, + 'labels': {}, + }) self.assertEqual(table.partitioning_type, "DAY") self.assertEqual(table.partition_expiration, 100) self.assertEqual(got.table_id, self.TABLE_ID) @@ -575,30 +564,38 @@ def test_create_table_w_schema_and_query(self): SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED') ] - conn = client._connection = _Connection(resource) + conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF, schema=schema) table.view_query = query got = client.create_table(table) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % path) - sent = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, - }, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] - }, - 'view': {'query': query, 'useLegacySql': False}, - 'labels': {}, - } - self.assertEqual(req['data'], sent) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % path, + data={ + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, + }, + 'schema': { + 'fields': [ + { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + }, + { + 'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED', + }, + ], + }, + 'view': {'query': query, 'useLegacySql': False}, + 'labels': {}, + }) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) self.assertEqual(got.dataset_id, self.DS_ID) @@ -626,7 +623,7 @@ def test_create_table_w_external(self): 'autodetect': True, }, } - conn = client._connection = _Connection(resource) + conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) ec = ExternalConfig('CSV') ec.autodetect = True @@ -634,23 +631,21 @@ def test_create_table_w_external(self): got = client.create_table(table) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % path) - sent = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, - }, - 'externalDataConfiguration': { - 'sourceFormat': SourceFormat.CSV, - 'autodetect': True, - }, - 'labels': {}, - } - self.assertEqual(req['data'], sent) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % path, + data={ + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, + }, + 'externalDataConfiguration': { + 'sourceFormat': SourceFormat.CSV, + 'autodetect': True, + }, + 'labels': {}, + }) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) self.assertEqual(got.dataset_id, self.DS_ID) @@ -673,13 +668,11 @@ def test_get_table(self): 'tableId': self.TABLE_ID, }, } - conn = client._connection = _Connection(resource) + conn = client._connection = _make_connection(resource) table = client.get_table(self.TABLE_REF) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % path) + conn.api_request.assert_called_once_with( + method='GET', path='/%s' % path) self.assertEqual(table.table_id, self.TABLE_ID) def test_update_dataset_w_invalid_field(self): @@ -715,7 +708,7 @@ def test_update_dataset(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection(RESOURCE, RESOURCE) + conn = client._connection = _make_connection(RESOURCE, RESOURCE) ds = Dataset(client.dataset(self.DS_ID)) ds.description = DESCRIPTION ds.friendly_name = FRIENDLY_NAME @@ -727,19 +720,17 @@ def test_update_dataset(self): ds2 = client.update_dataset( ds, ['description', 'friendly_name', 'location', 'labels', 'access_entries']) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - SENT = { - 'description': DESCRIPTION, - 'friendlyName': FRIENDLY_NAME, - 'location': LOCATION, - 'labels': LABELS, - 'access': ACCESS, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/' + PATH) - self.assertIsNone(req['headers']) + conn.api_request.assert_called_once_with( + method='PATCH', + data={ + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'labels': LABELS, + 'access': ACCESS, + }, + path='/' + PATH, + headers=None) self.assertEqual(ds2.description, ds.description) self.assertEqual(ds2.friendly_name, ds.friendly_name) self.assertEqual(ds2.location, ds.location) @@ -749,8 +740,8 @@ def test_update_dataset(self): # ETag becomes If-Match header. ds._properties['etag'] = 'etag' client.update_dataset(ds, []) - req = conn._requested[1] - self.assertEqual(req['headers']['If-Match'], 'etag') + req = conn.api_request.call_args + self.assertEqual(req[1]['headers']['If-Match'], 'etag') def test_update_table(self): from google.cloud.bigquery.table import Table, SchemaField @@ -781,7 +772,7 @@ def test_update_table(self): ] creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection(resource, resource) + conn = client._connection = _make_connection(resource, resource) table = Table(self.TABLE_REF, schema=schema) table.description = description table.friendly_name = title @@ -803,12 +794,11 @@ def test_update_table(self): 'friendlyName': title, 'labels': {'x': 'y'}, } - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - self.assertEqual(req['data'], sent) - self.assertEqual(req['path'], '/' + path) - self.assertIsNone(req['headers']) + conn.api_request.assert_called_once_with( + method='PATCH', + data=sent, + path='/' + path, + headers=None) self.assertEqual(updated_table.description, table.description) self.assertEqual(updated_table.friendly_name, table.friendly_name) self.assertEqual(updated_table.schema, table.schema) @@ -817,8 +807,8 @@ def test_update_table(self): # ETag becomes If-Match header. table._properties['etag'] = 'etag' client.update_table(table, []) - req = conn._requested[1] - self.assertEqual(req['headers']['If-Match'], 'etag') + req = conn.api_request.call_args + self.assertEqual(req[1]['headers']['If-Match'], 'etag') def test_update_table_only_use_legacy_sql(self): from google.cloud.bigquery.table import Table @@ -836,25 +826,24 @@ def test_update_table_only_use_legacy_sql(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection(resource) + conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) table.view_use_legacy_sql = True updated_table = client.update_table(table, ['view_use_legacy_sql']) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - self.assertEqual(req['path'], '/%s' % path) - sent = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + conn.api_request.assert_called_once_with( + method='PATCH', + path='/%s' % path, + data={ + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'view': {'useLegacySql': True} }, - 'view': {'useLegacySql': True} - } - self.assertEqual(req['data'], sent) + headers=None) self.assertEqual( updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -890,7 +879,7 @@ def test_update_table_w_query(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection(resource) + conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF, schema=schema) table.location = location table.expires = exp_time @@ -901,22 +890,21 @@ def test_update_table_w_query(self): updated_table = client.update_table(table, updated_properties) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - self.assertEqual(req['path'], '/%s' % path) - sent = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + conn.api_request.assert_called_once_with( + method='PATCH', + path='/%s' % path, + data={ + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'view': {'query': query, 'useLegacySql': True}, + 'location': location, + 'expirationTime': _millis(exp_time), + 'schema': schema_resource, }, - 'view': {'query': query, 'useLegacySql': True}, - 'location': location, - 'expirationTime': _millis(exp_time), - 'schema': schema_resource, - } - self.assertEqual(req['data'], sent) + headers=None) self.assertEqual(updated_table.schema, table.schema) self.assertEqual(updated_table.view_query, table.view_query) self.assertEqual(updated_table.location, table.location) @@ -949,15 +937,15 @@ def test_update_table_w_schema_None(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection(resource1, resource2) + conn = client._connection = _make_connection(resource1, resource2) table = client.get_table(self.TABLE_REF) table.schema = None updated_table = client.update_table(table, ['schema']) - self.assertEqual(len(conn._requested), 2) - req = conn._requested[1] - self.assertEqual(req['method'], 'PATCH') + self.assertEqual(len(conn.api_request.call_args_list), 2) + req = conn.api_request.call_args_list[1] + self.assertEqual(req[1]['method'], 'PATCH') sent = { 'tableReference': { 'projectId': self.PROJECT, @@ -966,8 +954,8 @@ def test_update_table_w_schema_None(self): }, 'schema': None } - self.assertEqual(req['data'], sent) - self.assertEqual(req['path'], '/%s' % path) + self.assertEqual(req[1]['data'], sent) + self.assertEqual(req[1]['path'], '/%s' % path) self.assertEqual(updated_table.schema, table.schema) def test_update_table_delete_property(self): @@ -998,7 +986,7 @@ def test_update_table_delete_property(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection(resource1, resource2) + conn = client._connection = _make_connection(resource1, resource2) table = Table(self.TABLE_REF) table.description = description table.friendly_name = title @@ -1007,10 +995,10 @@ def test_update_table_delete_property(self): table2.description = None table3 = client.update_table(table2, ['description']) - self.assertEqual(len(conn._requested), 2) - req = conn._requested[1] - self.assertEqual(req['method'], 'PATCH') - self.assertEqual(req['path'], '/%s' % path) + self.assertEqual(len(conn.api_request.call_args_list), 2) + req = conn.api_request.call_args_list[1] + self.assertEqual(req[1]['method'], 'PATCH') + self.assertEqual(req[1]['path'], '/%s' % path) sent = { 'tableReference': { 'projectId': self.PROJECT, @@ -1019,13 +1007,15 @@ def test_update_table_delete_property(self): }, 'description': None, } - self.assertEqual(req['data'], sent) + self.assertEqual(req[1]['data'], sent) self.assertIsNone(table3.description) def test_list_dataset_tables_empty(self): + path = '/projects/{}/datasets/{}/tables'.format( + self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection({}) + conn = client._connection = _make_connection({}) dataset = client.dataset(self.DS_ID) iterator = client.list_dataset_tables(dataset) @@ -1036,11 +1026,8 @@ def test_list_dataset_tables_empty(self): self.assertEqual(tables, []) self.assertIsNone(token) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - self.assertEqual(req['path'], '/%s' % PATH) + conn.api_request.assert_called_once_with( + method='GET', path=path, query_params={}) def test_list_dataset_tables_defaults(self): from google.cloud.bigquery.table import TableListItem @@ -1069,7 +1056,7 @@ def test_list_dataset_tables_defaults(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection(DATA) + conn = client._connection = _make_connection(DATA) dataset = client.dataset(self.DS_ID) iterator = client.list_dataset_tables(dataset) @@ -1085,10 +1072,8 @@ def test_list_dataset_tables_defaults(self): self.assertEqual(found.table_type, expected['type']) self.assertEqual(token, TOKEN) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + conn.api_request.assert_called_once_with( + method='GET', path='/%s' % PATH, query_params={}) def test_list_dataset_tables_explicit(self): from google.cloud.bigquery.table import TableListItem @@ -1116,7 +1101,7 @@ def test_list_dataset_tables_explicit(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection(DATA) + conn = client._connection = _make_connection(DATA) dataset = client.dataset(self.DS_ID) iterator = client.list_dataset_tables( @@ -1133,12 +1118,10 @@ def test_list_dataset_tables_explicit(self): self.assertEqual(found.table_type, expected['type']) self.assertIsNone(token) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'maxResults': 3, 'pageToken': TOKEN}) + conn.api_request.assert_called_once_with( + method='GET', + path='/%s' % PATH, + query_params={'maxResults': 3, 'pageToken': TOKEN}) def test_list_dataset_tables_wrong_type(self): creds = _make_credentials() @@ -1152,14 +1135,14 @@ def test_delete_dataset(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection({}, {}) + conn = client._connection = _make_connection({}, {}) ds_ref = client.dataset(self.DS_ID) for arg in (ds_ref, Dataset(ds_ref)): client.delete_dataset(arg) - req = conn._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {}) + conn.api_request.assert_called_with( + method='DELETE', + path='/%s' % PATH, + query_params={}) def test_delete_dataset_delete_contents(self): from google.cloud.bigquery.dataset import Dataset @@ -1167,14 +1150,14 @@ def test_delete_dataset_delete_contents(self): PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _Connection({}, {}) + conn = client._connection = _make_connection({}, {}) ds_ref = client.dataset(self.DS_ID) for arg in (ds_ref, Dataset(ds_ref)): client.delete_dataset(arg, delete_contents=True) - req = conn._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'deleteContents': 'true'}) + conn.api_request.assert_called_with( + method='DELETE', + path='/%s' % PATH, + query_params={'deleteContents': 'true'}) def test_delete_dataset_wrong_type(self): creds = _make_credentials() @@ -1191,13 +1174,12 @@ def test_delete_table(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection({}, {}) + conn = client._connection = _make_connection({}, {}) for arg in (self.TABLE_REF, Table(self.TABLE_REF)): client.delete_table(arg) - req = conn._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % path) + conn.api_request.assert_called_with( + method='DELETE', path='/%s' % path) def test_delete_table_w_wrong_type(self): creds = _make_credentials() @@ -1218,16 +1200,15 @@ def test_get_job_miss_w_explict_project(self): JOB_ID = 'NONESUCH' creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _Connection() + conn = client._connection = _make_connection() with self.assertRaises(NotFound): client.get_job(JOB_ID, project=OTHER_PROJECT) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/projects/OTHER_PROJECT/jobs/NONESUCH') - self.assertEqual(req['query_params'], {'projection': 'full'}) + conn.api_request.assert_called_once_with( + method='GET', + path='/projects/OTHER_PROJECT/jobs/NONESUCH', + query_params={'projection': 'full'}) def test_get_job_hit(self): from google.cloud.bigquery.job import CreateDisposition @@ -1259,7 +1240,7 @@ def test_get_job_hit(self): } creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _Connection(ASYNC_QUERY_DATA) + conn = client._connection = _make_connection(ASYNC_QUERY_DATA) job = client.get_job(JOB_ID) @@ -1270,11 +1251,10 @@ def test_get_job_hit(self): self.assertEqual(job.write_disposition, WriteDisposition.WRITE_TRUNCATE) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/projects/PROJECT/jobs/query_job') - self.assertEqual(req['query_params'], {'projection': 'full'}) + conn.api_request.assert_called_once_with( + method='GET', + path='/projects/PROJECT/jobs/query_job', + query_params={'projection': 'full'}) def test_cancel_job_miss_w_explict_project(self): from google.cloud.exceptions import NotFound @@ -1283,17 +1263,15 @@ def test_cancel_job_miss_w_explict_project(self): JOB_ID = 'NONESUCH' creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _Connection() + conn = client._connection = _make_connection() with self.assertRaises(NotFound): client.cancel_job(JOB_ID, project=OTHER_PROJECT) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual( - req['path'], '/projects/OTHER_PROJECT/jobs/NONESUCH/cancel') - self.assertEqual(req['query_params'], {'projection': 'full'}) + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/OTHER_PROJECT/jobs/NONESUCH/cancel', + query_params={'projection': 'full'}) def test_cancel_job_hit(self): from google.cloud.bigquery.job import QueryJob @@ -1318,7 +1296,7 @@ def test_cancel_job_hit(self): } creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) job = client.cancel_job(JOB_ID) @@ -1326,12 +1304,10 @@ def test_cancel_job_hit(self): self.assertEqual(job.job_id, JOB_ID) self.assertEqual(job.query, QUERY) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual( - req['path'], '/projects/PROJECT/jobs/query_job/cancel') - self.assertEqual(req['query_params'], {'projection': 'full'}) + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/PROJECT/jobs/query_job/cancel', + query_params={'projection': 'full'}) def test_list_jobs_defaults(self): from google.cloud.bigquery.job import CopyJob @@ -1444,7 +1420,7 @@ def test_list_jobs_defaults(self): } creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _Connection(DATA) + conn = client._connection = _make_connection(DATA) iterator = client.list_jobs() page = six.next(iterator.pages) @@ -1458,11 +1434,10 @@ def test_list_jobs_defaults(self): self.assertEqual(found.job_id, name) self.assertEqual(token, TOKEN) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'projection': 'full'}) + conn.api_request.assert_called_once_with( + method='GET', + path='/%s' % PATH, + query_params={'projection': 'full'}) def test_list_jobs_load_job_wo_sourceUris(self): from google.cloud.bigquery.job import LoadJob @@ -1498,7 +1473,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): } creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _Connection(DATA) + conn = client._connection = _make_connection(DATA) iterator = client.list_jobs() page = six.next(iterator.pages) @@ -1512,11 +1487,10 @@ def test_list_jobs_load_job_wo_sourceUris(self): self.assertEqual(found.job_id, name) self.assertEqual(token, TOKEN) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'projection': 'full'}) + conn.api_request.assert_called_once_with( + method='GET', + path='/%s' % PATH, + query_params={'projection': 'full'}) def test_list_jobs_explicit_missing(self): PATH = 'projects/%s/jobs' % self.PROJECT @@ -1524,7 +1498,7 @@ def test_list_jobs_explicit_missing(self): TOKEN = 'TOKEN' creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _Connection(DATA) + conn = client._connection = _make_connection(DATA) iterator = client.list_jobs(max_results=1000, page_token=TOKEN, all_users=True, state_filter='done') @@ -1535,16 +1509,16 @@ def test_list_jobs_explicit_missing(self): self.assertEqual(len(jobs), 0) self.assertIsNone(token) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'projection': 'full', - 'maxResults': 1000, - 'pageToken': TOKEN, - 'allUsers': True, - 'stateFilter': 'done'}) + conn.api_request.assert_called_once_with( + method='GET', + path='/%s' % PATH, + query_params={ + 'projection': 'full', + 'maxResults': 1000, + 'pageToken': TOKEN, + 'allUsers': True, + 'stateFilter': 'done' + }) def test_load_table_from_uri(self): from google.cloud.bigquery.job import LoadJob @@ -1572,16 +1546,16 @@ def test_load_table_from_uri(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) destination = client.dataset(self.DS_ID).table(DESTINATION) job = client.load_table_from_uri(SOURCE_URI, destination, job_id=JOB) # Check that load_table_from_uri actually starts the job. - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT) + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/%s/jobs' % self.PROJECT, + data=RESOURCE) self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) @@ -1589,7 +1563,7 @@ def test_load_table_from_uri(self): self.assertEqual(list(job.source_uris), [SOURCE_URI]) self.assertIs(job.destination, destination) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) job = client.load_table_from_uri([SOURCE_URI], destination, job_id=JOB) self.assertIsInstance(job, LoadJob) @@ -1626,7 +1600,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): fake_transport = self._mock_transport( http_client.OK, response_headers) client = self._make_one(project=self.PROJECT, _http=fake_transport) - conn = client._connection = _Connection() + conn = client._connection = _make_connection() # Create some mock arguments and call the method under test. data = b'goodbye gudbi gootbee' @@ -1691,7 +1665,7 @@ def _do_multipart_upload_success_helper( fake_transport = self._mock_transport(http_client.OK, {}) client = self._make_one(project=self.PROJECT, _http=fake_transport) - conn = client._connection = _Connection() + conn = client._connection = _make_connection() # Create some mock arguments. data = b'Bzzzz-zap \x00\x01\xf4' @@ -1753,11 +1727,13 @@ def test_copy_table(self): }, 'configuration': { 'copy': { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': SOURCE, - }, + 'sourceTables': [ + { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': SOURCE, + }, + ], 'destinationTable': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, @@ -1770,7 +1746,7 @@ def test_copy_table(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) @@ -1778,10 +1754,10 @@ def test_copy_table(self): job = client.copy_table(source, destination, job_id=JOB) # Check that copy_table actually starts the job. - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT) + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/%s/jobs' % self.PROJECT, + data=RESOURCE) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) @@ -1789,7 +1765,7 @@ def test_copy_table(self): self.assertEqual(list(job.sources), [source]) self.assertIs(job.destination, destination) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) source2 = dataset.table(SOURCE + '2') job = client.copy_table([source, source2], destination, job_id=JOB) self.assertIsInstance(job, CopyJob) @@ -1824,17 +1800,17 @@ def test_extract_table(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) job = client.extract_table(source, DESTINATION, job_id=JOB) # Check that extract_table actually starts the job. - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/PROJECT/jobs') + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/PROJECT/jobs', + data=RESOURCE) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -1872,7 +1848,7 @@ def test_extract_table_generated_job_id(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) job_config = ExtractJobConfig() @@ -1882,8 +1858,8 @@ def test_extract_table_generated_job_id(self): job = client.extract_table(source, DESTINATION, job_config=job_config) # Check that extract_table actually starts the job. - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args self.assertEqual(req['method'], 'POST') self.assertEqual(req['path'], '/projects/PROJECT/jobs') self.assertIsInstance( @@ -1925,7 +1901,7 @@ def test_extract_table_w_destination_uris(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) @@ -1933,8 +1909,8 @@ def test_extract_table_w_destination_uris(self): source, [DESTINATION1, DESTINATION2], job_id=JOB) # Check that extract_table actually starts the job. - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args self.assertEqual(req['method'], 'POST') self.assertEqual(req['path'], '/projects/PROJECT/jobs') @@ -1966,7 +1942,7 @@ def test_query_defaults(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) job = client.query(QUERY) @@ -1978,8 +1954,8 @@ def test_query_defaults(self): self.assertEqual(job.query_parameters, []) # Check that query actually starts the job. - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args self.assertEqual(req['method'], 'POST') self.assertEqual(req['path'], '/projects/PROJECT/jobs') sent = req['data'] @@ -2016,7 +1992,7 @@ def test_query_w_udf_resources(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] config = QueryJobConfig() config.udf_resources = udf_resources @@ -2032,8 +2008,8 @@ def test_query_w_udf_resources(self): self.assertEqual(job.query_parameters, []) # Check that query actually starts the job. - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args self.assertEqual(req['method'], 'POST') self.assertEqual(req['path'], '/projects/PROJECT/jobs') sent = req['data'] @@ -2076,7 +2052,7 @@ def test_query_w_query_parameters(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(RESOURCE) + conn = client._connection = _make_connection(RESOURCE) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] config = QueryJobConfig() config.query_parameters = query_parameters @@ -2091,8 +2067,8 @@ def test_query_w_query_parameters(self): self.assertEqual(job.query_parameters, query_parameters) # Check that query actually starts the job. - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args self.assertEqual(req['method'], 'POST') self.assertEqual(req['path'], '/projects/PROJECT/jobs') sent = req['data'] @@ -2144,7 +2120,7 @@ def test_create_rows_w_schema(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection({}) + conn = client._connection = _make_connection({}) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), @@ -2177,8 +2153,8 @@ def _row_data(row): errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args self.assertEqual(req['method'], 'POST') self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['data'], SENT) @@ -2199,7 +2175,7 @@ def test_create_rows_w_list_of_dictionaries(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection({}) + conn = client._connection = _make_connection({}) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), @@ -2242,11 +2218,10 @@ def _row_data(row): errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % PATH, + data=SENT) def test_create_rows_w_list_of_Rows(self): from google.cloud.bigquery.table import Table @@ -2259,7 +2234,7 @@ def test_create_rows_w_list_of_Rows(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection({}) + conn = client._connection = _make_connection({}) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), @@ -2287,11 +2262,10 @@ def _row_data(row): errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % PATH, + data=SENT) def test_create_rows_w_skip_invalid_and_ignore_unknown(self): from google.cloud.bigquery.table import Table, SchemaField @@ -2312,7 +2286,7 @@ def test_create_rows_w_skip_invalid_and_ignore_unknown(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(RESPONSE) + conn = client._connection = _make_connection(RESPONSE) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED'), @@ -2355,11 +2329,10 @@ def _row_data(row): self.assertEqual(len(errors[0]['errors']), 1) self.assertEqual(errors[0]['errors'][0], RESPONSE['insertErrors'][0]['errors'][0]) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % PATH, + data=SENT) def test_create_rows_w_repeated_fields(self): from google.cloud.bigquery.table import Table, SchemaField @@ -2370,7 +2343,7 @@ def test_create_rows_w_repeated_fields(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection({}) + conn = client._connection = _make_connection({}) full_name = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') @@ -2396,11 +2369,10 @@ def _row_data(row): errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % PATH, + data=SENT) def test_create_rows_w_record_schema(self): from google.cloud.bigquery.table import SchemaField @@ -2411,7 +2383,7 @@ def test_create_rows_w_record_schema(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection({}) + conn = client._connection = _make_connection({}) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') area_code = SchemaField('area_code', 'STRING', 'REQUIRED') local_number = SchemaField('local_number', 'STRING', 'REQUIRED') @@ -2444,11 +2416,10 @@ def _row_data(row): selected_fields=[full_name, phone]) self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % PATH, + data=SENT) def test_create_rows_errors(self): from google.cloud.bigquery.table import Table @@ -2488,7 +2459,7 @@ def test_create_rows_json(self): creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection({}) + conn = client._connection = _make_connection({}) table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), @@ -2525,11 +2496,10 @@ def test_create_rows_json(self): errors = client.create_rows_json(table, ROWS) self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % PATH, + data=SENT) def test_list_rows(self): import datetime @@ -2582,7 +2552,7 @@ def _bigquery_timestamp_float_repr(ts_float): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(DATA, DATA) + conn = client._connection = _make_connection(DATA, DATA) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='NULLABLE') joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') @@ -2603,11 +2573,10 @@ def _bigquery_timestamp_float_repr(ts_float): self.assertEqual(total_rows, ROWS) self.assertEqual(page_token, TOKEN) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {}) + conn.api_request.assert_called_once_with( + method='GET', + path='/%s' % PATH, + query_params={}) def test_list_rows_query_params(self): from google.cloud.bigquery.table import Table, SchemaField @@ -2625,12 +2594,12 @@ def test_list_rows_query_params(self): ({'start_index': 1, 'max_results': 2}, {'startIndex': 1, 'maxResults': 2}), ] - conn = client._connection = _Connection(*len(tests)*[{}]) + conn = client._connection = _make_connection(*len(tests)*[{}]) for i, test in enumerate(tests): iterator = client.list_rows(table, **test[0]) six.next(iterator.pages) - req = conn._requested[i] - self.assertEqual(req['query_params'], test[1], + req = conn.api_request.call_args_list[i] + self.assertEqual(req[1]['query_params'], test[1], 'for kwargs %s' % test[0]) def test_list_rows_repeated_fields(self): @@ -2660,7 +2629,7 @@ def test_list_rows_repeated_fields(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(DATA) + conn = client._connection = _make_connection(DATA) color = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') @@ -2681,10 +2650,10 @@ def test_list_rows_repeated_fields(self): self.assertEqual(total_rows, ROWS) self.assertEqual(page_token, TOKEN) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + conn.api_request.assert_called_once_with( + method='GET', + path='/%s' % PATH, + query_params={'selectedFields': 'color,struct'}) def test_list_rows_w_record_schema(self): from google.cloud.bigquery.table import Table, SchemaField @@ -2715,7 +2684,7 @@ def test_list_rows_w_record_schema(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _Connection(DATA) + conn = client._connection = _make_connection(DATA) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') area_code = SchemaField('area_code', 'STRING', 'REQUIRED') local_number = SchemaField('local_number', 'STRING', 'REQUIRED') @@ -2744,10 +2713,8 @@ def test_list_rows_w_record_schema(self): self.assertEqual(total_rows, ROWS) self.assertEqual(page_token, TOKEN) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) + conn.api_request.assert_called_once_with( + method='GET', path='/%s' % PATH, query_params={}) def test_list_rows_errors(self): from google.cloud.bigquery.table import Table @@ -2811,7 +2778,7 @@ def test_list_partitions(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = _Connection( + client._connection = _make_connection( RESOURCE, RESULTS_RESOURCE, FIRST_PAGE) self.assertEqual(client.list_partitions(self.TABLE_REF), [20160804, 20160805]) @@ -3165,24 +3132,3 @@ def test__do_multipart_upload_wrong_size(self): {}, file_obj_len+1, None) - - -class _Connection(object): - - USER_AGENT = 'testing 1.2.3' - - def __init__(self, *responses): - self._responses = responses - self._requested = [] - - def api_request(self, **kw): - from google.api_core.exceptions import NotFound - self._requested.append(kw) - - if len(self._responses) == 0: - raise NotFound('miss') - - response, self._responses = self._responses[0], self._responses[1:] - if isinstance(response, Exception): - raise response - return response diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 2c63f94b5ab8..f462f1d04f0b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -39,7 +39,7 @@ def _make_client(project='test-project', connection=None): from google.cloud.bigquery.client import Client if connection is None: - connection = _Connection() + connection = _make_connection() client = Client( project=project, credentials=_make_credentials(), _http=object()) @@ -47,6 +47,15 @@ def _make_client(project='test-project', connection=None): return client +def _make_connection(*responses): + import google.cloud.bigquery._http + from google.cloud.exceptions import NotFound + + mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) + mock_conn.api_request.side_effect = list(responses) + [NotFound('miss')] + return mock_conn + + class Test__int_or_none(unittest.TestCase): def _call_fut(self, *args, **kwargs): @@ -466,7 +475,7 @@ def test_result_invokes_begin(self): begun_resource = self._make_resource() done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} - connection = _Connection(begun_resource, done_resource) + connection = _make_connection(begun_resource, done_resource) client = _make_client(self.PROJECT) client._connection = connection @@ -474,10 +483,10 @@ def test_result_invokes_begin(self): client) job.result() - self.assertEqual(len(connection._requested), 2) - begin_request, reload_request = connection._requested - self.assertEqual(begin_request['method'], 'POST') - self.assertEqual(reload_request['method'], 'GET') + self.assertEqual(len(connection.api_request.call_args_list), 2) + begin_request, reload_request = connection.api_request.call_args_list + self.assertEqual(begin_request[1]['method'], 'POST') + self.assertEqual(reload_request[1]['method'], 'GET') def test_schema_setter_non_list(self): config = LoadJobConfig() @@ -651,7 +660,7 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_already_running(self): - conn = _Connection() + conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) @@ -661,41 +670,38 @@ def test_begin_w_already_running(self): job._begin() def test_begin_w_bound_client(self): - PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._make_resource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] del RESOURCE['etag'] del RESOURCE['selfLink'] del RESOURCE['user_email'] - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job._begin() - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'load': { - 'sourceUris': [self.SOURCE1], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/{}/jobs'.format(self.PROJECT), + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'load': { + 'sourceUris': [self.SOURCE1], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, + }, }, }, - }, - } - self.assertEqual(req['data'], SENT) + }) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_autodetect(self): @@ -707,7 +713,7 @@ def test_begin_w_autodetect(self): del resource['etag'] del resource['selfLink'] del resource['user_email'] - conn = _Connection(resource) + conn = _make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) config = LoadJobConfig() config.autodetect = True @@ -732,12 +738,10 @@ def test_begin_w_autodetect(self): }, }, } - expected_request = { - 'method': 'POST', - 'path': path, - 'data': sent, - } - self.assertEqual(conn._requested, [expected_request]) + conn.api_request.assert_called_once_with( + method='POST', + path=path, + data=sent) self._verifyResourceProperties(job, resource) def test_begin_w_alternate_client(self): @@ -772,9 +776,9 @@ def test_begin_w_alternate_client(self): ]} } RESOURCE['configuration']['load'] = LOAD_CONFIGURATION - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) + conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') @@ -797,11 +801,11 @@ def test_begin_w_alternate_client(self): job._begin(client=client2) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) + conn1.api_request.assert_not_called() + self.assertEqual(len(conn2.api_request.call_args_list), 1) + req = conn2.api_request.call_args_list[0] + self.assertEqual(req[1]['method'], 'POST') + self.assertEqual(req[1]['path'], PATH) SENT = { 'jobReference': { 'projectId': self.PROJECT, @@ -812,112 +816,106 @@ def test_begin_w_alternate_client(self): }, } self.maxDiff = None - self.assertEqual(req['data'], SENT) + self.assertEqual(req[1]['data'], SENT) self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - conn = _Connection() + conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) self.assertFalse(job.exists()) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) + conn.api_request.assert_called_once_with( + method='GET', + path=PATH, + query_params={'fields': 'id'}) def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) + conn2 = _make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) self.assertTrue(job.exists(client=client2)) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method='GET', + path=PATH, + query_params={'fields': 'id'}) def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job.reload() - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) + conn.api_request.assert_called_once_with( + method='GET', + path=PATH) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) + conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) job.reload(client=client2) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method='GET', + path=PATH) self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_bound_client(self): PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) RESPONSE = {'job': RESOURCE} - conn = _Connection(RESPONSE) + conn = _make_connection(RESPONSE) client = _make_client(project=self.PROJECT, connection=conn) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job.cancel() - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) + conn.api_request.assert_called_once_with( + method='POST', + path=PATH) self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_alternate_client(self): PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) RESPONSE = {'job': RESOURCE} - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESPONSE) + conn2 = _make_connection(RESPONSE) client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) job.cancel(client=client2) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method='POST', + path=PATH) self._verifyResourceProperties(job, RESOURCE) @@ -1194,7 +1192,7 @@ def test_begin_w_bound_client(self): del RESOURCE['etag'] del RESOURCE['selfLink'] del RESOURCE['user_email'] - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -1202,31 +1200,29 @@ def test_begin_w_bound_client(self): job._begin() - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'copy': { - 'sourceTables': [{ - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE - }], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.DESTINATION_TABLE, + conn.api_request.assert_called_once_with( + method='POST', + path=PATH, + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'copy': { + 'sourceTables': [{ + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.SOURCE_TABLE + }], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.DESTINATION_TABLE, + }, }, }, - }, - } - self.assertEqual(req['data'], SENT) + }) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): @@ -1249,9 +1245,9 @@ def test_begin_w_alternate_client(self): 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, } RESOURCE['configuration']['copy'] = COPY_CONFIGURATION - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) + conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -1262,26 +1258,24 @@ def test_begin_w_alternate_client(self): config) job._begin(client=client2) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'copy': COPY_CONFIGURATION, - }, - } - self.assertEqual(req['data'], SENT) + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method='POST', + path=PATH, + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'copy': COPY_CONFIGURATION, + }, + }) self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - conn = _Connection() + conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) @@ -1290,17 +1284,16 @@ def test_exists_miss_w_bound_client(self): self.assertFalse(job.exists()) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) + conn.api_request.assert_called_once_with( + method='GET', + path=PATH, + query_params={'fields': 'id'}) def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) + conn2 = _make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -1308,17 +1301,16 @@ def test_exists_hit_w_alternate_client(self): self.assertTrue(job.exists(client=client2)) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method='GET', + path=PATH, + query_params={'fields': 'id'}) def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -1326,18 +1318,17 @@ def test_reload_w_bound_client(self): job.reload() - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) + conn.api_request.assert_called_once_with( + method='GET', + path=PATH) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) + conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -1345,11 +1336,10 @@ def test_reload_w_alternate_client(self): job.reload(client=client2) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method='GET', + path=PATH) self._verifyResourceProperties(job, RESOURCE) @@ -1516,7 +1506,7 @@ def test_begin_w_bound_client(self): del RESOURCE['etag'] del RESOURCE['selfLink'] del RESOURCE['user_email'] - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -1525,27 +1515,25 @@ def test_begin_w_bound_client(self): job._begin() - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'extract': { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE + conn.api_request.assert_called_once_with( + method='POST', + path=PATH, + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.SOURCE_TABLE + }, + 'destinationUris': [self.DESTINATION_URI], }, - 'destinationUris': [self.DESTINATION_URI], }, - }, - } - self.assertEqual(req['data'], SENT) + }) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): @@ -1567,9 +1555,9 @@ def test_begin_w_alternate_client(self): 'printHeader': False, } RESOURCE['configuration']['extract'] = EXTRACT_CONFIGURATION - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) + conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -1583,26 +1571,24 @@ def test_begin_w_alternate_client(self): job._begin(client=client2) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'extract': EXTRACT_CONFIGURATION, - }, - } - self.assertEqual(req['data'], SENT) + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method='POST', + path=PATH, + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'extract': EXTRACT_CONFIGURATION, + }, + }) self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - conn = _Connection() + conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], @@ -1610,17 +1596,16 @@ def test_exists_miss_w_bound_client(self): self.assertFalse(job.exists()) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) + conn.api_request.assert_called_once_with( + method='GET', + path=PATH, + query_params={'fields': 'id'}) def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) + conn2 = _make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], @@ -1628,17 +1613,16 @@ def test_exists_hit_w_alternate_client(self): self.assertTrue(job.exists(client=client2)) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method='GET', + path=PATH, + query_params={'fields': 'id'}) def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -1647,18 +1631,15 @@ def test_reload_w_bound_client(self): job.reload() - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) + conn.api_request.assert_called_once_with(method='GET', path=PATH) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) + conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -1667,11 +1648,8 @@ def test_reload_w_alternate_client(self): job.reload(client=client2) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with(method='GET', path=PATH) self._verifyResourceProperties(job, RESOURCE) @@ -2359,7 +2337,7 @@ def test_result(self): 'jobId': self.JOB_ID, }, } - connection = _Connection(query_resource, query_resource) + connection = _make_connection(query_resource, query_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -2381,7 +2359,7 @@ def test_result_invokes_begins(self): query_resource['jobComplete'] = True done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} - connection = _Connection( + connection = _make_connection( begun_resource, incomplete_resource, query_resource, done_resource, query_resource) client = _make_client(project=self.PROJECT, connection=connection) @@ -2389,11 +2367,13 @@ def test_result_invokes_begins(self): job.result() - self.assertEqual(len(connection._requested), 4) - begin_request, _, query_request, reload_request = connection._requested - self.assertEqual(begin_request['method'], 'POST') - self.assertEqual(query_request['method'], 'GET') - self.assertEqual(reload_request['method'], 'GET') + self.assertEqual(len(connection.api_request.call_args_list), 4) + begin_request = connection.api_request.call_args_list[0] + query_request = connection.api_request.call_args_list[2] + reload_request = connection.api_request.call_args_list[3] + self.assertEqual(begin_request[1]['method'], 'POST') + self.assertEqual(query_request[1]['method'], 'GET') + self.assertEqual(reload_request[1]['method'], 'GET') def test_result_w_timeout(self): begun_resource = self._make_resource() @@ -2406,22 +2386,24 @@ def test_result_w_timeout(self): } done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} - connection = _Connection( + connection = _make_connection( begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) job.result(timeout=1.0) - self.assertEqual(len(connection._requested), 3) - begin_request, query_request, reload_request = connection._requested - self.assertEqual(begin_request['method'], 'POST') - self.assertEqual(query_request['method'], 'GET') + self.assertEqual(len(connection.api_request.call_args_list), 3) + begin_request = connection.api_request.call_args_list[0] + query_request = connection.api_request.call_args_list[1] + reload_request = connection.api_request.call_args_list[2] + self.assertEqual(begin_request[1]['method'], 'POST') + self.assertEqual(query_request[1]['method'], 'GET') self.assertEqual( - query_request['path'], + query_request[1]['path'], '/projects/{}/queries/{}'.format(self.PROJECT, self.JOB_ID)) - self.assertEqual(query_request['query_params']['timeoutMs'], 900) - self.assertEqual(reload_request['method'], 'GET') + self.assertEqual(query_request[1]['query_params']['timeoutMs'], 900) + self.assertEqual(reload_request[1]['method'], 'GET') def test_result_error(self): from google.cloud import exceptions @@ -2459,7 +2441,7 @@ def test_begin_w_bound_client(self): del RESOURCE['etag'] del RESOURCE['selfLink'] del RESOURCE['user_email'] - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() @@ -2471,28 +2453,26 @@ def test_begin_w_bound_client(self): self.assertIsNone(job.default_dataset) self.assertEqual(job.udf_resources, []) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': False, - 'defaultDataset': { - 'projectId': self.PROJECT, - 'datasetId': DS_ID, + conn.api_request.assert_called_once_with( + method='POST', + path=PATH, + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'query': { + 'query': self.QUERY, + 'useLegacySql': False, + 'defaultDataset': { + 'projectId': self.PROJECT, + 'datasetId': DS_ID, + }, }, }, - }, - } + }) self._verifyResourceProperties(job, RESOURCE) - self.assertEqual(req['data'], SENT) def test_begin_w_alternate_client(self): from google.cloud.bigquery.dataset import DatasetReference @@ -2528,9 +2508,9 @@ def test_begin_w_alternate_client(self): } RESOURCE['configuration']['query'] = QUERY_CONFIGURATION RESOURCE['configuration']['dryRun'] = True - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) + conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(TABLE) @@ -2553,23 +2533,21 @@ def test_begin_w_alternate_client(self): job._begin(client=client2) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'dryRun': True, - 'query': QUERY_CONFIGURATION, - }, - } + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method='POST', + path=PATH, + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'dryRun': True, + 'query': QUERY_CONFIGURATION, + }, + }) self._verifyResourceProperties(job, RESOURCE) - self.assertEqual(req['data'], SENT) def test_begin_w_udf(self): from google.cloud.bigquery.job import QueryJobConfig @@ -2588,7 +2566,7 @@ def test_begin_w_udf(self): {'resourceUri': RESOURCE_URI}, {'inlineCode': INLINE_UDF_CODE}, ] - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) udf_resources = [ UDFResource("resourceUri", RESOURCE_URI), @@ -2602,29 +2580,27 @@ def test_begin_w_udf(self): job._begin() - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) self.assertEqual(job.udf_resources, udf_resources) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': True, - 'userDefinedFunctionResources': [ - {'resourceUri': RESOURCE_URI}, - {'inlineCode': INLINE_UDF_CODE}, - ] + conn.api_request.assert_called_once_with( + method='POST', + path=PATH, + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, }, - }, - } + 'configuration': { + 'query': { + 'query': self.QUERY, + 'useLegacySql': True, + 'userDefinedFunctionResources': [ + {'resourceUri': RESOURCE_URI}, + {'inlineCode': INLINE_UDF_CODE}, + ] + }, + }, + }) self._verifyResourceProperties(job, RESOURCE) - self.assertEqual(req['data'], SENT) def test_begin_w_named_query_parameter(self): from google.cloud.bigquery.job import QueryJobConfig @@ -2651,7 +2627,7 @@ def test_begin_w_named_query_parameter(self): }, }, ] - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters @@ -2660,27 +2636,25 @@ def test_begin_w_named_query_parameter(self): job._begin() - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) self.assertEqual(job.query_parameters, query_parameters) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': False, - 'parameterMode': 'NAMED', - 'queryParameters': config['queryParameters'], + conn.api_request.assert_called_once_with( + method='POST', + path=PATH, + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, }, - }, - } + 'configuration': { + 'query': { + 'query': self.QUERY, + 'useLegacySql': False, + 'parameterMode': 'NAMED', + 'queryParameters': config['queryParameters'], + }, + }, + }) self._verifyResourceProperties(job, RESOURCE) - self.assertEqual(req['data'], SENT) def test_begin_w_positional_query_parameter(self): from google.cloud.bigquery.job import QueryJobConfig @@ -2706,7 +2680,7 @@ def test_begin_w_positional_query_parameter(self): }, }, ] - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters @@ -2715,27 +2689,25 @@ def test_begin_w_positional_query_parameter(self): job._begin() - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) self.assertEqual(job.query_parameters, query_parameters) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': False, - 'parameterMode': 'POSITIONAL', - 'queryParameters': config['queryParameters'], + conn.api_request.assert_called_once_with( + method='POST', + path=PATH, + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, }, - }, - } + 'configuration': { + 'query': { + 'query': self.QUERY, + 'useLegacySql': False, + 'parameterMode': 'POSITIONAL', + 'queryParameters': config['queryParameters'], + }, + }, + }) self._verifyResourceProperties(job, RESOURCE) - self.assertEqual(req['data'], SENT) def test_begin_w_table_defs(self): from google.cloud.bigquery.job import QueryJobConfig @@ -2788,7 +2760,7 @@ def test_begin_w_table_defs(self): csv_table: CSV_CONFIG_RESOURCE, } want_resource = copy.deepcopy(RESOURCE) - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.table_definitions = { @@ -2801,28 +2773,26 @@ def test_begin_w_table_defs(self): job._begin() - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': True, - 'tableDefinitions': { - bt_table: BT_CONFIG_RESOURCE, - csv_table: CSV_CONFIG_RESOURCE, + conn.api_request.assert_called_once_with( + method='POST', + path=PATH, + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'query': { + 'query': self.QUERY, + 'useLegacySql': True, + 'tableDefinitions': { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + }, }, }, - }, - } + }) self._verifyResourceProperties(job, want_resource) - self.assertEqual(req['data'], SENT) def test_dry_run_query(self): from google.cloud.bigquery.job import QueryJobConfig @@ -2835,7 +2805,7 @@ def test_dry_run_query(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] RESOURCE['configuration']['dryRun'] = True - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.dry_run = True @@ -2844,56 +2814,52 @@ def test_dry_run_query(self): job._begin() self.assertEqual(job.udf_resources, []) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], PATH) - SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': False, + conn.api_request.assert_called_once_with( + method='POST', + path=PATH, + data={ + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, }, - 'dryRun': True, - }, - } + 'configuration': { + 'query': { + 'query': self.QUERY, + 'useLegacySql': False, + }, + 'dryRun': True, + }, + }) self._verifyResourceProperties(job, RESOURCE) - self.assertEqual(req['data'], SENT) def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - conn = _Connection() + conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertFalse(job.exists()) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) + conn.api_request.assert_called_once_with( + method='GET', + path=PATH, + query_params={'fields': 'id'}) def test_exists_hit_w_alternate_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) + conn2 = _make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) self.assertTrue(job.exists(client=client2)) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method='GET', + path=PATH, + query_params={'fields': 'id'}) def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference @@ -2903,7 +2869,7 @@ def test_reload_w_bound_client(self): DS_ID = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._make_resource() - conn = _Connection(RESOURCE) + conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) @@ -2915,10 +2881,7 @@ def test_reload_w_bound_client(self): self.assertNotEqual(job.destination, table_ref) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) + conn.api_request.assert_called_once_with(method='GET', path=PATH) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): @@ -2932,19 +2895,16 @@ def test_reload_w_alternate_client(self): 'datasetId': DS_ID, 'tableId': DEST_TABLE, } - conn1 = _Connection() + conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) + conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) job.reload(client=client2) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], PATH) + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with(method='GET', path=PATH) self._verifyResourceProperties(job, RESOURCE) @unittest.skipIf(pandas is None, 'Requires `pandas`') @@ -2971,7 +2931,7 @@ def test_to_dataframe(self): } done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} - connection = _Connection( + connection = _make_connection( begun_resource, query_resource, done_resource, query_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) @@ -2995,7 +2955,8 @@ def test_iter(self): } done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} - connection = _Connection(begun_resource, query_resource, done_resource) + connection = _make_connection( + begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) @@ -3201,22 +3162,3 @@ def project(self): @property def dataset_id(self): return TestLoadJob.DS_ID - - -class _Connection(object): - - def __init__(self, *responses): - self._responses = responses - self._requested = [] - - def api_request(self, **kw): - from google.cloud.exceptions import NotFound - - self._requested.append(kw) - - try: - response, self._responses = self._responses[0], self._responses[1:] - except IndexError: - raise NotFound('miss') - else: - return response From d841bb4825ca7d8cd58e3385b3b8dcbf9dc24078 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 19 Mar 2018 13:44:11 -0700 Subject: [PATCH 0408/2016] BigQuery: Use `job.configuration` resource for XXXJobConfig classes (#5036) * BigQuery: LoadJobConfig change to job.configuration resource repr. * Fix tests for LoadJob * CopyJobConfig: use job.configuration for API repr Also, fixes some lint errors. * Use job.configuration API resource for ExtractJobConfig * Use job.configuration API resource for QueryJobConfig. * Remove unused _EnumAPIResourceProperty class. Since the API resource properties are no longer used for validation, there was a test coverage gap. * Remove second colon for docs rendering. * Fix docstring. Docstrings for private helper methods. * Add test for wrong config type. --- .../google/cloud/bigquery/_helpers.py | 101 +- .../google/cloud/bigquery/job.py | 916 +++++++++++------- .../google/cloud/bigquery/schema.py | 21 +- .../google-cloud-bigquery/tests/system.py | 12 + .../tests/unit/test_client.py | 76 +- .../tests/unit/test_external_config.py | 1 + .../tests/unit/test_job.py | 146 ++- .../tests/unit/test_schema.py | 36 +- 8 files changed, 854 insertions(+), 455 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index e535642aa43d..e094f6d666f9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -339,7 +339,7 @@ def __get__(self, instance, owner): def _validate(self, value): """Subclasses override to impose validation policy.""" - pass + raise NotImplementedError("Abstract") def __set__(self, instance, value): """Descriptor protocol: mutator""" @@ -417,17 +417,6 @@ def _validate(self, value): 'Required type: list of %s' % (self.property_type,)) -class _EnumApiResourceProperty(_ApiResourceProperty): - """Pseudo-enumeration class. - - :type name: str - :param name: name of the property. - - :type resource_name: str - :param resource_name: name of the property in the resource dictionary - """ - - def _item_to_row(iterator, resource): """Convert a JSON row to the native object. @@ -486,6 +475,94 @@ def _should_retry(exc): return reason == 'backendError' or reason == 'rateLimitExceeded' +def get_sub_prop(container, keys, default=None): + """Get a nested value from a dictionary. + + This method works like ``dict.get(key)``, but for nested values. + + Arguments: + container (dict): + A dictionary which may contain other dictionaries as values. + keys (iterable): + A sequence of keys to attempt to get the value for. Each item in + the sequence represents a deeper nesting. The first key is for + the top level. If there is a dictionary there, the second key + attempts to get the value within that, and so on. + default (object): + (Optional) Value to returned if any of the keys are not found. + Defaults to ``None``. + + Examples: + Get a top-level value (equivalent to ``container.get('key')``). + + >>> get_sub_prop({'key': 'value'}, ['key']) + 'value' + + Get a top-level value, providing a default (equivalent to + ``container.get('key', default='default')``). + + >>> get_sub_prop({'nothere': 123}, ['key'], default='not found') + 'not found' + + Get a nested value. + + >>> get_sub_prop({'key': {'subkey': 'value'}}, ['key', 'subkey']) + 'value' + + Returns: + object: The value if present or the default. + """ + sub_val = container + for key in keys: + if key not in sub_val: + return default + sub_val = sub_val[key] + return sub_val + + +def set_sub_prop(container, keys, value): + """Set a nested value in a dictionary. + + Arguments: + container (dict): + A dictionary which may contain other dictionaries as values. + keys (iterable): + A sequence of keys to attempt to set the value for. Each item in + the sequence represents a deeper nesting. The first key is for + the top level. If there is a dictionary there, the second key + attempts to get the value within that, and so on. + value (object): Value to set within the container. + + Examples: + Set a top-level value (equivalent to ``container['key'] = 'value'``). + + >>> container = {} + >>> set_sub_prop(container, ['key'], 'value') + >>> container + {'key': 'value'} + + Set a nested value. + + >>> container = {} + >>> set_sub_prop(container, ['key', 'subkey'], 'value') + >>> container + {'key': {'subkey': 'value'}} + + Replace a nested value. + + >>> container = {'key': {'subkey': 'prev'}} + >>> set_sub_prop(container, ['key', 'subkey'], 'new') + >>> container + {'key': {'subkey': 'new'}} + """ + sub_val = container + for key in keys[:-1]: + if key not in sub_val: + sub_val[key] = {} + sub_val = sub_val[key] + sub_val[keys[-1]] = value + + DEFAULT_RETRY = retry.Retry(predicate=_should_retry) """The default retry object. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index e955d34be14f..1a5a9ad3e78d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -17,7 +17,6 @@ import copy import threading -import six from six.moves import http_client import google.api_core.future.polling @@ -34,10 +33,7 @@ from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import TableReference -from google.cloud.bigquery.table import _build_schema_resource -from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery._helpers import _EnumApiResourceProperty -from google.cloud.bigquery._helpers import _TypedApiResourceProperty +from google.cloud.bigquery import _helpers from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery._helpers import _int_or_none @@ -89,16 +85,20 @@ def _error_result_to_exception(error_result): status_code, error_result.get('message', ''), errors=[error_result]) -class Compression(_EnumApiResourceProperty): +class Compression(object): """The compression type to use for exported files. - Possible values include `GZIP` and `NONE`. The default value is `NONE`. + Possible values include `GZIP`, `DEFLATE`, `SNAPPY`, and `NONE`. The + default value is `NONE`. `DEFLATE` and `SNAPPY` are only supported for + Avro. """ GZIP = 'GZIP' + DEFLATE = 'DEFLATE' + SNAPPY = 'SNAPPY' NONE = 'NONE' -class CreateDisposition(_EnumApiResourceProperty): +class CreateDisposition(object): """Specifies whether the job is allowed to create new tables. The following values are supported: @@ -115,7 +115,7 @@ class CreateDisposition(_EnumApiResourceProperty): CREATE_NEVER = 'CREATE_NEVER' -class DestinationFormat(_EnumApiResourceProperty): +class DestinationFormat(object): """The exported file format. Possible values include `CSV`, `NEWLINE_DELIMITED_JSON` and `AVRO`. @@ -127,7 +127,7 @@ class DestinationFormat(_EnumApiResourceProperty): AVRO = 'AVRO' -class Encoding(_EnumApiResourceProperty): +class Encoding(object): """The character encoding of the data. The supported values are `UTF_8` corresponding to `'UTF-8'` or `ISO_8859_1` corresponding to `'ISO-8559-1'`. The default value is `UTF_8`. @@ -139,7 +139,7 @@ class Encoding(_EnumApiResourceProperty): ISO_8559_1 = 'ISO-8559-1' -class QueryPriority(_EnumApiResourceProperty): +class QueryPriority(object): """Specifies a priority for the query. Possible values include `INTERACTIVE` and `BATCH`. The default value @@ -149,7 +149,7 @@ class QueryPriority(_EnumApiResourceProperty): BATCH = 'BATCH' -class SourceFormat(_EnumApiResourceProperty): +class SourceFormat(object): """The format of the data files. For CSV files, specify `CSV`. For datastore backups, specify @@ -164,7 +164,7 @@ class SourceFormat(_EnumApiResourceProperty): PARQUET = 'PARQUET' -class WriteDisposition(_EnumApiResourceProperty): +class WriteDisposition(object): """Specifies the action that occurs if destination table already exists. The following values are supported: @@ -185,17 +185,6 @@ class WriteDisposition(_EnumApiResourceProperty): WRITE_EMPTY = 'WRITE_EMPTY' -class AutoDetectSchema(_TypedApiResourceProperty): - """Property for ``autodetect`` properties. - - :raises ValueError: on ``set`` operation if ``instance.schema`` - is already defined. - """ - def __set__(self, instance, value): - self._validate(value) - instance._properties[self.resource_name] = value - - class _AsyncJob(google.api_core.future.polling.PollingFuture): """Base class for asynchronous jobs. @@ -386,8 +375,7 @@ def _set_properties(self, api_response): self._properties.clear() self._properties.update(cleaned) - configuration = cleaned['configuration'][self._JOB_TYPE] - self._copy_configuration_properties(configuration) + self._copy_configuration_properties(cleaned['configuration']) # For Future interface self._set_future_result() @@ -414,8 +402,7 @@ def _get_resource_config(cls, resource): cls._JOB_TYPE not in resource['configuration']): raise KeyError('Resource lacks required configuration: ' '["configuration"]["%s"]' % cls._JOB_TYPE) - config = resource['configuration'][cls._JOB_TYPE] - return job_id, config + return job_id, resource['configuration'] def _begin(self, client=None, retry=DEFAULT_RETRY): """API call: begin the job via a POST request @@ -590,7 +577,93 @@ def cancelled(self): and self.error_result.get('reason') == _STOPPED_REASON) -class LoadJobConfig(object): +class _JobConfig(object): + """Abstract base class for job configuration objects. + + Arguments: + job_type (str): The key to use for the job configuration. + """ + + def __init__(self, job_type): + self._job_type = job_type + self._properties = {job_type: {}} + + def _get_sub_prop(self, key, default=None): + """Get a value in the ``self._properties[self._job_type]`` dictionary. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access + those properties:: + + self._get_sub_prop('destinationTable') + + This is equivalent to using the ``_helper.get_sub_prop`` function:: + + _helper.get_sub_prop( + self._properties, ['query', 'destinationTable']) + + Arguments: + key (str): + Key for the value to get in the + ``self._properties[self._job_type]`` dictionary. + default (object): + (Optional) Default value to return if the key is not found. + Defaults to ``None``. + + Returns: + object: The value if present or the default. + """ + return _helpers.get_sub_prop( + self._properties, [self._job_type, key], default=default) + + def _set_sub_prop(self, key, value): + """Set a value in the ``self._properties[self._job_type]`` dictionary. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to set + those properties:: + + self._set_sub_prop('useLegacySql', False) + + This is equivalent to using the ``_helper.set_sub_prop`` function:: + + _helper.set_sub_prop( + self._properties, ['query', 'useLegacySql'], False) + + Arguments: + key (str): + Key to set in the ``self._properties[self._job_type]`` + dictionary. + value (object): Value to set. + """ + _helpers.set_sub_prop(self._properties, [self._job_type, key], value) + + def to_api_repr(self): + """Build an API representation of the job config. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.job._JobConfig` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + +class LoadJobConfig(_JobConfig): """Configuration options for load jobs. All properties in this class are optional. Values which are ``None`` -> @@ -598,96 +671,202 @@ class LoadJobConfig(object): """ def __init__(self): - self._properties = {} - self._schema = () + super(LoadJobConfig, self).__init__('load') - allow_jagged_rows = _TypedApiResourceProperty( - 'allow_jagged_rows', 'allowJaggedRows', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows - """ + @property + def allow_jagged_rows(self): + """bool: Allow missing trailing optional columns (CSV only). - allow_quoted_newlines = _TypedApiResourceProperty( - 'allow_quoted_newlines', 'allowQuotedNewlines', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows + """ + return self._get_sub_prop('allowJaggedRows') - autodetect = AutoDetectSchema('autodetect', 'autodetect', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect - """ + @allow_jagged_rows.setter + def allow_jagged_rows(self, value): + self._set_sub_prop('allowJaggedRows', value) - create_disposition = CreateDisposition('create_disposition', - 'createDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition - """ + @property + def allow_quoted_newlines(self): + """bool: Allow quoted data containing newline characters (CSV only). - encoding = Encoding('encoding', 'encoding') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines + """ + return self._get_sub_prop('allowQuotedNewlines') - field_delimiter = _TypedApiResourceProperty( - 'field_delimiter', 'fieldDelimiter', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter - """ + @allow_quoted_newlines.setter + def allow_quoted_newlines(self, value): + self._set_sub_prop('allowQuotedNewlines', value) - ignore_unknown_values = _TypedApiResourceProperty( - 'ignore_unknown_values', 'ignoreUnknownValues', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues - """ + @property + def autodetect(self): + """bool: Automatically infer the schema from a sample of the data. - max_bad_records = _TypedApiResourceProperty( - 'max_bad_records', 'maxBadRecords', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect + """ + return self._get_sub_prop('autodetect') - null_marker = _TypedApiResourceProperty( - 'null_marker', 'nullMarker', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker - """ + @autodetect.setter + def autodetect(self, value): + self._set_sub_prop('autodetect', value) - quote_character = _TypedApiResourceProperty( - 'quote_character', 'quote', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote - """ + @property + def create_disposition(self): + """google.cloud.bigquery.job.CreateDisposition: Specifies behavior + for creating tables. - skip_leading_rows = _TypedApiResourceProperty( - 'skip_leading_rows', 'skipLeadingRows', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition + """ + return self._get_sub_prop('createDisposition') - source_format = SourceFormat('source_format', 'sourceFormat') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat - """ + @create_disposition.setter + def create_disposition(self, value): + self._set_sub_prop('createDisposition', value) - write_disposition = WriteDisposition('write_disposition', - 'writeDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition - """ + @property + def encoding(self): + """google.cloud.bigquery.job.Encoding: The character encoding of the + data. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding + """ + return self._get_sub_prop('encoding') + + @encoding.setter + def encoding(self, value): + self._set_sub_prop('encoding', value) + + @property + def field_delimiter(self): + """str: The separator for fields in a CSV file. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter + """ + return self._get_sub_prop('fieldDelimiter') + + @field_delimiter.setter + def field_delimiter(self, value): + self._set_sub_prop('fieldDelimiter', value) + + @property + def ignore_unknown_values(self): + """bool: Ignore extra values not represented in the table schema. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues + """ + return self._get_sub_prop('ignoreUnknownValues') + + @ignore_unknown_values.setter + def ignore_unknown_values(self, value): + self._set_sub_prop('ignoreUnknownValues', value) + + @property + def max_bad_records(self): + """int: Number of invalid rows to ignore. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords + """ + return self._get_sub_prop('maxBadRecords') + + @max_bad_records.setter + def max_bad_records(self, value): + self._set_sub_prop('maxBadRecords', value) + + @property + def null_marker(self): + """str: Represents a null value (CSV only). + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker + """ + return self._get_sub_prop('nullMarker') + + @null_marker.setter + def null_marker(self, value): + self._set_sub_prop('nullMarker', value) + + @property + def quote_character(self): + """str: Character used to quote data sections (CSV only). + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote + """ + return self._get_sub_prop('quote') + + @quote_character.setter + def quote_character(self, value): + self._set_sub_prop('quote', value) + + @property + def skip_leading_rows(self): + """int: Number of rows to skip when reading data (CSV only). + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows + """ + return _int_or_none(self._get_sub_prop('skipLeadingRows')) + + @skip_leading_rows.setter + def skip_leading_rows(self, value): + self._set_sub_prop('skipLeadingRows', str(value)) + + @property + def source_format(self): + """google.cloud.bigquery.job.SourceFormat: File format of the data. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat + """ + return self._get_sub_prop('sourceFormat') + + @source_format.setter + def source_format(self, value): + self._set_sub_prop('sourceFormat', value) + + @property + def write_disposition(self): + """google.cloud.bigquery.job.WriteDisposition: Action that occurs if + the destination table already exists. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition + """ + return self._get_sub_prop('writeDisposition') + + @write_disposition.setter + def write_disposition(self, value): + self._set_sub_prop('writeDisposition', value) @property def schema(self): - """See + """List[google.cloud.bigquery.schema.SchemaField]: Schema of the + destination table. + + See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema """ - return list(self._schema) + schema = _helpers.get_sub_prop( + self._properties, ['load', 'schema', 'fields']) + if schema is None: + return + return [SchemaField.from_api_repr(field) for field in schema] @schema.setter def schema(self, value): - if not all(isinstance(field, SchemaField) for field in value): + if not all(hasattr(field, 'to_api_repr') for field in value): raise ValueError('Schema items must be fields') - self._schema = tuple(value) + _helpers.set_sub_prop( + self._properties, + ['load', 'schema', 'fields'], + [field.to_api_repr() for field in value]) @property def destination_encryption_configuration(self): @@ -700,7 +879,7 @@ def destination_encryption_configuration(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationEncryptionConfiguration """ - prop = self._properties.get('destinationEncryptionConfiguration') + prop = self._get_sub_prop('destinationEncryptionConfiguration') if prop is not None: prop = EncryptionConfiguration.from_api_repr(prop) return prop @@ -710,45 +889,7 @@ def destination_encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties['destinationEncryptionConfiguration'] = api_repr - - def to_api_repr(self): - """Build an API representation of the load job config. - - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. - """ - config = copy.deepcopy(self._properties) - if len(self.schema) > 0: - config['schema'] = {'fields': _build_schema_resource(self.schema)} - # skipLeadingRows is a string because it's defined as an int64, which - # can't be represented as a JSON number. - slr = config.get('skipLeadingRows') - if slr is not None: - config['skipLeadingRows'] = str(slr) - return config - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a job configuration given its API representation - - :type resource: dict - :param resource: - An extract job configuration in the same representation as is - returned from the API. - - :rtype: :class:`google.cloud.bigquery.job.LoadJobConfig` - :returns: Configuration parsed from ``resource``. - """ - schema = resource.pop('schema', {'fields': ()}) - slr = resource.pop('skipLeadingRows', None) - config = cls() - config._properties = copy.deepcopy(resource) - config.schema = _parse_schema_resource(schema) - config.skip_leading_rows = _int_or_none(slr) - if config.skip_leading_rows is None: - del config.skip_leading_rows - return config + self._set_sub_prop('destinationEncryptionConfiguration', api_repr) class LoadJob(_AsyncJob): @@ -946,17 +1087,19 @@ def _build_resource(self): """Generate a resource for :meth:`begin`.""" configuration = self._configuration.to_api_repr() if self.source_uris is not None: - configuration['sourceUris'] = self.source_uris - configuration['destinationTable'] = self.destination.to_api_repr() + _helpers.set_sub_prop( + configuration, ['load', 'sourceUris'], self.source_uris) + _helpers.set_sub_prop( + configuration, + ['load', 'destinationTable'], + self.destination.to_api_repr()) return { 'jobReference': { 'projectId': self.project, 'jobId': self.job_id, }, - 'configuration': { - self._JOB_TYPE: configuration, - }, + 'configuration': configuration, } def _copy_configuration_properties(self, configuration): @@ -984,18 +1127,20 @@ def from_api_repr(cls, resource, client): """ job_id, config_resource = cls._get_resource_config(resource) config = LoadJobConfig.from_api_repr(config_resource) - dest_config = config_resource['destinationTable'] + dest_config = _helpers.get_sub_prop( + config_resource, ['load', 'destinationTable']) ds_ref = DatasetReference(dest_config['projectId'], dest_config['datasetId'],) destination = TableReference(ds_ref, dest_config['tableId']) # sourceUris will be absent if this is a file upload. - source_uris = config_resource.get('sourceUris') + source_uris = _helpers.get_sub_prop( + config_resource, ['load', 'sourceUris']) job = cls(job_id, source_uris, destination, client, config) job._set_properties(resource) return job -class CopyJobConfig(object): +class CopyJobConfig(_JobConfig): """Configuration options for copy jobs. All properties in this class are optional. Values which are ``None`` -> @@ -1003,19 +1148,35 @@ class CopyJobConfig(object): """ def __init__(self): - self._properties = {} + super(CopyJobConfig, self).__init__('copy') - create_disposition = CreateDisposition('create_disposition', - 'createDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition - """ + @property + def create_disposition(self): + """google.cloud.bigquery.job.CreateDisposition: Specifies behavior + for creating tables. - write_disposition = WriteDisposition('write_disposition', - 'writeDisposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition + """ + return self._get_sub_prop('createDisposition') + + @create_disposition.setter + def create_disposition(self, value): + self._set_sub_prop('createDisposition', value) + + @property + def write_disposition(self): + """google.cloud.bigquery.job.WriteDisposition: Action that occurs if + the destination table already exists. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition + """ + return self._get_sub_prop('writeDisposition') + + @write_disposition.setter + def write_disposition(self, value): + self._set_sub_prop('writeDisposition', value) @property def destination_encryption_configuration(self): @@ -1028,7 +1189,7 @@ def destination_encryption_configuration(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.destinationEncryptionConfiguration """ - prop = self._properties.get('destinationEncryptionConfiguration') + prop = self._get_sub_prop('destinationEncryptionConfiguration') if prop is not None: prop = EncryptionConfiguration.from_api_repr(prop) return prop @@ -1038,31 +1199,7 @@ def destination_encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties['destinationEncryptionConfiguration'] = api_repr - - def to_api_repr(self): - """Build an API representation of the copy job config. - - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. - """ - return copy.deepcopy(self._properties) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a job configuration given its API representation - - :type resource: dict - :param resource: - An extract job configuration in the same representation as is - returned from the API. - - :rtype: :class:`google.cloud.bigquery.job.CopyJobConfig` - :returns: Configuration parsed from ``resource``. - """ - config = cls() - config._properties = copy.deepcopy(resource) - return config + self._set_sub_prop('destinationEncryptionConfiguration', api_repr) class CopyJob(_AsyncJob): @@ -1134,21 +1271,23 @@ def _build_resource(self): } for table in self.sources] configuration = self._configuration.to_api_repr() - configuration['sourceTables'] = source_refs - configuration['destinationTable'] = { - 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.table_id, - } + _helpers.set_sub_prop( + configuration, ['copy', 'sourceTables'], source_refs) + _helpers.set_sub_prop( + configuration, + ['copy', 'destinationTable'], + { + 'projectId': self.destination.project, + 'datasetId': self.destination.dataset_id, + 'tableId': self.destination.table_id, + }) return { 'jobReference': { 'projectId': self.project, 'jobId': self.job_id, }, - 'configuration': { - self._JOB_TYPE: configuration, - }, + 'configuration': configuration, } def _copy_configuration_properties(self, configuration): @@ -1176,12 +1315,14 @@ def from_api_repr(cls, resource, client): """ job_id, config_resource = cls._get_resource_config(resource) config = CopyJobConfig.from_api_repr(config_resource) + # Copy required fields to the job. + copy_resource = config_resource['copy'] destination = TableReference.from_api_repr( - config_resource['destinationTable']) + copy_resource['destinationTable']) sources = [] - source_configs = config_resource.get('sourceTables') + source_configs = copy_resource.get('sourceTables') if source_configs is None: - single = config_resource.get('sourceTable') + single = copy_resource.get('sourceTable') if single is None: raise KeyError( "Resource missing 'sourceTables' / 'sourceTable'") @@ -1195,7 +1336,7 @@ def from_api_repr(cls, resource, client): return job -class ExtractJobConfig(object): +class ExtractJobConfig(_JobConfig): """Configuration options for extract jobs. All properties in this class are optional. Values which are ``None`` -> @@ -1203,54 +1344,60 @@ class ExtractJobConfig(object): """ def __init__(self): - self._properties = {} + super(ExtractJobConfig, self).__init__('extract') - compression = Compression('compression', 'compression') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression - """ + @property + def compression(self): + """google.cloud.bigquery.job.Compression: Compression type to use for + exported files. - destination_format = DestinationFormat( - 'destination_format', 'destinationFormat') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression + """ + return self._get_sub_prop('compression') - field_delimiter = _TypedApiResourceProperty( - 'field_delimiter', 'fieldDelimiter', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter - """ + @compression.setter + def compression(self, value): + self._set_sub_prop('compression', value) - print_header = _TypedApiResourceProperty( - 'print_header', 'printHeader', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader - """ + @property + def destination_format(self): + """google.cloud.bigquery.job.DestinationFormat: Exported file format. - def to_api_repr(self): - """Build an API representation of the extract job config. + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat + """ + return self._get_sub_prop('destinationFormat') - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. + @destination_format.setter + def destination_format(self, value): + self._set_sub_prop('destinationFormat', value) + + @property + def field_delimiter(self): + """str: Delimiter to use between fields in the exported data. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter """ - return copy.deepcopy(self._properties) + return self._get_sub_prop('fieldDelimiter') - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a job configuration given its API representation + @field_delimiter.setter + def field_delimiter(self, value): + self._set_sub_prop('fieldDelimiter', value) - :type resource: dict - :param resource: - An extract job configuration in the same representation as is - returned from the API. + @property + def print_header(self): + """bool: Print a header row in the exported data. - :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig` - :returns: Configuration parsed from ``resource``. + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader """ - config = cls() - config._properties = copy.deepcopy(resource) - return config + return self._get_sub_prop('printHeader') + + @print_header.setter + def print_header(self, value): + self._set_sub_prop('printHeader', value) class ExtractJob(_AsyncJob): @@ -1342,17 +1489,19 @@ def _build_resource(self): } configuration = self._configuration.to_api_repr() - configuration['sourceTable'] = source_ref - configuration['destinationUris'] = self.destination_uris + _helpers.set_sub_prop( + configuration, ['extract', 'sourceTable'], source_ref) + _helpers.set_sub_prop( + configuration, + ['extract', 'destinationUris'], + self.destination_uris) resource = { 'jobReference': { 'projectId': self.project, 'jobId': self.job_id, }, - 'configuration': { - self._JOB_TYPE: configuration, - }, + 'configuration': configuration, } return resource @@ -1382,11 +1531,13 @@ def from_api_repr(cls, resource, client): """ job_id, config_resource = cls._get_resource_config(resource) config = ExtractJobConfig.from_api_repr(config_resource) - source_config = config_resource['sourceTable'] + source_config = _helpers.get_sub_prop( + config_resource, ['extract', 'sourceTable']) dataset = DatasetReference( source_config['projectId'], source_config['datasetId']) source = dataset.table(source_config['tableId']) - destination_uris = config_resource['destinationUris'] + destination_uris = _helpers.get_sub_prop( + config_resource, ['extract', 'destinationUris']) job = cls( job_id, source, destination_uris, client=client, job_config=config) @@ -1431,7 +1582,7 @@ def _to_api_repr_table_defs(value): return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} -class QueryJobConfig(object): +class QueryJobConfig(_JobConfig): """Configuration options for query jobs. All properties in this class are optional. Values which are ``None`` -> @@ -1439,7 +1590,7 @@ class QueryJobConfig(object): """ def __init__(self): - self._properties = {} + super(QueryJobConfig, self).__init__('query') @property def destination_encryption_configuration(self): @@ -1452,7 +1603,7 @@ def destination_encryption_configuration(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationEncryptionConfiguration """ - prop = self._properties.get('destinationEncryptionConfiguration') + prop = self._get_sub_prop('destinationEncryptionConfiguration') if prop is not None: prop = EncryptionConfiguration.from_api_repr(prop) return prop @@ -1462,58 +1613,34 @@ def destination_encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties['destinationEncryptionConfiguration'] = api_repr + self._set_sub_prop('destinationEncryptionConfiguration', api_repr) - def to_api_repr(self): - """Build an API representation of the query job config. + @property + def allow_large_results(self): + """bool: Allow large query results tables (legacy SQL, only) - Returns: - dict: A dictionary in the format used by the BigQuery API. + See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults """ - resource = copy.deepcopy(self._properties) - - # Query parameters have an addition property associated with them - # to indicate if the query is using named or positional parameters. - query_parameters = resource.get('queryParameters') - if query_parameters: - if query_parameters[0].get('name') is None: - resource['parameterMode'] = 'POSITIONAL' - else: - resource['parameterMode'] = 'NAMED' - - return resource + return self._get_sub_prop('allowLargeResults') - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a job configuration given its API representation + @allow_large_results.setter + def allow_large_results(self, value): + self._set_sub_prop('allowLargeResults', value) - Args: - resource (dict): - A query job configuration in the same representation as is - returned from the API. + @property + def create_disposition(self): + """google.cloud.bigquery.job.CreateDisposition: Specifies behavior + for creating tables. - Returns: - ~google.cloud.bigquery.job.QueryJobConfig: - Configuration parsed from ``resource``. + See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition """ - config = cls() - config._properties = copy.deepcopy(resource) - - return config + return self._get_sub_prop('createDisposition') - allow_large_results = _TypedApiResourceProperty( - 'allow_large_results', 'allowLargeResults', bool) - """bool: Allow large query results tables (legacy SQL, only) - - See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults - """ - - create_disposition = CreateDisposition( - 'create_disposition', 'createDisposition') - """See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition - """ + @create_disposition.setter + def create_disposition(self, value): + self._set_sub_prop('createDisposition', value) @property def default_dataset(self): @@ -1523,14 +1650,17 @@ def default_dataset(self): See https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset """ - prop = self._properties.get('defaultDataset') + prop = self._get_sub_prop('defaultDataset') if prop is not None: prop = DatasetReference.from_api_repr(prop) return prop @default_dataset.setter def default_dataset(self, value): - self._properties['defaultDataset'] = value.to_api_repr() + resource = None + if value is not None: + resource = value.to_api_repr() + self._set_sub_prop('defaultDataset', resource) @property def destination(self): @@ -1540,34 +1670,57 @@ def destination(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable """ - prop = self._properties.get('destinationTable') + prop = self._get_sub_prop('destinationTable') if prop is not None: prop = TableReference.from_api_repr(prop) return prop @destination.setter def destination(self, value): - self._properties['destinationTable'] = value.to_api_repr() + resource = None + if value is not None: + resource = value.to_api_repr() + self._set_sub_prop('destinationTable', resource) - dry_run = _TypedApiResourceProperty('dry_run', 'dryRun', bool) - """ - bool: ``True`` if this query should be a dry run to estimate costs. + @property + def dry_run(self): + """bool: ``True`` if this query should be a dry run to estimate costs. - See - https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun - """ + See + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun + """ + return self._properties.get('dryRun') - flatten_results = _TypedApiResourceProperty( - 'flatten_results', 'flattenResults', bool) - """See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults - """ + @dry_run.setter + def dry_run(self, value): + self._properties['dryRun'] = value - maximum_billing_tier = _TypedApiResourceProperty( - 'maximum_billing_tier', 'maximumBillingTier', int) - """See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier - """ + @property + def flatten_results(self): + """bool: Flatten nested/repeated fields in results. (Legacy SQL only) + + See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults + """ + return self._get_sub_prop('flattenResults') + + @flatten_results.setter + def flatten_results(self, value): + self._set_sub_prop('flattenResults', value) + + @property + def maximum_billing_tier(self): + """int: Deprecated. Changes the billing tier to allow high-compute + queries. + + See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier + """ + return self._get_sub_prop('maximumBillingTier') + + @maximum_billing_tier.setter + def maximum_billing_tier(self, value): + self._set_sub_prop('maximumBillingTier', value) @property def maximum_bytes_billed(self): @@ -1576,19 +1729,24 @@ def maximum_bytes_billed(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled """ - prop = self._properties.get('maximumBytesBilled') - if prop is not None: - prop = int(prop) - return prop + return _int_or_none(self._get_sub_prop('maximumBytesBilled')) @maximum_bytes_billed.setter def maximum_bytes_billed(self, value): - self._properties['maximumBytesBilled'] = str(value) + self._set_sub_prop('maximumBytesBilled', str(value)) - priority = QueryPriority('priority', 'priority') - """See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority - """ + @property + def priority(self): + """google.cloud.bigquery.job.QueryPriority: Priority of the query. + + See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority + """ + return self._get_sub_prop('priority') + + @priority.setter + def priority(self, value): + self._set_sub_prop('priority', value) @property def query_parameters(self): @@ -1600,13 +1758,13 @@ def query_parameters(self): See: https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.queryParameters """ - prop = self._properties.get('queryParameters', []) + prop = self._get_sub_prop('queryParameters', default=[]) return _from_api_repr_query_parameters(prop) @query_parameters.setter def query_parameters(self, values): - self._properties['queryParameters'] = _to_api_repr_query_parameters( - values) + self._set_sub_prop( + 'queryParameters', _to_api_repr_query_parameters(values)) @property def udf_resources(self): @@ -1616,31 +1774,54 @@ def udf_resources(self): See: https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.userDefinedFunctionResources """ - prop = self._properties.get('userDefinedFunctionResources', []) + prop = self._get_sub_prop('userDefinedFunctionResources', default=[]) return _from_api_repr_udf_resources(prop) @udf_resources.setter def udf_resources(self, values): - self._properties['userDefinedFunctionResources'] = ( + self._set_sub_prop( + 'userDefinedFunctionResources', _to_api_repr_udf_resources(values)) - use_legacy_sql = _TypedApiResourceProperty( - 'use_legacy_sql', 'useLegacySql', bool) - """See - https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.useLegacySql - """ + @property + def use_legacy_sql(self): + """bool: Use legacy SQL syntax. - use_query_cache = _TypedApiResourceProperty( - 'use_query_cache', 'useQueryCache', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache - """ + See + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.useLegacySql + """ + return self._get_sub_prop('useLegacySql') - write_disposition = WriteDisposition( - 'write_disposition', 'writeDisposition') - """See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition - """ + @use_legacy_sql.setter + def use_legacy_sql(self, value): + self._set_sub_prop('useLegacySql', value) + + @property + def use_query_cache(self): + """bool: Look for the query result in the cache. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache + """ + return self._get_sub_prop('useQueryCache') + + @use_query_cache.setter + def use_query_cache(self, value): + self._set_sub_prop('useQueryCache', value) + + @property + def write_disposition(self): + """google.cloud.bigquery.job.WriteDisposition: Action that occurs if + the destination table already exists. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition + """ + return self._get_sub_prop('writeDisposition') + + @write_disposition.setter + def write_disposition(self, value): + self._set_sub_prop('writeDisposition', value) @property def table_definitions(self): @@ -1650,17 +1831,34 @@ def table_definitions(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions """ - prop = self._properties.get('tableDefinitions') + prop = self._get_sub_prop('tableDefinitions') if prop is not None: prop = _from_api_repr_table_defs(prop) return prop @table_definitions.setter def table_definitions(self, values): - self._properties['tableDefinitions'] = _to_api_repr_table_defs(values) + self._set_sub_prop( + 'tableDefinitions', _to_api_repr_table_defs(values)) + + def to_api_repr(self): + """Build an API representation of the query job config. + + Returns: + dict: A dictionary in the format used by the BigQuery API. + """ + resource = copy.deepcopy(self._properties) - _maximum_billing_tier = None - _maximum_bytes_billed = None + # Query parameters have an addition property associated with them + # to indicate if the query is using named or positional parameters. + query_parameters = resource['query'].get('queryParameters') + if query_parameters: + if query_parameters[0].get('name') is None: + resource['query']['parameterMode'] = 'POSITIONAL' + else: + resource['query']['parameterMode'] = 'NAMED' + + return resource class QueryJob(_AsyncJob): @@ -1823,46 +2021,16 @@ def _build_resource(self): 'projectId': self.project, 'jobId': self.job_id, }, - 'configuration': { - self._JOB_TYPE: configuration, - }, + 'configuration': configuration, } - - # The dryRun property only applies to query jobs, but it is defined at - # a level higher up. We need to remove it from the query config. - if 'dryRun' in configuration: - dry_run = configuration['dryRun'] - del configuration['dryRun'] - resource['configuration']['dryRun'] = dry_run - - configuration['query'] = self.query + configuration['query']['query'] = self.query return resource - def _scrub_local_properties(self, cleaned): - """Helper: handle subclass properties in cleaned. - - .. note: - - This method assumes that the project found in the resource matches - the client's project. - """ - configuration = cleaned['configuration']['query'] - self.query = configuration['query'] - - # The dryRun property only applies to query jobs, but it is defined at - # a level higher up. We need to copy it to the query config. - self._configuration.dry_run = cleaned['configuration'].get('dryRun') - def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" - # The dryRun property only applies to query jobs, but it is defined at - # a level higher up. We need to copy it to the query config. - # It should already be correctly set by the _scrub_local_properties() - # method. - dry_run = self.dry_run - self._configuration = QueryJobConfig.from_api_repr(configuration) - self._configuration.dry_run = dry_run + self._configuration._properties = copy.deepcopy(configuration) + self.query = _helpers.get_sub_prop(configuration, ['query', 'query']) @classmethod def from_api_repr(cls, resource, client): @@ -1879,7 +2047,7 @@ def from_api_repr(cls, resource, client): :returns: Job parsed from ``resource``. """ job_id, config = cls._get_resource_config(resource) - query = config['query'] + query = config['query']['query'] job = cls(job_id, query, client=client) job._set_properties(resource) return job diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index f619b1cd9f2c..e4ad5ce850a6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -116,9 +116,10 @@ def to_api_repr(self): """ # Put together the basic representation. See http://bit.ly/2hOAT5u. answer = { - 'mode': self.mode.lower(), + 'mode': self.mode.upper(), 'name': self.name, - 'type': self.field_type.lower(), + 'type': self.field_type.upper(), + 'description': self.description, } # If this is a RECORD type, then sub-fields are also included, @@ -140,8 +141,8 @@ def _key(self): """ return ( self._name, - self._field_type.lower(), - self._mode, + self._field_type.upper(), + self._mode.upper(), self._description, self._fields, ) @@ -198,14 +199,4 @@ def _build_schema_resource(fields): :rtype: mapping :returns: a mapping describing the schema of the supplied fields. """ - infos = [] - for field in fields: - info = {'name': field.name, - 'type': field.field_type, - 'mode': field.mode} - if field.description is not None: - info['description'] = field.description - if field.fields: - info['fields'] = _build_schema_resource(field.fields) - infos.append(info) - return infos + return [field.to_api_repr() for field in fields] diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 515719f7ec77..7ba78dc0ad46 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -892,6 +892,18 @@ def test_query_w_failed_query(self): with self.assertRaises(BadRequest): Config.CLIENT.query('invalid syntax;').result() + def test_query_w_wrong_config(self): + from google.cloud.bigquery.job import LoadJobConfig + + good_query = 'SELECT 1;' + rows = list(Config.CLIENT.query('SELECT 1;').result()) + assert rows[0][0] == 1 + + bad_config = LoadJobConfig() + bad_config.destination = Config.CLIENT.dataset('dset').table('tbl') + with self.assertRaises(Exception): + Config.CLIENT.query(good_query, job_config=bad_config).result() + def test_query_w_timeout(self): query_job = Config.CLIENT.query( 'SELECT * FROM `bigquery-public-data.github_repos.commits`;', diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ef3f94ab2998..90fd868d3862 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -554,9 +554,21 @@ def test_create_table_w_schema_and_query(self): 'datasetId': self.DS_ID, 'tableId': self.TABLE_ID }, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] + 'schema': { + 'fields': [ + { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': None + }, + { + 'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED', + 'description': None + }, + ], }, 'view': {'query': query}, } @@ -585,11 +597,13 @@ def test_create_table_w_schema_and_query(self): 'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED', + 'description': None, }, { 'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED', + 'description': None, }, ], }, @@ -757,9 +771,21 @@ def test_update_table(self): 'datasetId': self.DS_ID, 'tableId': self.TABLE_ID }, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] + 'schema': { + 'fields': [ + { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': None + }, + { + 'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED', + 'description': None + }, + ], }, 'etag': 'etag', 'description': description, @@ -787,9 +813,22 @@ def test_update_table(self): 'datasetId': self.DS_ID, 'tableId': self.TABLE_ID }, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, + 'schema': { + 'fields': [ + { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': None + }, + { + 'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED', + 'description': None + }, + ], + }, 'description': description, 'friendlyName': title, 'labels': {'x': 'y'}, @@ -858,9 +897,22 @@ def test_update_table_w_query(self): query = 'select fullname, age from person_ages' location = 'EU' exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) - schema_resource = {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]} + schema_resource = { + 'fields': [ + { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': None + }, + { + 'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED', + 'description': None + }, + ], + } schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), SchemaField('age', 'INTEGER', mode='REQUIRED') diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 05a058ffea82..51c49f6bdf04 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -51,6 +51,7 @@ def test_api_repr_base(self): 'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED', + 'description': None, }, ], }, diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index f462f1d04f0b..6e7be1de2347 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -262,8 +262,10 @@ def test_to_api_repr_with_encryption(self): self.assertEqual( resource, { - 'destinationEncryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME, + 'load': { + 'destinationEncryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME, + }, }, }) @@ -274,7 +276,9 @@ def test_to_api_repr_with_encryption_none(self): self.assertEqual( resource, { - 'destinationEncryptionConfiguration': None, + 'load': { + 'destinationEncryptionConfiguration': None, + }, }) @@ -418,7 +422,6 @@ def test_ctor(self): self.assertEqual( job.path, '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) - self.assertEqual(job.schema, []) self._verifyInitialReadonlyProperties(job) @@ -429,6 +432,7 @@ def test_ctor(self): self.assertIsNone(job.output_rows) # set/read from resource['configuration']['load'] + self.assertIsNone(job.schema) self.assertIsNone(job.allow_jagged_rows) self.assertIsNone(job.allow_quoted_newlines) self.assertIsNone(job.autodetect) @@ -771,8 +775,18 @@ def test_begin_w_alternate_client(self): 'sourceFormat': 'CSV', 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}, + { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': None, + }, + { + 'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED', + 'description': None, + }, ]} } RESOURCE['configuration']['load'] = LOAD_CONFIGURATION @@ -935,9 +949,11 @@ def test_to_api_repr_with_encryption(self): self.assertEqual( resource, { - 'destinationEncryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME, - } + 'copy': { + 'destinationEncryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME, + }, + }, }) def test_to_api_repr_with_encryption_none(self): @@ -947,7 +963,9 @@ def test_to_api_repr_with_encryption_none(self): self.assertEqual( resource, { - 'destinationEncryptionConfiguration': None, + 'copy': { + 'destinationEncryptionConfiguration': None, + }, }) @@ -1343,6 +1361,55 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) +class TestExtractJobConfig(unittest.TestCase, _Base): + JOB_TYPE = 'extract' + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import ExtractJobConfig + return ExtractJobConfig + + def test_to_api_repr(self): + from google.cloud.bigquery import job + config = self._make_one() + config.compression = job.Compression.SNAPPY + config.destination_format = job.DestinationFormat.AVRO + config.field_delimiter = 'ignored for avro' + config.print_header = False + config._properties['extract']['someNewField'] = 'some-value' + resource = config.to_api_repr() + self.assertEqual( + resource, + { + 'extract': { + 'compression': 'SNAPPY', + 'destinationFormat': 'AVRO', + 'fieldDelimiter': 'ignored for avro', + 'printHeader': False, + 'someNewField': 'some-value', + }, + }) + + def test_from_api_repr(self): + cls = self._get_target_class() + config = cls.from_api_repr( + { + 'extract': { + 'compression': 'NONE', + 'destinationFormat': 'CSV', + 'fieldDelimiter': '\t', + 'printHeader': True, + 'someNewField': 'some-value', + }, + }) + self.assertEqual(config.compression, 'NONE') + self.assertEqual(config.destination_format, 'CSV') + self.assertEqual(config.field_delimiter, '\t') + self.assertEqual(config.print_header, True) + self.assertEqual( + config._properties['extract']['someNewField'], 'some-value') + + class TestExtractJob(unittest.TestCase, _Base): JOB_TYPE = 'extract' SOURCE_TABLE = 'source_table' @@ -1665,7 +1732,14 @@ def _make_one(self, *args, **kw): def test_ctor(self): config = self._make_one() - self.assertEqual(config._properties, {}) + self.assertEqual(config._properties, {'query': {}}) + + def test_ctor_w_none(self): + config = self._make_one() + config.default_dataset = None + config.destination = None + self.assertIsNone(config.default_dataset) + self.assertIsNone(config.destination) def test_from_api_repr_empty(self): klass = self._get_target_class() @@ -1678,13 +1752,16 @@ def test_from_api_repr_empty(self): def test_from_api_repr_normal(self): resource = { - 'useLegacySql': True, - 'query': 'no property for me', - 'defaultDataset': { - 'projectId': 'someproject', - 'datasetId': 'somedataset', + 'query': { + 'useLegacySql': True, + 'query': 'no property for me', + 'defaultDataset': { + 'projectId': 'someproject', + 'datasetId': 'somedataset', + }, + 'someNewProperty': 'I should be saved, too.', }, - 'someNewProperty': 'I should be saved, too.', + 'dryRun': True, } klass = self._get_target_class() @@ -1694,28 +1771,33 @@ def test_from_api_repr_normal(self): self.assertEqual( config.default_dataset, DatasetReference('someproject', 'somedataset')) + self.assertTrue(config.dry_run) # Make sure unknown properties propagate. - self.assertEqual(config._properties['query'], 'no property for me') self.assertEqual( - config._properties['someNewProperty'], 'I should be saved, too.') + config._properties['query']['query'], 'no property for me') + self.assertEqual( + config._properties['query']['someNewProperty'], + 'I should be saved, too.') def test_to_api_repr_normal(self): config = self._make_one() config.use_legacy_sql = True config.default_dataset = DatasetReference( 'someproject', 'somedataset') + config.dry_run = False config._properties['someNewProperty'] = 'Woohoo, alpha stuff.' resource = config.to_api_repr() - self.assertTrue(resource['useLegacySql']) + self.assertFalse(resource['dryRun']) + self.assertTrue(resource['query']['useLegacySql']) self.assertEqual( - resource['defaultDataset']['projectId'], 'someproject') + resource['query']['defaultDataset']['projectId'], 'someproject') self.assertEqual( - resource['defaultDataset']['datasetId'], 'somedataset') + resource['query']['defaultDataset']['datasetId'], 'somedataset') # Make sure unknown properties propagate. self.assertEqual( - config._properties['someNewProperty'], 'Woohoo, alpha stuff.') + resource['someNewProperty'], 'Woohoo, alpha stuff.') def test_to_api_repr_with_encryption(self): config = self._make_one() @@ -1724,8 +1806,10 @@ def test_to_api_repr_with_encryption(self): resource = config.to_api_repr() self.assertEqual( resource, { - 'destinationEncryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME, + 'query': { + 'destinationEncryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME, + }, }, }) @@ -1736,14 +1820,18 @@ def test_to_api_repr_with_encryption_none(self): self.assertEqual( resource, { - 'destinationEncryptionConfiguration': None, + 'query': { + 'destinationEncryptionConfiguration': None, + }, }) def test_from_api_repr_with_encryption(self): resource = { - 'destinationEncryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME - } + 'query': { + 'destinationEncryptionConfiguration': { + 'kmsKeyName': self.KMS_KEY_NAME, + }, + }, } klass = self._get_target_class() config = klass.from_api_repr(resource) diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 1613b4feb465..d2574b33e420 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -64,9 +64,10 @@ def test_constructor_subfields(self): def test_to_api_repr(self): field = self._make_one('foo', 'INTEGER', 'NULLABLE') self.assertEqual(field.to_api_repr(), { - 'mode': 'nullable', + 'mode': 'NULLABLE', 'name': 'foo', - 'type': 'integer', + 'type': 'INTEGER', + 'description': None, }) def test_to_api_repr_with_subfield(self): @@ -74,13 +75,15 @@ def test_to_api_repr_with_subfield(self): field = self._make_one('foo', 'RECORD', 'REQUIRED', fields=(subfield,)) self.assertEqual(field.to_api_repr(), { 'fields': [{ - 'mode': 'nullable', + 'mode': 'NULLABLE', 'name': 'bar', - 'type': 'integer', + 'type': 'INTEGER', + 'description': None, }], - 'mode': 'required', + 'mode': 'REQUIRED', 'name': 'foo', - 'type': 'record', + 'type': 'RECORD', + 'description': None, }) def test_from_api_repr(self): @@ -244,7 +247,7 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one('field1', 'STRING') - expected = "SchemaField('field1', 'string', 'NULLABLE', None, ())" + expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, ())" self.assertEqual(repr(field1), expected) @@ -326,11 +329,13 @@ def test_defaults(self): self.assertEqual(resource[0], {'name': 'full_name', 'type': 'STRING', - 'mode': 'REQUIRED'}) + 'mode': 'REQUIRED', + 'description': None}) self.assertEqual(resource[1], {'name': 'age', 'type': 'INTEGER', - 'mode': 'REQUIRED'}) + 'mode': 'REQUIRED', + 'description': None}) def test_w_description(self): from google.cloud.bigquery.schema import SchemaField @@ -349,7 +354,8 @@ def test_w_description(self): self.assertEqual(resource[1], {'name': 'age', 'type': 'INTEGER', - 'mode': 'REQUIRED'}) + 'mode': 'REQUIRED', + 'description': None}) def test_w_subfields(self): from google.cloud.bigquery.schema import SchemaField @@ -364,14 +370,18 @@ def test_w_subfields(self): self.assertEqual(resource[0], {'name': 'full_name', 'type': 'STRING', - 'mode': 'REQUIRED'}) + 'mode': 'REQUIRED', + 'description': None}) self.assertEqual(resource[1], {'name': 'phone', 'type': 'RECORD', 'mode': 'REPEATED', + 'description': None, 'fields': [{'name': 'type', 'type': 'STRING', - 'mode': 'REQUIRED'}, + 'mode': 'REQUIRED', + 'description': None}, {'name': 'number', 'type': 'STRING', - 'mode': 'REQUIRED'}]}) + 'mode': 'REQUIRED', + 'description': None}]}) From c016332e6f6358544cf356b0f9079b6ef65e809e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 20 Mar 2018 16:45:49 -0700 Subject: [PATCH 0409/2016] Add location property to job classes. (#5071) * BigQuery: LoadJobConfig change to job.configuration resource repr. * Fix tests for LoadJob * CopyJobConfig: use job.configuration for API repr Also, fixes some lint errors. * Use job.configuration API resource for ExtractJobConfig * Use job.configuration API resource for QueryJobConfig. * Remove unused _EnumAPIResourceProperty class. Since the API resource properties are no longer used for validation, there was a test coverage gap. * Remove second colon for docs rendering. * Fix docstring. Docstrings for private helper methods. * Add test for wrong config type. * BigQuery: Add location field to load job. * Use explicit location for loading data samples. * Add location and project parameters to query() method Uses explicit location in query snippets. * Add location field to copy and extract jobs. * Raise KeyError on bad LoadJob. Use autospec for job tests. - LoadJob expected a KeyError in unit tests for a bad job resource. - Use more standard autospec mock for Connection mock class. * Add tests for location params in job classes. * Use autospec for Connection in job tests. Use more standard autospec mock for Connection mock class. * Use autospec for Connection in client tests. Use more standard autospec mock for Connection mock class. * Fix _build_resource for jobs (add location field) * Add unit tests to cover explicit project/location in client. * Use `assert_called_once_with()` for connection mocks. * Update tests to use `assert_called_once_with` * Add location to `get_job` and `cancel_job` * Add system tests for `get` and `cancel` with wrong location * Tests to cover `job.exists()`, `get_job()` and `cancel_job()` * Fix docs format for `get_job` and `cancel_job`. --- .../google/cloud/bigquery/client.py | 471 ++++++++++-------- .../google/cloud/bigquery/job.py | 138 +++-- .../google-cloud-bigquery/tests/system.py | 116 ++++- .../tests/unit/test_client.py | 193 ++++++- .../tests/unit/test_job.py | 121 ++++- 5 files changed, 756 insertions(+), 283 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 83df75d7caa1..77214529a697 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -39,10 +39,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.job import CopyJob -from google.cloud.bigquery.job import ExtractJob -from google.cloud.bigquery.job import LoadJob -from google.cloud.bigquery.job import QueryJob, QueryJobConfig +from google.cloud.bigquery import job from google.cloud.bigquery.query import _QueryResults from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableListItem @@ -508,27 +505,25 @@ def delete_table(self, table, retry=DEFAULT_RETRY): raise TypeError('table must be a Table or a TableReference') self._call_api(retry, method='DELETE', path=table.path) - def _get_query_results(self, job_id, retry, project=None, timeout_ms=None): + def _get_query_results( + self, job_id, retry, project=None, timeout_ms=None, location=None): """Get the query results object for a query job. - :type job_id: str - :param job_id: Name of the query job. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. - - :type project: str - :param project: - (Optional) project ID for the query job (defaults to the project of - the client). - - :type timeout_ms: int - :param timeout_ms: - (Optional) number of milliseconds the the API call should wait for - the query to complete before the request times out. + Arguments: + job_id (str): Name of the query job. + retry (google.api_core.retry.Retry): + (Optional) How to retry the RPC. + project (str): + (Optional) project ID for the query job (defaults to the + project of the client). + timeout_ms (int): + (Optional) number of milliseconds the the API call should + wait for the query to complete before the request times out. + location (str): Location of the query job. - :rtype: :class:`google.cloud.bigquery.query._QueryResults` - :returns: a new ``_QueryResults`` instance + Returns: + google.cloud.bigquery.query._QueryResults: + A new ``_QueryResults`` instance. """ extra_params = {'maxResults': 0} @@ -539,6 +534,9 @@ def _get_query_results(self, job_id, retry, project=None, timeout_ms=None): if timeout_ms is not None: extra_params['timeoutMs'] = timeout_ms + if location is not None: + extra_params['location'] = location + path = '/projects/{}/queries/{}'.format(project, job_id) # This call is typically made in a polling loop that checks whether the @@ -563,43 +561,46 @@ def job_from_resource(self, resource): """ config = resource['configuration'] if 'load' in config: - return LoadJob.from_api_repr(resource, self) + return job.LoadJob.from_api_repr(resource, self) elif 'copy' in config: - return CopyJob.from_api_repr(resource, self) + return job.CopyJob.from_api_repr(resource, self) elif 'extract' in config: - return ExtractJob.from_api_repr(resource, self) + return job.ExtractJob.from_api_repr(resource, self) elif 'query' in config: - return QueryJob.from_api_repr(resource, self) + return job.QueryJob.from_api_repr(resource, self) raise ValueError('Cannot parse job resource') - def get_job(self, job_id, project=None, retry=DEFAULT_RETRY): + def get_job( + self, job_id, project=None, location=None, retry=DEFAULT_RETRY): """Fetch a job for the project associated with this client. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - :type job_id: str - :param job_id: Name of the job. + Arguments: + job_id (str): Unique job identifier. - :type project: str - :param project: - project ID owning the job (defaults to the client's project) - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Keyword Arguments: + project (str): + (Optional) ID of the project which ownsthe job (defaults to + the client's project). + location (str): Location where the job was run. + retry (google.api_core.retry.Retry): + (Optional) How to retry the RPC. - :rtype: One of: - :class:`google.cloud.bigquery.job.LoadJob`, - :class:`google.cloud.bigquery.job.CopyJob`, - :class:`google.cloud.bigquery.job.ExtractJob`, - or :class:`google.cloud.bigquery.job.QueryJob` - :returns: - Concrete job instance, based on the resource returned by the API. + Returns: + Union[google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob \ + google.cloud.bigquery.job.QueryJob]: + Job instance, based on the resource returned by the API. """ extra_params = {'projection': 'full'} if project is None: project = self.project + if location is not None: + extra_params['location'] = location path = '/projects/{}/jobs/{}'.format(project, job_id) @@ -608,34 +609,37 @@ def get_job(self, job_id, project=None, retry=DEFAULT_RETRY): return self.job_from_resource(resource) - def cancel_job(self, job_id, project=None, retry=DEFAULT_RETRY): + def cancel_job( + self, job_id, project=None, location=None, retry=DEFAULT_RETRY): """Attempt to cancel a job from a job ID. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel - :type job_id: str - :param job_id: Name of the job. - - :type project: str - :param project: - project ID owning the job (defaults to the client's project) + Arguments: + job_id (str): Unique job identifier. - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Keyword Arguments: + project (str): + (Optional) ID of the project which ownsthe job (defaults to + the client's project). + location (str): Location where the job was run. + retry (google.api_core.retry.Retry): + (Optional) How to retry the RPC. - :rtype: One of: - :class:`google.cloud.bigquery.job.LoadJob`, - :class:`google.cloud.bigquery.job.CopyJob`, - :class:`google.cloud.bigquery.job.ExtractJob`, - or :class:`google.cloud.bigquery.job.QueryJob` - :returns: - Concrete job instance, based on the resource returned by the API. + Returns: + Union[google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob \ + google.cloud.bigquery.job.QueryJob]: + Job instance, based on the resource returned by the API. """ extra_params = {'projection': 'full'} if project is None: project = self.project + if location is not None: + extra_params['location'] = location path = '/projects/{}/jobs/{}/cancel'.format(project, job_id) @@ -701,97 +705,106 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, max_results=max_results, extra_params=extra_params) - def load_table_from_uri(self, source_uris, destination, - job_id=None, job_id_prefix=None, - job_config=None, retry=DEFAULT_RETRY): + def load_table_from_uri( + self, source_uris, destination, job_id=None, job_id_prefix=None, + location=None, project=None, job_config=None, + retry=DEFAULT_RETRY): """Starts a job for loading data into a table from CloudStorage. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load - :type source_uris: One of: - str - sequence of string - :param source_uris: URIs of data files to be loaded; in format - ``gs:///``. - - :type destination: :class:`google.cloud.bigquery.table.TableReference` - :param destination: Table into which data is to be loaded. - - :type job_id: str - :param job_id: (Optional) Name of the job. - - :type job_id_prefix: str or ``NoneType`` - :param job_id_prefix: (Optional) the user-provided prefix for a - randomly generated job ID. This parameter will be - ignored if a ``job_id`` is also given. - - :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` - :param job_config: (Optional) Extra configuration options for the job. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Arguments: + source_uris (Union[str, Sequence[str]]): + URIs of data files to be loaded; in format + ``gs:///``. + destination (google.cloud.bigquery.table.TableReference): + Table into which data is to be loaded. + + Keyword Arguments: + job_id (str): (Optional) Name of the job. + job_id_prefix (str): + (Optional) the user-provided prefix for a randomly generated + job ID. This parameter will be ignored if a ``job_id`` is + also given. + location (str): + Location where to run the job. Must match the location of the + destination table. + project (str): + Project ID of the project of where to run the job. Defaults + to the client's project. + job_config (google.cloud.bigquery.job.LoadJobConfig): + (Optional) Extra configuration options for the job. + retry (google.api_core.retry.Retry): + (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.job.LoadJob` - :returns: a new :class:`~google.cloud.bigquery.job.LoadJob` instance + Returns: + google.cloud.bigquery.job.LoadJob: A new load job. """ job_id = _make_job_id(job_id, job_id_prefix) + if project is None: + project = self.project + job_ref = job._JobReference(job_id, project=project, location=location) if isinstance(source_uris, six.string_types): source_uris = [source_uris] - job = LoadJob(job_id, source_uris, destination, self, job_config) - job._begin(retry=retry) - return job - - def load_table_from_file(self, file_obj, destination, - rewind=False, - size=None, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, job_id_prefix=None, job_config=None): + load_job = job.LoadJob( + job_ref, source_uris, destination, self, job_config) + load_job._begin(retry=retry) + return load_job + + def load_table_from_file( + self, file_obj, destination, rewind=False, size=None, + num_retries=_DEFAULT_NUM_RETRIES, job_id=None, + job_id_prefix=None, location=None, project=None, + job_config=None): """Upload the contents of this table from a file-like object. Like load_table_from_uri, this creates, starts and returns a ``LoadJob``. - :type file_obj: file - :param file_obj: A file handle opened in binary mode for reading. - - :type destination: :class:`google.cloud.bigquery.table.TableReference` - :param destination: Table into which data is to be loaded. - - :type rewind: bool - :param rewind: If True, seek to the beginning of the file handle before - reading the file. + Arguments: + file_obj (file): A file handle opened in binary mode for reading. + destination (google.cloud.bigquery.table.TableReference): + Table into which data is to be loaded. + + Keyword Arguments: + rewind (bool): + If True, seek to the beginning of the file handle before + reading the file. + size (int): + The number of bytes to read from the file handle. If size is + ``None`` or large, resumable upload will be used. Otherwise, + multipart upload will be used. + num_retries (int): Number of upload retries. Defaults to 6. + job_id (str): (Optional) Name of the job. + job_id_prefix (str): + (Optional) the user-provided prefix for a randomly generated + job ID. This parameter will be ignored if a ``job_id`` is + also given. + location (str): + Location where to run the job. Must match the location of the + destination table. + project (str): + Project ID of the project of where to run the job. Defaults + to the client's project. + job_config (google.cloud.bigquery.job.LoadJobConfig): + (Optional) Extra configuration options for the job. - :type size: int - :param size: The number of bytes to read from the file handle. - If size is ``None`` or large, resumable upload will be - used. Otherwise, multipart upload will be used. - - :type num_retries: int - :param num_retries: Number of upload retries. Defaults to 6. - - :type job_id: str - :param job_id: (Optional) Name of the job. - - :type job_id_prefix: str or ``NoneType`` - :param job_id_prefix: (Optional) the user-provided prefix for a - randomly generated job ID. This parameter will be - ignored if a ``job_id`` is also given. - - :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig` - :param job_config: (Optional) Extra configuration options for the job. - - :rtype: :class:`~google.cloud.bigquery.job.LoadJob` + Returns: + google.cloud.bigquery.job.LoadJob: A new load job. - :returns: the job instance used to load the data (e.g., for - querying status). - :raises: :class:`ValueError` if ``size`` is not passed in and can not - be determined, or if the ``file_obj`` can be detected to be - a file opened in text mode. + Raises: + ValueError: + If ``size`` is not passed in and can not be determined, or if + the ``file_obj`` can be detected to be a file opened in text + mode. """ job_id = _make_job_id(job_id, job_id_prefix) - job = LoadJob(job_id, None, destination, self, job_config) - job_resource = job._build_resource() + if project is None: + project = self.project + job_ref = job._JobReference(job_id, project=project, location=location) + load_job = job.LoadJob(job_ref, None, destination, self, job_config) + job_resource = load_job._build_resource() if rewind: file_obj.seek(0, os.SEEK_SET) _check_mode(file_obj) @@ -913,131 +926,149 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries): return response - def copy_table(self, sources, destination, job_id=None, job_id_prefix=None, - job_config=None, retry=DEFAULT_RETRY): - """Start a job for copying one or more tables into another table. + def copy_table( + self, sources, destination, job_id=None, job_id_prefix=None, + location=None, project=None, job_config=None, + retry=DEFAULT_RETRY): + """Copy one or more tables to another table. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy - :type sources: One of: - :class:`~google.cloud.bigquery.table.TableReference` - sequence of - :class:`~google.cloud.bigquery.table.TableReference` - :param sources: Table or tables to be copied. - - - :type destination: :class:`google.cloud.bigquery.table.TableReference` - :param destination: Table into which data is to be copied. - - :type job_id: str - :param job_id: (Optional) The ID of the job. - - :type job_id_prefix: str or ``NoneType`` - :param job_id_prefix: (Optional) the user-provided prefix for a - randomly generated job ID. This parameter will be - ignored if a ``job_id`` is also given. - - :type job_config: :class:`google.cloud.bigquery.job.CopyJobConfig` - :param job_config: (Optional) Extra configuration options for the job. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Arguments: + sources (Union[google.cloud.bigquery.table.TableReference, \ + Sequence[google.cloud.bigquery.table.TableReference]]): + Table or tables to be copied. + destination (google.cloud.bigquery.table.TableReference): + Table into which data is to be copied. + + Keyword Arguments: + job_id (str): (Optional) The ID of the job. + job_id_prefix (str) + (Optional) the user-provided prefix for a randomly generated + job ID. This parameter will be ignored if a ``job_id`` is + also given. + location (str): + Location where to run the job. Must match the location of any + source table as well as the destination table. + project (str): + Project ID of the project of where to run the job. Defaults + to the client's project. + job_config (google.cloud.bigquery.job.CopyJobConfig): + (Optional) Extra configuration options for the job. + retry (google.api_core.retry.Retry): + (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.job.copyjob` - :returns: a new :class:`google.cloud.bigquery.job.copyjob` instance + Returns: + google.cloud.bigquery.job.CopyJob: A new copy job instance. """ job_id = _make_job_id(job_id, job_id_prefix) + if project is None: + project = self.project + job_ref = job._JobReference(job_id, project=project, location=location) if not isinstance(sources, collections.Sequence): sources = [sources] - job = CopyJob(job_id, sources, destination, client=self, - job_config=job_config) - job._begin(retry=retry) - return job + copy_job = job.CopyJob( + job_ref, sources, destination, client=self, + job_config=job_config) + copy_job._begin(retry=retry) + return copy_job def extract_table( - self, source, destination_uris, job_config=None, job_id=None, - job_id_prefix=None, retry=DEFAULT_RETRY): + self, source, destination_uris, job_id=None, job_id_prefix=None, + location=None, project=None, job_config=None, + retry=DEFAULT_RETRY): """Start a job to extract a table into Cloud Storage files. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract + Arguments: + source (google.cloud.bigquery.table.TableReference): + Table to be extracted. + destination_uris (Union[str, Sequence[str]]): + URIs of Cloud Storage file(s) into which table data is to be + extracted; in format + ``gs:///``. + + Keyword Arguments: + job_id (str): (Optional) The ID of the job. + job_id_prefix (str) + (Optional) the user-provided prefix for a randomly generated + job ID. This parameter will be ignored if a ``job_id`` is + also given. + location (str): + Location where to run the job. Must match the location of the + source table. + project (str): + Project ID of the project of where to run the job. Defaults + to the client's project. + job_config (google.cloud.bigquery.job.ExtractJobConfig): + (Optional) Extra configuration options for the job. + retry (google.api_core.retry.Retry): + (Optional) How to retry the RPC. :type source: :class:`google.cloud.bigquery.table.TableReference` :param source: table to be extracted. - :type destination_uris: One of: - str or - sequence of str - :param destination_uris: - URIs of Cloud Storage file(s) into which table data is to be - extracted; in format ``gs:///``. - - :type kwargs: dict - :param kwargs: Additional keyword arguments. - - :type job_id: str - :param job_id: (Optional) The ID of the job. - - :type job_id_prefix: str or ``NoneType`` - :param job_id_prefix: (Optional) the user-provided prefix for a - randomly generated job ID. This parameter will be - ignored if a ``job_id`` is also given. - - :type job_config: :class:`google.cloud.bigquery.job.ExtractJobConfig` - :param job_config: (Optional) Extra configuration options for the job. - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. - - :rtype: :class:`google.cloud.bigquery.job.ExtractJob` - :returns: a new :class:`google.cloud.bigquery.job.ExtractJob` instance + Returns: + google.cloud.bigquery.job.ExtractJob: A new extract job instance. """ job_id = _make_job_id(job_id, job_id_prefix) + if project is None: + project = self.project + job_ref = job._JobReference(job_id, project=project, location=location) if isinstance(destination_uris, six.string_types): destination_uris = [destination_uris] - job = ExtractJob( - job_id, source, destination_uris, client=self, + extract_job = job.ExtractJob( + job_ref, source, destination_uris, client=self, job_config=job_config) - job._begin(retry=retry) - return job + extract_job._begin(retry=retry) + return extract_job - def query(self, query, job_config=None, job_id=None, job_id_prefix=None, - retry=DEFAULT_RETRY): - """Start a job that runs a SQL query. + def query( + self, query, job_config=None, job_id=None, job_id_prefix=None, + location=None, project=None, retry=DEFAULT_RETRY): + """Run a SQL query. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query - :type query: str - :param query: - SQL query to be executed. Defaults to the standard SQL dialect. - Use the ``job_config`` parameter to change dialects. - - :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig` - :param job_config: (Optional) Extra configuration options for the job. - - :type job_id: str - :param job_id: (Optional) ID to use for the query job. - - :type job_id_prefix: str or ``NoneType`` - :param job_id_prefix: (Optional) the user-provided prefix for a - randomly generated job ID. This parameter will be - ignored if a ``job_id`` is also given. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Arguments: + query (str): + SQL query to be executed. Defaults to the standard SQL + dialect. Use the ``job_config`` parameter to change dialects. + + Keyword Arguments: + job_config (google.cloud.bigquery.job.QueryJobConfig): + (Optional) Extra configuration options for the job. + job_id (str): (Optional) ID to use for the query job. + job_id_prefix (str): + (Optional) The prefix to use for a randomly generated job ID. + This parameter will be ignored if a ``job_id`` is also given. + location (str): + Location where to run the job. Must match the location of the + any table used in the query as well as the destination table. + project (str): + Project ID of the project of where to run the job. Defaults + to the client's project. + retry (google.api_core.retry.Retry): + (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.job.QueryJob` - :returns: a new :class:`google.cloud.bigquery.job.QueryJob` instance + Returns: + google.cloud.bigquery.job.QueryJob: A new query job instance. """ job_id = _make_job_id(job_id, job_id_prefix) - job = QueryJob(job_id, query, client=self, job_config=job_config) - job._begin(retry=retry) - return job + if project is None: + project = self.project + job_ref = job._JobReference(job_id, project=project, location=location) + query_job = job.QueryJob( + job_ref, query, client=self, job_config=job_config) + query_job._begin(retry=retry) + return query_job def insert_rows(self, table, rows, selected_fields=None, **kwargs): """Insert rows into a table via the streaming API. @@ -1311,7 +1342,7 @@ def list_partitions(self, table, retry=DEFAULT_RETRY): :rtype: list :returns: a list of time partitions """ - config = QueryJobConfig() + config = job.QueryJobConfig() config.use_legacy_sql = True # required for '$' syntax query_job = self.query( 'SELECT partition_id from [%s:%s.%s$__PARTITIONS_SUMMARY__]' % diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 1a5a9ad3e78d..cdaaaf0ed081 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -185,23 +185,79 @@ class WriteDisposition(object): WRITE_EMPTY = 'WRITE_EMPTY' +class _JobReference(object): + """A reference to a job. + + Arguments: + job_id (str): ID of the job to run. + project (str): ID of the project where the job runs. + location (str): Location of where the job runs. + """ + + def __init__(self, job_id, project, location): + self._properties = { + 'jobId': job_id, + 'projectId': project, + } + # The location field must not be populated if it is None. + if location: + self._properties['location'] = location + + @property + def job_id(self): + """str: ID of the job.""" + return self._properties.get('jobId') + + @property + def project(self): + """str: ID of the project where the job runs.""" + return self._properties.get('projectId') + + @property + def location(self): + """str: Location where the job runs.""" + return self._properties.get('location') + + def _to_api_repr(self): + """Returns the API resource representation of the job reference.""" + return copy.deepcopy(self._properties) + + @classmethod + def _from_api_repr(cls, resource): + """Returns a job reference for an API resource representation.""" + job_id = resource.get('jobId') + project = resource.get('projectId') + location = resource.get('location') + job_ref = cls(job_id, project, location) + return job_ref + + class _AsyncJob(google.api_core.future.polling.PollingFuture): """Base class for asynchronous jobs. - :type job_id: str - :param job_id: the job's ID in the project associated with the client. - - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration. + Arguments: + job_id (Union[str, _JobReference]): + Job's ID in the project associated with the client or a + fully-qualified job reference. + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project configuration. """ def __init__(self, job_id, client): super(_AsyncJob, self).__init__() - self.job_id = job_id + job_ref = job_id + if not isinstance(job_id, _JobReference): + job_ref = _JobReference(job_id, client.project, None) + self._job_ref = job_ref self._client = client self._properties = {} self._result_set = False self._completion_lock = threading.Lock() + @property + def job_id(self): + """str: ID of the job.""" + return self._job_ref.job_id + @property def project(self): """Project bound to the job. @@ -209,7 +265,12 @@ def project(self): :rtype: str :returns: the project (derived from the client). """ - return self._client.project + return self._job_ref.project + + @property + def location(self): + """str: Location where the job runs.""" + return self._job_ref.location def _require_client(self, client): """Check client or verify over-ride. @@ -452,10 +513,14 @@ def exists(self, client=None, retry=DEFAULT_RETRY): """ client = self._require_client(client) + extra_params = {'fields': 'id'} + if self.location: + extra_params['location'] = self.location + try: client._call_api(retry, method='GET', path=self.path, - query_params={'fields': 'id'}) + query_params=extra_params) except NotFound: return False else: @@ -477,7 +542,12 @@ def reload(self, client=None, retry=DEFAULT_RETRY): """ client = self._require_client(client) - api_response = client._call_api(retry, method='GET', path=self.path) + extra_params = {} + if self.location: + extra_params['location'] = self.location + + api_response = client._call_api( + retry, method='GET', path=self.path, query_params=extra_params) self._set_properties(api_response) def cancel(self, client=None): @@ -496,8 +566,13 @@ def cancel(self, client=None): """ client = self._require_client(client) + extra_params = {} + if self.location: + extra_params['location'] = self.location + api_response = client._connection.api_request( - method='POST', path='%s/cancel' % (self.path,)) + method='POST', path='%s/cancel' % (self.path,), + query_params=extra_params) self._set_properties(api_response['job']) # The Future interface requires that we return True if the *attempt* # to cancel was successful. @@ -1095,10 +1170,7 @@ def _build_resource(self): self.destination.to_api_repr()) return { - 'jobReference': { - 'projectId': self.project, - 'jobId': self.job_id, - }, + 'jobReference': self._job_ref._to_api_repr(), 'configuration': configuration, } @@ -1125,17 +1197,18 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: Job parsed from ``resource``. """ - job_id, config_resource = cls._get_resource_config(resource) + config_resource = resource.get('configuration', {}) config = LoadJobConfig.from_api_repr(config_resource) - dest_config = _helpers.get_sub_prop( - config_resource, ['load', 'destinationTable']) - ds_ref = DatasetReference(dest_config['projectId'], - dest_config['datasetId'],) + # A load job requires a destination table. + dest_config = config_resource['load']['destinationTable'] + ds_ref = DatasetReference( + dest_config['projectId'], dest_config['datasetId']) destination = TableReference(ds_ref, dest_config['tableId']) # sourceUris will be absent if this is a file upload. source_uris = _helpers.get_sub_prop( config_resource, ['load', 'sourceUris']) - job = cls(job_id, source_uris, destination, client, config) + job_ref = _JobReference._from_api_repr(resource['jobReference']) + job = cls(job_ref, source_uris, destination, client, config) job._set_properties(resource) return job @@ -1283,10 +1356,7 @@ def _build_resource(self): }) return { - 'jobReference': { - 'projectId': self.project, - 'jobId': self.job_id, - }, + 'jobReference': self._job_ref._to_api_repr(), 'configuration': configuration, } @@ -1496,16 +1566,11 @@ def _build_resource(self): ['extract', 'destinationUris'], self.destination_uris) - resource = { - 'jobReference': { - 'projectId': self.project, - 'jobId': self.job_id, - }, + return { + 'jobReference': self._job_ref._to_api_repr(), 'configuration': configuration, } - return resource - def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" self._configuration._properties = copy.deepcopy(configuration) @@ -2017,10 +2082,7 @@ def _build_resource(self): configuration = self._configuration.to_api_repr() resource = { - 'jobReference': { - 'projectId': self.project, - 'jobId': self.job_id, - }, + 'jobReference': self._job_ref._to_api_repr(), 'configuration': configuration, } configuration['query']['query'] = self.query @@ -2239,7 +2301,8 @@ def done(self, retry=DEFAULT_RETRY): if self.state != _DONE_STATE: self._query_results = self._client._get_query_results( self.job_id, retry, - project=self.project, timeout_ms=timeout_ms) + project=self.project, timeout_ms=timeout_ms, + location=self.location) # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are @@ -2281,7 +2344,8 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): # Return an iterator instead of returning the job. if not self._query_results: self._query_results = self._client._get_query_results( - self.job_id, retry, project=self.project) + self.job_id, retry, project=self.project, + location=self.location) schema = self._query_results.schema dest_table = self.destination return self._client.list_rows(dest_table, selected_fields=schema, diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 7ba78dc0ad46..9b7912118a1f 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -44,7 +44,8 @@ from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi -from google.cloud.exceptions import Forbidden, NotFound +from google.cloud.exceptions import BadRequest, Forbidden, NotFound +from google.cloud import storage from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState @@ -134,7 +135,8 @@ def _still_in_use(bad_request): if isinstance(doomed, Bucket): retry_409(doomed.delete)(force=True) elif isinstance(doomed, (Dataset, bigquery.DatasetReference)): - retry_in_use(Config.CLIENT.delete_dataset)(doomed) + retry_in_use(Config.CLIENT.delete_dataset)( + doomed, delete_contents=True) elif isinstance(doomed, (Table, bigquery.TableReference)): retry_in_use(Config.CLIENT.delete_table)(doomed) else: @@ -552,10 +554,106 @@ def test_load_table_from_uri_then_dump_table(self): self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) - def _create_storage(self, bucket_name, blob_name): - from google.cloud.storage import Client as StorageClient + def test_load_table_from_file_w_explicit_location(self): + # Create a temporary bucket for extract files. + storage_client = storage.Client() + bucket_name = 'bq_load_table_eu_extract_test' + unique_resource_id() + bucket = storage_client.bucket(bucket_name) + bucket.location = 'eu' + self.to_delete.append(bucket) + bucket.create() - storage_client = StorageClient() + # Create a temporary dataset & table in the EU. + table_bytes = six.BytesIO(b'a,3\nb,2\nc,1\n') + client = Config.CLIENT + dataset = self.temp_dataset( + _make_dataset_id('eu_load_file'), location='EU') + table_ref = dataset.table('letters') + job_config = bigquery.LoadJobConfig() + job_config.skip_leading_rows = 0 + job_config.schema = [ + bigquery.SchemaField('letter', 'STRING'), + bigquery.SchemaField('value', 'INTEGER'), + ] + + # Load the file to an EU dataset with an EU load job. + load_job = client.load_table_from_file( + table_bytes, table_ref, location='EU', job_config=job_config) + load_job.result() + job_id = load_job.job_id + + # Can get the job from the EU. + load_job = client.get_job(job_id, location='EU') + self.assertEqual(job_id, load_job.job_id) + self.assertEqual('EU', load_job.location) + self.assertTrue(load_job.exists()) + + # Cannot get the job from the US. + with self.assertRaises(NotFound): + client.get_job(job_id, location='US') + + load_job_us = client.get_job(job_id) + load_job_us._job_ref._properties['location'] = 'US' + self.assertFalse(load_job_us.exists()) + with self.assertRaises(NotFound): + load_job_us.reload() + + # Can cancel the job from the EU. + self.assertTrue(load_job.cancel()) + load_job = client.cancel_job(job_id, location='EU') + self.assertEqual(job_id, load_job.job_id) + self.assertEqual('EU', load_job.location) + + # Cannot cancel the job from the US. + with self.assertRaises(NotFound): + client.cancel_job(job_id, location='US') + with self.assertRaises(NotFound): + load_job_us.cancel() + + # Can list the table rows. + table = client.get_table(table_ref) + self.assertEqual(table.num_rows, 3) + rows = [(row.letter, row.value) for row in client.list_rows(table)] + self.assertEqual( + list(sorted(rows)), [('a', 3), ('b', 2), ('c', 1)]) + + # Can query from EU. + query_string = 'SELECT MAX(value) FROM `{}.letters`'.format( + dataset.dataset_id) + max_value = list(client.query(query_string, location='EU'))[0][0] + self.assertEqual(max_value, 3) + + # Cannot query from US. + with self.assertRaises(BadRequest): + list(client.query(query_string, location='US')) + + # Can copy from EU. + copy_job = client.copy_table( + table_ref, dataset.table('letters2'), location='EU') + copy_job.result() + + # Cannot copy from US. + with self.assertRaises(BadRequest): + client.copy_table( + table_ref, dataset.table('letters2_us'), + location='US').result() + + # Can extract from EU. + extract_job = client.extract_table( + table_ref, + 'gs://{}/letters.csv'.format(bucket_name), + location='EU') + extract_job.result() + + # Cannot extract from US. + with self.assertRaises(BadRequest): + client.extract_table( + table_ref, + 'gs://{}/letters-us.csv'.format(bucket_name), + location='US').result() + + def _create_storage(self, bucket_name, blob_name): + storage_client = storage.Client() # In the **very** rare case the bucket name is reserved, this # fails with a ConnectionError. @@ -1586,9 +1684,11 @@ def test_nested_table_to_dataframe(self): row['record_col']['nested_record']['nested_nested_string'], 'some deep insight') - def temp_dataset(self, dataset_id): - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(Config.CLIENT.dataset(dataset_id))) + def temp_dataset(self, dataset_id, location=None): + dataset = Dataset(Config.CLIENT.dataset(dataset_id)) + if location: + dataset.location = location + dataset = retry_403(Config.CLIENT.create_dataset)(dataset) self.to_delete.append(dataset) return dataset diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 90fd868d3862..3f8e7e276e64 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -79,12 +79,13 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): with self.assertRaises(NotFound): client._get_query_results( - 'nothere', None, project='other-project', timeout_ms=500) + 'nothere', None, project='other-project', location='US', + timeout_ms=500) conn.api_request.assert_called_once_with( method='GET', path='/projects/other-project/queries/nothere', - query_params={'maxResults': 0, 'timeoutMs': 500}) + query_params={'maxResults': 0, 'timeoutMs': 500, 'location': 'US'}) def test__get_query_results_hit(self): job_id = 'query_job' @@ -560,13 +561,13 @@ def test_create_table_w_schema_and_query(self): 'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED', - 'description': None + 'description': None, }, { 'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED', - 'description': None + 'description': None, }, ], }, @@ -1255,12 +1256,12 @@ def test_get_job_miss_w_explict_project(self): conn = client._connection = _make_connection() with self.assertRaises(NotFound): - client.get_job(JOB_ID, project=OTHER_PROJECT) + client.get_job(JOB_ID, project=OTHER_PROJECT, location='EU') conn.api_request.assert_called_once_with( method='GET', path='/projects/OTHER_PROJECT/jobs/NONESUCH', - query_params={'projection': 'full'}) + query_params={'projection': 'full', 'location': 'EU'}) def test_get_job_hit(self): from google.cloud.bigquery.job import CreateDisposition @@ -1318,12 +1319,12 @@ def test_cancel_job_miss_w_explict_project(self): conn = client._connection = _make_connection() with self.assertRaises(NotFound): - client.cancel_job(JOB_ID, project=OTHER_PROJECT) + client.cancel_job(JOB_ID, project=OTHER_PROJECT, location='EU') conn.api_request.assert_called_once_with( method='POST', path='/projects/OTHER_PROJECT/jobs/NONESUCH/cancel', - query_params={'projection': 'full'}) + query_params={'projection': 'full', 'location': 'EU'}) def test_cancel_job_hit(self): from google.cloud.bigquery.job import QueryJob @@ -1624,6 +1625,44 @@ def test_load_table_from_uri(self): self.assertEqual(list(job.source_uris), [SOURCE_URI]) self.assertIs(job.destination, destination) + def test_load_table_from_uri_w_explicit_project(self): + job_id = 'this-is-a-job-id' + destination_id = 'destination_table' + source_uri = 'gs://example/source.csv' + resource = { + 'jobReference': { + 'projectId': 'other-project', + 'location': 'US', + 'jobId': job_id, + }, + 'configuration': { + 'load': { + 'sourceUris': [source_uri], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': destination_id, + }, + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _make_connection(resource) + destination = client.dataset(self.DS_ID).table(destination_id) + + client.load_table_from_uri( + source_uri, destination, job_id=job_id, project='other-project', + location='US') + + # Check that load_table_from_uri actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/other-project/jobs', + data=resource) + @staticmethod def _mock_requests_response(status_code, headers, content=b''): return mock.Mock( @@ -1826,6 +1865,52 @@ def test_copy_table(self): self.assertEqual(list(job.sources), [source, source2]) self.assertIs(job.destination, destination) + def test_copy_table_w_explicit_project(self): + job_id = 'this-is-a-job-id' + source_id = 'source_table' + destination_id = 'destination_table' + resource = { + 'jobReference': { + 'projectId': 'other-project', + 'location': 'US', + 'jobId': job_id, + }, + 'configuration': { + 'copy': { + 'sourceTables': [ + { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': source_id, + }, + ], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': destination_id, + }, + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _make_connection(resource) + dataset = client.dataset(self.DS_ID) + source = dataset.table(source_id) + destination = dataset.table(destination_id) + + client.copy_table( + source, destination, job_id=job_id, project='other-project', + location='US') + + # Check that copy_table actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/other-project/jobs', + data=resource) + def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob @@ -1871,6 +1956,45 @@ def test_extract_table(self): self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) + def test_extract_table_w_explicit_project(self): + job_id = 'job_id' + source_id = 'source_table' + destination = 'gs://bucket_name/object_name' + resource = { + 'jobReference': { + 'projectId': 'other-project', + 'location': 'US', + 'jobId': job_id, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': source_id, + }, + 'destinationUris': [destination], + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _make_connection(resource) + dataset = client.dataset(self.DS_ID) + source = dataset.table(source_id) + + client.extract_table( + source, destination, job_id=job_id, project='other-project', + location='US') + + # Check that extract_table actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/other-project/jobs', + data=resource) + def test_extract_table_generated_job_id(self): from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import ExtractJobConfig @@ -2017,6 +2141,37 @@ def test_query_defaults(self): self.assertEqual(sent_config['query'], QUERY) self.assertFalse(sent_config['useLegacySql']) + def test_query_w_explicit_project(self): + job_id = 'some-job-id' + query = 'select count(*) from persons' + resource = { + 'jobReference': { + 'projectId': 'other-project', + 'location': 'US', + 'jobId': job_id, + }, + 'configuration': { + 'query': { + 'query': query, + 'useLegacySql': False, + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _make_connection(resource) + + client.query( + query, job_id=job_id, project='other-project', location='US') + + # Check that query actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/other-project/jobs', + data=resource) + def test_query_w_udf_resources(self): from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig @@ -2955,6 +3110,28 @@ def test_load_table_from_file_resumable(self): self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES) + def test_load_table_from_file_w_explicit_project(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, self.TABLE_REF, job_id='job_id', + project='other-project', location='US', + job_config=self._make_config()) + + expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION) + expected_resource['jobReference']['location'] = 'US' + expected_resource['jobReference']['projectId'] = 'other-project' + do_upload.assert_called_once_with( + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES) + def test_load_table_from_file_resumable_metadata(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery.job import CreateDisposition diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 6e7be1de2347..2d167321cc73 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -460,6 +460,16 @@ def test_ctor_w_config(self): client, config) self.assertEqual(job.schema, [full_name, age]) + def test_ctor_w_job_reference(self): + from google.cloud.bigquery import job + + client = _make_client(project=self.PROJECT) + job_ref = job._JobReference(self.JOB_ID, 'alternative-project', 'US') + load_job = self._make_one( + job_ref, [self.SOURCE1], self.TABLE_REF, client) + self.assertEqual(load_job.project, 'alternative-project') + self.assertEqual(load_job.location, 'US') + def test_done(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=True) @@ -833,6 +843,31 @@ def test_begin_w_alternate_client(self): self.assertEqual(req[1]['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + def test_begin_w_job_reference(self): + from google.cloud.bigquery import job + + resource = self._make_resource() + resource['jobReference']['projectId'] = 'alternative-project' + resource['jobReference']['location'] = 'US' + job_ref = job._JobReference(self.JOB_ID, 'alternative-project', 'US') + conn = _make_connection(resource) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one( + job_ref, [self.SOURCE1], self.TABLE_REF, client) + + load_job._begin() + + conn.api_request.assert_called_once() + _, request = conn.api_request.call_args + self.assertEqual(request['method'], 'POST') + self.assertEqual( + request['path'], '/projects/alternative-project/jobs') + self.assertEqual( + request['data']['jobReference']['projectId'], + 'alternative-project') + self.assertEqual(request['data']['jobReference']['location'], 'US') + self.assertEqual(request['data']['jobReference']['jobId'], self.JOB_ID) + def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _make_connection() @@ -864,6 +899,22 @@ def test_exists_hit_w_alternate_client(self): path=PATH, query_params={'fields': 'id'}) + def test_exists_miss_w_job_reference(self): + from google.cloud.bigquery import job + + job_ref = job._JobReference('my-job-id', 'other-project', 'US') + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one( + job_ref, [self.SOURCE1], self.TABLE_REF, client) + + self.assertFalse(load_job.exists()) + + conn.api_request.assert_called_once_with( + method='GET', + path='/projects/other-project/jobs/my-job-id', + query_params={'fields': 'id', 'location': 'US'}) + def test_reload_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() @@ -876,7 +927,8 @@ def test_reload_w_bound_client(self): conn.api_request.assert_called_once_with( method='GET', - path=PATH) + path=PATH, + query_params={}) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): @@ -894,9 +946,30 @@ def test_reload_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( method='GET', - path=PATH) + path=PATH, + query_params={}) self._verifyResourceProperties(job, RESOURCE) + def test_reload_w_job_reference(self): + from google.cloud.bigquery import job + + resource = self._make_resource(ended=True) + resource['jobReference']['projectId'] = 'alternative-project' + resource['jobReference']['location'] = 'US' + job_ref = job._JobReference(self.JOB_ID, 'alternative-project', 'US') + conn = _make_connection(resource) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one( + job_ref, [self.SOURCE1], self.TABLE_REF, client) + + load_job.reload() + + conn.api_request.assert_called_once_with( + method='GET', + path='/projects/alternative-project/jobs/{}'.format( + self.JOB_ID), + query_params={'location': 'US'}) + def test_cancel_w_bound_client(self): PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) @@ -910,7 +983,8 @@ def test_cancel_w_bound_client(self): conn.api_request.assert_called_once_with( method='POST', - path=PATH) + path=PATH, + query_params={}) self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_alternate_client(self): @@ -929,9 +1003,30 @@ def test_cancel_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( method='POST', - path=PATH) + path=PATH, + query_params={}) self._verifyResourceProperties(job, RESOURCE) + def test_cancel_w_job_reference(self): + from google.cloud.bigquery import job + + resource = self._make_resource(ended=True) + resource['jobReference']['projectId'] = 'alternative-project' + resource['jobReference']['location'] = 'US' + job_ref = job._JobReference(self.JOB_ID, 'alternative-project', 'US') + conn = _make_connection({'job': resource}) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one( + job_ref, [self.SOURCE1], self.TABLE_REF, client) + + load_job.cancel() + + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/alternative-project/jobs/{}/cancel'.format( + self.JOB_ID), + query_params={'location': 'US'}) + class TestCopyJobConfig(unittest.TestCase, _Base): JOB_TYPE = 'copy' @@ -1338,7 +1433,8 @@ def test_reload_w_bound_client(self): conn.api_request.assert_called_once_with( method='GET', - path=PATH) + path=PATH, + query_params={}) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): @@ -1357,7 +1453,8 @@ def test_reload_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( method='GET', - path=PATH) + path=PATH, + query_params={}) self._verifyResourceProperties(job, RESOURCE) @@ -1698,7 +1795,8 @@ def test_reload_w_bound_client(self): job.reload() - conn.api_request.assert_called_once_with(method='GET', path=PATH) + conn.api_request.assert_called_once_with( + method='GET', path=PATH, query_params={}) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): @@ -1716,7 +1814,8 @@ def test_reload_w_alternate_client(self): job.reload(client=client2) conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with(method='GET', path=PATH) + conn2.api_request.assert_called_once_with( + method='GET', path=PATH, query_params={}) self._verifyResourceProperties(job, RESOURCE) @@ -2969,7 +3068,8 @@ def test_reload_w_bound_client(self): self.assertNotEqual(job.destination, table_ref) - conn.api_request.assert_called_once_with(method='GET', path=PATH) + conn.api_request.assert_called_once_with( + method='GET', path=PATH, query_params={}) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): @@ -2992,7 +3092,8 @@ def test_reload_w_alternate_client(self): job.reload(client=client2) conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with(method='GET', path=PATH) + conn2.api_request.assert_called_once_with( + method='GET', path=PATH, query_params={}) self._verifyResourceProperties(job, RESOURCE) @unittest.skipIf(pandas is None, 'Requires `pandas`') From 421a4ba765115004f0de8460bf3a2fbce0f460c7 Mon Sep 17 00:00:00 2001 From: Vikram Date: Wed, 21 Mar 2018 10:25:16 -0700 Subject: [PATCH 0410/2016] BigQuery: Don't use `selected_fields` for listing query result rows (#5072) * Update job.py fix for #4864 * Adds system tests for listing rows from table with many columns Order of rows in tabledata.list is not guaranteed, so use a set to verify expected results. * Include schema in job results unit tests. --- .../google/cloud/bigquery/job.py | 7 +-- .../google-cloud-bigquery/tests/system.py | 53 +++++++++++++++++++ .../tests/unit/test_job.py | 4 ++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index cdaaaf0ed081..340fe612a39c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -33,6 +33,7 @@ from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import Table from google.cloud.bigquery import _helpers from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery._helpers import _int_or_none @@ -2347,9 +2348,9 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): self.job_id, retry, project=self.project, location=self.location) schema = self._query_results.schema - dest_table = self.destination - return self._client.list_rows(dest_table, selected_fields=schema, - retry=retry) + dest_table_ref = self.destination + dest_table = Table(dest_table_ref, schema=schema) + return self._client.list_rows(dest_table, retry=retry) def to_dataframe(self): """Return a pandas DataFrame from a QueryJob diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 9b7912118a1f..9e7d8bd95694 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -363,6 +363,59 @@ def _fetch_single_page(table, selected_fields=None): page = six.next(iterator.pages) return list(page) + def _create_table_many_columns(self, rows): + # Load a table with many columns + dataset = self.temp_dataset(_make_dataset_id('list_rows')) + table_id = 'many_columns' + table_ref = dataset.table(table_id) + self.to_delete.insert(0, table_ref) + schema = [ + bigquery.SchemaField( + 'column_{}_with_long_name'.format(col_i), + 'INTEGER') + for col_i in range(len(rows[0]))] + body = '' + for row in rows: + body += ','.join([str(item) for item in row]) + body += '\n' + config = bigquery.LoadJobConfig() + config.schema = schema + job = Config.CLIENT.load_table_from_file( + six.StringIO(body), table_ref, job_config=config) + job.result() + return bigquery.Table(table_ref, schema=schema) + + def test_list_rows_many_columns(self): + rows = [[], []] + # BigQuery tables can have max 10,000 columns + for col_i in range(9999): + rows[0].append(col_i) + rows[1].append(10000 - col_i) + expected_rows = frozenset([tuple(row) for row in rows]) + table = self._create_table_many_columns(rows) + + rows = list(Config.CLIENT.list_rows(table)) + + assert len(rows) == 2 + rows_set = frozenset([tuple(row.values()) for row in rows]) + assert rows_set == expected_rows + + def test_query_many_columns(self): + rows = [[], []] + # BigQuery tables can have max 10,000 columns + for col_i in range(9999): + rows[0].append(col_i) + rows[1].append(10000 - col_i) + expected_rows = frozenset([tuple(row) for row in rows]) + table = self._create_table_many_columns(rows) + + rows = list(Config.CLIENT.query( + 'SELECT * FROM `{}.many_columns`'.format(table.dataset_id))) + + assert len(rows) == 2 + rows_set = frozenset([tuple(row.values()) for row in rows]) + assert rows_set == expected_rows + def test_insert_rows_then_dump_table(self): NOW_SECONDS = 1448911495.484366 NOW = datetime.datetime.utcfromtimestamp( diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 2d167321cc73..51f72b813767 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2523,6 +2523,7 @@ def test_result(self): 'projectId': self.PROJECT, 'jobId': self.JOB_ID, }, + 'schema': {'fields': [{'name': 'col1', 'type': 'STRING'}]}, } connection = _make_connection(query_resource, query_resource) client = _make_client(self.PROJECT, connection=connection) @@ -2541,6 +2542,7 @@ def test_result_invokes_begins(self): 'projectId': self.PROJECT, 'jobId': self.JOB_ID, }, + 'schema': {'fields': [{'name': 'col1', 'type': 'STRING'}]}, } query_resource = copy.deepcopy(incomplete_resource) query_resource['jobComplete'] = True @@ -2570,6 +2572,7 @@ def test_result_w_timeout(self): 'projectId': self.PROJECT, 'jobId': self.JOB_ID, }, + 'schema': {'fields': [{'name': 'col1', 'type': 'STRING'}]}, } done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} @@ -3141,6 +3144,7 @@ def test_iter(self): 'projectId': self.PROJECT, 'jobId': self.JOB_ID, }, + 'schema': {'fields': [{'name': 'col1', 'type': 'STRING'}]}, } done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} From 12c4534af01e48e1271d36dcc1bd04a2089e539d Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Thu, 22 Mar 2018 13:03:25 -0700 Subject: [PATCH 0411/2016] shorten snippets test name (#5091) --- packages/google-cloud-bigquery/nox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 11b135df79ca..5f4741e61490 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -105,7 +105,7 @@ def system(session, py): @nox.session @nox.parametrize('py', ['2.7', '3.6']) -def snippets_tests(session, py): +def snippets(session, py): """Run the system test suite.""" # Sanity check: Only run system tests if the environment variable is set. From 66cf9ebbbbfa25505cf978eda7b24e127e15ddf5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 23 Mar 2018 13:51:07 -0700 Subject: [PATCH 0412/2016] BigQuery: Add dev version for 0.32.0 release (#5105) This will allow downstream packages (like pandas-gbq) to check for this version when using the package at HEAD in CI tests. --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index fc3e3562d3f9..52e978836444 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '0.31.0' +version = '0.32.0.dev1' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From da433bc2f951c1da40a4b76b303769a6f80e3503 Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Fri, 23 Mar 2018 14:37:30 -0700 Subject: [PATCH 0413/2016] StringIO to BytesIO (#5101) --- packages/google-cloud-bigquery/tests/system.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 9e7d8bd95694..7d876e32f676 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -381,7 +381,7 @@ def _create_table_many_columns(self, rows): config = bigquery.LoadJobConfig() config.schema = schema job = Config.CLIENT.load_table_from_file( - six.StringIO(body), table_ref, job_config=config) + six.BytesIO(body), table_ref, job_config=config) job.result() return bigquery.Table(table_ref, schema=schema) @@ -1707,7 +1707,7 @@ def test_nested_table_to_dataframe(self): {'string_col': 'Some value', 'record_col': record}, ] rows = [json.dumps(row) for row in to_insert] - body = six.StringIO('{}\n'.format('\n'.join(rows))) + body = six.BytesIO('{}\n'.format('\n'.join(rows))) table_id = 'test_table' dataset = self.temp_dataset(_make_dataset_id('nested_df')) table = dataset.table(table_id) From f773f237ea0a06c55ab8444379477a9685dc5ae6 Mon Sep 17 00:00:00 2001 From: chemelnucfin Date: Fri, 23 Mar 2018 14:38:04 -0700 Subject: [PATCH 0414/2016] Bigquery: adds `page_size` parameter for `list_rows` and use in DB-API for `arraysize` (#4931) --- .../google/cloud/bigquery/client.py | 9 +++-- .../google/cloud/bigquery/dbapi/cursor.py | 5 +-- .../google/cloud/bigquery/table.py | 20 +++++++++-- .../google-cloud-bigquery/tests/system.py | 36 +++++++++++++++++++ .../tests/unit/test_table.py | 23 ++++++++++++ 5 files changed, 87 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 77214529a697..4d298958cff9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1247,7 +1247,8 @@ def create_rows_json(self, *args, **kwargs): return self.insert_rows_json(*args, **kwargs) def list_rows(self, table, selected_fields=None, max_results=None, - page_token=None, start_index=None, retry=DEFAULT_RETRY): + page_token=None, start_index=None, page_size=None, + retry=DEFAULT_RETRY): """List the rows of the table. See @@ -1287,6 +1288,10 @@ def list_rows(self, table, selected_fields=None, max_results=None, :param start_index: (Optional) The zero-based index of the starting row to read. + :type page_size: int + :param page_size: (Optional) The maximum number of items to return + per page in the iterator. + :type retry: :class:`google.api_core.retry.Retry` :param retry: (Optional) How to retry the RPC. @@ -1314,7 +1319,6 @@ def list_rows(self, table, selected_fields=None, max_results=None, if selected_fields is not None: params['selectedFields'] = ','.join( field.name for field in selected_fields) - if start_index is not None: params['startIndex'] = start_index @@ -1325,6 +1329,7 @@ def list_rows(self, table, selected_fields=None, max_results=None, schema=schema, page_token=page_token, max_results=max_results, + page_size=page_size, extra_params=params) return row_iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 5df7117d5e8b..c550287ecc4b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -190,10 +190,11 @@ def _try_fetch(self, size=None): if self._query_data is None: client = self.connection._client - # TODO(tswast): pass in page size to list_rows based on arraysize rows_iter = client.list_rows( self._query_job.destination, - selected_fields=self._query_job._query_results.schema) + selected_fields=self._query_job._query_results.schema, + page_size=self.arraysize + ) self._query_data = iter(rows_iter) def fetchone(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index cf7881f3af13..6d16ff606a60 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1148,14 +1148,15 @@ class RowIterator(HTTPIterator): path (str): The method path to query for the list of items. page_token (str): A token identifying a page in a result set to start fetching results from. - max_results (int): The maximum number of results to fetch. + max_results (int): (Optional) The maximum number of results to fetch. + page_size (int): (Optional) The number of items to return per page. extra_params (dict): Extra query string parameters for the API call. .. autoattribute:: pages """ def __init__(self, client, api_request, path, schema, page_token=None, - max_results=None, extra_params=None): + max_results=None, page_size=None, extra_params=None): super(RowIterator, self).__init__( client, api_request, path, item_to_value=_item_to_row, items_key='rows', page_token=page_token, max_results=max_results, @@ -1164,6 +1165,21 @@ def __init__(self, client, api_request, path, schema, page_token=None, self._schema = schema self._field_to_index = _field_to_index_mapping(schema) self._total_rows = None + self._page_size = page_size + + def _get_next_page_response(self): + """Requests the next page from the path provided. + + Returns: + dict: The parsed JSON response of the next page's contents. + """ + params = self._get_query_params() + if self._page_size is not None: + params['maxResults'] = self._page_size + return self.api_request( + method=self._HTTP_METHOD, + path=self.path, + query_params=params) @property def schema(self): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 7d876e32f676..68b068daa3f6 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1737,6 +1737,42 @@ def test_nested_table_to_dataframe(self): row['record_col']['nested_record']['nested_nested_string'], 'some deep insight') + def test_list_rows_page_size(self): + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition + + num_items = 7 + page_size = 3 + num_pages, num_last_page = divmod(num_items, page_size) + + SF = bigquery.SchemaField + schema = [SF('string_col', 'STRING', mode='NULLABLE')] + to_insert = [{'string_col': 'item%d' % i} for i in range(num_items)] + rows = [json.dumps(row) for row in to_insert] + body = six.BytesIO('{}\n'.format('\n'.join(rows))) + + table_id = 'test_table' + dataset = self.temp_dataset(_make_dataset_id('nested_df')) + table = dataset.table(table_id) + self.to_delete.insert(0, table) + job_config = bigquery.LoadJobConfig() + job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE + job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON + job_config.schema = schema + # Load a table using a local JSON file from memory. + Config.CLIENT.load_table_from_file( + body, table, job_config=job_config).result() + + df = Config.CLIENT.list_rows( + table, selected_fields=schema, page_size=page_size) + pages = df.pages + + for i in range(num_pages): + page = next(pages) + self.assertEqual(page.num_items, page_size) + page = next(pages) + self.assertEqual(page.num_items, num_last_page) + def temp_dataset(self, dataset_id, location=None): dataset = Dataset(Config.CLIENT.dataset(dataset_id)) if location: diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 443b65c4910d..e703b1c9eef5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1071,6 +1071,29 @@ def test_iterate(self): api_request.assert_called_once_with( method='GET', path=path, query_params={}) + def test_page_size(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField('name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + rows = [ + {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]}, + {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]}, + ] + path = '/foo' + api_request = mock.Mock(return_value={'rows': rows}) + + row_iterator = RowIterator( + mock.sentinel.client, api_request, path, schema, page_size=4) + row_iterator._get_next_page_response() + + api_request.assert_called_once_with( + method='GET', path=path, query_params={ + 'maxResults': row_iterator._page_size}) + @unittest.skipIf(pandas is None, 'Requires `pandas`') def test_to_dataframe(self): from google.cloud.bigquery.table import RowIterator From e52cb3a96df31244dd989dfb3aba5d14a62f11c6 Mon Sep 17 00:00:00 2001 From: Yoshihiro MIYAI Date: Tue, 27 Mar 2018 04:25:28 +0900 Subject: [PATCH 0415/2016] Make SchemaField be able to include description via from_api_repr method (#5114) --- packages/google-cloud-bigquery/google/cloud/bigquery/schema.py | 2 ++ packages/google-cloud-bigquery/tests/unit/test_schema.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index e4ad5ce850a6..5f566025750c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -58,11 +58,13 @@ def from_api_repr(cls, api_repr): """ # Handle optional properties with default values mode = api_repr.get('mode', 'NULLABLE') + description = api_repr.get('description') fields = api_repr.get('fields', ()) return cls( field_type=api_repr['type'].upper(), fields=[cls.from_api_repr(f) for f in fields], mode=mode.upper(), + description=description, name=api_repr['name'], ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index d2574b33e420..6be6abeb56dc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -94,12 +94,14 @@ def test_from_api_repr(self): 'type': 'integer', }], 'mode': 'required', + 'description': 'test_description', 'name': 'foo', 'type': 'record', }) self.assertEqual(field.name, 'foo') self.assertEqual(field.field_type, 'RECORD') self.assertEqual(field.mode, 'REQUIRED') + self.assertEqual(field.description, 'test_description') self.assertEqual(len(field.fields), 1) self.assertEqual(field.fields[0].name, 'bar') self.assertEqual(field.fields[0].field_type, 'INTEGER') @@ -113,6 +115,7 @@ def test_from_api_repr_defaults(self): self.assertEqual(field.name, 'foo') self.assertEqual(field.field_type, 'RECORD') self.assertEqual(field.mode, 'NULLABLE') + self.assertEqual(field.description, None) self.assertEqual(len(field.fields), 0) def test_name_property(self): From a1f83b50996454ec2bcb1c77456a08da2a0f548e Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 26 Mar 2018 17:54:53 -0400 Subject: [PATCH 0416/2016] Use explicit bytes to initialize 'BytesIO'. (#5116) Fixes system test failures on Py3k. --- packages/google-cloud-bigquery/tests/system.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 68b068daa3f6..60d73ddd29a3 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -381,7 +381,7 @@ def _create_table_many_columns(self, rows): config = bigquery.LoadJobConfig() config.schema = schema job = Config.CLIENT.load_table_from_file( - six.BytesIO(body), table_ref, job_config=config) + six.BytesIO(body.encode('ascii')), table_ref, job_config=config) job.result() return bigquery.Table(table_ref, schema=schema) @@ -1707,7 +1707,7 @@ def test_nested_table_to_dataframe(self): {'string_col': 'Some value', 'record_col': record}, ] rows = [json.dumps(row) for row in to_insert] - body = six.BytesIO('{}\n'.format('\n'.join(rows))) + body = six.BytesIO('{}\n'.format('\n'.join(rows)).encode('ascii')) table_id = 'test_table' dataset = self.temp_dataset(_make_dataset_id('nested_df')) table = dataset.table(table_id) @@ -1749,7 +1749,7 @@ def test_list_rows_page_size(self): schema = [SF('string_col', 'STRING', mode='NULLABLE')] to_insert = [{'string_col': 'item%d' % i} for i in range(num_items)] rows = [json.dumps(row) for row in to_insert] - body = six.BytesIO('{}\n'.format('\n'.join(rows))) + body = six.BytesIO('{}\n'.format('\n'.join(rows)).encode('ascii')) table_id = 'test_table' dataset = self.temp_dataset(_make_dataset_id('nested_df')) From c408ced74dbd3e29617f3eca135519778b497353 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 28 Mar 2018 12:06:50 -0700 Subject: [PATCH 0417/2016] BigQuery: Removes _ApiResourceProperty class (#5107) * BigQuery: Removes ApiResourceProperty * Moves conversions into properties and refactors per comments * fixes coverage for setting schema to None * handles empty schema --- .../google/cloud/bigquery/_helpers.py | 103 --- .../google/cloud/bigquery/external_config.py | 589 ++++++++++++------ .../tests/unit/test__helpers.py | 131 ---- .../tests/unit/test_external_config.py | 207 +++++- 4 files changed, 561 insertions(+), 469 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index e094f6d666f9..509b0455bfc3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -314,109 +314,6 @@ def _snake_to_camel_case(value): return words[0] + ''.join(map(str.capitalize, words[1:])) -class _ApiResourceProperty(object): - """Base property implementation. - - Values will be stored on a `_properties` helper attribute of the - property's job instance. - - :type name: str - :param name: name of the property - - :type resource_name: str - :param resource_name: name of the property in the resource dictionary - """ - - def __init__(self, name, resource_name): - self.name = name - self.resource_name = resource_name - - def __get__(self, instance, owner): - """Descriptor protocol: accessor""" - if instance is None: - return self - return instance._properties.get(self.resource_name) - - def _validate(self, value): - """Subclasses override to impose validation policy.""" - raise NotImplementedError("Abstract") - - def __set__(self, instance, value): - """Descriptor protocol: mutator""" - self._validate(value) - instance._properties[self.resource_name] = value - - def __delete__(self, instance): - """Descriptor protocol: deleter""" - del instance._properties[self.resource_name] - - -class _TypedApiResourceProperty(_ApiResourceProperty): - """Property implementation: validates based on value type. - - :type name: str - :param name: name of the property - - :type resource_name: str - :param resource_name: name of the property in the resource dictionary - - :type property_type: type or sequence of types - :param property_type: type to be validated - """ - def __init__(self, name, resource_name, property_type): - super(_TypedApiResourceProperty, self).__init__( - name, resource_name) - self.property_type = property_type - - def _validate(self, value): - """Ensure that 'value' is of the appropriate type. - - :raises: ValueError on a type mismatch. - """ - if value is None: - return - if not isinstance(value, self.property_type): - raise ValueError('Required type: %s' % (self.property_type,)) - - -class _ListApiResourceProperty(_ApiResourceProperty): - """Property implementation: validates based on value type. - - :type name: str - :param name: name of the property - - :type resource_name: str - :param resource_name: name of the property in the resource dictionary - - :type property_type: type or sequence of types - :param property_type: type to be validated - """ - def __init__(self, name, resource_name, property_type): - super(_ListApiResourceProperty, self).__init__( - name, resource_name) - self.property_type = property_type - - def __get__(self, instance, owner): - """Descriptor protocol: accessor""" - if instance is None: - return self - return instance._properties.get(self.resource_name, []) - - def _validate(self, value): - """Ensure that 'value' is of the appropriate type. - - :raises: ValueError on a type mismatch. - """ - if value is None: - raise ValueError(( - 'Required type: list of {}. ' - 'To unset, use del or set to empty list').format( - self.property_type,)) - if not all(isinstance(item, self.property_type) for item in value): - raise ValueError( - 'Required type: list of %s' % (self.property_type,)) - - def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 17e78300b349..bd29e35dd9a9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -23,16 +23,10 @@ import base64 import copy -import six - from google.cloud.bigquery._helpers import _to_bytes from google.cloud.bigquery._helpers import _bytes_to_json -from google.cloud.bigquery._helpers import _TypedApiResourceProperty -from google.cloud.bigquery._helpers import _ListApiResourceProperty from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery.table import _build_schema_resource -from google.cloud.bigquery.table import _parse_schema_resource class BigtableColumn(object): @@ -41,49 +35,97 @@ class BigtableColumn(object): def __init__(self): self._properties = {} - encoding = _TypedApiResourceProperty( - 'encoding', 'encoding', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.encoding - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.encoding - """ + @property + def encoding(self): + """str: The encoding of the values when the type is not `STRING` - field_name = _TypedApiResourceProperty( - 'field_name', 'fieldName', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.fieldName - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.fieldName - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.encoding + """ + return self._properties.get('encoding') - only_read_latest = _TypedApiResourceProperty( - 'only_read_latest', 'onlyReadLatest', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.onlyReadLatest - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.onlyReadLatest - """ + @encoding.setter + def encoding(self, value): + self._properties['encoding'] = value - qualifier_encoded = _TypedApiResourceProperty( - 'qualifier_encoded', 'qualifierEncoded', six.binary_type) - """The qualifier encoded in binary. The type is ``str`` (Python 2.x) or - ``bytes`` (Python 3.x). The module will handle base64 encoding for you. + @property + def field_name(self): + """str: An identifier to use if the qualifier is not a valid BigQuery + field identifier - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.qualifierEncoded - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifierEncoded - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.fieldName + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.fieldName + """ + return self._properties.get('fieldName') - qualifier_string = _TypedApiResourceProperty( - 'qualifier_string', 'qualifierString', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.qualifierString - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifierString - """ + @field_name.setter + def field_name(self, value): + self._properties['fieldName'] = value - type_ = _TypedApiResourceProperty('type_', 'type', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.type - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.type - """ + @property + def only_read_latest(self): + """bool: If this is set, only the latest version of value in this + column are exposed. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.onlyReadLatest + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.onlyReadLatest + """ + return self._properties.get('onlyReadLatest') + + @only_read_latest.setter + def only_read_latest(self, value): + self._properties['onlyReadLatest'] = value + + @property + def qualifier_encoded(self): + """Union[str, bytes]: The qualifier encoded in binary. + + The type is ``str`` (Python 2.x) or ``bytes`` (Python 3.x). The module + will handle base64 encoding for you. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.qualifierEncoded + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifierEncoded + """ + prop = self._properties.get('qualifierEncoded') + if prop is None: + return None + return base64.standard_b64decode(_to_bytes(prop)) + + @qualifier_encoded.setter + def qualifier_encoded(self, value): + self._properties['qualifierEncoded'] = _bytes_to_json(value) + + @property + def qualifier_string(self): + """str: A valid UTF-8 string qualifier + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.qualifierEncoded + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifierEncoded + """ + return self._properties.get('qualifierString') + + @qualifier_string.setter + def qualifier_string(self, value): + self._properties['qualifierString'] = value + + @property + def type_(self): + """str: The type to convert the value in cells of this column. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.type + """ + return self._properties.get('type') + + @type_.setter + def type_(self, value): + self._properties['type'] = value def to_api_repr(self): """Build an API representation of this object. @@ -91,11 +133,7 @@ def to_api_repr(self): :rtype: dict :returns: A dictionary in the format used by the BigQuery API. """ - config = copy.deepcopy(self._properties) - qe = config.get('qualifierEncoded') - if qe is not None: - config['qualifierEncoded'] = _bytes_to_json(qe) - return config + return copy.deepcopy(self._properties) @classmethod def from_api_repr(cls, resource): @@ -110,9 +148,6 @@ def from_api_repr(cls, resource): """ config = cls() config._properties = copy.deepcopy(resource) - qe = resource.get('qualifierEncoded') - if qe: - config.qualifier_encoded = base64.standard_b64decode(_to_bytes(qe)) return config @@ -122,39 +157,78 @@ class BigtableColumnFamily(object): def __init__(self): self._properties = {} - encoding = _TypedApiResourceProperty( - 'encoding', 'encoding', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.encoding - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.encoding - """ + @property + def encoding(self): + """str: The encoding of the values when the type is not `STRING` - family_id = _TypedApiResourceProperty( - 'family_id', 'familyId', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.familyId - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.familyId - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.encoding + """ + return self._properties.get('encoding') - only_read_latest = _TypedApiResourceProperty( - 'only_read_latest', 'onlyReadLatest', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.onlyReadLatest - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.onlyReadLatest - """ + @encoding.setter + def encoding(self, value): + self._properties['encoding'] = value - type_ = _TypedApiResourceProperty('type_', 'type', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.type - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.type - """ + @property + def family_id(self): + """str: Identifier of the column family. - columns = _ListApiResourceProperty( - 'columns', 'columns', BigtableColumn) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.familyId + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.familyId + """ + return self._properties.get('familyId') + + @family_id.setter + def family_id(self, value): + self._properties['familyId'] = value + + @property + def only_read_latest(self): + """bool: If this is set only the latest version of value are exposed + for all columns in this column family. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.onlyReadLatest + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.onlyReadLatest + """ + return self._properties.get('onlyReadLatest') + + @only_read_latest.setter + def only_read_latest(self, value): + self._properties['onlyReadLatest'] = value + + @property + def type_(self): + """str: The type to convert the value in cells of this column family. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.type + """ + return self._properties.get('type') + + @type_.setter + def type_(self, value): + self._properties['type'] = value + + @property + def columns(self): + """List[google.cloud.bigquery.external_config.BigtableColumn]: Lists of + columns that should be exposed as individual fields + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns + """ + prop = self._properties.get('columns', []) + return [BigtableColumn.from_api_repr(col) for col in prop] + + @columns.setter + def columns(self, value): + self._properties['columns'] = [col.to_api_repr() for col in value] def to_api_repr(self): """Build an API representation of this object. @@ -162,9 +236,7 @@ def to_api_repr(self): :rtype: dict :returns: A dictionary in the format used by the BigQuery API. """ - config = copy.deepcopy(self._properties) - config['columns'] = [c.to_api_repr() for c in config['columns']] - return config + return copy.deepcopy(self._properties) @classmethod def from_api_repr(cls, resource): @@ -182,8 +254,6 @@ def from_api_repr(cls, resource): """ config = cls() config._properties = copy.deepcopy(resource) - config.columns = [BigtableColumn.from_api_repr(c) - for c in resource['columns']] return config @@ -197,27 +267,52 @@ class BigtableOptions(object): def __init__(self): self._properties = {} - ignore_unspecified_column_families = _TypedApiResourceProperty( - 'ignore_unspecified_column_families', - 'ignoreUnspecifiedColumnFamilies', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.ignoreUnspecifiedColumnFamilies - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.ignoreUnspecifiedColumnFamilies - """ + @property + def ignore_unspecified_column_families(self): + """bool: If `True`, ignore columns not specified in columnFamilies + list. Defaults to `False`. - read_rowkey_as_string = _TypedApiResourceProperty( - 'read_rowkey_as_string', 'readRowkeyAsString', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.readRowkeyAsString - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.readRowkeyAsString - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.ignoreUnspecifiedColumnFamilies + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.ignoreUnspecifiedColumnFamilies + """ + return self._properties.get('ignoreUnspecifiedColumnFamilies') - column_families = _ListApiResourceProperty( - 'column_families', 'columnFamilies', BigtableColumnFamily) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies - """ + @ignore_unspecified_column_families.setter + def ignore_unspecified_column_families(self, value): + self._properties['ignoreUnspecifiedColumnFamilies'] = value + + @property + def read_rowkey_as_string(self): + """bool: If `True`, rowkey column families will be read and converted + to string. Defaults to `False`. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.readRowkeyAsString + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.readRowkeyAsString + """ + return self._properties.get('readRowkeyAsString') + + @read_rowkey_as_string.setter + def read_rowkey_as_string(self, value): + self._properties['readRowkeyAsString'] = value + + @property + def column_families(self): + """List[google.cloud.bigquery.external_config.BigtableColumnFamily]: + List of column families to expose in the table schema along with their + types. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies + """ + prop = self._properties.get('columnFamilies', []) + return [BigtableColumnFamily.from_api_repr(cf) for cf in prop] + + @column_families.setter + def column_families(self, value): + self._properties['columnFamilies'] = [cf.to_api_repr() for cf in value] def to_api_repr(self): """Build an API representation of this object. @@ -225,10 +320,7 @@ def to_api_repr(self): :rtype: dict :returns: A dictionary in the format used by the BigQuery API. """ - config = copy.deepcopy(self._properties) - config['columnFamilies'] = [cf.to_api_repr() - for cf in config['columnFamilies']] - return config + return copy.deepcopy(self._properties) @classmethod def from_api_repr(cls, resource): @@ -245,8 +337,6 @@ def from_api_repr(cls, resource): """ config = cls() config._properties = copy.deepcopy(resource) - config.column_families = [BigtableColumnFamily.from_api_repr(cf) - for cf in resource['columnFamilies']] return config @@ -259,47 +349,91 @@ class CSVOptions(object): def __init__(self): self._properties = {} - allow_jagged_rows = _TypedApiResourceProperty( - 'allow_jagged_rows', 'allowJaggedRows', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowJaggedRows - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowJaggedRows - """ + @property + def allow_jagged_rows(self): + """bool: If `True`, BigQuery treats missing trailing columns as null + values. Defaults to `False`. - allow_quoted_newlines = _TypedApiResourceProperty( - 'allow_quoted_newlines', 'allowQuotedNewlines', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowQuotedNewlines - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowQuotedNewlines - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowJaggedRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowJaggedRows + """ + return self._properties.get('allowJaggedRows') - encoding = _TypedApiResourceProperty( - 'encoding', 'encoding', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.encoding - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding - """ + @allow_jagged_rows.setter + def allow_jagged_rows(self, value): + self._properties['allowJaggedRows'] = value - field_delimiter = _TypedApiResourceProperty( - 'field_delimiter', 'fieldDelimiter', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.fieldDelimiter - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.fieldDelimiter - """ + @property + def allow_quoted_newlines(self): + """bool: If `True`, quoted data sections that contain newline + characters in a CSV file are allowed. Defaults to `False`. - quote_character = _TypedApiResourceProperty( - 'quote_character', 'quote', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.quote - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.quote - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowQuotedNewlines + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowQuotedNewlines + """ + return self._properties.get('allowQuotedNewlines') - skip_leading_rows = _TypedApiResourceProperty( - 'skip_leading_rows', 'skipLeadingRows', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.skipLeadingRows - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.skipLeadingRows - """ + @allow_quoted_newlines.setter + def allow_quoted_newlines(self, value): + self._properties['allowQuotedNewlines'] = value + + @property + def encoding(self): + """str: The character encoding of the data. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding + """ + return self._properties.get('encoding') + + @encoding.setter + def encoding(self, value): + self._properties['encoding'] = value + + @property + def field_delimiter(self): + """str: The separator for fields in a CSV file. Defaults a comma (','). + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.fieldDelimiter + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.fieldDelimiter + """ + return self._properties.get('fieldDelimiter') + + @field_delimiter.setter + def field_delimiter(self, value): + self._properties['fieldDelimiter'] = value + + @property + def quote_character(self): + """str: The value that is used to quote data sections in a CSV file. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.quote + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.quote + """ + return self._properties.get('quote') + + @quote_character.setter + def quote_character(self, value): + self._properties['quote'] = value + + @property + def skip_leading_rows(self): + """int: The number of rows at the top of a CSV file. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.skipLeadingRows + """ + return _int_or_none(self._properties.get('skipLeadingRows')) + + @skip_leading_rows.setter + def skip_leading_rows(self, value): + self._properties['skipLeadingRows'] = str(value) def to_api_repr(self): """Build an API representation of this object. @@ -307,11 +441,7 @@ def to_api_repr(self): :rtype: dict :returns: A dictionary in the format used by the BigQuery API. """ - config = copy.deepcopy(self._properties) - slr = config.pop('skipLeadingRows', None) - if slr is not None: - config['skipLeadingRows'] = str(slr) - return config + return copy.deepcopy(self._properties) @classmethod def from_api_repr(cls, resource): @@ -325,10 +455,8 @@ def from_api_repr(cls, resource): :rtype: :class:`~google.cloud.bigquery.external_config.CSVOptions` :returns: Configuration parsed from ``resource``. """ - slr = resource.get('skipLeadingRows') config = cls() config._properties = copy.deepcopy(resource) - config.skip_leading_rows = _int_or_none(slr) return config @@ -341,12 +469,20 @@ class GoogleSheetsOptions(object): def __init__(self): self._properties = {} - skip_leading_rows = _TypedApiResourceProperty( - 'skip_leading_rows', 'skipLeadingRows', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).googleSheetsOptions.skipLeadingRows - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.googleSheetsOptions.skipLeadingRows - """ + @property + def skip_leading_rows(self): + """int: The number of rows at the top of a sheet that BigQuery will + skip when reading the data. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).googleSheetsOptions.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.googleSheetsOptions.skipLeadingRows + """ + return _int_or_none(self._properties.get('skipLeadingRows')) + + @skip_leading_rows.setter + def skip_leading_rows(self, value): + self._properties['skipLeadingRows'] = str(value) def to_api_repr(self): """Build an API representation of this object. @@ -354,11 +490,7 @@ def to_api_repr(self): :rtype: dict :returns: A dictionary in the format used by the BigQuery API. """ - config = copy.deepcopy(self._properties) - slr = config.pop('skipLeadingRows', None) - if slr is not None: - config['skipLeadingRows'] = str(slr) - return config + return copy.deepcopy(self._properties) @classmethod def from_api_repr(cls, resource): @@ -373,10 +505,8 @@ def from_api_repr(cls, resource): :class:`~google.cloud.bigquery.external_config.GoogleSheetsOptions` :returns: Configuration parsed from ``resource``. """ - slr = resource.get('skipLeadingRows') config = cls() config._properties = copy.deepcopy(resource) - config.skip_leading_rows = _int_or_none(slr) return config @@ -412,46 +542,98 @@ def options(self): """Source-specific options.""" return self._options - autodetect = _TypedApiResourceProperty( - 'autodetect', 'autodetect', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect - """ + @property + def autodetect(self): + """bool: If `True`, try to detect schema and format options + automatically. - compression = _TypedApiResourceProperty( - 'compression', 'compression', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect + """ + return self._properties.get('autodetect') - ignore_unknown_values = _TypedApiResourceProperty( - 'ignore_unknown_values', 'ignoreUnknownValues', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues - """ + @autodetect.setter + def autodetect(self, value): + self._properties['autodetect'] = value - max_bad_records = _TypedApiResourceProperty( - 'max_bad_records', 'maxBadRecords', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords - """ + @property + def compression(self): + """str: The compression type of the data source. - source_uris = _ListApiResourceProperty( - 'source_uris', 'sourceUris', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris - """ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression + """ + return self._properties.get('compression') - schema = _ListApiResourceProperty('schema', 'schema', SchemaField) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema - """ + @compression.setter + def compression(self, value): + self._properties['compression'] = value + + @property + def ignore_unknown_values(self): + """bool: If `True`, extra values that are not represented in the table + schema are ignored. Defaults to `False`. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues + """ + return self._properties.get('ignoreUnknownValues') + + @ignore_unknown_values.setter + def ignore_unknown_values(self, value): + self._properties['ignoreUnknownValues'] = value + + @property + def max_bad_records(self): + """int: The maximum number of bad records that BigQuery can ignore when + reading data. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords + """ + return self._properties.get('maxBadRecords') + + @max_bad_records.setter + def max_bad_records(self, value): + self._properties['maxBadRecords'] = value + + @property + def source_uris(self): + """List[str]: URIs that point to your data in Google Cloud. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris + """ + return self._properties.get('sourceUris', []) + + @source_uris.setter + def source_uris(self, value): + self._properties['sourceUris'] = value + + @property + def schema(self): + """List[google.cloud.bigquery.schema.SchemaField]: The schema for the + data. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema + """ + prop = self._properties.get('schema', {}) + return [SchemaField.from_api_repr(field) + for field in prop.get('fields', [])] + + @schema.setter + def schema(self, value): + prop = value + if value is not None: + prop = {'fields': [field.to_api_repr() for field in value]} + self._properties['schema'] = prop def to_api_repr(self): """Build an API representation of this object. @@ -460,8 +642,6 @@ def to_api_repr(self): :returns: A dictionary in the format used by the BigQuery API. """ config = copy.deepcopy(self._properties) - if self.schema: - config['schema'] = {'fields': _build_schema_resource(self.schema)} if self.options is not None: r = self.options.to_api_repr() if r != {}: @@ -481,13 +661,10 @@ def from_api_repr(cls, resource): :returns: Configuration parsed from ``resource``. """ config = cls(resource['sourceFormat']) - schema = resource.get('schema') for optcls in _OPTION_CLASSES: opts = resource.get(optcls._RESOURCE_NAME) if opts is not None: config._options = optcls.from_api_repr(opts) break config._properties = copy.deepcopy(resource) - if schema: - config.schema = _parse_schema_resource(schema) return config diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 9039ea120b06..362e148b6b80 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -744,137 +744,6 @@ def test_w_camel_case_string(self): self.assertEqual(self._call_fut('friendlyName'), 'friendlyName') -class Test_TypedApiResourceProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import _TypedApiResourceProperty - - return _TypedApiResourceProperty - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_it(self): - - class Wrapper(object): - attr = self._make_one('attr', 'back', int) - - def __init__(self): - self._properties = {} - - self.assertIsNotNone(Wrapper.attr) - - wrapper = Wrapper() - with self.assertRaises(ValueError): - wrapper.attr = 'BOGUS' - - wrapper.attr = 42 - self.assertEqual(wrapper.attr, 42) - self.assertEqual(wrapper._properties['back'], 42) - - wrapper.attr = None - self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._properties['back']) - - wrapper.attr = 23 - self.assertEqual(wrapper.attr, 23) - self.assertEqual(wrapper._properties['back'], 23) - - del wrapper.attr - self.assertIsNone(wrapper.attr) - with self.assertRaises(KeyError): - wrapper._properties['back'] - - -class Test_ListApiResourceProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import _ListApiResourceProperty - - return _ListApiResourceProperty - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def _descriptor_and_klass(self): - from google.cloud.bigquery.query import _AbstractQueryParameter - - descriptor = self._make_one( - 'query_parameters', 'queryParameters', _AbstractQueryParameter) - - class _Test(object): - def __init__(self): - self._properties = {} - - query_parameters = descriptor - - return descriptor, _Test - - def test_class_getter(self): - descriptor, klass = self._descriptor_and_klass() - self.assertIs(klass.query_parameters, descriptor) - - def test_instance_getter_empty(self): - _, klass = self._descriptor_and_klass() - instance = klass() - self.assertEqual(instance.query_parameters, []) - - def test_instance_getter_w_non_empty_list(self): - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] - _, klass = self._descriptor_and_klass() - instance = klass() - instance._properties['queryParameters'] = query_parameters - - self.assertEqual(instance.query_parameters, query_parameters) - - def test_instance_setter_w_empty_list(self): - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] - _, klass = self._descriptor_and_klass() - instance = klass() - instance._query_parameters = query_parameters - - instance.query_parameters = [] - - self.assertEqual(instance.query_parameters, []) - - def test_instance_setter_w_none(self): - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] - _, klass = self._descriptor_and_klass() - instance = klass() - instance._query_parameters = query_parameters - - with self.assertRaises(ValueError): - instance.query_parameters = None - - def test_instance_setter_w_valid_udf(self): - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] - _, klass = self._descriptor_and_klass() - instance = klass() - - instance.query_parameters = query_parameters - - self.assertEqual(instance.query_parameters, query_parameters) - - def test_instance_setter_w_bad_udfs(self): - _, klass = self._descriptor_and_klass() - instance = klass() - - with self.assertRaises(ValueError): - instance.query_parameters = ["foo"] - - self.assertEqual(instance.query_parameters, []) - - class _Field(object): def __init__(self, mode, name='unknown', field_type='UNKNOWN', fields=()): diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 51c49f6bdf04..480645122554 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -16,7 +16,8 @@ import copy import unittest -from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery import external_config +from google.cloud.bigquery import schema class TestExternalConfig(unittest.TestCase): @@ -32,11 +33,9 @@ class TestExternalConfig(unittest.TestCase): 'compression': 'compression', } - def test_api_repr_base(self): - from google.cloud.bigquery.schema import SchemaField - + def test_from_api_repr_base(self): resource = copy.deepcopy(self.BASE_RESOURCE) - ec = ExternalConfig.from_api_repr(resource) + ec = external_config.ExternalConfig.from_api_repr(resource) self._verify_base(ec) self.assertEqual(ec.schema, []) self.assertIsNone(ec.options) @@ -56,15 +55,64 @@ def test_api_repr_base(self): ], }, }) - ec = ExternalConfig.from_api_repr(resource) + ec = external_config.ExternalConfig.from_api_repr(resource) self._verify_base(ec) - self.assertEqual(ec.schema, - [SchemaField('full_name', 'STRING', mode='REQUIRED')]) + exp_schema = [ + schema.SchemaField('full_name', 'STRING', mode='REQUIRED') + ] + self.assertEqual(ec.schema, exp_schema) self.assertIsNone(ec.options) got_resource = ec.to_api_repr() self.assertEqual(got_resource, resource) + def test_to_api_repr_base(self): + ec = external_config.ExternalConfig('') + ec.source_uris = self.SOURCE_URIS + ec.max_bad_records = 17 + ec.autodetect = True + ec.ignore_unknown_values = False + ec.compression = 'compression' + ec.schema = [ + schema.SchemaField('full_name', 'STRING', mode='REQUIRED') + ] + + exp_schema = { + 'fields': [ + { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': None, + }, + ] + } + got_resource = ec.to_api_repr() + exp_resource = { + 'sourceFormat': '', + 'sourceUris': self.SOURCE_URIS, + 'maxBadRecords': 17, + 'autodetect': True, + 'ignoreUnknownValues': False, + 'compression': 'compression', + 'schema': exp_schema + } + self.assertEqual(got_resource, exp_resource) + + def test_schema_None(self): + ec = external_config.ExternalConfig('') + ec.schema = None + got = ec.to_api_repr() + want = {'sourceFormat': '', 'schema': None} + self.assertEqual(got, want) + + def test_schema_empty(self): + ec = external_config.ExternalConfig('') + ec.schema = [] + got = ec.to_api_repr() + want = {'sourceFormat': '', 'schema': {'fields': []}} + self.assertEqual(got, want) + def _verify_base(self, ec): self.assertEqual(ec.autodetect, True) self.assertEqual(ec.compression, 'compression') @@ -73,24 +121,22 @@ def _verify_base(self, ec): self.assertEqual(ec.source_uris, self.SOURCE_URIS) def test_to_api_repr_source_format(self): - ec = ExternalConfig('CSV') + ec = external_config.ExternalConfig('CSV') got = ec.to_api_repr() want = {'sourceFormat': 'CSV'} self.assertEqual(got, want) - def test_api_repr_sheets(self): - from google.cloud.bigquery.external_config import GoogleSheetsOptions - + def test_from_api_repr_sheets(self): resource = _copy_and_update(self.BASE_RESOURCE, { 'sourceFormat': 'GOOGLE_SHEETS', 'googleSheetsOptions': {'skipLeadingRows': '123'}, - }) + }) - ec = ExternalConfig.from_api_repr(resource) + ec = external_config.ExternalConfig.from_api_repr(resource) self._verify_base(ec) self.assertEqual(ec.source_format, 'GOOGLE_SHEETS') - self.assertIsInstance(ec.options, GoogleSheetsOptions) + self.assertIsInstance(ec.options, external_config.GoogleSheetsOptions) self.assertEqual(ec.options.skip_leading_rows, 123) got_resource = ec.to_api_repr() @@ -98,14 +144,27 @@ def test_api_repr_sheets(self): self.assertEqual(got_resource, resource) del resource['googleSheetsOptions']['skipLeadingRows'] - ec = ExternalConfig.from_api_repr(resource) + ec = external_config.ExternalConfig.from_api_repr(resource) self.assertIsNone(ec.options.skip_leading_rows) got_resource = ec.to_api_repr() self.assertEqual(got_resource, resource) - def test_api_repr_csv(self): - from google.cloud.bigquery.external_config import CSVOptions + def test_to_api_repr_sheets(self): + ec = external_config.ExternalConfig('GOOGLE_SHEETS') + options = external_config.GoogleSheetsOptions() + options.skip_leading_rows = 123 + ec._options = options + + exp_resource = { + 'sourceFormat': 'GOOGLE_SHEETS', + 'googleSheetsOptions': {'skipLeadingRows': '123'}, + } + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, exp_resource) + def test_from_api_repr_csv(self): resource = _copy_and_update(self.BASE_RESOURCE, { 'sourceFormat': 'CSV', 'csvOptions': { @@ -118,11 +177,11 @@ def test_api_repr_csv(self): }, }) - ec = ExternalConfig.from_api_repr(resource) + ec = external_config.ExternalConfig.from_api_repr(resource) self._verify_base(ec) self.assertEqual(ec.source_format, 'CSV') - self.assertIsInstance(ec.options, CSVOptions) + self.assertIsInstance(ec.options, external_config.CSVOptions) self.assertEqual(ec.options.field_delimiter, 'fieldDelimiter') self.assertEqual(ec.options.skip_leading_rows, 123) self.assertEqual(ec.options.quote_character, 'quote') @@ -135,16 +194,40 @@ def test_api_repr_csv(self): self.assertEqual(got_resource, resource) del resource['csvOptions']['skipLeadingRows'] - ec = ExternalConfig.from_api_repr(resource) + ec = external_config.ExternalConfig.from_api_repr(resource) self.assertIsNone(ec.options.skip_leading_rows) got_resource = ec.to_api_repr() self.assertEqual(got_resource, resource) - def test_api_repr_bigtable(self): - from google.cloud.bigquery.external_config import BigtableOptions - from google.cloud.bigquery.external_config import BigtableColumnFamily + def test_to_api_repr_csv(self): + ec = external_config.ExternalConfig('CSV') + options = external_config.CSVOptions() + options.allow_quoted_newlines = True + options.encoding = 'encoding' + options.field_delimiter = 'fieldDelimiter' + options.quote_character = 'quote' + options.skip_leading_rows = 123 + options.allow_jagged_rows = False + ec._options = options + + exp_resource = { + 'sourceFormat': 'CSV', + 'csvOptions': { + 'fieldDelimiter': 'fieldDelimiter', + 'skipLeadingRows': '123', + 'quote': 'quote', + 'allowQuotedNewlines': True, + 'allowJaggedRows': False, + 'encoding': 'encoding', + }, + } + + got_resource = ec.to_api_repr() - QUALIFIER_ENCODED = base64.standard_b64encode(b'q').decode('ascii') + self.assertEqual(got_resource, exp_resource) + + def test_from_api_repr_bigtable(self): + qualifier_encoded = base64.standard_b64encode(b'q').decode('ascii') resource = _copy_and_update(self.BASE_RESOURCE, { 'sourceFormat': 'BIGTABLE', 'bigtableOptions': { @@ -164,7 +247,7 @@ def test_api_repr_bigtable(self): 'onlyReadLatest': True, }, { - 'qualifierEncoded': QUALIFIER_ENCODED, + 'qualifierEncoded': qualifier_encoded, 'fieldName': 'fieldName2', 'type': 'type2', 'encoding': 'encoding2', @@ -177,25 +260,28 @@ def test_api_repr_bigtable(self): }, }) - ec = ExternalConfig.from_api_repr(resource) + ec = external_config.ExternalConfig.from_api_repr(resource) self._verify_base(ec) self.assertEqual(ec.source_format, 'BIGTABLE') - self.assertIsInstance(ec.options, BigtableOptions) + self.assertIsInstance(ec.options, external_config.BigtableOptions) self.assertEqual(ec.options.ignore_unspecified_column_families, True) self.assertEqual(ec.options.read_rowkey_as_string, False) self.assertEqual(len(ec.options.column_families), 1) fam1 = ec.options.column_families[0] - self.assertIsInstance(fam1, BigtableColumnFamily) + self.assertIsInstance(fam1, external_config.BigtableColumnFamily) self.assertEqual(fam1.family_id, 'familyId') self.assertEqual(fam1.type_, 'type') self.assertEqual(fam1.encoding, 'encoding') self.assertEqual(len(fam1.columns), 2) + self.assertFalse(fam1.only_read_latest) col1 = fam1.columns[0] self.assertEqual(col1.qualifier_string, 'q') self.assertEqual(col1.field_name, 'fieldName1') self.assertEqual(col1.type_, 'type1') self.assertEqual(col1.encoding, 'encoding1') + self.assertTrue(col1.only_read_latest) + self.assertIsNone(col1.qualifier_encoded) col2 = ec.options.column_families[0].columns[1] self.assertEqual(col2.qualifier_encoded, b'q') self.assertEqual(col2.field_name, 'fieldName2') @@ -206,6 +292,69 @@ def test_api_repr_bigtable(self): self.assertEqual(got_resource, resource) + def test_to_api_repr_bigtable(self): + ec = external_config.ExternalConfig('BIGTABLE') + options = external_config.BigtableOptions() + options.ignore_unspecified_column_families = True + options.read_rowkey_as_string = False + ec._options = options + + fam1 = external_config.BigtableColumnFamily() + fam1.family_id = 'familyId' + fam1.type_ = 'type' + fam1.encoding = 'encoding' + fam1.only_read_latest = False + col1 = external_config.BigtableColumn() + col1.qualifier_string = 'q' + col1.field_name = 'fieldName1' + col1.type_ = 'type1' + col1.encoding = 'encoding1' + col1.only_read_latest = True + col2 = external_config.BigtableColumn() + col2.qualifier_encoded = b'q' + col2.field_name = 'fieldName2' + col2.type_ = 'type2' + col2.encoding = 'encoding2' + fam1.columns = [col1, col2] + options.column_families = [fam1] + + qualifier_encoded = base64.standard_b64encode(b'q').decode('ascii') + exp_resource = { + 'sourceFormat': 'BIGTABLE', + 'bigtableOptions': { + 'ignoreUnspecifiedColumnFamilies': True, + 'readRowkeyAsString': False, + 'columnFamilies': [ + { + 'familyId': 'familyId', + 'type': 'type', + 'encoding': 'encoding', + 'columns': [ + { + 'qualifierString': 'q', + 'fieldName': 'fieldName1', + 'type': 'type1', + 'encoding': 'encoding1', + 'onlyReadLatest': True, + }, + { + 'qualifierEncoded': qualifier_encoded, + 'fieldName': 'fieldName2', + 'type': 'type2', + 'encoding': 'encoding2', + }, + + ], + 'onlyReadLatest': False, + } + ], + }, + } + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, exp_resource) + def _copy_and_update(d, u): d = copy.deepcopy(d) From 9e1d59904cd9bd5fefffb3c89e67c7320162ba48 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Mar 2018 16:12:59 -0700 Subject: [PATCH 0418/2016] BigQuery: Remove unnecessary _Table class from test_job.py (#5126) * BigQuery: Remove unnecessary _Table class from test_job.py The way the `_Table` class is used in test_job.py is redundant with the TABLE_REF property. --- .../tests/unit/test_job.py | 73 +++++++------------ 1 file changed, 28 insertions(+), 45 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 51f72b813767..0cb85eccd745 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -542,8 +542,8 @@ def test_props_set_by_server(self): 'reason': 'REASON'} client = _make_client(project=self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job._properties['etag'] = 'ETAG' job._properties['id'] = FULL_JOB_ID job._properties['selfLink'] = URL @@ -872,8 +872,8 @@ def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) self.assertFalse(job.exists()) @@ -888,8 +888,8 @@ def test_exists_hit_w_alternate_client(self): client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) - table = _Table() - job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) self.assertTrue(job.exists(client=client2)) @@ -920,8 +920,8 @@ def test_reload_w_bound_client(self): RESOURCE = self._make_resource() conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job.reload() @@ -938,8 +938,8 @@ def test_reload_w_alternate_client(self): client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) - table = _Table() - job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) job.reload(client=client2) @@ -976,8 +976,8 @@ def test_cancel_w_bound_client(self): RESPONSE = {'job': RESOURCE} conn = _make_connection(RESPONSE) client = _make_client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job.cancel() @@ -995,8 +995,8 @@ def test_cancel_w_alternate_client(self): client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection(RESPONSE) client2 = _make_client(project=self.PROJECT, connection=conn2) - table = _Table() - job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) job.cancel(client=client2) @@ -1567,11 +1567,15 @@ def _verifyResourceProperties(self, job, resource): self.assertIsNone(job.print_header) def test_ctor(self): + from google.cloud.bigquery.table import Table + client = _make_client(project=self.PROJECT) - source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], - client) - self.assertEqual(job.source, source) + source = Table(self.TABLE_REF) + job = self._make_one( + self.JOB_ID, source, [self.DESTINATION_URI], client) + self.assertEqual(job.source.project, self.PROJECT) + self.assertEqual(job.source.dataset_id, self.DS_ID) + self.assertEqual(job.source.table_id, self.TABLE_ID) self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) @@ -1590,9 +1594,8 @@ def test_ctor(self): def test_destination_uri_file_counts(self): file_counts = 23 client = _make_client(project=self.PROJECT) - source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], - client) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client) self.assertIsNone(job.destination_uri_file_counts) statistics = job._properties['statistics'] = {} @@ -1754,9 +1757,8 @@ def test_exists_miss_w_bound_client(self): PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], - client) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client) self.assertFalse(job.exists()) @@ -1771,9 +1773,8 @@ def test_exists_hit_w_alternate_client(self): client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], - client1) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1) self.assertTrue(job.exists(client=client2)) @@ -3337,21 +3338,3 @@ def test_from_api_repr_normal(self): self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) self.assertEqual(entry.status, self.STATUS) self.assertEqual(entry.steps, steps) - - -class _Table(object): - - def __init__(self, table_id=None): - self._table_id = table_id - - @property - def table_id(self): - return TestLoadJob.TABLE_ID - - @property - def project(self): - return TestLoadJob.PROJECT - - @property - def dataset_id(self): - return TestLoadJob.DS_ID From edb9b65aec3ebe529f64205e7b12dab34436ec76 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 30 Mar 2018 17:23:47 -0700 Subject: [PATCH 0419/2016] BigQuery: Moves all Dataset property conversion logic into properties (#5130) * adds to_api_repr() and from_api_repr() for AccessEntry * moves Dataset python/API type conversions into properties * fixes property docstrings * removes items from _PROPERTY_TO_API_FIELD that don't need conversion * adds _str_or_none() helper * fixes property conversion for non-string properties * fixes type notation for dicts and tuples * updates _build_resource() to be consistent with table * clarifies create_dataset docstring * fixes _build_resource docstring * uses google.cloud._helper method from the source * fixes one-letter variable * clarifies comment in _build_resource() --- .../google/cloud/bigquery/_helpers.py | 6 + .../google/cloud/bigquery/client.py | 68 ++- .../google/cloud/bigquery/dataset.py | 523 ++++++++---------- .../tests/unit/test__helpers.py | 38 ++ .../tests/unit/test_client.py | 68 ++- .../tests/unit/test_dataset.py | 77 ++- 6 files changed, 437 insertions(+), 343 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 509b0455bfc3..1733f8e360b9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -477,3 +477,9 @@ def _int_or_none(value): return value if value is not None: return int(value) + + +def _str_or_none(value): + """Helper: serialize value to JSON string.""" + if value is not None: + return str(value) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4d298958cff9..e44d062edca0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -34,7 +34,6 @@ from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW -from google.cloud.bigquery._helpers import _snake_to_camel_case from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem @@ -233,22 +232,30 @@ def dataset(self, dataset_id, project=None): return DatasetReference(project, dataset_id) def create_dataset(self, dataset): - """API call: create the dataset via a PUT request. + """API call: create the dataset via a POST request. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset` - :param dataset: A ``Dataset`` populated with the desired initial state. - If project is missing, it defaults to the project of - the client. + Args: + dataset (google.cloud.bigquery.dataset.Dataset): + A ``Dataset`` populated with the desired initial state. + + Returns: + google.cloud.bigquery.dataset.Dataset: + A new ``Dataset`` returned from the API. + + Example: + + >>> from google.cloud import bigquery + >>> client = bigquery.Client() + >>> dataset = bigquery.Dataset(client.dataset('my_dataset')) + >>> dataset = client.create_dataset(dataset) - :rtype: ":class:`~google.cloud.bigquery.dataset.Dataset`" - :returns: a new ``Dataset`` returned from the service. """ path = '/projects/%s/datasets' % (dataset.project,) api_response = self._connection.api_request( - method='POST', path=path, data=dataset._build_resource()) + method='POST', path=path, data=dataset.to_api_repr()) return Dataset.from_api_repr(api_response) def create_table(self, table): @@ -327,40 +334,29 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): will only be saved if no modifications to the dataset occurred since the read. - :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` - :param dataset: the dataset to update. - - :type fields: sequence of string - :param fields: the fields of ``dataset`` to change, spelled as the - Dataset properties (e.g. "friendly_name"). - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Args: + dataset (google.cloud.bigquery.dataset.Dataset): + The dataset to update. + fields (Sequence[str]): + The properties of ``dataset`` to change (e.g. "friendly_name"). + retry (google.api_core.retry.Retry, optional): + How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` - :returns: the modified ``Dataset`` instance + Returns: + google.cloud.bigquery.dataset.Dataset: + The modified ``Dataset`` instance. """ - path = '/projects/%s/datasets/%s' % (dataset.project, - dataset.dataset_id) - partial = {} - for f in fields: - if not hasattr(dataset, f): - raise ValueError('No Dataset field %s' % f) - # All dataset attributes are trivially convertible to JSON except - # for access entries. - if f == 'access_entries': - attr = dataset._build_access_resource() - api_field = 'access' - else: - attr = getattr(dataset, f) - api_field = _snake_to_camel_case(f) - partial[api_field] = attr + partial = dataset._build_resource(fields) if dataset.etag is not None: headers = {'If-Match': dataset.etag} else: headers = None api_response = self._call_api( - retry, method='PATCH', path=path, data=partial, headers=headers) + retry, + method='PATCH', + path=dataset.path, + data=partial, + headers=headers) return Dataset.from_api_repr(api_response) def update_table(self, table, fields, retry=DEFAULT_RETRY): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index c8f588671ab5..b290ec7bb49e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -17,41 +17,59 @@ from __future__ import absolute_import import six +import copy -from google.cloud._helpers import _datetime_from_microseconds +import google.cloud._helpers +from google.cloud.bigquery import _helpers from google.cloud.bigquery.table import TableReference class AccessEntry(object): - """Represent grant of an access role to an entity. + """Represents grant of an access role to an entity. - Every entry in the access list will have exactly one of - ``userByEmail``, ``groupByEmail``, ``domain``, ``specialGroup`` or - ``view`` set. And if anything but ``view`` is set, it'll also have a - ``role`` specified. ``role`` is omitted for a ``view``, since - ``view`` s are always read-only. + An entry must have exactly one of the allowed :attr:`ENTITY_TYPES`. If + anything but ``view`` is set, a ``role`` is also required. ``role`` is + omitted for a ``view``, because ``view`` s are always read-only. See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets. - :type role: str - :param role: Role granted to the entity. One of + Attributes: + role (str): + Role granted to the entity. The following string values are + supported: `'READER'`, `'WRITER'`, `'OWNER'`. It may also be + :data:`None` if the ``entity_type`` is ``view``. - * ``'OWNER'`` - * ``'WRITER'`` - * ``'READER'`` + entity_type (str): + Type of entity being granted the role. One of :attr:`ENTITY_TYPES`. - May also be ``None`` if the ``entity_type`` is ``view``. + entity_id (Union[str, Dict[str, str]]): + If the ``entity_type`` is not 'view', the ``entity_id`` is the + ``str`` ID of the entity being granted the role. If the + ``entity_type`` is 'view', the ``entity_id`` is a ``dict`` + representing the view from a different dataset to grant access to + in the following format:: - :type entity_type: str - :param entity_type: Type of entity being granted the role. One of - :attr:`ENTITY_TYPES`. + { + 'projectId': string, + 'datasetId': string, + 'tableId': string + } - :type entity_id: str - :param entity_id: ID of entity being granted the role. - - :raises: :class:`ValueError` if the ``entity_type`` is not among - :attr:`ENTITY_TYPES`, or if a ``view`` has ``role`` set or - a non ``view`` **does not** have a ``role`` set. + Raises: + ValueError: + If the ``entity_type`` is not among :attr:`ENTITY_TYPES`, or if a + ``view`` has ``role`` set, or a non ``view`` **does not** have a + ``role`` set. + + Examples: + >>> entry = AccessEntry('OWNER', 'userByEmail', 'user@example.com') + + >>> view = { + ... 'projectId': 'my-project', + ... 'datasetId': 'my_dataset', + ... 'tableId': 'my_table' + ... } + >>> entry = AccessEntry(None, 'view', view) """ ENTITY_TYPES = frozenset(['userByEmail', 'groupByEmail', 'domain', @@ -91,6 +109,41 @@ def __repr__(self): return '' % ( self.role, self.entity_type, self.entity_id) + def to_api_repr(self): + """Construct the API resource representation of this access entry + + Returns: + Dict[str, object]: Access entry represented as an API resource + """ + resource = {self.entity_type: self.entity_id} + if self.role is not None: + resource['role'] = self.role + return resource + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct an access entry given its API representation + + Args: + resource (Dict[str, object]): + Access entry resource representation returned from the API + + Returns: + google.cloud.bigquery.dataset.AccessEntry: + Access entry parsed from ``resource``. + + Raises: + ValueError: + If the resource has more keys than ``role`` and one additional + key. + """ + entry = resource.copy() + role = entry.pop('role', None) + entity_type, entity_id = entry.popitem() + if len(entry) != 0: + raise ValueError('Entry has unexpected keys remaining.', entry) + return cls(role, entity_type, entity_id) + class DatasetReference(object): """DatasetReferences are pointers to datasets. @@ -98,11 +151,12 @@ class DatasetReference(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets - :type project: str - :param project: the ID of the project + Args: + project (str): The ID of the project + dataset_id (str): The ID of the dataset - :type dataset_id: str - :param dataset_id: the ID of the dataset + Raises: + ValueError: If either argument is not of type ``str``. """ def __init__(self, project, dataset_id): @@ -115,49 +169,53 @@ def __init__(self, project, dataset_id): @property def project(self): - """Project ID of the dataset. - - :rtype: str - :returns: the project ID. - """ + """str: Project ID of the dataset.""" return self._project @property def dataset_id(self): - """Dataset ID. - - :rtype: str - :returns: the dataset ID. - """ + """str: Dataset ID.""" return self._dataset_id @property def path(self): - """URL path for the dataset's APIs. - - :rtype: str - :returns: the path based on project and dataset name. - """ + """str: URL path for the dataset based on project and dataset ID.""" return '/projects/%s/datasets/%s' % (self.project, self.dataset_id) def table(self, table_id): """Constructs a TableReference. - :type table_id: str - :param table_id: the ID of the table. + Args: + table_id (str): The ID of the table. - :rtype: :class:`google.cloud.bigquery.table.TableReference` - :returns: a TableReference for a table in this dataset. + Returns: + google.cloud.bigquery.table.TableReference: + A table reference for a table in this dataset. """ return TableReference(self, table_id) @classmethod def from_api_repr(cls, resource): + """Factory: construct a dataset reference given its API representation + + Args: + resource (Dict[str, str]): + Dataset reference resource representation returned from the API + + Returns: + google.cloud.bigquery.dataset.DatasetReference: + Dataset reference parsed from ``resource``. + """ project = resource['projectId'] dataset_id = resource['datasetId'] return cls(project, dataset_id) def to_api_repr(self): + """Construct the API resource representation of this dataset reference + + Returns: + Dict[str, str]: dataset reference represented as an API resource + """ return { 'projectId': self._project, 'datasetId': self._dataset_id, @@ -169,7 +227,7 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - tuple: The contents of this :class:`.DatasetReference`. + Tuple[str]: The contents of this :class:`.DatasetReference`. """ return ( self._project, @@ -197,353 +255,256 @@ class Dataset(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets - :type dataset_ref: :class:`~google.cloud.bigquery.dataset.DatasetReference` - :param dataset_ref: a pointer to a dataset + Args: + dataset_ref (google.cloud.bigquery.dataset.DatasetReference): + a pointer to a dataset """ + _PROPERTY_TO_API_FIELD = { + 'access_entries': 'access', + 'created': 'creationTime', + 'default_table_expiration_ms': 'defaultTableExpirationMs', + 'friendly_name': 'friendlyName', + } + def __init__(self, dataset_ref): - self._project = dataset_ref.project - self._dataset_id = dataset_ref.dataset_id - self._properties = {'labels': {}} - self._access_entries = () + self._properties = { + 'datasetReference': dataset_ref.to_api_repr(), + 'labels': {}, + } @property def project(self): - """Project bound to the dataset. - - :rtype: str - :returns: the project. - """ - return self._project + """str: Project ID of the project bound to the dataset.""" + return self._properties['datasetReference']['projectId'] @property def path(self): - """URL path for the dataset's APIs. - - :rtype: str - :returns: the path based on project and dataset ID. - """ + """str: URL path for the dataset based on project and dataset ID.""" return '/projects/%s/datasets/%s' % (self.project, self.dataset_id) @property def access_entries(self): - """Dataset's access entries. + """List[google.cloud.bigquery.dataset.AccessEntry]: Dataset's access + entries. + + ``role`` augments the entity type and must be present **unless** the + entity type is ``view``. - :rtype: list of :class:`AccessEntry` - :returns: roles granted to entities for this dataset + Raises: + TypeError: If 'value' is not a sequence + ValueError: + If any item in the sequence is not an + :class:`~google.cloud.bigquery.dataset.AccessEntry`. """ - return list(self._access_entries) + entries = self._properties.get('access', []) + return [AccessEntry.from_api_repr(entry) for entry in entries] @access_entries.setter def access_entries(self, value): - """Update dataset's access entries - - :type value: - list of :class:`~google.cloud.bigquery.dataset.AccessEntry` - :param value: roles granted to entities for this dataset - - :raises: TypeError if 'value' is not a sequence, or ValueError if - any item in the sequence is not an AccessEntry - """ if not all(isinstance(field, AccessEntry) for field in value): raise ValueError('Values must be AccessEntry instances') - self._access_entries = tuple(value) + entries = [entry.to_api_repr() for entry in value] + self._properties['access'] = entries @property def created(self): - """Datetime at which the dataset was created. - - :rtype: ``datetime.datetime``, or ``NoneType`` - :returns: the creation time (None until set from the server). + """Union[datetime.datetime, None]: Datetime at which the dataset was + created (:data:`None` until set from the server). """ creation_time = self._properties.get('creationTime') if creation_time is not None: # creation_time will be in milliseconds. - return _datetime_from_microseconds(1000.0 * creation_time) + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(creation_time)) @property def dataset_id(self): - """Dataset ID. - - :rtype: str - :returns: the dataset ID. - """ - return self._dataset_id + """str: Dataset ID.""" + return self._properties['datasetReference']['datasetId'] @property def full_dataset_id(self): - """ID for the dataset resource, in the form "project_id:dataset_id". + """Union[str, None]: ID for the dataset resource (:data:`None` until + set from the server) - :rtype: str, or ``NoneType`` - :returns: the ID (None until set from the server). + In the format ``project_id:dataset_id``. """ return self._properties.get('id') @property def reference(self): - """A reference to this dataset. - - Returns: - google.cloud.bigquery.dataset.DatasetReference: - A pointer to this dataset + """google.cloud.bigquery.dataset.DatasetReference: A reference to this + dataset. """ return DatasetReference(self.project, self.dataset_id) @property def etag(self): - """ETag for the dataset resource. - - :rtype: str, or ``NoneType`` - :returns: the ETag (None until set from the server). + """Union[str, None]: ETag for the dataset resource (:data:`None` until + set from the server). """ return self._properties.get('etag') @property def modified(self): - """Datetime at which the dataset was last modified. - - :rtype: ``datetime.datetime``, or ``NoneType`` - :returns: the modification time (None until set from the server). + """Union[datetime.datetime, None]: Datetime at which the dataset was + last modified (:data:`None` until set from the server). """ modified_time = self._properties.get('lastModifiedTime') if modified_time is not None: # modified_time will be in milliseconds. - return _datetime_from_microseconds(1000.0 * modified_time) + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(modified_time)) @property def self_link(self): - """URL for the dataset resource. - - :rtype: str, or ``NoneType`` - :returns: the URL (None until set from the server). + """Union[str, None]: URL for the dataset resource (:data:`None` until + set from the server). """ return self._properties.get('selfLink') @property def default_table_expiration_ms(self): - """Default expiration time for tables in the dataset. + """Union[int, None]: Default expiration time for tables in the dataset + (defaults to :data:`None`). - :rtype: int, or ``NoneType`` - :returns: The time in milliseconds, or None (the default). + Raises: + ValueError: For invalid value types. """ - return self._properties.get('defaultTableExpirationMs') + return _helpers._int_or_none( + self._properties.get('defaultTableExpirationMs')) @default_table_expiration_ms.setter def default_table_expiration_ms(self, value): - """Update default expiration time for tables in the dataset. - - :type value: int - :param value: (Optional) new default time, in milliseconds - - :raises: ValueError for invalid value types. - """ if not isinstance(value, six.integer_types) and value is not None: raise ValueError("Pass an integer, or None") - self._properties['defaultTableExpirationMs'] = value + self._properties['defaultTableExpirationMs'] = _helpers._str_or_none( + value) @property def description(self): - """Description of the dataset. + """Union[str, None]: Description of the dataset as set by the user + (defaults to :data:`None`). - :rtype: str, or ``NoneType`` - :returns: The description as set by the user, or None (the default). + Raises: + ValueError: for invalid value types. """ return self._properties.get('description') @description.setter def description(self, value): - """Update description of the dataset. - - :type value: str - :param value: (Optional) new description - - :raises: ValueError for invalid value types. - """ if not isinstance(value, six.string_types) and value is not None: raise ValueError("Pass a string, or None") self._properties['description'] = value @property def friendly_name(self): - """Title of the dataset. + """Union[str, None]: Title of the dataset as set by the user + (defaults to :data:`None`). - :rtype: str, or ``NoneType`` - :returns: The name as set by the user, or None (the default). + Raises: + ValueError: for invalid value types. """ return self._properties.get('friendlyName') @friendly_name.setter def friendly_name(self, value): - """Update title of the dataset. - - :type value: str - :param value: (Optional) new title - - :raises: ValueError for invalid value types. - """ if not isinstance(value, six.string_types) and value is not None: raise ValueError("Pass a string, or None") self._properties['friendlyName'] = value @property def location(self): - """Location in which the dataset is hosted. + """Union[str, None]: Location in which the dataset is hosted as set by + the user (defaults to :data:`None`). - :rtype: str, or ``NoneType`` - :returns: The location as set by the user, or None (the default). + Raises: + ValueError: for invalid value types. """ return self._properties.get('location') @location.setter def location(self, value): - """Update location in which the dataset is hosted. - - :type value: str - :param value: (Optional) new location - - :raises: ValueError for invalid value types. - """ if not isinstance(value, six.string_types) and value is not None: raise ValueError("Pass a string, or None") self._properties['location'] = value @property def labels(self): - """Labels for the dataset. + """Dict[str, str]: Labels for the dataset. This method always returns a dict. To change a dataset's labels, modify the dict, then call :meth:`google.cloud.bigquery.client.Client.update_dataset`. To delete - a label, set its value to ``None`` before updating. + a label, set its value to :data:`None` before updating. - :rtype: dict, {str -> str} - :returns: A dict of the the dataset's labels. + Raises: + ValueError: for invalid value types. """ return self._properties.get('labels', {}) @labels.setter def labels(self, value): - """Update labels for the dataset. - - :type value: dict, {str -> str} - :param value: new labels - - :raises: ValueError for invalid value types. - """ if not isinstance(value, dict): raise ValueError("Pass a dict") self._properties['labels'] = value @classmethod def from_api_repr(cls, resource): - """Factory: construct a dataset given its API representation + """Factory: construct a dataset given its API representation - :type resource: dict - :param resource: dataset resource representation returned from the API + Args: + resource (Dict[str: object]): + Dataset resource representation returned from the API - :rtype: :class:`~google.cloud.bigquery.dataset.Dataset` - :returns: Dataset parsed from ``resource``. + Returns: + google.cloud.bigquery.dataset.Dataset: + Dataset parsed from ``resource``. """ - dsr = resource.get('datasetReference') - if dsr is None or 'datasetId' not in dsr: + if ('datasetReference' not in resource or + 'datasetId' not in resource['datasetReference']): raise KeyError('Resource lacks required identity information:' '["datasetReference"]["datasetId"]') - dataset_id = dsr['datasetId'] - dataset = cls(DatasetReference(dsr['projectId'], dataset_id)) - dataset._set_properties(resource) + project_id = resource['datasetReference']['projectId'] + dataset_id = resource['datasetReference']['datasetId'] + dataset = cls(DatasetReference(project_id, dataset_id)) + dataset._properties = copy.deepcopy(resource) return dataset - @staticmethod - def _parse_access_entries(access): - """Parse a resource fragment into a set of access entries. - - ``role`` augments the entity type and present **unless** the entity - type is ``view``. - - :type access: list of mappings - :param access: each mapping represents a single access entry. - - :rtype: list of :class:`~google.cloud.bigquery.dataset.AccessEntry` - :returns: a list of parsed entries. - :raises: :class:`ValueError` if a entry in ``access`` has more keys - than ``role`` and one additional key. - """ - result = [] - for entry in access: - entry = entry.copy() - role = entry.pop('role', None) - entity_type, entity_id = entry.popitem() - if len(entry) != 0: - raise ValueError('Entry has unexpected keys remaining.', entry) - result.append( - AccessEntry(role, entity_type, entity_id)) - return result - - def _set_properties(self, api_response): - """Update properties from resource in body of ``api_response`` - - :type api_response: dict - :param api_response: response returned from an API call. - """ - self._properties.clear() - cleaned = api_response.copy() - access = cleaned.pop('access', ()) - self.access_entries = self._parse_access_entries(access) - if 'creationTime' in cleaned: - cleaned['creationTime'] = float(cleaned['creationTime']) - if 'lastModifiedTime' in cleaned: - cleaned['lastModifiedTime'] = float(cleaned['lastModifiedTime']) - if 'defaultTableExpirationMs' in cleaned: - cleaned['defaultTableExpirationMs'] = int( - cleaned['defaultTableExpirationMs']) - if 'labels' not in cleaned: - cleaned['labels'] = {} - self._properties.update(cleaned) - - def _build_access_resource(self): - """Generate a resource fragment for dataset's access entries.""" - result = [] - for entry in self.access_entries: - info = {entry.entity_type: entry.entity_id} - if entry.role is not None: - info['role'] = entry.role - result.append(info) - return result - - def _build_resource(self): - """Generate a resource for ``create`` or ``update``.""" - resource = { - 'datasetReference': { - 'projectId': self.project, 'datasetId': self.dataset_id}, - } - if self.default_table_expiration_ms is not None: - value = self.default_table_expiration_ms - resource['defaultTableExpirationMs'] = value - - if self.description is not None: - resource['description'] = self.description - - if self.friendly_name is not None: - resource['friendlyName'] = self.friendly_name - - if self.location is not None: - resource['location'] = self.location - - if len(self.access_entries) > 0: - resource['access'] = self._build_access_resource() - - resource['labels'] = self.labels # labels is never None + def to_api_repr(self): + """Construct the API resource representation of this dataset - return resource + Returns: + Dict[str, object]: The dataset represented as an API resource + """ + return copy.deepcopy(self._properties) + + def _build_resource(self, filter_fields): + """Generate a resource for ``update``.""" + partial = {} + for filter_field in filter_fields: + api_field = self._PROPERTY_TO_API_FIELD.get(filter_field) + if api_field is None and filter_field not in self._properties: + raise ValueError('No Dataset property %s' % filter_field) + elif api_field is not None: + partial[api_field] = self._properties.get(api_field) + else: + # allows properties that are not defined in the library + # and properties that have the same name as API resource key + partial[filter_field] = self._properties[filter_field] + + return partial def table(self, table_id): """Constructs a TableReference. - :type table_id: str - :param table_id: the ID of the table. + Args: + table_id (str): the ID of the table. - :rtype: :class:`~google.cloud.bigquery.table.TableReference` - :returns: a TableReference for a table in this dataset. + Returns: + google.cloud.bigquery.table.TableReference: + A TableReference for a table in this dataset. """ return TableReference(self.reference, table_id) @@ -561,7 +522,7 @@ class DatasetListItem(object): Args: - resource (dict): + resource (Dict[str, str]): A dataset-like resource object from a dataset list response. A ``datasetReference`` property is required. @@ -584,66 +545,50 @@ def __init__(self, resource): @property def project(self): - """Project bound to the dataset. - - :rtype: str - :returns: the project. - """ + """str: Project bound to the dataset.""" return self._properties['datasetReference']['projectId'] @property def dataset_id(self): - """Dataset ID. - - :rtype: str - :returns: the dataset ID. - """ + """str: Dataset ID.""" return self._properties['datasetReference']['datasetId'] @property def full_dataset_id(self): - """ID for the dataset resource, in the form "project_id:dataset_id". + """Union[str, None]: ID for the dataset resource (:data:`None` until + set from the server) - :rtype: str, or ``NoneType`` - :returns: the ID (None until set from the server). + In the format ``project_id:dataset_id``. """ return self._properties.get('id') @property def friendly_name(self): - """Title of the dataset. - - :rtype: str, or ``NoneType`` - :returns: The name as set by the user, or None (the default). + """Union[str, None]: Title of the dataset as set by the user + (defaults to :data:`None`). """ return self._properties.get('friendlyName') @property def labels(self): - """Labels for the dataset. - - :rtype: dict, {str -> str} - :returns: A dict of the the dataset's labels. - """ + """Dict[str, str]: Labels for the dataset.""" return self._properties.get('labels', {}) @property def reference(self): - """A reference to this dataset. - - Returns: - google.cloud.bigquery.dataset.DatasetReference: - A pointer to this dataset + """google.cloud.bigquery.dataset.DatasetReference: A reference to this + dataset. """ return DatasetReference(self.project, self.dataset_id) def table(self, table_id): """Constructs a TableReference. - :type table_id: str - :param table_id: the ID of the table. + Args: + table_id (str): the ID of the table. - :rtype: :class:`~google.cloud.bigquery.table.TableReference` - :returns: a TableReference for a table in this dataset. + Returns: + google.cloud.bigquery.table.TableReference: + A TableReference for a table in this dataset. """ return TableReference(self.reference, table_id) diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 362e148b6b80..0e407551f395 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -744,6 +744,44 @@ def test_w_camel_case_string(self): self.assertEqual(self._call_fut('friendlyName'), 'friendlyName') +class Test__int_or_none(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _int_or_none + + return _int_or_none(value) + + def test_w_num_string(self): + self.assertEqual(self._call_fut('123'), 123) + + def test_w_none(self): + self.assertIsNone(self._call_fut(None)) + + def test_w_int(self): + self.assertEqual(self._call_fut(123), 123) + + def test_w_non_num_string(self): + with self.assertRaises(ValueError): + self._call_fut('ham') + + +class Test__str_or_none(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _str_or_none + + return _str_or_none(value) + + def test_w_int(self): + self.assertEqual(self._call_fut(123), '123') + + def test_w_none(self): + self.assertIsNone(self._call_fut(None)) + + def test_w_str(self): + self.assertEqual(self._call_fut('ham'), 'ham') + + class _Field(object): def __init__(self, mode, name='unknown', field_type='UNKNOWN', fields=()): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 3f8e7e276e64..091250ab8d77 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -385,7 +385,7 @@ def test_create_dataset_w_attrs(self): 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, - 'defaultTableExpirationMs': 3600, + 'defaultTableExpirationMs': '3600', 'labels': LABELS, 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, @@ -413,7 +413,7 @@ def test_create_dataset_w_attrs(self): 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, - 'defaultTableExpirationMs': 3600, + 'defaultTableExpirationMs': '3600', 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'view': VIEW}], @@ -429,6 +429,40 @@ def test_create_dataset_w_attrs(self): self.assertEqual(ds.default_table_expiration_ms, 3600) self.assertEqual(ds.labels, LABELS) + def test_create_dataset_w_custom_property(self): + # The library should handle sending properties to the API that are not + # yet part of the library + from google.cloud.bigquery.dataset import Dataset + + path = '/projects/%s/datasets' % self.PROJECT + resource = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'newAlphaProperty': 'unreleased property', + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _make_connection(resource) + dataset = Dataset(client.dataset(self.DS_ID)) + dataset._properties['newAlphaProperty'] = 'unreleased property' + + dataset = client.create_dataset(dataset) + conn.api_request.assert_called_once_with( + method='POST', + path=path, + data={ + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'newAlphaProperty': 'unreleased property', + 'labels': {}, + } + ) + + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) + self.assertEqual( + dataset._properties['newAlphaProperty'], 'unreleased property') + def test_create_table_w_day_partition(self): from google.cloud.bigquery.table import Table @@ -758,6 +792,36 @@ def test_update_dataset(self): req = conn.api_request.call_args self.assertEqual(req[1]['headers']['If-Match'], 'etag') + def test_update_dataset_w_custom_property(self): + # The library should handle sending properties to the API that are not + # yet part of the library + from google.cloud.bigquery.dataset import Dataset + + path = '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) + resource = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'newAlphaProperty': 'unreleased property', + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _make_connection(resource) + dataset = Dataset(client.dataset(self.DS_ID)) + dataset._properties['newAlphaProperty'] = 'unreleased property' + + dataset = client.update_dataset(dataset, ['newAlphaProperty']) + conn.api_request.assert_called_once_with( + method='PATCH', + data={'newAlphaProperty': 'unreleased property'}, + path=path, + headers=None, + ) + + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) + self.assertEqual( + dataset._properties['newAlphaProperty'], 'unreleased property') + def test_update_table(self): from google.cloud.bigquery.table import Table, SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 33c38720beac..1dbf91f46e27 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -84,6 +84,44 @@ def test__eq___type_mismatch(self): self.assertNotEqual(entry, object()) self.assertEqual(entry, mock.ANY) + def test_to_api_repr(self): + entry = self._make_one('OWNER', 'userByEmail', 'salmon@example.com') + resource = entry.to_api_repr() + exp_resource = {'role': 'OWNER', 'userByEmail': 'salmon@example.com'} + self.assertEqual(resource, exp_resource) + + def test_to_api_repr_view(self): + view = { + 'projectId': 'my-project', + 'datasetId': 'my_dataset', + 'tableId': 'my_table' + } + entry = self._make_one(None, 'view', view) + resource = entry.to_api_repr() + exp_resource = {'view': view} + self.assertEqual(resource, exp_resource) + + def test_from_api_repr(self): + resource = {'role': 'OWNER', 'userByEmail': 'salmon@example.com'} + entry = self._get_target_class().from_api_repr(resource) + self.assertEqual(entry.role, 'OWNER') + self.assertEqual(entry.entity_type, 'userByEmail') + self.assertEqual(entry.entity_id, 'salmon@example.com') + + def test_from_api_repr_w_unknown_entity_type(self): + resource = {'role': 'READER', 'unknown': 'UNKNOWN'} + with self.assertRaises(ValueError): + self._get_target_class().from_api_repr(resource) + + def test_from_api_repr_entries_w_extra_keys(self): + resource = { + 'role': 'READER', + 'specialGroup': 'projectReaders', + 'userByEmail': 'salmon@example.com', + } + with self.assertRaises(ValueError): + self._get_target_class().from_api_repr(resource) + class TestDatasetReference(unittest.TestCase): @@ -434,26 +472,33 @@ def test_from_api_repr_w_properties(self): dataset = klass.from_api_repr(RESOURCE) self._verify_resource_properties(dataset, RESOURCE) - def test__parse_access_entries_w_unknown_entity_type(self): - ACCESS = [ - {'role': 'READER', 'unknown': 'UNKNOWN'}, - ] + def test_to_api_repr_w_custom_field(self): dataset = self._make_one(self.DS_REF) - with self.assertRaises(ValueError): - dataset._parse_access_entries(ACCESS) + dataset._properties['newAlphaProperty'] = 'unreleased property' + resource = dataset.to_api_repr() - def test__parse_access_entries_w_extra_keys(self): - USER_EMAIL = 'phred@example.com' - ACCESS = [ - { - 'role': 'READER', - 'specialGroup': 'projectReaders', - 'userByEmail': USER_EMAIL, - }, - ] + exp_resource = { + 'datasetReference': self.DS_REF.to_api_repr(), + 'labels': {}, + 'newAlphaProperty': 'unreleased property', + } + self.assertEqual(resource, exp_resource) + + def test__build_resource_w_custom_field(self): + dataset = self._make_one(self.DS_REF) + dataset._properties['newAlphaProperty'] = 'unreleased property' + resource = dataset._build_resource(['newAlphaProperty']) + + exp_resource = { + 'newAlphaProperty': 'unreleased property' + } + self.assertEqual(resource, exp_resource) + + def test__build_resource_w_custom_field_not_in__properties(self): dataset = self._make_one(self.DS_REF) + dataset.bad = 'value' with self.assertRaises(ValueError): - dataset._parse_access_entries(ACCESS) + dataset._build_resource(['bad']) def test_table(self): from google.cloud.bigquery.table import TableReference From 5ddd04c4361bba599862d761cff97e34a3c0b9ec Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 2 Apr 2018 15:02:45 -0700 Subject: [PATCH 0420/2016] BigQuery: Moves all Table property conversion logic into properties (#5125) * moves Table python/API type conversions into properties * handles empty schema * updates per comments and adds Table.to_api_repr() * updates docstrings to google style * removes properties with the same name as API field from _PROPERTY_TO_API_FIELD * _build_resource() is only for update * fixes variable name and comment * fixes type handling for non-string properties * adds partition_expiration to _PROPERTY_TO_API_FIELD --- .../google/cloud/bigquery/client.py | 10 +- .../google/cloud/bigquery/table.py | 685 +++++++----------- .../tests/unit/test_client.py | 118 ++- .../tests/unit/test_table.py | 56 ++ 4 files changed, 388 insertions(+), 481 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e44d062edca0..595c614f5217 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -272,12 +272,8 @@ def create_table(self, table): """ path = '/projects/%s/datasets/%s/tables' % ( table.project, table.dataset_id) - resource = table._build_resource(Table.all_fields) - doomed = [field for field in resource if resource[field] is None] - for field in doomed: - del resource[field] api_response = self._connection.api_request( - method='POST', path=path, data=resource) + method='POST', path=path, data=table.to_api_repr()) return Table.from_api_repr(api_response) def _call_api(self, retry, **kwargs): @@ -1111,7 +1107,7 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): elif isinstance(table, TableReference): raise ValueError('need selected_fields with TableReference') elif isinstance(table, Table): - if len(table._schema) == 0: + if len(table.schema) == 0: raise ValueError(_TABLE_HAS_NO_SCHEMA) schema = table.schema else: @@ -1305,7 +1301,7 @@ def list_rows(self, table, selected_fields=None, max_results=None, elif isinstance(table, TableReference): raise ValueError('need selected_fields with TableReference') elif isinstance(table, Table): - if len(table._schema) == 0: + if len(table.schema) == 0: raise ValueError(_TABLE_HAS_NO_SCHEMA) schema = table.schema else: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 6d16ff606a60..61be2004531b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -28,12 +28,8 @@ from google.api_core.page_iterator import HTTPIterator -from google.cloud._helpers import _datetime_from_microseconds -from google.cloud._helpers import _millis_from_datetime -from google.cloud.bigquery._helpers import _item_to_row -from google.cloud.bigquery._helpers import _rows_page_start -from google.cloud.bigquery._helpers import _snake_to_camel_case -from google.cloud.bigquery._helpers import _field_to_index_mapping +import google.cloud._helpers +from google.cloud.bigquery import _helpers from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource @@ -58,12 +54,15 @@ def _reference_getter(table): def _view_use_legacy_sql_getter(table): - """Specifies whether to execute the view with Legacy or Standard SQL. + """bool: Specifies whether to execute the view with Legacy or Standard SQL. - If this table is not a view, None is returned. + This boolean specifies whether to execute the view with Legacy SQL + (:data:`True`) or Standard SQL (:data:`False`). The client side default is + :data:`False`. The server-side default is :data:`True`. If this table is + not a view, :data:`None` is returned. - Returns: - bool: True if the view is using legacy SQL, or None if not a view + Raises: + ValueError: For invalid value types. """ view = table._properties.get('view') if view is not None: @@ -92,7 +91,8 @@ def __init__(self, kms_key_name=None): def kms_key_name(self): """str: Resource ID of Cloud KMS key - Resource ID of Cloud KMS key or ``None`` if using default encryption. + Resource ID of Cloud KMS key or :data:`None` if using default + encryption. """ return self._properties.get('kmsKeyName') @@ -105,7 +105,7 @@ def from_api_repr(cls, resource): """Construct an encryption configuration from its API representation Args: - resource (dict): + resource (Dict[str, object]): An encryption configuration representation as returned from the API. @@ -118,10 +118,12 @@ def from_api_repr(cls, resource): return config def to_api_repr(self): - """Construct the API resource representation of this + """Construct the API resource representation of this encryption + configuration. Returns: - dict: Encryption configuration as represented as an API resource + Dict[str, object]: + Encryption configuration as represented as an API resource """ return copy.deepcopy(self._properties) @@ -132,11 +134,10 @@ class TableReference(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables - :type dataset_ref: :class:`google.cloud.bigquery.dataset.DatasetReference` - :param dataset_ref: a pointer to the dataset - - :type table_id: str - :param table_id: the ID of the table + Args: + dataset_ref (google.cloud.bigquery.dataset.DatasetReference): + A pointer to the dataset + table_id (str): The ID of the table """ def __init__(self, dataset_ref, table_id): @@ -146,38 +147,22 @@ def __init__(self, dataset_ref, table_id): @property def project(self): - """Project bound to the table. - - :rtype: str - :returns: the project (derived from the dataset reference). - """ + """str: Project bound to the table""" return self._project @property def dataset_id(self): - """ID of dataset containing the table. - - :rtype: str - :returns: the ID (derived from the dataset reference). - """ + """str: ID of dataset containing the table.""" return self._dataset_id @property def table_id(self): - """Table ID. - - :rtype: str - :returns: the table ID. - """ + """str: The table ID.""" return self._table_id @property def path(self): - """URL path for the table's APIs. - - :rtype: str - :returns: the path based on project, dataset and table IDs. - """ + """str: URL path for the table's APIs.""" return '/projects/%s/datasets/%s/tables/%s' % ( self._project, self._dataset_id, self._table_id) @@ -185,11 +170,13 @@ def path(self): def from_api_repr(cls, resource): """Factory: construct a table reference given its API representation - :type resource: dict - :param resource: table reference representation returned from the API + Args: + resource (Dict[str, object]): + Table reference representation returned from the API - :rtype: :class:`google.cloud.bigquery.table.TableReference` - :returns: Table reference parsed from ``resource``. + Returns: + google.cloud.bigquery.table.TableReference: + Table reference parsed from ``resource``. """ from google.cloud.bigquery.dataset import DatasetReference @@ -201,8 +188,8 @@ def from_api_repr(cls, resource): def to_api_repr(self): """Construct the API resource representation of this table reference. - :rtype: dict - :returns: Table reference as represented as an API resource + Returns: + Dict[str, object]: Table reference represented as an API resource """ return { 'projectId': self._project, @@ -216,7 +203,7 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - tuple: The contents of this :class:`DatasetReference`. + Tuple[str]: The contents of this :class:`DatasetReference`. """ return ( self._project, @@ -245,117 +232,98 @@ class Table(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables - :type table_ref: :class:`google.cloud.bigquery.table.TableReference` - :param table_ref: a pointer to a table - - :type schema: list of :class:`~google.cloud.bigquery.schema.SchemaField` - :param schema: The table's schema + Args: + table_ref (google.cloud.bigquery.table.TableReference): + A pointer to a table + schema (List[google.cloud.bigquery.schema.SchemaField]): + The table's schema """ - _schema = None - - all_fields = [ - 'description', 'friendly_name', 'expires', 'location', - 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema', - 'external_data_configuration', 'labels', 'encryption_configuration' - ] + _PROPERTY_TO_API_FIELD = { + 'friendly_name': 'friendlyName', + 'expires': 'expirationTime', + 'partitioning_type': 'timePartitioning', + 'partition_expiration': 'timePartitioning', + 'view_use_legacy_sql': 'view', + 'view_query': 'view', + 'external_data_configuration': 'externalDataConfiguration', + 'encryption_configuration': 'encryptionConfiguration', + } - def __init__(self, table_ref, schema=()): - self._project = table_ref.project - self._table_id = table_ref.table_id - self._dataset_id = table_ref.dataset_id - self._external_config = None - self._properties = {'labels': {}} + def __init__(self, table_ref, schema=None): + self._properties = { + 'tableReference': table_ref.to_api_repr(), + 'labels': {}, + } # Let the @property do validation. - self.schema = schema + if schema is not None: + self.schema = schema @property def project(self): - """Project bound to the table. - - :rtype: str - :returns: the project (derived from the dataset). - """ - return self._project + """str: Project bound to the table.""" + return self._properties['tableReference']['projectId'] @property def dataset_id(self): - """ID of dataset containing the table. - - :rtype: str - :returns: the ID (derived from the dataset). - """ - return self._dataset_id + """str: ID of dataset containing the table.""" + return self._properties['tableReference']['datasetId'] @property def table_id(self): - """ID of the table. - - :rtype: str - :returns: the table ID. - """ - return self._table_id + """str: ID of the table.""" + return self._properties['tableReference']['tableId'] reference = property(_reference_getter) @property def path(self): - """URL path for the table's APIs. - - :rtype: str - :returns: the path based on project, dataset and table IDs. - """ + """str: URL path for the table's APIs.""" return '/projects/%s/datasets/%s/tables/%s' % ( - self._project, self._dataset_id, self._table_id) + self.project, self.dataset_id, self.table_id) @property def schema(self): - """Table's schema. + """List[google.cloud.bigquery.schema.SchemaField]: Table's schema. - :rtype: list of :class:`~google.cloud.bigquery.schema.SchemaField` - :returns: fields describing the schema - """ - return list(self._schema) + Raises: + TypeError: If 'value' is not a sequence + ValueError: + If any item in the sequence is not a + :class:`~google.cloud.bigquery.schema.SchemaField` + """ + prop = self._properties.get('schema') + if not prop: + return [] + else: + return _parse_schema_resource(prop) @schema.setter def schema(self, value): - """Update table's schema - - :type value: list of :class:`~google.cloud.bigquery.schema.SchemaField` - :param value: fields describing the schema - - :raises: TypeError if 'value' is not a sequence, or ValueError if - any item in the sequence is not a SchemaField - """ if value is None: - self._schema = () + self._properties['schema'] = None elif not all(isinstance(field, SchemaField) for field in value): raise ValueError('Schema items must be fields') else: - self._schema = tuple(value) + self._properties['schema'] = { + 'fields': _build_schema_resource(value) + } @property def labels(self): - """Labels for the table. + """Dict[str, str]: Labels for the table. This method always returns a dict. To change a table's labels, modify the dict, then call ``Client.update_table``. To delete a - label, set its value to ``None`` before updating. + label, set its value to :data:`None` before updating. - :rtype: dict, {str -> str} - :returns: A dict of the the table's labels. + Raises: + ValueError: If ``value`` type is invalid. """ return self._properties.get('labels', {}) @labels.setter def labels(self, value): - """Update labels for the table. - - :type value: dict, {str -> str} - :param value: new labels - - :raises: ValueError for invalid value types. - """ if not isinstance(value, dict): raise ValueError("Pass a dict") self._properties['labels'] = value @@ -365,7 +333,7 @@ def encryption_configuration(self): """google.cloud.bigquery.table.EncryptionConfiguration: Custom encryption configuration for the table. - Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` if using default encryption. See `protecting data with Cloud KMS keys @@ -386,103 +354,86 @@ def encryption_configuration(self, value): @property def created(self): - """Datetime at which the table was created. - - :rtype: ``datetime.datetime``, or ``NoneType`` - :returns: the creation time (None until set from the server). + """Union[datetime.datetime, None]: Datetime at which the table was + created (:data:`None` until set from the server). """ creation_time = self._properties.get('creationTime') if creation_time is not None: # creation_time will be in milliseconds. - return _datetime_from_microseconds(1000.0 * creation_time) + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(creation_time)) @property def etag(self): - """ETag for the table resource. - - :rtype: str, or ``NoneType`` - :returns: the ETag (None until set from the server). + """Union[str, None]: ETag for the table resource (:data:`None` until + set from the server). """ return self._properties.get('etag') @property def modified(self): - """Datetime at which the table was last modified. - - :rtype: ``datetime.datetime``, or ``NoneType`` - :returns: the modification time (None until set from the server). + """Union[datetime.datetime, None]: Datetime at which the table was last + modified (:data:`None` until set from the server). """ modified_time = self._properties.get('lastModifiedTime') if modified_time is not None: # modified_time will be in milliseconds. - return _datetime_from_microseconds(1000.0 * modified_time) + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(modified_time)) @property def num_bytes(self): - """The size of the table in bytes. - - :rtype: int, or ``NoneType`` - :returns: the byte count (None until set from the server). + """Union[int, None]: The size of the table in bytes (:data:`None` until + set from the server). """ - num_bytes_as_str = self._properties.get('numBytes') - if num_bytes_as_str is not None: - return int(num_bytes_as_str) + return _helpers._int_or_none(self._properties.get('numBytes')) @property def num_rows(self): - """The number of rows in the table. - - :rtype: int, or ``NoneType`` - :returns: the row count (None until set from the server). + """Union[int, None]: The number of rows in the table (:data:`None` + until set from the server). """ - num_rows_as_str = self._properties.get('numRows') - if num_rows_as_str is not None: - return int(num_rows_as_str) + return _helpers._int_or_none(self._properties.get('numRows')) @property def self_link(self): - """URL for the table resource. - - :rtype: str, or ``NoneType`` - :returns: the URL (None until set from the server). + """Union[str, None]: URL for the table resource (:data:`None` until set + from the server). """ return self._properties.get('selfLink') @property def full_table_id(self): - """ID for the table, in the form ``project_id:dataset_id.table_id``. + """Union[str, None]: ID for the table (:data:`None` until set from the + server). - :rtype: str, or ``NoneType`` - :returns: the full ID (None until set from the server). + In the format ``project_id:dataset_id.table_id``. """ return self._properties.get('id') @property def table_type(self): - """The type of the table. - - Possible values are "TABLE", "VIEW", or "EXTERNAL". + """Union[str, None]: The type of the table (:data:`None` until set from + the server). - :rtype: str, or ``NoneType`` - :returns: the URL (None until set from the server). + Possible values are ``'TABLE'``, ``'VIEW'``, or ``'EXTERNAL'``. """ return self._properties.get('type') @property def partitioning_type(self): - """Time partitioning of the table. - :rtype: str, or ``NoneType`` - :returns: Returns type if the table is partitioned, None otherwise. + """Union[str, None]: Time partitioning of the table if it is + partitioned (Defaults to :data:`None`). + + The only partitioning type that is currently supported is ``'DAY'``. + + Raises: + ValueError: If the value is not ``'DAY'`` or :data:`None`. """ return self._properties.get('timePartitioning', {}).get('type') @partitioning_type.setter def partitioning_type(self, value): - """Update the partitioning type of the table - - :type value: str - :param value: partitioning type only "DAY" is currently supported - """ if value not in ('DAY', None): raise ValueError("value must be one of ['DAY', None]") @@ -494,131 +445,106 @@ def partitioning_type(self, value): @property def partition_expiration(self): - """Expiration time in ms for a partition - :rtype: int, or ``NoneType`` - :returns: Returns the time in ms for partition expiration + """Union[int, None]: Expiration time in milliseconds for a partition. """ - return self._properties.get('timePartitioning', {}).get('expirationMs') + return _helpers._int_or_none( + self._properties.get('timePartitioning', {}).get('expirationMs')) @partition_expiration.setter def partition_expiration(self, value): - """Update the experation time in ms for a partition - - :type value: int - :param value: partition experiation time in milliseconds - """ if not isinstance(value, (int, type(None))): raise ValueError( - "must be an integer representing millisseconds or None") + "must be an integer representing milliseconds or None") if value is None: if 'timePartitioning' in self._properties: self._properties['timePartitioning'].pop('expirationMs') else: + api_repr = str(value) try: - self._properties['timePartitioning']['expirationMs'] = value + self._properties['timePartitioning']['expirationMs'] = api_repr except KeyError: self._properties['timePartitioning'] = {'type': 'DAY'} - self._properties['timePartitioning']['expirationMs'] = value + self._properties['timePartitioning']['expirationMs'] = api_repr @property def description(self): - """Description of the table. + """Union[str, None]: Description of the table (defaults to + :data:`None`). - :rtype: str, or ``NoneType`` - :returns: The description as set by the user, or None (the default). + Raises: + ValueError: For invalid value types. """ return self._properties.get('description') @description.setter def description(self, value): - """Update description of the table. - - :type value: str - :param value: (Optional) new description - - :raises: ValueError for invalid value types. - """ if not isinstance(value, six.string_types) and value is not None: raise ValueError("Pass a string, or None") self._properties['description'] = value @property def expires(self): - """Datetime at which the table will be removed. + """Union[datetime.datetime, None]: Datetime at which the table will be + deleted. - :rtype: ``datetime.datetime``, or ``NoneType`` - :returns: the expiration time, or None + Raises: + ValueError: For invalid value types. """ expiration_time = self._properties.get('expirationTime') if expiration_time is not None: # expiration_time will be in milliseconds. - return _datetime_from_microseconds(1000.0 * expiration_time) + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(expiration_time)) @expires.setter def expires(self, value): - """Update datetime at which the table will be removed. - - :type value: ``datetime.datetime`` - :param value: (Optional) the new expiration time, or None - """ if not isinstance(value, datetime.datetime) and value is not None: raise ValueError("Pass a datetime, or None") - self._properties['expirationTime'] = _millis_from_datetime(value) + value_ms = google.cloud._helpers._millis_from_datetime(value) + self._properties['expirationTime'] = _helpers._str_or_none(value_ms) @property def friendly_name(self): - """Title of the table. + """Union[str, None]: Title of the table (defaults to :data:`None`). - :rtype: str, or ``NoneType`` - :returns: The name as set by the user, or None (the default). + Raises: + ValueError: For invalid value types. """ return self._properties.get('friendlyName') @friendly_name.setter def friendly_name(self, value): - """Update title of the table. - - :type value: str - :param value: (Optional) new title - - :raises: ValueError for invalid value types. - """ if not isinstance(value, six.string_types) and value is not None: raise ValueError("Pass a string, or None") self._properties['friendlyName'] = value @property def location(self): - """Location in which the table is hosted. + """Union[str, None]: Location in which the table is hosted (defaults + to :data:`None`). - :rtype: str, or ``NoneType`` - :returns: The location as set by the user, or None (the default). + Raises: + ValueError: For invalid value types. """ return self._properties.get('location') @location.setter def location(self, value): - """Update location in which the table is hosted. - - :type value: str - :param value: (Optional) new location - - :raises: ValueError for invalid value types. - """ if not isinstance(value, six.string_types) and value is not None: raise ValueError("Pass a string, or None") self._properties['location'] = value @property def view_query(self): - """SQL query defining the table as a view. + """Union[str, None]: SQL query defining the table as a view (defaults + to :data:`None`). By default, the query is treated as Standard SQL. To use Legacy SQL, set view_use_legacy_sql to True. - :rtype: str, or ``NoneType`` - :returns: The query as set by the user, or None (the default). + Raises: + ValueError: For invalid value types. """ view = self._properties.get('view') if view is not None: @@ -626,13 +552,6 @@ def view_query(self): @view_query.setter def view_query(self, value): - """Update SQL query defining the table as a view. - - :type value: str - :param value: new query - - :raises: ValueError for invalid value types. - """ if not isinstance(value, six.string_types): raise ValueError("Pass a string") view = self._properties.get('view') @@ -653,17 +572,6 @@ def view_query(self): @view_use_legacy_sql.setter def view_use_legacy_sql(self, value): - """Update the view sub-property 'useLegacySql'. - - This boolean specifies whether to execute the view with Legacy SQL - (True) or Standard SQL (False). The default, if not specified, is - 'False'. - - :type value: bool - :param value: The boolean for view.useLegacySql - - :raises: ValueError for invalid value types. - """ if not isinstance(value, bool): raise ValueError("Pass a boolean") if self._properties.get('view') is None: @@ -672,10 +580,8 @@ def view_use_legacy_sql(self, value): @property def streaming_buffer(self): - """Information about a table's streaming buffer. - - :rtype: :class:`~google.cloud.bigquery.StreamingBuffer` - :returns: Streaming buffer information, returned from get_table. + """google.cloud.bigquery.StreamingBuffer: Information about a table's + streaming buffer. """ sb = self._properties.get('streamingBuffer') if sb is not None: @@ -683,39 +589,44 @@ def streaming_buffer(self): @property def external_data_configuration(self): - """Configuration for an external data source. + """Union[google.cloud.bigquery.ExternalConfig, None]: Configuration for + an external data source (defaults to :data:`None`). - If not set, None is returned. - - :rtype: :class:`~google.cloud.bigquery.ExternalConfig`, or ``NoneType`` - :returns: The external configuration, or None (the default). + Raises: + ValueError: For invalid value types. """ - return self._external_config + prop = self._properties.get('externalDataConfiguration') + if prop is not None: + prop = ExternalConfig.from_api_repr(prop) + return prop @external_data_configuration.setter def external_data_configuration(self, value): - """Sets the configuration for an external data source. - - :type value: - :class:`~google.cloud.bigquery.ExternalConfig`, or ``NoneType`` - :param value: The ExternalConfig, or None to unset. - """ if not (value is None or isinstance(value, ExternalConfig)): raise ValueError("Pass an ExternalConfig or None") - self._external_config = value + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._properties['externalDataConfiguration'] = api_repr @classmethod def from_api_repr(cls, resource): - """Factory: construct a table given its API representation + """Factory: construct a table given its API representation - :type resource: dict - :param resource: table resource representation returned from the API + Args: + resource (Dict[str, object]): + Table resource representation from the API + dataset (google.cloud.bigquery.dataset.Dataset): + The dataset containing the table. - :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` - :param dataset: The dataset containing the table. + Returns: + google.cloud.bigquery.table.Table: Table parsed from ``resource``. - :rtype: :class:`google.cloud.bigquery.table.Table` - :returns: Table parsed from ``resource``. + Raises: + KeyError: + If the ``resource`` lacks the key ``'tableReference'``, or if + the ``dict`` stored within the key ``'tableReference'`` lacks + the keys ``'tableId'``, ``'projectId'``, or ``'datasetId'``. """ from google.cloud.bigquery import dataset @@ -729,100 +640,33 @@ def from_api_repr(cls, resource): dataset_ref = dataset.DatasetReference(project_id, dataset_id) table = cls(dataset_ref.table(table_id)) - table._set_properties(resource) + table._properties = resource return table - def _set_properties(self, api_response): - """Update properties from resource in body of ``api_response`` + def to_api_repr(self): + """Constructs the API resource of this table - :type api_response: dict - :param api_response: response returned from an API call + Returns: + Dict[str, object]: Table represented as an API resource """ - self._properties.clear() - cleaned = api_response.copy() - schema = cleaned.pop('schema', {'fields': ()}) - self.schema = _parse_schema_resource(schema) - ec = cleaned.pop('externalDataConfiguration', None) - if ec: - self.external_data_configuration = ExternalConfig.from_api_repr(ec) - if 'creationTime' in cleaned: - cleaned['creationTime'] = float(cleaned['creationTime']) - if 'lastModifiedTime' in cleaned: - cleaned['lastModifiedTime'] = float(cleaned['lastModifiedTime']) - if 'expirationTime' in cleaned: - cleaned['expirationTime'] = float(cleaned['expirationTime']) - if 'labels' not in cleaned: - cleaned['labels'] = {} - self._properties.update(cleaned) - - def _populate_expires_resource(self, resource): - resource['expirationTime'] = _millis_from_datetime(self.expires) - - def _populate_partitioning_type_resource(self, resource): - resource['timePartitioning'] = self._properties.get('timePartitioning') - - def _populate_view_use_legacy_sql_resource(self, resource): - if 'view' not in resource: - resource['view'] = {} - resource['view']['useLegacySql'] = self.view_use_legacy_sql - - def _populate_view_query_resource(self, resource): - if self.view_query is None: - resource['view'] = None - return - if 'view' not in resource: - resource['view'] = {} - resource['view']['query'] = self.view_query - - def _populate_schema_resource(self, resource): - if not self._schema: - resource['schema'] = None - else: - resource['schema'] = { - 'fields': _build_schema_resource(self._schema), - } - - def _populate_external_config(self, resource): - if not self.external_data_configuration: - resource['externalDataConfiguration'] = None - else: - resource['externalDataConfiguration'] = ExternalConfig.to_api_repr( - self.external_data_configuration) - - def _populate_encryption_configuration(self, resource): - if not self.encryption_configuration: - resource['encryptionConfiguration'] = None - else: - encryptionConfig = EncryptionConfiguration.to_api_repr( - self.encryption_configuration) - resource['encryptionConfiguration'] = encryptionConfig - - custom_resource_fields = { - 'expires': _populate_expires_resource, - 'partitioning_type': _populate_partitioning_type_resource, - 'view_query': _populate_view_query_resource, - 'view_use_legacy_sql': _populate_view_use_legacy_sql_resource, - 'schema': _populate_schema_resource, - 'external_data_configuration': _populate_external_config, - 'encryption_configuration': _populate_encryption_configuration - } + return copy.deepcopy(self._properties) def _build_resource(self, filter_fields): - """Generate a resource for ``create`` or ``update``.""" - resource = { - 'tableReference': { - 'projectId': self._project, - 'datasetId': self._dataset_id, - 'tableId': self.table_id}, - } - for f in filter_fields: - if f in self.custom_resource_fields: - self.custom_resource_fields[f](self, resource) + """Generate a resource for ``update``.""" + partial = {} + for filter_field in filter_fields: + api_field = self._PROPERTY_TO_API_FIELD.get(filter_field) + if api_field is None and filter_field not in self._properties: + raise ValueError('No Table property %s' % filter_field) + elif api_field is not None: + partial[api_field] = self._properties.get(api_field) else: - api_field = _snake_to_camel_case(f) - resource[api_field] = getattr(self, f) - return resource + # allows properties that are not defined in the library + # and properties that have the same name as API resource key + partial[filter_field] = self._properties[filter_field] + + return partial class TableListItem(object): @@ -839,7 +683,7 @@ class TableListItem(object): Args: - resource (dict): + resource (Dict[str, object]): A table-like resource object from a table list response. A ``tableReference`` property is required. @@ -866,96 +710,69 @@ def __init__(self, resource): @property def project(self): - """The project ID of the project this table belongs to. - - Returns: - str: the project ID of the table. - """ + """str: Project bound to the table.""" return self._properties['tableReference']['projectId'] @property def dataset_id(self): - """The dataset ID of the dataset this table belongs to. - - Returns: - str: the dataset ID of the table. - """ + """str: ID of dataset containing the table.""" return self._properties['tableReference']['datasetId'] @property def table_id(self): - """The table ID. - - Returns: - str: the table ID. - """ + """str: ID of the table.""" return self._properties['tableReference']['tableId'] reference = property(_reference_getter) @property def labels(self): - """Labels for the table. + """Dict[str, str]: Labels for the table. This method always returns a dict. To change a table's labels, modify the dict, then call ``Client.update_table``. To delete a - label, set its value to ``None`` before updating. - - Returns: - Map[str, str]: A dictionary of the the table's labels + label, set its value to :data:`None` before updating. """ return self._properties.get('labels', {}) @property def full_table_id(self): - """ID for the table, in the form ``project_id:dataset_id.table_id``. + """Union[str, None]: ID for the table (:data:`None` until set from the + server). - Returns: - str: The fully-qualified ID of the table + In the format ``project_id:dataset_id.table_id``. """ return self._properties.get('id') @property def table_type(self): - """The type of the table. - - Possible values are "TABLE", "VIEW", or "EXTERNAL". + """Union[str, None]: The type of the table (:data:`None` until set from + the server). - Returns: - str: The kind of table + Possible values are ``'TABLE'``, ``'VIEW'``, or ``'EXTERNAL'``. """ return self._properties.get('type') @property def partitioning_type(self): - """Time partitioning of the table. + """Union[str, None]: Time partitioning of the table if it is + partitioned (Defaults to :data:`None`). - Returns: - str: - Type of partitioning if the table is partitioned, None - otherwise. + The only partitioning type that is currently supported is ``'DAY'``. """ return self._properties.get('timePartitioning', {}).get('type') @property def partition_expiration(self): - """Expiration time in ms for a partition - - Returns: - int: The time in ms for partition expiration + """Union[int, None]: Expiration time in milliseconds for a partition. """ expiration = self._properties.get( 'timePartitioning', {}).get('expirationMs') - if expiration is not None: - return int(expiration) + return _helpers._int_or_none(expiration) @property def friendly_name(self): - """Title of the table. - - Returns: - str: The name as set by the user, or None (the default) - """ + """Union[str, None]: Title of the table (defaults to :data:`None`).""" return self._properties.get('friendlyName') view_use_legacy_sql = property(_view_use_legacy_sql_getter) @@ -964,17 +781,20 @@ def friendly_name(self): def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. - :type mapping: dict - :param mapping: Mapping of row data: must contain keys for all - required fields in the schema. Keys which do not correspond - to a field in the schema are ignored. + Args: + mapping (Dict[str, object]) + Mapping of row data: must contain keys for all required fields in + the schema. Keys which do not correspond to a field in the schema + are ignored. + schema (List[google.cloud.bigquery.schema.SchemaField]): + The schema of the table destination for the rows - :type schema: list of :class:`~google.cloud.bigquery.schema.SchemaField` - :param schema: The schema of the table destination for the rows + Returns: + Tuple[object]: + Tuple whose elements are ordered according to the schema. - :rtype: tuple - :returns: Tuple whose elements are ordered according to the schema. - :raises: ValueError if schema is empty + Raises: + ValueError: If schema is empty. """ if len(schema) == 0: raise ValueError(_TABLE_HAS_NO_SCHEMA) @@ -998,16 +818,18 @@ class StreamingBuffer(object): See https://cloud.google.com/bigquery/streaming-data-into-bigquery. - :type resource: dict - :param resource: streaming buffer representation returned from the API + Args: + resource (Dict[str, object]): + streaming buffer representation returned from the API """ def __init__(self, resource): self.estimated_bytes = int(resource['estimatedBytes']) self.estimated_rows = int(resource['estimatedRows']) # time is in milliseconds since the epoch. - self.oldest_entry_time = _datetime_from_microseconds( - 1000.0 * int(resource['oldestEntryTime'])) + self.oldest_entry_time = ( + google.cloud._helpers._datetime_from_microseconds( + 1000.0 * int(resource['oldestEntryTime']))) class Row(object): @@ -1016,11 +838,10 @@ class Row(object): Values can be accessed by position (index), by key like a dict, or as properties. - :type values: tuple - :param values: the row values - - :type field_to_index: dict - :param field_to_index: a mapping from schema field names to indexes + Args: + values (Sequence[object]): The row values + field_to_index (Dict[str, int]): + A mapping from schema field names to indexes """ # Choose unusual field names to try to avoid conflict with schema fields. @@ -1042,7 +863,7 @@ def keys(self): """Return the keys for using a row as a dict. Returns: - Sequence[str]: The keys corresponding to the columns of a row + Iterable[str]: The keys corresponding to the columns of a row Examples: @@ -1055,7 +876,7 @@ def items(self): """Return items as ``(key, value)`` pairs. Returns: - Sequence[Tuple[str, object]]: + Iterable[Tuple[str, object]]: The ``(key, value)`` pairs representing this row. Examples: @@ -1085,7 +906,7 @@ def get(self, key, default=None): >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x') 'a' - The default value is ``None`` when the key does not exist. + The default value is :data:`None` when the key does not exist. >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') None @@ -1148,9 +969,10 @@ class RowIterator(HTTPIterator): path (str): The method path to query for the list of items. page_token (str): A token identifying a page in a result set to start fetching results from. - max_results (int): (Optional) The maximum number of results to fetch. - page_size (int): (Optional) The number of items to return per page. - extra_params (dict): Extra query string parameters for the API call. + max_results (int, optional): The maximum number of results to fetch. + page_size (int, optional): The number of items to return per page. + extra_params (Dict[str, object]): + Extra query string parameters for the API call. .. autoattribute:: pages """ @@ -1158,12 +980,12 @@ class RowIterator(HTTPIterator): def __init__(self, client, api_request, path, schema, page_token=None, max_results=None, page_size=None, extra_params=None): super(RowIterator, self).__init__( - client, api_request, path, item_to_value=_item_to_row, + client, api_request, path, item_to_value=_helpers._item_to_row, items_key='rows', page_token=page_token, max_results=max_results, - extra_params=extra_params, page_start=_rows_page_start, + extra_params=extra_params, page_start=_helpers._rows_page_start, next_token='pageToken') self._schema = schema - self._field_to_index = _field_to_index_mapping(schema) + self._field_to_index = _helpers._field_to_index_mapping(schema) self._total_rows = None self._page_size = page_size @@ -1171,7 +993,8 @@ def _get_next_page_response(self): """Requests the next page from the path provided. Returns: - dict: The parsed JSON response of the next page's contents. + Dict[str, object]: + The parsed JSON response of the next page's contents. """ params = self._get_query_params() if self._page_size is not None: @@ -1183,33 +1006,25 @@ def _get_next_page_response(self): @property def schema(self): - """Schema for the table containing the rows - - Returns: - list of :class:`~google.cloud.bigquery.schema.SchemaField`: - fields describing the schema - """ + """List[google.cloud.bigquery.schema.SchemaField]: Table's schema.""" return list(self._schema) @property def total_rows(self): - """The total number of rows in the table. - - Returns: - int: the row count. - """ + """int: The total number of rows in the table.""" return self._total_rows def to_dataframe(self): """Create a pandas DataFrame from the query results. Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. + pandas.DataFrame: + A :class:`~pandas.DataFrame` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. Raises: - ValueError: If the `pandas` library cannot be imported. + ValueError: If the :mod:`pandas` library cannot be imported. """ if pandas is None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 091250ab8d77..28d53657f82c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -499,6 +499,46 @@ def test_create_table_w_day_partition(self): self.assertEqual(table.partitioning_type, "DAY") self.assertEqual(got.table_id, self.TABLE_ID) + def test_create_table_w_custom_property(self): + # The library should handle sending properties to the API that are not + # yet part of the library + from google.cloud.bigquery.table import Table + + path = 'projects/%s/datasets/%s/tables' % ( + self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'newAlphaProperty': 'unreleased property', + } + conn = client._connection = _make_connection(resource) + table = Table(self.TABLE_REF) + table._properties['newAlphaProperty'] = 'unreleased property' + + got = client.create_table(table) + + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % path, + data={ + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'newAlphaProperty': 'unreleased property', + 'labels': {}, + }) + self.assertEqual( + got._properties['newAlphaProperty'], 'unreleased property') + self.assertEqual(got.table_id, self.TABLE_ID) + def test_create_table_w_encryption_configuration(self): from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import Table @@ -567,7 +607,7 @@ def test_create_table_w_day_partition_and_expire(self): 'datasetId': self.DS_ID, 'tableId': self.TABLE_ID }, - 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, + 'timePartitioning': {'type': 'DAY', 'expirationMs': '100'}, 'labels': {}, }) self.assertEqual(table.partitioning_type, "DAY") @@ -873,11 +913,6 @@ def test_update_table(self): table, ['schema', 'description', 'friendly_name', 'labels']) sent = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID - }, 'schema': { 'fields': [ { @@ -914,6 +949,37 @@ def test_update_table(self): req = conn.api_request.call_args self.assertEqual(req[1]['headers']['If-Match'], 'etag') + def test_update_table_w_custom_property(self): + from google.cloud.bigquery.table import Table + + path = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'newAlphaProperty': 'unreleased property', + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _make_connection(resource) + table = Table(self.TABLE_REF) + table._properties['newAlphaProperty'] = 'unreleased property' + + updated_table = client.update_table(table, ['newAlphaProperty']) + + conn.api_request.assert_called_once_with( + method='PATCH', + path='/%s' % path, + data={'newAlphaProperty': 'unreleased property'}, + headers=None) + self.assertEqual( + updated_table._properties['newAlphaProperty'], + 'unreleased property') + def test_update_table_only_use_legacy_sql(self): from google.cloud.bigquery.table import Table @@ -939,14 +1005,7 @@ def test_update_table_only_use_legacy_sql(self): conn.api_request.assert_called_once_with( method='PATCH', path='/%s' % path, - data={ - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID - }, - 'view': {'useLegacySql': True} - }, + data={'view': {'useLegacySql': True}}, headers=None) self.assertEqual( updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -1011,14 +1070,9 @@ def test_update_table_w_query(self): method='PATCH', path='/%s' % path, data={ - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID - }, 'view': {'query': query, 'useLegacySql': True}, 'location': location, - 'expirationTime': _millis(exp_time), + 'expirationTime': str(_millis(exp_time)), 'schema': schema_resource, }, headers=None) @@ -1049,8 +1103,8 @@ def test_update_table_w_schema_None(self): 'tableReference': { 'projectId': self.PROJECT, 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID}, - 'schema': {'fields': []}, + 'tableId': self.TABLE_ID, + }, } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -1063,17 +1117,10 @@ def test_update_table_w_schema_None(self): self.assertEqual(len(conn.api_request.call_args_list), 2) req = conn.api_request.call_args_list[1] self.assertEqual(req[1]['method'], 'PATCH') - sent = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID - }, - 'schema': None - } + sent = {'schema': None} self.assertEqual(req[1]['data'], sent) self.assertEqual(req[1]['path'], '/%s' % path) - self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(len(updated_table.schema), 0) def test_update_table_delete_property(self): from google.cloud.bigquery.table import Table @@ -1116,14 +1163,7 @@ def test_update_table_delete_property(self): req = conn.api_request.call_args_list[1] self.assertEqual(req[1]['method'], 'PATCH') self.assertEqual(req[1]['path'], '/%s' % path) - sent = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID - }, - 'description': None, - } + sent = {'description': None} self.assertEqual(req[1]['data'], sent) self.assertIsNone(table3.description) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index e703b1c9eef5..7482e9f88ea3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -597,6 +597,29 @@ def test_view_use_legacy_sql_setter(self): self.assertEqual(table.view_use_legacy_sql, True) self.assertEqual(table.view_query, 'select * from foo') + def test_external_data_configuration_setter(self): + from google.cloud.bigquery.external_config import ExternalConfig + + external_config = ExternalConfig('CSV') + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table.external_data_configuration = external_config + + self.assertEqual( + table.external_data_configuration.source_format, + external_config.source_format) + + def test_external_data_configuration_setter_none(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table.external_data_configuration = None + + self.assertIsNone(table.external_data_configuration) + def test_external_data_configuration_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -667,6 +690,39 @@ def test_from_api_with_encryption(self): table = klass.from_api_repr(RESOURCE) self._verifyResourceProperties(table, RESOURCE) + def test_to_api_repr_w_custom_field(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table._properties['newAlphaProperty'] = 'unreleased property' + resource = table.to_api_repr() + + exp_resource = { + 'tableReference': table_ref.to_api_repr(), + 'labels': {}, + 'newAlphaProperty': 'unreleased property' + } + self.assertEqual(resource, exp_resource) + + def test__build_resource_w_custom_field(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table._properties['newAlphaProperty'] = 'unreleased property' + resource = table._build_resource(['newAlphaProperty']) + + exp_resource = { + 'newAlphaProperty': 'unreleased property' + } + self.assertEqual(resource, exp_resource) + + def test__build_resource_w_custom_field_not_in__properties(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table = self._make_one(dataset.table(self.TABLE_NAME)) + table.bad = 'value' + with self.assertRaises(ValueError): + table._build_resource(['bad']) + def test_partition_type_setter_bad_type(self): from google.cloud.bigquery.table import SchemaField From 6bb7dc1fcc4a23c12007032f8f888e25b581ad0d Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 4 Apr 2018 09:48:36 -0700 Subject: [PATCH 0421/2016] Specifies IPython version 5.5 when running Python 2.7 tests (#5145) --- packages/google-cloud-bigquery/nox.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 5f4741e61490..07c5ea80408f 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -35,12 +35,20 @@ def default(session): run the tests. """ # Install all test dependencies, then install this package in-place. - session.install('mock', 'pytest', 'pytest-cov', 'ipython', *LOCAL_DEPS) + session.install('mock', 'pytest', 'pytest-cov', *LOCAL_DEPS) + + # Pandas does not support Python 3.4 if session.interpreter == 'python3.4': session.install('-e', '.') else: session.install('-e', '.[pandas]') + # IPython does not support Python 2 after version 5.x + if session.interpreter == 'python2.7': + session.install('ipython==5.5') + else: + session.install('ipython') + # Run py.test against the unit tests. session.run( 'py.test', @@ -87,13 +95,19 @@ def system(session, py): # Install all test dependencies, then install this package into the # virtualenv's dist-packages. - session.install('mock', 'pytest', 'ipython', *LOCAL_DEPS) + session.install('mock', 'pytest', *LOCAL_DEPS) session.install( os.path.join('..', 'storage'), os.path.join('..', 'test_utils'), ) session.install('-e', '.[pandas]') + # IPython does not support Python 2 after version 5.x + if session.interpreter == 'python2.7': + session.install('ipython==5.5') + else: + session.install('ipython') + # Run py.test against the system tests. session.run( 'py.test', From 8e259968519663f1a42644cff36353356a51b95f Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Wed, 4 Apr 2018 13:11:36 -0700 Subject: [PATCH 0422/2016] Release bigquery 0.32.0 (#5147) * Release 0.32.0 * Update changelog.md Modify changelog to have matching tense and separate change types. --- packages/google-cloud-bigquery/CHANGELOG.md | 33 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index c832f39ada80..038e378098b0 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,39 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 0.32.0 + +### :warning: Interface changes + +- Use `job.configuration` resource for XXXJobConfig classes (#5036) + +### Interface additions + +- Add `page_size` parameter for `list_rows` and use in DB-API for `arraysize` (#4931) +- Add IPython magics for running queries (#4983) + +### Documentation + +- Add job string constant parameters in init and snippets documentation (#4987) + +### Internal / Testing changes + +- Specify IPython version 5.5 when running Python 2.7 tests (#5145) +- Move all Dataset property conversion logic into properties (#5130) +- Remove unnecessary _Table class from test_job.py (#5126) +- Use explicit bytes to initialize 'BytesIO'. (#5116) +- Make SchemaField be able to include description via from_api_repr method (#5114) +- Remove _ApiResourceProperty class (#5107) +- Add dev version for 0.32.0 release (#5105) +- StringIO to BytesIO (#5101) +- Shorten snippets test name (#5091) +- Don't use `selected_fields` for listing query result rows (#5072) +- Add location property to job classes. (#5071) +- Use autospec for Connection in tests. (#5066) +- Add Parquet SourceFormat and samples (#5057) +- Remove test_load_table_from_uri_w_autodetect_schema_then_get_job because of duplicate test in snippets (#5004) +- Fix encoding variable and strings UTF-8 and ISO-8859-1 difference documentation (#4990) + ## 0.31.0 ### Interface additions diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 52e978836444..e04a59f88a57 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '0.32.0.dev1' +version = '0.32.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 77ade6f0c6e4567fa65f67413f50596d9317d315 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Apr 2018 11:15:34 -0700 Subject: [PATCH 0423/2016] Remove deprecated Client methods (#5182) Remove the `Client.list_dataset_tables()` method (replaced by `Client.list_tables()`), the `Client.create_rows()` method (replaced by `Client.insert_rows()`), and the `Client.create_rows_json()` method (replaced by `Client.insert_rows_json()`). --- .../google/cloud/bigquery/client.py | 34 +---------- .../tests/unit/test_client.py | 56 +++++++++---------- 2 files changed, 29 insertions(+), 61 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 595c614f5217..b0f56f3998ac 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -20,7 +20,6 @@ import functools import os import uuid -import warnings import six @@ -437,16 +436,6 @@ def list_tables(self, dataset, max_results=None, page_token=None, result.dataset = dataset return result - def list_dataset_tables(self, *args, **kwargs): - """DEPRECATED: List tables in the dataset. - - Use :func:`~google.cloud.bigquery.client.Client.list_tables` instead. - """ - warnings.warn( - 'list_dataset_tables is deprecated, use list_tables instead.', - DeprecationWarning) - return self.list_tables(*args, **kwargs) - def delete_dataset(self, dataset, delete_contents=False, retry=DEFAULT_RETRY): """Delete a dataset. @@ -1093,7 +1082,7 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): :type kwargs: dict :param kwargs: Keyword arguments to - :meth:`~google.cloud.bigquery.client.Client.create_rows_json` + :meth:`~google.cloud.bigquery.client.Client.insert_rows_json` :rtype: list of mappings :returns: One mapping per row with insert errors: the "index" key @@ -1217,27 +1206,6 @@ def insert_rows_json(self, table, json_rows, row_ids=None, return errors - def create_rows(self, *args, **kwargs): - """DEPRECATED: Insert rows into a table via the streaming API. - - Use :func:`~google.cloud.bigquery.client.Client.insert_rows` instead. - """ - warnings.warn( - 'create_rows is deprecated, use insert_rows instead.', - DeprecationWarning) - return self.insert_rows(*args, **kwargs) - - def create_rows_json(self, *args, **kwargs): - """DEPRECATED: Insert rows into a table without type conversions. - - Use :func:`~google.cloud.bigquery.client.Client.insert_rows_json` - instead. - """ - warnings.warn( - 'create_rows_json is deprecated, use insert_rows_json instead.', - DeprecationWarning) - return self.insert_rows_json(*args, **kwargs) - def list_rows(self, table, selected_fields=None, max_results=None, page_token=None, start_index=None, page_size=None, retry=DEFAULT_RETRY): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 28d53657f82c..d7261d4424ed 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1167,7 +1167,7 @@ def test_update_table_delete_property(self): self.assertEqual(req[1]['data'], sent) self.assertIsNone(table3.description) - def test_list_dataset_tables_empty(self): + def test_list_tables_empty(self): path = '/projects/{}/datasets/{}/tables'.format( self.PROJECT, self.DS_ID) creds = _make_credentials() @@ -1175,7 +1175,7 @@ def test_list_dataset_tables_empty(self): conn = client._connection = _make_connection({}) dataset = client.dataset(self.DS_ID) - iterator = client.list_dataset_tables(dataset) + iterator = client.list_tables(dataset) self.assertIs(iterator.dataset, dataset) page = six.next(iterator.pages) tables = list(page) @@ -1186,7 +1186,7 @@ def test_list_dataset_tables_empty(self): conn.api_request.assert_called_once_with( method='GET', path=path, query_params={}) - def test_list_dataset_tables_defaults(self): + def test_list_tables_defaults(self): from google.cloud.bigquery.table import TableListItem TABLE_1 = 'table_one' @@ -1216,7 +1216,7 @@ def test_list_dataset_tables_defaults(self): conn = client._connection = _make_connection(DATA) dataset = client.dataset(self.DS_ID) - iterator = client.list_dataset_tables(dataset) + iterator = client.list_tables(dataset) self.assertIs(iterator.dataset, dataset) page = six.next(iterator.pages) tables = list(page) @@ -1232,7 +1232,7 @@ def test_list_dataset_tables_defaults(self): conn.api_request.assert_called_once_with( method='GET', path='/%s' % PATH, query_params={}) - def test_list_dataset_tables_explicit(self): + def test_list_tables_explicit(self): from google.cloud.bigquery.table import TableListItem TABLE_1 = 'table_one' @@ -1261,7 +1261,7 @@ def test_list_dataset_tables_explicit(self): conn = client._connection = _make_connection(DATA) dataset = client.dataset(self.DS_ID) - iterator = client.list_dataset_tables( + iterator = client.list_tables( dataset, max_results=3, page_token=TOKEN) self.assertIs(iterator.dataset, dataset) page = six.next(iterator.pages) @@ -1280,11 +1280,11 @@ def test_list_dataset_tables_explicit(self): path='/%s' % PATH, query_params={'maxResults': 3, 'pageToken': TOKEN}) - def test_list_dataset_tables_wrong_type(self): + def test_list_tables_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.list_dataset_tables(client.dataset(self.DS_ID).table("foo")) + client.list_tables(client.dataset(self.DS_ID).table("foo")) def test_delete_dataset(self): from google.cloud.bigquery.dataset import Dataset @@ -2395,7 +2395,7 @@ def test_query_w_query_parameters(self): 'parameterValue': {'value': '123'} }) - def test_create_rows_wo_schema(self): + def test_insert_rows_wo_schema(self): from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA creds = _make_credentials() @@ -2411,11 +2411,11 @@ def test_create_rows_wo_schema(self): ] with self.assertRaises(ValueError) as exc: - client.create_rows(table, ROWS) + client.insert_rows(table, ROWS) self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) - def test_create_rows_w_schema(self): + def test_insert_rows_w_schema(self): import datetime from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 @@ -2461,7 +2461,7 @@ def _row_data(row): } with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): - errors = client.create_rows(table, ROWS) + errors = client.insert_rows(table, ROWS) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once() @@ -2470,7 +2470,7 @@ def _row_data(row): self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['data'], SENT) - def test_create_rows_w_list_of_dictionaries(self): + def test_insert_rows_w_list_of_dictionaries(self): import datetime from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 @@ -2526,7 +2526,7 @@ def _row_data(row): } with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): - errors = client.create_rows(table, ROWS) + errors = client.insert_rows(table, ROWS) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( @@ -2534,7 +2534,7 @@ def _row_data(row): path='/%s' % PATH, data=SENT) - def test_create_rows_w_list_of_Rows(self): + def test_insert_rows_w_list_of_Rows(self): from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import SchemaField from google.cloud.bigquery.table import Row @@ -2570,7 +2570,7 @@ def _row_data(row): } with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): - errors = client.create_rows(table, ROWS) + errors = client.insert_rows(table, ROWS) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( @@ -2578,7 +2578,7 @@ def _row_data(row): path='/%s' % PATH, data=SENT) - def test_create_rows_w_skip_invalid_and_ignore_unknown(self): + def test_insert_rows_w_skip_invalid_and_ignore_unknown(self): from google.cloud.bigquery.table import Table, SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( @@ -2626,7 +2626,7 @@ def _row_data(row): for index, row in enumerate(ROWS)], } - errors = client.create_rows( + errors = client.insert_rows( table, ROWS, row_ids=[index for index, _ in enumerate(ROWS)], @@ -2645,7 +2645,7 @@ def _row_data(row): path='/%s' % PATH, data=SENT) - def test_create_rows_w_repeated_fields(self): + def test_insert_rows_w_repeated_fields(self): from google.cloud.bigquery.table import Table, SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( @@ -2677,7 +2677,7 @@ def _row_data(row): } with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): - errors = client.create_rows(table, ROWS) + errors = client.insert_rows(table, ROWS) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( @@ -2685,7 +2685,7 @@ def _row_data(row): path='/%s' % PATH, data=SENT) - def test_create_rows_w_record_schema(self): + def test_insert_rows_w_record_schema(self): from google.cloud.bigquery.table import SchemaField PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( @@ -2723,7 +2723,7 @@ def _row_data(row): } with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): - errors = client.create_rows(self.TABLE_REF, ROWS, + errors = client.insert_rows(self.TABLE_REF, ROWS, selected_fields=[full_name, phone]) self.assertEqual(len(errors), 0) @@ -2732,7 +2732,7 @@ def _row_data(row): path='/%s' % PATH, data=SENT) - def test_create_rows_errors(self): + def test_insert_rows_errors(self): from google.cloud.bigquery.table import Table ROWS = [ @@ -2748,17 +2748,17 @@ def test_create_rows_errors(self): # table ref with no selected fields with self.assertRaises(ValueError): - client.create_rows(self.TABLE_REF, ROWS) + client.insert_rows(self.TABLE_REF, ROWS) # table with no schema with self.assertRaises(ValueError): - client.create_rows(Table(self.TABLE_REF), ROWS) + client.insert_rows(Table(self.TABLE_REF), ROWS) # neither Table nor tableReference with self.assertRaises(TypeError): - client.create_rows(1, ROWS) + client.insert_rows(1, ROWS) - def test_create_rows_json(self): + def test_insert_rows_json(self): from google.cloud.bigquery.table import Table, SchemaField from google.cloud.bigquery.dataset import DatasetReference @@ -2804,7 +2804,7 @@ def test_create_rows_json(self): } with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): - errors = client.create_rows_json(table, ROWS) + errors = client.insert_rows_json(table, ROWS) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( From 018fddc4815a6034a97299ac5288e46753ba3dd6 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Wed, 11 Apr 2018 16:16:40 -0700 Subject: [PATCH 0424/2016] Release 1.0.0 (#5185) --- packages/google-cloud-bigquery/CHANGELOG.md | 5 +++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 038e378098b0..df14d36a9dcd 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,11 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.0.0 + +### Implementation Changes +- Remove deprecated Client methods (#5182) + ## 0.32.0 ### :warning: Interface changes diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index e04a59f88a57..696f8f918177 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '0.32.0' +version = '1.0.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 070f6979b65897a6ad987b76c6aebef7c86489bb Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 13 Apr 2018 12:50:36 -0700 Subject: [PATCH 0425/2016] BigQuery: Samples update and region tag standardization (#5195) * sample update and region tag standardization * fixes per comments --- packages/google-cloud-bigquery/nox.py | 2 +- packages/google-cloud-bigquery/tests/data/people.csv | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/tests/data/people.csv diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 07c5ea80408f..6ce3a89d9b3c 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -139,7 +139,7 @@ def snippets(session, py): os.path.join('..', 'storage'), os.path.join('..', 'test_utils'), ) - session.install('-e', '.') + session.install('-e', '.[pandas]') # Run py.test against the system tests. session.run( diff --git a/packages/google-cloud-bigquery/tests/data/people.csv b/packages/google-cloud-bigquery/tests/data/people.csv new file mode 100644 index 000000000000..d3c7d063892a --- /dev/null +++ b/packages/google-cloud-bigquery/tests/data/people.csv @@ -0,0 +1,3 @@ +full_name,age +Phred Phlyntstone,32 +Wylma Phlyntstone,29 \ No newline at end of file From 7c134b657610fff8d0b3ab348e6db7fb6a4b5bcc Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 17 Apr 2018 16:35:37 -0400 Subject: [PATCH 0426/2016] Don't suppress 'dots' output for snippets tests. (#5202) CI aborts the run if the test goes longer than 10 minutes without emitting any output. This change also eases running with verbose output for debugging long- running tests, e.g., 'nox -e snippets -- --verbose'. See #5003. --- packages/google-cloud-bigquery/nox.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 6ce3a89d9b3c..29dead9ac43d 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -144,7 +144,6 @@ def snippets(session, py): # Run py.test against the system tests. session.run( 'py.test', - '--quiet', os.path.join(os.pardir, 'docs', 'bigquery', 'snippets.py'), *session.posargs ) From 4f33d4d2b9c4d134063be027450a1ecc3bead593 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 17 Apr 2018 16:17:28 -0700 Subject: [PATCH 0427/2016] adds client.get_service_account_email() (#5203) --- .../google/cloud/bigquery/client.py | 29 ++++++++++++++ .../google-cloud-bigquery/tests/system.py | 8 ++++ .../tests/unit/test_client.py | 38 +++++++++++++++++++ 3 files changed, 75 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index b0f56f3998ac..158407a5dd73 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -121,6 +121,35 @@ def __init__(self, project=None, credentials=None, _http=None): project=project, credentials=credentials, _http=_http) self._connection = Connection(self) + def get_service_account_email(self, project=None): + """Get the email address of the project's BigQuery service account + + Note: + This is the service account that BigQuery uses to manage tables + encrypted by a key in KMS. + + Args: + project (str, optional): + Project ID to use for retreiving service account email. + Defaults to the client's project. + + Returns: + str: service account email address + + Example: + + >>> from google.cloud import bigquery + >>> client = bigquery.Client() + >>> client.get_service_account_email() + my_service_account@my-project.iam.gserviceaccount.com + + """ + if project is None: + project = self.project + path = '/projects/%s/serviceAccount' % (project,) + api_response = self._connection.api_request(method='GET', path=path) + return api_response['email'] + def list_projects(self, max_results=None, page_token=None, retry=DEFAULT_RETRY): """List projects for the project associated with this client. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 60d73ddd29a3..3b6809675b55 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -142,6 +142,14 @@ def _still_in_use(bad_request): else: doomed.delete() + def test_get_service_account_email(self): + client = Config.CLIENT + + got = client.get_service_account_email() + + self.assertIsInstance(got, six.text_type) + self.assertIn('@', got) + def test_create_dataset(self): DATASET_ID = _make_dataset_id('create_dataset') dataset = self.temp_dataset(DATASET_ID) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d7261d4424ed..b5db6485ac8e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -124,6 +124,44 @@ def test__get_query_results_hit(self): self.assertEqual(query_results.total_rows, 10) self.assertTrue(query_results.complete) + def test_get_service_account_email(self): + path = '/projects/%s/serviceAccount' % (self.PROJECT,) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + email = 'bq-123@bigquery-encryption.iam.gserviceaccount.com' + resource = { + 'kind': 'bigquery#getServiceAccountResponse', + 'email': email, + } + conn = client._connection = _make_connection(resource) + + service_account_email = client.get_service_account_email() + + conn.api_request.assert_called_once_with(method='GET', path=path) + self.assertEqual(service_account_email, email) + + def test_get_service_account_email_w_alternate_project(self): + project = 'my-alternate-project' + path = '/projects/%s/serviceAccount' % (project,) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + email = 'bq-123@bigquery-encryption.iam.gserviceaccount.com' + resource = { + 'kind': 'bigquery#getServiceAccountResponse', + 'email': email, + } + conn = client._connection = _make_connection(resource) + + service_account_email = client.get_service_account_email( + project=project) + + conn.api_request.assert_called_once_with(method='GET', path=path) + self.assertEqual(service_account_email, email) + def test_list_projects_defaults(self): from google.cloud.bigquery.client import Project From bf0ba63cd371ea09fadadecef1438ff4e53fcbce Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Wed, 18 Apr 2018 11:18:42 -0700 Subject: [PATCH 0428/2016] Release 1.0.1 (#5206) --- packages/google-cloud-bigquery/CHANGELOG.md | 4 ++++ packages/google-cloud-bigquery/setup.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index df14d36a9dcd..d78d93a787f3 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,10 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.0.1 + +Mark with trove classifier for Production/Stable + ## 1.0.0 ### Implementation Changes diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 696f8f918177..138e807b9cb7 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,12 +22,12 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.0.0' +version = '1.0.1' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' # 'Development Status :: 5 - Production/Stable' -release_status = 'Development Status :: 4 - Beta' +release_status = 'Development Status :: 5 - Production/Stable' dependencies = [ 'google-cloud-core<0.29dev,>=0.28.0', 'google-api-core<2.0.0dev,>=1.0.0', From 40ed592a73a84b8edf39dccea76b863ebf6a3e9c Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Wed, 18 Apr 2018 13:50:23 -0700 Subject: [PATCH 0429/2016] Release bigquery 1.1.0 (#5208) * Release 1.1.0 --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++-- packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index d78d93a787f3..8cfb1aafcbae 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,9 +4,17 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history -## 1.0.1 +## 1.1.0 -Mark with trove classifier for Production/Stable +### New Features +- Add client.get_service_account_email (#5203) + +### Documentation +- Update samples and standardize region tags (#5195) + +### Internal / Testing Changes +- Fix trove classifier to be Production/Stable +- Don't suppress 'dots' output on test (#5202) ## 1.0.0 diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 138e807b9cb7..851f78160f03 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.0.1' +version = '1.1.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 822ac29e5edf68512d5baf295da931b5cb9bc22b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 18 Apr 2018 21:14:01 -0700 Subject: [PATCH 0430/2016] BigQuery: Fix typo in Encoding.ISO_8859_1 enum value (#5211) Per https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding the encoding value should be ISO-8859-1 ("Latin alphabet no. 1"). It was ISO-8559-1 ("anthropometric measurements that can be used for the creation of size and shape profiles and their application in the field of clothing"). --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 340fe612a39c..f282d0019424 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -131,13 +131,13 @@ class DestinationFormat(object): class Encoding(object): """The character encoding of the data. The supported values are `UTF_8` corresponding to `'UTF-8'` or `ISO_8859_1` corresponding to - `'ISO-8559-1'`. The default value is `UTF_8`. + `'ISO-8859-1'`. The default value is `UTF_8`. BigQuery decodes the data after the raw, binary data has been split using the values of the quote and fieldDelimiter properties. """ UTF_8 = 'UTF-8' - ISO_8559_1 = 'ISO-8559-1' + ISO_8859_1 = 'ISO-8859-1' class QueryPriority(object): From 93ebae6a78749a03bc353a3986757815b3dd220a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 20 Apr 2018 12:10:02 -0700 Subject: [PATCH 0431/2016] BigQuery: Add project parameter to `list_datasets` and `list_jobs` (#5217) This allows a Client to list datasets or jobs in projects other than the default project. --- .../google/cloud/bigquery/client.py | 130 +++++++++--------- .../google-cloud-bigquery/tests/system.py | 6 + .../tests/unit/test_client.py | 26 ++++ 3 files changed, 99 insertions(+), 63 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 158407a5dd73..4204858013d7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -186,51 +186,53 @@ def list_projects(self, max_results=None, page_token=None, page_token=page_token, max_results=max_results) - def list_datasets(self, include_all=False, filter=None, max_results=None, - page_token=None, retry=DEFAULT_RETRY): + def list_datasets( + self, project=None, include_all=False, filter=None, + max_results=None, page_token=None, retry=DEFAULT_RETRY): """List datasets for the project associated with this client. See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list - :type include_all: bool - :param include_all: True if results include hidden datasets. - - :type filter: str - :param filter: (Optional) an expression for filtering the results by - label. For syntax, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#filter. - - :type max_results: int - :param max_results: (Optional) maximum number of datasets to return, - if not passed, defaults to a value set by the API. - - :type page_token: str - :param page_token: - (Optional) Token representing a cursor into the datasets. If - not passed, the API will return the first page of datasets. - The token marks the beginning of the iterator to be returned - and the value of the ``page_token`` can be accessed at - ``next_page_token`` of the - :class:`~google.api_core.page_iterator.HTTPIterator`. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Args: + project (str): + Optional. Project ID to use for retreiving datasets. Defaults + to the client's project. + include_all (bool): + Optional. True if results include hidden datasets. Defaults + to False. + filter (str): + Optional. An expression for filtering the results by label. + For syntax, see + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#filter. + max_results (int): + Optional. Maximum number of datasets to return. + page_token (str): + Optional. Token representing a cursor into the datasets. If + not passed, the API will return the first page of datasets. + The token marks the beginning of the iterator to be returned + and the value of the ``page_token`` can be accessed at + ``next_page_token`` of the + :class:`~google.api_core.page_iterator.HTTPIterator`. + retry (google.api_core.retry.Retry): + Optional. How to retry the RPC. - :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: - Iterator of - :class:`~google.cloud.bigquery.dataset.DatasetListItem`. - associated with the client's project. + Returns: + google.api_core.page_iterator.Iterator: + Iterator of + :class:`~google.cloud.bigquery.dataset.DatasetListItem`. + associated with the project. """ extra_params = {} + if project is None: + project = self.project if include_all: extra_params['all'] = True if filter: # TODO: consider supporting a dict of label -> value for filter, # and converting it into a string here. extra_params['filter'] = filter - path = '/projects/%s/datasets' % (self.project,) + path = '/projects/%s/datasets' % (project,) return page_iterator.HTTPIterator( client=self, api_request=functools.partial(self._call_api, retry), @@ -658,53 +660,55 @@ def cancel_job( return self.job_from_resource(resource['job']) - def list_jobs(self, max_results=None, page_token=None, all_users=None, - state_filter=None, retry=DEFAULT_RETRY): + def list_jobs( + self, project=None, max_results=None, page_token=None, + all_users=None, state_filter=None, retry=DEFAULT_RETRY): """List jobs for the project associated with this client. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/list - :type max_results: int - :param max_results: maximum number of jobs to return, If not - passed, defaults to a value set by the API. - - :type page_token: str - :param page_token: - (Optional) Opaque marker for the next "page" of jobs. If not - passed, the API will return the first page of jobs. The token - marks the beginning of the iterator to be returned and the - value of the ``page_token`` can be accessed at - ``next_page_token`` of - :class:`~google.api_core.page_iterator.HTTPIterator`. - - :type all_users: bool - :param all_users: if true, include jobs owned by all users in the - project. - - :type state_filter: str - :param state_filter: if passed, include only jobs matching the given - state. One of - - * ``"done"`` - * ``"pending"`` - * ``"running"`` - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Args: + project (str): + Optional. Project ID to use for retreiving datasets. Defaults + to the client's project. + max_results (int): + Optional. Maximum number of jobs to return. + page_token (str): + Optional. Opaque marker for the next "page" of jobs. If not + passed, the API will return the first page of jobs. The token + marks the beginning of the iterator to be returned and the + value of the ``page_token`` can be accessed at + ``next_page_token`` of + :class:`~google.api_core.page_iterator.HTTPIterator`. + all_users (bool): + If true, include jobs owned by all users in the project. + state_filter (str): + Optional. If set, include only jobs matching the given + state. One of + + * ``"done"`` + * ``"pending"`` + * ``"running"`` + retry (google.api_core.retry.Retry): + Optional. How to retry the RPC. - :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterable of job instances. + Returns: + google.api_core.page_iterator.Iterator: + Iterable of job instances. """ extra_params = {'projection': 'full'} + if project is None: + project = self.project + if all_users is not None: extra_params['allUsers'] = all_users if state_filter is not None: extra_params['stateFilter'] = state_filter - path = '/projects/%s/jobs' % (self.project,) + path = '/projects/%s/jobs' % (project,) return page_iterator.HTTPIterator( client=self, api_request=functools.partial(self._call_api, retry), diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 3b6809675b55..534d820b94bf 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -222,6 +222,12 @@ def test_list_datasets(self): dataset.project == Config.CLIENT.project] self.assertEqual(len(created), len(datasets_to_create)) + def test_list_datasets_w_project(self): + # Retrieve datasets from a different project. + iterator = Config.CLIENT.list_datasets(project='bigquery-public-data') + all_datasets = frozenset([dataset.dataset_id for dataset in iterator]) + self.assertIn('usa_names', all_datasets) + def test_create_table(self): dataset = self.temp_dataset(_make_dataset_id('create_table')) table_id = 'test_table' diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index b5db6485ac8e..a5d2f7c90e18 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -264,6 +264,18 @@ def test_list_datasets_defaults(self): conn.api_request.assert_called_once_with( method='GET', path='/%s' % PATH, query_params={}) + def test_list_datasets_w_project(self): + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = _make_connection({}) + + list(client.list_datasets(project='other-project')) + + conn.api_request.assert_called_once_with( + method='GET', + path='/projects/other-project/datasets', + query_params={}) + def test_list_datasets_explicit_response_missing_datasets_key(self): PATH = 'projects/%s/datasets' % self.PROJECT TOKEN = 'TOKEN' @@ -1715,6 +1727,20 @@ def test_list_jobs_explicit_missing(self): 'stateFilter': 'done' }) + def test_list_jobs_w_project(self): + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = _make_connection({}) + + list(client.list_jobs(project='other-project')) + + conn.api_request.assert_called_once_with( + method='GET', + path='/projects/other-project/jobs', + query_params={ + 'projection': 'full', + }) + def test_load_table_from_uri(self): from google.cloud.bigquery.job import LoadJob From a7b828589113ce6b84e61f0b0f8ae03a2deeebbe Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 26 Apr 2018 17:31:56 -0400 Subject: [PATCH 0432/2016] Fix docstring / impl of 'ExtractJob.destination_uri_file_counts'. (#5245) * Fix docstring for 'ExtractJob.destination_uri_file_counts'. Closes #5244. * Fix return value to match API. API returns a list of counts, one per destination URI. Add an assertion to the extract job system test to exercise the property. --- .../google/cloud/bigquery/job.py | 19 +++++++++++-------- .../google-cloud-bigquery/tests/system.py | 1 + .../tests/unit/test_job.py | 4 ++-- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index f282d0019424..9c30f951867a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1541,14 +1541,17 @@ def destination_uri_file_counts(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.extract.destinationUriFileCounts - :rtype: int or None - :returns: number of DML rows affectd by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get('destinationUriFileCounts') - if result is not None: - result = int(result) - return result + Returns: + a list of integer counts, each representing the number of files + per destination URI or URI pattern specified in the extract + configuration. These values will be in the same order as the URIs + specified in the 'destinationUris' field. Returns None if job is + not yet complete. + """ + counts = self._job_statistics().get('destinationUriFileCounts') + if counts is not None: + return [int(count) for count in counts] + return None def _build_resource(self): """Generate a resource for :meth:`begin`.""" diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 534d820b94bf..f1209fff3e96 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -843,6 +843,7 @@ def test_extract_table_w_job_config(self): self.to_delete.insert(0, destination) got = destination.download_as_string().decode('utf-8') self.assertIn('"Bharney Rhubble"', got) + self.assertEqual(job.destination_uri_file_counts, [1]) def test_copy_table(self): # If we create a new table to copy from, the test won't work diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 0cb85eccd745..da7a1f723abd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1604,8 +1604,8 @@ def test_destination_uri_file_counts(self): extract_stats = statistics['extract'] = {} self.assertIsNone(job.destination_uri_file_counts) - extract_stats['destinationUriFileCounts'] = str(file_counts) - self.assertEqual(job.destination_uri_file_counts, file_counts) + extract_stats['destinationUriFileCounts'] = [str(file_counts)] + self.assertEqual(job.destination_uri_file_counts, [file_counts]) def test_from_api_repr_missing_identity(self): self._setUpConstants() From 37802ce6c6dc56a6693aa13fab8c0b22b60a662c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 27 Apr 2018 12:10:47 -0700 Subject: [PATCH 0433/2016] BigQuery: add from_string factory methods to Dataset and Table (#5255) * BigQuery: add from_string factory methods to Dataset and Table To make it easier to construct Datasets and Tables when the full-qualified ID is known ahead of time, this PR adds helper methods called `from_string` to Dataset, DatasetReference, Table, and TableReference. In each case, the expected format is a fully-qualified ID (including the project ID) in standard SQL format. ``` dataset = client.create_dataset( bigquery.Dataset( client.dataset('mydataset', project='other-project') ) ) ``` becomes ``` dataset = client.create_dataset( bigquery.Dataset.from_string('other-project.mydataset') ) ``` Also, while writing doctests/examples, I noticed `__repr__` was inconsistent with the actual constructor on some of these classes, so I fixed that while I was here so those string representations don't cause confusion. --- .../google/cloud/bigquery/dataset.py | 58 +++++++++++++++++ .../google/cloud/bigquery/table.py | 65 ++++++++++++++++++- .../tests/unit/test_dataset.py | 32 ++++++++- .../tests/unit/test_table.py | 50 +++++++++++++- 4 files changed, 201 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index b290ec7bb49e..b5f2ef615fac 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -210,6 +210,37 @@ def from_api_repr(cls, resource): dataset_id = resource['datasetId'] return cls(project, dataset_id) + @classmethod + def from_string(cls, full_dataset_id): + """Construct a dataset reference from fully-qualified dataset ID. + + Args: + full_dataset_id (str): + A fully-qualified dataset ID in standard SQL format. Must + included both the project ID and the dataset ID, separated by + ``.``. + + Returns: + DatasetReference: + Dataset reference parsed from ``full_dataset_id``. + + Examples: + >>> DatasetReference.from_string('my-project-id.some_dataset') + DatasetReference('my-project-id', 'some_dataset') + + Raises: + ValueError: + If ``full_dataset_id`` is not a fully-qualified dataset ID in + standard SQL format. + """ + parts = full_dataset_id.split('.') + if len(parts) != 2: + raise ValueError( + 'full_dataset_id must be a fully-qualified dataset ID in ' + 'standard SQL format. e.g. "project.dataset_id", got ' + '{}'.format(full_dataset_id)) + return cls(*parts) + def to_api_repr(self): """Construct the API resource representation of this dataset reference @@ -450,6 +481,30 @@ def labels(self, value): raise ValueError("Pass a dict") self._properties['labels'] = value + @classmethod + def from_string(cls, full_dataset_id): + """Construct a dataset from fully-qualified dataset ID. + + Args: + full_dataset_id (str): + A fully-qualified dataset ID in standard SQL format. Must + included both the project ID and the dataset ID, separated by + ``.``. + + Returns: + Dataset: Dataset parsed from ``full_dataset_id``. + + Examples: + >>> Dataset.from_string('my-project-id.some_dataset') + Dataset(DatasetReference('my-project-id', 'some_dataset')) + + Raises: + ValueError: + If ``full_dataset_id`` is not a fully-qualified dataset ID in + standard SQL format. + """ + return cls(DatasetReference.from_string(full_dataset_id)) + @classmethod def from_api_repr(cls, resource): """Factory: construct a dataset given its API representation @@ -508,6 +563,9 @@ def table(self, table_id): """ return TableReference(self.reference, table_id) + def __repr__(self): + return 'Dataset({})'.format(repr(self.reference)) + class DatasetListItem(object): """A read-only dataset resource from a list operation. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 61be2004531b..e5c76370d364 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -166,6 +166,39 @@ def path(self): return '/projects/%s/datasets/%s/tables/%s' % ( self._project, self._dataset_id, self._table_id) + @classmethod + def from_string(cls, full_table_id): + """Construct a table reference from fully-qualified table ID. + + Args: + full_table_id (str): + A fully-qualified table ID in standard SQL format. Must + included a project ID, dataset ID, and table ID, each + separated by ``.``. + + Returns: + TableReference: Table reference parsed from ``full_table_id``. + + Examples: + >>> TableReference.from_string('my-project.mydataset.mytable') + TableRef...(DatasetRef...('my-project', 'mydataset'), 'mytable') + + Raises: + ValueError: + If ``full_table_id`` is not a fully-qualified table ID in + standard SQL format. + """ + from google.cloud.bigquery.dataset import DatasetReference + + parts = full_table_id.split('.') + if len(parts) != 3: + raise ValueError( + 'full_table_id must be a fully-qualified table ID in ' + 'standard SQL format. e.g. "project.dataset.table", got ' + '{}'.format(full_table_id)) + + return cls(DatasetReference(parts[0], parts[1]), parts[2]) + @classmethod def from_api_repr(cls, resource): """Factory: construct a table reference given its API representation @@ -223,7 +256,10 @@ def __hash__(self): return hash(self._key()) def __repr__(self): - return 'TableReference{}'.format(self._key()) + from google.cloud.bigquery.dataset import DatasetReference + dataset_ref = DatasetReference(self._project, self._dataset_id) + return "TableReference({}, '{}')".format( + repr(dataset_ref), self._table_id) class Table(object): @@ -609,6 +645,30 @@ def external_data_configuration(self, value): api_repr = value.to_api_repr() self._properties['externalDataConfiguration'] = api_repr + @classmethod + def from_string(cls, full_table_id): + """Construct a table from fully-qualified table ID. + + Args: + full_table_id (str): + A fully-qualified table ID in standard SQL format. Must + included a project ID, dataset ID, and table ID, each + separated by ``.``. + + Returns: + Table: Table parsed from ``full_table_id``. + + Examples: + >>> Table.from_string('my-project.mydataset.mytable') + Table(TableRef...(D...('my-project', 'mydataset'), 'mytable')) + + Raises: + ValueError: + If ``full_table_id`` is not a fully-qualified table ID in + standard SQL format. + """ + return cls(TableReference.from_string(full_table_id)) + @classmethod def from_api_repr(cls, resource): """Factory: construct a table given its API representation @@ -668,6 +728,9 @@ def _build_resource(self, filter_fields): return partial + def __repr__(self): + return 'Table({})'.format(repr(self.reference)) + class TableListItem(object): """A read-only table resource from a list operation. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 1dbf91f46e27..314010202676 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -165,10 +165,10 @@ def test_to_api_repr(self): }) def test_from_api_repr(self): - from google.cloud.bigquery.dataset import DatasetReference + cls = self._get_target_class() expected = self._make_one('project_1', 'dataset_1') - got = DatasetReference.from_api_repr( + got = cls.from_api_repr( { 'projectId': 'project_1', 'datasetId': 'dataset_1', @@ -176,6 +176,17 @@ def test_from_api_repr(self): self.assertEqual(expected, got) + def test_from_string(self): + cls = self._get_target_class() + got = cls.from_string('string-project.string_dataset') + self.assertEqual(got.project, 'string-project') + self.assertEqual(got.dataset_id, 'string_dataset') + + def test_from_string_legacy_string(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string('string-project:string_dataset') + def test___eq___wrong_type(self): dataset = self._make_one('project_1', 'dataset_1') other = object() @@ -484,6 +495,17 @@ def test_to_api_repr_w_custom_field(self): } self.assertEqual(resource, exp_resource) + def test_from_string(self): + cls = self._get_target_class() + got = cls.from_string('string-project.string_dataset') + self.assertEqual(got.project, 'string-project') + self.assertEqual(got.dataset_id, 'string_dataset') + + def test_from_string_legacy_string(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string('string-project:string_dataset') + def test__build_resource_w_custom_field(self): dataset = self._make_one(self.DS_REF) dataset._properties['newAlphaProperty'] = 'unreleased property' @@ -510,6 +532,12 @@ def test_table(self): self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual(table.project, self.PROJECT) + def test___repr__(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = self._make_one(DatasetReference('project1', 'dataset1')) + expected = "Dataset(DatasetReference('project1', 'dataset1'))" + self.assertEqual(repr(dataset), expected) + class TestDatasetListItem(unittest.TestCase): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 7482e9f88ea3..db62782e1ebd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -134,6 +134,23 @@ def test_from_api_repr(self): self.assertEqual(expected, got) + def test_from_string(self): + cls = self._get_target_class() + got = cls.from_string('string-project.string_dataset.string_table') + self.assertEqual(got.project, 'string-project') + self.assertEqual(got.dataset_id, 'string_dataset') + self.assertEqual(got.table_id, 'string_table') + + def test_from_string_legacy_string(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string('string-project:string_dataset.string_table') + + def test_from_string_not_fully_qualified(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string('string_dataset.string_table') + def test___eq___wrong_type(self): from google.cloud.bigquery.dataset import DatasetReference dataset_ref = DatasetReference('project_1', 'dataset_1') @@ -193,7 +210,10 @@ def test___hash__not_equals(self): def test___repr__(self): dataset = DatasetReference('project1', 'dataset1') table1 = self._make_one(dataset, 'table1') - expected = "TableReference('project1', 'dataset1', 'table1')" + expected = ( + "TableReference(DatasetReference('project1', 'dataset1'), " + "'table1')" + ) self.assertEqual(repr(table1), expected) @@ -634,6 +654,23 @@ def test_labels_setter_bad_value(self): with self.assertRaises(ValueError): table.labels = 12345 + def test_from_string(self): + cls = self._get_target_class() + got = cls.from_string('string-project.string_dataset.string_table') + self.assertEqual(got.project, 'string-project') + self.assertEqual(got.dataset_id, 'string_dataset') + self.assertEqual(got.table_id, 'string_table') + + def test_from_string_legacy_string(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string('string-project:string_dataset.string_table') + + def test_from_string_not_fully_qualified(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string('string_dataset.string_table') + def test_from_api_repr_missing_identity(self): self._setUpConstants() RESOURCE = {} @@ -837,6 +874,17 @@ def test_encryption_configuration_setter(self): table.encryption_configuration = None self.assertIsNone(table.encryption_configuration) + def test___repr__(self): + from google.cloud.bigquery.table import TableReference + dataset = DatasetReference('project1', 'dataset1') + table1 = self._make_one(TableReference(dataset, 'table1')) + expected = ( + "Table(TableReference(" + "DatasetReference('project1', 'dataset1'), " + "'table1'))" + ) + self.assertEqual(repr(table1), expected) + class Test_row_from_mapping(unittest.TestCase, _SchemaBase): From 56059fc83080f018752d66d6fa33c91eb9936de3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 1 May 2018 13:20:54 -0700 Subject: [PATCH 0434/2016] BigQuery: add UnknownJob type for redacted jobs. (#5281) --- .../google/cloud/bigquery/__init__.py | 2 ++ .../google/cloud/bigquery/client.py | 4 ++-- .../google/cloud/bigquery/job.py | 22 +++++++++++++++++++ .../tests/unit/test_client.py | 6 +++-- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 0068acea347b..d6c435c2f447 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -50,6 +50,7 @@ from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import QueryPriority from google.cloud.bigquery.job import SourceFormat +from google.cloud.bigquery.job import UnknownJob from google.cloud.bigquery.job import WriteDisposition from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter @@ -91,6 +92,7 @@ 'ExtractJobConfig', 'LoadJob', 'LoadJobConfig', + 'UnknownJob', # Shared helpers 'SchemaField', 'UDFResource', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4204858013d7..f69374b04524 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -571,7 +571,7 @@ def job_from_resource(self, resource): or :class:`google.cloud.bigquery.job.QueryJob` :returns: the job instance, constructed via the resource """ - config = resource['configuration'] + config = resource.get('configuration', {}) if 'load' in config: return job.LoadJob.from_api_repr(resource, self) elif 'copy' in config: @@ -580,7 +580,7 @@ def job_from_resource(self, resource): return job.ExtractJob.from_api_repr(resource, self) elif 'query' in config: return job.QueryJob.from_api_repr(resource, self) - raise ValueError('Cannot parse job resource') + return job.UnknownJob.from_api_repr(resource, self) def get_job( self, job_id, project=None, location=None, retry=DEFAULT_RETRY): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 9c30f951867a..70ff2b3cf54e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2516,3 +2516,25 @@ def from_api_repr(cls, resource): steps=[QueryPlanEntryStep.from_api_repr(step) for step in resource.get('steps', ())], ) + + +class UnknownJob(_AsyncJob): + """A job whose type cannot be determined.""" + + @classmethod + def from_api_repr(cls, resource, client): + """Construct an UnknownJob from the JSON representation. + + Args: + resource (dict): JSON representation of a job. + client (google.cloud.bigquery.client.Client): + Client connected to BigQuery API. + + Returns: + UnknownJob: Job corresponding to the resource. + """ + job_ref = _JobReference._from_api_repr( + resource.get('jobReference', {'projectId': client.project})) + job = cls(job_ref, client) + job._properties = resource + return job diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index a5d2f7c90e18..42184cdba679 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1395,10 +1395,12 @@ def test_delete_table_w_wrong_type(self): client.delete_table(client.dataset(self.DS_ID)) def test_job_from_resource_unknown_type(self): + from google.cloud.bigquery.job import UnknownJob creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - with self.assertRaises(ValueError): - client.job_from_resource({'configuration': {'nonesuch': {}}}) + got = client.job_from_resource({}) # Can parse redacted job. + self.assertIsInstance(got, UnknownJob) + self.assertEqual(got.project, self.PROJECT) def test_get_job_miss_w_explict_project(self): from google.cloud.exceptions import NotFound From 3d71dd4719e73bb0253b24a24f0a57d25c7bddce Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 2 May 2018 10:08:58 -0700 Subject: [PATCH 0435/2016] BigQuery: Column based time partitioning (#5267) * adds TimePartitioningType class * adds TimePartitioning class * adds time partitioning to query job * adds time partitioning to load job * adds time_partitioning to table and adds deprecation warnings to partitioning_type and partition_expiration * adds time_partitioning to TableListItem * adds sample for creating partitioned table * adds sample for querying partitioned table --- .../google/cloud/bigquery/__init__.py | 4 + .../google/cloud/bigquery/job.py | 49 ++++ .../google/cloud/bigquery/table.py | 244 +++++++++++++++--- .../tests/unit/test_client.py | 13 +- .../tests/unit/test_job.py | 38 +++ .../tests/unit/test_table.py | 208 ++++++++++----- 6 files changed, 444 insertions(+), 112 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index d6c435c2f447..503c83bfb85c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -61,6 +61,8 @@ from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import Row +from google.cloud.bigquery.table import TimePartitioningType +from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -93,6 +95,8 @@ 'LoadJob', 'LoadJobConfig', 'UnknownJob', + 'TimePartitioningType', + 'TimePartitioning', # Shared helpers 'SchemaField', 'UDFResource', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 70ff2b3cf54e..19f2eaf96c3c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -34,6 +34,7 @@ from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery import _helpers from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery._helpers import _int_or_none @@ -967,6 +968,23 @@ def destination_encryption_configuration(self, value): api_repr = value.to_api_repr() self._set_sub_prop('destinationEncryptionConfiguration', api_repr) + @property + def time_partitioning(self): + """google.cloud.bigquery.table.TimePartitioning: Specifies time-based + partitioning for the destination table. + """ + prop = self._get_sub_prop('timePartitioning') + if prop is not None: + prop = TimePartitioning.from_api_repr(prop) + return prop + + @time_partitioning.setter + def time_partitioning(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop('timePartitioning', api_repr) + class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table. @@ -1114,6 +1132,13 @@ def destination_encryption_configuration(self): """ return self._configuration.destination_encryption_configuration + @property + def time_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`. + """ + return self._configuration.time_partitioning + @property def input_file_bytes(self): """Count of bytes loaded from source files. @@ -1910,6 +1935,23 @@ def table_definitions(self, values): self._set_sub_prop( 'tableDefinitions', _to_api_repr_table_defs(values)) + @property + def time_partitioning(self): + """google.cloud.bigquery.table.TimePartitioning: Specifies time-based + partitioning for the destination table. + """ + prop = self._get_sub_prop('timePartitioning') + if prop is not None: + prop = TimePartitioning.from_api_repr(prop) + return prop + + @time_partitioning.setter + def time_partitioning(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop('timePartitioning', api_repr) + def to_api_repr(self): """Build an API representation of the query job config. @@ -2081,6 +2123,13 @@ def table_definitions(self): """ return self._configuration.table_definitions + @property + def time_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.time_partitioning`. + """ + return self._configuration.time_partitioning + def _build_resource(self): """Generate a resource for :meth:`begin`.""" configuration = self._configuration.to_api_repr() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index e5c76370d364..40eb7a02f85a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -19,6 +19,7 @@ import copy import datetime import operator +import warnings import six try: @@ -278,6 +279,7 @@ class Table(object): _PROPERTY_TO_API_FIELD = { 'friendly_name': 'friendlyName', 'expires': 'expirationTime', + 'time_partitioning': 'timePartitioning', 'partitioning_type': 'timePartitioning', 'partition_expiration': 'timePartitioning', 'view_use_legacy_sql': 'view', @@ -456,52 +458,80 @@ def table_type(self): """ return self._properties.get('type') + @property + def time_partitioning(self): + """google.cloud.bigquery.table.TimePartitioning: Configures time-based + partitioning for a table. + + Raises: + ValueError: + If the value is not :class:`TimePartitioning` or :data:`None`. + """ + prop = self._properties.get('timePartitioning') + if prop is not None: + return TimePartitioning.from_api_repr(prop) + + @time_partitioning.setter + def time_partitioning(self, value): + api_repr = value + if isinstance(value, TimePartitioning): + api_repr = value.to_api_repr() + elif value is not None: + raise ValueError( + "value must be google.cloud.bigquery.table.TimePartitioning " + "or None") + self._properties['timePartitioning'] = api_repr + @property def partitioning_type(self): """Union[str, None]: Time partitioning of the table if it is partitioned (Defaults to :data:`None`). - The only partitioning type that is currently supported is ``'DAY'``. - - Raises: - ValueError: If the value is not ``'DAY'`` or :data:`None`. + The only partitioning type that is currently supported is + :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`. """ - return self._properties.get('timePartitioning', {}).get('type') + warnings.warn( + "This method will be deprecated in future versions. Please use " + "Table.time_partitioning.type_ instead.", + UserWarning) + if self.time_partitioning is not None: + return self.time_partitioning.type_ @partitioning_type.setter def partitioning_type(self, value): - if value not in ('DAY', None): - raise ValueError("value must be one of ['DAY', None]") - - if value is None: - self._properties.pop('timePartitioning', None) - else: - time_part = self._properties.setdefault('timePartitioning', {}) - time_part['type'] = value.upper() + warnings.warn( + "This method will be deprecated in future versions. Please use " + "Table.time_partitioning.type_ instead.", + UserWarning) + if self.time_partitioning is None: + self._properties['timePartitioning'] = {} + self._properties['timePartitioning']['type'] = value @property def partition_expiration(self): """Union[int, None]: Expiration time in milliseconds for a partition. + + If :attr:`partition_expiration` is set and :attr:`type_` is + not set, :attr:`type_` will default to + :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`. """ - return _helpers._int_or_none( - self._properties.get('timePartitioning', {}).get('expirationMs')) + warnings.warn( + "This method will be deprecated in future versions. Please use " + "Table.time_partitioning.expiration_ms instead.", + UserWarning) + if self.time_partitioning is not None: + return self.time_partitioning.expiration_ms @partition_expiration.setter def partition_expiration(self, value): - if not isinstance(value, (int, type(None))): - raise ValueError( - "must be an integer representing milliseconds or None") - - if value is None: - if 'timePartitioning' in self._properties: - self._properties['timePartitioning'].pop('expirationMs') - else: - api_repr = str(value) - try: - self._properties['timePartitioning']['expirationMs'] = api_repr - except KeyError: - self._properties['timePartitioning'] = {'type': 'DAY'} - self._properties['timePartitioning']['expirationMs'] = api_repr + warnings.warn( + "This method will be deprecated in future versions. Please use " + "Table.time_partitioning.expiration_ms instead.", + UserWarning) + if self.time_partitioning is None: + self._properties['timePartitioning'] = { + 'type': TimePartitioningType.DAY} + self._properties['timePartitioning']['expirationMs'] = str(value) @property def description(self): @@ -816,22 +846,40 @@ def table_type(self): """ return self._properties.get('type') + @property + def time_partitioning(self): + """google.cloud.bigquery.table.TimePartitioning: Configures time-based + partitioning for a table. + """ + prop = self._properties.get('timePartitioning') + if prop is not None: + return TimePartitioning.from_api_repr(prop) + @property def partitioning_type(self): """Union[str, None]: Time partitioning of the table if it is partitioned (Defaults to :data:`None`). - - The only partitioning type that is currently supported is ``'DAY'``. """ - return self._properties.get('timePartitioning', {}).get('type') + warnings.warn( + "This method will be deprecated in future versions. Please use " + "TableListItem.time_partitioning.type_ instead.", + PendingDeprecationWarning) + if self.time_partitioning is not None: + return self.time_partitioning.type_ @property def partition_expiration(self): """Union[int, None]: Expiration time in milliseconds for a partition. + + If this property is set and :attr:`type_` is not set, :attr:`type_` + will default to :attr:`TimePartitioningType.DAY`. """ - expiration = self._properties.get( - 'timePartitioning', {}).get('expirationMs') - return _helpers._int_or_none(expiration) + warnings.warn( + "This method will be deprecated in future versions. Please use " + "TableListItem.time_partitioning.expiration_ms instead.", + PendingDeprecationWarning) + if self.time_partitioning is not None: + return self.time_partitioning.expiration_ms @property def friendly_name(self): @@ -1099,3 +1147,129 @@ def to_dataframe(self): rows = [row.values() for row in iter(self)] return pandas.DataFrame(rows, columns=column_headers) + + +class TimePartitioningType(object): + """Specifies the type of time partitioning to perform.""" + + DAY = 'DAY' + """str: Generates one partition per day.""" + + +class TimePartitioning(object): + """Configures time-based partitioning for a table. + + Args: + type_ (google.cloud.bigquery.table.TimePartitioningType, optional): + Specifies the type of time partitioning to perform. Defaults to + :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`, + which is the only currently supported type. + field (str, optional): + If set, the table is partitioned by this field. If not set, the + table is partitioned by pseudo column ``_PARTITIONTIME``. The field + must be a top-level ``TIMESTAMP`` or ``DATE`` field. Its mode must + be ``NULLABLE`` or ``REQUIRED``. + expiration_ms(int, optional): + Number of milliseconds for which to keep the storage for a + partition. + require_partition_filter (bool, optional): + If set to true, queries over the partitioned table require a + partition filter that can be used for partition elimination to be + specified. + """ + def __init__(self, type_=None, field=None, expiration_ms=None, + require_partition_filter=None): + self._properties = {} + if type_ is None: + self.type_ = TimePartitioningType.DAY + else: + self.type_ = type_ + if field is not None: + self.field = field + if expiration_ms is not None: + self.expiration_ms = expiration_ms + if require_partition_filter is not None: + self.require_partition_filter = require_partition_filter + + @property + def type_(self): + """google.cloud.bigquery.table.TimePartitioningType: The type of time + partitioning to use. + """ + return self._properties['type'] + + @type_.setter + def type_(self, value): + self._properties['type'] = value + + @property + def field(self): + """str: Field in the table to use for partitioning""" + return self._properties.get('field') + + @field.setter + def field(self, value): + self._properties['field'] = value + + @property + def expiration_ms(self): + """int: Number of milliseconds to keep the storage for a partition.""" + return _helpers._int_or_none(self._properties.get('expirationMs')) + + @expiration_ms.setter + def expiration_ms(self, value): + self._properties['expirationMs'] = str(value) + + @property + def require_partition_filter(self): + """bool: Specifies whether partition filters are required for queries + """ + return self._properties.get('requirePartitionFilter') + + @require_partition_filter.setter + def require_partition_filter(self, value): + self._properties['requirePartitionFilter'] = value + + @classmethod + def from_api_repr(cls, api_repr): + """Return a :class:`TimePartitioning` object deserialized from a dict. + + This method creates a new ``TimePartitioning`` instance that points to + the ``api_repr`` parameter as its internal properties dict. This means + that when a ``TimePartitioning`` instance is stored as a property of + another object, any changes made at the higher level will also appear + here:: + + >>> time_partitioning = TimePartitioning() + >>> table.time_partitioning = time_partitioning + >>> table.time_partitioning.field = 'timecolumn' + >>> time_partitioning.field + 'timecolumn' + + Args: + api_repr (Mapping[str, str]): + The serialized representation of the TimePartitioning, such as + what is output by :meth:`to_api_repr`. + + Returns: + google.cloud.bigquery.table.TimePartitioning: + The ``TimePartitioning`` object. + """ + instance = cls(api_repr['type']) + instance._properties = api_repr + return instance + + def to_api_repr(self): + """Return a dictionary representing this object. + + This method returns the properties dict of the ``TimePartitioning`` + instance rather than making a copy. This means that when a + ``TimePartitioning`` instance is stored as a property of another + object, any changes made at the higher level will also appear here. + + Returns: + dict: + A dictionary representing the TimePartitioning object in + serialized form. + """ + return self._properties diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 42184cdba679..7a9f8a1aae2e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -515,6 +515,7 @@ def test_create_dataset_w_custom_property(self): def test_create_table_w_day_partition(self): from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TimePartitioning path = 'projects/%s/datasets/%s/tables' % ( self.PROJECT, self.DS_ID) @@ -530,7 +531,7 @@ def test_create_table_w_day_partition(self): } conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) - table.partitioning_type = 'DAY' + table.time_partitioning = TimePartitioning() got = client.create_table(table) @@ -546,7 +547,7 @@ def test_create_table_w_day_partition(self): 'timePartitioning': {'type': 'DAY'}, 'labels': {}, }) - self.assertEqual(table.partitioning_type, "DAY") + self.assertEqual(table.time_partitioning.type_, 'DAY') self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_custom_property(self): @@ -628,6 +629,7 @@ def test_create_table_w_encryption_configuration(self): def test_create_table_w_day_partition_and_expire(self): from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TimePartitioning path = 'projects/%s/datasets/%s/tables' % ( self.PROJECT, self.DS_ID) @@ -643,8 +645,7 @@ def test_create_table_w_day_partition_and_expire(self): } conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) - table.partitioning_type = 'DAY' - table.partition_expiration = 100 + table.time_partitioning = TimePartitioning(expiration_ms=100) got = client.create_table(table) @@ -660,8 +661,8 @@ def test_create_table_w_day_partition_and_expire(self): 'timePartitioning': {'type': 'DAY', 'expirationMs': '100'}, 'labels': {}, }) - self.assertEqual(table.partitioning_type, "DAY") - self.assertEqual(table.partition_expiration, 100) + self.assertEqual(table.time_partitioning.type_, 'DAY') + self.assertEqual(table.time_partitioning.expiration_ms, 100) self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_schema_and_query(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index da7a1f723abd..297aab771bab 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -249,6 +249,24 @@ def test_schema(self): config.schema = [full_name, age] self.assertEqual(config.schema, [full_name, age]) + def test_time_partitioning(self): + from google.cloud.bigquery import table + + time_partitioning = table.TimePartitioning( + type_=table.TimePartitioningType.DAY, field='name') + config = self._get_target_class()() + config.time_partitioning = time_partitioning + # TimePartitioning should be configurable after assigning + time_partitioning.expiration_ms = 10000 + self.assertEqual( + config.time_partitioning.type_, + table.TimePartitioningType.DAY) + self.assertEqual(config.time_partitioning.field, 'name') + self.assertEqual(config.time_partitioning.expiration_ms, 10000) + + config.time_partitioning = None + self.assertIsNone(config.time_partitioning) + def test_api_repr(self): resource = self._make_resource() config = self._get_target_class().from_api_repr(resource) @@ -447,6 +465,7 @@ def test_ctor(self): self.assertIsNone(job.source_format) self.assertIsNone(job.write_disposition) self.assertIsNone(job.destination_encryption_configuration) + self.assertIsNone(job.time_partitioning) def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField @@ -1841,6 +1860,24 @@ def test_ctor_w_none(self): self.assertIsNone(config.default_dataset) self.assertIsNone(config.destination) + def test_time_partitioning(self): + from google.cloud.bigquery import table + + time_partitioning = table.TimePartitioning( + type_=table.TimePartitioningType.DAY, field='name') + config = self._make_one() + config.time_partitioning = time_partitioning + # TimePartitioning should be configurable after assigning + time_partitioning.expiration_ms = 10000 + + self.assertEqual( + config.time_partitioning.type_, table.TimePartitioningType.DAY) + self.assertEqual(config.time_partitioning.field, 'name') + self.assertEqual(config.time_partitioning.expiration_ms, 10000) + + config.time_partitioning = None + self.assertIsNone(config.time_partitioning) + def test_from_api_repr_empty(self): klass = self._get_target_class() config = klass.from_api_repr({}) @@ -2118,6 +2155,7 @@ def test_ctor_defaults(self): self.assertIsNone(job.maximum_bytes_billed) self.assertIsNone(job.table_definitions) self.assertIsNone(job.destination_encryption_configuration) + self.assertIsNone(job.time_partitioning) def test_ctor_w_udf_resources(self): from google.cloud.bigquery.job import QueryJobConfig diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index db62782e1ebd..6f9f69c9f0a7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -760,106 +760,109 @@ def test__build_resource_w_custom_field_not_in__properties(self): with self.assertRaises(ValueError): table._build_resource(['bad']) - def test_partition_type_setter_bad_type(self): - from google.cloud.bigquery.table import SchemaField + def test_time_partitioning_setter(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age]) - with self.assertRaises(ValueError): - table.partitioning_type = 123 + table = self._make_one(table_ref) + time_partitioning = TimePartitioning(type_=TimePartitioningType.DAY) - def test_partition_type_setter_unknown_value(self): - from google.cloud.bigquery.table import SchemaField + table.time_partitioning = time_partitioning - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age]) - with self.assertRaises(ValueError): - table.partitioning_type = "HASH" + self.assertEqual( + table.time_partitioning.type_, TimePartitioningType.DAY) + # Both objects point to the same properties dict + self.assertIs( + table._properties['timePartitioning'], + time_partitioning._properties) - def test_partition_type_setter_w_known_value(self): - from google.cloud.bigquery.table import SchemaField + time_partitioning.expiration_ms = 10000 + # Changes to TimePartitioning object are reflected in Table properties + self.assertEqual( + table.time_partitioning.expiration_ms, + time_partitioning.expiration_ms) + + def test_time_partitioning_setter_bad_type(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age]) - self.assertIsNone(table.partitioning_type) - table.partitioning_type = 'DAY' - self.assertEqual(table.partitioning_type, 'DAY') + table = self._make_one(table_ref) - def test_partition_type_setter_w_none(self): - from google.cloud.bigquery.table import SchemaField + with self.assertRaises(ValueError): + table.time_partitioning = {'timePartitioning': {'type': 'DAY'}} + def test_time_partitioning_setter_none(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age]) - table._properties['timePartitioning'] = {'type': 'DAY'} - table.partitioning_type = None - self.assertIsNone(table.partitioning_type) - self.assertFalse('timePartitioning' in table._properties) + table = self._make_one(table_ref) - def test_partition_experation_bad_type(self): - from google.cloud.bigquery.table import SchemaField + table.time_partitioning = None + + self.assertIsNone(table.time_partitioning) + + def test_partitioning_type_setter(self): + from google.cloud.bigquery.table import TimePartitioningType dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age]) - with self.assertRaises(ValueError): - table.partition_expiration = "NEVER" + table = self._make_one(table_ref) - def test_partition_expiration_w_integer(self): - from google.cloud.bigquery.table import SchemaField + with mock.patch('warnings.warn') as warn_patch: + self.assertIsNone(table.partitioning_type) + + table.partitioning_type = TimePartitioningType.DAY + + self.assertEqual(table.partitioning_type, 'DAY') + + assert warn_patch.called + + def test_partitioning_type_setter_w_time_partitioning_set(self): + from google.cloud.bigquery.table import TimePartitioning dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age]) - self.assertIsNone(table.partition_expiration) - table.partition_expiration = 100 - self.assertEqual(table.partitioning_type, "DAY") - self.assertEqual(table.partition_expiration, 100) + table = self._make_one(table_ref) + table.time_partitioning = TimePartitioning() - def test_partition_expiration_w_none(self): - from google.cloud.bigquery.table import SchemaField + with mock.patch('warnings.warn') as warn_patch: + table.partitioning_type = 'NEW_FAKE_TYPE' + + self.assertEqual(table.partitioning_type, 'NEW_FAKE_TYPE') + + assert warn_patch.called + + def test_partitioning_expiration_setter_w_time_partitioning_set(self): + from google.cloud.bigquery.table import TimePartitioning dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age]) - self.assertIsNone(table.partition_expiration) - table._properties['timePartitioning'] = { - 'type': 'DAY', - 'expirationMs': 100, - } - table.partition_expiration = None - self.assertEqual(table.partitioning_type, "DAY") - self.assertIsNone(table.partition_expiration) + table = self._make_one(table_ref) + table.time_partitioning = TimePartitioning() - def test_partition_expiration_w_none_no_partition_set(self): - from google.cloud.bigquery.table import SchemaField + with mock.patch('warnings.warn') as warn_patch: + table.partition_expiration = 100000 + + self.assertEqual(table.partition_expiration, 100000) + assert warn_patch.called + + def test_partition_expiration_setter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(table_ref, schema=[full_name, age]) - self.assertIsNone(table.partition_expiration) - table.partition_expiration = None - self.assertIsNone(table.partitioning_type) - self.assertIsNone(table.partition_expiration) + table = self._make_one(table_ref) + + with mock.patch('warnings.warn') as warn_patch: + self.assertIsNone(table.partition_expiration) + + table.partition_expiration = 100 + + self.assertEqual(table.partition_expiration, 100) + # defaults to 'DAY' when expiration is set and type is not set + self.assertEqual(table.partitioning_type, 'DAY') + + assert warn_patch.called def test_encryption_configuration_setter(self): from google.cloud.bigquery.table import EncryptionConfiguration @@ -978,6 +981,7 @@ def test_ctor(self): 'type': 'TABLE', 'timePartitioning': { 'type': 'DAY', + 'field': 'mycolumn', 'expirationMs': '10000', }, 'labels': { @@ -997,6 +1001,9 @@ def test_ctor(self): self.assertEqual(table.reference.table_id, table_id) self.assertEqual(table.friendly_name, 'Mahogany Coffee Table') self.assertEqual(table.table_type, 'TABLE') + self.assertEqual(table.time_partitioning.type_, 'DAY') + self.assertEqual(table.time_partitioning.expiration_ms, 10000) + self.assertEqual(table.time_partitioning.field, 'mycolumn') self.assertEqual(table.partitioning_type, 'DAY') self.assertEqual(table.partition_expiration, 10000) self.assertEqual(table.labels['some-stuff'], 'this-is-a-label') @@ -1046,6 +1053,7 @@ def test_ctor_missing_properties(self): self.assertIsNone(table.full_table_id) self.assertIsNone(table.friendly_name) self.assertIsNone(table.table_type) + self.assertIsNone(table.time_partitioning) self.assertIsNone(table.partitioning_type) self.assertIsNone(table.partition_expiration) self.assertEqual(table.labels, {}) @@ -1347,3 +1355,61 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() + + +class TestTimePartitioning(unittest.TestCase): + + def test_constructor_defaults(self): + from google.cloud.bigquery.table import TimePartitioning + + time_partitioning = TimePartitioning() + + self.assertEqual(time_partitioning.type_, 'DAY') + self.assertIsNone(time_partitioning.field) + self.assertIsNone(time_partitioning.expiration_ms) + self.assertIsNone(time_partitioning.require_partition_filter) + + api_repr = time_partitioning.to_api_repr() + + exp_api_repr = {'type': 'DAY'} + self.assertEqual(api_repr, exp_api_repr) + + tp_from_api_repr = TimePartitioning.from_api_repr(api_repr) + + self.assertEqual(tp_from_api_repr.type_, 'DAY') + self.assertIsNone(tp_from_api_repr.field) + self.assertIsNone(tp_from_api_repr.expiration_ms) + self.assertIsNone(tp_from_api_repr.require_partition_filter) + + def test_constructor_properties(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType + + time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, + field='name', + expiration_ms=10000, + require_partition_filter=True + ) + + self.assertEqual(time_partitioning.type_, 'DAY') + self.assertEqual(time_partitioning.field, 'name') + self.assertEqual(time_partitioning.expiration_ms, 10000) + self.assertTrue(time_partitioning.require_partition_filter) + + api_repr = time_partitioning.to_api_repr() + + exp_api_repr = { + 'type': 'DAY', + 'field': 'name', + 'expirationMs': '10000', + 'requirePartitionFilter': True, + } + self.assertEqual(api_repr, exp_api_repr) + + tp_from_api_repr = TimePartitioning.from_api_repr(api_repr) + + self.assertEqual(tp_from_api_repr.type_, 'DAY') + self.assertEqual(tp_from_api_repr.field, 'name') + self.assertEqual(tp_from_api_repr.expiration_ms, 10000) + self.assertTrue(time_partitioning.require_partition_filter) From 99d25912ee51b8b835dc5a935adffd3b002b3333 Mon Sep 17 00:00:00 2001 From: shollyman Date: Wed, 2 May 2018 16:10:17 -0700 Subject: [PATCH 0436/2016] BigQuery: switch list_partitions helper to a direct metatable read (#5273) * simplify list_partitions from legacy query to a metatable read * add a length assertion to system test * switch from in test to equality, strip unit test * reintroduce a unit test for list_partitions, address linter responses. * address reviewer comments and linting * Fix str type in return * Clarify the return is a list of strings in the doc comment. --- .../google/cloud/bigquery/client.py | 48 +++++----- .../google-cloud-bigquery/tests/system.py | 8 ++ .../tests/unit/test_client.py | 91 +++++++++---------- 3 files changed, 77 insertions(+), 70 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f69374b04524..a74a2c90c20c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1239,6 +1239,31 @@ def insert_rows_json(self, table, json_rows, row_ids=None, return errors + def list_partitions(self, table, retry=DEFAULT_RETRY): + """List the partitions in a table. + + Arguments: + table (Union[google.cloud.bigquery.table.Table, + google.cloud.bigquery.table.TableReference]): + The table or reference from which to get partition info + retry (google.api_core.retry.Retry): + (Optional) How to retry the RPC. + + Returns: + List[str]: + A list of the partition ids present in the partitioned table + """ + meta_table = self.get_table( + TableReference( + self.dataset(table.dataset_id, project=table.project), + '%s$__PARTITIONS_SUMMARY__' % table.table_id)) + + subset = [col for col in + meta_table.schema if col.name == 'partition_id'] + return [row[0] for row in self.list_rows(meta_table, + selected_fields=subset, + retry=retry)] + def list_rows(self, table, selected_fields=None, max_results=None, page_token=None, start_index=None, page_size=None, retry=DEFAULT_RETRY): @@ -1326,29 +1351,6 @@ def list_rows(self, table, selected_fields=None, max_results=None, extra_params=params) return row_iterator - def list_partitions(self, table, retry=DEFAULT_RETRY): - """List the partitions in a table. - - :type table: One of: - :class:`~google.cloud.bigquery.table.Table` - :class:`~google.cloud.bigquery.table.TableReference` - :param table: the table to list, or a reference to it. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. - - :rtype: list - :returns: a list of time partitions - """ - config = job.QueryJobConfig() - config.use_legacy_sql = True # required for '$' syntax - query_job = self.query( - 'SELECT partition_id from [%s:%s.%s$__PARTITIONS_SUMMARY__]' % - (table.project, table.dataset_id, table.table_id), - job_config=config, - retry=retry) - return [row[0] for row in query_job] - # pylint: disable=unused-argument def _item_to_project(iterator, resource): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index f1209fff3e96..177f944ace3e 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -280,6 +280,14 @@ def test_get_table_w_public_dataset(self): self.assertEqual( schema_names, ['word', 'word_count', 'corpus', 'corpus_date']) + def test_list_partitions(self): + table_ref = DatasetReference( + 'bigquery-partition-samples', + 'samples').table('stackoverflow_comments') + all_rows = Config.CLIENT.list_partitions(table_ref) + self.assertIn('20150508', all_rows) + self.assertEquals(2066, len(all_rows)) + def test_list_tables(self): DATASET_ID = _make_dataset_id('list_tables') dataset = self.temp_dataset(DATASET_ID) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 7a9f8a1aae2e..89f738a3ccb7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2879,6 +2879,50 @@ def test_insert_rows_json(self): path='/%s' % PATH, data=SENT) + def test_list_partitions(self): + from google.cloud.bigquery.table import Table + + rows = 3 + meta_info = { + 'tableReference': + {'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': '%s$__PARTITIONS_SUMMARY__' % self.TABLE_ID}, + 'schema': {'fields': [ + {'name': 'project_id', 'type': 'STRING', 'mode': 'NULLABLE'}, + {'name': 'dataset_id', 'type': 'STRING', 'mode': 'NULLABLE'}, + {'name': 'table_id', 'type': 'STRING', 'mode': 'NULLABLE'}, + {'name': 'partition_id', 'type': 'STRING', 'mode': 'NULLABLE'} + ]}, + 'etag': 'ETAG', + 'numRows': rows, + } + + data = { + 'totalRows': str(rows), + 'rows': [ + {'f': [ + {'v': '20180101'}, + ]}, + {'f': [ + {'v': '20180102'}, + ]}, + {'f': [ + {'v': '20180103'}, + ]}, + ] + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + client._connection = _make_connection(meta_info, data) + table = Table(self.TABLE_REF) + + partition_list = client.list_partitions(table) + self.assertEqual(len(partition_list), rows) + self.assertIn('20180102', partition_list) + def test_list_rows(self): import datetime from google.cloud._helpers import UTC @@ -3114,53 +3158,6 @@ def test_list_rows_errors(self): with self.assertRaises(TypeError): client.list_rows(1) - def test_list_partitions(self): - RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': 'JOB_ID', - }, - 'configuration': { - 'query': { - 'query': 'q', - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': 'DS_ID', - 'tableId': 'TABLE_ID', - }, - }, - }, - 'status': { - 'state': 'DONE', - }, - } - RESULTS_RESOURCE = { - 'jobReference': RESOURCE['jobReference'], - 'jobComplete': True, - 'schema': { - 'fields': [ - {'name': 'partition_id', 'type': 'INTEGER', - 'mode': 'REQUIRED'}, - ] - }, - 'totalRows': '2', - 'pageToken': 'next-page', - } - FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE) - FIRST_PAGE['rows'] = [ - {'f': [{'v': 20160804}]}, - {'f': [{'v': 20160805}]}, - ] - del FIRST_PAGE['pageToken'] - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) - client._connection = _make_connection( - RESOURCE, RESULTS_RESOURCE, FIRST_PAGE) - self.assertEqual(client.list_partitions(self.TABLE_REF), - [20160804, 20160805]) - class Test_make_job_id(unittest.TestCase): def _call_fut(self, job_id, prefix=None): From affbfbe23ad4b7b70207058c11b1eb0ada967b9c Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Thu, 3 May 2018 10:52:22 -0700 Subject: [PATCH 0437/2016] BigQuery: Standardize docstrings for constants (#5289) --- .../google/cloud/bigquery/job.py | 93 +++++++++++-------- 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 19f2eaf96c3c..c6436a85d11e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -88,103 +88,122 @@ def _error_result_to_exception(error_result): class Compression(object): - """The compression type to use for exported files. + """The compression type to use for exported files. The default value is + :attr:`NONE`. - Possible values include `GZIP`, `DEFLATE`, `SNAPPY`, and `NONE`. The - default value is `NONE`. `DEFLATE` and `SNAPPY` are only supported for - Avro. + :attr:`DEFLATE` and :attr:`SNAPPY` are + only supported for Avro. """ + GZIP = 'GZIP' + """Specifies GZIP format.""" + DEFLATE = 'DEFLATE' + """Specifies DEFLATE format.""" + SNAPPY = 'SNAPPY' + """Specifies SNAPPY format.""" + NONE = 'NONE' + """Specifies no compression.""" class CreateDisposition(object): - """Specifies whether the job is allowed to create new tables. - - The following values are supported: - `CREATE_IF_NEEDED`: If the table does not exist, BigQuery creates - the table. - `CREATE_NEVER`: The table must already exist. If it does not, - a 'notFound' error is returned in the job result. - The default value is `CREATE_IF_NEEDED`. + """Specifies whether the job is allowed to create new tables. The default + value is :attr:`CREATE_IF_NEEDED`. Creation, truncation and append actions occur as one atomic update upon job completion. """ + CREATE_IF_NEEDED = 'CREATE_IF_NEEDED' + """If the table does not exist, BigQuery creates the table.""" + CREATE_NEVER = 'CREATE_NEVER' + """The table must already exist. If it does not, a 'notFound' error is + returned in the job result.""" class DestinationFormat(object): - """The exported file format. + """The exported file format. The default value is :attr:`CSV`. - Possible values include `CSV`, `NEWLINE_DELIMITED_JSON` and `AVRO`. - The default value is `CSV`. Tables with nested or repeated fields - cannot be exported as CSV. + Tables with nested or repeated fields cannot be exported as CSV. """ + CSV = 'CSV' + """Specifies CSV format.""" + NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' + """Specifies newline delimited JSON format.""" + AVRO = 'AVRO' + """Specifies Avro format.""" class Encoding(object): - """The character encoding of the data. The supported values - are `UTF_8` corresponding to `'UTF-8'` or `ISO_8859_1` corresponding to - `'ISO-8859-1'`. The default value is `UTF_8`. + """The character encoding of the data. The default is :attr:`UTF_8`. BigQuery decodes the data after the raw, binary data has been split using the values of the quote and fieldDelimiter properties. """ + UTF_8 = 'UTF-8' + """Specifies UTF-8 encoding.""" + ISO_8859_1 = 'ISO-8859-1' + """Specifies ISO-8859-1 encoding.""" class QueryPriority(object): - """Specifies a priority for the query. - - Possible values include `INTERACTIVE` and `BATCH`. The default value - is `INTERACTIVE`. + """Specifies a priority for the query. The default value is + :attr:`INTERACTIVE`. """ + INTERACTIVE = 'INTERACTIVE' + """Specifies interactive priority.""" + BATCH = 'BATCH' + """Specifies batch priority.""" class SourceFormat(object): - """The format of the data files. + """The format of the data files. The default value is :attr:`CSV`.""" - For CSV files, specify `CSV`. For datastore backups, specify - `DATASTORE_BACKUP`. For newline-delimited json, specify - `NEWLINE_DELIMITED_JSON`. For Avro, specify `AVRO`. For Parquet, specify - `PARQUET`. The default value is `CSV`. - """ CSV = 'CSV' + """Specifies CSV format.""" + DATASTORE_BACKUP = 'DATASTORE_BACKUP' + """Specifies datastore backup format""" + NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' + """Specifies newline delimited JSON format.""" + AVRO = 'AVRO' + """Specifies Avro format.""" + PARQUET = 'PARQUET' + """Specifies Parquet format.""" class WriteDisposition(object): """Specifies the action that occurs if destination table already exists. - The following values are supported: - `WRITE_TRUNCATE`: If the table already exists, BigQuery overwrites the - table data. - `WRITE_APPEND`: If the table already exists, BigQuery appends the data - to the table. - `WRITE_EMPTY`: If the table already exists and contains data, a 'duplicate' - error is returned in the job result. - The default value is `WRITE_APPEND`. + The default value is :attr:`WRITE_APPEND`. Each action is atomic and only occurs if BigQuery is able to complete the job successfully. Creation, truncation and append actions occur as one atomic update upon job completion. """ + WRITE_APPEND = 'WRITE_APPEND' + """If the table already exists, BigQuery appends the data to the table.""" + WRITE_TRUNCATE = 'WRITE_TRUNCATE' + """If the table already exists, BigQuery overwrites the table data.""" + WRITE_EMPTY = 'WRITE_EMPTY' + """If the table already exists and contains data, a 'duplicate' error is + returned in the job result.""" class _JobReference(object): From 338a4f024bb4016f674e095736bc4d6e976b0678 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Fri, 4 May 2018 09:01:24 -0700 Subject: [PATCH 0438/2016] Add Test runs for Python 3.7 and remove 3.4 (#5295) * remove 3.4 from unit test runs * add 3.7 to most packages. PubSub, Monitoring, BigQuery not enabled * Fix #5292 by draining queue in a way compatible with SimpleQueue and Queue --- packages/google-cloud-bigquery/nox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 29dead9ac43d..bcf20c077f53 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -65,7 +65,7 @@ def default(session): @nox.session -@nox.parametrize('py', ['2.7', '3.4', '3.5', '3.6']) +@nox.parametrize('py', ['2.7', '3.5', '3.6']) def unit(session, py): """Run the unit test suite.""" From 3ce5ef9cccb5bc33c1d4fce98d3aaae6e13a6a23 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Fri, 4 May 2018 14:05:32 -0700 Subject: [PATCH 0439/2016] Release bigquery 1.2.0 (#5297) * Release 1.2.0 * Fix casing in changelog * Changelog suggestions --- packages/google-cloud-bigquery/CHANGELOG.md | 21 ++++++++++++++++++++- packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 8cfb1aafcbae..80a98b7a98fb 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,10 +4,29 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.2.0 + +### Implementation Changes +- Switch `list_partitions` helper to a direct metatable read (#5273) +- Fix typo in `Encoding.ISO_8859_1` enum value (#5211) + +### New Features +- Add UnknownJob type for redacted jobs. (#5281) +- Add project parameter to `list_datasets` and `list_jobs` (#5217) +- Add from_string factory methods to Dataset and Table (#5255) +- Add column based time partitioning (#5267) + +### Documentation +- Standardize docstrings for constants (#5289) +- Fix docstring / impl of `ExtractJob.destination_uri_file_counts`. (#5245) + +### Internal / Testing Changes +- Add testing support for Python 3.7; remove testing support for Python 3.4. (#5295) + ## 1.1.0 ### New Features -- Add client.get_service_account_email (#5203) +- Add `client.get_service_account_email` (#5203) ### Documentation - Update samples and standardize region tags (#5195) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 851f78160f03..28c44ab72bcd 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.1.0' +version = '1.2.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From dfa239018eb000a87bebc748d188a93a8d2393b1 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 8 May 2018 13:20:00 -0700 Subject: [PATCH 0440/2016] BigQuery: Add additional statistics to query plan stages. (#5307) * Add additional statistics to query plan entries. * Add started/ended properties to QueryPlanEntry * Reimplement QueryPlanEntry as API-keyed property dict * Correct comment. * Address reviewer comments: ditch _PROPERTY_TO_API_FIELD, comment alignment, and explicit None returns for start/end --- .../google/cloud/bigquery/job.py | 295 ++++++++++++------ .../tests/unit/test_job.py | 167 +++++++--- 2 files changed, 329 insertions(+), 133 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index c6436a85d11e..e9c3bfa783ec 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2475,115 +2475,232 @@ def __eq__(self, other): class QueryPlanEntry(object): - """Map a single entry in a query plan. + """QueryPlanEntry represents a single stage of a query execution plan. - :type name: str - :param name: name of the entry + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs + for the underlying API representation within query statistics. - :type entry_id: int - :param entry_id: ID of the entry + """ - :type wait_ratio_avg: float - :param wait_ratio_avg: average wait ratio + def __init__(self): + self._properties = {} - :type wait_ratio_max: float - :param wait_ratio_max: maximum wait ratio + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. - :type read_ratio_avg: float - :param read_ratio_avg: average read ratio + Args: + resource(Dict[str: object]): + ExplainQueryStage representation returned from API - :type read_ratio_max: float - :param read_ratio_max: maximum read ratio + Returns: + google.cloud.bigquery.QueryPlanEntry: + Query plan entry parsed from ``resource`` + """ + entry = cls() + entry._properties = resource + return entry - :type compute_ratio_avg: float - :param compute_ratio_avg: average compute ratio + @property + def name(self): + """Union[str, None]: Human-readable name of the stage.""" + return self._properties.get('name') - :type compute_ratio_max: float - :param compute_ratio_max: maximum compute ratio + @property + def entry_id(self): + """Union[str, None]: Unique ID for the stage within the plan.""" + return self._properties.get('id') - :type write_ratio_avg: float - :param write_ratio_avg: average write ratio + @property + def start(self): + """Union[Datetime, None]: Datetime when the stage started.""" + if self._properties.get('startMs') is None: + return None + return _datetime_from_microseconds( + self._properties.get('startMs') * 1000.0) - :type write_ratio_max: float - :param write_ratio_max: maximum write ratio + @property + def end(self): + """Union[Datetime, None]: Datetime when the stage ended.""" + if self._properties.get('endMs') is None: + return None + return _datetime_from_microseconds( + self._properties.get('endMs') * 1000.0) - :type records_read: int - :param records_read: number of records read + @property + def input_stages(self): + """List(int): Entry IDs for stages that were inputs for this stage.""" + return self._properties.get('inputStages', []) - :type records_written: int - :param records_written: number of records written + @property + def parallel_inputs(self): + """Union[int, None]: Number of parallel input segments within + the stage. + """ + return self._properties.get('parallelInputs') - :type status: str - :param status: entry status + @property + def completed_parallel_inputs(self): + """Union[int, None]: Number of parallel input segments completed.""" + return self._properties.get('completedParallelInputs') - :type steps: List(QueryPlanEntryStep) - :param steps: steps in the entry - """ - def __init__(self, - name, - entry_id, - wait_ratio_avg, - wait_ratio_max, - read_ratio_avg, - read_ratio_max, - compute_ratio_avg, - compute_ratio_max, - write_ratio_avg, - write_ratio_max, - records_read, - records_written, - status, - steps): - self.name = name - self.entry_id = entry_id - self.wait_ratio_avg = wait_ratio_avg - self.wait_ratio_max = wait_ratio_max - self.read_ratio_avg = read_ratio_avg - self.read_ratio_max = read_ratio_max - self.compute_ratio_avg = compute_ratio_avg - self.compute_ratio_max = compute_ratio_max - self.write_ratio_avg = write_ratio_avg - self.write_ratio_max = write_ratio_max - self.records_read = records_read - self.records_written = records_written - self.status = status - self.steps = steps + @property + def wait_ms_avg(self): + """Union[int, None]: Milliseconds the average worker spent waiting to + be scheduled. + """ + return self._properties.get('waitMsAvg') - @classmethod - def from_api_repr(cls, resource): - """Factory: construct instance from the JSON repr. + @property + def wait_ms_max(self): + """Union[int, None]: Milliseconds the slowest worker spent waiting to + be scheduled. + """ + return self._properties.get('waitMsMax') - :type resource: dict - :param resource: JSON representation of the entry + @property + def wait_ratio_avg(self): + """Union[float, None]: Ratio of time the average worker spent waiting + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. + """ + return self._properties.get('waitRatioAvg') - :rtype: :class:`QueryPlanEntry` - :return: new instance built from the resource + @property + def wait_ratio_max(self): + """Union[float, None]: Ratio of time the slowest worker spent waiting + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. """ - records_read = resource.get('recordsRead') - if records_read is not None: - records_read = int(records_read) + return self._properties.get('waitRatioMax') - records_written = resource.get('recordsWritten') - if records_written is not None: - records_written = int(records_written) + @property + def read_ms_avg(self): + """Union[int, None]: Milliseconds the average worker spent reading + input. + """ + return self._properties.get('readMsAvg') - return cls( - name=resource.get('name'), - entry_id=resource.get('id'), - wait_ratio_avg=resource.get('waitRatioAvg'), - wait_ratio_max=resource.get('waitRatioMax'), - read_ratio_avg=resource.get('readRatioAvg'), - read_ratio_max=resource.get('readRatioMax'), - compute_ratio_avg=resource.get('computeRatioAvg'), - compute_ratio_max=resource.get('computeRatioMax'), - write_ratio_avg=resource.get('writeRatioAvg'), - write_ratio_max=resource.get('writeRatioMax'), - records_read=records_read, - records_written=records_written, - status=resource.get('status'), - steps=[QueryPlanEntryStep.from_api_repr(step) - for step in resource.get('steps', ())], - ) + @property + def read_ms_max(self): + """Union[int, None]: Milliseconds the slowest worker spent reading + input. + """ + return self._properties.get('readMsMax') + + @property + def read_ratio_avg(self): + """Union[float, None]: Ratio of time the average worker spent reading + input, relative to the longest time spent by any worker in any stage + of the overall plan. + """ + return self._properties.get('readRatioAvg') + + @property + def read_ratio_max(self): + """Union[float, None]: Ratio of time the slowest worker spent reading + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. + """ + return self._properties.get('readRatioMax') + + @property + def compute_ms_avg(self): + """Union[int, None]: Milliseconds the average worker spent on CPU-bound + processing. + """ + return self._properties.get('computeMsAvg') + + @property + def compute_ms_max(self): + """Union[int, None]: Milliseconds the slowest worker spent on CPU-bound + processing. + """ + return self._properties.get('computeMsMax') + + @property + def compute_ratio_avg(self): + """Union[float, None]: Ratio of time the average worker spent on + CPU-bound processing, relative to the longest time spent by any + worker in any stage of the overall plan. + """ + return self._properties.get('computeRatioAvg') + + @property + def compute_ratio_max(self): + """Union[float, None]: Ratio of time the slowest worker spent on + CPU-bound processing, relative to the longest time spent by any + worker in any stage of the overall plan. + """ + return self._properties.get('computeRatioMax') + + @property + def write_ms_avg(self): + """Union[int, None]: Milliseconds the average worker spent writing + output data. + """ + return self._properties.get('writeMsAvg') + + @property + def write_ms_max(self): + """Union[int, None]: Milliseconds the slowest worker spent writing + output data. + """ + return self._properties.get('writeMsMax') + + @property + def write_ratio_avg(self): + """Union[float, None]: Ratio of time the average worker spent writing + output data, relative to the longest time spent by any worker in any + stage of the overall plan. + """ + return self._properties.get('writeRatioAvg') + + @property + def write_ratio_max(self): + """Union[float, None]: Ratio of time the slowest worker spent writing + output data, relative to the longest time spent by any worker in any + stage of the overall plan. + """ + return self._properties.get('writeRatioMax') + + @property + def records_read(self): + """Union[int, None]: Number of records read by this stage.""" + return self._properties.get('recordsRead') + + @property + def records_written(self): + """Union[int, None]: Number of records written by this stage.""" + return self._properties.get('recordsWritten') + + @property + def status(self): + """Union[str, None]: status of this stage.""" + return self._properties.get('status') + + @property + def shuffle_output_bytes(self): + """Union[int, None]: Number of bytes written by this stage to + intermediate shuffle. + """ + return self._properties.get('shuffleOutputBytes') + + @property + def shuffle_output_bytes_spilled(self): + """Union[int, None]: Number of bytes written by this stage to + intermediate shuffle and spilled to disk. + """ + return self._properties.get('shuffleOutputBytesSpilled') + + @property + def steps(self): + """List(QueryPlanEntryStep): List of step operations performed by + each worker in the stage. + """ + return [QueryPlanEntryStep.from_api_repr(step) + for step in self._properties.get('steps', [])] class UnknownJob(_AsyncJob): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 297aab771bab..a531a0976942 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -25,6 +25,7 @@ from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import EncryptionConfiguration +from google.cloud._helpers import _RFC3339_MICROS import mock @@ -2289,17 +2290,32 @@ def test_query_plan(self): plan_entries = [{ 'name': 'NAME', 'id': 1234, + 'inputStages': [88, 101], + 'startMs': 1522540800000, + 'endMs': 1522540804000, + 'parallelInputs': 1000, + 'completedParallelInputs': 5, + 'waitMsAvg': 33, + 'waitMsMax': 400, 'waitRatioAvg': 2.71828, 'waitRatioMax': 3.14159, + 'readMsAvg': 45, + 'readMsMax': 90, 'readRatioAvg': 1.41421, 'readRatioMax': 1.73205, + 'computeMsAvg': 55, + 'computeMsMax': 99, 'computeRatioAvg': 0.69315, 'computeRatioMax': 1.09861, + 'writeMsAvg': 203, + 'writeMsMax': 340, 'writeRatioAvg': 3.32193, 'writeRatioMax': 2.30258, 'recordsRead': '100', 'recordsWritten': '1', 'status': 'STATUS', + 'shuffleOutputBytes': 1024, + 'shuffleOutputBytesSpilled': 1, 'steps': [{ 'kind': 'KIND', 'substeps': ['SUBSTEP1', 'SUBSTEP2'], @@ -2322,21 +2338,52 @@ def test_query_plan(self): self.assertIsInstance(found, QueryPlanEntry) self.assertEqual(found.name, expected['name']) self.assertEqual(found.entry_id, expected['id']) + self.assertEqual( + len(found.input_stages), + len(expected['inputStages'])) + for f_id in found.input_stages: + self.assertIn(f_id, expected['inputStages']) + self.assertEqual( + found.start.strftime(_RFC3339_MICROS), + '2018-04-01T00:00:00.000000Z') + self.assertEqual( + found.end.strftime(_RFC3339_MICROS), + '2018-04-01T00:00:04.000000Z') + self.assertEqual( + found.parallel_inputs, + expected['parallelInputs']) + self.assertEqual( + found.completed_parallel_inputs, + expected['completedParallelInputs']) + self.assertEqual(found.wait_ms_avg, expected['waitMsAvg']) + self.assertEqual(found.wait_ms_max, expected['waitMsMax']) self.assertEqual(found.wait_ratio_avg, expected['waitRatioAvg']) self.assertEqual(found.wait_ratio_max, expected['waitRatioMax']) + self.assertEqual(found.read_ms_avg, expected['readMsAvg']) + self.assertEqual(found.read_ms_max, expected['readMsMax']) self.assertEqual(found.read_ratio_avg, expected['readRatioAvg']) self.assertEqual(found.read_ratio_max, expected['readRatioMax']) + self.assertEqual(found.compute_ms_avg, expected['computeMsAvg']) + self.assertEqual(found.compute_ms_max, expected['computeMsMax']) self.assertEqual( found.compute_ratio_avg, expected['computeRatioAvg']) self.assertEqual( found.compute_ratio_max, expected['computeRatioMax']) + self.assertEqual(found.write_ms_avg, expected['writeMsAvg']) + self.assertEqual(found.write_ms_max, expected['writeMsMax']) self.assertEqual(found.write_ratio_avg, expected['writeRatioAvg']) self.assertEqual(found.write_ratio_max, expected['writeRatioMax']) self.assertEqual( - found.records_read, int(expected['recordsRead'])) + found.records_read, expected['recordsRead']) self.assertEqual( - found.records_written, int(expected['recordsWritten'])) + found.records_written, expected['recordsWritten']) self.assertEqual(found.status, expected['status']) + self.assertEqual( + found.shuffle_output_bytes, + expected['shuffleOutputBytes']) + self.assertEqual( + found.shuffle_output_bytes_spilled, + expected['shuffleOutputBytesSpilled']) self.assertEqual(len(found.steps), len(expected['steps'])) for f_step, e_step in zip(found.steps, expected['steps']): @@ -3256,17 +3303,35 @@ def test___eq___wrong_type(self): class TestQueryPlanEntry(unittest.TestCase, _Base): NAME = 'NAME' ENTRY_ID = 1234 + START_MS = 1522540800000 + END_MS = 1522540804000 + INPUT_STAGES = (88, 101) + PARALLEL_INPUTS = 1000 + COMPLETED_PARALLEL_INPUTS = 5 + WAIT_MS_AVG = 33 + WAIT_MS_MAX = 400 WAIT_RATIO_AVG = 2.71828 WAIT_RATIO_MAX = 3.14159 + READ_MS_AVG = 45 + READ_MS_MAX = 90 READ_RATIO_AVG = 1.41421 READ_RATIO_MAX = 1.73205 + COMPUTE_MS_AVG = 55 + COMPUTE_MS_MAX = 99 COMPUTE_RATIO_AVG = 0.69315 COMPUTE_RATIO_MAX = 1.09861 + WRITE_MS_AVG = 203 + WRITE_MS_MAX = 340 WRITE_RATIO_AVG = 3.32193 WRITE_RATIO_MAX = 2.30258 RECORDS_READ = 100 RECORDS_WRITTEN = 1 STATUS = 'STATUS' + SHUFFLE_OUTPUT_BYTES = 1024 + SHUFFLE_OUTPUT_BYTES_SPILLED = 1 + + START_RFC3339_MICROS = '2018-04-01T00:00:00.000000Z' + END_RFC3339_MICROS = '2018-04-01T00:00:04.000000Z' @staticmethod def _get_target_class(): @@ -3274,46 +3339,6 @@ def _get_target_class(): return QueryPlanEntry - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - from google.cloud.bigquery.job import QueryPlanEntryStep - - steps = [QueryPlanEntryStep( - kind=TestQueryPlanEntryStep.KIND, - substeps=TestQueryPlanEntryStep.SUBSTEPS)] - entry = self._make_one( - name=self.NAME, - entry_id=self.ENTRY_ID, - wait_ratio_avg=self.WAIT_RATIO_AVG, - wait_ratio_max=self.WAIT_RATIO_MAX, - read_ratio_avg=self.READ_RATIO_AVG, - read_ratio_max=self.READ_RATIO_MAX, - compute_ratio_avg=self.COMPUTE_RATIO_AVG, - compute_ratio_max=self.COMPUTE_RATIO_MAX, - write_ratio_avg=self.WRITE_RATIO_AVG, - write_ratio_max=self.WRITE_RATIO_MAX, - records_read=self.RECORDS_READ, - records_written=self.RECORDS_WRITTEN, - status=self.STATUS, - steps=steps, - ) - self.assertEqual(entry.name, self.NAME) - self.assertEqual(entry.entry_id, self.ENTRY_ID) - self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) - self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) - self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) - self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) - self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) - self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) - self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) - self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) - self.assertEqual(entry.records_read, self.RECORDS_READ) - self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) - self.assertEqual(entry.status, self.STATUS) - self.assertEqual(entry.steps, steps) - def test_from_api_repr_empty(self): klass = self._get_target_class() @@ -3321,17 +3346,32 @@ def test_from_api_repr_empty(self): self.assertIsNone(entry.name) self.assertIsNone(entry.entry_id) + self.assertEqual(entry.input_stages, []) + self.assertIsNone(entry.start) + self.assertIsNone(entry.end) + self.assertIsNone(entry.parallel_inputs) + self.assertIsNone(entry.completed_parallel_inputs) + self.assertIsNone(entry.wait_ms_avg) + self.assertIsNone(entry.wait_ms_max) self.assertIsNone(entry.wait_ratio_avg) self.assertIsNone(entry.wait_ratio_max) + self.assertIsNone(entry.read_ms_avg) + self.assertIsNone(entry.read_ms_max) self.assertIsNone(entry.read_ratio_avg) self.assertIsNone(entry.read_ratio_max) + self.assertIsNone(entry.compute_ms_avg) + self.assertIsNone(entry.compute_ms_max) self.assertIsNone(entry.compute_ratio_avg) self.assertIsNone(entry.compute_ratio_max) + self.assertIsNone(entry.write_ms_avg) + self.assertIsNone(entry.write_ms_max) self.assertIsNone(entry.write_ratio_avg) self.assertIsNone(entry.write_ratio_max) self.assertIsNone(entry.records_read) self.assertIsNone(entry.records_written) self.assertIsNone(entry.status) + self.assertIsNone(entry.shuffle_output_bytes) + self.assertIsNone(entry.shuffle_output_bytes_spilled) self.assertEqual(entry.steps, []) def test_from_api_repr_normal(self): @@ -3343,17 +3383,30 @@ def test_from_api_repr_normal(self): resource = { 'name': self.NAME, 'id': self.ENTRY_ID, + 'inputStages': self.INPUT_STAGES, + 'startMs': self.START_MS, + 'endMs': self.END_MS, + 'waitMsAvg': self.WAIT_MS_AVG, + 'waitMsMax': self.WAIT_MS_MAX, 'waitRatioAvg': self.WAIT_RATIO_AVG, 'waitRatioMax': self.WAIT_RATIO_MAX, + 'readMsAvg': self.READ_MS_AVG, + 'readMsMax': self.READ_MS_MAX, 'readRatioAvg': self.READ_RATIO_AVG, 'readRatioMax': self.READ_RATIO_MAX, + 'computeMsAvg': self.COMPUTE_MS_AVG, + 'computeMsMax': self.COMPUTE_MS_MAX, 'computeRatioAvg': self.COMPUTE_RATIO_AVG, 'computeRatioMax': self.COMPUTE_RATIO_MAX, + 'writeMsAvg': self.WRITE_MS_AVG, + 'writeMsMax': self.WRITE_MS_MAX, 'writeRatioAvg': self.WRITE_RATIO_AVG, 'writeRatioMax': self.WRITE_RATIO_MAX, - 'recordsRead': str(self.RECORDS_READ), - 'recordsWritten': str(self.RECORDS_WRITTEN), + 'recordsRead': self.RECORDS_READ, + 'recordsWritten': self.RECORDS_WRITTEN, 'status': self.STATUS, + 'shuffleOutputBytes': self.SHUFFLE_OUTPUT_BYTES, + 'shuffleOutputBytesSpilled': self.SHUFFLE_OUTPUT_BYTES_SPILLED, 'steps': [{ 'kind': TestQueryPlanEntryStep.KIND, 'substeps': TestQueryPlanEntryStep.SUBSTEPS, @@ -3376,3 +3429,29 @@ def test_from_api_repr_normal(self): self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) self.assertEqual(entry.status, self.STATUS) self.assertEqual(entry.steps, steps) + + def test_start(self): + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + self.assertEqual( + entry.start, + None) + + entry._properties['startMs'] = self.START_MS + self.assertEqual( + entry.start.strftime(_RFC3339_MICROS), + self.START_RFC3339_MICROS) + + def test_end(self): + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + self.assertEqual( + entry.end, + None) + + entry._properties['endMs'] = self.END_MS + self.assertEqual( + entry.end.strftime(_RFC3339_MICROS), + self.END_RFC3339_MICROS) From f99156050c16e8e3a58746eb767e0cb4bec293df Mon Sep 17 00:00:00 2001 From: shollyman Date: Wed, 9 May 2018 15:56:30 -0700 Subject: [PATCH 0441/2016] Add timeline and top-level slot-millis to query statistics. (#5312) * Add timeline and top-level slot-millis to query statistics. * address reviewer comment: add _int_or_none guards * Add a system test that examines query statistics. * Remove ratio evaluation from the query statistics system test. --- .../google/cloud/bigquery/job.py | 110 +++++++++++-- .../google-cloud-bigquery/tests/system.py | 76 +++++++++ .../tests/unit/test_job.py | 148 ++++++++++++++---- 3 files changed, 286 insertions(+), 48 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index e9c3bfa783ec..186beab31ba8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2200,6 +2200,14 @@ def query_plan(self): plan_entries = self._job_statistics().get('queryPlan', ()) return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] + @property + def timeline(self): + """List(TimelineEntry): Return the query execution timeline + from job statistics. + """ + raw = self._job_statistics().get('timeline', ()) + return [TimelineEntry.from_api_repr(entry) for entry in raw] + @property def total_bytes_processed(self): """Return total bytes processed from job statistics, if present. @@ -2274,6 +2282,11 @@ def num_dml_affected_rows(self): result = int(result) return result + @property + def slot_millis(self): + """Union[int, None]: Slot-milliseconds used by this query job.""" + return _int_or_none(self._job_statistics().get('totalSlotMs')) + @property def statement_type(self): """Return statement type from job statistics, if present. @@ -2518,7 +2531,7 @@ def start(self): if self._properties.get('startMs') is None: return None return _datetime_from_microseconds( - self._properties.get('startMs') * 1000.0) + int(self._properties.get('startMs')) * 1000.0) @property def end(self): @@ -2526,38 +2539,41 @@ def end(self): if self._properties.get('endMs') is None: return None return _datetime_from_microseconds( - self._properties.get('endMs') * 1000.0) + int(self._properties.get('endMs')) * 1000.0) @property def input_stages(self): """List(int): Entry IDs for stages that were inputs for this stage.""" - return self._properties.get('inputStages', []) + if self._properties.get('inputStages') is None: + return [] + return [_int_or_none(entry) + for entry in self._properties.get('inputStages')] @property def parallel_inputs(self): """Union[int, None]: Number of parallel input segments within the stage. """ - return self._properties.get('parallelInputs') + return _int_or_none(self._properties.get('parallelInputs')) @property def completed_parallel_inputs(self): """Union[int, None]: Number of parallel input segments completed.""" - return self._properties.get('completedParallelInputs') + return _int_or_none(self._properties.get('completedParallelInputs')) @property def wait_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent waiting to be scheduled. """ - return self._properties.get('waitMsAvg') + return _int_or_none(self._properties.get('waitMsAvg')) @property def wait_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent waiting to be scheduled. """ - return self._properties.get('waitMsMax') + return _int_or_none(self._properties.get('waitMsMax')) @property def wait_ratio_avg(self): @@ -2580,14 +2596,14 @@ def read_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent reading input. """ - return self._properties.get('readMsAvg') + return _int_or_none(self._properties.get('readMsAvg')) @property def read_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent reading input. """ - return self._properties.get('readMsMax') + return _int_or_none(self._properties.get('readMsMax')) @property def read_ratio_avg(self): @@ -2610,14 +2626,14 @@ def compute_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent on CPU-bound processing. """ - return self._properties.get('computeMsAvg') + return _int_or_none(self._properties.get('computeMsAvg')) @property def compute_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent on CPU-bound processing. """ - return self._properties.get('computeMsMax') + return _int_or_none(self._properties.get('computeMsMax')) @property def compute_ratio_avg(self): @@ -2640,14 +2656,14 @@ def write_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent writing output data. """ - return self._properties.get('writeMsAvg') + return _int_or_none(self._properties.get('writeMsAvg')) @property def write_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent writing output data. """ - return self._properties.get('writeMsMax') + return _int_or_none(self._properties.get('writeMsMax')) @property def write_ratio_avg(self): @@ -2668,12 +2684,12 @@ def write_ratio_max(self): @property def records_read(self): """Union[int, None]: Number of records read by this stage.""" - return self._properties.get('recordsRead') + return _int_or_none(self._properties.get('recordsRead')) @property def records_written(self): """Union[int, None]: Number of records written by this stage.""" - return self._properties.get('recordsWritten') + return _int_or_none(self._properties.get('recordsWritten')) @property def status(self): @@ -2685,14 +2701,14 @@ def shuffle_output_bytes(self): """Union[int, None]: Number of bytes written by this stage to intermediate shuffle. """ - return self._properties.get('shuffleOutputBytes') + return _int_or_none(self._properties.get('shuffleOutputBytes')) @property def shuffle_output_bytes_spilled(self): """Union[int, None]: Number of bytes written by this stage to intermediate shuffle and spilled to disk. """ - return self._properties.get('shuffleOutputBytesSpilled') + return _int_or_none(self._properties.get('shuffleOutputBytesSpilled')) @property def steps(self): @@ -2703,6 +2719,66 @@ def steps(self): for step in self._properties.get('steps', [])] +class TimelineEntry(object): + """TimelineEntry represents progress of a query job at a particular + point in time. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs + for the underlying API representation within query statistics. + + """ + + def __init__(self): + self._properties = {} + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: object]): + QueryTimelineSample representation returned from API + + Returns: + google.cloud.bigquery.TimelineEntry: + Timeline sample parsed from ``resource`` + """ + entry = cls() + entry._properties = resource + return entry + + @property + def elapsed_ms(self): + """Union[int, None]: Milliseconds elapsed since start of query + execution.""" + return _int_or_none(self._properties.get('elapsedMs')) + + @property + def active_units(self): + """Union[int, None]: Current number of input units being processed + by workers, reported as largest value since the last sample.""" + return _int_or_none(self._properties.get('activeUnits')) + + @property + def pending_units(self): + """Union[int, None]: Current number of input units remaining for + query stages active at this sample time.""" + return _int_or_none(self._properties.get('pendingUnits')) + + @property + def completed_units(self): + """Union[int, None]: Current number of input units completed by + this query.""" + return _int_or_none(self._properties.get('completedUnits')) + + @property + def slot_millis(self): + """Union[int, None]: Cumulative slot-milliseconds consumed by + this query.""" + return _int_or_none(self._properties.get('totalSlotMs')) + + class UnknownJob(_AsyncJob): """A job whose type cannot be determined.""" diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 177f944ace3e..72ff31a4d133 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1087,6 +1087,82 @@ def test_query_w_timeout(self): # 1 second is much too short for this query. query_job.result(timeout=1) + def test_query_statistics(self): + """ + A system test to exercise some of the extended query statistics. + + Note: We construct a query that should need at least three stages by + specifying a JOIN query. Exact plan and stats are effectively + non-deterministic, so we're largely interested in confirming values + are present. + """ + + job_config = bigquery.QueryJobConfig() + job_config.use_query_cache = False + + query_job = Config.CLIENT.query( + """ + SELECT + COUNT(1) + FROM + ( + SELECT + year, + wban_number + FROM `bigquery-public-data.samples.gsod` + LIMIT 1000 + ) lside + INNER JOIN + ( + SELECT + year, + state + FROM `bigquery-public-data.samples.natality` + LIMIT 1000 + ) rside + ON + lside.year = rside.year + """, + location='US', + job_config=job_config) + + # run the job to completion + query_job.result() + + # Assert top-level stats + self.assertFalse(query_job.cache_hit) + self.assertIsNotNone(query_job.destination) + self.assertTrue(query_job.done) + self.assertFalse(query_job.dry_run) + self.assertIsNone(query_job.num_dml_affected_rows) + self.assertEqual(query_job.priority, 'INTERACTIVE') + self.assertGreater(query_job.total_bytes_billed, 1) + self.assertGreater(query_job.total_bytes_processed, 1) + self.assertEqual(query_job.statement_type, 'SELECT') + self.assertGreater(query_job.slot_millis, 1) + + # Make assertions on the shape of the query plan. + plan = query_job.query_plan + self.assertGreaterEqual(len(plan), 3) + first_stage = plan[0] + self.assertIsNotNone(first_stage.start) + self.assertIsNotNone(first_stage.end) + self.assertIsNotNone(first_stage.entry_id) + self.assertIsNotNone(first_stage.name) + self.assertGreater(first_stage.parallel_inputs, 0) + self.assertGreater(first_stage.completed_parallel_inputs, 0) + self.assertGreater(first_stage.shuffle_output_bytes, 0) + self.assertEqual(first_stage.status, 'COMPLETE') + + # Query plan is a digraph. Ensure it has inter-stage links, + # but not every stage has inputs. + stages_with_inputs = 0 + for entry in plan: + if len(entry.input_stages) > 0: + stages_with_inputs = stages_with_inputs + 1 + self.assertGreater(stages_with_inputs, 0) + self.assertGreater(len(plan), stages_with_inputs) + def test_dbapi_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() for example in examples: diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a531a0976942..d381c31895f3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2289,33 +2289,33 @@ def test_query_plan(self): plan_entries = [{ 'name': 'NAME', - 'id': 1234, - 'inputStages': [88, 101], - 'startMs': 1522540800000, - 'endMs': 1522540804000, - 'parallelInputs': 1000, - 'completedParallelInputs': 5, - 'waitMsAvg': 33, - 'waitMsMax': 400, + 'id': '1234', + 'inputStages': ['88', '101'], + 'startMs': '1522540800000', + 'endMs': '1522540804000', + 'parallelInputs': '1000', + 'completedParallelInputs': '5', + 'waitMsAvg': '33', + 'waitMsMax': '400', 'waitRatioAvg': 2.71828, 'waitRatioMax': 3.14159, - 'readMsAvg': 45, - 'readMsMax': 90, + 'readMsAvg': '45', + 'readMsMax': '90', 'readRatioAvg': 1.41421, 'readRatioMax': 1.73205, - 'computeMsAvg': 55, - 'computeMsMax': 99, + 'computeMsAvg': '55', + 'computeMsMax': '99', 'computeRatioAvg': 0.69315, 'computeRatioMax': 1.09861, - 'writeMsAvg': 203, - 'writeMsMax': 340, + 'writeMsAvg': '203', + 'writeMsMax': '340', 'writeRatioAvg': 3.32193, 'writeRatioMax': 2.30258, 'recordsRead': '100', 'recordsWritten': '1', 'status': 'STATUS', - 'shuffleOutputBytes': 1024, - 'shuffleOutputBytesSpilled': 1, + 'shuffleOutputBytes': '1024', + 'shuffleOutputBytesSpilled': '1', 'steps': [{ 'kind': 'KIND', 'substeps': ['SUBSTEP1', 'SUBSTEP2'], @@ -2342,7 +2342,7 @@ def test_query_plan(self): len(found.input_stages), len(expected['inputStages'])) for f_id in found.input_stages: - self.assertIn(f_id, expected['inputStages']) + self.assertIn(f_id, [int(e) for e in expected['inputStages']]) self.assertEqual( found.start.strftime(_RFC3339_MICROS), '2018-04-01T00:00:00.000000Z') @@ -2351,39 +2351,43 @@ def test_query_plan(self): '2018-04-01T00:00:04.000000Z') self.assertEqual( found.parallel_inputs, - expected['parallelInputs']) + int(expected['parallelInputs'])) self.assertEqual( found.completed_parallel_inputs, - expected['completedParallelInputs']) - self.assertEqual(found.wait_ms_avg, expected['waitMsAvg']) - self.assertEqual(found.wait_ms_max, expected['waitMsMax']) + int(expected['completedParallelInputs'])) + self.assertEqual(found.wait_ms_avg, int(expected['waitMsAvg'])) + self.assertEqual(found.wait_ms_max, int(expected['waitMsMax'])) self.assertEqual(found.wait_ratio_avg, expected['waitRatioAvg']) self.assertEqual(found.wait_ratio_max, expected['waitRatioMax']) - self.assertEqual(found.read_ms_avg, expected['readMsAvg']) - self.assertEqual(found.read_ms_max, expected['readMsMax']) + self.assertEqual(found.read_ms_avg, int(expected['readMsAvg'])) + self.assertEqual(found.read_ms_max, int(expected['readMsMax'])) self.assertEqual(found.read_ratio_avg, expected['readRatioAvg']) self.assertEqual(found.read_ratio_max, expected['readRatioMax']) - self.assertEqual(found.compute_ms_avg, expected['computeMsAvg']) - self.assertEqual(found.compute_ms_max, expected['computeMsMax']) + self.assertEqual( + found.compute_ms_avg, + int(expected['computeMsAvg'])) + self.assertEqual( + found.compute_ms_max, + int(expected['computeMsMax'])) self.assertEqual( found.compute_ratio_avg, expected['computeRatioAvg']) self.assertEqual( found.compute_ratio_max, expected['computeRatioMax']) - self.assertEqual(found.write_ms_avg, expected['writeMsAvg']) - self.assertEqual(found.write_ms_max, expected['writeMsMax']) + self.assertEqual(found.write_ms_avg, int(expected['writeMsAvg'])) + self.assertEqual(found.write_ms_max, int(expected['writeMsMax'])) self.assertEqual(found.write_ratio_avg, expected['writeRatioAvg']) self.assertEqual(found.write_ratio_max, expected['writeRatioMax']) self.assertEqual( - found.records_read, expected['recordsRead']) + found.records_read, int(expected['recordsRead'])) self.assertEqual( - found.records_written, expected['recordsWritten']) + found.records_written, int(expected['recordsWritten'])) self.assertEqual(found.status, expected['status']) self.assertEqual( found.shuffle_output_bytes, - expected['shuffleOutputBytes']) + int(expected['shuffleOutputBytes'])) self.assertEqual( found.shuffle_output_bytes_spilled, - expected['shuffleOutputBytesSpilled']) + int(expected['shuffleOutputBytesSpilled'])) self.assertEqual(len(found.steps), len(expected['steps'])) for f_step, e_step in zip(found.steps, expected['steps']): @@ -2465,6 +2469,21 @@ def test_num_dml_affected_rows(self): query_stats['numDmlAffectedRows'] = str(num_rows) self.assertEqual(job.num_dml_affected_rows, num_rows) + def test_slot_millis(self): + millis = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.slot_millis) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.slot_millis) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.slot_millis) + + query_stats['totalSlotMs'] = millis + self.assertEqual(job.slot_millis, millis) + def test_statement_type(self): statement_type = 'SELECT' client = _make_client(project=self.PROJECT) @@ -2527,6 +2546,34 @@ def test_referenced_tables(self): self.assertEqual(remote.dataset_id, 'other-dataset') self.assertEqual(remote.project, 'other-project-123') + def test_timeline(self): + timeline_resource = [{ + 'elapsedMs': 1, + 'activeUnits': 22, + 'pendingUnits': 33, + 'completedUnits': 44, + 'totalSlotMs': 101, + }] + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.timeline, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.timeline, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.timeline, []) + + query_stats['timeline'] = timeline_resource + + self.assertEqual(len(job.timeline), len(timeline_resource)) + self.assertEqual(job.timeline[0].elapsed_ms, 1) + self.assertEqual(job.timeline[0].active_units, 22) + self.assertEqual(job.timeline[0].pending_units, 33) + self.assertEqual(job.timeline[0].completed_units, 44) + self.assertEqual(job.timeline[0].slot_millis, 101) + def test_undeclared_query_parameters(self): from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter @@ -3455,3 +3502,42 @@ def test_end(self): self.assertEqual( entry.end.strftime(_RFC3339_MICROS), self.END_RFC3339_MICROS) + + +class TestTimelineEntry(unittest.TestCase, _Base): + ELAPSED_MS = 101 + ACTIVE_UNITS = 50 + PENDING_UNITS = 98 + COMPLETED_UNITS = 520 + SLOT_MILLIS = 12029 + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import TimelineEntry + return TimelineEntry + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + entry = klass.from_api_repr({}) + self.assertIsNone(entry.elapsed_ms) + self.assertIsNone(entry.active_units) + self.assertIsNone(entry.pending_units) + self.assertIsNone(entry.completed_units) + self.assertIsNone(entry.slot_millis) + + def test_from_api_repr_normal(self): + resource = { + 'elapsedMs': self.ELAPSED_MS, + 'activeUnits': self.ACTIVE_UNITS, + 'pendingUnits': self.PENDING_UNITS, + 'completedUnits': self.COMPLETED_UNITS, + 'totalSlotMs': self.SLOT_MILLIS, + } + klass = self._get_target_class() + + entry = klass.from_api_repr(resource) + self.assertEqual(entry.elapsed_ms, self.ELAPSED_MS) + self.assertEqual(entry.active_units, self.ACTIVE_UNITS) + self.assertEqual(entry.pending_units, self.PENDING_UNITS) + self.assertEqual(entry.completed_units, self.COMPLETED_UNITS) + self.assertEqual(entry.slot_millis, self.SLOT_MILLIS) From e4cabf0c002fac5db16bd9239afcc05028d3fc29 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 10 May 2018 13:40:56 -0700 Subject: [PATCH 0442/2016] BigQuery: improve system test performance (#5319) * improve system test perf * Correct comment --- .../google-cloud-bigquery/tests/system.py | 120 +++++++++--------- 1 file changed, 57 insertions(+), 63 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 72ff31a4d133..7d79541c2729 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -385,58 +385,54 @@ def _fetch_single_page(table, selected_fields=None): page = six.next(iterator.pages) return list(page) - def _create_table_many_columns(self, rows): - # Load a table with many columns - dataset = self.temp_dataset(_make_dataset_id('list_rows')) + def _create_table_many_columns(self, rowcount): + # Generate a table of maximum width via CREATE TABLE AS SELECT. + # first column is named 'rowval', and has a value from 1..rowcount + # Subsequent column is named col_ and contains the value N*rowval, + # where N is between 1 and 9999 inclusive. + dsname = _make_dataset_id('wide_schema') + dataset = self.temp_dataset(dsname) table_id = 'many_columns' table_ref = dataset.table(table_id) self.to_delete.insert(0, table_ref) - schema = [ - bigquery.SchemaField( - 'column_{}_with_long_name'.format(col_i), - 'INTEGER') - for col_i in range(len(rows[0]))] - body = '' - for row in rows: - body += ','.join([str(item) for item in row]) - body += '\n' - config = bigquery.LoadJobConfig() - config.schema = schema - job = Config.CLIENT.load_table_from_file( - six.BytesIO(body.encode('ascii')), table_ref, job_config=config) - job.result() - return bigquery.Table(table_ref, schema=schema) - - def test_list_rows_many_columns(self): - rows = [[], []] - # BigQuery tables can have max 10,000 columns - for col_i in range(9999): - rows[0].append(col_i) - rows[1].append(10000 - col_i) - expected_rows = frozenset([tuple(row) for row in rows]) - table = self._create_table_many_columns(rows) - - rows = list(Config.CLIENT.list_rows(table)) - - assert len(rows) == 2 - rows_set = frozenset([tuple(row.values()) for row in rows]) - assert rows_set == expected_rows + colprojections = ','.join( + ['r * {} as col_{}'.format(n, n) for n in range(1, 10000)]) + sql = """ + CREATE TABLE {}.{} + AS + SELECT + r as rowval, + {} + FROM + UNNEST(GENERATE_ARRAY(1,{},1)) as r + """.format(dsname, table_id, colprojections, rowcount) + query_job = Config.CLIENT.query(sql) + query_job.result() + return table_ref def test_query_many_columns(self): - rows = [[], []] - # BigQuery tables can have max 10,000 columns - for col_i in range(9999): - rows[0].append(col_i) - rows[1].append(10000 - col_i) - expected_rows = frozenset([tuple(row) for row in rows]) - table = self._create_table_many_columns(rows) - + # Test working with the widest schema BigQuery supports, 10k columns. + row_count = 2 + table_ref = self._create_table_many_columns(row_count) rows = list(Config.CLIENT.query( - 'SELECT * FROM `{}.many_columns`'.format(table.dataset_id))) - - assert len(rows) == 2 - rows_set = frozenset([tuple(row.values()) for row in rows]) - assert rows_set == expected_rows + 'SELECT * FROM `{}.{}`'.format( + table_ref.dataset_id, table_ref.table_id))) + + self.assertEqual(len(rows), row_count) + + # check field representations adhere to expected values. + correctwidth = 0 + badvals = 0 + for r in rows: + vals = r._xxx_values + rowval = vals[0] + if len(vals) == 10000: + correctwidth = correctwidth + 1 + for n in range(1, 10000): + if vals[n] != rowval * (n): + badvals = badvals + 1 + self.assertEqual(correctwidth, row_count) + self.assertEqual(badvals, 0) def test_insert_rows_then_dump_table(self): NOW_SECONDS = 1448911495.484366 @@ -692,20 +688,25 @@ def test_load_table_from_file_w_explicit_location(self): self.assertEqual( list(sorted(rows)), [('a', 3), ('b', 2), ('c', 1)]) - # Can query from EU. - query_string = 'SELECT MAX(value) FROM `{}.letters`'.format( + # Verify location behavior with queries + query_config = bigquery.QueryJobConfig() + query_config.dry_run = True + + query_string = 'SELECT * FROM `{}.letters` LIMIT 1'.format( dataset.dataset_id) - max_value = list(client.query(query_string, location='EU'))[0][0] - self.assertEqual(max_value, 3) + + eu_query = client.query( + query_string, + location='EU', + job_config=query_config) + self.assertTrue(eu_query.done) # Cannot query from US. with self.assertRaises(BadRequest): - list(client.query(query_string, location='US')) - - # Can copy from EU. - copy_job = client.copy_table( - table_ref, dataset.table('letters2'), location='EU') - copy_job.result() + list(client.query( + query_string, + location='US', + job_config=query_config)) # Cannot copy from US. with self.assertRaises(BadRequest): @@ -713,13 +714,6 @@ def test_load_table_from_file_w_explicit_location(self): table_ref, dataset.table('letters2_us'), location='US').result() - # Can extract from EU. - extract_job = client.extract_table( - table_ref, - 'gs://{}/letters.csv'.format(bucket_name), - location='EU') - extract_job.result() - # Cannot extract from US. with self.assertRaises(BadRequest): client.extract_table( From 74b573dfc6df502399f9771023e5d47e947dab1f Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Wed, 16 May 2018 10:14:30 -0700 Subject: [PATCH 0443/2016] Modify system tests to use prerelease versions of grpcio (#5304) --- packages/google-cloud-bigquery/nox.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index bcf20c077f53..a286b8651462 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -93,6 +93,9 @@ def system(session, py): # Set the virtualenv dirname. session.virtualenv_dirname = 'sys-' + py + # Use pre-release gRPC for system tests. + session.install('--pre', 'grpcio') + # Install all test dependencies, then install this package into the # virtualenv's dist-packages. session.install('mock', 'pytest', *LOCAL_DEPS) From 768f822f8daaff98be88477da1ef0f7276b3c0ed Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 17 May 2018 12:41:33 -0700 Subject: [PATCH 0444/2016] BigQuery: fix typo in Client docstrings (#5342) Was missing a comma between types in get_job and cancel_job functions. --- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a74a2c90c20c..5ac988984617 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -603,7 +603,7 @@ def get_job( Returns: Union[google.cloud.bigquery.job.LoadJob, \ google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob \ + google.cloud.bigquery.job.ExtractJob, \ google.cloud.bigquery.job.QueryJob]: Job instance, based on the resource returned by the API. """ @@ -642,7 +642,7 @@ def cancel_job( Returns: Union[google.cloud.bigquery.job.LoadJob, \ google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob \ + google.cloud.bigquery.job.ExtractJob, \ google.cloud.bigquery.job.QueryJob]: Job instance, based on the resource returned by the API. """ From 70c93be0dbacab4c694102a88202c12663299d54 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 17 May 2018 12:42:59 -0700 Subject: [PATCH 0445/2016] BigQuery: NUMERIC type support (#5331) * Support for BigQuery's NUMERIC type, which is currently in alpha. (#4874) * Support for BigQuery's NUMERIC type, which is currently in alpha. * Remove unused import from test_query.py. * Fix newly-added system tests; all system and unit tests pass now. Add unit tests to reach what should be 100% coverage for new code. * Fix lint warning and rename shadowed unit test. * Add unit test that NUMERIC types are encoded correctly in insert_rows * Convert numeric unit test to new mocked connection style. * use var for numeric type in SQL system test --- .../google/cloud/bigquery/_helpers.py | 16 +++++++ .../google/cloud/bigquery/dbapi/_helpers.py | 3 ++ .../google/cloud/bigquery/dbapi/types.py | 2 +- .../google/cloud/bigquery/query.py | 17 ++++--- .../google/cloud/bigquery/schema.py | 7 +-- .../tests/data/characters.json | 2 + .../tests/data/characters.jsonl | 6 +-- .../tests/data/schema.json | 5 ++ .../google-cloud-bigquery/tests/system.py | 17 +++++++ .../tests/unit/test__helpers.py | 42 +++++++++++++++++ .../tests/unit/test_client.py | 47 +++++++++++++++++++ .../tests/unit/test_dbapi__helpers.py | 2 + .../tests/unit/test_query.py | 14 ++++++ 13 files changed, 166 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 1733f8e360b9..f67d9802a6bf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -16,6 +16,7 @@ import base64 import datetime +import decimal from google.api_core import retry from google.cloud._helpers import UTC @@ -46,6 +47,12 @@ def _float_from_json(value, field): return float(value) +def _decimal_from_json(value, field): + """Coerce 'value' to a Decimal, if set or not nullable.""" + if _not_null(value, field): + return decimal.Decimal(value) + + def _bool_from_json(value, field): """Coerce 'value' to a bool, if set or not nullable.""" if _not_null(value, field): @@ -160,6 +167,7 @@ def _record_from_json(value, field): 'INT64': _int_from_json, 'FLOAT': _float_from_json, 'FLOAT64': _float_from_json, + 'NUMERIC': _decimal_from_json, 'BOOLEAN': _bool_from_json, 'BOOL': _bool_from_json, 'STRING': _string_from_json, @@ -228,6 +236,13 @@ def _float_to_json(value): return value +def _decimal_to_json(value): + """Coerce 'value' to a JSON-compatible representation.""" + if isinstance(value, decimal.Decimal): + value = str(value) + return value + + def _bool_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, bool): @@ -293,6 +308,7 @@ def _time_to_json(value): 'INT64': _int_to_json, 'FLOAT': _float_to_json, 'FLOAT64': _float_to_json, + 'NUMERIC': _decimal_to_json, 'BOOLEAN': _bool_to_json, 'BOOL': _bool_to_json, 'BYTES': _bytes_to_json, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index a5a1add1f3a4..56c6a088672f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -14,6 +14,7 @@ import collections import datetime +import decimal import numbers import six @@ -46,6 +47,8 @@ def scalar_to_query_parameter(value, name=None): parameter_type = 'INT64' elif isinstance(value, numbers.Real): parameter_type = 'FLOAT64' + elif isinstance(value, decimal.Decimal): + parameter_type = 'NUMERIC' elif isinstance(value, six.text_type): parameter_type = 'STRING' elif isinstance(value, six.binary_type): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py index 9636ce68bfc1..feb3e320bcca 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py @@ -79,6 +79,6 @@ def __eq__(self, other): STRING = 'STRING' BINARY = _DBAPITypeObject('BYTES', 'RECORD', 'STRUCT') NUMBER = _DBAPITypeObject( - 'INTEGER', 'INT64', 'FLOAT', 'FLOAT64', 'BOOLEAN', 'BOOL') + 'INTEGER', 'INT64', 'FLOAT', 'FLOAT64', 'NUMERIC', 'BOOLEAN', 'BOOL') DATETIME = _DBAPITypeObject('TIMESTAMP', 'DATE', 'TIME', 'DATETIME') ROWID = 'ROWID' diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index e3bd5c196bec..a3991173f9df 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -81,10 +81,11 @@ class ScalarQueryParameter(_AbstractQueryParameter): :type type_: str :param type_: name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. + 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'DATE'. - :type value: str, int, float, bool, :class:`datetime.datetime`, or - :class:`datetime.date`. + :type value: str, int, float, :class:`decimal.Decimal`, bool, + :class:`datetime.datetime`, or :class:`datetime.date`. :param value: the scalar parameter value. """ def __init__(self, name, type_, value): @@ -99,9 +100,11 @@ def positional(cls, type_, value): :type type_: str :param type_: name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. + 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'DATE'. - :type value: str, int, float, bool, :class:`datetime.datetime`, or + :type value: str, int, float, :class:`decimal.Decimal`, bool, + :class:`datetime.datetime`, or :class:`datetime.date`. :param value: the scalar parameter value. @@ -185,7 +188,7 @@ class ArrayQueryParameter(_AbstractQueryParameter): :type array_type: str :param array_type: name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. :type values: list of appropriate scalar type. :param values: the parameter array values. @@ -202,7 +205,7 @@ def positional(cls, array_type, values): :type array_type: str :param array_type: name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. :type values: list of appropriate scalar type :param values: the parameter array values. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 5f566025750c..cc1b4a5ff024 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -23,7 +23,8 @@ class SchemaField(object): :type field_type: str :param field_type: the type of the field (one of 'STRING', 'INTEGER', - 'FLOAT', 'BOOLEAN', 'TIMESTAMP' or 'RECORD'). + 'FLOAT', 'NUMERIC', 'BOOLEAN', 'TIMESTAMP' or + 'RECORD'). :type mode: str :param mode: the mode of the field (one of 'NULLABLE', 'REQUIRED', @@ -77,8 +78,8 @@ def name(self): def field_type(self): """str: The type of the field. - Will be one of 'STRING', 'INTEGER', 'FLOAT', 'BOOLEAN', - 'TIMESTAMP' or 'RECORD'. + Will be one of 'STRING', 'INTEGER', 'FLOAT', 'NUMERIC', + 'BOOLEAN', 'TIMESTAMP' or 'RECORD'. """ return self._field_type diff --git a/packages/google-cloud-bigquery/tests/data/characters.json b/packages/google-cloud-bigquery/tests/data/characters.json index ac854fb812d4..d38636810196 100644 --- a/packages/google-cloud-bigquery/tests/data/characters.json +++ b/packages/google-cloud-bigquery/tests/data/characters.json @@ -38,6 +38,7 @@ "TeaTime" : "15:00:00", "Weight" : 198.6, "FavoriteTime" : "2001-12-19T23:59:59", + "FavoriteNumber" : "3.141592654", "IsMagic" : true }, { @@ -47,6 +48,7 @@ "IsMagic" : true, "FavoriteTime" : "2000-10-31T23:27:46", "Age" : "17", + "FavoriteNumber" : "13", "Spells" : [ { "LastUsed" : "2017-02-14 12:07:23 UTC", diff --git a/packages/google-cloud-bigquery/tests/data/characters.jsonl b/packages/google-cloud-bigquery/tests/data/characters.jsonl index 1da3f2309cae..42b5bdc6a152 100644 --- a/packages/google-cloud-bigquery/tests/data/characters.jsonl +++ b/packages/google-cloud-bigquery/tests/data/characters.jsonl @@ -1,3 +1,3 @@ -{"Name":"Bilbo","Age":"111","Weight":67.2,"IsMagic":false,"Spells":[],"TeaTime":"10:00:00","NextVacation":"2017-09-22","FavoriteTime":"2031-04-01T05:09:27"} -{"Name":"Gandalf","Age":"1000","Weight":198.6,"IsMagic":true,"Spells":[{"Name": "Skydragon", "Icon":"iVBORw0KGgoAAAANSUhEUgAAAB4AAAAgCAYAAAAFQMh/AAAAAXNSR0IArs4c6QAAA9lJREFUSA21lk9OVEEQxvsRDImoiMG9mLjjCG5mEg7gEfQGsIcF7p0EDsBBSJiNO7ZsFRZqosb/QkSj7fer7ur33sw8GDFUUq+7q6vqq6qu7pkQzqG4EeI521e7FePVgM9cGPYwhCi6UO8qFOK+YY+Br66ujsmmxb84Yzwp6zCsxjJfWVkxnMsEMGuWHZ9Wcz11cM48hkq0vLwc1tbW4mAwqDpcdIqnMmgF0JMv2CiGnZ2dcHR0FA4PD8Pe3t5U/tx6bCSlb+JT8XfxT3HsUek0Li0tRdjWl+z6iRF+FNA1hXPDQ/IMNyRg3s8bD/OaZS+VP+9cOLSa64cA34oXZWagDkRzAaJxXaE+ufc4rCN7LrazZ2+8+STtpAL8WYDvpTaHKlkB2iQARMvb2+H27m4YaL7zaDtUw1BZAASi6T8T2UZnPZV2pvnJfCH5p8bewcGB6TrIfz8wBZgHQ83kjpuj6RBYQpuo09Tvmpd7TPe+ktZN8cKwS92KWXGuaqWowlYEwthtMcWOZUNJc8at+zuF/Xkqo69baS7P+AvWjYwJ4jyHXXsEnd74ZO/Pq+uXUuv6WNlso6cvnDsZB1V/unJab3D1/KrJDw9NCM9wHf2FK2ejTKMejnBHfGtfH7LGGCdQDqaqJgfgzWjXK1nYV4jRbPGnxUT7cqUaZfJrVZeOm9QmB21L6xXgbu/ScsYusJFMoU0x2fsamRJOd6kOYDRLUxv94ENZe8+0gM+0dyz+KgU7X8rLHHCIOZyrna4y6ykIu0YCs02TBXmk3PZssmEgaTxTo83xjCIjoE21h0Yah3MrV4+9kR8MaabGze+9NEILGAFE5nMOiiA32KnAr/sb7tED3nzlzC4dB38WMC+EjaqHfqvUKHi2gJPdWQ6AbH8hgyQ7QY6jvjj3QZWvX6pUAtduTX5Dss96Q7NI9RQRJeeKvRFbt0v2gb1Gx/PooJsztn1c1DqpAU3Hde2dB2aEHBhjgOFjMeDvxLafjQ3YZQSgOcHJZX611H45sGLHWvYTz9hiURlpNoBZvxb/Ft9lAQ1DmBfUiR+j1hAPkMBTE9L9+zLva1QvGFHurRBaZ5xLVitoBviiRkD/sIMDztKA5FA0b9/0OclzO2/XAQymJ0TcghZwEo9/AX8gMeAJMOvIsWWt5bwCoiFhVSllrdH0t5Q1JHAFlKJNkvTVdn2GHb9KdmacMT+d/Os05imJUccRX2YuZ93Sxf0Ilc4DPDeAq5SAvFEAY94cQc6BA26dzb4HWAJI4DPmQE5KCVUyvb2FcDZem7JdT2ggKUP3xX6n9XNq1DpzSf4Cy4ZqSlmM8d8AAAAASUVORK5CYII=","DiscoveredBy":"Firebreather","Properties":[{"Name":"Flying","Power":1},{"Name":"Creature","Power":1},{"Name":"Explodey","Power":11}],"LastUsed":"2015-10-31 23:59:56 UTC"}],"TeaTime":"15:00:00","NextVacation":"2666-06-06","FavoriteTime":"2001-12-19T23:59:59"} -{"Name":"Sabrina","Age":"17","Weight":128.3,"IsMagic":true,"Spells":[{"Name": "Talking cats", "Icon":"iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAAAXNSR0IArs4c6QAAABxpRE9UAAAAAgAAAAAAAAAgAAAAKAAAACAAAAAgAAABxj2CfowAAAGSSURBVHgB7Jc9TsNAEIX3JDkCPUV6KlpKFHEGlD4nyA04ACUXQKTgCEipUnKGNEbP0otentayicZ24SlWs7tjO/N9u/5J2b2+NUtuZcnwYE8BuQPyGZAPwXwLLPk5kG+BJa9+fgfkh1B+CeancL4F8i2Q/wWm/S/w+XFoTseftn0dvhu0OXfhpM+AGvzcEiYVAFisPqE9zrETJhHAlXfg2lglMK9z0f3RBfB+ZyRUV3x+erzsEIjjOBqc1xtNAIrvguybV3A9lkVHxlEE6GrrPb/ZvAySwlUnfCmlPQ+R8JCExvGtcRQBLFwj4FGkznX1VYDKPG/f2/MjwCksXACgdNUxJjwK9xwl4JihOwTFR0kIF+CABEPRnvsvPFctMoYKqAFSAFaMwB4pp3Y+bodIYL9WmIAaIOHxo7W8wiHvAjTvhUeNwwSgeAeAABbqOewC5hBdwFD4+9+7puzXV9fS6/b1wwT4tsaYAhwOOQdUQch5vgZCeAhAv3ZM31yYAAUgvApQQQ6n5w6FB/RVe1jdJOAPAAD//1eMQwoAAAGQSURBVO1UMU4DQQy8X9AgWopIUINEkS4VlJQo4gvwAV7AD3gEH4iSgidESpWSXyyZExP5lr0c7K5PsXBhec/2+jzjuWtent9CLdtu1mG5+gjz+WNr7IsY7eH+tvO+xfuqk4vz7CH91edFaF5v9nb6dBKm13edvrL+0Lk5lMzJkQDeJSkkgHF6mR8CHwMHCQR/NAQQGD0BAlwK4FCefQiefq+A2Vn29tG7igLAfmwcnJu/nJy3BMQkMN9HEPr8AL3bfBv7Bp+7/SoExMDjZwKEJwmyhnnmQIQEBIlz2x0iKoAvJkAC6TsTIH6MqRrEWUMSZF2zAwqT4Eu/e6pzFAIkmNSZ4OFT+VYBIIF//UqbJwnF/4DU0GwOn8r/JQYCpPGufEfJuZiA37ycQw/5uFeqPq4pfR6FADmkBCXjfWdZj3NfXW58dAJyB9W65wRoMWulryvAyqa05nQFaDFrpa8rwMqmtOZ0BWgxa6WvK8DKprTmdAVoMWulryvAyqa05nQFaDFrpa8rwMqmtOb89wr4AtQ4aPoL6yVpAAAAAElFTkSuQmCC","DiscoveredBy":"Salem","Properties":[{"Name":"Makes you look crazy","Power":1}],"LastUsed":"2017-02-14 12:07:23 UTC"}],"TeaTime":"12:00:00","NextVacation":"2017-03-14","FavoriteTime":"2000-10-31T23:27:46"} +{"Name":"Bilbo","Age":"111","Weight":67.2,"IsMagic":false,"Spells":[],"TeaTime":"10:00:00","NextVacation":"2017-09-22","FavoriteTime":"2031-04-01T05:09:27","FavoriteNumber":"111"} +{"Name":"Gandalf","Age":"1000","Weight":198.6,"IsMagic":true,"Spells":[{"Name": "Skydragon", "Icon":"iVBORw0KGgoAAAANSUhEUgAAAB4AAAAgCAYAAAAFQMh/AAAAAXNSR0IArs4c6QAAA9lJREFUSA21lk9OVEEQxvsRDImoiMG9mLjjCG5mEg7gEfQGsIcF7p0EDsBBSJiNO7ZsFRZqosb/QkSj7fer7ur33sw8GDFUUq+7q6vqq6qu7pkQzqG4EeI521e7FePVgM9cGPYwhCi6UO8qFOK+YY+Br66ujsmmxb84Yzwp6zCsxjJfWVkxnMsEMGuWHZ9Wcz11cM48hkq0vLwc1tbW4mAwqDpcdIqnMmgF0JMv2CiGnZ2dcHR0FA4PD8Pe3t5U/tx6bCSlb+JT8XfxT3HsUek0Li0tRdjWl+z6iRF+FNA1hXPDQ/IMNyRg3s8bD/OaZS+VP+9cOLSa64cA34oXZWagDkRzAaJxXaE+ufc4rCN7LrazZ2+8+STtpAL8WYDvpTaHKlkB2iQARMvb2+H27m4YaL7zaDtUw1BZAASi6T8T2UZnPZV2pvnJfCH5p8bewcGB6TrIfz8wBZgHQ83kjpuj6RBYQpuo09Tvmpd7TPe+ktZN8cKwS92KWXGuaqWowlYEwthtMcWOZUNJc8at+zuF/Xkqo69baS7P+AvWjYwJ4jyHXXsEnd74ZO/Pq+uXUuv6WNlso6cvnDsZB1V/unJab3D1/KrJDw9NCM9wHf2FK2ejTKMejnBHfGtfH7LGGCdQDqaqJgfgzWjXK1nYV4jRbPGnxUT7cqUaZfJrVZeOm9QmB21L6xXgbu/ScsYusJFMoU0x2fsamRJOd6kOYDRLUxv94ENZe8+0gM+0dyz+KgU7X8rLHHCIOZyrna4y6ykIu0YCs02TBXmk3PZssmEgaTxTo83xjCIjoE21h0Yah3MrV4+9kR8MaabGze+9NEILGAFE5nMOiiA32KnAr/sb7tED3nzlzC4dB38WMC+EjaqHfqvUKHi2gJPdWQ6AbH8hgyQ7QY6jvjj3QZWvX6pUAtduTX5Dss96Q7NI9RQRJeeKvRFbt0v2gb1Gx/PooJsztn1c1DqpAU3Hde2dB2aEHBhjgOFjMeDvxLafjQ3YZQSgOcHJZX611H45sGLHWvYTz9hiURlpNoBZvxb/Ft9lAQ1DmBfUiR+j1hAPkMBTE9L9+zLva1QvGFHurRBaZ5xLVitoBviiRkD/sIMDztKA5FA0b9/0OclzO2/XAQymJ0TcghZwEo9/AX8gMeAJMOvIsWWt5bwCoiFhVSllrdH0t5Q1JHAFlKJNkvTVdn2GHb9KdmacMT+d/Os05imJUccRX2YuZ93Sxf0Ilc4DPDeAq5SAvFEAY94cQc6BA26dzb4HWAJI4DPmQE5KCVUyvb2FcDZem7JdT2ggKUP3xX6n9XNq1DpzSf4Cy4ZqSlmM8d8AAAAASUVORK5CYII=","DiscoveredBy":"Firebreather","Properties":[{"Name":"Flying","Power":1},{"Name":"Creature","Power":1},{"Name":"Explodey","Power":11}],"LastUsed":"2015-10-31 23:59:56 UTC"}],"TeaTime":"15:00:00","NextVacation":"2666-06-06","FavoriteTime":"2001-12-19T23:59:59","FavoriteNumber":"1.618033989"} +{"Name":"Sabrina","Age":"17","Weight":128.3,"IsMagic":true,"Spells":[{"Name": "Talking cats", "Icon":"iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAAAXNSR0IArs4c6QAAABxpRE9UAAAAAgAAAAAAAAAgAAAAKAAAACAAAAAgAAABxj2CfowAAAGSSURBVHgB7Jc9TsNAEIX3JDkCPUV6KlpKFHEGlD4nyA04ACUXQKTgCEipUnKGNEbP0otentayicZ24SlWs7tjO/N9u/5J2b2+NUtuZcnwYE8BuQPyGZAPwXwLLPk5kG+BJa9+fgfkh1B+CeancL4F8i2Q/wWm/S/w+XFoTseftn0dvhu0OXfhpM+AGvzcEiYVAFisPqE9zrETJhHAlXfg2lglMK9z0f3RBfB+ZyRUV3x+erzsEIjjOBqc1xtNAIrvguybV3A9lkVHxlEE6GrrPb/ZvAySwlUnfCmlPQ+R8JCExvGtcRQBLFwj4FGkznX1VYDKPG/f2/MjwCksXACgdNUxJjwK9xwl4JihOwTFR0kIF+CABEPRnvsvPFctMoYKqAFSAFaMwB4pp3Y+bodIYL9WmIAaIOHxo7W8wiHvAjTvhUeNwwSgeAeAABbqOewC5hBdwFD4+9+7puzXV9fS6/b1wwT4tsaYAhwOOQdUQch5vgZCeAhAv3ZM31yYAAUgvApQQQ6n5w6FB/RVe1jdJOAPAAD//1eMQwoAAAGQSURBVO1UMU4DQQy8X9AgWopIUINEkS4VlJQo4gvwAV7AD3gEH4iSgidESpWSXyyZExP5lr0c7K5PsXBhec/2+jzjuWtent9CLdtu1mG5+gjz+WNr7IsY7eH+tvO+xfuqk4vz7CH91edFaF5v9nb6dBKm13edvrL+0Lk5lMzJkQDeJSkkgHF6mR8CHwMHCQR/NAQQGD0BAlwK4FCefQiefq+A2Vn29tG7igLAfmwcnJu/nJy3BMQkMN9HEPr8AL3bfBv7Bp+7/SoExMDjZwKEJwmyhnnmQIQEBIlz2x0iKoAvJkAC6TsTIH6MqRrEWUMSZF2zAwqT4Eu/e6pzFAIkmNSZ4OFT+VYBIIF//UqbJwnF/4DU0GwOn8r/JQYCpPGufEfJuZiA37ycQw/5uFeqPq4pfR6FADmkBCXjfWdZj3NfXW58dAJyB9W65wRoMWulryvAyqa05nQFaDFrpa8rwMqmtOZ0BWgxa6WvK8DKprTmdAVoMWulryvAyqa05nQFaDFrpa8rwMqmtOb89wr4AtQ4aPoL6yVpAAAAAElFTkSuQmCC","DiscoveredBy":"Salem","Properties":[{"Name":"Makes you look crazy","Power":1}],"LastUsed":"2017-02-14 12:07:23 UTC"}],"TeaTime":"12:00:00","NextVacation":"2017-03-14","FavoriteTime":"2000-10-31T23:27:46","FavoriteNumber":"13"} diff --git a/packages/google-cloud-bigquery/tests/data/schema.json b/packages/google-cloud-bigquery/tests/data/schema.json index 303076123dd9..6a36e55e579a 100644 --- a/packages/google-cloud-bigquery/tests/data/schema.json +++ b/packages/google-cloud-bigquery/tests/data/schema.json @@ -78,6 +78,11 @@ "mode" : "NULLABLE", "name" : "FavoriteTime", "type" : "DATETIME" + }, + { + "mode" : "NULLABLE", + "name" : "FavoriteNumber", + "type" : "NUMERIC" } ] } diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 7d79541c2729..4ab6c8b8c9b4 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -16,6 +16,7 @@ import concurrent.futures import csv import datetime +import decimal import json import operator import os @@ -959,6 +960,7 @@ def _generate_standard_sql_types_examples(self): stamp_microseconds = stamp + '.250000' zoned = naive.replace(tzinfo=UTC) zoned_microseconds = naive_microseconds.replace(tzinfo=UTC) + numeric = decimal.Decimal('123456789.123456789') return [ { 'sql': 'SELECT 1', @@ -1005,6 +1007,10 @@ def _generate_standard_sql_types_examples(self): 'sql': 'SELECT TIME(TIMESTAMP "%s")' % (stamp,), 'expected': naive.time(), }, + { + 'sql': 'SELECT NUMERIC "%s"' % (numeric,), + 'expected': numeric, + }, { 'sql': 'SELECT (1, 2)', 'expected': {'_field_1': 1, '_field_2': 2}, @@ -1257,6 +1263,10 @@ def test_query_w_query_params(self): pi = 3.1415926 pi_param = ScalarQueryParameter( name='pi', type_='FLOAT64', value=pi) + pi_numeric = decimal.Decimal('3.141592654') + pi_numeric_param = ScalarQueryParameter( + name='pi_numeric_param', type_='NUMERIC', + value=pi_numeric) truthy = True truthy_param = ScalarQueryParameter( name='truthy', type_='BOOL', value=truthy) @@ -1332,6 +1342,11 @@ def test_query_w_query_params(self): 'expected': pi, 'query_parameters': [pi_param], }, + { + 'sql': 'SELECT @pi_numeric_param', + 'expected': pi_numeric, + 'query_parameters': [pi_numeric_param], + }, { 'sql': 'SELECT @truthy', 'expected': truthy, @@ -1771,6 +1786,8 @@ def test_create_table_rows_fetch_nested_schema(self): '%Y-%m-%dT%H:%M:%S') e_favtime = datetime.datetime(*parts[0:6]) self.assertEqual(found[7], e_favtime) + self.assertEqual(found[8], + decimal.Decimal(expected['FavoriteNumber'])) def _fetch_dataframe(self, query): return Config.CLIENT.query(query).result().to_dataframe() diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 0e407551f395..b4899094dc93 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -14,6 +14,7 @@ import base64 import datetime +import decimal import unittest @@ -80,6 +81,30 @@ def test_w_float_value(self): self.assertEqual(coerced, 3.1415) +class Test_decimal_from_json(unittest.TestCase): + + def _call_fut(self, value, field): + from google.cloud.bigquery._helpers import _decimal_from_json + + return _decimal_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._call_fut(None, _Field('REQUIRED')) + + def test_w_string_value(self): + coerced = self._call_fut('3.1415', object()) + self.assertEqual(coerced, decimal.Decimal('3.1415')) + + def test_w_float_value(self): + coerced = self._call_fut(3.1415, object()) + # There is no exact float representation of 3.1415. + self.assertEqual(coerced, decimal.Decimal(3.1415)) + + class Test_bool_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -585,6 +610,23 @@ def test_w_float(self): self.assertEqual(self._call_fut(1.23), 1.23) +class Test_decimal_to_json(unittest.TestCase): + + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _decimal_to_json + + return _decimal_to_json(value) + + def test_w_float(self): + self.assertEqual(self._call_fut(1.23), 1.23) + + def test_w_string(self): + self.assertEqual(self._call_fut('1.23'), '1.23') + + def test_w_decimal(self): + self.assertEqual(self._call_fut(decimal.Decimal('1.23')), '1.23') + + class Test_bool_to_json(unittest.TestCase): def _call_fut(self, value): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 89f738a3ccb7..427f403e6be5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -13,6 +13,7 @@ # limitations under the License. import copy +import decimal import email import io import json @@ -2825,6 +2826,52 @@ def test_insert_rows_errors(self): with self.assertRaises(TypeError): client.insert_rows(1, ROWS) + def test_insert_rows_w_numeric(self): + from google.cloud.bigquery import table + + project = 'PROJECT' + ds_id = 'DS_ID' + table_id = 'TABLE_ID' + creds = _make_credentials() + http = object() + client = self._make_one(project=project, credentials=creds, _http=http) + conn = client._connection = _make_connection({}) + table_ref = DatasetReference(project, ds_id).table(table_id) + schema = [ + table.SchemaField('account', 'STRING'), + table.SchemaField('balance', 'NUMERIC'), + ] + insert_table = table.Table(table_ref, schema=schema) + rows = [ + ('Savings', decimal.Decimal('23.47')), + ('Checking', decimal.Decimal('1.98')), + ('Mortgage', decimal.Decimal('-12345678909.87654321')), + ] + + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(rows)))): + errors = client.insert_rows(insert_table, rows) + + self.assertEqual(len(errors), 0) + rows_json = [ + {'account': 'Savings', 'balance': '23.47'}, + {'account': 'Checking', 'balance': '1.98'}, + { + 'account': 'Mortgage', + 'balance': '-12345678909.87654321', + }, + ] + sent = { + 'rows': [{ + 'json': row, + 'insertId': str(i), + } for i, row in enumerate(rows_json)], + } + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/{}/datasets/{}/tables/{}/insertAll'.format( + project, ds_id, table_id), + data=sent) + def test_insert_rows_json(self): from google.cloud.bigquery.table import Table, SchemaField from google.cloud.bigquery.dataset import DatasetReference diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 78c5ea1ca18a..f0430f06a1e5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -13,6 +13,7 @@ # limitations under the License. import datetime +import decimal import math import unittest @@ -30,6 +31,7 @@ def test_scalar_to_query_parameter(self): (123, 'INT64'), (-123456789, 'INT64'), (1.25, 'FLOAT64'), + (decimal.Decimal('1.25'), 'NUMERIC'), (b'I am some bytes', 'BYTES'), (u'I am a string', 'STRING'), (datetime.date(2017, 4, 1), 'DATE'), diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index bce6d2cd726a..c262132f8e0c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -172,6 +172,20 @@ def test_to_api_repr_w_float(self): param = klass.positional(type_='FLOAT64', value=12.345) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_numeric(self): + EXPECTED = { + 'parameterType': { + 'type': 'NUMERIC', + }, + 'parameterValue': { + 'value': '123456789.123456789', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='NUMERIC', + value='123456789.123456789') + self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_bool(self): EXPECTED = { 'parameterType': { From 8918b4e95eb54f7f0306e18f5d576edfaf7cb458 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 22 May 2018 13:02:49 -0700 Subject: [PATCH 0446/2016] BigQuery: use autosummary to split up API reference docs (#5340) * BigQuery: use autosummary to split up API reference docs * Move generated docs stubs to generated directory. * Rename Job enums heading. Comment changes from upstream autosummary. * Remove duplicate documentation of RowIterator.pages. --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 40eb7a02f85a..9d786703d02f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1084,8 +1084,6 @@ class RowIterator(HTTPIterator): page_size (int, optional): The number of items to return per page. extra_params (Dict[str, object]): Extra query string parameters for the API call. - - .. autoattribute:: pages """ def __init__(self, client, api_request, path, schema, page_token=None, From c2a6b7240b4106716cfa6a7c0d633132d88d8276 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 22 May 2018 16:05:16 -0400 Subject: [PATCH 0447/2016] Prune systests identified as reduntant to snippets. (#5365) See @tswast's analysis: https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5003#issuecomment-385049005 Toward #5003. --- .../google-cloud-bigquery/tests/system.py | 98 ------------------- 1 file changed, 98 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 4ab6c8b8c9b4..03465ff8d594 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -475,47 +475,6 @@ def test_insert_rows_then_dump_table(self): self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) - def test_load_table_from_local_file_then_dump_table(self): - from google.cloud._testing import _NamedTemporaryFile - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.job import WriteDisposition - - TABLE_NAME = 'test_table' - - dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) - table_ref = dataset.table(TABLE_NAME) - table_arg = Table(table_ref, schema=SCHEMA) - table = retry_403(Config.CLIENT.create_table)(table_arg) - self.to_delete.insert(0, table) - - with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as csv_write: - writer = csv.writer(csv_write) - writer.writerow(HEADER_ROW) - writer.writerows(ROWS) - - with open(temp.name, 'rb') as csv_read: - config = bigquery.LoadJobConfig() - config.source_format = SourceFormat.CSV - config.skip_leading_rows = 1 - config.create_disposition = CreateDisposition.CREATE_NEVER - config.write_disposition = WriteDisposition.WRITE_EMPTY - config.schema = table.schema - job = Config.CLIENT.load_table_from_file( - csv_read, table_ref, job_config=config) - - # Retry until done. - job.result(timeout=JOB_TIMEOUT) - - self.assertEqual(job.output_rows, len(ROWS)) - - rows = self._fetch_single_page(table) - row_tuples = [r.values() for r in rows] - by_age = operator.itemgetter(1) - self.assertEqual(sorted(row_tuples, key=by_age), - sorted(ROWS, key=by_age)) - def test_load_table_from_local_avro_file_then_dump_table(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition @@ -817,37 +776,6 @@ def test_extract_table(self): got = destination.download_as_string().decode('utf-8') self.assertIn('Bharney Rhubble', got) - def test_extract_table_w_job_config(self): - from google.cloud.storage import Client as StorageClient - from google.cloud.bigquery.job import DestinationFormat - - storage_client = StorageClient() - local_id = unique_resource_id() - bucket_name = 'bq_extract_test' + local_id - blob_name = 'person_ages.csv' - dataset_id = _make_dataset_id('load_gcs_then_extract') - table_id = 'test_table' - table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) - table = Table(table_ref) - self.to_delete.insert(0, table) - self._load_table_for_extract_table( - storage_client, ROWS, bucket_name, blob_name, table_ref) - bucket = storage_client.bucket(bucket_name) - destination_blob_name = 'person_ages_out.csv' - destination = bucket.blob(destination_blob_name) - destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) - - config = bigquery.ExtractJobConfig() - config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON - job = Config.CLIENT.extract_table( - table, destination_uri, job_config=config) - job.result() - - self.to_delete.insert(0, destination) - got = destination.download_as_string().decode('utf-8') - self.assertIn('"Bharney Rhubble"', got) - self.assertEqual(job.destination_uri_file_counts, [1]) - def test_copy_table(self): # If we create a new table to copy from, the test won't work # because the new rows will be stored in the streaming buffer, @@ -1538,32 +1466,6 @@ def test_dbapi_w_query_parameters(self): row = Config.CURSOR.fetchone() self.assertIsNone(row, msg=msg) - def test_dump_table_w_public_data(self): - PUBLIC = 'bigquery-public-data' - DATASET_ID = 'samples' - TABLE_NAME = 'natality' - - table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME) - table = Config.CLIENT.get_table(table_ref) - self._fetch_single_page(table) - - def test_dump_table_w_public_data_selected_fields(self): - PUBLIC = 'bigquery-public-data' - DATASET_ID = 'samples' - TABLE_NAME = 'natality' - selected_fields = [ - bigquery.SchemaField('year', 'INTEGER', mode='NULLABLE'), - bigquery.SchemaField('month', 'INTEGER', mode='NULLABLE'), - bigquery.SchemaField('day', 'INTEGER', mode='NULLABLE'), - ] - table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME) - - rows = self._fetch_single_page( - table_ref, selected_fields=selected_fields) - - self.assertGreater(len(rows), 0) - self.assertEqual(len(rows[0]), 3) - def test_large_query_w_public_data(self): PUBLIC = 'bigquery-public-data' DATASET_ID = 'samples' From c419cf3af329b1e6905a309df71459d2a7e4dc4d Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 24 May 2018 10:36:07 -0700 Subject: [PATCH 0448/2016] BigQuery: add ddl-related query stats. (#5382) * BigQuery: add ddl-related query stats. * Change docstrings from Union->Optional, add integration assertions. * assertTrue -> assertEqual --- .../google/cloud/bigquery/job.py | 23 +++++++++++ .../google-cloud-bigquery/tests/system.py | 4 ++ .../tests/unit/test_job.py | 39 +++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 186beab31ba8..393dcdf5dad7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2266,6 +2266,29 @@ def cache_hit(self): """ return self._job_statistics().get('cacheHit') + @property + def ddl_operation_performed(self): + """Optional[str]: Return the DDL operation performed. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.ddlOperationPerformed + + """ + return self._job_statistics().get('ddlOperationPerformed') + + @property + def ddl_target_table(self): + """Optional[TableReference]: Return the DDL target table, present + for CREATE/DROP TABLE/VIEW queries. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.ddlTargetTable + """ + prop = self._job_statistics().get('ddlTargetTable') + if prop is not None: + prop = TableReference.from_api_repr(prop) + return prop + @property def num_dml_affected_rows(self): """Return the number of DML rows affected by the job. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 03465ff8d594..0b4c861eff39 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -409,6 +409,10 @@ def _create_table_many_columns(self, rowcount): """.format(dsname, table_id, colprojections, rowcount) query_job = Config.CLIENT.query(sql) query_job.result() + self.assertEqual(query_job.statement_type, 'CREATE_TABLE_AS_SELECT') + self.assertEqual(query_job.ddl_operation_performed, 'CREATE') + self.assertEqual(query_job.ddl_target_table, table_ref) + return table_ref def test_query_many_columns(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index d381c31895f3..a76d05f4a077 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2454,6 +2454,45 @@ def test_cache_hit(self): query_stats['cacheHit'] = True self.assertTrue(job.cache_hit) + def test_ddl_operation_performed(self): + op = 'SKIP' + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_operation_performed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.ddl_operation_performed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.ddl_operation_performed) + + query_stats['ddlOperationPerformed'] = op + self.assertEqual(job.ddl_operation_performed, op) + + def test_ddl_target_table(self): + from google.cloud.bigquery.table import TableReference + + ref_table = { + 'projectId': self.PROJECT, + 'datasetId': 'ddl_ds', + 'tableId': 'targettable', + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_target_table) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.ddl_target_table) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.ddl_target_table) + + query_stats['ddlTargetTable'] = ref_table + self.assertIsInstance(job.ddl_target_table, TableReference) + self.assertEqual(job.ddl_target_table.table_id, 'targettable') + self.assertEqual(job.ddl_target_table.dataset_id, 'ddl_ds') + self.assertEqual(job.ddl_target_table.project, self.PROJECT) + def test_num_dml_affected_rows(self): num_rows = 1234 client = _make_client(project=self.PROJECT) From 85972c8338bff38435beb8b915765b70f6d6f4d2 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 29 May 2018 11:04:05 -0700 Subject: [PATCH 0449/2016] BigQuery: Adds load_table_from_dataframe() and snippet (#5387) * Adds load_table_from_dataframe() and snippet * Add index to DataFrame in bigquery_load_table_dataframe sample --- .../google/cloud/bigquery/client.py | 61 +++++++++++++++- packages/google-cloud-bigquery/nox.py | 4 +- packages/google-cloud-bigquery/setup.py | 1 + .../tests/unit/test_client.py | 70 +++++++++++++++++++ 4 files changed, 132 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5ac988984617..8e303610d082 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -773,8 +773,8 @@ def load_table_from_file( job_config=None): """Upload the contents of this table from a file-like object. - Like load_table_from_uri, this creates, starts and returns - a ``LoadJob``. + Similar to :meth:`load_table_from_uri`, this method creates, starts and + returns a :class:`~google.cloud.bigquery.job.LoadJob`. Arguments: file_obj (file): A file handle opened in binary mode for reading. @@ -833,6 +833,63 @@ def load_table_from_file( raise exceptions.from_http_response(exc.response) return self.job_from_resource(response.json()) + def load_table_from_dataframe(self, dataframe, destination, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=None, job_id_prefix=None, + location=None, project=None, + job_config=None): + """Upload the contents of a table from a pandas DataFrame. + + Similar to :meth:`load_table_from_uri`, this method creates, starts and + returns a :class:`~google.cloud.bigquery.job.LoadJob`. + + Arguments: + dataframe (pandas.DataFrame): + A :class:`~pandas.DataFrame` containing the data to load. + destination (google.cloud.bigquery.table.TableReference): + The destination table to use for loading the data. If it is an + existing table, the schema of the :class:`~pandas.DataFrame` + must match the schema of the destination table. If the table + does not yet exist, the schema is inferred from the + :class:`~pandas.DataFrame`. + + Keyword Arguments: + num_retries (int, optional): Number of upload retries. + job_id (str, optional): Name of the job. + job_id_prefix (str, optional): + The user-provided prefix for a randomly generated + job ID. This parameter will be ignored if a ``job_id`` is + also given. + location (str): + Location where to run the job. Must match the location of the + destination table. + project (str, optional): + Project ID of the project of where to run the job. Defaults + to the client's project. + job_config (google.cloud.bigquery.job.LoadJobConfig, optional): + Extra configuration options for the job. + + Returns: + google.cloud.bigquery.job.LoadJob: A new load job. + + Raises: + ImportError: + If a usable parquet engine cannot be found. This method + requires one of :mod:`pyarrow` or :mod:`fastparquet` to be + installed. + """ + buffer = six.BytesIO() + dataframe.to_parquet(buffer) + + if job_config is None: + job_config = job.LoadJobConfig() + job_config.source_format = job.SourceFormat.PARQUET + + return self.load_table_from_file( + buffer, destination, num_retries=num_retries, rewind=True, + job_id=job_id, job_id_prefix=job_id_prefix, location=location, + project=project, job_config=job_config) + def _do_resumable_upload(self, stream, metadata, num_retries): """Perform a resumable upload. diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index a286b8651462..f0ddeadd8ac7 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -41,7 +41,7 @@ def default(session): if session.interpreter == 'python3.4': session.install('-e', '.') else: - session.install('-e', '.[pandas]') + session.install('-e', '.[pandas, pyarrow]') # IPython does not support Python 2 after version 5.x if session.interpreter == 'python2.7': @@ -142,7 +142,7 @@ def snippets(session, py): os.path.join('..', 'storage'), os.path.join('..', 'test_utils'), ) - session.install('-e', '.[pandas]') + session.install('-e', '.[pandas, pyarrow]') # Run py.test against the system tests. session.run( diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 28c44ab72bcd..75348ce203e3 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -35,6 +35,7 @@ ] extras = { 'pandas': 'pandas>=0.17.1', + 'pyarrow': 'pyarrow>=0.4.1', } diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 427f403e6be5..785f98b8e8bf 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -23,6 +23,14 @@ import six from six.moves import http_client import pytest +try: + import pandas +except (ImportError, AttributeError): # pragma: NO COVER + pandas = None +try: + import pyarrow +except (ImportError, AttributeError): # pragma: NO COVER + pyarrow = None from google.cloud.bigquery.dataset import DatasetReference @@ -3484,6 +3492,68 @@ def test_load_table_from_file_bad_mode(self): with pytest.raises(ValueError): client.load_table_from_file(file_obj, self.TABLE_REF) + @unittest.skipIf(pandas is None, 'Requires `pandas`') + @unittest.skipIf(pyarrow is None, 'Requires `pyarrow`') + def test_load_table_from_dataframe(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + + client = self._make_client() + records = [ + {'name': 'Monty', 'age': 100}, + {'name': 'Python', 'age': 60}, + ] + dataframe = pandas.DataFrame(records) + + load_patch = mock.patch( + 'google.cloud.bigquery.client.Client.load_table_from_file', + autospec=True) + with load_patch as load_table_from_file: + client.load_table_from_dataframe(dataframe, self.TABLE_REF) + + load_table_from_file.assert_called_once_with( + client, mock.ANY, self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, job_id=None, job_id_prefix=None, location=None, + project=None, job_config=mock.ANY) + + sent_file = load_table_from_file.mock_calls[0][1][1] + sent_bytes = sent_file.getvalue() + assert isinstance(sent_bytes, bytes) + assert len(sent_bytes) > 0 + + sent_config = load_table_from_file.mock_calls[0][2]['job_config'] + assert sent_config.source_format == job.SourceFormat.PARQUET + + @unittest.skipIf(pandas is None, 'Requires `pandas`') + @unittest.skipIf(pyarrow is None, 'Requires `pyarrow`') + def test_load_table_from_dataframe_w_custom_job_config(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + + client = self._make_client() + records = [ + {'name': 'Monty', 'age': 100}, + {'name': 'Python', 'age': 60}, + ] + dataframe = pandas.DataFrame(records) + job_config = job.LoadJobConfig() + + load_patch = mock.patch( + 'google.cloud.bigquery.client.Client.load_table_from_file', + autospec=True) + with load_patch as load_table_from_file: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config) + + load_table_from_file.assert_called_once_with( + client, mock.ANY, self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, job_id=None, job_id_prefix=None, location=None, + project=None, job_config=mock.ANY) + + sent_config = load_table_from_file.mock_calls[0][2]['job_config'] + assert sent_config is job_config + assert sent_config.source_format == job.SourceFormat.PARQUET + # Low-level tests @classmethod From 6062a8edad04c8e4009ea014d65fe0661227ffa6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 29 May 2018 11:15:35 -0700 Subject: [PATCH 0450/2016] BigQuery: fix IPython system test for new pytest (#5400) * BigQuery: fix IPython system test for new pytest Pytest changes the way that marks and fixtures work in pytest 3.6.0 that breaks when using unittest-style tests. Use pytest-style test for the IPython system test so that the fixture gets used. * flake8 fix --- .../google-cloud-bigquery/tests/system.py | 61 ++++++++++--------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 0b4c861eff39..2b0afb5fdb0f 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1797,36 +1797,37 @@ def temp_dataset(self, dataset_id, location=None): self.to_delete.append(dataset) return dataset - @pytest.mark.skipif(pandas is None, reason='Requires `pandas`') - @pytest.mark.skipif(IPython is None, reason='Requires `ipython`') - @pytest.mark.usefixtures('ipython_interactive') - def test_bigquery_magic(self): - ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') - sql = """ - SELECT - CONCAT( - 'https://stackoverflow.com/questions/', - CAST(id as STRING)) as url, - view_count - FROM `bigquery-public-data.stackoverflow.posts_questions` - WHERE tags like '%google-bigquery%' - ORDER BY view_count DESC - LIMIT 10 - """ - with io.capture_output() as captured: - result = ip.run_cell_magic('bigquery', '', sql) - - lines = re.split('\n|\r', captured.stdout) - # Removes blanks & terminal code (result of display clearing) - updates = list(filter(lambda x: bool(x) and x != '\x1b[2K', lines)) - assert re.match("Executing query with job ID: .*", updates[0]) - assert all(re.match("Query executing: .*s", line) - for line in updates[1:-1]) - assert re.match("Query complete after .*s", updates[-1]) - assert isinstance(result, pandas.DataFrame) - assert len(result) == 10 # verify row count - assert list(result) == ['url', 'view_count'] # verify column names + +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +@pytest.mark.skipif(IPython is None, reason='Requires `ipython`') +@pytest.mark.usefixtures('ipython_interactive') +def test_bigquery_magic(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + sql = """ + SELECT + CONCAT( + 'https://stackoverflow.com/questions/', + CAST(id as STRING)) as url, + view_count + FROM `bigquery-public-data.stackoverflow.posts_questions` + WHERE tags like '%google-bigquery%' + ORDER BY view_count DESC + LIMIT 10 + """ + with io.capture_output() as captured: + result = ip.run_cell_magic('bigquery', '', sql) + + lines = re.split('\n|\r', captured.stdout) + # Removes blanks & terminal code (result of display clearing) + updates = list(filter(lambda x: bool(x) and x != '\x1b[2K', lines)) + assert re.match("Executing query with job ID: .*", updates[0]) + assert all(re.match("Query executing: .*s", line) + for line in updates[1:-1]) + assert re.match("Query complete after .*s", updates[-1]) + assert isinstance(result, pandas.DataFrame) + assert len(result) == 10 # verify row count + assert list(result) == ['url', 'view_count'] # verify column names def _job_done(instance): From 613191158fd5b99f052739777d41a24352497a53 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 29 May 2018 12:52:17 -0700 Subject: [PATCH 0451/2016] BigQuery: Adds Python 3.7 and removes Python 3.4 (#5401) * BigQuery: Adds Python 3.7 and removes Python 3.4 --- packages/google-cloud-bigquery/nox.py | 6 +++--- packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index f0ddeadd8ac7..9f9eccd4db0a 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -37,8 +37,8 @@ def default(session): # Install all test dependencies, then install this package in-place. session.install('mock', 'pytest', 'pytest-cov', *LOCAL_DEPS) - # Pandas does not support Python 3.4 - if session.interpreter == 'python3.4': + # Pandas does not support Python 3.7 + if session.interpreter == 'python3.7': session.install('-e', '.') else: session.install('-e', '.[pandas, pyarrow]') @@ -65,7 +65,7 @@ def default(session): @nox.session -@nox.parametrize('py', ['2.7', '3.5', '3.6']) +@nox.parametrize('py', ['2.7', '3.5', '3.6', '3.7']) def unit(session, py): """Run the unit test suite.""" diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 75348ce203e3..c473dbcc2333 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -76,9 +76,9 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Operating System :: OS Independent', 'Topic :: Internet', ], From 96fc5a334d3f6d0768c93bd794548568f1eb7ccc Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Thu, 31 May 2018 10:39:24 -0700 Subject: [PATCH 0452/2016] BigQuery: Adds schema update options and snippets (#5415) --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/job.py | 50 +++++++++++++++++++ .../tests/unit/test_job.py | 34 ++++++++++++- 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 503c83bfb85c..751efd5a671b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -49,6 +49,7 @@ from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import QueryPriority +from google.cloud.bigquery.job import SchemaUpdateOption from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import UnknownJob from google.cloud.bigquery.job import WriteDisposition @@ -113,6 +114,7 @@ 'DestinationFormat', 'Encoding', 'QueryPriority', + 'SchemaUpdateOption', 'SourceFormat', 'WriteDisposition' ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 393dcdf5dad7..5026445f0287 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -206,6 +206,18 @@ class WriteDisposition(object): returned in the job result.""" +class SchemaUpdateOption(object): + """Specifies an update to the destination table schema as a side effect of + a load job. + """ + + ALLOW_FIELD_ADDITION = 'ALLOW_FIELD_ADDITION' + """Allow adding a nullable field to the schema.""" + + ALLOW_FIELD_RELAXATION = 'ALLOW_FIELD_RELAXATION' + """Allow relaxing a required field in the original schema to nullable.""" + + class _JobReference(object): """A reference to a job. @@ -1004,6 +1016,18 @@ def time_partitioning(self, value): api_repr = value.to_api_repr() self._set_sub_prop('timePartitioning', api_repr) + @property + def schema_update_options(self): + """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies + updates to the destination table schema to allow as a side effect of + the load job. + """ + return self._get_sub_prop('schemaUpdateOptions') + + @schema_update_options.setter + def schema_update_options(self, values): + self._set_sub_prop('schemaUpdateOptions', values) + class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table. @@ -1158,6 +1182,13 @@ def time_partitioning(self): """ return self._configuration.time_partitioning + @property + def schema_update_options(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`. + """ + return self._configuration.schema_update_options + @property def input_file_bytes(self): """Count of bytes loaded from source files. @@ -1971,6 +2002,18 @@ def time_partitioning(self, value): api_repr = value.to_api_repr() self._set_sub_prop('timePartitioning', api_repr) + @property + def schema_update_options(self): + """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies + updates to the destination table schema to allow as a side effect of + the query job. + """ + return self._get_sub_prop('schemaUpdateOptions') + + @schema_update_options.setter + def schema_update_options(self, values): + self._set_sub_prop('schemaUpdateOptions', values) + def to_api_repr(self): """Build an API representation of the query job config. @@ -2149,6 +2192,13 @@ def time_partitioning(self): """ return self._configuration.time_partitioning + @property + def schema_update_options(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.schema_update_options`. + """ + return self._configuration.schema_update_options + def _build_resource(self): """Generate a resource for :meth:`begin`.""" configuration = self._configuration.to_api_repr() diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a76d05f4a077..a8d0ed96299f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -380,6 +380,11 @@ def _verifyEnumConfigProperties(self, job, config): config['writeDisposition']) else: self.assertIsNone(job.write_disposition) + if 'schemaUpdateOptions' in config: + self.assertEqual( + job.schema_update_options, config['schemaUpdateOptions']) + else: + self.assertIsNone(job.schema_update_options) def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) @@ -467,6 +472,7 @@ def test_ctor(self): self.assertIsNone(job.write_disposition) self.assertIsNone(job.destination_encryption_configuration) self.assertIsNone(job.time_partitioning) + self.assertIsNone(job.schema_update_options) def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField @@ -780,6 +786,7 @@ def test_begin_w_autodetect(self): def test_begin_w_alternate_client(self): from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import SchemaUpdateOption from google.cloud.bigquery.job import WriteDisposition from google.cloud.bigquery.schema import SchemaField @@ -817,7 +824,10 @@ def test_begin_w_alternate_client(self): 'mode': 'REQUIRED', 'description': None, }, - ]} + ]}, + 'schemaUpdateOptions': [ + SchemaUpdateOption.ALLOW_FIELD_ADDITION, + ], } RESOURCE['configuration']['load'] = LOAD_CONFIGURATION conn1 = _make_connection() @@ -842,6 +852,9 @@ def test_begin_w_alternate_client(self): config.skip_leading_rows = 1 config.source_format = 'CSV' config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.schema_update_options = [ + SchemaUpdateOption.ALLOW_FIELD_ADDITION, + ] job._begin(client=client2) @@ -2127,6 +2140,11 @@ def _verifyResourceProperties(self, job, resource): 'kmsKeyName']) else: self.assertIsNone(job.destination_encryption_configuration) + if 'schemaUpdateOptions' in query_config: + self.assertEqual( + job.schema_update_options, query_config['schemaUpdateOptions']) + else: + self.assertIsNone(job.schema_update_options) def test_ctor_defaults(self): client = _make_client(project=self.PROJECT) @@ -2157,6 +2175,7 @@ def test_ctor_defaults(self): self.assertIsNone(job.table_definitions) self.assertIsNone(job.destination_encryption_configuration) self.assertIsNone(job.time_partitioning) + self.assertIsNone(job.schema_update_options) def test_ctor_w_udf_resources(self): from google.cloud.bigquery.job import QueryJobConfig @@ -2248,6 +2267,7 @@ def test_from_api_repr_with_encryption(self): def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import SchemaUpdateOption from google.cloud.bigquery.job import WriteDisposition client = _make_client(project=self.PROJECT) @@ -2260,6 +2280,9 @@ def test_from_api_repr_w_properties(self): 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, } + query_config['schemaUpdateOptions'] = [ + SchemaUpdateOption.ALLOW_FIELD_ADDITION, + ] klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) @@ -2841,6 +2864,7 @@ def test_begin_w_alternate_client(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import QueryPriority + from google.cloud.bigquery.job import SchemaUpdateOption from google.cloud.bigquery.job import WriteDisposition PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2866,7 +2890,10 @@ def test_begin_w_alternate_client(self): 'useLegacySql': True, 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, 'maximumBillingTier': 4, - 'maximumBytesBilled': '123456' + 'maximumBytesBilled': '123456', + 'schemaUpdateOptions': [ + SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + ] } RESOURCE['configuration']['query'] = QUERY_CONFIGURATION RESOURCE['configuration']['dryRun'] = True @@ -2890,6 +2917,9 @@ def test_begin_w_alternate_client(self): config.use_query_cache = True config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.maximum_bytes_billed = 123456 + config.schema_update_options = [ + SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + ] job = self._make_one( self.JOB_ID, self.QUERY, client1, job_config=config) From ad5c4fd497b97ffc1dfc79390217f8abd10d14f7 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 1 Jun 2018 16:13:02 -0700 Subject: [PATCH 0453/2016] BigQuery: Adds time filtering to client.list_jobs() (#5429) --- .../google/cloud/bigquery/client.py | 53 +++++++++++-------- .../tests/unit/test_client.py | 17 ++++++ 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 8e303610d082..23f3c40366de 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -33,6 +33,7 @@ from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW +from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem @@ -662,52 +663,62 @@ def cancel_job( def list_jobs( self, project=None, max_results=None, page_token=None, - all_users=None, state_filter=None, retry=DEFAULT_RETRY): + all_users=None, state_filter=None, retry=DEFAULT_RETRY, + min_creation_time=None, max_creation_time=None): """List jobs for the project associated with this client. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/list Args: - project (str): - Optional. Project ID to use for retreiving datasets. Defaults + project (str, optional): + Project ID to use for retreiving datasets. Defaults to the client's project. - max_results (int): - Optional. Maximum number of jobs to return. - page_token (str): - Optional. Opaque marker for the next "page" of jobs. If not + max_results (int, optional): + Maximum number of jobs to return. + page_token (str, optional): + Opaque marker for the next "page" of jobs. If not passed, the API will return the first page of jobs. The token marks the beginning of the iterator to be returned and the value of the ``page_token`` can be accessed at ``next_page_token`` of :class:`~google.api_core.page_iterator.HTTPIterator`. - all_users (bool): + all_users (bool, optional): If true, include jobs owned by all users in the project. - state_filter (str): - Optional. If set, include only jobs matching the given - state. One of - + state_filter (str, optional): + If set, include only jobs matching the given state. One of: * ``"done"`` * ``"pending"`` * ``"running"`` - retry (google.api_core.retry.Retry): - Optional. How to retry the RPC. + retry (google.api_core.retry.Retry, optional): + How to retry the RPC. + min_creation_time (int, optional): + Min value for job creation time, in milliseconds since the + POSIX epoch. If set, only jobs created after or at this + timestamp are returned. + max_creation_time (int, optional): + Max value for job creation time, in milliseconds since the + POSIX epoch. If set, only jobs created before or at this + timestamp are returned. Returns: google.api_core.page_iterator.Iterator: Iterable of job instances. """ - extra_params = {'projection': 'full'} + extra_params = { + 'allUsers': all_users, + 'stateFilter': state_filter, + 'minCreationTime': _str_or_none(min_creation_time), + 'maxCreationTime': _str_or_none(max_creation_time), + 'projection': 'full' + } + + extra_params = {param: value for param, value in extra_params.items() + if value is not None} if project is None: project = self.project - if all_users is not None: - extra_params['allUsers'] = all_users - - if state_filter is not None: - extra_params['stateFilter'] = state_filter - path = '/projects/%s/jobs' % (project,) return page_iterator.HTTPIterator( client=self, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 785f98b8e8bf..fe09b5e74556 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1753,6 +1753,23 @@ def test_list_jobs_w_project(self): 'projection': 'full', }) + def test_list_jobs_w_time_filter(self): + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = _make_connection({}) + + list(client.list_jobs( + min_creation_time=1, max_creation_time=1527874895820)) + + conn.api_request.assert_called_once_with( + method='GET', + path='/projects/%s/jobs' % self.PROJECT, + query_params={ + 'projection': 'full', + 'minCreationTime': '1', + 'maxCreationTime': '1527874895820', + }) + def test_load_table_from_uri(self): from google.cloud.bigquery.job import LoadJob From d837f1d828e88e068a27099b0d7f42e392fce0f3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 4 Jun 2018 10:03:22 -0700 Subject: [PATCH 0454/2016] BigQuery: Use datetime for jobs listing filter args. (#5431) datetime.datetime will be easier to work with for filtering things like "all the jobs run last month" or "all the jobs in the past 5 minutes". --- .../google/cloud/bigquery/client.py | 25 +++++++++++-------- .../tests/unit/test_client.py | 11 ++++++-- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 23f3c40366de..8215a629a12b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -28,6 +28,7 @@ from google.resumable_media.requests import ResumableUpload from google.api_core import page_iterator +import google.cloud._helpers from google.cloud import exceptions from google.cloud.client import ClientWithProject @@ -692,14 +693,14 @@ def list_jobs( * ``"running"`` retry (google.api_core.retry.Retry, optional): How to retry the RPC. - min_creation_time (int, optional): - Min value for job creation time, in milliseconds since the - POSIX epoch. If set, only jobs created after or at this - timestamp are returned. - max_creation_time (int, optional): - Max value for job creation time, in milliseconds since the - POSIX epoch. If set, only jobs created before or at this - timestamp are returned. + min_creation_time (datetitme.datetime, optional): + Min value for job creation time. If set, only jobs created + after or at this timestamp are returned. If the datetime has + no time zone assumes UTC time. + max_creation_time (datetime.datetime, optional): + Max value for job creation time. If set, only jobs created + before or at this timestamp are returned. If the datetime has + no time zone assumes UTC time. Returns: google.api_core.page_iterator.Iterator: @@ -708,8 +709,12 @@ def list_jobs( extra_params = { 'allUsers': all_users, 'stateFilter': state_filter, - 'minCreationTime': _str_or_none(min_creation_time), - 'maxCreationTime': _str_or_none(max_creation_time), + 'minCreationTime': _str_or_none( + google.cloud._helpers._millis_from_datetime( + min_creation_time)), + 'maxCreationTime': _str_or_none( + google.cloud._helpers._millis_from_datetime( + max_creation_time)), 'projection': 'full' } diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index fe09b5e74556..49ce16cea115 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -13,6 +13,7 @@ # limitations under the License. import copy +import datetime import decimal import email import io @@ -1758,8 +1759,14 @@ def test_list_jobs_w_time_filter(self): client = self._make_one(self.PROJECT, creds) conn = client._connection = _make_connection({}) + # One millisecond after the unix epoch. + start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) + # One millisecond after the the 2038 31-bit signed int rollover + end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) + end_time_millis = (((2 ** 31) - 1) * 1000) + 1 + list(client.list_jobs( - min_creation_time=1, max_creation_time=1527874895820)) + min_creation_time=start_time, max_creation_time=end_time)) conn.api_request.assert_called_once_with( method='GET', @@ -1767,7 +1774,7 @@ def test_list_jobs_w_time_filter(self): query_params={ 'projection': 'full', 'minCreationTime': '1', - 'maxCreationTime': '1527874895820', + 'maxCreationTime': str(end_time_millis), }) def test_load_table_from_uri(self): From 2156285e8b9799f3ef7176f9f171c8bb33f78aa5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 4 Jun 2018 10:39:38 -0700 Subject: [PATCH 0455/2016] BigQuery: fix datetitme typo in list_jobs docstring (#5433) --- packages/google-cloud-bigquery/google/cloud/bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 8215a629a12b..4812a31e761f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -693,7 +693,7 @@ def list_jobs( * ``"running"`` retry (google.api_core.retry.Retry, optional): How to retry the RPC. - min_creation_time (datetitme.datetime, optional): + min_creation_time (datetime.datetime, optional): Min value for job creation time. If set, only jobs created after or at this timestamp are returned. If the datetime has no time zone assumes UTC time. From 46208d5b4901ee460d8ff1f76740a1d9149837a3 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 5 Jun 2018 12:16:10 -0700 Subject: [PATCH 0456/2016] BigQuery: Adds examples of optional params for client.list_jobs() snippet (#5436) --- packages/google-cloud-bigquery/google/cloud/bigquery/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4812a31e761f..b5ef0bc4e7f2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -686,6 +686,7 @@ def list_jobs( :class:`~google.api_core.page_iterator.HTTPIterator`. all_users (bool, optional): If true, include jobs owned by all users in the project. + Defaults to :data:`False`. state_filter (str, optional): If set, include only jobs matching the given state. One of: * ``"done"`` From 955ad0844de0a62c0f0a118bd131c7f7ff4bd9c8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 5 Jun 2018 16:34:17 -0700 Subject: [PATCH 0457/2016] BigQuery: Exclude PyArrow from Windows Python 2.7 tests (#5442) * BigQuery: Exclude PyArrow from Windows Python 2.7 tests * BigQuery: Exclude PyArrow from Windows Python 2.7 installs. --- packages/google-cloud-bigquery/setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index c473dbcc2333..ef2062637442 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -35,7 +35,9 @@ ] extras = { 'pandas': 'pandas>=0.17.1', - 'pyarrow': 'pyarrow>=0.4.1', + # Exclude PyArrow dependency from Windows Python 2.7. + 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': + 'pyarrow>=0.4.1', } From a9b2ab12fd0a000eb897a201cac7eef076a30104 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 7 Jun 2018 12:49:35 -0400 Subject: [PATCH 0458/2016] Release 1.3.0 (#5451) --- packages/google-cloud-bigquery/CHANGELOG.md | 19 +++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 80a98b7a98fb..7f2d04197cbc 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.3.0 + +### New Features + +- NUMERIC type support (#5331) +- Add timeline and top-level slot-millis to query statistics. (#5312) +- Add additional statistics to query plan stages. (#5307) + +### Documentation + +- Use autosummary to split up API reference docs (#5340) +- Fix typo in Client docstrings (#5342) + +### Internal / Testing Changes + +- Prune systests identified as reduntant to snippets. (#5365) +- Modify system tests to use prerelease versions of grpcio (#5304) +- Improve system test performance (#5319) + ## 1.2.0 ### Implementation Changes diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index ef2062637442..77dcf695f65d 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.2.0' +version = '1.3.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 3031e835f0f4795f602b9a70e6c756e58e865a2d Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 15 Jun 2018 08:55:15 -0700 Subject: [PATCH 0459/2016] Adds load_table_from_dataframe() to usage docs and changelog and dedents snippets in usage page (#5501) --- packages/google-cloud-bigquery/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 7f2d04197cbc..1cf217b2bde7 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -11,6 +11,7 @@ - NUMERIC type support (#5331) - Add timeline and top-level slot-millis to query statistics. (#5312) - Add additional statistics to query plan stages. (#5307) +- Add `client.load_table_from_dataframe()` (#5387) ### Documentation From a81c67ffdf8d8c6f8619a957984442c0e56ec6a9 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 15 Jun 2018 12:52:07 -0700 Subject: [PATCH 0460/2016] Add Orc source format support and samples (#5500) --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5026445f0287..5ce92b16422c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -184,6 +184,9 @@ class SourceFormat(object): PARQUET = 'PARQUET' """Specifies Parquet format.""" + ORC = 'ORC' + """Specifies Orc format.""" + class WriteDisposition(object): """Specifies the action that occurs if destination table already exists. From 9bc9fb7a2bbb8a338b4c50f210da65069258896d Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 18 Jun 2018 10:43:06 -0700 Subject: [PATCH 0461/2016] Adds samples for query external data sources (GCS & Sheets) (#5491) --- .../google-cloud-bigquery/tests/system.py | 45 ------------------- 1 file changed, 45 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 2b0afb5fdb0f..0dd1e6196a74 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1518,51 +1518,6 @@ def test_query_results_to_dataframe(self): if not row[col] is None: self.assertIsInstance(row[col], exp_datatypes[col]) - def test_query_table_def(self): - gs_url = self._write_csv_to_storage( - 'bq_external_test' + unique_resource_id(), 'person_ages.csv', - HEADER_ROW, ROWS) - - job_config = bigquery.QueryJobConfig() - table_id = 'flintstones' - ec = bigquery.ExternalConfig('CSV') - ec.source_uris = [gs_url] - ec.schema = SCHEMA - ec.options.skip_leading_rows = 1 # skip the header row - job_config.table_definitions = {table_id: ec} - sql = 'SELECT * FROM %s' % table_id - - got_rows = Config.CLIENT.query(sql, job_config=job_config) - - row_tuples = [r.values() for r in got_rows] - by_age = operator.itemgetter(1) - self.assertEqual(sorted(row_tuples, key=by_age), - sorted(ROWS, key=by_age)) - - def test_query_external_table(self): - gs_url = self._write_csv_to_storage( - 'bq_external_test' + unique_resource_id(), 'person_ages.csv', - HEADER_ROW, ROWS) - dataset_id = _make_dataset_id('query_external_table') - dataset = self.temp_dataset(dataset_id) - table_id = 'flintstones' - table_arg = Table(dataset.table(table_id), schema=SCHEMA) - ec = bigquery.ExternalConfig('CSV') - ec.source_uris = [gs_url] - ec.options.skip_leading_rows = 1 # skip the header row - table_arg.external_data_configuration = ec - table = Config.CLIENT.create_table(table_arg) - self.to_delete.insert(0, table) - - sql = 'SELECT * FROM %s.%s' % (dataset_id, table_id) - - got_rows = Config.CLIENT.query(sql) - - row_tuples = [r.values() for r in got_rows] - by_age = operator.itemgetter(1) - self.assertEqual(sorted(row_tuples, key=by_age), - sorted(ROWS, key=by_age)) - def test_insert_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField From 35c558996d7fedcec5744d60a9fabf4f8047989c Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Thu, 21 Jun 2018 14:43:15 -0700 Subject: [PATCH 0462/2016] Adds BigQuery authorized view samples (#5515) --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 9d786703d02f..4910267846a7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -607,7 +607,7 @@ def view_query(self): to :data:`None`). By default, the query is treated as Standard SQL. To use Legacy - SQL, set view_use_legacy_sql to True. + SQL, set :attr:`view_use_legacy_sql` to :data:`True`. Raises: ValueError: For invalid value types. From 477c091104276fc78e2b6ca292d4cdbef44f463f Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 28 Jun 2018 16:46:08 -0400 Subject: [PATCH 0463/2016] BigQuery: add missing explict coverage for '_helpers' (#5550) --- .../google/cloud/bigquery/_helpers.py | 82 ++------- .../google/cloud/bigquery/job.py | 36 ++-- .../google/cloud/bigquery/table.py | 46 ++++- .../tests/unit/test__helpers.py | 168 ++++++++++++++---- .../tests/unit/test_table.py | 6 +- 5 files changed, 221 insertions(+), 117 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index f67d9802a6bf..41474458fdb5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -330,50 +330,6 @@ def _snake_to_camel_case(value): return words[0] + ''.join(map(str.capitalize, words[1:])) -def _item_to_row(iterator, resource): - """Convert a JSON row to the native object. - - .. note:: - - This assumes that the ``schema`` attribute has been - added to the iterator after being created, which - should be done by the caller. - - :type iterator: :class:`~google.api_core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type resource: dict - :param resource: An item to be converted to a row. - - :rtype: :class:`~google.cloud.bigquery.table.Row` - :returns: The next row in the page. - """ - from google.cloud.bigquery import Row - - return Row(_row_tuple_from_json(resource, iterator.schema), - iterator._field_to_index) - - -# pylint: disable=unused-argument -def _rows_page_start(iterator, page, response): - """Grab total rows when :class:`~google.cloud.iterator.Page` starts. - - :type iterator: :class:`~google.api_core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type page: :class:`~google.api_core.page_iterator.Page` - :param page: The page that was just created. - - :type response: dict - :param response: The JSON API response for a page of rows in a table. - """ - total_rows = response.get('totalRows') - if total_rows is not None: - total_rows = int(total_rows) - iterator._total_rows = total_rows -# pylint: enable=unused-argument - - def _should_retry(exc): """Predicate for determining when to retry. @@ -388,7 +344,18 @@ def _should_retry(exc): return reason == 'backendError' or reason == 'rateLimitExceeded' -def get_sub_prop(container, keys, default=None): +DEFAULT_RETRY = retry.Retry(predicate=_should_retry) +"""The default retry object. + +Any method with a ``retry`` parameter will be retried automatically, +with reasonable defaults. To disable retry, pass ``retry=None``. +To modify the default retry behavior, call a ``with_XXX`` method +on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, +pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. +""" + + +def _get_sub_prop(container, keys, default=None): """Get a nested value from a dictionary. This method works like ``dict.get(key)``, but for nested values. @@ -408,18 +375,18 @@ def get_sub_prop(container, keys, default=None): Examples: Get a top-level value (equivalent to ``container.get('key')``). - >>> get_sub_prop({'key': 'value'}, ['key']) + >>> _get_sub_prop({'key': 'value'}, ['key']) 'value' Get a top-level value, providing a default (equivalent to ``container.get('key', default='default')``). - >>> get_sub_prop({'nothere': 123}, ['key'], default='not found') + >>> _get_sub_prop({'nothere': 123}, ['key'], default='not found') 'not found' Get a nested value. - >>> get_sub_prop({'key': {'subkey': 'value'}}, ['key', 'subkey']) + >>> _get_sub_prop({'key': {'subkey': 'value'}}, ['key', 'subkey']) 'value' Returns: @@ -433,7 +400,7 @@ def get_sub_prop(container, keys, default=None): return sub_val -def set_sub_prop(container, keys, value): +def _set_sub_prop(container, keys, value): """Set a nested value in a dictionary. Arguments: @@ -450,21 +417,21 @@ def set_sub_prop(container, keys, value): Set a top-level value (equivalent to ``container['key'] = 'value'``). >>> container = {} - >>> set_sub_prop(container, ['key'], 'value') + >>> _set_sub_prop(container, ['key'], 'value') >>> container {'key': 'value'} Set a nested value. >>> container = {} - >>> set_sub_prop(container, ['key', 'subkey'], 'value') + >>> _set_sub_prop(container, ['key', 'subkey'], 'value') >>> container {'key': {'subkey': 'value'}} Replace a nested value. >>> container = {'key': {'subkey': 'prev'}} - >>> set_sub_prop(container, ['key', 'subkey'], 'new') + >>> _set_sub_prop(container, ['key', 'subkey'], 'new') >>> container {'key': {'subkey': 'new'}} """ @@ -476,17 +443,6 @@ def set_sub_prop(container, keys, value): sub_val[keys[-1]] = value -DEFAULT_RETRY = retry.Retry(predicate=_should_retry) -"""The default retry object. - -Any method with a ``retry`` parameter will be retried automatically, -with reasonable defaults. To disable retry, pass ``retry=None``. -To modify the default retry behavior, call a ``with_XXX`` method -on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, -pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. -""" - - def _int_or_none(value): """Helper: deserialize int value from JSON string.""" if isinstance(value, int): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5ce92b16422c..63ddfec3fb16 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -708,9 +708,9 @@ def _get_sub_prop(self, key, default=None): self._get_sub_prop('destinationTable') - This is equivalent to using the ``_helper.get_sub_prop`` function:: + This is equivalent to using the ``_helpers._get_sub_prop`` function:: - _helper.get_sub_prop( + _helpers._get_sub_prop( self._properties, ['query', 'destinationTable']) Arguments: @@ -724,7 +724,7 @@ def _get_sub_prop(self, key, default=None): Returns: object: The value if present or the default. """ - return _helpers.get_sub_prop( + return _helpers._get_sub_prop( self._properties, [self._job_type, key], default=default) def _set_sub_prop(self, key, value): @@ -736,9 +736,9 @@ def _set_sub_prop(self, key, value): self._set_sub_prop('useLegacySql', False) - This is equivalent to using the ``_helper.set_sub_prop`` function:: + This is equivalent to using the ``_helper._set_sub_prop`` function:: - _helper.set_sub_prop( + _helper._set_sub_prop( self._properties, ['query', 'useLegacySql'], False) Arguments: @@ -747,7 +747,7 @@ def _set_sub_prop(self, key, value): dictionary. value (object): Value to set. """ - _helpers.set_sub_prop(self._properties, [self._job_type, key], value) + _helpers._set_sub_prop(self._properties, [self._job_type, key], value) def to_api_repr(self): """Build an API representation of the job config. @@ -964,7 +964,7 @@ def schema(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema """ - schema = _helpers.get_sub_prop( + schema = _helpers._get_sub_prop( self._properties, ['load', 'schema', 'fields']) if schema is None: return @@ -974,7 +974,7 @@ def schema(self): def schema(self, value): if not all(hasattr(field, 'to_api_repr') for field in value): raise ValueError('Schema items must be fields') - _helpers.set_sub_prop( + _helpers._set_sub_prop( self._properties, ['load', 'schema', 'fields'], [field.to_api_repr() for field in value]) @@ -1241,9 +1241,9 @@ def _build_resource(self): """Generate a resource for :meth:`begin`.""" configuration = self._configuration.to_api_repr() if self.source_uris is not None: - _helpers.set_sub_prop( + _helpers._set_sub_prop( configuration, ['load', 'sourceUris'], self.source_uris) - _helpers.set_sub_prop( + _helpers._set_sub_prop( configuration, ['load', 'destinationTable'], self.destination.to_api_repr()) @@ -1284,7 +1284,7 @@ def from_api_repr(cls, resource, client): dest_config['projectId'], dest_config['datasetId']) destination = TableReference(ds_ref, dest_config['tableId']) # sourceUris will be absent if this is a file upload. - source_uris = _helpers.get_sub_prop( + source_uris = _helpers._get_sub_prop( config_resource, ['load', 'sourceUris']) job_ref = _JobReference._from_api_repr(resource['jobReference']) job = cls(job_ref, source_uris, destination, client, config) @@ -1423,9 +1423,9 @@ def _build_resource(self): } for table in self.sources] configuration = self._configuration.to_api_repr() - _helpers.set_sub_prop( + _helpers._set_sub_prop( configuration, ['copy', 'sourceTables'], source_refs) - _helpers.set_sub_prop( + _helpers._set_sub_prop( configuration, ['copy', 'destinationTable'], { @@ -1641,9 +1641,9 @@ def _build_resource(self): } configuration = self._configuration.to_api_repr() - _helpers.set_sub_prop( + _helpers._set_sub_prop( configuration, ['extract', 'sourceTable'], source_ref) - _helpers.set_sub_prop( + _helpers._set_sub_prop( configuration, ['extract', 'destinationUris'], self.destination_uris) @@ -1678,12 +1678,12 @@ def from_api_repr(cls, resource, client): """ job_id, config_resource = cls._get_resource_config(resource) config = ExtractJobConfig.from_api_repr(config_resource) - source_config = _helpers.get_sub_prop( + source_config = _helpers._get_sub_prop( config_resource, ['extract', 'sourceTable']) dataset = DatasetReference( source_config['projectId'], source_config['datasetId']) source = dataset.table(source_config['tableId']) - destination_uris = _helpers.get_sub_prop( + destination_uris = _helpers._get_sub_prop( config_resource, ['extract', 'destinationUris']) job = cls( @@ -2217,7 +2217,7 @@ def _build_resource(self): def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" self._configuration._properties = copy.deepcopy(configuration) - self.query = _helpers.get_sub_prop(configuration, ['query', 'query']) + self.query = _helpers._get_sub_prop(configuration, ['query', 'query']) @classmethod def from_api_repr(cls, resource, client): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 4910267846a7..06896ed7a527 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1089,9 +1089,9 @@ class RowIterator(HTTPIterator): def __init__(self, client, api_request, path, schema, page_token=None, max_results=None, page_size=None, extra_params=None): super(RowIterator, self).__init__( - client, api_request, path, item_to_value=_helpers._item_to_row, + client, api_request, path, item_to_value=_item_to_row, items_key='rows', page_token=page_token, max_results=max_results, - extra_params=extra_params, page_start=_helpers._rows_page_start, + extra_params=extra_params, page_start=_rows_page_start, next_token='pageToken') self._schema = schema self._field_to_index = _helpers._field_to_index_mapping(schema) @@ -1271,3 +1271,45 @@ def to_api_repr(self): serialized form. """ return self._properties + + +def _item_to_row(iterator, resource): + """Convert a JSON row to the native object. + + .. note:: + + This assumes that the ``schema`` attribute has been + added to the iterator after being created, which + should be done by the caller. + + :type iterator: :class:`~google.api_core.page_iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type resource: dict + :param resource: An item to be converted to a row. + + :rtype: :class:`~google.cloud.bigquery.table.Row` + :returns: The next row in the page. + """ + return Row(_helpers._row_tuple_from_json(resource, iterator.schema), + iterator._field_to_index) + + +# pylint: disable=unused-argument +def _rows_page_start(iterator, page, response): + """Grab total rows when :class:`~google.cloud.iterator.Page` starts. + + :type iterator: :class:`~google.api_core.page_iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type page: :class:`~google.api_core.page_iterator.Page` + :param page: The page that was just created. + + :type response: dict + :param response: The JSON API response for a page of rows in a table. + """ + total_rows = response.get('totalRows') + if total_rows is not None: + total_rows = int(total_rows) + iterator._total_rows = total_rows +# pylint: enable=unused-argument diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index b4899094dc93..b53ac85354fb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -17,6 +17,8 @@ import decimal import unittest +import mock + class Test_not_null(unittest.TestCase): @@ -181,6 +183,37 @@ def test_w_base64_encoded_text(self): self.assertEqual(coerced, expected) +class Test_timestamp_from_json(unittest.TestCase): + + def _call_fut(self, value, field): + from google.cloud.bigquery._helpers import _timestamp_from_json + + return _timestamp_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._call_fut(None, _Field('REQUIRED')) + + def test_w_string_value(self): + from google.cloud._helpers import _EPOCH + + coerced = self._call_fut('1.234567', object()) + self.assertEqual( + coerced, + _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) + + def test_w_float_value(self): + from google.cloud._helpers import _EPOCH + + coerced = self._call_fut(1.234567, object()) + self.assertEqual( + coerced, + _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) + + class Test_timestamp_query_param_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -238,37 +271,6 @@ def test_w_timestamp_invalid(self): self._call_fut('definitely-not-a-timestamp', _Field('NULLABLE')) -class Test_timestamp_from_json(unittest.TestCase): - - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _timestamp_from_json - - return _timestamp_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field('REQUIRED')) - - def test_w_string_value(self): - from google.cloud._helpers import _EPOCH - - coerced = self._call_fut('1.234567', object()) - self.assertEqual( - coerced, - _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) - - def test_w_float_value(self): - from google.cloud._helpers import _EPOCH - - coerced = self._call_fut(1.234567, object()) - self.assertEqual( - coerced, - _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) - - class Test_datetime_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -400,6 +402,27 @@ def test_w_record_subfield(self): self.assertEqual(coerced, expected) +class Test_field_to_index_mapping(unittest.TestCase): + + def _call_fut(self, schema): + from google.cloud.bigquery._helpers import _field_to_index_mapping + + return _field_to_index_mapping(schema) + + def test_w_empty_schema(self): + self.assertEqual(self._call_fut([]), {}) + + def test_w_non_empty_schema(self): + schema = [ + _Field('REPEATED', 'first', 'INTEGER'), + _Field('REQUIRED', 'second', 'INTEGER'), + _Field('REPEATED', 'third', 'INTEGER'), + ] + self.assertEqual( + self._call_fut(schema), + {'first': 0, 'second': 1, 'third': 2}) + + class Test_row_tuple_from_json(unittest.TestCase): def _call_fut(self, row, schema): @@ -786,6 +809,89 @@ def test_w_camel_case_string(self): self.assertEqual(self._call_fut('friendlyName'), 'friendlyName') +class Test_should_retry(unittest.TestCase): + + def _call_fut(self, exc): + from google.cloud.bigquery._helpers import _should_retry + + return _should_retry(exc) + + def test_wo_errors_attribute(self): + self.assertFalse(self._call_fut(object())) + + def test_w_empty_errors(self): + exc = mock.Mock(errors=[], spec=['errors']) + self.assertFalse(self._call_fut(exc)) + + def test_w_non_matching_reason(self): + exc = mock.Mock( + errors=[{'reason': 'bogus'}], spec=['errors']) + self.assertFalse(self._call_fut(exc)) + + def test_w_backendError(self): + exc = mock.Mock( + errors=[{'reason': 'backendError'}], spec=['errors']) + self.assertTrue(self._call_fut(exc)) + + def test_w_rateLimitExceeded(self): + exc = mock.Mock( + errors=[{'reason': 'rateLimitExceeded'}], spec=['errors']) + self.assertTrue(self._call_fut(exc)) + + +class Test__get_sub_prop(unittest.TestCase): + + def _call_fut(self, container, keys, **kw): + from google.cloud.bigquery._helpers import _get_sub_prop + + return _get_sub_prop(container, keys, **kw) + + def test_w_empty_container_default_default(self): + self.assertIsNone(self._call_fut({}, ['key1'])) + + def test_w_missing_key_explicit_default(self): + self.assertEqual(self._call_fut({'key2': 2}, ['key1'], default=1), 1) + + def test_w_matching_single_key(self): + self.assertEqual(self._call_fut({'key1': 1}, ['key1']), 1) + + def test_w_matching_first_key_missing_second_key(self): + self.assertIsNone( + self._call_fut({'key1': {'key3': 3}}, ['key1', 'key2'])) + + def test_w_matching_first_key_matching_second_key(self): + self.assertEqual( + self._call_fut({'key1': {'key2': 2}}, ['key1', 'key2']), 2) + + +class Test__set_sub_prop(unittest.TestCase): + + def _call_fut(self, container, keys, value): + from google.cloud.bigquery._helpers import _set_sub_prop + + return _set_sub_prop(container, keys, value) + + def test_w_empty_container_single_key(self): + container = {} + self._call_fut(container, ['key1'], 'value') + self.assertEqual(container, {'key1': 'value'}) + + def test_w_empty_container_nested_keys(self): + container = {} + self._call_fut(container, ['key1', 'key2', 'key3'], 'value') + self.assertEqual(container, {'key1': {'key2': {'key3': 'value'}}}) + + def test_w_existing_value(self): + container = {'key1': 'before'} + self._call_fut(container, ['key1'], 'after') + self.assertEqual(container, {'key1': 'after'}) + + def test_w_nested_keys_existing_value(self): + container = {'key1': {'key2': {'key3': 'before'}}} + self._call_fut(container, ['key1', 'key2', 'key3'], 'after') + self.assertEqual(container, {'key1': {'key2': {'key3': 'after'}}}) + + class Test__int_or_none(unittest.TestCase): def _call_fut(self, value): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 6f9f69c9f0a7..20eece5fecb8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1127,8 +1127,8 @@ class TestRowIterator(unittest.TestCase): def test_constructor(self): from google.cloud.bigquery.table import RowIterator - from google.cloud.bigquery._helpers import _item_to_row - from google.cloud.bigquery._helpers import _rows_page_start + from google.cloud.bigquery.table import _item_to_row + from google.cloud.bigquery.table import _rows_page_start client = mock.sentinel.client api_request = mock.sentinel.api_request @@ -1143,7 +1143,7 @@ def test_constructor(self): self.assertEqual(iterator._items_key, 'rows') self.assertIsNone(iterator.max_results) self.assertEqual(iterator.extra_params, {}) - self.assertEqual(iterator._page_start, _rows_page_start) + self.assertIs(iterator._page_start, _rows_page_start) # Changing attributes. self.assertEqual(iterator.page_number, 0) self.assertIsNone(iterator.next_page_token) From a2096ae4c40b37700d7251c2cbe260f61bbea6d5 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 28 Jun 2018 17:35:24 -0400 Subject: [PATCH 0464/2016] Move 'DEFAULT_RETRY' (w/ its predicate) to a new public 'retry' module. (#5552) Publish it, w/ its docstring, in the reference docs. --- .../google/cloud/bigquery/__init__.py | 2 +- .../google/cloud/bigquery/_helpers.py | 26 -------- .../google/cloud/bigquery/client.py | 2 +- .../google/cloud/bigquery/job.py | 63 ++++++++++--------- .../google/cloud/bigquery/retry.py | 41 ++++++++++++ .../tests/unit/test__helpers.py | 32 ---------- .../tests/unit/test_job.py | 17 ----- .../tests/unit/test_retry.py | 48 ++++++++++++++ 8 files changed, 123 insertions(+), 108 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/retry.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_retry.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 751efd5a671b..b5cc93b18974 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -31,7 +31,6 @@ from pkg_resources import get_distribution __version__ = get_distribution('google-cloud-bigquery').version -from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset @@ -57,6 +56,7 @@ from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource +from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import Table diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 41474458fdb5..6ef89e14e93f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -18,7 +18,6 @@ import datetime import decimal -from google.api_core import retry from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds @@ -330,31 +329,6 @@ def _snake_to_camel_case(value): return words[0] + ''.join(map(str.capitalize, words[1:])) -def _should_retry(exc): - """Predicate for determining when to retry. - - We retry if and only if the 'reason' is 'backendError' - or 'rateLimitExceeded'. - """ - if not hasattr(exc, 'errors'): - return False - if len(exc.errors) == 0: - return False - reason = exc.errors[0]['reason'] - return reason == 'backendError' or reason == 'rateLimitExceeded' - - -DEFAULT_RETRY = retry.Retry(predicate=_should_retry) -"""The default retry object. - -Any method with a ``retry`` parameter will be retried automatically, -with reasonable defaults. To disable retry, pass ``retry=None``. -To modify the default retry behavior, call a ``with_XXX`` method -on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, -pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. -""" - - def _get_sub_prop(container, keys, default=None): """Get a nested value from a dictionary. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index b5ef0bc4e7f2..f60edd129a17 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -32,7 +32,6 @@ from google.cloud import exceptions from google.cloud.client import ClientWithProject -from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._http import Connection @@ -41,6 +40,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import job from google.cloud.bigquery.query import _QueryResults +from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableListItem from google.cloud.bigquery.table import TableReference diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 63ddfec3fb16..7898c9b65d6c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -22,7 +22,6 @@ import google.api_core.future.polling from google.cloud import exceptions from google.cloud.exceptions import NotFound -from google.cloud._helpers import _datetime_from_microseconds from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.query import _query_param_from_api_repr @@ -30,14 +29,13 @@ from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource +from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery import _helpers -from google.cloud.bigquery._helpers import DEFAULT_RETRY -from google.cloud.bigquery._helpers import _int_or_none _DONE_STATE = 'DONE' _STOPPED_REASON = 'stopped' @@ -379,7 +377,7 @@ def created(self): if statistics is not None: millis = statistics.get('creationTime') if millis is not None: - return _datetime_from_microseconds(millis * 1000.0) + return _helpers._datetime_from_microseconds(millis * 1000.0) @property def started(self): @@ -392,7 +390,7 @@ def started(self): if statistics is not None: millis = statistics.get('startTime') if millis is not None: - return _datetime_from_microseconds(millis * 1000.0) + return _helpers._datetime_from_microseconds(millis * 1000.0) @property def ended(self): @@ -405,7 +403,7 @@ def ended(self): if statistics is not None: millis = statistics.get('endTime') if millis is not None: - return _datetime_from_microseconds(millis * 1000.0) + return _helpers._datetime_from_microseconds(millis * 1000.0) def _job_statistics(self): """Helper for job-type specific statistics-based properties.""" @@ -923,7 +921,7 @@ def skip_leading_rows(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows """ - return _int_or_none(self._get_sub_prop('skipLeadingRows')) + return _helpers._int_or_none(self._get_sub_prop('skipLeadingRows')) @skip_leading_rows.setter def skip_leading_rows(self, value): @@ -1876,7 +1874,7 @@ def maximum_bytes_billed(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled """ - return _int_or_none(self._get_sub_prop('maximumBytesBilled')) + return _helpers._int_or_none(self._get_sub_prop('maximumBytesBilled')) @maximum_bytes_billed.setter def maximum_bytes_billed(self, value): @@ -2361,7 +2359,7 @@ def num_dml_affected_rows(self): @property def slot_millis(self): """Union[int, None]: Slot-milliseconds used by this query job.""" - return _int_or_none(self._job_statistics().get('totalSlotMs')) + return _helpers._int_or_none(self._job_statistics().get('totalSlotMs')) @property def statement_type(self): @@ -2606,7 +2604,7 @@ def start(self): """Union[Datetime, None]: Datetime when the stage started.""" if self._properties.get('startMs') is None: return None - return _datetime_from_microseconds( + return _helpers._datetime_from_microseconds( int(self._properties.get('startMs')) * 1000.0) @property @@ -2614,7 +2612,7 @@ def end(self): """Union[Datetime, None]: Datetime when the stage ended.""" if self._properties.get('endMs') is None: return None - return _datetime_from_microseconds( + return _helpers._datetime_from_microseconds( int(self._properties.get('endMs')) * 1000.0) @property @@ -2622,7 +2620,7 @@ def input_stages(self): """List(int): Entry IDs for stages that were inputs for this stage.""" if self._properties.get('inputStages') is None: return [] - return [_int_or_none(entry) + return [_helpers._int_or_none(entry) for entry in self._properties.get('inputStages')] @property @@ -2630,26 +2628,27 @@ def parallel_inputs(self): """Union[int, None]: Number of parallel input segments within the stage. """ - return _int_or_none(self._properties.get('parallelInputs')) + return _helpers._int_or_none(self._properties.get('parallelInputs')) @property def completed_parallel_inputs(self): """Union[int, None]: Number of parallel input segments completed.""" - return _int_or_none(self._properties.get('completedParallelInputs')) + return _helpers._int_or_none( + self._properties.get('completedParallelInputs')) @property def wait_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent waiting to be scheduled. """ - return _int_or_none(self._properties.get('waitMsAvg')) + return _helpers._int_or_none(self._properties.get('waitMsAvg')) @property def wait_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent waiting to be scheduled. """ - return _int_or_none(self._properties.get('waitMsMax')) + return _helpers._int_or_none(self._properties.get('waitMsMax')) @property def wait_ratio_avg(self): @@ -2672,14 +2671,14 @@ def read_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent reading input. """ - return _int_or_none(self._properties.get('readMsAvg')) + return _helpers._int_or_none(self._properties.get('readMsAvg')) @property def read_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent reading input. """ - return _int_or_none(self._properties.get('readMsMax')) + return _helpers._int_or_none(self._properties.get('readMsMax')) @property def read_ratio_avg(self): @@ -2702,14 +2701,14 @@ def compute_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent on CPU-bound processing. """ - return _int_or_none(self._properties.get('computeMsAvg')) + return _helpers._int_or_none(self._properties.get('computeMsAvg')) @property def compute_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent on CPU-bound processing. """ - return _int_or_none(self._properties.get('computeMsMax')) + return _helpers._int_or_none(self._properties.get('computeMsMax')) @property def compute_ratio_avg(self): @@ -2732,14 +2731,14 @@ def write_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent writing output data. """ - return _int_or_none(self._properties.get('writeMsAvg')) + return _helpers._int_or_none(self._properties.get('writeMsAvg')) @property def write_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent writing output data. """ - return _int_or_none(self._properties.get('writeMsMax')) + return _helpers._int_or_none(self._properties.get('writeMsMax')) @property def write_ratio_avg(self): @@ -2760,12 +2759,12 @@ def write_ratio_max(self): @property def records_read(self): """Union[int, None]: Number of records read by this stage.""" - return _int_or_none(self._properties.get('recordsRead')) + return _helpers._int_or_none(self._properties.get('recordsRead')) @property def records_written(self): """Union[int, None]: Number of records written by this stage.""" - return _int_or_none(self._properties.get('recordsWritten')) + return _helpers._int_or_none(self._properties.get('recordsWritten')) @property def status(self): @@ -2777,14 +2776,16 @@ def shuffle_output_bytes(self): """Union[int, None]: Number of bytes written by this stage to intermediate shuffle. """ - return _int_or_none(self._properties.get('shuffleOutputBytes')) + return _helpers._int_or_none( + self._properties.get('shuffleOutputBytes')) @property def shuffle_output_bytes_spilled(self): """Union[int, None]: Number of bytes written by this stage to intermediate shuffle and spilled to disk. """ - return _int_or_none(self._properties.get('shuffleOutputBytesSpilled')) + return _helpers._int_or_none( + self._properties.get('shuffleOutputBytesSpilled')) @property def steps(self): @@ -2828,31 +2829,31 @@ def from_api_repr(cls, resource): def elapsed_ms(self): """Union[int, None]: Milliseconds elapsed since start of query execution.""" - return _int_or_none(self._properties.get('elapsedMs')) + return _helpers._int_or_none(self._properties.get('elapsedMs')) @property def active_units(self): """Union[int, None]: Current number of input units being processed by workers, reported as largest value since the last sample.""" - return _int_or_none(self._properties.get('activeUnits')) + return _helpers._int_or_none(self._properties.get('activeUnits')) @property def pending_units(self): """Union[int, None]: Current number of input units remaining for query stages active at this sample time.""" - return _int_or_none(self._properties.get('pendingUnits')) + return _helpers._int_or_none(self._properties.get('pendingUnits')) @property def completed_units(self): """Union[int, None]: Current number of input units completed by this query.""" - return _int_or_none(self._properties.get('completedUnits')) + return _helpers._int_or_none(self._properties.get('completedUnits')) @property def slot_millis(self): """Union[int, None]: Cumulative slot-milliseconds consumed by this query.""" - return _int_or_none(self._properties.get('totalSlotMs')) + return _helpers._int_or_none(self._properties.get('totalSlotMs')) class UnknownJob(_AsyncJob): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py new file mode 100644 index 000000000000..1e1ef57f116c --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -0,0 +1,41 @@ + +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.api_core import retry + + +def _should_retry(exc): + """Predicate for determining when to retry. + + We retry if and only if the 'reason' is 'backendError' + or 'rateLimitExceeded'. + """ + if not hasattr(exc, 'errors'): + return False + if len(exc.errors) == 0: + return False + reason = exc.errors[0]['reason'] + return reason == 'backendError' or reason == 'rateLimitExceeded' + + +DEFAULT_RETRY = retry.Retry(predicate=_should_retry) +"""The default retry object. + +Any method with a ``retry`` parameter will be retried automatically, +with reasonable defaults. To disable retry, pass ``retry=None``. +To modify the default retry behavior, call a ``with_XXX`` method +on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, +pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. +""" diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index b53ac85354fb..b70b81b1bea7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -17,8 +17,6 @@ import decimal import unittest -import mock - class Test_not_null(unittest.TestCase): @@ -809,36 +807,6 @@ def test_w_camel_case_string(self): self.assertEqual(self._call_fut('friendlyName'), 'friendlyName') -class Test_should_retry(unittest.TestCase): - - def _call_fut(self, exc): - from google.cloud.bigquery._helpers import _should_retry - - return _should_retry(exc) - - def test_wo_errors_attribute(self): - self.assertFalse(self._call_fut(object())) - - def test_w_empty_errors(self): - exc = mock.Mock(errors=[], spec=['errors']) - self.assertFalse(self._call_fut(exc)) - - def test_w_non_matching_reason(self): - exc = mock.Mock( - errors=[{'reason': 'bogus'}], spec=['errors']) - self.assertFalse(self._call_fut(exc)) - - def test_w_backendError(self): - exc = mock.Mock( - errors=[{'reason': 'backendError'}], spec=['errors']) - self.assertTrue(self._call_fut(exc)) - - def test_w_rateLimitExceeded(self): - exc = mock.Mock( - errors=[{'reason': 'rateLimitExceeded'}], spec=['errors']) - self.assertTrue(self._call_fut(exc)) - - class Test__get_sub_prop(unittest.TestCase): def _call_fut(self, container, keys, **kw): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a8d0ed96299f..594c605b505d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -57,23 +57,6 @@ def _make_connection(*responses): return mock_conn -class Test__int_or_none(unittest.TestCase): - - def _call_fut(self, *args, **kwargs): - from google.cloud.bigquery import job - - return job._int_or_none(*args, **kwargs) - - def test_w_int(self): - self.assertEqual(self._call_fut(13), 13) - - def test_w_none(self): - self.assertIsNone(self._call_fut(None)) - - def test_w_str(self): - self.assertEqual(self._call_fut('13'), 13) - - class Test__error_result_to_exception(unittest.TestCase): def _call_fut(self, *args, **kwargs): diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py new file mode 100644 index 000000000000..b94c26a3b990 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -0,0 +1,48 @@ + +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import mock + + +class Test_should_retry(unittest.TestCase): + + def _call_fut(self, exc): + from google.cloud.bigquery.retry import _should_retry + + return _should_retry(exc) + + def test_wo_errors_attribute(self): + self.assertFalse(self._call_fut(object())) + + def test_w_empty_errors(self): + exc = mock.Mock(errors=[], spec=['errors']) + self.assertFalse(self._call_fut(exc)) + + def test_w_non_matching_reason(self): + exc = mock.Mock( + errors=[{'reason': 'bogus'}], spec=['errors']) + self.assertFalse(self._call_fut(exc)) + + def test_w_backendError(self): + exc = mock.Mock( + errors=[{'reason': 'backendError'}], spec=['errors']) + self.assertTrue(self._call_fut(exc)) + + def test_w_rateLimitExceeded(self): + exc = mock.Mock( + errors=[{'reason': 'rateLimitExceeded'}], spec=['errors']) + self.assertTrue(self._call_fut(exc)) From bf6e14e2695a0874f78c099685b7213637ee787c Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 10 Jul 2018 16:10:28 -0700 Subject: [PATCH 0465/2016] update docs to show pyarrow as the only dependency of load_table_from_dataframe() (#5582) --- packages/google-cloud-bigquery/google/cloud/bigquery/client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f60edd129a17..5ef6b069bdfe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -892,8 +892,7 @@ def load_table_from_dataframe(self, dataframe, destination, Raises: ImportError: If a usable parquet engine cannot be found. This method - requires one of :mod:`pyarrow` or :mod:`fastparquet` to be - installed. + requires :mod:`pyarrow` to be installed. """ buffer = six.BytesIO() dataframe.to_parquet(buffer) From ec9a7a7ba20610f79dfd517d554f070788b6600d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Jul 2018 10:39:30 -0700 Subject: [PATCH 0466/2016] Skip update_table and update_dataset tests until etag issue is resolved. (#5590) * Skip update_table and update_dataset tests until etag issue is resolved. * Flake8 fixes --- packages/google-cloud-bigquery/tests/system.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 0dd1e6196a74..5a5a95fcd0bc 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -174,6 +174,10 @@ def test_get_dataset(self): self.assertEqual(got.friendly_name, 'Friendly') self.assertEqual(got.description, 'Description') + @pytest.mark.skip(reason=( + 'update_dataset() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/' + '5588')) def test_update_dataset(self): dataset = self.temp_dataset(_make_dataset_id('update_dataset')) self.assertTrue(_dataset_exists(dataset)) @@ -318,6 +322,10 @@ def test_list_tables(self): table.dataset_id == DATASET_ID)] self.assertEqual(len(created), len(tables_to_create)) + @pytest.mark.skip(reason=( + 'update_table() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/' + '5589')) def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id('update_table')) @@ -357,6 +365,10 @@ def test_update_table(self): with self.assertRaises(PreconditionFailed): Config.CLIENT.update_table(table2, ['description']) + @pytest.mark.skip(reason=( + 'update_table() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/' + '5589')) def test_update_table_schema(self): dataset = self.temp_dataset(_make_dataset_id('update_table')) From 99b99aca4ba82b72a53827aa48c78578c916923b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Jul 2018 15:13:28 -0700 Subject: [PATCH 0467/2016] Allow listing rows on an empty table. (#5584) * Allow listing rows on an empty table. Removes check for missing schema on Client.list_rows. This allows the rows of an empty table to be listed. Such a table might be the "destination table" for a DML or DDL query. * Only skip schema check if table has no creation date. This is how we disambiguate (1) The table has no schema. list_rows() shouldn't cause errors but currently is, and it's affecting DML & DDL queries. (2) The Table object was constructed locally via the Table() constructor. * Add creationTime workaround to QueryJob.result() --- .../google/cloud/bigquery/client.py | 2 +- .../google/cloud/bigquery/job.py | 3 +++ .../google-cloud-bigquery/tests/system.py | 14 ++++++++++++++ .../tests/unit/test_client.py | 19 +++++++++++++++++++ .../tests/unit/test_job.py | 19 +++++++++++++++++++ 5 files changed, 56 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5ef6b069bdfe..aae8872e20f0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1400,7 +1400,7 @@ def list_rows(self, table, selected_fields=None, max_results=None, elif isinstance(table, TableReference): raise ValueError('need selected_fields with TableReference') elif isinstance(table, Table): - if len(table.schema) == 0: + if len(table.schema) == 0 and table.created is None: raise ValueError(_TABLE_HAS_NO_SCHEMA) schema = table.schema else: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 7898c9b65d6c..05be9183ce5f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2508,6 +2508,9 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): schema = self._query_results.schema dest_table_ref = self.destination dest_table = Table(dest_table_ref, schema=schema) + # Set creation time to non-null to indicate this is actually the + # fetched schema to list_rows(). + dest_table._properties['creationTime'] = '0' return self._client.list_rows(dest_table, retry=retry) def to_dataframe(self): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 5a5a95fcd0bc..6d9cfc464912 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1720,6 +1720,20 @@ def test_nested_table_to_dataframe(self): row['record_col']['nested_record']['nested_nested_string'], 'some deep insight') + def test_list_rows_empty_table(self): + from google.cloud.bigquery.table import RowIterator + + dataset_id = _make_dataset_id('empty_table') + dataset = self.temp_dataset(dataset_id) + table_ref = dataset.table('empty_table') + table = Config.CLIENT.create_table(bigquery.Table(table_ref)) + + # It's a bit silly to list rows for an empty table, but this does + # happen as the result of a DDL query from an IPython magic command. + rows = Config.CLIENT.list_rows(table) + self.assertIsInstance(rows, RowIterator) + self.assertEqual(tuple(rows), ()) + def test_list_rows_page_size(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 49ce16cea115..c9a2e995f02e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -3079,6 +3079,25 @@ def _bigquery_timestamp_float_repr(ts_float): path='/%s' % PATH, query_params={}) + def test_list_rows_empty_table(self): + from google.cloud.bigquery.table import Table + + response = { + 'totalRows': '0', + 'rows': [], + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http) + client._connection = _make_connection(response, response) + + # Table that has no schema because it's an empty table. + table = Table(self.TABLE_REF) + table._properties['creationTime'] = '1234567890' + rows = tuple(client.list_rows(table)) + self.assertEqual(rows, ()) + def test_list_rows_query_params(self): from google.cloud.bigquery.table import Table, SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 594c605b505d..7ace62361e70 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -2712,6 +2712,25 @@ def test_result(self): self.assertEqual(list(result), []) + def test_result_w_empty_schema(self): + # Destination table may have no schema for some DDL and DML queries. + query_resource = { + 'jobComplete': True, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'schema': {'fields': []}, + } + connection = _make_connection(query_resource, query_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + result = job.result() + + self.assertEqual(list(result), []) + def test_result_invokes_begins(self): begun_resource = self._make_resource() incomplete_resource = { From 61c6f0de565ab8e8b3ff93a3f71555efbc6bc89a Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 12 Jul 2018 13:00:26 -0400 Subject: [PATCH 0468/2016] Add 'internalError' to retryable error reasons. (#5599) Closes #5547. --- .../google-cloud-bigquery/google/cloud/bigquery/retry.py | 9 ++++++++- packages/google-cloud-bigquery/tests/unit/test_retry.py | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 1e1ef57f116c..fc2eebeecfad 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -16,6 +16,13 @@ from google.api_core import retry +_RETRYABLE_REASONS = frozenset([ + 'backendError', + 'rateLimitExceeded', + 'internalError', +]) + + def _should_retry(exc): """Predicate for determining when to retry. @@ -27,7 +34,7 @@ def _should_retry(exc): if len(exc.errors) == 0: return False reason = exc.errors[0]['reason'] - return reason == 'backendError' or reason == 'rateLimitExceeded' + return reason in _RETRYABLE_REASONS DEFAULT_RETRY = retry.Retry(predicate=_should_retry) diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index b94c26a3b990..4c93b3b1cbdd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -46,3 +46,8 @@ def test_w_rateLimitExceeded(self): exc = mock.Mock( errors=[{'reason': 'rateLimitExceeded'}], spec=['errors']) self.assertTrue(self._call_fut(exc)) + + def test_w_internalError(self): + exc = mock.Mock( + errors=[{'reason': 'internalError'}], spec=['errors']) + self.assertTrue(self._call_fut(exc)) From 56b0085d87121ca5857f99fa24184fa5c05f0c27 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 12 Jul 2018 14:44:12 -0700 Subject: [PATCH 0469/2016] Don't raise exception if viewing CREATE VIEW DDL results (#5602) CREATE VIEW DDL queries set a "destination table" field, but that table is actually a view. If you attempt to list rows on the view table, it results in an error. The fix is to avoid listing rows altogether if getQueryResults() says that a query has completed but there are no rows in the result set. --- .../google/cloud/bigquery/job.py | 12 ++++++-- .../google/cloud/bigquery/table.py | 30 ++++++++++++++++--- .../tests/unit/test_job.py | 2 ++ .../tests/unit/test_table.py | 18 +++++++++++ 4 files changed, 55 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 05be9183ce5f..59f8bef83bb3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -31,6 +31,7 @@ from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import Table @@ -2505,12 +2506,17 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): self._query_results = self._client._get_query_results( self.job_id, retry, project=self.project, location=self.location) + + # If the query job is complete but there are no query results, this was + # special job, such as a DDL query. Return an empty result set to + # indicate success and avoid calling tabledata.list on a table which + # can't be read (such as a view table). + if self._query_results.total_rows is None: + return _EmptyRowIterator() + schema = self._query_results.schema dest_table_ref = self.destination dest_table = Table(dest_table_ref, schema=schema) - # Set creation time to non-null to indicate this is actually the - # fetched schema to list_rows(). - dest_table._properties['creationTime'] = '0' return self._client.list_rows(dest_table, retry=retry) def to_dataframe(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 06896ed7a527..d4bcb0884983 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -37,7 +37,11 @@ from google.cloud.bigquery.external_config import ExternalConfig -_TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'" +_NO_PANDAS_ERROR = ( + 'The pandas library is not installed, please install ' + 'pandas to use the to_dataframe() function.' +) +_TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' _MARKER = object() @@ -1137,9 +1141,7 @@ def to_dataframe(self): """ if pandas is None: - raise ValueError('The pandas library is not installed, please ' - 'install pandas to use the to_dataframe() ' - 'function.') + raise ValueError(_NO_PANDAS_ERROR) column_headers = [field.name for field in self.schema] rows = [row.values() for row in iter(self)] @@ -1147,6 +1149,26 @@ def to_dataframe(self): return pandas.DataFrame(rows, columns=column_headers) +class _EmptyRowIterator(object): + """An empty row iterator. + + This class prevents API requests when there are no rows to fetch or rows + are impossible to fetch, such as with query results for DDL CREATE VIEW + statements. + """ + schema = () + pages = () + total_rows = 0 + + def to_dataframe(self): + if pandas is None: + raise ValueError(_NO_PANDAS_ERROR) + return pandas.DataFrame() + + def __iter__(self): + return iter(()) + + class TimePartitioningType(object): """Specifies the type of time partitioning to perform.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 7ace62361e70..23cdbaa30857 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -3312,6 +3312,7 @@ def test_to_dataframe(self): 'projectId': self.PROJECT, 'jobId': self.JOB_ID, }, + 'totalRows': '4', 'schema': { 'fields': [ {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'}, @@ -3348,6 +3349,7 @@ def test_iter(self): 'projectId': self.PROJECT, 'jobId': self.JOB_ID, }, + 'totalRows': '0', 'schema': {'fields': [{'name': 'col1', 'type': 'STRING'}]}, } done_resource = copy.deepcopy(begun_resource) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 20eece5fecb8..e434fd2a7e11 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1123,6 +1123,24 @@ def test_row(self): row['z'] +class Test_EmptyRowIterator(unittest.TestCase): + + @mock.patch('google.cloud.bigquery.table.pandas', new=None) + def test_to_dataframe_error_if_pandas_is_none(self): + from google.cloud.bigquery.table import _EmptyRowIterator + row_iterator = _EmptyRowIterator() + with self.assertRaises(ValueError): + row_iterator.to_dataframe() + + @unittest.skipIf(pandas is None, 'Requires `pandas`') + def test_to_dataframe(self): + from google.cloud.bigquery.table import _EmptyRowIterator + row_iterator = _EmptyRowIterator() + df = row_iterator.to_dataframe() + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 0) # verify the number of rows + + class TestRowIterator(unittest.TestCase): def test_constructor(self): From 65c48c8c806cb597bff90cacad4607e1760968fd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 13 Jul 2018 07:24:03 -0700 Subject: [PATCH 0470/2016] Release bigquery 1.4.0 (#5604) * Release 1.4.0 --- packages/google-cloud-bigquery/CHANGELOG.md | 25 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 1cf217b2bde7..9afc48a3f57e 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,31 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.4.0 + +### Implementation Changes + +- Add 'internalError' to retryable error reasons. (#5599) +- Don't raise exception if viewing CREATE VIEW DDL results (#5602) + +### New Features + +- Add Orc source format support and samples (#5500) +- Move 'DEFAULT_RETRY' (w/ its predicate) to a new public 'retry' module. (#5552) +- Allow listing rows on an empty table. (#5584) + +### Documentation + +- Add load_table_from_dataframe() to usage docs and changelog and dedents snippets in usage page (#5501) +- Add samples for query external data sources (GCS & Sheets) (#5491) +- Add BigQuery authorized view samples (#5515) +- Update docs to show pyarrow as the only dependency of load_table_from_dataframe() (#5582) + +### Internal / Testing Changes + +- Add missing explict coverage for '_helpers' (#5550) +- Skip update_table and update_dataset tests until etag issue is resolved. (#5590) + ## 1.3.0 ### New Features diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 77dcf695f65d..c9fbaeea1488 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.3.0' +version = '1.4.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 01b4f4d8308f1290069f1d9d70362245c1ca56b9 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 16 Jul 2018 15:55:20 -0700 Subject: [PATCH 0471/2016] Add/refactor snippets for managing BigQuery jobs (#5631) * adds snippet for get_job() * updates list_jobs() to follow rubric * adds snippet for cancel_job() --- packages/google-cloud-bigquery/google/cloud/bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index aae8872e20f0..39f85fe35e37 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -635,7 +635,7 @@ def cancel_job( Keyword Arguments: project (str): - (Optional) ID of the project which ownsthe job (defaults to + (Optional) ID of the project which owns the job (defaults to the client's project). location (str): Location where the job was run. retry (google.api_core.retry.Retry): From 67f3c40141a5c85402e46bfdc65291da1fcec135 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 19 Jul 2018 14:34:27 -0400 Subject: [PATCH 0472/2016] BigQuery: Add 'QueryJob.estimated_bytes_processed' property (#5655) * Avoid importing module-/package-under-test at testcase module scope. * Add explict unit tests for 'job._JobReference'. * Add explicit unit tests for '_AsyncJob' ctor and read-only properties. * Add explicit unit tests for '_AsyncJob._set_properties' and helpers. * Add explict unit tests for '_AsyncJob._get_resource_config'. * Note '_AsyncJob._build_resource' explicitly as an abstract method. Update docstrings to note use by '_begin', rather than 'begin'. * Add explicit unit tests for '_AsyncJob._begin'. * Add explicit unit tests for '_AsyncJob.exists'. * Add explicit unit tests for '_AsyncJob.reload'. * Add explicit unit tests for '_AsyncJob.cancel'. * Add explicit unit tests for '_AsyncJob._set_future_result'. * Add explicit unit tests for '_AsyncJob.done'. * Add explicit unit tests for '_AsyncJob.resuit'. * Add explicit unit tests for '_AsyncJob.cancelled'. * Add explicit unit test for '_JobConfig' ctor. * Add explicit unit tests for '_JobConfig._{get,set}_sub_prop'. * Add explicit unit test for '_JobConfig.to_api_repr'. * Note why '_JobConfig.from_api_repr' is not tested directly. * Add 'QueryJob.estimated_bytes_proceesed' property. Closes #5646. --- .../google/cloud/bigquery/job.py | 28 +- .../tests/unit/test_job.py | 904 +++++++++++++++++- 2 files changed, 919 insertions(+), 13 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 59f8bef83bb3..97d3b76e6996 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -500,6 +500,10 @@ def _get_resource_config(cls, resource): '["configuration"]["%s"]' % cls._JOB_TYPE) return job_id, resource['configuration'] + def _build_resource(self): + """Helper: Generate a resource for :meth:`_begin`.""" + raise NotImplementedError("Abstract") + def _begin(self, client=None, retry=DEFAULT_RETRY): """API call: begin the job via a POST request @@ -1237,7 +1241,7 @@ def output_rows(self): return int(statistics['load']['outputRows']) def _build_resource(self): - """Generate a resource for :meth:`begin`.""" + """Generate a resource for :meth:`_begin`.""" configuration = self._configuration.to_api_repr() if self.source_uris is not None: _helpers._set_sub_prop( @@ -1413,7 +1417,7 @@ def destination_encryption_configuration(self): return self._configuration.destination_encryption_configuration def _build_resource(self): - """Generate a resource for :meth:`begin`.""" + """Generate a resource for :meth:`_begin`.""" source_refs = [{ 'projectId': table.project, @@ -1631,7 +1635,7 @@ def destination_uri_file_counts(self): return None def _build_resource(self): - """Generate a resource for :meth:`begin`.""" + """Generate a resource for :meth:`_begin`.""" source_ref = { 'projectId': self.source.project, @@ -2202,7 +2206,7 @@ def schema_update_options(self): return self._configuration.schema_update_options def _build_resource(self): - """Generate a resource for :meth:`begin`.""" + """Generate a resource for :meth:`_begin`.""" configuration = self._configuration.to_api_repr() resource = { @@ -2437,6 +2441,22 @@ def undeclared_query_parameters(self): return parameters + @property + def estimated_bytes_processed(self): + """Return the estimated number of bytes processed by the query. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.estimatedBytesProcessed + + :rtype: int or None + :returns: number of DML rows affected by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('estimatedBytesProcessed') + if result is not None: + result = int(result) + return result + def done(self, retry=DEFAULT_RETRY): """Refresh the job and checks if it is complete. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 23cdbaa30857..40067551d133 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -13,21 +13,14 @@ # limitations under the License. import copy +import unittest +import mock from six.moves import http_client -import unittest try: import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None -from google.cloud.bigquery.job import CopyJobConfig -from google.cloud.bigquery.job import ExtractJobConfig -from google.cloud.bigquery.job import LoadJobConfig -from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.table import EncryptionConfiguration -from google.cloud._helpers import _RFC3339_MICROS - -import mock def _make_credentials(): @@ -80,6 +73,850 @@ def test_missing_reason(self): self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR) +class Test_JobReference(unittest.TestCase): + JOB_ID = 'job-id' + PROJECT = 'test-project-123' + LOCATION = 'us-central' + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._JobReference + + def _make_one(self, job_id, project, location): + return self._get_target_class()(job_id, project, location) + + def test_ctor(self): + job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) + + self.assertEqual(job_ref.job_id, self.JOB_ID) + self.assertEqual(job_ref.project, self.PROJECT) + self.assertEqual(job_ref.location, self.LOCATION) + + def test__to_api_repr(self): + job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) + + self.assertEqual(job_ref._to_api_repr(), { + 'jobId': self.JOB_ID, + 'projectId': self.PROJECT, + 'location': self.LOCATION, + }) + + def test_from_api_repr(self): + api_repr = { + 'jobId': self.JOB_ID, + 'projectId': self.PROJECT, + 'location': self.LOCATION, + } + + job_ref = self._get_target_class()._from_api_repr(api_repr) + + self.assertEqual(job_ref.job_id, self.JOB_ID) + self.assertEqual(job_ref.project, self.PROJECT) + self.assertEqual(job_ref.location, self.LOCATION) + + +class Test_AsyncJob(unittest.TestCase): + JOB_ID = 'job-id' + PROJECT = 'test-project-123' + LOCATION = 'us-central' + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._AsyncJob + + def _make_one(self, job_id, client): + return self._get_target_class()(job_id, client) + + def _make_derived_class(self): + class Derived(self._get_target_class()): + _JOB_TYPE = 'derived' + + return Derived + + def _make_derived(self, job_id, client): + return self._make_derived_class()(job_id, client) + + @staticmethod + def _job_reference(job_id, project, location): + from google.cloud.bigquery import job + + return job._JobReference(job_id, project, location) + + def test_ctor_w_bare_job_id(self): + import threading + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertEqual(job.job_id, self.JOB_ID) + self.assertEqual(job.project, self.PROJECT) + self.assertIsNone(job.location) + self.assertIs(job._client, client) + self.assertEqual(job._properties, {}) + self.assertIsInstance(job._completion_lock, type(threading.Lock())) + self.assertEqual( + job.path, + '/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID)) + + def test_ctor_w_job_ref(self): + import threading + + other_project = 'other-project-234' + client = _make_client(project=other_project) + job_ref = self._job_reference(self.JOB_ID, self.PROJECT, self.LOCATION) + job = self._make_one(job_ref, client) + + self.assertEqual(job.job_id, self.JOB_ID) + self.assertEqual(job.project, self.PROJECT) + self.assertEqual(job.location, self.LOCATION) + self.assertIs(job._client, client) + self.assertEqual(job._properties, {}) + self.assertFalse(job._result_set) + self.assertIsInstance(job._completion_lock, type(threading.Lock())) + self.assertEqual( + job.path, + '/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID)) + + def test__require_client_w_none(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIs(job._require_client(None), client) + + def test__require_client_w_other(self): + client = _make_client(project=self.PROJECT) + other = object() + job = self._make_one(self.JOB_ID, client) + + self.assertIs(job._require_client(other), other) + + def test_job_type(self): + client = _make_client(project=self.PROJECT) + derived = self._make_derived(self.JOB_ID, client) + + self.assertEqual(derived.job_type, 'derived') + + def test_etag(self): + etag = 'ETAG-123' + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.etag) + job._properties['etag'] = etag + self.assertEqual(job.etag, etag) + + def test_self_link(self): + self_link = 'https://api.example.com/123' + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.self_link) + job._properties['selfLink'] = self_link + self.assertEqual(job.self_link, self_link) + + def test_user_email(self): + user_email = 'user@example.com' + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.user_email) + job._properties['user_email'] = user_email + self.assertEqual(job.user_email, user_email) + + @staticmethod + def _datetime_and_millis(): + import datetime + import pytz + from google.cloud._helpers import _millis + now = datetime.datetime.utcnow().replace( + microsecond=123000, # stats timestamps have ms precision + tzinfo=pytz.UTC) + return now, _millis(now) + + def test_created(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.created) + stats = job._properties['statistics'] = {} + self.assertIsNone(job.created) + stats['creationTime'] = millis + self.assertEqual(job.created, now) + + def test_started(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.started) + stats = job._properties['statistics'] = {} + self.assertIsNone(job.started) + stats['startTime'] = millis + self.assertEqual(job.started, now) + + def test_ended(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.ended) + stats = job._properties['statistics'] = {} + self.assertIsNone(job.ended) + stats['endTime'] = millis + self.assertEqual(job.ended, now) + + def test__job_statistics(self): + statistics = {'foo': 'bar'} + client = _make_client(project=self.PROJECT) + derived = self._make_derived(self.JOB_ID, client) + self.assertEqual(derived._job_statistics(), {}) + stats = derived._properties['statistics'] = {} + self.assertEqual(derived._job_statistics(), {}) + stats['derived'] = statistics + self.assertEqual(derived._job_statistics(), statistics) + + def test_error_result(self): + error_result = { + 'debugInfo': 'DEBUG INFO', + 'location': 'LOCATION', + 'message': 'MESSAGE', + 'reason': 'REASON' + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.error_result) + status = job._properties['status'] = {} + self.assertIsNone(job.error_result) + status['errorResult'] = error_result + self.assertEqual(job.error_result, error_result) + + def test_errors(self): + errors = [{ + 'debugInfo': 'DEBUG INFO', + 'location': 'LOCATION', + 'message': 'MESSAGE', + 'reason': 'REASON' + }] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.errors) + status = job._properties['status'] = {} + self.assertIsNone(job.errors) + status['errors'] = errors + self.assertEqual(job.errors, errors) + + def test_state(self): + state = 'STATE' + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.state) + status = job._properties['status'] = {} + self.assertIsNone(job.state) + status['state'] = state + self.assertEqual(job.state, state) + + def test__scrub_local_properties(self): + before = {'foo': 'bar'} + resource = before.copy() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._scrub_local_properties(resource) # no raise + self.assertEqual(resource, before) + + def test__copy_configuration_properties(self): + before = {'foo': 'bar'} + resource = before.copy() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + with self.assertRaises(NotImplementedError): + job._copy_configuration_properties(resource) + self.assertEqual(resource, before) + + def _set_properties_job(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._scrub_local_properties = mock.Mock() + job._copy_configuration_properties = mock.Mock() + job._set_future_result = mock.Mock() + job._properties = { + 'foo': 'bar', + } + return job + + def test__set_properties_no_stats(self): + config = { + 'test': True, + } + resource = { + 'configuration': config, + } + job = self._set_properties_job() + + job._set_properties(resource) + + self.assertEqual(job._properties, resource) + + job._scrub_local_properties.assert_called_once_with(resource) + job._copy_configuration_properties.assert_called_once_with(config) + + def test__set_properties_w_creation_time(self): + now, millis = self._datetime_and_millis() + config = { + 'test': True, + } + stats = { + 'creationTime': str(millis), + } + resource = { + 'configuration': config, + 'statistics': stats, + } + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned['statistics']['creationTime'] = float(millis) + self.assertEqual(job._properties, cleaned) + + job._scrub_local_properties.assert_called_once_with(resource) + job._copy_configuration_properties.assert_called_once_with(config) + + def test__set_properties_w_start_time(self): + now, millis = self._datetime_and_millis() + config = { + 'test': True, + } + stats = { + 'startTime': str(millis), + } + resource = { + 'configuration': config, + 'statistics': stats, + } + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned['statistics']['startTime'] = float(millis) + self.assertEqual(job._properties, cleaned) + + job._scrub_local_properties.assert_called_once_with(resource) + job._copy_configuration_properties.assert_called_once_with(config) + + def test__set_properties_w_end_time(self): + now, millis = self._datetime_and_millis() + config = { + 'test': True, + } + stats = { + 'endTime': str(millis), + } + resource = { + 'configuration': config, + 'statistics': stats, + } + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned['statistics']['endTime'] = float(millis) + self.assertEqual(job._properties, cleaned) + + job._scrub_local_properties.assert_called_once_with(resource) + job._copy_configuration_properties.assert_called_once_with(config) + + def test__get_resource_config_missing_job_ref(self): + resource = {} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._get_resource_config(resource) + + def test__get_resource_config_missing_job_id(self): + resource = { + 'jobReference': {}, + } + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._get_resource_config(resource) + + def test__get_resource_config_missing_configuration(self): + resource = { + 'jobReference': {'jobId': self.JOB_ID}, + } + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._get_resource_config(resource) + + def test__get_resource_config_missing_config_type(self): + resource = { + 'jobReference': {'jobId': self.JOB_ID}, + 'configuration': {}, + } + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._get_resource_config(resource) + + def test__get_resource_config_ok(self): + derived_config = {'foo': 'bar'} + resource = { + 'jobReference': {'jobId': self.JOB_ID}, + 'configuration': { + 'derived': derived_config, + }, + } + klass = self._make_derived_class() + + job_id, config = klass._get_resource_config(resource) + + self.assertEqual(job_id, self.JOB_ID) + self.assertEqual(config, {'derived': derived_config}) + + def test__build_resource(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + with self.assertRaises(NotImplementedError): + job._build_resource() + + def test__begin_already(self): + job = self._set_properties_job() + job._properties['status'] = {'state': 'WHATEVER'} + + with self.assertRaises(ValueError): + job._begin() + + def test__begin_defaults(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + resource = { + 'jobReference': { + 'jobId': self.JOB_ID, + 'projectId': self.PROJECT, + 'location': None, + }, + 'configuration': { + 'test': True, + } + } + job = self._set_properties_job() + builder = job._build_resource = mock.Mock() + builder.return_value = resource + call_api = job._client._call_api = mock.Mock() + call_api.return_value = resource + + job._begin() + + call_api.assert_called_once_with( + DEFAULT_RETRY, + method='POST', + path='/projects/{}/jobs'.format(self.PROJECT), + data=resource, + ) + self.assertEqual(job._properties, resource) + + def test__begin_explicit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = 'other-project-234' + resource = { + 'jobReference': { + 'jobId': self.JOB_ID, + 'projectId': self.PROJECT, + 'location': None, + }, + 'configuration': { + 'test': True, + } + } + job = self._set_properties_job() + builder = job._build_resource = mock.Mock() + builder.return_value = resource + client = _make_client(project=other_project) + call_api = client._call_api = mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + + job._begin(client=client, retry=retry) + + call_api.assert_called_once_with( + retry, + method='POST', + path='/projects/{}/jobs'.format(self.PROJECT), + data=resource, + ) + self.assertEqual(job._properties, resource) + + def test_exists_defaults_miss(self): + from google.cloud.exceptions import NotFound + from google.cloud.bigquery.retry import DEFAULT_RETRY + + job = self._set_properties_job() + job._job_ref._properties['location'] = self.LOCATION + call_api = job._client._call_api = mock.Mock() + call_api.side_effect = NotFound('testing') + + self.assertFalse(job.exists()) + + call_api.assert_called_once_with( + DEFAULT_RETRY, + method='GET', + path='/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID), + query_params={ + 'fields': 'id', + 'location': self.LOCATION, + } + ) + + def test_exists_explicit_hit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = 'other-project-234' + resource = { + 'jobReference': { + 'jobId': self.JOB_ID, + 'projectId': self.PROJECT, + 'location': None, + }, + 'configuration': { + 'test': True, + } + } + job = self._set_properties_job() + client = _make_client(project=other_project) + call_api = client._call_api = mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + + self.assertTrue(job.exists(client=client, retry=retry)) + + call_api.assert_called_once_with( + retry, + method='GET', + path='/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID), + query_params={'fields': 'id'} + ) + + def test_reload_defaults(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + resource = { + 'jobReference': { + 'jobId': self.JOB_ID, + 'projectId': self.PROJECT, + 'location': None, + }, + 'configuration': { + 'test': True, + } + } + job = self._set_properties_job() + job._job_ref._properties['location'] = self.LOCATION + call_api = job._client._call_api = mock.Mock() + call_api.return_value = resource + + job.reload() + + call_api.assert_called_once_with( + DEFAULT_RETRY, + method='GET', + path='/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID), + query_params={'location': self.LOCATION}, + ) + self.assertEqual(job._properties, resource) + + def test_reload_explicit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = 'other-project-234' + resource = { + 'jobReference': { + 'jobId': self.JOB_ID, + 'projectId': self.PROJECT, + 'location': None, + }, + 'configuration': { + 'test': True, + } + } + job = self._set_properties_job() + client = _make_client(project=other_project) + call_api = client._call_api = mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + + job.reload(client=client, retry=retry) + + call_api.assert_called_once_with( + retry, + method='GET', + path='/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID), + query_params={}, + ) + self.assertEqual(job._properties, resource) + + def test_cancel_defaults(self): + resource = { + 'jobReference': { + 'jobId': self.JOB_ID, + 'projectId': self.PROJECT, + 'location': None, + }, + 'configuration': { + 'test': True, + } + } + response = {'job': resource} + job = self._set_properties_job() + job._job_ref._properties['location'] = self.LOCATION + connection = job._client._connection = _make_connection(response) + + self.assertTrue(job.cancel()) + + connection.api_request.assert_called_once_with( + method='POST', + path='/projects/{}/jobs/{}/cancel'.format( + self.PROJECT, self.JOB_ID), + query_params={'location': self.LOCATION}, + ) + self.assertEqual(job._properties, resource) + + def test_cancel_explicit(self): + other_project = 'other-project-234' + resource = { + 'jobReference': { + 'jobId': self.JOB_ID, + 'projectId': self.PROJECT, + 'location': None, + }, + 'configuration': { + 'test': True, + } + } + response = {'job': resource} + job = self._set_properties_job() + client = _make_client(project=other_project) + connection = client._connection = _make_connection(response) + + self.assertTrue(job.cancel(client=client)) + + connection.api_request.assert_called_once_with( + method='POST', + path='/projects/{}/jobs/{}/cancel'.format( + self.PROJECT, self.JOB_ID), + query_params={}, + ) + self.assertEqual(job._properties, resource) + + def test__set_future_result_wo_done(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_not_called() + + def test__set_future_result_w_result_set(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties['status'] = {'state': 'DONE'} + job._result_set = True + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_not_called() + + def test__set_future_result_w_done_wo_result_set_w_error(self): + from google.cloud.exceptions import NotFound + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties['status'] = { + 'state': 'DONE', + 'errorResult': { + 'reason': 'notFound', + 'message': 'testing' + } + } + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_called_once() + args, kw = set_exception.call_args + exception, = args + self.assertIsInstance(exception, NotFound) + self.assertEqual(exception.message, 'testing') + self.assertEqual(kw, {}) + set_result.assert_not_called() + + def test__set_future_result_w_done_wo_result_set_wo_error(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties['status'] = {'state': 'DONE'} + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_called_once_with(job) + + def test_done_defaults_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + reload_ = job.reload = mock.Mock() + + self.assertFalse(job.done()) + + reload_.assert_called_once_with(retry=DEFAULT_RETRY) + + def test_done_explicit_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + reload_ = job.reload = mock.Mock() + retry = DEFAULT_RETRY.with_deadline(1) + + self.assertFalse(job.done(retry=retry)) + + reload_.assert_called_once_with(retry=retry) + + def test_done_already(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties['status'] = {'state': 'DONE'} + + self.assertTrue(job.done()) + + @mock.patch('google.api_core.future.polling.PollingFuture.result') + def test_result_default_wo_state(self, result): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + begin = job._begin = mock.Mock() + + self.assertIs(job.result(), result.return_value) + + begin.assert_called_once() + result.assert_called_once_with(timeout=None) + + @mock.patch('google.api_core.future.polling.PollingFuture.result') + def test_result_explicit_w_state(self, result): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties['status'] = {'state': 'DONE'} + begin = job._begin = mock.Mock() + timeout = 1 + + self.assertIs(job.result(timeout=timeout), result.return_value) + + begin.assert_not_called() + result.assert_called_once_with(timeout=timeout) + + def test_cancelled_wo_error_result(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertFalse(job.cancelled()) + + def test_cancelled_w_error_result_not_stopped(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties['status'] = { + 'errorResult': { + 'reason': 'other', + } + } + + self.assertFalse(job.cancelled()) + + def test_cancelled_w_error_result_w_stopped(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties['status'] = { + 'errorResult': { + 'reason': 'stopped', + } + } + + self.assertTrue(job.cancelled()) + + +class Test_JobConfig(unittest.TestCase): + JOB_TYPE = 'testing' + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._JobConfig + + def _make_one(self, job_type=JOB_TYPE): + return self._get_target_class()(job_type) + + def test_ctor(self): + job_config = self._make_one() + self.assertEqual(job_config._job_type, self.JOB_TYPE) + self.assertEqual(job_config._properties, {self.JOB_TYPE: {}}) + + @mock.patch('google.cloud.bigquery._helpers._get_sub_prop') + def test__get_sub_prop_wo_default(self, _get_sub_prop): + job_config = self._make_one() + key = 'key' + self.assertIs( + job_config._get_sub_prop(key), _get_sub_prop.return_value) + _get_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], default=None) + + @mock.patch('google.cloud.bigquery._helpers._get_sub_prop') + def test__get_sub_prop_w_default(self, _get_sub_prop): + job_config = self._make_one() + key = 'key' + default = 'default' + self.assertIs( + job_config._get_sub_prop(key, default=default), + _get_sub_prop.return_value) + _get_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], default=default) + + @mock.patch('google.cloud.bigquery._helpers._set_sub_prop') + def test__set_sub_prop(self, _set_sub_prop): + job_config = self._make_one() + key = 'key' + value = 'value' + job_config._set_sub_prop(key, value) + _set_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], value) + + def test_to_api_repr(self): + job_config = self._make_one() + expected = job_config._properties = { + self.JOB_TYPE: { + 'foo': 'bar', + } + } + found = job_config.to_api_repr() + self.assertEqual(found, expected) + self.assertIsNot(found, expected) # copied + + # 'from_api_repr' cannot be tested on '_JobConfig', because it presumes + # the ctor can be called w/o arguments + + class _Base(object): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import TableReference @@ -257,6 +1094,8 @@ def test_api_repr(self): self.assertEqual(config.to_api_repr(), resource) def test_to_api_repr_with_encryption(self): + from google.cloud.bigquery.table import EncryptionConfiguration + config = self._make_one() config.destination_encryption_configuration = EncryptionConfiguration( kms_key_name=self.KMS_KEY_NAME) @@ -459,6 +1298,7 @@ def test_ctor(self): def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.job import LoadJobConfig client = _make_client(project=self.PROJECT) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') @@ -512,11 +1352,14 @@ def test_result_invokes_begin(self): self.assertEqual(reload_request[1]['method'], 'GET') def test_schema_setter_non_list(self): + from google.cloud.bigquery.job import LoadJobConfig + config = LoadJobConfig() with self.assertRaises(TypeError): config.schema = object() def test_schema_setter_invalid_field(self): + from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.schema import SchemaField config = LoadJobConfig() @@ -525,6 +1368,7 @@ def test_schema_setter_invalid_field(self): config.schema = [full_name, object()] def test_schema_setter(self): + from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.schema import SchemaField config = LoadJobConfig() @@ -728,6 +1572,8 @@ def test_begin_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_autodetect(self): + from google.cloud.bigquery.job import LoadJobConfig + path = '/projects/{}/jobs'.format(self.PROJECT) resource = self._make_resource() resource['configuration']['load']['autodetect'] = True @@ -769,6 +1615,7 @@ def test_begin_w_autodetect(self): def test_begin_w_alternate_client(self): from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.job import SchemaUpdateOption from google.cloud.bigquery.job import WriteDisposition from google.cloud.bigquery.schema import SchemaField @@ -1053,6 +1900,8 @@ def _get_target_class(): return CopyJobConfig def test_to_api_repr_with_encryption(self): + from google.cloud.bigquery.table import EncryptionConfiguration + config = self._make_one() config.destination_encryption_configuration = EncryptionConfiguration( kms_key_name=self.KMS_KEY_NAME) @@ -1355,6 +2204,8 @@ def test_begin_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): + from google.cloud.bigquery.job import CopyJobConfig + from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import WriteDisposition PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -1682,6 +2533,8 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._make_resource() # Ensure None for missing server-set props @@ -1720,8 +2573,10 @@ def test_begin_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): + from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import Compression from google.cloud.bigquery.job import DestinationFormat + from google.cloud.bigquery.job import ExtractJobConfig PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._make_resource(ended=True) @@ -1801,6 +2656,8 @@ def test_exists_hit_w_alternate_client(self): query_params={'fields': 'id'}) def test_reload_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn = _make_connection(RESOURCE) @@ -1817,6 +2674,8 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): + from google.cloud.bigquery.dataset import DatasetReference + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn1 = _make_connection() @@ -1885,6 +2744,8 @@ def test_from_api_repr_empty(self): self.assertIsNone(config.destination_encryption_configuration) def test_from_api_repr_normal(self): + from google.cloud.bigquery.dataset import DatasetReference + resource = { 'query': { 'useLegacySql': True, @@ -1914,6 +2775,8 @@ def test_from_api_repr_normal(self): 'I should be saved, too.') def test_to_api_repr_normal(self): + from google.cloud.bigquery.dataset import DatasetReference + config = self._make_one() config.use_legacy_sql = True config.default_dataset = DatasetReference( @@ -1934,6 +2797,8 @@ def test_to_api_repr_normal(self): resource['someNewProperty'], 'Woohoo, alpha stuff.') def test_to_api_repr_with_encryption(self): + from google.cloud.bigquery.table import EncryptionConfiguration + config = self._make_one() config.destination_encryption_configuration = EncryptionConfiguration( kms_key_name=self.KMS_KEY_NAME) @@ -2290,6 +3155,7 @@ def test_done(self): self.assertTrue(job.done()) def test_query_plan(self): + from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.job import QueryPlanEntry from google.cloud.bigquery.job import QueryPlanEntryStep @@ -2694,6 +3560,22 @@ def test_undeclared_query_parameters(self): self.assertEqual(struct.struct_types, {'count': 'INT64'}) self.assertEqual(struct.struct_values, {'count': 123}) + def test_estimated_bytes_processed(self): + est_bytes = 123456 + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.estimated_bytes_processed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.estimated_bytes_processed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.estimated_bytes_processed) + + query_stats['estimatedBytesProcessed'] = str(est_bytes) + self.assertEqual(job.estimated_bytes_processed, est_bytes) + def test_result(self): query_resource = { 'jobComplete': True, @@ -3551,6 +4433,8 @@ def test_from_api_repr_normal(self): self.assertEqual(entry.steps, steps) def test_start(self): + from google.cloud._helpers import _RFC3339_MICROS + klass = self._get_target_class() entry = klass.from_api_repr({}) @@ -3564,6 +4448,8 @@ def test_start(self): self.START_RFC3339_MICROS) def test_end(self): + from google.cloud._helpers import _RFC3339_MICROS + klass = self._get_target_class() entry = klass.from_api_repr({}) From 8c9838ec8ebe92d542aa60d38c34c5ebaba31e5f Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 19 Jul 2018 16:14:02 -0400 Subject: [PATCH 0473/2016] BigQuery: add support for job labels (#5654) Add readonly '_AsyncJob.labels' property. Add writeable '_JobConfig.labels' property. Ensure that '{_AsyncJob,_JobConfig}.labels' can be modified in-place. Likewise for '{Dataset,DatasetListItem}.labels' and '{Table,TableListItem}.labels'. Closes #5645. --- .../google/cloud/bigquery/dataset.py | 4 +- .../google/cloud/bigquery/job.py | 24 +++++++++ .../google/cloud/bigquery/table.py | 4 +- .../tests/unit/test_dataset.py | 19 +++++++ .../tests/unit/test_job.py | 53 +++++++++++++++++++ .../tests/unit/test_table.py | 22 ++++++++ 6 files changed, 122 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index b5f2ef615fac..9927af4e9a7a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -473,7 +473,7 @@ def labels(self): Raises: ValueError: for invalid value types. """ - return self._properties.get('labels', {}) + return self._properties.setdefault('labels', {}) @labels.setter def labels(self, value): @@ -630,7 +630,7 @@ def friendly_name(self): @property def labels(self): """Dict[str, str]: Labels for the dataset.""" - return self._properties.get('labels', {}) + return self._properties.setdefault('labels', {}) @property def reference(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 97d3b76e6996..6b7bc3dfe14e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -340,6 +340,11 @@ def path(self): """ return '/projects/%s/jobs/%s' % (self.project, self.job_id) + @property + def labels(self): + """Dict[str, str]: Labels for the job.""" + return self._properties.setdefault('labels', {}) + @property def etag(self): """ETag for the job resource. @@ -702,6 +707,25 @@ def __init__(self, job_type): self._job_type = job_type self._properties = {job_type: {}} + @property + def labels(self): + """Dict[str, str]: Labels for the job. + + This method always returns a dict. To change a job's labels, + modify the dict, then call ``Client.update_job``. To delete a + label, set its value to :data:`None` before updating. + + Raises: + ValueError: If ``value`` type is invalid. + """ + return self._properties.setdefault('labels', {}) + + @labels.setter + def labels(self, value): + if not isinstance(value, dict): + raise ValueError("Pass a dict") + self._properties['labels'] = value + def _get_sub_prop(self, key, default=None): """Get a value in the ``self._properties[self._job_type]`` dictionary. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d4bcb0884983..1739c1bdd7c6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -362,7 +362,7 @@ def labels(self): Raises: ValueError: If ``value`` type is invalid. """ - return self._properties.get('labels', {}) + return self._properties.setdefault('labels', {}) @labels.setter def labels(self, value): @@ -830,7 +830,7 @@ def labels(self): modify the dict, then call ``Client.update_table``. To delete a label, set its value to :data:`None` before updating. """ - return self._properties.get('labels', {}) + return self._properties.setdefault('labels', {}) @property def full_table_id(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 314010202676..d43687e32244 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -443,6 +443,13 @@ def test_location_setter(self): dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') + def test_labels_update_in_place(self): + dataset = self._make_one(self.DS_REF) + del dataset._properties['labels'] # don't start w/ existing dict + labels = dataset.labels + labels['foo'] = 'bar' # update in place + self.assertEqual(dataset.labels, {'foo': 'bar'}) + def test_labels_setter(self): dataset = self._make_one(self.DS_REF) dataset.labels = {'color': 'green'} @@ -613,6 +620,18 @@ def test_ctor_wo_reference(self): with self.assertRaises(ValueError): self._make_one({}) + def test_labels_update_in_place(self): + resource = { + 'datasetReference': { + 'projectId': 'testproject', + 'datasetId': 'testdataset', + }, + } + dataset = self._make_one(resource) + labels = dataset.labels + labels['foo'] = 'bar' # update in place + self.assertEqual(dataset.labels, {'foo': 'bar'}) + def test_table(self): from google.cloud.bigquery.table import TableReference diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 40067551d133..4a7141d29d7d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -200,6 +200,27 @@ def test_job_type(self): self.assertEqual(derived.job_type, 'derived') + def test_labels_miss(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertEqual(job.labels, {}) + + def test_labels_update_in_place(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + labels = job.labels + labels['foo'] = 'bar' # update in place + self.assertEqual(job.labels, {'foo': 'bar'}) + + def test_labels_hit(self): + labels = { + 'foo': 'bar', + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties['labels'] = labels + self.assertEqual(job.labels, labels) + def test_etag(self): etag = 'ETAG-123' client = _make_client(project=self.PROJECT) @@ -916,6 +937,38 @@ def test_to_api_repr(self): # 'from_api_repr' cannot be tested on '_JobConfig', because it presumes # the ctor can be called w/o arguments + def test_labels_miss(self): + job_config = self._make_one() + self.assertEqual(job_config.labels, {}) + + def test_labels_update_in_place(self): + job_config = self._make_one() + labels = job_config.labels + labels['foo'] = 'bar' # update in place + self.assertEqual(job_config.labels, {'foo': 'bar'}) + + def test_labels_hit(self): + labels = { + 'foo': 'bar', + } + job_config = self._make_one() + job_config._properties['labels'] = labels + self.assertEqual(job_config.labels, labels) + + def test_labels_setter_invalid(self): + labels = object() + job_config = self._make_one() + with self.assertRaises(ValueError): + job_config.labels = labels + + def test_labels_setter(self): + labels = { + 'foo': 'bar', + } + job_config = self._make_one() + job_config.labels = labels + self.assertEqual(job_config._properties['labels'], labels) + class _Base(object): from google.cloud.bigquery.dataset import DatasetReference diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index e434fd2a7e11..40691029084b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -647,6 +647,15 @@ def test_external_data_configuration_setter_bad_value(self): with self.assertRaises(ValueError): table.external_data_configuration = 12345 + def test_labels_update_in_place(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + del table._properties['labels'] # don't start w/ existing dict + labels = table.labels + labels['foo'] = 'bar' # update in place + self.assertEqual(table.labels, {'foo': 'bar'}) + def test_labels_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1093,6 +1102,19 @@ def test_ctor_wo_reference(self): with self.assertRaises(ValueError): self._make_one({}) + def test_labels_update_in_place(self): + resource = { + 'tableReference': { + 'projectId': 'testproject', + 'datasetId': 'testdataset', + 'tableId': 'testtable', + }, + } + table = self._make_one(resource) + labels = table.labels + labels['foo'] = 'bar' # update in place + self.assertEqual(table.labels, {'foo': 'bar'}) + class TestRow(unittest.TestCase): From 87b3fb1e0a51c996d3ede7b281bee9c346832490 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 25 Jul 2018 14:09:48 -0400 Subject: [PATCH 0474/2016] Add 'ExternalSourceFormat' enum. (#5674) Closes #5499. --- .../google/cloud/bigquery/__init__.py | 14 ++++--- .../google/cloud/bigquery/external_config.py | 38 ++++++++++++++++--- .../google/cloud/bigquery/job.py | 7 +++- 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index b5cc93b18974..65392214eeda 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -35,6 +35,13 @@ from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery.external_config import BigtableOptions +from google.cloud.bigquery.external_config import BigtableColumnFamily +from google.cloud.bigquery.external_config import BigtableColumn +from google.cloud.bigquery.external_config import CSVOptions +from google.cloud.bigquery.external_config import GoogleSheetsOptions +from google.cloud.bigquery.external_config import ExternalSourceFormat from google.cloud.bigquery.job import Compression from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import CopyJobConfig @@ -64,12 +71,6 @@ from google.cloud.bigquery.table import Row from google.cloud.bigquery.table import TimePartitioningType from google.cloud.bigquery.table import TimePartitioning -from google.cloud.bigquery.external_config import ExternalConfig -from google.cloud.bigquery.external_config import BigtableOptions -from google.cloud.bigquery.external_config import BigtableColumnFamily -from google.cloud.bigquery.external_config import BigtableColumn -from google.cloud.bigquery.external_config import CSVOptions -from google.cloud.bigquery.external_config import GoogleSheetsOptions __all__ = [ '__version__', @@ -112,6 +113,7 @@ 'Compression', 'CreateDisposition', 'DestinationFormat', + 'ExternalSourceFormat', 'Encoding', 'QueryPriority', 'SchemaUpdateOption', diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index bd29e35dd9a9..c182b80d220f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -29,6 +29,33 @@ from google.cloud.bigquery.schema import SchemaField +class ExternalSourceFormat(object): + """The format for external data files.. + + Note that the set of allowed values for external data sources is different + than the set used for loading data (see + :class:`~google.cloud.bigquery.job.SourceFormat`). + """ + + CSV = 'CSV' + """Specifies CSV format.""" + + GOOGLE_SHEETS = 'GOOGLE_SHEETS' + """Specifies Google Sheets format.""" + + NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' + """Specifies newline delimited JSON format.""" + + AVRO = 'AVRO' + """Specifies Avro format.""" + + DATASTORE_BACKUP = 'DATASTORE_BACKUP' + """Specifies datastore backup format""" + + BIGTABLE = 'BIGTABLE' + """Specifies Bigtable format.""" + + class BigtableColumn(object): """Options for a Bigtable column.""" @@ -516,9 +543,9 @@ def from_api_repr(cls, resource): class ExternalConfig(object): """Description of an external data source. - :type source_format: str - :param source_format: the format of the external data. See - the ``source_format`` property on this class. + Args: + source_format :class:`ExternalSourceFormat`: + See :attr:`source_format`. """ def __init__(self, source_format): @@ -531,8 +558,9 @@ def __init__(self, source_format): @property def source_format(self): - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat + """:class:`ExternalSourceFormat`: Format of external source. + + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat """ return self._properties['sourceFormat'] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 6b7bc3dfe14e..f5e437ffa86e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -166,7 +166,12 @@ class QueryPriority(object): class SourceFormat(object): - """The format of the data files. The default value is :attr:`CSV`.""" + """The format of the data files. The default value is :attr:`CSV`. + + Note that the set of allowed values for loading data is different + than the set used for external data sources (see + :class:`~google.cloud.bigquery.external_config.ExternalSourceFormat`). + """ CSV = 'CSV' """Specifies CSV format.""" From 04b12850c6075751ae6c58bd3dac5e0143d2ffee Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 25 Jul 2018 14:21:29 -0400 Subject: [PATCH 0475/2016] Make 'Table.location' read-only. (#5687) Documented as '[Output-only]' in the API reference. Closes #5686. --- .../google/cloud/bigquery/table.py | 12 ++----- .../tests/unit/test_client.py | 32 +++++++++++-------- .../tests/unit/test_table.py | 14 -------- 3 files changed, 21 insertions(+), 37 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 1739c1bdd7c6..97091ac05c9f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -591,20 +591,12 @@ def friendly_name(self, value): @property def location(self): - """Union[str, None]: Location in which the table is hosted (defaults - to :data:`None`). + """Union[str, None]: Location in which the table is hosted - Raises: - ValueError: For invalid value types. + Defaults to :data:`None`. """ return self._properties.get('location') - @location.setter - def location(self, value): - if not isinstance(value, six.string_types) and value is not None: - raise ValueError("Pass a string, or None") - self._properties['location'] = value - @property def view_query(self): """Union[str, None]: SQL query defining the table as a view (defaults diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c9a2e995f02e..20059c451769 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1110,7 +1110,10 @@ def test_update_table_w_query(self): 'tableId': self.TABLE_ID }, 'schema': schema_resource, - 'view': {'query': query, 'useLegacySql': True}, + 'view': { + 'query': query, + 'useLegacySql': True, + }, 'location': location, 'expirationTime': _millis(exp_time) } @@ -1118,31 +1121,34 @@ def test_update_table_w_query(self): client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF, schema=schema) - table.location = location table.expires = exp_time table.view_query = query table.view_use_legacy_sql = True - updated_properties = ['schema', 'view_query', 'location', - 'expires', 'view_use_legacy_sql'] + updated_properties = [ + 'schema', 'view_query', 'expires', 'view_use_legacy_sql'] updated_table = client.update_table(table, updated_properties) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.view_query, table.view_query) + self.assertEqual(updated_table.expires, table.expires) + self.assertEqual( + updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + self.assertEqual(updated_table.location, location) + conn.api_request.assert_called_once_with( method='PATCH', path='/%s' % path, data={ - 'view': {'query': query, 'useLegacySql': True}, - 'location': location, + 'view': { + 'query': query, + 'useLegacySql': True, + }, 'expirationTime': str(_millis(exp_time)), 'schema': schema_resource, }, - headers=None) - self.assertEqual(updated_table.schema, table.schema) - self.assertEqual(updated_table.view_query, table.view_query) - self.assertEqual(updated_table.location, table.location) - self.assertEqual(updated_table.expires, table.expires) - self.assertEqual( - updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + headers=None, + ) def test_update_table_w_schema_None(self): # Simulate deleting schema: not sure if back-end will actually diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 40691029084b..b4b7a2671d7b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -560,20 +560,6 @@ def test_friendly_name_setter(self): table.friendly_name = 'FRIENDLY' self.assertEqual(table.friendly_name, 'FRIENDLY') - def test_location_setter_bad_value(self): - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref) - with self.assertRaises(ValueError): - table.location = 12345 - - def test_location_setter(self): - dataset = DatasetReference(self.PROJECT, self.DS_ID) - table_ref = dataset.table(self.TABLE_NAME) - table = self._make_one(table_ref) - table.location = 'LOCATION' - self.assertEqual(table.location, 'LOCATION') - def test_view_query_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) From 5d878f00bb7fb2028778ce93376b3a8661101fb9 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 25 Jul 2018 14:22:49 -0400 Subject: [PATCH 0476/2016] BigQuery add default location to client (#5678) * Add read-only 'Client.location' property. Settable via new 'location' argument to ctor. * Use 'Client.location' as default for 'Client._get_query_results'. * Use 'Client.location' as default for 'Client.get_job'. * Use 'Client.location' as default for 'Client.cancel_job'. * Use 'Client.location' as default for 'Client.load_table_from_uri'. * Use 'Client.location' as default for 'Client.load_table_from_file'. * Use 'Client.location' as default for 'Client.load_table_from_dataframe'. * Use 'Client.location' as default for 'Client.copy_table'. * Use 'Client.location' as default for 'Client.extract_table'. * Use 'Client.location' as default for 'Client.query'. * Use 'Client.location' as default default for 'create_dataset'. Closes #5148. --- .../google/cloud/bigquery/client.py | 89 ++- .../tests/unit/test_client.py | 566 +++++++++++++++--- 2 files changed, 578 insertions(+), 77 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 39f85fe35e37..4d7063e260ee 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -107,6 +107,8 @@ class Client(ClientWithProject): current object. This parameter should be considered private, and could change in the future. + location str: + (Optional) Default location for jobs / datasets / tables. Raises: google.auth.exceptions.DefaultCredentialsError: @@ -118,10 +120,17 @@ class Client(ClientWithProject): 'https://www.googleapis.com/auth/cloud-platform') """The scopes required for authenticating as a BigQuery consumer.""" - def __init__(self, project=None, credentials=None, _http=None): + def __init__( + self, project=None, credentials=None, _http=None, location=None): super(Client, self).__init__( project=project, credentials=credentials, _http=_http) self._connection = Connection(self) + self._location = location + + @property + def location(self): + """Default location for jobs / datasets / tables.""" + return self._location def get_service_account_email(self, project=None): """Get the email address of the project's BigQuery service account @@ -286,8 +295,14 @@ def create_dataset(self, dataset): """ path = '/projects/%s/datasets' % (dataset.project,) + + data = dataset.to_api_repr() + if data.get('location') is None and self.location is not None: + data['location'] = self.location + api_response = self._connection.api_request( - method='POST', path=path, data=dataset.to_api_repr()) + method='POST', path=path, data=data) + return Dataset.from_api_repr(api_response) def create_table(self, table): @@ -548,6 +563,9 @@ def _get_query_results( if timeout_ms is not None: extra_params['timeoutMs'] = timeout_ms + if location is None: + location = self.location + if location is not None: extra_params['location'] = location @@ -613,6 +631,10 @@ def get_job( if project is None: project = self.project + + if location is None: + location = self.location + if location is not None: extra_params['location'] = location @@ -652,6 +674,10 @@ def cancel_job( if project is None: project = self.project + + if location is None: + location = self.location + if location is not None: extra_params['location'] = location @@ -737,8 +763,12 @@ def list_jobs( extra_params=extra_params) def load_table_from_uri( - self, source_uris, destination, job_id=None, job_id_prefix=None, - location=None, project=None, job_config=None, + self, source_uris, destination, + job_id=None, + job_id_prefix=None, + location=None, + project=None, + job_config=None, retry=DEFAULT_RETRY): """Starts a job for loading data into a table from CloudStorage. @@ -773,14 +803,22 @@ def load_table_from_uri( google.cloud.bigquery.job.LoadJob: A new load job. """ job_id = _make_job_id(job_id, job_id_prefix) + if project is None: project = self.project + + if location is None: + location = self.location + job_ref = job._JobReference(job_id, project=project, location=location) + if isinstance(source_uris, six.string_types): source_uris = [source_uris] + load_job = job.LoadJob( job_ref, source_uris, destination, self, job_config) load_job._begin(retry=retry) + return load_job def load_table_from_file( @@ -831,14 +869,22 @@ def load_table_from_file( mode. """ job_id = _make_job_id(job_id, job_id_prefix) + if project is None: project = self.project + + if location is None: + location = self.location + job_ref = job._JobReference(job_id, project=project, location=location) load_job = job.LoadJob(job_ref, None, destination, self, job_config) job_resource = load_job._build_resource() + if rewind: file_obj.seek(0, os.SEEK_SET) + _check_mode(file_obj) + try: if size is None or size >= _MAX_MULTIPART_SIZE: response = self._do_resumable_upload( @@ -848,6 +894,7 @@ def load_table_from_file( file_obj, job_resource, size, num_retries) except resumable_media.InvalidResponse as exc: raise exceptions.from_http_response(exc.response) + return self.job_from_resource(response.json()) def load_table_from_dataframe(self, dataframe, destination, @@ -901,10 +948,19 @@ def load_table_from_dataframe(self, dataframe, destination, job_config = job.LoadJobConfig() job_config.source_format = job.SourceFormat.PARQUET + if location is None: + location = self.location + return self.load_table_from_file( - buffer, destination, num_retries=num_retries, rewind=True, - job_id=job_id, job_id_prefix=job_id_prefix, location=location, - project=project, job_config=job_config) + buffer, destination, + num_retries=num_retries, + rewind=True, + job_id=job_id, + job_id_prefix=job_id_prefix, + location=location, + project=project, + job_config=job_config, + ) def _do_resumable_upload(self, stream, metadata, num_retries): """Perform a resumable upload. @@ -1050,16 +1106,23 @@ def copy_table( google.cloud.bigquery.job.CopyJob: A new copy job instance. """ job_id = _make_job_id(job_id, job_id_prefix) + if project is None: project = self.project + + if location is None: + location = self.location + job_ref = job._JobReference(job_id, project=project, location=location) if not isinstance(sources, collections.Sequence): sources = [sources] + copy_job = job.CopyJob( job_ref, sources, destination, client=self, job_config=job_config) copy_job._begin(retry=retry) + return copy_job def extract_table( @@ -1103,8 +1166,13 @@ def extract_table( google.cloud.bigquery.job.ExtractJob: A new extract job instance. """ job_id = _make_job_id(job_id, job_id_prefix) + if project is None: project = self.project + + if location is None: + location = self.location + job_ref = job._JobReference(job_id, project=project, location=location) if isinstance(destination_uris, six.string_types): @@ -1114,6 +1182,7 @@ def extract_table( job_ref, source, destination_uris, client=self, job_config=job_config) extract_job._begin(retry=retry) + return extract_job def query( @@ -1149,12 +1218,18 @@ def query( google.cloud.bigquery.job.QueryJob: A new query job instance. """ job_id = _make_job_id(job_id, job_id_prefix) + if project is None: project = self.project + + if location is None: + location = self.location + job_ref = job._JobReference(job_id, project=project, location=location) query_job = job.QueryJob( job_ref, query, client=self, job_config=job_config) query_job._begin(retry=retry) + return query_job def insert_rows(self, table, rows, selected_fields=None, **kwargs): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 20059c451769..47f116d258ed 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -59,6 +59,7 @@ class TestClient(unittest.TestCase): TABLE_ID = 'TABLE_ID' TABLE_REF = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) KMS_KEY_NAME = 'projects/1/locations/global/keyRings/1/cryptoKeys/1' + LOCATION = 'us-central' @staticmethod def _get_target_class(): @@ -69,7 +70,7 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - def test_ctor(self): + def test_ctor_defaults(self): from google.cloud.bigquery._http import Connection creds = _make_credentials() @@ -79,6 +80,20 @@ def test_ctor(self): self.assertIsInstance(client._connection, Connection) self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) + self.assertIsNone(client.location) + + def test_ctor_w_location(self): + from google.cloud.bigquery._http import Connection + + creds = _make_credentials() + http = object() + location = 'us-central' + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http, location=location) + self.assertIsInstance(client._connection, Connection) + self.assertIs(client._connection.credentials, creds) + self.assertIs(client._connection.http, http) + self.assertEqual(client.location, location) def test__get_query_results_miss_w_explicit_project_and_timeout(self): from google.cloud.exceptions import NotFound @@ -89,13 +104,32 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): with self.assertRaises(NotFound): client._get_query_results( - 'nothere', None, project='other-project', location='US', + 'nothere', None, + project='other-project', + location=self.LOCATION, timeout_ms=500) conn.api_request.assert_called_once_with( method='GET', path='/projects/other-project/queries/nothere', - query_params={'maxResults': 0, 'timeoutMs': 500, 'location': 'US'}) + query_params={ + 'maxResults': 0, 'timeoutMs': 500, 'location': self.LOCATION}, + ) + + def test__get_query_results_miss_w_client_location(self): + from google.cloud.exceptions import NotFound + + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds, location=self.LOCATION) + conn = client._connection = _make_connection() + + with self.assertRaises(NotFound): + client._get_query_results('nothere', None) + + conn.api_request.assert_called_once_with( + method='GET', + path='/projects/PROJECT/queries/nothere', + query_params={'maxResults': 0, 'location': self.LOCATION}) def test__get_query_results_hit(self): job_id = 'query_job' @@ -401,27 +435,37 @@ def test_create_dataset_minimal(self): PATH = 'projects/%s/datasets' % self.PROJECT RESOURCE = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, 'etag': "etag", 'id': "%s:%s" % (self.PROJECT, self.DS_ID), } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(RESOURCE) - ds = client.create_dataset(Dataset(client.dataset(self.DS_ID))) + + ds_ref = client.dataset(self.DS_ID) + before = Dataset(ds_ref) + + after = client.create_dataset(before) + + self.assertEqual(after.dataset_id, self.DS_ID) + self.assertEqual(after.project, self.PROJECT) + self.assertEqual(after.etag, RESOURCE['etag']) + self.assertEqual(after.full_dataset_id, RESOURCE['id']) + conn.api_request.assert_called_once_with( method='POST', path='/%s' % PATH, data={ - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, 'labels': {}, }) - self.assertEqual(ds.dataset_id, self.DS_ID) - self.assertEqual(ds.project, self.PROJECT) - self.assertEqual(ds.etag, RESOURCE['etag']) - self.assertEqual(ds.full_dataset_id, RESOURCE['id']) def test_create_dataset_w_attrs(self): from google.cloud.bigquery.dataset import Dataset, AccessEntry @@ -438,8 +482,10 @@ def test_create_dataset_w_attrs(self): 'tableId': 'northern-hemisphere', } RESOURCE = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, 'etag': "etag", 'id': "%s:%s" % (self.PROJECT, self.DS_ID), 'description': DESCRIPTION, @@ -449,45 +495,56 @@ def test_create_dataset_w_attrs(self): 'labels': LABELS, 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, - {'view': VIEW}], + {'view': VIEW}, + ], } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(RESOURCE) - entries = [AccessEntry('OWNER', 'userByEmail', USER_EMAIL), - AccessEntry(None, 'view', VIEW)] - ds_arg = Dataset(client.dataset(self.DS_ID)) - ds_arg.access_entries = entries - ds_arg.description = DESCRIPTION - ds_arg.friendly_name = FRIENDLY_NAME - ds_arg.default_table_expiration_ms = 3600 - ds_arg.location = LOCATION - ds_arg.labels = LABELS - ds = client.create_dataset(ds_arg) + entries = [ + AccessEntry('OWNER', 'userByEmail', USER_EMAIL), + AccessEntry(None, 'view', VIEW), + ] + + ds_ref = client.dataset(self.DS_ID) + before = Dataset(ds_ref) + before.access_entries = entries + before.description = DESCRIPTION + before.friendly_name = FRIENDLY_NAME + before.default_table_expiration_ms = 3600 + before.location = LOCATION + before.labels = LABELS + + after = client.create_dataset(before) + + self.assertEqual(after.dataset_id, self.DS_ID) + self.assertEqual(after.project, self.PROJECT) + self.assertEqual(after.etag, RESOURCE['etag']) + self.assertEqual(after.full_dataset_id, RESOURCE['id']) + self.assertEqual(after.description, DESCRIPTION) + self.assertEqual(after.friendly_name, FRIENDLY_NAME) + self.assertEqual(after.location, LOCATION) + self.assertEqual(after.default_table_expiration_ms, 3600) + self.assertEqual(after.labels, LABELS) + conn.api_request.assert_called_once_with( method='POST', path='/%s' % PATH, data={ - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, 'description': DESCRIPTION, 'friendlyName': FRIENDLY_NAME, 'location': LOCATION, 'defaultTableExpirationMs': '3600', 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, - {'view': VIEW}], + {'view': VIEW}, + ], 'labels': LABELS, }) - self.assertEqual(ds.dataset_id, self.DS_ID) - self.assertEqual(ds.project, self.PROJECT) - self.assertEqual(ds.etag, RESOURCE['etag']) - self.assertEqual(ds.full_dataset_id, RESOURCE['id']) - self.assertEqual(ds.description, DESCRIPTION) - self.assertEqual(ds.friendly_name, FRIENDLY_NAME) - self.assertEqual(ds.location, LOCATION) - self.assertEqual(ds.default_table_expiration_ms, 3600) - self.assertEqual(ds.labels, LABELS) def test_create_dataset_w_custom_property(self): # The library should handle sending properties to the API that are not @@ -503,25 +560,112 @@ def test_create_dataset_w_custom_property(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(resource) - dataset = Dataset(client.dataset(self.DS_ID)) - dataset._properties['newAlphaProperty'] = 'unreleased property' - dataset = client.create_dataset(dataset) + ds_ref = client.dataset(self.DS_ID) + before = Dataset(ds_ref) + before._properties['newAlphaProperty'] = 'unreleased property' + + after = client.create_dataset(before) + + self.assertEqual(after.dataset_id, self.DS_ID) + self.assertEqual(after.project, self.PROJECT) + self.assertEqual( + after._properties['newAlphaProperty'], 'unreleased property') + conn.api_request.assert_called_once_with( method='POST', path=path, data={ - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, 'newAlphaProperty': 'unreleased property', 'labels': {}, } ) - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual( - dataset._properties['newAlphaProperty'], 'unreleased property') + def test_create_dataset_w_client_location_wo_dataset_location(self): + from google.cloud.bigquery.dataset import Dataset + + PATH = 'projects/%s/datasets' % self.PROJECT + RESOURCE = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'etag': "etag", + 'id': "%s:%s" % (self.PROJECT, self.DS_ID), + 'location': self.LOCATION, + } + creds = _make_credentials() + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION) + conn = client._connection = _make_connection(RESOURCE) + + ds_ref = client.dataset(self.DS_ID) + before = Dataset(ds_ref) + + after = client.create_dataset(before) + + self.assertEqual(after.dataset_id, self.DS_ID) + self.assertEqual(after.project, self.PROJECT) + self.assertEqual(after.etag, RESOURCE['etag']) + self.assertEqual(after.full_dataset_id, RESOURCE['id']) + self.assertEqual(after.location, self.LOCATION) + + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % PATH, + data={ + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, + 'labels': {}, + 'location': self.LOCATION, + }) + + def test_create_dataset_w_client_location_w_dataset_location(self): + from google.cloud.bigquery.dataset import Dataset + + PATH = 'projects/%s/datasets' % self.PROJECT + OTHER_LOCATION = 'EU' + RESOURCE = { + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, + 'etag': "etag", + 'id': "%s:%s" % (self.PROJECT, self.DS_ID), + 'location': OTHER_LOCATION, + } + creds = _make_credentials() + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION) + conn = client._connection = _make_connection(RESOURCE) + + ds_ref = client.dataset(self.DS_ID) + before = Dataset(ds_ref) + before.location = OTHER_LOCATION + + after = client.create_dataset(before) + + self.assertEqual(after.dataset_id, self.DS_ID) + self.assertEqual(after.project, self.PROJECT) + self.assertEqual(after.etag, RESOURCE['etag']) + self.assertEqual(after.full_dataset_id, RESOURCE['id']) + self.assertEqual(after.location, OTHER_LOCATION) + + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % PATH, + data={ + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, + 'labels': {}, + 'location': OTHER_LOCATION, + }) def test_create_table_w_day_partition(self): from google.cloud.bigquery.table import Table @@ -1429,12 +1573,36 @@ def test_get_job_miss_w_explict_project(self): conn = client._connection = _make_connection() with self.assertRaises(NotFound): - client.get_job(JOB_ID, project=OTHER_PROJECT, location='EU') + client.get_job( + JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) conn.api_request.assert_called_once_with( method='GET', path='/projects/OTHER_PROJECT/jobs/NONESUCH', - query_params={'projection': 'full', 'location': 'EU'}) + query_params={ + 'projection': 'full', + 'location': self.LOCATION, + }) + + def test_get_job_miss_w_client_location(self): + from google.cloud.exceptions import NotFound + + OTHER_PROJECT = 'OTHER_PROJECT' + JOB_ID = 'NONESUCH' + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds, location=self.LOCATION) + conn = client._connection = _make_connection() + + with self.assertRaises(NotFound): + client.get_job(JOB_ID, project=OTHER_PROJECT) + + conn.api_request.assert_called_once_with( + method='GET', + path='/projects/OTHER_PROJECT/jobs/NONESUCH', + query_params={ + 'projection': 'full', + 'location': self.LOCATION, + }) def test_get_job_hit(self): from google.cloud.bigquery.job import CreateDisposition @@ -1480,7 +1648,8 @@ def test_get_job_hit(self): conn.api_request.assert_called_once_with( method='GET', path='/projects/PROJECT/jobs/query_job', - query_params={'projection': 'full'}) + query_params={'projection': 'full'}, + ) def test_cancel_job_miss_w_explict_project(self): from google.cloud.exceptions import NotFound @@ -1492,12 +1661,36 @@ def test_cancel_job_miss_w_explict_project(self): conn = client._connection = _make_connection() with self.assertRaises(NotFound): - client.cancel_job(JOB_ID, project=OTHER_PROJECT, location='EU') + client.cancel_job( + JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) conn.api_request.assert_called_once_with( method='POST', path='/projects/OTHER_PROJECT/jobs/NONESUCH/cancel', - query_params={'projection': 'full', 'location': 'EU'}) + query_params={ + 'projection': 'full', + 'location': self.LOCATION, + }) + + def test_cancel_job_miss_w_client_location(self): + from google.cloud.exceptions import NotFound + + OTHER_PROJECT = 'OTHER_PROJECT' + JOB_ID = 'NONESUCH' + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds, location=self.LOCATION) + conn = client._connection = _make_connection() + + with self.assertRaises(NotFound): + client.cancel_job(JOB_ID, project=OTHER_PROJECT) + + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/OTHER_PROJECT/jobs/NONESUCH/cancel', + query_params={ + 'projection': 'full', + 'location': self.LOCATION, + }) def test_cancel_job_hit(self): from google.cloud.bigquery.job import QueryJob @@ -1842,7 +2035,7 @@ def test_load_table_from_uri_w_explicit_project(self): resource = { 'jobReference': { 'projectId': 'other-project', - 'location': 'US', + 'location': self.LOCATION, 'jobId': job_id, }, 'configuration': { @@ -1865,7 +2058,47 @@ def test_load_table_from_uri_w_explicit_project(self): client.load_table_from_uri( source_uri, destination, job_id=job_id, project='other-project', - location='US') + location=self.LOCATION) + + # Check that load_table_from_uri actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/other-project/jobs', + data=resource) + + def test_load_table_from_uri_w_client_location(self): + job_id = 'this-is-a-job-id' + destination_id = 'destination_table' + source_uri = 'gs://example/source.csv' + resource = { + 'jobReference': { + 'projectId': 'other-project', + 'location': self.LOCATION, + 'jobId': job_id, + }, + 'configuration': { + 'load': { + 'sourceUris': [source_uri], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': destination_id, + }, + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http, + location=self.LOCATION) + conn = client._connection = _make_connection(resource) + destination = client.dataset(self.DS_ID).table(destination_id) + + client.load_table_from_uri( + source_uri, destination, + job_id=job_id, + project='other-project') # Check that load_table_from_uri actually starts the job. conn.api_request.assert_called_once_with( @@ -2082,7 +2315,7 @@ def test_copy_table_w_explicit_project(self): resource = { 'jobReference': { 'projectId': 'other-project', - 'location': 'US', + 'location': self.LOCATION, 'jobId': job_id, }, 'configuration': { @@ -2113,13 +2346,61 @@ def test_copy_table_w_explicit_project(self): client.copy_table( source, destination, job_id=job_id, project='other-project', - location='US') + location=self.LOCATION) # Check that copy_table actually starts the job. conn.api_request.assert_called_once_with( method='POST', path='/projects/other-project/jobs', - data=resource) + data=resource, + ) + + def test_copy_table_w_client_location(self): + job_id = 'this-is-a-job-id' + source_id = 'source_table' + destination_id = 'destination_table' + resource = { + 'jobReference': { + 'projectId': 'other-project', + 'location': self.LOCATION, + 'jobId': job_id, + }, + 'configuration': { + 'copy': { + 'sourceTables': [ + { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': source_id, + }, + ], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': destination_id, + }, + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http, + location=self.LOCATION) + conn = client._connection = _make_connection(resource) + dataset = client.dataset(self.DS_ID) + source = dataset.table(source_id) + destination = dataset.table(destination_id) + + client.copy_table( + source, destination, job_id=job_id, project='other-project') + + # Check that copy_table actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/other-project/jobs', + data=resource, + ) def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob @@ -2173,7 +2454,7 @@ def test_extract_table_w_explicit_project(self): resource = { 'jobReference': { 'projectId': 'other-project', - 'location': 'US', + 'location': self.LOCATION, 'jobId': job_id, }, 'configuration': { @@ -2197,13 +2478,54 @@ def test_extract_table_w_explicit_project(self): client.extract_table( source, destination, job_id=job_id, project='other-project', - location='US') + location=self.LOCATION) # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( method='POST', path='/projects/other-project/jobs', - data=resource) + data=resource, + ) + + def test_extract_table_w_client_location(self): + job_id = 'job_id' + source_id = 'source_table' + destination = 'gs://bucket_name/object_name' + resource = { + 'jobReference': { + 'projectId': 'other-project', + 'location': self.LOCATION, + 'jobId': job_id, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': source_id, + }, + 'destinationUris': [destination], + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http, + location=self.LOCATION) + conn = client._connection = _make_connection(resource) + dataset = client.dataset(self.DS_ID) + source = dataset.table(source_id) + + client.extract_table( + source, destination, job_id=job_id, project='other-project') + + # Check that extract_table actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/other-project/jobs', + data=resource, + ) def test_extract_table_generated_job_id(self): from google.cloud.bigquery.job import ExtractJob @@ -2357,7 +2679,7 @@ def test_query_w_explicit_project(self): resource = { 'jobReference': { 'projectId': 'other-project', - 'location': 'US', + 'location': self.LOCATION, 'jobId': job_id, }, 'configuration': { @@ -2374,13 +2696,48 @@ def test_query_w_explicit_project(self): conn = client._connection = _make_connection(resource) client.query( - query, job_id=job_id, project='other-project', location='US') + query, job_id=job_id, project='other-project', + location=self.LOCATION) # Check that query actually starts the job. conn.api_request.assert_called_once_with( method='POST', path='/projects/other-project/jobs', - data=resource) + data=resource, + ) + + def test_query_w_client_location(self): + job_id = 'some-job-id' + query = 'select count(*) from persons' + resource = { + 'jobReference': { + 'projectId': 'other-project', + 'location': self.LOCATION, + 'jobId': job_id, + }, + 'configuration': { + 'query': { + 'query': query, + 'useLegacySql': False, + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http, + location=self.LOCATION) + conn = client._connection = _make_connection(resource) + + client.query( + query, job_id=job_id, project='other-project') + + # Check that query actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/other-project/jobs', + data=resource, + ) def test_query_w_udf_resources(self): from google.cloud.bigquery.job import QueryJob @@ -3300,14 +3657,16 @@ class TestClientUpload(object): TABLE_REF = DatasetReference( 'project_id', 'test_dataset').table('test_table') + LOCATION = 'us-central' + @staticmethod - def _make_client(transport=None): + def _make_client(transport=None, location=None): from google.cloud.bigquery import _http from google.cloud.bigquery import client cl = client.Client(project='project_id', credentials=_make_credentials(), - _http=transport) + _http=transport, location=location) cl._connection = mock.create_autospec(_http.Connection, instance=True) return cl @@ -3393,11 +3752,33 @@ def test_load_table_from_file_w_explicit_project(self): with do_upload_patch as do_upload: client.load_table_from_file( file_obj, self.TABLE_REF, job_id='job_id', - project='other-project', location='US', + project='other-project', location=self.LOCATION, job_config=self._make_config()) expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION) - expected_resource['jobReference']['location'] = 'US' + expected_resource['jobReference']['location'] = self.LOCATION + expected_resource['jobReference']['projectId'] = 'other-project' + do_upload.assert_called_once_with( + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES) + + def test_load_table_from_file_w_client_location(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client(location=self.LOCATION) + file_obj = self._make_file_obj() + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, self.TABLE_REF, job_id='job_id', + project='other-project', + job_config=self._make_config()) + + expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION) + expected_resource['jobReference']['location'] = self.LOCATION expected_resource['jobReference']['projectId'] = 'other-project' do_upload.assert_called_once_with( file_obj, @@ -3573,6 +3954,43 @@ def test_load_table_from_dataframe(self): sent_config = load_table_from_file.mock_calls[0][2]['job_config'] assert sent_config.source_format == job.SourceFormat.PARQUET + @unittest.skipIf(pandas is None, 'Requires `pandas`') + @unittest.skipIf(pyarrow is None, 'Requires `pyarrow`') + def test_load_table_from_dataframe_w_client_location(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + + client = self._make_client(location=self.LOCATION) + records = [ + {'name': 'Monty', 'age': 100}, + {'name': 'Python', 'age': 60}, + ] + dataframe = pandas.DataFrame(records) + + load_patch = mock.patch( + 'google.cloud.bigquery.client.Client.load_table_from_file', + autospec=True) + with load_patch as load_table_from_file: + client.load_table_from_dataframe(dataframe, self.TABLE_REF) + + load_table_from_file.assert_called_once_with( + client, mock.ANY, self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, job_id=None, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_file = load_table_from_file.mock_calls[0][1][1] + sent_bytes = sent_file.getvalue() + assert isinstance(sent_bytes, bytes) + assert len(sent_bytes) > 0 + + sent_config = load_table_from_file.mock_calls[0][2]['job_config'] + assert sent_config.source_format == job.SourceFormat.PARQUET + @unittest.skipIf(pandas is None, 'Requires `pandas`') @unittest.skipIf(pyarrow is None, 'Requires `pyarrow`') def test_load_table_from_dataframe_w_custom_job_config(self): @@ -3592,12 +4010,20 @@ def test_load_table_from_dataframe_w_custom_job_config(self): autospec=True) with load_patch as load_table_from_file: client.load_table_from_dataframe( - dataframe, self.TABLE_REF, job_config=job_config) + dataframe, self.TABLE_REF, + job_config=job_config, + location=self.LOCATION) load_table_from_file.assert_called_once_with( - client, mock.ANY, self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, job_id=None, job_id_prefix=None, location=None, - project=None, job_config=mock.ANY) + client, mock.ANY, self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=None, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) sent_config = load_table_from_file.mock_calls[0][2]['job_config'] assert sent_config is job_config From e3f6df4ecb07a4793c727895b3d200a629daa07d Mon Sep 17 00:00:00 2001 From: alec-brooks Date: Fri, 27 Jul 2018 03:08:10 +1000 Subject: [PATCH 0477/2016] Fix typo in CopyJob sources docstring (#5690) --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index f5e437ffa86e..2d14a7d51ec2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1393,7 +1393,7 @@ class CopyJob(_AsyncJob): :param job_id: the job's ID, within the project belonging to ``client``. :type sources: list of :class:`google.cloud.bigquery.table.TableReference` - :param sources: Table into which data is to be loaded. + :param sources: Table from which data is to be loaded. :type destination: :class:`google.cloud.bigquery.table.TableReference` :param destination: Table into which data is to be loaded. From 4d8f117c695247b8a0340fa645940362805dd77a Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 31 Jul 2018 14:17:43 -0400 Subject: [PATCH 0478/2016] Add 'clustering_fields' properties. (#5630) * Add 'Table.clustering_fields' property. * Add 'clustering_fields' support for load / query jobs. --- .../google/cloud/bigquery/_helpers.py | 36 ++++++++ .../google/cloud/bigquery/job.py | 91 +++++++++++++++++++ .../google/cloud/bigquery/table.py | 30 ++++++ .../google-cloud-bigquery/tests/system.py | 38 ++++++++ .../tests/unit/test__helpers.py | 23 +++++ .../tests/unit/test_job.py | 20 ++++ .../tests/unit/test_table.py | 32 +++++++ 7 files changed, 270 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 6ef89e14e93f..49039e65f7dd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -417,6 +417,42 @@ def _set_sub_prop(container, keys, value): sub_val[keys[-1]] = value +def _del_sub_prop(container, keys): + """Remove a nested key fro a dictionary. + + Arguments: + container (dict): + A dictionary which may contain other dictionaries as values. + keys (iterable): + A sequence of keys to attempt to clear the value for. Each item in + the sequence represents a deeper nesting. The first key is for + the top level. If there is a dictionary there, the second key + attempts to get the value within that, and so on. + + Examples: + Remove a top-level value (equivalent to ``del container['key']``). + + >>> container = {'key': 'value'} + >>> _del_sub_prop(container, ['key']) + >>> container + {} + + Remove a nested value. + + >>> container = {'key': {'subkey': 'value'}} + >>> _del_sub_prop(container, ['key', 'subkey']) + >>> container + {'key': {}} + """ + sub_val = container + for key in keys[:-1]: + if key not in sub_val: + sub_val[key] = {} + sub_val = sub_val[key] + if keys[-1] in sub_val: + del sub_val[keys[-1]] + + def _int_or_none(value): """Helper: deserialize int value from JSON string.""" if isinstance(value, int): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 2d14a7d51ec2..a4e57ef3e884 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -781,6 +781,27 @@ def _set_sub_prop(self, key, value): """ _helpers._set_sub_prop(self._properties, [self._job_type, key], value) + def _del_sub_prop(self, key): + """Reove ``key`` from the ``self._properties[self._job_type]`` dict. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear + those properties:: + + self._del_sub_prop('useLegacySql') + + This is equivalent to using the ``_helper._del_sub_prop`` function:: + + _helper._del_sub_prop( + self._properties, ['query', 'useLegacySql']) + + Arguments: + key (str): + Key to remove in the ``self._properties[self._job_type]`` + dictionary. + """ + _helpers._del_sub_prop(self._properties, [self._job_type, key]) + def to_api_repr(self): """Build an API representation of the job config. @@ -1051,6 +1072,34 @@ def time_partitioning(self, value): api_repr = value.to_api_repr() self._set_sub_prop('timePartitioning', api_repr) + @property + def clustering_fields(self): + """Union[List[str], None]: Fields defining clustering for the table + + (Defaults to :data:`None`). + + Clustering fields are immutable after table creation. + + .. note:: + + As of 2018-06-29, clustering fields cannot be set on a table + which does not also have time partioning defined. + """ + prop = self._get_sub_prop('clustering') + if prop is not None: + return list(prop.get('fields', ())) + + @clustering_fields.setter + def clustering_fields(self, value): + """Union[List[str], None]: Fields defining clustering for the table + + (Defaults to :data:`None`). + """ + if value is not None: + self._set_sub_prop('clustering', {'fields': value}) + else: + self._del_sub_prop('clustering') + @property def schema_update_options(self): """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies @@ -1217,6 +1266,13 @@ def time_partitioning(self): """ return self._configuration.time_partitioning + @property + def clustering_fields(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`. + """ + return self._configuration.clustering_fields + @property def schema_update_options(self): """See @@ -2037,6 +2093,34 @@ def time_partitioning(self, value): api_repr = value.to_api_repr() self._set_sub_prop('timePartitioning', api_repr) + @property + def clustering_fields(self): + """Union[List[str], None]: Fields defining clustering for the table + + (Defaults to :data:`None`). + + Clustering fields are immutable after table creation. + + .. note:: + + As of 2018-06-29, clustering fields cannot be set on a table + which does not also have time partioning defined. + """ + prop = self._get_sub_prop('clustering') + if prop is not None: + return list(prop.get('fields', ())) + + @clustering_fields.setter + def clustering_fields(self, value): + """Union[List[str], None]: Fields defining clustering for the table + + (Defaults to :data:`None`). + """ + if value is not None: + self._set_sub_prop('clustering', {'fields': value}) + else: + self._del_sub_prop('clustering') + @property def schema_update_options(self): """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies @@ -2227,6 +2311,13 @@ def time_partitioning(self): """ return self._configuration.time_partitioning + @property + def clustering_fields(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`. + """ + return self._configuration.clustering_fields + @property def schema_update_options(self): """See diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 97091ac05c9f..8397bd89fef9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -537,6 +537,36 @@ def partition_expiration(self, value): 'type': TimePartitioningType.DAY} self._properties['timePartitioning']['expirationMs'] = str(value) + @property + def clustering_fields(self): + """Union[List[str], None]: Fields defining clustering for the table + + (Defaults to :data:`None`). + + Clustering fields are immutable after table creation. + + .. note:: + + As of 2018-06-29, clustering fields cannot be set on a table + which does not also have time partioning defined. + """ + prop = self._properties.get('clustering') + if prop is not None: + return list(prop.get('fields', ())) + + @clustering_fields.setter + def clustering_fields(self, value): + """Union[List[str], None]: Fields defining clustering for the table + + (Defaults to :data:`None`). + """ + if value is not None: + prop = self._properties.setdefault('clustering', {}) + prop['fields'] = value + else: + if 'clustering' in self._properties: + del self._properties['clustering'] + @property def description(self): """Union[str, None]: Description of the table (defaults to diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 6d9cfc464912..cdc0ad9adea7 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -69,6 +69,19 @@ bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), ] +TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA = [ + bigquery.SchemaField('transaction_time', 'TIMESTAMP', mode='REQUIRED'), + bigquery.SchemaField('transaction_id', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField('user_email', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('store_code', 'STRING', mode='REQUIRED'), + bigquery.SchemaField( + 'items', 'RECORD', mode='REPEATED', fields=[ + bigquery.SchemaField('item_code', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('quantity', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField('comments', 'STRING', mode='NULLABLE'), + bigquery.SchemaField('expiration_date', 'DATE', mode='REQUIRED'), + ]), +] def _has_rows(result): @@ -245,6 +258,31 @@ def test_create_table(self): self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_id) + def test_create_table_w_time_partitioning_w_clustering_fields(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType + + dataset = self.temp_dataset(_make_dataset_id('create_table_tp_cf')) + table_id = 'test_table' + table_arg = Table( + dataset.table(table_id), + schema=TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA) + self.assertFalse(_table_exists(table_arg)) + + table_arg.time_partitioning = TimePartitioning( + field='transaction_time') + + table_arg.clustering_fields = ['user_email', 'store_code'] + table = retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertEqual(table.table_id, table_id) + time_partitioning = table.time_partitioning + self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY) + self.assertEqual(time_partitioning.field, 'transaction_time') + self.assertEqual(table.clustering_fields, ['user_email', 'store_code']) + def test_delete_dataset_delete_contents_true(self): dataset_id = _make_dataset_id('delete_table_true') dataset = retry_403(Config.CLIENT.create_dataset)( diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index b70b81b1bea7..677a1a427661 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -860,6 +860,29 @@ def test_w_nested_keys_existing_value(self): self.assertEqual(container, {'key1': {'key2': {'key3': 'after'}}}) +class Test__del_sub_prop(unittest.TestCase): + + def _call_fut(self, container, keys): + from google.cloud.bigquery._helpers import _del_sub_prop + + return _del_sub_prop(container, keys) + + def test_w_single_key(self): + container = {'key1': 'value'} + self._call_fut(container, ['key1']) + self.assertEqual(container, {}) + + def test_w_empty_container_nested_keys(self): + container = {} + self._call_fut(container, ['key1', 'key2', 'key3']) + self.assertEqual(container, {'key1': {'key2': {}}}) + + def test_w_existing_value_nested_keys(self): + container = {'key1': {'key2': {'key3': 'value'}}} + self._call_fut(container, ['key1', 'key2', 'key3']) + self.assertEqual(container, {'key1': {'key2': {}}}) + + class Test__int_or_none(unittest.TestCase): def _call_fut(self, value): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 4a7141d29d7d..0df830a2d9e7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1141,6 +1141,15 @@ def test_time_partitioning(self): config.time_partitioning = None self.assertIsNone(config.time_partitioning) + def test_clustering_fields(self): + fields = ['email', 'postal_code'] + config = self._get_target_class()() + config.clustering_fields = fields + self.assertEqual(config.clustering_fields, fields) + + config.clustering_fields = None + self.assertIsNone(config.clustering_fields) + def test_api_repr(self): resource = self._make_resource() config = self._get_target_class().from_api_repr(resource) @@ -1347,6 +1356,7 @@ def test_ctor(self): self.assertIsNone(job.write_disposition) self.assertIsNone(job.destination_encryption_configuration) self.assertIsNone(job.time_partitioning) + self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) def test_ctor_w_config(self): @@ -2787,6 +2797,15 @@ def test_time_partitioning(self): config.time_partitioning = None self.assertIsNone(config.time_partitioning) + def test_clustering_fields(self): + fields = ['email', 'postal_code'] + config = self._get_target_class()() + config.clustering_fields = fields + self.assertEqual(config.clustering_fields, fields) + + config.clustering_fields = None + self.assertIsNone(config.clustering_fields) + def test_from_api_repr_empty(self): klass = self._get_target_class() config = klass.from_api_repr({}) @@ -3076,6 +3095,7 @@ def test_ctor_defaults(self): self.assertIsNone(job.table_definitions) self.assertIsNone(job.destination_encryption_configuration) self.assertIsNone(job.time_partitioning) + self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) def test_ctor_w_udf_resources(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index b4b7a2671d7b..9e7b7340ac6f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -398,6 +398,8 @@ def test_ctor(self): self.assertIsNone(table.external_data_configuration) self.assertEquals(table.labels, {}) self.assertIsNone(table.encryption_configuration) + self.assertIsNone(table.time_partitioning) + self.assertIsNone(table.clustering_fields) def test_ctor_w_schema(self): from google.cloud.bigquery.table import SchemaField @@ -859,6 +861,36 @@ def test_partition_expiration_setter(self): assert warn_patch.called + def test_clustering_fields_setter_w_fields(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + fields = ['email', 'phone'] + + table.clustering_fields = fields + self.assertEqual(table.clustering_fields, fields) + self.assertEqual(table._properties['clustering'], {'fields': fields}) + + def test_clustering_fields_setter_w_none(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + fields = ['email', 'phone'] + + table._properties['clustering'] = {'fields': fields} + table.clustering_fields = None + self.assertEqual(table.clustering_fields, None) + self.assertFalse('clustering' in table._properties) + + def test_clustering_fields_setter_w_none_noop(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table.clustering_fields = None + self.assertEqual(table.clustering_fields, None) + self.assertFalse('clustering' in table._properties) + def test_encryption_configuration_setter(self): from google.cloud.bigquery.table import EncryptionConfiguration dataset = DatasetReference(self.PROJECT, self.DS_ID) From 2cc534a1e3a5a3d7c8bd82e660835d4004b9c898 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 1 Aug 2018 19:47:51 -0400 Subject: [PATCH 0479/2016] Add support/tests for loading tables from 'gzip.GzipFile'. (#5711) Closes #5276. --- .../google/cloud/bigquery/client.py | 15 +++++--- .../tests/unit/test_client.py | 34 +++++++++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4d7063e260ee..89a26ab18743 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -18,6 +18,7 @@ import collections import functools +import gzip import os import uuid @@ -1593,10 +1594,16 @@ def _check_mode(stream): """ mode = getattr(stream, 'mode', None) - if mode is not None and mode not in ('rb', 'r+b', 'rb+'): - raise ValueError( - "Cannot upload files opened in text mode: use " - "open(filename, mode='rb') or open(filename, mode='r+b')") + if isinstance(stream, gzip.GzipFile): + if mode != gzip.READ: + raise ValueError( + "Cannot upload gzip files opened in write mode: use " + "gzip.GzipFile(filename, mode='rb')") + else: + if mode is not None and mode not in ('rb', 'r+b', 'rb+'): + raise ValueError( + "Cannot upload files opened in text mode: use " + "open(filename, mode='rb') or open(filename, mode='r+b')") def _get_upload_headers(user_agent): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 47f116d258ed..85f882eee8da 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -16,6 +16,7 @@ import datetime import decimal import email +import gzip import io import json import unittest @@ -3712,6 +3713,12 @@ def _make_do_upload_patch(cls, client, method, def _make_file_obj(): return io.BytesIO(b'hello, is it me you\'re looking for?') + def _make_gzip_file_obj(self, writable): + if writable: + return gzip.GzipFile(mode='w', fileobj=io.BytesIO()) + else: + return gzip.GzipFile(mode='r', fileobj=self._make_file_obj()) + @staticmethod def _make_config(): from google.cloud.bigquery.job import LoadJobConfig @@ -3892,6 +3899,33 @@ def test_load_table_from_file_with_rewind(self): assert file_obj.tell() == 0 + def test_load_table_from_file_with_readable_gzip(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + gzip_file = self._make_gzip_file_obj(writable=False) + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file( + gzip_file, self.TABLE_REF, job_id='job_id', + job_config=self._make_config()) + + do_upload.assert_called_once_with( + gzip_file, + self.EXPECTED_CONFIGURATION, + _DEFAULT_NUM_RETRIES) + + def test_load_table_from_file_with_writable_gzip(self): + client = self._make_client() + gzip_file = self._make_gzip_file_obj(writable=True) + + with pytest.raises(ValueError): + client.load_table_from_file( + gzip_file, self.TABLE_REF, job_id='job_id', + job_config=self._make_config()) + def test_load_table_from_file_failure(self): from google.resumable_media import InvalidResponse from google.cloud import exceptions From 79029f3c0004c53fe55ff6346ee20959c4148bef Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 2 Aug 2018 11:57:32 -0400 Subject: [PATCH 0480/2016] Reenable systests for 'dataset.update'/'table.update'. (#5732) Reverts #5590. Closes #5588. Closes #5589. --- packages/google-cloud-bigquery/tests/system.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index cdc0ad9adea7..179b75a8695e 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -187,10 +187,6 @@ def test_get_dataset(self): self.assertEqual(got.friendly_name, 'Friendly') self.assertEqual(got.description, 'Description') - @pytest.mark.skip(reason=( - 'update_dataset() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/' - '5588')) def test_update_dataset(self): dataset = self.temp_dataset(_make_dataset_id('update_dataset')) self.assertTrue(_dataset_exists(dataset)) @@ -360,10 +356,6 @@ def test_list_tables(self): table.dataset_id == DATASET_ID)] self.assertEqual(len(created), len(tables_to_create)) - @pytest.mark.skip(reason=( - 'update_table() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/' - '5589')) def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id('update_table')) @@ -403,10 +395,6 @@ def test_update_table(self): with self.assertRaises(PreconditionFailed): Config.CLIENT.update_table(table2, ['description']) - @pytest.mark.skip(reason=( - 'update_table() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/' - '5589')) def test_update_table_schema(self): dataset = self.temp_dataset(_make_dataset_id('update_table')) From 991471a310725bcd6f2b20a403d8ed89e6c5101d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 2 Aug 2018 12:16:15 -0700 Subject: [PATCH 0481/2016] Release 1.5.0 (#5735) --- packages/google-cloud-bigquery/CHANGELOG.md | 24 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 9afc48a3f57e..a8daa2bacd04 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,30 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.5.0 + +### Implementation Changes + +- Make 'Table.location' read-only. (#5687) + +### New Features + +- Add 'clustering_fields' properties. (#5630) +- Add support for job labels (#5654) +- Add 'QueryJob.estimated_bytes_processed' property (#5655) +- Add support/tests for loading tables from 'gzip.GzipFile'. (#5711) +- Add 'ExternalSourceFormat' enum. (#5674) +- Add default location to client (#5678) + +### Documentation + +- Fix typo in CopyJob sources docstring (#5690) + +### Internal / Testing Changes + +- Add/refactor snippets for managing BigQuery jobs (#5631) +- Reenable systests for 'dataset.update'/'table.update'. (#5732) + ## 1.4.0 ### Implementation Changes diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index c9fbaeea1488..16f018dd1172 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.4.0' +version = '1.5.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From b04aa52816dd537f00436f5bca8506929ebfcbaf Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 6 Aug 2018 11:21:47 -0700 Subject: [PATCH 0482/2016] Update external_config docstrings to google style and fixes typos (#5713) --- .../google/cloud/bigquery/external_config.py | 189 ++++++++++-------- 1 file changed, 105 insertions(+), 84 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index c182b80d220f..1c7b055e2953 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -30,7 +30,7 @@ class ExternalSourceFormat(object): - """The format for external data files.. + """The format for external data files. Note that the set of allowed values for external data sources is different than the set used for loading data (see @@ -157,21 +157,26 @@ def type_(self, value): def to_api_repr(self): """Build an API representation of this object. - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. """ return copy.deepcopy(self._properties) @classmethod def from_api_repr(cls, resource): - """Factory: construct a BigtableColumn given its API representation + """Factory: construct a :class:`~.external_config.BigtableColumn` + instance given its API representation. - :type resource: dict - :param resource: - A column in the same representation as is returned from the API. + Args: + resource (Dict[str, Any]): + Definition of a :class:`~.external_config.BigtableColumn` + instance in the same representation as is returned from the + API. - :rtype: :class:`~google.cloud.bigquery.BigtableColumn` - :returns: Configuration parsed from ``resource``. + Returns: + :class:`~.external_config.BigtableColumn`: + Configuration parsed from ``resource``. """ config = cls() config._properties = copy.deepcopy(resource) @@ -243,8 +248,8 @@ def type_(self, value): @property def columns(self): - """List[google.cloud.bigquery.external_config.BigtableColumn]: Lists of - columns that should be exposed as individual fields + """List[:class:`~.external_config.BigtableColumn`]: Lists of columns + that should be exposed as individual fields. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns @@ -260,24 +265,26 @@ def columns(self, value): def to_api_repr(self): """Build an API representation of this object. - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. """ return copy.deepcopy(self._properties) @classmethod def from_api_repr(cls, resource): - """Factory: construct a BigtableColumnFamily given its - API representation + """Factory: construct a :class:`~.external_config.BigtableColumnFamily` + instance given its API representation. - :type resource: dict - :param resource: - A column family in the same representation as is returned - from the API. + Args: + resource (Dict[str, Any]): + Definition of a :class:`~.external_config.BigtableColumnFamily` + instance in the same representation as is returned from the + API. - :rtype: - :class:`~google.cloud.bigquery.external_config.BigtableColumnFamily` - :returns: Configuration parsed from ``resource``. + Returns: + :class:`~.external_config.BigtableColumnFamily`: + Configuration parsed from ``resource``. """ config = cls() config._properties = copy.deepcopy(resource) @@ -285,8 +292,8 @@ def from_api_repr(cls, resource): class BigtableOptions(object): - """Options that describe how to treat Bigtable tables - as BigQuery tables.""" + """Options that describe how to treat Bigtable tables as BigQuery tables. + """ _SOURCE_FORMAT = 'BIGTABLE' _RESOURCE_NAME = 'bigtableOptions' @@ -296,8 +303,8 @@ def __init__(self): @property def ignore_unspecified_column_families(self): - """bool: If `True`, ignore columns not specified in columnFamilies - list. Defaults to `False`. + """bool: If :data:`True`, ignore columns not specified in + :attr:`column_families` list. Defaults to :data:`False`. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.ignoreUnspecifiedColumnFamilies @@ -311,8 +318,8 @@ def ignore_unspecified_column_families(self, value): @property def read_rowkey_as_string(self): - """bool: If `True`, rowkey column families will be read and converted - to string. Defaults to `False`. + """bool: If :data:`True`, rowkey column families will be read and + converted to string. Defaults to :data:`False`. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.readRowkeyAsString @@ -326,9 +333,8 @@ def read_rowkey_as_string(self, value): @property def column_families(self): - """List[google.cloud.bigquery.external_config.BigtableColumnFamily]: - List of column families to expose in the table schema along with their - types. + """List[:class:`~.external_config.BigtableColumnFamily`]: List of + column families to expose in the table schema along with their types. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies @@ -344,23 +350,26 @@ def column_families(self, value): def to_api_repr(self): """Build an API representation of this object. - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. """ return copy.deepcopy(self._properties) @classmethod def from_api_repr(cls, resource): - """Factory: construct a BigtableOptions given its API representation + """Factory: construct a :class:`~.external_config.BigtableOptions` + instance given its API representation. - :type resource: dict - :param resource: - A BigtableOptions in the same representation as is returned - from the API. + Args: + resource (Dict[str, Any]): + Definition of a :class:`~.external_config.BigtableOptions` + instance in the same representation as is returned from the + API. - :rtype: - :class:`~google.cloud.bigquery.external_config.BigtableOptions` - :returns: Configuration parsed from ``resource``. + Returns: + :class:`~.external_config.BigtableOptions`: + Configuration parsed from ``resource``. """ config = cls() config._properties = copy.deepcopy(resource) @@ -378,8 +387,8 @@ def __init__(self): @property def allow_jagged_rows(self): - """bool: If `True`, BigQuery treats missing trailing columns as null - values. Defaults to `False`. + """bool: If :data:`True`, BigQuery treats missing trailing columns as + null values. Defaults to :data:`False`. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowJaggedRows @@ -393,8 +402,8 @@ def allow_jagged_rows(self, value): @property def allow_quoted_newlines(self): - """bool: If `True`, quoted data sections that contain newline - characters in a CSV file are allowed. Defaults to `False`. + """bool: If :data:`True`, quoted data sections that contain newline + characters in a CSV file are allowed. Defaults to :data:`False`. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowQuotedNewlines @@ -422,7 +431,7 @@ def encoding(self, value): @property def field_delimiter(self): - """str: The separator for fields in a CSV file. Defaults a comma (','). + """str: The separator for fields in a CSV file. Defaults to comma (','). See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.fieldDelimiter @@ -465,22 +474,26 @@ def skip_leading_rows(self, value): def to_api_repr(self): """Build an API representation of this object. - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. """ return copy.deepcopy(self._properties) @classmethod def from_api_repr(cls, resource): - """Factory: construct a CSVOptions given its API representation + """Factory: construct a :class:`~.external_config.CSVOptions` instance + given its API representation. - :type resource: dict - :param resource: - A CSVOptions in the same representation as is - returned from the API. + Args: + resource (Dict[str, Any]): + Definition of a :class:`~.external_config.CSVOptions` + instance in the same representation as is returned from the + API. - :rtype: :class:`~google.cloud.bigquery.external_config.CSVOptions` - :returns: Configuration parsed from ``resource``. + Returns: + :class:`~.external_config.CSVOptions`: + Configuration parsed from ``resource``. """ config = cls() config._properties = copy.deepcopy(resource) @@ -514,23 +527,26 @@ def skip_leading_rows(self, value): def to_api_repr(self): """Build an API representation of this object. - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. """ return copy.deepcopy(self._properties) @classmethod def from_api_repr(cls, resource): - """Factory: construct a GoogleSheetsOptions given its API representation + """Factory: construct a :class:`~.external_config.GoogleSheetsOptions` + instance given its API representation. - :type resource: dict - :param resource: - An GoogleSheetsOptions in the same representation as is - returned from the API. + Args: + resource (Dict[str, Any]): + Definition of a :class:`~.external_config.GoogleSheetsOptions` + instance in the same representation as is returned from the + API. - :rtype: - :class:`~google.cloud.bigquery.external_config.GoogleSheetsOptions` - :returns: Configuration parsed from ``resource``. + Returns: + :class:`~.external_config.GoogleSheetsOptions`: + Configuration parsed from ``resource``. """ config = cls() config._properties = copy.deepcopy(resource) @@ -544,7 +560,7 @@ class ExternalConfig(object): """Description of an external data source. Args: - source_format :class:`ExternalSourceFormat`: + source_format (:class:`~.external_config.ExternalSourceFormat`): See :attr:`source_format`. """ @@ -558,7 +574,8 @@ def __init__(self, source_format): @property def source_format(self): - """:class:`ExternalSourceFormat`: Format of external source. + """:class:`~.external_config.ExternalSourceFormat`: + Format of external source. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat @@ -567,12 +584,12 @@ def source_format(self): @property def options(self): - """Source-specific options.""" + """Dict[str, Any]: Source-specific options.""" return self._options @property def autodetect(self): - """bool: If `True`, try to detect schema and format options + """bool: If :data:`True`, try to detect schema and format options automatically. See @@ -601,8 +618,8 @@ def compression(self, value): @property def ignore_unknown_values(self): - """bool: If `True`, extra values that are not represented in the table - schema are ignored. Defaults to `False`. + """bool: If :data:`True`, extra values that are not represented in the + table schema are ignored. Defaults to :data:`False`. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues @@ -645,8 +662,8 @@ def source_uris(self, value): @property def schema(self): - """List[google.cloud.bigquery.schema.SchemaField]: The schema for the - data. + """List[:class:`~google.cloud.bigquery.schema.SchemaField`]: The schema + for the data. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema @@ -666,8 +683,9 @@ def schema(self, value): def to_api_repr(self): """Build an API representation of this object. - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. """ config = copy.deepcopy(self._properties) if self.options is not None: @@ -678,15 +696,18 @@ def to_api_repr(self): @classmethod def from_api_repr(cls, resource): - """Factory: construct a CSVOptions given its API representation - - :type resource: dict - :param resource: - An extract job configuration in the same representation as is - returned from the API. - - :rtype: :class:`~google.cloud.bigquery.external_config.CSVOptions` - :returns: Configuration parsed from ``resource``. + """Factory: construct an :class:`~.external_config.ExternalConfig` + instance given its API representation. + + Args: + resource (Dict[str, Any]): + Definition of an :class:`~.external_config.ExternalConfig` + instance in the same representation as is returned from the + API. + + Returns: + :class:`~.external_config.ExternalConfig`: + Configuration parsed from ``resource``. """ config = cls(resource['sourceFormat']) for optcls in _OPTION_CLASSES: From b077fc840ce64387a7c666130d595ff56c67db51 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 6 Aug 2018 14:47:53 -0400 Subject: [PATCH 0483/2016] Fix '_time_from_json' for values with micros. (#5753) See #5750. The issue remains in 'google.cloud._helpers._time_from_iso8601_time_naive', but I don't want to have BigQuery wait on a release of core. --- .../google/cloud/bigquery/_helpers.py | 12 +++++++++--- .../tests/unit/test__helpers.py | 10 ++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 49039e65f7dd..3108afcaf258 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -23,10 +23,11 @@ from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _microseconds_from_datetime from google.cloud._helpers import _RFC3339_NO_FRACTION -from google.cloud._helpers import _time_from_iso8601_time_naive from google.cloud._helpers import _to_bytes _RFC3339_MICROS_NO_ZULU = '%Y-%m-%dT%H:%M:%S.%f' +_TIMEONLY_WO_MICROS = '%H:%M:%S' +_TIMEONLY_W_MICROS = '%H:%M:%S.%f' def _not_null(value, field): @@ -142,8 +143,13 @@ def _date_from_json(value, field): def _time_from_json(value, field): """Coerce 'value' to a datetime date, if set or not nullable""" if _not_null(value, field): - # value will be a string, in HH:MM:SS form. - return _time_from_iso8601_time_naive(value) + if len(value) == 8: # HH:MM:SS + fmt = _TIMEONLY_WO_MICROS + elif len(value) == 15: # HH:MM:SS.micros + fmt = _TIMEONLY_W_MICROS + else: + raise ValueError("Unknown time format: {}".format(value)) + return datetime.datetime.strptime(value, fmt).time() def _record_from_json(value, field): diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 677a1a427661..906119e453e2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -337,6 +337,16 @@ def test_w_string_value(self): coerced, datetime.time(12, 12, 27)) + def test_w_subsecond_string_value(self): + coerced = self._call_fut('12:12:27.123456', object()) + self.assertEqual( + coerced, + datetime.time(12, 12, 27, 123456)) + + def test_w_bogus_string_value(self): + with self.assertRaises(ValueError): + self._call_fut('12:12:27.123', object()) + class Test_record_from_json(unittest.TestCase): From e6c55c43c79fd99504252328e655dad3fbf0ece7 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 28 Aug 2018 22:00:21 -0400 Subject: [PATCH 0484/2016] Test pandas support under Python 3.7. (#5857) * Test pandas support under Python 3.7. Full support still blocked on pyarrow support for Python 3.7. See #5294. --- packages/google-cloud-bigquery/nox.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 9f9eccd4db0a..824e06c91612 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -37,11 +37,12 @@ def default(session): # Install all test dependencies, then install this package in-place. session.install('mock', 'pytest', 'pytest-cov', *LOCAL_DEPS) - # Pandas does not support Python 3.7 + # Pyarrow does not support Python 3.7 if session.interpreter == 'python3.7': - session.install('-e', '.') + dev_install = '.[pandas]' else: - session.install('-e', '.[pandas, pyarrow]') + dev_install = '.[pandas, pyarrow]' + session.install('-e', dev_install) # IPython does not support Python 2 after version 5.x if session.interpreter == 'python2.7': From 66466522be13854ff96e11144868ccd751e01903 Mon Sep 17 00:00:00 2001 From: cclauss Date: Wed, 29 Aug 2018 20:59:19 +0200 Subject: [PATCH 0485/2016] Spanner benchmarks: print() is a function in Python 3 (#5862) --- packages/google-cloud-bigquery/benchmark/benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/benchmark/benchmark.py b/packages/google-cloud-bigquery/benchmark/benchmark.py index 9c71cb943271..2917f169aba1 100644 --- a/packages/google-cloud-bigquery/benchmark/benchmark.py +++ b/packages/google-cloud-bigquery/benchmark/benchmark.py @@ -42,5 +42,5 @@ raise Exception('found {0} columsn, expected {1}'.format(len(row), num_cols)) num_rows += 1 total_time = datetime.now() - start_time - print "query {0}: {1} rows, {2} cols, first byte {3} sec, total {4} sec"\ - .format(query, num_rows, num_cols, first_byte_time.total_seconds(), total_time.total_seconds()) + print("query {0}: {1} rows, {2} cols, first byte {3} sec, total {4} sec" + .format(query, num_rows, num_cols, first_byte_time.total_seconds(), total_time.total_seconds())) From 223111625b195337250ba5b77727c15c13ecb726 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 30 Aug 2018 11:50:23 -0400 Subject: [PATCH 0486/2016] Nox: use inplace installs (#5865) --- packages/google-cloud-bigquery/nox.py | 32 +++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index 824e06c91612..a9d9242c55ac 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -34,8 +34,10 @@ def default(session): Python corresponding to the ``nox`` binary the ``PATH`` can run the tests. """ - # Install all test dependencies, then install this package in-place. - session.install('mock', 'pytest', 'pytest-cov', *LOCAL_DEPS) + # Install all test dependencies, then install local packages in-place. + session.install('mock', 'pytest', 'pytest-cov') + for local_dep in LOCAL_DEPS: + session.install('-e', local_dep) # Pyarrow does not support Python 3.7 if session.interpreter == 'python3.7': @@ -97,13 +99,12 @@ def system(session, py): # Use pre-release gRPC for system tests. session.install('--pre', 'grpcio') - # Install all test dependencies, then install this package into the - # virtualenv's dist-packages. - session.install('mock', 'pytest', *LOCAL_DEPS) - session.install( - os.path.join('..', 'storage'), - os.path.join('..', 'test_utils'), - ) + # Install all test dependencies, then install local packages in place. + session.install('mock', 'pytest') + for local_dep in LOCAL_DEPS: + session.install('-e', local_dep) + session.install('-e', os.path.join('..', 'storage')) + session.install('-e', os.path.join('..', 'test_utils')) session.install('-e', '.[pandas]') # IPython does not support Python 2 after version 5.x @@ -136,13 +137,12 @@ def snippets(session, py): # Set the virtualenv dirname. session.virtualenv_dirname = 'snip-' + py - # Install all test dependencies, then install this package into the - # virtualenv's dist-packages. - session.install('mock', 'pytest', *LOCAL_DEPS) - session.install( - os.path.join('..', 'storage'), - os.path.join('..', 'test_utils'), - ) + # Install all test dependencies, then install local packages in place. + session.install('mock', 'pytest') + for local_dep in LOCAL_DEPS: + session.install('-e', local_dep) + session.install('-e', os.path.join('..', 'storage')) + session.install('-e', os.path.join('..', 'test_utils')) session.install('-e', '.[pandas, pyarrow]') # Run py.test against the system tests. From 4495f827a92960d00e7424b9599a9ae7c61653a4 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 30 Aug 2018 15:32:30 -0400 Subject: [PATCH 0487/2016] Avoid pulling entire result set into memory when constructing dataframe. (#5870) Closes #5859. --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 8397bd89fef9..d4edbd875792 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1166,7 +1166,8 @@ def to_dataframe(self): raise ValueError(_NO_PANDAS_ERROR) column_headers = [field.name for field in self.schema] - rows = [row.values() for row in iter(self)] + # Use generator, rather than pulling the whole rowset into memory. + rows = (row.values() for row in iter(self)) return pandas.DataFrame(rows, columns=column_headers) From 063ab7ce70563bc0fb5c2dceb71508eb45447aee Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 11 Sep 2018 16:37:14 -0400 Subject: [PATCH 0488/2016] Retry '502 Bad Gateway' errors by default. (#5930) Closes #5918. --- .../google-cloud-bigquery/google/cloud/bigquery/retry.py | 1 + packages/google-cloud-bigquery/tests/unit/test_retry.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index fc2eebeecfad..be279171a479 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -20,6 +20,7 @@ 'backendError', 'rateLimitExceeded', 'internalError', + 'badGateway', ]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index 4c93b3b1cbdd..9f4987fe5a54 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -51,3 +51,8 @@ def test_w_internalError(self): exc = mock.Mock( errors=[{'reason': 'internalError'}], spec=['errors']) self.assertTrue(self._call_fut(exc)) + + def test_w_badGateway(self): + exc = mock.Mock( + errors=[{'reason': 'badGateway'}], spec=['errors']) + self.assertTrue(self._call_fut(exc)) From 9c0f040b7a5b0264af74020051a3b66571439297 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 13 Sep 2018 10:52:14 -0700 Subject: [PATCH 0489/2016] BigQuery: update system test to use test data in bigquery-public-data. (#5965) --- packages/google-cloud-bigquery/tests/system.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 179b75a8695e..6662f584cdb9 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -321,11 +321,11 @@ def test_get_table_w_public_dataset(self): def test_list_partitions(self): table_ref = DatasetReference( - 'bigquery-partition-samples', - 'samples').table('stackoverflow_comments') + 'bigquery-public-data', + 'ethereum_blockchain').table('blocks') all_rows = Config.CLIENT.list_partitions(table_ref) - self.assertIn('20150508', all_rows) - self.assertEquals(2066, len(all_rows)) + self.assertIn('20180801', all_rows) + self.assertGreater(len(all_rows), 1000) def test_list_tables(self): DATASET_ID = _make_dataset_id('list_tables') From 58dc5eb07f51e064236c273119efce701008ac33 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 13 Sep 2018 16:27:39 -0400 Subject: [PATCH 0490/2016] Prep bigquery docs for repo split (#5955) * Move 'docs/bigquery' to 'bigquery/docs' and leave symlink behind. * Rename BQ's 'usage.rst' -> 'index.rst'. * DRY 'bigquery/README.rst'<->'bigquery/docs/index.rst'. * Add Sphinx logic for managing static redirect files. Add a redirect for BigQuery from 'usage.html' -> 'index.html'. * Find snippets under 'bigquery/docs/'. --- packages/google-cloud-bigquery/README.rst | 93 +- .../google-cloud-bigquery/docs/.gitignore | 1 + .../google-cloud-bigquery/docs/changelog.md | 1 + packages/google-cloud-bigquery/docs/dbapi.rst | 6 + packages/google-cloud-bigquery/docs/index.rst | 425 +++ .../google-cloud-bigquery/docs/reference.rst | 147 + .../google-cloud-bigquery/docs/snippets.py | 3096 +++++++++++++++++ .../google-cloud-bigquery/docs/usage.html | 8 + packages/google-cloud-bigquery/nox.py | 7 +- 9 files changed, 3740 insertions(+), 44 deletions(-) create mode 100644 packages/google-cloud-bigquery/docs/.gitignore create mode 120000 packages/google-cloud-bigquery/docs/changelog.md create mode 100644 packages/google-cloud-bigquery/docs/dbapi.rst create mode 100644 packages/google-cloud-bigquery/docs/index.rst create mode 100644 packages/google-cloud-bigquery/docs/reference.rst create mode 100644 packages/google-cloud-bigquery/docs/snippets.py create mode 100644 packages/google-cloud-bigquery/docs/usage.html diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 1a15304a1bef..7f9d2d8213e9 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -1,50 +1,76 @@ Python Client for Google BigQuery ================================= - Python idiomatic client for `Google BigQuery`_ - -.. _Google BigQuery: https://cloud.google.com/bigquery/what-is-bigquery - |pypi| |versions| -- `Documentation`_ +Querying massive datasets can be time consuming and expensive without the +right hardware and infrastructure. Google `BigQuery`_ solves this problem by +enabling super-fast, SQL queries against append-mostly tables, using the +processing power of Google's infrastructure. -.. _Documentation: https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/usage.html +- `Client Library Documentation`_ +- `Product Documentation`_ + +.. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg + :target: https://pypi.org/project/google-cloud-bigquery/ +.. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg + :target: https://pypi.org/project/google-cloud-bigquery/ +.. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery +.. _Client Library Documentation: https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/index.html +.. _Product Documentation: https://cloud.google.com/bigquery/docs/reference/v2/ Quick Start ----------- -.. code-block:: console +In order to use this library, you first need to go through the following steps: - $ pip install --upgrade google-cloud-bigquery +1. `Select or create a Cloud Platform project.`_ +2. `Enable billing for your project.`_ +3. `Enable the Google Cloud Datastore API.`_ +4. `Setup Authentication.`_ -For more information on setting up your Python development environment, -such as installing ``pip`` and ``virtualenv`` on your system, please refer -to `Python Development Environment Setup Guide`_ for Google Cloud Platform. +.. _Select or create a Cloud Platform project.: https://console.cloud.google.com/project +.. _Enable billing for your project.: https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project +.. _Enable the Google Cloud Datastore API.: https://cloud.google.com/bigquery +.. _Setup Authentication.: https://googlecloudplatform.github.io/google-cloud-python/latest/core/auth.html -.. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup +Installation +~~~~~~~~~~~~ -Authentication --------------- +Install this library in a `virtualenv`_ using pip. `virtualenv`_ is a tool to +create isolated Python environments. The basic problem it addresses is one of +dependencies and versions, and indirectly permissions. -With ``google-cloud-python`` we try to make authentication as painless as -possible. Check out the `Authentication section`_ in our documentation to -learn more. You may also find the `authentication document`_ shared by all -the ``google-cloud-*`` libraries to be helpful. +With `virtualenv`_, it's possible to install this library without needing system +install permissions, and without clashing with the installed system +dependencies. -.. _Authentication section: https://google-cloud-python.readthedocs.io/en/latest/core/auth.html -.. _authentication document: https://github.com/GoogleCloudPlatform/google-cloud-common/tree/master/authentication +.. _`virtualenv`: https://virtualenv.pypa.io/en/latest/ -Using the API -------------- -Querying massive datasets can be time consuming and expensive without the -right hardware and infrastructure. Google `BigQuery`_ (`BigQuery API docs`_) -solves this problem by enabling super-fast, SQL queries against -append-mostly tables, using the processing power of Google's infrastructure. +Mac/Linux +^^^^^^^^^ -.. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery -.. _BigQuery API docs: https://cloud.google.com/bigquery/docs/reference/v2/ +.. code-block:: console + + pip install virtualenv + virtualenv + source /bin/activate + /bin/pip install google-cloud-bigquery + + +Windows +^^^^^^^ + +.. code-block:: console + + pip install virtualenv + virtualenv + \Scripts\activate + \Scripts\pip.exe install google-cloud-bigquery + +Example Usage +------------- Create a dataset ~~~~~~~~~~~~~~~~ @@ -106,14 +132,3 @@ Perform a query for row in rows: print(row.name) - - -See the ``google-cloud-python`` API `BigQuery documentation`_ to learn how -to connect to BigQuery using this Client Library. - -.. _BigQuery documentation: https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/usage.html - -.. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg - :target: https://pypi.org/project/google-cloud-bigquery/ -.. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg - :target: https://pypi.org/project/google-cloud-bigquery/ diff --git a/packages/google-cloud-bigquery/docs/.gitignore b/packages/google-cloud-bigquery/docs/.gitignore new file mode 100644 index 000000000000..3fe20bec0f3a --- /dev/null +++ b/packages/google-cloud-bigquery/docs/.gitignore @@ -0,0 +1 @@ +generated/ \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/changelog.md b/packages/google-cloud-bigquery/docs/changelog.md new file mode 120000 index 000000000000..04c99a55caae --- /dev/null +++ b/packages/google-cloud-bigquery/docs/changelog.md @@ -0,0 +1 @@ +../CHANGELOG.md \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/dbapi.rst b/packages/google-cloud-bigquery/docs/dbapi.rst new file mode 100644 index 000000000000..ca0256d3c8de --- /dev/null +++ b/packages/google-cloud-bigquery/docs/dbapi.rst @@ -0,0 +1,6 @@ +DB-API Reference +~~~~~~~~~~~~~~~~ + +.. automodule:: google.cloud.bigquery.dbapi + :members: + :show-inheritance: diff --git a/packages/google-cloud-bigquery/docs/index.rst b/packages/google-cloud-bigquery/docs/index.rst new file mode 100644 index 000000000000..94580556c862 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/index.rst @@ -0,0 +1,425 @@ +.. include:: /../bigquery/README.rst + +Using the Library +================= + +Projects +-------- + +A project is the top-level container in the ``BigQuery`` API: it is tied +closely to billing, and can provide default access control across all its +datasets. If no ``project`` is passed to the client container, the library +attempts to infer a project using the environment (including explicit +environment variables, GAE, and GCE). + +To override the project inferred from the environment, pass an explicit +``project`` to the constructor, or to either of the alternative +``classmethod`` factories: + +.. code-block:: python + + from google.cloud import bigquery + client = bigquery.Client(project='PROJECT_ID') + + +Project ACLs +~~~~~~~~~~~~ + +Each project has an access control list granting reader / writer / owner +permission to one or more entities. This list cannot be queried or set +via the API; it must be managed using the Google Developer Console. + + +Datasets +-------- + +A dataset represents a collection of tables, and applies several default +policies to tables as they are created: + +- An access control list (ACL). When created, a dataset has an ACL + which maps to the ACL inherited from its project. + +- A default table expiration period. If set, tables created within the + dataset will have the value as their expiration period. + +See BigQuery documentation for more information on +`Datasets `_. + + +Dataset operations +~~~~~~~~~~~~~~~~~~ + +List datasets for the client's project: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_list_datasets] + :end-before: [END bigquery_list_datasets] + +Create a new dataset for the client's project: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_dataset] + :end-before: [END bigquery_create_dataset] + +Refresh metadata for a dataset (to pick up changes made by another client): + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_get_dataset] + :end-before: [END bigquery_get_dataset] + +Update a property in a dataset's metadata: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_update_dataset_description] + :end-before: [END bigquery_update_dataset_description] + +Modify user permissions on a dataset: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_update_dataset_access] + :end-before: [END bigquery_update_dataset_access] + +Delete a dataset: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_delete_dataset] + :end-before: [END bigquery_delete_dataset] + + +Tables +------ + +Tables exist within datasets. See BigQuery documentation for more information +on `Tables `_. + +Table operations +~~~~~~~~~~~~~~~~ +List tables for the dataset: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_list_tables] + :end-before: [END bigquery_list_tables] + +Create a table: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_table] + :end-before: [END bigquery_create_table] + +Get a table: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_get_table] + :end-before: [END bigquery_get_table] + +Update a property in a table's metadata: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_update_table_description] + :end-before: [END bigquery_update_table_description] + +Browse selected rows in a table: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_browse_table] + :end-before: [END bigquery_browse_table] + +Insert rows into a table's data: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_table_insert_rows] + :end-before: [END bigquery_table_insert_rows] + +Copy a table: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_copy_table] + :end-before: [END bigquery_copy_table] + +Extract a table to Google Cloud Storage: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_extract_table] + :end-before: [END bigquery_extract_table] + +Delete a table: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_delete_table] + :end-before: [END bigquery_delete_table] + +Upload table data from a file: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_from_file] + :end-before: [END bigquery_load_from_file] + +Load table data from Google Cloud Storage +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +See also: `Loading JSON data from Cloud Storage +`_. + +Load a CSV file from Cloud Storage: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_csv] + :end-before: [END bigquery_load_table_gcs_csv] + +Load a JSON file from Cloud Storage: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_json] + :end-before: [END bigquery_load_table_gcs_json] + +Load a Parquet file from Cloud Storage: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_parquet] + :end-before: [END bigquery_load_table_gcs_parquet] + +Customer Managed Encryption Keys +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Table data is always encrypted at rest, but BigQuery also provides a way for +you to control what keys it uses to encrypt they data. See `Protecting data +with Cloud KMS keys +`_ +in the BigQuery documentation for more details. + +Create a new table, using a customer-managed encryption key from +Cloud KMS to encrypt it. + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_table_cmek] + :end-before: [END bigquery_create_table_cmek] + +Change the key used to encrypt a table. + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_update_table_cmek] + :end-before: [END bigquery_update_table_cmek] + +Load a file from Cloud Storage, using a customer-managed encryption key from +Cloud KMS for the destination table. + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_json_cmek] + :end-before: [END bigquery_load_table_gcs_json_cmek] + +Copy a table, using a customer-managed encryption key from Cloud KMS for the +destination table. + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_copy_table_cmek] + :end-before: [END bigquery_copy_table_cmek] + +Write query results to a table, using a customer-managed encryption key from +Cloud KMS for the destination table. + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_destination_table_cmek] + :end-before: [END bigquery_query_destination_table_cmek] + +Queries +------- + + +Querying data +~~~~~~~~~~~~~ + +Run a query and wait for it to finish: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query] + :end-before: [END bigquery_query] + + +Run a dry run query +~~~~~~~~~~~~~~~~~~~ + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_dry_run] + :end-before: [END bigquery_query_dry_run] + + +Writing query results to a destination table +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +See BigQuery documentation for more information on +`writing query results `_. + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_destination_table] + :end-before: [END bigquery_query_destination_table] + + +Run a query using a named query parameter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +See BigQuery documentation for more information on +`parameterized queries `_. + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_params_named] + :end-before: [END bigquery_query_params_named] + + +Jobs +---- + +List jobs for a project +~~~~~~~~~~~~~~~~~~~~~~~ + +Jobs describe actions performed on data in BigQuery tables: + +- Load data into a table +- Run a query against data in one or more tables +- Extract data from a table +- Copy a table + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_list_jobs] + :end-before: [END bigquery_list_jobs] + + +Using BigQuery with Pandas +-------------------------- + +Retrieve BigQuery data as a Pandas DataFrame +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As of version 0.29.0, you can use the +:func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` function to +retrieve query results or table rows as a :class:`pandas.DataFrame`. + +First, ensure that the :mod:`pandas` library is installed by running: + +.. code-block:: bash + + pip install --upgrade pandas + +Alternatively, you can install the BigQuery python client library with +:mod:`pandas` by running: + +.. code-block:: bash + + pip install --upgrade google-cloud-bigquery[pandas] + +To retrieve query results as a :class:`pandas.DataFrame`: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_results_dataframe] + :end-before: [END bigquery_query_results_dataframe] + +To retrieve table rows as a :class:`pandas.DataFrame`: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_list_rows_dataframe] + :end-before: [END bigquery_list_rows_dataframe] + +Load a Pandas DataFrame to a BigQuery Table +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As of version 1.3.0, you can use the +:func:`~google.cloud.bigquery.client.Client.load_table_from_dataframe` function +to load data from a :class:`pandas.DataFrame` to a +:class:`~google.cloud.bigquery.table.Table`. To use this function, in addition +to :mod:`pandas`, you will need to install the :mod:`pyarrow` library. You can +install the BigQuery python client library with :mod:`pandas` and +:mod:`pyarrow` by running: + +.. code-block:: bash + + pip install --upgrade google-cloud-bigquery[pandas,pyarrow] + +The following example demonstrates how to create a :class:`pandas.DataFrame` +and load it into a new table: + +.. literalinclude:: snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_dataframe] + :end-before: [END bigquery_load_table_dataframe] + +API Reference +============= + +.. toctree:: + :maxdepth: 2 + + reference + dbapi + +Changelog +========= + +For a list of all ``google-cloud-bigquery`` releases: + +.. toctree:: + :maxdepth: 2 + + changelog + diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst new file mode 100644 index 000000000000..b66117b59b31 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -0,0 +1,147 @@ +API Reference +~~~~~~~~~~~~~ + +.. currentmodule:: google.cloud.bigquery + +The main concepts with this API are: + +- :class:`~google.cloud.bigquery.client.Client` manages connections to the + BigQuery API. Use the client methods to run jobs (such as a + :class:`~google.cloud.bigquery.job.QueryJob` via + :meth:`~google.cloud.bigquery.client.Client.query`) and manage resources. + +- :class:`~google.cloud.bigquery.dataset.Dataset` represents a + collection of tables. + +- :class:`~google.cloud.bigquery.table.Table` represents a single "relation". + +Client +====== + +.. autosummary:: + :toctree: generated + + client.Client + +Job +=== + +Job Configuration +----------------- + +.. autosummary:: + :toctree: generated + + job.QueryJobConfig + job.CopyJobConfig + job.LoadJobConfig + job.ExtractJobConfig + +Job Classes +----------- + +.. autosummary:: + :toctree: generated + + job.QueryJob + job.CopyJob + job.LoadJob + job.ExtractJob + job.UnknownJob + +Job-Related Types +----------------- + +.. autosummary:: + :toctree: generated + + job.Compression + job.CreateDisposition + job.DestinationFormat + job.Encoding + job.QueryPriority + job.SourceFormat + job.WriteDisposition + job.SchemaUpdateOption + + +Dataset +======= + +.. autosummary:: + :toctree: generated + + dataset.Dataset + dataset.DatasetListItem + dataset.DatasetReference + dataset.AccessEntry + + +Table +===== + +.. autosummary:: + :toctree: generated + + table.Table + table.TableListItem + table.TableReference + table.Row + table.RowIterator + table.EncryptionConfiguration + table.TimePartitioning + table.TimePartitioningType + + +Schema +====== + +.. autosummary:: + :toctree: generated + + schema.SchemaField + + +Query +===== + +.. autosummary:: + :toctree: generated + + query.ArrayQueryParameter + query.ScalarQueryParameter + query.StructQueryParameter + query.UDFResource + + +Retries +======= + +.. autosummary:: + :toctree: generated + + retry.DEFAULT_RETRY + + +External Configuration +====================== + +.. autosummary:: + :toctree: generated + + external_config.ExternalSourceFormat + external_config.ExternalConfig + external_config.BigtableOptions + external_config.BigtableColumnFamily + external_config.BigtableColumn + external_config.CSVOptions + external_config.GoogleSheetsOptions + + +Magics +====================== + +.. autosummary:: + :toctree: generated + + magics diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py new file mode 100644 index 000000000000..6093abe95c1c --- /dev/null +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -0,0 +1,3096 @@ +# Copyright 2016 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Testable usage examples for Google BigQuery API wrapper + +Each example function takes a ``client`` argument (which must be an instance +of :class:`google.cloud.bigquery.client.Client`) and uses it to perform a task +with the API. + +To facilitate running the examples as system tests, each example is also passed +a ``to_delete`` list; the function adds to the list any objects created which +need to be deleted during teardown. +""" + +import os +import time + +import mock +import pytest +import six +try: + import pandas +except (ImportError, AttributeError): + pandas = None +try: + import pyarrow +except (ImportError, AttributeError): + pyarrow = None + +from google.api_core import datetime_helpers +from google.api_core.exceptions import TooManyRequests +from google.cloud import bigquery +from test_utils.retry import RetryErrors + +ORIGINAL_FRIENDLY_NAME = 'Original friendly name' +ORIGINAL_DESCRIPTION = 'Original description' +LOCALLY_CHANGED_FRIENDLY_NAME = 'Locally-changed friendly name' +LOCALLY_CHANGED_DESCRIPTION = 'Locally-changed description' +UPDATED_FRIENDLY_NAME = 'Updated friendly name' +UPDATED_DESCRIPTION = 'Updated description' + +SCHEMA = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), +] + +ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), +] + +QUERY = ( + 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' + 'WHERE state = "TX"') + + +retry_429 = RetryErrors(TooManyRequests) + + +@pytest.fixture(scope='module') +def client(): + return bigquery.Client() + + +@pytest.fixture +def to_delete(client): + doomed = [] + yield doomed + for item in doomed: + if isinstance(item, (bigquery.Dataset, bigquery.DatasetReference)): + retry_429(client.delete_dataset)(item, delete_contents=True) + else: + retry_429(item.delete)() + + +def _millis(): + return int(time.time() * 1000) + + +class _CloseOnDelete(object): + + def __init__(self, wrapped): + self._wrapped = wrapped + + def delete(self): + self._wrapped.close() + + +def test_create_client_default_credentials(): + """Create a BigQuery client with Application Default Credentials""" + + # [START bigquery_client_default_credentials] + from google.cloud import bigquery + + # If you don't specify credentials when constructing the client, the + # client library will look for credentials in the environment. + client = bigquery.Client() + # [END bigquery_client_default_credentials] + + assert client is not None + + +def test_create_client_json_credentials(): + """Create a BigQuery client with Application Default Credentials""" + with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS']) as creds_file: + creds_file_data = creds_file.read() + + open_mock = mock.mock_open(read_data=creds_file_data) + + with mock.patch('io.open', open_mock): + # [START bigquery_client_json_credentials] + from google.cloud import bigquery + + # Explicitly use service account credentials by specifying the private + # key file. All clients in google-cloud-python have this helper. + client = bigquery.Client.from_service_account_json( + 'path/to/service_account.json') + # [END bigquery_client_json_credentials] + + assert client is not None + + +def test_list_datasets(client): + """List datasets for a project.""" + # [START bigquery_list_datasets] + # from google.cloud import bigquery + # client = bigquery.Client() + + datasets = list(client.list_datasets()) + project = client.project + + if datasets: + print('Datasets in project {}:'.format(project)) + for dataset in datasets: # API request(s) + print('\t{}'.format(dataset.dataset_id)) + else: + print('{} project does not contain any datasets.'.format(project)) + # [END bigquery_list_datasets] + + +def test_list_datasets_by_label(client, to_delete): + dataset_id = 'list_datasets_by_label_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset.labels = {'color': 'green'} + dataset = client.create_dataset(dataset) # API request + to_delete.append(dataset) + + # [START bigquery_list_datasets_by_label] + # from google.cloud import bigquery + # client = bigquery.Client() + + # The following label filter example will find datasets with an + # arbitrary 'color' label set to 'green' + label_filter = 'labels.color:green' + datasets = list(client.list_datasets(filter=label_filter)) + + if datasets: + print('Datasets filtered by {}:'.format(label_filter)) + for dataset in datasets: # API request(s) + print('\t{}'.format(dataset.dataset_id)) + else: + print('No datasets found with this filter.') + # [END bigquery_list_datasets_by_label] + found = set([dataset.dataset_id for dataset in datasets]) + assert dataset_id in found + + +def test_create_dataset(client, to_delete): + """Create a dataset.""" + dataset_id = 'create_dataset_{}'.format(_millis()) + + # [START bigquery_create_dataset] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + # Create a DatasetReference using a chosen dataset ID. + # The project defaults to the Client's project if not specified. + dataset_ref = client.dataset(dataset_id) + + # Construct a full Dataset object to send to the API. + dataset = bigquery.Dataset(dataset_ref) + # Specify the geographic location where the dataset should reside. + dataset.location = 'US' + + # Send the dataset to the API for creation. + # Raises google.api_core.exceptions.AlreadyExists if the Dataset already + # exists within the project. + dataset = client.create_dataset(dataset) # API request + # [END bigquery_create_dataset] + + to_delete.append(dataset) + + +def test_get_dataset_information(client, to_delete): + """View information about a dataset.""" + dataset_id = 'get_dataset_{}'.format(_millis()) + dataset_labels = {'color': 'green'} + dataset_ref = client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) + dataset.description = ORIGINAL_DESCRIPTION + dataset.labels = dataset_labels + dataset = client.create_dataset(dataset) # API request + to_delete.append(dataset) + + # [START bigquery_get_dataset] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + dataset_ref = client.dataset(dataset_id) + dataset = client.get_dataset(dataset_ref) # API request + + # View dataset properties + print('Dataset ID: {}'.format(dataset_id)) + print('Description: {}'.format(dataset.description)) + print('Labels:') + labels = dataset.labels + if labels: + for label, value in labels.items(): + print('\t{}: {}'.format(label, value)) + else: + print("\tDataset has no labels defined.") + + # View tables in dataset + print('Tables:') + tables = list(client.list_tables(dataset_ref)) # API request(s) + if tables: + for table in tables: + print('\t{}'.format(table.table_id)) + else: + print('\tThis dataset does not contain any tables.') + # [END bigquery_get_dataset] + + assert dataset.description == ORIGINAL_DESCRIPTION + assert dataset.labels == dataset_labels + assert tables == [] + + +# [START bigquery_dataset_exists] +def dataset_exists(client, dataset_reference): + """Return if a dataset exists. + + Args: + client (google.cloud.bigquery.client.Client): + A client to connect to the BigQuery API. + dataset_reference (google.cloud.bigquery.dataset.DatasetReference): + A reference to the dataset to look for. + + Returns: + bool: ``True`` if the dataset exists, ``False`` otherwise. + """ + from google.cloud.exceptions import NotFound + + try: + client.get_dataset(dataset_reference) + return True + except NotFound: + return False +# [END bigquery_dataset_exists] + + +def test_dataset_exists(client, to_delete): + """Determine if a dataset exists.""" + DATASET_ID = 'get_table_dataset_{}'.format(_millis()) + dataset_ref = client.dataset(DATASET_ID) + dataset = bigquery.Dataset(dataset_ref) + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + assert dataset_exists(client, dataset_ref) + assert not dataset_exists(client, client.dataset('i_dont_exist')) + + +@pytest.mark.skip(reason=( + 'update_dataset() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588')) +def test_update_dataset_description(client, to_delete): + """Update a dataset's description.""" + dataset_id = 'update_dataset_description_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset.description = 'Original description.' + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_update_dataset_description] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + # dataset = client.get_dataset(dataset_ref) # API request + + assert dataset.description == 'Original description.' + dataset.description = 'Updated description.' + + dataset = client.update_dataset(dataset, ['description']) # API request + + assert dataset.description == 'Updated description.' + # [END bigquery_update_dataset_description] + + +@pytest.mark.skip(reason=( + 'update_dataset() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588')) +def test_update_dataset_default_table_expiration(client, to_delete): + """Update a dataset's default table expiration.""" + dataset_id = 'update_dataset_default_expiration_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_update_dataset_expiration] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + # dataset = client.get_dataset(dataset_ref) # API request + + assert dataset.default_table_expiration_ms is None + one_day_ms = 24 * 60 * 60 * 1000 # in milliseconds + dataset.default_table_expiration_ms = one_day_ms + + dataset = client.update_dataset( + dataset, ['default_table_expiration_ms']) # API request + + assert dataset.default_table_expiration_ms == one_day_ms + # [END bigquery_update_dataset_expiration] + + +@pytest.mark.skip(reason=( + 'update_dataset() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588')) +def test_manage_dataset_labels(client, to_delete): + dataset_id = 'label_dataset_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_label_dataset] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + # dataset = client.get_dataset(dataset_ref) # API request + + assert dataset.labels == {} + labels = {'color': 'green'} + dataset.labels = labels + + dataset = client.update_dataset(dataset, ['labels']) # API request + + assert dataset.labels == labels + # [END bigquery_label_dataset] + + # [START bigquery_get_dataset_labels] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + dataset_ref = client.dataset(dataset_id) + dataset = client.get_dataset(dataset_ref) # API request + + # View dataset labels + print('Dataset ID: {}'.format(dataset_id)) + print('Labels:') + if dataset.labels: + for label, value in dataset.labels.items(): + print('\t{}: {}'.format(label, value)) + else: + print("\tDataset has no labels defined.") + # [END bigquery_get_dataset_labels] + assert dataset.labels == labels + + # [START bigquery_delete_label_dataset] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + # dataset = client.get_dataset(dataset_ref) # API request + + # This example dataset starts with one label + assert dataset.labels == {'color': 'green'} + # To delete a label from a dataset, set its value to None + dataset.labels['color'] = None + + dataset = client.update_dataset(dataset, ['labels']) # API request + + assert dataset.labels == {} + # [END bigquery_delete_label_dataset] + + +@pytest.mark.skip(reason=( + 'update_dataset() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588')) +def test_update_dataset_access(client, to_delete): + """Update a dataset's access controls.""" + dataset_id = 'update_dataset_access_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_update_dataset_access] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset = client.get_dataset(client.dataset('my_dataset')) + + entry = bigquery.AccessEntry( + role='READER', + entity_type='userByEmail', + entity_id='sample.bigquery.dev@gmail.com') + assert entry not in dataset.access_entries + entries = list(dataset.access_entries) + entries.append(entry) + dataset.access_entries = entries + + dataset = client.update_dataset(dataset, ['access_entries']) # API request + + assert entry in dataset.access_entries + # [END bigquery_update_dataset_access] + + +def test_delete_dataset(client): + """Delete a dataset.""" + from google.cloud.exceptions import NotFound + + dataset1_id = 'delete_dataset_{}'.format(_millis()) + dataset1 = bigquery.Dataset(client.dataset(dataset1_id)) + client.create_dataset(dataset1) + + dataset2_id = 'delete_dataset_with_tables{}'.format(_millis()) + dataset2 = bigquery.Dataset(client.dataset(dataset2_id)) + client.create_dataset(dataset2) + + table = bigquery.Table(dataset2.table('new_table')) + client.create_table(table) + + # [START bigquery_delete_dataset] + # from google.cloud import bigquery + # client = bigquery.Client() + + # Delete a dataset that does not contain any tables + # dataset1_id = 'my_empty_dataset' + dataset1_ref = client.dataset(dataset1_id) + client.delete_dataset(dataset1_ref) # API request + + print('Dataset {} deleted.'.format(dataset1_id)) + + # Use the delete_contents parameter to delete a dataset and its contents + # dataset2_id = 'my_dataset_with_tables' + dataset2_ref = client.dataset(dataset2_id) + client.delete_dataset(dataset2_ref, delete_contents=True) # API request + + print('Dataset {} deleted.'.format(dataset2_id)) + # [END bigquery_delete_dataset] + + for dataset in [dataset1, dataset2]: + with pytest.raises(NotFound): + client.get_dataset(dataset) # API request + + +def test_list_tables(client, to_delete): + """List tables within a dataset.""" + dataset_id = 'list_tables_dataset_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + dataset = client.create_dataset(bigquery.Dataset(dataset_ref)) + to_delete.append(dataset) + + # [START bigquery_list_tables] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + + tables = list(client.list_tables(dataset_ref)) # API request(s) + assert len(tables) == 0 + + table_ref = dataset.table('my_table') + table = bigquery.Table(table_ref) + client.create_table(table) # API request + tables = list(client.list_tables(dataset)) # API request(s) + + assert len(tables) == 1 + assert tables[0].table_id == 'my_table' + # [END bigquery_list_tables] + + +def test_create_table(client, to_delete): + """Create a table.""" + dataset_id = 'create_table_dataset_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_create_table] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + + schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + ] + table_ref = dataset_ref.table('my_table') + table = bigquery.Table(table_ref, schema=schema) + table = client.create_table(table) # API request + + assert table.table_id == 'my_table' + # [END bigquery_create_table] + + +def test_create_table_nested_repeated_schema(client, to_delete): + dataset_id = 'create_table_nested_repeated_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_nested_repeated_schema] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + + schema = [ + bigquery.SchemaField('id', 'STRING', mode='NULLABLE'), + bigquery.SchemaField('first_name', 'STRING', mode='NULLABLE'), + bigquery.SchemaField('last_name', 'STRING', mode='NULLABLE'), + bigquery.SchemaField('dob', 'DATE', mode='NULLABLE'), + bigquery.SchemaField('addresses', 'RECORD', mode='REPEATED', fields=[ + bigquery.SchemaField('status', 'STRING', mode='NULLABLE'), + bigquery.SchemaField('address', 'STRING', mode='NULLABLE'), + bigquery.SchemaField('city', 'STRING', mode='NULLABLE'), + bigquery.SchemaField('state', 'STRING', mode='NULLABLE'), + bigquery.SchemaField('zip', 'STRING', mode='NULLABLE'), + bigquery.SchemaField('numberOfYears', 'STRING', mode='NULLABLE'), + ]), + ] + table_ref = dataset_ref.table('my_table') + table = bigquery.Table(table_ref, schema=schema) + table = client.create_table(table) # API request + + print('Created table {}'.format(table.full_table_id)) + # [END bigquery_nested_repeated_schema] + + +def test_create_table_cmek(client, to_delete): + dataset_id = 'create_table_cmek_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_create_table_cmek] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + table_ref = client.dataset(dataset_id).table('my_table') + table = bigquery.Table(table_ref) + + # Set the encryption key to use for the table. + # TODO: Replace this key with a key you have created in Cloud KMS. + kms_key_name = 'projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}'.format( + 'cloud-samples-tests', 'us-central1', 'test', 'test') + table.encryption_configuration = bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name) + + table = client.create_table(table) # API request + + assert table.encryption_configuration.kms_key_name == kms_key_name + # [END bigquery_create_table_cmek] + + +def test_create_partitioned_table(client, to_delete): + dataset_id = 'create_table_partitioned_{}'.format(_millis()) + dataset_ref = bigquery.Dataset(client.dataset(dataset_id)) + dataset = client.create_dataset(dataset_ref) + to_delete.append(dataset) + + # [START bigquery_create_table_partitioned] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + + table_ref = dataset_ref.table('my_partitioned_table') + schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING'), + bigquery.SchemaField('date', 'DATE') + ] + table = bigquery.Table(table_ref, schema=schema) + table.time_partitioning = bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field='date', # name of column to use for partitioning + expiration_ms=7776000000) # 90 days + + table = client.create_table(table) + + print('Created table {}, partitioned on column {}'.format( + table.table_id, table.time_partitioning.field)) + # [END bigquery_create_table_partitioned] + + assert table.time_partitioning.type_ == 'DAY' + assert table.time_partitioning.field == 'date' + assert table.time_partitioning.expiration_ms == 7776000000 + + +def test_load_and_query_partitioned_table(client, to_delete): + dataset_id = 'load_partitioned_table_dataset_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_load_table_partitioned] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + table_id = 'us_states_by_date' + + dataset_ref = client.dataset(dataset_id) + job_config = bigquery.LoadJobConfig() + job_config.schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING'), + bigquery.SchemaField('date', 'DATE') + ] + job_config.skip_leading_rows = 1 + job_config.time_partitioning = bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field='date', # name of column to use for partitioning + expiration_ms=7776000000) # 90 days + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv' + + load_job = client.load_table_from_uri( + uri, + dataset_ref.table(table_id), + job_config=job_config) # API request + + assert load_job.job_type == 'load' + + load_job.result() # Waits for table load to complete. + + table = client.get_table(dataset_ref.table(table_id)) + print("Loaded {} rows to table {}".format(table.num_rows, table_id)) + # [END bigquery_load_table_partitioned] + assert table.num_rows == 50 + + project_id = client.project + + # [START bigquery_query_partitioned_table] + import datetime + # from google.cloud import bigquery + # client = bigquery.Client() + # project_id = 'my-project' + # dataset_id = 'my_dataset' + table_id = 'us_states_by_date' + + sql_template = """ + SELECT * + FROM `{}.{}.{}` + WHERE date BETWEEN @start_date AND @end_date + """ + sql = sql_template.format(project_id, dataset_id, table_id) + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = [ + bigquery.ScalarQueryParameter( + 'start_date', + 'DATE', + datetime.date(1800, 1, 1) + ), + bigquery.ScalarQueryParameter( + 'end_date', + 'DATE', + datetime.date(1899, 12, 31) + ) + ] + + query_job = client.query( + sql, + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request + + rows = list(query_job) + print("{} states were admitted to the US in the 1800s".format(len(rows))) + # [END bigquery_query_partitioned_table] + assert len(rows) == 29 + + +def test_get_table_information(client, to_delete): + """Show a table's properties.""" + dataset_id = 'show_table_dataset_{}'.format(_millis()) + table_id = 'show_table_table_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) + client.create_dataset(dataset) + to_delete.append(dataset) + + table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) + table.description = ORIGINAL_DESCRIPTION + table = client.create_table(table) + + # [START bigquery_get_table] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + # table_id = 'my_table' + + dataset_ref = client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) + table = client.get_table(table_ref) # API Request + + # View table properties + print(table.schema) + print(table.description) + print(table.num_rows) + # [END bigquery_get_table] + + assert table.schema == SCHEMA + assert table.description == ORIGINAL_DESCRIPTION + assert table.num_rows == 0 + + +# [START bigquery_table_exists] +def table_exists(client, table_reference): + """Return if a table exists. + + Args: + client (google.cloud.bigquery.client.Client): + A client to connect to the BigQuery API. + table_reference (google.cloud.bigquery.table.TableReference): + A reference to the table to look for. + + Returns: + bool: ``True`` if the table exists, ``False`` otherwise. + """ + from google.cloud.exceptions import NotFound + + try: + client.get_table(table_reference) + return True + except NotFound: + return False +# [END bigquery_table_exists] + + +def test_table_exists(client, to_delete): + """Determine if a table exists.""" + DATASET_ID = 'get_table_dataset_{}'.format(_millis()) + TABLE_ID = 'get_table_table_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + table_ref = dataset.table(TABLE_ID) + table = bigquery.Table(table_ref, schema=SCHEMA) + table = client.create_table(table) + + assert table_exists(client, table_ref) + assert not table_exists(client, dataset.table('i_dont_exist')) + + +@pytest.mark.skip(reason=( + 'update_table() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +def test_manage_table_labels(client, to_delete): + dataset_id = 'label_table_dataset_{}'.format(_millis()) + table_id = 'label_table_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) + table = client.create_table(table) + + # [START bigquery_label_table] + # from google.cloud import bigquery + # client = bigquery.Client() + # table_ref = client.dataset('my_dataset').table('my_table') + # table = client.get_table(table_ref) # API request + + assert table.labels == {} + labels = {'color': 'green'} + table.labels = labels + + table = client.update_table(table, ['labels']) # API request + + assert table.labels == labels + # [END bigquery_label_table] + + # [START bigquery_get_table_labels] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + # table_id = 'my_table' + + dataset_ref = client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) + table = client.get_table(table_ref) # API Request + + # View table labels + print('Table ID: {}'.format(table_id)) + print('Labels:') + if table.labels: + for label, value in table.labels.items(): + print('\t{}: {}'.format(label, value)) + else: + print("\tTable has no labels defined.") + # [END bigquery_get_table_labels] + assert table.labels == labels + + # [START bigquery_delete_label_table] + # from google.cloud import bigquery + # client = bigquery.Client() + # table_ref = client.dataset('my_dataset').table('my_table') + # table = client.get_table(table_ref) # API request + + # This example table starts with one label + assert table.labels == {'color': 'green'} + # To delete a label from a table, set its value to None + table.labels['color'] = None + + table = client.update_table(table, ['labels']) # API request + + assert table.labels == {} + # [END bigquery_delete_label_table] + + +@pytest.mark.skip(reason=( + 'update_table() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +def test_update_table_description(client, to_delete): + """Update a table's description.""" + dataset_id = 'update_table_description_dataset_{}'.format(_millis()) + table_id = 'update_table_description_table_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) + table.description = 'Original description.' + table = client.create_table(table) + + # [START bigquery_update_table_description] + # from google.cloud import bigquery + # client = bigquery.Client() + # table_ref = client.dataset('my_dataset').table('my_table') + # table = client.get_table(table_ref) # API request + + assert table.description == 'Original description.' + table.description = 'Updated description.' + + table = client.update_table(table, ['description']) # API request + + assert table.description == 'Updated description.' + # [END bigquery_update_table_description] + + +@pytest.mark.skip(reason=( + 'update_table() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +def test_update_table_expiration(client, to_delete): + """Update a table's expiration time.""" + dataset_id = 'update_table_expiration_dataset_{}'.format(_millis()) + table_id = 'update_table_expiration_table_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) + table = client.create_table(table) + + # [START bigquery_update_table_expiration] + import datetime + import pytz + + # from google.cloud import bigquery + # client = bigquery.Client() + # table_ref = client.dataset('my_dataset').table('my_table') + # table = client.get_table(table_ref) # API request + + assert table.expires is None + + # set table to expire 5 days from now + expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5) + table.expires = expiration + table = client.update_table(table, ['expires']) # API request + + # expiration is stored in milliseconds + margin = datetime.timedelta(microseconds=1000) + assert expiration - margin <= table.expires <= expiration + margin + # [END bigquery_update_table_expiration] + + +@pytest.mark.skip(reason=( + 'update_table() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +def test_add_empty_column(client, to_delete): + """Adds an empty column to an existing table.""" + dataset_id = 'add_empty_column_dataset_{}'.format(_millis()) + table_id = 'add_empty_column_table_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) + table = client.create_table(table) + + # [START bigquery_add_empty_column] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + # table_id = 'my_table' + + table_ref = client.dataset(dataset_id).table(table_id) + table = client.get_table(table_ref) # API request + + original_schema = table.schema + new_schema = original_schema[:] # creates a copy of the schema + new_schema.append(bigquery.SchemaField('phone', 'STRING')) + + table.schema = new_schema + table = client.update_table(table, ['schema']) # API request + + assert len(table.schema) == len(original_schema) + 1 == len(new_schema) + # [END bigquery_add_empty_column] + + +@pytest.mark.skip(reason=( + 'update_table() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +def test_relax_column(client, to_delete): + """Updates a schema field from required to nullable.""" + dataset_id = 'relax_column_dataset_{}'.format(_millis()) + table_id = 'relax_column_table_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_relax_column] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + # table_id = 'my_table' + + original_schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + ] + table_ref = client.dataset(dataset_id).table(table_id) + table = bigquery.Table(table_ref, schema=original_schema) + table = client.create_table(table) + assert all(field.mode == 'REQUIRED' for field in table.schema) + + # SchemaField properties cannot be edited after initialization. + # To make changes, construct new SchemaField objects. + relaxed_schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='NULLABLE'), + bigquery.SchemaField('age', 'INTEGER', mode='NULLABLE'), + ] + table.schema = relaxed_schema + table = client.update_table(table, ['schema']) + + assert all(field.mode == 'NULLABLE' for field in table.schema) + # [END bigquery_relax_column] + + +@pytest.mark.skip(reason=( + 'update_table() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +def test_update_table_cmek(client, to_delete): + """Patch a table's metadata.""" + dataset_id = 'update_table_cmek_{}'.format(_millis()) + table_id = 'update_table_cmek_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + table = bigquery.Table(dataset.table(table_id)) + original_kms_key_name = ( + 'projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}'.format( + 'cloud-samples-tests', 'us-central1', 'test', 'test')) + table.encryption_configuration = bigquery.EncryptionConfiguration( + kms_key_name=original_kms_key_name) + table = client.create_table(table) + + # [START bigquery_update_table_cmek] + # from google.cloud import bigquery + # client = bigquery.Client() + + assert table.encryption_configuration.kms_key_name == original_kms_key_name + + # Set a new encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + updated_kms_key_name = ( + 'projects/cloud-samples-tests/locations/us-central1/' + 'keyRings/test/cryptoKeys/otherkey') + table.encryption_configuration = bigquery.EncryptionConfiguration( + kms_key_name=updated_kms_key_name) + + table = client.update_table( + table, ['encryption_configuration']) # API request + + assert table.encryption_configuration.kms_key_name == updated_kms_key_name + assert original_kms_key_name != updated_kms_key_name + # [END bigquery_update_table_cmek] + + +def test_browse_table_data(client, to_delete, capsys): + """Retreive selected row data from a table.""" + + # [START bigquery_browse_table] + # from google.cloud import bigquery + # client = bigquery.Client() + + dataset_ref = client.dataset('samples', project='bigquery-public-data') + table_ref = dataset_ref.table('shakespeare') + table = client.get_table(table_ref) # API call + + # Load all rows from a table + rows = client.list_rows(table) + assert len(list(rows)) == table.num_rows + + # Load the first 10 rows + rows = client.list_rows(table, max_results=10) + assert len(list(rows)) == 10 + + # Specify selected fields to limit the results to certain columns + fields = table.schema[:2] # first two columns + rows = client.list_rows(table, selected_fields=fields, max_results=10) + assert len(rows.schema) == 2 + assert len(list(rows)) == 10 + + # Use the start index to load an arbitrary portion of the table + rows = client.list_rows(table, start_index=10, max_results=10) + + # Print row data in tabular format + format_string = '{!s:<16} ' * len(rows.schema) + field_names = [field.name for field in rows.schema] + print(format_string.format(*field_names)) # prints column headers + for row in rows: + print(format_string.format(*row)) # prints row data + # [END bigquery_browse_table] + + out, err = capsys.readouterr() + out = list(filter(bool, out.split('\n'))) # list of non-blank lines + assert len(out) == 11 + + +@pytest.mark.skip(reason=( + 'update_table() is flaky ' + 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +def test_manage_views(client, to_delete): + project = client.project + source_dataset_id = 'source_dataset_{}'.format(_millis()) + source_dataset_ref = client.dataset(source_dataset_id) + source_dataset = bigquery.Dataset(source_dataset_ref) + source_dataset = client.create_dataset(source_dataset) + to_delete.append(source_dataset) + + job_config = bigquery.LoadJobConfig() + job_config.schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING') + ] + job_config.skip_leading_rows = 1 + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.csv' + source_table_id = 'us_states' + load_job = client.load_table_from_uri( + uri, source_dataset.table(source_table_id), job_config=job_config) + load_job.result() + + shared_dataset_id = 'shared_dataset_{}'.format(_millis()) + shared_dataset_ref = client.dataset(shared_dataset_id) + shared_dataset = bigquery.Dataset(shared_dataset_ref) + shared_dataset = client.create_dataset(shared_dataset) + to_delete.append(shared_dataset) + + # [START bigquery_create_view] + # from google.cloud import bigquery + # client = bigquery.Client() + # project = 'my-project' + # source_dataset_id = 'my_source_dataset' + # source_table_id = 'us_states' + # shared_dataset_ref = client.dataset('my_shared_dataset') + + # This example shows how to create a shared view of a source table of + # US States. The source table contains all 50 states, while the view will + # contain only states with names starting with 'W'. + view_ref = shared_dataset_ref.table('my_shared_view') + view = bigquery.Table(view_ref) + sql_template = ( + 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"') + view.view_query = sql_template.format( + project, source_dataset_id, source_table_id) + view = client.create_table(view) # API request + + print('Successfully created view at {}'.format(view.full_table_id)) + # [END bigquery_create_view] + + # [START bigquery_update_view_query] + # from google.cloud import bigquery + # client = bigquery.Client() + # project = 'my-project' + # source_dataset_id = 'my_source_dataset' + # source_table_id = 'us_states' + # shared_dataset_ref = client.dataset('my_shared_dataset') + + # This example shows how to update a shared view of a source table of + # US States. The view's query will be updated to contain only states with + # names starting with 'M'. + view_ref = shared_dataset_ref.table('my_shared_view') + view = bigquery.Table(view_ref) + sql_template = ( + 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"') + view.view_query = sql_template.format( + project, source_dataset_id, source_table_id) + view = client.update_table(view, ['view_query']) # API request + # [END bigquery_update_view_query] + + # [START bigquery_get_view] + # from google.cloud import bigquery + # client = bigquery.Client() + # shared_dataset_id = 'my_shared_dataset' + + view_ref = client.dataset(shared_dataset_id).table('my_shared_view') + view = client.get_table(view_ref) # API Request + + # Display view properties + print('View at {}'.format(view.full_table_id)) + print('View Query:\n{}'.format(view.view_query)) + # [END bigquery_get_view] + assert view.view_query is not None + + analyst_group_email = 'example-analyst-group@google.com' + # [START bigquery_grant_view_access] + # from google.cloud import bigquery + # client = bigquery.Client() + + # Assign access controls to the dataset containing the view + # shared_dataset_id = 'my_shared_dataset' + # analyst_group_email = 'data_analysts@example.com' + shared_dataset = client.get_dataset( + client.dataset(shared_dataset_id)) # API request + access_entries = shared_dataset.access_entries + access_entries.append( + bigquery.AccessEntry('READER', 'groupByEmail', analyst_group_email) + ) + shared_dataset.access_entries = access_entries + shared_dataset = client.update_dataset( + shared_dataset, ['access_entries']) # API request + + # Authorize the view to access the source dataset + # project = 'my-project' + # source_dataset_id = 'my_source_dataset' + source_dataset = client.get_dataset( + client.dataset(source_dataset_id)) # API request + view_reference = { + 'projectId': project, + 'datasetId': shared_dataset_id, + 'tableId': 'my_shared_view', + } + access_entries = source_dataset.access_entries + access_entries.append( + bigquery.AccessEntry(None, 'view', view_reference) + ) + source_dataset.access_entries = access_entries + source_dataset = client.update_dataset( + source_dataset, ['access_entries']) # API request + # [END bigquery_grant_view_access] + + +def test_table_insert_rows(client, to_delete): + """Insert / fetch table data.""" + dataset_id = 'table_insert_rows_dataset_{}'.format(_millis()) + table_id = 'table_insert_rows_table_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset = client.create_dataset(dataset) + dataset.location = 'US' + to_delete.append(dataset) + + table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) + table = client.create_table(table) + + # [START bigquery_table_insert_rows] + # TODO(developer): Uncomment the lines below and replace with your values. + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' # replace with your dataset ID + # For this sample, the table must already exist and have a defined schema + # table_id = 'my_table' # replace with your table ID + # table_ref = client.dataset(dataset_id).table(table_id) + # table = client.get_table(table_ref) # API request + + rows_to_insert = [ + (u'Phred Phlyntstone', 32), + (u'Wylma Phlyntstone', 29), + ] + + errors = client.insert_rows(table, rows_to_insert) # API request + + assert errors == [] + # [END bigquery_table_insert_rows] + + +def test_load_table_from_file(client, to_delete): + """Upload table data from a CSV file.""" + dataset_id = 'table_upload_from_file_dataset_{}'.format(_millis()) + table_id = 'table_upload_from_file_table_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset.location = 'US' + client.create_dataset(dataset) + to_delete.append(dataset) + snippets_dir = os.path.abspath(os.path.dirname(__file__)) + filename = os.path.join( + snippets_dir, '..', '..', 'bigquery', 'tests', 'data', 'people.csv') + + # [START bigquery_load_from_file] + # from google.cloud import bigquery + # client = bigquery.Client() + # filename = '/path/to/file.csv' + # dataset_id = 'my_dataset' + # table_id = 'my_table' + + dataset_ref = client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) + job_config = bigquery.LoadJobConfig() + job_config.source_format = bigquery.SourceFormat.CSV + job_config.skip_leading_rows = 1 + job_config.autodetect = True + + with open(filename, 'rb') as source_file: + job = client.load_table_from_file( + source_file, + table_ref, + location='US', # Must match the destination dataset location. + job_config=job_config) # API request + + job.result() # Waits for table load to complete. + + print('Loaded {} rows into {}:{}.'.format( + job.output_rows, dataset_id, table_id)) + # [END bigquery_load_from_file] + + table = client.get_table(table_ref) + rows = list(client.list_rows(table)) # API request + + assert len(rows) == 2 + # Order is not preserved, so compare individually + row1 = bigquery.Row(('Wylma Phlyntstone', 29), {'full_name': 0, 'age': 1}) + assert row1 in rows + row2 = bigquery.Row(('Phred Phlyntstone', 32), {'full_name': 0, 'age': 1}) + assert row2 in rows + + +def test_load_table_from_uri_csv(client, to_delete, capsys): + dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_load_table_gcs_csv] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + dataset_ref = client.dataset(dataset_id) + job_config = bigquery.LoadJobConfig() + job_config.schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING') + ] + job_config.skip_leading_rows = 1 + # The source format defaults to CSV, so the line below is optional. + job_config.source_format = bigquery.SourceFormat.CSV + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.csv' + + load_job = client.load_table_from_uri( + uri, + dataset_ref.table('us_states'), + job_config=job_config) # API request + print('Starting job {}'.format(load_job.job_id)) + + load_job.result() # Waits for table load to complete. + print('Job finished.') + + destination_table = client.get_table(dataset_ref.table('us_states')) + print('Loaded {} rows.'.format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_csv] + + out, _ = capsys.readouterr() + assert 'Loaded 50 rows.' in out + + +def test_load_table_from_uri_json(client, to_delete, capsys): + dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset.location = 'US' + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_load_table_gcs_json] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + dataset_ref = client.dataset(dataset_id) + job_config = bigquery.LoadJobConfig() + job_config.schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING') + ] + job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.json' + + load_job = client.load_table_from_uri( + uri, + dataset_ref.table('us_states'), + location='US', # Location must match that of the destination dataset. + job_config=job_config) # API request + print('Starting job {}'.format(load_job.job_id)) + + load_job.result() # Waits for table load to complete. + print('Job finished.') + + destination_table = client.get_table(dataset_ref.table('us_states')) + print('Loaded {} rows.'.format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_json] + + out, _ = capsys.readouterr() + assert 'Loaded 50 rows.' in out + + +def test_load_table_from_uri_cmek(client, to_delete): + dataset_id = 'load_table_from_uri_cmek_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset.location = 'US' + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_load_table_gcs_json_cmek] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + dataset_ref = client.dataset(dataset_id) + job_config = bigquery.LoadJobConfig() + job_config.autodetect = True + job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + kms_key_name = 'projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}'.format( + 'cloud-samples-tests', 'us-central1', 'test', 'test') + encryption_config = bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name) + job_config.destination_encryption_configuration = encryption_config + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.json' + + load_job = client.load_table_from_uri( + uri, + dataset_ref.table('us_states'), + location='US', # Location must match that of the destination dataset. + job_config=job_config) # API request + + assert load_job.job_type == 'load' + + load_job.result() # Waits for table load to complete. + + assert load_job.state == 'DONE' + table = client.get_table(dataset_ref.table('us_states')) + assert table.encryption_configuration.kms_key_name == kms_key_name + # [END bigquery_load_table_gcs_json_cmek] + + +def test_load_table_from_uri_parquet(client, to_delete, capsys): + dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_load_table_gcs_parquet] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + dataset_ref = client.dataset(dataset_id) + job_config = bigquery.LoadJobConfig() + job_config.source_format = bigquery.SourceFormat.PARQUET + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.parquet' + + load_job = client.load_table_from_uri( + uri, + dataset_ref.table('us_states'), + job_config=job_config) # API request + print('Starting job {}'.format(load_job.job_id)) + + load_job.result() # Waits for table load to complete. + print('Job finished.') + + destination_table = client.get_table(dataset_ref.table('us_states')) + print('Loaded {} rows.'.format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_parquet] + + out, _ = capsys.readouterr() + assert 'Loaded 50 rows.' in out + + +def test_load_table_from_uri_orc(client, to_delete, capsys): + dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_load_table_gcs_orc] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + dataset_ref = client.dataset(dataset_id) + job_config = bigquery.LoadJobConfig() + job_config.source_format = bigquery.SourceFormat.ORC + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.orc' + + load_job = client.load_table_from_uri( + uri, + dataset_ref.table('us_states'), + job_config=job_config) # API request + print('Starting job {}'.format(load_job.job_id)) + + load_job.result() # Waits for table load to complete. + print('Job finished.') + + destination_table = client.get_table(dataset_ref.table('us_states')) + print('Loaded {} rows.'.format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_orc] + + out, _ = capsys.readouterr() + assert 'Loaded 50 rows.' in out + + +def test_load_table_from_uri_autodetect(client, to_delete, capsys): + """Load table from a GCS URI using various formats and auto-detected schema + + Each file format has its own tested load from URI sample. Because most of + the code is common for autodetect, append, and truncate, this sample + includes snippets for all supported formats but only calls a single load + job. + + This code snippet is made up of shared code, then format-specific code, + followed by more shared code. Note that only the last format in the + format-specific code section will be tested in this test. + """ + dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # Shared code + # [START bigquery_load_table_gcs_csv_autodetect] + # [START bigquery_load_table_gcs_json_autodetect] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + dataset_ref = client.dataset(dataset_id) + job_config = bigquery.LoadJobConfig() + job_config.autodetect = True + # [END bigquery_load_table_gcs_csv_autodetect] + # [END bigquery_load_table_gcs_json_autodetect] + + # Format-specific code + # [START bigquery_load_table_gcs_csv_autodetect] + job_config.skip_leading_rows = 1 + # The source format defaults to CSV, so the line below is optional. + job_config.source_format = bigquery.SourceFormat.CSV + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.csv' + # [END bigquery_load_table_gcs_csv_autodetect] + # unset csv-specific attribute + del job_config._properties['load']['skipLeadingRows'] + + # [START bigquery_load_table_gcs_json_autodetect] + job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.json' + # [END bigquery_load_table_gcs_json_autodetect] + + # Shared code + # [START bigquery_load_table_gcs_csv_autodetect] + # [START bigquery_load_table_gcs_json_autodetect] + load_job = client.load_table_from_uri( + uri, + dataset_ref.table('us_states'), + job_config=job_config) # API request + print('Starting job {}'.format(load_job.job_id)) + + load_job.result() # Waits for table load to complete. + print('Job finished.') + + destination_table = client.get_table(dataset_ref.table('us_states')) + print('Loaded {} rows.'.format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_csv_autodetect] + # [END bigquery_load_table_gcs_json_autodetect] + + out, _ = capsys.readouterr() + assert 'Loaded 50 rows.' in out + + +def test_load_table_from_uri_append(client, to_delete, capsys): + """Appends data to a table from a GCS URI using various formats + + Each file format has its own tested load from URI sample. Because most of + the code is common for autodetect, append, and truncate, this sample + includes snippets for all supported formats but only calls a single load + job. + + This code snippet is made up of shared code, then format-specific code, + followed by more shared code. Note that only the last format in the + format-specific code section will be tested in this test. + """ + dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + job_config = bigquery.LoadJobConfig() + job_config.schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING') + ] + table_ref = dataset.table('us_states') + body = six.BytesIO(b'Washington,WA') + client.load_table_from_file( + body, table_ref, job_config=job_config).result() + + # SHared code + # [START bigquery_load_table_gcs_csv_append] + # [START bigquery_load_table_gcs_json_append] + # from google.cloud import bigquery + # client = bigquery.Client() + # table_ref = client.dataset('my_dataset').table('existing_table') + + previous_rows = client.get_table(table_ref).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig() + job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND + # [END bigquery_load_table_gcs_csv_append] + # [END bigquery_load_table_gcs_json_append] + + # Format-specific code + # [START bigquery_load_table_gcs_csv_append] + job_config.skip_leading_rows = 1 + # The source format defaults to CSV, so the line below is optional. + job_config.source_format = bigquery.SourceFormat.CSV + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.csv' + # [END bigquery_load_table_gcs_csv_append] + # unset csv-specific attribute + del job_config._properties['load']['skipLeadingRows'] + + # [START bigquery_load_table_gcs_json_append] + job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.json' + # [END bigquery_load_table_gcs_json_append] + + # Shared code + # [START bigquery_load_table_gcs_csv_append] + # [START bigquery_load_table_gcs_json_append] + load_job = client.load_table_from_uri( + uri, + table_ref, + job_config=job_config) # API request + print('Starting job {}'.format(load_job.job_id)) + + load_job.result() # Waits for table load to complete. + print('Job finished.') + + destination_table = client.get_table(table_ref) + print('Loaded {} rows.'.format(destination_table.num_rows - previous_rows)) + # [END bigquery_load_table_gcs_csv_append] + # [END bigquery_load_table_gcs_json_append] + + out, _ = capsys.readouterr() + assert previous_rows == 1 + assert 'Loaded 50 rows.' in out + + +def test_load_table_from_uri_truncate(client, to_delete, capsys): + """Replaces table data with data from a GCS URI using various formats + + Each file format has its own tested load from URI sample. Because most of + the code is common for autodetect, append, and truncate, this sample + includes snippets for all supported formats but only calls a single load + job. + + This code snippet is made up of shared code, then format-specific code, + followed by more shared code. Note that only the last format in the + format-specific code section will be tested in this test. + """ + dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + job_config = bigquery.LoadJobConfig() + job_config.schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING') + ] + table_ref = dataset.table('us_states') + body = six.BytesIO(b'Washington,WA') + client.load_table_from_file( + body, table_ref, job_config=job_config).result() + + # Shared code + # [START bigquery_load_table_gcs_csv_truncate] + # [START bigquery_load_table_gcs_json_truncate] + # [START bigquery_load_table_gcs_parquet_truncate] + # [START bigquery_load_table_gcs_orc_truncate] + # from google.cloud import bigquery + # client = bigquery.Client() + # table_ref = client.dataset('my_dataset').table('existing_table') + + previous_rows = client.get_table(table_ref).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig() + job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + # [END bigquery_load_table_gcs_csv_truncate] + # [END bigquery_load_table_gcs_json_truncate] + # [END bigquery_load_table_gcs_parquet_truncate] + # [END bigquery_load_table_gcs_orc_truncate] + + # Format-specific code + # [START bigquery_load_table_gcs_csv_truncate] + job_config.skip_leading_rows = 1 + # The source format defaults to CSV, so the line below is optional. + job_config.source_format = bigquery.SourceFormat.CSV + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.csv' + # [END bigquery_load_table_gcs_csv_truncate] + # unset csv-specific attribute + del job_config._properties['load']['skipLeadingRows'] + + # [START bigquery_load_table_gcs_json_truncate] + job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.json' + # [END bigquery_load_table_gcs_json_truncate] + + # [START bigquery_load_table_gcs_parquet_truncate] + job_config.source_format = bigquery.SourceFormat.PARQUET + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.parquet' + # [END bigquery_load_table_gcs_parquet_truncate] + + # [START bigquery_load_table_gcs_orc_truncate] + job_config.source_format = bigquery.SourceFormat.ORC + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.orc' + # [END bigquery_load_table_gcs_orc_truncate] + + # Shared code + # [START bigquery_load_table_gcs_csv_truncate] + # [START bigquery_load_table_gcs_json_truncate] + # [START bigquery_load_table_gcs_parquet_truncate] + # [START bigquery_load_table_gcs_orc_truncate] + load_job = client.load_table_from_uri( + uri, + table_ref, + job_config=job_config) # API request + print('Starting job {}'.format(load_job.job_id)) + + load_job.result() # Waits for table load to complete. + print('Job finished.') + + destination_table = client.get_table(table_ref) + print('Loaded {} rows.'.format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_csv_truncate] + # [END bigquery_load_table_gcs_json_truncate] + # [END bigquery_load_table_gcs_parquet_truncate] + # [END bigquery_load_table_gcs_orc_truncate] + + out, _ = capsys.readouterr() + assert 'Loaded 50 rows.' in out + + +def test_load_table_add_column(client, to_delete): + dataset_id = 'load_table_add_column_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) + dataset.location = 'US' + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + snippets_dir = os.path.abspath(os.path.dirname(__file__)) + filepath = os.path.join( + snippets_dir, '..', '..', 'bigquery', 'tests', 'data', 'people.csv') + table_ref = dataset_ref.table('my_table') + old_schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + ] + table = client.create_table(bigquery.Table(table_ref, schema=old_schema)) + + # [START bigquery_add_column_load_append] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + # filepath = 'path/to/your_file.csv' + + # Retrieves the destination table and checks the length of the schema + table_id = 'my_table' + table_ref = dataset_ref.table(table_id) + table = client.get_table(table_ref) + print("Table {} contains {} columns.".format(table_id, len(table.schema))) + + # Configures the load job to append the data to the destination table, + # allowing field addition + job_config = bigquery.LoadJobConfig() + job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND + job_config.schema_update_options = [ + bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION, + ] + # In this example, the existing table contains only the 'full_name' column. + # 'REQUIRED' fields cannot be added to an existing schema, so the + # additional column must be 'NULLABLE'. + job_config.schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='NULLABLE'), + ] + job_config.source_format = bigquery.SourceFormat.CSV + job_config.skip_leading_rows = 1 + + with open(filepath, 'rb') as source_file: + job = client.load_table_from_file( + source_file, + table_ref, + location='US', # Must match the destination dataset location. + job_config=job_config) # API request + + job.result() # Waits for table load to complete. + print('Loaded {} rows into {}:{}.'.format( + job.output_rows, dataset_id, table_ref.table_id)) + + # Checks the updated length of the schema + table = client.get_table(table) + print("Table {} now contains {} columns.".format( + table_id, len(table.schema))) + # [END bigquery_add_column_load_append] + assert len(table.schema) == 2 + assert table.num_rows > 0 + + +def test_load_table_relax_column(client, to_delete): + dataset_id = 'load_table_relax_column_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) + dataset.location = 'US' + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + snippets_dir = os.path.abspath(os.path.dirname(__file__)) + filepath = os.path.join( + snippets_dir, '..', '..', 'bigquery', 'tests', 'data', 'people.csv') + table_ref = dataset_ref.table('my_table') + old_schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField('favorite_color', 'STRING', mode='REQUIRED'), + ] + table = client.create_table(bigquery.Table(table_ref, schema=old_schema)) + + # [START bigquery_relax_column_load_append] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + # filepath = 'path/to/your_file.csv' + + # Retrieves the destination table and checks the number of required fields + table_id = 'my_table' + table_ref = dataset_ref.table(table_id) + table = client.get_table(table_ref) + original_required_fields = sum( + field.mode == 'REQUIRED' for field in table.schema) + # In this example, the existing table has 3 required fields. + print("{} fields in the schema are required.".format( + original_required_fields)) + + # Configures the load job to append the data to a destination table, + # allowing field relaxation + job_config = bigquery.LoadJobConfig() + job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND + job_config.schema_update_options = [ + bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + ] + # In this example, the existing table contains three required fields + # ('full_name', 'age', and 'favorite_color'), while the data to load + # contains only the first two fields. + job_config.schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + ] + job_config.source_format = bigquery.SourceFormat.CSV + job_config.skip_leading_rows = 1 + + with open(filepath, 'rb') as source_file: + job = client.load_table_from_file( + source_file, + table_ref, + location='US', # Must match the destination dataset location. + job_config=job_config) # API request + + job.result() # Waits for table load to complete. + print('Loaded {} rows into {}:{}.'.format( + job.output_rows, dataset_id, table_ref.table_id)) + + # Checks the updated number of required fields + table = client.get_table(table) + current_required_fields = sum( + field.mode == 'REQUIRED' for field in table.schema) + print("{} fields in the schema are now required.".format( + current_required_fields)) + # [END bigquery_relax_column_load_append] + assert original_required_fields - current_required_fields == 1 + assert len(table.schema) == 3 + assert table.schema[2].mode == 'NULLABLE' + assert table.num_rows > 0 + + +def test_copy_table(client, to_delete): + dataset_id = 'copy_table_dataset_{}'.format(_millis()) + dest_dataset = bigquery.Dataset(client.dataset(dataset_id)) + dest_dataset.location = 'US' + dest_dataset = client.create_dataset(dest_dataset) + to_delete.append(dest_dataset) + + # [START bigquery_copy_table] + # from google.cloud import bigquery + # client = bigquery.Client() + + source_dataset = client.dataset('samples', project='bigquery-public-data') + source_table_ref = source_dataset.table('shakespeare') + + # dataset_id = 'my_dataset' + dest_table_ref = client.dataset(dataset_id).table('destination_table') + + job = client.copy_table( + source_table_ref, + dest_table_ref, + # Location must match that of the source and destination tables. + location='US') # API request + + job.result() # Waits for job to complete. + + assert job.state == 'DONE' + dest_table = client.get_table(dest_table_ref) # API request + assert dest_table.num_rows > 0 + # [END bigquery_copy_table] + + +def test_copy_table_multiple_source(client, to_delete): + dest_dataset_id = 'dest_dataset_{}'.format(_millis()) + dest_dataset = bigquery.Dataset(client.dataset(dest_dataset_id)) + dest_dataset.location = 'US' + dest_dataset = client.create_dataset(dest_dataset) + to_delete.append(dest_dataset) + + source_dataset_id = 'source_dataset_{}'.format(_millis()) + source_dataset = bigquery.Dataset(client.dataset(source_dataset_id)) + source_dataset.location = 'US' + source_dataset = client.create_dataset(source_dataset) + to_delete.append(source_dataset) + + schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING') + ] + + table_data = {'table1': b'Washington,WA', 'table2': b'California,CA'} + for table_id, data in table_data.items(): + table_ref = source_dataset.table(table_id) + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + body = six.BytesIO(data) + client.load_table_from_file( + body, + table_ref, + # Location must match that of the destination dataset. + location='US', + job_config=job_config).result() + + # [START bigquery_copy_table_multiple_source] + # from google.cloud import bigquery + # client = bigquery.Client() + # source_dataset_id = 'my_source_dataset' + # dest_dataset_id = 'my_destination_dataset' + + table1_ref = client.dataset(source_dataset_id).table('table1') + table2_ref = client.dataset(source_dataset_id).table('table2') + dest_table_ref = client.dataset(dest_dataset_id).table('destination_table') + + job = client.copy_table( + [table1_ref, table2_ref], + dest_table_ref, + # Location must match that of the source and destination tables. + location='US') # API request + job.result() # Waits for job to complete. + + assert job.state == 'DONE' + dest_table = client.get_table(dest_table_ref) # API request + assert dest_table.num_rows > 0 + # [END bigquery_copy_table_multiple_source] + + assert dest_table.num_rows == 2 + + +def test_copy_table_cmek(client, to_delete): + dataset_id = 'copy_table_cmek_{}'.format(_millis()) + dest_dataset = bigquery.Dataset(client.dataset(dataset_id)) + dest_dataset.location = 'US' + dest_dataset = client.create_dataset(dest_dataset) + to_delete.append(dest_dataset) + + # [START bigquery_copy_table_cmek] + # from google.cloud import bigquery + # client = bigquery.Client() + + source_dataset = bigquery.DatasetReference( + 'bigquery-public-data', 'samples') + source_table_ref = source_dataset.table('shakespeare') + + # dataset_id = 'my_dataset' + dest_dataset_ref = client.dataset(dataset_id) + dest_table_ref = dest_dataset_ref.table('destination_table') + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + kms_key_name = 'projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}'.format( + 'cloud-samples-tests', 'us-central1', 'test', 'test') + encryption_config = bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name) + job_config = bigquery.CopyJobConfig() + job_config.destination_encryption_configuration = encryption_config + + job = client.copy_table( + source_table_ref, + dest_table_ref, + # Location must match that of the source and destination tables. + location='US', + job_config=job_config) # API request + job.result() # Waits for job to complete. + + assert job.state == 'DONE' + dest_table = client.get_table(dest_table_ref) + assert dest_table.encryption_configuration.kms_key_name == kms_key_name + # [END bigquery_copy_table_cmek] + + +def test_extract_table(client, to_delete): + from google.cloud import storage + + bucket_name = 'extract_shakespeare_{}'.format(_millis()) + storage_client = storage.Client() + bucket = retry_429(storage_client.create_bucket)(bucket_name) + to_delete.append(bucket) + + # [START bigquery_extract_table] + # from google.cloud import bigquery + # client = bigquery.Client() + # bucket_name = 'my-bucket' + project = 'bigquery-public-data' + dataset_id = 'samples' + table_id = 'shakespeare' + + destination_uri = 'gs://{}/{}'.format(bucket_name, 'shakespeare.csv') + dataset_ref = client.dataset(dataset_id, project=project) + table_ref = dataset_ref.table(table_id) + + extract_job = client.extract_table( + table_ref, + destination_uri, + # Location must match that of the source table. + location='US') # API request + extract_job.result() # Waits for job to complete. + + print('Exported {}:{}.{} to {}'.format( + project, dataset_id, table_id, destination_uri)) + # [END bigquery_extract_table] + + blob = bucket.get_blob('shakespeare.csv') + assert blob.exists + assert blob.size > 0 + to_delete.insert(0, blob) + + +def test_extract_table_json(client, to_delete): + from google.cloud import storage + + bucket_name = 'extract_shakespeare_json_{}'.format(_millis()) + storage_client = storage.Client() + bucket = retry_429(storage_client.create_bucket)(bucket_name) + to_delete.append(bucket) + + # [START bigquery_extract_table_json] + # from google.cloud import bigquery + # client = bigquery.Client() + # bucket_name = 'my-bucket' + + destination_uri = 'gs://{}/{}'.format(bucket_name, 'shakespeare.json') + dataset_ref = client.dataset('samples', project='bigquery-public-data') + table_ref = dataset_ref.table('shakespeare') + job_config = bigquery.job.ExtractJobConfig() + job_config.destination_format = ( + bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON) + + extract_job = client.extract_table( + table_ref, + destination_uri, + job_config=job_config, + # Location must match that of the source table. + location='US') # API request + extract_job.result() # Waits for job to complete. + # [END bigquery_extract_table_json] + + blob = bucket.get_blob('shakespeare.json') + assert blob.exists + assert blob.size > 0 + to_delete.insert(0, blob) + + +def test_extract_table_compressed(client, to_delete): + from google.cloud import storage + + bucket_name = 'extract_shakespeare_compress_{}'.format(_millis()) + storage_client = storage.Client() + bucket = retry_429(storage_client.create_bucket)(bucket_name) + to_delete.append(bucket) + + # [START bigquery_extract_table_compressed] + # from google.cloud import bigquery + # client = bigquery.Client() + # bucket_name = 'my-bucket' + + destination_uri = 'gs://{}/{}'.format(bucket_name, 'shakespeare.csv.gz') + dataset_ref = client.dataset('samples', project='bigquery-public-data') + table_ref = dataset_ref.table('shakespeare') + job_config = bigquery.job.ExtractJobConfig() + job_config.compression = bigquery.Compression.GZIP + + extract_job = client.extract_table( + table_ref, + destination_uri, + # Location must match that of the source table. + location='US', + job_config=job_config) # API request + extract_job.result() # Waits for job to complete. + # [END bigquery_extract_table_compressed] + + blob = bucket.get_blob('shakespeare.csv.gz') + assert blob.exists + assert blob.size > 0 + to_delete.insert(0, blob) + + +def test_delete_table(client, to_delete): + """Delete a table.""" + from google.cloud.exceptions import NotFound + + dataset_id = 'delete_table_dataset_{}'.format(_millis()) + table_id = 'delete_table_table_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) + dataset.location = 'US' + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + table_ref = dataset.table(table_id) + table = bigquery.Table(table_ref, schema=SCHEMA) + client.create_table(table) + # [START bigquery_delete_table] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + # table_id = 'my_table' + + table_ref = client.dataset(dataset_id).table(table_id) + client.delete_table(table_ref) # API request + + print('Table {}:{} deleted.'.format(dataset_id, table_id)) + # [END bigquery_delete_table] + + with pytest.raises(NotFound): + client.get_table(table) # API request + + +def test_undelete_table(client, to_delete): + dataset_id = 'undelete_table_dataset_{}'.format(_millis()) + table_id = 'undelete_table_table_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset.location = 'US' + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) + client.create_table(table) + + # [START bigquery_undelete_table] + # TODO(developer): Uncomment the lines below and replace with your values. + # import time + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' # Replace with your dataset ID. + # table_id = 'my_table' # Replace with your table ID. + + table_ref = client.dataset(dataset_id).table(table_id) + + # TODO(developer): Choose an appropriate snapshot point as epoch + # milliseconds. For this example, we choose the current time as we're about + # to delete the table immediately afterwards. + snapshot_epoch = int(time.time() * 1000) + # [END bigquery_undelete_table] + + # Due to very short lifecycle of the table, ensure we're not picking a time + # prior to the table creation due to time drift between backend and client. + table = client.get_table(table_ref) + created_epoch = datetime_helpers.to_microseconds(table.created) + if created_epoch > snapshot_epoch: + snapshot_epoch = created_epoch + + # [START bigquery_undelete_table] + + # "Accidentally" delete the table. + client.delete_table(table_ref) # API request + + # Construct the restore-from table ID using a snapshot decorator. + snapshot_table_id = '{}@{}'.format(table_id, snapshot_epoch) + source_table_ref = client.dataset(dataset_id).table(snapshot_table_id) + + # Choose a new table ID for the recovered table data. + recovered_table_id = '{}_recovered'.format(table_id) + dest_table_ref = client.dataset(dataset_id).table(recovered_table_id) + + # Construct and run a copy job. + job = client.copy_table( + source_table_ref, + dest_table_ref, + # Location must match that of the source and destination tables. + location='US') # API request + + job.result() # Waits for job to complete. + + print('Copied data from deleted table {} to {}'.format( + table_id, recovered_table_id)) + # [END bigquery_undelete_table] + + +def test_client_query(client): + """Run a simple query.""" + + # [START bigquery_query] + # from google.cloud import bigquery + # client = bigquery.Client() + + query = ( + 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' + 'WHERE state = "TX" ' + 'LIMIT 100') + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. + location='US') # API request - starts the query + + for row in query_job: # API request - fetches results + # Row values can be accessed by field name or index + assert row[0] == row.name == row['name'] + print(row) + # [END bigquery_query] + + +def test_client_query_legacy_sql(client): + """Run a query with Legacy SQL explicitly set""" + # [START bigquery_query_legacy] + # from google.cloud import bigquery + # client = bigquery.Client() + + query = ( + 'SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] ' + 'WHERE state = "TX" ' + 'LIMIT 100') + + # Set use_legacy_sql to True to use legacy SQL syntax. + job_config = bigquery.QueryJobConfig() + job_config.use_legacy_sql = True + + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request - starts the query + + # Print the results. + for row in query_job: # API request - fetches results + print(row) + # [END bigquery_query_legacy] + + +def test_manage_job(client): + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + location = 'us' + job = client.query(sql, location=location) + job_id = job.job_id + + # [START bigquery_cancel_job] + # TODO(developer): Uncomment the lines below and replace with your values. + # from google.cloud import bigquery + # client = bigquery.Client() + # job_id = 'bq-job-123x456-123y123z123c' # replace with your job ID + # location = 'us' # replace with your location + + job = client.cancel_job(job_id, location=location) + # [END bigquery_cancel_job] + + # [START bigquery_get_job] + # TODO(developer): Uncomment the lines below and replace with your values. + # from google.cloud import bigquery + # client = bigquery.Client() + # job_id = 'bq-job-123x456-123y123z123c' # replace with your job ID + # location = 'us' # replace with your location + + job = client.get_job(job_id, location=location) # API request + + # Print selected job properties + print('Details for job {} running in {}:'.format(job_id, location)) + print('\tType: {}\n\tState: {}\n\tCreated: {}'.format( + job.job_type, job.state, job.created)) + # [END bigquery_get_job] + + +def test_client_query_destination_table(client, to_delete): + """Run a query""" + dataset_id = 'query_destination_table_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + to_delete.append(dataset_ref) + dataset = bigquery.Dataset(dataset_ref) + dataset.location = 'US' + client.create_dataset(dataset) + + # [START bigquery_query_destination_table] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'your_dataset_id' + + job_config = bigquery.QueryJobConfig() + # Set the destination table + table_ref = client.dataset(dataset_id).table('your_table_id') + job_config.destination = table_ref + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + + # Start the query, passing in the extra configuration. + query_job = client.query( + sql, + # Location must match that of the dataset(s) referenced in the query + # and of the destination table. + location='US', + job_config=job_config) # API request - starts the query + + query_job.result() # Waits for the query to finish + print('Query results loaded to table {}'.format(table_ref.path)) + # [END bigquery_query_destination_table] + + +def test_client_query_destination_table_legacy(client, to_delete): + dataset_id = 'query_destination_table_legacy_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + to_delete.append(dataset_ref) + dataset = bigquery.Dataset(dataset_ref) + dataset.location = 'US' + client.create_dataset(dataset) + + # [START bigquery_query_legacy_large_results] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'your_dataset_id' + + job_config = bigquery.QueryJobConfig() + # Set use_legacy_sql to True to use legacy SQL syntax. + job_config.use_legacy_sql = True + # Set the destination table + table_ref = client.dataset(dataset_id).table('your_table_id') + job_config.destination = table_ref + job_config.allow_large_results = True + sql = """ + SELECT corpus + FROM [bigquery-public-data:samples.shakespeare] + GROUP BY corpus; + """ + # Start the query, passing in the extra configuration. + query_job = client.query( + sql, + # Location must match that of the dataset(s) referenced in the query + # and of the destination table. + location='US', + job_config=job_config) # API request - starts the query + + query_job.result() # Waits for the query to finish + print('Query results loaded to table {}'.format(table_ref.path)) + # [END bigquery_query_legacy_large_results] + + +def test_client_query_destination_table_cmek(client, to_delete): + """Run a query""" + dataset_id = 'query_destination_table_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + to_delete.append(dataset_ref) + dataset = bigquery.Dataset(dataset_ref) + dataset.location = 'US' + client.create_dataset(dataset) + + # [START bigquery_query_destination_table_cmek] + # from google.cloud import bigquery + # client = bigquery.Client() + + job_config = bigquery.QueryJobConfig() + + # Set the destination table. Here, dataset_id is a string, such as: + # dataset_id = 'your_dataset_id' + table_ref = client.dataset(dataset_id).table('your_table_id') + job_config.destination = table_ref + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + kms_key_name = 'projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}'.format( + 'cloud-samples-tests', 'us-central1', 'test', 'test') + encryption_config = bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name) + job_config.destination_encryption_configuration = encryption_config + + # Start the query, passing in the extra configuration. + query_job = client.query( + 'SELECT 17 AS my_col;', + # Location must match that of the dataset(s) referenced in the query + # and of the destination table. + location='US', + job_config=job_config) # API request - starts the query + query_job.result() + + # The destination table is written using the encryption configuration. + table = client.get_table(table_ref) + assert table.encryption_configuration.kms_key_name == kms_key_name + # [END bigquery_query_destination_table_cmek] + + +def test_client_query_batch(client, to_delete): + # [START bigquery_query_batch] + # from google.cloud import bigquery + # client = bigquery.Client() + + job_config = bigquery.QueryJobConfig() + # Run at batch priority, which won't count toward concurrent rate limit. + job_config.priority = bigquery.QueryPriority.BATCH + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + # Location must match that of the dataset(s) referenced in the query. + location = 'US' + + # API request - starts the query + query_job = client.query(sql, location=location, job_config=job_config) + + # Check on the progress by getting the job's updated state. Once the state + # is `DONE`, the results are ready. + query_job = client.get_job( + query_job.job_id, location=location) # API request - fetches job + print('Job {} is currently in state {}'.format( + query_job.job_id, query_job.state)) + # [END bigquery_query_batch] + + +def test_client_query_relax_column(client, to_delete): + dataset_id = 'query_relax_column_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) + dataset.location = 'US' + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + table_ref = dataset_ref.table('my_table') + schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + ] + table = client.create_table( + bigquery.Table(table_ref, schema=schema)) + + # [START bigquery_relax_column_query_append] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + + # Retrieves the destination table and checks the number of required fields + table_id = 'my_table' + table_ref = dataset_ref.table(table_id) + table = client.get_table(table_ref) + original_required_fields = sum( + field.mode == 'REQUIRED' for field in table.schema) + # In this example, the existing table has 2 required fields + print("{} fields in the schema are required.".format( + original_required_fields)) + + # Configures the query to append the results to a destination table, + # allowing field relaxation + job_config = bigquery.QueryJobConfig() + job_config.schema_update_options = [ + bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + ] + job_config.destination = table_ref + job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND + + query_job = client.query( + # In this example, the existing table contains 'full_name' and 'age' as + # required columns, but the query results will omit the second column. + 'SELECT "Beyonce" as full_name;', + # Location must match that of the dataset(s) referenced in the query + # and of the destination table. + location='US', + job_config=job_config + ) # API request - starts the query + + query_job.result() # Waits for the query to finish + print("Query job {} complete.".format(query_job.job_id)) + + # Checks the updated number of required fields + table = client.get_table(table) + current_required_fields = sum( + field.mode == 'REQUIRED' for field in table.schema) + print("{} fields in the schema are now required.".format( + current_required_fields)) + # [END bigquery_relax_column_query_append] + assert original_required_fields - current_required_fields > 0 + assert len(table.schema) == 2 + assert table.schema[1].mode == 'NULLABLE' + assert table.num_rows > 0 + + +def test_client_query_add_column(client, to_delete): + dataset_id = 'query_add_column_{}'.format(_millis()) + dataset_ref = client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) + dataset.location = 'US' + dataset = client.create_dataset(dataset) + to_delete.append(dataset) + + table_ref = dataset_ref.table('my_table') + schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + ] + table = client.create_table(bigquery.Table(table_ref, schema=schema)) + + # [START bigquery_add_column_query_append] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + + # Retrieves the destination table and checks the length of the schema + table_id = 'my_table' + table_ref = dataset_ref.table(table_id) + table = client.get_table(table_ref) + print("Table {} contains {} columns.".format(table_id, len(table.schema))) + + # Configures the query to append the results to a destination table, + # allowing field addition + job_config = bigquery.QueryJobConfig() + job_config.schema_update_options = [ + bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION, + ] + job_config.destination = table_ref + job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND + + query_job = client.query( + # In this example, the existing table contains only the 'full_name' and + # 'age' columns, while the results of this query will contain an + # additional 'favorite_color' column. + 'SELECT "Timmy" as full_name, 85 as age, "Blue" as favorite_color;', + # Location must match that of the dataset(s) referenced in the query + # and of the destination table. + location='US', + job_config=job_config + ) # API request - starts the query + + query_job.result() # Waits for the query to finish + print("Query job {} complete.".format(query_job.job_id)) + + # Checks the updated length of the schema + table = client.get_table(table) + print("Table {} now contains {} columns.".format( + table_id, len(table.schema))) + # [END bigquery_add_column_query_append] + assert len(table.schema) == 3 + assert table.num_rows > 0 + + +def test_client_query_w_named_params(client, capsys): + """Run a query using named query parameters""" + + # [START bigquery_query_params_named] + # from google.cloud import bigquery + # client = bigquery.Client() + + query = """ + SELECT word, word_count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = @corpus + AND word_count >= @min_word_count + ORDER BY word_count DESC; + """ + query_params = [ + bigquery.ScalarQueryParameter('corpus', 'STRING', 'romeoandjuliet'), + bigquery.ScalarQueryParameter('min_word_count', 'INT64', 250) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request - starts the query + + # Print the results + for row in query_job: + print('{}: \t{}'.format(row.word, row.word_count)) + + assert query_job.state == 'DONE' + # [END bigquery_query_params_named] + + out, _ = capsys.readouterr() + assert 'the' in out + + +def test_client_query_w_positional_params(client, capsys): + """Run a query using query parameters""" + + # [START bigquery_query_params_positional] + # from google.cloud import bigquery + # client = bigquery.Client() + + query = """ + SELECT word, word_count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = ? + AND word_count >= ? + ORDER BY word_count DESC; + """ + # Set the name to None to use positional parameters. + # Note that you cannot mix named and positional parameters. + query_params = [ + bigquery.ScalarQueryParameter(None, 'STRING', 'romeoandjuliet'), + bigquery.ScalarQueryParameter(None, 'INT64', 250) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request - starts the query + + # Print the results + for row in query_job: + print('{}: \t{}'.format(row.word, row.word_count)) + + assert query_job.state == 'DONE' + # [END bigquery_query_params_positional] + + out, _ = capsys.readouterr() + assert 'the' in out + + +def test_client_query_w_timestamp_params(client, capsys): + """Run a query using query parameters""" + + # [START bigquery_query_params_timestamps] + # from google.cloud import bigquery + # client = bigquery.Client() + + import datetime + import pytz + + query = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);' + query_params = [ + bigquery.ScalarQueryParameter( + 'ts_value', + 'TIMESTAMP', + datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC)) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request - starts the query + + # Print the results + for row in query_job: + print(row) + + assert query_job.state == 'DONE' + # [END bigquery_query_params_timestamps] + + out, _ = capsys.readouterr() + assert '2016, 12, 7, 9, 0' in out + + +def test_client_query_w_array_params(client, capsys): + """Run a query using array query parameters""" + # [START bigquery_query_params_arrays] + # from google.cloud import bigquery + # client = bigquery.Client() + + query = """ + SELECT name, sum(number) as count + FROM `bigquery-public-data.usa_names.usa_1910_2013` + WHERE gender = @gender + AND state IN UNNEST(@states) + GROUP BY name + ORDER BY count DESC + LIMIT 10; + """ + query_params = [ + bigquery.ScalarQueryParameter('gender', 'STRING', 'M'), + bigquery.ArrayQueryParameter( + 'states', 'STRING', ['WA', 'WI', 'WV', 'WY']) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request - starts the query + + # Print the results + for row in query_job: + print('{}: \t{}'.format(row.name, row.count)) + + assert query_job.state == 'DONE' + # [END bigquery_query_params_arrays] + + out, _ = capsys.readouterr() + assert 'James' in out + + +def test_client_query_w_struct_params(client, capsys): + """Run a query using struct query parameters""" + # [START bigquery_query_params_structs] + # from google.cloud import bigquery + # client = bigquery.Client() + + query = 'SELECT @struct_value AS s;' + query_params = [ + bigquery.StructQueryParameter( + 'struct_value', + bigquery.ScalarQueryParameter('x', 'INT64', 1), + bigquery.ScalarQueryParameter('y', 'STRING', 'foo') + ) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request - starts the query + + # Print the results + for row in query_job: + print(row.s) + + assert query_job.state == 'DONE' + # [END bigquery_query_params_structs] + + out, _ = capsys.readouterr() + assert '1' in out + assert 'foo' in out + + +def test_client_query_dry_run(client): + """Run a dry run query""" + + # [START bigquery_query_dry_run] + # from google.cloud import bigquery + # client = bigquery.Client() + + job_config = bigquery.QueryJobConfig() + job_config.dry_run = True + job_config.use_query_cache = False + query_job = client.query( + ('SELECT name, COUNT(*) as name_count ' + 'FROM `bigquery-public-data.usa_names.usa_1910_2013` ' + "WHERE state = 'WA' " + 'GROUP BY name'), + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request + + # A dry run query completes immediately. + assert query_job.state == 'DONE' + assert query_job.dry_run + + print("This query will process {} bytes.".format( + query_job.total_bytes_processed)) + # [END bigquery_query_dry_run] + + assert query_job.total_bytes_processed > 0 + + +def test_query_no_cache(client): + # [START bigquery_query_no_cache] + # from google.cloud import bigquery + # client = bigquery.Client() + + job_config = bigquery.QueryJobConfig() + job_config.use_query_cache = False + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + query_job = client.query( + sql, + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request + + # Print the results. + for row in query_job: # API request - fetches results + print(row) + # [END bigquery_query_no_cache] + + +def test_query_external_gcs_temporary_table(client): + # [START bigquery_query_external_gcs_temp] + # from google.cloud import bigquery + # client = bigquery.Client() + + # Configure the external data source and query job + external_config = bigquery.ExternalConfig('CSV') + external_config.source_uris = [ + 'gs://cloud-samples-data/bigquery/us-states/us-states.csv', + ] + external_config.schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING') + ] + external_config.options.skip_leading_rows = 1 # optionally skip header row + table_id = 'us_states' + job_config = bigquery.QueryJobConfig() + job_config.table_definitions = {table_id: external_config} + + # Example query to find states starting with 'W' + sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id) + + query_job = client.query(sql, job_config=job_config) # API request + + w_states = list(query_job) # Waits for query to finish + print('There are {} states with names starting with W.'.format( + len(w_states))) + # [END bigquery_query_external_gcs_temp] + assert len(w_states) == 4 + + +def test_query_external_gcs_permanent_table(client, to_delete): + dataset_id = 'query_external_gcs_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_query_external_gcs_perm] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + # Configure the external data source + dataset_ref = client.dataset(dataset_id) + table_id = 'us_states' + schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING') + ] + table = bigquery.Table(dataset_ref.table(table_id), schema=schema) + external_config = bigquery.ExternalConfig('CSV') + external_config.source_uris = [ + 'gs://cloud-samples-data/bigquery/us-states/us-states.csv', + ] + external_config.options.skip_leading_rows = 1 # optionally skip header row + table.external_data_configuration = external_config + + # Create a permanent table linked to the GCS file + table = client.create_table(table) # API request + + # Example query to find states starting with 'W' + sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format( + dataset_id, table_id) + + query_job = client.query(sql) # API request + + w_states = list(query_job) # Waits for query to finish + print('There are {} states with names starting with W.'.format( + len(w_states))) + # [END bigquery_query_external_gcs_perm] + assert len(w_states) == 4 + + +def test_query_external_sheets_temporary_table(client): + # [START bigquery_query_external_sheets_temp] + # [START bigquery_auth_drive_scope] + import google.auth + # from google.cloud import bigquery + + # Create credentials with Drive & BigQuery API scopes + # Both APIs must be enabled for your project before running this code + credentials, project = google.auth.default(scopes=[ + 'https://www.googleapis.com/auth/drive', + 'https://www.googleapis.com/auth/bigquery', + ]) + client = bigquery.Client(credentials=credentials, project=project) + # [END bigquery_auth_drive_scope] + + # Configure the external data source and query job + external_config = bigquery.ExternalConfig('GOOGLE_SHEETS') + # Use a shareable link or grant viewing access to the email address you + # used to authenticate with BigQuery (this example Sheet is public) + sheet_url = ( + 'https://docs.google.com/spreadsheets' + '/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing') + external_config.source_uris = [sheet_url] + external_config.schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING') + ] + external_config.options.skip_leading_rows = 1 # optionally skip header row + table_id = 'us_states' + job_config = bigquery.QueryJobConfig() + job_config.table_definitions = {table_id: external_config} + + # Example query to find states starting with 'W' + sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id) + + query_job = client.query(sql, job_config=job_config) # API request + + w_states = list(query_job) # Waits for query to finish + print('There are {} states with names starting with W.'.format( + len(w_states))) + # [END bigquery_query_external_sheets_temp] + assert len(w_states) == 4 + + +def test_query_external_sheets_permanent_table(client, to_delete): + dataset_id = 'query_external_sheets_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_query_external_sheets_perm] + import google.auth + # from google.cloud import bigquery + # dataset_id = 'my_dataset' + + # Create credentials with Drive & BigQuery API scopes + # Both APIs must be enabled for your project before running this code + credentials, project = google.auth.default(scopes=[ + 'https://www.googleapis.com/auth/drive', + 'https://www.googleapis.com/auth/bigquery', + ]) + client = bigquery.Client(credentials=credentials, project=project) + + # Configure the external data source + dataset_ref = client.dataset(dataset_id) + table_id = 'us_states' + schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING') + ] + table = bigquery.Table(dataset_ref.table(table_id), schema=schema) + external_config = bigquery.ExternalConfig('GOOGLE_SHEETS') + # Use a shareable link or grant viewing access to the email address you + # used to authenticate with BigQuery (this example Sheet is public) + sheet_url = ( + 'https://docs.google.com/spreadsheets' + '/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing') + external_config.source_uris = [sheet_url] + external_config.options.skip_leading_rows = 1 # optionally skip header row + table.external_data_configuration = external_config + + # Create a permanent table linked to the Sheets file + table = client.create_table(table) # API request + + # Example query to find states starting with 'W' + sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format( + dataset_id, table_id) + + query_job = client.query(sql) # API request + + w_states = list(query_job) # Waits for query to finish + print('There are {} states with names starting with W.'.format( + len(w_states))) + # [END bigquery_query_external_sheets_perm] + assert len(w_states) == 4 + + +def test_ddl_create_view(client, to_delete, capsys): + """Create a view via a DDL query.""" + project = client.project + dataset_id = 'ddl_view_{}'.format(_millis()) + table_id = 'new_view' + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_ddl_create_view] + # from google.cloud import bigquery + # project = 'my-project' + # dataset_id = 'my_dataset' + # table_id = 'new_view' + # client = bigquery.Client(project=project) + + sql = """ + CREATE VIEW `{}.{}.{}` + OPTIONS( + expiration_timestamp=TIMESTAMP_ADD( + CURRENT_TIMESTAMP(), INTERVAL 48 HOUR), + friendly_name="new_view", + description="a view that expires in 2 days", + labels=[("org_unit", "development")] + ) + AS SELECT name, state, year, number + FROM `bigquery-public-data.usa_names.usa_1910_current` + WHERE state LIKE 'W%' + """.format(project, dataset_id, table_id) + + job = client.query(sql) # API request. + job.result() # Waits for the query to finish. + + print('Created new view "{}.{}.{}".'.format( + job.destination.project, + job.destination.dataset_id, + job.destination.table_id)) + # [END bigquery_ddl_create_view] + + out, _ = capsys.readouterr() + assert 'Created new view "{}.{}.{}".'.format( + project, dataset_id, table_id) in out + + # Test that listing query result rows succeeds so that generic query + # processing tools work with DDL statements. + rows = list(job) + assert len(rows) == 0 + + if pandas is not None: + df = job.to_dataframe() + assert len(df) == 0 + + +def test_client_list_jobs(client): + """List jobs for a project.""" + + # [START bigquery_list_jobs] + # TODO(developer): Uncomment the lines below and replace with your values. + # from google.cloud import bigquery + # project = 'my_project' # replace with your project ID + # client = bigquery.Client(project=project) + import datetime + + # List the 10 most recent jobs in reverse chronological order. + # Omit the max_results parameter to list jobs from the past 6 months. + print("Last 10 jobs:") + for job in client.list_jobs(max_results=10): # API request(s) + print(job.job_id) + + # The following are examples of additional optional parameters: + + # Use min_creation_time and/or max_creation_time to specify a time window. + print("Jobs from the last ten minutes:") + ten_mins_ago = datetime.datetime.utcnow() - datetime.timedelta(minutes=10) + for job in client.list_jobs(min_creation_time=ten_mins_ago): + print(job.job_id) + + # Use all_users to include jobs run by all users in the project. + print("Last 10 jobs run by all users:") + for job in client.list_jobs(max_results=10, all_users=True): + print("{} run by user: {}".format(job.job_id, job.user_email)) + + # Use state_filter to filter by job state. + print("Jobs currently running:") + for job in client.list_jobs(state_filter='RUNNING'): + print(job.job_id) + # [END bigquery_list_jobs] + + +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +def test_query_results_as_dataframe(client): + # [START bigquery_query_results_dataframe] + # from google.cloud import bigquery + # client = bigquery.Client() + + sql = """ + SELECT name, SUM(number) as count + FROM `bigquery-public-data.usa_names.usa_1910_current` + GROUP BY name + ORDER BY count DESC + LIMIT 10 + """ + + df = client.query(sql).to_dataframe() + # [END bigquery_query_results_dataframe] + assert isinstance(df, pandas.DataFrame) + assert len(list(df)) == 2 # verify the number of columns + assert len(df) == 10 # verify the number of rows + + +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +def test_list_rows_as_dataframe(client): + # [START bigquery_list_rows_dataframe] + # from google.cloud import bigquery + # client = bigquery.Client() + + dataset_ref = client.dataset('samples', project='bigquery-public-data') + table_ref = dataset_ref.table('shakespeare') + table = client.get_table(table_ref) + + df = client.list_rows(table).to_dataframe() + # [END bigquery_list_rows_dataframe] + assert isinstance(df, pandas.DataFrame) + assert len(list(df)) == len(table.schema) # verify the number of columns + assert len(df) == table.num_rows # verify the number of rows + + +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +@pytest.mark.skipif(pyarrow is None, reason='Requires `pyarrow`') +def test_load_table_from_dataframe(client, to_delete): + dataset_id = 'load_table_dataframe_dataset_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_load_table_dataframe] + # from google.cloud import bigquery + # import pandas + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + dataset_ref = client.dataset(dataset_id) + table_ref = dataset_ref.table('monty_python') + records = [ + {'title': 'The Meaning of Life', 'release_year': 1983}, + {'title': 'Monty Python and the Holy Grail', 'release_year': 1975}, + {'title': 'Life of Brian', 'release_year': 1979}, + { + 'title': 'And Now for Something Completely Different', + 'release_year': 1971 + }, + ] + # Optionally set explicit indices. + # If indices are not specified, a column will be created for the default + # indices created by pandas. + index = ['Q24980', 'Q25043', 'Q24953', 'Q16403'] + dataframe = pandas.DataFrame( + records, index=pandas.Index(index, name='wikidata_id')) + + job = client.load_table_from_dataframe(dataframe, table_ref, location='US') + + job.result() # Waits for table load to complete. + + assert job.state == 'DONE' + table = client.get_table(table_ref) + assert table.num_rows == 4 + # [END bigquery_load_table_dataframe] + column_names = [field.name for field in table.schema] + assert sorted(column_names) == ['release_year', 'title', 'wikidata_id'] + + +if __name__ == '__main__': + pytest.main() diff --git a/packages/google-cloud-bigquery/docs/usage.html b/packages/google-cloud-bigquery/docs/usage.html new file mode 100644 index 000000000000..9b81d6976cda --- /dev/null +++ b/packages/google-cloud-bigquery/docs/usage.html @@ -0,0 +1,8 @@ + + + + + + diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/nox.py index a9d9242c55ac..902b6e663633 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/nox.py @@ -147,10 +147,7 @@ def snippets(session, py): # Run py.test against the system tests. session.run( - 'py.test', - os.path.join(os.pardir, 'docs', 'bigquery', 'snippets.py'), - *session.posargs - ) + 'py.test', os.path.join('docs', 'snippets.py'), *session.posargs) @nox.session @@ -167,7 +164,7 @@ def lint(session): session.run('flake8', os.path.join('google', 'cloud', 'bigquery')) session.run('flake8', 'tests') session.run( - 'flake8', os.path.join(os.pardir, 'docs', 'bigquery', 'snippets.py')) + 'flake8', os.path.join('docs', 'snippets.py')) @nox.session From cf7eb659c8b89629b1696e08118f3dc71d41df40 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 14 Sep 2018 15:13:56 -0400 Subject: [PATCH 0491/2016] Docs: fix leakage of bigquery/spanner sections into sidebar menu. (#5986) --- packages/google-cloud-bigquery/docs/index.rst | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/index.rst b/packages/google-cloud-bigquery/docs/index.rst index 94580556c862..d24621c246f7 100644 --- a/packages/google-cloud-bigquery/docs/index.rst +++ b/packages/google-cloud-bigquery/docs/index.rst @@ -1,10 +1,10 @@ .. include:: /../bigquery/README.rst Using the Library -================= +----------------- Projects --------- +~~~~~~~~ A project is the top-level container in the ``BigQuery`` API: it is tied closely to billing, and can provide default access control across all its @@ -23,7 +23,7 @@ To override the project inferred from the environment, pass an explicit Project ACLs -~~~~~~~~~~~~ +^^^^^^^^^^^^ Each project has an access control list granting reader / writer / owner permission to one or more entities. This list cannot be queried or set @@ -31,7 +31,7 @@ via the API; it must be managed using the Google Developer Console. Datasets --------- +~~~~~~~~ A dataset represents a collection of tables, and applies several default policies to tables as they are created: @@ -47,7 +47,7 @@ See BigQuery documentation for more information on Dataset operations -~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^ List datasets for the client's project: @@ -99,13 +99,13 @@ Delete a dataset: Tables ------- +~~~~~~ Tables exist within datasets. See BigQuery documentation for more information on `Tables `_. Table operations -~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^ List tables for the dataset: .. literalinclude:: snippets.py @@ -187,7 +187,7 @@ Upload table data from a file: :end-before: [END bigquery_load_from_file] Load table data from Google Cloud Storage -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ See also: `Loading JSON data from Cloud Storage `_. @@ -217,7 +217,7 @@ Load a Parquet file from Cloud Storage: :end-before: [END bigquery_load_table_gcs_parquet] Customer Managed Encryption Keys -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Table data is always encrypted at rest, but BigQuery also provides a way for you to control what keys it uses to encrypt they data. See `Protecting data @@ -270,11 +270,11 @@ Cloud KMS for the destination table. :end-before: [END bigquery_query_destination_table_cmek] Queries -------- +~~~~~~~ Querying data -~~~~~~~~~~~~~ +^^^^^^^^^^^^^ Run a query and wait for it to finish: @@ -286,7 +286,7 @@ Run a query and wait for it to finish: Run a dry run query -~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^ .. literalinclude:: snippets.py :language: python @@ -296,7 +296,7 @@ Run a dry run query Writing query results to a destination table -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ See BigQuery documentation for more information on `writing query results `_. @@ -309,7 +309,7 @@ See BigQuery documentation for more information on Run a query using a named query parameter -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ See BigQuery documentation for more information on `parameterized queries `_. @@ -322,10 +322,10 @@ See BigQuery documentation for more information on Jobs ----- +~~~~ List jobs for a project -~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^ Jobs describe actions performed on data in BigQuery tables: @@ -342,10 +342,10 @@ Jobs describe actions performed on data in BigQuery tables: Using BigQuery with Pandas --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ Retrieve BigQuery data as a Pandas DataFrame -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ As of version 0.29.0, you can use the :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` function to @@ -381,7 +381,7 @@ To retrieve table rows as a :class:`pandas.DataFrame`: :end-before: [END bigquery_list_rows_dataframe] Load a Pandas DataFrame to a BigQuery Table -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ As of version 1.3.0, you can use the :func:`~google.cloud.bigquery.client.Client.load_table_from_dataframe` function @@ -405,7 +405,7 @@ and load it into a new table: :end-before: [END bigquery_load_table_dataframe] API Reference -============= +------------- .. toctree:: :maxdepth: 2 @@ -414,7 +414,7 @@ API Reference dbapi Changelog -========= +--------- For a list of all ``google-cloud-bigquery`` releases: From 980e19554022475e739bf9bb605cf1044b27d529 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 18 Sep 2018 18:20:57 -0400 Subject: [PATCH 0492/2016] Add support for retrying unstructured 429 / 500 / 502 responses. (#6011) Closes #5918. --- .../google/cloud/bigquery/retry.py | 14 ++++++++++++-- .../tests/unit/test_retry.py | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index be279171a479..40e54113d309 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -13,16 +13,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +from google.api_core import exceptions from google.api_core import retry _RETRYABLE_REASONS = frozenset([ - 'backendError', 'rateLimitExceeded', + 'backendError', 'internalError', 'badGateway', ]) +_UNSTRUCTURED_RETRYABLE_TYPES = ( + exceptions.TooManyRequests, + exceptions.InternalServerError, + exceptions.BadGateway, +) + def _should_retry(exc): """Predicate for determining when to retry. @@ -32,8 +39,11 @@ def _should_retry(exc): """ if not hasattr(exc, 'errors'): return False + if len(exc.errors) == 0: - return False + # Check for unstructured error returns, e.g. from GFE + return isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES) + reason = exc.errors[0]['reason'] return reason in _RETRYABLE_REASONS diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index 9f4987fe5a54..2b9f77cb4162 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -47,12 +47,30 @@ def test_w_rateLimitExceeded(self): errors=[{'reason': 'rateLimitExceeded'}], spec=['errors']) self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_too_many_requests(self): + from google.api_core.exceptions import TooManyRequests + + exc = TooManyRequests('testing') + self.assertTrue(self._call_fut(exc)) + def test_w_internalError(self): exc = mock.Mock( errors=[{'reason': 'internalError'}], spec=['errors']) self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_internal_server_error(self): + from google.api_core.exceptions import InternalServerError + + exc = InternalServerError('testing') + self.assertTrue(self._call_fut(exc)) + def test_w_badGateway(self): exc = mock.Mock( errors=[{'reason': 'badGateway'}], spec=['errors']) self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_bad_gateway(self): + from google.api_core.exceptions import BadGateway + + exc = BadGateway('testing') + self.assertTrue(self._call_fut(exc)) From e6edfd2b63091c9f0baf0f9c51eb9ff96f78bd31 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 21 Sep 2018 09:37:24 -0700 Subject: [PATCH 0493/2016] Populate the jobReference from the API response. (#6044) * Populate the jobReference from the API response. Before this change, the location field was not populated after inititial construction. This is a problem when no location is provided but it has been auto-detected based on the resources referenced in the job (such as via the tables referenced in a query). * Remove reference to _job_ref in system tests. * Fix lint error --- .../google/cloud/bigquery/job.py | 37 +++++++++++++------ .../google-cloud-bigquery/tests/system.py | 2 +- .../tests/unit/test_client.py | 33 +++++++++++++++++ .../tests/unit/test_job.py | 28 +++++++++++--- 4 files changed, 82 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index a4e57ef3e884..7a4478551336 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -284,19 +284,26 @@ class _AsyncJob(google.api_core.future.polling.PollingFuture): """ def __init__(self, job_id, client): super(_AsyncJob, self).__init__() + + # The job reference can be either a plain job ID or the full resource. + # Populate the properties dictionary consistently depending on what has + # been passed in. job_ref = job_id if not isinstance(job_id, _JobReference): job_ref = _JobReference(job_id, client.project, None) - self._job_ref = job_ref + self._properties = { + 'jobReference': job_ref._to_api_repr(), + } + self._client = client - self._properties = {} self._result_set = False self._completion_lock = threading.Lock() @property def job_id(self): """str: ID of the job.""" - return self._job_ref.job_id + return _helpers._get_sub_prop( + self._properties, ['jobReference', 'jobId']) @property def project(self): @@ -305,12 +312,14 @@ def project(self): :rtype: str :returns: the project (derived from the client). """ - return self._job_ref.project + return _helpers._get_sub_prop( + self._properties, ['jobReference', 'projectId']) @property def location(self): """str: Location where the job runs.""" - return self._job_ref.location + return _helpers._get_sub_prop( + self._properties, ['jobReference', 'location']) def _require_client(self, client): """Check client or verify over-ride. @@ -481,7 +490,7 @@ def _set_properties(self, api_response): self._properties.clear() self._properties.update(cleaned) - self._copy_configuration_properties(cleaned['configuration']) + self._copy_configuration_properties(cleaned.get('configuration', {})) # For Future interface self._set_future_result() @@ -1337,7 +1346,7 @@ def _build_resource(self): self.destination.to_api_repr()) return { - 'jobReference': self._job_ref._to_api_repr(), + 'jobReference': self._properties['jobReference'], 'configuration': configuration, } @@ -1523,7 +1532,7 @@ def _build_resource(self): }) return { - 'jobReference': self._job_ref._to_api_repr(), + 'jobReference': self._properties['jobReference'], 'configuration': configuration, } @@ -1737,7 +1746,7 @@ def _build_resource(self): self.destination_uris) return { - 'jobReference': self._job_ref._to_api_repr(), + 'jobReference': self._properties['jobReference'], 'configuration': configuration, } @@ -2330,7 +2339,7 @@ def _build_resource(self): configuration = self._configuration.to_api_repr() resource = { - 'jobReference': self._job_ref._to_api_repr(), + 'jobReference': self._properties['jobReference'], 'configuration': configuration, } configuration['query']['query'] = self.query @@ -3020,8 +3029,12 @@ def from_api_repr(cls, resource, client): Returns: UnknownJob: Job corresponding to the resource. """ - job_ref = _JobReference._from_api_repr( - resource.get('jobReference', {'projectId': client.project})) + job_ref_properties = resource.get( + 'jobReference', {'projectId': client.project}) + job_ref = _JobReference._from_api_repr(job_ref_properties) job = cls(job_ref, client) + # Populate the job reference with the project, even if it has been + # redacted, because we know it should equal that of the request. + resource['jobReference'] = job_ref_properties job._properties = resource return job diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 6662f584cdb9..a94a672198b4 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -666,7 +666,7 @@ def test_load_table_from_file_w_explicit_location(self): client.get_job(job_id, location='US') load_job_us = client.get_job(job_id) - load_job_us._job_ref._properties['location'] = 'US' + load_job_us._properties['jobReference']['location'] = 'US' self.assertFalse(load_job_us.exists()) with self.assertRaises(NotFound): load_job_us.reload() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 85f882eee8da..ee7bb7ca8632 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2740,6 +2740,39 @@ def test_query_w_client_location(self): data=resource, ) + def test_query_detect_location(self): + query = 'select count(*) from persons' + resource_location = 'EU' + resource = { + 'jobReference': { + 'projectId': self.PROJECT, + # Location not set in request, but present in the response. + 'location': resource_location, + 'jobId': 'some-random-id', + }, + 'configuration': { + 'query': { + 'query': query, + 'useLegacySql': False, + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _make_connection(resource) + + job = client.query(query) + + self.assertEqual(job.location, resource_location) + + # Check that request did not contain a location. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + sent = req['data'] + self.assertIsNone(sent['jobReference'].get('location')) + def test_query_w_udf_resources(self): from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 0df830a2d9e7..f3a57439c508 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -156,7 +156,15 @@ def test_ctor_w_bare_job_id(self): self.assertEqual(job.project, self.PROJECT) self.assertIsNone(job.location) self.assertIs(job._client, client) - self.assertEqual(job._properties, {}) + self.assertEqual( + job._properties, + { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + } + ) self.assertIsInstance(job._completion_lock, type(threading.Lock())) self.assertEqual( job.path, @@ -174,7 +182,16 @@ def test_ctor_w_job_ref(self): self.assertEqual(job.project, self.PROJECT) self.assertEqual(job.location, self.LOCATION) self.assertIs(job._client, client) - self.assertEqual(job._properties, {}) + self.assertEqual( + job._properties, + { + 'jobReference': { + 'projectId': self.PROJECT, + 'location': self.LOCATION, + 'jobId': self.JOB_ID, + }, + } + ) self.assertFalse(job._result_set) self.assertIsInstance(job._completion_lock, type(threading.Lock())) self.assertEqual( @@ -359,6 +376,7 @@ def _set_properties_job(self): job._copy_configuration_properties = mock.Mock() job._set_future_result = mock.Mock() job._properties = { + 'jobReference': job._properties['jobReference'], 'foo': 'bar', } return job @@ -577,7 +595,7 @@ def test_exists_defaults_miss(self): from google.cloud.bigquery.retry import DEFAULT_RETRY job = self._set_properties_job() - job._job_ref._properties['location'] = self.LOCATION + job._properties['jobReference']['location'] = self.LOCATION call_api = job._client._call_api = mock.Mock() call_api.side_effect = NotFound('testing') @@ -636,7 +654,7 @@ def test_reload_defaults(self): } } job = self._set_properties_job() - job._job_ref._properties['location'] = self.LOCATION + job._properties['jobReference']['location'] = self.LOCATION call_api = job._client._call_api = mock.Mock() call_api.return_value = resource @@ -693,7 +711,7 @@ def test_cancel_defaults(self): } response = {'job': resource} job = self._set_properties_job() - job._job_ref._properties['location'] = self.LOCATION + job._properties['jobReference']['location'] = self.LOCATION connection = job._client._connection = _make_connection(response) self.assertTrue(job.cancel()) From a7814291caa1c6c8ccc65a3f932016e832db6dc5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 21 Sep 2018 12:33:15 -0700 Subject: [PATCH 0494/2016] Release 1.5.1 (#6054) --- packages/google-cloud-bigquery/CHANGELOG.md | 20 ++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index a8daa2bacd04..1b25a9100740 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,26 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.5.1 + +### Implementation Changes + +- Retry '502 Bad Gateway' errors by default. (#5930) +- Avoid pulling entire result set into memory when constructing dataframe. (#5870) +- Add support for retrying unstructured 429 / 500 / 502 responses. (#6011) +- Populate the jobReference from the API response. (#6044) + +### Documentation + +- Prepare documentation for repo split (#5955) +- Fix leakage of bigquery/spanner sections into sidebar menu. (#5986) + +### Internal / Testing Changes + +- Test pandas support under Python 3.7. (#5857) +- Nox: use inplace installs (#5865) +- Update system test to use test data in bigquery-public-data. (#5965) + ## 1.5.0 ### Implementation Changes diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 16f018dd1172..9eaf1f741619 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.5.0' +version = '1.5.1' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 08436cee5b7589873e762d23be390adf14bc2995 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 25 Sep 2018 15:10:20 -0700 Subject: [PATCH 0495/2016] Remove unused BigQuery append samples (#6100) --- .../google-cloud-bigquery/docs/snippets.py | 79 ------------------- 1 file changed, 79 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 6093abe95c1c..594f7391c918 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -1512,85 +1512,6 @@ def test_load_table_from_uri_autodetect(client, to_delete, capsys): assert 'Loaded 50 rows.' in out -def test_load_table_from_uri_append(client, to_delete, capsys): - """Appends data to a table from a GCS URI using various formats - - Each file format has its own tested load from URI sample. Because most of - the code is common for autodetect, append, and truncate, this sample - includes snippets for all supported formats but only calls a single load - job. - - This code snippet is made up of shared code, then format-specific code, - followed by more shared code. Note that only the last format in the - format-specific code section will be tested in this test. - """ - dataset_id = 'load_table_dataset_{}'.format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING') - ] - table_ref = dataset.table('us_states') - body = six.BytesIO(b'Washington,WA') - client.load_table_from_file( - body, table_ref, job_config=job_config).result() - - # SHared code - # [START bigquery_load_table_gcs_csv_append] - # [START bigquery_load_table_gcs_json_append] - # from google.cloud import bigquery - # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('existing_table') - - previous_rows = client.get_table(table_ref).num_rows - assert previous_rows > 0 - - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND - # [END bigquery_load_table_gcs_csv_append] - # [END bigquery_load_table_gcs_json_append] - - # Format-specific code - # [START bigquery_load_table_gcs_csv_append] - job_config.skip_leading_rows = 1 - # The source format defaults to CSV, so the line below is optional. - job_config.source_format = bigquery.SourceFormat.CSV - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.csv' - # [END bigquery_load_table_gcs_csv_append] - # unset csv-specific attribute - del job_config._properties['load']['skipLeadingRows'] - - # [START bigquery_load_table_gcs_json_append] - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.json' - # [END bigquery_load_table_gcs_json_append] - - # Shared code - # [START bigquery_load_table_gcs_csv_append] - # [START bigquery_load_table_gcs_json_append] - load_job = client.load_table_from_uri( - uri, - table_ref, - job_config=job_config) # API request - print('Starting job {}'.format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print('Job finished.') - - destination_table = client.get_table(table_ref) - print('Loaded {} rows.'.format(destination_table.num_rows - previous_rows)) - # [END bigquery_load_table_gcs_csv_append] - # [END bigquery_load_table_gcs_json_append] - - out, _ = capsys.readouterr() - assert previous_rows == 1 - assert 'Loaded 50 rows.' in out - - def test_load_table_from_uri_truncate(client, to_delete, capsys): """Replaces table data with data from a GCS URI using various formats From 5aa57d5aa7f3be02a85830272fefc00fdab261eb Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 25 Sep 2018 18:22:19 -0400 Subject: [PATCH 0496/2016] Harden bucket teardown against '429 Too Many Requests'. (#6101) Closes #6096. --- packages/google-cloud-bigquery/tests/system.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index a94a672198b4..bb5ebf88ceab 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -40,12 +40,17 @@ IPython = None from google.api_core.exceptions import PreconditionFailed +from google.api_core.exceptions import BadRequest +from google.api_core.exceptions import Conflict +from google.api_core.exceptions import Forbidden +from google.api_core.exceptions import NotFound +from google.api_core.exceptions import TooManyRequests from google.cloud import bigquery -from google.cloud.bigquery.dataset import Dataset, DatasetReference +from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi -from google.cloud.exceptions import BadRequest, Forbidden, NotFound from google.cloud import storage from test_utils.retry import RetryErrors @@ -135,19 +140,16 @@ def setUp(self): self.to_delete = [] def tearDown(self): - from google.cloud.storage import Bucket - from google.cloud.exceptions import BadRequest - from google.cloud.exceptions import Conflict def _still_in_use(bad_request): return any(error['reason'] == 'resourceInUse' for error in bad_request._errors) retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use) - retry_409 = RetryErrors(Conflict) + retry_409_429 = RetryErrors((Conflict, TooManyRequests)) for doomed in self.to_delete: - if isinstance(doomed, Bucket): - retry_409(doomed.delete)(force=True) + if isinstance(doomed, storage.Bucket): + retry_409_429(doomed.delete)(force=True) elif isinstance(doomed, (Dataset, bigquery.DatasetReference)): retry_in_use(Config.CLIENT.delete_dataset)( doomed, delete_contents=True) From 87beba9d30e2c1d9867e2bb4c15d28bd43d723a0 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 25 Sep 2018 18:44:38 -0400 Subject: [PATCH 0497/2016] BigQuery: address dataset leaks, conflicts in systests (#6099) Closes #6098. Closes #6097. --- .../google-cloud-bigquery/docs/snippets.py | 18 ++++++------- .../tests/scrub_datasets.py | 26 +++++++++++++++++++ .../google-cloud-bigquery/tests/system.py | 9 +++---- 3 files changed, 38 insertions(+), 15 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/scrub_datasets.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 594f7391c918..5acb339d4571 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -1212,8 +1212,8 @@ def test_table_insert_rows(client, to_delete): def test_load_table_from_file(client, to_delete): """Upload table data from a CSV file.""" - dataset_id = 'table_upload_from_file_dataset_{}'.format(_millis()) - table_id = 'table_upload_from_file_table_{}'.format(_millis()) + dataset_id = 'load_table_from_file_dataset_{}'.format(_millis()) + table_id = 'load_table_from_file_table_{}'.format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) dataset.location = 'US' client.create_dataset(dataset) @@ -1261,7 +1261,7 @@ def test_load_table_from_file(client, to_delete): def test_load_table_from_uri_csv(client, to_delete, capsys): - dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset_id = 'load_table_from_uri_csv_{}'.format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -1300,7 +1300,7 @@ def test_load_table_from_uri_csv(client, to_delete, capsys): def test_load_table_from_uri_json(client, to_delete, capsys): - dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset_id = 'load_table_from_uri_json_{}'.format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) dataset.location = 'US' client.create_dataset(dataset) @@ -1381,7 +1381,7 @@ def test_load_table_from_uri_cmek(client, to_delete): def test_load_table_from_uri_parquet(client, to_delete, capsys): - dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset_id = 'load_table_from_uri_parquet_{}'.format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -1414,7 +1414,7 @@ def test_load_table_from_uri_parquet(client, to_delete, capsys): def test_load_table_from_uri_orc(client, to_delete, capsys): - dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset_id = 'load_table_from_uri_orc_{}'.format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -1458,7 +1458,7 @@ def test_load_table_from_uri_autodetect(client, to_delete, capsys): followed by more shared code. Note that only the last format in the format-specific code section will be tested in this test. """ - dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset_id = 'load_table_from_uri_auto_{}'.format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -1524,7 +1524,7 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys): followed by more shared code. Note that only the last format in the format-specific code section will be tested in this test. """ - dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset_id = 'load_table_from_uri_trunc_{}'.format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -2972,7 +2972,7 @@ def test_list_rows_as_dataframe(client): @pytest.mark.skipif(pandas is None, reason='Requires `pandas`') @pytest.mark.skipif(pyarrow is None, reason='Requires `pyarrow`') def test_load_table_from_dataframe(client, to_delete): - dataset_id = 'load_table_dataframe_dataset_{}'.format(_millis()) + dataset_id = 'load_table_from_dataframe_{}'.format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) diff --git a/packages/google-cloud-bigquery/tests/scrub_datasets.py b/packages/google-cloud-bigquery/tests/scrub_datasets.py new file mode 100644 index 000000000000..2e8981aa62e3 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/scrub_datasets.py @@ -0,0 +1,26 @@ +import re +import sys + +from google.api_core.exceptions import NotFound +from google.cloud.bigquery import Client + + +def main(prefixes): + client = Client() + + pattern = re.compile( + '|'.join('^{}.*$'.format(prefix) for prefix in prefixes)) + + ds_items = list(client.list_datasets()) + for dataset in ds_items: + ds_id = dataset.dataset_id + if pattern.match(ds_id): + print("Deleting dataset: {}".format(ds_id)) + try: + client.delete_dataset(dataset.reference, delete_contents=True) + except NotFound: + print(" NOT FOUND") + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index bb5ebf88ceab..e70dcb26981d 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -295,10 +295,7 @@ def test_delete_dataset_delete_contents_true(self): def test_delete_dataset_delete_contents_false(self): from google.api_core import exceptions - dataset_id = _make_dataset_id('delete_table_false') - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(Config.CLIENT.dataset(dataset_id))) - + dataset = self.temp_dataset(_make_dataset_id('delete_table_false')) table_id = 'test_table' table_arg = Table(dataset.table(table_id), schema=SCHEMA) @@ -1191,7 +1188,7 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): self._fetch_single_page(table) def test_query_w_dml(self): - dataset_name = _make_dataset_id('dml_tests') + dataset_name = _make_dataset_id('dml_query') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) query_template = """UPDATE {}.{} @@ -1207,7 +1204,7 @@ def test_query_w_dml(self): self.assertEqual(query_job.num_dml_affected_rows, 1) def test_dbapi_w_dml(self): - dataset_name = _make_dataset_id('dml_tests') + dataset_name = _make_dataset_id('dml_dbapi') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) query_template = """UPDATE {}.{} From d3f221c0465b951b5994d22c54c92d45f934a0a4 Mon Sep 17 00:00:00 2001 From: Blaine Hansen Date: Wed, 26 Sep 2018 17:29:41 -0600 Subject: [PATCH 0498/2016] Add default QueryJobConfig to Client (#6088) * master * working implementation of default QueryJobConfigs attached to Client * removing comments and help texts * fixing lints * bringing coverage up to 100% * making revisions * missed some changes * making code tweaks * Make _JobConfig._fill_from_default semi-private. Also, update the docstrings to Google/Napoleon-style. --- .../google/cloud/bigquery/client.py | 28 ++- .../google/cloud/bigquery/job.py | 33 ++++ .../tests/unit/test_client.py | 164 ++++++++++++++++++ .../tests/unit/test_job.py | 28 +++ 4 files changed, 250 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 89a26ab18743..0723e9133af9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -108,8 +108,11 @@ class Client(ClientWithProject): current object. This parameter should be considered private, and could change in the future. - location str: + location (str): (Optional) Default location for jobs / datasets / tables. + default_query_job_config (google.cloud.bigquery.job.QueryJobConfig): + (Optional) Default ``QueryJobConfig``. + Will be merged into job configs passed into the ``query`` method. Raises: google.auth.exceptions.DefaultCredentialsError: @@ -122,11 +125,13 @@ class Client(ClientWithProject): """The scopes required for authenticating as a BigQuery consumer.""" def __init__( - self, project=None, credentials=None, _http=None, location=None): + self, project=None, credentials=None, _http=None, + location=None, default_query_job_config=None): super(Client, self).__init__( project=project, credentials=credentials, _http=_http) self._connection = Connection(self) self._location = location + self._default_query_job_config = default_query_job_config @property def location(self): @@ -1187,7 +1192,9 @@ def extract_table( return extract_job def query( - self, query, job_config=None, job_id=None, job_id_prefix=None, + self, query, + job_config=None, + job_id=None, job_id_prefix=None, location=None, project=None, retry=DEFAULT_RETRY): """Run a SQL query. @@ -1202,6 +1209,10 @@ def query( Keyword Arguments: job_config (google.cloud.bigquery.job.QueryJobConfig): (Optional) Extra configuration options for the job. + To override any options that were previously set in + the ``default_query_job_config`` given to the + ``Client`` constructor, manually set those options to ``None``, + or whatever value is preferred. job_id (str): (Optional) ID to use for the query job. job_id_prefix (str): (Optional) The prefix to use for a randomly generated job ID. @@ -1226,6 +1237,17 @@ def query( if location is None: location = self.location + if self._default_query_job_config: + if job_config: + # anything that's not defined on the incoming + # that is in the default, + # should be filled in with the default + # the incoming therefore has precedence + job_config = job_config._fill_from_default( + self._default_query_job_config) + else: + job_config = self._default_query_job_config + job_ref = job._JobReference(job_id, project=project, location=location) query_job = job.QueryJob( job_ref, query, client=self, job_config=job_config) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 7a4478551336..7f89efa2d821 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -819,6 +819,39 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) + def _fill_from_default(self, default_job_config): + """Merge this job config with a default job config. + + The keys in this object take precedence over the keys in the default + config. The merge is done at the top-level as well as for keys one + level below the job type. + + Arguments: + default_job_config (google.cloud.bigquery.job._JobConfig): + The default job config that will be used to fill in self. + + Returns: + google.cloud.bigquery.job._JobConfig A new (merged) job config. + """ + if self._job_type != default_job_config._job_type: + raise TypeError( + "attempted to merge two incompatible job types: " + + repr(self._job_type) + ', ' + + repr(default_job_config._job_type)) + + new_job_config = self.__class__() + + default_job_properties = copy.deepcopy(default_job_config._properties) + for key in self._properties: + if key != self._job_type: + default_job_properties[key] = self._properties[key] + + default_job_properties[self._job_type] \ + .update(self._properties[self._job_type]) + new_job_config._properties = default_job_properties + + return new_job_config + @classmethod def from_api_repr(cls, resource): """Factory: construct a job configuration given its API representation diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ee7bb7ca8632..e49ec3670956 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -96,6 +96,27 @@ def test_ctor_w_location(self): self.assertIs(client._connection.http, http) self.assertEqual(client.location, location) + def test_ctor_w_query_job_config(self): + from google.cloud.bigquery._http import Connection + from google.cloud.bigquery import QueryJobConfig + + creds = _make_credentials() + http = object() + location = 'us-central' + job_config = QueryJobConfig() + job_config.dry_run = True + + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http, location=location, + default_query_job_config=job_config) + self.assertIsInstance(client._connection, Connection) + self.assertIs(client._connection.credentials, creds) + self.assertIs(client._connection.http, http) + self.assertEqual(client.location, location) + + self.assertIsInstance(client._default_query_job_config, QueryJobConfig) + self.assertTrue(client._default_query_job_config.dry_run) + def test__get_query_results_miss_w_explicit_project_and_timeout(self): from google.cloud.exceptions import NotFound @@ -2707,6 +2728,149 @@ def test_query_w_explicit_project(self): data=resource, ) + def test_query_w_explicit_job_config(self): + job_id = 'some-job-id' + query = 'select count(*) from persons' + resource = { + 'jobReference': { + 'jobId': job_id, + 'projectId': self.PROJECT, + 'location': self.LOCATION, + }, + 'configuration': { + 'query': { + 'query': query, + 'defaultDataset': { + 'projectId': self.PROJECT, + 'datasetId': 'some-dataset', + }, + 'useLegacySql': False, + 'useQueryCache': True, + 'maximumBytesBilled': '2000', + }, + }, + } + + creds = _make_credentials() + http = object() + + from google.cloud.bigquery import QueryJobConfig, DatasetReference + default_job_config = QueryJobConfig() + default_job_config.default_dataset = DatasetReference( + self.PROJECT, 'some-dataset') + default_job_config.maximum_bytes_billed = 1000 + + client = self._make_one( + project=self.PROJECT, credentials=creds, + _http=http, default_query_job_config=default_job_config) + conn = client._connection = _make_connection(resource) + + job_config = QueryJobConfig() + job_config.use_query_cache = True + job_config.maximum_bytes_billed = 2000 + + client.query( + query, job_id=job_id, location=self.LOCATION, + job_config=job_config) + + # Check that query actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/PROJECT/jobs', + data=resource, + ) + + def test_query_w_explicit_job_config_override(self): + job_id = 'some-job-id' + query = 'select count(*) from persons' + resource = { + 'jobReference': { + 'jobId': job_id, + 'projectId': self.PROJECT, + 'location': self.LOCATION, + }, + 'configuration': { + 'query': { + 'query': query, + 'defaultDataset': None, + 'useLegacySql': False, + 'useQueryCache': True, + 'maximumBytesBilled': '2000', + }, + }, + } + + creds = _make_credentials() + http = object() + + from google.cloud.bigquery import QueryJobConfig, DatasetReference + default_job_config = QueryJobConfig() + default_job_config.default_dataset = DatasetReference( + self.PROJECT, 'some-dataset') + default_job_config.maximum_bytes_billed = 1000 + + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http, + default_query_job_config=default_job_config) + conn = client._connection = _make_connection(resource) + + job_config = QueryJobConfig() + job_config.use_query_cache = True + job_config.maximum_bytes_billed = 2000 + job_config.default_dataset = None + + client.query( + query, job_id=job_id, location=self.LOCATION, + job_config=job_config, + ) + + # Check that query actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/PROJECT/jobs', + data=resource, + ) + + def test_query_w_client_default_config_no_incoming(self): + job_id = 'some-job-id' + query = 'select count(*) from persons' + resource = { + 'jobReference': { + 'jobId': job_id, + 'projectId': self.PROJECT, + 'location': self.LOCATION, + }, + 'configuration': { + 'query': { + 'query': query, + 'useLegacySql': False, + 'maximumBytesBilled': '1000', + }, + }, + } + + creds = _make_credentials() + http = object() + + from google.cloud.bigquery import QueryJobConfig + default_job_config = QueryJobConfig() + default_job_config.maximum_bytes_billed = 1000 + + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http, + default_query_job_config=default_job_config) + conn = client._connection = _make_connection(resource) + + client.query( + query, job_id=job_id, location=self.LOCATION) + + # Check that query actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/PROJECT/jobs', + data=resource, + ) + def test_query_w_client_location(self): job_id = 'some-job-id' query = 'select count(*) from persons' diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index f3a57439c508..97c22e211a77 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -912,6 +912,34 @@ def test_ctor(self): self.assertEqual(job_config._job_type, self.JOB_TYPE) self.assertEqual(job_config._properties, {self.JOB_TYPE: {}}) + def test_fill_from_default(self): + from google.cloud.bigquery import QueryJobConfig + + job_config = QueryJobConfig() + job_config.dry_run = True + job_config.maximum_bytes_billed = 1000 + + default_job_config = QueryJobConfig() + default_job_config.use_query_cache = True + default_job_config.maximum_bytes_billed = 2000 + + final_job_config = job_config._fill_from_default(default_job_config) + self.assertTrue(final_job_config.dry_run) + self.assertTrue(final_job_config.use_query_cache) + self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + + def test_fill_from_default_conflict(self): + from google.cloud.bigquery import QueryJobConfig + + basic_job_config = QueryJobConfig() + conflicting_job_config = self._make_one('conflicting_job_type') + self.assertNotEqual( + basic_job_config._job_type, conflicting_job_config._job_type) + + with self.assertRaises(TypeError): + basic_job_config._fill_from_default( + conflicting_job_config) + @mock.patch('google.cloud.bigquery._helpers._get_sub_prop') def test__get_sub_prop_wo_default(self, _get_sub_prop): job_config = self._make_one() From 3649f3037b536757fc0b2dda4d2687093f53e178 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 2 Oct 2018 15:57:09 -0400 Subject: [PATCH 0499/2016] BigQuery: add support for GEOGRAPHY type (#6147) . --- .../google/cloud/bigquery/_helpers.py | 1 + .../google/cloud/bigquery/schema.py | 58 +++++++++---------- .../google-cloud-bigquery/tests/system.py | 4 ++ .../tests/unit/test__helpers.py | 13 +++++ 4 files changed, 44 insertions(+), 32 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 3108afcaf258..c0a29b427b3b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -176,6 +176,7 @@ def _record_from_json(value, field): 'BOOLEAN': _bool_from_json, 'BOOL': _bool_from_json, 'STRING': _string_from_json, + 'GEOGRAPHY': _string_from_json, 'BYTES': _bytes_from_json, 'TIMESTAMP': _timestamp_from_json, 'DATETIME': _datetime_from_json, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index cc1b4a5ff024..759d7c3cbe65 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -18,23 +18,19 @@ class SchemaField(object): """Describe a single field within a table schema. - :type name: str - :param name: the name of the field. + Args: + name (str): the name of the field. - :type field_type: str - :param field_type: the type of the field (one of 'STRING', 'INTEGER', - 'FLOAT', 'NUMERIC', 'BOOLEAN', 'TIMESTAMP' or - 'RECORD'). + field_type (str): the type of the field. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.type - :type mode: str - :param mode: the mode of the field (one of 'NULLABLE', 'REQUIRED', - or 'REPEATED'). + mode (str): the mode of the field. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.mode - :type description: str - :param description: optional description for the field. + description (Optional[str]):description for the field. - :type fields: tuple of :class:`~google.cloud.bigquery.schema.SchemaField` - :param fields: subfields (requires ``field_type`` of 'RECORD'). + fields (Tuple[:class:`~google.cloud.bigquery.schema.SchemaField`]): + subfields (requires ``field_type`` of 'RECORD'). """ def __init__(self, name, field_type, mode='NULLABLE', description=None, fields=()): @@ -78,8 +74,8 @@ def name(self): def field_type(self): """str: The type of the field. - Will be one of 'STRING', 'INTEGER', 'FLOAT', 'NUMERIC', - 'BOOLEAN', 'TIMESTAMP' or 'RECORD'. + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.type """ return self._field_type @@ -87,26 +83,26 @@ def field_type(self): def mode(self): """str: The mode of the field. - Will be one of 'NULLABLE', 'REQUIRED', or 'REPEATED'. + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.mode """ return self._mode @property def is_nullable(self): - """Check whether 'mode' is 'nullable'.""" + """bool: whether 'mode' is 'nullable'.""" return self._mode == 'NULLABLE' @property def description(self): - """Optional[str]: Description for the field.""" + """Optional[str]: description for the field.""" return self._description @property def fields(self): """tuple: Subfields contained in this field. - If ``field_type`` is not 'RECORD', this property must be - empty / unset. + Must be empty unset if ``field_type`` is not 'RECORD'. """ return self._fields @@ -168,14 +164,12 @@ def __repr__(self): def _parse_schema_resource(info): """Parse a resource fragment into a schema field. - :type info: mapping - :param info: should contain a "fields" key to be parsed + Args: + info: (Mapping[str->dict]): should contain a "fields" key to be parsed - :rtype: - list of :class:`google.cloud.bigquery.schema.SchemaField`, or - ``NoneType`` - :returns: a list of parsed fields, or ``None`` if no "fields" key is - present in ``info``. + Returns: + (Union[Sequence[:class:`google.cloud.bigquery.schema.SchemaField`],None]) + a list of parsed fields, or ``None`` if no "fields" key found. """ if 'fields' not in info: return () @@ -195,11 +189,11 @@ def _parse_schema_resource(info): def _build_schema_resource(fields): """Generate a resource fragment for a schema. - :type fields: - sequence of :class:`~google.cloud.bigquery.schema.SchemaField` - :param fields: schema to be dumped + Args: + fields [Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]): + schema to be dumped - :rtype: mapping - :returns: a mapping describing the schema of the supplied fields. + Returns: (Sequence[dict]) + mappings describing the schema of the supplied fields. """ return [field.to_api_repr() for field in fields] diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index e70dcb26981d..1cfc46c4736e 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1019,6 +1019,10 @@ def _generate_standard_sql_types_examples(self): 'sql': 'SELECT ARRAY(SELECT STRUCT([1, 2]))', 'expected': [{u'_field_1': [1, 2]}], }, + { + 'sql': 'SELECT ST_GeogPoint(1, 2)', + 'expected': 'POINT(1 2)', + }, ] def test_query_w_standard_sql_types(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 906119e453e2..d0a93ebd1340 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -376,6 +376,13 @@ def test_w_scalar_subfield(self): coerced = self._call_fut(value, field) self.assertEqual(coerced, {'age': 42}) + def test_w_scalar_subfield_geography(self): + subfield = _Field('REQUIRED', 'geo', 'GEOGRAPHY') + field = _Field('REQUIRED', fields=[subfield]) + value = {'f': [{'v': 'POINT(1, 2)'}]} + coerced = self._call_fut(value, field) + self.assertEqual(coerced, {'geo': 'POINT(1, 2)'}) + def test_w_repeated_subfield(self): subfield = _Field('REPEATED', 'color', 'STRING') field = _Field('REQUIRED', fields=[subfield]) @@ -444,6 +451,12 @@ def test_w_single_scalar_column(self): row = {u'f': [{u'v': u'1'}]} self.assertEqual(self._call_fut(row, schema=[col]), (1,)) + def test_w_single_scalar_geography_column(self): + # SELECT 1 AS col + col = _Field('REQUIRED', 'geo', 'GEOGRAPHY') + row = {u'f': [{u'v': u'POINT(1, 2)'}]} + self.assertEqual(self._call_fut(row, schema=[col]), ('POINT(1, 2)',)) + def test_w_single_struct_column(self): # SELECT (1, 2) AS col sub_1 = _Field('REQUIRED', 'sub_1', 'INTEGER') From 795e1131ca082c873fa5deb7c7fb87a1697f86fe Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 2 Oct 2018 23:01:11 -0400 Subject: [PATCH 0500/2016] Release bigquery 1.6.0 (#6148) --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 1b25a9100740..f5f70256c2c2 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.6.0 + +### New Features +- Add support for `GEOGRAPHY` type ([#6147](https://github.com/googleapis/google-cloud-python/pull/6147)) +- Add default QueryJobConfig to Client ([#6088](https://github.com/googleapis/google-cloud-python/pull/6088)) + +### Documentation +- Remove unused "append" samples ([#6100](https://github.com/googleapis/google-cloud-python/pull/6100)) + +### Internal / Testing Changes +- Address dataset leaks, conflicts in systests ([#6099](https://github.com/googleapis/google-cloud-python/pull/6099)) +- Harden bucket teardown against `429 Too Many Requests`. ([#6101](https://github.com/googleapis/google-cloud-python/pull/6101)) + ## 1.5.1 ### Implementation Changes diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 9eaf1f741619..40b4e41df4d0 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.5.1' +version = '1.6.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From d25f17a1322da2999b8a3efe8f6b63ea95d60cb2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 4 Oct 2018 17:17:38 -0500 Subject: [PATCH 0501/2016] =?UTF-8?q?Allow=20string=20in=20addition=20to?= =?UTF-8?q?=20DatasetReference=20/=20TableReference=20in=20Clie=E2=80=A6?= =?UTF-8?q?=20(#6164)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Allow string in addition to DatasetReference / TableReference in Client methods. Whereever a client method accepts a DatasetReference or TableReference, it now also accepts a string. The project ID can be omitted from the string and the Client's default project is used instead. Also, to make it easier to share code across client methods for creating XReference classes: * Add default_project argument to DatasetReference.from_string * Add default_project argument to TableReference.from_string * Fix line breaks in docstrings. --- .../google/cloud/bigquery/client.py | 469 +++++++++++------- .../google/cloud/bigquery/dataset.py | 42 +- .../google/cloud/bigquery/table.py | 51 +- .../google-cloud-bigquery/tests/system.py | 62 ++- .../tests/unit/test_client.py | 174 +++++-- .../tests/unit/test_dataset.py | 21 + .../tests/unit/test_table.py | 23 + 7 files changed, 573 insertions(+), 269 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 0723e9133af9..2c967fa09d7e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -338,34 +338,53 @@ def _call_api(self, retry, **kwargs): def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): """Fetch the dataset referenced by ``dataset_ref`` - :type dataset_ref: - :class:`google.cloud.bigquery.dataset.DatasetReference` - :param dataset_ref: the dataset to use. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Args: + dataset_ref (Union[ \ + :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + str, \ + ]): + A reference to the dataset to fetch from the BigQuery API. + If a string is passed in, this method attempts to create a + dataset reference from a string using + :func:`~google.cloud.bigquery.dataset.DatasetReference.from_string`. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` - :returns: a ``Dataset`` instance + Returns: + google.cloud.bigquery.dataset.Dataset: + A ``Dataset`` instance. """ - api_response = self._call_api(retry, - method='GET', - path=dataset_ref.path) + if isinstance(dataset_ref, str): + dataset_ref = DatasetReference.from_string( + dataset_ref, default_project=self.project) + + api_response = self._call_api( + retry, method='GET', path=dataset_ref.path) return Dataset.from_api_repr(api_response) def get_table(self, table_ref, retry=DEFAULT_RETRY): - """Fetch the table referenced by ``table_ref`` + """Fetch the table referenced by ``table_ref``. - :type table_ref: - :class:`google.cloud.bigquery.table.TableReference` - :param table_ref: the table to use. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Args: + table_ref (Union[ \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): + A reference to the table to fetch from the BigQuery API. + If a string is passed in, this method attempts to create a + table reference from a string using + :func:`google.cloud.bigquery.table.TableReference.from_string`. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.table.Table` - :returns: a ``Table`` instance + Returns: + google.cloud.bigquery.table.Table: + A ``Table`` instance. """ + if isinstance(table_ref, str): + table_ref = TableReference.from_string( + table_ref, default_project=self.project) + api_response = self._call_api(retry, method='GET', path=table_ref.path) return Table.from_api_repr(api_response) @@ -450,34 +469,43 @@ def list_tables(self, dataset, max_results=None, page_token=None, See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list - :type dataset: One of: - :class:`~google.cloud.bigquery.dataset.Dataset` - :class:`~google.cloud.bigquery.dataset.DatasetReference` - :param dataset: the dataset whose tables to list, or a reference to it. - - :type max_results: int - :param max_results: (Optional) Maximum number of tables to return. - If not passed, defaults to a value set by the API. - - :type page_token: str - :param page_token: - (Optional) Token representing a cursor into the tables. If not - passed, the API will return the first page of tables. The - token marks the beginning of the iterator to be returned and - the value of the ``page_token`` can be accessed at - ``next_page_token`` of the - :class:`~google.api_core.page_iterator.HTTPIterator`. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Args: + dataset (Union[ \ + :class:`~google.cloud.bigquery.dataset.Dataset`, \ + :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + str, \ + ]): + A reference to the dataset whose tables to list from the + BigQuery API. If a string is passed in, this method attempts + to create a dataset reference from a string using + :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. + max_results (int): + (Optional) Maximum number of tables to return. If not passed, + defaults to a value set by the API. + page_token (str): + (Optional) Token representing a cursor into the tables. If + not passed, the API will return the first page of tables. The + token marks the beginning of the iterator to be returned and + the value of the ``page_token`` can be accessed at + ``next_page_token`` of the + :class:`~google.api_core.page_iterator.HTTPIterator`. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. - :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: - Iterator of :class:`~google.cloud.bigquery.table.TableListItem` - contained within the current dataset. + Returns: + google.api_core.page_iterator.Iterator: + Iterator of + :class:`~google.cloud.bigquery.table.TableListItem` contained + within the requested dataset. """ + if isinstance(dataset, str): + dataset = DatasetReference.from_string( + dataset, default_project=self.project) + if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError('dataset must be a Dataset or a DatasetReference') + raise TypeError( + 'dataset must be a Dataset, DatasetReference, or string') + path = '%s/tables' % dataset.path result = page_iterator.HTTPIterator( client=self, @@ -497,19 +525,27 @@ def delete_dataset(self, dataset, delete_contents=False, See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete - :type dataset: One of: - :class:`~google.cloud.bigquery.dataset.Dataset` - :class:`~google.cloud.bigquery.dataset.DatasetReference` - :param dataset: the dataset to delete, or a reference to it. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. - - :type delete_contents: boolean - :param delete_contents: (Optional) If True, delete all the tables - in the dataset. If False and the dataset contains tables, the - request will fail. Default is False + Args + dataset (Union[ \ + :class:`~google.cloud.bigquery.dataset.Dataset`, \ + :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + str, \ + ]): + A reference to the dataset to delete. If a string is passed + in, this method attempts to create a dataset reference from a + string using + :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. + delete_contents (boolean): + (Optional) If True, delete all the tables in the dataset. If + False and the dataset contains tables, the request will fail. + Default is False. """ + if isinstance(dataset, str): + dataset = DatasetReference.from_string( + dataset, default_project=self.project) + if not isinstance(dataset, (Dataset, DatasetReference)): raise TypeError('dataset must be a Dataset or a DatasetReference') @@ -528,14 +564,23 @@ def delete_table(self, table, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete - :type table: One of: - :class:`~google.cloud.bigquery.table.Table` - :class:`~google.cloud.bigquery.table.TableReference` - :param table: the table to delete, or a reference to it. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Args: + table (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): + A reference to the table to delete. If a string is passed in, + this method attempts to create a table reference from a + string using + :func:`google.cloud.bigquery.table.TableReference.from_string`. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. """ + if isinstance(table, str): + table = TableReference.from_string( + table, default_project=self.project) + if not isinstance(table, (Table, TableReference)): raise TypeError('table must be a Table or a TableReference') self._call_api(retry, method='DELETE', path=table.path) @@ -785,8 +830,14 @@ def load_table_from_uri( source_uris (Union[str, Sequence[str]]): URIs of data files to be loaded; in format ``gs:///``. - destination (google.cloud.bigquery.table.TableReference): - Table into which data is to be loaded. + destination (Union[ \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): + Table into which data is to be loaded. If a string is passed + in, this method attempts to create a table reference from a + string using + :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: job_id (str): (Optional) Name of the job. @@ -821,6 +872,10 @@ def load_table_from_uri( if isinstance(source_uris, six.string_types): source_uris = [source_uris] + if isinstance(destination, str): + destination = TableReference.from_string( + destination, default_project=self.project) + load_job = job.LoadJob( job_ref, source_uris, destination, self, job_config) load_job._begin(retry=retry) @@ -839,8 +894,14 @@ def load_table_from_file( Arguments: file_obj (file): A file handle opened in binary mode for reading. - destination (google.cloud.bigquery.table.TableReference): - Table into which data is to be loaded. + destination (Union[ \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): + Table into which data is to be loaded. If a string is passed + in, this method attempts to create a table reference from a + string using + :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: rewind (bool): @@ -882,6 +943,10 @@ def load_table_from_file( if location is None: location = self.location + if isinstance(destination, str): + destination = TableReference.from_string( + destination, default_project=self.project) + job_ref = job._JobReference(job_id, project=project, location=location) load_job = job.LoadJob(job_ref, None, destination, self, job_config) job_resource = load_job._build_resource() @@ -923,6 +988,10 @@ def load_table_from_dataframe(self, dataframe, destination, does not yet exist, the schema is inferred from the :class:`~pandas.DataFrame`. + If a string is passed in, this method attempts to create a + table reference from a string using + :func:`google.cloud.bigquery.table.TableReference.from_string`. + Keyword Arguments: num_retries (int, optional): Number of upload retries. job_id (str, optional): Name of the job. @@ -1085,10 +1154,17 @@ def copy_table( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy Arguments: - sources (Union[google.cloud.bigquery.table.TableReference, \ - Sequence[google.cloud.bigquery.table.TableReference]]): + sources (Union[ \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + Sequence[ \ + :class:`~google.cloud.bigquery.table.TableReference`], \ + ]): Table or tables to be copied. - destination (google.cloud.bigquery.table.TableReference): + destination (Union[ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): Table into which data is to be copied. Keyword Arguments: @@ -1121,6 +1197,14 @@ def copy_table( job_ref = job._JobReference(job_id, project=project, location=location) + if isinstance(sources, str): + sources = TableReference.from_string( + sources, default_project=self.project) + + if isinstance(destination, str): + destination = TableReference.from_string( + destination, default_project=self.project) + if not isinstance(sources, collections.Sequence): sources = [sources] @@ -1141,7 +1225,10 @@ def extract_table( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract Arguments: - source (google.cloud.bigquery.table.TableReference): + source (Union[ \ + :class:`google.cloud.bigquery.table.TableReference`, \ + src, \ + ]): Table to be extracted. destination_uris (Union[str, Sequence[str]]): URIs of Cloud Storage file(s) into which table data is to be @@ -1181,6 +1268,10 @@ def extract_table( job_ref = job._JobReference(job_id, project=project, location=location) + if isinstance(source, str): + source = TableReference.from_string( + source, default_project=self.project) + if isinstance(destination_uris, six.string_types): destination_uris = [destination_uris] @@ -1261,40 +1352,45 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll - :type table: One of: - :class:`~google.cloud.bigquery.table.Table` - :class:`~google.cloud.bigquery.table.TableReference` - :param table: the destination table for the row data, or a reference - to it. - - :type rows: One of: - list of tuples - list of dictionaries - :param rows: Row data to be inserted. If a list of tuples is given, - each tuple should contain data for each schema field on - the current table and in the same order as the schema - fields. If a list of dictionaries is given, the keys must - include all required fields in the schema. Keys which do - not correspond to a field in the schema are ignored. - - :type selected_fields: - list of :class:`~google.cloud.bigquery.schema.SchemaField` - :param selected_fields: - The fields to return. Required if ``table`` is a - :class:`~google.cloud.bigquery.table.TableReference`. - - :type kwargs: dict - :param kwargs: - Keyword arguments to - :meth:`~google.cloud.bigquery.client.Client.insert_rows_json` - - :rtype: list of mappings - :returns: One mapping per row with insert errors: the "index" key - identifies the row, and the "errors" key contains a list - of the mappings describing one or more problems with the - row. - :raises: ValueError if table's schema is not set + Args: + table (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): + The destination table for the row data, or a reference to it. + rows (Union[ \ + Sequence[Tuple], \ + Sequence[dict], \ + ]): + Row data to be inserted. If a list of tuples is given, each + tuple should contain data for each schema field on the + current table and in the same order as the schema fields. If + a list of dictionaries is given, the keys must include all + required fields in the schema. Keys which do not correspond + to a field in the schema are ignored. + selected_fields (Sequence[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + ]): + The fields to return. Required if ``table`` is a + :class:`~google.cloud.bigquery.table.TableReference`. + kwargs (dict): + Keyword arguments to + :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`. + + Returns: + Sequence[Mappings]: + One mapping per row with insert errors: the "index" key + identifies the row, and the "errors" key contains a list of + the mappings describing one or more problems with the row. + + Raises: + ValueError: if table's schema is not set """ + if isinstance(table, str): + table = TableReference.from_string( + table, default_project=self.project) + if selected_fields is not None: schema = selected_fields elif isinstance(table, TableReference): @@ -1331,51 +1427,44 @@ def insert_rows_json(self, table, json_rows, row_ids=None, See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll - :type table: One of: - :class:`~google.cloud.bigquery.table.Table` - :class:`~google.cloud.bigquery.table.TableReference` - :param table: the destination table for the row data, or a reference - to it. - - :type json_rows: list of dictionaries - :param json_rows: Row data to be inserted. Keys must match the table - schema fields and values must be JSON-compatible - representations. - - :type row_ids: list of string - :param row_ids: (Optional) Unique ids, one per row being inserted. - If omitted, unique IDs are created. - - :type skip_invalid_rows: bool - :param skip_invalid_rows: (Optional) Insert all valid rows of a - request, even if invalid rows exist. - The default value is False, which causes - the entire request to fail if any invalid - rows exist. - - :type ignore_unknown_values: bool - :param ignore_unknown_values: (Optional) Accept rows that contain - values that do not match the schema. - The unknown values are ignored. Default - is False, which treats unknown values as - errors. - - :type template_suffix: str - :param template_suffix: + table (Union[ \ + :class:`~google.cloud.bigquery.table.Table` \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): + The destination table for the row data, or a reference to it. + json_rows (Sequence[dict]): + Row data to be inserted. Keys must match the table schema fields + and values must be JSON-compatible representations. + row_ids (Sequence[str]): + (Optional) Unique ids, one per row being inserted. If omitted, + unique IDs are created. + skip_invalid_rows (bool): + (Optional) Insert all valid rows of a request, even if invalid + rows exist. The default value is False, which causes the entire + request to fail if any invalid rows exist. + ignore_unknown_values (bool): + (Optional) Accept rows that contain values that do not match the + schema. The unknown values are ignored. Default is False, which + treats unknown values as errors. + template_suffix (str): (Optional) treat ``name`` as a template table and provide a suffix. BigQuery will create the table `` + `` based on the schema of the template table. See https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. - - :rtype: list of mappings - :returns: One mapping per row with insert errors: the "index" key - identifies the row, and the "errors" key contains a list - of the mappings describing one or more problems with the - row. + Returns: + Sequence[Mappings]: + One mapping per row with insert errors: the "index" key + identifies the row, and the "errors" key contains a list of + the mappings describing one or more problems with the row. """ + if isinstance(table, str): + table = TableReference.from_string( + table, default_project=self.project) + rows_info = [] data = {'rows': rows_info} @@ -1414,8 +1503,11 @@ def list_partitions(self, table, retry=DEFAULT_RETRY): """List the partitions in a table. Arguments: - table (Union[google.cloud.bigquery.table.Table, - google.cloud.bigquery.table.TableReference]): + table (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): The table or reference from which to get partition info retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. @@ -1424,6 +1516,10 @@ def list_partitions(self, table, retry=DEFAULT_RETRY): List[str]: A list of the partition ids present in the partitioned table """ + if isinstance(table, str): + table = TableReference.from_string( + table, default_project=self.project) + meta_table = self.get_table( TableReference( self.dataset(table.dataset_id, project=table.project), @@ -1450,49 +1546,48 @@ def list_rows(self, table, selected_fields=None, max_results=None, identical, the values returned may be incomplete. To ensure that the local copy of the schema is up-to-date, call ``client.get_table``. - :type table: One of: - :class:`~google.cloud.bigquery.table.Table` - :class:`~google.cloud.bigquery.table.TableReference` - :param table: the table to list, or a reference to it. - - :type selected_fields: - list of :class:`~google.cloud.bigquery.schema.SchemaField` - :param selected_fields: - The fields to return. Required if ``table`` is a - :class:`~google.cloud.bigquery.table.TableReference`. - - :type max_results: int - :param max_results: (Optional) maximum number of rows to return. - - :type page_token: str - :param page_token: (Optional) Token representing a cursor into the - table's rows. If not passed, the API will return - the first page of the rows. The token marks the - beginning of the iterator to be returned and the - value of the ``page_token`` can be accessed at - ``next_page_token`` of the - :class:`~google.cloud.bigquery.table.RowIterator`. - - :type start_index: int - :param start_index: (Optional) The zero-based index of the starting - row to read. - - :type page_size: int - :param page_size: (Optional) The maximum number of items to return - per page in the iterator. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. - - :rtype: :class:`~google.cloud.bigquery.table.RowIterator` - :returns: Iterator of row data - :class:`~google.cloud.bigquery.table.Row`-s. During each - page, the iterator will have the ``total_rows`` attribute - set, which counts the total number of rows **in the table** - (this is distinct from the total number of rows in the - current page: ``iterator.page.num_items``). + Args: + table (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): + The table to list, or a reference to it. + selected_fields (Sequence[ \ + :class:`~google.cloud.bigquery.schema.SchemaField` \ + ]): + The fields to return. Required if ``table`` is a + :class:`~google.cloud.bigquery.table.TableReference`. + max_results (int): + (Optional) maximum number of rows to return. + page_token (str): + (Optional) Token representing a cursor into the table's rows. + If not passed, the API will return the first page of the + rows. The token marks the beginning of the iterator to be + returned and the value of the ``page_token`` can be accessed + at ``next_page_token`` of the + :class:`~google.cloud.bigquery.table.RowIterator`. + start_index (int): + (Optional) The zero-based index of the starting row to read. + page_size (int): + (Optional) The maximum number of items to return per page in + the iterator. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. During each + page, the iterator will have the ``total_rows`` attribute + set, which counts the total number of rows **in the table** + (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). """ + if isinstance(table, str): + table = TableReference.from_string( + table, default_project=self.project) + if selected_fields is not None: schema = selected_fields elif isinstance(table, TableReference): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 9927af4e9a7a..5679ed76744b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -211,18 +211,21 @@ def from_api_repr(cls, resource): return cls(project, dataset_id) @classmethod - def from_string(cls, full_dataset_id): - """Construct a dataset reference from fully-qualified dataset ID. + def from_string(cls, dataset_id, default_project=None): + """Construct a dataset reference from dataset ID string. Args: - full_dataset_id (str): - A fully-qualified dataset ID in standard SQL format. Must - included both the project ID and the dataset ID, separated by - ``.``. + dataset_id (str): + A dataset ID in standard SQL format. If ``default_project`` + is not specified, this must included both the project ID and + the dataset ID, separated by ``.``. + default_project (str): + Optional. The project ID to use when ``dataset_id`` does not + include a project ID. Returns: DatasetReference: - Dataset reference parsed from ``full_dataset_id``. + Dataset reference parsed from ``dataset_id``. Examples: >>> DatasetReference.from_string('my-project-id.some_dataset') @@ -230,16 +233,27 @@ def from_string(cls, full_dataset_id): Raises: ValueError: - If ``full_dataset_id`` is not a fully-qualified dataset ID in + If ``dataset_id`` is not a fully-qualified dataset ID in standard SQL format. """ - parts = full_dataset_id.split('.') - if len(parts) != 2: + output_dataset_id = dataset_id + output_project_id = default_project + parts = dataset_id.split('.') + + if len(parts) == 1 and not default_project: raise ValueError( - 'full_dataset_id must be a fully-qualified dataset ID in ' - 'standard SQL format. e.g. "project.dataset_id", got ' - '{}'.format(full_dataset_id)) - return cls(*parts) + 'When default_project is not set, dataset_id must be a ' + 'fully-qualified dataset ID in standard SQL format. ' + 'e.g. "project.dataset_id", got {}'.format(dataset_id)) + elif len(parts) == 2: + output_project_id, output_dataset_id = parts + elif len(parts) > 2: + raise ValueError( + 'Too many parts in dataset_id. Expected a fully-qualified ' + 'dataset ID in standard SQL format. e.g. ' + '"project.dataset_id", got {}'.format(dataset_id)) + + return cls(output_project_id, output_dataset_id) def to_api_repr(self): """Construct the API resource representation of this dataset reference diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d4edbd875792..57e06e7f2258 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -172,17 +172,20 @@ def path(self): self._project, self._dataset_id, self._table_id) @classmethod - def from_string(cls, full_table_id): - """Construct a table reference from fully-qualified table ID. + def from_string(cls, table_id, default_project=None): + """Construct a table reference from table ID string. Args: - full_table_id (str): - A fully-qualified table ID in standard SQL format. Must - included a project ID, dataset ID, and table ID, each - separated by ``.``. + table_id (str): + A table ID in standard SQL format. If ``default_project`` + is not specified, this must included a project ID, dataset + ID, and table ID, each separated by ``.``. + default_project (str): + Optional. The project ID to use when ``table_id`` does not + include a project ID. Returns: - TableReference: Table reference parsed from ``full_table_id``. + TableReference: Table reference parsed from ``table_id``. Examples: >>> TableReference.from_string('my-project.mydataset.mytable') @@ -190,19 +193,41 @@ def from_string(cls, full_table_id): Raises: ValueError: - If ``full_table_id`` is not a fully-qualified table ID in + If ``table_id`` is not a fully-qualified table ID in standard SQL format. """ from google.cloud.bigquery.dataset import DatasetReference - parts = full_table_id.split('.') - if len(parts) != 3: + output_project_id = default_project + output_dataset_id = None + output_table_id = None + parts = table_id.split('.') + + if len(parts) < 2: raise ValueError( - 'full_table_id must be a fully-qualified table ID in ' + 'table_id must be a fully-qualified table ID in ' 'standard SQL format. e.g. "project.dataset.table", got ' - '{}'.format(full_table_id)) + '{}'.format(table_id)) + elif len(parts) == 2: + if not default_project: + raise ValueError( + 'When default_project is not set, table_id must be a ' + 'fully-qualified table ID in standard SQL format. ' + 'e.g. "project.dataset_id.table_id", got {}'.format( + table_id)) + output_dataset_id, output_table_id = parts + elif len(parts) == 3: + output_project_id, output_dataset_id, output_table_id = parts + if len(parts) > 3: + raise ValueError( + 'Too many parts in table_id. Must be a fully-qualified table ' + 'ID in standard SQL format. e.g. "project.dataset.table", ' + 'got {}'.format(table_id)) - return cls(DatasetReference(parts[0], parts[1]), parts[2]) + return cls( + DatasetReference(output_project_id, output_dataset_id), + output_table_id, + ) @classmethod def from_api_repr(cls, resource): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1cfc46c4736e..650d1a12bf55 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -175,17 +175,27 @@ def test_create_dataset(self): self.assertEqual(dataset.project, Config.CLIENT.project) def test_get_dataset(self): - DATASET_ID = _make_dataset_id('get_dataset') + dataset_id = _make_dataset_id('get_dataset') client = Config.CLIENT - dataset_arg = Dataset(client.dataset(DATASET_ID)) + dataset_arg = Dataset(client.dataset(dataset_id)) dataset_arg.friendly_name = 'Friendly' dataset_arg.description = 'Description' dataset = retry_403(client.create_dataset)(dataset_arg) self.to_delete.append(dataset) - dataset_ref = client.dataset(DATASET_ID) + dataset_ref = client.dataset(dataset_id) + # Get with a reference. got = client.get_dataset(dataset_ref) + self.assertEqual(got.friendly_name, 'Friendly') + self.assertEqual(got.description, 'Description') + + # Get with a string. + got = client.get_dataset(dataset_id) + self.assertEqual(got.friendly_name, 'Friendly') + self.assertEqual(got.description, 'Description') + # Get with a fully-qualified string. + got = client.get_dataset('{}.{}'.format(client.project, dataset_id)) self.assertEqual(got.friendly_name, 'Friendly') self.assertEqual(got.description, 'Description') @@ -281,6 +291,14 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): self.assertEqual(time_partitioning.field, 'transaction_time') self.assertEqual(table.clustering_fields, ['user_email', 'store_code']) + def test_delete_dataset_with_string(self): + dataset_id = _make_dataset_id('delete_table_true') + dataset_ref = Config.CLIENT.dataset(dataset_id) + retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) + self.assertTrue(_dataset_exists(dataset_ref)) + Config.CLIENT.delete_dataset(dataset_id) + self.assertFalse(_dataset_exists(dataset_ref)) + def test_delete_dataset_delete_contents_true(self): dataset_id = _make_dataset_id('delete_table_true') dataset = retry_403(Config.CLIENT.create_dataset)( @@ -304,20 +322,27 @@ def test_delete_dataset_delete_contents_false(self): Config.CLIENT.delete_dataset(dataset) def test_get_table_w_public_dataset(self): - PUBLIC = 'bigquery-public-data' - DATASET_ID = 'samples' - TABLE_ID = 'shakespeare' - table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_ID) + public = 'bigquery-public-data' + dataset_id = 'samples' + table_id = 'shakespeare' + table_ref = DatasetReference(public, dataset_id).table(table_id) + # Get table with reference. table = Config.CLIENT.get_table(table_ref) - - self.assertEqual(table.table_id, TABLE_ID) - self.assertEqual(table.dataset_id, DATASET_ID) - self.assertEqual(table.project, PUBLIC) + self.assertEqual(table.table_id, table_id) + self.assertEqual(table.dataset_id, dataset_id) + self.assertEqual(table.project, public) schema_names = [field.name for field in table.schema] self.assertEqual( schema_names, ['word', 'word_count', 'corpus', 'corpus_date']) + # Get table with string. + table = Config.CLIENT.get_table( + '{}.{}.{}'.format(public, dataset_id, table_id)) + self.assertEqual(table.table_id, table_id) + self.assertEqual(table.dataset_id, dataset_id) + self.assertEqual(table.project, public) + def test_list_partitions(self): table_ref = DatasetReference( 'bigquery-public-data', @@ -327,8 +352,8 @@ def test_list_partitions(self): self.assertGreater(len(all_rows), 1000) def test_list_tables(self): - DATASET_ID = _make_dataset_id('list_tables') - dataset = self.temp_dataset(DATASET_ID) + dataset_id = _make_dataset_id('list_tables') + dataset = self.temp_dataset(dataset_id) # Retrieve tables before any are created for the dataset. iterator = Config.CLIENT.list_tables(dataset) all_tables = list(iterator) @@ -352,9 +377,18 @@ def test_list_tables(self): self.assertIsNone(iterator.next_page_token) created = [table for table in all_tables if (table.table_id in tables_to_create and - table.dataset_id == DATASET_ID)] + table.dataset_id == dataset_id)] self.assertEqual(len(created), len(tables_to_create)) + # List tables with a string ID. + iterator = Config.CLIENT.list_tables(dataset_id) + self.assertGreater(len(list(iterator)), 0) + + # List tables with a fully-qualified string ID. + iterator = Config.CLIENT.list_tables( + '{}.{}'.format(Config.CLIENT.project, dataset_id)) + self.assertGreater(len(list(iterator)), 0) + def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id('update_table')) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index e49ec3670956..73b14b506244 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -53,6 +53,25 @@ def _make_connection(*responses): return mock_conn +def _make_list_partitons_meta_info(project, dataset_id, table_id, num_rows=0): + return { + 'tableReference': + { + 'projectId': project, + 'datasetId': dataset_id, + 'tableId': '{}$__PARTITIONS_SUMMARY__'.format(table_id), + }, + 'schema': {'fields': [ + {'name': 'project_id', 'type': 'STRING', 'mode': 'NULLABLE'}, + {'name': 'dataset_id', 'type': 'STRING', 'mode': 'NULLABLE'}, + {'name': 'table_id', 'type': 'STRING', 'mode': 'NULLABLE'}, + {'name': 'partition_id', 'type': 'STRING', 'mode': 'NULLABLE'} + ]}, + 'etag': 'ETAG', + 'numRows': num_rows, + } + + class TestClient(unittest.TestCase): PROJECT = 'PROJECT' @@ -449,7 +468,10 @@ def test_get_dataset(self): client._connection = _make_connection( ServerError('', errors=[{'reason': 'backendError'}]), resource) - dataset = client.get_dataset(dataset_ref) + dataset = client.get_dataset( + # Test with a string for dataset ID. + dataset_ref.dataset_id, + ) self.assertEqual(dataset.dataset_id, self.DS_ID) def test_create_dataset_minimal(self): @@ -1342,7 +1364,14 @@ def test_update_table_w_schema_None(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(resource1, resource2) - table = client.get_table(self.TABLE_REF) + table = client.get_table( + # Test with string for table ID + '{}.{}.{}'.format( + self.TABLE_REF.project, + self.TABLE_REF.dataset_id, + self.TABLE_REF.table_id, + ) + ) table.schema = None updated_table = client.update_table(table, ['schema']) @@ -1495,8 +1524,10 @@ def test_list_tables_explicit(self): dataset = client.dataset(self.DS_ID) iterator = client.list_tables( - dataset, max_results=3, page_token=TOKEN) - self.assertIs(iterator.dataset, dataset) + # Test with string for dataset ID. + self.DS_ID, + max_results=3, page_token=TOKEN) + self.assertEqual(iterator.dataset, dataset) page = six.next(iterator.pages) tables = list(page) token = iterator.next_page_token @@ -1521,13 +1552,19 @@ def test_list_tables_wrong_type(self): def test_delete_dataset(self): from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import DatasetReference + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) + datasets = ( + ds_ref, + Dataset(ds_ref), + '{}.{}'.format(self.PROJECT, self.DS_ID), + ) PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection({}, {}) - ds_ref = client.dataset(self.DS_ID) - for arg in (ds_ref, Dataset(ds_ref)): + conn = client._connection = _make_connection(*([{}] * len(datasets))) + for arg in datasets: client.delete_dataset(arg) conn.api_request.assert_called_with( method='DELETE', @@ -1558,15 +1595,24 @@ def test_delete_dataset_wrong_type(self): def test_delete_table(self): from google.cloud.bigquery.table import Table + tables = ( + self.TABLE_REF, + Table(self.TABLE_REF), + '{}.{}.{}'.format( + self.TABLE_REF.project, + self.TABLE_REF.dataset_id, + self.TABLE_REF.table_id, + ), + ) path = 'projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection({}, {}) + conn = client._connection = _make_connection(*([{}] * len(tables))) - for arg in (self.TABLE_REF, Table(self.TABLE_REF)): + for arg in tables: client.delete_table(arg) conn.api_request.assert_called_with( method='DELETE', path='/%s' % path) @@ -2115,10 +2161,14 @@ def test_load_table_from_uri_w_client_location(self): project=self.PROJECT, credentials=creds, _http=http, location=self.LOCATION) conn = client._connection = _make_connection(resource) - destination = client.dataset(self.DS_ID).table(destination_id) client.load_table_from_uri( - source_uri, destination, + source_uri, + # Test with string for table ID. + '{}.{}'.format( + self.DS_ID, + destination_id, + ), job_id=job_id, project='other-project') @@ -2410,12 +2460,12 @@ def test_copy_table_w_client_location(self): project=self.PROJECT, credentials=creds, _http=http, location=self.LOCATION) conn = client._connection = _make_connection(resource) - dataset = client.dataset(self.DS_ID) - source = dataset.table(source_id) - destination = dataset.table(destination_id) client.copy_table( - source, destination, job_id=job_id, project='other-project') + # Test with string for table IDs. + '{}.{}'.format(self.DS_ID, source_id), + '{}.{}'.format(self.DS_ID, destination_id), + job_id=job_id, project='other-project') # Check that copy_table actually starts the job. conn.api_request.assert_called_once_with( @@ -2536,11 +2586,11 @@ def test_extract_table_w_client_location(self): project=self.PROJECT, credentials=creds, _http=http, location=self.LOCATION) conn = client._connection = _make_connection(resource) - dataset = client.dataset(self.DS_ID) - source = dataset.table(source_id) client.extract_table( - source, destination, job_id=job_id, project='other-project') + # Test with string for table ID. + '{}.{}'.format(self.DS_ID, source_id), + destination, job_id=job_id, project='other-project') # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( @@ -3081,7 +3131,7 @@ def test_insert_rows_w_schema(self): from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _microseconds_from_datetime - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.table import SchemaField WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( @@ -3098,7 +3148,6 @@ def test_insert_rows_w_schema(self): SchemaField('age', 'INTEGER', mode='REQUIRED'), SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), ] - table = Table(self.TABLE_REF, schema=schema) ROWS = [ ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), @@ -3122,7 +3171,11 @@ def _row_data(row): } with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): - errors = client.insert_rows(table, ROWS) + # Test with using string IDs for the table. + errors = client.insert_rows( + '{}.{}'.format(self.DS_ID, self.TABLE_ID), + ROWS, + selected_fields=schema) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once() @@ -3519,24 +3572,35 @@ def test_insert_rows_json(self): path='/%s' % PATH, data=SENT) + def test_insert_rows_json_with_string_id(self): + rows = [{'col1': 'val1'}] + creds = _make_credentials() + http = object() + client = self._make_one( + project='default-project', credentials=creds, _http=http) + conn = client._connection = _make_connection({}) + + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(rows)))): + errors = client.insert_rows_json('proj.dset.tbl', rows) + + self.assertEqual(len(errors), 0) + expected = { + 'rows': [{ + 'json': row, + 'insertId': str(i), + } for i, row in enumerate(rows)], + } + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/proj/datasets/dset/tables/tbl/insertAll', + data=expected) + def test_list_partitions(self): from google.cloud.bigquery.table import Table rows = 3 - meta_info = { - 'tableReference': - {'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': '%s$__PARTITIONS_SUMMARY__' % self.TABLE_ID}, - 'schema': {'fields': [ - {'name': 'project_id', 'type': 'STRING', 'mode': 'NULLABLE'}, - {'name': 'dataset_id', 'type': 'STRING', 'mode': 'NULLABLE'}, - {'name': 'table_id', 'type': 'STRING', 'mode': 'NULLABLE'}, - {'name': 'partition_id', 'type': 'STRING', 'mode': 'NULLABLE'} - ]}, - 'etag': 'ETAG', - 'numRows': rows, - } + meta_info = _make_list_partitons_meta_info( + self.PROJECT, self.DS_ID, self.TABLE_ID, rows) data = { 'totalRows': str(rows), @@ -3563,6 +3627,21 @@ def test_list_partitions(self): self.assertEqual(len(partition_list), rows) self.assertIn('20180102', partition_list) + def test_list_partitions_with_string_id(self): + meta_info = _make_list_partitons_meta_info( + self.PROJECT, self.DS_ID, self.TABLE_ID, 0) + + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http) + client._connection = _make_connection(meta_info, {}) + + partition_list = client.list_partitions( + '{}.{}'.format(self.DS_ID, self.TABLE_ID)) + + self.assertEquals(len(partition_list), 0) + def test_list_rows(self): import datetime from google.cloud._helpers import UTC @@ -3641,8 +3720,6 @@ def _bigquery_timestamp_float_repr(ts_float): query_params={}) def test_list_rows_empty_table(self): - from google.cloud.bigquery.table import Table - response = { 'totalRows': '0', 'rows': [], @@ -3654,9 +3731,17 @@ def test_list_rows_empty_table(self): client._connection = _make_connection(response, response) # Table that has no schema because it's an empty table. - table = Table(self.TABLE_REF) - table._properties['creationTime'] = '1234567890' - rows = tuple(client.list_rows(table)) + rows = tuple( + client.list_rows( + # Test with using a string for the table ID. + '{}.{}.{}'.format( + self.TABLE_REF.project, + self.TABLE_REF.dataset_id, + self.TABLE_REF.table_id, + ), + selected_fields=[], + ) + ) self.assertEqual(rows, ()) def test_list_rows_query_params(self): @@ -3977,7 +4062,14 @@ def test_load_table_from_file_w_client_location(self): client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) with do_upload_patch as do_upload: client.load_table_from_file( - file_obj, self.TABLE_REF, job_id='job_id', + file_obj, + # Test with string for table ID. + '{}.{}.{}'.format( + self.TABLE_REF.project, + self.TABLE_REF.dataset_id, + self.TABLE_REF.table_id, + ), + job_id='job_id', project='other-project', job_config=self._make_config()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index d43687e32244..d438c1d478a9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -187,6 +187,27 @@ def test_from_string_legacy_string(self): with self.assertRaises(ValueError): cls.from_string('string-project:string_dataset') + def test_from_string_not_fully_qualified(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string('string_dataset') + with self.assertRaises(ValueError): + cls.from_string('a.b.c') + + def test_from_string_with_default_project(self): + cls = self._get_target_class() + got = cls.from_string( + 'string_dataset', default_project='default-project') + self.assertEqual(got.project, 'default-project') + self.assertEqual(got.dataset_id, 'string_dataset') + + def test_from_string_ignores_default_project(self): + cls = self._get_target_class() + got = cls.from_string( + 'string-project.string_dataset', default_project='default-project') + self.assertEqual(got.project, 'string-project') + self.assertEqual(got.dataset_id, 'string_dataset') + def test___eq___wrong_type(self): dataset = self._make_one('project_1', 'dataset_1') other = object() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 9e7b7340ac6f..1d57abbd47cd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -148,9 +148,32 @@ def test_from_string_legacy_string(self): def test_from_string_not_fully_qualified(self): cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string('string_table') + with self.assertRaises(ValueError): cls.from_string('string_dataset.string_table') + with self.assertRaises(ValueError): + cls.from_string('a.b.c.d') + + def test_from_string_with_default_project(self): + cls = self._get_target_class() + got = cls.from_string( + 'string_dataset.string_table', default_project='default-project') + self.assertEqual(got.project, 'default-project') + self.assertEqual(got.dataset_id, 'string_dataset') + self.assertEqual(got.table_id, 'string_table') + + def test_from_string_ignores_default_project(self): + cls = self._get_target_class() + got = cls.from_string( + 'string-project.string_dataset.string_table', + default_project='default-project') + self.assertEqual(got.project, 'string-project') + self.assertEqual(got.dataset_id, 'string_dataset') + self.assertEqual(got.table_id, 'string_table') + def test___eq___wrong_type(self): from google.cloud.bigquery.dataset import DatasetReference dataset_ref = DatasetReference('project_1', 'dataset_1') From bbb868781ccadf4814f9ac1511af539dc88cc406 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 8 Oct 2018 21:11:35 -0400 Subject: [PATCH 0502/2016] Expose 'to_api_repr' method for jobs. (#6176) Leave '_build_resource' behind as a backward-compatibility alias. Closes #5866. --- .../google/cloud/bigquery/client.py | 2 +- .../google/cloud/bigquery/job.py | 16 +++++++++------- .../tests/unit/test_client.py | 4 ++-- .../google-cloud-bigquery/tests/unit/test_job.py | 10 ++++++++-- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2c967fa09d7e..47b655ab2345 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -949,7 +949,7 @@ def load_table_from_file( job_ref = job._JobReference(job_id, project=project, location=location) load_job = job.LoadJob(job_ref, None, destination, self, job_config) - job_resource = load_job._build_resource() + job_resource = load_job.to_api_repr() if rewind: file_obj.seek(0, os.SEEK_SET) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 7f89efa2d821..c430ddc4285b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -519,10 +519,12 @@ def _get_resource_config(cls, resource): '["configuration"]["%s"]' % cls._JOB_TYPE) return job_id, resource['configuration'] - def _build_resource(self): - """Helper: Generate a resource for :meth:`_begin`.""" + def to_api_repr(self): + """Generate a resource for the job.""" raise NotImplementedError("Abstract") + _build_resource = to_api_repr # backward-compatibility alias + def _begin(self, client=None, retry=DEFAULT_RETRY): """API call: begin the job via a POST request @@ -549,7 +551,7 @@ def _begin(self, client=None, retry=DEFAULT_RETRY): # job has an ID. api_response = client._call_api( retry, - method='POST', path=path, data=self._build_resource()) + method='POST', path=path, data=self.to_api_repr()) self._set_properties(api_response) def exists(self, client=None, retry=DEFAULT_RETRY): @@ -1367,7 +1369,7 @@ def output_rows(self): if statistics is not None: return int(statistics['load']['outputRows']) - def _build_resource(self): + def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" configuration = self._configuration.to_api_repr() if self.source_uris is not None: @@ -1543,7 +1545,7 @@ def destination_encryption_configuration(self): """ return self._configuration.destination_encryption_configuration - def _build_resource(self): + def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" source_refs = [{ @@ -1761,7 +1763,7 @@ def destination_uri_file_counts(self): return [int(count) for count in counts] return None - def _build_resource(self): + def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" source_ref = { @@ -2367,7 +2369,7 @@ def schema_update_options(self): """ return self._configuration.schema_update_options - def _build_resource(self): + def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" configuration = self._configuration.to_api_repr() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 73b14b506244..56f4aabeb651 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2214,7 +2214,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): config = LoadJobConfig() config.source_format = SourceFormat.CSV job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) - metadata = job._build_resource() + metadata = job.to_api_repr() upload, transport = client._initiate_resumable_upload( stream, metadata, num_retries) @@ -2279,7 +2279,7 @@ def _do_multipart_upload_success_helper( config = LoadJobConfig() config.source_format = SourceFormat.CSV job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) - metadata = job._build_resource() + metadata = job.to_api_repr() size = len(data) response = client._do_multipart_upload( stream, metadata, size, num_retries) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 97c22e211a77..398894839ea7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -522,6 +522,12 @@ def test__build_resource(self): with self.assertRaises(NotImplementedError): job._build_resource() + def test_to_api_repr(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + with self.assertRaises(NotImplementedError): + job.to_api_repr() + def test__begin_already(self): job = self._set_properties_job() job._properties['status'] = {'state': 'WHATEVER'} @@ -543,7 +549,7 @@ def test__begin_defaults(self): } } job = self._set_properties_job() - builder = job._build_resource = mock.Mock() + builder = job.to_api_repr = mock.Mock() builder.return_value = resource call_api = job._client._call_api = mock.Mock() call_api.return_value = resource @@ -573,7 +579,7 @@ def test__begin_explicit(self): } } job = self._set_properties_job() - builder = job._build_resource = mock.Mock() + builder = job.to_api_repr = mock.Mock() builder.return_value = resource client = _make_client(project=other_project) call_api = client._call_api = mock.Mock() From 26ba60ec5b225fb3757323a93ad8ee8a0a09df2a Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 10 Oct 2018 12:07:17 -0400 Subject: [PATCH 0503/2016] Harden snippets against transient GCS errors. (#6184) Closes #5886. Closes #5934. --- .../google-cloud-bigquery/docs/snippets.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 5acb339d4571..9e8ba524a115 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -39,8 +39,11 @@ pyarrow = None from google.api_core import datetime_helpers +from google.api_core.exceptions import InternalServerError +from google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests from google.cloud import bigquery +from google.cloud import storage from test_utils.retry import RetryErrors ORIGINAL_FRIENDLY_NAME = 'Original friendly name' @@ -68,6 +71,8 @@ retry_429 = RetryErrors(TooManyRequests) +retry_storage_errors = RetryErrors( + (TooManyRequests, InternalServerError, ServiceUnavailable)) @pytest.fixture(scope='module') @@ -82,6 +87,8 @@ def to_delete(client): for item in doomed: if isinstance(item, (bigquery.Dataset, bigquery.DatasetReference)): retry_429(client.delete_dataset)(item, delete_contents=True) + elif isinstance(item, storage.Bucket): + retry_storage_errors(item.delete)() else: retry_429(item.delete)() @@ -1880,11 +1887,9 @@ def test_copy_table_cmek(client, to_delete): def test_extract_table(client, to_delete): - from google.cloud import storage - bucket_name = 'extract_shakespeare_{}'.format(_millis()) storage_client = storage.Client() - bucket = retry_429(storage_client.create_bucket)(bucket_name) + bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name) to_delete.append(bucket) # [START bigquery_extract_table] @@ -1910,18 +1915,16 @@ def test_extract_table(client, to_delete): project, dataset_id, table_id, destination_uri)) # [END bigquery_extract_table] - blob = bucket.get_blob('shakespeare.csv') + blob = retry_storage_errors(bucket.get_blob)('shakespeare.csv') assert blob.exists assert blob.size > 0 to_delete.insert(0, blob) def test_extract_table_json(client, to_delete): - from google.cloud import storage - bucket_name = 'extract_shakespeare_json_{}'.format(_millis()) storage_client = storage.Client() - bucket = retry_429(storage_client.create_bucket)(bucket_name) + bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name) to_delete.append(bucket) # [START bigquery_extract_table_json] @@ -1945,18 +1948,16 @@ def test_extract_table_json(client, to_delete): extract_job.result() # Waits for job to complete. # [END bigquery_extract_table_json] - blob = bucket.get_blob('shakespeare.json') + blob = retry_storage_errors(bucket.get_blob)('shakespeare.json') assert blob.exists assert blob.size > 0 to_delete.insert(0, blob) def test_extract_table_compressed(client, to_delete): - from google.cloud import storage - bucket_name = 'extract_shakespeare_compress_{}'.format(_millis()) storage_client = storage.Client() - bucket = retry_429(storage_client.create_bucket)(bucket_name) + bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name) to_delete.append(bucket) # [START bigquery_extract_table_compressed] @@ -1979,7 +1980,7 @@ def test_extract_table_compressed(client, to_delete): extract_job.result() # Waits for job to complete. # [END bigquery_extract_table_compressed] - blob = bucket.get_blob('shakespeare.csv.gz') + blob = retry_storage_errors(bucket.get_blob)('shakespeare.csv.gz') assert blob.exists assert blob.size > 0 to_delete.insert(0, blob) From 5d8e6e57a2074dfff267fb889048846b4174e71c Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Wed, 10 Oct 2018 12:51:21 -0400 Subject: [PATCH 0504/2016] BigQuery: fix swallowed error message (#6168) --- .../google/cloud/bigquery/dbapi/cursor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index c550287ecc4b..b582860c6045 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -153,8 +153,8 @@ def execute(self, operation, parameters=None, job_id=None): # Wait for the query to finish. try: self._query_job.result() - except google.cloud.exceptions.GoogleCloudError: - raise exceptions.DatabaseError(self._query_job.errors) + except google.cloud.exceptions.GoogleCloudError as exc: + raise exceptions.DatabaseError(exc) query_results = self._query_job._query_results self._set_rowcount(query_results) From 14710571051bccc6a6ce3b8dea64bcc7ba7bc6e2 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Wed, 10 Oct 2018 11:04:44 -0700 Subject: [PATCH 0505/2016] Use new Nox (#6175) --- .../{nox.py => noxfile.py} | 54 +++++-------------- 1 file changed, 12 insertions(+), 42 deletions(-) rename packages/google-cloud-bigquery/{nox.py => noxfile.py} (79%) diff --git a/packages/google-cloud-bigquery/nox.py b/packages/google-cloud-bigquery/noxfile.py similarity index 79% rename from packages/google-cloud-bigquery/nox.py rename to packages/google-cloud-bigquery/noxfile.py index 902b6e663633..21b8487039cc 100644 --- a/packages/google-cloud-bigquery/nox.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -25,7 +25,6 @@ ) -@nox.session def default(session): """Default unit test session. @@ -40,14 +39,14 @@ def default(session): session.install('-e', local_dep) # Pyarrow does not support Python 3.7 - if session.interpreter == 'python3.7': + if session.python == '3.7': dev_install = '.[pandas]' else: dev_install = '.[pandas, pyarrow]' session.install('-e', dev_install) # IPython does not support Python 2 after version 5.x - if session.interpreter == 'python2.7': + if session.python == '2.7': session.install('ipython==5.5') else: session.install('ipython') @@ -67,35 +66,20 @@ def default(session): ) -@nox.session -@nox.parametrize('py', ['2.7', '3.5', '3.6', '3.7']) -def unit(session, py): +@nox.session(python=['2.7', '3.5', '3.6', '3.7']) +def unit(session): """Run the unit test suite.""" - - # Run unit tests against all supported versions of Python. - session.interpreter = 'python{}'.format(py) - - # Set the virtualenv dirname. - session.virtualenv_dirname = 'unit-' + py - default(session) -@nox.session -@nox.parametrize('py', ['2.7', '3.6']) -def system(session, py): +@nox.session(python=['2.7', '3.6']) +def system(session): """Run the system test suite.""" # Sanity check: Only run system tests if the environment variable is set. if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): session.skip('Credentials must be set via environment variable.') - # Run the system tests against latest Python 2 and Python 3 only. - session.interpreter = 'python{}'.format(py) - - # Set the virtualenv dirname. - session.virtualenv_dirname = 'sys-' + py - # Use pre-release gRPC for system tests. session.install('--pre', 'grpcio') @@ -108,7 +92,7 @@ def system(session, py): session.install('-e', '.[pandas]') # IPython does not support Python 2 after version 5.x - if session.interpreter == 'python2.7': + if session.python == '2.7': session.install('ipython==5.5') else: session.install('ipython') @@ -122,21 +106,14 @@ def system(session, py): ) -@nox.session -@nox.parametrize('py', ['2.7', '3.6']) -def snippets(session, py): +@nox.session(python=['2.7', '3.6']) +def snippets(session): """Run the system test suite.""" # Sanity check: Only run system tests if the environment variable is set. if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): session.skip('Credentials must be set via environment variable.') - # Run the system tests against latest Python 2 and Python 3 only. - session.interpreter = 'python{}'.format(py) - - # Set the virtualenv dirname. - session.virtualenv_dirname = 'snip-' + py - # Install all test dependencies, then install local packages in place. session.install('mock', 'pytest') for local_dep in LOCAL_DEPS: @@ -150,14 +127,13 @@ def snippets(session, py): 'py.test', os.path.join('docs', 'snippets.py'), *session.posargs) -@nox.session +@nox.session(python='3.6') def lint(session): """Run linters. Returns a failure if the linters find linting errors or sufficiently serious code quality issues. """ - session.interpreter = 'python3.6' session.install('flake8', *LOCAL_DEPS) session.install('.') @@ -167,28 +143,22 @@ def lint(session): 'flake8', os.path.join('docs', 'snippets.py')) -@nox.session +@nox.session(python='3.6') def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" - session.interpreter = 'python3.6' - - # Set the virtualenv dirname. - session.virtualenv_dirname = 'setup' session.install('docutils', 'Pygments') session.run( 'python', 'setup.py', 'check', '--restructuredtext', '--strict') -@nox.session +@nox.session(python='3.6') def cover(session): """Run the final coverage report. This outputs the coverage report aggregating coverage from the unit test runs (not system test runs), and then erases coverage data. """ - session.interpreter = 'python3.6' - session.install('coverage', 'pytest-cov') session.run('coverage', 'report', '--show-missing', '--fail-under=100') session.run('coverage', 'erase') From 5fe69f17d2733e5b469705464353da646aeb2d8d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 12 Oct 2018 11:39:44 -0500 Subject: [PATCH 0506/2016] Allow strings or references in create_dataset and create_table (#6199) When creating an empty dataset, the default location is included. --- .../google/cloud/bigquery/client.py | 39 +++- .../tests/unit/test_client.py | 194 ++++++++++++++++++ 2 files changed, 227 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 47b655ab2345..4999337dbf3f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -285,8 +285,14 @@ def create_dataset(self, dataset): https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert Args: - dataset (google.cloud.bigquery.dataset.Dataset): - A ``Dataset`` populated with the desired initial state. + dataset (Union[ \ + :class:`~google.cloud.bigquery.dataset.Dataset`, \ + :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + str, \ + ]): + A :class:`~google.cloud.bigquery.dataset.Dataset` to create. + If ``dataset`` is a reference, an empty dataset is created + with the specified ID and client's default location. Returns: google.cloud.bigquery.dataset.Dataset: @@ -300,6 +306,12 @@ def create_dataset(self, dataset): >>> dataset = client.create_dataset(dataset) """ + if isinstance(dataset, str): + dataset = DatasetReference.from_string( + dataset, default_project=self.project) + if isinstance(dataset, DatasetReference): + dataset = Dataset(dataset) + path = '/projects/%s/datasets' % (dataset.project,) data = dataset.to_api_repr() @@ -317,12 +329,27 @@ def create_table(self, table): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - :type table: :class:`~google.cloud.bigquery.table.Table` - :param table: A ``Table`` populated with the desired initial state. + Args: + table (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): + A :class:`~google.cloud.bigquery.table.Table` to create. + If ``table`` is a reference, an empty table is created + with the specified ID. The dataset that the table belongs to + must already exist. - :rtype: ":class:`~google.cloud.bigquery.table.Table`" - :returns: a new ``Table`` returned from the service. + Returns: + google.cloud.bigquery.table.Table: + A new ``Table`` returned from the service. """ + if isinstance(table, str): + table = TableReference.from_string( + table, default_project=self.project) + if isinstance(table, TableReference): + table = Table(table) + path = '/projects/%s/datasets/%s/tables' % ( table.project, table.dataset_id) api_response = self._connection.api_request( diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 56f4aabeb651..60c81b59c482 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -711,6 +711,109 @@ def test_create_dataset_w_client_location_w_dataset_location(self): 'location': OTHER_LOCATION, }) + def test_create_dataset_w_reference(self): + path = '/projects/%s/datasets' % self.PROJECT + resource = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'etag': "etag", + 'id': "%s:%s" % (self.PROJECT, self.DS_ID), + 'location': self.LOCATION, + } + creds = _make_credentials() + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION) + conn = client._connection = _make_connection(resource) + + dataset = client.create_dataset(client.dataset(self.DS_ID)) + + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) + self.assertEqual(dataset.etag, resource['etag']) + self.assertEqual(dataset.full_dataset_id, resource['id']) + self.assertEqual(dataset.location, self.LOCATION) + + conn.api_request.assert_called_once_with( + method='POST', + path=path, + data={ + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, + 'labels': {}, + 'location': self.LOCATION, + }) + + def test_create_dataset_w_fully_qualified_string(self): + path = '/projects/%s/datasets' % self.PROJECT + resource = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'etag': "etag", + 'id': "%s:%s" % (self.PROJECT, self.DS_ID), + 'location': self.LOCATION, + } + creds = _make_credentials() + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION) + conn = client._connection = _make_connection(resource) + + dataset = client.create_dataset( + '{}.{}'.format(self.PROJECT, self.DS_ID)) + + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) + self.assertEqual(dataset.etag, resource['etag']) + self.assertEqual(dataset.full_dataset_id, resource['id']) + self.assertEqual(dataset.location, self.LOCATION) + + conn.api_request.assert_called_once_with( + method='POST', + path=path, + data={ + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, + 'labels': {}, + 'location': self.LOCATION, + }) + + def test_create_dataset_w_string(self): + path = '/projects/%s/datasets' % self.PROJECT + resource = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'etag': "etag", + 'id': "%s:%s" % (self.PROJECT, self.DS_ID), + 'location': self.LOCATION, + } + creds = _make_credentials() + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION) + conn = client._connection = _make_connection(resource) + + dataset = client.create_dataset(self.DS_ID) + + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) + self.assertEqual(dataset.etag, resource['etag']) + self.assertEqual(dataset.full_dataset_id, resource['id']) + self.assertEqual(dataset.location, self.LOCATION) + + conn.api_request.assert_called_once_with( + method='POST', + path=path, + data={ + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, + 'labels': {}, + 'location': self.LOCATION, + }) + def test_create_table_w_day_partition(self): from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TimePartitioning @@ -991,6 +1094,97 @@ def test_create_table_w_external(self): SourceFormat.CSV) self.assertEqual(got.external_data_configuration.autodetect, True) + def test_create_table_w_reference(self): + path = 'projects/%s/datasets/%s/tables' % ( + self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + } + conn = client._connection = _make_connection(resource) + + got = client.create_table(self.TABLE_REF) + + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % path, + data={ + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'labels': {}, + }) + self.assertEqual(got.table_id, self.TABLE_ID) + + def test_create_table_w_fully_qualified_string(self): + path = 'projects/%s/datasets/%s/tables' % ( + self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + } + conn = client._connection = _make_connection(resource) + + got = client.create_table( + '{}.{}.{}'.format(self.PROJECT, self.DS_ID, self.TABLE_ID)) + + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % path, + data={ + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'labels': {}, + }) + self.assertEqual(got.table_id, self.TABLE_ID) + + def test_create_table_w_string(self): + path = 'projects/%s/datasets/%s/tables' % ( + self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + } + conn = client._connection = _make_connection(resource) + + got = client.create_table('{}.{}'.format(self.DS_ID, self.TABLE_ID)) + + conn.api_request.assert_called_once_with( + method='POST', + path='/%s' % path, + data={ + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'labels': {}, + }) + self.assertEqual(got.table_id, self.TABLE_ID) + def test_get_table(self): path = 'projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_ID, self.TABLE_ID) From 754104f5e59914c79407b03184bf42f1fc9d18da Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 17 Oct 2018 12:51:09 -0400 Subject: [PATCH 0507/2016] Docs: normalize use of support level badges (#6159) * Remove badges for deprecated umbrella 'google-cloud' package. * Clarify support levels. - Add explicit section to support linking from sub-package README badges. - Move explanatory text for a support level above the list of packages at that level. * Normalize use of support-level badges in READMEs. - Note that 'error_reporting/README.rst' and 'monitoring/README.rst' are undergoing other edits; they are left out here to avoid conflicts. * Use 'General Avaialblity' for support level. Fix linkx in related API READMEs. * Fix links for alpha support in API READMEs. * Fix links for beta support in API READMEs. --- packages/google-cloud-bigquery/README.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 7f9d2d8213e9..0e0110136eb4 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -1,7 +1,7 @@ Python Client for Google BigQuery ================================= -|pypi| |versions| +|GA| |pypi| |versions| Querying massive datasets can be time consuming and expensive without the right hardware and infrastructure. Google `BigQuery`_ solves this problem by @@ -11,6 +11,8 @@ processing power of Google's infrastructure. - `Client Library Documentation`_ - `Product Documentation`_ +.. |GA| image:: https://img.shields.io/badge/support-GA-gold.svg + :target: https://github.com/googleapis/google-cloud-python/blob/master/README.rst#general-availability .. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg From f9ab3bc6dc1608fe65e4ff8faa4264dc704c0c33 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 17 Oct 2018 17:00:51 -0400 Subject: [PATCH 0508/2016] BigQuery: add destination table properties to 'LoadJobConfig'. (#6202) Add 'LoadTableConfig.destination_table_description' property. Add 'LoadTableConfig.destination_table_friendly_name' property. Closes #5093. --- .../google/cloud/bigquery/job.py | 228 ++++--- .../google/cloud/bigquery/table.py | 32 + .../tests/unit/test_job.py | 590 ++++++++++++++++-- .../tests/unit/test_table.py | 220 ++++++- 4 files changed, 889 insertions(+), 181 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index c430ddc4285b..2068fca35c45 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -920,6 +920,34 @@ def autodetect(self): def autodetect(self, value): self._set_sub_prop('autodetect', value) + @property + def clustering_fields(self): + """Union[List[str], None]: Fields defining clustering for the table + + (Defaults to :data:`None`). + + Clustering fields are immutable after table creation. + + .. note:: + + As of 2018-06-29, clustering fields cannot be set on a table + which does not also have time partioning defined. + """ + prop = self._get_sub_prop('clustering') + if prop is not None: + return list(prop.get('fields', ())) + + @clustering_fields.setter + def clustering_fields(self, value): + """Union[List[str], None]: Fields defining clustering for the table + + (Defaults to :data:`None`). + """ + if value is not None: + self._set_sub_prop('clustering', {'fields': value}) + else: + self._del_sub_prop('clustering') + @property def create_disposition(self): """google.cloud.bigquery.job.CreateDisposition: Specifies behavior @@ -934,6 +962,69 @@ def create_disposition(self): def create_disposition(self, value): self._set_sub_prop('createDisposition', value) + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.table.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + if using default encryption. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationEncryptionConfiguration + """ + prop = self._get_sub_prop('destinationEncryptionConfiguration') + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @destination_encryption_configuration.setter + def destination_encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop('destinationEncryptionConfiguration', api_repr) + else: + self._del_sub_prop('destinationEncryptionConfiguration') + + @property + def destination_table_description(self): + """Union[str, None] name given to destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTableProperties.description + """ + prop = self._get_sub_prop('destinationTableProperties') + if prop is not None: + return prop['description'] + + @destination_table_description.setter + def destination_table_description(self, value): + keys = [self._job_type, 'destinationTableProperties', 'description'] + if value is not None: + _helpers._set_sub_prop(self._properties, keys, value) + else: + _helpers._del_sub_prop(self._properties, keys) + + @property + def destination_table_friendly_name(self): + """Union[str, None] name given to destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTableProperties.friendlyName + """ + prop = self._get_sub_prop('destinationTableProperties') + if prop is not None: + return prop['friendlyName'] + + @destination_table_friendly_name.setter + def destination_table_friendly_name(self, value): + keys = [self._job_type, 'destinationTableProperties', 'friendlyName'] + if value is not None: + _helpers._set_sub_prop(self._properties, keys, value) + else: + _helpers._del_sub_prop(self._properties, keys) + @property def encoding(self): """google.cloud.bigquery.job.Encoding: The character encoding of the @@ -981,7 +1072,7 @@ def max_bad_records(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords """ - return self._get_sub_prop('maxBadRecords') + return _helpers._int_or_none(self._get_sub_prop('maxBadRecords')) @max_bad_records.setter def max_bad_records(self, value): @@ -1013,46 +1104,6 @@ def quote_character(self): def quote_character(self, value): self._set_sub_prop('quote', value) - @property - def skip_leading_rows(self): - """int: Number of rows to skip when reading data (CSV only). - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows - """ - return _helpers._int_or_none(self._get_sub_prop('skipLeadingRows')) - - @skip_leading_rows.setter - def skip_leading_rows(self, value): - self._set_sub_prop('skipLeadingRows', str(value)) - - @property - def source_format(self): - """google.cloud.bigquery.job.SourceFormat: File format of the data. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat - """ - return self._get_sub_prop('sourceFormat') - - @source_format.setter - def source_format(self, value): - self._set_sub_prop('sourceFormat', value) - - @property - def write_disposition(self): - """google.cloud.bigquery.job.WriteDisposition: Action that occurs if - the destination table already exists. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition - """ - return self._get_sub_prop('writeDisposition') - - @write_disposition.setter - def write_disposition(self, value): - self._set_sub_prop('writeDisposition', value) - @property def schema(self): """List[google.cloud.bigquery.schema.SchemaField]: Schema of the @@ -1077,27 +1128,42 @@ def schema(self, value): [field.to_api_repr() for field in value]) @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.table.EncryptionConfiguration: Custom - encryption configuration for the destination table. + def schema_update_options(self): + """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies + updates to the destination table schema to allow as a side effect of + the load job. + """ + return self._get_sub_prop('schemaUpdateOptions') - Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` - if using default encryption. + @schema_update_options.setter + def schema_update_options(self, values): + self._set_sub_prop('schemaUpdateOptions', values) + + @property + def skip_leading_rows(self): + """int: Number of rows to skip when reading data (CSV only). See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationEncryptionConfiguration + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows """ - prop = self._get_sub_prop('destinationEncryptionConfiguration') - if prop is not None: - prop = EncryptionConfiguration.from_api_repr(prop) - return prop + return _helpers._int_or_none(self._get_sub_prop('skipLeadingRows')) - @destination_encryption_configuration.setter - def destination_encryption_configuration(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop('destinationEncryptionConfiguration', api_repr) + @skip_leading_rows.setter + def skip_leading_rows(self, value): + self._set_sub_prop('skipLeadingRows', str(value)) + + @property + def source_format(self): + """google.cloud.bigquery.job.SourceFormat: File format of the data. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat + """ + return self._get_sub_prop('sourceFormat') + + @source_format.setter + def source_format(self, value): + self._set_sub_prop('sourceFormat', value) @property def time_partitioning(self): @@ -1114,47 +1180,23 @@ def time_partitioning(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._set_sub_prop('timePartitioning', api_repr) - - @property - def clustering_fields(self): - """Union[List[str], None]: Fields defining clustering for the table - - (Defaults to :data:`None`). - - Clustering fields are immutable after table creation. - - .. note:: - - As of 2018-06-29, clustering fields cannot be set on a table - which does not also have time partioning defined. - """ - prop = self._get_sub_prop('clustering') - if prop is not None: - return list(prop.get('fields', ())) - - @clustering_fields.setter - def clustering_fields(self, value): - """Union[List[str], None]: Fields defining clustering for the table - - (Defaults to :data:`None`). - """ - if value is not None: - self._set_sub_prop('clustering', {'fields': value}) + self._set_sub_prop('timePartitioning', api_repr) else: - self._del_sub_prop('clustering') + self._del_sub_prop('timePartitioning') @property - def schema_update_options(self): - """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies - updates to the destination table schema to allow as a side effect of - the load job. + def write_disposition(self): + """google.cloud.bigquery.job.WriteDisposition: Action that occurs if + the destination table already exists. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition """ - return self._get_sub_prop('schemaUpdateOptions') + return self._get_sub_prop('writeDisposition') - @schema_update_options.setter - def schema_update_options(self, values): - self._set_sub_prop('schemaUpdateOptions', values) + @write_disposition.setter + def write_disposition(self, value): + self._set_sub_prop('writeDisposition', value) class LoadJob(_AsyncJob): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 57e06e7f2258..cd8f20ce49f2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -132,6 +132,20 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) + def __eq__(self, other): + if not isinstance(other, EncryptionConfiguration): + return NotImplemented + return self.kms_key_name == other.kms_key_name + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self.kms_key_name) + + def __repr__(self): + return 'EncryptionConfiguration({})'.format(self.kms_key_name) + class TableReference(object): """TableReferences are pointers to tables. @@ -1342,6 +1356,24 @@ def to_api_repr(self): """ return self._properties + def _key(self): + return tuple(sorted(self._properties.items())) + + def __eq__(self, other): + if not isinstance(other, TimePartitioning): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + key_vals = ['{}={}'.format(key, val) for key, val in self._key()] + return 'TimePartitioning({})'.format(','.join(key_vals)) + def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 398894839ea7..9b94dcf591ef 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1147,94 +1147,558 @@ def _verifyReadonlyResourceProperties(self, job, resource): class TestLoadJobConfig(unittest.TestCase, _Base): JOB_TYPE = 'load' - def _make_resource(self, started=False, ended=False): - resource = super(TestLoadJobConfig, self)._make_resource( - started, ended) - config = resource['configuration']['load'] - config['sourceUris'] = [self.SOURCE1] - config['destinationTable'] = { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, - } - config['destinationEncryptionConfiguration'] = { - 'kmsKeyName': self.KMS_KEY_NAME} - - return resource - @staticmethod def _get_target_class(): from google.cloud.bigquery.job import LoadJobConfig return LoadJobConfig - def test_schema(self): - from google.cloud.bigquery.schema import SchemaField + def test_allow_jagged_rows_missing(self): config = self._get_target_class()() - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - config.schema = [full_name, age] - self.assertEqual(config.schema, [full_name, age]) + self.assertIsNone(config.allow_jagged_rows) - def test_time_partitioning(self): - from google.cloud.bigquery import table + def test_allow_jagged_rows_hit(self): + config = self._get_target_class()() + config._properties['load']['allowJaggedRows'] = True + self.assertTrue(config.allow_jagged_rows) - time_partitioning = table.TimePartitioning( - type_=table.TimePartitioningType.DAY, field='name') + def test_allow_jagged_rows_setter(self): config = self._get_target_class()() - config.time_partitioning = time_partitioning - # TimePartitioning should be configurable after assigning - time_partitioning.expiration_ms = 10000 - self.assertEqual( - config.time_partitioning.type_, - table.TimePartitioningType.DAY) - self.assertEqual(config.time_partitioning.field, 'name') - self.assertEqual(config.time_partitioning.expiration_ms, 10000) + config.allow_jagged_rows = True + self.assertTrue(config._properties['load']['allowJaggedRows']) - config.time_partitioning = None - self.assertIsNone(config.time_partitioning) + def test_allow_quoted_newlines_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.allow_quoted_newlines) - def test_clustering_fields(self): + def test_allow_quoted_newlines_hit(self): + config = self._get_target_class()() + config._properties['load']['allowQuotedNewlines'] = True + self.assertTrue(config.allow_quoted_newlines) + + def test_allow_quoted_newlines_setter(self): + config = self._get_target_class()() + config.allow_quoted_newlines = True + self.assertTrue(config._properties['load']['allowQuotedNewlines']) + + def test_autodetect_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.autodetect) + + def test_autodetect_hit(self): + config = self._get_target_class()() + config._properties['load']['autodetect'] = True + self.assertTrue(config.autodetect) + + def test_autodetect_setter(self): + config = self._get_target_class()() + config.autodetect = True + self.assertTrue(config._properties['load']['autodetect']) + + def test_clustering_fields_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.clustering_fields) + + def test_clustering_fields_hit(self): + config = self._get_target_class()() + fields = ['email', 'postal_code'] + config._properties['load']['clustering'] = { + 'fields': fields, + } + self.assertEqual(config.clustering_fields, fields) + + def test_clustering_fields_setter(self): fields = ['email', 'postal_code'] config = self._get_target_class()() config.clustering_fields = fields - self.assertEqual(config.clustering_fields, fields) + self.assertEqual( + config._properties['load']['clustering'], {'fields': fields}) + def test_clustering_fields_setter_w_none(self): + config = self._get_target_class()() + fields = ['email', 'postal_code'] + config._properties['load']['clustering'] = { + 'fields': fields, + } config.clustering_fields = None self.assertIsNone(config.clustering_fields) + self.assertNotIn('clustering', config._properties['load']) - def test_api_repr(self): - resource = self._make_resource() - config = self._get_target_class().from_api_repr(resource) - self.assertEqual(config.to_api_repr(), resource) + def test_create_disposition_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.create_disposition) - def test_to_api_repr_with_encryption(self): + def test_create_disposition_hit(self): + from google.cloud.bigquery.job import CreateDisposition + + disposition = CreateDisposition.CREATE_IF_NEEDED + config = self._get_target_class()() + config._properties['load']['createDisposition'] = disposition + self.assertEqual(config.create_disposition, disposition) + + def test_create_disposition_setter(self): + from google.cloud.bigquery.job import CreateDisposition + + disposition = CreateDisposition.CREATE_IF_NEEDED + config = self._get_target_class()() + config.create_disposition = disposition + self.assertEqual( + config._properties['load']['createDisposition'], disposition) + + def test_destination_encryption_configuration_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_encryption_configuration) + + def test_destination_encryption_configuration_hit(self): from google.cloud.bigquery.table import EncryptionConfiguration - config = self._make_one() - config.destination_encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME) - resource = config.to_api_repr() + kms_key_name = 'kms-key-name' + encryption_configuration = EncryptionConfiguration(kms_key_name) + config = self._get_target_class()() + config._properties['load']['destinationEncryptionConfiguration'] = { + 'kmsKeyName': kms_key_name, + } self.assertEqual( - resource, - { - 'load': { - 'destinationEncryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME, - }, - }, - }) + config.destination_encryption_configuration, + encryption_configuration) - def test_to_api_repr_with_encryption_none(self): - config = self._make_one() + def test_destination_encryption_configuration_setter(self): + from google.cloud.bigquery.table import EncryptionConfiguration + + kms_key_name = 'kms-key-name' + encryption_configuration = EncryptionConfiguration(kms_key_name) + config = self._get_target_class()() + config.destination_encryption_configuration = encryption_configuration + expected = { + 'kmsKeyName': kms_key_name, + } + self.assertEqual( + config._properties['load']['destinationEncryptionConfiguration'], + expected) + + def test_destination_encryption_configuration_setter_w_none(self): + kms_key_name = 'kms-key-name' + config = self._get_target_class()() + config._properties['load']['destinationEncryptionConfiguration'] = { + 'kmsKeyName': kms_key_name, + } config.destination_encryption_configuration = None - resource = config.to_api_repr() + self.assertIsNone(config.destination_encryption_configuration) + self.assertNotIn( + 'destinationEncryptionConfiguration', config._properties['load']) + + def test_destination_table_description_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_table_description) + + def test_destination_table_description_hit(self): + description = 'Description' + config = self._get_target_class()() + config._properties['load']['destinationTableProperties'] = { + 'description': description, + } self.assertEqual( - resource, - { - 'load': { - 'destinationEncryptionConfiguration': None, - }, - }) + config.destination_table_description, description) + + def test_destination_table_description_setter(self): + description = 'Description' + config = self._get_target_class()() + config.destination_table_description = description + expected = { + 'description': description, + } + self.assertEqual( + config._properties['load']['destinationTableProperties'], expected) + + def test_destination_table_description_setter_w_fn_already(self): + description = 'Description' + friendly_name = 'Friendly Name' + config = self._get_target_class()() + config._properties['load']['destinationTableProperties'] = { + 'friendlyName': friendly_name, + } + config.destination_table_description = description + expected = { + 'friendlyName': friendly_name, + 'description': description, + } + self.assertEqual( + config._properties['load']['destinationTableProperties'], expected) + + def test_destination_table_description_w_none(self): + description = 'Description' + friendly_name = 'Friendly Name' + config = self._get_target_class()() + config._properties['load']['destinationTableProperties'] = { + 'description': description, + 'friendlyName': friendly_name, + } + config.destination_table_description = None + expected = { + 'friendlyName': friendly_name, + } + self.assertEqual( + config._properties['load']['destinationTableProperties'], expected) + + def test_destination_table_friendly_name_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_table_friendly_name) + + def test_destination_table_friendly_name_hit(self): + friendly_name = 'Friendly Name' + config = self._get_target_class()() + config._properties['load']['destinationTableProperties'] = { + 'friendlyName': friendly_name, + } + self.assertEqual( + config.destination_table_friendly_name, friendly_name) + + def test_destination_table_friendly_name_setter(self): + friendly_name = 'Friendly Name' + config = self._get_target_class()() + config.destination_table_friendly_name = friendly_name + expected = { + 'friendlyName': friendly_name, + } + self.assertEqual( + config._properties['load']['destinationTableProperties'], expected) + + def test_destination_table_friendly_name_setter_w_descr_already(self): + friendly_name = 'Friendly Name' + description = 'Description' + config = self._get_target_class()() + config._properties['load']['destinationTableProperties'] = { + 'description': description, + } + config.destination_table_friendly_name = friendly_name + expected = { + 'friendlyName': friendly_name, + 'description': description, + } + self.assertEqual( + config._properties['load']['destinationTableProperties'], expected) + + def test_destination_table_friendly_name_w_none(self): + friendly_name = 'Friendly Name' + description = 'Description' + config = self._get_target_class()() + config._properties['load']['destinationTableProperties'] = { + 'description': description, + 'friendlyName': friendly_name, + } + config.destination_table_friendly_name = None + expected = { + 'description': description, + } + self.assertEqual( + config._properties['load']['destinationTableProperties'], expected) + + def test_encoding_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.encoding) + + def test_encoding_hit(self): + from google.cloud.bigquery.job import Encoding + + encoding = Encoding.UTF_8 + config = self._get_target_class()() + config._properties['load']['encoding'] = encoding + self.assertEqual(config.encoding, encoding) + + def test_encoding_setter(self): + from google.cloud.bigquery.job import Encoding + + encoding = Encoding.UTF_8 + config = self._get_target_class()() + config.encoding = encoding + self.assertEqual( + config._properties['load']['encoding'], encoding) + + def test_field_delimiter_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.field_delimiter) + + def test_field_delimiter_hit(self): + field_delimiter = '|' + config = self._get_target_class()() + config._properties['load']['fieldDelimiter'] = field_delimiter + self.assertEqual(config.field_delimiter, field_delimiter) + + def test_field_delimiter_setter(self): + field_delimiter = '|' + config = self._get_target_class()() + config.field_delimiter = field_delimiter + self.assertEqual( + config._properties['load']['fieldDelimiter'], field_delimiter) + + def test_ignore_unknown_values_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.ignore_unknown_values) + + def test_ignore_unknown_values_hit(self): + config = self._get_target_class()() + config._properties['load']['ignoreUnknownValues'] = True + self.assertTrue(config.ignore_unknown_values) + + def test_ignore_unknown_values_setter(self): + config = self._get_target_class()() + config.ignore_unknown_values = True + self.assertTrue(config._properties['load']['ignoreUnknownValues']) + + def test_max_bad_records_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.max_bad_records) + + def test_max_bad_records_hit(self): + max_bad_records = 13 + config = self._get_target_class()() + config._properties['load']['maxBadRecords'] = max_bad_records + self.assertEqual(config.max_bad_records, max_bad_records) + + def test_max_bad_records_setter(self): + max_bad_records = 13 + config = self._get_target_class()() + config.max_bad_records = max_bad_records + self.assertEqual( + config._properties['load']['maxBadRecords'], max_bad_records) + + def test_null_marker_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.null_marker) + + def test_null_marker_hit(self): + null_marker = 'XXX' + config = self._get_target_class()() + config._properties['load']['nullMarker'] = null_marker + self.assertEqual(config.null_marker, null_marker) + + def test_null_marker_setter(self): + null_marker = 'XXX' + config = self._get_target_class()() + config.null_marker = null_marker + self.assertEqual( + config._properties['load']['nullMarker'], null_marker) + + def test_quote_character_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.quote_character) + + def test_quote_character_hit(self): + quote_character = "'" + config = self._get_target_class()() + config._properties['load']['quote'] = quote_character + self.assertEqual(config.quote_character, quote_character) + + def test_quote_character_setter(self): + quote_character = "'" + config = self._get_target_class()() + config.quote_character = quote_character + self.assertEqual( + config._properties['load']['quote'], quote_character) + + def test_schema_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.schema) + + def test_schema_hit(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + all_props_repr = { + 'mode': 'REQUIRED', + 'name': 'foo', + 'type': 'INTEGER', + 'description': 'Foo', + } + minimal_repr = { + 'name': 'bar', + 'type': 'STRING', + } + config._properties['load']['schema'] = { + 'fields': [all_props_repr, minimal_repr], + } + all_props, minimal = config.schema + self.assertEqual(all_props, SchemaField.from_api_repr(all_props_repr)) + self.assertEqual(minimal, SchemaField.from_api_repr(minimal_repr)) + + def test_schema_setter(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + config.schema = [full_name, age] + full_name_repr = { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': None, + } + age_repr = { + 'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED', + 'description': None, + } + self.assertEqual( + config._properties['load']['schema'], + {'fields': [full_name_repr, age_repr]}) + + def test_schema_update_options_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.schema_update_options) + + def test_schema_update_options_hit(self): + from google.cloud.bigquery.job import SchemaUpdateOption + + options = [ + SchemaUpdateOption.ALLOW_FIELD_ADDITION, + SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + ] + config = self._get_target_class()() + config._properties['load']['schemaUpdateOptions'] = options + self.assertEqual(config.schema_update_options, options) + + def test_schema_update_options_setter(self): + from google.cloud.bigquery.job import SchemaUpdateOption + + options = [ + SchemaUpdateOption.ALLOW_FIELD_ADDITION, + SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + ] + config = self._get_target_class()() + config.schema_update_options = options + self.assertEqual( + config._properties['load']['schemaUpdateOptions'], options) + + def test_skip_leading_rows_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.skip_leading_rows) + + def test_skip_leading_rows_hit_w_str(self): + skip_leading_rows = 1 + config = self._get_target_class()() + config._properties['load']['skipLeadingRows'] = str(skip_leading_rows) + self.assertEqual(config.skip_leading_rows, skip_leading_rows) + + def test_skip_leading_rows_hit_w_integer(self): + skip_leading_rows = 1 + config = self._get_target_class()() + config._properties['load']['skipLeadingRows'] = skip_leading_rows + self.assertEqual(config.skip_leading_rows, skip_leading_rows) + + def test_skip_leading_rows_setter(self): + skip_leading_rows = 1 + config = self._get_target_class()() + config.skip_leading_rows = skip_leading_rows + self.assertEqual( + config._properties['load']['skipLeadingRows'], + str(skip_leading_rows)) + + def test_source_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.source_format) + + def test_source_format_hit(self): + from google.cloud.bigquery.job import SourceFormat + + source_format = SourceFormat.CSV + config = self._get_target_class()() + config._properties['load']['sourceFormat'] = source_format + self.assertEqual(config.source_format, source_format) + + def test_source_format_setter(self): + from google.cloud.bigquery.job import SourceFormat + + source_format = SourceFormat.CSV + config = self._get_target_class()() + config.source_format = source_format + self.assertEqual( + config._properties['load']['sourceFormat'], source_format) + + def test_time_partitioning_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.time_partitioning) + + def test_time_partitioning_hit(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType + + field = 'creation_date' + year_ms = 86400 * 1000 * 365 + config = self._get_target_class()() + config._properties['load']['timePartitioning'] = { + 'type': TimePartitioningType.DAY, + 'field': field, + 'expirationMs': str(year_ms), + 'requirePartitionFilter': False, + } + expected = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) + self.assertEqual(config.time_partitioning, expected) + + def test_time_partitioning_setter(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType + + field = 'creation_date' + year_ms = 86400 * 1000 * 365 + time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) + config = self._get_target_class()() + config.time_partitioning = time_partitioning + expected = { + 'type': TimePartitioningType.DAY, + 'field': field, + 'expirationMs': str(year_ms), + 'requirePartitionFilter': False, + } + self.assertEqual( + config._properties['load']['timePartitioning'], expected) + + def test_time_partitioning_setter_w_none(self): + from google.cloud.bigquery.table import TimePartitioningType + + field = 'creation_date' + year_ms = 86400 * 1000 * 365 + config = self._get_target_class()() + config._properties['load']['timePartitioning'] = { + 'type': TimePartitioningType.DAY, + 'field': field, + 'expirationMs': str(year_ms), + 'requirePartitionFilter': False, + } + config.time_partitioning = None + self.assertIsNone(config.time_partitioning) + self.assertNotIn('timePartitioning', config._properties['load']) + + def test_write_disposition_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.write_disposition) + + def test_write_disposition_hit(self): + from google.cloud.bigquery.job import WriteDisposition + + write_disposition = WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()() + config._properties['load']['writeDisposition'] = write_disposition + self.assertEqual(config.write_disposition, write_disposition) + + def test_write_disposition_setter(self): + from google.cloud.bigquery.job import WriteDisposition + + write_disposition = WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()() + config.write_disposition = write_disposition + self.assertEqual( + config._properties['load']['writeDisposition'], write_disposition) class TestLoadJob(unittest.TestCase, _Base): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 1d57abbd47cd..3a187d20af4b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -84,6 +84,59 @@ def test_to_api_repr(self): 'kmsKeyName': self.KMS_KEY_NAME, }) + def test___eq___wrong_type(self): + encryption_config = self._make_one() + other = object() + self.assertNotEqual(encryption_config, other) + self.assertEqual(encryption_config, mock.ANY) + + def test___eq___kms_key_name_mismatch(self): + encryption_config = self._make_one() + other = self._make_one(self.KMS_KEY_NAME) + self.assertNotEqual(encryption_config, other) + + def test___eq___hit(self): + encryption_config = self._make_one(self.KMS_KEY_NAME) + other = self._make_one(self.KMS_KEY_NAME) + self.assertEqual(encryption_config, other) + + def test___ne___wrong_type(self): + encryption_config = self._make_one() + other = object() + self.assertNotEqual(encryption_config, other) + self.assertEqual(encryption_config, mock.ANY) + + def test___ne___same_value(self): + encryption_config1 = self._make_one(self.KMS_KEY_NAME) + encryption_config2 = self._make_one(self.KMS_KEY_NAME) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (encryption_config1 != encryption_config2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + encryption_config1 = self._make_one() + encryption_config2 = self._make_one(self.KMS_KEY_NAME) + self.assertNotEqual(encryption_config1, encryption_config2) + + def test___hash__set_equality(self): + encryption_config1 = self._make_one(self.KMS_KEY_NAME) + encryption_config2 = self._make_one(self.KMS_KEY_NAME) + set_one = {encryption_config1, encryption_config2} + set_two = {encryption_config1, encryption_config2} + self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + encryption_config1 = self._make_one() + encryption_config2 = self._make_one(self.KMS_KEY_NAME) + set_one = {encryption_config1} + set_two = {encryption_config2} + self.assertNotEqual(set_one, set_two) + + def test___repr__(self): + encryption_config = self._make_one(self.KMS_KEY_NAME) + expected = "EncryptionConfiguration({})".format(self.KMS_KEY_NAME) + self.assertEqual(repr(encryption_config), expected) + class TestTableReference(unittest.TestCase): @@ -1440,33 +1493,26 @@ def test_to_dataframe_error_if_pandas_is_none(self): class TestTimePartitioning(unittest.TestCase): - def test_constructor_defaults(self): + def _get_target_class(self): from google.cloud.bigquery.table import TimePartitioning - time_partitioning = TimePartitioning() + return TimePartitioning + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_constructor_defaults(self): + time_partitioning = self._make_one() self.assertEqual(time_partitioning.type_, 'DAY') self.assertIsNone(time_partitioning.field) self.assertIsNone(time_partitioning.expiration_ms) self.assertIsNone(time_partitioning.require_partition_filter) - api_repr = time_partitioning.to_api_repr() - - exp_api_repr = {'type': 'DAY'} - self.assertEqual(api_repr, exp_api_repr) - - tp_from_api_repr = TimePartitioning.from_api_repr(api_repr) - - self.assertEqual(tp_from_api_repr.type_, 'DAY') - self.assertIsNone(tp_from_api_repr.field) - self.assertIsNone(tp_from_api_repr.expiration_ms) - self.assertIsNone(tp_from_api_repr.require_partition_filter) - - def test_constructor_properties(self): - from google.cloud.bigquery.table import TimePartitioning + def test_constructor_explicit(self): from google.cloud.bigquery.table import TimePartitioningType - time_partitioning = TimePartitioning( + time_partitioning = self._make_one( type_=TimePartitioningType.DAY, field='name', expiration_ms=10000, @@ -1478,19 +1524,143 @@ def test_constructor_properties(self): self.assertEqual(time_partitioning.expiration_ms, 10000) self.assertTrue(time_partitioning.require_partition_filter) - api_repr = time_partitioning.to_api_repr() + def test_from_api_repr_minimal(self): + from google.cloud.bigquery.table import TimePartitioningType + + klass = self._get_target_class() + api_repr = {'type': 'DAY'} + time_partitioning = klass.from_api_repr(api_repr) + + self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY) + self.assertIsNone(time_partitioning.field) + self.assertIsNone(time_partitioning.expiration_ms) + self.assertIsNone(time_partitioning.require_partition_filter) - exp_api_repr = { + def test_from_api_repr_explicit(self): + from google.cloud.bigquery.table import TimePartitioningType + + klass = self._get_target_class() + api_repr = { 'type': 'DAY', 'field': 'name', 'expirationMs': '10000', 'requirePartitionFilter': True, } - self.assertEqual(api_repr, exp_api_repr) + time_partitioning = klass.from_api_repr(api_repr) - tp_from_api_repr = TimePartitioning.from_api_repr(api_repr) - - self.assertEqual(tp_from_api_repr.type_, 'DAY') - self.assertEqual(tp_from_api_repr.field, 'name') - self.assertEqual(tp_from_api_repr.expiration_ms, 10000) + self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY) + self.assertEqual(time_partitioning.field, 'name') + self.assertEqual(time_partitioning.expiration_ms, 10000) self.assertTrue(time_partitioning.require_partition_filter) + + def test_to_api_repr_defaults(self): + time_partitioning = self._make_one() + expected = {'type': 'DAY'} + self.assertEqual(time_partitioning.to_api_repr(), expected) + + def test_to_api_repr_explicit(self): + from google.cloud.bigquery.table import TimePartitioningType + + time_partitioning = self._make_one( + type_=TimePartitioningType.DAY, + field='name', + expiration_ms=10000, + require_partition_filter=True + ) + + expected = { + 'type': 'DAY', + 'field': 'name', + 'expirationMs': '10000', + 'requirePartitionFilter': True, + } + self.assertEqual(time_partitioning.to_api_repr(), expected) + + def test___eq___wrong_type(self): + time_partitioning = self._make_one() + other = object() + self.assertNotEqual(time_partitioning, other) + self.assertEqual(time_partitioning, mock.ANY) + + def test___eq___type__mismatch(self): + time_partitioning = self._make_one() + other = self._make_one(type_='HOUR') + self.assertNotEqual(time_partitioning, other) + + def test___eq___field_mismatch(self): + time_partitioning = self._make_one(field='foo') + other = self._make_one(field='bar') + self.assertNotEqual(time_partitioning, other) + + def test___eq___expiration_ms_mismatch(self): + time_partitioning = self._make_one(field='foo', expiration_ms=100000) + other = self._make_one(field='foo', expiration_ms=200000) + self.assertNotEqual(time_partitioning, other) + + def test___eq___require_partition_filter_mismatch(self): + time_partitioning = self._make_one( + field='foo', expiration_ms=100000, require_partition_filter=True) + other = self._make_one( + field='foo', expiration_ms=100000, require_partition_filter=False) + self.assertNotEqual(time_partitioning, other) + + def test___eq___hit(self): + time_partitioning = self._make_one( + field='foo', expiration_ms=100000, require_partition_filter=True) + other = self._make_one( + field='foo', expiration_ms=100000, require_partition_filter=True) + self.assertEqual(time_partitioning, other) + + def test___ne___wrong_type(self): + time_partitioning = self._make_one() + other = object() + self.assertNotEqual(time_partitioning, other) + self.assertEqual(time_partitioning, mock.ANY) + + def test___ne___same_value(self): + time_partitioning1 = self._make_one() + time_partitioning2 = self._make_one() + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (time_partitioning1 != time_partitioning2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + time_partitioning1 = self._make_one() + time_partitioning2 = self._make_one(type_='HOUR') + self.assertNotEqual(time_partitioning1, time_partitioning2) + + def test___hash__set_equality(self): + time_partitioning1 = self._make_one(field='foo') + time_partitioning2 = self._make_one(field='foo') + set_one = {time_partitioning1, time_partitioning2} + set_two = {time_partitioning1, time_partitioning2} + self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + time_partitioning1 = self._make_one(field='foo') + time_partitioning2 = self._make_one(field='bar') + set_one = {time_partitioning1} + set_two = {time_partitioning2} + self.assertNotEqual(set_one, set_two) + + def test___repr___minimal(self): + time_partitioning = self._make_one() + expected = "TimePartitioning(type=DAY)" + self.assertEqual(repr(time_partitioning), expected) + + def test___repr___explicit(self): + from google.cloud.bigquery.table import TimePartitioningType + + time_partitioning = self._make_one( + type_=TimePartitioningType.DAY, + field='name', + expiration_ms=10000, + require_partition_filter=True + ) + expected = ( + "TimePartitioning(" + "expirationMs=10000," + "field=name," + "requirePartitionFilter=True," + "type=DAY)") + self.assertEqual(repr(time_partitioning), expected) From 926561a0022d4fa3a5b2c73a619cb28b866f4744 Mon Sep 17 00:00:00 2001 From: Henry Fuller Date: Fri, 19 Oct 2018 11:33:21 -0400 Subject: [PATCH 0509/2016] BigQuery: use _get_sub_prop helper so missing load stats don't raise. (#6269) --- .../google/cloud/bigquery/job.py | 24 +++++++++---------- .../tests/unit/test_job.py | 14 +++++++---- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 2068fca35c45..053de18fb827 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1374,9 +1374,9 @@ def input_file_bytes(self): :returns: the count (None until set from the server). :raises: ValueError for invalid value types. """ - statistics = self._properties.get('statistics') - if statistics is not None: - return int(statistics['load']['inputFileBytes']) + return _helpers._int_or_none(_helpers._get_sub_prop( + self._properties, ['statistics', 'load', 'inputFileBytes'] + )) @property def input_files(self): @@ -1385,9 +1385,9 @@ def input_files(self): :rtype: int, or ``NoneType`` :returns: the count (None until set from the server). """ - statistics = self._properties.get('statistics') - if statistics is not None: - return int(statistics['load']['inputFiles']) + return _helpers._int_or_none(_helpers._get_sub_prop( + self._properties, ['statistics', 'load', 'inputFiles'] + )) @property def output_bytes(self): @@ -1396,9 +1396,9 @@ def output_bytes(self): :rtype: int, or ``NoneType`` :returns: the count (None until set from the server). """ - statistics = self._properties.get('statistics') - if statistics is not None: - return int(statistics['load']['outputBytes']) + return _helpers._int_or_none(_helpers._get_sub_prop( + self._properties, ['statistics', 'load', 'outputBytes'] + )) @property def output_rows(self): @@ -1407,9 +1407,9 @@ def output_rows(self): :rtype: int, or ``NoneType`` :returns: the count (None until set from the server). """ - statistics = self._properties.get('statistics') - if statistics is not None: - return int(statistics['load']['outputRows']) + return _helpers._int_or_none(_helpers._get_sub_prop( + self._properties, ['statistics', 'load', 'outputRows'] + )) def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 9b94dcf591ef..88deb068f871 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1985,11 +1985,6 @@ def test_props_set_by_server(self): statistics['creationTime'] = _millis(CREATED) statistics['startTime'] = _millis(STARTED) statistics['endTime'] = _millis(ENDED) - load_stats = statistics['load'] = {} - load_stats['inputFileBytes'] = 12345 - load_stats['inputFiles'] = 1 - load_stats['outputBytes'] = 23456 - load_stats['outputRows'] = 345 self.assertEqual(job.etag, 'ETAG') self.assertEqual(job.self_link, URL) @@ -1999,6 +1994,15 @@ def test_props_set_by_server(self): self.assertEqual(job.started, STARTED) self.assertEqual(job.ended, ENDED) + # running jobs have no load stats not yet set. + self.assertIsNone(job.output_bytes) + + load_stats = statistics['load'] = {} + load_stats['inputFileBytes'] = 12345 + load_stats['inputFiles'] = 1 + load_stats['outputBytes'] = 23456 + load_stats['outputRows'] = 345 + self.assertEqual(job.input_file_bytes, 12345) self.assertEqual(job.input_files, 1) self.assertEqual(job.output_bytes, 23456) From c35c66a4664a36e0b402c108725e3e57fca110ac Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 19 Oct 2018 10:01:58 -0700 Subject: [PATCH 0510/2016] Move usage guides to their own docs. (#6238) * Move usage guides to their own docs. The API reference links were buried behind pages of code samples. Moving the how-to guides to their own pages makes the documentation easier to navigate. * Reorganize howto guides into sections. Add links to relevant methods. * Move 'more examples' to bottom of the examples section. Add section headers to Dataset how-to guides. * Move howto to usage directory --- packages/google-cloud-bigquery/README.rst | 45 +- packages/google-cloud-bigquery/docs/index.rst | 407 +----------------- .../google-cloud-bigquery/docs/usage.html | 4 +- .../docs/usage/client.rst | 25 ++ .../docs/usage/datasets.rst | 83 ++++ .../docs/usage/encryption.rst | 52 +++ .../docs/usage/index.rst | 35 ++ .../google-cloud-bigquery/docs/usage/jobs.rst | 18 + .../docs/usage/pandas.rst | 62 +++ .../docs/usage/queries.rst | 49 +++ .../docs/usage/tables.rst | 147 +++++++ 11 files changed, 481 insertions(+), 446 deletions(-) create mode 100644 packages/google-cloud-bigquery/docs/usage/client.rst create mode 100644 packages/google-cloud-bigquery/docs/usage/datasets.rst create mode 100644 packages/google-cloud-bigquery/docs/usage/encryption.rst create mode 100644 packages/google-cloud-bigquery/docs/usage/index.rst create mode 100644 packages/google-cloud-bigquery/docs/usage/jobs.rst create mode 100644 packages/google-cloud-bigquery/docs/usage/pandas.rst create mode 100644 packages/google-cloud-bigquery/docs/usage/queries.rst create mode 100644 packages/google-cloud-bigquery/docs/usage/tables.rst diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 0e0110136eb4..0ea938e306fe 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -74,56 +74,15 @@ Windows Example Usage ------------- -Create a dataset -~~~~~~~~~~~~~~~~ +Perform a query +~~~~~~~~~~~~~~~ .. code:: python from google.cloud import bigquery - from google.cloud.bigquery import Dataset client = bigquery.Client() - dataset_ref = client.dataset('dataset_name') - dataset = Dataset(dataset_ref) - dataset.description = 'my dataset' - dataset = client.create_dataset(dataset) # API request - -Load data from CSV -~~~~~~~~~~~~~~~~~~ - -.. code:: python - - import csv - - from google.cloud import bigquery - from google.cloud.bigquery import LoadJobConfig - from google.cloud.bigquery import SchemaField - - client = bigquery.Client() - - SCHEMA = [ - SchemaField('full_name', 'STRING', mode='required'), - SchemaField('age', 'INTEGER', mode='required'), - ] - table_ref = client.dataset('dataset_name').table('table_name') - - load_config = LoadJobConfig() - load_config.skip_leading_rows = 1 - load_config.schema = SCHEMA - - # Contents of csv_file.csv: - # Name,Age - # Tim,99 - with open('csv_file.csv', 'rb') as readable: - client.load_table_from_file( - readable, table_ref, job_config=load_config) # API request - -Perform a query -~~~~~~~~~~~~~~~ - -.. code:: python - # Perform a query. QUERY = ( 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' diff --git a/packages/google-cloud-bigquery/docs/index.rst b/packages/google-cloud-bigquery/docs/index.rst index d24621c246f7..99977545436d 100644 --- a/packages/google-cloud-bigquery/docs/index.rst +++ b/packages/google-cloud-bigquery/docs/index.rst @@ -1,408 +1,13 @@ .. include:: /../bigquery/README.rst -Using the Library ------------------ +More Examples +~~~~~~~~~~~~~ -Projects -~~~~~~~~ - -A project is the top-level container in the ``BigQuery`` API: it is tied -closely to billing, and can provide default access control across all its -datasets. If no ``project`` is passed to the client container, the library -attempts to infer a project using the environment (including explicit -environment variables, GAE, and GCE). - -To override the project inferred from the environment, pass an explicit -``project`` to the constructor, or to either of the alternative -``classmethod`` factories: - -.. code-block:: python - - from google.cloud import bigquery - client = bigquery.Client(project='PROJECT_ID') - - -Project ACLs -^^^^^^^^^^^^ - -Each project has an access control list granting reader / writer / owner -permission to one or more entities. This list cannot be queried or set -via the API; it must be managed using the Google Developer Console. - - -Datasets -~~~~~~~~ - -A dataset represents a collection of tables, and applies several default -policies to tables as they are created: - -- An access control list (ACL). When created, a dataset has an ACL - which maps to the ACL inherited from its project. - -- A default table expiration period. If set, tables created within the - dataset will have the value as their expiration period. - -See BigQuery documentation for more information on -`Datasets `_. - - -Dataset operations -^^^^^^^^^^^^^^^^^^ - -List datasets for the client's project: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_list_datasets] - :end-before: [END bigquery_list_datasets] - -Create a new dataset for the client's project: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_create_dataset] - :end-before: [END bigquery_create_dataset] - -Refresh metadata for a dataset (to pick up changes made by another client): - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_get_dataset] - :end-before: [END bigquery_get_dataset] - -Update a property in a dataset's metadata: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_update_dataset_description] - :end-before: [END bigquery_update_dataset_description] - -Modify user permissions on a dataset: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_update_dataset_access] - :end-before: [END bigquery_update_dataset_access] - -Delete a dataset: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_delete_dataset] - :end-before: [END bigquery_delete_dataset] - - -Tables -~~~~~~ - -Tables exist within datasets. See BigQuery documentation for more information -on `Tables `_. - -Table operations -^^^^^^^^^^^^^^^^ -List tables for the dataset: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_list_tables] - :end-before: [END bigquery_list_tables] - -Create a table: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_create_table] - :end-before: [END bigquery_create_table] - -Get a table: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_get_table] - :end-before: [END bigquery_get_table] - -Update a property in a table's metadata: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_update_table_description] - :end-before: [END bigquery_update_table_description] - -Browse selected rows in a table: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_browse_table] - :end-before: [END bigquery_browse_table] - -Insert rows into a table's data: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_table_insert_rows] - :end-before: [END bigquery_table_insert_rows] - -Copy a table: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_copy_table] - :end-before: [END bigquery_copy_table] - -Extract a table to Google Cloud Storage: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_extract_table] - :end-before: [END bigquery_extract_table] - -Delete a table: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_delete_table] - :end-before: [END bigquery_delete_table] - -Upload table data from a file: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_load_from_file] - :end-before: [END bigquery_load_from_file] - -Load table data from Google Cloud Storage -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -See also: `Loading JSON data from Cloud Storage -`_. - -Load a CSV file from Cloud Storage: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_load_table_gcs_csv] - :end-before: [END bigquery_load_table_gcs_csv] - -Load a JSON file from Cloud Storage: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_load_table_gcs_json] - :end-before: [END bigquery_load_table_gcs_json] - -Load a Parquet file from Cloud Storage: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_load_table_gcs_parquet] - :end-before: [END bigquery_load_table_gcs_parquet] - -Customer Managed Encryption Keys -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Table data is always encrypted at rest, but BigQuery also provides a way for -you to control what keys it uses to encrypt they data. See `Protecting data -with Cloud KMS keys -`_ -in the BigQuery documentation for more details. - -Create a new table, using a customer-managed encryption key from -Cloud KMS to encrypt it. - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_create_table_cmek] - :end-before: [END bigquery_create_table_cmek] - -Change the key used to encrypt a table. - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_update_table_cmek] - :end-before: [END bigquery_update_table_cmek] - -Load a file from Cloud Storage, using a customer-managed encryption key from -Cloud KMS for the destination table. - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_load_table_gcs_json_cmek] - :end-before: [END bigquery_load_table_gcs_json_cmek] - -Copy a table, using a customer-managed encryption key from Cloud KMS for the -destination table. - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_copy_table_cmek] - :end-before: [END bigquery_copy_table_cmek] - -Write query results to a table, using a customer-managed encryption key from -Cloud KMS for the destination table. - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_query_destination_table_cmek] - :end-before: [END bigquery_query_destination_table_cmek] - -Queries -~~~~~~~ - - -Querying data -^^^^^^^^^^^^^ - -Run a query and wait for it to finish: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_query] - :end-before: [END bigquery_query] - - -Run a dry run query -^^^^^^^^^^^^^^^^^^^ - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_query_dry_run] - :end-before: [END bigquery_query_dry_run] - - -Writing query results to a destination table -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -See BigQuery documentation for more information on -`writing query results `_. - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_query_destination_table] - :end-before: [END bigquery_query_destination_table] - - -Run a query using a named query parameter -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -See BigQuery documentation for more information on -`parameterized queries `_. - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_query_params_named] - :end-before: [END bigquery_query_params_named] - - -Jobs -~~~~ - -List jobs for a project -^^^^^^^^^^^^^^^^^^^^^^^ - -Jobs describe actions performed on data in BigQuery tables: - -- Load data into a table -- Run a query against data in one or more tables -- Extract data from a table -- Copy a table - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_list_jobs] - :end-before: [END bigquery_list_jobs] - - -Using BigQuery with Pandas -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Retrieve BigQuery data as a Pandas DataFrame -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -As of version 0.29.0, you can use the -:func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` function to -retrieve query results or table rows as a :class:`pandas.DataFrame`. - -First, ensure that the :mod:`pandas` library is installed by running: - -.. code-block:: bash - - pip install --upgrade pandas - -Alternatively, you can install the BigQuery python client library with -:mod:`pandas` by running: - -.. code-block:: bash - - pip install --upgrade google-cloud-bigquery[pandas] - -To retrieve query results as a :class:`pandas.DataFrame`: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_query_results_dataframe] - :end-before: [END bigquery_query_results_dataframe] - -To retrieve table rows as a :class:`pandas.DataFrame`: - -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_list_rows_dataframe] - :end-before: [END bigquery_list_rows_dataframe] - -Load a Pandas DataFrame to a BigQuery Table -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -As of version 1.3.0, you can use the -:func:`~google.cloud.bigquery.client.Client.load_table_from_dataframe` function -to load data from a :class:`pandas.DataFrame` to a -:class:`~google.cloud.bigquery.table.Table`. To use this function, in addition -to :mod:`pandas`, you will need to install the :mod:`pyarrow` library. You can -install the BigQuery python client library with :mod:`pandas` and -:mod:`pyarrow` by running: - -.. code-block:: bash - - pip install --upgrade google-cloud-bigquery[pandas,pyarrow] - -The following example demonstrates how to create a :class:`pandas.DataFrame` -and load it into a new table: +.. toctree:: + :maxdepth: 2 -.. literalinclude:: snippets.py - :language: python - :dedent: 4 - :start-after: [START bigquery_load_table_dataframe] - :end-before: [END bigquery_load_table_dataframe] + usage/index + Official Google BigQuery How-to Guides API Reference ------------- diff --git a/packages/google-cloud-bigquery/docs/usage.html b/packages/google-cloud-bigquery/docs/usage.html index 9b81d6976cda..78dc14b9ca03 100644 --- a/packages/google-cloud-bigquery/docs/usage.html +++ b/packages/google-cloud-bigquery/docs/usage.html @@ -1,8 +1,8 @@ - + diff --git a/packages/google-cloud-bigquery/docs/usage/client.rst b/packages/google-cloud-bigquery/docs/usage/client.rst new file mode 100644 index 000000000000..d631585ea2fe --- /dev/null +++ b/packages/google-cloud-bigquery/docs/usage/client.rst @@ -0,0 +1,25 @@ +Creating a Client +~~~~~~~~~~~~~~~~~ + +A project is the top-level container in the ``BigQuery`` API: it is tied +closely to billing, and can provide default access control across all its +datasets. If no ``project`` is passed to the client container, the library +attempts to infer a project using the environment (including explicit +environment variables, GAE, and GCE). + +To override the project inferred from the environment, pass an explicit +``project`` to the :class:`~google.cloud.bigquery.client.Client` constructor, +or to either of the alternative ``classmethod`` factories: + +.. code-block:: python + + from google.cloud import bigquery + client = bigquery.Client(project='PROJECT_ID') + + +Project ACLs +^^^^^^^^^^^^ + +Each project has an access control list granting reader / writer / owner +permission to one or more entities. This list cannot be queried or set +via the API; it must be managed using the Google Developer Console. diff --git a/packages/google-cloud-bigquery/docs/usage/datasets.rst b/packages/google-cloud-bigquery/docs/usage/datasets.rst new file mode 100644 index 000000000000..09ae90767cdc --- /dev/null +++ b/packages/google-cloud-bigquery/docs/usage/datasets.rst @@ -0,0 +1,83 @@ +Managing Datasets +~~~~~~~~~~~~~~~~~ + +A dataset represents a collection of tables, and applies several default +policies to tables as they are created: + +- An access control list (ACL). When created, a dataset has an ACL + which maps to the ACL inherited from its project. + +- A default table expiration period. If set, tables created within the + dataset will have the value as their expiration period. + +See BigQuery documentation for more information on +`Datasets `_. + +Listing Datasets +^^^^^^^^^^^^^^^^ + +List datasets for a project with the +:func:`~google.cloud.bigquery.client.Client.list_datasets` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_list_datasets] + :end-before: [END bigquery_list_datasets] + +Getting a Dataset +^^^^^^^^^^^^^^^^^ + +Get a dataset resource (to pick up changes made by another client) with the +:func:`~google.cloud.bigquery.client.Client.get_dataset` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_get_dataset] + :end-before: [END bigquery_get_dataset] + +Creating a Dataset +^^^^^^^^^^^^^^^^^^ + +Create a new dataset with the +:func:`~google.cloud.bigquery.client.Client.create_dataset` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_dataset] + :end-before: [END bigquery_create_dataset] + +Updating a Dataset +^^^^^^^^^^^^^^^^^^ + +Update a property in a dataset's metadata with the +:func:`~google.cloud.bigquery.client.Client.update_dataset` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_update_dataset_description] + :end-before: [END bigquery_update_dataset_description] + +Modify user permissions on a dataset with the +:func:`~google.cloud.bigquery.client.Client.update_dataset` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_update_dataset_access] + :end-before: [END bigquery_update_dataset_access] + +Deleting a Dataset +^^^^^^^^^^^^^^^^^^ + +Delete a dataset with the +:func:`~google.cloud.bigquery.client.Client.delete_dataset` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_delete_dataset] + :end-before: [END bigquery_delete_dataset] diff --git a/packages/google-cloud-bigquery/docs/usage/encryption.rst b/packages/google-cloud-bigquery/docs/usage/encryption.rst new file mode 100644 index 000000000000..88d23067995e --- /dev/null +++ b/packages/google-cloud-bigquery/docs/usage/encryption.rst @@ -0,0 +1,52 @@ +Using Customer Managed Encryption Keys +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Table data is always encrypted at rest, but BigQuery also provides a way for +you to control what keys it uses to encrypt they data. See `Protecting data +with Cloud KMS keys +`_ +in the BigQuery documentation for more details. + +Create a new table, using a customer-managed encryption key from +Cloud KMS to encrypt it. + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_table_cmek] + :end-before: [END bigquery_create_table_cmek] + +Change the key used to encrypt a table. + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_update_table_cmek] + :end-before: [END bigquery_update_table_cmek] + +Load a file from Cloud Storage, using a customer-managed encryption key from +Cloud KMS for the destination table. + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_json_cmek] + :end-before: [END bigquery_load_table_gcs_json_cmek] + +Copy a table, using a customer-managed encryption key from Cloud KMS for the +destination table. + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_copy_table_cmek] + :end-before: [END bigquery_copy_table_cmek] + +Write query results to a table, using a customer-managed encryption key from +Cloud KMS for the destination table. + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_destination_table_cmek] + :end-before: [END bigquery_query_destination_table_cmek] diff --git a/packages/google-cloud-bigquery/docs/usage/index.rst b/packages/google-cloud-bigquery/docs/usage/index.rst new file mode 100644 index 000000000000..ff4c9d7f1a8f --- /dev/null +++ b/packages/google-cloud-bigquery/docs/usage/index.rst @@ -0,0 +1,35 @@ +Usage Guides +~~~~~~~~~~~~ + +BigQuery Basics +^^^^^^^^^^^^^^^ + +.. toctree:: + :maxdepth: 1 + + client + queries + +Working with BigQuery Resources +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. toctree:: + :maxdepth: 1 + + datasets + tables + encryption + jobs + +Integrations with Other Libraries +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. toctree:: + :maxdepth: 1 + + pandas + +See also, the :mod:`google.cloud.bigquery.magics` module for integrations +with Jupyter. + + diff --git a/packages/google-cloud-bigquery/docs/usage/jobs.rst b/packages/google-cloud-bigquery/docs/usage/jobs.rst new file mode 100644 index 000000000000..914d1d459ee7 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/usage/jobs.rst @@ -0,0 +1,18 @@ +Managing Jobs +~~~~~~~~~~~~~ + +List jobs for a project +^^^^^^^^^^^^^^^^^^^^^^^ + +Jobs describe actions performed on data in BigQuery tables: + +- Load data into a table +- Run a query against data in one or more tables +- Extract data from a table +- Copy a table + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_list_jobs] + :end-before: [END bigquery_list_jobs] diff --git a/packages/google-cloud-bigquery/docs/usage/pandas.rst b/packages/google-cloud-bigquery/docs/usage/pandas.rst new file mode 100644 index 000000000000..9504bd19673a --- /dev/null +++ b/packages/google-cloud-bigquery/docs/usage/pandas.rst @@ -0,0 +1,62 @@ +Using BigQuery with Pandas +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Retrieve BigQuery data as a Pandas DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As of version 0.29.0, you can use the +:func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` function to +retrieve query results or table rows as a :class:`pandas.DataFrame`. + +First, ensure that the :mod:`pandas` library is installed by running: + +.. code-block:: bash + + pip install --upgrade pandas + +Alternatively, you can install the BigQuery python client library with +:mod:`pandas` by running: + +.. code-block:: bash + + pip install --upgrade google-cloud-bigquery[pandas] + +To retrieve query results as a :class:`pandas.DataFrame`: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_results_dataframe] + :end-before: [END bigquery_query_results_dataframe] + +To retrieve table rows as a :class:`pandas.DataFrame`: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_list_rows_dataframe] + :end-before: [END bigquery_list_rows_dataframe] + +Load a Pandas DataFrame to a BigQuery Table +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As of version 1.3.0, you can use the +:func:`~google.cloud.bigquery.client.Client.load_table_from_dataframe` function +to load data from a :class:`pandas.DataFrame` to a +:class:`~google.cloud.bigquery.table.Table`. To use this function, in addition +to :mod:`pandas`, you will need to install the :mod:`pyarrow` library. You can +install the BigQuery python client library with :mod:`pandas` and +:mod:`pyarrow` by running: + +.. code-block:: bash + + pip install --upgrade google-cloud-bigquery[pandas,pyarrow] + +The following example demonstrates how to create a :class:`pandas.DataFrame` +and load it into a new table: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_dataframe] + :end-before: [END bigquery_load_table_dataframe] diff --git a/packages/google-cloud-bigquery/docs/usage/queries.rst b/packages/google-cloud-bigquery/docs/usage/queries.rst new file mode 100644 index 000000000000..fc77bb5b80cd --- /dev/null +++ b/packages/google-cloud-bigquery/docs/usage/queries.rst @@ -0,0 +1,49 @@ +Running Queries +~~~~~~~~~~~~~~~ + +Querying data +^^^^^^^^^^^^^ + +Run a query and wait for it to finish: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query] + :end-before: [END bigquery_query] + + +Run a dry run query +^^^^^^^^^^^^^^^^^^^ + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_dry_run] + :end-before: [END bigquery_query_dry_run] + + +Writing query results to a destination table +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +See BigQuery documentation for more information on +`writing query results `_. + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_destination_table] + :end-before: [END bigquery_query_destination_table] + + +Run a query using a named query parameter +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +See BigQuery documentation for more information on +`parameterized queries `_. + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_params_named] + :end-before: [END bigquery_query_params_named] diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst new file mode 100644 index 000000000000..555366fd2a4b --- /dev/null +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -0,0 +1,147 @@ +Managing Tables +~~~~~~~~~~~~~~~ + +Tables exist within datasets. See BigQuery documentation for more information +on `Tables `_. + +Listing Tables +^^^^^^^^^^^^^^ + +List the tables belonging to a dataset with the +:func:`~google.cloud.bigquery.client.Client.list_tables` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_list_tables] + :end-before: [END bigquery_list_tables] + +Getting a Table +^^^^^^^^^^^^^^^ + +Get a table resource with the +:func:`~google.cloud.bigquery.client.Client.get_table` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_get_table] + :end-before: [END bigquery_get_table] + +Browse data rows in a table with the +:func:`~google.cloud.bigquery.client.Client.list_rows` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_browse_table] + :end-before: [END bigquery_browse_table] + +Creating a Table +^^^^^^^^^^^^^^^^ + +Create an empty table with the +:func:`~google.cloud.bigquery.client.Client.create_table` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_table] + :end-before: [END bigquery_create_table] + +Load table data from a file with the +:func:`~google.cloud.bigquery.client.Client.load_table_from_file` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_from_file] + :end-before: [END bigquery_load_from_file] + +Load a CSV file from Cloud Storage with the +:func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_csv] + :end-before: [END bigquery_load_table_gcs_csv] + +See also: `Loading CSV data from Cloud Storage +`_. + +Load a JSON file from Cloud Storage: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_json] + :end-before: [END bigquery_load_table_gcs_json] + +See also: `Loading JSON data from Cloud Storage +`_. + +Load a Parquet file from Cloud Storage: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_parquet] + :end-before: [END bigquery_load_table_gcs_parquet] + +See also: `Loading Parquet data from Cloud Storage +`_. + +Updating a Table +^^^^^^^^^^^^^^^^ + +Update a property in a table's metadata with the +:func:`~google.cloud.bigquery.client.Client.update_table` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_update_table_description] + :end-before: [END bigquery_update_table_description] + +Insert rows into a table's data with the +:func:`~google.cloud.bigquery.client.Client.insert_rows` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_table_insert_rows] + :end-before: [END bigquery_table_insert_rows] + +Copying a Table +^^^^^^^^^^^^^^^ + +Copy a table with the +:func:`~google.cloud.bigquery.client.Client.copy_table` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_copy_table] + :end-before: [END bigquery_copy_table] + +Copy table data to Google Cloud Storage with the +:func:`~google.cloud.bigquery.client.Client.extract_table` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_extract_table] + :end-before: [END bigquery_extract_table] + +Deleting a Table +^^^^^^^^^^^^^^^^ + +Delete a table with the +:func:`~google.cloud.bigquery.client.Client.delete_table` method: + +.. literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_delete_table] + :end-before: [END bigquery_delete_table] From 7d351d23bf391dc76eb91003854a3f7285f31df6 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 25 Oct 2018 14:07:27 -0400 Subject: [PATCH 0511/2016] BigQuery: deprecation cleanups (#6304) * Remove use of deprecated 'assertEquals'. * Use 'PendingDeprecationWarning' and 'stacklevel=2' for future deprecations. Catch and assert the warnings in tests. --- .../google/cloud/bigquery/table.py | 12 ++-- .../google-cloud-bigquery/tests/system.py | 4 +- .../tests/unit/test_client.py | 2 +- .../tests/unit/test_table.py | 55 ++++++++++++++----- 4 files changed, 51 insertions(+), 22 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index cd8f20ce49f2..d28310fead16 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -536,7 +536,7 @@ def partitioning_type(self): warnings.warn( "This method will be deprecated in future versions. Please use " "Table.time_partitioning.type_ instead.", - UserWarning) + PendingDeprecationWarning, stacklevel=2) if self.time_partitioning is not None: return self.time_partitioning.type_ @@ -545,7 +545,7 @@ def partitioning_type(self, value): warnings.warn( "This method will be deprecated in future versions. Please use " "Table.time_partitioning.type_ instead.", - UserWarning) + PendingDeprecationWarning, stacklevel=2) if self.time_partitioning is None: self._properties['timePartitioning'] = {} self._properties['timePartitioning']['type'] = value @@ -561,7 +561,7 @@ def partition_expiration(self): warnings.warn( "This method will be deprecated in future versions. Please use " "Table.time_partitioning.expiration_ms instead.", - UserWarning) + PendingDeprecationWarning, stacklevel=2) if self.time_partitioning is not None: return self.time_partitioning.expiration_ms @@ -570,7 +570,7 @@ def partition_expiration(self, value): warnings.warn( "This method will be deprecated in future versions. Please use " "Table.time_partitioning.expiration_ms instead.", - UserWarning) + PendingDeprecationWarning, stacklevel=2) if self.time_partitioning is None: self._properties['timePartitioning'] = { 'type': TimePartitioningType.DAY} @@ -928,7 +928,7 @@ def partitioning_type(self): warnings.warn( "This method will be deprecated in future versions. Please use " "TableListItem.time_partitioning.type_ instead.", - PendingDeprecationWarning) + PendingDeprecationWarning, stacklevel=2) if self.time_partitioning is not None: return self.time_partitioning.type_ @@ -942,7 +942,7 @@ def partition_expiration(self): warnings.warn( "This method will be deprecated in future versions. Please use " "TableListItem.time_partitioning.expiration_ms instead.", - PendingDeprecationWarning) + PendingDeprecationWarning, stacklevel=2) if self.time_partitioning is not None: return self.time_partitioning.expiration_ms diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 650d1a12bf55..6ab64ec9a7a0 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -204,7 +204,7 @@ def test_update_dataset(self): self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) - self.assertEquals(dataset.labels, {}) + self.assertEqual(dataset.labels, {}) dataset.friendly_name = 'Friendly' dataset.description = 'Description' @@ -400,7 +400,7 @@ def test_update_table(self): self.assertTrue(_table_exists(table)) self.assertIsNone(table.friendly_name) self.assertIsNone(table.description) - self.assertEquals(table.labels, {}) + self.assertEqual(table.labels, {}) table.friendly_name = 'Friendly' table.description = 'Description' table.labels = {'priority': 'high', 'color': 'blue'} diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 60c81b59c482..4a4e3b7f24b4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -3834,7 +3834,7 @@ def test_list_partitions_with_string_id(self): partition_list = client.list_partitions( '{}.{}'.format(self.DS_ID, self.TABLE_ID)) - self.assertEquals(len(partition_list), 0) + self.assertEqual(len(partition_list), 0) def test_list_rows(self): import datetime diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 3a187d20af4b..5795a3c92e39 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -472,7 +472,7 @@ def test_ctor(self): self.assertIsNone(table.view_query) self.assertIsNone(table.view_use_legacy_sql) self.assertIsNone(table.external_data_configuration) - self.assertEquals(table.labels, {}) + self.assertEqual(table.labels, {}) self.assertIsNone(table.encryption_configuration) self.assertIsNone(table.time_partitioning) self.assertIsNone(table.clustering_fields) @@ -876,22 +876,26 @@ def test_time_partitioning_setter_none(self): self.assertIsNone(table.time_partitioning) def test_partitioning_type_setter(self): + import warnings from google.cloud.bigquery.table import TimePartitioningType dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - with mock.patch('warnings.warn') as warn_patch: + with warnings.catch_warnings(record=True) as warned: self.assertIsNone(table.partitioning_type) table.partitioning_type = TimePartitioningType.DAY self.assertEqual(table.partitioning_type, 'DAY') - assert warn_patch.called + self.assertEqual(len(warned), 3) + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) def test_partitioning_type_setter_w_time_partitioning_set(self): + import warnings from google.cloud.bigquery.table import TimePartitioning dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -899,14 +903,17 @@ def test_partitioning_type_setter_w_time_partitioning_set(self): table = self._make_one(table_ref) table.time_partitioning = TimePartitioning() - with mock.patch('warnings.warn') as warn_patch: + with warnings.catch_warnings(record=True) as warned: table.partitioning_type = 'NEW_FAKE_TYPE' self.assertEqual(table.partitioning_type, 'NEW_FAKE_TYPE') - assert warn_patch.called + self.assertEqual(len(warned), 2) + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) def test_partitioning_expiration_setter_w_time_partitioning_set(self): + import warnings from google.cloud.bigquery.table import TimePartitioning dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -914,19 +921,23 @@ def test_partitioning_expiration_setter_w_time_partitioning_set(self): table = self._make_one(table_ref) table.time_partitioning = TimePartitioning() - with mock.patch('warnings.warn') as warn_patch: + with warnings.catch_warnings(record=True) as warned: table.partition_expiration = 100000 self.assertEqual(table.partition_expiration, 100000) - assert warn_patch.called + self.assertEqual(len(warned), 2) + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) def test_partition_expiration_setter(self): + import warnings + dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - with mock.patch('warnings.warn') as warn_patch: + with warnings.catch_warnings(record=True) as warned: self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 @@ -935,7 +946,9 @@ def test_partition_expiration_setter(self): # defaults to 'DAY' when expiration is set and type is not set self.assertEqual(table.partitioning_type, 'DAY') - assert warn_patch.called + self.assertEqual(len(warned), 4) + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) def test_clustering_fields_setter_w_fields(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -1069,6 +1082,8 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor(self): + import warnings + project = 'test-project' dataset_id = 'test_dataset' table_id = 'coffee_table' @@ -1107,11 +1122,17 @@ def test_ctor(self): self.assertEqual(table.time_partitioning.type_, 'DAY') self.assertEqual(table.time_partitioning.expiration_ms, 10000) self.assertEqual(table.time_partitioning.field, 'mycolumn') - self.assertEqual(table.partitioning_type, 'DAY') - self.assertEqual(table.partition_expiration, 10000) self.assertEqual(table.labels['some-stuff'], 'this-is-a-label') self.assertIsNone(table.view_use_legacy_sql) + with warnings.catch_warnings(record=True) as warned: + self.assertEqual(table.partitioning_type, 'DAY') + self.assertEqual(table.partition_expiration, 10000) + + self.assertEqual(len(warned), 2) + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) + def test_ctor_view(self): project = 'test-project' dataset_id = 'test_dataset' @@ -1142,6 +1163,8 @@ def test_ctor_view(self): self.assertTrue(table.view_use_legacy_sql) def test_ctor_missing_properties(self): + import warnings + resource = { 'tableReference': { 'projectId': 'testproject', @@ -1157,11 +1180,17 @@ def test_ctor_missing_properties(self): self.assertIsNone(table.friendly_name) self.assertIsNone(table.table_type) self.assertIsNone(table.time_partitioning) - self.assertIsNone(table.partitioning_type) - self.assertIsNone(table.partition_expiration) self.assertEqual(table.labels, {}) self.assertIsNone(table.view_use_legacy_sql) + with warnings.catch_warnings(record=True) as warned: + self.assertIsNone(table.partitioning_type) + self.assertIsNone(table.partition_expiration) + + self.assertEqual(len(warned), 2) + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) + def test_ctor_wo_project(self): resource = { 'tableReference': { From 4f9a5813a368d91059705e5801dbca0fd13ccb5c Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Fri, 26 Oct 2018 15:11:36 -0700 Subject: [PATCH 0512/2016] Fix #6321 Update README service links in quickstart guides. (#6322) --- packages/google-cloud-bigquery/README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 0ea938e306fe..7d5fbb067071 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -28,12 +28,12 @@ In order to use this library, you first need to go through the following steps: 1. `Select or create a Cloud Platform project.`_ 2. `Enable billing for your project.`_ -3. `Enable the Google Cloud Datastore API.`_ +3. `Enable the Google Cloud BigQuery API.`_ 4. `Setup Authentication.`_ .. _Select or create a Cloud Platform project.: https://console.cloud.google.com/project .. _Enable billing for your project.: https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project -.. _Enable the Google Cloud Datastore API.: https://cloud.google.com/bigquery +.. _Enable the Google Cloud BigQuery API.: https://cloud.google.com/bigquery .. _Setup Authentication.: https://googlecloudplatform.github.io/google-cloud-python/latest/core/auth.html Installation From 09197804b4b93530958bd4ca557314ddda3d12ec Mon Sep 17 00:00:00 2001 From: Guillermo Carrasco Date: Tue, 30 Oct 2018 19:40:21 +0100 Subject: [PATCH 0513/2016] Add --params option to %%bigquery magic (#6277) --- .../google/cloud/bigquery/magics.py | 56 ++++++++++++- .../tests/unit/test_magics.py | 81 ++++++++++++++++++- 2 files changed, 134 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index ed54d9c04b59..05e8e52c7ffa 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -21,7 +21,7 @@ .. code-block:: python %%bigquery [] [--project ] [--use_legacy_sql] - [--verbose] + [--verbose] [--params ] Parameters: @@ -39,6 +39,20 @@ amount of time for the query to complete will not be cleared after the query is finished. By default, this information will be displayed but will be cleared after the query is finished. + * ``--params `` (optional, line argument): + If present, the argument following the ``--params`` flag must be + either: + + * :class:`str` - A JSON string representation of a dictionary in the + format ``{"param_name": "param_value"}`` (ex. ``{"num": 17}``). Use + of the parameter in the query should be indicated with + ``@param_name``. See ``In[5]`` in the Examples section below. + + * :class:`dict` reference - A reference to a ``dict`` in the format + ``{"param_name": "param_value"}``, where the value types must be JSON + serializable. The variable reference is indicated by a ``$`` before + the variable name (ex. ``$my_dict_var``). See ``In[6]`` and ``In[7]`` + in the Examples section below. * ```` (required, cell argument): SQL query to run. @@ -54,7 +68,7 @@ the bigquery IPython extension (see ``In[1]``) and setting up Application Default Credentials. - .. code-block:: python + .. code-block:: none In [1]: %load_ext google.cloud.bigquery @@ -96,10 +110,26 @@ ...: 1 Patricia 1568495 ...: 2 Elizabeth 1519946 + In [5]: %%bigquery df --params {"num": 17} + ...: SELECT @num AS num + + Out[5]: num + ...: ------- + ...: 0 17 + + In [6]: params = {"num": 17} + + In [7]: %%bigquery df --params $params + ...: SELECT @num AS num + + Out[7]: num + ...: ------- + ...: 0 17 """ from __future__ import print_function +import ast import time from concurrent import futures @@ -112,6 +142,7 @@ import google.auth from google.cloud import bigquery +from google.cloud.bigquery.dbapi import _helpers class Context(object): @@ -249,6 +280,16 @@ def _run_query(client, query, job_config=None): 'amount of time for the query to finish. By default, this ' 'information will be displayed as the query runs, but will be ' 'cleared after the query is finished.')) +@magic_arguments.argument( + '--params', + nargs='+', + default=None, + help=('Parameters to format the query string. If present, the --params ' + 'flag should be followed by a string representation of a dictionary ' + 'in the format {\'param_name\': \'param_value\'} (ex. {"num": 17}), ' + 'or a reference to a dictionary in the same format. The dictionary ' + 'reference can be made by including a \'$\' before the variable ' + 'name (ex. $my_dict_var).')) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -265,9 +306,20 @@ def _cell_magic(line, query): """ args = magic_arguments.parse_argstring(_cell_magic, line) + params = [] + if args.params is not None: + try: + params = _helpers.to_query_parameters( + ast.literal_eval(''.join(args.params))) + except Exception: + raise SyntaxError( + '--params is not a correctly formatted JSON string or a JSON ' + 'serializable dictionary') + project = args.project or context.project client = bigquery.Client(project=project, credentials=context.credentials) job_config = bigquery.job.QueryJobConfig() + job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql query_job = _run_query(client, query, job_config) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 800edf2918bc..b0e08661ca00 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -186,7 +186,7 @@ def test_bigquery_magic_with_result_saved_to_variable(): sql = 'SELECT 17 AS num' result = pandas.DataFrame([17], columns=['num']) - assert 'myvariable' not in ip.user_ns + assert 'df' not in ip.user_ns run_query_patch = mock.patch( 'google.cloud.bigquery.magics._run_query', autospec=True) @@ -259,3 +259,82 @@ def test_bigquery_magic_with_project(): assert client_used.project == 'specific-project' # context project should not change assert magics.context.project == 'general-project' + + +@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +def test_bigquery_magic_with_string_params(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + + sql = 'SELECT @num AS num' + result = pandas.DataFrame([17], columns=['num']) + assert 'params_string_df' not in ip.user_ns + + run_query_patch = mock.patch( + 'google.cloud.bigquery.magics._run_query', autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + ip.run_cell_magic( + 'bigquery', 'params_string_df --params {"num":17}', sql) + run_query_mock.assert_called_once_with( + mock.ANY, sql.format(num=17), mock.ANY) + + assert 'params_string_df' in ip.user_ns # verify that the variable exists + df = ip.user_ns['params_string_df'] + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names + + +@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +def test_bigquery_magic_with_dict_params(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + + sql = 'SELECT @num AS num' + result = pandas.DataFrame([17], columns=['num']) + assert 'params_dict_df' not in ip.user_ns + + run_query_patch = mock.patch( + 'google.cloud.bigquery.magics._run_query', autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + params = {"num": 17} + # Insert dictionary into user namespace so that it can be expanded + ip.user_ns['params'] = params + ip.run_cell_magic('bigquery', 'params_dict_df --params $params', sql) + run_query_mock.assert_called_once_with( + mock.ANY, sql.format(num=17), mock.ANY) + + assert 'params_dict_df' in ip.user_ns # verify that the variable exists + df = ip.user_ns['params_dict_df'] + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names + + +@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +def test_bigquery_magic_with_improperly_formatted_params(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + + sql = 'SELECT @num AS num' + + with pytest.raises(SyntaxError): + ip.run_cell_magic( + 'bigquery', '--params {17}', sql) From ea063ed40aee96b13ff0195d8b612d039aa3ea6c Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 6 Nov 2018 09:07:43 -0800 Subject: [PATCH 0514/2016] BigQuery: Add properties to job config constructors (#6397) * add properties to job config constructors * update properties arg * update **properties to **kwargs * fix docstrings --- .../google/cloud/bigquery/job.py | 68 +++++++++++-------- .../tests/unit/test_job.py | 38 ++++++++++- 2 files changed, 74 insertions(+), 32 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 053de18fb827..5eec77de7e3a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -719,9 +719,11 @@ class _JobConfig(object): job_type (str): The key to use for the job configuration. """ - def __init__(self, job_type): + def __init__(self, job_type, **kwargs): self._job_type = job_type self._properties = {job_type: {}} + for prop, val in kwargs.items(): + setattr(self, prop, val) @property def labels(self): @@ -762,7 +764,7 @@ def _get_sub_prop(self, key, default=None): ``self._properties[self._job_type]`` dictionary. default (object): (Optional) Default value to return if the key is not found. - Defaults to ``None``. + Defaults to :data:`None`. Returns: object: The value if present or the default. @@ -793,7 +795,7 @@ def _set_sub_prop(self, key, value): _helpers._set_sub_prop(self._properties, [self._job_type, key], value) def _del_sub_prop(self, key): - """Reove ``key`` from the ``self._properties[self._job_type]`` dict. + """Remove ``key`` from the ``self._properties[self._job_type]`` dict. Most job properties are inside the dictionary related to the job type (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear @@ -874,12 +876,13 @@ def from_api_repr(cls, resource): class LoadJobConfig(_JobConfig): """Configuration options for load jobs. - All properties in this class are optional. Values which are ``None`` -> - server defaults. + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. """ - def __init__(self): - super(LoadJobConfig, self).__init__('load') + def __init__(self, **kwargs): + super(LoadJobConfig, self).__init__('load', **kwargs) @property def allow_jagged_rows(self): @@ -967,7 +970,7 @@ def destination_encryption_configuration(self): """google.cloud.bigquery.table.EncryptionConfiguration: Custom encryption configuration for the destination table. - Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` if using default encryption. See @@ -1338,7 +1341,7 @@ def destination_encryption_configuration(self): encryption configuration for the destination table. Custom encryption configuration (e.g., Cloud KMS keys) - or ``None`` if using default encryption. + or :data:`None` if using default encryption. See :attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`. @@ -1469,12 +1472,13 @@ def from_api_repr(cls, resource, client): class CopyJobConfig(_JobConfig): """Configuration options for copy jobs. - All properties in this class are optional. Values which are ``None`` -> - server defaults. + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. """ - def __init__(self): - super(CopyJobConfig, self).__init__('copy') + def __init__(self, **kwargs): + super(CopyJobConfig, self).__init__('copy', **kwargs) @property def create_disposition(self): @@ -1509,7 +1513,7 @@ def destination_encryption_configuration(self): """google.cloud.bigquery.table.EncryptionConfiguration: Custom encryption configuration for the destination table. - Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` if using default encryption. See @@ -1579,7 +1583,7 @@ def destination_encryption_configuration(self): """google.cloud.bigquery.table.EncryptionConfiguration: Custom encryption configuration for the destination table. - Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` if using default encryption. See @@ -1662,12 +1666,13 @@ def from_api_repr(cls, resource, client): class ExtractJobConfig(_JobConfig): """Configuration options for extract jobs. - All properties in this class are optional. Values which are ``None`` -> - server defaults. + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. """ - def __init__(self): - super(ExtractJobConfig, self).__init__('extract') + def __init__(self, **kwargs): + super(ExtractJobConfig, self).__init__('extract', **kwargs) @property def compression(self): @@ -1906,19 +1911,20 @@ def _to_api_repr_table_defs(value): class QueryJobConfig(_JobConfig): """Configuration options for query jobs. - All properties in this class are optional. Values which are ``None`` -> - server defaults. + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. """ - def __init__(self): - super(QueryJobConfig, self).__init__('query') + def __init__(self, **kwargs): + super(QueryJobConfig, self).__init__('query', **kwargs) @property def destination_encryption_configuration(self): """google.cloud.bigquery.table.EncryptionConfiguration: Custom encryption configuration for the destination table. - Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` if using default encryption. See @@ -1966,7 +1972,8 @@ def create_disposition(self, value): @property def default_dataset(self): """google.cloud.bigquery.dataset.DatasetReference: the default dataset - to use for unqualified table names in the query or ``None`` if not set. + to use for unqualified table names in the query or :data:`None` if not + set. See https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset @@ -1986,7 +1993,7 @@ def default_dataset(self, value): @property def destination(self): """google.cloud.bigquery.table.TableReference: table where results are - written or ``None`` if not set. + written or :data:`None` if not set. See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable @@ -2005,7 +2012,8 @@ def destination(self, value): @property def dry_run(self): - """bool: ``True`` if this query should be a dry run to estimate costs. + """bool: :data:`True` if this query should be a dry run to estimate + costs. See https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun @@ -2045,7 +2053,7 @@ def maximum_billing_tier(self, value): @property def maximum_bytes_billed(self): - """int: Maximum bytes to be billed for this job or ``None`` if not set. + """int: Maximum bytes to be billed for this job or :data:`None` if not set. See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled @@ -2147,7 +2155,7 @@ def write_disposition(self, value): @property def table_definitions(self): """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: - Definitions for external tables or ``None`` if not set. + Definitions for external tables or :data:`None` if not set. See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions @@ -2305,7 +2313,7 @@ def destination_encryption_configuration(self): """google.cloud.bigquery.table.EncryptionConfiguration: Custom encryption configuration for the destination table. - Custom encryption configuration (e.g., Cloud KMS keys) or ``None`` + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` if using default encryption. See diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 88deb068f871..edddebc05e31 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1152,6 +1152,13 @@ def _get_target_class(): from google.cloud.bigquery.job import LoadJobConfig return LoadJobConfig + def test_ctor_w_properties(self): + config = self._get_target_class()( + allow_jagged_rows=True, allow_quoted_newlines=True) + + self.assertTrue(config.allow_jagged_rows) + self.assertTrue(config.allow_quoted_newlines) + def test_allow_jagged_rows_missing(self): config = self._get_target_class()() self.assertIsNone(config.allow_jagged_rows) @@ -2482,6 +2489,20 @@ def _get_target_class(): from google.cloud.bigquery.job import CopyJobConfig return CopyJobConfig + def test_ctor_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import WriteDisposition + + create_disposition = CreateDisposition.CREATE_NEVER + write_disposition = WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()( + create_disposition=create_disposition, + write_disposition=write_disposition + ) + + self.assertEqual(config.create_disposition, create_disposition) + self.assertEqual(config.write_disposition, write_disposition) + def test_to_api_repr_with_encryption(self): from google.cloud.bigquery.table import EncryptionConfiguration @@ -2916,6 +2937,13 @@ def _get_target_class(): from google.cloud.bigquery.job import ExtractJobConfig return ExtractJobConfig + def test_ctor_w_properties(self): + config = self._get_target_class()( + field_delimiter='\t', print_header=True) + + self.assertEqual(config.field_delimiter, '\t') + self.assertTrue(config.print_header) + def test_to_api_repr(self): from google.cloud.bigquery import job config = self._make_one() @@ -3299,6 +3327,13 @@ def test_ctor_w_none(self): self.assertIsNone(config.default_dataset) self.assertIsNone(config.destination) + def test_ctor_w_properties(self): + config = self._get_target_class()( + use_query_cache=False, use_legacy_sql=True) + + self.assertFalse(config.use_query_cache) + self.assertTrue(config.use_legacy_sql) + def test_time_partitioning(self): from google.cloud.bigquery import table @@ -3637,8 +3672,7 @@ def test_ctor_w_query_parameters(self): query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] client = _make_client(project=self.PROJECT) - config = QueryJobConfig() - config.query_parameters = query_parameters + config = QueryJobConfig(query_parameters=query_parameters) job = self._make_one( self.JOB_ID, self.QUERY, client, job_config=config) self.assertEqual(job.query_parameters, query_parameters) From f5c28848c2553eaf7dbb100e42d728c9683bd4bc Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 6 Nov 2018 09:33:54 -0800 Subject: [PATCH 0515/2016] Release bigquery 1.7.0 (#6399) * Release 1.7.0 * Fix whitespace * Add new keyword properties to changelog. * markdown corrections. --- packages/google-cloud-bigquery/CHANGELOG.md | 30 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index f5f70256c2c2..4ef594f61226 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,36 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.7.0 + +11-05-2018 16:41 PST + +### Implementation Changes + +- Add destination table properties to `LoadJobConfig`. ([#6202](https://github.com/googleapis/google-cloud-python/pull/6202)) +- Allow strings or references in `create_dataset` and `create_table` ([#6199](https://github.com/googleapis/google-cloud-python/pull/6199)) +- Fix swallowed error message ([#6168](https://github.com/googleapis/google-cloud-python/pull/6168)) + +### New Features + +- Add `--params option` to `%%bigquery` magic ([#6277](https://github.com/googleapis/google-cloud-python/pull/6277)) +- Expose `to_api_repr` method for jobs. ([#6176](https://github.com/googleapis/google-cloud-python/pull/6176)) +- Allow string in addition to DatasetReference / TableReference in Client methods. ([#6164](https://github.com/googleapis/google-cloud-python/pull/6164)) +- Add keyword arguments to job config constructors for setting properties ([#6397](https://github.com/googleapis/google-cloud-python/pull/6397)) + +### Documentation + +- Update README service links in quickstart guides. ([#6322](https://github.com/googleapis/google-cloud-python/pull/6322)) +- Move usage guides to their own docs. ([#6238](https://github.com/googleapis/google-cloud-python/pull/6238)) +- Normalize use of support level badges ([#6159](https://github.com/googleapis/google-cloud-python/pull/6159)) + +### Internal / Testing Changes + +- Deprecation cleanups ([#6304](https://github.com/googleapis/google-cloud-python/pull/6304)) +- Use `_get_sub_prop` helper so missing load stats don't raise. ([#6269](https://github.com/googleapis/google-cloud-python/pull/6269)) +- Use new Nox ([#6175](https://github.com/googleapis/google-cloud-python/pull/6175)) +- Harden snippets against transient GCS errors. ([#6184](https://github.com/googleapis/google-cloud-python/pull/6184)) + ## 1.6.0 ### New Features diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 40b4e41df4d0..c1416dc1d695 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.6.0' +version = '1.7.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 87da8ac92a1dc6b5312faf76e99d75c71a24f528 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 7 Nov 2018 14:03:35 -0500 Subject: [PATCH 0516/2016] Bump minimum 'api_core' version for all GAPIC libs to 1.4.1. (#6391) Closes #6390. --- packages/google-cloud-bigquery/setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index c1416dc1d695..5c4aa468d74d 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -29,9 +29,9 @@ # 'Development Status :: 5 - Production/Stable' release_status = 'Development Status :: 5 - Production/Stable' dependencies = [ - 'google-cloud-core<0.29dev,>=0.28.0', - 'google-api-core<2.0.0dev,>=1.0.0', - 'google-resumable-media>=0.2.1', + 'google-api-core >= 1.0.0, < 2.0.0dev', + 'google-cloud-core >= 0.28.0, < 0.29dev', + 'google-resumable-media >= 0.3.1', ] extras = { 'pandas': 'pandas>=0.17.1', From 2a2661cadb834e5fa78ac41d0fff582469be698a Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 12 Nov 2018 14:06:35 -0500 Subject: [PATCH 0517/2016] Import stdlib ABCs from 'collections.abc' rather than 'collections'. (#6451) On Python 2.7, fall back to 'collections'. Closes #6450. --- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 8 ++++++-- .../google/cloud/bigquery/dbapi/_helpers.py | 8 ++++++-- .../google/cloud/bigquery/dbapi/cursor.py | 7 ++++++- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4999337dbf3f..8b18da22c59b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -16,7 +16,11 @@ from __future__ import absolute_import -import collections +try: + from collections import abc as collections_abc +except ImportError: # Python 2.7 + import collections as collections_abc + import functools import gzip import os @@ -1232,7 +1236,7 @@ def copy_table( destination = TableReference.from_string( destination, default_project=self.project) - if not isinstance(sources, collections.Sequence): + if not isinstance(sources, collections_abc.Sequence): sources = [sources] copy_job = job.CopyJob( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 56c6a088672f..ee9198cbada4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import collections +try: + from collections import abc as collections_abc +except ImportError: # Python 2.7 + import collections as collections_abc + import datetime import decimal import numbers @@ -105,7 +109,7 @@ def to_query_parameters(parameters): if parameters is None: return [] - if isinstance(parameters, collections.Mapping): + if isinstance(parameters, collections_abc.Mapping): return to_query_parameters_dict(parameters) return to_query_parameters_list(parameters) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index b582860c6045..e56a343c362d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -16,6 +16,11 @@ import collections +try: + from collections import abc as collections_abc +except ImportError: # Python 2.7 + import collections as collections_abc + import six from google.cloud.bigquery import job @@ -335,7 +340,7 @@ def _format_operation(operation, parameters=None): if parameters is None: return operation - if isinstance(parameters, collections.Mapping): + if isinstance(parameters, collections_abc.Mapping): return _format_operation_dict(operation, parameters) return _format_operation_list(operation, parameters) From 645b080fc73dc677deb72ac67f623075cf13a822 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Wed, 28 Nov 2018 13:55:23 -0800 Subject: [PATCH 0518/2016] Add templates for flake8, coveragerc, noxfile, and black. (#6642) --- packages/google-cloud-bigquery/.coveragerc | 3 + packages/google-cloud-bigquery/.flake8 | 1 + .../google/cloud/bigquery/dataset.py | 10 +- .../google/cloud/bigquery/job.py | 259 +++++++++--------- .../google/cloud/bigquery/query.py | 4 +- .../google/cloud/bigquery/table.py | 7 +- .../google-cloud-bigquery/tests/system.py | 16 +- .../tests/unit/test_client.py | 60 ++-- .../tests/unit/test_job.py | 24 +- 9 files changed, 195 insertions(+), 189 deletions(-) diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index d097511c3124..098720f672e1 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -11,3 +11,6 @@ exclude_lines = def __repr__ # Ignore abstract methods raise NotImplementedError +omit = + */gapic/*.py + */proto/*.py diff --git a/packages/google-cloud-bigquery/.flake8 b/packages/google-cloud-bigquery/.flake8 index 1f44a90f8195..61766fa84d02 100644 --- a/packages/google-cloud-bigquery/.flake8 +++ b/packages/google-cloud-bigquery/.flake8 @@ -1,4 +1,5 @@ [flake8] +ignore = E203, E266, E501, W503 exclude = # Exclude generated code. **/proto/** diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 5679ed76744b..82d9b432b6d2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -98,9 +98,9 @@ def __eq__(self, other): if not isinstance(other, AccessEntry): return NotImplemented return ( - self.role == other.role and - self.entity_type == other.entity_type and - self.entity_id == other.entity_id) + self.role == other.role + and self.entity_type == other.entity_type + and self.entity_id == other.entity_id) def __ne__(self, other): return not self == other @@ -531,8 +531,8 @@ def from_api_repr(cls, resource): google.cloud.bigquery.dataset.Dataset: Dataset parsed from ``resource``. """ - if ('datasetReference' not in resource or - 'datasetId' not in resource['datasetReference']): + if ('datasetReference' not in resource + or 'datasetId' not in resource['datasetReference']): raise KeyError('Resource lacks required identity information:' '["datasetReference"]["datasetId"]') project_id = resource['datasetReference']['projectId'] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5eec77de7e3a..b625dbf51f76 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -81,9 +81,11 @@ def _error_result_to_exception(error_result): """ reason = error_result.get('reason') status_code = _ERROR_REASON_TO_EXCEPTION.get( - reason, http_client.INTERNAL_SERVER_ERROR) + reason, http_client.INTERNAL_SERVER_ERROR + ) return exceptions.from_http_status( - status_code, error_result.get('message', ''), errors=[error_result]) + status_code, error_result.get('message', ''), errors=[error_result] + ) class Compression(object): @@ -235,10 +237,7 @@ class _JobReference(object): """ def __init__(self, job_id, project, location): - self._properties = { - 'jobId': job_id, - 'projectId': project, - } + self._properties = {'jobId': job_id, 'projectId': project} # The location field must not be populated if it is None. if location: self._properties['location'] = location @@ -282,6 +281,7 @@ class _AsyncJob(google.api_core.future.polling.PollingFuture): client (google.cloud.bigquery.client.Client): Client which holds credentials and project configuration. """ + def __init__(self, job_id, client): super(_AsyncJob, self).__init__() @@ -291,9 +291,7 @@ def __init__(self, job_id, client): job_ref = job_id if not isinstance(job_id, _JobReference): job_ref = _JobReference(job_id, client.project, None) - self._properties = { - 'jobReference': job_ref._to_api_repr(), - } + self._properties = {'jobReference': job_ref._to_api_repr()} self._client = client self._result_set = False @@ -302,8 +300,7 @@ def __init__(self, job_id, client): @property def job_id(self): """str: ID of the job.""" - return _helpers._get_sub_prop( - self._properties, ['jobReference', 'jobId']) + return _helpers._get_sub_prop(self._properties, ['jobReference', 'jobId']) @property def project(self): @@ -312,14 +309,12 @@ def project(self): :rtype: str :returns: the project (derived from the client). """ - return _helpers._get_sub_prop( - self._properties, ['jobReference', 'projectId']) + return _helpers._get_sub_prop(self._properties, ['jobReference', 'projectId']) @property def location(self): """str: Location where the job runs.""" - return _helpers._get_sub_prop( - self._properties, ['jobReference', 'location']) + return _helpers._get_sub_prop(self._properties, ['jobReference', 'location']) def _require_client(self, client): """Check client or verify over-ride. @@ -508,15 +503,20 @@ def _get_resource_config(cls, resource): :raises: :class:`KeyError` if the resource has no identifier, or is missing the appropriate configuration. """ - if ('jobReference' not in resource or - 'jobId' not in resource['jobReference']): - raise KeyError('Resource lacks required identity information: ' - '["jobReference"]["jobId"]') + if 'jobReference' not in resource or 'jobId' not in resource['jobReference']: + raise KeyError( + 'Resource lacks required identity information: ' + '["jobReference"]["jobId"]' + ) job_id = resource['jobReference']['jobId'] - if ('configuration' not in resource or - cls._JOB_TYPE not in resource['configuration']): - raise KeyError('Resource lacks required configuration: ' - '["configuration"]["%s"]' % cls._JOB_TYPE) + if ( + 'configuration' not in resource + or cls._JOB_TYPE not in resource['configuration'] + ): + raise KeyError( + 'Resource lacks required configuration: ' + '["configuration"]["%s"]' % cls._JOB_TYPE + ) return job_id, resource['configuration'] def to_api_repr(self): @@ -550,8 +550,8 @@ def _begin(self, client=None, retry=DEFAULT_RETRY): # jobs.insert is idempotent because we ensure that every new # job has an ID. api_response = client._call_api( - retry, - method='POST', path=path, data=self.to_api_repr()) + retry, method='POST', path=path, data=self.to_api_repr() + ) self._set_properties(api_response) def exists(self, client=None, retry=DEFAULT_RETRY): @@ -578,9 +578,9 @@ def exists(self, client=None, retry=DEFAULT_RETRY): extra_params['location'] = self.location try: - client._call_api(retry, - method='GET', path=self.path, - query_params=extra_params) + client._call_api( + retry, method='GET', path=self.path, query_params=extra_params + ) except NotFound: return False else: @@ -607,7 +607,8 @@ def reload(self, client=None, retry=DEFAULT_RETRY): extra_params['location'] = self.location api_response = client._call_api( - retry, method='GET', path=self.path, query_params=extra_params) + retry, method='GET', path=self.path, query_params=extra_params + ) self._set_properties(api_response) def cancel(self, client=None): @@ -631,8 +632,8 @@ def cancel(self, client=None): extra_params['location'] = self.location api_response = client._connection.api_request( - method='POST', path='%s/cancel' % (self.path,), - query_params=extra_params) + method='POST', path='%s/cancel' % (self.path,), query_params=extra_params + ) self._set_properties(api_response['job']) # The Future interface requires that we return True if the *attempt* # to cancel was successful. @@ -708,8 +709,10 @@ def cancelled(self): :rtype: bool :returns: False """ - return (self.error_result is not None - and self.error_result.get('reason') == _STOPPED_REASON) + return ( + self.error_result is not None + and self.error_result.get('reason') == _STOPPED_REASON + ) class _JobConfig(object): @@ -770,7 +773,8 @@ def _get_sub_prop(self, key, default=None): object: The value if present or the default. """ return _helpers._get_sub_prop( - self._properties, [self._job_type, key], default=default) + self._properties, [self._job_type, key], default=default + ) def _set_sub_prop(self, key, value): """Set a value in the ``self._properties[self._job_type]`` dictionary. @@ -840,8 +844,10 @@ def _fill_from_default(self, default_job_config): if self._job_type != default_job_config._job_type: raise TypeError( "attempted to merge two incompatible job types: " - + repr(self._job_type) + ', ' - + repr(default_job_config._job_type)) + + repr(self._job_type) + + ', ' + + repr(default_job_config._job_type) + ) new_job_config = self.__class__() @@ -850,8 +856,7 @@ def _fill_from_default(self, default_job_config): if key != self._job_type: default_job_properties[key] = self._properties[key] - default_job_properties[self._job_type] \ - .update(self._properties[self._job_type]) + default_job_properties[self._job_type].update(self._properties[self._job_type]) new_job_config._properties = default_job_properties return new_job_config @@ -1115,8 +1120,7 @@ def schema(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema """ - schema = _helpers._get_sub_prop( - self._properties, ['load', 'schema', 'fields']) + schema = _helpers._get_sub_prop(self._properties, ['load', 'schema', 'fields']) if schema is None: return return [SchemaField.from_api_repr(field) for field in schema] @@ -1128,7 +1132,8 @@ def schema(self, value): _helpers._set_sub_prop( self._properties, ['load', 'schema', 'fields'], - [field.to_api_repr() for field in value]) + [field.to_api_repr() for field in value], + ) @property def schema_update_options(self): @@ -1226,8 +1231,7 @@ class LoadJob(_AsyncJob): _JOB_TYPE = 'load' - def __init__(self, job_id, source_uris, destination, client, - job_config=None): + def __init__(self, job_id, source_uris, destination, client, job_config=None): super(LoadJob, self).__init__(job_id, client) if job_config is None: @@ -1377,9 +1381,11 @@ def input_file_bytes(self): :returns: the count (None until set from the server). :raises: ValueError for invalid value types. """ - return _helpers._int_or_none(_helpers._get_sub_prop( + return _helpers._int_or_none( + _helpers._get_sub_prop( self._properties, ['statistics', 'load', 'inputFileBytes'] - )) + ) + ) @property def input_files(self): @@ -1388,9 +1394,11 @@ def input_files(self): :rtype: int, or ``NoneType`` :returns: the count (None until set from the server). """ - return _helpers._int_or_none(_helpers._get_sub_prop( + return _helpers._int_or_none( + _helpers._get_sub_prop( self._properties, ['statistics', 'load', 'inputFiles'] - )) + ) + ) @property def output_bytes(self): @@ -1399,9 +1407,11 @@ def output_bytes(self): :rtype: int, or ``NoneType`` :returns: the count (None until set from the server). """ - return _helpers._int_or_none(_helpers._get_sub_prop( + return _helpers._int_or_none( + _helpers._get_sub_prop( self._properties, ['statistics', 'load', 'outputBytes'] - )) + ) + ) @property def output_rows(self): @@ -1410,20 +1420,22 @@ def output_rows(self): :rtype: int, or ``NoneType`` :returns: the count (None until set from the server). """ - return _helpers._int_or_none(_helpers._get_sub_prop( + return _helpers._int_or_none( + _helpers._get_sub_prop( self._properties, ['statistics', 'load', 'outputRows'] - )) + ) + ) def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" configuration = self._configuration.to_api_repr() if self.source_uris is not None: _helpers._set_sub_prop( - configuration, ['load', 'sourceUris'], self.source_uris) + configuration, ['load', 'sourceUris'], self.source_uris + ) _helpers._set_sub_prop( - configuration, - ['load', 'destinationTable'], - self.destination.to_api_repr()) + configuration, ['load', 'destinationTable'], self.destination.to_api_repr() + ) return { 'jobReference': self._properties['jobReference'], @@ -1457,12 +1469,10 @@ def from_api_repr(cls, resource, client): config = LoadJobConfig.from_api_repr(config_resource) # A load job requires a destination table. dest_config = config_resource['load']['destinationTable'] - ds_ref = DatasetReference( - dest_config['projectId'], dest_config['datasetId']) + ds_ref = DatasetReference(dest_config['projectId'], dest_config['datasetId']) destination = TableReference(ds_ref, dest_config['tableId']) # sourceUris will be absent if this is a file upload. - source_uris = _helpers._get_sub_prop( - config_resource, ['load', 'sourceUris']) + source_uris = _helpers._get_sub_prop(config_resource, ['load', 'sourceUris']) job_ref = _JobReference._from_api_repr(resource['jobReference']) job = cls(job_ref, source_uris, destination, client, config) job._set_properties(resource) @@ -1552,6 +1562,7 @@ class CopyJob(_AsyncJob): :param job_config: (Optional) Extra configuration options for the copy job. """ + _JOB_TYPE = 'copy' def __init__(self, job_id, sources, destination, client, job_config=None): @@ -1594,15 +1605,17 @@ def destination_encryption_configuration(self): def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" - source_refs = [{ - 'projectId': table.project, - 'datasetId': table.dataset_id, - 'tableId': table.table_id, - } for table in self.sources] + source_refs = [ + { + 'projectId': table.project, + 'datasetId': table.dataset_id, + 'tableId': table.table_id, + } + for table in self.sources + ] configuration = self._configuration.to_api_repr() - _helpers._set_sub_prop( - configuration, ['copy', 'sourceTables'], source_refs) + _helpers._set_sub_prop(configuration, ['copy', 'sourceTables'], source_refs) _helpers._set_sub_prop( configuration, ['copy', 'destinationTable'], @@ -1610,7 +1623,8 @@ def to_api_repr(self): 'projectId': self.destination.project, 'datasetId': self.destination.dataset_id, 'tableId': self.destination.table_id, - }) + }, + ) return { 'jobReference': self._properties['jobReference'], @@ -1644,21 +1658,18 @@ def from_api_repr(cls, resource, client): config = CopyJobConfig.from_api_repr(config_resource) # Copy required fields to the job. copy_resource = config_resource['copy'] - destination = TableReference.from_api_repr( - copy_resource['destinationTable']) + destination = TableReference.from_api_repr(copy_resource['destinationTable']) sources = [] source_configs = copy_resource.get('sourceTables') if source_configs is None: single = copy_resource.get('sourceTable') if single is None: - raise KeyError( - "Resource missing 'sourceTables' / 'sourceTable'") + raise KeyError("Resource missing 'sourceTables' / 'sourceTable'") source_configs = [single] for source_config in source_configs: table_ref = TableReference.from_api_repr(source_config) sources.append(table_ref) - job = cls( - job_id, sources, destination, client=client, job_config=config) + job = cls(job_id, sources, destination, client=client, job_config=config) job._set_properties(resource) return job @@ -1750,10 +1761,10 @@ class ExtractJob(_AsyncJob): :param job_config: (Optional) Extra configuration options for the extract job. """ + _JOB_TYPE = 'extract' - def __init__( - self, job_id, source, destination_uris, client, job_config=None): + def __init__(self, job_id, source, destination_uris, client, job_config=None): super(ExtractJob, self).__init__(job_id, client) if job_config is None: @@ -1820,12 +1831,10 @@ def to_api_repr(self): } configuration = self._configuration.to_api_repr() + _helpers._set_sub_prop(configuration, ['extract', 'sourceTable'], source_ref) _helpers._set_sub_prop( - configuration, ['extract', 'sourceTable'], source_ref) - _helpers._set_sub_prop( - configuration, - ['extract', 'destinationUris'], - self.destination_uris) + configuration, ['extract', 'destinationUris'], self.destination_uris + ) return { 'jobReference': self._properties['jobReference'], @@ -1858,31 +1867,27 @@ def from_api_repr(cls, resource, client): job_id, config_resource = cls._get_resource_config(resource) config = ExtractJobConfig.from_api_repr(config_resource) source_config = _helpers._get_sub_prop( - config_resource, ['extract', 'sourceTable']) + config_resource, ['extract', 'sourceTable'] + ) dataset = DatasetReference( - source_config['projectId'], source_config['datasetId']) + source_config['projectId'], source_config['datasetId'] + ) source = dataset.table(source_config['tableId']) destination_uris = _helpers._get_sub_prop( - config_resource, ['extract', 'destinationUris']) + config_resource, ['extract', 'destinationUris'] + ) - job = cls( - job_id, source, destination_uris, client=client, job_config=config) + job = cls(job_id, source, destination_uris, client=client, job_config=config) job._set_properties(resource) return job def _from_api_repr_query_parameters(resource): - return [ - _query_param_from_api_repr(mapping) - for mapping in resource - ] + return [_query_param_from_api_repr(mapping) for mapping in resource] def _to_api_repr_query_parameters(value): - return [ - query_parameter.to_api_repr() - for query_parameter in value - ] + return [query_parameter.to_api_repr() for query_parameter in value] def _from_api_repr_udf_resources(resource): @@ -1894,10 +1899,7 @@ def _from_api_repr_udf_resources(resource): def _to_api_repr_udf_resources(value): - return [ - {udf_resource.udf_type: udf_resource.value} - for udf_resource in value - ] + return [{udf_resource.udf_type: udf_resource.value} for udf_resource in value] def _from_api_repr_table_defs(resource): @@ -2092,8 +2094,7 @@ def query_parameters(self): @query_parameters.setter def query_parameters(self, values): - self._set_sub_prop( - 'queryParameters', _to_api_repr_query_parameters(values)) + self._set_sub_prop('queryParameters', _to_api_repr_query_parameters(values)) @property def udf_resources(self): @@ -2109,8 +2110,8 @@ def udf_resources(self): @udf_resources.setter def udf_resources(self, values): self._set_sub_prop( - 'userDefinedFunctionResources', - _to_api_repr_udf_resources(values)) + 'userDefinedFunctionResources', _to_api_repr_udf_resources(values) + ) @property def use_legacy_sql(self): @@ -2167,8 +2168,7 @@ def table_definitions(self): @table_definitions.setter def table_definitions(self, values): - self._set_sub_prop( - 'tableDefinitions', _to_api_repr_table_defs(values)) + self._set_sub_prop('tableDefinitions', _to_api_repr_table_defs(values)) @property def time_partitioning(self): @@ -2264,6 +2264,7 @@ class QueryJob(_AsyncJob): :param job_config: (Optional) Extra configuration options for the query job. """ + _JOB_TYPE = 'query' _UDF_KEY = 'userDefinedFunctionResources' @@ -2638,8 +2639,7 @@ def undeclared_query_parameters(self): not yet completed. """ parameters = [] - undeclared = self._job_statistics().get( - 'undeclaredQueryParameters', ()) + undeclared = self._job_statistics().get('undeclaredQueryParameters', ()) for parameter in undeclared: p_type = parameter['parameterType'] @@ -2694,9 +2694,12 @@ def done(self, retry=DEFAULT_RETRY): # change once complete. if self.state != _DONE_STATE: self._query_results = self._client._get_query_results( - self.job_id, retry, - project=self.project, timeout_ms=timeout_ms, - location=self.location) + self.job_id, + retry, + project=self.project, + timeout_ms=timeout_ms, + location=self.location, + ) # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are @@ -2738,8 +2741,8 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): # Return an iterator instead of returning the job. if not self._query_results: self._query_results = self._client._get_query_results( - self.job_id, retry, project=self.project, - location=self.location) + self.job_id, retry, project=self.project, location=self.location + ) # If the query job is complete but there are no query results, this was # special job, such as a DDL query. Return an empty result set to @@ -2779,6 +2782,7 @@ class QueryPlanEntryStep(object): :type substeps: :param substeps: names of substeps """ + def __init__(self, kind, substeps): self.kind = kind self.substeps = list(substeps) @@ -2793,10 +2797,7 @@ def from_api_repr(cls, resource): :rtype: :class:`QueryPlanEntryStep` :return: new instance built from the resource """ - return cls( - kind=resource.get('kind'), - substeps=resource.get('substeps', ()), - ) + return cls(kind=resource.get('kind'), substeps=resource.get('substeps', ())) def __eq__(self, other): if not isinstance(other, self.__class__): @@ -2848,7 +2849,8 @@ def start(self): if self._properties.get('startMs') is None: return None return _helpers._datetime_from_microseconds( - int(self._properties.get('startMs')) * 1000.0) + int(self._properties.get('startMs')) * 1000.0 + ) @property def end(self): @@ -2856,15 +2858,18 @@ def end(self): if self._properties.get('endMs') is None: return None return _helpers._datetime_from_microseconds( - int(self._properties.get('endMs')) * 1000.0) + int(self._properties.get('endMs')) * 1000.0 + ) @property def input_stages(self): """List(int): Entry IDs for stages that were inputs for this stage.""" if self._properties.get('inputStages') is None: return [] - return [_helpers._int_or_none(entry) - for entry in self._properties.get('inputStages')] + return [ + _helpers._int_or_none(entry) + for entry in self._properties.get('inputStages') + ] @property def parallel_inputs(self): @@ -2876,8 +2881,7 @@ def parallel_inputs(self): @property def completed_parallel_inputs(self): """Union[int, None]: Number of parallel input segments completed.""" - return _helpers._int_or_none( - self._properties.get('completedParallelInputs')) + return _helpers._int_or_none(self._properties.get('completedParallelInputs')) @property def wait_ms_avg(self): @@ -3019,24 +3023,24 @@ def shuffle_output_bytes(self): """Union[int, None]: Number of bytes written by this stage to intermediate shuffle. """ - return _helpers._int_or_none( - self._properties.get('shuffleOutputBytes')) + return _helpers._int_or_none(self._properties.get('shuffleOutputBytes')) @property def shuffle_output_bytes_spilled(self): """Union[int, None]: Number of bytes written by this stage to intermediate shuffle and spilled to disk. """ - return _helpers._int_or_none( - self._properties.get('shuffleOutputBytesSpilled')) + return _helpers._int_or_none(self._properties.get('shuffleOutputBytesSpilled')) @property def steps(self): """List(QueryPlanEntryStep): List of step operations performed by each worker in the stage. """ - return [QueryPlanEntryStep.from_api_repr(step) - for step in self._properties.get('steps', [])] + return [ + QueryPlanEntryStep.from_api_repr(step) + for step in self._properties.get('steps', []) + ] class TimelineEntry(object): @@ -3114,8 +3118,7 @@ def from_api_repr(cls, resource, client): Returns: UnknownJob: Job corresponding to the resource. """ - job_ref_properties = resource.get( - 'jobReference', {'projectId': client.project}) + job_ref_properties = resource.get('jobReference', {'projectId': client.project}) job_ref = _JobReference._from_api_repr(job_ref_properties) job = cls(job_ref, client) # Populate the job reference with the project, even if it has been diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index a3991173f9df..fb22c680e2b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -43,8 +43,8 @@ def __eq__(self, other): if not isinstance(other, UDFResource): return NotImplemented return( - self.udf_type == other.udf_type and - self.value == other.value) + self.udf_type == other.udf_type + and self.value == other.value) def __ne__(self, other): return not self == other diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d28310fead16..28036fba74ac 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -785,8 +785,7 @@ def from_api_repr(cls, resource): """ from google.cloud.bigquery import dataset - if ('tableReference' not in resource or - 'tableId' not in resource['tableReference']): + if ('tableReference' not in resource or 'tableId' not in resource['tableReference']): raise KeyError('Resource lacks required identity information:' '["tableReference"]["tableId"]') project_id = resource['tableReference']['projectId'] @@ -1121,8 +1120,8 @@ def __eq__(self, other): if not isinstance(other, Row): return NotImplemented return( - self._xxx_values == other._xxx_values and - self._xxx_field_to_index == other._xxx_field_to_index) + self._xxx_values == other._xxx_values + and self._xxx_field_to_index == other._xxx_field_to_index) def __ne__(self, other): return not self == other diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 6ab64ec9a7a0..29d5fb908dea 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -244,8 +244,8 @@ def test_list_datasets(self): all_datasets = list(iterator) self.assertIsNone(iterator.next_page_token) created = [dataset for dataset in all_datasets - if dataset.dataset_id in datasets_to_create and - dataset.project == Config.CLIENT.project] + if dataset.dataset_id in datasets_to_create + and dataset.project == Config.CLIENT.project] self.assertEqual(len(created), len(datasets_to_create)) def test_list_datasets_w_project(self): @@ -376,8 +376,8 @@ def test_list_tables(self): all_tables = list(iterator) self.assertIsNone(iterator.next_page_token) created = [table for table in all_tables - if (table.table_id in tables_to_create and - table.dataset_id == dataset_id)] + if (table.table_id in tables_to_create + and table.dataset_id == dataset_id)] self.assertEqual(len(created), len(tables_to_create)) # List tables with a string ID. @@ -468,7 +468,7 @@ def _create_table_many_columns(self, rowcount): table_ref = dataset.table(table_id) self.to_delete.insert(0, table_ref) colprojections = ','.join( - ['r * {} as col_{}'.format(n, n) for n in range(1, 10000)]) + ['r * {} as col_{}'.format(n, n) for n in range(1, 10000)]) sql = """ CREATE TABLE {}.{} AS @@ -739,9 +739,9 @@ def test_load_table_from_file_w_explicit_location(self): # Cannot query from US. with self.assertRaises(BadRequest): list(client.query( - query_string, - location='US', - job_config=query_config)) + query_string, + location='US', + job_config=query_config)) # Cannot copy from US. with self.assertRaises(BadRequest): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 4a4e3b7f24b4..69535f62a00e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1224,7 +1224,7 @@ def test_update_dataset(self): LOCATION = 'loc' LABELS = {'priority': 'high'} ACCESS = [ - {'role': 'OWNER', 'userByEmail': 'phred@example.com'}, + {'role': 'OWNER', 'userByEmail': 'phred@example.com'}, ] EXP = 17 RESOURCE = { @@ -1465,20 +1465,20 @@ def test_update_table_w_query(self): location = 'EU' exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) schema_resource = { - 'fields': [ - { - 'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None - }, - { - 'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED', - 'description': None - }, - ], + 'fields': [ + { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': None + }, + { + 'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED', + 'description': None + }, + ], } schema = [ SchemaField('full_name', 'STRING', mode='REQUIRED'), @@ -2415,9 +2415,9 @@ def _initiate_resumable_upload_helper(self, num_retries=None): # Check the returned values. self.assertIsInstance(upload, ResumableUpload) upload_url = ( - 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - self.PROJECT + - '/jobs?uploadType=resumable') + 'https://www.googleapis.com/upload/bigquery/v2/projects/' + + self.PROJECT + + '/jobs?uploadType=resumable') self.assertEqual(upload.upload_url, upload_url) expected_headers = _get_upload_headers(conn.USER_AGENT) self.assertEqual(upload._headers, expected_headers) @@ -2484,17 +2484,17 @@ def _do_multipart_upload_success_helper( get_boundary.assert_called_once_with() upload_url = ( - 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - self.PROJECT + - '/jobs?uploadType=multipart') + 'https://www.googleapis.com/upload/bigquery/v2/projects/' + + self.PROJECT + + '/jobs?uploadType=multipart') payload = ( - b'--==0==\r\n' + - b'content-type: application/json; charset=UTF-8\r\n\r\n' + - json.dumps(metadata).encode('utf-8') + b'\r\n' + - b'--==0==\r\n' + - b'content-type: */*\r\n\r\n' + - data + b'\r\n' + - b'--==0==--') + b'--==0==\r\n' + + b'content-type: application/json; charset=UTF-8\r\n\r\n' + + json.dumps(metadata).encode('utf-8') + b'\r\n' + + b'--==0==\r\n' + + b'content-type: */*\r\n\r\n' + + data + b'\r\n' + + b'--==0==--') headers = _get_upload_headers(conn.USER_AGENT) headers['content-type'] = b'multipart/related; boundary="==0=="' fake_transport.request.assert_called_once_with( @@ -3954,7 +3954,7 @@ def test_list_rows_query_params(self): ({'start_index': 1, 'max_results': 2}, {'startIndex': 1, 'maxResults': 2}), ] - conn = client._connection = _make_connection(*len(tests)*[{}]) + conn = client._connection = _make_connection(*len(tests) * [{}]) for i, test in enumerate(tests): iterator = client.list_rows(table, **test[0]) six.next(iterator.pages) @@ -4636,5 +4636,5 @@ def test__do_multipart_upload_wrong_size(self): client._do_multipart_upload( file_obj, {}, - file_obj_len+1, + file_obj_len + 1, None) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index edddebc05e31..998a397e0bed 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1514,7 +1514,7 @@ def test_schema_hit(self): 'mode': 'REQUIRED', 'name': 'foo', 'type': 'INTEGER', - 'description': 'Foo', + 'description': 'Foo', } minimal_repr = { 'name': 'bar', @@ -3791,7 +3791,7 @@ def test_query_plan(self): 'id': '1234', 'inputStages': ['88', '101'], 'startMs': '1522540800000', - 'endMs': '1522540804000', + 'endMs': '1522540804000', 'parallelInputs': '1000', 'completedParallelInputs': '5', 'waitMsAvg': '33', @@ -3838,8 +3838,8 @@ def test_query_plan(self): self.assertEqual(found.name, expected['name']) self.assertEqual(found.entry_id, expected['id']) self.assertEqual( - len(found.input_stages), - len(expected['inputStages'])) + len(found.input_stages), + len(expected['inputStages'])) for f_id in found.input_stages: self.assertIn(f_id, [int(e) for e in expected['inputStages']]) self.assertEqual( @@ -3849,11 +3849,11 @@ def test_query_plan(self): found.end.strftime(_RFC3339_MICROS), '2018-04-01T00:00:04.000000Z') self.assertEqual( - found.parallel_inputs, - int(expected['parallelInputs'])) + found.parallel_inputs, + int(expected['parallelInputs'])) self.assertEqual( - found.completed_parallel_inputs, - int(expected['completedParallelInputs'])) + found.completed_parallel_inputs, + int(expected['completedParallelInputs'])) self.assertEqual(found.wait_ms_avg, int(expected['waitMsAvg'])) self.assertEqual(found.wait_ms_max, int(expected['waitMsMax'])) self.assertEqual(found.wait_ratio_avg, expected['waitRatioAvg']) @@ -3882,11 +3882,11 @@ def test_query_plan(self): found.records_written, int(expected['recordsWritten'])) self.assertEqual(found.status, expected['status']) self.assertEqual( - found.shuffle_output_bytes, - int(expected['shuffleOutputBytes'])) + found.shuffle_output_bytes, + int(expected['shuffleOutputBytes'])) self.assertEqual( - found.shuffle_output_bytes_spilled, - int(expected['shuffleOutputBytesSpilled'])) + found.shuffle_output_bytes_spilled, + int(expected['shuffleOutputBytesSpilled'])) self.assertEqual(len(found.steps), len(expected['steps'])) for f_step, e_step in zip(found.steps, expected['steps']): From 7f2457f9df7d78317e84ad40ac5608b6712d2074 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Fri, 30 Nov 2018 15:15:57 -0800 Subject: [PATCH 0519/2016] Blacken libraries (#6794) --- .../google-cloud-bigquery/docs/snippets.py | 1411 +++--- .../google-cloud-bigquery/google/__init__.py | 2 + .../google/cloud/__init__.py | 2 + .../google/cloud/bigquery/__init__.py | 88 +- .../google/cloud/bigquery/_helpers.py | 119 +- .../google/cloud/bigquery/_http.py | 10 +- .../google/cloud/bigquery/client.py | 516 +- .../google/cloud/bigquery/dataset.py | 180 +- .../google/cloud/bigquery/dbapi/__init__.py | 38 +- .../google/cloud/bigquery/dbapi/_helpers.py | 28 +- .../google/cloud/bigquery/dbapi/connection.py | 1 + .../google/cloud/bigquery/dbapi/cursor.py | 62 +- .../google/cloud/bigquery/dbapi/types.py | 13 +- .../google/cloud/bigquery/external_config.py | 143 +- .../google/cloud/bigquery/job.py | 650 +-- .../google/cloud/bigquery/magics.py | 81 +- .../google/cloud/bigquery/query.py | 187 +- .../google/cloud/bigquery/retry.py | 14 +- .../google/cloud/bigquery/schema.py | 45 +- .../google/cloud/bigquery/table.py | 414 +- packages/google-cloud-bigquery/noxfile.py | 31 +- .../tests/scrub_datasets.py | 5 +- .../google-cloud-bigquery/tests/system.py | 1295 +++--- .../tests/unit/test__helpers.py | 617 ++- .../tests/unit/test__http.py | 39 +- .../tests/unit/test_client.py | 4141 ++++++++--------- .../tests/unit/test_dataset.py | 400 +- .../tests/unit/test_dbapi__helpers.py | 75 +- .../tests/unit/test_dbapi_connection.py | 9 +- .../tests/unit/test_dbapi_cursor.py | 164 +- .../tests/unit/test_dbapi_types.py | 18 +- .../tests/unit/test_external_config.py | 322 +- .../tests/unit/test_job.py | 3472 +++++++------- .../tests/unit/test_magics.py | 248 +- .../tests/unit/test_query.py | 979 ++-- .../tests/unit/test_retry.py | 25 +- .../tests/unit/test_schema.py | 407 +- .../tests/unit/test_table.py | 942 ++-- 38 files changed, 8357 insertions(+), 8836 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 9e8ba524a115..a60f587d03cf 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -29,6 +29,7 @@ import mock import pytest import six + try: import pandas except (ImportError, AttributeError): @@ -46,36 +47,38 @@ from google.cloud import storage from test_utils.retry import RetryErrors -ORIGINAL_FRIENDLY_NAME = 'Original friendly name' -ORIGINAL_DESCRIPTION = 'Original description' -LOCALLY_CHANGED_FRIENDLY_NAME = 'Locally-changed friendly name' -LOCALLY_CHANGED_DESCRIPTION = 'Locally-changed description' -UPDATED_FRIENDLY_NAME = 'Updated friendly name' -UPDATED_DESCRIPTION = 'Updated description' +ORIGINAL_FRIENDLY_NAME = "Original friendly name" +ORIGINAL_DESCRIPTION = "Original description" +LOCALLY_CHANGED_FRIENDLY_NAME = "Locally-changed friendly name" +LOCALLY_CHANGED_DESCRIPTION = "Locally-changed description" +UPDATED_FRIENDLY_NAME = "Updated friendly name" +UPDATED_DESCRIPTION = "Updated description" SCHEMA = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), + ("Phred Phlyntstone", 32), + ("Bharney Rhubble", 33), + ("Wylma Phlyntstone", 29), + ("Bhettye Rhubble", 27), ] QUERY = ( - 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' - 'WHERE state = "TX"') + "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` " + 'WHERE state = "TX"' +) retry_429 = RetryErrors(TooManyRequests) retry_storage_errors = RetryErrors( - (TooManyRequests, InternalServerError, ServiceUnavailable)) + (TooManyRequests, InternalServerError, ServiceUnavailable) +) -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def client(): return bigquery.Client() @@ -98,7 +101,6 @@ def _millis(): class _CloseOnDelete(object): - def __init__(self, wrapped): self._wrapped = wrapped @@ -122,19 +124,20 @@ def test_create_client_default_credentials(): def test_create_client_json_credentials(): """Create a BigQuery client with Application Default Credentials""" - with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS']) as creds_file: + with open(os.environ["GOOGLE_APPLICATION_CREDENTIALS"]) as creds_file: creds_file_data = creds_file.read() open_mock = mock.mock_open(read_data=creds_file_data) - with mock.patch('io.open', open_mock): + with mock.patch("io.open", open_mock): # [START bigquery_client_json_credentials] from google.cloud import bigquery # Explicitly use service account credentials by specifying the private # key file. All clients in google-cloud-python have this helper. client = bigquery.Client.from_service_account_json( - 'path/to/service_account.json') + "path/to/service_account.json" + ) # [END bigquery_client_json_credentials] assert client is not None @@ -150,18 +153,18 @@ def test_list_datasets(client): project = client.project if datasets: - print('Datasets in project {}:'.format(project)) + print("Datasets in project {}:".format(project)) for dataset in datasets: # API request(s) - print('\t{}'.format(dataset.dataset_id)) + print("\t{}".format(dataset.dataset_id)) else: - print('{} project does not contain any datasets.'.format(project)) + print("{} project does not contain any datasets.".format(project)) # [END bigquery_list_datasets] def test_list_datasets_by_label(client, to_delete): - dataset_id = 'list_datasets_by_label_{}'.format(_millis()) + dataset_id = "list_datasets_by_label_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.labels = {'color': 'green'} + dataset.labels = {"color": "green"} dataset = client.create_dataset(dataset) # API request to_delete.append(dataset) @@ -171,15 +174,15 @@ def test_list_datasets_by_label(client, to_delete): # The following label filter example will find datasets with an # arbitrary 'color' label set to 'green' - label_filter = 'labels.color:green' + label_filter = "labels.color:green" datasets = list(client.list_datasets(filter=label_filter)) if datasets: - print('Datasets filtered by {}:'.format(label_filter)) + print("Datasets filtered by {}:".format(label_filter)) for dataset in datasets: # API request(s) - print('\t{}'.format(dataset.dataset_id)) + print("\t{}".format(dataset.dataset_id)) else: - print('No datasets found with this filter.') + print("No datasets found with this filter.") # [END bigquery_list_datasets_by_label] found = set([dataset.dataset_id for dataset in datasets]) assert dataset_id in found @@ -187,7 +190,7 @@ def test_list_datasets_by_label(client, to_delete): def test_create_dataset(client, to_delete): """Create a dataset.""" - dataset_id = 'create_dataset_{}'.format(_millis()) + dataset_id = "create_dataset_{}".format(_millis()) # [START bigquery_create_dataset] # from google.cloud import bigquery @@ -201,7 +204,7 @@ def test_create_dataset(client, to_delete): # Construct a full Dataset object to send to the API. dataset = bigquery.Dataset(dataset_ref) # Specify the geographic location where the dataset should reside. - dataset.location = 'US' + dataset.location = "US" # Send the dataset to the API for creation. # Raises google.api_core.exceptions.AlreadyExists if the Dataset already @@ -214,8 +217,8 @@ def test_create_dataset(client, to_delete): def test_get_dataset_information(client, to_delete): """View information about a dataset.""" - dataset_id = 'get_dataset_{}'.format(_millis()) - dataset_labels = {'color': 'green'} + dataset_id = "get_dataset_{}".format(_millis()) + dataset_labels = {"color": "green"} dataset_ref = client.dataset(dataset_id) dataset = bigquery.Dataset(dataset_ref) dataset.description = ORIGINAL_DESCRIPTION @@ -232,24 +235,24 @@ def test_get_dataset_information(client, to_delete): dataset = client.get_dataset(dataset_ref) # API request # View dataset properties - print('Dataset ID: {}'.format(dataset_id)) - print('Description: {}'.format(dataset.description)) - print('Labels:') + print("Dataset ID: {}".format(dataset_id)) + print("Description: {}".format(dataset.description)) + print("Labels:") labels = dataset.labels if labels: for label, value in labels.items(): - print('\t{}: {}'.format(label, value)) + print("\t{}: {}".format(label, value)) else: print("\tDataset has no labels defined.") # View tables in dataset - print('Tables:') + print("Tables:") tables = list(client.list_tables(dataset_ref)) # API request(s) if tables: for table in tables: - print('\t{}'.format(table.table_id)) + print("\t{}".format(table.table_id)) else: - print('\tThis dataset does not contain any tables.') + print("\tThis dataset does not contain any tables.") # [END bigquery_get_dataset] assert dataset.description == ORIGINAL_DESCRIPTION @@ -277,29 +280,34 @@ def dataset_exists(client, dataset_reference): return True except NotFound: return False + + # [END bigquery_dataset_exists] def test_dataset_exists(client, to_delete): """Determine if a dataset exists.""" - DATASET_ID = 'get_table_dataset_{}'.format(_millis()) + DATASET_ID = "get_table_dataset_{}".format(_millis()) dataset_ref = client.dataset(DATASET_ID) dataset = bigquery.Dataset(dataset_ref) dataset = client.create_dataset(dataset) to_delete.append(dataset) assert dataset_exists(client, dataset_ref) - assert not dataset_exists(client, client.dataset('i_dont_exist')) + assert not dataset_exists(client, client.dataset("i_dont_exist")) -@pytest.mark.skip(reason=( - 'update_dataset() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588')) +@pytest.mark.skip( + reason=( + "update_dataset() is flaky " + "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588" + ) +) def test_update_dataset_description(client, to_delete): """Update a dataset's description.""" - dataset_id = 'update_dataset_description_{}'.format(_millis()) + dataset_id = "update_dataset_description_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.description = 'Original description.' + dataset.description = "Original description." client.create_dataset(dataset) to_delete.append(dataset) @@ -309,21 +317,24 @@ def test_update_dataset_description(client, to_delete): # dataset_ref = client.dataset('my_dataset') # dataset = client.get_dataset(dataset_ref) # API request - assert dataset.description == 'Original description.' - dataset.description = 'Updated description.' + assert dataset.description == "Original description." + dataset.description = "Updated description." - dataset = client.update_dataset(dataset, ['description']) # API request + dataset = client.update_dataset(dataset, ["description"]) # API request - assert dataset.description == 'Updated description.' + assert dataset.description == "Updated description." # [END bigquery_update_dataset_description] -@pytest.mark.skip(reason=( - 'update_dataset() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588')) +@pytest.mark.skip( + reason=( + "update_dataset() is flaky " + "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588" + ) +) def test_update_dataset_default_table_expiration(client, to_delete): """Update a dataset's default table expiration.""" - dataset_id = 'update_dataset_default_expiration_{}'.format(_millis()) + dataset_id = "update_dataset_default_expiration_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) dataset = client.create_dataset(dataset) to_delete.append(dataset) @@ -339,17 +350,21 @@ def test_update_dataset_default_table_expiration(client, to_delete): dataset.default_table_expiration_ms = one_day_ms dataset = client.update_dataset( - dataset, ['default_table_expiration_ms']) # API request + dataset, ["default_table_expiration_ms"] + ) # API request assert dataset.default_table_expiration_ms == one_day_ms # [END bigquery_update_dataset_expiration] -@pytest.mark.skip(reason=( - 'update_dataset() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588')) +@pytest.mark.skip( + reason=( + "update_dataset() is flaky " + "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588" + ) +) def test_manage_dataset_labels(client, to_delete): - dataset_id = 'label_dataset_{}'.format(_millis()) + dataset_id = "label_dataset_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) dataset = client.create_dataset(dataset) to_delete.append(dataset) @@ -361,10 +376,10 @@ def test_manage_dataset_labels(client, to_delete): # dataset = client.get_dataset(dataset_ref) # API request assert dataset.labels == {} - labels = {'color': 'green'} + labels = {"color": "green"} dataset.labels = labels - dataset = client.update_dataset(dataset, ['labels']) # API request + dataset = client.update_dataset(dataset, ["labels"]) # API request assert dataset.labels == labels # [END bigquery_label_dataset] @@ -378,11 +393,11 @@ def test_manage_dataset_labels(client, to_delete): dataset = client.get_dataset(dataset_ref) # API request # View dataset labels - print('Dataset ID: {}'.format(dataset_id)) - print('Labels:') + print("Dataset ID: {}".format(dataset_id)) + print("Labels:") if dataset.labels: for label, value in dataset.labels.items(): - print('\t{}: {}'.format(label, value)) + print("\t{}: {}".format(label, value)) else: print("\tDataset has no labels defined.") # [END bigquery_get_dataset_labels] @@ -395,22 +410,25 @@ def test_manage_dataset_labels(client, to_delete): # dataset = client.get_dataset(dataset_ref) # API request # This example dataset starts with one label - assert dataset.labels == {'color': 'green'} + assert dataset.labels == {"color": "green"} # To delete a label from a dataset, set its value to None - dataset.labels['color'] = None + dataset.labels["color"] = None - dataset = client.update_dataset(dataset, ['labels']) # API request + dataset = client.update_dataset(dataset, ["labels"]) # API request assert dataset.labels == {} # [END bigquery_delete_label_dataset] -@pytest.mark.skip(reason=( - 'update_dataset() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588')) +@pytest.mark.skip( + reason=( + "update_dataset() is flaky " + "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588" + ) +) def test_update_dataset_access(client, to_delete): """Update a dataset's access controls.""" - dataset_id = 'update_dataset_access_{}'.format(_millis()) + dataset_id = "update_dataset_access_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) dataset = client.create_dataset(dataset) to_delete.append(dataset) @@ -421,15 +439,16 @@ def test_update_dataset_access(client, to_delete): # dataset = client.get_dataset(client.dataset('my_dataset')) entry = bigquery.AccessEntry( - role='READER', - entity_type='userByEmail', - entity_id='sample.bigquery.dev@gmail.com') + role="READER", + entity_type="userByEmail", + entity_id="sample.bigquery.dev@gmail.com", + ) assert entry not in dataset.access_entries entries = list(dataset.access_entries) entries.append(entry) dataset.access_entries = entries - dataset = client.update_dataset(dataset, ['access_entries']) # API request + dataset = client.update_dataset(dataset, ["access_entries"]) # API request assert entry in dataset.access_entries # [END bigquery_update_dataset_access] @@ -439,15 +458,15 @@ def test_delete_dataset(client): """Delete a dataset.""" from google.cloud.exceptions import NotFound - dataset1_id = 'delete_dataset_{}'.format(_millis()) + dataset1_id = "delete_dataset_{}".format(_millis()) dataset1 = bigquery.Dataset(client.dataset(dataset1_id)) client.create_dataset(dataset1) - dataset2_id = 'delete_dataset_with_tables{}'.format(_millis()) + dataset2_id = "delete_dataset_with_tables{}".format(_millis()) dataset2 = bigquery.Dataset(client.dataset(dataset2_id)) client.create_dataset(dataset2) - table = bigquery.Table(dataset2.table('new_table')) + table = bigquery.Table(dataset2.table("new_table")) client.create_table(table) # [START bigquery_delete_dataset] @@ -459,14 +478,14 @@ def test_delete_dataset(client): dataset1_ref = client.dataset(dataset1_id) client.delete_dataset(dataset1_ref) # API request - print('Dataset {} deleted.'.format(dataset1_id)) + print("Dataset {} deleted.".format(dataset1_id)) # Use the delete_contents parameter to delete a dataset and its contents # dataset2_id = 'my_dataset_with_tables' dataset2_ref = client.dataset(dataset2_id) client.delete_dataset(dataset2_ref, delete_contents=True) # API request - print('Dataset {} deleted.'.format(dataset2_id)) + print("Dataset {} deleted.".format(dataset2_id)) # [END bigquery_delete_dataset] for dataset in [dataset1, dataset2]: @@ -476,7 +495,7 @@ def test_delete_dataset(client): def test_list_tables(client, to_delete): """List tables within a dataset.""" - dataset_id = 'list_tables_dataset_{}'.format(_millis()) + dataset_id = "list_tables_dataset_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) dataset = client.create_dataset(bigquery.Dataset(dataset_ref)) to_delete.append(dataset) @@ -489,19 +508,19 @@ def test_list_tables(client, to_delete): tables = list(client.list_tables(dataset_ref)) # API request(s) assert len(tables) == 0 - table_ref = dataset.table('my_table') + table_ref = dataset.table("my_table") table = bigquery.Table(table_ref) - client.create_table(table) # API request + client.create_table(table) # API request tables = list(client.list_tables(dataset)) # API request(s) assert len(tables) == 1 - assert tables[0].table_id == 'my_table' + assert tables[0].table_id == "my_table" # [END bigquery_list_tables] def test_create_table(client, to_delete): """Create a table.""" - dataset_id = 'create_table_dataset_{}'.format(_millis()) + dataset_id = "create_table_dataset_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) @@ -513,19 +532,19 @@ def test_create_table(client, to_delete): # dataset_ref = client.dataset('my_dataset') schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] - table_ref = dataset_ref.table('my_table') + table_ref = dataset_ref.table("my_table") table = bigquery.Table(table_ref, schema=schema) table = client.create_table(table) # API request - assert table.table_id == 'my_table' + assert table.table_id == "my_table" # [END bigquery_create_table] def test_create_table_nested_repeated_schema(client, to_delete): - dataset_id = 'create_table_nested_repeated_{}'.format(_millis()) + dataset_id = "create_table_nested_repeated_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) @@ -537,29 +556,34 @@ def test_create_table_nested_repeated_schema(client, to_delete): # dataset_ref = client.dataset('my_dataset') schema = [ - bigquery.SchemaField('id', 'STRING', mode='NULLABLE'), - bigquery.SchemaField('first_name', 'STRING', mode='NULLABLE'), - bigquery.SchemaField('last_name', 'STRING', mode='NULLABLE'), - bigquery.SchemaField('dob', 'DATE', mode='NULLABLE'), - bigquery.SchemaField('addresses', 'RECORD', mode='REPEATED', fields=[ - bigquery.SchemaField('status', 'STRING', mode='NULLABLE'), - bigquery.SchemaField('address', 'STRING', mode='NULLABLE'), - bigquery.SchemaField('city', 'STRING', mode='NULLABLE'), - bigquery.SchemaField('state', 'STRING', mode='NULLABLE'), - bigquery.SchemaField('zip', 'STRING', mode='NULLABLE'), - bigquery.SchemaField('numberOfYears', 'STRING', mode='NULLABLE'), - ]), + bigquery.SchemaField("id", "STRING", mode="NULLABLE"), + bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("dob", "DATE", mode="NULLABLE"), + bigquery.SchemaField( + "addresses", + "RECORD", + mode="REPEATED", + fields=[ + bigquery.SchemaField("status", "STRING", mode="NULLABLE"), + bigquery.SchemaField("address", "STRING", mode="NULLABLE"), + bigquery.SchemaField("city", "STRING", mode="NULLABLE"), + bigquery.SchemaField("state", "STRING", mode="NULLABLE"), + bigquery.SchemaField("zip", "STRING", mode="NULLABLE"), + bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"), + ], + ), ] - table_ref = dataset_ref.table('my_table') + table_ref = dataset_ref.table("my_table") table = bigquery.Table(table_ref, schema=schema) table = client.create_table(table) # API request - print('Created table {}'.format(table.full_table_id)) + print("Created table {}".format(table.full_table_id)) # [END bigquery_nested_repeated_schema] def test_create_table_cmek(client, to_delete): - dataset_id = 'create_table_cmek_{}'.format(_millis()) + dataset_id = "create_table_cmek_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -569,15 +593,17 @@ def test_create_table_cmek(client, to_delete): # client = bigquery.Client() # dataset_id = 'my_dataset' - table_ref = client.dataset(dataset_id).table('my_table') + table_ref = client.dataset(dataset_id).table("my_table") table = bigquery.Table(table_ref) # Set the encryption key to use for the table. # TODO: Replace this key with a key you have created in Cloud KMS. - kms_key_name = 'projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}'.format( - 'cloud-samples-tests', 'us-central1', 'test', 'test') + kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + "cloud-samples-tests", "us-central1", "test", "test" + ) table.encryption_configuration = bigquery.EncryptionConfiguration( - kms_key_name=kms_key_name) + kms_key_name=kms_key_name + ) table = client.create_table(table) # API request @@ -586,7 +612,7 @@ def test_create_table_cmek(client, to_delete): def test_create_partitioned_table(client, to_delete): - dataset_id = 'create_table_partitioned_{}'.format(_millis()) + dataset_id = "create_table_partitioned_{}".format(_millis()) dataset_ref = bigquery.Dataset(client.dataset(dataset_id)) dataset = client.create_dataset(dataset_ref) to_delete.append(dataset) @@ -596,31 +622,35 @@ def test_create_partitioned_table(client, to_delete): # client = bigquery.Client() # dataset_ref = client.dataset('my_dataset') - table_ref = dataset_ref.table('my_partitioned_table') + table_ref = dataset_ref.table("my_partitioned_table") schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING'), - bigquery.SchemaField('date', 'DATE') + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + bigquery.SchemaField("date", "DATE"), ] table = bigquery.Table(table_ref, schema=schema) table.time_partitioning = bigquery.TimePartitioning( type_=bigquery.TimePartitioningType.DAY, - field='date', # name of column to use for partitioning - expiration_ms=7776000000) # 90 days + field="date", # name of column to use for partitioning + expiration_ms=7776000000, + ) # 90 days table = client.create_table(table) - print('Created table {}, partitioned on column {}'.format( - table.table_id, table.time_partitioning.field)) + print( + "Created table {}, partitioned on column {}".format( + table.table_id, table.time_partitioning.field + ) + ) # [END bigquery_create_table_partitioned] - assert table.time_partitioning.type_ == 'DAY' - assert table.time_partitioning.field == 'date' + assert table.time_partitioning.type_ == "DAY" + assert table.time_partitioning.field == "date" assert table.time_partitioning.expiration_ms == 7776000000 def test_load_and_query_partitioned_table(client, to_delete): - dataset_id = 'load_partitioned_table_dataset_{}'.format(_millis()) + dataset_id = "load_partitioned_table_dataset_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -629,28 +659,28 @@ def test_load_and_query_partitioned_table(client, to_delete): # from google.cloud import bigquery # client = bigquery.Client() # dataset_id = 'my_dataset' - table_id = 'us_states_by_date' + table_id = "us_states_by_date" dataset_ref = client.dataset(dataset_id) job_config = bigquery.LoadJobConfig() job_config.schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING'), - bigquery.SchemaField('date', 'DATE') + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + bigquery.SchemaField("date", "DATE"), ] job_config.skip_leading_rows = 1 job_config.time_partitioning = bigquery.TimePartitioning( type_=bigquery.TimePartitioningType.DAY, - field='date', # name of column to use for partitioning - expiration_ms=7776000000) # 90 days - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv' + field="date", # name of column to use for partitioning + expiration_ms=7776000000, + ) # 90 days + uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv" load_job = client.load_table_from_uri( - uri, - dataset_ref.table(table_id), - job_config=job_config) # API request + uri, dataset_ref.table(table_id), job_config=job_config + ) # API request - assert load_job.job_type == 'load' + assert load_job.job_type == "load" load_job.result() # Waits for table load to complete. @@ -663,11 +693,12 @@ def test_load_and_query_partitioned_table(client, to_delete): # [START bigquery_query_partitioned_table] import datetime + # from google.cloud import bigquery # client = bigquery.Client() # project_id = 'my-project' # dataset_id = 'my_dataset' - table_id = 'us_states_by_date' + table_id = "us_states_by_date" sql_template = """ SELECT * @@ -677,23 +708,16 @@ def test_load_and_query_partitioned_table(client, to_delete): sql = sql_template.format(project_id, dataset_id, table_id) job_config = bigquery.QueryJobConfig() job_config.query_parameters = [ - bigquery.ScalarQueryParameter( - 'start_date', - 'DATE', - datetime.date(1800, 1, 1) - ), - bigquery.ScalarQueryParameter( - 'end_date', - 'DATE', - datetime.date(1899, 12, 31) - ) + bigquery.ScalarQueryParameter("start_date", "DATE", datetime.date(1800, 1, 1)), + bigquery.ScalarQueryParameter("end_date", "DATE", datetime.date(1899, 12, 31)), ] query_job = client.query( sql, # Location must match that of the dataset(s) referenced in the query. - location='US', - job_config=job_config) # API request + location="US", + job_config=job_config, + ) # API request rows = list(query_job) print("{} states were admitted to the US in the 1800s".format(len(rows))) @@ -703,8 +727,8 @@ def test_load_and_query_partitioned_table(client, to_delete): def test_get_table_information(client, to_delete): """Show a table's properties.""" - dataset_id = 'show_table_dataset_{}'.format(_millis()) - table_id = 'show_table_table_{}'.format(_millis()) + dataset_id = "show_table_dataset_{}".format(_millis()) + table_id = "show_table_table_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) @@ -755,13 +779,15 @@ def table_exists(client, table_reference): return True except NotFound: return False + + # [END bigquery_table_exists] def test_table_exists(client, to_delete): """Determine if a table exists.""" - DATASET_ID = 'get_table_dataset_{}'.format(_millis()) - TABLE_ID = 'get_table_table_{}'.format(_millis()) + DATASET_ID = "get_table_dataset_{}".format(_millis()) + TABLE_ID = "get_table_table_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(DATASET_ID)) dataset = client.create_dataset(dataset) to_delete.append(dataset) @@ -771,15 +797,18 @@ def test_table_exists(client, to_delete): table = client.create_table(table) assert table_exists(client, table_ref) - assert not table_exists(client, dataset.table('i_dont_exist')) + assert not table_exists(client, dataset.table("i_dont_exist")) -@pytest.mark.skip(reason=( - 'update_table() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +@pytest.mark.skip( + reason=( + "update_table() is flaky " + "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" + ) +) def test_manage_table_labels(client, to_delete): - dataset_id = 'label_table_dataset_{}'.format(_millis()) - table_id = 'label_table_{}'.format(_millis()) + dataset_id = "label_table_dataset_{}".format(_millis()) + table_id = "label_table_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -794,10 +823,10 @@ def test_manage_table_labels(client, to_delete): # table = client.get_table(table_ref) # API request assert table.labels == {} - labels = {'color': 'green'} + labels = {"color": "green"} table.labels = labels - table = client.update_table(table, ['labels']) # API request + table = client.update_table(table, ["labels"]) # API request assert table.labels == labels # [END bigquery_label_table] @@ -813,11 +842,11 @@ def test_manage_table_labels(client, to_delete): table = client.get_table(table_ref) # API Request # View table labels - print('Table ID: {}'.format(table_id)) - print('Labels:') + print("Table ID: {}".format(table_id)) + print("Labels:") if table.labels: for label, value in table.labels.items(): - print('\t{}: {}'.format(label, value)) + print("\t{}: {}".format(label, value)) else: print("\tTable has no labels defined.") # [END bigquery_get_table_labels] @@ -830,29 +859,32 @@ def test_manage_table_labels(client, to_delete): # table = client.get_table(table_ref) # API request # This example table starts with one label - assert table.labels == {'color': 'green'} + assert table.labels == {"color": "green"} # To delete a label from a table, set its value to None - table.labels['color'] = None + table.labels["color"] = None - table = client.update_table(table, ['labels']) # API request + table = client.update_table(table, ["labels"]) # API request assert table.labels == {} # [END bigquery_delete_label_table] -@pytest.mark.skip(reason=( - 'update_table() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +@pytest.mark.skip( + reason=( + "update_table() is flaky " + "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" + ) +) def test_update_table_description(client, to_delete): """Update a table's description.""" - dataset_id = 'update_table_description_dataset_{}'.format(_millis()) - table_id = 'update_table_description_table_{}'.format(_millis()) + dataset_id = "update_table_description_dataset_{}".format(_millis()) + table_id = "update_table_description_table_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table.description = 'Original description.' + table.description = "Original description." table = client.create_table(table) # [START bigquery_update_table_description] @@ -861,22 +893,25 @@ def test_update_table_description(client, to_delete): # table_ref = client.dataset('my_dataset').table('my_table') # table = client.get_table(table_ref) # API request - assert table.description == 'Original description.' - table.description = 'Updated description.' + assert table.description == "Original description." + table.description = "Updated description." - table = client.update_table(table, ['description']) # API request + table = client.update_table(table, ["description"]) # API request - assert table.description == 'Updated description.' + assert table.description == "Updated description." # [END bigquery_update_table_description] -@pytest.mark.skip(reason=( - 'update_table() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +@pytest.mark.skip( + reason=( + "update_table() is flaky " + "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" + ) +) def test_update_table_expiration(client, to_delete): """Update a table's expiration time.""" - dataset_id = 'update_table_expiration_dataset_{}'.format(_millis()) - table_id = 'update_table_expiration_table_{}'.format(_millis()) + dataset_id = "update_table_expiration_dataset_{}".format(_millis()) + table_id = "update_table_expiration_table_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -898,7 +933,7 @@ def test_update_table_expiration(client, to_delete): # set table to expire 5 days from now expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5) table.expires = expiration - table = client.update_table(table, ['expires']) # API request + table = client.update_table(table, ["expires"]) # API request # expiration is stored in milliseconds margin = datetime.timedelta(microseconds=1000) @@ -906,13 +941,16 @@ def test_update_table_expiration(client, to_delete): # [END bigquery_update_table_expiration] -@pytest.mark.skip(reason=( - 'update_table() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +@pytest.mark.skip( + reason=( + "update_table() is flaky " + "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" + ) +) def test_add_empty_column(client, to_delete): """Adds an empty column to an existing table.""" - dataset_id = 'add_empty_column_dataset_{}'.format(_millis()) - table_id = 'add_empty_column_table_{}'.format(_millis()) + dataset_id = "add_empty_column_dataset_{}".format(_millis()) + table_id = "add_empty_column_table_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) dataset = client.create_dataset(dataset) to_delete.append(dataset) @@ -931,22 +969,25 @@ def test_add_empty_column(client, to_delete): original_schema = table.schema new_schema = original_schema[:] # creates a copy of the schema - new_schema.append(bigquery.SchemaField('phone', 'STRING')) + new_schema.append(bigquery.SchemaField("phone", "STRING")) table.schema = new_schema - table = client.update_table(table, ['schema']) # API request + table = client.update_table(table, ["schema"]) # API request assert len(table.schema) == len(original_schema) + 1 == len(new_schema) # [END bigquery_add_empty_column] -@pytest.mark.skip(reason=( - 'update_table() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +@pytest.mark.skip( + reason=( + "update_table() is flaky " + "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" + ) +) def test_relax_column(client, to_delete): """Updates a schema field from required to nullable.""" - dataset_id = 'relax_column_dataset_{}'.format(_millis()) - table_id = 'relax_column_table_{}'.format(_millis()) + dataset_id = "relax_column_dataset_{}".format(_millis()) + table_id = "relax_column_table_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) dataset = client.create_dataset(dataset) to_delete.append(dataset) @@ -958,44 +999,48 @@ def test_relax_column(client, to_delete): # table_id = 'my_table' original_schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] table_ref = client.dataset(dataset_id).table(table_id) table = bigquery.Table(table_ref, schema=original_schema) table = client.create_table(table) - assert all(field.mode == 'REQUIRED' for field in table.schema) + assert all(field.mode == "REQUIRED" for field in table.schema) # SchemaField properties cannot be edited after initialization. # To make changes, construct new SchemaField objects. relaxed_schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='NULLABLE'), - bigquery.SchemaField('age', 'INTEGER', mode='NULLABLE'), + bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), ] table.schema = relaxed_schema - table = client.update_table(table, ['schema']) + table = client.update_table(table, ["schema"]) - assert all(field.mode == 'NULLABLE' for field in table.schema) + assert all(field.mode == "NULLABLE" for field in table.schema) # [END bigquery_relax_column] -@pytest.mark.skip(reason=( - 'update_table() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +@pytest.mark.skip( + reason=( + "update_table() is flaky " + "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" + ) +) def test_update_table_cmek(client, to_delete): """Patch a table's metadata.""" - dataset_id = 'update_table_cmek_{}'.format(_millis()) - table_id = 'update_table_cmek_{}'.format(_millis()) + dataset_id = "update_table_cmek_{}".format(_millis()) + table_id = "update_table_cmek_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) table = bigquery.Table(dataset.table(table_id)) - original_kms_key_name = ( - 'projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}'.format( - 'cloud-samples-tests', 'us-central1', 'test', 'test')) + original_kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + "cloud-samples-tests", "us-central1", "test", "test" + ) table.encryption_configuration = bigquery.EncryptionConfiguration( - kms_key_name=original_kms_key_name) + kms_key_name=original_kms_key_name + ) table = client.create_table(table) # [START bigquery_update_table_cmek] @@ -1007,13 +1052,14 @@ def test_update_table_cmek(client, to_delete): # Set a new encryption key to use for the destination. # TODO: Replace this key with a key you have created in KMS. updated_kms_key_name = ( - 'projects/cloud-samples-tests/locations/us-central1/' - 'keyRings/test/cryptoKeys/otherkey') + "projects/cloud-samples-tests/locations/us-central1/" + "keyRings/test/cryptoKeys/otherkey" + ) table.encryption_configuration = bigquery.EncryptionConfiguration( - kms_key_name=updated_kms_key_name) + kms_key_name=updated_kms_key_name + ) - table = client.update_table( - table, ['encryption_configuration']) # API request + table = client.update_table(table, ["encryption_configuration"]) # API request assert table.encryption_configuration.kms_key_name == updated_kms_key_name assert original_kms_key_name != updated_kms_key_name @@ -1027,8 +1073,8 @@ def test_browse_table_data(client, to_delete, capsys): # from google.cloud import bigquery # client = bigquery.Client() - dataset_ref = client.dataset('samples', project='bigquery-public-data') - table_ref = dataset_ref.table('shakespeare') + dataset_ref = client.dataset("samples", project="bigquery-public-data") + table_ref = dataset_ref.table("shakespeare") table = client.get_table(table_ref) # API call # Load all rows from a table @@ -1049,24 +1095,27 @@ def test_browse_table_data(client, to_delete, capsys): rows = client.list_rows(table, start_index=10, max_results=10) # Print row data in tabular format - format_string = '{!s:<16} ' * len(rows.schema) + format_string = "{!s:<16} " * len(rows.schema) field_names = [field.name for field in rows.schema] print(format_string.format(*field_names)) # prints column headers for row in rows: - print(format_string.format(*row)) # prints row data + print(format_string.format(*row)) # prints row data # [END bigquery_browse_table] out, err = capsys.readouterr() - out = list(filter(bool, out.split('\n'))) # list of non-blank lines + out = list(filter(bool, out.split("\n"))) # list of non-blank lines assert len(out) == 11 -@pytest.mark.skip(reason=( - 'update_table() is flaky ' - 'https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589')) +@pytest.mark.skip( + reason=( + "update_table() is flaky " + "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" + ) +) def test_manage_views(client, to_delete): project = client.project - source_dataset_id = 'source_dataset_{}'.format(_millis()) + source_dataset_id = "source_dataset_{}".format(_millis()) source_dataset_ref = client.dataset(source_dataset_id) source_dataset = bigquery.Dataset(source_dataset_ref) source_dataset = client.create_dataset(source_dataset) @@ -1074,17 +1123,18 @@ def test_manage_views(client, to_delete): job_config = bigquery.LoadJobConfig() job_config.schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING') + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), ] job_config.skip_leading_rows = 1 - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.csv' - source_table_id = 'us_states' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + source_table_id = "us_states" load_job = client.load_table_from_uri( - uri, source_dataset.table(source_table_id), job_config=job_config) + uri, source_dataset.table(source_table_id), job_config=job_config + ) load_job.result() - shared_dataset_id = 'shared_dataset_{}'.format(_millis()) + shared_dataset_id = "shared_dataset_{}".format(_millis()) shared_dataset_ref = client.dataset(shared_dataset_id) shared_dataset = bigquery.Dataset(shared_dataset_ref) shared_dataset = client.create_dataset(shared_dataset) @@ -1101,15 +1151,13 @@ def test_manage_views(client, to_delete): # This example shows how to create a shared view of a source table of # US States. The source table contains all 50 states, while the view will # contain only states with names starting with 'W'. - view_ref = shared_dataset_ref.table('my_shared_view') + view_ref = shared_dataset_ref.table("my_shared_view") view = bigquery.Table(view_ref) - sql_template = ( - 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"') - view.view_query = sql_template.format( - project, source_dataset_id, source_table_id) + sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"' + view.view_query = sql_template.format(project, source_dataset_id, source_table_id) view = client.create_table(view) # API request - print('Successfully created view at {}'.format(view.full_table_id)) + print("Successfully created view at {}".format(view.full_table_id)) # [END bigquery_create_view] # [START bigquery_update_view_query] @@ -1123,13 +1171,11 @@ def test_manage_views(client, to_delete): # This example shows how to update a shared view of a source table of # US States. The view's query will be updated to contain only states with # names starting with 'M'. - view_ref = shared_dataset_ref.table('my_shared_view') + view_ref = shared_dataset_ref.table("my_shared_view") view = bigquery.Table(view_ref) - sql_template = ( - 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"') - view.view_query = sql_template.format( - project, source_dataset_id, source_table_id) - view = client.update_table(view, ['view_query']) # API request + sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"' + view.view_query = sql_template.format(project, source_dataset_id, source_table_id) + view = client.update_table(view, ["view_query"]) # API request # [END bigquery_update_view_query] # [START bigquery_get_view] @@ -1137,16 +1183,16 @@ def test_manage_views(client, to_delete): # client = bigquery.Client() # shared_dataset_id = 'my_shared_dataset' - view_ref = client.dataset(shared_dataset_id).table('my_shared_view') + view_ref = client.dataset(shared_dataset_id).table("my_shared_view") view = client.get_table(view_ref) # API Request # Display view properties - print('View at {}'.format(view.full_table_id)) - print('View Query:\n{}'.format(view.view_query)) + print("View at {}".format(view.full_table_id)) + print("View Query:\n{}".format(view.view_query)) # [END bigquery_get_view] assert view.view_query is not None - analyst_group_email = 'example-analyst-group@google.com' + analyst_group_email = "example-analyst-group@google.com" # [START bigquery_grant_view_access] # from google.cloud import bigquery # client = bigquery.Client() @@ -1155,42 +1201,44 @@ def test_manage_views(client, to_delete): # shared_dataset_id = 'my_shared_dataset' # analyst_group_email = 'data_analysts@example.com' shared_dataset = client.get_dataset( - client.dataset(shared_dataset_id)) # API request + client.dataset(shared_dataset_id) + ) # API request access_entries = shared_dataset.access_entries access_entries.append( - bigquery.AccessEntry('READER', 'groupByEmail', analyst_group_email) + bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) ) shared_dataset.access_entries = access_entries shared_dataset = client.update_dataset( - shared_dataset, ['access_entries']) # API request + shared_dataset, ["access_entries"] + ) # API request # Authorize the view to access the source dataset # project = 'my-project' # source_dataset_id = 'my_source_dataset' source_dataset = client.get_dataset( - client.dataset(source_dataset_id)) # API request + client.dataset(source_dataset_id) + ) # API request view_reference = { - 'projectId': project, - 'datasetId': shared_dataset_id, - 'tableId': 'my_shared_view', + "projectId": project, + "datasetId": shared_dataset_id, + "tableId": "my_shared_view", } access_entries = source_dataset.access_entries - access_entries.append( - bigquery.AccessEntry(None, 'view', view_reference) - ) + access_entries.append(bigquery.AccessEntry(None, "view", view_reference)) source_dataset.access_entries = access_entries source_dataset = client.update_dataset( - source_dataset, ['access_entries']) # API request + source_dataset, ["access_entries"] + ) # API request # [END bigquery_grant_view_access] def test_table_insert_rows(client, to_delete): """Insert / fetch table data.""" - dataset_id = 'table_insert_rows_dataset_{}'.format(_millis()) - table_id = 'table_insert_rows_table_{}'.format(_millis()) + dataset_id = "table_insert_rows_dataset_{}".format(_millis()) + table_id = "table_insert_rows_table_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) dataset = client.create_dataset(dataset) - dataset.location = 'US' + dataset.location = "US" to_delete.append(dataset) table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) @@ -1206,10 +1254,7 @@ def test_table_insert_rows(client, to_delete): # table_ref = client.dataset(dataset_id).table(table_id) # table = client.get_table(table_ref) # API request - rows_to_insert = [ - (u'Phred Phlyntstone', 32), - (u'Wylma Phlyntstone', 29), - ] + rows_to_insert = [(u"Phred Phlyntstone", 32), (u"Wylma Phlyntstone", 29)] errors = client.insert_rows(table, rows_to_insert) # API request @@ -1219,15 +1264,16 @@ def test_table_insert_rows(client, to_delete): def test_load_table_from_file(client, to_delete): """Upload table data from a CSV file.""" - dataset_id = 'load_table_from_file_dataset_{}'.format(_millis()) - table_id = 'load_table_from_file_table_{}'.format(_millis()) + dataset_id = "load_table_from_file_dataset_{}".format(_millis()) + table_id = "load_table_from_file_table_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = 'US' + dataset.location = "US" client.create_dataset(dataset) to_delete.append(dataset) snippets_dir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join( - snippets_dir, '..', '..', 'bigquery', 'tests', 'data', 'people.csv') + snippets_dir, "..", "..", "bigquery", "tests", "data", "people.csv" + ) # [START bigquery_load_from_file] # from google.cloud import bigquery @@ -1243,17 +1289,17 @@ def test_load_table_from_file(client, to_delete): job_config.skip_leading_rows = 1 job_config.autodetect = True - with open(filename, 'rb') as source_file: + with open(filename, "rb") as source_file: job = client.load_table_from_file( source_file, table_ref, - location='US', # Must match the destination dataset location. - job_config=job_config) # API request + location="US", # Must match the destination dataset location. + job_config=job_config, + ) # API request job.result() # Waits for table load to complete. - print('Loaded {} rows into {}:{}.'.format( - job.output_rows, dataset_id, table_id)) + print("Loaded {} rows into {}:{}.".format(job.output_rows, dataset_id, table_id)) # [END bigquery_load_from_file] table = client.get_table(table_ref) @@ -1261,14 +1307,14 @@ def test_load_table_from_file(client, to_delete): assert len(rows) == 2 # Order is not preserved, so compare individually - row1 = bigquery.Row(('Wylma Phlyntstone', 29), {'full_name': 0, 'age': 1}) + row1 = bigquery.Row(("Wylma Phlyntstone", 29), {"full_name": 0, "age": 1}) assert row1 in rows - row2 = bigquery.Row(('Phred Phlyntstone', 32), {'full_name': 0, 'age': 1}) + row2 = bigquery.Row(("Phred Phlyntstone", 32), {"full_name": 0, "age": 1}) assert row2 in rows def test_load_table_from_uri_csv(client, to_delete, capsys): - dataset_id = 'load_table_from_uri_csv_{}'.format(_millis()) + dataset_id = "load_table_from_uri_csv_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -1281,35 +1327,34 @@ def test_load_table_from_uri_csv(client, to_delete, capsys): dataset_ref = client.dataset(dataset_id) job_config = bigquery.LoadJobConfig() job_config.schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING') + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), ] job_config.skip_leading_rows = 1 # The source format defaults to CSV, so the line below is optional. job_config.source_format = bigquery.SourceFormat.CSV - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.csv' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" load_job = client.load_table_from_uri( - uri, - dataset_ref.table('us_states'), - job_config=job_config) # API request - print('Starting job {}'.format(load_job.job_id)) + uri, dataset_ref.table("us_states"), job_config=job_config + ) # API request + print("Starting job {}".format(load_job.job_id)) load_job.result() # Waits for table load to complete. - print('Job finished.') + print("Job finished.") - destination_table = client.get_table(dataset_ref.table('us_states')) - print('Loaded {} rows.'.format(destination_table.num_rows)) + destination_table = client.get_table(dataset_ref.table("us_states")) + print("Loaded {} rows.".format(destination_table.num_rows)) # [END bigquery_load_table_gcs_csv] out, _ = capsys.readouterr() - assert 'Loaded 50 rows.' in out + assert "Loaded 50 rows." in out def test_load_table_from_uri_json(client, to_delete, capsys): - dataset_id = 'load_table_from_uri_json_{}'.format(_millis()) + dataset_id = "load_table_from_uri_json_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = 'US' + dataset.location = "US" client.create_dataset(dataset) to_delete.append(dataset) @@ -1321,34 +1366,35 @@ def test_load_table_from_uri_json(client, to_delete, capsys): dataset_ref = client.dataset(dataset_id) job_config = bigquery.LoadJobConfig() job_config.schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING') + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), ] job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.json' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" load_job = client.load_table_from_uri( uri, - dataset_ref.table('us_states'), - location='US', # Location must match that of the destination dataset. - job_config=job_config) # API request - print('Starting job {}'.format(load_job.job_id)) + dataset_ref.table("us_states"), + location="US", # Location must match that of the destination dataset. + job_config=job_config, + ) # API request + print("Starting job {}".format(load_job.job_id)) load_job.result() # Waits for table load to complete. - print('Job finished.') + print("Job finished.") - destination_table = client.get_table(dataset_ref.table('us_states')) - print('Loaded {} rows.'.format(destination_table.num_rows)) + destination_table = client.get_table(dataset_ref.table("us_states")) + print("Loaded {} rows.".format(destination_table.num_rows)) # [END bigquery_load_table_gcs_json] out, _ = capsys.readouterr() - assert 'Loaded 50 rows.' in out + assert "Loaded 50 rows." in out def test_load_table_from_uri_cmek(client, to_delete): - dataset_id = 'load_table_from_uri_cmek_{}'.format(_millis()) + dataset_id = "load_table_from_uri_cmek_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = 'US' + dataset.location = "US" client.create_dataset(dataset) to_delete.append(dataset) @@ -1364,31 +1410,32 @@ def test_load_table_from_uri_cmek(client, to_delete): # Set the encryption key to use for the destination. # TODO: Replace this key with a key you have created in KMS. - kms_key_name = 'projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}'.format( - 'cloud-samples-tests', 'us-central1', 'test', 'test') - encryption_config = bigquery.EncryptionConfiguration( - kms_key_name=kms_key_name) + kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + "cloud-samples-tests", "us-central1", "test", "test" + ) + encryption_config = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name) job_config.destination_encryption_configuration = encryption_config - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.json' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" load_job = client.load_table_from_uri( uri, - dataset_ref.table('us_states'), - location='US', # Location must match that of the destination dataset. - job_config=job_config) # API request + dataset_ref.table("us_states"), + location="US", # Location must match that of the destination dataset. + job_config=job_config, + ) # API request - assert load_job.job_type == 'load' + assert load_job.job_type == "load" load_job.result() # Waits for table load to complete. - assert load_job.state == 'DONE' - table = client.get_table(dataset_ref.table('us_states')) + assert load_job.state == "DONE" + table = client.get_table(dataset_ref.table("us_states")) assert table.encryption_configuration.kms_key_name == kms_key_name # [END bigquery_load_table_gcs_json_cmek] def test_load_table_from_uri_parquet(client, to_delete, capsys): - dataset_id = 'load_table_from_uri_parquet_{}'.format(_millis()) + dataset_id = "load_table_from_uri_parquet_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -1401,27 +1448,26 @@ def test_load_table_from_uri_parquet(client, to_delete, capsys): dataset_ref = client.dataset(dataset_id) job_config = bigquery.LoadJobConfig() job_config.source_format = bigquery.SourceFormat.PARQUET - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.parquet' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" load_job = client.load_table_from_uri( - uri, - dataset_ref.table('us_states'), - job_config=job_config) # API request - print('Starting job {}'.format(load_job.job_id)) + uri, dataset_ref.table("us_states"), job_config=job_config + ) # API request + print("Starting job {}".format(load_job.job_id)) load_job.result() # Waits for table load to complete. - print('Job finished.') + print("Job finished.") - destination_table = client.get_table(dataset_ref.table('us_states')) - print('Loaded {} rows.'.format(destination_table.num_rows)) + destination_table = client.get_table(dataset_ref.table("us_states")) + print("Loaded {} rows.".format(destination_table.num_rows)) # [END bigquery_load_table_gcs_parquet] out, _ = capsys.readouterr() - assert 'Loaded 50 rows.' in out + assert "Loaded 50 rows." in out def test_load_table_from_uri_orc(client, to_delete, capsys): - dataset_id = 'load_table_from_uri_orc_{}'.format(_millis()) + dataset_id = "load_table_from_uri_orc_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -1434,23 +1480,22 @@ def test_load_table_from_uri_orc(client, to_delete, capsys): dataset_ref = client.dataset(dataset_id) job_config = bigquery.LoadJobConfig() job_config.source_format = bigquery.SourceFormat.ORC - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.orc' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" load_job = client.load_table_from_uri( - uri, - dataset_ref.table('us_states'), - job_config=job_config) # API request - print('Starting job {}'.format(load_job.job_id)) + uri, dataset_ref.table("us_states"), job_config=job_config + ) # API request + print("Starting job {}".format(load_job.job_id)) load_job.result() # Waits for table load to complete. - print('Job finished.') + print("Job finished.") - destination_table = client.get_table(dataset_ref.table('us_states')) - print('Loaded {} rows.'.format(destination_table.num_rows)) + destination_table = client.get_table(dataset_ref.table("us_states")) + print("Loaded {} rows.".format(destination_table.num_rows)) # [END bigquery_load_table_gcs_orc] out, _ = capsys.readouterr() - assert 'Loaded 50 rows.' in out + assert "Loaded 50 rows." in out def test_load_table_from_uri_autodetect(client, to_delete, capsys): @@ -1465,7 +1510,7 @@ def test_load_table_from_uri_autodetect(client, to_delete, capsys): followed by more shared code. Note that only the last format in the format-specific code section will be tested in this test. """ - dataset_id = 'load_table_from_uri_auto_{}'.format(_millis()) + dataset_id = "load_table_from_uri_auto_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -1488,35 +1533,34 @@ def test_load_table_from_uri_autodetect(client, to_delete, capsys): job_config.skip_leading_rows = 1 # The source format defaults to CSV, so the line below is optional. job_config.source_format = bigquery.SourceFormat.CSV - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.csv' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" # [END bigquery_load_table_gcs_csv_autodetect] # unset csv-specific attribute - del job_config._properties['load']['skipLeadingRows'] + del job_config._properties["load"]["skipLeadingRows"] # [START bigquery_load_table_gcs_json_autodetect] job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.json' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" # [END bigquery_load_table_gcs_json_autodetect] # Shared code # [START bigquery_load_table_gcs_csv_autodetect] # [START bigquery_load_table_gcs_json_autodetect] load_job = client.load_table_from_uri( - uri, - dataset_ref.table('us_states'), - job_config=job_config) # API request - print('Starting job {}'.format(load_job.job_id)) + uri, dataset_ref.table("us_states"), job_config=job_config + ) # API request + print("Starting job {}".format(load_job.job_id)) load_job.result() # Waits for table load to complete. - print('Job finished.') + print("Job finished.") - destination_table = client.get_table(dataset_ref.table('us_states')) - print('Loaded {} rows.'.format(destination_table.num_rows)) + destination_table = client.get_table(dataset_ref.table("us_states")) + print("Loaded {} rows.".format(destination_table.num_rows)) # [END bigquery_load_table_gcs_csv_autodetect] # [END bigquery_load_table_gcs_json_autodetect] out, _ = capsys.readouterr() - assert 'Loaded 50 rows.' in out + assert "Loaded 50 rows." in out def test_load_table_from_uri_truncate(client, to_delete, capsys): @@ -1531,20 +1575,19 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys): followed by more shared code. Note that only the last format in the format-specific code section will be tested in this test. """ - dataset_id = 'load_table_from_uri_trunc_{}'.format(_millis()) + dataset_id = "load_table_from_uri_trunc_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) job_config = bigquery.LoadJobConfig() job_config.schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING') + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), ] - table_ref = dataset.table('us_states') - body = six.BytesIO(b'Washington,WA') - client.load_table_from_file( - body, table_ref, job_config=job_config).result() + table_ref = dataset.table("us_states") + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_ref, job_config=job_config).result() # Shared code # [START bigquery_load_table_gcs_csv_truncate] @@ -1570,24 +1613,24 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys): job_config.skip_leading_rows = 1 # The source format defaults to CSV, so the line below is optional. job_config.source_format = bigquery.SourceFormat.CSV - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.csv' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" # [END bigquery_load_table_gcs_csv_truncate] # unset csv-specific attribute - del job_config._properties['load']['skipLeadingRows'] + del job_config._properties["load"]["skipLeadingRows"] # [START bigquery_load_table_gcs_json_truncate] job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.json' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" # [END bigquery_load_table_gcs_json_truncate] # [START bigquery_load_table_gcs_parquet_truncate] job_config.source_format = bigquery.SourceFormat.PARQUET - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.parquet' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" # [END bigquery_load_table_gcs_parquet_truncate] # [START bigquery_load_table_gcs_orc_truncate] job_config.source_format = bigquery.SourceFormat.ORC - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.orc' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" # [END bigquery_load_table_gcs_orc_truncate] # Shared code @@ -1596,40 +1639,38 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys): # [START bigquery_load_table_gcs_parquet_truncate] # [START bigquery_load_table_gcs_orc_truncate] load_job = client.load_table_from_uri( - uri, - table_ref, - job_config=job_config) # API request - print('Starting job {}'.format(load_job.job_id)) + uri, table_ref, job_config=job_config + ) # API request + print("Starting job {}".format(load_job.job_id)) load_job.result() # Waits for table load to complete. - print('Job finished.') + print("Job finished.") destination_table = client.get_table(table_ref) - print('Loaded {} rows.'.format(destination_table.num_rows)) + print("Loaded {} rows.".format(destination_table.num_rows)) # [END bigquery_load_table_gcs_csv_truncate] # [END bigquery_load_table_gcs_json_truncate] # [END bigquery_load_table_gcs_parquet_truncate] # [END bigquery_load_table_gcs_orc_truncate] out, _ = capsys.readouterr() - assert 'Loaded 50 rows.' in out + assert "Loaded 50 rows." in out def test_load_table_add_column(client, to_delete): - dataset_id = 'load_table_add_column_{}'.format(_millis()) + dataset_id = "load_table_add_column_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) dataset = bigquery.Dataset(dataset_ref) - dataset.location = 'US' + dataset.location = "US" dataset = client.create_dataset(dataset) to_delete.append(dataset) snippets_dir = os.path.abspath(os.path.dirname(__file__)) filepath = os.path.join( - snippets_dir, '..', '..', 'bigquery', 'tests', 'data', 'people.csv') - table_ref = dataset_ref.table('my_table') - old_schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - ] + snippets_dir, "..", "..", "bigquery", "tests", "data", "people.csv" + ) + table_ref = dataset_ref.table("my_table") + old_schema = [bigquery.SchemaField("full_name", "STRING", mode="REQUIRED")] table = client.create_table(bigquery.Table(table_ref, schema=old_schema)) # [START bigquery_add_column_load_append] @@ -1639,7 +1680,7 @@ def test_load_table_add_column(client, to_delete): # filepath = 'path/to/your_file.csv' # Retrieves the destination table and checks the length of the schema - table_id = 'my_table' + table_id = "my_table" table_ref = dataset_ref.table(table_id) table = client.get_table(table_ref) print("Table {} contains {} columns.".format(table_id, len(table.schema))) @@ -1649,54 +1690,58 @@ def test_load_table_add_column(client, to_delete): job_config = bigquery.LoadJobConfig() job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND job_config.schema_update_options = [ - bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION, + bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION ] # In this example, the existing table contains only the 'full_name' column. # 'REQUIRED' fields cannot be added to an existing schema, so the # additional column must be 'NULLABLE'. job_config.schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='NULLABLE'), + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), ] job_config.source_format = bigquery.SourceFormat.CSV job_config.skip_leading_rows = 1 - with open(filepath, 'rb') as source_file: + with open(filepath, "rb") as source_file: job = client.load_table_from_file( source_file, table_ref, - location='US', # Must match the destination dataset location. - job_config=job_config) # API request + location="US", # Must match the destination dataset location. + job_config=job_config, + ) # API request job.result() # Waits for table load to complete. - print('Loaded {} rows into {}:{}.'.format( - job.output_rows, dataset_id, table_ref.table_id)) + print( + "Loaded {} rows into {}:{}.".format( + job.output_rows, dataset_id, table_ref.table_id + ) + ) # Checks the updated length of the schema table = client.get_table(table) - print("Table {} now contains {} columns.".format( - table_id, len(table.schema))) + print("Table {} now contains {} columns.".format(table_id, len(table.schema))) # [END bigquery_add_column_load_append] assert len(table.schema) == 2 assert table.num_rows > 0 def test_load_table_relax_column(client, to_delete): - dataset_id = 'load_table_relax_column_{}'.format(_millis()) + dataset_id = "load_table_relax_column_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) dataset = bigquery.Dataset(dataset_ref) - dataset.location = 'US' + dataset.location = "US" dataset = client.create_dataset(dataset) to_delete.append(dataset) snippets_dir = os.path.abspath(os.path.dirname(__file__)) filepath = os.path.join( - snippets_dir, '..', '..', 'bigquery', 'tests', 'data', 'people.csv') - table_ref = dataset_ref.table('my_table') + snippets_dir, "..", "..", "bigquery", "tests", "data", "people.csv" + ) + table_ref = dataset_ref.table("my_table") old_schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), - bigquery.SchemaField('favorite_color', 'STRING', mode='REQUIRED'), + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("favorite_color", "STRING", mode="REQUIRED"), ] table = client.create_table(bigquery.Table(table_ref, schema=old_schema)) @@ -1707,60 +1752,60 @@ def test_load_table_relax_column(client, to_delete): # filepath = 'path/to/your_file.csv' # Retrieves the destination table and checks the number of required fields - table_id = 'my_table' + table_id = "my_table" table_ref = dataset_ref.table(table_id) table = client.get_table(table_ref) - original_required_fields = sum( - field.mode == 'REQUIRED' for field in table.schema) + original_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) # In this example, the existing table has 3 required fields. - print("{} fields in the schema are required.".format( - original_required_fields)) + print("{} fields in the schema are required.".format(original_required_fields)) # Configures the load job to append the data to a destination table, # allowing field relaxation job_config = bigquery.LoadJobConfig() job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND job_config.schema_update_options = [ - bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION ] # In this example, the existing table contains three required fields # ('full_name', 'age', and 'favorite_color'), while the data to load # contains only the first two fields. job_config.schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] job_config.source_format = bigquery.SourceFormat.CSV job_config.skip_leading_rows = 1 - with open(filepath, 'rb') as source_file: + with open(filepath, "rb") as source_file: job = client.load_table_from_file( source_file, table_ref, - location='US', # Must match the destination dataset location. - job_config=job_config) # API request + location="US", # Must match the destination dataset location. + job_config=job_config, + ) # API request job.result() # Waits for table load to complete. - print('Loaded {} rows into {}:{}.'.format( - job.output_rows, dataset_id, table_ref.table_id)) + print( + "Loaded {} rows into {}:{}.".format( + job.output_rows, dataset_id, table_ref.table_id + ) + ) # Checks the updated number of required fields table = client.get_table(table) - current_required_fields = sum( - field.mode == 'REQUIRED' for field in table.schema) - print("{} fields in the schema are now required.".format( - current_required_fields)) + current_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) + print("{} fields in the schema are now required.".format(current_required_fields)) # [END bigquery_relax_column_load_append] assert original_required_fields - current_required_fields == 1 assert len(table.schema) == 3 - assert table.schema[2].mode == 'NULLABLE' + assert table.schema[2].mode == "NULLABLE" assert table.num_rows > 0 def test_copy_table(client, to_delete): - dataset_id = 'copy_table_dataset_{}'.format(_millis()) + dataset_id = "copy_table_dataset_{}".format(_millis()) dest_dataset = bigquery.Dataset(client.dataset(dataset_id)) - dest_dataset.location = 'US' + dest_dataset.location = "US" dest_dataset = client.create_dataset(dest_dataset) to_delete.append(dest_dataset) @@ -1768,45 +1813,46 @@ def test_copy_table(client, to_delete): # from google.cloud import bigquery # client = bigquery.Client() - source_dataset = client.dataset('samples', project='bigquery-public-data') - source_table_ref = source_dataset.table('shakespeare') + source_dataset = client.dataset("samples", project="bigquery-public-data") + source_table_ref = source_dataset.table("shakespeare") # dataset_id = 'my_dataset' - dest_table_ref = client.dataset(dataset_id).table('destination_table') + dest_table_ref = client.dataset(dataset_id).table("destination_table") job = client.copy_table( source_table_ref, dest_table_ref, # Location must match that of the source and destination tables. - location='US') # API request + location="US", + ) # API request job.result() # Waits for job to complete. - assert job.state == 'DONE' + assert job.state == "DONE" dest_table = client.get_table(dest_table_ref) # API request assert dest_table.num_rows > 0 # [END bigquery_copy_table] def test_copy_table_multiple_source(client, to_delete): - dest_dataset_id = 'dest_dataset_{}'.format(_millis()) + dest_dataset_id = "dest_dataset_{}".format(_millis()) dest_dataset = bigquery.Dataset(client.dataset(dest_dataset_id)) - dest_dataset.location = 'US' + dest_dataset.location = "US" dest_dataset = client.create_dataset(dest_dataset) to_delete.append(dest_dataset) - source_dataset_id = 'source_dataset_{}'.format(_millis()) + source_dataset_id = "source_dataset_{}".format(_millis()) source_dataset = bigquery.Dataset(client.dataset(source_dataset_id)) - source_dataset.location = 'US' + source_dataset.location = "US" source_dataset = client.create_dataset(source_dataset) to_delete.append(source_dataset) schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING') + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), ] - table_data = {'table1': b'Washington,WA', 'table2': b'California,CA'} + table_data = {"table1": b"Washington,WA", "table2": b"California,CA"} for table_id, data in table_data.items(): table_ref = source_dataset.table(table_id) job_config = bigquery.LoadJobConfig() @@ -1816,8 +1862,9 @@ def test_copy_table_multiple_source(client, to_delete): body, table_ref, # Location must match that of the destination dataset. - location='US', - job_config=job_config).result() + location="US", + job_config=job_config, + ).result() # [START bigquery_copy_table_multiple_source] # from google.cloud import bigquery @@ -1825,18 +1872,19 @@ def test_copy_table_multiple_source(client, to_delete): # source_dataset_id = 'my_source_dataset' # dest_dataset_id = 'my_destination_dataset' - table1_ref = client.dataset(source_dataset_id).table('table1') - table2_ref = client.dataset(source_dataset_id).table('table2') - dest_table_ref = client.dataset(dest_dataset_id).table('destination_table') + table1_ref = client.dataset(source_dataset_id).table("table1") + table2_ref = client.dataset(source_dataset_id).table("table2") + dest_table_ref = client.dataset(dest_dataset_id).table("destination_table") job = client.copy_table( [table1_ref, table2_ref], dest_table_ref, # Location must match that of the source and destination tables. - location='US') # API request + location="US", + ) # API request job.result() # Waits for job to complete. - assert job.state == 'DONE' + assert job.state == "DONE" dest_table = client.get_table(dest_table_ref) # API request assert dest_table.num_rows > 0 # [END bigquery_copy_table_multiple_source] @@ -1845,9 +1893,9 @@ def test_copy_table_multiple_source(client, to_delete): def test_copy_table_cmek(client, to_delete): - dataset_id = 'copy_table_cmek_{}'.format(_millis()) + dataset_id = "copy_table_cmek_{}".format(_millis()) dest_dataset = bigquery.Dataset(client.dataset(dataset_id)) - dest_dataset.location = 'US' + dest_dataset.location = "US" dest_dataset = client.create_dataset(dest_dataset) to_delete.append(dest_dataset) @@ -1855,20 +1903,19 @@ def test_copy_table_cmek(client, to_delete): # from google.cloud import bigquery # client = bigquery.Client() - source_dataset = bigquery.DatasetReference( - 'bigquery-public-data', 'samples') - source_table_ref = source_dataset.table('shakespeare') + source_dataset = bigquery.DatasetReference("bigquery-public-data", "samples") + source_table_ref = source_dataset.table("shakespeare") # dataset_id = 'my_dataset' dest_dataset_ref = client.dataset(dataset_id) - dest_table_ref = dest_dataset_ref.table('destination_table') + dest_table_ref = dest_dataset_ref.table("destination_table") # Set the encryption key to use for the destination. # TODO: Replace this key with a key you have created in KMS. - kms_key_name = 'projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}'.format( - 'cloud-samples-tests', 'us-central1', 'test', 'test') - encryption_config = bigquery.EncryptionConfiguration( - kms_key_name=kms_key_name) + kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + "cloud-samples-tests", "us-central1", "test", "test" + ) + encryption_config = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name) job_config = bigquery.CopyJobConfig() job_config.destination_encryption_configuration = encryption_config @@ -1876,18 +1923,19 @@ def test_copy_table_cmek(client, to_delete): source_table_ref, dest_table_ref, # Location must match that of the source and destination tables. - location='US', - job_config=job_config) # API request + location="US", + job_config=job_config, + ) # API request job.result() # Waits for job to complete. - assert job.state == 'DONE' + assert job.state == "DONE" dest_table = client.get_table(dest_table_ref) assert dest_table.encryption_configuration.kms_key_name == kms_key_name # [END bigquery_copy_table_cmek] def test_extract_table(client, to_delete): - bucket_name = 'extract_shakespeare_{}'.format(_millis()) + bucket_name = "extract_shakespeare_{}".format(_millis()) storage_client = storage.Client() bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name) to_delete.append(bucket) @@ -1896,11 +1944,11 @@ def test_extract_table(client, to_delete): # from google.cloud import bigquery # client = bigquery.Client() # bucket_name = 'my-bucket' - project = 'bigquery-public-data' - dataset_id = 'samples' - table_id = 'shakespeare' + project = "bigquery-public-data" + dataset_id = "samples" + table_id = "shakespeare" - destination_uri = 'gs://{}/{}'.format(bucket_name, 'shakespeare.csv') + destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv") dataset_ref = client.dataset(dataset_id, project=project) table_ref = dataset_ref.table(table_id) @@ -1908,21 +1956,23 @@ def test_extract_table(client, to_delete): table_ref, destination_uri, # Location must match that of the source table. - location='US') # API request + location="US", + ) # API request extract_job.result() # Waits for job to complete. - print('Exported {}:{}.{} to {}'.format( - project, dataset_id, table_id, destination_uri)) + print( + "Exported {}:{}.{} to {}".format(project, dataset_id, table_id, destination_uri) + ) # [END bigquery_extract_table] - blob = retry_storage_errors(bucket.get_blob)('shakespeare.csv') + blob = retry_storage_errors(bucket.get_blob)("shakespeare.csv") assert blob.exists assert blob.size > 0 to_delete.insert(0, blob) def test_extract_table_json(client, to_delete): - bucket_name = 'extract_shakespeare_json_{}'.format(_millis()) + bucket_name = "extract_shakespeare_json_{}".format(_millis()) storage_client = storage.Client() bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name) to_delete.append(bucket) @@ -1932,30 +1982,30 @@ def test_extract_table_json(client, to_delete): # client = bigquery.Client() # bucket_name = 'my-bucket' - destination_uri = 'gs://{}/{}'.format(bucket_name, 'shakespeare.json') - dataset_ref = client.dataset('samples', project='bigquery-public-data') - table_ref = dataset_ref.table('shakespeare') + destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.json") + dataset_ref = client.dataset("samples", project="bigquery-public-data") + table_ref = dataset_ref.table("shakespeare") job_config = bigquery.job.ExtractJobConfig() - job_config.destination_format = ( - bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON) + job_config.destination_format = bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON extract_job = client.extract_table( table_ref, destination_uri, job_config=job_config, # Location must match that of the source table. - location='US') # API request + location="US", + ) # API request extract_job.result() # Waits for job to complete. # [END bigquery_extract_table_json] - blob = retry_storage_errors(bucket.get_blob)('shakespeare.json') + blob = retry_storage_errors(bucket.get_blob)("shakespeare.json") assert blob.exists assert blob.size > 0 to_delete.insert(0, blob) def test_extract_table_compressed(client, to_delete): - bucket_name = 'extract_shakespeare_compress_{}'.format(_millis()) + bucket_name = "extract_shakespeare_compress_{}".format(_millis()) storage_client = storage.Client() bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name) to_delete.append(bucket) @@ -1965,9 +2015,9 @@ def test_extract_table_compressed(client, to_delete): # client = bigquery.Client() # bucket_name = 'my-bucket' - destination_uri = 'gs://{}/{}'.format(bucket_name, 'shakespeare.csv.gz') - dataset_ref = client.dataset('samples', project='bigquery-public-data') - table_ref = dataset_ref.table('shakespeare') + destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv.gz") + dataset_ref = client.dataset("samples", project="bigquery-public-data") + table_ref = dataset_ref.table("shakespeare") job_config = bigquery.job.ExtractJobConfig() job_config.compression = bigquery.Compression.GZIP @@ -1975,12 +2025,13 @@ def test_extract_table_compressed(client, to_delete): table_ref, destination_uri, # Location must match that of the source table. - location='US', - job_config=job_config) # API request + location="US", + job_config=job_config, + ) # API request extract_job.result() # Waits for job to complete. # [END bigquery_extract_table_compressed] - blob = retry_storage_errors(bucket.get_blob)('shakespeare.csv.gz') + blob = retry_storage_errors(bucket.get_blob)("shakespeare.csv.gz") assert blob.exists assert blob.size > 0 to_delete.insert(0, blob) @@ -1990,11 +2041,11 @@ def test_delete_table(client, to_delete): """Delete a table.""" from google.cloud.exceptions import NotFound - dataset_id = 'delete_table_dataset_{}'.format(_millis()) - table_id = 'delete_table_table_{}'.format(_millis()) + dataset_id = "delete_table_dataset_{}".format(_millis()) + table_id = "delete_table_table_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) dataset = bigquery.Dataset(dataset_ref) - dataset.location = 'US' + dataset.location = "US" dataset = client.create_dataset(dataset) to_delete.append(dataset) @@ -2010,7 +2061,7 @@ def test_delete_table(client, to_delete): table_ref = client.dataset(dataset_id).table(table_id) client.delete_table(table_ref) # API request - print('Table {}:{} deleted.'.format(dataset_id, table_id)) + print("Table {}:{} deleted.".format(dataset_id, table_id)) # [END bigquery_delete_table] with pytest.raises(NotFound): @@ -2018,10 +2069,10 @@ def test_delete_table(client, to_delete): def test_undelete_table(client, to_delete): - dataset_id = 'undelete_table_dataset_{}'.format(_millis()) - table_id = 'undelete_table_table_{}'.format(_millis()) + dataset_id = "undelete_table_dataset_{}".format(_millis()) + table_id = "undelete_table_table_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = 'US' + dataset.location = "US" dataset = client.create_dataset(dataset) to_delete.append(dataset) @@ -2057,11 +2108,11 @@ def test_undelete_table(client, to_delete): client.delete_table(table_ref) # API request # Construct the restore-from table ID using a snapshot decorator. - snapshot_table_id = '{}@{}'.format(table_id, snapshot_epoch) + snapshot_table_id = "{}@{}".format(table_id, snapshot_epoch) source_table_ref = client.dataset(dataset_id).table(snapshot_table_id) # Choose a new table ID for the recovered table data. - recovered_table_id = '{}_recovered'.format(table_id) + recovered_table_id = "{}_recovered".format(table_id) dest_table_ref = client.dataset(dataset_id).table(recovered_table_id) # Construct and run a copy job. @@ -2069,12 +2120,14 @@ def test_undelete_table(client, to_delete): source_table_ref, dest_table_ref, # Location must match that of the source and destination tables. - location='US') # API request + location="US", + ) # API request job.result() # Waits for job to complete. - print('Copied data from deleted table {} to {}'.format( - table_id, recovered_table_id)) + print( + "Copied data from deleted table {} to {}".format(table_id, recovered_table_id) + ) # [END bigquery_undelete_table] @@ -2086,17 +2139,19 @@ def test_client_query(client): # client = bigquery.Client() query = ( - 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' + "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` " 'WHERE state = "TX" ' - 'LIMIT 100') + "LIMIT 100" + ) query_job = client.query( query, # Location must match that of the dataset(s) referenced in the query. - location='US') # API request - starts the query + location="US", + ) # API request - starts the query for row in query_job: # API request - fetches results # Row values can be accessed by field name or index - assert row[0] == row.name == row['name'] + assert row[0] == row.name == row["name"] print(row) # [END bigquery_query] @@ -2108,9 +2163,10 @@ def test_client_query_legacy_sql(client): # client = bigquery.Client() query = ( - 'SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] ' + "SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] " 'WHERE state = "TX" ' - 'LIMIT 100') + "LIMIT 100" + ) # Set use_legacy_sql to True to use legacy SQL syntax. job_config = bigquery.QueryJobConfig() @@ -2119,8 +2175,9 @@ def test_client_query_legacy_sql(client): query_job = client.query( query, # Location must match that of the dataset(s) referenced in the query. - location='US', - job_config=job_config) # API request - starts the query + location="US", + job_config=job_config, + ) # API request - starts the query # Print the results. for row in query_job: # API request - fetches results @@ -2134,7 +2191,7 @@ def test_manage_job(client): FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus; """ - location = 'us' + location = "us" job = client.query(sql, location=location) job_id = job.job_id @@ -2158,19 +2215,22 @@ def test_manage_job(client): job = client.get_job(job_id, location=location) # API request # Print selected job properties - print('Details for job {} running in {}:'.format(job_id, location)) - print('\tType: {}\n\tState: {}\n\tCreated: {}'.format( - job.job_type, job.state, job.created)) + print("Details for job {} running in {}:".format(job_id, location)) + print( + "\tType: {}\n\tState: {}\n\tCreated: {}".format( + job.job_type, job.state, job.created + ) + ) # [END bigquery_get_job] def test_client_query_destination_table(client, to_delete): """Run a query""" - dataset_id = 'query_destination_table_{}'.format(_millis()) + dataset_id = "query_destination_table_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) to_delete.append(dataset_ref) dataset = bigquery.Dataset(dataset_ref) - dataset.location = 'US' + dataset.location = "US" client.create_dataset(dataset) # [START bigquery_query_destination_table] @@ -2180,7 +2240,7 @@ def test_client_query_destination_table(client, to_delete): job_config = bigquery.QueryJobConfig() # Set the destination table - table_ref = client.dataset(dataset_id).table('your_table_id') + table_ref = client.dataset(dataset_id).table("your_table_id") job_config.destination = table_ref sql = """ SELECT corpus @@ -2193,20 +2253,21 @@ def test_client_query_destination_table(client, to_delete): sql, # Location must match that of the dataset(s) referenced in the query # and of the destination table. - location='US', - job_config=job_config) # API request - starts the query + location="US", + job_config=job_config, + ) # API request - starts the query query_job.result() # Waits for the query to finish - print('Query results loaded to table {}'.format(table_ref.path)) + print("Query results loaded to table {}".format(table_ref.path)) # [END bigquery_query_destination_table] def test_client_query_destination_table_legacy(client, to_delete): - dataset_id = 'query_destination_table_legacy_{}'.format(_millis()) + dataset_id = "query_destination_table_legacy_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) to_delete.append(dataset_ref) dataset = bigquery.Dataset(dataset_ref) - dataset.location = 'US' + dataset.location = "US" client.create_dataset(dataset) # [START bigquery_query_legacy_large_results] @@ -2218,7 +2279,7 @@ def test_client_query_destination_table_legacy(client, to_delete): # Set use_legacy_sql to True to use legacy SQL syntax. job_config.use_legacy_sql = True # Set the destination table - table_ref = client.dataset(dataset_id).table('your_table_id') + table_ref = client.dataset(dataset_id).table("your_table_id") job_config.destination = table_ref job_config.allow_large_results = True sql = """ @@ -2231,21 +2292,22 @@ def test_client_query_destination_table_legacy(client, to_delete): sql, # Location must match that of the dataset(s) referenced in the query # and of the destination table. - location='US', - job_config=job_config) # API request - starts the query + location="US", + job_config=job_config, + ) # API request - starts the query query_job.result() # Waits for the query to finish - print('Query results loaded to table {}'.format(table_ref.path)) + print("Query results loaded to table {}".format(table_ref.path)) # [END bigquery_query_legacy_large_results] def test_client_query_destination_table_cmek(client, to_delete): """Run a query""" - dataset_id = 'query_destination_table_{}'.format(_millis()) + dataset_id = "query_destination_table_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) to_delete.append(dataset_ref) dataset = bigquery.Dataset(dataset_ref) - dataset.location = 'US' + dataset.location = "US" client.create_dataset(dataset) # [START bigquery_query_destination_table_cmek] @@ -2256,24 +2318,25 @@ def test_client_query_destination_table_cmek(client, to_delete): # Set the destination table. Here, dataset_id is a string, such as: # dataset_id = 'your_dataset_id' - table_ref = client.dataset(dataset_id).table('your_table_id') + table_ref = client.dataset(dataset_id).table("your_table_id") job_config.destination = table_ref # Set the encryption key to use for the destination. # TODO: Replace this key with a key you have created in KMS. - kms_key_name = 'projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}'.format( - 'cloud-samples-tests', 'us-central1', 'test', 'test') - encryption_config = bigquery.EncryptionConfiguration( - kms_key_name=kms_key_name) + kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + "cloud-samples-tests", "us-central1", "test", "test" + ) + encryption_config = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name) job_config.destination_encryption_configuration = encryption_config # Start the query, passing in the extra configuration. query_job = client.query( - 'SELECT 17 AS my_col;', + "SELECT 17 AS my_col;", # Location must match that of the dataset(s) referenced in the query # and of the destination table. - location='US', - job_config=job_config) # API request - starts the query + location="US", + job_config=job_config, + ) # API request - starts the query query_job.result() # The destination table is written using the encryption configuration. @@ -2296,7 +2359,7 @@ def test_client_query_batch(client, to_delete): GROUP BY corpus; """ # Location must match that of the dataset(s) referenced in the query. - location = 'US' + location = "US" # API request - starts the query query_job = client.query(sql, location=location, job_config=job_config) @@ -2304,27 +2367,26 @@ def test_client_query_batch(client, to_delete): # Check on the progress by getting the job's updated state. Once the state # is `DONE`, the results are ready. query_job = client.get_job( - query_job.job_id, location=location) # API request - fetches job - print('Job {} is currently in state {}'.format( - query_job.job_id, query_job.state)) + query_job.job_id, location=location + ) # API request - fetches job + print("Job {} is currently in state {}".format(query_job.job_id, query_job.state)) # [END bigquery_query_batch] def test_client_query_relax_column(client, to_delete): - dataset_id = 'query_relax_column_{}'.format(_millis()) + dataset_id = "query_relax_column_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) dataset = bigquery.Dataset(dataset_ref) - dataset.location = 'US' + dataset.location = "US" dataset = client.create_dataset(dataset) to_delete.append(dataset) - table_ref = dataset_ref.table('my_table') + table_ref = dataset_ref.table("my_table") schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] - table = client.create_table( - bigquery.Table(table_ref, schema=schema)) + table = client.create_table(bigquery.Table(table_ref, schema=schema)) # [START bigquery_relax_column_query_append] # from google.cloud import bigquery @@ -2332,20 +2394,18 @@ def test_client_query_relax_column(client, to_delete): # dataset_ref = client.dataset('my_dataset') # Retrieves the destination table and checks the number of required fields - table_id = 'my_table' + table_id = "my_table" table_ref = dataset_ref.table(table_id) table = client.get_table(table_ref) - original_required_fields = sum( - field.mode == 'REQUIRED' for field in table.schema) + original_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) # In this example, the existing table has 2 required fields - print("{} fields in the schema are required.".format( - original_required_fields)) + print("{} fields in the schema are required.".format(original_required_fields)) # Configures the query to append the results to a destination table, # allowing field relaxation job_config = bigquery.QueryJobConfig() job_config.schema_update_options = [ - bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION ] job_config.destination = table_ref job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND @@ -2356,8 +2416,8 @@ def test_client_query_relax_column(client, to_delete): 'SELECT "Beyonce" as full_name;', # Location must match that of the dataset(s) referenced in the query # and of the destination table. - location='US', - job_config=job_config + location="US", + job_config=job_config, ) # API request - starts the query query_job.result() # Waits for the query to finish @@ -2365,29 +2425,27 @@ def test_client_query_relax_column(client, to_delete): # Checks the updated number of required fields table = client.get_table(table) - current_required_fields = sum( - field.mode == 'REQUIRED' for field in table.schema) - print("{} fields in the schema are now required.".format( - current_required_fields)) + current_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) + print("{} fields in the schema are now required.".format(current_required_fields)) # [END bigquery_relax_column_query_append] assert original_required_fields - current_required_fields > 0 assert len(table.schema) == 2 - assert table.schema[1].mode == 'NULLABLE' + assert table.schema[1].mode == "NULLABLE" assert table.num_rows > 0 def test_client_query_add_column(client, to_delete): - dataset_id = 'query_add_column_{}'.format(_millis()) + dataset_id = "query_add_column_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) dataset = bigquery.Dataset(dataset_ref) - dataset.location = 'US' + dataset.location = "US" dataset = client.create_dataset(dataset) to_delete.append(dataset) - table_ref = dataset_ref.table('my_table') + table_ref = dataset_ref.table("my_table") schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] table = client.create_table(bigquery.Table(table_ref, schema=schema)) @@ -2397,7 +2455,7 @@ def test_client_query_add_column(client, to_delete): # dataset_ref = client.dataset('my_dataset') # Retrieves the destination table and checks the length of the schema - table_id = 'my_table' + table_id = "my_table" table_ref = dataset_ref.table(table_id) table = client.get_table(table_ref) print("Table {} contains {} columns.".format(table_id, len(table.schema))) @@ -2406,7 +2464,7 @@ def test_client_query_add_column(client, to_delete): # allowing field addition job_config = bigquery.QueryJobConfig() job_config.schema_update_options = [ - bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION, + bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION ] job_config.destination = table_ref job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND @@ -2418,8 +2476,8 @@ def test_client_query_add_column(client, to_delete): 'SELECT "Timmy" as full_name, 85 as age, "Blue" as favorite_color;', # Location must match that of the dataset(s) referenced in the query # and of the destination table. - location='US', - job_config=job_config + location="US", + job_config=job_config, ) # API request - starts the query query_job.result() # Waits for the query to finish @@ -2427,8 +2485,7 @@ def test_client_query_add_column(client, to_delete): # Checks the updated length of the schema table = client.get_table(table) - print("Table {} now contains {} columns.".format( - table_id, len(table.schema))) + print("Table {} now contains {} columns.".format(table_id, len(table.schema))) # [END bigquery_add_column_query_append] assert len(table.schema) == 3 assert table.num_rows > 0 @@ -2449,26 +2506,27 @@ def test_client_query_w_named_params(client, capsys): ORDER BY word_count DESC; """ query_params = [ - bigquery.ScalarQueryParameter('corpus', 'STRING', 'romeoandjuliet'), - bigquery.ScalarQueryParameter('min_word_count', 'INT64', 250) + bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"), + bigquery.ScalarQueryParameter("min_word_count", "INT64", 250), ] job_config = bigquery.QueryJobConfig() job_config.query_parameters = query_params query_job = client.query( query, # Location must match that of the dataset(s) referenced in the query. - location='US', - job_config=job_config) # API request - starts the query + location="US", + job_config=job_config, + ) # API request - starts the query # Print the results for row in query_job: - print('{}: \t{}'.format(row.word, row.word_count)) + print("{}: \t{}".format(row.word, row.word_count)) - assert query_job.state == 'DONE' + assert query_job.state == "DONE" # [END bigquery_query_params_named] out, _ = capsys.readouterr() - assert 'the' in out + assert "the" in out def test_client_query_w_positional_params(client, capsys): @@ -2488,26 +2546,27 @@ def test_client_query_w_positional_params(client, capsys): # Set the name to None to use positional parameters. # Note that you cannot mix named and positional parameters. query_params = [ - bigquery.ScalarQueryParameter(None, 'STRING', 'romeoandjuliet'), - bigquery.ScalarQueryParameter(None, 'INT64', 250) + bigquery.ScalarQueryParameter(None, "STRING", "romeoandjuliet"), + bigquery.ScalarQueryParameter(None, "INT64", 250), ] job_config = bigquery.QueryJobConfig() job_config.query_parameters = query_params query_job = client.query( query, # Location must match that of the dataset(s) referenced in the query. - location='US', - job_config=job_config) # API request - starts the query + location="US", + job_config=job_config, + ) # API request - starts the query # Print the results for row in query_job: - print('{}: \t{}'.format(row.word, row.word_count)) + print("{}: \t{}".format(row.word, row.word_count)) - assert query_job.state == 'DONE' + assert query_job.state == "DONE" # [END bigquery_query_params_positional] out, _ = capsys.readouterr() - assert 'the' in out + assert "the" in out def test_client_query_w_timestamp_params(client, capsys): @@ -2520,30 +2579,32 @@ def test_client_query_w_timestamp_params(client, capsys): import datetime import pytz - query = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);' + query = "SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);" query_params = [ bigquery.ScalarQueryParameter( - 'ts_value', - 'TIMESTAMP', - datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC)) + "ts_value", + "TIMESTAMP", + datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC), + ) ] job_config = bigquery.QueryJobConfig() job_config.query_parameters = query_params query_job = client.query( query, # Location must match that of the dataset(s) referenced in the query. - location='US', - job_config=job_config) # API request - starts the query + location="US", + job_config=job_config, + ) # API request - starts the query # Print the results for row in query_job: print(row) - assert query_job.state == 'DONE' + assert query_job.state == "DONE" # [END bigquery_query_params_timestamps] out, _ = capsys.readouterr() - assert '2016, 12, 7, 9, 0' in out + assert "2016, 12, 7, 9, 0" in out def test_client_query_w_array_params(client, capsys): @@ -2562,27 +2623,27 @@ def test_client_query_w_array_params(client, capsys): LIMIT 10; """ query_params = [ - bigquery.ScalarQueryParameter('gender', 'STRING', 'M'), - bigquery.ArrayQueryParameter( - 'states', 'STRING', ['WA', 'WI', 'WV', 'WY']) + bigquery.ScalarQueryParameter("gender", "STRING", "M"), + bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]), ] job_config = bigquery.QueryJobConfig() job_config.query_parameters = query_params query_job = client.query( query, # Location must match that of the dataset(s) referenced in the query. - location='US', - job_config=job_config) # API request - starts the query + location="US", + job_config=job_config, + ) # API request - starts the query # Print the results for row in query_job: - print('{}: \t{}'.format(row.name, row.count)) + print("{}: \t{}".format(row.name, row.count)) - assert query_job.state == 'DONE' + assert query_job.state == "DONE" # [END bigquery_query_params_arrays] out, _ = capsys.readouterr() - assert 'James' in out + assert "James" in out def test_client_query_w_struct_params(client, capsys): @@ -2591,12 +2652,12 @@ def test_client_query_w_struct_params(client, capsys): # from google.cloud import bigquery # client = bigquery.Client() - query = 'SELECT @struct_value AS s;' + query = "SELECT @struct_value AS s;" query_params = [ bigquery.StructQueryParameter( - 'struct_value', - bigquery.ScalarQueryParameter('x', 'INT64', 1), - bigquery.ScalarQueryParameter('y', 'STRING', 'foo') + "struct_value", + bigquery.ScalarQueryParameter("x", "INT64", 1), + bigquery.ScalarQueryParameter("y", "STRING", "foo"), ) ] job_config = bigquery.QueryJobConfig() @@ -2604,19 +2665,20 @@ def test_client_query_w_struct_params(client, capsys): query_job = client.query( query, # Location must match that of the dataset(s) referenced in the query. - location='US', - job_config=job_config) # API request - starts the query + location="US", + job_config=job_config, + ) # API request - starts the query # Print the results for row in query_job: print(row.s) - assert query_job.state == 'DONE' + assert query_job.state == "DONE" # [END bigquery_query_params_structs] out, _ = capsys.readouterr() - assert '1' in out - assert 'foo' in out + assert "1" in out + assert "foo" in out def test_client_query_dry_run(client): @@ -2630,20 +2692,22 @@ def test_client_query_dry_run(client): job_config.dry_run = True job_config.use_query_cache = False query_job = client.query( - ('SELECT name, COUNT(*) as name_count ' - 'FROM `bigquery-public-data.usa_names.usa_1910_2013` ' - "WHERE state = 'WA' " - 'GROUP BY name'), + ( + "SELECT name, COUNT(*) as name_count " + "FROM `bigquery-public-data.usa_names.usa_1910_2013` " + "WHERE state = 'WA' " + "GROUP BY name" + ), # Location must match that of the dataset(s) referenced in the query. - location='US', - job_config=job_config) # API request + location="US", + job_config=job_config, + ) # API request # A dry run query completes immediately. - assert query_job.state == 'DONE' + assert query_job.state == "DONE" assert query_job.dry_run - print("This query will process {} bytes.".format( - query_job.total_bytes_processed)) + print("This query will process {} bytes.".format(query_job.total_bytes_processed)) # [END bigquery_query_dry_run] assert query_job.total_bytes_processed > 0 @@ -2664,8 +2728,9 @@ def test_query_no_cache(client): query_job = client.query( sql, # Location must match that of the dataset(s) referenced in the query. - location='US', - job_config=job_config) # API request + location="US", + job_config=job_config, + ) # API request # Print the results. for row in query_job: # API request - fetches results @@ -2679,16 +2744,16 @@ def test_query_external_gcs_temporary_table(client): # client = bigquery.Client() # Configure the external data source and query job - external_config = bigquery.ExternalConfig('CSV') + external_config = bigquery.ExternalConfig("CSV") external_config.source_uris = [ - 'gs://cloud-samples-data/bigquery/us-states/us-states.csv', + "gs://cloud-samples-data/bigquery/us-states/us-states.csv" ] external_config.schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING') + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), ] external_config.options.skip_leading_rows = 1 # optionally skip header row - table_id = 'us_states' + table_id = "us_states" job_config = bigquery.QueryJobConfig() job_config.table_definitions = {table_id: external_config} @@ -2698,14 +2763,13 @@ def test_query_external_gcs_temporary_table(client): query_job = client.query(sql, job_config=job_config) # API request w_states = list(query_job) # Waits for query to finish - print('There are {} states with names starting with W.'.format( - len(w_states))) + print("There are {} states with names starting with W.".format(len(w_states))) # [END bigquery_query_external_gcs_temp] assert len(w_states) == 4 def test_query_external_gcs_permanent_table(client, to_delete): - dataset_id = 'query_external_gcs_{}'.format(_millis()) + dataset_id = "query_external_gcs_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -2717,15 +2781,15 @@ def test_query_external_gcs_permanent_table(client, to_delete): # Configure the external data source dataset_ref = client.dataset(dataset_id) - table_id = 'us_states' + table_id = "us_states" schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING') + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), ] table = bigquery.Table(dataset_ref.table(table_id), schema=schema) - external_config = bigquery.ExternalConfig('CSV') + external_config = bigquery.ExternalConfig("CSV") external_config.source_uris = [ - 'gs://cloud-samples-data/bigquery/us-states/us-states.csv', + "gs://cloud-samples-data/bigquery/us-states/us-states.csv" ] external_config.options.skip_leading_rows = 1 # optionally skip header row table.external_data_configuration = external_config @@ -2734,14 +2798,12 @@ def test_query_external_gcs_permanent_table(client, to_delete): table = client.create_table(table) # API request # Example query to find states starting with 'W' - sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format( - dataset_id, table_id) + sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id) query_job = client.query(sql) # API request w_states = list(query_job) # Waits for query to finish - print('There are {} states with names starting with W.'.format( - len(w_states))) + print("There are {} states with names starting with W.".format(len(w_states))) # [END bigquery_query_external_gcs_perm] assert len(w_states) == 4 @@ -2750,31 +2812,35 @@ def test_query_external_sheets_temporary_table(client): # [START bigquery_query_external_sheets_temp] # [START bigquery_auth_drive_scope] import google.auth + # from google.cloud import bigquery # Create credentials with Drive & BigQuery API scopes # Both APIs must be enabled for your project before running this code - credentials, project = google.auth.default(scopes=[ - 'https://www.googleapis.com/auth/drive', - 'https://www.googleapis.com/auth/bigquery', - ]) + credentials, project = google.auth.default( + scopes=[ + "https://www.googleapis.com/auth/drive", + "https://www.googleapis.com/auth/bigquery", + ] + ) client = bigquery.Client(credentials=credentials, project=project) # [END bigquery_auth_drive_scope] # Configure the external data source and query job - external_config = bigquery.ExternalConfig('GOOGLE_SHEETS') + external_config = bigquery.ExternalConfig("GOOGLE_SHEETS") # Use a shareable link or grant viewing access to the email address you # used to authenticate with BigQuery (this example Sheet is public) sheet_url = ( - 'https://docs.google.com/spreadsheets' - '/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing') + "https://docs.google.com/spreadsheets" + "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing" + ) external_config.source_uris = [sheet_url] external_config.schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING') + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), ] external_config.options.skip_leading_rows = 1 # optionally skip header row - table_id = 'us_states' + table_id = "us_states" job_config = bigquery.QueryJobConfig() job_config.table_definitions = {table_id: external_config} @@ -2784,45 +2850,48 @@ def test_query_external_sheets_temporary_table(client): query_job = client.query(sql, job_config=job_config) # API request w_states = list(query_job) # Waits for query to finish - print('There are {} states with names starting with W.'.format( - len(w_states))) + print("There are {} states with names starting with W.".format(len(w_states))) # [END bigquery_query_external_sheets_temp] assert len(w_states) == 4 def test_query_external_sheets_permanent_table(client, to_delete): - dataset_id = 'query_external_sheets_{}'.format(_millis()) + dataset_id = "query_external_sheets_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) # [START bigquery_query_external_sheets_perm] import google.auth + # from google.cloud import bigquery # dataset_id = 'my_dataset' # Create credentials with Drive & BigQuery API scopes # Both APIs must be enabled for your project before running this code - credentials, project = google.auth.default(scopes=[ - 'https://www.googleapis.com/auth/drive', - 'https://www.googleapis.com/auth/bigquery', - ]) + credentials, project = google.auth.default( + scopes=[ + "https://www.googleapis.com/auth/drive", + "https://www.googleapis.com/auth/bigquery", + ] + ) client = bigquery.Client(credentials=credentials, project=project) # Configure the external data source dataset_ref = client.dataset(dataset_id) - table_id = 'us_states' + table_id = "us_states" schema = [ - bigquery.SchemaField('name', 'STRING'), - bigquery.SchemaField('post_abbr', 'STRING') + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), ] table = bigquery.Table(dataset_ref.table(table_id), schema=schema) - external_config = bigquery.ExternalConfig('GOOGLE_SHEETS') + external_config = bigquery.ExternalConfig("GOOGLE_SHEETS") # Use a shareable link or grant viewing access to the email address you # used to authenticate with BigQuery (this example Sheet is public) sheet_url = ( - 'https://docs.google.com/spreadsheets' - '/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing') + "https://docs.google.com/spreadsheets" + "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing" + ) external_config.source_uris = [sheet_url] external_config.options.skip_leading_rows = 1 # optionally skip header row table.external_data_configuration = external_config @@ -2831,14 +2900,12 @@ def test_query_external_sheets_permanent_table(client, to_delete): table = client.create_table(table) # API request # Example query to find states starting with 'W' - sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format( - dataset_id, table_id) + sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id) query_job = client.query(sql) # API request w_states = list(query_job) # Waits for query to finish - print('There are {} states with names starting with W.'.format( - len(w_states))) + print("There are {} states with names starting with W.".format(len(w_states))) # [END bigquery_query_external_sheets_perm] assert len(w_states) == 4 @@ -2846,8 +2913,8 @@ def test_query_external_sheets_permanent_table(client, to_delete): def test_ddl_create_view(client, to_delete, capsys): """Create a view via a DDL query.""" project = client.project - dataset_id = 'ddl_view_{}'.format(_millis()) - table_id = 'new_view' + dataset_id = "ddl_view_{}".format(_millis()) + table_id = "new_view" dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -2871,20 +2938,24 @@ def test_ddl_create_view(client, to_delete, capsys): AS SELECT name, state, year, number FROM `bigquery-public-data.usa_names.usa_1910_current` WHERE state LIKE 'W%' - """.format(project, dataset_id, table_id) + """.format( + project, dataset_id, table_id + ) job = client.query(sql) # API request. job.result() # Waits for the query to finish. - print('Created new view "{}.{}.{}".'.format( - job.destination.project, - job.destination.dataset_id, - job.destination.table_id)) + print( + 'Created new view "{}.{}.{}".'.format( + job.destination.project, + job.destination.dataset_id, + job.destination.table_id, + ) + ) # [END bigquery_ddl_create_view] out, _ = capsys.readouterr() - assert 'Created new view "{}.{}.{}".'.format( - project, dataset_id, table_id) in out + assert 'Created new view "{}.{}.{}".'.format(project, dataset_id, table_id) in out # Test that listing query result rows succeeds so that generic query # processing tools work with DDL statements. @@ -2927,12 +2998,12 @@ def test_client_list_jobs(client): # Use state_filter to filter by job state. print("Jobs currently running:") - for job in client.list_jobs(state_filter='RUNNING'): + for job in client.list_jobs(state_filter="RUNNING"): print(job.job_id) # [END bigquery_list_jobs] -@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_query_results_as_dataframe(client): # [START bigquery_query_results_dataframe] # from google.cloud import bigquery @@ -2950,30 +3021,30 @@ def test_query_results_as_dataframe(client): # [END bigquery_query_results_dataframe] assert isinstance(df, pandas.DataFrame) assert len(list(df)) == 2 # verify the number of columns - assert len(df) == 10 # verify the number of rows + assert len(df) == 10 # verify the number of rows -@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_list_rows_as_dataframe(client): # [START bigquery_list_rows_dataframe] # from google.cloud import bigquery # client = bigquery.Client() - dataset_ref = client.dataset('samples', project='bigquery-public-data') - table_ref = dataset_ref.table('shakespeare') + dataset_ref = client.dataset("samples", project="bigquery-public-data") + table_ref = dataset_ref.table("shakespeare") table = client.get_table(table_ref) df = client.list_rows(table).to_dataframe() # [END bigquery_list_rows_dataframe] assert isinstance(df, pandas.DataFrame) assert len(list(df)) == len(table.schema) # verify the number of columns - assert len(df) == table.num_rows # verify the number of rows + assert len(df) == table.num_rows # verify the number of rows -@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') -@pytest.mark.skipif(pyarrow is None, reason='Requires `pyarrow`') +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_load_table_from_dataframe(client, to_delete): - dataset_id = 'load_table_from_dataframe_{}'.format(_millis()) + dataset_id = "load_table_from_dataframe_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -2985,34 +3056,30 @@ def test_load_table_from_dataframe(client, to_delete): # dataset_id = 'my_dataset' dataset_ref = client.dataset(dataset_id) - table_ref = dataset_ref.table('monty_python') + table_ref = dataset_ref.table("monty_python") records = [ - {'title': 'The Meaning of Life', 'release_year': 1983}, - {'title': 'Monty Python and the Holy Grail', 'release_year': 1975}, - {'title': 'Life of Brian', 'release_year': 1979}, - { - 'title': 'And Now for Something Completely Different', - 'release_year': 1971 - }, + {"title": "The Meaning of Life", "release_year": 1983}, + {"title": "Monty Python and the Holy Grail", "release_year": 1975}, + {"title": "Life of Brian", "release_year": 1979}, + {"title": "And Now for Something Completely Different", "release_year": 1971}, ] # Optionally set explicit indices. # If indices are not specified, a column will be created for the default # indices created by pandas. - index = ['Q24980', 'Q25043', 'Q24953', 'Q16403'] - dataframe = pandas.DataFrame( - records, index=pandas.Index(index, name='wikidata_id')) + index = ["Q24980", "Q25043", "Q24953", "Q16403"] + dataframe = pandas.DataFrame(records, index=pandas.Index(index, name="wikidata_id")) - job = client.load_table_from_dataframe(dataframe, table_ref, location='US') + job = client.load_table_from_dataframe(dataframe, table_ref, location="US") job.result() # Waits for table load to complete. - assert job.state == 'DONE' + assert job.state == "DONE" table = client.get_table(table_ref) assert table.num_rows == 4 # [END bigquery_load_table_dataframe] column_names = [field.name for field in table.schema] - assert sorted(column_names) == ['release_year', 'title', 'wikidata_id'] + assert sorted(column_names) == ["release_year", "title", "wikidata_id"] -if __name__ == '__main__': +if __name__ == "__main__": pytest.main() diff --git a/packages/google-cloud-bigquery/google/__init__.py b/packages/google-cloud-bigquery/google/__init__.py index 9ee9bf4342ab..0e1bc5131ba6 100644 --- a/packages/google-cloud-bigquery/google/__init__.py +++ b/packages/google-cloud-bigquery/google/__init__.py @@ -14,7 +14,9 @@ try: import pkg_resources + pkg_resources.declare_namespace(__name__) except ImportError: import pkgutil + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/packages/google-cloud-bigquery/google/cloud/__init__.py b/packages/google-cloud-bigquery/google/cloud/__init__.py index 9ee9bf4342ab..0e1bc5131ba6 100644 --- a/packages/google-cloud-bigquery/google/cloud/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/__init__.py @@ -14,7 +14,9 @@ try: import pkg_resources + pkg_resources.declare_namespace(__name__) except ImportError: import pkgutil + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 65392214eeda..c3865d511b6c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -29,7 +29,8 @@ from pkg_resources import get_distribution -__version__ = get_distribution('google-cloud-bigquery').version + +__version__ = get_distribution("google-cloud-bigquery").version from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry @@ -73,52 +74,52 @@ from google.cloud.bigquery.table import TimePartitioning __all__ = [ - '__version__', - 'Client', + "__version__", + "Client", # Queries - 'QueryJob', - 'QueryJobConfig', - 'ArrayQueryParameter', - 'ScalarQueryParameter', - 'StructQueryParameter', + "QueryJob", + "QueryJobConfig", + "ArrayQueryParameter", + "ScalarQueryParameter", + "StructQueryParameter", # Datasets - 'Dataset', - 'DatasetReference', - 'AccessEntry', + "Dataset", + "DatasetReference", + "AccessEntry", # Tables - 'EncryptionConfiguration', - 'Table', - 'TableReference', - 'Row', - 'CopyJob', - 'CopyJobConfig', - 'ExtractJob', - 'ExtractJobConfig', - 'LoadJob', - 'LoadJobConfig', - 'UnknownJob', - 'TimePartitioningType', - 'TimePartitioning', + "EncryptionConfiguration", + "Table", + "TableReference", + "Row", + "CopyJob", + "CopyJobConfig", + "ExtractJob", + "ExtractJobConfig", + "LoadJob", + "LoadJobConfig", + "UnknownJob", + "TimePartitioningType", + "TimePartitioning", # Shared helpers - 'SchemaField', - 'UDFResource', - 'ExternalConfig', - 'BigtableOptions', - 'BigtableColumnFamily', - 'BigtableColumn', - 'CSVOptions', - 'GoogleSheetsOptions', - 'DEFAULT_RETRY', + "SchemaField", + "UDFResource", + "ExternalConfig", + "BigtableOptions", + "BigtableColumnFamily", + "BigtableColumn", + "CSVOptions", + "GoogleSheetsOptions", + "DEFAULT_RETRY", # Enum Constants - 'Compression', - 'CreateDisposition', - 'DestinationFormat', - 'ExternalSourceFormat', - 'Encoding', - 'QueryPriority', - 'SchemaUpdateOption', - 'SourceFormat', - 'WriteDisposition' + "Compression", + "CreateDisposition", + "DestinationFormat", + "ExternalSourceFormat", + "Encoding", + "QueryPriority", + "SchemaUpdateOption", + "SourceFormat", + "WriteDisposition", ] @@ -127,4 +128,5 @@ def load_ipython_extension(ipython): from google.cloud.bigquery.magics import _cell_magic ipython.register_magic_function( - _cell_magic, magic_kind='cell', magic_name='bigquery') + _cell_magic, magic_kind="cell", magic_name="bigquery" + ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index c0a29b427b3b..6990fb3eaa69 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -25,14 +25,14 @@ from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes -_RFC3339_MICROS_NO_ZULU = '%Y-%m-%dT%H:%M:%S.%f' -_TIMEONLY_WO_MICROS = '%H:%M:%S' -_TIMEONLY_W_MICROS = '%H:%M:%S.%f' +_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" +_TIMEONLY_WO_MICROS = "%H:%M:%S" +_TIMEONLY_W_MICROS = "%H:%M:%S.%f" def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" - return value is not None or field.mode != 'NULLABLE' + return value is not None or field.mode != "NULLABLE" def _int_from_json(value, field): @@ -56,7 +56,7 @@ def _decimal_from_json(value, field): def _bool_from_json(value, field): """Coerce 'value' to a bool, if set or not nullable.""" if _not_null(value, field): - return value.lower() in ['t', 'true', '1'] + return value.lower() in ["t", "true", "1"] def _string_from_json(value, _): @@ -93,19 +93,21 @@ def _timestamp_query_param_from_json(value, field): # Canonical formats for timestamps in BigQuery are flexible. See: # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type # The separator between the date and time can be 'T' or ' '. - value = value.replace(' ', 'T', 1) + value = value.replace(" ", "T", 1) # The UTC timezone may be formatted as Z or +00:00. - value = value.replace('Z', '') - value = value.replace('+00:00', '') + value = value.replace("Z", "") + value = value.replace("+00:00", "") - if '.' in value: + if "." in value: # YYYY-MM-DDTHH:MM:SS.ffffff - return datetime.datetime.strptime( - value, _RFC3339_MICROS_NO_ZULU).replace(tzinfo=UTC) + return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU).replace( + tzinfo=UTC + ) else: # YYYY-MM-DDTHH:MM:SS - return datetime.datetime.strptime( - value, _RFC3339_NO_FRACTION).replace(tzinfo=UTC) + return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION).replace( + tzinfo=UTC + ) else: return None @@ -123,7 +125,7 @@ def _datetime_from_json(value, field): :data:`None`). """ if _not_null(value, field): - if '.' in value: + if "." in value: # YYYY-MM-DDTHH:MM:SS.ffffff return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU) else: @@ -156,37 +158,37 @@ def _record_from_json(value, field): """Coerce 'value' to a mapping, if set or not nullable.""" if _not_null(value, field): record = {} - record_iter = zip(field.fields, value['f']) + record_iter = zip(field.fields, value["f"]) for subfield, cell in record_iter: converter = _CELLDATA_FROM_JSON[subfield.field_type] - if subfield.mode == 'REPEATED': - value = [converter(item['v'], subfield) for item in cell['v']] + if subfield.mode == "REPEATED": + value = [converter(item["v"], subfield) for item in cell["v"]] else: - value = converter(cell['v'], subfield) + value = converter(cell["v"], subfield) record[subfield.name] = value return record _CELLDATA_FROM_JSON = { - 'INTEGER': _int_from_json, - 'INT64': _int_from_json, - 'FLOAT': _float_from_json, - 'FLOAT64': _float_from_json, - 'NUMERIC': _decimal_from_json, - 'BOOLEAN': _bool_from_json, - 'BOOL': _bool_from_json, - 'STRING': _string_from_json, - 'GEOGRAPHY': _string_from_json, - 'BYTES': _bytes_from_json, - 'TIMESTAMP': _timestamp_from_json, - 'DATETIME': _datetime_from_json, - 'DATE': _date_from_json, - 'TIME': _time_from_json, - 'RECORD': _record_from_json, + "INTEGER": _int_from_json, + "INT64": _int_from_json, + "FLOAT": _float_from_json, + "FLOAT64": _float_from_json, + "NUMERIC": _decimal_from_json, + "BOOLEAN": _bool_from_json, + "BOOL": _bool_from_json, + "STRING": _string_from_json, + "GEOGRAPHY": _string_from_json, + "BYTES": _bytes_from_json, + "TIMESTAMP": _timestamp_from_json, + "DATETIME": _datetime_from_json, + "DATE": _date_from_json, + "TIME": _time_from_json, + "RECORD": _record_from_json, } _QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) -_QUERY_PARAMS_FROM_JSON['TIMESTAMP'] = _timestamp_query_param_from_json +_QUERY_PARAMS_FROM_JSON["TIMESTAMP"] = _timestamp_query_param_from_json def _field_to_index_mapping(schema): @@ -210,13 +212,12 @@ def _row_tuple_from_json(row, schema): :returns: A tuple of data converted to native types. """ row_data = [] - for field, cell in zip(schema, row['f']): + for field, cell in zip(schema, row["f"]): converter = _CELLDATA_FROM_JSON[field.field_type] - if field.mode == 'REPEATED': - row_data.append([converter(item['v'], field) - for item in cell['v']]) + if field.mode == "REPEATED": + row_data.append([converter(item["v"], field) for item in cell["v"]]) else: - row_data.append(converter(cell['v'], field)) + row_data.append(converter(cell["v"], field)) return tuple(row_data) @@ -226,8 +227,7 @@ def _rows_from_json(values, schema): from google.cloud.bigquery import Row field_to_index = _field_to_index_mapping(schema) - return [Row(_row_tuple_from_json(r, schema), field_to_index) - for r in values] + return [Row(_row_tuple_from_json(r, schema), field_to_index) for r in values] def _int_to_json(value): @@ -252,14 +252,14 @@ def _decimal_to_json(value): def _bool_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, bool): - value = 'true' if value else 'false' + value = "true" if value else "false" return value def _bytes_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, bytes): - value = base64.standard_b64encode(value).decode('ascii') + value = base64.standard_b64encode(value).decode("ascii") return value @@ -272,8 +272,7 @@ def _timestamp_to_json_parameter(value): if value.tzinfo not in (None, UTC): # Convert to UTC and remove the time zone info. value = value.replace(tzinfo=None) - value.utcoffset() - value = '%s %s+00:00' % ( - value.date().isoformat(), value.time().isoformat()) + value = "%s %s+00:00" % (value.date().isoformat(), value.time().isoformat()) return value @@ -310,30 +309,30 @@ def _time_to_json(value): # Converters used for scalar values marshalled as row data. _SCALAR_VALUE_TO_JSON_ROW = { - 'INTEGER': _int_to_json, - 'INT64': _int_to_json, - 'FLOAT': _float_to_json, - 'FLOAT64': _float_to_json, - 'NUMERIC': _decimal_to_json, - 'BOOLEAN': _bool_to_json, - 'BOOL': _bool_to_json, - 'BYTES': _bytes_to_json, - 'TIMESTAMP': _timestamp_to_json_row, - 'DATETIME': _datetime_to_json, - 'DATE': _date_to_json, - 'TIME': _time_to_json, + "INTEGER": _int_to_json, + "INT64": _int_to_json, + "FLOAT": _float_to_json, + "FLOAT64": _float_to_json, + "NUMERIC": _decimal_to_json, + "BOOLEAN": _bool_to_json, + "BOOL": _bool_to_json, + "BYTES": _bytes_to_json, + "TIMESTAMP": _timestamp_to_json_row, + "DATETIME": _datetime_to_json, + "DATE": _date_to_json, + "TIME": _time_to_json, } # Converters used for scalar values marshalled as query parameters. _SCALAR_VALUE_TO_JSON_PARAM = _SCALAR_VALUE_TO_JSON_ROW.copy() -_SCALAR_VALUE_TO_JSON_PARAM['TIMESTAMP'] = _timestamp_to_json_parameter +_SCALAR_VALUE_TO_JSON_PARAM["TIMESTAMP"] = _timestamp_to_json_parameter def _snake_to_camel_case(value): """Convert snake case string to camel case.""" - words = value.split('_') - return words[0] + ''.join(map(str.capitalize, words[1:])) + words = value.split("_") + return words[0] + "".join(map(str.capitalize, words[1:])) def _get_sub_prop(container, keys, default=None): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index c2698cd80bc2..1dd7524542a5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -29,15 +29,13 @@ class Connection(_http.JSONConnection): :param client: The client that owns the current connection. """ - API_BASE_URL = 'https://www.googleapis.com' + API_BASE_URL = "https://www.googleapis.com" """The base of the API call URL.""" - API_VERSION = 'v2' + API_VERSION = "v2" """The version of the API, used in building the API call's URL.""" - API_URL_TEMPLATE = '{api_base_url}/bigquery/{api_version}{path}' + API_URL_TEMPLATE = "{api_base_url}/bigquery/{api_version}{path}" """A template for the URL of a particular API call.""" - _EXTRA_HEADERS = { - _http.CLIENT_INFO_HEADER: _CLIENT_INFO, - } + _EXTRA_HEADERS = {_http.CLIENT_INFO_HEADER: _CLIENT_INFO} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 8b18da22c59b..12c0b57ad641 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -58,14 +58,15 @@ _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 _BASE_UPLOAD_TEMPLATE = ( - u'https://www.googleapis.com/upload/bigquery/v2/projects/' - u'{project}/jobs?uploadType=') -_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'multipart' -_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'resumable' -_GENERIC_CONTENT_TYPE = u'*/*' + u"https://www.googleapis.com/upload/bigquery/v2/projects/" + u"{project}/jobs?uploadType=" +) +_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u"multipart" +_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u"resumable" +_GENERIC_CONTENT_TYPE = u"*/*" _READ_LESS_THAN_SIZE = ( - 'Size {:d} was specified but the file-like object only had ' - '{:d} bytes remaining.') + "Size {:d} was specified but the file-like object only had " "{:d} bytes remaining." +) class Project(object): @@ -80,6 +81,7 @@ class Project(object): :type friendly_name: str :param friendly_name: Display name of the project """ + def __init__(self, project_id, numeric_id, friendly_name): self.project_id = project_id self.numeric_id = numeric_id @@ -88,8 +90,7 @@ def __init__(self, project_id, numeric_id, friendly_name): @classmethod def from_api_repr(cls, resource): """Factory: construct an instance from a resource dict.""" - return cls( - resource['id'], resource['numericId'], resource['friendlyName']) + return cls(resource["id"], resource["numericId"], resource["friendlyName"]) class Client(ClientWithProject): @@ -124,15 +125,23 @@ class Client(ClientWithProject): to acquire default credentials. """ - SCOPE = ('https://www.googleapis.com/auth/bigquery', - 'https://www.googleapis.com/auth/cloud-platform') + SCOPE = ( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/cloud-platform", + ) """The scopes required for authenticating as a BigQuery consumer.""" def __init__( - self, project=None, credentials=None, _http=None, - location=None, default_query_job_config=None): + self, + project=None, + credentials=None, + _http=None, + location=None, + default_query_job_config=None, + ): super(Client, self).__init__( - project=project, credentials=credentials, _http=_http) + project=project, credentials=credentials, _http=_http + ) self._connection = Connection(self) self._location = location self._default_query_job_config = default_query_job_config @@ -167,12 +176,11 @@ def get_service_account_email(self, project=None): """ if project is None: project = self.project - path = '/projects/%s/serviceAccount' % (project,) - api_response = self._connection.api_request(method='GET', path=path) - return api_response['email'] + path = "/projects/%s/serviceAccount" % (project,) + api_response = self._connection.api_request(method="GET", path=path) + return api_response["email"] - def list_projects(self, max_results=None, page_token=None, - retry=DEFAULT_RETRY): + def list_projects(self, max_results=None, page_token=None, retry=DEFAULT_RETRY): """List projects for the project associated with this client. See @@ -201,15 +209,22 @@ def list_projects(self, max_results=None, page_token=None, return page_iterator.HTTPIterator( client=self, api_request=functools.partial(self._call_api, retry), - path='/projects', + path="/projects", item_to_value=_item_to_project, - items_key='projects', + items_key="projects", page_token=page_token, - max_results=max_results) + max_results=max_results, + ) def list_datasets( - self, project=None, include_all=False, filter=None, - max_results=None, page_token=None, retry=DEFAULT_RETRY): + self, + project=None, + include_all=False, + filter=None, + max_results=None, + page_token=None, + retry=DEFAULT_RETRY, + ): """List datasets for the project associated with this client. See @@ -248,21 +263,22 @@ def list_datasets( if project is None: project = self.project if include_all: - extra_params['all'] = True + extra_params["all"] = True if filter: # TODO: consider supporting a dict of label -> value for filter, # and converting it into a string here. - extra_params['filter'] = filter - path = '/projects/%s/datasets' % (project,) + extra_params["filter"] = filter + path = "/projects/%s/datasets" % (project,) return page_iterator.HTTPIterator( client=self, api_request=functools.partial(self._call_api, retry), path=path, item_to_value=_item_to_dataset, - items_key='datasets', + items_key="datasets", page_token=page_token, max_results=max_results, - extra_params=extra_params) + extra_params=extra_params, + ) def dataset(self, dataset_id, project=None): """Construct a reference to a dataset. @@ -312,18 +328,18 @@ def create_dataset(self, dataset): """ if isinstance(dataset, str): dataset = DatasetReference.from_string( - dataset, default_project=self.project) + dataset, default_project=self.project + ) if isinstance(dataset, DatasetReference): dataset = Dataset(dataset) - path = '/projects/%s/datasets' % (dataset.project,) + path = "/projects/%s/datasets" % (dataset.project,) data = dataset.to_api_repr() - if data.get('location') is None and self.location is not None: - data['location'] = self.location + if data.get("location") is None and self.location is not None: + data["location"] = self.location - api_response = self._connection.api_request( - method='POST', path=path, data=data) + api_response = self._connection.api_request(method="POST", path=path, data=data) return Dataset.from_api_repr(api_response) @@ -349,15 +365,14 @@ def create_table(self, table): A new ``Table`` returned from the service. """ if isinstance(table, str): - table = TableReference.from_string( - table, default_project=self.project) + table = TableReference.from_string(table, default_project=self.project) if isinstance(table, TableReference): table = Table(table) - path = '/projects/%s/datasets/%s/tables' % ( - table.project, table.dataset_id) + path = "/projects/%s/datasets/%s/tables" % (table.project, table.dataset_id) api_response = self._connection.api_request( - method='POST', path=path, data=table.to_api_repr()) + method="POST", path=path, data=table.to_api_repr() + ) return Table.from_api_repr(api_response) def _call_api(self, retry, **kwargs): @@ -387,10 +402,10 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): """ if isinstance(dataset_ref, str): dataset_ref = DatasetReference.from_string( - dataset_ref, default_project=self.project) + dataset_ref, default_project=self.project + ) - api_response = self._call_api( - retry, method='GET', path=dataset_ref.path) + api_response = self._call_api(retry, method="GET", path=dataset_ref.path) return Dataset.from_api_repr(api_response) def get_table(self, table_ref, retry=DEFAULT_RETRY): @@ -414,9 +429,10 @@ def get_table(self, table_ref, retry=DEFAULT_RETRY): """ if isinstance(table_ref, str): table_ref = TableReference.from_string( - table_ref, default_project=self.project) + table_ref, default_project=self.project + ) - api_response = self._call_api(retry, method='GET', path=table_ref.path) + api_response = self._call_api(retry, method="GET", path=table_ref.path) return Table.from_api_repr(api_response) def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): @@ -447,15 +463,12 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): """ partial = dataset._build_resource(fields) if dataset.etag is not None: - headers = {'If-Match': dataset.etag} + headers = {"If-Match": dataset.etag} else: headers = None api_response = self._call_api( - retry, - method='PATCH', - path=dataset.path, - data=partial, - headers=headers) + retry, method="PATCH", path=dataset.path, data=partial, headers=headers + ) return Dataset.from_api_repr(api_response) def update_table(self, table, fields, retry=DEFAULT_RETRY): @@ -485,16 +498,17 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY): """ partial = table._build_resource(fields) if table.etag is not None: - headers = {'If-Match': table.etag} + headers = {"If-Match": table.etag} else: headers = None api_response = self._call_api( - retry, - method='PATCH', path=table.path, data=partial, headers=headers) + retry, method="PATCH", path=table.path, data=partial, headers=headers + ) return Table.from_api_repr(api_response) - def list_tables(self, dataset, max_results=None, page_token=None, - retry=DEFAULT_RETRY): + def list_tables( + self, dataset, max_results=None, page_token=None, retry=DEFAULT_RETRY + ): """List tables in the dataset. See @@ -531,26 +545,26 @@ def list_tables(self, dataset, max_results=None, page_token=None, """ if isinstance(dataset, str): dataset = DatasetReference.from_string( - dataset, default_project=self.project) + dataset, default_project=self.project + ) if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError( - 'dataset must be a Dataset, DatasetReference, or string') + raise TypeError("dataset must be a Dataset, DatasetReference, or string") - path = '%s/tables' % dataset.path + path = "%s/tables" % dataset.path result = page_iterator.HTTPIterator( client=self, api_request=functools.partial(self._call_api, retry), path=path, item_to_value=_item_to_table, - items_key='tables', + items_key="tables", page_token=page_token, - max_results=max_results) + max_results=max_results, + ) result.dataset = dataset return result - def delete_dataset(self, dataset, delete_contents=False, - retry=DEFAULT_RETRY): + def delete_dataset(self, dataset, delete_contents=False, retry=DEFAULT_RETRY): """Delete a dataset. See @@ -575,19 +589,17 @@ def delete_dataset(self, dataset, delete_contents=False, """ if isinstance(dataset, str): dataset = DatasetReference.from_string( - dataset, default_project=self.project) + dataset, default_project=self.project + ) if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError('dataset must be a Dataset or a DatasetReference') + raise TypeError("dataset must be a Dataset or a DatasetReference") params = {} if delete_contents: - params['deleteContents'] = 'true' + params["deleteContents"] = "true" - self._call_api(retry, - method='DELETE', - path=dataset.path, - query_params=params) + self._call_api(retry, method="DELETE", path=dataset.path, query_params=params) def delete_table(self, table, retry=DEFAULT_RETRY): """Delete a table @@ -609,15 +621,15 @@ def delete_table(self, table, retry=DEFAULT_RETRY): (Optional) How to retry the RPC. """ if isinstance(table, str): - table = TableReference.from_string( - table, default_project=self.project) + table = TableReference.from_string(table, default_project=self.project) if not isinstance(table, (Table, TableReference)): - raise TypeError('table must be a Table or a TableReference') - self._call_api(retry, method='DELETE', path=table.path) + raise TypeError("table must be a Table or a TableReference") + self._call_api(retry, method="DELETE", path=table.path) def _get_query_results( - self, job_id, retry, project=None, timeout_ms=None, location=None): + self, job_id, retry, project=None, timeout_ms=None, location=None + ): """Get the query results object for a query job. Arguments: @@ -637,27 +649,28 @@ def _get_query_results( A new ``_QueryResults`` instance. """ - extra_params = {'maxResults': 0} + extra_params = {"maxResults": 0} if project is None: project = self.project if timeout_ms is not None: - extra_params['timeoutMs'] = timeout_ms + extra_params["timeoutMs"] = timeout_ms if location is None: location = self.location if location is not None: - extra_params['location'] = location + extra_params["location"] = location - path = '/projects/{}/queries/{}'.format(project, job_id) + path = "/projects/{}/queries/{}".format(project, job_id) # This call is typically made in a polling loop that checks whether the # job is complete (from QueryJob.done(), called ultimately from # QueryJob.result()). So we don't need to poll here. resource = self._call_api( - retry, method='GET', path=path, query_params=extra_params) + retry, method="GET", path=path, query_params=extra_params + ) return _QueryResults.from_api_repr(resource) def job_from_resource(self, resource): @@ -673,19 +686,18 @@ def job_from_resource(self, resource): or :class:`google.cloud.bigquery.job.QueryJob` :returns: the job instance, constructed via the resource """ - config = resource.get('configuration', {}) - if 'load' in config: + config = resource.get("configuration", {}) + if "load" in config: return job.LoadJob.from_api_repr(resource, self) - elif 'copy' in config: + elif "copy" in config: return job.CopyJob.from_api_repr(resource, self) - elif 'extract' in config: + elif "extract" in config: return job.ExtractJob.from_api_repr(resource, self) - elif 'query' in config: + elif "query" in config: return job.QueryJob.from_api_repr(resource, self) return job.UnknownJob.from_api_repr(resource, self) - def get_job( - self, job_id, project=None, location=None, retry=DEFAULT_RETRY): + def get_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): """Fetch a job for the project associated with this client. See @@ -709,7 +721,7 @@ def get_job( google.cloud.bigquery.job.QueryJob]: Job instance, based on the resource returned by the API. """ - extra_params = {'projection': 'full'} + extra_params = {"projection": "full"} if project is None: project = self.project @@ -718,17 +730,17 @@ def get_job( location = self.location if location is not None: - extra_params['location'] = location + extra_params["location"] = location - path = '/projects/{}/jobs/{}'.format(project, job_id) + path = "/projects/{}/jobs/{}".format(project, job_id) resource = self._call_api( - retry, method='GET', path=path, query_params=extra_params) + retry, method="GET", path=path, query_params=extra_params + ) return self.job_from_resource(resource) - def cancel_job( - self, job_id, project=None, location=None, retry=DEFAULT_RETRY): + def cancel_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): """Attempt to cancel a job from a job ID. See @@ -752,7 +764,7 @@ def cancel_job( google.cloud.bigquery.job.QueryJob]: Job instance, based on the resource returned by the API. """ - extra_params = {'projection': 'full'} + extra_params = {"projection": "full"} if project is None: project = self.project @@ -761,19 +773,27 @@ def cancel_job( location = self.location if location is not None: - extra_params['location'] = location + extra_params["location"] = location - path = '/projects/{}/jobs/{}/cancel'.format(project, job_id) + path = "/projects/{}/jobs/{}/cancel".format(project, job_id) resource = self._call_api( - retry, method='POST', path=path, query_params=extra_params) + retry, method="POST", path=path, query_params=extra_params + ) - return self.job_from_resource(resource['job']) + return self.job_from_resource(resource["job"]) def list_jobs( - self, project=None, max_results=None, page_token=None, - all_users=None, state_filter=None, retry=DEFAULT_RETRY, - min_creation_time=None, max_creation_time=None): + self, + project=None, + max_results=None, + page_token=None, + all_users=None, + state_filter=None, + retry=DEFAULT_RETRY, + min_creation_time=None, + max_creation_time=None, + ): """List jobs for the project associated with this client. See @@ -816,42 +836,47 @@ def list_jobs( Iterable of job instances. """ extra_params = { - 'allUsers': all_users, - 'stateFilter': state_filter, - 'minCreationTime': _str_or_none( - google.cloud._helpers._millis_from_datetime( - min_creation_time)), - 'maxCreationTime': _str_or_none( - google.cloud._helpers._millis_from_datetime( - max_creation_time)), - 'projection': 'full' + "allUsers": all_users, + "stateFilter": state_filter, + "minCreationTime": _str_or_none( + google.cloud._helpers._millis_from_datetime(min_creation_time) + ), + "maxCreationTime": _str_or_none( + google.cloud._helpers._millis_from_datetime(max_creation_time) + ), + "projection": "full", } - extra_params = {param: value for param, value in extra_params.items() - if value is not None} + extra_params = { + param: value for param, value in extra_params.items() if value is not None + } if project is None: project = self.project - path = '/projects/%s/jobs' % (project,) + path = "/projects/%s/jobs" % (project,) return page_iterator.HTTPIterator( client=self, api_request=functools.partial(self._call_api, retry), path=path, item_to_value=_item_to_job, - items_key='jobs', + items_key="jobs", page_token=page_token, max_results=max_results, - extra_params=extra_params) + extra_params=extra_params, + ) def load_table_from_uri( - self, source_uris, destination, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY): + self, + source_uris, + destination, + job_id=None, + job_id_prefix=None, + location=None, + project=None, + job_config=None, + retry=DEFAULT_RETRY, + ): """Starts a job for loading data into a table from CloudStorage. See @@ -905,19 +930,27 @@ def load_table_from_uri( if isinstance(destination, str): destination = TableReference.from_string( - destination, default_project=self.project) + destination, default_project=self.project + ) - load_job = job.LoadJob( - job_ref, source_uris, destination, self, job_config) + load_job = job.LoadJob(job_ref, source_uris, destination, self, job_config) load_job._begin(retry=retry) return load_job def load_table_from_file( - self, file_obj, destination, rewind=False, size=None, - num_retries=_DEFAULT_NUM_RETRIES, job_id=None, - job_id_prefix=None, location=None, project=None, - job_config=None): + self, + file_obj, + destination, + rewind=False, + size=None, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=None, + job_id_prefix=None, + location=None, + project=None, + job_config=None, + ): """Upload the contents of this table from a file-like object. Similar to :meth:`load_table_from_uri`, this method creates, starts and @@ -976,7 +1009,8 @@ def load_table_from_file( if isinstance(destination, str): destination = TableReference.from_string( - destination, default_project=self.project) + destination, default_project=self.project + ) job_ref = job._JobReference(job_id, project=project, location=location) load_job = job.LoadJob(job_ref, None, destination, self, job_config) @@ -990,20 +1024,28 @@ def load_table_from_file( try: if size is None or size >= _MAX_MULTIPART_SIZE: response = self._do_resumable_upload( - file_obj, job_resource, num_retries) + file_obj, job_resource, num_retries + ) else: response = self._do_multipart_upload( - file_obj, job_resource, size, num_retries) + file_obj, job_resource, size, num_retries + ) except resumable_media.InvalidResponse as exc: raise exceptions.from_http_response(exc.response) return self.job_from_resource(response.json()) - def load_table_from_dataframe(self, dataframe, destination, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, job_id_prefix=None, - location=None, project=None, - job_config=None): + def load_table_from_dataframe( + self, + dataframe, + destination, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=None, + job_id_prefix=None, + location=None, + project=None, + job_config=None, + ): """Upload the contents of a table from a pandas DataFrame. Similar to :meth:`load_table_from_uri`, this method creates, starts and @@ -1058,7 +1100,8 @@ def load_table_from_dataframe(self, dataframe, destination, location = self.location return self.load_table_from_file( - buffer, destination, + buffer, + destination, num_retries=num_retries, rewind=True, job_id=job_id, @@ -1086,7 +1129,8 @@ def _do_resumable_upload(self, stream, metadata, num_retries): is uploaded. """ upload, transport = self._initiate_resumable_upload( - stream, metadata, num_retries) + stream, metadata, num_retries + ) while not upload.finished: response = upload.transmit_next_chunk(transport) @@ -1124,11 +1168,12 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries): if num_retries is not None: upload._retry_strategy = resumable_media.RetryStrategy( - max_retries=num_retries) + max_retries=num_retries + ) upload.initiate( - transport, stream, metadata, _GENERIC_CONTENT_TYPE, - stream_final=False) + transport, stream, metadata, _GENERIC_CONTENT_TYPE, stream_final=False + ) return upload, transport @@ -1168,17 +1213,24 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries): if num_retries is not None: upload._retry_strategy = resumable_media.RetryStrategy( - max_retries=num_retries) + max_retries=num_retries + ) - response = upload.transmit( - self._http, data, metadata, _GENERIC_CONTENT_TYPE) + response = upload.transmit(self._http, data, metadata, _GENERIC_CONTENT_TYPE) return response def copy_table( - self, sources, destination, job_id=None, job_id_prefix=None, - location=None, project=None, job_config=None, - retry=DEFAULT_RETRY): + self, + sources, + destination, + job_id=None, + job_id_prefix=None, + location=None, + project=None, + job_config=None, + retry=DEFAULT_RETRY, + ): """Copy one or more tables to another table. See @@ -1229,27 +1281,34 @@ def copy_table( job_ref = job._JobReference(job_id, project=project, location=location) if isinstance(sources, str): - sources = TableReference.from_string( - sources, default_project=self.project) + sources = TableReference.from_string(sources, default_project=self.project) if isinstance(destination, str): destination = TableReference.from_string( - destination, default_project=self.project) + destination, default_project=self.project + ) if not isinstance(sources, collections_abc.Sequence): sources = [sources] copy_job = job.CopyJob( - job_ref, sources, destination, client=self, - job_config=job_config) + job_ref, sources, destination, client=self, job_config=job_config + ) copy_job._begin(retry=retry) return copy_job def extract_table( - self, source, destination_uris, job_id=None, job_id_prefix=None, - location=None, project=None, job_config=None, - retry=DEFAULT_RETRY): + self, + source, + destination_uris, + job_id=None, + job_id_prefix=None, + location=None, + project=None, + job_config=None, + retry=DEFAULT_RETRY, + ): """Start a job to extract a table into Cloud Storage files. See @@ -1300,24 +1359,28 @@ def extract_table( job_ref = job._JobReference(job_id, project=project, location=location) if isinstance(source, str): - source = TableReference.from_string( - source, default_project=self.project) + source = TableReference.from_string(source, default_project=self.project) if isinstance(destination_uris, six.string_types): destination_uris = [destination_uris] extract_job = job.ExtractJob( - job_ref, source, destination_uris, client=self, - job_config=job_config) + job_ref, source, destination_uris, client=self, job_config=job_config + ) extract_job._begin(retry=retry) return extract_job def query( - self, query, - job_config=None, - job_id=None, job_id_prefix=None, - location=None, project=None, retry=DEFAULT_RETRY): + self, + query, + job_config=None, + job_id=None, + job_id_prefix=None, + location=None, + project=None, + retry=DEFAULT_RETRY, + ): """Run a SQL query. See @@ -1366,13 +1429,13 @@ def query( # should be filled in with the default # the incoming therefore has precedence job_config = job_config._fill_from_default( - self._default_query_job_config) + self._default_query_job_config + ) else: job_config = self._default_query_job_config job_ref = job._JobReference(job_id, project=project, location=location) - query_job = job.QueryJob( - job_ref, query, client=self, job_config=job_config) + query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) query_job._begin(retry=retry) return query_job @@ -1419,19 +1482,18 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): ValueError: if table's schema is not set """ if isinstance(table, str): - table = TableReference.from_string( - table, default_project=self.project) + table = TableReference.from_string(table, default_project=self.project) if selected_fields is not None: schema = selected_fields elif isinstance(table, TableReference): - raise ValueError('need selected_fields with TableReference') + raise ValueError("need selected_fields with TableReference") elif isinstance(table, Table): if len(table.schema) == 0: raise ValueError(_TABLE_HAS_NO_SCHEMA) schema = table.schema else: - raise TypeError('table should be Table or TableReference') + raise TypeError("table should be Table or TableReference") json_rows = [] @@ -1450,9 +1512,16 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): return self.insert_rows_json(table, json_rows, **kwargs) - def insert_rows_json(self, table, json_rows, row_ids=None, - skip_invalid_rows=None, ignore_unknown_values=None, - template_suffix=None, retry=DEFAULT_RETRY): + def insert_rows_json( + self, + table, + json_rows, + row_ids=None, + skip_invalid_rows=None, + ignore_unknown_values=None, + template_suffix=None, + retry=DEFAULT_RETRY, + ): """Insert rows into a table without applying local type conversions. See @@ -1493,40 +1562,36 @@ def insert_rows_json(self, table, json_rows, row_ids=None, the mappings describing one or more problems with the row. """ if isinstance(table, str): - table = TableReference.from_string( - table, default_project=self.project) + table = TableReference.from_string(table, default_project=self.project) rows_info = [] - data = {'rows': rows_info} + data = {"rows": rows_info} for index, row in enumerate(json_rows): - info = {'json': row} + info = {"json": row} if row_ids is not None: - info['insertId'] = row_ids[index] + info["insertId"] = row_ids[index] else: - info['insertId'] = str(uuid.uuid4()) + info["insertId"] = str(uuid.uuid4()) rows_info.append(info) if skip_invalid_rows is not None: - data['skipInvalidRows'] = skip_invalid_rows + data["skipInvalidRows"] = skip_invalid_rows if ignore_unknown_values is not None: - data['ignoreUnknownValues'] = ignore_unknown_values + data["ignoreUnknownValues"] = ignore_unknown_values if template_suffix is not None: - data['templateSuffix'] = template_suffix + data["templateSuffix"] = template_suffix # We can always retry, because every row has an insert ID. response = self._call_api( - retry, - method='POST', - path='%s/insertAll' % table.path, - data=data) + retry, method="POST", path="%s/insertAll" % table.path, data=data + ) errors = [] - for error in response.get('insertErrors', ()): - errors.append({'index': int(error['index']), - 'errors': error['errors']}) + for error in response.get("insertErrors", ()): + errors.append({"index": int(error["index"]), "errors": error["errors"]}) return errors @@ -1548,23 +1613,31 @@ def list_partitions(self, table, retry=DEFAULT_RETRY): A list of the partition ids present in the partitioned table """ if isinstance(table, str): - table = TableReference.from_string( - table, default_project=self.project) + table = TableReference.from_string(table, default_project=self.project) meta_table = self.get_table( TableReference( self.dataset(table.dataset_id, project=table.project), - '%s$__PARTITIONS_SUMMARY__' % table.table_id)) - - subset = [col for col in - meta_table.schema if col.name == 'partition_id'] - return [row[0] for row in self.list_rows(meta_table, - selected_fields=subset, - retry=retry)] + "%s$__PARTITIONS_SUMMARY__" % table.table_id, + ) + ) - def list_rows(self, table, selected_fields=None, max_results=None, - page_token=None, start_index=None, page_size=None, - retry=DEFAULT_RETRY): + subset = [col for col in meta_table.schema if col.name == "partition_id"] + return [ + row[0] + for row in self.list_rows(meta_table, selected_fields=subset, retry=retry) + ] + + def list_rows( + self, + table, + selected_fields=None, + max_results=None, + page_token=None, + start_index=None, + page_size=None, + retry=DEFAULT_RETRY, + ): """List the rows of the table. See @@ -1616,36 +1689,35 @@ def list_rows(self, table, selected_fields=None, max_results=None, current page: ``iterator.page.num_items``). """ if isinstance(table, str): - table = TableReference.from_string( - table, default_project=self.project) + table = TableReference.from_string(table, default_project=self.project) if selected_fields is not None: schema = selected_fields elif isinstance(table, TableReference): - raise ValueError('need selected_fields with TableReference') + raise ValueError("need selected_fields with TableReference") elif isinstance(table, Table): if len(table.schema) == 0 and table.created is None: raise ValueError(_TABLE_HAS_NO_SCHEMA) schema = table.schema else: - raise TypeError('table should be Table or TableReference') + raise TypeError("table should be Table or TableReference") params = {} if selected_fields is not None: - params['selectedFields'] = ','.join( - field.name for field in selected_fields) + params["selectedFields"] = ",".join(field.name for field in selected_fields) if start_index is not None: - params['startIndex'] = start_index + params["startIndex"] = start_index row_iterator = RowIterator( client=self, api_request=functools.partial(self._call_api, retry), - path='%s/data' % (table.path,), + path="%s/data" % (table.path,), schema=schema, page_token=page_token, max_results=max_results, page_size=page_size, - extra_params=params) + extra_params=params, + ) return row_iterator @@ -1663,6 +1735,8 @@ def _item_to_project(iterator, resource): :returns: The next project in the page. """ return Project.from_api_repr(resource) + + # pylint: enable=unused-argument @@ -1740,18 +1814,20 @@ def _check_mode(stream): :raises: :exc:`ValueError` if the ``stream.mode`` is a valid attribute and is not among ``rb``, ``r+b`` or ``rb+``. """ - mode = getattr(stream, 'mode', None) + mode = getattr(stream, "mode", None) if isinstance(stream, gzip.GzipFile): if mode != gzip.READ: raise ValueError( "Cannot upload gzip files opened in write mode: use " - "gzip.GzipFile(filename, mode='rb')") + "gzip.GzipFile(filename, mode='rb')" + ) else: - if mode is not None and mode not in ('rb', 'r+b', 'rb+'): + if mode is not None and mode not in ("rb", "r+b", "rb+"): raise ValueError( "Cannot upload files opened in text mode: use " - "open(filename, mode='rb') or open(filename, mode='r+b')") + "open(filename, mode='rb') or open(filename, mode='r+b')" + ) def _get_upload_headers(user_agent): @@ -1764,8 +1840,8 @@ def _get_upload_headers(user_agent): :returns: The headers to be used for the request. """ return { - 'Accept': 'application/json', - 'Accept-Encoding': 'gzip, deflate', - 'User-Agent': user_agent, - 'content-type': 'application/json', + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "User-Agent": user_agent, + "content-type": "application/json", } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 82d9b432b6d2..c4e8e839497c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -72,23 +72,28 @@ class AccessEntry(object): >>> entry = AccessEntry(None, 'view', view) """ - ENTITY_TYPES = frozenset(['userByEmail', 'groupByEmail', 'domain', - 'specialGroup', 'view']) + ENTITY_TYPES = frozenset( + ["userByEmail", "groupByEmail", "domain", "specialGroup", "view"] + ) """Allowed entity types.""" def __init__(self, role, entity_type, entity_id): if entity_type not in self.ENTITY_TYPES: - message = 'Entity type %r not among: %s' % ( - entity_type, ', '.join(self.ENTITY_TYPES)) + message = "Entity type %r not among: %s" % ( + entity_type, + ", ".join(self.ENTITY_TYPES), + ) raise ValueError(message) - if entity_type == 'view': + if entity_type == "view": if role is not None: - raise ValueError('Role must be None for a view. Received ' - 'role: %r' % (role,)) + raise ValueError( + "Role must be None for a view. Received " "role: %r" % (role,) + ) else: if role is None: - raise ValueError('Role must be set for entity ' - 'type %r' % (entity_type,)) + raise ValueError( + "Role must be set for entity " "type %r" % (entity_type,) + ) self.role = role self.entity_type = entity_type @@ -100,14 +105,18 @@ def __eq__(self, other): return ( self.role == other.role and self.entity_type == other.entity_type - and self.entity_id == other.entity_id) + and self.entity_id == other.entity_id + ) def __ne__(self, other): return not self == other def __repr__(self): - return '' % ( - self.role, self.entity_type, self.entity_id) + return "" % ( + self.role, + self.entity_type, + self.entity_id, + ) def to_api_repr(self): """Construct the API resource representation of this access entry @@ -117,7 +126,7 @@ def to_api_repr(self): """ resource = {self.entity_type: self.entity_id} if self.role is not None: - resource['role'] = self.role + resource["role"] = self.role return resource @classmethod @@ -138,10 +147,10 @@ def from_api_repr(cls, resource): key. """ entry = resource.copy() - role = entry.pop('role', None) + role = entry.pop("role", None) entity_type, entity_id = entry.popitem() if len(entry) != 0: - raise ValueError('Entry has unexpected keys remaining.', entry) + raise ValueError("Entry has unexpected keys remaining.", entry) return cls(role, entity_type, entity_id) @@ -180,7 +189,7 @@ def dataset_id(self): @property def path(self): """str: URL path for the dataset based on project and dataset ID.""" - return '/projects/%s/datasets/%s' % (self.project, self.dataset_id) + return "/projects/%s/datasets/%s" % (self.project, self.dataset_id) def table(self, table_id): """Constructs a TableReference. @@ -206,8 +215,8 @@ def from_api_repr(cls, resource): google.cloud.bigquery.dataset.DatasetReference: Dataset reference parsed from ``resource``. """ - project = resource['projectId'] - dataset_id = resource['datasetId'] + project = resource["projectId"] + dataset_id = resource["datasetId"] return cls(project, dataset_id) @classmethod @@ -238,20 +247,22 @@ def from_string(cls, dataset_id, default_project=None): """ output_dataset_id = dataset_id output_project_id = default_project - parts = dataset_id.split('.') + parts = dataset_id.split(".") if len(parts) == 1 and not default_project: raise ValueError( - 'When default_project is not set, dataset_id must be a ' - 'fully-qualified dataset ID in standard SQL format. ' - 'e.g. "project.dataset_id", got {}'.format(dataset_id)) + "When default_project is not set, dataset_id must be a " + "fully-qualified dataset ID in standard SQL format. " + 'e.g. "project.dataset_id", got {}'.format(dataset_id) + ) elif len(parts) == 2: output_project_id, output_dataset_id = parts elif len(parts) > 2: raise ValueError( - 'Too many parts in dataset_id. Expected a fully-qualified ' - 'dataset ID in standard SQL format. e.g. ' - '"project.dataset_id", got {}'.format(dataset_id)) + "Too many parts in dataset_id. Expected a fully-qualified " + "dataset ID in standard SQL format. e.g. " + '"project.dataset_id", got {}'.format(dataset_id) + ) return cls(output_project_id, output_dataset_id) @@ -261,10 +272,7 @@ def to_api_repr(self): Returns: Dict[str, str]: dataset reference represented as an API resource """ - return { - 'projectId': self._project, - 'datasetId': self._dataset_id, - } + return {"projectId": self._project, "datasetId": self._dataset_id} def _key(self): """A tuple key that uniquely describes this field. @@ -274,10 +282,7 @@ def _key(self): Returns: Tuple[str]: The contents of this :class:`.DatasetReference`. """ - return ( - self._project, - self._dataset_id, - ) + return (self._project, self._dataset_id) def __eq__(self, other): if not isinstance(other, DatasetReference): @@ -291,7 +296,7 @@ def __hash__(self): return hash(self._key()) def __repr__(self): - return 'DatasetReference{}'.format(self._key()) + return "DatasetReference{}".format(self._key()) class Dataset(object): @@ -306,27 +311,24 @@ class Dataset(object): """ _PROPERTY_TO_API_FIELD = { - 'access_entries': 'access', - 'created': 'creationTime', - 'default_table_expiration_ms': 'defaultTableExpirationMs', - 'friendly_name': 'friendlyName', + "access_entries": "access", + "created": "creationTime", + "default_table_expiration_ms": "defaultTableExpirationMs", + "friendly_name": "friendlyName", } def __init__(self, dataset_ref): - self._properties = { - 'datasetReference': dataset_ref.to_api_repr(), - 'labels': {}, - } + self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}} @property def project(self): """str: Project ID of the project bound to the dataset.""" - return self._properties['datasetReference']['projectId'] + return self._properties["datasetReference"]["projectId"] @property def path(self): """str: URL path for the dataset based on project and dataset ID.""" - return '/projects/%s/datasets/%s' % (self.project, self.dataset_id) + return "/projects/%s/datasets/%s" % (self.project, self.dataset_id) @property def access_entries(self): @@ -342,31 +344,32 @@ def access_entries(self): If any item in the sequence is not an :class:`~google.cloud.bigquery.dataset.AccessEntry`. """ - entries = self._properties.get('access', []) + entries = self._properties.get("access", []) return [AccessEntry.from_api_repr(entry) for entry in entries] @access_entries.setter def access_entries(self, value): if not all(isinstance(field, AccessEntry) for field in value): - raise ValueError('Values must be AccessEntry instances') + raise ValueError("Values must be AccessEntry instances") entries = [entry.to_api_repr() for entry in value] - self._properties['access'] = entries + self._properties["access"] = entries @property def created(self): """Union[datetime.datetime, None]: Datetime at which the dataset was created (:data:`None` until set from the server). """ - creation_time = self._properties.get('creationTime') + creation_time = self._properties.get("creationTime") if creation_time is not None: # creation_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( - 1000.0 * float(creation_time)) + 1000.0 * float(creation_time) + ) @property def dataset_id(self): """str: Dataset ID.""" - return self._properties['datasetReference']['datasetId'] + return self._properties["datasetReference"]["datasetId"] @property def full_dataset_id(self): @@ -375,7 +378,7 @@ def full_dataset_id(self): In the format ``project_id:dataset_id``. """ - return self._properties.get('id') + return self._properties.get("id") @property def reference(self): @@ -389,25 +392,26 @@ def etag(self): """Union[str, None]: ETag for the dataset resource (:data:`None` until set from the server). """ - return self._properties.get('etag') + return self._properties.get("etag") @property def modified(self): """Union[datetime.datetime, None]: Datetime at which the dataset was last modified (:data:`None` until set from the server). """ - modified_time = self._properties.get('lastModifiedTime') + modified_time = self._properties.get("lastModifiedTime") if modified_time is not None: # modified_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( - 1000.0 * float(modified_time)) + 1000.0 * float(modified_time) + ) @property def self_link(self): """Union[str, None]: URL for the dataset resource (:data:`None` until set from the server). """ - return self._properties.get('selfLink') + return self._properties.get("selfLink") @property def default_table_expiration_ms(self): @@ -417,15 +421,13 @@ def default_table_expiration_ms(self): Raises: ValueError: For invalid value types. """ - return _helpers._int_or_none( - self._properties.get('defaultTableExpirationMs')) + return _helpers._int_or_none(self._properties.get("defaultTableExpirationMs")) @default_table_expiration_ms.setter def default_table_expiration_ms(self, value): if not isinstance(value, six.integer_types) and value is not None: raise ValueError("Pass an integer, or None") - self._properties['defaultTableExpirationMs'] = _helpers._str_or_none( - value) + self._properties["defaultTableExpirationMs"] = _helpers._str_or_none(value) @property def description(self): @@ -435,13 +437,13 @@ def description(self): Raises: ValueError: for invalid value types. """ - return self._properties.get('description') + return self._properties.get("description") @description.setter def description(self, value): if not isinstance(value, six.string_types) and value is not None: raise ValueError("Pass a string, or None") - self._properties['description'] = value + self._properties["description"] = value @property def friendly_name(self): @@ -451,13 +453,13 @@ def friendly_name(self): Raises: ValueError: for invalid value types. """ - return self._properties.get('friendlyName') + return self._properties.get("friendlyName") @friendly_name.setter def friendly_name(self, value): if not isinstance(value, six.string_types) and value is not None: raise ValueError("Pass a string, or None") - self._properties['friendlyName'] = value + self._properties["friendlyName"] = value @property def location(self): @@ -467,13 +469,13 @@ def location(self): Raises: ValueError: for invalid value types. """ - return self._properties.get('location') + return self._properties.get("location") @location.setter def location(self, value): if not isinstance(value, six.string_types) and value is not None: raise ValueError("Pass a string, or None") - self._properties['location'] = value + self._properties["location"] = value @property def labels(self): @@ -487,13 +489,13 @@ def labels(self): Raises: ValueError: for invalid value types. """ - return self._properties.setdefault('labels', {}) + return self._properties.setdefault("labels", {}) @labels.setter def labels(self, value): if not isinstance(value, dict): raise ValueError("Pass a dict") - self._properties['labels'] = value + self._properties["labels"] = value @classmethod def from_string(cls, full_dataset_id): @@ -531,12 +533,16 @@ def from_api_repr(cls, resource): google.cloud.bigquery.dataset.Dataset: Dataset parsed from ``resource``. """ - if ('datasetReference' not in resource - or 'datasetId' not in resource['datasetReference']): - raise KeyError('Resource lacks required identity information:' - '["datasetReference"]["datasetId"]') - project_id = resource['datasetReference']['projectId'] - dataset_id = resource['datasetReference']['datasetId'] + if ( + "datasetReference" not in resource + or "datasetId" not in resource["datasetReference"] + ): + raise KeyError( + "Resource lacks required identity information:" + '["datasetReference"]["datasetId"]' + ) + project_id = resource["datasetReference"]["projectId"] + dataset_id = resource["datasetReference"]["datasetId"] dataset = cls(DatasetReference(project_id, dataset_id)) dataset._properties = copy.deepcopy(resource) return dataset @@ -555,7 +561,7 @@ def _build_resource(self, filter_fields): for filter_field in filter_fields: api_field = self._PROPERTY_TO_API_FIELD.get(filter_field) if api_field is None and filter_field not in self._properties: - raise ValueError('No Dataset property %s' % filter_field) + raise ValueError("No Dataset property %s" % filter_field) elif api_field is not None: partial[api_field] = self._properties.get(api_field) else: @@ -578,7 +584,7 @@ def table(self, table_id): return TableReference(self.reference, table_id) def __repr__(self): - return 'Dataset({})'.format(repr(self.reference)) + return "Dataset({})".format(repr(self.reference)) class DatasetListItem(object): @@ -605,25 +611,27 @@ class DatasetListItem(object): """ def __init__(self, resource): - if 'datasetReference' not in resource: - raise ValueError('resource must contain a datasetReference value') - if 'projectId' not in resource['datasetReference']: + if "datasetReference" not in resource: + raise ValueError("resource must contain a datasetReference value") + if "projectId" not in resource["datasetReference"]: raise ValueError( - "resource['datasetReference'] must contain a projectId value") - if 'datasetId' not in resource['datasetReference']: + "resource['datasetReference'] must contain a projectId value" + ) + if "datasetId" not in resource["datasetReference"]: raise ValueError( - "resource['datasetReference'] must contain a datasetId value") + "resource['datasetReference'] must contain a datasetId value" + ) self._properties = resource @property def project(self): """str: Project bound to the dataset.""" - return self._properties['datasetReference']['projectId'] + return self._properties["datasetReference"]["projectId"] @property def dataset_id(self): """str: Dataset ID.""" - return self._properties['datasetReference']['datasetId'] + return self._properties["datasetReference"]["datasetId"] @property def full_dataset_id(self): @@ -632,19 +640,19 @@ def full_dataset_id(self): In the format ``project_id:dataset_id``. """ - return self._properties.get('id') + return self._properties.get("id") @property def friendly_name(self): """Union[str, None]: Title of the dataset as set by the user (defaults to :data:`None`). """ - return self._properties.get('friendlyName') + return self._properties.get("friendlyName") @property def labels(self): """Dict[str, str]: Labels for the dataset.""" - return self._properties.setdefault('labels', {}) + return self._properties.setdefault("labels", {}) @property def reference(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py index de34d5553315..d1a723949b10 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/__init__.py @@ -48,18 +48,40 @@ from google.cloud.bigquery.dbapi.types import STRING -apilevel = '2.0' +apilevel = "2.0" # Threads may share the module and connections, but not cursors. threadsafety = 2 -paramstyle = 'pyformat' +paramstyle = "pyformat" __all__ = [ - 'apilevel', 'threadsafety', 'paramstyle', 'connect', 'Connection', - 'Cursor', 'Warning', 'Error', 'InterfaceError', 'DatabaseError', - 'DataError', 'OperationalError', 'IntegrityError', 'InternalError', - 'ProgrammingError', 'NotSupportedError', 'Binary', 'Date', 'DateFromTicks', - 'Time', 'TimeFromTicks', 'Timestamp', 'TimestampFromTicks', 'BINARY', - 'DATETIME', 'NUMBER', 'ROWID', 'STRING', + "apilevel", + "threadsafety", + "paramstyle", + "connect", + "Connection", + "Cursor", + "Warning", + "Error", + "InterfaceError", + "DatabaseError", + "DataError", + "OperationalError", + "IntegrityError", + "InternalError", + "ProgrammingError", + "NotSupportedError", + "Binary", + "Date", + "DateFromTicks", + "Time", + "TimeFromTicks", + "Timestamp", + "TimestampFromTicks", + "BINARY", + "DATETIME", + "NUMBER", + "ROWID", + "STRING", ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index ee9198cbada4..6e7f58bd4944 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -46,27 +46,29 @@ def scalar_to_query_parameter(value, name=None): parameter_type = None if isinstance(value, bool): - parameter_type = 'BOOL' + parameter_type = "BOOL" elif isinstance(value, numbers.Integral): - parameter_type = 'INT64' + parameter_type = "INT64" elif isinstance(value, numbers.Real): - parameter_type = 'FLOAT64' + parameter_type = "FLOAT64" elif isinstance(value, decimal.Decimal): - parameter_type = 'NUMERIC' + parameter_type = "NUMERIC" elif isinstance(value, six.text_type): - parameter_type = 'STRING' + parameter_type = "STRING" elif isinstance(value, six.binary_type): - parameter_type = 'BYTES' + parameter_type = "BYTES" elif isinstance(value, datetime.datetime): - parameter_type = 'DATETIME' if value.tzinfo is None else 'TIMESTAMP' + parameter_type = "DATETIME" if value.tzinfo is None else "TIMESTAMP" elif isinstance(value, datetime.date): - parameter_type = 'DATE' + parameter_type = "DATE" elif isinstance(value, datetime.time): - parameter_type = 'TIME' + parameter_type = "TIME" else: raise exceptions.ProgrammingError( - 'encountered parameter {} with value {} of unexpected type'.format( - name, value)) + "encountered parameter {} with value {} of unexpected type".format( + name, value + ) + ) return bigquery.ScalarQueryParameter(name, parameter_type, value) @@ -93,8 +95,8 @@ def to_query_parameters_dict(parameters): """ return [ scalar_to_query_parameter(value, name=name) - for name, value - in six.iteritems(parameters)] + for name, value in six.iteritems(parameters) + ] def to_query_parameters(parameters): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py index 5f962df97412..0dbc9143b255 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py @@ -24,6 +24,7 @@ class Connection(object): :type client: :class:`~google.cloud.bigquery.Client` :param client: A client used to connect to BigQuery. """ + def __init__(self, client): self._client = client diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index e56a343c362d..1fbd9fb10cc4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -33,11 +33,17 @@ # five are optional and are set to None if no meaningful values can be # provided. Column = collections.namedtuple( - 'Column', + "Column", [ - 'name', 'type_code', 'display_size', 'internal_size', 'precision', - 'scale', 'null_ok', - ]) + "name", + "type_code", + "display_size", + "internal_size", + "precision", + "scale", + "null_ok", + ], +) class Cursor(object): @@ -46,6 +52,7 @@ class Cursor(object): :type connection: :class:`~google.cloud.bigquery.dbapi.Connection` :param connection: A DB-API connection to Google BigQuery. """ + def __init__(self, connection): self.connection = connection self.description = None @@ -72,16 +79,20 @@ def _set_description(self, schema): self.description = None return - self.description = tuple([ - Column( - name=field.name, - type_code=field.field_type, - display_size=None, - internal_size=None, - precision=None, - scale=None, - null_ok=field.is_nullable) - for field in schema]) + self.description = tuple( + [ + Column( + name=field.name, + type_code=field.field_type, + display_size=None, + internal_size=None, + precision=None, + scale=None, + null_ok=field.is_nullable, + ) + for field in schema + ] + ) def _set_rowcount(self, query_results): """Set the rowcount from query results. @@ -97,8 +108,7 @@ def _set_rowcount(self, query_results): total_rows = 0 num_dml_affected_rows = query_results.num_dml_affected_rows - if (query_results.total_rows is not None - and query_results.total_rows > 0): + if query_results.total_rows is not None and query_results.total_rows > 0: total_rows = query_results.total_rows if num_dml_affected_rows is not None and num_dml_affected_rows > 0: total_rows = num_dml_affected_rows @@ -145,15 +155,15 @@ def execute(self, operation, parameters=None, job_id=None): # query parameters was not one of the standard options. Convert both # the query and the parameters to the format expected by the client # libraries. - formatted_operation = _format_operation( - operation, parameters=parameters) + formatted_operation = _format_operation(operation, parameters=parameters) query_parameters = _helpers.to_query_parameters(parameters) config = job.QueryJobConfig() config.query_parameters = query_parameters config.use_legacy_sql = False self._query_job = client.query( - formatted_operation, job_config=config, job_id=job_id) + formatted_operation, job_config=config, job_id=job_id + ) # Wait for the query to finish. try: @@ -184,11 +194,13 @@ def _try_fetch(self, size=None): """ if self._query_job is None: raise exceptions.InterfaceError( - 'No query results: execute() must be called before fetch.') + "No query results: execute() must be called before fetch." + ) is_dml = ( self._query_job.statement_type - and self._query_job.statement_type.upper() != 'SELECT') + and self._query_job.statement_type.upper() != "SELECT" + ) if is_dml: self._query_data = iter([]) return @@ -198,7 +210,7 @@ def _try_fetch(self, size=None): rows_iter = client.list_rows( self._query_job.destination, selected_fields=self._query_job._query_results.schema, - page_size=self.arraysize + page_size=self.arraysize, ) self._query_data = iter(rows_iter) @@ -285,7 +297,7 @@ def _format_operation_list(operation, parameters): if a parameter used in the operation is not found in the ``parameters`` argument. """ - formatted_params = ['?' for _ in parameters] + formatted_params = ["?" for _ in parameters] try: return operation % tuple(formatted_params) @@ -313,8 +325,8 @@ def _format_operation_dict(operation, parameters): """ formatted_params = {} for name in parameters: - escaped_name = name.replace('`', r'\`') - formatted_params[name] = '@`{}`'.format(escaped_name) + escaped_name = name.replace("`", r"\`") + formatted_params[name] = "@`{}`".format(escaped_name) try: return operation % formatted_params diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py index feb3e320bcca..3c8c454a011a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py @@ -39,7 +39,7 @@ def Binary(string): :rtype: bytes :returns: The UTF-8 encoded bytes representing the string. """ - return string.encode('utf-8') + return string.encode("utf-8") def TimeFromTicks(ticks, tz=None): @@ -76,9 +76,10 @@ def __eq__(self, other): return other in self.values -STRING = 'STRING' -BINARY = _DBAPITypeObject('BYTES', 'RECORD', 'STRUCT') +STRING = "STRING" +BINARY = _DBAPITypeObject("BYTES", "RECORD", "STRUCT") NUMBER = _DBAPITypeObject( - 'INTEGER', 'INT64', 'FLOAT', 'FLOAT64', 'NUMERIC', 'BOOLEAN', 'BOOL') -DATETIME = _DBAPITypeObject('TIMESTAMP', 'DATE', 'TIME', 'DATETIME') -ROWID = 'ROWID' + "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BOOLEAN", "BOOL" +) +DATETIME = _DBAPITypeObject("TIMESTAMP", "DATE", "TIME", "DATETIME") +ROWID = "ROWID" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 1c7b055e2953..048c2178a654 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -37,22 +37,22 @@ class ExternalSourceFormat(object): :class:`~google.cloud.bigquery.job.SourceFormat`). """ - CSV = 'CSV' + CSV = "CSV" """Specifies CSV format.""" - GOOGLE_SHEETS = 'GOOGLE_SHEETS' + GOOGLE_SHEETS = "GOOGLE_SHEETS" """Specifies Google Sheets format.""" - NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' + NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" """Specifies newline delimited JSON format.""" - AVRO = 'AVRO' + AVRO = "AVRO" """Specifies Avro format.""" - DATASTORE_BACKUP = 'DATASTORE_BACKUP' + DATASTORE_BACKUP = "DATASTORE_BACKUP" """Specifies datastore backup format""" - BIGTABLE = 'BIGTABLE' + BIGTABLE = "BIGTABLE" """Specifies Bigtable format.""" @@ -70,11 +70,11 @@ def encoding(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.encoding https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.encoding """ - return self._properties.get('encoding') + return self._properties.get("encoding") @encoding.setter def encoding(self, value): - self._properties['encoding'] = value + self._properties["encoding"] = value @property def field_name(self): @@ -85,11 +85,11 @@ def field_name(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.fieldName https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.fieldName """ - return self._properties.get('fieldName') + return self._properties.get("fieldName") @field_name.setter def field_name(self, value): - self._properties['fieldName'] = value + self._properties["fieldName"] = value @property def only_read_latest(self): @@ -100,11 +100,11 @@ def only_read_latest(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.onlyReadLatest https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.onlyReadLatest """ - return self._properties.get('onlyReadLatest') + return self._properties.get("onlyReadLatest") @only_read_latest.setter def only_read_latest(self, value): - self._properties['onlyReadLatest'] = value + self._properties["onlyReadLatest"] = value @property def qualifier_encoded(self): @@ -117,14 +117,14 @@ def qualifier_encoded(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.qualifierEncoded https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifierEncoded """ - prop = self._properties.get('qualifierEncoded') + prop = self._properties.get("qualifierEncoded") if prop is None: return None return base64.standard_b64decode(_to_bytes(prop)) @qualifier_encoded.setter def qualifier_encoded(self, value): - self._properties['qualifierEncoded'] = _bytes_to_json(value) + self._properties["qualifierEncoded"] = _bytes_to_json(value) @property def qualifier_string(self): @@ -134,11 +134,11 @@ def qualifier_string(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.qualifierEncoded https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifierEncoded """ - return self._properties.get('qualifierString') + return self._properties.get("qualifierString") @qualifier_string.setter def qualifier_string(self, value): - self._properties['qualifierString'] = value + self._properties["qualifierString"] = value @property def type_(self): @@ -148,11 +148,11 @@ def type_(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.type https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.type """ - return self._properties.get('type') + return self._properties.get("type") @type_.setter def type_(self, value): - self._properties['type'] = value + self._properties["type"] = value def to_api_repr(self): """Build an API representation of this object. @@ -197,11 +197,11 @@ def encoding(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.encoding https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.encoding """ - return self._properties.get('encoding') + return self._properties.get("encoding") @encoding.setter def encoding(self, value): - self._properties['encoding'] = value + self._properties["encoding"] = value @property def family_id(self): @@ -211,11 +211,11 @@ def family_id(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.familyId https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.familyId """ - return self._properties.get('familyId') + return self._properties.get("familyId") @family_id.setter def family_id(self, value): - self._properties['familyId'] = value + self._properties["familyId"] = value @property def only_read_latest(self): @@ -226,11 +226,11 @@ def only_read_latest(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.onlyReadLatest https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.onlyReadLatest """ - return self._properties.get('onlyReadLatest') + return self._properties.get("onlyReadLatest") @only_read_latest.setter def only_read_latest(self, value): - self._properties['onlyReadLatest'] = value + self._properties["onlyReadLatest"] = value @property def type_(self): @@ -240,11 +240,11 @@ def type_(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.type https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.type """ - return self._properties.get('type') + return self._properties.get("type") @type_.setter def type_(self, value): - self._properties['type'] = value + self._properties["type"] = value @property def columns(self): @@ -255,12 +255,12 @@ def columns(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns """ - prop = self._properties.get('columns', []) + prop = self._properties.get("columns", []) return [BigtableColumn.from_api_repr(col) for col in prop] @columns.setter def columns(self, value): - self._properties['columns'] = [col.to_api_repr() for col in value] + self._properties["columns"] = [col.to_api_repr() for col in value] def to_api_repr(self): """Build an API representation of this object. @@ -295,8 +295,8 @@ class BigtableOptions(object): """Options that describe how to treat Bigtable tables as BigQuery tables. """ - _SOURCE_FORMAT = 'BIGTABLE' - _RESOURCE_NAME = 'bigtableOptions' + _SOURCE_FORMAT = "BIGTABLE" + _RESOURCE_NAME = "bigtableOptions" def __init__(self): self._properties = {} @@ -310,11 +310,11 @@ def ignore_unspecified_column_families(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.ignoreUnspecifiedColumnFamilies https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.ignoreUnspecifiedColumnFamilies """ - return self._properties.get('ignoreUnspecifiedColumnFamilies') + return self._properties.get("ignoreUnspecifiedColumnFamilies") @ignore_unspecified_column_families.setter def ignore_unspecified_column_families(self, value): - self._properties['ignoreUnspecifiedColumnFamilies'] = value + self._properties["ignoreUnspecifiedColumnFamilies"] = value @property def read_rowkey_as_string(self): @@ -325,11 +325,11 @@ def read_rowkey_as_string(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.readRowkeyAsString https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.readRowkeyAsString """ - return self._properties.get('readRowkeyAsString') + return self._properties.get("readRowkeyAsString") @read_rowkey_as_string.setter def read_rowkey_as_string(self, value): - self._properties['readRowkeyAsString'] = value + self._properties["readRowkeyAsString"] = value @property def column_families(self): @@ -340,12 +340,12 @@ def column_families(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies """ - prop = self._properties.get('columnFamilies', []) + prop = self._properties.get("columnFamilies", []) return [BigtableColumnFamily.from_api_repr(cf) for cf in prop] @column_families.setter def column_families(self, value): - self._properties['columnFamilies'] = [cf.to_api_repr() for cf in value] + self._properties["columnFamilies"] = [cf.to_api_repr() for cf in value] def to_api_repr(self): """Build an API representation of this object. @@ -379,8 +379,8 @@ def from_api_repr(cls, resource): class CSVOptions(object): """Options that describe how to treat CSV files as BigQuery tables.""" - _SOURCE_FORMAT = 'CSV' - _RESOURCE_NAME = 'csvOptions' + _SOURCE_FORMAT = "CSV" + _RESOURCE_NAME = "csvOptions" def __init__(self): self._properties = {} @@ -394,11 +394,11 @@ def allow_jagged_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowJaggedRows https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowJaggedRows """ - return self._properties.get('allowJaggedRows') + return self._properties.get("allowJaggedRows") @allow_jagged_rows.setter def allow_jagged_rows(self, value): - self._properties['allowJaggedRows'] = value + self._properties["allowJaggedRows"] = value @property def allow_quoted_newlines(self): @@ -409,11 +409,11 @@ def allow_quoted_newlines(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowQuotedNewlines https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowQuotedNewlines """ - return self._properties.get('allowQuotedNewlines') + return self._properties.get("allowQuotedNewlines") @allow_quoted_newlines.setter def allow_quoted_newlines(self, value): - self._properties['allowQuotedNewlines'] = value + self._properties["allowQuotedNewlines"] = value @property def encoding(self): @@ -423,11 +423,11 @@ def encoding(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.encoding https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding """ - return self._properties.get('encoding') + return self._properties.get("encoding") @encoding.setter def encoding(self, value): - self._properties['encoding'] = value + self._properties["encoding"] = value @property def field_delimiter(self): @@ -437,11 +437,11 @@ def field_delimiter(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.fieldDelimiter https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.fieldDelimiter """ - return self._properties.get('fieldDelimiter') + return self._properties.get("fieldDelimiter") @field_delimiter.setter def field_delimiter(self, value): - self._properties['fieldDelimiter'] = value + self._properties["fieldDelimiter"] = value @property def quote_character(self): @@ -451,11 +451,11 @@ def quote_character(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.quote https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.quote """ - return self._properties.get('quote') + return self._properties.get("quote") @quote_character.setter def quote_character(self, value): - self._properties['quote'] = value + self._properties["quote"] = value @property def skip_leading_rows(self): @@ -465,11 +465,11 @@ def skip_leading_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.skipLeadingRows https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.skipLeadingRows """ - return _int_or_none(self._properties.get('skipLeadingRows')) + return _int_or_none(self._properties.get("skipLeadingRows")) @skip_leading_rows.setter def skip_leading_rows(self, value): - self._properties['skipLeadingRows'] = str(value) + self._properties["skipLeadingRows"] = str(value) def to_api_repr(self): """Build an API representation of this object. @@ -503,8 +503,8 @@ def from_api_repr(cls, resource): class GoogleSheetsOptions(object): """Options that describe how to treat Google Sheets as BigQuery tables.""" - _SOURCE_FORMAT = 'GOOGLE_SHEETS' - _RESOURCE_NAME = 'googleSheetsOptions' + _SOURCE_FORMAT = "GOOGLE_SHEETS" + _RESOURCE_NAME = "googleSheetsOptions" def __init__(self): self._properties = {} @@ -518,11 +518,11 @@ def skip_leading_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).googleSheetsOptions.skipLeadingRows https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.googleSheetsOptions.skipLeadingRows """ - return _int_or_none(self._properties.get('skipLeadingRows')) + return _int_or_none(self._properties.get("skipLeadingRows")) @skip_leading_rows.setter def skip_leading_rows(self, value): - self._properties['skipLeadingRows'] = str(value) + self._properties["skipLeadingRows"] = str(value) def to_api_repr(self): """Build an API representation of this object. @@ -565,7 +565,7 @@ class ExternalConfig(object): """ def __init__(self, source_format): - self._properties = {'sourceFormat': source_format} + self._properties = {"sourceFormat": source_format} self._options = None for optcls in _OPTION_CLASSES: if source_format == optcls._SOURCE_FORMAT: @@ -580,7 +580,7 @@ def source_format(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat """ - return self._properties['sourceFormat'] + return self._properties["sourceFormat"] @property def options(self): @@ -596,11 +596,11 @@ def autodetect(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect """ - return self._properties.get('autodetect') + return self._properties.get("autodetect") @autodetect.setter def autodetect(self, value): - self._properties['autodetect'] = value + self._properties["autodetect"] = value @property def compression(self): @@ -610,11 +610,11 @@ def compression(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression """ - return self._properties.get('compression') + return self._properties.get("compression") @compression.setter def compression(self, value): - self._properties['compression'] = value + self._properties["compression"] = value @property def ignore_unknown_values(self): @@ -625,11 +625,11 @@ def ignore_unknown_values(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues """ - return self._properties.get('ignoreUnknownValues') + return self._properties.get("ignoreUnknownValues") @ignore_unknown_values.setter def ignore_unknown_values(self, value): - self._properties['ignoreUnknownValues'] = value + self._properties["ignoreUnknownValues"] = value @property def max_bad_records(self): @@ -640,11 +640,11 @@ def max_bad_records(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords """ - return self._properties.get('maxBadRecords') + return self._properties.get("maxBadRecords") @max_bad_records.setter def max_bad_records(self, value): - self._properties['maxBadRecords'] = value + self._properties["maxBadRecords"] = value @property def source_uris(self): @@ -654,11 +654,11 @@ def source_uris(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris """ - return self._properties.get('sourceUris', []) + return self._properties.get("sourceUris", []) @source_uris.setter def source_uris(self, value): - self._properties['sourceUris'] = value + self._properties["sourceUris"] = value @property def schema(self): @@ -669,16 +669,15 @@ def schema(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema """ - prop = self._properties.get('schema', {}) - return [SchemaField.from_api_repr(field) - for field in prop.get('fields', [])] + prop = self._properties.get("schema", {}) + return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] @schema.setter def schema(self, value): prop = value if value is not None: - prop = {'fields': [field.to_api_repr() for field in value]} - self._properties['schema'] = prop + prop = {"fields": [field.to_api_repr() for field in value]} + self._properties["schema"] = prop def to_api_repr(self): """Build an API representation of this object. @@ -709,7 +708,7 @@ def from_api_repr(cls, resource): :class:`~.external_config.ExternalConfig`: Configuration parsed from ``resource``. """ - config = cls(resource['sourceFormat']) + config = cls(resource["sourceFormat"]) for optcls in _OPTION_CLASSES: opts = resource.get(optcls._RESOURCE_NAME) if opts is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index b625dbf51f76..cdb275ed5f83 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -38,29 +38,29 @@ from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery import _helpers -_DONE_STATE = 'DONE' -_STOPPED_REASON = 'stopped' +_DONE_STATE = "DONE" +_STOPPED_REASON = "stopped" _TIMEOUT_BUFFER_SECS = 0.1 _ERROR_REASON_TO_EXCEPTION = { - 'accessDenied': http_client.FORBIDDEN, - 'backendError': http_client.INTERNAL_SERVER_ERROR, - 'billingNotEnabled': http_client.FORBIDDEN, - 'billingTierLimitExceeded': http_client.BAD_REQUEST, - 'blocked': http_client.FORBIDDEN, - 'duplicate': http_client.CONFLICT, - 'internalError': http_client.INTERNAL_SERVER_ERROR, - 'invalid': http_client.BAD_REQUEST, - 'invalidQuery': http_client.BAD_REQUEST, - 'notFound': http_client.NOT_FOUND, - 'notImplemented': http_client.NOT_IMPLEMENTED, - 'quotaExceeded': http_client.FORBIDDEN, - 'rateLimitExceeded': http_client.FORBIDDEN, - 'resourceInUse': http_client.BAD_REQUEST, - 'resourcesExceeded': http_client.BAD_REQUEST, - 'responseTooLarge': http_client.FORBIDDEN, - 'stopped': http_client.OK, - 'tableUnavailable': http_client.BAD_REQUEST, + "accessDenied": http_client.FORBIDDEN, + "backendError": http_client.INTERNAL_SERVER_ERROR, + "billingNotEnabled": http_client.FORBIDDEN, + "billingTierLimitExceeded": http_client.BAD_REQUEST, + "blocked": http_client.FORBIDDEN, + "duplicate": http_client.CONFLICT, + "internalError": http_client.INTERNAL_SERVER_ERROR, + "invalid": http_client.BAD_REQUEST, + "invalidQuery": http_client.BAD_REQUEST, + "notFound": http_client.NOT_FOUND, + "notImplemented": http_client.NOT_IMPLEMENTED, + "quotaExceeded": http_client.FORBIDDEN, + "rateLimitExceeded": http_client.FORBIDDEN, + "resourceInUse": http_client.BAD_REQUEST, + "resourcesExceeded": http_client.BAD_REQUEST, + "responseTooLarge": http_client.FORBIDDEN, + "stopped": http_client.OK, + "tableUnavailable": http_client.BAD_REQUEST, } @@ -79,12 +79,12 @@ def _error_result_to_exception(error_result): :rtype google.cloud.exceptions.GoogleCloudError: :returns: The mapped exception. """ - reason = error_result.get('reason') + reason = error_result.get("reason") status_code = _ERROR_REASON_TO_EXCEPTION.get( reason, http_client.INTERNAL_SERVER_ERROR ) return exceptions.from_http_status( - status_code, error_result.get('message', ''), errors=[error_result] + status_code, error_result.get("message", ""), errors=[error_result] ) @@ -96,16 +96,16 @@ class Compression(object): only supported for Avro. """ - GZIP = 'GZIP' + GZIP = "GZIP" """Specifies GZIP format.""" - DEFLATE = 'DEFLATE' + DEFLATE = "DEFLATE" """Specifies DEFLATE format.""" - SNAPPY = 'SNAPPY' + SNAPPY = "SNAPPY" """Specifies SNAPPY format.""" - NONE = 'NONE' + NONE = "NONE" """Specifies no compression.""" @@ -117,10 +117,10 @@ class CreateDisposition(object): upon job completion. """ - CREATE_IF_NEEDED = 'CREATE_IF_NEEDED' + CREATE_IF_NEEDED = "CREATE_IF_NEEDED" """If the table does not exist, BigQuery creates the table.""" - CREATE_NEVER = 'CREATE_NEVER' + CREATE_NEVER = "CREATE_NEVER" """The table must already exist. If it does not, a 'notFound' error is returned in the job result.""" @@ -131,13 +131,13 @@ class DestinationFormat(object): Tables with nested or repeated fields cannot be exported as CSV. """ - CSV = 'CSV' + CSV = "CSV" """Specifies CSV format.""" - NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' + NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" """Specifies newline delimited JSON format.""" - AVRO = 'AVRO' + AVRO = "AVRO" """Specifies Avro format.""" @@ -148,10 +148,10 @@ class Encoding(object): split using the values of the quote and fieldDelimiter properties. """ - UTF_8 = 'UTF-8' + UTF_8 = "UTF-8" """Specifies UTF-8 encoding.""" - ISO_8859_1 = 'ISO-8859-1' + ISO_8859_1 = "ISO-8859-1" """Specifies ISO-8859-1 encoding.""" @@ -160,10 +160,10 @@ class QueryPriority(object): :attr:`INTERACTIVE`. """ - INTERACTIVE = 'INTERACTIVE' + INTERACTIVE = "INTERACTIVE" """Specifies interactive priority.""" - BATCH = 'BATCH' + BATCH = "BATCH" """Specifies batch priority.""" @@ -175,22 +175,22 @@ class SourceFormat(object): :class:`~google.cloud.bigquery.external_config.ExternalSourceFormat`). """ - CSV = 'CSV' + CSV = "CSV" """Specifies CSV format.""" - DATASTORE_BACKUP = 'DATASTORE_BACKUP' + DATASTORE_BACKUP = "DATASTORE_BACKUP" """Specifies datastore backup format""" - NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' + NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" """Specifies newline delimited JSON format.""" - AVRO = 'AVRO' + AVRO = "AVRO" """Specifies Avro format.""" - PARQUET = 'PARQUET' + PARQUET = "PARQUET" """Specifies Parquet format.""" - ORC = 'ORC' + ORC = "ORC" """Specifies Orc format.""" @@ -204,13 +204,13 @@ class WriteDisposition(object): atomic update upon job completion. """ - WRITE_APPEND = 'WRITE_APPEND' + WRITE_APPEND = "WRITE_APPEND" """If the table already exists, BigQuery appends the data to the table.""" - WRITE_TRUNCATE = 'WRITE_TRUNCATE' + WRITE_TRUNCATE = "WRITE_TRUNCATE" """If the table already exists, BigQuery overwrites the table data.""" - WRITE_EMPTY = 'WRITE_EMPTY' + WRITE_EMPTY = "WRITE_EMPTY" """If the table already exists and contains data, a 'duplicate' error is returned in the job result.""" @@ -220,10 +220,10 @@ class SchemaUpdateOption(object): a load job. """ - ALLOW_FIELD_ADDITION = 'ALLOW_FIELD_ADDITION' + ALLOW_FIELD_ADDITION = "ALLOW_FIELD_ADDITION" """Allow adding a nullable field to the schema.""" - ALLOW_FIELD_RELAXATION = 'ALLOW_FIELD_RELAXATION' + ALLOW_FIELD_RELAXATION = "ALLOW_FIELD_RELAXATION" """Allow relaxing a required field in the original schema to nullable.""" @@ -237,25 +237,25 @@ class _JobReference(object): """ def __init__(self, job_id, project, location): - self._properties = {'jobId': job_id, 'projectId': project} + self._properties = {"jobId": job_id, "projectId": project} # The location field must not be populated if it is None. if location: - self._properties['location'] = location + self._properties["location"] = location @property def job_id(self): """str: ID of the job.""" - return self._properties.get('jobId') + return self._properties.get("jobId") @property def project(self): """str: ID of the project where the job runs.""" - return self._properties.get('projectId') + return self._properties.get("projectId") @property def location(self): """str: Location where the job runs.""" - return self._properties.get('location') + return self._properties.get("location") def _to_api_repr(self): """Returns the API resource representation of the job reference.""" @@ -264,9 +264,9 @@ def _to_api_repr(self): @classmethod def _from_api_repr(cls, resource): """Returns a job reference for an API resource representation.""" - job_id = resource.get('jobId') - project = resource.get('projectId') - location = resource.get('location') + job_id = resource.get("jobId") + project = resource.get("projectId") + location = resource.get("location") job_ref = cls(job_id, project, location) return job_ref @@ -291,7 +291,7 @@ def __init__(self, job_id, client): job_ref = job_id if not isinstance(job_id, _JobReference): job_ref = _JobReference(job_id, client.project, None) - self._properties = {'jobReference': job_ref._to_api_repr()} + self._properties = {"jobReference": job_ref._to_api_repr()} self._client = client self._result_set = False @@ -300,7 +300,7 @@ def __init__(self, job_id, client): @property def job_id(self): """str: ID of the job.""" - return _helpers._get_sub_prop(self._properties, ['jobReference', 'jobId']) + return _helpers._get_sub_prop(self._properties, ["jobReference", "jobId"]) @property def project(self): @@ -309,12 +309,12 @@ def project(self): :rtype: str :returns: the project (derived from the client). """ - return _helpers._get_sub_prop(self._properties, ['jobReference', 'projectId']) + return _helpers._get_sub_prop(self._properties, ["jobReference", "projectId"]) @property def location(self): """str: Location where the job runs.""" - return _helpers._get_sub_prop(self._properties, ['jobReference', 'location']) + return _helpers._get_sub_prop(self._properties, ["jobReference", "location"]) def _require_client(self, client): """Check client or verify over-ride. @@ -347,12 +347,12 @@ def path(self): :rtype: str :returns: the path based on project and job ID. """ - return '/projects/%s/jobs/%s' % (self.project, self.job_id) + return "/projects/%s/jobs/%s" % (self.project, self.job_id) @property def labels(self): """Dict[str, str]: Labels for the job.""" - return self._properties.setdefault('labels', {}) + return self._properties.setdefault("labels", {}) @property def etag(self): @@ -361,7 +361,7 @@ def etag(self): :rtype: str, or ``NoneType`` :returns: the ETag (None until set from the server). """ - return self._properties.get('etag') + return self._properties.get("etag") @property def self_link(self): @@ -370,7 +370,7 @@ def self_link(self): :rtype: str, or ``NoneType`` :returns: the URL (None until set from the server). """ - return self._properties.get('selfLink') + return self._properties.get("selfLink") @property def user_email(self): @@ -379,7 +379,7 @@ def user_email(self): :rtype: str, or ``NoneType`` :returns: the URL (None until set from the server). """ - return self._properties.get('user_email') + return self._properties.get("user_email") @property def created(self): @@ -388,9 +388,9 @@ def created(self): :rtype: ``datetime.datetime``, or ``NoneType`` :returns: the creation time (None until set from the server). """ - statistics = self._properties.get('statistics') + statistics = self._properties.get("statistics") if statistics is not None: - millis = statistics.get('creationTime') + millis = statistics.get("creationTime") if millis is not None: return _helpers._datetime_from_microseconds(millis * 1000.0) @@ -401,9 +401,9 @@ def started(self): :rtype: ``datetime.datetime``, or ``NoneType`` :returns: the start time (None until set from the server). """ - statistics = self._properties.get('statistics') + statistics = self._properties.get("statistics") if statistics is not None: - millis = statistics.get('startTime') + millis = statistics.get("startTime") if millis is not None: return _helpers._datetime_from_microseconds(millis * 1000.0) @@ -414,15 +414,15 @@ def ended(self): :rtype: ``datetime.datetime``, or ``NoneType`` :returns: the end time (None until set from the server). """ - statistics = self._properties.get('statistics') + statistics = self._properties.get("statistics") if statistics is not None: - millis = statistics.get('endTime') + millis = statistics.get("endTime") if millis is not None: return _helpers._datetime_from_microseconds(millis * 1000.0) def _job_statistics(self): """Helper for job-type specific statistics-based properties.""" - statistics = self._properties.get('statistics', {}) + statistics = self._properties.get("statistics", {}) return statistics.get(self._JOB_TYPE, {}) @property @@ -432,9 +432,9 @@ def error_result(self): :rtype: mapping, or ``NoneType`` :returns: the error information (None until set from the server). """ - status = self._properties.get('status') + status = self._properties.get("status") if status is not None: - return status.get('errorResult') + return status.get("errorResult") @property def errors(self): @@ -443,9 +443,9 @@ def errors(self): :rtype: list of mappings, or ``NoneType`` :returns: the error information (None until set from the server). """ - status = self._properties.get('status') + status = self._properties.get("status") if status is not None: - return status.get('errors') + return status.get("errors") @property def state(self): @@ -454,9 +454,9 @@ def state(self): :rtype: str, or ``NoneType`` :returns: the state (None until set from the server). """ - status = self._properties.get('status') + status = self._properties.get("status") if status is not None: - return status.get('state') + return status.get("state") def _scrub_local_properties(self, cleaned): """Helper: handle subclass properties in cleaned.""" @@ -475,17 +475,17 @@ def _set_properties(self, api_response): cleaned = api_response.copy() self._scrub_local_properties(cleaned) - statistics = cleaned.get('statistics', {}) - if 'creationTime' in statistics: - statistics['creationTime'] = float(statistics['creationTime']) - if 'startTime' in statistics: - statistics['startTime'] = float(statistics['startTime']) - if 'endTime' in statistics: - statistics['endTime'] = float(statistics['endTime']) + statistics = cleaned.get("statistics", {}) + if "creationTime" in statistics: + statistics["creationTime"] = float(statistics["creationTime"]) + if "startTime" in statistics: + statistics["startTime"] = float(statistics["startTime"]) + if "endTime" in statistics: + statistics["endTime"] = float(statistics["endTime"]) self._properties.clear() self._properties.update(cleaned) - self._copy_configuration_properties(cleaned.get('configuration', {})) + self._copy_configuration_properties(cleaned.get("configuration", {})) # For Future interface self._set_future_result() @@ -503,21 +503,21 @@ def _get_resource_config(cls, resource): :raises: :class:`KeyError` if the resource has no identifier, or is missing the appropriate configuration. """ - if 'jobReference' not in resource or 'jobId' not in resource['jobReference']: + if "jobReference" not in resource or "jobId" not in resource["jobReference"]: raise KeyError( - 'Resource lacks required identity information: ' + "Resource lacks required identity information: " '["jobReference"]["jobId"]' ) - job_id = resource['jobReference']['jobId'] + job_id = resource["jobReference"]["jobId"] if ( - 'configuration' not in resource - or cls._JOB_TYPE not in resource['configuration'] + "configuration" not in resource + or cls._JOB_TYPE not in resource["configuration"] ): raise KeyError( - 'Resource lacks required configuration: ' + "Resource lacks required configuration: " '["configuration"]["%s"]' % cls._JOB_TYPE ) - return job_id, resource['configuration'] + return job_id, resource["configuration"] def to_api_repr(self): """Generate a resource for the job.""" @@ -545,12 +545,12 @@ def _begin(self, client=None, retry=DEFAULT_RETRY): raise ValueError("Job already begun.") client = self._require_client(client) - path = '/projects/%s/jobs' % (self.project,) + path = "/projects/%s/jobs" % (self.project,) # jobs.insert is idempotent because we ensure that every new # job has an ID. api_response = client._call_api( - retry, method='POST', path=path, data=self.to_api_repr() + retry, method="POST", path=path, data=self.to_api_repr() ) self._set_properties(api_response) @@ -573,13 +573,13 @@ def exists(self, client=None, retry=DEFAULT_RETRY): """ client = self._require_client(client) - extra_params = {'fields': 'id'} + extra_params = {"fields": "id"} if self.location: - extra_params['location'] = self.location + extra_params["location"] = self.location try: client._call_api( - retry, method='GET', path=self.path, query_params=extra_params + retry, method="GET", path=self.path, query_params=extra_params ) except NotFound: return False @@ -604,10 +604,10 @@ def reload(self, client=None, retry=DEFAULT_RETRY): extra_params = {} if self.location: - extra_params['location'] = self.location + extra_params["location"] = self.location api_response = client._call_api( - retry, method='GET', path=self.path, query_params=extra_params + retry, method="GET", path=self.path, query_params=extra_params ) self._set_properties(api_response) @@ -629,12 +629,12 @@ def cancel(self, client=None): extra_params = {} if self.location: - extra_params['location'] = self.location + extra_params["location"] = self.location api_response = client._connection.api_request( - method='POST', path='%s/cancel' % (self.path,), query_params=extra_params + method="POST", path="%s/cancel" % (self.path,), query_params=extra_params ) - self._set_properties(api_response['job']) + self._set_properties(api_response["job"]) # The Future interface requires that we return True if the *attempt* # to cancel was successful. return True @@ -711,7 +711,7 @@ def cancelled(self): """ return ( self.error_result is not None - and self.error_result.get('reason') == _STOPPED_REASON + and self.error_result.get("reason") == _STOPPED_REASON ) @@ -739,13 +739,13 @@ def labels(self): Raises: ValueError: If ``value`` type is invalid. """ - return self._properties.setdefault('labels', {}) + return self._properties.setdefault("labels", {}) @labels.setter def labels(self, value): if not isinstance(value, dict): raise ValueError("Pass a dict") - self._properties['labels'] = value + self._properties["labels"] = value def _get_sub_prop(self, key, default=None): """Get a value in the ``self._properties[self._job_type]`` dictionary. @@ -845,7 +845,7 @@ def _fill_from_default(self, default_job_config): raise TypeError( "attempted to merge two incompatible job types: " + repr(self._job_type) - + ', ' + + ", " + repr(default_job_config._job_type) ) @@ -887,7 +887,7 @@ class LoadJobConfig(_JobConfig): """ def __init__(self, **kwargs): - super(LoadJobConfig, self).__init__('load', **kwargs) + super(LoadJobConfig, self).__init__("load", **kwargs) @property def allow_jagged_rows(self): @@ -896,11 +896,11 @@ def allow_jagged_rows(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows """ - return self._get_sub_prop('allowJaggedRows') + return self._get_sub_prop("allowJaggedRows") @allow_jagged_rows.setter def allow_jagged_rows(self, value): - self._set_sub_prop('allowJaggedRows', value) + self._set_sub_prop("allowJaggedRows", value) @property def allow_quoted_newlines(self): @@ -909,11 +909,11 @@ def allow_quoted_newlines(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines """ - return self._get_sub_prop('allowQuotedNewlines') + return self._get_sub_prop("allowQuotedNewlines") @allow_quoted_newlines.setter def allow_quoted_newlines(self, value): - self._set_sub_prop('allowQuotedNewlines', value) + self._set_sub_prop("allowQuotedNewlines", value) @property def autodetect(self): @@ -922,11 +922,11 @@ def autodetect(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect """ - return self._get_sub_prop('autodetect') + return self._get_sub_prop("autodetect") @autodetect.setter def autodetect(self, value): - self._set_sub_prop('autodetect', value) + self._set_sub_prop("autodetect", value) @property def clustering_fields(self): @@ -941,9 +941,9 @@ def clustering_fields(self): As of 2018-06-29, clustering fields cannot be set on a table which does not also have time partioning defined. """ - prop = self._get_sub_prop('clustering') + prop = self._get_sub_prop("clustering") if prop is not None: - return list(prop.get('fields', ())) + return list(prop.get("fields", ())) @clustering_fields.setter def clustering_fields(self, value): @@ -952,9 +952,9 @@ def clustering_fields(self, value): (Defaults to :data:`None`). """ if value is not None: - self._set_sub_prop('clustering', {'fields': value}) + self._set_sub_prop("clustering", {"fields": value}) else: - self._del_sub_prop('clustering') + self._del_sub_prop("clustering") @property def create_disposition(self): @@ -964,11 +964,11 @@ def create_disposition(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition """ - return self._get_sub_prop('createDisposition') + return self._get_sub_prop("createDisposition") @create_disposition.setter def create_disposition(self, value): - self._set_sub_prop('createDisposition', value) + self._set_sub_prop("createDisposition", value) @property def destination_encryption_configuration(self): @@ -981,7 +981,7 @@ def destination_encryption_configuration(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationEncryptionConfiguration """ - prop = self._get_sub_prop('destinationEncryptionConfiguration') + prop = self._get_sub_prop("destinationEncryptionConfiguration") if prop is not None: prop = EncryptionConfiguration.from_api_repr(prop) return prop @@ -991,9 +991,9 @@ def destination_encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._set_sub_prop('destinationEncryptionConfiguration', api_repr) + self._set_sub_prop("destinationEncryptionConfiguration", api_repr) else: - self._del_sub_prop('destinationEncryptionConfiguration') + self._del_sub_prop("destinationEncryptionConfiguration") @property def destination_table_description(self): @@ -1002,13 +1002,13 @@ def destination_table_description(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTableProperties.description """ - prop = self._get_sub_prop('destinationTableProperties') + prop = self._get_sub_prop("destinationTableProperties") if prop is not None: - return prop['description'] + return prop["description"] @destination_table_description.setter def destination_table_description(self, value): - keys = [self._job_type, 'destinationTableProperties', 'description'] + keys = [self._job_type, "destinationTableProperties", "description"] if value is not None: _helpers._set_sub_prop(self._properties, keys, value) else: @@ -1021,13 +1021,13 @@ def destination_table_friendly_name(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTableProperties.friendlyName """ - prop = self._get_sub_prop('destinationTableProperties') + prop = self._get_sub_prop("destinationTableProperties") if prop is not None: - return prop['friendlyName'] + return prop["friendlyName"] @destination_table_friendly_name.setter def destination_table_friendly_name(self, value): - keys = [self._job_type, 'destinationTableProperties', 'friendlyName'] + keys = [self._job_type, "destinationTableProperties", "friendlyName"] if value is not None: _helpers._set_sub_prop(self._properties, keys, value) else: @@ -1041,11 +1041,11 @@ def encoding(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding """ - return self._get_sub_prop('encoding') + return self._get_sub_prop("encoding") @encoding.setter def encoding(self, value): - self._set_sub_prop('encoding', value) + self._set_sub_prop("encoding", value) @property def field_delimiter(self): @@ -1054,11 +1054,11 @@ def field_delimiter(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter """ - return self._get_sub_prop('fieldDelimiter') + return self._get_sub_prop("fieldDelimiter") @field_delimiter.setter def field_delimiter(self, value): - self._set_sub_prop('fieldDelimiter', value) + self._set_sub_prop("fieldDelimiter", value) @property def ignore_unknown_values(self): @@ -1067,11 +1067,11 @@ def ignore_unknown_values(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues """ - return self._get_sub_prop('ignoreUnknownValues') + return self._get_sub_prop("ignoreUnknownValues") @ignore_unknown_values.setter def ignore_unknown_values(self, value): - self._set_sub_prop('ignoreUnknownValues', value) + self._set_sub_prop("ignoreUnknownValues", value) @property def max_bad_records(self): @@ -1080,11 +1080,11 @@ def max_bad_records(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords """ - return _helpers._int_or_none(self._get_sub_prop('maxBadRecords')) + return _helpers._int_or_none(self._get_sub_prop("maxBadRecords")) @max_bad_records.setter def max_bad_records(self, value): - self._set_sub_prop('maxBadRecords', value) + self._set_sub_prop("maxBadRecords", value) @property def null_marker(self): @@ -1093,11 +1093,11 @@ def null_marker(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker """ - return self._get_sub_prop('nullMarker') + return self._get_sub_prop("nullMarker") @null_marker.setter def null_marker(self, value): - self._set_sub_prop('nullMarker', value) + self._set_sub_prop("nullMarker", value) @property def quote_character(self): @@ -1106,11 +1106,11 @@ def quote_character(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote """ - return self._get_sub_prop('quote') + return self._get_sub_prop("quote") @quote_character.setter def quote_character(self, value): - self._set_sub_prop('quote', value) + self._set_sub_prop("quote", value) @property def schema(self): @@ -1120,18 +1120,18 @@ def schema(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema """ - schema = _helpers._get_sub_prop(self._properties, ['load', 'schema', 'fields']) + schema = _helpers._get_sub_prop(self._properties, ["load", "schema", "fields"]) if schema is None: return return [SchemaField.from_api_repr(field) for field in schema] @schema.setter def schema(self, value): - if not all(hasattr(field, 'to_api_repr') for field in value): - raise ValueError('Schema items must be fields') + if not all(hasattr(field, "to_api_repr") for field in value): + raise ValueError("Schema items must be fields") _helpers._set_sub_prop( self._properties, - ['load', 'schema', 'fields'], + ["load", "schema", "fields"], [field.to_api_repr() for field in value], ) @@ -1141,11 +1141,11 @@ def schema_update_options(self): updates to the destination table schema to allow as a side effect of the load job. """ - return self._get_sub_prop('schemaUpdateOptions') + return self._get_sub_prop("schemaUpdateOptions") @schema_update_options.setter def schema_update_options(self, values): - self._set_sub_prop('schemaUpdateOptions', values) + self._set_sub_prop("schemaUpdateOptions", values) @property def skip_leading_rows(self): @@ -1154,11 +1154,11 @@ def skip_leading_rows(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows """ - return _helpers._int_or_none(self._get_sub_prop('skipLeadingRows')) + return _helpers._int_or_none(self._get_sub_prop("skipLeadingRows")) @skip_leading_rows.setter def skip_leading_rows(self, value): - self._set_sub_prop('skipLeadingRows', str(value)) + self._set_sub_prop("skipLeadingRows", str(value)) @property def source_format(self): @@ -1167,18 +1167,18 @@ def source_format(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat """ - return self._get_sub_prop('sourceFormat') + return self._get_sub_prop("sourceFormat") @source_format.setter def source_format(self, value): - self._set_sub_prop('sourceFormat', value) + self._set_sub_prop("sourceFormat", value) @property def time_partitioning(self): """google.cloud.bigquery.table.TimePartitioning: Specifies time-based partitioning for the destination table. """ - prop = self._get_sub_prop('timePartitioning') + prop = self._get_sub_prop("timePartitioning") if prop is not None: prop = TimePartitioning.from_api_repr(prop) return prop @@ -1188,9 +1188,9 @@ def time_partitioning(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._set_sub_prop('timePartitioning', api_repr) + self._set_sub_prop("timePartitioning", api_repr) else: - self._del_sub_prop('timePartitioning') + self._del_sub_prop("timePartitioning") @property def write_disposition(self): @@ -1200,11 +1200,11 @@ def write_disposition(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition """ - return self._get_sub_prop('writeDisposition') + return self._get_sub_prop("writeDisposition") @write_disposition.setter def write_disposition(self, value): - self._set_sub_prop('writeDisposition', value) + self._set_sub_prop("writeDisposition", value) class LoadJob(_AsyncJob): @@ -1229,7 +1229,7 @@ class LoadJob(_AsyncJob): for the dataset (which requires a project). """ - _JOB_TYPE = 'load' + _JOB_TYPE = "load" def __init__(self, job_id, source_uris, destination, client, job_config=None): super(LoadJob, self).__init__(job_id, client) @@ -1383,7 +1383,7 @@ def input_file_bytes(self): """ return _helpers._int_or_none( _helpers._get_sub_prop( - self._properties, ['statistics', 'load', 'inputFileBytes'] + self._properties, ["statistics", "load", "inputFileBytes"] ) ) @@ -1396,7 +1396,7 @@ def input_files(self): """ return _helpers._int_or_none( _helpers._get_sub_prop( - self._properties, ['statistics', 'load', 'inputFiles'] + self._properties, ["statistics", "load", "inputFiles"] ) ) @@ -1409,7 +1409,7 @@ def output_bytes(self): """ return _helpers._int_or_none( _helpers._get_sub_prop( - self._properties, ['statistics', 'load', 'outputBytes'] + self._properties, ["statistics", "load", "outputBytes"] ) ) @@ -1422,7 +1422,7 @@ def output_rows(self): """ return _helpers._int_or_none( _helpers._get_sub_prop( - self._properties, ['statistics', 'load', 'outputRows'] + self._properties, ["statistics", "load", "outputRows"] ) ) @@ -1431,15 +1431,15 @@ def to_api_repr(self): configuration = self._configuration.to_api_repr() if self.source_uris is not None: _helpers._set_sub_prop( - configuration, ['load', 'sourceUris'], self.source_uris + configuration, ["load", "sourceUris"], self.source_uris ) _helpers._set_sub_prop( - configuration, ['load', 'destinationTable'], self.destination.to_api_repr() + configuration, ["load", "destinationTable"], self.destination.to_api_repr() ) return { - 'jobReference': self._properties['jobReference'], - 'configuration': configuration, + "jobReference": self._properties["jobReference"], + "configuration": configuration, } def _copy_configuration_properties(self, configuration): @@ -1465,15 +1465,15 @@ def from_api_repr(cls, resource, client): :rtype: :class:`google.cloud.bigquery.job.LoadJob` :returns: Job parsed from ``resource``. """ - config_resource = resource.get('configuration', {}) + config_resource = resource.get("configuration", {}) config = LoadJobConfig.from_api_repr(config_resource) # A load job requires a destination table. - dest_config = config_resource['load']['destinationTable'] - ds_ref = DatasetReference(dest_config['projectId'], dest_config['datasetId']) - destination = TableReference(ds_ref, dest_config['tableId']) + dest_config = config_resource["load"]["destinationTable"] + ds_ref = DatasetReference(dest_config["projectId"], dest_config["datasetId"]) + destination = TableReference(ds_ref, dest_config["tableId"]) # sourceUris will be absent if this is a file upload. - source_uris = _helpers._get_sub_prop(config_resource, ['load', 'sourceUris']) - job_ref = _JobReference._from_api_repr(resource['jobReference']) + source_uris = _helpers._get_sub_prop(config_resource, ["load", "sourceUris"]) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) job = cls(job_ref, source_uris, destination, client, config) job._set_properties(resource) return job @@ -1488,7 +1488,7 @@ class CopyJobConfig(_JobConfig): """ def __init__(self, **kwargs): - super(CopyJobConfig, self).__init__('copy', **kwargs) + super(CopyJobConfig, self).__init__("copy", **kwargs) @property def create_disposition(self): @@ -1498,11 +1498,11 @@ def create_disposition(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition """ - return self._get_sub_prop('createDisposition') + return self._get_sub_prop("createDisposition") @create_disposition.setter def create_disposition(self, value): - self._set_sub_prop('createDisposition', value) + self._set_sub_prop("createDisposition", value) @property def write_disposition(self): @@ -1512,11 +1512,11 @@ def write_disposition(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition """ - return self._get_sub_prop('writeDisposition') + return self._get_sub_prop("writeDisposition") @write_disposition.setter def write_disposition(self, value): - self._set_sub_prop('writeDisposition', value) + self._set_sub_prop("writeDisposition", value) @property def destination_encryption_configuration(self): @@ -1529,7 +1529,7 @@ def destination_encryption_configuration(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.destinationEncryptionConfiguration """ - prop = self._get_sub_prop('destinationEncryptionConfiguration') + prop = self._get_sub_prop("destinationEncryptionConfiguration") if prop is not None: prop = EncryptionConfiguration.from_api_repr(prop) return prop @@ -1539,7 +1539,7 @@ def destination_encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._set_sub_prop('destinationEncryptionConfiguration', api_repr) + self._set_sub_prop("destinationEncryptionConfiguration", api_repr) class CopyJob(_AsyncJob): @@ -1563,7 +1563,7 @@ class CopyJob(_AsyncJob): (Optional) Extra configuration options for the copy job. """ - _JOB_TYPE = 'copy' + _JOB_TYPE = "copy" def __init__(self, job_id, sources, destination, client, job_config=None): super(CopyJob, self).__init__(job_id, client) @@ -1607,28 +1607,28 @@ def to_api_repr(self): source_refs = [ { - 'projectId': table.project, - 'datasetId': table.dataset_id, - 'tableId': table.table_id, + "projectId": table.project, + "datasetId": table.dataset_id, + "tableId": table.table_id, } for table in self.sources ] configuration = self._configuration.to_api_repr() - _helpers._set_sub_prop(configuration, ['copy', 'sourceTables'], source_refs) + _helpers._set_sub_prop(configuration, ["copy", "sourceTables"], source_refs) _helpers._set_sub_prop( configuration, - ['copy', 'destinationTable'], + ["copy", "destinationTable"], { - 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_id, - 'tableId': self.destination.table_id, + "projectId": self.destination.project, + "datasetId": self.destination.dataset_id, + "tableId": self.destination.table_id, }, ) return { - 'jobReference': self._properties['jobReference'], - 'configuration': configuration, + "jobReference": self._properties["jobReference"], + "configuration": configuration, } def _copy_configuration_properties(self, configuration): @@ -1657,12 +1657,12 @@ def from_api_repr(cls, resource, client): job_id, config_resource = cls._get_resource_config(resource) config = CopyJobConfig.from_api_repr(config_resource) # Copy required fields to the job. - copy_resource = config_resource['copy'] - destination = TableReference.from_api_repr(copy_resource['destinationTable']) + copy_resource = config_resource["copy"] + destination = TableReference.from_api_repr(copy_resource["destinationTable"]) sources = [] - source_configs = copy_resource.get('sourceTables') + source_configs = copy_resource.get("sourceTables") if source_configs is None: - single = copy_resource.get('sourceTable') + single = copy_resource.get("sourceTable") if single is None: raise KeyError("Resource missing 'sourceTables' / 'sourceTable'") source_configs = [single] @@ -1683,7 +1683,7 @@ class ExtractJobConfig(_JobConfig): """ def __init__(self, **kwargs): - super(ExtractJobConfig, self).__init__('extract', **kwargs) + super(ExtractJobConfig, self).__init__("extract", **kwargs) @property def compression(self): @@ -1693,11 +1693,11 @@ def compression(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression """ - return self._get_sub_prop('compression') + return self._get_sub_prop("compression") @compression.setter def compression(self, value): - self._set_sub_prop('compression', value) + self._set_sub_prop("compression", value) @property def destination_format(self): @@ -1706,11 +1706,11 @@ def destination_format(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat """ - return self._get_sub_prop('destinationFormat') + return self._get_sub_prop("destinationFormat") @destination_format.setter def destination_format(self, value): - self._set_sub_prop('destinationFormat', value) + self._set_sub_prop("destinationFormat", value) @property def field_delimiter(self): @@ -1719,11 +1719,11 @@ def field_delimiter(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter """ - return self._get_sub_prop('fieldDelimiter') + return self._get_sub_prop("fieldDelimiter") @field_delimiter.setter def field_delimiter(self, value): - self._set_sub_prop('fieldDelimiter', value) + self._set_sub_prop("fieldDelimiter", value) @property def print_header(self): @@ -1732,11 +1732,11 @@ def print_header(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader """ - return self._get_sub_prop('printHeader') + return self._get_sub_prop("printHeader") @print_header.setter def print_header(self, value): - self._set_sub_prop('printHeader', value) + self._set_sub_prop("printHeader", value) class ExtractJob(_AsyncJob): @@ -1762,7 +1762,7 @@ class ExtractJob(_AsyncJob): (Optional) Extra configuration options for the extract job. """ - _JOB_TYPE = 'extract' + _JOB_TYPE = "extract" def __init__(self, job_id, source, destination_uris, client, job_config=None): super(ExtractJob, self).__init__(job_id, client) @@ -1816,7 +1816,7 @@ def destination_uri_file_counts(self): specified in the 'destinationUris' field. Returns None if job is not yet complete. """ - counts = self._job_statistics().get('destinationUriFileCounts') + counts = self._job_statistics().get("destinationUriFileCounts") if counts is not None: return [int(count) for count in counts] return None @@ -1825,20 +1825,20 @@ def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" source_ref = { - 'projectId': self.source.project, - 'datasetId': self.source.dataset_id, - 'tableId': self.source.table_id, + "projectId": self.source.project, + "datasetId": self.source.dataset_id, + "tableId": self.source.table_id, } configuration = self._configuration.to_api_repr() - _helpers._set_sub_prop(configuration, ['extract', 'sourceTable'], source_ref) + _helpers._set_sub_prop(configuration, ["extract", "sourceTable"], source_ref) _helpers._set_sub_prop( - configuration, ['extract', 'destinationUris'], self.destination_uris + configuration, ["extract", "destinationUris"], self.destination_uris ) return { - 'jobReference': self._properties['jobReference'], - 'configuration': configuration, + "jobReference": self._properties["jobReference"], + "configuration": configuration, } def _copy_configuration_properties(self, configuration): @@ -1867,14 +1867,14 @@ def from_api_repr(cls, resource, client): job_id, config_resource = cls._get_resource_config(resource) config = ExtractJobConfig.from_api_repr(config_resource) source_config = _helpers._get_sub_prop( - config_resource, ['extract', 'sourceTable'] + config_resource, ["extract", "sourceTable"] ) dataset = DatasetReference( - source_config['projectId'], source_config['datasetId'] + source_config["projectId"], source_config["datasetId"] ) - source = dataset.table(source_config['tableId']) + source = dataset.table(source_config["tableId"]) destination_uris = _helpers._get_sub_prop( - config_resource, ['extract', 'destinationUris'] + config_resource, ["extract", "destinationUris"] ) job = cls(job_id, source, destination_uris, client=client, job_config=config) @@ -1919,7 +1919,7 @@ class QueryJobConfig(_JobConfig): """ def __init__(self, **kwargs): - super(QueryJobConfig, self).__init__('query', **kwargs) + super(QueryJobConfig, self).__init__("query", **kwargs) @property def destination_encryption_configuration(self): @@ -1932,7 +1932,7 @@ def destination_encryption_configuration(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationEncryptionConfiguration """ - prop = self._get_sub_prop('destinationEncryptionConfiguration') + prop = self._get_sub_prop("destinationEncryptionConfiguration") if prop is not None: prop = EncryptionConfiguration.from_api_repr(prop) return prop @@ -1942,7 +1942,7 @@ def destination_encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._set_sub_prop('destinationEncryptionConfiguration', api_repr) + self._set_sub_prop("destinationEncryptionConfiguration", api_repr) @property def allow_large_results(self): @@ -1951,11 +1951,11 @@ def allow_large_results(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults """ - return self._get_sub_prop('allowLargeResults') + return self._get_sub_prop("allowLargeResults") @allow_large_results.setter def allow_large_results(self, value): - self._set_sub_prop('allowLargeResults', value) + self._set_sub_prop("allowLargeResults", value) @property def create_disposition(self): @@ -1965,11 +1965,11 @@ def create_disposition(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition """ - return self._get_sub_prop('createDisposition') + return self._get_sub_prop("createDisposition") @create_disposition.setter def create_disposition(self, value): - self._set_sub_prop('createDisposition', value) + self._set_sub_prop("createDisposition", value) @property def default_dataset(self): @@ -1980,7 +1980,7 @@ def default_dataset(self): See https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset """ - prop = self._get_sub_prop('defaultDataset') + prop = self._get_sub_prop("defaultDataset") if prop is not None: prop = DatasetReference.from_api_repr(prop) return prop @@ -1990,7 +1990,7 @@ def default_dataset(self, value): resource = None if value is not None: resource = value.to_api_repr() - self._set_sub_prop('defaultDataset', resource) + self._set_sub_prop("defaultDataset", resource) @property def destination(self): @@ -2000,7 +2000,7 @@ def destination(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable """ - prop = self._get_sub_prop('destinationTable') + prop = self._get_sub_prop("destinationTable") if prop is not None: prop = TableReference.from_api_repr(prop) return prop @@ -2010,7 +2010,7 @@ def destination(self, value): resource = None if value is not None: resource = value.to_api_repr() - self._set_sub_prop('destinationTable', resource) + self._set_sub_prop("destinationTable", resource) @property def dry_run(self): @@ -2020,11 +2020,11 @@ def dry_run(self): See https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun """ - return self._properties.get('dryRun') + return self._properties.get("dryRun") @dry_run.setter def dry_run(self, value): - self._properties['dryRun'] = value + self._properties["dryRun"] = value @property def flatten_results(self): @@ -2033,11 +2033,11 @@ def flatten_results(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults """ - return self._get_sub_prop('flattenResults') + return self._get_sub_prop("flattenResults") @flatten_results.setter def flatten_results(self, value): - self._set_sub_prop('flattenResults', value) + self._set_sub_prop("flattenResults", value) @property def maximum_billing_tier(self): @@ -2047,11 +2047,11 @@ def maximum_billing_tier(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier """ - return self._get_sub_prop('maximumBillingTier') + return self._get_sub_prop("maximumBillingTier") @maximum_billing_tier.setter def maximum_billing_tier(self, value): - self._set_sub_prop('maximumBillingTier', value) + self._set_sub_prop("maximumBillingTier", value) @property def maximum_bytes_billed(self): @@ -2060,11 +2060,11 @@ def maximum_bytes_billed(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled """ - return _helpers._int_or_none(self._get_sub_prop('maximumBytesBilled')) + return _helpers._int_or_none(self._get_sub_prop("maximumBytesBilled")) @maximum_bytes_billed.setter def maximum_bytes_billed(self, value): - self._set_sub_prop('maximumBytesBilled', str(value)) + self._set_sub_prop("maximumBytesBilled", str(value)) @property def priority(self): @@ -2073,11 +2073,11 @@ def priority(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority """ - return self._get_sub_prop('priority') + return self._get_sub_prop("priority") @priority.setter def priority(self, value): - self._set_sub_prop('priority', value) + self._set_sub_prop("priority", value) @property def query_parameters(self): @@ -2089,12 +2089,12 @@ def query_parameters(self): See: https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.queryParameters """ - prop = self._get_sub_prop('queryParameters', default=[]) + prop = self._get_sub_prop("queryParameters", default=[]) return _from_api_repr_query_parameters(prop) @query_parameters.setter def query_parameters(self, values): - self._set_sub_prop('queryParameters', _to_api_repr_query_parameters(values)) + self._set_sub_prop("queryParameters", _to_api_repr_query_parameters(values)) @property def udf_resources(self): @@ -2104,13 +2104,13 @@ def udf_resources(self): See: https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.userDefinedFunctionResources """ - prop = self._get_sub_prop('userDefinedFunctionResources', default=[]) + prop = self._get_sub_prop("userDefinedFunctionResources", default=[]) return _from_api_repr_udf_resources(prop) @udf_resources.setter def udf_resources(self, values): self._set_sub_prop( - 'userDefinedFunctionResources', _to_api_repr_udf_resources(values) + "userDefinedFunctionResources", _to_api_repr_udf_resources(values) ) @property @@ -2120,11 +2120,11 @@ def use_legacy_sql(self): See https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.useLegacySql """ - return self._get_sub_prop('useLegacySql') + return self._get_sub_prop("useLegacySql") @use_legacy_sql.setter def use_legacy_sql(self, value): - self._set_sub_prop('useLegacySql', value) + self._set_sub_prop("useLegacySql", value) @property def use_query_cache(self): @@ -2133,11 +2133,11 @@ def use_query_cache(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache """ - return self._get_sub_prop('useQueryCache') + return self._get_sub_prop("useQueryCache") @use_query_cache.setter def use_query_cache(self, value): - self._set_sub_prop('useQueryCache', value) + self._set_sub_prop("useQueryCache", value) @property def write_disposition(self): @@ -2147,11 +2147,11 @@ def write_disposition(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition """ - return self._get_sub_prop('writeDisposition') + return self._get_sub_prop("writeDisposition") @write_disposition.setter def write_disposition(self, value): - self._set_sub_prop('writeDisposition', value) + self._set_sub_prop("writeDisposition", value) @property def table_definitions(self): @@ -2161,21 +2161,21 @@ def table_definitions(self): See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions """ - prop = self._get_sub_prop('tableDefinitions') + prop = self._get_sub_prop("tableDefinitions") if prop is not None: prop = _from_api_repr_table_defs(prop) return prop @table_definitions.setter def table_definitions(self, values): - self._set_sub_prop('tableDefinitions', _to_api_repr_table_defs(values)) + self._set_sub_prop("tableDefinitions", _to_api_repr_table_defs(values)) @property def time_partitioning(self): """google.cloud.bigquery.table.TimePartitioning: Specifies time-based partitioning for the destination table. """ - prop = self._get_sub_prop('timePartitioning') + prop = self._get_sub_prop("timePartitioning") if prop is not None: prop = TimePartitioning.from_api_repr(prop) return prop @@ -2185,7 +2185,7 @@ def time_partitioning(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._set_sub_prop('timePartitioning', api_repr) + self._set_sub_prop("timePartitioning", api_repr) @property def clustering_fields(self): @@ -2200,9 +2200,9 @@ def clustering_fields(self): As of 2018-06-29, clustering fields cannot be set on a table which does not also have time partioning defined. """ - prop = self._get_sub_prop('clustering') + prop = self._get_sub_prop("clustering") if prop is not None: - return list(prop.get('fields', ())) + return list(prop.get("fields", ())) @clustering_fields.setter def clustering_fields(self, value): @@ -2211,9 +2211,9 @@ def clustering_fields(self, value): (Defaults to :data:`None`). """ if value is not None: - self._set_sub_prop('clustering', {'fields': value}) + self._set_sub_prop("clustering", {"fields": value}) else: - self._del_sub_prop('clustering') + self._del_sub_prop("clustering") @property def schema_update_options(self): @@ -2221,11 +2221,11 @@ def schema_update_options(self): updates to the destination table schema to allow as a side effect of the query job. """ - return self._get_sub_prop('schemaUpdateOptions') + return self._get_sub_prop("schemaUpdateOptions") @schema_update_options.setter def schema_update_options(self, values): - self._set_sub_prop('schemaUpdateOptions', values) + self._set_sub_prop("schemaUpdateOptions", values) def to_api_repr(self): """Build an API representation of the query job config. @@ -2237,12 +2237,12 @@ def to_api_repr(self): # Query parameters have an addition property associated with them # to indicate if the query is using named or positional parameters. - query_parameters = resource['query'].get('queryParameters') + query_parameters = resource["query"].get("queryParameters") if query_parameters: - if query_parameters[0].get('name') is None: - resource['query']['parameterMode'] = 'POSITIONAL' + if query_parameters[0].get("name") is None: + resource["query"]["parameterMode"] = "POSITIONAL" else: - resource['query']['parameterMode'] = 'NAMED' + resource["query"]["parameterMode"] = "NAMED" return resource @@ -2265,8 +2265,8 @@ class QueryJob(_AsyncJob): (Optional) Extra configuration options for the query job. """ - _JOB_TYPE = 'query' - _UDF_KEY = 'userDefinedFunctionResources' + _JOB_TYPE = "query" + _UDF_KEY = "userDefinedFunctionResources" def __init__(self, job_id, query, client, job_config=None): super(QueryJob, self).__init__(job_id, client) @@ -2425,17 +2425,17 @@ def to_api_repr(self): configuration = self._configuration.to_api_repr() resource = { - 'jobReference': self._properties['jobReference'], - 'configuration': configuration, + "jobReference": self._properties["jobReference"], + "configuration": configuration, } - configuration['query']['query'] = self.query + configuration["query"]["query"] = self.query return resource def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" self._configuration._properties = copy.deepcopy(configuration) - self.query = _helpers._get_sub_prop(configuration, ['query', 'query']) + self.query = _helpers._get_sub_prop(configuration, ["query", "query"]) @classmethod def from_api_repr(cls, resource, client): @@ -2452,7 +2452,7 @@ def from_api_repr(cls, resource, client): :returns: Job parsed from ``resource``. """ job_id, config = cls._get_resource_config(resource) - query = config['query']['query'] + query = config["query"]["query"] job = cls(job_id, query, client=client) job._set_properties(resource) return job @@ -2468,7 +2468,7 @@ def query_plan(self): :returns: mappings describing the query plan, or an empty list if the query has not yet completed. """ - plan_entries = self._job_statistics().get('queryPlan', ()) + plan_entries = self._job_statistics().get("queryPlan", ()) return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] @property @@ -2476,7 +2476,7 @@ def timeline(self): """List(TimelineEntry): Return the query execution timeline from job statistics. """ - raw = self._job_statistics().get('timeline', ()) + raw = self._job_statistics().get("timeline", ()) return [TimelineEntry.from_api_repr(entry) for entry in raw] @property @@ -2490,7 +2490,7 @@ def total_bytes_processed(self): :returns: total bytes processed by the job, or None if job is not yet complete. """ - result = self._job_statistics().get('totalBytesProcessed') + result = self._job_statistics().get("totalBytesProcessed") if result is not None: result = int(result) return result @@ -2506,7 +2506,7 @@ def total_bytes_billed(self): :returns: total bytes processed by the job, or None if job is not yet complete. """ - result = self._job_statistics().get('totalBytesBilled') + result = self._job_statistics().get("totalBytesBilled") if result is not None: result = int(result) return result @@ -2522,7 +2522,7 @@ def billing_tier(self): :returns: billing tier used by the job, or None if job is not yet complete. """ - return self._job_statistics().get('billingTier') + return self._job_statistics().get("billingTier") @property def cache_hit(self): @@ -2535,7 +2535,7 @@ def cache_hit(self): :returns: whether the query results were returned from cache, or None if job is not yet complete. """ - return self._job_statistics().get('cacheHit') + return self._job_statistics().get("cacheHit") @property def ddl_operation_performed(self): @@ -2545,7 +2545,7 @@ def ddl_operation_performed(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.ddlOperationPerformed """ - return self._job_statistics().get('ddlOperationPerformed') + return self._job_statistics().get("ddlOperationPerformed") @property def ddl_target_table(self): @@ -2555,7 +2555,7 @@ def ddl_target_table(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.ddlTargetTable """ - prop = self._job_statistics().get('ddlTargetTable') + prop = self._job_statistics().get("ddlTargetTable") if prop is not None: prop = TableReference.from_api_repr(prop) return prop @@ -2571,7 +2571,7 @@ def num_dml_affected_rows(self): :returns: number of DML rows affected by the job, or None if job is not yet complete. """ - result = self._job_statistics().get('numDmlAffectedRows') + result = self._job_statistics().get("numDmlAffectedRows") if result is not None: result = int(result) return result @@ -2579,7 +2579,7 @@ def num_dml_affected_rows(self): @property def slot_millis(self): """Union[int, None]: Slot-milliseconds used by this query job.""" - return _helpers._int_or_none(self._job_statistics().get('totalSlotMs')) + return _helpers._int_or_none(self._job_statistics().get("totalSlotMs")) @property def statement_type(self): @@ -2592,7 +2592,7 @@ def statement_type(self): :returns: type of statement used by the job, or None if job is not yet complete. """ - return self._job_statistics().get('statementType') + return self._job_statistics().get("statementType") @property def referenced_tables(self): @@ -2608,17 +2608,17 @@ def referenced_tables(self): tables = [] datasets_by_project_name = {} - for table in self._job_statistics().get('referencedTables', ()): + for table in self._job_statistics().get("referencedTables", ()): - t_project = table['projectId'] + t_project = table["projectId"] - ds_id = table['datasetId'] + ds_id = table["datasetId"] t_dataset = datasets_by_project_name.get((t_project, ds_id)) if t_dataset is None: t_dataset = DatasetReference(t_project, ds_id) datasets_by_project_name[(t_project, ds_id)] = t_dataset - t_name = table['tableId'] + t_name = table["tableId"] tables.append(t_dataset.table(t_name)) return tables @@ -2639,14 +2639,14 @@ def undeclared_query_parameters(self): not yet completed. """ parameters = [] - undeclared = self._job_statistics().get('undeclaredQueryParameters', ()) + undeclared = self._job_statistics().get("undeclaredQueryParameters", ()) for parameter in undeclared: - p_type = parameter['parameterType'] + p_type = parameter["parameterType"] - if 'arrayType' in p_type: + if "arrayType" in p_type: klass = ArrayQueryParameter - elif 'structTypes' in p_type: + elif "structTypes" in p_type: klass = StructQueryParameter else: klass = ScalarQueryParameter @@ -2666,7 +2666,7 @@ def estimated_bytes_processed(self): :returns: number of DML rows affected by the job, or None if job is not yet complete. """ - result = self._job_statistics().get('estimatedBytesProcessed') + result = self._job_statistics().get("estimatedBytesProcessed") if result is not None: result = int(result) return result @@ -2797,7 +2797,7 @@ def from_api_repr(cls, resource): :rtype: :class:`QueryPlanEntryStep` :return: new instance built from the resource """ - return cls(kind=resource.get('kind'), substeps=resource.get('substeps', ())) + return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ())) def __eq__(self, other): if not isinstance(other, self.__class__): @@ -2836,39 +2836,39 @@ def from_api_repr(cls, resource): @property def name(self): """Union[str, None]: Human-readable name of the stage.""" - return self._properties.get('name') + return self._properties.get("name") @property def entry_id(self): """Union[str, None]: Unique ID for the stage within the plan.""" - return self._properties.get('id') + return self._properties.get("id") @property def start(self): """Union[Datetime, None]: Datetime when the stage started.""" - if self._properties.get('startMs') is None: + if self._properties.get("startMs") is None: return None return _helpers._datetime_from_microseconds( - int(self._properties.get('startMs')) * 1000.0 + int(self._properties.get("startMs")) * 1000.0 ) @property def end(self): """Union[Datetime, None]: Datetime when the stage ended.""" - if self._properties.get('endMs') is None: + if self._properties.get("endMs") is None: return None return _helpers._datetime_from_microseconds( - int(self._properties.get('endMs')) * 1000.0 + int(self._properties.get("endMs")) * 1000.0 ) @property def input_stages(self): """List(int): Entry IDs for stages that were inputs for this stage.""" - if self._properties.get('inputStages') is None: + if self._properties.get("inputStages") is None: return [] return [ _helpers._int_or_none(entry) - for entry in self._properties.get('inputStages') + for entry in self._properties.get("inputStages") ] @property @@ -2876,26 +2876,26 @@ def parallel_inputs(self): """Union[int, None]: Number of parallel input segments within the stage. """ - return _helpers._int_or_none(self._properties.get('parallelInputs')) + return _helpers._int_or_none(self._properties.get("parallelInputs")) @property def completed_parallel_inputs(self): """Union[int, None]: Number of parallel input segments completed.""" - return _helpers._int_or_none(self._properties.get('completedParallelInputs')) + return _helpers._int_or_none(self._properties.get("completedParallelInputs")) @property def wait_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent waiting to be scheduled. """ - return _helpers._int_or_none(self._properties.get('waitMsAvg')) + return _helpers._int_or_none(self._properties.get("waitMsAvg")) @property def wait_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent waiting to be scheduled. """ - return _helpers._int_or_none(self._properties.get('waitMsMax')) + return _helpers._int_or_none(self._properties.get("waitMsMax")) @property def wait_ratio_avg(self): @@ -2903,7 +2903,7 @@ def wait_ratio_avg(self): to be scheduled, relative to the longest time spent by any worker in any stage of the overall plan. """ - return self._properties.get('waitRatioAvg') + return self._properties.get("waitRatioAvg") @property def wait_ratio_max(self): @@ -2911,21 +2911,21 @@ def wait_ratio_max(self): to be scheduled, relative to the longest time spent by any worker in any stage of the overall plan. """ - return self._properties.get('waitRatioMax') + return self._properties.get("waitRatioMax") @property def read_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent reading input. """ - return _helpers._int_or_none(self._properties.get('readMsAvg')) + return _helpers._int_or_none(self._properties.get("readMsAvg")) @property def read_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent reading input. """ - return _helpers._int_or_none(self._properties.get('readMsMax')) + return _helpers._int_or_none(self._properties.get("readMsMax")) @property def read_ratio_avg(self): @@ -2933,7 +2933,7 @@ def read_ratio_avg(self): input, relative to the longest time spent by any worker in any stage of the overall plan. """ - return self._properties.get('readRatioAvg') + return self._properties.get("readRatioAvg") @property def read_ratio_max(self): @@ -2941,21 +2941,21 @@ def read_ratio_max(self): to be scheduled, relative to the longest time spent by any worker in any stage of the overall plan. """ - return self._properties.get('readRatioMax') + return self._properties.get("readRatioMax") @property def compute_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent on CPU-bound processing. """ - return _helpers._int_or_none(self._properties.get('computeMsAvg')) + return _helpers._int_or_none(self._properties.get("computeMsAvg")) @property def compute_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent on CPU-bound processing. """ - return _helpers._int_or_none(self._properties.get('computeMsMax')) + return _helpers._int_or_none(self._properties.get("computeMsMax")) @property def compute_ratio_avg(self): @@ -2963,7 +2963,7 @@ def compute_ratio_avg(self): CPU-bound processing, relative to the longest time spent by any worker in any stage of the overall plan. """ - return self._properties.get('computeRatioAvg') + return self._properties.get("computeRatioAvg") @property def compute_ratio_max(self): @@ -2971,21 +2971,21 @@ def compute_ratio_max(self): CPU-bound processing, relative to the longest time spent by any worker in any stage of the overall plan. """ - return self._properties.get('computeRatioMax') + return self._properties.get("computeRatioMax") @property def write_ms_avg(self): """Union[int, None]: Milliseconds the average worker spent writing output data. """ - return _helpers._int_or_none(self._properties.get('writeMsAvg')) + return _helpers._int_or_none(self._properties.get("writeMsAvg")) @property def write_ms_max(self): """Union[int, None]: Milliseconds the slowest worker spent writing output data. """ - return _helpers._int_or_none(self._properties.get('writeMsMax')) + return _helpers._int_or_none(self._properties.get("writeMsMax")) @property def write_ratio_avg(self): @@ -2993,7 +2993,7 @@ def write_ratio_avg(self): output data, relative to the longest time spent by any worker in any stage of the overall plan. """ - return self._properties.get('writeRatioAvg') + return self._properties.get("writeRatioAvg") @property def write_ratio_max(self): @@ -3001,36 +3001,36 @@ def write_ratio_max(self): output data, relative to the longest time spent by any worker in any stage of the overall plan. """ - return self._properties.get('writeRatioMax') + return self._properties.get("writeRatioMax") @property def records_read(self): """Union[int, None]: Number of records read by this stage.""" - return _helpers._int_or_none(self._properties.get('recordsRead')) + return _helpers._int_or_none(self._properties.get("recordsRead")) @property def records_written(self): """Union[int, None]: Number of records written by this stage.""" - return _helpers._int_or_none(self._properties.get('recordsWritten')) + return _helpers._int_or_none(self._properties.get("recordsWritten")) @property def status(self): """Union[str, None]: status of this stage.""" - return self._properties.get('status') + return self._properties.get("status") @property def shuffle_output_bytes(self): """Union[int, None]: Number of bytes written by this stage to intermediate shuffle. """ - return _helpers._int_or_none(self._properties.get('shuffleOutputBytes')) + return _helpers._int_or_none(self._properties.get("shuffleOutputBytes")) @property def shuffle_output_bytes_spilled(self): """Union[int, None]: Number of bytes written by this stage to intermediate shuffle and spilled to disk. """ - return _helpers._int_or_none(self._properties.get('shuffleOutputBytesSpilled')) + return _helpers._int_or_none(self._properties.get("shuffleOutputBytesSpilled")) @property def steps(self): @@ -3039,7 +3039,7 @@ def steps(self): """ return [ QueryPlanEntryStep.from_api_repr(step) - for step in self._properties.get('steps', []) + for step in self._properties.get("steps", []) ] @@ -3076,31 +3076,31 @@ def from_api_repr(cls, resource): def elapsed_ms(self): """Union[int, None]: Milliseconds elapsed since start of query execution.""" - return _helpers._int_or_none(self._properties.get('elapsedMs')) + return _helpers._int_or_none(self._properties.get("elapsedMs")) @property def active_units(self): """Union[int, None]: Current number of input units being processed by workers, reported as largest value since the last sample.""" - return _helpers._int_or_none(self._properties.get('activeUnits')) + return _helpers._int_or_none(self._properties.get("activeUnits")) @property def pending_units(self): """Union[int, None]: Current number of input units remaining for query stages active at this sample time.""" - return _helpers._int_or_none(self._properties.get('pendingUnits')) + return _helpers._int_or_none(self._properties.get("pendingUnits")) @property def completed_units(self): """Union[int, None]: Current number of input units completed by this query.""" - return _helpers._int_or_none(self._properties.get('completedUnits')) + return _helpers._int_or_none(self._properties.get("completedUnits")) @property def slot_millis(self): """Union[int, None]: Cumulative slot-milliseconds consumed by this query.""" - return _helpers._int_or_none(self._properties.get('totalSlotMs')) + return _helpers._int_or_none(self._properties.get("totalSlotMs")) class UnknownJob(_AsyncJob): @@ -3118,11 +3118,11 @@ def from_api_repr(cls, resource, client): Returns: UnknownJob: Job corresponding to the resource. """ - job_ref_properties = resource.get('jobReference', {'projectId': client.project}) + job_ref_properties = resource.get("jobReference", {"projectId": client.project}) job_ref = _JobReference._from_api_repr(job_ref_properties) job = cls(job_ref, client) # Populate the job reference with the project, even if it has been # redacted, because we know it should equal that of the request. - resource['jobReference'] = job_ref_properties + resource["jobReference"] = job_ref_properties job._properties = resource return job diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 05e8e52c7ffa..1b8f1f2ee923 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -138,7 +138,7 @@ from IPython import display from IPython.core import magic_arguments except ImportError: # pragma: NO COVER - raise ImportError('This module can only be loaded in IPython.') + raise ImportError("This module can only be loaded in IPython.") import google.auth from google.cloud import bigquery @@ -151,6 +151,7 @@ class Context(object): A Context object is initialized when the ``magics`` module is imported, and can be found at ``google.cloud.bigquery.magics.context``. """ + def __init__(self): self._credentials = None self._project = None @@ -244,52 +245,68 @@ def _run_query(client, query, job_config=None): """ start_time = time.time() query_job = client.query(query, job_config=job_config) - print('Executing query with job ID: {}'.format(query_job.job_id)) + print("Executing query with job ID: {}".format(query_job.job_id)) while True: - print('\rQuery executing: {:0.2f}s'.format( - time.time() - start_time), end='') + print("\rQuery executing: {:0.2f}s".format(time.time() - start_time), end="") try: query_job.result(timeout=0.5) break except futures.TimeoutError: continue - print('\nQuery complete after {:0.2f}s'.format(time.time() - start_time)) + print("\nQuery complete after {:0.2f}s".format(time.time() - start_time)) return query_job @magic_arguments.magic_arguments() @magic_arguments.argument( - 'destination_var', - nargs='?', - help=('If provided, save the output to this variable in addition ' - 'to displaying it.')) + "destination_var", + nargs="?", + help=( + "If provided, save the output to this variable in addition " "to displaying it." + ), +) @magic_arguments.argument( - '--project', + "--project", type=str, default=None, - help=('Project to use for executing this query. Defaults to the context ' - 'project.')) + help=( + "Project to use for executing this query. Defaults to the context " "project." + ), +) @magic_arguments.argument( - '--use_legacy_sql', action='store_true', default=False, - help=('Sets query to use Legacy SQL instead of Standard SQL. Defaults to ' - 'Standard SQL if this argument is not used.')) + "--use_legacy_sql", + action="store_true", + default=False, + help=( + "Sets query to use Legacy SQL instead of Standard SQL. Defaults to " + "Standard SQL if this argument is not used." + ), +) @magic_arguments.argument( - '--verbose', action='store_true', default=False, - help=('If set, print verbose output, including the query job ID and the ' - 'amount of time for the query to finish. By default, this ' - 'information will be displayed as the query runs, but will be ' - 'cleared after the query is finished.')) + "--verbose", + action="store_true", + default=False, + help=( + "If set, print verbose output, including the query job ID and the " + "amount of time for the query to finish. By default, this " + "information will be displayed as the query runs, but will be " + "cleared after the query is finished." + ), +) @magic_arguments.argument( - '--params', - nargs='+', + "--params", + nargs="+", default=None, - help=('Parameters to format the query string. If present, the --params ' - 'flag should be followed by a string representation of a dictionary ' - 'in the format {\'param_name\': \'param_value\'} (ex. {"num": 17}), ' - 'or a reference to a dictionary in the same format. The dictionary ' - 'reference can be made by including a \'$\' before the variable ' - 'name (ex. $my_dict_var).')) + help=( + "Parameters to format the query string. If present, the --params " + "flag should be followed by a string representation of a dictionary " + "in the format {'param_name': 'param_value'} (ex. {\"num\": 17}), " + "or a reference to a dictionary in the same format. The dictionary " + "reference can be made by including a '$' before the variable " + "name (ex. $my_dict_var)." + ), +) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -310,11 +327,13 @@ def _cell_magic(line, query): if args.params is not None: try: params = _helpers.to_query_parameters( - ast.literal_eval(''.join(args.params))) + ast.literal_eval("".join(args.params)) + ) except Exception: raise SyntaxError( - '--params is not a correctly formatted JSON string or a JSON ' - 'serializable dictionary') + "--params is not a correctly formatted JSON string or a JSON " + "serializable dictionary" + ) project = args.project or context.project client = bigquery.Client(project=project, credentials=context.credentials) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index fb22c680e2b1..685d83cf9c7f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -35,6 +35,7 @@ class UDFResource(object): See https://cloud.google.com/bigquery/user-defined-functions#api """ + def __init__(self, udf_type, value): self.udf_type = udf_type self.value = value @@ -42,9 +43,7 @@ def __init__(self, udf_type, value): def __eq__(self, other): if not isinstance(other, UDFResource): return NotImplemented - return( - self.udf_type == other.udf_type - and self.value == other.value) + return self.udf_type == other.udf_type and self.value == other.value def __ne__(self, other): return not self == other @@ -53,6 +52,7 @@ def __ne__(self, other): class _AbstractQueryParameter(object): """Base class for named / positional query parameters. """ + @classmethod def from_api_repr(cls, resource): """Factory: construct parameter from JSON resource. @@ -88,6 +88,7 @@ class ScalarQueryParameter(_AbstractQueryParameter): :class:`datetime.datetime`, or :class:`datetime.date`. :param value: the scalar parameter value. """ + def __init__(self, name, type_, value): self.name = name self.type_ = type_ @@ -123,9 +124,9 @@ def from_api_repr(cls, resource): :rtype: :class:`~google.cloud.bigquery.query.ScalarQueryParameter` :returns: instance """ - name = resource.get('name') - type_ = resource['parameterType']['type'] - value = resource['parameterValue']['value'] + name = resource.get("name") + type_ = resource["parameterType"]["type"] + value = resource["parameterValue"]["value"] converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) return cls(name, type_, converted) @@ -140,15 +141,11 @@ def to_api_repr(self): if converter is not None: value = converter(value) resource = { - 'parameterType': { - 'type': self.type_, - }, - 'parameterValue': { - 'value': value, - }, + "parameterType": {"type": self.type_}, + "parameterValue": {"value": value}, } if self.name is not None: - resource['name'] = self.name + resource["name"] = self.name return resource def _key(self): @@ -160,11 +157,7 @@ def _key(self): tuple: The contents of this :class:`~google.cloud.bigquery.query.ScalarQueryParameter`. """ - return ( - self.name, - self.type_.upper(), - self.value, - ) + return (self.name, self.type_.upper(), self.value) def __eq__(self, other): if not isinstance(other, ScalarQueryParameter): @@ -175,7 +168,7 @@ def __ne__(self, other): return not self == other def __repr__(self): - return 'ScalarQueryParameter{}'.format(self._key()) + return "ScalarQueryParameter{}".format(self._key()) class ArrayQueryParameter(_AbstractQueryParameter): @@ -193,6 +186,7 @@ class ArrayQueryParameter(_AbstractQueryParameter): :type values: list of appropriate scalar type. :param values: the parameter array values. """ + def __init__(self, name, array_type, values): self.name = name self.array_type = array_type @@ -217,32 +211,28 @@ def positional(cls, array_type, values): @classmethod def _from_api_repr_struct(cls, resource): - name = resource.get('name') + name = resource.get("name") converted = [] # We need to flatten the array to use the StructQueryParameter # parse code. resource_template = { # The arrayType includes all the types of the fields of the STRUCT - 'parameterType': resource['parameterType']['arrayType'] + "parameterType": resource["parameterType"]["arrayType"] } - for array_value in resource['parameterValue']['arrayValues']: + for array_value in resource["parameterValue"]["arrayValues"]: struct_resource = copy.deepcopy(resource_template) - struct_resource['parameterValue'] = array_value + struct_resource["parameterValue"] = array_value struct_value = StructQueryParameter.from_api_repr(struct_resource) converted.append(struct_value) - return cls(name, 'STRUCT', converted) + return cls(name, "STRUCT", converted) @classmethod def _from_api_repr_scalar(cls, resource): - name = resource.get('name') - array_type = resource['parameterType']['arrayType']['type'] - values = [ - value['value'] - for value - in resource['parameterValue']['arrayValues']] + name = resource.get("name") + array_type = resource["parameterType"]["arrayType"]["type"] + values = [value["value"] for value in resource["parameterValue"]["arrayValues"]] converted = [ - _QUERY_PARAMS_FROM_JSON[array_type](value, None) - for value in values + _QUERY_PARAMS_FROM_JSON[array_type](value, None) for value in values ] return cls(name, array_type, converted) @@ -256,8 +246,8 @@ def from_api_repr(cls, resource): :rtype: :class:`~google.cloud.bigquery.query.ArrayQueryParameter` :returns: instance """ - array_type = resource['parameterType']['arrayType']['type'] - if array_type == 'STRUCT': + array_type = resource["parameterType"]["arrayType"]["type"] + if array_type == "STRUCT": return cls._from_api_repr_struct(resource) return cls._from_api_repr_scalar(resource) @@ -268,27 +258,22 @@ def to_api_repr(self): :returns: JSON mapping """ values = self.values - if self.array_type == 'RECORD' or self.array_type == 'STRUCT': + if self.array_type == "RECORD" or self.array_type == "STRUCT": reprs = [value.to_api_repr() for value in values] - a_type = reprs[0]['parameterType'] - a_values = [repr_['parameterValue'] for repr_ in reprs] + a_type = reprs[0]["parameterType"] + a_values = [repr_["parameterValue"] for repr_ in reprs] else: - a_type = {'type': self.array_type} + a_type = {"type": self.array_type} converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) if converter is not None: values = [converter(value) for value in values] - a_values = [{'value': value} for value in values] + a_values = [{"value": value} for value in values] resource = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': a_type, - }, - 'parameterValue': { - 'arrayValues': a_values, - }, + "parameterType": {"type": "ARRAY", "arrayType": a_type}, + "parameterValue": {"arrayValues": a_values}, } if self.name is not None: - resource['name'] = self.name + resource["name"] = self.name return resource def _key(self): @@ -300,11 +285,7 @@ def _key(self): tuple: The contents of this :class:`~google.cloud.bigquery.query.ArrayQueryParameter`. """ - return ( - self.name, - self.array_type.upper(), - self.values, - ) + return (self.name, self.array_type.upper(), self.values) def __eq__(self, other): if not isinstance(other, ArrayQueryParameter): @@ -315,7 +296,7 @@ def __ne__(self, other): return not self == other def __repr__(self): - return 'ArrayQueryParameter{}'.format(self._key()) + return "ArrayQueryParameter{}".format(self._key()) class StructQueryParameter(_AbstractQueryParameter): @@ -331,16 +312,17 @@ class StructQueryParameter(_AbstractQueryParameter): :class:`~google.cloud.bigquery.query.StructQueryParameter` :param sub_params: the sub-parameters for the struct """ + def __init__(self, name, *sub_params): self.name = name types = self.struct_types = OrderedDict() values = self.struct_values = {} for sub in sub_params: if isinstance(sub, self.__class__): - types[sub.name] = 'STRUCT' + types[sub.name] = "STRUCT" values[sub.name] = sub elif isinstance(sub, ArrayQueryParameter): - types[sub.name] = 'ARRAY' + types[sub.name] = "ARRAY" values[sub.name] = sub else: types[sub.name] = sub.type_ @@ -372,33 +354,33 @@ def from_api_repr(cls, resource): :rtype: :class:`~google.cloud.bigquery.query.StructQueryParameter` :returns: instance """ - name = resource.get('name') + name = resource.get("name") instance = cls(name) type_resources = {} types = instance.struct_types - for item in resource['parameterType']['structTypes']: - types[item['name']] = item['type']['type'] - type_resources[item['name']] = item['type'] - struct_values = resource['parameterValue']['structValues'] + for item in resource["parameterType"]["structTypes"]: + types[item["name"]] = item["type"]["type"] + type_resources[item["name"]] = item["type"] + struct_values = resource["parameterValue"]["structValues"] for key, value in struct_values.items(): type_ = types[key] converted = None - if type_ == 'STRUCT': + if type_ == "STRUCT": struct_resource = { - 'name': key, - 'parameterType': type_resources[key], - 'parameterValue': value, + "name": key, + "parameterType": type_resources[key], + "parameterValue": value, } converted = StructQueryParameter.from_api_repr(struct_resource) - elif type_ == 'ARRAY': + elif type_ == "ARRAY": struct_resource = { - 'name': key, - 'parameterType': type_resources[key], - 'parameterValue': value, + "name": key, + "parameterType": type_resources[key], + "parameterValue": value, } converted = ArrayQueryParameter.from_api_repr(struct_resource) else: - value = value['value'] + value = value["value"] converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) instance.struct_values[key] = converted return instance @@ -413,28 +395,26 @@ def to_api_repr(self): values = {} for name, value in self.struct_values.items(): type_ = self.struct_types[name] - if type_ in ('STRUCT', 'ARRAY'): + if type_ in ("STRUCT", "ARRAY"): repr_ = value.to_api_repr() - s_types[name] = {'name': name, 'type': repr_['parameterType']} - values[name] = repr_['parameterValue'] + s_types[name] = {"name": name, "type": repr_["parameterType"]} + values[name] = repr_["parameterValue"] else: - s_types[name] = {'name': name, 'type': {'type': type_}} + s_types[name] = {"name": name, "type": {"type": type_}} converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) if converter is not None: value = converter(value) - values[name] = {'value': value} + values[name] = {"value": value} resource = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [s_types[key] for key in self.struct_types], - }, - 'parameterValue': { - 'structValues': values, + "parameterType": { + "type": "STRUCT", + "structTypes": [s_types[key] for key in self.struct_types], }, + "parameterValue": {"structValues": values}, } if self.name is not None: - resource['name'] = self.name + resource["name"] = self.name return resource def _key(self): @@ -446,11 +426,7 @@ def _key(self): tuple: The contents of this :class:`~google.cloud.biquery.ArrayQueryParameter`. """ - return ( - self.name, - self.struct_types, - self.struct_values, - ) + return (self.name, self.struct_types, self.struct_values) def __eq__(self, other): if not isinstance(other, StructQueryParameter): @@ -461,7 +437,7 @@ def __ne__(self, other): return not self == other def __repr__(self): - return 'StructQueryParameter{}'.format(self._key()) + return "StructQueryParameter{}".format(self._key()) class _QueryResults(object): @@ -486,7 +462,7 @@ def project(self): :rtype: str :returns: the project that the query job is associated with. """ - return self._properties.get('jobReference', {}).get('projectId') + return self._properties.get("jobReference", {}).get("projectId") @property def cache_hit(self): @@ -499,7 +475,7 @@ def cache_hit(self): :returns: True if the query results were served from cache (None until set by the server). """ - return self._properties.get('cacheHit') + return self._properties.get("cacheHit") @property def complete(self): @@ -512,7 +488,7 @@ def complete(self): :returns: True if the query completed on the server (None until set by the server). """ - return self._properties.get('jobComplete') + return self._properties.get("jobComplete") @property def errors(self): @@ -525,7 +501,7 @@ def errors(self): :returns: Mappings describing errors generated on the server (None until set by the server). """ - return self._properties.get('errors') + return self._properties.get("errors") @property def job_id(self): @@ -537,7 +513,7 @@ def job_id(self): :rtype: string :returns: Job ID of the query job. """ - return self._properties.get('jobReference', {}).get('jobId') + return self._properties.get("jobReference", {}).get("jobId") @property def page_token(self): @@ -549,7 +525,7 @@ def page_token(self): :rtype: str, or ``NoneType`` :returns: Token generated on the server (None until set by the server). """ - return self._properties.get('pageToken') + return self._properties.get("pageToken") @property def total_rows(self): @@ -561,7 +537,7 @@ def total_rows(self): :rtype: int, or ``NoneType`` :returns: Count generated on the server (None until set by the server). """ - total_rows = self._properties.get('totalRows') + total_rows = self._properties.get("totalRows") if total_rows is not None: return int(total_rows) @@ -575,7 +551,7 @@ def total_bytes_processed(self): :rtype: int, or ``NoneType`` :returns: Count generated on the server (None until set by the server). """ - total_bytes_processed = self._properties.get('totalBytesProcessed') + total_bytes_processed = self._properties.get("totalBytesProcessed") if total_bytes_processed is not None: return int(total_bytes_processed) @@ -589,7 +565,7 @@ def num_dml_affected_rows(self): :rtype: int, or ``NoneType`` :returns: Count generated on the server (None until set by the server). """ - num_dml_affected_rows = self._properties.get('numDmlAffectedRows') + num_dml_affected_rows = self._properties.get("numDmlAffectedRows") if num_dml_affected_rows is not None: return int(num_dml_affected_rows) @@ -603,7 +579,7 @@ def rows(self): :rtype: list of :class:`~google.cloud.bigquery.table.Row` :returns: fields describing the schema (None until set by the server). """ - return _rows_from_json(self._properties.get('rows', ()), self.schema) + return _rows_from_json(self._properties.get("rows", ()), self.schema) @property def schema(self): @@ -615,7 +591,7 @@ def schema(self): :rtype: list of :class:`SchemaField`, or ``NoneType`` :returns: fields describing the schema (None until set by the server). """ - return _parse_schema_resource(self._properties.get('schema', {})) + return _parse_schema_resource(self._properties.get("schema", {})) def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -624,11 +600,12 @@ def _set_properties(self, api_response): :param api_response: response returned from an API call """ job_id_present = ( - 'jobReference' in api_response - and 'jobId' in api_response['jobReference'] - and 'projectId' in api_response['jobReference']) + "jobReference" in api_response + and "jobId" in api_response["jobReference"] + and "projectId" in api_response["jobReference"] + ) if not job_id_present: - raise ValueError('QueryResult requires a job reference') + raise ValueError("QueryResult requires a job reference") self._properties.clear() self._properties.update(copy.deepcopy(api_response)) @@ -636,10 +613,10 @@ def _set_properties(self, api_response): def _query_param_from_api_repr(resource): """Helper: construct concrete query parameter from JSON resource.""" - qp_type = resource['parameterType'] - if 'arrayType' in qp_type: + qp_type = resource["parameterType"] + if "arrayType" in qp_type: klass = ArrayQueryParameter - elif 'structTypes' in qp_type: + elif "structTypes" in qp_type: klass = StructQueryParameter else: klass = ScalarQueryParameter diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 40e54113d309..4bc4b757f45d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -1,4 +1,3 @@ - # Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,12 +16,9 @@ from google.api_core import retry -_RETRYABLE_REASONS = frozenset([ - 'rateLimitExceeded', - 'backendError', - 'internalError', - 'badGateway', -]) +_RETRYABLE_REASONS = frozenset( + ["rateLimitExceeded", "backendError", "internalError", "badGateway"] +) _UNSTRUCTURED_RETRYABLE_TYPES = ( exceptions.TooManyRequests, @@ -37,14 +33,14 @@ def _should_retry(exc): We retry if and only if the 'reason' is 'backendError' or 'rateLimitExceeded'. """ - if not hasattr(exc, 'errors'): + if not hasattr(exc, "errors"): return False if len(exc.errors) == 0: # Check for unstructured error returns, e.g. from GFE return isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES) - reason = exc.errors[0]['reason'] + reason = exc.errors[0]["reason"] return reason in _RETRYABLE_REASONS diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 759d7c3cbe65..99fc65fc0c46 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -32,8 +32,8 @@ class SchemaField(object): fields (Tuple[:class:`~google.cloud.bigquery.schema.SchemaField`]): subfields (requires ``field_type`` of 'RECORD'). """ - def __init__(self, name, field_type, mode='NULLABLE', - description=None, fields=()): + + def __init__(self, name, field_type, mode="NULLABLE", description=None, fields=()): self._name = name self._field_type = field_type self._mode = mode @@ -54,15 +54,15 @@ def from_api_repr(cls, api_repr): The ``SchemaField`` object. """ # Handle optional properties with default values - mode = api_repr.get('mode', 'NULLABLE') - description = api_repr.get('description') - fields = api_repr.get('fields', ()) + mode = api_repr.get("mode", "NULLABLE") + description = api_repr.get("description") + fields = api_repr.get("fields", ()) return cls( - field_type=api_repr['type'].upper(), + field_type=api_repr["type"].upper(), fields=[cls.from_api_repr(f) for f in fields], mode=mode.upper(), description=description, - name=api_repr['name'], + name=api_repr["name"], ) @property @@ -91,7 +91,7 @@ def mode(self): @property def is_nullable(self): """bool: whether 'mode' is 'nullable'.""" - return self._mode == 'NULLABLE' + return self._mode == "NULLABLE" @property def description(self): @@ -115,16 +115,16 @@ def to_api_repr(self): """ # Put together the basic representation. See http://bit.ly/2hOAT5u. answer = { - 'mode': self.mode.upper(), - 'name': self.name, - 'type': self.field_type.upper(), - 'description': self.description, + "mode": self.mode.upper(), + "name": self.name, + "type": self.field_type.upper(), + "description": self.description, } # If this is a RECORD type, then sub-fields are also included, # add this to the serialized representation. - if self.field_type.upper() == 'RECORD': - answer['fields'] = [f.to_api_repr() for f in self.fields] + if self.field_type.upper() == "RECORD": + answer["fields"] = [f.to_api_repr() for f in self.fields] # Done; return the serialized dictionary. return answer @@ -158,7 +158,7 @@ def __hash__(self): return hash(self._key()) def __repr__(self): - return 'SchemaField{}'.format(self._key()) + return "SchemaField{}".format(self._key()) def _parse_schema_resource(info): @@ -171,18 +171,17 @@ def _parse_schema_resource(info): (Union[Sequence[:class:`google.cloud.bigquery.schema.SchemaField`],None]) a list of parsed fields, or ``None`` if no "fields" key found. """ - if 'fields' not in info: + if "fields" not in info: return () schema = [] - for r_field in info['fields']: - name = r_field['name'] - field_type = r_field['type'] - mode = r_field.get('mode', 'NULLABLE') - description = r_field.get('description') + for r_field in info["fields"]: + name = r_field["name"] + field_type = r_field["type"] + mode = r_field.get("mode", "NULLABLE") + description = r_field.get("description") sub_fields = _parse_schema_resource(r_field) - schema.append( - SchemaField(name, field_type, mode, description, sub_fields)) + schema.append(SchemaField(name, field_type, mode, description, sub_fields)) return schema diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 28036fba74ac..c145d6a9dc1c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -22,6 +22,7 @@ import warnings import six + try: import pandas except ImportError: # pragma: NO COVER @@ -38,8 +39,8 @@ _NO_PANDAS_ERROR = ( - 'The pandas library is not installed, please install ' - 'pandas to use the to_dataframe() function.' + "The pandas library is not installed, please install " + "pandas to use the to_dataframe() function." ) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' _MARKER = object() @@ -69,13 +70,13 @@ def _view_use_legacy_sql_getter(table): Raises: ValueError: For invalid value types. """ - view = table._properties.get('view') + view = table._properties.get("view") if view is not None: # The server-side default for useLegacySql is True. - return view.get('useLegacySql', True) + return view.get("useLegacySql", True) # In some cases, such as in a table list no view object is present, but the # resource still represents a view. Use the type as a fallback. - if table.table_type == 'VIEW': + if table.table_type == "VIEW": # The server-side default for useLegacySql is True. return True @@ -90,7 +91,7 @@ class EncryptionConfiguration(object): def __init__(self, kms_key_name=None): self._properties = {} if kms_key_name is not None: - self._properties['kmsKeyName'] = kms_key_name + self._properties["kmsKeyName"] = kms_key_name @property def kms_key_name(self): @@ -99,11 +100,11 @@ def kms_key_name(self): Resource ID of Cloud KMS key or :data:`None` if using default encryption. """ - return self._properties.get('kmsKeyName') + return self._properties.get("kmsKeyName") @kms_key_name.setter def kms_key_name(self, value): - self._properties['kmsKeyName'] = value + self._properties["kmsKeyName"] = value @classmethod def from_api_repr(cls, resource): @@ -144,7 +145,7 @@ def __hash__(self): return hash(self.kms_key_name) def __repr__(self): - return 'EncryptionConfiguration({})'.format(self.kms_key_name) + return "EncryptionConfiguration({})".format(self.kms_key_name) class TableReference(object): @@ -182,8 +183,11 @@ def table_id(self): @property def path(self): """str: URL path for the table's APIs.""" - return '/projects/%s/datasets/%s/tables/%s' % ( - self._project, self._dataset_id, self._table_id) + return "/projects/%s/datasets/%s/tables/%s" % ( + self._project, + self._dataset_id, + self._table_id, + ) @classmethod def from_string(cls, table_id, default_project=None): @@ -215,32 +219,33 @@ def from_string(cls, table_id, default_project=None): output_project_id = default_project output_dataset_id = None output_table_id = None - parts = table_id.split('.') + parts = table_id.split(".") if len(parts) < 2: raise ValueError( - 'table_id must be a fully-qualified table ID in ' + "table_id must be a fully-qualified table ID in " 'standard SQL format. e.g. "project.dataset.table", got ' - '{}'.format(table_id)) + "{}".format(table_id) + ) elif len(parts) == 2: if not default_project: raise ValueError( - 'When default_project is not set, table_id must be a ' - 'fully-qualified table ID in standard SQL format. ' - 'e.g. "project.dataset_id.table_id", got {}'.format( - table_id)) + "When default_project is not set, table_id must be a " + "fully-qualified table ID in standard SQL format. " + 'e.g. "project.dataset_id.table_id", got {}'.format(table_id) + ) output_dataset_id, output_table_id = parts elif len(parts) == 3: output_project_id, output_dataset_id, output_table_id = parts if len(parts) > 3: raise ValueError( - 'Too many parts in table_id. Must be a fully-qualified table ' + "Too many parts in table_id. Must be a fully-qualified table " 'ID in standard SQL format. e.g. "project.dataset.table", ' - 'got {}'.format(table_id)) + "got {}".format(table_id) + ) return cls( - DatasetReference(output_project_id, output_dataset_id), - output_table_id, + DatasetReference(output_project_id, output_dataset_id), output_table_id ) @classmethod @@ -257,9 +262,9 @@ def from_api_repr(cls, resource): """ from google.cloud.bigquery.dataset import DatasetReference - project = resource['projectId'] - dataset_id = resource['datasetId'] - table_id = resource['tableId'] + project = resource["projectId"] + dataset_id = resource["datasetId"] + table_id = resource["tableId"] return cls(DatasetReference(project, dataset_id), table_id) def to_api_repr(self): @@ -269,9 +274,9 @@ def to_api_repr(self): Dict[str, object]: Table reference represented as an API resource """ return { - 'projectId': self._project, - 'datasetId': self._dataset_id, - 'tableId': self._table_id, + "projectId": self._project, + "datasetId": self._dataset_id, + "tableId": self._table_id, } def _key(self): @@ -282,11 +287,7 @@ def _key(self): Returns: Tuple[str]: The contents of this :class:`DatasetReference`. """ - return ( - self._project, - self._dataset_id, - self._table_id, - ) + return (self._project, self._dataset_id, self._table_id) def __eq__(self, other): if not isinstance(other, TableReference): @@ -301,9 +302,9 @@ def __hash__(self): def __repr__(self): from google.cloud.bigquery.dataset import DatasetReference + dataset_ref = DatasetReference(self._project, self._dataset_id) - return "TableReference({}, '{}')".format( - repr(dataset_ref), self._table_id) + return "TableReference({}, '{}')".format(repr(dataset_ref), self._table_id) class Table(object): @@ -320,22 +321,19 @@ class Table(object): """ _PROPERTY_TO_API_FIELD = { - 'friendly_name': 'friendlyName', - 'expires': 'expirationTime', - 'time_partitioning': 'timePartitioning', - 'partitioning_type': 'timePartitioning', - 'partition_expiration': 'timePartitioning', - 'view_use_legacy_sql': 'view', - 'view_query': 'view', - 'external_data_configuration': 'externalDataConfiguration', - 'encryption_configuration': 'encryptionConfiguration', + "friendly_name": "friendlyName", + "expires": "expirationTime", + "time_partitioning": "timePartitioning", + "partitioning_type": "timePartitioning", + "partition_expiration": "timePartitioning", + "view_use_legacy_sql": "view", + "view_query": "view", + "external_data_configuration": "externalDataConfiguration", + "encryption_configuration": "encryptionConfiguration", } def __init__(self, table_ref, schema=None): - self._properties = { - 'tableReference': table_ref.to_api_repr(), - 'labels': {}, - } + self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}} # Let the @property do validation. if schema is not None: self.schema = schema @@ -343,25 +341,28 @@ def __init__(self, table_ref, schema=None): @property def project(self): """str: Project bound to the table.""" - return self._properties['tableReference']['projectId'] + return self._properties["tableReference"]["projectId"] @property def dataset_id(self): """str: ID of dataset containing the table.""" - return self._properties['tableReference']['datasetId'] + return self._properties["tableReference"]["datasetId"] @property def table_id(self): """str: ID of the table.""" - return self._properties['tableReference']['tableId'] + return self._properties["tableReference"]["tableId"] reference = property(_reference_getter) @property def path(self): """str: URL path for the table's APIs.""" - return '/projects/%s/datasets/%s/tables/%s' % ( - self.project, self.dataset_id, self.table_id) + return "/projects/%s/datasets/%s/tables/%s" % ( + self.project, + self.dataset_id, + self.table_id, + ) @property def schema(self): @@ -373,7 +374,7 @@ def schema(self): If any item in the sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` """ - prop = self._properties.get('schema') + prop = self._properties.get("schema") if not prop: return [] else: @@ -382,13 +383,11 @@ def schema(self): @schema.setter def schema(self, value): if value is None: - self._properties['schema'] = None + self._properties["schema"] = None elif not all(isinstance(field, SchemaField) for field in value): - raise ValueError('Schema items must be fields') + raise ValueError("Schema items must be fields") else: - self._properties['schema'] = { - 'fields': _build_schema_resource(value) - } + self._properties["schema"] = {"fields": _build_schema_resource(value)} @property def labels(self): @@ -401,13 +400,13 @@ def labels(self): Raises: ValueError: If ``value`` type is invalid. """ - return self._properties.setdefault('labels', {}) + return self._properties.setdefault("labels", {}) @labels.setter def labels(self, value): if not isinstance(value, dict): raise ValueError("Pass a dict") - self._properties['labels'] = value + self._properties["labels"] = value @property def encryption_configuration(self): @@ -421,7 +420,7 @@ def encryption_configuration(self): `_ in the BigQuery documentation. """ - prop = self._properties.get('encryptionConfiguration') + prop = self._properties.get("encryptionConfiguration") if prop is not None: prop = EncryptionConfiguration.from_api_repr(prop) return prop @@ -431,57 +430,59 @@ def encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties['encryptionConfiguration'] = api_repr + self._properties["encryptionConfiguration"] = api_repr @property def created(self): """Union[datetime.datetime, None]: Datetime at which the table was created (:data:`None` until set from the server). """ - creation_time = self._properties.get('creationTime') + creation_time = self._properties.get("creationTime") if creation_time is not None: # creation_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( - 1000.0 * float(creation_time)) + 1000.0 * float(creation_time) + ) @property def etag(self): """Union[str, None]: ETag for the table resource (:data:`None` until set from the server). """ - return self._properties.get('etag') + return self._properties.get("etag") @property def modified(self): """Union[datetime.datetime, None]: Datetime at which the table was last modified (:data:`None` until set from the server). """ - modified_time = self._properties.get('lastModifiedTime') + modified_time = self._properties.get("lastModifiedTime") if modified_time is not None: # modified_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( - 1000.0 * float(modified_time)) + 1000.0 * float(modified_time) + ) @property def num_bytes(self): """Union[int, None]: The size of the table in bytes (:data:`None` until set from the server). """ - return _helpers._int_or_none(self._properties.get('numBytes')) + return _helpers._int_or_none(self._properties.get("numBytes")) @property def num_rows(self): """Union[int, None]: The number of rows in the table (:data:`None` until set from the server). """ - return _helpers._int_or_none(self._properties.get('numRows')) + return _helpers._int_or_none(self._properties.get("numRows")) @property def self_link(self): """Union[str, None]: URL for the table resource (:data:`None` until set from the server). """ - return self._properties.get('selfLink') + return self._properties.get("selfLink") @property def full_table_id(self): @@ -490,7 +491,7 @@ def full_table_id(self): In the format ``project_id:dataset_id.table_id``. """ - return self._properties.get('id') + return self._properties.get("id") @property def table_type(self): @@ -499,7 +500,7 @@ def table_type(self): Possible values are ``'TABLE'``, ``'VIEW'``, or ``'EXTERNAL'``. """ - return self._properties.get('type') + return self._properties.get("type") @property def time_partitioning(self): @@ -510,7 +511,7 @@ def time_partitioning(self): ValueError: If the value is not :class:`TimePartitioning` or :data:`None`. """ - prop = self._properties.get('timePartitioning') + prop = self._properties.get("timePartitioning") if prop is not None: return TimePartitioning.from_api_repr(prop) @@ -521,9 +522,9 @@ def time_partitioning(self, value): api_repr = value.to_api_repr() elif value is not None: raise ValueError( - "value must be google.cloud.bigquery.table.TimePartitioning " - "or None") - self._properties['timePartitioning'] = api_repr + "value must be google.cloud.bigquery.table.TimePartitioning " "or None" + ) + self._properties["timePartitioning"] = api_repr @property def partitioning_type(self): @@ -536,7 +537,9 @@ def partitioning_type(self): warnings.warn( "This method will be deprecated in future versions. Please use " "Table.time_partitioning.type_ instead.", - PendingDeprecationWarning, stacklevel=2) + PendingDeprecationWarning, + stacklevel=2, + ) if self.time_partitioning is not None: return self.time_partitioning.type_ @@ -545,10 +548,12 @@ def partitioning_type(self, value): warnings.warn( "This method will be deprecated in future versions. Please use " "Table.time_partitioning.type_ instead.", - PendingDeprecationWarning, stacklevel=2) + PendingDeprecationWarning, + stacklevel=2, + ) if self.time_partitioning is None: - self._properties['timePartitioning'] = {} - self._properties['timePartitioning']['type'] = value + self._properties["timePartitioning"] = {} + self._properties["timePartitioning"]["type"] = value @property def partition_expiration(self): @@ -561,7 +566,9 @@ def partition_expiration(self): warnings.warn( "This method will be deprecated in future versions. Please use " "Table.time_partitioning.expiration_ms instead.", - PendingDeprecationWarning, stacklevel=2) + PendingDeprecationWarning, + stacklevel=2, + ) if self.time_partitioning is not None: return self.time_partitioning.expiration_ms @@ -570,11 +577,12 @@ def partition_expiration(self, value): warnings.warn( "This method will be deprecated in future versions. Please use " "Table.time_partitioning.expiration_ms instead.", - PendingDeprecationWarning, stacklevel=2) + PendingDeprecationWarning, + stacklevel=2, + ) if self.time_partitioning is None: - self._properties['timePartitioning'] = { - 'type': TimePartitioningType.DAY} - self._properties['timePartitioning']['expirationMs'] = str(value) + self._properties["timePartitioning"] = {"type": TimePartitioningType.DAY} + self._properties["timePartitioning"]["expirationMs"] = str(value) @property def clustering_fields(self): @@ -589,9 +597,9 @@ def clustering_fields(self): As of 2018-06-29, clustering fields cannot be set on a table which does not also have time partioning defined. """ - prop = self._properties.get('clustering') + prop = self._properties.get("clustering") if prop is not None: - return list(prop.get('fields', ())) + return list(prop.get("fields", ())) @clustering_fields.setter def clustering_fields(self, value): @@ -600,11 +608,11 @@ def clustering_fields(self, value): (Defaults to :data:`None`). """ if value is not None: - prop = self._properties.setdefault('clustering', {}) - prop['fields'] = value + prop = self._properties.setdefault("clustering", {}) + prop["fields"] = value else: - if 'clustering' in self._properties: - del self._properties['clustering'] + if "clustering" in self._properties: + del self._properties["clustering"] @property def description(self): @@ -614,13 +622,13 @@ def description(self): Raises: ValueError: For invalid value types. """ - return self._properties.get('description') + return self._properties.get("description") @description.setter def description(self, value): if not isinstance(value, six.string_types) and value is not None: raise ValueError("Pass a string, or None") - self._properties['description'] = value + self._properties["description"] = value @property def expires(self): @@ -630,18 +638,19 @@ def expires(self): Raises: ValueError: For invalid value types. """ - expiration_time = self._properties.get('expirationTime') + expiration_time = self._properties.get("expirationTime") if expiration_time is not None: # expiration_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( - 1000.0 * float(expiration_time)) + 1000.0 * float(expiration_time) + ) @expires.setter def expires(self, value): if not isinstance(value, datetime.datetime) and value is not None: raise ValueError("Pass a datetime, or None") value_ms = google.cloud._helpers._millis_from_datetime(value) - self._properties['expirationTime'] = _helpers._str_or_none(value_ms) + self._properties["expirationTime"] = _helpers._str_or_none(value_ms) @property def friendly_name(self): @@ -650,13 +659,13 @@ def friendly_name(self): Raises: ValueError: For invalid value types. """ - return self._properties.get('friendlyName') + return self._properties.get("friendlyName") @friendly_name.setter def friendly_name(self, value): if not isinstance(value, six.string_types) and value is not None: raise ValueError("Pass a string, or None") - self._properties['friendlyName'] = value + self._properties["friendlyName"] = value @property def location(self): @@ -664,7 +673,7 @@ def location(self): Defaults to :data:`None`. """ - return self._properties.get('location') + return self._properties.get("location") @property def view_query(self): @@ -677,27 +686,27 @@ def view_query(self): Raises: ValueError: For invalid value types. """ - view = self._properties.get('view') + view = self._properties.get("view") if view is not None: - return view.get('query') + return view.get("query") @view_query.setter def view_query(self, value): if not isinstance(value, six.string_types): raise ValueError("Pass a string") - view = self._properties.get('view') + view = self._properties.get("view") if view is None: - view = self._properties['view'] = {} - view['query'] = value + view = self._properties["view"] = {} + view["query"] = value # The service defaults useLegacySql to True, but this # client uses Standard SQL by default. - if view.get('useLegacySql') is None: - view['useLegacySql'] = False + if view.get("useLegacySql") is None: + view["useLegacySql"] = False @view_query.deleter def view_query(self): """Delete SQL query defining the table as a view.""" - self._properties.pop('view', None) + self._properties.pop("view", None) view_use_legacy_sql = property(_view_use_legacy_sql_getter) @@ -705,16 +714,16 @@ def view_query(self): def view_use_legacy_sql(self, value): if not isinstance(value, bool): raise ValueError("Pass a boolean") - if self._properties.get('view') is None: - self._properties['view'] = {} - self._properties['view']['useLegacySql'] = value + if self._properties.get("view") is None: + self._properties["view"] = {} + self._properties["view"]["useLegacySql"] = value @property def streaming_buffer(self): """google.cloud.bigquery.StreamingBuffer: Information about a table's streaming buffer. """ - sb = self._properties.get('streamingBuffer') + sb = self._properties.get("streamingBuffer") if sb is not None: return StreamingBuffer(sb) @@ -726,7 +735,7 @@ def external_data_configuration(self): Raises: ValueError: For invalid value types. """ - prop = self._properties.get('externalDataConfiguration') + prop = self._properties.get("externalDataConfiguration") if prop is not None: prop = ExternalConfig.from_api_repr(prop) return prop @@ -738,7 +747,7 @@ def external_data_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties['externalDataConfiguration'] = api_repr + self._properties["externalDataConfiguration"] = api_repr @classmethod def from_string(cls, full_table_id): @@ -785,12 +794,17 @@ def from_api_repr(cls, resource): """ from google.cloud.bigquery import dataset - if ('tableReference' not in resource or 'tableId' not in resource['tableReference']): - raise KeyError('Resource lacks required identity information:' - '["tableReference"]["tableId"]') - project_id = resource['tableReference']['projectId'] - table_id = resource['tableReference']['tableId'] - dataset_id = resource['tableReference']['datasetId'] + if ( + "tableReference" not in resource + or "tableId" not in resource["tableReference"] + ): + raise KeyError( + "Resource lacks required identity information:" + '["tableReference"]["tableId"]' + ) + project_id = resource["tableReference"]["projectId"] + table_id = resource["tableReference"]["tableId"] + dataset_id = resource["tableReference"]["datasetId"] dataset_ref = dataset.DatasetReference(project_id, dataset_id) table = cls(dataset_ref.table(table_id)) @@ -812,7 +826,7 @@ def _build_resource(self, filter_fields): for filter_field in filter_fields: api_field = self._PROPERTY_TO_API_FIELD.get(filter_field) if api_field is None and filter_field not in self._properties: - raise ValueError('No Table property %s' % filter_field) + raise ValueError("No Table property %s" % filter_field) elif api_field is not None: partial[api_field] = self._properties.get(api_field) else: @@ -823,7 +837,7 @@ def _build_resource(self, filter_fields): return partial def __repr__(self): - return 'Table({})'.format(repr(self.reference)) + return "Table({})".format(repr(self.reference)) class TableListItem(object): @@ -851,34 +865,35 @@ class TableListItem(object): """ def __init__(self, resource): - if 'tableReference' not in resource: - raise ValueError('resource must contain a tableReference value') - if 'projectId' not in resource['tableReference']: + if "tableReference" not in resource: + raise ValueError("resource must contain a tableReference value") + if "projectId" not in resource["tableReference"]: raise ValueError( - "resource['tableReference'] must contain a projectId value") - if 'datasetId' not in resource['tableReference']: + "resource['tableReference'] must contain a projectId value" + ) + if "datasetId" not in resource["tableReference"]: raise ValueError( - "resource['tableReference'] must contain a datasetId value") - if 'tableId' not in resource['tableReference']: - raise ValueError( - "resource['tableReference'] must contain a tableId value") + "resource['tableReference'] must contain a datasetId value" + ) + if "tableId" not in resource["tableReference"]: + raise ValueError("resource['tableReference'] must contain a tableId value") self._properties = resource @property def project(self): """str: Project bound to the table.""" - return self._properties['tableReference']['projectId'] + return self._properties["tableReference"]["projectId"] @property def dataset_id(self): """str: ID of dataset containing the table.""" - return self._properties['tableReference']['datasetId'] + return self._properties["tableReference"]["datasetId"] @property def table_id(self): """str: ID of the table.""" - return self._properties['tableReference']['tableId'] + return self._properties["tableReference"]["tableId"] reference = property(_reference_getter) @@ -890,7 +905,7 @@ def labels(self): modify the dict, then call ``Client.update_table``. To delete a label, set its value to :data:`None` before updating. """ - return self._properties.setdefault('labels', {}) + return self._properties.setdefault("labels", {}) @property def full_table_id(self): @@ -899,7 +914,7 @@ def full_table_id(self): In the format ``project_id:dataset_id.table_id``. """ - return self._properties.get('id') + return self._properties.get("id") @property def table_type(self): @@ -908,14 +923,14 @@ def table_type(self): Possible values are ``'TABLE'``, ``'VIEW'``, or ``'EXTERNAL'``. """ - return self._properties.get('type') + return self._properties.get("type") @property def time_partitioning(self): """google.cloud.bigquery.table.TimePartitioning: Configures time-based partitioning for a table. """ - prop = self._properties.get('timePartitioning') + prop = self._properties.get("timePartitioning") if prop is not None: return TimePartitioning.from_api_repr(prop) @@ -927,7 +942,9 @@ def partitioning_type(self): warnings.warn( "This method will be deprecated in future versions. Please use " "TableListItem.time_partitioning.type_ instead.", - PendingDeprecationWarning, stacklevel=2) + PendingDeprecationWarning, + stacklevel=2, + ) if self.time_partitioning is not None: return self.time_partitioning.type_ @@ -941,14 +958,16 @@ def partition_expiration(self): warnings.warn( "This method will be deprecated in future versions. Please use " "TableListItem.time_partitioning.expiration_ms instead.", - PendingDeprecationWarning, stacklevel=2) + PendingDeprecationWarning, + stacklevel=2, + ) if self.time_partitioning is not None: return self.time_partitioning.expiration_ms @property def friendly_name(self): """Union[str, None]: Title of the table (defaults to :data:`None`).""" - return self._properties.get('friendlyName') + return self._properties.get("friendlyName") view_use_legacy_sql = property(_view_use_legacy_sql_getter) @@ -976,15 +995,14 @@ def _row_from_mapping(mapping, schema): row = [] for field in schema: - if field.mode == 'REQUIRED': + if field.mode == "REQUIRED": row.append(mapping[field.name]) - elif field.mode == 'REPEATED': + elif field.mode == "REPEATED": row.append(mapping.get(field.name, ())) - elif field.mode == 'NULLABLE': + elif field.mode == "NULLABLE": row.append(mapping.get(field.name)) else: - raise ValueError( - "Unknown field mode: {}".format(field.mode)) + raise ValueError("Unknown field mode: {}".format(field.mode)) return tuple(row) @@ -999,12 +1017,12 @@ class StreamingBuffer(object): """ def __init__(self, resource): - self.estimated_bytes = int(resource['estimatedBytes']) - self.estimated_rows = int(resource['estimatedRows']) + self.estimated_bytes = int(resource["estimatedBytes"]) + self.estimated_rows = int(resource["estimatedRows"]) # time is in milliseconds since the epoch. - self.oldest_entry_time = ( - google.cloud._helpers._datetime_from_microseconds( - 1000.0 * int(resource['oldestEntryTime']))) + self.oldest_entry_time = google.cloud._helpers._datetime_from_microseconds( + 1000.0 * int(resource["oldestEntryTime"]) + ) class Row(object): @@ -1020,7 +1038,7 @@ class Row(object): """ # Choose unusual field names to try to avoid conflict with schema fields. - __slots__ = ('_xxx_values', '_xxx_field_to_index') + __slots__ = ("_xxx_values", "_xxx_field_to_index") def __init__(self, values, field_to_index): self._xxx_values = values @@ -1102,7 +1120,7 @@ def get(self, key, default=None): def __getattr__(self, name): value = self._xxx_field_to_index.get(name) if value is None: - raise AttributeError('no row field {!r}'.format(name)) + raise AttributeError("no row field {!r}".format(name)) return self._xxx_values[value] def __len__(self): @@ -1112,26 +1130,26 @@ def __getitem__(self, key): if isinstance(key, six.string_types): value = self._xxx_field_to_index.get(key) if value is None: - raise KeyError('no row field {!r}'.format(key)) + raise KeyError("no row field {!r}".format(key)) key = value return self._xxx_values[key] def __eq__(self, other): if not isinstance(other, Row): return NotImplemented - return( + return ( self._xxx_values == other._xxx_values - and self._xxx_field_to_index == other._xxx_field_to_index) + and self._xxx_field_to_index == other._xxx_field_to_index + ) def __ne__(self, other): return not self == other def __repr__(self): # sort field dict by value, for determinism - items = sorted(self._xxx_field_to_index.items(), - key=operator.itemgetter(1)) - f2i = '{' + ', '.join('%r: %d' % item for item in items) + '}' - return 'Row({}, {})'.format(self._xxx_values, f2i) + items = sorted(self._xxx_field_to_index.items(), key=operator.itemgetter(1)) + f2i = "{" + ", ".join("%r: %d" % item for item in items) + "}" + return "Row({}, {})".format(self._xxx_values, f2i) class RowIterator(HTTPIterator): @@ -1150,13 +1168,29 @@ class RowIterator(HTTPIterator): Extra query string parameters for the API call. """ - def __init__(self, client, api_request, path, schema, page_token=None, - max_results=None, page_size=None, extra_params=None): + def __init__( + self, + client, + api_request, + path, + schema, + page_token=None, + max_results=None, + page_size=None, + extra_params=None, + ): super(RowIterator, self).__init__( - client, api_request, path, item_to_value=_item_to_row, - items_key='rows', page_token=page_token, max_results=max_results, - extra_params=extra_params, page_start=_rows_page_start, - next_token='pageToken') + client, + api_request, + path, + item_to_value=_item_to_row, + items_key="rows", + page_token=page_token, + max_results=max_results, + extra_params=extra_params, + page_start=_rows_page_start, + next_token="pageToken", + ) self._schema = schema self._field_to_index = _helpers._field_to_index_mapping(schema) self._total_rows = None @@ -1171,11 +1205,10 @@ def _get_next_page_response(self): """ params = self._get_query_params() if self._page_size is not None: - params['maxResults'] = self._page_size + params["maxResults"] = self._page_size return self.api_request( - method=self._HTTP_METHOD, - path=self.path, - query_params=params) + method=self._HTTP_METHOD, path=self.path, query_params=params + ) @property def schema(self): @@ -1217,6 +1250,7 @@ class _EmptyRowIterator(object): are impossible to fetch, such as with query results for DDL CREATE VIEW statements. """ + schema = () pages = () total_rows = 0 @@ -1233,7 +1267,7 @@ def __iter__(self): class TimePartitioningType(object): """Specifies the type of time partitioning to perform.""" - DAY = 'DAY' + DAY = "DAY" """str: Generates one partition per day.""" @@ -1258,8 +1292,10 @@ class TimePartitioning(object): partition filter that can be used for partition elimination to be specified. """ - def __init__(self, type_=None, field=None, expiration_ms=None, - require_partition_filter=None): + + def __init__( + self, type_=None, field=None, expiration_ms=None, require_partition_filter=None + ): self._properties = {} if type_ is None: self.type_ = TimePartitioningType.DAY @@ -1277,39 +1313,39 @@ def type_(self): """google.cloud.bigquery.table.TimePartitioningType: The type of time partitioning to use. """ - return self._properties['type'] + return self._properties["type"] @type_.setter def type_(self, value): - self._properties['type'] = value + self._properties["type"] = value @property def field(self): """str: Field in the table to use for partitioning""" - return self._properties.get('field') + return self._properties.get("field") @field.setter def field(self, value): - self._properties['field'] = value + self._properties["field"] = value @property def expiration_ms(self): """int: Number of milliseconds to keep the storage for a partition.""" - return _helpers._int_or_none(self._properties.get('expirationMs')) + return _helpers._int_or_none(self._properties.get("expirationMs")) @expiration_ms.setter def expiration_ms(self, value): - self._properties['expirationMs'] = str(value) + self._properties["expirationMs"] = str(value) @property def require_partition_filter(self): """bool: Specifies whether partition filters are required for queries """ - return self._properties.get('requirePartitionFilter') + return self._properties.get("requirePartitionFilter") @require_partition_filter.setter def require_partition_filter(self, value): - self._properties['requirePartitionFilter'] = value + self._properties["requirePartitionFilter"] = value @classmethod def from_api_repr(cls, api_repr): @@ -1336,7 +1372,7 @@ def from_api_repr(cls, api_repr): google.cloud.bigquery.table.TimePartitioning: The ``TimePartitioning`` object. """ - instance = cls(api_repr['type']) + instance = cls(api_repr["type"]) instance._properties = api_repr return instance @@ -1370,8 +1406,8 @@ def __hash__(self): return hash(self._key()) def __repr__(self): - key_vals = ['{}={}'.format(key, val) for key, val in self._key()] - return 'TimePartitioning({})'.format(','.join(key_vals)) + key_vals = ["{}={}".format(key, val) for key, val in self._key()] + return "TimePartitioning({})".format(",".join(key_vals)) def _item_to_row(iterator, resource): @@ -1392,8 +1428,10 @@ def _item_to_row(iterator, resource): :rtype: :class:`~google.cloud.bigquery.table.Row` :returns: The next row in the page. """ - return Row(_helpers._row_tuple_from_json(resource, iterator.schema), - iterator._field_to_index) + return Row( + _helpers._row_tuple_from_json(resource, iterator.schema), + iterator._field_to_index, + ) # pylint: disable=unused-argument @@ -1409,8 +1447,10 @@ def _rows_page_start(iterator, page, response): :type response: dict :param response: The JSON API response for a page of rows in a table. """ - total_rows = response.get('totalRows') + total_rows = response.get("totalRows") if total_rows is not None: total_rows = int(total_rows) iterator._total_rows = total_rows + + # pylint: enable=unused-argument diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 21b8487039cc..0927d2d430f1 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -127,6 +127,18 @@ def snippets(session): 'py.test', os.path.join('docs', 'snippets.py'), *session.posargs) +@nox.session(python='3.6') +def cover(session): + """Run the final coverage report. + + This outputs the coverage report aggregating coverage from the unit + test runs (not system test runs), and then erases coverage data. + """ + session.install('coverage', 'pytest-cov') + session.run('coverage', 'report', '--show-missing', '--fail-under=100') + session.run('coverage', 'erase') + + @nox.session(python='3.6') def lint(session): """Run linters. @@ -152,13 +164,16 @@ def lint_setup_py(session): 'python', 'setup.py', 'check', '--restructuredtext', '--strict') -@nox.session(python='3.6') -def cover(session): - """Run the final coverage report. - This outputs the coverage report aggregating coverage from the unit - test runs (not system test runs), and then erases coverage data. +@nox.session(python="3.6") +def blacken(session): + """Run black. + Format code to uniform standard. """ - session.install('coverage', 'pytest-cov') - session.run('coverage', 'report', '--show-missing', '--fail-under=100') - session.run('coverage', 'erase') + session.install("black") + session.run( + "black", + "google", + "tests", + "docs", + ) \ No newline at end of file diff --git a/packages/google-cloud-bigquery/tests/scrub_datasets.py b/packages/google-cloud-bigquery/tests/scrub_datasets.py index 2e8981aa62e3..9a8ab3e7b7c5 100644 --- a/packages/google-cloud-bigquery/tests/scrub_datasets.py +++ b/packages/google-cloud-bigquery/tests/scrub_datasets.py @@ -8,8 +8,7 @@ def main(prefixes): client = Client() - pattern = re.compile( - '|'.join('^{}.*$'.format(prefix) for prefix in prefixes)) + pattern = re.compile("|".join("^{}.*$".format(prefix) for prefix in prefixes)) ds_items = list(client.list_datasets()) for dataset in ds_items: @@ -22,5 +21,5 @@ def main(prefixes): print(" NOT FOUND") -if __name__ == '__main__': +if __name__ == "__main__": main(sys.argv[1:]) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 29d5fb908dea..fe5e3ce3dea0 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -27,6 +27,7 @@ import six import pytest + try: import pandas except ImportError: # pragma: NO COVER @@ -64,28 +65,32 @@ # Common table data used for many tests. ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), + ("Phred Phlyntstone", 32), + ("Bharney Rhubble", 33), + ("Wylma Phlyntstone", 29), + ("Bhettye Rhubble", 27), ] -HEADER_ROW = ('Full Name', 'Age') +HEADER_ROW = ("Full Name", "Age") SCHEMA = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA = [ - bigquery.SchemaField('transaction_time', 'TIMESTAMP', mode='REQUIRED'), - bigquery.SchemaField('transaction_id', 'INTEGER', mode='REQUIRED'), - bigquery.SchemaField('user_email', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('store_code', 'STRING', mode='REQUIRED'), + bigquery.SchemaField("transaction_time", "TIMESTAMP", mode="REQUIRED"), + bigquery.SchemaField("transaction_id", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("user_email", "STRING", mode="REQUIRED"), + bigquery.SchemaField("store_code", "STRING", mode="REQUIRED"), bigquery.SchemaField( - 'items', 'RECORD', mode='REPEATED', fields=[ - bigquery.SchemaField('item_code', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('quantity', 'INTEGER', mode='REQUIRED'), - bigquery.SchemaField('comments', 'STRING', mode='NULLABLE'), - bigquery.SchemaField('expiration_date', 'DATE', mode='REQUIRED'), - ]), + "items", + "RECORD", + mode="REPEATED", + fields=[ + bigquery.SchemaField("item_code", "STRING", mode="REQUIRED"), + bigquery.SchemaField("quantity", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("comments", "STRING", mode="NULLABLE"), + bigquery.SchemaField("expiration_date", "DATE", mode="REQUIRED"), + ], + ), ] @@ -94,22 +99,21 @@ def _has_rows(result): def _make_dataset_id(prefix): - return '%s%s' % (prefix, unique_resource_id()) + return "%s%s" % (prefix, unique_resource_id()) -def _load_json_schema(filename='data/schema.json'): +def _load_json_schema(filename="data/schema.json"): from google.cloud.bigquery.table import _parse_schema_resource json_filename = os.path.join(WHERE, filename) - with open(json_filename, 'r') as schema_file: + with open(json_filename, "r") as schema_file: return _parse_schema_resource(json.load(schema_file)) def _rate_limit_exceeded(forbidden): """Predicate: pass only exceptions with 'rateLimitExceeded' as reason.""" - return any(error['reason'] == 'rateLimitExceeded' - for error in forbidden._errors) + return any(error["reason"] == "rateLimitExceeded" for error in forbidden._errors) # We need to wait to stay within the rate limits. @@ -125,6 +129,7 @@ class Config(object): This is a mutable stand-in to allow test set-up to modify global state. """ + CLIENT = None CURSOR = None @@ -135,15 +140,14 @@ def setUpModule(): class TestBigQuery(unittest.TestCase): - def setUp(self): self.to_delete = [] def tearDown(self): - def _still_in_use(bad_request): - return any(error['reason'] == 'resourceInUse' - for error in bad_request._errors) + return any( + error["reason"] == "resourceInUse" for error in bad_request._errors + ) retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use) retry_409_429 = RetryErrors((Conflict, TooManyRequests)) @@ -151,8 +155,7 @@ def _still_in_use(bad_request): if isinstance(doomed, storage.Bucket): retry_409_429(doomed.delete)(force=True) elif isinstance(doomed, (Dataset, bigquery.DatasetReference)): - retry_in_use(Config.CLIENT.delete_dataset)( - doomed, delete_contents=True) + retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True) elif isinstance(doomed, (Table, bigquery.TableReference)): retry_in_use(Config.CLIENT.delete_table)(doomed) else: @@ -164,10 +167,10 @@ def test_get_service_account_email(self): got = client.get_service_account_email() self.assertIsInstance(got, six.text_type) - self.assertIn('@', got) + self.assertIn("@", got) def test_create_dataset(self): - DATASET_ID = _make_dataset_id('create_dataset') + DATASET_ID = _make_dataset_id("create_dataset") dataset = self.temp_dataset(DATASET_ID) self.assertTrue(_dataset_exists(dataset)) @@ -175,66 +178,66 @@ def test_create_dataset(self): self.assertEqual(dataset.project, Config.CLIENT.project) def test_get_dataset(self): - dataset_id = _make_dataset_id('get_dataset') + dataset_id = _make_dataset_id("get_dataset") client = Config.CLIENT dataset_arg = Dataset(client.dataset(dataset_id)) - dataset_arg.friendly_name = 'Friendly' - dataset_arg.description = 'Description' + dataset_arg.friendly_name = "Friendly" + dataset_arg.description = "Description" dataset = retry_403(client.create_dataset)(dataset_arg) self.to_delete.append(dataset) dataset_ref = client.dataset(dataset_id) # Get with a reference. got = client.get_dataset(dataset_ref) - self.assertEqual(got.friendly_name, 'Friendly') - self.assertEqual(got.description, 'Description') + self.assertEqual(got.friendly_name, "Friendly") + self.assertEqual(got.description, "Description") # Get with a string. got = client.get_dataset(dataset_id) - self.assertEqual(got.friendly_name, 'Friendly') - self.assertEqual(got.description, 'Description') + self.assertEqual(got.friendly_name, "Friendly") + self.assertEqual(got.description, "Description") # Get with a fully-qualified string. - got = client.get_dataset('{}.{}'.format(client.project, dataset_id)) - self.assertEqual(got.friendly_name, 'Friendly') - self.assertEqual(got.description, 'Description') + got = client.get_dataset("{}.{}".format(client.project, dataset_id)) + self.assertEqual(got.friendly_name, "Friendly") + self.assertEqual(got.description, "Description") def test_update_dataset(self): - dataset = self.temp_dataset(_make_dataset_id('update_dataset')) + dataset = self.temp_dataset(_make_dataset_id("update_dataset")) self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) self.assertEqual(dataset.labels, {}) - dataset.friendly_name = 'Friendly' - dataset.description = 'Description' - dataset.labels = {'priority': 'high', 'color': 'blue'} + dataset.friendly_name = "Friendly" + dataset.description = "Description" + dataset.labels = {"priority": "high", "color": "blue"} ds2 = Config.CLIENT.update_dataset( - dataset, - ('friendly_name', 'description', 'labels')) - self.assertEqual(ds2.friendly_name, 'Friendly') - self.assertEqual(ds2.description, 'Description') - self.assertEqual(ds2.labels, {'priority': 'high', 'color': 'blue'}) + dataset, ("friendly_name", "description", "labels") + ) + self.assertEqual(ds2.friendly_name, "Friendly") + self.assertEqual(ds2.description, "Description") + self.assertEqual(ds2.labels, {"priority": "high", "color": "blue"}) ds2.labels = { - 'color': 'green', # change - 'shape': 'circle', # add - 'priority': None, # delete + "color": "green", # change + "shape": "circle", # add + "priority": None, # delete } - ds3 = Config.CLIENT.update_dataset(ds2, ['labels']) - self.assertEqual(ds3.labels, {'color': 'green', 'shape': 'circle'}) + ds3 = Config.CLIENT.update_dataset(ds2, ["labels"]) + self.assertEqual(ds3.labels, {"color": "green", "shape": "circle"}) # If we try to update using d2 again, it will fail because the # previous update changed the ETag. - ds2.description = 'no good' + ds2.description = "no good" with self.assertRaises(PreconditionFailed): - Config.CLIENT.update_dataset(ds2, ['description']) + Config.CLIENT.update_dataset(ds2, ["description"]) def test_list_datasets(self): datasets_to_create = [ - 'new' + unique_resource_id(), - 'newer' + unique_resource_id(), - 'newest' + unique_resource_id(), + "new" + unique_resource_id(), + "newer" + unique_resource_id(), + "newest" + unique_resource_id(), ] for dataset_id in datasets_to_create: self.temp_dataset(dataset_id) @@ -243,20 +246,23 @@ def test_list_datasets(self): iterator = Config.CLIENT.list_datasets() all_datasets = list(iterator) self.assertIsNone(iterator.next_page_token) - created = [dataset for dataset in all_datasets - if dataset.dataset_id in datasets_to_create - and dataset.project == Config.CLIENT.project] + created = [ + dataset + for dataset in all_datasets + if dataset.dataset_id in datasets_to_create + and dataset.project == Config.CLIENT.project + ] self.assertEqual(len(created), len(datasets_to_create)) def test_list_datasets_w_project(self): # Retrieve datasets from a different project. - iterator = Config.CLIENT.list_datasets(project='bigquery-public-data') + iterator = Config.CLIENT.list_datasets(project="bigquery-public-data") all_datasets = frozenset([dataset.dataset_id for dataset in iterator]) - self.assertIn('usa_names', all_datasets) + self.assertIn("usa_names", all_datasets) def test_create_table(self): - dataset = self.temp_dataset(_make_dataset_id('create_table')) - table_id = 'test_table' + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_id = "test_table" table_arg = Table(dataset.table(table_id), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) @@ -270,17 +276,16 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType - dataset = self.temp_dataset(_make_dataset_id('create_table_tp_cf')) - table_id = 'test_table' + dataset = self.temp_dataset(_make_dataset_id("create_table_tp_cf")) + table_id = "test_table" table_arg = Table( - dataset.table(table_id), - schema=TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA) + dataset.table(table_id), schema=TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA + ) self.assertFalse(_table_exists(table_arg)) - table_arg.time_partitioning = TimePartitioning( - field='transaction_time') + table_arg.time_partitioning = TimePartitioning(field="transaction_time") - table_arg.clustering_fields = ['user_email', 'store_code'] + table_arg.clustering_fields = ["user_email", "store_code"] table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -288,11 +293,11 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): self.assertEqual(table.table_id, table_id) time_partitioning = table.time_partitioning self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY) - self.assertEqual(time_partitioning.field, 'transaction_time') - self.assertEqual(table.clustering_fields, ['user_email', 'store_code']) + self.assertEqual(time_partitioning.field, "transaction_time") + self.assertEqual(table.clustering_fields, ["user_email", "store_code"]) def test_delete_dataset_with_string(self): - dataset_id = _make_dataset_id('delete_table_true') + dataset_id = _make_dataset_id("delete_table_true") dataset_ref = Config.CLIENT.dataset(dataset_id) retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) self.assertTrue(_dataset_exists(dataset_ref)) @@ -300,11 +305,12 @@ def test_delete_dataset_with_string(self): self.assertFalse(_dataset_exists(dataset_ref)) def test_delete_dataset_delete_contents_true(self): - dataset_id = _make_dataset_id('delete_table_true') + dataset_id = _make_dataset_id("delete_table_true") dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(Config.CLIENT.dataset(dataset_id))) + Dataset(Config.CLIENT.dataset(dataset_id)) + ) - table_id = 'test_table' + table_id = "test_table" table_arg = Table(dataset.table(table_id), schema=SCHEMA) table = retry_403(Config.CLIENT.create_table)(table_arg) Config.CLIENT.delete_dataset(dataset, delete_contents=True) @@ -313,8 +319,9 @@ def test_delete_dataset_delete_contents_true(self): def test_delete_dataset_delete_contents_false(self): from google.api_core import exceptions - dataset = self.temp_dataset(_make_dataset_id('delete_table_false')) - table_id = 'test_table' + + dataset = self.temp_dataset(_make_dataset_id("delete_table_false")) + table_id = "test_table" table_arg = Table(dataset.table(table_id), schema=SCHEMA) retry_403(Config.CLIENT.create_table)(table_arg) @@ -322,9 +329,9 @@ def test_delete_dataset_delete_contents_false(self): Config.CLIENT.delete_dataset(dataset) def test_get_table_w_public_dataset(self): - public = 'bigquery-public-data' - dataset_id = 'samples' - table_id = 'shakespeare' + public = "bigquery-public-data" + dataset_id = "samples" + table_id = "shakespeare" table_ref = DatasetReference(public, dataset_id).table(table_id) # Get table with reference. @@ -333,26 +340,24 @@ def test_get_table_w_public_dataset(self): self.assertEqual(table.dataset_id, dataset_id) self.assertEqual(table.project, public) schema_names = [field.name for field in table.schema] - self.assertEqual( - schema_names, ['word', 'word_count', 'corpus', 'corpus_date']) + self.assertEqual(schema_names, ["word", "word_count", "corpus", "corpus_date"]) # Get table with string. - table = Config.CLIENT.get_table( - '{}.{}.{}'.format(public, dataset_id, table_id)) + table = Config.CLIENT.get_table("{}.{}.{}".format(public, dataset_id, table_id)) self.assertEqual(table.table_id, table_id) self.assertEqual(table.dataset_id, dataset_id) self.assertEqual(table.project, public) def test_list_partitions(self): table_ref = DatasetReference( - 'bigquery-public-data', - 'ethereum_blockchain').table('blocks') + "bigquery-public-data", "ethereum_blockchain" + ).table("blocks") all_rows = Config.CLIENT.list_partitions(table_ref) - self.assertIn('20180801', all_rows) + self.assertIn("20180801", all_rows) self.assertGreater(len(all_rows), 1000) def test_list_tables(self): - dataset_id = _make_dataset_id('list_tables') + dataset_id = _make_dataset_id("list_tables") dataset = self.temp_dataset(dataset_id) # Retrieve tables before any are created for the dataset. iterator = Config.CLIENT.list_tables(dataset) @@ -362,9 +367,9 @@ def test_list_tables(self): # Insert some tables to be listed. tables_to_create = [ - 'new' + unique_resource_id(), - 'newer' + unique_resource_id(), - 'newest' + unique_resource_id(), + "new" + unique_resource_id(), + "newer" + unique_resource_id(), + "newest" + unique_resource_id(), ] for table_name in tables_to_create: table = Table(dataset.table(table_name), schema=SCHEMA) @@ -375,9 +380,11 @@ def test_list_tables(self): iterator = Config.CLIENT.list_tables(dataset) all_tables = list(iterator) self.assertIsNone(iterator.next_page_token) - created = [table for table in all_tables - if (table.table_id in tables_to_create - and table.dataset_id == dataset_id)] + created = [ + table + for table in all_tables + if (table.table_id in tables_to_create and table.dataset_id == dataset_id) + ] self.assertEqual(len(created), len(tables_to_create)) # List tables with a string ID. @@ -386,13 +393,14 @@ def test_list_tables(self): # List tables with a fully-qualified string ID. iterator = Config.CLIENT.list_tables( - '{}.{}'.format(Config.CLIENT.project, dataset_id)) + "{}.{}".format(Config.CLIENT.project, dataset_id) + ) self.assertGreater(len(list(iterator)), 0) def test_update_table(self): - dataset = self.temp_dataset(_make_dataset_id('update_table')) + dataset = self.temp_dataset(_make_dataset_id("update_table")) - TABLE_NAME = 'test_table' + TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) @@ -401,48 +409,49 @@ def test_update_table(self): self.assertIsNone(table.friendly_name) self.assertIsNone(table.description) self.assertEqual(table.labels, {}) - table.friendly_name = 'Friendly' - table.description = 'Description' - table.labels = {'priority': 'high', 'color': 'blue'} + table.friendly_name = "Friendly" + table.description = "Description" + table.labels = {"priority": "high", "color": "blue"} table2 = Config.CLIENT.update_table( - table, ['friendly_name', 'description', 'labels']) + table, ["friendly_name", "description", "labels"] + ) - self.assertEqual(table2.friendly_name, 'Friendly') - self.assertEqual(table2.description, 'Description') - self.assertEqual(table2.labels, {'priority': 'high', 'color': 'blue'}) + self.assertEqual(table2.friendly_name, "Friendly") + self.assertEqual(table2.description, "Description") + self.assertEqual(table2.labels, {"priority": "high", "color": "blue"}) table2.description = None table2.labels = { - 'color': 'green', # change - 'shape': 'circle', # add - 'priority': None, # delete + "color": "green", # change + "shape": "circle", # add + "priority": None, # delete } - table3 = Config.CLIENT.update_table(table2, ['description', 'labels']) + table3 = Config.CLIENT.update_table(table2, ["description", "labels"]) self.assertIsNone(table3.description) - self.assertEqual(table3.labels, {'color': 'green', 'shape': 'circle'}) + self.assertEqual(table3.labels, {"color": "green", "shape": "circle"}) # If we try to update using table2 again, it will fail because the # previous update changed the ETag. - table2.description = 'no good' + table2.description = "no good" with self.assertRaises(PreconditionFailed): - Config.CLIENT.update_table(table2, ['description']) + Config.CLIENT.update_table(table2, ["description"]) def test_update_table_schema(self): - dataset = self.temp_dataset(_make_dataset_id('update_table')) + dataset = self.temp_dataset(_make_dataset_id("update_table")) - TABLE_NAME = 'test_table' + TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) - voter = bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE') + voter = bigquery.SchemaField("voter", "BOOLEAN", mode="NULLABLE") schema = table.schema schema.append(voter) table.schema = schema - updated_table = Config.CLIENT.update_table(table, ['schema']) + updated_table = Config.CLIENT.update_table(table, ["schema"]) self.assertEqual(len(updated_table.schema), len(schema)) for found, expected in zip(updated_table.schema, schema): @@ -452,8 +461,7 @@ def test_update_table_schema(self): @staticmethod def _fetch_single_page(table, selected_fields=None): - iterator = Config.CLIENT.list_rows( - table, selected_fields=selected_fields) + iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) page = six.next(iterator.pages) return list(page) @@ -462,13 +470,14 @@ def _create_table_many_columns(self, rowcount): # first column is named 'rowval', and has a value from 1..rowcount # Subsequent column is named col_ and contains the value N*rowval, # where N is between 1 and 9999 inclusive. - dsname = _make_dataset_id('wide_schema') + dsname = _make_dataset_id("wide_schema") dataset = self.temp_dataset(dsname) - table_id = 'many_columns' + table_id = "many_columns" table_ref = dataset.table(table_id) self.to_delete.insert(0, table_ref) - colprojections = ','.join( - ['r * {} as col_{}'.format(n, n) for n in range(1, 10000)]) + colprojections = ",".join( + ["r * {} as col_{}".format(n, n) for n in range(1, 10000)] + ) sql = """ CREATE TABLE {}.{} AS @@ -477,11 +486,13 @@ def _create_table_many_columns(self, rowcount): {} FROM UNNEST(GENERATE_ARRAY(1,{},1)) as r - """.format(dsname, table_id, colprojections, rowcount) + """.format( + dsname, table_id, colprojections, rowcount + ) query_job = Config.CLIENT.query(sql) query_job.result() - self.assertEqual(query_job.statement_type, 'CREATE_TABLE_AS_SELECT') - self.assertEqual(query_job.ddl_operation_performed, 'CREATE') + self.assertEqual(query_job.statement_type, "CREATE_TABLE_AS_SELECT") + self.assertEqual(query_job.ddl_operation_performed, "CREATE") self.assertEqual(query_job.ddl_target_table, table_ref) return table_ref @@ -490,9 +501,11 @@ def test_query_many_columns(self): # Test working with the widest schema BigQuery supports, 10k columns. row_count = 2 table_ref = self._create_table_many_columns(row_count) - rows = list(Config.CLIENT.query( - 'SELECT * FROM `{}.{}`'.format( - table_ref.dataset_id, table_ref.table_id))) + rows = list( + Config.CLIENT.query( + "SELECT * FROM `{}.{}`".format(table_ref.dataset_id, table_ref.table_id) + ) + ) self.assertEqual(len(rows), row_count) @@ -512,22 +525,21 @@ def test_query_many_columns(self): def test_insert_rows_then_dump_table(self): NOW_SECONDS = 1448911495.484366 - NOW = datetime.datetime.utcfromtimestamp( - NOW_SECONDS).replace(tzinfo=UTC) + NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(tzinfo=UTC) ROWS = [ - ('Phred Phlyntstone', 32, NOW), - ('Bharney Rhubble', 33, NOW + datetime.timedelta(seconds=10)), - ('Wylma Phlyntstone', 29, NOW + datetime.timedelta(seconds=20)), - ('Bhettye Rhubble', 27, None), + ("Phred Phlyntstone", 32, NOW), + ("Bharney Rhubble", 33, NOW + datetime.timedelta(seconds=10)), + ("Wylma Phlyntstone", 29, NOW + datetime.timedelta(seconds=20)), + ("Bhettye Rhubble", 27, None), ] ROW_IDS = range(len(ROWS)) - dataset = self.temp_dataset(_make_dataset_id('insert_rows_then_dump')) - TABLE_ID = 'test_table' + dataset = self.temp_dataset(_make_dataset_id("insert_rows_then_dump")) + TABLE_ID = "test_table" schema = [ - bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), - bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), - bigquery.SchemaField('now', 'TIMESTAMP'), + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("now", "TIMESTAMP"), ] table_arg = Table(dataset.table(TABLE_ID), schema=schema) self.assertFalse(_table_exists(table_arg)) @@ -547,13 +559,13 @@ def test_insert_rows_then_dump_table(self): rows = retry(self._fetch_single_page)(table) row_tuples = [r.values() for r in rows] by_age = operator.itemgetter(1) - self.assertEqual(sorted(row_tuples, key=by_age), - sorted(ROWS, key=by_age)) + self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) def test_load_table_from_local_avro_file_then_dump_table(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition - TABLE_NAME = 'test_table_avro' + + TABLE_NAME = "test_table_avro" ROWS = [ ("violet", 400), ("indigo", 445), @@ -561,19 +573,21 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("green", 510), ("yellow", 570), ("orange", 590), - ("red", 650)] + ("red", 650), + ] - dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) + dataset = self.temp_dataset(_make_dataset_id("load_local_then_dump")) table_ref = dataset.table(TABLE_NAME) table = Table(table_ref) self.to_delete.insert(0, table) - with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof: + with open(os.path.join(WHERE, "data", "colors.avro"), "rb") as avrof: config = bigquery.LoadJobConfig() config.source_format = SourceFormat.AVRO config.write_disposition = WriteDisposition.WRITE_TRUNCATE job = Config.CLIENT.load_table_from_file( - avrof, table_ref, job_config=config) + avrof, table_ref, job_config=config + ) # Retry until done. job.result(timeout=JOB_TIMEOUT) @@ -583,14 +597,16 @@ def test_load_table_from_local_avro_file_then_dump_table(self): rows = self._fetch_single_page(table) row_tuples = [r.values() for r in rows] by_wavelength = operator.itemgetter(1) - self.assertEqual(sorted(row_tuples, key=by_wavelength), - sorted(ROWS, key=by_wavelength)) + self.assertEqual( + sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength) + ) def test_load_avro_from_uri_then_dump_table(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition - table_name = 'test_table' + + table_name = "test_table" rows = [ ("violet", 400), ("indigo", 445), @@ -598,13 +614,14 @@ def test_load_avro_from_uri_then_dump_table(self): ("green", 510), ("yellow", 570), ("orange", 590), - ("red", 650) + ("red", 650), ] - with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as f: + with open(os.path.join(WHERE, "data", "colors.avro"), "rb") as f: GS_URL = self._write_avro_to_storage( - 'bq_load_test' + unique_resource_id(), 'colors.avro', f) + "bq_load_test" + unique_resource_id(), "colors.avro", f + ) - dataset = self.temp_dataset(_make_dataset_id('bq_load_test')) + dataset = self.temp_dataset(_make_dataset_id("bq_load_test")) table_arg = dataset.table(table_name) table = retry_403(Config.CLIENT.create_table)(Table(table_arg)) self.to_delete.insert(0, table) @@ -613,28 +630,28 @@ def test_load_avro_from_uri_then_dump_table(self): config.create_disposition = CreateDisposition.CREATE_NEVER config.source_format = SourceFormat.AVRO config.write_disposition = WriteDisposition.WRITE_EMPTY - job = Config.CLIENT.load_table_from_uri( - GS_URL, table_arg, job_config=config) + job = Config.CLIENT.load_table_from_uri(GS_URL, table_arg, job_config=config) job.result(timeout=JOB_TIMEOUT) self.assertEqual(job.output_rows, len(rows)) table = Config.CLIENT.get_table(table) fetched = self._fetch_single_page(table) row_tuples = [r.values() for r in fetched] - self.assertEqual(sorted(row_tuples, key=lambda x: x[1]), - sorted(rows, key=lambda x: x[1])) + self.assertEqual( + sorted(row_tuples, key=lambda x: x[1]), sorted(rows, key=lambda x: x[1]) + ) def test_load_table_from_uri_then_dump_table(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition - TABLE_ID = 'test_table' + TABLE_ID = "test_table" GS_URL = self._write_csv_to_storage( - 'bq_load_test' + unique_resource_id(), 'person_ages.csv', - HEADER_ROW, ROWS) + "bq_load_test" + unique_resource_id(), "person_ages.csv", HEADER_ROW, ROWS + ) - dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) + dataset = self.temp_dataset(_make_dataset_id("load_gcs_then_dump")) table_arg = Table(dataset.table(TABLE_ID), schema=SCHEMA) table = retry_403(Config.CLIENT.create_table)(table_arg) @@ -646,7 +663,8 @@ def test_load_table_from_uri_then_dump_table(self): config.source_format = SourceFormat.CSV config.write_disposition = WriteDisposition.WRITE_EMPTY job = Config.CLIENT.load_table_from_uri( - GS_URL, dataset.table(TABLE_ID), job_config=config) + GS_URL, dataset.table(TABLE_ID), job_config=config + ) # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability @@ -657,62 +675,61 @@ def test_load_table_from_uri_then_dump_table(self): rows = self._fetch_single_page(table) row_tuples = [r.values() for r in rows] by_age = operator.itemgetter(1) - self.assertEqual(sorted(row_tuples, key=by_age), - sorted(ROWS, key=by_age)) + self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) def test_load_table_from_file_w_explicit_location(self): # Create a temporary bucket for extract files. storage_client = storage.Client() - bucket_name = 'bq_load_table_eu_extract_test' + unique_resource_id() + bucket_name = "bq_load_table_eu_extract_test" + unique_resource_id() bucket = storage_client.bucket(bucket_name) - bucket.location = 'eu' + bucket.location = "eu" self.to_delete.append(bucket) bucket.create() # Create a temporary dataset & table in the EU. - table_bytes = six.BytesIO(b'a,3\nb,2\nc,1\n') + table_bytes = six.BytesIO(b"a,3\nb,2\nc,1\n") client = Config.CLIENT - dataset = self.temp_dataset( - _make_dataset_id('eu_load_file'), location='EU') - table_ref = dataset.table('letters') + dataset = self.temp_dataset(_make_dataset_id("eu_load_file"), location="EU") + table_ref = dataset.table("letters") job_config = bigquery.LoadJobConfig() job_config.skip_leading_rows = 0 job_config.schema = [ - bigquery.SchemaField('letter', 'STRING'), - bigquery.SchemaField('value', 'INTEGER'), + bigquery.SchemaField("letter", "STRING"), + bigquery.SchemaField("value", "INTEGER"), ] # Load the file to an EU dataset with an EU load job. load_job = client.load_table_from_file( - table_bytes, table_ref, location='EU', job_config=job_config) + table_bytes, table_ref, location="EU", job_config=job_config + ) load_job.result() job_id = load_job.job_id # Can get the job from the EU. - load_job = client.get_job(job_id, location='EU') + load_job = client.get_job(job_id, location="EU") self.assertEqual(job_id, load_job.job_id) - self.assertEqual('EU', load_job.location) + self.assertEqual("EU", load_job.location) self.assertTrue(load_job.exists()) # Cannot get the job from the US. with self.assertRaises(NotFound): - client.get_job(job_id, location='US') + client.get_job(job_id, location="US") load_job_us = client.get_job(job_id) - load_job_us._properties['jobReference']['location'] = 'US' + load_job_us._properties["jobReference"]["location"] = "US" self.assertFalse(load_job_us.exists()) with self.assertRaises(NotFound): load_job_us.reload() # Can cancel the job from the EU. self.assertTrue(load_job.cancel()) - load_job = client.cancel_job(job_id, location='EU') + load_job = client.cancel_job(job_id, location="EU") self.assertEqual(job_id, load_job.job_id) - self.assertEqual('EU', load_job.location) + self.assertEqual("EU", load_job.location) # Cannot cancel the job from the US. with self.assertRaises(NotFound): - client.cancel_job(job_id, location='US') + client.cancel_job(job_id, location="US") with self.assertRaises(NotFound): load_job_us.cancel() @@ -720,41 +737,32 @@ def test_load_table_from_file_w_explicit_location(self): table = client.get_table(table_ref) self.assertEqual(table.num_rows, 3) rows = [(row.letter, row.value) for row in client.list_rows(table)] - self.assertEqual( - list(sorted(rows)), [('a', 3), ('b', 2), ('c', 1)]) + self.assertEqual(list(sorted(rows)), [("a", 3), ("b", 2), ("c", 1)]) # Verify location behavior with queries query_config = bigquery.QueryJobConfig() query_config.dry_run = True - query_string = 'SELECT * FROM `{}.letters` LIMIT 1'.format( - dataset.dataset_id) + query_string = "SELECT * FROM `{}.letters` LIMIT 1".format(dataset.dataset_id) - eu_query = client.query( - query_string, - location='EU', - job_config=query_config) + eu_query = client.query(query_string, location="EU", job_config=query_config) self.assertTrue(eu_query.done) # Cannot query from US. with self.assertRaises(BadRequest): - list(client.query( - query_string, - location='US', - job_config=query_config)) + list(client.query(query_string, location="US", job_config=query_config)) # Cannot copy from US. with self.assertRaises(BadRequest): client.copy_table( - table_ref, dataset.table('letters2_us'), - location='US').result() + table_ref, dataset.table("letters2_us"), location="US" + ).result() # Cannot extract from US. with self.assertRaises(BadRequest): client.extract_table( - table_ref, - 'gs://{}/letters-us.csv'.format(bucket_name), - location='US').result() + table_ref, "gs://{}/letters-us.csv".format(bucket_name), location="US" + ).result() def _create_storage(self, bucket_name, blob_name): storage_client = storage.Client() @@ -766,35 +774,34 @@ def _create_storage(self, bucket_name, blob_name): return bucket.blob(blob_name) - def _write_csv_to_storage(self, bucket_name, blob_name, header_row, - data_rows): + def _write_csv_to_storage(self, bucket_name, blob_name, header_row, data_rows): from google.cloud._testing import _NamedTemporaryFile blob = self._create_storage(bucket_name, blob_name) with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as csv_write: + with open(temp.name, "w") as csv_write: writer = csv.writer(csv_write) writer.writerow(header_row) writer.writerows(data_rows) - with open(temp.name, 'rb') as csv_read: - blob.upload_from_file(csv_read, content_type='text/csv') + with open(temp.name, "rb") as csv_read: + blob.upload_from_file(csv_read, content_type="text/csv") self.to_delete.insert(0, blob) - return 'gs://{}/{}'.format(bucket_name, blob_name) + return "gs://{}/{}".format(bucket_name, blob_name) def _write_avro_to_storage(self, bucket_name, blob_name, avro_file): blob = self._create_storage(bucket_name, blob_name) - blob.upload_from_file(avro_file, - content_type='application/x-avro-binary') + blob.upload_from_file(avro_file, content_type="application/x-avro-binary") self.to_delete.insert(0, blob) - return 'gs://{}/{}'.format(bucket_name, blob_name) + return "gs://{}/{}".format(bucket_name, blob_name) def _load_table_for_extract_table( - self, storage_client, rows, bucket_name, blob_name, table): + self, storage_client, rows, bucket_name, blob_name, table + ): from google.cloud._testing import _NamedTemporaryFile - gs_url = 'gs://{}/{}'.format(bucket_name, blob_name) + gs_url = "gs://{}/{}".format(bucket_name, blob_name) # In the **very** rare case the bucket name is reserved, this # fails with a ConnectionError. @@ -803,21 +810,20 @@ def _load_table_for_extract_table( blob = bucket.blob(blob_name) with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as csv_write: + with open(temp.name, "w") as csv_write: writer = csv.writer(csv_write) writer.writerow(HEADER_ROW) writer.writerows(rows) - with open(temp.name, 'rb') as csv_read: - blob.upload_from_file(csv_read, content_type='text/csv') + with open(temp.name, "rb") as csv_read: + blob.upload_from_file(csv_read, content_type="text/csv") self.to_delete.insert(0, blob) dataset = self.temp_dataset(table.dataset_id) table_ref = dataset.table(table.table_id) config = bigquery.LoadJobConfig() config.autodetect = True - job = Config.CLIENT.load_table_from_uri(gs_url, table_ref, - job_config=config) + job = Config.CLIENT.load_table_from_uri(gs_url, table_ref, job_config=config) # TODO(jba): do we need this retry now that we have job.result()? # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability @@ -830,26 +836,27 @@ def test_extract_table(self): storage_client = StorageClient() local_id = unique_resource_id() - bucket_name = 'bq_extract_test' + local_id - blob_name = 'person_ages.csv' - dataset_id = _make_dataset_id('load_gcs_then_extract') - table_id = 'test_table' + bucket_name = "bq_extract_test" + local_id + blob_name = "person_ages.csv" + dataset_id = _make_dataset_id("load_gcs_then_extract") + table_id = "test_table" table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) table = Table(table_ref) self.to_delete.insert(0, table) self._load_table_for_extract_table( - storage_client, ROWS, bucket_name, blob_name, table_ref) + storage_client, ROWS, bucket_name, blob_name, table_ref + ) bucket = storage_client.bucket(bucket_name) - destination_blob_name = 'person_ages_out.csv' + destination_blob_name = "person_ages_out.csv" destination = bucket.blob(destination_blob_name) - destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) + destination_uri = "gs://{}/person_ages_out.csv".format(bucket_name) job = Config.CLIENT.extract_table(table_ref, destination_uri) job.result(timeout=100) self.to_delete.insert(0, destination) - got = destination.download_as_string().decode('utf-8') - self.assertIn('Bharney Rhubble', got) + got = destination.download_as_string().decode("utf-8") + self.assertIn("Bharney Rhubble", got) def test_copy_table(self): # If we create a new table to copy from, the test won't work @@ -857,13 +864,12 @@ def test_copy_table(self): # and copy jobs don't read the streaming buffer. # We could wait for the streaming buffer to empty, but that could # take minutes. Instead we copy a small public table. - source_dataset = DatasetReference('bigquery-public-data', 'samples') - source_ref = source_dataset.table('shakespeare') - dest_dataset = self.temp_dataset(_make_dataset_id('copy_table')) - dest_ref = dest_dataset.table('destination_table') + source_dataset = DatasetReference("bigquery-public-data", "samples") + source_ref = source_dataset.table("shakespeare") + dest_dataset = self.temp_dataset(_make_dataset_id("copy_table")) + dest_ref = dest_dataset.table("destination_table") job_config = bigquery.CopyJobConfig() - job = Config.CLIENT.copy_table( - source_ref, dest_ref, job_config=job_config) + job = Config.CLIENT.copy_table(source_ref, dest_ref, job_config=job_config) job.result() dest_table = Config.CLIENT.get_table(dest_ref) @@ -873,10 +879,10 @@ def test_copy_table(self): self.assertTrue(len(got_rows) > 0) def test_job_cancel(self): - DATASET_ID = _make_dataset_id('job_cancel') - JOB_ID_PREFIX = 'fetch_' + DATASET_ID - TABLE_NAME = 'test_table' - QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) + DATASET_ID = _make_dataset_id("job_cancel") + JOB_ID_PREFIX = "fetch_" + DATASET_ID + TABLE_NAME = "test_table" + QUERY = "SELECT * FROM %s.%s" % (DATASET_ID, TABLE_NAME) dataset = self.temp_dataset(DATASET_ID) @@ -900,17 +906,15 @@ def test_get_failed_job(self): # issue 4246 from google.api_core.exceptions import BadRequest - JOB_ID = 'invalid_{}'.format(str(uuid.uuid4())) - QUERY = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);' - PARAM = bigquery.ScalarQueryParameter( - 'ts_value', 'TIMESTAMP', 1.4810976E9) + JOB_ID = "invalid_{}".format(str(uuid.uuid4())) + QUERY = "SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);" + PARAM = bigquery.ScalarQueryParameter("ts_value", "TIMESTAMP", 1.4810976e9) job_config = bigquery.QueryJobConfig() job_config.query_parameters = [PARAM] with self.assertRaises(BadRequest): - Config.CLIENT.query( - QUERY, job_id=JOB_ID, job_config=job_config).result() + Config.CLIENT.query(QUERY, job_id=JOB_ID, job_config=job_config).result() job = Config.CLIENT.get_job(JOB_ID) @@ -919,176 +923,117 @@ def test_get_failed_job(self): def test_query_w_legacy_sql_types(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) - stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) + stamp = "%s %s" % (naive.date().isoformat(), naive.time().isoformat()) zoned = naive.replace(tzinfo=UTC) examples = [ - { - 'sql': 'SELECT 1', - 'expected': 1, - }, - { - 'sql': 'SELECT 1.3', - 'expected': 1.3, - }, - { - 'sql': 'SELECT TRUE', - 'expected': True, - }, - { - 'sql': 'SELECT "ABC"', - 'expected': 'ABC', - }, - { - 'sql': 'SELECT CAST("foo" AS BYTES)', - 'expected': b'foo', - }, - { - 'sql': 'SELECT CAST("%s" AS TIMESTAMP)' % (stamp,), - 'expected': zoned, - }, + {"sql": "SELECT 1", "expected": 1}, + {"sql": "SELECT 1.3", "expected": 1.3}, + {"sql": "SELECT TRUE", "expected": True}, + {"sql": 'SELECT "ABC"', "expected": "ABC"}, + {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"}, + {"sql": 'SELECT CAST("%s" AS TIMESTAMP)' % (stamp,), "expected": zoned}, ] for example in examples: job_config = bigquery.QueryJobConfig() job_config.use_legacy_sql = True - rows = list(Config.CLIENT.query( - example['sql'], job_config=job_config)) + rows = list(Config.CLIENT.query(example["sql"], job_config=job_config)) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) - self.assertEqual(rows[0][0], example['expected']) + self.assertEqual(rows[0][0], example["expected"]) def _generate_standard_sql_types_examples(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000) - stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) - stamp_microseconds = stamp + '.250000' + stamp = "%s %s" % (naive.date().isoformat(), naive.time().isoformat()) + stamp_microseconds = stamp + ".250000" zoned = naive.replace(tzinfo=UTC) zoned_microseconds = naive_microseconds.replace(tzinfo=UTC) - numeric = decimal.Decimal('123456789.123456789') + numeric = decimal.Decimal("123456789.123456789") return [ + {"sql": "SELECT 1", "expected": 1}, + {"sql": "SELECT 1.3", "expected": 1.3}, + {"sql": "SELECT TRUE", "expected": True}, + {"sql": 'SELECT "ABC"', "expected": "ABC"}, + {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"}, + {"sql": 'SELECT TIMESTAMP "%s"' % (stamp,), "expected": zoned}, { - 'sql': 'SELECT 1', - 'expected': 1, - }, - { - 'sql': 'SELECT 1.3', - 'expected': 1.3, - }, - { - 'sql': 'SELECT TRUE', - 'expected': True, - }, - { - 'sql': 'SELECT "ABC"', - 'expected': 'ABC', + "sql": 'SELECT TIMESTAMP "%s"' % (stamp_microseconds,), + "expected": zoned_microseconds, }, + {"sql": 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp,), "expected": naive}, { - 'sql': 'SELECT CAST("foo" AS BYTES)', - 'expected': b'foo', + "sql": 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp_microseconds,), + "expected": naive_microseconds, }, + {"sql": 'SELECT DATE(TIMESTAMP "%s")' % (stamp,), "expected": naive.date()}, + {"sql": 'SELECT TIME(TIMESTAMP "%s")' % (stamp,), "expected": naive.time()}, + {"sql": 'SELECT NUMERIC "%s"' % (numeric,), "expected": numeric}, + {"sql": "SELECT (1, 2)", "expected": {"_field_1": 1, "_field_2": 2}}, { - 'sql': 'SELECT TIMESTAMP "%s"' % (stamp,), - 'expected': zoned, - }, - { - 'sql': 'SELECT TIMESTAMP "%s"' % (stamp_microseconds,), - 'expected': zoned_microseconds, - }, - { - 'sql': 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp,), - 'expected': naive, - }, - { - 'sql': 'SELECT DATETIME(TIMESTAMP "%s")' % ( - stamp_microseconds,), - 'expected': naive_microseconds, - }, - { - 'sql': 'SELECT DATE(TIMESTAMP "%s")' % (stamp,), - 'expected': naive.date(), - }, - { - 'sql': 'SELECT TIME(TIMESTAMP "%s")' % (stamp,), - 'expected': naive.time(), - }, - { - 'sql': 'SELECT NUMERIC "%s"' % (numeric,), - 'expected': numeric, - }, - { - 'sql': 'SELECT (1, 2)', - 'expected': {'_field_1': 1, '_field_2': 2}, - }, - { - 'sql': 'SELECT ((1, 2), (3, 4), 5)', - 'expected': { - '_field_1': {'_field_1': 1, '_field_2': 2}, - '_field_2': {'_field_1': 3, '_field_2': 4}, - '_field_3': 5, + "sql": "SELECT ((1, 2), (3, 4), 5)", + "expected": { + "_field_1": {"_field_1": 1, "_field_2": 2}, + "_field_2": {"_field_1": 3, "_field_2": 4}, + "_field_3": 5, }, }, + {"sql": "SELECT [1, 2, 3]", "expected": [1, 2, 3]}, { - 'sql': 'SELECT [1, 2, 3]', - 'expected': [1, 2, 3], - }, - { - 'sql': 'SELECT ([1, 2], 3, [4, 5])', - 'expected': - {'_field_1': [1, 2], '_field_2': 3, '_field_3': [4, 5]}, + "sql": "SELECT ([1, 2], 3, [4, 5])", + "expected": {"_field_1": [1, 2], "_field_2": 3, "_field_3": [4, 5]}, }, { - 'sql': 'SELECT [(1, 2, 3), (4, 5, 6)]', - 'expected': [ - {'_field_1': 1, '_field_2': 2, '_field_3': 3}, - {'_field_1': 4, '_field_2': 5, '_field_3': 6}, + "sql": "SELECT [(1, 2, 3), (4, 5, 6)]", + "expected": [ + {"_field_1": 1, "_field_2": 2, "_field_3": 3}, + {"_field_1": 4, "_field_2": 5, "_field_3": 6}, ], }, { - 'sql': 'SELECT [([1, 2, 3], 4), ([5, 6], 7)]', - 'expected': [ - {u'_field_1': [1, 2, 3], u'_field_2': 4}, - {u'_field_1': [5, 6], u'_field_2': 7}, + "sql": "SELECT [([1, 2, 3], 4), ([5, 6], 7)]", + "expected": [ + {u"_field_1": [1, 2, 3], u"_field_2": 4}, + {u"_field_1": [5, 6], u"_field_2": 7}, ], }, { - 'sql': 'SELECT ARRAY(SELECT STRUCT([1, 2]))', - 'expected': [{u'_field_1': [1, 2]}], - }, - { - 'sql': 'SELECT ST_GeogPoint(1, 2)', - 'expected': 'POINT(1 2)', + "sql": "SELECT ARRAY(SELECT STRUCT([1, 2]))", + "expected": [{u"_field_1": [1, 2]}], }, + {"sql": "SELECT ST_GeogPoint(1, 2)", "expected": "POINT(1 2)"}, ] def test_query_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() for example in examples: - rows = list(Config.CLIENT.query(example['sql'])) + rows = list(Config.CLIENT.query(example["sql"])) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) - self.assertEqual(rows[0][0], example['expected']) + self.assertEqual(rows[0][0], example["expected"]) def test_query_w_failed_query(self): from google.api_core.exceptions import BadRequest with self.assertRaises(BadRequest): - Config.CLIENT.query('invalid syntax;').result() + Config.CLIENT.query("invalid syntax;").result() def test_query_w_wrong_config(self): from google.cloud.bigquery.job import LoadJobConfig - good_query = 'SELECT 1;' - rows = list(Config.CLIENT.query('SELECT 1;').result()) + good_query = "SELECT 1;" + rows = list(Config.CLIENT.query("SELECT 1;").result()) assert rows[0][0] == 1 bad_config = LoadJobConfig() - bad_config.destination = Config.CLIENT.dataset('dset').table('tbl') + bad_config.destination = Config.CLIENT.dataset("dset").table("tbl") with self.assertRaises(Exception): Config.CLIENT.query(good_query, job_config=bad_config).result() def test_query_w_timeout(self): query_job = Config.CLIENT.query( - 'SELECT * FROM `bigquery-public-data.github_repos.commits`;', - job_id_prefix='test_query_w_timeout_') + "SELECT * FROM `bigquery-public-data.github_repos.commits`;", + job_id_prefix="test_query_w_timeout_", + ) with self.assertRaises(concurrent.futures.TimeoutError): # 1 second is much too short for this query. @@ -1130,8 +1075,9 @@ def test_query_statistics(self): ON lside.year = rside.year """, - location='US', - job_config=job_config) + location="US", + job_config=job_config, + ) # run the job to completion query_job.result() @@ -1142,10 +1088,10 @@ def test_query_statistics(self): self.assertTrue(query_job.done) self.assertFalse(query_job.dry_run) self.assertIsNone(query_job.num_dml_affected_rows) - self.assertEqual(query_job.priority, 'INTERACTIVE') + self.assertEqual(query_job.priority, "INTERACTIVE") self.assertGreater(query_job.total_bytes_billed, 1) self.assertGreater(query_job.total_bytes_processed, 1) - self.assertEqual(query_job.statement_type, 'SELECT') + self.assertEqual(query_job.statement_type, "SELECT") self.assertGreater(query_job.slot_millis, 1) # Make assertions on the shape of the query plan. @@ -1159,7 +1105,7 @@ def test_query_statistics(self): self.assertGreater(first_stage.parallel_inputs, 0) self.assertGreater(first_stage.completed_parallel_inputs, 0) self.assertGreater(first_stage.shuffle_output_bytes, 0) - self.assertEqual(first_stage.status, 'COMPLETE') + self.assertEqual(first_stage.status, "COMPLETE") # Query plan is a digraph. Ensure it has inter-stage links, # but not every stage has inputs. @@ -1173,16 +1119,16 @@ def test_query_statistics(self): def test_dbapi_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() for example in examples: - Config.CURSOR.execute(example['sql']) + Config.CURSOR.execute(example["sql"]) self.assertEqual(Config.CURSOR.rowcount, 1) row = Config.CURSOR.fetchone() self.assertEqual(len(row), 1) - self.assertEqual(row[0], example['expected']) + self.assertEqual(row[0], example["expected"]) row = Config.CURSOR.fetchone() self.assertIsNone(row) def test_dbapi_fetchall(self): - query = 'SELECT * FROM UNNEST([(1, 2), (3, 4), (5, 6)])' + query = "SELECT * FROM UNNEST([(1, 2), (3, 4), (5, 6)])" for arraysize in range(1, 5): Config.CURSOR.execute(query) @@ -1199,36 +1145,36 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud.bigquery.job import WriteDisposition dataset = self.temp_dataset(dataset_id) - greeting = bigquery.SchemaField( - 'greeting', 'STRING', mode='NULLABLE') + greeting = bigquery.SchemaField("greeting", "STRING", mode="NULLABLE") table_ref = dataset.table(table_id) table_arg = Table(table_ref, schema=[greeting]) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as csv_write: + with open(temp.name, "w") as csv_write: writer = csv.writer(csv_write) - writer.writerow(('Greeting',)) + writer.writerow(("Greeting",)) writer.writerows(rows) - with open(temp.name, 'rb') as csv_read: + with open(temp.name, "rb") as csv_read: config = bigquery.LoadJobConfig() config.source_format = SourceFormat.CSV config.skip_leading_rows = 1 config.create_disposition = CreateDisposition.CREATE_NEVER config.write_disposition = WriteDisposition.WRITE_EMPTY job = Config.CLIENT.load_table_from_file( - csv_read, table_ref, job_config=config) + csv_read, table_ref, job_config=config + ) # Retry until done. job.result(timeout=JOB_TIMEOUT) self._fetch_single_page(table) def test_query_w_dml(self): - dataset_name = _make_dataset_id('dml_query') - table_name = 'test_table' - self._load_table_for_dml([('Hello World',)], dataset_name, table_name) + dataset_name = _make_dataset_id("dml_query") + table_name = "test_table" + self._load_table_for_dml([("Hello World",)], dataset_name, table_name) query_template = """UPDATE {}.{} SET greeting = 'Guten Tag' WHERE greeting = 'Hello World' @@ -1236,15 +1182,16 @@ def test_query_w_dml(self): query_job = Config.CLIENT.query( query_template.format(dataset_name, table_name), - job_id_prefix='test_query_w_dml_') + job_id_prefix="test_query_w_dml_", + ) query_job.result() self.assertEqual(query_job.num_dml_affected_rows, 1) def test_dbapi_w_dml(self): - dataset_name = _make_dataset_id('dml_dbapi') - table_name = 'test_table' - self._load_table_for_dml([('Hello World',)], dataset_name, table_name) + dataset_name = _make_dataset_id("dml_dbapi") + table_name = "test_table" + self._load_table_for_dml([("Hello World",)], dataset_name, table_name) query_template = """UPDATE {}.{} SET greeting = 'Guten Tag' WHERE greeting = 'Hello World' @@ -1252,7 +1199,8 @@ def test_dbapi_w_dml(self): Config.CURSOR.execute( query_template.format(dataset_name, table_name), - job_id='test_dbapi_w_dml_{}'.format(str(uuid.uuid4()))) + job_id="test_dbapi_w_dml_{}".format(str(uuid.uuid4())), + ) self.assertEqual(Config.CURSOR.rowcount, 1) self.assertIsNone(Config.CURSOR.fetchone()) @@ -1261,297 +1209,269 @@ def test_query_w_query_params(self): from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter - question = 'What is the answer to life, the universe, and everything?' + + question = "What is the answer to life, the universe, and everything?" question_param = ScalarQueryParameter( - name='question', type_='STRING', value=question) + name="question", type_="STRING", value=question + ) answer = 42 - answer_param = ScalarQueryParameter( - name='answer', type_='INT64', value=answer) + answer_param = ScalarQueryParameter(name="answer", type_="INT64", value=answer) pi = 3.1415926 - pi_param = ScalarQueryParameter( - name='pi', type_='FLOAT64', value=pi) - pi_numeric = decimal.Decimal('3.141592654') + pi_param = ScalarQueryParameter(name="pi", type_="FLOAT64", value=pi) + pi_numeric = decimal.Decimal("3.141592654") pi_numeric_param = ScalarQueryParameter( - name='pi_numeric_param', type_='NUMERIC', - value=pi_numeric) + name="pi_numeric_param", type_="NUMERIC", value=pi_numeric + ) truthy = True - truthy_param = ScalarQueryParameter( - name='truthy', type_='BOOL', value=truthy) - beef = b'DEADBEEF' - beef_param = ScalarQueryParameter( - name='beef', type_='BYTES', value=beef) + truthy_param = ScalarQueryParameter(name="truthy", type_="BOOL", value=truthy) + beef = b"DEADBEEF" + beef_param = ScalarQueryParameter(name="beef", type_="BYTES", value=beef) naive = datetime.datetime(2016, 12, 5, 12, 41, 9) - naive_param = ScalarQueryParameter( - name='naive', type_='DATETIME', value=naive) + naive_param = ScalarQueryParameter(name="naive", type_="DATETIME", value=naive) naive_date_param = ScalarQueryParameter( - name='naive_date', type_='DATE', value=naive.date()) + name="naive_date", type_="DATE", value=naive.date() + ) naive_time_param = ScalarQueryParameter( - name='naive_time', type_='TIME', value=naive.time()) + name="naive_time", type_="TIME", value=naive.time() + ) zoned = naive.replace(tzinfo=UTC) - zoned_param = ScalarQueryParameter( - name='zoned', type_='TIMESTAMP', value=zoned) + zoned_param = ScalarQueryParameter(name="zoned", type_="TIMESTAMP", value=zoned) array_param = ArrayQueryParameter( - name='array_param', array_type='INT64', values=[1, 2]) - struct_param = StructQueryParameter( - 'hitchhiker', question_param, answer_param) - phred_name = 'Phred Phlyntstone' + name="array_param", array_type="INT64", values=[1, 2] + ) + struct_param = StructQueryParameter("hitchhiker", question_param, answer_param) + phred_name = "Phred Phlyntstone" phred_name_param = ScalarQueryParameter( - name='name', type_='STRING', value=phred_name) + name="name", type_="STRING", value=phred_name + ) phred_age = 32 phred_age_param = ScalarQueryParameter( - name='age', type_='INT64', value=phred_age) - phred_param = StructQueryParameter( - None, phred_name_param, phred_age_param) - bharney_name = 'Bharney Rhubbyl' + name="age", type_="INT64", value=phred_age + ) + phred_param = StructQueryParameter(None, phred_name_param, phred_age_param) + bharney_name = "Bharney Rhubbyl" bharney_name_param = ScalarQueryParameter( - name='name', type_='STRING', value=bharney_name) + name="name", type_="STRING", value=bharney_name + ) bharney_age = 31 bharney_age_param = ScalarQueryParameter( - name='age', type_='INT64', value=bharney_age) + name="age", type_="INT64", value=bharney_age + ) bharney_param = StructQueryParameter( - None, bharney_name_param, bharney_age_param) + None, bharney_name_param, bharney_age_param + ) characters_param = ArrayQueryParameter( - name=None, array_type='RECORD', - values=[phred_param, bharney_param]) - hero_param = StructQueryParameter( - 'hero', phred_name_param, phred_age_param) + name=None, array_type="RECORD", values=[phred_param, bharney_param] + ) + hero_param = StructQueryParameter("hero", phred_name_param, phred_age_param) sidekick_param = StructQueryParameter( - 'sidekick', bharney_name_param, bharney_age_param) - roles_param = StructQueryParameter( - 'roles', hero_param, sidekick_param) + "sidekick", bharney_name_param, bharney_age_param + ) + roles_param = StructQueryParameter("roles", hero_param, sidekick_param) friends_param = ArrayQueryParameter( - name='friends', array_type='STRING', - values=[phred_name, bharney_name]) + name="friends", array_type="STRING", values=[phred_name, bharney_name] + ) with_friends_param = StructQueryParameter(None, friends_param) top_left_param = StructQueryParameter( - 'top_left', - ScalarQueryParameter('x', 'INT64', 12), - ScalarQueryParameter('y', 'INT64', 102)) + "top_left", + ScalarQueryParameter("x", "INT64", 12), + ScalarQueryParameter("y", "INT64", 102), + ) bottom_right_param = StructQueryParameter( - 'bottom_right', - ScalarQueryParameter('x', 'INT64', 22), - ScalarQueryParameter('y', 'INT64', 92)) + "bottom_right", + ScalarQueryParameter("x", "INT64", 22), + ScalarQueryParameter("y", "INT64", 92), + ) rectangle_param = StructQueryParameter( - 'rectangle', top_left_param, bottom_right_param) + "rectangle", top_left_param, bottom_right_param + ) examples = [ { - 'sql': 'SELECT @question', - 'expected': question, - 'query_parameters': [question_param], - }, - { - 'sql': 'SELECT @answer', - 'expected': answer, - 'query_parameters': [answer_param], - }, - { - 'sql': 'SELECT @pi', - 'expected': pi, - 'query_parameters': [pi_param], + "sql": "SELECT @question", + "expected": question, + "query_parameters": [question_param], }, { - 'sql': 'SELECT @pi_numeric_param', - 'expected': pi_numeric, - 'query_parameters': [pi_numeric_param], + "sql": "SELECT @answer", + "expected": answer, + "query_parameters": [answer_param], }, + {"sql": "SELECT @pi", "expected": pi, "query_parameters": [pi_param]}, { - 'sql': 'SELECT @truthy', - 'expected': truthy, - 'query_parameters': [truthy_param], + "sql": "SELECT @pi_numeric_param", + "expected": pi_numeric, + "query_parameters": [pi_numeric_param], }, { - 'sql': 'SELECT @beef', - 'expected': beef, - 'query_parameters': [beef_param], + "sql": "SELECT @truthy", + "expected": truthy, + "query_parameters": [truthy_param], }, + {"sql": "SELECT @beef", "expected": beef, "query_parameters": [beef_param]}, { - 'sql': 'SELECT @naive', - 'expected': naive, - 'query_parameters': [naive_param], + "sql": "SELECT @naive", + "expected": naive, + "query_parameters": [naive_param], }, { - 'sql': 'SELECT @naive_date', - 'expected': naive.date(), - 'query_parameters': [naive_date_param], + "sql": "SELECT @naive_date", + "expected": naive.date(), + "query_parameters": [naive_date_param], }, { - 'sql': 'SELECT @naive_time', - 'expected': naive.time(), - 'query_parameters': [naive_time_param], + "sql": "SELECT @naive_time", + "expected": naive.time(), + "query_parameters": [naive_time_param], }, { - 'sql': 'SELECT @zoned', - 'expected': zoned, - 'query_parameters': [zoned_param], + "sql": "SELECT @zoned", + "expected": zoned, + "query_parameters": [zoned_param], }, { - 'sql': 'SELECT @array_param', - 'expected': [1, 2], - 'query_parameters': [array_param], + "sql": "SELECT @array_param", + "expected": [1, 2], + "query_parameters": [array_param], }, { - 'sql': 'SELECT (@hitchhiker.question, @hitchhiker.answer)', - 'expected': ({'_field_1': question, '_field_2': answer}), - 'query_parameters': [struct_param], + "sql": "SELECT (@hitchhiker.question, @hitchhiker.answer)", + "expected": ({"_field_1": question, "_field_2": answer}), + "query_parameters": [struct_param], }, { - 'sql': - 'SELECT ' - '((@rectangle.bottom_right.x - @rectangle.top_left.x) ' - '* (@rectangle.top_left.y - @rectangle.bottom_right.y))', - 'expected': 100, - 'query_parameters': [rectangle_param], + "sql": "SELECT " + "((@rectangle.bottom_right.x - @rectangle.top_left.x) " + "* (@rectangle.top_left.y - @rectangle.bottom_right.y))", + "expected": 100, + "query_parameters": [rectangle_param], }, { - 'sql': 'SELECT ?', - 'expected': [ - {'name': phred_name, 'age': phred_age}, - {'name': bharney_name, 'age': bharney_age}, + "sql": "SELECT ?", + "expected": [ + {"name": phred_name, "age": phred_age}, + {"name": bharney_name, "age": bharney_age}, ], - 'query_parameters': [characters_param], + "query_parameters": [characters_param], }, { - 'sql': 'SELECT @roles', - 'expected': { - 'hero': {'name': phred_name, 'age': phred_age}, - 'sidekick': {'name': bharney_name, 'age': bharney_age}, + "sql": "SELECT @roles", + "expected": { + "hero": {"name": phred_name, "age": phred_age}, + "sidekick": {"name": bharney_name, "age": bharney_age}, }, - 'query_parameters': [roles_param], + "query_parameters": [roles_param], }, { - 'sql': 'SELECT ?', - 'expected': { - 'friends': [phred_name, bharney_name], - }, - 'query_parameters': [with_friends_param], + "sql": "SELECT ?", + "expected": {"friends": [phred_name, bharney_name]}, + "query_parameters": [with_friends_param], }, ] for example in examples: jconfig = QueryJobConfig() - jconfig.query_parameters = example['query_parameters'] + jconfig.query_parameters = example["query_parameters"] query_job = Config.CLIENT.query( - example['sql'], + example["sql"], job_config=jconfig, - job_id_prefix='test_query_w_query_params') + job_id_prefix="test_query_w_query_params", + ) rows = list(query_job.result()) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) - self.assertEqual(rows[0][0], example['expected']) + self.assertEqual(rows[0][0], example["expected"]) def test_dbapi_w_query_parameters(self): examples = [ { - 'sql': 'SELECT %(boolval)s', - 'expected': True, - 'query_parameters': { - 'boolval': True, - }, + "sql": "SELECT %(boolval)s", + "expected": True, + "query_parameters": {"boolval": True}, }, { - 'sql': 'SELECT %(a "very" weird `name`)s', - 'expected': True, - 'query_parameters': { - 'a "very" weird `name`': True, - }, + "sql": 'SELECT %(a "very" weird `name`)s', + "expected": True, + "query_parameters": {'a "very" weird `name`': True}, }, { - 'sql': 'SELECT %(select)s', - 'expected': True, - 'query_parameters': { - 'select': True, # this name is a keyword - }, + "sql": "SELECT %(select)s", + "expected": True, + "query_parameters": {"select": True}, # this name is a keyword }, + {"sql": "SELECT %s", "expected": False, "query_parameters": [False]}, { - 'sql': 'SELECT %s', - 'expected': False, - 'query_parameters': [False], + "sql": "SELECT %(intval)s", + "expected": 123, + "query_parameters": {"intval": 123}, }, { - 'sql': 'SELECT %(intval)s', - 'expected': 123, - 'query_parameters': { - 'intval': 123, - }, + "sql": "SELECT %s", + "expected": -123456789, + "query_parameters": [-123456789], }, { - 'sql': 'SELECT %s', - 'expected': -123456789, - 'query_parameters': [-123456789], + "sql": "SELECT %(floatval)s", + "expected": 1.25, + "query_parameters": {"floatval": 1.25}, }, { - 'sql': 'SELECT %(floatval)s', - 'expected': 1.25, - 'query_parameters': { - 'floatval': 1.25, - }, + "sql": "SELECT LOWER(%(strval)s)", + "query_parameters": {"strval": "I Am A String"}, + "expected": "i am a string", }, { - 'sql': 'SELECT LOWER(%(strval)s)', - 'query_parameters': { - 'strval': 'I Am A String', - }, - 'expected': 'i am a string', + "sql": "SELECT DATE_SUB(%(dateval)s, INTERVAL 1 DAY)", + "query_parameters": {"dateval": datetime.date(2017, 4, 2)}, + "expected": datetime.date(2017, 4, 1), }, { - 'sql': 'SELECT DATE_SUB(%(dateval)s, INTERVAL 1 DAY)', - 'query_parameters': { - 'dateval': datetime.date(2017, 4, 2), - }, - 'expected': datetime.date(2017, 4, 1), + "sql": "SELECT TIME_ADD(%(timeval)s, INTERVAL 4 SECOND)", + "query_parameters": {"timeval": datetime.time(12, 34, 56)}, + "expected": datetime.time(12, 35, 0), }, { - 'sql': 'SELECT TIME_ADD(%(timeval)s, INTERVAL 4 SECOND)', - 'query_parameters': { - 'timeval': datetime.time(12, 34, 56), + "sql": ("SELECT DATETIME_ADD(%(datetimeval)s, INTERVAL 53 SECOND)"), + "query_parameters": { + "datetimeval": datetime.datetime(2012, 3, 4, 5, 6, 7) }, - 'expected': datetime.time(12, 35, 0), + "expected": datetime.datetime(2012, 3, 4, 5, 7, 0), }, { - 'sql': ( - 'SELECT DATETIME_ADD(%(datetimeval)s, INTERVAL 53 SECOND)' - ), - 'query_parameters': { - 'datetimeval': datetime.datetime(2012, 3, 4, 5, 6, 7), + "sql": "SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)", + "query_parameters": { + "zoned": datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC) }, - 'expected': datetime.datetime(2012, 3, 4, 5, 7, 0), + "expected": datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC), }, { - 'sql': 'SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)', - 'query_parameters': { - 'zoned': datetime.datetime( - 2012, 3, 4, 5, 6, 7, tzinfo=UTC), + "sql": "SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)", + "query_parameters": { + "zoned": datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC) }, - 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC), - }, - { - 'sql': 'SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)', - 'query_parameters': { - 'zoned': datetime.datetime( - 2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC), - }, - 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC), + "expected": datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC), }, ] for example in examples: - msg = 'sql: {} query_parameters: {}'.format( - example['sql'], example['query_parameters']) + msg = "sql: {} query_parameters: {}".format( + example["sql"], example["query_parameters"] + ) - Config.CURSOR.execute(example['sql'], example['query_parameters']) + Config.CURSOR.execute(example["sql"], example["query_parameters"]) self.assertEqual(Config.CURSOR.rowcount, 1, msg=msg) row = Config.CURSOR.fetchone() self.assertEqual(len(row), 1, msg=msg) - self.assertEqual(row[0], example['expected'], msg=msg) + self.assertEqual(row[0], example["expected"], msg=msg) row = Config.CURSOR.fetchone() self.assertIsNone(row, msg=msg) def test_large_query_w_public_data(self): - PUBLIC = 'bigquery-public-data' - DATASET_ID = 'samples' - TABLE_NAME = 'natality' + PUBLIC = "bigquery-public-data" + DATASET_ID = "samples" + TABLE_NAME = "natality" LIMIT = 1000 - SQL = 'SELECT * from `{}.{}.{}` LIMIT {}'.format( - PUBLIC, DATASET_ID, TABLE_NAME, LIMIT) + SQL = "SELECT * from `{}.{}.{}` LIMIT {}".format( + PUBLIC, DATASET_ID, TABLE_NAME, LIMIT + ) query_job = Config.CLIENT.query(SQL) @@ -1559,19 +1479,20 @@ def test_large_query_w_public_data(self): self.assertEqual(len(rows), LIMIT) def test_query_future(self): - query_job = Config.CLIENT.query('SELECT 1') + query_job = Config.CLIENT.query("SELECT 1") iterator = query_job.result(timeout=JOB_TIMEOUT) row_tuples = [r.values() for r in iterator] self.assertEqual(row_tuples, [(1,)]) def test_query_iter(self): import types - query_job = Config.CLIENT.query('SELECT 1') + + query_job = Config.CLIENT.query("SELECT 1") self.assertIsInstance(iter(query_job), types.GeneratorType) row_tuples = [r.values() for r in query_job] self.assertEqual(row_tuples, [(1,)]) - @unittest.skipIf(pandas is None, 'Requires `pandas`') + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_query_results_to_dataframe(self): QUERY = """ SELECT id, author, time_ts, dead @@ -1583,10 +1504,14 @@ def test_query_results_to_dataframe(self): self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 10) # verify the number of rows - column_names = ['id', 'author', 'time_ts', 'dead'] + column_names = ["id", "author", "time_ts", "dead"] self.assertEqual(list(df), column_names) # verify the column names - exp_datatypes = {'id': int, 'author': six.text_type, - 'time_ts': pandas.Timestamp, 'dead': bool} + exp_datatypes = { + "id": int, + "author": six.text_type, + "time_ts": pandas.Timestamp, + "dead": bool, + } for index, row in df.iterrows(): for col in column_names: # all the schema fields are nullable, so None is acceptable @@ -1597,25 +1522,31 @@ def test_insert_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField schema = [ - SF('string_col', 'STRING', mode='NULLABLE'), - SF('record_col', 'RECORD', mode='NULLABLE', fields=[ - SF('nested_string', 'STRING', mode='NULLABLE'), - SF('nested_repeated', 'INTEGER', mode='REPEATED'), - SF('nested_record', 'RECORD', mode='NULLABLE', fields=[ - SF('nested_nested_string', 'STRING', mode='NULLABLE'), - ]), - ]), + SF("string_col", "STRING", mode="NULLABLE"), + SF( + "record_col", + "RECORD", + mode="NULLABLE", + fields=[ + SF("nested_string", "STRING", mode="NULLABLE"), + SF("nested_repeated", "INTEGER", mode="REPEATED"), + SF( + "nested_record", + "RECORD", + mode="NULLABLE", + fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")], + ), + ], + ), ] record = { - 'nested_string': 'another string value', - 'nested_repeated': [0, 1, 2], - 'nested_record': {'nested_nested_string': 'some deep insight'}, + "nested_string": "another string value", + "nested_repeated": [0, 1, 2], + "nested_record": {"nested_nested_string": "some deep insight"}, } - to_insert = [ - ('Some value', record) - ] - table_id = 'test_table' - dataset = self.temp_dataset(_make_dataset_id('issue_2951')) + to_insert = [("Some value", record)] + table_id = "test_table" + dataset = self.temp_dataset(_make_dataset_id("issue_2951")) table_arg = Table(dataset.table(table_id), schema=schema) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -1631,25 +1562,31 @@ def test_insert_rows_nested_nested_dictionary(self): # See #2951 SF = bigquery.SchemaField schema = [ - SF('string_col', 'STRING', mode='NULLABLE'), - SF('record_col', 'RECORD', mode='NULLABLE', fields=[ - SF('nested_string', 'STRING', mode='NULLABLE'), - SF('nested_repeated', 'INTEGER', mode='REPEATED'), - SF('nested_record', 'RECORD', mode='NULLABLE', fields=[ - SF('nested_nested_string', 'STRING', mode='NULLABLE'), - ]), - ]), + SF("string_col", "STRING", mode="NULLABLE"), + SF( + "record_col", + "RECORD", + mode="NULLABLE", + fields=[ + SF("nested_string", "STRING", mode="NULLABLE"), + SF("nested_repeated", "INTEGER", mode="REPEATED"), + SF( + "nested_record", + "RECORD", + mode="NULLABLE", + fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")], + ), + ], + ), ] record = { - 'nested_string': 'another string value', - 'nested_repeated': [0, 1, 2], - 'nested_record': {'nested_nested_string': 'some deep insight'}, + "nested_string": "another string value", + "nested_repeated": [0, 1, 2], + "nested_record": {"nested_nested_string": "some deep insight"}, } - to_insert = [ - {'string_col': 'Some value', 'record_col': record} - ] - table_id = 'test_table' - dataset = self.temp_dataset(_make_dataset_id('issue_2951')) + to_insert = [{"string_col": "Some value", "record_col": record}] + table_id = "test_table" + dataset = self.temp_dataset(_make_dataset_id("issue_2951")) table_arg = Table(dataset.table(table_id), schema=schema) table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) @@ -1659,13 +1596,12 @@ def test_insert_rows_nested_nested_dictionary(self): retry = RetryResult(_has_rows, max_tries=8) rows = retry(self._fetch_single_page)(table) row_tuples = [r.values() for r in rows] - expected_rows = [('Some value', record)] + expected_rows = [("Some value", record)] self.assertEqual(row_tuples, expected_rows) def test_create_table_rows_fetch_nested_schema(self): - table_name = 'test_table' - dataset = self.temp_dataset( - _make_dataset_id('create_table_nested_schema')) + table_name = "test_table" + dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema")) schema = _load_json_schema() table_arg = Table(dataset.table(table_name), schema=schema) table = retry_403(Config.CLIENT.create_table)(table_arg) @@ -1675,7 +1611,7 @@ def test_create_table_rows_fetch_nested_schema(self): to_insert = [] # Data is in "JSON Lines" format, see http://jsonlines.org/ - json_filename = os.path.join(WHERE, 'data', 'characters.jsonl') + json_filename = os.path.join(WHERE, "data", "characters.jsonl") with open(json_filename) as rows_file: for line in rows_file: to_insert.append(json.loads(line)) @@ -1690,72 +1626,73 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(len(fetched), len(to_insert)) for found, expected in zip(sorted(fetched_tuples), to_insert): - self.assertEqual(found[0], expected['Name']) - self.assertEqual(found[1], int(expected['Age'])) - self.assertEqual(found[2], expected['Weight']) - self.assertEqual(found[3], expected['IsMagic']) - - self.assertEqual(len(found[4]), len(expected['Spells'])) - for f_spell, e_spell in zip(found[4], expected['Spells']): - self.assertEqual(f_spell['Name'], e_spell['Name']) - parts = time.strptime( - e_spell['LastUsed'], '%Y-%m-%d %H:%M:%S UTC') + self.assertEqual(found[0], expected["Name"]) + self.assertEqual(found[1], int(expected["Age"])) + self.assertEqual(found[2], expected["Weight"]) + self.assertEqual(found[3], expected["IsMagic"]) + + self.assertEqual(len(found[4]), len(expected["Spells"])) + for f_spell, e_spell in zip(found[4], expected["Spells"]): + self.assertEqual(f_spell["Name"], e_spell["Name"]) + parts = time.strptime(e_spell["LastUsed"], "%Y-%m-%d %H:%M:%S UTC") e_used = datetime.datetime(*parts[0:6], tzinfo=UTC) - self.assertEqual(f_spell['LastUsed'], e_used) - self.assertEqual(f_spell['DiscoveredBy'], - e_spell['DiscoveredBy']) - self.assertEqual(f_spell['Properties'], e_spell['Properties']) + self.assertEqual(f_spell["LastUsed"], e_used) + self.assertEqual(f_spell["DiscoveredBy"], e_spell["DiscoveredBy"]) + self.assertEqual(f_spell["Properties"], e_spell["Properties"]) - e_icon = base64.standard_b64decode( - e_spell['Icon'].encode('ascii')) - self.assertEqual(f_spell['Icon'], e_icon) + e_icon = base64.standard_b64decode(e_spell["Icon"].encode("ascii")) + self.assertEqual(f_spell["Icon"], e_icon) - parts = time.strptime(expected['TeaTime'], '%H:%M:%S') + parts = time.strptime(expected["TeaTime"], "%H:%M:%S") e_teatime = datetime.time(*parts[3:6]) self.assertEqual(found[5], e_teatime) - parts = time.strptime(expected['NextVacation'], '%Y-%m-%d') + parts = time.strptime(expected["NextVacation"], "%Y-%m-%d") e_nextvac = datetime.date(*parts[0:3]) self.assertEqual(found[6], e_nextvac) - parts = time.strptime(expected['FavoriteTime'], - '%Y-%m-%dT%H:%M:%S') + parts = time.strptime(expected["FavoriteTime"], "%Y-%m-%dT%H:%M:%S") e_favtime = datetime.datetime(*parts[0:6]) self.assertEqual(found[7], e_favtime) - self.assertEqual(found[8], - decimal.Decimal(expected['FavoriteNumber'])) + self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) def _fetch_dataframe(self, query): return Config.CLIENT.query(query).result().to_dataframe() - @unittest.skipIf(pandas is None, 'Requires `pandas`') + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_nested_table_to_dataframe(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition SF = bigquery.SchemaField schema = [ - SF('string_col', 'STRING', mode='NULLABLE'), - SF('record_col', 'RECORD', mode='NULLABLE', fields=[ - SF('nested_string', 'STRING', mode='NULLABLE'), - SF('nested_repeated', 'INTEGER', mode='REPEATED'), - SF('nested_record', 'RECORD', mode='NULLABLE', fields=[ - SF('nested_nested_string', 'STRING', mode='NULLABLE'), - ]), - ]), + SF("string_col", "STRING", mode="NULLABLE"), + SF( + "record_col", + "RECORD", + mode="NULLABLE", + fields=[ + SF("nested_string", "STRING", mode="NULLABLE"), + SF("nested_repeated", "INTEGER", mode="REPEATED"), + SF( + "nested_record", + "RECORD", + mode="NULLABLE", + fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")], + ), + ], + ), ] record = { - 'nested_string': 'another string value', - 'nested_repeated': [0, 1, 2], - 'nested_record': {'nested_nested_string': 'some deep insight'}, + "nested_string": "another string value", + "nested_repeated": [0, 1, 2], + "nested_record": {"nested_nested_string": "some deep insight"}, } - to_insert = [ - {'string_col': 'Some value', 'record_col': record}, - ] + to_insert = [{"string_col": "Some value", "record_col": record}] rows = [json.dumps(row) for row in to_insert] - body = six.BytesIO('{}\n'.format('\n'.join(rows)).encode('ascii')) - table_id = 'test_table' - dataset = self.temp_dataset(_make_dataset_id('nested_df')) + body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) + table_id = "test_table" + dataset = self.temp_dataset(_make_dataset_id("nested_df")) table = dataset.table(table_id) self.to_delete.insert(0, table) job_config = bigquery.LoadJobConfig() @@ -1763,32 +1700,31 @@ def test_nested_table_to_dataframe(self): job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON job_config.schema = schema # Load a table using a local JSON file from memory. - Config.CLIENT.load_table_from_file( - body, table, job_config=job_config).result() + Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - df = Config.CLIENT.list_rows( - table, selected_fields=schema).to_dataframe() + df = Config.CLIENT.list_rows(table, selected_fields=schema).to_dataframe() self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 1) # verify the number of rows - exp_columns = ['string_col', 'record_col'] + exp_columns = ["string_col", "record_col"] self.assertEqual(list(df), exp_columns) # verify the column names row = df.iloc[0] # verify the row content - self.assertEqual(row['string_col'], 'Some value') - self.assertEqual(row['record_col'], record) + self.assertEqual(row["string_col"], "Some value") + self.assertEqual(row["record_col"], record) # verify that nested data can be accessed with indices/keys - self.assertEqual(row['record_col']['nested_repeated'][0], 0) + self.assertEqual(row["record_col"]["nested_repeated"][0], 0) self.assertEqual( - row['record_col']['nested_record']['nested_nested_string'], - 'some deep insight') + row["record_col"]["nested_record"]["nested_nested_string"], + "some deep insight", + ) def test_list_rows_empty_table(self): from google.cloud.bigquery.table import RowIterator - dataset_id = _make_dataset_id('empty_table') + dataset_id = _make_dataset_id("empty_table") dataset = self.temp_dataset(dataset_id) - table_ref = dataset.table('empty_table') + table_ref = dataset.table("empty_table") table = Config.CLIENT.create_table(bigquery.Table(table_ref)) # It's a bit silly to list rows for an empty table, but this does @@ -1806,13 +1742,13 @@ def test_list_rows_page_size(self): num_pages, num_last_page = divmod(num_items, page_size) SF = bigquery.SchemaField - schema = [SF('string_col', 'STRING', mode='NULLABLE')] - to_insert = [{'string_col': 'item%d' % i} for i in range(num_items)] + schema = [SF("string_col", "STRING", mode="NULLABLE")] + to_insert = [{"string_col": "item%d" % i} for i in range(num_items)] rows = [json.dumps(row) for row in to_insert] - body = six.BytesIO('{}\n'.format('\n'.join(rows)).encode('ascii')) + body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) - table_id = 'test_table' - dataset = self.temp_dataset(_make_dataset_id('nested_df')) + table_id = "test_table" + dataset = self.temp_dataset(_make_dataset_id("nested_df")) table = dataset.table(table_id) self.to_delete.insert(0, table) job_config = bigquery.LoadJobConfig() @@ -1820,11 +1756,9 @@ def test_list_rows_page_size(self): job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON job_config.schema = schema # Load a table using a local JSON file from memory. - Config.CLIENT.load_table_from_file( - body, table, job_config=job_config).result() + Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - df = Config.CLIENT.list_rows( - table, selected_fields=schema, page_size=page_size) + df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size) pages = df.pages for i in range(num_pages): @@ -1842,12 +1776,12 @@ def temp_dataset(self, dataset_id, location=None): return dataset -@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') -@pytest.mark.skipif(IPython is None, reason='Requires `ipython`') -@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") +@pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic(): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") sql = """ SELECT CONCAT( @@ -1860,22 +1794,21 @@ def test_bigquery_magic(): LIMIT 10 """ with io.capture_output() as captured: - result = ip.run_cell_magic('bigquery', '', sql) + result = ip.run_cell_magic("bigquery", "", sql) - lines = re.split('\n|\r', captured.stdout) + lines = re.split("\n|\r", captured.stdout) # Removes blanks & terminal code (result of display clearing) - updates = list(filter(lambda x: bool(x) and x != '\x1b[2K', lines)) + updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines)) assert re.match("Executing query with job ID: .*", updates[0]) - assert all(re.match("Query executing: .*s", line) - for line in updates[1:-1]) + assert all(re.match("Query executing: .*s", line) for line in updates[1:-1]) assert re.match("Query complete after .*s", updates[-1]) assert isinstance(result, pandas.DataFrame) - assert len(result) == 10 # verify row count - assert list(result) == ['url', 'view_count'] # verify column names + assert len(result) == 10 # verify row count + assert list(result) == ["url", "view_count"] # verify column names def _job_done(instance): - return instance.state.lower() == 'done' + return instance.state.lower() == "done" def _dataset_exists(ds): @@ -1895,7 +1828,7 @@ def _table_exists(t): return False -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def ipython(): config = tools.default_config() config.TerminalInteractiveShell.simple_prompt = True diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index d0a93ebd1340..c2c4f9f7f787 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -19,38 +19,36 @@ class Test_not_null(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _not_null return _not_null(value, field) def test_w_none_nullable(self): - self.assertFalse(self._call_fut(None, _Field('NULLABLE'))) + self.assertFalse(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): - self.assertTrue(self._call_fut(None, _Field('REQUIRED'))) + self.assertTrue(self._call_fut(None, _Field("REQUIRED"))) def test_w_value(self): self.assertTrue(self._call_fut(object(), object())) class Test_int_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _int_from_json return _int_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._call_fut(None, _Field('REQUIRED')) + self._call_fut(None, _Field("REQUIRED")) def test_w_string_value(self): - coerced = self._call_fut('42', object()) + coerced = self._call_fut("42", object()) self.assertEqual(coerced, 42) def test_w_float_value(self): @@ -59,21 +57,20 @@ def test_w_float_value(self): class Test_float_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _float_from_json return _float_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._call_fut(None, _Field('REQUIRED')) + self._call_fut(None, _Field("REQUIRED")) def test_w_string_value(self): - coerced = self._call_fut('3.1415', object()) + coerced = self._call_fut("3.1415", object()) self.assertEqual(coerced, 3.1415) def test_w_float_value(self): @@ -82,22 +79,21 @@ def test_w_float_value(self): class Test_decimal_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _decimal_from_json return _decimal_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._call_fut(None, _Field('REQUIRED')) + self._call_fut(None, _Field("REQUIRED")) def test_w_string_value(self): - coerced = self._call_fut('3.1415', object()) - self.assertEqual(coerced, decimal.Decimal('3.1415')) + coerced = self._call_fut("3.1415", object()) + self.assertEqual(coerced, decimal.Decimal("3.1415")) def test_w_float_value(self): coerced = self._call_fut(3.1415, object()) @@ -106,319 +102,295 @@ def test_w_float_value(self): class Test_bool_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _bool_from_json return _bool_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): with self.assertRaises(AttributeError): - self._call_fut(None, _Field('REQUIRED')) + self._call_fut(None, _Field("REQUIRED")) def test_w_value_t(self): - coerced = self._call_fut('T', object()) + coerced = self._call_fut("T", object()) self.assertTrue(coerced) def test_w_value_true(self): - coerced = self._call_fut('True', object()) + coerced = self._call_fut("True", object()) self.assertTrue(coerced) def test_w_value_1(self): - coerced = self._call_fut('1', object()) + coerced = self._call_fut("1", object()) self.assertTrue(coerced) def test_w_value_other(self): - coerced = self._call_fut('f', object()) + coerced = self._call_fut("f", object()) self.assertFalse(coerced) class Test_string_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _string_from_json return _string_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): - self.assertIsNone(self._call_fut(None, _Field('REQUIRED'))) + self.assertIsNone(self._call_fut(None, _Field("REQUIRED"))) def test_w_string_value(self): - coerced = self._call_fut('Wonderful!', object()) - self.assertEqual(coerced, 'Wonderful!') + coerced = self._call_fut("Wonderful!", object()) + self.assertEqual(coerced, "Wonderful!") class Test_bytes_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _bytes_from_json return _bytes_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._call_fut(None, _Field('REQUIRED')) + self._call_fut(None, _Field("REQUIRED")) def test_w_base64_encoded_bytes(self): - expected = b'Wonderful!' + expected = b"Wonderful!" encoded = base64.standard_b64encode(expected) coerced = self._call_fut(encoded, object()) self.assertEqual(coerced, expected) def test_w_base64_encoded_text(self): - expected = b'Wonderful!' - encoded = base64.standard_b64encode(expected).decode('ascii') + expected = b"Wonderful!" + encoded = base64.standard_b64encode(expected).decode("ascii") coerced = self._call_fut(encoded, object()) self.assertEqual(coerced, expected) class Test_timestamp_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _timestamp_from_json return _timestamp_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._call_fut(None, _Field('REQUIRED')) + self._call_fut(None, _Field("REQUIRED")) def test_w_string_value(self): from google.cloud._helpers import _EPOCH - coerced = self._call_fut('1.234567', object()) + coerced = self._call_fut("1.234567", object()) self.assertEqual( - coerced, - _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) + coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) + ) def test_w_float_value(self): from google.cloud._helpers import _EPOCH coerced = self._call_fut(1.234567, object()) self.assertEqual( - coerced, - _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)) + coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) + ) class Test_timestamp_query_param_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery import _helpers return _helpers._timestamp_query_param_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_timestamp_valid(self): from google.cloud._helpers import UTC samples = [ ( - '2016-12-20 15:58:27.339328+00:00', - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + "2016-12-20 15:58:27.339328+00:00", + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), ), ( - '2016-12-20 15:58:27+00:00', - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + "2016-12-20 15:58:27+00:00", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), ), ( - '2016-12-20T15:58:27.339328+00:00', - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + "2016-12-20T15:58:27.339328+00:00", + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), ), ( - '2016-12-20T15:58:27+00:00', - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + "2016-12-20T15:58:27+00:00", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), ), ( - '2016-12-20 15:58:27.339328Z', - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + "2016-12-20 15:58:27.339328Z", + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), ), ( - '2016-12-20 15:58:27Z', - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + "2016-12-20 15:58:27Z", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), ), ( - '2016-12-20T15:58:27.339328Z', - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + "2016-12-20T15:58:27.339328Z", + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), ), ( - '2016-12-20T15:58:27Z', - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + "2016-12-20T15:58:27Z", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), ), ] for timestamp_str, expected_result in samples: self.assertEqual( - self._call_fut(timestamp_str, _Field('NULLABLE')), - expected_result) + self._call_fut(timestamp_str, _Field("NULLABLE")), expected_result + ) def test_w_timestamp_invalid(self): with self.assertRaises(ValueError): - self._call_fut('definitely-not-a-timestamp', _Field('NULLABLE')) + self._call_fut("definitely-not-a-timestamp", _Field("NULLABLE")) class Test_datetime_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _datetime_from_json return _datetime_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._call_fut(None, _Field('REQUIRED')) + self._call_fut(None, _Field("REQUIRED")) def test_w_string_value(self): - coerced = self._call_fut('2016-12-02T18:51:33', object()) - self.assertEqual( - coerced, - datetime.datetime(2016, 12, 2, 18, 51, 33)) + coerced = self._call_fut("2016-12-02T18:51:33", object()) + self.assertEqual(coerced, datetime.datetime(2016, 12, 2, 18, 51, 33)) def test_w_microseconds(self): - coerced = self._call_fut('2015-05-22T10:11:12.987654', object()) - self.assertEqual( - coerced, - datetime.datetime(2015, 5, 22, 10, 11, 12, 987654)) + coerced = self._call_fut("2015-05-22T10:11:12.987654", object()) + self.assertEqual(coerced, datetime.datetime(2015, 5, 22, 10, 11, 12, 987654)) class Test_date_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _date_from_json return _date_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._call_fut(None, _Field('REQUIRED')) + self._call_fut(None, _Field("REQUIRED")) def test_w_string_value(self): - coerced = self._call_fut('1987-09-22', object()) - self.assertEqual( - coerced, - datetime.date(1987, 9, 22)) + coerced = self._call_fut("1987-09-22", object()) + self.assertEqual(coerced, datetime.date(1987, 9, 22)) class Test_time_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _time_from_json return _time_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._call_fut(None, _Field('REQUIRED')) + self._call_fut(None, _Field("REQUIRED")) def test_w_string_value(self): - coerced = self._call_fut('12:12:27', object()) - self.assertEqual( - coerced, - datetime.time(12, 12, 27)) + coerced = self._call_fut("12:12:27", object()) + self.assertEqual(coerced, datetime.time(12, 12, 27)) def test_w_subsecond_string_value(self): - coerced = self._call_fut('12:12:27.123456', object()) - self.assertEqual( - coerced, - datetime.time(12, 12, 27, 123456)) + coerced = self._call_fut("12:12:27.123456", object()) + self.assertEqual(coerced, datetime.time(12, 12, 27, 123456)) def test_w_bogus_string_value(self): with self.assertRaises(ValueError): - self._call_fut('12:12:27.123', object()) + self._call_fut("12:12:27.123", object()) class Test_record_from_json(unittest.TestCase): - def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _record_from_json return _record_from_json(value, field) def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) def test_w_none_required(self): with self.assertRaises(TypeError): - self._call_fut(None, _Field('REQUIRED')) + self._call_fut(None, _Field("REQUIRED")) def test_w_nullable_subfield_none(self): - subfield = _Field('NULLABLE', 'age', 'INTEGER') - field = _Field('REQUIRED', fields=[subfield]) - value = {'f': [{'v': None}]} + subfield = _Field("NULLABLE", "age", "INTEGER") + field = _Field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": None}]} coerced = self._call_fut(value, field) - self.assertEqual(coerced, {'age': None}) + self.assertEqual(coerced, {"age": None}) def test_w_scalar_subfield(self): - subfield = _Field('REQUIRED', 'age', 'INTEGER') - field = _Field('REQUIRED', fields=[subfield]) - value = {'f': [{'v': 42}]} + subfield = _Field("REQUIRED", "age", "INTEGER") + field = _Field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": 42}]} coerced = self._call_fut(value, field) - self.assertEqual(coerced, {'age': 42}) + self.assertEqual(coerced, {"age": 42}) def test_w_scalar_subfield_geography(self): - subfield = _Field('REQUIRED', 'geo', 'GEOGRAPHY') - field = _Field('REQUIRED', fields=[subfield]) - value = {'f': [{'v': 'POINT(1, 2)'}]} + subfield = _Field("REQUIRED", "geo", "GEOGRAPHY") + field = _Field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": "POINT(1, 2)"}]} coerced = self._call_fut(value, field) - self.assertEqual(coerced, {'geo': 'POINT(1, 2)'}) + self.assertEqual(coerced, {"geo": "POINT(1, 2)"}) def test_w_repeated_subfield(self): - subfield = _Field('REPEATED', 'color', 'STRING') - field = _Field('REQUIRED', fields=[subfield]) - value = {'f': [{'v': [{'v': 'red'}, {'v': 'yellow'}, {'v': 'blue'}]}]} + subfield = _Field("REPEATED", "color", "STRING") + field = _Field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": [{"v": "red"}, {"v": "yellow"}, {"v": "blue"}]}]} coerced = self._call_fut(value, field) - self.assertEqual(coerced, {'color': ['red', 'yellow', 'blue']}) + self.assertEqual(coerced, {"color": ["red", "yellow", "blue"]}) def test_w_record_subfield(self): - full_name = _Field('REQUIRED', 'full_name', 'STRING') - area_code = _Field('REQUIRED', 'area_code', 'STRING') - local_number = _Field('REQUIRED', 'local_number', 'STRING') - rank = _Field('REQUIRED', 'rank', 'INTEGER') - phone = _Field('NULLABLE', 'phone', 'RECORD', - fields=[area_code, local_number, rank]) - person = _Field('REQUIRED', 'person', 'RECORD', - fields=[full_name, phone]) + full_name = _Field("REQUIRED", "full_name", "STRING") + area_code = _Field("REQUIRED", "area_code", "STRING") + local_number = _Field("REQUIRED", "local_number", "STRING") + rank = _Field("REQUIRED", "rank", "INTEGER") + phone = _Field( + "NULLABLE", "phone", "RECORD", fields=[area_code, local_number, rank] + ) + person = _Field("REQUIRED", "person", "RECORD", fields=[full_name, phone]) value = { - 'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, - ], + "f": [ + {"v": "Phred Phlyntstone"}, + {"v": {"f": [{"v": "800"}, {"v": "555-1212"}, {"v": 1}]}}, + ] } expected = { - 'full_name': 'Phred Phlyntstone', - 'phone': { - 'area_code': '800', - 'local_number': '555-1212', - 'rank': 1, - } + "full_name": "Phred Phlyntstone", + "phone": {"area_code": "800", "local_number": "555-1212", "rank": 1}, } coerced = self._call_fut(value, person) self.assertEqual(coerced, expected) class Test_field_to_index_mapping(unittest.TestCase): - def _call_fut(self, schema): from google.cloud.bigquery._helpers import _field_to_index_mapping @@ -429,17 +401,14 @@ def test_w_empty_schema(self): def test_w_non_empty_schema(self): schema = [ - _Field('REPEATED', 'first', 'INTEGER'), - _Field('REQUIRED', 'second', 'INTEGER'), - _Field('REPEATED', 'third', 'INTEGER'), + _Field("REPEATED", "first", "INTEGER"), + _Field("REQUIRED", "second", "INTEGER"), + _Field("REPEATED", "third", "INTEGER"), ] - self.assertEqual( - self._call_fut(schema), - {'first': 0, 'second': 1, 'third': 2}) + self.assertEqual(self._call_fut(schema), {"first": 0, "second": 1, "third": 2}) class Test_row_tuple_from_json(unittest.TestCase): - def _call_fut(self, row, schema): from google.cloud.bigquery._helpers import _row_tuple_from_json @@ -447,97 +416,116 @@ def _call_fut(self, row, schema): def test_w_single_scalar_column(self): # SELECT 1 AS col - col = _Field('REQUIRED', 'col', 'INTEGER') - row = {u'f': [{u'v': u'1'}]} + col = _Field("REQUIRED", "col", "INTEGER") + row = {u"f": [{u"v": u"1"}]} self.assertEqual(self._call_fut(row, schema=[col]), (1,)) def test_w_single_scalar_geography_column(self): # SELECT 1 AS col - col = _Field('REQUIRED', 'geo', 'GEOGRAPHY') - row = {u'f': [{u'v': u'POINT(1, 2)'}]} - self.assertEqual(self._call_fut(row, schema=[col]), ('POINT(1, 2)',)) + col = _Field("REQUIRED", "geo", "GEOGRAPHY") + row = {u"f": [{u"v": u"POINT(1, 2)"}]} + self.assertEqual(self._call_fut(row, schema=[col]), ("POINT(1, 2)",)) def test_w_single_struct_column(self): # SELECT (1, 2) AS col - sub_1 = _Field('REQUIRED', 'sub_1', 'INTEGER') - sub_2 = _Field('REQUIRED', 'sub_2', 'INTEGER') - col = _Field('REQUIRED', 'col', 'RECORD', fields=[sub_1, sub_2]) - row = {u'f': [{u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}]}}]} - self.assertEqual(self._call_fut(row, schema=[col]), - ({'sub_1': 1, 'sub_2': 2},)) + sub_1 = _Field("REQUIRED", "sub_1", "INTEGER") + sub_2 = _Field("REQUIRED", "sub_2", "INTEGER") + col = _Field("REQUIRED", "col", "RECORD", fields=[sub_1, sub_2]) + row = {u"f": [{u"v": {u"f": [{u"v": u"1"}, {u"v": u"2"}]}}]} + self.assertEqual(self._call_fut(row, schema=[col]), ({"sub_1": 1, "sub_2": 2},)) def test_w_single_array_column(self): # SELECT [1, 2, 3] as col - col = _Field('REPEATED', 'col', 'INTEGER') - row = {u'f': [{u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}]} - self.assertEqual(self._call_fut(row, schema=[col]), - ([1, 2, 3],)) + col = _Field("REPEATED", "col", "INTEGER") + row = {u"f": [{u"v": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]}]} + self.assertEqual(self._call_fut(row, schema=[col]), ([1, 2, 3],)) def test_w_struct_w_nested_array_column(self): # SELECT ([1, 2], 3, [4, 5]) as col - first = _Field('REPEATED', 'first', 'INTEGER') - second = _Field('REQUIRED', 'second', 'INTEGER') - third = _Field('REPEATED', 'third', 'INTEGER') - col = _Field('REQUIRED', 'col', 'RECORD', - fields=[first, second, third]) + first = _Field("REPEATED", "first", "INTEGER") + second = _Field("REQUIRED", "second", "INTEGER") + third = _Field("REPEATED", "third", "INTEGER") + col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third]) row = { - u'f': [ - {u'v': { - u'f': [ - {u'v': [{u'v': u'1'}, {u'v': u'2'}]}, - {u'v': u'3'}, - {u'v': [{u'v': u'4'}, {u'v': u'5'}]} - ] - }}, + u"f": [ + { + u"v": { + u"f": [ + {u"v": [{u"v": u"1"}, {u"v": u"2"}]}, + {u"v": u"3"}, + {u"v": [{u"v": u"4"}, {u"v": u"5"}]}, + ] + } + } ] } self.assertEqual( self._call_fut(row, schema=[col]), - ({u'first': [1, 2], u'second': 3, u'third': [4, 5]},)) + ({u"first": [1, 2], u"second": 3, u"third": [4, 5]},), + ) def test_w_array_of_struct(self): # SELECT [(1, 2, 3), (4, 5, 6)] as col - first = _Field('REQUIRED', 'first', 'INTEGER') - second = _Field('REQUIRED', 'second', 'INTEGER') - third = _Field('REQUIRED', 'third', 'INTEGER') - col = _Field('REPEATED', 'col', 'RECORD', - fields=[first, second, third]) - row = {u'f': [{u'v': [ - {u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}}, - {u'v': {u'f': [{u'v': u'4'}, {u'v': u'5'}, {u'v': u'6'}]}}, - ]}]} + first = _Field("REQUIRED", "first", "INTEGER") + second = _Field("REQUIRED", "second", "INTEGER") + third = _Field("REQUIRED", "third", "INTEGER") + col = _Field("REPEATED", "col", "RECORD", fields=[first, second, third]) + row = { + u"f": [ + { + u"v": [ + {u"v": {u"f": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]}}, + {u"v": {u"f": [{u"v": u"4"}, {u"v": u"5"}, {u"v": u"6"}]}}, + ] + } + ] + } self.assertEqual( self._call_fut(row, schema=[col]), - ([ - {u'first': 1, u'second': 2, u'third': 3}, - {u'first': 4, u'second': 5, u'third': 6}, - ],)) + ( + [ + {u"first": 1, u"second": 2, u"third": 3}, + {u"first": 4, u"second": 5, u"third": 6}, + ], + ), + ) def test_w_array_of_struct_w_array(self): # SELECT [([1, 2, 3], 4), ([5, 6], 7)] - first = _Field('REPEATED', 'first', 'INTEGER') - second = _Field('REQUIRED', 'second', 'INTEGER') - col = _Field('REPEATED', 'col', 'RECORD', fields=[first, second]) - row = {u'f': [{u'v': [ - {u'v': {u'f': [ - {u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}, - {u'v': u'4'} - ]}}, - {u'v': {u'f': [ - {u'v': [{u'v': u'5'}, {u'v': u'6'}]}, - {u'v': u'7'} - ]}} - ]}]} + first = _Field("REPEATED", "first", "INTEGER") + second = _Field("REQUIRED", "second", "INTEGER") + col = _Field("REPEATED", "col", "RECORD", fields=[first, second]) + row = { + u"f": [ + { + u"v": [ + { + u"v": { + u"f": [ + {u"v": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]}, + {u"v": u"4"}, + ] + } + }, + { + u"v": { + u"f": [ + {u"v": [{u"v": u"5"}, {u"v": u"6"}]}, + {u"v": u"7"}, + ] + } + }, + ] + } + ] + } self.assertEqual( self._call_fut(row, schema=[col]), - ([ - {u'first': [1, 2, 3], u'second': 4}, - {u'first': [5, 6], u'second': 7}, - ],)) + ([{u"first": [1, 2, 3], u"second": 4}, {u"first": [5, 6], u"second": 7}],), + ) class Test_rows_from_json(unittest.TestCase): - def _call_fut(self, rows, schema): from google.cloud.bigquery._helpers import _rows_from_json @@ -546,46 +534,39 @@ def _call_fut(self, rows, schema): def test_w_record_subfield(self): from google.cloud.bigquery.table import Row - full_name = _Field('REQUIRED', 'full_name', 'STRING') - area_code = _Field('REQUIRED', 'area_code', 'STRING') - local_number = _Field('REQUIRED', 'local_number', 'STRING') - rank = _Field('REQUIRED', 'rank', 'INTEGER') - phone = _Field('NULLABLE', 'phone', 'RECORD', - fields=[area_code, local_number, rank]) - color = _Field('REPEATED', 'color', 'STRING') + full_name = _Field("REQUIRED", "full_name", "STRING") + area_code = _Field("REQUIRED", "area_code", "STRING") + local_number = _Field("REQUIRED", "local_number", "STRING") + rank = _Field("REQUIRED", "rank", "INTEGER") + phone = _Field( + "NULLABLE", "phone", "RECORD", fields=[area_code, local_number, rank] + ) + color = _Field("REPEATED", "color", "STRING") schema = [full_name, phone, color] rows = [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, - {'v': [{'v': 'orange'}, {'v': 'black'}]}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}}, - {'v': [{'v': 'brown'}]}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': None}, - {'v': []}, - ]}, + { + "f": [ + {"v": "Phred Phlyntstone"}, + {"v": {"f": [{"v": "800"}, {"v": "555-1212"}, {"v": 1}]}}, + {"v": [{"v": "orange"}, {"v": "black"}]}, + ] + }, + { + "f": [ + {"v": "Bharney Rhubble"}, + {"v": {"f": [{"v": "877"}, {"v": "768-5309"}, {"v": 2}]}}, + {"v": [{"v": "brown"}]}, + ] + }, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": None}, {"v": []}]}, ] - phred_phone = { - 'area_code': '800', - 'local_number': '555-1212', - 'rank': 1, - } - bharney_phone = { - 'area_code': '877', - 'local_number': '768-5309', - 'rank': 2, - } - f2i = {'full_name': 0, 'phone': 1, 'color': 2} + phred_phone = {"area_code": "800", "local_number": "555-1212", "rank": 1} + bharney_phone = {"area_code": "877", "local_number": "768-5309", "rank": 2} + f2i = {"full_name": 0, "phone": 1, "color": 2} expected = [ - Row(('Phred Phlyntstone', phred_phone, ['orange', 'black']), f2i), - Row(('Bharney Rhubble', bharney_phone, ['brown']), f2i), - Row(('Wylma Phlyntstone', None, []), f2i), + Row(("Phred Phlyntstone", phred_phone, ["orange", "black"]), f2i), + Row(("Bharney Rhubble", bharney_phone, ["brown"]), f2i), + Row(("Wylma Phlyntstone", None, []), f2i), ] coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) @@ -594,57 +575,47 @@ def test_w_int64_float64_bool(self): from google.cloud.bigquery.table import Row # "Standard" SQL dialect uses 'INT64', 'FLOAT64', 'BOOL'. - candidate = _Field('REQUIRED', 'candidate', 'STRING') - votes = _Field('REQUIRED', 'votes', 'INT64') - percentage = _Field('REQUIRED', 'percentage', 'FLOAT64') - incumbent = _Field('REQUIRED', 'incumbent', 'BOOL') + candidate = _Field("REQUIRED", "candidate", "STRING") + votes = _Field("REQUIRED", "votes", "INT64") + percentage = _Field("REQUIRED", "percentage", "FLOAT64") + incumbent = _Field("REQUIRED", "incumbent", "BOOL") schema = [candidate, votes, percentage, incumbent] rows = [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': 8}, - {'v': 0.25}, - {'v': 'true'}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': 4}, - {'v': 0.125}, - {'v': 'false'}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': 20}, - {'v': 0.625}, - {'v': 'false'}, - ]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": 8}, {"v": 0.25}, {"v": "true"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": 4}, {"v": 0.125}, {"v": "false"}]}, + { + "f": [ + {"v": "Wylma Phlyntstone"}, + {"v": 20}, + {"v": 0.625}, + {"v": "false"}, + ] + }, ] - f2i = {'candidate': 0, 'votes': 1, 'percentage': 2, 'incumbent': 3} + f2i = {"candidate": 0, "votes": 1, "percentage": 2, "incumbent": 3} expected = [ - Row(('Phred Phlyntstone', 8, 0.25, True), f2i), - Row(('Bharney Rhubble', 4, 0.125, False), f2i), - Row(('Wylma Phlyntstone', 20, 0.625, False), f2i), + Row(("Phred Phlyntstone", 8, 0.25, True), f2i), + Row(("Bharney Rhubble", 4, 0.125, False), f2i), + Row(("Wylma Phlyntstone", 20, 0.625, False), f2i), ] coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) class Test_int_to_json(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _int_to_json return _int_to_json(value) def test_w_int(self): - self.assertEqual(self._call_fut(123), '123') + self.assertEqual(self._call_fut(123), "123") def test_w_string(self): - self.assertEqual(self._call_fut('123'), '123') + self.assertEqual(self._call_fut("123"), "123") class Test_float_to_json(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _float_to_json @@ -655,7 +626,6 @@ def test_w_float(self): class Test_decimal_to_json(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _decimal_to_json @@ -665,31 +635,29 @@ def test_w_float(self): self.assertEqual(self._call_fut(1.23), 1.23) def test_w_string(self): - self.assertEqual(self._call_fut('1.23'), '1.23') + self.assertEqual(self._call_fut("1.23"), "1.23") def test_w_decimal(self): - self.assertEqual(self._call_fut(decimal.Decimal('1.23')), '1.23') + self.assertEqual(self._call_fut(decimal.Decimal("1.23")), "1.23") class Test_bool_to_json(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _bool_to_json return _bool_to_json(value) def test_w_true(self): - self.assertEqual(self._call_fut(True), 'true') + self.assertEqual(self._call_fut(True), "true") def test_w_false(self): - self.assertEqual(self._call_fut(False), 'false') + self.assertEqual(self._call_fut(False), "false") def test_w_string(self): - self.assertEqual(self._call_fut('false'), 'false') + self.assertEqual(self._call_fut("false"), "false") class Test_bytes_to_json(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _bytes_to_json @@ -700,14 +668,13 @@ def test_w_non_bytes(self): self.assertIs(self._call_fut(non_bytes), non_bytes) def test_w_bytes(self): - source = b'source' - expected = u'c291cmNl' + source = b"source" + expected = u"c291cmNl" converted = self._call_fut(source) self.assertEqual(converted, expected) class Test_timestamp_to_json_parameter(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _timestamp_to_json_parameter @@ -717,35 +684,32 @@ def test_w_float(self): self.assertEqual(self._call_fut(1.234567), 1.234567) def test_w_string(self): - ZULU = '2016-12-20 15:58:27.339328+00:00' + ZULU = "2016-12-20 15:58:27.339328+00:00" self.assertEqual(self._call_fut(ZULU), ZULU) def test_w_datetime_wo_zone(self): - ZULU = '2016-12-20 15:58:27.339328+00:00' + ZULU = "2016-12-20 15:58:27.339328+00:00" when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) self.assertEqual(self._call_fut(when), ZULU) def test_w_datetime_w_non_utc_zone(self): class _Zone(datetime.tzinfo): - def utcoffset(self, _): return datetime.timedelta(minutes=-240) - ZULU = '2016-12-20 19:58:27.339328+00:00' - when = datetime.datetime( - 2016, 12, 20, 15, 58, 27, 339328, tzinfo=_Zone()) + ZULU = "2016-12-20 19:58:27.339328+00:00" + when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=_Zone()) self.assertEqual(self._call_fut(when), ZULU) def test_w_datetime_w_utc_zone(self): from google.cloud._helpers import UTC - ZULU = '2016-12-20 15:58:27.339328+00:00' + ZULU = "2016-12-20 15:58:27.339328+00:00" when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) self.assertEqual(self._call_fut(when), ZULU) class Test_timestamp_to_json_row(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _timestamp_to_json_row @@ -755,108 +719,99 @@ def test_w_float(self): self.assertEqual(self._call_fut(1.234567), 1.234567) def test_w_string(self): - ZULU = '2016-12-20 15:58:27.339328+00:00' + ZULU = "2016-12-20 15:58:27.339328+00:00" self.assertEqual(self._call_fut(ZULU), ZULU) def test_w_datetime(self): from google.cloud._helpers import _microseconds_from_datetime when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) - self.assertEqual( - self._call_fut(when), _microseconds_from_datetime(when) / 1e6) + self.assertEqual(self._call_fut(when), _microseconds_from_datetime(when) / 1e6) class Test_datetime_to_json(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _datetime_to_json return _datetime_to_json(value) def test_w_string(self): - RFC3339 = '2016-12-03T14:14:51Z' + RFC3339 = "2016-12-03T14:14:51Z" self.assertEqual(self._call_fut(RFC3339), RFC3339) def test_w_datetime(self): from google.cloud._helpers import UTC when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC) - self.assertEqual(self._call_fut(when), '2016-12-03T14:11:27.123456') + self.assertEqual(self._call_fut(when), "2016-12-03T14:11:27.123456") class Test_date_to_json(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _date_to_json return _date_to_json(value) def test_w_string(self): - RFC3339 = '2016-12-03' + RFC3339 = "2016-12-03" self.assertEqual(self._call_fut(RFC3339), RFC3339) def test_w_datetime(self): when = datetime.date(2016, 12, 3) - self.assertEqual(self._call_fut(when), '2016-12-03') + self.assertEqual(self._call_fut(when), "2016-12-03") class Test_time_to_json(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _time_to_json return _time_to_json(value) def test_w_string(self): - RFC3339 = '12:13:41' + RFC3339 = "12:13:41" self.assertEqual(self._call_fut(RFC3339), RFC3339) def test_w_datetime(self): when = datetime.time(12, 13, 41) - self.assertEqual(self._call_fut(when), '12:13:41') + self.assertEqual(self._call_fut(when), "12:13:41") class Test_snake_to_camel_case(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _snake_to_camel_case return _snake_to_camel_case(value) def test_w_snake_case_string(self): - self.assertEqual(self._call_fut('friendly_name'), 'friendlyName') + self.assertEqual(self._call_fut("friendly_name"), "friendlyName") def test_w_camel_case_string(self): - self.assertEqual(self._call_fut('friendlyName'), 'friendlyName') + self.assertEqual(self._call_fut("friendlyName"), "friendlyName") class Test__get_sub_prop(unittest.TestCase): - def _call_fut(self, container, keys, **kw): from google.cloud.bigquery._helpers import _get_sub_prop return _get_sub_prop(container, keys, **kw) def test_w_empty_container_default_default(self): - self.assertIsNone(self._call_fut({}, ['key1'])) + self.assertIsNone(self._call_fut({}, ["key1"])) def test_w_missing_key_explicit_default(self): - self.assertEqual(self._call_fut({'key2': 2}, ['key1'], default=1), 1) + self.assertEqual(self._call_fut({"key2": 2}, ["key1"], default=1), 1) def test_w_matching_single_key(self): - self.assertEqual(self._call_fut({'key1': 1}, ['key1']), 1) + self.assertEqual(self._call_fut({"key1": 1}, ["key1"]), 1) def test_w_matching_first_key_missing_second_key(self): - self.assertIsNone( - self._call_fut({'key1': {'key3': 3}}, ['key1', 'key2'])) + self.assertIsNone(self._call_fut({"key1": {"key3": 3}}, ["key1", "key2"])) def test_w_matching_first_key_matching_second_key(self): - self.assertEqual( - self._call_fut({'key1': {'key2': 2}}, ['key1', 'key2']), 2) + self.assertEqual(self._call_fut({"key1": {"key2": 2}}, ["key1", "key2"]), 2) class Test__set_sub_prop(unittest.TestCase): - def _call_fut(self, container, keys, value): from google.cloud.bigquery._helpers import _set_sub_prop @@ -864,57 +819,55 @@ def _call_fut(self, container, keys, value): def test_w_empty_container_single_key(self): container = {} - self._call_fut(container, ['key1'], 'value') - self.assertEqual(container, {'key1': 'value'}) + self._call_fut(container, ["key1"], "value") + self.assertEqual(container, {"key1": "value"}) def test_w_empty_container_nested_keys(self): container = {} - self._call_fut(container, ['key1', 'key2', 'key3'], 'value') - self.assertEqual(container, {'key1': {'key2': {'key3': 'value'}}}) + self._call_fut(container, ["key1", "key2", "key3"], "value") + self.assertEqual(container, {"key1": {"key2": {"key3": "value"}}}) def test_w_existing_value(self): - container = {'key1': 'before'} - self._call_fut(container, ['key1'], 'after') - self.assertEqual(container, {'key1': 'after'}) + container = {"key1": "before"} + self._call_fut(container, ["key1"], "after") + self.assertEqual(container, {"key1": "after"}) def test_w_nested_keys_existing_value(self): - container = {'key1': {'key2': {'key3': 'before'}}} - self._call_fut(container, ['key1', 'key2', 'key3'], 'after') - self.assertEqual(container, {'key1': {'key2': {'key3': 'after'}}}) + container = {"key1": {"key2": {"key3": "before"}}} + self._call_fut(container, ["key1", "key2", "key3"], "after") + self.assertEqual(container, {"key1": {"key2": {"key3": "after"}}}) class Test__del_sub_prop(unittest.TestCase): - def _call_fut(self, container, keys): from google.cloud.bigquery._helpers import _del_sub_prop return _del_sub_prop(container, keys) def test_w_single_key(self): - container = {'key1': 'value'} - self._call_fut(container, ['key1']) + container = {"key1": "value"} + self._call_fut(container, ["key1"]) self.assertEqual(container, {}) def test_w_empty_container_nested_keys(self): container = {} - self._call_fut(container, ['key1', 'key2', 'key3']) - self.assertEqual(container, {'key1': {'key2': {}}}) + self._call_fut(container, ["key1", "key2", "key3"]) + self.assertEqual(container, {"key1": {"key2": {}}}) def test_w_existing_value_nested_keys(self): - container = {'key1': {'key2': {'key3': 'value'}}} - self._call_fut(container, ['key1', 'key2', 'key3']) - self.assertEqual(container, {'key1': {'key2': {}}}) + container = {"key1": {"key2": {"key3": "value"}}} + self._call_fut(container, ["key1", "key2", "key3"]) + self.assertEqual(container, {"key1": {"key2": {}}}) class Test__int_or_none(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _int_or_none return _int_or_none(value) def test_w_num_string(self): - self.assertEqual(self._call_fut('123'), 123) + self.assertEqual(self._call_fut("123"), 123) def test_w_none(self): self.assertIsNone(self._call_fut(None)) @@ -924,29 +877,27 @@ def test_w_int(self): def test_w_non_num_string(self): with self.assertRaises(ValueError): - self._call_fut('ham') + self._call_fut("ham") class Test__str_or_none(unittest.TestCase): - def _call_fut(self, value): from google.cloud.bigquery._helpers import _str_or_none return _str_or_none(value) def test_w_int(self): - self.assertEqual(self._call_fut(123), '123') + self.assertEqual(self._call_fut(123), "123") def test_w_none(self): self.assertIsNone(self._call_fut(None)) def test_w_str(self): - self.assertEqual(self._call_fut('ham'), 'ham') + self.assertEqual(self._call_fut("ham"), "ham") class _Field(object): - - def __init__(self, mode, name='unknown', field_type='UNKNOWN', fields=()): + def __init__(self, mode, name="unknown", field_type="UNKNOWN", fields=()): self.mode = mode self.name = name self.field_type = field_type diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index c1cd48ffdca8..890046ee05fa 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -19,7 +19,6 @@ class TestConnection(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery._http import Connection @@ -31,26 +30,20 @@ def _make_one(self, *args, **kw): def test_build_api_url_no_extra_query_params(self): conn = self._make_one(object()) - URI = '/'.join([ - conn.API_BASE_URL, - 'bigquery', - conn.API_VERSION, - 'foo', - ]) - self.assertEqual(conn.build_api_url('/foo'), URI) + URI = "/".join([conn.API_BASE_URL, "bigquery", conn.API_VERSION, "foo"]) + self.assertEqual(conn.build_api_url("/foo"), URI) def test_build_api_url_w_extra_query_params(self): from six.moves.urllib.parse import parse_qsl from six.moves.urllib.parse import urlsplit conn = self._make_one(object()) - uri = conn.build_api_url('/foo', {'bar': 'baz'}) + uri = conn.build_api_url("/foo", {"bar": "baz"}) scheme, netloc, path, qs, _ = urlsplit(uri) - self.assertEqual('%s://%s' % (scheme, netloc), conn.API_BASE_URL) - self.assertEqual(path, - '/'.join(['', 'bigquery', conn.API_VERSION, 'foo'])) + self.assertEqual("%s://%s" % (scheme, netloc), conn.API_BASE_URL) + self.assertEqual(path, "/".join(["", "bigquery", conn.API_VERSION, "foo"])) parms = dict(parse_qsl(qs)) - self.assertEqual(parms['bar'], 'baz') + self.assertEqual(parms["bar"], "baz") def test_extra_headers(self): from google.cloud import _http as base_http @@ -59,26 +52,22 @@ def test_extra_headers(self): http = mock.create_autospec(requests.Session, instance=True) response = requests.Response() response.status_code = 200 - data = b'brent-spiner' + data = b"brent-spiner" response._content = data http.request.return_value = response - client = mock.Mock(_http=http, spec=['_http']) + client = mock.Mock(_http=http, spec=["_http"]) conn = self._make_one(client) - req_data = 'req-data-boring' - result = conn.api_request( - 'GET', '/rainbow', data=req_data, expect_json=False) + req_data = "req-data-boring" + result = conn.api_request("GET", "/rainbow", data=req_data, expect_json=False) self.assertEqual(result, data) expected_headers = { - 'Accept-Encoding': 'gzip', + "Accept-Encoding": "gzip", base_http.CLIENT_INFO_HEADER: MUT._CLIENT_INFO, - 'User-Agent': conn.USER_AGENT, + "User-Agent": conn.USER_AGENT, } - expected_uri = conn.build_api_url('/rainbow') + expected_uri = conn.build_api_url("/rainbow") http.request.assert_called_once_with( - data=req_data, - headers=expected_headers, - method='GET', - url=expected_uri, + data=req_data, headers=expected_headers, method="GET", url=expected_uri ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 69535f62a00e..0fc14b160a9c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -25,6 +25,7 @@ import six from six.moves import http_client import pytest + try: import pandas except (ImportError, AttributeError): # pragma: NO COVER @@ -48,38 +49,39 @@ def _make_connection(*responses): from google.cloud.exceptions import NotFound mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) - mock_conn.USER_AGENT = 'testing 1.2.3' - mock_conn.api_request.side_effect = list(responses) + [NotFound('miss')] + mock_conn.USER_AGENT = "testing 1.2.3" + mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] return mock_conn def _make_list_partitons_meta_info(project, dataset_id, table_id, num_rows=0): return { - 'tableReference': - { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': '{}$__PARTITIONS_SUMMARY__'.format(table_id), - }, - 'schema': {'fields': [ - {'name': 'project_id', 'type': 'STRING', 'mode': 'NULLABLE'}, - {'name': 'dataset_id', 'type': 'STRING', 'mode': 'NULLABLE'}, - {'name': 'table_id', 'type': 'STRING', 'mode': 'NULLABLE'}, - {'name': 'partition_id', 'type': 'STRING', 'mode': 'NULLABLE'} - ]}, - 'etag': 'ETAG', - 'numRows': num_rows, + "tableReference": { + "projectId": project, + "datasetId": dataset_id, + "tableId": "{}$__PARTITIONS_SUMMARY__".format(table_id), + }, + "schema": { + "fields": [ + {"name": "project_id", "type": "STRING", "mode": "NULLABLE"}, + {"name": "dataset_id", "type": "STRING", "mode": "NULLABLE"}, + {"name": "table_id", "type": "STRING", "mode": "NULLABLE"}, + {"name": "partition_id", "type": "STRING", "mode": "NULLABLE"}, + ] + }, + "etag": "ETAG", + "numRows": num_rows, } class TestClient(unittest.TestCase): - PROJECT = 'PROJECT' - DS_ID = 'DATASET_ID' - TABLE_ID = 'TABLE_ID' + PROJECT = "PROJECT" + DS_ID = "DATASET_ID" + TABLE_ID = "TABLE_ID" TABLE_REF = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) - KMS_KEY_NAME = 'projects/1/locations/global/keyRings/1/cryptoKeys/1' - LOCATION = 'us-central' + KMS_KEY_NAME = "projects/1/locations/global/keyRings/1/cryptoKeys/1" + LOCATION = "us-central" @staticmethod def _get_target_class(): @@ -95,8 +97,7 @@ def test_ctor_defaults(self): creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) self.assertIsInstance(client._connection, Connection) self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) @@ -107,9 +108,10 @@ def test_ctor_w_location(self): creds = _make_credentials() http = object() - location = 'us-central' - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http, location=location) + location = "us-central" + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http, location=location + ) self.assertIsInstance(client._connection, Connection) self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) @@ -121,13 +123,17 @@ def test_ctor_w_query_job_config(self): creds = _make_credentials() http = object() - location = 'us-central' + location = "us-central" job_config = QueryJobConfig() job_config.dry_run = True - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http, location=location, - default_query_job_config=job_config) + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + location=location, + default_query_job_config=job_config, + ) self.assertIsInstance(client._connection, Connection) self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) @@ -145,16 +151,17 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): with self.assertRaises(NotFound): client._get_query_results( - 'nothere', None, - project='other-project', + "nothere", + None, + project="other-project", location=self.LOCATION, - timeout_ms=500) + timeout_ms=500, + ) conn.api_request.assert_called_once_with( - method='GET', - path='/projects/other-project/queries/nothere', - query_params={ - 'maxResults': 0, 'timeoutMs': 500, 'location': self.LOCATION}, + method="GET", + path="/projects/other-project/queries/nothere", + query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, ) def test__get_query_results_miss_w_client_location(self): @@ -165,40 +172,30 @@ def test__get_query_results_miss_w_client_location(self): conn = client._connection = _make_connection() with self.assertRaises(NotFound): - client._get_query_results('nothere', None) + client._get_query_results("nothere", None) conn.api_request.assert_called_once_with( - method='GET', - path='/projects/PROJECT/queries/nothere', - query_params={'maxResults': 0, 'location': self.LOCATION}) + method="GET", + path="/projects/PROJECT/queries/nothere", + query_params={"maxResults": 0, "location": self.LOCATION}, + ) def test__get_query_results_hit(self): - job_id = 'query_job' + job_id = "query_job" data = { - 'kind': 'bigquery#getQueryResultsResponse', - 'etag': 'some-tag', - 'schema': { - 'fields': [ - { - 'name': 'title', - 'type': 'STRING', - 'mode': 'NULLABLE' - }, - { - 'name': 'unique_words', - 'type': 'INTEGER', - 'mode': 'NULLABLE' - } + "kind": "bigquery#getQueryResultsResponse", + "etag": "some-tag", + "schema": { + "fields": [ + {"name": "title", "type": "STRING", "mode": "NULLABLE"}, + {"name": "unique_words", "type": "INTEGER", "mode": "NULLABLE"}, ] }, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': job_id, - }, - 'totalRows': '10', - 'totalBytesProcessed': '2464625', - 'jobComplete': True, - 'cacheHit': False, + "jobReference": {"projectId": self.PROJECT, "jobId": job_id}, + "totalRows": "10", + "totalBytesProcessed": "2464625", + "jobComplete": True, + "cacheHit": False, } creds = _make_credentials() @@ -210,63 +207,58 @@ def test__get_query_results_hit(self): self.assertTrue(query_results.complete) def test_get_service_account_email(self): - path = '/projects/%s/serviceAccount' % (self.PROJECT,) + path = "/projects/%s/serviceAccount" % (self.PROJECT,) creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) - email = 'bq-123@bigquery-encryption.iam.gserviceaccount.com' - resource = { - 'kind': 'bigquery#getServiceAccountResponse', - 'email': email, - } + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + email = "bq-123@bigquery-encryption.iam.gserviceaccount.com" + resource = {"kind": "bigquery#getServiceAccountResponse", "email": email} conn = client._connection = _make_connection(resource) service_account_email = client.get_service_account_email() - conn.api_request.assert_called_once_with(method='GET', path=path) + conn.api_request.assert_called_once_with(method="GET", path=path) self.assertEqual(service_account_email, email) def test_get_service_account_email_w_alternate_project(self): - project = 'my-alternate-project' - path = '/projects/%s/serviceAccount' % (project,) + project = "my-alternate-project" + path = "/projects/%s/serviceAccount" % (project,) creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) - email = 'bq-123@bigquery-encryption.iam.gserviceaccount.com' - resource = { - 'kind': 'bigquery#getServiceAccountResponse', - 'email': email, - } + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + email = "bq-123@bigquery-encryption.iam.gserviceaccount.com" + resource = {"kind": "bigquery#getServiceAccountResponse", "email": email} conn = client._connection = _make_connection(resource) - service_account_email = client.get_service_account_email( - project=project) + service_account_email = client.get_service_account_email(project=project) - conn.api_request.assert_called_once_with(method='GET', path=path) + conn.api_request.assert_called_once_with(method="GET", path=path) self.assertEqual(service_account_email, email) def test_list_projects_defaults(self): from google.cloud.bigquery.client import Project - PROJECT_1 = 'PROJECT_ONE' - PROJECT_2 = 'PROJECT_TWO' - TOKEN = 'TOKEN' + PROJECT_1 = "PROJECT_ONE" + PROJECT_2 = "PROJECT_TWO" + TOKEN = "TOKEN" DATA = { - 'nextPageToken': TOKEN, - 'projects': [ - {'kind': 'bigquery#project', - 'id': PROJECT_1, - 'numericId': 1, - 'projectReference': {'projectId': PROJECT_1}, - 'friendlyName': 'One'}, - {'kind': 'bigquery#project', - 'id': PROJECT_2, - 'numericId': 2, - 'projectReference': {'projectId': PROJECT_2}, - 'friendlyName': 'Two'}, - ] + "nextPageToken": TOKEN, + "projects": [ + { + "kind": "bigquery#project", + "id": PROJECT_1, + "numericId": 1, + "projectReference": {"projectId": PROJECT_1}, + "friendlyName": "One", + }, + { + "kind": "bigquery#project", + "id": PROJECT_2, + "numericId": 2, + "projectReference": {"projectId": PROJECT_2}, + "friendlyName": "Two", + }, + ], } creds = _make_credentials() client = self._make_one(PROJECT_1, creds) @@ -277,19 +269,20 @@ def test_list_projects_defaults(self): projects = list(page) token = iterator.next_page_token - self.assertEqual(len(projects), len(DATA['projects'])) - for found, expected in zip(projects, DATA['projects']): + self.assertEqual(len(projects), len(DATA["projects"])) + for found, expected in zip(projects, DATA["projects"]): self.assertIsInstance(found, Project) - self.assertEqual(found.project_id, expected['id']) - self.assertEqual(found.numeric_id, expected['numericId']) - self.assertEqual(found.friendly_name, expected['friendlyName']) + self.assertEqual(found.project_id, expected["id"]) + self.assertEqual(found.numeric_id, expected["numericId"]) + self.assertEqual(found.friendly_name, expected["friendlyName"]) self.assertEqual(token, TOKEN) conn.api_request.assert_called_once_with( - method='GET', path='/projects', query_params={}) + method="GET", path="/projects", query_params={} + ) def test_list_projects_explicit_response_missing_projects_key(self): - TOKEN = 'TOKEN' + TOKEN = "TOKEN" DATA = {} creds = _make_credentials() client = self._make_one(self.PROJECT, creds) @@ -304,31 +297,40 @@ def test_list_projects_explicit_response_missing_projects_key(self): self.assertIsNone(token) conn.api_request.assert_called_once_with( - method='GET', - path='/projects', - query_params={'maxResults': 3, 'pageToken': TOKEN}) + method="GET", + path="/projects", + query_params={"maxResults": 3, "pageToken": TOKEN}, + ) def test_list_datasets_defaults(self): from google.cloud.bigquery.dataset import DatasetListItem - DATASET_1 = 'dataset_one' - DATASET_2 = 'dataset_two' - PATH = 'projects/%s/datasets' % self.PROJECT - TOKEN = 'TOKEN' + DATASET_1 = "dataset_one" + DATASET_2 = "dataset_two" + PATH = "projects/%s/datasets" % self.PROJECT + TOKEN = "TOKEN" DATA = { - 'nextPageToken': TOKEN, - 'datasets': [ - {'kind': 'bigquery#dataset', - 'id': '%s:%s' % (self.PROJECT, DATASET_1), - 'datasetReference': {'datasetId': DATASET_1, - 'projectId': self.PROJECT}, - 'friendlyName': None}, - {'kind': 'bigquery#dataset', - 'id': '%s:%s' % (self.PROJECT, DATASET_2), - 'datasetReference': {'datasetId': DATASET_2, - 'projectId': self.PROJECT}, - 'friendlyName': 'Two'}, - ] + "nextPageToken": TOKEN, + "datasets": [ + { + "kind": "bigquery#dataset", + "id": "%s:%s" % (self.PROJECT, DATASET_1), + "datasetReference": { + "datasetId": DATASET_1, + "projectId": self.PROJECT, + }, + "friendlyName": None, + }, + { + "kind": "bigquery#dataset", + "id": "%s:%s" % (self.PROJECT, DATASET_2), + "datasetReference": { + "datasetId": DATASET_2, + "projectId": self.PROJECT, + }, + "friendlyName": "Two", + }, + ], } creds = _make_credentials() client = self._make_one(self.PROJECT, creds) @@ -339,40 +341,40 @@ def test_list_datasets_defaults(self): datasets = list(page) token = iterator.next_page_token - self.assertEqual(len(datasets), len(DATA['datasets'])) - for found, expected in zip(datasets, DATA['datasets']): + self.assertEqual(len(datasets), len(DATA["datasets"])) + for found, expected in zip(datasets, DATA["datasets"]): self.assertIsInstance(found, DatasetListItem) - self.assertEqual(found.full_dataset_id, expected['id']) - self.assertEqual(found.friendly_name, expected['friendlyName']) + self.assertEqual(found.full_dataset_id, expected["id"]) + self.assertEqual(found.friendly_name, expected["friendlyName"]) self.assertEqual(token, TOKEN) conn.api_request.assert_called_once_with( - method='GET', path='/%s' % PATH, query_params={}) + method="GET", path="/%s" % PATH, query_params={} + ) def test_list_datasets_w_project(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = _make_connection({}) - list(client.list_datasets(project='other-project')) + list(client.list_datasets(project="other-project")) conn.api_request.assert_called_once_with( - method='GET', - path='/projects/other-project/datasets', - query_params={}) + method="GET", path="/projects/other-project/datasets", query_params={} + ) def test_list_datasets_explicit_response_missing_datasets_key(self): - PATH = 'projects/%s/datasets' % self.PROJECT - TOKEN = 'TOKEN' - FILTER = 'FILTER' + PATH = "projects/%s/datasets" % self.PROJECT + TOKEN = "TOKEN" + FILTER = "FILTER" DATA = {} creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = _make_connection(DATA) iterator = client.list_datasets( - include_all=True, filter=FILTER, - max_results=3, page_token=TOKEN) + include_all=True, filter=FILTER, max_results=3, page_token=TOKEN + ) page = six.next(iterator.pages) datasets = list(page) token = iterator.next_page_token @@ -381,22 +383,22 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): self.assertIsNone(token) conn.api_request.assert_called_once_with( - method='GET', - path='/%s' % PATH, + method="GET", + path="/%s" % PATH, query_params={ - 'all': True, - 'filter': FILTER, - 'maxResults': 3, - 'pageToken': TOKEN, - }) + "all": True, + "filter": FILTER, + "maxResults": 3, + "pageToken": TOKEN, + }, + ) def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) dataset = client.dataset(self.DS_ID, self.PROJECT) self.assertIsInstance(dataset, DatasetReference) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -407,8 +409,7 @@ def test_dataset_with_default_project(self): creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) dataset = client.dataset(self.DS_ID) self.assertIsInstance(dataset, DatasetReference) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -417,74 +418,66 @@ def test_dataset_with_default_project(self): def test_get_dataset(self): from google.cloud.exceptions import ServerError - path = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) + path = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) resource = { - 'id': '%s:%s' % (self.PROJECT, self.DS_ID), - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - }, + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, } conn = client._connection = _make_connection(resource) dataset_ref = client.dataset(self.DS_ID) dataset = client.get_dataset(dataset_ref) - conn.api_request.assert_called_once_with( - method='GET', path='/%s' % path) + conn.api_request.assert_called_once_with(method="GET", path="/%s" % path) self.assertEqual(dataset.dataset_id, self.DS_ID) # Test retry. # Not a cloud API exception (missing 'errors' field). - client._connection = _make_connection(Exception(''), resource) + client._connection = _make_connection(Exception(""), resource) with self.assertRaises(Exception): client.get_dataset(dataset_ref) # Zero-length errors field. - client._connection = _make_connection(ServerError(''), resource) + client._connection = _make_connection(ServerError(""), resource) with self.assertRaises(ServerError): client.get_dataset(dataset_ref) # Non-retryable reason. client._connection = _make_connection( - ServerError('', errors=[{'reason': 'serious'}]), - resource) + ServerError("", errors=[{"reason": "serious"}]), resource + ) with self.assertRaises(ServerError): client.get_dataset(dataset_ref) # Retryable reason, but retry is disabled. client._connection = _make_connection( - ServerError('', errors=[{'reason': 'backendError'}]), - resource) + ServerError("", errors=[{"reason": "backendError"}]), resource + ) with self.assertRaises(ServerError): client.get_dataset(dataset_ref, retry=None) # Retryable reason, default retry: success. client._connection = _make_connection( - ServerError('', errors=[{'reason': 'backendError'}]), - resource) + ServerError("", errors=[{"reason": "backendError"}]), resource + ) dataset = client.get_dataset( # Test with a string for dataset ID. - dataset_ref.dataset_id, + dataset_ref.dataset_id ) self.assertEqual(dataset.dataset_id, self.DS_ID) def test_create_dataset_minimal(self): from google.cloud.bigquery.dataset import Dataset - PATH = 'projects/%s/datasets' % self.PROJECT + PATH = "projects/%s/datasets" % self.PROJECT RESOURCE = { - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - }, - 'etag': "etag", - 'id': "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "id": "%s:%s" % (self.PROJECT, self.DS_ID), } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -497,57 +490,52 @@ def test_create_dataset_minimal(self): self.assertEqual(after.dataset_id, self.DS_ID) self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE['etag']) - self.assertEqual(after.full_dataset_id, RESOURCE['id']) + self.assertEqual(after.etag, RESOURCE["etag"]) + self.assertEqual(after.full_dataset_id, RESOURCE["id"]) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % PATH, + method="POST", + path="/%s" % PATH, data={ - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, + "datasetReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, }, - 'labels': {}, - }) + "labels": {}, + }, + ) def test_create_dataset_w_attrs(self): from google.cloud.bigquery.dataset import Dataset, AccessEntry - PATH = 'projects/%s/datasets' % self.PROJECT - DESCRIPTION = 'DESC' - FRIENDLY_NAME = 'FN' - LOCATION = 'US' - USER_EMAIL = 'phred@example.com' - LABELS = {'color': 'red'} + PATH = "projects/%s/datasets" % self.PROJECT + DESCRIPTION = "DESC" + FRIENDLY_NAME = "FN" + LOCATION = "US" + USER_EMAIL = "phred@example.com" + LABELS = {"color": "red"} VIEW = { - 'projectId': 'my-proj', - 'datasetId': 'starry-skies', - 'tableId': 'northern-hemisphere', + "projectId": "my-proj", + "datasetId": "starry-skies", + "tableId": "northern-hemisphere", } RESOURCE = { - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - }, - 'etag': "etag", - 'id': "%s:%s" % (self.PROJECT, self.DS_ID), - 'description': DESCRIPTION, - 'friendlyName': FRIENDLY_NAME, - 'location': LOCATION, - 'defaultTableExpirationMs': '3600', - 'labels': LABELS, - 'access': [ - {'role': 'OWNER', 'userByEmail': USER_EMAIL}, - {'view': VIEW}, - ], + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": "3600", + "labels": LABELS, + "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(RESOURCE) entries = [ - AccessEntry('OWNER', 'userByEmail', USER_EMAIL), - AccessEntry(None, 'view', VIEW), + AccessEntry("OWNER", "userByEmail", USER_EMAIL), + AccessEntry(None, "view", VIEW), ] ds_ref = client.dataset(self.DS_ID) @@ -563,8 +551,8 @@ def test_create_dataset_w_attrs(self): self.assertEqual(after.dataset_id, self.DS_ID) self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE['etag']) - self.assertEqual(after.full_dataset_id, RESOURCE['id']) + self.assertEqual(after.etag, RESOURCE["etag"]) + self.assertEqual(after.full_dataset_id, RESOURCE["id"]) self.assertEqual(after.description, DESCRIPTION) self.assertEqual(after.friendly_name, FRIENDLY_NAME) self.assertEqual(after.location, LOCATION) @@ -572,34 +560,34 @@ def test_create_dataset_w_attrs(self): self.assertEqual(after.labels, LABELS) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % PATH, + method="POST", + path="/%s" % PATH, data={ - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, + "datasetReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, }, - 'description': DESCRIPTION, - 'friendlyName': FRIENDLY_NAME, - 'location': LOCATION, - 'defaultTableExpirationMs': '3600', - 'access': [ - {'role': 'OWNER', 'userByEmail': USER_EMAIL}, - {'view': VIEW}, + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": "3600", + "access": [ + {"role": "OWNER", "userByEmail": USER_EMAIL}, + {"view": VIEW}, ], - 'labels': LABELS, - }) + "labels": LABELS, + }, + ) def test_create_dataset_w_custom_property(self): # The library should handle sending properties to the API that are not # yet part of the library from google.cloud.bigquery.dataset import Dataset - path = '/projects/%s/datasets' % self.PROJECT + path = "/projects/%s/datasets" % self.PROJECT resource = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'newAlphaProperty': 'unreleased property', + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "newAlphaProperty": "unreleased property", } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -607,42 +595,41 @@ def test_create_dataset_w_custom_property(self): ds_ref = client.dataset(self.DS_ID) before = Dataset(ds_ref) - before._properties['newAlphaProperty'] = 'unreleased property' + before._properties["newAlphaProperty"] = "unreleased property" after = client.create_dataset(before) self.assertEqual(after.dataset_id, self.DS_ID) self.assertEqual(after.project, self.PROJECT) - self.assertEqual( - after._properties['newAlphaProperty'], 'unreleased property') + self.assertEqual(after._properties["newAlphaProperty"], "unreleased property") conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=path, data={ - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, + "datasetReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, }, - 'newAlphaProperty': 'unreleased property', - 'labels': {}, - } + "newAlphaProperty": "unreleased property", + "labels": {}, + }, ) def test_create_dataset_w_client_location_wo_dataset_location(self): from google.cloud.bigquery.dataset import Dataset - PATH = 'projects/%s/datasets' % self.PROJECT + PATH = "projects/%s/datasets" % self.PROJECT RESOURCE = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'etag': "etag", - 'id': "%s:%s" % (self.PROJECT, self.DS_ID), - 'location': self.LOCATION, + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "location": self.LOCATION, } creds = _make_credentials() client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION) + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) conn = client._connection = _make_connection(RESOURCE) ds_ref = client.dataset(self.DS_ID) @@ -652,39 +639,38 @@ def test_create_dataset_w_client_location_wo_dataset_location(self): self.assertEqual(after.dataset_id, self.DS_ID) self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE['etag']) - self.assertEqual(after.full_dataset_id, RESOURCE['id']) + self.assertEqual(after.etag, RESOURCE["etag"]) + self.assertEqual(after.full_dataset_id, RESOURCE["id"]) self.assertEqual(after.location, self.LOCATION) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % PATH, + method="POST", + path="/%s" % PATH, data={ - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, + "datasetReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, }, - 'labels': {}, - 'location': self.LOCATION, - }) + "labels": {}, + "location": self.LOCATION, + }, + ) def test_create_dataset_w_client_location_w_dataset_location(self): from google.cloud.bigquery.dataset import Dataset - PATH = 'projects/%s/datasets' % self.PROJECT - OTHER_LOCATION = 'EU' + PATH = "projects/%s/datasets" % self.PROJECT + OTHER_LOCATION = "EU" RESOURCE = { - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - }, - 'etag': "etag", - 'id': "%s:%s" % (self.PROJECT, self.DS_ID), - 'location': OTHER_LOCATION, + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "location": OTHER_LOCATION, } creds = _make_credentials() client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION) + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) conn = client._connection = _make_connection(RESOURCE) ds_ref = client.dataset(self.DS_ID) @@ -695,139 +681,141 @@ def test_create_dataset_w_client_location_w_dataset_location(self): self.assertEqual(after.dataset_id, self.DS_ID) self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE['etag']) - self.assertEqual(after.full_dataset_id, RESOURCE['id']) + self.assertEqual(after.etag, RESOURCE["etag"]) + self.assertEqual(after.full_dataset_id, RESOURCE["id"]) self.assertEqual(after.location, OTHER_LOCATION) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % PATH, + method="POST", + path="/%s" % PATH, data={ - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, + "datasetReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, }, - 'labels': {}, - 'location': OTHER_LOCATION, - }) + "labels": {}, + "location": OTHER_LOCATION, + }, + ) def test_create_dataset_w_reference(self): - path = '/projects/%s/datasets' % self.PROJECT + path = "/projects/%s/datasets" % self.PROJECT resource = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'etag': "etag", - 'id': "%s:%s" % (self.PROJECT, self.DS_ID), - 'location': self.LOCATION, + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "location": self.LOCATION, } creds = _make_credentials() client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION) + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) conn = client._connection = _make_connection(resource) dataset = client.create_dataset(client.dataset(self.DS_ID)) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource['etag']) - self.assertEqual(dataset.full_dataset_id, resource['id']) + self.assertEqual(dataset.etag, resource["etag"]) + self.assertEqual(dataset.full_dataset_id, resource["id"]) self.assertEqual(dataset.location, self.LOCATION) conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=path, data={ - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, + "datasetReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, }, - 'labels': {}, - 'location': self.LOCATION, - }) + "labels": {}, + "location": self.LOCATION, + }, + ) def test_create_dataset_w_fully_qualified_string(self): - path = '/projects/%s/datasets' % self.PROJECT + path = "/projects/%s/datasets" % self.PROJECT resource = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'etag': "etag", - 'id': "%s:%s" % (self.PROJECT, self.DS_ID), - 'location': self.LOCATION, + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "location": self.LOCATION, } creds = _make_credentials() client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION) + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) conn = client._connection = _make_connection(resource) - dataset = client.create_dataset( - '{}.{}'.format(self.PROJECT, self.DS_ID)) + dataset = client.create_dataset("{}.{}".format(self.PROJECT, self.DS_ID)) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource['etag']) - self.assertEqual(dataset.full_dataset_id, resource['id']) + self.assertEqual(dataset.etag, resource["etag"]) + self.assertEqual(dataset.full_dataset_id, resource["id"]) self.assertEqual(dataset.location, self.LOCATION) conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=path, data={ - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, + "datasetReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, }, - 'labels': {}, - 'location': self.LOCATION, - }) + "labels": {}, + "location": self.LOCATION, + }, + ) def test_create_dataset_w_string(self): - path = '/projects/%s/datasets' % self.PROJECT + path = "/projects/%s/datasets" % self.PROJECT resource = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'etag': "etag", - 'id': "%s:%s" % (self.PROJECT, self.DS_ID), - 'location': self.LOCATION, + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "location": self.LOCATION, } creds = _make_credentials() client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION) + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) conn = client._connection = _make_connection(resource) dataset = client.create_dataset(self.DS_ID) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource['etag']) - self.assertEqual(dataset.full_dataset_id, resource['id']) + self.assertEqual(dataset.etag, resource["etag"]) + self.assertEqual(dataset.full_dataset_id, resource["id"]) self.assertEqual(dataset.location, self.LOCATION) conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=path, data={ - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, + "datasetReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, }, - 'labels': {}, - 'location': self.LOCATION, - }) + "labels": {}, + "location": self.LOCATION, + }, + ) def test_create_table_w_day_partition(self): from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TimePartitioning - path = 'projects/%s/datasets/%s/tables' % ( - self.PROJECT, self.DS_ID) + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, } conn = client._connection = _make_connection(resource) @@ -837,18 +825,19 @@ def test_create_table_w_day_partition(self): got = client.create_table(table) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % path, + method="POST", + path="/%s" % path, data={ - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'timePartitioning': {'type': 'DAY'}, - 'labels': {}, - }) - self.assertEqual(table.time_partitioning.type_, 'DAY') + "timePartitioning": {"type": "DAY"}, + "labels": {}, + }, + ) + self.assertEqual(table.time_partitioning.type_, "DAY") self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_custom_property(self): @@ -856,92 +845,91 @@ def test_create_table_w_custom_property(self): # yet part of the library from google.cloud.bigquery.table import Table - path = 'projects/%s/datasets/%s/tables' % ( - self.PROJECT, self.DS_ID) + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'newAlphaProperty': 'unreleased property', + "newAlphaProperty": "unreleased property", } conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) - table._properties['newAlphaProperty'] = 'unreleased property' + table._properties["newAlphaProperty"] = "unreleased property" got = client.create_table(table) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % path, + method="POST", + path="/%s" % path, data={ - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'newAlphaProperty': 'unreleased property', - 'labels': {}, - }) - self.assertEqual( - got._properties['newAlphaProperty'], 'unreleased property') + "newAlphaProperty": "unreleased property", + "labels": {}, + }, + ) + self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_encryption_configuration(self): from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import Table - path = 'projects/%s/datasets/%s/tables' % ( - self.PROJECT, self.DS_ID) + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, } conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) table.encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME) + kms_key_name=self.KMS_KEY_NAME + ) got = client.create_table(table) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % path, + method="POST", + path="/%s" % path, data={ - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'labels': {}, - 'encryptionConfiguration': {'kmsKeyName': self.KMS_KEY_NAME}, - }) + "labels": {}, + "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, + }, + ) self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_day_partition_and_expire(self): from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TimePartitioning - path = 'projects/%s/datasets/%s/tables' % ( - self.PROJECT, self.DS_ID) + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, } conn = client._connection = _make_connection(resource) @@ -951,57 +939,57 @@ def test_create_table_w_day_partition_and_expire(self): got = client.create_table(table) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % path, + method="POST", + path="/%s" % path, data={ - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'timePartitioning': {'type': 'DAY', 'expirationMs': '100'}, - 'labels': {}, - }) - self.assertEqual(table.time_partitioning.type_, 'DAY') + "timePartitioning": {"type": "DAY", "expirationMs": "100"}, + "labels": {}, + }, + ) + self.assertEqual(table.time_partitioning.type_, "DAY") self.assertEqual(table.time_partitioning.expiration_ms, 100) self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_schema_and_query(self): from google.cloud.bigquery.table import Table, SchemaField - path = 'projects/%s/datasets/%s/tables' % ( - self.PROJECT, self.DS_ID) - query = 'SELECT * from %s:%s' % (self.DS_ID, self.TABLE_ID) + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + query = "SELECT * from %s:%s" % (self.DS_ID, self.TABLE_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'schema': { - 'fields': [ + "schema": { + "fields": [ { - 'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None, + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, }, { - 'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED', - 'description': None, + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, }, - ], + ] }, - 'view': {'query': query}, + "view": {"query": query}, } schema = [ - SchemaField('full_name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED') + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), ] conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF, schema=schema) @@ -1010,33 +998,34 @@ def test_create_table_w_schema_and_query(self): got = client.create_table(table) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % path, + method="POST", + path="/%s" % path, data={ - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'schema': { - 'fields': [ + "schema": { + "fields": [ { - 'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None, + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, }, { - 'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED', - 'description': None, + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, }, - ], + ] }, - 'view': {'query': query, 'useLegacySql': False}, - 'labels': {}, - }) + "view": {"query": query, "useLegacySql": False}, + "labels": {}, + }, + ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) self.assertEqual(got.dataset_id, self.DS_ID) @@ -1048,63 +1037,63 @@ def test_create_table_w_external(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.table import Table - path = 'projects/%s/datasets/%s/tables' % ( - self.PROJECT, self.DS_ID) + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'externalDataConfiguration': { - 'sourceFormat': SourceFormat.CSV, - 'autodetect': True, + "externalDataConfiguration": { + "sourceFormat": SourceFormat.CSV, + "autodetect": True, }, } conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) - ec = ExternalConfig('CSV') + ec = ExternalConfig("CSV") ec.autodetect = True table.external_data_configuration = ec got = client.create_table(table) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % path, + method="POST", + path="/%s" % path, data={ - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'externalDataConfiguration': { - 'sourceFormat': SourceFormat.CSV, - 'autodetect': True, + "externalDataConfiguration": { + "sourceFormat": SourceFormat.CSV, + "autodetect": True, }, - 'labels': {}, - }) + "labels": {}, + }, + ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) self.assertEqual(got.dataset_id, self.DS_ID) - self.assertEqual(got.external_data_configuration.source_format, - SourceFormat.CSV) + self.assertEqual( + got.external_data_configuration.source_format, SourceFormat.CSV + ) self.assertEqual(got.external_data_configuration.autodetect, True) def test_create_table_w_reference(self): - path = 'projects/%s/datasets/%s/tables' % ( - self.PROJECT, self.DS_ID) + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, } conn = client._connection = _make_connection(resource) @@ -1112,99 +1101,102 @@ def test_create_table_w_reference(self): got = client.create_table(self.TABLE_REF) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % path, + method="POST", + path="/%s" % path, data={ - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'labels': {}, - }) + "labels": {}, + }, + ) self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_fully_qualified_string(self): - path = 'projects/%s/datasets/%s/tables' % ( - self.PROJECT, self.DS_ID) + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, } conn = client._connection = _make_connection(resource) got = client.create_table( - '{}.{}.{}'.format(self.PROJECT, self.DS_ID, self.TABLE_ID)) + "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.TABLE_ID) + ) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % path, + method="POST", + path="/%s" % path, data={ - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'labels': {}, - }) + "labels": {}, + }, + ) self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_string(self): - path = 'projects/%s/datasets/%s/tables' % ( - self.PROJECT, self.DS_ID) + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, } conn = client._connection = _make_connection(resource) - got = client.create_table('{}.{}'.format(self.DS_ID, self.TABLE_ID)) + got = client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % path, + method="POST", + path="/%s" % path, data={ - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'labels': {}, - }) + "labels": {}, + }, + ) self.assertEqual(got.table_id, self.TABLE_ID) def test_get_table(self): - path = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, } conn = client._connection = _make_connection(resource) table = client.get_table(self.TABLE_REF) - conn.api_request.assert_called_once_with( - method='GET', path='/%s' % path) + conn.api_request.assert_called_once_with(method="GET", path="/%s" % path) self.assertEqual(table.table_id, self.TABLE_ID) def test_update_dataset_w_invalid_field(self): @@ -1218,25 +1210,22 @@ def test_update_dataset_w_invalid_field(self): def test_update_dataset(self): from google.cloud.bigquery.dataset import Dataset, AccessEntry - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) - DESCRIPTION = 'DESCRIPTION' - FRIENDLY_NAME = 'TITLE' - LOCATION = 'loc' - LABELS = {'priority': 'high'} - ACCESS = [ - {'role': 'OWNER', 'userByEmail': 'phred@example.com'}, - ] + PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + DESCRIPTION = "DESCRIPTION" + FRIENDLY_NAME = "TITLE" + LOCATION = "loc" + LABELS = {"priority": "high"} + ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] EXP = 17 RESOURCE = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'etag': "etag", - 'description': DESCRIPTION, - 'friendlyName': FRIENDLY_NAME, - 'location': LOCATION, - 'defaultTableExpirationMs': EXP, - 'labels': LABELS, - 'access': ACCESS, + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": EXP, + "labels": LABELS, + "access": ACCESS, } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -1247,22 +1236,22 @@ def test_update_dataset(self): ds.location = LOCATION ds.default_table_expiration_ms = EXP ds.labels = LABELS - ds.access_entries = [ - AccessEntry('OWNER', 'userByEmail', 'phred@example.com')] + ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] ds2 = client.update_dataset( - ds, ['description', 'friendly_name', 'location', 'labels', - 'access_entries']) + ds, ["description", "friendly_name", "location", "labels", "access_entries"] + ) conn.api_request.assert_called_once_with( - method='PATCH', + method="PATCH", data={ - 'description': DESCRIPTION, - 'friendlyName': FRIENDLY_NAME, - 'location': LOCATION, - 'labels': LABELS, - 'access': ACCESS, + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "labels": LABELS, + "access": ACCESS, }, - path='/' + PATH, - headers=None) + path="/" + PATH, + headers=None, + ) self.assertEqual(ds2.description, ds.description) self.assertEqual(ds2.friendly_name, ds.friendly_name) self.assertEqual(ds2.location, ds.location) @@ -1270,79 +1259,80 @@ def test_update_dataset(self): self.assertEqual(ds2.access_entries, ds.access_entries) # ETag becomes If-Match header. - ds._properties['etag'] = 'etag' + ds._properties["etag"] = "etag" client.update_dataset(ds, []) req = conn.api_request.call_args - self.assertEqual(req[1]['headers']['If-Match'], 'etag') + self.assertEqual(req[1]["headers"]["If-Match"], "etag") def test_update_dataset_w_custom_property(self): # The library should handle sending properties to the API that are not # yet part of the library from google.cloud.bigquery.dataset import Dataset - path = '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) + path = "/projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) resource = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'newAlphaProperty': 'unreleased property', + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "newAlphaProperty": "unreleased property", } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(resource) dataset = Dataset(client.dataset(self.DS_ID)) - dataset._properties['newAlphaProperty'] = 'unreleased property' + dataset._properties["newAlphaProperty"] = "unreleased property" - dataset = client.update_dataset(dataset, ['newAlphaProperty']) + dataset = client.update_dataset(dataset, ["newAlphaProperty"]) conn.api_request.assert_called_once_with( - method='PATCH', - data={'newAlphaProperty': 'unreleased property'}, + method="PATCH", + data={"newAlphaProperty": "unreleased property"}, path=path, headers=None, ) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual( - dataset._properties['newAlphaProperty'], 'unreleased property') + self.assertEqual(dataset._properties["newAlphaProperty"], "unreleased property") def test_update_table(self): from google.cloud.bigquery.table import Table, SchemaField - path = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) - description = 'description' - title = 'title' + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + description = "description" + title = "title" resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'schema': { - 'fields': [ + "schema": { + "fields": [ { - 'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, }, { - 'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED', - 'description': None + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, }, - ], + ] }, - 'etag': 'etag', - 'description': description, - 'friendlyName': title, - 'labels': {'x': 'y'}, + "etag": "etag", + "description": description, + "friendlyName": title, + "labels": {"x": "y"}, } schema = [ - SchemaField('full_name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED') + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), ] creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -1350,92 +1340,98 @@ def test_update_table(self): table = Table(self.TABLE_REF, schema=schema) table.description = description table.friendly_name = title - table.labels = {'x': 'y'} + table.labels = {"x": "y"} updated_table = client.update_table( - table, ['schema', 'description', 'friendly_name', 'labels']) + table, ["schema", "description", "friendly_name", "labels"] + ) sent = { - 'schema': { - 'fields': [ + "schema": { + "fields": [ { - 'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, }, { - 'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED', - 'description': None + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, }, - ], + ] }, - 'description': description, - 'friendlyName': title, - 'labels': {'x': 'y'}, + "description": description, + "friendlyName": title, + "labels": {"x": "y"}, } conn.api_request.assert_called_once_with( - method='PATCH', - data=sent, - path='/' + path, - headers=None) + method="PATCH", data=sent, path="/" + path, headers=None + ) self.assertEqual(updated_table.description, table.description) self.assertEqual(updated_table.friendly_name, table.friendly_name) self.assertEqual(updated_table.schema, table.schema) self.assertEqual(updated_table.labels, table.labels) # ETag becomes If-Match header. - table._properties['etag'] = 'etag' + table._properties["etag"] = "etag" client.update_table(table, []) req = conn.api_request.call_args - self.assertEqual(req[1]['headers']['If-Match'], 'etag') + self.assertEqual(req[1]["headers"]["If-Match"], "etag") def test_update_table_w_custom_property(self): from google.cloud.bigquery.table import Table - path = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'newAlphaProperty': 'unreleased property', + "newAlphaProperty": "unreleased property", } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) - table._properties['newAlphaProperty'] = 'unreleased property' + table._properties["newAlphaProperty"] = "unreleased property" - updated_table = client.update_table(table, ['newAlphaProperty']) + updated_table = client.update_table(table, ["newAlphaProperty"]) conn.api_request.assert_called_once_with( - method='PATCH', - path='/%s' % path, - data={'newAlphaProperty': 'unreleased property'}, - headers=None) + method="PATCH", + path="/%s" % path, + data={"newAlphaProperty": "unreleased property"}, + headers=None, + ) self.assertEqual( - updated_table._properties['newAlphaProperty'], - 'unreleased property') + updated_table._properties["newAlphaProperty"], "unreleased property" + ) def test_update_table_only_use_legacy_sql(self): from google.cloud.bigquery.table import Table - path = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'view': {'useLegacySql': True} + "view": {"useLegacySql": True}, } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -1443,15 +1439,15 @@ def test_update_table_only_use_legacy_sql(self): table = Table(self.TABLE_REF) table.view_use_legacy_sql = True - updated_table = client.update_table(table, ['view_use_legacy_sql']) + updated_table = client.update_table(table, ["view_use_legacy_sql"]) conn.api_request.assert_called_once_with( - method='PATCH', - path='/%s' % path, - data={'view': {'useLegacySql': True}}, - headers=None) - self.assertEqual( - updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + method="PATCH", + path="/%s" % path, + data={"view": {"useLegacySql": True}}, + headers=None, + ) + self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) def test_update_table_w_query(self): import datetime @@ -1459,45 +1455,45 @@ def test_update_table_w_query(self): from google.cloud._helpers import _millis from google.cloud.bigquery.table import Table, SchemaField - path = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) - query = 'select fullname, age from person_ages' - location = 'EU' + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + query = "select fullname, age from person_ages" + location = "EU" exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) schema_resource = { - 'fields': [ + "fields": [ { - 'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, }, { - 'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED', - 'description': None + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, }, - ], + ] } schema = [ - SchemaField('full_name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED') + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), ] resource = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID - }, - 'schema': schema_resource, - 'view': { - 'query': query, - 'useLegacySql': True, + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'location': location, - 'expirationTime': _millis(exp_time) + "schema": schema_resource, + "view": {"query": query, "useLegacySql": True}, + "location": location, + "expirationTime": _millis(exp_time), } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -1506,28 +1502,23 @@ def test_update_table_w_query(self): table.expires = exp_time table.view_query = query table.view_use_legacy_sql = True - updated_properties = [ - 'schema', 'view_query', 'expires', 'view_use_legacy_sql'] + updated_properties = ["schema", "view_query", "expires", "view_use_legacy_sql"] updated_table = client.update_table(table, updated_properties) self.assertEqual(updated_table.schema, table.schema) self.assertEqual(updated_table.view_query, table.view_query) self.assertEqual(updated_table.expires, table.expires) - self.assertEqual( - updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) self.assertEqual(updated_table.location, location) conn.api_request.assert_called_once_with( - method='PATCH', - path='/%s' % path, + method="PATCH", + path="/%s" % path, data={ - 'view': { - 'query': query, - 'useLegacySql': True, - }, - 'expirationTime': str(_millis(exp_time)), - 'schema': schema_resource, + "view": {"query": query, "useLegacySql": True}, + "expirationTime": str(_millis(exp_time)), + "schema": schema_resource, }, headers=None, ) @@ -1535,24 +1526,31 @@ def test_update_table_w_query(self): def test_update_table_w_schema_None(self): # Simulate deleting schema: not sure if back-end will actually # allow this operation, but the spec says it is optional. - path = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) resource1 = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]} + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + }, } resource2 = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, } creds = _make_credentials() @@ -1560,7 +1558,7 @@ def test_update_table_w_schema_None(self): conn = client._connection = _make_connection(resource1, resource2) table = client.get_table( # Test with string for table ID - '{}.{}.{}'.format( + "{}.{}.{}".format( self.TABLE_REF.project, self.TABLE_REF.dataset_id, self.TABLE_REF.table_id, @@ -1568,41 +1566,44 @@ def test_update_table_w_schema_None(self): ) table.schema = None - updated_table = client.update_table(table, ['schema']) + updated_table = client.update_table(table, ["schema"]) self.assertEqual(len(conn.api_request.call_args_list), 2) req = conn.api_request.call_args_list[1] - self.assertEqual(req[1]['method'], 'PATCH') - sent = {'schema': None} - self.assertEqual(req[1]['data'], sent) - self.assertEqual(req[1]['path'], '/%s' % path) + self.assertEqual(req[1]["method"], "PATCH") + sent = {"schema": None} + self.assertEqual(req[1]["data"], sent) + self.assertEqual(req[1]["path"], "/%s" % path) self.assertEqual(len(updated_table.schema), 0) def test_update_table_delete_property(self): from google.cloud.bigquery.table import Table - description = 'description' - title = 'title' - path = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + description = "description" + title = "title" + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) resource1 = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'description': description, - 'friendlyName': title, + "description": description, + "friendlyName": title, } resource2 = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'description': None, + "description": None, } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -1610,22 +1611,21 @@ def test_update_table_delete_property(self): table = Table(self.TABLE_REF) table.description = description table.friendly_name = title - table2 = client.update_table(table, ['description', 'friendly_name']) + table2 = client.update_table(table, ["description", "friendly_name"]) self.assertEqual(table2.description, table.description) table2.description = None - table3 = client.update_table(table2, ['description']) + table3 = client.update_table(table2, ["description"]) self.assertEqual(len(conn.api_request.call_args_list), 2) req = conn.api_request.call_args_list[1] - self.assertEqual(req[1]['method'], 'PATCH') - self.assertEqual(req[1]['path'], '/%s' % path) - sent = {'description': None} - self.assertEqual(req[1]['data'], sent) + self.assertEqual(req[1]["method"], "PATCH") + self.assertEqual(req[1]["path"], "/%s" % path) + sent = {"description": None} + self.assertEqual(req[1]["data"], sent) self.assertIsNone(table3.description) def test_list_tables_empty(self): - path = '/projects/{}/datasets/{}/tables'.format( - self.PROJECT, self.DS_ID) + path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection({}) @@ -1640,31 +1640,40 @@ def test_list_tables_empty(self): self.assertEqual(tables, []) self.assertIsNone(token) conn.api_request.assert_called_once_with( - method='GET', path=path, query_params={}) + method="GET", path=path, query_params={} + ) def test_list_tables_defaults(self): from google.cloud.bigquery.table import TableListItem - TABLE_1 = 'table_one' - TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - TOKEN = 'TOKEN' + TABLE_1 = "table_one" + TABLE_2 = "table_two" + PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + TOKEN = "TOKEN" DATA = { - 'nextPageToken': TOKEN, - 'tables': [ - {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), - 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), - 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - ] + "nextPageToken": TOKEN, + "tables": [ + { + "kind": "bigquery#table", + "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1), + "tableReference": { + "tableId": TABLE_1, + "datasetId": self.DS_ID, + "projectId": self.PROJECT, + }, + "type": "TABLE", + }, + { + "kind": "bigquery#table", + "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2), + "tableReference": { + "tableId": TABLE_2, + "datasetId": self.DS_ID, + "projectId": self.PROJECT, + }, + "type": "TABLE", + }, + ], } creds = _make_credentials() @@ -1678,37 +1687,46 @@ def test_list_tables_defaults(self): tables = list(page) token = iterator.next_page_token - self.assertEqual(len(tables), len(DATA['tables'])) - for found, expected in zip(tables, DATA['tables']): + self.assertEqual(len(tables), len(DATA["tables"])) + for found, expected in zip(tables, DATA["tables"]): self.assertIsInstance(found, TableListItem) - self.assertEqual(found.full_table_id, expected['id']) - self.assertEqual(found.table_type, expected['type']) + self.assertEqual(found.full_table_id, expected["id"]) + self.assertEqual(found.table_type, expected["type"]) self.assertEqual(token, TOKEN) conn.api_request.assert_called_once_with( - method='GET', path='/%s' % PATH, query_params={}) + method="GET", path="/%s" % PATH, query_params={} + ) def test_list_tables_explicit(self): from google.cloud.bigquery.table import TableListItem - TABLE_1 = 'table_one' - TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) - TOKEN = 'TOKEN' + TABLE_1 = "table_one" + TABLE_2 = "table_two" + PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + TOKEN = "TOKEN" DATA = { - 'tables': [ - {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), - 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), - 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_ID, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, + "tables": [ + { + "kind": "bigquery#dataset", + "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1), + "tableReference": { + "tableId": TABLE_1, + "datasetId": self.DS_ID, + "projectId": self.PROJECT, + }, + "type": "TABLE", + }, + { + "kind": "bigquery#dataset", + "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2), + "tableReference": { + "tableId": TABLE_2, + "datasetId": self.DS_ID, + "projectId": self.PROJECT, + }, + "type": "TABLE", + }, ] } @@ -1720,23 +1738,26 @@ def test_list_tables_explicit(self): iterator = client.list_tables( # Test with string for dataset ID. self.DS_ID, - max_results=3, page_token=TOKEN) + max_results=3, + page_token=TOKEN, + ) self.assertEqual(iterator.dataset, dataset) page = six.next(iterator.pages) tables = list(page) token = iterator.next_page_token - self.assertEqual(len(tables), len(DATA['tables'])) - for found, expected in zip(tables, DATA['tables']): + self.assertEqual(len(tables), len(DATA["tables"])) + for found, expected in zip(tables, DATA["tables"]): self.assertIsInstance(found, TableListItem) - self.assertEqual(found.full_table_id, expected['id']) - self.assertEqual(found.table_type, expected['type']) + self.assertEqual(found.full_table_id, expected["id"]) + self.assertEqual(found.table_type, expected["type"]) self.assertIsNone(token) conn.api_request.assert_called_once_with( - method='GET', - path='/%s' % PATH, - query_params={'maxResults': 3, 'pageToken': TOKEN}) + method="GET", + path="/%s" % PATH, + query_params={"maxResults": 3, "pageToken": TOKEN}, + ) def test_list_tables_wrong_type(self): creds = _make_credentials() @@ -1749,26 +1770,21 @@ def test_delete_dataset(self): from google.cloud.bigquery.dataset import DatasetReference ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - datasets = ( - ds_ref, - Dataset(ds_ref), - '{}.{}'.format(self.PROJECT, self.DS_ID), - ) - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) + datasets = (ds_ref, Dataset(ds_ref), "{}.{}".format(self.PROJECT, self.DS_ID)) + PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(*([{}] * len(datasets))) for arg in datasets: client.delete_dataset(arg) conn.api_request.assert_called_with( - method='DELETE', - path='/%s' % PATH, - query_params={}) + method="DELETE", path="/%s" % PATH, query_params={} + ) def test_delete_dataset_delete_contents(self): from google.cloud.bigquery.dataset import Dataset - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) + PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection({}, {}) @@ -1776,9 +1792,10 @@ def test_delete_dataset_delete_contents(self): for arg in (ds_ref, Dataset(ds_ref)): client.delete_dataset(arg, delete_contents=True) conn.api_request.assert_called_with( - method='DELETE', - path='/%s' % PATH, - query_params={'deleteContents': 'true'}) + method="DELETE", + path="/%s" % PATH, + query_params={"deleteContents": "true"}, + ) def test_delete_dataset_wrong_type(self): creds = _make_credentials() @@ -1792,24 +1809,25 @@ def test_delete_table(self): tables = ( self.TABLE_REF, Table(self.TABLE_REF), - '{}.{}.{}'.format( + "{}.{}.{}".format( self.TABLE_REF.project, self.TABLE_REF.dataset_id, self.TABLE_REF.table_id, ), ) - path = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(*([{}] * len(tables))) for arg in tables: client.delete_table(arg) - conn.api_request.assert_called_with( - method='DELETE', path='/%s' % path) + conn.api_request.assert_called_with(method="DELETE", path="/%s" % path) def test_delete_table_w_wrong_type(self): creds = _make_credentials() @@ -1819,6 +1837,7 @@ def test_delete_table_w_wrong_type(self): def test_job_from_resource_unknown_type(self): from google.cloud.bigquery.job import UnknownJob + creds = _make_credentials() client = self._make_one(self.PROJECT, creds) got = client.job_from_resource({}) # Can parse redacted job. @@ -1828,29 +1847,26 @@ def test_job_from_resource_unknown_type(self): def test_get_job_miss_w_explict_project(self): from google.cloud.exceptions import NotFound - OTHER_PROJECT = 'OTHER_PROJECT' - JOB_ID = 'NONESUCH' + OTHER_PROJECT = "OTHER_PROJECT" + JOB_ID = "NONESUCH" creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = _make_connection() with self.assertRaises(NotFound): - client.get_job( - JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) + client.get_job(JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) conn.api_request.assert_called_once_with( - method='GET', - path='/projects/OTHER_PROJECT/jobs/NONESUCH', - query_params={ - 'projection': 'full', - 'location': self.LOCATION, - }) + method="GET", + path="/projects/OTHER_PROJECT/jobs/NONESUCH", + query_params={"projection": "full", "location": self.LOCATION}, + ) def test_get_job_miss_w_client_location(self): from google.cloud.exceptions import NotFound - OTHER_PROJECT = 'OTHER_PROJECT' - JOB_ID = 'NONESUCH' + OTHER_PROJECT = "OTHER_PROJECT" + JOB_ID = "NONESUCH" creds = _make_credentials() client = self._make_one(self.PROJECT, creds, location=self.LOCATION) conn = client._connection = _make_connection() @@ -1859,38 +1875,33 @@ def test_get_job_miss_w_client_location(self): client.get_job(JOB_ID, project=OTHER_PROJECT) conn.api_request.assert_called_once_with( - method='GET', - path='/projects/OTHER_PROJECT/jobs/NONESUCH', - query_params={ - 'projection': 'full', - 'location': self.LOCATION, - }) + method="GET", + path="/projects/OTHER_PROJECT/jobs/NONESUCH", + query_params={"projection": "full", "location": self.LOCATION}, + ) def test_get_job_hit(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import WriteDisposition - JOB_ID = 'query_job' - QUERY_DESTINATION_TABLE = 'query_destination_table' - QUERY = 'SELECT * from test_dataset:test_table' + JOB_ID = "query_job" + QUERY_DESTINATION_TABLE = "query_destination_table" + QUERY = "SELECT * from test_dataset:test_table" ASYNC_QUERY_DATA = { - 'id': '{}:{}'.format(self.PROJECT, JOB_ID), - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': 'query_job', - }, - 'state': 'DONE', - 'configuration': { - 'query': { - 'query': QUERY, - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': QUERY_DESTINATION_TABLE, + "id": "{}:{}".format(self.PROJECT, JOB_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "state": "DONE", + "configuration": { + "query": { + "query": QUERY, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": QUERY_DESTINATION_TABLE, }, - 'createDisposition': CreateDisposition.CREATE_IF_NEEDED, - 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, + "createDisposition": CreateDisposition.CREATE_IF_NEEDED, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, } }, } @@ -1902,43 +1913,38 @@ def test_get_job_hit(self): self.assertIsInstance(job, QueryJob) self.assertEqual(job.job_id, JOB_ID) - self.assertEqual(job.create_disposition, - CreateDisposition.CREATE_IF_NEEDED) - self.assertEqual(job.write_disposition, - WriteDisposition.WRITE_TRUNCATE) + self.assertEqual(job.create_disposition, CreateDisposition.CREATE_IF_NEEDED) + self.assertEqual(job.write_disposition, WriteDisposition.WRITE_TRUNCATE) conn.api_request.assert_called_once_with( - method='GET', - path='/projects/PROJECT/jobs/query_job', - query_params={'projection': 'full'}, + method="GET", + path="/projects/PROJECT/jobs/query_job", + query_params={"projection": "full"}, ) def test_cancel_job_miss_w_explict_project(self): from google.cloud.exceptions import NotFound - OTHER_PROJECT = 'OTHER_PROJECT' - JOB_ID = 'NONESUCH' + OTHER_PROJECT = "OTHER_PROJECT" + JOB_ID = "NONESUCH" creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = _make_connection() with self.assertRaises(NotFound): - client.cancel_job( - JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) + client.cancel_job(JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) conn.api_request.assert_called_once_with( - method='POST', - path='/projects/OTHER_PROJECT/jobs/NONESUCH/cancel', - query_params={ - 'projection': 'full', - 'location': self.LOCATION, - }) + method="POST", + path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", + query_params={"projection": "full", "location": self.LOCATION}, + ) def test_cancel_job_miss_w_client_location(self): from google.cloud.exceptions import NotFound - OTHER_PROJECT = 'OTHER_PROJECT' - JOB_ID = 'NONESUCH' + OTHER_PROJECT = "OTHER_PROJECT" + JOB_ID = "NONESUCH" creds = _make_credentials() client = self._make_one(self.PROJECT, creds, location=self.LOCATION) conn = client._connection = _make_connection() @@ -1947,34 +1953,23 @@ def test_cancel_job_miss_w_client_location(self): client.cancel_job(JOB_ID, project=OTHER_PROJECT) conn.api_request.assert_called_once_with( - method='POST', - path='/projects/OTHER_PROJECT/jobs/NONESUCH/cancel', - query_params={ - 'projection': 'full', - 'location': self.LOCATION, - }) + method="POST", + path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", + query_params={"projection": "full", "location": self.LOCATION}, + ) def test_cancel_job_hit(self): from google.cloud.bigquery.job import QueryJob - JOB_ID = 'query_job' - QUERY = 'SELECT * from test_dataset:test_table' + JOB_ID = "query_job" + QUERY = "SELECT * from test_dataset:test_table" QUERY_JOB_RESOURCE = { - 'id': '{}:{}'.format(self.PROJECT, JOB_ID), - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': 'query_job', - }, - 'state': 'RUNNING', - 'configuration': { - 'query': { - 'query': QUERY, - } - }, - } - RESOURCE = { - 'job': QUERY_JOB_RESOURCE, + "id": "{}:{}".format(self.PROJECT, JOB_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "state": "RUNNING", + "configuration": {"query": {"query": QUERY}}, } + RESOURCE = {"job": QUERY_JOB_RESOURCE} creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = _make_connection(RESOURCE) @@ -1986,9 +1981,10 @@ def test_cancel_job_hit(self): self.assertEqual(job.query, QUERY) conn.api_request.assert_called_once_with( - method='POST', - path='/projects/PROJECT/jobs/query_job/cancel', - query_params={'projection': 'full'}) + method="POST", + path="/projects/PROJECT/jobs/query_job/cancel", + query_params={"projection": "full"}, + ) def test_list_jobs_defaults(self): from google.cloud.bigquery.job import CopyJob @@ -1998,106 +1994,91 @@ def test_list_jobs_defaults(self): from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import WriteDisposition - SOURCE_TABLE = 'source_table' - DESTINATION_TABLE = 'destination_table' - QUERY_DESTINATION_TABLE = 'query_destination_table' - SOURCE_URI = 'gs://test_bucket/src_object*' - DESTINATION_URI = 'gs://test_bucket/dst_object*' + SOURCE_TABLE = "source_table" + DESTINATION_TABLE = "destination_table" + QUERY_DESTINATION_TABLE = "query_destination_table" + SOURCE_URI = "gs://test_bucket/src_object*" + DESTINATION_URI = "gs://test_bucket/dst_object*" JOB_TYPES = { - 'load_job': LoadJob, - 'copy_job': CopyJob, - 'extract_job': ExtractJob, - 'query_job': QueryJob, + "load_job": LoadJob, + "copy_job": CopyJob, + "extract_job": ExtractJob, + "query_job": QueryJob, } - PATH = 'projects/%s/jobs' % self.PROJECT - TOKEN = 'TOKEN' - QUERY = 'SELECT * from test_dataset:test_table' + PATH = "projects/%s/jobs" % self.PROJECT + TOKEN = "TOKEN" + QUERY = "SELECT * from test_dataset:test_table" ASYNC_QUERY_DATA = { - 'id': '%s:%s' % (self.PROJECT, 'query_job'), - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': 'query_job', - }, - 'state': 'DONE', - 'configuration': { - 'query': { - 'query': QUERY, - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': QUERY_DESTINATION_TABLE, + "id": "%s:%s" % (self.PROJECT, "query_job"), + "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "state": "DONE", + "configuration": { + "query": { + "query": QUERY, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": QUERY_DESTINATION_TABLE, }, - 'createDisposition': CreateDisposition.CREATE_IF_NEEDED, - 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, + "createDisposition": CreateDisposition.CREATE_IF_NEEDED, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, } }, } EXTRACT_DATA = { - 'id': '%s:%s' % (self.PROJECT, 'extract_job'), - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': 'extract_job', - }, - 'state': 'DONE', - 'configuration': { - 'extract': { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': SOURCE_TABLE, + "id": "%s:%s" % (self.PROJECT, "extract_job"), + "jobReference": {"projectId": self.PROJECT, "jobId": "extract_job"}, + "state": "DONE", + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": SOURCE_TABLE, }, - 'destinationUris': [DESTINATION_URI], + "destinationUris": [DESTINATION_URI], } }, } COPY_DATA = { - 'id': '%s:%s' % (self.PROJECT, 'copy_job'), - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': 'copy_job', - }, - 'state': 'DONE', - 'configuration': { - 'copy': { - 'sourceTables': [{ - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': SOURCE_TABLE, - }], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': DESTINATION_TABLE, + "id": "%s:%s" % (self.PROJECT, "copy_job"), + "jobReference": {"projectId": self.PROJECT, "jobId": "copy_job"}, + "state": "DONE", + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": DESTINATION_TABLE, }, } }, } LOAD_DATA = { - 'id': '%s:%s' % (self.PROJECT, 'load_job'), - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': 'load_job', - }, - 'state': 'DONE', - 'configuration': { - 'load': { - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': SOURCE_TABLE, + "id": "%s:%s" % (self.PROJECT, "load_job"), + "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"}, + "state": "DONE", + "configuration": { + "load": { + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": SOURCE_TABLE, }, - 'sourceUris': [SOURCE_URI], + "sourceUris": [SOURCE_URI], } }, } DATA = { - 'nextPageToken': TOKEN, - 'jobs': [ - ASYNC_QUERY_DATA, - EXTRACT_DATA, - COPY_DATA, - LOAD_DATA, - ] + "nextPageToken": TOKEN, + "jobs": [ASYNC_QUERY_DATA, EXTRACT_DATA, COPY_DATA, LOAD_DATA], } creds = _make_credentials() client = self._make_one(self.PROJECT, creds) @@ -2108,50 +2089,39 @@ def test_list_jobs_defaults(self): jobs = list(page) token = iterator.next_page_token - self.assertEqual(len(jobs), len(DATA['jobs'])) - for found, expected in zip(jobs, DATA['jobs']): - name = expected['jobReference']['jobId'] + self.assertEqual(len(jobs), len(DATA["jobs"])) + for found, expected in zip(jobs, DATA["jobs"]): + name = expected["jobReference"]["jobId"] self.assertIsInstance(found, JOB_TYPES[name]) self.assertEqual(found.job_id, name) self.assertEqual(token, TOKEN) conn.api_request.assert_called_once_with( - method='GET', - path='/%s' % PATH, - query_params={'projection': 'full'}) + method="GET", path="/%s" % PATH, query_params={"projection": "full"} + ) def test_list_jobs_load_job_wo_sourceUris(self): from google.cloud.bigquery.job import LoadJob - SOURCE_TABLE = 'source_table' - JOB_TYPES = { - 'load_job': LoadJob, - } - PATH = 'projects/%s/jobs' % self.PROJECT - TOKEN = 'TOKEN' + SOURCE_TABLE = "source_table" + JOB_TYPES = {"load_job": LoadJob} + PATH = "projects/%s/jobs" % self.PROJECT + TOKEN = "TOKEN" LOAD_DATA = { - 'id': '%s:%s' % (self.PROJECT, 'load_job'), - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': 'load_job', - }, - 'state': 'DONE', - 'configuration': { - 'load': { - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': SOURCE_TABLE, - }, + "id": "%s:%s" % (self.PROJECT, "load_job"), + "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"}, + "state": "DONE", + "configuration": { + "load": { + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": SOURCE_TABLE, + } } }, } - DATA = { - 'nextPageToken': TOKEN, - 'jobs': [ - LOAD_DATA, - ] - } + DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]} creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = _make_connection(DATA) @@ -2161,28 +2131,28 @@ def test_list_jobs_load_job_wo_sourceUris(self): jobs = list(page) token = iterator.next_page_token - self.assertEqual(len(jobs), len(DATA['jobs'])) - for found, expected in zip(jobs, DATA['jobs']): - name = expected['jobReference']['jobId'] + self.assertEqual(len(jobs), len(DATA["jobs"])) + for found, expected in zip(jobs, DATA["jobs"]): + name = expected["jobReference"]["jobId"] self.assertIsInstance(found, JOB_TYPES[name]) self.assertEqual(found.job_id, name) self.assertEqual(token, TOKEN) conn.api_request.assert_called_once_with( - method='GET', - path='/%s' % PATH, - query_params={'projection': 'full'}) + method="GET", path="/%s" % PATH, query_params={"projection": "full"} + ) def test_list_jobs_explicit_missing(self): - PATH = 'projects/%s/jobs' % self.PROJECT + PATH = "projects/%s/jobs" % self.PROJECT DATA = {} - TOKEN = 'TOKEN' + TOKEN = "TOKEN" creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = _make_connection(DATA) - iterator = client.list_jobs(max_results=1000, page_token=TOKEN, - all_users=True, state_filter='done') + iterator = client.list_jobs( + max_results=1000, page_token=TOKEN, all_users=True, state_filter="done" + ) page = six.next(iterator.pages) jobs = list(page) token = iterator.next_page_token @@ -2191,29 +2161,29 @@ def test_list_jobs_explicit_missing(self): self.assertIsNone(token) conn.api_request.assert_called_once_with( - method='GET', - path='/%s' % PATH, + method="GET", + path="/%s" % PATH, query_params={ - 'projection': 'full', - 'maxResults': 1000, - 'pageToken': TOKEN, - 'allUsers': True, - 'stateFilter': 'done' - }) + "projection": "full", + "maxResults": 1000, + "pageToken": TOKEN, + "allUsers": True, + "stateFilter": "done", + }, + ) def test_list_jobs_w_project(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = _make_connection({}) - list(client.list_jobs(project='other-project')) + list(client.list_jobs(project="other-project")) conn.api_request.assert_called_once_with( - method='GET', - path='/projects/other-project/jobs', - query_params={ - 'projection': 'full', - }) + method="GET", + path="/projects/other-project/jobs", + query_params={"projection": "full"}, + ) def test_list_jobs_w_time_filter(self): creds = _make_credentials() @@ -2226,44 +2196,40 @@ def test_list_jobs_w_time_filter(self): end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) end_time_millis = (((2 ** 31) - 1) * 1000) + 1 - list(client.list_jobs( - min_creation_time=start_time, max_creation_time=end_time)) + list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time)) conn.api_request.assert_called_once_with( - method='GET', - path='/projects/%s/jobs' % self.PROJECT, + method="GET", + path="/projects/%s/jobs" % self.PROJECT, query_params={ - 'projection': 'full', - 'minCreationTime': '1', - 'maxCreationTime': str(end_time_millis), - }) + "projection": "full", + "minCreationTime": "1", + "maxCreationTime": str(end_time_millis), + }, + ) def test_load_table_from_uri(self): from google.cloud.bigquery.job import LoadJob - JOB = 'job_name' - DESTINATION = 'destination_table' - SOURCE_URI = 'http://example.com/source.csv' + JOB = "job_name" + DESTINATION = "destination_table" + SOURCE_URI = "http://example.com/source.csv" RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': JOB, - }, - 'configuration': { - 'load': { - 'sourceUris': [SOURCE_URI], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': DESTINATION, + "jobReference": {"projectId": self.PROJECT, "jobId": JOB}, + "configuration": { + "load": { + "sourceUris": [SOURCE_URI], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": DESTINATION, }, - }, + } }, } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(RESOURCE) destination = client.dataset(self.DS_ID).table(DESTINATION) @@ -2271,9 +2237,8 @@ def test_load_table_from_uri(self): # Check that load_table_from_uri actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/%s/jobs' % self.PROJECT, - data=RESOURCE) + method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE + ) self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) @@ -2291,97 +2256,100 @@ def test_load_table_from_uri(self): self.assertIs(job.destination, destination) def test_load_table_from_uri_w_explicit_project(self): - job_id = 'this-is-a-job-id' - destination_id = 'destination_table' - source_uri = 'gs://example/source.csv' + job_id = "this-is-a-job-id" + destination_id = "destination_table" + source_uri = "gs://example/source.csv" resource = { - 'jobReference': { - 'projectId': 'other-project', - 'location': self.LOCATION, - 'jobId': job_id, + "jobReference": { + "projectId": "other-project", + "location": self.LOCATION, + "jobId": job_id, }, - 'configuration': { - 'load': { - 'sourceUris': [source_uri], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': destination_id, + "configuration": { + "load": { + "sourceUris": [source_uri], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": destination_id, }, - }, + } }, } creds = _make_credentials() http = object() - client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(resource) destination = client.dataset(self.DS_ID).table(destination_id) client.load_table_from_uri( - source_uri, destination, job_id=job_id, project='other-project', - location=self.LOCATION) + source_uri, + destination, + job_id=job_id, + project="other-project", + location=self.LOCATION, + ) # Check that load_table_from_uri actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/other-project/jobs', - data=resource) + method="POST", path="/projects/other-project/jobs", data=resource + ) def test_load_table_from_uri_w_client_location(self): - job_id = 'this-is-a-job-id' - destination_id = 'destination_table' - source_uri = 'gs://example/source.csv' + job_id = "this-is-a-job-id" + destination_id = "destination_table" + source_uri = "gs://example/source.csv" resource = { - 'jobReference': { - 'projectId': 'other-project', - 'location': self.LOCATION, - 'jobId': job_id, + "jobReference": { + "projectId": "other-project", + "location": self.LOCATION, + "jobId": job_id, }, - 'configuration': { - 'load': { - 'sourceUris': [source_uri], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': destination_id, + "configuration": { + "load": { + "sourceUris": [source_uri], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": destination_id, }, - }, + } }, } creds = _make_credentials() http = object() client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http, - location=self.LOCATION) + project=self.PROJECT, credentials=creds, _http=http, location=self.LOCATION + ) conn = client._connection = _make_connection(resource) client.load_table_from_uri( source_uri, # Test with string for table ID. - '{}.{}'.format( - self.DS_ID, - destination_id, - ), + "{}.{}".format(self.DS_ID, destination_id), job_id=job_id, - project='other-project') + project="other-project", + ) # Check that load_table_from_uri actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/other-project/jobs', - data=resource) + method="POST", path="/projects/other-project/jobs", data=resource + ) @staticmethod - def _mock_requests_response(status_code, headers, content=b''): + def _mock_requests_response(status_code, headers, content=b""): return mock.Mock( - content=content, headers=headers, status_code=status_code, - spec=['content', 'headers', 'status_code']) + content=content, + headers=headers, + status_code=status_code, + spec=["content", "headers", "status_code"], + ) - def _mock_transport(self, status_code, headers, content=b''): - fake_transport = mock.Mock(spec=['request']) + def _mock_transport(self, status_code, headers, content=b""): + fake_transport = mock.Mock(spec=["request"]) fake_response = self._mock_requests_response( - status_code, headers, content=content) + status_code, headers, content=content + ) fake_transport.request.return_value = fake_response return fake_transport @@ -2395,29 +2363,30 @@ def _initiate_resumable_upload_helper(self, num_retries=None): from google.cloud.bigquery.job import SourceFormat # Create mocks to be checked for doing transport. - resumable_url = 'http://test.invalid?upload_id=hey-you' - response_headers = {'location': resumable_url} - fake_transport = self._mock_transport( - http_client.OK, response_headers) + resumable_url = "http://test.invalid?upload_id=hey-you" + response_headers = {"location": resumable_url} + fake_transport = self._mock_transport(http_client.OK, response_headers) client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = _make_connection() # Create some mock arguments and call the method under test. - data = b'goodbye gudbi gootbee' + data = b"goodbye gudbi gootbee" stream = io.BytesIO(data) config = LoadJobConfig() config.source_format = SourceFormat.CSV job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job.to_api_repr() upload, transport = client._initiate_resumable_upload( - stream, metadata, num_retries) + stream, metadata, num_retries + ) # Check the returned values. self.assertIsInstance(upload, ResumableUpload) upload_url = ( - 'https://www.googleapis.com/upload/bigquery/v2/projects/' + "https://www.googleapis.com/upload/bigquery/v2/projects/" + self.PROJECT - + '/jobs?uploadType=resumable') + + "/jobs?uploadType=resumable" + ) self.assertEqual(upload.upload_url, upload_url) expected_headers = _get_upload_headers(conn.USER_AGENT) self.assertEqual(upload._headers, expected_headers) @@ -2442,11 +2411,11 @@ def _initiate_resumable_upload_helper(self, num_retries=None): # Check the mocks. request_headers = expected_headers.copy() - request_headers['x-upload-content-type'] = _GENERIC_CONTENT_TYPE + request_headers["x-upload-content-type"] = _GENERIC_CONTENT_TYPE fake_transport.request.assert_called_once_with( - 'POST', + "POST", upload_url, - data=json.dumps(metadata).encode('utf-8'), + data=json.dumps(metadata).encode("utf-8"), headers=request_headers, ) @@ -2456,8 +2425,7 @@ def test__initiate_resumable_upload(self): def test__initiate_resumable_upload_with_retry(self): self._initiate_resumable_upload_helper(num_retries=11) - def _do_multipart_upload_success_helper( - self, get_boundary, num_retries=None): + def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): from google.cloud.bigquery.client import _get_upload_headers from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import LoadJobConfig @@ -2468,15 +2436,14 @@ def _do_multipart_upload_success_helper( conn = client._connection = _make_connection() # Create some mock arguments. - data = b'Bzzzz-zap \x00\x01\xf4' + data = b"Bzzzz-zap \x00\x01\xf4" stream = io.BytesIO(data) config = LoadJobConfig() config.source_format = SourceFormat.CSV job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job.to_api_repr() size = len(data) - response = client._do_multipart_upload( - stream, metadata, size, num_retries) + response = client._do_multipart_upload(stream, metadata, size, num_retries) # Check the mocks and the returned value. self.assertIs(response, fake_transport.request.return_value) @@ -2484,68 +2451,63 @@ def _do_multipart_upload_success_helper( get_boundary.assert_called_once_with() upload_url = ( - 'https://www.googleapis.com/upload/bigquery/v2/projects/' + "https://www.googleapis.com/upload/bigquery/v2/projects/" + self.PROJECT - + '/jobs?uploadType=multipart') + + "/jobs?uploadType=multipart" + ) payload = ( - b'--==0==\r\n' - + b'content-type: application/json; charset=UTF-8\r\n\r\n' - + json.dumps(metadata).encode('utf-8') + b'\r\n' - + b'--==0==\r\n' - + b'content-type: */*\r\n\r\n' - + data + b'\r\n' - + b'--==0==--') + b"--==0==\r\n" + + b"content-type: application/json; charset=UTF-8\r\n\r\n" + + json.dumps(metadata).encode("utf-8") + + b"\r\n" + + b"--==0==\r\n" + + b"content-type: */*\r\n\r\n" + + data + + b"\r\n" + + b"--==0==--" + ) headers = _get_upload_headers(conn.USER_AGENT) - headers['content-type'] = b'multipart/related; boundary="==0=="' + headers["content-type"] = b'multipart/related; boundary="==0=="' fake_transport.request.assert_called_once_with( - 'POST', - upload_url, - data=payload, - headers=headers, + "POST", upload_url, data=payload, headers=headers ) - @mock.patch(u'google.resumable_media._upload.get_boundary', - return_value=b'==0==') + @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") def test__do_multipart_upload(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary) - @mock.patch(u'google.resumable_media._upload.get_boundary', - return_value=b'==0==') + @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") def test__do_multipart_upload_with_retry(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary, num_retries=8) def test_copy_table(self): from google.cloud.bigquery.job import CopyJob - JOB = 'job_name' - SOURCE = 'source_table' - DESTINATION = 'destination_table' + JOB = "job_name" + SOURCE = "source_table" + DESTINATION = "destination_table" RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': JOB, - }, - 'configuration': { - 'copy': { - 'sourceTables': [ + "jobReference": {"projectId": self.PROJECT, "jobId": JOB}, + "configuration": { + "copy": { + "sourceTables": [ { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': SOURCE, - }, + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": SOURCE, + } ], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': DESTINATION, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": DESTINATION, }, - }, + } }, } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) @@ -2555,9 +2517,8 @@ def test_copy_table(self): # Check that copy_table actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/%s/jobs' % self.PROJECT, - data=RESOURCE) + method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE + ) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) @@ -2566,7 +2527,7 @@ def test_copy_table(self): self.assertIs(job.destination, destination) conn = client._connection = _make_connection(RESOURCE) - source2 = dataset.table(SOURCE + '2') + source2 = dataset.table(SOURCE + "2") job = client.copy_table([source, source2], destination, job_id=JOB) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) @@ -2575,125 +2536,122 @@ def test_copy_table(self): self.assertIs(job.destination, destination) def test_copy_table_w_explicit_project(self): - job_id = 'this-is-a-job-id' - source_id = 'source_table' - destination_id = 'destination_table' + job_id = "this-is-a-job-id" + source_id = "source_table" + destination_id = "destination_table" resource = { - 'jobReference': { - 'projectId': 'other-project', - 'location': self.LOCATION, - 'jobId': job_id, + "jobReference": { + "projectId": "other-project", + "location": self.LOCATION, + "jobId": job_id, }, - 'configuration': { - 'copy': { - 'sourceTables': [ + "configuration": { + "copy": { + "sourceTables": [ { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': source_id, - }, + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": source_id, + } ], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': destination_id, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": destination_id, }, - }, + } }, } creds = _make_credentials() http = object() - client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(resource) dataset = client.dataset(self.DS_ID) source = dataset.table(source_id) destination = dataset.table(destination_id) client.copy_table( - source, destination, job_id=job_id, project='other-project', - location=self.LOCATION) + source, + destination, + job_id=job_id, + project="other-project", + location=self.LOCATION, + ) # Check that copy_table actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/other-project/jobs', - data=resource, + method="POST", path="/projects/other-project/jobs", data=resource ) def test_copy_table_w_client_location(self): - job_id = 'this-is-a-job-id' - source_id = 'source_table' - destination_id = 'destination_table' + job_id = "this-is-a-job-id" + source_id = "source_table" + destination_id = "destination_table" resource = { - 'jobReference': { - 'projectId': 'other-project', - 'location': self.LOCATION, - 'jobId': job_id, + "jobReference": { + "projectId": "other-project", + "location": self.LOCATION, + "jobId": job_id, }, - 'configuration': { - 'copy': { - 'sourceTables': [ + "configuration": { + "copy": { + "sourceTables": [ { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': source_id, - }, + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": source_id, + } ], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': destination_id, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": destination_id, }, - }, + } }, } creds = _make_credentials() http = object() client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http, - location=self.LOCATION) + project=self.PROJECT, credentials=creds, _http=http, location=self.LOCATION + ) conn = client._connection = _make_connection(resource) client.copy_table( # Test with string for table IDs. - '{}.{}'.format(self.DS_ID, source_id), - '{}.{}'.format(self.DS_ID, destination_id), - job_id=job_id, project='other-project') + "{}.{}".format(self.DS_ID, source_id), + "{}.{}".format(self.DS_ID, destination_id), + job_id=job_id, + project="other-project", + ) # Check that copy_table actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/other-project/jobs', - data=resource, + method="POST", path="/projects/other-project/jobs", data=resource ) def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob - JOB = 'job_id' - SOURCE = 'source_table' - DESTINATION = 'gs://bucket_name/object_name' + JOB = "job_id" + SOURCE = "source_table" + DESTINATION = "gs://bucket_name/object_name" RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': JOB, - }, - 'configuration': { - 'extract': { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': SOURCE, + "jobReference": {"projectId": self.PROJECT, "jobId": JOB}, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": SOURCE, }, - 'destinationUris': [DESTINATION], - }, + "destinationUris": [DESTINATION], + } }, } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) @@ -2702,9 +2660,8 @@ def test_extract_table(self): # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/PROJECT/jobs', - data=RESOURCE) + method="POST", path="/projects/PROJECT/jobs", data=RESOURCE + ) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -2714,83 +2671,85 @@ def test_extract_table(self): self.assertEqual(list(job.destination_uris), [DESTINATION]) def test_extract_table_w_explicit_project(self): - job_id = 'job_id' - source_id = 'source_table' - destination = 'gs://bucket_name/object_name' + job_id = "job_id" + source_id = "source_table" + destination = "gs://bucket_name/object_name" resource = { - 'jobReference': { - 'projectId': 'other-project', - 'location': self.LOCATION, - 'jobId': job_id, + "jobReference": { + "projectId": "other-project", + "location": self.LOCATION, + "jobId": job_id, }, - 'configuration': { - 'extract': { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': source_id, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": source_id, }, - 'destinationUris': [destination], - }, + "destinationUris": [destination], + } }, } creds = _make_credentials() http = object() - client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(resource) dataset = client.dataset(self.DS_ID) source = dataset.table(source_id) client.extract_table( - source, destination, job_id=job_id, project='other-project', - location=self.LOCATION) + source, + destination, + job_id=job_id, + project="other-project", + location=self.LOCATION, + ) # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/other-project/jobs', - data=resource, + method="POST", path="/projects/other-project/jobs", data=resource ) def test_extract_table_w_client_location(self): - job_id = 'job_id' - source_id = 'source_table' - destination = 'gs://bucket_name/object_name' + job_id = "job_id" + source_id = "source_table" + destination = "gs://bucket_name/object_name" resource = { - 'jobReference': { - 'projectId': 'other-project', - 'location': self.LOCATION, - 'jobId': job_id, + "jobReference": { + "projectId": "other-project", + "location": self.LOCATION, + "jobId": job_id, }, - 'configuration': { - 'extract': { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': source_id, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": source_id, }, - 'destinationUris': [destination], - }, + "destinationUris": [destination], + } }, } creds = _make_credentials() http = object() client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http, - location=self.LOCATION) + project=self.PROJECT, credentials=creds, _http=http, location=self.LOCATION + ) conn = client._connection = _make_connection(resource) client.extract_table( # Test with string for table ID. - '{}.{}'.format(self.DS_ID, source_id), - destination, job_id=job_id, project='other-project') + "{}.{}".format(self.DS_ID, source_id), + destination, + job_id=job_id, + project="other-project", + ) # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/other-project/jobs', - data=resource, + method="POST", path="/projects/other-project/jobs", data=resource ) def test_extract_table_generated_job_id(self): @@ -2798,46 +2757,40 @@ def test_extract_table_generated_job_id(self): from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import DestinationFormat - JOB = 'job_id' - SOURCE = 'source_table' - DESTINATION = 'gs://bucket_name/object_name' + JOB = "job_id" + SOURCE = "source_table" + DESTINATION = "gs://bucket_name/object_name" RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': JOB, - }, - 'configuration': { - 'extract': { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': SOURCE, + "jobReference": {"projectId": self.PROJECT, "jobId": JOB}, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": SOURCE, }, - 'destinationUris': [DESTINATION], - 'destinationFormat': 'NEWLINE_DELIMITED_JSON', - }, + "destinationUris": [DESTINATION], + "destinationFormat": "NEWLINE_DELIMITED_JSON", + } }, } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) job_config = ExtractJobConfig() - job_config.destination_format = ( - DestinationFormat.NEWLINE_DELIMITED_JSON) + job_config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON job = client.extract_table(source, DESTINATION, job_config=job_config) # Check that extract_table actually starts the job. conn.api_request.assert_called_once() _, req = conn.api_request.call_args - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/PROJECT/jobs') - self.assertIsInstance( - req['data']['jobReference']['jobId'], six.string_types) + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], "/projects/PROJECT/jobs") + self.assertIsInstance(req["data"]["jobReference"]["jobId"], six.string_types) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -2848,74 +2801,56 @@ def test_extract_table_generated_job_id(self): def test_extract_table_w_destination_uris(self): from google.cloud.bigquery.job import ExtractJob - JOB = 'job_id' - SOURCE = 'source_table' - DESTINATION1 = 'gs://bucket_name/object_one' - DESTINATION2 = 'gs://bucket_name/object_two' + JOB = "job_id" + SOURCE = "source_table" + DESTINATION1 = "gs://bucket_name/object_one" + DESTINATION2 = "gs://bucket_name/object_two" RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': JOB, - }, - 'configuration': { - 'extract': { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': SOURCE, + "jobReference": {"projectId": self.PROJECT, "jobId": JOB}, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": SOURCE, }, - 'destinationUris': [ - DESTINATION1, - DESTINATION2, - ], - }, + "destinationUris": [DESTINATION1, DESTINATION2], + } }, } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) - job = client.extract_table( - source, [DESTINATION1, DESTINATION2], job_id=JOB) + job = client.extract_table(source, [DESTINATION1, DESTINATION2], job_id=JOB) # Check that extract_table actually starts the job. conn.api_request.assert_called_once() _, req = conn.api_request.call_args - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], "/projects/PROJECT/jobs") # Check the job resource. self.assertIsInstance(job, ExtractJob) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(job.source, source) - self.assertEqual( - list(job.destination_uris), [DESTINATION1, DESTINATION2]) + self.assertEqual(list(job.destination_uris), [DESTINATION1, DESTINATION2]) def test_query_defaults(self): from google.cloud.bigquery.job import QueryJob - QUERY = 'select count(*) from persons' + QUERY = "select count(*) from persons" RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': 'some-random-id', - }, - 'configuration': { - 'query': { - 'query': QUERY, - 'useLegacySql': False, - }, - }, + "jobReference": {"projectId": self.PROJECT, "jobId": "some-random-id"}, + "configuration": {"query": {"query": QUERY, "useLegacySql": False}}, } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(RESOURCE) job = client.query(QUERY) @@ -2930,68 +2865,59 @@ def test_query_defaults(self): # Check that query actually starts the job. conn.api_request.assert_called_once() _, req = conn.api_request.call_args - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/PROJECT/jobs') - sent = req['data'] - self.assertIsInstance( - sent['jobReference']['jobId'], six.string_types) - sent_config = sent['configuration']['query'] - self.assertEqual(sent_config['query'], QUERY) - self.assertFalse(sent_config['useLegacySql']) + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], "/projects/PROJECT/jobs") + sent = req["data"] + self.assertIsInstance(sent["jobReference"]["jobId"], six.string_types) + sent_config = sent["configuration"]["query"] + self.assertEqual(sent_config["query"], QUERY) + self.assertFalse(sent_config["useLegacySql"]) def test_query_w_explicit_project(self): - job_id = 'some-job-id' - query = 'select count(*) from persons' + job_id = "some-job-id" + query = "select count(*) from persons" resource = { - 'jobReference': { - 'projectId': 'other-project', - 'location': self.LOCATION, - 'jobId': job_id, - }, - 'configuration': { - 'query': { - 'query': query, - 'useLegacySql': False, - }, + "jobReference": { + "projectId": "other-project", + "location": self.LOCATION, + "jobId": job_id, }, + "configuration": {"query": {"query": query, "useLegacySql": False}}, } creds = _make_credentials() http = object() - client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(resource) client.query( - query, job_id=job_id, project='other-project', - location=self.LOCATION) + query, job_id=job_id, project="other-project", location=self.LOCATION + ) # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/other-project/jobs', - data=resource, + method="POST", path="/projects/other-project/jobs", data=resource ) def test_query_w_explicit_job_config(self): - job_id = 'some-job-id' - query = 'select count(*) from persons' + job_id = "some-job-id" + query = "select count(*) from persons" resource = { - 'jobReference': { - 'jobId': job_id, - 'projectId': self.PROJECT, - 'location': self.LOCATION, + "jobReference": { + "jobId": job_id, + "projectId": self.PROJECT, + "location": self.LOCATION, }, - 'configuration': { - 'query': { - 'query': query, - 'defaultDataset': { - 'projectId': self.PROJECT, - 'datasetId': 'some-dataset', + "configuration": { + "query": { + "query": query, + "defaultDataset": { + "projectId": self.PROJECT, + "datasetId": "some-dataset", }, - 'useLegacySql': False, - 'useQueryCache': True, - 'maximumBytesBilled': '2000', - }, + "useLegacySql": False, + "useQueryCache": True, + "maximumBytesBilled": "2000", + } }, } @@ -2999,14 +2925,19 @@ def test_query_w_explicit_job_config(self): http = object() from google.cloud.bigquery import QueryJobConfig, DatasetReference + default_job_config = QueryJobConfig() default_job_config.default_dataset = DatasetReference( - self.PROJECT, 'some-dataset') + self.PROJECT, "some-dataset" + ) default_job_config.maximum_bytes_billed = 1000 client = self._make_one( - project=self.PROJECT, credentials=creds, - _http=http, default_query_job_config=default_job_config) + project=self.PROJECT, + credentials=creds, + _http=http, + default_query_job_config=default_job_config, + ) conn = client._connection = _make_connection(resource) job_config = QueryJobConfig() @@ -3014,33 +2945,31 @@ def test_query_w_explicit_job_config(self): job_config.maximum_bytes_billed = 2000 client.query( - query, job_id=job_id, location=self.LOCATION, - job_config=job_config) + query, job_id=job_id, location=self.LOCATION, job_config=job_config + ) # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/PROJECT/jobs', - data=resource, + method="POST", path="/projects/PROJECT/jobs", data=resource ) def test_query_w_explicit_job_config_override(self): - job_id = 'some-job-id' - query = 'select count(*) from persons' + job_id = "some-job-id" + query = "select count(*) from persons" resource = { - 'jobReference': { - 'jobId': job_id, - 'projectId': self.PROJECT, - 'location': self.LOCATION, + "jobReference": { + "jobId": job_id, + "projectId": self.PROJECT, + "location": self.LOCATION, }, - 'configuration': { - 'query': { - 'query': query, - 'defaultDataset': None, - 'useLegacySql': False, - 'useQueryCache': True, - 'maximumBytesBilled': '2000', - }, + "configuration": { + "query": { + "query": query, + "defaultDataset": None, + "useLegacySql": False, + "useQueryCache": True, + "maximumBytesBilled": "2000", + } }, } @@ -3048,14 +2977,19 @@ def test_query_w_explicit_job_config_override(self): http = object() from google.cloud.bigquery import QueryJobConfig, DatasetReference + default_job_config = QueryJobConfig() default_job_config.default_dataset = DatasetReference( - self.PROJECT, 'some-dataset') + self.PROJECT, "some-dataset" + ) default_job_config.maximum_bytes_billed = 1000 client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http, - default_query_job_config=default_job_config) + project=self.PROJECT, + credentials=creds, + _http=http, + default_query_job_config=default_job_config, + ) conn = client._connection = _make_connection(resource) job_config = QueryJobConfig() @@ -3064,32 +2998,29 @@ def test_query_w_explicit_job_config_override(self): job_config.default_dataset = None client.query( - query, job_id=job_id, location=self.LOCATION, - job_config=job_config, + query, job_id=job_id, location=self.LOCATION, job_config=job_config ) # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/PROJECT/jobs', - data=resource, + method="POST", path="/projects/PROJECT/jobs", data=resource ) def test_query_w_client_default_config_no_incoming(self): - job_id = 'some-job-id' - query = 'select count(*) from persons' + job_id = "some-job-id" + query = "select count(*) from persons" resource = { - 'jobReference': { - 'jobId': job_id, - 'projectId': self.PROJECT, - 'location': self.LOCATION, + "jobReference": { + "jobId": job_id, + "projectId": self.PROJECT, + "location": self.LOCATION, }, - 'configuration': { - 'query': { - 'query': query, - 'useLegacySql': False, - 'maximumBytesBilled': '1000', - }, + "configuration": { + "query": { + "query": query, + "useLegacySql": False, + "maximumBytesBilled": "1000", + } }, } @@ -3097,78 +3028,65 @@ def test_query_w_client_default_config_no_incoming(self): http = object() from google.cloud.bigquery import QueryJobConfig + default_job_config = QueryJobConfig() default_job_config.maximum_bytes_billed = 1000 client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http, - default_query_job_config=default_job_config) + project=self.PROJECT, + credentials=creds, + _http=http, + default_query_job_config=default_job_config, + ) conn = client._connection = _make_connection(resource) - client.query( - query, job_id=job_id, location=self.LOCATION) + client.query(query, job_id=job_id, location=self.LOCATION) # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/PROJECT/jobs', - data=resource, + method="POST", path="/projects/PROJECT/jobs", data=resource ) def test_query_w_client_location(self): - job_id = 'some-job-id' - query = 'select count(*) from persons' + job_id = "some-job-id" + query = "select count(*) from persons" resource = { - 'jobReference': { - 'projectId': 'other-project', - 'location': self.LOCATION, - 'jobId': job_id, - }, - 'configuration': { - 'query': { - 'query': query, - 'useLegacySql': False, - }, + "jobReference": { + "projectId": "other-project", + "location": self.LOCATION, + "jobId": job_id, }, + "configuration": {"query": {"query": query, "useLegacySql": False}}, } creds = _make_credentials() http = object() client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http, - location=self.LOCATION) + project=self.PROJECT, credentials=creds, _http=http, location=self.LOCATION + ) conn = client._connection = _make_connection(resource) - client.query( - query, job_id=job_id, project='other-project') + client.query(query, job_id=job_id, project="other-project") # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method='POST', - path='/projects/other-project/jobs', - data=resource, + method="POST", path="/projects/other-project/jobs", data=resource ) def test_query_detect_location(self): - query = 'select count(*) from persons' - resource_location = 'EU' + query = "select count(*) from persons" + resource_location = "EU" resource = { - 'jobReference': { - 'projectId': self.PROJECT, + "jobReference": { + "projectId": self.PROJECT, # Location not set in request, but present in the response. - 'location': resource_location, - 'jobId': 'some-random-id', - }, - 'configuration': { - 'query': { - 'query': query, - 'useLegacySql': False, - }, + "location": resource_location, + "jobId": "some-random-id", }, + "configuration": {"query": {"query": query, "useLegacySql": False}}, } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(resource) job = client.query(query) @@ -3178,36 +3096,30 @@ def test_query_detect_location(self): # Check that request did not contain a location. conn.api_request.assert_called_once() _, req = conn.api_request.call_args - sent = req['data'] - self.assertIsNone(sent['jobReference'].get('location')) + sent = req["data"] + self.assertIsNone(sent["jobReference"].get("location")) def test_query_w_udf_resources(self): from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import UDFResource - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - JOB = 'job_name' - QUERY = 'select count(*) from persons' + RESOURCE_URI = "gs://some-bucket/js/lib.js" + JOB = "job_name" + QUERY = "select count(*) from persons" RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': JOB, - }, - 'configuration': { - 'query': { - 'query': QUERY, - 'useLegacySql': True, - 'userDefinedFunctionResources': [ - {'resourceUri': RESOURCE_URI}, - ], - }, + "jobReference": {"projectId": self.PROJECT, "jobId": JOB}, + "configuration": { + "query": { + "query": QUERY, + "useLegacySql": True, + "userDefinedFunctionResources": [{"resourceUri": RESOURCE_URI}], + } }, } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(RESOURCE) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] config = QueryJobConfig() @@ -3226,50 +3138,46 @@ def test_query_w_udf_resources(self): # Check that query actually starts the job. conn.api_request.assert_called_once() _, req = conn.api_request.call_args - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/PROJECT/jobs') - sent = req['data'] - self.assertIsInstance( - sent['jobReference']['jobId'], six.string_types) - sent_config = sent['configuration']['query'] - self.assertEqual(sent_config['query'], QUERY) - self.assertTrue(sent_config['useLegacySql']) + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], "/projects/PROJECT/jobs") + sent = req["data"] + self.assertIsInstance(sent["jobReference"]["jobId"], six.string_types) + sent_config = sent["configuration"]["query"] + self.assertEqual(sent_config["query"], QUERY) + self.assertTrue(sent_config["useLegacySql"]) self.assertEqual( - sent_config['userDefinedFunctionResources'][0], - {'resourceUri': RESOURCE_URI}) + sent_config["userDefinedFunctionResources"][0], + {"resourceUri": RESOURCE_URI}, + ) def test_query_w_query_parameters(self): from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import ScalarQueryParameter - JOB = 'job_name' - QUERY = 'select count(*) from persons' + JOB = "job_name" + QUERY = "select count(*) from persons" RESOURCE = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': JOB, - }, - 'configuration': { - 'query': { - 'query': QUERY, - 'useLegacySql': False, - 'queryParameters': [ + "jobReference": {"projectId": self.PROJECT, "jobId": JOB}, + "configuration": { + "query": { + "query": QUERY, + "useLegacySql": False, + "queryParameters": [ { - 'name': 'foo', - 'parameterType': {'type': 'INT64'}, - 'parameterValue': {'value': '123'} - }, + "name": "foo", + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, + } ], - }, + } }, } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(RESOURCE) - query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] + query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] config = QueryJobConfig() config.query_parameters = query_parameters @@ -3285,34 +3193,34 @@ def test_query_w_query_parameters(self): # Check that query actually starts the job. conn.api_request.assert_called_once() _, req = conn.api_request.call_args - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/projects/PROJECT/jobs') - sent = req['data'] - self.assertEqual(sent['jobReference']['jobId'], JOB) - sent_config = sent['configuration']['query'] - self.assertEqual(sent_config['query'], QUERY) - self.assertFalse(sent_config['useLegacySql']) + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], "/projects/PROJECT/jobs") + sent = req["data"] + self.assertEqual(sent["jobReference"]["jobId"], JOB) + sent_config = sent["configuration"]["query"] + self.assertEqual(sent_config["query"], QUERY) + self.assertFalse(sent_config["useLegacySql"]) self.assertEqual( - sent_config['queryParameters'][0], + sent_config["queryParameters"][0], { - 'name': 'foo', - 'parameterType': {'type': 'INT64'}, - 'parameterValue': {'value': '123'} - }) + "name": "foo", + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, + }, + ) def test_insert_rows_wo_schema(self): from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) table = Table(self.TABLE_REF) ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), + ("Phred Phlyntstone", 32), + ("Bharney Rhubble", 33), + ("Wylma Phlyntstone", 29), + ("Bhettye Rhubble", 27), ] with self.assertRaises(ValueError) as exc: @@ -3328,55 +3236,53 @@ def test_insert_rows_w_schema(self): from google.cloud.bigquery.table import SchemaField WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) + PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection({}) schema = [ - SchemaField('full_name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED'), - SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField("joined", "TIMESTAMP", mode="NULLABLE"), ] ROWS = [ - ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), - ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), - ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)), - ('Bhettye Rhubble', 27, None), + ("Phred Phlyntstone", 32, _datetime_to_rfc3339(WHEN)), + ("Bharney Rhubble", 33, WHEN + datetime.timedelta(seconds=1)), + ("Wylma Phlyntstone", 29, WHEN + datetime.timedelta(seconds=2)), + ("Bhettye Rhubble", 27, None), ] def _row_data(row): joined = row[2] if isinstance(row[2], datetime.datetime): joined = _microseconds_from_datetime(joined) * 1e-6 - return {'full_name': row[0], - 'age': str(row[1]), - 'joined': joined} + return {"full_name": row[0], "age": str(row[1]), "joined": joined} SENT = { - 'rows': [{ - 'json': _row_data(row), - 'insertId': str(i), - } for i, row in enumerate(ROWS)], + "rows": [ + {"json": _row_data(row), "insertId": str(i)} + for i, row in enumerate(ROWS) + ] } - with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(ROWS)))): # Test with using string IDs for the table. errors = client.insert_rows( - '{}.{}'.format(self.DS_ID, self.TABLE_ID), - ROWS, - selected_fields=schema) + "{}.{}".format(self.DS_ID, self.TABLE_ID), ROWS, selected_fields=schema + ) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once() _, req = conn.api_request.call_args - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], "/%s" % PATH) + self.assertEqual(req["data"], SENT) def test_insert_rows_w_list_of_dictionaries(self): import datetime @@ -3386,152 +3292,163 @@ def test_insert_rows_w_list_of_dictionaries(self): from google.cloud.bigquery.table import Table, SchemaField WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) + PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection({}) schema = [ - SchemaField('full_name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED'), - SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField("joined", "TIMESTAMP", mode="NULLABLE"), ] table = Table(self.TABLE_REF, schema=schema) ROWS = [ { - 'full_name': 'Phred Phlyntstone', 'age': 32, - 'joined': _datetime_to_rfc3339(WHEN) - }, - { - 'full_name': 'Bharney Rhubble', 'age': 33, - 'joined': WHEN + datetime.timedelta(seconds=1) + "full_name": "Phred Phlyntstone", + "age": 32, + "joined": _datetime_to_rfc3339(WHEN), }, { - 'full_name': 'Wylma Phlyntstone', 'age': 29, - 'joined': WHEN + datetime.timedelta(seconds=2) + "full_name": "Bharney Rhubble", + "age": 33, + "joined": WHEN + datetime.timedelta(seconds=1), }, { - 'full_name': 'Bhettye Rhubble', 'age': 27, 'joined': None + "full_name": "Wylma Phlyntstone", + "age": 29, + "joined": WHEN + datetime.timedelta(seconds=2), }, + {"full_name": "Bhettye Rhubble", "age": 27, "joined": None}, ] def _row_data(row): - joined = row['joined'] + joined = row["joined"] if isinstance(joined, datetime.datetime): - row['joined'] = _microseconds_from_datetime(joined) * 1e-6 - row['age'] = str(row['age']) + row["joined"] = _microseconds_from_datetime(joined) * 1e-6 + row["age"] = str(row["age"]) return row SENT = { - 'rows': [{ - 'json': _row_data(row), - 'insertId': str(i), - } for i, row in enumerate(ROWS)], + "rows": [ + {"json": _row_data(row), "insertId": str(i)} + for i, row in enumerate(ROWS) + ] } - with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(ROWS)))): errors = client.insert_rows(table, ROWS) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % PATH, - data=SENT) + method="POST", path="/%s" % PATH, data=SENT + ) def test_insert_rows_w_list_of_Rows(self): from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import SchemaField from google.cloud.bigquery.table import Row - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection({}) schema = [ - SchemaField('full_name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), ] table = Table(self.TABLE_REF, schema=schema) - f2i = {'full_name': 0, 'age': 1} + f2i = {"full_name": 0, "age": 1} ROWS = [ - Row(('Phred Phlyntstone', 32), f2i), - Row(('Bharney Rhubble', 33), f2i), - Row(('Wylma Phlyntstone', 29), f2i), - Row(('Bhettye Rhubble', 27), f2i), + Row(("Phred Phlyntstone", 32), f2i), + Row(("Bharney Rhubble", 33), f2i), + Row(("Wylma Phlyntstone", 29), f2i), + Row(("Bhettye Rhubble", 27), f2i), ] def _row_data(row): - return {'full_name': row[0], 'age': str(row[1])} + return {"full_name": row[0], "age": str(row[1])} SENT = { - 'rows': [{ - 'json': _row_data(row), - 'insertId': str(i), - } for i, row in enumerate(ROWS)], + "rows": [ + {"json": _row_data(row), "insertId": str(i)} + for i, row in enumerate(ROWS) + ] } - with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(ROWS)))): errors = client.insert_rows(table, ROWS) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % PATH, - data=SENT) + method="POST", path="/%s" % PATH, data=SENT + ) def test_insert_rows_w_skip_invalid_and_ignore_unknown(self): from google.cloud.bigquery.table import Table, SchemaField - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) RESPONSE = { - 'insertErrors': [ - {'index': 1, - 'errors': [ - {'reason': 'REASON', - 'location': 'LOCATION', - 'debugInfo': 'INFO', - 'message': 'MESSAGE'} - ]}, - ]} + "insertErrors": [ + { + "index": 1, + "errors": [ + { + "reason": "REASON", + "location": "LOCATION", + "debugInfo": "INFO", + "message": "MESSAGE", + } + ], + } + ] + } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(RESPONSE) schema = [ - SchemaField('full_name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED'), - SchemaField('voter', 'BOOLEAN', mode='NULLABLE'), + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField("voter", "BOOLEAN", mode="NULLABLE"), ] table = Table(self.TABLE_REF, schema=schema) ROWS = [ - ('Phred Phlyntstone', 32, True), - ('Bharney Rhubble', 33, False), - ('Wylma Phlyntstone', 29, True), - ('Bhettye Rhubble', 27, True), + ("Phred Phlyntstone", 32, True), + ("Bharney Rhubble", 33, False), + ("Wylma Phlyntstone", 29, True), + ("Bhettye Rhubble", 27, True), ] def _row_data(row): return { - 'full_name': row[0], - 'age': str(row[1]), - 'voter': row[2] and 'true' or 'false', + "full_name": row[0], + "age": str(row[1]), + "voter": row[2] and "true" or "false", } SENT = { - 'skipInvalidRows': True, - 'ignoreUnknownValues': True, - 'templateSuffix': '20160303', - 'rows': [{'insertId': index, 'json': _row_data(row)} - for index, row in enumerate(ROWS)], + "skipInvalidRows": True, + "ignoreUnknownValues": True, + "templateSuffix": "20160303", + "rows": [ + {"insertId": index, "json": _row_data(row)} + for index, row in enumerate(ROWS) + ], } errors = client.insert_rows( @@ -3540,119 +3457,119 @@ def _row_data(row): row_ids=[index for index, _ in enumerate(ROWS)], skip_invalid_rows=True, ignore_unknown_values=True, - template_suffix='20160303', + template_suffix="20160303", ) self.assertEqual(len(errors), 1) - self.assertEqual(errors[0]['index'], 1) - self.assertEqual(len(errors[0]['errors']), 1) - self.assertEqual(errors[0]['errors'][0], - RESPONSE['insertErrors'][0]['errors'][0]) + self.assertEqual(errors[0]["index"], 1) + self.assertEqual(len(errors[0]["errors"]), 1) + self.assertEqual( + errors[0]["errors"][0], RESPONSE["insertErrors"][0]["errors"][0] + ) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % PATH, - data=SENT) + method="POST", path="/%s" % PATH, data=SENT + ) def test_insert_rows_w_repeated_fields(self): from google.cloud.bigquery.table import Table, SchemaField - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection({}) - full_name = SchemaField('color', 'STRING', mode='REPEATED') - index = SchemaField('index', 'INTEGER', 'REPEATED') - score = SchemaField('score', 'FLOAT', 'REPEATED') - struct = SchemaField('struct', 'RECORD', mode='REPEATED', - fields=[index, score]) + full_name = SchemaField("color", "STRING", mode="REPEATED") + index = SchemaField("index", "INTEGER", "REPEATED") + score = SchemaField("score", "FLOAT", "REPEATED") + struct = SchemaField("struct", "RECORD", mode="REPEATED", fields=[index, score]) table = Table(self.TABLE_REF, schema=[full_name, struct]) - ROWS = [ - (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), - ] + ROWS = [(["red", "green"], [{"index": [1, 2], "score": [3.1415, 1.414]}])] def _row_data(row): - return {'color': row[0], - 'struct': row[1]} + return {"color": row[0], "struct": row[1]} SENT = { - 'rows': [{ - 'json': _row_data(row), - 'insertId': str(i), - } for i, row in enumerate(ROWS)], + "rows": [ + {"json": _row_data(row), "insertId": str(i)} + for i, row in enumerate(ROWS) + ] } - with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(ROWS)))): errors = client.insert_rows(table, ROWS) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % PATH, - data=SENT) + method="POST", path="/%s" % PATH, data=SENT + ) def test_insert_rows_w_record_schema(self): from google.cloud.bigquery.table import SchemaField - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection({}) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - area_code = SchemaField('area_code', 'STRING', 'REQUIRED') - local_number = SchemaField('local_number', 'STRING', 'REQUIRED') - rank = SchemaField('rank', 'INTEGER', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='NULLABLE', - fields=[area_code, local_number, rank]) + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + area_code = SchemaField("area_code", "STRING", "REQUIRED") + local_number = SchemaField("local_number", "STRING", "REQUIRED") + rank = SchemaField("rank", "INTEGER", "REQUIRED") + phone = SchemaField( + "phone", "RECORD", mode="NULLABLE", fields=[area_code, local_number, rank] + ) ROWS = [ - ('Phred Phlyntstone', {'area_code': '800', - 'local_number': '555-1212', - 'rank': 1}), - ('Bharney Rhubble', {'area_code': '877', - 'local_number': '768-5309', - 'rank': 2}), - ('Wylma Phlyntstone', None), + ( + "Phred Phlyntstone", + {"area_code": "800", "local_number": "555-1212", "rank": 1}, + ), + ( + "Bharney Rhubble", + {"area_code": "877", "local_number": "768-5309", "rank": 2}, + ), + ("Wylma Phlyntstone", None), ] def _row_data(row): - return {'full_name': row[0], - 'phone': row[1]} + return {"full_name": row[0], "phone": row[1]} SENT = { - 'rows': [{ - 'json': _row_data(row), - 'insertId': str(i), - } for i, row in enumerate(ROWS)], + "rows": [ + {"json": _row_data(row), "insertId": str(i)} + for i, row in enumerate(ROWS) + ] } - with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): - errors = client.insert_rows(self.TABLE_REF, ROWS, - selected_fields=[full_name, phone]) + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(ROWS)))): + errors = client.insert_rows( + self.TABLE_REF, ROWS, selected_fields=[full_name, phone] + ) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % PATH, - data=SENT) + method="POST", path="/%s" % PATH, data=SENT + ) def test_insert_rows_errors(self): from google.cloud.bigquery.table import Table ROWS = [ - ('Phred Phlyntstone', 32, True), - ('Bharney Rhubble', 33, False), - ('Wylma Phlyntstone', 29, True), - ('Bhettye Rhubble', 27, True), + ("Phred Phlyntstone", 32, True), + ("Bharney Rhubble", 33, False), + ("Wylma Phlyntstone", 29, True), + ("Bhettye Rhubble", 27, True), ] creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) # table ref with no selected fields with self.assertRaises(ValueError): @@ -3669,170 +3586,154 @@ def test_insert_rows_errors(self): def test_insert_rows_w_numeric(self): from google.cloud.bigquery import table - project = 'PROJECT' - ds_id = 'DS_ID' - table_id = 'TABLE_ID' + project = "PROJECT" + ds_id = "DS_ID" + table_id = "TABLE_ID" creds = _make_credentials() http = object() client = self._make_one(project=project, credentials=creds, _http=http) conn = client._connection = _make_connection({}) table_ref = DatasetReference(project, ds_id).table(table_id) schema = [ - table.SchemaField('account', 'STRING'), - table.SchemaField('balance', 'NUMERIC'), + table.SchemaField("account", "STRING"), + table.SchemaField("balance", "NUMERIC"), ] insert_table = table.Table(table_ref, schema=schema) rows = [ - ('Savings', decimal.Decimal('23.47')), - ('Checking', decimal.Decimal('1.98')), - ('Mortgage', decimal.Decimal('-12345678909.87654321')), + ("Savings", decimal.Decimal("23.47")), + ("Checking", decimal.Decimal("1.98")), + ("Mortgage", decimal.Decimal("-12345678909.87654321")), ] - with mock.patch('uuid.uuid4', side_effect=map(str, range(len(rows)))): + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))): errors = client.insert_rows(insert_table, rows) self.assertEqual(len(errors), 0) rows_json = [ - {'account': 'Savings', 'balance': '23.47'}, - {'account': 'Checking', 'balance': '1.98'}, - { - 'account': 'Mortgage', - 'balance': '-12345678909.87654321', - }, + {"account": "Savings", "balance": "23.47"}, + {"account": "Checking", "balance": "1.98"}, + {"account": "Mortgage", "balance": "-12345678909.87654321"}, ] sent = { - 'rows': [{ - 'json': row, - 'insertId': str(i), - } for i, row in enumerate(rows_json)], + "rows": [ + {"json": row, "insertId": str(i)} for i, row in enumerate(rows_json) + ] } conn.api_request.assert_called_once_with( - method='POST', - path='/projects/{}/datasets/{}/tables/{}/insertAll'.format( - project, ds_id, table_id), - data=sent) + method="POST", + path="/projects/{}/datasets/{}/tables/{}/insertAll".format( + project, ds_id, table_id + ), + data=sent, + ) def test_insert_rows_json(self): from google.cloud.bigquery.table import Table, SchemaField from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'PROJECT' - DS_ID = 'DS_ID' - TABLE_ID = 'TABLE_ID' - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - PROJECT, DS_ID, TABLE_ID) + PROJECT = "PROJECT" + DS_ID = "DS_ID" + TABLE_ID = "TABLE_ID" + PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( + PROJECT, + DS_ID, + TABLE_ID, + ) creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection({}) table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) schema = [ - SchemaField('full_name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED'), - SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField("joined", "TIMESTAMP", mode="NULLABLE"), ] table = Table(table_ref, schema=schema) ROWS = [ { - 'full_name': 'Phred Phlyntstone', 'age': '32', - 'joined': '2015-07-24T19:53:19.006000Z' - }, - { - 'full_name': 'Bharney Rhubble', 'age': '33', - 'joined': 1437767600.006 - }, - { - 'full_name': 'Wylma Phlyntstone', 'age': '29', - 'joined': 1437767601.006 - }, - { - 'full_name': 'Bhettye Rhubble', 'age': '27', 'joined': None + "full_name": "Phred Phlyntstone", + "age": "32", + "joined": "2015-07-24T19:53:19.006000Z", }, + {"full_name": "Bharney Rhubble", "age": "33", "joined": 1437767600.006}, + {"full_name": "Wylma Phlyntstone", "age": "29", "joined": 1437767601.006}, + {"full_name": "Bhettye Rhubble", "age": "27", "joined": None}, ] SENT = { - 'rows': [{ - 'json': row, - 'insertId': str(i), - } for i, row in enumerate(ROWS)], + "rows": [{"json": row, "insertId": str(i)} for i, row in enumerate(ROWS)] } - with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(ROWS)))): errors = client.insert_rows_json(table, ROWS) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method='POST', - path='/%s' % PATH, - data=SENT) + method="POST", path="/%s" % PATH, data=SENT + ) def test_insert_rows_json_with_string_id(self): - rows = [{'col1': 'val1'}] + rows = [{"col1": "val1"}] creds = _make_credentials() http = object() client = self._make_one( - project='default-project', credentials=creds, _http=http) + project="default-project", credentials=creds, _http=http + ) conn = client._connection = _make_connection({}) - with mock.patch('uuid.uuid4', side_effect=map(str, range(len(rows)))): - errors = client.insert_rows_json('proj.dset.tbl', rows) + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))): + errors = client.insert_rows_json("proj.dset.tbl", rows) self.assertEqual(len(errors), 0) expected = { - 'rows': [{ - 'json': row, - 'insertId': str(i), - } for i, row in enumerate(rows)], + "rows": [{"json": row, "insertId": str(i)} for i, row in enumerate(rows)] } conn.api_request.assert_called_once_with( - method='POST', - path='/projects/proj/datasets/dset/tables/tbl/insertAll', - data=expected) + method="POST", + path="/projects/proj/datasets/dset/tables/tbl/insertAll", + data=expected, + ) def test_list_partitions(self): from google.cloud.bigquery.table import Table rows = 3 meta_info = _make_list_partitons_meta_info( - self.PROJECT, self.DS_ID, self.TABLE_ID, rows) + self.PROJECT, self.DS_ID, self.TABLE_ID, rows + ) data = { - 'totalRows': str(rows), - 'rows': [ - {'f': [ - {'v': '20180101'}, - ]}, - {'f': [ - {'v': '20180102'}, - ]}, - {'f': [ - {'v': '20180103'}, - ]}, - ] + "totalRows": str(rows), + "rows": [ + {"f": [{"v": "20180101"}]}, + {"f": [{"v": "20180102"}]}, + {"f": [{"v": "20180103"}]}, + ], } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) client._connection = _make_connection(meta_info, data) table = Table(self.TABLE_REF) partition_list = client.list_partitions(table) self.assertEqual(len(partition_list), rows) - self.assertIn('20180102', partition_list) + self.assertIn("20180102", partition_list) def test_list_partitions_with_string_id(self): meta_info = _make_list_partitons_meta_info( - self.PROJECT, self.DS_ID, self.TABLE_ID, 0) + self.PROJECT, self.DS_ID, self.TABLE_ID, 0 + ) creds = _make_credentials() http = object() - client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) client._connection = _make_connection(meta_info, {}) partition_list = client.list_partitions( - '{}.{}'.format(self.DS_ID, self.TABLE_ID)) + "{}.{}".format(self.DS_ID, self.TABLE_ID) + ) self.assertEqual(len(partition_list), 0) @@ -3843,54 +3744,57 @@ def test_list_rows(self): from google.cloud.bigquery.table import SchemaField from google.cloud.bigquery.table import Row - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + PATH = "projects/%s/datasets/%s/tables/%s/data" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) WHEN_1 = WHEN + datetime.timedelta(seconds=1) WHEN_2 = WHEN + datetime.timedelta(seconds=2) ROWS = 1234 - TOKEN = 'TOKEN' + TOKEN = "TOKEN" def _bigquery_timestamp_float_repr(ts_float): # Preserve microsecond precision for E+09 timestamps - return '%0.15E' % (ts_float,) + return "%0.15E" % (ts_float,) DATA = { - 'totalRows': str(ROWS), - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': '32'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS)}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': '33'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS + 1)}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': '29'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS + 2)}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': None}, - {'v': None}, - ]}, - ] + "totalRows": str(ROWS), + "pageToken": TOKEN, + "rows": [ + { + "f": [ + {"v": "Phred Phlyntstone"}, + {"v": "32"}, + {"v": _bigquery_timestamp_float_repr(WHEN_TS)}, + ] + }, + { + "f": [ + {"v": "Bharney Rhubble"}, + {"v": "33"}, + {"v": _bigquery_timestamp_float_repr(WHEN_TS + 1)}, + ] + }, + { + "f": [ + {"v": "Wylma Phlyntstone"}, + {"v": "29"}, + {"v": _bigquery_timestamp_float_repr(WHEN_TS + 2)}, + ] + }, + {"f": [{"v": "Bhettye Rhubble"}, {"v": None}, {"v": None}]}, + ], } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(DATA, DATA) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='NULLABLE') - joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="NULLABLE") + joined = SchemaField("joined", "TIMESTAMP", mode="NULLABLE") table = Table(self.TABLE_REF, schema=[full_name, age, joined]) iterator = client.list_rows(table) @@ -3899,36 +3803,31 @@ def _bigquery_timestamp_float_repr(ts_float): total_rows = iterator.total_rows page_token = iterator.next_page_token - f2i = {'full_name': 0, 'age': 1, 'joined': 2} + f2i = {"full_name": 0, "age": 1, "joined": 2} self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], Row(('Phred Phlyntstone', 32, WHEN), f2i)) - self.assertEqual(rows[1], Row(('Bharney Rhubble', 33, WHEN_1), f2i)) - self.assertEqual(rows[2], Row(('Wylma Phlyntstone', 29, WHEN_2), f2i)) - self.assertEqual(rows[3], Row(('Bhettye Rhubble', None, None), f2i)) + self.assertEqual(rows[0], Row(("Phred Phlyntstone", 32, WHEN), f2i)) + self.assertEqual(rows[1], Row(("Bharney Rhubble", 33, WHEN_1), f2i)) + self.assertEqual(rows[2], Row(("Wylma Phlyntstone", 29, WHEN_2), f2i)) + self.assertEqual(rows[3], Row(("Bhettye Rhubble", None, None), f2i)) self.assertEqual(total_rows, ROWS) self.assertEqual(page_token, TOKEN) conn.api_request.assert_called_once_with( - method='GET', - path='/%s' % PATH, - query_params={}) + method="GET", path="/%s" % PATH, query_params={} + ) def test_list_rows_empty_table(self): - response = { - 'totalRows': '0', - 'rows': [], - } + response = {"totalRows": "0", "rows": []} creds = _make_credentials() http = object() - client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) client._connection = _make_connection(response, response) # Table that has no schema because it's an empty table. rows = tuple( client.list_rows( # Test with using a string for the table ID. - '{}.{}.{}'.format( + "{}.{}.{}".format( self.TABLE_REF.project, self.TABLE_REF.dataset_id, self.TABLE_REF.table_id, @@ -3943,114 +3842,123 @@ def test_list_rows_query_params(self): creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) - table = Table(self.TABLE_REF, - schema=[SchemaField('age', 'INTEGER', mode='NULLABLE')]) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + table = Table( + self.TABLE_REF, schema=[SchemaField("age", "INTEGER", mode="NULLABLE")] + ) tests = [ ({}, {}), - ({'start_index': 1}, {'startIndex': 1}), - ({'max_results': 2}, {'maxResults': 2}), - ({'start_index': 1, 'max_results': 2}, - {'startIndex': 1, 'maxResults': 2}), + ({"start_index": 1}, {"startIndex": 1}), + ({"max_results": 2}, {"maxResults": 2}), + ({"start_index": 1, "max_results": 2}, {"startIndex": 1, "maxResults": 2}), ] conn = client._connection = _make_connection(*len(tests) * [{}]) for i, test in enumerate(tests): iterator = client.list_rows(table, **test[0]) six.next(iterator.pages) req = conn.api_request.call_args_list[i] - self.assertEqual(req[1]['query_params'], test[1], - 'for kwargs %s' % test[0]) + self.assertEqual(req[1]["query_params"], test[1], "for kwargs %s" % test[0]) def test_list_rows_repeated_fields(self): from google.cloud.bigquery.table import SchemaField - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + PATH = "projects/%s/datasets/%s/tables/%s/data" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) ROWS = 1234 - TOKEN = 'TOKEN' + TOKEN = "TOKEN" DATA = { - 'totalRows': ROWS, - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': [{'v': 'red'}, {'v': 'green'}]}, - {'v': [{ - 'v': { - 'f': [ - {'v': [{'v': '1'}, {'v': '2'}]}, - {'v': [{'v': '3.1415'}, {'v': '1.414'}]}, - ]} - }]}, - ]}, - ] + "totalRows": ROWS, + "pageToken": TOKEN, + "rows": [ + { + "f": [ + {"v": [{"v": "red"}, {"v": "green"}]}, + { + "v": [ + { + "v": { + "f": [ + {"v": [{"v": "1"}, {"v": "2"}]}, + {"v": [{"v": "3.1415"}, {"v": "1.414"}]}, + ] + } + } + ] + }, + ] + } + ], } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(DATA) - color = SchemaField('color', 'STRING', mode='REPEATED') - index = SchemaField('index', 'INTEGER', 'REPEATED') - score = SchemaField('score', 'FLOAT', 'REPEATED') - struct = SchemaField('struct', 'RECORD', mode='REPEATED', - fields=[index, score]) - - iterator = client.list_rows(self.TABLE_REF, - selected_fields=[color, struct]) + color = SchemaField("color", "STRING", mode="REPEATED") + index = SchemaField("index", "INTEGER", "REPEATED") + score = SchemaField("score", "FLOAT", "REPEATED") + struct = SchemaField("struct", "RECORD", mode="REPEATED", fields=[index, score]) + + iterator = client.list_rows(self.TABLE_REF, selected_fields=[color, struct]) page = six.next(iterator.pages) rows = list(page) total_rows = iterator.total_rows page_token = iterator.next_page_token self.assertEqual(len(rows), 1) - self.assertEqual(rows[0][0], ['red', 'green']) - self.assertEqual(rows[0][1], [{'index': [1, 2], - 'score': [3.1415, 1.414]}]) + self.assertEqual(rows[0][0], ["red", "green"]) + self.assertEqual(rows[0][1], [{"index": [1, 2], "score": [3.1415, 1.414]}]) self.assertEqual(total_rows, ROWS) self.assertEqual(page_token, TOKEN) conn.api_request.assert_called_once_with( - method='GET', - path='/%s' % PATH, - query_params={'selectedFields': 'color,struct'}) + method="GET", + path="/%s" % PATH, + query_params={"selectedFields": "color,struct"}, + ) def test_list_rows_w_record_schema(self): from google.cloud.bigquery.table import Table, SchemaField - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_ID, self.TABLE_ID) + PATH = "projects/%s/datasets/%s/tables/%s/data" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) ROWS = 1234 - TOKEN = 'TOKEN' + TOKEN = "TOKEN" DATA = { - 'totalRows': ROWS, - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': None}, - ]}, - ] + "totalRows": ROWS, + "pageToken": TOKEN, + "rows": [ + { + "f": [ + {"v": "Phred Phlyntstone"}, + {"v": {"f": [{"v": "800"}, {"v": "555-1212"}, {"v": 1}]}}, + ] + }, + { + "f": [ + {"v": "Bharney Rhubble"}, + {"v": {"f": [{"v": "877"}, {"v": "768-5309"}, {"v": 2}]}}, + ] + }, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": None}]}, + ], } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection(DATA) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - area_code = SchemaField('area_code', 'STRING', 'REQUIRED') - local_number = SchemaField('local_number', 'STRING', 'REQUIRED') - rank = SchemaField('rank', 'INTEGER', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='NULLABLE', - fields=[area_code, local_number, rank]) + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + area_code = SchemaField("area_code", "STRING", "REQUIRED") + local_number = SchemaField("local_number", "STRING", "REQUIRED") + rank = SchemaField("rank", "INTEGER", "REQUIRED") + phone = SchemaField( + "phone", "RECORD", mode="NULLABLE", fields=[area_code, local_number, rank] + ) table = Table(self.TABLE_REF, schema=[full_name, phone]) iterator = client.list_rows(table) @@ -4060,29 +3968,29 @@ def test_list_rows_w_record_schema(self): page_token = iterator.next_page_token self.assertEqual(len(rows), 3) - self.assertEqual(rows[0][0], 'Phred Phlyntstone') - self.assertEqual(rows[0][1], {'area_code': '800', - 'local_number': '555-1212', - 'rank': 1}) - self.assertEqual(rows[1][0], 'Bharney Rhubble') - self.assertEqual(rows[1][1], {'area_code': '877', - 'local_number': '768-5309', - 'rank': 2}) - self.assertEqual(rows[2][0], 'Wylma Phlyntstone') + self.assertEqual(rows[0][0], "Phred Phlyntstone") + self.assertEqual( + rows[0][1], {"area_code": "800", "local_number": "555-1212", "rank": 1} + ) + self.assertEqual(rows[1][0], "Bharney Rhubble") + self.assertEqual( + rows[1][1], {"area_code": "877", "local_number": "768-5309", "rank": 2} + ) + self.assertEqual(rows[2][0], "Wylma Phlyntstone") self.assertIsNone(rows[2][1]) self.assertEqual(total_rows, ROWS) self.assertEqual(page_token, TOKEN) conn.api_request.assert_called_once_with( - method='GET', path='/%s' % PATH, query_params={}) + method="GET", path="/%s" % PATH, query_params={} + ) def test_list_rows_errors(self): from google.cloud.bigquery.table import Table creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, - _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) # table ref with no selected fields with self.assertRaises(ValueError): @@ -4104,26 +4012,26 @@ def _call_fut(self, job_id, prefix=None): return _make_job_id(job_id, prefix=prefix) def test__make_job_id_wo_suffix(self): - job_id = self._call_fut('job_id') + job_id = self._call_fut("job_id") - self.assertEqual(job_id, 'job_id') + self.assertEqual(job_id, "job_id") def test__make_job_id_w_suffix(self): - with mock.patch('uuid.uuid4', side_effect=['212345']): - job_id = self._call_fut(None, prefix='job_id') + with mock.patch("uuid.uuid4", side_effect=["212345"]): + job_id = self._call_fut(None, prefix="job_id") - self.assertEqual(job_id, 'job_id212345') + self.assertEqual(job_id, "job_id212345") def test__make_random_job_id(self): - with mock.patch('uuid.uuid4', side_effect=['212345']): + with mock.patch("uuid.uuid4", side_effect=["212345"]): job_id = self._call_fut(None) - self.assertEqual(job_id, '212345') + self.assertEqual(job_id, "212345") def test__make_job_id_w_job_id_overrides_prefix(self): - job_id = self._call_fut('job_id', prefix='unused_prefix') + job_id = self._call_fut("job_id", prefix="unused_prefix") - self.assertEqual(job_id, 'job_id') + self.assertEqual(job_id, "job_id") class TestClientUpload(object): @@ -4131,69 +4039,73 @@ class TestClientUpload(object): # "load_table_from_file" portions of `Client`. It also uses # `pytest`-style tests rather than `unittest`-style. from google.cloud.bigquery.job import SourceFormat - TABLE_REF = DatasetReference( - 'project_id', 'test_dataset').table('test_table') - LOCATION = 'us-central' + TABLE_REF = DatasetReference("project_id", "test_dataset").table("test_table") + + LOCATION = "us-central" @staticmethod def _make_client(transport=None, location=None): from google.cloud.bigquery import _http from google.cloud.bigquery import client - cl = client.Client(project='project_id', - credentials=_make_credentials(), - _http=transport, location=location) + cl = client.Client( + project="project_id", + credentials=_make_credentials(), + _http=transport, + location=location, + ) cl._connection = mock.create_autospec(_http.Connection, instance=True) return cl @staticmethod - def _make_response(status_code, content='', headers={}): + def _make_response(status_code, content="", headers={}): """Make a mock HTTP response.""" import requests + response = requests.Response() - response.request = requests.Request( - 'POST', 'http://example.com').prepare() - response._content = content.encode('utf-8') + response.request = requests.Request("POST", "http://example.com").prepare() + response._content = content.encode("utf-8") response.headers.update(headers) response.status_code = status_code return response @classmethod - def _make_do_upload_patch(cls, client, method, - resource={}, side_effect=None): + def _make_do_upload_patch(cls, client, method, resource={}, side_effect=None): """Patches the low-level upload helpers.""" if side_effect is None: - side_effect = [cls._make_response( - http_client.OK, - json.dumps(resource), - {'Content-Type': 'application/json'})] - return mock.patch.object( - client, method, side_effect=side_effect, autospec=True) + side_effect = [ + cls._make_response( + http_client.OK, + json.dumps(resource), + {"Content-Type": "application/json"}, + ) + ] + return mock.patch.object(client, method, side_effect=side_effect, autospec=True) EXPECTED_CONFIGURATION = { - 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'}, - 'configuration': { - 'load': { - 'sourceFormat': SourceFormat.CSV, - 'destinationTable': { - 'projectId': 'project_id', - 'datasetId': 'test_dataset', - 'tableId': 'test_table' - } + "jobReference": {"projectId": "project_id", "jobId": "job_id"}, + "configuration": { + "load": { + "sourceFormat": SourceFormat.CSV, + "destinationTable": { + "projectId": "project_id", + "datasetId": "test_dataset", + "tableId": "test_table", + }, } - } + }, } @staticmethod def _make_file_obj(): - return io.BytesIO(b'hello, is it me you\'re looking for?') + return io.BytesIO(b"hello, is it me you're looking for?") def _make_gzip_file_obj(self, writable): if writable: - return gzip.GzipFile(mode='w', fileobj=io.BytesIO()) + return gzip.GzipFile(mode="w", fileobj=io.BytesIO()) else: - return gzip.GzipFile(mode='r', fileobj=self._make_file_obj()) + return gzip.GzipFile(mode="r", fileobj=self._make_file_obj()) @staticmethod def _make_config(): @@ -4213,16 +4125,19 @@ def test_load_table_from_file_resumable(self): file_obj = self._make_file_obj() do_upload_patch = self._make_do_upload_patch( - client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ) with do_upload_patch as do_upload: - client.load_table_from_file(file_obj, self.TABLE_REF, - job_id='job_id', - job_config=self._make_config()) + client.load_table_from_file( + file_obj, + self.TABLE_REF, + job_id="job_id", + job_config=self._make_config(), + ) do_upload.assert_called_once_with( - file_obj, - self.EXPECTED_CONFIGURATION, - _DEFAULT_NUM_RETRIES) + file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES + ) def test_load_table_from_file_w_explicit_project(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -4231,20 +4146,24 @@ def test_load_table_from_file_w_explicit_project(self): file_obj = self._make_file_obj() do_upload_patch = self._make_do_upload_patch( - client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ) with do_upload_patch as do_upload: client.load_table_from_file( - file_obj, self.TABLE_REF, job_id='job_id', - project='other-project', location=self.LOCATION, - job_config=self._make_config()) + file_obj, + self.TABLE_REF, + job_id="job_id", + project="other-project", + location=self.LOCATION, + job_config=self._make_config(), + ) expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION) - expected_resource['jobReference']['location'] = self.LOCATION - expected_resource['jobReference']['projectId'] = 'other-project' + expected_resource["jobReference"]["location"] = self.LOCATION + expected_resource["jobReference"]["projectId"] = "other-project" do_upload.assert_called_once_with( - file_obj, - expected_resource, - _DEFAULT_NUM_RETRIES) + file_obj, expected_resource, _DEFAULT_NUM_RETRIES + ) def test_load_table_from_file_w_client_location(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -4253,27 +4172,28 @@ def test_load_table_from_file_w_client_location(self): file_obj = self._make_file_obj() do_upload_patch = self._make_do_upload_patch( - client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ) with do_upload_patch as do_upload: client.load_table_from_file( file_obj, # Test with string for table ID. - '{}.{}.{}'.format( + "{}.{}.{}".format( self.TABLE_REF.project, self.TABLE_REF.dataset_id, self.TABLE_REF.table_id, ), - job_id='job_id', - project='other-project', - job_config=self._make_config()) + job_id="job_id", + project="other-project", + job_config=self._make_config(), + ) expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION) - expected_resource['jobReference']['location'] = self.LOCATION - expected_resource['jobReference']['projectId'] = 'other-project' + expected_resource["jobReference"]["location"] = self.LOCATION + expected_resource["jobReference"]["projectId"] = "other-project" do_upload.assert_called_once_with( - file_obj, - expected_resource, - _DEFAULT_NUM_RETRIES) + file_obj, expected_resource, _DEFAULT_NUM_RETRIES + ) def test_load_table_from_file_resumable_metadata(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -4287,50 +4207,51 @@ def test_load_table_from_file_resumable_metadata(self): config.allow_jagged_rows = False config.allow_quoted_newlines = False config.create_disposition = CreateDisposition.CREATE_IF_NEEDED - config.encoding = 'utf8' - config.field_delimiter = ',' + config.encoding = "utf8" + config.field_delimiter = "," config.ignore_unknown_values = False config.max_bad_records = 0 config.quote_character = '"' config.skip_leading_rows = 1 config.write_disposition = WriteDisposition.WRITE_APPEND - config.null_marker = r'\N' + config.null_marker = r"\N" expected_config = { - 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'}, - 'configuration': { - 'load': { - 'destinationTable': { - 'projectId': self.TABLE_REF.project, - 'datasetId': self.TABLE_REF.dataset_id, - 'tableId': self.TABLE_REF.table_id, + "jobReference": {"projectId": "project_id", "jobId": "job_id"}, + "configuration": { + "load": { + "destinationTable": { + "projectId": self.TABLE_REF.project, + "datasetId": self.TABLE_REF.dataset_id, + "tableId": self.TABLE_REF.table_id, }, - 'sourceFormat': config.source_format, - 'allowJaggedRows': config.allow_jagged_rows, - 'allowQuotedNewlines': config.allow_quoted_newlines, - 'createDisposition': config.create_disposition, - 'encoding': config.encoding, - 'fieldDelimiter': config.field_delimiter, - 'ignoreUnknownValues': config.ignore_unknown_values, - 'maxBadRecords': config.max_bad_records, - 'quote': config.quote_character, - 'skipLeadingRows': str(config.skip_leading_rows), - 'writeDisposition': config.write_disposition, - 'nullMarker': config.null_marker, - }, + "sourceFormat": config.source_format, + "allowJaggedRows": config.allow_jagged_rows, + "allowQuotedNewlines": config.allow_quoted_newlines, + "createDisposition": config.create_disposition, + "encoding": config.encoding, + "fieldDelimiter": config.field_delimiter, + "ignoreUnknownValues": config.ignore_unknown_values, + "maxBadRecords": config.max_bad_records, + "quote": config.quote_character, + "skipLeadingRows": str(config.skip_leading_rows), + "writeDisposition": config.write_disposition, + "nullMarker": config.null_marker, + } }, } do_upload_patch = self._make_do_upload_patch( - client, '_do_resumable_upload', expected_config) + client, "_do_resumable_upload", expected_config + ) with do_upload_patch as do_upload: client.load_table_from_file( - file_obj, self.TABLE_REF, job_id='job_id', job_config=config) + file_obj, self.TABLE_REF, job_id="job_id", job_config=config + ) do_upload.assert_called_once_with( - file_obj, - expected_config, - _DEFAULT_NUM_RETRIES) + file_obj, expected_config, _DEFAULT_NUM_RETRIES + ) def test_load_table_from_file_multipart(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -4341,17 +4262,20 @@ def test_load_table_from_file_multipart(self): config = self._make_config() do_upload_patch = self._make_do_upload_patch( - client, '_do_multipart_upload', self.EXPECTED_CONFIGURATION) + client, "_do_multipart_upload", self.EXPECTED_CONFIGURATION + ) with do_upload_patch as do_upload: client.load_table_from_file( - file_obj, self.TABLE_REF, job_id='job_id', job_config=config, - size=file_obj_size) + file_obj, + self.TABLE_REF, + job_id="job_id", + job_config=config, + size=file_obj_size, + ) do_upload.assert_called_once_with( - file_obj, - self.EXPECTED_CONFIGURATION, - file_obj_size, - _DEFAULT_NUM_RETRIES) + file_obj, self.EXPECTED_CONFIGURATION, file_obj_size, _DEFAULT_NUM_RETRIES + ) def test_load_table_from_file_with_retries(self): client = self._make_client() @@ -4359,16 +4283,20 @@ def test_load_table_from_file_with_retries(self): num_retries = 20 do_upload_patch = self._make_do_upload_patch( - client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ) with do_upload_patch as do_upload: client.load_table_from_file( - file_obj, self.TABLE_REF, num_retries=num_retries, - job_id='job_id', job_config=self._make_config()) + file_obj, + self.TABLE_REF, + num_retries=num_retries, + job_id="job_id", + job_config=self._make_config(), + ) do_upload.assert_called_once_with( - file_obj, - self.EXPECTED_CONFIGURATION, - num_retries) + file_obj, self.EXPECTED_CONFIGURATION, num_retries + ) def test_load_table_from_file_with_rewind(self): client = self._make_client() @@ -4376,9 +4304,9 @@ def test_load_table_from_file_with_rewind(self): file_obj.seek(2) with self._make_do_upload_patch( - client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION): - client.load_table_from_file( - file_obj, self.TABLE_REF, rewind=True) + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ): + client.load_table_from_file(file_obj, self.TABLE_REF, rewind=True) assert file_obj.tell() == 0 @@ -4389,16 +4317,19 @@ def test_load_table_from_file_with_readable_gzip(self): gzip_file = self._make_gzip_file_obj(writable=False) do_upload_patch = self._make_do_upload_patch( - client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ) with do_upload_patch as do_upload: client.load_table_from_file( - gzip_file, self.TABLE_REF, job_id='job_id', - job_config=self._make_config()) + gzip_file, + self.TABLE_REF, + job_id="job_id", + job_config=self._make_config(), + ) do_upload.assert_called_once_with( - gzip_file, - self.EXPECTED_CONFIGURATION, - _DEFAULT_NUM_RETRIES) + gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES + ) def test_load_table_from_file_with_writable_gzip(self): client = self._make_client() @@ -4406,8 +4337,11 @@ def test_load_table_from_file_with_writable_gzip(self): with pytest.raises(ValueError): client.load_table_from_file( - gzip_file, self.TABLE_REF, job_id='job_id', - job_config=self._make_config()) + gzip_file, + self.TABLE_REF, + job_id="job_id", + job_config=self._make_config(), + ) def test_load_table_from_file_failure(self): from google.resumable_media import InvalidResponse @@ -4417,83 +4351,87 @@ def test_load_table_from_file_failure(self): file_obj = self._make_file_obj() response = self._make_response( - content='Someone is already in this spot.', - status_code=http_client.CONFLICT) + content="Someone is already in this spot.", status_code=http_client.CONFLICT + ) do_upload_patch = self._make_do_upload_patch( - client, '_do_resumable_upload', - side_effect=InvalidResponse(response)) + client, "_do_resumable_upload", side_effect=InvalidResponse(response) + ) with do_upload_patch, pytest.raises(exceptions.Conflict) as exc_info: - client.load_table_from_file( - file_obj, self.TABLE_REF, rewind=True) + client.load_table_from_file(file_obj, self.TABLE_REF, rewind=True) assert response.text in exc_info.value.message assert exc_info.value.errors == [] def test_load_table_from_file_bad_mode(self): client = self._make_client() - file_obj = mock.Mock(spec=['mode']) - file_obj.mode = 'x' + file_obj = mock.Mock(spec=["mode"]) + file_obj.mode = "x" with pytest.raises(ValueError): client.load_table_from_file(file_obj, self.TABLE_REF) - @unittest.skipIf(pandas is None, 'Requires `pandas`') - @unittest.skipIf(pyarrow is None, 'Requires `pyarrow`') + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job client = self._make_client() - records = [ - {'name': 'Monty', 'age': 100}, - {'name': 'Python', 'age': 60}, - ] + records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] dataframe = pandas.DataFrame(records) load_patch = mock.patch( - 'google.cloud.bigquery.client.Client.load_table_from_file', - autospec=True) + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) with load_patch as load_table_from_file: client.load_table_from_dataframe(dataframe, self.TABLE_REF) load_table_from_file.assert_called_once_with( - client, mock.ANY, self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, job_id=None, job_id_prefix=None, location=None, - project=None, job_config=mock.ANY) + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=None, + job_id_prefix=None, + location=None, + project=None, + job_config=mock.ANY, + ) sent_file = load_table_from_file.mock_calls[0][1][1] sent_bytes = sent_file.getvalue() assert isinstance(sent_bytes, bytes) assert len(sent_bytes) > 0 - sent_config = load_table_from_file.mock_calls[0][2]['job_config'] + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - @unittest.skipIf(pandas is None, 'Requires `pandas`') - @unittest.skipIf(pyarrow is None, 'Requires `pyarrow`') + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_client_location(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job client = self._make_client(location=self.LOCATION) - records = [ - {'name': 'Monty', 'age': 100}, - {'name': 'Python', 'age': 60}, - ] + records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] dataframe = pandas.DataFrame(records) load_patch = mock.patch( - 'google.cloud.bigquery.client.Client.load_table_from_file', - autospec=True) + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) with load_patch as load_table_from_file: client.load_table_from_dataframe(dataframe, self.TABLE_REF) load_table_from_file.assert_called_once_with( - client, mock.ANY, self.TABLE_REF, + client, + mock.ANY, + self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, job_id=None, + rewind=True, + job_id=None, job_id_prefix=None, location=self.LOCATION, project=None, @@ -4505,34 +4443,32 @@ def test_load_table_from_dataframe_w_client_location(self): assert isinstance(sent_bytes, bytes) assert len(sent_bytes) > 0 - sent_config = load_table_from_file.mock_calls[0][2]['job_config'] + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - @unittest.skipIf(pandas is None, 'Requires `pandas`') - @unittest.skipIf(pyarrow is None, 'Requires `pyarrow`') + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job client = self._make_client() - records = [ - {'name': 'Monty', 'age': 100}, - {'name': 'Python', 'age': 60}, - ] + records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] dataframe = pandas.DataFrame(records) job_config = job.LoadJobConfig() load_patch = mock.patch( - 'google.cloud.bigquery.client.Client.load_table_from_file', - autospec=True) + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) with load_patch as load_table_from_file: client.load_table_from_dataframe( - dataframe, self.TABLE_REF, - job_config=job_config, - location=self.LOCATION) + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) load_table_from_file.assert_called_once_with( - client, mock.ANY, self.TABLE_REF, + client, + mock.ANY, + self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, job_id=None, @@ -4542,7 +4478,7 @@ def test_load_table_from_dataframe_w_custom_job_config(self): job_config=mock.ANY, ) - sent_config = load_table_from_file.mock_calls[0][2]['job_config'] + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config is job_config assert sent_config.source_format == job.SourceFormat.PARQUET @@ -4553,16 +4489,20 @@ def _make_resumable_upload_responses(cls, size): """Make a series of responses for a successful resumable upload.""" from google import resumable_media - resumable_url = 'http://test.invalid?upload_id=and-then-there-was-1' + resumable_url = "http://test.invalid?upload_id=and-then-there-was-1" initial_response = cls._make_response( - http_client.OK, '', {'location': resumable_url}) + http_client.OK, "", {"location": resumable_url} + ) data_response = cls._make_response( resumable_media.PERMANENT_REDIRECT, - '', {'range': 'bytes=0-{:d}'.format(size - 1)}) + "", + {"range": "bytes=0-{:d}".format(size - 1)}, + ) final_response = cls._make_response( http_client.OK, - json.dumps({'size': size}), - {'Content-Type': 'application/json'}) + json.dumps({"size": size}), + {"Content-Type": "application/json"}, + ) return [initial_response, data_response, final_response] @staticmethod @@ -4570,7 +4510,8 @@ def _make_transport(responses=None): import google.auth.transport.requests transport = mock.create_autospec( - google.auth.transport.requests.AuthorizedSession, instance=True) + google.auth.transport.requests.AuthorizedSession, instance=True + ) transport.request.side_effect = responses return transport @@ -4578,24 +4519,25 @@ def test__do_resumable_upload(self): file_obj = self._make_file_obj() file_obj_len = len(file_obj.getvalue()) transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len)) + self._make_resumable_upload_responses(file_obj_len) + ) client = self._make_client(transport) result = client._do_resumable_upload( - file_obj, - self.EXPECTED_CONFIGURATION, - None) + file_obj, self.EXPECTED_CONFIGURATION, None + ) - content = result.content.decode('utf-8') - assert json.loads(content) == {'size': file_obj_len} + content = result.content.decode("utf-8") + assert json.loads(content) == {"size": file_obj_len} # Verify that configuration data was passed in with the initial # request. transport.request.assert_any_call( - 'POST', + "POST", mock.ANY, - data=json.dumps(self.EXPECTED_CONFIGURATION).encode('utf-8'), - headers=mock.ANY) + data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"), + headers=mock.ANY, + ) def test__do_multipart_upload(self): transport = self._make_transport([self._make_response(http_client.OK)]) @@ -4604,28 +4546,27 @@ def test__do_multipart_upload(self): file_obj_len = len(file_obj.getvalue()) client._do_multipart_upload( - file_obj, - self.EXPECTED_CONFIGURATION, - file_obj_len, - None) + file_obj, self.EXPECTED_CONFIGURATION, file_obj_len, None + ) # Verify that configuration data was passed in with the initial # request. request_args = transport.request.mock_calls[0][2] - request_data = request_args['data'].decode('utf-8') - request_headers = request_args['headers'] + request_data = request_args["data"].decode("utf-8") + request_headers = request_args["headers"] request_content = email.message_from_string( - 'Content-Type: {}\r\n{}'.format( - request_headers['content-type'].decode('utf-8'), - request_data)) + "Content-Type: {}\r\n{}".format( + request_headers["content-type"].decode("utf-8"), request_data + ) + ) # There should be two payloads: the configuration and the binary daya. configuration_data = request_content.get_payload(0).get_payload() binary_data = request_content.get_payload(1).get_payload() assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION - assert binary_data.encode('utf-8') == file_obj.getvalue() + assert binary_data.encode("utf-8") == file_obj.getvalue() def test__do_multipart_upload_wrong_size(self): client = self._make_client() @@ -4633,8 +4574,4 @@ def test__do_multipart_upload_wrong_size(self): file_obj_len = len(file_obj.getvalue()) with pytest.raises(ValueError): - client._do_multipart_upload( - file_obj, - {}, - file_obj_len + 1, - None) + client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index d438c1d478a9..f477904c2f7d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -18,7 +18,6 @@ class TestAccessEntry(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.dataset import AccessEntry @@ -29,24 +28,24 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') - self.assertEqual(entry.role, 'OWNER') - self.assertEqual(entry.entity_type, 'userByEmail') - self.assertEqual(entry.entity_id, 'phred@example.com') + entry = self._make_one("OWNER", "userByEmail", "phred@example.com") + self.assertEqual(entry.role, "OWNER") + self.assertEqual(entry.entity_type, "userByEmail") + self.assertEqual(entry.entity_id, "phred@example.com") def test_ctor_bad_entity_type(self): with self.assertRaises(ValueError): - self._make_one(None, 'unknown', None) + self._make_one(None, "unknown", None) def test_ctor_view_with_role(self): - role = 'READER' - entity_type = 'view' + role = "READER" + entity_type = "view" with self.assertRaises(ValueError): self._make_one(role, entity_type, None) def test_ctor_view_success(self): role = None - entity_type = 'view' + entity_type = "view" entity_id = object() entry = self._make_one(role, entity_type, entity_id) self.assertEqual(entry.role, role) @@ -55,76 +54,75 @@ def test_ctor_view_success(self): def test_ctor_nonview_without_role(self): role = None - entity_type = 'userByEmail' + entity_type = "userByEmail" with self.assertRaises(ValueError): self._make_one(role, entity_type, None) def test___eq___role_mismatch(self): - entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') - other = self._make_one('WRITER', 'userByEmail', 'phred@example.com') + entry = self._make_one("OWNER", "userByEmail", "phred@example.com") + other = self._make_one("WRITER", "userByEmail", "phred@example.com") self.assertNotEqual(entry, other) def test___eq___entity_type_mismatch(self): - entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') - other = self._make_one('OWNER', 'groupByEmail', 'phred@example.com') + entry = self._make_one("OWNER", "userByEmail", "phred@example.com") + other = self._make_one("OWNER", "groupByEmail", "phred@example.com") self.assertNotEqual(entry, other) def test___eq___entity_id_mismatch(self): - entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') - other = self._make_one('OWNER', 'userByEmail', 'bharney@example.com') + entry = self._make_one("OWNER", "userByEmail", "phred@example.com") + other = self._make_one("OWNER", "userByEmail", "bharney@example.com") self.assertNotEqual(entry, other) def test___eq___hit(self): - entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') - other = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one("OWNER", "userByEmail", "phred@example.com") + other = self._make_one("OWNER", "userByEmail", "phred@example.com") self.assertEqual(entry, other) def test__eq___type_mismatch(self): - entry = self._make_one('OWNER', 'userByEmail', 'silly@example.com') + entry = self._make_one("OWNER", "userByEmail", "silly@example.com") self.assertNotEqual(entry, object()) self.assertEqual(entry, mock.ANY) def test_to_api_repr(self): - entry = self._make_one('OWNER', 'userByEmail', 'salmon@example.com') + entry = self._make_one("OWNER", "userByEmail", "salmon@example.com") resource = entry.to_api_repr() - exp_resource = {'role': 'OWNER', 'userByEmail': 'salmon@example.com'} + exp_resource = {"role": "OWNER", "userByEmail": "salmon@example.com"} self.assertEqual(resource, exp_resource) def test_to_api_repr_view(self): view = { - 'projectId': 'my-project', - 'datasetId': 'my_dataset', - 'tableId': 'my_table' + "projectId": "my-project", + "datasetId": "my_dataset", + "tableId": "my_table", } - entry = self._make_one(None, 'view', view) + entry = self._make_one(None, "view", view) resource = entry.to_api_repr() - exp_resource = {'view': view} + exp_resource = {"view": view} self.assertEqual(resource, exp_resource) def test_from_api_repr(self): - resource = {'role': 'OWNER', 'userByEmail': 'salmon@example.com'} + resource = {"role": "OWNER", "userByEmail": "salmon@example.com"} entry = self._get_target_class().from_api_repr(resource) - self.assertEqual(entry.role, 'OWNER') - self.assertEqual(entry.entity_type, 'userByEmail') - self.assertEqual(entry.entity_id, 'salmon@example.com') + self.assertEqual(entry.role, "OWNER") + self.assertEqual(entry.entity_type, "userByEmail") + self.assertEqual(entry.entity_id, "salmon@example.com") def test_from_api_repr_w_unknown_entity_type(self): - resource = {'role': 'READER', 'unknown': 'UNKNOWN'} + resource = {"role": "READER", "unknown": "UNKNOWN"} with self.assertRaises(ValueError): self._get_target_class().from_api_repr(resource) def test_from_api_repr_entries_w_extra_keys(self): resource = { - 'role': 'READER', - 'specialGroup': 'projectReaders', - 'userByEmail': 'salmon@example.com', + "role": "READER", + "specialGroup": "projectReaders", + "userByEmail": "salmon@example.com", } with self.assertRaises(ValueError): self._get_target_class().from_api_repr(resource) class TestDatasetReference(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.dataset import DatasetReference @@ -135,116 +133,107 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - dataset_ref = self._make_one('some-project-1', 'dataset_1') - self.assertEqual(dataset_ref.project, 'some-project-1') - self.assertEqual(dataset_ref.dataset_id, 'dataset_1') + dataset_ref = self._make_one("some-project-1", "dataset_1") + self.assertEqual(dataset_ref.project, "some-project-1") + self.assertEqual(dataset_ref.dataset_id, "dataset_1") def test_ctor_bad_args(self): with self.assertRaises(ValueError): - self._make_one(1, 'd') + self._make_one(1, "d") with self.assertRaises(ValueError): - self._make_one('p', 2) + self._make_one("p", 2) def test_table(self): - dataset_ref = self._make_one('some-project-1', 'dataset_1') - table_ref = dataset_ref.table('table_1') - self.assertEqual(table_ref.dataset_id, 'dataset_1') - self.assertEqual(table_ref.project, 'some-project-1') - self.assertEqual(table_ref.table_id, 'table_1') + dataset_ref = self._make_one("some-project-1", "dataset_1") + table_ref = dataset_ref.table("table_1") + self.assertEqual(table_ref.dataset_id, "dataset_1") + self.assertEqual(table_ref.project, "some-project-1") + self.assertEqual(table_ref.table_id, "table_1") def test_to_api_repr(self): - dataset = self._make_one('project_1', 'dataset_1') + dataset = self._make_one("project_1", "dataset_1") resource = dataset.to_api_repr() - self.assertEqual( - resource, - { - 'projectId': 'project_1', - 'datasetId': 'dataset_1', - }) + self.assertEqual(resource, {"projectId": "project_1", "datasetId": "dataset_1"}) def test_from_api_repr(self): cls = self._get_target_class() - expected = self._make_one('project_1', 'dataset_1') + expected = self._make_one("project_1", "dataset_1") - got = cls.from_api_repr( - { - 'projectId': 'project_1', - 'datasetId': 'dataset_1', - }) + got = cls.from_api_repr({"projectId": "project_1", "datasetId": "dataset_1"}) self.assertEqual(expected, got) def test_from_string(self): cls = self._get_target_class() - got = cls.from_string('string-project.string_dataset') - self.assertEqual(got.project, 'string-project') - self.assertEqual(got.dataset_id, 'string_dataset') + got = cls.from_string("string-project.string_dataset") + self.assertEqual(got.project, "string-project") + self.assertEqual(got.dataset_id, "string_dataset") def test_from_string_legacy_string(self): cls = self._get_target_class() with self.assertRaises(ValueError): - cls.from_string('string-project:string_dataset') + cls.from_string("string-project:string_dataset") def test_from_string_not_fully_qualified(self): cls = self._get_target_class() with self.assertRaises(ValueError): - cls.from_string('string_dataset') + cls.from_string("string_dataset") with self.assertRaises(ValueError): - cls.from_string('a.b.c') + cls.from_string("a.b.c") def test_from_string_with_default_project(self): cls = self._get_target_class() - got = cls.from_string( - 'string_dataset', default_project='default-project') - self.assertEqual(got.project, 'default-project') - self.assertEqual(got.dataset_id, 'string_dataset') + got = cls.from_string("string_dataset", default_project="default-project") + self.assertEqual(got.project, "default-project") + self.assertEqual(got.dataset_id, "string_dataset") def test_from_string_ignores_default_project(self): cls = self._get_target_class() got = cls.from_string( - 'string-project.string_dataset', default_project='default-project') - self.assertEqual(got.project, 'string-project') - self.assertEqual(got.dataset_id, 'string_dataset') + "string-project.string_dataset", default_project="default-project" + ) + self.assertEqual(got.project, "string-project") + self.assertEqual(got.dataset_id, "string_dataset") def test___eq___wrong_type(self): - dataset = self._make_one('project_1', 'dataset_1') + dataset = self._make_one("project_1", "dataset_1") other = object() self.assertNotEqual(dataset, other) self.assertEqual(dataset, mock.ANY) def test___eq___project_mismatch(self): - dataset = self._make_one('project_1', 'dataset_1') - other = self._make_one('project_2', 'dataset_1') + dataset = self._make_one("project_1", "dataset_1") + other = self._make_one("project_2", "dataset_1") self.assertNotEqual(dataset, other) def test___eq___dataset_mismatch(self): - dataset = self._make_one('project_1', 'dataset_1') - other = self._make_one('project_1', 'dataset_2') + dataset = self._make_one("project_1", "dataset_1") + other = self._make_one("project_1", "dataset_2") self.assertNotEqual(dataset, other) def test___eq___equality(self): - dataset = self._make_one('project_1', 'dataset_1') - other = self._make_one('project_1', 'dataset_1') + dataset = self._make_one("project_1", "dataset_1") + other = self._make_one("project_1", "dataset_1") self.assertEqual(dataset, other) def test___hash__set_equality(self): - dataset1 = self._make_one('project_1', 'dataset_1') - dataset2 = self._make_one('project_1', 'dataset_2') + dataset1 = self._make_one("project_1", "dataset_1") + dataset2 = self._make_one("project_1", "dataset_2") set_one = {dataset1, dataset2} set_two = {dataset1, dataset2} self.assertEqual(set_one, set_two) def test___hash__not_equals(self): - dataset1 = self._make_one('project_1', 'dataset_1') - dataset2 = self._make_one('project_1', 'dataset_2') + dataset1 = self._make_one("project_1", "dataset_1") + dataset2 = self._make_one("project_1", "dataset_2") set_one = {dataset1} set_two = {dataset2} self.assertNotEqual(set_one, set_two) def test___repr__(self): - dataset = self._make_one('project1', 'dataset1') + dataset = self._make_one("project1", "dataset1") expected = "DatasetReference('project1', 'dataset1')" self.assertEqual(repr(dataset), expected) @@ -252,8 +241,8 @@ def test___repr__(self): class TestDataset(unittest.TestCase): from google.cloud.bigquery.dataset import DatasetReference - PROJECT = 'project' - DS_ID = 'dataset-id' + PROJECT = "project" + DS_ID = "dataset-id" DS_REF = DatasetReference(PROJECT, DS_ID) @staticmethod @@ -270,48 +259,46 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( - tzinfo=UTC) - self.ETAG = 'ETAG' - self.DS_FULL_ID = '%s:%s' % (self.PROJECT, self.DS_ID) - self.RESOURCE_URL = 'http://example.com/path/to/resource' + self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.ETAG = "ETAG" + self.DS_FULL_ID = "%s:%s" % (self.PROJECT, self.DS_ID) + self.RESOURCE_URL = "http://example.com/path/to/resource" def _make_resource(self): self._setUpConstants() - USER_EMAIL = 'phred@example.com' - GROUP_EMAIL = 'group-name@lists.example.com' + USER_EMAIL = "phred@example.com" + GROUP_EMAIL = "group-name@lists.example.com" return { - 'creationTime': self.WHEN_TS * 1000, - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, - 'etag': self.ETAG, - 'id': self.DS_FULL_ID, - 'lastModifiedTime': self.WHEN_TS * 1000, - 'location': 'US', - 'selfLink': self.RESOURCE_URL, - 'defaultTableExpirationMs': 3600, - 'access': [ - {'role': 'OWNER', 'userByEmail': USER_EMAIL}, - {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL}, - {'role': 'WRITER', 'specialGroup': 'projectWriters'}, - {'role': 'READER', 'specialGroup': 'projectReaders'}], + "creationTime": self.WHEN_TS * 1000, + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": self.ETAG, + "id": self.DS_FULL_ID, + "lastModifiedTime": self.WHEN_TS * 1000, + "location": "US", + "selfLink": self.RESOURCE_URL, + "defaultTableExpirationMs": 3600, + "access": [ + {"role": "OWNER", "userByEmail": USER_EMAIL}, + {"role": "OWNER", "groupByEmail": GROUP_EMAIL}, + {"role": "WRITER", "specialGroup": "projectWriters"}, + {"role": "READER", "specialGroup": "projectReaders"}, + ], } def _verify_access_entry(self, access_entries, resource): r_entries = [] - for r_entry in resource['access']: - role = r_entry.pop('role') + for r_entry in resource["access"]: + role = r_entry.pop("role") for entity_type, entity_id in sorted(r_entry.items()): - r_entries.append({ - 'role': role, - 'entity_type': entity_type, - 'entity_id': entity_id}) + r_entries.append( + {"role": role, "entity_type": entity_type, "entity_id": entity_id} + ) self.assertEqual(len(access_entries), len(r_entries)) for a_entry, r_entry in zip(access_entries, r_entries): - self.assertEqual(a_entry.role, r_entry['role']) - self.assertEqual(a_entry.entity_type, r_entry['entity_type']) - self.assertEqual(a_entry.entity_id, r_entry['entity_id']) + self.assertEqual(a_entry.role, r_entry["role"]) + self.assertEqual(a_entry.entity_type, r_entry["entity_type"]) + self.assertEqual(a_entry.entity_id, r_entry["entity_id"]) def _verify_readonly_resource_properties(self, dataset, resource): @@ -320,19 +307,19 @@ def _verify_readonly_resource_properties(self, dataset, resource): self.assertEqual(dataset.reference.project, self.PROJECT) self.assertEqual(dataset.reference.dataset_id, self.DS_ID) - if 'creationTime' in resource: + if "creationTime" in resource: self.assertEqual(dataset.created, self.WHEN) else: self.assertIsNone(dataset.created) - if 'etag' in resource: + if "etag" in resource: self.assertEqual(dataset.etag, self.ETAG) else: self.assertIsNone(dataset.etag) - if 'lastModifiedTime' in resource: + if "lastModifiedTime" in resource: self.assertEqual(dataset.modified, self.WHEN) else: self.assertIsNone(dataset.modified) - if 'selfLink' in resource: + if "selfLink" in resource: self.assertEqual(dataset.self_link, self.RESOURCE_URL) else: self.assertIsNone(dataset.self_link) @@ -341,16 +328,18 @@ def _verify_resource_properties(self, dataset, resource): self._verify_readonly_resource_properties(dataset, resource) - if 'defaultTableExpirationMs' in resource: - self.assertEqual(dataset.default_table_expiration_ms, - int(resource.get('defaultTableExpirationMs'))) + if "defaultTableExpirationMs" in resource: + self.assertEqual( + dataset.default_table_expiration_ms, + int(resource.get("defaultTableExpirationMs")), + ) else: self.assertIsNone(dataset.default_table_expiration_ms) - self.assertEqual(dataset.description, resource.get('description')) - self.assertEqual(dataset.friendly_name, resource.get('friendlyName')) - self.assertEqual(dataset.location, resource.get('location')) + self.assertEqual(dataset.description, resource.get("description")) + self.assertEqual(dataset.friendly_name, resource.get("friendlyName")) + self.assertEqual(dataset.location, resource.get("location")) - if 'access' in resource: + if "access" in resource: self._verify_access_entry(dataset.access_entries, resource) else: self.assertEqual(dataset.access_entries, []) @@ -360,8 +349,8 @@ def test_ctor_defaults(self): self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) self.assertEqual( - dataset.path, - '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)) + dataset.path, "/projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + ) self.assertEqual(dataset.access_entries, []) self.assertIsNone(dataset.created) @@ -378,17 +367,17 @@ def test_ctor_defaults(self): def test_ctor_explicit(self): from google.cloud.bigquery.dataset import DatasetReference, AccessEntry - phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') - bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') + phred = AccessEntry("OWNER", "userByEmail", "phred@example.com") + bharney = AccessEntry("OWNER", "userByEmail", "bharney@example.com") entries = [phred, bharney] - OTHER_PROJECT = 'foo-bar-123' + OTHER_PROJECT = "foo-bar-123" dataset = self._make_one(DatasetReference(OTHER_PROJECT, self.DS_ID)) dataset.access_entries = entries self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, OTHER_PROJECT) self.assertEqual( - dataset.path, - '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_ID)) + dataset.path, "/projects/%s/datasets/%s" % (OTHER_PROJECT, self.DS_ID) + ) self.assertEqual(dataset.access_entries, entries) self.assertIsNone(dataset.created) @@ -411,7 +400,7 @@ def test_access_entries_setter_invalid_field(self): from google.cloud.bigquery.dataset import AccessEntry dataset = self._make_one(self.DS_REF) - phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') + phred = AccessEntry("OWNER", "userByEmail", "phred@example.com") with self.assertRaises(ValueError): dataset.access_entries = [phred, object()] @@ -419,15 +408,15 @@ def test_access_entries_setter(self): from google.cloud.bigquery.dataset import AccessEntry dataset = self._make_one(self.DS_REF) - phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') - bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') + phred = AccessEntry("OWNER", "userByEmail", "phred@example.com") + bharney = AccessEntry("OWNER", "userByEmail", "bharney@example.com") dataset.access_entries = [phred, bharney] self.assertEqual(dataset.access_entries, [phred, bharney]) def test_default_table_expiration_ms_setter_bad_value(self): dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): - dataset.default_table_expiration_ms = 'bogus' + dataset.default_table_expiration_ms = "bogus" def test_default_table_expiration_ms_setter(self): dataset = self._make_one(self.DS_REF) @@ -441,8 +430,8 @@ def test_description_setter_bad_value(self): def test_description_setter(self): dataset = self._make_one(self.DS_REF) - dataset.description = 'DESCRIPTION' - self.assertEqual(dataset.description, 'DESCRIPTION') + dataset.description = "DESCRIPTION" + self.assertEqual(dataset.description, "DESCRIPTION") def test_friendly_name_setter_bad_value(self): dataset = self._make_one(self.DS_REF) @@ -451,8 +440,8 @@ def test_friendly_name_setter_bad_value(self): def test_friendly_name_setter(self): dataset = self._make_one(self.DS_REF) - dataset.friendly_name = 'FRIENDLY' - self.assertEqual(dataset.friendly_name, 'FRIENDLY') + dataset.friendly_name = "FRIENDLY" + self.assertEqual(dataset.friendly_name, "FRIENDLY") def test_location_setter_bad_value(self): dataset = self._make_one(self.DS_REF) @@ -461,20 +450,20 @@ def test_location_setter_bad_value(self): def test_location_setter(self): dataset = self._make_one(self.DS_REF) - dataset.location = 'LOCATION' - self.assertEqual(dataset.location, 'LOCATION') + dataset.location = "LOCATION" + self.assertEqual(dataset.location, "LOCATION") def test_labels_update_in_place(self): dataset = self._make_one(self.DS_REF) - del dataset._properties['labels'] # don't start w/ existing dict + del dataset._properties["labels"] # don't start w/ existing dict labels = dataset.labels - labels['foo'] = 'bar' # update in place - self.assertEqual(dataset.labels, {'foo': 'bar'}) + labels["foo"] = "bar" # update in place + self.assertEqual(dataset.labels, {"foo": "bar"}) def test_labels_setter(self): dataset = self._make_one(self.DS_REF) - dataset.labels = {'color': 'green'} - self.assertEqual(dataset.labels, {'color': 'green'}) + dataset.labels = {"color": "green"} + self.assertEqual(dataset.labels, {"color": "green"}) def test_labels_setter_bad_value(self): dataset = self._make_one(self.DS_REF) @@ -495,11 +484,8 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_bare(self): self._setUpConstants() RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_ID), - 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - } + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, } klass = self._get_target_class() dataset = klass.from_api_repr(RESOURCE) @@ -513,62 +499,60 @@ def test_from_api_repr_w_properties(self): def test_to_api_repr_w_custom_field(self): dataset = self._make_one(self.DS_REF) - dataset._properties['newAlphaProperty'] = 'unreleased property' + dataset._properties["newAlphaProperty"] = "unreleased property" resource = dataset.to_api_repr() exp_resource = { - 'datasetReference': self.DS_REF.to_api_repr(), - 'labels': {}, - 'newAlphaProperty': 'unreleased property', + "datasetReference": self.DS_REF.to_api_repr(), + "labels": {}, + "newAlphaProperty": "unreleased property", } self.assertEqual(resource, exp_resource) def test_from_string(self): cls = self._get_target_class() - got = cls.from_string('string-project.string_dataset') - self.assertEqual(got.project, 'string-project') - self.assertEqual(got.dataset_id, 'string_dataset') + got = cls.from_string("string-project.string_dataset") + self.assertEqual(got.project, "string-project") + self.assertEqual(got.dataset_id, "string_dataset") def test_from_string_legacy_string(self): cls = self._get_target_class() with self.assertRaises(ValueError): - cls.from_string('string-project:string_dataset') + cls.from_string("string-project:string_dataset") def test__build_resource_w_custom_field(self): dataset = self._make_one(self.DS_REF) - dataset._properties['newAlphaProperty'] = 'unreleased property' - resource = dataset._build_resource(['newAlphaProperty']) + dataset._properties["newAlphaProperty"] = "unreleased property" + resource = dataset._build_resource(["newAlphaProperty"]) - exp_resource = { - 'newAlphaProperty': 'unreleased property' - } + exp_resource = {"newAlphaProperty": "unreleased property"} self.assertEqual(resource, exp_resource) def test__build_resource_w_custom_field_not_in__properties(self): dataset = self._make_one(self.DS_REF) - dataset.bad = 'value' + dataset.bad = "value" with self.assertRaises(ValueError): - dataset._build_resource(['bad']) + dataset._build_resource(["bad"]) def test_table(self): from google.cloud.bigquery.table import TableReference dataset = self._make_one(self.DS_REF) - table = dataset.table('table_id') + table = dataset.table("table_id") self.assertIsInstance(table, TableReference) - self.assertEqual(table.table_id, 'table_id') + self.assertEqual(table.table_id, "table_id") self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual(table.project, self.PROJECT) def test___repr__(self): from google.cloud.bigquery.dataset import DatasetReference - dataset = self._make_one(DatasetReference('project1', 'dataset1')) + + dataset = self._make_one(DatasetReference("project1", "dataset1")) expected = "Dataset(DatasetReference('project1', 'dataset1'))" self.assertEqual(repr(dataset), expected) class TestDatasetListItem(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.dataset import DatasetListItem @@ -579,61 +563,43 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor(self): - project = 'test-project' - dataset_id = 'test_dataset' + project = "test-project" + dataset_id = "test_dataset" resource = { - 'kind': 'bigquery#dataset', - 'id': '{}:{}'.format(project, dataset_id), - 'datasetReference': { - 'projectId': project, - 'datasetId': dataset_id, - }, - 'friendlyName': 'Data of the Test', - 'labels': { - 'some-stuff': 'this-is-a-label', - }, + "kind": "bigquery#dataset", + "id": "{}:{}".format(project, dataset_id), + "datasetReference": {"projectId": project, "datasetId": dataset_id}, + "friendlyName": "Data of the Test", + "labels": {"some-stuff": "this-is-a-label"}, } dataset = self._make_one(resource) self.assertEqual(dataset.project, project) self.assertEqual(dataset.dataset_id, dataset_id) - self.assertEqual( - dataset.full_dataset_id, - '{}:{}'.format(project, dataset_id)) + self.assertEqual(dataset.full_dataset_id, "{}:{}".format(project, dataset_id)) self.assertEqual(dataset.reference.project, project) self.assertEqual(dataset.reference.dataset_id, dataset_id) - self.assertEqual(dataset.friendly_name, 'Data of the Test') - self.assertEqual(dataset.labels['some-stuff'], 'this-is-a-label') + self.assertEqual(dataset.friendly_name, "Data of the Test") + self.assertEqual(dataset.labels["some-stuff"], "this-is-a-label") def test_ctor_missing_properties(self): resource = { - 'datasetReference': { - 'projectId': 'testproject', - 'datasetId': 'testdataset', - }, + "datasetReference": {"projectId": "testproject", "datasetId": "testdataset"} } dataset = self._make_one(resource) - self.assertEqual(dataset.project, 'testproject') - self.assertEqual(dataset.dataset_id, 'testdataset') + self.assertEqual(dataset.project, "testproject") + self.assertEqual(dataset.dataset_id, "testdataset") self.assertIsNone(dataset.full_dataset_id) self.assertIsNone(dataset.friendly_name) self.assertEqual(dataset.labels, {}) def test_ctor_wo_project(self): - resource = { - 'datasetReference': { - 'datasetId': 'testdataset', - }, - } + resource = {"datasetReference": {"datasetId": "testdataset"}} with self.assertRaises(ValueError): self._make_one(resource) def test_ctor_wo_dataset(self): - resource = { - 'datasetReference': { - 'projectId': 'testproject', - }, - } + resource = {"datasetReference": {"projectId": "testproject"}} with self.assertRaises(ValueError): self._make_one(resource) @@ -643,30 +609,22 @@ def test_ctor_wo_reference(self): def test_labels_update_in_place(self): resource = { - 'datasetReference': { - 'projectId': 'testproject', - 'datasetId': 'testdataset', - }, + "datasetReference": {"projectId": "testproject", "datasetId": "testdataset"} } dataset = self._make_one(resource) labels = dataset.labels - labels['foo'] = 'bar' # update in place - self.assertEqual(dataset.labels, {'foo': 'bar'}) + labels["foo"] = "bar" # update in place + self.assertEqual(dataset.labels, {"foo": "bar"}) def test_table(self): from google.cloud.bigquery.table import TableReference - project = 'test-project' - dataset_id = 'test_dataset' - resource = { - 'datasetReference': { - 'projectId': project, - 'datasetId': dataset_id, - }, - } + project = "test-project" + dataset_id = "test_dataset" + resource = {"datasetReference": {"projectId": project, "datasetId": dataset_id}} dataset = self._make_one(resource) - table = dataset.table('table_id') + table = dataset.table("table_id") self.assertIsInstance(table, TableReference) - self.assertEqual(table.table_id, 'table_id') + self.assertEqual(table.table_id, "table_id") self.assertEqual(table.dataset_id, dataset_id) self.assertEqual(table.project, project) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index f0430f06a1e5..bcc3e0879f87 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -23,77 +23,72 @@ class TestQueryParameters(unittest.TestCase): - def test_scalar_to_query_parameter(self): expected_types = [ - (True, 'BOOL'), - (False, 'BOOL'), - (123, 'INT64'), - (-123456789, 'INT64'), - (1.25, 'FLOAT64'), - (decimal.Decimal('1.25'), 'NUMERIC'), - (b'I am some bytes', 'BYTES'), - (u'I am a string', 'STRING'), - (datetime.date(2017, 4, 1), 'DATE'), - (datetime.time(12, 34, 56), 'TIME'), - (datetime.datetime(2012, 3, 4, 5, 6, 7), 'DATETIME'), + (True, "BOOL"), + (False, "BOOL"), + (123, "INT64"), + (-123456789, "INT64"), + (1.25, "FLOAT64"), + (decimal.Decimal("1.25"), "NUMERIC"), + (b"I am some bytes", "BYTES"), + (u"I am a string", "STRING"), + (datetime.date(2017, 4, 1), "DATE"), + (datetime.time(12, 34, 56), "TIME"), + (datetime.datetime(2012, 3, 4, 5, 6, 7), "DATETIME"), ( datetime.datetime( - 2012, 3, 4, 5, 6, 7, tzinfo=google.cloud._helpers.UTC), - 'TIMESTAMP', + 2012, 3, 4, 5, 6, 7, tzinfo=google.cloud._helpers.UTC + ), + "TIMESTAMP", ), ] for value, expected_type in expected_types: - msg = 'value: {} expected_type: {}'.format(value, expected_type) + msg = "value: {} expected_type: {}".format(value, expected_type) parameter = _helpers.scalar_to_query_parameter(value) self.assertIsNone(parameter.name, msg=msg) self.assertEqual(parameter.type_, expected_type, msg=msg) self.assertEqual(parameter.value, value, msg=msg) - named_parameter = _helpers.scalar_to_query_parameter( - value, name='myvar') - self.assertEqual(named_parameter.name, 'myvar', msg=msg) + named_parameter = _helpers.scalar_to_query_parameter(value, name="myvar") + self.assertEqual(named_parameter.name, "myvar", msg=msg) self.assertEqual(named_parameter.type_, expected_type, msg=msg) self.assertEqual(named_parameter.value, value, msg=msg) def test_scalar_to_query_parameter_w_unexpected_type(self): with self.assertRaises(exceptions.ProgrammingError): - _helpers.scalar_to_query_parameter(value={'a': 'dictionary'}) + _helpers.scalar_to_query_parameter(value={"a": "dictionary"}) def test_scalar_to_query_parameter_w_special_floats(self): - nan_parameter = _helpers.scalar_to_query_parameter(float('nan')) + nan_parameter = _helpers.scalar_to_query_parameter(float("nan")) self.assertTrue(math.isnan(nan_parameter.value)) - self.assertEqual(nan_parameter.type_, 'FLOAT64') - inf_parameter = _helpers.scalar_to_query_parameter(float('inf')) + self.assertEqual(nan_parameter.type_, "FLOAT64") + inf_parameter = _helpers.scalar_to_query_parameter(float("inf")) self.assertTrue(math.isinf(inf_parameter.value)) - self.assertEqual(inf_parameter.type_, 'FLOAT64') + self.assertEqual(inf_parameter.type_, "FLOAT64") def test_to_query_parameters_w_dict(self): - parameters = { - 'somebool': True, - 'somestring': u'a-string-value', - } + parameters = {"somebool": True, "somestring": u"a-string-value"} query_parameters = _helpers.to_query_parameters(parameters) query_parameter_tuples = [] for param in query_parameters: - query_parameter_tuples.append( - (param.name, param.type_, param.value)) + query_parameter_tuples.append((param.name, param.type_, param.value)) self.assertSequenceEqual( sorted(query_parameter_tuples), - sorted([ - ('somebool', 'BOOL', True), - ('somestring', 'STRING', u'a-string-value'), - ])) + sorted( + [ + ("somebool", "BOOL", True), + ("somestring", "STRING", u"a-string-value"), + ] + ), + ) def test_to_query_parameters_w_list(self): - parameters = [True, u'a-string-value'] + parameters = [True, u"a-string-value"] query_parameters = _helpers.to_query_parameters(parameters) query_parameter_tuples = [] for param in query_parameters: - query_parameter_tuples.append( - (param.name, param.type_, param.value)) + query_parameter_tuples.append((param.name, param.type_, param.value)) self.assertSequenceEqual( sorted(query_parameter_tuples), - sorted([ - (None, 'BOOL', True), - (None, 'STRING', u'a-string-value'), - ])) + sorted([(None, "BOOL", True), (None, "STRING", u"a-string-value")]), + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 176d5f989b41..19acec05bd34 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -18,10 +18,10 @@ class TestConnection(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.dbapi import Connection + return Connection def _make_one(self, *args, **kw): @@ -29,20 +29,23 @@ def _make_one(self, *args, **kw): def _mock_client(self, rows=None, schema=None): from google.cloud.bigquery import client + mock_client = mock.create_autospec(client.Client) return mock_client def test_ctor(self): from google.cloud.bigquery.dbapi import Connection + mock_client = self._mock_client() connection = self._make_one(client=mock_client) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) - @mock.patch('google.cloud.bigquery.Client', autospec=True) + @mock.patch("google.cloud.bigquery.Client", autospec=True) def test_connect_wo_client(self, mock_client): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection + connection = connect() self.assertIsInstance(connection, Connection) self.assertIsNotNone(connection._client) @@ -50,6 +53,7 @@ def test_connect_wo_client(self, mock_client): def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection + mock_client = self._mock_client() connection = connect(client=mock_client) self.assertIsInstance(connection, Connection) @@ -67,6 +71,7 @@ def test_commit(self): def test_cursor(self): from google.cloud.bigquery.dbapi import Cursor + connection = self._make_one(client=self._mock_client()) cursor = connection.cursor() self.assertIsInstance(cursor, Cursor) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index d0ad5401cf85..4a675c73958d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -18,17 +18,16 @@ class TestCursor(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.dbapi import Cursor + return Cursor def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - def _mock_client( - self, rows=None, schema=None, num_dml_affected_rows=None): + def _mock_client(self, rows=None, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import client if rows is None: @@ -40,31 +39,34 @@ def _mock_client( mock_client.query.return_value = self._mock_job( total_rows=total_rows, schema=schema, - num_dml_affected_rows=num_dml_affected_rows) + num_dml_affected_rows=num_dml_affected_rows, + ) mock_client.list_rows.return_value = rows return mock_client - def _mock_job( - self, total_rows=0, schema=None, num_dml_affected_rows=None): + def _mock_job(self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import job + mock_job = mock.create_autospec(job.QueryJob) mock_job.error_result = None - mock_job.state = 'DONE' + mock_job.state = "DONE" mock_job.result.return_value = mock_job mock_job._query_results = self._mock_results( - total_rows=total_rows, schema=schema, - num_dml_affected_rows=num_dml_affected_rows) + total_rows=total_rows, + schema=schema, + num_dml_affected_rows=num_dml_affected_rows, + ) if num_dml_affected_rows is None: mock_job.statement_type = None # API sends back None for SELECT else: - mock_job.statement_type = 'UPDATE' + mock_job.statement_type = "UPDATE" return mock_job - def _mock_results( - self, total_rows=0, schema=None, num_dml_affected_rows=None): + def _mock_results(self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import query + mock_results = mock.create_autospec(query._QueryResults) mock_results.schema = schema mock_results.num_dml_affected_rows = num_dml_affected_rows @@ -74,6 +76,7 @@ def _mock_results( def test_ctor(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Cursor + connection = connect(self._mock_client()) cursor = self._make_one(connection) self.assertIsInstance(cursor, Cursor) @@ -81,6 +84,7 @@ def test_ctor(self): def test_close(self): from google.cloud.bigquery.dbapi import connect + connection = connect(self._mock_client()) cursor = connection.cursor() # close() is a no-op, there is nothing to test. @@ -88,47 +92,46 @@ def test_close(self): def test_fetchone_wo_execute_raises_error(self): from google.cloud.bigquery import dbapi + connection = dbapi.connect(self._mock_client()) cursor = connection.cursor() self.assertRaises(dbapi.Error, cursor.fetchone) def test_fetchone_w_row(self): from google.cloud.bigquery import dbapi - connection = dbapi.connect( - self._mock_client(rows=[(1,)])) + + connection = dbapi.connect(self._mock_client(rows=[(1,)])) cursor = connection.cursor() - cursor.execute('SELECT 1;') + cursor.execute("SELECT 1;") row = cursor.fetchone() self.assertEqual(row, (1,)) self.assertIsNone(cursor.fetchone()) def test_fetchmany_wo_execute_raises_error(self): from google.cloud.bigquery import dbapi + connection = dbapi.connect(self._mock_client()) cursor = connection.cursor() self.assertRaises(dbapi.Error, cursor.fetchmany) def test_fetchmany_w_row(self): from google.cloud.bigquery import dbapi - connection = dbapi.connect( - self._mock_client(rows=[(1,)])) + + connection = dbapi.connect(self._mock_client(rows=[(1,)])) cursor = connection.cursor() - cursor.execute('SELECT 1;') + cursor.execute("SELECT 1;") rows = cursor.fetchmany() self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) def test_fetchmany_w_size(self): from google.cloud.bigquery import dbapi + connection = dbapi.connect( - self._mock_client( - rows=[ - (1, 2, 3), - (4, 5, 6), - (7, 8, 9), - ])) + self._mock_client(rows=[(1, 2, 3), (4, 5, 6), (7, 8, 9)]) + ) cursor = connection.cursor() - cursor.execute('SELECT a, b, c;') + cursor.execute("SELECT a, b, c;") rows = cursor.fetchmany(size=2) self.assertEqual(len(rows), 2) self.assertEqual(rows[0], (1, 2, 3)) @@ -141,15 +144,12 @@ def test_fetchmany_w_size(self): def test_fetchmany_w_arraysize(self): from google.cloud.bigquery import dbapi + connection = dbapi.connect( - self._mock_client( - rows=[ - (1, 2, 3), - (4, 5, 6), - (7, 8, 9), - ])) + self._mock_client(rows=[(1, 2, 3), (4, 5, 6), (7, 8, 9)]) + ) cursor = connection.cursor() - cursor.execute('SELECT a, b, c;') + cursor.execute("SELECT a, b, c;") cursor.arraysize = 2 rows = cursor.fetchmany() self.assertEqual(len(rows), 2) @@ -163,16 +163,17 @@ def test_fetchmany_w_arraysize(self): def test_fetchall_wo_execute_raises_error(self): from google.cloud.bigquery import dbapi + connection = dbapi.connect(self._mock_client()) cursor = connection.cursor() self.assertRaises(dbapi.Error, cursor.fetchall) def test_fetchall_w_row(self): from google.cloud.bigquery import dbapi - connection = dbapi.connect( - self._mock_client(rows=[(1,)])) + + connection = dbapi.connect(self._mock_client(rows=[(1,)])) cursor = connection.cursor() - cursor.execute('SELECT 1;') + cursor.execute("SELECT 1;") self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 1) rows = cursor.fetchall() @@ -181,20 +182,21 @@ def test_fetchall_w_row(self): def test_execute_custom_job_id(self): from google.cloud.bigquery.dbapi import connect + client = self._mock_client(rows=[], num_dml_affected_rows=0) connection = connect(client) cursor = connection.cursor() - cursor.execute('SELECT 1;', job_id='foo') + cursor.execute("SELECT 1;", job_id="foo") args, kwargs = client.query.call_args - self.assertEqual(args[0], 'SELECT 1;') - self.assertEqual(kwargs['job_id'], 'foo') + self.assertEqual(args[0], "SELECT 1;") + self.assertEqual(kwargs["job_id"], "foo") def test_execute_w_dml(self): from google.cloud.bigquery.dbapi import connect - connection = connect( - self._mock_client(rows=[], num_dml_affected_rows=12)) + + connection = connect(self._mock_client(rows=[], num_dml_affected_rows=12)) cursor = connection.cursor() - cursor.execute('DELETE FROM UserSessions WHERE user_id = \'test\';') + cursor.execute("DELETE FROM UserSessions WHERE user_id = 'test';") rows = cursor.fetchall() self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) @@ -204,39 +206,43 @@ def test_execute_w_query(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery import dbapi - connection = dbapi.connect(self._mock_client( - rows=[('hello', 'world', 1), ('howdy', 'y\'all', 2)], - schema=[ - SchemaField('a', 'STRING', mode='NULLABLE'), - SchemaField('b', 'STRING', mode='REQUIRED'), - SchemaField('c', 'INTEGER', mode='NULLABLE')])) + connection = dbapi.connect( + self._mock_client( + rows=[("hello", "world", 1), ("howdy", "y'all", 2)], + schema=[ + SchemaField("a", "STRING", mode="NULLABLE"), + SchemaField("b", "STRING", mode="REQUIRED"), + SchemaField("c", "INTEGER", mode="NULLABLE"), + ], + ) + ) cursor = connection.cursor() - cursor.execute('SELECT a, b, c FROM hello_world WHERE d > 3;') + cursor.execute("SELECT a, b, c FROM hello_world WHERE d > 3;") # Verify the description. self.assertEqual(len(cursor.description), 3) a_name, a_type, _, _, _, _, a_null_ok = cursor.description[0] - self.assertEqual(a_name, 'a') - self.assertEqual(a_type, 'STRING') + self.assertEqual(a_name, "a") + self.assertEqual(a_type, "STRING") self.assertEqual(a_type, dbapi.STRING) self.assertTrue(a_null_ok) b_name, b_type, _, _, _, _, b_null_ok = cursor.description[1] - self.assertEqual(b_name, 'b') - self.assertEqual(b_type, 'STRING') + self.assertEqual(b_name, "b") + self.assertEqual(b_type, "STRING") self.assertEqual(b_type, dbapi.STRING) self.assertFalse(b_null_ok) c_name, c_type, _, _, _, _, c_null_ok = cursor.description[2] - self.assertEqual(c_name, 'c') - self.assertEqual(c_type, 'INTEGER') + self.assertEqual(c_name, "c") + self.assertEqual(c_type, "INTEGER") self.assertEqual(c_type, dbapi.NUMBER) self.assertTrue(c_null_ok) # Verify the results. self.assertEqual(cursor.rowcount, 2) row = cursor.fetchone() - self.assertEqual(row, ('hello', 'world', 1)) + self.assertEqual(row, ("hello", "world", 1)) row = cursor.fetchone() - self.assertEqual(row, ('howdy', 'y\'all', 2)) + self.assertEqual(row, ("howdy", "y'all", 2)) row = cursor.fetchone() self.assertIsNone(row) @@ -249,60 +255,64 @@ def test_execute_raises_if_result_raises(self): from google.cloud.bigquery.dbapi import exceptions job = mock.create_autospec(job.QueryJob) - job.result.side_effect = google.cloud.exceptions.GoogleCloudError('') + job.result.side_effect = google.cloud.exceptions.GoogleCloudError("") client = mock.create_autospec(client.Client) client.query.return_value = job connection = connect(client) cursor = connection.cursor() with self.assertRaises(exceptions.DatabaseError): - cursor.execute('SELECT 1') + cursor.execute("SELECT 1") def test_executemany_w_dml(self): from google.cloud.bigquery.dbapi import connect - connection = connect( - self._mock_client(rows=[], num_dml_affected_rows=12)) + + connection = connect(self._mock_client(rows=[], num_dml_affected_rows=12)) cursor = connection.cursor() cursor.executemany( - 'DELETE FROM UserSessions WHERE user_id = %s;', - (('test',), ('anothertest',))) + "DELETE FROM UserSessions WHERE user_id = %s;", + (("test",), ("anothertest",)), + ) self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor + formatted_operation = cursor._format_operation( - 'SELECT %(somevalue)s, %(a `weird` one)s;', - { - 'somevalue': 'hi', - 'a `weird` one': 'world', - }) + "SELECT %(somevalue)s, %(a `weird` one)s;", + {"somevalue": "hi", "a `weird` one": "world"}, + ) self.assertEqual( - formatted_operation, 'SELECT @`somevalue`, @`a \\`weird\\` one`;') + formatted_operation, "SELECT @`somevalue`, @`a \\`weird\\` one`;" + ) def test__format_operation_w_wrong_dict(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery.dbapi import cursor + self.assertRaises( dbapi.ProgrammingError, cursor._format_operation, - 'SELECT %(somevalue)s, %(othervalue)s;', - { - 'somevalue-not-here': 'hi', - 'othervalue': 'world', - }) + "SELECT %(somevalue)s, %(othervalue)s;", + {"somevalue-not-here": "hi", "othervalue": "world"}, + ) def test__format_operation_w_sequence(self): from google.cloud.bigquery.dbapi import cursor + formatted_operation = cursor._format_operation( - 'SELECT %s, %s;', ('hello', 'world')) - self.assertEqual(formatted_operation, 'SELECT ?, ?;') + "SELECT %s, %s;", ("hello", "world") + ) + self.assertEqual(formatted_operation, "SELECT ?, ?;") def test__format_operation_w_too_short_sequence(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery.dbapi import cursor + self.assertRaises( dbapi.ProgrammingError, cursor._format_operation, - 'SELECT %s, %s;', - ('hello',)) + "SELECT %s, %s;", + ("hello",), + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py index 1803ea6d3c10..e05660ffed14 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py @@ -21,20 +21,22 @@ class TestTypes(unittest.TestCase): def test_binary_type(self): - self.assertEqual('BYTES', types.BINARY) - self.assertEqual('RECORD', types.BINARY) - self.assertEqual('STRUCT', types.BINARY) - self.assertNotEqual('STRING', types.BINARY) + self.assertEqual("BYTES", types.BINARY) + self.assertEqual("RECORD", types.BINARY) + self.assertEqual("STRUCT", types.BINARY) + self.assertNotEqual("STRING", types.BINARY) def test_binary_constructor(self): - self.assertEqual(types.Binary(u'hello'), b'hello') - self.assertEqual(types.Binary(u'\u1f60'), u'\u1f60'.encode('utf-8')) + self.assertEqual(types.Binary(u"hello"), b"hello") + self.assertEqual(types.Binary(u"\u1f60"), u"\u1f60".encode("utf-8")) def test_timefromticks(self): somedatetime = datetime.datetime( - 2017, 2, 18, 12, 47, 26, tzinfo=google.cloud._helpers.UTC) + 2017, 2, 18, 12, 47, 26, tzinfo=google.cloud._helpers.UTC + ) epoch = datetime.datetime(1970, 1, 1, tzinfo=google.cloud._helpers.UTC) ticks = (somedatetime - epoch).total_seconds() self.assertEqual( types.TimeFromTicks(ticks, google.cloud._helpers.UTC), - datetime.time(12, 47, 26, tzinfo=google.cloud._helpers.UTC)) + datetime.time(12, 47, 26, tzinfo=google.cloud._helpers.UTC), + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 480645122554..ddf95e317969 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -22,15 +22,15 @@ class TestExternalConfig(unittest.TestCase): - SOURCE_URIS = ['gs://foo', 'gs://bar'] + SOURCE_URIS = ["gs://foo", "gs://bar"] BASE_RESOURCE = { - 'sourceFormat': '', - 'sourceUris': SOURCE_URIS, - 'maxBadRecords': 17, - 'autodetect': True, - 'ignoreUnknownValues': False, - 'compression': 'compression', + "sourceFormat": "", + "sourceUris": SOURCE_URIS, + "maxBadRecords": 17, + "autodetect": True, + "ignoreUnknownValues": False, + "compression": "compression", } def test_from_api_repr_base(self): @@ -43,23 +43,24 @@ def test_from_api_repr_base(self): got_resource = ec.to_api_repr() self.assertEqual(got_resource, self.BASE_RESOURCE) - resource = _copy_and_update(self.BASE_RESOURCE, { - 'schema': { - 'fields': [ - { - 'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None, - }, - ], + resource = _copy_and_update( + self.BASE_RESOURCE, + { + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + } + ] + } }, - }) + ) ec = external_config.ExternalConfig.from_api_repr(resource) self._verify_base(ec) - exp_schema = [ - schema.SchemaField('full_name', 'STRING', mode='REQUIRED') - ] + exp_schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] self.assertEqual(ec.schema, exp_schema) self.assertIsNone(ec.options) @@ -67,75 +68,76 @@ def test_from_api_repr_base(self): self.assertEqual(got_resource, resource) def test_to_api_repr_base(self): - ec = external_config.ExternalConfig('') + ec = external_config.ExternalConfig("") ec.source_uris = self.SOURCE_URIS ec.max_bad_records = 17 ec.autodetect = True ec.ignore_unknown_values = False - ec.compression = 'compression' - ec.schema = [ - schema.SchemaField('full_name', 'STRING', mode='REQUIRED') - ] + ec.compression = "compression" + ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - 'fields': [ + "fields": [ { - 'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None, - }, + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + } ] } got_resource = ec.to_api_repr() exp_resource = { - 'sourceFormat': '', - 'sourceUris': self.SOURCE_URIS, - 'maxBadRecords': 17, - 'autodetect': True, - 'ignoreUnknownValues': False, - 'compression': 'compression', - 'schema': exp_schema + "sourceFormat": "", + "sourceUris": self.SOURCE_URIS, + "maxBadRecords": 17, + "autodetect": True, + "ignoreUnknownValues": False, + "compression": "compression", + "schema": exp_schema, } self.assertEqual(got_resource, exp_resource) def test_schema_None(self): - ec = external_config.ExternalConfig('') + ec = external_config.ExternalConfig("") ec.schema = None got = ec.to_api_repr() - want = {'sourceFormat': '', 'schema': None} + want = {"sourceFormat": "", "schema": None} self.assertEqual(got, want) def test_schema_empty(self): - ec = external_config.ExternalConfig('') + ec = external_config.ExternalConfig("") ec.schema = [] got = ec.to_api_repr() - want = {'sourceFormat': '', 'schema': {'fields': []}} + want = {"sourceFormat": "", "schema": {"fields": []}} self.assertEqual(got, want) def _verify_base(self, ec): self.assertEqual(ec.autodetect, True) - self.assertEqual(ec.compression, 'compression') + self.assertEqual(ec.compression, "compression") self.assertEqual(ec.ignore_unknown_values, False) self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) def test_to_api_repr_source_format(self): - ec = external_config.ExternalConfig('CSV') + ec = external_config.ExternalConfig("CSV") got = ec.to_api_repr() - want = {'sourceFormat': 'CSV'} + want = {"sourceFormat": "CSV"} self.assertEqual(got, want) def test_from_api_repr_sheets(self): - resource = _copy_and_update(self.BASE_RESOURCE, { - 'sourceFormat': 'GOOGLE_SHEETS', - 'googleSheetsOptions': {'skipLeadingRows': '123'}, - }) + resource = _copy_and_update( + self.BASE_RESOURCE, + { + "sourceFormat": "GOOGLE_SHEETS", + "googleSheetsOptions": {"skipLeadingRows": "123"}, + }, + ) ec = external_config.ExternalConfig.from_api_repr(resource) self._verify_base(ec) - self.assertEqual(ec.source_format, 'GOOGLE_SHEETS') + self.assertEqual(ec.source_format, "GOOGLE_SHEETS") self.assertIsInstance(ec.options, external_config.GoogleSheetsOptions) self.assertEqual(ec.options.skip_leading_rows, 123) @@ -143,21 +145,21 @@ def test_from_api_repr_sheets(self): self.assertEqual(got_resource, resource) - del resource['googleSheetsOptions']['skipLeadingRows'] + del resource["googleSheetsOptions"]["skipLeadingRows"] ec = external_config.ExternalConfig.from_api_repr(resource) self.assertIsNone(ec.options.skip_leading_rows) got_resource = ec.to_api_repr() self.assertEqual(got_resource, resource) def test_to_api_repr_sheets(self): - ec = external_config.ExternalConfig('GOOGLE_SHEETS') + ec = external_config.ExternalConfig("GOOGLE_SHEETS") options = external_config.GoogleSheetsOptions() options.skip_leading_rows = 123 ec._options = options exp_resource = { - 'sourceFormat': 'GOOGLE_SHEETS', - 'googleSheetsOptions': {'skipLeadingRows': '123'}, + "sourceFormat": "GOOGLE_SHEETS", + "googleSheetsOptions": {"skipLeadingRows": "123"}, } got_resource = ec.to_api_repr() @@ -165,60 +167,63 @@ def test_to_api_repr_sheets(self): self.assertEqual(got_resource, exp_resource) def test_from_api_repr_csv(self): - resource = _copy_and_update(self.BASE_RESOURCE, { - 'sourceFormat': 'CSV', - 'csvOptions': { - 'fieldDelimiter': 'fieldDelimiter', - 'skipLeadingRows': '123', - 'quote': 'quote', - 'allowQuotedNewlines': True, - 'allowJaggedRows': False, - 'encoding': 'encoding', + resource = _copy_and_update( + self.BASE_RESOURCE, + { + "sourceFormat": "CSV", + "csvOptions": { + "fieldDelimiter": "fieldDelimiter", + "skipLeadingRows": "123", + "quote": "quote", + "allowQuotedNewlines": True, + "allowJaggedRows": False, + "encoding": "encoding", + }, }, - }) + ) ec = external_config.ExternalConfig.from_api_repr(resource) self._verify_base(ec) - self.assertEqual(ec.source_format, 'CSV') + self.assertEqual(ec.source_format, "CSV") self.assertIsInstance(ec.options, external_config.CSVOptions) - self.assertEqual(ec.options.field_delimiter, 'fieldDelimiter') + self.assertEqual(ec.options.field_delimiter, "fieldDelimiter") self.assertEqual(ec.options.skip_leading_rows, 123) - self.assertEqual(ec.options.quote_character, 'quote') + self.assertEqual(ec.options.quote_character, "quote") self.assertEqual(ec.options.allow_quoted_newlines, True) self.assertEqual(ec.options.allow_jagged_rows, False) - self.assertEqual(ec.options.encoding, 'encoding') + self.assertEqual(ec.options.encoding, "encoding") got_resource = ec.to_api_repr() self.assertEqual(got_resource, resource) - del resource['csvOptions']['skipLeadingRows'] + del resource["csvOptions"]["skipLeadingRows"] ec = external_config.ExternalConfig.from_api_repr(resource) self.assertIsNone(ec.options.skip_leading_rows) got_resource = ec.to_api_repr() self.assertEqual(got_resource, resource) def test_to_api_repr_csv(self): - ec = external_config.ExternalConfig('CSV') + ec = external_config.ExternalConfig("CSV") options = external_config.CSVOptions() options.allow_quoted_newlines = True - options.encoding = 'encoding' - options.field_delimiter = 'fieldDelimiter' - options.quote_character = 'quote' + options.encoding = "encoding" + options.field_delimiter = "fieldDelimiter" + options.quote_character = "quote" options.skip_leading_rows = 123 options.allow_jagged_rows = False ec._options = options exp_resource = { - 'sourceFormat': 'CSV', - 'csvOptions': { - 'fieldDelimiter': 'fieldDelimiter', - 'skipLeadingRows': '123', - 'quote': 'quote', - 'allowQuotedNewlines': True, - 'allowJaggedRows': False, - 'encoding': 'encoding', + "sourceFormat": "CSV", + "csvOptions": { + "fieldDelimiter": "fieldDelimiter", + "skipLeadingRows": "123", + "quote": "quote", + "allowQuotedNewlines": True, + "allowJaggedRows": False, + "encoding": "encoding", }, } @@ -227,125 +232,126 @@ def test_to_api_repr_csv(self): self.assertEqual(got_resource, exp_resource) def test_from_api_repr_bigtable(self): - qualifier_encoded = base64.standard_b64encode(b'q').decode('ascii') - resource = _copy_and_update(self.BASE_RESOURCE, { - 'sourceFormat': 'BIGTABLE', - 'bigtableOptions': { - 'ignoreUnspecifiedColumnFamilies': True, - 'readRowkeyAsString': False, - 'columnFamilies': [ - { - 'familyId': 'familyId', - 'type': 'type', - 'encoding': 'encoding', - 'columns': [ - { - 'qualifierString': 'q', - 'fieldName': 'fieldName1', - 'type': 'type1', - 'encoding': 'encoding1', - 'onlyReadLatest': True, - }, - { - 'qualifierEncoded': qualifier_encoded, - 'fieldName': 'fieldName2', - 'type': 'type2', - 'encoding': 'encoding2', - }, - - ], - 'onlyReadLatest': False, - } - ], + qualifier_encoded = base64.standard_b64encode(b"q").decode("ascii") + resource = _copy_and_update( + self.BASE_RESOURCE, + { + "sourceFormat": "BIGTABLE", + "bigtableOptions": { + "ignoreUnspecifiedColumnFamilies": True, + "readRowkeyAsString": False, + "columnFamilies": [ + { + "familyId": "familyId", + "type": "type", + "encoding": "encoding", + "columns": [ + { + "qualifierString": "q", + "fieldName": "fieldName1", + "type": "type1", + "encoding": "encoding1", + "onlyReadLatest": True, + }, + { + "qualifierEncoded": qualifier_encoded, + "fieldName": "fieldName2", + "type": "type2", + "encoding": "encoding2", + }, + ], + "onlyReadLatest": False, + } + ], + }, }, - }) + ) ec = external_config.ExternalConfig.from_api_repr(resource) self._verify_base(ec) - self.assertEqual(ec.source_format, 'BIGTABLE') + self.assertEqual(ec.source_format, "BIGTABLE") self.assertIsInstance(ec.options, external_config.BigtableOptions) self.assertEqual(ec.options.ignore_unspecified_column_families, True) self.assertEqual(ec.options.read_rowkey_as_string, False) self.assertEqual(len(ec.options.column_families), 1) fam1 = ec.options.column_families[0] self.assertIsInstance(fam1, external_config.BigtableColumnFamily) - self.assertEqual(fam1.family_id, 'familyId') - self.assertEqual(fam1.type_, 'type') - self.assertEqual(fam1.encoding, 'encoding') + self.assertEqual(fam1.family_id, "familyId") + self.assertEqual(fam1.type_, "type") + self.assertEqual(fam1.encoding, "encoding") self.assertEqual(len(fam1.columns), 2) self.assertFalse(fam1.only_read_latest) col1 = fam1.columns[0] - self.assertEqual(col1.qualifier_string, 'q') - self.assertEqual(col1.field_name, 'fieldName1') - self.assertEqual(col1.type_, 'type1') - self.assertEqual(col1.encoding, 'encoding1') + self.assertEqual(col1.qualifier_string, "q") + self.assertEqual(col1.field_name, "fieldName1") + self.assertEqual(col1.type_, "type1") + self.assertEqual(col1.encoding, "encoding1") self.assertTrue(col1.only_read_latest) self.assertIsNone(col1.qualifier_encoded) col2 = ec.options.column_families[0].columns[1] - self.assertEqual(col2.qualifier_encoded, b'q') - self.assertEqual(col2.field_name, 'fieldName2') - self.assertEqual(col2.type_, 'type2') - self.assertEqual(col2.encoding, 'encoding2') + self.assertEqual(col2.qualifier_encoded, b"q") + self.assertEqual(col2.field_name, "fieldName2") + self.assertEqual(col2.type_, "type2") + self.assertEqual(col2.encoding, "encoding2") got_resource = ec.to_api_repr() self.assertEqual(got_resource, resource) def test_to_api_repr_bigtable(self): - ec = external_config.ExternalConfig('BIGTABLE') + ec = external_config.ExternalConfig("BIGTABLE") options = external_config.BigtableOptions() options.ignore_unspecified_column_families = True options.read_rowkey_as_string = False ec._options = options fam1 = external_config.BigtableColumnFamily() - fam1.family_id = 'familyId' - fam1.type_ = 'type' - fam1.encoding = 'encoding' + fam1.family_id = "familyId" + fam1.type_ = "type" + fam1.encoding = "encoding" fam1.only_read_latest = False col1 = external_config.BigtableColumn() - col1.qualifier_string = 'q' - col1.field_name = 'fieldName1' - col1.type_ = 'type1' - col1.encoding = 'encoding1' + col1.qualifier_string = "q" + col1.field_name = "fieldName1" + col1.type_ = "type1" + col1.encoding = "encoding1" col1.only_read_latest = True col2 = external_config.BigtableColumn() - col2.qualifier_encoded = b'q' - col2.field_name = 'fieldName2' - col2.type_ = 'type2' - col2.encoding = 'encoding2' + col2.qualifier_encoded = b"q" + col2.field_name = "fieldName2" + col2.type_ = "type2" + col2.encoding = "encoding2" fam1.columns = [col1, col2] options.column_families = [fam1] - qualifier_encoded = base64.standard_b64encode(b'q').decode('ascii') + qualifier_encoded = base64.standard_b64encode(b"q").decode("ascii") exp_resource = { - 'sourceFormat': 'BIGTABLE', - 'bigtableOptions': { - 'ignoreUnspecifiedColumnFamilies': True, - 'readRowkeyAsString': False, - 'columnFamilies': [ + "sourceFormat": "BIGTABLE", + "bigtableOptions": { + "ignoreUnspecifiedColumnFamilies": True, + "readRowkeyAsString": False, + "columnFamilies": [ { - 'familyId': 'familyId', - 'type': 'type', - 'encoding': 'encoding', - 'columns': [ + "familyId": "familyId", + "type": "type", + "encoding": "encoding", + "columns": [ { - 'qualifierString': 'q', - 'fieldName': 'fieldName1', - 'type': 'type1', - 'encoding': 'encoding1', - 'onlyReadLatest': True, + "qualifierString": "q", + "fieldName": "fieldName1", + "type": "type1", + "encoding": "encoding1", + "onlyReadLatest": True, }, { - 'qualifierEncoded': qualifier_encoded, - 'fieldName': 'fieldName2', - 'type': 'type2', - 'encoding': 'encoding2', + "qualifierEncoded": qualifier_encoded, + "fieldName": "fieldName2", + "type": "type2", + "encoding": "encoding2", }, - ], - 'onlyReadLatest': False, + "onlyReadLatest": False, } ], }, diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 998a397e0bed..8d5aef8f4603 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -17,6 +17,7 @@ import mock from six.moves import http_client + try: import pandas except (ImportError, AttributeError): # pragma: NO COVER @@ -29,14 +30,13 @@ def _make_credentials(): return mock.Mock(spec=google.auth.credentials.Credentials) -def _make_client(project='test-project', connection=None): +def _make_client(project="test-project", connection=None): from google.cloud.bigquery.client import Client if connection is None: connection = _make_connection() - client = Client( - project=project, credentials=_make_credentials(), _http=object()) + client = Client(project=project, credentials=_make_credentials(), _http=object()) client._connection = connection return client @@ -46,25 +46,21 @@ def _make_connection(*responses): from google.cloud.exceptions import NotFound mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) - mock_conn.api_request.side_effect = list(responses) + [NotFound('miss')] + mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] return mock_conn class Test__error_result_to_exception(unittest.TestCase): - def _call_fut(self, *args, **kwargs): from google.cloud.bigquery import job return job._error_result_to_exception(*args, **kwargs) def test_simple(self): - error_result = { - 'reason': 'invalid', - 'message': 'bad request' - } + error_result = {"reason": "invalid", "message": "bad request"} exception = self._call_fut(error_result) self.assertEqual(exception.code, http_client.BAD_REQUEST) - self.assertTrue(exception.message.startswith('bad request')) + self.assertTrue(exception.message.startswith("bad request")) self.assertIn(error_result, exception.errors) def test_missing_reason(self): @@ -74,9 +70,9 @@ def test_missing_reason(self): class Test_JobReference(unittest.TestCase): - JOB_ID = 'job-id' - PROJECT = 'test-project-123' - LOCATION = 'us-central' + JOB_ID = "job-id" + PROJECT = "test-project-123" + LOCATION = "us-central" @staticmethod def _get_target_class(): @@ -97,17 +93,20 @@ def test_ctor(self): def test__to_api_repr(self): job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) - self.assertEqual(job_ref._to_api_repr(), { - 'jobId': self.JOB_ID, - 'projectId': self.PROJECT, - 'location': self.LOCATION, - }) + self.assertEqual( + job_ref._to_api_repr(), + { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": self.LOCATION, + }, + ) def test_from_api_repr(self): api_repr = { - 'jobId': self.JOB_ID, - 'projectId': self.PROJECT, - 'location': self.LOCATION, + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": self.LOCATION, } job_ref = self._get_target_class()._from_api_repr(api_repr) @@ -118,9 +117,9 @@ def test_from_api_repr(self): class Test_AsyncJob(unittest.TestCase): - JOB_ID = 'job-id' - PROJECT = 'test-project-123' - LOCATION = 'us-central' + JOB_ID = "job-id" + PROJECT = "test-project-123" + LOCATION = "us-central" @staticmethod def _get_target_class(): @@ -133,7 +132,7 @@ def _make_one(self, job_id, client): def _make_derived_class(self): class Derived(self._get_target_class()): - _JOB_TYPE = 'derived' + _JOB_TYPE = "derived" return Derived @@ -158,22 +157,17 @@ def test_ctor_w_bare_job_id(self): self.assertIs(job._client, client) self.assertEqual( job._properties, - { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - } + {"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}}, ) self.assertIsInstance(job._completion_lock, type(threading.Lock())) self.assertEqual( - job.path, - '/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID)) + job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + ) def test_ctor_w_job_ref(self): import threading - other_project = 'other-project-234' + other_project = "other-project-234" client = _make_client(project=other_project) job_ref = self._job_reference(self.JOB_ID, self.PROJECT, self.LOCATION) job = self._make_one(job_ref, client) @@ -185,18 +179,18 @@ def test_ctor_w_job_ref(self): self.assertEqual( job._properties, { - 'jobReference': { - 'projectId': self.PROJECT, - 'location': self.LOCATION, - 'jobId': self.JOB_ID, - }, - } + "jobReference": { + "projectId": self.PROJECT, + "location": self.LOCATION, + "jobId": self.JOB_ID, + } + }, ) self.assertFalse(job._result_set) self.assertIsInstance(job._completion_lock, type(threading.Lock())) self.assertEqual( - job.path, - '/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID)) + job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + ) def test__require_client_w_none(self): client = _make_client(project=self.PROJECT) @@ -215,7 +209,7 @@ def test_job_type(self): client = _make_client(project=self.PROJECT) derived = self._make_derived(self.JOB_ID, client) - self.assertEqual(derived.job_type, 'derived') + self.assertEqual(derived.job_type, "derived") def test_labels_miss(self): client = _make_client(project=self.PROJECT) @@ -226,40 +220,38 @@ def test_labels_update_in_place(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) labels = job.labels - labels['foo'] = 'bar' # update in place - self.assertEqual(job.labels, {'foo': 'bar'}) + labels["foo"] = "bar" # update in place + self.assertEqual(job.labels, {"foo": "bar"}) def test_labels_hit(self): - labels = { - 'foo': 'bar', - } + labels = {"foo": "bar"} client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._properties['labels'] = labels + job._properties["labels"] = labels self.assertEqual(job.labels, labels) def test_etag(self): - etag = 'ETAG-123' + etag = "ETAG-123" client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) self.assertIsNone(job.etag) - job._properties['etag'] = etag + job._properties["etag"] = etag self.assertEqual(job.etag, etag) def test_self_link(self): - self_link = 'https://api.example.com/123' + self_link = "https://api.example.com/123" client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) self.assertIsNone(job.self_link) - job._properties['selfLink'] = self_link + job._properties["selfLink"] = self_link self.assertEqual(job.self_link, self_link) def test_user_email(self): - user_email = 'user@example.com' + user_email = "user@example.com" client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) self.assertIsNone(job.user_email) - job._properties['user_email'] = user_email + job._properties["user_email"] = user_email self.assertEqual(job.user_email, user_email) @staticmethod @@ -267,9 +259,10 @@ def _datetime_and_millis(): import datetime import pytz from google.cloud._helpers import _millis + now = datetime.datetime.utcnow().replace( - microsecond=123000, # stats timestamps have ms precision - tzinfo=pytz.UTC) + microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision + ) return now, _millis(now) def test_created(self): @@ -277,9 +270,9 @@ def test_created(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) self.assertIsNone(job.created) - stats = job._properties['statistics'] = {} + stats = job._properties["statistics"] = {} self.assertIsNone(job.created) - stats['creationTime'] = millis + stats["creationTime"] = millis self.assertEqual(job.created, now) def test_started(self): @@ -287,9 +280,9 @@ def test_started(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) self.assertIsNone(job.started) - stats = job._properties['statistics'] = {} + stats = job._properties["statistics"] = {} self.assertIsNone(job.started) - stats['startTime'] = millis + stats["startTime"] = millis self.assertEqual(job.started, now) def test_ended(self): @@ -297,63 +290,65 @@ def test_ended(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) self.assertIsNone(job.ended) - stats = job._properties['statistics'] = {} + stats = job._properties["statistics"] = {} self.assertIsNone(job.ended) - stats['endTime'] = millis + stats["endTime"] = millis self.assertEqual(job.ended, now) def test__job_statistics(self): - statistics = {'foo': 'bar'} + statistics = {"foo": "bar"} client = _make_client(project=self.PROJECT) derived = self._make_derived(self.JOB_ID, client) self.assertEqual(derived._job_statistics(), {}) - stats = derived._properties['statistics'] = {} + stats = derived._properties["statistics"] = {} self.assertEqual(derived._job_statistics(), {}) - stats['derived'] = statistics + stats["derived"] = statistics self.assertEqual(derived._job_statistics(), statistics) def test_error_result(self): error_result = { - 'debugInfo': 'DEBUG INFO', - 'location': 'LOCATION', - 'message': 'MESSAGE', - 'reason': 'REASON' + "debugInfo": "DEBUG INFO", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", } client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) self.assertIsNone(job.error_result) - status = job._properties['status'] = {} + status = job._properties["status"] = {} self.assertIsNone(job.error_result) - status['errorResult'] = error_result + status["errorResult"] = error_result self.assertEqual(job.error_result, error_result) def test_errors(self): - errors = [{ - 'debugInfo': 'DEBUG INFO', - 'location': 'LOCATION', - 'message': 'MESSAGE', - 'reason': 'REASON' - }] + errors = [ + { + "debugInfo": "DEBUG INFO", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", + } + ] client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) self.assertIsNone(job.errors) - status = job._properties['status'] = {} + status = job._properties["status"] = {} self.assertIsNone(job.errors) - status['errors'] = errors + status["errors"] = errors self.assertEqual(job.errors, errors) def test_state(self): - state = 'STATE' + state = "STATE" client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) self.assertIsNone(job.state) - status = job._properties['status'] = {} + status = job._properties["status"] = {} self.assertIsNone(job.state) - status['state'] = state + status["state"] = state self.assertEqual(job.state, state) def test__scrub_local_properties(self): - before = {'foo': 'bar'} + before = {"foo": "bar"} resource = before.copy() client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -361,7 +356,7 @@ def test__scrub_local_properties(self): self.assertEqual(resource, before) def test__copy_configuration_properties(self): - before = {'foo': 'bar'} + before = {"foo": "bar"} resource = before.copy() client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -376,18 +371,14 @@ def _set_properties_job(self): job._copy_configuration_properties = mock.Mock() job._set_future_result = mock.Mock() job._properties = { - 'jobReference': job._properties['jobReference'], - 'foo': 'bar', + "jobReference": job._properties["jobReference"], + "foo": "bar", } return job def test__set_properties_no_stats(self): - config = { - 'test': True, - } - resource = { - 'configuration': config, - } + config = {"test": True} + resource = {"configuration": config} job = self._set_properties_job() job._set_properties(resource) @@ -399,22 +390,15 @@ def test__set_properties_no_stats(self): def test__set_properties_w_creation_time(self): now, millis = self._datetime_and_millis() - config = { - 'test': True, - } - stats = { - 'creationTime': str(millis), - } - resource = { - 'configuration': config, - 'statistics': stats, - } + config = {"test": True} + stats = {"creationTime": str(millis)} + resource = {"configuration": config, "statistics": stats} job = self._set_properties_job() job._set_properties(resource) cleaned = copy.deepcopy(resource) - cleaned['statistics']['creationTime'] = float(millis) + cleaned["statistics"]["creationTime"] = float(millis) self.assertEqual(job._properties, cleaned) job._scrub_local_properties.assert_called_once_with(resource) @@ -422,22 +406,15 @@ def test__set_properties_w_creation_time(self): def test__set_properties_w_start_time(self): now, millis = self._datetime_and_millis() - config = { - 'test': True, - } - stats = { - 'startTime': str(millis), - } - resource = { - 'configuration': config, - 'statistics': stats, - } + config = {"test": True} + stats = {"startTime": str(millis)} + resource = {"configuration": config, "statistics": stats} job = self._set_properties_job() job._set_properties(resource) cleaned = copy.deepcopy(resource) - cleaned['statistics']['startTime'] = float(millis) + cleaned["statistics"]["startTime"] = float(millis) self.assertEqual(job._properties, cleaned) job._scrub_local_properties.assert_called_once_with(resource) @@ -445,22 +422,15 @@ def test__set_properties_w_start_time(self): def test__set_properties_w_end_time(self): now, millis = self._datetime_and_millis() - config = { - 'test': True, - } - stats = { - 'endTime': str(millis), - } - resource = { - 'configuration': config, - 'statistics': stats, - } + config = {"test": True} + stats = {"endTime": str(millis)} + resource = {"configuration": config, "statistics": stats} job = self._set_properties_job() job._set_properties(resource) cleaned = copy.deepcopy(resource) - cleaned['statistics']['endTime'] = float(millis) + cleaned["statistics"]["endTime"] = float(millis) self.assertEqual(job._properties, cleaned) job._scrub_local_properties.assert_called_once_with(resource) @@ -474,47 +444,38 @@ def test__get_resource_config_missing_job_ref(self): klass._get_resource_config(resource) def test__get_resource_config_missing_job_id(self): - resource = { - 'jobReference': {}, - } + resource = {"jobReference": {}} klass = self._make_derived_class() with self.assertRaises(KeyError): klass._get_resource_config(resource) def test__get_resource_config_missing_configuration(self): - resource = { - 'jobReference': {'jobId': self.JOB_ID}, - } + resource = {"jobReference": {"jobId": self.JOB_ID}} klass = self._make_derived_class() with self.assertRaises(KeyError): klass._get_resource_config(resource) def test__get_resource_config_missing_config_type(self): - resource = { - 'jobReference': {'jobId': self.JOB_ID}, - 'configuration': {}, - } + resource = {"jobReference": {"jobId": self.JOB_ID}, "configuration": {}} klass = self._make_derived_class() with self.assertRaises(KeyError): klass._get_resource_config(resource) def test__get_resource_config_ok(self): - derived_config = {'foo': 'bar'} + derived_config = {"foo": "bar"} resource = { - 'jobReference': {'jobId': self.JOB_ID}, - 'configuration': { - 'derived': derived_config, - }, + "jobReference": {"jobId": self.JOB_ID}, + "configuration": {"derived": derived_config}, } klass = self._make_derived_class() job_id, config = klass._get_resource_config(resource) self.assertEqual(job_id, self.JOB_ID) - self.assertEqual(config, {'derived': derived_config}) + self.assertEqual(config, {"derived": derived_config}) def test__build_resource(self): client = _make_client(project=self.PROJECT) @@ -530,7 +491,7 @@ def test_to_api_repr(self): def test__begin_already(self): job = self._set_properties_job() - job._properties['status'] = {'state': 'WHATEVER'} + job._properties["status"] = {"state": "WHATEVER"} with self.assertRaises(ValueError): job._begin() @@ -539,14 +500,12 @@ def test__begin_defaults(self): from google.cloud.bigquery.retry import DEFAULT_RETRY resource = { - 'jobReference': { - 'jobId': self.JOB_ID, - 'projectId': self.PROJECT, - 'location': None, + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, }, - 'configuration': { - 'test': True, - } + "configuration": {"test": True}, } job = self._set_properties_job() builder = job.to_api_repr = mock.Mock() @@ -558,8 +517,8 @@ def test__begin_defaults(self): call_api.assert_called_once_with( DEFAULT_RETRY, - method='POST', - path='/projects/{}/jobs'.format(self.PROJECT), + method="POST", + path="/projects/{}/jobs".format(self.PROJECT), data=resource, ) self.assertEqual(job._properties, resource) @@ -567,16 +526,14 @@ def test__begin_defaults(self): def test__begin_explicit(self): from google.cloud.bigquery.retry import DEFAULT_RETRY - other_project = 'other-project-234' + other_project = "other-project-234" resource = { - 'jobReference': { - 'jobId': self.JOB_ID, - 'projectId': self.PROJECT, - 'location': None, + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, }, - 'configuration': { - 'test': True, - } + "configuration": {"test": True}, } job = self._set_properties_job() builder = job.to_api_repr = mock.Mock() @@ -590,8 +547,8 @@ def test__begin_explicit(self): call_api.assert_called_once_with( retry, - method='POST', - path='/projects/{}/jobs'.format(self.PROJECT), + method="POST", + path="/projects/{}/jobs".format(self.PROJECT), data=resource, ) self.assertEqual(job._properties, resource) @@ -601,35 +558,30 @@ def test_exists_defaults_miss(self): from google.cloud.bigquery.retry import DEFAULT_RETRY job = self._set_properties_job() - job._properties['jobReference']['location'] = self.LOCATION + job._properties["jobReference"]["location"] = self.LOCATION call_api = job._client._call_api = mock.Mock() - call_api.side_effect = NotFound('testing') + call_api.side_effect = NotFound("testing") self.assertFalse(job.exists()) call_api.assert_called_once_with( DEFAULT_RETRY, - method='GET', - path='/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID), - query_params={ - 'fields': 'id', - 'location': self.LOCATION, - } + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"fields": "id", "location": self.LOCATION}, ) def test_exists_explicit_hit(self): from google.cloud.bigquery.retry import DEFAULT_RETRY - other_project = 'other-project-234' + other_project = "other-project-234" resource = { - 'jobReference': { - 'jobId': self.JOB_ID, - 'projectId': self.PROJECT, - 'location': None, + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, }, - 'configuration': { - 'test': True, - } + "configuration": {"test": True}, } job = self._set_properties_job() client = _make_client(project=other_project) @@ -641,26 +593,24 @@ def test_exists_explicit_hit(self): call_api.assert_called_once_with( retry, - method='GET', - path='/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID), - query_params={'fields': 'id'} + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"fields": "id"}, ) def test_reload_defaults(self): from google.cloud.bigquery.retry import DEFAULT_RETRY resource = { - 'jobReference': { - 'jobId': self.JOB_ID, - 'projectId': self.PROJECT, - 'location': None, + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, }, - 'configuration': { - 'test': True, - } + "configuration": {"test": True}, } job = self._set_properties_job() - job._properties['jobReference']['location'] = self.LOCATION + job._properties["jobReference"]["location"] = self.LOCATION call_api = job._client._call_api = mock.Mock() call_api.return_value = resource @@ -668,25 +618,23 @@ def test_reload_defaults(self): call_api.assert_called_once_with( DEFAULT_RETRY, - method='GET', - path='/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID), - query_params={'location': self.LOCATION}, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"location": self.LOCATION}, ) self.assertEqual(job._properties, resource) def test_reload_explicit(self): from google.cloud.bigquery.retry import DEFAULT_RETRY - other_project = 'other-project-234' + other_project = "other-project-234" resource = { - 'jobReference': { - 'jobId': self.JOB_ID, - 'projectId': self.PROJECT, - 'location': None, + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, }, - 'configuration': { - 'test': True, - } + "configuration": {"test": True}, } job = self._set_properties_job() client = _make_client(project=other_project) @@ -698,51 +646,46 @@ def test_reload_explicit(self): call_api.assert_called_once_with( retry, - method='GET', - path='/projects/{}/jobs/{}'.format(self.PROJECT, self.JOB_ID), + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), query_params={}, ) self.assertEqual(job._properties, resource) def test_cancel_defaults(self): resource = { - 'jobReference': { - 'jobId': self.JOB_ID, - 'projectId': self.PROJECT, - 'location': None, + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, }, - 'configuration': { - 'test': True, - } + "configuration": {"test": True}, } - response = {'job': resource} + response = {"job": resource} job = self._set_properties_job() - job._properties['jobReference']['location'] = self.LOCATION + job._properties["jobReference"]["location"] = self.LOCATION connection = job._client._connection = _make_connection(response) self.assertTrue(job.cancel()) connection.api_request.assert_called_once_with( - method='POST', - path='/projects/{}/jobs/{}/cancel'.format( - self.PROJECT, self.JOB_ID), - query_params={'location': self.LOCATION}, + method="POST", + path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), + query_params={"location": self.LOCATION}, ) self.assertEqual(job._properties, resource) def test_cancel_explicit(self): - other_project = 'other-project-234' + other_project = "other-project-234" resource = { - 'jobReference': { - 'jobId': self.JOB_ID, - 'projectId': self.PROJECT, - 'location': None, + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, }, - 'configuration': { - 'test': True, - } + "configuration": {"test": True}, } - response = {'job': resource} + response = {"job": resource} job = self._set_properties_job() client = _make_client(project=other_project) connection = client._connection = _make_connection(response) @@ -750,9 +693,8 @@ def test_cancel_explicit(self): self.assertTrue(job.cancel(client=client)) connection.api_request.assert_called_once_with( - method='POST', - path='/projects/{}/jobs/{}/cancel'.format( - self.PROJECT, self.JOB_ID), + method="POST", + path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), query_params={}, ) self.assertEqual(job._properties, resource) @@ -771,7 +713,7 @@ def test__set_future_result_wo_done(self): def test__set_future_result_w_result_set(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._properties['status'] = {'state': 'DONE'} + job._properties["status"] = {"state": "DONE"} job._result_set = True set_exception = job.set_exception = mock.Mock() set_result = job.set_result = mock.Mock() @@ -786,12 +728,9 @@ def test__set_future_result_w_done_wo_result_set_w_error(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._properties['status'] = { - 'state': 'DONE', - 'errorResult': { - 'reason': 'notFound', - 'message': 'testing' - } + job._properties["status"] = { + "state": "DONE", + "errorResult": {"reason": "notFound", "message": "testing"}, } set_exception = job.set_exception = mock.Mock() set_result = job.set_result = mock.Mock() @@ -802,14 +741,14 @@ def test__set_future_result_w_done_wo_result_set_w_error(self): args, kw = set_exception.call_args exception, = args self.assertIsInstance(exception, NotFound) - self.assertEqual(exception.message, 'testing') + self.assertEqual(exception.message, "testing") self.assertEqual(kw, {}) set_result.assert_not_called() def test__set_future_result_w_done_wo_result_set_wo_error(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._properties['status'] = {'state': 'DONE'} + job._properties["status"] = {"state": "DONE"} set_exception = job.set_exception = mock.Mock() set_result = job.set_result = mock.Mock() @@ -844,11 +783,11 @@ def test_done_explicit_wo_state(self): def test_done_already(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._properties['status'] = {'state': 'DONE'} + job._properties["status"] = {"state": "DONE"} self.assertTrue(job.done()) - @mock.patch('google.api_core.future.polling.PollingFuture.result') + @mock.patch("google.api_core.future.polling.PollingFuture.result") def test_result_default_wo_state(self, result): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -859,11 +798,11 @@ def test_result_default_wo_state(self, result): begin.assert_called_once() result.assert_called_once_with(timeout=None) - @mock.patch('google.api_core.future.polling.PollingFuture.result') + @mock.patch("google.api_core.future.polling.PollingFuture.result") def test_result_explicit_w_state(self, result): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._properties['status'] = {'state': 'DONE'} + job._properties["status"] = {"state": "DONE"} begin = job._begin = mock.Mock() timeout = 1 @@ -881,28 +820,20 @@ def test_cancelled_wo_error_result(self): def test_cancelled_w_error_result_not_stopped(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._properties['status'] = { - 'errorResult': { - 'reason': 'other', - } - } + job._properties["status"] = {"errorResult": {"reason": "other"}} self.assertFalse(job.cancelled()) def test_cancelled_w_error_result_w_stopped(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._properties['status'] = { - 'errorResult': { - 'reason': 'stopped', - } - } + job._properties["status"] = {"errorResult": {"reason": "stopped"}} self.assertTrue(job.cancelled()) class Test_JobConfig(unittest.TestCase): - JOB_TYPE = 'testing' + JOB_TYPE = "testing" @staticmethod def _get_target_class(): @@ -938,50 +869,48 @@ def test_fill_from_default_conflict(self): from google.cloud.bigquery import QueryJobConfig basic_job_config = QueryJobConfig() - conflicting_job_config = self._make_one('conflicting_job_type') + conflicting_job_config = self._make_one("conflicting_job_type") self.assertNotEqual( - basic_job_config._job_type, conflicting_job_config._job_type) + basic_job_config._job_type, conflicting_job_config._job_type + ) with self.assertRaises(TypeError): - basic_job_config._fill_from_default( - conflicting_job_config) + basic_job_config._fill_from_default(conflicting_job_config) - @mock.patch('google.cloud.bigquery._helpers._get_sub_prop') + @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") def test__get_sub_prop_wo_default(self, _get_sub_prop): job_config = self._make_one() - key = 'key' - self.assertIs( - job_config._get_sub_prop(key), _get_sub_prop.return_value) + key = "key" + self.assertIs(job_config._get_sub_prop(key), _get_sub_prop.return_value) _get_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], default=None) + job_config._properties, [self.JOB_TYPE, key], default=None + ) - @mock.patch('google.cloud.bigquery._helpers._get_sub_prop') + @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") def test__get_sub_prop_w_default(self, _get_sub_prop): job_config = self._make_one() - key = 'key' - default = 'default' + key = "key" + default = "default" self.assertIs( - job_config._get_sub_prop(key, default=default), - _get_sub_prop.return_value) + job_config._get_sub_prop(key, default=default), _get_sub_prop.return_value + ) _get_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], default=default) + job_config._properties, [self.JOB_TYPE, key], default=default + ) - @mock.patch('google.cloud.bigquery._helpers._set_sub_prop') + @mock.patch("google.cloud.bigquery._helpers._set_sub_prop") def test__set_sub_prop(self, _set_sub_prop): job_config = self._make_one() - key = 'key' - value = 'value' + key = "key" + value = "value" job_config._set_sub_prop(key, value) _set_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], value) + job_config._properties, [self.JOB_TYPE, key], value + ) def test_to_api_repr(self): job_config = self._make_one() - expected = job_config._properties = { - self.JOB_TYPE: { - 'foo': 'bar', - } - } + expected = job_config._properties = {self.JOB_TYPE: {"foo": "bar"}} found = job_config.to_api_repr() self.assertEqual(found, expected) self.assertIsNot(found, expected) # copied @@ -996,15 +925,13 @@ def test_labels_miss(self): def test_labels_update_in_place(self): job_config = self._make_one() labels = job_config.labels - labels['foo'] = 'bar' # update in place - self.assertEqual(job_config.labels, {'foo': 'bar'}) + labels["foo"] = "bar" # update in place + self.assertEqual(job_config.labels, {"foo": "bar"}) def test_labels_hit(self): - labels = { - 'foo': 'bar', - } + labels = {"foo": "bar"} job_config = self._make_one() - job_config._properties['labels'] = labels + job_config._properties["labels"] = labels self.assertEqual(job_config.labels, labels) def test_labels_setter_invalid(self): @@ -1014,26 +941,24 @@ def test_labels_setter_invalid(self): job_config.labels = labels def test_labels_setter(self): - labels = { - 'foo': 'bar', - } + labels = {"foo": "bar"} job_config = self._make_one() job_config.labels = labels - self.assertEqual(job_config._properties['labels'], labels) + self.assertEqual(job_config._properties["labels"], labels) class _Base(object): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import TableReference - PROJECT = 'project' - SOURCE1 = 'http://example.com/source1.csv' - DS_ID = 'dataset_id' + PROJECT = "project" + SOURCE1 = "http://example.com/source1.csv" + DS_ID = "dataset_id" DS_REF = DatasetReference(PROJECT, DS_ID) - TABLE_ID = 'table_id' + TABLE_ID = "table_id" TABLE_REF = TableReference(DS_REF, TABLE_ID) - JOB_ID = 'JOB_ID' - KMS_KEY_NAME = 'projects/1/locations/global/keyRings/1/cryptoKeys/1' + JOB_ID = "JOB_ID" + KMS_KEY_NAME = "projects/1/locations/global/keyRings/1/cryptoKeys/1" def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) @@ -1043,12 +968,11 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( - tzinfo=UTC) - self.ETAG = 'ETAG' - self.FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID) - self.RESOURCE_URL = 'http://example.com/path/to/resource' - self.USER_EMAIL = 'phred@example.com' + self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.ETAG = "ETAG" + self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) + self.RESOURCE_URL = "http://example.com/path/to/resource" + self.USER_EMAIL = "phred@example.com" def _table_ref(self, table_id): from google.cloud.bigquery.table import TableReference @@ -1058,36 +982,26 @@ def _table_ref(self, table_id): def _make_resource(self, started=False, ended=False): self._setUpConstants() resource = { - 'configuration': { - self.JOB_TYPE: { - }, - }, - 'statistics': { - 'creationTime': self.WHEN_TS * 1000, - self.JOB_TYPE: { - } - }, - 'etag': self.ETAG, - 'id': self.FULL_JOB_ID, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'selfLink': self.RESOURCE_URL, - 'user_email': self.USER_EMAIL, + "configuration": {self.JOB_TYPE: {}}, + "statistics": {"creationTime": self.WHEN_TS * 1000, self.JOB_TYPE: {}}, + "etag": self.ETAG, + "id": self.FULL_JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "selfLink": self.RESOURCE_URL, + "user_email": self.USER_EMAIL, } if started or ended: - resource['statistics']['startTime'] = self.WHEN_TS * 1000 + resource["statistics"]["startTime"] = self.WHEN_TS * 1000 if ended: - resource['statistics']['endTime'] = (self.WHEN_TS + 1000) * 1000 + resource["statistics"]["endTime"] = (self.WHEN_TS + 1000) * 1000 - if self.JOB_TYPE == 'query': - resource['configuration']['query']['destinationTable'] = { - 'projectId': self.PROJECT, - 'datasetId': '_temp_dataset', - 'tableId': '_temp_table', + if self.JOB_TYPE == "query": + resource["configuration"]["query"]["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": "_temp_dataset", + "tableId": "_temp_table", } return resource @@ -1111,50 +1025,52 @@ def _verifyInitialReadonlyProperties(self, job): def _verifyReadonlyResourceProperties(self, job, resource): from datetime import timedelta - statistics = resource.get('statistics', {}) + statistics = resource.get("statistics", {}) - if 'creationTime' in statistics: + if "creationTime" in statistics: self.assertEqual(job.created, self.WHEN) else: self.assertIsNone(job.created) - if 'startTime' in statistics: + if "startTime" in statistics: self.assertEqual(job.started, self.WHEN) else: self.assertIsNone(job.started) - if 'endTime' in statistics: + if "endTime" in statistics: self.assertEqual(job.ended, self.WHEN + timedelta(seconds=1000)) else: self.assertIsNone(job.ended) - if 'etag' in resource: + if "etag" in resource: self.assertEqual(job.etag, self.ETAG) else: self.assertIsNone(job.etag) - if 'selfLink' in resource: + if "selfLink" in resource: self.assertEqual(job.self_link, self.RESOURCE_URL) else: self.assertIsNone(job.self_link) - if 'user_email' in resource: + if "user_email" in resource: self.assertEqual(job.user_email, self.USER_EMAIL) else: self.assertIsNone(job.user_email) class TestLoadJobConfig(unittest.TestCase, _Base): - JOB_TYPE = 'load' + JOB_TYPE = "load" @staticmethod def _get_target_class(): from google.cloud.bigquery.job import LoadJobConfig + return LoadJobConfig def test_ctor_w_properties(self): config = self._get_target_class()( - allow_jagged_rows=True, allow_quoted_newlines=True) + allow_jagged_rows=True, allow_quoted_newlines=True + ) self.assertTrue(config.allow_jagged_rows) self.assertTrue(config.allow_quoted_newlines) @@ -1165,13 +1081,13 @@ def test_allow_jagged_rows_missing(self): def test_allow_jagged_rows_hit(self): config = self._get_target_class()() - config._properties['load']['allowJaggedRows'] = True + config._properties["load"]["allowJaggedRows"] = True self.assertTrue(config.allow_jagged_rows) def test_allow_jagged_rows_setter(self): config = self._get_target_class()() config.allow_jagged_rows = True - self.assertTrue(config._properties['load']['allowJaggedRows']) + self.assertTrue(config._properties["load"]["allowJaggedRows"]) def test_allow_quoted_newlines_missing(self): config = self._get_target_class()() @@ -1179,13 +1095,13 @@ def test_allow_quoted_newlines_missing(self): def test_allow_quoted_newlines_hit(self): config = self._get_target_class()() - config._properties['load']['allowQuotedNewlines'] = True + config._properties["load"]["allowQuotedNewlines"] = True self.assertTrue(config.allow_quoted_newlines) def test_allow_quoted_newlines_setter(self): config = self._get_target_class()() config.allow_quoted_newlines = True - self.assertTrue(config._properties['load']['allowQuotedNewlines']) + self.assertTrue(config._properties["load"]["allowQuotedNewlines"]) def test_autodetect_missing(self): config = self._get_target_class()() @@ -1193,13 +1109,13 @@ def test_autodetect_missing(self): def test_autodetect_hit(self): config = self._get_target_class()() - config._properties['load']['autodetect'] = True + config._properties["load"]["autodetect"] = True self.assertTrue(config.autodetect) def test_autodetect_setter(self): config = self._get_target_class()() config.autodetect = True - self.assertTrue(config._properties['load']['autodetect']) + self.assertTrue(config._properties["load"]["autodetect"]) def test_clustering_fields_miss(self): config = self._get_target_class()() @@ -1207,28 +1123,23 @@ def test_clustering_fields_miss(self): def test_clustering_fields_hit(self): config = self._get_target_class()() - fields = ['email', 'postal_code'] - config._properties['load']['clustering'] = { - 'fields': fields, - } + fields = ["email", "postal_code"] + config._properties["load"]["clustering"] = {"fields": fields} self.assertEqual(config.clustering_fields, fields) def test_clustering_fields_setter(self): - fields = ['email', 'postal_code'] + fields = ["email", "postal_code"] config = self._get_target_class()() config.clustering_fields = fields - self.assertEqual( - config._properties['load']['clustering'], {'fields': fields}) + self.assertEqual(config._properties["load"]["clustering"], {"fields": fields}) def test_clustering_fields_setter_w_none(self): config = self._get_target_class()() - fields = ['email', 'postal_code'] - config._properties['load']['clustering'] = { - 'fields': fields, - } + fields = ["email", "postal_code"] + config._properties["load"]["clustering"] = {"fields": fields} config.clustering_fields = None self.assertIsNone(config.clustering_fields) - self.assertNotIn('clustering', config._properties['load']) + self.assertNotIn("clustering", config._properties["load"]) def test_create_disposition_missing(self): config = self._get_target_class()() @@ -1239,7 +1150,7 @@ def test_create_disposition_hit(self): disposition = CreateDisposition.CREATE_IF_NEEDED config = self._get_target_class()() - config._properties['load']['createDisposition'] = disposition + config._properties["load"]["createDisposition"] = disposition self.assertEqual(config.create_disposition, disposition) def test_create_disposition_setter(self): @@ -1248,8 +1159,7 @@ def test_create_disposition_setter(self): disposition = CreateDisposition.CREATE_IF_NEEDED config = self._get_target_class()() config.create_disposition = disposition - self.assertEqual( - config._properties['load']['createDisposition'], disposition) + self.assertEqual(config._properties["load"]["createDisposition"], disposition) def test_destination_encryption_configuration_missing(self): config = self._get_target_class()() @@ -1258,146 +1168,135 @@ def test_destination_encryption_configuration_missing(self): def test_destination_encryption_configuration_hit(self): from google.cloud.bigquery.table import EncryptionConfiguration - kms_key_name = 'kms-key-name' + kms_key_name = "kms-key-name" encryption_configuration = EncryptionConfiguration(kms_key_name) config = self._get_target_class()() - config._properties['load']['destinationEncryptionConfiguration'] = { - 'kmsKeyName': kms_key_name, + config._properties["load"]["destinationEncryptionConfiguration"] = { + "kmsKeyName": kms_key_name } self.assertEqual( - config.destination_encryption_configuration, - encryption_configuration) + config.destination_encryption_configuration, encryption_configuration + ) def test_destination_encryption_configuration_setter(self): from google.cloud.bigquery.table import EncryptionConfiguration - kms_key_name = 'kms-key-name' + kms_key_name = "kms-key-name" encryption_configuration = EncryptionConfiguration(kms_key_name) config = self._get_target_class()() config.destination_encryption_configuration = encryption_configuration - expected = { - 'kmsKeyName': kms_key_name, - } + expected = {"kmsKeyName": kms_key_name} self.assertEqual( - config._properties['load']['destinationEncryptionConfiguration'], - expected) + config._properties["load"]["destinationEncryptionConfiguration"], expected + ) def test_destination_encryption_configuration_setter_w_none(self): - kms_key_name = 'kms-key-name' + kms_key_name = "kms-key-name" config = self._get_target_class()() - config._properties['load']['destinationEncryptionConfiguration'] = { - 'kmsKeyName': kms_key_name, + config._properties["load"]["destinationEncryptionConfiguration"] = { + "kmsKeyName": kms_key_name } config.destination_encryption_configuration = None self.assertIsNone(config.destination_encryption_configuration) self.assertNotIn( - 'destinationEncryptionConfiguration', config._properties['load']) + "destinationEncryptionConfiguration", config._properties["load"] + ) def test_destination_table_description_missing(self): config = self._get_target_class()() self.assertIsNone(config.destination_table_description) def test_destination_table_description_hit(self): - description = 'Description' + description = "Description" config = self._get_target_class()() - config._properties['load']['destinationTableProperties'] = { - 'description': description, + config._properties["load"]["destinationTableProperties"] = { + "description": description } - self.assertEqual( - config.destination_table_description, description) + self.assertEqual(config.destination_table_description, description) def test_destination_table_description_setter(self): - description = 'Description' + description = "Description" config = self._get_target_class()() config.destination_table_description = description - expected = { - 'description': description, - } + expected = {"description": description} self.assertEqual( - config._properties['load']['destinationTableProperties'], expected) + config._properties["load"]["destinationTableProperties"], expected + ) def test_destination_table_description_setter_w_fn_already(self): - description = 'Description' - friendly_name = 'Friendly Name' + description = "Description" + friendly_name = "Friendly Name" config = self._get_target_class()() - config._properties['load']['destinationTableProperties'] = { - 'friendlyName': friendly_name, + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name } config.destination_table_description = description - expected = { - 'friendlyName': friendly_name, - 'description': description, - } + expected = {"friendlyName": friendly_name, "description": description} self.assertEqual( - config._properties['load']['destinationTableProperties'], expected) + config._properties["load"]["destinationTableProperties"], expected + ) def test_destination_table_description_w_none(self): - description = 'Description' - friendly_name = 'Friendly Name' + description = "Description" + friendly_name = "Friendly Name" config = self._get_target_class()() - config._properties['load']['destinationTableProperties'] = { - 'description': description, - 'friendlyName': friendly_name, + config._properties["load"]["destinationTableProperties"] = { + "description": description, + "friendlyName": friendly_name, } config.destination_table_description = None - expected = { - 'friendlyName': friendly_name, - } + expected = {"friendlyName": friendly_name} self.assertEqual( - config._properties['load']['destinationTableProperties'], expected) + config._properties["load"]["destinationTableProperties"], expected + ) def test_destination_table_friendly_name_missing(self): config = self._get_target_class()() self.assertIsNone(config.destination_table_friendly_name) def test_destination_table_friendly_name_hit(self): - friendly_name = 'Friendly Name' + friendly_name = "Friendly Name" config = self._get_target_class()() - config._properties['load']['destinationTableProperties'] = { - 'friendlyName': friendly_name, + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name } - self.assertEqual( - config.destination_table_friendly_name, friendly_name) + self.assertEqual(config.destination_table_friendly_name, friendly_name) def test_destination_table_friendly_name_setter(self): - friendly_name = 'Friendly Name' + friendly_name = "Friendly Name" config = self._get_target_class()() config.destination_table_friendly_name = friendly_name - expected = { - 'friendlyName': friendly_name, - } + expected = {"friendlyName": friendly_name} self.assertEqual( - config._properties['load']['destinationTableProperties'], expected) + config._properties["load"]["destinationTableProperties"], expected + ) def test_destination_table_friendly_name_setter_w_descr_already(self): - friendly_name = 'Friendly Name' - description = 'Description' + friendly_name = "Friendly Name" + description = "Description" config = self._get_target_class()() - config._properties['load']['destinationTableProperties'] = { - 'description': description, + config._properties["load"]["destinationTableProperties"] = { + "description": description } config.destination_table_friendly_name = friendly_name - expected = { - 'friendlyName': friendly_name, - 'description': description, - } + expected = {"friendlyName": friendly_name, "description": description} self.assertEqual( - config._properties['load']['destinationTableProperties'], expected) + config._properties["load"]["destinationTableProperties"], expected + ) def test_destination_table_friendly_name_w_none(self): - friendly_name = 'Friendly Name' - description = 'Description' + friendly_name = "Friendly Name" + description = "Description" config = self._get_target_class()() - config._properties['load']['destinationTableProperties'] = { - 'description': description, - 'friendlyName': friendly_name, + config._properties["load"]["destinationTableProperties"] = { + "description": description, + "friendlyName": friendly_name, } config.destination_table_friendly_name = None - expected = { - 'description': description, - } + expected = {"description": description} self.assertEqual( - config._properties['load']['destinationTableProperties'], expected) + config._properties["load"]["destinationTableProperties"], expected + ) def test_encoding_missing(self): config = self._get_target_class()() @@ -1408,7 +1307,7 @@ def test_encoding_hit(self): encoding = Encoding.UTF_8 config = self._get_target_class()() - config._properties['load']['encoding'] = encoding + config._properties["load"]["encoding"] = encoding self.assertEqual(config.encoding, encoding) def test_encoding_setter(self): @@ -1417,25 +1316,23 @@ def test_encoding_setter(self): encoding = Encoding.UTF_8 config = self._get_target_class()() config.encoding = encoding - self.assertEqual( - config._properties['load']['encoding'], encoding) + self.assertEqual(config._properties["load"]["encoding"], encoding) def test_field_delimiter_missing(self): config = self._get_target_class()() self.assertIsNone(config.field_delimiter) def test_field_delimiter_hit(self): - field_delimiter = '|' + field_delimiter = "|" config = self._get_target_class()() - config._properties['load']['fieldDelimiter'] = field_delimiter + config._properties["load"]["fieldDelimiter"] = field_delimiter self.assertEqual(config.field_delimiter, field_delimiter) def test_field_delimiter_setter(self): - field_delimiter = '|' + field_delimiter = "|" config = self._get_target_class()() config.field_delimiter = field_delimiter - self.assertEqual( - config._properties['load']['fieldDelimiter'], field_delimiter) + self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter) def test_ignore_unknown_values_missing(self): config = self._get_target_class()() @@ -1443,13 +1340,13 @@ def test_ignore_unknown_values_missing(self): def test_ignore_unknown_values_hit(self): config = self._get_target_class()() - config._properties['load']['ignoreUnknownValues'] = True + config._properties["load"]["ignoreUnknownValues"] = True self.assertTrue(config.ignore_unknown_values) def test_ignore_unknown_values_setter(self): config = self._get_target_class()() config.ignore_unknown_values = True - self.assertTrue(config._properties['load']['ignoreUnknownValues']) + self.assertTrue(config._properties["load"]["ignoreUnknownValues"]) def test_max_bad_records_missing(self): config = self._get_target_class()() @@ -1458,32 +1355,30 @@ def test_max_bad_records_missing(self): def test_max_bad_records_hit(self): max_bad_records = 13 config = self._get_target_class()() - config._properties['load']['maxBadRecords'] = max_bad_records + config._properties["load"]["maxBadRecords"] = max_bad_records self.assertEqual(config.max_bad_records, max_bad_records) def test_max_bad_records_setter(self): max_bad_records = 13 config = self._get_target_class()() config.max_bad_records = max_bad_records - self.assertEqual( - config._properties['load']['maxBadRecords'], max_bad_records) + self.assertEqual(config._properties["load"]["maxBadRecords"], max_bad_records) def test_null_marker_missing(self): config = self._get_target_class()() self.assertIsNone(config.null_marker) def test_null_marker_hit(self): - null_marker = 'XXX' + null_marker = "XXX" config = self._get_target_class()() - config._properties['load']['nullMarker'] = null_marker + config._properties["load"]["nullMarker"] = null_marker self.assertEqual(config.null_marker, null_marker) def test_null_marker_setter(self): - null_marker = 'XXX' + null_marker = "XXX" config = self._get_target_class()() config.null_marker = null_marker - self.assertEqual( - config._properties['load']['nullMarker'], null_marker) + self.assertEqual(config._properties["load"]["nullMarker"], null_marker) def test_quote_character_missing(self): config = self._get_target_class()() @@ -1492,15 +1387,14 @@ def test_quote_character_missing(self): def test_quote_character_hit(self): quote_character = "'" config = self._get_target_class()() - config._properties['load']['quote'] = quote_character + config._properties["load"]["quote"] = quote_character self.assertEqual(config.quote_character, quote_character) def test_quote_character_setter(self): quote_character = "'" config = self._get_target_class()() config.quote_character = quote_character - self.assertEqual( - config._properties['load']['quote'], quote_character) + self.assertEqual(config._properties["load"]["quote"], quote_character) def test_schema_missing(self): config = self._get_target_class()() @@ -1511,17 +1405,14 @@ def test_schema_hit(self): config = self._get_target_class()() all_props_repr = { - 'mode': 'REQUIRED', - 'name': 'foo', - 'type': 'INTEGER', - 'description': 'Foo', + "mode": "REQUIRED", + "name": "foo", + "type": "INTEGER", + "description": "Foo", } - minimal_repr = { - 'name': 'bar', - 'type': 'STRING', - } - config._properties['load']['schema'] = { - 'fields': [all_props_repr, minimal_repr], + minimal_repr = {"name": "bar", "type": "STRING"} + config._properties["load"]["schema"] = { + "fields": [all_props_repr, minimal_repr] } all_props, minimal = config.schema self.assertEqual(all_props, SchemaField.from_api_repr(all_props_repr)) @@ -1531,24 +1422,24 @@ def test_schema_setter(self): from google.cloud.bigquery.schema import SchemaField config = self._get_target_class()() - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") config.schema = [full_name, age] full_name_repr = { - 'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None, + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, } age_repr = { - 'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED', - 'description': None, + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, } self.assertEqual( - config._properties['load']['schema'], - {'fields': [full_name_repr, age_repr]}) + config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} + ) def test_schema_update_options_missing(self): config = self._get_target_class()() @@ -1562,7 +1453,7 @@ def test_schema_update_options_hit(self): SchemaUpdateOption.ALLOW_FIELD_RELAXATION, ] config = self._get_target_class()() - config._properties['load']['schemaUpdateOptions'] = options + config._properties["load"]["schemaUpdateOptions"] = options self.assertEqual(config.schema_update_options, options) def test_schema_update_options_setter(self): @@ -1574,8 +1465,7 @@ def test_schema_update_options_setter(self): ] config = self._get_target_class()() config.schema_update_options = options - self.assertEqual( - config._properties['load']['schemaUpdateOptions'], options) + self.assertEqual(config._properties["load"]["schemaUpdateOptions"], options) def test_skip_leading_rows_missing(self): config = self._get_target_class()() @@ -1584,13 +1474,13 @@ def test_skip_leading_rows_missing(self): def test_skip_leading_rows_hit_w_str(self): skip_leading_rows = 1 config = self._get_target_class()() - config._properties['load']['skipLeadingRows'] = str(skip_leading_rows) + config._properties["load"]["skipLeadingRows"] = str(skip_leading_rows) self.assertEqual(config.skip_leading_rows, skip_leading_rows) def test_skip_leading_rows_hit_w_integer(self): skip_leading_rows = 1 config = self._get_target_class()() - config._properties['load']['skipLeadingRows'] = skip_leading_rows + config._properties["load"]["skipLeadingRows"] = skip_leading_rows self.assertEqual(config.skip_leading_rows, skip_leading_rows) def test_skip_leading_rows_setter(self): @@ -1598,8 +1488,8 @@ def test_skip_leading_rows_setter(self): config = self._get_target_class()() config.skip_leading_rows = skip_leading_rows self.assertEqual( - config._properties['load']['skipLeadingRows'], - str(skip_leading_rows)) + config._properties["load"]["skipLeadingRows"], str(skip_leading_rows) + ) def test_source_format_missing(self): config = self._get_target_class()() @@ -1610,7 +1500,7 @@ def test_source_format_hit(self): source_format = SourceFormat.CSV config = self._get_target_class()() - config._properties['load']['sourceFormat'] = source_format + config._properties["load"]["sourceFormat"] = source_format self.assertEqual(config.source_format, source_format) def test_source_format_setter(self): @@ -1619,8 +1509,7 @@ def test_source_format_setter(self): source_format = SourceFormat.CSV config = self._get_target_class()() config.source_format = source_format - self.assertEqual( - config._properties['load']['sourceFormat'], source_format) + self.assertEqual(config._properties["load"]["sourceFormat"], source_format) def test_time_partitioning_miss(self): config = self._get_target_class()() @@ -1630,14 +1519,14 @@ def test_time_partitioning_hit(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType - field = 'creation_date' + field = "creation_date" year_ms = 86400 * 1000 * 365 config = self._get_target_class()() - config._properties['load']['timePartitioning'] = { - 'type': TimePartitioningType.DAY, - 'field': field, - 'expirationMs': str(year_ms), - 'requirePartitionFilter': False, + config._properties["load"]["timePartitioning"] = { + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, } expected = TimePartitioning( type_=TimePartitioningType.DAY, @@ -1651,7 +1540,7 @@ def test_time_partitioning_setter(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType - field = 'creation_date' + field = "creation_date" year_ms = 86400 * 1000 * 365 time_partitioning = TimePartitioning( type_=TimePartitioningType.DAY, @@ -1662,29 +1551,28 @@ def test_time_partitioning_setter(self): config = self._get_target_class()() config.time_partitioning = time_partitioning expected = { - 'type': TimePartitioningType.DAY, - 'field': field, - 'expirationMs': str(year_ms), - 'requirePartitionFilter': False, + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, } - self.assertEqual( - config._properties['load']['timePartitioning'], expected) + self.assertEqual(config._properties["load"]["timePartitioning"], expected) def test_time_partitioning_setter_w_none(self): from google.cloud.bigquery.table import TimePartitioningType - field = 'creation_date' + field = "creation_date" year_ms = 86400 * 1000 * 365 config = self._get_target_class()() - config._properties['load']['timePartitioning'] = { - 'type': TimePartitioningType.DAY, - 'field': field, - 'expirationMs': str(year_ms), - 'requirePartitionFilter': False, + config._properties["load"]["timePartitioning"] = { + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, } config.time_partitioning = None self.assertIsNone(config.time_partitioning) - self.assertNotIn('timePartitioning', config._properties['load']) + self.assertNotIn("timePartitioning", config._properties["load"]) def test_write_disposition_missing(self): config = self._get_target_class()() @@ -1695,7 +1583,7 @@ def test_write_disposition_hit(self): write_disposition = WriteDisposition.WRITE_TRUNCATE config = self._get_target_class()() - config._properties['load']['writeDisposition'] = write_disposition + config._properties["load"]["writeDisposition"] = write_disposition self.assertEqual(config.write_disposition, write_disposition) def test_write_disposition_setter(self): @@ -1705,11 +1593,12 @@ def test_write_disposition_setter(self): config = self._get_target_class()() config.write_disposition = write_disposition self.assertEqual( - config._properties['load']['writeDisposition'], write_disposition) + config._properties["load"]["writeDisposition"], write_disposition + ) class TestLoadJob(unittest.TestCase, _Base): - JOB_TYPE = 'load' + JOB_TYPE = "load" @staticmethod def _get_target_class(): @@ -1725,134 +1614,117 @@ def _setUpConstants(self): self.OUTPUT_ROWS = 345 def _make_resource(self, started=False, ended=False): - resource = super(TestLoadJob, self)._make_resource( - started, ended) - config = resource['configuration']['load'] - config['sourceUris'] = [self.SOURCE1] - config['destinationTable'] = { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, + resource = super(TestLoadJob, self)._make_resource(started, ended) + config = resource["configuration"]["load"] + config["sourceUris"] = [self.SOURCE1] + config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, } if ended: - resource['status'] = {'state': 'DONE'} - resource['statistics']['load']['inputFiles'] = self.INPUT_FILES - resource['statistics']['load']['inputFileBytes'] = self.INPUT_BYTES - resource['statistics']['load']['outputBytes'] = self.OUTPUT_BYTES - resource['statistics']['load']['outputRows'] = self.OUTPUT_ROWS + resource["status"] = {"state": "DONE"} + resource["statistics"]["load"]["inputFiles"] = self.INPUT_FILES + resource["statistics"]["load"]["inputFileBytes"] = self.INPUT_BYTES + resource["statistics"]["load"]["outputBytes"] = self.OUTPUT_BYTES + resource["statistics"]["load"]["outputRows"] = self.OUTPUT_ROWS return resource def _verifyBooleanConfigProperties(self, job, config): - if 'allowJaggedRows' in config: - self.assertEqual(job.allow_jagged_rows, - config['allowJaggedRows']) + if "allowJaggedRows" in config: + self.assertEqual(job.allow_jagged_rows, config["allowJaggedRows"]) else: self.assertIsNone(job.allow_jagged_rows) - if 'allowQuotedNewlines' in config: - self.assertEqual(job.allow_quoted_newlines, - config['allowQuotedNewlines']) + if "allowQuotedNewlines" in config: + self.assertEqual(job.allow_quoted_newlines, config["allowQuotedNewlines"]) else: self.assertIsNone(job.allow_quoted_newlines) - if 'autodetect' in config: - self.assertEqual( - job.autodetect, config['autodetect']) + if "autodetect" in config: + self.assertEqual(job.autodetect, config["autodetect"]) else: self.assertIsNone(job.autodetect) - if 'ignoreUnknownValues' in config: - self.assertEqual(job.ignore_unknown_values, - config['ignoreUnknownValues']) + if "ignoreUnknownValues" in config: + self.assertEqual(job.ignore_unknown_values, config["ignoreUnknownValues"]) else: self.assertIsNone(job.ignore_unknown_values) def _verifyEnumConfigProperties(self, job, config): - if 'createDisposition' in config: - self.assertEqual(job.create_disposition, - config['createDisposition']) + if "createDisposition" in config: + self.assertEqual(job.create_disposition, config["createDisposition"]) else: self.assertIsNone(job.create_disposition) - if 'encoding' in config: - self.assertEqual(job.encoding, - config['encoding']) + if "encoding" in config: + self.assertEqual(job.encoding, config["encoding"]) else: self.assertIsNone(job.encoding) - if 'sourceFormat' in config: - self.assertEqual(job.source_format, - config['sourceFormat']) + if "sourceFormat" in config: + self.assertEqual(job.source_format, config["sourceFormat"]) else: self.assertIsNone(job.source_format) - if 'writeDisposition' in config: - self.assertEqual(job.write_disposition, - config['writeDisposition']) + if "writeDisposition" in config: + self.assertEqual(job.write_disposition, config["writeDisposition"]) else: self.assertIsNone(job.write_disposition) - if 'schemaUpdateOptions' in config: - self.assertEqual( - job.schema_update_options, config['schemaUpdateOptions']) + if "schemaUpdateOptions" in config: + self.assertEqual(job.schema_update_options, config["schemaUpdateOptions"]) else: self.assertIsNone(job.schema_update_options) def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) - config = resource.get('configuration', {}).get('load') + config = resource.get("configuration", {}).get("load") self._verifyBooleanConfigProperties(job, config) self._verifyEnumConfigProperties(job, config) - self.assertEqual(job.source_uris, config['sourceUris']) + self.assertEqual(job.source_uris, config["sourceUris"]) - table_ref = config['destinationTable'] - self.assertEqual(job.destination.project, table_ref['projectId']) - self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) - self.assertEqual(job.destination.table_id, table_ref['tableId']) + table_ref = config["destinationTable"] + self.assertEqual(job.destination.project, table_ref["projectId"]) + self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.destination.table_id, table_ref["tableId"]) - if 'fieldDelimiter' in config: - self.assertEqual(job.field_delimiter, - config['fieldDelimiter']) + if "fieldDelimiter" in config: + self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) else: self.assertIsNone(job.field_delimiter) - if 'maxBadRecords' in config: - self.assertEqual(job.max_bad_records, - config['maxBadRecords']) + if "maxBadRecords" in config: + self.assertEqual(job.max_bad_records, config["maxBadRecords"]) else: self.assertIsNone(job.max_bad_records) - if 'nullMarker' in config: - self.assertEqual(job.null_marker, - config['nullMarker']) + if "nullMarker" in config: + self.assertEqual(job.null_marker, config["nullMarker"]) else: self.assertIsNone(job.null_marker) - if 'quote' in config: - self.assertEqual(job.quote_character, - config['quote']) + if "quote" in config: + self.assertEqual(job.quote_character, config["quote"]) else: self.assertIsNone(job.quote_character) - if 'skipLeadingRows' in config: - self.assertEqual(str(job.skip_leading_rows), - config['skipLeadingRows']) + if "skipLeadingRows" in config: + self.assertEqual(str(job.skip_leading_rows), config["skipLeadingRows"]) else: self.assertIsNone(job.skip_leading_rows) - if 'destinationEncryptionConfiguration' in config: + if "destinationEncryptionConfiguration" in config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( job.destination_encryption_configuration.kms_key_name, - config['destinationEncryptionConfiguration']['kmsKeyName']) + config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) else: self.assertIsNone(job.destination_encryption_configuration) def test_ctor(self): client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, - client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) self.assertIs(job.destination, self.TABLE_REF) self.assertEqual(list(job.source_uris), [self.SOURCE1]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual( - job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) @@ -1887,23 +1759,23 @@ def test_ctor_w_config(self): from google.cloud.bigquery.job import LoadJobConfig client = _make_client(project=self.PROJECT) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") config = LoadJobConfig() config.schema = [full_name, age] - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, - client, config) + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config + ) self.assertEqual(job.schema, [full_name, age]) def test_ctor_w_job_reference(self): from google.cloud.bigquery import job client = _make_client(project=self.PROJECT) - job_ref = job._JobReference(self.JOB_ID, 'alternative-project', 'US') - load_job = self._make_one( - job_ref, [self.SOURCE1], self.TABLE_REF, client) - self.assertEqual(load_job.project, 'alternative-project') - self.assertEqual(load_job.location, 'US') + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + self.assertEqual(load_job.project, "alternative-project") + self.assertEqual(load_job.location, "US") def test_done(self): client = _make_client(project=self.PROJECT) @@ -1923,19 +1795,18 @@ def test_result(self): def test_result_invokes_begin(self): begun_resource = self._make_resource() done_resource = copy.deepcopy(begun_resource) - done_resource['status'] = {'state': 'DONE'} + done_resource["status"] = {"state": "DONE"} connection = _make_connection(begun_resource, done_resource) client = _make_client(self.PROJECT) client._connection = connection - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, - client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job.result() self.assertEqual(len(connection.api_request.call_args_list), 2) begin_request, reload_request = connection.api_request.call_args_list - self.assertEqual(begin_request[1]['method'], 'POST') - self.assertEqual(reload_request[1]['method'], 'GET') + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(reload_request[1]["method"], "GET") def test_schema_setter_non_list(self): from google.cloud.bigquery.job import LoadJobConfig @@ -1949,7 +1820,7 @@ def test_schema_setter_invalid_field(self): from google.cloud.bigquery.schema import SchemaField config = LoadJobConfig() - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") with self.assertRaises(ValueError): config.schema = [full_name, object()] @@ -1958,8 +1829,8 @@ def test_schema_setter(self): from google.cloud.bigquery.schema import SchemaField config = LoadJobConfig() - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") config.schema = [full_name, age] self.assertEqual(config.schema, [full_name, age]) @@ -1971,29 +1842,29 @@ def test_props_set_by_server(self): CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC) STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC) ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC) - FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID) - URL = 'http://example.com/projects/%s/jobs/%s' % ( - self.PROJECT, self.JOB_ID) - EMAIL = 'phred@example.com' - ERROR_RESULT = {'debugInfo': 'DEBUG', - 'location': 'LOCATION', - 'message': 'MESSAGE', - 'reason': 'REASON'} + FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) + URL = "http://example.com/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + EMAIL = "phred@example.com" + ERROR_RESULT = { + "debugInfo": "DEBUG", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", + } client = _make_client(project=self.PROJECT) - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - job._properties['etag'] = 'ETAG' - job._properties['id'] = FULL_JOB_ID - job._properties['selfLink'] = URL - job._properties['user_email'] = EMAIL - - statistics = job._properties['statistics'] = {} - statistics['creationTime'] = _millis(CREATED) - statistics['startTime'] = _millis(STARTED) - statistics['endTime'] = _millis(ENDED) - - self.assertEqual(job.etag, 'ETAG') + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job._properties["etag"] = "ETAG" + job._properties["id"] = FULL_JOB_ID + job._properties["selfLink"] = URL + job._properties["user_email"] = EMAIL + + statistics = job._properties["statistics"] = {} + statistics["creationTime"] = _millis(CREATED) + statistics["startTime"] = _millis(STARTED) + statistics["endTime"] = _millis(ENDED) + + self.assertEqual(job.etag, "ETAG") self.assertEqual(job.self_link, URL) self.assertEqual(job.user_email, EMAIL) @@ -2004,30 +1875,30 @@ def test_props_set_by_server(self): # running jobs have no load stats not yet set. self.assertIsNone(job.output_bytes) - load_stats = statistics['load'] = {} - load_stats['inputFileBytes'] = 12345 - load_stats['inputFiles'] = 1 - load_stats['outputBytes'] = 23456 - load_stats['outputRows'] = 345 + load_stats = statistics["load"] = {} + load_stats["inputFileBytes"] = 12345 + load_stats["inputFiles"] = 1 + load_stats["outputBytes"] = 23456 + load_stats["outputRows"] = 345 self.assertEqual(job.input_file_bytes, 12345) self.assertEqual(job.input_files, 1) self.assertEqual(job.output_bytes, 23456) self.assertEqual(job.output_rows, 345) - status = job._properties['status'] = {} + status = job._properties["status"] = {} self.assertIsNone(job.error_result) self.assertIsNone(job.errors) self.assertIsNone(job.state) - status['errorResult'] = ERROR_RESULT - status['errors'] = [ERROR_RESULT] - status['state'] = 'STATE' + status["errorResult"] = ERROR_RESULT + status["errors"] = [ERROR_RESULT] + status["state"] = "STATE" self.assertEqual(job.error_result, ERROR_RESULT) self.assertEqual(job.errors, [ERROR_RESULT]) - self.assertEqual(job.state, 'STATE') + self.assertEqual(job.state, "STATE") def test_from_api_repr_missing_identity(self): self._setUpConstants() @@ -2041,11 +1912,8 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.JOB_ID), - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - } + "id": "%s:%s" % (self.PROJECT, self.JOB_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, } klass = self._get_target_class() with self.assertRaises(KeyError): @@ -2055,18 +1923,15 @@ def test_from_api_repr_bare(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': self.FULL_JOB_ID, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'load': { - 'sourceUris': [self.SOURCE1], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, + "id": self.FULL_JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, } }, @@ -2080,22 +1945,19 @@ def test_from_api_with_encryption(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': self.FULL_JOB_ID, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'load': { - 'sourceUris': [self.SOURCE1], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, + "id": self.FULL_JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME }, - 'destinationEncryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME - } } }, } @@ -2109,8 +1971,8 @@ def test_from_api_repr_w_properties(self): client = _make_client(project=self.PROJECT) RESOURCE = self._make_resource() - load_config = RESOURCE['configuration']['load'] - load_config['createDisposition'] = CreateDisposition.CREATE_IF_NEEDED + load_config = RESOURCE["configuration"]["load"] + load_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) @@ -2119,9 +1981,8 @@ def test_from_api_repr_w_properties(self): def test_begin_w_already_running(self): conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, - client) - job._properties['status'] = {'state': 'RUNNING'} + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job._properties["status"] = {"state": "RUNNING"} with self.assertRaises(ValueError): job._begin() @@ -2129,78 +1990,70 @@ def test_begin_w_already_running(self): def test_begin_w_bound_client(self): RESOURCE = self._make_resource() # Ensure None for missing server-set props - del RESOURCE['statistics']['creationTime'] - del RESOURCE['etag'] - del RESOURCE['selfLink'] - del RESOURCE['user_email'] + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, - client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job._begin() conn.api_request.assert_called_once_with( - method='POST', - path='/projects/{}/jobs'.format(self.PROJECT), + method="POST", + path="/projects/{}/jobs".format(self.PROJECT), data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'load': { - 'sourceUris': [self.SOURCE1], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - }, + } }, - }) + }, + ) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_autodetect(self): from google.cloud.bigquery.job import LoadJobConfig - path = '/projects/{}/jobs'.format(self.PROJECT) + path = "/projects/{}/jobs".format(self.PROJECT) resource = self._make_resource() - resource['configuration']['load']['autodetect'] = True + resource["configuration"]["load"]["autodetect"] = True # Ensure None for missing server-set props - del resource['statistics']['creationTime'] - del resource['etag'] - del resource['selfLink'] - del resource['user_email'] + del resource["statistics"]["creationTime"] + del resource["etag"] + del resource["selfLink"] + del resource["user_email"] conn = _make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) config = LoadJobConfig() config.autodetect = True - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, - client, config) + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config + ) job._begin() sent = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'load': { - 'sourceUris': [self.SOURCE1], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'autodetect': True - }, + "autodetect": True, + } }, } - conn.api_request.assert_called_once_with( - method='POST', - path=path, - data=sent) + conn.api_request.assert_called_once_with(method="POST", path=path, data=sent) self._verifyResourceProperties(job, resource) def test_begin_w_alternate_client(self): @@ -2210,283 +2063,262 @@ def test_begin_w_alternate_client(self): from google.cloud.bigquery.job import WriteDisposition from google.cloud.bigquery.schema import SchemaField - PATH = '/projects/%s/jobs' % (self.PROJECT,) + PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource(ended=True) LOAD_CONFIGURATION = { - 'sourceUris': [self.SOURCE1], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_ID, + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - 'allowJaggedRows': True, - 'allowQuotedNewlines': True, - 'createDisposition': CreateDisposition.CREATE_NEVER, - 'encoding': 'ISO-8559-1', - 'fieldDelimiter': '|', - 'ignoreUnknownValues': True, - 'maxBadRecords': 100, - 'nullMarker': r'\N', - 'quote': "'", - 'skipLeadingRows': '1', - 'sourceFormat': 'CSV', - 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, - 'schema': {'fields': [ - { - 'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None, - }, - { - 'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED', - 'description': None, - }, - ]}, - 'schemaUpdateOptions': [ - SchemaUpdateOption.ALLOW_FIELD_ADDITION, - ], + "allowJaggedRows": True, + "allowQuotedNewlines": True, + "createDisposition": CreateDisposition.CREATE_NEVER, + "encoding": "ISO-8559-1", + "fieldDelimiter": "|", + "ignoreUnknownValues": True, + "maxBadRecords": 100, + "nullMarker": r"\N", + "quote": "'", + "skipLeadingRows": "1", + "sourceFormat": "CSV", + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + }, + ] + }, + "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], } - RESOURCE['configuration']['load'] = LOAD_CONFIGURATION + RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") config = LoadJobConfig() config.schema = [full_name, age] - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, - client1, config) + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1, config + ) config.allow_jagged_rows = True config.allow_quoted_newlines = True config.create_disposition = CreateDisposition.CREATE_NEVER - config.encoding = 'ISO-8559-1' - config.field_delimiter = '|' + config.encoding = "ISO-8559-1" + config.field_delimiter = "|" config.ignore_unknown_values = True config.max_bad_records = 100 - config.null_marker = r'\N' + config.null_marker = r"\N" config.quote_character = "'" config.skip_leading_rows = 1 - config.source_format = 'CSV' + config.source_format = "CSV" config.write_disposition = WriteDisposition.WRITE_TRUNCATE - config.schema_update_options = [ - SchemaUpdateOption.ALLOW_FIELD_ADDITION, - ] + config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] job._begin(client=client2) conn1.api_request.assert_not_called() self.assertEqual(len(conn2.api_request.call_args_list), 1) req = conn2.api_request.call_args_list[0] - self.assertEqual(req[1]['method'], 'POST') - self.assertEqual(req[1]['path'], PATH) + self.assertEqual(req[1]["method"], "POST") + self.assertEqual(req[1]["path"], PATH) SENT = { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'load': LOAD_CONFIGURATION, - }, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"load": LOAD_CONFIGURATION}, } self.maxDiff = None - self.assertEqual(req[1]['data'], SENT) + self.assertEqual(req[1]["data"], SENT) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_job_reference(self): from google.cloud.bigquery import job resource = self._make_resource() - resource['jobReference']['projectId'] = 'alternative-project' - resource['jobReference']['location'] = 'US' - job_ref = job._JobReference(self.JOB_ID, 'alternative-project', 'US') + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") conn = _make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one( - job_ref, [self.SOURCE1], self.TABLE_REF, client) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) load_job._begin() conn.api_request.assert_called_once() _, request = conn.api_request.call_args - self.assertEqual(request['method'], 'POST') - self.assertEqual( - request['path'], '/projects/alternative-project/jobs') + self.assertEqual(request["method"], "POST") + self.assertEqual(request["path"], "/projects/alternative-project/jobs") self.assertEqual( - request['data']['jobReference']['projectId'], - 'alternative-project') - self.assertEqual(request['data']['jobReference']['location'], 'US') - self.assertEqual(request['data']['jobReference']['jobId'], self.JOB_ID) + request["data"]["jobReference"]["projectId"], "alternative-project" + ) + self.assertEqual(request["data"]["jobReference"]["location"], "US") + self.assertEqual(request["data"]["jobReference"]["jobId"], self.JOB_ID) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) self.assertFalse(job.exists()) conn.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={'fields': 'id'}) + method="GET", path=PATH, query_params={"fields": "id"} + ) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) self.assertTrue(job.exists(client=client2)) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={'fields': 'id'}) + method="GET", path=PATH, query_params={"fields": "id"} + ) def test_exists_miss_w_job_reference(self): from google.cloud.bigquery import job - job_ref = job._JobReference('my-job-id', 'other-project', 'US') + job_ref = job._JobReference("my-job-id", "other-project", "US") conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one( - job_ref, [self.SOURCE1], self.TABLE_REF, client) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) self.assertFalse(load_job.exists()) conn.api_request.assert_called_once_with( - method='GET', - path='/projects/other-project/jobs/my-job-id', - query_params={'fields': 'id', 'location': 'US'}) + method="GET", + path="/projects/other-project/jobs/my-job-id", + query_params={"fields": "id", "location": "US"}, + ) def test_reload_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job.reload() conn.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={}) + method="GET", path=PATH, query_params={} + ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) job.reload(client=client2) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={}) + method="GET", path=PATH, query_params={} + ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_job_reference(self): from google.cloud.bigquery import job resource = self._make_resource(ended=True) - resource['jobReference']['projectId'] = 'alternative-project' - resource['jobReference']['location'] = 'US' - job_ref = job._JobReference(self.JOB_ID, 'alternative-project', 'US') + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") conn = _make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one( - job_ref, [self.SOURCE1], self.TABLE_REF, client) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) load_job.reload() conn.api_request.assert_called_once_with( - method='GET', - path='/projects/alternative-project/jobs/{}'.format( - self.JOB_ID), - query_params={'location': 'US'}) + method="GET", + path="/projects/alternative-project/jobs/{}".format(self.JOB_ID), + query_params={"location": "US"}, + ) def test_cancel_w_bound_client(self): - PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) - RESPONSE = {'job': RESOURCE} + RESPONSE = {"job": RESOURCE} conn = _make_connection(RESPONSE) client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job.cancel() conn.api_request.assert_called_once_with( - method='POST', - path=PATH, - query_params={}) + method="POST", path=PATH, query_params={} + ) self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) - RESPONSE = {'job': RESOURCE} + RESPONSE = {"job": RESOURCE} conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection(RESPONSE) client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) job.cancel(client=client2) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='POST', - path=PATH, - query_params={}) + method="POST", path=PATH, query_params={} + ) self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_job_reference(self): from google.cloud.bigquery import job resource = self._make_resource(ended=True) - resource['jobReference']['projectId'] = 'alternative-project' - resource['jobReference']['location'] = 'US' - job_ref = job._JobReference(self.JOB_ID, 'alternative-project', 'US') - conn = _make_connection({'job': resource}) + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + conn = _make_connection({"job": resource}) client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one( - job_ref, [self.SOURCE1], self.TABLE_REF, client) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) load_job.cancel() conn.api_request.assert_called_once_with( - method='POST', - path='/projects/alternative-project/jobs/{}/cancel'.format( - self.JOB_ID), - query_params={'location': 'US'}) + method="POST", + path="/projects/alternative-project/jobs/{}/cancel".format(self.JOB_ID), + query_params={"location": "US"}, + ) class TestCopyJobConfig(unittest.TestCase, _Base): - JOB_TYPE = 'copy' + JOB_TYPE = "copy" @staticmethod def _get_target_class(): from google.cloud.bigquery.job import CopyJobConfig + return CopyJobConfig def test_ctor_w_properties(self): @@ -2496,8 +2328,7 @@ def test_ctor_w_properties(self): create_disposition = CreateDisposition.CREATE_NEVER write_disposition = WriteDisposition.WRITE_TRUNCATE config = self._get_target_class()( - create_disposition=create_disposition, - write_disposition=write_disposition + create_disposition=create_disposition, write_disposition=write_disposition ) self.assertEqual(config.create_disposition, create_disposition) @@ -2508,35 +2339,33 @@ def test_to_api_repr_with_encryption(self): config = self._make_one() config.destination_encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME) + kms_key_name=self.KMS_KEY_NAME + ) resource = config.to_api_repr() self.assertEqual( resource, { - 'copy': { - 'destinationEncryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME, - }, - }, - }) + "copy": { + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + } + } + }, + ) def test_to_api_repr_with_encryption_none(self): config = self._make_one() config.destination_encryption_configuration = None resource = config.to_api_repr() self.assertEqual( - resource, - { - 'copy': { - 'destinationEncryptionConfiguration': None, - }, - }) + resource, {"copy": {"destinationEncryptionConfiguration": None}} + ) class TestCopyJob(unittest.TestCase, _Base): - JOB_TYPE = 'copy' - SOURCE_TABLE = 'source_table' - DESTINATION_TABLE = 'destination_table' + JOB_TYPE = "copy" + SOURCE_TABLE = "source_table" + DESTINATION_TABLE = "destination_table" @staticmethod def _get_target_class(): @@ -2545,18 +2374,19 @@ def _get_target_class(): return CopyJob def _make_resource(self, started=False, ended=False): - resource = super(TestCopyJob, self)._make_resource( - started, ended) - config = resource['configuration']['copy'] - config['sourceTables'] = [{ - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE, - }] - config['destinationTable'] = { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.DESTINATION_TABLE, + resource = super(TestCopyJob, self)._make_resource(started, ended) + config = resource["configuration"]["copy"] + config["sourceTables"] = [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ] + config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, } return resource @@ -2564,39 +2394,38 @@ def _make_resource(self, started=False, ended=False): def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) - config = resource.get('configuration', {}).get('copy') + config = resource.get("configuration", {}).get("copy") - table_ref = config['destinationTable'] - self.assertEqual(job.destination.project, table_ref['projectId']) - self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) - self.assertEqual(job.destination.table_id, table_ref['tableId']) + table_ref = config["destinationTable"] + self.assertEqual(job.destination.project, table_ref["projectId"]) + self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.destination.table_id, table_ref["tableId"]) - sources = config.get('sourceTables') + sources = config.get("sourceTables") if sources is None: - sources = [config['sourceTable']] + sources = [config["sourceTable"]] self.assertEqual(len(sources), len(job.sources)) for table_ref, table in zip(sources, job.sources): - self.assertEqual(table.project, table_ref['projectId']) - self.assertEqual(table.dataset_id, table_ref['datasetId']) - self.assertEqual(table.table_id, table_ref['tableId']) + self.assertEqual(table.project, table_ref["projectId"]) + self.assertEqual(table.dataset_id, table_ref["datasetId"]) + self.assertEqual(table.table_id, table_ref["tableId"]) - if 'createDisposition' in config: - self.assertEqual(job.create_disposition, - config['createDisposition']) + if "createDisposition" in config: + self.assertEqual(job.create_disposition, config["createDisposition"]) else: self.assertIsNone(job.create_disposition) - if 'writeDisposition' in config: - self.assertEqual(job.write_disposition, - config['writeDisposition']) + if "writeDisposition" in config: + self.assertEqual(job.write_disposition, config["writeDisposition"]) else: self.assertIsNone(job.write_disposition) - if 'destinationEncryptionConfiguration' in config: + if "destinationEncryptionConfiguration" in config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( job.destination_encryption_configuration.kms_key_name, - config['destinationEncryptionConfiguration']['kmsKeyName']) + config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) else: self.assertIsNone(job.destination_encryption_configuration) @@ -2609,9 +2438,7 @@ def test_ctor(self): self.assertEqual(job.sources, [source]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual( - job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) @@ -2632,11 +2459,8 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_ID), - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - } + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, } klass = self._get_target_class() with self.assertRaises(KeyError): @@ -2646,22 +2470,21 @@ def test_from_api_repr_bare(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': self.JOB_ID, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'copy': { - 'sourceTables': [{ - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE, - }], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.DESTINATION_TABLE, + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, }, } }, @@ -2675,26 +2498,25 @@ def test_from_api_with_encryption(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': self.JOB_ID, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'copy': { - 'sourceTables': [{ - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE, - }], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.DESTINATION_TABLE, + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME }, - 'destinationEncryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME - } } }, } @@ -2707,22 +2529,19 @@ def test_from_api_repr_w_sourcetable(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': self.JOB_ID, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'copy': { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE, + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, }, - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.DESTINATION_TABLE, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, }, } }, @@ -2736,18 +2555,15 @@ def test_from_api_repr_wo_sources(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': self.JOB_ID, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'copy': { - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.DESTINATION_TABLE, - }, + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + } } }, } @@ -2757,23 +2573,24 @@ def test_from_api_repr_wo_sources(self): def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition + client = _make_client(project=self.PROJECT) RESOURCE = self._make_resource() - copy_config = RESOURCE['configuration']['copy'] - copy_config['createDisposition'] = CreateDisposition.CREATE_IF_NEEDED + copy_config = RESOURCE["configuration"]["copy"] + copy_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_bound_client(self): - PATH = '/projects/%s/jobs' % (self.PROJECT,) + PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource() # Ensure None for missing server-set props - del RESOURCE['statistics']['creationTime'] - del RESOURCE['etag'] - del RESOURCE['selfLink'] - del RESOURCE['user_email'] + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) @@ -2783,28 +2600,28 @@ def test_begin_w_bound_client(self): job._begin() conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=PATH, data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'copy': { - 'sourceTables': [{ - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE - }], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.DESTINATION_TABLE, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, }, - }, + } }, - }) + }, + ) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): @@ -2812,23 +2629,26 @@ def test_begin_w_alternate_client(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import WriteDisposition - PATH = '/projects/%s/jobs' % (self.PROJECT,) + + PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource(ended=True) COPY_CONFIGURATION = { - 'sourceTables': [{ - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE, - }], - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.DESTINATION_TABLE, + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, }, - 'createDisposition': CreateDisposition.CREATE_NEVER, - 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, + "createDisposition": CreateDisposition.CREATE_NEVER, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, } - RESOURCE['configuration']['copy'] = COPY_CONFIGURATION + RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection(RESOURCE) @@ -2838,27 +2658,22 @@ def test_begin_w_alternate_client(self): config = CopyJobConfig() config.create_disposition = CreateDisposition.CREATE_NEVER config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job = self._make_one(self.JOB_ID, [source], destination, client1, - config) + job = self._make_one(self.JOB_ID, [source], destination, client1, config) job._begin(client=client2) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='POST', + method="POST", path=PATH, data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'copy': COPY_CONFIGURATION, - }, - }) + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"copy": COPY_CONFIGURATION}, + }, + ) self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) @@ -2869,12 +2684,11 @@ def test_exists_miss_w_bound_client(self): self.assertFalse(job.exists()) conn.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={'fields': 'id'}) + method="GET", path=PATH, query_params={"fields": "id"} + ) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection({}) @@ -2887,12 +2701,11 @@ def test_exists_hit_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={'fields': 'id'}) + method="GET", path=PATH, query_params={"fields": "id"} + ) def test_reload_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) @@ -2903,13 +2716,12 @@ def test_reload_w_bound_client(self): job.reload() conn.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={}) + method="GET", path=PATH, query_params={} + ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) @@ -2923,72 +2735,73 @@ def test_reload_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={}) + method="GET", path=PATH, query_params={} + ) self._verifyResourceProperties(job, RESOURCE) class TestExtractJobConfig(unittest.TestCase, _Base): - JOB_TYPE = 'extract' + JOB_TYPE = "extract" @staticmethod def _get_target_class(): from google.cloud.bigquery.job import ExtractJobConfig + return ExtractJobConfig def test_ctor_w_properties(self): - config = self._get_target_class()( - field_delimiter='\t', print_header=True) + config = self._get_target_class()(field_delimiter="\t", print_header=True) - self.assertEqual(config.field_delimiter, '\t') + self.assertEqual(config.field_delimiter, "\t") self.assertTrue(config.print_header) def test_to_api_repr(self): from google.cloud.bigquery import job + config = self._make_one() config.compression = job.Compression.SNAPPY config.destination_format = job.DestinationFormat.AVRO - config.field_delimiter = 'ignored for avro' + config.field_delimiter = "ignored for avro" config.print_header = False - config._properties['extract']['someNewField'] = 'some-value' + config._properties["extract"]["someNewField"] = "some-value" resource = config.to_api_repr() self.assertEqual( resource, { - 'extract': { - 'compression': 'SNAPPY', - 'destinationFormat': 'AVRO', - 'fieldDelimiter': 'ignored for avro', - 'printHeader': False, - 'someNewField': 'some-value', - }, - }) + "extract": { + "compression": "SNAPPY", + "destinationFormat": "AVRO", + "fieldDelimiter": "ignored for avro", + "printHeader": False, + "someNewField": "some-value", + } + }, + ) def test_from_api_repr(self): cls = self._get_target_class() config = cls.from_api_repr( { - 'extract': { - 'compression': 'NONE', - 'destinationFormat': 'CSV', - 'fieldDelimiter': '\t', - 'printHeader': True, - 'someNewField': 'some-value', - }, - }) - self.assertEqual(config.compression, 'NONE') - self.assertEqual(config.destination_format, 'CSV') - self.assertEqual(config.field_delimiter, '\t') + "extract": { + "compression": "NONE", + "destinationFormat": "CSV", + "fieldDelimiter": "\t", + "printHeader": True, + "someNewField": "some-value", + } + } + ) + self.assertEqual(config.compression, "NONE") + self.assertEqual(config.destination_format, "CSV") + self.assertEqual(config.field_delimiter, "\t") self.assertEqual(config.print_header, True) - self.assertEqual( - config._properties['extract']['someNewField'], 'some-value') + self.assertEqual(config._properties["extract"]["someNewField"], "some-value") class TestExtractJob(unittest.TestCase, _Base): - JOB_TYPE = 'extract' - SOURCE_TABLE = 'source_table' - DESTINATION_URI = 'gs://bucket_name/object_name' + JOB_TYPE = "extract" + SOURCE_TABLE = "source_table" + DESTINATION_URI = "gs://bucket_name/object_name" @staticmethod def _get_target_class(): @@ -2997,50 +2810,45 @@ def _get_target_class(): return ExtractJob def _make_resource(self, started=False, ended=False): - resource = super(TestExtractJob, self)._make_resource( - started, ended) - config = resource['configuration']['extract'] - config['sourceTable'] = { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE, + resource = super(TestExtractJob, self)._make_resource(started, ended) + config = resource["configuration"]["extract"] + config["sourceTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, } - config['destinationUris'] = [self.DESTINATION_URI] + config["destinationUris"] = [self.DESTINATION_URI] return resource def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) - config = resource.get('configuration', {}).get('extract') + config = resource.get("configuration", {}).get("extract") - self.assertEqual(job.destination_uris, config['destinationUris']) + self.assertEqual(job.destination_uris, config["destinationUris"]) - table_ref = config['sourceTable'] - self.assertEqual(job.source.project, table_ref['projectId']) - self.assertEqual(job.source.dataset_id, table_ref['datasetId']) - self.assertEqual(job.source.table_id, table_ref['tableId']) + table_ref = config["sourceTable"] + self.assertEqual(job.source.project, table_ref["projectId"]) + self.assertEqual(job.source.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.source.table_id, table_ref["tableId"]) - if 'compression' in config: - self.assertEqual( - job.compression, config['compression']) + if "compression" in config: + self.assertEqual(job.compression, config["compression"]) else: self.assertIsNone(job.compression) - if 'destinationFormat' in config: - self.assertEqual( - job.destination_format, config['destinationFormat']) + if "destinationFormat" in config: + self.assertEqual(job.destination_format, config["destinationFormat"]) else: self.assertIsNone(job.destination_format) - if 'fieldDelimiter' in config: - self.assertEqual( - job.field_delimiter, config['fieldDelimiter']) + if "fieldDelimiter" in config: + self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) else: self.assertIsNone(job.field_delimiter) - if 'printHeader' in config: - self.assertEqual( - job.print_header, config['printHeader']) + if "printHeader" in config: + self.assertEqual(job.print_header, config["printHeader"]) else: self.assertIsNone(job.print_header) @@ -3049,17 +2857,14 @@ def test_ctor(self): client = _make_client(project=self.PROJECT) source = Table(self.TABLE_REF) - job = self._make_one( - self.JOB_ID, source, [self.DESTINATION_URI], client) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) self.assertEqual(job.source.project, self.PROJECT) self.assertEqual(job.source.dataset_id, self.DS_ID) self.assertEqual(job.source.table_id, self.TABLE_ID) self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual( - job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) @@ -3073,16 +2878,17 @@ def test_destination_uri_file_counts(self): file_counts = 23 client = _make_client(project=self.PROJECT) job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client) + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client + ) self.assertIsNone(job.destination_uri_file_counts) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertIsNone(job.destination_uri_file_counts) - extract_stats = statistics['extract'] = {} + extract_stats = statistics["extract"] = {} self.assertIsNone(job.destination_uri_file_counts) - extract_stats['destinationUriFileCounts'] = [str(file_counts)] + extract_stats["destinationUriFileCounts"] = [str(file_counts)] self.assertEqual(job.destination_uri_file_counts, [file_counts]) def test_from_api_repr_missing_identity(self): @@ -3097,11 +2903,8 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_ID), - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - } + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, } klass = self._get_target_class() with self.assertRaises(KeyError): @@ -3111,19 +2914,16 @@ def test_from_api_repr_bare(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': self.JOB_ID, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'extract': { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE, + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, }, - 'destinationUris': [self.DESTINATION_URI], + "destinationUris": [self.DESTINATION_URI], } }, } @@ -3134,10 +2934,11 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import Compression + client = _make_client(project=self.PROJECT) RESOURCE = self._make_resource() - extract_config = RESOURCE['configuration']['extract'] - extract_config['compression'] = Compression.GZIP + extract_config = RESOURCE["configuration"]["extract"] + extract_config["compression"] = Compression.GZIP klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) @@ -3146,41 +2947,38 @@ def test_from_api_repr_w_properties(self): def test_begin_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference - PATH = '/projects/%s/jobs' % (self.PROJECT,) + PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource() # Ensure None for missing server-set props - del RESOURCE['statistics']['creationTime'] - del RESOURCE['etag'] - del RESOURCE['selfLink'] - del RESOURCE['user_email'] + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], - client) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) job._begin() conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=PATH, data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'extract': { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, }, - 'destinationUris': [self.DESTINATION_URI], - }, + "destinationUris": [self.DESTINATION_URI], + } }, - }) + }, + ) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): @@ -3189,21 +2987,21 @@ def test_begin_w_alternate_client(self): from google.cloud.bigquery.job import DestinationFormat from google.cloud.bigquery.job import ExtractJobConfig - PATH = '/projects/%s/jobs' % (self.PROJECT,) + PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource(ended=True) EXTRACT_CONFIGURATION = { - 'sourceTable': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.SOURCE_TABLE, + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, }, - 'destinationUris': [self.DESTINATION_URI], - 'compression': Compression.GZIP, - 'destinationFormat': DestinationFormat.NEWLINE_DELIMITED_JSON, - 'fieldDelimiter': '|', - 'printHeader': False, + "destinationUris": [self.DESTINATION_URI], + "compression": Compression.GZIP, + "destinationFormat": DestinationFormat.NEWLINE_DELIMITED_JSON, + "fieldDelimiter": "|", + "printHeader": False, } - RESOURCE['configuration']['extract'] = EXTRACT_CONFIGURATION + RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection(RESOURCE) @@ -3213,81 +3011,78 @@ def test_begin_w_alternate_client(self): config = ExtractJobConfig() config.compression = Compression.GZIP config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON - config.field_delimiter = '|' + config.field_delimiter = "|" config.print_header = False - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], - client1, config) + job = self._make_one( + self.JOB_ID, source, [self.DESTINATION_URI], client1, config + ) job._begin(client=client2) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='POST', + method="POST", path=PATH, data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'extract': EXTRACT_CONFIGURATION, - }, - }) + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"extract": EXTRACT_CONFIGURATION}, + }, + ) self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client) + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client + ) self.assertFalse(job.exists()) conn.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={'fields': 'id'}) + method="GET", path=PATH, query_params={"fields": "id"} + ) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1) + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 + ) self.assertTrue(job.exists(client=client2)) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={'fields': 'id'}) + method="GET", path=PATH, query_params={"fields": "id"} + ) def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], - client) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) job.reload() conn.api_request.assert_called_once_with( - method='GET', path=PATH, query_params={}) + method="GET", path=PATH, query_params={} + ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): from google.cloud.bigquery.dataset import DatasetReference - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) @@ -3295,14 +3090,14 @@ def test_reload_w_alternate_client(self): client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], - client1) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) job.reload(client=client2) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='GET', path=PATH, query_params={}) + method="GET", path=PATH, query_params={} + ) self._verifyResourceProperties(job, RESOURCE) @@ -3318,7 +3113,7 @@ def _make_one(self, *args, **kw): def test_ctor(self): config = self._make_one() - self.assertEqual(config._properties, {'query': {}}) + self.assertEqual(config._properties, {"query": {}}) def test_ctor_w_none(self): config = self._make_one() @@ -3328,8 +3123,7 @@ def test_ctor_w_none(self): self.assertIsNone(config.destination) def test_ctor_w_properties(self): - config = self._get_target_class()( - use_query_cache=False, use_legacy_sql=True) + config = self._get_target_class()(use_query_cache=False, use_legacy_sql=True) self.assertFalse(config.use_query_cache) self.assertTrue(config.use_legacy_sql) @@ -3338,22 +3132,22 @@ def test_time_partitioning(self): from google.cloud.bigquery import table time_partitioning = table.TimePartitioning( - type_=table.TimePartitioningType.DAY, field='name') + type_=table.TimePartitioningType.DAY, field="name" + ) config = self._make_one() config.time_partitioning = time_partitioning # TimePartitioning should be configurable after assigning time_partitioning.expiration_ms = 10000 - self.assertEqual( - config.time_partitioning.type_, table.TimePartitioningType.DAY) - self.assertEqual(config.time_partitioning.field, 'name') + self.assertEqual(config.time_partitioning.type_, table.TimePartitioningType.DAY) + self.assertEqual(config.time_partitioning.field, "name") self.assertEqual(config.time_partitioning.expiration_ms, 10000) config.time_partitioning = None self.assertIsNone(config.time_partitioning) def test_clustering_fields(self): - fields = ['email', 'postal_code'] + fields = ["email", "postal_code"] config = self._get_target_class()() config.clustering_fields = fields self.assertEqual(config.clustering_fields, fields) @@ -3374,16 +3168,16 @@ def test_from_api_repr_normal(self): from google.cloud.bigquery.dataset import DatasetReference resource = { - 'query': { - 'useLegacySql': True, - 'query': 'no property for me', - 'defaultDataset': { - 'projectId': 'someproject', - 'datasetId': 'somedataset', + "query": { + "useLegacySql": True, + "query": "no property for me", + "defaultDataset": { + "projectId": "someproject", + "datasetId": "somedataset", }, - 'someNewProperty': 'I should be saved, too.', + "someNewProperty": "I should be saved, too.", }, - 'dryRun': True, + "dryRun": True, } klass = self._get_target_class() @@ -3391,85 +3185,81 @@ def test_from_api_repr_normal(self): self.assertTrue(config.use_legacy_sql) self.assertEqual( - config.default_dataset, - DatasetReference('someproject', 'somedataset')) + config.default_dataset, DatasetReference("someproject", "somedataset") + ) self.assertTrue(config.dry_run) # Make sure unknown properties propagate. + self.assertEqual(config._properties["query"]["query"], "no property for me") self.assertEqual( - config._properties['query']['query'], 'no property for me') - self.assertEqual( - config._properties['query']['someNewProperty'], - 'I should be saved, too.') + config._properties["query"]["someNewProperty"], "I should be saved, too." + ) def test_to_api_repr_normal(self): from google.cloud.bigquery.dataset import DatasetReference config = self._make_one() config.use_legacy_sql = True - config.default_dataset = DatasetReference( - 'someproject', 'somedataset') + config.default_dataset = DatasetReference("someproject", "somedataset") config.dry_run = False - config._properties['someNewProperty'] = 'Woohoo, alpha stuff.' + config._properties["someNewProperty"] = "Woohoo, alpha stuff." resource = config.to_api_repr() - self.assertFalse(resource['dryRun']) - self.assertTrue(resource['query']['useLegacySql']) + self.assertFalse(resource["dryRun"]) + self.assertTrue(resource["query"]["useLegacySql"]) self.assertEqual( - resource['query']['defaultDataset']['projectId'], 'someproject') + resource["query"]["defaultDataset"]["projectId"], "someproject" + ) self.assertEqual( - resource['query']['defaultDataset']['datasetId'], 'somedataset') + resource["query"]["defaultDataset"]["datasetId"], "somedataset" + ) # Make sure unknown properties propagate. - self.assertEqual( - resource['someNewProperty'], 'Woohoo, alpha stuff.') + self.assertEqual(resource["someNewProperty"], "Woohoo, alpha stuff.") def test_to_api_repr_with_encryption(self): from google.cloud.bigquery.table import EncryptionConfiguration config = self._make_one() config.destination_encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME) + kms_key_name=self.KMS_KEY_NAME + ) resource = config.to_api_repr() self.assertEqual( - resource, { - 'query': { - 'destinationEncryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME, - }, - }, - }) + resource, + { + "query": { + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + } + } + }, + ) def test_to_api_repr_with_encryption_none(self): config = self._make_one() config.destination_encryption_configuration = None resource = config.to_api_repr() self.assertEqual( - resource, - { - 'query': { - 'destinationEncryptionConfiguration': None, - }, - }) + resource, {"query": {"destinationEncryptionConfiguration": None}} + ) def test_from_api_repr_with_encryption(self): resource = { - 'query': { - 'destinationEncryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME, - }, - }, + "query": { + "destinationEncryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME} + } } klass = self._get_target_class() config = klass.from_api_repr(resource) self.assertEqual( - config.destination_encryption_configuration.kms_key_name, - self.KMS_KEY_NAME) + config.destination_encryption_configuration.kms_key_name, self.KMS_KEY_NAME + ) class TestQueryJob(unittest.TestCase, _Base): - JOB_TYPE = 'query' - QUERY = 'select count(*) from persons' - DESTINATION_TABLE = 'destination_table' + JOB_TYPE = "query" + QUERY = "select count(*) from persons" + DESTINATION_TABLE = "destination_table" @staticmethod def _get_target_class(): @@ -3478,71 +3268,66 @@ def _get_target_class(): return QueryJob def _make_resource(self, started=False, ended=False): - resource = super(TestQueryJob, self)._make_resource( - started, ended) - config = resource['configuration']['query'] - config['query'] = self.QUERY + resource = super(TestQueryJob, self)._make_resource(started, ended) + config = resource["configuration"]["query"] + config["query"] = self.QUERY if ended: - resource['status'] = {'state': 'DONE'} + resource["status"] = {"state": "DONE"} return resource def _verifyBooleanResourceProperties(self, job, config): - if 'allowLargeResults' in config: - self.assertEqual(job.allow_large_results, - config['allowLargeResults']) + if "allowLargeResults" in config: + self.assertEqual(job.allow_large_results, config["allowLargeResults"]) else: self.assertIsNone(job.allow_large_results) - if 'flattenResults' in config: - self.assertEqual(job.flatten_results, - config['flattenResults']) + if "flattenResults" in config: + self.assertEqual(job.flatten_results, config["flattenResults"]) else: self.assertIsNone(job.flatten_results) - if 'useQueryCache' in config: - self.assertEqual(job.use_query_cache, - config['useQueryCache']) + if "useQueryCache" in config: + self.assertEqual(job.use_query_cache, config["useQueryCache"]) else: self.assertIsNone(job.use_query_cache) - if 'useLegacySql' in config: - self.assertEqual(job.use_legacy_sql, - config['useLegacySql']) + if "useLegacySql" in config: + self.assertEqual(job.use_legacy_sql, config["useLegacySql"]) else: self.assertIsNone(job.use_legacy_sql) def _verifyIntegerResourceProperties(self, job, config): - if 'maximumBillingTier' in config: - self.assertEqual( - job.maximum_billing_tier, config['maximumBillingTier']) + if "maximumBillingTier" in config: + self.assertEqual(job.maximum_billing_tier, config["maximumBillingTier"]) else: self.assertIsNone(job.maximum_billing_tier) - if 'maximumBytesBilled' in config: + if "maximumBytesBilled" in config: self.assertEqual( - str(job.maximum_bytes_billed), config['maximumBytesBilled']) + str(job.maximum_bytes_billed), config["maximumBytesBilled"] + ) self.assertIsInstance(job.maximum_bytes_billed, int) else: self.assertIsNone(job.maximum_bytes_billed) def _verify_udf_resources(self, job, config): - udf_resources = config.get('userDefinedFunctionResources', ()) + udf_resources = config.get("userDefinedFunctionResources", ()) self.assertEqual(len(job.udf_resources), len(udf_resources)) for found, expected in zip(job.udf_resources, udf_resources): - if 'resourceUri' in expected: - self.assertEqual(found.udf_type, 'resourceUri') - self.assertEqual(found.value, expected['resourceUri']) + if "resourceUri" in expected: + self.assertEqual(found.udf_type, "resourceUri") + self.assertEqual(found.value, expected["resourceUri"]) else: - self.assertEqual(found.udf_type, 'inlineCode') - self.assertEqual(found.value, expected['inlineCode']) + self.assertEqual(found.udf_type, "inlineCode") + self.assertEqual(found.value, expected["inlineCode"]) def _verifyQueryParameters(self, job, config): - query_parameters = config.get('queryParameters', ()) + query_parameters = config.get("queryParameters", ()) self.assertEqual(len(job.query_parameters), len(query_parameters)) for found, expected in zip(job.query_parameters, query_parameters): self.assertEqual(found.to_api_repr(), expected) def _verify_table_definitions(self, job, config): - table_defs = config.get('tableDefinitions') + table_defs = config.get("tableDefinitions") if job.table_definitions is None: self.assertIsNone(table_defs) else: @@ -3553,71 +3338,65 @@ def _verify_table_definitions(self, job, config): self.assertEqual(found_ec.to_api_repr(), expected_ec) def _verify_configuration_properties(self, job, configuration): - if 'dryRun' in configuration: - self.assertEqual(job.dry_run, - configuration['dryRun']) + if "dryRun" in configuration: + self.assertEqual(job.dry_run, configuration["dryRun"]) else: self.assertIsNone(job.dry_run) def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) - configuration = resource.get('configuration', {}) + configuration = resource.get("configuration", {}) self._verify_configuration_properties(job, configuration) - query_config = resource.get('configuration', {}).get('query') + query_config = resource.get("configuration", {}).get("query") self._verifyBooleanResourceProperties(job, query_config) self._verifyIntegerResourceProperties(job, query_config) self._verify_udf_resources(job, query_config) self._verifyQueryParameters(job, query_config) self._verify_table_definitions(job, query_config) - self.assertEqual(job.query, query_config['query']) - if 'createDisposition' in query_config: - self.assertEqual(job.create_disposition, - query_config['createDisposition']) + self.assertEqual(job.query, query_config["query"]) + if "createDisposition" in query_config: + self.assertEqual(job.create_disposition, query_config["createDisposition"]) else: self.assertIsNone(job.create_disposition) - if 'defaultDataset' in query_config: + if "defaultDataset" in query_config: ds_ref = job.default_dataset - ds_ref = { - 'projectId': ds_ref.project, - 'datasetId': ds_ref.dataset_id, - } - self.assertEqual(ds_ref, query_config['defaultDataset']) + ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id} + self.assertEqual(ds_ref, query_config["defaultDataset"]) else: self.assertIsNone(job.default_dataset) - if 'destinationTable' in query_config: + if "destinationTable" in query_config: table = job.destination tb_ref = { - 'projectId': table.project, - 'datasetId': table.dataset_id, - 'tableId': table.table_id + "projectId": table.project, + "datasetId": table.dataset_id, + "tableId": table.table_id, } - self.assertEqual(tb_ref, query_config['destinationTable']) + self.assertEqual(tb_ref, query_config["destinationTable"]) else: self.assertIsNone(job.destination) - if 'priority' in query_config: - self.assertEqual(job.priority, - query_config['priority']) + if "priority" in query_config: + self.assertEqual(job.priority, query_config["priority"]) else: self.assertIsNone(job.priority) - if 'writeDisposition' in query_config: - self.assertEqual(job.write_disposition, - query_config['writeDisposition']) + if "writeDisposition" in query_config: + self.assertEqual(job.write_disposition, query_config["writeDisposition"]) else: self.assertIsNone(job.write_disposition) - if 'destinationEncryptionConfiguration' in query_config: + if "destinationEncryptionConfiguration" in query_config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( job.destination_encryption_configuration.kms_key_name, - query_config['destinationEncryptionConfiguration'][ - 'kmsKeyName']) + query_config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) else: self.assertIsNone(job.destination_encryption_configuration) - if 'schemaUpdateOptions' in query_config: + if "schemaUpdateOptions" in query_config: self.assertEqual( - job.schema_update_options, query_config['schemaUpdateOptions']) + job.schema_update_options, query_config["schemaUpdateOptions"] + ) else: self.assertIsNone(job.schema_update_options) @@ -3627,9 +3406,7 @@ def test_ctor_defaults(self): self.assertEqual(job.query, self.QUERY) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual( - job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) @@ -3657,24 +3434,22 @@ def test_ctor_w_udf_resources(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import UDFResource - RESOURCE_URI = 'gs://some-bucket/js/lib.js' + RESOURCE_URI = "gs://some-bucket/js/lib.js" udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] client = _make_client(project=self.PROJECT) config = QueryJobConfig() config.udf_resources = udf_resources - job = self._make_one( - self.JOB_ID, self.QUERY, client, job_config=config) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) self.assertEqual(job.udf_resources, udf_resources) def test_ctor_w_query_parameters(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import ScalarQueryParameter - query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] + query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] client = _make_client(project=self.PROJECT) config = QueryJobConfig(query_parameters=query_parameters) - job = self._make_one( - self.JOB_ID, self.QUERY, client, job_config=config) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) self.assertEqual(job.query_parameters, query_parameters) def test_from_api_repr_missing_identity(self): @@ -3689,11 +3464,8 @@ def test_from_api_repr_missing_config(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_ID), - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - } + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, } klass = self._get_target_class() with self.assertRaises(KeyError): @@ -3703,14 +3475,9 @@ def test_from_api_repr_bare(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': self.JOB_ID, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': {'query': self.QUERY}, - }, + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, } klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) @@ -3721,18 +3488,15 @@ def test_from_api_repr_with_encryption(self): self._setUpConstants() client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': self.JOB_ID, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'destinationEncryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME - } - }, + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + }, + } }, } klass = self._get_target_class() @@ -3747,17 +3511,15 @@ def test_from_api_repr_w_properties(self): client = _make_client(project=self.PROJECT) RESOURCE = self._make_resource() - query_config = RESOURCE['configuration']['query'] - query_config['createDisposition'] = CreateDisposition.CREATE_IF_NEEDED - query_config['writeDisposition'] = WriteDisposition.WRITE_TRUNCATE - query_config['destinationTable'] = { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.DESTINATION_TABLE, + query_config = RESOURCE["configuration"]["query"] + query_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED + query_config["writeDisposition"] = WriteDisposition.WRITE_TRUNCATE + query_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, } - query_config['schemaUpdateOptions'] = [ - SchemaUpdateOption.ALLOW_FIELD_ADDITION, - ] + query_config["schemaUpdateOptions"] = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] klass = self._get_target_class() job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) @@ -3766,11 +3528,9 @@ def test_from_api_repr_w_properties(self): def test_cancelled(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) - job._properties['status'] = { - 'state': 'DONE', - 'errorResult': { - 'reason': 'stopped' - } + job._properties["status"] = { + "state": "DONE", + "errorResult": {"reason": "stopped"}, } self.assertTrue(job.cancelled()) @@ -3786,113 +3546,102 @@ def test_query_plan(self): from google.cloud.bigquery.job import QueryPlanEntry from google.cloud.bigquery.job import QueryPlanEntryStep - plan_entries = [{ - 'name': 'NAME', - 'id': '1234', - 'inputStages': ['88', '101'], - 'startMs': '1522540800000', - 'endMs': '1522540804000', - 'parallelInputs': '1000', - 'completedParallelInputs': '5', - 'waitMsAvg': '33', - 'waitMsMax': '400', - 'waitRatioAvg': 2.71828, - 'waitRatioMax': 3.14159, - 'readMsAvg': '45', - 'readMsMax': '90', - 'readRatioAvg': 1.41421, - 'readRatioMax': 1.73205, - 'computeMsAvg': '55', - 'computeMsMax': '99', - 'computeRatioAvg': 0.69315, - 'computeRatioMax': 1.09861, - 'writeMsAvg': '203', - 'writeMsMax': '340', - 'writeRatioAvg': 3.32193, - 'writeRatioMax': 2.30258, - 'recordsRead': '100', - 'recordsWritten': '1', - 'status': 'STATUS', - 'shuffleOutputBytes': '1024', - 'shuffleOutputBytesSpilled': '1', - 'steps': [{ - 'kind': 'KIND', - 'substeps': ['SUBSTEP1', 'SUBSTEP2'], - }], - }] + plan_entries = [ + { + "name": "NAME", + "id": "1234", + "inputStages": ["88", "101"], + "startMs": "1522540800000", + "endMs": "1522540804000", + "parallelInputs": "1000", + "completedParallelInputs": "5", + "waitMsAvg": "33", + "waitMsMax": "400", + "waitRatioAvg": 2.71828, + "waitRatioMax": 3.14159, + "readMsAvg": "45", + "readMsMax": "90", + "readRatioAvg": 1.41421, + "readRatioMax": 1.73205, + "computeMsAvg": "55", + "computeMsMax": "99", + "computeRatioAvg": 0.69315, + "computeRatioMax": 1.09861, + "writeMsAvg": "203", + "writeMsMax": "340", + "writeRatioAvg": 3.32193, + "writeRatioMax": 2.30258, + "recordsRead": "100", + "recordsWritten": "1", + "status": "STATUS", + "shuffleOutputBytes": "1024", + "shuffleOutputBytesSpilled": "1", + "steps": [{"kind": "KIND", "substeps": ["SUBSTEP1", "SUBSTEP2"]}], + } + ] client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.query_plan, []) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertEqual(job.query_plan, []) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertEqual(job.query_plan, []) - query_stats['queryPlan'] = plan_entries + query_stats["queryPlan"] = plan_entries self.assertEqual(len(job.query_plan), len(plan_entries)) for found, expected in zip(job.query_plan, plan_entries): self.assertIsInstance(found, QueryPlanEntry) - self.assertEqual(found.name, expected['name']) - self.assertEqual(found.entry_id, expected['id']) - self.assertEqual( - len(found.input_stages), - len(expected['inputStages'])) + self.assertEqual(found.name, expected["name"]) + self.assertEqual(found.entry_id, expected["id"]) + self.assertEqual(len(found.input_stages), len(expected["inputStages"])) for f_id in found.input_stages: - self.assertIn(f_id, [int(e) for e in expected['inputStages']]) - self.assertEqual( - found.start.strftime(_RFC3339_MICROS), - '2018-04-01T00:00:00.000000Z') + self.assertIn(f_id, [int(e) for e in expected["inputStages"]]) self.assertEqual( - found.end.strftime(_RFC3339_MICROS), - '2018-04-01T00:00:04.000000Z') + found.start.strftime(_RFC3339_MICROS), "2018-04-01T00:00:00.000000Z" + ) self.assertEqual( - found.parallel_inputs, - int(expected['parallelInputs'])) + found.end.strftime(_RFC3339_MICROS), "2018-04-01T00:00:04.000000Z" + ) + self.assertEqual(found.parallel_inputs, int(expected["parallelInputs"])) self.assertEqual( found.completed_parallel_inputs, - int(expected['completedParallelInputs'])) - self.assertEqual(found.wait_ms_avg, int(expected['waitMsAvg'])) - self.assertEqual(found.wait_ms_max, int(expected['waitMsMax'])) - self.assertEqual(found.wait_ratio_avg, expected['waitRatioAvg']) - self.assertEqual(found.wait_ratio_max, expected['waitRatioMax']) - self.assertEqual(found.read_ms_avg, int(expected['readMsAvg'])) - self.assertEqual(found.read_ms_max, int(expected['readMsMax'])) - self.assertEqual(found.read_ratio_avg, expected['readRatioAvg']) - self.assertEqual(found.read_ratio_max, expected['readRatioMax']) - self.assertEqual( - found.compute_ms_avg, - int(expected['computeMsAvg'])) - self.assertEqual( - found.compute_ms_max, - int(expected['computeMsMax'])) - self.assertEqual( - found.compute_ratio_avg, expected['computeRatioAvg']) + int(expected["completedParallelInputs"]), + ) + self.assertEqual(found.wait_ms_avg, int(expected["waitMsAvg"])) + self.assertEqual(found.wait_ms_max, int(expected["waitMsMax"])) + self.assertEqual(found.wait_ratio_avg, expected["waitRatioAvg"]) + self.assertEqual(found.wait_ratio_max, expected["waitRatioMax"]) + self.assertEqual(found.read_ms_avg, int(expected["readMsAvg"])) + self.assertEqual(found.read_ms_max, int(expected["readMsMax"])) + self.assertEqual(found.read_ratio_avg, expected["readRatioAvg"]) + self.assertEqual(found.read_ratio_max, expected["readRatioMax"]) + self.assertEqual(found.compute_ms_avg, int(expected["computeMsAvg"])) + self.assertEqual(found.compute_ms_max, int(expected["computeMsMax"])) + self.assertEqual(found.compute_ratio_avg, expected["computeRatioAvg"]) + self.assertEqual(found.compute_ratio_max, expected["computeRatioMax"]) + self.assertEqual(found.write_ms_avg, int(expected["writeMsAvg"])) + self.assertEqual(found.write_ms_max, int(expected["writeMsMax"])) + self.assertEqual(found.write_ratio_avg, expected["writeRatioAvg"]) + self.assertEqual(found.write_ratio_max, expected["writeRatioMax"]) + self.assertEqual(found.records_read, int(expected["recordsRead"])) + self.assertEqual(found.records_written, int(expected["recordsWritten"])) + self.assertEqual(found.status, expected["status"]) self.assertEqual( - found.compute_ratio_max, expected['computeRatioMax']) - self.assertEqual(found.write_ms_avg, int(expected['writeMsAvg'])) - self.assertEqual(found.write_ms_max, int(expected['writeMsMax'])) - self.assertEqual(found.write_ratio_avg, expected['writeRatioAvg']) - self.assertEqual(found.write_ratio_max, expected['writeRatioMax']) - self.assertEqual( - found.records_read, int(expected['recordsRead'])) - self.assertEqual( - found.records_written, int(expected['recordsWritten'])) - self.assertEqual(found.status, expected['status']) - self.assertEqual( - found.shuffle_output_bytes, - int(expected['shuffleOutputBytes'])) + found.shuffle_output_bytes, int(expected["shuffleOutputBytes"]) + ) self.assertEqual( found.shuffle_output_bytes_spilled, - int(expected['shuffleOutputBytesSpilled'])) + int(expected["shuffleOutputBytesSpilled"]), + ) - self.assertEqual(len(found.steps), len(expected['steps'])) - for f_step, e_step in zip(found.steps, expected['steps']): + self.assertEqual(len(found.steps), len(expected["steps"])) + for f_step, e_step in zip(found.steps, expected["steps"]): self.assertIsInstance(f_step, QueryPlanEntryStep) - self.assertEqual(f_step.kind, e_step['kind']) - self.assertEqual(f_step.substeps, e_step['substeps']) + self.assertEqual(f_step.kind, e_step["kind"]) + self.assertEqual(f_step.substeps, e_step["substeps"]) def test_total_bytes_processed(self): total_bytes = 1234 @@ -3900,13 +3649,13 @@ def test_total_bytes_processed(self): job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.total_bytes_processed) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertIsNone(job.total_bytes_processed) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertIsNone(job.total_bytes_processed) - query_stats['totalBytesProcessed'] = str(total_bytes) + query_stats["totalBytesProcessed"] = str(total_bytes) self.assertEqual(job.total_bytes_processed, total_bytes) def test_total_bytes_billed(self): @@ -3915,13 +3664,13 @@ def test_total_bytes_billed(self): job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.total_bytes_billed) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertIsNone(job.total_bytes_billed) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertIsNone(job.total_bytes_billed) - query_stats['totalBytesBilled'] = str(total_bytes) + query_stats["totalBytesBilled"] = str(total_bytes) self.assertEqual(job.total_bytes_billed, total_bytes) def test_billing_tier(self): @@ -3930,13 +3679,13 @@ def test_billing_tier(self): job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.billing_tier) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertIsNone(job.billing_tier) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertIsNone(job.billing_tier) - query_stats['billingTier'] = billing_tier + query_stats["billingTier"] = billing_tier self.assertEqual(job.billing_tier, billing_tier) def test_cache_hit(self): @@ -3944,52 +3693,52 @@ def test_cache_hit(self): job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.cache_hit) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertIsNone(job.cache_hit) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertIsNone(job.cache_hit) - query_stats['cacheHit'] = True + query_stats["cacheHit"] = True self.assertTrue(job.cache_hit) def test_ddl_operation_performed(self): - op = 'SKIP' + op = "SKIP" client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.ddl_operation_performed) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertIsNone(job.ddl_operation_performed) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertIsNone(job.ddl_operation_performed) - query_stats['ddlOperationPerformed'] = op + query_stats["ddlOperationPerformed"] = op self.assertEqual(job.ddl_operation_performed, op) def test_ddl_target_table(self): from google.cloud.bigquery.table import TableReference ref_table = { - 'projectId': self.PROJECT, - 'datasetId': 'ddl_ds', - 'tableId': 'targettable', + "projectId": self.PROJECT, + "datasetId": "ddl_ds", + "tableId": "targettable", } client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.ddl_target_table) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertIsNone(job.ddl_target_table) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertIsNone(job.ddl_target_table) - query_stats['ddlTargetTable'] = ref_table + query_stats["ddlTargetTable"] = ref_table self.assertIsInstance(job.ddl_target_table, TableReference) - self.assertEqual(job.ddl_target_table.table_id, 'targettable') - self.assertEqual(job.ddl_target_table.dataset_id, 'ddl_ds') + self.assertEqual(job.ddl_target_table.table_id, "targettable") + self.assertEqual(job.ddl_target_table.dataset_id, "ddl_ds") self.assertEqual(job.ddl_target_table.project, self.PROJECT) def test_num_dml_affected_rows(self): @@ -3998,13 +3747,13 @@ def test_num_dml_affected_rows(self): job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.num_dml_affected_rows) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertIsNone(job.num_dml_affected_rows) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertIsNone(job.num_dml_affected_rows) - query_stats['numDmlAffectedRows'] = str(num_rows) + query_stats["numDmlAffectedRows"] = str(num_rows) self.assertEqual(job.num_dml_affected_rows, num_rows) def test_slot_millis(self): @@ -4013,97 +3762,93 @@ def test_slot_millis(self): job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.slot_millis) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertIsNone(job.slot_millis) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertIsNone(job.slot_millis) - query_stats['totalSlotMs'] = millis + query_stats["totalSlotMs"] = millis self.assertEqual(job.slot_millis, millis) def test_statement_type(self): - statement_type = 'SELECT' + statement_type = "SELECT" client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.statement_type) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertIsNone(job.statement_type) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertIsNone(job.statement_type) - query_stats['statementType'] = statement_type + query_stats["statementType"] = statement_type self.assertEqual(job.statement_type, statement_type) def test_referenced_tables(self): from google.cloud.bigquery.table import TableReference - ref_tables_resource = [{ - 'projectId': self.PROJECT, - 'datasetId': 'dataset', - 'tableId': 'local1', - }, { - - 'projectId': self.PROJECT, - 'datasetId': 'dataset', - 'tableId': 'local2', - }, { - - 'projectId': 'other-project-123', - 'datasetId': 'other-dataset', - 'tableId': 'other-table', - }] + ref_tables_resource = [ + {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local1"}, + {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local2"}, + { + "projectId": "other-project-123", + "datasetId": "other-dataset", + "tableId": "other-table", + }, + ] client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.referenced_tables, []) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertEqual(job.referenced_tables, []) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertEqual(job.referenced_tables, []) - query_stats['referencedTables'] = ref_tables_resource + query_stats["referencedTables"] = ref_tables_resource local1, local2, remote = job.referenced_tables self.assertIsInstance(local1, TableReference) - self.assertEqual(local1.table_id, 'local1') - self.assertEqual(local1.dataset_id, 'dataset') + self.assertEqual(local1.table_id, "local1") + self.assertEqual(local1.dataset_id, "dataset") self.assertEqual(local1.project, self.PROJECT) self.assertIsInstance(local2, TableReference) - self.assertEqual(local2.table_id, 'local2') - self.assertEqual(local2.dataset_id, 'dataset') + self.assertEqual(local2.table_id, "local2") + self.assertEqual(local2.dataset_id, "dataset") self.assertEqual(local2.project, self.PROJECT) self.assertIsInstance(remote, TableReference) - self.assertEqual(remote.table_id, 'other-table') - self.assertEqual(remote.dataset_id, 'other-dataset') - self.assertEqual(remote.project, 'other-project-123') + self.assertEqual(remote.table_id, "other-table") + self.assertEqual(remote.dataset_id, "other-dataset") + self.assertEqual(remote.project, "other-project-123") def test_timeline(self): - timeline_resource = [{ - 'elapsedMs': 1, - 'activeUnits': 22, - 'pendingUnits': 33, - 'completedUnits': 44, - 'totalSlotMs': 101, - }] + timeline_resource = [ + { + "elapsedMs": 1, + "activeUnits": 22, + "pendingUnits": 33, + "completedUnits": 44, + "totalSlotMs": 101, + } + ] client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.timeline, []) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertEqual(job.timeline, []) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertEqual(job.timeline, []) - query_stats['timeline'] = timeline_resource + query_stats["timeline"] = timeline_resource self.assertEqual(len(job.timeline), len(timeline_resource)) self.assertEqual(job.timeline[0].elapsed_ms, 1) @@ -4117,75 +3862,56 @@ def test_undeclared_query_parameters(self): from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter - undeclared = [{ - 'name': 'my_scalar', - 'parameterType': { - 'type': 'STRING', - }, - 'parameterValue': { - 'value': 'value', + undeclared = [ + { + "name": "my_scalar", + "parameterType": {"type": "STRING"}, + "parameterValue": {"value": "value"}, }, - }, { - 'name': 'my_array', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', + { + "name": "my_array", + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": { + "arrayValues": [{"value": "1066"}, {"value": "1745"}] }, }, - 'parameterValue': { - 'arrayValues': [ - {'value': '1066'}, - {'value': '1745'}, - ], - }, - }, { - 'name': 'my_struct', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [{ - 'name': 'count', - 'type': { - 'type': 'INT64', - } - }], - }, - 'parameterValue': { - 'structValues': { - 'count': { - 'value': '123', - }, - } + { + "name": "my_struct", + "parameterType": { + "type": "STRUCT", + "structTypes": [{"name": "count", "type": {"type": "INT64"}}], + }, + "parameterValue": {"structValues": {"count": {"value": "123"}}}, }, - }] + ] client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.undeclared_query_parameters, []) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertEqual(job.undeclared_query_parameters, []) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertEqual(job.undeclared_query_parameters, []) - query_stats['undeclaredQueryParameters'] = undeclared + query_stats["undeclaredQueryParameters"] = undeclared scalar, array, struct = job.undeclared_query_parameters self.assertIsInstance(scalar, ScalarQueryParameter) - self.assertEqual(scalar.name, 'my_scalar') - self.assertEqual(scalar.type_, 'STRING') - self.assertEqual(scalar.value, 'value') + self.assertEqual(scalar.name, "my_scalar") + self.assertEqual(scalar.type_, "STRING") + self.assertEqual(scalar.value, "value") self.assertIsInstance(array, ArrayQueryParameter) - self.assertEqual(array.name, 'my_array') - self.assertEqual(array.array_type, 'INT64') + self.assertEqual(array.name, "my_array") + self.assertEqual(array.array_type, "INT64") self.assertEqual(array.values, [1066, 1745]) self.assertIsInstance(struct, StructQueryParameter) - self.assertEqual(struct.name, 'my_struct') - self.assertEqual(struct.struct_types, {'count': 'INT64'}) - self.assertEqual(struct.struct_values, {'count': 123}) + self.assertEqual(struct.name, "my_struct") + self.assertEqual(struct.struct_types, {"count": "INT64"}) + self.assertEqual(struct.struct_values, {"count": 123}) def test_estimated_bytes_processed(self): est_bytes = 123456 @@ -4194,23 +3920,20 @@ def test_estimated_bytes_processed(self): job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertIsNone(job.estimated_bytes_processed) - statistics = job._properties['statistics'] = {} + statistics = job._properties["statistics"] = {} self.assertIsNone(job.estimated_bytes_processed) - query_stats = statistics['query'] = {} + query_stats = statistics["query"] = {} self.assertIsNone(job.estimated_bytes_processed) - query_stats['estimatedBytesProcessed'] = str(est_bytes) + query_stats["estimatedBytesProcessed"] = str(est_bytes) self.assertEqual(job.estimated_bytes_processed, est_bytes) def test_result(self): query_resource = { - 'jobComplete': True, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'schema': {'fields': [{'name': 'col1', 'type': 'STRING'}]}, + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, } connection = _make_connection(query_resource, query_resource) client = _make_client(self.PROJECT, connection=connection) @@ -4224,12 +3947,9 @@ def test_result(self): def test_result_w_empty_schema(self): # Destination table may have no schema for some DDL and DML queries. query_resource = { - 'jobComplete': True, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'schema': {'fields': []}, + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": []}, } connection = _make_connection(query_resource, query_resource) client = _make_client(self.PROJECT, connection=connection) @@ -4243,20 +3963,21 @@ def test_result_w_empty_schema(self): def test_result_invokes_begins(self): begun_resource = self._make_resource() incomplete_resource = { - 'jobComplete': False, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'schema': {'fields': [{'name': 'col1', 'type': 'STRING'}]}, + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, } query_resource = copy.deepcopy(incomplete_resource) - query_resource['jobComplete'] = True + query_resource["jobComplete"] = True done_resource = copy.deepcopy(begun_resource) - done_resource['status'] = {'state': 'DONE'} + done_resource["status"] = {"state": "DONE"} connection = _make_connection( - begun_resource, incomplete_resource, query_resource, done_resource, - query_resource) + begun_resource, + incomplete_resource, + query_resource, + done_resource, + query_resource, + ) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) @@ -4266,24 +3987,20 @@ def test_result_invokes_begins(self): begin_request = connection.api_request.call_args_list[0] query_request = connection.api_request.call_args_list[2] reload_request = connection.api_request.call_args_list[3] - self.assertEqual(begin_request[1]['method'], 'POST') - self.assertEqual(query_request[1]['method'], 'GET') - self.assertEqual(reload_request[1]['method'], 'GET') + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(query_request[1]["method"], "GET") + self.assertEqual(reload_request[1]["method"], "GET") def test_result_w_timeout(self): begun_resource = self._make_resource() query_resource = { - 'jobComplete': True, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'schema': {'fields': [{'name': 'col1', 'type': 'STRING'}]}, + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, } done_resource = copy.deepcopy(begun_resource) - done_resource['status'] = {'state': 'DONE'} - connection = _make_connection( - begun_resource, query_resource, done_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) @@ -4293,13 +4010,14 @@ def test_result_w_timeout(self): begin_request = connection.api_request.call_args_list[0] query_request = connection.api_request.call_args_list[1] reload_request = connection.api_request.call_args_list[2] - self.assertEqual(begin_request[1]['method'], 'POST') - self.assertEqual(query_request[1]['method'], 'GET') + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(query_request[1]["method"], "GET") self.assertEqual( - query_request[1]['path'], - '/projects/{}/queries/{}'.format(self.PROJECT, self.JOB_ID)) - self.assertEqual(query_request[1]['query_params']['timeoutMs'], 900) - self.assertEqual(reload_request[1]['method'], 'GET') + query_request[1]["path"], + "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), + ) + self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) + self.assertEqual(reload_request[1]["method"], "GET") def test_result_error(self): from google.cloud import exceptions @@ -4307,15 +4025,15 @@ def test_result_error(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, self.QUERY, client) error_result = { - 'debugInfo': 'DEBUG', - 'location': 'LOCATION', - 'message': 'MESSAGE', - 'reason': 'invalid' + "debugInfo": "DEBUG", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "invalid", } - job._properties['status'] = { - 'errorResult': error_result, - 'errors': [error_result], - 'state': 'DONE' + job._properties["status"] = { + "errorResult": error_result, + "errors": [error_result], + "state": "DONE", } job._set_future_result() @@ -4329,45 +4047,42 @@ def test_begin_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import QueryJobConfig - PATH = '/projects/%s/jobs' % (self.PROJECT,) - DS_ID = 'DATASET' + PATH = "/projects/%s/jobs" % (self.PROJECT,) + DS_ID = "DATASET" RESOURCE = self._make_resource() # Ensure None for missing server-set props - del RESOURCE['statistics']['creationTime'] - del RESOURCE['etag'] - del RESOURCE['selfLink'] - del RESOURCE['user_email'] + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.default_dataset = DatasetReference(self.PROJECT, DS_ID) - job = self._make_one( - self.JOB_ID, self.QUERY, client, job_config=config) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) job._begin() self.assertIsNone(job.default_dataset) self.assertEqual(job.udf_resources, []) conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=PATH, data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': False, - 'defaultDataset': { - 'projectId': self.PROJECT, - 'datasetId': DS_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "defaultDataset": { + "projectId": self.PROJECT, + "datasetId": DS_ID, }, - }, + } }, - }) + }, + ) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_alternate_client(self): @@ -4378,36 +4093,31 @@ def test_begin_w_alternate_client(self): from google.cloud.bigquery.job import SchemaUpdateOption from google.cloud.bigquery.job import WriteDisposition - PATH = '/projects/%s/jobs' % (self.PROJECT,) - TABLE = 'TABLE' - DS_ID = 'DATASET' + PATH = "/projects/%s/jobs" % (self.PROJECT,) + TABLE = "TABLE" + DS_ID = "DATASET" RESOURCE = self._make_resource(ended=True) QUERY_CONFIGURATION = { - 'query': self.QUERY, - 'allowLargeResults': True, - 'createDisposition': CreateDisposition.CREATE_NEVER, - 'defaultDataset': { - 'projectId': self.PROJECT, - 'datasetId': DS_ID, - }, - 'destinationTable': { - 'projectId': self.PROJECT, - 'datasetId': DS_ID, - 'tableId': TABLE, + "query": self.QUERY, + "allowLargeResults": True, + "createDisposition": CreateDisposition.CREATE_NEVER, + "defaultDataset": {"projectId": self.PROJECT, "datasetId": DS_ID}, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": DS_ID, + "tableId": TABLE, }, - 'flattenResults': True, - 'priority': QueryPriority.INTERACTIVE, - 'useQueryCache': True, - 'useLegacySql': True, - 'writeDisposition': WriteDisposition.WRITE_TRUNCATE, - 'maximumBillingTier': 4, - 'maximumBytesBilled': '123456', - 'schemaUpdateOptions': [ - SchemaUpdateOption.ALLOW_FIELD_RELAXATION, - ] + "flattenResults": True, + "priority": QueryPriority.INTERACTIVE, + "useQueryCache": True, + "useLegacySql": True, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + "maximumBillingTier": 4, + "maximumBytesBilled": "123456", + "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_RELAXATION], } - RESOURCE['configuration']['query'] = QUERY_CONFIGURATION - RESOURCE['configuration']['dryRun'] = True + RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION + RESOURCE["configuration"]["dryRun"] = True conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection(RESOURCE) @@ -4428,46 +4138,38 @@ def test_begin_w_alternate_client(self): config.use_query_cache = True config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.maximum_bytes_billed = 123456 - config.schema_update_options = [ - SchemaUpdateOption.ALLOW_FIELD_RELAXATION, - ] - job = self._make_one( - self.JOB_ID, self.QUERY, client1, job_config=config) + config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_RELAXATION] + job = self._make_one(self.JOB_ID, self.QUERY, client1, job_config=config) job._begin(client=client2) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='POST', + method="POST", path=PATH, data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'dryRun': True, - 'query': QUERY_CONFIGURATION, - }, - }) + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"dryRun": True, "query": QUERY_CONFIGURATION}, + }, + ) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_udf(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import UDFResource - RESOURCE_URI = 'gs://some-bucket/js/lib.js' + RESOURCE_URI = "gs://some-bucket/js/lib.js" INLINE_UDF_CODE = 'var someCode = "here";' - PATH = '/projects/%s/jobs' % (self.PROJECT,) + PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource() # Ensure None for missing server-set props - del RESOURCE['statistics']['creationTime'] - del RESOURCE['etag'] - del RESOURCE['selfLink'] - del RESOURCE['user_email'] - RESOURCE['configuration']['query']['userDefinedFunctionResources'] = [ - {'resourceUri': RESOURCE_URI}, - {'inlineCode': INLINE_UDF_CODE}, + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + RESOURCE["configuration"]["query"]["userDefinedFunctionResources"] = [ + {"resourceUri": RESOURCE_URI}, + {"inlineCode": INLINE_UDF_CODE}, ] conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) @@ -4478,138 +4180,118 @@ def test_begin_w_udf(self): config = QueryJobConfig() config.udf_resources = udf_resources config.use_legacy_sql = True - job = self._make_one( - self.JOB_ID, self.QUERY, client, job_config=config) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) job._begin() self.assertEqual(job.udf_resources, udf_resources) conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=PATH, data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': True, - 'userDefinedFunctionResources': [ - {'resourceUri': RESOURCE_URI}, - {'inlineCode': INLINE_UDF_CODE}, - ] - }, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": True, + "userDefinedFunctionResources": [ + {"resourceUri": RESOURCE_URI}, + {"inlineCode": INLINE_UDF_CODE}, + ], + } }, - }) + }, + ) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_named_query_parameter(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import ScalarQueryParameter - query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] - PATH = '/projects/%s/jobs' % (self.PROJECT,) + query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] + PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource() # Ensure None for missing server-set props - del RESOURCE['statistics']['creationTime'] - del RESOURCE['etag'] - del RESOURCE['selfLink'] - del RESOURCE['user_email'] - config = RESOURCE['configuration']['query'] - config['parameterMode'] = 'NAMED' - config['queryParameters'] = [ + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + config = RESOURCE["configuration"]["query"] + config["parameterMode"] = "NAMED" + config["queryParameters"] = [ { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - }, + "name": "foo", + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, + } ] conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters - job = self._make_one( - self.JOB_ID, self.QUERY, client, job_config=jconfig) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) job._begin() self.assertEqual(job.query_parameters, query_parameters) conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=PATH, data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': False, - 'parameterMode': 'NAMED', - 'queryParameters': config['queryParameters'], - }, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "parameterMode": "NAMED", + "queryParameters": config["queryParameters"], + } }, - }) + }, + ) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_positional_query_parameter(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import ScalarQueryParameter - query_parameters = [ScalarQueryParameter.positional('INT64', 123)] - PATH = '/projects/%s/jobs' % (self.PROJECT,) + query_parameters = [ScalarQueryParameter.positional("INT64", 123)] + PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource() # Ensure None for missing server-set props - del RESOURCE['statistics']['creationTime'] - del RESOURCE['etag'] - del RESOURCE['selfLink'] - del RESOURCE['user_email'] - config = RESOURCE['configuration']['query'] - config['parameterMode'] = 'POSITIONAL' - config['queryParameters'] = [ - { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - }, + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + config = RESOURCE["configuration"]["query"] + config["parameterMode"] = "POSITIONAL" + config["queryParameters"] = [ + {"parameterType": {"type": "INT64"}, "parameterValue": {"value": "123"}} ] conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters - job = self._make_one( - self.JOB_ID, self.QUERY, client, job_config=jconfig) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) job._begin() self.assertEqual(job.query_parameters, query_parameters) conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=PATH, data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': False, - 'parameterMode': 'POSITIONAL', - 'queryParameters': config['queryParameters'], - }, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "parameterMode": "POSITIONAL", + "queryParameters": config["queryParameters"], + } }, - }) + }, + ) self._verifyResourceProperties(job, RESOURCE) def test_begin_w_table_defs(self): @@ -4618,47 +4300,44 @@ def test_begin_w_table_defs(self): from google.cloud.bigquery.external_config import BigtableColumn from google.cloud.bigquery.external_config import BigtableColumnFamily - PATH = '/projects/%s/jobs' % (self.PROJECT,) + PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource() # Ensure None for missing server-set props - del RESOURCE['statistics']['creationTime'] - del RESOURCE['etag'] - del RESOURCE['selfLink'] - del RESOURCE['user_email'] + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] - bt_config = ExternalConfig('BIGTABLE') + bt_config = ExternalConfig("BIGTABLE") bt_config.ignore_unknown_values = True bt_config.options.read_rowkey_as_string = True cf = BigtableColumnFamily() - cf.family_id = 'cf' + cf.family_id = "cf" col = BigtableColumn() - col.field_name = 'fn' + col.field_name = "fn" cf.columns = [col] bt_config.options.column_families = [cf] BT_CONFIG_RESOURCE = { - 'sourceFormat': 'BIGTABLE', - 'ignoreUnknownValues': True, - 'bigtableOptions': { - 'readRowkeyAsString': True, - 'columnFamilies': [{ - 'familyId': 'cf', - 'columns': [{'fieldName': 'fn'}], - }], + "sourceFormat": "BIGTABLE", + "ignoreUnknownValues": True, + "bigtableOptions": { + "readRowkeyAsString": True, + "columnFamilies": [ + {"familyId": "cf", "columns": [{"fieldName": "fn"}]} + ], }, } CSV_CONFIG_RESOURCE = { - 'sourceFormat': 'CSV', - 'maxBadRecords': 8, - 'csvOptions': { - 'allowJaggedRows': True, - }, + "sourceFormat": "CSV", + "maxBadRecords": 8, + "csvOptions": {"allowJaggedRows": True}, } - csv_config = ExternalConfig('CSV') + csv_config = ExternalConfig("CSV") csv_config.max_bad_records = 8 csv_config.options.allow_jagged_rows = True - bt_table = 'bigtable-table' - csv_table = 'csv-table' - RESOURCE['configuration']['query']['tableDefinitions'] = { + bt_table = "bigtable-table" + csv_table = "csv-table" + RESOURCE["configuration"]["query"]["tableDefinitions"] = { bt_table: BT_CONFIG_RESOURCE, csv_table: CSV_CONFIG_RESOURCE, } @@ -4666,77 +4345,65 @@ def test_begin_w_table_defs(self): conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() - config.table_definitions = { - bt_table: bt_config, - csv_table: csv_config, - } + config.table_definitions = {bt_table: bt_config, csv_table: csv_config} config.use_legacy_sql = True - job = self._make_one( - self.JOB_ID, self.QUERY, client, job_config=config) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) job._begin() conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=PATH, data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': True, - 'tableDefinitions': { + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": True, + "tableDefinitions": { bt_table: BT_CONFIG_RESOURCE, csv_table: CSV_CONFIG_RESOURCE, }, - }, + } }, - }) + }, + ) self._verifyResourceProperties(job, want_resource) def test_dry_run_query(self): from google.cloud.bigquery.job import QueryJobConfig - PATH = '/projects/%s/jobs' % (self.PROJECT,) + PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource() # Ensure None for missing server-set props - del RESOURCE['statistics']['creationTime'] - del RESOURCE['etag'] - del RESOURCE['selfLink'] - del RESOURCE['user_email'] - RESOURCE['configuration']['dryRun'] = True + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + RESOURCE["configuration"]["dryRun"] = True conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.dry_run = True - job = self._make_one( - self.JOB_ID, self.QUERY, client, job_config=config) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) job._begin() self.assertEqual(job.udf_resources, []) conn.api_request.assert_called_once_with( - method='POST', + method="POST", path=PATH, data={ - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": {"query": self.QUERY, "useLegacySql": False}, + "dryRun": True, }, - 'configuration': { - 'query': { - 'query': self.QUERY, - 'useLegacySql': False, - }, - 'dryRun': True, - }, - }) + }, + ) self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) @@ -4744,12 +4411,11 @@ def test_exists_miss_w_bound_client(self): self.assertFalse(job.exists()) conn.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={'fields': 'id'}) + method="GET", path=PATH, query_params={"fields": "id"} + ) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _make_connection({}) @@ -4760,17 +4426,16 @@ def test_exists_hit_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='GET', - path=PATH, - query_params={'fields': 'id'}) + method="GET", path=PATH, query_params={"fields": "id"} + ) def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import QueryJobConfig - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - DS_ID = 'DATASET' - DEST_TABLE = 'dest_table' + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" RESOURCE = self._make_resource() conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) @@ -4785,19 +4450,20 @@ def test_reload_w_bound_client(self): self.assertNotEqual(job.destination, table_ref) conn.api_request.assert_called_once_with( - method='GET', path=PATH, query_params={}) + method="GET", path=PATH, query_params={} + ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) - DS_ID = 'DATASET' - DEST_TABLE = 'dest_table' + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" RESOURCE = self._make_resource() - q_config = RESOURCE['configuration']['query'] - q_config['destinationTable'] = { - 'projectId': self.PROJECT, - 'datasetId': DS_ID, - 'tableId': DEST_TABLE, + q_config = RESOURCE["configuration"]["query"] + q_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": DS_ID, + "tableId": DEST_TABLE, } conn1 = _make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) @@ -4809,36 +4475,35 @@ def test_reload_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method='GET', path=PATH, query_params={}) + method="GET", path=PATH, query_params={} + ) self._verifyResourceProperties(job, RESOURCE) - @unittest.skipIf(pandas is None, 'Requires `pandas`') + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): begun_resource = self._make_resource() query_resource = { - 'jobComplete': True, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "totalRows": "4", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] }, - 'totalRows': '4', - 'schema': { - 'fields': [ - {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'}, - ], - }, - 'rows': [ - {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]}, - {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]}, - {'f': [{'v': 'Wylma Phlyntstone'}, {'v': '29'}]}, - {'f': [{'v': 'Bhettye Rhubble'}, {'v': '27'}]}, + "rows": [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, ], } done_resource = copy.deepcopy(begun_resource) - done_resource['status'] = {'state': 'DONE'} + done_resource["status"] = {"state": "DONE"} connection = _make_connection( - begun_resource, query_resource, done_resource, query_resource) + begun_resource, query_resource, done_resource, query_resource + ) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) @@ -4846,25 +4511,21 @@ def test_to_dataframe(self): self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 4) # verify the number of rows - self.assertEqual(list(df), ['name', 'age']) # verify the column names + self.assertEqual(list(df), ["name", "age"]) # verify the column names def test_iter(self): import types begun_resource = self._make_resource() query_resource = { - 'jobComplete': True, - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - 'totalRows': '0', - 'schema': {'fields': [{'name': 'col1', 'type': 'STRING'}]}, + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "totalRows": "0", + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, } done_resource = copy.deepcopy(begun_resource) - done_resource['status'] = {'state': 'DONE'} - connection = _make_connection( - begun_resource, query_resource, done_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) @@ -4872,8 +4533,8 @@ def test_iter(self): class TestQueryPlanEntryStep(unittest.TestCase, _Base): - KIND = 'KIND' - SUBSTEPS = ('SUB1', 'SUB2') + KIND = "KIND" + SUBSTEPS = ("SUB1", "SUB2") @staticmethod def _get_target_class(): @@ -4896,10 +4557,7 @@ def test_from_api_repr_empty(self): self.assertEqual(step.substeps, []) def test_from_api_repr_normal(self): - resource = { - 'kind': self.KIND, - 'substeps': self.SUBSTEPS, - } + resource = {"kind": self.KIND, "substeps": self.SUBSTEPS} klass = self._get_target_class() step = klass.from_api_repr(resource) self.assertEqual(step.kind, self.KIND) @@ -4911,7 +4569,7 @@ def test___eq___mismatched_type(self): def test___eq___mismatch_kind(self): step = self._make_one(self.KIND, self.SUBSTEPS) - other = self._make_one('OTHER', self.SUBSTEPS) + other = self._make_one("OTHER", self.SUBSTEPS) self.assertNotEqual(step, other) def test___eq___mismatch_substeps(self): @@ -4926,11 +4584,11 @@ def test___eq___hit(self): def test___eq___wrong_type(self): step = self._make_one(self.KIND, self.SUBSTEPS) - self.assertFalse(step == 'hello') + self.assertFalse(step == "hello") class TestQueryPlanEntry(unittest.TestCase, _Base): - NAME = 'NAME' + NAME = "NAME" ENTRY_ID = 1234 START_MS = 1522540800000 END_MS = 1522540804000 @@ -4955,12 +4613,12 @@ class TestQueryPlanEntry(unittest.TestCase, _Base): WRITE_RATIO_MAX = 2.30258 RECORDS_READ = 100 RECORDS_WRITTEN = 1 - STATUS = 'STATUS' + STATUS = "STATUS" SHUFFLE_OUTPUT_BYTES = 1024 SHUFFLE_OUTPUT_BYTES_SPILLED = 1 - START_RFC3339_MICROS = '2018-04-01T00:00:00.000000Z' - END_RFC3339_MICROS = '2018-04-01T00:00:04.000000Z' + START_RFC3339_MICROS = "2018-04-01T00:00:00.000000Z" + END_RFC3339_MICROS = "2018-04-01T00:00:04.000000Z" @staticmethod def _get_target_class(): @@ -5006,40 +4664,45 @@ def test_from_api_repr_empty(self): def test_from_api_repr_normal(self): from google.cloud.bigquery.job import QueryPlanEntryStep - steps = [QueryPlanEntryStep( - kind=TestQueryPlanEntryStep.KIND, - substeps=TestQueryPlanEntryStep.SUBSTEPS)] + steps = [ + QueryPlanEntryStep( + kind=TestQueryPlanEntryStep.KIND, + substeps=TestQueryPlanEntryStep.SUBSTEPS, + ) + ] resource = { - 'name': self.NAME, - 'id': self.ENTRY_ID, - 'inputStages': self.INPUT_STAGES, - 'startMs': self.START_MS, - 'endMs': self.END_MS, - 'waitMsAvg': self.WAIT_MS_AVG, - 'waitMsMax': self.WAIT_MS_MAX, - 'waitRatioAvg': self.WAIT_RATIO_AVG, - 'waitRatioMax': self.WAIT_RATIO_MAX, - 'readMsAvg': self.READ_MS_AVG, - 'readMsMax': self.READ_MS_MAX, - 'readRatioAvg': self.READ_RATIO_AVG, - 'readRatioMax': self.READ_RATIO_MAX, - 'computeMsAvg': self.COMPUTE_MS_AVG, - 'computeMsMax': self.COMPUTE_MS_MAX, - 'computeRatioAvg': self.COMPUTE_RATIO_AVG, - 'computeRatioMax': self.COMPUTE_RATIO_MAX, - 'writeMsAvg': self.WRITE_MS_AVG, - 'writeMsMax': self.WRITE_MS_MAX, - 'writeRatioAvg': self.WRITE_RATIO_AVG, - 'writeRatioMax': self.WRITE_RATIO_MAX, - 'recordsRead': self.RECORDS_READ, - 'recordsWritten': self.RECORDS_WRITTEN, - 'status': self.STATUS, - 'shuffleOutputBytes': self.SHUFFLE_OUTPUT_BYTES, - 'shuffleOutputBytesSpilled': self.SHUFFLE_OUTPUT_BYTES_SPILLED, - 'steps': [{ - 'kind': TestQueryPlanEntryStep.KIND, - 'substeps': TestQueryPlanEntryStep.SUBSTEPS, - }] + "name": self.NAME, + "id": self.ENTRY_ID, + "inputStages": self.INPUT_STAGES, + "startMs": self.START_MS, + "endMs": self.END_MS, + "waitMsAvg": self.WAIT_MS_AVG, + "waitMsMax": self.WAIT_MS_MAX, + "waitRatioAvg": self.WAIT_RATIO_AVG, + "waitRatioMax": self.WAIT_RATIO_MAX, + "readMsAvg": self.READ_MS_AVG, + "readMsMax": self.READ_MS_MAX, + "readRatioAvg": self.READ_RATIO_AVG, + "readRatioMax": self.READ_RATIO_MAX, + "computeMsAvg": self.COMPUTE_MS_AVG, + "computeMsMax": self.COMPUTE_MS_MAX, + "computeRatioAvg": self.COMPUTE_RATIO_AVG, + "computeRatioMax": self.COMPUTE_RATIO_MAX, + "writeMsAvg": self.WRITE_MS_AVG, + "writeMsMax": self.WRITE_MS_MAX, + "writeRatioAvg": self.WRITE_RATIO_AVG, + "writeRatioMax": self.WRITE_RATIO_MAX, + "recordsRead": self.RECORDS_READ, + "recordsWritten": self.RECORDS_WRITTEN, + "status": self.STATUS, + "shuffleOutputBytes": self.SHUFFLE_OUTPUT_BYTES, + "shuffleOutputBytesSpilled": self.SHUFFLE_OUTPUT_BYTES_SPILLED, + "steps": [ + { + "kind": TestQueryPlanEntryStep.KIND, + "substeps": TestQueryPlanEntryStep.SUBSTEPS, + } + ], } klass = self._get_target_class() @@ -5065,14 +4728,12 @@ def test_start(self): klass = self._get_target_class() entry = klass.from_api_repr({}) - self.assertEqual( - entry.start, - None) + self.assertEqual(entry.start, None) - entry._properties['startMs'] = self.START_MS + entry._properties["startMs"] = self.START_MS self.assertEqual( - entry.start.strftime(_RFC3339_MICROS), - self.START_RFC3339_MICROS) + entry.start.strftime(_RFC3339_MICROS), self.START_RFC3339_MICROS + ) def test_end(self): from google.cloud._helpers import _RFC3339_MICROS @@ -5080,14 +4741,10 @@ def test_end(self): klass = self._get_target_class() entry = klass.from_api_repr({}) - self.assertEqual( - entry.end, - None) + self.assertEqual(entry.end, None) - entry._properties['endMs'] = self.END_MS - self.assertEqual( - entry.end.strftime(_RFC3339_MICROS), - self.END_RFC3339_MICROS) + entry._properties["endMs"] = self.END_MS + self.assertEqual(entry.end.strftime(_RFC3339_MICROS), self.END_RFC3339_MICROS) class TestTimelineEntry(unittest.TestCase, _Base): @@ -5100,6 +4757,7 @@ class TestTimelineEntry(unittest.TestCase, _Base): @staticmethod def _get_target_class(): from google.cloud.bigquery.job import TimelineEntry + return TimelineEntry def test_from_api_repr_empty(self): @@ -5113,11 +4771,11 @@ def test_from_api_repr_empty(self): def test_from_api_repr_normal(self): resource = { - 'elapsedMs': self.ELAPSED_MS, - 'activeUnits': self.ACTIVE_UNITS, - 'pendingUnits': self.PENDING_UNITS, - 'completedUnits': self.COMPLETED_UNITS, - 'totalSlotMs': self.SLOT_MILLIS, + "elapsedMs": self.ELAPSED_MS, + "activeUnits": self.ACTIVE_UNITS, + "pendingUnits": self.PENDING_UNITS, + "completedUnits": self.COMPLETED_UNITS, + "totalSlotMs": self.SLOT_MILLIS, } klass = self._get_target_class() diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index b0e08661ca00..5729146b053a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -17,6 +17,7 @@ from concurrent import futures import pytest + try: import pandas except ImportError: # pragma: NO COVER @@ -34,10 +35,10 @@ from google.cloud.bigquery import magics -pytestmark = pytest.mark.skipif(IPython is None, reason='Requires `ipython`') +pytestmark = pytest.mark.skipif(IPython is None, reason="Requires `ipython`") -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def ipython(): config = tools.default_config() config.TerminalInteractiveShell.simple_prompt = True @@ -62,11 +63,13 @@ def test_context_credentials_auto_set_w_application_default_credentials(): assert magics.context._credentials is None assert magics.context._project is None - project = 'prahj-ekt' + project = "prahj-ekt" credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True) + google.auth.credentials.Credentials, instance=True + ) default_patch = mock.patch( - 'google.auth.default', return_value=(credentials_mock, project)) + "google.auth.default", return_value=(credentials_mock, project) + ) with default_patch as default_mock: assert magics.context.credentials is credentials_mock assert magics.context.project == project @@ -75,12 +78,14 @@ def test_context_credentials_auto_set_w_application_default_credentials(): def test_context_credentials_and_project_can_be_set_explicitly(): - project1 = 'one-project-55564' - project2 = 'other-project-52569' + project1 = "one-project-55564" + project2 = "other-project-52569" credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True) + google.auth.credentials.Credentials, instance=True + ) default_patch = mock.patch( - 'google.auth.default', return_value=(credentials_mock, project1)) + "google.auth.default", return_value=(credentials_mock, project1) + ) with default_patch as default_mock: magics.context.credentials = credentials_mock magics.context.project = project2 @@ -94,247 +99,264 @@ def test_context_credentials_and_project_can_be_set_explicitly(): def test__run_query(): magics.context._credentials = None - job_id = 'job_1234' - sql = 'SELECT 17' + job_id = "job_1234" + sql = "SELECT 17" responses = [ futures.TimeoutError, futures.TimeoutError, - [table.Row((17,), {'num': 0})] + [table.Row((17,), {"num": 0})], ] client_patch = mock.patch( - 'google.cloud.bigquery.magics.bigquery.Client', autospec=True) + "google.cloud.bigquery.magics.bigquery.Client", autospec=True + ) with client_patch as client_mock, io.capture_output() as captured: client_mock().query(sql).result.side_effect = responses client_mock().query(sql).job_id = job_id query_job = magics._run_query(client_mock(), sql) - lines = re.split('\n|\r', captured.stdout) + lines = re.split("\n|\r", captured.stdout) # Removes blanks & terminal code (result of display clearing) - updates = list(filter(lambda x: bool(x) and x != '\x1b[2K', lines)) + updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines)) assert query_job.job_id == job_id expected_first_line = "Executing query with job ID: {}".format(job_id) assert updates[0] == expected_first_line execution_updates = updates[1:-1] assert len(execution_updates) == 3 # one update per API response - assert all(re.match("Query executing: .*s", line) - for line in execution_updates) + assert all(re.match("Query executing: .*s", line) for line in execution_updates) assert re.match("Query complete after .*s", updates[-1]) -@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.usefixtures("ipython_interactive") def test_extension_load(): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") # verify that the magic is registered and has the correct source - magic = ip.magics_manager.magics['cell'].get('bigquery') - assert magic.__module__ == 'google.cloud.bigquery.magics' + magic = ip.magics_manager.magics["cell"].get("bigquery") + assert magic.__module__ == "google.cloud.bigquery.magics" -@pytest.mark.usefixtures('ipython_interactive') -@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_without_optional_arguments(): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( - google.auth.credentials.Credentials, instance=True) + google.auth.credentials.Credentials, instance=True + ) - sql = 'SELECT 17 AS num' - result = pandas.DataFrame([17], columns=['num']) + sql = "SELECT 17 AS num" + result = pandas.DataFrame([17], columns=["num"]) run_query_patch = mock.patch( - 'google.cloud.bigquery.magics._run_query', autospec=True) + "google.cloud.bigquery.magics._run_query", autospec=True + ) query_job_mock = mock.create_autospec( - google.cloud.bigquery.job.QueryJob, instance=True) + google.cloud.bigquery.job.QueryJob, instance=True + ) query_job_mock.to_dataframe.return_value = result with run_query_patch as run_query_mock: run_query_mock.return_value = query_job_mock - result = ip.run_cell_magic('bigquery', '', sql) + result = ip.run_cell_magic("bigquery", "", sql) assert isinstance(result, pandas.DataFrame) - assert len(result) == len(result) # verify row count + assert len(result) == len(result) # verify row count assert list(result) == list(result) # verify column names -@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_legacy_sql(): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( - google.auth.credentials.Credentials, instance=True) + google.auth.credentials.Credentials, instance=True + ) run_query_patch = mock.patch( - 'google.cloud.bigquery.magics._run_query', autospec=True) + "google.cloud.bigquery.magics._run_query", autospec=True + ) with run_query_patch as run_query_mock: - ip.run_cell_magic( - 'bigquery', '--use_legacy_sql', 'SELECT 17 AS num') + ip.run_cell_magic("bigquery", "--use_legacy_sql", "SELECT 17 AS num") job_config_used = run_query_mock.call_args_list[0][0][-1] assert job_config_used.use_legacy_sql is True -@pytest.mark.usefixtures('ipython_interactive') -@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_with_result_saved_to_variable(): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( - google.auth.credentials.Credentials, instance=True) + google.auth.credentials.Credentials, instance=True + ) - sql = 'SELECT 17 AS num' - result = pandas.DataFrame([17], columns=['num']) - assert 'df' not in ip.user_ns + sql = "SELECT 17 AS num" + result = pandas.DataFrame([17], columns=["num"]) + assert "df" not in ip.user_ns run_query_patch = mock.patch( - 'google.cloud.bigquery.magics._run_query', autospec=True) + "google.cloud.bigquery.magics._run_query", autospec=True + ) query_job_mock = mock.create_autospec( - google.cloud.bigquery.job.QueryJob, instance=True) + google.cloud.bigquery.job.QueryJob, instance=True + ) query_job_mock.to_dataframe.return_value = result with run_query_patch as run_query_mock: run_query_mock.return_value = query_job_mock - ip.run_cell_magic('bigquery', 'df', sql) + ip.run_cell_magic("bigquery", "df", sql) - assert 'df' in ip.user_ns # verify that variable exists - df = ip.user_ns['df'] - assert len(df) == len(result) # verify row count + assert "df" in ip.user_ns # verify that variable exists + df = ip.user_ns["df"] + assert len(df) == len(result) # verify row count assert list(df) == list(result) # verify column names -@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( - google.auth.credentials.Credentials, instance=True) + google.auth.credentials.Credentials, instance=True + ) clear_patch = mock.patch( - 'google.cloud.bigquery.magics.display.clear_output', autospec=True) + "google.cloud.bigquery.magics.display.clear_output", autospec=True + ) run_query_patch = mock.patch( - 'google.cloud.bigquery.magics._run_query', autospec=True) + "google.cloud.bigquery.magics._run_query", autospec=True + ) with clear_patch as clear_mock, run_query_patch: - ip.run_cell_magic('bigquery', '--verbose', 'SELECT 17 as num') + ip.run_cell_magic("bigquery", "--verbose", "SELECT 17 as num") assert clear_mock.call_count == 0 -@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_clears_display_in_verbose_mode(): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( - google.auth.credentials.Credentials, instance=True) + google.auth.credentials.Credentials, instance=True + ) clear_patch = mock.patch( - 'google.cloud.bigquery.magics.display.clear_output', autospec=True) + "google.cloud.bigquery.magics.display.clear_output", autospec=True + ) run_query_patch = mock.patch( - 'google.cloud.bigquery.magics._run_query', autospec=True) + "google.cloud.bigquery.magics._run_query", autospec=True + ) with clear_patch as clear_mock, run_query_patch: - ip.run_cell_magic('bigquery', '', 'SELECT 17 as num') + ip.run_cell_magic("bigquery", "", "SELECT 17 as num") assert clear_mock.call_count == 1 -@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_project(): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") magics.context._project = None credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True) + google.auth.credentials.Credentials, instance=True + ) default_patch = mock.patch( - 'google.auth.default', - return_value=(credentials_mock, 'general-project')) + "google.auth.default", return_value=(credentials_mock, "general-project") + ) run_query_patch = mock.patch( - 'google.cloud.bigquery.magics._run_query', autospec=True) + "google.cloud.bigquery.magics._run_query", autospec=True + ) with run_query_patch as run_query_mock, default_patch: - ip.run_cell_magic( - 'bigquery', '--project=specific-project', 'SELECT 17 as num') + ip.run_cell_magic("bigquery", "--project=specific-project", "SELECT 17 as num") client_used = run_query_mock.call_args_list[0][0][0] - assert client_used.project == 'specific-project' + assert client_used.project == "specific-project" # context project should not change - assert magics.context.project == 'general-project' + assert magics.context.project == "general-project" -@pytest.mark.usefixtures('ipython_interactive') -@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_with_string_params(): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( - google.auth.credentials.Credentials, instance=True) + google.auth.credentials.Credentials, instance=True + ) - sql = 'SELECT @num AS num' - result = pandas.DataFrame([17], columns=['num']) - assert 'params_string_df' not in ip.user_ns + sql = "SELECT @num AS num" + result = pandas.DataFrame([17], columns=["num"]) + assert "params_string_df" not in ip.user_ns run_query_patch = mock.patch( - 'google.cloud.bigquery.magics._run_query', autospec=True) + "google.cloud.bigquery.magics._run_query", autospec=True + ) query_job_mock = mock.create_autospec( - google.cloud.bigquery.job.QueryJob, instance=True) + google.cloud.bigquery.job.QueryJob, instance=True + ) query_job_mock.to_dataframe.return_value = result with run_query_patch as run_query_mock: run_query_mock.return_value = query_job_mock - ip.run_cell_magic( - 'bigquery', 'params_string_df --params {"num":17}', sql) - run_query_mock.assert_called_once_with( - mock.ANY, sql.format(num=17), mock.ANY) + ip.run_cell_magic("bigquery", 'params_string_df --params {"num":17}', sql) + run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) - assert 'params_string_df' in ip.user_ns # verify that the variable exists - df = ip.user_ns['params_string_df'] - assert len(df) == len(result) # verify row count - assert list(df) == list(result) # verify column names + assert "params_string_df" in ip.user_ns # verify that the variable exists + df = ip.user_ns["params_string_df"] + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names -@pytest.mark.usefixtures('ipython_interactive') -@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_with_dict_params(): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( - google.auth.credentials.Credentials, instance=True) + google.auth.credentials.Credentials, instance=True + ) - sql = 'SELECT @num AS num' - result = pandas.DataFrame([17], columns=['num']) - assert 'params_dict_df' not in ip.user_ns + sql = "SELECT @num AS num" + result = pandas.DataFrame([17], columns=["num"]) + assert "params_dict_df" not in ip.user_ns run_query_patch = mock.patch( - 'google.cloud.bigquery.magics._run_query', autospec=True) + "google.cloud.bigquery.magics._run_query", autospec=True + ) query_job_mock = mock.create_autospec( - google.cloud.bigquery.job.QueryJob, instance=True) + google.cloud.bigquery.job.QueryJob, instance=True + ) query_job_mock.to_dataframe.return_value = result with run_query_patch as run_query_mock: run_query_mock.return_value = query_job_mock params = {"num": 17} # Insert dictionary into user namespace so that it can be expanded - ip.user_ns['params'] = params - ip.run_cell_magic('bigquery', 'params_dict_df --params $params', sql) - run_query_mock.assert_called_once_with( - mock.ANY, sql.format(num=17), mock.ANY) + ip.user_ns["params"] = params + ip.run_cell_magic("bigquery", "params_dict_df --params $params", sql) + run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) - assert 'params_dict_df' in ip.user_ns # verify that the variable exists - df = ip.user_ns['params_dict_df'] - assert len(df) == len(result) # verify row count - assert list(df) == list(result) # verify column names + assert "params_dict_df" in ip.user_ns # verify that the variable exists + df = ip.user_ns["params_dict_df"] + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names -@pytest.mark.usefixtures('ipython_interactive') -@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_with_improperly_formatted_params(): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( - google.auth.credentials.Credentials, instance=True) + google.auth.credentials.Credentials, instance=True + ) - sql = 'SELECT @num AS num' + sql = "SELECT @num AS num" with pytest.raises(SyntaxError): - ip.run_cell_magic( - 'bigquery', '--params {17}', sql) + ip.run_cell_magic("bigquery", "--params {17}", sql) diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index c262132f8e0c..f50335082349 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -19,7 +19,6 @@ class Test_UDFResource(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.query import UDFResource @@ -30,23 +29,21 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor(self): - udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') - self.assertEqual(udf.udf_type, 'resourceUri') - self.assertEqual(udf.value, 'gs://some_bucket/some_file') + udf = self._make_one("resourceUri", "gs://some_bucket/some_file") + self.assertEqual(udf.udf_type, "resourceUri") + self.assertEqual(udf.value, "gs://some_bucket/some_file") def test___eq__(self): - udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') + udf = self._make_one("resourceUri", "gs://some_bucket/some_file") self.assertEqual(udf, udf) self.assertNotEqual(udf, object()) - wrong_val = self._make_one( - 'resourceUri', 'gs://some_bucket/other_file') + wrong_val = self._make_one("resourceUri", "gs://some_bucket/other_file") self.assertNotEqual(udf, wrong_val) - wrong_type = self._make_one('inlineCode', udf.value) + wrong_type = self._make_one("inlineCode", udf.value) self.assertNotEqual(udf, wrong_type) class Test__AbstractQueryParameter(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.query import _AbstractQueryParameter @@ -68,7 +65,6 @@ def test_to_api_virtual(self): class Test_ScalarQueryParameter(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.query import ScalarQueryParameter @@ -79,141 +75,108 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor(self): - param = self._make_one(name='foo', type_='INT64', value=123) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.type_, 'INT64') + param = self._make_one(name="foo", type_="INT64", value=123) + self.assertEqual(param.name, "foo") + self.assertEqual(param.type_, "INT64") self.assertEqual(param.value, 123) def test___eq__(self): - param = self._make_one(name='foo', type_='INT64', value=123) + param = self._make_one(name="foo", type_="INT64", value=123) self.assertEqual(param, param) self.assertNotEqual(param, object()) - alias = self._make_one(name='bar', type_='INT64', value=123) + alias = self._make_one(name="bar", type_="INT64", value=123) self.assertNotEqual(param, alias) - wrong_type = self._make_one(name='foo', type_='FLOAT64', value=123.0) + wrong_type = self._make_one(name="foo", type_="FLOAT64", value=123.0) self.assertNotEqual(param, wrong_type) - wrong_val = self._make_one(name='foo', type_='INT64', value=234) + wrong_val = self._make_one(name="foo", type_="INT64", value=234) self.assertNotEqual(param, wrong_val) def test_positional(self): klass = self._get_target_class() - param = klass.positional(type_='INT64', value=123) + param = klass.positional(type_="INT64", value=123) self.assertEqual(param.name, None) - self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.type_, "INT64") self.assertEqual(param.value, 123) def test_from_api_repr_w_name(self): RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': 123, - }, + "name": "foo", + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": 123}, } klass = self._get_target_class() param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.name, "foo") + self.assertEqual(param.type_, "INT64") self.assertEqual(param.value, 123) def test_from_api_repr_wo_name(self): RESOURCE = { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, } klass = self._get_target_class() param = klass.from_api_repr(RESOURCE) self.assertEqual(param.name, None) - self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.type_, "INT64") self.assertEqual(param.value, 123) def test_to_api_repr_w_name(self): EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, + "name": "foo", + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, } - param = self._make_one(name='foo', type_='INT64', value=123) + param = self._make_one(name="foo", type_="INT64", value=123) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_wo_name(self): EXPECTED = { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, } klass = self._get_target_class() - param = klass.positional(type_='INT64', value=123) + param = klass.positional(type_="INT64", value=123) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_float(self): EXPECTED = { - 'parameterType': { - 'type': 'FLOAT64', - }, - 'parameterValue': { - 'value': 12.345, - }, + "parameterType": {"type": "FLOAT64"}, + "parameterValue": {"value": 12.345}, } klass = self._get_target_class() - param = klass.positional(type_='FLOAT64', value=12.345) + param = klass.positional(type_="FLOAT64", value=12.345) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_numeric(self): EXPECTED = { - 'parameterType': { - 'type': 'NUMERIC', - }, - 'parameterValue': { - 'value': '123456789.123456789', - }, + "parameterType": {"type": "NUMERIC"}, + "parameterValue": {"value": "123456789.123456789"}, } klass = self._get_target_class() - param = klass.positional(type_='NUMERIC', - value='123456789.123456789') + param = klass.positional(type_="NUMERIC", value="123456789.123456789") self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_bool(self): EXPECTED = { - 'parameterType': { - 'type': 'BOOL', - }, - 'parameterValue': { - 'value': 'false', - }, + "parameterType": {"type": "BOOL"}, + "parameterValue": {"value": "false"}, } klass = self._get_target_class() - param = klass.positional(type_='BOOL', value=False) + param = klass.positional(type_="BOOL", value=False) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_timestamp_datetime(self): from google.cloud._helpers import UTC - STAMP = '2016-12-20 15:58:27.339328+00:00' + STAMP = "2016-12-20 15:58:27.339328+00:00" when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) EXPECTED = { - 'parameterType': { - 'type': 'TIMESTAMP', - }, - 'parameterValue': { - 'value': STAMP, - }, + "parameterType": {"type": "TIMESTAMP"}, + "parameterValue": {"value": STAMP}, } klass = self._get_target_class() - param = klass.positional(type_='TIMESTAMP', value=when) + param = klass.positional(type_="TIMESTAMP", value=when) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_timestamp_micros(self): @@ -222,15 +185,11 @@ def test_to_api_repr_w_timestamp_micros(self): now = datetime.datetime.utcnow() seconds = _microseconds_from_datetime(now) / 1.0e6 EXPECTED = { - 'parameterType': { - 'type': 'TIMESTAMP', - }, - 'parameterValue': { - 'value': seconds, - }, + "parameterType": {"type": "TIMESTAMP"}, + "parameterValue": {"value": seconds}, } klass = self._get_target_class() - param = klass.positional(type_='TIMESTAMP', value=seconds) + param = klass.positional(type_="TIMESTAMP", value=seconds) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_datetime(self): @@ -238,15 +197,13 @@ def test_to_api_repr_w_datetime_datetime(self): now = datetime.datetime.utcnow() EXPECTED = { - 'parameterType': { - 'type': 'DATETIME', - }, - 'parameterValue': { - 'value': _datetime_to_rfc3339(now)[:-1], # strip trailing 'Z' + "parameterType": {"type": "DATETIME"}, + "parameterValue": { + "value": _datetime_to_rfc3339(now)[:-1] # strip trailing 'Z' }, } klass = self._get_target_class() - param = klass.positional(type_='DATETIME', value=now) + param = klass.positional(type_="DATETIME", value=now) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_string(self): @@ -255,105 +212,89 @@ def test_to_api_repr_w_datetime_string(self): now = datetime.datetime.utcnow() now_str = _datetime_to_rfc3339(now) EXPECTED = { - 'parameterType': { - 'type': 'DATETIME', - }, - 'parameterValue': { - 'value': now_str, - }, + "parameterType": {"type": "DATETIME"}, + "parameterValue": {"value": now_str}, } klass = self._get_target_class() - param = klass.positional(type_='DATETIME', value=now_str) + param = klass.positional(type_="DATETIME", value=now_str) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_date_date(self): today = datetime.date.today() EXPECTED = { - 'parameterType': { - 'type': 'DATE', - }, - 'parameterValue': { - 'value': today.isoformat(), - }, + "parameterType": {"type": "DATE"}, + "parameterValue": {"value": today.isoformat()}, } klass = self._get_target_class() - param = klass.positional(type_='DATE', value=today) + param = klass.positional(type_="DATE", value=today) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_date_string(self): today = datetime.date.today() - today_str = today.isoformat(), + today_str = (today.isoformat(),) EXPECTED = { - 'parameterType': { - 'type': 'DATE', - }, - 'parameterValue': { - 'value': today_str, - }, + "parameterType": {"type": "DATE"}, + "parameterValue": {"value": today_str}, } klass = self._get_target_class() - param = klass.positional(type_='DATE', value=today_str) + param = klass.positional(type_="DATE", value=today_str) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_unknown_type(self): EXPECTED = { - 'parameterType': { - 'type': 'UNKNOWN', - }, - 'parameterValue': { - 'value': 'unknown', - }, + "parameterType": {"type": "UNKNOWN"}, + "parameterValue": {"value": "unknown"}, } klass = self._get_target_class() - param = klass.positional(type_='UNKNOWN', value='unknown') + param = klass.positional(type_="UNKNOWN", value="unknown") self.assertEqual(param.to_api_repr(), EXPECTED) def test___eq___wrong_type(self): - field = self._make_one('test', 'STRING', 'value') + field = self._make_one("test", "STRING", "value") other = object() self.assertNotEqual(field, other) self.assertEqual(field, mock.ANY) def test___eq___name_mismatch(self): - field = self._make_one('test', 'STRING', 'value') - other = self._make_one('other', 'STRING', 'value') + field = self._make_one("test", "STRING", "value") + other = self._make_one("other", "STRING", "value") self.assertNotEqual(field, other) def test___eq___field_type_mismatch(self): - field = self._make_one('test', 'STRING', None) - other = self._make_one('test', 'INT64', None) + field = self._make_one("test", "STRING", None) + other = self._make_one("test", "INT64", None) self.assertNotEqual(field, other) def test___eq___value_mismatch(self): - field = self._make_one('test', 'STRING', 'hello') - other = self._make_one('test', 'STRING', 'world') + field = self._make_one("test", "STRING", "hello") + other = self._make_one("test", "STRING", "world") self.assertNotEqual(field, other) def test___eq___hit(self): - field = self._make_one('test', 'STRING', 'gotcha') - other = self._make_one('test', 'STRING', 'gotcha') + field = self._make_one("test", "STRING", "gotcha") + other = self._make_one("test", "STRING", "gotcha") self.assertEqual(field, other) def test___ne___wrong_type(self): - field = self._make_one('toast', 'INT64', 13) + field = self._make_one("toast", "INT64", 13) other = object() self.assertNotEqual(field, other) self.assertEqual(field, mock.ANY) def test___ne___same_value(self): - field1 = self._make_one('test', 'INT64', 12) - field2 = self._make_one('test', 'INT64', 12) + field1 = self._make_one("test", "INT64", 12) + field2 = self._make_one("test", "INT64", 12) # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (field1 != field2) + comparison_val = field1 != field2 self.assertFalse(comparison_val) def test___ne___different_values(self): - field1 = self._make_one('test', 'INT64', 11) - field2 = self._make_one('test', 'INT64', 12) + field1 = self._make_one("test", "INT64", 11) + field2 = self._make_one("test", "INT64", 12) self.assertNotEqual(field1, field2) def test___repr__(self): - field1 = self._make_one('field1', 'STRING', 'value') + field1 = self._make_one("field1", "STRING", "value") expected = "ScalarQueryParameter('field1', 'STRING', 'value')" self.assertEqual(repr(field1), expected) @@ -365,7 +306,6 @@ def _make_subparam(name, type_, value): class Test_ArrayQueryParameter(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.query import ArrayQueryParameter @@ -376,119 +316,81 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor(self): - param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.array_type, 'INT64') + param = self._make_one(name="foo", array_type="INT64", values=[1, 2]) + self.assertEqual(param.name, "foo") + self.assertEqual(param.array_type, "INT64") self.assertEqual(param.values, [1, 2]) def test___eq__(self): - param = self._make_one(name='foo', array_type='INT64', values=[123]) + param = self._make_one(name="foo", array_type="INT64", values=[123]) self.assertEqual(param, param) self.assertNotEqual(param, object()) - alias = self._make_one(name='bar', array_type='INT64', values=[123]) + alias = self._make_one(name="bar", array_type="INT64", values=[123]) self.assertNotEqual(param, alias) - wrong_type = self._make_one( - name='foo', array_type='FLOAT64', values=[123.0]) + wrong_type = self._make_one(name="foo", array_type="FLOAT64", values=[123.0]) self.assertNotEqual(param, wrong_type) - wrong_val = self._make_one( - name='foo', array_type='INT64', values=[234]) + wrong_val = self._make_one(name="foo", array_type="INT64", values=[234]) self.assertNotEqual(param, wrong_val) def test_positional(self): klass = self._get_target_class() - param = klass.positional(array_type='INT64', values=[1, 2]) + param = klass.positional(array_type="INT64", values=[1, 2]) self.assertEqual(param.name, None) - self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.array_type, "INT64") self.assertEqual(param.values, [1, 2]) def test_from_api_repr_w_name(self): RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, + "name": "foo", + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "2"}]}, } klass = self._get_target_class() param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.name, "foo") + self.assertEqual(param.array_type, "INT64") self.assertEqual(param.values, [1, 2]) def test_from_api_repr_wo_name(self): RESOURCE = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "2"}]}, } klass = self._get_target_class() param = klass.from_api_repr(RESOURCE) self.assertEqual(param.name, None) - self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.array_type, "INT64") self.assertEqual(param.values, [1, 2]) def test_from_api_repr_w_struct_type(self): from google.cloud.bigquery.query import StructQueryParameter RESOURCE = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'STRUCT', - 'structTypes': [ - { - 'name': 'name', - 'type': {'type': 'STRING'}, - }, - { - 'name': 'age', - 'type': {'type': 'INT64'}, - }, + "parameterType": { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "age", "type": {"type": "INT64"}}, ], }, }, - 'parameterValue': { - 'arrayValues': [ + "parameterValue": { + "arrayValues": [ { - 'structValues': { - 'name': {'value': 'Phred Phlyntstone'}, - 'age': {'value': '32'}, - }, + "structValues": { + "name": {"value": "Phred Phlyntstone"}, + "age": {"value": "32"}, + } }, { - 'structValues': { - 'name': { - 'value': 'Bharney Rhubbyl', - }, - 'age': {'value': '31'}, - }, + "structValues": { + "name": {"value": "Bharney Rhubbyl"}, + "age": {"value": "31"}, + } }, - ], + ] }, } @@ -496,162 +398,121 @@ def test_from_api_repr_w_struct_type(self): param = klass.from_api_repr(RESOURCE) phred = StructQueryParameter.positional( - _make_subparam('name', 'STRING', 'Phred Phlyntstone'), - _make_subparam('age', 'INT64', 32)) + _make_subparam("name", "STRING", "Phred Phlyntstone"), + _make_subparam("age", "INT64", 32), + ) bharney = StructQueryParameter.positional( - _make_subparam('name', 'STRING', 'Bharney Rhubbyl'), - _make_subparam('age', 'INT64', 31)) - self.assertEqual(param.array_type, 'STRUCT') + _make_subparam("name", "STRING", "Bharney Rhubbyl"), + _make_subparam("age", "INT64", 31), + ) + self.assertEqual(param.array_type, "STRUCT") self.assertEqual(param.values, [phred, bharney]) def test_to_api_repr_w_name(self): EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, + "name": "foo", + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "2"}]}, } - param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) + param = self._make_one(name="foo", array_type="INT64", values=[1, 2]) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_wo_name(self): EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "2"}]}, } klass = self._get_target_class() - param = klass.positional(array_type='INT64', values=[1, 2]) + param = klass.positional(array_type="INT64", values=[1, 2]) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_unknown_type(self): EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'UNKNOWN', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': 'unknown', - } - ], - }, + "parameterType": {"type": "ARRAY", "arrayType": {"type": "UNKNOWN"}}, + "parameterValue": {"arrayValues": [{"value": "unknown"}]}, } klass = self._get_target_class() - param = klass.positional(array_type='UNKNOWN', values=['unknown']) + param = klass.positional(array_type="UNKNOWN", values=["unknown"]) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_record_type(self): from google.cloud.bigquery.query import StructQueryParameter EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'foo', 'type': {'type': 'STRING'}}, - {'name': 'bar', 'type': {'type': 'INT64'}}, + "parameterType": { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + {"name": "foo", "type": {"type": "STRING"}}, + {"name": "bar", "type": {"type": "INT64"}}, ], }, }, - 'parameterValue': { - 'arrayValues': [{ - 'structValues': { - 'foo': {'value': 'Foo'}, - 'bar': {'value': '123'}, - } - }] + "parameterValue": { + "arrayValues": [ + {"structValues": {"foo": {"value": "Foo"}, "bar": {"value": "123"}}} + ] }, } - one = _make_subparam('foo', 'STRING', 'Foo') - another = _make_subparam('bar', 'INT64', 123) + one = _make_subparam("foo", "STRING", "Foo") + another = _make_subparam("bar", "INT64", 123) struct = StructQueryParameter.positional(one, another) klass = self._get_target_class() - param = klass.positional(array_type='RECORD', values=[struct]) + param = klass.positional(array_type="RECORD", values=[struct]) self.assertEqual(param.to_api_repr(), EXPECTED) def test___eq___wrong_type(self): - field = self._make_one('test', 'STRING', ['value']) + field = self._make_one("test", "STRING", ["value"]) other = object() self.assertNotEqual(field, other) self.assertEqual(field, mock.ANY) def test___eq___name_mismatch(self): - field = self._make_one('field', 'STRING', ['value']) - other = self._make_one('other', 'STRING', ['value']) + field = self._make_one("field", "STRING", ["value"]) + other = self._make_one("other", "STRING", ["value"]) self.assertNotEqual(field, other) def test___eq___field_type_mismatch(self): - field = self._make_one('test', 'STRING', []) - other = self._make_one('test', 'INT64', []) + field = self._make_one("test", "STRING", []) + other = self._make_one("test", "INT64", []) self.assertNotEqual(field, other) def test___eq___value_mismatch(self): - field = self._make_one('test', 'STRING', ['hello']) - other = self._make_one('test', 'STRING', ['hello', 'world']) + field = self._make_one("test", "STRING", ["hello"]) + other = self._make_one("test", "STRING", ["hello", "world"]) self.assertNotEqual(field, other) def test___eq___hit(self): - field = self._make_one('test', 'STRING', ['gotcha']) - other = self._make_one('test', 'STRING', ['gotcha']) + field = self._make_one("test", "STRING", ["gotcha"]) + other = self._make_one("test", "STRING", ["gotcha"]) self.assertEqual(field, other) def test___ne___wrong_type(self): - field = self._make_one('toast', 'INT64', [13]) + field = self._make_one("toast", "INT64", [13]) other = object() self.assertNotEqual(field, other) self.assertEqual(field, mock.ANY) def test___ne___same_value(self): - field1 = self._make_one('test', 'INT64', [12]) - field2 = self._make_one('test', 'INT64', [12]) + field1 = self._make_one("test", "INT64", [12]) + field2 = self._make_one("test", "INT64", [12]) # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (field1 != field2) + comparison_val = field1 != field2 self.assertFalse(comparison_val) def test___ne___different_values(self): - field1 = self._make_one('test', 'INT64', [11]) - field2 = self._make_one('test', 'INT64', [12]) + field1 = self._make_one("test", "INT64", [11]) + field2 = self._make_one("test", "INT64", [12]) self.assertNotEqual(field1, field2) def test___repr__(self): - field1 = self._make_one('field1', 'STRING', ['value']) + field1 = self._make_one("field1", "STRING", ["value"]) expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" self.assertEqual(repr(field1), expected) class Test_StructQueryParameter(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.query import StructQueryParameter @@ -662,105 +523,96 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor(self): - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - param = self._make_one('foo', sub_1, sub_2) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + sub_1 = _make_subparam("bar", "INT64", 123) + sub_2 = _make_subparam("baz", "STRING", "abc") + param = self._make_one("foo", sub_1, sub_2) + self.assertEqual(param.name, "foo") + self.assertEqual(param.struct_types, {"bar": "INT64", "baz": "STRING"}) + self.assertEqual(param.struct_values, {"bar": 123, "baz": "abc"}) def test___eq__(self): - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - sub_3 = _make_subparam('baz', 'STRING', 'def') - sub_1_float = _make_subparam('bar', 'FLOAT64', 123.0) - param = self._make_one('foo', sub_1, sub_2) + sub_1 = _make_subparam("bar", "INT64", 123) + sub_2 = _make_subparam("baz", "STRING", "abc") + sub_3 = _make_subparam("baz", "STRING", "def") + sub_1_float = _make_subparam("bar", "FLOAT64", 123.0) + param = self._make_one("foo", sub_1, sub_2) self.assertEqual(param, param) self.assertNotEqual(param, object()) - alias = self._make_one('bar', sub_1, sub_2) + alias = self._make_one("bar", sub_1, sub_2) self.assertNotEqual(param, alias) - wrong_type = self._make_one('foo', sub_1_float, sub_2) + wrong_type = self._make_one("foo", sub_1_float, sub_2) self.assertNotEqual(param, wrong_type) - wrong_val = self._make_one('foo', sub_2, sub_3) + wrong_val = self._make_one("foo", sub_2, sub_3) self.assertNotEqual(param, wrong_val) def test_positional(self): - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') + sub_1 = _make_subparam("bar", "INT64", 123) + sub_2 = _make_subparam("baz", "STRING", "abc") klass = self._get_target_class() param = klass.positional(sub_1, sub_2) self.assertEqual(param.name, None) - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + self.assertEqual(param.struct_types, {"bar": "INT64", "baz": "STRING"}) + self.assertEqual(param.struct_values, {"bar": 123, "baz": "abc"}) def test_from_api_repr_w_name(self): RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, + "name": "foo", + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "bar", "type": {"type": "INT64"}}, + {"name": "baz", "type": {"type": "STRING"}}, ], }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 123}, - 'baz': {'value': 'abc'}, - }, + "parameterValue": { + "structValues": {"bar": {"value": 123}, "baz": {"value": "abc"}} }, } klass = self._get_target_class() param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + self.assertEqual(param.name, "foo") + self.assertEqual(param.struct_types, {"bar": "INT64", "baz": "STRING"}) + self.assertEqual(param.struct_values, {"bar": 123, "baz": "abc"}) def test_from_api_repr_wo_name(self): RESOURCE = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "bar", "type": {"type": "INT64"}}, + {"name": "baz", "type": {"type": "STRING"}}, ], }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 123}, - 'baz': {'value': 'abc'}, - }, + "parameterValue": { + "structValues": {"bar": {"value": 123}, "baz": {"value": "abc"}} }, } klass = self._get_target_class() param = klass.from_api_repr(RESOURCE) self.assertEqual(param.name, None) - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + self.assertEqual(param.struct_types, {"bar": "INT64", "baz": "STRING"}) + self.assertEqual(param.struct_values, {"bar": 123, "baz": "abc"}) def test_from_api_repr_w_nested_array(self): from google.cloud.bigquery.query import ArrayQueryParameter RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'ARRAY', - 'arrayType': {'type': 'INT64'}, - }}, + "name": "foo", + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "bar", "type": {"type": "STRING"}}, + { + "name": "baz", + "type": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + }, ], }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'arrayValues': [ - {'value': '123'}, - {'value': '456'}, - ]}, - }, + "parameterValue": { + "structValues": { + "bar": {"value": "abc"}, + "baz": {"arrayValues": [{"value": "123"}, {"value": "456"}]}, + } }, } klass = self._get_target_class() @@ -768,34 +620,41 @@ def test_from_api_repr_w_nested_array(self): self.assertEqual( param, self._make_one( - 'foo', - _make_subparam('bar', 'STRING', 'abc'), - ArrayQueryParameter('baz', 'INT64', [123, 456]))) + "foo", + _make_subparam("bar", "STRING", "abc"), + ArrayQueryParameter("baz", "INT64", [123, 456]), + ), + ) def test_from_api_repr_w_nested_struct(self): RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'qux', 'type': {'type': 'INT64'}}, - {'name': 'spam', 'type': {'type': 'BOOL'}}, - ], - }}, + "name": "foo", + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "bar", "type": {"type": "STRING"}}, + { + "name": "baz", + "type": { + "type": "STRUCT", + "structTypes": [ + {"name": "qux", "type": {"type": "INT64"}}, + {"name": "spam", "type": {"type": "BOOL"}}, + ], + }, + }, ], }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'structValues': { - 'qux': {'value': '123'}, - 'spam': {'value': 'true'}, - }}, - }, + "parameterValue": { + "structValues": { + "bar": {"value": "abc"}, + "baz": { + "structValues": { + "qux": {"value": "123"}, + "spam": {"value": "true"}, + } + }, + } }, } @@ -803,56 +662,52 @@ def test_from_api_repr_w_nested_struct(self): param = klass.from_api_repr(RESOURCE) expected = self._make_one( - 'foo', - _make_subparam('bar', 'STRING', 'abc'), + "foo", + _make_subparam("bar", "STRING", "abc"), self._make_one( - 'baz', - _make_subparam('qux', 'INT64', 123), - _make_subparam('spam', 'BOOL', True))) - self.assertEqual(param.name, 'foo') + "baz", + _make_subparam("qux", "INT64", 123), + _make_subparam("spam", "BOOL", True), + ), + ) + self.assertEqual(param.name, "foo") self.assertEqual(param.struct_types, expected.struct_types) self.assertEqual(param.struct_values, expected.struct_values) def test_to_api_repr_w_name(self): EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, + "name": "foo", + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "bar", "type": {"type": "INT64"}}, + {"name": "baz", "type": {"type": "STRING"}}, ], }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': '123'}, - 'baz': {'value': 'abc'}, - }, + "parameterValue": { + "structValues": {"bar": {"value": "123"}, "baz": {"value": "abc"}} }, } - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - param = self._make_one('foo', sub_1, sub_2) + sub_1 = _make_subparam("bar", "INT64", 123) + sub_2 = _make_subparam("baz", "STRING", "abc") + param = self._make_one("foo", sub_1, sub_2) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_wo_name(self): EXPECTED = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "bar", "type": {"type": "INT64"}}, + {"name": "baz", "type": {"type": "STRING"}}, ], }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': '123'}, - 'baz': {'value': 'abc'}, - }, + "parameterValue": { + "structValues": {"bar": {"value": "123"}, "baz": {"value": "abc"}} }, } - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') + sub_1 = _make_subparam("bar", "INT64", 123) + sub_2 = _make_subparam("baz", "STRING", "abc") klass = self._get_target_class() param = klass.positional(sub_1, sub_2) self.assertEqual(param.to_api_repr(), EXPECTED) @@ -861,136 +716,123 @@ def test_to_api_repr_w_nested_array(self): from google.cloud.bigquery.query import ArrayQueryParameter EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'ARRAY', - 'arrayType': {'type': 'INT64'}, - }}, + "name": "foo", + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "bar", "type": {"type": "STRING"}}, + { + "name": "baz", + "type": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + }, ], }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'arrayValues': [ - {'value': '123'}, - {'value': '456'}, - ]}, - }, + "parameterValue": { + "structValues": { + "bar": {"value": "abc"}, + "baz": {"arrayValues": [{"value": "123"}, {"value": "456"}]}, + } }, } - scalar = _make_subparam('bar', 'STRING', 'abc') - array = ArrayQueryParameter('baz', 'INT64', [123, 456]) - param = self._make_one('foo', scalar, array) + scalar = _make_subparam("bar", "STRING", "abc") + array = ArrayQueryParameter("baz", "INT64", [123, 456]) + param = self._make_one("foo", scalar, array) self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_nested_struct(self): EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'qux', 'type': {'type': 'INT64'}}, - {'name': 'spam', 'type': {'type': 'BOOL'}}, - ], - }}, + "name": "foo", + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "bar", "type": {"type": "STRING"}}, + { + "name": "baz", + "type": { + "type": "STRUCT", + "structTypes": [ + {"name": "qux", "type": {"type": "INT64"}}, + {"name": "spam", "type": {"type": "BOOL"}}, + ], + }, + }, ], }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'structValues': { - 'qux': {'value': '123'}, - 'spam': {'value': 'true'}, - }}, - }, + "parameterValue": { + "structValues": { + "bar": {"value": "abc"}, + "baz": { + "structValues": { + "qux": {"value": "123"}, + "spam": {"value": "true"}, + } + }, + } }, } - scalar_1 = _make_subparam('bar', 'STRING', 'abc') - scalar_2 = _make_subparam('qux', 'INT64', 123) - scalar_3 = _make_subparam('spam', 'BOOL', True) - sub = self._make_one('baz', scalar_2, scalar_3) - param = self._make_one('foo', scalar_1, sub) + scalar_1 = _make_subparam("bar", "STRING", "abc") + scalar_2 = _make_subparam("qux", "INT64", 123) + scalar_3 = _make_subparam("spam", "BOOL", True) + sub = self._make_one("baz", scalar_2, scalar_3) + param = self._make_one("foo", scalar_1, sub) self.assertEqual(param.to_api_repr(), EXPECTED) def test___eq___wrong_type(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'abc')) + field = self._make_one("test", _make_subparam("bar", "STRING", "abc")) other = object() self.assertNotEqual(field, other) self.assertEqual(field, mock.ANY) def test___eq___name_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'abc')) - other = self._make_one( - 'other ', _make_subparam('bar', 'STRING', 'abc')) + field = self._make_one("test", _make_subparam("bar", "STRING", "abc")) + other = self._make_one("other ", _make_subparam("bar", "STRING", "abc")) self.assertNotEqual(field, other) def test___eq___field_type_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', None)) - other = self._make_one( - 'test', _make_subparam('bar', 'INT64', None)) + field = self._make_one("test", _make_subparam("bar", "STRING", None)) + other = self._make_one("test", _make_subparam("bar", "INT64", None)) self.assertNotEqual(field, other) def test___eq___value_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - other = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'world')) + field = self._make_one("test", _make_subparam("bar", "STRING", "hello")) + other = self._make_one("test", _make_subparam("bar", "STRING", "world")) self.assertNotEqual(field, other) def test___eq___hit(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'gotcha')) - other = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'gotcha')) + field = self._make_one("test", _make_subparam("bar", "STRING", "gotcha")) + other = self._make_one("test", _make_subparam("bar", "STRING", "gotcha")) self.assertEqual(field, other) def test___ne___wrong_type(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) + field = self._make_one("test", _make_subparam("bar", "STRING", "hello")) other = object() self.assertNotEqual(field, other) self.assertEqual(field, mock.ANY) def test___ne___same_value(self): - field1 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - field2 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) + field1 = self._make_one("test", _make_subparam("bar", "STRING", "hello")) + field2 = self._make_one("test", _make_subparam("bar", "STRING", "hello")) # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (field1 != field2) + comparison_val = field1 != field2 self.assertFalse(comparison_val) def test___ne___different_values(self): - field1 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - field2 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'world')) + field1 = self._make_one("test", _make_subparam("bar", "STRING", "hello")) + field2 = self._make_one("test", _make_subparam("bar", "STRING", "world")) self.assertNotEqual(field1, field2) def test___repr__(self): - field1 = self._make_one( - 'test', _make_subparam('field1', 'STRING', 'hello')) + field1 = self._make_one("test", _make_subparam("field1", "STRING", "hello")) got = repr(field1) - self.assertIn('StructQueryParameter', got) + self.assertIn("StructQueryParameter", got) self.assertIn("'field1', 'STRING'", got) self.assertIn("'field1': 'hello'", got) class Test_QueryResults(unittest.TestCase): - PROJECT = 'project' - JOB_ID = 'test-synchronous-query' - TOKEN = 'TOKEN' + PROJECT = "project" + JOB_ID = "test-synchronous-query" + TOKEN = "TOKEN" @staticmethod def _get_target_class(): @@ -1002,27 +844,21 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _make_resource(self): - return { - 'jobReference': { - 'projectId': self.PROJECT, - 'jobId': self.JOB_ID, - }, - } + return {"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}} def _verifySchema(self, query, resource): from google.cloud.bigquery.schema import SchemaField - if 'schema' in resource: - fields = resource['schema']['fields'] + if "schema" in resource: + fields = resource["schema"]["fields"] self.assertEqual(len(query.schema), len(fields)) for found, expected in zip(query.schema, fields): self.assertIsInstance(found, SchemaField) - self.assertEqual(found.name, expected['name']) - self.assertEqual(found.field_type, expected['type']) - self.assertEqual(found.mode, expected['mode']) - self.assertEqual(found.description, - expected.get('description')) - self.assertEqual(found.fields, expected.get('fields', ())) + self.assertEqual(found.name, expected["name"]) + self.assertEqual(found.field_type, expected["type"]) + self.assertEqual(found.mode, expected["mode"]) + self.assertEqual(found.description, expected.get("description")) + self.assertEqual(found.fields, expected.get("fields", ())) else: self.assertEqual(query.schema, ()) @@ -1044,7 +880,7 @@ def test_cache_hit_missing(self): def test_cache_hit_present(self): resource = self._make_resource() - resource['cacheHit'] = True + resource["cacheHit"] = True query = self._make_one(resource) self.assertTrue(query.cache_hit) @@ -1054,7 +890,7 @@ def test_complete_missing(self): def test_complete_present(self): resource = self._make_resource() - resource['jobComplete'] = True + resource["jobComplete"] = True query = self._make_one(resource) self.assertTrue(query.complete) @@ -1063,11 +899,9 @@ def test_errors_missing(self): self.assertIsNone(query.errors) def test_errors_present(self): - ERRORS = [ - {'reason': 'testing'}, - ] + ERRORS = [{"reason": "testing"}] resource = self._make_resource() - resource['errors'] = ERRORS + resource["errors"] = ERRORS query = self._make_one(resource) self.assertEqual(query.errors, ERRORS) @@ -1076,15 +910,15 @@ def test_job_id_missing(self): self._make_one({}) def test_job_id_broken_job_reference(self): - resource = {'jobReference': {'bogus': 'BOGUS'}} + resource = {"jobReference": {"bogus": "BOGUS"}} with self.assertRaises(ValueError): self._make_one(resource) def test_job_id_present(self): resource = self._make_resource() - resource['jobReference']['jobId'] = 'custom-job' + resource["jobReference"]["jobId"] = "custom-job" query = self._make_one(resource) - self.assertEqual(query.job_id, 'custom-job') + self.assertEqual(query.job_id, "custom-job") def test_page_token_missing(self): query = self._make_one(self._make_resource()) @@ -1092,19 +926,19 @@ def test_page_token_missing(self): def test_page_token_present(self): resource = self._make_resource() - resource['pageToken'] = 'TOKEN' + resource["pageToken"] = "TOKEN" query = self._make_one(resource) - self.assertEqual(query.page_token, 'TOKEN') + self.assertEqual(query.page_token, "TOKEN") def test_total_rows_present_integer(self): resource = self._make_resource() - resource['totalRows'] = 42 + resource["totalRows"] = 42 query = self._make_one(resource) self.assertEqual(query.total_rows, 42) def test_total_rows_present_string(self): resource = self._make_resource() - resource['totalRows'] = '42' + resource["totalRows"] = "42" query = self._make_one(resource) self.assertEqual(query.total_rows, 42) @@ -1114,13 +948,13 @@ def test_total_bytes_processed_missing(self): def test_total_bytes_processed_present_integer(self): resource = self._make_resource() - resource['totalBytesProcessed'] = 123456 + resource["totalBytesProcessed"] = 123456 query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) def test_total_bytes_processed_present_string(self): resource = self._make_resource() - resource['totalBytesProcessed'] = '123456' + resource["totalBytesProcessed"] = "123456" query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) @@ -1130,13 +964,13 @@ def test_num_dml_affected_rows_missing(self): def test_num_dml_affected_rows_present_integer(self): resource = self._make_resource() - resource['numDmlAffectedRows'] = 123456 + resource["numDmlAffectedRows"] = 123456 query = self._make_one(resource) self.assertEqual(query.num_dml_affected_rows, 123456) def test_num_dml_affected_rows_present_string(self): resource = self._make_resource() - resource['numDmlAffectedRows'] = '123456' + resource["numDmlAffectedRows"] = "123456" query = self._make_one(resource) self.assertEqual(query.num_dml_affected_rows, 123456) @@ -1144,18 +978,17 @@ def test_schema(self): query = self._make_one(self._make_resource()) self._verifySchema(query, self._make_resource()) resource = self._make_resource() - resource['schema'] = { - 'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, - ], + resource["schema"] = { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQURED"}, + {"name": "age", "type": "INTEGER", "mode": "REQURED"}, + ] } query._set_properties(resource) self._verifySchema(query, resource) class Test__query_param_from_api_repr(unittest.TestCase): - @staticmethod def _call_fut(resource): from google.cloud.bigquery.query import _query_param_from_api_repr @@ -1166,16 +999,16 @@ def test_w_scalar(self): from google.cloud.bigquery.query import ScalarQueryParameter RESOURCE = { - 'name': 'foo', - 'parameterType': {'type': 'INT64'}, - 'parameterValue': {'value': '123'}, + "name": "foo", + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, } parameter = self._call_fut(RESOURCE) self.assertIsInstance(parameter, ScalarQueryParameter) - self.assertEqual(parameter.name, 'foo') - self.assertEqual(parameter.type_, 'INT64') + self.assertEqual(parameter.name, "foo") + self.assertEqual(parameter.type_, "INT64") self.assertEqual(parameter.value, 123) def test_w_scalar_timestamp(self): @@ -1183,85 +1016,75 @@ def test_w_scalar_timestamp(self): from google.cloud.bigquery.query import ScalarQueryParameter RESOURCE = { - 'name': 'zoned', - 'parameterType': {'type': 'TIMESTAMP'}, - 'parameterValue': {'value': '2012-03-04 05:06:07+00:00'}, + "name": "zoned", + "parameterType": {"type": "TIMESTAMP"}, + "parameterValue": {"value": "2012-03-04 05:06:07+00:00"}, } parameter = self._call_fut(RESOURCE) self.assertIsInstance(parameter, ScalarQueryParameter) - self.assertEqual(parameter.name, 'zoned') - self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual(parameter.name, "zoned") + self.assertEqual(parameter.type_, "TIMESTAMP") self.assertEqual( - parameter.value, - datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC)) + parameter.value, datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC) + ) def test_w_scalar_timestamp_micros(self): from google.cloud._helpers import UTC from google.cloud.bigquery.query import ScalarQueryParameter RESOURCE = { - 'name': 'zoned', - 'parameterType': {'type': 'TIMESTAMP'}, - 'parameterValue': {'value': '2012-03-04 05:06:07.250000+00:00'}, + "name": "zoned", + "parameterType": {"type": "TIMESTAMP"}, + "parameterValue": {"value": "2012-03-04 05:06:07.250000+00:00"}, } parameter = self._call_fut(RESOURCE) self.assertIsInstance(parameter, ScalarQueryParameter) - self.assertEqual(parameter.name, 'zoned') - self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual(parameter.name, "zoned") + self.assertEqual(parameter.type_, "TIMESTAMP") self.assertEqual( - parameter.value, - datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC)) + parameter.value, datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC) + ) def test_w_array(self): from google.cloud.bigquery.query import ArrayQueryParameter RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': {'type': 'INT64'}, - }, - 'parameterValue': { - 'arrayValues': [ - {'value': '123'}, - ]}, + "name": "foo", + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": [{"value": "123"}]}, } parameter = self._call_fut(RESOURCE) self.assertIsInstance(parameter, ArrayQueryParameter) - self.assertEqual(parameter.name, 'foo') - self.assertEqual(parameter.array_type, 'INT64') + self.assertEqual(parameter.name, "foo") + self.assertEqual(parameter.array_type, "INT64") self.assertEqual(parameter.values, [123]) def test_w_struct(self): from google.cloud.bigquery.query import StructQueryParameter RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'foo', 'type': {'type': 'STRING'}}, - {'name': 'bar', 'type': {'type': 'INT64'}}, + "name": "foo", + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "foo", "type": {"type": "STRING"}}, + {"name": "bar", "type": {"type": "INT64"}}, ], }, - 'parameterValue': { - 'structValues': { - 'foo': {'value': 'Foo'}, - 'bar': {'value': '123'}, - } + "parameterValue": { + "structValues": {"foo": {"value": "Foo"}, "bar": {"value": "123"}} }, } parameter = self._call_fut(RESOURCE) self.assertIsInstance(parameter, StructQueryParameter) - self.assertEqual(parameter.name, 'foo') - self.assertEqual( - parameter.struct_types, {'foo': 'STRING', 'bar': 'INT64'}) - self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123}) + self.assertEqual(parameter.name, "foo") + self.assertEqual(parameter.struct_types, {"foo": "STRING", "bar": "INT64"}) + self.assertEqual(parameter.struct_values, {"foo": "Foo", "bar": 123}) diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index 2b9f77cb4162..d9f867cb30f7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -1,4 +1,3 @@ - # Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,7 +18,6 @@ class Test_should_retry(unittest.TestCase): - def _call_fut(self, exc): from google.cloud.bigquery.retry import _should_retry @@ -29,48 +27,43 @@ def test_wo_errors_attribute(self): self.assertFalse(self._call_fut(object())) def test_w_empty_errors(self): - exc = mock.Mock(errors=[], spec=['errors']) + exc = mock.Mock(errors=[], spec=["errors"]) self.assertFalse(self._call_fut(exc)) def test_w_non_matching_reason(self): - exc = mock.Mock( - errors=[{'reason': 'bogus'}], spec=['errors']) + exc = mock.Mock(errors=[{"reason": "bogus"}], spec=["errors"]) self.assertFalse(self._call_fut(exc)) def test_w_backendError(self): - exc = mock.Mock( - errors=[{'reason': 'backendError'}], spec=['errors']) + exc = mock.Mock(errors=[{"reason": "backendError"}], spec=["errors"]) self.assertTrue(self._call_fut(exc)) def test_w_rateLimitExceeded(self): - exc = mock.Mock( - errors=[{'reason': 'rateLimitExceeded'}], spec=['errors']) + exc = mock.Mock(errors=[{"reason": "rateLimitExceeded"}], spec=["errors"]) self.assertTrue(self._call_fut(exc)) def test_w_unstructured_too_many_requests(self): from google.api_core.exceptions import TooManyRequests - exc = TooManyRequests('testing') + exc = TooManyRequests("testing") self.assertTrue(self._call_fut(exc)) def test_w_internalError(self): - exc = mock.Mock( - errors=[{'reason': 'internalError'}], spec=['errors']) + exc = mock.Mock(errors=[{"reason": "internalError"}], spec=["errors"]) self.assertTrue(self._call_fut(exc)) def test_w_unstructured_internal_server_error(self): from google.api_core.exceptions import InternalServerError - exc = InternalServerError('testing') + exc = InternalServerError("testing") self.assertTrue(self._call_fut(exc)) def test_w_badGateway(self): - exc = mock.Mock( - errors=[{'reason': 'badGateway'}], spec=['errors']) + exc = mock.Mock(errors=[{"reason": "badGateway"}], spec=["errors"]) self.assertTrue(self._call_fut(exc)) def test_w_unstructured_bad_gateway(self): from google.api_core.exceptions import BadGateway - exc = BadGateway('testing') + exc = BadGateway("testing") self.assertTrue(self._call_fut(exc)) diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 6be6abeb56dc..4694aaf63cd8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -18,7 +18,6 @@ class TestSchemaField(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.schema import SchemaField @@ -29,241 +28,234 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_constructor_defaults(self): - field = self._make_one('test', 'STRING') - self.assertEqual(field._name, 'test') - self.assertEqual(field._field_type, 'STRING') - self.assertEqual(field._mode, 'NULLABLE') + field = self._make_one("test", "STRING") + self.assertEqual(field._name, "test") + self.assertEqual(field._field_type, "STRING") + self.assertEqual(field._mode, "NULLABLE") self.assertIsNone(field._description) self.assertEqual(field._fields, ()) def test_constructor_explicit(self): - field = self._make_one('test', 'STRING', mode='REQUIRED', - description='Testing') - self.assertEqual(field._name, 'test') - self.assertEqual(field._field_type, 'STRING') - self.assertEqual(field._mode, 'REQUIRED') - self.assertEqual(field._description, 'Testing') + field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") + self.assertEqual(field._name, "test") + self.assertEqual(field._field_type, "STRING") + self.assertEqual(field._mode, "REQUIRED") + self.assertEqual(field._description, "Testing") self.assertEqual(field._fields, ()) def test_constructor_subfields(self): - sub_field1 = self._make_one('area_code', 'STRING') - sub_field2 = self._make_one('local_number', 'STRING') + sub_field1 = self._make_one("area_code", "STRING") + sub_field2 = self._make_one("local_number", "STRING") field = self._make_one( - 'phone_number', - 'RECORD', - fields=[sub_field1, sub_field2], + "phone_number", "RECORD", fields=[sub_field1, sub_field2] ) - self.assertEqual(field._name, 'phone_number') - self.assertEqual(field._field_type, 'RECORD') - self.assertEqual(field._mode, 'NULLABLE') + self.assertEqual(field._name, "phone_number") + self.assertEqual(field._field_type, "RECORD") + self.assertEqual(field._mode, "NULLABLE") self.assertIsNone(field._description) self.assertEqual(len(field._fields), 2) self.assertIs(field._fields[0], sub_field1) self.assertIs(field._fields[1], sub_field2) def test_to_api_repr(self): - field = self._make_one('foo', 'INTEGER', 'NULLABLE') - self.assertEqual(field.to_api_repr(), { - 'mode': 'NULLABLE', - 'name': 'foo', - 'type': 'INTEGER', - 'description': None, - }) + field = self._make_one("foo", "INTEGER", "NULLABLE") + self.assertEqual( + field.to_api_repr(), + {"mode": "NULLABLE", "name": "foo", "type": "INTEGER", "description": None}, + ) def test_to_api_repr_with_subfield(self): - subfield = self._make_one('bar', 'INTEGER', 'NULLABLE') - field = self._make_one('foo', 'RECORD', 'REQUIRED', fields=(subfield,)) - self.assertEqual(field.to_api_repr(), { - 'fields': [{ - 'mode': 'NULLABLE', - 'name': 'bar', - 'type': 'INTEGER', - 'description': None, - }], - 'mode': 'REQUIRED', - 'name': 'foo', - 'type': 'RECORD', - 'description': None, - }) + subfield = self._make_one("bar", "INTEGER", "NULLABLE") + field = self._make_one("foo", "RECORD", "REQUIRED", fields=(subfield,)) + self.assertEqual( + field.to_api_repr(), + { + "fields": [ + { + "mode": "NULLABLE", + "name": "bar", + "type": "INTEGER", + "description": None, + } + ], + "mode": "REQUIRED", + "name": "foo", + "type": "RECORD", + "description": None, + }, + ) def test_from_api_repr(self): - field = self._get_target_class().from_api_repr({ - 'fields': [{ - 'mode': 'nullable', - 'name': 'bar', - 'type': 'integer', - }], - 'mode': 'required', - 'description': 'test_description', - 'name': 'foo', - 'type': 'record', - }) - self.assertEqual(field.name, 'foo') - self.assertEqual(field.field_type, 'RECORD') - self.assertEqual(field.mode, 'REQUIRED') - self.assertEqual(field.description, 'test_description') + field = self._get_target_class().from_api_repr( + { + "fields": [{"mode": "nullable", "name": "bar", "type": "integer"}], + "mode": "required", + "description": "test_description", + "name": "foo", + "type": "record", + } + ) + self.assertEqual(field.name, "foo") + self.assertEqual(field.field_type, "RECORD") + self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.description, "test_description") self.assertEqual(len(field.fields), 1) - self.assertEqual(field.fields[0].name, 'bar') - self.assertEqual(field.fields[0].field_type, 'INTEGER') - self.assertEqual(field.fields[0].mode, 'NULLABLE') + self.assertEqual(field.fields[0].name, "bar") + self.assertEqual(field.fields[0].field_type, "INTEGER") + self.assertEqual(field.fields[0].mode, "NULLABLE") def test_from_api_repr_defaults(self): - field = self._get_target_class().from_api_repr({ - 'name': 'foo', - 'type': 'record', - }) - self.assertEqual(field.name, 'foo') - self.assertEqual(field.field_type, 'RECORD') - self.assertEqual(field.mode, 'NULLABLE') + field = self._get_target_class().from_api_repr( + {"name": "foo", "type": "record"} + ) + self.assertEqual(field.name, "foo") + self.assertEqual(field.field_type, "RECORD") + self.assertEqual(field.mode, "NULLABLE") self.assertEqual(field.description, None) self.assertEqual(len(field.fields), 0) def test_name_property(self): - name = 'lemon-ness' - schema_field = self._make_one(name, 'INTEGER') + name = "lemon-ness" + schema_field = self._make_one(name, "INTEGER") self.assertIs(schema_field.name, name) def test_field_type_property(self): - field_type = 'BOOLEAN' - schema_field = self._make_one('whether', field_type) + field_type = "BOOLEAN" + schema_field = self._make_one("whether", field_type) self.assertIs(schema_field.field_type, field_type) def test_mode_property(self): - mode = 'REPEATED' - schema_field = self._make_one('again', 'FLOAT', mode=mode) + mode = "REPEATED" + schema_field = self._make_one("again", "FLOAT", mode=mode) self.assertIs(schema_field.mode, mode) def test_is_nullable(self): - mode = 'NULLABLE' - schema_field = self._make_one('test', 'FLOAT', mode=mode) + mode = "NULLABLE" + schema_field = self._make_one("test", "FLOAT", mode=mode) self.assertTrue(schema_field.is_nullable) def test_is_not_nullable(self): - mode = 'REPEATED' - schema_field = self._make_one('test', 'FLOAT', mode=mode) + mode = "REPEATED" + schema_field = self._make_one("test", "FLOAT", mode=mode) self.assertFalse(schema_field.is_nullable) def test_description_property(self): - description = 'It holds some data.' - schema_field = self._make_one( - 'do', 'TIMESTAMP', description=description) + description = "It holds some data." + schema_field = self._make_one("do", "TIMESTAMP", description=description) self.assertIs(schema_field.description, description) def test_fields_property(self): - sub_field1 = self._make_one('one', 'STRING') - sub_field2 = self._make_one('fish', 'INTEGER') + sub_field1 = self._make_one("one", "STRING") + sub_field2 = self._make_one("fish", "INTEGER") fields = (sub_field1, sub_field2) - schema_field = self._make_one('boat', 'RECORD', fields=fields) + schema_field = self._make_one("boat", "RECORD", fields=fields) self.assertIs(schema_field.fields, fields) def test___eq___wrong_type(self): - field = self._make_one('test', 'STRING') + field = self._make_one("test", "STRING") other = object() self.assertNotEqual(field, other) self.assertEqual(field, mock.ANY) def test___eq___name_mismatch(self): - field = self._make_one('test', 'STRING') - other = self._make_one('other', 'STRING') + field = self._make_one("test", "STRING") + other = self._make_one("other", "STRING") self.assertNotEqual(field, other) def test___eq___field_type_mismatch(self): - field = self._make_one('test', 'STRING') - other = self._make_one('test', 'INTEGER') + field = self._make_one("test", "STRING") + other = self._make_one("test", "INTEGER") self.assertNotEqual(field, other) def test___eq___mode_mismatch(self): - field = self._make_one('test', 'STRING', mode='REQUIRED') - other = self._make_one('test', 'STRING', mode='NULLABLE') + field = self._make_one("test", "STRING", mode="REQUIRED") + other = self._make_one("test", "STRING", mode="NULLABLE") self.assertNotEqual(field, other) def test___eq___description_mismatch(self): - field = self._make_one('test', 'STRING', description='Testing') - other = self._make_one('test', 'STRING', description='Other') + field = self._make_one("test", "STRING", description="Testing") + other = self._make_one("test", "STRING", description="Other") self.assertNotEqual(field, other) def test___eq___fields_mismatch(self): - sub1 = self._make_one('sub1', 'STRING') - sub2 = self._make_one('sub2', 'STRING') - field = self._make_one('test', 'RECORD', fields=[sub1]) - other = self._make_one('test', 'RECORD', fields=[sub2]) + sub1 = self._make_one("sub1", "STRING") + sub2 = self._make_one("sub2", "STRING") + field = self._make_one("test", "RECORD", fields=[sub1]) + other = self._make_one("test", "RECORD", fields=[sub2]) self.assertNotEqual(field, other) def test___eq___hit(self): - field = self._make_one('test', 'STRING', mode='REQUIRED', - description='Testing') - other = self._make_one('test', 'STRING', mode='REQUIRED', - description='Testing') + field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") + other = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") self.assertEqual(field, other) def test___eq___hit_case_diff_on_type(self): - field = self._make_one('test', 'STRING', mode='REQUIRED', - description='Testing') - other = self._make_one('test', 'string', mode='REQUIRED', - description='Testing') + field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") + other = self._make_one("test", "string", mode="REQUIRED", description="Testing") self.assertEqual(field, other) def test___eq___hit_w_fields(self): - sub1 = self._make_one('sub1', 'STRING') - sub2 = self._make_one('sub2', 'STRING') - field = self._make_one('test', 'RECORD', fields=[sub1, sub2]) - other = self._make_one('test', 'RECORD', fields=[sub1, sub2]) + sub1 = self._make_one("sub1", "STRING") + sub2 = self._make_one("sub2", "STRING") + field = self._make_one("test", "RECORD", fields=[sub1, sub2]) + other = self._make_one("test", "RECORD", fields=[sub1, sub2]) self.assertEqual(field, other) def test___ne___wrong_type(self): - field = self._make_one('toast', 'INTEGER') + field = self._make_one("toast", "INTEGER") other = object() self.assertNotEqual(field, other) self.assertEqual(field, mock.ANY) def test___ne___same_value(self): - field1 = self._make_one('test', 'TIMESTAMP', mode='REPEATED') - field2 = self._make_one('test', 'TIMESTAMP', mode='REPEATED') + field1 = self._make_one("test", "TIMESTAMP", mode="REPEATED") + field2 = self._make_one("test", "TIMESTAMP", mode="REPEATED") # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (field1 != field2) + comparison_val = field1 != field2 self.assertFalse(comparison_val) def test___ne___different_values(self): field1 = self._make_one( - 'test1', 'FLOAT', mode='REPEATED', description='Not same') + "test1", "FLOAT", mode="REPEATED", description="Not same" + ) field2 = self._make_one( - 'test2', 'FLOAT', mode='NULLABLE', description='Knot saym') + "test2", "FLOAT", mode="NULLABLE", description="Knot saym" + ) self.assertNotEqual(field1, field2) def test___hash__set_equality(self): - sub1 = self._make_one('sub1', 'STRING') - sub2 = self._make_one('sub2', 'STRING') - field1 = self._make_one('test', 'RECORD', fields=[sub1]) - field2 = self._make_one('test', 'RECORD', fields=[sub2]) + sub1 = self._make_one("sub1", "STRING") + sub2 = self._make_one("sub2", "STRING") + field1 = self._make_one("test", "RECORD", fields=[sub1]) + field2 = self._make_one("test", "RECORD", fields=[sub2]) set_one = {field1, field2} set_two = {field1, field2} self.assertEqual(set_one, set_two) def test___hash__not_equals(self): - sub1 = self._make_one('sub1', 'STRING') - sub2 = self._make_one('sub2', 'STRING') - field1 = self._make_one('test', 'RECORD', fields=[sub1]) - field2 = self._make_one('test', 'RECORD', fields=[sub2]) + sub1 = self._make_one("sub1", "STRING") + sub2 = self._make_one("sub2", "STRING") + field1 = self._make_one("test", "RECORD", fields=[sub1]) + field2 = self._make_one("test", "RECORD", fields=[sub2]) set_one = {field1} set_two = {field2} self.assertNotEqual(set_one, set_two) def test___repr__(self): - field1 = self._make_one('field1', 'STRING') + field1 = self._make_one("field1", "STRING") expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, ())" self.assertEqual(repr(field1), expected) # TODO: dedup with the same class in test_table.py. class _SchemaBase(object): - def _verify_field(self, field, r_field): - self.assertEqual(field.name, r_field['name']) - self.assertEqual(field.field_type, r_field['type']) - self.assertEqual(field.mode, r_field.get('mode', 'NULLABLE')) + self.assertEqual(field.name, r_field["name"]) + self.assertEqual(field.field_type, r_field["type"]) + self.assertEqual(field.mode, r_field.get("mode", "NULLABLE")) def _verifySchema(self, schema, resource): - r_fields = resource['schema']['fields'] + r_fields = resource["schema"]["fields"] self.assertEqual(len(schema), len(r_fields)) for field, r_field in zip(schema, r_fields): @@ -271,7 +263,6 @@ def _verifySchema(self, schema, resource): class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): - def _call_fut(self, resource): from google.cloud.bigquery.schema import _parse_schema_resource @@ -279,44 +270,44 @@ def _call_fut(self, resource): def _make_resource(self): return { - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}, - ]}, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + } } def test__parse_schema_resource_defaults(self): RESOURCE = self._make_resource() - schema = self._call_fut(RESOURCE['schema']) + schema = self._call_fut(RESOURCE["schema"]) self._verifySchema(schema, RESOURCE) def test__parse_schema_resource_subfields(self): RESOURCE = self._make_resource() - RESOURCE['schema']['fields'].append( - {'name': 'phone', - 'type': 'RECORD', - 'mode': 'REPEATED', - 'fields': [{'name': 'type', - 'type': 'STRING', - 'mode': 'REQUIRED'}, - {'name': 'number', - 'type': 'STRING', - 'mode': 'REQUIRED'}]}) - schema = self._call_fut(RESOURCE['schema']) + RESOURCE["schema"]["fields"].append( + { + "name": "phone", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + ], + } + ) + schema = self._call_fut(RESOURCE["schema"]) self._verifySchema(schema, RESOURCE) def test__parse_schema_resource_fields_without_mode(self): RESOURCE = self._make_resource() - RESOURCE['schema']['fields'].append( - {'name': 'phone', - 'type': 'STRING'}) + RESOURCE["schema"]["fields"].append({"name": "phone", "type": "STRING"}) - schema = self._call_fut(RESOURCE['schema']) + schema = self._call_fut(RESOURCE["schema"]) self._verifySchema(schema, RESOURCE) class Test_build_schema_resource(unittest.TestCase, _SchemaBase): - def _call_fut(self, resource): from google.cloud.bigquery.schema import _build_schema_resource @@ -325,66 +316,88 @@ def _call_fut(self, resource): def test_defaults(self): from google.cloud.bigquery.schema import SchemaField - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None}) - self.assertEqual(resource[1], - {'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED', - 'description': None}) + self.assertEqual( + resource[0], + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + ) + self.assertEqual( + resource[1], + {"name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": None}, + ) def test_w_description(self): from google.cloud.bigquery.schema import SchemaField - DESCRIPTION = 'DESCRIPTION' - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED', - description=DESCRIPTION) - age = SchemaField('age', 'INTEGER', mode='REQUIRED') + DESCRIPTION = "DESCRIPTION" + full_name = SchemaField( + "full_name", "STRING", mode="REQUIRED", description=DESCRIPTION + ) + age = SchemaField("age", "INTEGER", mode="REQUIRED") resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': DESCRIPTION}) - self.assertEqual(resource[1], - {'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED', - 'description': None}) + self.assertEqual( + resource[0], + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": DESCRIPTION, + }, + ) + self.assertEqual( + resource[1], + {"name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": None}, + ) def test_w_subfields(self): from google.cloud.bigquery.schema import SchemaField - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - ph_type = SchemaField('type', 'STRING', 'REQUIRED') - ph_num = SchemaField('number', 'STRING', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='REPEATED', - fields=[ph_type, ph_num]) + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + ph_type = SchemaField("type", "STRING", "REQUIRED") + ph_num = SchemaField("number", "STRING", "REQUIRED") + phone = SchemaField( + "phone", "RECORD", mode="REPEATED", fields=[ph_type, ph_num] + ) resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None}) - self.assertEqual(resource[1], - {'name': 'phone', - 'type': 'RECORD', - 'mode': 'REPEATED', - 'description': None, - 'fields': [{'name': 'type', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None}, - {'name': 'number', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': None}]}) + self.assertEqual( + resource[0], + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + ) + self.assertEqual( + resource[1], + { + "name": "phone", + "type": "RECORD", + "mode": "REPEATED", + "description": None, + "fields": [ + { + "name": "type", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "number", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + ], + }, + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 5795a3c92e39..04a67c603ba9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -16,6 +16,7 @@ import mock import six + try: import pandas except (ImportError, AttributeError): # pragma: NO COVER @@ -25,14 +26,13 @@ class _SchemaBase(object): - def _verify_field(self, field, r_field): - self.assertEqual(field.name, r_field['name']) - self.assertEqual(field.field_type, r_field['type']) - self.assertEqual(field.mode, r_field.get('mode', 'NULLABLE')) + self.assertEqual(field.name, r_field["name"]) + self.assertEqual(field.field_type, r_field["type"]) + self.assertEqual(field.mode, r_field.get("mode", "NULLABLE")) def _verifySchema(self, schema, resource): - r_fields = resource['schema']['fields'] + r_fields = resource["schema"]["fields"] self.assertEqual(len(schema), len(r_fields)) for field, r_field in zip(schema, r_fields): @@ -40,7 +40,7 @@ def _verifySchema(self, schema, resource): class TestEncryptionConfiguration(unittest.TestCase): - KMS_KEY_NAME = 'projects/1/locations/global/keyRings/1/cryptoKeys/1' + KMS_KEY_NAME = "projects/1/locations/global/keyRings/1/cryptoKeys/1" @staticmethod def _get_target_class(): @@ -68,9 +68,7 @@ def test_kms_key_name_setter(self): self.assertIsNone(encryption_config.kms_key_name) def test_from_api_repr(self): - RESOURCE = { - 'kmsKeyName': self.KMS_KEY_NAME, - } + RESOURCE = {"kmsKeyName": self.KMS_KEY_NAME} klass = self._get_target_class() encryption_config = klass.from_api_repr(RESOURCE) self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) @@ -78,11 +76,7 @@ def test_from_api_repr(self): def test_to_api_repr(self): encryption_config = self._make_one(kms_key_name=self.KMS_KEY_NAME) resource = encryption_config.to_api_repr() - self.assertEqual( - resource, - { - 'kmsKeyName': self.KMS_KEY_NAME, - }) + self.assertEqual(resource, {"kmsKeyName": self.KMS_KEY_NAME}) def test___eq___wrong_type(self): encryption_config = self._make_one() @@ -110,7 +104,7 @@ def test___ne___same_value(self): encryption_config1 = self._make_one(self.KMS_KEY_NAME) encryption_config2 = self._make_one(self.KMS_KEY_NAME) # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (encryption_config1 != encryption_config2) + comparison_val = encryption_config1 != encryption_config2 self.assertFalse(comparison_val) def test___ne___different_values(self): @@ -139,7 +133,6 @@ def test___repr__(self): class TestTableReference(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.table import TableReference @@ -151,154 +144,159 @@ def _make_one(self, *args, **kw): def test_ctor_defaults(self): from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference('project_1', 'dataset_1') - table_ref = self._make_one(dataset_ref, 'table_1') + dataset_ref = DatasetReference("project_1", "dataset_1") + + table_ref = self._make_one(dataset_ref, "table_1") self.assertEqual(table_ref.dataset_id, dataset_ref.dataset_id) - self.assertEqual(table_ref.table_id, 'table_1') + self.assertEqual(table_ref.table_id, "table_1") def test_to_api_repr(self): from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference('project_1', 'dataset_1') - table_ref = self._make_one(dataset_ref, 'table_1') + + dataset_ref = DatasetReference("project_1", "dataset_1") + table_ref = self._make_one(dataset_ref, "table_1") resource = table_ref.to_api_repr() self.assertEqual( resource, - { - 'projectId': 'project_1', - 'datasetId': 'dataset_1', - 'tableId': 'table_1', - }) + {"projectId": "project_1", "datasetId": "dataset_1", "tableId": "table_1"}, + ) def test_from_api_repr(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import TableReference - dataset_ref = DatasetReference('project_1', 'dataset_1') - expected = self._make_one(dataset_ref, 'table_1') + + dataset_ref = DatasetReference("project_1", "dataset_1") + expected = self._make_one(dataset_ref, "table_1") got = TableReference.from_api_repr( - { - 'projectId': 'project_1', - 'datasetId': 'dataset_1', - 'tableId': 'table_1', - }) + {"projectId": "project_1", "datasetId": "dataset_1", "tableId": "table_1"} + ) self.assertEqual(expected, got) def test_from_string(self): cls = self._get_target_class() - got = cls.from_string('string-project.string_dataset.string_table') - self.assertEqual(got.project, 'string-project') - self.assertEqual(got.dataset_id, 'string_dataset') - self.assertEqual(got.table_id, 'string_table') + got = cls.from_string("string-project.string_dataset.string_table") + self.assertEqual(got.project, "string-project") + self.assertEqual(got.dataset_id, "string_dataset") + self.assertEqual(got.table_id, "string_table") def test_from_string_legacy_string(self): cls = self._get_target_class() with self.assertRaises(ValueError): - cls.from_string('string-project:string_dataset.string_table') + cls.from_string("string-project:string_dataset.string_table") def test_from_string_not_fully_qualified(self): cls = self._get_target_class() with self.assertRaises(ValueError): - cls.from_string('string_table') + cls.from_string("string_table") with self.assertRaises(ValueError): - cls.from_string('string_dataset.string_table') + cls.from_string("string_dataset.string_table") with self.assertRaises(ValueError): - cls.from_string('a.b.c.d') + cls.from_string("a.b.c.d") def test_from_string_with_default_project(self): cls = self._get_target_class() got = cls.from_string( - 'string_dataset.string_table', default_project='default-project') - self.assertEqual(got.project, 'default-project') - self.assertEqual(got.dataset_id, 'string_dataset') - self.assertEqual(got.table_id, 'string_table') + "string_dataset.string_table", default_project="default-project" + ) + self.assertEqual(got.project, "default-project") + self.assertEqual(got.dataset_id, "string_dataset") + self.assertEqual(got.table_id, "string_table") def test_from_string_ignores_default_project(self): cls = self._get_target_class() got = cls.from_string( - 'string-project.string_dataset.string_table', - default_project='default-project') - self.assertEqual(got.project, 'string-project') - self.assertEqual(got.dataset_id, 'string_dataset') - self.assertEqual(got.table_id, 'string_table') + "string-project.string_dataset.string_table", + default_project="default-project", + ) + self.assertEqual(got.project, "string-project") + self.assertEqual(got.dataset_id, "string_dataset") + self.assertEqual(got.table_id, "string_table") def test___eq___wrong_type(self): from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference('project_1', 'dataset_1') - table = self._make_one(dataset_ref, 'table_1') + + dataset_ref = DatasetReference("project_1", "dataset_1") + table = self._make_one(dataset_ref, "table_1") other = object() self.assertNotEqual(table, other) self.assertEqual(table, mock.ANY) def test___eq___project_mismatch(self): from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference('project_1', 'dataset_1') - other_dataset = DatasetReference('project_2', 'dataset_1') - table = self._make_one(dataset, 'table_1') - other = self._make_one(other_dataset, 'table_1') + + dataset = DatasetReference("project_1", "dataset_1") + other_dataset = DatasetReference("project_2", "dataset_1") + table = self._make_one(dataset, "table_1") + other = self._make_one(other_dataset, "table_1") self.assertNotEqual(table, other) def test___eq___dataset_mismatch(self): from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference('project_1', 'dataset_1') - other_dataset = DatasetReference('project_1', 'dataset_2') - table = self._make_one(dataset, 'table_1') - other = self._make_one(other_dataset, 'table_1') + + dataset = DatasetReference("project_1", "dataset_1") + other_dataset = DatasetReference("project_1", "dataset_2") + table = self._make_one(dataset, "table_1") + other = self._make_one(other_dataset, "table_1") self.assertNotEqual(table, other) def test___eq___table_mismatch(self): from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference('project_1', 'dataset_1') - table = self._make_one(dataset, 'table_1') - other = self._make_one(dataset, 'table_2') + + dataset = DatasetReference("project_1", "dataset_1") + table = self._make_one(dataset, "table_1") + other = self._make_one(dataset, "table_2") self.assertNotEqual(table, other) def test___eq___equality(self): from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference('project_1', 'dataset_1') - table = self._make_one(dataset, 'table_1') - other = self._make_one(dataset, 'table_1') + + dataset = DatasetReference("project_1", "dataset_1") + table = self._make_one(dataset, "table_1") + other = self._make_one(dataset, "table_1") self.assertEqual(table, other) def test___hash__set_equality(self): from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference('project_1', 'dataset_1') - table1 = self._make_one(dataset, 'table1') - table2 = self._make_one(dataset, 'table2') + + dataset = DatasetReference("project_1", "dataset_1") + table1 = self._make_one(dataset, "table1") + table2 = self._make_one(dataset, "table2") set_one = {table1, table2} set_two = {table1, table2} self.assertEqual(set_one, set_two) def test___hash__not_equals(self): from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference('project_1', 'dataset_1') - table1 = self._make_one(dataset, 'table1') - table2 = self._make_one(dataset, 'table2') + + dataset = DatasetReference("project_1", "dataset_1") + table1 = self._make_one(dataset, "table1") + table2 = self._make_one(dataset, "table2") set_one = {table1} set_two = {table2} self.assertNotEqual(set_one, set_two) def test___repr__(self): - dataset = DatasetReference('project1', 'dataset1') - table1 = self._make_one(dataset, 'table1') + dataset = DatasetReference("project1", "dataset1") + table1 = self._make_one(dataset, "table1") expected = ( - "TableReference(DatasetReference('project1', 'dataset1'), " - "'table1')" + "TableReference(DatasetReference('project1', 'dataset1'), " "'table1')" ) self.assertEqual(repr(table1), expected) class TestTable(unittest.TestCase, _SchemaBase): - PROJECT = 'prahj-ekt' - DS_ID = 'dataset-name' - TABLE_NAME = 'table-name' - KMS_KEY_NAME = 'projects/1/locations/global/keyRings/1/cryptoKeys/1' + PROJECT = "prahj-ekt" + DS_ID = "dataset-name" + TABLE_NAME = "table-name" + KMS_KEY_NAME = "projects/1/locations/global/keyRings/1/cryptoKeys/1" @staticmethod def _get_target_class(): @@ -314,12 +312,10 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( - tzinfo=UTC) - self.ETAG = 'ETAG' - self.TABLE_FULL_ID = '%s:%s.%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - self.RESOURCE_URL = 'http://example.com/path/to/resource' + self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.ETAG = "ETAG" + self.TABLE_FULL_ID = "%s:%s.%s" % (self.PROJECT, self.DS_ID, self.TABLE_NAME) + self.RESOURCE_URL = "http://example.com/path/to/resource" self.NUM_BYTES = 12345 self.NUM_ROWS = 67 self.NUM_EST_BYTES = 1234 @@ -328,115 +324,119 @@ def _setUpConstants(self): def _make_resource(self): self._setUpConstants() return { - 'creationTime': self.WHEN_TS * 1000, - 'tableReference': - {'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - 'etag': 'ETAG', - 'id': self.TABLE_FULL_ID, - 'lastModifiedTime': self.WHEN_TS * 1000, - 'location': 'US', - 'selfLink': self.RESOURCE_URL, - 'numRows': self.NUM_ROWS, - 'numBytes': self.NUM_BYTES, - 'type': 'TABLE', - 'streamingBuffer': { - 'estimatedRows': str(self.NUM_EST_ROWS), - 'estimatedBytes': str(self.NUM_EST_BYTES), - 'oldestEntryTime': self.WHEN_TS * 1000}, - 'externalDataConfiguration': { - 'sourceFormat': 'CSV', - 'csvOptions': { - 'allowJaggedRows': True, - 'encoding': 'encoding'}}, - 'labels': {'x': 'y'}, + "creationTime": self.WHEN_TS * 1000, + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_NAME, + }, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + }, + "etag": "ETAG", + "id": self.TABLE_FULL_ID, + "lastModifiedTime": self.WHEN_TS * 1000, + "location": "US", + "selfLink": self.RESOURCE_URL, + "numRows": self.NUM_ROWS, + "numBytes": self.NUM_BYTES, + "type": "TABLE", + "streamingBuffer": { + "estimatedRows": str(self.NUM_EST_ROWS), + "estimatedBytes": str(self.NUM_EST_BYTES), + "oldestEntryTime": self.WHEN_TS * 1000, + }, + "externalDataConfiguration": { + "sourceFormat": "CSV", + "csvOptions": {"allowJaggedRows": True, "encoding": "encoding"}, + }, + "labels": {"x": "y"}, } def _verifyReadonlyResourceProperties(self, table, resource): - if 'creationTime' in resource: + if "creationTime" in resource: self.assertEqual(table.created, self.WHEN) else: self.assertIsNone(table.created) - if 'etag' in resource: + if "etag" in resource: self.assertEqual(table.etag, self.ETAG) else: self.assertIsNone(table.etag) - if 'numRows' in resource: + if "numRows" in resource: self.assertEqual(table.num_rows, self.NUM_ROWS) else: self.assertIsNone(table.num_rows) - if 'numBytes' in resource: + if "numBytes" in resource: self.assertEqual(table.num_bytes, self.NUM_BYTES) else: self.assertIsNone(table.num_bytes) - if 'selfLink' in resource: + if "selfLink" in resource: self.assertEqual(table.self_link, self.RESOURCE_URL) else: self.assertIsNone(table.self_link) - if 'streamingBuffer' in resource: - self.assertEqual(table.streaming_buffer.estimated_rows, - self.NUM_EST_ROWS) - self.assertEqual(table.streaming_buffer.estimated_bytes, - self.NUM_EST_BYTES) - self.assertEqual(table.streaming_buffer.oldest_entry_time, - self.WHEN) + if "streamingBuffer" in resource: + self.assertEqual(table.streaming_buffer.estimated_rows, self.NUM_EST_ROWS) + self.assertEqual(table.streaming_buffer.estimated_bytes, self.NUM_EST_BYTES) + self.assertEqual(table.streaming_buffer.oldest_entry_time, self.WHEN) else: self.assertIsNone(table.streaming_buffer) self.assertEqual(table.full_table_id, self.TABLE_FULL_ID) - self.assertEqual(table.table_type, - 'TABLE' if 'view' not in resource else 'VIEW') + self.assertEqual( + table.table_type, "TABLE" if "view" not in resource else "VIEW" + ) def _verifyResourceProperties(self, table, resource): self._verifyReadonlyResourceProperties(table, resource) - if 'expirationTime' in resource: + if "expirationTime" in resource: self.assertEqual(table.expires, self.EXP_TIME) else: self.assertIsNone(table.expires) - self.assertEqual(table.description, resource.get('description')) - self.assertEqual(table.friendly_name, resource.get('friendlyName')) - self.assertEqual(table.location, resource.get('location')) + self.assertEqual(table.description, resource.get("description")) + self.assertEqual(table.friendly_name, resource.get("friendlyName")) + self.assertEqual(table.location, resource.get("location")) - if 'view' in resource: - self.assertEqual(table.view_query, resource['view']['query']) + if "view" in resource: + self.assertEqual(table.view_query, resource["view"]["query"]) self.assertEqual( - table.view_use_legacy_sql, - resource['view'].get('useLegacySql', True)) + table.view_use_legacy_sql, resource["view"].get("useLegacySql", True) + ) else: self.assertIsNone(table.view_query) self.assertIsNone(table.view_use_legacy_sql) - if 'schema' in resource: + if "schema" in resource: self._verifySchema(table.schema, resource) else: self.assertEqual(table.schema, []) - if 'externalDataConfiguration' in resource: + if "externalDataConfiguration" in resource: edc = table.external_data_configuration - self.assertEqual(edc.source_format, 'CSV') + self.assertEqual(edc.source_format, "CSV") self.assertEqual(edc.options.allow_jagged_rows, True) - if 'labels' in resource: - self.assertEqual(table.labels, {'x': 'y'}) + if "labels" in resource: + self.assertEqual(table.labels, {"x": "y"}) else: self.assertEqual(table.labels, {}) - if 'encryptionConfiguration' in resource: + if "encryptionConfiguration" in resource: self.assertIsNotNone(table.encryption_configuration) - self.assertEqual(table.encryption_configuration.kms_key_name, - resource['encryptionConfiguration']['kmsKeyName']) + self.assertEqual( + table.encryption_configuration.kms_key_name, + resource["encryptionConfiguration"]["kmsKeyName"], + ) else: self.assertIsNone(table.encryption_configuration) @@ -453,8 +453,9 @@ def test_ctor(self): self.assertEqual(table.reference.dataset_id, self.DS_ID) self.assertEqual( table.path, - '/projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME)) + "/projects/%s/datasets/%s/tables/%s" + % (self.PROJECT, self.DS_ID, self.TABLE_NAME), + ) self.assertEqual(table.schema, []) self.assertIsNone(table.created) @@ -482,8 +483,8 @@ def test_ctor_w_schema(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") table = self._make_one(table_ref, schema=[full_name, age]) self.assertEqual(table.schema, [full_name, age]) @@ -498,17 +499,17 @@ def test_num_bytes_getter(self): num_bytes = 1337 # Check with integer value set. - table._properties = {'numBytes': num_bytes} + table._properties = {"numBytes": num_bytes} self.assertEqual(table.num_bytes, num_bytes) # Check with a string value set. - table._properties = {'numBytes': str(num_bytes)} + table._properties = {"numBytes": str(num_bytes)} self.assertEqual(table.num_bytes, num_bytes) # Check with invalid int value. - table._properties = {'numBytes': 'x'} + table._properties = {"numBytes": "x"} with self.assertRaises(ValueError): - getattr(table, 'num_bytes') + getattr(table, "num_bytes") def test_num_rows_getter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -520,17 +521,17 @@ def test_num_rows_getter(self): num_rows = 42 # Check with integer value set. - table._properties = {'numRows': num_rows} + table._properties = {"numRows": num_rows} self.assertEqual(table.num_rows, num_rows) # Check with a string value set. - table._properties = {'numRows': str(num_rows)} + table._properties = {"numRows": str(num_rows)} self.assertEqual(table.num_rows, num_rows) # Check with invalid int value. - table._properties = {'numRows': 'x'} + table._properties = {"numRows": "x"} with self.assertRaises(ValueError): - getattr(table, 'num_rows') + getattr(table, "num_rows") def test_schema_setter_non_list(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -545,7 +546,7 @@ def test_schema_setter_invalid_field(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") with self.assertRaises(ValueError): table.schema = [full_name, object()] @@ -555,8 +556,8 @@ def test_schema_setter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") table.schema = [full_name, age] self.assertEqual(table.schema, [full_name, age]) @@ -567,30 +568,32 @@ def test_props_set_by_server(self): CREATED = datetime.datetime(2015, 7, 29, 12, 13, 22, tzinfo=UTC) MODIFIED = datetime.datetime(2015, 7, 29, 14, 47, 15, tzinfo=UTC) - TABLE_FULL_ID = '%s:%s.%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) - URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_ID, self.TABLE_NAME) + TABLE_FULL_ID = "%s:%s.%s" % (self.PROJECT, self.DS_ID, self.TABLE_NAME) + URL = "http://example.com/projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_NAME, + ) dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - table._properties['creationTime'] = _millis(CREATED) - table._properties['etag'] = 'ETAG' - table._properties['lastModifiedTime'] = _millis(MODIFIED) - table._properties['numBytes'] = 12345 - table._properties['numRows'] = 66 - table._properties['selfLink'] = URL - table._properties['id'] = TABLE_FULL_ID - table._properties['type'] = 'TABLE' + table._properties["creationTime"] = _millis(CREATED) + table._properties["etag"] = "ETAG" + table._properties["lastModifiedTime"] = _millis(MODIFIED) + table._properties["numBytes"] = 12345 + table._properties["numRows"] = 66 + table._properties["selfLink"] = URL + table._properties["id"] = TABLE_FULL_ID + table._properties["type"] = "TABLE" self.assertEqual(table.created, CREATED) - self.assertEqual(table.etag, 'ETAG') + self.assertEqual(table.etag, "ETAG") self.assertEqual(table.modified, MODIFIED) self.assertEqual(table.num_bytes, 12345) self.assertEqual(table.num_rows, 66) self.assertEqual(table.self_link, URL) self.assertEqual(table.full_table_id, TABLE_FULL_ID) - self.assertEqual(table.table_type, 'TABLE') + self.assertEqual(table.table_type, "TABLE") def test_description_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -603,8 +606,8 @@ def test_description_setter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - table.description = 'DESCRIPTION' - self.assertEqual(table.description, 'DESCRIPTION') + table.description = "DESCRIPTION" + self.assertEqual(table.description, "DESCRIPTION") def test_expires_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -635,8 +638,8 @@ def test_friendly_name_setter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - table.friendly_name = 'FRIENDLY' - self.assertEqual(table.friendly_name, 'FRIENDLY') + table.friendly_name = "FRIENDLY" + self.assertEqual(table.friendly_name, "FRIENDLY") def test_view_query_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -649,8 +652,8 @@ def test_view_query_setter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - table.view_query = 'select * from foo' - self.assertEqual(table.view_query, 'select * from foo') + table.view_query = "select * from foo" + self.assertEqual(table.view_query, "select * from foo") self.assertEqual(table.view_use_legacy_sql, False) table.view_use_legacy_sql = True @@ -660,7 +663,7 @@ def test_view_query_deleter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - table.view_query = 'select * from foo' + table.view_query = "select * from foo" del table.view_query self.assertIsNone(table.view_query) self.assertIsNone(table.view_use_legacy_sql) @@ -677,14 +680,14 @@ def test_view_use_legacy_sql_setter(self): table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) table.view_use_legacy_sql = True - table.view_query = 'select * from foo' + table.view_query = "select * from foo" self.assertEqual(table.view_use_legacy_sql, True) - self.assertEqual(table.view_query, 'select * from foo') + self.assertEqual(table.view_query, "select * from foo") def test_external_data_configuration_setter(self): from google.cloud.bigquery.external_config import ExternalConfig - external_config = ExternalConfig('CSV') + external_config = ExternalConfig("CSV") dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) @@ -693,7 +696,8 @@ def test_external_data_configuration_setter(self): self.assertEqual( table.external_data_configuration.source_format, - external_config.source_format) + external_config.source_format, + ) def test_external_data_configuration_setter_none(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -715,10 +719,10 @@ def test_labels_update_in_place(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - del table._properties['labels'] # don't start w/ existing dict + del table._properties["labels"] # don't start w/ existing dict labels = table.labels - labels['foo'] = 'bar' # update in place - self.assertEqual(table.labels, {'foo': 'bar'}) + labels["foo"] = "bar" # update in place + self.assertEqual(table.labels, {"foo": "bar"}) def test_labels_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -729,20 +733,20 @@ def test_labels_setter_bad_value(self): def test_from_string(self): cls = self._get_target_class() - got = cls.from_string('string-project.string_dataset.string_table') - self.assertEqual(got.project, 'string-project') - self.assertEqual(got.dataset_id, 'string_dataset') - self.assertEqual(got.table_id, 'string_table') + got = cls.from_string("string-project.string_dataset.string_table") + self.assertEqual(got.project, "string-project") + self.assertEqual(got.dataset_id, "string_dataset") + self.assertEqual(got.table_id, "string_table") def test_from_string_legacy_string(self): cls = self._get_target_class() with self.assertRaises(ValueError): - cls.from_string('string-project:string_dataset.string_table') + cls.from_string("string-project:string_dataset.string_table") def test_from_string_not_fully_qualified(self): cls = self._get_target_class() with self.assertRaises(ValueError): - cls.from_string('string_dataset.string_table') + cls.from_string("string_dataset.string_table") def test_from_api_repr_missing_identity(self): self._setUpConstants() @@ -754,13 +758,13 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_bare(self): self._setUpConstants() RESOURCE = { - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, self.TABLE_NAME), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_NAME, }, - 'type': 'TABLE', + "type": "TABLE", } klass = self._get_target_class() table = klass.from_api_repr(RESOURCE) @@ -773,11 +777,11 @@ def test_from_api_repr_w_properties(self): from google.cloud._helpers import _millis RESOURCE = self._make_resource() - RESOURCE['view'] = {'query': 'select fullname, age from person_ages'} - RESOURCE['type'] = 'VIEW' - RESOURCE['location'] = 'EU' + RESOURCE["view"] = {"query": "select fullname, age from person_ages"} + RESOURCE["type"] = "VIEW" + RESOURCE["location"] = "EU" self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) - RESOURCE['expirationTime'] = _millis(self.EXP_TIME) + RESOURCE["expirationTime"] = _millis(self.EXP_TIME) klass = self._get_target_class() table = klass.from_api_repr(RESOURCE) self._verifyResourceProperties(table, RESOURCE) @@ -785,16 +789,14 @@ def test_from_api_repr_w_properties(self): def test_from_api_with_encryption(self): self._setUpConstants() RESOURCE = { - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_ID, - 'tableId': self.TABLE_NAME, + "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, self.TABLE_NAME), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_NAME, }, - 'encryptionConfiguration': { - 'kmsKeyName': self.KMS_KEY_NAME - }, - 'type': 'TABLE', + "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, + "type": "TABLE", } klass = self._get_target_class() table = klass.from_api_repr(RESOURCE) @@ -804,13 +806,13 @@ def test_to_api_repr_w_custom_field(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - table._properties['newAlphaProperty'] = 'unreleased property' + table._properties["newAlphaProperty"] = "unreleased property" resource = table.to_api_repr() exp_resource = { - 'tableReference': table_ref.to_api_repr(), - 'labels': {}, - 'newAlphaProperty': 'unreleased property' + "tableReference": table_ref.to_api_repr(), + "labels": {}, + "newAlphaProperty": "unreleased property", } self.assertEqual(resource, exp_resource) @@ -818,20 +820,18 @@ def test__build_resource_w_custom_field(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - table._properties['newAlphaProperty'] = 'unreleased property' - resource = table._build_resource(['newAlphaProperty']) + table._properties["newAlphaProperty"] = "unreleased property" + resource = table._build_resource(["newAlphaProperty"]) - exp_resource = { - 'newAlphaProperty': 'unreleased property' - } + exp_resource = {"newAlphaProperty": "unreleased property"} self.assertEqual(resource, exp_resource) def test__build_resource_w_custom_field_not_in__properties(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table = self._make_one(dataset.table(self.TABLE_NAME)) - table.bad = 'value' + table.bad = "value" with self.assertRaises(ValueError): - table._build_resource(['bad']) + table._build_resource(["bad"]) def test_time_partitioning_setter(self): from google.cloud.bigquery.table import TimePartitioning @@ -844,19 +844,18 @@ def test_time_partitioning_setter(self): table.time_partitioning = time_partitioning - self.assertEqual( - table.time_partitioning.type_, TimePartitioningType.DAY) + self.assertEqual(table.time_partitioning.type_, TimePartitioningType.DAY) # Both objects point to the same properties dict self.assertIs( - table._properties['timePartitioning'], - time_partitioning._properties) + table._properties["timePartitioning"], time_partitioning._properties + ) time_partitioning.expiration_ms = 10000 # Changes to TimePartitioning object are reflected in Table properties self.assertEqual( - table.time_partitioning.expiration_ms, - time_partitioning.expiration_ms) + table.time_partitioning.expiration_ms, time_partitioning.expiration_ms + ) def test_time_partitioning_setter_bad_type(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -864,7 +863,7 @@ def test_time_partitioning_setter_bad_type(self): table = self._make_one(table_ref) with self.assertRaises(ValueError): - table.time_partitioning = {'timePartitioning': {'type': 'DAY'}} + table.time_partitioning = {"timePartitioning": {"type": "DAY"}} def test_time_partitioning_setter_none(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -888,7 +887,7 @@ def test_partitioning_type_setter(self): table.partitioning_type = TimePartitioningType.DAY - self.assertEqual(table.partitioning_type, 'DAY') + self.assertEqual(table.partitioning_type, "DAY") self.assertEqual(len(warned), 3) for warning in warned: @@ -904,9 +903,9 @@ def test_partitioning_type_setter_w_time_partitioning_set(self): table.time_partitioning = TimePartitioning() with warnings.catch_warnings(record=True) as warned: - table.partitioning_type = 'NEW_FAKE_TYPE' + table.partitioning_type = "NEW_FAKE_TYPE" - self.assertEqual(table.partitioning_type, 'NEW_FAKE_TYPE') + self.assertEqual(table.partitioning_type, "NEW_FAKE_TYPE") self.assertEqual(len(warned), 2) for warning in warned: @@ -944,7 +943,7 @@ def test_partition_expiration_setter(self): self.assertEqual(table.partition_expiration, 100) # defaults to 'DAY' when expiration is set and type is not set - self.assertEqual(table.partitioning_type, 'DAY') + self.assertEqual(table.partitioning_type, "DAY") self.assertEqual(len(warned), 4) for warning in warned: @@ -954,22 +953,22 @@ def test_clustering_fields_setter_w_fields(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - fields = ['email', 'phone'] + fields = ["email", "phone"] table.clustering_fields = fields self.assertEqual(table.clustering_fields, fields) - self.assertEqual(table._properties['clustering'], {'fields': fields}) + self.assertEqual(table._properties["clustering"], {"fields": fields}) def test_clustering_fields_setter_w_none(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - fields = ['email', 'phone'] + fields = ["email", "phone"] - table._properties['clustering'] = {'fields': fields} + table._properties["clustering"] = {"fields": fields} table.clustering_fields = None self.assertEqual(table.clustering_fields, None) - self.assertFalse('clustering' in table._properties) + self.assertFalse("clustering" in table._properties) def test_clustering_fields_setter_w_none_noop(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -978,25 +977,27 @@ def test_clustering_fields_setter_w_none_noop(self): table.clustering_fields = None self.assertEqual(table.clustering_fields, None) - self.assertFalse('clustering' in table._properties) + self.assertFalse("clustering" in table._properties) def test_encryption_configuration_setter(self): from google.cloud.bigquery.table import EncryptionConfiguration + dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME) + kms_key_name=self.KMS_KEY_NAME + ) table.encryption_configuration = encryption_configuration - self.assertEqual(table.encryption_configuration.kms_key_name, - self.KMS_KEY_NAME) + self.assertEqual(table.encryption_configuration.kms_key_name, self.KMS_KEY_NAME) table.encryption_configuration = None self.assertIsNone(table.encryption_configuration) def test___repr__(self): from google.cloud.bigquery.table import TableReference - dataset = DatasetReference('project1', 'dataset1') - table1 = self._make_one(TableReference(dataset, 'table1')) + + dataset = DatasetReference("project1", "dataset1") + table1 = self._make_one(TableReference(dataset, "table1")) expected = ( "Table(TableReference(" "DatasetReference('project1', 'dataset1'), " @@ -1007,9 +1008,9 @@ def test___repr__(self): class Test_row_from_mapping(unittest.TestCase, _SchemaBase): - PROJECT = 'prahj-ekt' - DS_ID = 'dataset-name' - TABLE_NAME = 'table-name' + PROJECT = "prahj-ekt" + DS_ID = "dataset-name" + TABLE_NAME = "table-name" def _call_fut(self, mapping, schema): from google.cloud.bigquery.table import _row_from_mapping @@ -1018,7 +1019,8 @@ def _call_fut(self, mapping, schema): def test__row_from_mapping_wo_schema(self): from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA - MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} + + MAPPING = {"full_name": "Phred Phlyntstone", "age": 32} dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = Table(table_ref) @@ -1030,48 +1032,50 @@ def test__row_from_mapping_wo_schema(self): def test__row_from_mapping_w_invalid_schema(self): from google.cloud.bigquery.table import Table, SchemaField + MAPPING = { - 'full_name': 'Phred Phlyntstone', - 'age': 32, - 'colors': ['red', 'green'], - 'bogus': 'WHATEVER', + "full_name": "Phred Phlyntstone", + "age": 32, + "colors": ["red", "green"], + "bogus": "WHATEVER", } dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - colors = SchemaField('colors', 'DATETIME', mode='REPEATED') - bogus = SchemaField('joined', 'STRING', mode='BOGUS') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + colors = SchemaField("colors", "DATETIME", mode="REPEATED") + bogus = SchemaField("joined", "STRING", mode="BOGUS") table = Table(table_ref, schema=[full_name, age, colors, bogus]) with self.assertRaises(ValueError) as exc: self._call_fut(MAPPING, table.schema) - self.assertIn('Unknown field mode: BOGUS', str(exc.exception)) + self.assertIn("Unknown field mode: BOGUS", str(exc.exception)) def test__row_from_mapping_w_schema(self): from google.cloud.bigquery.table import Table, SchemaField + MAPPING = { - 'full_name': 'Phred Phlyntstone', - 'age': 32, - 'colors': ['red', 'green'], - 'extra': 'IGNORED', + "full_name": "Phred Phlyntstone", + "age": 32, + "colors": ["red", "green"], + "extra": "IGNORED", } dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - colors = SchemaField('colors', 'DATETIME', mode='REPEATED') - joined = SchemaField('joined', 'STRING', mode='NULLABLE') + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + colors = SchemaField("colors", "DATETIME", mode="REPEATED") + joined = SchemaField("joined", "STRING", mode="NULLABLE") table = Table(table_ref, schema=[full_name, age, colors, joined]) self.assertEqual( self._call_fut(MAPPING, table.schema), - ('Phred Phlyntstone', 32, ['red', 'green'], None)) + ("Phred Phlyntstone", 32, ["red", "green"], None), + ) class TestTableListItem(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.bigquery.table import TableListItem @@ -1084,27 +1088,25 @@ def _make_one(self, *args, **kw): def test_ctor(self): import warnings - project = 'test-project' - dataset_id = 'test_dataset' - table_id = 'coffee_table' + project = "test-project" + dataset_id = "test_dataset" + table_id = "coffee_table" resource = { - 'kind': 'bigquery#table', - 'id': '{}:{}.{}'.format(project, dataset_id, table_id), - 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id, - }, - 'friendlyName': 'Mahogany Coffee Table', - 'type': 'TABLE', - 'timePartitioning': { - 'type': 'DAY', - 'field': 'mycolumn', - 'expirationMs': '10000', + "kind": "bigquery#table", + "id": "{}:{}.{}".format(project, dataset_id, table_id), + "tableReference": { + "projectId": project, + "datasetId": dataset_id, + "tableId": table_id, }, - 'labels': { - 'some-stuff': 'this-is-a-label', + "friendlyName": "Mahogany Coffee Table", + "type": "TABLE", + "timePartitioning": { + "type": "DAY", + "field": "mycolumn", + "expirationMs": "10000", }, + "labels": {"some-stuff": "this-is-a-label"}, } table = self._make_one(resource) @@ -1112,21 +1114,21 @@ def test_ctor(self): self.assertEqual(table.dataset_id, dataset_id) self.assertEqual(table.table_id, table_id) self.assertEqual( - table.full_table_id, - '{}:{}.{}'.format(project, dataset_id, table_id)) + table.full_table_id, "{}:{}.{}".format(project, dataset_id, table_id) + ) self.assertEqual(table.reference.project, project) self.assertEqual(table.reference.dataset_id, dataset_id) self.assertEqual(table.reference.table_id, table_id) - self.assertEqual(table.friendly_name, 'Mahogany Coffee Table') - self.assertEqual(table.table_type, 'TABLE') - self.assertEqual(table.time_partitioning.type_, 'DAY') + self.assertEqual(table.friendly_name, "Mahogany Coffee Table") + self.assertEqual(table.table_type, "TABLE") + self.assertEqual(table.time_partitioning.type_, "DAY") self.assertEqual(table.time_partitioning.expiration_ms, 10000) - self.assertEqual(table.time_partitioning.field, 'mycolumn') - self.assertEqual(table.labels['some-stuff'], 'this-is-a-label') + self.assertEqual(table.time_partitioning.field, "mycolumn") + self.assertEqual(table.labels["some-stuff"], "this-is-a-label") self.assertIsNone(table.view_use_legacy_sql) with warnings.catch_warnings(record=True) as warned: - self.assertEqual(table.partitioning_type, 'DAY') + self.assertEqual(table.partitioning_type, "DAY") self.assertEqual(table.partition_expiration, 10000) self.assertEqual(len(warned), 2) @@ -1134,18 +1136,18 @@ def test_ctor(self): self.assertIs(warning.category, PendingDeprecationWarning) def test_ctor_view(self): - project = 'test-project' - dataset_id = 'test_dataset' - table_id = 'just_looking' + project = "test-project" + dataset_id = "test_dataset" + table_id = "just_looking" resource = { - 'kind': 'bigquery#table', - 'id': '{}:{}.{}'.format(project, dataset_id, table_id), - 'tableReference': { - 'projectId': project, - 'datasetId': dataset_id, - 'tableId': table_id, + "kind": "bigquery#table", + "id": "{}:{}.{}".format(project, dataset_id, table_id), + "tableReference": { + "projectId": project, + "datasetId": dataset_id, + "tableId": table_id, }, - 'type': 'VIEW', + "type": "VIEW", } table = self._make_one(resource) @@ -1153,12 +1155,12 @@ def test_ctor_view(self): self.assertEqual(table.dataset_id, dataset_id) self.assertEqual(table.table_id, table_id) self.assertEqual( - table.full_table_id, - '{}:{}.{}'.format(project, dataset_id, table_id)) + table.full_table_id, "{}:{}.{}".format(project, dataset_id, table_id) + ) self.assertEqual(table.reference.project, project) self.assertEqual(table.reference.dataset_id, dataset_id) self.assertEqual(table.reference.table_id, table_id) - self.assertEqual(table.table_type, 'VIEW') + self.assertEqual(table.table_type, "VIEW") # Server default for useLegacySql is True. self.assertTrue(table.view_use_legacy_sql) @@ -1166,16 +1168,16 @@ def test_ctor_missing_properties(self): import warnings resource = { - 'tableReference': { - 'projectId': 'testproject', - 'datasetId': 'testdataset', - 'tableId': 'testtable', - }, + "tableReference": { + "projectId": "testproject", + "datasetId": "testdataset", + "tableId": "testtable", + } } table = self._make_one(resource) - self.assertEqual(table.project, 'testproject') - self.assertEqual(table.dataset_id, 'testdataset') - self.assertEqual(table.table_id, 'testtable') + self.assertEqual(table.project, "testproject") + self.assertEqual(table.dataset_id, "testdataset") + self.assertEqual(table.table_id, "testtable") self.assertIsNone(table.full_table_id) self.assertIsNone(table.friendly_name) self.assertIsNone(table.table_type) @@ -1193,30 +1195,21 @@ def test_ctor_missing_properties(self): def test_ctor_wo_project(self): resource = { - 'tableReference': { - 'datasetId': 'testdataset', - 'tableId': 'testtable', - }, + "tableReference": {"datasetId": "testdataset", "tableId": "testtable"} } with self.assertRaises(ValueError): self._make_one(resource) def test_ctor_wo_dataset(self): resource = { - 'tableReference': { - 'projectId': 'testproject', - 'tableId': 'testtable', - }, + "tableReference": {"projectId": "testproject", "tableId": "testtable"} } with self.assertRaises(ValueError): self._make_one(resource) def test_ctor_wo_table(self): resource = { - 'tableReference': { - 'projectId': 'testproject', - 'datasetId': 'testdataset', - }, + "tableReference": {"projectId": "testproject", "datasetId": "testdataset"} } with self.assertRaises(ValueError): self._make_one(resource) @@ -1227,59 +1220,57 @@ def test_ctor_wo_reference(self): def test_labels_update_in_place(self): resource = { - 'tableReference': { - 'projectId': 'testproject', - 'datasetId': 'testdataset', - 'tableId': 'testtable', - }, + "tableReference": { + "projectId": "testproject", + "datasetId": "testdataset", + "tableId": "testtable", + } } table = self._make_one(resource) labels = table.labels - labels['foo'] = 'bar' # update in place - self.assertEqual(table.labels, {'foo': 'bar'}) + labels["foo"] = "bar" # update in place + self.assertEqual(table.labels, {"foo": "bar"}) class TestRow(unittest.TestCase): - def test_row(self): from google.cloud.bigquery.table import Row VALUES = (1, 2, 3) - row = Row(VALUES, {'a': 0, 'b': 1, 'c': 2}) + row = Row(VALUES, {"a": 0, "b": 1, "c": 2}) self.assertEqual(row.a, 1) self.assertEqual(row[1], 2) - self.assertEqual(row['c'], 3) + self.assertEqual(row["c"], 3) self.assertEqual(len(row), 3) self.assertEqual(row.values(), VALUES) - self.assertEqual(set(row.keys()), set({'a': 1, 'b': 2, 'c': 3}.keys())) - self.assertEqual(set(row.items()), - set({'a': 1, 'b': 2, 'c': 3}.items())) - self.assertEqual(row.get('a'), 1) - self.assertEqual(row.get('d'), None) - self.assertEqual(row.get('d', ''), '') - self.assertEqual(row.get('d', default=''), '') - self.assertEqual(repr(row), - "Row((1, 2, 3), {'a': 0, 'b': 1, 'c': 2})") + self.assertEqual(set(row.keys()), set({"a": 1, "b": 2, "c": 3}.keys())) + self.assertEqual(set(row.items()), set({"a": 1, "b": 2, "c": 3}.items())) + self.assertEqual(row.get("a"), 1) + self.assertEqual(row.get("d"), None) + self.assertEqual(row.get("d", ""), "") + self.assertEqual(row.get("d", default=""), "") + self.assertEqual(repr(row), "Row((1, 2, 3), {'a': 0, 'b': 1, 'c': 2})") self.assertFalse(row != row) self.assertFalse(row == 3) with self.assertRaises(AttributeError): row.z with self.assertRaises(KeyError): - row['z'] + row["z"] class Test_EmptyRowIterator(unittest.TestCase): - - @mock.patch('google.cloud.bigquery.table.pandas', new=None) + @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_error_if_pandas_is_none(self): from google.cloud.bigquery.table import _EmptyRowIterator + row_iterator = _EmptyRowIterator() with self.assertRaises(ValueError): row_iterator.to_dataframe() - @unittest.skipIf(pandas is None, 'Requires `pandas`') + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): from google.cloud.bigquery.table import _EmptyRowIterator + row_iterator = _EmptyRowIterator() df = row_iterator.to_dataframe() self.assertIsInstance(df, pandas.DataFrame) @@ -1287,7 +1278,6 @@ def test_to_dataframe(self): class TestRowIterator(unittest.TestCase): - def test_constructor(self): from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import _item_to_row @@ -1295,7 +1285,7 @@ def test_constructor(self): client = mock.sentinel.client api_request = mock.sentinel.api_request - path = '/foo' + path = "/foo" schema = [] iterator = RowIterator(client, api_request, path, schema) @@ -1303,7 +1293,7 @@ def test_constructor(self): self.assertIs(iterator.client, client) self.assertEqual(iterator.path, path) self.assertIs(iterator.item_to_value, _item_to_row) - self.assertEqual(iterator._items_key, 'rows') + self.assertEqual(iterator._items_key, "rows") self.assertIsNone(iterator.max_results) self.assertEqual(iterator.extra_params, {}) self.assertIs(iterator._page_start, _rows_page_start) @@ -1317,131 +1307,129 @@ def test_iterate(self): from google.cloud.bigquery.table import SchemaField schema = [ - SchemaField('name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED') + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), ] rows = [ - {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]}, - {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, ] - path = '/foo' - api_request = mock.Mock(return_value={'rows': rows}) - row_iterator = RowIterator( - mock.sentinel.client, api_request, path, schema) + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) self.assertEqual(row_iterator.num_results, 0) rows_iter = iter(row_iterator) val1 = six.next(rows_iter) - self.assertEqual(val1.name, 'Phred Phlyntstone') + self.assertEqual(val1.name, "Phred Phlyntstone") self.assertEqual(row_iterator.num_results, 1) val2 = six.next(rows_iter) - self.assertEqual(val2.name, 'Bharney Rhubble') + self.assertEqual(val2.name, "Bharney Rhubble") self.assertEqual(row_iterator.num_results, 2) with self.assertRaises(StopIteration): six.next(rows_iter) - api_request.assert_called_once_with( - method='GET', path=path, query_params={}) + api_request.assert_called_once_with(method="GET", path=path, query_params={}) def test_page_size(self): from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ - SchemaField('name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED') + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), ] rows = [ - {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]}, - {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, ] - path = '/foo' - api_request = mock.Mock(return_value={'rows': rows}) + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) row_iterator = RowIterator( - mock.sentinel.client, api_request, path, schema, page_size=4) + mock.sentinel.client, api_request, path, schema, page_size=4 + ) row_iterator._get_next_page_response() api_request.assert_called_once_with( - method='GET', path=path, query_params={ - 'maxResults': row_iterator._page_size}) + method="GET", + path=path, + query_params={"maxResults": row_iterator._page_size}, + ) - @unittest.skipIf(pandas is None, 'Requires `pandas`') + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ - SchemaField('name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED') + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), ] rows = [ - {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]}, - {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]}, - {'f': [{'v': 'Wylma Phlyntstone'}, {'v': '29'}]}, - {'f': [{'v': 'Bhettye Rhubble'}, {'v': '27'}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, ] - path = '/foo' - api_request = mock.Mock(return_value={'rows': rows}) - row_iterator = RowIterator( - mock.sentinel.client, api_request, path, schema) + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) df = row_iterator.to_dataframe() self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 4) # verify the number of rows - self.assertEqual(list(df), ['name', 'age']) # verify the column names - self.assertEqual(df.name.dtype.name, 'object') - self.assertEqual(df.age.dtype.name, 'int64') + self.assertEqual(list(df), ["name", "age"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.age.dtype.name, "int64") - @unittest.skipIf(pandas is None, 'Requires `pandas`') + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ - SchemaField('name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED') + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), ] - path = '/foo' - api_request = mock.Mock(return_value={'rows': []}) - row_iterator = RowIterator( - mock.sentinel.client, api_request, path, schema) + path = "/foo" + api_request = mock.Mock(return_value={"rows": []}) + row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) df = row_iterator.to_dataframe() self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows - self.assertEqual(list(df), ['name', 'age']) # verify the column names + self.assertEqual(list(df), ["name", "age"]) # verify the column names - @unittest.skipIf(pandas is None, 'Requires `pandas`') + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): import datetime from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ - SchemaField('start_timestamp', 'TIMESTAMP'), - SchemaField('seconds', 'INT64'), - SchemaField('miles', 'FLOAT64'), - SchemaField('payment_type', 'STRING'), - SchemaField('complete', 'BOOL'), - SchemaField('date', 'DATE'), + SchemaField("start_timestamp", "TIMESTAMP"), + SchemaField("seconds", "INT64"), + SchemaField("miles", "FLOAT64"), + SchemaField("payment_type", "STRING"), + SchemaField("complete", "BOOL"), + SchemaField("date", "DATE"), ] row_data = [ [None, None, None, None, None, None], - ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'], - ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'], - ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'], + ["1.4338368E9", "420", "1.1", "Cash", "true", "1999-12-01"], + ["1.3878117E9", "2580", "17.7", "Cash", "false", "1953-06-14"], + ["1.3855653E9", "2280", "4.4", "Credit", "true", "1981-11-04"], ] - rows = [{'f': [{'v': field} for field in row]} for row in row_data] - path = '/foo' - api_request = mock.Mock(return_value={'rows': rows}) - row_iterator = RowIterator( - mock.sentinel.client, api_request, path, schema) + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) df = row_iterator.to_dataframe() @@ -1460,29 +1448,28 @@ def test_to_dataframe_w_various_types_nullable(self): self.assertIsInstance(row.complete, bool) self.assertIsInstance(row.date, datetime.date) - @unittest.skipIf(pandas is None, 'Requires `pandas`') + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_column_dtypes(self): from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ - SchemaField('start_timestamp', 'TIMESTAMP'), - SchemaField('seconds', 'INT64'), - SchemaField('miles', 'FLOAT64'), - SchemaField('payment_type', 'STRING'), - SchemaField('complete', 'BOOL'), - SchemaField('date', 'DATE'), + SchemaField("start_timestamp", "TIMESTAMP"), + SchemaField("seconds", "INT64"), + SchemaField("miles", "FLOAT64"), + SchemaField("payment_type", "STRING"), + SchemaField("complete", "BOOL"), + SchemaField("date", "DATE"), ] row_data = [ - ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'], - ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'], - ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'], + ["1.4338368E9", "420", "1.1", "Cash", "true", "1999-12-01"], + ["1.3878117E9", "2580", "17.7", "Cash", "false", "1953-06-14"], + ["1.3855653E9", "2280", "4.4", "Credit", "true", "1981-11-04"], ] - rows = [{'f': [{'v': field} for field in row]} for row in row_data] - path = '/foo' - api_request = mock.Mock(return_value={'rows': rows}) - row_iterator = RowIterator( - mock.sentinel.client, api_request, path, schema) + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) df = row_iterator.to_dataframe() @@ -1491,37 +1478,35 @@ def test_to_dataframe_column_dtypes(self): exp_columns = [field.name for field in schema] self.assertEqual(list(df), exp_columns) # verify the column names - self.assertEqual(df.start_timestamp.dtype.name, 'datetime64[ns, UTC]') - self.assertEqual(df.seconds.dtype.name, 'int64') - self.assertEqual(df.miles.dtype.name, 'float64') - self.assertEqual(df.payment_type.dtype.name, 'object') - self.assertEqual(df.complete.dtype.name, 'bool') - self.assertEqual(df.date.dtype.name, 'object') + self.assertEqual(df.start_timestamp.dtype.name, "datetime64[ns, UTC]") + self.assertEqual(df.seconds.dtype.name, "int64") + self.assertEqual(df.miles.dtype.name, "float64") + self.assertEqual(df.payment_type.dtype.name, "object") + self.assertEqual(df.complete.dtype.name, "bool") + self.assertEqual(df.date.dtype.name, "object") - @mock.patch('google.cloud.bigquery.table.pandas', new=None) + @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_error_if_pandas_is_none(self): from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ - SchemaField('name', 'STRING', mode='REQUIRED'), - SchemaField('age', 'INTEGER', mode='REQUIRED') + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), ] rows = [ - {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]}, - {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, ] - path = '/foo' - api_request = mock.Mock(return_value={'rows': rows}) - row_iterator = RowIterator( - mock.sentinel.client, api_request, path, schema) + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) with self.assertRaises(ValueError): row_iterator.to_dataframe() class TestTimePartitioning(unittest.TestCase): - def _get_target_class(self): from google.cloud.bigquery.table import TimePartitioning @@ -1533,7 +1518,7 @@ def _make_one(self, *args, **kw): def test_constructor_defaults(self): time_partitioning = self._make_one() - self.assertEqual(time_partitioning.type_, 'DAY') + self.assertEqual(time_partitioning.type_, "DAY") self.assertIsNone(time_partitioning.field) self.assertIsNone(time_partitioning.expiration_ms) self.assertIsNone(time_partitioning.require_partition_filter) @@ -1543,13 +1528,13 @@ def test_constructor_explicit(self): time_partitioning = self._make_one( type_=TimePartitioningType.DAY, - field='name', + field="name", expiration_ms=10000, - require_partition_filter=True + require_partition_filter=True, ) - self.assertEqual(time_partitioning.type_, 'DAY') - self.assertEqual(time_partitioning.field, 'name') + self.assertEqual(time_partitioning.type_, "DAY") + self.assertEqual(time_partitioning.field, "name") self.assertEqual(time_partitioning.expiration_ms, 10000) self.assertTrue(time_partitioning.require_partition_filter) @@ -1557,7 +1542,7 @@ def test_from_api_repr_minimal(self): from google.cloud.bigquery.table import TimePartitioningType klass = self._get_target_class() - api_repr = {'type': 'DAY'} + api_repr = {"type": "DAY"} time_partitioning = klass.from_api_repr(api_repr) self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY) @@ -1570,21 +1555,21 @@ def test_from_api_repr_explicit(self): klass = self._get_target_class() api_repr = { - 'type': 'DAY', - 'field': 'name', - 'expirationMs': '10000', - 'requirePartitionFilter': True, + "type": "DAY", + "field": "name", + "expirationMs": "10000", + "requirePartitionFilter": True, } time_partitioning = klass.from_api_repr(api_repr) self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY) - self.assertEqual(time_partitioning.field, 'name') + self.assertEqual(time_partitioning.field, "name") self.assertEqual(time_partitioning.expiration_ms, 10000) self.assertTrue(time_partitioning.require_partition_filter) def test_to_api_repr_defaults(self): time_partitioning = self._make_one() - expected = {'type': 'DAY'} + expected = {"type": "DAY"} self.assertEqual(time_partitioning.to_api_repr(), expected) def test_to_api_repr_explicit(self): @@ -1592,16 +1577,16 @@ def test_to_api_repr_explicit(self): time_partitioning = self._make_one( type_=TimePartitioningType.DAY, - field='name', + field="name", expiration_ms=10000, - require_partition_filter=True + require_partition_filter=True, ) expected = { - 'type': 'DAY', - 'field': 'name', - 'expirationMs': '10000', - 'requirePartitionFilter': True, + "type": "DAY", + "field": "name", + "expirationMs": "10000", + "requirePartitionFilter": True, } self.assertEqual(time_partitioning.to_api_repr(), expected) @@ -1613,31 +1598,35 @@ def test___eq___wrong_type(self): def test___eq___type__mismatch(self): time_partitioning = self._make_one() - other = self._make_one(type_='HOUR') + other = self._make_one(type_="HOUR") self.assertNotEqual(time_partitioning, other) def test___eq___field_mismatch(self): - time_partitioning = self._make_one(field='foo') - other = self._make_one(field='bar') + time_partitioning = self._make_one(field="foo") + other = self._make_one(field="bar") self.assertNotEqual(time_partitioning, other) def test___eq___expiration_ms_mismatch(self): - time_partitioning = self._make_one(field='foo', expiration_ms=100000) - other = self._make_one(field='foo', expiration_ms=200000) + time_partitioning = self._make_one(field="foo", expiration_ms=100000) + other = self._make_one(field="foo", expiration_ms=200000) self.assertNotEqual(time_partitioning, other) def test___eq___require_partition_filter_mismatch(self): time_partitioning = self._make_one( - field='foo', expiration_ms=100000, require_partition_filter=True) + field="foo", expiration_ms=100000, require_partition_filter=True + ) other = self._make_one( - field='foo', expiration_ms=100000, require_partition_filter=False) + field="foo", expiration_ms=100000, require_partition_filter=False + ) self.assertNotEqual(time_partitioning, other) def test___eq___hit(self): time_partitioning = self._make_one( - field='foo', expiration_ms=100000, require_partition_filter=True) + field="foo", expiration_ms=100000, require_partition_filter=True + ) other = self._make_one( - field='foo', expiration_ms=100000, require_partition_filter=True) + field="foo", expiration_ms=100000, require_partition_filter=True + ) self.assertEqual(time_partitioning, other) def test___ne___wrong_type(self): @@ -1650,24 +1639,24 @@ def test___ne___same_value(self): time_partitioning1 = self._make_one() time_partitioning2 = self._make_one() # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (time_partitioning1 != time_partitioning2) + comparison_val = time_partitioning1 != time_partitioning2 self.assertFalse(comparison_val) def test___ne___different_values(self): time_partitioning1 = self._make_one() - time_partitioning2 = self._make_one(type_='HOUR') + time_partitioning2 = self._make_one(type_="HOUR") self.assertNotEqual(time_partitioning1, time_partitioning2) def test___hash__set_equality(self): - time_partitioning1 = self._make_one(field='foo') - time_partitioning2 = self._make_one(field='foo') + time_partitioning1 = self._make_one(field="foo") + time_partitioning2 = self._make_one(field="foo") set_one = {time_partitioning1, time_partitioning2} set_two = {time_partitioning1, time_partitioning2} self.assertEqual(set_one, set_two) def test___hash__not_equals(self): - time_partitioning1 = self._make_one(field='foo') - time_partitioning2 = self._make_one(field='bar') + time_partitioning1 = self._make_one(field="foo") + time_partitioning2 = self._make_one(field="bar") set_one = {time_partitioning1} set_two = {time_partitioning2} self.assertNotEqual(set_one, set_two) @@ -1682,14 +1671,15 @@ def test___repr___explicit(self): time_partitioning = self._make_one( type_=TimePartitioningType.DAY, - field='name', + field="name", expiration_ms=10000, - require_partition_filter=True + require_partition_filter=True, ) expected = ( "TimePartitioning(" "expirationMs=10000," "field=name," "requirePartitionFilter=True," - "type=DAY)") + "type=DAY)" + ) self.assertEqual(repr(time_partitioning), expected) From ad151c6e49559c4ae26f423cb1724c1802bb60b9 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 30 Nov 2018 19:27:11 -0500 Subject: [PATCH 0520/2016] Add 'retry' argument to '_AsyncJob.result'. (#6302) Pass it through to the '_begin' call. Note that we need to modify the 'api_core...PollingFuture' class before we can safely pass the 'retry' through to its 'result'. --- .../google/cloud/bigquery/job.py | 7 +++++-- .../google-cloud-bigquery/tests/unit/test_job.py | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index cdb275ed5f83..ba8a41958357 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -678,7 +678,7 @@ def done(self, retry=DEFAULT_RETRY): self.reload(retry=retry) return self.state == _DONE_STATE - def result(self, timeout=None): + def result(self, timeout=None, retry=DEFAULT_RETRY): """Start the job and wait for it to complete and get the result. :type timeout: float @@ -686,6 +686,9 @@ def result(self, timeout=None): How long (in seconds) to wait for job to complete before raising a :class:`concurrent.futures.TimeoutError`. + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: _AsyncJob :returns: This instance. @@ -695,7 +698,7 @@ def result(self, timeout=None): not complete in the given timeout. """ if self.state is None: - self._begin() + self._begin(retry=retry) # TODO: modify PollingFuture so it can pass a retry argument to done(). return super(_AsyncJob, self).result(timeout=timeout) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 8d5aef8f4603..3d9dad8dc734 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -789,13 +789,27 @@ def test_done_already(self): @mock.patch("google.api_core.future.polling.PollingFuture.result") def test_result_default_wo_state(self, result): + from google.cloud.bigquery.retry import DEFAULT_RETRY + client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) begin = job._begin = mock.Mock() self.assertIs(job.result(), result.return_value) - begin.assert_called_once() + begin.assert_called_once_with(retry=DEFAULT_RETRY) + result.assert_called_once_with(timeout=None) + + @mock.patch('google.api_core.future.polling.PollingFuture.result') + def test_result_w_retry_wo_state(self, result): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + begin = job._begin = mock.Mock() + retry = mock.Mock() + + self.assertIs(job.result(retry=retry), result.return_value) + + begin.assert_called_once_with(retry=retry) result.assert_called_once_with(timeout=None) @mock.patch("google.api_core.future.polling.PollingFuture.result") From 19eb9ec848909e860ffeda51ba4234b06ff4161b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 3 Dec 2018 10:40:55 -0800 Subject: [PATCH 0521/2016] Allow setting partition expiration to 'None'. (#6823) --- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 5 ++++- packages/google-cloud-bigquery/tests/unit/test_table.py | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index c145d6a9dc1c..cb72dde189b2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1335,7 +1335,10 @@ def expiration_ms(self): @expiration_ms.setter def expiration_ms(self, value): - self._properties["expirationMs"] = str(value) + if value is not None: + # Allow explicitly setting the expiration to None. + value = str(value) + self._properties["expirationMs"] = value @property def require_partition_filter(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 04a67c603ba9..d84fa6e4b1c8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1683,3 +1683,8 @@ def test___repr___explicit(self): "type=DAY)" ) self.assertEqual(repr(time_partitioning), expected) + + def test_set_expiration_w_none(self): + time_partitioning = self._make_one() + time_partitioning.expiration_ms = None + assert time_partitioning._properties["expirationMs"] is None From fbd3f4a168a3a16a55ac34ff56aa1bf349268e29 Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 3 Dec 2018 11:53:06 -0800 Subject: [PATCH 0522/2016] Add avro logical type control for load jobs. (#6827) --- .../google/cloud/bigquery/job.py | 19 +++++++++++++++++++ .../tests/unit/test_job.py | 16 ++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index ba8a41958357..f72bb320c80f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1195,6 +1195,18 @@ def time_partitioning(self, value): else: self._del_sub_prop("timePartitioning") + @property + def use_avro_logical_types(self): + """bool: For loads of Avro data, governs whether Avro logical types are + converted to their corresponding BigQuery types(e.g. TIMESTAMP) rather than + raw types (e.g. INTEGER). + """ + return self._get_sub_prop("useAvroLogicalTypes") + + @use_avro_logical_types.setter + def use_avro_logical_types(self, value): + self._set_sub_prop("useAvroLogicalTypes", bool(value)) + @property def write_disposition(self): """google.cloud.bigquery.job.WriteDisposition: Action that occurs if @@ -1362,6 +1374,13 @@ def time_partitioning(self): """ return self._configuration.time_partitioning + @property + def use_avro_logical_types(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`. + """ + return self._configuration.use_avro_logical_types + @property def clustering_fields(self): """See diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 3d9dad8dc734..699101745814 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1588,6 +1588,15 @@ def test_time_partitioning_setter_w_none(self): self.assertIsNone(config.time_partitioning) self.assertNotIn("timePartitioning", config._properties["load"]) + def test_use_avro_logical_types(self): + config = self._get_target_class()() + self.assertIsNone(config.use_avro_logical_types) + + def test_use_avro_logical_types_setter(self): + config = self._get_target_class()() + config.use_avro_logical_types = True + self.assertTrue(config._properties["load"]["useAvroLogicalTypes"]) + def test_write_disposition_missing(self): config = self._get_target_class()() self.assertIsNone(config.write_disposition) @@ -1663,6 +1672,10 @@ def _verifyBooleanConfigProperties(self, job, config): self.assertEqual(job.ignore_unknown_values, config["ignoreUnknownValues"]) else: self.assertIsNone(job.ignore_unknown_values) + if "useAvroLogicalTypes" in config: + self.assertEqual(job.use_avro_logical_types, config["useAvroLogicalTypes"]) + else: + self.assertIsNone(job.use_avro_logical_types) def _verifyEnumConfigProperties(self, job, config): if "createDisposition" in config: @@ -1765,6 +1778,7 @@ def test_ctor(self): self.assertIsNone(job.write_disposition) self.assertIsNone(job.destination_encryption_configuration) self.assertIsNone(job.time_partitioning) + self.assertIsNone(job.use_avro_logical_types) self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) @@ -2097,6 +2111,7 @@ def test_begin_w_alternate_client(self): "quote": "'", "skipLeadingRows": "1", "sourceFormat": "CSV", + "useAvroLogicalTypes": True, "writeDisposition": WriteDisposition.WRITE_TRUNCATE, "schema": { "fields": [ @@ -2139,6 +2154,7 @@ def test_begin_w_alternate_client(self): config.quote_character = "'" config.skip_leading_rows = 1 config.source_format = "CSV" + config.use_avro_logical_types = True config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] From 952080cb7faf48a8b7057d5106073b6e2918e003 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 3 Dec 2018 13:38:23 -0700 Subject: [PATCH 0523/2016] BigQuery: add avro load samples (#6832) --- .../google-cloud-bigquery/docs/snippets.py | 47 +++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index a60f587d03cf..90762f98b37e 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -1313,6 +1313,39 @@ def test_load_table_from_file(client, to_delete): assert row2 in rows +def test_load_table_from_uri_avro(client, to_delete, capsys): + dataset_id = 'load_table_from_uri_avro_{}'.format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_load_table_gcs_avro] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + dataset_ref = client.dataset(dataset_id) + job_config = bigquery.LoadJobConfig() + job_config.source_format = bigquery.SourceFormat.AVRO + uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.avro' + + load_job = client.load_table_from_uri( + uri, + dataset_ref.table('us_states'), + job_config=job_config) # API request + print('Starting job {}'.format(load_job.job_id)) + + load_job.result() # Waits for table load to complete. + print('Job finished.') + + destination_table = client.get_table(dataset_ref.table('us_states')) + print('Loaded {} rows.'.format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_avro] + + out, _ = capsys.readouterr() + assert 'Loaded 50 rows.' in out + + def test_load_table_from_uri_csv(client, to_delete, capsys): dataset_id = "load_table_from_uri_csv_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) @@ -1588,8 +1621,11 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys): table_ref = dataset.table("us_states") body = six.BytesIO(b"Washington,WA") client.load_table_from_file(body, table_ref, job_config=job_config).result() + previous_rows = client.get_table(table_ref).num_rows + assert previous_rows > 0 # Shared code + # [START bigquery_load_table_gcs_avro_truncate] # [START bigquery_load_table_gcs_csv_truncate] # [START bigquery_load_table_gcs_json_truncate] # [START bigquery_load_table_gcs_parquet_truncate] @@ -1598,17 +1634,20 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys): # client = bigquery.Client() # table_ref = client.dataset('my_dataset').table('existing_table') - previous_rows = client.get_table(table_ref).num_rows - assert previous_rows > 0 - job_config = bigquery.LoadJobConfig() job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + # [END bigquery_load_table_gcs_avro_truncate] # [END bigquery_load_table_gcs_csv_truncate] # [END bigquery_load_table_gcs_json_truncate] # [END bigquery_load_table_gcs_parquet_truncate] # [END bigquery_load_table_gcs_orc_truncate] # Format-specific code + # [START bigquery_load_table_gcs_avro_truncate] + job_config.source_format = bigquery.SourceFormat.AVRO + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" + # [END bigquery_load_table_gcs_avro_truncate] + # [START bigquery_load_table_gcs_csv_truncate] job_config.skip_leading_rows = 1 # The source format defaults to CSV, so the line below is optional. @@ -1634,6 +1673,7 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys): # [END bigquery_load_table_gcs_orc_truncate] # Shared code + # [START bigquery_load_table_gcs_avro_truncate] # [START bigquery_load_table_gcs_csv_truncate] # [START bigquery_load_table_gcs_json_truncate] # [START bigquery_load_table_gcs_parquet_truncate] @@ -1648,6 +1688,7 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys): destination_table = client.get_table(table_ref) print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_avro_truncate] # [END bigquery_load_table_gcs_csv_truncate] # [END bigquery_load_table_gcs_json_truncate] # [END bigquery_load_table_gcs_parquet_truncate] From bd2c09e6ed3f1363f45e5e8ada70a0f8956a1dfb Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Mon, 3 Dec 2018 13:59:48 -0800 Subject: [PATCH 0524/2016] Use moved iam.policy now at google.api_core.iam.policy (#6741) * update references to iam to use api-core\ * Update dependency to api_core --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 5c4aa468d74d..7022e0d03558 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -29,7 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = 'Development Status :: 5 - Production/Stable' dependencies = [ - 'google-api-core >= 1.0.0, < 2.0.0dev', + 'google-api-core >= 1.6.0, < 2.0.0dev', 'google-cloud-core >= 0.28.0, < 0.29dev', 'google-resumable-media >= 0.3.1', ] From 2e80e6c752ee9bf1ec93d6ba74deecd43ccbf729 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Tue, 4 Dec 2018 09:00:08 -0800 Subject: [PATCH 0525/2016] Update dependency to google-cloud-core (#6835) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 7022e0d03558..0cd725cf1505 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -30,7 +30,7 @@ release_status = 'Development Status :: 5 - Production/Stable' dependencies = [ 'google-api-core >= 1.6.0, < 2.0.0dev', - 'google-cloud-core >= 0.28.0, < 0.29dev', + 'google-cloud-core >= 0.29.0, < 0.30dev', 'google-resumable-media >= 0.3.1', ] extras = { From a3d5247cd4acbb761eef68399296a340682e9fa9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 5 Dec 2018 10:23:24 -0800 Subject: [PATCH 0526/2016] Add to_bqstorage to convert from Table[Reference] google-cloud-bigquery-storage reference (#6840) * Add to_bqstorage to convert from Table[Reference] google-cloud-bigquery-storage reference. This makes it easier to use the new BigQuery Storage API (currently in Alpha) in combination with the BigQuery API. * Remove logic for partition filter and snapshot selector. * Remove unused selected_fields argument. --- .../google-cloud-bigquery/docs/snippets.py | 19 +++-- .../google/cloud/bigquery/table.py | 76 +++++++++++++++++++ packages/google-cloud-bigquery/noxfile.py | 6 +- packages/google-cloud-bigquery/setup.py | 1 + .../tests/unit/test_job.py | 2 +- .../tests/unit/test_table.py | 31 ++++++++ 6 files changed, 121 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 90762f98b37e..538c5e41eb65 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -1314,7 +1314,7 @@ def test_load_table_from_file(client, to_delete): def test_load_table_from_uri_avro(client, to_delete, capsys): - dataset_id = 'load_table_from_uri_avro_{}'.format(_millis()) + dataset_id = "load_table_from_uri_avro_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -1327,23 +1327,22 @@ def test_load_table_from_uri_avro(client, to_delete, capsys): dataset_ref = client.dataset(dataset_id) job_config = bigquery.LoadJobConfig() job_config.source_format = bigquery.SourceFormat.AVRO - uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.avro' + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" load_job = client.load_table_from_uri( - uri, - dataset_ref.table('us_states'), - job_config=job_config) # API request - print('Starting job {}'.format(load_job.job_id)) + uri, dataset_ref.table("us_states"), job_config=job_config + ) # API request + print("Starting job {}".format(load_job.job_id)) load_job.result() # Waits for table load to complete. - print('Job finished.') + print("Job finished.") - destination_table = client.get_table(dataset_ref.table('us_states')) - print('Loaded {} rows.'.format(destination_table.num_rows)) + destination_table = client.get_table(dataset_ref.table("us_states")) + print("Loaded {} rows.".format(destination_table.num_rows)) # [END bigquery_load_table_gcs_avro] out, _ = capsys.readouterr() - assert 'Loaded 50 rows.' in out + assert "Loaded 50 rows." in out def test_load_table_from_uri_csv(client, to_delete, capsys): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index cb72dde189b2..868921fe5a4c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -279,6 +279,38 @@ def to_api_repr(self): "tableId": self._table_id, } + def to_bqstorage(self): + """Construct a BigQuery Storage API representation of this table. + + If the ``table_id`` contains a partition identifier (e.g. + ``my_table$201812``) or a snapshot identifier (e.g. + ``mytable@1234567890``), it is ignored. Use + :class:`google.cloud.bigquery_storage_v1beta1.types.TableReadOptions` + to filter rows by partition. Use + :class:`google.cloud.bigquery_storage_v1beta1.types.TableModifiers` + to select a specific snapshot to read from. + + Returns: + google.cloud.bigquery_storage_v1beta1.types.TableReference: + A reference to this table in the BigQuery Storage API. + """ + from google.cloud import bigquery_storage_v1beta1 + + table_ref = bigquery_storage_v1beta1.types.TableReference() + table_ref.project_id = self._project + table_ref.dataset_id = self._dataset_id + table_id = self._table_id + + if "@" in table_id: + table_id = table_id.split("@")[0] + + if "$" in table_id: + table_id = table_id.split("$")[0] + + table_ref.table_id = table_id + + return table_ref + def _key(self): """A tuple key that uniquely describes this field. @@ -820,6 +852,15 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) + def to_bqstorage(self): + """Construct a BigQuery Storage API representation of this table. + + Returns: + google.cloud.bigquery_storage_v1beta1.types.TableReference: + A reference to this table in the BigQuery Storage API. + """ + return self.reference.to_bqstorage() + def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" partial = {} @@ -971,6 +1012,41 @@ def friendly_name(self): view_use_legacy_sql = property(_view_use_legacy_sql_getter) + @classmethod + def from_string(cls, full_table_id): + """Construct a table from fully-qualified table ID. + + Args: + full_table_id (str): + A fully-qualified table ID in standard SQL format. Must + included a project ID, dataset ID, and table ID, each + separated by ``.``. + + Returns: + Table: Table parsed from ``full_table_id``. + + Examples: + >>> Table.from_string('my-project.mydataset.mytable') + Table(TableRef...(D...('my-project', 'mydataset'), 'mytable')) + + Raises: + ValueError: + If ``full_table_id`` is not a fully-qualified table ID in + standard SQL format. + """ + return cls( + {"tableReference": TableReference.from_string(full_table_id).to_api_repr()} + ) + + def to_bqstorage(self): + """Construct a BigQuery Storage API representation of this table. + + Returns: + google.cloud.bigquery_storage_v1beta1.types.TableReference: + A reference to this table in the BigQuery Storage API. + """ + return self.reference.to_bqstorage() + def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 0927d2d430f1..7bf58f18043b 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -20,7 +20,7 @@ LOCAL_DEPS = ( - os.path.join('..', 'api_core'), + os.path.join('..', 'api_core[grpc]'), os.path.join('..', 'core'), ) @@ -40,9 +40,9 @@ def default(session): # Pyarrow does not support Python 3.7 if session.python == '3.7': - dev_install = '.[pandas]' + dev_install = '.[bqstorage, pandas]' else: - dev_install = '.[pandas, pyarrow]' + dev_install = '.[bqstorage, pandas, pyarrow]' session.install('-e', dev_install) # IPython does not support Python 2 after version 5.x diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 0cd725cf1505..da2f2fbbb5f6 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -34,6 +34,7 @@ 'google-resumable-media >= 0.3.1', ] extras = { + 'bqstorage': 'google-cloud-bigquery-storage<=2.0.0dev', 'pandas': 'pandas>=0.17.1', # Exclude PyArrow dependency from Windows Python 2.7. 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 699101745814..75a84d77e2c7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -800,7 +800,7 @@ def test_result_default_wo_state(self, result): begin.assert_called_once_with(retry=DEFAULT_RETRY) result.assert_called_once_with(timeout=None) - @mock.patch('google.api_core.future.polling.PollingFuture.result') + @mock.patch("google.api_core.future.polling.PollingFuture.result") def test_result_w_retry_wo_state(self, result): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index d84fa6e4b1c8..d51a5dfec52e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -12,11 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import itertools import unittest import mock +import pytest import six +try: + from google.cloud import bigquery_storage_v1beta1 +except ImportError: # pragma: NO COVER + bigquery_storage_v1beta1 = None try: import pandas except (ImportError, AttributeError): # pragma: NO COVER @@ -1688,3 +1694,28 @@ def test_set_expiration_w_none(self): time_partitioning = self._make_one() time_partitioning.expiration_ms = None assert time_partitioning._properties["expirationMs"] is None + + +@pytest.mark.skipif( + bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_table_reference_to_bqstorage(): + from google.cloud.bigquery import table as mut + + # Can't use parametrized pytest because bigquery_storage_v1beta1 may not be + # available. + expected = bigquery_storage_v1beta1.types.TableReference( + project_id="my-project", dataset_id="my_dataset", table_id="my_table" + ) + cases = ( + "my-project.my_dataset.my_table", + "my-project.my_dataset.my_table$20181225", + "my-project.my_dataset.my_table@1234567890", + "my-project.my_dataset.my_table$20181225@1234567890", + ) + + classes = (mut.TableReference, mut.Table, mut.TableListItem) + + for case, cls in itertools.product(cases, classes): + got = cls.from_string(case).to_bqstorage() + assert got == expected From 42f7a6bf830dae344f2f3e24348853d46e767992 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 5 Dec 2018 11:40:18 -0800 Subject: [PATCH 0527/2016] BigQuery: Fix exception type in comment (#6847) --- packages/google-cloud-bigquery/docs/snippets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 538c5e41eb65..4d7e17e5363e 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -207,7 +207,7 @@ def test_create_dataset(client, to_delete): dataset.location = "US" # Send the dataset to the API for creation. - # Raises google.api_core.exceptions.AlreadyExists if the Dataset already + # Raises google.api_core.exceptions.Conflict if the Dataset already # exists within the project. dataset = client.create_dataset(dataset) # API request # [END bigquery_create_dataset] From 58857af4b5de7c3f5bcf554c1e7ccc0e6bab384c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 5 Dec 2018 16:36:56 -0800 Subject: [PATCH 0528/2016] Add option to use BQ Storage API with to_dataframe (#6854) * Add option to use BQ Storage API with to_dataframe This is a faster method to read a dataframe from a table using the (alpha) BigQuery Storage API. Supply a BQ Storage API to to_dataframe() to use the faster method. Currently it cannot read data from (small) anonymous query results tables, thus why the system test has a destination table for the query. * Remove thread prefix (not present in Python 3.5) --- .../google/cloud/bigquery/client.py | 4 + .../google/cloud/bigquery/table.py | 100 ++++++++++- .../google-cloud-bigquery/tests/system.py | 53 +++++- .../tests/unit/test_table.py | 164 +++++++++++++++++- 4 files changed, 306 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 12c0b57ad641..3f95e55e9290 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1717,6 +1717,10 @@ def list_rows( max_results=max_results, page_size=page_size, extra_params=params, + table=table, + # Pass in selected_fields separately from schema so that full + # tables can be fetched without a column filter. + selected_fields=selected_fields, ) return row_iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 868921fe5a4c..8fadbd499bec 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -18,6 +18,7 @@ import copy import datetime +import json import operator import warnings @@ -1242,6 +1243,17 @@ class RowIterator(HTTPIterator): page_size (int, optional): The number of items to return per page. extra_params (Dict[str, object]): Extra query string parameters for the API call. + table (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + ]): + Optional. The table which these rows belong to, or a reference to + it. Used to call the BigQuery Storage API to fetch rows. + selected_fields (Sequence[ \ + google.cloud.bigquery.schema.SchemaField, \ + ]): + Optional. A subset of columns to select from this table. + """ def __init__( @@ -1254,6 +1266,8 @@ def __init__( max_results=None, page_size=None, extra_params=None, + table=None, + selected_fields=None, ): super(RowIterator, self).__init__( client, @@ -1271,6 +1285,9 @@ def __init__( self._field_to_index = _helpers._field_to_index_mapping(schema) self._total_rows = None self._page_size = page_size + self._table = table + self._selected_fields = selected_fields + self._project = client.project def _get_next_page_response(self): """Requests the next page from the path provided. @@ -1296,9 +1313,81 @@ def total_rows(self): """int: The total number of rows in the table.""" return self._total_rows - def to_dataframe(self): + def _to_dataframe_tabledata_list(self): + """Use (slower, but free) tabledata.list to construct a DataFrame.""" + column_headers = [field.name for field in self.schema] + # Use generator, rather than pulling the whole rowset into memory. + rows = (row.values() for row in iter(self)) + return pandas.DataFrame(rows, columns=column_headers) + + def _to_dataframe_bqstorage(self, bqstorage_client): + """Use (faster, but billable) BQ Storage API to construct DataFrame.""" + import concurrent.futures + from google.cloud import bigquery_storage_v1beta1 + + if "$" in self._table.table_id: + raise ValueError( + "Reading from a specific partition is not currently supported." + ) + if "@" in self._table.table_id: + raise ValueError( + "Reading from a specific snapshot is not currently supported." + ) + + read_options = bigquery_storage_v1beta1.types.TableReadOptions() + if self._selected_fields is not None: + for field in self._selected_fields: + read_options.selected_fields.append(field.name) + + session = bqstorage_client.create_read_session( + self._table.to_bqstorage(), + "projects/{}".format(self._project), + read_options=read_options, + ) + + # We need to parse the schema manually so that we can rearrange the + # columns. + schema = json.loads(session.avro_schema.schema) + columns = [field["name"] for field in schema["fields"]] + + # Avoid reading rows from an empty table. pandas.concat will fail on an + # empty list. + if not session.streams: + return pandas.DataFrame(columns=columns) + + def get_dataframe(stream): + position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) + rowstream = bqstorage_client.read_rows(position) + return rowstream.to_dataframe(session) + + with concurrent.futures.ThreadPoolExecutor() as pool: + frames = pool.map(get_dataframe, session.streams) + + # rowstream.to_dataframe() does not preserve column order. Rearrange at + # the end using manually-parsed schema. + return pandas.concat(frames)[columns] + + def to_dataframe(self, bqstorage_client=None): """Create a pandas DataFrame from the query results. + Args: + bqstorage_client ( \ + google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ + ): + Optional. A BigQuery Storage API client. If supplied, use the + faster BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. + + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + + **Caution**: There is a known issue reading small anonymous + query result tables with the BQ Storage API. Write your query + results to a destination table to work around this issue. + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -1312,11 +1401,10 @@ def to_dataframe(self): if pandas is None: raise ValueError(_NO_PANDAS_ERROR) - column_headers = [field.name for field in self.schema] - # Use generator, rather than pulling the whole rowset into memory. - rows = (row.values() for row in iter(self)) - - return pandas.DataFrame(rows, columns=column_headers) + if bqstorage_client is not None: + return self._to_dataframe_bqstorage(bqstorage_client) + else: + return self._to_dataframe_tabledata_list() class _EmptyRowIterator(object): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index fe5e3ce3dea0..a77d270c4279 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -28,6 +28,10 @@ import six import pytest +try: + from google.cloud import bigquery_storage_v1beta1 +except ImportError: # pragma: NO COVER + bigquery_storage_v1beta1 = None try: import pandas except ImportError: # pragma: NO COVER @@ -1496,7 +1500,7 @@ def test_query_iter(self): def test_query_results_to_dataframe(self): QUERY = """ SELECT id, author, time_ts, dead - from `bigquery-public-data.hacker_news.comments` + FROM `bigquery-public-data.hacker_news.comments` LIMIT 10 """ @@ -1518,6 +1522,53 @@ def test_query_results_to_dataframe(self): if not row[col] is None: self.assertIsInstance(row[col], exp_datatypes[col]) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_query_results_to_dataframe_w_bqstorage(self): + dest_dataset = self.temp_dataset(_make_dataset_id("bqstorage_to_dataframe_")) + dest_ref = dest_dataset.table("query_results") + + query = """ + SELECT id, author, time_ts, dead + FROM `bigquery-public-data.hacker_news.comments` + LIMIT 10 + """ + + bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + credentials=Config.CLIENT._credentials + ) + df = ( + Config.CLIENT.query( + query, + # There is a known issue reading small anonymous query result + # tables with the BQ Storage API. Writing to a destination + # table works around this issue. + job_config=bigquery.QueryJobConfig( + destination=dest_ref, write_disposition="WRITE_TRUNCATE" + ), + ) + .result() + .to_dataframe(bqstorage_client) + ) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 10) # verify the number of rows + column_names = ["id", "author", "time_ts", "dead"] + self.assertEqual(list(df), column_names) + exp_datatypes = { + "id": int, + "author": six.text_type, + "time_ts": pandas.Timestamp, + "dead": bool, + } + for index, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + self.assertIsInstance(row[col], exp_datatypes[col]) + def test_insert_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index d51a5dfec52e..9ed6eea2a3d0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -13,6 +13,7 @@ # limitations under the License. import itertools +import json import unittest import mock @@ -31,6 +32,14 @@ from google.cloud.bigquery.dataset import DatasetReference +def _mock_client(): + from google.cloud.bigquery import client + + mock_client = mock.create_autospec(client.Client) + mock_client.project = "my-project" + return mock_client + + class _SchemaBase(object): def _verify_field(self, field, r_field): self.assertEqual(field.name, r_field["name"]) @@ -1289,7 +1298,7 @@ def test_constructor(self): from google.cloud.bigquery.table import _item_to_row from google.cloud.bigquery.table import _rows_page_start - client = mock.sentinel.client + client = _mock_client() api_request = mock.sentinel.api_request path = "/foo" schema = [] @@ -1322,7 +1331,7 @@ def test_iterate(self): ] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) self.assertEqual(row_iterator.num_results, 0) rows_iter = iter(row_iterator) @@ -1356,7 +1365,7 @@ def test_page_size(self): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = RowIterator( - mock.sentinel.client, api_request, path, schema, page_size=4 + _mock_client(), api_request, path, schema, page_size=4 ) row_iterator._get_next_page_response() @@ -1383,7 +1392,7 @@ def test_to_dataframe(self): ] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe() @@ -1404,7 +1413,7 @@ def test_to_dataframe_w_empty_results(self): ] path = "/foo" api_request = mock.Mock(return_value={"rows": []}) - row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe() @@ -1435,7 +1444,7 @@ def test_to_dataframe_w_various_types_nullable(self): rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe() @@ -1475,7 +1484,7 @@ def test_to_dataframe_column_dtypes(self): rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe() @@ -1506,11 +1515,150 @@ def test_to_dataframe_error_if_pandas_is_none(self): ] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(mock.sentinel.client, api_request, path, schema) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) with self.assertRaises(ValueError): row_iterator.to_dataframe() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_empty(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + session = bigquery_storage_v1beta1.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "fields": [ + {"name": "colA"}, + # Not alphabetical to test column order. + {"name": "colC"}, + {"name": "colB"}, + ] + } + ) + bqstorage_client.create_read_session.return_value = session + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + [ + schema.SchemaField("colA", "IGNORED"), + schema.SchemaField("colC", "IGNORED"), + schema.SchemaField("colB", "IGNORED"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + + got = row_iterator.to_dataframe(bqstorage_client) + column_names = ["colA", "colC", "colB"] + self.assertEqual(list(got), column_names) + self.assertTrue(got.empty) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_nonempty(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud.bigquery_storage_v1beta1 import reader + + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) + mock_rowstream.to_dataframe.return_value = pandas.DataFrame( + [ + {"colA": 1, "colB": "abc", "colC": 2.0}, + {"colA": -1, "colB": "def", "colC": 4.0}, + ] + ) + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + session = bigquery_storage_v1beta1.types.ReadSession( + streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}] + ) + session.avro_schema.schema = json.dumps( + { + "fields": [ + {"name": "colA"}, + # Not alphabetical to test column order. + {"name": "colC"}, + {"name": "colB"}, + ] + } + ) + bqstorage_client.create_read_session.return_value = session + bqstorage_client.read_rows.return_value = mock_rowstream + schema = [ + schema.SchemaField("colA", "IGNORED"), + schema.SchemaField("colC", "IGNORED"), + schema.SchemaField("colB", "IGNORED"), + ] + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=schema, + ) + + got = row_iterator.to_dataframe(bqstorage_client) + column_names = ["colA", "colC", "colB"] + self.assertEqual(list(got), column_names) + self.assertEqual(len(got.index), 2) + + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_partition(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + [schema.SchemaField("colA", "IGNORED")], + table=mut.TableReference.from_string("proj.dset.tbl$20181225"), + ) + + with pytest.raises(ValueError): + row_iterator.to_dataframe(bqstorage_client) + + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_snapshot(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + [schema.SchemaField("colA", "IGNORED")], + table=mut.TableReference.from_string("proj.dset.tbl@1234567890000"), + ) + + with pytest.raises(ValueError): + row_iterator.to_dataframe(bqstorage_client) + class TestTimePartitioning(unittest.TestCase): def _get_target_class(self): From 6b6a86601dca77db6ce507eb49b55eea3390c9a0 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 10 Dec 2018 09:14:33 -0800 Subject: [PATCH 0529/2016] Fix copy/paste typos in noxfile comments (#6831) --- packages/google-cloud-bigquery/noxfile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 7bf58f18043b..121993e36595 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -108,9 +108,9 @@ def system(session): @nox.session(python=['2.7', '3.6']) def snippets(session): - """Run the system test suite.""" + """Run the snippets test suite.""" - # Sanity check: Only run system tests if the environment variable is set. + # Sanity check: Only run snippets tests if the environment variable is set. if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): session.skip('Credentials must be set via environment variable.') @@ -122,7 +122,7 @@ def snippets(session): session.install('-e', os.path.join('..', 'test_utils')) session.install('-e', '.[pandas, pyarrow]') - # Run py.test against the system tests. + # Run py.test against the snippets tests. session.run( 'py.test', os.path.join('docs', 'snippets.py'), *session.posargs) @@ -176,4 +176,4 @@ def blacken(session): "google", "tests", "docs", - ) \ No newline at end of file + ) From b825f504ca25d27552d702ee41d54680c1d39ef2 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Mon, 10 Dec 2018 13:47:14 -0800 Subject: [PATCH 0530/2016] Release bigquery 1.8.0 (#6890) * Release 1.8.0 --- packages/google-cloud-bigquery/CHANGELOG.md | 24 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 4ef594f61226..ee74857bd2d9 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,30 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.8.0 + +12-10-2018 12:39 PST + + +### Implementation Changes +- Add option to use BQ Storage API with `to_dataframe` ([#6854](https://github.com/googleapis/google-cloud-python/pull/6854)) +- Fix exception type in comment ([#6847](https://github.com/googleapis/google-cloud-python/pull/6847)) +- Add `to_bqstorage` to convert from Table[Reference] google-cloud-bigquery-storage reference ([#6840](https://github.com/googleapis/google-cloud-python/pull/6840)) +- Import `iam.policy` from `google.api_core`. ([#6741](https://github.com/googleapis/google-cloud-python/pull/6741)) +- Add avro logical type control for load jobs. ([#6827](https://github.com/googleapis/google-cloud-python/pull/6827)) +- Allow setting partition expiration to 'None'. ([#6823](https://github.com/googleapis/google-cloud-python/pull/6823)) +- Add `retry` argument to `_AsyncJob.result`. ([#6302](https://github.com/googleapis/google-cloud-python/pull/6302)) + +### Dependencies +- Update dependency to google-cloud-core ([#6835](https://github.com/googleapis/google-cloud-python/pull/6835)) + +### Documentation +- Add avro load samples ([#6832](https://github.com/googleapis/google-cloud-python/pull/6832)) + +### Internal / Testing Changes +- Blacken libraries ([#6794](https://github.com/googleapis/google-cloud-python/pull/6794)) +- Fix copy/paste typos in noxfile comments ([#6831](https://github.com/googleapis/google-cloud-python/pull/6831)) + ## 1.7.0 11-05-2018 16:41 PST diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index da2f2fbbb5f6..1e92988a2d78 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.7.0' +version = '1.8.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 7a1433ca6573b501c1d3789050db63d88b935049 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 13 Dec 2018 18:02:28 -0500 Subject: [PATCH 0531/2016] Docs/fixit: normalize docs for 'page_size' / 'max_results' / 'page_token' (#6842) --- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 5 +++-- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 3f95e55e9290..dbc68119530f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1674,8 +1674,9 @@ def list_rows( start_index (int): (Optional) The zero-based index of the starting row to read. page_size (int): - (Optional) The maximum number of items to return per page in - the iterator. + Optional. The maximum number of rows in each page of results + from this request. Non-positive values are ignored. Defaults + to a sensible value set by the API. retry (:class:`google.api_core.retry.Retry`): (Optional) How to retry the RPC. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 8fadbd499bec..123dbea7bb07 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1240,7 +1240,9 @@ class RowIterator(HTTPIterator): page_token (str): A token identifying a page in a result set to start fetching results from. max_results (int, optional): The maximum number of results to fetch. - page_size (int, optional): The number of items to return per page. + page_size (int, optional): The maximum number of rows in each page + of results from this request. Non-positive values are ignored. + Defaults to a sensible value set by the API. extra_params (Dict[str, object]): Extra query string parameters for the API call. table (Union[ \ From 84960574e39e39847d21b05e58ef9ca59d14d44a Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Fri, 14 Dec 2018 12:25:37 -0800 Subject: [PATCH 0532/2016] Document Python 2 deprecation (#6910) --- packages/google-cloud-bigquery/README.rst | 9 +++++++++ packages/google-cloud-bigquery/setup.py | 1 + 2 files changed, 10 insertions(+) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 7d5fbb067071..4a237b3fa6ab 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -50,6 +50,15 @@ dependencies. .. _`virtualenv`: https://virtualenv.pypa.io/en/latest/ +Supported Python Versions +^^^^^^^^^^^^^^^^^^^^^^^^^ +Python >= 3.4 + +Deprecated Python Versions +^^^^^^^^^^^^^^^^^^^^^^^^^^ +Python == 2.7. Python 2.7 support will be removed on January 1, 2020. + + Mac/Linux ^^^^^^^^^ diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 1e92988a2d78..d835b10c669f 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -90,6 +90,7 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, + python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*', include_package_data=True, zip_safe=False, ) From a8e1e81f0a946653b7df232f2f1eb866ee172952 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Tue, 18 Dec 2018 12:29:27 -0800 Subject: [PATCH 0533/2016] Release 1.8.1 (#6957) --- packages/google-cloud-bigquery/CHANGELOG.md | 9 +++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index ee74857bd2d9..975a26d4c739 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,15 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.8.1 + +12-17-2018 17:53 PST + + +### Documentation +- Document Python 2 deprecation ([#6910](https://github.com/googleapis/google-cloud-python/pull/6910)) +- Normalize docs for 'page_size' / 'max_results' / 'page_token' ([#6842](https://github.com/googleapis/google-cloud-python/pull/6842)) + ## 1.8.0 12-10-2018 12:39 PST diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index d835b10c669f..3f8f1c7f9388 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.8.0' +version = '1.8.1' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 09d830b752e5a7778c1d3824667f085e1a6cf215 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 19 Dec 2018 13:45:13 -0800 Subject: [PATCH 0534/2016] BigQuery: Do not return results of query magic when assigned to variable (#7010) --- .../google/cloud/bigquery/magics.py | 24 +++++++------------ .../tests/unit/test_magics.py | 11 +++++---- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 1b8f1f2ee923..4494d7ba5ad1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -27,7 +27,8 @@ Parameters: * ```` (optional, line argument): - variable to store the query results. + variable to store the query results. The results are not displayed if + this parameter is used. * ``--project `` (optional, line argument): Project to use for running the query. Defaults to the context :attr:`~google.cloud.bigquery.magics.Context.project`. @@ -96,12 +97,6 @@ Query executing: 2.61s Query complete after 2.92s - Out[3]: name count - ...: ---------------------- - ...: 0 Mary 3736239 - ...: 1 Patricia 1568495 - ...: 2 Elizabeth 1519946 - In [4]: df Out[4]: name count @@ -110,7 +105,7 @@ ...: 1 Patricia 1568495 ...: 2 Elizabeth 1519946 - In [5]: %%bigquery df --params {"num": 17} + In [5]: %%bigquery --params {"num": 17} ...: SELECT @num AS num Out[5]: num @@ -119,7 +114,7 @@ In [6]: params = {"num": 17} - In [7]: %%bigquery df --params $params + In [7]: %%bigquery --params $params ...: SELECT @num AS num Out[7]: num @@ -262,17 +257,13 @@ def _run_query(client, query, job_config=None): @magic_arguments.argument( "destination_var", nargs="?", - help=( - "If provided, save the output to this variable in addition " "to displaying it." - ), + help=("If provided, save the output to this variable instead of displaying it."), ) @magic_arguments.argument( "--project", type=str, default=None, - help=( - "Project to use for executing this query. Defaults to the context " "project." - ), + help=("Project to use for executing this query. Defaults to the context project."), ) @magic_arguments.argument( "--use_legacy_sql", @@ -348,4 +339,5 @@ def _cell_magic(line, query): result = query_job.to_dataframe() if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) - return result + else: + return result diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 5729146b053a..6ec6123e726c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -160,11 +160,11 @@ def test_bigquery_magic_without_optional_arguments(): with run_query_patch as run_query_mock: run_query_mock.return_value = query_job_mock - result = ip.run_cell_magic("bigquery", "", sql) + return_value = ip.run_cell_magic("bigquery", "", sql) - assert isinstance(result, pandas.DataFrame) - assert len(result) == len(result) # verify row count - assert list(result) == list(result) # verify column names + assert isinstance(return_value, pandas.DataFrame) + assert len(return_value) == len(result) # verify row count + assert list(return_value) == list(result) # verify column names @pytest.mark.usefixtures("ipython_interactive") @@ -208,8 +208,9 @@ def test_bigquery_magic_with_result_saved_to_variable(): with run_query_patch as run_query_mock: run_query_mock.return_value = query_job_mock - ip.run_cell_magic("bigquery", "df", sql) + return_value = ip.run_cell_magic("bigquery", "df", sql) + assert return_value is None assert "df" in ip.user_ns # verify that variable exists df = ip.user_ns["df"] assert len(df) == len(result) # verify row count From d7778c5d0e9290b083b86ae632f6c970db289372 Mon Sep 17 00:00:00 2001 From: Adam Fletcher Date: Wed, 2 Jan 2019 07:04:12 -0800 Subject: [PATCH 0535/2016] Fix nested schema parsing in insert_rows (#7022) Before this change, insert_rows did not convert record/struct or repeated/array types to the correct JSON. Now the new _helpers._field_to_json function will recursively convert a values into the correct JSON for the provided schema. --- .../google/cloud/bigquery/_helpers.py | 107 ++++++++++++++++ .../google/cloud/bigquery/client.py | 18 +-- .../tests/unit/test_client.py | 115 +++++++++++++++--- 3 files changed, 204 insertions(+), 36 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 6990fb3eaa69..10753cfc998b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -15,6 +15,7 @@ """Shared helper functions for BigQuery API classes.""" import base64 +import copy import datetime import decimal @@ -329,6 +330,112 @@ def _time_to_json(value): _SCALAR_VALUE_TO_JSON_PARAM["TIMESTAMP"] = _timestamp_to_json_parameter +def _scalar_field_to_json(field, row_value): + """Maps a field and value to a JSON-safe value. + + Args: + field ( \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + ): + The SchemaField to use for type conversion and field name. + row_value (any): + Value to be converted, based on the field's type. + + Returns: + any: + A JSON-serializable object. + """ + converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) + if converter is None: # STRING doesn't need converting + return row_value + return converter(row_value) + + +def _repeated_field_to_json(field, row_value): + """Convert a repeated/array field to its JSON representation. + + Args: + field ( \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + ): + The SchemaField to use for type conversion and field name. The + field mode must equal ``REPEATED``. + row_value (Sequence[any]): + A sequence of values to convert to JSON-serializable values. + + Returns: + List[any]: + A list of JSON-serializable objects. + """ + # Remove the REPEATED, but keep the other fields. This allows us to process + # each item as if it were a top-level field. + item_field = copy.deepcopy(field) + item_field._mode = "NULLABLE" + values = [] + for item in row_value: + values.append(_field_to_json(item_field, item)) + return values + + +def _record_field_to_json(fields, row_value): + """Convert a record/struct field to its JSON representation. + + Args: + fields ( \ + Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`], \ + ): + The :class:`~google.cloud.bigquery.schema.SchemaField`s of the + record's subfields to use for type conversion and field names. + row_value (Union[Tuple[Any], Mapping[str, Any]): + A tuple or dictionary to convert to JSON-serializable values. + + Returns: + Mapping[str, any]: + A JSON-serializable dictionary. + """ + record = {} + isdict = isinstance(row_value, dict) + + for subindex, subfield in enumerate(fields): + subname = subfield.name + subvalue = row_value[subname] if isdict else row_value[subindex] + record[subname] = _field_to_json(subfield, subvalue) + return record + + +def _field_to_json(field, row_value): + """Convert a field into JSON-serializable values. + + Args: + field ( \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + ): + The SchemaField to use for type conversion and field name. + + row_value (Union[ \ + Sequence[list], \ + any, \ + ]): + Row data to be inserted. If the SchemaField's mode is + REPEATED, assume this is a list. If not, the type + is inferred from the SchemaField's field_type. + + Returns: + any: + A JSON-serializable object. + """ + if row_value is None: + return None + + if field.mode == "REPEATED": + return _repeated_field_to_json(field, row_value) + + if field.field_type == "RECORD": + return _record_field_to_json(field.fields, row_value) + + return _scalar_field_to_json(field, row_value) + + def _snake_to_camel_case(value): """Convert snake case string to camel case.""" words = value.split("_") diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index dbc68119530f..96f1310c3f99 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -37,7 +37,7 @@ from google.cloud import exceptions from google.cloud.client import ClientWithProject -from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW +from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset @@ -51,7 +51,6 @@ from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA -from google.cloud.bigquery.table import _row_from_mapping _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB @@ -1495,20 +1494,7 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): else: raise TypeError("table should be Table or TableReference") - json_rows = [] - - for index, row in enumerate(rows): - if isinstance(row, dict): - row = _row_from_mapping(row, schema) - json_row = {} - - for field, value in zip(schema, row): - converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) - if converter is not None: # STRING doesn't need converting - value = converter(value) - json_row[field.name] = value - - json_rows.append(json_row) + json_rows = [_record_field_to_json(schema, row) for row in rows] return self.insert_rows_json(table, json_rows, **kwargs) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 0fc14b160a9c..c3d90ed640fb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -25,6 +25,7 @@ import six from six.moves import http_client import pytest +import pytz try: import pandas @@ -3482,20 +3483,76 @@ def test_insert_rows_w_repeated_fields(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = _make_connection({}) - full_name = SchemaField("color", "STRING", mode="REPEATED") - index = SchemaField("index", "INTEGER", "REPEATED") - score = SchemaField("score", "FLOAT", "REPEATED") - struct = SchemaField("struct", "RECORD", mode="REPEATED", fields=[index, score]) - table = Table(self.TABLE_REF, schema=[full_name, struct]) - ROWS = [(["red", "green"], [{"index": [1, 2], "score": [3.1415, 1.414]}])] - - def _row_data(row): - return {"color": row[0], "struct": row[1]} + color = SchemaField("color", "STRING", mode="REPEATED") + items = SchemaField("items", "INTEGER", mode="REPEATED") + score = SchemaField("score", "INTEGER") + times = SchemaField("times", "TIMESTAMP", mode="REPEATED") + distances = SchemaField("distances", "FLOAT", mode="REPEATED") + structs = SchemaField( + "structs", "RECORD", mode="REPEATED", fields=[score, times, distances] + ) + table = Table(self.TABLE_REF, schema=[color, items, structs]) + ROWS = [ + ( + ["red", "green"], + [1, 2], + [ + ( + 12, + [ + datetime.datetime(2018, 12, 1, 12, 0, 0, tzinfo=pytz.utc), + datetime.datetime(2018, 12, 1, 13, 0, 0, tzinfo=pytz.utc), + ], + [1.25, 2.5], + ), + { + "score": 13, + "times": [ + datetime.datetime(2018, 12, 2, 12, 0, 0, tzinfo=pytz.utc), + datetime.datetime(2018, 12, 2, 13, 0, 0, tzinfo=pytz.utc), + ], + "distances": [-1.25, -2.5], + }, + ], + ), + {"color": None, "items": [], "structs": [(None, [], [3.5])]}, + ] SENT = { "rows": [ - {"json": _row_data(row), "insertId": str(i)} - for i, row in enumerate(ROWS) + { + "json": { + "color": ["red", "green"], + "items": ["1", "2"], + "structs": [ + { + "score": "12", + "times": [ + 1543665600.0, # 2018-12-01 12:00 UTC + 1543669200.0, # 2018-12-01 13:00 UTC + ], + "distances": [1.25, 2.5], + }, + { + "score": "13", + "times": [ + 1543752000.0, # 2018-12-02 12:00 UTC + 1543755600.0, # 2018-12-02 13:00 UTC + ], + "distances": [-1.25, -2.5], + }, + ], + }, + "insertId": "0", + }, + { + "json": { + "color": None, + "items": [], + "structs": [{"score": None, "times": [], "distances": [3.5]}], + }, + "insertId": "1", + }, ] } @@ -3531,20 +3588,38 @@ def test_insert_rows_w_record_schema(self): "Phred Phlyntstone", {"area_code": "800", "local_number": "555-1212", "rank": 1}, ), - ( - "Bharney Rhubble", - {"area_code": "877", "local_number": "768-5309", "rank": 2}, - ), + ("Bharney Rhubble", ("877", "768-5309", 2)), ("Wylma Phlyntstone", None), ] - def _row_data(row): - return {"full_name": row[0], "phone": row[1]} - SENT = { "rows": [ - {"json": _row_data(row), "insertId": str(i)} - for i, row in enumerate(ROWS) + { + "json": { + "full_name": "Phred Phlyntstone", + "phone": { + "area_code": "800", + "local_number": "555-1212", + "rank": "1", + }, + }, + "insertId": "0", + }, + { + "json": { + "full_name": "Bharney Rhubble", + "phone": { + "area_code": "877", + "local_number": "768-5309", + "rank": "2", + }, + }, + "insertId": "1", + }, + { + "json": {"full_name": "Wylma Phlyntstone", "phone": None}, + "insertId": "2", + }, ] } From 78d3b5757434cf09c019d69cfde6ec25edb8f617 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 3 Jan 2019 17:38:38 -0500 Subject: [PATCH 0536/2016] BigQuery: Harden systests against transient GCS errors. (#7006) - Centralize bucket creation in BQ systests. - Add retry for transient errors from 'bucket.create', blob operations Closes #7005. --- .../google-cloud-bigquery/tests/system.py | 82 +++++++++---------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index a77d270c4279..ad67e7b035bd 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -49,6 +49,8 @@ from google.api_core.exceptions import Conflict from google.api_core.exceptions import Forbidden from google.api_core.exceptions import NotFound +from google.api_core.exceptions import InternalServerError +from google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests from google.cloud import bigquery from google.cloud.bigquery.dataset import Dataset @@ -97,6 +99,10 @@ ), ] +retry_storage_errors = RetryErrors( + (TooManyRequests, InternalServerError, ServiceUnavailable) +) + def _has_rows(result): return len(result) > 0 @@ -154,10 +160,12 @@ def _still_in_use(bad_request): ) retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use) - retry_409_429 = RetryErrors((Conflict, TooManyRequests)) + retry_storage_errors_conflict = RetryErrors( + (Conflict, TooManyRequests, InternalServerError, ServiceUnavailable) + ) for doomed in self.to_delete: if isinstance(doomed, storage.Bucket): - retry_409_429(doomed.delete)(force=True) + retry_storage_errors_conflict(doomed.delete)(force=True) elif isinstance(doomed, (Dataset, bigquery.DatasetReference)): retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True) elif isinstance(doomed, (Table, bigquery.TableReference)): @@ -173,6 +181,14 @@ def test_get_service_account_email(self): self.assertIsInstance(got, six.text_type) self.assertIn("@", got) + def _create_bucket(self, bucket_name, location=None): + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + retry_storage_errors(bucket.create)(location=location) + self.to_delete.append(bucket) + + return bucket + def test_create_dataset(self): DATASET_ID = _make_dataset_id("create_dataset") dataset = self.temp_dataset(DATASET_ID) @@ -683,12 +699,8 @@ def test_load_table_from_uri_then_dump_table(self): def test_load_table_from_file_w_explicit_location(self): # Create a temporary bucket for extract files. - storage_client = storage.Client() bucket_name = "bq_load_table_eu_extract_test" + unique_resource_id() - bucket = storage_client.bucket(bucket_name) - bucket.location = "eu" - self.to_delete.append(bucket) - bucket.create() + self._create_bucket(bucket_name, location="eu") # Create a temporary dataset & table in the EU. table_bytes = six.BytesIO(b"a,3\nb,2\nc,1\n") @@ -768,20 +780,12 @@ def test_load_table_from_file_w_explicit_location(self): table_ref, "gs://{}/letters-us.csv".format(bucket_name), location="US" ).result() - def _create_storage(self, bucket_name, blob_name): - storage_client = storage.Client() - - # In the **very** rare case the bucket name is reserved, this - # fails with a ConnectionError. - bucket = storage_client.create_bucket(bucket_name) - self.to_delete.append(bucket) - - return bucket.blob(blob_name) - def _write_csv_to_storage(self, bucket_name, blob_name, header_row, data_rows): from google.cloud._testing import _NamedTemporaryFile - blob = self._create_storage(bucket_name, blob_name) + bucket = self._create_bucket(bucket_name) + blob = bucket.blob(blob_name) + with _NamedTemporaryFile() as temp: with open(temp.name, "w") as csv_write: writer = csv.writer(csv_write) @@ -789,30 +793,26 @@ def _write_csv_to_storage(self, bucket_name, blob_name, header_row, data_rows): writer.writerows(data_rows) with open(temp.name, "rb") as csv_read: - blob.upload_from_file(csv_read, content_type="text/csv") + retry_storage_errors(blob.upload_from_file)( + csv_read, content_type="text/csv" + ) self.to_delete.insert(0, blob) return "gs://{}/{}".format(bucket_name, blob_name) def _write_avro_to_storage(self, bucket_name, blob_name, avro_file): - blob = self._create_storage(bucket_name, blob_name) - blob.upload_from_file(avro_file, content_type="application/x-avro-binary") + bucket = self._create_bucket(bucket_name) + blob = bucket.blob(blob_name) + retry_storage_errors(blob.upload_from_file)( + avro_file, content_type="application/x-avro-binary" + ) self.to_delete.insert(0, blob) return "gs://{}/{}".format(bucket_name, blob_name) - def _load_table_for_extract_table( - self, storage_client, rows, bucket_name, blob_name, table - ): + def _load_table_for_extract_table(self, bucket, blob_name, table, rows): from google.cloud._testing import _NamedTemporaryFile - gs_url = "gs://{}/{}".format(bucket_name, blob_name) - - # In the **very** rare case the bucket name is reserved, this - # fails with a ConnectionError. - bucket = storage_client.create_bucket(bucket_name) - self.to_delete.append(bucket) blob = bucket.blob(blob_name) - with _NamedTemporaryFile() as temp: with open(temp.name, "w") as csv_write: writer = csv.writer(csv_write) @@ -820,13 +820,17 @@ def _load_table_for_extract_table( writer.writerows(rows) with open(temp.name, "rb") as csv_read: - blob.upload_from_file(csv_read, content_type="text/csv") + retry_storage_errors(blob.upload_from_file)( + csv_read, content_type="text/csv" + ) + self.to_delete.insert(0, blob) dataset = self.temp_dataset(table.dataset_id) table_ref = dataset.table(table.table_id) config = bigquery.LoadJobConfig() config.autodetect = True + gs_url = "gs://{}/{}".format(bucket.name, blob_name) job = Config.CLIENT.load_table_from_uri(gs_url, table_ref, job_config=config) # TODO(jba): do we need this retry now that we have job.result()? # Allow for 90 seconds of "warm up" before rows visible. See @@ -836,21 +840,16 @@ def _load_table_for_extract_table( retry(job.reload)() def test_extract_table(self): - from google.cloud.storage import Client as StorageClient - - storage_client = StorageClient() local_id = unique_resource_id() bucket_name = "bq_extract_test" + local_id - blob_name = "person_ages.csv" + source_blob_name = "person_ages.csv" dataset_id = _make_dataset_id("load_gcs_then_extract") table_id = "test_table" table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) table = Table(table_ref) self.to_delete.insert(0, table) - self._load_table_for_extract_table( - storage_client, ROWS, bucket_name, blob_name, table_ref - ) - bucket = storage_client.bucket(bucket_name) + bucket = self._create_bucket(bucket_name) + self._load_table_for_extract_table(bucket, source_blob_name, table_ref, ROWS) destination_blob_name = "person_ages_out.csv" destination = bucket.blob(destination_blob_name) destination_uri = "gs://{}/person_ages_out.csv".format(bucket_name) @@ -859,7 +858,8 @@ def test_extract_table(self): job.result(timeout=100) self.to_delete.insert(0, destination) - got = destination.download_as_string().decode("utf-8") + got_bytes = retry_storage_errors(destination.download_as_string)() + got = got_bytes.decode("utf-8") self.assertIn("Bharney Rhubble", got) def test_copy_table(self): From 034ac555991c19e06702b5d557b31e8c75567a2e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 16 Jan 2019 16:17:18 -0800 Subject: [PATCH 0537/2016] Add option to choose dtypes by column in to_dataframe. (#7126) * Add option to choose dtypes by column in to_dataframe. This allows pandas users to select different sized floats for performance at the expense of accuracy. With pandas 0.24, it will also allow pandas users to use the new pandas.Int64Dtype() for nullable integer columns. * Adjust deps for testing. Blacken. --- .../google/cloud/bigquery/table.py | 46 +++++++++++++------ packages/google-cloud-bigquery/noxfile.py | 7 ++- packages/google-cloud-bigquery/setup.py | 2 +- .../google-cloud-bigquery/tests/system.py | 20 ++++++-- .../tests/unit/test_table.py | 10 ++-- 5 files changed, 62 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 123dbea7bb07..b7668fd28ed1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -16,6 +16,7 @@ from __future__ import absolute_import +import collections import copy import datetime import json @@ -1315,14 +1316,24 @@ def total_rows(self): """int: The total number of rows in the table.""" return self._total_rows - def _to_dataframe_tabledata_list(self): + def _to_dataframe_dtypes(self, page, column_names, dtypes): + columns = collections.defaultdict(list) + for row in page: + for column in column_names: + columns[column].append(row[column]) + for column in dtypes: + columns[column] = pandas.Series(columns[column], dtype=dtypes[column]) + return pandas.DataFrame(columns, columns=column_names) + + def _to_dataframe_tabledata_list(self, dtypes): """Use (slower, but free) tabledata.list to construct a DataFrame.""" - column_headers = [field.name for field in self.schema] - # Use generator, rather than pulling the whole rowset into memory. - rows = (row.values() for row in iter(self)) - return pandas.DataFrame(rows, columns=column_headers) + column_names = [field.name for field in self.schema] + frames = [] + for page in iter(self.pages): + frames.append(self._to_dataframe_dtypes(page, column_names, dtypes)) + return pandas.concat(frames) - def _to_dataframe_bqstorage(self, bqstorage_client): + def _to_dataframe_bqstorage(self, bqstorage_client, dtypes): """Use (faster, but billable) BQ Storage API to construct DataFrame.""" import concurrent.futures from google.cloud import bigquery_storage_v1beta1 @@ -1360,7 +1371,7 @@ def _to_dataframe_bqstorage(self, bqstorage_client): def get_dataframe(stream): position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) rowstream = bqstorage_client.read_rows(position) - return rowstream.to_dataframe(session) + return rowstream.to_dataframe(session, dtypes=dtypes) with concurrent.futures.ThreadPoolExecutor() as pool: frames = pool.map(get_dataframe, session.streams) @@ -1369,16 +1380,16 @@ def get_dataframe(stream): # the end using manually-parsed schema. return pandas.concat(frames)[columns] - def to_dataframe(self, bqstorage_client=None): + def to_dataframe(self, bqstorage_client=None, dtypes=None): """Create a pandas DataFrame from the query results. Args: bqstorage_client ( \ google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ ): - Optional. A BigQuery Storage API client. If supplied, use the - faster BigQuery Storage API to fetch rows from BigQuery. This - API is a billable API. + **Alpha Feature** Optional. A BigQuery Storage API client. If + supplied, use the faster BigQuery Storage API to fetch rows + from BigQuery. This API is a billable API. This method requires the ``fastavro`` and ``google-cloud-bigquery-storage`` libraries. @@ -1389,6 +1400,13 @@ def to_dataframe(self, bqstorage_client=None): **Caution**: There is a known issue reading small anonymous query result tables with the BQ Storage API. Write your query results to a destination table to work around this issue. + dtypes ( \ + Map[str, Union[str, pandas.Series.dtype]] \ + ): + Optional. A dictionary of column names pandas ``dtype``s. The + provided ``dtype`` is used when constructing the series for + the column specified. Otherwise, the default pandas behavior + is used. Returns: pandas.DataFrame: @@ -1402,11 +1420,13 @@ def to_dataframe(self, bqstorage_client=None): """ if pandas is None: raise ValueError(_NO_PANDAS_ERROR) + if dtypes is None: + dtypes = {} if bqstorage_client is not None: - return self._to_dataframe_bqstorage(bqstorage_client) + return self._to_dataframe_bqstorage(bqstorage_client, dtypes) else: - return self._to_dataframe_tabledata_list() + return self._to_dataframe_tabledata_list(dtypes) class _EmptyRowIterator(object): diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 121993e36595..089a82375606 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -22,6 +22,9 @@ LOCAL_DEPS = ( os.path.join('..', 'api_core[grpc]'), os.path.join('..', 'core'), + # TODO: Move bigquery_storage back to dev_install once dtypes feature is + # released. Issue #7049 + os.path.join('..', 'bigquery_storage[pandas,fastavro]'), ) @@ -40,9 +43,9 @@ def default(session): # Pyarrow does not support Python 3.7 if session.python == '3.7': - dev_install = '.[bqstorage, pandas]' + dev_install = '.[pandas]' else: - dev_install = '.[bqstorage, pandas, pyarrow]' + dev_install = '.[pandas, pyarrow]' session.install('-e', dev_install) # IPython does not support Python 2 after version 5.x diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 3f8f1c7f9388..5d155e24d1a8 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -34,7 +34,7 @@ 'google-resumable-media >= 0.3.1', ] extras = { - 'bqstorage': 'google-cloud-bigquery-storage<=2.0.0dev', + 'bqstorage': 'google-cloud-bigquery-storage >= 0.2.0dev1, <2.0.0dev', 'pandas': 'pandas>=0.17.1', # Exclude PyArrow dependency from Windows Python 2.7. 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index ad67e7b035bd..1f0755cd64e8 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1733,13 +1733,22 @@ def test_nested_table_to_dataframe(self): ), ], ), + SF("bigfloat_col", "FLOAT", mode="NULLABLE"), + SF("smallfloat_col", "FLOAT", mode="NULLABLE"), ] record = { "nested_string": "another string value", "nested_repeated": [0, 1, 2], "nested_record": {"nested_nested_string": "some deep insight"}, } - to_insert = [{"string_col": "Some value", "record_col": record}] + to_insert = [ + { + "string_col": "Some value", + "record_col": record, + "bigfloat_col": 3.14, + "smallfloat_col": 2.72, + } + ] rows = [json.dumps(row) for row in to_insert] body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) table_id = "test_table" @@ -1753,11 +1762,13 @@ def test_nested_table_to_dataframe(self): # Load a table using a local JSON file from memory. Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - df = Config.CLIENT.list_rows(table, selected_fields=schema).to_dataframe() + df = Config.CLIENT.list_rows(table, selected_fields=schema).to_dataframe( + dtypes={"smallfloat_col": "float16"} + ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 1) # verify the number of rows - exp_columns = ["string_col", "record_col"] + exp_columns = ["string_col", "record_col", "bigfloat_col", "smallfloat_col"] self.assertEqual(list(df), exp_columns) # verify the column names row = df.iloc[0] # verify the row content @@ -1769,6 +1780,9 @@ def test_nested_table_to_dataframe(self): row["record_col"]["nested_record"]["nested_nested_string"], "some deep insight", ) + # verify dtypes + self.assertEqual(df.dtypes["bigfloat_col"].name, "float64") + self.assertEqual(df.dtypes["smallfloat_col"].name, "float16") def test_list_rows_empty_table(self): from google.cloud.bigquery.table import RowIterator diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 9ed6eea2a3d0..af20c396ac88 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1472,21 +1472,22 @@ def test_to_dataframe_column_dtypes(self): SchemaField("start_timestamp", "TIMESTAMP"), SchemaField("seconds", "INT64"), SchemaField("miles", "FLOAT64"), + SchemaField("km", "FLOAT64"), SchemaField("payment_type", "STRING"), SchemaField("complete", "BOOL"), SchemaField("date", "DATE"), ] row_data = [ - ["1.4338368E9", "420", "1.1", "Cash", "true", "1999-12-01"], - ["1.3878117E9", "2580", "17.7", "Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", "Credit", "true", "1981-11-04"], + ["1.4338368E9", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"], + ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], + ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) row_iterator = RowIterator(_mock_client(), api_request, path, schema) - df = row_iterator.to_dataframe() + df = row_iterator.to_dataframe(dtypes={"km": "float16"}) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 3) # verify the number of rows @@ -1496,6 +1497,7 @@ def test_to_dataframe_column_dtypes(self): self.assertEqual(df.start_timestamp.dtype.name, "datetime64[ns, UTC]") self.assertEqual(df.seconds.dtype.name, "int64") self.assertEqual(df.miles.dtype.name, "float64") + self.assertEqual(df.km.dtype.name, "float16") self.assertEqual(df.payment_type.dtype.name, "object") self.assertEqual(df.complete.dtype.name, "bool") self.assertEqual(df.date.dtype.name, "object") From 070a957d7636654d3aa72853f2892e97cdebefca Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 29 Jan 2019 16:42:06 -0800 Subject: [PATCH 0538/2016] Add sample for fetching total_rows from query results. (#7217) * Add sample for fetching total_rows from query results. * Copypasta --- .../google-cloud-bigquery/docs/snippets.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 4d7e17e5363e..5356700a495a 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -2225,6 +2225,32 @@ def test_client_query_legacy_sql(client): # [END bigquery_query_legacy] +def test_client_query_total_rows(client, capsys): + """Run a query and just check for how many rows.""" + # [START bigquery_query_total_rows] + # from google.cloud import bigquery + # client = bigquery.Client() + + query = ( + "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` " + 'WHERE state = "TX" ' + "LIMIT 100" + ) + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. + location="US", + ) # API request - starts the query + + results = query_job.result() # Waits for query to complete. + next(iter(results)) # Fetch the first page of results, which contains total_rows. + print("Got {} rows.".format(results.total_rows)) + # [END bigquery_query_total_rows] + + out, _ = capsys.readouterr() + assert "Got 100 rows." in out + + def test_manage_job(client): sql = """ SELECT corpus From 70a50accf95d071252cc5f6cc122ed7c116bb707 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 1 Feb 2019 13:14:52 -0800 Subject: [PATCH 0539/2016] Add args from 'RowIterator.to_dataframe()' to 'QueryJob.to_dataframe()'. (#7241) --- .../google/cloud/bigquery/job.py | 31 ++++- .../google/cloud/bigquery/table.py | 14 ++- .../tests/unit/test_job.py | 113 ++++++++++++++++++ 3 files changed, 155 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index f72bb320c80f..4df7a92ba63c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2778,9 +2778,34 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): dest_table = Table(dest_table_ref, schema=schema) return self._client.list_rows(dest_table, retry=retry) - def to_dataframe(self): + def to_dataframe(self, bqstorage_client=None, dtypes=None): """Return a pandas DataFrame from a QueryJob + Args: + bqstorage_client ( \ + google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ + ): + **Alpha Feature** Optional. A BigQuery Storage API client. If + supplied, use the faster BigQuery Storage API to fetch rows + from BigQuery. This API is a billable API. + + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + + **Caution**: There is a known issue reading small anonymous + query result tables with the BQ Storage API. Write your query + results to a destination table to work around this issue. + dtypes ( \ + Map[str, Union[str, pandas.Series.dtype]] \ + ): + Optional. A dictionary of column names pandas ``dtype``s. The + provided ``dtype`` is used when constructing the series for + the column specified. Otherwise, the default pandas behavior + is used. + Returns: A :class:`~pandas.DataFrame` populated with row data and column headers from the query results. The column headers are derived @@ -2789,7 +2814,9 @@ def to_dataframe(self): Raises: ValueError: If the `pandas` library cannot be imported. """ - return self.result().to_dataframe() + return self.result().to_dataframe( + bqstorage_client=bqstorage_client, dtypes=dtypes + ) def __iter__(self): return iter(self.result()) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index b7668fd28ed1..7f3392631adb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1441,7 +1441,19 @@ class _EmptyRowIterator(object): pages = () total_rows = 0 - def to_dataframe(self): + def to_dataframe(self, bqstorage_client=None, dtypes=None): + """Create an empty dataframe. + + Args: + bqstorage_client (Any): + Ignored. Added for compatibility with RowIterator. + dtypes (Any): + Ignored. Added for compatibility with RowIterator. + + Returns: + pandas.DataFrame: + An empty :class:`~pandas.DataFrame`. + """ if pandas is None: raise ValueError(_NO_PANDAS_ERROR) return pandas.DataFrame() diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 75a84d77e2c7..5023c41e6374 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -13,6 +13,7 @@ # limitations under the License. import copy +import json import unittest import mock @@ -22,6 +23,10 @@ import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + from google.cloud import bigquery_storage_v1beta1 +except (ImportError, AttributeError): # pragma: NO COVER + bigquery_storage_v1beta1 = None def _make_credentials(): @@ -4543,6 +4548,114 @@ def test_to_dataframe(self): self.assertEqual(len(df), 4) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_ddl_query(self): + # Destination table may have no schema for some DDL and DML queries. + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": []}, + } + connection = _make_connection(query_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + df = job.to_dataframe() + + self.assertEqual(len(df), 0) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_bqstorage(self): + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "totalRows": "4", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + } + connection = _make_connection(query_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + session = bigquery_storage_v1beta1.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [ + {"name": "name", "type": ["null", "string"]}, + {"name": "age", "type": ["null", "long"]}, + ], + } + ) + bqstorage_client.create_read_session.return_value = session + + job.to_dataframe(bqstorage_client=bqstorage_client) + + bqstorage_client.create_read_session.assert_called_once_with( + mock.ANY, "projects/{}".format(self.PROJECT), read_options=mock.ANY + ) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_column_dtypes(self): + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "totalRows": "4", + "schema": { + "fields": [ + {"name": "start_timestamp", "type": "TIMESTAMP"}, + {"name": "seconds", "type": "INT64"}, + {"name": "miles", "type": "FLOAT64"}, + {"name": "km", "type": "FLOAT64"}, + {"name": "payment_type", "type": "STRING"}, + {"name": "complete", "type": "BOOL"}, + {"name": "date", "type": "DATE"}, + ] + }, + } + row_data = [ + ["1.4338368E9", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"], + ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], + ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + query_resource["rows"] = rows + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource + ) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + df = job.to_dataframe(dtypes={"km": "float16"}) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] + self.assertEqual(list(df), exp_columns) # verify the column names + + self.assertEqual(df.start_timestamp.dtype.name, "datetime64[ns, UTC]") + self.assertEqual(df.seconds.dtype.name, "int64") + self.assertEqual(df.miles.dtype.name, "float64") + self.assertEqual(df.km.dtype.name, "float16") + self.assertEqual(df.payment_type.dtype.name, "object") + self.assertEqual(df.complete.dtype.name, "bool") + self.assertEqual(df.date.dtype.name, "object") + def test_iter(self): import types From eed6a9c6bac99a28266c244ed14fd67dc8f37ef8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 4 Feb 2019 14:53:30 -0800 Subject: [PATCH 0540/2016] Update error in system tests that expect error for wrong location (#7286) We used to get BadRequest for querying with the wrong location. Now we get NotFound. I update the test to just look for the base class of GoogleAPICallError, since it seems we aren't guarnteed the actual status code. --- packages/google-cloud-bigquery/tests/system.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1f0755cd64e8..04c4c3bf18fd 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -48,6 +48,7 @@ from google.api_core.exceptions import BadRequest from google.api_core.exceptions import Conflict from google.api_core.exceptions import Forbidden +from google.api_core.exceptions import GoogleAPICallError from google.api_core.exceptions import NotFound from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable @@ -765,17 +766,17 @@ def test_load_table_from_file_w_explicit_location(self): self.assertTrue(eu_query.done) # Cannot query from US. - with self.assertRaises(BadRequest): + with self.assertRaises(GoogleAPICallError): list(client.query(query_string, location="US", job_config=query_config)) # Cannot copy from US. - with self.assertRaises(BadRequest): + with self.assertRaises(GoogleAPICallError): client.copy_table( table_ref, dataset.table("letters2_us"), location="US" ).result() # Cannot extract from US. - with self.assertRaises(BadRequest): + with self.assertRaises(GoogleAPICallError): client.extract_table( table_ref, "gs://{}/letters-us.csv".format(bucket_name), location="US" ).result() From c5216d02223d2a3d86328d26ed9d1b92420f5a6e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 4 Feb 2019 15:55:11 -0800 Subject: [PATCH 0541/2016] Release bigquery 1.9.0 (#7285) * Release 1.9.0 --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 975a26d4c739..a50593975516 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.9.0 + +02-04-2019 13:28 PST + +### New Features + +- Add arguments to select `dtypes` and use BQ Storage API to `QueryJob.to_dataframe()`. ([#7241](https://github.com/googleapis/google-cloud-python/pull/7241)) + +### Documentation + +- Add sample for fetching `total_rows` from query results. ([#7217](https://github.com/googleapis/google-cloud-python/pull/7217)) + ## 1.8.1 12-17-2018 17:53 PST diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 5d155e24d1a8..2dd4f94031b5 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.8.1' +version = '1.9.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From b32b119e0231f23cf0835cba4e22fdd2279f298c Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 7 Feb 2019 13:40:38 -0500 Subject: [PATCH 0542/2016] Allow nested records w/ null values. (#7297) Adds explicit unit tests for helpers added in #7022. Closes #7294. --- .../google/cloud/bigquery/_helpers.py | 5 +- .../tests/unit/test__helpers.py | 121 ++++++++++++++++++ 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 10753cfc998b..72e1fa276dd7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -398,7 +398,10 @@ def _record_field_to_json(fields, row_value): for subindex, subfield in enumerate(fields): subname = subfield.name - subvalue = row_value[subname] if isdict else row_value[subindex] + if isdict: + subvalue = row_value.get(subname) + else: + subvalue = row_value[subindex] record[subname] = _field_to_json(subfield, subvalue) return record diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index c2c4f9f7f787..3884695d83af 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -776,6 +776,127 @@ def test_w_datetime(self): self.assertEqual(self._call_fut(when), "12:13:41") +def _make_field(field_type, mode="NULLABLE", name="testing", fields=()): + from google.cloud.bigquery.schema import SchemaField + + return SchemaField(name=name, field_type=field_type, mode=mode, fields=fields) + + +class Test_scalar_field_to_json(unittest.TestCase): + def _call_fut(self, field, value): + from google.cloud.bigquery._helpers import _scalar_field_to_json + + return _scalar_field_to_json(field, value) + + def test_w_unknown_field_type(self): + field = _make_field("UNKNOWN") + original = object() + converted = self._call_fut(field, original) + self.assertIs(converted, original) + + def test_w_known_field_type(self): + field = _make_field("INT64") + original = 42 + converted = self._call_fut(field, original) + self.assertEqual(converted, str(original)) + + +class Test_repeated_field_to_json(unittest.TestCase): + def _call_fut(self, field, value): + from google.cloud.bigquery._helpers import _repeated_field_to_json + + return _repeated_field_to_json(field, value) + + def test_w_empty(self): + field = _make_field("INT64", mode="REPEATED") + original = [] + converted = self._call_fut(field, original) + self.assertEqual(converted, original) + self.assertEqual(field.mode, "REPEATED") + + def test_w_non_empty(self): + field = _make_field("INT64", mode="REPEATED") + original = [42] + converted = self._call_fut(field, original) + self.assertEqual(converted, [str(value) for value in original]) + self.assertEqual(field.mode, "REPEATED") + + +class Test_record_field_to_json(unittest.TestCase): + def _call_fut(self, field, value): + from google.cloud.bigquery._helpers import _record_field_to_json + + return _record_field_to_json(field, value) + + def test_w_empty(self): + fields = [] + original = [] + converted = self._call_fut(fields, original) + self.assertEqual(converted, {}) + + def test_w_non_empty_list(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = [42, "two"] + converted = self._call_fut(fields, original) + self.assertEqual(converted, {"one": "42", "two": "two"}) + + def test_w_non_empty_dict(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = {"one": 42, "two": "two"} + converted = self._call_fut(fields, original) + self.assertEqual(converted, {"one": "42", "two": "two"}) + + def test_w_missing_nullable(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = {"one": 42} + converted = self._call_fut(fields, original) + self.assertEqual(converted, {"one": "42", "two": None}) + + +class Test_field_to_json(unittest.TestCase): + def _call_fut(self, field, value): + from google.cloud.bigquery._helpers import _field_to_json + + return _field_to_json(field, value) + + def test_w_none(self): + field = _make_field("INT64") + original = None + converted = self._call_fut(field, original) + self.assertIsNone(converted) + + def test_w_repeated(self): + field = _make_field("INT64", mode="REPEATED") + original = [42, 17] + converted = self._call_fut(field, original) + self.assertEqual(converted, [str(value) for value in original]) + + def test_w_record(self): + subfields = [ + _make_field("INT64", name="one"), + _make_field("STRING", name="two"), + ] + field = _make_field("RECORD", fields=subfields) + original = {"one": 42, "two": "two"} + converted = self._call_fut(field, original) + self.assertEqual(converted, {"one": "42", "two": "two"}) + + def test_w_scalar(self): + field = _make_field("INT64") + original = 42 + converted = self._call_fut(field, original) + self.assertEqual(converted, str(original)) + + class Test_snake_to_camel_case(unittest.TestCase): def _call_fut(self, value): from google.cloud.bigquery._helpers import _snake_to_camel_case From 6a265cf150b62cf015c6032786eb249fe0bc28d6 Mon Sep 17 00:00:00 2001 From: Pravin Dahal Date: Mon, 11 Feb 2019 19:12:32 +0100 Subject: [PATCH 0543/2016] Updated client library documentation URLs. (#7307) Previously, the URLs would redirect using JavaScript, which would either be slow or not work at all (in case JavaScript is disabled on the browser) --- packages/google-cloud-bigquery/README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 4a237b3fa6ab..a25b3c3a87fc 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -18,7 +18,7 @@ processing power of Google's infrastructure. .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery -.. _Client Library Documentation: https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/index.html +.. _Client Library Documentation: https://googleapis.github.io/google-cloud-python/latest/bigquery/index.html .. _Product Documentation: https://cloud.google.com/bigquery/docs/reference/v2/ Quick Start @@ -34,7 +34,7 @@ In order to use this library, you first need to go through the following steps: .. _Select or create a Cloud Platform project.: https://console.cloud.google.com/project .. _Enable billing for your project.: https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project .. _Enable the Google Cloud BigQuery API.: https://cloud.google.com/bigquery -.. _Setup Authentication.: https://googlecloudplatform.github.io/google-cloud-python/latest/core/auth.html +.. _Setup Authentication.: https://googleapis.github.io/google-cloud-python/latest/core/auth.html Installation ~~~~~~~~~~~~ From 84199fe7beebe5fe2eead556cf7a2407ee2fe38d Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 12 Feb 2019 16:05:54 -0500 Subject: [PATCH 0544/2016] Harden 'ArrayQueryParameter.from_api_repr' against missing 'parameterValue'. (#7311) Closes #7309. --- .../google/cloud/bigquery/query.py | 4 +++- .../google-cloud-bigquery/tests/unit/test_query.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 685d83cf9c7f..726b598d3aaf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -230,7 +230,9 @@ def _from_api_repr_struct(cls, resource): def _from_api_repr_scalar(cls, resource): name = resource.get("name") array_type = resource["parameterType"]["arrayType"]["type"] - values = [value["value"] for value in resource["parameterValue"]["arrayValues"]] + parameter_value = resource.get("parameterValue", {}) + array_values = parameter_value.get("arrayValues", ()) + values = [value["value"] for value in array_values] converted = [ _QUERY_PARAMS_FROM_JSON[array_type](value, None) for value in values ] diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index f50335082349..896ab78e3024 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -362,6 +362,18 @@ def test_from_api_repr_wo_name(self): self.assertEqual(param.array_type, "INT64") self.assertEqual(param.values, [1, 2]) + def test_from_api_repr_wo_values(self): + # Back-end may not send back values for empty array params. See #7309 + RESOURCE = { + "name": "foo", + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, "foo") + self.assertEqual(param.array_type, "INT64") + self.assertEqual(param.values, []) + def test_from_api_repr_w_struct_type(self): from google.cloud.bigquery.query import StructQueryParameter From 24d2f2a97e01a6c2265b89db487111e2b74d0c47 Mon Sep 17 00:00:00 2001 From: Kyle Krueger Date: Thu, 14 Feb 2019 01:15:10 +0100 Subject: [PATCH 0545/2016] Update docstring of RowIterator's to_dataframe (#7306) --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 7f3392631adb..1614c70dc069 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1381,7 +1381,8 @@ def get_dataframe(stream): return pandas.concat(frames)[columns] def to_dataframe(self, bqstorage_client=None, dtypes=None): - """Create a pandas DataFrame from the query results. + """Create a pandas DataFrame by loading all pages of a query. + Args: bqstorage_client ( \ From 9bbfe599a874eefcb58b7e1f8772ba051049d8c8 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 19 Feb 2019 16:19:09 -0500 Subject: [PATCH 0546/2016] Fix lint. (#7383) W293 blank line contains whitespace. --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 1614c70dc069..0639810f896d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1382,7 +1382,7 @@ def get_dataframe(stream): def to_dataframe(self, bqstorage_client=None, dtypes=None): """Create a pandas DataFrame by loading all pages of a query. - + Args: bqstorage_client ( \ From 034c00a83f23f2f7e50b0f349652eab0403d0784 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 5 Mar 2019 14:51:52 -0800 Subject: [PATCH 0547/2016] Accept a string in Table and Dataset constructors. (#7483) This removes the another need to manually create a TableReference or DatasetReference. Instead, a developer can pass in a string to the constructor and then set the needed properties on the resource. --- .../google/cloud/bigquery/dataset.py | 11 +++++++++-- .../google/cloud/bigquery/table.py | 11 +++++++++-- .../google-cloud-bigquery/tests/unit/test_dataset.py | 11 +++++++++++ .../google-cloud-bigquery/tests/unit/test_table.py | 11 +++++++++++ 4 files changed, 40 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index c4e8e839497c..9530eac9ee60 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -306,8 +306,13 @@ class Dataset(object): https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets Args: - dataset_ref (google.cloud.bigquery.dataset.DatasetReference): - a pointer to a dataset + dataset_ref (Union[ \ + :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + str, \ + ]): + A pointer to a dataset. If ``dataset_ref`` is a string, it must + include both the project ID and the dataset ID, separated by + ``.``. """ _PROPERTY_TO_API_FIELD = { @@ -318,6 +323,8 @@ class Dataset(object): } def __init__(self, dataset_ref): + if isinstance(dataset_ref, six.string_types): + dataset_ref = DatasetReference.from_string(dataset_ref) self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}} @property diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 0639810f896d..c4b9a4c664c7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -348,8 +348,13 @@ class Table(object): https://cloud.google.com/bigquery/docs/reference/rest/v2/tables Args: - table_ref (google.cloud.bigquery.table.TableReference): - A pointer to a table + table_ref (Union[ \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): + A pointer to a table. If ``table_ref`` is a string, it must + included a project ID, dataset ID, and table ID, each separated + by ``.``. schema (List[google.cloud.bigquery.schema.SchemaField]): The table's schema """ @@ -367,6 +372,8 @@ class Table(object): } def __init__(self, table_ref, schema=None): + if isinstance(table_ref, six.string_types): + table_ref = TableReference.from_string(table_ref) self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}} # Let the @property do validation. if schema is not None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index f477904c2f7d..7774ccfe8814 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -15,6 +15,7 @@ import unittest import mock +import pytest class TestAccessEntry(unittest.TestCase): @@ -364,6 +365,16 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) + def test_ctor_string(self): + dataset = self._make_one("some-project.some_dset") + self.assertEqual(dataset.project, "some-project") + self.assertEqual(dataset.dataset_id, "some_dset") + + def test_ctor_string_wo_project_id(self): + with pytest.raises(ValueError): + # Project ID is missing. + self._make_one("some_dset") + def test_ctor_explicit(self): from google.cloud.bigquery.dataset import DatasetReference, AccessEntry diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index af20c396ac88..d9ba9db3f05d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -504,6 +504,17 @@ def test_ctor_w_schema(self): self.assertEqual(table.schema, [full_name, age]) + def test_ctor_string(self): + table = self._make_one("some-project.some_dset.some_tbl") + self.assertEqual(table.project, "some-project") + self.assertEqual(table.dataset_id, "some_dset") + self.assertEqual(table.table_id, "some_tbl") + + def test_ctor_string_wo_project_id(self): + with pytest.raises(ValueError): + # Project ID is missing. + self._make_one("some_dset.some_tbl") + def test_num_bytes_getter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) From 1775e06f72fbeec1fe8dec5bce068ffeb8bc07a8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 6 Mar 2019 15:04:07 -0800 Subject: [PATCH 0548/2016] Add options to ignore errors when creating/deleting datasets/tables. (#7491) In create_dataset and create_table, exists_ok ignores errors when a dataset/table already exists. It makes a GET request to get the existing resource if it already exists. In delete_dataset and delete_table, not_found_ok ignores errors when a dataset/table doesn't exist. --- .../google/cloud/bigquery/client.py | 70 +++++-- .../tests/unit/test_client.py | 189 ++++++++++++++++++ 2 files changed, 243 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 96f1310c3f99..95d49e12968a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -32,6 +32,7 @@ from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload +import google.api_core.exceptions from google.api_core import page_iterator import google.cloud._helpers from google.cloud import exceptions @@ -297,7 +298,7 @@ def dataset(self, dataset_id, project=None): return DatasetReference(project, dataset_id) - def create_dataset(self, dataset): + def create_dataset(self, dataset, exists_ok=False, retry=DEFAULT_RETRY): """API call: create the dataset via a POST request. See @@ -312,6 +313,11 @@ def create_dataset(self, dataset): A :class:`~google.cloud.bigquery.dataset.Dataset` to create. If ``dataset`` is a reference, an empty dataset is created with the specified ID and client's default location. + exists_ok (bool): + Defaults to ``False``. If ``True``, ignore "already exists" + errors when creating the dataset. + retry (google.api_core.retry.Retry): + Optional. How to retry the RPC. Returns: google.cloud.bigquery.dataset.Dataset: @@ -338,11 +344,15 @@ def create_dataset(self, dataset): if data.get("location") is None and self.location is not None: data["location"] = self.location - api_response = self._connection.api_request(method="POST", path=path, data=data) - - return Dataset.from_api_repr(api_response) - - def create_table(self, table): + try: + api_response = self._call_api(retry, method="POST", path=path, data=data) + return Dataset.from_api_repr(api_response) + except google.api_core.exceptions.Conflict: + if not exists_ok: + raise + return self.get_dataset(dataset.reference, retry=retry) + + def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY): """API call: create a table via a PUT request See @@ -358,6 +368,11 @@ def create_table(self, table): If ``table`` is a reference, an empty table is created with the specified ID. The dataset that the table belongs to must already exist. + exists_ok (bool): + Defaults to ``False``. If ``True``, ignore "already exists" + errors when creating the table. + retry (google.api_core.retry.Retry): + Optional. How to retry the RPC. Returns: google.cloud.bigquery.table.Table: @@ -369,10 +384,14 @@ def create_table(self, table): table = Table(table) path = "/projects/%s/datasets/%s/tables" % (table.project, table.dataset_id) - api_response = self._connection.api_request( - method="POST", path=path, data=table.to_api_repr() - ) - return Table.from_api_repr(api_response) + data = table.to_api_repr() + try: + api_response = self._call_api(retry, method="POST", path=path, data=data) + return Table.from_api_repr(api_response) + except google.api_core.exceptions.Conflict: + if not exists_ok: + raise + return self.get_table(table.reference, retry=retry) def _call_api(self, retry, **kwargs): call = functools.partial(self._connection.api_request, **kwargs) @@ -563,7 +582,9 @@ def list_tables( result.dataset = dataset return result - def delete_dataset(self, dataset, delete_contents=False, retry=DEFAULT_RETRY): + def delete_dataset( + self, dataset, delete_contents=False, retry=DEFAULT_RETRY, not_found_ok=False + ): """Delete a dataset. See @@ -579,12 +600,15 @@ def delete_dataset(self, dataset, delete_contents=False, retry=DEFAULT_RETRY): in, this method attempts to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. - retry (:class:`google.api_core.retry.Retry`): - (Optional) How to retry the RPC. delete_contents (boolean): (Optional) If True, delete all the tables in the dataset. If False and the dataset contains tables, the request will fail. Default is False. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. + not_found_ok (bool): + Defaults to ``False``. If ``True``, ignore "not found" errors + when deleting the dataset. """ if isinstance(dataset, str): dataset = DatasetReference.from_string( @@ -598,9 +622,15 @@ def delete_dataset(self, dataset, delete_contents=False, retry=DEFAULT_RETRY): if delete_contents: params["deleteContents"] = "true" - self._call_api(retry, method="DELETE", path=dataset.path, query_params=params) + try: + self._call_api( + retry, method="DELETE", path=dataset.path, query_params=params + ) + except google.api_core.exceptions.NotFound: + if not not_found_ok: + raise - def delete_table(self, table, retry=DEFAULT_RETRY): + def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False): """Delete a table See @@ -618,13 +648,21 @@ def delete_table(self, table, retry=DEFAULT_RETRY): :func:`google.cloud.bigquery.table.TableReference.from_string`. retry (:class:`google.api_core.retry.Retry`): (Optional) How to retry the RPC. + not_found_ok (bool): + Defaults to ``False``. If ``True``, ignore "not found" errors + when deleting the table. """ if isinstance(table, str): table = TableReference.from_string(table, default_project=self.project) if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or a TableReference") - self._call_api(retry, method="DELETE", path=table.path) + + try: + self._call_api(retry, method="DELETE", path=table.path) + except google.api_core.exceptions.NotFound: + if not not_found_ok: + raise def _get_query_results( self, job_id, retry, project=None, timeout_ms=None, location=None diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c3d90ed640fb..a98ee79aa116 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -36,6 +36,7 @@ except (ImportError, AttributeError): # pragma: NO COVER pyarrow = None +import google.api_core.exceptions from google.cloud.bigquery.dataset import DatasetReference @@ -804,6 +805,61 @@ def test_create_dataset_w_string(self): }, ) + def test_create_dataset_alreadyexists_w_exists_ok_false(self): + creds = _make_credentials() + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) + client._connection = _make_connection( + google.api_core.exceptions.AlreadyExists("dataset already exists") + ) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + client.create_dataset(self.DS_ID) + + def test_create_dataset_alreadyexists_w_exists_ok_true(self): + post_path = "/projects/{}/datasets".format(self.PROJECT) + get_path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) + resource = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "id": "{}:{}".format(self.PROJECT, self.DS_ID), + "location": self.LOCATION, + } + creds = _make_credentials() + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) + conn = client._connection = _make_connection( + google.api_core.exceptions.AlreadyExists("dataset already exists"), resource + ) + + dataset = client.create_dataset(self.DS_ID, exists_ok=True) + + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) + self.assertEqual(dataset.etag, resource["etag"]) + self.assertEqual(dataset.full_dataset_id, resource["id"]) + self.assertEqual(dataset.location, self.LOCATION) + + conn.api_request.assert_has_calls( + [ + mock.call( + method="POST", + path=post_path, + data={ + "datasetReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + }, + "labels": {}, + "location": self.LOCATION, + }, + ), + mock.call(method="GET", path=get_path), + ] + ) + def test_create_table_w_day_partition(self): from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TimePartitioning @@ -1177,6 +1233,79 @@ def test_create_table_w_string(self): ) self.assertEqual(got.table_id, self.TABLE_ID) + def test_create_table_alreadyexists_w_exists_ok_false(self): + post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) + conn = client._connection = _make_connection( + google.api_core.exceptions.AlreadyExists("table already exists") + ) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) + + conn.api_request.assert_called_once_with( + method="POST", + path=post_path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, + ) + + def test_create_table_alreadyexists_w_exists_ok_true(self): + post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) + get_path = "/projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID + ) + resource = { + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + } + creds = _make_credentials() + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) + conn = client._connection = _make_connection( + google.api_core.exceptions.AlreadyExists("table already exists"), resource + ) + + got = client.create_table( + "{}.{}".format(self.DS_ID, self.TABLE_ID), exists_ok=True + ) + + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual(got.table_id, self.TABLE_ID) + + conn.api_request.assert_has_calls( + [ + mock.call( + method="POST", + path=post_path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, + ), + mock.call(method="GET", path=get_path), + ] + ) + def test_get_table(self): path = "projects/%s/datasets/%s/tables/%s" % ( self.PROJECT, @@ -1804,6 +1933,33 @@ def test_delete_dataset_wrong_type(self): with self.assertRaises(TypeError): client.delete_dataset(client.dataset(self.DS_ID).table("foo")) + def test_delete_dataset_w_not_found_ok_false(self): + path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _make_connection( + google.api_core.exceptions.NotFound("dataset not found") + ) + + with self.assertRaises(google.api_core.exceptions.NotFound): + client.delete_dataset(self.DS_ID) + + conn.api_request.assert_called_with(method="DELETE", path=path, query_params={}) + + def test_delete_dataset_w_not_found_ok_true(self): + path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _make_connection( + google.api_core.exceptions.NotFound("dataset not found") + ) + + client.delete_dataset(self.DS_ID, not_found_ok=True) + + conn.api_request.assert_called_with(method="DELETE", path=path, query_params={}) + def test_delete_table(self): from google.cloud.bigquery.table import Table @@ -1836,6 +1992,39 @@ def test_delete_table_w_wrong_type(self): with self.assertRaises(TypeError): client.delete_table(client.dataset(self.DS_ID)) + def test_delete_table_w_not_found_ok_false(self): + path = "/projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID + ) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _make_connection( + google.api_core.exceptions.NotFound("table not found") + ) + + with self.assertRaises(google.api_core.exceptions.NotFound): + client.delete_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) + + conn.api_request.assert_called_with(method="DELETE", path=path) + + def test_delete_table_w_not_found_ok_true(self): + path = "/projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID + ) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _make_connection( + google.api_core.exceptions.NotFound("table not found") + ) + + client.delete_table( + "{}.{}".format(self.DS_ID, self.TABLE_ID), not_found_ok=True + ) + + conn.api_request.assert_called_with(method="DELETE", path=path) + def test_job_from_resource_unknown_type(self): from google.cloud.bigquery.job import UnknownJob From 7528e22f6181e21fb5d6ed414a477ffa79535132 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 6 Mar 2019 15:29:40 -0800 Subject: [PATCH 0549/2016] Release 1.10.0 (#7497) --- packages/google-cloud-bigquery/CHANGELOG.md | 23 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index a50593975516..be3414e2bcf1 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,29 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.10.0 + +03-06-2019 15:20 PST + +### Implementation Changes + +- Harden 'ArrayQueryParameter.from_api_repr' against missing 'parameterValue'. ([#7311](https://github.com/googleapis/google-cloud-python/pull/7311)) +- Allow nested records w/ null values. ([#7297](https://github.com/googleapis/google-cloud-python/pull/7297)) + +### New Features + +- Add options to ignore errors when creating/deleting datasets/tables. ([#7491](https://github.com/googleapis/google-cloud-python/pull/7491)) +- Accept a string in Table and Dataset constructors. ([#7483](https://github.com/googleapis/google-cloud-python/pull/7483)) + +### Documentation + +- Update docstring of RowIterator's to_dataframe ([#7306](https://github.com/googleapis/google-cloud-python/pull/7306)) +- Updated client library documentation URLs. ([#7307](https://github.com/googleapis/google-cloud-python/pull/7307)) + +### Internal / Testing Changes + +- Fix lint. ([#7383](https://github.com/googleapis/google-cloud-python/pull/7383)) + ## 1.9.0 02-04-2019 13:28 PST diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 2dd4f94031b5..7cd901917e4c 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = 'google-cloud-bigquery' description = 'Google BigQuery API client library' -version = '1.9.0' +version = '1.10.0' # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From f548ab322d31beaadf35d4e2e5d793e70791effb Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 13 Mar 2019 14:22:15 -0700 Subject: [PATCH 0550/2016] Remove typo in 'Table.from_api_repr' docstring. (#7509) --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index c4b9a4c664c7..35f950e7d94a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -821,8 +821,6 @@ def from_api_repr(cls, resource): Args: resource (Dict[str, object]): Table resource representation from the API - dataset (google.cloud.bigquery.dataset.Dataset): - The dataset containing the table. Returns: google.cloud.bigquery.table.Table: Table parsed from ``resource``. From 708cbe467a3b019a4663c615d0187285560525c8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 18 Mar 2019 09:23:47 -0700 Subject: [PATCH 0551/2016] Refactor table() methods into shared implementation. (#7516) The method and docstring for the table() method are identical for Dataset, DatasetReference, and DatasetListItem. This commit moves the implementation to a shared method to reduce code duplication. --- .../google/cloud/bigquery/dataset.py | 49 ++++++------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 9530eac9ee60..2a71c900baf0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -24,6 +24,19 @@ from google.cloud.bigquery.table import TableReference +def _get_table_reference(self, table_id): + """Constructs a TableReference. + + Args: + table_id (str): The ID of the table. + + Returns: + google.cloud.bigquery.table.TableReference: + A table reference for a table in this dataset. + """ + return TableReference(self, table_id) + + class AccessEntry(object): """Represents grant of an access role to an entity. @@ -191,17 +204,7 @@ def path(self): """str: URL path for the dataset based on project and dataset ID.""" return "/projects/%s/datasets/%s" % (self.project, self.dataset_id) - def table(self, table_id): - """Constructs a TableReference. - - Args: - table_id (str): The ID of the table. - - Returns: - google.cloud.bigquery.table.TableReference: - A table reference for a table in this dataset. - """ - return TableReference(self, table_id) + table = _get_table_reference @classmethod def from_api_repr(cls, resource): @@ -578,17 +581,7 @@ def _build_resource(self, filter_fields): return partial - def table(self, table_id): - """Constructs a TableReference. - - Args: - table_id (str): the ID of the table. - - Returns: - google.cloud.bigquery.table.TableReference: - A TableReference for a table in this dataset. - """ - return TableReference(self.reference, table_id) + table = _get_table_reference def __repr__(self): return "Dataset({})".format(repr(self.reference)) @@ -668,14 +661,4 @@ def reference(self): """ return DatasetReference(self.project, self.dataset_id) - def table(self, table_id): - """Constructs a TableReference. - - Args: - table_id (str): the ID of the table. - - Returns: - google.cloud.bigquery.table.TableReference: - A TableReference for a table in this dataset. - """ - return TableReference(self.reference, table_id) + table = _get_table_reference From 2c2d2a2dad0b0c54ffba2e27b2d0ef4d39af7d98 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Wed, 20 Mar 2019 13:41:12 -0700 Subject: [PATCH 0552/2016] Remove classifier for Python 3.4 for end-of-life. (#7535) * Remove classifier for Python 3.4 for end-of-life. * Update supported versions in Client README, Contributing Guide --- packages/google-cloud-bigquery/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index a25b3c3a87fc..4279f215d6c3 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -52,7 +52,7 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.4 +Python >= 3.5 Deprecated Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^ From d86655158cbeda449d236ba357e6542d5185a64e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 22 Mar 2019 16:40:57 -0700 Subject: [PATCH 0553/2016] Add docs session to nox configuration for BigQuery (#7541) * Add docs session to nox configuration for BigQuery * Run with `nox -s docs`. This allows us to just build the BigQuery docs to preview changes more quickly. * Moves the magics module documentation to the top level. I found it impossible to debug the autosummary errors with generating the docs for the magics module, so I move those docs to our more standard automodule pattern. With automodule, the error messages were more clear (the IPython package was missing in my original implementation). Since this module is referenced in documentation and elsewhere, add a redirect to the new magics module documentation from the old generated location. * Add license header to conf.py --- packages/google-cloud-bigquery/.gitignore | 1 + .../google-cloud-bigquery/docs/README.rst | 1 + packages/google-cloud-bigquery/docs/conf.py | 363 ++++++++++++++++++ .../google.cloud.bigquery.magics.html | 8 + packages/google-cloud-bigquery/docs/index.rst | 3 +- .../google-cloud-bigquery/docs/magics.rst | 5 + .../google-cloud-bigquery/docs/reference.rst | 4 +- packages/google-cloud-bigquery/noxfile.py | 24 ++ 8 files changed, 405 insertions(+), 4 deletions(-) create mode 100644 packages/google-cloud-bigquery/.gitignore create mode 120000 packages/google-cloud-bigquery/docs/README.rst create mode 100644 packages/google-cloud-bigquery/docs/conf.py create mode 100644 packages/google-cloud-bigquery/docs/generated/google.cloud.bigquery.magics.html create mode 100644 packages/google-cloud-bigquery/docs/magics.rst diff --git a/packages/google-cloud-bigquery/.gitignore b/packages/google-cloud-bigquery/.gitignore new file mode 100644 index 000000000000..9e3a5f25770c --- /dev/null +++ b/packages/google-cloud-bigquery/.gitignore @@ -0,0 +1 @@ +docs/_build \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/README.rst b/packages/google-cloud-bigquery/docs/README.rst new file mode 120000 index 000000000000..89a0106941ff --- /dev/null +++ b/packages/google-cloud-bigquery/docs/README.rst @@ -0,0 +1 @@ +../README.rst \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py new file mode 100644 index 000000000000..62815ae73b38 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -0,0 +1,363 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# google-cloud-bigquery documentation build configuration file +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os +import shutil + +from sphinx.util import logging + +logger = logging.getLogger(__name__) + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0, os.path.abspath("..")) + +__version__ = "0.1.0" + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", + "sphinx.ext.coverage", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", +] + +# autodoc/autosummary flags +autoclass_content = "both" +autodoc_default_flags = ["members"] +autosummary_generate = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates", os.path.join("..", "..", "docs", "_templates")] + +# Allow markdown includes (so releases.md can include CHANGLEOG.md) +# http://www.sphinx-doc.org/en/master/markdown.html +source_parsers = {".md": "recommonmark.parser.CommonMarkParser"} + +# The suffix(es) of source filenames. +source_suffix = [".rst", ".md"] + +# The encoding of source files. +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = "index" + +# General information about the project. +project = u"google-cloud-bigquery" +copyright = u"2015, Google" +author = u"Google APIs" + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The full version, including alpha/beta/rc tags. +release = __version__ +# The short X.Y version. +version = ".".join(release.split(".")[0:2]) + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# today = '' +# Else, today_fmt is used as the format for a strftime call. +# today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ["_build"] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +# keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = "sphinx_rtd_theme" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +# html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +# html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +# html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +# html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = [] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +# html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +# html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {} + +# If false, no module index is generated. +# html_domain_indices = True + +# If false, no index is generated. +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' +# html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +# html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +# html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = "google-cloud-bigquery-doc" + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', + # Latex figure (float) alignment + #'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ( + master_doc, + "google-cloud-bigquery.tex", + u"google-cloud-bigquery Documentation", + author, + "manual", + ) +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# latex_use_parts = False + +# If true, show page references after internal links. +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# latex_appendices = [] + +# If false, no module index is generated. +# latex_domain_indices = True + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ( + master_doc, + "google-cloud-bigquery", + u"google-cloud-bigquery Documentation", + [author], + 1, + ) +] + +# If true, show URL addresses after external links. +# man_show_urls = False + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + master_doc, + "google-cloud-bigquery", + u"google-cloud-bigquery Documentation", + author, + "google-cloud-bigquery", + "APIs", + ) +] + +# Documents to append as an appendix to all manuals. +# texinfo_appendices = [] + +# If false, no module index is generated. +# texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +# texinfo_no_detailmenu = False + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = { + "python": ("http://python.readthedocs.org/en/latest/", None), + "gax": ("https://gax-python.readthedocs.org/en/latest/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), +} + +# Napoleon settings +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = False +napoleon_use_admonition_for_notes = False +napoleon_use_admonition_for_references = False +napoleon_use_ivar = False +napoleon_use_param = True +napoleon_use_rtype = True + +# Static HTML pages, e.g. to support redirects +# See: https://tech.signavio.com/2017/managing-sphinx-redirects +# HTML pages to be copied from source to target +static_html_pages = ["usage.html", "generated/google.cloud.bigquery.magics.html"] + + +def copy_static_html_pages(app, exception): + if exception is None and app.builder.name == "html": + for static_html_page in static_html_pages: + target_path = app.outdir + "/" + static_html_page + src_path = app.srcdir + "/" + static_html_page + if os.path.isfile(src_path): + logger.info("Copying static html: %s -> %s", src_path, target_path) + shutil.copyfile(src_path, target_path) + + +def setup(app): + app.connect("build-finished", copy_static_html_pages) diff --git a/packages/google-cloud-bigquery/docs/generated/google.cloud.bigquery.magics.html b/packages/google-cloud-bigquery/docs/generated/google.cloud.bigquery.magics.html new file mode 100644 index 000000000000..0d2a00fa14c9 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/generated/google.cloud.bigquery.magics.html @@ -0,0 +1,8 @@ + + + + + + diff --git a/packages/google-cloud-bigquery/docs/index.rst b/packages/google-cloud-bigquery/docs/index.rst index 99977545436d..ec360de69770 100644 --- a/packages/google-cloud-bigquery/docs/index.rst +++ b/packages/google-cloud-bigquery/docs/index.rst @@ -1,4 +1,4 @@ -.. include:: /../bigquery/README.rst +.. include:: README.rst More Examples ~~~~~~~~~~~~~ @@ -27,4 +27,3 @@ For a list of all ``google-cloud-bigquery`` releases: :maxdepth: 2 changelog - diff --git a/packages/google-cloud-bigquery/docs/magics.rst b/packages/google-cloud-bigquery/docs/magics.rst new file mode 100644 index 000000000000..732c27af94dd --- /dev/null +++ b/packages/google-cloud-bigquery/docs/magics.rst @@ -0,0 +1,5 @@ +IPython Magics for BigQuery +=========================== + +.. automodule:: google.cloud.bigquery.magics + :members: diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index b66117b59b31..6676c6f0128f 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -141,7 +141,7 @@ External Configuration Magics ====================== -.. autosummary:: - :toctree: generated +.. toctree:: + :maxdepth: 2 magics diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 089a82375606..a3c19819c90a 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -15,6 +15,7 @@ from __future__ import absolute_import import os +import shutil import nox @@ -180,3 +181,26 @@ def blacken(session): "tests", "docs", ) + + +@nox.session(python='3.6') +def docs(session): + """Build the docs.""" + + session.install('ipython', 'recommonmark', 'sphinx', 'sphinx_rtd_theme') + for local_dep in LOCAL_DEPS: + session.install('-e', local_dep) + session.install('-e', os.path.join('..', 'storage')) + session.install('-e', '.[pandas, pyarrow]') + + shutil.rmtree(os.path.join('docs', '_build'), ignore_errors=True) + session.run( + 'sphinx-build', + '-W', # warnings as errors + '-T', # show full traceback on exception + '-N', # no colors + '-b', 'html', + '-d', os.path.join('docs', '_build', 'doctrees', ''), + os.path.join('docs', ''), + os.path.join('docs', '_build', 'html', ''), + ) From d2979a17a3e0cd60a1dcad9a009f40d9230a82fe Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 25 Mar 2019 09:07:18 -0700 Subject: [PATCH 0554/2016] Use temporary file in load_table_from_dataframe (#7545) * Use temporary file in load_table_from_dataframe This fixes a bug where `load_table_from_dataframe` could not be used with the `fastparquet` library. It should also use less memory when uploading large dataframes. * Add tests using the fastparquet engine. --- .../google-cloud-bigquery/docs/snippets.py | 15 ++++++- .../google/cloud/bigquery/client.py | 39 ++++++++++++------- packages/google-cloud-bigquery/noxfile.py | 2 +- packages/google-cloud-bigquery/setup.py | 1 + .../tests/unit/test_client.py | 14 +++---- 5 files changed, 45 insertions(+), 26 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 5356700a495a..00569c40af18 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -30,6 +30,10 @@ import pytest import six +try: + import fastparquet +except (ImportError, AttributeError): + fastparquet = None try: import pandas except (ImportError, AttributeError): @@ -3108,8 +3112,15 @@ def test_list_rows_as_dataframe(client): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") -def test_load_table_from_dataframe(client, to_delete): +@pytest.mark.parametrize("parquet_engine", ["pyarrow", "fastparquet"]) +def test_load_table_from_dataframe(client, to_delete, parquet_engine): + if parquet_engine == "pyarrow" and pyarrow is None: + pytest.skip("Requires `pyarrow`") + if parquet_engine == "fastparquet" and fastparquet is None: + pytest.skip("Requires `fastparquet`") + + pandas.set_option("io.parquet.engine", parquet_engine) + dataset_id = "load_table_from_dataframe_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 95d49e12968a..7fe412478bfa 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -24,6 +24,7 @@ import functools import gzip import os +import tempfile import uuid import six @@ -1124,10 +1125,10 @@ def load_table_from_dataframe( Raises: ImportError: If a usable parquet engine cannot be found. This method - requires :mod:`pyarrow` to be installed. + requires :mod:`pyarrow` or :mod:`fastparquet` to be + installed. """ - buffer = six.BytesIO() - dataframe.to_parquet(buffer) + job_id = _make_job_id(job_id, job_id_prefix) if job_config is None: job_config = job.LoadJobConfig() @@ -1136,17 +1137,27 @@ def load_table_from_dataframe( if location is None: location = self.location - return self.load_table_from_file( - buffer, - destination, - num_retries=num_retries, - rewind=True, - job_id=job_id, - job_id_prefix=job_id_prefix, - location=location, - project=project, - job_config=job_config, - ) + tmpfd, tmppath = tempfile.mkstemp(suffix="_job_{}.parquet".format(job_id[:8])) + os.close(tmpfd) + + try: + dataframe.to_parquet(tmppath) + + with open(tmppath, "rb") as parquet_file: + return self.load_table_from_file( + parquet_file, + destination, + num_retries=num_retries, + rewind=True, + job_id=job_id, + job_id_prefix=job_id_prefix, + location=location, + project=project, + job_config=job_config, + ) + + finally: + os.remove(tmppath) def _do_resumable_upload(self, stream, metadata, num_retries): """Perform a resumable upload. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index a3c19819c90a..82846604306e 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -124,7 +124,7 @@ def snippets(session): session.install('-e', local_dep) session.install('-e', os.path.join('..', 'storage')) session.install('-e', os.path.join('..', 'test_utils')) - session.install('-e', '.[pandas, pyarrow]') + session.install('-e', '.[pandas, pyarrow, fastparquet]') # Run py.test against the snippets tests. session.run( diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 7cd901917e4c..6b4edaf561c0 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -39,6 +39,7 @@ # Exclude PyArrow dependency from Windows Python 2.7. 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': 'pyarrow>=0.4.1', + 'fastparquet': ['fastparquet', 'python-snappy'], } diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index a98ee79aa116..794b76a0a9f4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4658,7 +4658,7 @@ def test_load_table_from_dataframe(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, - job_id=None, + job_id=mock.ANY, job_id_prefix=None, location=None, project=None, @@ -4666,9 +4666,7 @@ def test_load_table_from_dataframe(self): ) sent_file = load_table_from_file.mock_calls[0][1][1] - sent_bytes = sent_file.getvalue() - assert isinstance(sent_bytes, bytes) - assert len(sent_bytes) > 0 + assert sent_file.closed sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET @@ -4695,7 +4693,7 @@ def test_load_table_from_dataframe_w_client_location(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, - job_id=None, + job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, project=None, @@ -4703,9 +4701,7 @@ def test_load_table_from_dataframe_w_client_location(self): ) sent_file = load_table_from_file.mock_calls[0][1][1] - sent_bytes = sent_file.getvalue() - assert isinstance(sent_bytes, bytes) - assert len(sent_bytes) > 0 + assert sent_file.closed sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET @@ -4735,7 +4731,7 @@ def test_load_table_from_dataframe_w_custom_job_config(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, - job_id=None, + job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, project=None, From e4d7cd791793b6dff0243cf28f7c8376d6399f2d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 27 Mar 2019 11:24:42 -0700 Subject: [PATCH 0555/2016] Allow string for copy sources, query destination, and default dataset (#7560) * Allow string for copy sources, query destination, and default dataset This removes a few more cases where it was required to create a TableReference or DatasetReference. * Allow a string for destination in QueryJobConfig . * Allow a string for default_dataset in QueryJobConfig . * Allow a list of strings for sources in Client.copy_table. * Refactor str/table/ref to ref helper Rename the helper to `_table_arg_to_table_ref` and always return a TableReference when something sensible is passed in. Add a second helper `_table_arg_to_table` that always returns a `Table`, which is needed in `list_rows` and `insert_rows` because only a `Table` contains a schema. * Rename table_ref arg to table. Fix lint errors. * Fix typo in docstring. --- .../google/cloud/bigquery/client.py | 142 ++++++++++-------- .../google/cloud/bigquery/job.py | 44 +++++- .../google/cloud/bigquery/table.py | 25 +++ .../tests/unit/test_client.py | 31 +++- .../tests/unit/test_job.py | 43 ++++++ 5 files changed, 212 insertions(+), 73 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 7fe412478bfa..fd69502f6ea1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -48,11 +48,12 @@ from google.cloud.bigquery import job from google.cloud.bigquery.query import _QueryResults from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.table import _table_arg_to_table +from google.cloud.bigquery.table import _table_arg_to_table_ref from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableListItem from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import RowIterator -from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB @@ -68,6 +69,9 @@ _READ_LESS_THAN_SIZE = ( "Size {:d} was specified but the file-like object only had " "{:d} bytes remaining." ) +_NEED_TABLE_ARGUMENT = ( + "The table argument should be a table ID string, Table, or TableReference" +) class Project(object): @@ -379,10 +383,7 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY): google.cloud.bigquery.table.Table: A new ``Table`` returned from the service. """ - if isinstance(table, str): - table = TableReference.from_string(table, default_project=self.project) - if isinstance(table, TableReference): - table = Table(table) + table = _table_arg_to_table(table, default_project=self.project) path = "/projects/%s/datasets/%s/tables" % (table.project, table.dataset_id) data = table.to_api_repr() @@ -427,11 +428,12 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): api_response = self._call_api(retry, method="GET", path=dataset_ref.path) return Dataset.from_api_repr(api_response) - def get_table(self, table_ref, retry=DEFAULT_RETRY): - """Fetch the table referenced by ``table_ref``. + def get_table(self, table, retry=DEFAULT_RETRY): + """Fetch the table referenced by ``table``. Args: - table_ref (Union[ \ + table (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ :class:`~google.cloud.bigquery.table.TableReference`, \ str, \ ]): @@ -446,11 +448,7 @@ def get_table(self, table_ref, retry=DEFAULT_RETRY): google.cloud.bigquery.table.Table: A ``Table`` instance. """ - if isinstance(table_ref, str): - table_ref = TableReference.from_string( - table_ref, default_project=self.project - ) - + table_ref = _table_arg_to_table_ref(table, default_project=self.project) api_response = self._call_api(retry, method="GET", path=table_ref.path) return Table.from_api_repr(api_response) @@ -653,11 +651,9 @@ def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False): Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the table. """ - if isinstance(table, str): - table = TableReference.from_string(table, default_project=self.project) - - if not isinstance(table, (Table, TableReference)): - raise TypeError("table must be a Table or a TableReference") + table = _table_arg_to_table_ref(table, default_project=self.project) + if not isinstance(table, TableReference): + raise TypeError("Unable to get TableReference for table '{}'".format(table)) try: self._call_api(retry, method="DELETE", path=table.path) @@ -925,6 +921,7 @@ def load_table_from_uri( URIs of data files to be loaded; in format ``gs:///``. destination (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ :class:`~google.cloud.bigquery.table.TableReference`, \ str, \ ]): @@ -966,11 +963,7 @@ def load_table_from_uri( if isinstance(source_uris, six.string_types): source_uris = [source_uris] - if isinstance(destination, str): - destination = TableReference.from_string( - destination, default_project=self.project - ) - + destination = _table_arg_to_table_ref(destination, default_project=self.project) load_job = job.LoadJob(job_ref, source_uris, destination, self, job_config) load_job._begin(retry=retry) @@ -997,6 +990,7 @@ def load_table_from_file( Arguments: file_obj (file): A file handle opened in binary mode for reading. destination (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ :class:`~google.cloud.bigquery.table.TableReference`, \ str, \ ]): @@ -1045,11 +1039,7 @@ def load_table_from_file( if location is None: location = self.location - if isinstance(destination, str): - destination = TableReference.from_string( - destination, default_project=self.project - ) - + destination = _table_arg_to_table_ref(destination, default_project=self.project) job_ref = job._JobReference(job_id, project=project, location=location) load_job = job.LoadJob(job_ref, None, destination, self, job_config) job_resource = load_job.to_api_repr() @@ -1286,13 +1276,20 @@ def copy_table( Arguments: sources (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ :class:`~google.cloud.bigquery.table.TableReference`, \ str, \ Sequence[ \ - :class:`~google.cloud.bigquery.table.TableReference`], \ + Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ] \ + ], \ ]): Table or tables to be copied. destination (Union[ + :class:`~google.cloud.bigquery.table.Table`, \ :class:`~google.cloud.bigquery.table.TableReference`, \ str, \ ]): @@ -1328,17 +1325,23 @@ def copy_table( job_ref = job._JobReference(job_id, project=project, location=location) - if isinstance(sources, str): - sources = TableReference.from_string(sources, default_project=self.project) - - if isinstance(destination, str): - destination = TableReference.from_string( - destination, default_project=self.project - ) + # sources can be one of many different input types. (string, Table, + # TableReference, or a sequence of any of those.) Convert them all to a + # list of TableReferences. + # + # _table_arg_to_table_ref leaves lists unmodified. + sources = _table_arg_to_table_ref(sources, default_project=self.project) if not isinstance(sources, collections_abc.Sequence): sources = [sources] + sources = [ + _table_arg_to_table_ref(source, default_project=self.project) + for source in sources + ] + + destination = _table_arg_to_table_ref(destination, default_project=self.project) + copy_job = job.CopyJob( job_ref, sources, destination, client=self, job_config=job_config ) @@ -1364,6 +1367,7 @@ def extract_table( Arguments: source (Union[ \ + :class:`google.cloud.bigquery.table.Table`, \ :class:`google.cloud.bigquery.table.TableReference`, \ src, \ ]): @@ -1405,9 +1409,7 @@ def extract_table( location = self.location job_ref = job._JobReference(job_id, project=project, location=location) - - if isinstance(source, str): - source = TableReference.from_string(source, default_project=self.project) + source = _table_arg_to_table_ref(source, default_project=self.project) if isinstance(destination_uris, six.string_types): destination_uris = [destination_uris] @@ -1529,19 +1531,24 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): Raises: ValueError: if table's schema is not set """ - if isinstance(table, str): - table = TableReference.from_string(table, default_project=self.project) + table = _table_arg_to_table(table, default_project=self.project) + + if not isinstance(table, Table): + raise TypeError(_NEED_TABLE_ARGUMENT) + + schema = table.schema + # selected_fields can override the table schema. if selected_fields is not None: schema = selected_fields - elif isinstance(table, TableReference): - raise ValueError("need selected_fields with TableReference") - elif isinstance(table, Table): - if len(table.schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - schema = table.schema - else: - raise TypeError("table should be Table or TableReference") + + if len(schema) == 0: + raise ValueError( + ( + "Could not determine schema for table '{}'. Call client.get_table() " + "or pass in a list of schema fields to the selected_fields argument." + ).format(table) + ) json_rows = [_record_field_to_json(schema, row) for row in rows] @@ -1596,9 +1603,10 @@ def insert_rows_json( identifies the row, and the "errors" key contains a list of the mappings describing one or more problems with the row. """ - if isinstance(table, str): - table = TableReference.from_string(table, default_project=self.project) - + # Convert table to just a reference because unlike insert_rows, + # insert_rows_json doesn't need the table schema. It's not doing any + # type conversions. + table = _table_arg_to_table_ref(table, default_project=self.project) rows_info = [] data = {"rows": rows_info} @@ -1647,9 +1655,7 @@ def list_partitions(self, table, retry=DEFAULT_RETRY): List[str]: A list of the partition ids present in the partitioned table """ - if isinstance(table, str): - table = TableReference.from_string(table, default_project=self.project) - + table = _table_arg_to_table_ref(table, default_project=self.project) meta_table = self.get_table( TableReference( self.dataset(table.dataset_id, project=table.project), @@ -1724,19 +1730,25 @@ def list_rows( (this is distinct from the total number of rows in the current page: ``iterator.page.num_items``). """ - if isinstance(table, str): - table = TableReference.from_string(table, default_project=self.project) + table = _table_arg_to_table(table, default_project=self.project) + + if not isinstance(table, Table): + raise TypeError(_NEED_TABLE_ARGUMENT) + + schema = table.schema + # selected_fields can override the table schema. if selected_fields is not None: schema = selected_fields - elif isinstance(table, TableReference): - raise ValueError("need selected_fields with TableReference") - elif isinstance(table, Table): - if len(table.schema) == 0 and table.created is None: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - schema = table.schema - else: - raise TypeError("table should be Table or TableReference") + + # Allow listing rows of an empty table by not raising if the table exists. + elif len(schema) == 0 and table.created is None: + raise ValueError( + ( + "Could not determine schema for table '{}'. Call client.get_table() " + "or pass in a list of schema fields to the selected_fields argument." + ).format(table) + ) params = {} if selected_fields is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 4df7a92ba63c..f4c919a2d8fc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -17,11 +17,14 @@ import copy import threading +import six from six.moves import http_client import google.api_core.future.polling from google.cloud import exceptions from google.cloud.exceptions import NotFound +from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.query import _query_param_from_api_repr @@ -33,6 +36,7 @@ from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import EncryptionConfiguration +from google.cloud.bigquery.table import _table_arg_to_table_ref from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TimePartitioning @@ -1999,6 +2003,14 @@ def default_dataset(self): to use for unqualified table names in the query or :data:`None` if not set. + The ``default_dataset`` setter accepts: + + - a :class:`~google.cloud.bigquery.dataset.Dataset`, or + - a :class:`~google.cloud.bigquery.dataset.DatasetReference`, or + - a :class:`str` of the fully-qualified dataset ID in standard SQL + format. The value must included a project ID and dataset ID + separated by ``.``. For example: ``your-project.your_dataset``. + See https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset """ @@ -2009,9 +2021,17 @@ def default_dataset(self): @default_dataset.setter def default_dataset(self, value): - resource = None - if value is not None: - resource = value.to_api_repr() + if value is None: + self._set_sub_prop("defaultDataset", None) + return + + if isinstance(value, six.string_types): + value = DatasetReference.from_string(value) + + if isinstance(value, (Dataset, DatasetListItem)): + value = value.reference + + resource = value.to_api_repr() self._set_sub_prop("defaultDataset", resource) @property @@ -2019,6 +2039,15 @@ def destination(self): """google.cloud.bigquery.table.TableReference: table where results are written or :data:`None` if not set. + The ``destination`` setter accepts: + + - a :class:`~google.cloud.bigquery.table.Table`, or + - a :class:`~google.cloud.bigquery.table.TableReference`, or + - a :class:`str` of the fully-qualified table ID in standard SQL + format. The value must included a project ID, dataset ID, and table + ID, each separated by ``.``. For example: + ``your-project.your_dataset.your_table``. + See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable """ @@ -2029,9 +2058,12 @@ def destination(self): @destination.setter def destination(self, value): - resource = None - if value is not None: - resource = value.to_api_repr() + if value is None: + self._set_sub_prop("destinationTable", None) + return + + value = _table_arg_to_table_ref(value) + resource = value.to_api_repr() self._set_sub_prop("destinationTable", resource) @property diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 35f950e7d94a..46419e70a83e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1661,3 +1661,28 @@ def _rows_page_start(iterator, page, response): # pylint: enable=unused-argument + + +def _table_arg_to_table_ref(value, default_project=None): + """Helper to convert a string or Table to TableReference. + + This function keeps TableReference and other kinds of objects unchanged. + """ + if isinstance(value, six.string_types): + value = TableReference.from_string(value, default_project=default_project) + if isinstance(value, (Table, TableListItem)): + value = value.reference + return value + + +def _table_arg_to_table(value, default_project=None): + """Helper to convert a string or TableReference to a Table. + + This function keeps Table and other kinds of objects unchanged. + """ + if isinstance(value, six.string_types): + value = TableReference.from_string(value, default_project=default_project) + if isinstance(value, TableReference): + value = Table(value) + + return value diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 794b76a0a9f4..780f13bcbe3e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2820,6 +2820,33 @@ def test_copy_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource ) + def test_copy_table_w_source_strings(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + client._connection = _make_connection({}) + sources = [ + "dataset_wo_proj.some_table", + "other_project.other_dataset.other_table", + client.dataset("dataset_from_ref").table("table_from_ref"), + ] + destination = "some_project.some_dataset.destination_table" + + job = client.copy_table(sources, destination) + + expected_sources = [ + client.dataset("dataset_wo_proj").table("some_table"), + client.dataset("other_dataset", project="other_project").table( + "other_table" + ), + client.dataset("dataset_from_ref").table("table_from_ref"), + ] + self.assertEqual(list(job.sources), expected_sources) + expected_destination = client.dataset( + "some_dataset", project="some_project" + ).table("destination_table") + self.assertEqual(job.destination, expected_destination) + def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob @@ -3400,7 +3427,7 @@ def test_query_w_query_parameters(self): ) def test_insert_rows_wo_schema(self): - from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA + from google.cloud.bigquery.table import Table creds = _make_credentials() http = object() @@ -3416,7 +3443,7 @@ def test_insert_rows_wo_schema(self): with self.assertRaises(ValueError) as exc: client.insert_rows(table, ROWS) - self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) + self.assertIn("Could not determine schema for table", exc.exception.args[0]) def test_insert_rows_w_schema(self): import datetime diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 5023c41e6374..833081ce066d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -3163,6 +3163,49 @@ def test_ctor_w_properties(self): self.assertFalse(config.use_query_cache) self.assertTrue(config.use_legacy_sql) + def test_ctor_w_string_default_dataset(self): + from google.cloud.bigquery import dataset + + default_dataset = "default-proj.default_dset" + config = self._get_target_class()(default_dataset=default_dataset) + expected = dataset.DatasetReference.from_string(default_dataset) + self.assertEqual(config.default_dataset, expected) + + def test_ctor_w_string_destinaton(self): + from google.cloud.bigquery import table + + destination = "dest-proj.dest_dset.dest_tbl" + config = self._get_target_class()(destination=destination) + expected = table.TableReference.from_string(destination) + self.assertEqual(config.destination, expected) + + def test_default_dataset_w_string(self): + from google.cloud.bigquery import dataset + + default_dataset = "default-proj.default_dset" + config = self._make_one() + config.default_dataset = default_dataset + expected = dataset.DatasetReference.from_string(default_dataset) + self.assertEqual(config.default_dataset, expected) + + def test_default_dataset_w_dataset(self): + from google.cloud.bigquery import dataset + + default_dataset = "default-proj.default_dset" + expected = dataset.DatasetReference.from_string(default_dataset) + config = self._make_one() + config.default_dataset = dataset.Dataset(expected) + self.assertEqual(config.default_dataset, expected) + + def test_destinaton_w_string(self): + from google.cloud.bigquery import table + + destination = "dest-proj.dest_dset.dest_tbl" + config = self._make_one() + config.destination = destination + expected = table.TableReference.from_string(destination) + self.assertEqual(config.destination, expected) + def test_time_partitioning(self): from google.cloud.bigquery import table From ad04036eb27937b4c86ea9b6836d95c056f27f9d Mon Sep 17 00:00:00 2001 From: John Paton Date: Thu, 28 Mar 2019 23:10:20 +0100 Subject: [PATCH 0556/2016] Add tqdm progress bar for to_dataframe downloads (#7552) Add progress_bar_type argument to to_dataframe Install tqdm to use this feature. If there are any tqdm errors during progress bar construction, a warning is displayed and no progress bar is displayed. --- packages/google-cloud-bigquery/docs/conf.py | 2 +- .../google/cloud/bigquery/job.py | 12 +- .../google/cloud/bigquery/table.py | 78 +++++++++- packages/google-cloud-bigquery/noxfile.py | 4 +- packages/google-cloud-bigquery/setup.py | 1 + .../tests/unit/test_table.py | 138 ++++++++++++++++-- 6 files changed, 217 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 62815ae73b38..c9ff82d8e72b 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -326,9 +326,9 @@ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - "python": ("http://python.readthedocs.org/en/latest/", None), "gax": ("https://gax-python.readthedocs.org/en/latest/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "python": ("http://python.readthedocs.org/en/latest/", None), } # Napoleon settings diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index f4c919a2d8fc..bc87f109a484 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2810,7 +2810,7 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): dest_table = Table(dest_table_ref, schema=schema) return self._client.list_rows(dest_table, retry=retry) - def to_dataframe(self, bqstorage_client=None, dtypes=None): + def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Return a pandas DataFrame from a QueryJob Args: @@ -2837,6 +2837,16 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None): provided ``dtype`` is used when constructing the series for the column specified. Otherwise, the default pandas behavior is used. + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. + + ..versionadded:: 1.11.0 Returns: A :class:`~pandas.DataFrame` populated with row data and column diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 46419e70a83e..dcb25d8bb3c6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -30,6 +30,11 @@ except ImportError: # pragma: NO COVER pandas = None +try: + import tqdm +except ImportError: # pragma: NO COVER + tqdm = None + from google.api_core.page_iterator import HTTPIterator import google.cloud._helpers @@ -44,6 +49,10 @@ "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) +_NO_TQDM_ERROR = ( + "A progress bar was requested, but there was an error loading the tqdm " + "library. Please install tqdm to use the progress bar functionality." +) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' _MARKER = object() @@ -1330,12 +1339,22 @@ def _to_dataframe_dtypes(self, page, column_names, dtypes): columns[column] = pandas.Series(columns[column], dtype=dtypes[column]) return pandas.DataFrame(columns, columns=column_names) - def _to_dataframe_tabledata_list(self, dtypes): + def _to_dataframe_tabledata_list(self, dtypes, progress_bar=None): """Use (slower, but free) tabledata.list to construct a DataFrame.""" column_names = [field.name for field in self.schema] frames = [] + for page in iter(self.pages): - frames.append(self._to_dataframe_dtypes(page, column_names, dtypes)) + current_frame = self._to_dataframe_dtypes(page, column_names, dtypes) + frames.append(current_frame) + + if progress_bar is not None: + # In some cases, the number of total rows is not populated + # until the first page of rows is fetched. Update the + # progress bar's total to keep an accurate count. + progress_bar.total = progress_bar.total or self.total_rows + progress_bar.update(len(current_frame)) + return pandas.concat(frames) def _to_dataframe_bqstorage(self, bqstorage_client, dtypes): @@ -1385,10 +1404,37 @@ def get_dataframe(stream): # the end using manually-parsed schema. return pandas.concat(frames)[columns] - def to_dataframe(self, bqstorage_client=None, dtypes=None): + def _get_progress_bar(self, progress_bar_type): + """Construct a tqdm progress bar object, if tqdm is installed.""" + if tqdm is None: + if progress_bar_type is not None: + warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) + return None + + description = "Downloading" + unit = "rows" + + try: + if progress_bar_type == "tqdm": + return tqdm.tqdm(desc=description, total=self.total_rows, unit=unit) + elif progress_bar_type == "tqdm_notebook": + return tqdm.tqdm_notebook( + desc=description, total=self.total_rows, unit=unit + ) + elif progress_bar_type == "tqdm_gui": + return tqdm.tqdm_gui( + desc=description, total=self.total_rows, unit=unit + ) + except (KeyError, TypeError): + # Protect ourselves from any tqdm errors. In case of + # unexpected tqdm behavior, just fall back to showing + # no progress bar. + warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) + return None + + def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Create a pandas DataFrame by loading all pages of a query. - Args: bqstorage_client ( \ google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ @@ -1413,6 +1459,26 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None): provided ``dtype`` is used when constructing the series for the column specified. Otherwise, the default pandas behavior is used. + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + ..versionadded:: 1.11.0 Returns: pandas.DataFrame: @@ -1429,10 +1495,12 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None): if dtypes is None: dtypes = {} + progress_bar = self._get_progress_bar(progress_bar_type) + if bqstorage_client is not None: return self._to_dataframe_bqstorage(bqstorage_client, dtypes) else: - return self._to_dataframe_tabledata_list(dtypes) + return self._to_dataframe_tabledata_list(dtypes, progress_bar=progress_bar) class _EmptyRowIterator(object): diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 82846604306e..2c11f5b67056 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -44,9 +44,9 @@ def default(session): # Pyarrow does not support Python 3.7 if session.python == '3.7': - dev_install = '.[pandas]' + dev_install = '.[pandas, tqdm]' else: - dev_install = '.[pandas, pyarrow]' + dev_install = '.[pandas, pyarrow, tqdm]' session.install('-e', dev_install) # IPython does not support Python 2 after version 5.x diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 6b4edaf561c0..696e093cd6ff 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -39,6 +39,7 @@ # Exclude PyArrow dependency from Windows Python 2.7. 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': 'pyarrow>=0.4.1', + 'tqdm': 'tqdm >= 4.0.0, <5.0.0dev', 'fastparquet': ['fastparquet', 'python-snappy'], } diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index d9ba9db3f05d..4500856ec2a4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -15,6 +15,7 @@ import itertools import json import unittest +import warnings import mock import pytest @@ -29,6 +30,11 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + from tqdm import tqdm +except (ImportError, AttributeError): # pragma: NO COVER + tqdm = None + from google.cloud.bigquery.dataset import DatasetReference @@ -901,7 +907,6 @@ def test_time_partitioning_setter_none(self): self.assertIsNone(table.time_partitioning) def test_partitioning_type_setter(self): - import warnings from google.cloud.bigquery.table import TimePartitioningType dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -920,7 +925,6 @@ def test_partitioning_type_setter(self): self.assertIs(warning.category, PendingDeprecationWarning) def test_partitioning_type_setter_w_time_partitioning_set(self): - import warnings from google.cloud.bigquery.table import TimePartitioning dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -938,7 +942,6 @@ def test_partitioning_type_setter_w_time_partitioning_set(self): self.assertIs(warning.category, PendingDeprecationWarning) def test_partitioning_expiration_setter_w_time_partitioning_set(self): - import warnings from google.cloud.bigquery.table import TimePartitioning dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -956,8 +959,6 @@ def test_partitioning_expiration_setter_w_time_partitioning_set(self): self.assertIs(warning.category, PendingDeprecationWarning) def test_partition_expiration_setter(self): - import warnings - dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) @@ -1112,8 +1113,6 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor(self): - import warnings - project = "test-project" dataset_id = "test_dataset" table_id = "coffee_table" @@ -1191,8 +1190,6 @@ def test_ctor_view(self): self.assertTrue(table.view_use_legacy_sql) def test_ctor_missing_properties(self): - import warnings - resource = { "tableReference": { "projectId": "testproject", @@ -1413,6 +1410,129 @@ def test_to_dataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "int64") + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(tqdm is None, "Requires `tqdm`") + @mock.patch("tqdm.tqdm_gui") + @mock.patch("tqdm.tqdm_notebook") + @mock.patch("tqdm.tqdm") + def test_to_dataframe_progress_bar( + self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock + ): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + + progress_bars = ( + ("tqdm", tqdm_mock), + ("tqdm_notebook", tqdm_notebook_mock), + ("tqdm_gui", tqdm_gui_mock), + ) + + for progress_bar_type, progress_bar_mock in progress_bars: + row_iterator = RowIterator(_mock_client(), api_request, path, schema) + df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) + + progress_bar_mock.assert_called() + progress_bar_mock().update.assert_called() + self.assertEqual(len(df), 4) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @mock.patch("google.cloud.bigquery.table.tqdm", new=None) + def test_to_dataframe_no_tqdm_no_progress_bar(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) + + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe() + + self.assertEqual(len(warned), 0) + self.assertEqual(len(df), 4) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @mock.patch("google.cloud.bigquery.table.tqdm", new=None) + def test_to_dataframe_no_tqdm(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) + + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe(progress_bar_type="tqdm") + + self.assertEqual(len(warned), 1) + for warning in warned: + self.assertIs(warning.category, UserWarning) + + # Even though the progress bar won't show, downloading the dataframe + # should still work. + self.assertEqual(len(df), 4) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(tqdm is None, "Requires `tqdm`") + @mock.patch("tqdm.tqdm_gui", new=None) # will raise TypeError on call + @mock.patch("tqdm.tqdm_notebook", new=None) # will raise TypeError on call + @mock.patch("tqdm.tqdm", new=None) # will raise TypeError on call + def test_to_dataframe_tqdm_error(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + + for progress_bar_type in ("tqdm", "tqdm_notebook", "tqdm_gui"): + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) + df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) + + self.assertEqual(len(df), 4) # all should be well + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): from google.cloud.bigquery.table import RowIterator From 8b28574de03560e88ab576d7a54af00ad87bfeae Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 29 Mar 2019 20:26:53 -0700 Subject: [PATCH 0557/2016] Blacken noxfile and setup file in nox session (#7619) --- .../google/cloud/bigquery/table.py | 4 +- packages/google-cloud-bigquery/noxfile.py | 161 +++++++++--------- packages/google-cloud-bigquery/setup.py | 73 ++++---- 3 files changed, 114 insertions(+), 124 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index dcb25d8bb3c6..2bff7ec58a84 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1422,9 +1422,7 @@ def _get_progress_bar(self, progress_bar_type): desc=description, total=self.total_rows, unit=unit ) elif progress_bar_type == "tqdm_gui": - return tqdm.tqdm_gui( - desc=description, total=self.total_rows, unit=unit - ) + return tqdm.tqdm_gui(desc=description, total=self.total_rows, unit=unit) except (KeyError, TypeError): # Protect ourselves from any tqdm errors. In case of # unexpected tqdm behavior, just fall back to showing diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 2c11f5b67056..0a1296aa4c73 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -21,13 +21,15 @@ LOCAL_DEPS = ( - os.path.join('..', 'api_core[grpc]'), - os.path.join('..', 'core'), + os.path.join("..", "api_core[grpc]"), + os.path.join("..", "core"), # TODO: Move bigquery_storage back to dev_install once dtypes feature is # released. Issue #7049 - os.path.join('..', 'bigquery_storage[pandas,fastavro]'), + os.path.join("..", "bigquery_storage[pandas,fastavro]"), ) +BLACK_PATHS = ("google", "tests", "docs", "noxfile.py", "setup.py") + def default(session): """Default unit test session. @@ -38,112 +40,108 @@ def default(session): run the tests. """ # Install all test dependencies, then install local packages in-place. - session.install('mock', 'pytest', 'pytest-cov') + session.install("mock", "pytest", "pytest-cov") for local_dep in LOCAL_DEPS: - session.install('-e', local_dep) + session.install("-e", local_dep) # Pyarrow does not support Python 3.7 - if session.python == '3.7': - dev_install = '.[pandas, tqdm]' + if session.python == "3.7": + dev_install = ".[pandas, tqdm]" else: - dev_install = '.[pandas, pyarrow, tqdm]' - session.install('-e', dev_install) + dev_install = ".[pandas, pyarrow, tqdm]" + session.install("-e", dev_install) # IPython does not support Python 2 after version 5.x - if session.python == '2.7': - session.install('ipython==5.5') + if session.python == "2.7": + session.install("ipython==5.5") else: - session.install('ipython') + session.install("ipython") # Run py.test against the unit tests. session.run( - 'py.test', - '--quiet', - '--cov=google.cloud.bigquery', - '--cov=tests.unit', - '--cov-append', - '--cov-config=.coveragerc', - '--cov-report=', - '--cov-fail-under=97', - os.path.join('tests', 'unit'), + "py.test", + "--quiet", + "--cov=google.cloud.bigquery", + "--cov=tests.unit", + "--cov-append", + "--cov-config=.coveragerc", + "--cov-report=", + "--cov-fail-under=97", + os.path.join("tests", "unit"), *session.posargs ) -@nox.session(python=['2.7', '3.5', '3.6', '3.7']) +@nox.session(python=["2.7", "3.5", "3.6", "3.7"]) def unit(session): """Run the unit test suite.""" default(session) -@nox.session(python=['2.7', '3.6']) +@nox.session(python=["2.7", "3.6"]) def system(session): """Run the system test suite.""" # Sanity check: Only run system tests if the environment variable is set. - if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): - session.skip('Credentials must be set via environment variable.') + if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): + session.skip("Credentials must be set via environment variable.") # Use pre-release gRPC for system tests. - session.install('--pre', 'grpcio') + session.install("--pre", "grpcio") # Install all test dependencies, then install local packages in place. - session.install('mock', 'pytest') + session.install("mock", "pytest") for local_dep in LOCAL_DEPS: - session.install('-e', local_dep) - session.install('-e', os.path.join('..', 'storage')) - session.install('-e', os.path.join('..', 'test_utils')) - session.install('-e', '.[pandas]') + session.install("-e", local_dep) + session.install("-e", os.path.join("..", "storage")) + session.install("-e", os.path.join("..", "test_utils")) + session.install("-e", ".[pandas]") # IPython does not support Python 2 after version 5.x - if session.python == '2.7': - session.install('ipython==5.5') + if session.python == "2.7": + session.install("ipython==5.5") else: - session.install('ipython') + session.install("ipython") # Run py.test against the system tests. session.run( - 'py.test', - '--quiet', - os.path.join('tests', 'system.py'), - *session.posargs + "py.test", "--quiet", os.path.join("tests", "system.py"), *session.posargs ) -@nox.session(python=['2.7', '3.6']) +@nox.session(python=["2.7", "3.6"]) def snippets(session): """Run the snippets test suite.""" # Sanity check: Only run snippets tests if the environment variable is set. - if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): - session.skip('Credentials must be set via environment variable.') + if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): + session.skip("Credentials must be set via environment variable.") # Install all test dependencies, then install local packages in place. - session.install('mock', 'pytest') + session.install("mock", "pytest") for local_dep in LOCAL_DEPS: - session.install('-e', local_dep) - session.install('-e', os.path.join('..', 'storage')) - session.install('-e', os.path.join('..', 'test_utils')) - session.install('-e', '.[pandas, pyarrow, fastparquet]') + session.install("-e", local_dep) + session.install("-e", os.path.join("..", "storage")) + session.install("-e", os.path.join("..", "test_utils")) + session.install("-e", ".[pandas, pyarrow, fastparquet]") # Run py.test against the snippets tests. - session.run( - 'py.test', os.path.join('docs', 'snippets.py'), *session.posargs) + session.run("py.test", os.path.join("docs", "snippets.py"), *session.posargs) -@nox.session(python='3.6') +@nox.session(python="3.6") def cover(session): """Run the final coverage report. This outputs the coverage report aggregating coverage from the unit test runs (not system test runs), and then erases coverage data. """ - session.install('coverage', 'pytest-cov') - session.run('coverage', 'report', '--show-missing', '--fail-under=100') - session.run('coverage', 'erase') + session.install("coverage", "pytest-cov") + session.run("coverage", "report", "--show-missing", "--fail-under=100") + session.run("coverage", "erase") -@nox.session(python='3.6') +@nox.session(python="3.6") def lint(session): """Run linters. @@ -151,22 +149,20 @@ def lint(session): serious code quality issues. """ - session.install('flake8', *LOCAL_DEPS) - session.install('.') - session.run('flake8', os.path.join('google', 'cloud', 'bigquery')) - session.run('flake8', 'tests') - session.run( - 'flake8', os.path.join('docs', 'snippets.py')) + session.install("black", "flake8", *LOCAL_DEPS) + session.install(".") + session.run("flake8", os.path.join("google", "cloud", "bigquery")) + session.run("flake8", "tests") + session.run("flake8", os.path.join("docs", "snippets.py")) + session.run("black", "--check", *BLACK_PATHS) -@nox.session(python='3.6') +@nox.session(python="3.6") def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" - session.install('docutils', 'Pygments') - session.run( - 'python', 'setup.py', 'check', '--restructuredtext', '--strict') - + session.install("docutils", "Pygments") + session.run("python", "setup.py", "check", "--restructuredtext", "--strict") @nox.session(python="3.6") @@ -175,32 +171,29 @@ def blacken(session): Format code to uniform standard. """ session.install("black") - session.run( - "black", - "google", - "tests", - "docs", - ) + session.run("black", *BLACK_PATHS) -@nox.session(python='3.6') +@nox.session(python="3.6") def docs(session): """Build the docs.""" - session.install('ipython', 'recommonmark', 'sphinx', 'sphinx_rtd_theme') + session.install("ipython", "recommonmark", "sphinx", "sphinx_rtd_theme") for local_dep in LOCAL_DEPS: - session.install('-e', local_dep) - session.install('-e', os.path.join('..', 'storage')) - session.install('-e', '.[pandas, pyarrow]') + session.install("-e", local_dep) + session.install("-e", os.path.join("..", "storage")) + session.install("-e", ".[pandas, pyarrow]") - shutil.rmtree(os.path.join('docs', '_build'), ignore_errors=True) + shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( - 'sphinx-build', - '-W', # warnings as errors - '-T', # show full traceback on exception - '-N', # no colors - '-b', 'html', - '-d', os.path.join('docs', '_build', 'doctrees', ''), - os.path.join('docs', ''), - os.path.join('docs', '_build', 'html', ''), + "sphinx-build", + "-W", # warnings as errors + "-T", # show full traceback on exception + "-N", # no colors + "-b", + "html", + "-d", + os.path.join("docs", "_build", "doctrees", ""), + os.path.join("docs", ""), + os.path.join("docs", "_build", "html", ""), ) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 696e093cd6ff..1badaae98175 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -20,27 +20,26 @@ # Package metadata. -name = 'google-cloud-bigquery' -description = 'Google BigQuery API client library' -version = '1.10.0' +name = "google-cloud-bigquery" +description = "Google BigQuery API client library" +version = "1.10.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' # 'Development Status :: 5 - Production/Stable' -release_status = 'Development Status :: 5 - Production/Stable' +release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - 'google-api-core >= 1.6.0, < 2.0.0dev', - 'google-cloud-core >= 0.29.0, < 0.30dev', - 'google-resumable-media >= 0.3.1', + "google-api-core >= 1.6.0, < 2.0.0dev", + "google-cloud-core >= 0.29.0, < 0.30dev", + "google-resumable-media >= 0.3.1", ] extras = { - 'bqstorage': 'google-cloud-bigquery-storage >= 0.2.0dev1, <2.0.0dev', - 'pandas': 'pandas>=0.17.1', + "bqstorage": "google-cloud-bigquery-storage >= 0.2.0dev1, <2.0.0dev", + "pandas": "pandas>=0.17.1", # Exclude PyArrow dependency from Windows Python 2.7. - 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': - 'pyarrow>=0.4.1', - 'tqdm': 'tqdm >= 4.0.0, <5.0.0dev', - 'fastparquet': ['fastparquet', 'python-snappy'], + 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': "pyarrow>=0.4.1", + "tqdm": "tqdm >= 4.0.0, <5.0.0dev", + "fastparquet": ["fastparquet", "python-snappy"], } @@ -48,20 +47,20 @@ package_root = os.path.abspath(os.path.dirname(__file__)) -readme_filename = os.path.join(package_root, 'README.rst') -with io.open(readme_filename, encoding='utf-8') as readme_file: +readme_filename = os.path.join(package_root, "README.rst") +with io.open(readme_filename, encoding="utf-8") as readme_file: readme = readme_file.read() # Only include packages under the 'google' namespace. Do not include tests, # benchmarks, etc. packages = [ - package for package in setuptools.find_packages() - if package.startswith('google')] + package for package in setuptools.find_packages() if package.startswith("google") +] # Determine which namespaces are needed. -namespaces = ['google'] -if 'google.cloud' in packages: - namespaces.append('google.cloud') +namespaces = ["google"] +if "google.cloud" in packages: + namespaces.append("google.cloud") setuptools.setup( @@ -69,30 +68,30 @@ version=version, description=description, long_description=readme, - author='Google LLC', - author_email='googleapis-packages@google.com', - license='Apache 2.0', - url='https://github.com/GoogleCloudPlatform/google-cloud-python', + author="Google LLC", + author_email="googleapis-packages@google.com", + license="Apache 2.0", + url="https://github.com/GoogleCloudPlatform/google-cloud-python", classifiers=[ release_status, - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Operating System :: OS Independent', - 'Topic :: Internet', + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Operating System :: OS Independent", + "Topic :: Internet", ], - platforms='Posix; MacOS X; Windows', + platforms="Posix; MacOS X; Windows", packages=packages, namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*', + python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*", include_package_data=True, zip_safe=False, ) From 5b1cb06e2cf13d078791d9379e177e2e61cb687a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 29 Mar 2019 20:55:37 -0700 Subject: [PATCH 0558/2016] Actually use the 'progress_bar_type' argument in 'QueryJob.to_dataframe()'. (#7616) --- .../google/cloud/bigquery/job.py | 4 ++- .../google/cloud/bigquery/table.py | 4 ++- .../tests/unit/test_job.py | 35 +++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index bc87f109a484..94a2290cc29e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2857,7 +2857,9 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non ValueError: If the `pandas` library cannot be imported. """ return self.result().to_dataframe( - bqstorage_client=bqstorage_client, dtypes=dtypes + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, ) def __iter__(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 2bff7ec58a84..5b0ad45af4be 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1513,7 +1513,7 @@ class _EmptyRowIterator(object): pages = () total_rows = 0 - def to_dataframe(self, bqstorage_client=None, dtypes=None): + def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Create an empty dataframe. Args: @@ -1521,6 +1521,8 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None): Ignored. Added for compatibility with RowIterator. dtypes (Any): Ignored. Added for compatibility with RowIterator. + progress_bar_type (Any): + Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 833081ce066d..a42d9ffc311c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -27,6 +27,10 @@ from google.cloud import bigquery_storage_v1beta1 except (ImportError, AttributeError): # pragma: NO COVER bigquery_storage_v1beta1 = None +try: + from tqdm import tqdm +except (ImportError, AttributeError): # pragma: NO COVER + tqdm = None def _make_credentials(): @@ -4699,6 +4703,37 @@ def test_to_dataframe_column_dtypes(self): self.assertEqual(df.complete.dtype.name, "bool") self.assertEqual(df.date.dtype.name, "object") + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(tqdm is None, "Requires `tqdm`") + @mock.patch("tqdm.tqdm") + def test_to_dataframe_with_progress_bar(self, tqdm_mock): + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "totalRows": "4", + "schema": { + "fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}] + }, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, + query_resource, + done_resource, + query_resource, + query_resource, + ) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + job.to_dataframe(progress_bar_type=None) + tqdm_mock.assert_not_called() + + job.to_dataframe(progress_bar_type="tqdm") + tqdm_mock.assert_called() + def test_iter(self): import types From 5752801f0addfcfac320511eed82164acead8c12 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 1 Apr 2019 08:59:43 -0700 Subject: [PATCH 0559/2016] Call get_table in list_rows if the schema is not available (#7621) This is kinder than raising an error message saying to call get_table yourself. Also, it guarantees the schema is as up-to-date as possible. This also fixes an issue where rows could not be listed on the TableListItem objects that are returned from list_tables. --- .../google/cloud/bigquery/client.py | 23 +++--- .../google/cloud/bigquery/table.py | 4 ++ .../tests/unit/test_client.py | 70 ++++++++++++++++--- 3 files changed, 77 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index fd69502f6ea1..b7c6a70b27a8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1694,15 +1694,19 @@ def list_rows( Args: table (Union[ \ :class:`~google.cloud.bigquery.table.Table`, \ + :class:`~google.cloud.bigquery.table.TableListItem`, \ :class:`~google.cloud.bigquery.table.TableReference`, \ str, \ ]): - The table to list, or a reference to it. + The table to list, or a reference to it. When the table + object does not contain a schema and ``selected_fields`` is + not supplied, this method calls ``get_table`` to fetch the + table schema. selected_fields (Sequence[ \ :class:`~google.cloud.bigquery.schema.SchemaField` \ ]): - The fields to return. Required if ``table`` is a - :class:`~google.cloud.bigquery.table.TableReference`. + The fields to return. If not supplied, data for all columns + are downloaded. max_results (int): (Optional) maximum number of rows to return. page_token (str): @@ -1741,14 +1745,11 @@ def list_rows( if selected_fields is not None: schema = selected_fields - # Allow listing rows of an empty table by not raising if the table exists. - elif len(schema) == 0 and table.created is None: - raise ValueError( - ( - "Could not determine schema for table '{}'. Call client.get_table() " - "or pass in a list of schema fields to the selected_fields argument." - ).format(table) - ) + # No schema, but no selected_fields. Assume the developer wants all + # columns, so get the table resource for them rather than failing. + elif len(schema) == 0: + table = self.get_table(table.reference, retry=retry) + schema = table.schema params = {} if selected_fields is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 5b0ad45af4be..ab22407eff1a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1752,5 +1752,9 @@ def _table_arg_to_table(value, default_project=None): value = TableReference.from_string(value, default_project=default_project) if isinstance(value, TableReference): value = Table(value) + if isinstance(value, TableListItem): + newvalue = Table(value.reference) + newvalue._properties = value._properties + value = newvalue return value diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 780f13bcbe3e..671bbdf29778 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4276,20 +4276,72 @@ def test_list_rows_w_record_schema(self): method="GET", path="/%s" % PATH, query_params={} ) - def test_list_rows_errors(self): - from google.cloud.bigquery.table import Table + def test_list_rows_with_missing_schema(self): + from google.cloud.bigquery.table import Table, TableListItem + + table_path = "/projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID + ) + tabledata_path = "{}/data".format(table_path) + + table_list_item_data = { + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + } + table_data = copy.deepcopy(table_list_item_data) + # Intentionally make wrong, since total_rows can update during iteration. + table_data["numRows"] = 2 + table_data["schema"] = { + "fields": [ + {"name": "name", "type": "STRING"}, + {"name": "age", "type": "INTEGER"}, + ] + } + rows_data = { + "totalRows": 3, + "pageToken": None, + "rows": [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "31"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": None}]}, + ], + } creds = _make_credentials() http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - # table ref with no selected fields - with self.assertRaises(ValueError): - client.list_rows(self.TABLE_REF) + schemaless_tables = ( + "{}.{}".format(self.DS_ID, self.TABLE_ID), + self.TABLE_REF, + Table(self.TABLE_REF), + TableListItem(table_list_item_data), + ) - # table with no schema - with self.assertRaises(ValueError): - client.list_rows(Table(self.TABLE_REF)) + for table in schemaless_tables: + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _make_connection(table_data, rows_data) + + row_iter = client.list_rows(table) + + conn.api_request.assert_called_once_with(method="GET", path=table_path) + conn.api_request.reset_mock() + self.assertIsNone(row_iter.total_rows, msg=repr(table)) + + rows = list(row_iter) + conn.api_request.assert_called_once_with( + method="GET", path=tabledata_path, query_params={} + ) + self.assertEqual(row_iter.total_rows, 3, msg=repr(table)) + self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table)) + self.assertEqual(rows[1].age, 31, msg=repr(table)) + self.assertIsNone(rows[2].age, msg=repr(table)) + + def test_list_rows_error(self): + client = self._make_one() # neither Table nor tableReference with self.assertRaises(TypeError): From 2e736456b173b6b06bc2f026edda5e48c97348b1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 1 Apr 2019 14:01:37 -0700 Subject: [PATCH 0560/2016] Fallback to BQ API when there are problems reading from BQ Storage. (#7633) The tabledata.list API works for more kinds of tables, including small anonymous query results tables. By falling back to this API, we enable a developer to always specify a `bqstorage_client` even when they aren't writing their query results to a destination table and don't know how large their query results will be. --- .../google/cloud/bigquery/table.py | 21 +++++-- .../google-cloud-bigquery/tests/system.py | 61 +++++++++++-------- .../tests/unit/test_table.py | 45 ++++++++++++++ 3 files changed, 94 insertions(+), 33 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index ab22407eff1a..ae090a4b0b83 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -35,6 +35,7 @@ except ImportError: # pragma: NO COVER tqdm = None +import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator import google.cloud._helpers @@ -1437,7 +1438,7 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non bqstorage_client ( \ google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ ): - **Alpha Feature** Optional. A BigQuery Storage API client. If + **Beta Feature** Optional. A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. @@ -1448,8 +1449,9 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non currently supported by this method. **Caution**: There is a known issue reading small anonymous - query result tables with the BQ Storage API. Write your query - results to a destination table to work around this issue. + query result tables with the BQ Storage API. When a problem + is encountered reading a table, the tabledata.list method + from the BigQuery API is used, instead. dtypes ( \ Map[str, Union[str, pandas.Series.dtype]] \ ): @@ -1496,9 +1498,16 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non progress_bar = self._get_progress_bar(progress_bar_type) if bqstorage_client is not None: - return self._to_dataframe_bqstorage(bqstorage_client, dtypes) - else: - return self._to_dataframe_tabledata_list(dtypes, progress_bar=progress_bar) + try: + return self._to_dataframe_bqstorage(bqstorage_client, dtypes) + except google.api_core.exceptions.GoogleAPICallError: + # There is a known issue with reading from small anonymous + # query results tables, so some errors are expected. Rather + # than throw those errors, try reading the DataFrame again, but + # with the tabledata.list API. + pass + + return self._to_dataframe_tabledata_list(dtypes, progress_bar=progress_bar) class _EmptyRowIterator(object): diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 04c4c3bf18fd..cceca192b8f7 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1540,35 +1540,42 @@ def test_query_results_to_dataframe_w_bqstorage(self): bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( credentials=Config.CLIENT._credentials ) - df = ( - Config.CLIENT.query( - query, - # There is a known issue reading small anonymous query result - # tables with the BQ Storage API. Writing to a destination - # table works around this issue. - job_config=bigquery.QueryJobConfig( - destination=dest_ref, write_disposition="WRITE_TRUNCATE" - ), - ) - .result() - .to_dataframe(bqstorage_client) + + job_configs = ( + # There is a known issue reading small anonymous query result + # tables with the BQ Storage API. Writing to a destination + # table works around this issue. + bigquery.QueryJobConfig( + destination=dest_ref, write_disposition="WRITE_TRUNCATE" + ), + # Check that the client is able to work around the issue with + # reading small anonymous query result tables by falling back to + # the tabledata.list API. + None, ) - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 10) # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] - self.assertEqual(list(df), column_names) - exp_datatypes = { - "id": int, - "author": six.text_type, - "time_ts": pandas.Timestamp, - "dead": bool, - } - for index, row in df.iterrows(): - for col in column_names: - # all the schema fields are nullable, so None is acceptable - if not row[col] is None: - self.assertIsInstance(row[col], exp_datatypes[col]) + for job_config in job_configs: + df = ( + Config.CLIENT.query(query, job_config=job_config) + .result() + .to_dataframe(bqstorage_client) + ) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 10) # verify the number of rows + column_names = ["id", "author", "time_ts", "dead"] + self.assertEqual(list(df), column_names) + exp_datatypes = { + "id": int, + "author": six.text_type, + "time_ts": pandas.Timestamp, + "dead": bool, + } + for index, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + self.assertIsInstance(row[col], exp_datatypes[col]) def test_insert_rows_nested_nested(self): # See #2951 diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 4500856ec2a4..b0a1318c6f11 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -21,10 +21,13 @@ import pytest import six +import google.api_core.exceptions + try: from google.cloud import bigquery_storage_v1beta1 except ImportError: # pragma: NO COVER bigquery_storage_v1beta1 = None + try: import pandas except (ImportError, AttributeError): # pragma: NO COVER @@ -1748,6 +1751,48 @@ def test_to_dataframe_w_bqstorage_nonempty(self): self.assertEqual(list(got), column_names) self.assertEqual(len(got.index), 2) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_fallback_to_tabledata_list(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + bqstorage_client.create_read_session.side_effect = google.api_core.exceptions.InternalServerError( + "can't read with bqstorage_client" + ) + iterator_schema = [ + schema.SchemaField("name", "STRING", mode="REQUIRED"), + schema.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = mut.RowIterator( + _mock_client(), + api_request, + path, + iterator_schema, + table=mut.Table("proj.dset.tbl"), + ) + + df = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 4) # verify the number of rows + self.assertEqual(list(df), ["name", "age"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.age.dtype.name, "int64") + @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) From d719d127287eb8b8f99a075d63e588591ab51d4b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 3 Apr 2019 16:27:02 -0700 Subject: [PATCH 0561/2016] Add option to use BigQuery Storage API from IPython magics (#7640) * Add option to use BigQuery Storage API from IPython magics Construct and use a BigQuery Storage API client when either the context.use_bqstorage_api property is set, or the --use_bqstorage_api parameter is passed into the magic. * Add tests for when BigQuery Storage client isn't installed --- .../google/cloud/bigquery/magics.py | 53 ++++- .../tests/unit/test_magics.py | 184 +++++++++++++++++- 2 files changed, 235 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 4494d7ba5ad1..27626752d8e7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -32,6 +32,11 @@ * ``--project `` (optional, line argument): Project to use for running the query. Defaults to the context :attr:`~google.cloud.bigquery.magics.Context.project`. + * ``--use_bqstorage_api`` (optional, line argument): + Downloads the DataFrame using the BigQuery Storage API. To use this + option, install the ``google-cloud-bigquery-storage`` and ``fastavro`` + packages, and `enable the BigQuery Storage API + `_. * ``--use_legacy_sql`` (optional, line argument): Runs the query using Legacy SQL syntax. Defaults to Standard SQL if this argument not used. @@ -134,6 +139,10 @@ from IPython.core import magic_arguments except ImportError: # pragma: NO COVER raise ImportError("This module can only be loaded in IPython.") +try: + from google.cloud import bigquery_storage_v1beta1 +except ImportError: # pragma: NO COVER + bigquery_storage_v1beta1 = None import google.auth from google.cloud import bigquery @@ -150,6 +159,7 @@ class Context(object): def __init__(self): self._credentials = None self._project = None + self._use_bqstorage_api = None @property def credentials(self): @@ -211,6 +221,21 @@ def project(self): def project(self, value): self._project = value + @property + def use_bqstorage_api(self): + """bool: [Beta] Set to True to use the BigQuery Storage API to + download query results + + To use this option, install the ``google-cloud-bigquery-storage`` and + ``fastavro`` packages, and `enable the BigQuery Storage API + `_. + """ + return self._use_bqstorage_api + + @use_bqstorage_api.setter + def use_bqstorage_api(self, value): + self._use_bqstorage_api = value + context = Context() @@ -274,6 +299,16 @@ def _run_query(client, query, job_config=None): "Standard SQL if this argument is not used." ), ) +@magic_arguments.argument( + "--use_bqstorage_api", + action="store_true", + default=False, + help=( + "[Beta] Use the BigQuery Storage API to download large query results. " + "To use this option, install the google-cloud-bigquery-storage and " + "fastavro packages, and enable the BigQuery Storage API." + ), +) @magic_arguments.argument( "--verbose", action="store_true", @@ -328,6 +363,9 @@ def _cell_magic(line, query): project = args.project or context.project client = bigquery.Client(project=project, credentials=context.credentials) + bqstorage_client = _make_bqstorage_client( + args.use_bqstorage_api or context.use_bqstorage_api, context.credentials + ) job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql @@ -336,8 +374,21 @@ def _cell_magic(line, query): if not args.verbose: display.clear_output() - result = query_job.to_dataframe() + result = query_job.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) else: return result + + +def _make_bqstorage_client(use_bqstorage_api, credentials): + if not use_bqstorage_api: + return None + + if bigquery_storage_v1beta1 is None: + raise ImportError( + "Install the google-cloud-bigquery-storage and fastavro packages " + "to use the BigQuery Storage API." + ) + + return bigquery_storage_v1beta1.BigQueryStorageClient(credentials=credentials) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 6ec6123e726c..0db0bc5ebf52 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -31,6 +31,11 @@ IPython = None import google.auth.credentials + +try: + from google.cloud import bigquery_storage_v1beta1 +except ImportError: # pragma: NO COVER + bigquery_storage_v1beta1 = None from google.cloud.bigquery import table from google.cloud.bigquery import magics @@ -129,6 +134,37 @@ def test__run_query(): assert re.match("Query complete after .*s", updates[-1]) +def test__make_bqstorage_client_false(): + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + got = magics._make_bqstorage_client(False, credentials_mock) + assert got is None + + +@pytest.mark.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" +) +def test__make_bqstorage_client_true(): + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + got = magics._make_bqstorage_client(True, credentials_mock) + assert isinstance(got, bigquery_storage_v1beta1.BigQueryStorageClient) + + +def test__make_bqstorage_client_true_raises_import_error(monkeypatch): + monkeypatch.setattr(magics, "bigquery_storage_v1beta1", None) + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + with pytest.raises(ImportError) as exc: + magics._make_bqstorage_client(True, credentials_mock) + + assert "google-cloud-bigquery-storage" in str(exc) + + @pytest.mark.usefixtures("ipython_interactive") def test_extension_load(): ip = IPython.get_ipython() @@ -141,13 +177,16 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_without_optional_arguments(): +def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + # Shouldn't fail when BigQuery Storage client isn't installed. + monkeypatch.setattr(magics, "bigquery_storage_v1beta1", None) + sql = "SELECT 17 AS num" result = pandas.DataFrame([17], columns=["num"]) run_query_patch = mock.patch( @@ -257,6 +296,149 @@ def test_bigquery_magic_clears_display_in_verbose_mode(): assert clear_mock.call_count == 1 +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" +) +def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(magics.context, "credentials", mock_credentials) + monkeypatch.setattr(magics.context, "use_bqstorage_api", False) + + # Mock out the BigQuery Storage API. + bqstorage_mock = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient, instance=True + ) + bqstorage_mock.return_value = bqstorage_instance_mock + monkeypatch.setattr( + magics.bigquery_storage_v1beta1, "BigQueryStorageClient", bqstorage_mock + ) + + sql = "SELECT 17 AS num" + result = pandas.DataFrame([17], columns=["num"]) + run_query_patch = mock.patch( + "google.cloud.bigquery.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + return_value = ip.run_cell_magic("bigquery", "--use_bqstorage_api", sql) + + bqstorage_mock.assert_called_once_with(credentials=mock_credentials) + query_job_mock.to_dataframe.assert_called_once_with( + bqstorage_client=bqstorage_instance_mock + ) + + assert isinstance(return_value, pandas.DataFrame) + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" +) +def test_bigquery_magic_with_bqstorage_from_context(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(magics.context, "credentials", mock_credentials) + monkeypatch.setattr(magics.context, "use_bqstorage_api", True) + + # Mock out the BigQuery Storage API. + bqstorage_mock = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient, instance=True + ) + bqstorage_mock.return_value = bqstorage_instance_mock + monkeypatch.setattr( + magics.bigquery_storage_v1beta1, "BigQueryStorageClient", bqstorage_mock + ) + + sql = "SELECT 17 AS num" + result = pandas.DataFrame([17], columns=["num"]) + run_query_patch = mock.patch( + "google.cloud.bigquery.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + return_value = ip.run_cell_magic("bigquery", "", sql) + + bqstorage_mock.assert_called_once_with(credentials=mock_credentials) + query_job_mock.to_dataframe.assert_called_once_with( + bqstorage_client=bqstorage_instance_mock + ) + + assert isinstance(return_value, pandas.DataFrame) + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" +) +def test_bigquery_magic_without_bqstorage(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(magics.context, "credentials", mock_credentials) + + # Mock out the BigQuery Storage API. + bqstorage_mock = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + monkeypatch.setattr( + magics.bigquery_storage_v1beta1, "BigQueryStorageClient", bqstorage_mock + ) + + sql = "SELECT 17 AS num" + result = pandas.DataFrame([17], columns=["num"]) + run_query_patch = mock.patch( + "google.cloud.bigquery.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + return_value = ip.run_cell_magic("bigquery", "", sql) + + bqstorage_mock.assert_not_called() + query_job_mock.to_dataframe.assert_called_once_with(bqstorage_client=None) + + assert isinstance(return_value, pandas.DataFrame) + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_project(): ip = IPython.get_ipython() From 724e74d44ab70b34062b7a36a43b3b551837685f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 3 Apr 2019 18:59:04 -0700 Subject: [PATCH 0562/2016] Add methods for Models API (#7562) * Generate protos for Models API. The Models API is a component of the bigquery_v2 interface. It is not available as a gRPC API, but it does provide protocol buffers. This commit adds those protocol buffers to the client so that they can be used to avoid much manual work to create resource classes that can be serialized to/from JSON. * Add handwritten model API classes. These classes provide the top-level classes for the Model API. The protocol buffer objects are used for all sub-objects. The pattern for mutable properties follows the same as with Table and Dataset: a `_properties` dictionary contains the property values in the REST API format. --- packages/google-cloud-bigquery/LICENSE | 7 +- packages/google-cloud-bigquery/MANIFEST.in | 4 +- .../docs/gapic/v2/enums.rst | 8 + .../docs/gapic/v2/types.rst | 5 + .../google-cloud-bigquery/docs/reference.rst | 21 +- .../google-cloud-bigquery/google/__init__.py | 6 +- .../google/cloud/__init__.py | 6 +- .../google/cloud/bigquery/__init__.py | 5 + .../google/cloud/bigquery/_helpers.py | 49 + .../google/cloud/bigquery/client.py | 171 + .../google/cloud/bigquery/dataset.py | 36 +- .../google/cloud/bigquery/model.py | 406 ++ .../google/cloud/bigquery/table.py | 48 +- .../google/cloud/bigquery_v2/__init__.py | 33 + .../cloud/bigquery_v2/gapic/__init__.py | 0 .../google/cloud/bigquery_v2/gapic/enums.py | 137 + .../cloud/bigquery_v2/proto/__init__.py | 0 .../bigquery_v2/proto/location_metadata.proto | 34 + .../proto/location_metadata_pb2.py | 98 + .../proto/location_metadata_pb2_grpc.py | 2 + .../cloud/bigquery_v2/proto/model.proto | 513 +++ .../cloud/bigquery_v2/proto/model_pb2.py | 3331 +++++++++++++++++ .../cloud/bigquery_v2/proto/model_pb2_grpc.py | 102 + .../bigquery_v2/proto/model_reference.proto | 39 + .../bigquery_v2/proto/model_reference_pb2.py | 137 + .../proto/model_reference_pb2_grpc.py | 2 + .../bigquery_v2/proto/standard_sql.proto | 110 + .../bigquery_v2/proto/standard_sql_pb2.py | 365 ++ .../proto/standard_sql_pb2_grpc.py | 2 + .../google/cloud/bigquery_v2/types.py | 45 + packages/google-cloud-bigquery/noxfile.py | 3 +- .../google-cloud-bigquery/samples/__init__.py | 0 .../samples/delete_model.py | 31 + .../samples/get_model.py | 35 + .../samples/list_models.py | 38 + .../samples/tests/__init__.py | 0 .../samples/tests/conftest.py | 62 + .../samples/tests/test_model_samples.py | 39 + .../samples/update_model.py | 38 + packages/google-cloud-bigquery/setup.py | 1 + packages/google-cloud-bigquery/synth.metadata | 32 + packages/google-cloud-bigquery/synth.py | 54 + .../tests/unit/model/__init__.py | 0 .../tests/unit/model/test_model.py | 276 ++ .../tests/unit/model/test_model_reference.py | 126 + .../tests/unit/test_client.py | 244 ++ .../tests/unit/test_dataset.py | 7 + 47 files changed, 6644 insertions(+), 64 deletions(-) create mode 100644 packages/google-cloud-bigquery/docs/gapic/v2/enums.rst create mode 100644 packages/google-cloud-bigquery/docs/gapic/v2/types.rst create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/model.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/__init__.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/__init__.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata.proto create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py create mode 100644 packages/google-cloud-bigquery/samples/__init__.py create mode 100644 packages/google-cloud-bigquery/samples/delete_model.py create mode 100644 packages/google-cloud-bigquery/samples/get_model.py create mode 100644 packages/google-cloud-bigquery/samples/list_models.py create mode 100644 packages/google-cloud-bigquery/samples/tests/__init__.py create mode 100644 packages/google-cloud-bigquery/samples/tests/conftest.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_model_samples.py create mode 100644 packages/google-cloud-bigquery/samples/update_model.py create mode 100644 packages/google-cloud-bigquery/synth.metadata create mode 100644 packages/google-cloud-bigquery/synth.py create mode 100644 packages/google-cloud-bigquery/tests/unit/model/__init__.py create mode 100644 packages/google-cloud-bigquery/tests/unit/model/test_model.py create mode 100644 packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py diff --git a/packages/google-cloud-bigquery/LICENSE b/packages/google-cloud-bigquery/LICENSE index d64569567334..a8ee855de2aa 100644 --- a/packages/google-cloud-bigquery/LICENSE +++ b/packages/google-cloud-bigquery/LICENSE @@ -1,7 +1,6 @@ - - Apache License + Apache License Version 2.0, January 2004 - http://www.apache.org/licenses/ + https://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION @@ -193,7 +192,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/packages/google-cloud-bigquery/MANIFEST.in b/packages/google-cloud-bigquery/MANIFEST.in index 1fbc0d0b321e..9cbf175afe6b 100644 --- a/packages/google-cloud-bigquery/MANIFEST.in +++ b/packages/google-cloud-bigquery/MANIFEST.in @@ -1,3 +1,5 @@ include README.rst LICENSE +recursive-include google *.json *.proto recursive-include tests * -global-exclude *.pyc __pycache__ +global-exclude *.py[co] +global-exclude __pycache__ diff --git a/packages/google-cloud-bigquery/docs/gapic/v2/enums.rst b/packages/google-cloud-bigquery/docs/gapic/v2/enums.rst new file mode 100644 index 000000000000..0e0f05adaea5 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/gapic/v2/enums.rst @@ -0,0 +1,8 @@ +Enums for BigQuery API Client +============================= + +.. autoclass:: google.cloud.bigquery_v2.gapic.enums.Model + :members: + +.. autoclass:: google.cloud.bigquery_v2.gapic.enums.StandardSqlDataType + :members: diff --git a/packages/google-cloud-bigquery/docs/gapic/v2/types.rst b/packages/google-cloud-bigquery/docs/gapic/v2/types.rst new file mode 100644 index 000000000000..97938768a690 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/gapic/v2/types.rst @@ -0,0 +1,5 @@ +Types for BigQuery API Client +============================= + +.. automodule:: google.cloud.bigquery_v2.types + :members: \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 6676c6f0128f..b3f949e3daab 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -92,6 +92,14 @@ Table table.TimePartitioning table.TimePartitioningType +Model +===== + +.. autosummary:: + :toctree: generated + + model.Model + model.ModelReference Schema ====== @@ -139,9 +147,20 @@ External Configuration Magics -====================== +====== .. toctree:: :maxdepth: 2 magics + +Additional Types +================ + +Protocol buffer classes for working with the Models API. + +.. toctree:: + :maxdepth: 2 + + gapic/v2/enums + gapic/v2/types diff --git a/packages/google-cloud-bigquery/google/__init__.py b/packages/google-cloud-bigquery/google/__init__.py index 0e1bc5131ba6..8fcc60e2b9c6 100644 --- a/packages/google-cloud-bigquery/google/__init__.py +++ b/packages/google-cloud-bigquery/google/__init__.py @@ -1,10 +1,12 @@ -# Copyright 2016 Google LLC +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/packages/google-cloud-bigquery/google/cloud/__init__.py b/packages/google-cloud-bigquery/google/cloud/__init__.py index 0e1bc5131ba6..8fcc60e2b9c6 100644 --- a/packages/google-cloud-bigquery/google/cloud/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/__init__.py @@ -1,10 +1,12 @@ -# Copyright 2016 Google LLC +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index c3865d511b6c..0b972bb7297b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -60,6 +60,8 @@ from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import UnknownJob from google.cloud.bigquery.job import WriteDisposition +from google.cloud.bigquery.model import Model +from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter @@ -100,6 +102,9 @@ "UnknownJob", "TimePartitioningType", "TimePartitioning", + # Models + "Model", + "ModelReference", # Shared helpers "SchemaField", "UDFResource", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 72e1fa276dd7..90b1f14016b7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -581,3 +581,52 @@ def _str_or_none(value): """Helper: serialize value to JSON string.""" if value is not None: return str(value) + + +def _parse_3_part_id(full_id, default_project=None, property_name="table_id"): + output_project_id = default_project + output_dataset_id = None + output_resource_id = None + parts = full_id.split(".") + + if len(parts) != 2 and len(parts) != 3: + raise ValueError( + "{property_name} must be a fully-qualified ID in " + 'standard SQL format. e.g. "project.dataset.{property_name}", ' + "got {}".format(full_id, property_name=property_name) + ) + + if len(parts) == 2 and not default_project: + raise ValueError( + "When default_project is not set, {property_name} must be a " + "fully-qualified ID in standard SQL format. " + 'e.g. "project.dataset_id.{property_name}", got {}'.format( + full_id, property_name=property_name + ) + ) + + if len(parts) == 2: + output_dataset_id, output_resource_id = parts + else: + output_project_id, output_dataset_id, output_resource_id = parts + + return output_project_id, output_dataset_id, output_resource_id + + +def _build_resource_from_properties(obj, filter_fields): + """Build a resource based on a ``_properties`` dictionary, filtered by + ``filter_fields``, which follow the name of the Python object. + """ + partial = {} + for filter_field in filter_fields: + api_field = obj._PROPERTY_TO_API_FIELD.get(filter_field) + if api_field is None and filter_field not in obj._properties: + raise ValueError("No property %s" % filter_field) + elif api_field is not None: + partial[api_field] = obj._properties.get(api_field) + else: + # allows properties that are not defined in the library + # and properties that have the same name as API resource key + partial[filter_field] = obj._properties[filter_field] + + return partial diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index b7c6a70b27a8..86e131438f32 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -46,6 +46,8 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import job +from google.cloud.bigquery.model import Model +from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import _QueryResults from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.table import _table_arg_to_table @@ -428,6 +430,33 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): api_response = self._call_api(retry, method="GET", path=dataset_ref.path) return Dataset.from_api_repr(api_response) + def get_model(self, model_ref, retry=DEFAULT_RETRY): + """[Beta] Fetch the model referenced by ``model_ref``. + + Args: + model_ref (Union[ \ + :class:`~google.cloud.bigquery.model.ModelReference`, \ + str, \ + ]): + A reference to the model to fetch from the BigQuery API. + If a string is passed in, this method attempts to create a + model reference from a string using + :func:`google.cloud.bigquery.model.ModelReference.from_string`. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. + + Returns: + google.cloud.bigquery.model.Model: + A ``Model`` instance. + """ + if isinstance(model_ref, str): + model_ref = ModelReference.from_string( + model_ref, default_project=self.project + ) + + api_response = self._call_api(retry, method="GET", path=model_ref.path) + return Model.from_api_repr(api_response) + def get_table(self, table, retry=DEFAULT_RETRY): """Fetch the table referenced by ``table``. @@ -488,6 +517,41 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): ) return Dataset.from_api_repr(api_response) + def update_model(self, model, fields, retry=DEFAULT_RETRY): + """[Beta] Change some fields of a model. + + Use ``fields`` to specify which fields to update. At least one field + must be provided. If a field is listed in ``fields`` and is ``None`` + in ``model``, it will be deleted. + + If ``model.etag`` is not ``None``, the update will only succeed if + the model on the server has the same ETag. Thus reading a model with + ``get_model``, changing its fields, and then passing it to + ``update_model`` will ensure that the changes will only be saved if + no modifications to the model occurred since the read. + + Args: + model (google.cloud.bigquery.model.Model): The model to update. + fields (Sequence[str]): + The fields of ``model`` to change, spelled as the Model + properties (e.g. "friendly_name"). + retry (google.api_core.retry.Retry): + (Optional) A description of how to retry the API call. + + Returns: + google.cloud.bigquery.model.Model: + The model resource returned from the API call. + """ + partial = model._build_resource(fields) + if model.etag: + headers = {"If-Match": model.etag} + else: + headers = None + api_response = self._call_api( + retry, method="PATCH", path=model.path, data=partial, headers=headers + ) + return Model.from_api_repr(api_response) + def update_table(self, table, fields, retry=DEFAULT_RETRY): """Change some fields of a table. @@ -523,6 +587,64 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY): ) return Table.from_api_repr(api_response) + def list_models( + self, dataset, max_results=None, page_token=None, retry=DEFAULT_RETRY + ): + """[Beta] List models in the dataset. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/models/list + + Args: + dataset (Union[ \ + :class:`~google.cloud.bigquery.dataset.Dataset`, \ + :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + str, \ + ]): + A reference to the dataset whose models to list from the + BigQuery API. If a string is passed in, this method attempts + to create a dataset reference from a string using + :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. + max_results (int): + (Optional) Maximum number of models to return. If not passed, + defaults to a value set by the API. + page_token (str): + (Optional) Token representing a cursor into the models. If + not passed, the API will return the first page of models. The + token marks the beginning of the iterator to be returned and + the value of the ``page_token`` can be accessed at + ``next_page_token`` of the + :class:`~google.api_core.page_iterator.HTTPIterator`. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. + + Returns: + google.api_core.page_iterator.Iterator: + Iterator of + :class:`~google.cloud.bigquery.model.Model` contained + within the requested dataset. + """ + if isinstance(dataset, str): + dataset = DatasetReference.from_string( + dataset, default_project=self.project + ) + + if not isinstance(dataset, (Dataset, DatasetReference)): + raise TypeError("dataset must be a Dataset, DatasetReference, or string") + + path = "%s/models" % dataset.path + result = page_iterator.HTTPIterator( + client=self, + api_request=functools.partial(self._call_api, retry), + path=path, + item_to_value=_item_to_model, + items_key="models", + page_token=page_token, + max_results=max_results, + ) + result.dataset = dataset + return result + def list_tables( self, dataset, max_results=None, page_token=None, retry=DEFAULT_RETRY ): @@ -629,6 +751,40 @@ def delete_dataset( if not not_found_ok: raise + def delete_model(self, model, retry=DEFAULT_RETRY, not_found_ok=False): + """[Beta] Delete a model + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/models/delete + + Args: + model (Union[ \ + :class:`~google.cloud.bigquery.model.Model`, \ + :class:`~google.cloud.bigquery.model.ModelReference`, \ + str, \ + ]): + A reference to the model to delete. If a string is passed in, + this method attempts to create a model reference from a + string using + :func:`google.cloud.bigquery.model.ModelReference.from_string`. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. + not_found_ok (bool): + Defaults to ``False``. If ``True``, ignore "not found" errors + when deleting the model. + """ + if isinstance(model, str): + model = ModelReference.from_string(model, default_project=self.project) + + if not isinstance(model, (Model, ModelReference)): + raise TypeError("model must be a Model or a ModelReference") + + try: + self._call_api(retry, method="DELETE", path=model.path) + except google.api_core.exceptions.NotFound: + if not not_found_ok: + raise + def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False): """Delete a table @@ -1823,6 +1979,21 @@ def _item_to_job(iterator, resource): return iterator.client.job_from_resource(resource) +def _item_to_model(iterator, resource): + """Convert a JSON model to the native object. + + Args: + iterator (google.api_core.page_iterator.Iterator): + The iterator that is currently in use. + resource (dict): + An item to be converted to a model. + + Returns: + google.cloud.bigquery.model.Model: The next model in the page. + """ + return Model.from_api_repr(resource) + + def _item_to_table(iterator, resource): """Convert a JSON table to the native object. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 2a71c900baf0..8566e183cda0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -21,6 +21,7 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers +from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.table import TableReference @@ -37,6 +38,21 @@ def _get_table_reference(self, table_id): return TableReference(self, table_id) +def _get_model_reference(self, model_id): + """Constructs a ModelReference. + + Args: + model_id (str): the ID of the model. + + Returns: + google.cloud.bigquery.model.ModelReference: + A ModelReference for a model in this dataset. + """ + return ModelReference.from_api_repr( + {"projectId": self.project, "datasetId": self.dataset_id, "modelId": model_id} + ) + + class AccessEntry(object): """Represents grant of an access role to an entity. @@ -206,6 +222,8 @@ def path(self): table = _get_table_reference + model = _get_model_reference + @classmethod def from_api_repr(cls, resource): """Factory: construct a dataset reference given its API representation @@ -567,22 +585,12 @@ def to_api_repr(self): def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" - partial = {} - for filter_field in filter_fields: - api_field = self._PROPERTY_TO_API_FIELD.get(filter_field) - if api_field is None and filter_field not in self._properties: - raise ValueError("No Dataset property %s" % filter_field) - elif api_field is not None: - partial[api_field] = self._properties.get(api_field) - else: - # allows properties that are not defined in the library - # and properties that have the same name as API resource key - partial[filter_field] = self._properties[filter_field] - - return partial + return _helpers._build_resource_from_properties(self, filter_fields) table = _get_table_reference + model = _get_model_reference + def __repr__(self): return "Dataset({})".format(repr(self.reference)) @@ -662,3 +670,5 @@ def reference(self): return DatasetReference(self.project, self.dataset_id) table = _get_table_reference + + model = _get_model_reference diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py new file mode 100644 index 000000000000..8b29e4008558 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -0,0 +1,406 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Define resources for the BigQuery ML Models API.""" + +import copy + +from google.protobuf import json_format +import six + +import google.cloud._helpers +from google.api_core import datetime_helpers +from google.cloud.bigquery import _helpers +from google.cloud.bigquery_v2 import types + + +class Model(object): + """Model represents a machine learning model resource. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/models + + Args: + model_ref (Union[ \ + :class:`~google.cloud.bigquery.model.ModelReference`, \ + str, \ + ]): + A pointer to a model. If ``model_ref`` is a string, it must + included a project ID, dataset ID, and model ID, each separated + by ``.``. + """ + + _PROPERTY_TO_API_FIELD = { + "expires": "expirationTime", + "friendly_name": "friendlyName", + # Even though it's not necessary for field mapping to map when the + # property name equals the resource name, we add these here so that we + # have an exhaustive list of all mutable properties. + "labels": "labels", + "description": "description", + } + + def __init__(self, model_ref): + # Use _proto on read-only properties to use it's built-in type + # conversion. + self._proto = types.Model() + + # Use _properties on read-write properties to match the REST API + # semantics. The BigQuery API makes a distinction between an unset + # value, a null value, and a default value (0 or ""), but the protocol + # buffer classes do not. + self._properties = {} + + if isinstance(model_ref, six.string_types): + model_ref = ModelReference.from_string(model_ref) + + if model_ref: + self._proto.model_reference.CopyFrom(model_ref._proto) + + @property + def reference(self): + """A :class:`~google.cloud.bigquery.model.ModelReference` pointing to + this model. + + Read-only. + + Returns: + google.cloud.bigquery.model.ModelReference: pointer to this model. + """ + ref = ModelReference() + ref._proto = self._proto.model_reference + return ref + + @property + def project(self): + """str: Project bound to the model""" + return self.reference.project + + @property + def dataset_id(self): + """str: ID of dataset containing the model.""" + return self.reference.dataset_id + + @property + def model_id(self): + """str: The model ID.""" + return self.reference.model_id + + @property + def path(self): + """str: URL path for the model's APIs.""" + return self.reference.path + + @property + def location(self): + """str: The geographic location where the model resides. This value + is inherited from the dataset. + + Read-only. + """ + return self._proto.location + + @property + def etag(self): + """str: ETag for the model resource (:data:`None` until + set from the server). + + Read-only. + """ + return self._proto.etag + + @property + def created(self): + """Union[datetime.datetime, None]: Datetime at which the model was + created (:data:`None` until set from the server). + + Read-only. + """ + value = self._proto.creation_time + if value is not None and value != 0: + # value will be in milliseconds. + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(value) + ) + + @property + def modified(self): + """Union[datetime.datetime, None]: Datetime at which the model was last + modified (:data:`None` until set from the server). + + Read-only. + """ + value = self._proto.last_modified_time + if value is not None and value != 0: + # value will be in milliseconds. + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(value) + ) + + @property + def model_type(self): + """google.cloud.bigquery_v2.gapic.enums.Model.ModelType: Type of the + model resource. + + Read-only. + + The value is one of elements of the + :class:`~google.cloud.bigquery_v2.gapic.enums.Model.ModelType` + enumeration. + """ + return self._proto.model_type + + @property + def training_runs(self): + """Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun]: Information + for all training runs in increasing order of start time. + + Read-only. + + An iterable of :class:`~google.cloud.bigquery_v2.types.Model.TrainingRun`. + """ + return self._proto.training_runs + + @property + def feature_columns(self): + """Sequence[google.cloud.bigquery_v2.types.StandardSqlField]: Input + feature columns that were used to train this model. + + Read-only. + + An iterable of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`. + """ + return self._proto.feature_columns + + @property + def label_columns(self): + """Sequence[google.cloud.bigquery_v2.types.StandardSqlField]: Label + columns that were used to train this model. The output of the model + will have a ``predicted_`` prefix to these columns. + + Read-only. + + An iterable of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`. + """ + return self._proto.label_columns + + @property + def expires(self): + """Union[datetime.datetime, None]: The datetime when this model + expires. If not present, the model will persist indefinitely. Expired + models will be deleted and their storage reclaimed. + """ + value = self._properties.get("expirationTime") + if value is not None: + # value will be in milliseconds. + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(value) + ) + + @expires.setter + def expires(self, value): + if value is not None: + value = str(google.cloud._helpers._millis_from_datetime(value)) + self._properties["expirationTime"] = value + + @property + def description(self): + """Union[str, None]: Description of the model (defaults to + :data:`None`). + """ + return self._properties.get("description") + + @description.setter + def description(self, value): + self._properties["description"] = value + + @property + def friendly_name(self): + """Union[str, None]: Title of the table (defaults to :data:`None`). + + Raises: + ValueError: For invalid value types. + """ + return self._properties.get("friendlyName") + + @friendly_name.setter + def friendly_name(self, value): + self._properties["friendlyName"] = value + + @property + def labels(self): + """Dict[str, str]: Labels for the table. + + This method always returns a dict. To change a model's labels, + modify the dict, then call ``Client.update_model``. To delete a + label, set its value to :data:`None` before updating. + """ + return self._properties.setdefault("labels", {}) + + @labels.setter + def labels(self, value): + if value is None: + value = {} + self._properties["labels"] = value + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a model resource given its API representation + + Args: + resource (Dict[str, object]): + Model resource representation from the API + + Returns: + google.cloud.bigquery.model.Model: Model parsed from ``resource``. + """ + this = cls(None) + + # Convert from millis-from-epoch to timestamp well-known type. + # TODO: Remove this hack once CL 238585470 hits prod. + resource = copy.deepcopy(resource) + for training_run in resource.get("trainingRuns", ()): + start_time = training_run.get("startTime") + if not start_time or "-" in start_time: # Already right format? + continue + start_time = datetime_helpers.from_microseconds(1e3 * float(start_time)) + training_run["startTime"] = datetime_helpers.to_rfc3339(start_time) + + this._proto = json_format.ParseDict(resource, types.Model()) + for key in six.itervalues(cls._PROPERTY_TO_API_FIELD): + # Leave missing keys unset. This allows us to use setdefault in the + # getters where we want a default value other than None. + if key in resource: + this._properties[key] = resource[key] + return this + + def _build_resource(self, filter_fields): + """Generate a resource for ``update``.""" + return _helpers._build_resource_from_properties(self, filter_fields) + + def __repr__(self): + return "Model(reference={})".format(repr(self.reference)) + + +class ModelReference(object): + """ModelReferences are pointers to models. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/models + """ + + def __init__(self): + self._proto = types.ModelReference() + + @property + def project(self): + """str: Project bound to the model""" + return self._proto.project_id + + @property + def dataset_id(self): + """str: ID of dataset containing the model.""" + return self._proto.dataset_id + + @property + def model_id(self): + """str: The model ID.""" + return self._proto.model_id + + @property + def path(self): + """str: URL path for the model's APIs.""" + return "/projects/%s/datasets/%s/models/%s" % ( + self._proto.project_id, + self._proto.dataset_id, + self._proto.model_id, + ) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a model reference given its API representation + + Args: + resource (Dict[str, object]): + Model reference representation returned from the API + + Returns: + google.cloud.bigquery.model.ModelReference: + Model reference parsed from ``resource``. + """ + ref = cls() + ref._proto = json_format.ParseDict(resource, types.ModelReference()) + return ref + + @classmethod + def from_string(cls, model_id, default_project=None): + """Construct a model reference from model ID string. + + Args: + model_id (str): + A model ID in standard SQL format. If ``default_project`` + is not specified, this must included a project ID, dataset + ID, and model ID, each separated by ``.``. + default_project (str): + Optional. The project ID to use when ``model_id`` does not + include a project ID. + + Returns: + google.cloud.bigquery.model.ModelReference: + Model reference parsed from ``model_id``. + + Raises: + ValueError: + If ``model_id`` is not a fully-qualified table ID in + standard SQL format. + """ + proj, dset, model = _helpers._parse_3_part_id( + model_id, default_project=default_project, property_name="model_id" + ) + return cls.from_api_repr( + {"projectId": proj, "datasetId": dset, "modelId": model} + ) + + def to_api_repr(self): + """Construct the API resource representation of this model reference. + + Returns: + Dict[str, object]: Model reference represented as an API resource + """ + return json_format.MessageToDict(self._proto) + + def _key(self): + """Unique key for this model. + + This is used for hashing a ModelReference. + """ + return self.project, self.dataset_id, self.model_id + + def __eq__(self, other): + if not isinstance(other, ModelReference): + return NotImplemented + return self._proto == other._proto + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + return "ModelReference(project='{}', dataset_id='{}', project_id='{}')".format( + self.project, self.dataset_id, self.model_id + ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index ae090a4b0b83..7dbaf527274f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -228,33 +228,13 @@ def from_string(cls, table_id, default_project=None): """ from google.cloud.bigquery.dataset import DatasetReference - output_project_id = default_project - output_dataset_id = None - output_table_id = None - parts = table_id.split(".") - - if len(parts) < 2: - raise ValueError( - "table_id must be a fully-qualified table ID in " - 'standard SQL format. e.g. "project.dataset.table", got ' - "{}".format(table_id) - ) - elif len(parts) == 2: - if not default_project: - raise ValueError( - "When default_project is not set, table_id must be a " - "fully-qualified table ID in standard SQL format. " - 'e.g. "project.dataset_id.table_id", got {}'.format(table_id) - ) - output_dataset_id, output_table_id = parts - elif len(parts) == 3: - output_project_id, output_dataset_id, output_table_id = parts - if len(parts) > 3: - raise ValueError( - "Too many parts in table_id. Must be a fully-qualified table " - 'ID in standard SQL format. e.g. "project.dataset.table", ' - "got {}".format(table_id) - ) + ( + output_project_id, + output_dataset_id, + output_table_id, + ) = _helpers._parse_3_part_id( + table_id, default_project=default_project, property_name="table_id" + ) return cls( DatasetReference(output_project_id, output_dataset_id), output_table_id @@ -880,19 +860,7 @@ def to_bqstorage(self): def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" - partial = {} - for filter_field in filter_fields: - api_field = self._PROPERTY_TO_API_FIELD.get(filter_field) - if api_field is None and filter_field not in self._properties: - raise ValueError("No Table property %s" % filter_field) - elif api_field is not None: - partial[api_field] = self._properties.get(api_field) - else: - # allows properties that are not defined in the library - # and properties that have the same name as API resource key - partial[filter_field] = self._properties[filter_field] - - return partial + return _helpers._build_resource_from_properties(self, filter_fields) def __repr__(self): return "Table({})".format(repr(self.reference)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py new file mode 100644 index 000000000000..e582214329f3 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import pkg_resources + +__version__ = pkg_resources.get_distribution("google-cloud-bigquery").version # noqa + +from google.cloud.bigquery_v2 import types +from google.cloud.bigquery_v2.gapic import enums + + +__all__ = ( + # google.cloud.bigquery_v2 + "__version__", + "types", + # google.cloud.bigquery_v2 + "enums", +) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py new file mode 100644 index 000000000000..69d3d670a628 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Wrappers for protocol buffer enum types.""" + +import enum + + +class Model(object): + class DataSplitMethod(enum.IntEnum): + """ + Indicates the method to split input data into multiple tables. + + Attributes: + DATA_SPLIT_METHOD_UNSPECIFIED (int) + RANDOM (int): Splits data randomly. + CUSTOM (int): Splits data with the user provided tags. + SEQUENTIAL (int): Splits data sequentially. + NO_SPLIT (int): Data split will be skipped. + AUTO_SPLIT (int): Splits data automatically: Uses NO\_SPLIT if the data size is small. + Otherwise uses RANDOM. + """ + + DATA_SPLIT_METHOD_UNSPECIFIED = 0 + RANDOM = 1 + CUSTOM = 2 + SEQUENTIAL = 3 + NO_SPLIT = 4 + AUTO_SPLIT = 5 + + class DistanceType(enum.IntEnum): + """ + Distance metric used to compute the distance between two points. + + Attributes: + DISTANCE_TYPE_UNSPECIFIED (int) + EUCLIDEAN (int): Eculidean distance. + COSINE (int): Cosine distance. + """ + + DISTANCE_TYPE_UNSPECIFIED = 0 + EUCLIDEAN = 1 + COSINE = 2 + + class LearnRateStrategy(enum.IntEnum): + """ + Indicates the learning rate optimization strategy to use. + + Attributes: + LEARN_RATE_STRATEGY_UNSPECIFIED (int) + LINE_SEARCH (int): Use line search to determine learning rate. + CONSTANT (int): Use a constant learning rate. + """ + + LEARN_RATE_STRATEGY_UNSPECIFIED = 0 + LINE_SEARCH = 1 + CONSTANT = 2 + + class LossType(enum.IntEnum): + """ + Loss metric to evaluate model training performance. + + Attributes: + LOSS_TYPE_UNSPECIFIED (int) + MEAN_SQUARED_LOSS (int): Mean squared loss, used for linear regression. + MEAN_LOG_LOSS (int): Mean log loss, used for logistic regression. + """ + + LOSS_TYPE_UNSPECIFIED = 0 + MEAN_SQUARED_LOSS = 1 + MEAN_LOG_LOSS = 2 + + class ModelType(enum.IntEnum): + """ + Indicates the type of the Model. + + Attributes: + MODEL_TYPE_UNSPECIFIED (int) + LINEAR_REGRESSION (int): Linear regression model. + LOGISTIC_REGRESSION (int): Logistic regression model. + KMEANS (int): [Beta] K-means clustering model. + """ + + MODEL_TYPE_UNSPECIFIED = 0 + LINEAR_REGRESSION = 1 + LOGISTIC_REGRESSION = 2 + KMEANS = 3 + + +class StandardSqlDataType(object): + class TypeKind(enum.IntEnum): + """ + Attributes: + TYPE_KIND_UNSPECIFIED (int): Invalid type. + INT64 (int): Encoded as a string in decimal format. + BOOL (int): Encoded as a boolean "false" or "true". + FLOAT64 (int): Encoded as a number, or string "NaN", "Infinity" or "-Infinity". + STRING (int): Encoded as a string value. + BYTES (int): Encoded as a base64 string per RFC 4648, section 4. + TIMESTAMP (int): Encoded as an RFC 3339 timestamp with mandatory "Z" time zone string: + 1985-04-12T23:20:50.52Z + DATE (int): Encoded as RFC 3339 full-date format string: 1985-04-12 + TIME (int): Encoded as RFC 3339 partial-time format string: 23:20:50.52 + DATETIME (int): Encoded as RFC 3339 full-date "T" partial-time: 1985-04-12T23:20:50.52 + GEOGRAPHY (int): Encoded as WKT + NUMERIC (int): Encoded as a decimal string. + ARRAY (int): Encoded as a list with types matching Type.array\_type. + STRUCT (int): Encoded as a list with fields of type Type.struct\_type[i]. List is used + because a JSON object cannot have duplicate field names. + """ + + TYPE_KIND_UNSPECIFIED = 0 + INT64 = 2 + BOOL = 5 + FLOAT64 = 7 + STRING = 8 + BYTES = 9 + TIMESTAMP = 19 + DATE = 10 + TIME = 20 + DATETIME = 21 + GEOGRAPHY = 22 + NUMERIC = 23 + ARRAY = 16 + STRUCT = 17 diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata.proto new file mode 100644 index 000000000000..95a3133c5755 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata.proto @@ -0,0 +1,34 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.bigquery.v2; + +import "google/api/annotations.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; +option java_outer_classname = "LocationMetadataProto"; +option java_package = "com.google.cloud.bigquery.v2"; + + +// BigQuery-specific metadata about a location. This will be set on +// google.cloud.location.Location.metadata in Cloud Location API +// responses. +message LocationMetadata { + // The legacy BigQuery location ID, e.g. “EU” for the “europe” location. + // This is for any API consumers that need the legacy “US” and “EU” locations. + string legacy_location_id = 1; +} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2.py new file mode 100644 index 000000000000..6dd9da52e4ed --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2.py @@ -0,0 +1,98 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: google/cloud/bigquery_v2/proto/location_metadata.proto + +import sys + +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name="google/cloud/bigquery_v2/proto/location_metadata.proto", + package="google.cloud.bigquery.v2", + syntax="proto3", + serialized_options=_b( + "\n\034com.google.cloud.bigquery.v2B\025LocationMetadataProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" + ), + serialized_pb=_b( + '\n6google/cloud/bigquery_v2/proto/location_metadata.proto\x12\x18google.cloud.bigquery.v2\x1a\x1cgoogle/api/annotations.proto".\n\x10LocationMetadata\x12\x1a\n\x12legacy_location_id\x18\x01 \x01(\tBw\n\x1c\x63om.google.cloud.bigquery.v2B\x15LocationMetadataProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' + ), + dependencies=[google_dot_api_dot_annotations__pb2.DESCRIPTOR], +) + + +_LOCATIONMETADATA = _descriptor.Descriptor( + name="LocationMetadata", + full_name="google.cloud.bigquery.v2.LocationMetadata", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="legacy_location_id", + full_name="google.cloud.bigquery.v2.LocationMetadata.legacy_location_id", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=114, + serialized_end=160, +) + +DESCRIPTOR.message_types_by_name["LocationMetadata"] = _LOCATIONMETADATA +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +LocationMetadata = _reflection.GeneratedProtocolMessageType( + "LocationMetadata", + (_message.Message,), + dict( + DESCRIPTOR=_LOCATIONMETADATA, + __module__="google.cloud.bigquery_v2.proto.location_metadata_pb2", + __doc__="""BigQuery-specific metadata about a location. This will be set on + google.cloud.location.Location.metadata in Cloud Location API responses. + + + Attributes: + legacy_location_id: + The legacy BigQuery location ID, e.g. ``EU`` for the ``europe`` + location. This is for any API consumers that need the legacy + ``US`` and ``EU`` locations. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.LocationMetadata) + ), +) +_sym_db.RegisterMessage(LocationMetadata) + + +DESCRIPTOR._options = None +# @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py new file mode 100644 index 000000000000..07cb78fe03a9 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py @@ -0,0 +1,2 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto new file mode 100644 index 000000000000..b94cb3f5efd5 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto @@ -0,0 +1,513 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.bigquery.v2; + +import "google/cloud/bigquery/v2/model_reference.proto"; +import "google/cloud/bigquery/v2/standard_sql.proto"; +import "google/protobuf/empty.proto"; +import "google/protobuf/timestamp.proto"; +import "google/protobuf/wrappers.proto"; +import "google/api/annotations.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; +option java_outer_classname = "ModelProto"; +option java_package = "com.google.cloud.bigquery.v2"; + + +service ModelService { + // Gets the specified model resource by model ID. + rpc GetModel(GetModelRequest) returns (Model) { + } + + // Lists all models in the specified dataset. Requires the READER dataset + // role. + rpc ListModels(ListModelsRequest) returns (ListModelsResponse) { + } + + // Patch specific fields in the specified model. + rpc PatchModel(PatchModelRequest) returns (Model) { + } + + // Deletes the model specified by modelId from the dataset. + rpc DeleteModel(DeleteModelRequest) returns (google.protobuf.Empty) { + } +} + +message Model { + // Evaluation metrics for regression models. + message RegressionMetrics { + // Mean absolute error. + google.protobuf.DoubleValue mean_absolute_error = 1; + + // Mean squared error. + google.protobuf.DoubleValue mean_squared_error = 2; + + // Mean squared log error. + google.protobuf.DoubleValue mean_squared_log_error = 3; + + // Median absolute error. + google.protobuf.DoubleValue median_absolute_error = 4; + + // R^2 score. + google.protobuf.DoubleValue r_squared = 5; + } + + // Aggregate metrics for classification models. For multi-class models, + // the metrics are either macro-averaged: metrics are calculated for each + // label and then an unweighted average is taken of those values or + // micro-averaged: the metric is calculated globally by counting the total + // number of correctly predicted rows. + message AggregateClassificationMetrics { + // Precision is the fraction of actual positive predictions that had + // positive actual labels. For multiclass this is a macro-averaged + // metric treating each class as a binary classifier. + google.protobuf.DoubleValue precision = 1; + + // Recall is the fraction of actual positive labels that were given a + // positive prediction. For multiclass this is a macro-averaged metric. + google.protobuf.DoubleValue recall = 2; + + // Accuracy is the fraction of predictions given the correct label. For + // multiclass this is a micro-averaged metric. + google.protobuf.DoubleValue accuracy = 3; + + // Threshold at which the metrics are computed. For binary + // classification models this is the positive class threshold. + // For multi-class classfication models this is the confidence + // threshold. + google.protobuf.DoubleValue threshold = 4; + + // The F1 score is an average of recall and precision. For multiclass + // this is a macro-averaged metric. + google.protobuf.DoubleValue f1_score = 5; + + // Logarithmic Loss. For multiclass this is a macro-averaged metric. + google.protobuf.DoubleValue log_loss = 6; + + // Area Under a ROC Curve. For multiclass this is a macro-averaged + // metric. + google.protobuf.DoubleValue roc_auc = 7; + } + + // Evaluation metrics for binary classification models. + message BinaryClassificationMetrics { + // Confusion matrix for binary classification models. + message BinaryConfusionMatrix { + // Threshold value used when computing each of the following metric. + google.protobuf.DoubleValue positive_class_threshold = 1; + + // Number of true samples predicted as true. + google.protobuf.Int64Value true_positives = 2; + + // Number of false samples predicted as true. + google.protobuf.Int64Value false_positives = 3; + + // Number of true samples predicted as false. + google.protobuf.Int64Value true_negatives = 4; + + // Number of false samples predicted as false. + google.protobuf.Int64Value false_negatives = 5; + + // Aggregate precision. + google.protobuf.DoubleValue precision = 6; + + // Aggregate recall. + google.protobuf.DoubleValue recall = 7; + } + + // Aggregate classification metrics. + AggregateClassificationMetrics aggregate_classification_metrics = 1; + + // Binary confusion matrix at multiple thresholds. + repeated BinaryConfusionMatrix binary_confusion_matrix_list = 2; + } + + // Evaluation metrics for multi-class classification models. + message MultiClassClassificationMetrics { + // Confusion matrix for multi-class classification models. + message ConfusionMatrix { + // A single entry in the confusion matrix. + message Entry { + // The predicted label. For confidence_threshold > 0, we will + // also add an entry indicating the number of items under the + // confidence threshold. + string predicted_label = 1; + + // Number of items being predicted as this label. + google.protobuf.Int64Value item_count = 2; + } + + // A single row in the confusion matrix. + message Row { + // The original label of this row. + string actual_label = 1; + + // Info describing predicted label distribution. + repeated Entry entries = 2; + } + + // Confidence threshold used when computing the entries of the + // confusion matrix. + google.protobuf.DoubleValue confidence_threshold = 1; + + // One row per actual label. + repeated Row rows = 2; + } + + // Aggregate classification metrics. + AggregateClassificationMetrics aggregate_classification_metrics = 1; + + // Confusion matrix at different thresholds. + repeated ConfusionMatrix confusion_matrix_list = 2; + } + + // Evaluation metrics for clustering models. + message ClusteringMetrics { + // Davies-Bouldin index. + google.protobuf.DoubleValue davies_bouldin_index = 1; + + // Mean of squared distances between each sample to its cluster centroid. + google.protobuf.DoubleValue mean_squared_distance = 2; + } + + // Evaluation metrics of a model. These are either computed on all + // training data or just the eval data based on whether eval data was used + // during training. + message EvaluationMetrics { + oneof metrics { + // Populated for regression models. + RegressionMetrics regression_metrics = 1; + + // Populated for binary classification models. + BinaryClassificationMetrics binary_classification_metrics = 2; + + // Populated for multi-class classification models. + MultiClassClassificationMetrics multi_class_classification_metrics = 3; + + // [Beta] Populated for clustering models. + ClusteringMetrics clustering_metrics = 4; + } + } + + // Information about a single training query run for the model. + message TrainingRun { + message TrainingOptions { + // The maximum number of iterations in training. + int64 max_iterations = 1; + + // Type of loss function used during training run. + LossType loss_type = 2; + + // Learning rate in training. + double learn_rate = 3; + + // L1 regularization coefficient. + google.protobuf.DoubleValue l1_regularization = 4; + + // L2 regularization coefficient. + google.protobuf.DoubleValue l2_regularization = 5; + + // When early_stop is true, stops training when accuracy improvement is + // less than 'min_relative_progress'. + google.protobuf.DoubleValue min_relative_progress = 6; + + // Whether to train a model from the last checkpoint. + google.protobuf.BoolValue warm_start = 7; + + // Whether to stop early when the loss doesn't improve significantly + // any more (compared to min_relative_progress). + google.protobuf.BoolValue early_stop = 8; + + // Name of input label columns in training data. + repeated string input_label_columns = 9; + + // The data split type for training and evaluation, e.g. RANDOM. + DataSplitMethod data_split_method = 10; + + // The fraction of evaluation data over the whole input data. The rest + // of data will be used as training data. The format should be double. + // Accurate to two decimal places. + // Default value is 0.2. + double data_split_eval_fraction = 11; + + // The column to split data with. This column won't be used as a + // feature. + // 1. When data_split_method is CUSTOM, the corresponding column should + // be boolean. The rows with true value tag are eval data, and the false + // are training data. + // 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION + // rows (from smallest to largest) in the corresponding column are used + // as training data, and the rest are eval data. It respects the order + // in Orderable data types: + // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties + string data_split_column = 12; + + // The strategy to determine learning rate. + LearnRateStrategy learn_rate_strategy = 13; + + // Specifies the initial learning rate for line search to start at. + double initial_learn_rate = 16; + + // Weights associated with each label class, for rebalancing the + // training data. + map label_class_weights = 17; + + // [Beta] Distance type for clustering models. + DistanceType distance_type = 20; + + // [Beta] Number of clusters for clustering models. + int64 num_clusters = 21; + } + + // Information about a single iteration of the training run. + message IterationResult { + // Information about a single cluster for clustering model. + message ClusterInfo { + // Centroid id. + int64 centroid_id = 1; + + // Cluster radius, the average distance from centroid + // to each point assigned to the cluster. + google.protobuf.DoubleValue cluster_radius = 2; + + // Cluster size, the total number of points assigned to the cluster. + google.protobuf.Int64Value cluster_size = 3; + } + + // Index of the iteration, 0 based. + google.protobuf.Int32Value index = 1; + + // Time taken to run the iteration in milliseconds. + google.protobuf.Int64Value duration_ms = 4; + + // Loss computed on the training data at the end of iteration. + google.protobuf.DoubleValue training_loss = 5; + + // Loss computed on the eval data at the end of iteration. + google.protobuf.DoubleValue eval_loss = 6; + + // Learn rate used for this iteration. + double learn_rate = 7; + + // [Beta] Information about top clusters for clustering models. + repeated ClusterInfo cluster_infos = 8; + } + + // Options that were used for this training run, includes + // user specified and default options that were used. + TrainingOptions training_options = 1; + + // The start time of this training run. + google.protobuf.Timestamp start_time = 8; + + // Output of each iteration run, results.size() <= max_iterations. + repeated IterationResult results = 6; + + // The evaluation metrics over training/eval data that were computed at the + // end of training. + EvaluationMetrics evaluation_metrics = 7; + } + + // Indicates the type of the Model. + enum ModelType { + MODEL_TYPE_UNSPECIFIED = 0; + + // Linear regression model. + LINEAR_REGRESSION = 1; + + // Logistic regression model. + LOGISTIC_REGRESSION = 2; + + // [Beta] K-means clustering model. + KMEANS = 3; + } + + // Loss metric to evaluate model training performance. + enum LossType { + LOSS_TYPE_UNSPECIFIED = 0; + + // Mean squared loss, used for linear regression. + MEAN_SQUARED_LOSS = 1; + + // Mean log loss, used for logistic regression. + MEAN_LOG_LOSS = 2; + } + + // Distance metric used to compute the distance between two points. + enum DistanceType { + DISTANCE_TYPE_UNSPECIFIED = 0; + + // Eculidean distance. + EUCLIDEAN = 1; + + // Cosine distance. + COSINE = 2; + } + + // Indicates the method to split input data into multiple tables. + enum DataSplitMethod { + DATA_SPLIT_METHOD_UNSPECIFIED = 0; + + // Splits data randomly. + RANDOM = 1; + + // Splits data with the user provided tags. + CUSTOM = 2; + + // Splits data sequentially. + SEQUENTIAL = 3; + + // Data split will be skipped. + NO_SPLIT = 4; + + // Splits data automatically: Uses NO_SPLIT if the data size is small. + // Otherwise uses RANDOM. + AUTO_SPLIT = 5; + } + + // Indicates the learning rate optimization strategy to use. + enum LearnRateStrategy { + LEARN_RATE_STRATEGY_UNSPECIFIED = 0; + + // Use line search to determine learning rate. + LINE_SEARCH = 1; + + // Use a constant learning rate. + CONSTANT = 2; + } + + // Output only. A hash of this resource. + string etag = 1; + + // Required. Unique identifier for this model. + ModelReference model_reference = 2; + + // Output only. The time when this model was created, in millisecs since the + // epoch. + int64 creation_time = 5; + + // Output only. The time when this model was last modified, in millisecs + // since the epoch. + int64 last_modified_time = 6; + + // [Optional] A user-friendly description of this model. + // @mutable bigquery.models.patch + string description = 12; + + // [Optional] A descriptive name for this model. + // @mutable bigquery.models.patch + string friendly_name = 14; + + // [Optional] The labels associated with this model. You can use these to + // organize and group your models. Label keys and values can be no longer + // than 63 characters, can only contain lowercase letters, numeric + // characters, underscores and dashes. International characters are allowed. + // Label values are optional. Label keys must start with a letter and each + // label in the list must have a different key. + // @mutable bigquery.models.patch + map labels = 15; + + // [Optional] The time when this model expires, in milliseconds since the + // epoch. If not present, the model will persist indefinitely. Expired models + // will be deleted and their storage reclaimed. The defaultTableExpirationMs + // property of the encapsulating dataset can be used to set a default + // expirationTime on newly created models. + // @mutable bigquery.models.patch + int64 expiration_time = 16; + + // Output only. The geographic location where the model resides. This value + // is inherited from the dataset. + string location = 13; + + // Output only. Type of the model resource. + ModelType model_type = 7; + + // Output only. Information for all training runs in increasing order of + // start_time. + repeated TrainingRun training_runs = 9; + + // Output only. Input feature columns that were used to train this model. + repeated StandardSqlField feature_columns = 10; + + // Output only. Label columns that were used to train this model. + // The output of the model will have a “predicted_” prefix to these columns. + repeated StandardSqlField label_columns = 11; +} + +message GetModelRequest { + // Project ID of the requested model. + string project_id = 1; + + // Dataset ID of the requested model. + string dataset_id = 2; + + // Model ID of the requested model. + string model_id = 3; +} + +message PatchModelRequest { + // Project ID of the model to patch. + string project_id = 1; + + // Dataset ID of the model to patch. + string dataset_id = 2; + + // Model ID of the model to patch. + string model_id = 3; + + // Patched model. + // Follows patch semantics. Missing fields are not updated. To clear a field, + // explicitly set to default value. + Model model = 4; +} + +message DeleteModelRequest { + // Project ID of the model to delete. + string project_id = 1; + + // Dataset ID of the model to delete. + string dataset_id = 2; + + // Model ID of the model to delete. + string model_id = 3; +} + +message ListModelsRequest { + // Project ID of the models to list. + string project_id = 1; + + // Dataset ID of the models to list. + string dataset_id = 2; + + // The maximum number of results per page. + google.protobuf.UInt32Value max_results = 3; + + // Page token, returned by a previous call to request the next page of + // results + string page_token = 4; +} + +message ListModelsResponse { + // Models in the requested dataset. Only the following fields are populated: + // model_reference, model_type, creation_time, last_modified_time and + // labels. + repeated Model models = 1; + + // A token to request the next page of results. + string next_page_token = 2; +} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py new file mode 100644 index 000000000000..afa3d8cf640d --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py @@ -0,0 +1,3331 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: google/cloud/bigquery_v2/proto/model.proto + +import sys + +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.cloud.bigquery_v2.proto import ( + model_reference_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2, +) +from google.cloud.bigquery_v2.proto import ( + standard_sql_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2, +) +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 +from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 +from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name="google/cloud/bigquery_v2/proto/model.proto", + package="google.cloud.bigquery.v2", + syntax="proto3", + serialized_options=_b( + "\n\034com.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" + ), + serialized_pb=_b( + '\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"\xcc)\n\x05Model\x12\x0c\n\x04\x65tag\x18\x01 \x01(\t\x12\x41\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReference\x12\x15\n\rcreation_time\x18\x05 \x01(\x03\x12\x1a\n\x12last_modified_time\x18\x06 \x01(\x03\x12\x13\n\x0b\x64\x65scription\x18\x0c \x01(\t\x12\x15\n\rfriendly_name\x18\x0e \x01(\t\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x17\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x12\x10\n\x08location\x18\r \x01(\t\x12=\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelType\x12\x42\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRun\x12\x43\n\x0f\x66\x65\x61ture_columns\x18\n \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x12\x41\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x8f\x05\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x1a\x8c\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\x8c\x01\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\xaf\r\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\xa1\x07\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 \x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"c\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"K\n\x0fGetModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"}\n\x11PatchModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12.\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.Model"N\n\x12\x44\x65leteModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"\x82\x01\n\x11ListModelsRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 \x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\x88\x03\n\x0cModelService\x12X\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12i\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"\x00\x12\\\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12U\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"\x00\x42l\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' + ), + dependencies=[ + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2.DESCRIPTOR, + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2.DESCRIPTOR, + google_dot_protobuf_dot_empty__pb2.DESCRIPTOR, + google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, + google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR, + google_dot_api_dot_annotations__pb2.DESCRIPTOR, + ], +) + + +_MODEL_MODELTYPE = _descriptor.EnumDescriptor( + name="ModelType", + full_name="google.cloud.bigquery.v2.Model.ModelType", + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name="MODEL_TYPE_UNSPECIFIED", + index=0, + number=0, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="LINEAR_REGRESSION", + index=1, + number=1, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="LOGISTIC_REGRESSION", + index=2, + number=2, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="KMEANS", index=3, number=3, serialized_options=None, type=None + ), + ], + containing_type=None, + serialized_options=None, + serialized_start=5159, + serialized_end=5258, +) +_sym_db.RegisterEnumDescriptor(_MODEL_MODELTYPE) + +_MODEL_LOSSTYPE = _descriptor.EnumDescriptor( + name="LossType", + full_name="google.cloud.bigquery.v2.Model.LossType", + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name="LOSS_TYPE_UNSPECIFIED", + index=0, + number=0, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="MEAN_SQUARED_LOSS", + index=1, + number=1, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="MEAN_LOG_LOSS", index=2, number=2, serialized_options=None, type=None + ), + ], + containing_type=None, + serialized_options=None, + serialized_start=5260, + serialized_end=5339, +) +_sym_db.RegisterEnumDescriptor(_MODEL_LOSSTYPE) + +_MODEL_DISTANCETYPE = _descriptor.EnumDescriptor( + name="DistanceType", + full_name="google.cloud.bigquery.v2.Model.DistanceType", + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name="DISTANCE_TYPE_UNSPECIFIED", + index=0, + number=0, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="EUCLIDEAN", index=1, number=1, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="COSINE", index=2, number=2, serialized_options=None, type=None + ), + ], + containing_type=None, + serialized_options=None, + serialized_start=5341, + serialized_end=5413, +) +_sym_db.RegisterEnumDescriptor(_MODEL_DISTANCETYPE) + +_MODEL_DATASPLITMETHOD = _descriptor.EnumDescriptor( + name="DataSplitMethod", + full_name="google.cloud.bigquery.v2.Model.DataSplitMethod", + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name="DATA_SPLIT_METHOD_UNSPECIFIED", + index=0, + number=0, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="RANDOM", index=1, number=1, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="CUSTOM", index=2, number=2, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="SEQUENTIAL", index=3, number=3, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="NO_SPLIT", index=4, number=4, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="AUTO_SPLIT", index=5, number=5, serialized_options=None, type=None + ), + ], + containing_type=None, + serialized_options=None, + serialized_start=5415, + serialized_end=5537, +) +_sym_db.RegisterEnumDescriptor(_MODEL_DATASPLITMETHOD) + +_MODEL_LEARNRATESTRATEGY = _descriptor.EnumDescriptor( + name="LearnRateStrategy", + full_name="google.cloud.bigquery.v2.Model.LearnRateStrategy", + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name="LEARN_RATE_STRATEGY_UNSPECIFIED", + index=0, + number=0, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="LINE_SEARCH", index=1, number=1, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="CONSTANT", index=2, number=2, serialized_options=None, type=None + ), + ], + containing_type=None, + serialized_options=None, + serialized_start=5539, + serialized_end=5626, +) +_sym_db.RegisterEnumDescriptor(_MODEL_LEARNRATESTRATEGY) + + +_MODEL_REGRESSIONMETRICS = _descriptor.Descriptor( + name="RegressionMetrics", + full_name="google.cloud.bigquery.v2.Model.RegressionMetrics", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="mean_absolute_error", + full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_absolute_error", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="mean_squared_error", + full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_squared_error", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="mean_squared_log_error", + full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_squared_log_error", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="median_absolute_error", + full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.median_absolute_error", + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="r_squared", + full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.r_squared", + index=4, + number=5, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=859, + serialized_end=1167, +) + +_MODEL_AGGREGATECLASSIFICATIONMETRICS = _descriptor.Descriptor( + name="AggregateClassificationMetrics", + full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="precision", + full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.precision", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="recall", + full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.recall", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="accuracy", + full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.accuracy", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="threshold", + full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.threshold", + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="f1_score", + full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.f1_score", + index=4, + number=5, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="log_loss", + full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.log_loss", + index=5, + number=6, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="roc_auc", + full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.roc_auc", + index=6, + number=7, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1170, + serialized_end=1537, +) + +_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX = _descriptor.Descriptor( + name="BinaryConfusionMatrix", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="positive_class_threshold", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.positive_class_threshold", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="true_positives", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.true_positives", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="false_positives", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.false_positives", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="true_negatives", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.true_negatives", + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="false_negatives", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.false_negatives", + index=4, + number=5, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="precision", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.precision", + index=5, + number=6, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="recall", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.recall", + index=6, + number=7, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1799, + serialized_end=2195, +) + +_MODEL_BINARYCLASSIFICATIONMETRICS = _descriptor.Descriptor( + name="BinaryClassificationMetrics", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="aggregate_classification_metrics", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.aggregate_classification_metrics", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="binary_confusion_matrix_list", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.binary_confusion_matrix_list", + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1540, + serialized_end=2195, +) + +_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY = _descriptor.Descriptor( + name="Entry", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="predicted_label", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry.predicted_label", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="item_count", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry.item_count", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2630, + serialized_end=2711, +) + +_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW = _descriptor.Descriptor( + name="Row", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="actual_label", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row.actual_label", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="entries", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row.entries", + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2714, + serialized_end=2845, +) + +_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX = _descriptor.Descriptor( + name="ConfusionMatrix", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="confidence_threshold", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.confidence_threshold", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="rows", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.rows", + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[ + _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY, + _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW, + ], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2452, + serialized_end=2845, +) + +_MODEL_MULTICLASSCLASSIFICATIONMETRICS = _descriptor.Descriptor( + name="MultiClassClassificationMetrics", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="aggregate_classification_metrics", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.aggregate_classification_metrics", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="confusion_matrix_list", + full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.confusion_matrix_list", + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2198, + serialized_end=2845, +) + +_MODEL_CLUSTERINGMETRICS = _descriptor.Descriptor( + name="ClusteringMetrics", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="davies_bouldin_index", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.davies_bouldin_index", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="mean_squared_distance", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.mean_squared_distance", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2848, + serialized_end=2988, +) + +_MODEL_EVALUATIONMETRICS = _descriptor.Descriptor( + name="EvaluationMetrics", + full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="regression_metrics", + full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.regression_metrics", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="binary_classification_metrics", + full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.binary_classification_metrics", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="multi_class_classification_metrics", + full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.multi_class_classification_metrics", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="clustering_metrics", + full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.clustering_metrics", + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name="metrics", + full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.metrics", + index=0, + containing_type=None, + fields=[], + ) + ], + serialized_start=2991, + serialized_end=3396, +) + +_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY = _descriptor.Descriptor( + name="LabelClassWeightsEntry", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="key", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry.key", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="value", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry.value", + index=1, + number=2, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=_b("8\001"), + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=4580, + serialized_end=4636, +) + +_MODEL_TRAININGRUN_TRAININGOPTIONS = _descriptor.Descriptor( + name="TrainingOptions", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="max_iterations", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.max_iterations", + index=0, + number=1, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="loss_type", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.loss_type", + index=1, + number=2, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="learn_rate", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.learn_rate", + index=2, + number=3, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="l1_regularization", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.l1_regularization", + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="l2_regularization", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.l2_regularization", + index=4, + number=5, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="min_relative_progress", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.min_relative_progress", + index=5, + number=6, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="warm_start", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.warm_start", + index=6, + number=7, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="early_stop", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.early_stop", + index=7, + number=8, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="input_label_columns", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.input_label_columns", + index=8, + number=9, + type=9, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="data_split_method", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_method", + index=9, + number=10, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="data_split_eval_fraction", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_eval_fraction", + index=10, + number=11, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="data_split_column", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_column", + index=11, + number=12, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="learn_rate_strategy", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.learn_rate_strategy", + index=12, + number=13, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="initial_learn_rate", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.initial_learn_rate", + index=13, + number=16, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="label_class_weights", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.label_class_weights", + index=14, + number=17, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="distance_type", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.distance_type", + index=15, + number=20, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="num_clusters", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.num_clusters", + index=16, + number=21, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=3707, + serialized_end=4636, +) + +_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO = _descriptor.Descriptor( + name="ClusterInfo", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="centroid_id", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.centroid_id", + index=0, + number=1, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="cluster_radius", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.cluster_radius", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="cluster_size", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.cluster_size", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=4971, + serialized_end=5110, +) + +_MODEL_TRAININGRUN_ITERATIONRESULT = _descriptor.Descriptor( + name="IterationResult", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="index", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.index", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="duration_ms", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.duration_ms", + index=1, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="training_loss", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.training_loss", + index=2, + number=5, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="eval_loss", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.eval_loss", + index=3, + number=6, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="learn_rate", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.learn_rate", + index=4, + number=7, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="cluster_infos", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.cluster_infos", + index=5, + number=8, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=4639, + serialized_end=5110, +) + +_MODEL_TRAININGRUN = _descriptor.Descriptor( + name="TrainingRun", + full_name="google.cloud.bigquery.v2.Model.TrainingRun", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="training_options", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.training_options", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="start_time", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.start_time", + index=1, + number=8, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="results", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.results", + index=2, + number=6, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="evaluation_metrics", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.evaluation_metrics", + index=3, + number=7, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[ + _MODEL_TRAININGRUN_TRAININGOPTIONS, + _MODEL_TRAININGRUN_ITERATIONRESULT, + ], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=3399, + serialized_end=5110, +) + +_MODEL_LABELSENTRY = _descriptor.Descriptor( + name="LabelsEntry", + full_name="google.cloud.bigquery.v2.Model.LabelsEntry", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="key", + full_name="google.cloud.bigquery.v2.Model.LabelsEntry.key", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="value", + full_name="google.cloud.bigquery.v2.Model.LabelsEntry.value", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=_b("8\001"), + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=5112, + serialized_end=5157, +) + +_MODEL = _descriptor.Descriptor( + name="Model", + full_name="google.cloud.bigquery.v2.Model", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="etag", + full_name="google.cloud.bigquery.v2.Model.etag", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="model_reference", + full_name="google.cloud.bigquery.v2.Model.model_reference", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="creation_time", + full_name="google.cloud.bigquery.v2.Model.creation_time", + index=2, + number=5, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="last_modified_time", + full_name="google.cloud.bigquery.v2.Model.last_modified_time", + index=3, + number=6, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="description", + full_name="google.cloud.bigquery.v2.Model.description", + index=4, + number=12, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="friendly_name", + full_name="google.cloud.bigquery.v2.Model.friendly_name", + index=5, + number=14, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="labels", + full_name="google.cloud.bigquery.v2.Model.labels", + index=6, + number=15, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="expiration_time", + full_name="google.cloud.bigquery.v2.Model.expiration_time", + index=7, + number=16, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="location", + full_name="google.cloud.bigquery.v2.Model.location", + index=8, + number=13, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="model_type", + full_name="google.cloud.bigquery.v2.Model.model_type", + index=9, + number=7, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="training_runs", + full_name="google.cloud.bigquery.v2.Model.training_runs", + index=10, + number=9, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="feature_columns", + full_name="google.cloud.bigquery.v2.Model.feature_columns", + index=11, + number=10, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="label_columns", + full_name="google.cloud.bigquery.v2.Model.label_columns", + index=12, + number=11, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[ + _MODEL_REGRESSIONMETRICS, + _MODEL_AGGREGATECLASSIFICATIONMETRICS, + _MODEL_BINARYCLASSIFICATIONMETRICS, + _MODEL_MULTICLASSCLASSIFICATIONMETRICS, + _MODEL_CLUSTERINGMETRICS, + _MODEL_EVALUATIONMETRICS, + _MODEL_TRAININGRUN, + _MODEL_LABELSENTRY, + ], + enum_types=[ + _MODEL_MODELTYPE, + _MODEL_LOSSTYPE, + _MODEL_DISTANCETYPE, + _MODEL_DATASPLITMETHOD, + _MODEL_LEARNRATESTRATEGY, + ], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=302, + serialized_end=5626, +) + + +_GETMODELREQUEST = _descriptor.Descriptor( + name="GetModelRequest", + full_name="google.cloud.bigquery.v2.GetModelRequest", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="project_id", + full_name="google.cloud.bigquery.v2.GetModelRequest.project_id", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="dataset_id", + full_name="google.cloud.bigquery.v2.GetModelRequest.dataset_id", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="model_id", + full_name="google.cloud.bigquery.v2.GetModelRequest.model_id", + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=5628, + serialized_end=5703, +) + + +_PATCHMODELREQUEST = _descriptor.Descriptor( + name="PatchModelRequest", + full_name="google.cloud.bigquery.v2.PatchModelRequest", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="project_id", + full_name="google.cloud.bigquery.v2.PatchModelRequest.project_id", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="dataset_id", + full_name="google.cloud.bigquery.v2.PatchModelRequest.dataset_id", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="model_id", + full_name="google.cloud.bigquery.v2.PatchModelRequest.model_id", + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="model", + full_name="google.cloud.bigquery.v2.PatchModelRequest.model", + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=5705, + serialized_end=5830, +) + + +_DELETEMODELREQUEST = _descriptor.Descriptor( + name="DeleteModelRequest", + full_name="google.cloud.bigquery.v2.DeleteModelRequest", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="project_id", + full_name="google.cloud.bigquery.v2.DeleteModelRequest.project_id", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="dataset_id", + full_name="google.cloud.bigquery.v2.DeleteModelRequest.dataset_id", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="model_id", + full_name="google.cloud.bigquery.v2.DeleteModelRequest.model_id", + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=5832, + serialized_end=5910, +) + + +_LISTMODELSREQUEST = _descriptor.Descriptor( + name="ListModelsRequest", + full_name="google.cloud.bigquery.v2.ListModelsRequest", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="project_id", + full_name="google.cloud.bigquery.v2.ListModelsRequest.project_id", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="dataset_id", + full_name="google.cloud.bigquery.v2.ListModelsRequest.dataset_id", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="max_results", + full_name="google.cloud.bigquery.v2.ListModelsRequest.max_results", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="page_token", + full_name="google.cloud.bigquery.v2.ListModelsRequest.page_token", + index=3, + number=4, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=5913, + serialized_end=6043, +) + + +_LISTMODELSRESPONSE = _descriptor.Descriptor( + name="ListModelsResponse", + full_name="google.cloud.bigquery.v2.ListModelsResponse", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="models", + full_name="google.cloud.bigquery.v2.ListModelsResponse.models", + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="next_page_token", + full_name="google.cloud.bigquery.v2.ListModelsResponse.next_page_token", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=6045, + serialized_end=6139, +) + +_MODEL_REGRESSIONMETRICS.fields_by_name[ + "mean_absolute_error" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_REGRESSIONMETRICS.fields_by_name[ + "mean_squared_error" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_REGRESSIONMETRICS.fields_by_name[ + "mean_squared_log_error" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_REGRESSIONMETRICS.fields_by_name[ + "median_absolute_error" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_REGRESSIONMETRICS.fields_by_name[ + "r_squared" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_REGRESSIONMETRICS.containing_type = _MODEL +_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ + "precision" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ + "recall" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ + "accuracy" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ + "threshold" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ + "f1_score" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ + "log_loss" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ + "roc_auc" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_AGGREGATECLASSIFICATIONMETRICS.containing_type = _MODEL +_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ + "positive_class_threshold" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ + "true_positives" +].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE +_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ + "false_positives" +].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE +_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ + "true_negatives" +].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE +_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ + "false_negatives" +].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE +_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ + "precision" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ + "recall" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.containing_type = ( + _MODEL_BINARYCLASSIFICATIONMETRICS +) +_MODEL_BINARYCLASSIFICATIONMETRICS.fields_by_name[ + "aggregate_classification_metrics" +].message_type = _MODEL_AGGREGATECLASSIFICATIONMETRICS +_MODEL_BINARYCLASSIFICATIONMETRICS.fields_by_name[ + "binary_confusion_matrix_list" +].message_type = _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX +_MODEL_BINARYCLASSIFICATIONMETRICS.containing_type = _MODEL +_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY.fields_by_name[ + "item_count" +].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE +_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY.containing_type = ( + _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX +) +_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW.fields_by_name[ + "entries" +].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY +_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW.containing_type = ( + _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX +) +_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.fields_by_name[ + "confidence_threshold" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.fields_by_name[ + "rows" +].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW +_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.containing_type = ( + _MODEL_MULTICLASSCLASSIFICATIONMETRICS +) +_MODEL_MULTICLASSCLASSIFICATIONMETRICS.fields_by_name[ + "aggregate_classification_metrics" +].message_type = _MODEL_AGGREGATECLASSIFICATIONMETRICS +_MODEL_MULTICLASSCLASSIFICATIONMETRICS.fields_by_name[ + "confusion_matrix_list" +].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX +_MODEL_MULTICLASSCLASSIFICATIONMETRICS.containing_type = _MODEL +_MODEL_CLUSTERINGMETRICS.fields_by_name[ + "davies_bouldin_index" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_CLUSTERINGMETRICS.fields_by_name[ + "mean_squared_distance" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_CLUSTERINGMETRICS.containing_type = _MODEL +_MODEL_EVALUATIONMETRICS.fields_by_name[ + "regression_metrics" +].message_type = _MODEL_REGRESSIONMETRICS +_MODEL_EVALUATIONMETRICS.fields_by_name[ + "binary_classification_metrics" +].message_type = _MODEL_BINARYCLASSIFICATIONMETRICS +_MODEL_EVALUATIONMETRICS.fields_by_name[ + "multi_class_classification_metrics" +].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS +_MODEL_EVALUATIONMETRICS.fields_by_name[ + "clustering_metrics" +].message_type = _MODEL_CLUSTERINGMETRICS +_MODEL_EVALUATIONMETRICS.containing_type = _MODEL +_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( + _MODEL_EVALUATIONMETRICS.fields_by_name["regression_metrics"] +) +_MODEL_EVALUATIONMETRICS.fields_by_name[ + "regression_metrics" +].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] +_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( + _MODEL_EVALUATIONMETRICS.fields_by_name["binary_classification_metrics"] +) +_MODEL_EVALUATIONMETRICS.fields_by_name[ + "binary_classification_metrics" +].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] +_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( + _MODEL_EVALUATIONMETRICS.fields_by_name["multi_class_classification_metrics"] +) +_MODEL_EVALUATIONMETRICS.fields_by_name[ + "multi_class_classification_metrics" +].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] +_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( + _MODEL_EVALUATIONMETRICS.fields_by_name["clustering_metrics"] +) +_MODEL_EVALUATIONMETRICS.fields_by_name[ + "clustering_metrics" +].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] +_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY.containing_type = ( + _MODEL_TRAININGRUN_TRAININGOPTIONS +) +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "loss_type" +].enum_type = _MODEL_LOSSTYPE +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "l1_regularization" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "l2_regularization" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "min_relative_progress" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "warm_start" +].message_type = google_dot_protobuf_dot_wrappers__pb2._BOOLVALUE +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "early_stop" +].message_type = google_dot_protobuf_dot_wrappers__pb2._BOOLVALUE +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "data_split_method" +].enum_type = _MODEL_DATASPLITMETHOD +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "learn_rate_strategy" +].enum_type = _MODEL_LEARNRATESTRATEGY +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "label_class_weights" +].message_type = _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "distance_type" +].enum_type = _MODEL_DISTANCETYPE +_MODEL_TRAININGRUN_TRAININGOPTIONS.containing_type = _MODEL_TRAININGRUN +_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[ + "cluster_radius" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[ + "cluster_size" +].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE +_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.containing_type = ( + _MODEL_TRAININGRUN_ITERATIONRESULT +) +_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ + "index" +].message_type = google_dot_protobuf_dot_wrappers__pb2._INT32VALUE +_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ + "duration_ms" +].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE +_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ + "training_loss" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ + "eval_loss" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ + "cluster_infos" +].message_type = _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO +_MODEL_TRAININGRUN_ITERATIONRESULT.containing_type = _MODEL_TRAININGRUN +_MODEL_TRAININGRUN.fields_by_name[ + "training_options" +].message_type = _MODEL_TRAININGRUN_TRAININGOPTIONS +_MODEL_TRAININGRUN.fields_by_name[ + "start_time" +].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP +_MODEL_TRAININGRUN.fields_by_name[ + "results" +].message_type = _MODEL_TRAININGRUN_ITERATIONRESULT +_MODEL_TRAININGRUN.fields_by_name[ + "evaluation_metrics" +].message_type = _MODEL_EVALUATIONMETRICS +_MODEL_TRAININGRUN.containing_type = _MODEL +_MODEL_LABELSENTRY.containing_type = _MODEL +_MODEL.fields_by_name[ + "model_reference" +].message_type = ( + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2._MODELREFERENCE +) +_MODEL.fields_by_name["labels"].message_type = _MODEL_LABELSENTRY +_MODEL.fields_by_name["model_type"].enum_type = _MODEL_MODELTYPE +_MODEL.fields_by_name["training_runs"].message_type = _MODEL_TRAININGRUN +_MODEL.fields_by_name[ + "feature_columns" +].message_type = ( + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2._STANDARDSQLFIELD +) +_MODEL.fields_by_name[ + "label_columns" +].message_type = ( + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2._STANDARDSQLFIELD +) +_MODEL_MODELTYPE.containing_type = _MODEL +_MODEL_LOSSTYPE.containing_type = _MODEL +_MODEL_DISTANCETYPE.containing_type = _MODEL +_MODEL_DATASPLITMETHOD.containing_type = _MODEL +_MODEL_LEARNRATESTRATEGY.containing_type = _MODEL +_PATCHMODELREQUEST.fields_by_name["model"].message_type = _MODEL +_LISTMODELSREQUEST.fields_by_name[ + "max_results" +].message_type = google_dot_protobuf_dot_wrappers__pb2._UINT32VALUE +_LISTMODELSRESPONSE.fields_by_name["models"].message_type = _MODEL +DESCRIPTOR.message_types_by_name["Model"] = _MODEL +DESCRIPTOR.message_types_by_name["GetModelRequest"] = _GETMODELREQUEST +DESCRIPTOR.message_types_by_name["PatchModelRequest"] = _PATCHMODELREQUEST +DESCRIPTOR.message_types_by_name["DeleteModelRequest"] = _DELETEMODELREQUEST +DESCRIPTOR.message_types_by_name["ListModelsRequest"] = _LISTMODELSREQUEST +DESCRIPTOR.message_types_by_name["ListModelsResponse"] = _LISTMODELSRESPONSE +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Model = _reflection.GeneratedProtocolMessageType( + "Model", + (_message.Message,), + dict( + RegressionMetrics=_reflection.GeneratedProtocolMessageType( + "RegressionMetrics", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_REGRESSIONMETRICS, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Evaluation metrics for regression models. + + + Attributes: + mean_absolute_error: + Mean absolute error. + mean_squared_error: + Mean squared error. + mean_squared_log_error: + Mean squared log error. + median_absolute_error: + Median absolute error. + r_squared: + R^2 score. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.RegressionMetrics) + ), + ), + AggregateClassificationMetrics=_reflection.GeneratedProtocolMessageType( + "AggregateClassificationMetrics", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_AGGREGATECLASSIFICATIONMETRICS, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Aggregate metrics for classification models. For multi-class models, the + metrics are either macro-averaged: metrics are calculated for each label + and then an unweighted average is taken of those values or + micro-averaged: the metric is calculated globally by counting the total + number of correctly predicted rows. + + + Attributes: + precision: + Precision is the fraction of actual positive predictions that + had positive actual labels. For multiclass this is a macro- + averaged metric treating each class as a binary classifier. + recall: + Recall is the fraction of actual positive labels that were + given a positive prediction. For multiclass this is a macro- + averaged metric. + accuracy: + Accuracy is the fraction of predictions given the correct + label. For multiclass this is a micro-averaged metric. + threshold: + Threshold at which the metrics are computed. For binary + classification models this is the positive class threshold. + For multi-class classfication models this is the confidence + threshold. + f1_score: + The F1 score is an average of recall and precision. For + multiclass this is a macro-averaged metric. + log_loss: + Logarithmic Loss. For multiclass this is a macro-averaged + metric. + roc_auc: + Area Under a ROC Curve. For multiclass this is a macro- + averaged metric. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.AggregateClassificationMetrics) + ), + ), + BinaryClassificationMetrics=_reflection.GeneratedProtocolMessageType( + "BinaryClassificationMetrics", + (_message.Message,), + dict( + BinaryConfusionMatrix=_reflection.GeneratedProtocolMessageType( + "BinaryConfusionMatrix", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Confusion matrix for binary classification models. + + + Attributes: + positive_class_threshold: + Threshold value used when computing each of the following + metric. + true_positives: + Number of true samples predicted as true. + false_positives: + Number of false samples predicted as true. + true_negatives: + Number of true samples predicted as false. + false_negatives: + Number of false samples predicted as false. + precision: + Aggregate precision. + recall: + Aggregate recall. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix) + ), + ), + DESCRIPTOR=_MODEL_BINARYCLASSIFICATIONMETRICS, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Evaluation metrics for binary classification models. + + + Attributes: + aggregate_classification_metrics: + Aggregate classification metrics. + binary_confusion_matrix_list: + Binary confusion matrix at multiple thresholds. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics) + ), + ), + MultiClassClassificationMetrics=_reflection.GeneratedProtocolMessageType( + "MultiClassClassificationMetrics", + (_message.Message,), + dict( + ConfusionMatrix=_reflection.GeneratedProtocolMessageType( + "ConfusionMatrix", + (_message.Message,), + dict( + Entry=_reflection.GeneratedProtocolMessageType( + "Entry", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""A single entry in the confusion matrix. + + + Attributes: + predicted_label: + The predicted label. For confidence\_threshold > 0, we will + also add an entry indicating the number of items under the + confidence threshold. + item_count: + Number of items being predicted as this label. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry) + ), + ), + Row=_reflection.GeneratedProtocolMessageType( + "Row", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""A single row in the confusion matrix. + + + Attributes: + actual_label: + The original label of this row. + entries: + Info describing predicted label distribution. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row) + ), + ), + DESCRIPTOR=_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Confusion matrix for multi-class classification models. + + + Attributes: + confidence_threshold: + Confidence threshold used when computing the entries of the + confusion matrix. + rows: + One row per actual label. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix) + ), + ), + DESCRIPTOR=_MODEL_MULTICLASSCLASSIFICATIONMETRICS, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Evaluation metrics for multi-class classification models. + + + Attributes: + aggregate_classification_metrics: + Aggregate classification metrics. + confusion_matrix_list: + Confusion matrix at different thresholds. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics) + ), + ), + ClusteringMetrics=_reflection.GeneratedProtocolMessageType( + "ClusteringMetrics", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_CLUSTERINGMETRICS, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Evaluation metrics for clustering models. + + + Attributes: + davies_bouldin_index: + Davies-Bouldin index. + mean_squared_distance: + Mean of squared distances between each sample to its cluster + centroid. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics) + ), + ), + EvaluationMetrics=_reflection.GeneratedProtocolMessageType( + "EvaluationMetrics", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_EVALUATIONMETRICS, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Evaluation metrics of a model. These are either computed on all training + data or just the eval data based on whether eval data was used during + training. + + + Attributes: + regression_metrics: + Populated for regression models. + binary_classification_metrics: + Populated for binary classification models. + multi_class_classification_metrics: + Populated for multi-class classification models. + clustering_metrics: + [Beta] Populated for clustering models. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.EvaluationMetrics) + ), + ), + TrainingRun=_reflection.GeneratedProtocolMessageType( + "TrainingRun", + (_message.Message,), + dict( + TrainingOptions=_reflection.GeneratedProtocolMessageType( + "TrainingOptions", + (_message.Message,), + dict( + LabelClassWeightsEntry=_reflection.GeneratedProtocolMessageType( + "LabelClassWeightsEntry", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY, + __module__="google.cloud.bigquery_v2.proto.model_pb2" + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry) + ), + ), + DESCRIPTOR=_MODEL_TRAININGRUN_TRAININGOPTIONS, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Protocol buffer. + + Attributes: + max_iterations: + The maximum number of iterations in training. + loss_type: + Type of loss function used during training run. + learn_rate: + Learning rate in training. + l1_regularization: + L1 regularization coefficient. + l2_regularization: + L2 regularization coefficient. + min_relative_progress: + When early\_stop is true, stops training when accuracy + improvement is less than 'min\_relative\_progress'. + warm_start: + Whether to train a model from the last checkpoint. + early_stop: + Whether to stop early when the loss doesn't improve + significantly any more (compared to min\_relative\_progress). + input_label_columns: + Name of input label columns in training data. + data_split_method: + The data split type for training and evaluation, e.g. RANDOM. + data_split_eval_fraction: + The fraction of evaluation data over the whole input data. The + rest of data will be used as training data. The format should + be double. Accurate to two decimal places. Default value is + 0.2. + data_split_column: + The column to split data with. This column won't be used as a + feature. 1. When data\_split\_method is CUSTOM, the + corresponding column should be boolean. The rows with true + value tag are eval data, and the false are training data. 2. + When data\_split\_method is SEQ, the first + DATA\_SPLIT\_EVAL\_FRACTION rows (from smallest to largest) in + the corresponding column are used as training data, and the + rest are eval data. It respects the order in Orderable data + types: + https://cloud.google.com/bigquery/docs/reference/standard- + sql/data-types#data-type-properties + learn_rate_strategy: + The strategy to determine learning rate. + initial_learn_rate: + Specifies the initial learning rate for line search to start + at. + label_class_weights: + Weights associated with each label class, for rebalancing the + training data. + distance_type: + [Beta] Distance type for clustering models. + num_clusters: + [Beta] Number of clusters for clustering models. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions) + ), + ), + IterationResult=_reflection.GeneratedProtocolMessageType( + "IterationResult", + (_message.Message,), + dict( + ClusterInfo=_reflection.GeneratedProtocolMessageType( + "ClusterInfo", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Information about a single cluster for clustering model. + + + Attributes: + centroid_id: + Centroid id. + cluster_radius: + Cluster radius, the average distance from centroid to each + point assigned to the cluster. + cluster_size: + Cluster size, the total number of points assigned to the + cluster. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo) + ), + ), + DESCRIPTOR=_MODEL_TRAININGRUN_ITERATIONRESULT, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Information about a single iteration of the training run. + + + Attributes: + index: + Index of the iteration, 0 based. + duration_ms: + Time taken to run the iteration in milliseconds. + training_loss: + Loss computed on the training data at the end of iteration. + eval_loss: + Loss computed on the eval data at the end of iteration. + learn_rate: + Learn rate used for this iteration. + cluster_infos: + [Beta] Information about top clusters for clustering models. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult) + ), + ), + DESCRIPTOR=_MODEL_TRAININGRUN, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Information about a single training query run for the model. + + + Attributes: + training_options: + Options that were used for this training run, includes user + specified and default options that were used. + start_time: + The start time of this training run. + results: + Output of each iteration run, results.size() <= + max\_iterations. + evaluation_metrics: + The evaluation metrics over training/eval data that were + computed at the end of training. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun) + ), + ), + LabelsEntry=_reflection.GeneratedProtocolMessageType( + "LabelsEntry", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_LABELSENTRY, + __module__="google.cloud.bigquery_v2.proto.model_pb2" + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.LabelsEntry) + ), + ), + DESCRIPTOR=_MODEL, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Protocol buffer. + + Attributes: + etag: + Output only. A hash of this resource. + model_reference: + Required. Unique identifier for this model. + creation_time: + Output only. The time when this model was created, in + millisecs since the epoch. + last_modified_time: + Output only. The time when this model was last modified, in + millisecs since the epoch. + description: + [Optional] A user-friendly description of this model. @mutable + bigquery.models.patch + friendly_name: + [Optional] A descriptive name for this model. @mutable + bigquery.models.patch + labels: + [Optional] The labels associated with this model. You can use + these to organize and group your models. Label keys and values + can be no longer than 63 characters, can only contain + lowercase letters, numeric characters, underscores and dashes. + International characters are allowed. Label values are + optional. Label keys must start with a letter and each label + in the list must have a different key. @mutable + bigquery.models.patch + expiration_time: + [Optional] The time when this model expires, in milliseconds + since the epoch. If not present, the model will persist + indefinitely. Expired models will be deleted and their storage + reclaimed. The defaultTableExpirationMs property of the + encapsulating dataset can be used to set a default + expirationTime on newly created models. @mutable + bigquery.models.patch + location: + Output only. The geographic location where the model resides. + This value is inherited from the dataset. + model_type: + Output only. Type of the model resource. + training_runs: + Output only. Information for all training runs in increasing + order of start\_time. + feature_columns: + Output only. Input feature columns that were used to train + this model. + label_columns: + Output only. Label columns that were used to train this model. + The output of the model will have a ``predicted\_`` prefix to + these columns. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model) + ), +) +_sym_db.RegisterMessage(Model) +_sym_db.RegisterMessage(Model.RegressionMetrics) +_sym_db.RegisterMessage(Model.AggregateClassificationMetrics) +_sym_db.RegisterMessage(Model.BinaryClassificationMetrics) +_sym_db.RegisterMessage(Model.BinaryClassificationMetrics.BinaryConfusionMatrix) +_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics) +_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix) +_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry) +_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Row) +_sym_db.RegisterMessage(Model.ClusteringMetrics) +_sym_db.RegisterMessage(Model.EvaluationMetrics) +_sym_db.RegisterMessage(Model.TrainingRun) +_sym_db.RegisterMessage(Model.TrainingRun.TrainingOptions) +_sym_db.RegisterMessage(Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry) +_sym_db.RegisterMessage(Model.TrainingRun.IterationResult) +_sym_db.RegisterMessage(Model.TrainingRun.IterationResult.ClusterInfo) +_sym_db.RegisterMessage(Model.LabelsEntry) + +GetModelRequest = _reflection.GeneratedProtocolMessageType( + "GetModelRequest", + (_message.Message,), + dict( + DESCRIPTOR=_GETMODELREQUEST, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Protocol buffer. + + Attributes: + project_id: + Project ID of the requested model. + dataset_id: + Dataset ID of the requested model. + model_id: + Model ID of the requested model. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.GetModelRequest) + ), +) +_sym_db.RegisterMessage(GetModelRequest) + +PatchModelRequest = _reflection.GeneratedProtocolMessageType( + "PatchModelRequest", + (_message.Message,), + dict( + DESCRIPTOR=_PATCHMODELREQUEST, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Protocol buffer. + + Attributes: + project_id: + Project ID of the model to patch. + dataset_id: + Dataset ID of the model to patch. + model_id: + Model ID of the model to patch. + model: + Patched model. Follows patch semantics. Missing fields are not + updated. To clear a field, explicitly set to default value. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.PatchModelRequest) + ), +) +_sym_db.RegisterMessage(PatchModelRequest) + +DeleteModelRequest = _reflection.GeneratedProtocolMessageType( + "DeleteModelRequest", + (_message.Message,), + dict( + DESCRIPTOR=_DELETEMODELREQUEST, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Protocol buffer. + + Attributes: + project_id: + Project ID of the model to delete. + dataset_id: + Dataset ID of the model to delete. + model_id: + Model ID of the model to delete. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.DeleteModelRequest) + ), +) +_sym_db.RegisterMessage(DeleteModelRequest) + +ListModelsRequest = _reflection.GeneratedProtocolMessageType( + "ListModelsRequest", + (_message.Message,), + dict( + DESCRIPTOR=_LISTMODELSREQUEST, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Protocol buffer. + + Attributes: + project_id: + Project ID of the models to list. + dataset_id: + Dataset ID of the models to list. + max_results: + The maximum number of results per page. + page_token: + Page token, returned by a previous call to request the next + page of results + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsRequest) + ), +) +_sym_db.RegisterMessage(ListModelsRequest) + +ListModelsResponse = _reflection.GeneratedProtocolMessageType( + "ListModelsResponse", + (_message.Message,), + dict( + DESCRIPTOR=_LISTMODELSRESPONSE, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Protocol buffer. + + Attributes: + models: + Models in the requested dataset. Only the following fields are + populated: model\_reference, model\_type, creation\_time, + last\_modified\_time and labels. + next_page_token: + A token to request the next page of results. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsResponse) + ), +) +_sym_db.RegisterMessage(ListModelsResponse) + + +DESCRIPTOR._options = None +_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY._options = None +_MODEL_LABELSENTRY._options = None + +_MODELSERVICE = _descriptor.ServiceDescriptor( + name="ModelService", + full_name="google.cloud.bigquery.v2.ModelService", + file=DESCRIPTOR, + index=0, + serialized_options=None, + serialized_start=6142, + serialized_end=6534, + methods=[ + _descriptor.MethodDescriptor( + name="GetModel", + full_name="google.cloud.bigquery.v2.ModelService.GetModel", + index=0, + containing_service=None, + input_type=_GETMODELREQUEST, + output_type=_MODEL, + serialized_options=None, + ), + _descriptor.MethodDescriptor( + name="ListModels", + full_name="google.cloud.bigquery.v2.ModelService.ListModels", + index=1, + containing_service=None, + input_type=_LISTMODELSREQUEST, + output_type=_LISTMODELSRESPONSE, + serialized_options=None, + ), + _descriptor.MethodDescriptor( + name="PatchModel", + full_name="google.cloud.bigquery.v2.ModelService.PatchModel", + index=2, + containing_service=None, + input_type=_PATCHMODELREQUEST, + output_type=_MODEL, + serialized_options=None, + ), + _descriptor.MethodDescriptor( + name="DeleteModel", + full_name="google.cloud.bigquery.v2.ModelService.DeleteModel", + index=3, + containing_service=None, + input_type=_DELETEMODELREQUEST, + output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, + serialized_options=None, + ), + ], +) +_sym_db.RegisterServiceDescriptor(_MODELSERVICE) + +DESCRIPTOR.services_by_name["ModelService"] = _MODELSERVICE + +# @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py new file mode 100644 index 000000000000..5abcdf0f2606 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py @@ -0,0 +1,102 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc + +from google.cloud.bigquery_v2.proto import ( + model_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2, +) +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 + + +class ModelServiceStub(object): + # missing associated documentation comment in .proto file + pass + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.GetModel = channel.unary_unary( + "/google.cloud.bigquery.v2.ModelService/GetModel", + request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.GetModelRequest.SerializeToString, + response_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, + ) + self.ListModels = channel.unary_unary( + "/google.cloud.bigquery.v2.ModelService/ListModels", + request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsRequest.SerializeToString, + response_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsResponse.FromString, + ) + self.PatchModel = channel.unary_unary( + "/google.cloud.bigquery.v2.ModelService/PatchModel", + request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.PatchModelRequest.SerializeToString, + response_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, + ) + self.DeleteModel = channel.unary_unary( + "/google.cloud.bigquery.v2.ModelService/DeleteModel", + request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.DeleteModelRequest.SerializeToString, + response_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + ) + + +class ModelServiceServicer(object): + # missing associated documentation comment in .proto file + pass + + def GetModel(self, request, context): + """Gets the specified model resource by model ID. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def ListModels(self, request, context): + """Lists all models in the specified dataset. Requires the READER dataset + role. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def PatchModel(self, request, context): + """Patch specific fields in the specified model. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def DeleteModel(self, request, context): + """Deletes the model specified by modelId from the dataset. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + +def add_ModelServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + "GetModel": grpc.unary_unary_rpc_method_handler( + servicer.GetModel, + request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.GetModelRequest.FromString, + response_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.SerializeToString, + ), + "ListModels": grpc.unary_unary_rpc_method_handler( + servicer.ListModels, + request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsRequest.FromString, + response_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsResponse.SerializeToString, + ), + "PatchModel": grpc.unary_unary_rpc_method_handler( + servicer.PatchModel, + request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.PatchModelRequest.FromString, + response_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.SerializeToString, + ), + "DeleteModel": grpc.unary_unary_rpc_method_handler( + servicer.DeleteModel, + request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.DeleteModelRequest.FromString, + response_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + "google.cloud.bigquery.v2.ModelService", rpc_method_handlers + ) + server.add_generic_rpc_handlers((generic_handler,)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto new file mode 100644 index 000000000000..8e1e218f969c --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto @@ -0,0 +1,39 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.bigquery.v2; + +import "google/api/annotations.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; +option java_outer_classname = "ModelReferenceProto"; +option java_package = "com.google.cloud.bigquery.v2"; + + +// Id path of a model. +message ModelReference { + // [Required] The ID of the project containing this model. + string project_id = 1; + + // [Required] The ID of the dataset containing this model. + string dataset_id = 2; + + // [Required] The ID of the model. The ID must contain only + // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum + // length is 1,024 characters. + string model_id = 3; +} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py new file mode 100644 index 000000000000..3d1b53e44800 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py @@ -0,0 +1,137 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: google/cloud/bigquery_v2/proto/model_reference.proto + +import sys + +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name="google/cloud/bigquery_v2/proto/model_reference.proto", + package="google.cloud.bigquery.v2", + syntax="proto3", + serialized_options=_b( + "\n\034com.google.cloud.bigquery.v2B\023ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" + ), + serialized_pb=_b( + '\n4google/cloud/bigquery_v2/proto/model_reference.proto\x12\x18google.cloud.bigquery.v2\x1a\x1cgoogle/api/annotations.proto"J\n\x0eModelReference\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\tBu\n\x1c\x63om.google.cloud.bigquery.v2B\x13ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' + ), + dependencies=[google_dot_api_dot_annotations__pb2.DESCRIPTOR], +) + + +_MODELREFERENCE = _descriptor.Descriptor( + name="ModelReference", + full_name="google.cloud.bigquery.v2.ModelReference", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="project_id", + full_name="google.cloud.bigquery.v2.ModelReference.project_id", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="dataset_id", + full_name="google.cloud.bigquery.v2.ModelReference.dataset_id", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="model_id", + full_name="google.cloud.bigquery.v2.ModelReference.model_id", + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=112, + serialized_end=186, +) + +DESCRIPTOR.message_types_by_name["ModelReference"] = _MODELREFERENCE +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +ModelReference = _reflection.GeneratedProtocolMessageType( + "ModelReference", + (_message.Message,), + dict( + DESCRIPTOR=_MODELREFERENCE, + __module__="google.cloud.bigquery_v2.proto.model_reference_pb2", + __doc__="""Id path of a model. + + + Attributes: + project_id: + [Required] The ID of the project containing this model. + dataset_id: + [Required] The ID of the dataset containing this model. + model_id: + [Required] The ID of the model. The ID must contain only + letters (a-z, A-Z), numbers (0-9), or underscores (\_). The + maximum length is 1,024 characters. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ModelReference) + ), +) +_sym_db.RegisterMessage(ModelReference) + + +DESCRIPTOR._options = None +# @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py new file mode 100644 index 000000000000..07cb78fe03a9 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py @@ -0,0 +1,2 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto new file mode 100644 index 000000000000..82d9960242b3 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto @@ -0,0 +1,110 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.bigquery.v2; + +import "google/api/annotations.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; +option java_outer_classname = "StandardSqlProto"; +option java_package = "com.google.cloud.bigquery.v2"; + + +// The type of a variable, e.g., a function argument. +// Examples: +// INT64: {type_kind="INT64"} +// ARRAY: {type_kind="ARRAY", array_element_type="STRING"} +// STRUCT>: +// {type_kind="STRUCT", +// struct_type={fields=[ +// {name="x", type={type_kind="STRING"}}, +// {name="y", type={type_kind="ARRAY", array_element_type="DATE"}} +// ]}} +message StandardSqlDataType { + enum TypeKind { + // Invalid type. + TYPE_KIND_UNSPECIFIED = 0; + + // Encoded as a string in decimal format. + INT64 = 2; + + // Encoded as a boolean "false" or "true". + BOOL = 5; + + // Encoded as a number, or string "NaN", "Infinity" or "-Infinity". + FLOAT64 = 7; + + // Encoded as a string value. + STRING = 8; + + // Encoded as a base64 string per RFC 4648, section 4. + BYTES = 9; + + // Encoded as an RFC 3339 timestamp with mandatory "Z" time zone string: + // 1985-04-12T23:20:50.52Z + TIMESTAMP = 19; + + // Encoded as RFC 3339 full-date format string: 1985-04-12 + DATE = 10; + + // Encoded as RFC 3339 partial-time format string: 23:20:50.52 + TIME = 20; + + // Encoded as RFC 3339 full-date "T" partial-time: 1985-04-12T23:20:50.52 + DATETIME = 21; + + // Encoded as WKT + GEOGRAPHY = 22; + + // Encoded as a decimal string. + NUMERIC = 23; + + // Encoded as a list with types matching Type.array_type. + ARRAY = 16; + + // Encoded as a list with fields of type Type.struct_type[i]. List is used + // because a JSON object cannot have duplicate field names. + STRUCT = 17; + } + + // Required. The top level type of this field. + // Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY"). + TypeKind type_kind = 1; + + oneof sub_type { + // The type of the array's elements, if type_kind = "ARRAY". + StandardSqlDataType array_element_type = 2; + + // The fields of this struct, in order, if type_kind = "STRUCT". + StandardSqlStructType struct_type = 3; + } +} + +// A field or a column. +message StandardSqlField { + // Optional. The name of this field. Can be absent for struct fields. + string name = 1; + + // Optional. The type of this parameter. Absent if not explicitly + // specified (e.g., CREATE FUNCTION statement can omit the return type; + // in this case the output parameter does not have this "type" field). + StandardSqlDataType type = 2; +} + +message StandardSqlStructType { + repeated StandardSqlField fields = 1; +} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py new file mode 100644 index 000000000000..5f4a40278140 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py @@ -0,0 +1,365 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: google/cloud/bigquery_v2/proto/standard_sql.proto + +import sys + +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name="google/cloud/bigquery_v2/proto/standard_sql.proto", + package="google.cloud.bigquery.v2", + syntax="proto3", + serialized_options=_b( + "\n\034com.google.cloud.bigquery.v2B\020StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" + ), + serialized_pb=_b( + '\n1google/cloud/bigquery_v2/proto/standard_sql.proto\x12\x18google.cloud.bigquery.v2\x1a\x1cgoogle/api/annotations.proto"\xc6\x03\n\x13StandardSqlDataType\x12I\n\ttype_kind\x18\x01 \x01(\x0e\x32\x36.google.cloud.bigquery.v2.StandardSqlDataType.TypeKind\x12K\n\x12\x61rray_element_type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeH\x00\x12\x46\n\x0bstruct_type\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.v2.StandardSqlStructTypeH\x00"\xc2\x01\n\x08TypeKind\x12\x19\n\x15TYPE_KIND_UNSPECIFIED\x10\x00\x12\t\n\x05INT64\x10\x02\x12\x08\n\x04\x42OOL\x10\x05\x12\x0b\n\x07\x46LOAT64\x10\x07\x12\n\n\x06STRING\x10\x08\x12\t\n\x05\x42YTES\x10\t\x12\r\n\tTIMESTAMP\x10\x13\x12\x08\n\x04\x44\x41TE\x10\n\x12\x08\n\x04TIME\x10\x14\x12\x0c\n\x08\x44\x41TETIME\x10\x15\x12\r\n\tGEOGRAPHY\x10\x16\x12\x0b\n\x07NUMERIC\x10\x17\x12\t\n\x05\x41RRAY\x10\x10\x12\n\n\x06STRUCT\x10\x11\x42\n\n\x08sub_type"]\n\x10StandardSqlField\x12\x0c\n\x04name\x18\x01 \x01(\t\x12;\n\x04type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataType"S\n\x15StandardSqlStructType\x12:\n\x06\x66ields\x18\x01 \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldBr\n\x1c\x63om.google.cloud.bigquery.v2B\x10StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' + ), + dependencies=[google_dot_api_dot_annotations__pb2.DESCRIPTOR], +) + + +_STANDARDSQLDATATYPE_TYPEKIND = _descriptor.EnumDescriptor( + name="TypeKind", + full_name="google.cloud.bigquery.v2.StandardSqlDataType.TypeKind", + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name="TYPE_KIND_UNSPECIFIED", + index=0, + number=0, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="INT64", index=1, number=2, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="BOOL", index=2, number=5, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="FLOAT64", index=3, number=7, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="STRING", index=4, number=8, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="BYTES", index=5, number=9, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="TIMESTAMP", index=6, number=19, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="DATE", index=7, number=10, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="TIME", index=8, number=20, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="DATETIME", index=9, number=21, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="GEOGRAPHY", index=10, number=22, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="NUMERIC", index=11, number=23, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="ARRAY", index=12, number=16, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="STRUCT", index=13, number=17, serialized_options=None, type=None + ), + ], + containing_type=None, + serialized_options=None, + serialized_start=358, + serialized_end=552, +) +_sym_db.RegisterEnumDescriptor(_STANDARDSQLDATATYPE_TYPEKIND) + + +_STANDARDSQLDATATYPE = _descriptor.Descriptor( + name="StandardSqlDataType", + full_name="google.cloud.bigquery.v2.StandardSqlDataType", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="type_kind", + full_name="google.cloud.bigquery.v2.StandardSqlDataType.type_kind", + index=0, + number=1, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="array_element_type", + full_name="google.cloud.bigquery.v2.StandardSqlDataType.array_element_type", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="struct_type", + full_name="google.cloud.bigquery.v2.StandardSqlDataType.struct_type", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[_STANDARDSQLDATATYPE_TYPEKIND], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name="sub_type", + full_name="google.cloud.bigquery.v2.StandardSqlDataType.sub_type", + index=0, + containing_type=None, + fields=[], + ) + ], + serialized_start=110, + serialized_end=564, +) + + +_STANDARDSQLFIELD = _descriptor.Descriptor( + name="StandardSqlField", + full_name="google.cloud.bigquery.v2.StandardSqlField", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="name", + full_name="google.cloud.bigquery.v2.StandardSqlField.name", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="type", + full_name="google.cloud.bigquery.v2.StandardSqlField.type", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=566, + serialized_end=659, +) + + +_STANDARDSQLSTRUCTTYPE = _descriptor.Descriptor( + name="StandardSqlStructType", + full_name="google.cloud.bigquery.v2.StandardSqlStructType", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="fields", + full_name="google.cloud.bigquery.v2.StandardSqlStructType.fields", + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=661, + serialized_end=744, +) + +_STANDARDSQLDATATYPE.fields_by_name[ + "type_kind" +].enum_type = _STANDARDSQLDATATYPE_TYPEKIND +_STANDARDSQLDATATYPE.fields_by_name[ + "array_element_type" +].message_type = _STANDARDSQLDATATYPE +_STANDARDSQLDATATYPE.fields_by_name["struct_type"].message_type = _STANDARDSQLSTRUCTTYPE +_STANDARDSQLDATATYPE_TYPEKIND.containing_type = _STANDARDSQLDATATYPE +_STANDARDSQLDATATYPE.oneofs_by_name["sub_type"].fields.append( + _STANDARDSQLDATATYPE.fields_by_name["array_element_type"] +) +_STANDARDSQLDATATYPE.fields_by_name[ + "array_element_type" +].containing_oneof = _STANDARDSQLDATATYPE.oneofs_by_name["sub_type"] +_STANDARDSQLDATATYPE.oneofs_by_name["sub_type"].fields.append( + _STANDARDSQLDATATYPE.fields_by_name["struct_type"] +) +_STANDARDSQLDATATYPE.fields_by_name[ + "struct_type" +].containing_oneof = _STANDARDSQLDATATYPE.oneofs_by_name["sub_type"] +_STANDARDSQLFIELD.fields_by_name["type"].message_type = _STANDARDSQLDATATYPE +_STANDARDSQLSTRUCTTYPE.fields_by_name["fields"].message_type = _STANDARDSQLFIELD +DESCRIPTOR.message_types_by_name["StandardSqlDataType"] = _STANDARDSQLDATATYPE +DESCRIPTOR.message_types_by_name["StandardSqlField"] = _STANDARDSQLFIELD +DESCRIPTOR.message_types_by_name["StandardSqlStructType"] = _STANDARDSQLSTRUCTTYPE +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +StandardSqlDataType = _reflection.GeneratedProtocolMessageType( + "StandardSqlDataType", + (_message.Message,), + dict( + DESCRIPTOR=_STANDARDSQLDATATYPE, + __module__="google.cloud.bigquery_v2.proto.standard_sql_pb2", + __doc__="""The type of a variable, e.g., a function argument. Examples: INT64: + {type\_kind="INT64"} ARRAY: {type\_kind="ARRAY", + array\_element\_type="STRING"} STRUCT: + {type\_kind="STRUCT", struct\_type={fields=[ {name="x", + type={type\_kind="STRING"}}, {name="y", type={type\_kind="ARRAY", + array\_element\_type="DATE"}} ]}} + + + Attributes: + type_kind: + Required. The top level type of this field. Can be any + standard SQL data type (e.g., "INT64", "DATE", "ARRAY"). + array_element_type: + The type of the array's elements, if type\_kind = "ARRAY". + struct_type: + The fields of this struct, in order, if type\_kind = "STRUCT". + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlDataType) + ), +) +_sym_db.RegisterMessage(StandardSqlDataType) + +StandardSqlField = _reflection.GeneratedProtocolMessageType( + "StandardSqlField", + (_message.Message,), + dict( + DESCRIPTOR=_STANDARDSQLFIELD, + __module__="google.cloud.bigquery_v2.proto.standard_sql_pb2", + __doc__="""A field or a column. + + + Attributes: + name: + Optional. The name of this field. Can be absent for struct + fields. + type: + Optional. The type of this parameter. Absent if not explicitly + specified (e.g., CREATE FUNCTION statement can omit the return + type; in this case the output parameter does not have this + "type" field). + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlField) + ), +) +_sym_db.RegisterMessage(StandardSqlField) + +StandardSqlStructType = _reflection.GeneratedProtocolMessageType( + "StandardSqlStructType", + (_message.Message,), + dict( + DESCRIPTOR=_STANDARDSQLSTRUCTTYPE, + __module__="google.cloud.bigquery_v2.proto.standard_sql_pb2" + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlStructType) + ), +) +_sym_db.RegisterMessage(StandardSqlStructType) + + +DESCRIPTOR._options = None +# @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py new file mode 100644 index 000000000000..07cb78fe03a9 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py @@ -0,0 +1,2 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py new file mode 100644 index 000000000000..0d228e88abac --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import sys + +from google.api_core.protobuf_helpers import get_messages + +from google.cloud.bigquery_v2.proto import model_pb2 +from google.cloud.bigquery_v2.proto import model_reference_pb2 +from google.cloud.bigquery_v2.proto import standard_sql_pb2 +from google.protobuf import empty_pb2 +from google.protobuf import timestamp_pb2 +from google.protobuf import wrappers_pb2 + +_shared_modules = [empty_pb2, timestamp_pb2, wrappers_pb2] + +_local_modules = [model_pb2, model_reference_pb2, standard_sql_pb2] + +names = [] + +for module in _shared_modules: + for name, message in get_messages(module).items(): + setattr(sys.modules[__name__], name, message) + names.append(name) +for module in _local_modules: + for name, message in get_messages(module).items(): + message.__module__ = "google.cloud.bigquery_v2.types" + setattr(sys.modules[__name__], name, message) + names.append(name) + +__all__ = tuple(sorted(names)) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 0a1296aa4c73..09339596ea79 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -28,7 +28,7 @@ os.path.join("..", "bigquery_storage[pandas,fastavro]"), ) -BLACK_PATHS = ("google", "tests", "docs", "noxfile.py", "setup.py") +BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") def default(session): @@ -127,6 +127,7 @@ def snippets(session): # Run py.test against the snippets tests. session.run("py.test", os.path.join("docs", "snippets.py"), *session.posargs) + session.run("py.test", "samples", *session.posargs) @nox.session(python="3.6") diff --git a/packages/google-cloud-bigquery/samples/__init__.py b/packages/google-cloud-bigquery/samples/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/samples/delete_model.py b/packages/google-cloud-bigquery/samples/delete_model.py new file mode 100644 index 000000000000..dfe23cd7ef29 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/delete_model.py @@ -0,0 +1,31 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def delete_model(client, model_id): + """Sample ID: go/samples-tracker/1534""" + + # [START bigquery_delete_model] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set model_id to the ID of the model to fetch. + # model_id = 'your-project.your_dataset.your_model' + + client.delete_model(model_id) + # [END bigquery_delete_model] + + print("Deleted model '{}'.".format(model_id)) diff --git a/packages/google-cloud-bigquery/samples/get_model.py b/packages/google-cloud-bigquery/samples/get_model.py new file mode 100644 index 000000000000..8e43e53ec450 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/get_model.py @@ -0,0 +1,35 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_model(client, model_id): + """Sample ID: go/samples-tracker/1510""" + + # [START bigquery_get_model] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set model_id to the ID of the model to fetch. + # model_id = 'your-project.your_dataset.your_model' + + model = client.get_model(model_id) + + full_model_id = "{}.{}.{}".format(model.project, model.dataset_id, model.model_id) + friendly_name = model.friendly_name + print( + "Got model '{}' with friendly_name '{}'.".format(full_model_id, friendly_name) + ) + # [END bigquery_get_model] diff --git a/packages/google-cloud-bigquery/samples/list_models.py b/packages/google-cloud-bigquery/samples/list_models.py new file mode 100644 index 000000000000..cb6e4fb5569f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/list_models.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def list_models(client, dataset_id): + """Sample ID: go/samples-tracker/1512""" + + # [START bigquery_list_models] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset that contains + # the models you are listing. + # dataset_id = 'your-project.your_dataset' + + models = client.list_models(dataset_id) + + print("Models contained in '{}':".format(dataset_id)) + for model in models: + full_model_id = "{}.{}.{}".format( + model.project, model.dataset_id, model.model_id + ) + friendly_name = model.friendly_name + print("{}: friendly_name='{}'".format(full_model_id, friendly_name)) + # [END bigquery_list_models] diff --git a/packages/google-cloud-bigquery/samples/tests/__init__.py b/packages/google-cloud-bigquery/samples/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py new file mode 100644 index 000000000000..1543e1fdcd0a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -0,0 +1,62 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import uuid + +import pytest + +from google.cloud import bigquery + + +@pytest.fixture(scope="module") +def client(): + return bigquery.Client() + + +@pytest.fixture +def dataset_id(client): + now = datetime.datetime.now() + dataset_id = "python_samples_{}_{}".format( + now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] + ) + dataset = client.create_dataset(dataset_id) + yield "{}.{}".format(dataset.project, dataset.dataset_id) + client.delete_dataset(dataset, delete_contents=True) + + +@pytest.fixture +def model_id(client, dataset_id): + model_id = "{}.{}".format(dataset_id, uuid.uuid4().hex) + + # The only way to create a model resource is via SQL. + # Use a very small dataset (2 points), to train a model quickly. + sql = """ + CREATE MODEL `{}` + OPTIONS ( + model_type='linear_reg', + max_iteration=1, + learn_rate=0.4, + learn_rate_strategy='constant' + ) AS ( + SELECT 'a' AS f1, 2.0 AS label + UNION ALL + SELECT 'b' AS f1, 3.8 AS label + ) + """.format( + model_id + ) + + client.query(sql).result() + return model_id diff --git a/packages/google-cloud-bigquery/samples/tests/test_model_samples.py b/packages/google-cloud-bigquery/samples/tests/test_model_samples.py new file mode 100644 index 000000000000..d7b06a92a3e1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_model_samples.py @@ -0,0 +1,39 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import delete_model +from .. import get_model +from .. import list_models +from .. import update_model + + +def test_model_samples(capsys, client, dataset_id, model_id): + """Since creating a model is a long operation, test all model samples in + the same test, following a typical end-to-end flow. + """ + get_model.get_model(client, model_id) + out, err = capsys.readouterr() + assert model_id in out + + list_models.list_models(client, dataset_id) + out, err = capsys.readouterr() + assert "Models contained in '{}':".format(dataset_id) in out + + update_model.update_model(client, model_id) + out, err = capsys.readouterr() + assert "This model was modified from a Python program." in out + + delete_model.delete_model(client, model_id) + out, err = capsys.readouterr() + assert "Deleted model '{}'.".format(model_id) in out diff --git a/packages/google-cloud-bigquery/samples/update_model.py b/packages/google-cloud-bigquery/samples/update_model.py new file mode 100644 index 000000000000..2440066ae1ec --- /dev/null +++ b/packages/google-cloud-bigquery/samples/update_model.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_model(client, model_id): + """Sample ID: go/samples-tracker/1533""" + + # [START bigquery_update_model_description] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set model_id to the ID of the model to fetch. + # model_id = 'your-project.your_dataset.your_model' + + model = client.get_model(model_id) + model.description = "This model was modified from a Python program." + model = client.update_model(model, ["description"]) + + full_model_id = "{}.{}.{}".format(model.project, model.dataset_id, model.model_id) + print( + "Updated model '{}' with description '{}'.".format( + full_model_id, model.description + ) + ) + # [END bigquery_update_model_description] diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 1badaae98175..b3e9ef84db6c 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -32,6 +32,7 @@ "google-api-core >= 1.6.0, < 2.0.0dev", "google-cloud-core >= 0.29.0, < 0.30dev", "google-resumable-media >= 0.3.1", + 'enum34; python_version < "3.4"', ] extras = { "bqstorage": "google-cloud-bigquery-storage >= 0.2.0dev1, <2.0.0dev", diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata new file mode 100644 index 000000000000..4320d1d271bf --- /dev/null +++ b/packages/google-cloud-bigquery/synth.metadata @@ -0,0 +1,32 @@ +{ + "updateTime": "2019-04-03T23:38:10.307198Z", + "sources": [ + { + "generator": { + "name": "artman", + "version": "0.16.23", + "dockerImage": "googleapis/artman@sha256:f3a3f88000dc1cd1b4826104c5574aa5c534f6793fbf66e888d11c0d7ef5762e" + } + }, + { + "git": { + "name": "googleapis", + "remote": "git@github.com:googleapis/googleapis.git", + "sha": "04193ea2f8121388c998ab49c382f2c03417dcce", + "internalRef": "241828309" + } + } + ], + "destinations": [ + { + "client": { + "source": "googleapis", + "apiName": "bigquery", + "apiVersion": "v2", + "language": "python", + "generator": "gapic", + "config": "google/cloud/bigquery/artman_bigquery_v2.yaml" + } + } + ] +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py new file mode 100644 index 000000000000..a8370fd4e96a --- /dev/null +++ b/packages/google-cloud-bigquery/synth.py @@ -0,0 +1,54 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This script is used to synthesize generated parts of this library.""" + +import synthtool as s +from synthtool import gcp + +gapic = gcp.GAPICGenerator() + +version = 'v2' + +library = gapic.py_library( + 'bigquery', + version, + config_path='/google/cloud/bigquery/' + 'artman_bigquery_v2.yaml', + artman_output_name='bigquery-v2', + include_protos=True, +) + +s.move( + [ + library / "google/cloud/bigquery_v2/gapic/enums.py", + library / "google/cloud/bigquery_v2/types.py", + library / "google/cloud/bigquery_v2/proto/location*", + library / "google/cloud/bigquery_v2/proto/model*", + library / "google/cloud/bigquery_v2/proto/standard_sql*", + ], +) + +# Fix up proto docs that are missing summary line. +s.replace( + "google/cloud/bigquery_v2/proto/model_pb2.py", + '"""Attributes:', + '"""Protocol buffer.\n\n Attributes:', +) + +# Remove non-ascii characters from docstrings for Python 2.7. +# Format quoted strings as plain text. +s.replace("google/cloud/bigquery_v2/proto/*.py", "[“”]", '``') + +s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/packages/google-cloud-bigquery/tests/unit/model/__init__.py b/packages/google-cloud-bigquery/tests/unit/model/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model.py b/packages/google-cloud-bigquery/tests/unit/model/test_model.py new file mode 100644 index 000000000000..2086c333486d --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model.py @@ -0,0 +1,276 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import pytest + +import google.cloud._helpers +from google.cloud.bigquery_v2.gapic import enums + + +@pytest.fixture +def target_class(): + from google.cloud.bigquery import Model + + return Model + + +@pytest.fixture +def object_under_test(target_class): + return target_class("project-id.dataset_id.model_id") + + +def test_ctor(target_class): + from google.cloud.bigquery import ModelReference + + ref = ModelReference.from_string("my-proj.my_dset.my_model") + got = target_class(ref) + assert got.reference == ref + + +def test_ctor_string(target_class): + from google.cloud.bigquery import ModelReference + + model_id = "my-proj.my_dset.my_model" + ref = ModelReference.from_string(model_id) + got = target_class(model_id) + assert got.reference == ref + + +def test_from_api_repr(target_class): + from google.cloud.bigquery import ModelReference + + creation_time = datetime.datetime( + 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + modified_time = datetime.datetime( + 2011, 10, 1, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + expiration_time = datetime.datetime( + 2012, 12, 21, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + resource = { + "modelReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + }, + "location": "US", + "etag": "abcdefg", + "creationTime": str(google.cloud._helpers._millis(creation_time)), + "lastModifiedTime": str(google.cloud._helpers._millis(modified_time)), + "expirationTime": str(google.cloud._helpers._millis(expiration_time)), + "description": "A friendly description.", + "friendlyName": "A friendly name.", + "modelType": "LOGISTIC_REGRESSION", + "labels": {"greeting": u"こんにちは"}, + "trainingRuns": [ + { + "trainingOptions": {"initialLearnRate": 1.0}, + "startTime": str( + google.cloud._helpers._datetime_to_rfc3339(creation_time) + ), + }, + { + "trainingOptions": {"initialLearnRate": 0.5}, + "startTime": str( + google.cloud._helpers._datetime_to_rfc3339(modified_time) + ), + }, + { + "trainingOptions": {"initialLearnRate": 0.25}, + # Allow milliseconds since epoch format. + # TODO: Remove this hack once CL 238585470 hits prod. + "startTime": str(google.cloud._helpers._millis(expiration_time)), + }, + ], + "featureColumns": [], + } + got = target_class.from_api_repr(resource) + + assert got.project == "my-project" + assert got.dataset_id == "my_dataset" + assert got.model_id == "my_model" + assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") + assert got.path == "/projects/my-project/datasets/my_dataset/models/my_model" + assert got.location == "US" + assert got.etag == "abcdefg" + assert got.created == creation_time + assert got.modified == modified_time + assert got.expires == expiration_time + assert got.description == u"A friendly description." + assert got.friendly_name == u"A friendly name." + assert got.model_type == enums.Model.ModelType.LOGISTIC_REGRESSION + assert got.labels == {"greeting": u"こんにちは"} + assert got.training_runs[0].training_options.initial_learn_rate == 1.0 + assert ( + got.training_runs[0] + .start_time.ToDatetime() + .replace(tzinfo=google.cloud._helpers.UTC) + == creation_time + ) + assert got.training_runs[1].training_options.initial_learn_rate == 0.5 + assert ( + got.training_runs[1] + .start_time.ToDatetime() + .replace(tzinfo=google.cloud._helpers.UTC) + == modified_time + ) + assert got.training_runs[2].training_options.initial_learn_rate == 0.25 + assert ( + got.training_runs[2] + .start_time.ToDatetime() + .replace(tzinfo=google.cloud._helpers.UTC) + == expiration_time + ) + + +def test_from_api_repr_w_minimal_resource(target_class): + from google.cloud.bigquery import ModelReference + + resource = { + "modelReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + } + } + got = target_class.from_api_repr(resource) + assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") + assert got.location == "" + assert got.etag == "" + assert got.created is None + assert got.modified is None + assert got.expires is None + assert got.description is None + assert got.friendly_name is None + assert got.model_type == enums.Model.ModelType.MODEL_TYPE_UNSPECIFIED + assert got.labels == {} + assert len(got.training_runs) == 0 + assert len(got.feature_columns) == 0 + assert len(got.label_columns) == 0 + + +@pytest.mark.parametrize( + "resource,filter_fields,expected", + [ + ( + { + "friendlyName": "hello", + "description": "world", + "expirationTime": "12345", + "labels": {"a-label": "a-value"}, + }, + ["description"], + {"description": "world"}, + ), + ( + {"friendlyName": "hello", "description": "world"}, + ["friendlyName"], + {"friendlyName": "hello"}, + ), + ( + { + "friendlyName": "hello", + "description": "world", + "expirationTime": "12345", + "labels": {"a-label": "a-value"}, + }, + ["expires"], + {"expirationTime": "12345"}, + ), + ( + { + "friendlyName": "hello", + "description": "world", + "expirationTime": None, + "labels": {"a-label": "a-value"}, + }, + ["expires"], + {"expirationTime": None}, + ), + ( + { + "friendlyName": "hello", + "description": "world", + "expirationTime": None, + "labels": {"a-label": "a-value"}, + }, + ["labels"], + {"labels": {"a-label": "a-value"}}, + ), + ], +) +def test_build_resource(object_under_test, resource, filter_fields, expected): + object_under_test._properties = resource + got = object_under_test._build_resource(filter_fields) + assert got == expected + + +def test_set_description(object_under_test): + assert not object_under_test.description + object_under_test.description = "A model description." + assert object_under_test.description == "A model description." + object_under_test.description = None + assert not object_under_test.description + + +def test_set_expires(object_under_test): + assert not object_under_test.expires + expiration_time = datetime.datetime( + 2012, 12, 21, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + object_under_test.expires = expiration_time + assert object_under_test.expires == expiration_time + object_under_test.expires = None + assert not object_under_test.expires + + +def test_set_friendly_name(object_under_test): + assert not object_under_test.friendly_name + object_under_test.friendly_name = "A model name." + assert object_under_test.friendly_name == "A model name." + object_under_test.friendly_name = None + assert not object_under_test.friendly_name + + +def test_set_labels(object_under_test): + assert object_under_test.labels == {} + object_under_test.labels["data_owner"] = "someteam" + assert object_under_test.labels == {"data_owner": "someteam"} + del object_under_test.labels["data_owner"] + assert object_under_test.labels == {} + + +def test_replace_labels(object_under_test): + assert object_under_test.labels == {} + object_under_test.labels = {"data_owner": "someteam"} + assert object_under_test.labels == {"data_owner": "someteam"} + labels = {} + object_under_test.labels = labels + assert object_under_test.labels is labels + object_under_test.labels = None + assert object_under_test.labels == {} + + +def test_repr(target_class): + model = target_class("my-proj.my_dset.my_model") + got = repr(model) + assert got == ( + "Model(reference=ModelReference(" + "project='my-proj', dataset_id='my_dset', project_id='my_model'))" + ) diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py b/packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py new file mode 100644 index 000000000000..0145c76f6ad0 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +@pytest.fixture +def target_class(): + from google.cloud.bigquery import ModelReference + + return ModelReference + + +def test_from_api_repr(target_class): + resource = { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + } + got = target_class.from_api_repr(resource) + assert got.project == "my-project" + assert got.dataset_id == "my_dataset" + assert got.model_id == "my_model" + assert got.path == "/projects/my-project/datasets/my_dataset/models/my_model" + + +def test_to_api_repr(target_class): + ref = target_class.from_string("my-project.my_dataset.my_model") + got = ref.to_api_repr() + assert got == { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + } + + +def test_from_string(target_class): + got = target_class.from_string("string-project.string_dataset.string_model") + assert got.project == "string-project" + assert got.dataset_id == "string_dataset" + assert got.model_id == "string_model" + assert got.path == ( + "/projects/string-project/datasets/string_dataset/models/string_model" + ) + + +def test_from_string_legacy_string(target_class): + with pytest.raises(ValueError): + target_class.from_string("string-project:string_dataset.string_model") + + +def test_from_string_not_fully_qualified(target_class): + with pytest.raises(ValueError): + target_class.from_string("string_model") + + with pytest.raises(ValueError): + target_class.from_string("string_dataset.string_model") + + with pytest.raises(ValueError): + target_class.from_string("a.b.c.d") + + +def test_from_string_with_default_project(target_class): + got = target_class.from_string( + "string_dataset.string_model", default_project="default-project" + ) + assert got.project == "default-project" + assert got.dataset_id == "string_dataset" + assert got.model_id == "string_model" + + +def test_from_string_ignores_default_project(target_class): + got = target_class.from_string( + "string-project.string_dataset.string_model", default_project="default-project" + ) + assert got.project == "string-project" + assert got.dataset_id == "string_dataset" + assert got.model_id == "string_model" + + +def test_eq(target_class): + model = target_class.from_string("my-proj.my_dset.my_model") + model_too = target_class.from_string("my-proj.my_dset.my_model") + assert model == model_too + assert not (model != model_too) + + other_model = target_class.from_string("my-proj.my_dset.my_model2") + assert not (model == other_model) + assert model != other_model + + notamodel = object() + assert not (model == notamodel) + assert model != notamodel + + +def test_hash(target_class): + model = target_class.from_string("my-proj.my_dset.my_model") + model2 = target_class.from_string("my-proj.my_dset.model2") + got = {model: "hello", model2: "world"} + assert got[model] == "hello" + assert got[model2] == "world" + + model_too = target_class.from_string("my-proj.my_dset.my_model") + assert got[model_too] == "hello" + + +def test_repr(target_class): + model = target_class.from_string("my-proj.my_dset.my_model") + got = repr(model) + assert ( + got + == "ModelReference(project='my-proj', dataset_id='my_dset', project_id='my_model')" + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 671bbdf29778..4348462c83ee 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -37,6 +37,7 @@ pyarrow = None import google.api_core.exceptions +import google.cloud._helpers from google.cloud.bigquery.dataset import DatasetReference @@ -81,6 +82,7 @@ class TestClient(unittest.TestCase): PROJECT = "PROJECT" DS_ID = "DATASET_ID" TABLE_ID = "TABLE_ID" + MODEL_ID = "MODEL_ID" TABLE_REF = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) KMS_KEY_NAME = "projects/1/locations/global/keyRings/1/cryptoKeys/1" LOCATION = "us-central" @@ -1306,6 +1308,54 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): ] ) + def test_get_model(self): + path = "projects/%s/datasets/%s/models/%s" % ( + self.PROJECT, + self.DS_ID, + self.MODEL_ID, + ) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + } + } + conn = client._connection = _make_connection(resource) + + model_ref = client.dataset(self.DS_ID).model(self.MODEL_ID) + got = client.get_model(model_ref) + + conn.api_request.assert_called_once_with(method="GET", path="/%s" % path) + self.assertEqual(got.model_id, self.MODEL_ID) + + def test_get_model_w_string(self): + path = "projects/%s/datasets/%s/models/%s" % ( + self.PROJECT, + self.DS_ID, + self.MODEL_ID, + ) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + } + } + conn = client._connection = _make_connection(resource) + + model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) + got = client.get_model(model_id) + + conn.api_request.assert_called_once_with(method="GET", path="/%s" % path) + self.assertEqual(got.model_id, self.MODEL_ID) + def test_get_table(self): path = "projects/%s/datasets/%s/tables/%s" % ( self.PROJECT, @@ -1422,6 +1472,66 @@ def test_update_dataset_w_custom_property(self): self.assertEqual(dataset.project, self.PROJECT) self.assertEqual(dataset._properties["newAlphaProperty"], "unreleased property") + def test_update_model(self): + from google.cloud.bigquery.model import Model + + path = "projects/%s/datasets/%s/models/%s" % ( + self.PROJECT, + self.DS_ID, + self.MODEL_ID, + ) + description = "description" + title = "title" + expires = datetime.datetime( + 2012, 12, 21, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + }, + "description": description, + "etag": "etag", + "expirationTime": str(google.cloud._helpers._millis(expires)), + "friendlyName": title, + "labels": {"x": "y"}, + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _make_connection(resource, resource) + model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) + model = Model(model_id) + model.description = description + model.friendly_name = title + model.expires = expires + model.labels = {"x": "y"} + + updated_model = client.update_model( + model, ["description", "friendly_name", "labels", "expires"] + ) + + sent = { + "description": description, + "expirationTime": str(google.cloud._helpers._millis(expires)), + "friendlyName": title, + "labels": {"x": "y"}, + } + conn.api_request.assert_called_once_with( + method="PATCH", data=sent, path="/" + path, headers=None + ) + self.assertEqual(updated_model.model_id, model.model_id) + self.assertEqual(updated_model.description, model.description) + self.assertEqual(updated_model.friendly_name, model.friendly_name) + self.assertEqual(updated_model.labels, model.labels) + self.assertEqual(updated_model.expires, model.expires) + + # ETag becomes If-Match header. + model._proto.etag = "etag" + client.update_model(model, []) + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "etag") + def test_update_table(self): from google.cloud.bigquery.table import Table, SchemaField @@ -1773,6 +1883,78 @@ def test_list_tables_empty(self): method="GET", path=path, query_params={} ) + def test_list_models_empty(self): + path = "/projects/{}/datasets/{}/models".format(self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _make_connection({}) + + dataset_id = "{}.{}".format(self.PROJECT, self.DS_ID) + iterator = client.list_models(dataset_id) + page = six.next(iterator.pages) + models = list(page) + token = iterator.next_page_token + + self.assertEqual(models, []) + self.assertIsNone(token) + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={} + ) + + def test_list_models_defaults(self): + from google.cloud.bigquery.model import Model + + MODEL_1 = "model_one" + MODEL_2 = "model_two" + PATH = "projects/%s/datasets/%s/models" % (self.PROJECT, self.DS_ID) + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "models": [ + { + "modelReference": { + "modelId": MODEL_1, + "datasetId": self.DS_ID, + "projectId": self.PROJECT, + } + }, + { + "modelReference": { + "modelId": MODEL_2, + "datasetId": self.DS_ID, + "projectId": self.PROJECT, + } + }, + ], + } + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _make_connection(DATA) + dataset = client.dataset(self.DS_ID) + + iterator = client.list_models(dataset) + self.assertIs(iterator.dataset, dataset) + page = six.next(iterator.pages) + models = list(page) + token = iterator.next_page_token + + self.assertEqual(len(models), len(DATA["models"])) + for found, expected in zip(models, DATA["models"]): + self.assertIsInstance(found, Model) + self.assertEqual(found.model_id, expected["modelReference"]["modelId"]) + self.assertEqual(token, TOKEN) + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params={} + ) + + def test_list_models_wrong_type(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + with self.assertRaises(TypeError): + client.list_models(client.dataset(self.DS_ID).model("foo")) + def test_list_tables_defaults(self): from google.cloud.bigquery.table import TableListItem @@ -1960,6 +2142,68 @@ def test_delete_dataset_w_not_found_ok_true(self): conn.api_request.assert_called_with(method="DELETE", path=path, query_params={}) + def test_delete_model(self): + from google.cloud.bigquery.model import Model + + path = "projects/%s/datasets/%s/models/%s" % ( + self.PROJECT, + self.DS_ID, + self.MODEL_ID, + ) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) + models = ( + model_id, + client.dataset(self.DS_ID).model(self.MODEL_ID), + Model(model_id), + ) + conn = client._connection = _make_connection(*([{}] * len(models))) + + for arg in models: + client.delete_model(arg) + conn.api_request.assert_called_with(method="DELETE", path="/%s" % path) + + def test_delete_model_w_wrong_type(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + with self.assertRaises(TypeError): + client.delete_model(client.dataset(self.DS_ID)) + + def test_delete_model_w_not_found_ok_false(self): + path = "/projects/{}/datasets/{}/models/{}".format( + self.PROJECT, self.DS_ID, self.MODEL_ID + ) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _make_connection( + google.api_core.exceptions.NotFound("model not found") + ) + + with self.assertRaises(google.api_core.exceptions.NotFound): + client.delete_model("{}.{}".format(self.DS_ID, self.MODEL_ID)) + + conn.api_request.assert_called_with(method="DELETE", path=path) + + def test_delete_model_w_not_found_ok_true(self): + path = "/projects/{}/datasets/{}/models/{}".format( + self.PROJECT, self.DS_ID, self.MODEL_ID + ) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = _make_connection( + google.api_core.exceptions.NotFound("model not found") + ) + + client.delete_model( + "{}.{}".format(self.DS_ID, self.MODEL_ID), not_found_ok=True + ) + + conn.api_request.assert_called_with(method="DELETE", path=path) + def test_delete_table(self): from google.cloud.bigquery.table import Table diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 7774ccfe8814..96a2ace7da0c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -151,6 +151,13 @@ def test_table(self): self.assertEqual(table_ref.project, "some-project-1") self.assertEqual(table_ref.table_id, "table_1") + def test_model(self): + dataset_ref = self._make_one("some-project-1", "dataset_1") + model_ref = dataset_ref.model("model_1") + self.assertEqual(model_ref.project, "some-project-1") + self.assertEqual(model_ref.dataset_id, "dataset_1") + self.assertEqual(model_ref.model_id, "model_1") + def test_to_api_repr(self): dataset = self._make_one("project_1", "dataset_1") From bb6e433ee39da19a14e2e26bc0951c68e7197630 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 4 Apr 2019 06:12:27 -0700 Subject: [PATCH 0563/2016] Release bigquery 1.11.0 (#7657) * Release 1.11.0 --- packages/google-cloud-bigquery/CHANGELOG.md | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index be3414e2bcf1..b3c08dbbb6fb 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,35 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.11.0 + +04-03-2019 19:33 PDT + +### Implementation Changes + +- Remove classifier for Python 3.4 for end-of-life. ([#7535](https://github.com/googleapis/google-cloud-python/pull/7535)) + +### New Features + +- Enable fastparquet support by using temporary file in `load_table_from_dataframe` ([#7545](https://github.com/googleapis/google-cloud-python/pull/7545)) +- Allow string for copy sources, query destination, and default dataset ([#7560](https://github.com/googleapis/google-cloud-python/pull/7560)) +- Add `progress_bar_type` argument to `to_dataframe` to use `tqdm` to display a progress bar ([#7552](https://github.com/googleapis/google-cloud-python/pull/7552)) +- Call `get_table` in `list_rows` if the schema is not available ([#7621](https://github.com/googleapis/google-cloud-python/pull/7621)) +- Fallback to BQ API when there are problems reading from BQ Storage. ([#7633](https://github.com/googleapis/google-cloud-python/pull/7633)) +- Add methods for Models API ([#7562](https://github.com/googleapis/google-cloud-python/pull/7562)) +- Add option to use BigQuery Storage API from IPython magics ([#7640](https://github.com/googleapis/google-cloud-python/pull/7640)) + +### Documentation + +- Remove typo in `Table.from_api_repr` docstring. ([#7509](https://github.com/googleapis/google-cloud-python/pull/7509)) +- Add docs session to nox configuration for BigQuery ([#7541](https://github.com/googleapis/google-cloud-python/pull/7541)) + +### Internal / Testing Changes + +- Refactor `table()` methods into shared implementation. ([#7516](https://github.com/googleapis/google-cloud-python/pull/7516)) +- Blacken noxfile and setup file in nox session ([#7619](https://github.com/googleapis/google-cloud-python/pull/7619)) +- Actually use the `progress_bar_type` argument in `QueryJob.to_dataframe()`. ([#7616](https://github.com/googleapis/google-cloud-python/pull/7616)) + ## 1.10.0 03-06-2019 15:20 PST From 505f555c0cf63900dcbeadbd75df3366bdd01193 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 4 Apr 2019 09:25:21 -0700 Subject: [PATCH 0564/2016] Release bigquery 1.11.1 (#7662) * Release 1.11.1 * Increment version number. --- packages/google-cloud-bigquery/CHANGELOG.md | 8 ++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index b3c08dbbb6fb..31ed76af3536 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.11.1 + +04-04-2019 09:19 PDT + +### Internal / Testing Changes + +- Increment version number in `setup.py`. + ## 1.11.0 04-03-2019 19:33 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index b3e9ef84db6c..34c420a03ef4 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.10.0" +version = "1.11.1" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 8e14e435a7d88b1acf44bb7b2742f0b5253753af Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 5 Apr 2019 10:13:22 -0500 Subject: [PATCH 0565/2016] Add dependency on protobuf. (#7668) This is required for the generated classed from the Models API. --- packages/google-cloud-bigquery/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 34c420a03ef4..04ef9d63ee98 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -29,10 +29,11 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ + 'enum34; python_version < "3.4"', "google-api-core >= 1.6.0, < 2.0.0dev", "google-cloud-core >= 0.29.0, < 0.30dev", "google-resumable-media >= 0.3.1", - 'enum34; python_version < "3.4"', + "protobuf >= 3.6.0", ] extras = { "bqstorage": "google-cloud-bigquery-storage >= 0.2.0dev1, <2.0.0dev", From 7f9138d72f892feabf5284dd7d183dd17b7a6ae6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 5 Apr 2019 10:38:42 -0500 Subject: [PATCH 0566/2016] Release 1.11.2 (#7669) --- packages/google-cloud-bigquery/CHANGELOG.md | 8 ++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 31ed76af3536..cf83c2e89b27 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.11.2 + +04-05-2019 08:16 PDT + +### Dependencies + +- Add dependency on protobuf. ([#7668](https://github.com/googleapis/google-cloud-python/pull/7668)) + ## 1.11.1 04-04-2019 09:19 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 04ef9d63ee98..24a52aca3d56 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.11.1" +version = "1.11.2" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 35633eb246cfe90dc09455e2c47db3deab97460d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 8 Apr 2019 17:50:12 -0500 Subject: [PATCH 0567/2016] Don't mask auth errors in `to_dataframe` with BQ Storage API (#7674) * Don't mask auth errors in to_dataframe with BQ Storage API Don't hide errors such as insufficient permissions to create a read session, or the API is not enabled. Both of those are clearly problems if the developer has explicitly asked for BigQuery Storage API support. * Blacken * Remove unused import. --- .../google/cloud/bigquery/table.py | 6 +++++ .../tests/unit/test_table.py | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 7dbaf527274f..d7bfc1bb5dbe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1468,6 +1468,12 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non if bqstorage_client is not None: try: return self._to_dataframe_bqstorage(bqstorage_client, dtypes) + except google.api_core.exceptions.Forbidden: + # Don't hide errors such as insufficient permissions to create + # a read session, or the API is not enabled. Both of those are + # clearly problems if the developer has explicitly asked for + # BigQuery Storage API support. + raise except google.api_core.exceptions.GoogleAPICallError: # There is a known issue with reading from small anonymous # query results tables, so some errors are expected. Rather diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index b0a1318c6f11..77d80aff0925 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1793,6 +1793,28 @@ def test_to_dataframe_w_bqstorage_fallback_to_tabledata_list(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "int64") + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_raises_auth_error(self): + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + bqstorage_client.create_read_session.side_effect = google.api_core.exceptions.Forbidden( + "TEST BigQuery Storage API not enabled. TEST" + ) + path = "/foo" + api_request = mock.Mock(return_value={"rows": []}) + row_iterator = mut.RowIterator( + _mock_client(), api_request, path, [], table=mut.Table("proj.dset.tbl") + ) + + with pytest.raises(google.api_core.exceptions.Forbidden): + row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) From 69bc514639b54bfb50beb07a253bbbe58ff9f6c6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 9 Apr 2019 12:12:42 -0700 Subject: [PATCH 0568/2016] Add [all] extras to install all extra dependencies (#7610) * Add [all] extras to install all extra dependencies An [all] extra will be useful for convenience to get all the "bonus" features like progress bars, pandas integration, and BigQuery Storage API integration. It also simplifies the noxfile a bit, since the list of extras installed in each session was getting long, especially after using the bqstorage extra instead of in LOCAL_DEPS. * Need fastavro to actually read rows from bqstorage. * Blacken * Use lists for all extras. --- packages/google-cloud-bigquery/noxfile.py | 19 +++++-------------- packages/google-cloud-bigquery/setup.py | 19 +++++++++++++++---- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 09339596ea79..39e5f4548c0b 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -20,13 +20,7 @@ import nox -LOCAL_DEPS = ( - os.path.join("..", "api_core[grpc]"), - os.path.join("..", "core"), - # TODO: Move bigquery_storage back to dev_install once dtypes feature is - # released. Issue #7049 - os.path.join("..", "bigquery_storage[pandas,fastavro]"), -) +LOCAL_DEPS = (os.path.join("..", "api_core[grpc]"), os.path.join("..", "core")) BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") @@ -45,10 +39,7 @@ def default(session): session.install("-e", local_dep) # Pyarrow does not support Python 3.7 - if session.python == "3.7": - dev_install = ".[pandas, tqdm]" - else: - dev_install = ".[pandas, pyarrow, tqdm]" + dev_install = ".[all]" session.install("-e", dev_install) # IPython does not support Python 2 after version 5.x @@ -95,7 +86,7 @@ def system(session): session.install("-e", local_dep) session.install("-e", os.path.join("..", "storage")) session.install("-e", os.path.join("..", "test_utils")) - session.install("-e", ".[pandas]") + session.install("-e", ".[all]") # IPython does not support Python 2 after version 5.x if session.python == "2.7": @@ -123,7 +114,7 @@ def snippets(session): session.install("-e", local_dep) session.install("-e", os.path.join("..", "storage")) session.install("-e", os.path.join("..", "test_utils")) - session.install("-e", ".[pandas, pyarrow, fastparquet]") + session.install("-e", ".[all]") # Run py.test against the snippets tests. session.run("py.test", os.path.join("docs", "snippets.py"), *session.posargs) @@ -183,7 +174,7 @@ def docs(session): for local_dep in LOCAL_DEPS: session.install("-e", local_dep) session.install("-e", os.path.join("..", "storage")) - session.install("-e", ".[pandas, pyarrow]") + session.install("-e", ".[all]") shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 24a52aca3d56..2c4d570d2c7e 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -36,14 +36,25 @@ "protobuf >= 3.6.0", ] extras = { - "bqstorage": "google-cloud-bigquery-storage >= 0.2.0dev1, <2.0.0dev", - "pandas": "pandas>=0.17.1", + "bqstorage": [ + "google-cloud-bigquery-storage >= 0.2.0dev1, <2.0.0dev", + "fastavro>=0.21.2", + ], + "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. - 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': "pyarrow>=0.4.1", - "tqdm": "tqdm >= 4.0.0, <5.0.0dev", + 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': [ + "pyarrow>=0.4.1" + ], + "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], "fastparquet": ["fastparquet", "python-snappy"], } +all_extras = [] + +for extra in extras: + all_extras.extend(extras[extra]) + +extras["all"] = all_extras # Setup boilerplate below this line. From 01a020d79ec4bb080411f9f16e6804895c78e08c Mon Sep 17 00:00:00 2001 From: lbristol88 Date: Wed, 10 Apr 2019 11:09:24 -0700 Subject: [PATCH 0569/2016] Adding creationTime and expirationTime properties to TableListItem (#7684) * Added properties created and expires to TableListItem --- .../google/cloud/bigquery/table.py | 24 +++++++++++++++++++ .../tests/unit/test_table.py | 18 ++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d7bfc1bb5dbe..bf5fbc65b5a3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -906,6 +906,30 @@ def __init__(self, resource): self._properties = resource + @property + def created(self): + """Union[datetime.datetime, None]: Datetime at which the table was + created (:data:`None` until set from the server). + """ + creation_time = self._properties.get("creationTime") + if creation_time is not None: + # creation_time will be in milliseconds. + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(creation_time) + ) + + @property + def expires(self): + """Union[datetime.datetime, None]: Datetime at which the table will be + deleted. + """ + expiration_time = self._properties.get("expirationTime") + if expiration_time is not None: + # expiration_time will be in milliseconds. + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(expiration_time) + ) + @property def project(self): """str: Project bound to the table.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 77d80aff0925..965dd5f0f195 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1115,11 +1115,24 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) + def _setUpConstants(self): + import datetime + from google.cloud._helpers import UTC + + self.WHEN_TS = 1437767599.125 + self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + def test_ctor(self): + from google.cloud._helpers import _millis + + self._setUpConstants() project = "test-project" dataset_id = "test_dataset" table_id = "coffee_table" resource = { + "creationTime": self.WHEN_TS * 1000, + "expirationTime": _millis(self.EXP_TIME), "kind": "bigquery#table", "id": "{}:{}.{}".format(project, dataset_id, table_id), "tableReference": { @@ -1138,6 +1151,9 @@ def test_ctor(self): } table = self._make_one(resource) + + self.assertEqual(table.created, self.WHEN) + self.assertEqual(table.expires, self.EXP_TIME) self.assertEqual(table.project, project) self.assertEqual(table.dataset_id, dataset_id) self.assertEqual(table.table_id, table_id) @@ -1204,6 +1220,8 @@ def test_ctor_missing_properties(self): self.assertEqual(table.project, "testproject") self.assertEqual(table.dataset_id, "testdataset") self.assertEqual(table.table_id, "testtable") + self.assertIsNone(table.created) + self.assertIsNone(table.expires) self.assertIsNone(table.full_table_id) self.assertIsNone(table.friendly_name) self.assertIsNone(table.table_type) From 9bb0d11b369c1874f7828d9344ac3a216afbbed3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 10 Apr 2019 13:19:48 -0700 Subject: [PATCH 0570/2016] Move table and dataset snippets to samples/ directory (#7683) * Move create table sample to samples directory These samples follow the same pattern as the Models API samples, following our rubric guidelines for 1 sample per file. This work was completed by Layla and reviewed on Tim's personal fork, as the Models API PR hadn't yet merged when she started this work. --- packages/google-cloud-bigquery/docs/samples | 1 + .../google-cloud-bigquery/docs/snippets.py | 338 ------------------ .../docs/usage/datasets.rst | 12 +- .../docs/usage/tables.rst | 8 +- .../samples/create_dataset.py | 38 ++ .../samples/create_table.py | 37 ++ .../samples/delete_dataset.py | 32 ++ .../samples/delete_model.py | 3 +- .../samples/delete_table.py | 31 ++ .../samples/get_dataset.py | 56 +++ .../samples/get_table.py | 37 ++ .../samples/list_datasets.py | 33 ++ .../samples/list_tables.py | 33 ++ .../samples/tests/conftest.py | 34 +- .../samples/tests/test_create_dataset.py | 22 ++ .../samples/tests/test_create_table.py | 22 ++ .../samples/tests/test_delete_dataset.py | 22 ++ .../samples/tests/test_delete_table.py | 22 ++ .../samples/tests/test_get_dataset.py | 22 ++ .../samples/tests/test_get_table.py | 35 ++ .../samples/tests/test_list_datasets.py | 22 ++ .../samples/tests/test_list_tables.py | 23 ++ .../tests/test_update_dataset_access.py | 24 ++ ...update_dataset_default_table_expiration.py | 29 ++ .../tests/test_update_dataset_description.py | 22 ++ .../samples/update_dataset_access.py | 45 +++ ...update_dataset_default_table_expiration.py | 40 +++ .../samples/update_dataset_description.py | 37 ++ 28 files changed, 729 insertions(+), 351 deletions(-) create mode 120000 packages/google-cloud-bigquery/docs/samples create mode 100644 packages/google-cloud-bigquery/samples/create_dataset.py create mode 100644 packages/google-cloud-bigquery/samples/create_table.py create mode 100644 packages/google-cloud-bigquery/samples/delete_dataset.py create mode 100644 packages/google-cloud-bigquery/samples/delete_table.py create mode 100644 packages/google-cloud-bigquery/samples/get_dataset.py create mode 100644 packages/google-cloud-bigquery/samples/get_table.py create mode 100644 packages/google-cloud-bigquery/samples/list_datasets.py create mode 100644 packages/google-cloud-bigquery/samples/list_tables.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_create_dataset.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_create_table.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_delete_table.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_get_dataset.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_get_table.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_list_datasets.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_list_tables.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py create mode 100644 packages/google-cloud-bigquery/samples/update_dataset_access.py create mode 100644 packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py create mode 100644 packages/google-cloud-bigquery/samples/update_dataset_description.py diff --git a/packages/google-cloud-bigquery/docs/samples b/packages/google-cloud-bigquery/docs/samples new file mode 120000 index 000000000000..18cd9a30aaff --- /dev/null +++ b/packages/google-cloud-bigquery/docs/samples @@ -0,0 +1 @@ +../samples/ \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 00569c40af18..01ad3e014608 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -147,24 +147,6 @@ def test_create_client_json_credentials(): assert client is not None -def test_list_datasets(client): - """List datasets for a project.""" - # [START bigquery_list_datasets] - # from google.cloud import bigquery - # client = bigquery.Client() - - datasets = list(client.list_datasets()) - project = client.project - - if datasets: - print("Datasets in project {}:".format(project)) - for dataset in datasets: # API request(s) - print("\t{}".format(dataset.dataset_id)) - else: - print("{} project does not contain any datasets.".format(project)) - # [END bigquery_list_datasets] - - def test_list_datasets_by_label(client, to_delete): dataset_id = "list_datasets_by_label_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) @@ -192,78 +174,6 @@ def test_list_datasets_by_label(client, to_delete): assert dataset_id in found -def test_create_dataset(client, to_delete): - """Create a dataset.""" - dataset_id = "create_dataset_{}".format(_millis()) - - # [START bigquery_create_dataset] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - # Create a DatasetReference using a chosen dataset ID. - # The project defaults to the Client's project if not specified. - dataset_ref = client.dataset(dataset_id) - - # Construct a full Dataset object to send to the API. - dataset = bigquery.Dataset(dataset_ref) - # Specify the geographic location where the dataset should reside. - dataset.location = "US" - - # Send the dataset to the API for creation. - # Raises google.api_core.exceptions.Conflict if the Dataset already - # exists within the project. - dataset = client.create_dataset(dataset) # API request - # [END bigquery_create_dataset] - - to_delete.append(dataset) - - -def test_get_dataset_information(client, to_delete): - """View information about a dataset.""" - dataset_id = "get_dataset_{}".format(_millis()) - dataset_labels = {"color": "green"} - dataset_ref = client.dataset(dataset_id) - dataset = bigquery.Dataset(dataset_ref) - dataset.description = ORIGINAL_DESCRIPTION - dataset.labels = dataset_labels - dataset = client.create_dataset(dataset) # API request - to_delete.append(dataset) - - # [START bigquery_get_dataset] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - dataset = client.get_dataset(dataset_ref) # API request - - # View dataset properties - print("Dataset ID: {}".format(dataset_id)) - print("Description: {}".format(dataset.description)) - print("Labels:") - labels = dataset.labels - if labels: - for label, value in labels.items(): - print("\t{}: {}".format(label, value)) - else: - print("\tDataset has no labels defined.") - - # View tables in dataset - print("Tables:") - tables = list(client.list_tables(dataset_ref)) # API request(s) - if tables: - for table in tables: - print("\t{}".format(table.table_id)) - else: - print("\tThis dataset does not contain any tables.") - # [END bigquery_get_dataset] - - assert dataset.description == ORIGINAL_DESCRIPTION - assert dataset.labels == dataset_labels - assert tables == [] - - # [START bigquery_dataset_exists] def dataset_exists(client, dataset_reference): """Return if a dataset exists. @@ -301,66 +211,6 @@ def test_dataset_exists(client, to_delete): assert not dataset_exists(client, client.dataset("i_dont_exist")) -@pytest.mark.skip( - reason=( - "update_dataset() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588" - ) -) -def test_update_dataset_description(client, to_delete): - """Update a dataset's description.""" - dataset_id = "update_dataset_description_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.description = "Original description." - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_update_dataset_description] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - # dataset = client.get_dataset(dataset_ref) # API request - - assert dataset.description == "Original description." - dataset.description = "Updated description." - - dataset = client.update_dataset(dataset, ["description"]) # API request - - assert dataset.description == "Updated description." - # [END bigquery_update_dataset_description] - - -@pytest.mark.skip( - reason=( - "update_dataset() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588" - ) -) -def test_update_dataset_default_table_expiration(client, to_delete): - """Update a dataset's default table expiration.""" - dataset_id = "update_dataset_default_expiration_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_update_dataset_expiration] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - # dataset = client.get_dataset(dataset_ref) # API request - - assert dataset.default_table_expiration_ms is None - one_day_ms = 24 * 60 * 60 * 1000 # in milliseconds - dataset.default_table_expiration_ms = one_day_ms - - dataset = client.update_dataset( - dataset, ["default_table_expiration_ms"] - ) # API request - - assert dataset.default_table_expiration_ms == one_day_ms - # [END bigquery_update_dataset_expiration] - - @pytest.mark.skip( reason=( "update_dataset() is flaky " @@ -424,129 +274,6 @@ def test_manage_dataset_labels(client, to_delete): # [END bigquery_delete_label_dataset] -@pytest.mark.skip( - reason=( - "update_dataset() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588" - ) -) -def test_update_dataset_access(client, to_delete): - """Update a dataset's access controls.""" - dataset_id = "update_dataset_access_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_update_dataset_access] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset = client.get_dataset(client.dataset('my_dataset')) - - entry = bigquery.AccessEntry( - role="READER", - entity_type="userByEmail", - entity_id="sample.bigquery.dev@gmail.com", - ) - assert entry not in dataset.access_entries - entries = list(dataset.access_entries) - entries.append(entry) - dataset.access_entries = entries - - dataset = client.update_dataset(dataset, ["access_entries"]) # API request - - assert entry in dataset.access_entries - # [END bigquery_update_dataset_access] - - -def test_delete_dataset(client): - """Delete a dataset.""" - from google.cloud.exceptions import NotFound - - dataset1_id = "delete_dataset_{}".format(_millis()) - dataset1 = bigquery.Dataset(client.dataset(dataset1_id)) - client.create_dataset(dataset1) - - dataset2_id = "delete_dataset_with_tables{}".format(_millis()) - dataset2 = bigquery.Dataset(client.dataset(dataset2_id)) - client.create_dataset(dataset2) - - table = bigquery.Table(dataset2.table("new_table")) - client.create_table(table) - - # [START bigquery_delete_dataset] - # from google.cloud import bigquery - # client = bigquery.Client() - - # Delete a dataset that does not contain any tables - # dataset1_id = 'my_empty_dataset' - dataset1_ref = client.dataset(dataset1_id) - client.delete_dataset(dataset1_ref) # API request - - print("Dataset {} deleted.".format(dataset1_id)) - - # Use the delete_contents parameter to delete a dataset and its contents - # dataset2_id = 'my_dataset_with_tables' - dataset2_ref = client.dataset(dataset2_id) - client.delete_dataset(dataset2_ref, delete_contents=True) # API request - - print("Dataset {} deleted.".format(dataset2_id)) - # [END bigquery_delete_dataset] - - for dataset in [dataset1, dataset2]: - with pytest.raises(NotFound): - client.get_dataset(dataset) # API request - - -def test_list_tables(client, to_delete): - """List tables within a dataset.""" - dataset_id = "list_tables_dataset_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - dataset = client.create_dataset(bigquery.Dataset(dataset_ref)) - to_delete.append(dataset) - - # [START bigquery_list_tables] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - - tables = list(client.list_tables(dataset_ref)) # API request(s) - assert len(tables) == 0 - - table_ref = dataset.table("my_table") - table = bigquery.Table(table_ref) - client.create_table(table) # API request - tables = list(client.list_tables(dataset)) # API request(s) - - assert len(tables) == 1 - assert tables[0].table_id == "my_table" - # [END bigquery_list_tables] - - -def test_create_table(client, to_delete): - """Create a table.""" - dataset_id = "create_table_dataset_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_create_table] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - - schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - table_ref = dataset_ref.table("my_table") - table = bigquery.Table(table_ref, schema=schema) - table = client.create_table(table) # API request - - assert table.table_id == "my_table" - # [END bigquery_create_table] - - def test_create_table_nested_repeated_schema(client, to_delete): dataset_id = "create_table_nested_repeated_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) @@ -729,40 +456,6 @@ def test_load_and_query_partitioned_table(client, to_delete): assert len(rows) == 29 -def test_get_table_information(client, to_delete): - """Show a table's properties.""" - dataset_id = "show_table_dataset_{}".format(_millis()) - table_id = "show_table_table_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table.description = ORIGINAL_DESCRIPTION - table = client.create_table(table) - - # [START bigquery_get_table] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - dataset_ref = client.dataset(dataset_id) - table_ref = dataset_ref.table(table_id) - table = client.get_table(table_ref) # API Request - - # View table properties - print(table.schema) - print(table.description) - print(table.num_rows) - # [END bigquery_get_table] - - assert table.schema == SCHEMA - assert table.description == ORIGINAL_DESCRIPTION - assert table.num_rows == 0 - - # [START bigquery_table_exists] def table_exists(client, table_reference): """Return if a table exists. @@ -2081,37 +1774,6 @@ def test_extract_table_compressed(client, to_delete): to_delete.insert(0, blob) -def test_delete_table(client, to_delete): - """Delete a table.""" - from google.cloud.exceptions import NotFound - - dataset_id = "delete_table_dataset_{}".format(_millis()) - table_id = "delete_table_table_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - dataset = bigquery.Dataset(dataset_ref) - dataset.location = "US" - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - table_ref = dataset.table(table_id) - table = bigquery.Table(table_ref, schema=SCHEMA) - client.create_table(table) - # [START bigquery_delete_table] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - table_ref = client.dataset(dataset_id).table(table_id) - client.delete_table(table_ref) # API request - - print("Table {}:{} deleted.".format(dataset_id, table_id)) - # [END bigquery_delete_table] - - with pytest.raises(NotFound): - client.get_table(table) # API request - - def test_undelete_table(client, to_delete): dataset_id = "undelete_table_dataset_{}".format(_millis()) table_id = "undelete_table_table_{}".format(_millis()) diff --git a/packages/google-cloud-bigquery/docs/usage/datasets.rst b/packages/google-cloud-bigquery/docs/usage/datasets.rst index 09ae90767cdc..d5646355c00d 100644 --- a/packages/google-cloud-bigquery/docs/usage/datasets.rst +++ b/packages/google-cloud-bigquery/docs/usage/datasets.rst @@ -19,7 +19,7 @@ Listing Datasets List datasets for a project with the :func:`~google.cloud.bigquery.client.Client.list_datasets` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/list_datasets.py :language: python :dedent: 4 :start-after: [START bigquery_list_datasets] @@ -31,7 +31,7 @@ Getting a Dataset Get a dataset resource (to pick up changes made by another client) with the :func:`~google.cloud.bigquery.client.Client.get_dataset` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/get_dataset.py :language: python :dedent: 4 :start-after: [START bigquery_get_dataset] @@ -43,7 +43,7 @@ Creating a Dataset Create a new dataset with the :func:`~google.cloud.bigquery.client.Client.create_dataset` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/create_dataset.py :language: python :dedent: 4 :start-after: [START bigquery_create_dataset] @@ -55,7 +55,7 @@ Updating a Dataset Update a property in a dataset's metadata with the :func:`~google.cloud.bigquery.client.Client.update_dataset` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/update_dataset_description.py :language: python :dedent: 4 :start-after: [START bigquery_update_dataset_description] @@ -64,7 +64,7 @@ Update a property in a dataset's metadata with the Modify user permissions on a dataset with the :func:`~google.cloud.bigquery.client.Client.update_dataset` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/update_dataset_access.py :language: python :dedent: 4 :start-after: [START bigquery_update_dataset_access] @@ -76,7 +76,7 @@ Deleting a Dataset Delete a dataset with the :func:`~google.cloud.bigquery.client.Client.delete_dataset` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/delete_dataset.py :language: python :dedent: 4 :start-after: [START bigquery_delete_dataset] diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index 555366fd2a4b..4aede9545cd8 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -10,7 +10,7 @@ Listing Tables List the tables belonging to a dataset with the :func:`~google.cloud.bigquery.client.Client.list_tables` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/list_tables.py :language: python :dedent: 4 :start-after: [START bigquery_list_tables] @@ -22,7 +22,7 @@ Getting a Table Get a table resource with the :func:`~google.cloud.bigquery.client.Client.get_table` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/get_table.py :language: python :dedent: 4 :start-after: [START bigquery_get_table] @@ -43,7 +43,7 @@ Creating a Table Create an empty table with the :func:`~google.cloud.bigquery.client.Client.create_table` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/create_table.py :language: python :dedent: 4 :start-after: [START bigquery_create_table] @@ -140,7 +140,7 @@ Deleting a Table Delete a table with the :func:`~google.cloud.bigquery.client.Client.delete_table` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/delete_table.py :language: python :dedent: 4 :start-after: [START bigquery_delete_table] diff --git a/packages/google-cloud-bigquery/samples/create_dataset.py b/packages/google-cloud-bigquery/samples/create_dataset.py new file mode 100644 index 000000000000..89ca9d38f5f3 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/create_dataset.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_dataset(client, dataset_id): + + # [START bigquery_create_dataset] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to create. + # dataset_id = "{}.your_dataset".format(client.project) + + # Construct a full Dataset object to send to the API. + dataset = bigquery.Dataset(dataset_id) + + # TODO(developer): Specify the geographic location where the dataset should reside. + dataset.location = "US" + + # Send the dataset to the API for creation. + # Raises google.api_core.exceptions.Conflict if the Dataset already + # exists within the project. + dataset = client.create_dataset(dataset) # API request + print("Created dataset {}.{}".format(client.project, dataset.dataset_id)) + # [END bigquery_create_dataset] diff --git a/packages/google-cloud-bigquery/samples/create_table.py b/packages/google-cloud-bigquery/samples/create_table.py new file mode 100644 index 000000000000..5e2e34d41d99 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/create_table.py @@ -0,0 +1,37 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table(client, table_id): + + # [START bigquery_create_table] + from google.cloud import bigquery + + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create + # table_id = "your-project.your_dataset.your_table_name" + + table = bigquery.Table(table_id, schema=schema) + table = client.create_table(table) # API request + print( + "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id) + ) + # [END bigquery_create_table] diff --git a/packages/google-cloud-bigquery/samples/delete_dataset.py b/packages/google-cloud-bigquery/samples/delete_dataset.py new file mode 100644 index 000000000000..58851f1e2120 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/delete_dataset.py @@ -0,0 +1,32 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def delete_dataset(client, dataset_id): + + # [START bigquery_delete_dataset] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set model_id to the ID of the model to fetch. + # dataset_id = 'your-project.your_dataset' + + # Use the delete_contents parameter to delete a dataset and its contents + # Use the not_found_ok parameter to not receive an error if the dataset has already been deleted. + client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) + + print("Deleted dataset '{}'.".format(dataset_id)) + # [END bigquery_delete_dataset] diff --git a/packages/google-cloud-bigquery/samples/delete_model.py b/packages/google-cloud-bigquery/samples/delete_model.py index dfe23cd7ef29..371f9003576b 100644 --- a/packages/google-cloud-bigquery/samples/delete_model.py +++ b/packages/google-cloud-bigquery/samples/delete_model.py @@ -26,6 +26,5 @@ def delete_model(client, model_id): # model_id = 'your-project.your_dataset.your_model' client.delete_model(model_id) - # [END bigquery_delete_model] - print("Deleted model '{}'.".format(model_id)) + # [END bigquery_delete_model] diff --git a/packages/google-cloud-bigquery/samples/delete_table.py b/packages/google-cloud-bigquery/samples/delete_table.py new file mode 100644 index 000000000000..3eb7dc918da7 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/delete_table.py @@ -0,0 +1,31 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def delete_table(client, table_id): + + # [START bigquery_delete_table] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to fetch. + # table_id = 'your-project.your_dataset.your_table' + + # If the table does not exist, delete_table raises + # google.api_core.exceptions.NotFound unless not_found_ok is True + client.delete_table(table_id, not_found_ok=True) + print("Deleted table '{}'.".format(table_id)) + # [END bigquery_delete_table] diff --git a/packages/google-cloud-bigquery/samples/get_dataset.py b/packages/google-cloud-bigquery/samples/get_dataset.py new file mode 100644 index 000000000000..eeab2e088d2f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/get_dataset.py @@ -0,0 +1,56 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_dataset(client, dataset_id): + + # [START bigquery_get_dataset] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = 'your-project.your_dataset' + + dataset = client.get_dataset(dataset_id) + + full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id) + friendly_name = dataset.friendly_name + print( + "Got dataset '{}' with friendly_name '{}'.".format( + full_dataset_id, friendly_name + ) + ) + + # View dataset properties + print("Description: {}".format(dataset.description)) + print("Labels:") + labels = dataset.labels + if labels: + for label, value in labels.items(): + print("\t{}: {}".format(label, value)) + else: + print("\tDataset has no labels defined.") + + # View tables in dataset + print("Tables:") + tables = list(client.list_tables(dataset)) # API request(s) + if tables: + for table in tables: + print("\t{}".format(table.table_id)) + else: + print("\tThis dataset does not contain any tables.") + + # [END bigquery_get_dataset] diff --git a/packages/google-cloud-bigquery/samples/get_table.py b/packages/google-cloud-bigquery/samples/get_table.py new file mode 100644 index 000000000000..e6a5c502e2b3 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/get_table.py @@ -0,0 +1,37 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_table(client, table_id): + + # [START bigquery_get_table] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the model to fetch. + # table_id = 'your-project.your_dataset.your_table' + + table = client.get_table(table_id) + + print( + "Got table '{}.{}.{}'.".format(table.project, table.dataset_id, table.table_id) + ) + + # View table properties + print("Table schema: {}".format(table.schema)) + print("Table description: {}".format(table.description)) + print("Table has {} rows".format(table.num_rows)) + # [END bigquery_get_table] diff --git a/packages/google-cloud-bigquery/samples/list_datasets.py b/packages/google-cloud-bigquery/samples/list_datasets.py new file mode 100644 index 000000000000..c9ddf4f2523c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/list_datasets.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def list_datasets(client): + + # [START bigquery_list_datasets] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + datasets = list(client.list_datasets()) + project = client.project + + if datasets: + print("Datasets in project {}:".format(project)) + for dataset in datasets: # API request(s) + print("\t{}".format(dataset.dataset_id)) + else: + print("{} project does not contain any datasets.".format(project)) + # [END bigquery_list_datasets] diff --git a/packages/google-cloud-bigquery/samples/list_tables.py b/packages/google-cloud-bigquery/samples/list_tables.py new file mode 100644 index 000000000000..33ed408906b0 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/list_tables.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def list_tables(client, dataset_id): + + # [START bigquery_list_tables] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset that contains + # the tables you are listing. + # dataset_id = 'your-project.your_dataset' + + tables = client.list_tables(dataset_id) + + print("Tables contained in '{}':".format(dataset_id)) + for table in tables: + print("{}.{}.{}".format(table.project, table.dataset_id, table.table_id)) + # [END bigquery_list_tables] diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index 1543e1fdcd0a..629b23473b01 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -25,6 +25,25 @@ def client(): return bigquery.Client() +@pytest.fixture +def random_table_id(client, dataset_id): + now = datetime.datetime.now() + random_table_id = "example_table_{}_{}".format( + now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] + ) + return "{}.{}".format(dataset_id, random_table_id) + + +@pytest.fixture +def random_dataset_id(client): + now = datetime.datetime.now() + random_dataset_id = "example_dataset_{}_{}".format( + now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] + ) + yield "{}.{}".format(client.project, random_dataset_id) + client.delete_dataset(random_dataset_id, delete_contents=True, not_found_ok=True) + + @pytest.fixture def dataset_id(client): now = datetime.datetime.now() @@ -33,7 +52,20 @@ def dataset_id(client): ) dataset = client.create_dataset(dataset_id) yield "{}.{}".format(dataset.project, dataset.dataset_id) - client.delete_dataset(dataset, delete_contents=True) + client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) + + +@pytest.fixture +def table_id(client, dataset_id): + now = datetime.datetime.now() + table_id = "python_samples_{}_{}".format( + now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] + ) + + table = bigquery.Table("{}.{}".format(dataset_id, table_id)) + table = client.create_table(table) + yield "{}.{}.{}".format(table.project, table.dataset_id, table.table_id) + client.delete_table(table, not_found_ok=True) @pytest.fixture diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py new file mode 100644 index 000000000000..dfadc67d8468 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import create_dataset + + +def test_create_dataset(capsys, client, random_dataset_id): + + create_dataset.create_dataset(client, random_dataset_id) + out, err = capsys.readouterr() + assert "Created dataset {}".format(random_dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table.py b/packages/google-cloud-bigquery/samples/tests/test_create_table.py new file mode 100644 index 000000000000..903f76b536ea --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import create_table + + +def test_create_table(capsys, client, random_table_id): + + create_table.create_table(client, random_table_id) + out, err = capsys.readouterr() + assert "Created table {}".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py new file mode 100644 index 000000000000..2b1b6ad06195 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import delete_dataset + + +def test_delete_dataset(capsys, client, dataset_id): + + delete_dataset.delete_dataset(client, dataset_id) + out, err = capsys.readouterr() + assert "Deleted dataset '{}'.".format(dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_table.py b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py new file mode 100644 index 000000000000..8f4796623a83 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import delete_table + + +def test_delete_table(capsys, client, table_id): + + delete_table.delete_table(client, table_id) + out, err = capsys.readouterr() + assert "Deleted table '{}'.".format(table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py new file mode 100644 index 000000000000..374f8835211a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import get_dataset + + +def test_get_dataset(capsys, client, dataset_id): + + get_dataset.get_dataset(client, dataset_id) + out, err = capsys.readouterr() + assert "{}".format(dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_table.py b/packages/google-cloud-bigquery/samples/tests/test_get_table.py new file mode 100644 index 000000000000..debf1b63a3fc --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_get_table.py @@ -0,0 +1,35 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +from .. import get_table + + +def test_get_table(capsys, client, random_table_id): + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + table = bigquery.Table(random_table_id, schema) + table.description = "Sample Table" + table = client.create_table(table) + + get_table.get_table(client, random_table_id) + out, err = capsys.readouterr() + assert "Got table '{}'.".format(random_table_id) in out + assert "full_name" in out # test that schema is printed + assert "Table description: Sample Table" in out + assert "Table has 0 rows" in out + client.delete_table(table, not_found_ok=True) diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py b/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py new file mode 100644 index 000000000000..4c66a24f9b1a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import list_datasets + + +def test_list_datasets(capsys, client, dataset_id): + + list_datasets.list_datasets(client) + out, err = capsys.readouterr() + assert "Datasets in project {}:".format(client.project) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_tables.py b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py new file mode 100644 index 000000000000..ec1621ac7579 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import list_tables + + +def test_list_tables(capsys, client, dataset_id, table_id): + + list_tables.list_tables(client, dataset_id) + out, err = capsys.readouterr() + assert "Tables contained in '{}':".format(dataset_id) in out + assert table_id in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py new file mode 100644 index 000000000000..ae33dbfe4a4c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py @@ -0,0 +1,24 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import update_dataset_access + + +def test_update_dataset_access(capsys, client, dataset_id): + + update_dataset_access.update_dataset_access(client, dataset_id) + out, err = capsys.readouterr() + assert ( + "Updated dataset '{}' with modified user permissions.".format(dataset_id) in out + ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py new file mode 100644 index 000000000000..46e9654209ed --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py @@ -0,0 +1,29 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import update_dataset_default_table_expiration + + +def test_update_dataset_default_table_expiration(capsys, client, dataset_id): + + one_day_ms = 24 * 60 * 60 * 1000 # in milliseconds + + update_dataset_default_table_expiration.update_dataset_default_table_expiration( + client, dataset_id + ) + out, err = capsys.readouterr() + assert ( + "Updated dataset {} with new expiration {}".format(dataset_id, one_day_ms) + in out + ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py new file mode 100644 index 000000000000..c6f8889f50da --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import update_dataset_description + + +def test_update_dataset_description(capsys, client, dataset_id): + + update_dataset_description.update_dataset_description(client, dataset_id) + out, err = capsys.readouterr() + assert "Updated description." in out diff --git a/packages/google-cloud-bigquery/samples/update_dataset_access.py b/packages/google-cloud-bigquery/samples/update_dataset_access.py new file mode 100644 index 000000000000..aa316a38dff9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/update_dataset_access.py @@ -0,0 +1,45 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_dataset_access(client, dataset_id): + + # [START bigquery_update_dataset_access] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = 'your-project.your_dataset' + + dataset = client.get_dataset(dataset_id) + + entry = bigquery.AccessEntry( + role="READER", + entity_type="userByEmail", + entity_id="sample.bigquery.dev@gmail.com", + ) + + entries = list(dataset.access_entries) + entries.append(entry) + dataset.access_entries = entries + + dataset = client.update_dataset(dataset, ["access_entries"]) # API request + + full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id) + print( + "Updated dataset '{}' with modified user permissions.".format(full_dataset_id) + ) + # [END bigquery_update_dataset_access] diff --git a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py new file mode 100644 index 000000000000..a5ac38c01a99 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py @@ -0,0 +1,40 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_dataset_default_table_expiration(client, dataset_id): + + # [START bigquery_update_dataset_expiration] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = 'your-project.your_dataset' + + dataset = client.get_dataset(dataset_id) + dataset.default_table_expiration_ms = 24 * 60 * 60 * 1000 # in milliseconds + + dataset = client.update_dataset( + dataset, ["default_table_expiration_ms"] + ) # API request + + full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id) + print( + "Updated dataset {} with new expiration {}".format( + full_dataset_id, dataset.default_table_expiration_ms + ) + ) + # [END bigquery_update_dataset_expiration] diff --git a/packages/google-cloud-bigquery/samples/update_dataset_description.py b/packages/google-cloud-bigquery/samples/update_dataset_description.py new file mode 100644 index 000000000000..70be80b7507e --- /dev/null +++ b/packages/google-cloud-bigquery/samples/update_dataset_description.py @@ -0,0 +1,37 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_dataset_description(client, dataset_id): + + # [START bigquery_update_dataset_description] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = 'your-project.your_dataset' + + dataset = client.get_dataset(dataset_id) + dataset.description = "Updated description." + dataset = client.update_dataset(dataset, ["description"]) + + full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id) + print( + "Updated dataset '{}' with description '{}'.".format( + full_dataset_id, dataset.description + ) + ) + # [END bigquery_update_dataset_description] From ac8d7303802d1c1c0dee4197c36c604b8ed89fb6 Mon Sep 17 00:00:00 2001 From: lbristol88 Date: Wed, 10 Apr 2019 13:43:45 -0700 Subject: [PATCH 0571/2016] Adding `clustering_fields` property to TableListItem (#7692) * Added property clustering_fields to TableListItem along with test * Updated tests for clustering fields. --- .../google/cloud/bigquery/table.py | 17 +++++++++++++++++ .../tests/unit/test_table.py | 3 +++ 2 files changed, 20 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index bf5fbc65b5a3..e9f06998d2ce 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1021,6 +1021,23 @@ def friendly_name(self): view_use_legacy_sql = property(_view_use_legacy_sql_getter) + @property + def clustering_fields(self): + """Union[List[str], None]: Fields defining clustering for the table + + (Defaults to :data:`None`). + + Clustering fields are immutable after table creation. + + .. note:: + + As of 2018-06-29, clustering fields cannot be set on a table + which does not also have time partioning defined. + """ + prop = self._properties.get("clustering") + if prop is not None: + return list(prop.get("fields", ())) + @classmethod def from_string(cls, full_table_id): """Construct a table from fully-qualified table ID. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 965dd5f0f195..557d57741382 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1148,6 +1148,7 @@ def test_ctor(self): "expirationMs": "10000", }, "labels": {"some-stuff": "this-is-a-label"}, + "clustering": {"fields": ["string"]}, } table = self._make_one(resource) @@ -1170,6 +1171,7 @@ def test_ctor(self): self.assertEqual(table.time_partitioning.field, "mycolumn") self.assertEqual(table.labels["some-stuff"], "this-is-a-label") self.assertIsNone(table.view_use_legacy_sql) + self.assertEqual(table.clustering_fields, ["string"]) with warnings.catch_warnings(record=True) as warned: self.assertEqual(table.partitioning_type, "DAY") @@ -1222,6 +1224,7 @@ def test_ctor_missing_properties(self): self.assertEqual(table.table_id, "testtable") self.assertIsNone(table.created) self.assertIsNone(table.expires) + self.assertIsNone(table.clustering_fields) self.assertIsNone(table.full_table_id) self.assertIsNone(table.friendly_name) self.assertIsNone(table.table_type) From 69e1d09635a31be2de17b4e339e9103252dfffee Mon Sep 17 00:00:00 2001 From: lbristol88 Date: Mon, 15 Apr 2019 15:38:29 -0700 Subject: [PATCH 0572/2016] Adding missing properties from LoadJobConfig to LoadJob library (#7710) * Added new properties to LoadJob that are in LoadJobConfig. * Added destination property and updated tests per feedback. --- .../google/cloud/bigquery/job.py | 29 ++++++++++++++++++- .../tests/unit/test_job.py | 12 ++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 94a2290cc29e..1aa99e699db3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1257,9 +1257,18 @@ def __init__(self, job_id, source_uris, destination, client, job_config=None): job_config = LoadJobConfig() self.source_uris = source_uris - self.destination = destination + self._destination = destination self._configuration = job_config + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: table where loaded rows are written + + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTable + """ + return self._destination + @property def allow_jagged_rows(self): """See @@ -1371,6 +1380,24 @@ def destination_encryption_configuration(self): """ return self._configuration.destination_encryption_configuration + @property + def destination_table_description(self): + """Union[str, None] name given to destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTableProperties.description + """ + return self._configuration.destination_table_description + + @property + def destination_table_friendly_name(self): + """Union[str, None] name given to destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTableProperties.friendlyName + """ + return self._configuration.destination_table_friendly_name + @property def time_partitioning(self): """See diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a42d9ffc311c..bbb2c54d852f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1786,6 +1786,8 @@ def test_ctor(self): self.assertIsNone(job.source_format) self.assertIsNone(job.write_disposition) self.assertIsNone(job.destination_encryption_configuration) + self.assertIsNone(job.destination_table_description) + self.assertIsNone(job.destination_table_friendly_name) self.assertIsNone(job.time_partitioning) self.assertIsNone(job.use_avro_logical_types) self.assertIsNone(job.clustering_fields) @@ -1804,6 +1806,16 @@ def test_ctor_w_config(self): self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config ) self.assertEqual(job.schema, [full_name, age]) + config.destination_table_description = "Description" + expected = {"description": "Description"} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + friendly_name = "Friendly Name" + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name + } + self.assertEqual(config.destination_table_friendly_name, friendly_name) def test_ctor_w_job_reference(self): from google.cloud.bigquery import job From e1165ac5802f1e0740b04cc11f3928a4a4c92b9d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 Apr 2019 11:42:18 -0700 Subject: [PATCH 0573/2016] Make total_rows available on RowIterator before iteration (#7622) * Make total_rows available on RowIterator before iteration After running a query, the total number of rows is available from the call to the getQueryResults API. This commit plumbs the total rows through to the faux Table created in QueryJob.results and then on through to the RowIterator created by list_rows. * Simplify RowIterator constructor. Add test comments. Use getattr instead of protecting with hasattr in the RowIterator constructor. Add comments about intentionally conflicting values for total_rows. --- .../google-cloud-bigquery/docs/snippets.py | 3 +- .../google/cloud/bigquery/job.py | 1 + .../google/cloud/bigquery/table.py | 8 +- .../tests/unit/test_client.py | 27 ++--- .../tests/unit/test_job.py | 25 ++++- .../tests/unit/test_table.py | 98 ++++++++++++------- 6 files changed, 105 insertions(+), 57 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 01ad3e014608..b11dccfbfd09 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -1908,8 +1908,7 @@ def test_client_query_total_rows(client, capsys): location="US", ) # API request - starts the query - results = query_job.result() # Waits for query to complete. - next(iter(results)) # Fetch the first page of results, which contains total_rows. + results = query_job.result() # Wait for query to complete. print("Got {} rows.".format(results.total_rows)) # [END bigquery_query_total_rows] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 1aa99e699db3..90461513c0cc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2835,6 +2835,7 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): schema = self._query_results.schema dest_table_ref = self.destination dest_table = Table(dest_table_ref, schema=schema) + dest_table._properties["numRows"] = self._query_results.total_rows return self._client.list_rows(dest_table, retry=retry) def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index e9f06998d2ce..d50b43a18058 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1308,13 +1308,13 @@ def __init__( page_start=_rows_page_start, next_token="pageToken", ) - self._schema = schema self._field_to_index = _helpers._field_to_index_mapping(schema) - self._total_rows = None self._page_size = page_size - self._table = table - self._selected_fields = selected_fields self._project = client.project + self._schema = schema + self._selected_fields = selected_fields + self._table = table + self._total_rows = getattr(table, "num_rows", None) def _get_next_page_response(self): """Requests the next page from the path provided. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 4348462c83ee..46734079a03d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4359,18 +4359,21 @@ def test_list_rows_empty_table(self): client._connection = _make_connection(response, response) # Table that has no schema because it's an empty table. - rows = tuple( - client.list_rows( - # Test with using a string for the table ID. - "{}.{}.{}".format( - self.TABLE_REF.project, - self.TABLE_REF.dataset_id, - self.TABLE_REF.table_id, - ), - selected_fields=[], - ) + rows = client.list_rows( + # Test with using a string for the table ID. + "{}.{}.{}".format( + self.TABLE_REF.project, + self.TABLE_REF.dataset_id, + self.TABLE_REF.table_id, + ), + selected_fields=[], ) - self.assertEqual(rows, ()) + + # When a table reference / string and selected_fields is provided, + # total_rows can't be populated until iteration starts. + self.assertIsNone(rows.total_rows) + self.assertEqual(tuple(rows), ()) + self.assertEqual(rows.total_rows, 0) def test_list_rows_query_params(self): from google.cloud.bigquery.table import Table, SchemaField @@ -4573,7 +4576,7 @@ def test_list_rows_with_missing_schema(self): conn.api_request.assert_called_once_with(method="GET", path=table_path) conn.api_request.reset_mock() - self.assertIsNone(row_iter.total_rows, msg=repr(table)) + self.assertEqual(row_iter.total_rows, 2, msg=repr(table)) rows = list(row_iter) conn.api_request.assert_called_once_with( diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index bbb2c54d852f..a30c026a82c0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -4024,21 +4024,41 @@ def test_estimated_bytes_processed(self): self.assertEqual(job.estimated_bytes_processed, est_bytes) def test_result(self): + from google.cloud.bigquery.table import RowIterator + query_resource = { "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", } - connection = _make_connection(query_resource, query_resource) + tabledata_resource = { + # Explicitly set totalRows to be different from the query response. + # to test update during iteration. + "totalRows": "1", + "pageToken": None, + "rows": [{"f": [{"v": "abc"}]}], + } + connection = _make_connection(query_resource, tabledata_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) result = job.result() - self.assertEqual(list(result), []) + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 2) + + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + # Test that the total_rows property has changed during iteration, based + # on the response from tabledata.list. + self.assertEqual(result.total_rows, 1) def test_result_w_empty_schema(self): + from google.cloud.bigquery.table import _EmptyRowIterator + # Destination table may have no schema for some DDL and DML queries. query_resource = { "jobComplete": True, @@ -4052,6 +4072,7 @@ def test_result_w_empty_schema(self): result = job.result() + self.assertIsInstance(result, _EmptyRowIterator) self.assertEqual(list(result), []) def test_result_invokes_begins(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 557d57741382..e8ebbdfc6c90 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1306,51 +1306,85 @@ def test_row(self): class Test_EmptyRowIterator(unittest.TestCase): - @mock.patch("google.cloud.bigquery.table.pandas", new=None) - def test_to_dataframe_error_if_pandas_is_none(self): + def _make_one(self): from google.cloud.bigquery.table import _EmptyRowIterator - row_iterator = _EmptyRowIterator() + return _EmptyRowIterator() + + def test_total_rows_eq_zero(self): + row_iterator = self._make_one() + self.assertEqual(row_iterator.total_rows, 0) + + @mock.patch("google.cloud.bigquery.table.pandas", new=None) + def test_to_dataframe_error_if_pandas_is_none(self): + row_iterator = self._make_one() with self.assertRaises(ValueError): row_iterator.to_dataframe() @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): - from google.cloud.bigquery.table import _EmptyRowIterator - - row_iterator = _EmptyRowIterator() + row_iterator = self._make_one() df = row_iterator.to_dataframe() self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows class TestRowIterator(unittest.TestCase): - def test_constructor(self): + def _make_one( + self, client=None, api_request=None, path=None, schema=None, **kwargs + ): from google.cloud.bigquery.table import RowIterator + + if client is None: + client = _mock_client() + + if api_request is None: + api_request = mock.sentinel.api_request + + if path is None: + path = "/foo" + + if schema is None: + schema = [] + + return RowIterator(client, api_request, path, schema, **kwargs) + + def test_constructor(self): from google.cloud.bigquery.table import _item_to_row from google.cloud.bigquery.table import _rows_page_start client = _mock_client() - api_request = mock.sentinel.api_request - path = "/foo" - schema = [] - iterator = RowIterator(client, api_request, path, schema) + path = "/some/path" + iterator = self._make_one(client=client, path=path) - self.assertFalse(iterator._started) + # Objects are set without copying. self.assertIs(iterator.client, client) - self.assertEqual(iterator.path, path) self.assertIs(iterator.item_to_value, _item_to_row) + self.assertIs(iterator._page_start, _rows_page_start) + # Properties have the expect value. + self.assertEqual(iterator.extra_params, {}) self.assertEqual(iterator._items_key, "rows") self.assertIsNone(iterator.max_results) - self.assertEqual(iterator.extra_params, {}) - self.assertIs(iterator._page_start, _rows_page_start) + self.assertEqual(iterator.path, path) + self.assertFalse(iterator._started) + self.assertIsNone(iterator.total_rows) # Changing attributes. self.assertEqual(iterator.page_number, 0) self.assertIsNone(iterator.next_page_token) self.assertEqual(iterator.num_results, 0) + def test_constructor_with_table(self): + from google.cloud.bigquery.table import Table + + table = Table("proj.dset.tbl") + table._properties["numRows"] = 100 + + iterator = self._make_one(table=table) + + self.assertIs(iterator._table, table) + self.assertEqual(iterator.total_rows, 100) + def test_iterate(self): - from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ @@ -1363,7 +1397,7 @@ def test_iterate(self): ] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(_mock_client(), api_request, path, schema) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) self.assertEqual(row_iterator.num_results, 0) rows_iter = iter(row_iterator) @@ -1382,7 +1416,6 @@ def test_iterate(self): api_request.assert_called_once_with(method="GET", path=path, query_params={}) def test_page_size(self): - from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ @@ -1396,7 +1429,7 @@ def test_page_size(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator( + row_iterator = self._make_one( _mock_client(), api_request, path, schema, page_size=4 ) row_iterator._get_next_page_response() @@ -1409,7 +1442,6 @@ def test_page_size(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): - from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ @@ -1424,7 +1456,7 @@ def test_to_dataframe(self): ] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(_mock_client(), api_request, path, schema) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe() @@ -1442,7 +1474,6 @@ def test_to_dataframe(self): def test_to_dataframe_progress_bar( self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock ): - from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ @@ -1465,7 +1496,7 @@ def test_to_dataframe_progress_bar( ) for progress_bar_type, progress_bar_mock in progress_bars: - row_iterator = RowIterator(_mock_client(), api_request, path, schema) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) progress_bar_mock.assert_called() @@ -1475,7 +1506,6 @@ def test_to_dataframe_progress_bar( @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): - from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ @@ -1490,7 +1520,7 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): ] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(_mock_client(), api_request, path, schema) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) with warnings.catch_warnings(record=True) as warned: df = row_iterator.to_dataframe() @@ -1501,7 +1531,6 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.tqdm", new=None) def test_to_dataframe_no_tqdm(self): - from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ @@ -1516,7 +1545,7 @@ def test_to_dataframe_no_tqdm(self): ] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(_mock_client(), api_request, path, schema) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) with warnings.catch_warnings(record=True) as warned: df = row_iterator.to_dataframe(progress_bar_type="tqdm") @@ -1535,7 +1564,6 @@ def test_to_dataframe_no_tqdm(self): @mock.patch("tqdm.tqdm_notebook", new=None) # will raise TypeError on call @mock.patch("tqdm.tqdm", new=None) # will raise TypeError on call def test_to_dataframe_tqdm_error(self): - from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ @@ -1552,14 +1580,13 @@ def test_to_dataframe_tqdm_error(self): for progress_bar_type in ("tqdm", "tqdm_notebook", "tqdm_gui"): api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(_mock_client(), api_request, path, schema) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) self.assertEqual(len(df), 4) # all should be well @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): - from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ @@ -1568,7 +1595,7 @@ def test_to_dataframe_w_empty_results(self): ] path = "/foo" api_request = mock.Mock(return_value={"rows": []}) - row_iterator = RowIterator(_mock_client(), api_request, path, schema) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe() @@ -1579,7 +1606,6 @@ def test_to_dataframe_w_empty_results(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): import datetime - from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ @@ -1599,7 +1625,7 @@ def test_to_dataframe_w_various_types_nullable(self): rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(_mock_client(), api_request, path, schema) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe() @@ -1620,7 +1646,6 @@ def test_to_dataframe_w_various_types_nullable(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_column_dtypes(self): - from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ @@ -1640,7 +1665,7 @@ def test_to_dataframe_column_dtypes(self): rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(_mock_client(), api_request, path, schema) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe(dtypes={"km": "float16"}) @@ -1659,7 +1684,6 @@ def test_to_dataframe_column_dtypes(self): @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_error_if_pandas_is_none(self): - from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField schema = [ @@ -1672,7 +1696,7 @@ def test_to_dataframe_error_if_pandas_is_none(self): ] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(_mock_client(), api_request, path, schema) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) with self.assertRaises(ValueError): row_iterator.to_dataframe() From 88233c231585a6f6bdd3391dfbb1091d3d2f5ab9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 19 Apr 2019 10:35:33 -0700 Subject: [PATCH 0574/2016] Raise ValueError when BQ Storage is required but missing (#7726) Move the BigQuery Storage API optional dependency imports to the top of the module, following the existing pattern for other optional dependencies. --- .../google/cloud/bigquery/table.py | 30 +++++++++++++-- .../tests/unit/test_table.py | 37 +++++++++++++++++++ 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d50b43a18058..92af19c43ce5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -17,6 +17,7 @@ from __future__ import absolute_import import collections +import concurrent.futures import copy import datetime import json @@ -25,6 +26,11 @@ import six +try: + from google.cloud import bigquery_storage_v1beta1 +except ImportError: # pragma: NO COVER + bigquery_storage_v1beta1 = None + try: import pandas except ImportError: # pragma: NO COVER @@ -46,6 +52,10 @@ from google.cloud.bigquery.external_config import ExternalConfig +_NO_BQSTORAGE_ERROR = ( + "The google-cloud-bigquery-storage library is not installed, " + "please install google-cloud-bigquery-storage to use bqstorage features." +) _NO_PANDAS_ERROR = ( "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." @@ -274,6 +284,9 @@ def to_api_repr(self): def to_bqstorage(self): """Construct a BigQuery Storage API representation of this table. + Install the ``google-cloud-bigquery-storage`` package to use this + feature. + If the ``table_id`` contains a partition identifier (e.g. ``my_table$201812``) or a snapshot identifier (e.g. ``mytable@1234567890``), it is ignored. Use @@ -285,8 +298,14 @@ def to_bqstorage(self): Returns: google.cloud.bigquery_storage_v1beta1.types.TableReference: A reference to this table in the BigQuery Storage API. + + Raises: + ValueError: + If the :mod:`google.cloud.bigquery_storage_v1beta1` module + cannot be imported. """ - from google.cloud import bigquery_storage_v1beta1 + if bigquery_storage_v1beta1 is None: + raise ValueError(_NO_BQSTORAGE_ERROR) table_ref = bigquery_storage_v1beta1.types.TableReference() table_ref.project_id = self._project @@ -1369,8 +1388,8 @@ def _to_dataframe_tabledata_list(self, dtypes, progress_bar=None): def _to_dataframe_bqstorage(self, bqstorage_client, dtypes): """Use (faster, but billable) BQ Storage API to construct DataFrame.""" - import concurrent.futures - from google.cloud import bigquery_storage_v1beta1 + if bigquery_storage_v1beta1 is None: + raise ValueError(_NO_BQSTORAGE_ERROR) if "$" in self._table.table_id: raise ValueError( @@ -1496,7 +1515,10 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non from the destination table's schema. Raises: - ValueError: If the :mod:`pandas` library cannot be imported. + ValueError: + If the :mod:`pandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1beta1` module is + required but cannot be imported. """ if pandas is None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index e8ebbdfc6c90..7ac982394c9d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1860,6 +1860,28 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): with pytest.raises(google.api_core.exceptions.Forbidden): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_raises_import_error(self): + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + path = "/foo" + api_request = mock.Mock(return_value={"rows": []}) + row_iterator = mut.RowIterator( + _mock_client(), api_request, path, [], table=mut.Table("proj.dset.tbl") + ) + + with mock.patch.object(mut, "bigquery_storage_v1beta1", None), pytest.raises( + ValueError + ) as exc: + row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + assert mut._NO_BQSTORAGE_ERROR in str(exc) + @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) @@ -2112,3 +2134,18 @@ def test_table_reference_to_bqstorage(): for case, cls in itertools.product(cases, classes): got = cls.from_string(case).to_bqstorage() assert got == expected + + +@unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" +) +def test_table_reference_to_bqstorage_raises_import_error(): + from google.cloud.bigquery import table as mut + + classes = (mut.TableReference, mut.Table, mut.TableListItem) + for cls in classes: + with mock.patch.object(mut, "bigquery_storage_v1beta1", None), pytest.raises( + ValueError + ) as exc: + cls.from_string("my-project.my_dataset.my_table").to_bqstorage() + assert mut._NO_BQSTORAGE_ERROR in str(exc) From 53cfcbdd61510f78bec7f12b29c5cc0a46d3116b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 22 Apr 2019 11:09:44 -0700 Subject: [PATCH 0575/2016] BigQuery: ensure that `KeyboardInterrupt` during `to_dataframe`no longer hangs. (#7698) * fix: `KeyboardInterrupt` during `to_dataframe` (with BQ Storage API) no longer hangs I noticed in manually testing `to_dataframe` that it would stop the current cell when I hit Ctrl-C, but data kept on downloading in the background. Trying to exit the Python shell, I'd notice that it would hang until I pressed Ctrl-C a few more times. Rather than get the DataFrame for each stream in one big chunk, loop through each block and exit if the function needs to quit early. This follows the pattern at https://stackoverflow.com/a/29237343/101923 Update tests to ensure multiple progress interval loops. * Refactor _to_dataframe_bqstorage_stream --- .../google/cloud/bigquery/table.py | 67 ++++++- packages/google-cloud-bigquery/setup.py | 2 +- .../tests/unit/test_table.py | 177 +++++++++++++++++- 3 files changed, 228 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 92af19c43ce5..742c1a3efad1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -66,6 +66,7 @@ ) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' _MARKER = object() +_PROGRESS_INTERVAL = 1.0 # Time between download status updates, in seconds. def _reference_getter(table): @@ -1386,6 +1387,27 @@ def _to_dataframe_tabledata_list(self, dtypes, progress_bar=None): return pandas.concat(frames) + def _to_dataframe_bqstorage_stream( + self, bqstorage_client, dtypes, columns, session, stream + ): + position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) + rowstream = bqstorage_client.read_rows(position).rows(session) + + frames = [] + for page in rowstream.pages: + if self._to_dataframe_finished: + return + frames.append(page.to_dataframe(dtypes=dtypes)) + + # Avoid errors on unlucky streams with no blocks. pandas.concat + # will fail on an empty list. + if not frames: + return pandas.DataFrame(columns=columns) + + # page.to_dataframe() does not preserve column order. Rearrange at + # the end using manually-parsed schema. + return pandas.concat(frames)[columns] + def _to_dataframe_bqstorage(self, bqstorage_client, dtypes): """Use (faster, but billable) BQ Storage API to construct DataFrame.""" if bigquery_storage_v1beta1 is None: @@ -1421,17 +1443,46 @@ def _to_dataframe_bqstorage(self, bqstorage_client, dtypes): if not session.streams: return pandas.DataFrame(columns=columns) - def get_dataframe(stream): - position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) - rowstream = bqstorage_client.read_rows(position) - return rowstream.to_dataframe(session, dtypes=dtypes) + # Use _to_dataframe_finished to notify worker threads when to quit. + # See: https://stackoverflow.com/a/29237343/101923 + self._to_dataframe_finished = False + + def get_frames(pool): + frames = [] + + # Manually submit jobs and wait for download to complete rather + # than using pool.map because pool.map continues running in the + # background even if there is an exception on the main thread. + # See: https://github.com/googleapis/google-cloud-python/pull/7698 + not_done = [ + pool.submit( + self._to_dataframe_bqstorage_stream, + bqstorage_client, + dtypes, + columns, + session, + stream, + ) + for stream in session.streams + ] + + while not_done: + done, not_done = concurrent.futures.wait( + not_done, timeout=_PROGRESS_INTERVAL + ) + frames.extend([future.result() for future in done]) + return frames with concurrent.futures.ThreadPoolExecutor() as pool: - frames = pool.map(get_dataframe, session.streams) + try: + frames = get_frames(pool) + finally: + # No need for a lock because reading/replacing a variable is + # defined to be an atomic operation in the Python language + # definition (enforced by the global interpreter lock). + self._to_dataframe_finished = True - # rowstream.to_dataframe() does not preserve column order. Rearrange at - # the end using manually-parsed schema. - return pandas.concat(frames)[columns] + return pandas.concat(frames) def _get_progress_bar(self, progress_bar_type): """Construct a tqdm progress bar object, if tqdm is installed.""" diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 2c4d570d2c7e..b51fa63a9a75 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -37,7 +37,7 @@ ] extras = { "bqstorage": [ - "google-cloud-bigquery-storage >= 0.2.0dev1, <2.0.0dev", + "google-cloud-bigquery-storage >= 0.4.0, <2.0.0dev", "fastavro>=0.21.2", ], "pandas": ["pandas>=0.17.1"], diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 7ac982394c9d..8bba2befccbc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import concurrent.futures import itertools import json +import time import unittest import warnings @@ -1705,7 +1707,7 @@ def test_to_dataframe_error_if_pandas_is_none(self): @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) - def test_to_dataframe_w_bqstorage_empty(self): + def test_to_dataframe_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -1746,18 +1748,70 @@ def test_to_dataframe_w_bqstorage_empty(self): @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) - def test_to_dataframe_w_bqstorage_nonempty(self): + def test_to_dataframe_w_bqstorage_empty_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1beta1 import reader + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + session = bigquery_storage_v1beta1.types.ReadSession( + streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}] + ) + session.avro_schema.schema = json.dumps( + { + "fields": [ + {"name": "colA"}, + # Not alphabetical to test column order. + {"name": "colC"}, + {"name": "colB"}, + ] + } + ) + bqstorage_client.create_read_session.return_value = session + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) - mock_rowstream.to_dataframe.return_value = pandas.DataFrame( - [ - {"colA": 1, "colB": "abc", "colC": 2.0}, - {"colA": -1, "colB": "def", "colC": 4.0}, - ] + bqstorage_client.read_rows.return_value = mock_rowstream + + mock_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_rowstream.rows.return_value = mock_rows + mock_pages = mock.PropertyMock(return_value=()) + type(mock_rows).pages = mock_pages + + schema = [ + schema.SchemaField("colA", "IGNORED"), + schema.SchemaField("colC", "IGNORED"), + schema.SchemaField("colB", "IGNORED"), + ] + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=schema, ) + + got = row_iterator.to_dataframe(bqstorage_client) + + column_names = ["colA", "colC", "colB"] + self.assertEqual(list(got), column_names) + self.assertTrue(got.empty) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_nonempty(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud.bigquery_storage_v1beta1 import reader + + # Speed up testing. + mut._PROGRESS_INTERVAL = 0.01 + bqstorage_client = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient ) @@ -1775,7 +1829,27 @@ def test_to_dataframe_w_bqstorage_nonempty(self): } ) bqstorage_client.create_read_session.return_value = session + + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) bqstorage_client.read_rows.return_value = mock_rowstream + + mock_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_rowstream.rows.return_value = mock_rows + + def blocking_to_dataframe(*args, **kwargs): + # Sleep for longer than the waiting interval so that we know we're + # only reading one page per loop at most. + time.sleep(2 * mut._PROGRESS_INTERVAL) + return pandas.DataFrame( + {"colA": [1, -1], "colB": ["abc", "def"], "colC": [2.0, 4.0]}, + columns=["colA", "colB", "colC"], + ) + + mock_page = mock.create_autospec(reader.ReadRowsPage) + mock_page.to_dataframe.side_effect = blocking_to_dataframe + mock_pages = mock.PropertyMock(return_value=(mock_page, mock_page, mock_page)) + type(mock_rows).pages = mock_pages + schema = [ schema.SchemaField("colA", "IGNORED"), schema.SchemaField("colC", "IGNORED"), @@ -1791,10 +1865,95 @@ def test_to_dataframe_w_bqstorage_nonempty(self): selected_fields=schema, ) - got = row_iterator.to_dataframe(bqstorage_client) + with mock.patch( + "concurrent.futures.wait", wraps=concurrent.futures.wait + ) as mock_wait: + got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + column_names = ["colA", "colC", "colB"] self.assertEqual(list(got), column_names) - self.assertEqual(len(got.index), 2) + self.assertEqual(len(got.index), 6) + # Make sure that this test looped through multiple progress intervals. + self.assertGreaterEqual(mock_wait.call_count, 2) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud.bigquery_storage_v1beta1 import reader + + # Speed up testing. + mut._PROGRESS_INTERVAL = 0.01 + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + session = bigquery_storage_v1beta1.types.ReadSession( + streams=[ + # Use two streams because one will fail with a + # KeyboardInterrupt, and we want to check that the other stream + # ends early. + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, + ] + ) + session.avro_schema.schema = json.dumps( + {"fields": [{"name": "colA"}, {"name": "colB"}, {"name": "colC"}]} + ) + bqstorage_client.create_read_session.return_value = session + + def blocking_to_dataframe(*args, **kwargs): + # Sleep for longer than the waiting interval so that we know we're + # only reading one page per loop at most. + time.sleep(2 * mut._PROGRESS_INTERVAL) + return pandas.DataFrame( + {"colA": [1, -1], "colB": ["abc", "def"], "colC": [2.0, 4.0]}, + columns=["colA", "colB", "colC"], + ) + + mock_page = mock.create_autospec(reader.ReadRowsPage) + mock_page.to_dataframe.side_effect = blocking_to_dataframe + mock_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_pages = mock.PropertyMock(return_value=(mock_page, mock_page, mock_page)) + type(mock_rows).pages = mock_pages + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) + mock_rowstream.rows.return_value = mock_rows + + mock_cancelled_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_cancelled_pages = mock.PropertyMock(side_effect=KeyboardInterrupt) + type(mock_cancelled_rows).pages = mock_cancelled_pages + mock_cancelled_rowstream = mock.create_autospec(reader.ReadRowsStream) + mock_cancelled_rowstream.rows.return_value = mock_cancelled_rows + + bqstorage_client.read_rows.side_effect = ( + mock_cancelled_rowstream, + mock_rowstream, + ) + + schema = [ + schema.SchemaField("colA", "IGNORED"), + schema.SchemaField("colB", "IGNORED"), + schema.SchemaField("colC", "IGNORED"), + ] + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=schema, + ) + + with pytest.raises(KeyboardInterrupt): + row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + + # Should not have fetched the third page of results because exit_early + # should have been set. + self.assertLessEqual(mock_page.to_dataframe.call_count, 2) @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( From 16ad45ec7074bfd4c4e8715faf9651dc118132a0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 22 Apr 2019 12:26:29 -0700 Subject: [PATCH 0576/2016] Close the to_dataframe progress bar when finished. (#7757) --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 4 ++++ packages/google-cloud-bigquery/tests/unit/test_table.py | 1 + 2 files changed, 5 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 742c1a3efad1..101e754d118f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1385,6 +1385,10 @@ def _to_dataframe_tabledata_list(self, dtypes, progress_bar=None): progress_bar.total = progress_bar.total or self.total_rows progress_bar.update(len(current_frame)) + if progress_bar is not None: + # Indicate that the download has finished. + progress_bar.close() + return pandas.concat(frames) def _to_dataframe_bqstorage_stream( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 8bba2befccbc..ef397195882f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1503,6 +1503,7 @@ def test_to_dataframe_progress_bar( progress_bar_mock.assert_called() progress_bar_mock().update.assert_called() + progress_bar_mock().close.assert_called_once() self.assertEqual(len(df), 4) @unittest.skipIf(pandas is None, "Requires `pandas`") From d0c0639e8c5513303637c5af3791703b214411f5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 23 Apr 2019 13:20:25 -0700 Subject: [PATCH 0577/2016] Refactor QueryJob.query to read from resource dictionary (#7763) This explicitly documents the QueryJob.query property and also aligns it with the other resource properties in the BigQuery client. --- .../google/cloud/bigquery/job.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 90461513c0cc..d14119b1f1b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2357,7 +2357,10 @@ def __init__(self, job_id, query, client, job_config=None): if job_config.use_legacy_sql is None: job_config.use_legacy_sql = False - self.query = query + _helpers._set_sub_prop( + self._properties, ["configuration", "query", "query"], query + ) + self._configuration = job_config self._query_results = None self._done_timeout = None @@ -2424,6 +2427,17 @@ def priority(self): """ return self._configuration.priority + @property + def query(self): + """str: The query text used in this query job. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.query + """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "query", "query"] + ) + @property def query_parameters(self): """See @@ -2516,7 +2530,6 @@ def to_api_repr(self): def _copy_configuration_properties(self, configuration): """Helper: assign subclass configuration properties in cleaned.""" self._configuration._properties = copy.deepcopy(configuration) - self.query = _helpers._get_sub_prop(configuration, ["query", "query"]) @classmethod def from_api_repr(cls, resource, client): From 7a6cd44d133a2d4ac72129aceb4ffe34b91f1631 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 23 Apr 2019 14:31:54 -0700 Subject: [PATCH 0578/2016] Respect `progress_bar_type` in `to_dataframe` when used with BQ Storage API (#7697) * fix: `to_dataframe` respects `progress_bar_type` with BQ Storage API * Add unit test for progress bar. * Add test for full queue. * Add worker queue for progress bar to prevent lost tqdm updates. The worker queue runs in a background thread, so it's more likely to be able to keep up with the other workers that are adding to the worker queue. * Test that progress bar updates more than once. --- .../google/cloud/bigquery/table.py | 101 +++++++++++++++- .../tests/unit/test_table.py | 114 ++++++++++++++++-- 2 files changed, 200 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 101e754d118f..d50fec487a31 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -22,9 +22,12 @@ import datetime import json import operator +import threading +import time import warnings import six +from six.moves import queue try: from google.cloud import bigquery_storage_v1beta1 @@ -66,7 +69,12 @@ ) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' _MARKER = object() -_PROGRESS_INTERVAL = 1.0 # Time between download status updates, in seconds. +_PROGRESS_INTERVAL = 0.2 # Time between download status updates, in seconds. + +# Send multiple updates from the worker threads, so there are at least a few +# waiting next time the prgrogess bar is updated. +_PROGRESS_UPDATES_PER_INTERVAL = 3 +_PROGRESS_WORKER_INTERVAL = _PROGRESS_INTERVAL / _PROGRESS_UPDATES_PER_INTERVAL def _reference_getter(table): @@ -1274,6 +1282,16 @@ def __repr__(self): return "Row({}, {})".format(self._xxx_values, f2i) +class _NoopProgressBarQueue(object): + """A fake Queue class that does nothing. + + This is used when there is no progress bar to send updates to. + """ + + def put_nowait(self, item): + """Don't actually do anything with the item.""" + + class RowIterator(HTTPIterator): """A class for iterating through HTTP/JSON API row list responses. @@ -1392,7 +1410,7 @@ def _to_dataframe_tabledata_list(self, dtypes, progress_bar=None): return pandas.concat(frames) def _to_dataframe_bqstorage_stream( - self, bqstorage_client, dtypes, columns, session, stream + self, bqstorage_client, dtypes, columns, session, stream, worker_queue ): position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) rowstream = bqstorage_client.read_rows(position).rows(session) @@ -1403,6 +1421,13 @@ def _to_dataframe_bqstorage_stream( return frames.append(page.to_dataframe(dtypes=dtypes)) + try: + worker_queue.put_nowait(page.num_items) + except queue.Full: + # It's okay if we miss a few progress updates. Don't slow + # down parsing for that. + pass + # Avoid errors on unlucky streams with no blocks. pandas.concat # will fail on an empty list. if not frames: @@ -1412,7 +1437,47 @@ def _to_dataframe_bqstorage_stream( # the end using manually-parsed schema. return pandas.concat(frames)[columns] - def _to_dataframe_bqstorage(self, bqstorage_client, dtypes): + def _process_worker_updates(self, worker_queue, progress_queue): + last_update_time = time.time() + current_update = 0 + + # Sum all updates in a contant loop. + while True: + try: + current_update += worker_queue.get(timeout=_PROGRESS_INTERVAL) + + # Time to send to the progress bar queue? + current_time = time.time() + elapsed_time = current_time - last_update_time + if elapsed_time > _PROGRESS_WORKER_INTERVAL: + progress_queue.put(current_update) + last_update_time = current_time + current_update = 0 + + except queue.Empty: + # Keep going, unless there probably aren't going to be any + # additional updates. + if self._to_dataframe_finished: + progress_queue.put(current_update) + return + + def _process_progress_updates(self, progress_queue, progress_bar): + if progress_bar is None: + return + + # Output all updates since the last interval. + while True: + try: + next_update = progress_queue.get_nowait() + progress_bar.update(next_update) + except queue.Empty: + break + + if self._to_dataframe_finished: + progress_bar.close() + return + + def _to_dataframe_bqstorage(self, bqstorage_client, dtypes, progress_bar=None): """Use (faster, but billable) BQ Storage API to construct DataFrame.""" if bigquery_storage_v1beta1 is None: raise ValueError(_NO_BQSTORAGE_ERROR) @@ -1451,6 +1516,18 @@ def _to_dataframe_bqstorage(self, bqstorage_client, dtypes): # See: https://stackoverflow.com/a/29237343/101923 self._to_dataframe_finished = False + # Create a queue to track progress updates across threads. + worker_queue = _NoopProgressBarQueue() + progress_queue = None + progress_thread = None + if progress_bar is not None: + worker_queue = queue.Queue() + progress_queue = queue.Queue() + progress_thread = threading.Thread( + target=self._process_worker_updates, args=(worker_queue, progress_queue) + ) + progress_thread.start() + def get_frames(pool): frames = [] @@ -1466,6 +1543,7 @@ def get_frames(pool): columns, session, stream, + worker_queue, ) for stream in session.streams ] @@ -1475,6 +1553,11 @@ def get_frames(pool): not_done, timeout=_PROGRESS_INTERVAL ) frames.extend([future.result() for future in done]) + + # The progress bar needs to update on the main thread to avoid + # contention over stdout / stderr. + self._process_progress_updates(progress_queue, progress_bar) + return frames with concurrent.futures.ThreadPoolExecutor() as pool: @@ -1486,6 +1569,14 @@ def get_frames(pool): # definition (enforced by the global interpreter lock). self._to_dataframe_finished = True + # Shutdown all background threads, now that they should know to + # exit early. + pool.shutdown(wait=True) + if progress_thread is not None: + progress_thread.join() + + # Update the progress bar one last time to close it. + self._process_progress_updates(progress_queue, progress_bar) return pandas.concat(frames) def _get_progress_bar(self, progress_bar_type): @@ -1585,7 +1676,9 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non if bqstorage_client is not None: try: - return self._to_dataframe_bqstorage(bqstorage_client, dtypes) + return self._to_dataframe_bqstorage( + bqstorage_client, dtypes, progress_bar=progress_bar + ) except google.api_core.exceptions.Forbidden: # Don't hide errors such as insufficient permissions to create # a read session, or the API is not enabled. Both of those are diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index ef397195882f..18ca125e804c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -22,6 +22,7 @@ import mock import pytest import six +from six.moves import queue import google.api_core.exceptions @@ -1816,9 +1817,12 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient ) - session = bigquery_storage_v1beta1.types.ReadSession( - streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}] - ) + streams = [ + # Use two streams we want to check frames are read from each stream. + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, + ] + session = bigquery_storage_v1beta1.types.ReadSession(streams=streams) session.avro_schema.schema = json.dumps( { "fields": [ @@ -1836,20 +1840,25 @@ def test_to_dataframe_w_bqstorage_nonempty(self): mock_rows = mock.create_autospec(reader.ReadRowsIterable) mock_rowstream.rows.return_value = mock_rows + page_items = [ + {"colA": 1, "colB": "abc", "colC": 2.0}, + {"colA": -1, "colB": "def", "colC": 4.0}, + ] def blocking_to_dataframe(*args, **kwargs): # Sleep for longer than the waiting interval so that we know we're # only reading one page per loop at most. time.sleep(2 * mut._PROGRESS_INTERVAL) - return pandas.DataFrame( - {"colA": [1, -1], "colB": ["abc", "def"], "colC": [2.0, 4.0]}, - columns=["colA", "colB", "colC"], - ) + return pandas.DataFrame(page_items, columns=["colA", "colB", "colC"]) mock_page = mock.create_autospec(reader.ReadRowsPage) mock_page.to_dataframe.side_effect = blocking_to_dataframe - mock_pages = mock.PropertyMock(return_value=(mock_page, mock_page, mock_page)) - type(mock_rows).pages = mock_pages + mock_pages = (mock_page, mock_page, mock_page) + type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) + + # Test that full queue errors are ignored. + mock_queue = mock.create_autospec(mut._NoopProgressBarQueue) + mock_queue().put_nowait.side_effect = queue.Full schema = [ schema.SchemaField("colA", "IGNORED"), @@ -1866,17 +1875,100 @@ def blocking_to_dataframe(*args, **kwargs): selected_fields=schema, ) - with mock.patch( + with mock.patch.object(mut, "_NoopProgressBarQueue", mock_queue), mock.patch( "concurrent.futures.wait", wraps=concurrent.futures.wait ) as mock_wait: got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + # Are the columns in the expected order? column_names = ["colA", "colC", "colB"] self.assertEqual(list(got), column_names) - self.assertEqual(len(got.index), 6) + + # Have expected number of rows? + total_pages = len(streams) * len(mock_pages) + total_rows = len(page_items) * total_pages + self.assertEqual(len(got.index), total_rows) + # Make sure that this test looped through multiple progress intervals. self.assertGreaterEqual(mock_wait.call_count, 2) + # Make sure that this test pushed to the progress queue. + self.assertEqual(mock_queue().put_nowait.call_count, total_pages) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(tqdm is None, "Requires `tqdm`") + @mock.patch("tqdm.tqdm") + def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud.bigquery_storage_v1beta1 import reader + + # Speed up testing. + mut._PROGRESS_INTERVAL = 0.01 + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + streams = [ + # Use two streams we want to check that progress bar updates are + # sent from each stream. + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, + ] + session = bigquery_storage_v1beta1.types.ReadSession(streams=streams) + session.avro_schema.schema = json.dumps({"fields": [{"name": "testcol"}]}) + bqstorage_client.create_read_session.return_value = session + + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) + bqstorage_client.read_rows.return_value = mock_rowstream + + mock_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_rowstream.rows.return_value = mock_rows + mock_page = mock.create_autospec(reader.ReadRowsPage) + page_items = [-1, 0, 1] + type(mock_page).num_items = mock.PropertyMock(return_value=len(page_items)) + + def blocking_to_dataframe(*args, **kwargs): + # Sleep for longer than the waiting interval. This ensures the + # progress_queue gets written to more than once because it gives + # the worker->progress updater time to sum intermediate updates. + time.sleep(2 * mut._PROGRESS_INTERVAL) + return pandas.DataFrame({"testcol": page_items}) + + mock_page.to_dataframe.side_effect = blocking_to_dataframe + mock_pages = (mock_page, mock_page, mock_page, mock_page, mock_page) + type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) + + schema = [schema.SchemaField("testcol", "IGNORED")] + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=schema, + ) + + row_iterator.to_dataframe( + bqstorage_client=bqstorage_client, progress_bar_type="tqdm" + ) + + # Make sure that this test updated the progress bar once per page from + # each stream. + total_pages = len(streams) * len(mock_pages) + expected_total_rows = total_pages * len(page_items) + progress_updates = [ + args[0] for args, kwargs in tqdm_mock().update.call_args_list + ] + # Should have sent >1 update due to delay in blocking_to_dataframe. + self.assertGreater(len(progress_updates), 1) + self.assertEqual(sum(progress_updates), expected_total_rows) + tqdm_mock().close.assert_called_once() + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" From 101ecec9d8f8b48a7596bb0418b2850a759cf232 Mon Sep 17 00:00:00 2001 From: lbristol88 Date: Wed, 24 Apr 2019 17:10:41 -0700 Subject: [PATCH 0579/2016] Phase 1 for storing schemas for later use. (#7761) * Added functions to client for loading and saving schemas to a file. * Tests for schema to/from json. --- .../google/cloud/bigquery/client.py | 47 +++++ .../tests/unit/test_client.py | 180 ++++++++++++++++++ 2 files changed, 227 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 86e131438f32..bb6a375975f2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -23,6 +23,8 @@ import functools import gzip +import io +import json import os import tempfile import uuid @@ -50,6 +52,7 @@ from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import _QueryResults from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _table_arg_to_table from google.cloud.bigquery.table import _table_arg_to_table_ref from google.cloud.bigquery.table import Table @@ -1929,6 +1932,50 @@ def list_rows( ) return row_iterator + def _schema_from_json_file_object(self, file_obj): + """Helper function for schema_from_json that takes a + file object that describes a table schema. + + Returns: + List of schema field objects. + """ + json_data = json.load(file_obj) + return [SchemaField.from_api_repr(field) for field in json_data] + + def _schema_to_json_file_object(self, schema_list, file_obj): + """Helper function for schema_to_json that takes a schema list and file + object and writes the schema list to the file object with json.dump + """ + json.dump(schema_list, file_obj, indent=2, sort_keys=True) + + def schema_from_json(self, file_or_path): + """Takes a file object or file path that contains json that describes + a table schema. + + Returns: + List of schema field objects. + """ + if isinstance(file_or_path, io.IOBase): + return self._schema_from_json_file_object(file_or_path) + + with open(file_or_path) as file_obj: + return self._schema_from_json_file_object(file_obj) + + def schema_to_json(self, schema_list, destination): + """Takes a list of schema field objects. + + Serializes the list of schema field objects as json to a file. + + Destination is a file path or a file object. + """ + json_schema_list = [f.to_api_repr() for f in schema_list] + + if isinstance(destination, io.IOBase): + return self._schema_to_json_file_object(json_schema_list, destination) + + with open(destination, mode="w") as file_obj: + return self._schema_to_json_file_object(json_schema_list, file_obj) + # pylint: disable=unused-argument def _item_to_project(iterator, resource): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 46734079a03d..45e80f1a37a3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5161,3 +5161,183 @@ def test__do_multipart_upload_wrong_size(self): with pytest.raises(ValueError): client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None) + + def test_schema_from_json_with_file_path(self): + from google.cloud.bigquery.schema import SchemaField + + file_content = """[ + { + "description": "quarter", + "mode": "REQUIRED", + "name": "qtr", + "type": "STRING" + }, + { + "description": "sales representative", + "mode": "NULLABLE", + "name": "rep", + "type": "STRING" + }, + { + "description": "total sales", + "mode": "NULLABLE", + "name": "sales", + "type": "FLOAT" + } + ]""" + + expected = [ + SchemaField("qtr", "STRING", "REQUIRED", "quarter"), + SchemaField("rep", "STRING", "NULLABLE", "sales representative"), + SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + ] + + client = self._make_client() + mock_file_path = "/mocked/file.json" + + if six.PY2: + open_patch = mock.patch( + "__builtin__.open", mock.mock_open(read_data=file_content) + ) + else: + open_patch = mock.patch( + "builtins.open", new=mock.mock_open(read_data=file_content) + ) + + with open_patch as _mock_file: + actual = client.schema_from_json(mock_file_path) + _mock_file.assert_called_once_with(mock_file_path) + # This assert is to make sure __exit__ is called in the context + # manager that opens the file in the function + _mock_file().__exit__.assert_called_once() + + assert expected == actual + + def test_schema_from_json_with_file_object(self): + from google.cloud.bigquery.schema import SchemaField + + file_content = """[ + { + "description": "quarter", + "mode": "REQUIRED", + "name": "qtr", + "type": "STRING" + }, + { + "description": "sales representative", + "mode": "NULLABLE", + "name": "rep", + "type": "STRING" + }, + { + "description": "total sales", + "mode": "NULLABLE", + "name": "sales", + "type": "FLOAT" + } + ]""" + + expected = [ + SchemaField("qtr", "STRING", "REQUIRED", "quarter"), + SchemaField("rep", "STRING", "NULLABLE", "sales representative"), + SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + ] + + client = self._make_client() + + if six.PY2: + fake_file = io.BytesIO(file_content) + else: + fake_file = io.StringIO(file_content) + + actual = client.schema_from_json(fake_file) + + assert expected == actual + + def test_schema_to_json_with_file_path(self): + from google.cloud.bigquery.schema import SchemaField + + file_content = [ + { + "description": "quarter", + "mode": "REQUIRED", + "name": "qtr", + "type": "STRING", + }, + { + "description": "sales representative", + "mode": "NULLABLE", + "name": "rep", + "type": "STRING", + }, + { + "description": "total sales", + "mode": "NULLABLE", + "name": "sales", + "type": "FLOAT", + }, + ] + + schema_list = [ + SchemaField("qtr", "STRING", "REQUIRED", "quarter"), + SchemaField("rep", "STRING", "NULLABLE", "sales representative"), + SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + ] + + client = self._make_client() + mock_file_path = "/mocked/file.json" + + if six.PY2: + open_patch = mock.patch("__builtin__.open", mock.mock_open()) + else: + open_patch = mock.patch("builtins.open", mock.mock_open()) + + with open_patch as mock_file, mock.patch("json.dump") as mock_dump: + client.schema_to_json(schema_list, mock_file_path) + mock_file.assert_called_once_with(mock_file_path, mode="w") + # This assert is to make sure __exit__ is called in the context + # manager that opens the file in the function + mock_file().__exit__.assert_called_once() + mock_dump.assert_called_with( + file_content, mock_file.return_value, indent=2, sort_keys=True + ) + + def test_schema_to_json_with_file_object(self): + from google.cloud.bigquery.schema import SchemaField + + file_content = [ + { + "description": "quarter", + "mode": "REQUIRED", + "name": "qtr", + "type": "STRING", + }, + { + "description": "sales representative", + "mode": "NULLABLE", + "name": "rep", + "type": "STRING", + }, + { + "description": "total sales", + "mode": "NULLABLE", + "name": "sales", + "type": "FLOAT", + }, + ] + + schema_list = [ + SchemaField("qtr", "STRING", "REQUIRED", "quarter"), + SchemaField("rep", "STRING", "NULLABLE", "sales representative"), + SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + ] + + if six.PY2: + fake_file = io.BytesIO() + else: + fake_file = io.StringIO() + + client = self._make_client() + + client.schema_to_json(schema_list, fake_file) + assert file_content == json.loads(fake_file.getvalue()) From 685e753b1aa23234eaffbdee1bdd2e05e795ab21 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 25 Apr 2019 08:31:06 -0700 Subject: [PATCH 0580/2016] Cleanup client tests with method to create minimal table resource (#7802) * Cleanup client tests with method to create minimal table resource This removes a lot of repeated code that wasn't relevant to the actual tests. * Remove unused import. --- .../tests/unit/test_client.py | 314 ++++++------------ 1 file changed, 105 insertions(+), 209 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 45e80f1a37a3..ac2e036cdb9d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -96,6 +96,16 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) + def _make_table_resource(self): + return { + "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + } + def test_ctor_defaults(self): from google.cloud.bigquery._http import Connection @@ -869,14 +879,7 @@ def test_create_table_w_day_partition(self): path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } + resource = self._make_table_resource() conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) table.time_partitioning = TimePartitioning() @@ -907,15 +910,8 @@ def test_create_table_w_custom_property(self): path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "newAlphaProperty": "unreleased property", - } + resource = self._make_table_resource() + resource["newAlphaProperty"] = "unreleased property" conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) table._properties["newAlphaProperty"] = "unreleased property" @@ -945,14 +941,7 @@ def test_create_table_w_encryption_configuration(self): path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } + resource = self._make_table_resource() conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) table.encryption_configuration = EncryptionConfiguration( @@ -983,14 +972,7 @@ def test_create_table_w_day_partition_and_expire(self): path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } + resource = self._make_table_resource() conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) table.time_partitioning = TimePartitioning(expiration_ms=100) @@ -1021,31 +1003,28 @@ def test_create_table_w_schema_and_query(self): query = "SELECT * from %s:%s" % (self.DS_ID, self.TABLE_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "schema": { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, - ] - }, - "view": {"query": query}, - } + resource = self._make_table_resource() + resource.update( + { + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + }, + ] + }, + "view": {"query": query}, + } + ) schema = [ SchemaField("full_name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), @@ -1099,18 +1078,15 @@ def test_create_table_w_external(self): path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "externalDataConfiguration": { - "sourceFormat": SourceFormat.CSV, - "autodetect": True, - }, - } + resource = self._make_table_resource() + resource.update( + { + "externalDataConfiguration": { + "sourceFormat": SourceFormat.CSV, + "autodetect": True, + } + } + ) conn = client._connection = _make_connection(resource) table = Table(self.TABLE_REF) ec = ExternalConfig("CSV") @@ -1147,14 +1123,7 @@ def test_create_table_w_reference(self): path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } + resource = self._make_table_resource() conn = client._connection = _make_connection(resource) got = client.create_table(self.TABLE_REF) @@ -1177,14 +1146,7 @@ def test_create_table_w_fully_qualified_string(self): path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } + resource = self._make_table_resource() conn = client._connection = _make_connection(resource) got = client.create_table( @@ -1209,14 +1171,7 @@ def test_create_table_w_string(self): path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } + resource = self._make_table_resource() conn = client._connection = _make_connection(resource) got = client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) @@ -1266,14 +1221,7 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): get_path = "/projects/{}/datasets/{}/tables/{}".format( self.PROJECT, self.DS_ID, self.TABLE_ID ) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } + resource = self._make_table_resource() creds = _make_credentials() client = self._make_one( project=self.PROJECT, credentials=creds, location=self.LOCATION @@ -1365,14 +1313,7 @@ def test_get_table(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } + resource = self._make_table_resource() conn = client._connection = _make_connection(resource) table = client.get_table(self.TABLE_REF) @@ -1542,34 +1483,31 @@ def test_update_table(self): ) description = "description" title = "title" - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "schema": { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, - ] - }, - "etag": "etag", - "description": description, - "friendlyName": title, - "labels": {"x": "y"}, - } + resource = self._make_table_resource() + resource.update( + { + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + }, + ] + }, + "etag": "etag", + "description": description, + "friendlyName": title, + "labels": {"x": "y"}, + } + ) schema = [ SchemaField("full_name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), @@ -1629,15 +1567,8 @@ def test_update_table_w_custom_property(self): self.DS_ID, self.TABLE_ID, ) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "newAlphaProperty": "unreleased property", - } + resource = self._make_table_resource() + resource["newAlphaProperty"] = "unreleased property" creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(resource) @@ -1664,15 +1595,8 @@ def test_update_table_only_use_legacy_sql(self): self.DS_ID, self.TABLE_ID, ) - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "view": {"useLegacySql": True}, - } + resource = self._make_table_resource() + resource["view"] = {"useLegacySql": True} creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(resource) @@ -1723,18 +1647,15 @@ def test_update_table_w_query(self): SchemaField("full_name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - resource = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "schema": schema_resource, - "view": {"query": query, "useLegacySql": True}, - "location": location, - "expirationTime": _millis(exp_time), - } + resource = self._make_table_resource() + resource.update( + { + "schema": schema_resource, + "view": {"query": query, "useLegacySql": True}, + "location": location, + "expirationTime": _millis(exp_time), + } + ) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(resource) @@ -1771,28 +1692,18 @@ def test_update_table_w_schema_None(self): self.DS_ID, self.TABLE_ID, ) - resource1 = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - }, - } - resource2 = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } + resource1 = self._make_table_resource() + resource1.update( + { + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + } + } + ) + resource2 = self._make_table_resource() creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(resource1, resource2) @@ -1826,25 +1737,10 @@ def test_update_table_delete_property(self): self.DS_ID, self.TABLE_ID, ) - resource1 = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "description": description, - "friendlyName": title, - } - resource2 = { - "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID), - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "description": None, - } + resource1 = self._make_table_resource() + resource1.update({"description": description, "friendlyName": title}) + resource2 = self._make_table_resource() + resource2["description"] = None creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = _make_connection(resource1, resource2) From 151bbe3180cb788560ad2d8697331bd798824704 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 30 Apr 2019 07:39:03 -0700 Subject: [PATCH 0581/2016] Add `client_info` to BigQuery constructor for user-amenable user agent headers (#7806) * Add client_info to BigQuery constructor for user-amenable user agent headers This aligns BigQuery's behavior regarding the User-Agent and X-Goog-Api-Client headers with that of the GAPIC-based clients. Old: X-Goog-API-Client: gl-python/3.7.2 gccl/1.11.2 User-Agent: gcloud-python/0.29.1 New: X-Goog-API-Client: optional-application-id/1.2.3 gl-python/3.7.2 grpc/1.20.0 gax/1.9.0 gapic/1.11.2 gccl/1.11.2 User-Agent: optional-application-id/1.2.3 gl-python/3.7.2 grpc/1.20.0 gax/1.9.0 gapic/1.11.2 gccl/1.11.2 In order to set the `optional-application-id/1.2.3`, the latest version of `api_core` is required, but since that's an uncommon usecase and it doesn't break, just ignore the custom User-Agent if an older version is used, I didn't update the minimum version `setup.py`. * flake8 & blacken --- .../google/cloud/bigquery/_http.py | 36 +++++++++-- .../google/cloud/bigquery/client.py | 8 ++- .../tests/unit/test__http.py | 61 ++++++++++++++++++- .../tests/unit/test_client.py | 34 +++++++++++ 4 files changed, 132 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index 1dd7524542a5..0e5475f5f54b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -14,14 +14,12 @@ """Create / interact with Google BigQuery connections.""" +import google.api_core.gapic_v1.client_info from google.cloud import _http from google.cloud.bigquery import __version__ -_CLIENT_INFO = _http.CLIENT_INFO_TEMPLATE.format(__version__) - - class Connection(_http.JSONConnection): """A connection to Google BigQuery via the JSON REST API. @@ -29,6 +27,19 @@ class Connection(_http.JSONConnection): :param client: The client that owns the current connection. """ + def __init__(self, client, client_info=None): + super(Connection, self).__init__(client) + + if client_info is None: + client_info = google.api_core.gapic_v1.client_info.ClientInfo( + gapic_version=__version__, client_library_version=__version__ + ) + else: + client_info.gapic_version = __version__ + client_info.client_library_version = __version__ + self._client_info = client_info + self._extra_headers = {} + API_BASE_URL = "https://www.googleapis.com" """The base of the API call URL.""" @@ -38,4 +49,21 @@ class Connection(_http.JSONConnection): API_URL_TEMPLATE = "{api_base_url}/bigquery/{api_version}{path}" """A template for the URL of a particular API call.""" - _EXTRA_HEADERS = {_http.CLIENT_INFO_HEADER: _CLIENT_INFO} + @property + def USER_AGENT(self): + return self._client_info.to_user_agent() + + @USER_AGENT.setter + def USER_AGENT(self, value): + self._client_info.user_agent = value + + @property + def _EXTRA_HEADERS(self): + self._extra_headers[ + _http.CLIENT_INFO_HEADER + ] = self._client_info.to_user_agent() + return self._extra_headers + + @_EXTRA_HEADERS.setter + def _EXTRA_HEADERS(self, value): + self._extra_headers = value diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index bb6a375975f2..db53dab9ef11 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -128,6 +128,11 @@ class Client(ClientWithProject): default_query_job_config (google.cloud.bigquery.job.QueryJobConfig): (Optional) Default ``QueryJobConfig``. Will be merged into job configs passed into the ``query`` method. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with API + requests. If ``None``, then default info will be used. Generally, + you only need to set this if you're developing your own library + or partner tool. Raises: google.auth.exceptions.DefaultCredentialsError: @@ -148,11 +153,12 @@ def __init__( _http=None, location=None, default_query_job_config=None, + client_info=None, ): super(Client, self).__init__( project=project, credentials=credentials, _http=_http ) - self._connection = Connection(self) + self._connection = Connection(self, client_info=client_info) self._location = location self._default_query_job_config = default_query_job_config diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index 890046ee05fa..d7d25ea445a0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -45,9 +45,36 @@ def test_build_api_url_w_extra_query_params(self): parms = dict(parse_qsl(qs)) self.assertEqual(parms["bar"], "baz") + def test_user_agent(self): + from google.cloud import _http as base_http + + http = mock.create_autospec(requests.Session, instance=True) + response = requests.Response() + response.status_code = 200 + data = b"brent-spiner" + response._content = data + http.request.return_value = response + client = mock.Mock(_http=http, spec=["_http"]) + + conn = self._make_one(client) + conn.USER_AGENT = "my-application/1.2.3" + req_data = "req-data-boring" + result = conn.api_request("GET", "/rainbow", data=req_data, expect_json=False) + self.assertEqual(result, data) + + expected_headers = { + "Accept-Encoding": "gzip", + base_http.CLIENT_INFO_HEADER: conn.USER_AGENT, + "User-Agent": conn.USER_AGENT, + } + expected_uri = conn.build_api_url("/rainbow") + http.request.assert_called_once_with( + data=req_data, headers=expected_headers, method="GET", url=expected_uri + ) + self.assertIn("my-application/1.2.3", conn.USER_AGENT) + def test_extra_headers(self): from google.cloud import _http as base_http - from google.cloud.bigquery import _http as MUT http = mock.create_autospec(requests.Session, instance=True) response = requests.Response() @@ -58,14 +85,44 @@ def test_extra_headers(self): client = mock.Mock(_http=http, spec=["_http"]) conn = self._make_one(client) + conn._EXTRA_HEADERS["x-test-header"] = "a test value" + req_data = "req-data-boring" + result = conn.api_request("GET", "/rainbow", data=req_data, expect_json=False) + self.assertEqual(result, data) + + expected_headers = { + "Accept-Encoding": "gzip", + base_http.CLIENT_INFO_HEADER: conn.USER_AGENT, + "User-Agent": conn.USER_AGENT, + "x-test-header": "a test value", + } + expected_uri = conn.build_api_url("/rainbow") + http.request.assert_called_once_with( + data=req_data, headers=expected_headers, method="GET", url=expected_uri + ) + + def test_extra_headers_replace(self): + from google.cloud import _http as base_http + + http = mock.create_autospec(requests.Session, instance=True) + response = requests.Response() + response.status_code = 200 + data = b"brent-spiner" + response._content = data + http.request.return_value = response + client = mock.Mock(_http=http, spec=["_http"]) + + conn = self._make_one(client) + conn._EXTRA_HEADERS = {"x-test-header": "a test value"} req_data = "req-data-boring" result = conn.api_request("GET", "/rainbow", data=req_data, expect_json=False) self.assertEqual(result, data) expected_headers = { "Accept-Encoding": "gzip", - base_http.CLIENT_INFO_HEADER: MUT._CLIENT_INFO, + base_http.CLIENT_INFO_HEADER: conn.USER_AGENT, "User-Agent": conn.USER_AGENT, + "x-test-header": "a test value", } expected_uri = conn.build_api_url("/rainbow") http.request.assert_called_once_with( diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ac2e036cdb9d..08c36e0ac277 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -22,6 +22,7 @@ import unittest import mock +import requests import six from six.moves import http_client import pytest @@ -37,6 +38,7 @@ pyarrow = None import google.api_core.exceptions +from google.api_core.gapic_v1 import client_info import google.cloud._helpers from google.cloud.bigquery.dataset import DatasetReference @@ -1320,6 +1322,38 @@ def test_get_table(self): conn.api_request.assert_called_once_with(method="GET", path="/%s" % path) self.assertEqual(table.table_id, self.TABLE_ID) + def test_get_table_sets_user_agent(self): + creds = _make_credentials() + http = mock.create_autospec(requests.Session) + mock_response = http.request( + url=mock.ANY, method=mock.ANY, headers=mock.ANY, data=mock.ANY + ) + http.reset_mock() + mock_response.status_code = 200 + mock_response.json.return_value = self._make_table_resource() + user_agent_override = client_info.ClientInfo(user_agent="my-application/1.2.3") + client = self._make_one( + project=self.PROJECT, + credentials=creds, + client_info=user_agent_override, + _http=http, + ) + + client.get_table(self.TABLE_REF) + + expected_user_agent = user_agent_override.to_user_agent() + http.request.assert_called_once_with( + url=mock.ANY, + method="GET", + headers={ + "X-Goog-API-Client": expected_user_agent, + "Accept-Encoding": "gzip", + "User-Agent": expected_user_agent, + }, + data=mock.ANY, + ) + self.assertIn("my-application/1.2.3", expected_user_agent) + def test_update_dataset_w_invalid_field(self): from google.cloud.bigquery.dataset import Dataset From ae05e0dc371d7978ffcdd36a022ff204818ee005 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 30 Apr 2019 10:51:51 -0700 Subject: [PATCH 0582/2016] Preserve order in `to_dataframe` with BQ Storage from queries containing `ORDER BY` (#7793) * Preserve order in `to_dataframe` with BQ Storage and queries containing ORDER BY This fixes an issue where due to reading from multiple stream in parallel, the order of rows is not preserved. Normally this isn't an issue, but it is when the rows are query results from an ORDER BY query. * Compile regex. * Assert based on truthiness not equality. --- .../google/cloud/bigquery/job.py | 31 ++- .../google/cloud/bigquery/table.py | 10 +- .../tests/unit/test_job.py | 180 +++++++++++++++--- 3 files changed, 192 insertions(+), 29 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index d14119b1f1b1..18f22270feac 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -15,6 +15,7 @@ """Define API Jobs.""" import copy +import re import threading import six @@ -45,6 +46,7 @@ _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" _TIMEOUT_BUFFER_SECS = 0.1 +_CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) _ERROR_REASON_TO_EXCEPTION = { "accessDenied": http_client.FORBIDDEN, @@ -92,6 +94,29 @@ def _error_result_to_exception(error_result): ) +def _contains_order_by(query): + """Do we need to preserve the order of the query results? + + This function has known false positives, such as with ordered window + functions: + + .. code-block:: sql + + SELECT SUM(x) OVER ( + window_name + PARTITION BY... + ORDER BY... + window_frame_clause) + FROM ... + + This false positive failure case means the behavior will be correct, but + downloading results with the BigQuery Storage API may be slower than it + otherwise would. This is preferable to the false negative case, where + results are expected to be in order but are not (due to parallel reads). + """ + return query and _CONTAINS_ORDER_BY.search(query) + + class Compression(object): """The compression type to use for exported files. The default value is :attr:`NONE`. @@ -2546,7 +2571,7 @@ def from_api_repr(cls, resource, client): :returns: Job parsed from ``resource``. """ job_id, config = cls._get_resource_config(resource) - query = config["query"]["query"] + query = _helpers._get_sub_prop(config, ["query", "query"]) job = cls(job_id, query, client=client) job._set_properties(resource) return job @@ -2849,7 +2874,9 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): dest_table_ref = self.destination dest_table = Table(dest_table_ref, schema=schema) dest_table._properties["numRows"] = self._query_results.total_rows - return self._client.list_rows(dest_table, retry=retry) + rows = self._client.list_rows(dest_table, retry=retry) + rows._preserve_order = _contains_order_by(self.query) + return rows def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Return a pandas DataFrame from a QueryJob diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d50fec487a31..46213d5fe8bf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1348,6 +1348,7 @@ def __init__( ) self._field_to_index = _helpers._field_to_index_mapping(schema) self._page_size = page_size + self._preserve_order = False self._project = client.project self._schema = schema self._selected_fields = selected_fields @@ -1496,10 +1497,15 @@ def _to_dataframe_bqstorage(self, bqstorage_client, dtypes, progress_bar=None): for field in self._selected_fields: read_options.selected_fields.append(field.name) + requested_streams = 0 + if self._preserve_order: + requested_streams = 1 + session = bqstorage_client.create_read_session( self._table.to_bqstorage(), "projects/{}".format(self._project), read_options=read_options, + requested_streams=requested_streams, ) # We need to parse the schema manually so that we can rearrange the @@ -1512,6 +1518,8 @@ def _to_dataframe_bqstorage(self, bqstorage_client, dtypes, progress_bar=None): if not session.streams: return pandas.DataFrame(columns=columns) + total_streams = len(session.streams) + # Use _to_dataframe_finished to notify worker threads when to quit. # See: https://stackoverflow.com/a/29237343/101923 self._to_dataframe_finished = False @@ -1560,7 +1568,7 @@ def get_frames(pool): return frames - with concurrent.futures.ThreadPoolExecutor() as pool: + with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool: try: frames = get_frames(pool) finally: diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a30c026a82c0..bb6f03f3efb3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -17,6 +17,7 @@ import unittest import mock +import pytest from six.moves import http_client try: @@ -59,6 +60,47 @@ def _make_connection(*responses): return mock_conn +def _make_job_resource( + creation_time_ms=1437767599006, + started_time_ms=1437767600007, + ended_time_ms=1437767601008, + started=False, + ended=False, + etag="abc-def-hjk", + endpoint="https://www.googleapis.com", + job_type="load", + job_id="a-random-id", + project_id="some-project", + user_email="bq-user@example.com", +): + resource = { + "configuration": {job_type: {}}, + "statistics": {"creationTime": creation_time_ms, job_type: {}}, + "etag": etag, + "id": "{}:{}".format(project_id, job_id), + "jobReference": {"projectId": project_id, "jobId": job_id}, + "selfLink": "{}/bigquery/v2/projects/{}/jobs/{}".format( + endpoint, project_id, job_id + ), + "user_email": user_email, + } + + if started or ended: + resource["statistics"]["startTime"] = started_time_ms + + if ended: + resource["statistics"]["endTime"] = ended_time_ms + + if job_type == "query": + resource["configuration"]["query"]["destinationTable"] = { + "projectId": project_id, + "datasetId": "_temp_dataset", + "tableId": "_temp_table", + } + + return resource + + class Test__error_result_to_exception(unittest.TestCase): def _call_fut(self, *args, **kwargs): from google.cloud.bigquery import job @@ -974,6 +1016,7 @@ class _Base(object): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import TableReference + ENDPOINT = "https://www.googleapis.com" PROJECT = "project" SOURCE1 = "http://example.com/source1.csv" DS_ID = "dataset_id" @@ -994,7 +1037,9 @@ def _setUpConstants(self): self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) self.ETAG = "ETAG" self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) - self.RESOURCE_URL = "http://example.com/path/to/resource" + self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format( + self.ENDPOINT, self.PROJECT, self.JOB_ID + ) self.USER_EMAIL = "phred@example.com" def _table_ref(self, table_id): @@ -1004,30 +1049,19 @@ def _table_ref(self, table_id): def _make_resource(self, started=False, ended=False): self._setUpConstants() - resource = { - "configuration": {self.JOB_TYPE: {}}, - "statistics": {"creationTime": self.WHEN_TS * 1000, self.JOB_TYPE: {}}, - "etag": self.ETAG, - "id": self.FULL_JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "selfLink": self.RESOURCE_URL, - "user_email": self.USER_EMAIL, - } - - if started or ended: - resource["statistics"]["startTime"] = self.WHEN_TS * 1000 - - if ended: - resource["statistics"]["endTime"] = (self.WHEN_TS + 1000) * 1000 - - if self.JOB_TYPE == "query": - resource["configuration"]["query"]["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": "_temp_dataset", - "tableId": "_temp_table", - } - - return resource + return _make_job_resource( + creation_time_ms=int(self.WHEN_TS * 1000), + started_time_ms=int(self.WHEN_TS * 1000), + ended_time_ms=int(self.WHEN_TS * 1000) + 1000000, + started=started, + ended=ended, + etag=self.ETAG, + endpoint=self.ENDPOINT, + job_type=self.JOB_TYPE, + job_id=self.JOB_ID, + project_id=self.PROJECT, + user_email=self.USER_EMAIL, + ) def _verifyInitialReadonlyProperties(self, job): # root elements of resource @@ -4684,7 +4718,11 @@ def test_to_dataframe_bqstorage(self): job.to_dataframe(bqstorage_client=bqstorage_client) bqstorage_client.create_read_session.assert_called_once_with( - mock.ANY, "projects/{}".format(self.PROJECT), read_options=mock.ANY + mock.ANY, + "projects/{}".format(self.PROJECT), + read_options=mock.ANY, + # Use default number of streams for best performance. + requested_streams=0, ) @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -5039,3 +5077,93 @@ def test_from_api_repr_normal(self): self.assertEqual(entry.pending_units, self.PENDING_UNITS) self.assertEqual(entry.completed_units, self.COMPLETED_UNITS) self.assertEqual(entry.slot_millis, self.SLOT_MILLIS) + + +@pytest.mark.parametrize( + "query,expected", + ( + (None, False), + ("", False), + ("select name, age from table", False), + ("select name, age from table LIMIT 10;", False), + ("select name, age from table order by other_column;", True), + ("Select name, age From table Order By other_column", True), + ("SELECT name, age FROM table ORDER BY other_column;", True), + ("select name, age from table order\nby other_column", True), + ("Select name, age From table Order\nBy other_column;", True), + ("SELECT name, age FROM table ORDER\nBY other_column", True), + ("SelecT name, age froM table OrdeR \n\t BY other_column;", True), + ), +) +def test__contains_order_by(query, expected): + from google.cloud.bigquery import job as mut + + if expected: + assert mut._contains_order_by(query) + else: + assert not mut._contains_order_by(query) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" +) +@pytest.mark.parametrize( + "query", + ( + "select name, age from table order by other_column;", + "Select name, age From table Order By other_column;", + "SELECT name, age FROM table ORDER BY other_column;", + "select name, age from table order\nby other_column;", + "Select name, age From table Order\nBy other_column;", + "SELECT name, age FROM table ORDER\nBY other_column;", + "SelecT name, age froM table OrdeR \n\t BY other_column;", + ), +) +def test_to_dataframe_bqstorage_preserve_order(query): + from google.cloud.bigquery.job import QueryJob as target_class + + job_resource = _make_job_resource( + project_id="test-project", job_type="query", ended=True + ) + job_resource["configuration"]["query"]["query"] = query + job_resource["status"] = {"state": "DONE"} + get_query_results_resource = { + "jobComplete": True, + "jobReference": {"projectId": "test-project", "jobId": "test-job"}, + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "totalRows": "4", + } + connection = _make_connection(get_query_results_resource, job_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(job_resource, client) + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + session = bigquery_storage_v1beta1.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [ + {"name": "name", "type": ["null", "string"]}, + {"name": "age", "type": ["null", "long"]}, + ], + } + ) + bqstorage_client.create_read_session.return_value = session + + job.to_dataframe(bqstorage_client=bqstorage_client) + + bqstorage_client.create_read_session.assert_called_once_with( + mock.ANY, + "projects/test-project", + read_options=mock.ANY, + # Use a single stream to preserve row order. + requested_streams=1, + ) From 947cd1d598f6a145f1ccc3e5701dc9e4271ec69d Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 7 May 2019 11:32:23 -0400 Subject: [PATCH 0583/2016] Refactor 'client_info' support. (#7849) * Add 'user_agent' and 'extra_headers' properties to 'Connection'. Deprecate the 'USER_AGENT' and '_EXTRA_HEADERS' class-level attributes. * Add 'client_info' parameter to 'Connection' ctor. * Implement 'Connection.user_agent' via its '_client_info'. * Ensure 'X-Goog-API-Client' header is always passed. * Create/use non-GAPIC-specific 'ClientInfo' class FBO HTTP/JSON clients. Derive the existing GAPIC class from it. --- .../google/cloud/bigquery/_http.py | 36 +++------------- .../google/cloud/bigquery/client.py | 6 +-- .../tests/unit/test__http.py | 42 ++++--------------- .../tests/unit/test_client.py | 10 +++-- 4 files changed, 22 insertions(+), 72 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index 0e5475f5f54b..643b24920bee 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -14,7 +14,6 @@ """Create / interact with Google BigQuery connections.""" -import google.api_core.gapic_v1.client_info from google.cloud import _http from google.cloud.bigquery import __version__ @@ -25,20 +24,16 @@ class Connection(_http.JSONConnection): :type client: :class:`~google.cloud.bigquery.client.Client` :param client: The client that owns the current connection. + + :type client_info: :class:`~google.api_core.client_info.ClientInfo` + :param client_info: (Optional) instance used to generate user agent. """ def __init__(self, client, client_info=None): - super(Connection, self).__init__(client) + super(Connection, self).__init__(client, client_info) - if client_info is None: - client_info = google.api_core.gapic_v1.client_info.ClientInfo( - gapic_version=__version__, client_library_version=__version__ - ) - else: - client_info.gapic_version = __version__ - client_info.client_library_version = __version__ - self._client_info = client_info - self._extra_headers = {} + self._client_info.gapic_version = __version__ + self._client_info.client_library_version = __version__ API_BASE_URL = "https://www.googleapis.com" """The base of the API call URL.""" @@ -48,22 +43,3 @@ def __init__(self, client, client_info=None): API_URL_TEMPLATE = "{api_base_url}/bigquery/{api_version}{path}" """A template for the URL of a particular API call.""" - - @property - def USER_AGENT(self): - return self._client_info.to_user_agent() - - @USER_AGENT.setter - def USER_AGENT(self, value): - self._client_info.user_agent = value - - @property - def _EXTRA_HEADERS(self): - self._extra_headers[ - _http.CLIENT_INFO_HEADER - ] = self._client_info.to_user_agent() - return self._extra_headers - - @_EXTRA_HEADERS.setter - def _EXTRA_HEADERS(self, value): - self._extra_headers = value diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index db53dab9ef11..f61c18f11bd4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -128,7 +128,7 @@ class Client(ClientWithProject): default_query_job_config (google.cloud.bigquery.job.QueryJobConfig): (Optional) Default ``QueryJobConfig``. Will be merged into job configs passed into the ``query`` method. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): + client_info (google.api_core.client_info.ClientInfo): The client info used to send a user-agent string along with API requests. If ``None``, then default info will be used. Generally, you only need to set this if you're developing your own library @@ -1363,7 +1363,7 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries): """ chunk_size = _DEFAULT_CHUNKSIZE transport = self._http - headers = _get_upload_headers(self._connection.USER_AGENT) + headers = _get_upload_headers(self._connection.user_agent) upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) # TODO: modify ResumableUpload to take a retry.Retry object # that it can use for the initial RPC. @@ -1409,7 +1409,7 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries): msg = _READ_LESS_THAN_SIZE.format(size, len(data)) raise ValueError(msg) - headers = _get_upload_headers(self._connection.USER_AGENT) + headers = _get_upload_headers(self._connection.user_agent) upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project) upload = MultipartUpload(upload_url, headers=headers) diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index d7d25ea445a0..939b5668e1e2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -57,49 +57,21 @@ def test_user_agent(self): client = mock.Mock(_http=http, spec=["_http"]) conn = self._make_one(client) - conn.USER_AGENT = "my-application/1.2.3" + conn.user_agent = "my-application/1.2.3" req_data = "req-data-boring" result = conn.api_request("GET", "/rainbow", data=req_data, expect_json=False) self.assertEqual(result, data) expected_headers = { "Accept-Encoding": "gzip", - base_http.CLIENT_INFO_HEADER: conn.USER_AGENT, - "User-Agent": conn.USER_AGENT, - } - expected_uri = conn.build_api_url("/rainbow") - http.request.assert_called_once_with( - data=req_data, headers=expected_headers, method="GET", url=expected_uri - ) - self.assertIn("my-application/1.2.3", conn.USER_AGENT) - - def test_extra_headers(self): - from google.cloud import _http as base_http - - http = mock.create_autospec(requests.Session, instance=True) - response = requests.Response() - response.status_code = 200 - data = b"brent-spiner" - response._content = data - http.request.return_value = response - client = mock.Mock(_http=http, spec=["_http"]) - - conn = self._make_one(client) - conn._EXTRA_HEADERS["x-test-header"] = "a test value" - req_data = "req-data-boring" - result = conn.api_request("GET", "/rainbow", data=req_data, expect_json=False) - self.assertEqual(result, data) - - expected_headers = { - "Accept-Encoding": "gzip", - base_http.CLIENT_INFO_HEADER: conn.USER_AGENT, - "User-Agent": conn.USER_AGENT, - "x-test-header": "a test value", + base_http.CLIENT_INFO_HEADER: conn.user_agent, + "User-Agent": conn.user_agent, } expected_uri = conn.build_api_url("/rainbow") http.request.assert_called_once_with( data=req_data, headers=expected_headers, method="GET", url=expected_uri ) + self.assertIn("my-application/1.2.3", conn.user_agent) def test_extra_headers_replace(self): from google.cloud import _http as base_http @@ -113,15 +85,15 @@ def test_extra_headers_replace(self): client = mock.Mock(_http=http, spec=["_http"]) conn = self._make_one(client) - conn._EXTRA_HEADERS = {"x-test-header": "a test value"} + conn.extra_headers = {"x-test-header": "a test value"} req_data = "req-data-boring" result = conn.api_request("GET", "/rainbow", data=req_data, expect_json=False) self.assertEqual(result, data) expected_headers = { "Accept-Encoding": "gzip", - base_http.CLIENT_INFO_HEADER: conn.USER_AGENT, - "User-Agent": conn.USER_AGENT, + base_http.CLIENT_INFO_HEADER: conn.user_agent, + "User-Agent": conn.user_agent, "x-test-header": "a test value", } expected_uri = conn.build_api_url("/rainbow") diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 08c36e0ac277..13889f90d7e8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -54,7 +54,7 @@ def _make_connection(*responses): from google.cloud.exceptions import NotFound mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) - mock_conn.USER_AGENT = "testing 1.2.3" + mock_conn.user_agent = "testing 1.2.3" mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] return mock_conn @@ -2752,7 +2752,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): + "/jobs?uploadType=resumable" ) self.assertEqual(upload.upload_url, upload_url) - expected_headers = _get_upload_headers(conn.USER_AGENT) + expected_headers = _get_upload_headers(conn.user_agent) self.assertEqual(upload._headers, expected_headers) self.assertFalse(upload.finished) self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE) @@ -2830,7 +2830,7 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): + b"\r\n" + b"--==0==--" ) - headers = _get_upload_headers(conn.USER_AGENT) + headers = _get_upload_headers(conn.user_agent) headers["content-type"] = b'multipart/related; boundary="==0=="' fake_transport.request.assert_called_once_with( "POST", upload_url, data=payload, headers=headers @@ -4518,7 +4518,9 @@ def test_list_rows_with_missing_schema(self): self.assertIsNone(rows[2].age, msg=repr(table)) def test_list_rows_error(self): - client = self._make_one() + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) # neither Table nor tableReference with self.assertRaises(TypeError): From 0b62133c676d57a87157aeed7cec944bed3745c8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 8 May 2019 17:05:24 -0700 Subject: [PATCH 0584/2016] Prevent error when time partitioning is populated with empty dict (#7904) * Prevent error when time partitioning is populated with empty dict As reported in internal bug 131167013, calling `table.time_partitioning` can sometimes fail with `KeyError: 'type'` when attempting to read this property on a non-partitioned table. * Add tests for TimePartitioning.from_api_repr * Remove unused imports * Speed up lint session by installing local deps inplace. --- .../google/cloud/bigquery/table.py | 4 +- packages/google-cloud-bigquery/noxfile.py | 6 +- .../tests/unit/test_table.py | 68 +++++++++++++++++++ 3 files changed, 74 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 46213d5fe8bf..864fff4458b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1787,7 +1787,7 @@ def type_(self): """google.cloud.bigquery.table.TimePartitioningType: The type of time partitioning to use. """ - return self._properties["type"] + return self._properties.get("type") @type_.setter def type_(self, value): @@ -1849,7 +1849,7 @@ def from_api_repr(cls, api_repr): google.cloud.bigquery.table.TimePartitioning: The ``TimePartitioning`` object. """ - instance = cls(api_repr["type"]) + instance = cls() instance._properties = api_repr return instance diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 39e5f4548c0b..a9df7a67cfcc 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -141,8 +141,10 @@ def lint(session): serious code quality issues. """ - session.install("black", "flake8", *LOCAL_DEPS) - session.install(".") + session.install("black", "flake8") + for local_dep in LOCAL_DEPS: + session.install("-e", local_dep) + session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") session.run("flake8", os.path.join("docs", "snippets.py")) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 18ca125e804c..07a625b98825 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -871,6 +871,54 @@ def test__build_resource_w_custom_field_not_in__properties(self): with self.assertRaises(ValueError): table._build_resource(["bad"]) + def test_time_partitioning_getter(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table._properties["timePartitioning"] = { + "type": "DAY", + "field": "col1", + "expirationMs": "123456", + "requirePartitionFilter": False, + } + self.assertIsInstance(table.time_partitioning, TimePartitioning) + self.assertEqual(table.time_partitioning.type_, TimePartitioningType.DAY) + self.assertEqual(table.time_partitioning.field, "col1") + self.assertEqual(table.time_partitioning.expiration_ms, 123456) + self.assertFalse(table.time_partitioning.require_partition_filter) + + def test_time_partitioning_getter_w_none(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table._properties["timePartitioning"] = None + self.assertIsNone(table.time_partitioning) + + del table._properties["timePartitioning"] + self.assertIsNone(table.time_partitioning) + + def test_time_partitioning_getter_w_empty(self): + from google.cloud.bigquery.table import TimePartitioning + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + # Even though there are required properties according to the API + # specification, sometimes time partitioning is populated as an empty + # object. See internal bug 131167013. + table._properties["timePartitioning"] = {} + self.assertIsInstance(table.time_partitioning, TimePartitioning) + self.assertIsNone(table.time_partitioning.type_) + self.assertIsNone(table.time_partitioning.field) + self.assertIsNone(table.time_partitioning.expiration_ms) + self.assertIsNone(table.time_partitioning.require_partition_filter) + def test_time_partitioning_setter(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType @@ -2211,6 +2259,20 @@ def test_constructor_explicit(self): self.assertEqual(time_partitioning.expiration_ms, 10000) self.assertTrue(time_partitioning.require_partition_filter) + def test_from_api_repr_empty(self): + klass = self._get_target_class() + + # Even though there are required properties according to the API + # specification, sometimes time partitioning is populated as an empty + # object. See internal bug 131167013. + api_repr = {} + time_partitioning = klass.from_api_repr(api_repr) + + self.assertIsNone(time_partitioning.type_) + self.assertIsNone(time_partitioning.field) + self.assertIsNone(time_partitioning.expiration_ms) + self.assertIsNone(time_partitioning.require_partition_filter) + def test_from_api_repr_minimal(self): from google.cloud.bigquery.table import TimePartitioningType @@ -2223,6 +2285,12 @@ def test_from_api_repr_minimal(self): self.assertIsNone(time_partitioning.expiration_ms) self.assertIsNone(time_partitioning.require_partition_filter) + def test_from_api_repr_doesnt_override_type(self): + klass = self._get_target_class() + api_repr = {"type": "HOUR"} + time_partitioning = klass.from_api_repr(api_repr) + self.assertEqual(time_partitioning.type_, "HOUR") + def test_from_api_repr_explicit(self): from google.cloud.bigquery.table import TimePartitioningType From 7634566bf05a2f8252f0a887418d992a68c53817 Mon Sep 17 00:00:00 2001 From: Erik Nilsson Date: Mon, 13 May 2019 08:44:34 -0400 Subject: [PATCH 0585/2016] Remove duplicates from index on pandas DataFrames returned by to_datafram(). (#7953) * added tests confirming the bug When loading large datasets from BIgQuery as pandas DataFrames, sometimes the index contains duplicates. This happens when the results are collected as multiple DataFrames and then concatenated without resetting the index. * reset index when concating data frames --- .../google/cloud/bigquery/table.py | 4 +- .../tests/unit/test_table.py | 67 +++++++++++++++++++ 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 864fff4458b1..1f9bb5eee3d4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1408,7 +1408,7 @@ def _to_dataframe_tabledata_list(self, dtypes, progress_bar=None): # Indicate that the download has finished. progress_bar.close() - return pandas.concat(frames) + return pandas.concat(frames, ignore_index=True) def _to_dataframe_bqstorage_stream( self, bqstorage_client, dtypes, columns, session, stream, worker_queue @@ -1585,7 +1585,7 @@ def get_frames(pool): # Update the progress bar one last time to close it. self._process_progress_updates(progress_queue, progress_bar) - return pandas.concat(frames) + return pandas.concat(frames, ignore_index=True) def _get_progress_bar(self, progress_bar_type): """Construct a tqdm progress bar object, if tqdm is installed.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 07a625b98825..816ba4ec48da 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1943,6 +1943,51 @@ def blocking_to_dataframe(*args, **kwargs): # Make sure that this test pushed to the progress queue. self.assertEqual(mock_queue().put_nowait.call_count, total_pages) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud.bigquery_storage_v1beta1 import reader + + streams = [ + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, + ] + session = bigquery_storage_v1beta1.types.ReadSession(streams=streams) + session.avro_schema.schema = json.dumps({"fields": [{"name": "colA"}]}) + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient) + bqstorage_client.create_read_session.return_value = session + + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) + bqstorage_client.read_rows.return_value = mock_rowstream + + mock_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_rowstream.rows.return_value = mock_rows + + page_data_frame = pandas.DataFrame( + [{"colA": 1}, {"colA": -1}], columns=["colA"]) + mock_page = mock.create_autospec(reader.ReadRowsPage) + mock_page.to_dataframe.return_value = page_data_frame + mock_pages = (mock_page, mock_page, mock_page) + type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) + + row_iterator = self._make_one( + schema=[schema.SchemaField("colA", "IGNORED")], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + + self.assertEqual(list(got), ["colA"]) + total_pages = len(streams) * len(mock_pages) + total_rows = len(page_data_frame) * total_pages + self.assertEqual(len(got.index), total_rows) + self.assertTrue(got.index.is_unique) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" @@ -2138,6 +2183,28 @@ def test_to_dataframe_w_bqstorage_fallback_to_tabledata_list(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "int64") + @unittest.skipIf(pandas is None, "Requires `pandas`") + @mock.patch("google.cloud.bigquery.table.RowIterator.pages", + new_callable=mock.PropertyMock) + def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self, mock_pages): + from google.cloud.bigquery import schema + + iterator_schema = [ + schema.SchemaField("name", "STRING", mode="REQUIRED"), + ] + pages = [[{"name": "Bengt"}], [{"name": "Sven"}]] + + mock_pages.return_value = pages + row_iterator = self._make_one(schema=iterator_schema) + + df = row_iterator.to_dataframe(bqstorage_client=None) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 2) + self.assertEqual(list(df), ["name"]) + self.assertEqual(df.name.dtype.name, "object") + self.assertTrue(df.index.is_unique) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" From eaf39c237f4580d760af203642757ae0a68d9d10 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot <44816363+yoshi-automation@users.noreply.github.com> Date: Tue, 14 May 2019 07:50:06 -0700 Subject: [PATCH 0586/2016] Blacken unit tests. (#7960) --- packages/google-cloud-bigquery/synth.metadata | 12 ++++++------ .../tests/unit/test_table.py | 19 +++++++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 4320d1d271bf..fe82cdbbbdaa 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -1,19 +1,19 @@ { - "updateTime": "2019-04-03T23:38:10.307198Z", + "updateTime": "2019-05-14T12:12:11.320300Z", "sources": [ { "generator": { "name": "artman", - "version": "0.16.23", - "dockerImage": "googleapis/artman@sha256:f3a3f88000dc1cd1b4826104c5574aa5c534f6793fbf66e888d11c0d7ef5762e" + "version": "0.19.0", + "dockerImage": "googleapis/artman@sha256:d3df563538225ac6caac45d8ad86499500211d1bcb2536955a6dbda15e1b368e" } }, { "git": { "name": "googleapis", - "remote": "git@github.com:googleapis/googleapis.git", - "sha": "04193ea2f8121388c998ab49c382f2c03417dcce", - "internalRef": "241828309" + "remote": "https://github.com/googleapis/googleapis.git", + "sha": "ffa256646fb15c4a6c720f0ed8f984a0ea513416", + "internalRef": "248066377" } } ], diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 816ba4ec48da..378323ce2932 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1960,7 +1960,8 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): session.avro_schema.schema = json.dumps({"fields": [{"name": "colA"}]}) bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient) + bigquery_storage_v1beta1.BigQueryStorageClient + ) bqstorage_client.create_read_session.return_value = session mock_rowstream = mock.create_autospec(reader.ReadRowsStream) @@ -1970,7 +1971,8 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): mock_rowstream.rows.return_value = mock_rows page_data_frame = pandas.DataFrame( - [{"colA": 1}, {"colA": -1}], columns=["colA"]) + [{"colA": 1}, {"colA": -1}], columns=["colA"] + ) mock_page = mock.create_autospec(reader.ReadRowsPage) mock_page.to_dataframe.return_value = page_data_frame mock_pages = (mock_page, mock_page, mock_page) @@ -2184,14 +2186,15 @@ def test_to_dataframe_w_bqstorage_fallback_to_tabledata_list(self): self.assertEqual(df.age.dtype.name, "int64") @unittest.skipIf(pandas is None, "Requires `pandas`") - @mock.patch("google.cloud.bigquery.table.RowIterator.pages", - new_callable=mock.PropertyMock) - def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self, mock_pages): + @mock.patch( + "google.cloud.bigquery.table.RowIterator.pages", new_callable=mock.PropertyMock + ) + def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index( + self, mock_pages + ): from google.cloud.bigquery import schema - iterator_schema = [ - schema.SchemaField("name", "STRING", mode="REQUIRED"), - ] + iterator_schema = [schema.SchemaField("name", "STRING", mode="REQUIRED")] pages = [[{"name": "Bengt"}], [{"name": "Sven"}]] mock_pages.return_value = pages From b597f747eb0ffa8b44c2d21534f576de1ccadde7 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 16 May 2019 13:22:32 -0400 Subject: [PATCH 0587/2016] Pin 'google-cloud-core >= 1.0.0, < 2.0dev'. (#7993) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index b51fa63a9a75..bf3b5ee35ad1 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -31,7 +31,7 @@ dependencies = [ 'enum34; python_version < "3.4"', "google-api-core >= 1.6.0, < 2.0.0dev", - "google-cloud-core >= 0.29.0, < 0.30dev", + "google-cloud-core >= 1.0.0, < 2.0dev", "google-resumable-media >= 0.3.1", "protobuf >= 3.6.0", ] From 0f6a43699a306665bf9f7ab924c1ced3781791ec Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 17 May 2019 11:33:21 -0400 Subject: [PATCH 0588/2016] Release bigquery-1.12.0 (#8001) --- packages/google-cloud-bigquery/CHANGELOG.md | 34 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index cf83c2e89b27..b672204cd205 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,40 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.12.0 + +05-16-2019 11:25 PDT + +### Implementation Changes +- Remove duplicates from index on pandas DataFrames returned by `to_dataframe()`. ([#7953](https://github.com/googleapis/google-cloud-python/pull/7953)) +- Prevent error when time partitioning is populated with empty dict ([#7904](https://github.com/googleapis/google-cloud-python/pull/7904)) +- Preserve order in `to_dataframe` with BQ Storage from queries containing `ORDER BY` ([#7793](https://github.com/googleapis/google-cloud-python/pull/7793)) +- Respect `progress_bar_type` in `to_dataframe` when used with BQ Storage API ([#7697](https://github.com/googleapis/google-cloud-python/pull/7697)) +- Refactor QueryJob.query to read from resource dictionary ([#7763](https://github.com/googleapis/google-cloud-python/pull/7763)) +- Close the `to_dataframe` progress bar when finished. ([#7757](https://github.com/googleapis/google-cloud-python/pull/7757)) +- Ensure that `KeyboardInterrupt` during `to_dataframe`no longer hangs. ([#7698](https://github.com/googleapis/google-cloud-python/pull/7698)) +- Raise ValueError when BQ Storage is required but missing ([#7726](https://github.com/googleapis/google-cloud-python/pull/7726)) +- Make `total_rows` available on RowIterator before iteration ([#7622](https://github.com/googleapis/google-cloud-python/pull/7622)) +- Avoid masking auth errors in `to_dataframe` with BQ Storage API ([#7674](https://github.com/googleapis/google-cloud-python/pull/7674)) + +### New Features +- Add support for passing `client_info`. ([#7849](https://github.com/googleapis/google-cloud-python/pull/7849) and ([#7806](https://github.com/googleapis/google-cloud-python/pull/7806)) +- Phase 1 for storing schemas for later use. ([#7761](https://github.com/googleapis/google-cloud-python/pull/7761)) +- Add `destination` and related properties to LoadJob. ([#7710](https://github.com/googleapis/google-cloud-python/pull/7710)) +- Add `clustering_fields` property to TableListItem ([#7692](https://github.com/googleapis/google-cloud-python/pull/7692)) +- Add `created` and `expires` properties to TableListItem ([#7684](https://github.com/googleapis/google-cloud-python/pull/7684)) + +### Dependencies +- Pin `google-cloud-core >= 1.0.0, < 2.0dev`. ([#7993](https://github.com/googleapis/google-cloud-python/pull/7993)) +- Add `[all]` extras to install all extra dependencies ([#7610](https://github.com/googleapis/google-cloud-python/pull/7610)) + +### Documentation +- Move table and dataset snippets to samples/ directory ([#7683](https://github.com/googleapis/google-cloud-python/pull/7683)) + +### Internal / Testing Changes +- Blacken unit tests. ([#7960](https://github.com/googleapis/google-cloud-python/pull/7960)) +- Cleanup client tests with method to create minimal table resource ([#7802](https://github.com/googleapis/google-cloud-python/pull/7802)) + ## 1.11.2 04-05-2019 08:16 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index bf3b5ee35ad1..57f26a60b916 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.11.2" +version = "1.12.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From bf37a87c9ebb3cce5eee3e19b4d1f482ec8cc6a0 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 17 May 2019 15:21:09 -0400 Subject: [PATCH 0589/2016] Use alabaster theme everwhere. (#8021) 'sphinx_rtd_theme' is no longer installed by default. --- packages/google-cloud-bigquery/docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index c9ff82d8e72b..b3cfa267dd7d 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -139,7 +139,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = "sphinx_rtd_theme" +html_theme = "alabaster" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the From 24e5575899bd823caeff916fbf79158727c23a96 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 21 May 2019 10:02:40 -0700 Subject: [PATCH 0590/2016] Bigquery: Add empty lines (via synth). (#8049) --- .../google/cloud/bigquery_v2/gapic/enums.py | 1 + .../google/cloud/bigquery_v2/types.py | 3 +++ packages/google-cloud-bigquery/synth.metadata | 10 +++++----- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py index 69d3d670a628..ba6c0c721266 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """Wrappers for protocol buffer enum types.""" import enum diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py index 0d228e88abac..43414960e03f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from __future__ import absolute_import import sys @@ -26,6 +27,7 @@ from google.protobuf import timestamp_pb2 from google.protobuf import wrappers_pb2 + _shared_modules = [empty_pb2, timestamp_pb2, wrappers_pb2] _local_modules = [model_pb2, model_reference_pb2, standard_sql_pb2] @@ -42,4 +44,5 @@ setattr(sys.modules[__name__], name, message) names.append(name) + __all__ = tuple(sorted(names)) diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index fe82cdbbbdaa..8014f67e980c 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -1,19 +1,19 @@ { - "updateTime": "2019-05-14T12:12:11.320300Z", + "updateTime": "2019-05-21T12:12:55.399616Z", "sources": [ { "generator": { "name": "artman", - "version": "0.19.0", - "dockerImage": "googleapis/artman@sha256:d3df563538225ac6caac45d8ad86499500211d1bcb2536955a6dbda15e1b368e" + "version": "0.20.0", + "dockerImage": "googleapis/artman@sha256:3246adac900f4bdbd62920e80de2e5877380e44036b3feae13667ec255ebf5ec" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "ffa256646fb15c4a6c720f0ed8f984a0ea513416", - "internalRef": "248066377" + "sha": "32a10f69e2c9ce15bba13ab1ff928bacebb25160", + "internalRef": "249058354" } } ], From 7401224082c8969dd294f791368a85c5570c409d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 May 2019 11:15:12 -0700 Subject: [PATCH 0591/2016] Don't raise error when encountering unknown fields in Models API. (#8083) As new fields are added, the JSON -> Protobuf conversion should not fail. Instead, it should ignore unknown fields. So that this data is not discarded, use `_properties` as is the convention in our REST libraries. It's private, but can be used as a workaround to get access to fields that haven't yet been added to the client library. --- .../google/cloud/bigquery/model.py | 20 ++++++++++++------- .../tests/unit/model/test_model.py | 16 +++++++++++++++ .../tests/unit/model/test_model_reference.py | 14 +++++++++++++ 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index 8b29e4008558..4049a9232467 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -268,6 +268,9 @@ def from_api_repr(cls, resource): google.cloud.bigquery.model.Model: Model parsed from ``resource``. """ this = cls(None) + # Keep a reference to the resource as a workaround to find unknown + # field values. + this._properties = resource # Convert from millis-from-epoch to timestamp well-known type. # TODO: Remove this hack once CL 238585470 hits prod. @@ -279,12 +282,9 @@ def from_api_repr(cls, resource): start_time = datetime_helpers.from_microseconds(1e3 * float(start_time)) training_run["startTime"] = datetime_helpers.to_rfc3339(start_time) - this._proto = json_format.ParseDict(resource, types.Model()) - for key in six.itervalues(cls._PROPERTY_TO_API_FIELD): - # Leave missing keys unset. This allows us to use setdefault in the - # getters where we want a default value other than None. - if key in resource: - this._properties[key] = resource[key] + this._proto = json_format.ParseDict( + resource, types.Model(), ignore_unknown_fields=True + ) return this def _build_resource(self, filter_fields): @@ -304,6 +304,7 @@ class ModelReference(object): def __init__(self): self._proto = types.ModelReference() + self._properties = {} @property def project(self): @@ -342,7 +343,12 @@ def from_api_repr(cls, resource): Model reference parsed from ``resource``. """ ref = cls() - ref._proto = json_format.ParseDict(resource, types.ModelReference()) + # Keep a reference to the resource as a workaround to find unknown + # field values. + ref._properties = resource + ref._proto = json_format.ParseDict( + resource, types.ModelReference(), ignore_unknown_fields=True + ) return ref @classmethod diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model.py b/packages/google-cloud-bigquery/tests/unit/model/test_model.py index 2086c333486d..b6d9756e15fe 100644 --- a/packages/google-cloud-bigquery/tests/unit/model/test_model.py +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model.py @@ -165,6 +165,22 @@ def test_from_api_repr_w_minimal_resource(target_class): assert len(got.label_columns) == 0 +def test_from_api_repr_w_unknown_fields(target_class): + from google.cloud.bigquery import ModelReference + + resource = { + "modelReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + }, + "thisFieldIsNotInTheProto": "just ignore me", + } + got = target_class.from_api_repr(resource) + assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") + assert got._properties is resource + + @pytest.mark.parametrize( "resource,filter_fields,expected", [ diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py b/packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py index 0145c76f6ad0..ff1d1df7d499 100644 --- a/packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py @@ -37,6 +37,20 @@ def test_from_api_repr(target_class): assert got.path == "/projects/my-project/datasets/my_dataset/models/my_model" +def test_from_api_repr_w_unknown_fields(target_class): + resource = { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + "thisFieldIsNotInTheProto": "just ignore me", + } + got = target_class.from_api_repr(resource) + assert got.project == "my-project" + assert got.dataset_id == "my_dataset" + assert got.model_id == "my_model" + assert got._properties is resource + + def test_to_api_repr(target_class): ref = target_class.from_string("my-project.my_dataset.my_model") got = ref.to_api_repr() From 676e4e09467192823e0d67ed6b87e0487e45332f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 May 2019 12:27:01 -0700 Subject: [PATCH 0592/2016] Release bigquery 1.12.1 (#8084) * Release 1.12.1 --- packages/google-cloud-bigquery/CHANGELOG.md | 16 ++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index b672204cd205..db65224a957c 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,22 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.12.1 + +05-21-2019 11:16 PDT + +### Implementation Changes + +- Don't raise error when encountering unknown fields in Models API. ([#8083](https://github.com/googleapis/google-cloud-python/pull/8083)) + +### Documentation + +- Use alabaster theme everwhere. ([#8021](https://github.com/googleapis/google-cloud-python/pull/8021)) + +### Internal / Testing Changes + +- Add empty lines (via synth). ([#8049](https://github.com/googleapis/google-cloud-python/pull/8049)) + ## 1.12.0 05-16-2019 11:25 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 57f26a60b916..2b0211750acf 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.12.0" +version = "1.12.1" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 4499535f09aa6a4cb38624498b8c0097a309765c Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Sat, 25 May 2019 10:02:39 -0700 Subject: [PATCH 0593/2016] Fix coverage in 'types.py' (via synth). (#8146) --- .../google/cloud/bigquery_v2/types.py | 2 +- packages/google-cloud-bigquery/synth.metadata | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py index 43414960e03f..36fc2d6bfbd2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py @@ -34,7 +34,7 @@ names = [] -for module in _shared_modules: +for module in _shared_modules: # pragma: NO COVER for name, message in get_messages(module).items(): setattr(sys.modules[__name__], name, message) names.append(name) diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 8014f67e980c..3d09d31e5ed8 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -1,19 +1,19 @@ { - "updateTime": "2019-05-21T12:12:55.399616Z", + "updateTime": "2019-05-25T12:13:19.608155Z", "sources": [ { "generator": { "name": "artman", - "version": "0.20.0", - "dockerImage": "googleapis/artman@sha256:3246adac900f4bdbd62920e80de2e5877380e44036b3feae13667ec255ebf5ec" + "version": "0.21.0", + "dockerImage": "googleapis/artman@sha256:28d4271586772b275cd3bc95cb46bd227a24d3c9048de45dccdb7f3afb0bfba9" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "32a10f69e2c9ce15bba13ab1ff928bacebb25160", - "internalRef": "249058354" + "sha": "7ca19138ccebe219a67be2245200e821b3e32123", + "internalRef": "249916728" } } ], From 625c1d1ff561e733fddb915513f993b9a07ae024 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 30 May 2019 17:23:48 -0700 Subject: [PATCH 0594/2016] Use `job_config.schema` for data type conversion if specified in `load_table_from_dataframe`. (#8105) * Use `job_config.schema` for data type conversion if specified in `load_table_from_dataframe`. Use the BigQuery schema to inform encoding of file used in load job. This fixes an issue where a dataframe with ambiguous types (such as an `object` column containing all `None` values) could not be appended to an existing table, since the schemas wouldn't match in most cases. * Add system test for loading dataframe with non-nulls and explicit schema. --- .../google/cloud/bigquery/_pandas_helpers.py | 152 ++++++ .../google/cloud/bigquery/client.py | 15 +- .../google-cloud-bigquery/tests/system.py | 158 ++++++ .../tests/unit/test__pandas_helpers.py | 458 ++++++++++++++++++ .../tests/unit/test_client.py | 43 ++ 5 files changed, 824 insertions(+), 2 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py new file mode 100644 index 000000000000..eeb65e0b9766 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -0,0 +1,152 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared helper functions for connecting BigQuery and pandas.""" + +try: + import pyarrow + import pyarrow.parquet +except ImportError: # pragma: NO COVER + pyarrow = None + +from google.cloud.bigquery import schema + + +STRUCT_TYPES = ("RECORD", "STRUCT") + + +def pyarrow_datetime(): + return pyarrow.timestamp("us", tz=None) + + +def pyarrow_numeric(): + return pyarrow.decimal128(38, 9) + + +def pyarrow_time(): + return pyarrow.time64("us") + + +def pyarrow_timestamp(): + return pyarrow.timestamp("us", tz="UTC") + + +if pyarrow: + BQ_TO_ARROW_SCALARS = { + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, + } +else: # pragma: NO COVER + BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER + + +def bq_to_arrow_struct_data_type(field): + arrow_fields = [] + for subfield in field.fields: + arrow_subfield = bq_to_arrow_field(subfield) + if arrow_subfield: + arrow_fields.append(arrow_subfield) + else: + # Could not determine a subfield type. Fallback to type + # inference. + return None + return pyarrow.struct(arrow_fields) + + +def bq_to_arrow_data_type(field): + """Return the Arrow data type, corresponding to a given BigQuery column. + + Returns None if default Arrow type inspection should be used. + """ + if field.mode is not None and field.mode.upper() == "REPEATED": + inner_type = bq_to_arrow_data_type( + schema.SchemaField(field.name, field.field_type) + ) + if inner_type: + return pyarrow.list_(inner_type) + return None + + if field.field_type.upper() in STRUCT_TYPES: + return bq_to_arrow_struct_data_type(field) + + data_type_constructor = BQ_TO_ARROW_SCALARS.get(field.field_type.upper()) + if data_type_constructor is None: + return None + return data_type_constructor() + + +def bq_to_arrow_field(bq_field): + """Return the Arrow field, corresponding to a given BigQuery column. + + Returns None if the Arrow type cannot be determined. + """ + arrow_type = bq_to_arrow_data_type(bq_field) + if arrow_type: + is_nullable = bq_field.mode.upper() == "NULLABLE" + return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) + return None + + +def bq_to_arrow_array(series, bq_field): + arrow_type = bq_to_arrow_data_type(bq_field) + if bq_field.mode.upper() == "REPEATED": + return pyarrow.ListArray.from_pandas(series, type=arrow_type) + if bq_field.field_type.upper() in STRUCT_TYPES: + return pyarrow.StructArray.from_pandas(series, type=arrow_type) + return pyarrow.array(series, type=arrow_type) + + +def to_parquet(dataframe, bq_schema, filepath): + """Write dataframe as a Parquet file, according to the desired BQ schema. + + This function requires the :mod:`pyarrow` package. Arrow is used as an + intermediate format. + + Args: + dataframe (pandas.DataFrame): + DataFrame to convert to convert to Parquet file. + bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + Desired BigQuery schema. Number of columns must match number of + columns in the DataFrame. + filepath (str): + Path to write Parquet file to. + """ + if pyarrow is None: + raise ValueError("pyarrow is required for BigQuery schema conversion.") + + if len(bq_schema) != len(dataframe.columns): + raise ValueError( + "Number of columns in schema must match number of columns in dataframe." + ) + + arrow_arrays = [] + arrow_names = [] + for bq_field in bq_schema: + arrow_names.append(bq_field.name) + arrow_arrays.append(bq_to_arrow_array(dataframe[bq_field.name], bq_field)) + + arrow_table = pyarrow.Table.from_arrays(arrow_arrays, names=arrow_names) + pyarrow.parquet.write_table(arrow_table, filepath) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f61c18f11bd4..78d718aa6a2a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -44,6 +44,7 @@ from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._http import Connection +from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference @@ -1271,9 +1272,16 @@ def load_table_from_dataframe( project (str, optional): Project ID of the project of where to run the job. Defaults to the client's project. - job_config (google.cloud.bigquery.job.LoadJobConfig, optional): + job_config (~google.cloud.bigquery.job.LoadJobConfig, optional): Extra configuration options for the job. + To override the default pandas data type conversions, supply + a value for + :attr:`~google.cloud.bigquery.job.LoadJobConfig.schema` with + column names matching those of the dataframe. The BigQuery + schema is used to determine the correct data type conversion. + Indexes are not loaded. Requires the :mod:`pyarrow` library. + Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -1296,7 +1304,10 @@ def load_table_from_dataframe( os.close(tmpfd) try: - dataframe.to_parquet(tmppath) + if job_config.schema: + _pandas_helpers.to_parquet(dataframe, job_config.schema, tmppath) + else: + dataframe.to_parquet(tmppath) with open(tmppath, "rb") as parquet_file: return self.load_table_from_file( diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index cceca192b8f7..2b4aa84b8faf 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -27,6 +27,7 @@ import six import pytest +import pytz try: from google.cloud import bigquery_storage_v1beta1 @@ -36,6 +37,10 @@ import pandas except ImportError: # pragma: NO COVER pandas = None +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None try: import IPython from IPython.utils import io @@ -622,6 +627,159 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength) ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_nulls(self): + """Test that a DataFrame with null columns can be uploaded if a + BigQuery schema is specified. + + See: https://github.com/googleapis/google-cloud-python/issues/7370 + """ + # Schema with all scalar types. + scalars_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + table_schema = scalars_schema + ( + # TODO: Array columns can't be read due to NULLABLE versus REPEATED + # mode mismatch. See: + # https://issuetracker.google.com/133415569#comment3 + # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), + # TODO: Support writing StructArrays to Parquet. See: + # https://jira.apache.org/jira/browse/ARROW-2587 + # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), + ) + num_rows = 100 + nulls = [None] * num_rows + dataframe = pandas.DataFrame( + { + "bool_col": nulls, + "bytes_col": nulls, + "date_col": nulls, + "dt_col": nulls, + "float_col": nulls, + "geo_col": nulls, + "int_col": nulls, + "num_col": nulls, + "str_col": nulls, + "time_col": nulls, + "ts_col": nulls, + } + ) + + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_nulls".format( + Config.CLIENT.project, dataset_id + ) + + # Create the table before loading so that schema mismatch errors are + # identified. + table = retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = Config.CLIENT.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = Config.CLIENT.get_table(table) + self.assertEqual(tuple(table.schema), table_schema) + self.assertEqual(table.num_rows, num_rows) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_explicit_schema(self): + # Schema with all scalar types. + scalars_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + table_schema = scalars_schema + ( + # TODO: Array columns can't be read due to NULLABLE versus REPEATED + # mode mismatch. See: + # https://issuetracker.google.com/133415569#comment3 + # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), + # TODO: Support writing StructArrays to Parquet. See: + # https://jira.apache.org/jira/browse/ARROW-2587 + # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), + ) + dataframe = pandas.DataFrame( + { + "bool_col": [True, None, False], + "bytes_col": [b"abc", None, b"def"], + "date_col": [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], + "dt_col": [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + "float_col": [float("-inf"), float("nan"), float("inf")], + "geo_col": [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + "int_col": [-9223372036854775808, None, 9223372036854775807], + "num_col": [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + "str_col": ["abc", None, "def"], + "time_col": [ + datetime.time(0, 0, 0), + None, + datetime.time(23, 59, 59, 999999), + ], + "ts_col": [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + }, + dtype="object", + ) + + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema".format( + Config.CLIENT.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = Config.CLIENT.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = Config.CLIENT.get_table(table_id) + self.assertEqual(tuple(table.schema), table_schema) + self.assertEqual(table.num_rows, 3) + def test_load_avro_from_uri_then_dump_table(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SourceFormat diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py new file mode 100644 index 000000000000..f04f95307806 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -0,0 +1,458 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal +import functools + +try: + import pandas +except ImportError: # pragma: NO COVER + pandas = None +try: + import pyarrow + import pyarrow.types +except ImportError: # pragma: NO COVER + pyarrow = None +import pytest + +from google.cloud.bigquery import schema + + +@pytest.fixture +def module_under_test(): + from google.cloud.bigquery import _pandas_helpers + + return _pandas_helpers + + +def is_none(value): + return value is None + + +def is_datetime(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime-type + return all_( + pyarrow.types.is_timestamp, + lambda type_: type_.unit == "us", + lambda type_: type_.tz is None, + )(type_) + + +def is_numeric(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type + return all_( + pyarrow.types.is_decimal, + lambda type_: type_.precision == 38, + lambda type_: type_.scale == 9, + )(type_) + + +def is_timestamp(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type + return all_( + pyarrow.types.is_timestamp, + lambda type_: type_.unit == "us", + lambda type_: type_.tz == "UTC", + )(type_) + + +def do_all(functions, value): + return all((func(value) for func in functions)) + + +def all_(*functions): + return functools.partial(do_all, functions) + + +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_is_datetime(): + assert is_datetime(pyarrow.timestamp("us", tz=None)) + assert not is_datetime(pyarrow.timestamp("ms", tz=None)) + assert not is_datetime(pyarrow.timestamp("us", tz="UTC")) + assert not is_datetime(pyarrow.string()) + + +def test_do_all(): + assert do_all((lambda _: True, lambda _: True), None) + assert not do_all((lambda _: True, lambda _: False), None) + assert not do_all((lambda _: False,), None) + + +def test_all_(): + assert all_(lambda _: True, lambda _: True)(None) + assert not all_(lambda _: True, lambda _: False)(None) + + +@pytest.mark.parametrize( + "bq_type,bq_mode,is_correct_type", + [ + ("STRING", "NULLABLE", pyarrow.types.is_string), + ("STRING", None, pyarrow.types.is_string), + ("string", "NULLABLE", pyarrow.types.is_string), + ("StRiNg", "NULLABLE", pyarrow.types.is_string), + ("BYTES", "NULLABLE", pyarrow.types.is_binary), + ("INTEGER", "NULLABLE", pyarrow.types.is_int64), + ("INT64", "NULLABLE", pyarrow.types.is_int64), + ("FLOAT", "NULLABLE", pyarrow.types.is_float64), + ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), + ("NUMERIC", "NULLABLE", is_numeric), + ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), + ("BOOL", "NULLABLE", pyarrow.types.is_boolean), + ("TIMESTAMP", "NULLABLE", is_timestamp), + ("DATE", "NULLABLE", pyarrow.types.is_date32), + ("TIME", "NULLABLE", pyarrow.types.is_time64), + ("DATETIME", "NULLABLE", is_datetime), + ("GEOGRAPHY", "NULLABLE", pyarrow.types.is_string), + ("UNKNOWN_TYPE", "NULLABLE", is_none), + # Use pyarrow.list_(item_type) for repeated (array) fields. + ( + "STRING", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_string(type_.value_type), + ), + ), + ( + "STRING", + "repeated", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_string(type_.value_type), + ), + ), + ( + "STRING", + "RePeAtEd", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_string(type_.value_type), + ), + ), + ( + "BYTES", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_binary(type_.value_type), + ), + ), + ( + "INTEGER", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_int64(type_.value_type), + ), + ), + ( + "INT64", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_int64(type_.value_type), + ), + ), + ( + "FLOAT", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_float64(type_.value_type), + ), + ), + ( + "FLOAT64", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_float64(type_.value_type), + ), + ), + ( + "NUMERIC", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), + ), + ( + "BOOLEAN", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_boolean(type_.value_type), + ), + ), + ( + "BOOL", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_boolean(type_.value_type), + ), + ), + ( + "TIMESTAMP", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_timestamp(type_.value_type)), + ), + ( + "DATE", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_date32(type_.value_type), + ), + ), + ( + "TIME", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_time64(type_.value_type), + ), + ), + ( + "DATETIME", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_datetime(type_.value_type)), + ), + ( + "GEOGRAPHY", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_string(type_.value_type), + ), + ), + ("RECORD", "REPEATED", is_none), + ("UNKNOWN_TYPE", "REPEATED", is_none), + ], +) +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type): + field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode) + actual = module_under_test.bq_to_arrow_data_type(field) + assert is_correct_type(actual) + + +@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): + fields = ( + schema.SchemaField("field01", "STRING"), + schema.SchemaField("field02", "BYTES"), + schema.SchemaField("field03", "INTEGER"), + schema.SchemaField("field04", "INT64"), + schema.SchemaField("field05", "FLOAT"), + schema.SchemaField("field06", "FLOAT64"), + schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BOOLEAN"), + schema.SchemaField("field09", "BOOL"), + schema.SchemaField("field10", "TIMESTAMP"), + schema.SchemaField("field11", "DATE"), + schema.SchemaField("field12", "TIME"), + schema.SchemaField("field13", "DATETIME"), + schema.SchemaField("field14", "GEOGRAPHY"), + ) + field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) + actual = module_under_test.bq_to_arrow_data_type(field) + expected = pyarrow.struct( + ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", pyarrow.bool_()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", module_under_test.pyarrow_timestamp()), + pyarrow.field("field11", pyarrow.date32()), + pyarrow.field("field12", module_under_test.pyarrow_time()), + pyarrow.field("field13", module_under_test.pyarrow_datetime()), + pyarrow.field("field14", pyarrow.string()), + ) + ) + assert pyarrow.types.is_struct(actual) + assert actual.num_children == len(fields) + assert actual.equals(expected) + + +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): + fields = ( + schema.SchemaField("field1", "STRING"), + schema.SchemaField("field2", "INTEGER"), + # Don't know what to convert UNKNOWN_TYPE to, let type inference work, + # instead. + schema.SchemaField("field3", "UNKNOWN_TYPE"), + ) + field = schema.SchemaField("ignored_name", "RECORD", mode="NULLABLE", fields=fields) + actual = module_under_test.bq_to_arrow_data_type(field) + assert actual is None + + +@pytest.mark.parametrize( + "bq_type,rows", + [ + ("STRING", ["abc", None, "def", None]), + ("BYTES", [b"abc", None, b"def", None]), + ("INTEGER", [123, None, 456, None]), + ("INT64", [-9223372036854775808, None, 9223372036854775807, 123]), + ("FLOAT", [1.25, None, 3.5, None]), + ( + "NUMERIC", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + decimal.Decimal("999.123456789"), + ], + ), + ("BOOLEAN", [True, None, False, None]), + ("BOOL", [False, None, True, None]), + # TODO: Once https://issues.apache.org/jira/browse/ARROW-5450 is + # resolved, test with TIMESTAMP column. Conversion from pyarrow + # TimestampArray to list of Python objects fails with OverflowError: + # Python int too large to convert to C long. + # + # ( + # "TIMESTAMP", + # [ + # datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + # None, + # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), + # datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + # ], + # ), + ( + "DATE", + [ + datetime.date(1, 1, 1), + None, + datetime.date(9999, 12, 31), + datetime.date(1970, 1, 1), + ], + ), + ( + "TIME", + [ + datetime.time(0, 0, 0), + None, + datetime.time(23, 59, 59, 999999), + datetime.time(12, 0, 0), + ], + ), + # TODO: Once https://issues.apache.org/jira/browse/ARROW-5450 is + # resolved, test with DATETIME column. Conversion from pyarrow + # TimestampArray to list of Python objects fails with OverflowError: + # Python int too large to convert to C long. + # + # ( + # "DATETIME", + # [ + # datetime.datetime(1, 1, 1, 0, 0, 0), + # None, + # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + # datetime.datetime(1970, 1, 1, 0, 0, 0), + # ], + # ), + ( + "GEOGRAPHY", + [ + "POINT(30, 10)", + None, + "LINESTRING (30 10, 10 30, 40 40)", + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ], +) +@pytest.mark.skipIf(pandas is None, "Requires `pandas`") +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): + series = pandas.Series(rows, dtype="object") + bq_field = schema.SchemaField("field_name", bq_type) + arrow_array = module_under_test.bq_to_arrow_array(series, bq_field) + roundtrip = arrow_array.to_pylist() + assert rows == roundtrip + + +@pytest.mark.skipIf(pandas is None, "Requires `pandas`") +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_bq_to_arrow_array_w_arrays(module_under_test): + rows = [[1, 2, 3], [], [4, 5, 6]] + series = pandas.Series(rows, dtype="object") + bq_field = schema.SchemaField("field_name", "INTEGER", mode="REPEATED") + arrow_array = module_under_test.bq_to_arrow_array(series, bq_field) + roundtrip = arrow_array.to_pylist() + assert rows == roundtrip + + +@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) +@pytest.mark.skipIf(pandas is None, "Requires `pandas`") +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): + rows = [ + {"int_col": 123, "string_col": "abc"}, + None, + {"int_col": 456, "string_col": "def"}, + ] + series = pandas.Series(rows, dtype="object") + bq_field = schema.SchemaField( + "field_name", + bq_type, + fields=( + schema.SchemaField("int_col", "INTEGER"), + schema.SchemaField("string_col", "STRING"), + ), + ) + arrow_array = module_under_test.bq_to_arrow_array(series, bq_field) + roundtrip = arrow_array.to_pylist() + assert rows == roundtrip + + +@pytest.mark.skipIf(pandas is None, "Requires `pandas`") +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_bq_to_arrow_array_w_special_floats(module_under_test): + bq_field = schema.SchemaField("field_name", "FLOAT64") + rows = [float("-inf"), float("nan"), float("inf"), None] + series = pandas.Series(rows, dtype="object") + arrow_array = module_under_test.bq_to_arrow_array(series, bq_field) + roundtrip = arrow_array.to_pylist() + assert len(rows) == len(roundtrip) + assert roundtrip[0] == float("-inf") + assert roundtrip[1] != roundtrip[1] # NaN doesn't equal itself. + assert roundtrip[2] == float("inf") + assert roundtrip[3] is None + + +@pytest.mark.skipIf(pandas is None, "Requires `pandas`") +def test_to_parquet_without_pyarrow(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pyarrow", None) + with pytest.raises(ValueError) as exc: + module_under_test.to_parquet(pandas.DataFrame(), (), None) + assert "pyarrow is required" in str(exc) + + +@pytest.mark.skipIf(pandas is None, "Requires `pandas`") +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_to_parquet_w_missing_columns(module_under_test, monkeypatch): + with pytest.raises(ValueError) as exc: + module_under_test.to_parquet( + pandas.DataFrame(), (schema.SchemaField("not_found", "STRING"),), None + ) + assert "columns in schema must match" in str(exc) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 13889f90d7e8..dd98f2bcce64 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5000,6 +5000,49 @@ def test_load_table_from_dataframe_w_custom_job_config(self): assert sent_config is job_config assert sent_config.source_format == job.SourceFormat.PARQUET + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_nulls(self): + """Test that a DataFrame with null columns can be uploaded if a + BigQuery schema is specified. + + See: https://github.com/googleapis/google-cloud-python/issues/7370 + """ + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + + client = self._make_client() + records = [{"name": None, "age": None}, {"name": None, "age": None}] + dataframe = pandas.DataFrame(records) + schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] + job_config = job.LoadJobConfig(schema=schema) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config is job_config + assert sent_config.source_format == job.SourceFormat.PARQUET + # Low-level tests @classmethod From f5a0cf314a9e4dc986838c63a8b8f7829b1b1eea Mon Sep 17 00:00:00 2001 From: Victor Mota Date: Fri, 31 May 2019 09:46:39 -0700 Subject: [PATCH 0595/2016] Adds _connection object to bigquery magics context. (#8192) * Adds _connection object to bigquery magics context. * Moves make_connection to helpers.py and adds unit test for no connection in context. --- .../google/cloud/bigquery/magics.py | 3 + .../tests/unit/helpers.py | 24 ++ .../tests/unit/test_client.py | 245 +++++++++--------- .../tests/unit/test_magics.py | 109 ++++++++ 4 files changed, 254 insertions(+), 127 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/helpers.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 27626752d8e7..6bd1c45dfcd5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -159,6 +159,7 @@ class Context(object): def __init__(self): self._credentials = None self._project = None + self._connection = None self._use_bqstorage_api = None @property @@ -363,6 +364,8 @@ def _cell_magic(line, query): project = args.project or context.project client = bigquery.Client(project=project, credentials=context.credentials) + if context._connection: + client._connection = context._connection bqstorage_client = _make_bqstorage_client( args.use_bqstorage_api or context.use_bqstorage_api, context.credentials ) diff --git a/packages/google-cloud-bigquery/tests/unit/helpers.py b/packages/google-cloud-bigquery/tests/unit/helpers.py new file mode 100644 index 000000000000..5b731a763a99 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/helpers.py @@ -0,0 +1,24 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def make_connection(*responses): + import google.cloud.bigquery._http + import mock + from google.cloud.exceptions import NotFound + + mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) + mock_conn.user_agent = "testing 1.2.3" + mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] + return mock_conn diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index dd98f2bcce64..3fdfeeb9a68b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -40,6 +40,7 @@ import google.api_core.exceptions from google.api_core.gapic_v1 import client_info import google.cloud._helpers +from tests.unit.helpers import make_connection from google.cloud.bigquery.dataset import DatasetReference @@ -49,16 +50,6 @@ def _make_credentials(): return mock.Mock(spec=google.auth.credentials.Credentials) -def _make_connection(*responses): - import google.cloud.bigquery._http - from google.cloud.exceptions import NotFound - - mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) - mock_conn.user_agent = "testing 1.2.3" - mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] - return mock_conn - - def _make_list_partitons_meta_info(project, dataset_id, table_id, num_rows=0): return { "tableReference": { @@ -163,7 +154,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection() + conn = client._connection = make_connection() with self.assertRaises(NotFound): client._get_query_results( @@ -185,7 +176,7 @@ def test__get_query_results_miss_w_client_location(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds, location=self.LOCATION) - conn = client._connection = _make_connection() + conn = client._connection = make_connection() with self.assertRaises(NotFound): client._get_query_results("nothere", None) @@ -216,7 +207,7 @@ def test__get_query_results_hit(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - client._connection = _make_connection(data) + client._connection = make_connection(data) query_results = client._get_query_results(job_id, None) self.assertEqual(query_results.total_rows, 10) @@ -229,7 +220,7 @@ def test_get_service_account_email(self): client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) email = "bq-123@bigquery-encryption.iam.gserviceaccount.com" resource = {"kind": "bigquery#getServiceAccountResponse", "email": email} - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) service_account_email = client.get_service_account_email() @@ -244,7 +235,7 @@ def test_get_service_account_email_w_alternate_project(self): client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) email = "bq-123@bigquery-encryption.iam.gserviceaccount.com" resource = {"kind": "bigquery#getServiceAccountResponse", "email": email} - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) service_account_email = client.get_service_account_email(project=project) @@ -278,7 +269,7 @@ def test_list_projects_defaults(self): } creds = _make_credentials() client = self._make_one(PROJECT_1, creds) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) iterator = client.list_projects() page = six.next(iterator.pages) @@ -302,7 +293,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): DATA = {} creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) iterator = client.list_projects(max_results=3, page_token=TOKEN) page = six.next(iterator.pages) @@ -350,7 +341,7 @@ def test_list_datasets_defaults(self): } creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) iterator = client.list_datasets() page = six.next(iterator.pages) @@ -371,7 +362,7 @@ def test_list_datasets_defaults(self): def test_list_datasets_w_project(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) list(client.list_datasets(project="other-project")) @@ -386,7 +377,7 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): DATA = {} creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) iterator = client.list_datasets( include_all=True, filter=FILTER, max_results=3, page_token=TOKEN @@ -442,7 +433,7 @@ def test_get_dataset(self): "id": "%s:%s" % (self.PROJECT, self.DS_ID), "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, } - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) dataset_ref = client.dataset(self.DS_ID) dataset = client.get_dataset(dataset_ref) @@ -453,31 +444,31 @@ def test_get_dataset(self): # Test retry. # Not a cloud API exception (missing 'errors' field). - client._connection = _make_connection(Exception(""), resource) + client._connection = make_connection(Exception(""), resource) with self.assertRaises(Exception): client.get_dataset(dataset_ref) # Zero-length errors field. - client._connection = _make_connection(ServerError(""), resource) + client._connection = make_connection(ServerError(""), resource) with self.assertRaises(ServerError): client.get_dataset(dataset_ref) # Non-retryable reason. - client._connection = _make_connection( + client._connection = make_connection( ServerError("", errors=[{"reason": "serious"}]), resource ) with self.assertRaises(ServerError): client.get_dataset(dataset_ref) # Retryable reason, but retry is disabled. - client._connection = _make_connection( + client._connection = make_connection( ServerError("", errors=[{"reason": "backendError"}]), resource ) with self.assertRaises(ServerError): client.get_dataset(dataset_ref, retry=None) # Retryable reason, default retry: success. - client._connection = _make_connection( + client._connection = make_connection( ServerError("", errors=[{"reason": "backendError"}]), resource ) dataset = client.get_dataset( @@ -497,7 +488,7 @@ def test_create_dataset_minimal(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) ds_ref = client.dataset(self.DS_ID) before = Dataset(ds_ref) @@ -548,7 +539,7 @@ def test_create_dataset_w_attrs(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) entries = [ AccessEntry("OWNER", "userByEmail", USER_EMAIL), AccessEntry(None, "view", VIEW), @@ -607,7 +598,7 @@ def test_create_dataset_w_custom_property(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) ds_ref = client.dataset(self.DS_ID) before = Dataset(ds_ref) @@ -646,7 +637,7 @@ def test_create_dataset_w_client_location_wo_dataset_location(self): client = self._make_one( project=self.PROJECT, credentials=creds, location=self.LOCATION ) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) ds_ref = client.dataset(self.DS_ID) before = Dataset(ds_ref) @@ -687,7 +678,7 @@ def test_create_dataset_w_client_location_w_dataset_location(self): client = self._make_one( project=self.PROJECT, credentials=creds, location=self.LOCATION ) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) ds_ref = client.dataset(self.DS_ID) before = Dataset(ds_ref) @@ -726,7 +717,7 @@ def test_create_dataset_w_reference(self): client = self._make_one( project=self.PROJECT, credentials=creds, location=self.LOCATION ) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) dataset = client.create_dataset(client.dataset(self.DS_ID)) @@ -761,7 +752,7 @@ def test_create_dataset_w_fully_qualified_string(self): client = self._make_one( project=self.PROJECT, credentials=creds, location=self.LOCATION ) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) dataset = client.create_dataset("{}.{}".format(self.PROJECT, self.DS_ID)) @@ -796,7 +787,7 @@ def test_create_dataset_w_string(self): client = self._make_one( project=self.PROJECT, credentials=creds, location=self.LOCATION ) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) dataset = client.create_dataset(self.DS_ID) @@ -824,7 +815,7 @@ def test_create_dataset_alreadyexists_w_exists_ok_false(self): client = self._make_one( project=self.PROJECT, credentials=creds, location=self.LOCATION ) - client._connection = _make_connection( + client._connection = make_connection( google.api_core.exceptions.AlreadyExists("dataset already exists") ) @@ -844,7 +835,7 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(self): client = self._make_one( project=self.PROJECT, credentials=creds, location=self.LOCATION ) - conn = client._connection = _make_connection( + conn = client._connection = make_connection( google.api_core.exceptions.AlreadyExists("dataset already exists"), resource ) @@ -882,7 +873,7 @@ def test_create_table_w_day_partition(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = self._make_table_resource() - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF) table.time_partitioning = TimePartitioning() @@ -914,7 +905,7 @@ def test_create_table_w_custom_property(self): client = self._make_one(project=self.PROJECT, credentials=creds) resource = self._make_table_resource() resource["newAlphaProperty"] = "unreleased property" - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF) table._properties["newAlphaProperty"] = "unreleased property" @@ -944,7 +935,7 @@ def test_create_table_w_encryption_configuration(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = self._make_table_resource() - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF) table.encryption_configuration = EncryptionConfiguration( kms_key_name=self.KMS_KEY_NAME @@ -975,7 +966,7 @@ def test_create_table_w_day_partition_and_expire(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = self._make_table_resource() - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF) table.time_partitioning = TimePartitioning(expiration_ms=100) @@ -1031,7 +1022,7 @@ def test_create_table_w_schema_and_query(self): SchemaField("full_name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF, schema=schema) table.view_query = query @@ -1089,7 +1080,7 @@ def test_create_table_w_external(self): } } ) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF) ec = ExternalConfig("CSV") ec.autodetect = True @@ -1126,7 +1117,7 @@ def test_create_table_w_reference(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = self._make_table_resource() - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) got = client.create_table(self.TABLE_REF) @@ -1149,7 +1140,7 @@ def test_create_table_w_fully_qualified_string(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = self._make_table_resource() - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) got = client.create_table( "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.TABLE_ID) @@ -1174,7 +1165,7 @@ def test_create_table_w_string(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) resource = self._make_table_resource() - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) got = client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) @@ -1198,7 +1189,7 @@ def test_create_table_alreadyexists_w_exists_ok_false(self): client = self._make_one( project=self.PROJECT, credentials=creds, location=self.LOCATION ) - conn = client._connection = _make_connection( + conn = client._connection = make_connection( google.api_core.exceptions.AlreadyExists("table already exists") ) @@ -1228,7 +1219,7 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): client = self._make_one( project=self.PROJECT, credentials=creds, location=self.LOCATION ) - conn = client._connection = _make_connection( + conn = client._connection = make_connection( google.api_core.exceptions.AlreadyExists("table already exists"), resource ) @@ -1274,7 +1265,7 @@ def test_get_model(self): "modelId": self.MODEL_ID, } } - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) model_ref = client.dataset(self.DS_ID).model(self.MODEL_ID) got = client.get_model(model_ref) @@ -1298,7 +1289,7 @@ def test_get_model_w_string(self): "modelId": self.MODEL_ID, } } - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) got = client.get_model(model_id) @@ -1316,7 +1307,7 @@ def test_get_table(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) resource = self._make_table_resource() - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) table = client.get_table(self.TABLE_REF) conn.api_request.assert_called_once_with(method="GET", path="/%s" % path) @@ -1384,7 +1375,7 @@ def test_update_dataset(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(RESOURCE, RESOURCE) + conn = client._connection = make_connection(RESOURCE, RESOURCE) ds = Dataset(client.dataset(self.DS_ID)) ds.description = DESCRIPTION ds.friendly_name = FRIENDLY_NAME @@ -1431,7 +1422,7 @@ def test_update_dataset_w_custom_property(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) dataset = Dataset(client.dataset(self.DS_ID)) dataset._properties["newAlphaProperty"] = "unreleased property" @@ -1474,7 +1465,7 @@ def test_update_model(self): } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(resource, resource) + conn = client._connection = make_connection(resource, resource) model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) model = Model(model_id) model.description = description @@ -1548,7 +1539,7 @@ def test_update_table(self): ] creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(resource, resource) + conn = client._connection = make_connection(resource, resource) table = Table(self.TABLE_REF, schema=schema) table.description = description table.friendly_name = title @@ -1605,7 +1596,7 @@ def test_update_table_w_custom_property(self): resource["newAlphaProperty"] = "unreleased property" creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF) table._properties["newAlphaProperty"] = "unreleased property" @@ -1633,7 +1624,7 @@ def test_update_table_only_use_legacy_sql(self): resource["view"] = {"useLegacySql": True} creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF) table.view_use_legacy_sql = True @@ -1692,7 +1683,7 @@ def test_update_table_w_query(self): ) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF, schema=schema) table.expires = exp_time table.view_query = query @@ -1740,7 +1731,7 @@ def test_update_table_w_schema_None(self): resource2 = self._make_table_resource() creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(resource1, resource2) + conn = client._connection = make_connection(resource1, resource2) table = client.get_table( # Test with string for table ID "{}.{}.{}".format( @@ -1777,7 +1768,7 @@ def test_update_table_delete_property(self): resource2["description"] = None creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(resource1, resource2) + conn = client._connection = make_connection(resource1, resource2) table = Table(self.TABLE_REF) table.description = description table.friendly_name = title @@ -1798,7 +1789,7 @@ def test_list_tables_empty(self): path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) dataset = client.dataset(self.DS_ID) iterator = client.list_tables(dataset) @@ -1817,7 +1808,7 @@ def test_list_models_empty(self): path = "/projects/{}/datasets/{}/models".format(self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) dataset_id = "{}.{}".format(self.PROJECT, self.DS_ID) iterator = client.list_models(dataset_id) @@ -1860,7 +1851,7 @@ def test_list_models_defaults(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) dataset = client.dataset(self.DS_ID) iterator = client.list_models(dataset) @@ -1920,7 +1911,7 @@ def test_list_tables_defaults(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) dataset = client.dataset(self.DS_ID) iterator = client.list_tables(dataset) @@ -1974,7 +1965,7 @@ def test_list_tables_explicit(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) dataset = client.dataset(self.DS_ID) iterator = client.list_tables( @@ -2016,7 +2007,7 @@ def test_delete_dataset(self): PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection(*([{}] * len(datasets))) + conn = client._connection = make_connection(*([{}] * len(datasets))) for arg in datasets: client.delete_dataset(arg) conn.api_request.assert_called_with( @@ -2029,7 +2020,7 @@ def test_delete_dataset_delete_contents(self): PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = _make_connection({}, {}) + conn = client._connection = make_connection({}, {}) ds_ref = client.dataset(self.DS_ID) for arg in (ds_ref, Dataset(ds_ref)): client.delete_dataset(arg, delete_contents=True) @@ -2050,7 +2041,7 @@ def test_delete_dataset_w_not_found_ok_false(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection( + conn = client._connection = make_connection( google.api_core.exceptions.NotFound("dataset not found") ) @@ -2064,7 +2055,7 @@ def test_delete_dataset_w_not_found_ok_true(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection( + conn = client._connection = make_connection( google.api_core.exceptions.NotFound("dataset not found") ) @@ -2089,7 +2080,7 @@ def test_delete_model(self): client.dataset(self.DS_ID).model(self.MODEL_ID), Model(model_id), ) - conn = client._connection = _make_connection(*([{}] * len(models))) + conn = client._connection = make_connection(*([{}] * len(models))) for arg in models: client.delete_model(arg) @@ -2108,7 +2099,7 @@ def test_delete_model_w_not_found_ok_false(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection( + conn = client._connection = make_connection( google.api_core.exceptions.NotFound("model not found") ) @@ -2124,7 +2115,7 @@ def test_delete_model_w_not_found_ok_true(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection( + conn = client._connection = make_connection( google.api_core.exceptions.NotFound("model not found") ) @@ -2154,7 +2145,7 @@ def test_delete_table(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(*([{}] * len(tables))) + conn = client._connection = make_connection(*([{}] * len(tables))) for arg in tables: client.delete_table(arg) @@ -2173,7 +2164,7 @@ def test_delete_table_w_not_found_ok_false(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection( + conn = client._connection = make_connection( google.api_core.exceptions.NotFound("table not found") ) @@ -2189,7 +2180,7 @@ def test_delete_table_w_not_found_ok_true(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection( + conn = client._connection = make_connection( google.api_core.exceptions.NotFound("table not found") ) @@ -2215,7 +2206,7 @@ def test_get_job_miss_w_explict_project(self): JOB_ID = "NONESUCH" creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection() + conn = client._connection = make_connection() with self.assertRaises(NotFound): client.get_job(JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) @@ -2233,7 +2224,7 @@ def test_get_job_miss_w_client_location(self): JOB_ID = "NONESUCH" creds = _make_credentials() client = self._make_one(self.PROJECT, creds, location=self.LOCATION) - conn = client._connection = _make_connection() + conn = client._connection = make_connection() with self.assertRaises(NotFound): client.get_job(JOB_ID, project=OTHER_PROJECT) @@ -2271,7 +2262,7 @@ def test_get_job_hit(self): } creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection(ASYNC_QUERY_DATA) + conn = client._connection = make_connection(ASYNC_QUERY_DATA) job = client.get_job(JOB_ID) @@ -2293,7 +2284,7 @@ def test_cancel_job_miss_w_explict_project(self): JOB_ID = "NONESUCH" creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection() + conn = client._connection = make_connection() with self.assertRaises(NotFound): client.cancel_job(JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) @@ -2311,7 +2302,7 @@ def test_cancel_job_miss_w_client_location(self): JOB_ID = "NONESUCH" creds = _make_credentials() client = self._make_one(self.PROJECT, creds, location=self.LOCATION) - conn = client._connection = _make_connection() + conn = client._connection = make_connection() with self.assertRaises(NotFound): client.cancel_job(JOB_ID, project=OTHER_PROJECT) @@ -2336,7 +2327,7 @@ def test_cancel_job_hit(self): RESOURCE = {"job": QUERY_JOB_RESOURCE} creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) job = client.cancel_job(JOB_ID) @@ -2446,7 +2437,7 @@ def test_list_jobs_defaults(self): } creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) iterator = client.list_jobs() page = six.next(iterator.pages) @@ -2488,7 +2479,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]} creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) iterator = client.list_jobs() page = six.next(iterator.pages) @@ -2512,7 +2503,7 @@ def test_list_jobs_explicit_missing(self): TOKEN = "TOKEN" creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) iterator = client.list_jobs( max_results=1000, page_token=TOKEN, all_users=True, state_filter="done" @@ -2539,7 +2530,7 @@ def test_list_jobs_explicit_missing(self): def test_list_jobs_w_project(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) list(client.list_jobs(project="other-project")) @@ -2552,7 +2543,7 @@ def test_list_jobs_w_project(self): def test_list_jobs_w_time_filter(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) # One millisecond after the unix epoch. start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) @@ -2594,7 +2585,7 @@ def test_load_table_from_uri(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) destination = client.dataset(self.DS_ID).table(DESTINATION) job = client.load_table_from_uri(SOURCE_URI, destination, job_id=JOB) @@ -2610,7 +2601,7 @@ def test_load_table_from_uri(self): self.assertEqual(list(job.source_uris), [SOURCE_URI]) self.assertIs(job.destination, destination) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) job = client.load_table_from_uri([SOURCE_URI], destination, job_id=JOB) self.assertIsInstance(job, LoadJob) @@ -2643,7 +2634,7 @@ def test_load_table_from_uri_w_explicit_project(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) destination = client.dataset(self.DS_ID).table(destination_id) client.load_table_from_uri( @@ -2685,7 +2676,7 @@ def test_load_table_from_uri_w_client_location(self): client = self._make_one( project=self.PROJECT, credentials=creds, _http=http, location=self.LOCATION ) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) client.load_table_from_uri( source_uri, @@ -2731,7 +2722,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): response_headers = {"location": resumable_url} fake_transport = self._mock_transport(http_client.OK, response_headers) client = self._make_one(project=self.PROJECT, _http=fake_transport) - conn = client._connection = _make_connection() + conn = client._connection = make_connection() # Create some mock arguments and call the method under test. data = b"goodbye gudbi gootbee" @@ -2797,7 +2788,7 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): fake_transport = self._mock_transport(http_client.OK, {}) client = self._make_one(project=self.PROJECT, _http=fake_transport) - conn = client._connection = _make_connection() + conn = client._connection = make_connection() # Create some mock arguments. data = b"Bzzzz-zap \x00\x01\xf4" @@ -2872,7 +2863,7 @@ def test_copy_table(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) @@ -2890,7 +2881,7 @@ def test_copy_table(self): self.assertEqual(list(job.sources), [source]) self.assertIs(job.destination, destination) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) source2 = dataset.table(SOURCE + "2") job = client.copy_table([source, source2], destination, job_id=JOB) self.assertIsInstance(job, CopyJob) @@ -2929,7 +2920,7 @@ def test_copy_table_w_explicit_project(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) dataset = client.dataset(self.DS_ID) source = dataset.table(source_id) destination = dataset.table(destination_id) @@ -2979,7 +2970,7 @@ def test_copy_table_w_client_location(self): client = self._make_one( project=self.PROJECT, credentials=creds, _http=http, location=self.LOCATION ) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) client.copy_table( # Test with string for table IDs. @@ -2998,7 +2989,7 @@ def test_copy_table_w_source_strings(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = _make_connection({}) + client._connection = make_connection({}) sources = [ "dataset_wo_proj.some_table", "other_project.other_dataset.other_table", @@ -3043,7 +3034,7 @@ def test_extract_table(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) @@ -3085,7 +3076,7 @@ def test_extract_table_w_explicit_project(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) dataset = client.dataset(self.DS_ID) source = dataset.table(source_id) @@ -3128,7 +3119,7 @@ def test_extract_table_w_client_location(self): client = self._make_one( project=self.PROJECT, credentials=creds, _http=http, location=self.LOCATION ) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) client.extract_table( # Test with string for table ID. @@ -3168,7 +3159,7 @@ def test_extract_table_generated_job_id(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) job_config = ExtractJobConfig() @@ -3212,7 +3203,7 @@ def test_extract_table_w_destination_uris(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) @@ -3242,7 +3233,7 @@ def test_query_defaults(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) job = client.query(QUERY) @@ -3278,7 +3269,7 @@ def test_query_w_explicit_project(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) client.query( query, job_id=job_id, project="other-project", location=self.LOCATION @@ -3329,7 +3320,7 @@ def test_query_w_explicit_job_config(self): _http=http, default_query_job_config=default_job_config, ) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) job_config = QueryJobConfig() job_config.use_query_cache = True @@ -3381,7 +3372,7 @@ def test_query_w_explicit_job_config_override(self): _http=http, default_query_job_config=default_job_config, ) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) job_config = QueryJobConfig() job_config.use_query_cache = True @@ -3429,7 +3420,7 @@ def test_query_w_client_default_config_no_incoming(self): _http=http, default_query_job_config=default_job_config, ) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) client.query(query, job_id=job_id, location=self.LOCATION) @@ -3454,7 +3445,7 @@ def test_query_w_client_location(self): client = self._make_one( project=self.PROJECT, credentials=creds, _http=http, location=self.LOCATION ) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) client.query(query, job_id=job_id, project="other-project") @@ -3478,7 +3469,7 @@ def test_query_detect_location(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(resource) + conn = client._connection = make_connection(resource) job = client.query(query) @@ -3511,7 +3502,7 @@ def test_query_w_udf_resources(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] config = QueryJobConfig() config.udf_resources = udf_resources @@ -3567,7 +3558,7 @@ def test_query_w_query_parameters(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(RESOURCE) + conn = client._connection = make_connection(RESOURCE) query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] config = QueryJobConfig() config.query_parameters = query_parameters @@ -3636,7 +3627,7 @@ def test_insert_rows_w_schema(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) schema = [ SchemaField("full_name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), @@ -3692,7 +3683,7 @@ def test_insert_rows_w_list_of_dictionaries(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) schema = [ SchemaField("full_name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), @@ -3753,7 +3744,7 @@ def test_insert_rows_w_list_of_Rows(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) schema = [ SchemaField("full_name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), @@ -3811,7 +3802,7 @@ def test_insert_rows_w_skip_invalid_and_ignore_unknown(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(RESPONSE) + conn = client._connection = make_connection(RESPONSE) schema = [ SchemaField("full_name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), @@ -3872,7 +3863,7 @@ def test_insert_rows_w_repeated_fields(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) color = SchemaField("color", "STRING", mode="REPEATED") items = SchemaField("items", "INTEGER", mode="REPEATED") score = SchemaField("score", "INTEGER") @@ -3965,7 +3956,7 @@ def test_insert_rows_w_record_schema(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) full_name = SchemaField("full_name", "STRING", mode="REQUIRED") area_code = SchemaField("area_code", "STRING", "REQUIRED") local_number = SchemaField("local_number", "STRING", "REQUIRED") @@ -4057,7 +4048,7 @@ def test_insert_rows_w_numeric(self): creds = _make_credentials() http = object() client = self._make_one(project=project, credentials=creds, _http=http) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) table_ref = DatasetReference(project, ds_id).table(table_id) schema = [ table.SchemaField("account", "STRING"), @@ -4107,7 +4098,7 @@ def test_insert_rows_json(self): creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) schema = [ SchemaField("full_name", "STRING", mode="REQUIRED"), @@ -4145,7 +4136,7 @@ def test_insert_rows_json_with_string_id(self): client = self._make_one( project="default-project", credentials=creds, _http=http ) - conn = client._connection = _make_connection({}) + conn = client._connection = make_connection({}) with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))): errors = client.insert_rows_json("proj.dset.tbl", rows) @@ -4179,7 +4170,7 @@ def test_list_partitions(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = _make_connection(meta_info, data) + client._connection = make_connection(meta_info, data) table = Table(self.TABLE_REF) partition_list = client.list_partitions(table) @@ -4194,7 +4185,7 @@ def test_list_partitions_with_string_id(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = _make_connection(meta_info, {}) + client._connection = make_connection(meta_info, {}) partition_list = client.list_partitions( "{}.{}".format(self.DS_ID, self.TABLE_ID) @@ -4256,7 +4247,7 @@ def _bigquery_timestamp_float_repr(ts_float): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(DATA, DATA) + conn = client._connection = make_connection(DATA, DATA) full_name = SchemaField("full_name", "STRING", mode="REQUIRED") age = SchemaField("age", "INTEGER", mode="NULLABLE") joined = SchemaField("joined", "TIMESTAMP", mode="NULLABLE") @@ -4286,7 +4277,7 @@ def test_list_rows_empty_table(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = _make_connection(response, response) + client._connection = make_connection(response, response) # Table that has no schema because it's an empty table. rows = client.list_rows( @@ -4320,7 +4311,7 @@ def test_list_rows_query_params(self): ({"max_results": 2}, {"maxResults": 2}), ({"start_index": 1, "max_results": 2}, {"startIndex": 1, "maxResults": 2}), ] - conn = client._connection = _make_connection(*len(tests) * [{}]) + conn = client._connection = make_connection(*len(tests) * [{}]) for i, test in enumerate(tests): iterator = client.list_rows(table, **test[0]) six.next(iterator.pages) @@ -4363,7 +4354,7 @@ def test_list_rows_repeated_fields(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) color = SchemaField("color", "STRING", mode="REPEATED") index = SchemaField("index", "INTEGER", "REPEATED") score = SchemaField("score", "FLOAT", "REPEATED") @@ -4419,7 +4410,7 @@ def test_list_rows_w_record_schema(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(DATA) + conn = client._connection = make_connection(DATA) full_name = SchemaField("full_name", "STRING", mode="REQUIRED") area_code = SchemaField("area_code", "STRING", "REQUIRED") local_number = SchemaField("local_number", "STRING", "REQUIRED") @@ -4500,7 +4491,7 @@ def test_list_rows_with_missing_schema(self): for table in schemaless_tables: client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = _make_connection(table_data, rows_data) + conn = client._connection = make_connection(table_data, rows_data) row_iter = client.list_rows(table) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 0db0bc5ebf52..70848cbcae64 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -14,6 +14,7 @@ import re import mock +import six from concurrent import futures import pytest @@ -38,6 +39,7 @@ bigquery_storage_v1beta1 = None from google.cloud.bigquery import table from google.cloud.bigquery import magics +from tests.unit.helpers import make_connection pytestmark = pytest.mark.skipif(IPython is None, reason="Requires `ipython`") @@ -101,6 +103,113 @@ def test_context_credentials_and_project_can_be_set_explicitly(): assert default_mock.call_count == 0 +@pytest.mark.usefixtures("ipython_interactive") +def test_context_connection_can_be_overriden(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + magics.context._credentials = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + project = "project-123" + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, project) + ) + + query = "select * from persons" + job_reference = {"projectId": project, "jobId": "some-random-id"} + table = {"projectId": project, "datasetId": "ds", "tableId": "persons"} + resource = { + "jobReference": job_reference, + "configuration": { + "query": { + "destinationTable": table, + "query": query, + "queryParameters": [], + "useLegacySql": False, + } + }, + "status": {"state": "DONE"}, + } + data = {"jobReference": job_reference, "totalRows": 0, "rows": []} + + conn = magics.context._connection = make_connection(resource, data) + list_rows_patch = mock.patch( + "google.cloud.bigquery.client.Client.list_rows", + return_value=google.cloud.bigquery.table._EmptyRowIterator(), + ) + with list_rows_patch as list_rows, default_patch: + ip.run_cell_magic("bigquery", "", query) + + # Check that query actually starts the job. + list_rows.assert_called() + assert len(conn.api_request.call_args_list) == 2 + _, req = conn.api_request.call_args_list[0] + assert req["method"] == "POST" + assert req["path"] == "/projects/{}/jobs".format(project) + sent = req["data"] + assert isinstance(sent["jobReference"]["jobId"], six.string_types) + sent_config = sent["configuration"]["query"] + assert sent_config["query"] == query + + +@pytest.mark.usefixtures("ipython_interactive") +def test_context_no_connection(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + magics.context._credentials = None + magics.context._connection = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + project = "project-123" + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, project) + ) + + query = "select * from persons" + job_reference = {"projectId": project, "jobId": "some-random-id"} + table = {"projectId": project, "datasetId": "ds", "tableId": "persons"} + resource = { + "jobReference": job_reference, + "configuration": { + "query": { + "destinationTable": table, + "query": query, + "queryParameters": [], + "useLegacySql": False, + } + }, + "status": {"state": "DONE"}, + } + data = {"jobReference": job_reference, "totalRows": 0, "rows": []} + + conn_mock = make_connection(resource, data, data, data) + conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) + list_rows_patch = mock.patch( + "google.cloud.bigquery.client.Client.list_rows", + return_value=google.cloud.bigquery.table._EmptyRowIterator(), + ) + with conn_patch as conn, list_rows_patch as list_rows, default_patch: + conn.return_value = conn_mock + ip.run_cell_magic("bigquery", "", query) + + # Check that query actually starts the job. + list_rows.assert_called() + assert len(conn_mock.api_request.call_args_list) == 2 + _, req = conn_mock.api_request.call_args_list[0] + assert req["method"] == "POST" + assert req["path"] == "/projects/{}/jobs".format(project) + sent = req["data"] + assert isinstance(sent["jobReference"]["jobId"], six.string_types) + sent_config = sent["configuration"]["query"] + assert sent_config["query"] == query + + def test__run_query(): magics.context._credentials = None From 30dc8935eb841de579f6b8b601094eb2bd3b7ff8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 31 May 2019 10:18:35 -0700 Subject: [PATCH 0596/2016] Fix breaking change. Don't require pyarrow if schema is set, but warn. (#8202) --- .../google/cloud/bigquery/client.py | 15 +++++- .../tests/unit/test_client.py | 48 +++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 78d718aa6a2a..9fd4c5368efa 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -28,7 +28,12 @@ import os import tempfile import uuid +import warnings +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None import six from google import resumable_media @@ -1304,9 +1309,17 @@ def load_table_from_dataframe( os.close(tmpfd) try: - if job_config.schema: + if pyarrow and job_config.schema: _pandas_helpers.to_parquet(dataframe, job_config.schema, tmppath) else: + if job_config.schema: + warnings.warn( + "job_config.schema is set, but not used to assist in " + "identifying correct types for data serialization. " + "Please install the pyarrow package.", + PendingDeprecationWarning, + stacklevel=2, + ) dataframe.to_parquet(tmppath) with open(tmppath, "rb") as parquet_file: diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 3fdfeeb9a68b..ea4aad534a13 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -20,6 +20,7 @@ import io import json import unittest +import warnings import mock import requests @@ -4991,6 +4992,53 @@ def test_load_table_from_dataframe_w_custom_job_config(self): assert sent_config is job_config assert sent_config.source_format == job.SourceFormat.PARQUET + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_schema_wo_pyarrow(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] + dataframe = pandas.DataFrame(records) + schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER")) + job_config = job.LoadJobConfig(schema=schema) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) + + with load_patch as load_table_from_file, pyarrow_patch, warnings.catch_warnings( + record=True + ) as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + assert len(warned) == 1 + warning = warned[0] + assert warning.category is PendingDeprecationWarning + assert "pyarrow" in str(warning) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert tuple(sent_config.schema) == schema + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): From db3c0cb26771f89f66f4f6b06026a8c14e6ed8b1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 31 May 2019 11:31:46 -0700 Subject: [PATCH 0597/2016] Release 1.13.0 (#8203) --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index db65224a957c..b5d29e6f594f 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.13.0 + +05-31-2019 10:22 PDT + +### New Features + +- Use `job_config.schema` for data type conversion if specified in `load_table_from_dataframe`. ([#8105](https://github.com/googleapis/google-cloud-python/pull/8105)) + +### Internal / Testing Changes + +- Adds private `_connection` object to magics context. ([#8192](https://github.com/googleapis/google-cloud-python/pull/8192)) +- Fix coverage in 'types.py' (via synth). ([#8146](https://github.com/googleapis/google-cloud-python/pull/8146)) + ## 1.12.1 05-21-2019 11:16 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 2b0211750acf..b089e9e0536b 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.12.1" +version = "1.13.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 89684cf9109a97efca80f3115bf25c5ba2592561 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 4 Jun 2019 01:04:26 +0530 Subject: [PATCH 0598/2016] Add `maximum_bytes_billed` argument and `context.default_query_job_config` property to magics. (#8179) BigQuery queries can get expensive, but the `maximum_bytes_billed` query option adds a cap to the price billed and rejects queries that could be too expensive. Provide a default value for `maximum_bytes_billed` in the `%%bigquery` magics by specifying a value for `google.cloud.bigquery.magics.context.default_query_job_config.maximum_bytes_billed`. --- .../google/cloud/bigquery/magics.py | 43 +++- .../tests/unit/test_magics.py | 192 +++++++++++++++--- 2 files changed, 203 insertions(+), 32 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 6bd1c45dfcd5..0acde4f21b5f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -161,6 +161,7 @@ def __init__(self): self._project = None self._connection = None self._use_bqstorage_api = None + self._default_query_job_config = bigquery.QueryJobConfig() @property def credentials(self): @@ -237,6 +238,28 @@ def use_bqstorage_api(self): def use_bqstorage_api(self, value): self._use_bqstorage_api = value + @property + def default_query_job_config(self): + """google.cloud.bigquery.job.QueryJobConfig: Default job + configuration for queries. + + The context's :class:`~google.cloud.bigquery.job.QueryJobConfig` is + used for queries. Some properties can be overridden with arguments to + the magics. + + Example: + Manually setting the default value for ``maximum_bytes_billed`` + to 100 MB: + + >>> from google.cloud.bigquery import magics + >>> magics.context.default_query_job_config.maximum_bytes_billed = 100000000 + """ + return self._default_query_job_config + + @default_query_job_config.setter + def default_query_job_config(self, value): + self._default_query_job_config = value + context = Context() @@ -291,6 +314,14 @@ def _run_query(client, query, job_config=None): default=None, help=("Project to use for executing this query. Defaults to the context project."), ) +@magic_arguments.argument( + "--maximum_bytes_billed", + default=None, + help=( + "maximum_bytes_billed to use for executing this query. Defaults to " + "the context default_query_job_config.maximum_bytes_billed." + ), +) @magic_arguments.argument( "--use_legacy_sql", action="store_true", @@ -363,7 +394,11 @@ def _cell_magic(line, query): ) project = args.project or context.project - client = bigquery.Client(project=project, credentials=context.credentials) + client = bigquery.Client( + project=project, + credentials=context.credentials, + default_query_job_config=context.default_query_job_config, + ) if context._connection: client._connection = context._connection bqstorage_client = _make_bqstorage_client( @@ -372,6 +407,12 @@ def _cell_magic(line, query): job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql + + if args.maximum_bytes_billed == "None": + job_config.maximum_bytes_billed = 0 + elif args.maximum_bytes_billed is not None: + value = int(args.maximum_bytes_billed) + job_config.maximum_bytes_billed = value query_job = _run_query(client, query, job_config) if not args.verbose: diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 70848cbcae64..f3e64a46faca 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import re -import mock -import six from concurrent import futures +import mock import pytest +import six try: import pandas @@ -37,6 +38,7 @@ from google.cloud import bigquery_storage_v1beta1 except ImportError: # pragma: NO COVER bigquery_storage_v1beta1 = None +from google.cloud.bigquery import job from google.cloud.bigquery import table from google.cloud.bigquery import magics from tests.unit.helpers import make_connection @@ -63,6 +65,26 @@ def ipython_interactive(request, ipython): yield ipython +JOB_REFERENCE_RESOURCE = {"projectId": "its-a-project-eh", "jobId": "some-random-id"} +TABLE_REFERENCE_RESOURCE = { + "projectId": "its-a-project-eh", + "datasetId": "ds", + "tableId": "persons", +} +QUERY_RESOURCE = { + "jobReference": JOB_REFERENCE_RESOURCE, + "configuration": { + "query": { + "destinationTable": TABLE_REFERENCE_RESOURCE, + "query": "SELECT 42 FROM `life.the_universe.and_everything`;", + "queryParameters": [], + "useLegacySql": False, + } + }, + "status": {"state": "DONE"}, +} + + def test_context_credentials_auto_set_w_application_default_credentials(): """When Application Default Credentials are set, the context credentials will be created the first time it is called @@ -117,22 +139,13 @@ def test_context_connection_can_be_overriden(): default_patch = mock.patch( "google.auth.default", return_value=(credentials_mock, project) ) + job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE) + job_reference["projectId"] = project query = "select * from persons" - job_reference = {"projectId": project, "jobId": "some-random-id"} - table = {"projectId": project, "datasetId": "ds", "tableId": "persons"} - resource = { - "jobReference": job_reference, - "configuration": { - "query": { - "destinationTable": table, - "query": query, - "queryParameters": [], - "useLegacySql": False, - } - }, - "status": {"state": "DONE"}, - } + resource = copy.deepcopy(QUERY_RESOURCE) + resource["jobReference"] = job_reference + resource["configuration"]["query"]["query"] = query data = {"jobReference": job_reference, "totalRows": 0, "rows": []} conn = magics.context._connection = make_connection(resource, data) @@ -170,22 +183,13 @@ def test_context_no_connection(): default_patch = mock.patch( "google.auth.default", return_value=(credentials_mock, project) ) + job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE) + job_reference["projectId"] = project query = "select * from persons" - job_reference = {"projectId": project, "jobId": "some-random-id"} - table = {"projectId": project, "datasetId": "ds", "tableId": "persons"} - resource = { - "jobReference": job_reference, - "configuration": { - "query": { - "destinationTable": table, - "query": query, - "queryParameters": [], - "useLegacySql": False, - } - }, - "status": {"state": "DONE"}, - } + resource = copy.deepcopy(QUERY_RESOURCE) + resource["jobReference"] = job_reference + resource["configuration"]["query"]["query"] = query data = {"jobReference": job_reference, "totalRows": 0, "rows": []} conn_mock = make_connection(resource, data, data, data) @@ -239,7 +243,8 @@ def test__run_query(): assert updates[0] == expected_first_line execution_updates = updates[1:-1] assert len(execution_updates) == 3 # one update per API response - assert all(re.match("Query executing: .*s", line) for line in execution_updates) + for line in execution_updates: + assert re.match("Query executing: .*s", line) assert re.match("Query complete after .*s", updates[-1]) @@ -548,6 +553,131 @@ def test_bigquery_magic_without_bqstorage(monkeypatch): assert isinstance(return_value, pandas.DataFrame) +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_maximum_bytes_billed_invalid(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + sql = "SELECT 17 AS num" + + with pytest.raises(ValueError): + ip.run_cell_magic("bigquery", "--maximum_bytes_billed=abc", sql) + + +@pytest.mark.parametrize( + "param_value,expected", [("987654321", "987654321"), ("None", "0")] +) +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, expected): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + # Set the default maximum bytes billed, so we know it's overridable by the param. + magics.context.default_query_job_config.maximum_bytes_billed = 1234567 + + project = "test-project" + job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE) + job_reference["projectId"] = project + query = "SELECT 17 AS num" + resource = copy.deepcopy(QUERY_RESOURCE) + resource["jobReference"] = job_reference + resource["configuration"]["query"]["query"] = query + data = {"jobReference": job_reference, "totalRows": 0, "rows": []} + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + conn = magics.context._connection = make_connection(resource, data) + list_rows_patch = mock.patch( + "google.cloud.bigquery.client.Client.list_rows", + return_value=google.cloud.bigquery.table._EmptyRowIterator(), + ) + with list_rows_patch, default_patch: + ip.run_cell_magic( + "bigquery", "--maximum_bytes_billed={}".format(param_value), query + ) + + _, req = conn.api_request.call_args_list[0] + sent_config = req["data"]["configuration"]["query"] + assert sent_config["maximumBytesBilled"] == expected + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + magics.context.default_query_job_config.maximum_bytes_billed = 1337 + + project = "test-project" + job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE) + job_reference["projectId"] = project + query = "SELECT 17 AS num" + resource = copy.deepcopy(QUERY_RESOURCE) + resource["jobReference"] = job_reference + resource["configuration"]["query"]["query"] = query + data = {"jobReference": job_reference, "totalRows": 0, "rows": []} + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + conn = magics.context._connection = make_connection(resource, data) + list_rows_patch = mock.patch( + "google.cloud.bigquery.client.Client.list_rows", + return_value=google.cloud.bigquery.table._EmptyRowIterator(), + ) + with list_rows_patch, default_patch: + ip.run_cell_magic("bigquery", "", query) + + _, req = conn.api_request.call_args_list[0] + sent_config = req["data"]["configuration"]["query"] + assert sent_config["maximumBytesBilled"] == "1337" + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + magics.context.default_query_job_config = job.QueryJobConfig( + maximum_bytes_billed=10203 + ) + + project = "test-project" + job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE) + job_reference["projectId"] = project + query = "SELECT 17 AS num" + resource = copy.deepcopy(QUERY_RESOURCE) + resource["jobReference"] = job_reference + resource["configuration"]["query"]["query"] = query + data = {"jobReference": job_reference, "totalRows": 0, "rows": []} + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + conn = magics.context._connection = make_connection(resource, data) + list_rows_patch = mock.patch( + "google.cloud.bigquery.client.Client.list_rows", + return_value=google.cloud.bigquery.table._EmptyRowIterator(), + ) + with list_rows_patch, default_patch: + ip.run_cell_magic("bigquery", "", query) + + _, req = conn.api_request.call_args_list[0] + sent_config = req["data"]["configuration"]["query"] + assert sent_config["maximumBytesBilled"] == "10203" + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_project(): ip = IPython.get_ipython() From 5fc5c41706f49635af18674cf48080f9694deaf9 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 4 Jun 2019 14:07:15 -0400 Subject: [PATCH 0599/2016] Don't pin 'google-api-core' in libs using 'google-cloud-core'. (#8213) Closes #8085. --- packages/google-cloud-bigquery/setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index b089e9e0536b..bf0ff3d97850 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -30,7 +30,6 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ 'enum34; python_version < "3.4"', - "google-api-core >= 1.6.0, < 2.0.0dev", "google-cloud-core >= 1.0.0, < 2.0dev", "google-resumable-media >= 0.3.1", "protobuf >= 3.6.0", From 7d0fd4fac234ac80f6e54b5a1c200d1b6e1ff302 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 5 Jun 2019 12:45:58 -0400 Subject: [PATCH 0600/2016] Release 'google-cloud-bigquery' 1.14.0. (#8215) --- packages/google-cloud-bigquery/CHANGELOG.md | 11 +++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index b5d29e6f594f..d845e29fc742 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,17 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.14.0 + +06-04-2019 11:11 PDT + + +### New Features +- Add `maximum_bytes_billed` argument and `context.default_query_job_config` property to magics. ([#8179](https://github.com/googleapis/google-cloud-python/pull/8179)) + +### Dependencies +- Don't pin `google-api-core` in libs using `google-cloud-core`. ([#8213](https://github.com/googleapis/google-cloud-python/pull/8213)) + ## 1.13.0 05-31-2019 10:22 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index bf0ff3d97850..b06829bcb942 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.13.0" +version = "1.14.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 4b01e04782d92f8f9c62429216e3dd628181a6d8 Mon Sep 17 00:00:00 2001 From: AzemaBaptiste Date: Wed, 5 Jun 2019 19:07:20 +0200 Subject: [PATCH 0601/2016] Add `page_size` parameter to `QueryJob.result`. (#8206) --- .../google/cloud/bigquery/job.py | 44 ++++++++------- .../google-cloud-bigquery/tests/system.py | 9 +++ .../tests/unit/test_job.py | 56 +++++++++++++++++++ 3 files changed, 89 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 18f22270feac..381ad84f0312 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2832,29 +2832,33 @@ def _blocking_poll(self, timeout=None): self._done_timeout = timeout super(QueryJob, self)._blocking_poll(timeout=timeout) - def result(self, timeout=None, retry=DEFAULT_RETRY): + def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY): """Start the job and wait for it to complete and get the result. - :type timeout: float - :param timeout: - How long (in seconds) to wait for job to complete before raising - a :class:`concurrent.futures.TimeoutError`. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the call that retrieves rows. + Args: + timeout (float): + How long (in seconds) to wait for job to complete before + raising a :class:`concurrent.futures.TimeoutError`. + page_size (int): + (Optional) The maximum number of rows in each page of results + from this request. Non-positive values are ignored. + retry (google.api_core.retry.Retry): + (Optional) How to retry the call that retrieves rows. - :rtype: :class:`~google.cloud.bigquery.table.RowIterator` - :returns: - Iterator of row data :class:`~google.cloud.bigquery.table.Row`-s. - During each page, the iterator will have the ``total_rows`` - attribute set, which counts the total number of rows **in the - result set** (this is distinct from the total number of rows in - the current page: ``iterator.page.num_items``). + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. During each + page, the iterator will have the ``total_rows`` attribute + set, which counts the total number of rows **in the result + set** (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). - :raises: - :class:`~google.cloud.exceptions.GoogleCloudError` if the job - failed or :class:`concurrent.futures.TimeoutError` if the job did - not complete in the given timeout. + Raises: + google.cloud.exceptions.GoogleCloudError: + If the job failed. + concurrent.futures.TimeoutError: + If the job did not complete in the given timeout. """ super(QueryJob, self).result(timeout=timeout) # Return an iterator instead of returning the job. @@ -2874,7 +2878,7 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): dest_table_ref = self.destination dest_table = Table(dest_table_ref, schema=schema) dest_table._properties["numRows"] = self._query_results.total_rows - rows = self._client.list_rows(dest_table, retry=retry) + rows = self._client.list_rows(dest_table, page_size=page_size, retry=retry) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 2b4aa84b8faf..d04bf7c1854b 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1202,6 +1202,15 @@ def test_query_w_timeout(self): # 1 second is much too short for this query. query_job.result(timeout=1) + def test_query_w_page_size(self): + page_size = 45 + query_job = Config.CLIENT.query( + "SELECT word FROM `bigquery-public-data.samples.shakespeare`;", + job_id_prefix="test_query_w_page_size_", + ) + iterator = query_job.result(page_size=page_size) + self.assertEqual(next(iterator.pages).num_items, page_size) + def test_query_statistics(self): """ A system test to exercise some of the extended query statistics. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index bb6f03f3efb3..abb2a2c4ec1e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -4168,6 +4168,62 @@ def test_result_w_timeout(self): self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) self.assertEqual(reload_request[1]["method"], "GET") + def test_result_w_page_size(self): + # Arrange + query_results_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "4", + } + job_resource = self._make_resource(started=True, ended=True) + q_config = job_resource["configuration"]["query"] + q_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + } + tabledata_resource = { + "totalRows": 4, + "pageToken": "some-page-token", + "rows": [ + {"f": [{"v": "row1"}]}, + {"f": [{"v": "row2"}]}, + {"f": [{"v": "row3"}]}, + ], + } + tabledata_resource_page_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} + conn = _make_connection( + query_results_resource, tabledata_resource, tabledata_resource_page_2 + ) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + # Act + result = job.result(page_size=3) + + # Assert + actual_rows = list(result) + self.assertEqual(len(actual_rows), 4) + + tabledata_path = "/projects/%s/datasets/%s/tables/%s/data" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + conn.api_request.assert_has_calls( + [ + mock.call( + method="GET", path=tabledata_path, query_params={"maxResults": 3} + ), + mock.call( + method="GET", + path=tabledata_path, + query_params={"pageToken": "some-page-token", "maxResults": 3}, + ), + ] + ) + def test_result_error(self): from google.cloud import exceptions From 30c7c3e71d50e6541a069fbac67b096600ec75b8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 7 Jun 2019 12:08:35 -0700 Subject: [PATCH 0602/2016] Fix bug where `load_table_from_dataframe` could not append to REQUIRED fields. (#8230) If a BigQuery schema is supplied as part of the `job_config`, it can be used to set the `nullable` bit correctly on the serialized parquet file. --- .../google/cloud/bigquery/_pandas_helpers.py | 50 +++++++--- .../google-cloud-bigquery/tests/system.py | 39 ++++++++ .../tests/unit/test__pandas_helpers.py | 92 ++++++++++++++++++- 3 files changed, 169 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index eeb65e0b9766..6a1b2dab910f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -14,6 +14,8 @@ """Shared helper functions for connecting BigQuery and pandas.""" +import warnings + try: import pyarrow import pyarrow.parquet @@ -107,6 +109,8 @@ def bq_to_arrow_field(bq_field): if arrow_type: is_nullable = bq_field.mode.upper() == "NULLABLE" return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) + + warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name)) return None @@ -119,11 +123,8 @@ def bq_to_arrow_array(series, bq_field): return pyarrow.array(series, type=arrow_type) -def to_parquet(dataframe, bq_schema, filepath): - """Write dataframe as a Parquet file, according to the desired BQ schema. - - This function requires the :mod:`pyarrow` package. Arrow is used as an - intermediate format. +def to_arrow(dataframe, bq_schema): + """Convert pandas dataframe to Arrow table, using BigQuery schema. Args: dataframe (pandas.DataFrame): @@ -131,12 +132,12 @@ def to_parquet(dataframe, bq_schema, filepath): bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): Desired BigQuery schema. Number of columns must match number of columns in the DataFrame. - filepath (str): - Path to write Parquet file to. - """ - if pyarrow is None: - raise ValueError("pyarrow is required for BigQuery schema conversion.") + Returns: + pyarrow.Table: + Table containing dataframe data, with schema derived from + BigQuery schema. + """ if len(bq_schema) != len(dataframe.columns): raise ValueError( "Number of columns in schema must match number of columns in dataframe." @@ -144,9 +145,36 @@ def to_parquet(dataframe, bq_schema, filepath): arrow_arrays = [] arrow_names = [] + arrow_fields = [] for bq_field in bq_schema: + arrow_fields.append(bq_to_arrow_field(bq_field)) arrow_names.append(bq_field.name) arrow_arrays.append(bq_to_arrow_array(dataframe[bq_field.name], bq_field)) - arrow_table = pyarrow.Table.from_arrays(arrow_arrays, names=arrow_names) + if all((field is not None for field in arrow_fields)): + return pyarrow.Table.from_arrays( + arrow_arrays, schema=pyarrow.schema(arrow_fields) + ) + return pyarrow.Table.from_arrays(arrow_arrays, names=arrow_names) + + +def to_parquet(dataframe, bq_schema, filepath): + """Write dataframe as a Parquet file, according to the desired BQ schema. + + This function requires the :mod:`pyarrow` package. Arrow is used as an + intermediate format. + + Args: + dataframe (pandas.DataFrame): + DataFrame to convert to convert to Parquet file. + bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + Desired BigQuery schema. Number of columns must match number of + columns in the DataFrame. + filepath (str): + Path to write Parquet file to. + """ + if pyarrow is None: + raise ValueError("pyarrow is required for BigQuery schema conversion.") + + arrow_table = to_arrow(dataframe, bq_schema) pyarrow.parquet.write_table(arrow_table, filepath) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index d04bf7c1854b..eba4c3b6adef 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -699,6 +699,45 @@ def test_load_table_from_dataframe_w_nulls(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, num_rows) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_required(self): + """Test that a DataFrame with required columns can be uploaded if a + BigQuery schema is specified. + + See: https://github.com/googleapis/google-cloud-python/issues/8093 + """ + table_schema = ( + bigquery.SchemaField("name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ) + + records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}] + dataframe = pandas.DataFrame(records) + job_config = bigquery.LoadJobConfig(schema=table_schema) + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_required".format( + Config.CLIENT.project, dataset_id + ) + + # Create the table before loading so that schema mismatch errors are + # identified. + table = retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = Config.CLIENT.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = Config.CLIENT.get_table(table) + self.assertEqual(tuple(table.schema), table_schema) + self.assertEqual(table.num_rows, 2) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_explicit_schema(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index f04f95307806..40b4548dae28 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -15,6 +15,7 @@ import datetime import decimal import functools +import warnings try: import pandas @@ -26,6 +27,7 @@ except ImportError: # pragma: NO COVER pyarrow = None import pytest +import pytz from google.cloud.bigquery import schema @@ -373,7 +375,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ( "GEOGRAPHY", [ - "POINT(30, 10)", + "POINT(30 10)", None, "LINESTRING (30 10, 10 30, 40 40)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", @@ -440,6 +442,94 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None +@pytest.mark.skipIf(pandas is None, "Requires `pandas`") +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_to_arrow_w_required_fields(module_under_test): + bq_schema = ( + schema.SchemaField("field01", "STRING", mode="REQUIRED"), + schema.SchemaField("field02", "BYTES", mode="REQUIRED"), + schema.SchemaField("field03", "INTEGER", mode="REQUIRED"), + schema.SchemaField("field04", "INT64", mode="REQUIRED"), + schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), + schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), + schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), + schema.SchemaField("field08", "BOOLEAN", mode="REQUIRED"), + schema.SchemaField("field09", "BOOL", mode="REQUIRED"), + schema.SchemaField("field10", "TIMESTAMP", mode="REQUIRED"), + schema.SchemaField("field11", "DATE", mode="REQUIRED"), + schema.SchemaField("field12", "TIME", mode="REQUIRED"), + schema.SchemaField("field13", "DATETIME", mode="REQUIRED"), + schema.SchemaField("field14", "GEOGRAPHY", mode="REQUIRED"), + ) + dataframe = pandas.DataFrame( + { + "field01": ["hello", "world"], + "field02": [b"abd", b"efg"], + "field03": [1, 2], + "field04": [3, 4], + "field05": [1.25, 9.75], + "field06": [-1.75, -3.5], + "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field08": [True, False], + "field09": [False, True], + "field10": [ + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), + ], + "field11": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], + "field12": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], + "field13": [ + datetime.datetime(1970, 1, 1, 0, 0, 0), + datetime.datetime(2012, 12, 21, 9, 7, 42), + ], + "field14": [ + "POINT(30 10)", + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + } + ) + + arrow_table = module_under_test.to_arrow(dataframe, bq_schema) + arrow_schema = arrow_table.schema + + assert len(arrow_schema) == len(bq_schema) + for arrow_field in arrow_schema: + assert not arrow_field.nullable + + +@pytest.mark.skipIf(pandas is None, "Requires `pandas`") +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_to_arrow_w_unknown_type(module_under_test): + bq_schema = ( + schema.SchemaField("field00", "UNKNOWN_TYPE"), + schema.SchemaField("field01", "STRING"), + schema.SchemaField("field02", "BYTES"), + schema.SchemaField("field03", "INTEGER"), + ) + dataframe = pandas.DataFrame( + { + "field00": ["whoami", "whatami"], + "field01": ["hello", "world"], + "field02": [b"abd", b"efg"], + "field03": [1, 2], + } + ) + + with warnings.catch_warnings(record=True) as warned: + arrow_table = module_under_test.to_arrow(dataframe, bq_schema) + arrow_schema = arrow_table.schema + + assert len(warned) == 1 + warning = warned[0] + assert "field00" in str(warning) + + assert len(arrow_schema) == len(bq_schema) + assert arrow_schema[0].name == "field00" + assert arrow_schema[1].name == "field01" + assert arrow_schema[2].name == "field02" + assert arrow_schema[3].name == "field03" + + @pytest.mark.skipIf(pandas is None, "Requires `pandas`") def test_to_parquet_without_pyarrow(module_under_test, monkeypatch): monkeypatch.setattr(module_under_test, "pyarrow", None) From e34252b642e3d19ec65bcda79958369b82dad746 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 14 Jun 2019 10:14:12 -0700 Subject: [PATCH 0603/2016] Release 1.15.0 (#8339) --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index d845e29fc742..c462c40a02d8 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.15.0 + +06-14-2019 10:10 PDT + +### Implementation Changes + +- Fix bug where `load_table_from_dataframe` could not append to REQUIRED fields. ([#8230](https://github.com/googleapis/google-cloud-python/pull/8230)) + +### New Features + +- Add `page_size` parameter to `QueryJob.result`. ([#8206](https://github.com/googleapis/google-cloud-python/pull/8206)) + ## 1.14.0 06-04-2019 11:11 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index b06829bcb942..8592a232ecb3 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.14.0" +version = "1.15.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 0e35fcb5b06575ecfe813a4183eadcf7a65f150d Mon Sep 17 00:00:00 2001 From: Grant Timmerman Date: Tue, 18 Jun 2019 12:43:22 -0700 Subject: [PATCH 0604/2016] BigQuery Snippets: Use autodetected location (#8341) --- packages/google-cloud-bigquery/docs/snippets.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index b11dccfbfd09..7c6c6902170d 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -443,12 +443,8 @@ def test_load_and_query_partitioned_table(client, to_delete): bigquery.ScalarQueryParameter("end_date", "DATE", datetime.date(1899, 12, 31)), ] - query_job = client.query( - sql, - # Location must match that of the dataset(s) referenced in the query. - location="US", - job_config=job_config, - ) # API request + # API request + query_job = client.query(sql, job_config=job_config) rows = list(query_job) print("{} states were admitted to the US in the 1800s".format(len(rows))) From de780cae2d9e3f4a3eff2ee24e112d8c49da9aa0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 18 Jun 2019 16:17:54 -0700 Subject: [PATCH 0605/2016] Refactor `to_dataframe` deterministicly update progress bar. (#8303) * Refactor `to_dataframe` deterministicly update progress bar. Previously, a background thread was used to collect progress bar updates from worker threads. So as not to block downloads for progress bar updates, `put_nowait` was used to make progress bar updates. Missed writes to the progress bar were ignored. This caused non-deterministic progress bar updates and test flakiness. Now, worker threads push dataframes to the queue, and the return values for `download_dataframe_bqstorage` and `download_dataframe_tabledata_list` have been updated to return an iterable of pandas DataFrame objects instead of a single DataFrame. This allows progress bar updates to be done independently of which underlying API is used to download the DataFrames. Also, the logic for working with pandas has been moved to the `_pandas_helpers` module. --- .../google/cloud/bigquery/_pandas_helpers.py | 196 ++++++++++- .../google/cloud/bigquery/client.py | 4 +- .../google/cloud/bigquery/table.py | 303 ++++-------------- .../tests/unit/test__pandas_helpers.py | 16 +- .../tests/unit/test_table.py | 30 +- 5 files changed, 271 insertions(+), 278 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 6a1b2dab910f..5261c2b99efd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -14,8 +14,22 @@ """Shared helper functions for connecting BigQuery and pandas.""" +import collections +import concurrent.futures import warnings +from six.moves import queue + +try: + from google.cloud import bigquery_storage_v1beta1 +except ImportError: # pragma: NO COVER + bigquery_storage_v1beta1 = None + +try: + import pandas +except ImportError: # pragma: NO COVER + pandas = None + try: import pyarrow import pyarrow.parquet @@ -25,7 +39,23 @@ from google.cloud.bigquery import schema +_NO_BQSTORAGE_ERROR = ( + "The google-cloud-bigquery-storage library is not installed, " + "please install google-cloud-bigquery-storage to use bqstorage features." +) + STRUCT_TYPES = ("RECORD", "STRUCT") +_PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. + + +class _DownloadState(object): + """Flag to indicate that a thread should exit early.""" + + def __init__(self): + # No need for a lock because reading/replacing a variable is defined to + # be an atomic operation in the Python language definition (enforced by + # the global interpreter lock). + self.done = False def pyarrow_datetime(): @@ -123,7 +153,7 @@ def bq_to_arrow_array(series, bq_field): return pyarrow.array(series, type=arrow_type) -def to_arrow(dataframe, bq_schema): +def dataframe_to_arrow(dataframe, bq_schema): """Convert pandas dataframe to Arrow table, using BigQuery schema. Args: @@ -158,7 +188,7 @@ def to_arrow(dataframe, bq_schema): return pyarrow.Table.from_arrays(arrow_arrays, names=arrow_names) -def to_parquet(dataframe, bq_schema, filepath): +def dataframe_to_parquet(dataframe, bq_schema, filepath): """Write dataframe as a Parquet file, according to the desired BQ schema. This function requires the :mod:`pyarrow` package. Arrow is used as an @@ -176,5 +206,165 @@ def to_parquet(dataframe, bq_schema, filepath): if pyarrow is None: raise ValueError("pyarrow is required for BigQuery schema conversion.") - arrow_table = to_arrow(dataframe, bq_schema) + arrow_table = dataframe_to_arrow(dataframe, bq_schema) pyarrow.parquet.write_table(arrow_table, filepath) + + +def _tabledata_list_page_to_dataframe(page, column_names, dtypes): + columns = collections.defaultdict(list) + for row in page: + for column in column_names: + columns[column].append(row[column]) + for column in dtypes: + columns[column] = pandas.Series(columns[column], dtype=dtypes[column]) + return pandas.DataFrame(columns, columns=column_names) + + +def download_dataframe_tabledata_list(pages, schema, dtypes): + """Use (slower, but free) tabledata.list to construct a DataFrame.""" + column_names = [field.name for field in schema] + for page in pages: + yield _tabledata_list_page_to_dataframe(page, column_names, dtypes) + + +def _download_dataframe_bqstorage_stream( + download_state, + bqstorage_client, + column_names, + dtypes, + session, + stream, + worker_queue, +): + position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) + rowstream = bqstorage_client.read_rows(position).rows(session) + + for page in rowstream.pages: + if download_state.done: + return + # page.to_dataframe() does not preserve column order in some versions + # of google-cloud-bigquery-storage. Access by column name to rearrange. + frame = page.to_dataframe(dtypes=dtypes)[column_names] + worker_queue.put(frame) + + +def _nowait(futures): + """Separate finished and unfinished threads, much like + :func:`concurrent.futures.wait`, but don't wait. + """ + done = [] + not_done = [] + for future in futures: + if future.done(): + done.append(future) + else: + not_done.append(future) + return done, not_done + + +def download_dataframe_bqstorage( + project_id, + table, + bqstorage_client, + column_names, + dtypes, + preserve_order=False, + selected_fields=None, +): + """Use (faster, but billable) BQ Storage API to construct DataFrame.""" + if "$" in table.table_id: + raise ValueError( + "Reading from a specific partition is not currently supported." + ) + if "@" in table.table_id: + raise ValueError("Reading from a specific snapshot is not currently supported.") + + read_options = bigquery_storage_v1beta1.types.TableReadOptions() + if selected_fields is not None: + for field in selected_fields: + read_options.selected_fields.append(field.name) + + requested_streams = 0 + if preserve_order: + requested_streams = 1 + + session = bqstorage_client.create_read_session( + table.to_bqstorage(), + "projects/{}".format(project_id), + read_options=read_options, + requested_streams=requested_streams, + ) + + # Avoid reading rows from an empty table. pandas.concat will fail on an + # empty list. + if not session.streams: + yield pandas.DataFrame(columns=column_names) + return + + total_streams = len(session.streams) + + # Use _DownloadState to notify worker threads when to quit. + # See: https://stackoverflow.com/a/29237343/101923 + download_state = _DownloadState() + + # Create a queue to collect frames as they are created in each thread. + worker_queue = queue.Queue() + + with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool: + try: + # Manually submit jobs and wait for download to complete rather + # than using pool.map because pool.map continues running in the + # background even if there is an exception on the main thread. + # See: https://github.com/googleapis/google-cloud-python/pull/7698 + not_done = [ + pool.submit( + _download_dataframe_bqstorage_stream, + download_state, + bqstorage_client, + column_names, + dtypes, + session, + stream, + worker_queue, + ) + for stream in session.streams + ] + + while not_done: + # Don't block on the worker threads. For performance reasons, + # we want to block on the queue's get method, instead. This + # prevents the queue from filling up, because the main thread + # has smaller gaps in time between calls to the queue's get + # method. For a detailed explaination, see: + # https://friendliness.dev/2019/06/18/python-nowait/ + done, not_done = _nowait(not_done) + for future in done: + # Call result() on any finished threads to raise any + # exceptions encountered. + future.result() + + try: + frame = worker_queue.get(timeout=_PROGRESS_INTERVAL) + yield frame + except queue.Empty: # pragma: NO COVER + continue + + # Return any remaining values after the workers finished. + while not worker_queue.empty(): + try: + # Include a timeout because even though the queue is + # non-empty, it doesn't guarantee that a subsequent call to + # get() will not block. + frame = worker_queue.get(timeout=_PROGRESS_INTERVAL) + yield frame + except queue.Empty: # pragma: NO COVER + continue + finally: + # No need for a lock because reading/replacing a variable is + # defined to be an atomic operation in the Python language + # definition (enforced by the global interpreter lock). + download_state.done = True + + # Shutdown all background threads, now that they should know to + # exit early. + pool.shutdown(wait=True) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 9fd4c5368efa..65d6915c7ea2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1310,7 +1310,9 @@ def load_table_from_dataframe( try: if pyarrow and job_config.schema: - _pandas_helpers.to_parquet(dataframe, job_config.schema, tmppath) + _pandas_helpers.dataframe_to_parquet( + dataframe, job_config.schema, tmppath + ) else: if job_config.schema: warnings.warn( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 1f9bb5eee3d4..7af3bc6f48b4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -16,18 +16,12 @@ from __future__ import absolute_import -import collections -import concurrent.futures import copy import datetime -import json import operator -import threading -import time import warnings import six -from six.moves import queue try: from google.cloud import bigquery_storage_v1beta1 @@ -49,6 +43,7 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers +from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource @@ -68,13 +63,6 @@ "library. Please install tqdm to use the progress bar functionality." ) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' -_MARKER = object() -_PROGRESS_INTERVAL = 0.2 # Time between download status updates, in seconds. - -# Send multiple updates from the worker threads, so there are at least a few -# waiting next time the prgrogess bar is updated. -_PROGRESS_UPDATES_PER_INTERVAL = 3 -_PROGRESS_WORKER_INTERVAL = _PROGRESS_INTERVAL / _PROGRESS_UPDATES_PER_INTERVAL def _reference_getter(table): @@ -1371,7 +1359,8 @@ def _get_next_page_response(self): @property def schema(self): - """List[google.cloud.bigquery.schema.SchemaField]: Table's schema.""" + """List[google.cloud.bigquery.schema.SchemaField]: The subset of + columns to be read from the table.""" return list(self._schema) @property @@ -1379,214 +1368,6 @@ def total_rows(self): """int: The total number of rows in the table.""" return self._total_rows - def _to_dataframe_dtypes(self, page, column_names, dtypes): - columns = collections.defaultdict(list) - for row in page: - for column in column_names: - columns[column].append(row[column]) - for column in dtypes: - columns[column] = pandas.Series(columns[column], dtype=dtypes[column]) - return pandas.DataFrame(columns, columns=column_names) - - def _to_dataframe_tabledata_list(self, dtypes, progress_bar=None): - """Use (slower, but free) tabledata.list to construct a DataFrame.""" - column_names = [field.name for field in self.schema] - frames = [] - - for page in iter(self.pages): - current_frame = self._to_dataframe_dtypes(page, column_names, dtypes) - frames.append(current_frame) - - if progress_bar is not None: - # In some cases, the number of total rows is not populated - # until the first page of rows is fetched. Update the - # progress bar's total to keep an accurate count. - progress_bar.total = progress_bar.total or self.total_rows - progress_bar.update(len(current_frame)) - - if progress_bar is not None: - # Indicate that the download has finished. - progress_bar.close() - - return pandas.concat(frames, ignore_index=True) - - def _to_dataframe_bqstorage_stream( - self, bqstorage_client, dtypes, columns, session, stream, worker_queue - ): - position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) - rowstream = bqstorage_client.read_rows(position).rows(session) - - frames = [] - for page in rowstream.pages: - if self._to_dataframe_finished: - return - frames.append(page.to_dataframe(dtypes=dtypes)) - - try: - worker_queue.put_nowait(page.num_items) - except queue.Full: - # It's okay if we miss a few progress updates. Don't slow - # down parsing for that. - pass - - # Avoid errors on unlucky streams with no blocks. pandas.concat - # will fail on an empty list. - if not frames: - return pandas.DataFrame(columns=columns) - - # page.to_dataframe() does not preserve column order. Rearrange at - # the end using manually-parsed schema. - return pandas.concat(frames)[columns] - - def _process_worker_updates(self, worker_queue, progress_queue): - last_update_time = time.time() - current_update = 0 - - # Sum all updates in a contant loop. - while True: - try: - current_update += worker_queue.get(timeout=_PROGRESS_INTERVAL) - - # Time to send to the progress bar queue? - current_time = time.time() - elapsed_time = current_time - last_update_time - if elapsed_time > _PROGRESS_WORKER_INTERVAL: - progress_queue.put(current_update) - last_update_time = current_time - current_update = 0 - - except queue.Empty: - # Keep going, unless there probably aren't going to be any - # additional updates. - if self._to_dataframe_finished: - progress_queue.put(current_update) - return - - def _process_progress_updates(self, progress_queue, progress_bar): - if progress_bar is None: - return - - # Output all updates since the last interval. - while True: - try: - next_update = progress_queue.get_nowait() - progress_bar.update(next_update) - except queue.Empty: - break - - if self._to_dataframe_finished: - progress_bar.close() - return - - def _to_dataframe_bqstorage(self, bqstorage_client, dtypes, progress_bar=None): - """Use (faster, but billable) BQ Storage API to construct DataFrame.""" - if bigquery_storage_v1beta1 is None: - raise ValueError(_NO_BQSTORAGE_ERROR) - - if "$" in self._table.table_id: - raise ValueError( - "Reading from a specific partition is not currently supported." - ) - if "@" in self._table.table_id: - raise ValueError( - "Reading from a specific snapshot is not currently supported." - ) - - read_options = bigquery_storage_v1beta1.types.TableReadOptions() - if self._selected_fields is not None: - for field in self._selected_fields: - read_options.selected_fields.append(field.name) - - requested_streams = 0 - if self._preserve_order: - requested_streams = 1 - - session = bqstorage_client.create_read_session( - self._table.to_bqstorage(), - "projects/{}".format(self._project), - read_options=read_options, - requested_streams=requested_streams, - ) - - # We need to parse the schema manually so that we can rearrange the - # columns. - schema = json.loads(session.avro_schema.schema) - columns = [field["name"] for field in schema["fields"]] - - # Avoid reading rows from an empty table. pandas.concat will fail on an - # empty list. - if not session.streams: - return pandas.DataFrame(columns=columns) - - total_streams = len(session.streams) - - # Use _to_dataframe_finished to notify worker threads when to quit. - # See: https://stackoverflow.com/a/29237343/101923 - self._to_dataframe_finished = False - - # Create a queue to track progress updates across threads. - worker_queue = _NoopProgressBarQueue() - progress_queue = None - progress_thread = None - if progress_bar is not None: - worker_queue = queue.Queue() - progress_queue = queue.Queue() - progress_thread = threading.Thread( - target=self._process_worker_updates, args=(worker_queue, progress_queue) - ) - progress_thread.start() - - def get_frames(pool): - frames = [] - - # Manually submit jobs and wait for download to complete rather - # than using pool.map because pool.map continues running in the - # background even if there is an exception on the main thread. - # See: https://github.com/googleapis/google-cloud-python/pull/7698 - not_done = [ - pool.submit( - self._to_dataframe_bqstorage_stream, - bqstorage_client, - dtypes, - columns, - session, - stream, - worker_queue, - ) - for stream in session.streams - ] - - while not_done: - done, not_done = concurrent.futures.wait( - not_done, timeout=_PROGRESS_INTERVAL - ) - frames.extend([future.result() for future in done]) - - # The progress bar needs to update on the main thread to avoid - # contention over stdout / stderr. - self._process_progress_updates(progress_queue, progress_bar) - - return frames - - with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool: - try: - frames = get_frames(pool) - finally: - # No need for a lock because reading/replacing a variable is - # defined to be an atomic operation in the Python language - # definition (enforced by the global interpreter lock). - self._to_dataframe_finished = True - - # Shutdown all background threads, now that they should know to - # exit early. - pool.shutdown(wait=True) - if progress_thread is not None: - progress_thread.join() - - # Update the progress bar one last time to close it. - self._process_progress_updates(progress_queue, progress_bar) - return pandas.concat(frames, ignore_index=True) - def _get_progress_bar(self, progress_bar_type): """Construct a tqdm progress bar object, if tqdm is installed.""" if tqdm is None: @@ -1613,6 +1394,45 @@ def _get_progress_bar(self, progress_bar_type): warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) return None + def _to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): + """Create an iterable of pandas DataFrames, to process the table as a stream. + + See ``to_dataframe`` for argument descriptions. + """ + if bqstorage_client is not None: + column_names = [field.name for field in self._schema] + try: + # Iterate over the stream so that read errors are raised (and + # the method can then fallback to tabledata.list). + for frame in _pandas_helpers.download_dataframe_bqstorage( + self._project, + self._table, + bqstorage_client, + column_names, + dtypes, + preserve_order=self._preserve_order, + selected_fields=self._selected_fields, + ): + yield frame + return + except google.api_core.exceptions.Forbidden: + # Don't hide errors such as insufficient permissions to create + # a read session, or the API is not enabled. Both of those are + # clearly problems if the developer has explicitly asked for + # BigQuery Storage API support. + raise + except google.api_core.exceptions.GoogleAPICallError: + # There is a known issue with reading from small anonymous + # query results tables, so some errors are expected. Rather + # than throw those errors, try reading the DataFrame again, but + # with the tabledata.list API. + pass + + for frame in _pandas_helpers.download_dataframe_tabledata_list( + iter(self.pages), self.schema, dtypes + ): + yield frame + def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Create a pandas DataFrame by loading all pages of a query. @@ -1682,25 +1502,28 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non progress_bar = self._get_progress_bar(progress_bar_type) - if bqstorage_client is not None: - try: - return self._to_dataframe_bqstorage( - bqstorage_client, dtypes, progress_bar=progress_bar - ) - except google.api_core.exceptions.Forbidden: - # Don't hide errors such as insufficient permissions to create - # a read session, or the API is not enabled. Both of those are - # clearly problems if the developer has explicitly asked for - # BigQuery Storage API support. - raise - except google.api_core.exceptions.GoogleAPICallError: - # There is a known issue with reading from small anonymous - # query results tables, so some errors are expected. Rather - # than throw those errors, try reading the DataFrame again, but - # with the tabledata.list API. - pass + frames = [] + for frame in self._to_dataframe_iterable( + bqstorage_client=bqstorage_client, dtypes=dtypes + ): + frames.append(frame) + + if progress_bar is not None: + # In some cases, the number of total rows is not populated + # until the first page of rows is fetched. Update the + # progress bar's total to keep an accurate count. + progress_bar.total = progress_bar.total or self.total_rows + progress_bar.update(len(frame)) - return self._to_dataframe_tabledata_list(dtypes, progress_bar=progress_bar) + if progress_bar is not None: + # Indicate that the download has finished. + progress_bar.close() + + # Avoid concatting an empty list. + if not frames: + column_names = [field.name for field in self._schema] + return pandas.DataFrame(columns=column_names) + return pandas.concat(frames, ignore_index=True) class _EmptyRowIterator(object): diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 40b4548dae28..1c95aef0cec9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -444,7 +444,7 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): @pytest.mark.skipIf(pandas is None, "Requires `pandas`") @pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") -def test_to_arrow_w_required_fields(module_under_test): +def test_dataframe_to_arrow_w_required_fields(module_under_test): bq_schema = ( schema.SchemaField("field01", "STRING", mode="REQUIRED"), schema.SchemaField("field02", "BYTES", mode="REQUIRED"), @@ -489,7 +489,7 @@ def test_to_arrow_w_required_fields(module_under_test): } ) - arrow_table = module_under_test.to_arrow(dataframe, bq_schema) + arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) arrow_schema = arrow_table.schema assert len(arrow_schema) == len(bq_schema) @@ -499,7 +499,7 @@ def test_to_arrow_w_required_fields(module_under_test): @pytest.mark.skipIf(pandas is None, "Requires `pandas`") @pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") -def test_to_arrow_w_unknown_type(module_under_test): +def test_dataframe_to_arrow_w_unknown_type(module_under_test): bq_schema = ( schema.SchemaField("field00", "UNKNOWN_TYPE"), schema.SchemaField("field01", "STRING"), @@ -516,7 +516,7 @@ def test_to_arrow_w_unknown_type(module_under_test): ) with warnings.catch_warnings(record=True) as warned: - arrow_table = module_under_test.to_arrow(dataframe, bq_schema) + arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) arrow_schema = arrow_table.schema assert len(warned) == 1 @@ -531,18 +531,18 @@ def test_to_arrow_w_unknown_type(module_under_test): @pytest.mark.skipIf(pandas is None, "Requires `pandas`") -def test_to_parquet_without_pyarrow(module_under_test, monkeypatch): +def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): monkeypatch.setattr(module_under_test, "pyarrow", None) with pytest.raises(ValueError) as exc: - module_under_test.to_parquet(pandas.DataFrame(), (), None) + module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) assert "pyarrow is required" in str(exc) @pytest.mark.skipIf(pandas is None, "Requires `pandas`") @pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") -def test_to_parquet_w_missing_columns(module_under_test, monkeypatch): +def test_dataframe_to_parquet_w_missing_columns(module_under_test, monkeypatch): with pytest.raises(ValueError) as exc: - module_under_test.to_parquet( + module_under_test.dataframe_to_parquet( pandas.DataFrame(), (schema.SchemaField("not_found", "STRING"),), None ) assert "columns in schema must match" in str(exc) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 378323ce2932..a0ded16173d4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import concurrent.futures import itertools import json import time @@ -22,7 +21,6 @@ import mock import pytest import six -from six.moves import queue import google.api_core.exceptions @@ -1859,9 +1857,6 @@ def test_to_dataframe_w_bqstorage_nonempty(self): from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1beta1 import reader - # Speed up testing. - mut._PROGRESS_INTERVAL = 0.01 - bqstorage_client = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient ) @@ -1893,21 +1888,13 @@ def test_to_dataframe_w_bqstorage_nonempty(self): {"colA": -1, "colB": "def", "colC": 4.0}, ] - def blocking_to_dataframe(*args, **kwargs): - # Sleep for longer than the waiting interval so that we know we're - # only reading one page per loop at most. - time.sleep(2 * mut._PROGRESS_INTERVAL) - return pandas.DataFrame(page_items, columns=["colA", "colB", "colC"]) - mock_page = mock.create_autospec(reader.ReadRowsPage) - mock_page.to_dataframe.side_effect = blocking_to_dataframe + mock_page.to_dataframe.return_value = pandas.DataFrame( + page_items, columns=["colA", "colB", "colC"] + ) mock_pages = (mock_page, mock_page, mock_page) type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) - # Test that full queue errors are ignored. - mock_queue = mock.create_autospec(mut._NoopProgressBarQueue) - mock_queue().put_nowait.side_effect = queue.Full - schema = [ schema.SchemaField("colA", "IGNORED"), schema.SchemaField("colC", "IGNORED"), @@ -1923,10 +1910,7 @@ def blocking_to_dataframe(*args, **kwargs): selected_fields=schema, ) - with mock.patch.object(mut, "_NoopProgressBarQueue", mock_queue), mock.patch( - "concurrent.futures.wait", wraps=concurrent.futures.wait - ) as mock_wait: - got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) # Are the columns in the expected order? column_names = ["colA", "colC", "colB"] @@ -1937,12 +1921,6 @@ def blocking_to_dataframe(*args, **kwargs): total_rows = len(page_items) * total_pages self.assertEqual(len(got.index), total_rows) - # Make sure that this test looped through multiple progress intervals. - self.assertGreaterEqual(mock_wait.call_count, 2) - - # Make sure that this test pushed to the progress queue. - self.assertEqual(mock_queue().put_nowait.call_count, total_pages) - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" From f1871eee75e63afeca49d031f3002927d3668f6e Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 18 Jun 2019 16:20:59 -0700 Subject: [PATCH 0606/2016] Add more stats to Models API, such as `optimization_strategy` (via synth). (#8344) --- .../google/cloud/bigquery_v2/gapic/enums.py | 18 +- .../cloud/bigquery_v2/proto/model.proto | 98 +++-- .../cloud/bigquery_v2/proto/model_pb2.py | 351 +++++++++++++----- .../bigquery_v2/proto/model_reference.proto | 1 - .../bigquery_v2/proto/model_reference_pb2.py | 1 + .../bigquery_v2/proto/standard_sql.proto | 1 - .../bigquery_v2/proto/standard_sql_pb2.py | 1 + packages/google-cloud-bigquery/synth.metadata | 10 +- 8 files changed, 359 insertions(+), 122 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py index ba6c0c721266..e1ce20f9a130 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py @@ -90,14 +90,30 @@ class ModelType(enum.IntEnum): Attributes: MODEL_TYPE_UNSPECIFIED (int) LINEAR_REGRESSION (int): Linear regression model. - LOGISTIC_REGRESSION (int): Logistic regression model. + LOGISTIC_REGRESSION (int): Logistic regression based classification model. KMEANS (int): [Beta] K-means clustering model. + TENSORFLOW (int): [Beta] An imported TensorFlow model. """ MODEL_TYPE_UNSPECIFIED = 0 LINEAR_REGRESSION = 1 LOGISTIC_REGRESSION = 2 KMEANS = 3 + TENSORFLOW = 6 + + class OptimizationStrategy(enum.IntEnum): + """ + Indicates the optimization strategy used for training. + + Attributes: + OPTIMIZATION_STRATEGY_UNSPECIFIED (int) + BATCH_GRADIENT_DESCENT (int): Uses an iterative batch gradient descent algorithm. + NORMAL_EQUATION (int): Uses a normal equation to solve linear regression problem. + """ + + OPTIMIZATION_STRATEGY_UNSPECIFIED = 0 + BATCH_GRADIENT_DESCENT = 1 + NORMAL_EQUATION = 2 class StandardSqlDataType(object): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto index b94cb3f5efd5..2b5b6894db5b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto @@ -23,13 +23,19 @@ import "google/protobuf/empty.proto"; import "google/protobuf/timestamp.proto"; import "google/protobuf/wrappers.proto"; import "google/api/annotations.proto"; +import "google/api/client.proto"; option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; option java_outer_classname = "ModelProto"; option java_package = "com.google.cloud.bigquery.v2"; - service ModelService { + option (google.api.default_host) = "bigquery.googleapis.com"; + option (google.api.oauth_scopes) = + "https://www.googleapis.com/auth/bigquery," + "https://www.googleapis.com/auth/cloud-platform," + "https://www.googleapis.com/auth/cloud-platform.read-only"; + // Gets the specified model resource by model ID. rpc GetModel(GetModelRequest) returns (Model) { } @@ -67,11 +73,12 @@ message Model { google.protobuf.DoubleValue r_squared = 5; } - // Aggregate metrics for classification models. For multi-class models, - // the metrics are either macro-averaged: metrics are calculated for each - // label and then an unweighted average is taken of those values or - // micro-averaged: the metric is calculated globally by counting the total - // number of correctly predicted rows. + // Aggregate metrics for classification/classifier models. For multi-class + // models, the metrics are either macro-averaged or micro-averaged. When + // macro-averaged, the metrics are calculated for each label and then an + // unweighted average is taken of those values. When micro-averaged, the + // metric is calculated globally by counting the total number of correctly + // predicted rows. message AggregateClassificationMetrics { // Precision is the fraction of actual positive predictions that had // positive actual labels. For multiclass this is a macro-averaged @@ -104,7 +111,7 @@ message Model { google.protobuf.DoubleValue roc_auc = 7; } - // Evaluation metrics for binary classification models. + // Evaluation metrics for binary classification/classifier models. message BinaryClassificationMetrics { // Confusion matrix for binary classification models. message BinaryConfusionMatrix { @@ -123,11 +130,19 @@ message Model { // Number of false samples predicted as false. google.protobuf.Int64Value false_negatives = 5; - // Aggregate precision. + // The fraction of actual positive predictions that had positive actual + // labels. google.protobuf.DoubleValue precision = 6; - // Aggregate recall. + // The fraction of actual positive labels that were given a positive + // prediction. google.protobuf.DoubleValue recall = 7; + + // The equally weighted average of recall and precision. + google.protobuf.DoubleValue f1_score = 8; + + // The fraction of predictions given the correct label. + google.protobuf.DoubleValue accuracy = 9; } // Aggregate classification metrics. @@ -135,9 +150,15 @@ message Model { // Binary confusion matrix at multiple thresholds. repeated BinaryConfusionMatrix binary_confusion_matrix_list = 2; + + // Label representing the positive class. + string positive_label = 3; + + // Label representing the negative class. + string negative_label = 4; } - // Evaluation metrics for multi-class classification models. + // Evaluation metrics for multi-class classification/classifier models. message MultiClassClassificationMetrics { // Confusion matrix for multi-class classification models. message ConfusionMatrix { @@ -185,18 +206,18 @@ message Model { google.protobuf.DoubleValue mean_squared_distance = 2; } - // Evaluation metrics of a model. These are either computed on all - // training data or just the eval data based on whether eval data was used - // during training. + // Evaluation metrics of a model. These are either computed on all training + // data or just the eval data based on whether eval data was used during + // training. These are not present for imported models. message EvaluationMetrics { oneof metrics { // Populated for regression models. RegressionMetrics regression_metrics = 1; - // Populated for binary classification models. + // Populated for binary classification/classifier models. BinaryClassificationMetrics binary_classification_metrics = 2; - // Populated for multi-class classification models. + // Populated for multi-class classification/classifier models. MultiClassClassificationMetrics multi_class_classification_metrics = 3; // [Beta] Populated for clustering models. @@ -207,13 +228,14 @@ message Model { // Information about a single training query run for the model. message TrainingRun { message TrainingOptions { - // The maximum number of iterations in training. + // The maximum number of iterations in training. Used only for iterative + // training algorithms. int64 max_iterations = 1; // Type of loss function used during training run. LossType loss_type = 2; - // Learning rate in training. + // Learning rate in training. Used only for iterative training algorithms. double learn_rate = 3; // L1 regularization coefficient. @@ -223,14 +245,16 @@ message Model { google.protobuf.DoubleValue l2_regularization = 5; // When early_stop is true, stops training when accuracy improvement is - // less than 'min_relative_progress'. + // less than 'min_relative_progress'. Used only for iterative training + // algorithms. google.protobuf.DoubleValue min_relative_progress = 6; // Whether to train a model from the last checkpoint. google.protobuf.BoolValue warm_start = 7; // Whether to stop early when the loss doesn't improve significantly - // any more (compared to min_relative_progress). + // any more (compared to min_relative_progress). Used only for iterative + // training algorithms. google.protobuf.BoolValue early_stop = 8; // Name of input label columns in training data. @@ -257,14 +281,15 @@ message Model { // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties string data_split_column = 12; - // The strategy to determine learning rate. + // The strategy to determine learn rate for the current iteration. LearnRateStrategy learn_rate_strategy = 13; - // Specifies the initial learning rate for line search to start at. + // Specifies the initial learning rate for the line search learn rate + // strategy. double initial_learn_rate = 16; // Weights associated with each label class, for rebalancing the - // training data. + // training data. Only applicable for classification models. map label_class_weights = 17; // [Beta] Distance type for clustering models. @@ -272,6 +297,13 @@ message Model { // [Beta] Number of clusters for clustering models. int64 num_clusters = 21; + + // [Beta] Google Cloud Storage URI from which the model was imported. Only + // applicable for imported models. + string model_uri = 22; + + // Optimization strategy for training linear regression models. + OptimizationStrategy optimization_strategy = 23; } // Information about a single iteration of the training run. @@ -330,11 +362,14 @@ message Model { // Linear regression model. LINEAR_REGRESSION = 1; - // Logistic regression model. + // Logistic regression based classification model. LOGISTIC_REGRESSION = 2; // [Beta] K-means clustering model. KMEANS = 3; + + // [Beta] An imported TensorFlow model. + TENSORFLOW = 6; } // Loss metric to evaluate model training performance. @@ -391,6 +426,17 @@ message Model { CONSTANT = 2; } + // Indicates the optimization strategy used for training. + enum OptimizationStrategy { + OPTIMIZATION_STRATEGY_UNSPECIFIED = 0; + + // Uses an iterative batch gradient descent algorithm. + BATCH_GRADIENT_DESCENT = 1; + + // Uses a normal equation to solve linear regression problem. + NORMAL_EQUATION = 2; + } + // Output only. A hash of this resource. string etag = 1; @@ -406,11 +452,9 @@ message Model { int64 last_modified_time = 6; // [Optional] A user-friendly description of this model. - // @mutable bigquery.models.patch string description = 12; // [Optional] A descriptive name for this model. - // @mutable bigquery.models.patch string friendly_name = 14; // [Optional] The labels associated with this model. You can use these to @@ -419,7 +463,6 @@ message Model { // characters, underscores and dashes. International characters are allowed. // Label values are optional. Label keys must start with a letter and each // label in the list must have a different key. - // @mutable bigquery.models.patch map labels = 15; // [Optional] The time when this model expires, in milliseconds since the @@ -427,7 +470,6 @@ message Model { // will be deleted and their storage reclaimed. The defaultTableExpirationMs // property of the encapsulating dataset can be used to set a default // expirationTime on newly created models. - // @mutable bigquery.models.patch int64 expiration_time = 16; // Output only. The geographic location where the model resides. This value @@ -445,7 +487,7 @@ message Model { repeated StandardSqlField feature_columns = 10; // Output only. Label columns that were used to train this model. - // The output of the model will have a “predicted_” prefix to these columns. + // The output of the model will have a "predicted_" prefix to these columns. repeated StandardSqlField label_columns = 11; } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py index afa3d8cf640d..ed82d8e4e8fd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model.proto @@ -24,6 +25,7 @@ from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 +from google.api import client_pb2 as google_dot_api_dot_client__pb2 DESCRIPTOR = _descriptor.FileDescriptor( @@ -34,7 +36,7 @@ "\n\034com.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" ), serialized_pb=_b( - '\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"\xcc)\n\x05Model\x12\x0c\n\x04\x65tag\x18\x01 \x01(\t\x12\x41\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReference\x12\x15\n\rcreation_time\x18\x05 \x01(\x03\x12\x1a\n\x12last_modified_time\x18\x06 \x01(\x03\x12\x13\n\x0b\x64\x65scription\x18\x0c \x01(\t\x12\x15\n\rfriendly_name\x18\x0e \x01(\t\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x17\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x12\x10\n\x08location\x18\r \x01(\t\x12=\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelType\x12\x42\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRun\x12\x43\n\x0f\x66\x65\x61ture_columns\x18\n \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x12\x41\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x8f\x05\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x1a\x8c\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\x8c\x01\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\xaf\r\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\xa1\x07\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 \x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"c\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"K\n\x0fGetModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"}\n\x11PatchModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12.\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.Model"N\n\x12\x44\x65leteModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"\x82\x01\n\x11ListModelsRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 \x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\x88\x03\n\x0cModelService\x12X\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12i\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"\x00\x12\\\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12U\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"\x00\x42l\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' + '\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto"\xc4,\n\x05Model\x12\x0c\n\x04\x65tag\x18\x01 \x01(\t\x12\x41\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReference\x12\x15\n\rcreation_time\x18\x05 \x01(\x03\x12\x1a\n\x12last_modified_time\x18\x06 \x01(\x03\x12\x13\n\x0b\x64\x65scription\x18\x0c \x01(\t\x12\x15\n\rfriendly_name\x18\x0e \x01(\t\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x17\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x12\x10\n\x08location\x18\r \x01(\t\x12=\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelType\x12\x42\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRun\x12\x43\n\x0f\x66\x65\x61ture_columns\x18\n \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x12\x41\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x9f\x06\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x12\x16\n\x0epositive_label\x18\x03 \x01(\t\x12\x16\n\x0enegative_label\x18\x04 \x01(\t\x1a\xec\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\t \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\x8c\x01\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\x97\x0e\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\x89\x08\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 \x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x12\x11\n\tmodel_uri\x18\x16 \x01(\t\x12S\n\x15optimization_strategy\x18\x17 \x01(\x0e\x32\x34.google.cloud.bigquery.v2.Model.OptimizationStrategy\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"s\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03\x12\x0e\n\nTENSORFLOW\x10\x06"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"n\n\x14OptimizationStrategy\x12%\n!OPTIMIZATION_STRATEGY_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x42\x41TCH_GRADIENT_DESCENT\x10\x01\x12\x13\n\x0fNORMAL_EQUATION\x10\x02"K\n\x0fGetModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"}\n\x11PatchModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12.\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.Model"N\n\x12\x44\x65leteModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"\x82\x01\n\x11ListModelsRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 \x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\xb9\x04\n\x0cModelService\x12X\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12i\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"\x00\x12\\\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12U\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"\x00\x1a\xae\x01\xca\x41\x17\x62igquery.googleapis.com\xd2\x41\x90\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-onlyBl\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' ), dependencies=[ google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2.DESCRIPTOR, @@ -43,6 +45,7 @@ google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR, google_dot_api_dot_annotations__pb2.DESCRIPTOR, + google_dot_api_dot_client__pb2.DESCRIPTOR, ], ) @@ -77,11 +80,14 @@ _descriptor.EnumValueDescriptor( name="KMEANS", index=3, number=3, serialized_options=None, type=None ), + _descriptor.EnumValueDescriptor( + name="TENSORFLOW", index=4, number=6, serialized_options=None, type=None + ), ], containing_type=None, serialized_options=None, - serialized_start=5159, - serialized_end=5258, + serialized_start=5432, + serialized_end=5547, ) _sym_db.RegisterEnumDescriptor(_MODEL_MODELTYPE) @@ -111,8 +117,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=5260, - serialized_end=5339, + serialized_start=5549, + serialized_end=5628, ) _sym_db.RegisterEnumDescriptor(_MODEL_LOSSTYPE) @@ -138,8 +144,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=5341, - serialized_end=5413, + serialized_start=5630, + serialized_end=5702, ) _sym_db.RegisterEnumDescriptor(_MODEL_DISTANCETYPE) @@ -174,8 +180,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=5415, - serialized_end=5537, + serialized_start=5704, + serialized_end=5826, ) _sym_db.RegisterEnumDescriptor(_MODEL_DATASPLITMETHOD) @@ -201,11 +207,46 @@ ], containing_type=None, serialized_options=None, - serialized_start=5539, - serialized_end=5626, + serialized_start=5828, + serialized_end=5915, ) _sym_db.RegisterEnumDescriptor(_MODEL_LEARNRATESTRATEGY) +_MODEL_OPTIMIZATIONSTRATEGY = _descriptor.EnumDescriptor( + name="OptimizationStrategy", + full_name="google.cloud.bigquery.v2.Model.OptimizationStrategy", + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name="OPTIMIZATION_STRATEGY_UNSPECIFIED", + index=0, + number=0, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="BATCH_GRADIENT_DESCENT", + index=1, + number=1, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="NORMAL_EQUATION", + index=2, + number=2, + serialized_options=None, + type=None, + ), + ], + containing_type=None, + serialized_options=None, + serialized_start=5917, + serialized_end=6027, +) +_sym_db.RegisterEnumDescriptor(_MODEL_OPTIMIZATIONSTRATEGY) + _MODEL_REGRESSIONMETRICS = _descriptor.Descriptor( name="RegressionMetrics", @@ -313,8 +354,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=859, - serialized_end=1167, + serialized_start=884, + serialized_end=1192, ) _MODEL_AGGREGATECLASSIFICATIONMETRICS = _descriptor.Descriptor( @@ -459,8 +500,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=1170, - serialized_end=1537, + serialized_start=1195, + serialized_end=1562, ) _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX = _descriptor.Descriptor( @@ -596,6 +637,42 @@ serialized_options=None, file=DESCRIPTOR, ), + _descriptor.FieldDescriptor( + name="f1_score", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.f1_score", + index=7, + number=8, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="accuracy", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.accuracy", + index=8, + number=9, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), ], extensions=[], nested_types=[], @@ -605,8 +682,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=1799, - serialized_end=2195, + serialized_start=1872, + serialized_end=2364, ) _MODEL_BINARYCLASSIFICATIONMETRICS = _descriptor.Descriptor( @@ -652,6 +729,42 @@ serialized_options=None, file=DESCRIPTOR, ), + _descriptor.FieldDescriptor( + name="positive_label", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.positive_label", + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="negative_label", + full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.negative_label", + index=3, + number=4, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), ], extensions=[], nested_types=[_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX], @@ -661,8 +774,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=1540, - serialized_end=2195, + serialized_start=1565, + serialized_end=2364, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY = _descriptor.Descriptor( @@ -717,8 +830,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2630, - serialized_end=2711, + serialized_start=2799, + serialized_end=2880, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW = _descriptor.Descriptor( @@ -773,8 +886,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2714, - serialized_end=2845, + serialized_start=2883, + serialized_end=3014, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX = _descriptor.Descriptor( @@ -832,8 +945,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2452, - serialized_end=2845, + serialized_start=2621, + serialized_end=3014, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS = _descriptor.Descriptor( @@ -888,8 +1001,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2198, - serialized_end=2845, + serialized_start=2367, + serialized_end=3014, ) _MODEL_CLUSTERINGMETRICS = _descriptor.Descriptor( @@ -944,8 +1057,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2848, - serialized_end=2988, + serialized_start=3017, + serialized_end=3157, ) _MODEL_EVALUATIONMETRICS = _descriptor.Descriptor( @@ -1044,8 +1157,8 @@ fields=[], ) ], - serialized_start=2991, - serialized_end=3396, + serialized_start=3160, + serialized_end=3565, ) _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY = _descriptor.Descriptor( @@ -1100,8 +1213,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=4580, - serialized_end=4636, + serialized_start=4853, + serialized_end=4909, ) _MODEL_TRAININGRUN_TRAININGOPTIONS = _descriptor.Descriptor( @@ -1417,6 +1530,42 @@ serialized_options=None, file=DESCRIPTOR, ), + _descriptor.FieldDescriptor( + name="model_uri", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.model_uri", + index=17, + number=22, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="optimization_strategy", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.optimization_strategy", + index=18, + number=23, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), ], extensions=[], nested_types=[_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY], @@ -1426,8 +1575,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=3707, - serialized_end=4636, + serialized_start=3876, + serialized_end=4909, ) _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO = _descriptor.Descriptor( @@ -1500,8 +1649,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=4971, - serialized_end=5110, + serialized_start=5244, + serialized_end=5383, ) _MODEL_TRAININGRUN_ITERATIONRESULT = _descriptor.Descriptor( @@ -1628,8 +1777,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=4639, - serialized_end=5110, + serialized_start=4912, + serialized_end=5383, ) _MODEL_TRAININGRUN = _descriptor.Descriptor( @@ -1723,8 +1872,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=3399, - serialized_end=5110, + serialized_start=3568, + serialized_end=5383, ) _MODEL_LABELSENTRY = _descriptor.Descriptor( @@ -1779,8 +1928,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=5112, - serialized_end=5157, + serialized_start=5385, + serialized_end=5430, ) _MODEL = _descriptor.Descriptor( @@ -2042,14 +2191,15 @@ _MODEL_DISTANCETYPE, _MODEL_DATASPLITMETHOD, _MODEL_LEARNRATESTRATEGY, + _MODEL_OPTIMIZATIONSTRATEGY, ], serialized_options=None, is_extendable=False, syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=302, - serialized_end=5626, + serialized_start=327, + serialized_end=6027, ) @@ -2123,8 +2273,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=5628, - serialized_end=5703, + serialized_start=6029, + serialized_end=6104, ) @@ -2216,8 +2366,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=5705, - serialized_end=5830, + serialized_start=6106, + serialized_end=6231, ) @@ -2291,8 +2441,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=5832, - serialized_end=5910, + serialized_start=6233, + serialized_end=6311, ) @@ -2384,8 +2534,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=5913, - serialized_end=6043, + serialized_start=6314, + serialized_end=6444, ) @@ -2441,8 +2591,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=6045, - serialized_end=6139, + serialized_start=6446, + serialized_end=6540, ) _MODEL_REGRESSIONMETRICS.fields_by_name[ @@ -2504,6 +2654,12 @@ _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ "recall" ].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ + "f1_score" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ + "accuracy" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.containing_type = ( _MODEL_BINARYCLASSIFICATIONMETRICS ) @@ -2619,6 +2775,9 @@ _MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ "distance_type" ].enum_type = _MODEL_DISTANCETYPE +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "optimization_strategy" +].enum_type = _MODEL_OPTIMIZATIONSTRATEGY _MODEL_TRAININGRUN_TRAININGOPTIONS.containing_type = _MODEL_TRAININGRUN _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[ "cluster_radius" @@ -2682,6 +2841,7 @@ _MODEL_DISTANCETYPE.containing_type = _MODEL _MODEL_DATASPLITMETHOD.containing_type = _MODEL _MODEL_LEARNRATESTRATEGY.containing_type = _MODEL +_MODEL_OPTIMIZATIONSTRATEGY.containing_type = _MODEL _PATCHMODELREQUEST.fields_by_name["model"].message_type = _MODEL _LISTMODELSREQUEST.fields_by_name[ "max_results" @@ -2729,11 +2889,12 @@ dict( DESCRIPTOR=_MODEL_AGGREGATECLASSIFICATIONMETRICS, __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Aggregate metrics for classification models. For multi-class models, the - metrics are either macro-averaged: metrics are calculated for each label - and then an unweighted average is taken of those values or - micro-averaged: the metric is calculated globally by counting the total - number of correctly predicted rows. + __doc__="""Aggregate metrics for classification/classifier models. For multi-class + models, the metrics are either macro-averaged or micro-averaged. When + macro-averaged, the metrics are calculated for each label and then an + unweighted average is taken of those values. When micro-averaged, the + metric is calculated globally by counting the total number of correctly + predicted rows. Attributes: @@ -2792,16 +2953,22 @@ false_negatives: Number of false samples predicted as false. precision: - Aggregate precision. + The fraction of actual positive predictions that had positive + actual labels. recall: - Aggregate recall. + The fraction of actual positive labels that were given a + positive prediction. + f1_score: + The equally weighted average of recall and precision. + accuracy: + The fraction of predictions given the correct label. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix) ), ), DESCRIPTOR=_MODEL_BINARYCLASSIFICATIONMETRICS, __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics for binary classification models. + __doc__="""Evaluation metrics for binary classification/classifier models. Attributes: @@ -2809,6 +2976,10 @@ Aggregate classification metrics. binary_confusion_matrix_list: Binary confusion matrix at multiple thresholds. + positive_label: + Label representing the positive class. + negative_label: + Label representing the negative class. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics) ), @@ -2876,7 +3047,7 @@ ), DESCRIPTOR=_MODEL_MULTICLASSCLASSIFICATIONMETRICS, __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics for multi-class classification models. + __doc__="""Evaluation metrics for multi-class classification/classifier models. Attributes: @@ -2915,16 +3086,16 @@ __module__="google.cloud.bigquery_v2.proto.model_pb2", __doc__="""Evaluation metrics of a model. These are either computed on all training data or just the eval data based on whether eval data was used during - training. + training. These are not present for imported models. Attributes: regression_metrics: Populated for regression models. binary_classification_metrics: - Populated for binary classification models. + Populated for binary classification/classifier models. multi_class_classification_metrics: - Populated for multi-class classification models. + Populated for multi-class classification/classifier models. clustering_metrics: [Beta] Populated for clustering models. """, @@ -2954,23 +3125,27 @@ Attributes: max_iterations: - The maximum number of iterations in training. + The maximum number of iterations in training. Used only for + iterative training algorithms. loss_type: Type of loss function used during training run. learn_rate: - Learning rate in training. + Learning rate in training. Used only for iterative training + algorithms. l1_regularization: L1 regularization coefficient. l2_regularization: L2 regularization coefficient. min_relative_progress: When early\_stop is true, stops training when accuracy - improvement is less than 'min\_relative\_progress'. + improvement is less than 'min\_relative\_progress'. Used only + for iterative training algorithms. warm_start: Whether to train a model from the last checkpoint. early_stop: Whether to stop early when the loss doesn't improve significantly any more (compared to min\_relative\_progress). + Used only for iterative training algorithms. input_label_columns: Name of input label columns in training data. data_split_method: @@ -2993,17 +3168,23 @@ https://cloud.google.com/bigquery/docs/reference/standard- sql/data-types#data-type-properties learn_rate_strategy: - The strategy to determine learning rate. + The strategy to determine learn rate for the current + iteration. initial_learn_rate: - Specifies the initial learning rate for line search to start - at. + Specifies the initial learning rate for the line search learn + rate strategy. label_class_weights: Weights associated with each label class, for rebalancing the - training data. + training data. Only applicable for classification models. distance_type: [Beta] Distance type for clustering models. num_clusters: [Beta] Number of clusters for clustering models. + model_uri: + [Beta] Google Cloud Storage URI from which the model was + imported. Only applicable for imported models. + optimization_strategy: + Optimization strategy for training linear regression models. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions) ), @@ -3102,11 +3283,9 @@ Output only. The time when this model was last modified, in millisecs since the epoch. description: - [Optional] A user-friendly description of this model. @mutable - bigquery.models.patch + [Optional] A user-friendly description of this model. friendly_name: - [Optional] A descriptive name for this model. @mutable - bigquery.models.patch + [Optional] A descriptive name for this model. labels: [Optional] The labels associated with this model. You can use these to organize and group your models. Label keys and values @@ -3114,16 +3293,14 @@ lowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label - in the list must have a different key. @mutable - bigquery.models.patch + in the list must have a different key. expiration_time: [Optional] The time when this model expires, in milliseconds since the epoch. If not present, the model will persist indefinitely. Expired models will be deleted and their storage reclaimed. The defaultTableExpirationMs property of the encapsulating dataset can be used to set a default - expirationTime on newly created models. @mutable - bigquery.models.patch + expirationTime on newly created models. location: Output only. The geographic location where the model resides. This value is inherited from the dataset. @@ -3137,7 +3314,7 @@ this model. label_columns: Output only. Label columns that were used to train this model. - The output of the model will have a ``predicted\_`` prefix to + The output of the model will have a "predicted\_" prefix to these columns. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model) @@ -3282,9 +3459,11 @@ full_name="google.cloud.bigquery.v2.ModelService", file=DESCRIPTOR, index=0, - serialized_options=None, - serialized_start=6142, - serialized_end=6534, + serialized_options=_b( + "\312A\027bigquery.googleapis.com\322A\220\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-only" + ), + serialized_start=6543, + serialized_end=7112, methods=[ _descriptor.MethodDescriptor( name="GetModel", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto index 8e1e218f969c..f436659abd4f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto @@ -23,7 +23,6 @@ option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;big option java_outer_classname = "ModelReferenceProto"; option java_package = "com.google.cloud.bigquery.v2"; - // Id path of a model. message ModelReference { // [Required] The ID of the project containing this model. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py index 3d1b53e44800..94f6116b5a44 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model_reference.proto diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto index 82d9960242b3..98173092ff71 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto @@ -23,7 +23,6 @@ option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;big option java_outer_classname = "StandardSqlProto"; option java_package = "com.google.cloud.bigquery.v2"; - // The type of a variable, e.g., a function argument. // Examples: // INT64: {type_kind="INT64"} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py index 5f4a40278140..91f1554f7f80 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/standard_sql.proto diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 3d09d31e5ed8..44837073d6d6 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -1,19 +1,19 @@ { - "updateTime": "2019-05-25T12:13:19.608155Z", + "updateTime": "2019-06-15T12:11:48.348952Z", "sources": [ { "generator": { "name": "artman", - "version": "0.21.0", - "dockerImage": "googleapis/artman@sha256:28d4271586772b275cd3bc95cb46bd227a24d3c9048de45dccdb7f3afb0bfba9" + "version": "0.26.0", + "dockerImage": "googleapis/artman@sha256:6db0735b0d3beec5b887153a2a7c7411fc7bb53f73f6f389a822096bd14a3a15" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "7ca19138ccebe219a67be2245200e821b3e32123", - "internalRef": "249916728" + "sha": "7b58b37559f6a5337c4c564518e9573d742df225", + "internalRef": "253322136" } } ], From ed25de561e6e78502e2617e1dfed8e38a3b8ae1a Mon Sep 17 00:00:00 2001 From: Grant Timmerman Date: Tue, 18 Jun 2019 17:10:47 -0700 Subject: [PATCH 0607/2016] BigQuery Snippets: Use autodetected location (#8340) * b/116968956 Remove location since it is now auto-detected. * Update snippets.py --- packages/google-cloud-bigquery/docs/snippets.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 7c6c6902170d..8697eb74e080 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -983,12 +983,7 @@ def test_load_table_from_file(client, to_delete): job_config.autodetect = True with open(filename, "rb") as source_file: - job = client.load_table_from_file( - source_file, - table_ref, - location="US", # Must match the destination dataset location. - job_config=job_config, - ) # API request + job = client.load_table_from_file(source_file, table_ref, job_config=job_config) job.result() # Waits for table load to complete. From cd742bf1688a41917166384347acd325d4100543 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 19 Jun 2019 15:35:25 -0700 Subject: [PATCH 0608/2016] Add sample demonstrating how to create a job. (#8422) * Add sample demonstrating how to create a job. This sample is intended for https://cloud.google.com/bigquery/docs/running-jobs It demonstrates advanced features, such as overriding location autodetection and overriding job ID generation. * Blacken. Remove link and location auto-detect information from comments. --- .../samples/create_job.py | 39 +++++++++++++++++++ .../samples/tests/test_create_job.py | 23 +++++++++++ 2 files changed, 62 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/create_job.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_create_job.py diff --git a/packages/google-cloud-bigquery/samples/create_job.py b/packages/google-cloud-bigquery/samples/create_job.py new file mode 100644 index 000000000000..7570dc49f1d2 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/create_job.py @@ -0,0 +1,39 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_job(client): + # [START bigquery_create_job] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + query_job = client.query( + "SELECT country_name from `bigquery-public-data.utility_us.country_code_iso`", + # Explicitly force job execution to be routed to a specific processing + # location. + location="US", + # Specify a job configuration to set optional job resource properties. + job_config=bigquery.QueryJobConfig( + labels={"example-label": "example-value"}, maximum_bytes_billed=1000000 + ), + # The client libraries automatically generate a job ID. Override the + # generated ID with either the job_id_prefix or job_id parameters. + job_id_prefix="code_sample_", + ) # API request + + print("Started job: {}".format(query_job.job_id)) + # [END bigquery_create_job] + return query_job diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_job.py b/packages/google-cloud-bigquery/samples/tests/test_create_job.py new file mode 100644 index 000000000000..fce005ae8236 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_create_job.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import create_job + + +def test_create_job(capsys, client): + + query_job = create_job.create_job(client) + client.cancel_job(query_job.job_id, location="US") + out, err = capsys.readouterr() + assert "Started job: {}".format(query_job.job_id) in out From 7ee2737f3e727cedde1b5a00c33dc32b2dbfaf35 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 25 Jun 2019 12:44:16 -0700 Subject: [PATCH 0609/2016] All: Add docs job to publish to googleapis.dev. (#8464) --- packages/google-cloud-bigquery/.repo-metadata.json | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 packages/google-cloud-bigquery/.repo-metadata.json diff --git a/packages/google-cloud-bigquery/.repo-metadata.json b/packages/google-cloud-bigquery/.repo-metadata.json new file mode 100644 index 000000000000..5b4734b8e389 --- /dev/null +++ b/packages/google-cloud-bigquery/.repo-metadata.json @@ -0,0 +1,13 @@ +{ + "name": "bigquery", + "name_pretty": "Google Cloud BigQuery", + "product_documentation": "https://cloud.google.com/bigquery", + "client_documentation": "https://googleapis.dev/python/bigquery/latest", + "issue_tracker": "https://issuetracker.google.com/savedsearches/559654", + "release_level": "ga", + "language": "python", + "repo": "googleapis/google-cloud-python", + "distribution_name": "google-cloud-bigquery", + "api_id": "bigquery.googleapis.com", + "requires_billing": false +} \ No newline at end of file From de4ff128ff78b4e3d4d29cdd4fbe1aacd8cf4cf8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 28 Jun 2019 16:47:39 -0700 Subject: [PATCH 0610/2016] Add Routines API. (#8491) * Add Routines API. Adds support for managing permanent functions in BigQuery, such as scalar UDFs and stored procedures. At present, only scalar UDF functionality is available. Routines are registered as resources inside of datasets, and allow expected CRUD operations. Currently, routines do not support partial updates. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/routines * Add QueryJob.ddl_target_routine property. Adjust docstrings. --- .../google-cloud-bigquery/docs/reference.rst | 10 + .../google/cloud/bigquery/__init__.py | 7 + .../google/cloud/bigquery/client.py | 224 +++++++- .../google/cloud/bigquery/dataset.py | 26 + .../google/cloud/bigquery/job.py | 16 +- .../google/cloud/bigquery/routine.py | 513 ++++++++++++++++++ .../samples/create_routine.py | 46 ++ .../samples/create_routine_ddl.py | 44 ++ .../samples/delete_routine.py | 30 + .../samples/get_routine.py | 39 ++ .../samples/list_routines.py | 34 ++ .../samples/tests/conftest.py | 35 ++ .../samples/tests/test_create_table.py | 1 - .../samples/tests/test_routine_samples.py | 89 +++ .../samples/update_routine.py | 44 ++ .../google-cloud-bigquery/tests/system.py | 35 ++ .../tests/unit/routine/__init__.py | 0 .../tests/unit/routine/test_routine.py | 305 +++++++++++ .../unit/routine/test_routine_argument.py | 100 ++++ .../unit/routine/test_routine_reference.py | 138 +++++ .../tests/unit/test_client.py | 338 +++++++++++- .../tests/unit/test_dataset.py | 7 + .../tests/unit/test_job.py | 24 + 23 files changed, 2100 insertions(+), 5 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/routine.py create mode 100644 packages/google-cloud-bigquery/samples/create_routine.py create mode 100644 packages/google-cloud-bigquery/samples/create_routine_ddl.py create mode 100644 packages/google-cloud-bigquery/samples/delete_routine.py create mode 100644 packages/google-cloud-bigquery/samples/get_routine.py create mode 100644 packages/google-cloud-bigquery/samples/list_routines.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_routine_samples.py create mode 100644 packages/google-cloud-bigquery/samples/update_routine.py create mode 100644 packages/google-cloud-bigquery/tests/unit/routine/__init__.py create mode 100644 packages/google-cloud-bigquery/tests/unit/routine/test_routine.py create mode 100644 packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py create mode 100644 packages/google-cloud-bigquery/tests/unit/routine/test_routine_reference.py diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index b3f949e3daab..39b3e8407d30 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -101,6 +101,16 @@ Model model.Model model.ModelReference +Routine +======= + +.. autosummary:: + :toctree: generated + + routine.Routine + routine.RoutineArgument + routine.RoutineReference + Schema ====== diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 0b972bb7297b..b84051fc6be1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -67,6 +67,9 @@ from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.routine import Routine +from google.cloud.bigquery.routine import RoutineArgument +from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import Table @@ -105,6 +108,10 @@ # Models "Model", "ModelReference", + # Routines + "Routine", + "RoutineArgument", + "RoutineReference", # Shared helpers "SchemaField", "UDFResource", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 65d6915c7ea2..b8ce2d5a33f3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -58,6 +58,8 @@ from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import _QueryResults from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.routine import Routine +from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _table_arg_to_table from google.cloud.bigquery.table import _table_arg_to_table_ref @@ -374,6 +376,41 @@ def create_dataset(self, dataset, exists_ok=False, retry=DEFAULT_RETRY): raise return self.get_dataset(dataset.reference, retry=retry) + def create_routine(self, routine, exists_ok=False, retry=DEFAULT_RETRY): + """[Beta] Create a routine via a POST request. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/insert + + Args: + routine (:class:`~google.cloud.bigquery.routine.Routine`): + A :class:`~google.cloud.bigquery.routine.Routine` to create. + The dataset that the routine belongs to must already exist. + exists_ok (bool): + Defaults to ``False``. If ``True``, ignore "already exists" + errors when creating the routine. + retry (google.api_core.retry.Retry): + Optional. How to retry the RPC. + + Returns: + google.cloud.bigquery.routine.Routine: + A new ``Routine`` returned from the service. + """ + reference = routine.reference + path = "/projects/{}/datasets/{}/routines".format( + reference.project, reference.dataset_id + ) + resource = routine.to_api_repr() + try: + api_response = self._call_api( + retry, method="POST", path=path, data=resource + ) + return Routine.from_api_repr(api_response) + except google.api_core.exceptions.Conflict: + if not exists_ok: + raise + return self.get_routine(routine.reference, retry=retry) + def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY): """API call: create a table via a PUT request @@ -472,6 +509,34 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY): api_response = self._call_api(retry, method="GET", path=model_ref.path) return Model.from_api_repr(api_response) + def get_routine(self, routine_ref, retry=DEFAULT_RETRY): + """[Beta] Get the routine referenced by ``routine_ref``. + + Args: + routine_ref (Union[ \ + :class:`~google.cloud.bigquery.routine.Routine`, \ + :class:`~google.cloud.bigquery.routine.RoutineReference`, \ + str, \ + ]): + A reference to the routine to fetch from the BigQuery API. If + a string is passed in, this method attempts to create a + reference from a string using + :func:`google.cloud.bigquery.routine.RoutineReference.from_string`. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the API call. + + Returns: + google.cloud.bigquery.routine.Routine: + A ``Routine`` instance. + """ + if isinstance(routine_ref, str): + routine_ref = RoutineReference.from_string( + routine_ref, default_project=self.project + ) + + api_response = self._call_api(retry, method="GET", path=routine_ref.path) + return Routine.from_api_repr(api_response) + def get_table(self, table, retry=DEFAULT_RETRY): """Fetch the table referenced by ``table``. @@ -537,7 +602,7 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY): Use ``fields`` to specify which fields to update. At least one field must be provided. If a field is listed in ``fields`` and is ``None`` - in ``model``, it will be deleted. + in ``model``, the field value will be deleted. If ``model.etag`` is not ``None``, the update will only succeed if the model on the server has the same ETag. Thus reading a model with @@ -567,12 +632,58 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY): ) return Model.from_api_repr(api_response) + def update_routine(self, routine, fields, retry=DEFAULT_RETRY): + """[Beta] Change some fields of a routine. + + Use ``fields`` to specify which fields to update. At least one field + must be provided. If a field is listed in ``fields`` and is ``None`` + in ``routine``, the field value will be deleted. + + .. warning:: + During beta, partial updates are not supported. You must provide + all fields in the resource. + + If :attr:`~google.cloud.bigquery.routine.Routine.etag` is not + ``None``, the update will only succeed if the resource on the server + has the same ETag. Thus reading a routine with + :func:`~google.cloud.bigquery.client.Client.get_routine`, changing + its fields, and then passing it to this method will ensure that the + changes will only be saved if no modifications to the resource + occurred since the read. + + Args: + routine (google.cloud.bigquery.routine.Routine): The routine to update. + fields (Sequence[str]): + The fields of ``routine`` to change, spelled as the + :class:`~google.cloud.bigquery.routine.Routine` properties + (e.g. ``type_``). + retry (google.api_core.retry.Retry): + (Optional) A description of how to retry the API call. + + Returns: + google.cloud.bigquery.routine.Routine: + The routine resource returned from the API call. + """ + partial = routine._build_resource(fields) + if routine.etag: + headers = {"If-Match": routine.etag} + else: + headers = None + + # TODO: remove when routines update supports partial requests. + partial["routineReference"] = routine.reference.to_api_repr() + + api_response = self._call_api( + retry, method="PUT", path=routine.path, data=partial, headers=headers + ) + return Routine.from_api_repr(api_response) + def update_table(self, table, fields, retry=DEFAULT_RETRY): """Change some fields of a table. Use ``fields`` to specify which fields to update. At least one field must be provided. If a field is listed in ``fields`` and is ``None`` - in ``table``, it will be deleted. + in ``table``, the field value will be deleted. If ``table.etag`` is not ``None``, the update will only succeed if the table on the server has the same ETag. Thus reading a table with @@ -660,6 +771,64 @@ def list_models( result.dataset = dataset return result + def list_routines( + self, dataset, max_results=None, page_token=None, retry=DEFAULT_RETRY + ): + """[Beta] List routines in the dataset. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/list + + Args: + dataset (Union[ \ + :class:`~google.cloud.bigquery.dataset.Dataset`, \ + :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + str, \ + ]): + A reference to the dataset whose routines to list from the + BigQuery API. If a string is passed in, this method attempts + to create a dataset reference from a string using + :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. + max_results (int): + (Optional) Maximum number of routines to return. If not passed, + defaults to a value set by the API. + page_token (str): + (Optional) Token representing a cursor into the routines. If + not passed, the API will return the first page of routines. The + token marks the beginning of the iterator to be returned and + the value of the ``page_token`` can be accessed at + ``next_page_token`` of the + :class:`~google.api_core.page_iterator.HTTPIterator`. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. + + Returns: + google.api_core.page_iterator.Iterator: + Iterator of all + :class:`~google.cloud.bigquery.routine.Routine`s contained + within the requested dataset, limited by ``max_results``. + """ + if isinstance(dataset, str): + dataset = DatasetReference.from_string( + dataset, default_project=self.project + ) + + if not isinstance(dataset, (Dataset, DatasetReference)): + raise TypeError("dataset must be a Dataset, DatasetReference, or string") + + path = "{}/routines".format(dataset.path) + result = page_iterator.HTTPIterator( + client=self, + api_request=functools.partial(self._call_api, retry), + path=path, + item_to_value=_item_to_routine, + items_key="routines", + page_token=page_token, + max_results=max_results, + ) + result.dataset = dataset + return result + def list_tables( self, dataset, max_results=None, page_token=None, retry=DEFAULT_RETRY ): @@ -800,6 +969,42 @@ def delete_model(self, model, retry=DEFAULT_RETRY, not_found_ok=False): if not not_found_ok: raise + def delete_routine(self, routine, retry=DEFAULT_RETRY, not_found_ok=False): + """[Beta] Delete a routine. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/delete + + Args: + model (Union[ \ + :class:`~google.cloud.bigquery.routine.Routine`, \ + :class:`~google.cloud.bigquery.routine.RoutineReference`, \ + str, \ + ]): + A reference to the routine to delete. If a string is passed + in, this method attempts to create a routine reference from a + string using + :func:`google.cloud.bigquery.routine.RoutineReference.from_string`. + retry (:class:`google.api_core.retry.Retry`): + (Optional) How to retry the RPC. + not_found_ok (bool): + Defaults to ``False``. If ``True``, ignore "not found" errors + when deleting the routine. + """ + if isinstance(routine, str): + routine = RoutineReference.from_string( + routine, default_project=self.project + ) + + if not isinstance(routine, (Routine, RoutineReference)): + raise TypeError("routine must be a Routine or a RoutineReference") + + try: + self._call_api(retry, method="DELETE", path=routine.path) + except google.api_core.exceptions.NotFound: + if not not_found_ok: + raise + def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False): """Delete a table @@ -2073,6 +2278,21 @@ def _item_to_model(iterator, resource): return Model.from_api_repr(resource) +def _item_to_routine(iterator, resource): + """Convert a JSON model to the native object. + + Args: + iterator (google.api_core.page_iterator.Iterator): + The iterator that is currently in use. + resource (dict): + An item to be converted to a routine. + + Returns: + google.cloud.bigquery.routine.Routine: The next routine in the page. + """ + return Routine.from_api_repr(resource) + + def _item_to_table(iterator, resource): """Convert a JSON table to the native object. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 8566e183cda0..01260ccc6e68 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -22,6 +22,7 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers from google.cloud.bigquery.model import ModelReference +from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.table import TableReference @@ -53,6 +54,25 @@ def _get_model_reference(self, model_id): ) +def _get_routine_reference(self, routine_id): + """Constructs a RoutineReference. + + Args: + routine_id (str): the ID of the routine. + + Returns: + google.cloud.bigquery.routine.RoutineReference: + A RoutineReference for a routine in this dataset. + """ + return RoutineReference.from_api_repr( + { + "projectId": self.project, + "datasetId": self.dataset_id, + "routineId": routine_id, + } + ) + + class AccessEntry(object): """Represents grant of an access role to an entity. @@ -224,6 +244,8 @@ def path(self): model = _get_model_reference + routine = _get_routine_reference + @classmethod def from_api_repr(cls, resource): """Factory: construct a dataset reference given its API representation @@ -591,6 +613,8 @@ def _build_resource(self, filter_fields): model = _get_model_reference + routine = _get_routine_reference + def __repr__(self): return "Dataset({})".format(repr(self.reference)) @@ -672,3 +696,5 @@ def reference(self): table = _get_table_reference model = _get_model_reference + + routine = _get_routine_reference diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 381ad84f0312..87dab59e339b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -34,6 +34,7 @@ from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import EncryptionConfiguration @@ -2666,9 +2667,22 @@ def ddl_operation_performed(self): """ return self._job_statistics().get("ddlOperationPerformed") + @property + def ddl_target_routine(self): + """Optional[google.cloud.bigquery.routine.RoutineReference]: Return the DDL target routine, present + for CREATE/DROP FUNCTION/PROCEDURE queries. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/JobStatistics + """ + prop = self._job_statistics().get("ddlTargetRoutine") + if prop is not None: + prop = RoutineReference.from_api_repr(prop) + return prop + @property def ddl_target_table(self): - """Optional[TableReference]: Return the DDL target table, present + """Optional[google.cloud.bigquery.table.TableReference]: Return the DDL target table, present for CREATE/DROP TABLE/VIEW queries. See: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py new file mode 100644 index 000000000000..d5bb752dfddb --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py @@ -0,0 +1,513 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Define resources for the BigQuery Routines API.""" + +from google.protobuf import json_format +import six + +import google.cloud._helpers +from google.cloud.bigquery import _helpers +import google.cloud.bigquery_v2.types + + +class Routine(object): + """Resource representing a user-defined routine. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines + + Args: + routine_ref (Union[ \ + str, \ + google.cloud.bigquery.routine.RoutineReference, \ + ]): + A pointer to a routine. If ``routine_ref`` is a string, it must + included a project ID, dataset ID, and routine ID, each separated + by ``.``. + ``**kwargs`` (Dict): + Initial property values. + """ + + _PROPERTY_TO_API_FIELD = { + "arguments": "arguments", + "body": "definitionBody", + "created": "creationTime", + "etag": "etag", + "imported_libraries": "importedLibraries", + "language": "language", + "modified": "lastModifiedTime", + "reference": "routineReference", + "return_type": "returnType", + "type_": "routineType", + } + + def __init__(self, routine_ref, **kwargs): + if isinstance(routine_ref, six.string_types): + routine_ref = RoutineReference.from_string(routine_ref) + + self._properties = {"routineReference": routine_ref.to_api_repr()} + for property_name in kwargs: + setattr(self, property_name, kwargs[property_name]) + + @property + def reference(self): + """google.cloud.bigquery.routine.RoutineReference: Reference + describing the ID of this routine. + """ + return RoutineReference.from_api_repr( + self._properties[self._PROPERTY_TO_API_FIELD["reference"]] + ) + + @property + def path(self): + """str: URL path for the routine's APIs.""" + return self.reference.path + + @property + def project(self): + """str: ID of the project containing the routine.""" + return self.reference.project + + @property + def dataset_id(self): + """str: ID of dataset containing the routine.""" + return self.reference.dataset_id + + @property + def routine_id(self): + """str: The routine ID.""" + return self.reference.routine_id + + @property + def etag(self): + """str: ETag for the resource (:data:`None` until set from the + server). + + Read-only. + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["etag"]) + + @property + def type_(self): + """str: The fine-grained type of the routine. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#RoutineType + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["type_"]) + + @type_.setter + def type_(self, value): + self._properties[self._PROPERTY_TO_API_FIELD["type_"]] = value + + @property + def created(self): + """Optional[datetime.datetime]: Datetime at which the routine was + created (:data:`None` until set from the server). + + Read-only. + """ + value = self._properties.get(self._PROPERTY_TO_API_FIELD["created"]) + if value is not None and value != 0: + # value will be in milliseconds. + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(value) + ) + + @property + def modified(self): + """Optional[datetime.datetime]: Datetime at which the routine was + last modified (:data:`None` until set from the server). + + Read-only. + """ + value = self._properties.get(self._PROPERTY_TO_API_FIELD["modified"]) + if value is not None and value != 0: + # value will be in milliseconds. + return google.cloud._helpers._datetime_from_microseconds( + 1000.0 * float(value) + ) + + @property + def language(self): + """Optional[str]: The language of the routine. + + Defaults to ``SQL``. + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["language"]) + + @language.setter + def language(self, value): + self._properties[self._PROPERTY_TO_API_FIELD["language"]] = value + + @property + def arguments(self): + """List[google.cloud.bigquery.routine.RoutineArgument]: Input/output + argument of a function or a stored procedure. + + In-place modification is not supported. To set, replace the entire + property value with the modified list of + :class:`~google.cloud.bigquery.routine.RoutineArgument` objects. + """ + resources = self._properties.get(self._PROPERTY_TO_API_FIELD["arguments"], []) + return [RoutineArgument.from_api_repr(resource) for resource in resources] + + @arguments.setter + def arguments(self, value): + if not value: + resource = [] + else: + resource = [argument.to_api_repr() for argument in value] + self._properties[self._PROPERTY_TO_API_FIELD["arguments"]] = resource + + @property + def return_type(self): + """google.cloud.bigquery_v2.types.StandardSqlDataType: Return type of + the routine. + + If absent, the return type is inferred from + :attr:`~google.cloud.bigquery.routine.Routine.body` at query time in + each query that references this routine. If present, then the + evaluated result will be cast to the specified returned type at query + time. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#resource-routine + """ + resource = self._properties.get(self._PROPERTY_TO_API_FIELD["return_type"]) + if not resource: + return resource + output = google.cloud.bigquery_v2.types.StandardSqlDataType() + output = json_format.ParseDict(resource, output, ignore_unknown_fields=True) + return output + + @return_type.setter + def return_type(self, value): + if value: + resource = json_format.MessageToDict(value) + else: + resource = None + self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource + + @property + def imported_libraries(self): + """List[str]: The path of the imported JavaScript libraries. + + The :attr:`~google.cloud.bigquery.routine.Routine.language` must + equal ``JAVACRIPT``. + + Examples: + Set the ``imported_libraries`` to a list of Google Cloud Storage + URIs. + + .. code-block:: python + + routine = bigquery.Routine("proj.dataset.routine_id") + routine.imported_libraries = [ + "gs://cloud-samples-data/bigquery/udfs/max-value.js", + ] + """ + return self._properties.get( + self._PROPERTY_TO_API_FIELD["imported_libraries"], [] + ) + + @imported_libraries.setter + def imported_libraries(self, value): + if not value: + resource = [] + else: + resource = value + self._properties[self._PROPERTY_TO_API_FIELD["imported_libraries"]] = resource + + @property + def body(self): + """str: The body of the routine.""" + return self._properties.get(self._PROPERTY_TO_API_FIELD["body"]) + + @body.setter + def body(self, value): + self._properties[self._PROPERTY_TO_API_FIELD["body"]] = value + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a routine given its API representation. + + Args: + resource (Dict[str, object]): + Resource, as returned from the API. + + Returns: + google.cloud.bigquery.routine.Routine: + Python object, as parsed from ``resource``. + """ + ref = cls(RoutineReference.from_api_repr(resource["routineReference"])) + ref._properties = resource + return ref + + def to_api_repr(self): + """Construct the API resource representation of this routine. + + Returns: + Dict[str, object]: + Routine represented as an API resource. + """ + return self._properties + + def _build_resource(self, filter_fields): + """Generate a resource for ``update``.""" + return _helpers._build_resource_from_properties(self, filter_fields) + + def __repr__(self): + return "Routine('{}.{}.{}')".format( + self.project, self.dataset_id, self.routine_id + ) + + +class RoutineArgument(object): + """Input/output argument of a function or a stored procedure. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines + + Args: + ``**kwargs`` (Dict): + Initial property values. + """ + + _PROPERTY_TO_API_FIELD = { + "data_type": "dataType", + "kind": "argumentKind", + # Even though it's not necessary for field mapping to map when the + # property name equals the resource name, we add these here so that we + # have an exhaustive list of all properties. + "name": "name", + "mode": "mode", + } + + def __init__(self, **kwargs): + self._properties = {} + for property_name in kwargs: + setattr(self, property_name, kwargs[property_name]) + + @property + def name(self): + """Optional[str]: Name of this argument. + + Can be absent for function return argument. + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["name"]) + + @name.setter + def name(self, value): + self._properties[self._PROPERTY_TO_API_FIELD["name"]] = value + + @property + def kind(self): + """Optional[str]: The kind of argument, for example ``FIXED_TYPE`` or + ``ANY_TYPE``. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#ArgumentKind + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["kind"]) + + @kind.setter + def kind(self, value): + self._properties[self._PROPERTY_TO_API_FIELD["kind"]] = value + + @property + def mode(self): + """Optional[str]: The input/output mode of the argument.""" + return self._properties.get(self._PROPERTY_TO_API_FIELD["mode"]) + + @mode.setter + def mode(self, value): + self._properties[self._PROPERTY_TO_API_FIELD["mode"]] = value + + @property + def data_type(self): + """Optional[google.cloud.bigquery_v2.types.StandardSqlDataType]: Type + of a variable, e.g., a function argument. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlDataType + """ + resource = self._properties.get(self._PROPERTY_TO_API_FIELD["data_type"]) + if not resource: + return resource + output = google.cloud.bigquery_v2.types.StandardSqlDataType() + output = json_format.ParseDict(resource, output, ignore_unknown_fields=True) + return output + + @data_type.setter + def data_type(self, value): + if value: + resource = json_format.MessageToDict(value) + else: + resource = None + self._properties[self._PROPERTY_TO_API_FIELD["data_type"]] = resource + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a routine argument given its API representation. + + Args: + resource (Dict[str, object]): + Resource, as returned from the API. + + Returns: + google.cloud.bigquery.routine.RoutineArgument: + Python object, as parsed from ``resource``. + """ + ref = cls() + ref._properties = resource + return ref + + def to_api_repr(self): + """Construct the API resource representation of this routine argument. + + Returns: + Dict[str, object]: + Routine argument represented as an API resource. + """ + return self._properties + + def __eq__(self, other): + if not isinstance(other, RoutineArgument): + return NotImplemented + return self._properties == other._properties + + def __ne__(self, other): + return not self == other + + def __repr__(self): + all_properties = [ + "{}={}".format(property_name, repr(getattr(self, property_name))) + for property_name in sorted(self._PROPERTY_TO_API_FIELD) + ] + return "RoutineArgument({})".format(", ".join(all_properties)) + + +class RoutineReference(object): + """A pointer to a routine. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines + """ + + def __init__(self): + self._properties = {} + + @property + def project(self): + """str: ID of the project containing the routine.""" + return self._properties["projectId"] + + @property + def dataset_id(self): + """str: ID of dataset containing the routine.""" + return self._properties["datasetId"] + + @property + def routine_id(self): + """str: The routine ID.""" + return self._properties["routineId"] + + @property + def path(self): + """str: URL path for the routine's APIs.""" + return "/projects/%s/datasets/%s/routines/%s" % ( + self.project, + self.dataset_id, + self.routine_id, + ) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a routine reference given its API representation. + + Args: + resource (Dict[str, object]): + Routine reference representation returned from the API. + + Returns: + google.cloud.bigquery.routine.RoutineReference: + Routine reference parsed from ``resource``. + """ + ref = cls() + ref._properties = resource + return ref + + @classmethod + def from_string(cls, routine_id, default_project=None): + """Factory: construct a routine reference from routine ID string. + + Args: + routine_id (str): + A routine ID in standard SQL format. If ``default_project`` + is not specified, this must included a project ID, dataset + ID, and routine ID, each separated by ``.``. + default_project (str): + Optional. The project ID to use when ``routine_id`` does not + include a project ID. + + Returns: + google.cloud.bigquery.routine.RoutineReference: + Routine reference parsed from ``routine_id``. + + Raises: + ValueError: + If ``routine_id`` is not a fully-qualified routine ID in + standard SQL format. + """ + proj, dset, routine = _helpers._parse_3_part_id( + routine_id, default_project=default_project, property_name="routine_id" + ) + return cls.from_api_repr( + {"projectId": proj, "datasetId": dset, "routineId": routine} + ) + + def to_api_repr(self): + """Construct the API resource representation of this routine reference. + + Returns: + Dict[str, object]: + Routine reference represented as an API resource. + """ + return self._properties + + def __eq__(self, other): + """Two RoutineReferences are equal if they point to the same routine.""" + if not isinstance(other, RoutineReference): + return NotImplemented + return str(self) == str(other) + + def __hash__(self): + return hash(str(self)) + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return "RoutineReference.from_string('{}')".format(str(self)) + + def __str__(self): + """String representation of the reference. + + This is a fully-qualified ID, including the project ID and dataset ID. + """ + return "{}.{}.{}".format(self.project, self.dataset_id, self.routine_id) diff --git a/packages/google-cloud-bigquery/samples/create_routine.py b/packages/google-cloud-bigquery/samples/create_routine.py new file mode 100644 index 000000000000..18b999980d72 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/create_routine.py @@ -0,0 +1,46 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def main(client, routine_id): + # [START bigquery_create_routine] + from google.cloud import bigquery + from google.cloud import bigquery_v2 + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Choose a fully-qualified ID for the routine. + # routine_id = "my-project.my_dataset.my_routine" + + routine = bigquery.Routine( + routine_id, + type_="SCALAR_FUNCTION", + language="SQL", + body="x * 3", + arguments=[ + bigquery.RoutineArgument( + name="x", + data_type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + ), + ) + ], + ) + + routine = client.create_routine(routine) + + print("Created routine {}".format(routine.reference)) + # [END bigquery_create_routine] + return routine diff --git a/packages/google-cloud-bigquery/samples/create_routine_ddl.py b/packages/google-cloud-bigquery/samples/create_routine_ddl.py new file mode 100644 index 000000000000..aa6254b1139a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/create_routine_ddl.py @@ -0,0 +1,44 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def main(client, routine_id): + # [START bigquery_create_routine_ddl] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Choose a fully-qualified ID for the routine. + # routine_id = "my-project.my_dataset.my_routine" + + sql = """ + CREATE FUNCTION `{}`( + arr ARRAY> + ) AS ( + (SELECT SUM(IF(elem.name = "foo",elem.val,null)) FROM UNNEST(arr) AS elem) + ) + """.format( + routine_id + ) + + # Initiate the query to create the routine. + query_job = client.query(sql) + + # Wait for the query to complete. + query_job.result() + + print("Created routine {}".format(query_job.ddl_target_routine)) + # [END bigquery_create_routine_ddl] diff --git a/packages/google-cloud-bigquery/samples/delete_routine.py b/packages/google-cloud-bigquery/samples/delete_routine.py new file mode 100644 index 000000000000..505faa4780f3 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/delete_routine.py @@ -0,0 +1,30 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def main(client, routine_id): + # [START bigquery_delete_routine] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set the fully-qualified ID for the routine. + # routine_id = "my-project.my_dataset.my_routine" + + client.delete_routine(routine_id) + # [END bigquery_delete_routine] + + print("Deleted routine {}.".format(routine_id)) diff --git a/packages/google-cloud-bigquery/samples/get_routine.py b/packages/google-cloud-bigquery/samples/get_routine.py new file mode 100644 index 000000000000..5850d8d06477 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/get_routine.py @@ -0,0 +1,39 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def main(client, routine_id): + # [START bigquery_get_routine] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set the fully-qualified ID for the routine. + # routine_id = "my-project.my_dataset.my_routine" + + routine = client.get_routine(routine_id) + + print("Routine `{}`:".format(routine.reference)) + print(" Type: '{}'".format(routine.type_)) + print(" Language: '{}'".format(routine.language)) + print(" Arguments:") + + for argument in routine.arguments: + print(" Name: '{}'".format(argument.name)) + print(" Type: '{}'".format(argument.type_)) + + # [END bigquery_get_routine] + return routine diff --git a/packages/google-cloud-bigquery/samples/list_routines.py b/packages/google-cloud-bigquery/samples/list_routines.py new file mode 100644 index 000000000000..9e90c87a3d9c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/list_routines.py @@ -0,0 +1,34 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def main(client, dataset_id): + + # [START bigquery_list_routines] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset that contains + # the routines you are listing. + # dataset_id = 'your-project.your_dataset' + + routines = client.list_routines(dataset_id) + + print("Routines contained in dataset {}:".format(dataset_id)) + for routine in routines: + print(routine.reference) + # [END bigquery_list_routines] diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index 629b23473b01..fe5391ee8a4d 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -18,6 +18,7 @@ import pytest from google.cloud import bigquery +from google.cloud import bigquery_v2 @pytest.fixture(scope="module") @@ -44,6 +45,15 @@ def random_dataset_id(client): client.delete_dataset(random_dataset_id, delete_contents=True, not_found_ok=True) +@pytest.fixture +def random_routine_id(client, dataset_id): + now = datetime.datetime.now() + random_routine_id = "example_routine_{}_{}".format( + now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] + ) + return "{}.{}".format(dataset_id, random_routine_id) + + @pytest.fixture def dataset_id(client): now = datetime.datetime.now() @@ -68,6 +78,31 @@ def table_id(client, dataset_id): client.delete_table(table, not_found_ok=True) +@pytest.fixture +def routine_id(client, dataset_id): + now = datetime.datetime.now() + routine_id = "python_samples_{}_{}".format( + now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] + ) + + routine = bigquery.Routine("{}.{}".format(dataset_id, routine_id)) + routine.type_ = "SCALAR_FUNCTION" + routine.language = "SQL" + routine.body = "x * 3" + routine.arguments = [ + bigquery.RoutineArgument( + name="x", + data_type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + ), + ) + ] + + routine = client.create_routine(routine) + yield "{}.{}.{}".format(routine.project, routine.dataset_id, routine.routine_id) + client.delete_routine(routine, not_found_ok=True) + + @pytest.fixture def model_id(client, dataset_id): model_id = "{}.{}".format(dataset_id, uuid.uuid4().hex) diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table.py b/packages/google-cloud-bigquery/samples/tests/test_create_table.py index 903f76b536ea..093ee6e94277 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table.py @@ -16,7 +16,6 @@ def test_create_table(capsys, client, random_table_id): - create_table.create_table(client, random_table_id) out, err = capsys.readouterr() assert "Created table {}".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py new file mode 100644 index 000000000000..5905d2e69439 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py @@ -0,0 +1,89 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +from google.cloud import bigquery_v2 + + +def test_create_routine(capsys, client, random_routine_id): + from .. import create_routine + + create_routine.main(client, random_routine_id) + out, err = capsys.readouterr() + assert "Created routine {}".format(random_routine_id) in out + + +def test_create_routine_ddl(capsys, client, random_routine_id): + from .. import create_routine_ddl + + create_routine_ddl.main(client, random_routine_id) + routine = client.get_routine(random_routine_id) + out, err = capsys.readouterr() + + assert "Created routine {}".format(random_routine_id) in out + return routine + assert routine.type_ == "SCALAR_FUNCTION" + assert routine.language == "SQL" + expected_arguments = [ + bigquery.RoutineArgument( + name="arr", + data_type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.ARRAY, + array_element_type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.STRUCT, + struct_type=bigquery_v2.types.StandardSqlStructType( + fields=[ + bigquery_v2.types.StandardSqlField( + name="name", + type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.STRING + ), + ), + bigquery_v2.types.StandardSqlField( + name="val", + type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + ), + ), + ] + ), + ), + ), + ) + ] + assert routine.arguments == expected_arguments + + +def test_list_routines(capsys, client, dataset_id, routine_id): + from .. import list_routines + + list_routines.main(client, dataset_id) + out, err = capsys.readouterr() + assert "Routines contained in dataset {}:".format(dataset_id) in out + assert routine_id in out + + +def test_delete_routine(capsys, client, routine_id): + from .. import delete_routine + + delete_routine.main(client, routine_id) + out, err = capsys.readouterr() + assert "Deleted routine {}.".format(routine_id) in out + + +def test_update_routine(client, routine_id): + from .. import update_routine + + routine = update_routine.main(client, routine_id) + assert routine.body == "x * 4" diff --git a/packages/google-cloud-bigquery/samples/update_routine.py b/packages/google-cloud-bigquery/samples/update_routine.py new file mode 100644 index 000000000000..8683e761562f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/update_routine.py @@ -0,0 +1,44 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def main(client, routine_id): + # [START bigquery_update_routine] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set the fully-qualified ID for the routine. + # routine_id = "my-project.my_dataset.my_routine" + + routine = client.get_routine(routine_id) + + routine.body = "x * 4" + + routine = client.update_routine( + routine, + [ + "body", + # Due to a limitation of the API, all fields are required, not just + # those that have been updated. + "arguments", + "language", + "type_", + "return_type", + ], + ) + # [END bigquery_update_routine] + return routine diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index eba4c3b6adef..2213bc7c88da 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -59,6 +59,7 @@ from google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests from google.cloud import bigquery +from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table @@ -1864,6 +1865,40 @@ def test_insert_rows_nested_nested_dictionary(self): expected_rows = [("Some value", record)] self.assertEqual(row_tuples, expected_rows) + def test_create_routine(self): + routine_name = "test_routine" + dataset = self.temp_dataset(_make_dataset_id("create_routine")) + float64_type = bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.FLOAT64 + ) + routine = bigquery.Routine( + dataset.routine(routine_name), + language="JAVASCRIPT", + type_="SCALAR_FUNCTION", + return_type=float64_type, + imported_libraries=["gs://cloud-samples-data/bigquery/udfs/max-value.js"], + ) + routine.arguments = [ + bigquery.RoutineArgument( + name="arr", + data_type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.ARRAY, + array_element_type=float64_type, + ), + ) + ] + routine.body = "return maxValue(arr)" + query_string = "SELECT `{}`([-100.0, 3.14, 100.0, 42.0]) as max_value;".format( + str(routine.reference) + ) + + routine = retry_403(Config.CLIENT.create_routine)(routine) + query_job = retry_403(Config.CLIENT.query)(query_string) + rows = list(query_job.result()) + + assert len(rows) == 1 + assert rows[0].max_value == 100.0 + def test_create_table_rows_fetch_nested_schema(self): table_name = "test_table" dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema")) diff --git a/packages/google-cloud-bigquery/tests/unit/routine/__init__.py b/packages/google-cloud-bigquery/tests/unit/routine/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py new file mode 100644 index 000000000000..02d4a2ee2883 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py @@ -0,0 +1,305 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import pytest + +import google.cloud._helpers +from google.cloud import bigquery_v2 + + +@pytest.fixture +def target_class(): + from google.cloud.bigquery.routine import Routine + + return Routine + + +@pytest.fixture +def object_under_test(target_class): + return target_class("project-id.dataset_id.routine_id") + + +def test_ctor(target_class): + from google.cloud.bigquery.routine import RoutineReference + + ref = RoutineReference.from_string("my-proj.my_dset.my_routine") + actual_routine = target_class(ref) + assert actual_routine.reference == ref + assert ( + actual_routine.path == "/projects/my-proj/datasets/my_dset/routines/my_routine" + ) + + +def test_ctor_w_string(target_class): + from google.cloud.bigquery.routine import RoutineReference + + routine_id = "my-proj.my_dset.my_routine" + ref = RoutineReference.from_string(routine_id) + actual_routine = target_class(routine_id) + assert actual_routine.reference == ref + + +def test_ctor_w_properties(target_class): + from google.cloud.bigquery.routine import RoutineArgument + from google.cloud.bigquery.routine import RoutineReference + + routine_id = "my-proj.my_dset.my_routine" + arguments = [ + RoutineArgument( + name="x", + data_type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + ), + ) + ] + body = "x * 3" + language = "SQL" + return_type = bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + ) + type_ = "SCALAR_FUNCTION" + + actual_routine = target_class( + routine_id, + arguments=arguments, + body=body, + language=language, + return_type=return_type, + type_=type_, + ) + + ref = RoutineReference.from_string(routine_id) + assert actual_routine.reference == ref + assert actual_routine.arguments == arguments + assert actual_routine.body == body + assert actual_routine.language == language + assert actual_routine.return_type == return_type + assert actual_routine.type_ == type_ + + +def test_from_api_repr(target_class): + from google.cloud.bigquery.routine import RoutineArgument + from google.cloud.bigquery.routine import RoutineReference + + creation_time = datetime.datetime( + 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + modified_time = datetime.datetime( + 2011, 10, 1, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + resource = { + "routineReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + }, + "etag": "abcdefg", + "creationTime": str(google.cloud._helpers._millis(creation_time)), + "lastModifiedTime": str(google.cloud._helpers._millis(modified_time)), + "definitionBody": "42", + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + "someNewField": "someValue", + } + actual_routine = target_class.from_api_repr(resource) + + assert actual_routine.project == "my-project" + assert actual_routine.dataset_id == "my_dataset" + assert actual_routine.routine_id == "my_routine" + assert ( + actual_routine.path + == "/projects/my-project/datasets/my_dataset/routines/my_routine" + ) + assert actual_routine.reference == RoutineReference.from_string( + "my-project.my_dataset.my_routine" + ) + assert actual_routine.etag == "abcdefg" + assert actual_routine.created == creation_time + assert actual_routine.modified == modified_time + assert actual_routine.arguments == [ + RoutineArgument( + name="x", + data_type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + ), + ) + ] + assert actual_routine.body == "42" + assert actual_routine.language == "SQL" + assert actual_routine.return_type == bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + ) + assert actual_routine.type_ == "SCALAR_FUNCTION" + assert actual_routine._properties["someNewField"] == "someValue" + + +def test_from_api_repr_w_minimal_resource(target_class): + from google.cloud.bigquery.routine import RoutineReference + + resource = { + "routineReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + } + actual_routine = target_class.from_api_repr(resource) + assert actual_routine.reference == RoutineReference.from_string( + "my-project.my_dataset.my_routine" + ) + assert actual_routine.etag is None + assert actual_routine.created is None + assert actual_routine.modified is None + assert actual_routine.arguments == [] + assert actual_routine.body is None + assert actual_routine.language is None + assert actual_routine.return_type is None + assert actual_routine.type_ is None + + +def test_from_api_repr_w_unknown_fields(target_class): + from google.cloud.bigquery.routine import RoutineReference + + resource = { + "routineReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + }, + "thisFieldIsNotInTheProto": "just ignore me", + } + actual_routine = target_class.from_api_repr(resource) + assert actual_routine.reference == RoutineReference.from_string( + "my-project.my_dataset.my_routine" + ) + assert actual_routine._properties is resource + + +@pytest.mark.parametrize( + "resource,filter_fields,expected", + [ + ( + { + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "definitionBody": "x * 3", + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + }, + ["arguments"], + {"arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}]}, + ), + ( + { + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "definitionBody": "x * 3", + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + }, + ["body"], + {"definitionBody": "x * 3"}, + ), + ( + { + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "definitionBody": "x * 3", + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + }, + ["language"], + {"language": "SQL"}, + ), + ( + { + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "definitionBody": "x * 3", + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + }, + ["return_type"], + {"returnType": {"typeKind": "INT64"}}, + ), + ( + { + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "definitionBody": "x * 3", + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + }, + ["type_"], + {"routineType": "SCALAR_FUNCTION"}, + ), + ( + {}, + ["arguments", "language", "body", "type_", "return_type"], + { + "arguments": None, + "definitionBody": None, + "language": None, + "returnType": None, + "routineType": None, + }, + ), + ( + {"someNewField": "someValue"}, + ["someNewField"], + {"someNewField": "someValue"}, + ), + ], +) +def test_build_resource(object_under_test, resource, filter_fields, expected): + object_under_test._properties = resource + actual_routine = object_under_test._build_resource(filter_fields) + assert actual_routine == expected + + +def test_set_arguments_w_none(object_under_test): + object_under_test.arguments = None + assert object_under_test.arguments == [] + assert object_under_test._properties["arguments"] == [] + + +def test_set_imported_libraries(object_under_test): + imported_libraries = ["gs://cloud-samples-data/bigquery/udfs/max-value.js"] + object_under_test.imported_libraries = imported_libraries + assert object_under_test.imported_libraries == imported_libraries + assert object_under_test._properties["importedLibraries"] == imported_libraries + + +def test_set_imported_libraries_w_none(object_under_test): + object_under_test.imported_libraries = None + assert object_under_test.imported_libraries == [] + assert object_under_test._properties["importedLibraries"] == [] + + +def test_set_return_type_w_none(object_under_test): + object_under_test.return_type = None + assert object_under_test.return_type is None + assert object_under_test._properties["returnType"] is None + + +def test_repr(target_class): + model = target_class("my-proj.my_dset.my_routine") + actual_routine = repr(model) + assert actual_routine == "Routine('my-proj.my_dset.my_routine')" diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py new file mode 100644 index 000000000000..7d17b5fc703f --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from google.cloud import bigquery_v2 + + +@pytest.fixture +def target_class(): + from google.cloud.bigquery.routine import RoutineArgument + + return RoutineArgument + + +def test_ctor(target_class): + data_type = bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + ) + actual_arg = target_class( + name="field_name", kind="FIXED_TYPE", mode="IN", data_type=data_type + ) + assert actual_arg.name == "field_name" + assert actual_arg.kind == "FIXED_TYPE" + assert actual_arg.mode == "IN" + assert actual_arg.data_type == data_type + + +def test_from_api_repr(target_class): + resource = { + "argumentKind": "FIXED_TYPE", + "dataType": {"typeKind": "INT64"}, + "mode": "IN", + "name": "field_name", + } + actual_arg = target_class.from_api_repr(resource) + assert actual_arg.name == "field_name" + assert actual_arg.kind == "FIXED_TYPE" + assert actual_arg.mode == "IN" + assert actual_arg.data_type == bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + ) + + +def test_from_api_repr_w_minimal_resource(target_class): + resource = {} + actual_arg = target_class.from_api_repr(resource) + assert actual_arg.name is None + assert actual_arg.kind is None + assert actual_arg.mode is None + assert actual_arg.data_type is None + + +def test_from_api_repr_w_unknown_fields(target_class): + resource = {"thisFieldIsNotInTheProto": "just ignore me"} + actual_arg = target_class.from_api_repr(resource) + assert actual_arg._properties is resource + + +def test_eq(target_class): + data_type = bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + ) + arg = target_class( + name="field_name", kind="FIXED_TYPE", mode="IN", data_type=data_type + ) + arg_too = target_class( + name="field_name", kind="FIXED_TYPE", mode="IN", data_type=data_type + ) + assert arg == arg_too + assert not (arg != arg_too) + + other_arg = target_class() + assert not (arg == other_arg) + assert arg != other_arg + + notanarg = object() + assert not (arg == notanarg) + assert arg != notanarg + + +def test_repr(target_class): + arg = target_class(name="field_name", kind="FIXED_TYPE", mode="IN", data_type=None) + actual_repr = repr(arg) + assert actual_repr == ( + "RoutineArgument(data_type=None, kind='FIXED_TYPE', mode='IN', name='field_name')" + ) diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine_reference.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine_reference.py new file mode 100644 index 000000000000..9d3d551a6294 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine_reference.py @@ -0,0 +1,138 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +@pytest.fixture +def target_class(): + from google.cloud.bigquery.routine import RoutineReference + + return RoutineReference + + +def test_from_api_repr(target_class): + resource = { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + got = target_class.from_api_repr(resource) + assert got.project == "my-project" + assert got.dataset_id == "my_dataset" + assert got.routine_id == "my_routine" + assert got.path == "/projects/my-project/datasets/my_dataset/routines/my_routine" + + +def test_from_api_repr_w_unknown_fields(target_class): + resource = { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + "thisFieldIsNotInTheProto": "just ignore me", + } + got = target_class.from_api_repr(resource) + assert got.project == "my-project" + assert got.dataset_id == "my_dataset" + assert got.routine_id == "my_routine" + assert got._properties is resource + + +def test_to_api_repr(target_class): + ref = target_class.from_string("my-project.my_dataset.my_routine") + got = ref.to_api_repr() + assert got == { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + + +def test_from_string(target_class): + got = target_class.from_string("string-project.string_dataset.string_routine") + assert got.project == "string-project" + assert got.dataset_id == "string_dataset" + assert got.routine_id == "string_routine" + assert got.path == ( + "/projects/string-project/datasets/string_dataset/routines/string_routine" + ) + + +def test_from_string_legacy_string(target_class): + with pytest.raises(ValueError): + target_class.from_string("string-project:string_dataset.string_routine") + + +def test_from_string_not_fully_qualified(target_class): + with pytest.raises(ValueError): + target_class.from_string("string_routine") + + with pytest.raises(ValueError): + target_class.from_string("string_dataset.string_routine") + + with pytest.raises(ValueError): + target_class.from_string("a.b.c.d") + + +def test_from_string_with_default_project(target_class): + got = target_class.from_string( + "string_dataset.string_routine", default_project="default-project" + ) + assert got.project == "default-project" + assert got.dataset_id == "string_dataset" + assert got.routine_id == "string_routine" + + +def test_from_string_ignores_default_project(target_class): + got = target_class.from_string( + "string-project.string_dataset.string_routine", + default_project="default-project", + ) + assert got.project == "string-project" + assert got.dataset_id == "string_dataset" + assert got.routine_id == "string_routine" + + +def test_eq(target_class): + routine = target_class.from_string("my-proj.my_dset.my_routine") + routine_too = target_class.from_string("my-proj.my_dset.my_routine") + assert routine == routine_too + assert not (routine != routine_too) + + other_routine = target_class.from_string("my-proj.my_dset.my_routine2") + assert not (routine == other_routine) + assert routine != other_routine + + notaroutine = object() + assert not (routine == notaroutine) + assert routine != notaroutine + + +def test_hash(target_class): + routine = target_class.from_string("my-proj.my_dset.my_routine") + routine2 = target_class.from_string("my-proj.my_dset.routine2") + got = {routine: "hello", routine2: "world"} + assert got[routine] == "hello" + assert got[routine2] == "world" + + routine_too = target_class.from_string("my-proj.my_dset.my_routine") + assert got[routine_too] == "hello" + + +def test_repr(target_class): + routine = target_class.from_string("my-proj.my_dset.my_routine") + got = repr(routine) + assert got == "RoutineReference.from_string('my-proj.my_dset.my_routine')" diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ea4aad534a13..8ad9dc8858c6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -41,8 +41,9 @@ import google.api_core.exceptions from google.api_core.gapic_v1 import client_info import google.cloud._helpers -from tests.unit.helpers import make_connection +from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference +from tests.unit.helpers import make_connection def _make_credentials(): @@ -866,6 +867,98 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(self): ] ) + def test_create_routine_w_minimal_resource(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineReference + + creds = _make_credentials() + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } + } + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection(resource) + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + actual_routine = client.create_routine(routine) + + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/test-routine-project/datasets/test_routines/routines", + data=resource, + ) + self.assertEqual( + actual_routine.reference, RoutineReference.from_string(full_routine_id) + ) + + def test_create_routine_w_conflict(self): + from google.cloud.bigquery.routine import Routine + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists") + ) + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + client.create_routine(routine) + + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/test-routine-project/datasets/test_routines/routines", + data=resource, + ) + + def test_create_routine_w_conflict_exists_ok(self): + from google.cloud.bigquery.routine import Routine + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } + } + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists"), resource + ) + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + actual_routine = client.create_routine(routine, exists_ok=True) + + self.assertEqual(actual_routine.project, "test-routine-project") + self.assertEqual(actual_routine.dataset_id, "test_routines") + self.assertEqual(actual_routine.routine_id, "minimal_routine") + conn.api_request.assert_has_calls( + [ + mock.call( + method="POST", + path="/projects/test-routine-project/datasets/test_routines/routines", + data=resource, + ), + mock.call( + method="GET", + path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + ), + ] + ) + def test_create_table_w_day_partition(self): from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TimePartitioning @@ -1298,6 +1391,52 @@ def test_get_model_w_string(self): conn.api_request.assert_called_once_with(method="GET", path="/%s" % path) self.assertEqual(got.model_id, self.MODEL_ID) + def test_get_routine(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineReference + + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routines = [ + full_routine_id, + Routine(full_routine_id), + RoutineReference.from_string(full_routine_id), + ] + for routine in routines: + creds = _make_credentials() + resource = { + "etag": "im-an-etag", + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + }, + "routineType": "SCALAR_FUNCTION", + } + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection(resource) + + actual_routine = client.get_routine(routine) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + ) + self.assertEqual( + actual_routine.reference, + RoutineReference.from_string(full_routine_id), + msg="routine={}".format(repr(routine)), + ) + self.assertEqual( + actual_routine.etag, + "im-an-etag", + msg="routine={}".format(repr(routine)), + ) + self.assertEqual( + actual_routine.type_, + "SCALAR_FUNCTION", + msg="routine={}".format(repr(routine)), + ) + def test_get_table(self): path = "projects/%s/datasets/%s/tables/%s" % ( self.PROJECT, @@ -1499,6 +1638,66 @@ def test_update_model(self): req = conn.api_request.call_args self.assertEqual(req[1]["headers"]["If-Match"], "etag") + def test_update_routine(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineArgument + + full_routine_id = "routines-project.test_routines.updated_routine" + resource = { + "routineReference": { + "projectId": "routines-project", + "datasetId": "test_routines", + "routineId": "updated_routine", + }, + "routineType": "SCALAR_FUNCTION", + "language": "SQL", + "definitionBody": "x * 3", + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "returnType": None, + "someNewField": "someValue", + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection(resource, resource) + routine = Routine(full_routine_id) + routine.arguments = [ + RoutineArgument( + name="x", + data_type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + ), + ) + ] + routine.body = "x * 3" + routine.language = "SQL" + routine.type_ = "SCALAR_FUNCTION" + routine._properties["someNewField"] = "someValue" + + actual_routine = client.update_routine( + routine, + ["arguments", "language", "body", "type_", "return_type", "someNewField"], + ) + + # TODO: routineReference isn't needed when the Routines API supports + # partial updates. + sent = resource + conn.api_request.assert_called_once_with( + method="PUT", + data=sent, + path="/projects/routines-project/datasets/test_routines/routines/updated_routine", + headers=None, + ) + self.assertEqual(actual_routine.arguments, routine.arguments) + self.assertEqual(actual_routine.body, routine.body) + self.assertEqual(actual_routine.language, routine.language) + self.assertEqual(actual_routine.type_, routine.type_) + + # ETag becomes If-Match header. + routine._properties["etag"] = "im-an-etag" + client.update_routine(routine, []) + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "im-an-etag") + def test_update_table(self): from google.cloud.bigquery.table import Table, SchemaField @@ -1877,6 +2076,82 @@ def test_list_models_wrong_type(self): with self.assertRaises(TypeError): client.list_models(client.dataset(self.DS_ID).model("foo")) + def test_list_routines_empty(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection({}) + + iterator = client.list_routines("test-routines.test_routines") + page = six.next(iterator.pages) + routines = list(page) + token = iterator.next_page_token + + self.assertEqual(routines, []) + self.assertIsNone(token) + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/test-routines/datasets/test_routines/routines", + query_params={}, + ) + + def test_list_routines_defaults(self): + from google.cloud.bigquery.routine import Routine + + project_id = "test-routines" + dataset_id = "test_routines" + path = "/projects/test-routines/datasets/test_routines/routines" + routine_1 = "routine_one" + routine_2 = "routine_two" + token = "TOKEN" + resource = { + "nextPageToken": token, + "routines": [ + { + "routineReference": { + "routineId": routine_1, + "datasetId": dataset_id, + "projectId": project_id, + } + }, + { + "routineReference": { + "routineId": routine_2, + "datasetId": dataset_id, + "projectId": project_id, + } + }, + ], + } + + creds = _make_credentials() + client = self._make_one(project=project_id, credentials=creds) + conn = client._connection = make_connection(resource) + dataset = client.dataset(dataset_id) + + iterator = client.list_routines(dataset) + self.assertIs(iterator.dataset, dataset) + page = six.next(iterator.pages) + routines = list(page) + actual_token = iterator.next_page_token + + self.assertEqual(len(routines), len(resource["routines"])) + for found, expected in zip(routines, resource["routines"]): + self.assertIsInstance(found, Routine) + self.assertEqual( + found.routine_id, expected["routineReference"]["routineId"] + ) + self.assertEqual(actual_token, token) + + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={} + ) + + def test_list_routines_wrong_type(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + with self.assertRaises(TypeError): + client.list_routines(client.dataset(self.DS_ID).table("foo")) + def test_list_tables_defaults(self): from google.cloud.bigquery.table import TableListItem @@ -2126,6 +2401,67 @@ def test_delete_model_w_not_found_ok_true(self): conn.api_request.assert_called_with(method="DELETE", path=path) + def test_delete_routine(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineReference + + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routines = [ + full_routine_id, + Routine(full_routine_id), + RoutineReference.from_string(full_routine_id), + ] + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(*([{}] * len(routines))) + + for routine in routines: + client.delete_routine(routine) + conn.api_request.assert_called_with( + method="DELETE", + path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + ) + + def test_delete_routine_w_wrong_type(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + with self.assertRaises(TypeError): + client.delete_routine(client.dataset(self.DS_ID)) + + def test_delete_routine_w_not_found_ok_false(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("routine not found") + ) + + with self.assertRaises(google.api_core.exceptions.NotFound): + client.delete_routine("routines-project.test_routines.test_routine") + + conn.api_request.assert_called_with( + method="DELETE", + path="/projects/routines-project/datasets/test_routines/routines/test_routine", + ) + + def test_delete_routine_w_not_found_ok_true(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("routine not found") + ) + + client.delete_routine( + "routines-project.test_routines.test_routine", not_found_ok=True + ) + + conn.api_request.assert_called_with( + method="DELETE", + path="/projects/routines-project/datasets/test_routines/routines/test_routine", + ) + def test_delete_table(self): from google.cloud.bigquery.table import Table diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 96a2ace7da0c..b8805a9c7ce3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -158,6 +158,13 @@ def test_model(self): self.assertEqual(model_ref.dataset_id, "dataset_1") self.assertEqual(model_ref.model_id, "model_1") + def test_routine(self): + dataset_ref = self._make_one("some-project-1", "dataset_1") + routine_ref = dataset_ref.routine("routine_1") + self.assertEqual(routine_ref.project, "some-project-1") + self.assertEqual(routine_ref.dataset_id, "dataset_1") + self.assertEqual(routine_ref.routine_id, "routine_1") + def test_to_api_repr(self): dataset = self._make_one("project_1", "dataset_1") diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index abb2a2c4ec1e..3561fb857647 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -3845,6 +3845,30 @@ def test_ddl_operation_performed(self): query_stats["ddlOperationPerformed"] = op self.assertEqual(job.ddl_operation_performed, op) + def test_ddl_target_routine(self): + from google.cloud.bigquery.routine import RoutineReference + + ref_routine = { + "projectId": self.PROJECT, + "datasetId": "ddl_ds", + "routineId": "targetroutine", + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_target_routine) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.ddl_target_routine) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.ddl_target_routine) + + query_stats["ddlTargetRoutine"] = ref_routine + self.assertIsInstance(job.ddl_target_routine, RoutineReference) + self.assertEqual(job.ddl_target_routine.routine_id, "targetroutine") + self.assertEqual(job.ddl_target_routine.dataset_id, "ddl_ds") + self.assertEqual(job.ddl_target_routine.project, self.PROJECT) + def test_ddl_target_table(self): from google.cloud.bigquery.table import TableReference From bdd212af314d14a6e4e6eb48f89e42b34ed4fe7e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 1 Jul 2019 16:37:07 -0700 Subject: [PATCH 0611/2016] BigQuery: Fix unit tests for new versions of fastparquet and pytest (#8553) * Exclude fastparquet. * Use exeception from pytest.raises context instead of context manager itself. --- packages/google-cloud-bigquery/setup.py | 5 +++++ .../tests/unit/test__pandas_helpers.py | 8 ++++---- packages/google-cloud-bigquery/tests/unit/test_magics.py | 4 ++-- packages/google-cloud-bigquery/tests/unit/test_table.py | 8 ++++---- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 8592a232ecb3..afe1cf15ca79 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -51,6 +51,11 @@ all_extras = [] for extra in extras: + if extra == "fastparquet": + # Skip fastparquet from "all" because it is redundant with pyarrow and + # creates a dependency on pre-release versions of numpy. See: + # https://github.com/googleapis/google-cloud-python/issues/8549 + continue all_extras.extend(extras[extra]) extras["all"] = all_extras diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 1c95aef0cec9..10189a6d3f2f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -533,16 +533,16 @@ def test_dataframe_to_arrow_w_unknown_type(module_under_test): @pytest.mark.skipIf(pandas is None, "Requires `pandas`") def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): monkeypatch.setattr(module_under_test, "pyarrow", None) - with pytest.raises(ValueError) as exc: + with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) - assert "pyarrow is required" in str(exc) + assert "pyarrow is required" in str(exc_context.value) @pytest.mark.skipIf(pandas is None, "Requires `pandas`") @pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dataframe_to_parquet_w_missing_columns(module_under_test, monkeypatch): - with pytest.raises(ValueError) as exc: + with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( pandas.DataFrame(), (schema.SchemaField("not_found", "STRING"),), None ) - assert "columns in schema must match" in str(exc) + assert "columns in schema must match" in str(exc_context.value) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index f3e64a46faca..ab5b14174a7a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -273,10 +273,10 @@ def test__make_bqstorage_client_true_raises_import_error(monkeypatch): google.auth.credentials.Credentials, instance=True ) - with pytest.raises(ImportError) as exc: + with pytest.raises(ImportError) as exc_context: magics._make_bqstorage_client(True, credentials_mock) - assert "google-cloud-bigquery-storage" in str(exc) + assert "google-cloud-bigquery-storage" in str(exc_context.value) @pytest.mark.usefixtures("ipython_interactive") diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a0ded16173d4..72d6cf401c69 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2226,9 +2226,9 @@ def test_to_dataframe_w_bqstorage_raises_import_error(self): with mock.patch.object(mut, "bigquery_storage_v1beta1", None), pytest.raises( ValueError - ) as exc: + ) as exc_context: row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - assert mut._NO_BQSTORAGE_ERROR in str(exc) + assert mut._NO_BQSTORAGE_ERROR in str(exc_context.value) @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" @@ -2514,6 +2514,6 @@ def test_table_reference_to_bqstorage_raises_import_error(): for cls in classes: with mock.patch.object(mut, "bigquery_storage_v1beta1", None), pytest.raises( ValueError - ) as exc: + ) as exc_context: cls.from_string("my-project.my_dataset.my_table").to_bqstorage() - assert mut._NO_BQSTORAGE_ERROR in str(exc) + assert mut._NO_BQSTORAGE_ERROR in str(exc_context.value) From 1709c623cb21079697ca5f669604e3e6d5df4a66 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 2 Jul 2019 09:43:21 -0700 Subject: [PATCH 0612/2016] Release 1.16.0 (#8548) --- packages/google-cloud-bigquery/CHANGELOG.md | 19 +++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index c462c40a02d8..2f2bb2688ff9 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.16.0 + +07-01-2019 10:22 PDT + +### New Features + +- Add Routines API. ([#8491](https://github.com/googleapis/google-cloud-python/pull/8491)) +- Add more stats to Models API, such as `optimization_strategy` (via synth). ([#8344](https://github.com/googleapis/google-cloud-python/pull/8344)) + +### Documentation + +- Add docs job to publish to googleapis.dev. ([#8464](https://github.com/googleapis/google-cloud-python/pull/8464)) +- Add sample demonstrating how to create a job. ([#8422](https://github.com/googleapis/google-cloud-python/pull/8422)) +- Use autodetected location in code samples. ([#8340](https://github.com/googleapis/google-cloud-python/pull/8340), [#8341](https://github.com/googleapis/google-cloud-python/pull/8341)) + +### Internal / Testing Changes + +- Refactor `to_dataframe` to deterministicly update progress bar. ([#8303](https://github.com/googleapis/google-cloud-python/pull/8303)) + ## 1.15.0 06-14-2019 10:10 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index afe1cf15ca79..9bd4445637e8 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.15.0" +version = "1.16.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 341f4523c809f52b74d031ed6ac1352fced4b5ed Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 9 Jul 2019 18:31:41 -0700 Subject: [PATCH 0613/2016] Add `to_arrow` to get a `pyarrow.Table` from query results. (#8609) * Add `to_arrow` to get a `pyarrow.Table` from query results. An Arrow `Table` supports a richer set of types than a pandas `DataFrame`, and is the basis of many data analysis systems. It can be used in conjunction with pandas through the `Table.to_pandas()` method or the pandas extension types provided by the `fletcher` package. * Exclude pyarrow 0.14.0 due to bad manylinux wheels. --- .../google/cloud/bigquery/_helpers.py | 15 +- .../google/cloud/bigquery/_pandas_helpers.py | 60 +++- .../google/cloud/bigquery/job.py | 38 +++ .../google/cloud/bigquery/table.py | 113 +++++++ .../samples/query_to_arrow.py | 58 ++++ .../samples/tests/test_query_to_arrow.py | 29 ++ packages/google-cloud-bigquery/setup.py | 4 +- .../tests/unit/test__pandas_helpers.py | 66 +++- .../tests/unit/test_job.py | 101 +++++- .../tests/unit/test_table.py | 302 +++++++++++++++++- 10 files changed, 753 insertions(+), 33 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/query_to_arrow.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 90b1f14016b7..bb3998732a5a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -197,6 +197,14 @@ def _field_to_index_mapping(schema): return {f.name: i for i, f in enumerate(schema)} +def _field_from_json(resource, field): + converter = _CELLDATA_FROM_JSON.get(field.field_type, lambda value, _: value) + if field.mode == "REPEATED": + return [converter(item["v"], field) for item in resource] + else: + return converter(resource, field) + + def _row_tuple_from_json(row, schema): """Convert JSON row data to row with appropriate types. @@ -214,12 +222,7 @@ def _row_tuple_from_json(row, schema): """ row_data = [] for field, cell in zip(schema, row["f"]): - converter = _CELLDATA_FROM_JSON[field.field_type] - if field.mode == "REPEATED": - row_data.append([converter(item["v"], field) for item in cell["v"]]) - else: - row_data.append(converter(cell["v"], field)) - + row_data.append(_field_from_json(cell["v"], field)) return tuple(row_data) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 5261c2b99efd..5a3a9833b572 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -14,7 +14,6 @@ """Shared helper functions for connecting BigQuery and pandas.""" -import collections import concurrent.futures import warnings @@ -115,7 +114,7 @@ def bq_to_arrow_data_type(field): """ if field.mode is not None and field.mode.upper() == "REPEATED": inner_type = bq_to_arrow_data_type( - schema.SchemaField(field.name, field.field_type) + schema.SchemaField(field.name, field.field_type, fields=field.fields) ) if inner_type: return pyarrow.list_(inner_type) @@ -144,6 +143,21 @@ def bq_to_arrow_field(bq_field): return None +def bq_to_arrow_schema(bq_schema): + """Return the Arrow schema, corresponding to a given BigQuery schema. + + Returns None if any Arrow type cannot be determined. + """ + arrow_fields = [] + for bq_field in bq_schema: + arrow_field = bq_to_arrow_field(bq_field) + if arrow_field is None: + # Auto-detect the schema if there is an unknown field type. + return None + arrow_fields.append(arrow_field) + return pyarrow.schema(arrow_fields) + + def bq_to_arrow_array(series, bq_field): arrow_type = bq_to_arrow_data_type(bq_field) if bq_field.mode.upper() == "REPEATED": @@ -210,13 +224,41 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath): pyarrow.parquet.write_table(arrow_table, filepath) +def _tabledata_list_page_to_arrow(page, column_names, arrow_types): + # Iterate over the page to force the API request to get the page data. + try: + next(iter(page)) + except StopIteration: + pass + + arrays = [] + for column_index, arrow_type in enumerate(arrow_types): + arrays.append(pyarrow.array(page._columns[column_index], type=arrow_type)) + + return pyarrow.RecordBatch.from_arrays(arrays, column_names) + + +def download_arrow_tabledata_list(pages, schema): + """Use tabledata.list to construct an iterable of RecordBatches.""" + column_names = bq_to_arrow_schema(schema) or [field.name for field in schema] + arrow_types = [bq_to_arrow_data_type(field) for field in schema] + + for page in pages: + yield _tabledata_list_page_to_arrow(page, column_names, arrow_types) + + def _tabledata_list_page_to_dataframe(page, column_names, dtypes): - columns = collections.defaultdict(list) - for row in page: - for column in column_names: - columns[column].append(row[column]) - for column in dtypes: - columns[column] = pandas.Series(columns[column], dtype=dtypes[column]) + # Iterate over the page to force the API request to get the page data. + try: + next(iter(page)) + except StopIteration: + pass + + columns = {} + for column_index, column_name in enumerate(column_names): + dtype = dtypes.get(column_name) + columns[column_name] = pandas.Series(page._columns[column_index], dtype=dtype) + return pandas.DataFrame(columns, columns=column_names) @@ -350,7 +392,7 @@ def download_dataframe_bqstorage( continue # Return any remaining values after the workers finished. - while not worker_queue.empty(): + while not worker_queue.empty(): # pragma: NO COVER try: # Include a timeout because even though the queue is # non-empty, it doesn't guarantee that a subsequent call to diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 87dab59e339b..442420a7191b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2896,6 +2896,44 @@ def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY): rows._preserve_order = _contains_order_by(self.query) return rows + def to_arrow(self, progress_bar_type=None): + """[Beta] Create a class:`pyarrow.Table` by loading all pages of a + table or query. + + Args: + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + Returns: + pyarrow.Table + A :class:`pyarrow.Table` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: + If the :mod:`pyarrow` library cannot be imported. + + ..versionadded:: 1.17.0 + """ + return self.result().to_arrow(progress_bar_type=progress_bar_type) + def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Return a pandas DataFrame from a QueryJob diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 7af3bc6f48b4..8aa7788acdfa 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -33,6 +33,11 @@ except ImportError: # pragma: NO COVER pandas = None +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + try: import tqdm except ImportError: # pragma: NO COVER @@ -58,6 +63,10 @@ "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) +_NO_PYARROW_ERROR = ( + "The pyarrow library is not installed, please install " + "pandas to use the to_arrow() function." +) _NO_TQDM_ERROR = ( "A progress bar was requested, but there was an error loading the tqdm " "library. Please install tqdm to use the progress bar functionality." @@ -1394,6 +1403,72 @@ def _get_progress_bar(self, progress_bar_type): warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) return None + def _to_arrow_iterable(self): + """Create an iterable of arrow RecordBatches, to process the table as a stream.""" + for record_batch in _pandas_helpers.download_arrow_tabledata_list( + iter(self.pages), self.schema + ): + yield record_batch + + def to_arrow(self, progress_bar_type=None): + """[Beta] Create a class:`pyarrow.Table` by loading all pages of a + table or query. + + Args: + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + Returns: + pyarrow.Table + A :class:`pyarrow.Table` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: + If the :mod:`pyarrow` library cannot be imported. + + ..versionadded:: 1.17.0 + """ + if pyarrow is None: + raise ValueError(_NO_PYARROW_ERROR) + + progress_bar = self._get_progress_bar(progress_bar_type) + + record_batches = [] + for record_batch in self._to_arrow_iterable(): + record_batches.append(record_batch) + + if progress_bar is not None: + # In some cases, the number of total rows is not populated + # until the first page of rows is fetched. Update the + # progress bar's total to keep an accurate count. + progress_bar.total = progress_bar.total or self.total_rows + progress_bar.update(record_batch.num_rows) + + if progress_bar is not None: + # Indicate that the download has finished. + progress_bar.close() + + arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) + return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) + def _to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -1538,6 +1613,21 @@ class _EmptyRowIterator(object): pages = () total_rows = 0 + def to_arrow(self, progress_bar_type=None): + """[Beta] Create an empty class:`pyarrow.Table`. + + Args: + progress_bar_type (Optional[str]): + Ignored. Added for compatibility with RowIterator. + + Returns: + pyarrow.Table: + An empty :class:`pyarrow.Table`. + """ + if pyarrow is None: + raise ValueError(_NO_PYARROW_ERROR) + return pyarrow.Table.from_arrays(()) + def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Create an empty dataframe. @@ -1734,6 +1824,25 @@ def _item_to_row(iterator, resource): ) +def _tabledata_list_page_columns(schema, response): + """Make a generator of all the columns in a page from tabledata.list. + + This enables creating a :class:`pandas.DataFrame` and other + column-oriented data structures such as :class:`pyarrow.RecordBatch` + """ + columns = [] + rows = response.get("rows", []) + + def get_column_data(field_index, field): + for row in rows: + yield _helpers._field_from_json(row["f"][field_index]["v"], field) + + for field_index, field in enumerate(schema): + columns.append(get_column_data(field_index, field)) + + return columns + + # pylint: disable=unused-argument def _rows_page_start(iterator, page, response): """Grab total rows when :class:`~google.cloud.iterator.Page` starts. @@ -1747,6 +1856,10 @@ def _rows_page_start(iterator, page, response): :type response: dict :param response: The JSON API response for a page of rows in a table. """ + # Make a (lazy) copy of the page in column-oriented format for use in data + # science packages. + page._columns = _tabledata_list_page_columns(iterator._schema, response) + total_rows = response.get("totalRows") if total_rows is not None: total_rows = int(total_rows) diff --git a/packages/google-cloud-bigquery/samples/query_to_arrow.py b/packages/google-cloud-bigquery/samples/query_to_arrow.py new file mode 100644 index 000000000000..e3ddc23f889a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/query_to_arrow.py @@ -0,0 +1,58 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def main(client): + # [START bigquery_query_to_arrow] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + sql = """ + WITH races AS ( + SELECT "800M" AS race, + [STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits), + STRUCT("Makhloufi" as name, [24.5, 25.4, 26.6, 26.1] as splits), + STRUCT("Murphy" as name, [23.9, 26.0, 27.0, 26.0] as splits), + STRUCT("Bosse" as name, [23.6, 26.2, 26.5, 27.1] as splits), + STRUCT("Rotich" as name, [24.7, 25.6, 26.9, 26.4] as splits), + STRUCT("Lewandowski" as name, [25.0, 25.7, 26.3, 27.2] as splits), + STRUCT("Kipketer" as name, [23.2, 26.1, 27.3, 29.4] as splits), + STRUCT("Berian" as name, [23.7, 26.1, 27.0, 29.3] as splits)] + AS participants) + SELECT + race, + participant + FROM races r + CROSS JOIN UNNEST(r.participants) as participant; + """ + query_job = client.query(sql) + arrow_table = query_job.to_arrow() + + print( + "Downloaded {} rows, {} columns.".format( + arrow_table.num_rows, arrow_table.num_columns + ) + ) + print("\nSchema:\n{}".format(repr(arrow_table.schema))) + # [END bigquery_query_to_arrow] + return arrow_table + + +if __name__ == "__main__": + from google.cloud import bigquery + + main(bigquery.Client()) diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py new file mode 100644 index 000000000000..9e36bcee346f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py @@ -0,0 +1,29 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pyarrow + +from .. import query_to_arrow + + +def test_main(capsys, client): + + arrow_table = query_to_arrow.main(client) + out, err = capsys.readouterr() + assert "Downloaded 8 rows, 2 columns." in out + + arrow_schema = arrow_table.schema + assert arrow_schema.names == ["race", "participant"] + assert pyarrow.types.is_string(arrow_schema.types[0]) + assert pyarrow.types.is_struct(arrow_schema.types[1]) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 9bd4445637e8..5637c0f4fd53 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -42,7 +42,9 @@ "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': [ - "pyarrow>=0.4.1" + # Bad Linux release for 0.14.0. + # https://issues.apache.org/jira/browse/ARROW-5868 + "pyarrow>=0.4.1, != 0.14.0" ], "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], "fastparquet": ["fastparquet", "python-snappy"], diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 10189a6d3f2f..62902cd7a71b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -293,6 +293,51 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): assert actual.equals(expected) +@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): + fields = ( + schema.SchemaField("field01", "STRING"), + schema.SchemaField("field02", "BYTES"), + schema.SchemaField("field03", "INTEGER"), + schema.SchemaField("field04", "INT64"), + schema.SchemaField("field05", "FLOAT"), + schema.SchemaField("field06", "FLOAT64"), + schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BOOLEAN"), + schema.SchemaField("field09", "BOOL"), + schema.SchemaField("field10", "TIMESTAMP"), + schema.SchemaField("field11", "DATE"), + schema.SchemaField("field12", "TIME"), + schema.SchemaField("field13", "DATETIME"), + schema.SchemaField("field14", "GEOGRAPHY"), + ) + field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) + actual = module_under_test.bq_to_arrow_data_type(field) + expected_value_type = pyarrow.struct( + ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", pyarrow.bool_()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", module_under_test.pyarrow_timestamp()), + pyarrow.field("field11", pyarrow.date32()), + pyarrow.field("field12", module_under_test.pyarrow_time()), + pyarrow.field("field13", module_under_test.pyarrow_datetime()), + pyarrow.field("field14", pyarrow.string()), + ) + ) + assert pyarrow.types.is_list(actual) + assert pyarrow.types.is_struct(actual.value_type) + assert actual.value_type.num_children == len(fields) + assert actual.value_type.equals(expected_value_type) + + @pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): fields = ( @@ -303,8 +348,14 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): schema.SchemaField("field3", "UNKNOWN_TYPE"), ) field = schema.SchemaField("ignored_name", "RECORD", mode="NULLABLE", fields=fields) - actual = module_under_test.bq_to_arrow_data_type(field) + + with warnings.catch_warnings(record=True) as warned: + actual = module_under_test.bq_to_arrow_data_type(field) + assert actual is None + assert len(warned) == 1 + warning = warned[0] + assert "field3" in str(warning) @pytest.mark.parametrize( @@ -442,6 +493,19 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None +@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +def test_bq_to_arrow_schema_w_unknown_type(module_under_test): + fields = ( + schema.SchemaField("field1", "STRING"), + schema.SchemaField("field2", "INTEGER"), + # Don't know what to convert UNKNOWN_TYPE to, let type inference work, + # instead. + schema.SchemaField("field3", "UNKNOWN_TYPE"), + ) + actual = module_under_test.bq_to_arrow_schema(fields) + assert actual is None + + @pytest.mark.skipIf(pandas is None, "Requires `pandas`") @pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dataframe_to_arrow_w_required_fields(module_under_test): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 3561fb857647..22809c245d4b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -24,6 +24,11 @@ import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None + +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None try: from google.cloud import bigquery_storage_v1beta1 except (ImportError, AttributeError): # pragma: NO COVER @@ -4708,6 +4713,96 @@ def test_reload_w_alternate_client(self): ) self._verifyResourceProperties(job, RESOURCE) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_arrow(self): + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "totalRows": "4", + "schema": { + "fields": [ + { + "name": "spouse_1", + "type": "RECORD", + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ], + }, + { + "name": "spouse_2", + "type": "RECORD", + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ], + }, + ] + }, + } + tabledata_resource = { + "rows": [ + { + "f": [ + {"v": {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}}, + {"v": {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}}, + ] + }, + { + "f": [ + {"v": {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}}, + {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, + ] + }, + ] + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + tbl = job.to_arrow() + + self.assertIsInstance(tbl, pyarrow.Table) + self.assertEqual(tbl.num_rows, 2) + + # Check the schema. + self.assertEqual(tbl.schema[0].name, "spouse_1") + self.assertEqual(tbl.schema[0].type[0].name, "name") + self.assertEqual(tbl.schema[0].type[1].name, "age") + self.assertTrue(pyarrow.types.is_struct(tbl.schema[0].type)) + self.assertTrue(pyarrow.types.is_string(tbl.schema[0].type[0].type)) + self.assertTrue(pyarrow.types.is_int64(tbl.schema[0].type[1].type)) + self.assertEqual(tbl.schema[1].name, "spouse_2") + self.assertEqual(tbl.schema[1].type[0].name, "name") + self.assertEqual(tbl.schema[1].type[1].name, "age") + self.assertTrue(pyarrow.types.is_struct(tbl.schema[1].type)) + self.assertTrue(pyarrow.types.is_string(tbl.schema[1].type[0].type)) + self.assertTrue(pyarrow.types.is_int64(tbl.schema[1].type[1].type)) + + # Check the data. + tbl_data = tbl.to_pydict() + spouse_1 = tbl_data["spouse_1"] + self.assertEqual( + spouse_1, + [ + {"name": "Phred Phlyntstone", "age": 32}, + {"name": "Bhettye Rhubble", "age": 27}, + ], + ) + spouse_2 = tbl_data["spouse_2"] + self.assertEqual( + spouse_2, + [ + {"name": "Wylma Phlyntstone", "age": 29}, + {"name": "Bharney Rhubble", "age": 33}, + ], + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): begun_resource = self._make_resource() @@ -4721,17 +4816,19 @@ def test_to_dataframe(self): {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, ] }, + } + tabledata_resource = { "rows": [ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ], + ] } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} connection = _make_connection( - begun_resource, query_resource, done_resource, query_resource + begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 72d6cf401c69..a892dccf9f28 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -34,6 +34,12 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import pyarrow + import pyarrow.types +except ImportError: # pragma: NO COVER + pyarrow = None + try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -1364,6 +1370,19 @@ def test_total_rows_eq_zero(self): row_iterator = self._make_one() self.assertEqual(row_iterator.total_rows, 0) + @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) + def test_to_arrow_error_if_pyarrow_is_none(self): + row_iterator = self._make_one() + with self.assertRaises(ValueError): + row_iterator.to_arrow() + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_arrow(self): + row_iterator = self._make_one() + tbl = row_iterator.to_arrow() + self.assertIsInstance(tbl, pyarrow.Table) + self.assertEqual(tbl.num_rows, 0) + @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_error_if_pandas_is_none(self): row_iterator = self._make_one() @@ -1379,11 +1398,14 @@ def test_to_dataframe(self): class TestRowIterator(unittest.TestCase): + def _class_under_test(self): + from google.cloud.bigquery.table import RowIterator + + return RowIterator + def _make_one( self, client=None, api_request=None, path=None, schema=None, **kwargs ): - from google.cloud.bigquery.table import RowIterator - if client is None: client = _mock_client() @@ -1396,7 +1418,7 @@ def _make_one( if schema is None: schema = [] - return RowIterator(client, api_request, path, schema, **kwargs) + return self._class_under_test()(client, api_request, path, schema, **kwargs) def test_constructor(self): from google.cloud.bigquery.table import _item_to_row @@ -1489,6 +1511,245 @@ def test_page_size(self): query_params={"maxResults": row_iterator._page_size}, ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_arrow(self): + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField( + "child", + "RECORD", + mode="REPEATED", + fields=[ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + ), + ] + rows = [ + { + "f": [ + {"v": "Bharney Rhubble"}, + {"v": "33"}, + { + "v": [ + {"v": {"f": [{"v": "Whamm-Whamm Rhubble"}, {"v": "3"}]}}, + {"v": {"f": [{"v": "Hoppy"}, {"v": "1"}]}}, + ] + }, + ] + }, + { + "f": [ + {"v": "Wylma Phlyntstone"}, + {"v": "29"}, + { + "v": [ + {"v": {"f": [{"v": "Bepples Phlyntstone"}, {"v": "0"}]}}, + {"v": {"f": [{"v": "Dino"}, {"v": "4"}]}}, + ] + }, + ] + }, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + tbl = row_iterator.to_arrow() + + self.assertIsInstance(tbl, pyarrow.Table) + self.assertEqual(tbl.num_rows, 2) + + # Check the schema. + self.assertEqual(tbl.schema[0].name, "name") + self.assertTrue(pyarrow.types.is_string(tbl.schema[0].type)) + self.assertEqual(tbl.schema[1].name, "age") + self.assertTrue(pyarrow.types.is_int64(tbl.schema[1].type)) + child_field = tbl.schema[2] + self.assertEqual(child_field.name, "child") + self.assertTrue(pyarrow.types.is_list(child_field.type)) + self.assertTrue(pyarrow.types.is_struct(child_field.type.value_type)) + self.assertEqual(child_field.type.value_type[0].name, "name") + self.assertEqual(child_field.type.value_type[1].name, "age") + + # Check the data. + tbl_data = tbl.to_pydict() + names = tbl_data["name"] + ages = tbl_data["age"] + children = tbl_data["child"] + self.assertEqual(names, ["Bharney Rhubble", "Wylma Phlyntstone"]) + self.assertEqual(ages, [33, 29]) + self.assertEqual( + children, + [ + [ + {"name": "Whamm-Whamm Rhubble", "age": 3}, + {"name": "Hoppy", "age": 1}, + ], + [{"name": "Bepples Phlyntstone", "age": 0}, {"name": "Dino", "age": 4}], + ], + ) + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_arrow_w_nulls(self): + from google.cloud.bigquery.table import SchemaField + + schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] + rows = [ + {"f": [{"v": "Donkey"}, {"v": 32}]}, + {"f": [{"v": "Diddy"}, {"v": 29}]}, + {"f": [{"v": "Dixie"}, {"v": None}]}, + {"f": [{"v": None}, {"v": 111}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + tbl = row_iterator.to_arrow() + + self.assertIsInstance(tbl, pyarrow.Table) + self.assertEqual(tbl.num_rows, 4) + + # Check the schema. + self.assertEqual(tbl.schema[0].name, "name") + self.assertTrue(pyarrow.types.is_string(tbl.schema[0].type)) + self.assertEqual(tbl.schema[1].name, "age") + self.assertTrue(pyarrow.types.is_int64(tbl.schema[1].type)) + + # Check the data. + tbl_data = tbl.to_pydict() + names = tbl_data["name"] + ages = tbl_data["age"] + self.assertEqual(names, ["Donkey", "Diddy", "Dixie", None]) + self.assertEqual(ages, [32, 29, None, 111]) + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_arrow_w_unknown_type(self): + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField("sport", "UNKNOWN_TYPE", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}, {"v": "volleyball"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}, {"v": "basketball"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + tbl = row_iterator.to_arrow() + + self.assertIsInstance(tbl, pyarrow.Table) + self.assertEqual(tbl.num_rows, 2) + + # Check the schema. + self.assertEqual(tbl.schema[0].name, "name") + self.assertTrue(pyarrow.types.is_string(tbl.schema[0].type)) + self.assertEqual(tbl.schema[1].name, "age") + self.assertTrue(pyarrow.types.is_int64(tbl.schema[1].type)) + self.assertEqual(tbl.schema[2].name, "sport") + + # Check the data. + tbl_data = tbl.to_pydict() + names = tbl_data["name"] + ages = tbl_data["age"] + sports = tbl_data["sport"] + self.assertEqual(names, ["Bharney Rhubble", "Wylma Phlyntstone"]) + self.assertEqual(ages, [33, 29]) + self.assertEqual(sports, ["volleyball", "basketball"]) + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_arrow_w_empty_table(self): + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField( + "child", + "RECORD", + mode="REPEATED", + fields=[ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + ), + ] + rows = [] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + tbl = row_iterator.to_arrow() + + self.assertIsInstance(tbl, pyarrow.Table) + self.assertEqual(tbl.num_rows, 0) + + # Check the schema. + self.assertEqual(tbl.schema[0].name, "name") + self.assertTrue(pyarrow.types.is_string(tbl.schema[0].type)) + self.assertEqual(tbl.schema[1].name, "age") + self.assertTrue(pyarrow.types.is_int64(tbl.schema[1].type)) + child_field = tbl.schema[2] + self.assertEqual(child_field.name, "child") + self.assertTrue(pyarrow.types.is_list(child_field.type)) + self.assertTrue(pyarrow.types.is_struct(child_field.type.value_type)) + self.assertEqual(child_field.type.value_type[0].name, "name") + self.assertEqual(child_field.type.value_type[1].name, "age") + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf(tqdm is None, "Requires `tqdm`") + @mock.patch("tqdm.tqdm_gui") + @mock.patch("tqdm.tqdm_notebook") + @mock.patch("tqdm.tqdm") + def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock): + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + + progress_bars = ( + ("tqdm", tqdm_mock), + ("tqdm_notebook", tqdm_notebook_mock), + ("tqdm_gui", tqdm_gui_mock), + ) + + for progress_bar_type, progress_bar_mock in progress_bars: + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + tbl = row_iterator.to_arrow(progress_bar_type=progress_bar_type) + + progress_bar_mock.assert_called() + progress_bar_mock().update.assert_called() + progress_bar_mock().close.assert_called_once() + self.assertEqual(tbl.num_rows, 4) + + @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) + def test_to_arrow_w_pyarrow_none(self): + schema = [] + rows = [] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + with self.assertRaises(ValueError): + row_iterator.to_arrow() + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): from google.cloud.bigquery.table import SchemaField @@ -1631,10 +1892,17 @@ def test_to_dataframe_tqdm_error(self): for progress_bar_type in ("tqdm", "tqdm_notebook", "tqdm_gui"): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one(_mock_client(), api_request, path, schema) - df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) + + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) self.assertEqual(len(df), 4) # all should be well + # Warn that a progress bar was requested, but creating the tqdm + # progress bar failed. + for warning in warned: + self.assertIs(warning.category, UserWarning) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): from google.cloud.bigquery.table import SchemaField @@ -2164,19 +2432,25 @@ def test_to_dataframe_w_bqstorage_fallback_to_tabledata_list(self): self.assertEqual(df.age.dtype.name, "int64") @unittest.skipIf(pandas is None, "Requires `pandas`") - @mock.patch( - "google.cloud.bigquery.table.RowIterator.pages", new_callable=mock.PropertyMock - ) - def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index( - self, mock_pages - ): + def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut iterator_schema = [schema.SchemaField("name", "STRING", mode="REQUIRED")] - pages = [[{"name": "Bengt"}], [{"name": "Sven"}]] - - mock_pages.return_value = pages - row_iterator = self._make_one(schema=iterator_schema) + path = "/foo" + api_request = mock.Mock( + side_effect=[ + {"rows": [{"f": [{"v": "Bengt"}]}], "pageToken": "NEXTPAGE"}, + {"rows": [{"f": [{"v": "Sven"}]}]}, + ] + ) + row_iterator = mut.RowIterator( + _mock_client(), + api_request, + path, + iterator_schema, + table=mut.Table("proj.dset.tbl"), + ) df = row_iterator.to_dataframe(bqstorage_client=None) From 57e5bd934f4c8dfd69d1efbea82c8d6c27259056 Mon Sep 17 00:00:00 2001 From: Brian Hulette Date: Fri, 12 Jul 2019 07:18:12 -0700 Subject: [PATCH 0614/2016] Support faster Arrow data format in `to_dataframe` and `to_arrow` when using BigQuery Storage API. (#8551) * Use Arrow format in client.list_rows(..).to_dataframe(..) with BQ Storage client * Add system test for arrow wire format. * Add system test for to_arrow. * Exclude bad pyarrow release. --- .../google/cloud/bigquery/_pandas_helpers.py | 77 ++++++++--- .../google/cloud/bigquery/table.py | 130 ++++++++++++------ packages/google-cloud-bigquery/setup.py | 6 +- .../google-cloud-bigquery/tests/system.py | 66 +++++++++ .../tests/unit/test_job.py | 2 + .../tests/unit/test_table.py | 119 ++++++++++++++++ 6 files changed, 334 insertions(+), 66 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 5a3a9833b572..d77aa67d5cf5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -15,6 +15,7 @@ """Shared helper functions for connecting BigQuery and pandas.""" import concurrent.futures +import functools import warnings from six.moves import queue @@ -74,6 +75,8 @@ def pyarrow_timestamp(): if pyarrow: + # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py + # When modifying it be sure to update it there as well. BQ_TO_ARROW_SCALARS = { "BOOL": pyarrow.bool_, "BOOLEAN": pyarrow.bool_, @@ -269,14 +272,18 @@ def download_dataframe_tabledata_list(pages, schema, dtypes): yield _tabledata_list_page_to_dataframe(page, column_names, dtypes) -def _download_dataframe_bqstorage_stream( - download_state, - bqstorage_client, - column_names, - dtypes, - session, - stream, - worker_queue, +def _bqstorage_page_to_arrow(page): + return page.to_arrow() + + +def _bqstorage_page_to_dataframe(column_names, dtypes, page): + # page.to_dataframe() does not preserve column order in some versions + # of google-cloud-bigquery-storage. Access by column name to rearrange. + return page.to_dataframe(dtypes=dtypes)[column_names] + + +def _download_table_bqstorage_stream( + download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) rowstream = bqstorage_client.read_rows(position).rows(session) @@ -284,10 +291,8 @@ def _download_dataframe_bqstorage_stream( for page in rowstream.pages: if download_state.done: return - # page.to_dataframe() does not preserve column order in some versions - # of google-cloud-bigquery-storage. Access by column name to rearrange. - frame = page.to_dataframe(dtypes=dtypes)[column_names] - worker_queue.put(frame) + item = page_to_item(page) + worker_queue.put(item) def _nowait(futures): @@ -304,14 +309,13 @@ def _nowait(futures): return done, not_done -def download_dataframe_bqstorage( +def _download_table_bqstorage( project_id, table, bqstorage_client, - column_names, - dtypes, preserve_order=False, selected_fields=None, + page_to_item=None, ): """Use (faster, but billable) BQ Storage API to construct DataFrame.""" if "$" in table.table_id: @@ -333,14 +337,13 @@ def download_dataframe_bqstorage( session = bqstorage_client.create_read_session( table.to_bqstorage(), "projects/{}".format(project_id), + format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, read_options=read_options, requested_streams=requested_streams, ) - # Avoid reading rows from an empty table. pandas.concat will fail on an - # empty list. + # Avoid reading rows from an empty table. if not session.streams: - yield pandas.DataFrame(columns=column_names) return total_streams = len(session.streams) @@ -360,14 +363,13 @@ def download_dataframe_bqstorage( # See: https://github.com/googleapis/google-cloud-python/pull/7698 not_done = [ pool.submit( - _download_dataframe_bqstorage_stream, + _download_table_bqstorage_stream, download_state, bqstorage_client, - column_names, - dtypes, session, stream, worker_queue, + page_to_item, ) for stream in session.streams ] @@ -410,3 +412,36 @@ def download_dataframe_bqstorage( # Shutdown all background threads, now that they should know to # exit early. pool.shutdown(wait=True) + + +def download_arrow_bqstorage( + project_id, table, bqstorage_client, preserve_order=False, selected_fields=None +): + return _download_table_bqstorage( + project_id, + table, + bqstorage_client, + preserve_order=preserve_order, + selected_fields=selected_fields, + page_to_item=_bqstorage_page_to_arrow, + ) + + +def download_dataframe_bqstorage( + project_id, + table, + bqstorage_client, + column_names, + dtypes, + preserve_order=False, + selected_fields=None, +): + page_to_item = functools.partial(_bqstorage_page_to_dataframe, column_names, dtypes) + return _download_table_bqstorage( + project_id, + table, + bqstorage_client, + preserve_order=preserve_order, + selected_fields=selected_fields, + page_to_item=page_to_item, + ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 8aa7788acdfa..3c8ce6cc39d7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -18,6 +18,7 @@ import copy import datetime +import functools import operator import warnings @@ -1403,14 +1404,52 @@ def _get_progress_bar(self, progress_bar_type): warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) return None - def _to_arrow_iterable(self): + def _to_page_iterable( + self, bqstorage_download, tabledata_list_download, bqstorage_client=None + ): + if bqstorage_client is not None: + try: + # Iterate over the stream so that read errors are raised (and + # the method can then fallback to tabledata.list). + for item in bqstorage_download(): + yield item + return + except google.api_core.exceptions.Forbidden: + # Don't hide errors such as insufficient permissions to create + # a read session, or the API is not enabled. Both of those are + # clearly problems if the developer has explicitly asked for + # BigQuery Storage API support. + raise + except google.api_core.exceptions.GoogleAPICallError: + # There is a known issue with reading from small anonymous + # query results tables, so some errors are expected. Rather + # than throw those errors, try reading the DataFrame again, but + # with the tabledata.list API. + pass + + for item in tabledata_list_download(): + yield item + + def _to_arrow_iterable(self, bqstorage_client=None): """Create an iterable of arrow RecordBatches, to process the table as a stream.""" - for record_batch in _pandas_helpers.download_arrow_tabledata_list( - iter(self.pages), self.schema - ): - yield record_batch + bqstorage_download = functools.partial( + _pandas_helpers.download_arrow_bqstorage, + self._project, + self._table, + bqstorage_client, + preserve_order=self._preserve_order, + selected_fields=self._selected_fields, + ) + tabledata_list_download = functools.partial( + _pandas_helpers.download_arrow_tabledata_list, iter(self.pages), self.schema + ) + return self._to_page_iterable( + bqstorage_download, + tabledata_list_download, + bqstorage_client=bqstorage_client, + ) - def to_arrow(self, progress_bar_type=None): + def to_arrow(self, progress_bar_type=None, bqstorage_client=None): """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1433,6 +1472,18 @@ def to_arrow(self, progress_bar_type=None): ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. + bqstorage_client ( \ + google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ + ): + **Beta Feature** Optional. A BigQuery Storage API client. If + supplied, use the faster BigQuery Storage API to fetch rows + from BigQuery. This API is a billable API. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. Returns: pyarrow.Table @@ -1452,7 +1503,7 @@ def to_arrow(self, progress_bar_type=None): progress_bar = self._get_progress_bar(progress_bar_type) record_batches = [] - for record_batch in self._to_arrow_iterable(): + for record_batch in self._to_arrow_iterable(bqstorage_client=bqstorage_client): record_batches.append(record_batch) if progress_bar is not None: @@ -1466,47 +1517,40 @@ def to_arrow(self, progress_bar_type=None): # Indicate that the download has finished. progress_bar.close() - arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) - return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) + if record_batches: + return pyarrow.Table.from_batches(record_batches) + else: + # No records, use schema based on BigQuery schema. + arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) + return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) def _to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): """Create an iterable of pandas DataFrames, to process the table as a stream. See ``to_dataframe`` for argument descriptions. """ - if bqstorage_client is not None: - column_names = [field.name for field in self._schema] - try: - # Iterate over the stream so that read errors are raised (and - # the method can then fallback to tabledata.list). - for frame in _pandas_helpers.download_dataframe_bqstorage( - self._project, - self._table, - bqstorage_client, - column_names, - dtypes, - preserve_order=self._preserve_order, - selected_fields=self._selected_fields, - ): - yield frame - return - except google.api_core.exceptions.Forbidden: - # Don't hide errors such as insufficient permissions to create - # a read session, or the API is not enabled. Both of those are - # clearly problems if the developer has explicitly asked for - # BigQuery Storage API support. - raise - except google.api_core.exceptions.GoogleAPICallError: - # There is a known issue with reading from small anonymous - # query results tables, so some errors are expected. Rather - # than throw those errors, try reading the DataFrame again, but - # with the tabledata.list API. - pass - - for frame in _pandas_helpers.download_dataframe_tabledata_list( - iter(self.pages), self.schema, dtypes - ): - yield frame + column_names = [field.name for field in self._schema] + bqstorage_download = functools.partial( + _pandas_helpers.download_dataframe_bqstorage, + self._project, + self._table, + bqstorage_client, + column_names, + dtypes, + preserve_order=self._preserve_order, + selected_fields=self._selected_fields, + ) + tabledata_list_download = functools.partial( + _pandas_helpers.download_dataframe_tabledata_list, + iter(self.pages), + self.schema, + dtypes, + ) + return self._to_page_iterable( + bqstorage_download, + tabledata_list_download, + bqstorage_client=bqstorage_client, + ) def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Create a pandas DataFrame by loading all pages of a query. @@ -1519,7 +1563,7 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. - This method requires the ``fastavro`` and + This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. Reading from a specific partition or snapshot is not diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 5637c0f4fd53..6dfde2e439b2 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -36,8 +36,10 @@ ] extras = { "bqstorage": [ - "google-cloud-bigquery-storage >= 0.4.0, <2.0.0dev", - "fastavro>=0.21.2", + "google-cloud-bigquery-storage >= 0.6.0, <2.0.0dev", + # Bad Linux release for 0.14.0. + # https://issues.apache.org/jira/browse/ARROW-5868 + "pyarrow>=0.13.0, != 0.14.0", ], "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 2213bc7c88da..f234a431d51f 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -39,6 +39,7 @@ pandas = None try: import pyarrow + import pyarrow.types except ImportError: # pragma: NO COVER pyarrow = None try: @@ -1959,6 +1960,71 @@ def test_create_table_rows_fetch_nested_schema(self): def _fetch_dataframe(self, query): return Config.CLIENT.query(query).result().to_dataframe() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_nested_table_to_arrow(self): + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition + + SF = bigquery.SchemaField + schema = [ + SF("string_col", "STRING", mode="NULLABLE"), + SF( + "record_col", + "RECORD", + mode="NULLABLE", + fields=[ + SF("nested_string", "STRING", mode="NULLABLE"), + SF("nested_repeated", "INTEGER", mode="REPEATED"), + ], + ), + SF("float_col", "FLOAT", mode="NULLABLE"), + ] + record = {"nested_string": "another string value", "nested_repeated": [0, 1, 2]} + to_insert = [ + {"string_col": "Some value", "record_col": record, "float_col": 3.14} + ] + rows = [json.dumps(row) for row in to_insert] + body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) + table_id = "test_table" + dataset = self.temp_dataset(_make_dataset_id("nested_df")) + table = dataset.table(table_id) + self.to_delete.insert(0, table) + job_config = bigquery.LoadJobConfig() + job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE + job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON + job_config.schema = schema + # Load a table using a local JSON file from memory. + Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() + bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + credentials=Config.CLIENT._credentials + ) + + tbl = Config.CLIENT.list_rows(table, selected_fields=schema).to_arrow( + bqstorage_client=bqstorage_client + ) + + self.assertIsInstance(tbl, pyarrow.Table) + self.assertEqual(tbl.num_rows, 1) + self.assertEqual(tbl.num_columns, 3) + # Columns may not appear in the requested order. + self.assertTrue( + pyarrow.types.is_float64(tbl.schema.field_by_name("float_col").type) + ) + self.assertTrue( + pyarrow.types.is_string(tbl.schema.field_by_name("string_col").type) + ) + record_col = tbl.schema.field_by_name("record_col").type + self.assertTrue(pyarrow.types.is_struct(record_col)) + self.assertEqual(record_col.num_children, 2) + self.assertEqual(record_col[0].name, "nested_string") + self.assertTrue(pyarrow.types.is_string(record_col[0].type)) + self.assertEqual(record_col[1].name, "nested_repeated") + self.assertTrue(pyarrow.types.is_list(record_col[1].type)) + self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_nested_table_to_dataframe(self): from google.cloud.bigquery.job import SourceFormat diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 22809c245d4b..dcc90b2d96a8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -4897,6 +4897,7 @@ def test_to_dataframe_bqstorage(self): bqstorage_client.create_read_session.assert_called_once_with( mock.ANY, "projects/{}".format(self.PROJECT), + format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, read_options=mock.ANY, # Use default number of streams for best performance. requested_streams=0, @@ -5340,6 +5341,7 @@ def test_to_dataframe_bqstorage_preserve_order(query): bqstorage_client.create_read_session.assert_called_once_with( mock.ANY, "projects/test-project", + format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, read_options=mock.ANY, # Use a single stream to preserve row order. requested_streams=1, diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a892dccf9f28..a141d8f38abf 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1703,6 +1703,125 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_arrow_w_bqstorage(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud.bigquery_storage_v1beta1 import reader + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + streams = [ + # Use two streams we want to check frames are read from each stream. + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, + ] + session = bigquery_storage_v1beta1.types.ReadSession(streams=streams) + arrow_schema = pyarrow.schema( + [ + pyarrow.field("colA", pyarrow.int64()), + # Not alphabetical to test column order. + pyarrow.field("colC", pyarrow.float64()), + pyarrow.field("colB", pyarrow.string()), + ] + ) + session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() + bqstorage_client.create_read_session.return_value = session + + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) + bqstorage_client.read_rows.return_value = mock_rowstream + + mock_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_rowstream.rows.return_value = mock_rows + expected_num_rows = 2 + expected_num_columns = 3 + page_items = [ + pyarrow.array([1, -1]), + pyarrow.array([2.0, 4.0]), + pyarrow.array(["abc", "def"]), + ] + + mock_page = mock.create_autospec(reader.ReadRowsPage) + mock_page.to_arrow.return_value = pyarrow.RecordBatch.from_arrays( + page_items, arrow_schema + ) + mock_pages = (mock_page, mock_page, mock_page) + type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) + + schema = [ + schema.SchemaField("colA", "INTEGER"), + schema.SchemaField("colC", "FLOAT"), + schema.SchemaField("colB", "STRING"), + ] + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=schema, + ) + + actual_tbl = row_iterator.to_arrow(bqstorage_client=bqstorage_client) + + # Are the columns in the expected order? + self.assertEqual(actual_tbl.num_columns, expected_num_columns) + self.assertEqual(actual_tbl.schema[0].name, "colA") + self.assertEqual(actual_tbl.schema[1].name, "colC") + self.assertEqual(actual_tbl.schema[2].name, "colB") + + # Have expected number of rows? + total_pages = len(streams) * len(mock_pages) + total_rows = expected_num_rows * total_pages + self.assertEqual(actual_tbl.num_rows, total_rows) + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_arrow_w_bqstorage_no_streams(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + session = bigquery_storage_v1beta1.types.ReadSession() + arrow_schema = pyarrow.schema( + [ + pyarrow.field("colA", pyarrow.string()), + # Not alphabetical to test column order. + pyarrow.field("colC", pyarrow.string()), + pyarrow.field("colB", pyarrow.string()), + ] + ) + session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() + bqstorage_client.create_read_session.return_value = session + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + [ + schema.SchemaField("colA", "STRING"), + schema.SchemaField("colC", "STRING"), + schema.SchemaField("colB", "STRING"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + + actual_table = row_iterator.to_arrow(bqstorage_client=bqstorage_client) + self.assertEqual(actual_table.num_columns, 3) + self.assertEqual(actual_table.num_rows, 0) + self.assertEqual(actual_table.schema[0].name, "colA") + self.assertEqual(actual_table.schema[1].name, "colC") + self.assertEqual(actual_table.schema[2].name, "colB") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") From 458e80a53ae65dacf5b3529561cfb871c56d0a89 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 12 Jul 2019 10:50:26 -0500 Subject: [PATCH 0615/2016] Release 1.17.0 (#8663) --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 2f2bb2688ff9..0d6630ef3bba 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.17.0 + +07-12-2019 07:56 PDT + +### New Features + +- Support faster Arrow data format in `to_dataframe` when using BigQuery Storage API. ([#8551](https://github.com/googleapis/google-cloud-python/pull/8551)) +- Add `to_arrow` to get a `pyarrow.Table` from query results. ([#8609](https://github.com/googleapis/google-cloud-python/pull/8609)) + +### Dependencies + +- Exclude bad 0.14.0 `pyarrow` release. ([#8551](https://github.com/googleapis/google-cloud-python/pull/8551)) + ## 1.16.0 07-01-2019 10:22 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 6dfde2e439b2..a05c18053379 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.16.0" +version = "1.17.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From bf14af1eb2bc117954f22d7fa290c7ee36954544 Mon Sep 17 00:00:00 2001 From: ylil93 Date: Mon, 15 Jul 2019 12:12:29 -0700 Subject: [PATCH 0616/2016] Add compatibility check badges to READMEs. (#8288) --- packages/google-cloud-bigquery/README.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 4279f215d6c3..a3f0687eef9f 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -1,7 +1,7 @@ Python Client for Google BigQuery ================================= -|GA| |pypi| |versions| +|GA| |pypi| |versions| |compat_check_pypi| |compat_check_github| Querying massive datasets can be time consuming and expensive without the right hardware and infrastructure. Google `BigQuery`_ solves this problem by @@ -17,6 +17,10 @@ processing power of Google's infrastructure. :target: https://pypi.org/project/google-cloud-bigquery/ .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ +.. |compat_check_pypi| image:: https://python-compatibility-tools.appspot.com/one_badge_image?package=google-cloud-bigquery + :target: https://python-compatibility-tools.appspot.com/one_badge_target?package=google-cloud-bigquery +.. |compat_check_github| image:: https://python-compatibility-tools.appspot.com/one_badge_image?package=git%2Bgit%3A//github.com/googleapis/google-cloud-python.git%23subdirectory%3Dbigquery + :target: https://python-compatibility-tools.appspot.com/one_badge_target?package=git%2Bgit%3A//github.com/googleapis/google-cloud-python.git%23subdirectory%3Dbigquery .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery .. _Client Library Documentation: https://googleapis.github.io/google-cloud-python/latest/bigquery/index.html .. _Product Documentation: https://cloud.google.com/bigquery/docs/reference/v2/ From 4e53e5822d61f67b97ce591efc9519909ac9d71f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 17 Jul 2019 19:01:25 +0200 Subject: [PATCH 0617/2016] Add `bqstorage_client` param to `QueryJob.to_arrow()` (#8693) * Add method signature compatibility reminders * Add bqstorage_client param to QueryJob.to_arrow() * Add test for to_*() method signature compatibility The method signatures for to_arrow() and to_dataframe() methods in the job.QueryJob and table.RowIterator classes must match to present a consistent API for users. * Skip method signature test in old Pythons inspect.signature() method is only available in older Python versions --- .../google/cloud/bigquery/job.py | 22 +++++++- .../google/cloud/bigquery/table.py | 4 ++ .../unit/test_signature_compatibility.py | 51 +++++++++++++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 442420a7191b..5cd90ada9637 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2896,7 +2896,9 @@ def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY): rows._preserve_order = _contains_order_by(self.query) return rows - def to_arrow(self, progress_bar_type=None): + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_arrow() + def to_arrow(self, progress_bar_type=None, bqstorage_client=None): """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -2919,6 +2921,18 @@ def to_arrow(self, progress_bar_type=None): ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. + bqstorage_client ( \ + google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ + ): + **Beta Feature** Optional. A BigQuery Storage API client. If + supplied, use the faster BigQuery Storage API to fetch rows + from BigQuery. This API is a billable API. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. Returns: pyarrow.Table @@ -2932,8 +2946,12 @@ def to_arrow(self, progress_bar_type=None): ..versionadded:: 1.17.0 """ - return self.result().to_arrow(progress_bar_type=progress_bar_type) + return self.result().to_arrow( + progress_bar_type=progress_bar_type, bqstorage_client=bqstorage_client + ) + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_dataframe() def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Return a pandas DataFrame from a QueryJob diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 3c8ce6cc39d7..5b1de345c068 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1449,6 +1449,8 @@ def _to_arrow_iterable(self, bqstorage_client=None): bqstorage_client=bqstorage_client, ) + # If changing the signature of this method, make sure to apply the same + # changes to job.QueryJob.to_arrow() def to_arrow(self, progress_bar_type=None, bqstorage_client=None): """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1552,6 +1554,8 @@ def _to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): bqstorage_client=bqstorage_client, ) + # If changing the signature of this method, make sure to apply the same + # changes to job.QueryJob.to_dataframe() def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Create a pandas DataFrame by loading all pages of a query. diff --git a/packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py b/packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py new file mode 100644 index 000000000000..6002ae3e87c9 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py @@ -0,0 +1,51 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect + +import pytest + + +@pytest.fixture +def query_job_class(): + from google.cloud.bigquery.job import QueryJob + + return QueryJob + + +@pytest.fixture +def row_iterator_class(): + from google.cloud.bigquery.table import RowIterator + + return RowIterator + + +@pytest.mark.skipif( + not hasattr(inspect, "signature"), + reason="inspect.signature() is not availalbe in older Python versions", +) +def test_to_arrow_method_signatures_match(query_job_class, row_iterator_class): + sig = inspect.signature(query_job_class.to_arrow) + sig2 = inspect.signature(row_iterator_class.to_arrow) + assert sig == sig2 + + +@pytest.mark.skipif( + not hasattr(inspect, "signature"), + reason="inspect.signature() is not availalbe in older Python versions", +) +def test_to_dataframe_method_signatures_match(query_job_class, row_iterator_class): + sig = inspect.signature(query_job_class.to_dataframe) + sig2 = inspect.signature(row_iterator_class.to_dataframe) + assert sig == sig2 From 683fbd6c3584b0389154df07e5212ff51e1172f7 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 17 Jul 2019 19:08:26 +0200 Subject: [PATCH 0618/2016] Fix several pytest "skip if" markers (#8694) "skipif" should be used instead of "skipIf", the latter is the thing from the unittest nodule. --- .../tests/unit/test__pandas_helpers.py | 42 +++++++++---------- .../tests/unit/test_magics.py | 16 +++---- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 62902cd7a71b..13cfab71e74c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -78,7 +78,7 @@ def all_(*functions): return functools.partial(do_all, functions) -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_is_datetime(): assert is_datetime(pyarrow.timestamp("us", tz=None)) assert not is_datetime(pyarrow.timestamp("ms", tz=None)) @@ -242,7 +242,7 @@ def test_all_(): ("UNKNOWN_TYPE", "REPEATED", is_none), ], ) -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type): field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode) actual = module_under_test.bq_to_arrow_data_type(field) @@ -250,7 +250,7 @@ def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_t @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -294,7 +294,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -338,7 +338,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): assert actual.value_type.equals(expected_value_type) -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -434,8 +434,8 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ), ], ) -@pytest.mark.skipIf(pandas is None, "Requires `pandas`") -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): series = pandas.Series(rows, dtype="object") bq_field = schema.SchemaField("field_name", bq_type) @@ -444,8 +444,8 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): assert rows == roundtrip -@pytest.mark.skipIf(pandas is None, "Requires `pandas`") -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): rows = [[1, 2, 3], [], [4, 5, 6]] series = pandas.Series(rows, dtype="object") @@ -456,8 +456,8 @@ def test_bq_to_arrow_array_w_arrays(module_under_test): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipIf(pandas is None, "Requires `pandas`") -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): rows = [ {"int_col": 123, "string_col": "abc"}, @@ -478,8 +478,8 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): assert rows == roundtrip -@pytest.mark.skipIf(pandas is None, "Requires `pandas`") -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_special_floats(module_under_test): bq_field = schema.SchemaField("field_name", "FLOAT64") rows = [float("-inf"), float("nan"), float("inf"), None] @@ -493,7 +493,7 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -506,8 +506,8 @@ def test_bq_to_arrow_schema_w_unknown_type(module_under_test): assert actual is None -@pytest.mark.skipIf(pandas is None, "Requires `pandas`") -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_dataframe_to_arrow_w_required_fields(module_under_test): bq_schema = ( schema.SchemaField("field01", "STRING", mode="REQUIRED"), @@ -561,8 +561,8 @@ def test_dataframe_to_arrow_w_required_fields(module_under_test): assert not arrow_field.nullable -@pytest.mark.skipIf(pandas is None, "Requires `pandas`") -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_dataframe_to_arrow_w_unknown_type(module_under_test): bq_schema = ( schema.SchemaField("field00", "UNKNOWN_TYPE"), @@ -594,7 +594,7 @@ def test_dataframe_to_arrow_w_unknown_type(module_under_test): assert arrow_schema[3].name == "field03" -@pytest.mark.skipIf(pandas is None, "Requires `pandas`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): monkeypatch.setattr(module_under_test, "pyarrow", None) with pytest.raises(ValueError) as exc_context: @@ -602,8 +602,8 @@ def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): assert "pyarrow is required" in str(exc_context.value) -@pytest.mark.skipIf(pandas is None, "Requires `pandas`") -@pytest.mark.skipIf(pyarrow is None, "Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_missing_columns(module_under_test, monkeypatch): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index ab5b14174a7a..6e8c941bbc02 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -256,8 +256,8 @@ def test__make_bqstorage_client_false(): assert got is None -@pytest.mark.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" +@pytest.mark.skipif( + bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" ) def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( @@ -411,8 +411,8 @@ def test_bigquery_magic_clears_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" +@pytest.mark.skipif( + bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" ) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): ip = IPython.get_ipython() @@ -461,8 +461,8 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" +@pytest.mark.skipif( + bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" ) def test_bigquery_magic_with_bqstorage_from_context(monkeypatch): ip = IPython.get_ipython() @@ -511,8 +511,8 @@ def test_bigquery_magic_with_bqstorage_from_context(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" +@pytest.mark.skipif( + bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" ) def test_bigquery_magic_without_bqstorage(monkeypatch): ip = IPython.get_ipython() From 346eccee56fdcd7548f2d8792340991e109646b0 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 19 Jul 2019 14:45:47 -0700 Subject: [PATCH 0619/2016] Link to googleapis.dev documentation in READMEs. (#8705) --- packages/google-cloud-bigquery/README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index a3f0687eef9f..67da531a6597 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -22,7 +22,7 @@ processing power of Google's infrastructure. .. |compat_check_github| image:: https://python-compatibility-tools.appspot.com/one_badge_image?package=git%2Bgit%3A//github.com/googleapis/google-cloud-python.git%23subdirectory%3Dbigquery :target: https://python-compatibility-tools.appspot.com/one_badge_target?package=git%2Bgit%3A//github.com/googleapis/google-cloud-python.git%23subdirectory%3Dbigquery .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery -.. _Client Library Documentation: https://googleapis.github.io/google-cloud-python/latest/bigquery/index.html +.. _Client Library Documentation: https://googleapis.dev/python/bigquery/latest .. _Product Documentation: https://cloud.google.com/bigquery/docs/reference/v2/ Quick Start @@ -38,7 +38,7 @@ In order to use this library, you first need to go through the following steps: .. _Select or create a Cloud Platform project.: https://console.cloud.google.com/project .. _Enable billing for your project.: https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project .. _Enable the Google Cloud BigQuery API.: https://cloud.google.com/bigquery -.. _Setup Authentication.: https://googleapis.github.io/google-cloud-python/latest/core/auth.html +.. _Setup Authentication.: https://googleapis.dev/python/google-api-core/latest/auth.html Installation ~~~~~~~~~~~~ From 98a819e11fa04e577491e8b8f80a86e4ae8dd781 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 22 Jul 2019 19:35:22 +0200 Subject: [PATCH 0620/2016] Set IPython user agent when running queries with IPython cell magic (#8713) * Set ipython user agent when in IPython cell * Revert year bump in license headers * Fix false positive failure in pyarrow warning test Any additional warnings on top of the expected pyarrow warning would cause one of the tests to fail. This commit makes that test more robust by only focusing on the existence of the specific warning of interest. * Modify test - *all* warnings should be pyarrow's --- .../google/cloud/bigquery/magics.py | 4 +++ .../tests/unit/test_client.py | 8 +++--- .../tests/unit/test_magics.py | 25 +++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 0acde4f21b5f..9bf2019c5c2e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -144,6 +144,7 @@ except ImportError: # pragma: NO COVER bigquery_storage_v1beta1 = None +from google.api_core import client_info import google.auth from google.cloud import bigquery from google.cloud.bigquery.dbapi import _helpers @@ -398,6 +399,9 @@ def _cell_magic(line, query): project=project, credentials=context.credentials, default_query_job_config=context.default_query_job_config, + client_info=client_info.ClientInfo( + user_agent="ipython-{}".format(IPython.__version__) + ), ) if context._connection: client._connection = context._connection diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 8ad9dc8858c6..2be40a52e1fc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5353,10 +5353,10 @@ def test_load_table_from_dataframe_w_schema_wo_pyarrow(self): dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION ) - assert len(warned) == 1 - warning = warned[0] - assert warning.category is PendingDeprecationWarning - assert "pyarrow" in str(warning) + assert warned # there should be at least one warning + for warning in warned: + assert "pyarrow" in str(warning) + assert warning.category in (DeprecationWarning, PendingDeprecationWarning) load_table_from_file.assert_called_once_with( client, diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 6e8c941bbc02..1b2a4dd5932f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -320,6 +320,31 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): assert list(return_value) == list(result) # verify column names +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_default_connection_user_agent(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._connection = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + run_query_patch = mock.patch( + "google.cloud.bigquery.magics._run_query", autospec=True + ) + conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) + + with conn_patch as conn, run_query_patch, default_patch: + ip.run_cell_magic("bigquery", "", "SELECT 17 as num") + + client_info_arg = conn.call_args.kwargs.get("client_info") + assert client_info_arg is not None + assert client_info_arg.user_agent == "ipython-" + IPython.__version__ + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_legacy_sql(): ip = IPython.get_ipython() From 41e32a6715ac7d30162d7167dc6ca3fac48d08ec Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 22 Jul 2019 20:28:59 +0200 Subject: [PATCH 0621/2016] Mock external calls in one of BigQuery unit tests (#8727) --- .../google-cloud-bigquery/tests/unit/test_magics.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 1b2a4dd5932f..44e0571d1ee4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -584,9 +584,17 @@ def test_bigquery_magic_w_maximum_bytes_billed_invalid(): ip.extension_manager.load_extension("google.cloud.bigquery") magics.context._project = None + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + client_query_patch = mock.patch("google.cloud.bigquery.client.Client.query") + sql = "SELECT 17 AS num" - with pytest.raises(ValueError): + with pytest.raises(ValueError), default_patch, client_query_patch: ip.run_cell_magic("bigquery", "--maximum_bytes_billed=abc", sql) From e615f5500249684cca150dc2a2555c38487af862 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 25 Jul 2019 16:54:45 -0700 Subject: [PATCH 0622/2016] Update tests to support conversion of NaN as NULL in pyarrow `0.14.*`. (#8785) * Treat NaN as NULL when converting from pandas to arrow. Fixes broken unit tests from latest pyarrow release. * Remove outdated pyarrow comment. --- packages/google-cloud-bigquery/noxfile.py | 1 - .../google-cloud-bigquery/tests/unit/test__pandas_helpers.py | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index a9df7a67cfcc..9cfe5cc856dd 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -38,7 +38,6 @@ def default(session): for local_dep in LOCAL_DEPS: session.install("-e", local_dep) - # Pyarrow does not support Python 3.7 dev_install = ".[all]" session.install("-e", dev_install) diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 13cfab71e74c..9348635f2dc6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -488,7 +488,10 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): roundtrip = arrow_array.to_pylist() assert len(rows) == len(roundtrip) assert roundtrip[0] == float("-inf") - assert roundtrip[1] != roundtrip[1] # NaN doesn't equal itself. + # Since we are converting from pandas, NaN is treated as NULL in pyarrow + # due to pandas conventions. + # https://arrow.apache.org/docs/python/data.html#none-values-and-nan-handling + assert roundtrip[1] is None assert roundtrip[2] == float("inf") assert roundtrip[3] is None From 3f55803b38b145db49c50fce95c3dd3eaad6f427 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 27 Jul 2019 00:56:30 +0200 Subject: [PATCH 0623/2016] Include SQL query and job ID in exception messages. (#8748) * Include SQL query and job ID in job errors The code for query format in exception messages is a modified version of the original proposal by @bencaine1 in the feature request: https://github.com/googleapis/google-cloud-python/issues/5408#issue-327761423 * Simplify amending message on job result error All GoogleAPICallError (GoogleCloudError) instances have the "message" attribute, even in Python 3, thus the additional info can be appended directly to that attribute. * Change query header text in job error messages It's not always the query that causes the error, thus this commit makes the header text more neutral. --- .../google/cloud/bigquery/job.py | 47 ++++++++++++++++--- .../tests/unit/test_job.py | 19 +++++++- 2 files changed, 59 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5cd90ada9637..084181cdf757 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2846,6 +2846,36 @@ def _blocking_poll(self, timeout=None): self._done_timeout = timeout super(QueryJob, self)._blocking_poll(timeout=timeout) + @staticmethod + def _format_for_exception(query, job_id): + """Format a query for the output in exception message. + + Args: + query (str): The SQL query to format. + job_id (str): The ID of the job that ran the query. + + Returns: (str) + A formatted query text. + """ + template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}" + + lines = query.splitlines() + max_line_len = max(len(l) for l in lines) + + header = "-----Query Job SQL Follows-----" + header = "{:^{total_width}}".format(header, total_width=max_line_len + 5) + + # Print out a "ruler" above and below the SQL so we can judge columns. + # Left pad for the line numbers (4 digits plus ":"). + ruler = " |" + " . |" * (max_line_len // 10) + + # Put line numbers next to the SQL. + body = "\n".join( + "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1) + ) + + return template.format(job_id=job_id, header=header, ruler=ruler, body=body) + def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY): """Start the job and wait for it to complete and get the result. @@ -2874,12 +2904,17 @@ def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY): concurrent.futures.TimeoutError: If the job did not complete in the given timeout. """ - super(QueryJob, self).result(timeout=timeout) - # Return an iterator instead of returning the job. - if not self._query_results: - self._query_results = self._client._get_query_results( - self.job_id, retry, project=self.project, location=self.location - ) + try: + super(QueryJob, self).result(timeout=timeout) + + # Return an iterator instead of returning the job. + if not self._query_results: + self._query_results = self._client._get_query_results( + self.job_id, retry, project=self.project, location=self.location + ) + except exceptions.GoogleCloudError as exc: + exc.message += self._format_for_exception(self.query, self.job_id) + raise # If the query job is complete but there are no query results, this was # special job, such as a DDL query. Return an empty result set to diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index dcc90b2d96a8..1e75373c84b6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -14,6 +14,7 @@ import copy import json +import textwrap import unittest import mock @@ -4256,8 +4257,15 @@ def test_result_w_page_size(self): def test_result_error(self): from google.cloud import exceptions + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) + job = self._make_one(self.JOB_ID, query, client) error_result = { "debugInfo": "DEBUG", "location": "LOCATION", @@ -4277,6 +4285,15 @@ def test_result_error(self): self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + full_text = str(exc_info.exception) + + assert job.job_id in full_text + assert "Query Job SQL Follows" in full_text + + for i, line in enumerate(query.splitlines(), start=1): + expected_line = "{}:{}".format(i, line) + assert expected_line in full_text + def test_begin_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import QueryJobConfig From df83f824eab9692f56b7a7b9a5a3fe98bee5e89f Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar <46078689+Emar-Kar@users.noreply.github.com> Date: Sat, 27 Jul 2019 02:06:10 +0300 Subject: [PATCH 0624/2016] Allow using TableListItem to construct a Table object. (#8738) * BigQuery: Table creation using the TableListItem The TableListItem is treated as a reference. No other properties are copied to the Table object. --- .../google/cloud/bigquery/table.py | 3 +- .../tests/unit/test_table.py | 41 +++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 5b1de345c068..b1fe36c3dbed 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -388,8 +388,7 @@ class Table(object): } def __init__(self, table_ref, schema=None): - if isinstance(table_ref, six.string_types): - table_ref = TableReference.from_string(table_ref) + table_ref = _table_arg_to_table_ref(table_ref) self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}} # Let the @property do validation. if schema is not None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a141d8f38abf..e14420846846 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -526,6 +526,47 @@ def test_ctor_string(self): self.assertEqual(table.dataset_id, "some_dset") self.assertEqual(table.table_id, "some_tbl") + def test_ctor_tablelistitem(self): + from google.cloud.bigquery.table import Table, TableListItem + + import datetime + from google.cloud._helpers import _millis, UTC + + self.WHEN_TS = 1437767599.125 + self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + + project = "test-project" + dataset_id = "test_dataset" + table_id = "coffee_table" + resource = { + "creationTime": self.WHEN_TS * 1000, + "expirationTime": _millis(self.EXP_TIME), + "kind": "bigquery#table", + "id": "{}:{}.{}".format(project, dataset_id, table_id), + "tableReference": { + "projectId": project, + "datasetId": dataset_id, + "tableId": table_id, + }, + "friendlyName": "Mahogany Coffee Table", + "type": "TABLE", + "timePartitioning": { + "type": "DAY", + "field": "mycolumn", + "expirationMs": "10000", + }, + "labels": {"some-stuff": "this-is-a-label"}, + "clustering": {"fields": ["string"]}, + } + + table_list_item = TableListItem(resource) + table = Table(table_list_item) + + self.assertIsNone(table.created) + self.assertEqual(table.reference.project, project) + self.assertEqual(table.reference.dataset_id, dataset_id) + self.assertEqual(table.reference.table_id, table_id) + def test_ctor_string_wo_project_id(self): with pytest.raises(ValueError): # Project ID is missing. From 5ab307370c355bc17f3d86e1a5691b12de5f2057 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 29 Jul 2019 09:36:26 -0700 Subject: [PATCH 0625/2016] Use configurable bucket name for GCS samples data in systems tests. (#8783) * Use configurable bucket name for GCS samples data in systems tests. This allows the VPC-SC team to use their private mirror which is within the security boundary when testing BigQuery VPC-SC support. * Blacken --- packages/google-cloud-bigquery/tests/system.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index f234a431d51f..fd9efa7752cf 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -107,6 +107,11 @@ ), ] +# The VPC-SC team maintains a mirror of the GCS bucket used for code +# samples. The public bucket crosses the configured security boundary. +# See: https://github.com/googleapis/google-cloud-python/issues/8550 +SAMPLES_BUCKET = os.environ.get("GCLOUD_TEST_SAMPLES_BUCKET", "cloud-samples-data") + retry_storage_errors = RetryErrors( (TooManyRequests, InternalServerError, ServiceUnavailable) ) @@ -1877,7 +1882,9 @@ def test_create_routine(self): language="JAVASCRIPT", type_="SCALAR_FUNCTION", return_type=float64_type, - imported_libraries=["gs://cloud-samples-data/bigquery/udfs/max-value.js"], + imported_libraries=[ + "gs://{}/bigquery/udfs/max-value.js".format(SAMPLES_BUCKET) + ], ) routine.arguments = [ bigquery.RoutineArgument( From e00a3d2f744113b7bc16bec08748934835198674 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 30 Jul 2019 08:13:18 +0200 Subject: [PATCH 0626/2016] BigQuery: Add StandardSqlDataTypes enum to BigQuery (#8782) * Add StandardSqlDataTypes enum to BigQuery This is a convenience enum that contains scalar SQL data types constants (a subsset of types defined in the gapic enum generated from the .proto definitions). * Move StandardSqlDataTypes tests under enums/ dir * Treat GEOGRAPHY as scalar SQL type in enum * Use more descriptive name in generator expression * Replace enum out of sync warning with a loud test * Add Enums section to BigQuery API reference docs --- .../google-cloud-bigquery/docs/reference.rst | 10 +++ .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/enums.py | 69 ++++++++++++++++++ .../tests/unit/enums/__init__.py | 13 ++++ .../enums/test_standard_sql_data_types.py | 73 +++++++++++++++++++ 5 files changed, 167 insertions(+) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/enums.py create mode 100644 packages/google-cloud-bigquery/tests/unit/enums/__init__.py create mode 100644 packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 39b3e8407d30..e01443808795 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -164,6 +164,16 @@ Magics magics + +Enums +===== + +.. autosummary:: + :toctree: generated + + enums.StandardSqlDataTypes + + Additional Types ================ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index b84051fc6be1..c41ceb6b0306 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -36,6 +36,7 @@ from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.enums import StandardSqlDataTypes from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -130,6 +131,7 @@ "Encoding", "QueryPriority", "SchemaUpdateOption", + "StandardSqlDataTypes", "SourceFormat", "WriteDisposition", ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py new file mode 100644 index 000000000000..098a918e474f --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -0,0 +1,69 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +import enum +import six + +from google.cloud.bigquery_v2.gapic import enums as gapic_enums + + +_SQL_SCALAR_TYPES = frozenset( + ( + "INT64", + "BOOL", + "FLOAT64", + "STRING", + "BYTES", + "TIMESTAMP", + "DATE", + "TIME", + "DATETIME", + "GEOGRAPHY", + "NUMERIC", + ) +) + +_SQL_NONSCALAR_TYPES = frozenset(("TYPE_KIND_UNSPECIFIED", "ARRAY", "STRUCT")) + + +def _make_sql_scalars_enum(): + """Create an enum based on a gapic enum containing only SQL scalar types.""" + + new_enum = enum.Enum( + "StandardSqlDataTypes", + ( + (member.name, member.value) + for member in gapic_enums.StandardSqlDataType.TypeKind + if member.name in _SQL_SCALAR_TYPES + ), + ) + + # make sure the docstring for the new enum is also correct + orig_doc = gapic_enums.StandardSqlDataType.TypeKind.__doc__ + skip_pattern = re.compile( + "|".join(_SQL_NONSCALAR_TYPES) + + "|because a JSON object" # the second description line of STRUCT member + ) + + new_doc = "\n".join( + six.moves.filterfalse(skip_pattern.search, orig_doc.splitlines()) + ) + new_enum.__doc__ = "An Enum of scalar SQL types.\n" + new_doc + + return new_enum + + +StandardSqlDataTypes = _make_sql_scalars_enum() diff --git a/packages/google-cloud-bigquery/tests/unit/enums/__init__.py b/packages/google-cloud-bigquery/tests/unit/enums/__init__.py new file mode 100644 index 000000000000..c5cce043083c --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/enums/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019, Google LLC All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py b/packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py new file mode 100644 index 000000000000..6fa4f057fb98 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py @@ -0,0 +1,73 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +@pytest.fixture +def module_under_test(): + from google.cloud.bigquery import enums + + return enums + + +@pytest.fixture +def enum_under_test(): + from google.cloud.bigquery.enums import StandardSqlDataTypes + + return StandardSqlDataTypes + + +@pytest.fixture +def gapic_enum(): + """The referential autogenerated enum the enum under test is based on.""" + from google.cloud.bigquery_v2.gapic.enums import StandardSqlDataType + + return StandardSqlDataType.TypeKind + + +def test_all_gapic_enum_members_are_known(module_under_test, gapic_enum): + gapic_names = set(type_.name for type_ in gapic_enum) + anticipated_names = ( + module_under_test._SQL_SCALAR_TYPES | module_under_test._SQL_NONSCALAR_TYPES + ) + assert not (gapic_names - anticipated_names) # no unhandled names + + +def test_standard_sql_types_enum_members(enum_under_test, gapic_enum): + # check the presence of a few typical SQL types + for name in ("INT64", "FLOAT64", "DATE", "BOOL", "GEOGRAPHY"): + assert name in enum_under_test.__members__ + + # the enum members must match those in the original gapic enum + for member in enum_under_test: + assert member.name in gapic_enum.__members__ + assert member.value == gapic_enum[member.name].value + + # check a few members that should *not* be copied over from the gapic enum + for name in ("STRUCT", "ARRAY"): + assert name in gapic_enum.__members__ + assert name not in enum_under_test.__members__ + + +def test_standard_sql_types_enum_docstring(enum_under_test, gapic_enum): + assert "STRUCT (int):" not in enum_under_test.__doc__ + assert "BOOL (int):" in enum_under_test.__doc__ + assert "TIME (int):" in enum_under_test.__doc__ + + # All lines in the docstring should actually come from the original docstring, + # except for the header. + assert "An Enum of scalar SQL types." in enum_under_test.__doc__ + doc_lines = enum_under_test.__doc__.splitlines() + assert set(doc_lines[1:]) <= set(gapic_enum.__doc__.splitlines()) From 5e110968e600b0b58c6f5c427811ac090d6d9d0c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 30 Jul 2019 21:04:01 +0200 Subject: [PATCH 0627/2016] Set BQ Storage client user-agent when in Jupyter cell (#8734) * Move optional bqstorage import into function * Set BQ storage client useragent in IPython cell * Extract import patcher factory to test helpers * Use ClientInfo from gapic for BQ storage client The client is a grpc client, thus the grpc ClientInfo class should be used to configure it. * Replace fastavro with pyerror in import error msg --- .../google/cloud/bigquery/magics.py | 34 ++++-- .../tests/unit/helpers.py | 25 +++++ .../tests/unit/test_magics.py | 101 +++++++++++++----- 3 files changed, 122 insertions(+), 38 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 9bf2019c5c2e..44596f2ef88e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -139,15 +139,15 @@ from IPython.core import magic_arguments except ImportError: # pragma: NO COVER raise ImportError("This module can only be loaded in IPython.") -try: - from google.cloud import bigquery_storage_v1beta1 -except ImportError: # pragma: NO COVER - bigquery_storage_v1beta1 = None from google.api_core import client_info import google.auth from google.cloud import bigquery from google.cloud.bigquery.dbapi import _helpers +import six + + +IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) class Context(object): @@ -399,9 +399,7 @@ def _cell_magic(line, query): project=project, credentials=context.credentials, default_query_job_config=context.default_query_job_config, - client_info=client_info.ClientInfo( - user_agent="ipython-{}".format(IPython.__version__) - ), + client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) if context._connection: client._connection = context._connection @@ -433,10 +431,24 @@ def _make_bqstorage_client(use_bqstorage_api, credentials): if not use_bqstorage_api: return None - if bigquery_storage_v1beta1 is None: - raise ImportError( - "Install the google-cloud-bigquery-storage and fastavro packages " + try: + from google.cloud import bigquery_storage_v1beta1 + except ImportError as err: + customized_error = ImportError( + "Install the google-cloud-bigquery-storage and pyarrow packages " "to use the BigQuery Storage API." ) + six.raise_from(customized_error, err) - return bigquery_storage_v1beta1.BigQueryStorageClient(credentials=credentials) + try: + from google.api_core.gapic_v1 import client_info as gapic_client_info + except ImportError as err: + customized_error = ImportError( + "Install the grpcio package to use the BigQuery Storage API." + ) + six.raise_from(customized_error, err) + + return bigquery_storage_v1beta1.BigQueryStorageClient( + credentials=credentials, + client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), + ) diff --git a/packages/google-cloud-bigquery/tests/unit/helpers.py b/packages/google-cloud-bigquery/tests/unit/helpers.py index 5b731a763a99..673aa8ac5f02 100644 --- a/packages/google-cloud-bigquery/tests/unit/helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/helpers.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import mock +import six + def make_connection(*responses): import google.cloud.bigquery._http @@ -22,3 +25,25 @@ def make_connection(*responses): mock_conn.user_agent = "testing 1.2.3" mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] return mock_conn + + +def maybe_fail_import(predicate): + """Create and return a patcher that conditionally makes an import fail. + + Args: + predicate (Callable[[...], bool]): A callable that, if it returns `True`, + triggers an `ImportError`. It must accept the same arguments as the + built-in `__import__` function. + https://docs.python.org/3/library/functions.html#__import__ + + Returns: + A mock patcher object that can be used to enable patched import behavior. + """ + orig_import = six.moves.builtins.__import__ + + def custom_import(name, globals=None, locals=None, fromlist=(), level=0): + if predicate(name, globals, locals, fromlist, level): + raise ImportError + return orig_import(name, globals, locals, fromlist, level) + + return mock.patch.object(six.moves.builtins, "__import__", new=custom_import) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 44e0571d1ee4..760b6ccf568d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -42,6 +42,7 @@ from google.cloud.bigquery import table from google.cloud.bigquery import magics from tests.unit.helpers import make_connection +from tests.unit.helpers import maybe_fail_import pytestmark = pytest.mark.skipif(IPython is None, reason="Requires `ipython`") @@ -65,6 +66,30 @@ def ipython_interactive(request, ipython): yield ipython +@pytest.fixture(scope="session") +def missing_bq_storage(): + """Provide a patcher that can make the bigquery storage import to fail.""" + + def fail_if(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage_v1beta1" in name or ( + fromlist is not None and "bigquery_storage_v1beta1" in fromlist + ) + + return maybe_fail_import(predicate=fail_if) + + +@pytest.fixture(scope="session") +def missing_grpcio_lib(): + """Provide a patcher that can make the gapic library import to fail.""" + + def fail_if(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "gapic_v1" in name or (fromlist is not None and "gapic_v1" in fromlist) + + return maybe_fail_import(predicate=fail_if) + + JOB_REFERENCE_RESOURCE = {"projectId": "its-a-project-eh", "jobId": "some-random-id"} TABLE_REFERENCE_RESOURCE = { "projectId": "its-a-project-eh", @@ -267,16 +292,28 @@ def test__make_bqstorage_client_true(): assert isinstance(got, bigquery_storage_v1beta1.BigQueryStorageClient) -def test__make_bqstorage_client_true_raises_import_error(monkeypatch): - monkeypatch.setattr(magics, "bigquery_storage_v1beta1", None) +def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + with pytest.raises(ImportError) as exc_context, missing_bq_storage: + magics._make_bqstorage_client(True, credentials_mock) + + error_msg = str(exc_context.value) + assert "google-cloud-bigquery-storage" in error_msg + assert "pyarrow" in error_msg + + +def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - with pytest.raises(ImportError) as exc_context: + with pytest.raises(ImportError) as exc_context, missing_grpcio_lib: magics._make_bqstorage_client(True, credentials_mock) - assert "google-cloud-bigquery-storage" in str(exc_context.value) + assert "grpcio" in str(exc_context.value) @pytest.mark.usefixtures("ipython_interactive") @@ -291,16 +328,13 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_without_optional_arguments(monkeypatch): +def test_bigquery_magic_without_optional_arguments(missing_bq_storage): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - # Shouldn't fail when BigQuery Storage client isn't installed. - monkeypatch.setattr(magics, "bigquery_storage_v1beta1", None) - sql = "SELECT 17 AS num" result = pandas.DataFrame([17], columns=["num"]) run_query_patch = mock.patch( @@ -310,9 +344,10 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock: - run_query_mock.return_value = query_job_mock + # Shouldn't fail when BigQuery Storage client isn't installed. + with run_query_patch as run_query_mock, missing_bq_storage: + run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "", sql) assert isinstance(return_value, pandas.DataFrame) @@ -459,8 +494,8 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): bigquery_storage_v1beta1.BigQueryStorageClient, instance=True ) bqstorage_mock.return_value = bqstorage_instance_mock - monkeypatch.setattr( - magics.bigquery_storage_v1beta1, "BigQueryStorageClient", bqstorage_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", bqstorage_mock ) sql = "SELECT 17 AS num" @@ -472,15 +507,21 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock: + with run_query_patch as run_query_mock, bqstorage_client_patch: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "--use_bqstorage_api", sql) - bqstorage_mock.assert_called_once_with(credentials=mock_credentials) - query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock - ) + assert len(bqstorage_mock.call_args_list) == 1 + kwargs = bqstorage_mock.call_args_list[0].kwargs + assert kwargs.get("credentials") is mock_credentials + client_info = kwargs.get("client_info") + assert client_info is not None + assert client_info.user_agent == "ipython-" + IPython.__version__ + + query_job_mock.to_dataframe.assert_called_once_with( + bqstorage_client=bqstorage_instance_mock + ) assert isinstance(return_value, pandas.DataFrame) @@ -509,8 +550,8 @@ def test_bigquery_magic_with_bqstorage_from_context(monkeypatch): bigquery_storage_v1beta1.BigQueryStorageClient, instance=True ) bqstorage_mock.return_value = bqstorage_instance_mock - monkeypatch.setattr( - magics.bigquery_storage_v1beta1, "BigQueryStorageClient", bqstorage_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", bqstorage_mock ) sql = "SELECT 17 AS num" @@ -522,15 +563,21 @@ def test_bigquery_magic_with_bqstorage_from_context(monkeypatch): google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock: + with run_query_patch as run_query_mock, bqstorage_client_patch: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "", sql) - bqstorage_mock.assert_called_once_with(credentials=mock_credentials) - query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock - ) + assert len(bqstorage_mock.call_args_list) == 1 + kwargs = bqstorage_mock.call_args_list[0].kwargs + assert kwargs.get("credentials") is mock_credentials + client_info = kwargs.get("client_info") + assert client_info is not None + assert client_info.user_agent == "ipython-" + IPython.__version__ + + query_job_mock.to_dataframe.assert_called_once_with( + bqstorage_client=bqstorage_instance_mock + ) assert isinstance(return_value, pandas.DataFrame) @@ -554,8 +601,8 @@ def test_bigquery_magic_without_bqstorage(monkeypatch): bqstorage_mock = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient ) - monkeypatch.setattr( - magics.bigquery_storage_v1beta1, "BigQueryStorageClient", bqstorage_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", bqstorage_mock ) sql = "SELECT 17 AS num" @@ -567,7 +614,7 @@ def test_bigquery_magic_without_bqstorage(monkeypatch): google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock: + with run_query_patch as run_query_mock, bqstorage_client_patch: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "", sql) From 35a6674edded3c96c56ad066d1ef260977304145 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 31 Jul 2019 22:15:00 +0200 Subject: [PATCH 0628/2016] Move maybe_fail_import() to top level test utils (#8840) * Move maybe_fail_import() to top level test utils * Install local test utils in multiple nox sessions --- packages/google-cloud-bigquery/noxfile.py | 6 ++++- .../tests/unit/helpers.py | 25 ------------------- .../tests/unit/test_magics.py | 2 +- 3 files changed, 6 insertions(+), 27 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 9cfe5cc856dd..37611a5ce296 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -20,7 +20,11 @@ import nox -LOCAL_DEPS = (os.path.join("..", "api_core[grpc]"), os.path.join("..", "core")) +LOCAL_DEPS = ( + os.path.join("..", "api_core[grpc]"), + os.path.join("..", "core"), + os.path.join("..", "test_utils"), +) BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") diff --git a/packages/google-cloud-bigquery/tests/unit/helpers.py b/packages/google-cloud-bigquery/tests/unit/helpers.py index 673aa8ac5f02..5b731a763a99 100644 --- a/packages/google-cloud-bigquery/tests/unit/helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/helpers.py @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import mock -import six - def make_connection(*responses): import google.cloud.bigquery._http @@ -25,25 +22,3 @@ def make_connection(*responses): mock_conn.user_agent = "testing 1.2.3" mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] return mock_conn - - -def maybe_fail_import(predicate): - """Create and return a patcher that conditionally makes an import fail. - - Args: - predicate (Callable[[...], bool]): A callable that, if it returns `True`, - triggers an `ImportError`. It must accept the same arguments as the - built-in `__import__` function. - https://docs.python.org/3/library/functions.html#__import__ - - Returns: - A mock patcher object that can be used to enable patched import behavior. - """ - orig_import = six.moves.builtins.__import__ - - def custom_import(name, globals=None, locals=None, fromlist=(), level=0): - if predicate(name, globals, locals, fromlist, level): - raise ImportError - return orig_import(name, globals, locals, fromlist, level) - - return mock.patch.object(six.moves.builtins, "__import__", new=custom_import) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 760b6ccf568d..82b7eb6a3892 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -42,7 +42,7 @@ from google.cloud.bigquery import table from google.cloud.bigquery import magics from tests.unit.helpers import make_connection -from tests.unit.helpers import maybe_fail_import +from test_utils.imports import maybe_fail_import pytestmark = pytest.mark.skipif(IPython is None, reason="Requires `ipython`") From f67a6c96ca358a46e4f6193173849f92a20c6928 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 1 Aug 2019 09:33:25 +0200 Subject: [PATCH 0629/2016] BigQuery: Hide error traceback in BigQuery cell magic (#8808) * Hide error traceback in BigQuery cell magic The traceback is an internal detail of the IPython magic, the users should only see the relevant error information. * Output BigQuery cell magic error message to stderr --- .../google/cloud/bigquery/magics.py | 12 +++++++- .../tests/unit/test_magics.py | 28 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 44596f2ef88e..b4ec8951b0a6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -130,6 +130,7 @@ from __future__ import print_function import ast +import sys import time from concurrent import futures @@ -415,11 +416,20 @@ def _cell_magic(line, query): elif args.maximum_bytes_billed is not None: value = int(args.maximum_bytes_billed) job_config.maximum_bytes_billed = value - query_job = _run_query(client, query, job_config) + + error = None + try: + query_job = _run_query(client, query, job_config) + except Exception as ex: + error = str(ex) if not args.verbose: display.clear_output() + if error: + print("\nERROR:\n", error, file=sys.stderr) + return + result = query_job.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 82b7eb6a3892..ebe194329ec1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -32,6 +32,7 @@ except ImportError: # pragma: NO COVER IPython = None +from google.api_core import exceptions import google.auth.credentials try: @@ -862,3 +863,30 @@ def test_bigquery_magic_with_improperly_formatted_params(): with pytest.raises(SyntaxError): ip.run_cell_magic("bigquery", "--params {17}", sql) + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_omits_tracebacks_from_error_message(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics._run_query", + autospec=True, + side_effect=exceptions.BadRequest("Syntax error in SQL query"), + ) + + with run_query_patch, default_patch, io.capture_output() as captured_io: + ip.run_cell_magic("bigquery", "", "SELECT foo FROM WHERE LIMIT bar") + + output = captured_io.stderr + assert "400 Syntax error in SQL query" in output + assert "Traceback (most recent call last)" not in output + assert "Syntax error" not in captured_io.stdout From af67a5aeb041a9550d4936b236a0008f02dc048d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 1 Aug 2019 10:59:52 -0700 Subject: [PATCH 0630/2016] Add debug logging statements to track when BQ Storage API is used. (#8838) * Add debug logging statements to track when BQ Storage API is used. * Add tests for debug logs. --- .../google/cloud/bigquery/_pandas_helpers.py | 8 ++ .../google/cloud/bigquery/table.py | 8 ++ .../tests/unit/test_table.py | 81 +++++++++++++++---- 3 files changed, 80 insertions(+), 17 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index d77aa67d5cf5..d508929e5d6a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -16,6 +16,7 @@ import concurrent.futures import functools +import logging import warnings from six.moves import queue @@ -39,6 +40,8 @@ from google.cloud.bigquery import schema +_LOGGER = logging.getLogger(__name__) + _NO_BQSTORAGE_ERROR = ( "The google-cloud-bigquery-storage library is not installed, " "please install google-cloud-bigquery-storage to use bqstorage features." @@ -341,6 +344,11 @@ def _download_table_bqstorage( read_options=read_options, requested_streams=requested_streams, ) + _LOGGER.debug( + "Started reading table '{}.{}.{}' with BQ Storage API session '{}'.".format( + table.project, table.dataset_id, table.table_id, session.name + ) + ) # Avoid reading rows from an empty table. if not session.streams: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index b1fe36c3dbed..62072cf88804 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -19,6 +19,7 @@ import copy import datetime import functools +import logging import operator import warnings @@ -56,6 +57,8 @@ from google.cloud.bigquery.external_config import ExternalConfig +_LOGGER = logging.getLogger(__name__) + _NO_BQSTORAGE_ERROR = ( "The google-cloud-bigquery-storage library is not installed, " "please install google-cloud-bigquery-storage to use bqstorage features." @@ -1426,6 +1429,11 @@ def _to_page_iterable( # with the tabledata.list API. pass + _LOGGER.debug( + "Started reading table '{}.{}.{}' with tabledata.list.".format( + self._table.project, self._table.dataset_id, self._table.table_id + ) + ) for item in tabledata_list_download(): yield item diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index e14420846846..dacaa8074f6a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -14,6 +14,7 @@ import itertools import json +import logging import time import unittest import warnings @@ -1445,8 +1446,16 @@ def _class_under_test(self): return RowIterator def _make_one( - self, client=None, api_request=None, path=None, schema=None, **kwargs + self, + client=None, + api_request=None, + path=None, + schema=None, + table=None, + **kwargs ): + from google.cloud.bigquery.table import TableReference + if client is None: client = _mock_client() @@ -1459,7 +1468,12 @@ def _make_one( if schema is None: schema = [] - return self._class_under_test()(client, api_request, path, schema, **kwargs) + if table is None: + table = TableReference.from_string("my-project.my_dataset.my_table") + + return self._class_under_test()( + client, api_request, path, schema, table=table, **kwargs + ) def test_constructor(self): from google.cloud.bigquery.table import _item_to_row @@ -2071,9 +2085,8 @@ def test_to_dataframe_w_empty_results(self): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - path = "/foo" api_request = mock.Mock(return_value={"rows": []}) - row_iterator = self._make_one(_mock_client(), api_request, path, schema) + row_iterator = self._make_one(_mock_client(), api_request, schema=schema) df = row_iterator.to_dataframe() @@ -2081,6 +2094,23 @@ def test_to_dataframe_w_empty_results(self): self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_logs_tabledata_list(self): + from google.cloud.bigquery.table import Table + + mock_logger = mock.create_autospec(logging.Logger) + api_request = mock.Mock(return_value={"rows": []}) + row_iterator = self._make_one( + _mock_client(), api_request, table=Table("debug-proj.debug_dset.debug_tbl") + ) + + with mock.patch("google.cloud.bigquery.table._LOGGER", mock_logger): + row_iterator.to_dataframe() + + mock_logger.debug.assert_any_call( + "Started reading table 'debug-proj.debug_dset.debug_tbl' with tabledata.list." + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): import datetime @@ -2191,23 +2221,13 @@ def test_to_dataframe_w_bqstorage_no_streams(self): bigquery_storage_v1beta1.BigQueryStorageClient ) session = bigquery_storage_v1beta1.types.ReadSession() - session.avro_schema.schema = json.dumps( - { - "fields": [ - {"name": "colA"}, - # Not alphabetical to test column order. - {"name": "colC"}, - {"name": "colB"}, - ] - } - ) bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( _mock_client(), - None, # api_request: ignored - None, # path: ignored - [ + api_request=None, + path=None, + schema=[ schema.SchemaField("colA", "IGNORED"), schema.SchemaField("colC", "IGNORED"), schema.SchemaField("colB", "IGNORED"), @@ -2220,6 +2240,33 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_w_bqstorage_logs_session(self): + from google.cloud.bigquery.table import Table + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + session = bigquery_storage_v1beta1.types.ReadSession() + session.name = "projects/test-proj/locations/us/sessions/SOMESESSION" + bqstorage_client.create_read_session.return_value = session + mock_logger = mock.create_autospec(logging.Logger) + row_iterator = self._make_one( + _mock_client(), table=Table("debug-proj.debug_dset.debug_tbl") + ) + + with mock.patch("google.cloud.bigquery._pandas_helpers._LOGGER", mock_logger): + row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + + mock_logger.debug.assert_any_call( + "Started reading table 'debug-proj.debug_dset.debug_tbl' " + "with BQ Storage API session 'projects/test-proj/locations/us/sessions/SOMESESSION'." + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" From a941db3fa9d1d48885a926ad154333d3892a787f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 5 Aug 2019 10:38:52 +0200 Subject: [PATCH 0631/2016] BigQuery: Add to_standard_sql() method to SchemaField (#8880) * Add to_standard_sql() method to SchemaField * Support standard SQL names in to_standard_sql() * Add support for ARRAY type in to_standard_sql() --- .../google/cloud/bigquery/schema.py | 62 +++++++ .../tests/unit/test_schema.py | 166 ++++++++++++++++++ 2 files changed, 228 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 99fc65fc0c46..61bc0bcedfd6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -14,6 +14,33 @@ """Schemas for BigQuery tables / queries.""" +from google.cloud.bigquery_v2 import types + + +# SQL types reference: +# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types +# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types +LEGACY_TO_STANDARD_TYPES = { + "STRING": types.StandardSqlDataType.STRING, + "BYTES": types.StandardSqlDataType.BYTES, + "INTEGER": types.StandardSqlDataType.INT64, + "INT64": types.StandardSqlDataType.INT64, + "FLOAT": types.StandardSqlDataType.FLOAT64, + "FLOAT64": types.StandardSqlDataType.FLOAT64, + "NUMERIC": types.StandardSqlDataType.NUMERIC, + "BOOLEAN": types.StandardSqlDataType.BOOL, + "BOOL": types.StandardSqlDataType.BOOL, + "GEOGRAPHY": types.StandardSqlDataType.GEOGRAPHY, + "RECORD": types.StandardSqlDataType.STRUCT, + "STRUCT": types.StandardSqlDataType.STRUCT, + "TIMESTAMP": types.StandardSqlDataType.TIMESTAMP, + "DATE": types.StandardSqlDataType.DATE, + "TIME": types.StandardSqlDataType.TIME, + "DATETIME": types.StandardSqlDataType.DATETIME, + # no direct conversion from ARRAY, the latter is represented by mode="REPEATED" +} +"""String names of the legacy SQL types to integer codes of Standard SQL types.""" + class SchemaField(object): """Describe a single field within a table schema. @@ -146,6 +173,41 @@ def _key(self): self._fields, ) + def to_standard_sql(self): + """Return the field as the standard SQL field representation object. + + Returns: + An instance of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`. + """ + sql_type = types.StandardSqlDataType() + + if self.mode == "REPEATED": + sql_type.type_kind = types.StandardSqlDataType.ARRAY + else: + sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get( + self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED + ) + + if sql_type.type_kind == types.StandardSqlDataType.ARRAY: # noqa: E721 + array_element_type = LEGACY_TO_STANDARD_TYPES.get( + self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED + ) + sql_type.array_element_type.type_kind = array_element_type + + # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type + if array_element_type == types.StandardSqlDataType.STRUCT: # noqa: E721 + sql_type.array_element_type.struct_type.fields.extend( + field.to_standard_sql() for field in self.fields + ) + + elif sql_type.type_kind == types.StandardSqlDataType.STRUCT: # noqa: E721 + sql_type.struct_type.fields.extend( + field.to_standard_sql() for field in self.fields + ) + + return types.StandardSqlField(name=self.name, type=sql_type) + def __eq__(self, other): if not isinstance(other, SchemaField): return NotImplemented diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 4694aaf63cd8..682e45895852 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -24,6 +24,12 @@ def _get_target_class(): return SchemaField + @staticmethod + def _get_standard_sql_data_type_class(): + from google.cloud.bigquery_v2 import types + + return types.StandardSqlDataType + def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) @@ -151,6 +157,166 @@ def test_fields_property(self): schema_field = self._make_one("boat", "RECORD", fields=fields) self.assertIs(schema_field.fields, fields) + def test_to_standard_sql_simple_type(self): + sql_type = self._get_standard_sql_data_type_class() + examples = ( + # a few legacy types + ("INTEGER", sql_type.INT64), + ("FLOAT", sql_type.FLOAT64), + ("BOOLEAN", sql_type.BOOL), + ("DATETIME", sql_type.DATETIME), + # a few standard types + ("INT64", sql_type.INT64), + ("FLOAT64", sql_type.FLOAT64), + ("BOOL", sql_type.BOOL), + ("GEOGRAPHY", sql_type.GEOGRAPHY), + ) + for legacy_type, standard_type in examples: + field = self._make_one("some_field", legacy_type) + standard_field = field.to_standard_sql() + self.assertEqual(standard_field.name, "some_field") + self.assertEqual(standard_field.type.type_kind, standard_type) + self.assertFalse(standard_field.type.HasField("sub_type")) + + def test_to_standard_sql_struct_type(self): + from google.cloud.bigquery_v2 import types + + # Expected result object: + # + # name: "image_usage" + # type { + # type_kind: STRUCT + # struct_type { + # fields { + # name: "image_content" + # type {type_kind: BYTES} + # } + # fields { + # name: "last_used" + # type { + # type_kind: STRUCT + # struct_type { + # fields { + # name: "date_field" + # type {type_kind: DATE} + # } + # fields { + # name: "time_field" + # type {type_kind: TIME} + # } + # } + # } + # } + # } + # } + + sql_type = self._get_standard_sql_data_type_class() + + # level 2 fields + sub_sub_field_date = types.StandardSqlField( + name="date_field", type=sql_type(type_kind=sql_type.DATE) + ) + sub_sub_field_time = types.StandardSqlField( + name="time_field", type=sql_type(type_kind=sql_type.TIME) + ) + + # level 1 fields + sub_field_struct = types.StandardSqlField( + name="last_used", type=sql_type(type_kind=sql_type.STRUCT) + ) + sub_field_struct.type.struct_type.fields.extend( + [sub_sub_field_date, sub_sub_field_time] + ) + sub_field_bytes = types.StandardSqlField( + name="image_content", type=sql_type(type_kind=sql_type.BYTES) + ) + + # level 0 (top level) + expected_result = types.StandardSqlField( + name="image_usage", type=sql_type(type_kind=sql_type.STRUCT) + ) + expected_result.type.struct_type.fields.extend( + [sub_field_bytes, sub_field_struct] + ) + + # construct legacy SchemaField object + sub_sub_field1 = self._make_one("date_field", "DATE") + sub_sub_field2 = self._make_one("time_field", "TIME") + sub_field_record = self._make_one( + "last_used", "RECORD", fields=(sub_sub_field1, sub_sub_field2) + ) + sub_field_bytes = self._make_one("image_content", "BYTES") + + for type_name in ("RECORD", "STRUCT"): + schema_field = self._make_one( + "image_usage", type_name, fields=(sub_field_bytes, sub_field_record) + ) + standard_field = schema_field.to_standard_sql() + self.assertEqual(standard_field, expected_result) + + def test_to_standard_sql_array_type_simple(self): + from google.cloud.bigquery_v2 import types + + sql_type = self._get_standard_sql_data_type_class() + + # construct expected result object + expected_sql_type = sql_type(type_kind=sql_type.ARRAY) + expected_sql_type.array_element_type.type_kind = sql_type.INT64 + expected_result = types.StandardSqlField( + name="valid_numbers", type=expected_sql_type + ) + + # construct "repeated" SchemaField object and convert to standard SQL + schema_field = self._make_one("valid_numbers", "INT64", mode="REPEATED") + standard_field = schema_field.to_standard_sql() + + self.assertEqual(standard_field, expected_result) + + def test_to_standard_sql_array_type_struct(self): + from google.cloud.bigquery_v2 import types + + sql_type = self._get_standard_sql_data_type_class() + + # define person STRUCT + name_field = types.StandardSqlField( + name="name", type=sql_type(type_kind=sql_type.STRING) + ) + age_field = types.StandardSqlField( + name="age", type=sql_type(type_kind=sql_type.INT64) + ) + person_struct = types.StandardSqlField( + name="person_info", type=sql_type(type_kind=sql_type.STRUCT) + ) + person_struct.type.struct_type.fields.extend([name_field, age_field]) + + # define expected result - an ARRAY of person structs + expected_sql_type = sql_type( + type_kind=sql_type.ARRAY, array_element_type=person_struct.type + ) + expected_result = types.StandardSqlField( + name="known_people", type=expected_sql_type + ) + + # construct legacy repeated SchemaField object + sub_field1 = self._make_one("name", "STRING") + sub_field2 = self._make_one("age", "INTEGER") + schema_field = self._make_one( + "known_people", "RECORD", fields=(sub_field1, sub_field2), mode="REPEATED" + ) + + standard_field = schema_field.to_standard_sql() + self.assertEqual(standard_field, expected_result) + + def test_to_standard_sql_unknown_type(self): + sql_type = self._get_standard_sql_data_type_class() + field = self._make_one("weird_field", "TROOLEAN") + + standard_field = field.to_standard_sql() + + self.assertEqual(standard_field.name, "weird_field") + self.assertEqual(standard_field.type.type_kind, sql_type.TYPE_KIND_UNSPECIFIED) + self.assertFalse(standard_field.type.HasField("sub_type")) + def test___eq___wrong_type(self): field = self._make_one("test", "STRING") other = object() From c31502abd2c5c05344216ae2fb8c6d4fe90a1a71 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 5 Aug 2019 15:35:04 -0700 Subject: [PATCH 0632/2016] BigQuery: Remove redundant service account key code sample. (#8891) * Remove redundant service account key code sample. * Remove unused import. --- .../google-cloud-bigquery/docs/snippets.py | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 8697eb74e080..249fbf7baafa 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -26,7 +26,6 @@ import os import time -import mock import pytest import six @@ -126,27 +125,6 @@ def test_create_client_default_credentials(): assert client is not None -def test_create_client_json_credentials(): - """Create a BigQuery client with Application Default Credentials""" - with open(os.environ["GOOGLE_APPLICATION_CREDENTIALS"]) as creds_file: - creds_file_data = creds_file.read() - - open_mock = mock.mock_open(read_data=creds_file_data) - - with mock.patch("io.open", open_mock): - # [START bigquery_client_json_credentials] - from google.cloud import bigquery - - # Explicitly use service account credentials by specifying the private - # key file. All clients in google-cloud-python have this helper. - client = bigquery.Client.from_service_account_json( - "path/to/service_account.json" - ) - # [END bigquery_client_json_credentials] - - assert client is not None - - def test_list_datasets_by_label(client, to_delete): dataset_id = "list_datasets_by_label_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) From ed59bcc3800250d5b48425e5af1f4f0c6e9c62e3 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 6 Aug 2019 09:26:12 -0700 Subject: [PATCH 0633/2016] [CHANGE ME] Re-generated bigquery to pick up changes in the API or client library generator. (#8945) --- .../google/cloud/bigquery_v2/gapic/enums.py | 19 +- .../cloud/bigquery_v2/proto/model.proto | 97 ++- .../cloud/bigquery_v2/proto/model_pb2.py | 654 ++++++++++++++++-- packages/google-cloud-bigquery/synth.metadata | 10 +- 4 files changed, 695 insertions(+), 85 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py index e1ce20f9a130..5d95f2590785 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py @@ -91,7 +91,7 @@ class ModelType(enum.IntEnum): MODEL_TYPE_UNSPECIFIED (int) LINEAR_REGRESSION (int): Linear regression model. LOGISTIC_REGRESSION (int): Logistic regression based classification model. - KMEANS (int): [Beta] K-means clustering model. + KMEANS (int): K-means clustering model. TENSORFLOW (int): [Beta] An imported TensorFlow model. """ @@ -115,6 +115,23 @@ class OptimizationStrategy(enum.IntEnum): BATCH_GRADIENT_DESCENT = 1 NORMAL_EQUATION = 2 + class KmeansEnums(object): + class KmeansInitializationMethod(enum.IntEnum): + """ + Indicates the method used to initialize the centroids for KMeans + clustering algorithm. + + Attributes: + KMEANS_INITIALIZATION_METHOD_UNSPECIFIED (int) + RANDOM (int): Initializes the centroids randomly. + CUSTOM (int): Initializes the centroids using data specified in + kmeans\_initialization\_column. + """ + + KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0 + RANDOM = 1 + CUSTOM = 2 + class StandardSqlDataType(object): class TypeKind(enum.IntEnum): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto index 2b5b6894db5b..42246e8efff2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto @@ -33,6 +33,7 @@ service ModelService { option (google.api.default_host) = "bigquery.googleapis.com"; option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/bigquery," + "https://www.googleapis.com/auth/bigquery.readonly," "https://www.googleapis.com/auth/cloud-platform," "https://www.googleapis.com/auth/cloud-platform.read-only"; @@ -55,7 +56,25 @@ service ModelService { } message Model { - // Evaluation metrics for regression models. + message KmeansEnums { + // Indicates the method used to initialize the centroids for KMeans + // clustering algorithm. + enum KmeansInitializationMethod { + KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0; + + // Initializes the centroids randomly. + RANDOM = 1; + + // Initializes the centroids using data specified in + // kmeans_initialization_column. + CUSTOM = 2; + } + + + } + + // Evaluation metrics for regression and explicit feedback type matrix + // factorization models. message RegressionMetrics { // Mean absolute error. google.protobuf.DoubleValue mean_absolute_error = 1; @@ -199,11 +218,60 @@ message Model { // Evaluation metrics for clustering models. message ClusteringMetrics { + // Message containing the information about one cluster. + message Cluster { + // Representative value of a single feature within the cluster. + message FeatureValue { + // Representative value of a categorical feature. + message CategoricalValue { + // Represents the count of a single category within the cluster. + message CategoryCount { + // The name of category. + string category = 1; + + // The count of training samples matching the category within the + // cluster. + google.protobuf.Int64Value count = 2; + } + + // Counts of all categories for the categorical feature. If there are + // more than ten categories, we return top ten (by count) and return + // one more CategoryCount with category ‘_OTHER_’ and count as + // aggregate counts of remaining categories. + repeated CategoryCount category_counts = 1; + } + + // The feature column name. + string feature_column = 1; + + oneof value { + // The numerical feature value. This is the centroid value for this + // feature. + google.protobuf.DoubleValue numerical_value = 2; + + // The categorical feature value. + CategoricalValue categorical_value = 3; + } + } + + // Centroid id. + int64 centroid_id = 1; + + // Values of highly variant features for this cluster. + repeated FeatureValue feature_values = 2; + + // Count of training data rows that were assigned to this cluster. + google.protobuf.Int64Value count = 3; + } + // Davies-Bouldin index. google.protobuf.DoubleValue davies_bouldin_index = 1; // Mean of squared distances between each sample to its cluster centroid. google.protobuf.DoubleValue mean_squared_distance = 2; + + // [Beta] Information for all clusters. + repeated Cluster clusters = 3; } // Evaluation metrics of a model. These are either computed on all training @@ -211,7 +279,8 @@ message Model { // training. These are not present for imported models. message EvaluationMetrics { oneof metrics { - // Populated for regression models. + // Populated for regression models and explicit feedback type matrix + // factorization models. RegressionMetrics regression_metrics = 1; // Populated for binary classification/classifier models. @@ -220,7 +289,7 @@ message Model { // Populated for multi-class classification/classifier models. MultiClassClassificationMetrics multi_class_classification_metrics = 3; - // [Beta] Populated for clustering models. + // Populated for clustering models. ClusteringMetrics clustering_metrics = 4; } } @@ -292,10 +361,10 @@ message Model { // training data. Only applicable for classification models. map label_class_weights = 17; - // [Beta] Distance type for clustering models. + // Distance type for clustering models. DistanceType distance_type = 20; - // [Beta] Number of clusters for clustering models. + // Number of clusters for clustering models. int64 num_clusters = 21; // [Beta] Google Cloud Storage URI from which the model was imported. Only @@ -304,6 +373,13 @@ message Model { // Optimization strategy for training linear regression models. OptimizationStrategy optimization_strategy = 23; + + // The method used to initialize the centroids for kmeans algorithm. + KmeansEnums.KmeansInitializationMethod kmeans_initialization_method = 33; + + // The column used to provide the initial centroids for kmeans algorithm + // when kmeans_initialization_method is CUSTOM. + string kmeans_initialization_column = 34; } // Information about a single iteration of the training run. @@ -336,7 +412,7 @@ message Model { // Learn rate used for this iteration. double learn_rate = 7; - // [Beta] Information about top clusters for clustering models. + // Information about top clusters for clustering models. repeated ClusterInfo cluster_infos = 8; } @@ -365,7 +441,7 @@ message Model { // Logistic regression based classification model. LOGISTIC_REGRESSION = 2; - // [Beta] K-means clustering model. + // K-means clustering model. KMEANS = 3; // [Beta] An imported TensorFlow model. @@ -513,8 +589,8 @@ message PatchModelRequest { string model_id = 3; // Patched model. - // Follows patch semantics. Missing fields are not updated. To clear a field, - // explicitly set to default value. + // Follows RFC5789 patch semantics. Missing fields are not updated. + // To clear a field, explicitly set to default value. Model model = 4; } @@ -536,7 +612,8 @@ message ListModelsRequest { // Dataset ID of the models to list. string dataset_id = 2; - // The maximum number of results per page. + // The maximum number of results to return in a single response page. + // Leverage the page tokens to iterate through the entire collection. google.protobuf.UInt32Value max_results = 3; // Page token, returned by a previous call to request the next page of diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py index ed82d8e4e8fd..45e6cefdf72c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py @@ -36,7 +36,7 @@ "\n\034com.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" ), serialized_pb=_b( - '\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto"\xc4,\n\x05Model\x12\x0c\n\x04\x65tag\x18\x01 \x01(\t\x12\x41\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReference\x12\x15\n\rcreation_time\x18\x05 \x01(\x03\x12\x1a\n\x12last_modified_time\x18\x06 \x01(\x03\x12\x13\n\x0b\x64\x65scription\x18\x0c \x01(\t\x12\x15\n\rfriendly_name\x18\x0e \x01(\t\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x17\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x12\x10\n\x08location\x18\r \x01(\t\x12=\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelType\x12\x42\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRun\x12\x43\n\x0f\x66\x65\x61ture_columns\x18\n \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x12\x41\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x9f\x06\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x12\x16\n\x0epositive_label\x18\x03 \x01(\t\x12\x16\n\x0enegative_label\x18\x04 \x01(\t\x1a\xec\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\t \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\x8c\x01\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\x97\x0e\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\x89\x08\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 \x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x12\x11\n\tmodel_uri\x18\x16 \x01(\t\x12S\n\x15optimization_strategy\x18\x17 \x01(\x0e\x32\x34.google.cloud.bigquery.v2.Model.OptimizationStrategy\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"s\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03\x12\x0e\n\nTENSORFLOW\x10\x06"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"n\n\x14OptimizationStrategy\x12%\n!OPTIMIZATION_STRATEGY_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x42\x41TCH_GRADIENT_DESCENT\x10\x01\x12\x13\n\x0fNORMAL_EQUATION\x10\x02"K\n\x0fGetModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"}\n\x11PatchModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12.\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.Model"N\n\x12\x44\x65leteModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"\x82\x01\n\x11ListModelsRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 \x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\xb9\x04\n\x0cModelService\x12X\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12i\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"\x00\x12\\\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12U\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"\x00\x1a\xae\x01\xca\x41\x17\x62igquery.googleapis.com\xd2\x41\x90\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-onlyBl\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' + '\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto"\x8a\x34\n\x05Model\x12\x0c\n\x04\x65tag\x18\x01 \x01(\t\x12\x41\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReference\x12\x15\n\rcreation_time\x18\x05 \x01(\x03\x12\x1a\n\x12last_modified_time\x18\x06 \x01(\x03\x12\x13\n\x0b\x64\x65scription\x18\x0c \x01(\t\x12\x15\n\rfriendly_name\x18\x0e \x01(\t\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x17\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x12\x10\n\x08location\x18\r \x01(\t\x12=\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelType\x12\x42\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRun\x12\x43\n\x0f\x66\x65\x61ture_columns\x18\n \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x12\x41\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x1aq\n\x0bKmeansEnums"b\n\x1aKmeansInitializationMethod\x12,\n(KMEANS_INITIALIZATION_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x9f\x06\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x12\x16\n\x0epositive_label\x18\x03 \x01(\t\x12\x16\n\x0enegative_label\x18\x04 \x01(\t\x1a\xec\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\t \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\xcb\x06\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12K\n\x08\x63lusters\x18\x03 \x03(\x0b\x32\x39.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster\x1a\xef\x04\n\x07\x43luster\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12^\n\x0e\x66\x65\x61ture_values\x18\x02 \x03(\x0b\x32\x46.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue\x12*\n\x05\x63ount\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\xc2\x03\n\x0c\x46\x65\x61tureValue\x12\x16\n\x0e\x66\x65\x61ture_column\x18\x01 \x01(\t\x12\x37\n\x0fnumerical_value\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValueH\x00\x12t\n\x11\x63\x61tegorical_value\x18\x03 \x01(\x0b\x32W.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValueH\x00\x1a\xe1\x01\n\x10\x43\x61tegoricalValue\x12~\n\x0f\x63\x61tegory_counts\x18\x01 \x03(\x0b\x32\x65.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount\x1aM\n\rCategoryCount\x12\x10\n\x08\x63\x61tegory\x18\x01 \x01(\t\x12*\n\x05\x63ount\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64ValueB\x07\n\x05value\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\xab\x0f\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\x9d\t\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 \x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x12\x11\n\tmodel_uri\x18\x16 \x01(\t\x12S\n\x15optimization_strategy\x18\x17 \x01(\x0e\x32\x34.google.cloud.bigquery.v2.Model.OptimizationStrategy\x12l\n\x1ckmeans_initialization_method\x18! \x01(\x0e\x32\x46.google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod\x12$\n\x1ckmeans_initialization_column\x18" \x01(\t\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"s\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03\x12\x0e\n\nTENSORFLOW\x10\x06"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"n\n\x14OptimizationStrategy\x12%\n!OPTIMIZATION_STRATEGY_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x42\x41TCH_GRADIENT_DESCENT\x10\x01\x12\x13\n\x0fNORMAL_EQUATION\x10\x02"K\n\x0fGetModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"}\n\x11PatchModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12.\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.Model"N\n\x12\x44\x65leteModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"\x82\x01\n\x11ListModelsRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 \x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\xeb\x04\n\x0cModelService\x12X\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12i\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"\x00\x12\\\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12U\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"\x00\x1a\xe0\x01\xca\x41\x17\x62igquery.googleapis.com\xd2\x41\xc2\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-onlyBl\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' ), dependencies=[ google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2.DESCRIPTOR, @@ -50,6 +50,33 @@ ) +_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD = _descriptor.EnumDescriptor( + name="KmeansInitializationMethod", + full_name="google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod", + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name="KMEANS_INITIALIZATION_METHOD_UNSPECIFIED", + index=0, + number=0, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="RANDOM", index=1, number=1, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="CUSTOM", index=2, number=2, serialized_options=None, type=None + ), + ], + containing_type=None, + serialized_options=None, + serialized_start=898, + serialized_end=996, +) +_sym_db.RegisterEnumDescriptor(_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD) + _MODEL_MODELTYPE = _descriptor.EnumDescriptor( name="ModelType", full_name="google.cloud.bigquery.v2.Model.ModelType", @@ -86,8 +113,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=5432, - serialized_end=5547, + serialized_start=6398, + serialized_end=6513, ) _sym_db.RegisterEnumDescriptor(_MODEL_MODELTYPE) @@ -117,8 +144,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=5549, - serialized_end=5628, + serialized_start=6515, + serialized_end=6594, ) _sym_db.RegisterEnumDescriptor(_MODEL_LOSSTYPE) @@ -144,8 +171,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=5630, - serialized_end=5702, + serialized_start=6596, + serialized_end=6668, ) _sym_db.RegisterEnumDescriptor(_MODEL_DISTANCETYPE) @@ -180,8 +207,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=5704, - serialized_end=5826, + serialized_start=6670, + serialized_end=6792, ) _sym_db.RegisterEnumDescriptor(_MODEL_DATASPLITMETHOD) @@ -207,8 +234,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=5828, - serialized_end=5915, + serialized_start=6794, + serialized_end=6881, ) _sym_db.RegisterEnumDescriptor(_MODEL_LEARNRATESTRATEGY) @@ -242,12 +269,31 @@ ], containing_type=None, serialized_options=None, - serialized_start=5917, - serialized_end=6027, + serialized_start=6883, + serialized_end=6993, ) _sym_db.RegisterEnumDescriptor(_MODEL_OPTIMIZATIONSTRATEGY) +_MODEL_KMEANSENUMS = _descriptor.Descriptor( + name="KmeansEnums", + full_name="google.cloud.bigquery.v2.Model.KmeansEnums", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[], + extensions=[], + nested_types=[], + enum_types=[_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=883, + serialized_end=996, +) + _MODEL_REGRESSIONMETRICS = _descriptor.Descriptor( name="RegressionMetrics", full_name="google.cloud.bigquery.v2.Model.RegressionMetrics", @@ -354,8 +400,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=884, - serialized_end=1192, + serialized_start=999, + serialized_end=1307, ) _MODEL_AGGREGATECLASSIFICATIONMETRICS = _descriptor.Descriptor( @@ -500,8 +546,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=1195, - serialized_end=1562, + serialized_start=1310, + serialized_end=1677, ) _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX = _descriptor.Descriptor( @@ -682,8 +728,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=1872, - serialized_end=2364, + serialized_start=1987, + serialized_end=2479, ) _MODEL_BINARYCLASSIFICATIONMETRICS = _descriptor.Descriptor( @@ -774,8 +820,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=1565, - serialized_end=2364, + serialized_start=1680, + serialized_end=2479, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY = _descriptor.Descriptor( @@ -830,8 +876,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2799, - serialized_end=2880, + serialized_start=2914, + serialized_end=2995, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW = _descriptor.Descriptor( @@ -886,8 +932,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2883, - serialized_end=3014, + serialized_start=2998, + serialized_end=3129, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX = _descriptor.Descriptor( @@ -945,8 +991,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2621, - serialized_end=3014, + serialized_start=2736, + serialized_end=3129, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS = _descriptor.Descriptor( @@ -1001,8 +1047,260 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2367, - serialized_end=3014, + serialized_start=2482, + serialized_end=3129, +) + +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT = _descriptor.Descriptor( + name="CategoryCount", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="category", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount.category", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="count", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount.count", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=3889, + serialized_end=3966, +) + +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE = _descriptor.Descriptor( + name="CategoricalValue", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="category_counts", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.category_counts", + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[ + _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT + ], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=3741, + serialized_end=3966, +) + +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE = _descriptor.Descriptor( + name="FeatureValue", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="feature_column", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.feature_column", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="numerical_value", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.numerical_value", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="categorical_value", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.categorical_value", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name="value", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.value", + index=0, + containing_type=None, + fields=[], + ) + ], + serialized_start=3525, + serialized_end=3975, +) + +_MODEL_CLUSTERINGMETRICS_CLUSTER = _descriptor.Descriptor( + name="Cluster", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="centroid_id", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.centroid_id", + index=0, + number=1, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="feature_values", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.feature_values", + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="count", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.count", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=3352, + serialized_end=3975, ) _MODEL_CLUSTERINGMETRICS = _descriptor.Descriptor( @@ -1048,17 +1346,35 @@ serialized_options=None, file=DESCRIPTOR, ), + _descriptor.FieldDescriptor( + name="clusters", + full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.clusters", + index=2, + number=3, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), ], extensions=[], - nested_types=[], + nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER], enum_types=[], serialized_options=None, is_extendable=False, syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=3017, - serialized_end=3157, + serialized_start=3132, + serialized_end=3975, ) _MODEL_EVALUATIONMETRICS = _descriptor.Descriptor( @@ -1157,8 +1473,8 @@ fields=[], ) ], - serialized_start=3160, - serialized_end=3565, + serialized_start=3978, + serialized_end=4383, ) _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY = _descriptor.Descriptor( @@ -1213,8 +1529,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=4853, - serialized_end=4909, + serialized_start=5819, + serialized_end=5875, ) _MODEL_TRAININGRUN_TRAININGOPTIONS = _descriptor.Descriptor( @@ -1566,6 +1882,42 @@ serialized_options=None, file=DESCRIPTOR, ), + _descriptor.FieldDescriptor( + name="kmeans_initialization_method", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.kmeans_initialization_method", + index=19, + number=33, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="kmeans_initialization_column", + full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.kmeans_initialization_column", + index=20, + number=34, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), ], extensions=[], nested_types=[_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY], @@ -1575,8 +1927,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=3876, - serialized_end=4909, + serialized_start=4694, + serialized_end=5875, ) _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO = _descriptor.Descriptor( @@ -1649,8 +2001,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=5244, - serialized_end=5383, + serialized_start=6210, + serialized_end=6349, ) _MODEL_TRAININGRUN_ITERATIONRESULT = _descriptor.Descriptor( @@ -1777,8 +2129,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=4912, - serialized_end=5383, + serialized_start=5878, + serialized_end=6349, ) _MODEL_TRAININGRUN = _descriptor.Descriptor( @@ -1872,8 +2224,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=3568, - serialized_end=5383, + serialized_start=4386, + serialized_end=6349, ) _MODEL_LABELSENTRY = _descriptor.Descriptor( @@ -1928,8 +2280,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=5385, - serialized_end=5430, + serialized_start=6351, + serialized_end=6396, ) _MODEL = _descriptor.Descriptor( @@ -2176,6 +2528,7 @@ ], extensions=[], nested_types=[ + _MODEL_KMEANSENUMS, _MODEL_REGRESSIONMETRICS, _MODEL_AGGREGATECLASSIFICATIONMETRICS, _MODEL_BINARYCLASSIFICATIONMETRICS, @@ -2199,7 +2552,7 @@ extension_ranges=[], oneofs=[], serialized_start=327, - serialized_end=6027, + serialized_end=6993, ) @@ -2273,8 +2626,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=6029, - serialized_end=6104, + serialized_start=6995, + serialized_end=7070, ) @@ -2366,8 +2719,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=6106, - serialized_end=6231, + serialized_start=7072, + serialized_end=7197, ) @@ -2441,8 +2794,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=6233, - serialized_end=6311, + serialized_start=7199, + serialized_end=7277, ) @@ -2534,8 +2887,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=6314, - serialized_end=6444, + serialized_start=7280, + serialized_end=7410, ) @@ -2591,10 +2944,12 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=6446, - serialized_end=6540, + serialized_start=7412, + serialized_end=7506, ) +_MODEL_KMEANSENUMS.containing_type = _MODEL +_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD.containing_type = _MODEL_KMEANSENUMS _MODEL_REGRESSIONMETRICS.fields_by_name[ "mean_absolute_error" ].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE @@ -2698,12 +3053,61 @@ "confusion_matrix_list" ].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX _MODEL_MULTICLASSCLASSIFICATIONMETRICS.containing_type = _MODEL +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT.fields_by_name[ + "count" +].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT.containing_type = ( + _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE +) +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE.fields_by_name[ + "category_counts" +].message_type = ( + _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT +) +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE.containing_type = ( + _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE +) +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ + "numerical_value" +].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ + "categorical_value" +].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.containing_type = ( + _MODEL_CLUSTERINGMETRICS_CLUSTER +) +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name["value"].fields.append( + _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name["numerical_value"] +) +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ + "numerical_value" +].containing_oneof = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name[ + "value" +] +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name["value"].fields.append( + _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name["categorical_value"] +) +_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ + "categorical_value" +].containing_oneof = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name[ + "value" +] +_MODEL_CLUSTERINGMETRICS_CLUSTER.fields_by_name[ + "feature_values" +].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE +_MODEL_CLUSTERINGMETRICS_CLUSTER.fields_by_name[ + "count" +].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE +_MODEL_CLUSTERINGMETRICS_CLUSTER.containing_type = _MODEL_CLUSTERINGMETRICS _MODEL_CLUSTERINGMETRICS.fields_by_name[ "davies_bouldin_index" ].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE _MODEL_CLUSTERINGMETRICS.fields_by_name[ "mean_squared_distance" ].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE +_MODEL_CLUSTERINGMETRICS.fields_by_name[ + "clusters" +].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER _MODEL_CLUSTERINGMETRICS.containing_type = _MODEL _MODEL_EVALUATIONMETRICS.fields_by_name[ "regression_metrics" @@ -2778,6 +3182,9 @@ _MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ "optimization_strategy" ].enum_type = _MODEL_OPTIMIZATIONSTRATEGY +_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ + "kmeans_initialization_method" +].enum_type = _MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD _MODEL_TRAININGRUN_TRAININGOPTIONS.containing_type = _MODEL_TRAININGRUN _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[ "cluster_radius" @@ -2859,13 +3266,23 @@ "Model", (_message.Message,), dict( + KmeansEnums=_reflection.GeneratedProtocolMessageType( + "KmeansEnums", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_KMEANSENUMS, + __module__="google.cloud.bigquery_v2.proto.model_pb2" + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.KmeansEnums) + ), + ), RegressionMetrics=_reflection.GeneratedProtocolMessageType( "RegressionMetrics", (_message.Message,), dict( DESCRIPTOR=_MODEL_REGRESSIONMETRICS, __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics for regression models. + __doc__="""Evaluation metrics for regression and explicit feedback type matrix + factorization models. Attributes: @@ -3063,6 +3480,86 @@ "ClusteringMetrics", (_message.Message,), dict( + Cluster=_reflection.GeneratedProtocolMessageType( + "Cluster", + (_message.Message,), + dict( + FeatureValue=_reflection.GeneratedProtocolMessageType( + "FeatureValue", + (_message.Message,), + dict( + CategoricalValue=_reflection.GeneratedProtocolMessageType( + "CategoricalValue", + (_message.Message,), + dict( + CategoryCount=_reflection.GeneratedProtocolMessageType( + "CategoryCount", + (_message.Message,), + dict( + DESCRIPTOR=_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Represents the count of a single category within the cluster. + + + Attributes: + category: + The name of category. + count: + The count of training samples matching the category within the + cluster. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount) + ), + ), + DESCRIPTOR=_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Representative value of a categorical feature. + + + Attributes: + category_counts: + Counts of all categories for the categorical feature. If there + are more than ten categories, we return top ten (by count) and + return one more CategoryCount with category ‘\ *OTHER*\ ’ and + count as aggregate counts of remaining categories. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue) + ), + ), + DESCRIPTOR=_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Representative value of a single feature within the cluster. + + + Attributes: + feature_column: + The feature column name. + numerical_value: + The numerical feature value. This is the centroid value for + this feature. + categorical_value: + The categorical feature value. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue) + ), + ), + DESCRIPTOR=_MODEL_CLUSTERINGMETRICS_CLUSTER, + __module__="google.cloud.bigquery_v2.proto.model_pb2", + __doc__="""Message containing the information about one cluster. + + + Attributes: + centroid_id: + Centroid id. + feature_values: + Values of highly variant features for this cluster. + count: + Count of training data rows that were assigned to this + cluster. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster) + ), + ), DESCRIPTOR=_MODEL_CLUSTERINGMETRICS, __module__="google.cloud.bigquery_v2.proto.model_pb2", __doc__="""Evaluation metrics for clustering models. @@ -3074,6 +3571,8 @@ mean_squared_distance: Mean of squared distances between each sample to its cluster centroid. + clusters: + [Beta] Information for all clusters. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics) ), @@ -3091,13 +3590,14 @@ Attributes: regression_metrics: - Populated for regression models. + Populated for regression models and explicit feedback type + matrix factorization models. binary_classification_metrics: Populated for binary classification/classifier models. multi_class_classification_metrics: Populated for multi-class classification/classifier models. clustering_metrics: - [Beta] Populated for clustering models. + Populated for clustering models. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.EvaluationMetrics) ), @@ -3177,14 +3677,20 @@ Weights associated with each label class, for rebalancing the training data. Only applicable for classification models. distance_type: - [Beta] Distance type for clustering models. + Distance type for clustering models. num_clusters: - [Beta] Number of clusters for clustering models. + Number of clusters for clustering models. model_uri: [Beta] Google Cloud Storage URI from which the model was imported. Only applicable for imported models. optimization_strategy: Optimization strategy for training linear regression models. + kmeans_initialization_method: + The method used to initialize the centroids for kmeans + algorithm. + kmeans_initialization_column: + The column used to provide the initial centroids for kmeans + algorithm when kmeans\_initialization\_method is CUSTOM. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions) ), @@ -3232,7 +3738,7 @@ learn_rate: Learn rate used for this iteration. cluster_infos: - [Beta] Information about top clusters for clustering models. + Information about top clusters for clustering models. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult) ), @@ -3321,6 +3827,7 @@ ), ) _sym_db.RegisterMessage(Model) +_sym_db.RegisterMessage(Model.KmeansEnums) _sym_db.RegisterMessage(Model.RegressionMetrics) _sym_db.RegisterMessage(Model.AggregateClassificationMetrics) _sym_db.RegisterMessage(Model.BinaryClassificationMetrics) @@ -3330,6 +3837,12 @@ _sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry) _sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Row) _sym_db.RegisterMessage(Model.ClusteringMetrics) +_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster) +_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster.FeatureValue) +_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue) +_sym_db.RegisterMessage( + Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount +) _sym_db.RegisterMessage(Model.EvaluationMetrics) _sym_db.RegisterMessage(Model.TrainingRun) _sym_db.RegisterMessage(Model.TrainingRun.TrainingOptions) @@ -3375,8 +3888,9 @@ model_id: Model ID of the model to patch. model: - Patched model. Follows patch semantics. Missing fields are not - updated. To clear a field, explicitly set to default value. + Patched model. Follows RFC5789 patch semantics. Missing fields + are not updated. To clear a field, explicitly set to default + value. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.PatchModelRequest) ), @@ -3418,7 +3932,9 @@ dataset_id: Dataset ID of the models to list. max_results: - The maximum number of results per page. + The maximum number of results to return in a single response + page. Leverage the page tokens to iterate through the entire + collection. page_token: Page token, returned by a previous call to request the next page of results @@ -3460,10 +3976,10 @@ file=DESCRIPTOR, index=0, serialized_options=_b( - "\312A\027bigquery.googleapis.com\322A\220\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-only" + "\312A\027bigquery.googleapis.com\322A\302\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-only" ), - serialized_start=6543, - serialized_end=7112, + serialized_start=7509, + serialized_end=8128, methods=[ _descriptor.MethodDescriptor( name="GetModel", diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 44837073d6d6..b5cce0083f8e 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -1,19 +1,19 @@ { - "updateTime": "2019-06-15T12:11:48.348952Z", + "updateTime": "2019-08-06T12:13:25.893023Z", "sources": [ { "generator": { "name": "artman", - "version": "0.26.0", - "dockerImage": "googleapis/artman@sha256:6db0735b0d3beec5b887153a2a7c7411fc7bb53f73f6f389a822096bd14a3a15" + "version": "0.32.1", + "dockerImage": "googleapis/artman@sha256:a684d40ba9a4e15946f5f2ca6b4bd9fe301192f522e9de4fff622118775f309b" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "7b58b37559f6a5337c4c564518e9573d742df225", - "internalRef": "253322136" + "sha": "e699b0cba64ffddfae39633417180f1f65875896", + "internalRef": "261759677" } } ], From edff674a7e92fb98a08bbc721c71f5ced071dfb7 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 6 Aug 2019 19:57:15 +0200 Subject: [PATCH 0634/2016] BigQuery: Allow choice of compression when loading from dataframe (#8938) * Allow choice of compression when loading from DF * Mark parquet_compression parameter as [Beta] * Support compression arg in dataframe_to_parquet() --- .../google/cloud/bigquery/_pandas_helpers.py | 9 ++- .../google/cloud/bigquery/client.py | 25 +++++++- .../tests/unit/test__pandas_helpers.py | 22 +++++++ .../tests/unit/test_client.py | 60 +++++++++++++++++++ 4 files changed, 112 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index d508929e5d6a..5ac0505e91ae 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -208,7 +208,7 @@ def dataframe_to_arrow(dataframe, bq_schema): return pyarrow.Table.from_arrays(arrow_arrays, names=arrow_names) -def dataframe_to_parquet(dataframe, bq_schema, filepath): +def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SNAPPY"): """Write dataframe as a Parquet file, according to the desired BQ schema. This function requires the :mod:`pyarrow` package. Arrow is used as an @@ -222,12 +222,17 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath): columns in the DataFrame. filepath (str): Path to write Parquet file to. + parquet_compression (str): + (optional) The compression codec to use by the the + ``pyarrow.parquet.write_table`` serializing method. Defaults to + "SNAPPY". + https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table """ if pyarrow is None: raise ValueError("pyarrow is required for BigQuery schema conversion.") arrow_table = dataframe_to_arrow(dataframe, bq_schema) - pyarrow.parquet.write_table(arrow_table, filepath) + pyarrow.parquet.write_table(arrow_table, filepath, compression=parquet_compression) def _tabledata_list_page_to_arrow(page, column_names, arrow_types): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index b8ce2d5a33f3..04c596975eec 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1449,6 +1449,7 @@ def load_table_from_dataframe( location=None, project=None, job_config=None, + parquet_compression="snappy", ): """Upload the contents of a table from a pandas DataFrame. @@ -1491,6 +1492,20 @@ def load_table_from_dataframe( column names matching those of the dataframe. The BigQuery schema is used to determine the correct data type conversion. Indexes are not loaded. Requires the :mod:`pyarrow` library. + parquet_compression (str): + [Beta] The compression method to use if intermittently + serializing ``dataframe`` to a parquet file. + + If ``pyarrow`` and job config schema are used, the argument + is directly passed as the ``compression`` argument to the + underlying ``pyarrow.parquet.write_table()`` method (the + default value "snappy" gets converted to uppercase). + https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table + + If either ``pyarrow`` or job config schema are missing, the + argument is directly passed as the ``compression`` argument + to the underlying ``DataFrame.to_parquet()`` method. + https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -1515,8 +1530,14 @@ def load_table_from_dataframe( try: if pyarrow and job_config.schema: + if parquet_compression == "snappy": # adjust the default value + parquet_compression = parquet_compression.upper() + _pandas_helpers.dataframe_to_parquet( - dataframe, job_config.schema, tmppath + dataframe, + job_config.schema, + tmppath, + parquet_compression=parquet_compression, ) else: if job_config.schema: @@ -1527,7 +1548,7 @@ def load_table_from_dataframe( PendingDeprecationWarning, stacklevel=2, ) - dataframe.to_parquet(tmppath) + dataframe.to_parquet(tmppath, compression=parquet_compression) with open(tmppath, "rb") as parquet_file: return self.load_table_from_file( diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 9348635f2dc6..6aad587837b4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -17,6 +17,8 @@ import functools import warnings +import mock + try: import pandas except ImportError: # pragma: NO COVER @@ -613,3 +615,23 @@ def test_dataframe_to_parquet_w_missing_columns(module_under_test, monkeypatch): pandas.DataFrame(), (schema.SchemaField("not_found", "STRING"),), None ) assert "columns in schema must match" in str(exc_context.value) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_dataframe_to_parquet_compression_method(module_under_test): + bq_schema = (schema.SchemaField("field00", "STRING"),) + dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) + + write_table_patch = mock.patch.object( + module_under_test.pyarrow.parquet, "write_table", autospec=True + ) + + with write_table_patch as fake_write_table: + module_under_test.dataframe_to_parquet( + dataframe, bq_schema, None, parquet_compression="ZSTD" + ) + + call_args = fake_write_table.call_args + assert call_args is not None + assert call_args.kwargs.get("compression") == "ZSTD" diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 2be40a52e1fc..c4e9c5e830ac 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5375,6 +5375,66 @@ def test_load_table_from_dataframe_w_schema_wo_pyarrow(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert tuple(sent_config.schema) == schema + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] + dataframe = pandas.DataFrame(records) + schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER")) + job_config = job.LoadJobConfig(schema=schema) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + to_parquet_patch = mock.patch( + "google.cloud.bigquery.client._pandas_helpers.dataframe_to_parquet", + autospec=True, + ) + + with load_patch, to_parquet_patch as fake_to_parquet: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + job_config=job_config, + location=self.LOCATION, + parquet_compression="LZ4", + ) + + call_args = fake_to_parquet.call_args + assert call_args is not None + assert call_args.kwargs.get("parquet_compression") == "LZ4" + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): + client = self._make_client() + records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] + dataframe = pandas.DataFrame(records) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) + to_parquet_patch = mock.patch.object( + dataframe, "to_parquet", wraps=dataframe.to_parquet + ) + + with load_patch, pyarrow_patch, to_parquet_patch as to_parquet_spy: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + location=self.LOCATION, + parquet_compression="gzip", + ) + + call_args = to_parquet_spy.call_args + assert call_args is not None + assert call_args.kwargs.get("compression") == "gzip" + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): From 1da57cd5cb3f75f50d0c09468ed9c168eb209452 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 8 Aug 2019 13:53:21 -0700 Subject: [PATCH 0635/2016] Release bigquery 1.18.0 (#8995) --- packages/google-cloud-bigquery/CHANGELOG.md | 32 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 0d6630ef3bba..6d1c71ccbf79 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,38 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.18.0 + +08-08-2019 12:28 PDT + +### New Features + +- Add `bqstorage_client` param to `QueryJob.to_arrow()` ([#8693](https://github.com/googleapis/google-cloud-python/pull/8693)) +- Include SQL query and job ID in exception messages. ([#8748](https://github.com/googleapis/google-cloud-python/pull/8748)) +- Allow using TableListItem to construct a Table object. ([#8738](https://github.com/googleapis/google-cloud-python/pull/8738)) +- Add StandardSqlDataTypes enum to BigQuery ([#8782](https://github.com/googleapis/google-cloud-python/pull/8782)) +- Add `to_standard_sql()` method to SchemaField ([#8880](https://github.com/googleapis/google-cloud-python/pull/8880)) +- Add debug logging statements to track when BQ Storage API is used. ([#8838](https://github.com/googleapis/google-cloud-python/pull/8838)) +- Hide error traceback in BigQuery cell magic ([#8808](https://github.com/googleapis/google-cloud-python/pull/8808)) +- Allow choice of compression when loading from dataframe ([#8938](https://github.com/googleapis/google-cloud-python/pull/8938)) +- Additional clustering metrics for BQML K-means models (via synth). ([#8945](https://github.com/googleapis/google-cloud-python/pull/8945)) + +### Documentation + +- Add compatibility check badges to READMEs. ([#8288](https://github.com/googleapis/google-cloud-python/pull/8288)) +- Link to googleapis.dev documentation in READMEs. ([#8705](https://github.com/googleapis/google-cloud-python/pull/8705)) +- Remove redundant service account key code sample. ([#8891](https://github.com/googleapis/google-cloud-python/pull/8891)) + +### Internal / Testing Changes + +- Fix several pytest "skip if" markers ([#8694](https://github.com/googleapis/google-cloud-python/pull/8694)) +- Update tests to support conversion of NaN as NULL in pyarrow `0.14.*`. ([#8785](https://github.com/googleapis/google-cloud-python/pull/8785)) +- Mock external calls in one of BigQuery unit tests ([#8727](https://github.com/googleapis/google-cloud-python/pull/8727)) +- Set IPython user agent when running queries with IPython cell magic ([#8713](https://github.com/googleapis/google-cloud-python/pull/8713)) +- Use configurable bucket name for GCS samples data in systems tests. ([#8783](https://github.com/googleapis/google-cloud-python/pull/8783)) +- Move `maybe_fail_import()` to top level test utils ([#8840](https://github.com/googleapis/google-cloud-python/pull/8840)) +- Set BQ Storage client user-agent when in Jupyter cell ([#8734](https://github.com/googleapis/google-cloud-python/pull/8734)) + ## 1.17.0 07-12-2019 07:56 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index a05c18053379..337a974ac485 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.17.0" +version = "1.18.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 0aa4328c986edb4f80ce669e5e12d20fbb3a29e3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 13 Aug 2019 21:12:23 +0200 Subject: [PATCH 0636/2016] Fix Pandas DataFrame load example under Python 2.7 (#9022) --- packages/google-cloud-bigquery/docs/snippets.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 249fbf7baafa..9b4218286402 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -2765,15 +2765,15 @@ def test_load_table_from_dataframe(client, to_delete, parquet_engine): dataset_ref = client.dataset(dataset_id) table_ref = dataset_ref.table("monty_python") records = [ - {"title": "The Meaning of Life", "release_year": 1983}, - {"title": "Monty Python and the Holy Grail", "release_year": 1975}, - {"title": "Life of Brian", "release_year": 1979}, - {"title": "And Now for Something Completely Different", "release_year": 1971}, + {"title": u"The Meaning of Life", "release_year": 1983}, + {"title": u"Monty Python and the Holy Grail", "release_year": 1975}, + {"title": u"Life of Brian", "release_year": 1979}, + {"title": u"And Now for Something Completely Different", "release_year": 1971}, ] # Optionally set explicit indices. # If indices are not specified, a column will be created for the default # indices created by pandas. - index = ["Q24980", "Q25043", "Q24953", "Q16403"] + index = [u"Q24980", u"Q25043", u"Q24953", u"Q16403"] dataframe = pandas.DataFrame(records, index=pandas.Index(index, name="wikidata_id")) job = client.load_table_from_dataframe(dataframe, table_ref, location="US") From 373feb590a235a89fcc1418f60409d18374a1149 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 14 Aug 2019 17:29:06 +0200 Subject: [PATCH 0637/2016] Fix deserializing None QueryParameters (#9029) For None parameters, the back-end does not return the parameter value in response. This commit adjusts the ScalarQueryParameter's method from_api_repr(). --- .../google/cloud/bigquery/query.py | 11 +++++++++-- .../google-cloud-bigquery/tests/unit/test_query.py | 9 +++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 726b598d3aaf..4039be33db8c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -126,8 +126,15 @@ def from_api_repr(cls, resource): """ name = resource.get("name") type_ = resource["parameterType"]["type"] - value = resource["parameterValue"]["value"] - converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + + # parameterValue might not be present if JSON resource originates + # from the back-end - the latter omits it for None values. + value = resource.get("parameterValue", {}).get("value") + if value is not None: + converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + else: + converted = None + return cls(name, type_, converted) def to_api_repr(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 896ab78e3024..a7c639ed1e77 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -121,6 +121,15 @@ def test_from_api_repr_wo_name(self): self.assertEqual(param.type_, "INT64") self.assertEqual(param.value, 123) + def test_from_api_repr_wo_value(self): + # Back-end may not send back values for None params. See #9027 + RESOURCE = {"name": "foo", "parameterType": {"type": "INT64"}} + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, "foo") + self.assertEqual(param.type_, "INT64") + self.assertIs(param.value, None) + def test_to_api_repr_w_name(self): EXPECTED = { "name": "foo", From a25c4b2febc476a4e1bdacd873e98f553d3d1c62 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 14 Aug 2019 21:54:16 +0200 Subject: [PATCH 0638/2016] Fix schema recognition of struct field types (#9001) * Fix schema recognition of struct field types A struct field can be referred to as "RECORD" or "STRUCT", and this commit assures that the to_api_repr() logic is correct. * Mark STRUCT_TYPES as private in schema.py --- .../google/cloud/bigquery/_pandas_helpers.py | 5 +-- .../google/cloud/bigquery/schema.py | 4 +- .../tests/unit/test_schema.py | 39 ++++++++++--------- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 5ac0505e91ae..5cc69e434b04 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -47,7 +47,6 @@ "please install google-cloud-bigquery-storage to use bqstorage features." ) -STRUCT_TYPES = ("RECORD", "STRUCT") _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. @@ -126,7 +125,7 @@ def bq_to_arrow_data_type(field): return pyarrow.list_(inner_type) return None - if field.field_type.upper() in STRUCT_TYPES: + if field.field_type.upper() in schema._STRUCT_TYPES: return bq_to_arrow_struct_data_type(field) data_type_constructor = BQ_TO_ARROW_SCALARS.get(field.field_type.upper()) @@ -168,7 +167,7 @@ def bq_to_arrow_array(series, bq_field): arrow_type = bq_to_arrow_data_type(bq_field) if bq_field.mode.upper() == "REPEATED": return pyarrow.ListArray.from_pandas(series, type=arrow_type) - if bq_field.field_type.upper() in STRUCT_TYPES: + if bq_field.field_type.upper() in schema._STRUCT_TYPES: return pyarrow.StructArray.from_pandas(series, type=arrow_type) return pyarrow.array(series, type=arrow_type) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 61bc0bcedfd6..e0673d85baf6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -17,6 +17,8 @@ from google.cloud.bigquery_v2 import types +_STRUCT_TYPES = ("RECORD", "STRUCT") + # SQL types reference: # https://cloud.google.com/bigquery/data-types#legacy_sql_data_types # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types @@ -150,7 +152,7 @@ def to_api_repr(self): # If this is a RECORD type, then sub-fields are also included, # add this to the serialized representation. - if self.field_type.upper() == "RECORD": + if self.field_type.upper() in _STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] # Done; return the serialized dictionary. diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 682e45895852..862d8a823e62 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -71,25 +71,26 @@ def test_to_api_repr(self): ) def test_to_api_repr_with_subfield(self): - subfield = self._make_one("bar", "INTEGER", "NULLABLE") - field = self._make_one("foo", "RECORD", "REQUIRED", fields=(subfield,)) - self.assertEqual( - field.to_api_repr(), - { - "fields": [ - { - "mode": "NULLABLE", - "name": "bar", - "type": "INTEGER", - "description": None, - } - ], - "mode": "REQUIRED", - "name": "foo", - "type": "RECORD", - "description": None, - }, - ) + for record_type in ("RECORD", "STRUCT"): + subfield = self._make_one("bar", "INTEGER", "NULLABLE") + field = self._make_one("foo", record_type, "REQUIRED", fields=(subfield,)) + self.assertEqual( + field.to_api_repr(), + { + "fields": [ + { + "mode": "NULLABLE", + "name": "bar", + "type": "INTEGER", + "description": None, + } + ], + "mode": "REQUIRED", + "name": "foo", + "type": record_type, + "description": None, + }, + ) def test_from_api_repr(self): field = self._get_target_class().from_api_repr( From 58fd86acebb3e3bb0a4879a22ff00671e48a9670 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 16 Aug 2019 13:25:32 -0700 Subject: [PATCH 0639/2016] Remove compatability badges from READMEs. (#9035) --- packages/google-cloud-bigquery/README.rst | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 67da531a6597..8f73576d6f7d 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -1,7 +1,7 @@ Python Client for Google BigQuery ================================= -|GA| |pypi| |versions| |compat_check_pypi| |compat_check_github| +|GA| |pypi| |versions| Querying massive datasets can be time consuming and expensive without the right hardware and infrastructure. Google `BigQuery`_ solves this problem by @@ -17,10 +17,6 @@ processing power of Google's infrastructure. :target: https://pypi.org/project/google-cloud-bigquery/ .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ -.. |compat_check_pypi| image:: https://python-compatibility-tools.appspot.com/one_badge_image?package=google-cloud-bigquery - :target: https://python-compatibility-tools.appspot.com/one_badge_target?package=google-cloud-bigquery -.. |compat_check_github| image:: https://python-compatibility-tools.appspot.com/one_badge_image?package=git%2Bgit%3A//github.com/googleapis/google-cloud-python.git%23subdirectory%3Dbigquery - :target: https://python-compatibility-tools.appspot.com/one_badge_target?package=git%2Bgit%3A//github.com/googleapis/google-cloud-python.git%23subdirectory%3Dbigquery .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery .. _Client Library Documentation: https://googleapis.dev/python/bigquery/latest .. _Product Documentation: https://cloud.google.com/bigquery/docs/reference/v2/ From 86f189276e624ab0bc204ecf43136809b7bc2a44 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 20 Aug 2019 07:53:04 +0200 Subject: [PATCH 0640/2016] BigQuery: Add enum with SQL type names allowed to be used in SchemaField (#9040) * Add enum with legacy SQL type names * Add standard types to enum, rename enum --- .../google/cloud/bigquery/enums.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 098a918e474f..29fe543f6505 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -67,3 +67,26 @@ def _make_sql_scalars_enum(): StandardSqlDataTypes = _make_sql_scalars_enum() + + +# See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types +# and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types +class SqlTypeNames(str, enum.Enum): + """Enum of allowed SQL type names in schema.SchemaField.""" + + STRING = "STRING" + BYTES = "BYTES" + INTEGER = "INTEGER" + INT64 = "INTEGER" + FLOAT = "FLOAT" + FLOAT64 = "FLOAT" + NUMERIC = "NUMERIC" + BOOLEAN = "BOOLEAN" + BOOL = "BOOLEAN" + GEOGRAPHY = "GEOGRAPHY" # NOTE: not available in legacy types + RECORD = "RECORD" + STRUCT = "RECORD" + TIMESTAMP = "TIMESTAMP" + DATE = "DATE" + TIME = "TIME" + DATETIME = "DATETIME" From 925fe30d1d64f8f129dda66c3d40fb01608279ba Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 20 Aug 2019 07:53:39 +0200 Subject: [PATCH 0641/2016] Replace avro with arrow schemas in test_table.py (#9056) --- .../tests/unit/test_table.py | 80 ++++++++++++------- 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index dacaa8074f6a..8ba7fee892e5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -13,7 +13,6 @@ # limitations under the License. import itertools -import json import logging import time import unittest @@ -2271,26 +2270,26 @@ def test_to_dataframe_w_bqstorage_logs_session(self): @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1beta1 import reader + arrow_fields = [ + pyarrow.field("colA", pyarrow.int64()), + # Not alphabetical to test column order. + pyarrow.field("colC", pyarrow.float64()), + pyarrow.field("colB", pyarrow.utf8()), + ] + arrow_schema = pyarrow.schema(arrow_fields) + bqstorage_client = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient ) session = bigquery_storage_v1beta1.types.ReadSession( - streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}] - ) - session.avro_schema.schema = json.dumps( - { - "fields": [ - {"name": "colA"}, - # Not alphabetical to test column order. - {"name": "colC"}, - {"name": "colB"}, - ] - } + streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}], + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) bqstorage_client.create_read_session.return_value = session @@ -2327,11 +2326,20 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1beta1 import reader + arrow_fields = [ + pyarrow.field("colA", pyarrow.int64()), + # Not alphabetical to test column order. + pyarrow.field("colC", pyarrow.float64()), + pyarrow.field("colB", pyarrow.utf8()), + ] + arrow_schema = pyarrow.schema(arrow_fields) + bqstorage_client = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient ) @@ -2340,16 +2348,9 @@ def test_to_dataframe_w_bqstorage_nonempty(self): {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1beta1.types.ReadSession(streams=streams) - session.avro_schema.schema = json.dumps( - { - "fields": [ - {"name": "colA"}, - # Not alphabetical to test column order. - {"name": "colC"}, - {"name": "colB"}, - ] - } + session = bigquery_storage_v1beta1.types.ReadSession( + streams=streams, + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) bqstorage_client.create_read_session.return_value = session @@ -2400,17 +2401,23 @@ def test_to_dataframe_w_bqstorage_nonempty(self): @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1beta1 import reader + arrow_fields = [pyarrow.field("colA", pyarrow.int64())] + arrow_schema = pyarrow.schema(arrow_fields) + streams = [ {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1beta1.types.ReadSession(streams=streams) - session.avro_schema.schema = json.dumps({"fields": [{"name": "colA"}]}) + session = bigquery_storage_v1beta1.types.ReadSession( + streams=streams, + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, + ) bqstorage_client = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient @@ -2448,6 +2455,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(tqdm is None, "Requires `tqdm`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @mock.patch("tqdm.tqdm") def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): from google.cloud.bigquery import schema @@ -2457,6 +2465,9 @@ def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): # Speed up testing. mut._PROGRESS_INTERVAL = 0.01 + arrow_fields = [pyarrow.field("testcol", pyarrow.int64())] + arrow_schema = pyarrow.schema(arrow_fields) + bqstorage_client = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient ) @@ -2466,8 +2477,10 @@ def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1beta1.types.ReadSession(streams=streams) - session.avro_schema.schema = json.dumps({"fields": [{"name": "testcol"}]}) + session = bigquery_storage_v1beta1.types.ReadSession( + streams=streams, + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, + ) bqstorage_client.create_read_session.return_value = session mock_rowstream = mock.create_autospec(reader.ReadRowsStream) @@ -2521,6 +2534,7 @@ def blocking_to_dataframe(*args, **kwargs): @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2529,6 +2543,14 @@ def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): # Speed up testing. mut._PROGRESS_INTERVAL = 0.01 + arrow_fields = [ + pyarrow.field("colA", pyarrow.int64()), + # Not alphabetical to test column order. + pyarrow.field("colC", pyarrow.float64()), + pyarrow.field("colB", pyarrow.utf8()), + ] + arrow_schema = pyarrow.schema(arrow_fields) + bqstorage_client = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient ) @@ -2539,10 +2561,8 @@ def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): # ends early. {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, - ] - ) - session.avro_schema.schema = json.dumps( - {"fields": [{"name": "colA"}, {"name": "colB"}, {"name": "colC"}]} + ], + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) bqstorage_client.create_read_session.return_value = session From eb5a65c2699765d1012a96e4e897e700f488c61f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 21 Aug 2019 18:24:22 +0200 Subject: [PATCH 0642/2016] BigQuery: Raise helpful error when loading table from dataframe with STRUCT columns (#9053) * Issue warning if no schema when loading from DF * Raise error if serializing DF with struct fields * Rewrite test assertion to make coverage happy * Make the unsupported type message more general * Remove warning on missing schema The warning will be added once the support for partial schemas and automatic schema detection is implemented. --- .../google/cloud/bigquery/client.py | 11 ++++++ .../tests/unit/test_client.py | 34 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 04c596975eec..ae9adb4da15f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -60,6 +60,7 @@ from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference +from google.cloud.bigquery.schema import _STRUCT_TYPES from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _table_arg_to_table from google.cloud.bigquery.table import _table_arg_to_table_ref @@ -1529,6 +1530,15 @@ def load_table_from_dataframe( os.close(tmpfd) try: + if job_config.schema: + for field in job_config.schema: + if field.field_type in _STRUCT_TYPES: + raise ValueError( + "Uploading dataframes with struct (record) column types " + "is not supported. See: " + "https://github.com/googleapis/google-cloud-python/issues/8191" + ) + if pyarrow and job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() @@ -1548,6 +1558,7 @@ def load_table_from_dataframe( PendingDeprecationWarning, stacklevel=2, ) + dataframe.to_parquet(tmppath, compression=parquet_compression) with open(tmppath, "rb") as parquet_file: diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c4e9c5e830ac..d7ff3d2a90b3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5328,6 +5328,40 @@ def test_load_table_from_dataframe_w_custom_job_config(self): assert sent_config is job_config assert sent_config.source_format == job.SourceFormat.PARQUET + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_struct_fields_error(self): + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + + records = [{"float_column": 3.14, "struct_column": [{"foo": 1}, {"bar": -1}]}] + dataframe = pandas.DataFrame(data=records) + + schema = [ + SchemaField("float_column", "FLOAT"), + SchemaField( + "agg_col", + "RECORD", + fields=[SchemaField("foo", "INTEGER"), SchemaField("bar", "INTEGER")], + ), + ] + job_config = job.LoadJobConfig(schema=schema) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with pytest.raises(ValueError) as exc_info, load_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + err_msg = str(exc_info.value) + assert "struct" in err_msg + assert "not support" in err_msg + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_wo_pyarrow(self): From 47a05244de660799e81b6178ffe44729d71f10c1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 21 Aug 2019 11:27:48 -0700 Subject: [PATCH 0643/2016] Determine the schema in `load_table_from_dataframe` based on dtypes. (#9049) * Determine the schema in `load_table_from_dataframe` based on dtypes. This PR updates `load_table_from_dataframe` to automatically determine the BigQuery schema based on the DataFrame's dtypes. If any field's type cannot be determined, fallback to the logic in the pandas `to_parquet` method. * Fix test coverage. * Reduce duplication by using OrderedDict * Add columns option to DataFrame constructor to ensure correct column order. --- .../google/cloud/bigquery/_pandas_helpers.py | 40 ++++++++++ .../google/cloud/bigquery/client.py | 15 ++++ .../google-cloud-bigquery/tests/system.py | 76 +++++++++++++++++++ .../tests/unit/test_client.py | 74 +++++++++++++++++- 4 files changed, 203 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 5cc69e434b04..db7f36f3d93e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -49,6 +49,21 @@ _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. +_PANDAS_DTYPE_TO_BQ = { + "bool": "BOOLEAN", + "datetime64[ns, UTC]": "TIMESTAMP", + "datetime64[ns]": "DATETIME", + "float32": "FLOAT", + "float64": "FLOAT", + "int8": "INTEGER", + "int16": "INTEGER", + "int32": "INTEGER", + "int64": "INTEGER", + "uint8": "INTEGER", + "uint16": "INTEGER", + "uint32": "INTEGER", +} + class _DownloadState(object): """Flag to indicate that a thread should exit early.""" @@ -172,6 +187,31 @@ def bq_to_arrow_array(series, bq_field): return pyarrow.array(series, type=arrow_type) +def dataframe_to_bq_schema(dataframe): + """Convert a pandas DataFrame schema to a BigQuery schema. + + TODO(GH#8140): Add bq_schema argument to allow overriding autodetected + schema for a subset of columns. + + Args: + dataframe (pandas.DataFrame): + DataFrame to convert to convert to Parquet file. + + Returns: + Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]: + The automatically determined schema. Returns None if the type of + any column cannot be determined. + """ + bq_schema = [] + for column, dtype in zip(dataframe.columns, dataframe.dtypes): + bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) + if not bq_type: + return None + bq_field = schema.SchemaField(column, bq_type) + bq_schema.append(bq_field) + return tuple(bq_schema) + + def dataframe_to_arrow(dataframe, bq_schema): """Convert pandas dataframe to Arrow table, using BigQuery schema. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index ae9adb4da15f..1b13ee126a5d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -21,6 +21,7 @@ except ImportError: # Python 2.7 import collections as collections_abc +import copy import functools import gzip import io @@ -1521,11 +1522,25 @@ def load_table_from_dataframe( if job_config is None: job_config = job.LoadJobConfig() + else: + # Make a copy so that the job config isn't modified in-place. + job_config_properties = copy.deepcopy(job_config._properties) + job_config = job.LoadJobConfig() + job_config._properties = job_config_properties job_config.source_format = job.SourceFormat.PARQUET if location is None: location = self.location + if not job_config.schema: + autodetected_schema = _pandas_helpers.dataframe_to_bq_schema(dataframe) + + # Only use an explicit schema if we were able to determine one + # matching the dataframe. If not, fallback to the pandas to_parquet + # method. + if autodetected_schema: + job_config.schema = autodetected_schema + tmpfd, tmppath = tempfile.mkstemp(suffix="_job_{}.parquet".format(job_id[:8])) os.close(tmpfd) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index fd9efa7752cf..59a72297ed87 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -13,6 +13,7 @@ # limitations under the License. import base64 +import collections import concurrent.futures import csv import datetime @@ -634,6 +635,81 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength) ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_automatic_schema(self): + """Test that a DataFrame with dtypes that map well to BigQuery types + can be uploaded without specifying a schema. + + https://github.com/googleapis/google-cloud-python/issues/9044 + """ + df_data = collections.OrderedDict( + [ + ("bool_col", pandas.Series([True, False, True], dtype="bool")), + ( + "ts_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ).dt.tz_localize(pytz.utc), + ), + ( + "dt_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ), + ), + ("float32_col", pandas.Series([1.0, 2.0, 3.0], dtype="float32")), + ("float64_col", pandas.Series([4.0, 5.0, 6.0], dtype="float64")), + ("int8_col", pandas.Series([-12, -11, -10], dtype="int8")), + ("int16_col", pandas.Series([-9, -8, -7], dtype="int16")), + ("int32_col", pandas.Series([-6, -5, -4], dtype="int32")), + ("int64_col", pandas.Series([-3, -2, -1], dtype="int64")), + ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")), + ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")), + ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")), + ] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_automatic_schema".format( + Config.CLIENT.project, dataset_id + ) + + load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) + load_job.result() + + table = Config.CLIENT.get_table(table_id) + self.assertEqual( + tuple(table.schema), + ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float32_col", "FLOAT"), + bigquery.SchemaField("float64_col", "FLOAT"), + bigquery.SchemaField("int8_col", "INTEGER"), + bigquery.SchemaField("int16_col", "INTEGER"), + bigquery.SchemaField("int32_col", "INTEGER"), + bigquery.SchemaField("int64_col", "INTEGER"), + bigquery.SchemaField("uint8_col", "INTEGER"), + bigquery.SchemaField("uint16_col", "INTEGER"), + bigquery.SchemaField("uint32_col", "INTEGER"), + ), + ) + self.assertEqual(table.num_rows, 3) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d7ff3d2a90b3..8a2a1228cd65 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -13,6 +13,7 @@ # limitations under the License. import copy +import collections import datetime import decimal import email @@ -5325,9 +5326,78 @@ def test_load_table_from_dataframe_w_custom_job_config(self): ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config is job_config assert sent_config.source_format == job.SourceFormat.PARQUET + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_automatic_schema(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + df_data = collections.OrderedDict( + [ + ("int_col", [1, 2, 3]), + ("float_col", [1.0, 2.0, 3.0]), + ("bool_col", [True, False, True]), + ( + "dt_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ), + ), + ( + "ts_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ).dt.tz_localize(pytz.utc), + ), + ] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert tuple(sent_config.schema) == ( + SchemaField("int_col", "INTEGER"), + SchemaField("float_col", "FLOAT"), + SchemaField("bool_col", "BOOLEAN"), + SchemaField("dt_col", "DATETIME"), + SchemaField("ts_col", "TIMESTAMP"), + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields_error(self): @@ -5509,7 +5579,7 @@ def test_load_table_from_dataframe_w_nulls(self): ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config is job_config + assert sent_config.schema == schema assert sent_config.source_format == job.SourceFormat.PARQUET # Low-level tests From 99f8ede5a38eaabd8eb32304964ef5b112d34a9a Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar <46078689+Emar-Kar@users.noreply.github.com> Date: Thu, 22 Aug 2019 17:41:57 +0300 Subject: [PATCH 0644/2016] BigQuery: Add support to Dataset for project_ids with org prefix. (#8877) --- .../google/cloud/bigquery/dataset.py | 25 +++++++++++++++---- .../tests/unit/test_dataset.py | 16 ++++++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 01260ccc6e68..494c219d4f67 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -18,6 +18,7 @@ import six import copy +import re import google.cloud._helpers from google.cloud.bigquery import _helpers @@ -26,6 +27,14 @@ from google.cloud.bigquery.table import TableReference +_PROJECT_PREFIX_PATTERN = re.compile( + r""" + (?P\S+\:[^.]+)\.(?P[^.]+)$ +""", + re.VERBOSE, +) + + def _get_table_reference(self, table_id): """Constructs a TableReference. @@ -269,7 +278,7 @@ def from_string(cls, dataset_id, default_project=None): Args: dataset_id (str): A dataset ID in standard SQL format. If ``default_project`` - is not specified, this must included both the project ID and + is not specified, this must include both the project ID and the dataset ID, separated by ``.``. default_project (str): Optional. The project ID to use when ``dataset_id`` does not @@ -290,13 +299,19 @@ def from_string(cls, dataset_id, default_project=None): """ output_dataset_id = dataset_id output_project_id = default_project - parts = dataset_id.split(".") + with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id) + if with_prefix is None: + parts = dataset_id.split(".") + else: + project_id = with_prefix.group("project_id") + dataset_id = with_prefix.group("dataset_id") + parts = [project_id, dataset_id] if len(parts) == 1 and not default_project: raise ValueError( "When default_project is not set, dataset_id must be a " - "fully-qualified dataset ID in standard SQL format. " - 'e.g. "project.dataset_id", got {}'.format(dataset_id) + "fully-qualified dataset ID in standard SQL format, " + 'e.g., "project.dataset_id" got {}'.format(dataset_id) ) elif len(parts) == 2: output_project_id, output_dataset_id = parts @@ -554,7 +569,7 @@ def from_string(cls, full_dataset_id): Args: full_dataset_id (str): A fully-qualified dataset ID in standard SQL format. Must - included both the project ID and the dataset ID, separated by + include both the project ID and the dataset ID, separated by ``.``. Returns: diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index b8805a9c7ce3..26b1729a240c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -186,11 +186,27 @@ def test_from_string(self): self.assertEqual(got.project, "string-project") self.assertEqual(got.dataset_id, "string_dataset") + def test_from_string_w_prefix(self): + cls = self._get_target_class() + got = cls.from_string("google.com:string-project.string_dataset") + self.assertEqual(got.project, "google.com:string-project") + self.assertEqual(got.dataset_id, "string_dataset") + def test_from_string_legacy_string(self): cls = self._get_target_class() with self.assertRaises(ValueError): cls.from_string("string-project:string_dataset") + def test_from_string_w_incorrect_prefix(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string("google.com.string-project.dataset_id") + + def test_from_string_w_prefix_and_too_many_parts(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string("google.com:string-project.dataset_id.table_id") + def test_from_string_not_fully_qualified(self): cls = self._get_target_class() with self.assertRaises(ValueError): From ffca03f5f9d41f4003d3d1fe81549b0e5475e36b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 22 Aug 2019 17:01:28 +0200 Subject: [PATCH 0645/2016] Add support for unsetting LoadJobConfig schema (#9077) --- .../google/cloud/bigquery/job.py | 4 ++++ .../google-cloud-bigquery/tests/unit/test_job.py | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 084181cdf757..38b3b39c2c3e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1160,6 +1160,10 @@ def schema(self): @schema.setter def schema(self, value): + if value is None: + self._del_sub_prop("schema") + return + if not all(hasattr(field, "to_api_repr") for field in value): raise ValueError("Schema items must be fields") _helpers._set_sub_prop( diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 1e75373c84b6..8bd62d7e4f51 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1504,6 +1504,19 @@ def test_schema_setter(self): config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} ) + def test_schema_setter_unsetting_schema(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + config._properties["load"]["schema"] = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + config.schema = None + self.assertNotIn("schema", config._properties["load"]) + config.schema = None # no error, idempotent operation + def test_schema_update_options_missing(self): config = self._get_target_class()() self.assertIsNone(config.schema_update_options) From 88356b9048f23409b8a69851d42757f819ae705c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 22 Aug 2019 12:35:56 -0700 Subject: [PATCH 0646/2016] Allow subset of schema to be passed into `load_table_from_dataframe`. (#9064) * Allow subset of schema to be passed into `load_table_from_dataframe`. The types of any remaining columns will be auto-detected. * Warn when it's not possible to determine a column type. --- .../google/cloud/bigquery/_pandas_helpers.py | 41 +++- .../google/cloud/bigquery/client.py | 21 +- .../google-cloud-bigquery/tests/system.py | 119 ++++++----- .../tests/unit/test_client.py | 202 +++++++++++++++++- 4 files changed, 295 insertions(+), 88 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index db7f36f3d93e..57ced8fac0c1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -187,29 +187,50 @@ def bq_to_arrow_array(series, bq_field): return pyarrow.array(series, type=arrow_type) -def dataframe_to_bq_schema(dataframe): +def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. - TODO(GH#8140): Add bq_schema argument to allow overriding autodetected - schema for a subset of columns. - Args: dataframe (pandas.DataFrame): - DataFrame to convert to convert to Parquet file. + DataFrame for which the client determines the BigQuery schema. + bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + A BigQuery schema. Use this argument to override the autodetected + type for some or all of the DataFrame columns. Returns: Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]: The automatically determined schema. Returns None if the type of any column cannot be determined. """ - bq_schema = [] + if bq_schema: + for field in bq_schema: + if field.field_type in schema._STRUCT_TYPES: + raise ValueError( + "Uploading dataframes with struct (record) column types " + "is not supported. See: " + "https://github.com/googleapis/google-cloud-python/issues/8191" + ) + bq_schema_index = {field.name: field for field in bq_schema} + else: + bq_schema_index = {} + + bq_schema_out = [] for column, dtype in zip(dataframe.columns, dataframe.dtypes): + # Use provided type from schema, if present. + bq_field = bq_schema_index.get(column) + if bq_field: + bq_schema_out.append(bq_field) + continue + + # Otherwise, try to automatically determine the type based on the + # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) if not bq_type: + warnings.warn("Unable to determine type of column '{}'.".format(column)) return None bq_field = schema.SchemaField(column, bq_type) - bq_schema.append(bq_field) - return tuple(bq_schema) + bq_schema_out.append(bq_field) + return tuple(bq_schema_out) def dataframe_to_arrow(dataframe, bq_schema): @@ -217,7 +238,7 @@ def dataframe_to_arrow(dataframe, bq_schema): Args: dataframe (pandas.DataFrame): - DataFrame to convert to convert to Parquet file. + DataFrame to convert to Arrow table. bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): Desired BigQuery schema. Number of columns must match number of columns in the DataFrame. @@ -255,7 +276,7 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN Args: dataframe (pandas.DataFrame): - DataFrame to convert to convert to Parquet file. + DataFrame to convert to Parquet file. bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): Desired BigQuery schema. Number of columns must match number of columns in the DataFrame. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1b13ee126a5d..da169cb55bf2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -61,7 +61,6 @@ from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference -from google.cloud.bigquery.schema import _STRUCT_TYPES from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _table_arg_to_table from google.cloud.bigquery.table import _table_arg_to_table_ref @@ -1532,28 +1531,14 @@ def load_table_from_dataframe( if location is None: location = self.location - if not job_config.schema: - autodetected_schema = _pandas_helpers.dataframe_to_bq_schema(dataframe) - - # Only use an explicit schema if we were able to determine one - # matching the dataframe. If not, fallback to the pandas to_parquet - # method. - if autodetected_schema: - job_config.schema = autodetected_schema + job_config.schema = _pandas_helpers.dataframe_to_bq_schema( + dataframe, job_config.schema + ) tmpfd, tmppath = tempfile.mkstemp(suffix="_job_{}.parquet".format(job_id[:8])) os.close(tmpfd) try: - if job_config.schema: - for field in job_config.schema: - if field.field_type in _STRUCT_TYPES: - raise ValueError( - "Uploading dataframes with struct (record) column types " - "is not supported. See: " - "https://github.com/googleapis/google-cloud-python/issues/8191" - ) - if pyarrow and job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 59a72297ed87..1422c3c7cb60 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -743,21 +743,22 @@ def test_load_table_from_dataframe_w_nulls(self): ) num_rows = 100 nulls = [None] * num_rows - dataframe = pandas.DataFrame( - { - "bool_col": nulls, - "bytes_col": nulls, - "date_col": nulls, - "dt_col": nulls, - "float_col": nulls, - "geo_col": nulls, - "int_col": nulls, - "num_col": nulls, - "str_col": nulls, - "time_col": nulls, - "ts_col": nulls, - } + df_data = collections.OrderedDict( + [ + ("bool_col", nulls), + ("bytes_col", nulls), + ("date_col", nulls), + ("dt_col", nulls), + ("float_col", nulls), + ("geo_col", nulls), + ("int_col", nulls), + ("num_col", nulls), + ("str_col", nulls), + ("time_col", nulls), + ("ts_col", nulls), + ] ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") self.temp_dataset(dataset_id) @@ -796,7 +797,7 @@ def test_load_table_from_dataframe_w_required(self): ) records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}] - dataframe = pandas.DataFrame(records) + dataframe = pandas.DataFrame(records, columns=["name", "age"]) job_config = bigquery.LoadJobConfig(schema=table_schema) dataset_id = _make_dataset_id("bq_load_test") self.temp_dataset(dataset_id) @@ -847,44 +848,58 @@ def test_load_table_from_dataframe_w_explicit_schema(self): # https://jira.apache.org/jira/browse/ARROW-2587 # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), ) - dataframe = pandas.DataFrame( - { - "bool_col": [True, None, False], - "bytes_col": [b"abc", None, b"def"], - "date_col": [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], - "dt_col": [ - datetime.datetime(1, 1, 1, 0, 0, 0), - None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - ], - "float_col": [float("-inf"), float("nan"), float("inf")], - "geo_col": [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - "int_col": [-9223372036854775808, None, 9223372036854775807], - "num_col": [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - "str_col": ["abc", None, "def"], - "time_col": [ - datetime.time(0, 0, 0), - None, - datetime.time(23, 59, 59, 999999), - ], - "ts_col": [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), - ], - }, - dtype="object", + df_data = collections.OrderedDict( + [ + ("bool_col", [True, None, False]), + ("bytes_col", [b"abc", None, b"def"]), + ( + "date_col", + [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], + ), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ("str_col", [u"abc", None, u"def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + ), + ] ) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") self.temp_dataset(dataset_id) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 8a2a1228cd65..1fd6d87487ae 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5230,7 +5230,7 @@ def test_load_table_from_dataframe(self): from google.cloud.bigquery import job client = self._make_client() - records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) load_patch = mock.patch( @@ -5265,7 +5265,7 @@ def test_load_table_from_dataframe_w_client_location(self): from google.cloud.bigquery import job client = self._make_client(location=self.LOCATION) - records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) load_patch = mock.patch( @@ -5300,7 +5300,7 @@ def test_load_table_from_dataframe_w_custom_job_config(self): from google.cloud.bigquery import job client = self._make_client() - records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) job_config = job.LoadJobConfig() @@ -5432,6 +5432,192 @@ def test_load_table_from_dataframe_struct_fields_error(self): assert "struct" in err_msg assert "not support" in err_msg + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_partial_schema(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + df_data = collections.OrderedDict( + [ + ("int_col", [1, 2, 3]), + ("int_as_float_col", [1.0, float("nan"), 3.0]), + ("float_col", [1.0, 2.0, 3.0]), + ("bool_col", [True, False, True]), + ( + "dt_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ), + ), + ( + "ts_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ).dt.tz_localize(pytz.utc), + ), + ("string_col", [u"abc", None, u"def"]), + ("bytes_col", [b"abc", b"def", None]), + ] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + schema = ( + SchemaField("int_as_float_col", "INTEGER"), + SchemaField("string_col", "STRING"), + SchemaField("bytes_col", "BYTES"), + ) + job_config = job.LoadJobConfig(schema=schema) + with load_patch as load_table_from_file: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert tuple(sent_config.schema) == ( + SchemaField("int_col", "INTEGER"), + SchemaField("int_as_float_col", "INTEGER"), + SchemaField("float_col", "FLOAT"), + SchemaField("bool_col", "BOOLEAN"), + SchemaField("dt_col", "DATETIME"), + SchemaField("ts_col", "TIMESTAMP"), + SchemaField("string_col", "STRING"), + SchemaField("bytes_col", "BYTES"), + ) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_partial_schema_extra_types(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + df_data = collections.OrderedDict( + [ + ("int_col", [1, 2, 3]), + ("int_as_float_col", [1.0, float("nan"), 3.0]), + ("string_col", [u"abc", None, u"def"]), + ] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + schema = ( + SchemaField("int_as_float_col", "INTEGER"), + SchemaField("string_col", "STRING"), + SchemaField("unknown_col", "BYTES"), + ) + job_config = job.LoadJobConfig(schema=schema) + with load_patch as load_table_from_file: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert tuple(sent_config.schema) == ( + SchemaField("int_col", "INTEGER"), + SchemaField("int_as_float_col", "INTEGER"), + SchemaField("string_col", "STRING"), + ) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_partial_schema_missing_types(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + df_data = collections.OrderedDict( + [ + ("string_col", [u"abc", u"def", u"ghi"]), + ("unknown_col", [b"jkl", None, b"mno"]), + ] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + schema = (SchemaField("string_col", "STRING"),) + job_config = job.LoadJobConfig(schema=schema) + with load_patch as load_table_from_file, warnings.catch_warnings( + record=True + ) as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + assert warned # there should be at least one warning + unknown_col_warnings = [ + warning for warning in warned if "unknown_col" in str(warning) + ] + assert unknown_col_warnings + assert unknown_col_warnings[0].category == UserWarning + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema is None + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_wo_pyarrow(self): @@ -5440,8 +5626,8 @@ def test_load_table_from_dataframe_w_schema_wo_pyarrow(self): from google.cloud.bigquery.schema import SchemaField client = self._make_client() - records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] - dataframe = pandas.DataFrame(records) + records = [{"name": u"Monty", "age": 100}, {"name": u"Python", "age": 60}] + dataframe = pandas.DataFrame(records, columns=["name", "age"]) schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER")) job_config = job.LoadJobConfig(schema=schema) @@ -5486,7 +5672,7 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): from google.cloud.bigquery.schema import SchemaField client = self._make_client() - records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] + records = [{"name": u"Monty", "age": 100}, {"name": u"Python", "age": 60}] dataframe = pandas.DataFrame(records) schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER")) job_config = job.LoadJobConfig(schema=schema) @@ -5516,7 +5702,7 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): client = self._make_client() - records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) load_patch = mock.patch( @@ -5553,7 +5739,7 @@ def test_load_table_from_dataframe_w_nulls(self): client = self._make_client() records = [{"name": None, "age": None}, {"name": None, "age": None}] - dataframe = pandas.DataFrame(records) + dataframe = pandas.DataFrame(records, columns=["name", "age"]) schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] job_config = job.LoadJobConfig(schema=schema) From 7e085d3f8a485ff6fa115486d8b7e2c364c8da99 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 22 Aug 2019 22:01:49 +0200 Subject: [PATCH 0647/2016] BigQuery: Add load_table_from_json() method to BQ client (#9076) * Add load_table_from_json() method to BQ client * Manipulate a copy of the job config if provided The load_table_from_json() should not directly change the job config passed in as an argument. * Enable schema autodetect if no explicit schema * Cover the path of schema provided in unit tests * Improve tests readability and harden assertions --- .../google/cloud/bigquery/client.py | 82 +++++++++++++++++ .../google-cloud-bigquery/tests/system.py | 70 +++++++++++++++ .../tests/unit/test_client.py | 88 +++++++++++++++++++ 3 files changed, 240 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index da169cb55bf2..1985a45057ba 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1577,6 +1577,88 @@ def load_table_from_dataframe( finally: os.remove(tmppath) + def load_table_from_json( + self, + json_rows, + destination, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=None, + job_id_prefix=None, + location=None, + project=None, + job_config=None, + ): + """Upload the contents of a table from a JSON string or dict. + + Arguments: + json_rows (Iterable[Dict[str, Any]]): + Row data to be inserted. Keys must match the table schema fields + and values must be JSON-compatible representations. + destination (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): + Table into which data is to be loaded. If a string is passed + in, this method attempts to create a table reference from a + string using + :func:`google.cloud.bigquery.table.TableReference.from_string`. + + Keyword Arguments: + num_retries (int, optional): Number of upload retries. + job_id (str): (Optional) Name of the job. + job_id_prefix (str): + (Optional) the user-provided prefix for a randomly generated + job ID. This parameter will be ignored if a ``job_id`` is + also given. + location (str): + Location where to run the job. Must match the location of the + destination table. + project (str): + Project ID of the project of where to run the job. Defaults + to the client's project. + job_config (google.cloud.bigquery.job.LoadJobConfig): + (Optional) Extra configuration options for the job. The + ``source_format`` setting is always set to + :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`. + + Returns: + google.cloud.bigquery.job.LoadJob: A new load job. + """ + job_id = _make_job_id(job_id, job_id_prefix) + + if job_config is None: + job_config = job.LoadJobConfig() + else: + # Make a copy so that the job config isn't modified in-place. + job_config = copy.deepcopy(job_config) + job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON + + if job_config.schema is None: + job_config.autodetect = True + + if project is None: + project = self.project + + if location is None: + location = self.location + + destination = _table_arg_to_table_ref(destination, default_project=self.project) + + data_str = u"\n".join(json.dumps(item) for item in json_rows) + data_file = io.BytesIO(data_str.encode()) + + return self.load_table_from_file( + data_file, + destination, + num_retries=num_retries, + job_id=job_id, + job_id_prefix=job_id_prefix, + location=location, + project=project, + job_config=job_config, + ) + def _do_resumable_upload(self, stream, metadata, num_retries): """Perform a resumable upload. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 1422c3c7cb60..3593e1ecb609 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -917,6 +917,76 @@ def test_load_table_from_dataframe_w_explicit_schema(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 3) + def test_load_table_from_json_basic_use(self): + table_schema = ( + bigquery.SchemaField("name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("birthday", "DATE", mode="REQUIRED"), + bigquery.SchemaField("is_awesome", "BOOLEAN", mode="REQUIRED"), + ) + + json_rows = [ + {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False}, + {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True}, + ] + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_json_basic_use".format( + Config.CLIENT.project, dataset_id + ) + + # Create the table before loading so that schema mismatch errors are + # identified. + table = retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = Config.CLIENT.load_table_from_json( + json_rows, table_id, job_config=job_config + ) + load_job.result() + + table = Config.CLIENT.get_table(table) + self.assertEqual(tuple(table.schema), table_schema) + self.assertEqual(table.num_rows, 2) + + def test_load_table_from_json_schema_autodetect(self): + json_rows = [ + {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False}, + {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True}, + ] + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_json_basic_use".format( + Config.CLIENT.project, dataset_id + ) + + # Use schema with NULLABLE fields, because schema autodetection + # defaults to field mode NULLABLE. + table_schema = ( + bigquery.SchemaField("name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), + bigquery.SchemaField("birthday", "DATE", mode="NULLABLE"), + bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"), + ) + # create the table before loading so that the column order is predictable + table = retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # do not pass an explicit job config to trigger automatic schema detection + load_job = Config.CLIENT.load_table_from_json(json_rows, table_id) + load_job.result() + + table = Config.CLIENT.get_table(table) + self.assertEqual(tuple(table.schema), table_schema) + self.assertEqual(table.num_rows, 2) + def test_load_avro_from_uri_then_dump_table(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SourceFormat diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 1fd6d87487ae..ce03ffbf7f35 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5768,6 +5768,94 @@ def test_load_table_from_dataframe_w_nulls(self): assert sent_config.schema == schema assert sent_config.source_format == job.SourceFormat.PARQUET + def test_load_table_from_json_basic_use(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json(json_rows, self.TABLE_REF) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema is None + assert sent_config.autodetect + + def test_load_table_from_json_non_default_args(self): + from google.cloud.bigquery import job + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + schema = [ + SchemaField("name", "STRING"), + SchemaField("age", "INTEGER"), + SchemaField("adult", "BOOLEAN"), + ] + job_config = job.LoadJobConfig(schema=schema) + job_config._properties["load"]["unknown_field"] = "foobar" + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json( + json_rows, + self.TABLE_REF, + job_config=job_config, + project="project-x", + location="EU", + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location="EU", + project="project-x", + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert job_config.source_format is None # the original was not modified + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema == schema + assert not sent_config.autodetect + # all properties should have been cloned and sent to the backend + assert sent_config._properties.get("load", {}).get("unknown_field") == "foobar" + # Low-level tests @classmethod From 545a8d3b78c34b5c2e2b2e2588136590b2508d84 Mon Sep 17 00:00:00 2001 From: Shubha Rajan Date: Fri, 23 Aug 2019 15:10:15 -0700 Subject: [PATCH 0648/2016] Add `--dry_run` option to `%%bigquery` magic. (#9067) * added dry_run option to bigquery magics. when --dry_run flag is present, a QueryJob object is returned for inspection instead of an empty DataFrame * print estimated bytes instead of total bytes * updated docstring for _AsyncJob._begin * Update docstring for QueryJob._begin * added SQL query to error output and messaging for failure to save to variable in magics Co-Authored-By: Peter Lamut Co-Authored-By: Tim Swast --- .../google/cloud/bigquery/job.py | 41 ++++-- .../google/cloud/bigquery/magics.py | 30 +++++ .../tests/unit/test_job.py | 33 +++++ .../tests/unit/test_magics.py | 117 ++++++++++++++++++ 4 files changed, 213 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 38b3b39c2c3e..6e1eb81648f5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -561,15 +561,16 @@ def _begin(self, client=None, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Args: + client (Optional[google.cloud.bigquery.client.Client]): + The client to use. If not passed, falls back to the ``client`` + associated with the job object or``NoneType`` + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. - :raises: :exc:`ValueError` if the job has already begin. + Raises: + ValueError: + If the job has already begun. """ if self.state is not None: raise ValueError("Job already begun.") @@ -2880,6 +2881,30 @@ def _format_for_exception(query, job_id): return template.format(job_id=job_id, header=header, ruler=ruler, body=body) + def _begin(self, client=None, retry=DEFAULT_RETRY): + """API call: begin the job via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + The client to use. If not passed, falls back to the ``client`` + associated with the job object or``NoneType``. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + + Raises: + ValueError: + If the job has already begun. + """ + + try: + super(QueryJob, self)._begin(client=client, retry=retry) + except exceptions.GoogleCloudError as exc: + exc.message += self._format_for_exception(self.query, self.job_id) + raise + def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY): """Start the job and wait for it to complete and get the result. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index b4ec8951b0a6..b2dae2511ec8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -291,6 +291,10 @@ def _run_query(client, query, job_config=None): """ start_time = time.time() query_job = client.query(query, job_config=job_config) + + if job_config and job_config.dry_run: + return query_job + print("Executing query with job ID: {}".format(query_job.job_id)) while True: @@ -324,6 +328,15 @@ def _run_query(client, query, job_config=None): "the context default_query_job_config.maximum_bytes_billed." ), ) +@magic_arguments.argument( + "--dry_run", + action="store_true", + default=False, + help=( + "Sets query to be a dry run to estimate costs. " + "Defaults to executing the query instead of dry run if this argument is not used." + ), +) @magic_arguments.argument( "--use_legacy_sql", action="store_true", @@ -410,6 +423,7 @@ def _cell_magic(line, query): job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql + job_config.dry_run = args.dry_run if args.maximum_bytes_billed == "None": job_config.maximum_bytes_billed = 0 @@ -427,9 +441,25 @@ def _cell_magic(line, query): display.clear_output() if error: + if args.destination_var: + print( + "Could not save output to variable '{}'.".format(args.destination_var), + file=sys.stderr, + ) print("\nERROR:\n", error, file=sys.stderr) return + if args.dry_run and args.destination_var: + IPython.get_ipython().push({args.destination_var: query_job}) + return + elif args.dry_run: + print( + "Query validated. This query will process {} bytes.".format( + query_job.total_bytes_processed + ) + ) + return query_job + result = query_job.to_dataframe(bqstorage_client=bqstorage_client) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 8bd62d7e4f51..19409d8d43c3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -4307,6 +4307,39 @@ def test_result_error(self): expected_line = "{}:{}".format(i, line) assert expected_line in full_text + def test__begin_error(self): + from google.cloud import exceptions + + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, query, client) + call_api_patch = mock.patch( + "google.cloud.bigquery.client.Client._call_api", + autospec=True, + side_effect=exceptions.BadRequest("Syntax error in SQL query"), + ) + + with call_api_patch, self.assertRaises(exceptions.GoogleCloudError) as exc_info: + job.result() + + self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) + self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + + full_text = str(exc_info.exception) + + assert job.job_id in full_text + assert "Query Job SQL Follows" in full_text + + for i, line in enumerate(query.splitlines(), start=1): + expected_line = "{}:{}".format(i, line) + assert expected_line in full_text + def test_begin_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import QueryJobConfig diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index ebe194329ec1..fbea9bdd9050 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -274,6 +274,25 @@ def test__run_query(): assert re.match("Query complete after .*s", updates[-1]) +def test__run_query_dry_run_without_errors_is_silent(): + magics.context._credentials = None + + sql = "SELECT 17" + + client_patch = mock.patch( + "google.cloud.bigquery.magics.bigquery.Client", autospec=True + ) + + job_config = job.QueryJobConfig() + job_config.dry_run = True + with client_patch as client_mock, io.capture_output() as captured: + client_mock().query(sql).job_id = None + magics._run_query(client_mock(), sql, job_config=job_config) + + assert len(captured.stderr) == 0 + assert len(captured.stdout) == 0 + + def test__make_bqstorage_client_false(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -626,6 +645,104 @@ def test_bigquery_magic_without_bqstorage(monkeypatch): assert isinstance(return_value, pandas.DataFrame) +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_dryrun_option_sets_job_config(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics._run_query", autospec=True + ) + + sql = "SELECT 17 AS num" + + with run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "--dry_run", sql) + + job_config_used = run_query_mock.call_args_list[0][0][-1] + assert job_config_used.dry_run is True + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_dryrun_option_returns_query_job(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + run_query_patch = mock.patch( + "google.cloud.bigquery.magics._run_query", autospec=True + ) + + sql = "SELECT 17 AS num" + + with run_query_patch as run_query_mock, io.capture_output() as captured_io: + run_query_mock.return_value = query_job_mock + return_value = ip.run_cell_magic("bigquery", "--dry_run", sql) + + assert "Query validated. This query will process" in captured_io.stdout + assert isinstance(return_value, job.QueryJob) + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_dryrun_option_variable_error_message(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics._run_query", + autospec=True, + side_effect=exceptions.BadRequest("Syntax error in SQL query"), + ) + + sql = "SELECT SELECT 17 AS num" + + assert "q_job" not in ip.user_ns + + with run_query_patch, io.capture_output() as captured: + ip.run_cell_magic("bigquery", "q_job --dry_run", sql) + + full_text = captured.stderr + assert "Could not save output to variable 'q_job'." in full_text + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + run_query_patch = mock.patch( + "google.cloud.bigquery.magics._run_query", autospec=True + ) + + sql = "SELECT 17 AS num" + + assert "q_job" not in ip.user_ns + + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + return_value = ip.run_cell_magic("bigquery", "q_job --dry_run", sql) + + assert return_value is None + assert "q_job" in ip.user_ns + q_job = ip.user_ns["q_job"] + assert isinstance(q_job, job.QueryJob) + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_w_maximum_bytes_billed_invalid(): ip = IPython.get_ipython() From 4a11a88d2a99cb9f27e90e5fc019f48802ab047d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 26 Aug 2019 03:51:27 -0700 Subject: [PATCH 0649/2016] Raise with extra or missing columns in `load_table_from_dataframe` schema. (#9096) I found it to be difficult to debug typos in column/index names in the schema, so I have hardened the error messages to indicate when unknown field values are found. --- .../google/cloud/bigquery/_pandas_helpers.py | 28 ++++++++++++++++-- .../tests/unit/test__pandas_helpers.py | 20 +++++++++++-- .../tests/unit/test_client.py | 29 +++++-------------- 3 files changed, 50 insertions(+), 27 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 57ced8fac0c1..2d2fb8af24d3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -211,8 +211,10 @@ def dataframe_to_bq_schema(dataframe, bq_schema): "https://github.com/googleapis/google-cloud-python/issues/8191" ) bq_schema_index = {field.name: field for field in bq_schema} + bq_schema_unused = set(bq_schema_index.keys()) else: bq_schema_index = {} + bq_schema_unused = set() bq_schema_out = [] for column, dtype in zip(dataframe.columns, dataframe.dtypes): @@ -220,6 +222,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): bq_field = bq_schema_index.get(column) if bq_field: bq_schema_out.append(bq_field) + bq_schema_unused.discard(bq_field.name) continue # Otherwise, try to automatically determine the type based on the @@ -230,6 +233,15 @@ def dataframe_to_bq_schema(dataframe, bq_schema): return None bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) + + # Catch any schema mismatch. The developer explicitly asked to serialize a + # column, but it was not found. + if bq_schema_unused: + raise ValueError( + "bq_schema contains fields not present in dataframe: {}".format( + bq_schema_unused + ) + ) return tuple(bq_schema_out) @@ -248,9 +260,21 @@ def dataframe_to_arrow(dataframe, bq_schema): Table containing dataframe data, with schema derived from BigQuery schema. """ - if len(bq_schema) != len(dataframe.columns): + column_names = set(dataframe.columns) + bq_field_names = set(field.name for field in bq_schema) + + extra_fields = bq_field_names - column_names + if extra_fields: + raise ValueError( + "bq_schema contains fields not present in dataframe: {}".format( + extra_fields + ) + ) + + missing_fields = column_names - bq_field_names + if missing_fields: raise ValueError( - "Number of columns in schema must match number of columns in dataframe." + "bq_schema is missing fields from dataframe: {}".format(missing_fields) ) arrow_arrays = [] diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 6aad587837b4..facfb79b3ccb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -609,12 +609,26 @@ def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") -def test_dataframe_to_parquet_w_missing_columns(module_under_test, monkeypatch): +def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( - pandas.DataFrame(), (schema.SchemaField("not_found", "STRING"),), None + pandas.DataFrame(), (schema.SchemaField("not_in_df", "STRING"),), None ) - assert "columns in schema must match" in str(exc_context.value) + message = str(exc_context.value) + assert "bq_schema contains fields not present in dataframe" in message + assert "not_in_df" in message + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): + with pytest.raises(ValueError) as exc_context: + module_under_test.dataframe_to_parquet( + pandas.DataFrame({"not_in_bq": [1, 2, 3]}), (), None + ) + message = str(exc_context.value) + assert "bq_schema is missing fields from dataframe" in message + assert "not_in_bq" in message @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ce03ffbf7f35..0b06876dedcd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5517,7 +5517,6 @@ def test_load_table_from_dataframe_w_partial_schema(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema_extra_types(self): - from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -5540,31 +5539,17 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): SchemaField("unknown_col", "BYTES"), ) job_config = job.LoadJobConfig(schema=schema) - with load_patch as load_table_from_file: + with load_patch as load_table_from_file, pytest.raises( + ValueError + ) as exc_context: client.load_table_from_dataframe( dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION ) - load_table_from_file.assert_called_once_with( - client, - mock.ANY, - self.TABLE_REF, - num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, - job_id=mock.ANY, - job_id_prefix=None, - location=self.LOCATION, - project=None, - job_config=mock.ANY, - ) - - sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET - assert tuple(sent_config.schema) == ( - SchemaField("int_col", "INTEGER"), - SchemaField("int_as_float_col", "INTEGER"), - SchemaField("string_col", "STRING"), - ) + load_table_from_file.assert_not_called() + message = str(exc_context.value) + assert "bq_schema contains fields not present in dataframe" in message + assert "unknown_col" in message @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") From 4ec1d9fd2abdcc600b2dcb3a186c9c8045e5a632 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 27 Aug 2019 19:51:59 +0200 Subject: [PATCH 0650/2016] Fix BigQuery client unit test assertions (#9112) Since a timeout argument is now sent with HTTP requests by default, the call assertions need adjustment. --- packages/google-cloud-bigquery/tests/unit/test_client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 0b06876dedcd..f75e6bfbfa7b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -3110,6 +3110,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): upload_url, data=json.dumps(metadata).encode("utf-8"), headers=request_headers, + timeout=mock.ANY, ) def test__initiate_resumable_upload(self): @@ -3162,7 +3163,7 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): headers = _get_upload_headers(conn.user_agent) headers["content-type"] = b'multipart/related; boundary="==0=="' fake_transport.request.assert_called_once_with( - "POST", upload_url, data=payload, headers=headers + "POST", upload_url, data=payload, headers=headers, timeout=mock.ANY ) @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") @@ -5896,6 +5897,7 @@ def test__do_resumable_upload(self): mock.ANY, data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"), headers=mock.ANY, + timeout=mock.ANY, ) def test__do_multipart_upload(self): From 56993e7927f8ff25eeb336f2d0ead5ef77c02bce Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar <46078689+Emar-Kar@users.noreply.github.com> Date: Tue, 27 Aug 2019 23:25:26 +0300 Subject: [PATCH 0651/2016] Add client_options to constructor. (#8999) --- .../google/cloud/bigquery/_http.py | 9 ++-- .../google/cloud/bigquery/client.py | 18 ++++++- packages/google-cloud-bigquery/setup.py | 2 +- .../tests/unit/test__http.py | 8 ++- .../tests/unit/test_client.py | 50 +++++++++++++++++++ 5 files changed, 79 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index 643b24920bee..5dd660ea0b8d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -29,15 +29,14 @@ class Connection(_http.JSONConnection): :param client_info: (Optional) instance used to generate user agent. """ - def __init__(self, client, client_info=None): - super(Connection, self).__init__(client, client_info) + DEFAULT_API_ENDPOINT = "https://www.googleapis.com" + def __init__(self, client, client_info=None, api_endpoint=DEFAULT_API_ENDPOINT): + super(Connection, self).__init__(client, client_info) + self.API_BASE_URL = api_endpoint self._client_info.gapic_version = __version__ self._client_info.client_library_version = __version__ - API_BASE_URL = "https://www.googleapis.com" - """The base of the API call URL.""" - API_VERSION = "v2" """The version of the API, used in building the API call's URL.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1985a45057ba..047633642e1e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -41,6 +41,7 @@ from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload +import google.api_core.client_options import google.api_core.exceptions from google.api_core import page_iterator import google.cloud._helpers @@ -142,6 +143,9 @@ class Client(ClientWithProject): requests. If ``None``, then default info will be used. Generally, you only need to set this if you're developing your own library or partner tool. + client_options (Union[~google.api_core.client_options.ClientOptions, dict]): + (Optional) Client options used to set user options on the client. + API Endpoint should be set through client_options. Raises: google.auth.exceptions.DefaultCredentialsError: @@ -163,11 +167,23 @@ def __init__( location=None, default_query_job_config=None, client_info=None, + client_options=None, ): super(Client, self).__init__( project=project, credentials=credentials, _http=_http ) - self._connection = Connection(self, client_info=client_info) + + kw_args = {"client_info": client_info} + if client_options: + if type(client_options) == dict: + client_options = google.api_core.client_options.from_dict( + client_options + ) + if client_options.api_endpoint: + api_endpoint = client_options.api_endpoint + kw_args["api_endpoint"] = api_endpoint + + self._connection = Connection(self, **kw_args) self._location = location self._default_query_job_config = default_query_job_config diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 337a974ac485..b3eeb15b5d64 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -30,7 +30,7 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ 'enum34; python_version < "3.4"', - "google-cloud-core >= 1.0.0, < 2.0dev", + "google-cloud-core >= 1.0.3, < 2.0dev", "google-resumable-media >= 0.3.1", "protobuf >= 3.6.0", ] diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index 939b5668e1e2..629e8031a52f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -30,7 +30,13 @@ def _make_one(self, *args, **kw): def test_build_api_url_no_extra_query_params(self): conn = self._make_one(object()) - URI = "/".join([conn.API_BASE_URL, "bigquery", conn.API_VERSION, "foo"]) + URI = "/".join([conn.DEFAULT_API_ENDPOINT, "bigquery", conn.API_VERSION, "foo"]) + self.assertEqual(conn.build_api_url("/foo"), URI) + + def test_build_api_url_w_custom_endpoint(self): + custom_endpoint = "https://www.foo-googleapis.com" + conn = self._make_one(object(), api_endpoint=custom_endpoint) + URI = "/".join([custom_endpoint, "bigquery", conn.API_VERSION, "foo"]) self.assertEqual(conn.build_api_url("/foo"), URI) def test_build_api_url_w_extra_query_params(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f75e6bfbfa7b..f31d8587322b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -112,6 +112,56 @@ def test_ctor_defaults(self): self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) self.assertIsNone(client.location) + self.assertEqual( + client._connection.API_BASE_URL, Connection.DEFAULT_API_ENDPOINT + ) + + def test_ctor_w_empty_client_options(self): + from google.api_core.client_options import ClientOptions + + creds = _make_credentials() + http = object() + client_options = ClientOptions() + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + client_options=client_options, + ) + self.assertEqual( + client._connection.API_BASE_URL, client._connection.DEFAULT_API_ENDPOINT + ) + + def test_ctor_w_client_options_dict(self): + + creds = _make_credentials() + http = object() + client_options = {"api_endpoint": "https://www.foo-googleapis.com"} + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + client_options=client_options, + ) + self.assertEqual( + client._connection.API_BASE_URL, "https://www.foo-googleapis.com" + ) + + def test_ctor_w_client_options_object(self): + from google.api_core.client_options import ClientOptions + + creds = _make_credentials() + http = object() + client_options = ClientOptions(api_endpoint="https://www.foo-googleapis.com") + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + client_options=client_options, + ) + self.assertEqual( + client._connection.API_BASE_URL, "https://www.foo-googleapis.com" + ) def test_ctor_w_location(self): from google.cloud.bigquery._http import Connection From 1fcace314c17d893255b72d08ffd2156442c5ab3 Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar <46078689+Emar-Kar@users.noreply.github.com> Date: Wed, 28 Aug 2019 11:08:52 +0300 Subject: [PATCH 0652/2016] BigQuery: deprecate `client.dataset()` part 1 (#9032) Deprecate `client.dataset()` part 1 --- .../google-cloud-bigquery/docs/snippets.py | 205 ------------------ .../docs/usage/datasets.rst | 48 ++++ .../docs/usage/tables.rst | 11 +- .../samples/add_empty_column.py | 40 ++++ .../samples/browse_table_data.py | 47 ++++ .../samples/create_job.py | 1 + .../samples/create_routine.py | 3 +- .../samples/create_routine_ddl.py | 3 +- .../samples/create_table.py | 10 +- .../samples/dataset_exists.py | 29 +++ .../samples/delete_dataset.py | 3 +- .../samples/delete_dataset_labels.py | 36 +++ .../samples/delete_model.py | 4 +- .../samples/delete_routine.py | 5 +- .../samples/delete_table.py | 3 +- .../samples/get_dataset.py | 4 +- .../samples/get_dataset_labels.py | 38 ++++ .../samples/get_model.py | 3 +- .../samples/get_routine.py | 4 +- .../samples/get_table.py | 3 +- .../samples/label_dataset.py | 33 +++ .../samples/list_datasets.py | 3 +- .../samples/list_datasets_by_label.py | 34 +++ .../samples/list_models.py | 3 +- .../samples/list_routines.py | 2 +- .../samples/list_tables.py | 3 +- .../samples/query_to_arrow.py | 9 +- .../samples/tests/conftest.py | 5 + .../samples/tests/test_add_empty_column.py | 23 ++ .../samples/tests/test_browse_table_data.py | 26 +++ .../samples/tests/test_dataset_exists.py | 30 +++ .../tests/test_dataset_label_samples.py | 33 +++ .../samples/tests/test_get_table.py | 1 + .../tests/test_list_datasets_by_label.py | 26 +++ .../samples/tests/test_query_to_arrow.py | 4 +- .../samples/tests/test_routine_samples.py | 10 +- ...update_dataset_default_table_expiration.py | 3 +- .../samples/update_dataset_description.py | 3 +- .../samples/update_model.py | 3 +- .../samples/update_routine.py | 3 +- 40 files changed, 511 insertions(+), 246 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/add_empty_column.py create mode 100644 packages/google-cloud-bigquery/samples/browse_table_data.py create mode 100644 packages/google-cloud-bigquery/samples/dataset_exists.py create mode 100644 packages/google-cloud-bigquery/samples/delete_dataset_labels.py create mode 100644 packages/google-cloud-bigquery/samples/get_dataset_labels.py create mode 100644 packages/google-cloud-bigquery/samples/label_dataset.py create mode 100644 packages/google-cloud-bigquery/samples/list_datasets_by_label.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 9b4218286402..40fcc05067a7 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -125,133 +125,6 @@ def test_create_client_default_credentials(): assert client is not None -def test_list_datasets_by_label(client, to_delete): - dataset_id = "list_datasets_by_label_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.labels = {"color": "green"} - dataset = client.create_dataset(dataset) # API request - to_delete.append(dataset) - - # [START bigquery_list_datasets_by_label] - # from google.cloud import bigquery - # client = bigquery.Client() - - # The following label filter example will find datasets with an - # arbitrary 'color' label set to 'green' - label_filter = "labels.color:green" - datasets = list(client.list_datasets(filter=label_filter)) - - if datasets: - print("Datasets filtered by {}:".format(label_filter)) - for dataset in datasets: # API request(s) - print("\t{}".format(dataset.dataset_id)) - else: - print("No datasets found with this filter.") - # [END bigquery_list_datasets_by_label] - found = set([dataset.dataset_id for dataset in datasets]) - assert dataset_id in found - - -# [START bigquery_dataset_exists] -def dataset_exists(client, dataset_reference): - """Return if a dataset exists. - - Args: - client (google.cloud.bigquery.client.Client): - A client to connect to the BigQuery API. - dataset_reference (google.cloud.bigquery.dataset.DatasetReference): - A reference to the dataset to look for. - - Returns: - bool: ``True`` if the dataset exists, ``False`` otherwise. - """ - from google.cloud.exceptions import NotFound - - try: - client.get_dataset(dataset_reference) - return True - except NotFound: - return False - - -# [END bigquery_dataset_exists] - - -def test_dataset_exists(client, to_delete): - """Determine if a dataset exists.""" - DATASET_ID = "get_table_dataset_{}".format(_millis()) - dataset_ref = client.dataset(DATASET_ID) - dataset = bigquery.Dataset(dataset_ref) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - assert dataset_exists(client, dataset_ref) - assert not dataset_exists(client, client.dataset("i_dont_exist")) - - -@pytest.mark.skip( - reason=( - "update_dataset() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588" - ) -) -def test_manage_dataset_labels(client, to_delete): - dataset_id = "label_dataset_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_label_dataset] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - # dataset = client.get_dataset(dataset_ref) # API request - - assert dataset.labels == {} - labels = {"color": "green"} - dataset.labels = labels - - dataset = client.update_dataset(dataset, ["labels"]) # API request - - assert dataset.labels == labels - # [END bigquery_label_dataset] - - # [START bigquery_get_dataset_labels] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - dataset = client.get_dataset(dataset_ref) # API request - - # View dataset labels - print("Dataset ID: {}".format(dataset_id)) - print("Labels:") - if dataset.labels: - for label, value in dataset.labels.items(): - print("\t{}: {}".format(label, value)) - else: - print("\tDataset has no labels defined.") - # [END bigquery_get_dataset_labels] - assert dataset.labels == labels - - # [START bigquery_delete_label_dataset] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - # dataset = client.get_dataset(dataset_ref) # API request - - # This example dataset starts with one label - assert dataset.labels == {"color": "green"} - # To delete a label from a dataset, set its value to None - dataset.labels["color"] = None - - dataset = client.update_dataset(dataset, ["labels"]) # API request - - assert dataset.labels == {} - # [END bigquery_delete_label_dataset] - - def test_create_table_nested_repeated_schema(client, to_delete): dataset_id = "create_table_nested_repeated_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) @@ -612,43 +485,6 @@ def test_update_table_expiration(client, to_delete): # [END bigquery_update_table_expiration] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_add_empty_column(client, to_delete): - """Adds an empty column to an existing table.""" - dataset_id = "add_empty_column_dataset_{}".format(_millis()) - table_id = "add_empty_column_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table = client.create_table(table) - - # [START bigquery_add_empty_column] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - table_ref = client.dataset(dataset_id).table(table_id) - table = client.get_table(table_ref) # API request - - original_schema = table.schema - new_schema = original_schema[:] # creates a copy of the schema - new_schema.append(bigquery.SchemaField("phone", "STRING")) - - table.schema = new_schema - table = client.update_table(table, ["schema"]) # API request - - assert len(table.schema) == len(original_schema) + 1 == len(new_schema) - # [END bigquery_add_empty_column] - - @pytest.mark.skip( reason=( "update_table() is flaky " @@ -737,47 +573,6 @@ def test_update_table_cmek(client, to_delete): # [END bigquery_update_table_cmek] -def test_browse_table_data(client, to_delete, capsys): - """Retreive selected row data from a table.""" - - # [START bigquery_browse_table] - # from google.cloud import bigquery - # client = bigquery.Client() - - dataset_ref = client.dataset("samples", project="bigquery-public-data") - table_ref = dataset_ref.table("shakespeare") - table = client.get_table(table_ref) # API call - - # Load all rows from a table - rows = client.list_rows(table) - assert len(list(rows)) == table.num_rows - - # Load the first 10 rows - rows = client.list_rows(table, max_results=10) - assert len(list(rows)) == 10 - - # Specify selected fields to limit the results to certain columns - fields = table.schema[:2] # first two columns - rows = client.list_rows(table, selected_fields=fields, max_results=10) - assert len(rows.schema) == 2 - assert len(list(rows)) == 10 - - # Use the start index to load an arbitrary portion of the table - rows = client.list_rows(table, start_index=10, max_results=10) - - # Print row data in tabular format - format_string = "{!s:<16} " * len(rows.schema) - field_names = [field.name for field in rows.schema] - print(format_string.format(*field_names)) # prints column headers - for row in rows: - print(format_string.format(*row)) # prints row data - # [END bigquery_browse_table] - - out, err = capsys.readouterr() - out = list(filter(bool, out.split("\n"))) # list of non-blank lines - assert len(out) == 11 - - @pytest.mark.skip( reason=( "update_table() is flaky " diff --git a/packages/google-cloud-bigquery/docs/usage/datasets.rst b/packages/google-cloud-bigquery/docs/usage/datasets.rst index d5646355c00d..2daee77f36d2 100644 --- a/packages/google-cloud-bigquery/docs/usage/datasets.rst +++ b/packages/google-cloud-bigquery/docs/usage/datasets.rst @@ -25,6 +25,15 @@ List datasets for a project with the :start-after: [START bigquery_list_datasets] :end-before: [END bigquery_list_datasets] +List datasets by label for a project with the +:func:`~google.cloud.bigquery.client.Client.list_datasets` method: + +.. literalinclude:: ../samples/list_datasets_by_label.py + :language: python + :dedent: 4 + :start-after: [START bigquery_list_datasets_by_label] + :end-before: [END bigquery_list_datasets_by_label] + Getting a Dataset ^^^^^^^^^^^^^^^^^ @@ -37,6 +46,15 @@ Get a dataset resource (to pick up changes made by another client) with the :start-after: [START bigquery_get_dataset] :end-before: [END bigquery_get_dataset] +Determine if a dataset exists with the +:func:`~google.cloud.bigquery.client.Client.get_dataset` method: + +.. literalinclude:: ../samples/dataset_exists.py + :language: python + :dedent: 4 + :start-after: [START bigquery_dataset_exists] + :end-before: [END bigquery_dataset_exists] + Creating a Dataset ^^^^^^^^^^^^^^^^^^ @@ -70,6 +88,36 @@ Modify user permissions on a dataset with the :start-after: [START bigquery_update_dataset_access] :end-before: [END bigquery_update_dataset_access] +Manage Dataset labels +^^^^^^^^^^^^^^^^^^^^^ + +Add labels to a dataset with the +:func:`~google.cloud.bigquery.client.Client.update_dataset` method: + +.. literalinclude:: ../samples/label_dataset.py + :language: python + :dedent: 4 + :start-after: [START bigquery_label_dataset] + :end-before: [END bigquery_label_dataset] + +Get dataset's labels with the +:func:`~google.cloud.bigquery.client.Client.get_dataset` method: + +.. literalinclude:: ../samples/get_dataset_labels.py + :language: python + :dedent: 4 + :start-after: [START bigquery_get_dataset_labels] + :end-before: [END bigquery_get_dataset_labels] + +Delete dataset's labels with the +:func:`~google.cloud.bigquery.client.Client.update_dataset` method: + +.. literalinclude:: ../samples/delete_dataset_labels.py + :language: python + :dedent: 4 + :start-after: [START bigquery_delete_label_dataset] + :end-before: [END bigquery_delete_label_dataset] + Deleting a Dataset ^^^^^^^^^^^^^^^^^^ diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index 4aede9545cd8..458c5b0009ba 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -31,7 +31,7 @@ Get a table resource with the Browse data rows in a table with the :func:`~google.cloud.bigquery.client.Client.list_rows` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/browse_table_data.py :language: python :dedent: 4 :start-after: [START bigquery_browse_table] @@ -113,6 +113,15 @@ Insert rows into a table's data with the :start-after: [START bigquery_table_insert_rows] :end-before: [END bigquery_table_insert_rows] +Add an empty column to the existing table with the +:func:`~google.cloud.bigquery.update_table` method: + +.. literalinclude:: ../samples/add_empty_column.py + :language: python + :dedent: 4 + :start-after: [START bigquery_add_empty_column] + :end-before: [END bigquery_add_empty_column] + Copying a Table ^^^^^^^^^^^^^^^ diff --git a/packages/google-cloud-bigquery/samples/add_empty_column.py b/packages/google-cloud-bigquery/samples/add_empty_column.py new file mode 100644 index 000000000000..eb84037598d3 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/add_empty_column.py @@ -0,0 +1,40 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def add_empty_column(client, table_id): + + # [START bigquery_add_empty_column] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to add an empty column. + # table_id = "your-project.your_dataset.your_table_name" + + table = client.get_table(table_id) + + original_schema = table.schema + new_schema = original_schema[:] # creates a copy of the schema + new_schema.append(bigquery.SchemaField("phone", "STRING")) + + table.schema = new_schema + table = client.update_table(table, ["schema"]) # API request + + if len(table.schema) == len(original_schema) + 1 == len(new_schema): + print("A new column has been added.") + else: + print("The column has not been added.") + # [END bigquery_add_empty_column] diff --git a/packages/google-cloud-bigquery/samples/browse_table_data.py b/packages/google-cloud-bigquery/samples/browse_table_data.py new file mode 100644 index 000000000000..dd6c572cab6d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/browse_table_data.py @@ -0,0 +1,47 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def browse_table_data(client, table_id): + + # [START bigquery_browse_table] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to browse data rows. + # table_id = "your-project.your_dataset.your_table_name" + + # Download all rows from a table. + rows_iter = client.list_rows(table_id) + + # Iterate over rows to make the API requests to fetch row data. + rows = list(rows_iter) + print("Downloaded {} rows from table {}".format(len(rows), table_id)) + + # Download at most 10 rows. + rows_iter = client.list_rows(table_id, max_results=10) + rows = list(rows_iter) + print("Downloaded {} rows from table {}".format(len(rows), table_id)) + + # Specify selected fields to limit the results to certain columns. + table = client.get_table(table_id) + fields = table.schema[:2] # first two columns + rows_iter = client.list_rows(table_id, selected_fields=fields, max_results=10) + rows = list(rows_iter) + print("Selected {} columns from table {}.".format(len(rows_iter.schema), table_id)) + print("Downloaded {} rows from table {}".format(len(rows), table_id)) + # [END bigquery_browse_table] diff --git a/packages/google-cloud-bigquery/samples/create_job.py b/packages/google-cloud-bigquery/samples/create_job.py index 7570dc49f1d2..24bb85510598 100644 --- a/packages/google-cloud-bigquery/samples/create_job.py +++ b/packages/google-cloud-bigquery/samples/create_job.py @@ -14,6 +14,7 @@ def create_job(client): + # [START bigquery_create_job] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_routine.py b/packages/google-cloud-bigquery/samples/create_routine.py index 18b999980d72..c08ec4799a3e 100644 --- a/packages/google-cloud-bigquery/samples/create_routine.py +++ b/packages/google-cloud-bigquery/samples/create_routine.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client, routine_id): +def create_routine(client, routine_id): + # [START bigquery_create_routine] from google.cloud import bigquery from google.cloud import bigquery_v2 diff --git a/packages/google-cloud-bigquery/samples/create_routine_ddl.py b/packages/google-cloud-bigquery/samples/create_routine_ddl.py index aa6254b1139a..a4ae3318e7b4 100644 --- a/packages/google-cloud-bigquery/samples/create_routine_ddl.py +++ b/packages/google-cloud-bigquery/samples/create_routine_ddl.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client, routine_id): +def create_routine_ddl(client, routine_id): + # [START bigquery_create_routine_ddl] # TODO(developer): Import the client library. # from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_table.py b/packages/google-cloud-bigquery/samples/create_table.py index 5e2e34d41d99..2a6e98fc72f6 100644 --- a/packages/google-cloud-bigquery/samples/create_table.py +++ b/packages/google-cloud-bigquery/samples/create_table.py @@ -18,17 +18,17 @@ def create_table(client, table_id): # [START bigquery_create_table] from google.cloud import bigquery - schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() # TODO(developer): Set table_id to the ID of the table to create # table_id = "your-project.your_dataset.your_table_name" + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + table = bigquery.Table(table_id, schema=schema) table = client.create_table(table) # API request print( diff --git a/packages/google-cloud-bigquery/samples/dataset_exists.py b/packages/google-cloud-bigquery/samples/dataset_exists.py new file mode 100644 index 000000000000..46cf26a623bf --- /dev/null +++ b/packages/google-cloud-bigquery/samples/dataset_exists.py @@ -0,0 +1,29 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def dataset_exists(client, dataset_id): + + # [START bigquery_dataset_exists] + from google.cloud.exceptions import NotFound + + # TODO(developer): Set dataset_id to the ID of the dataset to determine existence. + # dataset_id = "your-project.your_dataset" + + try: + client.get_dataset(dataset_id) + print("Dataset {} already exists".format(dataset_id)) + except NotFound: + print("Dataset {} is not found".format(dataset_id)) + # [END bigquery_dataset_exists] diff --git a/packages/google-cloud-bigquery/samples/delete_dataset.py b/packages/google-cloud-bigquery/samples/delete_dataset.py index 58851f1e2120..6cde1b6b2d27 100644 --- a/packages/google-cloud-bigquery/samples/delete_dataset.py +++ b/packages/google-cloud-bigquery/samples/delete_dataset.py @@ -16,7 +16,8 @@ def delete_dataset(client, dataset_id): # [START bigquery_delete_dataset] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/delete_dataset_labels.py b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py new file mode 100644 index 000000000000..33ff5c0f2620 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py @@ -0,0 +1,36 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def delete_dataset_labels(client, dataset_id): + + # [START bigquery_delete_label_dataset] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = "your-project.your_dataset" + + dataset = client.get_dataset(dataset_id) + + # To delete a label from a dataset, set its value to None + dataset.labels["color"] = None + + dataset = client.update_dataset(dataset, ["labels"]) + print("Labels deleted from {}".format(dataset_id)) + # [END bigquery_delete_label_dataset] + return dataset diff --git a/packages/google-cloud-bigquery/samples/delete_model.py b/packages/google-cloud-bigquery/samples/delete_model.py index 371f9003576b..5ac4305bc97e 100644 --- a/packages/google-cloud-bigquery/samples/delete_model.py +++ b/packages/google-cloud-bigquery/samples/delete_model.py @@ -17,7 +17,8 @@ def delete_model(client, model_id): """Sample ID: go/samples-tracker/1534""" # [START bigquery_delete_model] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() @@ -26,5 +27,6 @@ def delete_model(client, model_id): # model_id = 'your-project.your_dataset.your_model' client.delete_model(model_id) + print("Deleted model '{}'.".format(model_id)) # [END bigquery_delete_model] diff --git a/packages/google-cloud-bigquery/samples/delete_routine.py b/packages/google-cloud-bigquery/samples/delete_routine.py index 505faa4780f3..c0164b415008 100644 --- a/packages/google-cloud-bigquery/samples/delete_routine.py +++ b/packages/google-cloud-bigquery/samples/delete_routine.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client, routine_id): +def delete_routine(client, routine_id): + # [START bigquery_delete_routine] # TODO(developer): Import the client library. # from google.cloud import bigquery @@ -25,6 +26,6 @@ def main(client, routine_id): # routine_id = "my-project.my_dataset.my_routine" client.delete_routine(routine_id) - # [END bigquery_delete_routine] print("Deleted routine {}.".format(routine_id)) + # [END bigquery_delete_routine] diff --git a/packages/google-cloud-bigquery/samples/delete_table.py b/packages/google-cloud-bigquery/samples/delete_table.py index 3eb7dc918da7..dcdd3d855b2e 100644 --- a/packages/google-cloud-bigquery/samples/delete_table.py +++ b/packages/google-cloud-bigquery/samples/delete_table.py @@ -16,7 +16,8 @@ def delete_table(client, table_id): # [START bigquery_delete_table] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/get_dataset.py b/packages/google-cloud-bigquery/samples/get_dataset.py index eeab2e088d2f..5586c2b95ebb 100644 --- a/packages/google-cloud-bigquery/samples/get_dataset.py +++ b/packages/google-cloud-bigquery/samples/get_dataset.py @@ -16,7 +16,8 @@ def get_dataset(client, dataset_id): # [START bigquery_get_dataset] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() @@ -52,5 +53,4 @@ def get_dataset(client, dataset_id): print("\t{}".format(table.table_id)) else: print("\tThis dataset does not contain any tables.") - # [END bigquery_get_dataset] diff --git a/packages/google-cloud-bigquery/samples/get_dataset_labels.py b/packages/google-cloud-bigquery/samples/get_dataset_labels.py new file mode 100644 index 000000000000..2f21723a550b --- /dev/null +++ b/packages/google-cloud-bigquery/samples/get_dataset_labels.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_dataset_labels(client, dataset_id): + + # [START bigquery_get_dataset_labels] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = "your-project.your_dataset" + + dataset = client.get_dataset(dataset_id) + + # View dataset labels + print("Dataset ID: {}".format(dataset_id)) + print("Labels:") + if dataset.labels: + for label, value in dataset.labels.items(): + print("\t{}: {}".format(label, value)) + else: + print("\tDataset has no labels defined.") + # [END bigquery_get_dataset_labels] diff --git a/packages/google-cloud-bigquery/samples/get_model.py b/packages/google-cloud-bigquery/samples/get_model.py index 8e43e53ec450..69986733c50b 100644 --- a/packages/google-cloud-bigquery/samples/get_model.py +++ b/packages/google-cloud-bigquery/samples/get_model.py @@ -17,7 +17,8 @@ def get_model(client, model_id): """Sample ID: go/samples-tracker/1510""" # [START bigquery_get_model] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/get_routine.py b/packages/google-cloud-bigquery/samples/get_routine.py index 5850d8d06477..d9035c282438 100644 --- a/packages/google-cloud-bigquery/samples/get_routine.py +++ b/packages/google-cloud-bigquery/samples/get_routine.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client, routine_id): +def get_routine(client, routine_id): + # [START bigquery_get_routine] # TODO(developer): Import the client library. # from google.cloud import bigquery @@ -34,6 +35,5 @@ def main(client, routine_id): for argument in routine.arguments: print(" Name: '{}'".format(argument.name)) print(" Type: '{}'".format(argument.type_)) - # [END bigquery_get_routine] return routine diff --git a/packages/google-cloud-bigquery/samples/get_table.py b/packages/google-cloud-bigquery/samples/get_table.py index e6a5c502e2b3..e49e032f6e23 100644 --- a/packages/google-cloud-bigquery/samples/get_table.py +++ b/packages/google-cloud-bigquery/samples/get_table.py @@ -16,7 +16,8 @@ def get_table(client, table_id): # [START bigquery_get_table] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/label_dataset.py b/packages/google-cloud-bigquery/samples/label_dataset.py new file mode 100644 index 000000000000..7840ea25a63f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/label_dataset.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def label_dataset(client, dataset_id): + + # [START bigquery_label_dataset] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = "your-project.your_dataset" + + dataset = client.get_dataset(dataset_id) + dataset.labels = {"color": "green"} + dataset = client.update_dataset(dataset, ["labels"]) + + print("Labels added to {}".format(dataset_id)) + # [END bigquery_label_dataset] diff --git a/packages/google-cloud-bigquery/samples/list_datasets.py b/packages/google-cloud-bigquery/samples/list_datasets.py index c9ddf4f2523c..b57aad1b5e7b 100644 --- a/packages/google-cloud-bigquery/samples/list_datasets.py +++ b/packages/google-cloud-bigquery/samples/list_datasets.py @@ -16,7 +16,8 @@ def list_datasets(client): # [START bigquery_list_datasets] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/list_datasets_by_label.py b/packages/google-cloud-bigquery/samples/list_datasets_by_label.py new file mode 100644 index 000000000000..8b574b1110eb --- /dev/null +++ b/packages/google-cloud-bigquery/samples/list_datasets_by_label.py @@ -0,0 +1,34 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def list_datasets_by_label(client): + + # [START bigquery_list_datasets_by_label] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + label_filter = "labels.color:green" + datasets = list(client.list_datasets(filter=label_filter)) + + if datasets: + print("Datasets filtered by {}:".format(label_filter)) + for dataset in datasets: + print("\t{}.{}".format(dataset.project, dataset.dataset_id)) + else: + print("No datasets found with this filter.") + # [END bigquery_list_datasets_by_label] diff --git a/packages/google-cloud-bigquery/samples/list_models.py b/packages/google-cloud-bigquery/samples/list_models.py index cb6e4fb5569f..5b4d21799b28 100644 --- a/packages/google-cloud-bigquery/samples/list_models.py +++ b/packages/google-cloud-bigquery/samples/list_models.py @@ -17,7 +17,8 @@ def list_models(client, dataset_id): """Sample ID: go/samples-tracker/1512""" # [START bigquery_list_models] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/list_routines.py b/packages/google-cloud-bigquery/samples/list_routines.py index 9e90c87a3d9c..1ae4f441cde1 100644 --- a/packages/google-cloud-bigquery/samples/list_routines.py +++ b/packages/google-cloud-bigquery/samples/list_routines.py @@ -13,7 +13,7 @@ # limitations under the License. -def main(client, dataset_id): +def list_routines(client, dataset_id): # [START bigquery_list_routines] # TODO(developer): Import the client library. diff --git a/packages/google-cloud-bigquery/samples/list_tables.py b/packages/google-cloud-bigquery/samples/list_tables.py index 33ed408906b0..2057f2d73891 100644 --- a/packages/google-cloud-bigquery/samples/list_tables.py +++ b/packages/google-cloud-bigquery/samples/list_tables.py @@ -16,7 +16,8 @@ def list_tables(client, dataset_id): # [START bigquery_list_tables] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/query_to_arrow.py b/packages/google-cloud-bigquery/samples/query_to_arrow.py index e3ddc23f889a..b13dcf3e1413 100644 --- a/packages/google-cloud-bigquery/samples/query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/query_to_arrow.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client): +def query_to_arrow(client): + # [START bigquery_query_to_arrow] # TODO(developer): Import the client library. # from google.cloud import bigquery @@ -50,9 +51,3 @@ def main(client): print("\nSchema:\n{}".format(repr(arrow_table.schema))) # [END bigquery_query_to_arrow] return arrow_table - - -if __name__ == "__main__": - from google.cloud import bigquery - - main(bigquery.Client()) diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index fe5391ee8a4d..f2bb93112a22 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -78,6 +78,11 @@ def table_id(client, dataset_id): client.delete_table(table, not_found_ok=True) +@pytest.fixture +def table_with_data_id(client): + return "bigquery-public-data.samples.shakespeare" + + @pytest.fixture def routine_id(client, dataset_id): now = datetime.datetime.now() diff --git a/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py b/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py new file mode 100644 index 000000000000..e6c56e6cbfbc --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import add_empty_column + + +def test_add_empty_column(capsys, client, table_id): + + add_empty_column.add_empty_column(client, table_id) + out, err = capsys.readouterr() + assert "A new column has been added." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py new file mode 100644 index 000000000000..f777bf91ca00 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py @@ -0,0 +1,26 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import browse_table_data + + +def test_browse_table_data(capsys, client, table_with_data_id): + + browse_table_data.browse_table_data(client, table_with_data_id) + out, err = capsys.readouterr() + assert "Downloaded 164656 rows from table {}".format(table_with_data_id) in out + assert "Downloaded 10 rows from table {}".format(table_with_data_id) in out + assert "Selected 2 columns from table {}".format(table_with_data_id) in out + assert "Downloaded 10 rows from table {}".format(table_with_data_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py b/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py new file mode 100644 index 000000000000..203c4b884dc4 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py @@ -0,0 +1,30 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from google.cloud import bigquery + +from .. import dataset_exists + + +def test_dataset_exists(capsys, client, random_dataset_id): + + dataset_exists.dataset_exists(client, random_dataset_id) + out, err = capsys.readouterr() + assert "Dataset {} is not found".format(random_dataset_id) in out + dataset = bigquery.Dataset(random_dataset_id) + dataset = client.create_dataset(dataset) + dataset_exists.dataset_exists(client, random_dataset_id) + out, err = capsys.readouterr() + assert "Dataset {} already exists".format(random_dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py new file mode 100644 index 000000000000..94a2092407b0 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import delete_dataset_labels +from .. import get_dataset_labels +from .. import label_dataset + + +def test_dataset_label_samples(capsys, client, dataset_id): + + label_dataset.label_dataset(client, dataset_id) + out, err = capsys.readouterr() + assert "Labels added to {}".format(dataset_id) in out + + get_dataset_labels.get_dataset_labels(client, dataset_id) + out, err = capsys.readouterr() + assert "color: green" in out + + dataset = delete_dataset_labels.delete_dataset_labels(client, dataset_id) + out, err = capsys.readouterr() + assert "Labels deleted from {}".format(dataset_id) in out + assert dataset.labels.get("color") is None diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_table.py b/packages/google-cloud-bigquery/samples/tests/test_get_table.py index debf1b63a3fc..b811ccecad1f 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_table.py @@ -17,6 +17,7 @@ def test_get_table(capsys, client, random_table_id): + schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py b/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py new file mode 100644 index 000000000000..346cbf1a982d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py @@ -0,0 +1,26 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import list_datasets_by_label + + +def test_list_datasets_by_label(capsys, client, dataset_id): + + dataset = client.get_dataset(dataset_id) + dataset.labels = {"color": "green"} + dataset = client.update_dataset(dataset, ["labels"]) + list_datasets_by_label.list_datasets_by_label(client) + out, err = capsys.readouterr() + assert "{}".format(dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py index 9e36bcee346f..f70bd49fe565 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py @@ -17,9 +17,9 @@ from .. import query_to_arrow -def test_main(capsys, client): +def test_query_to_arrow(capsys, client): - arrow_table = query_to_arrow.main(client) + arrow_table = query_to_arrow.query_to_arrow(client) out, err = capsys.readouterr() assert "Downloaded 8 rows, 2 columns." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py index 5905d2e69439..5a1c69c7f60f 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py @@ -19,7 +19,7 @@ def test_create_routine(capsys, client, random_routine_id): from .. import create_routine - create_routine.main(client, random_routine_id) + create_routine.create_routine(client, random_routine_id) out, err = capsys.readouterr() assert "Created routine {}".format(random_routine_id) in out @@ -27,7 +27,7 @@ def test_create_routine(capsys, client, random_routine_id): def test_create_routine_ddl(capsys, client, random_routine_id): from .. import create_routine_ddl - create_routine_ddl.main(client, random_routine_id) + create_routine_ddl.create_routine_ddl(client, random_routine_id) routine = client.get_routine(random_routine_id) out, err = capsys.readouterr() @@ -68,7 +68,7 @@ def test_create_routine_ddl(capsys, client, random_routine_id): def test_list_routines(capsys, client, dataset_id, routine_id): from .. import list_routines - list_routines.main(client, dataset_id) + list_routines.list_routines(client, dataset_id) out, err = capsys.readouterr() assert "Routines contained in dataset {}:".format(dataset_id) in out assert routine_id in out @@ -77,7 +77,7 @@ def test_list_routines(capsys, client, dataset_id, routine_id): def test_delete_routine(capsys, client, routine_id): from .. import delete_routine - delete_routine.main(client, routine_id) + delete_routine.delete_routine(client, routine_id) out, err = capsys.readouterr() assert "Deleted routine {}.".format(routine_id) in out @@ -85,5 +85,5 @@ def test_delete_routine(capsys, client, routine_id): def test_update_routine(client, routine_id): from .. import update_routine - routine = update_routine.main(client, routine_id) + routine = update_routine.update_routine(client, routine_id) assert routine.body == "x * 4" diff --git a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py index a5ac38c01a99..4534bb2011eb 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py @@ -16,7 +16,8 @@ def update_dataset_default_table_expiration(client, dataset_id): # [START bigquery_update_dataset_expiration] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/update_dataset_description.py b/packages/google-cloud-bigquery/samples/update_dataset_description.py index 70be80b7507e..f3afb7fa68ce 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_description.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_description.py @@ -16,7 +16,8 @@ def update_dataset_description(client, dataset_id): # [START bigquery_update_dataset_description] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/update_model.py b/packages/google-cloud-bigquery/samples/update_model.py index 2440066ae1ec..5df4ada886ed 100644 --- a/packages/google-cloud-bigquery/samples/update_model.py +++ b/packages/google-cloud-bigquery/samples/update_model.py @@ -17,7 +17,8 @@ def update_model(client, model_id): """Sample ID: go/samples-tracker/1533""" # [START bigquery_update_model_description] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/update_routine.py b/packages/google-cloud-bigquery/samples/update_routine.py index 8683e761562f..4d491d42e168 100644 --- a/packages/google-cloud-bigquery/samples/update_routine.py +++ b/packages/google-cloud-bigquery/samples/update_routine.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client, routine_id): +def update_routine(client, routine_id): + # [START bigquery_update_routine] # TODO(developer): Import the client library. # from google.cloud import bigquery From 26900a9d2f7bdeb1396538b57b532308e8ab2143 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Aug 2019 12:36:44 -0700 Subject: [PATCH 0653/2016] Optionally include indexes in table written by `load_table_from_dataframe`. (#9084) * Specify the index data type in partial schema to `load_table_from_dataframe` to include it. If an index (or level of a multi-index) has a name and is present in the schema passed to `load_table_from_dataframe`, then that index will be serialized and written to the table. Otherwise, the index is omitted from the serialized table. * Don't include index if has same name as column name. * Move `load_table_dataframe` sample from `snippets.py` to `samples/`. Sample now demonstrates how to manually include the index with a partial schema definition. Update docs reference to new `load_table_dataframe` sample location. --- .../google-cloud-bigquery/docs/snippets.py | 47 ---- .../docs/usage/pandas.rst | 2 +- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/_pandas_helpers.py | 64 ++++- .../samples/load_table_dataframe.py | 73 +++++ .../tests/test_load_table_dataframe.py | 30 ++ .../tests/unit/test__pandas_helpers.py | 260 +++++++++++++++++- 7 files changed, 421 insertions(+), 57 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/load_table_dataframe.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 40fcc05067a7..4c39ff912230 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -2536,52 +2536,5 @@ def test_list_rows_as_dataframe(client): assert len(df) == table.num_rows # verify the number of rows -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.parametrize("parquet_engine", ["pyarrow", "fastparquet"]) -def test_load_table_from_dataframe(client, to_delete, parquet_engine): - if parquet_engine == "pyarrow" and pyarrow is None: - pytest.skip("Requires `pyarrow`") - if parquet_engine == "fastparquet" and fastparquet is None: - pytest.skip("Requires `fastparquet`") - - pandas.set_option("io.parquet.engine", parquet_engine) - - dataset_id = "load_table_from_dataframe_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_dataframe] - # from google.cloud import bigquery - # import pandas - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - table_ref = dataset_ref.table("monty_python") - records = [ - {"title": u"The Meaning of Life", "release_year": 1983}, - {"title": u"Monty Python and the Holy Grail", "release_year": 1975}, - {"title": u"Life of Brian", "release_year": 1979}, - {"title": u"And Now for Something Completely Different", "release_year": 1971}, - ] - # Optionally set explicit indices. - # If indices are not specified, a column will be created for the default - # indices created by pandas. - index = [u"Q24980", u"Q25043", u"Q24953", u"Q16403"] - dataframe = pandas.DataFrame(records, index=pandas.Index(index, name="wikidata_id")) - - job = client.load_table_from_dataframe(dataframe, table_ref, location="US") - - job.result() # Waits for table load to complete. - - assert job.state == "DONE" - table = client.get_table(table_ref) - assert table.num_rows == 4 - # [END bigquery_load_table_dataframe] - column_names = [field.name for field in table.schema] - assert sorted(column_names) == ["release_year", "title", "wikidata_id"] - - if __name__ == "__main__": pytest.main() diff --git a/packages/google-cloud-bigquery/docs/usage/pandas.rst b/packages/google-cloud-bigquery/docs/usage/pandas.rst index 9504bd19673a..9db98dfbbccb 100644 --- a/packages/google-cloud-bigquery/docs/usage/pandas.rst +++ b/packages/google-cloud-bigquery/docs/usage/pandas.rst @@ -55,7 +55,7 @@ install the BigQuery python client library with :mod:`pandas` and The following example demonstrates how to create a :class:`pandas.DataFrame` and load it into a new table: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/load_table_dataframe.py :language: python :dedent: 4 :start-after: [START bigquery_load_table_dataframe] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index c41ceb6b0306..bda8c5611435 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -36,6 +36,7 @@ from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery import enums from google.cloud.bigquery.enums import StandardSqlDataTypes from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions @@ -124,6 +125,7 @@ "GoogleSheetsOptions", "DEFAULT_RETRY", # Enum Constants + "enums", "Compression", "CreateDisposition", "DestinationFormat", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 2d2fb8af24d3..5e73c9f58e22 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -187,6 +187,49 @@ def bq_to_arrow_array(series, bq_field): return pyarrow.array(series, type=arrow_type) +def get_column_or_index(dataframe, name): + """Return a column or index as a pandas series.""" + if name in dataframe.columns: + return dataframe[name].reset_index(drop=True) + + if isinstance(dataframe.index, pandas.MultiIndex): + if name in dataframe.index.names: + return ( + dataframe.index.get_level_values(name) + .to_series() + .reset_index(drop=True) + ) + else: + if name == dataframe.index.name: + return dataframe.index.to_series().reset_index(drop=True) + + raise ValueError("column or index '{}' not found.".format(name)) + + +def list_columns_and_indexes(dataframe): + """Return all index and column names with dtypes. + + Returns: + Sequence[Tuple[dtype, str]]: + Returns a sorted list of indexes and column names with + corresponding dtypes. If an index is missing a name or has the + same name as a column, the index is omitted. + """ + column_names = frozenset(dataframe.columns) + columns_and_indexes = [] + if isinstance(dataframe.index, pandas.MultiIndex): + for name in dataframe.index.names: + if name and name not in column_names: + values = dataframe.index.get_level_values(name) + columns_and_indexes.append((name, values.dtype)) + else: + if dataframe.index.name and dataframe.index.name not in column_names: + columns_and_indexes.append((dataframe.index.name, dataframe.index.dtype)) + + columns_and_indexes += zip(dataframe.columns, dataframe.dtypes) + return columns_and_indexes + + def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. @@ -217,7 +260,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): bq_schema_unused = set() bq_schema_out = [] - for column, dtype in zip(dataframe.columns, dataframe.dtypes): + for column, dtype in list_columns_and_indexes(dataframe): # Use provided type from schema, if present. bq_field = bq_schema_index.get(column) if bq_field: @@ -229,7 +272,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) if not bq_type: - warnings.warn("Unable to determine type of column '{}'.".format(column)) + warnings.warn(u"Unable to determine type of column '{}'.".format(column)) return None bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) @@ -238,7 +281,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # column, but it was not found. if bq_schema_unused: raise ValueError( - "bq_schema contains fields not present in dataframe: {}".format( + u"bq_schema contains fields not present in dataframe: {}".format( bq_schema_unused ) ) @@ -261,20 +304,25 @@ def dataframe_to_arrow(dataframe, bq_schema): BigQuery schema. """ column_names = set(dataframe.columns) + column_and_index_names = set( + name for name, _ in list_columns_and_indexes(dataframe) + ) bq_field_names = set(field.name for field in bq_schema) - extra_fields = bq_field_names - column_names + extra_fields = bq_field_names - column_and_index_names if extra_fields: raise ValueError( - "bq_schema contains fields not present in dataframe: {}".format( + u"bq_schema contains fields not present in dataframe: {}".format( extra_fields ) ) + # It's okay for indexes to be missing from bq_schema, but it's not okay to + # be missing columns. missing_fields = column_names - bq_field_names if missing_fields: raise ValueError( - "bq_schema is missing fields from dataframe: {}".format(missing_fields) + u"bq_schema is missing fields from dataframe: {}".format(missing_fields) ) arrow_arrays = [] @@ -283,7 +331,9 @@ def dataframe_to_arrow(dataframe, bq_schema): for bq_field in bq_schema: arrow_fields.append(bq_to_arrow_field(bq_field)) arrow_names.append(bq_field.name) - arrow_arrays.append(bq_to_arrow_array(dataframe[bq_field.name], bq_field)) + arrow_arrays.append( + bq_to_arrow_array(get_column_or_index(dataframe, bq_field.name), bq_field) + ) if all((field is not None for field in arrow_fields)): return pyarrow.Table.from_arrays( diff --git a/packages/google-cloud-bigquery/samples/load_table_dataframe.py b/packages/google-cloud-bigquery/samples/load_table_dataframe.py new file mode 100644 index 000000000000..69eeb6ef89d0 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_dataframe.py @@ -0,0 +1,73 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_dataframe(client, table_id): + # [START bigquery_load_table_dataframe] + from google.cloud import bigquery + import pandas + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + records = [ + {"title": u"The Meaning of Life", "release_year": 1983}, + {"title": u"Monty Python and the Holy Grail", "release_year": 1975}, + {"title": u"Life of Brian", "release_year": 1979}, + {"title": u"And Now for Something Completely Different", "release_year": 1971}, + ] + dataframe = pandas.DataFrame( + records, + # In the loaded table, the column order reflects the order of the + # columns in the DataFrame. + columns=["title", "release_year"], + # Optionally, set a named index, which can also be written to the + # BigQuery table. + index=pandas.Index( + [u"Q24980", u"Q25043", u"Q24953", u"Q16403"], name="wikidata_id" + ), + ) + job_config = bigquery.LoadJobConfig( + # Specify a (partial) schema. All columns are always written to the + # table. The schema is used to assist in data type definitions. + schema=[ + # Specify the type of columns whose type cannot be auto-detected. For + # example the "title" column uses pandas dtype "object", so its + # data type is ambiguous. + bigquery.SchemaField("title", bigquery.enums.SqlTypeNames.STRING), + # Indexes are written if included in the schema by name. + bigquery.SchemaField("wikidata_id", bigquery.enums.SqlTypeNames.STRING), + ], + # Optionally, set the write disposition. BigQuery appends loaded rows + # to an existing table by default, but with WRITE_TRUNCATE write + # disposition it replaces the table with the loaded data. + write_disposition="WRITE_TRUNCATE", + ) + + job = client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config, location="US" + ) + job.result() # Waits for table load to complete. + + table = client.get_table(table_id) + print( + "Loaded {} rows and {} columns to {}".format( + table.num_rows, len(table.schema), table_id + ) + ) + # [END bigquery_load_table_dataframe] + return table diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py new file mode 100644 index 000000000000..d553d449a525 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py @@ -0,0 +1,30 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from .. import load_table_dataframe + + +pytest.importorskip("pandas") +pytest.importorskip("pyarrow") + + +def test_load_table_dataframe(capsys, client, random_table_id): + table = load_table_dataframe.load_table_dataframe(client, random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 4 rows and 3 columns" in out + + column_names = [field.name for field in table.schema] + assert column_names == ["wikidata_id", "title", "release_year"] diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index facfb79b3ccb..b539abe9a89a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections import datetime import decimal import functools @@ -21,6 +22,8 @@ try: import pandas + import pandas.api.types + import pandas.testing except ImportError: # pragma: NO COVER pandas = None try: @@ -511,9 +514,262 @@ def test_bq_to_arrow_schema_w_unknown_type(module_under_test): assert actual is None +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_get_column_or_index_not_found(module_under_test): + dataframe = pandas.DataFrame({"not_the_column_youre_looking_for": [1, 2, 3]}) + with pytest.raises(ValueError, match="col_is_missing"): + module_under_test.get_column_or_index(dataframe, "col_is_missing") + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_get_column_or_index_with_multiindex_not_found(module_under_test): + dataframe = pandas.DataFrame( + {"column_name": [1, 2, 3, 4, 5, 6]}, + index=pandas.MultiIndex.from_tuples( + [("a", 0), ("a", 1), ("b", 0), ("b", 1), ("c", 0), ("c", 1)] + ), + ) + with pytest.raises(ValueError, match="not_in_df"): + module_under_test.get_column_or_index(dataframe, "not_in_df") + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_get_column_or_index_with_both_prefers_column(module_under_test): + dataframe = pandas.DataFrame( + {"some_name": [1, 2, 3]}, index=pandas.Index([0, 1, 2], name="some_name") + ) + series = module_under_test.get_column_or_index(dataframe, "some_name") + expected = pandas.Series([1, 2, 3], name="some_name") + pandas.testing.assert_series_equal(series, expected) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_get_column_or_index_with_column(module_under_test): + dataframe = pandas.DataFrame({"column_name": [1, 2, 3], "other_column": [4, 5, 6]}) + series = module_under_test.get_column_or_index(dataframe, "column_name") + expected = pandas.Series([1, 2, 3], name="column_name") + pandas.testing.assert_series_equal(series, expected) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_get_column_or_index_with_named_index(module_under_test): + dataframe = pandas.DataFrame( + {"column_name": [1, 2, 3]}, index=pandas.Index([4, 5, 6], name="index_name") + ) + series = module_under_test.get_column_or_index(dataframe, "index_name") + expected = pandas.Series([4, 5, 6], name="index_name") + pandas.testing.assert_series_equal(series, expected) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_get_column_or_index_with_datetimeindex(module_under_test): + datetimes = [ + datetime.datetime(2000, 1, 2, 3, 4, 5, 101), + datetime.datetime(2006, 7, 8, 9, 10, 11, 202), + datetime.datetime(2012, 1, 14, 15, 16, 17, 303), + ] + dataframe = pandas.DataFrame( + {"column_name": [1, 2, 3]}, + index=pandas.DatetimeIndex(datetimes, name="index_name"), + ) + series = module_under_test.get_column_or_index(dataframe, "index_name") + expected = pandas.Series(datetimes, name="index_name") + pandas.testing.assert_series_equal(series, expected) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_get_column_or_index_with_multiindex(module_under_test): + dataframe = pandas.DataFrame( + {"column_name": [1, 2, 3, 4, 5, 6]}, + index=pandas.MultiIndex.from_tuples( + [("a", 0), ("a", 1), ("b", 0), ("b", 1), ("c", 0), ("c", 1)], + names=["letters", "numbers"], + ), + ) + + series = module_under_test.get_column_or_index(dataframe, "letters") + expected = pandas.Series(["a", "a", "b", "b", "c", "c"], name="letters") + pandas.testing.assert_series_equal(series, expected) + + series = module_under_test.get_column_or_index(dataframe, "numbers") + expected = pandas.Series([0, 1, 0, 1, 0, 1], name="numbers") + pandas.testing.assert_series_equal(series, expected) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_list_columns_and_indexes_without_named_index(module_under_test): + df_data = collections.OrderedDict( + [ + ("a_series", [1, 2, 3, 4]), + ("b_series", [0.1, 0.2, 0.3, 0.4]), + ("c_series", ["a", "b", "c", "d"]), + ] + ) + dataframe = pandas.DataFrame(df_data) + + columns_and_indexes = module_under_test.list_columns_and_indexes(dataframe) + expected = [ + ("a_series", pandas.api.types.pandas_dtype("int64")), + ("b_series", pandas.api.types.pandas_dtype("float64")), + ("c_series", pandas.api.types.pandas_dtype("object")), + ] + assert columns_and_indexes == expected + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_list_columns_and_indexes_with_named_index_same_as_column_name( + module_under_test +): + df_data = collections.OrderedDict( + [ + ("a_series", [1, 2, 3, 4]), + ("b_series", [0.1, 0.2, 0.3, 0.4]), + ("c_series", ["a", "b", "c", "d"]), + ] + ) + dataframe = pandas.DataFrame( + df_data, + # Use same name as an integer column but a different datatype so that + # we can verify that the column is listed but the index isn't. + index=pandas.Index([0.1, 0.2, 0.3, 0.4], name="a_series"), + ) + + columns_and_indexes = module_under_test.list_columns_and_indexes(dataframe) + expected = [ + ("a_series", pandas.api.types.pandas_dtype("int64")), + ("b_series", pandas.api.types.pandas_dtype("float64")), + ("c_series", pandas.api.types.pandas_dtype("object")), + ] + assert columns_and_indexes == expected + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_list_columns_and_indexes_with_named_index(module_under_test): + df_data = collections.OrderedDict( + [ + ("a_series", [1, 2, 3, 4]), + ("b_series", [0.1, 0.2, 0.3, 0.4]), + ("c_series", ["a", "b", "c", "d"]), + ] + ) + dataframe = pandas.DataFrame( + df_data, index=pandas.Index([4, 5, 6, 7], name="a_index") + ) + + columns_and_indexes = module_under_test.list_columns_and_indexes(dataframe) + expected = [ + ("a_index", pandas.api.types.pandas_dtype("int64")), + ("a_series", pandas.api.types.pandas_dtype("int64")), + ("b_series", pandas.api.types.pandas_dtype("float64")), + ("c_series", pandas.api.types.pandas_dtype("object")), + ] + assert columns_and_indexes == expected + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_list_columns_and_indexes_with_multiindex(module_under_test): + df_data = collections.OrderedDict( + [ + ("a_series", [1, 2, 3, 4]), + ("b_series", [0.1, 0.2, 0.3, 0.4]), + ("c_series", ["a", "b", "c", "d"]), + ] + ) + dataframe = pandas.DataFrame( + df_data, + index=pandas.MultiIndex.from_tuples( + [(0, 0, 41), (0, 0, 42), (1, 0, 41), (1, 1, 41)], + names=[ + "a_index", + # Use same name as column, but different dtype so we can verify + # the column type is included. + "b_series", + "c_index", + ], + ), + ) + + columns_and_indexes = module_under_test.list_columns_and_indexes(dataframe) + expected = [ + ("a_index", pandas.api.types.pandas_dtype("int64")), + ("c_index", pandas.api.types.pandas_dtype("int64")), + ("a_series", pandas.api.types.pandas_dtype("int64")), + ("b_series", pandas.api.types.pandas_dtype("float64")), + ("c_series", pandas.api.types.pandas_dtype("object")), + ] + assert columns_and_indexes == expected + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_dataframe_to_arrow_with_multiindex(module_under_test): + bq_schema = ( + schema.SchemaField("str_index", "STRING"), + # int_index is intentionally omitted, to verify that it's okay to be + # missing indexes from the schema. + schema.SchemaField("dt_index", "DATETIME"), + schema.SchemaField("int_col", "INTEGER"), + schema.SchemaField("nullable_int_col", "INTEGER"), + schema.SchemaField("str_col", "STRING"), + ) + df_data = collections.OrderedDict( + [ + ("int_col", [1, 2, 3, 4, 5, 6]), + ("nullable_int_col", [6.0, float("nan"), 7.0, float("nan"), 8.0, 9.0]), + ("str_col", ["apple", "banana", "cherry", "durian", "etrog", "fig"]), + ] + ) + df_index = pandas.MultiIndex.from_tuples( + [ + ("a", 0, datetime.datetime(1999, 12, 31, 23, 59, 59, 999999)), + ("a", 0, datetime.datetime(2000, 1, 1, 0, 0, 0)), + ("a", 1, datetime.datetime(1999, 12, 31, 23, 59, 59, 999999)), + ("b", 1, datetime.datetime(2000, 1, 1, 0, 0, 0)), + ("b", 0, datetime.datetime(1999, 12, 31, 23, 59, 59, 999999)), + ("b", 0, datetime.datetime(2000, 1, 1, 0, 0, 0)), + ], + names=["str_index", "int_index", "dt_index"], + ) + dataframe = pandas.DataFrame(df_data, index=df_index) + + arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) + + assert arrow_table.schema.names == [ + "str_index", + "dt_index", + "int_col", + "nullable_int_col", + "str_col", + ] + arrow_data = arrow_table.to_pydict() + assert arrow_data["str_index"] == ["a", "a", "a", "b", "b", "b"] + expected_dt_index = [ + pandas.Timestamp(dt) + for dt in ( + datetime.datetime(1999, 12, 31, 23, 59, 59, 999999), + datetime.datetime(2000, 1, 1, 0, 0, 0), + datetime.datetime(1999, 12, 31, 23, 59, 59, 999999), + datetime.datetime(2000, 1, 1, 0, 0, 0), + datetime.datetime(1999, 12, 31, 23, 59, 59, 999999), + datetime.datetime(2000, 1, 1, 0, 0, 0), + ) + ] + assert arrow_data["dt_index"] == expected_dt_index + assert arrow_data["int_col"] == [1, 2, 3, 4, 5, 6] + assert arrow_data["nullable_int_col"] == [6, None, 7, None, 8, 9] + assert arrow_data["str_col"] == [ + "apple", + "banana", + "cherry", + "durian", + "etrog", + "fig", + ] + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") -def test_dataframe_to_arrow_w_required_fields(module_under_test): +def test_dataframe_to_arrow_with_required_fields(module_under_test): bq_schema = ( schema.SchemaField("field01", "STRING", mode="REQUIRED"), schema.SchemaField("field02", "BYTES", mode="REQUIRED"), @@ -568,7 +824,7 @@ def test_dataframe_to_arrow_w_required_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") -def test_dataframe_to_arrow_w_unknown_type(module_under_test): +def test_dataframe_to_arrow_with_unknown_type(module_under_test): bq_schema = ( schema.SchemaField("field00", "UNKNOWN_TYPE"), schema.SchemaField("field01", "STRING"), From 0d4ab44bb1851d8525dd05f74fdb939f29c7852a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 2 Sep 2019 22:50:55 +0200 Subject: [PATCH 0654/2016] Disable failing snippets test (#9156) test_copy_table_cmek() results in internal error on the backend, causing the test to fail, thus the test is marked as skipped. --- packages/google-cloud-bigquery/docs/snippets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 4c39ff912230..387f35dfac07 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -1393,6 +1393,7 @@ def test_copy_table_multiple_source(client, to_delete): assert dest_table.num_rows == 2 +@pytest.mark.skip(reason="Backend responds with a 500 internal error.") def test_copy_table_cmek(client, to_delete): dataset_id = "copy_table_cmek_{}".format(_millis()) dest_dataset = bigquery.Dataset(client.dataset(dataset_id)) From 25808085d77f232ef131ee52a6d852beb022d24f Mon Sep 17 00:00:00 2001 From: Ryan Yuan Date: Tue, 3 Sep 2019 20:33:40 +1000 Subject: [PATCH 0655/2016] Docs: fix the reference URL for BigQuery create_dataset() (#9149) --- packages/google-cloud-bigquery/google/cloud/bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 047633642e1e..cc53ffa22985 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -343,7 +343,7 @@ def create_dataset(self, dataset, exists_ok=False, retry=DEFAULT_RETRY): """API call: create the dataset via a POST request. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert Args: dataset (Union[ \ From 7bf3f8884cbf14d90139b65c3203edd218f85140 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 3 Sep 2019 15:55:47 -0700 Subject: [PATCH 0656/2016] Release bigquery 1.19.0 (#9168) * Release bigquery 1.19.0 * Use code font for `create_dataset()` Co-Authored-By: Tres Seaver --- packages/google-cloud-bigquery/CHANGELOG.md | 38 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 6d1c71ccbf79..c938c05b7f13 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,44 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.19.0 + +09-03-2019 14:33 PDT + +### Implementation Changes + +- Raise when unexpected fields are present in the `LoadJobConfig.schema` when calling `load_table_from_dataframe`. ([#9096](https://github.com/googleapis/google-cloud-python/pull/9096)) +- Determine the schema in `load_table_from_dataframe` based on dtypes. ([#9049](https://github.com/googleapis/google-cloud-python/pull/9049)) +- Raise helpful error when loading table from dataframe with `STRUCT` columns. ([#9053](https://github.com/googleapis/google-cloud-python/pull/9053)) +- Fix schema recognition of struct field types. ([#9001](https://github.com/googleapis/google-cloud-python/pull/9001)) +- Fix deserializing `None` in `QueryJob` for queries with parameters. ([#9029](https://github.com/googleapis/google-cloud-python/pull/9029)) + +### New Features + +- Include indexes in table written by `load_table_from_dataframe`, only if + fields corresponding to indexes are present in `LoadJobConfig.schema`. + ([#9084](https://github.com/googleapis/google-cloud-python/pull/9084)) +- Add `client_options` to constructor. ([#8999](https://github.com/googleapis/google-cloud-python/pull/8999)) +- Add `--dry_run` option to `%%bigquery` magic. ([#9067](https://github.com/googleapis/google-cloud-python/pull/9067)) +- Add `load_table_from_json()` method to create a table from a list of dictionaries. ([#9076](https://github.com/googleapis/google-cloud-python/pull/9076)) +- Allow subset of schema to be passed into `load_table_from_dataframe`. ([#9064](https://github.com/googleapis/google-cloud-python/pull/9064)) +- Add support for unsetting `LoadJobConfig.schema`. ([#9077](https://github.com/googleapis/google-cloud-python/pull/9077)) +- Add support to `Dataset` for project IDs containing an org prefix. ([#8877](https://github.com/googleapis/google-cloud-python/pull/8877)) +- Add enum with SQL type names allowed to be used in `SchemaField`. ([#9040](https://github.com/googleapis/google-cloud-python/pull/9040)) + +### Documentation + +- Fix the reference URL for `Client.create_dataset()`. ([#9149](https://github.com/googleapis/google-cloud-python/pull/9149)) +- Update code samples to use strings for table names instead of `client.dataset()`. ([#9032](https://github.com/googleapis/google-cloud-python/pull/9032)) +- Remove compatability badges from READMEs. ([#9035](https://github.com/googleapis/google-cloud-python/pull/9035)) +- Fix Pandas DataFrame load example under Python 2.7. ([#9022](https://github.com/googleapis/google-cloud-python/pull/9022)) + +### Internal / Testing Changes + +- Disable failing snippets test for copying CMEK-protected tables. ([#9156](https://github.com/googleapis/google-cloud-python/pull/9156)) +- Fix BigQuery client unit test assertions ([#9112](https://github.com/googleapis/google-cloud-python/pull/9112)) +- Replace avro with arrow schemas in `test_table.py` ([#9056](https://github.com/googleapis/google-cloud-python/pull/9056)) + ## 1.18.0 08-08-2019 12:28 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index b3eeb15b5d64..42f59516fc13 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.18.0" +version = "1.19.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From b7d74c1fa004a234253d0a194385736813498855 Mon Sep 17 00:00:00 2001 From: Shubha Rajan Date: Wed, 4 Sep 2019 12:40:19 -0700 Subject: [PATCH 0657/2016] Add `max_results` parameter to `QueryJob.result()`. (#9167) * added max_results param to QueryJob.result() * added test to check that max_results param appears in tabledata.list call arguments --- .../google/cloud/bigquery/job.py | 8 +++- .../tests/unit/test_job.py | 39 +++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 6e1eb81648f5..155474536d14 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2905,7 +2905,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY): exc.message += self._format_for_exception(self.query, self.job_id) raise - def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY): + def result( + self, timeout=None, page_size=None, retry=DEFAULT_RETRY, max_results=None + ): """Start the job and wait for it to complete and get the result. Args: @@ -2956,7 +2958,9 @@ def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY): dest_table_ref = self.destination dest_table = Table(dest_table_ref, schema=schema) dest_table._properties["numRows"] = self._query_results.total_rows - rows = self._client.list_rows(dest_table, page_size=page_size, retry=retry) + rows = self._client.list_rows( + dest_table, page_size=page_size, retry=retry, max_results=max_results + ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 19409d8d43c3..98090a5257fd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -4133,6 +4133,45 @@ def test_result(self): # on the response from tabledata.list. self.assertEqual(result.total_rows, 1) + def test_result_with_max_results(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "5", + } + tabledata_resource = { + "totalRows": "5", + "pageToken": None, + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + ], + } + connection = _make_connection(query_resource, tabledata_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + max_results = 3 + + result = job.result(max_results=max_results) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 5) + + rows = list(result) + + self.assertEqual(len(rows), 3) + self.assertEqual(len(connection.api_request.call_args_list), 2) + tabledata_list_request = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request[1]["query_params"]["maxResults"], max_results + ) + def test_result_w_empty_schema(self): from google.cloud.bigquery.table import _EmptyRowIterator From 13ed22741f1658c51f65f0f979473f8fcc6975a9 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 4 Sep 2019 22:12:17 +0200 Subject: [PATCH 0658/2016] BigQuery: Autofetch table schema on load if not provided (#9108) * Autofetch table schema on load if not provided * Avoid fetching table schema if WRITE_TRUNCATE job * Skip dataframe columns list check A similar check is already performed on the server, and server-side errors are preferred to client errors. * Raise table NotFound in auto Pandas schema tests A mock should raise this error instead of returning a table to trigger schema generation from Pandas dtypes. * Use list_columns_and_indexes() for names list --- .../google/cloud/bigquery/_pandas_helpers.py | 2 +- .../google/cloud/bigquery/client.py | 21 +++ .../tests/unit/test_client.py | 147 +++++++++++++++++- 3 files changed, 163 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 5e73c9f58e22..bfbaf92bbe38 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -210,7 +210,7 @@ def list_columns_and_indexes(dataframe): """Return all index and column names with dtypes. Returns: - Sequence[Tuple[dtype, str]]: + Sequence[Tuple[str, dtype]]: Returns a sorted list of indexes and column names with corresponding dtypes. If an index is missing a name or has the same name as a column, the index is omitted. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index cc53ffa22985..c33e119cbc74 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1547,6 +1547,27 @@ def load_table_from_dataframe( if location is None: location = self.location + # If table schema is not provided, we try to fetch the existing table + # schema, and check if dataframe schema is compatible with it - except + # for WRITE_TRUNCATE jobs, the existing schema does not matter then. + if ( + not job_config.schema + and job_config.write_disposition != job.WriteDisposition.WRITE_TRUNCATE + ): + try: + table = self.get_table(destination) + except google.api_core.exceptions.NotFound: + table = None + else: + columns_and_indexes = frozenset( + name + for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe) + ) + # schema fields not present in the dataframe are not needed + job_config.schema = [ + field for field in table.schema if field.name in columns_and_indexes + ] + job_config.schema = _pandas_helpers.dataframe_to_bq_schema( dataframe, job_config.schema ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f31d8587322b..da3cee11e5d0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -20,6 +20,7 @@ import gzip import io import json +import operator import unittest import warnings @@ -5279,15 +5280,23 @@ def test_load_table_from_file_bad_mode(self): def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField client = self._make_client() records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - with load_patch as load_table_from_file: + with load_patch as load_table_from_file, get_table_patch: client.load_table_from_dataframe(dataframe, self.TABLE_REF) load_table_from_file.assert_called_once_with( @@ -5314,15 +5323,23 @@ def test_load_table_from_dataframe(self): def test_load_table_from_dataframe_w_client_location(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField client = self._make_client(location=self.LOCATION) records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - with load_patch as load_table_from_file: + with load_patch as load_table_from_file, get_table_patch: client.load_table_from_dataframe(dataframe, self.TABLE_REF) load_table_from_file.assert_called_once_with( @@ -5349,20 +5366,33 @@ def test_load_table_from_dataframe_w_client_location(self): def test_load_table_from_dataframe_w_custom_job_config(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField client = self._make_client() records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) - job_config = job.LoadJobConfig() + job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_TRUNCATE + ) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - with load_patch as load_table_from_file: + with load_patch as load_table_from_file, get_table_patch as get_table: client.load_table_from_dataframe( dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION ) + # no need to fetch and inspect table schema for WRITE_TRUNCATE jobs + assert not get_table.called + load_table_from_file.assert_called_once_with( client, mock.ANY, @@ -5378,6 +5408,7 @@ def test_load_table_from_dataframe_w_custom_job_config(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.write_disposition == job.WriteDisposition.WRITE_TRUNCATE @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -5421,7 +5452,12 @@ def test_load_table_from_dataframe_w_automatic_schema(self): "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - with load_patch as load_table_from_file: + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + with load_patch as load_table_from_file, get_table_patch: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION ) @@ -5449,6 +5485,100 @@ def test_load_table_from_dataframe_w_automatic_schema(self): SchemaField("ts_col", "TIMESTAMP"), ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_index_and_auto_schema(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + df_data = collections.OrderedDict( + [("int_col", [10, 20, 30]), ("float_col", [1.0, 2.0, 3.0])] + ) + dataframe = pandas.DataFrame( + df_data, + index=pandas.Index(name="unique_name", data=["one", "two", "three"]), + ) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[ + SchemaField("int_col", "INTEGER"), + SchemaField("float_col", "FLOAT"), + SchemaField("unique_name", "STRING"), + ] + ), + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + + sent_schema = sorted(sent_config.schema, key=operator.attrgetter("name")) + expected_sent_schema = [ + SchemaField("float_col", "FLOAT"), + SchemaField("int_col", "INTEGER"), + SchemaField("unique_name", "STRING"), + ] + assert sent_schema == expected_sent_schema + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_unknown_table(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch: + # there should be no error + client.load_table_from_dataframe(dataframe, self.TABLE_REF) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=None, + project=None, + job_config=mock.ANY, + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields_error(self): @@ -5741,6 +5871,11 @@ def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) @@ -5749,7 +5884,7 @@ def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): dataframe, "to_parquet", wraps=dataframe.to_parquet ) - with load_patch, pyarrow_patch, to_parquet_patch as to_parquet_spy: + with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch as to_parquet_spy: client.load_table_from_dataframe( dataframe, self.TABLE_REF, From d1b5412e53cf907b34bff0143eaa81a54c56e4d4 Mon Sep 17 00:00:00 2001 From: Shubha Rajan Date: Thu, 5 Sep 2019 17:56:17 -0700 Subject: [PATCH 0659/2016] Add `--max_results` option to Jupyter magics. (#9169) * added max_results magic option and fixed broken tests * added tests for --max_results magic option * added max_results magic option and fixed broken tests * added tests for --max_results magic option * Removed duplicate `--max_results` magic argument * removed max_results param from run_query, updated tests --- .../google/cloud/bigquery/magics.py | 24 +++++++- .../tests/unit/test_magics.py | 57 ++++++++++++++++++- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index b2dae2511ec8..4c93d1307a42 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -320,6 +320,14 @@ def _run_query(client, query, job_config=None): default=None, help=("Project to use for executing this query. Defaults to the context project."), ) +@magic_arguments.argument( + "--max_results", + default=None, + help=( + "Maximum number of rows in dataframe returned from executing the query." + "Defaults to returning all rows." + ), +) @magic_arguments.argument( "--maximum_bytes_billed", default=None, @@ -420,6 +428,12 @@ def _cell_magic(line, query): bqstorage_client = _make_bqstorage_client( args.use_bqstorage_api or context.use_bqstorage_api, context.credentials ) + + if args.max_results: + max_results = int(args.max_results) + else: + max_results = None + job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql @@ -433,7 +447,7 @@ def _cell_magic(line, query): error = None try: - query_job = _run_query(client, query, job_config) + query_job = _run_query(client, query, job_config=job_config) except Exception as ex: error = str(ex) @@ -460,7 +474,13 @@ def _cell_magic(line, query): ) return query_job - result = query_job.to_dataframe(bqstorage_client=bqstorage_client) + if max_results: + result = query_job.result(max_results=max_results).to_dataframe( + bqstorage_client=bqstorage_client + ) + else: + result = query_job.to_dataframe(bqstorage_client=bqstorage_client) + if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) else: diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index fbea9bdd9050..ed748d2dd5e3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -414,7 +414,7 @@ def test_bigquery_magic_with_legacy_sql(): with run_query_patch as run_query_mock: ip.run_cell_magic("bigquery", "--use_legacy_sql", "SELECT 17 AS num") - job_config_used = run_query_mock.call_args_list[0][0][-1] + job_config_used = run_query_mock.call_args_list[0][1]["job_config"] assert job_config_used.use_legacy_sql is True @@ -645,6 +645,57 @@ def test_bigquery_magic_without_bqstorage(monkeypatch): assert isinstance(return_value, pandas.DataFrame) +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_max_results_invalid(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + client_query_patch = mock.patch( + "google.cloud.bigquery.client.Client.query", autospec=True + ) + + sql = "SELECT 17 AS num" + + with pytest.raises(ValueError), default_patch, client_query_patch: + ip.run_cell_magic("bigquery", "--max_results=abc", sql) + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + client_query_patch = mock.patch( + "google.cloud.bigquery.client.Client.query", autospec=True + ) + + sql = "SELECT 17 AS num" + + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + + with client_query_patch as client_query_mock, default_patch: + client_query_mock.return_value = query_job_mock + ip.run_cell_magic("bigquery", "--max_results=5", sql) + + query_job_mock.result.assert_called_with(max_results=5) + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_dryrun_option_sets_job_config(): ip = IPython.get_ipython() @@ -662,7 +713,7 @@ def test_bigquery_magic_dryrun_option_sets_job_config(): with run_query_patch as run_query_mock: ip.run_cell_magic("bigquery", "--dry_run", sql) - job_config_used = run_query_mock.call_args_list[0][0][-1] + job_config_used = run_query_mock.call_args_list[0][1]["job_config"] assert job_config_used.dry_run is True @@ -924,6 +975,7 @@ def test_bigquery_magic_with_string_params(): run_query_mock.return_value = query_job_mock ip.run_cell_magic("bigquery", 'params_string_df --params {"num":17}', sql) + run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) assert "params_string_df" in ip.user_ns # verify that the variable exists @@ -959,6 +1011,7 @@ def test_bigquery_magic_with_dict_params(): # Insert dictionary into user namespace so that it can be expanded ip.user_ns["params"] = params ip.run_cell_magic("bigquery", "params_dict_df --params $params", sql) + run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) assert "params_dict_df" in ip.user_ns # verify that the variable exists From 361e96cbadad9669ea0061dad50129278f2b7683 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 6 Sep 2019 10:30:03 +0200 Subject: [PATCH 0660/2016] Fix list_rows() max results with BQ storage client (#9178) If max results is set, the streaming API should not be used, and a user warning is issued. --- .../google/cloud/bigquery/table.py | 8 +++++ .../google-cloud-bigquery/tests/system.py | 21 +++++++++++ .../tests/unit/test_table.py | 36 +++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 62072cf88804..71fc9ef945d4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1630,6 +1630,14 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non if dtypes is None: dtypes = {} + if bqstorage_client and self.max_results is not None: + warnings.warn( + "Cannot use bqstorage_client if max_results is set, " + "reverting to fetching data with the tabledata.list endpoint.", + stacklevel=2, + ) + bqstorage_client = None + progress_bar = self._get_progress_bar(progress_bar_type) frames = [] diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 3593e1ecb609..b1e583bbfaa3 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -2315,6 +2315,27 @@ def test_list_rows_page_size(self): page = next(pages) self.assertEqual(page.num_items, num_last_page) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_list_rows_max_results_w_bqstorage(self): + table_ref = DatasetReference("bigquery-public-data", "utility_us").table( + "country_code_iso" + ) + bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + credentials=Config.CLIENT._credentials + ) + + row_iterator = Config.CLIENT.list_rows( + table_ref, + selected_fields=[bigquery.SchemaField("country_name", "STRING")], + max_results=100, + ) + dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + + self.assertEqual(len(dataframe.index), 100) + def temp_dataset(self, dataset_id, location=None): dataset = Dataset(Config.CLIENT.dataset(dataset_id)) if location: diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 8ba7fee892e5..d36eb43e4feb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2208,6 +2208,42 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_max_results_w_bqstorage_warning(self): + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + bqstorage_client = mock.Mock() + + row_iterator = self._make_one( + client=_mock_client(), + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "tabledata.list" in str(warning) + ] + self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" From bf4999239dd443e37475f8b89f3586b99c2ced8e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 6 Sep 2019 10:30:42 +0200 Subject: [PATCH 0661/2016] Deprecate automatic schema conversion (#9176) --- .../google/cloud/bigquery/client.py | 10 ++++++ .../tests/unit/test_client.py | 35 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index c33e119cbc74..bcc93c0b7273 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1572,6 +1572,16 @@ def load_table_from_dataframe( dataframe, job_config.schema ) + if not job_config.schema: + # the schema could not be fully detected + warnings.warn( + "Schema could not be detected for all columns. Loading from a " + "dataframe without a schema will be deprecated in the future, " + "please provide a schema.", + PendingDeprecationWarning, + stacklevel=2, + ) + tmpfd, tmppath = tempfile.mkstemp(suffix="_job_{}.parquet".format(job_id[:8])) os.close(tmpfd) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index da3cee11e5d0..3ee45f52405c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5579,6 +5579,41 @@ def test_load_table_from_dataframe_unknown_table(self): job_config=mock.ANY, ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_no_schema_warning(self): + client = self._make_client() + + # Pick at least one column type that translates to Pandas dtype + # "object". A string column matches that. + records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] + dataframe = pandas.DataFrame(records) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) + catch_warnings = warnings.catch_warnings(record=True) + + with get_table_patch, load_patch, pyarrow_patch, catch_warnings as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + matches = [ + warning + for warning in warned + if warning.category in (DeprecationWarning, PendingDeprecationWarning) + and "could not be detected" in str(warning) + and "please provide a schema" in str(warning) + ] + assert matches, "A missing schema deprecation warning was not raised." + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields_error(self): From b65573abf1377b1728e6a8ef22d2ed6dd81d2d8a Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar <46078689+Emar-Kar@users.noreply.github.com> Date: Fri, 6 Sep 2019 21:50:51 +0300 Subject: [PATCH 0662/2016] BigQuery: Add support of the project ID with org prefix to the Table.from_string() method (#9161) * add prefix support * Update _helpers.py * consolidate the regex * update split_id method * '_parse_id' method renamed to '_split_id' * switched to 're.groups' implementation instead of partly grouping * Update dataset.py flake8 fixed * Update _helpers.py * added the docstring for the '_split_id' method * fix lint failure --- .../google/cloud/bigquery/_helpers.py | 33 ++++++++++++++++--- .../google/cloud/bigquery/dataset.py | 17 +--------- .../tests/unit/test_table.py | 12 +++++++ 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index bb3998732a5a..eb5161c9fe71 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -18,6 +18,7 @@ import copy import datetime import decimal +import re from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -29,6 +30,12 @@ _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" _TIMEONLY_W_MICROS = "%H:%M:%S.%f" +_PROJECT_PREFIX_PATTERN = re.compile( + r""" + (?P\S+\:[^.]+)\.(?P[^.]+)(?:$|\.(?P[^.]+)$) +""", + re.VERBOSE, +) def _not_null(value, field): @@ -586,24 +593,42 @@ def _str_or_none(value): return str(value) +def _split_id(full_id): + """Helper: split full_id into composite parts. + + Args: + full_id (str): Fully-qualified ID in standard SQL format. + + Returns: + List[str]: ID's parts separated into components. + """ + with_prefix = _PROJECT_PREFIX_PATTERN.match(full_id) + if with_prefix is None: + parts = full_id.split(".") + else: + parts = with_prefix.groups() + parts = [part for part in parts if part] + return parts + + def _parse_3_part_id(full_id, default_project=None, property_name="table_id"): output_project_id = default_project output_dataset_id = None output_resource_id = None - parts = full_id.split(".") + parts = _split_id(full_id) if len(parts) != 2 and len(parts) != 3: raise ValueError( "{property_name} must be a fully-qualified ID in " - 'standard SQL format. e.g. "project.dataset.{property_name}", ' + 'standard SQL format, e.g., "project.dataset.{property_name}", ' "got {}".format(full_id, property_name=property_name) ) if len(parts) == 2 and not default_project: raise ValueError( "When default_project is not set, {property_name} must be a " - "fully-qualified ID in standard SQL format. " - 'e.g. "project.dataset_id.{property_name}", got {}'.format( + "fully-qualified ID in standard SQL format, " + 'e.g., "project.dataset_id.{property_name}", got {}'.format( full_id, property_name=property_name ) ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 494c219d4f67..67a7353f94e7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -18,7 +18,6 @@ import six import copy -import re import google.cloud._helpers from google.cloud.bigquery import _helpers @@ -27,14 +26,6 @@ from google.cloud.bigquery.table import TableReference -_PROJECT_PREFIX_PATTERN = re.compile( - r""" - (?P\S+\:[^.]+)\.(?P[^.]+)$ -""", - re.VERBOSE, -) - - def _get_table_reference(self, table_id): """Constructs a TableReference. @@ -299,13 +290,7 @@ def from_string(cls, dataset_id, default_project=None): """ output_dataset_id = dataset_id output_project_id = default_project - with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id) - if with_prefix is None: - parts = dataset_id.split(".") - else: - project_id = with_prefix.group("project_id") - dataset_id = with_prefix.group("dataset_id") - parts = [project_id, dataset_id] + parts = _helpers._split_id(dataset_id) if len(parts) == 1 and not default_project: raise ValueError( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index d36eb43e4feb..562bcf6b4e7d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -215,11 +215,23 @@ def test_from_string(self): self.assertEqual(got.dataset_id, "string_dataset") self.assertEqual(got.table_id, "string_table") + def test_from_string_w_prefix(self): + cls = self._get_target_class() + got = cls.from_string("google.com:string-project.string_dataset.string_table") + self.assertEqual(got.project, "google.com:string-project") + self.assertEqual(got.dataset_id, "string_dataset") + self.assertEqual(got.table_id, "string_table") + def test_from_string_legacy_string(self): cls = self._get_target_class() with self.assertRaises(ValueError): cls.from_string("string-project:string_dataset.string_table") + def test_from_string_w_incorrect_prefix(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string("google.com.string-project.string_dataset.string_table") + def test_from_string_not_fully_qualified(self): cls = self._get_target_class() with self.assertRaises(ValueError): From 90fd9b55c6cab59c4d2b59873be2a9289c8310f2 Mon Sep 17 00:00:00 2001 From: Walter Poupore Date: Tue, 10 Sep 2019 11:01:57 -0700 Subject: [PATCH 0663/2016] Fix doc link (#9200) --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 155474536d14..ccbab8b5eb44 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2678,7 +2678,7 @@ def ddl_target_routine(self): for CREATE/DROP FUNCTION/PROCEDURE queries. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/JobStatistics + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobstatistics """ prop = self._job_statistics().get("ddlTargetRoutine") if prop is not None: From 64d852851d891192fa71a63cb55317261d05d5f7 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 10 Sep 2019 21:09:12 +0200 Subject: [PATCH 0664/2016] BigQuery: Add support for array parameters to Cursor.execute() (#9189) * Add support for array params to Cursor.execute() * Raise NotImplementedError for STRUCT-like values --- .../google/cloud/bigquery/dbapi/_helpers.py | 146 +++++++++++++++--- .../tests/unit/test_dbapi__helpers.py | 93 +++++++++++ 2 files changed, 214 insertions(+), 25 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 6e7f58bd4944..e5f4cff51666 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -43,27 +43,9 @@ def scalar_to_query_parameter(value, name=None): :raises: :class:`~google.cloud.bigquery.dbapi.exceptions.ProgrammingError` if the type cannot be determined. """ - parameter_type = None + parameter_type = bigquery_scalar_type(value) - if isinstance(value, bool): - parameter_type = "BOOL" - elif isinstance(value, numbers.Integral): - parameter_type = "INT64" - elif isinstance(value, numbers.Real): - parameter_type = "FLOAT64" - elif isinstance(value, decimal.Decimal): - parameter_type = "NUMERIC" - elif isinstance(value, six.text_type): - parameter_type = "STRING" - elif isinstance(value, six.binary_type): - parameter_type = "BYTES" - elif isinstance(value, datetime.datetime): - parameter_type = "DATETIME" if value.tzinfo is None else "TIMESTAMP" - elif isinstance(value, datetime.date): - parameter_type = "DATE" - elif isinstance(value, datetime.time): - parameter_type = "TIME" - else: + if parameter_type is None: raise exceptions.ProgrammingError( "encountered parameter {} with value {} of unexpected type".format( name, value @@ -72,6 +54,46 @@ def scalar_to_query_parameter(value, name=None): return bigquery.ScalarQueryParameter(name, parameter_type, value) +def array_to_query_parameter(value, name=None): + """Convert an array-like value into a query parameter. + + Args: + value (Sequence[Any]): The elements of the array (should not be a + string-like Sequence). + name (Optional[str]): Name of the query parameter. + + Returns: + A query parameter corresponding with the type and value of the plain + Python object. + + Raises: + :class:`~google.cloud.bigquery.dbapi.exceptions.ProgrammingError` + if the type of array elements cannot be determined. + """ + if not array_like(value): + raise exceptions.ProgrammingError( + "The value of parameter {} must be a sequence that is " + "not string-like.".format(name) + ) + + if not value: + raise exceptions.ProgrammingError( + "Encountered an empty array-like value of parameter {}, cannot " + "determine array elements type.".format(name) + ) + + # Assume that all elements are of the same type, and let the backend handle + # any type incompatibilities among the array elements + array_type = bigquery_scalar_type(value[0]) + if array_type is None: + raise exceptions.ProgrammingError( + "Encountered unexpected first array element of parameter {}, " + "cannot determine array elements type.".format(name) + ) + + return bigquery.ArrayQueryParameter(name, array_type, value) + + def to_query_parameters_list(parameters): """Converts a sequence of parameter values into query parameters. @@ -81,7 +103,18 @@ def to_query_parameters_list(parameters): :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] :returns: A list of query parameters. """ - return [scalar_to_query_parameter(value) for value in parameters] + result = [] + + for value in parameters: + if isinstance(value, collections_abc.Mapping): + raise NotImplementedError("STRUCT-like parameter values are not supported.") + elif array_like(value): + param = array_to_query_parameter(value) + else: + param = scalar_to_query_parameter(value) + result.append(param) + + return result def to_query_parameters_dict(parameters): @@ -93,10 +126,21 @@ def to_query_parameters_dict(parameters): :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] :returns: A list of named query parameters. """ - return [ - scalar_to_query_parameter(value, name=name) - for name, value in six.iteritems(parameters) - ] + result = [] + + for name, value in six.iteritems(parameters): + if isinstance(value, collections_abc.Mapping): + raise NotImplementedError( + "STRUCT-like parameter values are not supported " + "(parameter {}).".format(name) + ) + elif array_like(value): + param = array_to_query_parameter(value, name=name) + else: + param = scalar_to_query_parameter(value, name=name) + result.append(param) + + return result def to_query_parameters(parameters): @@ -115,3 +159,55 @@ def to_query_parameters(parameters): return to_query_parameters_dict(parameters) return to_query_parameters_list(parameters) + + +def bigquery_scalar_type(value): + """Return a BigQuery name of the scalar type that matches the given value. + + If the scalar type name could not be determined (e.g. for non-scalar + values), ``None`` is returned. + + Args: + value (Any) + + Returns: + Optional[str]: The BigQuery scalar type name. + """ + if isinstance(value, bool): + return "BOOL" + elif isinstance(value, numbers.Integral): + return "INT64" + elif isinstance(value, numbers.Real): + return "FLOAT64" + elif isinstance(value, decimal.Decimal): + return "NUMERIC" + elif isinstance(value, six.text_type): + return "STRING" + elif isinstance(value, six.binary_type): + return "BYTES" + elif isinstance(value, datetime.datetime): + return "DATETIME" if value.tzinfo is None else "TIMESTAMP" + elif isinstance(value, datetime.date): + return "DATE" + elif isinstance(value, datetime.time): + return "TIME" + + return None + + +def array_like(value): + """Determine if the given value is array-like. + + Examples of array-like values (as interpreted by this function) are + sequences such as ``list`` and ``tuple``, but not strings and other + iterables such as sets. + + Args: + value (Any) + + Returns: + bool: ``True`` if the value is considered array-like, ``False`` otherwise. + """ + return isinstance(value, collections_abc.Sequence) and not isinstance( + value, (six.text_type, six.binary_type, bytearray) + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index bcc3e0879f87..45c690ede363 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -66,6 +66,61 @@ def test_scalar_to_query_parameter_w_special_floats(self): self.assertTrue(math.isinf(inf_parameter.value)) self.assertEqual(inf_parameter.type_, "FLOAT64") + def test_array_to_query_parameter_valid_argument(self): + expected_types = [ + ([True, False], "BOOL"), + ([123, -456, 0], "INT64"), + ([1.25, 2.50], "FLOAT64"), + ([decimal.Decimal("1.25")], "NUMERIC"), + ([b"foo", b"bar"], "BYTES"), + ([u"foo", u"bar"], "STRING"), + ([datetime.date(2017, 4, 1), datetime.date(2018, 4, 1)], "DATE"), + ([datetime.time(12, 34, 56), datetime.time(10, 20, 30)], "TIME"), + ( + [ + datetime.datetime(2012, 3, 4, 5, 6, 7), + datetime.datetime(2013, 1, 1, 10, 20, 30), + ], + "DATETIME", + ), + ( + [ + datetime.datetime( + 2012, 3, 4, 5, 6, 7, tzinfo=google.cloud._helpers.UTC + ), + datetime.datetime( + 2013, 1, 1, 10, 20, 30, tzinfo=google.cloud._helpers.UTC + ), + ], + "TIMESTAMP", + ), + ] + + for values, expected_type in expected_types: + msg = "value: {} expected_type: {}".format(values, expected_type) + parameter = _helpers.array_to_query_parameter(values) + self.assertIsNone(parameter.name, msg=msg) + self.assertEqual(parameter.array_type, expected_type, msg=msg) + self.assertEqual(parameter.values, values, msg=msg) + named_param = _helpers.array_to_query_parameter(values, name="my_param") + self.assertEqual(named_param.name, "my_param", msg=msg) + self.assertEqual(named_param.array_type, expected_type, msg=msg) + self.assertEqual(named_param.values, values, msg=msg) + + def test_array_to_query_parameter_empty_argument(self): + with self.assertRaises(exceptions.ProgrammingError): + _helpers.array_to_query_parameter([]) + + def test_array_to_query_parameter_unsupported_sequence(self): + unsupported_iterables = [{10, 20, 30}, u"foo", b"bar", bytearray([65, 75, 85])] + for iterable in unsupported_iterables: + with self.assertRaises(exceptions.ProgrammingError): + _helpers.array_to_query_parameter(iterable) + + def test_array_to_query_parameter_sequence_w_invalid_elements(self): + with self.assertRaises(exceptions.ProgrammingError): + _helpers.array_to_query_parameter([object(), 2, 7]) + def test_to_query_parameters_w_dict(self): parameters = {"somebool": True, "somestring": u"a-string-value"} query_parameters = _helpers.to_query_parameters(parameters) @@ -82,6 +137,23 @@ def test_to_query_parameters_w_dict(self): ), ) + def test_to_query_parameters_w_dict_array_param(self): + parameters = {"somelist": [10, 20]} + query_parameters = _helpers.to_query_parameters(parameters) + + self.assertEqual(len(query_parameters), 1) + param = query_parameters[0] + + self.assertEqual(param.name, "somelist") + self.assertEqual(param.array_type, "INT64") + self.assertEqual(param.values, [10, 20]) + + def test_to_query_parameters_w_dict_dict_param(self): + parameters = {"my_param": {"foo": "bar"}} + + with self.assertRaises(NotImplementedError): + _helpers.to_query_parameters(parameters) + def test_to_query_parameters_w_list(self): parameters = [True, u"a-string-value"] query_parameters = _helpers.to_query_parameters(parameters) @@ -92,3 +164,24 @@ def test_to_query_parameters_w_list(self): sorted(query_parameter_tuples), sorted([(None, "BOOL", True), (None, "STRING", u"a-string-value")]), ) + + def test_to_query_parameters_w_list_array_param(self): + parameters = [[10, 20]] + query_parameters = _helpers.to_query_parameters(parameters) + + self.assertEqual(len(query_parameters), 1) + param = query_parameters[0] + + self.assertIsNone(param.name) + self.assertEqual(param.array_type, "INT64") + self.assertEqual(param.values, [10, 20]) + + def test_to_query_parameters_w_list_dict_param(self): + parameters = [{"foo": "bar"}] + + with self.assertRaises(NotImplementedError): + _helpers.to_query_parameters(parameters) + + def test_to_query_parameters_none_argument(self): + query_parameters = _helpers.to_query_parameters(None) + self.assertEqual(query_parameters, []) From adf6481170374fdfb4a1c17965a9e4ddb766b6a3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 12 Sep 2019 17:25:46 +0200 Subject: [PATCH 0665/2016] BigQuery: Add client.insert_rows_from_dataframe() method (#9162) * Add client.insert_rows_from_dataframe() method * Avoid using nametuples for dataframe row iteration dataframe.itertuples() returns plain tuples under certain conditions, thus this commit enforces always returning plain tuples, and constructs the row dictionary manually from each tuple. * Skip insert_rows_from_dataframe tests if no Pandas --- .../google/cloud/bigquery/client.py | 54 ++++++++ .../google-cloud-bigquery/tests/system.py | 67 ++++++++++ .../tests/unit/test_client.py | 124 ++++++++++++++++++ 3 files changed, 245 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index bcc93c0b7273..9df8b87eb8dd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -15,6 +15,7 @@ """Client for interacting with the Google BigQuery API.""" from __future__ import absolute_import +from __future__ import division try: from collections import abc as collections_abc @@ -25,7 +26,9 @@ import functools import gzip import io +import itertools import json +import math import os import tempfile import uuid @@ -2111,6 +2114,57 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): return self.insert_rows_json(table, json_rows, **kwargs) + def insert_rows_from_dataframe( + self, table, dataframe, selected_fields=None, chunk_size=500, **kwargs + ): + """Insert rows into a table from a dataframe via the streaming API. + + Args: + table (Union[ \ + :class:`~google.cloud.bigquery.table.Table`, \ + :class:`~google.cloud.bigquery.table.TableReference`, \ + str, \ + ]): + The destination table for the row data, or a reference to it. + dataframe (pandas.DataFrame): + A :class:`~pandas.DataFrame` containing the data to load. + selected_fields (Sequence[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + ]): + The fields to return. Required if ``table`` is a + :class:`~google.cloud.bigquery.table.TableReference`. + chunk_size (int): + The number of rows to stream in a single chunk. Must be positive. + kwargs (dict): + Keyword arguments to + :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`. + + Returns: + Sequence[Sequence[Mappings]]: + A list with insert errors for each insert chunk. Each element + is a list containing one mapping per row with insert errors: + the "index" key identifies the row, and the "errors" key + contains a list of the mappings describing one or more problems + with the row. + + Raises: + ValueError: if table's schema is not set + """ + insert_results = [] + + chunk_count = int(math.ceil(len(dataframe) / chunk_size)) + rows_iter = ( + dict(six.moves.zip(dataframe.columns, row)) + for row in dataframe.itertuples(index=False, name=None) + ) + + for _ in range(chunk_count): + rows_chunk = itertools.islice(rows_iter, chunk_size) + result = self.insert_rows(table, rows_chunk, selected_fields, **kwargs) + insert_results.append(result) + + return insert_results + def insert_rows_json( self, table, diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index b1e583bbfaa3..4816962a70d6 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1951,6 +1951,73 @@ def test_query_results_to_dataframe_w_bqstorage(self): if not row[col] is None: self.assertIsInstance(row[col], exp_datatypes[col]) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_insert_rows_from_dataframe(self): + SF = bigquery.SchemaField + schema = [ + SF("float_col", "FLOAT", mode="REQUIRED"), + SF("int_col", "INTEGER", mode="REQUIRED"), + SF("bool_col", "BOOLEAN", mode="REQUIRED"), + SF("string_col", "STRING", mode="NULLABLE"), + ] + + dataframe = pandas.DataFrame( + [ + { + "float_col": 1.11, + "bool_col": True, + "string_col": "my string", + "int_col": 10, + }, + { + "float_col": 2.22, + "bool_col": False, + "string_col": "another string", + "int_col": 20, + }, + { + "float_col": 3.33, + "bool_col": False, + "string_col": "another string", + "int_col": 30, + }, + { + "float_col": 4.44, + "bool_col": True, + "string_col": "another string", + "int_col": 40, + }, + { + "float_col": 5.55, + "bool_col": False, + "string_col": "another string", + "int_col": 50, + }, + ] + ) + + table_id = "test_table" + dataset = self.temp_dataset(_make_dataset_id("issue_7553")) + table_arg = Table(dataset.table(table_id), schema=schema) + table = retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + Config.CLIENT.insert_rows_from_dataframe(table, dataframe, chunk_size=3) + + retry = RetryResult(_has_rows, max_tries=8) + rows = retry(self._fetch_single_page)(table) + + sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) + row_tuples = [r.values() for r in sorted_rows] + expected = [tuple(data_row) for data_row in dataframe.itertuples(index=False)] + + assert len(row_tuples) == len(expected) + + for row, expected_row in zip(row_tuples, expected): + six.assertCountEqual( + self, row, expected_row + ) # column order does not matter + def test_insert_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 3ee45f52405c..46666347e331 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4473,6 +4473,130 @@ def test_insert_rows_w_numeric(self): data=sent, ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_insert_rows_from_dataframe(self): + from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.table import Table + + API_PATH = "/projects/{}/datasets/{}/tables/{}/insertAll".format( + self.PROJECT, self.DS_ID, self.TABLE_REF.table_id + ) + + dataframe = pandas.DataFrame( + [ + {"name": u"Little One", "age": 10, "adult": False}, + {"name": u"Young Gun", "age": 20, "adult": True}, + {"name": u"Dad", "age": 30, "adult": True}, + {"name": u"Stranger", "age": 40, "adult": True}, + ] + ) + + # create client + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection({}, {}) + + # create table + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField("adult", "BOOLEAN", mode="REQUIRED"), + ] + table = Table(self.TABLE_REF, schema=schema) + + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(dataframe)))): + error_info = client.insert_rows_from_dataframe( + table, dataframe, chunk_size=3 + ) + + self.assertEqual(len(error_info), 2) + for chunk_errors in error_info: + assert chunk_errors == [] + + EXPECTED_SENT_DATA = [ + { + "rows": [ + { + "insertId": "0", + "json": {"name": "Little One", "age": "10", "adult": "false"}, + }, + { + "insertId": "1", + "json": {"name": "Young Gun", "age": "20", "adult": "true"}, + }, + { + "insertId": "2", + "json": {"name": "Dad", "age": "30", "adult": "true"}, + }, + ] + }, + { + "rows": [ + { + "insertId": "3", + "json": {"name": "Stranger", "age": "40", "adult": "true"}, + } + ] + }, + ] + + actual_calls = conn.api_request.call_args_list + + for call, expected_data in six.moves.zip_longest( + actual_calls, EXPECTED_SENT_DATA + ): + expected_call = mock.call(method="POST", path=API_PATH, data=expected_data) + assert call == expected_call + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_insert_rows_from_dataframe_many_columns(self): + from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.table import Table + + API_PATH = "/projects/{}/datasets/{}/tables/{}/insertAll".format( + self.PROJECT, self.DS_ID, self.TABLE_REF.table_id + ) + N_COLUMNS = 256 # should be >= 256 + + dataframe = pandas.DataFrame( + [{"foo_{}".format(i): "bar_{}".format(i) for i in range(N_COLUMNS)}] + ) + + # create client + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection({}, {}) + + # create table + schema = [SchemaField("foo_{}".format(i), "STRING") for i in range(N_COLUMNS)] + table = Table(self.TABLE_REF, schema=schema) + + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(dataframe)))): + error_info = client.insert_rows_from_dataframe( + table, dataframe, chunk_size=3 + ) + + assert len(error_info) == 1 + assert error_info[0] == [] + + EXPECTED_SENT_DATA = { + "rows": [ + { + "insertId": "0", + "json": { + "foo_{}".format(i): "bar_{}".format(i) for i in range(N_COLUMNS) + }, + } + ] + } + expected_call = mock.call(method="POST", path=API_PATH, data=EXPECTED_SENT_DATA) + + actual_calls = conn.api_request.call_args_list + assert len(actual_calls) == 1 + assert actual_calls[0] == expected_call + def test_insert_rows_json(self): from google.cloud.bigquery.table import Table, SchemaField from google.cloud.bigquery.dataset import DatasetReference From 309e0d6dc7d6ef56e71de5503f72bcee0dc75301 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 12 Sep 2019 10:02:12 -0700 Subject: [PATCH 0666/2016] Revert "Disable failing snippets test (#9156)" (#9220) This reverts commit 8061c0e6057714569f9fcfcbff3337735bc507ec. --- packages/google-cloud-bigquery/docs/snippets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 387f35dfac07..4c39ff912230 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -1393,7 +1393,6 @@ def test_copy_table_multiple_source(client, to_delete): assert dest_table.num_rows == 2 -@pytest.mark.skip(reason="Backend responds with a 500 internal error.") def test_copy_table_cmek(client, to_delete): dataset_id = "copy_table_cmek_{}".format(_millis()) dest_dataset = bigquery.Dataset(client.dataset(dataset_id)) From f0232ff99294640111d20ebd4f892113255675c2 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 12 Sep 2019 19:38:31 +0200 Subject: [PATCH 0667/2016] BigQuery: Change the default value of Cursor instances' arraysize attribute to None (#9199) * Add performance note to fetchall() docs * Set default cursor arraysize to None Let the backend pick the most appropriate size automatically, instead of enforcing the size of 1 on it (despite thise being a deviation from PEP 249). --- .../google/cloud/bigquery/dbapi/cursor.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 1fbd9fb10cc4..3fdc750951e2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -61,8 +61,10 @@ def __init__(self, connection): # cannot be determined by the interface. self.rowcount = -1 # Per PEP 249: The arraysize attribute defaults to 1, meaning to fetch - # a single row at a time. - self.arraysize = 1 + # a single row at a time. However, we deviate from that, and set the + # default to None, allowing the backend to automatically determine the + # most appropriate size. + self.arraysize = None self._query_data = None self._query_job = None @@ -241,7 +243,8 @@ def fetchmany(self, size=None): :type size: int :param size: (Optional) Maximum number of rows to return. Defaults to the - ``arraysize`` property value. + ``arraysize`` property value. If ``arraysize`` is not set, it + defaults to ``1``. :rtype: List[tuple] :returns: A list of rows. @@ -249,7 +252,10 @@ def fetchmany(self, size=None): if called before ``execute()``. """ if size is None: - size = self.arraysize + # Since self.arraysize can be None (a deviation from PEP 249), + # use an actual PEP 249 default of 1 in such case (*some* number + # is needed here). + size = self.arraysize if self.arraysize else 1 self._try_fetch(size=size) rows = [] From c0a3beaaf335e33bf12fbfd1acd2bede1f9847f2 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 13 Sep 2019 14:09:04 -0400 Subject: [PATCH 0668/2016] Add 'Model.encryption_config' (via synth). (#9214) * Add 'Model.encryption_config' (via synth). --- .../bigquery_v2/proto/encryption_config.proto | 33 ++ .../proto/encryption_config_pb2.py | 108 ++++++ .../proto/encryption_config_pb2_grpc.py | 2 + .../cloud/bigquery_v2/proto/model.proto | 108 +++--- .../cloud/bigquery_v2/proto/model_pb2.py | 309 +++++++++++------- .../bigquery_v2/proto/model_reference.proto | 13 +- .../bigquery_v2/proto/model_reference_pb2.py | 27 +- .../bigquery_v2/proto/standard_sql.proto | 7 +- .../bigquery_v2/proto/standard_sql_pb2.py | 33 +- .../google/cloud/bigquery_v2/types.py | 8 +- packages/google-cloud-bigquery/synth.metadata | 10 +- packages/google-cloud-bigquery/synth.py | 6 + 12 files changed, 450 insertions(+), 214 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto new file mode 100644 index 000000000000..54445f0fa770 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto @@ -0,0 +1,33 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.bigquery.v2; + +import "google/api/field_behavior.proto"; +import "google/protobuf/wrappers.proto"; +import "google/api/annotations.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; +option java_outer_classname = "EncryptionConfigProto"; +option java_package = "com.google.cloud.bigquery.v2"; + +message EncryptionConfiguration { + // Optional. Describes the Cloud KMS encryption key that will be used to + // protect destination BigQuery table. The BigQuery Service Account associated + // with your project requires access to this encryption key. + google.protobuf.StringValue kms_key_name = 1 [(google.api.field_behavior) = OPTIONAL]; +} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py new file mode 100644 index 000000000000..b04cc3d58e9c --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: google/cloud/bigquery_v2/proto/encryption_config.proto + +import sys + +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 +from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 +from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name="google/cloud/bigquery_v2/proto/encryption_config.proto", + package="google.cloud.bigquery.v2", + syntax="proto3", + serialized_options=_b( + "\n\034com.google.cloud.bigquery.v2B\025EncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" + ), + serialized_pb=_b( + '\n6google/cloud/bigquery_v2/proto/encryption_config.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"R\n\x17\x45ncryptionConfiguration\x12\x37\n\x0ckms_key_name\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.StringValueB\x03\xe0\x41\x01\x42w\n\x1c\x63om.google.cloud.bigquery.v2B\x15\x45ncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' + ), + dependencies=[ + google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, + google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR, + google_dot_api_dot_annotations__pb2.DESCRIPTOR, + ], +) + + +_ENCRYPTIONCONFIGURATION = _descriptor.Descriptor( + name="EncryptionConfiguration", + full_name="google.cloud.bigquery.v2.EncryptionConfiguration", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="kms_key_name", + full_name="google.cloud.bigquery.v2.EncryptionConfiguration.kms_key_name", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b("\340A\001"), + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=179, + serialized_end=261, +) + +_ENCRYPTIONCONFIGURATION.fields_by_name[ + "kms_key_name" +].message_type = google_dot_protobuf_dot_wrappers__pb2._STRINGVALUE +DESCRIPTOR.message_types_by_name["EncryptionConfiguration"] = _ENCRYPTIONCONFIGURATION +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +EncryptionConfiguration = _reflection.GeneratedProtocolMessageType( + "EncryptionConfiguration", + (_message.Message,), + dict( + DESCRIPTOR=_ENCRYPTIONCONFIGURATION, + __module__="google.cloud.bigquery_v2.proto.encryption_config_pb2", + __doc__="""Encryption configuration. + + Attributes: + kms_key_name: + Optional. Describes the Cloud KMS encryption key that will be + used to protect destination BigQuery table. The BigQuery + Service Account associated with your project requires access + to this encryption key. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.EncryptionConfiguration) + ), +) +_sym_db.RegisterMessage(EncryptionConfiguration) + + +DESCRIPTOR._options = None +_ENCRYPTIONCONFIGURATION.fields_by_name["kms_key_name"]._options = None +# @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py new file mode 100644 index 000000000000..07cb78fe03a9 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py @@ -0,0 +1,2 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto index 42246e8efff2..13d980774413 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto @@ -17,13 +17,15 @@ syntax = "proto3"; package google.cloud.bigquery.v2; +import "google/api/client.proto"; +import "google/api/field_behavior.proto"; +import "google/cloud/bigquery/v2/encryption_config.proto"; import "google/cloud/bigquery/v2/model_reference.proto"; import "google/cloud/bigquery/v2/standard_sql.proto"; import "google/protobuf/empty.proto"; import "google/protobuf/timestamp.proto"; import "google/protobuf/wrappers.proto"; import "google/api/annotations.proto"; -import "google/api/client.proto"; option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; option java_outer_classname = "ModelProto"; @@ -39,19 +41,23 @@ service ModelService { // Gets the specified model resource by model ID. rpc GetModel(GetModelRequest) returns (Model) { + option (google.api.method_signature) = "project_id,dataset_id,model_id"; } // Lists all models in the specified dataset. Requires the READER dataset // role. rpc ListModels(ListModelsRequest) returns (ListModelsResponse) { + option (google.api.method_signature) = "project_id,dataset_id,max_results"; } // Patch specific fields in the specified model. rpc PatchModel(PatchModelRequest) returns (Model) { + option (google.api.method_signature) = "project_id,dataset_id,model_id,model"; } // Deletes the model specified by modelId from the dataset. rpc DeleteModel(DeleteModelRequest) returns (google.protobuf.Empty) { + option (google.api.method_signature) = "project_id,dataset_id,model_id"; } } @@ -236,7 +242,7 @@ message Model { // Counts of all categories for the categorical feature. If there are // more than ten categories, we return top ten (by count) and return - // one more CategoryCount with category ‘_OTHER_’ and count as + // one more CategoryCount with category "_OTHER_" and count as // aggregate counts of remaining categories. repeated CategoryCount category_counts = 1; } @@ -514,103 +520,105 @@ message Model { } // Output only. A hash of this resource. - string etag = 1; + string etag = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; // Required. Unique identifier for this model. - ModelReference model_reference = 2; + ModelReference model_reference = 2 [(google.api.field_behavior) = REQUIRED]; - // Output only. The time when this model was created, in millisecs since the - // epoch. - int64 creation_time = 5; + // Output only. The time when this model was created, in millisecs since the epoch. + int64 creation_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY]; - // Output only. The time when this model was last modified, in millisecs - // since the epoch. - int64 last_modified_time = 6; + // Output only. The time when this model was last modified, in millisecs since the epoch. + int64 last_modified_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; - // [Optional] A user-friendly description of this model. - string description = 12; + // Optional. A user-friendly description of this model. + string description = 12 [(google.api.field_behavior) = OPTIONAL]; - // [Optional] A descriptive name for this model. - string friendly_name = 14; + // Optional. A descriptive name for this model. + string friendly_name = 14 [(google.api.field_behavior) = OPTIONAL]; - // [Optional] The labels associated with this model. You can use these to - // organize and group your models. Label keys and values can be no longer + // The labels associated with this model. You can use these to organize + // and group your models. Label keys and values can be no longer // than 63 characters, can only contain lowercase letters, numeric // characters, underscores and dashes. International characters are allowed. // Label values are optional. Label keys must start with a letter and each // label in the list must have a different key. map labels = 15; - // [Optional] The time when this model expires, in milliseconds since the - // epoch. If not present, the model will persist indefinitely. Expired models + // Optional. The time when this model expires, in milliseconds since the epoch. + // If not present, the model will persist indefinitely. Expired models // will be deleted and their storage reclaimed. The defaultTableExpirationMs // property of the encapsulating dataset can be used to set a default // expirationTime on newly created models. - int64 expiration_time = 16; + int64 expiration_time = 16 [(google.api.field_behavior) = OPTIONAL]; // Output only. The geographic location where the model resides. This value // is inherited from the dataset. - string location = 13; + string location = 13 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Custom encryption configuration (e.g., Cloud KMS keys). This shows the + // encryption configuration of the model data while stored in BigQuery + // storage. + google.cloud.bigquery.v2.EncryptionConfiguration encryption_configuration = 17; // Output only. Type of the model resource. - ModelType model_type = 7; + ModelType model_type = 7 [(google.api.field_behavior) = OUTPUT_ONLY]; - // Output only. Information for all training runs in increasing order of - // start_time. - repeated TrainingRun training_runs = 9; + // Output only. Information for all training runs in increasing order of start_time. + repeated TrainingRun training_runs = 9 [(google.api.field_behavior) = OUTPUT_ONLY]; // Output only. Input feature columns that were used to train this model. - repeated StandardSqlField feature_columns = 10; + repeated StandardSqlField feature_columns = 10 [(google.api.field_behavior) = OUTPUT_ONLY]; // Output only. Label columns that were used to train this model. // The output of the model will have a "predicted_" prefix to these columns. - repeated StandardSqlField label_columns = 11; + repeated StandardSqlField label_columns = 11 [(google.api.field_behavior) = OUTPUT_ONLY]; } message GetModelRequest { - // Project ID of the requested model. - string project_id = 1; + // Required. Project ID of the requested model. + string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - // Dataset ID of the requested model. - string dataset_id = 2; + // Required. Dataset ID of the requested model. + string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - // Model ID of the requested model. - string model_id = 3; + // Required. Model ID of the requested model. + string model_id = 3 [(google.api.field_behavior) = REQUIRED]; } message PatchModelRequest { - // Project ID of the model to patch. - string project_id = 1; + // Required. Project ID of the model to patch. + string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - // Dataset ID of the model to patch. - string dataset_id = 2; + // Required. Dataset ID of the model to patch. + string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - // Model ID of the model to patch. - string model_id = 3; + // Required. Model ID of the model to patch. + string model_id = 3 [(google.api.field_behavior) = REQUIRED]; - // Patched model. + // Required. Patched model. // Follows RFC5789 patch semantics. Missing fields are not updated. // To clear a field, explicitly set to default value. - Model model = 4; + Model model = 4 [(google.api.field_behavior) = REQUIRED]; } message DeleteModelRequest { - // Project ID of the model to delete. - string project_id = 1; + // Required. Project ID of the model to delete. + string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - // Dataset ID of the model to delete. - string dataset_id = 2; + // Required. Dataset ID of the model to delete. + string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - // Model ID of the model to delete. - string model_id = 3; + // Required. Model ID of the model to delete. + string model_id = 3 [(google.api.field_behavior) = REQUIRED]; } message ListModelsRequest { - // Project ID of the models to list. - string project_id = 1; + // Required. Project ID of the models to list. + string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - // Dataset ID of the models to list. - string dataset_id = 2; + // Required. Dataset ID of the models to list. + string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; // The maximum number of results to return in a single response page. // Leverage the page tokens to iterate through the entire collection. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py index 45e6cefdf72c..98dfa4b1a22c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py @@ -15,6 +15,11 @@ _sym_db = _symbol_database.Default() +from google.api import client_pb2 as google_dot_api_dot_client__pb2 +from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 +from google.cloud.bigquery_v2.proto import ( + encryption_config_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2, +) from google.cloud.bigquery_v2.proto import ( model_reference_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2, ) @@ -25,7 +30,6 @@ from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 -from google.api import client_pb2 as google_dot_api_dot_client__pb2 DESCRIPTOR = _descriptor.FileDescriptor( @@ -36,16 +40,18 @@ "\n\034com.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" ), serialized_pb=_b( - '\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto"\x8a\x34\n\x05Model\x12\x0c\n\x04\x65tag\x18\x01 \x01(\t\x12\x41\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReference\x12\x15\n\rcreation_time\x18\x05 \x01(\x03\x12\x1a\n\x12last_modified_time\x18\x06 \x01(\x03\x12\x13\n\x0b\x64\x65scription\x18\x0c \x01(\t\x12\x15\n\rfriendly_name\x18\x0e \x01(\t\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x17\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x12\x10\n\x08location\x18\r \x01(\t\x12=\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelType\x12\x42\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRun\x12\x43\n\x0f\x66\x65\x61ture_columns\x18\n \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x12\x41\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlField\x1aq\n\x0bKmeansEnums"b\n\x1aKmeansInitializationMethod\x12,\n(KMEANS_INITIALIZATION_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x9f\x06\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x12\x16\n\x0epositive_label\x18\x03 \x01(\t\x12\x16\n\x0enegative_label\x18\x04 \x01(\t\x1a\xec\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\t \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\xcb\x06\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12K\n\x08\x63lusters\x18\x03 \x03(\x0b\x32\x39.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster\x1a\xef\x04\n\x07\x43luster\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12^\n\x0e\x66\x65\x61ture_values\x18\x02 \x03(\x0b\x32\x46.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue\x12*\n\x05\x63ount\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\xc2\x03\n\x0c\x46\x65\x61tureValue\x12\x16\n\x0e\x66\x65\x61ture_column\x18\x01 \x01(\t\x12\x37\n\x0fnumerical_value\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValueH\x00\x12t\n\x11\x63\x61tegorical_value\x18\x03 \x01(\x0b\x32W.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValueH\x00\x1a\xe1\x01\n\x10\x43\x61tegoricalValue\x12~\n\x0f\x63\x61tegory_counts\x18\x01 \x03(\x0b\x32\x65.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount\x1aM\n\rCategoryCount\x12\x10\n\x08\x63\x61tegory\x18\x01 \x01(\t\x12*\n\x05\x63ount\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64ValueB\x07\n\x05value\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\xab\x0f\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\x9d\t\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 \x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x12\x11\n\tmodel_uri\x18\x16 \x01(\t\x12S\n\x15optimization_strategy\x18\x17 \x01(\x0e\x32\x34.google.cloud.bigquery.v2.Model.OptimizationStrategy\x12l\n\x1ckmeans_initialization_method\x18! \x01(\x0e\x32\x46.google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod\x12$\n\x1ckmeans_initialization_column\x18" \x01(\t\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"s\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03\x12\x0e\n\nTENSORFLOW\x10\x06"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"n\n\x14OptimizationStrategy\x12%\n!OPTIMIZATION_STRATEGY_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x42\x41TCH_GRADIENT_DESCENT\x10\x01\x12\x13\n\x0fNORMAL_EQUATION\x10\x02"K\n\x0fGetModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"}\n\x11PatchModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12.\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.Model"N\n\x12\x44\x65leteModelRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\t"\x82\x01\n\x11ListModelsRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 \x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\xeb\x04\n\x0cModelService\x12X\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12i\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"\x00\x12\\\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\x00\x12U\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"\x00\x1a\xe0\x01\xca\x41\x17\x62igquery.googleapis.com\xd2\x41\xc2\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-onlyBl\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' + '\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x36google/cloud/bigquery_v2/proto/encryption_config.proto\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"\x9b\x35\n\x05Model\x12\x11\n\x04\x65tag\x18\x01 \x01(\tB\x03\xe0\x41\x03\x12\x46\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReferenceB\x03\xe0\x41\x02\x12\x1a\n\rcreation_time\x18\x05 \x01(\x03\x42\x03\xe0\x41\x03\x12\x1f\n\x12last_modified_time\x18\x06 \x01(\x03\x42\x03\xe0\x41\x03\x12\x18\n\x0b\x64\x65scription\x18\x0c \x01(\tB\x03\xe0\x41\x01\x12\x1a\n\rfriendly_name\x18\x0e \x01(\tB\x03\xe0\x41\x01\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x1c\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x42\x03\xe0\x41\x01\x12\x15\n\x08location\x18\r \x01(\tB\x03\xe0\x41\x03\x12S\n\x18\x65ncryption_configuration\x18\x11 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.EncryptionConfiguration\x12\x42\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelTypeB\x03\xe0\x41\x03\x12G\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRunB\x03\xe0\x41\x03\x12H\n\x0f\x66\x65\x61ture_columns\x18\n \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x12\x46\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x1aq\n\x0bKmeansEnums"b\n\x1aKmeansInitializationMethod\x12,\n(KMEANS_INITIALIZATION_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x9f\x06\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x12\x16\n\x0epositive_label\x18\x03 \x01(\t\x12\x16\n\x0enegative_label\x18\x04 \x01(\t\x1a\xec\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\t \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\xcb\x06\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12K\n\x08\x63lusters\x18\x03 \x03(\x0b\x32\x39.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster\x1a\xef\x04\n\x07\x43luster\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12^\n\x0e\x66\x65\x61ture_values\x18\x02 \x03(\x0b\x32\x46.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue\x12*\n\x05\x63ount\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\xc2\x03\n\x0c\x46\x65\x61tureValue\x12\x16\n\x0e\x66\x65\x61ture_column\x18\x01 \x01(\t\x12\x37\n\x0fnumerical_value\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValueH\x00\x12t\n\x11\x63\x61tegorical_value\x18\x03 \x01(\x0b\x32W.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValueH\x00\x1a\xe1\x01\n\x10\x43\x61tegoricalValue\x12~\n\x0f\x63\x61tegory_counts\x18\x01 \x03(\x0b\x32\x65.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount\x1aM\n\rCategoryCount\x12\x10\n\x08\x63\x61tegory\x18\x01 \x01(\t\x12*\n\x05\x63ount\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64ValueB\x07\n\x05value\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\xab\x0f\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\x9d\t\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 \x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x12\x11\n\tmodel_uri\x18\x16 \x01(\t\x12S\n\x15optimization_strategy\x18\x17 \x01(\x0e\x32\x34.google.cloud.bigquery.v2.Model.OptimizationStrategy\x12l\n\x1ckmeans_initialization_method\x18! \x01(\x0e\x32\x46.google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod\x12$\n\x1ckmeans_initialization_column\x18" \x01(\t\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"s\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03\x12\x0e\n\nTENSORFLOW\x10\x06"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"n\n\x14OptimizationStrategy\x12%\n!OPTIMIZATION_STRATEGY_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x42\x41TCH_GRADIENT_DESCENT\x10\x01\x12\x13\n\x0fNORMAL_EQUATION\x10\x02"Z\n\x0fGetModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x91\x01\n\x11PatchModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x12\x33\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.ModelB\x03\xe0\x41\x02"]\n\x12\x44\x65leteModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x8c\x01\n\x11ListModelsRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 \x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\xfa\x05\n\x0cModelService\x12y\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"!\xda\x41\x1eproject_id,dataset_id,model_id\x12\x8d\x01\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"$\xda\x41!project_id,dataset_id,max_results\x12\x83\x01\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\'\xda\x41$project_id,dataset_id,model_id,model\x12v\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"!\xda\x41\x1eproject_id,dataset_id,model_id\x1a\xe0\x01\xca\x41\x17\x62igquery.googleapis.com\xd2\x41\xc2\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-onlyBl\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' ), dependencies=[ + google_dot_api_dot_client__pb2.DESCRIPTOR, + google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2.DESCRIPTOR, google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2.DESCRIPTOR, google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2.DESCRIPTOR, google_dot_protobuf_dot_empty__pb2.DESCRIPTOR, google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR, google_dot_api_dot_annotations__pb2.DESCRIPTOR, - google_dot_api_dot_client__pb2.DESCRIPTOR, ], ) @@ -72,8 +78,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=898, - serialized_end=996, + serialized_start=1132, + serialized_end=1230, ) _sym_db.RegisterEnumDescriptor(_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD) @@ -113,8 +119,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=6398, - serialized_end=6513, + serialized_start=6632, + serialized_end=6747, ) _sym_db.RegisterEnumDescriptor(_MODEL_MODELTYPE) @@ -144,8 +150,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=6515, - serialized_end=6594, + serialized_start=6749, + serialized_end=6828, ) _sym_db.RegisterEnumDescriptor(_MODEL_LOSSTYPE) @@ -171,8 +177,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=6596, - serialized_end=6668, + serialized_start=6830, + serialized_end=6902, ) _sym_db.RegisterEnumDescriptor(_MODEL_DISTANCETYPE) @@ -207,8 +213,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=6670, - serialized_end=6792, + serialized_start=6904, + serialized_end=7026, ) _sym_db.RegisterEnumDescriptor(_MODEL_DATASPLITMETHOD) @@ -234,8 +240,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=6794, - serialized_end=6881, + serialized_start=7028, + serialized_end=7115, ) _sym_db.RegisterEnumDescriptor(_MODEL_LEARNRATESTRATEGY) @@ -269,8 +275,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=6883, - serialized_end=6993, + serialized_start=7117, + serialized_end=7227, ) _sym_db.RegisterEnumDescriptor(_MODEL_OPTIMIZATIONSTRATEGY) @@ -290,8 +296,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=883, - serialized_end=996, + serialized_start=1117, + serialized_end=1230, ) _MODEL_REGRESSIONMETRICS = _descriptor.Descriptor( @@ -400,8 +406,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=999, - serialized_end=1307, + serialized_start=1233, + serialized_end=1541, ) _MODEL_AGGREGATECLASSIFICATIONMETRICS = _descriptor.Descriptor( @@ -546,8 +552,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=1310, - serialized_end=1677, + serialized_start=1544, + serialized_end=1911, ) _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX = _descriptor.Descriptor( @@ -728,8 +734,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=1987, - serialized_end=2479, + serialized_start=2221, + serialized_end=2713, ) _MODEL_BINARYCLASSIFICATIONMETRICS = _descriptor.Descriptor( @@ -820,8 +826,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=1680, - serialized_end=2479, + serialized_start=1914, + serialized_end=2713, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY = _descriptor.Descriptor( @@ -876,8 +882,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2914, - serialized_end=2995, + serialized_start=3148, + serialized_end=3229, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW = _descriptor.Descriptor( @@ -932,8 +938,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2998, - serialized_end=3129, + serialized_start=3232, + serialized_end=3363, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX = _descriptor.Descriptor( @@ -991,8 +997,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2736, - serialized_end=3129, + serialized_start=2970, + serialized_end=3363, ) _MODEL_MULTICLASSCLASSIFICATIONMETRICS = _descriptor.Descriptor( @@ -1047,8 +1053,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=2482, - serialized_end=3129, + serialized_start=2716, + serialized_end=3363, ) _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT = _descriptor.Descriptor( @@ -1103,8 +1109,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=3889, - serialized_end=3966, + serialized_start=4123, + serialized_end=4200, ) _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE = _descriptor.Descriptor( @@ -1143,8 +1149,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=3741, - serialized_end=3966, + serialized_start=3975, + serialized_end=4200, ) _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE = _descriptor.Descriptor( @@ -1225,8 +1231,8 @@ fields=[], ) ], - serialized_start=3525, - serialized_end=3975, + serialized_start=3759, + serialized_end=4209, ) _MODEL_CLUSTERINGMETRICS_CLUSTER = _descriptor.Descriptor( @@ -1299,8 +1305,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=3352, - serialized_end=3975, + serialized_start=3586, + serialized_end=4209, ) _MODEL_CLUSTERINGMETRICS = _descriptor.Descriptor( @@ -1373,8 +1379,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=3132, - serialized_end=3975, + serialized_start=3366, + serialized_end=4209, ) _MODEL_EVALUATIONMETRICS = _descriptor.Descriptor( @@ -1473,8 +1479,8 @@ fields=[], ) ], - serialized_start=3978, - serialized_end=4383, + serialized_start=4212, + serialized_end=4617, ) _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY = _descriptor.Descriptor( @@ -1529,8 +1535,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=5819, - serialized_end=5875, + serialized_start=6053, + serialized_end=6109, ) _MODEL_TRAININGRUN_TRAININGOPTIONS = _descriptor.Descriptor( @@ -1927,8 +1933,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=4694, - serialized_end=5875, + serialized_start=4928, + serialized_end=6109, ) _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO = _descriptor.Descriptor( @@ -2001,8 +2007,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=6210, - serialized_end=6349, + serialized_start=6444, + serialized_end=6583, ) _MODEL_TRAININGRUN_ITERATIONRESULT = _descriptor.Descriptor( @@ -2129,8 +2135,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=5878, - serialized_end=6349, + serialized_start=6112, + serialized_end=6583, ) _MODEL_TRAININGRUN = _descriptor.Descriptor( @@ -2224,8 +2230,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=4386, - serialized_end=6349, + serialized_start=4620, + serialized_end=6583, ) _MODEL_LABELSENTRY = _descriptor.Descriptor( @@ -2280,8 +2286,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=6351, - serialized_end=6396, + serialized_start=6585, + serialized_end=6630, ) _MODEL = _descriptor.Descriptor( @@ -2306,7 +2312,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\003"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2324,7 +2330,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2342,7 +2348,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\003"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2360,7 +2366,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\003"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2378,7 +2384,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\001"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2396,7 +2402,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\001"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2432,7 +2438,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\001"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2450,13 +2456,31 @@ containing_type=None, is_extension=False, extension_scope=None, + serialized_options=_b("\340A\003"), + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="encryption_configuration", + full_name="google.cloud.bigquery.v2.Model.encryption_configuration", + index=9, + number=17, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, serialized_options=None, file=DESCRIPTOR, ), _descriptor.FieldDescriptor( name="model_type", full_name="google.cloud.bigquery.v2.Model.model_type", - index=9, + index=10, number=7, type=14, cpp_type=8, @@ -2468,13 +2492,13 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\003"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( name="training_runs", full_name="google.cloud.bigquery.v2.Model.training_runs", - index=10, + index=11, number=9, type=11, cpp_type=10, @@ -2486,13 +2510,13 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\003"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( name="feature_columns", full_name="google.cloud.bigquery.v2.Model.feature_columns", - index=11, + index=12, number=10, type=11, cpp_type=10, @@ -2504,13 +2528,13 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\003"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( name="label_columns", full_name="google.cloud.bigquery.v2.Model.label_columns", - index=12, + index=13, number=11, type=11, cpp_type=10, @@ -2522,7 +2546,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\003"), file=DESCRIPTOR, ), ], @@ -2551,8 +2575,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=327, - serialized_end=6993, + serialized_start=416, + serialized_end=7227, ) @@ -2578,7 +2602,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2596,7 +2620,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2614,7 +2638,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), ], @@ -2626,8 +2650,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=6995, - serialized_end=7070, + serialized_start=7229, + serialized_end=7319, ) @@ -2653,7 +2677,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2671,7 +2695,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2689,7 +2713,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2707,7 +2731,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), ], @@ -2719,8 +2743,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=7072, - serialized_end=7197, + serialized_start=7322, + serialized_end=7467, ) @@ -2746,7 +2770,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2764,7 +2788,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2782,7 +2806,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), ], @@ -2794,8 +2818,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=7199, - serialized_end=7277, + serialized_start=7469, + serialized_end=7562, ) @@ -2821,7 +2845,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2839,7 +2863,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -2887,8 +2911,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=7280, - serialized_end=7410, + serialized_start=7565, + serialized_end=7705, ) @@ -2944,8 +2968,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=7412, - serialized_end=7506, + serialized_start=7707, + serialized_end=7801, ) _MODEL_KMEANSENUMS.containing_type = _MODEL @@ -3231,6 +3255,11 @@ google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2._MODELREFERENCE ) _MODEL.fields_by_name["labels"].message_type = _MODEL_LABELSENTRY +_MODEL.fields_by_name[ + "encryption_configuration" +].message_type = ( + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2._ENCRYPTIONCONFIGURATION +) _MODEL.fields_by_name["model_type"].enum_type = _MODEL_MODELTYPE _MODEL.fields_by_name["training_runs"].message_type = _MODEL_TRAININGRUN _MODEL.fields_by_name[ @@ -3520,7 +3549,7 @@ category_counts: Counts of all categories for the categorical feature. If there are more than ten categories, we return top ten (by count) and - return one more CategoryCount with category ‘\ *OTHER*\ ’ and + return one more CategoryCount with category "*OTHER*" and count as aggregate counts of remaining categories. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue) @@ -3789,19 +3818,19 @@ Output only. The time when this model was last modified, in millisecs since the epoch. description: - [Optional] A user-friendly description of this model. + Optional. A user-friendly description of this model. friendly_name: - [Optional] A descriptive name for this model. + Optional. A descriptive name for this model. labels: - [Optional] The labels associated with this model. You can use - these to organize and group your models. Label keys and values - can be no longer than 63 characters, can only contain - lowercase letters, numeric characters, underscores and dashes. + The labels associated with this model. You can use these to + organize and group your models. Label keys and values can be + no longer than 63 characters, can only contain lowercase + letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. expiration_time: - [Optional] The time when this model expires, in milliseconds + Optional. The time when this model expires, in milliseconds since the epoch. If not present, the model will persist indefinitely. Expired models will be deleted and their storage reclaimed. The defaultTableExpirationMs property of the @@ -3810,6 +3839,10 @@ location: Output only. The geographic location where the model resides. This value is inherited from the dataset. + encryption_configuration: + Custom encryption configuration (e.g., Cloud KMS keys). This + shows the encryption configuration of the model data while + stored in BigQuery storage. model_type: Output only. Type of the model resource. training_runs: @@ -3861,11 +3894,11 @@ Attributes: project_id: - Project ID of the requested model. + Required. Project ID of the requested model. dataset_id: - Dataset ID of the requested model. + Required. Dataset ID of the requested model. model_id: - Model ID of the requested model. + Required. Model ID of the requested model. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.GetModelRequest) ), @@ -3882,15 +3915,15 @@ Attributes: project_id: - Project ID of the model to patch. + Required. Project ID of the model to patch. dataset_id: - Dataset ID of the model to patch. + Required. Dataset ID of the model to patch. model_id: - Model ID of the model to patch. + Required. Model ID of the model to patch. model: - Patched model. Follows RFC5789 patch semantics. Missing fields - are not updated. To clear a field, explicitly set to default - value. + Required. Patched model. Follows RFC5789 patch semantics. + Missing fields are not updated. To clear a field, explicitly + set to default value. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.PatchModelRequest) ), @@ -3907,11 +3940,11 @@ Attributes: project_id: - Project ID of the model to delete. + Required. Project ID of the model to delete. dataset_id: - Dataset ID of the model to delete. + Required. Dataset ID of the model to delete. model_id: - Model ID of the model to delete. + Required. Model ID of the model to delete. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.DeleteModelRequest) ), @@ -3928,9 +3961,9 @@ Attributes: project_id: - Project ID of the models to list. + Required. Project ID of the models to list. dataset_id: - Dataset ID of the models to list. + Required. Dataset ID of the models to list. max_results: The maximum number of results to return in a single response page. Leverage the page tokens to iterate through the entire @@ -3969,6 +4002,30 @@ DESCRIPTOR._options = None _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY._options = None _MODEL_LABELSENTRY._options = None +_MODEL.fields_by_name["etag"]._options = None +_MODEL.fields_by_name["model_reference"]._options = None +_MODEL.fields_by_name["creation_time"]._options = None +_MODEL.fields_by_name["last_modified_time"]._options = None +_MODEL.fields_by_name["description"]._options = None +_MODEL.fields_by_name["friendly_name"]._options = None +_MODEL.fields_by_name["expiration_time"]._options = None +_MODEL.fields_by_name["location"]._options = None +_MODEL.fields_by_name["model_type"]._options = None +_MODEL.fields_by_name["training_runs"]._options = None +_MODEL.fields_by_name["feature_columns"]._options = None +_MODEL.fields_by_name["label_columns"]._options = None +_GETMODELREQUEST.fields_by_name["project_id"]._options = None +_GETMODELREQUEST.fields_by_name["dataset_id"]._options = None +_GETMODELREQUEST.fields_by_name["model_id"]._options = None +_PATCHMODELREQUEST.fields_by_name["project_id"]._options = None +_PATCHMODELREQUEST.fields_by_name["dataset_id"]._options = None +_PATCHMODELREQUEST.fields_by_name["model_id"]._options = None +_PATCHMODELREQUEST.fields_by_name["model"]._options = None +_DELETEMODELREQUEST.fields_by_name["project_id"]._options = None +_DELETEMODELREQUEST.fields_by_name["dataset_id"]._options = None +_DELETEMODELREQUEST.fields_by_name["model_id"]._options = None +_LISTMODELSREQUEST.fields_by_name["project_id"]._options = None +_LISTMODELSREQUEST.fields_by_name["dataset_id"]._options = None _MODELSERVICE = _descriptor.ServiceDescriptor( name="ModelService", @@ -3978,8 +4035,8 @@ serialized_options=_b( "\312A\027bigquery.googleapis.com\322A\302\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-only" ), - serialized_start=7509, - serialized_end=8128, + serialized_start=7804, + serialized_end=8566, methods=[ _descriptor.MethodDescriptor( name="GetModel", @@ -3988,7 +4045,7 @@ containing_service=None, input_type=_GETMODELREQUEST, output_type=_MODEL, - serialized_options=None, + serialized_options=_b("\332A\036project_id,dataset_id,model_id"), ), _descriptor.MethodDescriptor( name="ListModels", @@ -3997,7 +4054,7 @@ containing_service=None, input_type=_LISTMODELSREQUEST, output_type=_LISTMODELSRESPONSE, - serialized_options=None, + serialized_options=_b("\332A!project_id,dataset_id,max_results"), ), _descriptor.MethodDescriptor( name="PatchModel", @@ -4006,7 +4063,7 @@ containing_service=None, input_type=_PATCHMODELREQUEST, output_type=_MODEL, - serialized_options=None, + serialized_options=_b("\332A$project_id,dataset_id,model_id,model"), ), _descriptor.MethodDescriptor( name="DeleteModel", @@ -4015,7 +4072,7 @@ containing_service=None, input_type=_DELETEMODELREQUEST, output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, - serialized_options=None, + serialized_options=_b("\332A\036project_id,dataset_id,model_id"), ), ], ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto index f436659abd4f..fadd175146b0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto @@ -17,6 +17,7 @@ syntax = "proto3"; package google.cloud.bigquery.v2; +import "google/api/field_behavior.proto"; import "google/api/annotations.proto"; option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; @@ -25,14 +26,14 @@ option java_package = "com.google.cloud.bigquery.v2"; // Id path of a model. message ModelReference { - // [Required] The ID of the project containing this model. - string project_id = 1; + // Required. The ID of the project containing this model. + string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - // [Required] The ID of the dataset containing this model. - string dataset_id = 2; + // Required. The ID of the dataset containing this model. + string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - // [Required] The ID of the model. The ID must contain only + // Required. The ID of the model. The ID must contain only // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum // length is 1,024 characters. - string model_id = 3; + string model_id = 3 [(google.api.field_behavior) = REQUIRED]; } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py index 94f6116b5a44..01e6e29522a5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py @@ -15,6 +15,7 @@ _sym_db = _symbol_database.Default() +from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 @@ -26,9 +27,12 @@ "\n\034com.google.cloud.bigquery.v2B\023ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" ), serialized_pb=_b( - '\n4google/cloud/bigquery_v2/proto/model_reference.proto\x12\x18google.cloud.bigquery.v2\x1a\x1cgoogle/api/annotations.proto"J\n\x0eModelReference\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08model_id\x18\x03 \x01(\tBu\n\x1c\x63om.google.cloud.bigquery.v2B\x13ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' + '\n4google/cloud/bigquery_v2/proto/model_reference.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"Y\n\x0eModelReference\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x42u\n\x1c\x63om.google.cloud.bigquery.v2B\x13ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' ), - dependencies=[google_dot_api_dot_annotations__pb2.DESCRIPTOR], + dependencies=[ + google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, + google_dot_api_dot_annotations__pb2.DESCRIPTOR, + ], ) @@ -54,7 +58,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -72,7 +76,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -90,7 +94,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), ], @@ -102,8 +106,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=112, - serialized_end=186, + serialized_start=145, + serialized_end=234, ) DESCRIPTOR.message_types_by_name["ModelReference"] = _MODELREFERENCE @@ -120,11 +124,11 @@ Attributes: project_id: - [Required] The ID of the project containing this model. + Required. The ID of the project containing this model. dataset_id: - [Required] The ID of the dataset containing this model. + Required. The ID of the dataset containing this model. model_id: - [Required] The ID of the model. The ID must contain only + Required. The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (\_). The maximum length is 1,024 characters. """, @@ -135,4 +139,7 @@ DESCRIPTOR._options = None +_MODELREFERENCE.fields_by_name["project_id"]._options = None +_MODELREFERENCE.fields_by_name["dataset_id"]._options = None +_MODELREFERENCE.fields_by_name["model_id"]._options = None # @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto index 98173092ff71..ff69dfc4eb30 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto @@ -17,6 +17,7 @@ syntax = "proto3"; package google.cloud.bigquery.v2; +import "google/api/field_behavior.proto"; import "google/api/annotations.proto"; option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; @@ -82,7 +83,7 @@ message StandardSqlDataType { // Required. The top level type of this field. // Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY"). - TypeKind type_kind = 1; + TypeKind type_kind = 1 [(google.api.field_behavior) = REQUIRED]; oneof sub_type { // The type of the array's elements, if type_kind = "ARRAY". @@ -96,12 +97,12 @@ message StandardSqlDataType { // A field or a column. message StandardSqlField { // Optional. The name of this field. Can be absent for struct fields. - string name = 1; + string name = 1 [(google.api.field_behavior) = OPTIONAL]; // Optional. The type of this parameter. Absent if not explicitly // specified (e.g., CREATE FUNCTION statement can omit the return type; // in this case the output parameter does not have this "type" field). - StandardSqlDataType type = 2; + StandardSqlDataType type = 2 [(google.api.field_behavior) = OPTIONAL]; } message StandardSqlStructType { diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py index 91f1554f7f80..19ca829a4061 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py @@ -15,6 +15,7 @@ _sym_db = _symbol_database.Default() +from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 @@ -26,9 +27,12 @@ "\n\034com.google.cloud.bigquery.v2B\020StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" ), serialized_pb=_b( - '\n1google/cloud/bigquery_v2/proto/standard_sql.proto\x12\x18google.cloud.bigquery.v2\x1a\x1cgoogle/api/annotations.proto"\xc6\x03\n\x13StandardSqlDataType\x12I\n\ttype_kind\x18\x01 \x01(\x0e\x32\x36.google.cloud.bigquery.v2.StandardSqlDataType.TypeKind\x12K\n\x12\x61rray_element_type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeH\x00\x12\x46\n\x0bstruct_type\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.v2.StandardSqlStructTypeH\x00"\xc2\x01\n\x08TypeKind\x12\x19\n\x15TYPE_KIND_UNSPECIFIED\x10\x00\x12\t\n\x05INT64\x10\x02\x12\x08\n\x04\x42OOL\x10\x05\x12\x0b\n\x07\x46LOAT64\x10\x07\x12\n\n\x06STRING\x10\x08\x12\t\n\x05\x42YTES\x10\t\x12\r\n\tTIMESTAMP\x10\x13\x12\x08\n\x04\x44\x41TE\x10\n\x12\x08\n\x04TIME\x10\x14\x12\x0c\n\x08\x44\x41TETIME\x10\x15\x12\r\n\tGEOGRAPHY\x10\x16\x12\x0b\n\x07NUMERIC\x10\x17\x12\t\n\x05\x41RRAY\x10\x10\x12\n\n\x06STRUCT\x10\x11\x42\n\n\x08sub_type"]\n\x10StandardSqlField\x12\x0c\n\x04name\x18\x01 \x01(\t\x12;\n\x04type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataType"S\n\x15StandardSqlStructType\x12:\n\x06\x66ields\x18\x01 \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldBr\n\x1c\x63om.google.cloud.bigquery.v2B\x10StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' + '\n1google/cloud/bigquery_v2/proto/standard_sql.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"\xcb\x03\n\x13StandardSqlDataType\x12N\n\ttype_kind\x18\x01 \x01(\x0e\x32\x36.google.cloud.bigquery.v2.StandardSqlDataType.TypeKindB\x03\xe0\x41\x02\x12K\n\x12\x61rray_element_type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeH\x00\x12\x46\n\x0bstruct_type\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.v2.StandardSqlStructTypeH\x00"\xc2\x01\n\x08TypeKind\x12\x19\n\x15TYPE_KIND_UNSPECIFIED\x10\x00\x12\t\n\x05INT64\x10\x02\x12\x08\n\x04\x42OOL\x10\x05\x12\x0b\n\x07\x46LOAT64\x10\x07\x12\n\n\x06STRING\x10\x08\x12\t\n\x05\x42YTES\x10\t\x12\r\n\tTIMESTAMP\x10\x13\x12\x08\n\x04\x44\x41TE\x10\n\x12\x08\n\x04TIME\x10\x14\x12\x0c\n\x08\x44\x41TETIME\x10\x15\x12\r\n\tGEOGRAPHY\x10\x16\x12\x0b\n\x07NUMERIC\x10\x17\x12\t\n\x05\x41RRAY\x10\x10\x12\n\n\x06STRUCT\x10\x11\x42\n\n\x08sub_type"g\n\x10StandardSqlField\x12\x11\n\x04name\x18\x01 \x01(\tB\x03\xe0\x41\x01\x12@\n\x04type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeB\x03\xe0\x41\x01"S\n\x15StandardSqlStructType\x12:\n\x06\x66ields\x18\x01 \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldBr\n\x1c\x63om.google.cloud.bigquery.v2B\x10StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' ), - dependencies=[google_dot_api_dot_annotations__pb2.DESCRIPTOR], + dependencies=[ + google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, + google_dot_api_dot_annotations__pb2.DESCRIPTOR, + ], ) @@ -87,8 +91,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=358, - serialized_end=552, + serialized_start=396, + serialized_end=590, ) _sym_db.RegisterEnumDescriptor(_STANDARDSQLDATATYPE_TYPEKIND) @@ -115,7 +119,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\002"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -171,8 +175,8 @@ fields=[], ) ], - serialized_start=110, - serialized_end=564, + serialized_start=143, + serialized_end=602, ) @@ -198,7 +202,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\001"), file=DESCRIPTOR, ), _descriptor.FieldDescriptor( @@ -216,7 +220,7 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, + serialized_options=_b("\340A\001"), file=DESCRIPTOR, ), ], @@ -228,8 +232,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=566, - serialized_end=659, + serialized_start=604, + serialized_end=707, ) @@ -267,8 +271,8 @@ syntax="proto3", extension_ranges=[], oneofs=[], - serialized_start=661, - serialized_end=744, + serialized_start=709, + serialized_end=792, ) _STANDARDSQLDATATYPE.fields_by_name[ @@ -363,4 +367,7 @@ DESCRIPTOR._options = None +_STANDARDSQLDATATYPE.fields_by_name["type_kind"]._options = None +_STANDARDSQLFIELD.fields_by_name["name"]._options = None +_STANDARDSQLFIELD.fields_by_name["type"]._options = None # @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py index 36fc2d6bfbd2..da9287c07824 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py @@ -20,6 +20,7 @@ from google.api_core.protobuf_helpers import get_messages +from google.cloud.bigquery_v2.proto import encryption_config_pb2 from google.cloud.bigquery_v2.proto import model_pb2 from google.cloud.bigquery_v2.proto import model_reference_pb2 from google.cloud.bigquery_v2.proto import standard_sql_pb2 @@ -30,7 +31,12 @@ _shared_modules = [empty_pb2, timestamp_pb2, wrappers_pb2] -_local_modules = [model_pb2, model_reference_pb2, standard_sql_pb2] +_local_modules = [ + encryption_config_pb2, + model_pb2, + model_reference_pb2, + standard_sql_pb2, +] names = [] diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index b5cce0083f8e..b85a00155ee8 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -1,19 +1,19 @@ { - "updateTime": "2019-08-06T12:13:25.893023Z", + "updateTime": "2019-09-12T14:51:08.578469Z", "sources": [ { "generator": { "name": "artman", - "version": "0.32.1", - "dockerImage": "googleapis/artman@sha256:a684d40ba9a4e15946f5f2ca6b4bd9fe301192f522e9de4fff622118775f309b" + "version": "0.36.2", + "dockerImage": "googleapis/artman@sha256:0e6f3a668cd68afc768ecbe08817cf6e56a0e64fcbdb1c58c3b97492d12418a1" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "e699b0cba64ffddfae39633417180f1f65875896", - "internalRef": "261759677" + "sha": "1cb29d0fd49437d8e5d7de327e258739e998f01c", + "internalRef": "268598527" } } ], diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index a8370fd4e96a..a20426d3910f 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -35,6 +35,7 @@ library / "google/cloud/bigquery_v2/gapic/enums.py", library / "google/cloud/bigquery_v2/types.py", library / "google/cloud/bigquery_v2/proto/location*", + library / "google/cloud/bigquery_v2/proto/encryption_config*", library / "google/cloud/bigquery_v2/proto/model*", library / "google/cloud/bigquery_v2/proto/standard_sql*", ], @@ -46,6 +47,11 @@ '"""Attributes:', '"""Protocol buffer.\n\n Attributes:', ) +s.replace( + "google/cloud/bigquery_v2/proto/encryption_config_pb2.py", + '"""Attributes:', + '"""Encryption configuration.\n\n Attributes:', +) # Remove non-ascii characters from docstrings for Python 2.7. # Format quoted strings as plain text. From 3974431b9ca79120a2163d68b2b753c2ef2e0775 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 13 Sep 2019 11:21:17 -0700 Subject: [PATCH 0669/2016] Change default endpoint to bigquery.googleapis.com (#9213) --- packages/google-cloud-bigquery/google/cloud/bigquery/_http.py | 2 +- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_client.py | 4 ++-- packages/google-cloud-bigquery/tests/unit/test_job.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index 5dd660ea0b8d..dd0d9d01c9de 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -29,7 +29,7 @@ class Connection(_http.JSONConnection): :param client_info: (Optional) instance used to generate user agent. """ - DEFAULT_API_ENDPOINT = "https://www.googleapis.com" + DEFAULT_API_ENDPOINT = "https://bigquery.googleapis.com" def __init__(self, client, client_info=None, api_endpoint=DEFAULT_API_ENDPOINT): super(Connection, self).__init__(client, client_info) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 9df8b87eb8dd..f14444c0c48a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -78,7 +78,7 @@ _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 _BASE_UPLOAD_TEMPLATE = ( - u"https://www.googleapis.com/upload/bigquery/v2/projects/" + u"https://bigquery.googleapis.com/upload/bigquery/v2/projects/" u"{project}/jobs?uploadType=" ) _MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u"multipart" diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 46666347e331..ea4b114358a9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -3127,7 +3127,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): # Check the returned values. self.assertIsInstance(upload, ResumableUpload) upload_url = ( - "https://www.googleapis.com/upload/bigquery/v2/projects/" + "https://bigquery.googleapis.com/upload/bigquery/v2/projects/" + self.PROJECT + "/jobs?uploadType=resumable" ) @@ -3196,7 +3196,7 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): get_boundary.assert_called_once_with() upload_url = ( - "https://www.googleapis.com/upload/bigquery/v2/projects/" + "https://bigquery.googleapis.com/upload/bigquery/v2/projects/" + self.PROJECT + "/jobs?uploadType=multipart" ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 98090a5257fd..b34184f00cd9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -73,7 +73,7 @@ def _make_job_resource( started=False, ended=False, etag="abc-def-hjk", - endpoint="https://www.googleapis.com", + endpoint="https://bigquery.googleapis.com", job_type="load", job_id="a-random-id", project_id="some-project", @@ -1022,7 +1022,7 @@ class _Base(object): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import TableReference - ENDPOINT = "https://www.googleapis.com" + ENDPOINT = "https://bigquery.googleapis.com" PROJECT = "project" SOURCE1 = "http://example.com/source1.csv" DS_ID = "dataset_id" From 02ac2156461044f3f43727f2de8451abfba33f1a Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Mon, 16 Sep 2019 14:22:01 -0700 Subject: [PATCH 0670/2016] Release bigquery 1.20.0 (#9227) --- packages/google-cloud-bigquery/CHANGELOG.md | 26 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index c938c05b7f13..9170d004ecc7 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,32 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.20.0 + +09-13-2019 11:22 PDT + + +### Implementation Changes +- Change default endpoint to bigquery.googleapis.com ([#9213](https://github.com/googleapis/google-cloud-python/pull/9213)) +- Change the default value of Cursor instances' `arraysize` attribute to None ([#9199](https://github.com/googleapis/google-cloud-python/pull/9199)) +- Deprecate automatic schema conversion. ([#9176](https://github.com/googleapis/google-cloud-python/pull/9176)) +- Fix `list_rows()` max results with BQ storage client ([#9178](https://github.com/googleapis/google-cloud-python/pull/9178)) + +### New Features +- Add `Model.encryption_config`. (via synth) ([#9214](https://github.com/googleapis/google-cloud-python/pull/9214)) +- Add `Client.insert_rows_from_dataframe()` method ([#9162](https://github.com/googleapis/google-cloud-python/pull/9162)) +- Add support for array parameters to `Cursor.execute()`. ([#9189](https://github.com/googleapis/google-cloud-python/pull/9189)) +- Add support for project IDs with org prefix to `Table.from_string()` factory. ([#9161](https://github.com/googleapis/google-cloud-python/pull/9161)) +- Add `--max_results` option to Jupyter magics ([#9169](https://github.com/googleapis/google-cloud-python/pull/9169)) +- Autofetch table schema on load if not provided. ([#9108](https://github.com/googleapis/google-cloud-python/pull/9108)) +- Add `max_results` parameter to `QueryJob.result()`. ([#9167](https://github.com/googleapis/google-cloud-python/pull/9167)) + +### Documentation +- Fix doc link. ([#9200](https://github.com/googleapis/google-cloud-python/pull/9200)) + +### Internal / Testing Changes +- Revert "Disable failing snippets test ([#9156](https://github.com/googleapis/google-cloud-python/pull/9156))." ([#9220](https://github.com/googleapis/google-cloud-python/pull/9220)) + ## 1.19.0 09-03-2019 14:33 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 42f59516fc13..c779b688f552 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.19.0" +version = "1.20.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From d1f9aaabf15a389ca14ec5889d42b9e6f7232333 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Thu, 19 Sep 2019 12:36:19 -0700 Subject: [PATCH 0671/2016] BigQuery: Restrict version range of google-resumable-media. (#9243) * Restrict version range of google-resumable-media Co-Authored-By: Tres Seaver --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index c779b688f552..897a7eac7f36 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -31,7 +31,7 @@ dependencies = [ 'enum34; python_version < "3.4"', "google-cloud-core >= 1.0.3, < 2.0dev", - "google-resumable-media >= 0.3.1", + "google-resumable-media >= 0.3.1, != 0.4.0, < 0.5.0dev", "protobuf >= 3.6.0", ] extras = { From 6e5ee16a0e4e2a9c5865cfaa3955618f15b143ce Mon Sep 17 00:00:00 2001 From: Shubha Rajan Date: Mon, 23 Sep 2019 01:03:55 -0700 Subject: [PATCH 0672/2016] BigQuery: Add ability to pass in a table ID instead of a query to the %%bigquery magic. (#9170) * cell magic accepts table_ids instead of queries added default patch to unit tests * simplified error handling to return from exception * added comment and updated to use strip instead of rstrip * blacken/lint * reformatted return statement * removed trailing whitespace --- .../google/cloud/bigquery/magics.py | 41 +++++-- .../tests/unit/test_magics.py | 108 ++++++++++++++++++ 2 files changed, 138 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 4c93d1307a42..c238bb50317a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -129,6 +129,7 @@ from __future__ import print_function +import re import ast import sys import time @@ -266,6 +267,15 @@ def default_query_job_config(self, value): context = Context() +def _print_error(error, destination_var=None): + if destination_var: + print( + "Could not save output to variable '{}'.".format(destination_var), + file=sys.stderr, + ) + print("\nERROR:\n", error, file=sys.stderr) + + def _run_query(client, query, job_config=None): """Runs a query while printing status updates @@ -434,6 +444,24 @@ def _cell_magic(line, query): else: max_results = None + query = query.strip() + + # Any query that does not contain whitespace (aside from leading and trailing whitespace) + # is assumed to be a table id + if not re.search(r"\s", query): + try: + rows = client.list_rows(query, max_results=max_results) + except Exception as ex: + _print_error(str(ex), args.destination_var) + return + + result = rows.to_dataframe(bqstorage_client=bqstorage_client) + if args.destination_var: + IPython.get_ipython().push({args.destination_var: result}) + return + else: + return result + job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql @@ -445,24 +473,15 @@ def _cell_magic(line, query): value = int(args.maximum_bytes_billed) job_config.maximum_bytes_billed = value - error = None try: query_job = _run_query(client, query, job_config=job_config) except Exception as ex: - error = str(ex) + _print_error(str(ex), args.destination_var) + return if not args.verbose: display.clear_output() - if error: - if args.destination_var: - print( - "Could not save output to variable '{}'.".format(args.destination_var), - file=sys.stderr, - ) - print("\nERROR:\n", error, file=sys.stderr) - return - if args.dry_run and args.destination_var: IPython.get_ipython().push({args.destination_var: query_job}) return diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index ed748d2dd5e3..ec642ff384e1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -696,6 +696,114 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): query_job_mock.result.assert_called_with(max_results=5) +def test_bigquery_magic_w_table_id_invalid(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + + list_rows_patch = mock.patch( + "google.cloud.bigquery.magics.bigquery.Client.list_rows", + autospec=True, + side_effect=exceptions.BadRequest("Not a valid table ID"), + ) + + table_id = "not-a-real-table" + + with list_rows_patch, default_patch, io.capture_output() as captured_io: + ip.run_cell_magic("bigquery", "df", table_id) + + output = captured_io.stderr + assert "Could not save output to variable" in output + assert "400 Not a valid table ID" in output + assert "Traceback (most recent call last)" not in output + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_table_id_and_destination_var(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + + row_iterator_mock = mock.create_autospec( + google.cloud.bigquery.table.RowIterator, instance=True + ) + + client_patch = mock.patch( + "google.cloud.bigquery.magics.bigquery.Client", autospec=True + ) + + table_id = "bigquery-public-data.samples.shakespeare" + result = pandas.DataFrame([17], columns=["num"]) + + with client_patch as client_mock, default_patch: + client_mock().list_rows.return_value = row_iterator_mock + row_iterator_mock.to_dataframe.return_value = result + + ip.run_cell_magic("bigquery", "df", table_id) + + assert "df" in ip.user_ns + df = ip.user_ns["df"] + + assert isinstance(df, pandas.DataFrame) + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_table_id_and_bqstorage_client(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + + row_iterator_mock = mock.create_autospec( + google.cloud.bigquery.table.RowIterator, instance=True + ) + + client_patch = mock.patch( + "google.cloud.bigquery.magics.bigquery.Client", autospec=True + ) + + bqstorage_mock = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient, instance=True + ) + bqstorage_mock.return_value = bqstorage_instance_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", bqstorage_mock + ) + + table_id = "bigquery-public-data.samples.shakespeare" + + with default_patch, client_patch as client_mock, bqstorage_client_patch: + client_mock().list_rows.return_value = row_iterator_mock + + ip.run_cell_magic("bigquery", "--use_bqstorage_api --max_results=5", table_id) + row_iterator_mock.to_dataframe.assert_called_once_with( + bqstorage_client=bqstorage_instance_mock + ) + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_dryrun_option_sets_job_config(): ip = IPython.get_ipython() From 2aa273f2702426d4303d345e3818432e59df9192 Mon Sep 17 00:00:00 2001 From: TobKed Date: Thu, 26 Sep 2019 09:36:44 +0200 Subject: [PATCH 0673/2016] feat(bigquery): add support for custom QueryJobConfig in BigQuery.cursor.execute method (#9278) * feat(big_query): add support for custom QueryJobConfig in execute cursor method * fixup! feat(big_query): add support for custom QueryJobConfig in execute cursor method --- .../google/cloud/bigquery/dbapi/cursor.py | 8 +++++--- .../tests/unit/test_dbapi_cursor.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 3fdc750951e2..9b7a895b367f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -116,7 +116,7 @@ def _set_rowcount(self, query_results): total_rows = num_dml_affected_rows self.rowcount = total_rows - def execute(self, operation, parameters=None, job_id=None): + def execute(self, operation, parameters=None, job_id=None, job_config=None): """Prepare and execute a database operation. .. note:: @@ -148,6 +148,9 @@ def execute(self, operation, parameters=None, job_id=None): :type job_id: str :param job_id: (Optional) The job_id to use. If not set, a job ID is generated at random. + + :type job_config: :class:`~google.cloud.bigquery.job.QueryJobConfig` + :param job_config: (Optional) Extra configuration options for the query job. """ self._query_data = None self._query_job = None @@ -160,9 +163,8 @@ def execute(self, operation, parameters=None, job_id=None): formatted_operation = _format_operation(operation, parameters=parameters) query_parameters = _helpers.to_query_parameters(parameters) - config = job.QueryJobConfig() + config = job_config or job.QueryJobConfig(use_legacy_sql=False) config.query_parameters = query_parameters - config.use_legacy_sql = False self._query_job = client.query( formatted_operation, job_config=config, job_id=job_id ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 4a675c73958d..4ccd5e71af72 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -191,6 +191,20 @@ def test_execute_custom_job_id(self): self.assertEqual(args[0], "SELECT 1;") self.assertEqual(kwargs["job_id"], "foo") + def test_execute_custom_job_config(self): + from google.cloud.bigquery.dbapi import connect + from google.cloud.bigquery import job + + config = job.QueryJobConfig(use_legacy_sql=True) + client = self._mock_client(rows=[], num_dml_affected_rows=0) + connection = connect(client) + cursor = connection.cursor() + cursor.execute("SELECT 1;", job_id="foo", job_config=config) + args, kwargs = client.query.call_args + self.assertEqual(args[0], "SELECT 1;") + self.assertEqual(kwargs["job_id"], "foo") + self.assertEqual(kwargs["job_config"], config) + def test_execute_w_dml(self): from google.cloud.bigquery.dbapi import connect From ee3fc8ce5e7ace315cedc9458b5474af90bf7da6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 26 Sep 2019 21:34:29 +0200 Subject: [PATCH 0674/2016] docs(bigquery): document how to load data as JSON string (#9231) --- .../google/cloud/bigquery/client.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f14444c0c48a..0fd9483963d3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1644,6 +1644,22 @@ def load_table_from_json( json_rows (Iterable[Dict[str, Any]]): Row data to be inserted. Keys must match the table schema fields and values must be JSON-compatible representations. + + .. note:: + + If your data is already a newline-delimited JSON string, + it is best to wrap it into a file-like object and pass it + to :meth:`~google.cloud.bigquery.client.Client.load_table_from_file`:: + + import io + from google.cloud import bigquery + + data = u'{"foo": "bar"}' + data_as_file = io.StringIO(data) + + client = bigquery.Client() + client.load_table_from_file(data_as_file, ...) + destination (Union[ \ :class:`~google.cloud.bigquery.table.Table`, \ :class:`~google.cloud.bigquery.table.TableReference`, \ From 94a9d3b3608d43f21e9c456b54f0640b1c26c3ea Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 26 Sep 2019 21:35:40 +0200 Subject: [PATCH 0675/2016] style(bigquery): add code samples to lint check (#9277) --- packages/google-cloud-bigquery/noxfile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 37611a5ce296..fc44db1c120e 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -150,6 +150,7 @@ def lint(session): session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") + session.run("flake8", os.path.join("docs", "samples")) session.run("flake8", os.path.join("docs", "snippets.py")) session.run("black", "--check", *BLACK_PATHS) From 28d259cfc3822d6b2ebef49100f13613f651d8a1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 26 Sep 2019 21:37:57 +0200 Subject: [PATCH 0676/2016] feat(bigquery): store QueryJob to destination var on error (#9245) --- .../google/cloud/bigquery/job.py | 2 ++ .../google/cloud/bigquery/magics.py | 36 ++++++++++++++----- .../tests/unit/test_job.py | 8 +++-- .../tests/unit/test_magics.py | 31 ++++++++++++++++ 4 files changed, 66 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index ccbab8b5eb44..8ea8860a7ac2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2903,6 +2903,7 @@ def _begin(self, client=None, retry=DEFAULT_RETRY): super(QueryJob, self)._begin(client=client, retry=retry) except exceptions.GoogleCloudError as exc: exc.message += self._format_for_exception(self.query, self.job_id) + exc.query_job = self raise def result( @@ -2945,6 +2946,7 @@ def result( ) except exceptions.GoogleCloudError as exc: exc.message += self._format_for_exception(self.query, self.job_id) + exc.query_job = self raise # If the query job is complete but there are no query results, this was diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index c238bb50317a..2a174cefeea3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -28,7 +28,9 @@ * ```` (optional, line argument): variable to store the query results. The results are not displayed if - this parameter is used. + this parameter is used. If an error occurs during the query execution, + the corresponding ``QueryJob`` instance (if available) is stored in + the variable instead. * ``--project `` (optional, line argument): Project to use for running the query. Defaults to the context :attr:`~google.cloud.bigquery.magics.Context.project`. @@ -267,13 +269,29 @@ def default_query_job_config(self, value): context = Context() -def _print_error(error, destination_var=None): +def _handle_error(error, destination_var=None): + """Process a query execution error. + + Args: + error (Exception): + An exception that ocurred during the query exectution. + destination_var (Optional[str]): + The name of the IPython session variable to store the query job. + """ if destination_var: - print( - "Could not save output to variable '{}'.".format(destination_var), - file=sys.stderr, - ) - print("\nERROR:\n", error, file=sys.stderr) + query_job = getattr(error, "query_job", None) + + if query_job is not None: + IPython.get_ipython().push({destination_var: query_job}) + else: + # this is the case when previewing table rows by providing just + # table ID to cell magic + print( + "Could not save output to variable '{}'.".format(destination_var), + file=sys.stderr, + ) + + print("\nERROR:\n", str(error), file=sys.stderr) def _run_query(client, query, job_config=None): @@ -452,7 +470,7 @@ def _cell_magic(line, query): try: rows = client.list_rows(query, max_results=max_results) except Exception as ex: - _print_error(str(ex), args.destination_var) + _handle_error(ex, args.destination_var) return result = rows.to_dataframe(bqstorage_client=bqstorage_client) @@ -476,7 +494,7 @@ def _cell_magic(line, query): try: query_job = _run_query(client, query, job_config=job_config) except Exception as ex: - _print_error(str(ex), args.destination_var) + _handle_error(ex, args.destination_var) return if not args.verbose: diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index b34184f00cd9..07c3ba88da5b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -4337,8 +4337,10 @@ def test_result_error(self): self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) - full_text = str(exc_info.exception) + exc_job_instance = getattr(exc_info.exception, "query_job", None) + self.assertIs(exc_job_instance, job) + full_text = str(exc_info.exception) assert job.job_id in full_text assert "Query Job SQL Follows" in full_text @@ -4370,8 +4372,10 @@ def test__begin_error(self): self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) - full_text = str(exc_info.exception) + exc_job_instance = getattr(exc_info.exception, "query_job", None) + self.assertIs(exc_job_instance, job) + full_text = str(exc_info.exception) assert job.job_id in full_text assert "Query Job SQL Follows" in full_text diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index ec642ff384e1..ed253636c468 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -902,6 +902,37 @@ def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(): assert isinstance(q_job, job.QueryJob) +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_saves_query_job_to_variable_on_error(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + client_query_patch = mock.patch( + "google.cloud.bigquery.client.Client.query", autospec=True + ) + + query_job = mock.create_autospec(job.QueryJob, instance=True) + exception = Exception("Unexpected SELECT") + exception.query_job = query_job + query_job.result.side_effect = exception + + sql = "SELECT SELECT 17 AS num" + + assert "result" not in ip.user_ns + + with client_query_patch as client_query_mock: + client_query_mock.return_value = query_job + return_value = ip.run_cell_magic("bigquery", "result", sql) + + assert return_value is None + assert "result" in ip.user_ns + result = ip.user_ns["result"] + assert isinstance(result, job.QueryJob) + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_w_maximum_bytes_billed_invalid(): ip = IPython.get_ipython() From 6bb69990a8d322fc3024c6365a5a97027bc02c2b Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar <46078689+Emar-Kar@users.noreply.github.com> Date: Fri, 27 Sep 2019 00:51:00 +0300 Subject: [PATCH 0677/2016] docs(bigquery): standardize comments and formatting in existing code samples (#9212) * update samples * comments rephrasing * docs: comments rephrasing --- .../samples/add_empty_column.py | 7 ++-- .../samples/browse_table_data.py | 12 +++++-- .../samples/create_dataset.py | 2 +- .../samples/create_job.py | 2 +- .../samples/create_routine.py | 2 +- .../samples/create_routine_ddl.py | 4 +-- .../samples/create_table.py | 2 +- .../samples/dataset_exists.py | 2 +- .../samples/delete_dataset.py | 4 ++- .../samples/delete_dataset_labels.py | 4 +-- .../samples/delete_model.py | 2 +- .../samples/delete_routine.py | 2 +- .../samples/delete_table.py | 2 +- .../samples/get_dataset.py | 2 +- .../samples/get_dataset_labels.py | 2 +- .../samples/get_model.py | 2 +- .../samples/get_routine.py | 14 ++++---- .../samples/get_table.py | 5 ++- .../samples/label_dataset.py | 4 +-- .../samples/list_datasets.py | 4 +-- .../samples/list_datasets_by_label.py | 2 +- .../samples/list_models.py | 2 +- .../samples/list_routines.py | 2 +- .../samples/list_tables.py | 2 +- .../samples/load_table_dataframe.py | 13 ++++--- .../samples/query_to_arrow.py | 2 +- .../samples/tests/test_browse_table_data.py | 2 ++ .../samples/tests/test_create_routine.py | 23 ++++++++++++ ..._samples.py => test_create_routine_ddl.py} | 35 +------------------ .../samples/tests/test_delete_routine.py | 23 ++++++++++++ .../samples/tests/test_get_dataset.py | 2 +- .../samples/tests/test_get_routine.py | 27 ++++++++++++++ .../samples/tests/test_get_table.py | 2 +- .../tests/test_list_datasets_by_label.py | 2 +- .../samples/tests/test_list_routines.py | 24 +++++++++++++ .../tests/test_load_table_dataframe.py | 6 ++-- .../samples/tests/test_query_to_arrow.py | 1 - .../samples/tests/test_update_routine.py | 22 ++++++++++++ .../samples/update_dataset_access.py | 4 +-- ...update_dataset_default_table_expiration.py | 6 ++-- .../samples/update_dataset_description.py | 4 +-- .../samples/update_model.py | 4 +-- .../samples/update_routine.py | 5 +-- 43 files changed, 200 insertions(+), 95 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/tests/test_create_routine.py rename packages/google-cloud-bigquery/samples/tests/{test_routine_samples.py => test_create_routine_ddl.py} (69%) create mode 100644 packages/google-cloud-bigquery/samples/tests/test_delete_routine.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_get_routine.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_list_routines.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_update_routine.py diff --git a/packages/google-cloud-bigquery/samples/add_empty_column.py b/packages/google-cloud-bigquery/samples/add_empty_column.py index eb84037598d3..4f0b971e577a 100644 --- a/packages/google-cloud-bigquery/samples/add_empty_column.py +++ b/packages/google-cloud-bigquery/samples/add_empty_column.py @@ -21,17 +21,18 @@ def add_empty_column(client, table_id): # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() - # TODO(developer): Set table_id to the ID of the table to add an empty column. + # TODO(developer): Set table_id to the ID of the table + # to add an empty column. # table_id = "your-project.your_dataset.your_table_name" - table = client.get_table(table_id) + table = client.get_table(table_id) # Make an API request. original_schema = table.schema new_schema = original_schema[:] # creates a copy of the schema new_schema.append(bigquery.SchemaField("phone", "STRING")) table.schema = new_schema - table = client.update_table(table, ["schema"]) # API request + table = client.update_table(table, ["schema"]) # Make an API request. if len(table.schema) == len(original_schema) + 1 == len(new_schema): print("A new column has been added.") diff --git a/packages/google-cloud-bigquery/samples/browse_table_data.py b/packages/google-cloud-bigquery/samples/browse_table_data.py index dd6c572cab6d..bba8dc434dd9 100644 --- a/packages/google-cloud-bigquery/samples/browse_table_data.py +++ b/packages/google-cloud-bigquery/samples/browse_table_data.py @@ -26,7 +26,7 @@ def browse_table_data(client, table_id): # table_id = "your-project.your_dataset.your_table_name" # Download all rows from a table. - rows_iter = client.list_rows(table_id) + rows_iter = client.list_rows(table_id) # Make an API request. # Iterate over rows to make the API requests to fetch row data. rows = list(rows_iter) @@ -38,10 +38,18 @@ def browse_table_data(client, table_id): print("Downloaded {} rows from table {}".format(len(rows), table_id)) # Specify selected fields to limit the results to certain columns. - table = client.get_table(table_id) + table = client.get_table(table_id) # Make an API request. fields = table.schema[:2] # first two columns rows_iter = client.list_rows(table_id, selected_fields=fields, max_results=10) rows = list(rows_iter) print("Selected {} columns from table {}.".format(len(rows_iter.schema), table_id)) print("Downloaded {} rows from table {}".format(len(rows), table_id)) + + # Print row data in tabular format. + rows = client.list_rows(table, max_results=10) + format_string = "{!s:<16} " * len(rows.schema) + field_names = [field.name for field in rows.schema] + print(format_string.format(*field_names)) # Prints column headers. + for row in rows: + print(format_string.format(*row)) # Prints row data. # [END bigquery_browse_table] diff --git a/packages/google-cloud-bigquery/samples/create_dataset.py b/packages/google-cloud-bigquery/samples/create_dataset.py index 89ca9d38f5f3..3d64473a2321 100644 --- a/packages/google-cloud-bigquery/samples/create_dataset.py +++ b/packages/google-cloud-bigquery/samples/create_dataset.py @@ -33,6 +33,6 @@ def create_dataset(client, dataset_id): # Send the dataset to the API for creation. # Raises google.api_core.exceptions.Conflict if the Dataset already # exists within the project. - dataset = client.create_dataset(dataset) # API request + dataset = client.create_dataset(dataset) # Make an API request. print("Created dataset {}.{}".format(client.project, dataset.dataset_id)) # [END bigquery_create_dataset] diff --git a/packages/google-cloud-bigquery/samples/create_job.py b/packages/google-cloud-bigquery/samples/create_job.py index 24bb85510598..4f7f27a8e668 100644 --- a/packages/google-cloud-bigquery/samples/create_job.py +++ b/packages/google-cloud-bigquery/samples/create_job.py @@ -33,7 +33,7 @@ def create_job(client): # The client libraries automatically generate a job ID. Override the # generated ID with either the job_id_prefix or job_id parameters. job_id_prefix="code_sample_", - ) # API request + ) # Make an API request. print("Started job: {}".format(query_job.job_id)) # [END bigquery_create_job] diff --git a/packages/google-cloud-bigquery/samples/create_routine.py b/packages/google-cloud-bigquery/samples/create_routine.py index c08ec4799a3e..424ee4ef5553 100644 --- a/packages/google-cloud-bigquery/samples/create_routine.py +++ b/packages/google-cloud-bigquery/samples/create_routine.py @@ -40,7 +40,7 @@ def create_routine(client, routine_id): ], ) - routine = client.create_routine(routine) + routine = client.create_routine(routine) # Make an API request. print("Created routine {}".format(routine.reference)) # [END bigquery_create_routine] diff --git a/packages/google-cloud-bigquery/samples/create_routine_ddl.py b/packages/google-cloud-bigquery/samples/create_routine_ddl.py index a4ae3318e7b4..836e0cdde34a 100644 --- a/packages/google-cloud-bigquery/samples/create_routine_ddl.py +++ b/packages/google-cloud-bigquery/samples/create_routine_ddl.py @@ -36,10 +36,10 @@ def create_routine_ddl(client, routine_id): ) # Initiate the query to create the routine. - query_job = client.query(sql) + query_job = client.query(sql) # Make an API request. # Wait for the query to complete. - query_job.result() + query_job.result() # Waits for the job to complete. print("Created routine {}".format(query_job.ddl_target_routine)) # [END bigquery_create_routine_ddl] diff --git a/packages/google-cloud-bigquery/samples/create_table.py b/packages/google-cloud-bigquery/samples/create_table.py index 2a6e98fc72f6..b77812f7e0ce 100644 --- a/packages/google-cloud-bigquery/samples/create_table.py +++ b/packages/google-cloud-bigquery/samples/create_table.py @@ -30,7 +30,7 @@ def create_table(client, table_id): ] table = bigquery.Table(table_id, schema=schema) - table = client.create_table(table) # API request + table = client.create_table(table) # Make an API request. print( "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id) ) diff --git a/packages/google-cloud-bigquery/samples/dataset_exists.py b/packages/google-cloud-bigquery/samples/dataset_exists.py index 46cf26a623bf..b8b53b8a4580 100644 --- a/packages/google-cloud-bigquery/samples/dataset_exists.py +++ b/packages/google-cloud-bigquery/samples/dataset_exists.py @@ -22,7 +22,7 @@ def dataset_exists(client, dataset_id): # dataset_id = "your-project.your_dataset" try: - client.get_dataset(dataset_id) + client.get_dataset(dataset_id) # Make an API request. print("Dataset {} already exists".format(dataset_id)) except NotFound: print("Dataset {} is not found".format(dataset_id)) diff --git a/packages/google-cloud-bigquery/samples/delete_dataset.py b/packages/google-cloud-bigquery/samples/delete_dataset.py index 6cde1b6b2d27..29302f099998 100644 --- a/packages/google-cloud-bigquery/samples/delete_dataset.py +++ b/packages/google-cloud-bigquery/samples/delete_dataset.py @@ -27,7 +27,9 @@ def delete_dataset(client, dataset_id): # Use the delete_contents parameter to delete a dataset and its contents # Use the not_found_ok parameter to not receive an error if the dataset has already been deleted. - client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) + client.delete_dataset( + dataset_id, delete_contents=True, not_found_ok=True + ) # Make an API request. print("Deleted dataset '{}'.".format(dataset_id)) # [END bigquery_delete_dataset] diff --git a/packages/google-cloud-bigquery/samples/delete_dataset_labels.py b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py index 33ff5c0f2620..425bc98dd96e 100644 --- a/packages/google-cloud-bigquery/samples/delete_dataset_labels.py +++ b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py @@ -25,12 +25,12 @@ def delete_dataset_labels(client, dataset_id): # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = "your-project.your_dataset" - dataset = client.get_dataset(dataset_id) + dataset = client.get_dataset(dataset_id) # Make an API request. # To delete a label from a dataset, set its value to None dataset.labels["color"] = None - dataset = client.update_dataset(dataset, ["labels"]) + dataset = client.update_dataset(dataset, ["labels"]) # Make an API request. print("Labels deleted from {}".format(dataset_id)) # [END bigquery_delete_label_dataset] return dataset diff --git a/packages/google-cloud-bigquery/samples/delete_model.py b/packages/google-cloud-bigquery/samples/delete_model.py index 5ac4305bc97e..b6f32a59ebd9 100644 --- a/packages/google-cloud-bigquery/samples/delete_model.py +++ b/packages/google-cloud-bigquery/samples/delete_model.py @@ -26,7 +26,7 @@ def delete_model(client, model_id): # TODO(developer): Set model_id to the ID of the model to fetch. # model_id = 'your-project.your_dataset.your_model' - client.delete_model(model_id) + client.delete_model(model_id) # Make an API request. print("Deleted model '{}'.".format(model_id)) # [END bigquery_delete_model] diff --git a/packages/google-cloud-bigquery/samples/delete_routine.py b/packages/google-cloud-bigquery/samples/delete_routine.py index c0164b415008..c20b49837b75 100644 --- a/packages/google-cloud-bigquery/samples/delete_routine.py +++ b/packages/google-cloud-bigquery/samples/delete_routine.py @@ -25,7 +25,7 @@ def delete_routine(client, routine_id): # TODO(developer): Set the fully-qualified ID for the routine. # routine_id = "my-project.my_dataset.my_routine" - client.delete_routine(routine_id) + client.delete_routine(routine_id) # Make an API request. print("Deleted routine {}.".format(routine_id)) # [END bigquery_delete_routine] diff --git a/packages/google-cloud-bigquery/samples/delete_table.py b/packages/google-cloud-bigquery/samples/delete_table.py index dcdd3d855b2e..4c4377418556 100644 --- a/packages/google-cloud-bigquery/samples/delete_table.py +++ b/packages/google-cloud-bigquery/samples/delete_table.py @@ -27,6 +27,6 @@ def delete_table(client, table_id): # If the table does not exist, delete_table raises # google.api_core.exceptions.NotFound unless not_found_ok is True - client.delete_table(table_id, not_found_ok=True) + client.delete_table(table_id, not_found_ok=True) # Make an API request. print("Deleted table '{}'.".format(table_id)) # [END bigquery_delete_table] diff --git a/packages/google-cloud-bigquery/samples/get_dataset.py b/packages/google-cloud-bigquery/samples/get_dataset.py index 5586c2b95ebb..cd35745c0dc5 100644 --- a/packages/google-cloud-bigquery/samples/get_dataset.py +++ b/packages/google-cloud-bigquery/samples/get_dataset.py @@ -25,7 +25,7 @@ def get_dataset(client, dataset_id): # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = 'your-project.your_dataset' - dataset = client.get_dataset(dataset_id) + dataset = client.get_dataset(dataset_id) # Make an API request. full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id) friendly_name = dataset.friendly_name diff --git a/packages/google-cloud-bigquery/samples/get_dataset_labels.py b/packages/google-cloud-bigquery/samples/get_dataset_labels.py index 2f21723a550b..46e38a3a9a56 100644 --- a/packages/google-cloud-bigquery/samples/get_dataset_labels.py +++ b/packages/google-cloud-bigquery/samples/get_dataset_labels.py @@ -25,7 +25,7 @@ def get_dataset_labels(client, dataset_id): # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = "your-project.your_dataset" - dataset = client.get_dataset(dataset_id) + dataset = client.get_dataset(dataset_id) # Make an API request. # View dataset labels print("Dataset ID: {}".format(dataset_id)) diff --git a/packages/google-cloud-bigquery/samples/get_model.py b/packages/google-cloud-bigquery/samples/get_model.py index 69986733c50b..0ebd59c9d067 100644 --- a/packages/google-cloud-bigquery/samples/get_model.py +++ b/packages/google-cloud-bigquery/samples/get_model.py @@ -26,7 +26,7 @@ def get_model(client, model_id): # TODO(developer): Set model_id to the ID of the model to fetch. # model_id = 'your-project.your_dataset.your_model' - model = client.get_model(model_id) + model = client.get_model(model_id) # Make an API request. full_model_id = "{}.{}.{}".format(model.project, model.dataset_id, model.model_id) friendly_name = model.friendly_name diff --git a/packages/google-cloud-bigquery/samples/get_routine.py b/packages/google-cloud-bigquery/samples/get_routine.py index d9035c282438..da4e89f57f19 100644 --- a/packages/google-cloud-bigquery/samples/get_routine.py +++ b/packages/google-cloud-bigquery/samples/get_routine.py @@ -25,15 +25,15 @@ def get_routine(client, routine_id): # TODO(developer): Set the fully-qualified ID for the routine. # routine_id = "my-project.my_dataset.my_routine" - routine = client.get_routine(routine_id) + routine = client.get_routine(routine_id) # Make an API request. - print("Routine `{}`:".format(routine.reference)) - print(" Type: '{}'".format(routine.type_)) - print(" Language: '{}'".format(routine.language)) - print(" Arguments:") + print("Routine '{}':".format(routine.reference)) + print("\tType: '{}'".format(routine.type_)) + print("\tLanguage: '{}'".format(routine.language)) + print("\tArguments:") for argument in routine.arguments: - print(" Name: '{}'".format(argument.name)) - print(" Type: '{}'".format(argument.type_)) + print("\t\tName: '{}'".format(argument.name)) + print("\t\tType: '{}'".format(argument.data_type)) # [END bigquery_get_routine] return routine diff --git a/packages/google-cloud-bigquery/samples/get_table.py b/packages/google-cloud-bigquery/samples/get_table.py index e49e032f6e23..201b8808a846 100644 --- a/packages/google-cloud-bigquery/samples/get_table.py +++ b/packages/google-cloud-bigquery/samples/get_table.py @@ -25,13 +25,12 @@ def get_table(client, table_id): # TODO(developer): Set table_id to the ID of the model to fetch. # table_id = 'your-project.your_dataset.your_table' - table = client.get_table(table_id) + table = client.get_table(table_id) # Make an API request. + # View table properties print( "Got table '{}.{}.{}'.".format(table.project, table.dataset_id, table.table_id) ) - - # View table properties print("Table schema: {}".format(table.schema)) print("Table description: {}".format(table.description)) print("Table has {} rows".format(table.num_rows)) diff --git a/packages/google-cloud-bigquery/samples/label_dataset.py b/packages/google-cloud-bigquery/samples/label_dataset.py index 7840ea25a63f..019b2aa374a0 100644 --- a/packages/google-cloud-bigquery/samples/label_dataset.py +++ b/packages/google-cloud-bigquery/samples/label_dataset.py @@ -25,9 +25,9 @@ def label_dataset(client, dataset_id): # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = "your-project.your_dataset" - dataset = client.get_dataset(dataset_id) + dataset = client.get_dataset(dataset_id) # Make an API request. dataset.labels = {"color": "green"} - dataset = client.update_dataset(dataset, ["labels"]) + dataset = client.update_dataset(dataset, ["labels"]) # Make an API request. print("Labels added to {}".format(dataset_id)) # [END bigquery_label_dataset] diff --git a/packages/google-cloud-bigquery/samples/list_datasets.py b/packages/google-cloud-bigquery/samples/list_datasets.py index b57aad1b5e7b..77ae8c785d22 100644 --- a/packages/google-cloud-bigquery/samples/list_datasets.py +++ b/packages/google-cloud-bigquery/samples/list_datasets.py @@ -22,12 +22,12 @@ def list_datasets(client): # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() - datasets = list(client.list_datasets()) + datasets = list(client.list_datasets()) # Make an API request. project = client.project if datasets: print("Datasets in project {}:".format(project)) - for dataset in datasets: # API request(s) + for dataset in datasets: print("\t{}".format(dataset.dataset_id)) else: print("{} project does not contain any datasets.".format(project)) diff --git a/packages/google-cloud-bigquery/samples/list_datasets_by_label.py b/packages/google-cloud-bigquery/samples/list_datasets_by_label.py index 8b574b1110eb..9fa939ad0c19 100644 --- a/packages/google-cloud-bigquery/samples/list_datasets_by_label.py +++ b/packages/google-cloud-bigquery/samples/list_datasets_by_label.py @@ -23,7 +23,7 @@ def list_datasets_by_label(client): # client = bigquery.Client() label_filter = "labels.color:green" - datasets = list(client.list_datasets(filter=label_filter)) + datasets = list(client.list_datasets(filter=label_filter)) # Make an API request. if datasets: print("Datasets filtered by {}:".format(label_filter)) diff --git a/packages/google-cloud-bigquery/samples/list_models.py b/packages/google-cloud-bigquery/samples/list_models.py index 5b4d21799b28..a2477ffc795b 100644 --- a/packages/google-cloud-bigquery/samples/list_models.py +++ b/packages/google-cloud-bigquery/samples/list_models.py @@ -27,7 +27,7 @@ def list_models(client, dataset_id): # the models you are listing. # dataset_id = 'your-project.your_dataset' - models = client.list_models(dataset_id) + models = client.list_models(dataset_id) # Make an API request. print("Models contained in '{}':".format(dataset_id)) for model in models: diff --git a/packages/google-cloud-bigquery/samples/list_routines.py b/packages/google-cloud-bigquery/samples/list_routines.py index 1ae4f441cde1..5eaad0cec8f4 100644 --- a/packages/google-cloud-bigquery/samples/list_routines.py +++ b/packages/google-cloud-bigquery/samples/list_routines.py @@ -26,7 +26,7 @@ def list_routines(client, dataset_id): # the routines you are listing. # dataset_id = 'your-project.your_dataset' - routines = client.list_routines(dataset_id) + routines = client.list_routines(dataset_id) # Make an API request. print("Routines contained in dataset {}:".format(dataset_id)) for routine in routines: diff --git a/packages/google-cloud-bigquery/samples/list_tables.py b/packages/google-cloud-bigquery/samples/list_tables.py index 2057f2d73891..d7576616e191 100644 --- a/packages/google-cloud-bigquery/samples/list_tables.py +++ b/packages/google-cloud-bigquery/samples/list_tables.py @@ -26,7 +26,7 @@ def list_tables(client, dataset_id): # the tables you are listing. # dataset_id = 'your-project.your_dataset' - tables = client.list_tables(dataset_id) + tables = client.list_tables(dataset_id) # Make an API request. print("Tables contained in '{}':".format(dataset_id)) for table in tables: diff --git a/packages/google-cloud-bigquery/samples/load_table_dataframe.py b/packages/google-cloud-bigquery/samples/load_table_dataframe.py index 69eeb6ef89d0..f08712d4dc32 100644 --- a/packages/google-cloud-bigquery/samples/load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/load_table_dataframe.py @@ -14,8 +14,10 @@ def load_table_dataframe(client, table_id): + # [START bigquery_load_table_dataframe] from google.cloud import bigquery + import pandas # TODO(developer): Construct a BigQuery client object. @@ -59,11 +61,14 @@ def load_table_dataframe(client, table_id): ) job = client.load_table_from_dataframe( - dataframe, table_id, job_config=job_config, location="US" - ) - job.result() # Waits for table load to complete. + dataframe, + table_id, + job_config=job_config, + location="US", # Must match the destination dataset location. + ) # Make an API request. + job.result() # Waits for the job to complete. - table = client.get_table(table_id) + table = client.get_table(table_id) # Make an API request. print( "Loaded {} rows and {} columns to {}".format( table.num_rows, len(table.schema), table_id diff --git a/packages/google-cloud-bigquery/samples/query_to_arrow.py b/packages/google-cloud-bigquery/samples/query_to_arrow.py index b13dcf3e1413..4cc69d4e902a 100644 --- a/packages/google-cloud-bigquery/samples/query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/query_to_arrow.py @@ -41,7 +41,7 @@ def query_to_arrow(client): CROSS JOIN UNNEST(r.participants) as participant; """ query_job = client.query(sql) - arrow_table = query_job.to_arrow() + arrow_table = query_job.to_arrow() # Make an API request. print( "Downloaded {} rows, {} columns.".format( diff --git a/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py index f777bf91ca00..0e9cc6055494 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py +++ b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py @@ -24,3 +24,5 @@ def test_browse_table_data(capsys, client, table_with_data_id): assert "Downloaded 10 rows from table {}".format(table_with_data_id) in out assert "Selected 2 columns from table {}".format(table_with_data_id) in out assert "Downloaded 10 rows from table {}".format(table_with_data_id) in out + assert "word" in out + assert "LVII" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_routine.py b/packages/google-cloud-bigquery/samples/tests/test_create_routine.py new file mode 100644 index 000000000000..7220d63542e2 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_create_routine.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import create_routine + + +def test_create_routine(capsys, client, random_routine_id): + + create_routine.create_routine(client, random_routine_id) + out, err = capsys.readouterr() + assert "Created routine {}".format(random_routine_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py b/packages/google-cloud-bigquery/samples/tests/test_create_routine_ddl.py similarity index 69% rename from packages/google-cloud-bigquery/samples/tests/test_routine_samples.py rename to packages/google-cloud-bigquery/samples/tests/test_create_routine_ddl.py index 5a1c69c7f60f..cecda2f654ec 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_routine_ddl.py @@ -15,24 +15,15 @@ from google.cloud import bigquery from google.cloud import bigquery_v2 - -def test_create_routine(capsys, client, random_routine_id): - from .. import create_routine - - create_routine.create_routine(client, random_routine_id) - out, err = capsys.readouterr() - assert "Created routine {}".format(random_routine_id) in out +from .. import create_routine_ddl def test_create_routine_ddl(capsys, client, random_routine_id): - from .. import create_routine_ddl create_routine_ddl.create_routine_ddl(client, random_routine_id) routine = client.get_routine(random_routine_id) out, err = capsys.readouterr() - assert "Created routine {}".format(random_routine_id) in out - return routine assert routine.type_ == "SCALAR_FUNCTION" assert routine.language == "SQL" expected_arguments = [ @@ -63,27 +54,3 @@ def test_create_routine_ddl(capsys, client, random_routine_id): ) ] assert routine.arguments == expected_arguments - - -def test_list_routines(capsys, client, dataset_id, routine_id): - from .. import list_routines - - list_routines.list_routines(client, dataset_id) - out, err = capsys.readouterr() - assert "Routines contained in dataset {}:".format(dataset_id) in out - assert routine_id in out - - -def test_delete_routine(capsys, client, routine_id): - from .. import delete_routine - - delete_routine.delete_routine(client, routine_id) - out, err = capsys.readouterr() - assert "Deleted routine {}.".format(routine_id) in out - - -def test_update_routine(client, routine_id): - from .. import update_routine - - routine = update_routine.update_routine(client, routine_id) - assert routine.body == "x * 4" diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_routine.py b/packages/google-cloud-bigquery/samples/tests/test_delete_routine.py new file mode 100644 index 000000000000..9347d1e22dc2 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_routine.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import delete_routine + + +def test_delete_routine(capsys, client, routine_id): + + delete_routine.delete_routine(client, routine_id) + out, err = capsys.readouterr() + assert "Deleted routine {}.".format(routine_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py index 374f8835211a..dedec1d7b29e 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py @@ -19,4 +19,4 @@ def test_get_dataset(capsys, client, dataset_id): get_dataset.get_dataset(client, dataset_id) out, err = capsys.readouterr() - assert "{}".format(dataset_id) in out + assert dataset_id in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_routine.py b/packages/google-cloud-bigquery/samples/tests/test_get_routine.py new file mode 100644 index 000000000000..fa5f3093116c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_get_routine.py @@ -0,0 +1,27 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import get_routine + + +def test_get_routine(capsys, client, routine_id): + + get_routine.get_routine(client, routine_id) + out, err = capsys.readouterr() + assert "Routine '{}':".format(routine_id) in out + assert "Type: 'SCALAR_FUNCTION'" in out + assert "Language: 'SQL'" in out + assert "Name: 'x'" in out + assert "Type: 'type_kind: INT64\n'" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_table.py b/packages/google-cloud-bigquery/samples/tests/test_get_table.py index b811ccecad1f..efbd464d54ad 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_table.py @@ -30,7 +30,7 @@ def test_get_table(capsys, client, random_table_id): get_table.get_table(client, random_table_id) out, err = capsys.readouterr() assert "Got table '{}'.".format(random_table_id) in out - assert "full_name" in out # test that schema is printed + assert "full_name" in out assert "Table description: Sample Table" in out assert "Table has 0 rows" in out client.delete_table(table, not_found_ok=True) diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py b/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py index 346cbf1a982d..f414539b00b3 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py @@ -23,4 +23,4 @@ def test_list_datasets_by_label(capsys, client, dataset_id): dataset = client.update_dataset(dataset, ["labels"]) list_datasets_by_label.list_datasets_by_label(client) out, err = capsys.readouterr() - assert "{}".format(dataset_id) in out + assert dataset_id in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_routines.py b/packages/google-cloud-bigquery/samples/tests/test_list_routines.py new file mode 100644 index 000000000000..e249238e1976 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_list_routines.py @@ -0,0 +1,24 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import list_routines + + +def test_list_routines(capsys, client, dataset_id, routine_id): + + list_routines.list_routines(client, dataset_id) + out, err = capsys.readouterr() + assert "Routines contained in dataset {}:".format(dataset_id) in out + assert routine_id in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py index d553d449a525..2151704d3b25 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. + import pytest from .. import load_table_dataframe -pytest.importorskip("pandas") -pytest.importorskip("pyarrow") +pandas = pytest.importorskip("pandas") +pyarrow = pytest.importorskip("pyarrow") def test_load_table_dataframe(capsys, client, random_table_id): + table = load_table_dataframe.load_table_dataframe(client, random_table_id) out, _ = capsys.readouterr() assert "Loaded 4 rows and 3 columns" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py index f70bd49fe565..dd9b3ab508cc 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py @@ -22,7 +22,6 @@ def test_query_to_arrow(capsys, client): arrow_table = query_to_arrow.query_to_arrow(client) out, err = capsys.readouterr() assert "Downloaded 8 rows, 2 columns." in out - arrow_schema = arrow_table.schema assert arrow_schema.names == ["race", "participant"] assert pyarrow.types.is_string(arrow_schema.types[0]) diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_routine.py b/packages/google-cloud-bigquery/samples/tests/test_update_routine.py new file mode 100644 index 000000000000..8adfab32e032 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_update_routine.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import update_routine + + +def test_update_routine(client, routine_id): + + routine = update_routine.update_routine(client, routine_id) + assert routine.body == "x * 4" diff --git a/packages/google-cloud-bigquery/samples/update_dataset_access.py b/packages/google-cloud-bigquery/samples/update_dataset_access.py index aa316a38dff9..134cf1b940cf 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_access.py @@ -24,7 +24,7 @@ def update_dataset_access(client, dataset_id): # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = 'your-project.your_dataset' - dataset = client.get_dataset(dataset_id) + dataset = client.get_dataset(dataset_id) # Make an API request. entry = bigquery.AccessEntry( role="READER", @@ -36,7 +36,7 @@ def update_dataset_access(client, dataset_id): entries.append(entry) dataset.access_entries = entries - dataset = client.update_dataset(dataset, ["access_entries"]) # API request + dataset = client.update_dataset(dataset, ["access_entries"]) # Make an API request. full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id) print( diff --git a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py index 4534bb2011eb..7b68ede8d2be 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py @@ -25,12 +25,12 @@ def update_dataset_default_table_expiration(client, dataset_id): # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = 'your-project.your_dataset' - dataset = client.get_dataset(dataset_id) - dataset.default_table_expiration_ms = 24 * 60 * 60 * 1000 # in milliseconds + dataset = client.get_dataset(dataset_id) # Make an API request. + dataset.default_table_expiration_ms = 24 * 60 * 60 * 1000 # in milliseconds. dataset = client.update_dataset( dataset, ["default_table_expiration_ms"] - ) # API request + ) # Make an API request. full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id) print( diff --git a/packages/google-cloud-bigquery/samples/update_dataset_description.py b/packages/google-cloud-bigquery/samples/update_dataset_description.py index f3afb7fa68ce..08eed8da2b64 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_description.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_description.py @@ -25,9 +25,9 @@ def update_dataset_description(client, dataset_id): # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = 'your-project.your_dataset' - dataset = client.get_dataset(dataset_id) + dataset = client.get_dataset(dataset_id) # Make an API request. dataset.description = "Updated description." - dataset = client.update_dataset(dataset, ["description"]) + dataset = client.update_dataset(dataset, ["description"]) # Make an API request. full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id) print( diff --git a/packages/google-cloud-bigquery/samples/update_model.py b/packages/google-cloud-bigquery/samples/update_model.py index 5df4ada886ed..7583c410e1ef 100644 --- a/packages/google-cloud-bigquery/samples/update_model.py +++ b/packages/google-cloud-bigquery/samples/update_model.py @@ -26,9 +26,9 @@ def update_model(client, model_id): # TODO(developer): Set model_id to the ID of the model to fetch. # model_id = 'your-project.your_dataset.your_model' - model = client.get_model(model_id) + model = client.get_model(model_id) # Make an API request. model.description = "This model was modified from a Python program." - model = client.update_model(model, ["description"]) + model = client.update_model(model, ["description"]) # Make an API request. full_model_id = "{}.{}.{}".format(model.project, model.dataset_id, model.model_id) print( diff --git a/packages/google-cloud-bigquery/samples/update_routine.py b/packages/google-cloud-bigquery/samples/update_routine.py index 4d491d42e168..4489d68f7ee4 100644 --- a/packages/google-cloud-bigquery/samples/update_routine.py +++ b/packages/google-cloud-bigquery/samples/update_routine.py @@ -33,13 +33,14 @@ def update_routine(client, routine_id): routine, [ "body", - # Due to a limitation of the API, all fields are required, not just + # Due to a limitation of the API, + # all fields are required, not just # those that have been updated. "arguments", "language", "type_", "return_type", ], - ) + ) # Make an API request. # [END bigquery_update_routine] return routine From b2a378d1fd8d410e25d2b216ae1535a88e3a2e05 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 27 Sep 2019 10:17:52 +0200 Subject: [PATCH 0678/2016] feat(bigquery): add support for listing jobs by parent job (#9225) * Add parent_job_id, num_child_jobs to *Job classes * Add parent_job parameter to client.list_jobs() * Add system test for listing scripting jobs --- .../google/cloud/bigquery/client.py | 10 ++++ .../google/cloud/bigquery/job.py | 25 ++++++++++ .../google-cloud-bigquery/tests/system.py | 48 +++++++++++++++++++ .../tests/unit/test_client.py | 18 +++++++ .../tests/unit/test_job.py | 16 +++++++ 5 files changed, 117 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 0fd9483963d3..5811b7f2d563 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1216,6 +1216,7 @@ def cancel_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): def list_jobs( self, project=None, + parent_job=None, max_results=None, page_token=None, all_users=None, @@ -1233,6 +1234,11 @@ def list_jobs( project (str, optional): Project ID to use for retreiving datasets. Defaults to the client's project. + parent_job (Optional[Union[ \ + :class:`~google.cloud.bigquery.job._AsyncJob`, \ + str, \ + ]]): + If set, retrieve only child jobs of the specified parent. max_results (int, optional): Maximum number of jobs to return. page_token (str, optional): @@ -1265,6 +1271,9 @@ def list_jobs( google.api_core.page_iterator.Iterator: Iterable of job instances. """ + if isinstance(parent_job, job._AsyncJob): + parent_job = parent_job.job_id + extra_params = { "allUsers": all_users, "stateFilter": state_filter, @@ -1275,6 +1284,7 @@ def list_jobs( google.cloud._helpers._millis_from_datetime(max_creation_time) ), "projection": "full", + "parentJobId": parent_job, } extra_params = { diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 8ea8860a7ac2..b15189651d3c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -332,6 +332,31 @@ def job_id(self): """str: ID of the job.""" return _helpers._get_sub_prop(self._properties, ["jobReference", "jobId"]) + @property + def parent_job_id(self): + """Return the ID of the parent job. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.parent_job_id + + Returns: + Optional[str] + """ + return _helpers._get_sub_prop(self._properties, ["statistics", "parentJobId"]) + + @property + def num_child_jobs(self): + """The number of child jobs executed. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.num_child_jobs + + Returns: + int + """ + count = _helpers._get_sub_prop(self._properties, ["statistics", "numChildJobs"]) + return int(count) if count is not None else 0 + @property def project(self): """Project bound to the job. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 4816962a70d6..ce34b0007101 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -431,6 +431,54 @@ def test_list_tables(self): ) self.assertGreater(len(list(iterator)), 0) + def test_listing_scripting_jobs(self): + # run an SQL script + sql_script = """ + -- Declare a variable to hold names as an array. + DECLARE top_names ARRAY; + + -- Build an array of the top 100 names from the year 2017. + SET top_names = ( + SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100) + FROM `bigquery-public-data.usa_names.usa_1910_current` + WHERE year = 2017 + ); + + -- Which names appear as words in Shakespeare's plays? + SELECT + name AS shakespeare_name + FROM UNNEST(top_names) AS name + WHERE name IN ( + SELECT word + FROM `bigquery-public-data.samples.shakespeare` + ); + """ + test_start = datetime.datetime.utcnow() + query_job = Config.CLIENT.query(sql_script, project=Config.CLIENT.project) + query_job.result() + + # fetch jobs created by the SQL script, sort them into parent and + # child jobs + script_jobs = list(Config.CLIENT.list_jobs(min_creation_time=test_start)) + + parent_jobs = [] + child_jobs = [] + + for job in script_jobs: + if job.num_child_jobs > 0: + parent_jobs.append(job) + else: + child_jobs.append(job) + + assert len(parent_jobs) == 1 # also implying num_child_jobs > 0 + assert len(child_jobs) == parent_jobs[0].num_child_jobs + + # fetch jobs using the parent job filter, verify that results are as expected + fetched_jobs = list(Config.CLIENT.list_jobs(parent_job=parent_jobs[0])) + assert sorted(job.job_id for job in fetched_jobs) == sorted( + job.job_id for job in child_jobs + ) + def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ea4b114358a9..db70eaa861ee 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2952,6 +2952,24 @@ def test_list_jobs_w_time_filter(self): }, ) + def test_list_jobs_w_parent_job_filter(self): + from google.cloud.bigquery import job + + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection({}, {}) + + parent_job_args = ["parent-job-123", job._AsyncJob("parent-job-123", client)] + + for parent_job in parent_job_args: + list(client.list_jobs(parent_job=parent_job)) + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/%s/jobs" % self.PROJECT, + query_params={"projection": "full", "parentJobId": "parent-job-123"}, + ) + conn.api_request.reset_mock() + def test_load_table_from_uri(self): from google.cloud.bigquery.job import LoadJob diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 07c3ba88da5b..a46004b1a97f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -268,6 +268,22 @@ def test_job_type(self): self.assertEqual(derived.job_type, "derived") + def test_parent_job_id(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIsNone(job.parent_job_id) + job._properties["statistics"] = {"parentJobId": "parent-job-123"} + self.assertEqual(job.parent_job_id, "parent-job-123") + + def test_num_child_jobs(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertEqual(job.num_child_jobs, 0) + job._properties["statistics"] = {"numChildJobs": "17"} + self.assertEqual(job.num_child_jobs, 17) + def test_labels_miss(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) From c2b59c4ee3cefb30de15c503adc51a784a5fdc7a Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar <46078689+Emar-Kar@users.noreply.github.com> Date: Wed, 9 Oct 2019 01:51:06 +0300 Subject: [PATCH 0679/2016] feat(bigquery): add support for sheets ranges (#9416) * add range parameter * update samples to use range * update renge usage --- .../google-cloud-bigquery/docs/snippets.py | 102 ------------------ .../google/cloud/bigquery/external_config.py | 14 +++ .../query_external_sheets_permanent_table.py | 73 +++++++++++++ .../query_external_sheets_temporary_table.py | 69 ++++++++++++ ...t_query_external_sheets_permanent_table.py | 25 +++++ ...t_query_external_sheets_temporary_table.py | 23 ++++ .../tests/unit/test_external_config.py | 11 +- 7 files changed, 213 insertions(+), 104 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py create mode 100644 packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 4c39ff912230..0104976a739a 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -2303,108 +2303,6 @@ def test_query_external_gcs_permanent_table(client, to_delete): assert len(w_states) == 4 -def test_query_external_sheets_temporary_table(client): - # [START bigquery_query_external_sheets_temp] - # [START bigquery_auth_drive_scope] - import google.auth - - # from google.cloud import bigquery - - # Create credentials with Drive & BigQuery API scopes - # Both APIs must be enabled for your project before running this code - credentials, project = google.auth.default( - scopes=[ - "https://www.googleapis.com/auth/drive", - "https://www.googleapis.com/auth/bigquery", - ] - ) - client = bigquery.Client(credentials=credentials, project=project) - # [END bigquery_auth_drive_scope] - - # Configure the external data source and query job - external_config = bigquery.ExternalConfig("GOOGLE_SHEETS") - # Use a shareable link or grant viewing access to the email address you - # used to authenticate with BigQuery (this example Sheet is public) - sheet_url = ( - "https://docs.google.com/spreadsheets" - "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing" - ) - external_config.source_uris = [sheet_url] - external_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - external_config.options.skip_leading_rows = 1 # optionally skip header row - table_id = "us_states" - job_config = bigquery.QueryJobConfig() - job_config.table_definitions = {table_id: external_config} - - # Example query to find states starting with 'W' - sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id) - - query_job = client.query(sql, job_config=job_config) # API request - - w_states = list(query_job) # Waits for query to finish - print("There are {} states with names starting with W.".format(len(w_states))) - # [END bigquery_query_external_sheets_temp] - assert len(w_states) == 4 - - -def test_query_external_sheets_permanent_table(client, to_delete): - dataset_id = "query_external_sheets_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_query_external_sheets_perm] - import google.auth - - # from google.cloud import bigquery - # dataset_id = 'my_dataset' - - # Create credentials with Drive & BigQuery API scopes - # Both APIs must be enabled for your project before running this code - credentials, project = google.auth.default( - scopes=[ - "https://www.googleapis.com/auth/drive", - "https://www.googleapis.com/auth/bigquery", - ] - ) - client = bigquery.Client(credentials=credentials, project=project) - - # Configure the external data source - dataset_ref = client.dataset(dataset_id) - table_id = "us_states" - schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - table = bigquery.Table(dataset_ref.table(table_id), schema=schema) - external_config = bigquery.ExternalConfig("GOOGLE_SHEETS") - # Use a shareable link or grant viewing access to the email address you - # used to authenticate with BigQuery (this example Sheet is public) - sheet_url = ( - "https://docs.google.com/spreadsheets" - "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing" - ) - external_config.source_uris = [sheet_url] - external_config.options.skip_leading_rows = 1 # optionally skip header row - table.external_data_configuration = external_config - - # Create a permanent table linked to the Sheets file - table = client.create_table(table) # API request - - # Example query to find states starting with 'W' - sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id) - - query_job = client.query(sql) # API request - - w_states = list(query_job) # Waits for query to finish - print("There are {} states with names starting with W.".format(len(w_states))) - # [END bigquery_query_external_sheets_perm] - assert len(w_states) == 4 - - def test_ddl_create_view(client, to_delete, capsys): """Create a view via a DDL query.""" project = client.project diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 048c2178a654..ce0efd2f20d2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -26,6 +26,7 @@ from google.cloud.bigquery._helpers import _to_bytes from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none +from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery.schema import SchemaField @@ -524,6 +525,19 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) + @property + def range(self): + """str: The range of a sheet that BigQuery will query from. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions + """ + return _str_or_none(self._properties.get("range")) + + @range.setter + def range(self, value): + self._properties["range"] = value + def to_api_repr(self): """Build an API representation of this object. diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py new file mode 100644 index 000000000000..fd4f7577f1b1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py @@ -0,0 +1,73 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def query_external_sheets_permanent_table(dataset_id): + + # [START bigquery_query_external_sheets_perm] + from google.cloud import bigquery + import google.auth + + # Create credentials with Drive & BigQuery API scopes. + # Both APIs must be enabled for your project before running this code. + credentials, project = google.auth.default( + scopes=[ + "https://www.googleapis.com/auth/drive", + "https://www.googleapis.com/auth/bigquery", + ] + ) + + # TODO(developer): Construct a BigQuery client object. + client = bigquery.Client(credentials=credentials, project=project) + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = "your-project.your_dataset" + + # Configure the external data source. + dataset = client.get_dataset(dataset_id) + table_id = "us_states" + schema = [ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ] + table = bigquery.Table(dataset.table(table_id), schema=schema) + external_config = bigquery.ExternalConfig("GOOGLE_SHEETS") + # Use a shareable link or grant viewing access to the email address you + # used to authenticate with BigQuery (this example Sheet is public). + sheet_url = ( + "https://docs.google.com/spreadsheets" + "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing" + ) + external_config.source_uris = [sheet_url] + external_config.options.skip_leading_rows = 1 # Optionally skip header row. + external_config.options.range = ( + "us-states!A20:B49" + ) # Optionally set range of the sheet to query from. + table.external_data_configuration = external_config + + # Create a permanent table linked to the Sheets file. + table = client.create_table(table) # Make an API request. + + # Example query to find states starting with "W". + sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id) + query_job = client.query(sql) # Make an API request. + + # Wait for the query to complete. + w_states = list(query_job) + print( + "There are {} states with names starting with W in the selected range.".format( + len(w_states) + ) + ) + # [END bigquery_query_external_sheets_perm] diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py new file mode 100644 index 000000000000..9f17e91a46cc --- /dev/null +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py @@ -0,0 +1,69 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def query_external_sheets_temporary_table(): + + # [START bigquery_query_external_sheets_temp] + # [START bigquery_auth_drive_scope] + from google.cloud import bigquery + import google.auth + + # Create credentials with Drive & BigQuery API scopes. + # Both APIs must be enabled for your project before running this code. + credentials, project = google.auth.default( + scopes=[ + "https://www.googleapis.com/auth/drive", + "https://www.googleapis.com/auth/bigquery", + ] + ) + + # TODO(developer): Construct a BigQuery client object. + client = bigquery.Client(credentials=credentials, project=project) + # [END bigquery_auth_drive_scope] + + # Configure the external data source and query job. + external_config = bigquery.ExternalConfig("GOOGLE_SHEETS") + + # Use a shareable link or grant viewing access to the email address you + # used to authenticate with BigQuery (this example Sheet is public). + sheet_url = ( + "https://docs.google.com/spreadsheets" + "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing" + ) + external_config.source_uris = [sheet_url] + external_config.schema = [ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ] + external_config.options.skip_leading_rows = 1 # Optionally skip header row. + external_config.options.range = ( + "us-states!A20:B49" + ) # Optionally set range of the sheet to query from. + table_id = "us_states" + job_config = bigquery.QueryJobConfig() + job_config.table_definitions = {table_id: external_config} + + # Example query to find states starting with "W". + sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id) + query_job = client.query(sql, job_config=job_config) # Make an API request. + + # Wait for the query to complete. + w_states = list(query_job) + print( + "There are {} states with names starting with W in the selected range.".format( + len(w_states) + ) + ) + # [END bigquery_query_external_sheets_temp] diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py new file mode 100644 index 000000000000..a7b5db09e5af --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py @@ -0,0 +1,25 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import query_external_sheets_permanent_table + + +def test_query_external_sheets_permanent_table(capsys, dataset_id): + + query_external_sheets_permanent_table.query_external_sheets_permanent_table( + dataset_id + ) + out, err = capsys.readouterr() + assert "There are 2 states with names starting with W in the selected range." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py new file mode 100644 index 000000000000..4856b6a49d2b --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import query_external_sheets_temporary_table + + +def test_query_external_sheets_temporary_table(capsys): + + query_external_sheets_temporary_table.query_external_sheets_temporary_table() + out, err = capsys.readouterr() + assert "There are 2 states with names starting with W in the selected range." in out diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index ddf95e317969..dab4391cbe04 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -130,7 +130,10 @@ def test_from_api_repr_sheets(self): self.BASE_RESOURCE, { "sourceFormat": "GOOGLE_SHEETS", - "googleSheetsOptions": {"skipLeadingRows": "123"}, + "googleSheetsOptions": { + "skipLeadingRows": "123", + "range": "Sheet1!A5:B10", + }, }, ) @@ -140,14 +143,17 @@ def test_from_api_repr_sheets(self): self.assertEqual(ec.source_format, "GOOGLE_SHEETS") self.assertIsInstance(ec.options, external_config.GoogleSheetsOptions) self.assertEqual(ec.options.skip_leading_rows, 123) + self.assertEqual(ec.options.range, "Sheet1!A5:B10") got_resource = ec.to_api_repr() self.assertEqual(got_resource, resource) del resource["googleSheetsOptions"]["skipLeadingRows"] + del resource["googleSheetsOptions"]["range"] ec = external_config.ExternalConfig.from_api_repr(resource) self.assertIsNone(ec.options.skip_leading_rows) + self.assertIsNone(ec.options.range) got_resource = ec.to_api_repr() self.assertEqual(got_resource, resource) @@ -155,11 +161,12 @@ def test_to_api_repr_sheets(self): ec = external_config.ExternalConfig("GOOGLE_SHEETS") options = external_config.GoogleSheetsOptions() options.skip_leading_rows = 123 + options.range = "Sheet1!A5:B10" ec._options = options exp_resource = { "sourceFormat": "GOOGLE_SHEETS", - "googleSheetsOptions": {"skipLeadingRows": "123"}, + "googleSheetsOptions": {"skipLeadingRows": "123", "range": "Sheet1!A5:B10"}, } got_resource = ec.to_api_repr() From dbf5e678ecec952c71c163feb00d0df8c2826172 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 10 Oct 2019 14:16:31 -0700 Subject: [PATCH 0680/2016] feat(bigquery): add script statistics to job resource (#9428) * feat(bigquery): add script statistics to job resource * add explicit unit test coverage for the ScriptStackFrame and ScriptStatistics classes --- .../google/cloud/bigquery/job.py | 89 +++++++++++++ .../tests/unit/test_job.py | 117 ++++++++++++++++++ 2 files changed, 206 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index b15189651d3c..6768e45fbbcf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -344,6 +344,15 @@ def parent_job_id(self): """ return _helpers._get_sub_prop(self._properties, ["statistics", "parentJobId"]) + @property + def script_statistics(self): + resource = _helpers._get_sub_prop( + self._properties, ["statistics", "scriptStatistics"] + ) + if resource is None: + return None + return ScriptStatistics(resource) + @property def num_child_jobs(self): """The number of child jobs executed. @@ -3456,3 +3465,83 @@ def from_api_repr(cls, resource, client): resource["jobReference"] = job_ref_properties job._properties = resource return job + + +class ScriptStackFrame(object): + """Stack frame showing the line/column/procedure name where the current + evaluation happened. + + Args: + resource (Map[str, Any]): + JSON representation of object. + """ + + def __init__(self, resource): + self._properties = resource + + @property + def procedure_id(self): + """Optional[str]: Name of the active procedure. + + Omitted if in a top-level script. + """ + return self._properties.get("procedureId") + + @property + def text(self): + """str: Text of the current statement/expression.""" + return self._properties.get("text") + + @property + def start_line(self): + """int: One-based start line.""" + return _helpers._int_or_none(self._properties.get("startLine")) + + @property + def start_column(self): + """int: One-based start column.""" + return _helpers._int_or_none(self._properties.get("startColumn")) + + @property + def end_line(self): + """int: One-based end line.""" + return _helpers._int_or_none(self._properties.get("endLine")) + + @property + def end_column(self): + """int: One-based end column.""" + return _helpers._int_or_none(self._properties.get("endColumn")) + + +class ScriptStatistics(object): + """Statistics for a child job of a script. + + Args: + resource (Map[str, Any]): + JSON representation of object. + """ + + def __init__(self, resource): + self._properties = resource + + @property + def stack_frames(self): + """List[ScriptStackFrame]: Stack trace where the current evaluation + happened. + + Shows line/column/procedure name of each frame on the stack at the + point where the current evaluation happened. + + The leaf frame is first, the primary script is last. + """ + return [ + ScriptStackFrame(frame) for frame in self._properties.get("stackFrames", []) + ] + + @property + def evaluation_kind(self): + """str: Indicates the type of child job. + + Possible values include ``STATEMENT`` and ``EXPRESSION``. + """ + return self._properties.get("evaluationKind") diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a46004b1a97f..9710085105c4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -276,6 +276,37 @@ def test_parent_job_id(self): job._properties["statistics"] = {"parentJobId": "parent-job-123"} self.assertEqual(job.parent_job_id, "parent-job-123") + def test_script_statistics(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIsNone(job.script_statistics) + job._properties["statistics"] = { + "scriptStatistics": { + "evaluationKind": "EXPRESSION", + "stackFrames": [ + { + "startLine": 5, + "startColumn": 29, + "endLine": 9, + "endColumn": 14, + "text": "QUERY TEXT", + } + ], + } + } + script_stats = job.script_statistics + self.assertEqual(script_stats.evaluation_kind, "EXPRESSION") + stack_frames = script_stats.stack_frames + self.assertEqual(len(stack_frames), 1) + stack_frame = stack_frames[0] + self.assertIsNone(stack_frame.procedure_id) + self.assertEqual(stack_frame.start_line, 5) + self.assertEqual(stack_frame.start_column, 29) + self.assertEqual(stack_frame.end_line, 9) + self.assertEqual(stack_frame.end_column, 14) + self.assertEqual(stack_frame.text, "QUERY TEXT") + def test_num_child_jobs(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -5339,6 +5370,92 @@ def test_end(self): self.assertEqual(entry.end.strftime(_RFC3339_MICROS), self.END_RFC3339_MICROS) +class TestScriptStackFrame(unittest.TestCase, _Base): + def _make_one(self, resource): + from google.cloud.bigquery.job import ScriptStackFrame + + return ScriptStackFrame(resource) + + def test_procedure_id(self): + frame = self._make_one({"procedureId": "some-procedure"}) + self.assertEqual(frame.procedure_id, "some-procedure") + del frame._properties["procedureId"] + self.assertIsNone(frame.procedure_id) + + def test_start_line(self): + frame = self._make_one({"startLine": 5}) + self.assertEqual(frame.start_line, 5) + frame._properties["startLine"] = "5" + self.assertEqual(frame.start_line, 5) + + def test_start_column(self): + frame = self._make_one({"startColumn": 29}) + self.assertEqual(frame.start_column, 29) + frame._properties["startColumn"] = "29" + self.assertEqual(frame.start_column, 29) + + def test_end_line(self): + frame = self._make_one({"endLine": 9}) + self.assertEqual(frame.end_line, 9) + frame._properties["endLine"] = "9" + self.assertEqual(frame.end_line, 9) + + def test_end_column(self): + frame = self._make_one({"endColumn": 14}) + self.assertEqual(frame.end_column, 14) + frame._properties["endColumn"] = "14" + self.assertEqual(frame.end_column, 14) + + def test_text(self): + frame = self._make_one({"text": "QUERY TEXT"}) + self.assertEqual(frame.text, "QUERY TEXT") + + +class TestScriptStatistics(unittest.TestCase, _Base): + def _make_one(self, resource): + from google.cloud.bigquery.job import ScriptStatistics + + return ScriptStatistics(resource) + + def test_evalutation_kind(self): + stats = self._make_one({"evaluationKind": "EXPRESSION"}) + self.assertEqual(stats.evaluation_kind, "EXPRESSION") + self.assertEqual(stats.stack_frames, []) + + def test_stack_frames(self): + stats = self._make_one( + { + "stackFrames": [ + { + "procedureId": "some-procedure", + "startLine": 5, + "startColumn": 29, + "endLine": 9, + "endColumn": 14, + "text": "QUERY TEXT", + }, + {}, + ] + } + ) + stack_frames = stats.stack_frames + self.assertEqual(len(stack_frames), 2) + stack_frame = stack_frames[0] + self.assertEqual(stack_frame.procedure_id, "some-procedure") + self.assertEqual(stack_frame.start_line, 5) + self.assertEqual(stack_frame.start_column, 29) + self.assertEqual(stack_frame.end_line, 9) + self.assertEqual(stack_frame.end_column, 14) + self.assertEqual(stack_frame.text, "QUERY TEXT") + stack_frame = stack_frames[1] + self.assertIsNone(stack_frame.procedure_id) + self.assertIsNone(stack_frame.start_line) + self.assertIsNone(stack_frame.start_column) + self.assertIsNone(stack_frame.end_line) + self.assertIsNone(stack_frame.end_column) + self.assertIsNone(stack_frame.text) + + class TestTimelineEntry(unittest.TestCase, _Base): ELAPSED_MS = 101 ACTIVE_UNITS = 50 From a665f4de3c450ede2b02ade661f4f1a1be0fc082 Mon Sep 17 00:00:00 2001 From: Gurov Ilya Date: Fri, 11 Oct 2019 00:37:07 +0300 Subject: [PATCH 0681/2016] refactor(bigquery): rewrite docs in Google style, part 1 (#9326) towards issue #9092 --- .../google/cloud/bigquery/_http.py | 7 +- .../google/cloud/bigquery/dataset.py | 5 +- .../google/cloud/bigquery/dbapi/connection.py | 20 +- .../google/cloud/bigquery/dbapi/types.py | 23 +- .../google/cloud/bigquery/external_config.py | 25 +- .../google/cloud/bigquery/model.py | 5 +- .../google/cloud/bigquery/query.py | 224 +++++++++--------- .../google/cloud/bigquery/routine.py | 17 +- .../google/cloud/bigquery/schema.py | 28 +-- .../google/cloud/bigquery/table.py | 72 ++---- 10 files changed, 189 insertions(+), 237 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index dd0d9d01c9de..2ff4effefb76 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -22,11 +22,10 @@ class Connection(_http.JSONConnection): """A connection to Google BigQuery via the JSON REST API. - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client that owns the current connection. + Args: + client (google.cloud.bigquery.client.Client): The client that owns the current connection. - :type client_info: :class:`~google.api_core.client_info.ClientInfo` - :param client_info: (Optional) instance used to generate user agent. + client_info (google.api_core.client_info.ClientInfo): (Optional) instance used to generate user agent. """ DEFAULT_API_ENDPOINT = "https://bigquery.googleapis.com" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 67a7353f94e7..0b973a4bec8a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -349,10 +349,7 @@ class Dataset(object): https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets Args: - dataset_ref (Union[ \ - :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ - str, \ - ]): + dataset_ref (Union[google.cloud.bigquery.dataset.DatasetReference, str]): A pointer to a dataset. If ``dataset_ref`` is a string, it must include both the project ID and the dataset ID, separated by ``.``. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py index 0dbc9143b255..ee7d0dc3cc59 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py @@ -21,8 +21,8 @@ class Connection(object): """DB-API Connection to Google BigQuery. - :type client: :class:`~google.cloud.bigquery.Client` - :param client: A client used to connect to BigQuery. + Args: + client (google.cloud.bigquery.Client): A client used to connect to BigQuery. """ def __init__(self, client): @@ -37,8 +37,8 @@ def commit(self): def cursor(self): """Return a new cursor object. - :rtype: :class:`~google.cloud.bigquery.dbapi.Cursor` - :returns: A DB-API cursor that uses this connection. + Returns: + google.cloud.bigquery.dbapi.Cursor: A DB-API cursor that uses this connection. """ return cursor.Cursor(self) @@ -46,13 +46,13 @@ def cursor(self): def connect(client=None): """Construct a DB-API connection to Google BigQuery. - :type client: :class:`~google.cloud.bigquery.Client` - :param client: - (Optional) A client used to connect to BigQuery. If not passed, a - client is created using default options inferred from the environment. + Args: + client (google.cloud.bigquery.Client): + (Optional) A client used to connect to BigQuery. If not passed, a + client is created using default options inferred from the environment. - :rtype: :class:`~google.cloud.bigquery.dbapi.Connection` - :returns: A new DB-API connection to BigQuery. + Returns: + google.cloud.bigquery.dbapi.Connection: A new DB-API connection to BigQuery. """ if client is None: client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py index 3c8c454a011a..14917820cd38 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py @@ -33,11 +33,11 @@ def Binary(string): """Contruct a DB-API binary value. - :type string: str - :param string: A string to encode as a binary value. + Args: + string (str): A string to encode as a binary value. - :rtype: bytes - :returns: The UTF-8 encoded bytes representing the string. + Returns: + bytes: The UTF-8 encoded bytes representing the string. """ return string.encode("utf-8") @@ -45,16 +45,15 @@ def Binary(string): def TimeFromTicks(ticks, tz=None): """Construct a DB-API time value from the given ticks value. - :type ticks: float - :param ticks: - a number of seconds since the epoch; see the documentation of the - standard Python time module for details. + Args: + ticks (float): + a number of seconds since the epoch; see the documentation of the + standard Python time module for details. - :type tz: :class:`datetime.tzinfo` - :param tz: (Optional) time zone to use for conversion + tz (datetime.tzinfo): (Optional) time zone to use for conversion - :rtype: :class:`datetime.time` - :returns: time represented by ticks. + Returns: + datetime.time: time represented by ticks. """ dt = datetime.datetime.fromtimestamp(ticks, tz=tz) return dt.timetz() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index ce0efd2f20d2..c637d37d185c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -176,8 +176,7 @@ def from_api_repr(cls, resource): API. Returns: - :class:`~.external_config.BigtableColumn`: - Configuration parsed from ``resource``. + external_config.BigtableColumn: Configuration parsed from ``resource``. """ config = cls() config._properties = copy.deepcopy(resource) @@ -249,7 +248,7 @@ def type_(self, value): @property def columns(self): - """List[:class:`~.external_config.BigtableColumn`]: Lists of columns + """List[BigtableColumn]: Lists of columns that should be exposed as individual fields. See @@ -369,8 +368,7 @@ def from_api_repr(cls, resource): API. Returns: - :class:`~.external_config.BigtableOptions`: - Configuration parsed from ``resource``. + BigtableOptions: Configuration parsed from ``resource``. """ config = cls() config._properties = copy.deepcopy(resource) @@ -476,8 +474,7 @@ def to_api_repr(self): """Build an API representation of this object. Returns: - Dict[str, Any]: - A dictionary in the format used by the BigQuery API. + Dict[str, Any]: A dictionary in the format used by the BigQuery API. """ return copy.deepcopy(self._properties) @@ -493,8 +490,7 @@ def from_api_repr(cls, resource): API. Returns: - :class:`~.external_config.CSVOptions`: - Configuration parsed from ``resource``. + CSVOptions: Configuration parsed from ``resource``. """ config = cls() config._properties = copy.deepcopy(resource) @@ -542,8 +538,7 @@ def to_api_repr(self): """Build an API representation of this object. Returns: - Dict[str, Any]: - A dictionary in the format used by the BigQuery API. + Dict[str, Any]: A dictionary in the format used by the BigQuery API. """ return copy.deepcopy(self._properties) @@ -559,8 +554,7 @@ def from_api_repr(cls, resource): API. Returns: - :class:`~.external_config.GoogleSheetsOptions`: - Configuration parsed from ``resource``. + GoogleSheetsOptions: Configuration parsed from ``resource``. """ config = cls() config._properties = copy.deepcopy(resource) @@ -574,7 +568,7 @@ class ExternalConfig(object): """Description of an external data source. Args: - source_format (:class:`~.external_config.ExternalSourceFormat`): + source_format (ExternalSourceFormat): See :attr:`source_format`. """ @@ -719,8 +713,7 @@ def from_api_repr(cls, resource): API. Returns: - :class:`~.external_config.ExternalConfig`: - Configuration parsed from ``resource``. + ExternalConfig: Configuration parsed from ``resource``. """ config = cls(resource["sourceFormat"]) for optcls in _OPTION_CLASSES: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index 4049a9232467..9dd4e3627196 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -34,10 +34,7 @@ class Model(object): https://cloud.google.com/bigquery/docs/reference/rest/v2/models Args: - model_ref (Union[ \ - :class:`~google.cloud.bigquery.model.ModelReference`, \ - str, \ - ]): + model_ref (Union[google.cloud.bigquery.model.ModelReference, str]): A pointer to a model. If ``model_ref`` is a string, it must included a project ID, dataset ID, and model ID, each separated by ``.``. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 4039be33db8c..a971ab8ab747 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -26,11 +26,10 @@ class UDFResource(object): """Describe a single user-defined function (UDF) resource. - :type udf_type: str - :param udf_type: the type of the resource ('inlineCode' or 'resourceUri') + Args: + udf_type (str): the type of the resource ('inlineCode' or 'resourceUri') - :type value: str - :param value: the inline code or resource URI. + value (str): the inline code or resource URI. See https://cloud.google.com/bigquery/user-defined-functions#api @@ -57,10 +56,11 @@ class _AbstractQueryParameter(object): def from_api_repr(cls, resource): """Factory: construct parameter from JSON resource. - :type resource: dict - :param resource: JSON mapping of parameter + Args: + resource (Dict): JSON mapping of parameter - :rtype: :class:`~google.cloud.bigquery.query.ScalarQueryParameter` + Returns: + google.cloud.bigquery.query.ScalarQueryParameter """ raise NotImplementedError @@ -75,18 +75,18 @@ def to_api_repr(self): class ScalarQueryParameter(_AbstractQueryParameter): """Named / positional query parameters for scalar values. - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). + Args: + name (Optional[str]): + Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). - :type type_: str - :param type_: name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or - 'DATE'. + type_ (str): + name of parameter type. One of 'STRING', 'INT64', + 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'DATE'. - :type value: str, int, float, :class:`decimal.Decimal`, bool, - :class:`datetime.datetime`, or :class:`datetime.date`. - :param value: the scalar parameter value. + value (Union[str, int, float, decimal.Decimal, bool, + datetime.datetime, datetime.date]): the scalar parameter value. """ def __init__(self, name, type_, value): @@ -98,19 +98,18 @@ def __init__(self, name, type_, value): def positional(cls, type_, value): """Factory for positional paramater. - :type type_: str - :param type_: - name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or - 'DATE'. + Args: + type_ (str): + name of parameter type. One of 'STRING', 'INT64', + 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'DATE'. - :type value: str, int, float, :class:`decimal.Decimal`, bool, - :class:`datetime.datetime`, or - :class:`datetime.date`. - :param value: the scalar parameter value. + value (Union[str, int, float, decimal.Decimal, bool, + datetime.datetime, + datetime.date]): the scalar parameter value. - :rtype: :class:`~google.cloud.bigquery.query.ScalarQueryParameter` - :returns: instance without name + Returns: + google.cloud.bigquery.query.ScalarQueryParameter: instance without name """ return cls(None, type_, value) @@ -118,11 +117,11 @@ def positional(cls, type_, value): def from_api_repr(cls, resource): """Factory: construct parameter from JSON resource. - :type resource: dict - :param resource: JSON mapping of parameter + Args: + resource (Dict): JSON mapping of parameter - :rtype: :class:`~google.cloud.bigquery.query.ScalarQueryParameter` - :returns: instance + Returns: + google.cloud.bigquery.query.ScalarQueryParameter: instance """ name = resource.get("name") type_ = resource["parameterType"]["type"] @@ -140,8 +139,8 @@ def from_api_repr(cls, resource): def to_api_repr(self): """Construct JSON API representation for the parameter. - :rtype: dict - :returns: JSON mapping + Returns: + Dict: JSON mapping """ value = self.value converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) @@ -161,8 +160,7 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - tuple: The contents of this - :class:`~google.cloud.bigquery.query.ScalarQueryParameter`. + Tuple: The contents of this :class:`~google.cloud.bigquery.query.ScalarQueryParameter`. """ return (self.name, self.type_.upper(), self.value) @@ -181,17 +179,16 @@ def __repr__(self): class ArrayQueryParameter(_AbstractQueryParameter): """Named / positional query parameters for array values. - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). + Args: + name (Optional[str]): + Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). - :type array_type: str - :param array_type: - name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + array_type (str): + name of type of array elements. One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. - :type values: list of appropriate scalar type. - :param values: the parameter array values. + values (List[appropriate scalar type]): the parameter array values. """ def __init__(self, name, array_type, values): @@ -203,16 +200,15 @@ def __init__(self, name, array_type, values): def positional(cls, array_type, values): """Factory for positional parameters. - :type array_type: str - :param array_type: - name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + Args: + array_type (str): + name of type of array elements. One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. - :type values: list of appropriate scalar type - :param values: the parameter array values. + values (List[appropriate scalar type]): the parameter array values. - :rtype: :class:`~google.cloud.bigquery.query.ArrayQueryParameter` - :returns: instance without name + Returns: + google.cloud.bigquery.query.ArrayQueryParameter: instance without name """ return cls(None, array_type, values) @@ -249,11 +245,11 @@ def _from_api_repr_scalar(cls, resource): def from_api_repr(cls, resource): """Factory: construct parameter from JSON resource. - :type resource: dict - :param resource: JSON mapping of parameter + Args: + resource (Dict): JSON mapping of parameter - :rtype: :class:`~google.cloud.bigquery.query.ArrayQueryParameter` - :returns: instance + Returns: + google.cloud.bigquery.query.ArrayQueryParameter: instance """ array_type = resource["parameterType"]["arrayType"]["type"] if array_type == "STRUCT": @@ -263,8 +259,8 @@ def from_api_repr(cls, resource): def to_api_repr(self): """Construct JSON API representation for the parameter. - :rtype: dict - :returns: JSON mapping + Returns: + Dict: JSON mapping """ values = self.values if self.array_type == "RECORD" or self.array_type == "STRUCT": @@ -291,8 +287,7 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - tuple: The contents of this - :class:`~google.cloud.bigquery.query.ArrayQueryParameter`. + Tuple: The contents of this :class:`~google.cloud.bigquery.query.ArrayQueryParameter`. """ return (self.name, self.array_type.upper(), self.values) @@ -311,15 +306,16 @@ def __repr__(self): class StructQueryParameter(_AbstractQueryParameter): """Named / positional query parameters for struct values. - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). + Args: + name (Optional[str]): + Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). - :type sub_params: - tuple of :class:`~google.cloud.bigquery.query.ScalarQueryParameter`, - :class:`~google.cloud.bigquery.query.ArrayQueryParameter`, or - :class:`~google.cloud.bigquery.query.StructQueryParameter` - :param sub_params: the sub-parameters for the struct + sub_params (Union[Tuple[ + google.cloud.bigquery.query.ScalarQueryParameter, + google.cloud.bigquery.query.ArrayQueryParameter, + google.cloud.bigquery.query.StructQueryParameter + ]]): the sub-parameters for the struct """ def __init__(self, name, *sub_params): @@ -341,15 +337,15 @@ def __init__(self, name, *sub_params): def positional(cls, *sub_params): """Factory for positional parameters. - :type sub_params: - tuple of - :class:`~google.cloud.bigquery.query.ScalarQueryParameter`, - :class:`~google.cloud.bigquery.query.ArrayQueryParameter`, or - :class:`~google.cloud.bigquery.query.StructQueryParameter` - :param sub_params: the sub-parameters for the struct + Args: + sub_params (Union[Tuple[ + google.cloud.bigquery.query.ScalarQueryParameter, + google.cloud.bigquery.query.ArrayQueryParameter, + google.cloud.bigquery.query.StructQueryParameter + ]]): the sub-parameters for the struct - :rtype: :class:`~google.cloud.bigquery.query.StructQueryParameter` - :returns: instance without name + Returns: + google.cloud.bigquery.query.StructQueryParameter: instance without name """ return cls(None, *sub_params) @@ -357,11 +353,11 @@ def positional(cls, *sub_params): def from_api_repr(cls, resource): """Factory: construct parameter from JSON resource. - :type resource: dict - :param resource: JSON mapping of parameter + Args: + resource (Dict): JSON mapping of parameter - :rtype: :class:`~google.cloud.bigquery.query.StructQueryParameter` - :returns: instance + Returns: + google.cloud.bigquery.query.StructQueryParameter: instance """ name = resource.get("name") instance = cls(name) @@ -397,8 +393,8 @@ def from_api_repr(cls, resource): def to_api_repr(self): """Construct JSON API representation for the parameter. - :rtype: dict - :returns: JSON mapping + Returns: + Dict: JSON mapping """ s_types = {} values = {} @@ -432,8 +428,7 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - tuple: The contents of this - :class:`~google.cloud.biquery.ArrayQueryParameter`. + Tuple: The contents of this :class:`~google.cloud.biquery.ArrayQueryParameter`. """ return (self.name, self.struct_types, self.struct_values) @@ -468,8 +463,8 @@ def from_api_repr(cls, api_response): def project(self): """Project bound to the query job. - :rtype: str - :returns: the project that the query job is associated with. + Returns: + str: The project that the query job is associated with. """ return self._properties.get("jobReference", {}).get("projectId") @@ -480,9 +475,10 @@ def cache_hit(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#cacheHit - :rtype: bool or ``NoneType`` - :returns: True if the query results were served from cache (None - until set by the server). + Returns: + Optional[bool]: + True if the query results were served from cache (None + until set by the server). """ return self._properties.get("cacheHit") @@ -493,9 +489,10 @@ def complete(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobComplete - :rtype: bool or ``NoneType`` - :returns: True if the query completed on the server (None - until set by the server). + Returns: + Optional[bool]: + True if the query completed on the server (None + until set by the server). """ return self._properties.get("jobComplete") @@ -506,9 +503,10 @@ def errors(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#errors - :rtype: list of mapping, or ``NoneType`` - :returns: Mappings describing errors generated on the server (None - until set by the server). + Returns: + Optional[List[Mapping]]: + Mappings describing errors generated on the server (None + until set by the server). """ return self._properties.get("errors") @@ -519,8 +517,8 @@ def job_id(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobReference - :rtype: string - :returns: Job ID of the query job. + Returns: + str: Job ID of the query job. """ return self._properties.get("jobReference", {}).get("jobId") @@ -531,8 +529,8 @@ def page_token(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#pageToken - :rtype: str, or ``NoneType`` - :returns: Token generated on the server (None until set by the server). + Returns: + Optional[str]: Token generated on the server (None until set by the server). """ return self._properties.get("pageToken") @@ -543,8 +541,8 @@ def total_rows(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalRows - :rtype: int, or ``NoneType`` - :returns: Count generated on the server (None until set by the server). + Returns: + Optional[int}: Count generated on the server (None until set by the server). """ total_rows = self._properties.get("totalRows") if total_rows is not None: @@ -557,8 +555,8 @@ def total_bytes_processed(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalBytesProcessed - :rtype: int, or ``NoneType`` - :returns: Count generated on the server (None until set by the server). + Returns: + Optional[int]: Count generated on the server (None until set by the server). """ total_bytes_processed = self._properties.get("totalBytesProcessed") if total_bytes_processed is not None: @@ -571,8 +569,8 @@ def num_dml_affected_rows(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#numDmlAffectedRows - :rtype: int, or ``NoneType`` - :returns: Count generated on the server (None until set by the server). + Returns: + Optional[int]: Count generated on the server (None until set by the server). """ num_dml_affected_rows = self._properties.get("numDmlAffectedRows") if num_dml_affected_rows is not None: @@ -585,8 +583,9 @@ def rows(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#rows - :rtype: list of :class:`~google.cloud.bigquery.table.Row` - :returns: fields describing the schema (None until set by the server). + Returns: + Optional[List[google.cloud.bigquery.table.Row]]: + Fields describing the schema (None until set by the server). """ return _rows_from_json(self._properties.get("rows", ()), self.schema) @@ -597,16 +596,17 @@ def schema(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#schema - :rtype: list of :class:`SchemaField`, or ``NoneType`` - :returns: fields describing the schema (None until set by the server). + Returns: + Optional[List[SchemaField]]: + Fields describing the schema (None until set by the server). """ return _parse_schema_resource(self._properties.get("schema", {})) def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` - :type api_response: dict - :param api_response: response returned from an API call + Args: + api_response (Dict): response returned from an API call """ job_id_present = ( "jobReference" in api_response diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py index d5bb752dfddb..14c7fcc103c2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py @@ -31,10 +31,7 @@ class Routine(object): https://cloud.google.com/bigquery/docs/reference/rest/v2/routines Args: - routine_ref (Union[ \ - str, \ - google.cloud.bigquery.routine.RoutineReference, \ - ]): + routine_ref (Union[str, google.cloud.bigquery.routine.RoutineReference]): A pointer to a routine. If ``routine_ref`` is a string, it must included a project ID, dataset ID, and routine ID, each separated by ``.``. @@ -262,8 +259,7 @@ def to_api_repr(self): """Construct the API resource representation of this routine. Returns: - Dict[str, object]: - Routine represented as an API resource. + Dict[str, object]: Routine represented as an API resource. """ return self._properties @@ -366,8 +362,7 @@ def from_api_repr(cls, resource): """Factory: construct a routine argument given its API representation. Args: - resource (Dict[str, object]): - Resource, as returned from the API. + resource (Dict[str, object]): Resource, as returned from the API. Returns: google.cloud.bigquery.routine.RoutineArgument: @@ -381,8 +376,7 @@ def to_api_repr(self): """Construct the API resource representation of this routine argument. Returns: - Dict[str, object]: - Routine argument represented as an API resource. + Dict[str, object]: Routine argument represented as an API resource. """ return self._properties @@ -485,8 +479,7 @@ def to_api_repr(self): """Construct the API resource representation of this routine reference. Returns: - Dict[str, object]: - Routine reference represented as an API resource. + Dict[str, object]: Routine reference represented as an API resource. """ return self._properties diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index e0673d85baf6..4cbdb83b2814 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -53,12 +53,12 @@ class SchemaField(object): field_type (str): the type of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.type - mode (str): the mode of the field. See + mode (str): the mode of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.mode - description (Optional[str]):description for the field. + description (Optional[str]): description for the field. - fields (Tuple[:class:`~google.cloud.bigquery.schema.SchemaField`]): + fields (Tuple[google.cloud.bigquery.schema.SchemaField]): subfields (requires ``field_type`` of 'RECORD'). """ @@ -79,8 +79,7 @@ def from_api_repr(cls, api_repr): :meth:`to_api_repr`. Returns: - google.cloud.biquery.schema.SchemaField: - The ``SchemaField`` object. + google.cloud.biquery.schema.SchemaField: The ``SchemaField`` object. """ # Handle optional properties with default values mode = api_repr.get("mode", "NULLABLE") @@ -139,8 +138,7 @@ def to_api_repr(self): """Return a dictionary representing this schema field. Returns: - dict: A dictionary representing the SchemaField in a serialized - form. + Dict: A dictionary representing the SchemaField in a serialized form. """ # Put together the basic representation. See http://bit.ly/2hOAT5u. answer = { @@ -164,8 +162,7 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - tuple: The contents of this - :class:`~google.cloud.bigquery.schema.SchemaField`. + Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ return ( self._name, @@ -229,11 +226,11 @@ def _parse_schema_resource(info): """Parse a resource fragment into a schema field. Args: - info: (Mapping[str->dict]): should contain a "fields" key to be parsed + info: (Mapping[str, Dict]): should contain a "fields" key to be parsed Returns: - (Union[Sequence[:class:`google.cloud.bigquery.schema.SchemaField`],None]) - a list of parsed fields, or ``None`` if no "fields" key found. + Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]: + A list of parsed fields, or ``None`` if no "fields" key found. """ if "fields" not in info: return () @@ -253,10 +250,9 @@ def _build_schema_resource(fields): """Generate a resource fragment for a schema. Args: - fields [Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]): - schema to be dumped + fields (Sequence[google.cloud.bigquery.schema.SchemaField): schema to be dumped. - Returns: (Sequence[dict]) - mappings describing the schema of the supplied fields. + Returns: + Sequence[Dict]: Mappings describing the schema of the supplied fields. """ return [field.to_api_repr() for field in fields] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 71fc9ef945d4..9b8518c214ae 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -367,10 +367,7 @@ class Table(object): https://cloud.google.com/bigquery/docs/reference/rest/v2/tables Args: - table_ref (Union[ \ - :class:`~google.cloud.bigquery.table.TableReference`, \ - str, \ - ]): + table_ref (Union[google.cloud.bigquery.table.TableReference, str]): A pointer to a table. If ``table_ref`` is a string, it must included a project ID, dataset ID, and table ID, each separated by ``.``. @@ -1308,15 +1305,13 @@ class RowIterator(HTTPIterator): Defaults to a sensible value set by the API. extra_params (Dict[str, object]): Extra query string parameters for the API call. - table (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + table (Union[ + google.cloud.bigquery.table.Table, + google.cloud.bigquery.table.TableReference, ]): Optional. The table which these rows belong to, or a reference to it. Used to call the BigQuery Storage API to fetch rows. - selected_fields (Sequence[ \ - google.cloud.bigquery.schema.SchemaField, \ - ]): + selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): Optional. A subset of columns to select from this table. """ @@ -1481,8 +1476,8 @@ def to_arrow(self, progress_bar_type=None, bqstorage_client=None): ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. - bqstorage_client ( \ - google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ + bqstorage_client ( + google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient ): **Beta Feature** Optional. A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows @@ -1501,8 +1496,7 @@ def to_arrow(self, progress_bar_type=None, bqstorage_client=None): from the destination table's schema. Raises: - ValueError: - If the :mod:`pyarrow` library cannot be imported. + ValueError: If the :mod:`pyarrow` library cannot be imported. ..versionadded:: 1.17.0 """ @@ -1567,9 +1561,7 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non """Create a pandas DataFrame by loading all pages of a query. Args: - bqstorage_client ( \ - google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ - ): + bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient): **Beta Feature** Optional. A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. @@ -1584,9 +1576,7 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non query result tables with the BQ Storage API. When a problem is encountered reading a table, the tabledata.list method from the BigQuery API is used, instead. - dtypes ( \ - Map[str, Union[str, pandas.Series.dtype]] \ - ): + dtypes (Map[str, Union[str, pandas.Series.dtype]]): Optional. A dictionary of column names pandas ``dtype``s. The provided ``dtype`` is used when constructing the series for the column specified. Otherwise, the default pandas behavior @@ -1680,12 +1670,10 @@ def to_arrow(self, progress_bar_type=None): """[Beta] Create an empty class:`pyarrow.Table`. Args: - progress_bar_type (Optional[str]): - Ignored. Added for compatibility with RowIterator. + progress_bar_type (Optional[str]): Ignored. Added for compatibility with RowIterator. Returns: - pyarrow.Table: - An empty :class:`pyarrow.Table`. + pyarrow.Table: An empty :class:`pyarrow.Table`. """ if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) @@ -1695,16 +1683,12 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non """Create an empty dataframe. Args: - bqstorage_client (Any): - Ignored. Added for compatibility with RowIterator. - dtypes (Any): - Ignored. Added for compatibility with RowIterator. - progress_bar_type (Any): - Ignored. Added for compatibility with RowIterator. + bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. + dtypes (Any): Ignored. Added for compatibility with RowIterator. + progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. Returns: - pandas.DataFrame: - An empty :class:`~pandas.DataFrame`. + pandas.DataFrame: An empty :class:`~pandas.DataFrame`. """ if pandas is None: raise ValueError(_NO_PANDAS_ERROR) @@ -1872,14 +1856,12 @@ def _item_to_row(iterator, resource): added to the iterator after being created, which should be done by the caller. - :type iterator: :class:`~google.api_core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type resource: dict - :param resource: An item to be converted to a row. + Args: + iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use. + resource (Dict): An item to be converted to a row. - :rtype: :class:`~google.cloud.bigquery.table.Row` - :returns: The next row in the page. + Returns: + google.cloud.bigquery.table.Row: The next row in the page. """ return Row( _helpers._row_tuple_from_json(resource, iterator.schema), @@ -1910,14 +1892,10 @@ def get_column_data(field_index, field): def _rows_page_start(iterator, page, response): """Grab total rows when :class:`~google.cloud.iterator.Page` starts. - :type iterator: :class:`~google.api_core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type page: :class:`~google.api_core.page_iterator.Page` - :param page: The page that was just created. - - :type response: dict - :param response: The JSON API response for a page of rows in a table. + Args: + iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use. + page (google.api_core.page_iterator.Page): The page that was just created. + response (Dict): The JSON API response for a page of rows in a table. """ # Make a (lazy) copy of the page in column-oriented format for use in data # science packages. From 8b623664ed024acbcc8fafe9bd949758a582c979 Mon Sep 17 00:00:00 2001 From: Gurov Ilya Date: Fri, 11 Oct 2019 13:13:36 +0300 Subject: [PATCH 0682/2016] docs(bigquery): fix incorrect links to REST API in reference docs (#9436) fixes issue #9429 --- .../google/cloud/bigquery/client.py | 10 +- .../google/cloud/bigquery/dataset.py | 4 +- .../google/cloud/bigquery/job.py | 122 +++++++++--------- .../google/cloud/bigquery/model.py | 2 +- .../google/cloud/bigquery/query.py | 20 +-- .../google/cloud/bigquery/routine.py | 10 +- .../google/cloud/bigquery/schema.py | 10 +- .../google/cloud/bigquery/table.py | 4 +- 8 files changed, 91 insertions(+), 91 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5811b7f2d563..1ad107ba8151 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -284,7 +284,7 @@ def list_datasets( filter (str): Optional. An expression for filtering the results by label. For syntax, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#filter. + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#body.QUERY_PARAMETERS.filter max_results (int): Optional. Maximum number of datasets to return. page_token (str): @@ -1320,7 +1320,7 @@ def load_table_from_uri( """Starts a job for loading data into a table from CloudStorage. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload Arguments: source_uris (Union[str, Sequence[str]]): @@ -1858,7 +1858,7 @@ def copy_table( """Copy one or more tables to another table. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationtablecopy Arguments: sources (Union[ \ @@ -1949,7 +1949,7 @@ def extract_table( """Start a job to extract a table into Cloud Storage files. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationextract Arguments: source (Union[ \ @@ -2020,7 +2020,7 @@ def query( """Run a SQL query. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationquery Arguments: query (str): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 0b973a4bec8a..3b241dd7776d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -207,7 +207,7 @@ class DatasetReference(object): """DatasetReferences are pointers to datasets. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#datasetreference Args: project (str): The ID of the project @@ -346,7 +346,7 @@ class Dataset(object): """Datasets are containers for tables. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource-dataset Args: dataset_ref (Union[google.cloud.bigquery.dataset.DatasetReference, str]): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 6768e45fbbcf..ed58d7b89185 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -962,7 +962,7 @@ def allow_jagged_rows(self): """bool: Allow missing trailing optional columns (CSV only). See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_jagged_rows """ return self._get_sub_prop("allowJaggedRows") @@ -975,7 +975,7 @@ def allow_quoted_newlines(self): """bool: Allow quoted data containing newline characters (CSV only). See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_quoted_newlines """ return self._get_sub_prop("allowQuotedNewlines") @@ -988,7 +988,7 @@ def autodetect(self): """bool: Automatically infer the schema from a sample of the data. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.autodetect """ return self._get_sub_prop("autodetect") @@ -1030,7 +1030,7 @@ def create_disposition(self): for creating tables. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_disposition """ return self._get_sub_prop("createDisposition") @@ -1047,7 +1047,7 @@ def destination_encryption_configuration(self): if using default encryption. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationEncryptionConfiguration + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_encryption_configuration """ prop = self._get_sub_prop("destinationEncryptionConfiguration") if prop is not None: @@ -1068,7 +1068,7 @@ def destination_table_description(self): """Union[str, None] name given to destination table. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTableProperties.description + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description """ prop = self._get_sub_prop("destinationTableProperties") if prop is not None: @@ -1087,7 +1087,7 @@ def destination_table_friendly_name(self): """Union[str, None] name given to destination table. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTableProperties.friendlyName + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name """ prop = self._get_sub_prop("destinationTableProperties") if prop is not None: @@ -1107,7 +1107,7 @@ def encoding(self): data. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.encoding """ return self._get_sub_prop("encoding") @@ -1120,7 +1120,7 @@ def field_delimiter(self): """str: The separator for fields in a CSV file. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.field_delimiter """ return self._get_sub_prop("fieldDelimiter") @@ -1133,7 +1133,7 @@ def ignore_unknown_values(self): """bool: Ignore extra values not represented in the table schema. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.ignore_unknown_values """ return self._get_sub_prop("ignoreUnknownValues") @@ -1146,7 +1146,7 @@ def max_bad_records(self): """int: Number of invalid rows to ignore. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.max_bad_records """ return _helpers._int_or_none(self._get_sub_prop("maxBadRecords")) @@ -1159,7 +1159,7 @@ def null_marker(self): """str: Represents a null value (CSV only). See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_marker """ return self._get_sub_prop("nullMarker") @@ -1172,7 +1172,7 @@ def quote_character(self): """str: Character used to quote data sections (CSV only). See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.quote """ return self._get_sub_prop("quote") @@ -1186,7 +1186,7 @@ def schema(self): destination table. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.schema """ schema = _helpers._get_sub_prop(self._properties, ["load", "schema", "fields"]) if schema is None: @@ -1224,7 +1224,7 @@ def skip_leading_rows(self): """int: Number of rows to skip when reading data (CSV only). See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.skip_leading_rows """ return _helpers._int_or_none(self._get_sub_prop("skipLeadingRows")) @@ -1237,7 +1237,7 @@ def source_format(self): """google.cloud.bigquery.job.SourceFormat: File format of the data. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_format """ return self._get_sub_prop("sourceFormat") @@ -1282,7 +1282,7 @@ def write_disposition(self): the destination table already exists. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.write_disposition """ return self._get_sub_prop("writeDisposition") @@ -1302,7 +1302,7 @@ class LoadJob(_AsyncJob): :type source_uris: sequence of string or ``NoneType`` :param source_uris: URIs of one or more data files to be loaded. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris for supported URI formats. Pass None for jobs that load from a file. :type destination: :class:`google.cloud.bigquery.table.TableReference` @@ -1330,7 +1330,7 @@ def destination(self): """google.cloud.bigquery.table.TableReference: table where loaded rows are written See: - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTable + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_table """ return self._destination @@ -1450,7 +1450,7 @@ def destination_table_description(self): """Union[str, None] name given to destination table. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTableProperties.description + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description """ return self._configuration.destination_table_description @@ -1459,7 +1459,7 @@ def destination_table_friendly_name(self): """Union[str, None] name given to destination table. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.destinationTableProperties.friendlyName + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name """ return self._configuration.destination_table_friendly_name @@ -1614,7 +1614,7 @@ def create_disposition(self): for creating tables. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.create_disposition """ return self._get_sub_prop("createDisposition") @@ -1628,7 +1628,7 @@ def write_disposition(self): the destination table already exists. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.write_disposition """ return self._get_sub_prop("writeDisposition") @@ -1645,7 +1645,7 @@ def destination_encryption_configuration(self): if using default encryption. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.destinationEncryptionConfiguration + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.destination_encryption_configuration """ prop = self._get_sub_prop("destinationEncryptionConfiguration") if prop is not None: @@ -1809,7 +1809,7 @@ def compression(self): exported files. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.compression """ return self._get_sub_prop("compression") @@ -1822,7 +1822,7 @@ def destination_format(self): """google.cloud.bigquery.job.DestinationFormat: Exported file format. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.destination_format """ return self._get_sub_prop("destinationFormat") @@ -1835,7 +1835,7 @@ def field_delimiter(self): """str: Delimiter to use between fields in the exported data. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.field_delimiter """ return self._get_sub_prop("fieldDelimiter") @@ -1848,7 +1848,7 @@ def print_header(self): """bool: Print a header row in the exported data. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.print_header """ return self._get_sub_prop("printHeader") @@ -1925,7 +1925,7 @@ def destination_uri_file_counts(self): """Return file counts from job statistics, if present. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.extract.destinationUriFileCounts + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts Returns: a list of integer counts, each representing the number of files @@ -2048,7 +2048,7 @@ def destination_encryption_configuration(self): if using default encryption. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationEncryptionConfiguration + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_encryption_configuration """ prop = self._get_sub_prop("destinationEncryptionConfiguration") if prop is not None: @@ -2067,7 +2067,7 @@ def allow_large_results(self): """bool: Allow large query results tables (legacy SQL, only) See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.allow_large_results """ return self._get_sub_prop("allowLargeResults") @@ -2081,7 +2081,7 @@ def create_disposition(self): for creating tables. See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.create_disposition """ return self._get_sub_prop("createDisposition") @@ -2104,7 +2104,7 @@ def default_dataset(self): separated by ``.``. For example: ``your-project.your_dataset``. See - https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.default_dataset """ prop = self._get_sub_prop("defaultDataset") if prop is not None: @@ -2141,7 +2141,7 @@ def destination(self): ``your-project.your_dataset.your_table``. See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table """ prop = self._get_sub_prop("destinationTable") if prop is not None: @@ -2164,7 +2164,7 @@ def dry_run(self): costs. See - https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.dry_run """ return self._properties.get("dryRun") @@ -2177,7 +2177,7 @@ def flatten_results(self): """bool: Flatten nested/repeated fields in results. (Legacy SQL only) See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.flatten_results """ return self._get_sub_prop("flattenResults") @@ -2191,7 +2191,7 @@ def maximum_billing_tier(self): queries. See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_billing_tier """ return self._get_sub_prop("maximumBillingTier") @@ -2204,7 +2204,7 @@ def maximum_bytes_billed(self): """int: Maximum bytes to be billed for this job or :data:`None` if not set. See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_bytes_billed """ return _helpers._int_or_none(self._get_sub_prop("maximumBytesBilled")) @@ -2217,7 +2217,7 @@ def priority(self): """google.cloud.bigquery.job.QueryPriority: Priority of the query. See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.priority """ return self._get_sub_prop("priority") @@ -2233,7 +2233,7 @@ def query_parameters(self): for parameterized query (empty by default) See: - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.queryParameters + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query_parameters """ prop = self._get_sub_prop("queryParameters", default=[]) return _from_api_repr_query_parameters(prop) @@ -2248,7 +2248,7 @@ def udf_resources(self): defined function resources (empty by default) See: - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.userDefinedFunctionResources + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.user_defined_function_resources """ prop = self._get_sub_prop("userDefinedFunctionResources", default=[]) return _from_api_repr_udf_resources(prop) @@ -2264,7 +2264,7 @@ def use_legacy_sql(self): """bool: Use legacy SQL syntax. See - https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.useLegacySql + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_legacy_sql """ return self._get_sub_prop("useLegacySql") @@ -2277,7 +2277,7 @@ def use_query_cache(self): """bool: Look for the query result in the cache. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_query_cache """ return self._get_sub_prop("useQueryCache") @@ -2291,7 +2291,7 @@ def write_disposition(self): the destination table already exists. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.write_disposition """ return self._get_sub_prop("writeDisposition") @@ -2305,7 +2305,7 @@ def table_definitions(self): Definitions for external tables or :data:`None` if not set. See - https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.external_table_definitions """ prop = self._get_sub_prop("tableDefinitions") if prop is not None: @@ -2497,7 +2497,7 @@ def query(self): """str: The query text used in this query job. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.query + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query """ return _helpers._get_sub_prop( self._properties, ["configuration", "query", "query"] @@ -2621,7 +2621,7 @@ def query_plan(self): """Return query plan from job statistics, if present. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.queryPlan + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan :rtype: list of :class:`QueryPlanEntry` :returns: mappings describing the query plan, or an empty list @@ -2643,7 +2643,7 @@ def total_bytes_processed(self): """Return total bytes processed from job statistics, if present. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesProcessed + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_processed :rtype: int or None :returns: total bytes processed by the job, or None if job is not @@ -2659,7 +2659,7 @@ def total_bytes_billed(self): """Return total bytes billed from job statistics, if present. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesBilled + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_billed :rtype: int or None :returns: total bytes processed by the job, or None if job is not @@ -2675,7 +2675,7 @@ def billing_tier(self): """Return billing tier from job statistics, if present. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.billingTier + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.billing_tier :rtype: int or None :returns: billing tier used by the job, or None if job is not @@ -2688,7 +2688,7 @@ def cache_hit(self): """Return whether or not query results were served from cache. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.cacheHit + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.cache_hit :rtype: bool or None :returns: whether the query results were returned from cache, or None @@ -2701,7 +2701,7 @@ def ddl_operation_performed(self): """Optional[str]: Return the DDL operation performed. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.ddlOperationPerformed + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_operation_performed """ return self._job_statistics().get("ddlOperationPerformed") @@ -2712,7 +2712,7 @@ def ddl_target_routine(self): for CREATE/DROP FUNCTION/PROCEDURE queries. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobstatistics + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_routine """ prop = self._job_statistics().get("ddlTargetRoutine") if prop is not None: @@ -2725,7 +2725,7 @@ def ddl_target_table(self): for CREATE/DROP TABLE/VIEW queries. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.ddlTargetTable + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_table """ prop = self._job_statistics().get("ddlTargetTable") if prop is not None: @@ -2737,7 +2737,7 @@ def num_dml_affected_rows(self): """Return the number of DML rows affected by the job. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.numDmlAffectedRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.num_dml_affected_rows :rtype: int or None :returns: number of DML rows affected by the job, or None if job is not @@ -2758,7 +2758,7 @@ def statement_type(self): """Return statement type from job statistics, if present. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.statementType + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.statement_type :rtype: str or None :returns: type of statement used by the job, or None if job is not @@ -2771,7 +2771,7 @@ def referenced_tables(self): """Return referenced tables from job statistics, if present. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.referencedTables + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_tables :rtype: list of dict :returns: mappings describing the query plan, or an empty list @@ -2800,7 +2800,7 @@ def undeclared_query_parameters(self): """Return undeclared query parameters from job statistics, if present. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.undeclaredQueryParameters + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.undeclared_query_parameters :rtype: list of @@ -2832,7 +2832,7 @@ def estimated_bytes_processed(self): """Return the estimated number of bytes processed by the query. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.estimatedBytesProcessed + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.estimated_bytes_processed :rtype: int or None :returns: number of DML rows affected by the job, or None if job is not @@ -3148,7 +3148,7 @@ class QueryPlanEntry(object): """QueryPlanEntry represents a single stage of a query execution plan. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ExplainQueryStage for the underlying API representation within query statistics. """ @@ -3387,7 +3387,7 @@ class TimelineEntry(object): point in time. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#querytimelinesample for the underlying API representation within query statistics. """ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index 9dd4e3627196..f0c3ee79f33e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -296,7 +296,7 @@ class ModelReference(object): """ModelReferences are pointers to models. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/models + https://cloud.google.com/bigquery/docs/reference/rest/v2/models#modelreference """ def __init__(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index a971ab8ab747..59fcd1a59c64 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -473,7 +473,7 @@ def cache_hit(self): """Query results served from cache. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#cacheHit + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.cache_hit Returns: Optional[bool]: @@ -487,7 +487,7 @@ def complete(self): """Server completed query. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobComplete + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.job_complete Returns: Optional[bool]: @@ -501,7 +501,7 @@ def errors(self): """Errors generated by the query. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#errors + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.errors Returns: Optional[List[Mapping]]: @@ -515,7 +515,7 @@ def job_id(self): """Job ID of the query job these results are from. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobReference + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.job_reference Returns: str: Job ID of the query job. @@ -527,7 +527,7 @@ def page_token(self): """Token for fetching next bach of results. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#pageToken + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.page_token Returns: Optional[str]: Token generated on the server (None until set by the server). @@ -539,7 +539,7 @@ def total_rows(self): """Total number of rows returned by the query. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_rows Returns: Optional[int}: Count generated on the server (None until set by the server). @@ -553,7 +553,7 @@ def total_bytes_processed(self): """Total number of bytes processed by the query. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalBytesProcessed + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_bytes_processed Returns: Optional[int]: Count generated on the server (None until set by the server). @@ -567,7 +567,7 @@ def num_dml_affected_rows(self): """Total number of rows affected by a DML query. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#numDmlAffectedRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.num_dml_affected_rows Returns: Optional[int]: Count generated on the server (None until set by the server). @@ -581,7 +581,7 @@ def rows(self): """Query results. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#rows + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.rows Returns: Optional[List[google.cloud.bigquery.table.Row]]: @@ -594,7 +594,7 @@ def schema(self): """Schema for query results. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#schema + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.schema Returns: Optional[List[SchemaField]]: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py index 14c7fcc103c2..044368e75108 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py @@ -183,7 +183,7 @@ def return_type(self): time. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#resource-routine + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#Routine.FIELDS.return_type """ resource = self._properties.get(self._PROPERTY_TO_API_FIELD["return_type"]) if not resource: @@ -277,7 +277,7 @@ class RoutineArgument(object): """Input/output argument of a function or a stored procedure. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/routines + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#argument Args: ``**kwargs`` (Dict): @@ -317,7 +317,7 @@ def kind(self): ``ANY_TYPE``. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#ArgumentKind + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#Argument.FIELDS.argument_kind """ return self._properties.get(self._PROPERTY_TO_API_FIELD["kind"]) @@ -340,7 +340,7 @@ def data_type(self): of a variable, e.g., a function argument. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlDataType + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#Argument.FIELDS.data_type """ resource = self._properties.get(self._PROPERTY_TO_API_FIELD["data_type"]) if not resource: @@ -400,7 +400,7 @@ class RoutineReference(object): """A pointer to a routine. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/routines + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#routinereference """ def __init__(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 4cbdb83b2814..cb94133abdad 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -51,10 +51,10 @@ class SchemaField(object): name (str): the name of the field. field_type (str): the type of the field. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type - mode (str): the mode of the field. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.mode + mode (str): the mode of the field. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode description (Optional[str]): description for the field. @@ -103,7 +103,7 @@ def field_type(self): """str: The type of the field. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ return self._field_type @@ -112,7 +112,7 @@ def mode(self): """str: The mode of the field. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.mode + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode """ return self._mode diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 9b8518c214ae..f3c0916811e7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -184,7 +184,7 @@ class TableReference(object): """TableReferences are pointers to tables. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference Args: dataset_ref (google.cloud.bigquery.dataset.DatasetReference): @@ -364,7 +364,7 @@ class Table(object): """Tables represent a set of rows whose values correspond to a schema. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource-table Args: table_ref (Union[google.cloud.bigquery.table.TableReference, str]): From 24eb1dfbfebc4d4166268daa0e991f6de68c3263 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 14 Oct 2019 06:27:48 -0700 Subject: [PATCH 0683/2016] test(bigquery): simplify scripting system test to reduce flakiness (#9458) No need to filter by time in the scripting system tests. Rather, we only care about filtering by parent job when testing those features. This addresses flakiness encountered: * https://source.cloud.google.com/results/invocations/a29867f9-6492-4477-85d4-d9de8d9ff85b/targets/cloud-devrel%2Fclient-libraries%2Fgoogle-cloud-python%2Fpresubmit%2Fbigquery/log * https://source.cloud.google.com/results/invocations/cdb77d4a-b0e3-4339-ab55-ae08fb2b297d/targets/cloud-devrel%2Fclient-libraries%2Fgoogle-cloud-python%2Fpresubmit%2Fbigquery/log --- .../google-cloud-bigquery/tests/system.py | 31 +++++-------------- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index ce34b0007101..09fa7f456214 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -432,7 +432,7 @@ def test_list_tables(self): self.assertGreater(len(list(iterator)), 0) def test_listing_scripting_jobs(self): - # run an SQL script + # Run a SQL script. sql_script = """ -- Declare a variable to hold names as an array. DECLARE top_names ARRAY; @@ -453,31 +453,14 @@ def test_listing_scripting_jobs(self): FROM `bigquery-public-data.samples.shakespeare` ); """ - test_start = datetime.datetime.utcnow() - query_job = Config.CLIENT.query(sql_script, project=Config.CLIENT.project) - query_job.result() - - # fetch jobs created by the SQL script, sort them into parent and - # child jobs - script_jobs = list(Config.CLIENT.list_jobs(min_creation_time=test_start)) - - parent_jobs = [] - child_jobs = [] + parent_job = Config.CLIENT.query(sql_script, project=Config.CLIENT.project) + parent_job.result() - for job in script_jobs: - if job.num_child_jobs > 0: - parent_jobs.append(job) - else: - child_jobs.append(job) - - assert len(parent_jobs) == 1 # also implying num_child_jobs > 0 - assert len(child_jobs) == parent_jobs[0].num_child_jobs + # Fetch jobs created by the SQL script. + child_jobs = list(Config.CLIENT.list_jobs(parent_job=parent_job)) - # fetch jobs using the parent job filter, verify that results are as expected - fetched_jobs = list(Config.CLIENT.list_jobs(parent_job=parent_jobs[0])) - assert sorted(job.job_id for job in fetched_jobs) == sorted( - job.job_id for job in child_jobs - ) + assert parent_job.num_child_jobs > 0 + assert len(child_jobs) == parent_job.num_child_jobs def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) From 44afa50f88b122425478904c56af40e73b6c87f5 Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar <46078689+Emar-Kar@users.noreply.github.com> Date: Tue, 15 Oct 2019 00:37:44 -0700 Subject: [PATCH 0684/2016] refactor(bigquery): update code samples to use strings for table and dataset IDs (#9136) --- .../google-cloud-bigquery/docs/snippets.py | 170 ------------------ .../google-cloud-bigquery/docs/usage/jobs.rst | 11 +- .../docs/usage/queries.rst | 5 +- .../docs/usage/tables.rst | 13 +- .../samples/add_empty_column.py | 2 +- .../samples/browse_table_data.py | 2 +- .../samples/client_list_jobs.py | 50 ++++++ .../samples/client_query.py | 41 +++++ .../samples/copy_table.py | 39 ++++ .../samples/create_routine_ddl.py | 6 +- .../samples/create_table.py | 2 +- .../samples/delete_dataset.py | 2 +- .../samples/delete_dataset_labels.py | 2 +- .../samples/delete_table.py | 2 +- .../samples/get_dataset.py | 6 +- .../samples/get_dataset_labels.py | 2 +- .../samples/load_table_dataframe.py | 2 +- .../samples/table_exists.py | 29 +++ .../samples/table_insert_rows.py | 34 ++++ .../samples/tests/conftest.py | 6 +- .../samples/tests/test_client_list_jobs.py | 31 ++++ .../samples/tests/test_client_query.py | 24 +++ .../samples/tests/test_copy_table.py | 27 +++ .../samples/tests/test_create_dataset.py | 1 + .../samples/tests/test_create_job.py | 1 + .../samples/tests/test_create_routine_ddl.py | 1 + .../samples/tests/test_create_table.py | 1 + .../tests/test_dataset_label_samples.py | 1 + .../samples/tests/test_delete_dataset.py | 1 + .../samples/tests/test_delete_table.py | 1 + .../samples/tests/test_get_dataset.py | 1 + .../samples/tests/test_get_table.py | 2 + .../samples/tests/test_list_datasets.py | 1 + .../samples/tests/test_list_tables.py | 1 + .../samples/tests/test_model_samples.py | 1 + .../samples/tests/test_query_to_arrow.py | 1 + .../samples/tests/test_table_exists.py | 30 ++++ .../samples/tests/test_table_insert_rows.py | 33 ++++ .../tests/test_update_dataset_access.py | 1 + ...update_dataset_default_table_expiration.py | 1 + .../tests/test_update_dataset_description.py | 1 + ...update_dataset_default_table_expiration.py | 2 +- 42 files changed, 392 insertions(+), 198 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/client_list_jobs.py create mode 100644 packages/google-cloud-bigquery/samples/client_query.py create mode 100644 packages/google-cloud-bigquery/samples/copy_table.py create mode 100644 packages/google-cloud-bigquery/samples/table_exists.py create mode 100644 packages/google-cloud-bigquery/samples/table_insert_rows.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_copy_table.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_table_exists.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 0104976a739a..f76c645660bb 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -13,11 +13,9 @@ # limitations under the License. """Testable usage examples for Google BigQuery API wrapper - Each example function takes a ``client`` argument (which must be an instance of :class:`google.cloud.bigquery.client.Client`) and uses it to perform a task with the API. - To facilitate running the examples as system tests, each example is also passed a ``to_delete`` list; the function adds to the list any objects created which need to be deleted during teardown. @@ -303,47 +301,6 @@ def test_load_and_query_partitioned_table(client, to_delete): assert len(rows) == 29 -# [START bigquery_table_exists] -def table_exists(client, table_reference): - """Return if a table exists. - - Args: - client (google.cloud.bigquery.client.Client): - A client to connect to the BigQuery API. - table_reference (google.cloud.bigquery.table.TableReference): - A reference to the table to look for. - - Returns: - bool: ``True`` if the table exists, ``False`` otherwise. - """ - from google.cloud.exceptions import NotFound - - try: - client.get_table(table_reference) - return True - except NotFound: - return False - - -# [END bigquery_table_exists] - - -def test_table_exists(client, to_delete): - """Determine if a table exists.""" - DATASET_ID = "get_table_dataset_{}".format(_millis()) - TABLE_ID = "get_table_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(DATASET_ID)) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - table_ref = dataset.table(TABLE_ID) - table = bigquery.Table(table_ref, schema=SCHEMA) - table = client.create_table(table) - - assert table_exists(client, table_ref) - assert not table_exists(client, dataset.table("i_dont_exist")) - - @pytest.mark.skip( reason=( "update_table() is flaky " @@ -698,36 +655,6 @@ def test_manage_views(client, to_delete): # [END bigquery_grant_view_access] -def test_table_insert_rows(client, to_delete): - """Insert / fetch table data.""" - dataset_id = "table_insert_rows_dataset_{}".format(_millis()) - table_id = "table_insert_rows_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset = client.create_dataset(dataset) - dataset.location = "US" - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table = client.create_table(table) - - # [START bigquery_table_insert_rows] - # TODO(developer): Uncomment the lines below and replace with your values. - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' # replace with your dataset ID - # For this sample, the table must already exist and have a defined schema - # table_id = 'my_table' # replace with your table ID - # table_ref = client.dataset(dataset_id).table(table_id) - # table = client.get_table(table_ref) # API request - - rows_to_insert = [(u"Phred Phlyntstone", 32), (u"Wylma Phlyntstone", 29)] - - errors = client.insert_rows(table, rows_to_insert) # API request - - assert errors == [] - # [END bigquery_table_insert_rows] - - def test_load_table_from_file(client, to_delete): """Upload table data from a CSV file.""" dataset_id = "load_table_from_file_dataset_{}".format(_millis()) @@ -993,12 +920,10 @@ def test_load_table_from_uri_orc(client, to_delete, capsys): def test_load_table_from_uri_autodetect(client, to_delete, capsys): """Load table from a GCS URI using various formats and auto-detected schema - Each file format has its own tested load from URI sample. Because most of the code is common for autodetect, append, and truncate, this sample includes snippets for all supported formats but only calls a single load job. - This code snippet is made up of shared code, then format-specific code, followed by more shared code. Note that only the last format in the format-specific code section will be tested in this test. @@ -1058,12 +983,10 @@ def test_load_table_from_uri_autodetect(client, to_delete, capsys): def test_load_table_from_uri_truncate(client, to_delete, capsys): """Replaces table data with data from a GCS URI using various formats - Each file format has its own tested load from URI sample. Because most of the code is common for autodetect, append, and truncate, this sample includes snippets for all supported formats but only calls a single load job. - This code snippet is made up of shared code, then format-specific code, followed by more shared code. Note that only the last format in the format-specific code section will be tested in this test. @@ -1303,38 +1226,6 @@ def test_load_table_relax_column(client, to_delete): assert table.num_rows > 0 -def test_copy_table(client, to_delete): - dataset_id = "copy_table_dataset_{}".format(_millis()) - dest_dataset = bigquery.Dataset(client.dataset(dataset_id)) - dest_dataset.location = "US" - dest_dataset = client.create_dataset(dest_dataset) - to_delete.append(dest_dataset) - - # [START bigquery_copy_table] - # from google.cloud import bigquery - # client = bigquery.Client() - - source_dataset = client.dataset("samples", project="bigquery-public-data") - source_table_ref = source_dataset.table("shakespeare") - - # dataset_id = 'my_dataset' - dest_table_ref = client.dataset(dataset_id).table("destination_table") - - job = client.copy_table( - source_table_ref, - dest_table_ref, - # Location must match that of the source and destination tables. - location="US", - ) # API request - - job.result() # Waits for job to complete. - - assert job.state == "DONE" - dest_table = client.get_table(dest_table_ref) # API request - assert dest_table.num_rows > 0 - # [END bigquery_copy_table] - - def test_copy_table_multiple_source(client, to_delete): dest_dataset_id = "dest_dataset_{}".format(_millis()) dest_dataset = bigquery.Dataset(client.dataset(dest_dataset_id)) @@ -1601,31 +1492,6 @@ def test_undelete_table(client, to_delete): # [END bigquery_undelete_table] -def test_client_query(client): - """Run a simple query.""" - - # [START bigquery_query] - # from google.cloud import bigquery - # client = bigquery.Client() - - query = ( - "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` " - 'WHERE state = "TX" ' - "LIMIT 100" - ) - query_job = client.query( - query, - # Location must match that of the dataset(s) referenced in the query. - location="US", - ) # API request - starts the query - - for row in query_job: # API request - fetches results - # Row values can be accessed by field name or index - assert row[0] == row.name == row["name"] - print(row) - # [END bigquery_query] - - def test_client_query_legacy_sql(client): """Run a query with Legacy SQL explicitly set""" # [START bigquery_query_legacy] @@ -2360,42 +2226,6 @@ def test_ddl_create_view(client, to_delete, capsys): assert len(df) == 0 -def test_client_list_jobs(client): - """List jobs for a project.""" - - # [START bigquery_list_jobs] - # TODO(developer): Uncomment the lines below and replace with your values. - # from google.cloud import bigquery - # project = 'my_project' # replace with your project ID - # client = bigquery.Client(project=project) - import datetime - - # List the 10 most recent jobs in reverse chronological order. - # Omit the max_results parameter to list jobs from the past 6 months. - print("Last 10 jobs:") - for job in client.list_jobs(max_results=10): # API request(s) - print(job.job_id) - - # The following are examples of additional optional parameters: - - # Use min_creation_time and/or max_creation_time to specify a time window. - print("Jobs from the last ten minutes:") - ten_mins_ago = datetime.datetime.utcnow() - datetime.timedelta(minutes=10) - for job in client.list_jobs(min_creation_time=ten_mins_ago): - print(job.job_id) - - # Use all_users to include jobs run by all users in the project. - print("Last 10 jobs run by all users:") - for job in client.list_jobs(max_results=10, all_users=True): - print("{} run by user: {}".format(job.job_id, job.user_email)) - - # Use state_filter to filter by job state. - print("Jobs currently running:") - for job in client.list_jobs(state_filter="RUNNING"): - print(job.job_id) - # [END bigquery_list_jobs] - - @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_query_results_as_dataframe(client): # [START bigquery_query_results_dataframe] diff --git a/packages/google-cloud-bigquery/docs/usage/jobs.rst b/packages/google-cloud-bigquery/docs/usage/jobs.rst index 914d1d459ee7..c3dd71031bfc 100644 --- a/packages/google-cloud-bigquery/docs/usage/jobs.rst +++ b/packages/google-cloud-bigquery/docs/usage/jobs.rst @@ -1,9 +1,6 @@ Managing Jobs ~~~~~~~~~~~~~ -List jobs for a project -^^^^^^^^^^^^^^^^^^^^^^^ - Jobs describe actions performed on data in BigQuery tables: - Load data into a table @@ -11,7 +8,13 @@ Jobs describe actions performed on data in BigQuery tables: - Extract data from a table - Copy a table -.. literalinclude:: ../snippets.py +Listing jobs +^^^^^^^^^^^^ + +List jobs for a project with the +:func:`~google.cloud.bigquery.client.Client.list_jobs` method: + +.. literalinclude:: ../samples/client_list_jobs.py :language: python :dedent: 4 :start-after: [START bigquery_list_jobs] diff --git a/packages/google-cloud-bigquery/docs/usage/queries.rst b/packages/google-cloud-bigquery/docs/usage/queries.rst index fc77bb5b80cd..1f0720e47f1a 100644 --- a/packages/google-cloud-bigquery/docs/usage/queries.rst +++ b/packages/google-cloud-bigquery/docs/usage/queries.rst @@ -4,9 +4,10 @@ Running Queries Querying data ^^^^^^^^^^^^^ -Run a query and wait for it to finish: +Run a query and wait for it to finish with the +:func:`~google.cloud.bigquery.client.Client.query` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/client_query.py :language: python :dedent: 4 :start-after: [START bigquery_query] diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index 458c5b0009ba..6a6cbd356639 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -28,6 +28,15 @@ Get a table resource with the :start-after: [START bigquery_get_table] :end-before: [END bigquery_get_table] +Determine if a table exists with the +:func:`~google.cloud.bigquery.client.Client.get_table` method: + +.. literalinclude:: ../samples/table_exists.py + :language: python + :dedent: 4 + :start-after: [START bigquery_table_exists] + :end-before: [END bigquery_table_exists] + Browse data rows in a table with the :func:`~google.cloud.bigquery.client.Client.list_rows` method: @@ -107,7 +116,7 @@ Update a property in a table's metadata with the Insert rows into a table's data with the :func:`~google.cloud.bigquery.client.Client.insert_rows` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/table_insert_rows.py :language: python :dedent: 4 :start-after: [START bigquery_table_insert_rows] @@ -128,7 +137,7 @@ Copying a Table Copy a table with the :func:`~google.cloud.bigquery.client.Client.copy_table` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/copy_table.py :language: python :dedent: 4 :start-after: [START bigquery_copy_table] diff --git a/packages/google-cloud-bigquery/samples/add_empty_column.py b/packages/google-cloud-bigquery/samples/add_empty_column.py index 4f0b971e577a..bd531898eb29 100644 --- a/packages/google-cloud-bigquery/samples/add_empty_column.py +++ b/packages/google-cloud-bigquery/samples/add_empty_column.py @@ -28,7 +28,7 @@ def add_empty_column(client, table_id): table = client.get_table(table_id) # Make an API request. original_schema = table.schema - new_schema = original_schema[:] # creates a copy of the schema + new_schema = original_schema[:] # Creates a copy of the schema. new_schema.append(bigquery.SchemaField("phone", "STRING")) table.schema = new_schema diff --git a/packages/google-cloud-bigquery/samples/browse_table_data.py b/packages/google-cloud-bigquery/samples/browse_table_data.py index bba8dc434dd9..78d1d351a7a7 100644 --- a/packages/google-cloud-bigquery/samples/browse_table_data.py +++ b/packages/google-cloud-bigquery/samples/browse_table_data.py @@ -39,7 +39,7 @@ def browse_table_data(client, table_id): # Specify selected fields to limit the results to certain columns. table = client.get_table(table_id) # Make an API request. - fields = table.schema[:2] # first two columns + fields = table.schema[:2] # First two columns. rows_iter = client.list_rows(table_id, selected_fields=fields, max_results=10) rows = list(rows_iter) print("Selected {} columns from table {}.".format(len(rows_iter.schema), table_id)) diff --git a/packages/google-cloud-bigquery/samples/client_list_jobs.py b/packages/google-cloud-bigquery/samples/client_list_jobs.py new file mode 100644 index 000000000000..08eb4fbd99ef --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_list_jobs.py @@ -0,0 +1,50 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_list_jobs(client): + + # [START bigquery_list_jobs] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + import datetime + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # List the 10 most recent jobs in reverse chronological order. + # Omit the max_results parameter to list jobs from the past 6 months. + print("Last 10 jobs:") + for job in client.list_jobs(max_results=10): # API request(s) + print("{}".format(job.job_id)) + + # The following are examples of additional optional parameters: + + # Use min_creation_time and/or max_creation_time to specify a time window. + print("Jobs from the last ten minutes:") + ten_mins_ago = datetime.datetime.utcnow() - datetime.timedelta(minutes=10) + for job in client.list_jobs(min_creation_time=ten_mins_ago): + print("{}".format(job.job_id)) + + # Use all_users to include jobs run by all users in the project. + print("Last 10 jobs run by all users:") + for job in client.list_jobs(max_results=10, all_users=True): + print("{} run by user: {}".format(job.job_id, job.user_email)) + + # Use state_filter to filter by job state. + print("Last 10 jobs done:") + for job in client.list_jobs(max_results=10, state_filter="DONE"): + print("{}".format(job.job_id)) + # [END bigquery_list_jobs] diff --git a/packages/google-cloud-bigquery/samples/client_query.py b/packages/google-cloud-bigquery/samples/client_query.py new file mode 100644 index 000000000000..9dccfd38cbcf --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query.py @@ -0,0 +1,41 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query(client): + + # [START bigquery_query] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + query = """ + SELECT name, SUM(number) as total_people + FROM `bigquery-public-data.usa_names.usa_1910_2013` + WHERE state = 'TX' + GROUP BY name, state + ORDER BY total_people DESC + LIMIT 20 + """ + query_job = client.query( + query, location="US" # Must match the destination dataset(s) location. + ) # Make an API request. + + print("The query data:") + for row in query_job: + # Row values can be accessed by field name or index. + print("name={}, count={}".format(row[0], row["total_people"])) + # [END bigquery_query] diff --git a/packages/google-cloud-bigquery/samples/copy_table.py b/packages/google-cloud-bigquery/samples/copy_table.py new file mode 100644 index 000000000000..f6ebd91470eb --- /dev/null +++ b/packages/google-cloud-bigquery/samples/copy_table.py @@ -0,0 +1,39 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def copy_table(client, source_table_id, destination_table_id): + + # [START bigquery_copy_table] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set source_table_id to the ID of the original table. + # source_table_id = "your-project.source_dataset.source_table" + + # TODO(developer): Set destination_table_id to the ID of the destination table. + # destination_table_id = "your-project.destination_dataset.destination_table" + + job = client.copy_table( + source_table_id, + destination_table_id, + location="US", # Must match the source and destination tables location. + ) + job.result() # Waits for job to complete. + + print("A copy of the table created.") + # [END bigquery_copy_table] diff --git a/packages/google-cloud-bigquery/samples/create_routine_ddl.py b/packages/google-cloud-bigquery/samples/create_routine_ddl.py index 836e0cdde34a..eb5af0388503 100644 --- a/packages/google-cloud-bigquery/samples/create_routine_ddl.py +++ b/packages/google-cloud-bigquery/samples/create_routine_ddl.py @@ -34,12 +34,8 @@ def create_routine_ddl(client, routine_id): """.format( routine_id ) - - # Initiate the query to create the routine. query_job = client.query(sql) # Make an API request. - - # Wait for the query to complete. - query_job.result() # Waits for the job to complete. + query_job.result() # Wait for the job to complete. print("Created routine {}".format(query_job.ddl_target_routine)) # [END bigquery_create_routine_ddl] diff --git a/packages/google-cloud-bigquery/samples/create_table.py b/packages/google-cloud-bigquery/samples/create_table.py index b77812f7e0ce..ae26c57fed00 100644 --- a/packages/google-cloud-bigquery/samples/create_table.py +++ b/packages/google-cloud-bigquery/samples/create_table.py @@ -21,7 +21,7 @@ def create_table(client, table_id): # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() - # TODO(developer): Set table_id to the ID of the table to create + # TODO(developer): Set table_id to the ID of the table to create. # table_id = "your-project.your_dataset.your_table_name" schema = [ diff --git a/packages/google-cloud-bigquery/samples/delete_dataset.py b/packages/google-cloud-bigquery/samples/delete_dataset.py index 29302f099998..8ce95d953392 100644 --- a/packages/google-cloud-bigquery/samples/delete_dataset.py +++ b/packages/google-cloud-bigquery/samples/delete_dataset.py @@ -25,7 +25,7 @@ def delete_dataset(client, dataset_id): # TODO(developer): Set model_id to the ID of the model to fetch. # dataset_id = 'your-project.your_dataset' - # Use the delete_contents parameter to delete a dataset and its contents + # Use the delete_contents parameter to delete a dataset and its contents. # Use the not_found_ok parameter to not receive an error if the dataset has already been deleted. client.delete_dataset( dataset_id, delete_contents=True, not_found_ok=True diff --git a/packages/google-cloud-bigquery/samples/delete_dataset_labels.py b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py index 425bc98dd96e..9e6493694ddc 100644 --- a/packages/google-cloud-bigquery/samples/delete_dataset_labels.py +++ b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py @@ -27,7 +27,7 @@ def delete_dataset_labels(client, dataset_id): dataset = client.get_dataset(dataset_id) # Make an API request. - # To delete a label from a dataset, set its value to None + # To delete a label from a dataset, set its value to None. dataset.labels["color"] = None dataset = client.update_dataset(dataset, ["labels"]) # Make an API request. diff --git a/packages/google-cloud-bigquery/samples/delete_table.py b/packages/google-cloud-bigquery/samples/delete_table.py index 4c4377418556..b83a92890b09 100644 --- a/packages/google-cloud-bigquery/samples/delete_table.py +++ b/packages/google-cloud-bigquery/samples/delete_table.py @@ -26,7 +26,7 @@ def delete_table(client, table_id): # table_id = 'your-project.your_dataset.your_table' # If the table does not exist, delete_table raises - # google.api_core.exceptions.NotFound unless not_found_ok is True + # google.api_core.exceptions.NotFound unless not_found_ok is True. client.delete_table(table_id, not_found_ok=True) # Make an API request. print("Deleted table '{}'.".format(table_id)) # [END bigquery_delete_table] diff --git a/packages/google-cloud-bigquery/samples/get_dataset.py b/packages/google-cloud-bigquery/samples/get_dataset.py index cd35745c0dc5..bb3d4a0d4c40 100644 --- a/packages/google-cloud-bigquery/samples/get_dataset.py +++ b/packages/google-cloud-bigquery/samples/get_dataset.py @@ -35,7 +35,7 @@ def get_dataset(client, dataset_id): ) ) - # View dataset properties + # View dataset properties. print("Description: {}".format(dataset.description)) print("Labels:") labels = dataset.labels @@ -45,9 +45,9 @@ def get_dataset(client, dataset_id): else: print("\tDataset has no labels defined.") - # View tables in dataset + # View tables in dataset. print("Tables:") - tables = list(client.list_tables(dataset)) # API request(s) + tables = list(client.list_tables(dataset)) # Make an API request(s). if tables: for table in tables: print("\t{}".format(table.table_id)) diff --git a/packages/google-cloud-bigquery/samples/get_dataset_labels.py b/packages/google-cloud-bigquery/samples/get_dataset_labels.py index 46e38a3a9a56..411607f84664 100644 --- a/packages/google-cloud-bigquery/samples/get_dataset_labels.py +++ b/packages/google-cloud-bigquery/samples/get_dataset_labels.py @@ -27,7 +27,7 @@ def get_dataset_labels(client, dataset_id): dataset = client.get_dataset(dataset_id) # Make an API request. - # View dataset labels + # View dataset labels. print("Dataset ID: {}".format(dataset_id)) print("Labels:") if dataset.labels: diff --git a/packages/google-cloud-bigquery/samples/load_table_dataframe.py b/packages/google-cloud-bigquery/samples/load_table_dataframe.py index f08712d4dc32..ea6fe5d02384 100644 --- a/packages/google-cloud-bigquery/samples/load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/load_table_dataframe.py @@ -66,7 +66,7 @@ def load_table_dataframe(client, table_id): job_config=job_config, location="US", # Must match the destination dataset location. ) # Make an API request. - job.result() # Waits for the job to complete. + job.result() # Wait for the job to complete. table = client.get_table(table_id) # Make an API request. print( diff --git a/packages/google-cloud-bigquery/samples/table_exists.py b/packages/google-cloud-bigquery/samples/table_exists.py new file mode 100644 index 000000000000..a011e6e2915d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/table_exists.py @@ -0,0 +1,29 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def table_exists(client, table_id): + + # [START bigquery_table_exists] + from google.cloud.exceptions import NotFound + + # TODO(developer): Set table_id to the ID of the table to determine existence. + # table_id = "your-project.your_dataset.your_table" + + try: + client.get_table(table_id) # Make an API request. + print("Table {} already exists.".format(table_id)) + except NotFound: + print("Table {} is not found.".format(table_id)) + # [END bigquery_table_exists] diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows.py b/packages/google-cloud-bigquery/samples/table_insert_rows.py new file mode 100644 index 000000000000..e2f949b635a6 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/table_insert_rows.py @@ -0,0 +1,34 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def table_insert_rows(client, table_id): + + # [START bigquery_table_insert_rows] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the model to fetch. + # table_id = "your-project.your_dataset.your_table" + + table = client.get_table(table_id) # Make an API request. + rows_to_insert = [(u"Phred Phlyntstone", 32), (u"Wylma Phlyntstone", 29)] + + errors = client.insert_rows(table, rows_to_insert) # Make an API request. + if errors == []: + print("New rows have been added.") + # [END bigquery_table_insert_rows] diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index f2bb93112a22..32b23931aa91 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -57,7 +57,7 @@ def random_routine_id(client, dataset_id): @pytest.fixture def dataset_id(client): now = datetime.datetime.now() - dataset_id = "python_samples_{}_{}".format( + dataset_id = "python_dataset_sample_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] ) dataset = client.create_dataset(dataset_id) @@ -68,7 +68,7 @@ def dataset_id(client): @pytest.fixture def table_id(client, dataset_id): now = datetime.datetime.now() - table_id = "python_samples_{}_{}".format( + table_id = "python_table_sample_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] ) @@ -86,7 +86,7 @@ def table_with_data_id(client): @pytest.fixture def routine_id(client, dataset_id): now = datetime.datetime.now() - routine_id = "python_samples_{}_{}".format( + routine_id = "python_routine_sample_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py b/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py new file mode 100644 index 000000000000..011e081fdee4 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py @@ -0,0 +1,31 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import client_list_jobs +from .. import create_job + + +def test_client_list_jobs(capsys, client): + + job = create_job.create_job(client) + client.cancel_job(job.job_id) + job.cancel() + client_list_jobs.client_list_jobs(client) + out, err = capsys.readouterr() + assert "Started job: {}".format(job.job_id) in out + assert "Last 10 jobs:" in out + assert "Jobs from the last ten minutes:" in out + assert "Last 10 jobs run by all users:" in out + assert "Last 10 jobs done:" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query.py b/packages/google-cloud-bigquery/samples/tests/test_client_query.py new file mode 100644 index 000000000000..fd5b8e7edd97 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query.py @@ -0,0 +1,24 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import client_query + + +def test_client_query(capsys, client): + + client_query.client_query(client) + out, err = capsys.readouterr() + assert "The query data:" in out + assert "name=James, count=272793" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py new file mode 100644 index 000000000000..6d7de2d9132c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py @@ -0,0 +1,27 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import copy_table + + +def test_copy_table(capsys, client, table_with_data_id, random_table_id): + + copy_table.copy_table(client, table_with_data_id, random_table_id) + out, err = capsys.readouterr() + assert "A copy of the table created." in out + assert ( + client.get_table(random_table_id).num_rows + == client.get_table(table_with_data_id).num_rows + ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py index dfadc67d8468..e52e9ddfdced 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import create_dataset diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_job.py b/packages/google-cloud-bigquery/samples/tests/test_create_job.py index fce005ae8236..5ead51156606 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_job.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_job.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import create_job diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_routine_ddl.py b/packages/google-cloud-bigquery/samples/tests/test_create_routine_ddl.py index cecda2f654ec..bcb3249d26ef 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_routine_ddl.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_routine_ddl.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from google.cloud import bigquery from google.cloud import bigquery_v2 diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table.py b/packages/google-cloud-bigquery/samples/tests/test_create_table.py index 093ee6e94277..f9ebc0e5d70d 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import create_table diff --git a/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py index 94a2092407b0..1e526f2339ac 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import delete_dataset_labels from .. import get_dataset_labels from .. import label_dataset diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py index 2b1b6ad06195..836b3aebb272 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import delete_dataset diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_table.py b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py index 8f4796623a83..f76ad8624cc6 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_delete_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import delete_table diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py index dedec1d7b29e..8682be7ee3e9 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import get_dataset diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_table.py b/packages/google-cloud-bigquery/samples/tests/test_get_table.py index efbd464d54ad..8adaa6557954 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_table.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. + from google.cloud import bigquery + from .. import get_table diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py b/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py index 4c66a24f9b1a..d8c32e91ee20 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import list_datasets diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_tables.py b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py index ec1621ac7579..61ac04ea26ce 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_tables.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import list_tables diff --git a/packages/google-cloud-bigquery/samples/tests/test_model_samples.py b/packages/google-cloud-bigquery/samples/tests/test_model_samples.py index d7b06a92a3e1..99d838533917 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_model_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_model_samples.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import delete_model from .. import get_model from .. import list_models diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py index dd9b3ab508cc..2fbed807ece4 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + import pyarrow from .. import query_to_arrow diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_exists.py b/packages/google-cloud-bigquery/samples/tests/test_table_exists.py new file mode 100644 index 000000000000..232d77fbcb60 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_table_exists.py @@ -0,0 +1,30 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from google.cloud import bigquery + +from .. import table_exists + + +def test_table_exists(capsys, client, random_table_id): + + table_exists.table_exists(client, random_table_id) + out, err = capsys.readouterr() + assert "Table {} is not found.".format(random_table_id) in out + table = bigquery.Table(random_table_id) + table = client.create_table(table) + table_exists.table_exists(client, random_table_id) + out, err = capsys.readouterr() + assert "Table {} already exists.".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py new file mode 100644 index 000000000000..95d119dbdc93 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from google.cloud import bigquery + +from .. import table_insert_rows + + +def test_table_insert_rows(capsys, client, random_table_id): + + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + table = bigquery.Table(random_table_id, schema=schema) + table = client.create_table(table) + + table_insert_rows.table_insert_rows(client, random_table_id) + out, err = capsys.readouterr() + assert "New rows have been added." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py index ae33dbfe4a4c..679b700731e3 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import update_dataset_access diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py index 46e9654209ed..a97de11a2f1a 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import update_dataset_default_table_expiration diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py index c6f8889f50da..63826077b976 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from .. import update_dataset_description diff --git a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py index 7b68ede8d2be..8de354b1f21b 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py @@ -26,7 +26,7 @@ def update_dataset_default_table_expiration(client, dataset_id): # dataset_id = 'your-project.your_dataset' dataset = client.get_dataset(dataset_id) # Make an API request. - dataset.default_table_expiration_ms = 24 * 60 * 60 * 1000 # in milliseconds. + dataset.default_table_expiration_ms = 24 * 60 * 60 * 1000 # In milliseconds. dataset = client.update_dataset( dataset, ["default_table_expiration_ms"] From c83f545572b32d07c76880c2d978d8855e88d37d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 15 Oct 2019 15:40:10 -0700 Subject: [PATCH 0685/2016] feat(bigquery): add Dataset.default_partition_expiration_ms and Table.require_partition_filter properties (#9464) * feat(bigquery): add Dataset.default_partiion_expiration_ms and Table.require_partition_filter properties Samples double as system tests. I aim to use them here: * https://cloud.google.com/bigquery/docs/managing-partitioned-tables#require-filter * https://cloud.google.com/bigquery/docs/updating-datasets#partition-expiration Note: this also (silently) deprecates TimePartitioning.require_partition_filter, as that's duplicating the same functionality. I was curious why the expiration_ms wasn't also moving up, but then I realized that property only makes sense if a partition is assocatied with a timestamp. * add deprecation warning to require_partition_filter * blacken --- .../google/cloud/bigquery/dataset.py | 29 +++++ .../google/cloud/bigquery/table.py | 41 ++++++- ...te_dataset_default_partition_expiration.py | 31 ++++++ ...t_update_table_require_partition_filter.py | 33 ++++++ ...te_dataset_default_partition_expiration.py | 43 ++++++++ .../update_table_require_partition_filter.py | 41 +++++++ .../tests/unit/test_dataset.py | 8 ++ .../tests/unit/test_table.py | 100 +++++++++++------- 8 files changed, 284 insertions(+), 42 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py create mode 100644 packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py create mode 100644 packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 3b241dd7776d..ced80581a758 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -358,6 +358,7 @@ class Dataset(object): _PROPERTY_TO_API_FIELD = { "access_entries": "access", "created": "creationTime", + "default_partition_expiration_ms": "defaultPartitionExpirationMs", "default_table_expiration_ms": "defaultTableExpirationMs", "friendly_name": "friendlyName", } @@ -460,6 +461,34 @@ def self_link(self): """ return self._properties.get("selfLink") + @property + def default_partition_expiration_ms(self): + """Optional[int]: The default partition expiration for all + partitioned tables in the dataset, in milliseconds. + + Once this property is set, all newly-created partitioned tables in + the dataset will have an ``time_paritioning.expiration_ms`` property + set to this value, and changing the value will only affect new + tables, not existing ones. The storage in a partition will have an + expiration time of its partition time plus this value. + + Setting this property overrides the use of + ``default_table_expiration_ms`` for partitioned tables: only one of + ``default_table_expiration_ms`` and + ``default_partition_expiration_ms`` will be used for any new + partitioned table. If you provide an explicit + ``time_partitioning.expiration_ms`` when creating or updating a + partitioned table, that value takes precedence over the default + partition expiration time indicated by this property. + """ + return _helpers._int_or_none( + self._properties.get("defaultPartitionExpirationMs") + ) + + @default_partition_expiration_ms.setter + def default_partition_expiration_ms(self, value): + self._properties["defaultPartitionExpirationMs"] = _helpers._str_or_none(value) + @property def default_table_expiration_ms(self): """Union[int, None]: Default expiration time for tables in the dataset diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f3c0916811e7..4373d99c590f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -385,6 +385,7 @@ class Table(object): "view_query": "view", "external_data_configuration": "externalDataConfiguration", "encryption_configuration": "encryptionConfiguration", + "require_partition_filter": "requirePartitionFilter", } def __init__(self, table_ref, schema=None): @@ -420,6 +421,18 @@ def path(self): self.table_id, ) + @property + def require_partition_filter(self): + """bool: If set to true, queries over the partitioned table require a + partition filter that can be used for partition elimination to be + specified. + """ + return self._properties.get("requirePartitionFilter") + + @require_partition_filter.setter + def require_partition_filter(self, value): + self._properties["requirePartitionFilter"] = value + @property def schema(self): """List[google.cloud.bigquery.schema.SchemaField]: Table's schema. @@ -1722,9 +1735,9 @@ class TimePartitioning(object): Number of milliseconds for which to keep the storage for a partition. require_partition_filter (bool, optional): - If set to true, queries over the partitioned table require a - partition filter that can be used for partition elimination to be - specified. + DEPRECATED: Use + :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`, + instead. """ def __init__( @@ -1777,11 +1790,33 @@ def expiration_ms(self, value): @property def require_partition_filter(self): """bool: Specifies whether partition filters are required for queries + + DEPRECATED: Use + :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`, + instead. """ + warnings.warn( + ( + "TimePartitioning.require_partition_filter will be removed in " + "future versions. Please use Table.require_partition_filter " + "instead." + ), + PendingDeprecationWarning, + stacklevel=2, + ) return self._properties.get("requirePartitionFilter") @require_partition_filter.setter def require_partition_filter(self, value): + warnings.warn( + ( + "TimePartitioning.require_partition_filter will be removed in " + "future versions. Please use Table.require_partition_filter " + "instead." + ), + PendingDeprecationWarning, + stacklevel=2, + ) self._properties["requirePartitionFilter"] = value @classmethod diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py new file mode 100644 index 000000000000..55fa4b0d96fb --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py @@ -0,0 +1,31 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import update_dataset_default_partition_expiration + + +def test_update_dataset_default_partition_expiration(capsys, client, dataset_id): + + ninety_days_ms = 90 * 24 * 60 * 60 * 1000 # in milliseconds + + update_dataset_default_partition_expiration.update_dataset_default_partition_expiration( + client, dataset_id + ) + out, _ = capsys.readouterr() + assert ( + "Updated dataset {} with new default partition expiration {}".format( + dataset_id, ninety_days_ms + ) + in out + ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py b/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py new file mode 100644 index 000000000000..1cbd2b2279b2 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +from .. import update_table_require_partition_filter + + +def test_update_table_require_partition_filter(capsys, client, random_table_id): + # Make a partitioned table. + schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")] + table = bigquery.Table(random_table_id, schema) + table.time_partitioning = bigquery.TimePartitioning(field="transaction_timestamp") + table = client.create_table(table) + + update_table_require_partition_filter.update_table_require_partition_filter( + client, random_table_id + ) + out, _ = capsys.readouterr() + assert ( + "Updated table '{}' with require_partition_filter=True".format(random_table_id) + in out + ) diff --git a/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py b/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py new file mode 100644 index 000000000000..502d52ff199b --- /dev/null +++ b/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py @@ -0,0 +1,43 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_dataset_default_partition_expiration(client, dataset_id): + + # [START bigquery_update_dataset_partition_expiration] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = 'your-project.your_dataset' + + dataset = client.get_dataset(dataset_id) # Make an API request. + + # Set the default partition expiration (applies to new tables, only) in + # milliseconds. This example sets the default expiration to 90 days. + dataset.default_partition_expiration_ms = 90 * 24 * 60 * 60 * 1000 + + dataset = client.update_dataset( + dataset, ["default_partition_expiration_ms"] + ) # Make an API request. + + print( + "Updated dataset {}.{} with new default partition expiration {}".format( + dataset.project, dataset.dataset_id, dataset.default_partition_expiration_ms + ) + ) + # [END bigquery_update_dataset_partition_expiration] diff --git a/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py b/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py new file mode 100644 index 000000000000..4c6be2d2cedc --- /dev/null +++ b/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py @@ -0,0 +1,41 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_table_require_partition_filter(client, table_id): + + # [START bigquery_update_table_require_partition_filter] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the model to fetch. + # table_id = 'your-project.your_dataset.your_table' + + table = client.get_table(table_id) # Make an API request. + table.require_partition_filter = True + table = client.update_table(table, ["require_partition_filter"]) + + # View table properties + print( + "Updated table '{}.{}.{}' with require_partition_filter={}.".format( + table.project, + table.dataset_id, + table.table_id, + table.require_partition_filter, + ) + ) + # [END bigquery_update_table_require_partition_filter] diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 26b1729a240c..9b2276480843 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -454,6 +454,14 @@ def test_access_entries_setter(self): dataset.access_entries = [phred, bharney] self.assertEqual(dataset.access_entries, [phred, bharney]) + def test_default_partition_expiration_ms(self): + dataset = self._make_one("proj.dset") + assert dataset.default_partition_expiration_ms is None + dataset.default_partition_expiration_ms = 12345 + assert dataset.default_partition_expiration_ms == 12345 + dataset.default_partition_expiration_ms = None + assert dataset.default_partition_expiration_ms is None + def test_default_table_expiration_ms_setter_bad_value(self): dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 562bcf6b4e7d..68fc71db3f10 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -928,6 +928,17 @@ def test__build_resource_w_custom_field_not_in__properties(self): with self.assertRaises(ValueError): table._build_resource(["bad"]) + def test_require_partitioning_filter(self): + table = self._make_one("proj.dset.tbl") + assert table.require_partition_filter is None + table.require_partition_filter = True + assert table.require_partition_filter + table.require_partition_filter = False + assert table.require_partition_filter is not None + assert not table.require_partition_filter + table.require_partition_filter = None + assert table.require_partition_filter is None + def test_time_partitioning_getter(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType @@ -946,7 +957,12 @@ def test_time_partitioning_getter(self): self.assertEqual(table.time_partitioning.type_, TimePartitioningType.DAY) self.assertEqual(table.time_partitioning.field, "col1") self.assertEqual(table.time_partitioning.expiration_ms, 123456) - self.assertFalse(table.time_partitioning.require_partition_filter) + + with warnings.catch_warnings(record=True) as warned: + self.assertFalse(table.time_partitioning.require_partition_filter) + + assert len(warned) == 1 + self.assertIs(warned[0].category, PendingDeprecationWarning) def test_time_partitioning_getter_w_none(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -974,7 +990,12 @@ def test_time_partitioning_getter_w_empty(self): self.assertIsNone(table.time_partitioning.type_) self.assertIsNone(table.time_partitioning.field) self.assertIsNone(table.time_partitioning.expiration_ms) - self.assertIsNone(table.time_partitioning.require_partition_filter) + + with warnings.catch_warnings(record=True) as warned: + self.assertIsNone(table.time_partitioning.require_partition_filter) + + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) def test_time_partitioning_setter(self): from google.cloud.bigquery.table import TimePartitioning @@ -2835,26 +2856,32 @@ def _make_one(self, *args, **kw): def test_constructor_defaults(self): time_partitioning = self._make_one() - self.assertEqual(time_partitioning.type_, "DAY") self.assertIsNone(time_partitioning.field) self.assertIsNone(time_partitioning.expiration_ms) - self.assertIsNone(time_partitioning.require_partition_filter) def test_constructor_explicit(self): from google.cloud.bigquery.table import TimePartitioningType time_partitioning = self._make_one( - type_=TimePartitioningType.DAY, - field="name", - expiration_ms=10000, - require_partition_filter=True, + type_=TimePartitioningType.DAY, field="name", expiration_ms=10000 ) self.assertEqual(time_partitioning.type_, "DAY") self.assertEqual(time_partitioning.field, "name") self.assertEqual(time_partitioning.expiration_ms, 10000) - self.assertTrue(time_partitioning.require_partition_filter) + + def test_require_partition_filter_warns_deprecation(self): + object_under_test = self._make_one() + + with warnings.catch_warnings(record=True) as warned: + assert object_under_test.require_partition_filter is None + object_under_test.require_partition_filter = True + assert object_under_test.require_partition_filter + + assert len(warned) == 3 + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) def test_from_api_repr_empty(self): klass = self._get_target_class() @@ -2868,7 +2895,6 @@ def test_from_api_repr_empty(self): self.assertIsNone(time_partitioning.type_) self.assertIsNone(time_partitioning.field) self.assertIsNone(time_partitioning.expiration_ms) - self.assertIsNone(time_partitioning.require_partition_filter) def test_from_api_repr_minimal(self): from google.cloud.bigquery.table import TimePartitioningType @@ -2880,7 +2906,6 @@ def test_from_api_repr_minimal(self): self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY) self.assertIsNone(time_partitioning.field) self.assertIsNone(time_partitioning.expiration_ms) - self.assertIsNone(time_partitioning.require_partition_filter) def test_from_api_repr_doesnt_override_type(self): klass = self._get_target_class() @@ -2903,7 +2928,11 @@ def test_from_api_repr_explicit(self): self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY) self.assertEqual(time_partitioning.field, "name") self.assertEqual(time_partitioning.expiration_ms, 10000) - self.assertTrue(time_partitioning.require_partition_filter) + + with warnings.catch_warnings(record=True) as warned: + self.assertTrue(time_partitioning.require_partition_filter) + + self.assertIs(warned[0].category, PendingDeprecationWarning) def test_to_api_repr_defaults(self): time_partitioning = self._make_one() @@ -2914,12 +2943,14 @@ def test_to_api_repr_explicit(self): from google.cloud.bigquery.table import TimePartitioningType time_partitioning = self._make_one( - type_=TimePartitioningType.DAY, - field="name", - expiration_ms=10000, - require_partition_filter=True, + type_=TimePartitioningType.DAY, field="name", expiration_ms=10000 ) + with warnings.catch_warnings(record=True) as warned: + time_partitioning.require_partition_filter = True + + self.assertIs(warned[0].category, PendingDeprecationWarning) + expected = { "type": "DAY", "field": "name", @@ -2950,21 +2981,21 @@ def test___eq___expiration_ms_mismatch(self): self.assertNotEqual(time_partitioning, other) def test___eq___require_partition_filter_mismatch(self): - time_partitioning = self._make_one( - field="foo", expiration_ms=100000, require_partition_filter=True - ) - other = self._make_one( - field="foo", expiration_ms=100000, require_partition_filter=False - ) + time_partitioning = self._make_one(field="foo", expiration_ms=100000) + other = self._make_one(field="foo", expiration_ms=100000) + with warnings.catch_warnings(record=True) as warned: + time_partitioning.require_partition_filter = True + other.require_partition_filter = False + + assert len(warned) == 2 + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) + self.assertNotEqual(time_partitioning, other) def test___eq___hit(self): - time_partitioning = self._make_one( - field="foo", expiration_ms=100000, require_partition_filter=True - ) - other = self._make_one( - field="foo", expiration_ms=100000, require_partition_filter=True - ) + time_partitioning = self._make_one(field="foo", expiration_ms=100000) + other = self._make_one(field="foo", expiration_ms=100000) self.assertEqual(time_partitioning, other) def test___ne___wrong_type(self): @@ -3008,18 +3039,9 @@ def test___repr___explicit(self): from google.cloud.bigquery.table import TimePartitioningType time_partitioning = self._make_one( - type_=TimePartitioningType.DAY, - field="name", - expiration_ms=10000, - require_partition_filter=True, - ) - expected = ( - "TimePartitioning(" - "expirationMs=10000," - "field=name," - "requirePartitionFilter=True," - "type=DAY)" + type_=TimePartitioningType.DAY, field="name", expiration_ms=10000 ) + expected = "TimePartitioning(" "expirationMs=10000," "field=name," "type=DAY)" self.assertEqual(repr(time_partitioning), expected) def test_set_expiration_w_none(self): From 36c08595d6a8508a6d347e55815fbc5fc0713ee4 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 16 Oct 2019 22:24:58 +0530 Subject: [PATCH 0686/2016] feat(bigquery): expose customer managed encryption key for ML models (#9302) * feat: expose customer managed encryption key for ML models * feat: add encryptionConfiguration in _PROPERTY_TO_API_FIELD * changes in condition * change in document and parameter * create a new file for class EncryptionConfiguration * feat(bigquery): refactor test class of encryption configuration and change location in key * feat(bigquery): add unit test in table class * feat(bigquery): add apache license in file and test to show previous location works --- .../google-cloud-bigquery/docs/reference.rst | 8 +- .../google/cloud/bigquery/__init__.py | 5 +- .../bigquery/encryption_configuration.py | 84 +++++++++++++ .../google/cloud/bigquery/job.py | 14 +-- .../google/cloud/bigquery/model.py | 26 ++++ .../google/cloud/bigquery/table.py | 70 +---------- .../tests/unit/model/test_model.py | 28 +++++ .../tests/unit/test_client.py | 6 +- .../unit/test_encryption_configuration.py | 111 ++++++++++++++++++ .../tests/unit/test_job.py | 18 ++- .../tests/unit/test_table.py | 80 +------------ 11 files changed, 291 insertions(+), 159 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/encryption_configuration.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_encryption_configuration.py diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index e01443808795..a0fc0e1ead70 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -88,7 +88,6 @@ Table table.TableReference table.Row table.RowIterator - table.EncryptionConfiguration table.TimePartitioning table.TimePartitioningType @@ -173,6 +172,13 @@ Enums enums.StandardSqlDataTypes +Encryption Configuration +======================== + +.. autosummary:: + :toctree: generated + + encryption_configuration.EncryptionConfiguration Additional Types ================ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index bda8c5611435..da13375365e9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -73,12 +73,12 @@ from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import Row from google.cloud.bigquery.table import TimePartitioningType from google.cloud.bigquery.table import TimePartitioning +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration __all__ = [ "__version__", @@ -94,7 +94,6 @@ "DatasetReference", "AccessEntry", # Tables - "EncryptionConfiguration", "Table", "TableReference", "Row", @@ -136,6 +135,8 @@ "StandardSqlDataTypes", "SourceFormat", "WriteDisposition", + # EncryptionConfiguration + "EncryptionConfiguration", ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/encryption_configuration.py b/packages/google-cloud-bigquery/google/cloud/bigquery/encryption_configuration.py new file mode 100644 index 000000000000..ba04ae2c45a7 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/encryption_configuration.py @@ -0,0 +1,84 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Define class for the custom encryption configuration.""" + +import copy + + +class EncryptionConfiguration(object): + """Custom encryption configuration (e.g., Cloud KMS keys). + + Args: + kms_key_name (str): resource ID of Cloud KMS key used for encryption + """ + + def __init__(self, kms_key_name=None): + self._properties = {} + if kms_key_name is not None: + self._properties["kmsKeyName"] = kms_key_name + + @property + def kms_key_name(self): + """str: Resource ID of Cloud KMS key + + Resource ID of Cloud KMS key or :data:`None` if using default + encryption. + """ + return self._properties.get("kmsKeyName") + + @kms_key_name.setter + def kms_key_name(self, value): + self._properties["kmsKeyName"] = value + + @classmethod + def from_api_repr(cls, resource): + """Construct an encryption configuration from its API representation + + Args: + resource (Dict[str, object]): + An encryption configuration representation as returned from + the API. + + Returns: + google.cloud.bigquery.table.EncryptionConfiguration: + An encryption configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + def to_api_repr(self): + """Construct the API resource representation of this encryption + configuration. + + Returns: + Dict[str, object]: + Encryption configuration as represented as an API resource + """ + return copy.deepcopy(self._properties) + + def __eq__(self, other): + if not isinstance(other, EncryptionConfiguration): + return NotImplemented + return self.kms_key_name == other.kms_key_name + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self.kms_key_name) + + def __repr__(self): + return "EncryptionConfiguration({})".format(self.kms_key_name) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index ed58d7b89185..a8e75835c6ea 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -37,12 +37,12 @@ from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _EmptyRowIterator -from google.cloud.bigquery.table import EncryptionConfiguration from google.cloud.bigquery.table import _table_arg_to_table_ref from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery import _helpers +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" @@ -1040,7 +1040,7 @@ def create_disposition(self, value): @property def destination_encryption_configuration(self): - """google.cloud.bigquery.table.EncryptionConfiguration: Custom + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom encryption configuration for the destination table. Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` @@ -1434,7 +1434,7 @@ def schema(self): @property def destination_encryption_configuration(self): - """google.cloud.bigquery.table.EncryptionConfiguration: Custom + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom encryption configuration for the destination table. Custom encryption configuration (e.g., Cloud KMS keys) @@ -1638,7 +1638,7 @@ def write_disposition(self, value): @property def destination_encryption_configuration(self): - """google.cloud.bigquery.table.EncryptionConfiguration: Custom + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom encryption configuration for the destination table. Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` @@ -1709,7 +1709,7 @@ def write_disposition(self): @property def destination_encryption_configuration(self): - """google.cloud.bigquery.table.EncryptionConfiguration: Custom + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom encryption configuration for the destination table. Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` @@ -2041,7 +2041,7 @@ def __init__(self, **kwargs): @property def destination_encryption_configuration(self): - """google.cloud.bigquery.table.EncryptionConfiguration: Custom + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom encryption configuration for the destination table. Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` @@ -2460,7 +2460,7 @@ def destination(self): @property def destination_encryption_configuration(self): - """google.cloud.bigquery.table.EncryptionConfiguration: Custom + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom encryption configuration for the destination table. Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index f0c3ee79f33e..7bad752ea658 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -25,6 +25,7 @@ from google.api_core import datetime_helpers from google.cloud.bigquery import _helpers from google.cloud.bigquery_v2 import types +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration class Model(object): @@ -48,6 +49,7 @@ class Model(object): # have an exhaustive list of all mutable properties. "labels": "labels", "description": "description", + "encryption_configuration": "encryptionConfiguration", } def __init__(self, model_ref): @@ -253,6 +255,30 @@ def labels(self, value): value = {} self._properties["labels"] = value + @property + def encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the model. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See `protecting data with Cloud KMS keys + `_ + in the BigQuery documentation. + """ + prop = self._properties.get("encryptionConfiguration") + if prop: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @encryption_configuration.setter + def encryption_configuration(self, value): + api_repr = value + if value: + api_repr = value.to_api_repr() + self._properties["encryptionConfiguration"] = api_repr + @classmethod def from_api_repr(cls, resource): """Factory: construct a model resource given its API representation diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 4373d99c590f..90cd5d96406a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -55,6 +55,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration _LOGGER = logging.getLogger(__name__) @@ -113,73 +114,6 @@ def _view_use_legacy_sql_getter(table): return True -class EncryptionConfiguration(object): - """Custom encryption configuration (e.g., Cloud KMS keys). - - Args: - kms_key_name (str): resource ID of Cloud KMS key used for encryption - """ - - def __init__(self, kms_key_name=None): - self._properties = {} - if kms_key_name is not None: - self._properties["kmsKeyName"] = kms_key_name - - @property - def kms_key_name(self): - """str: Resource ID of Cloud KMS key - - Resource ID of Cloud KMS key or :data:`None` if using default - encryption. - """ - return self._properties.get("kmsKeyName") - - @kms_key_name.setter - def kms_key_name(self, value): - self._properties["kmsKeyName"] = value - - @classmethod - def from_api_repr(cls, resource): - """Construct an encryption configuration from its API representation - - Args: - resource (Dict[str, object]): - An encryption configuration representation as returned from - the API. - - Returns: - google.cloud.bigquery.table.EncryptionConfiguration: - An encryption configuration parsed from ``resource``. - """ - config = cls() - config._properties = copy.deepcopy(resource) - return config - - def to_api_repr(self): - """Construct the API resource representation of this encryption - configuration. - - Returns: - Dict[str, object]: - Encryption configuration as represented as an API resource - """ - return copy.deepcopy(self._properties) - - def __eq__(self, other): - if not isinstance(other, EncryptionConfiguration): - return NotImplemented - return self.kms_key_name == other.kms_key_name - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash(self.kms_key_name) - - def __repr__(self): - return "EncryptionConfiguration({})".format(self.kms_key_name) - - class TableReference(object): """TableReferences are pointers to tables. @@ -479,7 +413,7 @@ def labels(self, value): @property def encryption_configuration(self): - """google.cloud.bigquery.table.EncryptionConfiguration: Custom + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom encryption configuration for the table. Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model.py b/packages/google-cloud-bigquery/tests/unit/model/test_model.py index b6d9756e15fe..bbb93ef9e897 100644 --- a/packages/google-cloud-bigquery/tests/unit/model/test_model.py +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model.py @@ -21,6 +21,8 @@ import google.cloud._helpers from google.cloud.bigquery_v2.gapic import enums +KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" + @pytest.fixture def target_class(): @@ -99,6 +101,7 @@ def test_from_api_repr(target_class): }, ], "featureColumns": [], + "encryptionConfiguration": {"kmsKeyName": KMS_KEY_NAME}, } got = target_class.from_api_repr(resource) @@ -116,6 +119,7 @@ def test_from_api_repr(target_class): assert got.friendly_name == u"A friendly name." assert got.model_type == enums.Model.ModelType.LOGISTIC_REGRESSION assert got.labels == {"greeting": u"こんにちは"} + assert got.encryption_configuration.kms_key_name == KMS_KEY_NAME assert got.training_runs[0].training_options.initial_learn_rate == 1.0 assert ( got.training_runs[0] @@ -160,6 +164,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert got.friendly_name is None assert got.model_type == enums.Model.ModelType.MODEL_TYPE_UNSPECIFIED assert got.labels == {} + assert got.encryption_configuration is None assert len(got.training_runs) == 0 assert len(got.feature_columns) == 0 assert len(got.label_columns) == 0 @@ -229,6 +234,17 @@ def test_from_api_repr_w_unknown_fields(target_class): ["labels"], {"labels": {"a-label": "a-value"}}, ), + ( + { + "friendlyName": "hello", + "description": "world", + "expirationTime": None, + "labels": {"a-label": "a-value"}, + "encryptionConfiguration": {"kmsKeyName": KMS_KEY_NAME}, + }, + ["encryptionConfiguration"], + {"encryptionConfiguration": {"kmsKeyName": KMS_KEY_NAME}}, + ), ], ) def test_build_resource(object_under_test, resource, filter_fields, expected): @@ -283,6 +299,18 @@ def test_replace_labels(object_under_test): assert object_under_test.labels == {} +def test_set_encryption_configuration(object_under_test): + from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration + + assert not object_under_test.encryption_configuration + object_under_test.encryption_configuration = EncryptionConfiguration( + kms_key_name=KMS_KEY_NAME + ) + assert object_under_test.encryption_configuration.kms_key_name == KMS_KEY_NAME + object_under_test.encryption_configuration = None + assert not object_under_test.encryption_configuration + + def test_repr(target_class): model = target_class("my-proj.my_dset.my_model") got = repr(model) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index db70eaa861ee..b8a367e17cb9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -81,7 +81,7 @@ class TestClient(unittest.TestCase): TABLE_ID = "TABLE_ID" MODEL_ID = "MODEL_ID" TABLE_REF = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) - KMS_KEY_NAME = "projects/1/locations/global/keyRings/1/cryptoKeys/1" + KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" LOCATION = "us-central" @staticmethod @@ -1074,7 +1074,9 @@ def test_create_table_w_custom_property(self): self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_encryption_configuration(self): - from google.cloud.bigquery.table import EncryptionConfiguration + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) from google.cloud.bigquery.table import Table path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) diff --git a/packages/google-cloud-bigquery/tests/unit/test_encryption_configuration.py b/packages/google-cloud-bigquery/tests/unit/test_encryption_configuration.py new file mode 100644 index 000000000000..f432a903b4cc --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_encryption_configuration.py @@ -0,0 +1,111 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import mock + + +class TestEncryptionConfiguration(unittest.TestCase): + KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + return EncryptionConfiguration + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + encryption_config = self._make_one() + self.assertIsNone(encryption_config.kms_key_name) + + def test_ctor_with_key(self): + encryption_config = self._make_one(kms_key_name=self.KMS_KEY_NAME) + self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) + + def test_kms_key_name_setter(self): + encryption_config = self._make_one() + self.assertIsNone(encryption_config.kms_key_name) + encryption_config.kms_key_name = self.KMS_KEY_NAME + self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) + encryption_config.kms_key_name = None + self.assertIsNone(encryption_config.kms_key_name) + + def test_from_api_repr(self): + RESOURCE = {"kmsKeyName": self.KMS_KEY_NAME} + klass = self._get_target_class() + encryption_config = klass.from_api_repr(RESOURCE) + self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) + + def test_to_api_repr(self): + encryption_config = self._make_one(kms_key_name=self.KMS_KEY_NAME) + resource = encryption_config.to_api_repr() + self.assertEqual(resource, {"kmsKeyName": self.KMS_KEY_NAME}) + + def test___eq___wrong_type(self): + encryption_config = self._make_one() + other = object() + self.assertNotEqual(encryption_config, other) + self.assertEqual(encryption_config, mock.ANY) + + def test___eq___kms_key_name_mismatch(self): + encryption_config = self._make_one() + other = self._make_one(self.KMS_KEY_NAME) + self.assertNotEqual(encryption_config, other) + + def test___eq___hit(self): + encryption_config = self._make_one(self.KMS_KEY_NAME) + other = self._make_one(self.KMS_KEY_NAME) + self.assertEqual(encryption_config, other) + + def test___ne___wrong_type(self): + encryption_config = self._make_one() + other = object() + self.assertNotEqual(encryption_config, other) + self.assertEqual(encryption_config, mock.ANY) + + def test___ne___same_value(self): + encryption_config1 = self._make_one(self.KMS_KEY_NAME) + encryption_config2 = self._make_one(self.KMS_KEY_NAME) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = encryption_config1 != encryption_config2 + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + encryption_config1 = self._make_one() + encryption_config2 = self._make_one(self.KMS_KEY_NAME) + self.assertNotEqual(encryption_config1, encryption_config2) + + def test___hash__set_equality(self): + encryption_config1 = self._make_one(self.KMS_KEY_NAME) + encryption_config2 = self._make_one(self.KMS_KEY_NAME) + set_one = {encryption_config1, encryption_config2} + set_two = {encryption_config1, encryption_config2} + self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + encryption_config1 = self._make_one() + encryption_config2 = self._make_one(self.KMS_KEY_NAME) + set_one = {encryption_config1} + set_two = {encryption_config2} + self.assertNotEqual(set_one, set_two) + + def test___repr__(self): + encryption_config = self._make_one(self.KMS_KEY_NAME) + expected = "EncryptionConfiguration({})".format(self.KMS_KEY_NAME) + self.assertEqual(repr(encryption_config), expected) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 9710085105c4..16964722ec2e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1077,7 +1077,7 @@ class _Base(object): TABLE_ID = "table_id" TABLE_REF = TableReference(DS_REF, TABLE_ID) JOB_ID = "JOB_ID" - KMS_KEY_NAME = "projects/1/locations/global/keyRings/1/cryptoKeys/1" + KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) @@ -1276,7 +1276,9 @@ def test_destination_encryption_configuration_missing(self): self.assertIsNone(config.destination_encryption_configuration) def test_destination_encryption_configuration_hit(self): - from google.cloud.bigquery.table import EncryptionConfiguration + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) kms_key_name = "kms-key-name" encryption_configuration = EncryptionConfiguration(kms_key_name) @@ -1289,7 +1291,9 @@ def test_destination_encryption_configuration_hit(self): ) def test_destination_encryption_configuration_setter(self): - from google.cloud.bigquery.table import EncryptionConfiguration + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) kms_key_name = "kms-key-name" encryption_configuration = EncryptionConfiguration(kms_key_name) @@ -2486,7 +2490,9 @@ def test_ctor_w_properties(self): self.assertEqual(config.write_disposition, write_disposition) def test_to_api_repr_with_encryption(self): - from google.cloud.bigquery.table import EncryptionConfiguration + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) config = self._make_one() config.destination_encryption_configuration = EncryptionConfiguration( @@ -3411,7 +3417,9 @@ def test_to_api_repr_normal(self): self.assertEqual(resource["someNewProperty"], "Woohoo, alpha stuff.") def test_to_api_repr_with_encryption(self): - from google.cloud.bigquery.table import EncryptionConfiguration + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) config = self._make_one() config.destination_encryption_configuration = EncryptionConfiguration( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 68fc71db3f10..dc2162d35fc9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -71,7 +71,7 @@ def _verifySchema(self, schema, resource): class TestEncryptionConfiguration(unittest.TestCase): - KMS_KEY_NAME = "projects/1/locations/global/keyRings/1/cryptoKeys/1" + KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" @staticmethod def _get_target_class(): @@ -90,78 +90,6 @@ def test_ctor_with_key(self): encryption_config = self._make_one(kms_key_name=self.KMS_KEY_NAME) self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) - def test_kms_key_name_setter(self): - encryption_config = self._make_one() - self.assertIsNone(encryption_config.kms_key_name) - encryption_config.kms_key_name = self.KMS_KEY_NAME - self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) - encryption_config.kms_key_name = None - self.assertIsNone(encryption_config.kms_key_name) - - def test_from_api_repr(self): - RESOURCE = {"kmsKeyName": self.KMS_KEY_NAME} - klass = self._get_target_class() - encryption_config = klass.from_api_repr(RESOURCE) - self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) - - def test_to_api_repr(self): - encryption_config = self._make_one(kms_key_name=self.KMS_KEY_NAME) - resource = encryption_config.to_api_repr() - self.assertEqual(resource, {"kmsKeyName": self.KMS_KEY_NAME}) - - def test___eq___wrong_type(self): - encryption_config = self._make_one() - other = object() - self.assertNotEqual(encryption_config, other) - self.assertEqual(encryption_config, mock.ANY) - - def test___eq___kms_key_name_mismatch(self): - encryption_config = self._make_one() - other = self._make_one(self.KMS_KEY_NAME) - self.assertNotEqual(encryption_config, other) - - def test___eq___hit(self): - encryption_config = self._make_one(self.KMS_KEY_NAME) - other = self._make_one(self.KMS_KEY_NAME) - self.assertEqual(encryption_config, other) - - def test___ne___wrong_type(self): - encryption_config = self._make_one() - other = object() - self.assertNotEqual(encryption_config, other) - self.assertEqual(encryption_config, mock.ANY) - - def test___ne___same_value(self): - encryption_config1 = self._make_one(self.KMS_KEY_NAME) - encryption_config2 = self._make_one(self.KMS_KEY_NAME) - # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = encryption_config1 != encryption_config2 - self.assertFalse(comparison_val) - - def test___ne___different_values(self): - encryption_config1 = self._make_one() - encryption_config2 = self._make_one(self.KMS_KEY_NAME) - self.assertNotEqual(encryption_config1, encryption_config2) - - def test___hash__set_equality(self): - encryption_config1 = self._make_one(self.KMS_KEY_NAME) - encryption_config2 = self._make_one(self.KMS_KEY_NAME) - set_one = {encryption_config1, encryption_config2} - set_two = {encryption_config1, encryption_config2} - self.assertEqual(set_one, set_two) - - def test___hash__not_equals(self): - encryption_config1 = self._make_one() - encryption_config2 = self._make_one(self.KMS_KEY_NAME) - set_one = {encryption_config1} - set_two = {encryption_config2} - self.assertNotEqual(set_one, set_two) - - def test___repr__(self): - encryption_config = self._make_one(self.KMS_KEY_NAME) - expected = "EncryptionConfiguration({})".format(self.KMS_KEY_NAME) - self.assertEqual(repr(encryption_config), expected) - class TestTableReference(unittest.TestCase): @staticmethod @@ -339,7 +267,7 @@ class TestTable(unittest.TestCase, _SchemaBase): PROJECT = "prahj-ekt" DS_ID = "dataset-name" TABLE_NAME = "table-name" - KMS_KEY_NAME = "projects/1/locations/global/keyRings/1/cryptoKeys/1" + KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" @staticmethod def _get_target_class(): @@ -1139,6 +1067,10 @@ def test_clustering_fields_setter_w_none_noop(self): self.assertFalse("clustering" in table._properties) def test_encryption_configuration_setter(self): + # Previously, the EncryptionConfiguration class was in the table module, not the + # encryption_configuration module. It was moved to support models encryption. + # This test import from the table module to ensure that the previous location + # continues to function as an alias. from google.cloud.bigquery.table import EncryptionConfiguration dataset = DatasetReference(self.PROJECT, self.DS_ID) From 856890d95d861a2f2632edaa02ed61fffc191db2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 17 Oct 2019 12:30:42 -0700 Subject: [PATCH 0687/2016] chore(bigquery): release 1.21.0 (#9484) --- packages/google-cloud-bigquery/CHANGELOG.md | 33 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 9170d004ecc7..1560e456a24e 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,39 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.21.0 + +10-16-2019 10:33 PDT + + +### New Features + +- add ability to pass in a table ID instead of a query to the `%%bigquery` magic ([#9170](https://github.com/googleapis/google-cloud-python/pull/9170)) +- add support for custom `QueryJobConfig` in `BigQuery.cursor.execute` method ([#9278](https://github.com/googleapis/google-cloud-python/pull/9278)) +- store `QueryJob` to destination var on error in `%%bigquery` magic ([#9245](https://github.com/googleapis/google-cloud-python/pull/9245)) +- add script statistics to job resource ([#9428](https://github.com/googleapis/google-cloud-python/pull/9428)) +- add support for sheets ranges ([#9416](https://github.com/googleapis/google-cloud-python/pull/9416)) +- add support for listing jobs by parent job ([#9225](https://github.com/googleapis/google-cloud-python/pull/9225)) +- expose customer managed encryption key for ML models ([#9302](https://github.com/googleapis/google-cloud-python/pull/9302)) +- add `Dataset.default_partition_expiration_ms` and `Table.require_partition_filter` properties ([#9464](https://github.com/googleapis/google-cloud-python/pull/9464)) + +### Dependencies + +- restrict version range of `google-resumable-media` ([#9243](https://github.com/googleapis/google-cloud-python/pull/9243)) + +### Documentation + +- document how to load data as JSON string ([#9231](https://github.com/googleapis/google-cloud-python/pull/9231)) +- standardize comments and formatting in existing code samples ([#9212](https://github.com/googleapis/google-cloud-python/pull/9212)) +- rewrite docstrings in Google style ([#9326](https://github.com/googleapis/google-cloud-python/pull/9326)) +- fix incorrect links to REST API in reference docs ([#9436](https://github.com/googleapis/google-cloud-python/pull/9436)) + +### Internal / Testing Changes + +- add code samples to lint check ([#9277](https://github.com/googleapis/google-cloud-python/pull/9277)) +- update code samples to use strings for table and dataset IDs ([#9136](https://github.com/googleapis/google-cloud-python/pull/9136)) +- simplify scripting system test to reduce flakiness ([#9458](https://github.com/googleapis/google-cloud-python/pull/9458)) + ## 1.20.0 09-13-2019 11:22 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 897a7eac7f36..e0f3edf19d45 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.20.0" +version = "1.21.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From b75600e795abb2915893908ed7f3f1461816deea Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 17 Oct 2019 17:01:03 -0700 Subject: [PATCH 0688/2016] feat(bigquery): add range partitioning to tables, load jobs, and query jobs (#9477) * feat(bigquery): add range partitioning to tables, load jobs, and query jobs These classes and properties add support for the integer range partitioning feature. These offer more flexibility in partitioning options than time-based partitioning. * Add integer range partitioning classes to bigquery module * Rename PartitionRange to RangeDefinition * Revert "Rename PartitionRange to RangeDefinition" This reverts commit 9bb5d8beb95eb394de6b84c1e9409cf9b3463bad. * Add Beta disclaimer to range partitioning features. --- .../google-cloud-bigquery/docs/reference.rst | 6 +- .../google/cloud/bigquery/__init__.py | 11 +- .../google/cloud/bigquery/job.py | 105 +++++++++- .../google/cloud/bigquery/table.py | 185 +++++++++++++++++- .../tests/unit/test_job.py | 78 ++++++++ .../tests/unit/test_table.py | 113 +++++++++++ 6 files changed, 487 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index a0fc0e1ead70..981059de5226 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -83,11 +83,13 @@ Table .. autosummary:: :toctree: generated + table.PartitionRange + table.RangePartitioning + table.Row + table.RowIterator table.Table table.TableListItem table.TableReference - table.Row - table.RowIterator table.TimePartitioning table.TimePartitioningType diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index da13375365e9..3982c1175850 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -73,9 +73,11 @@ from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.table import PartitionRange +from google.cloud.bigquery.table import RangePartitioning +from google.cloud.bigquery.table import Row from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference -from google.cloud.bigquery.table import Row from google.cloud.bigquery.table import TimePartitioningType from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -96,7 +98,12 @@ # Tables "Table", "TableReference", + "PartitionRange", + "RangePartitioning", "Row", + "TimePartitioning", + "TimePartitioningType", + # Jobs "CopyJob", "CopyJobConfig", "ExtractJob", @@ -104,8 +111,6 @@ "LoadJob", "LoadJobConfig", "UnknownJob", - "TimePartitioningType", - "TimePartitioning", # Models "Model", "ModelReference", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index a8e75835c6ea..cfc5a3797c70 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -27,7 +27,9 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import _query_param_from_api_repr from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter @@ -37,12 +39,11 @@ from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _EmptyRowIterator +from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import _table_arg_to_table_ref from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TimePartitioning -from google.cloud.bigquery import _helpers -from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" @@ -1180,6 +1181,40 @@ def quote_character(self): def quote_character(self, value): self._set_sub_prop("quote", value) + @property + def range_partitioning(self): + """Optional[google.cloud.bigquery.table.RangePartitioning]: + Configures range-based partitioning for destination table. + + .. note:: + **Beta**. The integer range partitioning feature is in a + pre-release state and might change or have limited support. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.RangePartitioning` or + :data:`None`. + """ + resource = self._get_sub_prop("rangePartitioning") + if resource is not None: + return RangePartitioning(_properties=resource) + + @range_partitioning.setter + def range_partitioning(self, value): + resource = value + if isinstance(value, RangePartitioning): + resource = value._properties + elif value is not None: + raise ValueError( + "Expected value to be RangePartitioning or None, got {}.".format(value) + ) + self._set_sub_prop("rangePartitioning", resource) + @property def schema(self): """List[google.cloud.bigquery.schema.SchemaField]: Schema of the @@ -1249,6 +1284,10 @@ def source_format(self, value): def time_partitioning(self): """google.cloud.bigquery.table.TimePartitioning: Specifies time-based partitioning for the destination table. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. """ prop = self._get_sub_prop("timePartitioning") if prop is not None: @@ -1463,6 +1502,13 @@ def destination_table_friendly_name(self): """ return self._configuration.destination_table_friendly_name + @property + def range_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + """ + return self._configuration.range_partitioning + @property def time_partitioning(self): """See @@ -2242,6 +2288,40 @@ def query_parameters(self): def query_parameters(self, values): self._set_sub_prop("queryParameters", _to_api_repr_query_parameters(values)) + @property + def range_partitioning(self): + """Optional[google.cloud.bigquery.table.RangePartitioning]: + Configures range-based partitioning for destination table. + + .. note:: + **Beta**. The integer range partitioning feature is in a + pre-release state and might change or have limited support. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.RangePartitioning` or + :data:`None`. + """ + resource = self._get_sub_prop("rangePartitioning") + if resource is not None: + return RangePartitioning(_properties=resource) + + @range_partitioning.setter + def range_partitioning(self, value): + resource = value + if isinstance(value, RangePartitioning): + resource = value._properties + elif value is not None: + raise ValueError( + "Expected value to be RangePartitioning or None, got {}.".format(value) + ) + self._set_sub_prop("rangePartitioning", resource) + @property def udf_resources(self): """List[google.cloud.bigquery.query.UDFResource]: user @@ -2318,8 +2398,18 @@ def table_definitions(self, values): @property def time_partitioning(self): - """google.cloud.bigquery.table.TimePartitioning: Specifies time-based - partitioning for the destination table. + """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies + time-based partitioning for the destination table. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.TimePartitioning` or + :data:`None`. """ prop = self._get_sub_prop("timePartitioning") if prop is not None: @@ -2552,6 +2642,13 @@ def maximum_bytes_billed(self): """ return self._configuration.maximum_bytes_billed + @property + def range_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`. + """ + return self._configuration.range_partitioning + @property def table_definitions(self): """See diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 90cd5d96406a..72ff8f71385c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -505,14 +505,54 @@ def table_type(self): """ return self._properties.get("type") + @property + def range_partitioning(self): + """Optional[google.cloud.bigquery.table.RangePartitioning]: + Configures range-based partitioning for a table. + + .. note:: + **Beta**. The integer range partitioning feature is in a + pre-release state and might change or have limited support. + + Only specify at most one of + :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or + :attr:`~google.cloud.bigquery.table.Table.range_partitioning`. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.RangePartitioning` or + :data:`None`. + """ + resource = self._properties.get("rangePartitioning") + if resource is not None: + return RangePartitioning(_properties=resource) + + @range_partitioning.setter + def range_partitioning(self, value): + resource = value + if isinstance(value, RangePartitioning): + resource = value._properties + elif value is not None: + raise ValueError( + "Expected value to be RangePartitioning or None, got {}.".format(value) + ) + self._properties["rangePartitioning"] = resource + @property def time_partitioning(self): - """google.cloud.bigquery.table.TimePartitioning: Configures time-based + """Optional[google.cloud.bigquery.table.TimePartitioning]: Configures time-based partitioning for a table. + Only specify at most one of + :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or + :attr:`~google.cloud.bigquery.table.Table.range_partitioning`. + Raises: ValueError: - If the value is not :class:`TimePartitioning` or :data:`None`. + If the value is not + :class:`~google.cloud.bigquery.table.TimePartitioning` or + :data:`None`. """ prop = self._properties.get("timePartitioning") if prop is not None: @@ -1645,6 +1685,147 @@ def __iter__(self): return iter(()) +class PartitionRange(object): + """Definition of the ranges for range partitioning. + + .. note:: + **Beta**. The integer range partitioning feature is in a pre-release + state and might change or have limited support. + + Args: + start (Optional[int]): + Sets the + :attr:`~google.cloud.bigquery.table.PartitionRange.start` + property. + end (Optional[int]): + Sets the + :attr:`~google.cloud.bigquery.table.PartitionRange.end` + property. + interval (Optional[int]): + Sets the + :attr:`~google.cloud.bigquery.table.PartitionRange.interval` + property. + _properties (Optional[dict]): + Private. Used to construct object from API resource. + """ + + def __init__(self, start=None, end=None, interval=None, _properties=None): + if _properties is None: + _properties = {} + self._properties = _properties + + if start is not None: + self.start = start + if end is not None: + self.end = end + if interval is not None: + self.interval = interval + + @property + def start(self): + """int: The start of range partitioning, inclusive.""" + return _helpers._int_or_none(self._properties.get("start")) + + @start.setter + def start(self, value): + self._properties["start"] = _helpers._str_or_none(value) + + @property + def end(self): + """int: The end of range partitioning, exclusive.""" + return _helpers._int_or_none(self._properties.get("end")) + + @end.setter + def end(self, value): + self._properties["end"] = _helpers._str_or_none(value) + + @property + def interval(self): + """int: The width of each interval.""" + return _helpers._int_or_none(self._properties.get("interval")) + + @interval.setter + def interval(self, value): + self._properties["interval"] = _helpers._str_or_none(value) + + def _key(self): + return tuple(sorted(self._properties.items())) + + def __repr__(self): + key_vals = ["{}={}".format(key, val) for key, val in self._key()] + return "PartitionRange({})".format(", ".join(key_vals)) + + +class RangePartitioning(object): + """Range-based partitioning configuration for a table. + + .. note:: + **Beta**. The integer range partitioning feature is in a pre-release + state and might change or have limited support. + + Args: + range_ (Optional[google.cloud.bigquery.table.PartitionRange]): + Sets the + :attr:`google.cloud.bigquery.table.RangePartitioning.range_` + property. + field (Optional[str]): + Sets the + :attr:`google.cloud.bigquery.table.RangePartitioning.field` + property. + _properties (Optional[dict]): + Private. Used to construct object from API resource. + """ + + def __init__(self, range_=None, field=None, _properties=None): + if _properties is None: + _properties = {} + self._properties = _properties + + if range_ is not None: + self.range_ = range_ + if field is not None: + self.field = field + + # Trailing underscore to prevent conflict with built-in range() function. + @property + def range_(self): + """google.cloud.bigquery.table.PartitionRange: Defines the + ranges for range partitioning. + + Raises: + ValueError: + If the value is not a :class:`PartitionRange`. + """ + range_properties = self._properties.setdefault("range", {}) + return PartitionRange(_properties=range_properties) + + @range_.setter + def range_(self, value): + if not isinstance(value, PartitionRange): + raise ValueError("Expected a PartitionRange, but got {}.".format(value)) + self._properties["range"] = value._properties + + @property + def field(self): + """str: The table is partitioned by this field. + + The field must be a top-level ``NULLABLE`` / ``REQUIRED`` field. The + only supported type is ``INTEGER`` / ``INT64``. + """ + return self._properties.get("field") + + @field.setter + def field(self, value): + self._properties["field"] = value + + def _key(self): + return (("field", self.field), ("range_", self.range_)) + + def __repr__(self): + key_vals = ["{}={}".format(key, repr(val)) for key, val in self._key()] + return "RangePartitioning({})".format(", ".join(key_vals)) + + class TimePartitioningType(object): """Specifies the type of time partitioning to perform.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 16964722ec2e..5f3d3ee965b8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1638,6 +1638,44 @@ def test_source_format_setter(self): config.source_format = source_format self.assertEqual(config._properties["load"]["sourceFormat"], source_format) + def test_range_partitioning_w_none(self): + object_under_test = self._get_target_class()() + assert object_under_test.range_partitioning is None + + def test_range_partitioning_w_value(self): + object_under_test = self._get_target_class()() + object_under_test._properties["load"]["rangePartitioning"] = { + "field": "column_one", + "range": {"start": 1, "end": 1000, "interval": 10}, + } + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter(self): + from google.cloud.bigquery.table import PartitionRange + from google.cloud.bigquery.table import RangePartitioning + + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = RangePartitioning( + field="column_one", range_=PartitionRange(start=1, end=1000, interval=10) + ) + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter_w_none(self): + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = None + assert object_under_test.range_partitioning is None + + def test_range_partitioning_setter_w_wrong_type(self): + object_under_test = self._get_target_class()() + with pytest.raises(ValueError, match="RangePartitioning"): + object_under_test.range_partitioning = object() + def test_time_partitioning_miss(self): config = self._get_target_class()() self.assertIsNone(config.time_partitioning) @@ -1892,6 +1930,7 @@ def test_ctor(self): self.assertIsNone(job.destination_encryption_configuration) self.assertIsNone(job.destination_table_description) self.assertIsNone(job.destination_table_friendly_name) + self.assertIsNone(job.range_partitioning) self.assertIsNone(job.time_partitioning) self.assertIsNone(job.use_avro_logical_types) self.assertIsNone(job.clustering_fields) @@ -3328,6 +3367,44 @@ def test_destinaton_w_string(self): expected = table.TableReference.from_string(destination) self.assertEqual(config.destination, expected) + def test_range_partitioning_w_none(self): + object_under_test = self._get_target_class()() + assert object_under_test.range_partitioning is None + + def test_range_partitioning_w_value(self): + object_under_test = self._get_target_class()() + object_under_test._properties["query"]["rangePartitioning"] = { + "field": "column_one", + "range": {"start": 1, "end": 1000, "interval": 10}, + } + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter(self): + from google.cloud.bigquery.table import PartitionRange + from google.cloud.bigquery.table import RangePartitioning + + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = RangePartitioning( + field="column_one", range_=PartitionRange(start=1, end=1000, interval=10) + ) + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter_w_none(self): + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = None + assert object_under_test.range_partitioning is None + + def test_range_partitioning_setter_w_wrong_type(self): + object_under_test = self._get_target_class()() + with pytest.raises(ValueError, match="RangePartitioning"): + object_under_test.range_partitioning = object() + def test_time_partitioning(self): from google.cloud.bigquery import table @@ -3628,6 +3705,7 @@ def test_ctor_defaults(self): self.assertIsNone(job.maximum_bytes_billed) self.assertIsNone(job.table_definitions) self.assertIsNone(job.destination_encryption_configuration) + self.assertIsNone(job.range_partitioning) self.assertIsNone(job.time_partitioning) self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index dc2162d35fc9..b04a4491e6ca 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -856,6 +856,29 @@ def test__build_resource_w_custom_field_not_in__properties(self): with self.assertRaises(ValueError): table._build_resource(["bad"]) + def test_range_partitioning(self): + from google.cloud.bigquery.table import RangePartitioning + from google.cloud.bigquery.table import PartitionRange + + table = self._make_one("proj.dset.tbl") + assert table.range_partitioning is None + + table.range_partitioning = RangePartitioning( + field="col1", range_=PartitionRange(start=-512, end=1024, interval=128) + ) + assert table.range_partitioning.field == "col1" + assert table.range_partitioning.range_.start == -512 + assert table.range_partitioning.range_.end == 1024 + assert table.range_partitioning.range_.interval == 128 + + table.range_partitioning = None + assert table.range_partitioning is None + + def test_range_partitioning_w_wrong_type(self): + object_under_test = self._make_one("proj.dset.tbl") + with pytest.raises(ValueError, match="RangePartitioning"): + object_under_test.range_partitioning = object() + def test_require_partitioning_filter(self): table = self._make_one("proj.dset.tbl") assert table.require_partition_filter is None @@ -2777,6 +2800,96 @@ def test_to_dataframe_w_bqstorage_snapshot(self): row_iterator.to_dataframe(bqstorage_client) +class TestPartitionRange(unittest.TestCase): + def _get_target_class(self): + from google.cloud.bigquery.table import PartitionRange + + return PartitionRange + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_constructor_defaults(self): + object_under_test = self._make_one() + assert object_under_test.start is None + assert object_under_test.end is None + assert object_under_test.interval is None + + def test_constructor_w_properties(self): + object_under_test = self._make_one(start=1, end=10, interval=2) + assert object_under_test.start == 1 + assert object_under_test.end == 10 + assert object_under_test.interval == 2 + + def test_constructor_w_resource(self): + object_under_test = self._make_one( + _properties={"start": -1234567890, "end": 1234567890, "interval": 1000000} + ) + assert object_under_test.start == -1234567890 + assert object_under_test.end == 1234567890 + assert object_under_test.interval == 1000000 + + def test_repr(self): + object_under_test = self._make_one(start=1, end=10, interval=2) + assert repr(object_under_test) == "PartitionRange(end=10, interval=2, start=1)" + + +class TestRangePartitioning(unittest.TestCase): + def _get_target_class(self): + from google.cloud.bigquery.table import RangePartitioning + + return RangePartitioning + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_constructor_defaults(self): + object_under_test = self._make_one() + assert object_under_test.field is None + assert object_under_test.range_.start is None + assert object_under_test.range_.end is None + assert object_under_test.range_.interval is None + + def test_constructor_w_properties(self): + from google.cloud.bigquery.table import PartitionRange + + object_under_test = self._make_one( + range_=PartitionRange(start=1, end=10, interval=2), field="integer_col" + ) + assert object_under_test.field == "integer_col" + assert object_under_test.range_.start == 1 + assert object_under_test.range_.end == 10 + assert object_under_test.range_.interval == 2 + + def test_constructor_w_resource(self): + object_under_test = self._make_one( + _properties={ + "field": "some_column", + "range": {"start": -1234567890, "end": 1234567890, "interval": 1000000}, + } + ) + assert object_under_test.field == "some_column" + assert object_under_test.range_.start == -1234567890 + assert object_under_test.range_.end == 1234567890 + assert object_under_test.range_.interval == 1000000 + + def test_range_w_wrong_type(self): + object_under_test = self._make_one() + with pytest.raises(ValueError, match="PartitionRange"): + object_under_test.range_ = object() + + def test_repr(self): + from google.cloud.bigquery.table import PartitionRange + + object_under_test = self._make_one( + range_=PartitionRange(start=1, end=10, interval=2), field="integer_col" + ) + assert ( + repr(object_under_test) + == "RangePartitioning(field='integer_col', range_=PartitionRange(end=10, interval=2, start=1))" + ) + + class TestTimePartitioning(unittest.TestCase): def _get_target_class(self): from google.cloud.bigquery.table import TimePartitioning From 02dbf5789eba9093f0bccdf9a6fed2206def56af Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Sat, 19 Oct 2019 05:02:39 +0530 Subject: [PATCH 0689/2016] feat(bigquery): implement defaultEncryptionConfiguration on datasets (#9489) * feat: add customer managed encryption key for dataset * change as recommended. * change paramter name as per document * cosmetic changes * feat(bigquery): refactor class imports as it moved to new file * feat(bigquery): location name updated in key as suggested --- .../google/cloud/bigquery/dataset.py | 26 +++++++++++++++++++ .../tests/unit/test_dataset.py | 25 ++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index ced80581a758..754a2fa00d00 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -24,6 +24,7 @@ from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration def _get_table_reference(self, table_id): @@ -361,6 +362,7 @@ class Dataset(object): "default_partition_expiration_ms": "defaultPartitionExpirationMs", "default_table_expiration_ms": "defaultTableExpirationMs", "friendly_name": "friendlyName", + "default_encryption_configuration": "defaultEncryptionConfiguration", } def __init__(self, dataset_ref): @@ -573,6 +575,30 @@ def labels(self, value): raise ValueError("Pass a dict") self._properties["labels"] = value + @property + def default_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for all tables in the dataset. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See `protecting data with Cloud KMS keys + `_ + in the BigQuery documentation. + """ + prop = self._properties.get("defaultEncryptionConfiguration") + if prop: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @default_encryption_configuration.setter + def default_encryption_configuration(self, value): + api_repr = value + if value: + api_repr = value.to_api_repr() + self._properties["defaultEncryptionConfiguration"] = api_repr + @classmethod def from_string(cls, full_dataset_id): """Construct a dataset from fully-qualified dataset ID. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 9b2276480843..ac13e00932ba 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -275,6 +275,7 @@ class TestDataset(unittest.TestCase): PROJECT = "project" DS_ID = "dataset-id" DS_REF = DatasetReference(PROJECT, DS_ID) + KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" @staticmethod def _get_target_class(): @@ -314,6 +315,7 @@ def _make_resource(self): {"role": "WRITER", "specialGroup": "projectWriters"}, {"role": "READER", "specialGroup": "projectReaders"}, ], + "defaultEncryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, } def _verify_access_entry(self, access_entries, resource): @@ -369,6 +371,13 @@ def _verify_resource_properties(self, dataset, resource): self.assertEqual(dataset.description, resource.get("description")) self.assertEqual(dataset.friendly_name, resource.get("friendlyName")) self.assertEqual(dataset.location, resource.get("location")) + if "defaultEncryptionConfiguration" in resource: + self.assertEqual( + dataset.default_encryption_configuration.kms_key_name, + resource.get("defaultEncryptionConfiguration")["kmsKeyName"], + ) + else: + self.assertIsNone(dataset.default_encryption_configuration) if "access" in resource: self._verify_access_entry(dataset.access_entries, resource) @@ -558,6 +567,22 @@ def test_to_api_repr_w_custom_field(self): } self.assertEqual(resource, exp_resource) + def test_default_encryption_configuration_setter(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + dataset = self._make_one(self.DS_REF) + encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME + ) + dataset.default_encryption_configuration = encryption_configuration + self.assertEqual( + dataset.default_encryption_configuration.kms_key_name, self.KMS_KEY_NAME + ) + dataset.default_encryption_configuration = None + self.assertIsNone(dataset.default_encryption_configuration) + def test_from_string(self): cls = self._get_target_class() got = cls.from_string("string-project.string_dataset") From a9de1af44ee6a6886c3a31e342757dcb3e2cf16c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 23 Oct 2019 01:59:21 +0300 Subject: [PATCH 0690/2016] fix(bigquery): fix arrow deprecation warning (#9504) This commit fixes a warning that can be issued when downloading results as pyarrow record batches. --- .../google/cloud/bigquery/_pandas_helpers.py | 16 ++++- .../tests/unit/test__pandas_helpers.py | 72 +++++++++++++++++++ 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index bfbaf92bbe38..fc0010361f24 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -380,11 +380,23 @@ def _tabledata_list_page_to_arrow(page, column_names, arrow_types): for column_index, arrow_type in enumerate(arrow_types): arrays.append(pyarrow.array(page._columns[column_index], type=arrow_type)) - return pyarrow.RecordBatch.from_arrays(arrays, column_names) + if isinstance(column_names, pyarrow.Schema): + return pyarrow.RecordBatch.from_arrays(arrays, schema=column_names) + return pyarrow.RecordBatch.from_arrays(arrays, names=column_names) def download_arrow_tabledata_list(pages, schema): - """Use tabledata.list to construct an iterable of RecordBatches.""" + """Use tabledata.list to construct an iterable of RecordBatches. + + Args: + pages (Iterator[:class:`google.api_core.page_iterator.Page`]): + An iterator over the result pages. + schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + A decription of the fields in result pages. + Yields: + :class:`pyarrow.RecordBatch` + The next page of records as a ``pyarrow`` record batch. + """ column_names = bq_to_arrow_schema(schema) or [field.name for field in schema] arrow_types = [bq_to_arrow_data_type(field) for field in schema] diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index b539abe9a89a..46fb59180740 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -34,6 +34,7 @@ import pytest import pytz +from google import api_core from google.cloud.bigquery import schema @@ -905,3 +906,74 @@ def test_dataframe_to_parquet_compression_method(module_under_test): call_args = fake_write_table.call_args assert call_args is not None assert call_args.kwargs.get("compression") == "ZSTD" + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): + fake_page = api_core.page_iterator.Page( + parent=mock.Mock(), + items=[{"page_data": "foo"}], + item_to_value=api_core.page_iterator._item_to_value_identity, + ) + fake_page._columns = [[1, 10, 100], [2.2, 22.22, 222.222]] + pages = [fake_page] + + bq_schema = [ + schema.SchemaField("population_size", "INTEGER"), + schema.SchemaField("alien_field", "ALIEN_FLOAT_TYPE"), + ] + + results_gen = module_under_test.download_arrow_tabledata_list(pages, bq_schema) + + with warnings.catch_warnings(record=True) as warned: + result = next(results_gen) + + unwanted_warnings = [ + warning + for warning in warned + if "please pass schema= explicitly" in str(warning).lower() + ] + assert not unwanted_warnings + + assert len(result.columns) == 2 + col = result.columns[0] + assert type(col) is pyarrow.lib.Int64Array + assert list(col) == [1, 10, 100] + col = result.columns[1] + assert type(col) is pyarrow.lib.DoubleArray + assert list(col) == [2.2, 22.22, 222.222] + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_download_arrow_tabledata_list_known_field_type(module_under_test): + fake_page = api_core.page_iterator.Page( + parent=mock.Mock(), + items=[{"page_data": "foo"}], + item_to_value=api_core.page_iterator._item_to_value_identity, + ) + fake_page._columns = [[1, 10, 100], ["2.2", "22.22", "222.222"]] + pages = [fake_page] + + bq_schema = [ + schema.SchemaField("population_size", "INTEGER"), + schema.SchemaField("non_alien_field", "STRING"), + ] + + results_gen = module_under_test.download_arrow_tabledata_list(pages, bq_schema) + with warnings.catch_warnings(record=True) as warned: + result = next(results_gen) + + unwanted_warnings = [ + warning + for warning in warned + if "please pass schema= explicitly" in str(warning).lower() + ] + assert not unwanted_warnings + + assert len(result.columns) == 2 + col = result.columns[0] + assert type(col) is pyarrow.lib.Int64Array + assert list(col) == [1, 10, 100] + col = result.columns[1] + assert type(col) is pyarrow.lib.StringArray + assert list(col) == ["2.2", "22.22", "222.222"] From fe8a9070135784fd1117165a12cae0dbf5f8253e Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 23 Oct 2019 11:07:17 +0530 Subject: [PATCH 0691/2016] feat(bigquery): add TypeError if wrong job_config type is passed to client job methods (#9506) * feat(bigquery): raise TypeError if wrong job_config type is passed to client job methods * feat(bigquery): cosmetic changes * feat(bigquery): code refactor --- .../google/cloud/bigquery/_helpers.py | 15 ++ .../google/cloud/bigquery/client.py | 69 +++++- .../tests/unit/test_client.py | 203 +++++++++++++++++- 3 files changed, 279 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index eb5161c9fe71..bcb9d0696bc3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -658,3 +658,18 @@ def _build_resource_from_properties(obj, filter_fields): partial[filter_field] = obj._properties[filter_field] return partial + + +def _verify_job_config_type(job_config, expected_type, param_name="job_config"): + if not isinstance(job_config, expected_type): + msg = ( + "Expected an instance of {expected_type} class for the {param_name} parameter, " + "but received {param_name} = {job_config}" + ) + raise TypeError( + msg.format( + expected_type=expected_type.__name__, + param_name=param_name, + job_config=job_config, + ) + ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1ad107ba8151..e7810dbbd66a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -53,6 +53,7 @@ from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset @@ -1355,6 +1356,11 @@ def load_table_from_uri( Returns: google.cloud.bigquery.job.LoadJob: A new load job. + + Raises: + TypeError: + If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` + class. """ job_id = _make_job_id(job_id, job_id_prefix) @@ -1370,6 +1376,10 @@ def load_table_from_uri( source_uris = [source_uris] destination = _table_arg_to_table_ref(destination, default_project=self.project) + + if job_config: + _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) + load_job = job.LoadJob(job_ref, source_uris, destination, self, job_config) load_job._begin(retry=retry) @@ -1436,6 +1446,10 @@ def load_table_from_file( If ``size`` is not passed in and can not be determined, or if the ``file_obj`` can be detected to be a file opened in text mode. + + TypeError: + If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` + class. """ job_id = _make_job_id(job_id, job_id_prefix) @@ -1447,6 +1461,8 @@ def load_table_from_file( destination = _table_arg_to_table_ref(destination, default_project=self.project) job_ref = job._JobReference(job_id, project=project, location=location) + if job_config: + _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) load_job = job.LoadJob(job_ref, None, destination, self, job_config) job_resource = load_job.to_api_repr() @@ -1545,16 +1561,22 @@ def load_table_from_dataframe( If a usable parquet engine cannot be found. This method requires :mod:`pyarrow` or :mod:`fastparquet` to be installed. + TypeError: + If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` + class. """ job_id = _make_job_id(job_id, job_id_prefix) - if job_config is None: - job_config = job.LoadJobConfig() - else: + if job_config: + _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) # Make a copy so that the job config isn't modified in-place. job_config_properties = copy.deepcopy(job_config._properties) job_config = job.LoadJobConfig() job_config._properties = job_config_properties + + else: + job_config = job.LoadJobConfig() + job_config.source_format = job.SourceFormat.PARQUET if location is None: @@ -1700,14 +1722,21 @@ def load_table_from_json( Returns: google.cloud.bigquery.job.LoadJob: A new load job. + + Raises: + TypeError: + If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` + class. """ job_id = _make_job_id(job_id, job_id_prefix) - if job_config is None: - job_config = job.LoadJobConfig() - else: + if job_config: + _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) # Make a copy so that the job config isn't modified in-place. job_config = copy.deepcopy(job_config) + else: + job_config = job.LoadJobConfig() + job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON if job_config.schema is None: @@ -1900,6 +1929,11 @@ def copy_table( Returns: google.cloud.bigquery.job.CopyJob: A new copy job instance. + + Raises: + TypeError: + If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.CopyJobConfig` + class. """ job_id = _make_job_id(job_id, job_id_prefix) @@ -1928,6 +1962,8 @@ def copy_table( destination = _table_arg_to_table_ref(destination, default_project=self.project) + if job_config: + _verify_job_config_type(job_config, google.cloud.bigquery.job.CopyJobConfig) copy_job = job.CopyJob( job_ref, sources, destination, client=self, job_config=job_config ) @@ -1985,6 +2021,11 @@ def extract_table( Returns: google.cloud.bigquery.job.ExtractJob: A new extract job instance. + + Raises: + TypeError: + If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.ExtractJobConfig` + class. """ job_id = _make_job_id(job_id, job_id_prefix) @@ -2000,6 +2041,10 @@ def extract_table( if isinstance(destination_uris, six.string_types): destination_uris = [destination_uris] + if job_config: + _verify_job_config_type( + job_config, google.cloud.bigquery.job.ExtractJobConfig + ) extract_job = job.ExtractJob( job_ref, source, destination_uris, client=self, job_config=job_config ) @@ -2049,6 +2094,11 @@ def query( Returns: google.cloud.bigquery.job.QueryJob: A new query job instance. + + Raises: + TypeError: + If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig` + class. """ job_id = _make_job_id(job_id, job_id_prefix) @@ -2060,6 +2110,9 @@ def query( if self._default_query_job_config: if job_config: + _verify_job_config_type( + job_config, google.cloud.bigquery.job.QueryJobConfig + ) # anything that's not defined on the incoming # that is in the default, # should be filled in with the default @@ -2068,6 +2121,10 @@ def query( self._default_query_job_config ) else: + _verify_job_config_type( + self._default_query_job_config, + google.cloud.bigquery.job.QueryJobConfig, + ) job_config = self._default_query_job_config job_ref = job._JobReference(job_id, project=project, location=location) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index b8a367e17cb9..91b9bc642187 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2973,7 +2973,7 @@ def test_list_jobs_w_parent_job_filter(self): conn.api_request.reset_mock() def test_load_table_from_uri(self): - from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import LoadJob, LoadJobConfig JOB = "job_name" DESTINATION = "destination_table" @@ -2993,11 +2993,14 @@ def test_load_table_from_uri(self): } creds = _make_credentials() http = object() + job_config = LoadJobConfig() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) destination = client.dataset(self.DS_ID).table(DESTINATION) - job = client.load_table_from_uri(SOURCE_URI, destination, job_id=JOB) + job = client.load_table_from_uri( + SOURCE_URI, destination, job_id=JOB, job_config=job_config + ) # Check that load_table_from_uri actually starts the job. conn.api_request.assert_called_once_with( @@ -3005,6 +3008,7 @@ def test_load_table_from_uri(self): ) self.assertIsInstance(job, LoadJob) + self.assertIsInstance(job._configuration, LoadJobConfig) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.source_uris), [SOURCE_URI]) @@ -3100,6 +3104,26 @@ def test_load_table_from_uri_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource ) + def test_load_table_from_uri_w_invalid_job_config(self): + from google.cloud.bigquery import job + + JOB = "job_name" + DESTINATION = "destination_table" + SOURCE_URI = "http://example.com/source.csv" + + creds = _make_credentials() + http = object() + job_config = job.CopyJobConfig() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + destination = client.dataset(self.DS_ID).table(DESTINATION) + + with self.assertRaises(TypeError) as exc: + client.load_table_from_uri( + SOURCE_URI, destination, job_id=JOB, job_config=job_config + ) + + self.assertIn("Expected an instance of LoadJobConfig", exc.exception.args[0]) + @staticmethod def _mock_requests_response(status_code, headers, content=b""): return mock.Mock( @@ -3422,6 +3446,66 @@ def test_copy_table_w_source_strings(self): ).table("destination_table") self.assertEqual(job.destination, expected_destination) + def test_copy_table_w_invalid_job_config(self): + from google.cloud.bigquery import job + + JOB = "job_name" + SOURCE = "source_table" + DESTINATION = "destination_table" + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + job_config = job.ExtractJobConfig() + dataset = client.dataset(self.DS_ID) + source = dataset.table(SOURCE) + destination = dataset.table(DESTINATION) + with self.assertRaises(TypeError) as exc: + client.copy_table(source, destination, job_id=JOB, job_config=job_config) + + self.assertIn("Expected an instance of CopyJobConfig", exc.exception.args[0]) + + def test_copy_table_w_valid_job_config(self): + from google.cloud.bigquery.job import CopyJobConfig + + JOB = "job_name" + SOURCE = "source_table" + DESTINATION = "destination_table" + RESOURCE = { + "jobReference": {"projectId": self.PROJECT, "jobId": JOB}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": SOURCE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": DESTINATION, + }, + } + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + job_config = CopyJobConfig() + conn = client._connection = make_connection(RESOURCE) + dataset = client.dataset(self.DS_ID) + source = dataset.table(SOURCE) + destination = dataset.table(DESTINATION) + + job = client.copy_table(source, destination, job_id=JOB, job_config=job_config) + # Check that copy_table actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE + ) + self.assertIsInstance(job._configuration, CopyJobConfig) + def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob @@ -3462,6 +3546,24 @@ def test_extract_table(self): self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) + def test_extract_table_w_invalid_job_config(self): + from google.cloud.bigquery import job + + JOB = "job_id" + SOURCE = "source_table" + DESTINATION = "gs://bucket_name/object_name" + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + dataset = client.dataset(self.DS_ID) + source = dataset.table(SOURCE) + job_config = job.LoadJobConfig() + with self.assertRaises(TypeError) as exc: + client.extract_table(source, DESTINATION, job_id=JOB, job_config=job_config) + + self.assertIn("Expected an instance of ExtractJobConfig", exc.exception.args[0]) + def test_extract_table_w_explicit_project(self): job_id = "job_id" source_id = "source_table" @@ -3745,6 +3847,35 @@ def test_query_w_explicit_job_config(self): method="POST", path="/projects/PROJECT/jobs", data=resource ) + def test_query_w_invalid_job_config(self): + from google.cloud.bigquery import QueryJobConfig, DatasetReference + from google.cloud.bigquery import job + + job_id = "some-job-id" + query = "select count(*) from persons" + creds = _make_credentials() + http = object() + default_job_config = QueryJobConfig() + default_job_config.default_dataset = DatasetReference( + self.PROJECT, "some-dataset" + ) + default_job_config.maximum_bytes_billed = 1000 + + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_query_job_config=default_job_config, + ) + + job_config = job.LoadJobConfig() + + with self.assertRaises(TypeError) as exc: + client.query( + query, job_id=job_id, location=self.LOCATION, job_config=job_config + ) + self.assertIn("Expected an instance of QueryJobConfig", exc.exception.args[0]) + def test_query_w_explicit_job_config_override(self): job_id = "some-job-id" query = "select count(*) from persons" @@ -3839,6 +3970,23 @@ def test_query_w_client_default_config_no_incoming(self): method="POST", path="/projects/PROJECT/jobs", data=resource ) + def test_query_w_invalid_default_job_config(self): + job_id = "some-job-id" + query = "select count(*) from persons" + creds = _make_credentials() + http = object() + default_job_config = object() + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_query_job_config=default_job_config, + ) + + with self.assertRaises(TypeError) as exc: + client.query(query, job_id=job_id, location=self.LOCATION) + self.assertIn("Expected an instance of QueryJobConfig", exc.exception.args[0]) + def test_query_w_client_location(self): job_id = "some-job-id" query = "select count(*) from persons" @@ -5419,6 +5567,19 @@ def test_load_table_from_file_bad_mode(self): with pytest.raises(ValueError): client.load_table_from_file(file_obj, self.TABLE_REF) + def test_load_table_from_file_w_invalid_job_config(self): + from google.cloud.bigquery import job + + client = self._make_client() + gzip_file = self._make_gzip_file_obj(writable=True) + config = job.QueryJobConfig() + with pytest.raises(TypeError) as exc: + client.load_table_from_file( + gzip_file, self.TABLE_REF, job_id="job_id", job_config=config + ) + err_msg = str(exc.value) + assert "Expected an instance of LoadJobConfig" in err_msg + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): @@ -6118,6 +6279,24 @@ def test_load_table_from_dataframe_w_nulls(self): assert sent_config.schema == schema assert sent_config.source_format == job.SourceFormat.PARQUET + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_w_invaild_job_config(self): + from google.cloud.bigquery import job + + client = self._make_client() + + records = [{"float_column": 3.14, "struct_column": [{"foo": 1}, {"bar": -1}]}] + dataframe = pandas.DataFrame(data=records) + job_config = job.CopyJobConfig() + + with pytest.raises(TypeError) as exc: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + err_msg = str(exc.value) + assert "Expected an instance of LoadJobConfig" in err_msg + def test_load_table_from_json_basic_use(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6206,6 +6385,26 @@ def test_load_table_from_json_non_default_args(self): # all properties should have been cloned and sent to the backend assert sent_config._properties.get("load", {}).get("unknown_field") == "foobar" + def test_load_table_from_json_w_invalid_job_config(self): + from google.cloud.bigquery import job + + client = self._make_client() + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + job_config = job.CopyJobConfig() + with pytest.raises(TypeError) as exc: + client.load_table_from_json( + json_rows, + self.TABLE_REF, + job_config=job_config, + project="project-x", + location="EU", + ) + err_msg = str(exc.value) + assert "Expected an instance of LoadJobConfig" in err_msg + # Low-level tests @classmethod From 6d4d09e41d76e569216841576777cbeb76fa4e42 Mon Sep 17 00:00:00 2001 From: Gurov Ilya Date: Thu, 24 Oct 2019 02:57:59 +0300 Subject: [PATCH 0692/2016] refactor(bigquery): use multi-regional key path for CMEK in snippets (#9523) * refactor(bigquery): use multi-regional key path for CMEK in snippets * black reformat --- packages/google-cloud-bigquery/docs/snippets.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index f76c645660bb..83795460a955 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -179,7 +179,7 @@ def test_create_table_cmek(client, to_delete): # Set the encryption key to use for the table. # TODO: Replace this key with a key you have created in Cloud KMS. kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us-central1", "test", "test" + "cloud-samples-tests", "us", "test", "test" ) table.encryption_configuration = bigquery.EncryptionConfiguration( kms_key_name=kms_key_name @@ -500,7 +500,7 @@ def test_update_table_cmek(client, to_delete): table = bigquery.Table(dataset.table(table_id)) original_kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us-central1", "test", "test" + "cloud-samples-tests", "us", "test", "test" ) table.encryption_configuration = bigquery.EncryptionConfiguration( kms_key_name=original_kms_key_name @@ -516,8 +516,7 @@ def test_update_table_cmek(client, to_delete): # Set a new encryption key to use for the destination. # TODO: Replace this key with a key you have created in KMS. updated_kms_key_name = ( - "projects/cloud-samples-tests/locations/us-central1/" - "keyRings/test/cryptoKeys/otherkey" + "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/otherkey" ) table.encryption_configuration = bigquery.EncryptionConfiguration( kms_key_name=updated_kms_key_name @@ -831,7 +830,7 @@ def test_load_table_from_uri_cmek(client, to_delete): # Set the encryption key to use for the destination. # TODO: Replace this key with a key you have created in KMS. kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us-central1", "test", "test" + "cloud-samples-tests", "us", "test", "test" ) encryption_config = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name) job_config.destination_encryption_configuration = encryption_config @@ -1305,7 +1304,7 @@ def test_copy_table_cmek(client, to_delete): # Set the encryption key to use for the destination. # TODO: Replace this key with a key you have created in KMS. kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us-central1", "test", "test" + "cloud-samples-tests", "us", "test", "test" ) encryption_config = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name) job_config = bigquery.CopyJobConfig() @@ -1685,7 +1684,7 @@ def test_client_query_destination_table_cmek(client, to_delete): # Set the encryption key to use for the destination. # TODO: Replace this key with a key you have created in KMS. kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us-central1", "test", "test" + "cloud-samples-tests", "us", "test", "test" ) encryption_config = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name) job_config.destination_encryption_configuration = encryption_config From dfac7c1d23ee3d9be3a72e9e6e0f1c94840aed40 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 24 Oct 2019 20:13:18 +0300 Subject: [PATCH 0693/2016] chore(bigquery): remove duplicate test dependencies (#9503) * chore(bigquery): remove duplicate test dependencies * Demote test_utils from LOCAL_DEPS in noxfile The test_utils dependency is only nedeed for test sessions, but not for some other nox sessions such as "lint" and "docs". --- packages/google-cloud-bigquery/noxfile.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index fc44db1c120e..a6d8094ebbc3 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -20,11 +20,7 @@ import nox -LOCAL_DEPS = ( - os.path.join("..", "api_core[grpc]"), - os.path.join("..", "core"), - os.path.join("..", "test_utils"), -) +LOCAL_DEPS = (os.path.join("..", "api_core[grpc]"), os.path.join("..", "core")) BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") @@ -42,6 +38,7 @@ def default(session): for local_dep in LOCAL_DEPS: session.install("-e", local_dep) + session.install("-e", os.path.join("..", "test_utils")) dev_install = ".[all]" session.install("-e", dev_install) From ed04171ee03cc1337cc04577ee4b0f4a07915718 Mon Sep 17 00:00:00 2001 From: Gurov Ilya Date: Sat, 26 Oct 2019 11:20:23 +0300 Subject: [PATCH 0694/2016] refactor(bigquery): rewrite docs in Google style, part 2 (#9481) towards issue #9092 --- .../google/cloud/bigquery/_helpers.py | 83 ++- .../google/cloud/bigquery/_pandas_helpers.py | 9 +- .../google/cloud/bigquery/client.py | 512 ++++++++------- .../google/cloud/bigquery/dbapi/_helpers.py | 55 +- .../google/cloud/bigquery/dbapi/cursor.py | 142 +++-- .../google/cloud/bigquery/job.py | 595 +++++++++--------- .../google/cloud/bigquery/query.py | 3 +- .../google/cloud/bigquery/table.py | 10 +- 8 files changed, 701 insertions(+), 708 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index bcb9d0696bc3..266bfc2c666c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -90,12 +90,15 @@ def _timestamp_query_param_from_json(value, field): Args: value (str): The timestamp. - field (.SchemaField): The field corresponding to the value. + + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. Returns: - Optional[datetime.datetime]: The parsed datetime object from - ``value`` if the ``field`` is not null (otherwise it is - :data:`None`). + Optional[datetime.datetime]: + The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). """ if _not_null(value, field): # Canonical formats for timestamps in BigQuery are flexible. See: @@ -125,12 +128,14 @@ def _datetime_from_json(value, field): Args: value (str): The timestamp. - field (.SchemaField): The field corresponding to the value. + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. Returns: - Optional[datetime.datetime]: The parsed datetime object from - ``value`` if the ``field`` is not null (otherwise it is - :data:`None`). + Optional[datetime.datetime]: + The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). """ if _not_null(value, field): if "." in value: @@ -217,15 +222,12 @@ def _row_tuple_from_json(row, schema): Note: ``row['f']`` and ``schema`` are presumed to be of the same length. - :type row: dict - :param row: A JSON response row to be converted. - - :type schema: tuple - :param schema: A tuple of - :class:`~google.cloud.bigquery.schema.SchemaField`. + Args: + row (Dict): A JSON response row to be converted. + schema (Tuple): A tuple of :class:`~google.cloud.bigquery.schema.SchemaField`. - :rtype: tuple - :returns: A tuple of data converted to native types. + Returns: + Tuple: A tuple of data converted to native types. """ row_data = [] for field, cell in zip(schema, row["f"]): @@ -344,16 +346,13 @@ def _scalar_field_to_json(field, row_value): """Maps a field and value to a JSON-safe value. Args: - field ( \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - ): + field (google.cloud.bigquery.schema.SchemaField): The SchemaField to use for type conversion and field name. - row_value (any): + row_value (Any): Value to be converted, based on the field's type. Returns: - any: - A JSON-serializable object. + Any: A JSON-serializable object. """ converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) if converter is None: # STRING doesn't need converting @@ -365,17 +364,14 @@ def _repeated_field_to_json(field, row_value): """Convert a repeated/array field to its JSON representation. Args: - field ( \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - ): + field (google.cloud.bigquery.schema.SchemaField): The SchemaField to use for type conversion and field name. The field mode must equal ``REPEATED``. - row_value (Sequence[any]): + row_value (Sequence[Any]): A sequence of values to convert to JSON-serializable values. Returns: - List[any]: - A list of JSON-serializable objects. + List[Any]: A list of JSON-serializable objects. """ # Remove the REPEATED, but keep the other fields. This allows us to process # each item as if it were a top-level field. @@ -391,17 +387,14 @@ def _record_field_to_json(fields, row_value): """Convert a record/struct field to its JSON representation. Args: - fields ( \ - Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`], \ - ): + fields (Sequence[google.cloud.bigquery.schema.SchemaField]): The :class:`~google.cloud.bigquery.schema.SchemaField`s of the record's subfields to use for type conversion and field names. row_value (Union[Tuple[Any], Mapping[str, Any]): A tuple or dictionary to convert to JSON-serializable values. Returns: - Mapping[str, any]: - A JSON-serializable dictionary. + Mapping[str, Any]: A JSON-serializable dictionary. """ record = {} isdict = isinstance(row_value, dict) @@ -420,22 +413,16 @@ def _field_to_json(field, row_value): """Convert a field into JSON-serializable values. Args: - field ( \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - ): + field (google.cloud.bigquery.schema.SchemaField): The SchemaField to use for type conversion and field name. - row_value (Union[ \ - Sequence[list], \ - any, \ - ]): + row_value (Union[Sequence[List], Any]): Row data to be inserted. If the SchemaField's mode is REPEATED, assume this is a list. If not, the type is inferred from the SchemaField's field_type. Returns: - any: - A JSON-serializable object. + Any: A JSON-serializable object. """ if row_value is None: return None @@ -461,9 +448,9 @@ def _get_sub_prop(container, keys, default=None): This method works like ``dict.get(key)``, but for nested values. Arguments: - container (dict): + container (Dict): A dictionary which may contain other dictionaries as values. - keys (iterable): + keys (Iterable): A sequence of keys to attempt to get the value for. Each item in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key @@ -504,9 +491,9 @@ def _set_sub_prop(container, keys, value): """Set a nested value in a dictionary. Arguments: - container (dict): + container (Dict): A dictionary which may contain other dictionaries as values. - keys (iterable): + keys (Iterable): A sequence of keys to attempt to set the value for. Each item in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key @@ -547,9 +534,9 @@ def _del_sub_prop(container, keys): """Remove a nested key fro a dictionary. Arguments: - container (dict): + container (Dict): A dictionary which may contain other dictionaries as values. - keys (iterable): + keys (Iterable): A sequence of keys to attempt to clear the value for. Each item in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index fc0010361f24..c7edf2ae51f5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -130,7 +130,8 @@ def bq_to_arrow_struct_data_type(field): def bq_to_arrow_data_type(field): """Return the Arrow data type, corresponding to a given BigQuery column. - Returns None if default Arrow type inspection should be used. + Returns: + None: if default Arrow type inspection should be used. """ if field.mode is not None and field.mode.upper() == "REPEATED": inner_type = bq_to_arrow_data_type( @@ -152,7 +153,8 @@ def bq_to_arrow_data_type(field): def bq_to_arrow_field(bq_field): """Return the Arrow field, corresponding to a given BigQuery column. - Returns None if the Arrow type cannot be determined. + Returns: + None: if the Arrow type cannot be determined. """ arrow_type = bq_to_arrow_data_type(bq_field) if arrow_type: @@ -166,7 +168,8 @@ def bq_to_arrow_field(bq_field): def bq_to_arrow_schema(bq_schema): """Return the Arrow schema, corresponding to a given BigQuery schema. - Returns None if any Arrow type cannot be determined. + Returns: + None: if any Arrow type cannot be determined. """ arrow_fields = [] for bq_field in bq_schema: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e7810dbbd66a..02bfc651af0d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -96,14 +96,12 @@ class Project(object): """Wrapper for resource describing a BigQuery project. - :type project_id: str - :param project_id: Opaque ID of the project + Args: + project_id (str): Opaque ID of the project - :type numeric_id: int - :param numeric_id: Numeric ID of the project + numeric_id (int): Numeric ID of the project - :type friendly_name: str - :param friendly_name: Display name of the project + friendly_name (str): Display name of the project """ def __init__(self, project_id, numeric_id, friendly_name): @@ -147,7 +145,7 @@ class Client(ClientWithProject): requests. If ``None``, then default info will be used. Generally, you only need to set this if you're developing your own library or partner tool. - client_options (Union[~google.api_core.client_options.ClientOptions, dict]): + client_options (Union[google.api_core.client_options.ClientOptions, Dict]): (Optional) Client options used to set user options on the client. API Endpoint should be set through client_options. @@ -231,25 +229,25 @@ def list_projects(self, max_results=None, page_token=None, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/projects/list - :type max_results: int - :param max_results: (Optional) maximum number of projects to return, - If not passed, defaults to a value set by the API. - - :type page_token: str - :param page_token: - (Optional) Token representing a cursor into the projects. If - not passed, the API will return the first page of projects. - The token marks the beginning of the iterator to be returned - and the value of the ``page_token`` can be accessed at - ``next_page_token`` of the - :class:`~google.api_core.page_iterator.HTTPIterator`. - - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. - - :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.client.Project` - accessible to the current client. + Args: + max_results (int): + (Optional) maximum number of projects to return, + If not passed, defaults to a value set by the API. + + page_token (str): + (Optional) Token representing a cursor into the projects. If + not passed, the API will return the first page of projects. + The token marks the beginning of the iterator to be returned + and the value of the ``page_token`` can be accessed at + ``next_page_token`` of the + :class:`~google.api_core.page_iterator.HTTPIterator`. + + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + + Returns: + google.api_core.page_iterator.Iterator: + Iterator of :class:`~google.cloud.bigquery.client.Project` + accessible to the current client. """ return page_iterator.HTTPIterator( client=self, @@ -300,8 +298,7 @@ def list_datasets( Returns: google.api_core.page_iterator.Iterator: - Iterator of - :class:`~google.cloud.bigquery.dataset.DatasetListItem`. + Iterator of :class:`~google.cloud.bigquery.dataset.DatasetListItem`. associated with the project. """ extra_params = {} @@ -328,15 +325,16 @@ def list_datasets( def dataset(self, dataset_id, project=None): """Construct a reference to a dataset. - :type dataset_id: str - :param dataset_id: ID of the dataset. + Args: + dataset_id (str): ID of the dataset. - :type project: str - :param project: (Optional) project ID for the dataset (defaults to - the project of the client). + project (str): + (Optional) project ID for the dataset (defaults to + the project of the client). - :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference` - :returns: a new ``DatasetReference`` instance + Returns: + google.cloud.bigquery.dataset.DatasetReference: + a new ``DatasetReference`` instance. """ if project is None: project = self.project @@ -351,8 +349,8 @@ def create_dataset(self, dataset, exists_ok=False, retry=DEFAULT_RETRY): Args: dataset (Union[ \ - :class:`~google.cloud.bigquery.dataset.Dataset`, \ - :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + google.cloud.bigquery.dataset.Dataset, \ + google.cloud.bigquery.dataset.DatasetReference, \ str, \ ]): A :class:`~google.cloud.bigquery.dataset.Dataset` to create. @@ -404,7 +402,7 @@ def create_routine(self, routine, exists_ok=False, retry=DEFAULT_RETRY): https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/insert Args: - routine (:class:`~google.cloud.bigquery.routine.Routine`): + routine (google.cloud.bigquery.routine.Routine): A :class:`~google.cloud.bigquery.routine.Routine` to create. The dataset that the routine belongs to must already exist. exists_ok (bool): @@ -440,8 +438,8 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY): Args: table (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ ]): A :class:`~google.cloud.bigquery.table.Table` to create. @@ -481,14 +479,14 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): Args: dataset_ref (Union[ \ - :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + google.cloud.bigquery.dataset.DatasetReference, \ str, \ ]): A reference to the dataset to fetch from the BigQuery API. If a string is passed in, this method attempts to create a dataset reference from a string using :func:`~google.cloud.bigquery.dataset.DatasetReference.from_string`. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. Returns: @@ -508,19 +506,18 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY): Args: model_ref (Union[ \ - :class:`~google.cloud.bigquery.model.ModelReference`, \ + google.cloud.bigquery.model.ModelReference, \ str, \ ]): A reference to the model to fetch from the BigQuery API. If a string is passed in, this method attempts to create a model reference from a string using :func:`google.cloud.bigquery.model.ModelReference.from_string`. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. Returns: - google.cloud.bigquery.model.Model: - A ``Model`` instance. + google.cloud.bigquery.model.Model: A ``Model`` instance. """ if isinstance(model_ref, str): model_ref = ModelReference.from_string( @@ -535,15 +532,15 @@ def get_routine(self, routine_ref, retry=DEFAULT_RETRY): Args: routine_ref (Union[ \ - :class:`~google.cloud.bigquery.routine.Routine`, \ - :class:`~google.cloud.bigquery.routine.RoutineReference`, \ + google.cloud.bigquery.routine.Routine, \ + google.cloud.bigquery.routine.RoutineReference, \ str, \ ]): A reference to the routine to fetch from the BigQuery API. If a string is passed in, this method attempts to create a reference from a string using :func:`google.cloud.bigquery.routine.RoutineReference.from_string`. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the API call. Returns: @@ -563,15 +560,15 @@ def get_table(self, table, retry=DEFAULT_RETRY): Args: table (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ ]): A reference to the table to fetch from the BigQuery API. If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. Returns: @@ -744,8 +741,8 @@ def list_models( Args: dataset (Union[ \ - :class:`~google.cloud.bigquery.dataset.Dataset`, \ - :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + google.cloud.bigquery.dataset.Dataset, \ + google.cloud.bigquery.dataset.DatasetReference, \ str, \ ]): A reference to the dataset whose models to list from the @@ -762,7 +759,7 @@ def list_models( the value of the ``page_token`` can be accessed at ``next_page_token`` of the :class:`~google.api_core.page_iterator.HTTPIterator`. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. Returns: @@ -802,8 +799,8 @@ def list_routines( Args: dataset (Union[ \ - :class:`~google.cloud.bigquery.dataset.Dataset`, \ - :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + google.cloud.bigquery.dataset.Dataset, \ + google.cloud.bigquery.dataset.DatasetReference, \ str, \ ]): A reference to the dataset whose routines to list from the @@ -820,7 +817,7 @@ def list_routines( the value of the ``page_token`` can be accessed at ``next_page_token`` of the :class:`~google.api_core.page_iterator.HTTPIterator`. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. Returns: @@ -860,8 +857,8 @@ def list_tables( Args: dataset (Union[ \ - :class:`~google.cloud.bigquery.dataset.Dataset`, \ - :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + google.cloud.bigquery.dataset.Dataset, \ + google.cloud.bigquery.dataset.DatasetReference, \ str, \ ]): A reference to the dataset whose tables to list from the @@ -878,7 +875,7 @@ def list_tables( the value of the ``page_token`` can be accessed at ``next_page_token`` of the :class:`~google.api_core.page_iterator.HTTPIterator`. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. Returns: @@ -918,8 +915,8 @@ def delete_dataset( Args dataset (Union[ \ - :class:`~google.cloud.bigquery.dataset.Dataset`, \ - :class:`~google.cloud.bigquery.dataset.DatasetReference`, \ + google.cloud.bigquery.dataset.Dataset, \ + google.cloud.bigquery.dataset.DatasetReference, \ str, \ ]): A reference to the dataset to delete. If a string is passed @@ -930,7 +927,7 @@ def delete_dataset( (Optional) If True, delete all the tables in the dataset. If False and the dataset contains tables, the request will fail. Default is False. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. not_found_ok (bool): Defaults to ``False``. If ``True``, ignore "not found" errors @@ -964,15 +961,15 @@ def delete_model(self, model, retry=DEFAULT_RETRY, not_found_ok=False): Args: model (Union[ \ - :class:`~google.cloud.bigquery.model.Model`, \ - :class:`~google.cloud.bigquery.model.ModelReference`, \ + google.cloud.bigquery.model.Model, \ + google.cloud.bigquery.model.ModelReference, \ str, \ ]): A reference to the model to delete. If a string is passed in, this method attempts to create a model reference from a string using :func:`google.cloud.bigquery.model.ModelReference.from_string`. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. not_found_ok (bool): Defaults to ``False``. If ``True``, ignore "not found" errors @@ -998,15 +995,15 @@ def delete_routine(self, routine, retry=DEFAULT_RETRY, not_found_ok=False): Args: model (Union[ \ - :class:`~google.cloud.bigquery.routine.Routine`, \ - :class:`~google.cloud.bigquery.routine.RoutineReference`, \ + google.cloud.bigquery.routine.Routine, \ + google.cloud.bigquery.routine.RoutineReference, \ str, \ ]): A reference to the routine to delete. If a string is passed in, this method attempts to create a routine reference from a string using :func:`google.cloud.bigquery.routine.RoutineReference.from_string`. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. not_found_ok (bool): Defaults to ``False``. If ``True``, ignore "not found" errors @@ -1034,15 +1031,15 @@ def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False): Args: table (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ ]): A reference to the table to delete. If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. not_found_ok (bool): Defaults to ``False``. If ``True``, ignore "not found" errors @@ -1107,15 +1104,17 @@ def _get_query_results( def job_from_resource(self, resource): """Detect correct job type from resource and instantiate. - :type resource: dict - :param resource: one job resource from API response + Args: + resource (Dict): one job resource from API response - :rtype: One of: - :class:`google.cloud.bigquery.job.LoadJob`, - :class:`google.cloud.bigquery.job.CopyJob`, - :class:`google.cloud.bigquery.job.ExtractJob`, - or :class:`google.cloud.bigquery.job.QueryJob` - :returns: the job instance, constructed via the resource + Returns: + Union[ \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]: + The job instance, constructed via the resource. """ config = resource.get("configuration", {}) if "load" in config: @@ -1146,10 +1145,12 @@ def get_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): (Optional) How to retry the RPC. Returns: - Union[google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob]: + Union[ \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]: Job instance, based on the resource returned by the API. """ extra_params = {"projection": "full"} @@ -1177,7 +1178,7 @@ def cancel_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel - Arguments: + Args: job_id (str): Unique job identifier. Keyword Arguments: @@ -1189,10 +1190,12 @@ def cancel_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): (Optional) How to retry the RPC. Returns: - Union[google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob]: + Union[ \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob, \ + ]: Job instance, based on the resource returned by the API. """ extra_params = {"projection": "full"} @@ -1232,38 +1235,38 @@ def list_jobs( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/list Args: - project (str, optional): + project (Optional[str]): Project ID to use for retreiving datasets. Defaults to the client's project. parent_job (Optional[Union[ \ - :class:`~google.cloud.bigquery.job._AsyncJob`, \ + google.cloud.bigquery.job._AsyncJob, \ str, \ ]]): If set, retrieve only child jobs of the specified parent. - max_results (int, optional): + max_results (Optional[int]): Maximum number of jobs to return. - page_token (str, optional): + page_token (Optional[str]): Opaque marker for the next "page" of jobs. If not passed, the API will return the first page of jobs. The token marks the beginning of the iterator to be returned and the value of the ``page_token`` can be accessed at ``next_page_token`` of :class:`~google.api_core.page_iterator.HTTPIterator`. - all_users (bool, optional): + all_users (Optional[bool]): If true, include jobs owned by all users in the project. Defaults to :data:`False`. - state_filter (str, optional): + state_filter (Optional[str]): If set, include only jobs matching the given state. One of: * ``"done"`` * ``"pending"`` * ``"running"`` - retry (google.api_core.retry.Retry, optional): + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - min_creation_time (datetime.datetime, optional): + min_creation_time (Optional[datetime.datetime]): Min value for job creation time. If set, only jobs created after or at this timestamp are returned. If the datetime has no time zone assumes UTC time. - max_creation_time (datetime.datetime, optional): + max_creation_time (Optional[datetime.datetime]): Max value for job creation time. If set, only jobs created before or at this timestamp are returned. If the datetime has no time zone assumes UTC time. @@ -1328,8 +1331,8 @@ def load_table_from_uri( URIs of data files to be loaded; in format ``gs:///``. destination (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ ]): Table into which data is to be loaded. If a string is passed @@ -1406,8 +1409,8 @@ def load_table_from_file( Arguments: file_obj (file): A file handle opened in binary mode for reading. destination (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ ]): Table into which data is to be loaded. If a string is passed @@ -1517,19 +1520,19 @@ def load_table_from_dataframe( :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: - num_retries (int, optional): Number of upload retries. - job_id (str, optional): Name of the job. - job_id_prefix (str, optional): + num_retries (Optional[int]): Number of upload retries. + job_id (Optional[str]): Name of the job. + job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. location (str): Location where to run the job. Must match the location of the destination table. - project (str, optional): + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config (~google.cloud.bigquery.job.LoadJobConfig, optional): + job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): Extra configuration options for the job. To override the default pandas data type conversions, supply @@ -1672,7 +1675,7 @@ def load_table_from_json( ): """Upload the contents of a table from a JSON string or dict. - Arguments: + Args: json_rows (Iterable[Dict[str, Any]]): Row data to be inserted. Keys must match the table schema fields and values must be JSON-compatible representations. @@ -1693,8 +1696,8 @@ def load_table_from_json( client.load_table_from_file(data_as_file, ...) destination (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ ]): Table into which data is to be loaded. If a string is passed @@ -1703,7 +1706,7 @@ def load_table_from_json( :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: - num_retries (int, optional): Number of upload retries. + num_retries (Optional[int]): Number of upload retries. job_id (str): (Optional) Name of the job. job_id_prefix (str): (Optional) the user-provided prefix for a randomly generated @@ -1767,19 +1770,19 @@ def load_table_from_json( def _do_resumable_upload(self, stream, metadata, num_retries): """Perform a resumable upload. - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. + Args: + stream (IO[bytes]): A bytes IO object open for reading. - :type metadata: dict - :param metadata: The metadata associated with the upload. + metadata (Dict): The metadata associated with the upload. - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) + num_retries (int): + Number of upload retries. (Deprecated: This + argument will be removed in a future release.) - :rtype: :class:`~requests.Response` - :returns: The "200 OK" response object returned after the final chunk - is uploaded. + Returns: + requests.Response: + The "200 OK" response object returned after the final chunk + is uploaded. """ upload, transport = self._initiate_resumable_upload( stream, metadata, num_retries @@ -1793,23 +1796,22 @@ def _do_resumable_upload(self, stream, metadata, num_retries): def _initiate_resumable_upload(self, stream, metadata, num_retries): """Initiate a resumable upload. - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. + Args: + stream (IO[bytes]): A bytes IO object open for reading. - :type metadata: dict - :param metadata: The metadata associated with the upload. + metadata (Dict): The metadata associated with the upload. - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) + num_retries (int): + Number of upload retries. (Deprecated: This + argument will be removed in a future release.) - :rtype: tuple - :returns: - Pair of + Returns: + Tuple: + Pair of - * The :class:`~google.resumable_media.requests.ResumableUpload` - that was created - * The ``transport`` used to initiate the upload. + * The :class:`~google.resumable_media.requests.ResumableUpload` + that was created + * The ``transport`` used to initiate the upload. """ chunk_size = _DEFAULT_CHUNKSIZE transport = self._http @@ -1833,26 +1835,29 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries): def _do_multipart_upload(self, stream, metadata, size, num_retries): """Perform a multipart upload. - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. + Args: + stream (IO[bytes]): A bytes IO object open for reading. - :type metadata: dict - :param metadata: The metadata associated with the upload. + metadata (Dict): The metadata associated with the upload. - :type size: int - :param size: The number of bytes to be uploaded (which will be read - from ``stream``). If not provided, the upload will be - concluded once ``stream`` is exhausted (or :data:`None`). + size (int): + The number of bytes to be uploaded (which will be read + from ``stream``). If not provided, the upload will be + concluded once ``stream`` is exhausted (or :data:`None`). + + num_retries (int): + Number of upload retries. (Deprecated: This + argument will be removed in a future release.) - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) + Returns: + requests.Response: + The "200 OK" response object returned after the multipart + upload request. - :rtype: :class:`~requests.Response` - :returns: The "200 OK" response object returned after the multipart - upload request. - :raises: :exc:`ValueError` if the ``stream`` has fewer than ``size`` - bytes remaining. + Raises: + ValueError: + if the ``stream`` has fewer than ``size`` + bytes remaining. """ data = stream.read(size) if len(data) < size: @@ -1889,23 +1894,23 @@ def copy_table( See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationtablecopy - Arguments: + Args: sources (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ Sequence[ \ Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ ] \ ], \ ]): Table or tables to be copied. - destination (Union[ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + destination (Union[ \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ ]): Table into which data is to be copied. @@ -1987,10 +1992,10 @@ def extract_table( See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationextract - Arguments: + Args: source (Union[ \ - :class:`google.cloud.bigquery.table.Table`, \ - :class:`google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ src, \ ]): Table to be extracted. @@ -2015,9 +2020,8 @@ def extract_table( (Optional) Extra configuration options for the job. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. - :type source: :class:`google.cloud.bigquery.table.TableReference` - :param source: table to be extracted. - + Args: + source (google.cloud.bigquery.table.TableReference): table to be extracted. Returns: google.cloud.bigquery.job.ExtractJob: A new extract job instance. @@ -2067,7 +2071,7 @@ def query( See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationquery - Arguments: + Args: query (str): SQL query to be executed. Defaults to the standard SQL dialect. Use the ``job_config`` parameter to change dialects. @@ -2141,27 +2145,22 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): Args: table (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ ]): The destination table for the row data, or a reference to it. - rows (Union[ \ - Sequence[Tuple], \ - Sequence[dict], \ - ]): + rows (Union[Sequence[Tuple], Sequence[dict]]): Row data to be inserted. If a list of tuples is given, each tuple should contain data for each schema field on the current table and in the same order as the schema fields. If a list of dictionaries is given, the keys must include all required fields in the schema. Keys which do not correspond to a field in the schema are ignored. - selected_fields (Sequence[ \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - ]): + selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): The fields to return. Required if ``table`` is a :class:`~google.cloud.bigquery.table.TableReference`. - kwargs (dict): + kwargs (Dict): Keyword arguments to :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`. @@ -2204,21 +2203,19 @@ def insert_rows_from_dataframe( Args: table (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ ]): The destination table for the row data, or a reference to it. dataframe (pandas.DataFrame): A :class:`~pandas.DataFrame` containing the data to load. - selected_fields (Sequence[ \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - ]): + selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): The fields to return. Required if ``table`` is a :class:`~google.cloud.bigquery.table.TableReference`. chunk_size (int): The number of rows to stream in a single chunk. Must be positive. - kwargs (dict): + kwargs (Dict): Keyword arguments to :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`. @@ -2263,33 +2260,33 @@ def insert_rows_json( See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll - table (Union[ \ - :class:`~google.cloud.bigquery.table.Table` \ - :class:`~google.cloud.bigquery.table.TableReference`, \ - str, \ - ]): - The destination table for the row data, or a reference to it. - json_rows (Sequence[dict]): - Row data to be inserted. Keys must match the table schema fields - and values must be JSON-compatible representations. - row_ids (Sequence[str]): - (Optional) Unique ids, one per row being inserted. If omitted, - unique IDs are created. - skip_invalid_rows (bool): - (Optional) Insert all valid rows of a request, even if invalid - rows exist. The default value is False, which causes the entire - request to fail if any invalid rows exist. - ignore_unknown_values (bool): - (Optional) Accept rows that contain values that do not match the - schema. The unknown values are ignored. Default is False, which - treats unknown values as errors. - template_suffix (str): - (Optional) treat ``name`` as a template table and provide a suffix. - BigQuery will create the table `` + `` based - on the schema of the template table. See - https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables - retry (:class:`google.api_core.retry.Retry`): - (Optional) How to retry the RPC. + Args: + table (Union[ \ + google.cloud.bigquery.table.Table \ + google.cloud.bigquery.table.TableReference, \ + str, \ + ]): + The destination table for the row data, or a reference to it. + json_rows (Sequence[Dict]): + Row data to be inserted. Keys must match the table schema fields + and values must be JSON-compatible representations. + row_ids (Sequence[str]): + (Optional) Unique ids, one per row being inserted. If omitted, + unique IDs are created. + skip_invalid_rows (bool): + (Optional) Insert all valid rows of a request, even if invalid + rows exist. The default value is False, which causes the entire + request to fail if any invalid rows exist. + ignore_unknown_values (bool): + (Optional) Accept rows that contain values that do not match the + schema. The unknown values are ignored. Default is False, which + treats unknown values as errors. + template_suffix (str): + (Optional) treat ``name`` as a template table and provide a suffix. + BigQuery will create the table `` + `` based + on the schema of the template table. See + https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. Returns: Sequence[Mappings]: @@ -2335,10 +2332,10 @@ def insert_rows_json( def list_partitions(self, table, retry=DEFAULT_RETRY): """List the partitions in a table. - Arguments: + Args: table (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ str, \ ]): The table or reference from which to get partition info @@ -2387,18 +2384,16 @@ def list_rows( Args: table (Union[ \ - :class:`~google.cloud.bigquery.table.Table`, \ - :class:`~google.cloud.bigquery.table.TableListItem`, \ - :class:`~google.cloud.bigquery.table.TableReference`, \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableListItem, \ + google.cloud.bigquery.table.TableReference, \ str, \ ]): The table to list, or a reference to it. When the table object does not contain a schema and ``selected_fields`` is not supplied, this method calls ``get_table`` to fetch the table schema. - selected_fields (Sequence[ \ - :class:`~google.cloud.bigquery.schema.SchemaField` \ - ]): + selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): The fields to return. If not supplied, data for all columns are downloaded. max_results (int): @@ -2416,7 +2411,7 @@ def list_rows( Optional. The maximum number of rows in each page of results from this request. Non-positive values are ignored. Defaults to a sensible value set by the API. - retry (:class:`google.api_core.retry.Retry`): + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. Returns: @@ -2516,14 +2511,13 @@ def schema_to_json(self, schema_list, destination): def _item_to_project(iterator, resource): """Convert a JSON project to the native object. - :type iterator: :class:`~google.api_core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. + Args: + iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use. - :type resource: dict - :param resource: An item to be converted to a project. + resource (Dict): An item to be converted to a project. - :rtype: :class:`.Project` - :returns: The next project in the page. + Returns: + google.cloud.bigquery.client.Project: The next project in the page. """ return Project.from_api_repr(resource) @@ -2534,14 +2528,13 @@ def _item_to_project(iterator, resource): def _item_to_dataset(iterator, resource): """Convert a JSON dataset to the native object. - :type iterator: :class:`~google.api_core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. + Args: + iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use. - :type resource: dict - :param resource: An item to be converted to a dataset. + resource (Dict): An item to be converted to a dataset. - :rtype: :class:`.DatasetListItem` - :returns: The next dataset in the page. + Returns: + google.cloud.bigquery.dataset.DatasetListItem: The next dataset in the page. """ return DatasetListItem(resource) @@ -2549,14 +2542,13 @@ def _item_to_dataset(iterator, resource): def _item_to_job(iterator, resource): """Convert a JSON job to the native object. - :type iterator: :class:`~google.api_core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. + Args: + iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use. - :type resource: dict - :param resource: An item to be converted to a job. + resource (Dict): An item to be converted to a job. - :rtype: job instance. - :returns: The next job in the page. + Returns: + job instance: The next job in the page. """ return iterator.client.job_from_resource(resource) @@ -2567,8 +2559,7 @@ def _item_to_model(iterator, resource): Args: iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use. - resource (dict): - An item to be converted to a model. + resource (Dict): An item to be converted to a model. Returns: google.cloud.bigquery.model.Model: The next model in the page. @@ -2582,8 +2573,7 @@ def _item_to_routine(iterator, resource): Args: iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use. - resource (dict): - An item to be converted to a routine. + resource (Dict): An item to be converted to a routine. Returns: google.cloud.bigquery.routine.Routine: The next routine in the page. @@ -2594,14 +2584,13 @@ def _item_to_routine(iterator, resource): def _item_to_table(iterator, resource): """Convert a JSON table to the native object. - :type iterator: :class:`~google.api_core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. + Args: + iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use. - :type resource: dict - :param resource: An item to be converted to a table. + resource (Dict): An item to be converted to a table. - :rtype: :class:`~google.cloud.bigquery.table.Table` - :returns: The next table in the page. + Returns: + google.cloud.bigquery.table.Table: The next table in the page. """ return TableListItem(resource) @@ -2609,14 +2598,13 @@ def _item_to_table(iterator, resource): def _make_job_id(job_id, prefix=None): """Construct an ID for a new job. - :type job_id: str or ``NoneType`` - :param job_id: the user-provided job ID + Args: + job_id (Optional[str]): the user-provided job ID. - :type prefix: str or ``NoneType`` - :param prefix: (Optional) the user-provided prefix for a job ID + prefix (Optional[str]): the user-provided prefix for a job ID. - :rtype: str - :returns: A job ID + Returns: + str: A job ID """ if job_id is not None: return job_id @@ -2629,11 +2617,13 @@ def _make_job_id(job_id, prefix=None): def _check_mode(stream): """Check that a stream was opened in read-binary mode. - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. + Args: + stream (IO[bytes]): A bytes IO object open for reading. - :raises: :exc:`ValueError` if the ``stream.mode`` is a valid attribute - and is not among ``rb``, ``r+b`` or ``rb+``. + Raises: + ValueError: + if the ``stream.mode`` is a valid attribute + and is not among ``rb``, ``r+b`` or ``rb+``. """ mode = getattr(stream, "mode", None) @@ -2654,11 +2644,11 @@ def _check_mode(stream): def _get_upload_headers(user_agent): """Get the headers for an upload request. - :type user_agent: str - :param user_agent: The user-agent for requests. + Args: + user_agent (str): The user-agent for requests. - :rtype: dict - :returns: The headers to be used for the request. + Returns: + Dict: The headers to be used for the request. """ return { "Accept": "application/json", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index e5f4cff51666..651880feac90 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -30,18 +30,21 @@ def scalar_to_query_parameter(value, name=None): """Convert a scalar value into a query parameter. - :type value: any - :param value: A scalar value to convert into a query parameter. + Args: + value (Any): + A scalar value to convert into a query parameter. - :type name: str - :param name: (Optional) Name of the query parameter. + name (str): + (Optional) Name of the query parameter. - :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter` - :returns: - A query parameter corresponding with the type and value of the plain - Python object. - :raises: :class:`~google.cloud.bigquery.dbapi.exceptions.ProgrammingError` - if the type cannot be determined. + Returns: + google.cloud.bigquery.ScalarQueryParameter: + A query parameter corresponding with the type and value of the plain + Python object. + + Raises: + google.cloud.bigquery.dbapi.exceptions.ProgrammingError: + if the type cannot be determined. """ parameter_type = bigquery_scalar_type(value) @@ -67,8 +70,8 @@ def array_to_query_parameter(value, name=None): Python object. Raises: - :class:`~google.cloud.bigquery.dbapi.exceptions.ProgrammingError` - if the type of array elements cannot be determined. + google.cloud.bigquery.dbapi.exceptions.ProgrammingError: + if the type of array elements cannot be determined. """ if not array_like(value): raise exceptions.ProgrammingError( @@ -97,11 +100,12 @@ def array_to_query_parameter(value, name=None): def to_query_parameters_list(parameters): """Converts a sequence of parameter values into query parameters. - :type parameters: Sequence[Any] - :param parameters: Sequence of query parameter values. + Args: + parameters (Sequence[Any]): Sequence of query parameter values. - :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] - :returns: A list of query parameters. + Returns: + List[google.cloud.bigquery.query._AbstractQueryParameter]: + A list of query parameters. """ result = [] @@ -120,11 +124,12 @@ def to_query_parameters_list(parameters): def to_query_parameters_dict(parameters): """Converts a dictionary of parameter values into query parameters. - :type parameters: Mapping[str, Any] - :param parameters: Dictionary of query parameter values. + Args: + parameters (Mapping[str, Any]): Dictionary of query parameter values. - :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] - :returns: A list of named query parameters. + Returns: + List[google.cloud.bigquery.query._AbstractQueryParameter]: + A list of named query parameters. """ result = [] @@ -146,11 +151,13 @@ def to_query_parameters_dict(parameters): def to_query_parameters(parameters): """Converts DB-API parameter values into query parameters. - :type parameters: Mapping[str, Any] or Sequence[Any] - :param parameters: A dictionary or sequence of query parameter values. + Args: + parameters (Union[Mapping[str, Any], Sequence[Any]]): + A dictionary or sequence of query parameter values. - :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] - :returns: A list of query parameters. + Returns: + List[google.cloud.bigquery.query._AbstractQueryParameter]: + A list of query parameters. """ if parameters is None: return [] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 9b7a895b367f..a3e6ea5be87e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -49,8 +49,9 @@ class Cursor(object): """DB-API Cursor to Google BigQuery. - :type connection: :class:`~google.cloud.bigquery.dbapi.Connection` - :param connection: A DB-API connection to Google BigQuery. + Args: + connection (google.cloud.bigquery.dbapi.Connection): + A DB-API connection to Google BigQuery. """ def __init__(self, connection): @@ -74,8 +75,9 @@ def close(self): def _set_description(self, schema): """Set description from schema. - :type schema: Sequence[google.cloud.bigquery.schema.SchemaField] - :param schema: A description of fields in the schema. + Args: + schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + A description of fields in the schema. """ if schema is None: self.description = None @@ -103,9 +105,9 @@ def _set_rowcount(self, query_results): query, but if it was a DML statement, it sets rowcount to the number of modified rows. - :type query_results: - :class:`~google.cloud.bigquery.query._QueryResults` - :param query_results: results of a query + Args: + query_results (google.cloud.bigquery.query._QueryResults): + Results of a query. """ total_rows = 0 num_dml_affected_rows = query_results.num_dml_affected_rows @@ -138,19 +140,18 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None): yet supported. See: https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3524 - :type operation: str - :param operation: A Google BigQuery query string. + Args: + operation (str): A Google BigQuery query string. - :type parameters: Mapping[str, Any] or Sequence[Any] - :param parameters: - (Optional) dictionary or sequence of parameter values. + parameters (Union[Mapping[str, Any], Sequence[Any]]): + (Optional) dictionary or sequence of parameter values. - :type job_id: str - :param job_id: (Optional) The job_id to use. If not set, a job ID - is generated at random. + job_id (str): + (Optional) The job_id to use. If not set, a job ID + is generated at random. - :type job_config: :class:`~google.cloud.bigquery.job.QueryJobConfig` - :param job_config: (Optional) Extra configuration options for the query job. + job_config (google.cloud.bigquery.job.QueryJobConfig): + (Optional) Extra configuration options for the query job. """ self._query_data = None self._query_job = None @@ -182,11 +183,11 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None): def executemany(self, operation, seq_of_parameters): """Prepare and execute a database operation multiple times. - :type operation: str - :param operation: A Google BigQuery query string. + Args: + operation (str): A Google BigQuery query string. - :type seq_of_parameters: Sequence[Mapping[str, Any] or Sequence[Any]] - :param parameters: Sequence of many sets of parameter values. + seq_of_parameters (Union[Sequence[Mapping[str, Any], Sequence[Any]]]): + Sequence of many sets of parameter values. """ for parameters in seq_of_parameters: self.execute(operation, parameters) @@ -221,12 +222,13 @@ def _try_fetch(self, size=None): def fetchone(self): """Fetch a single row from the results of the last ``execute*()`` call. - :rtype: tuple - :returns: - A tuple representing a row or ``None`` if no more data is - available. - :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError` - if called before ``execute()``. + Returns: + Tuple: + A tuple representing a row or ``None`` if no more data is + available. + + Raises: + google.cloud.bigquery.dbapi.InterfaceError: if called before ``execute()``. """ self._try_fetch() try: @@ -242,16 +244,17 @@ def fetchmany(self, size=None): Set the ``arraysize`` attribute before calling ``execute()`` to set the batch size. - :type size: int - :param size: - (Optional) Maximum number of rows to return. Defaults to the - ``arraysize`` property value. If ``arraysize`` is not set, it - defaults to ``1``. + Args: + size (int): + (Optional) Maximum number of rows to return. Defaults to the + ``arraysize`` property value. If ``arraysize`` is not set, it + defaults to ``1``. + + Returns: + List[Tuple]: A list of rows. - :rtype: List[tuple] - :returns: A list of rows. - :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError` - if called before ``execute()``. + Raises: + google.cloud.bigquery.dbapi.InterfaceError: if called before ``execute()``. """ if size is None: # Since self.arraysize can be None (a deviation from PEP 249), @@ -272,10 +275,11 @@ def fetchmany(self, size=None): def fetchall(self): """Fetch all remaining results from the last ``execute*()`` call. - :rtype: List[tuple] - :returns: A list of all the rows in the results. - :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError` - if called before ``execute()``. + Returns: + List[Tuple]: A list of all the rows in the results. + + Raises: + google.cloud.bigquery.dbapi.InterfaceError: if called before ``execute()``. """ self._try_fetch() return list(self._query_data) @@ -293,17 +297,18 @@ def _format_operation_list(operation, parameters): The input operation will be a query like ``SELECT %s`` and the output will be a query like ``SELECT ?``. - :type operation: str - :param operation: A Google BigQuery query string. + Args: + operation (str): A Google BigQuery query string. - :type parameters: Sequence[Any] - :param parameters: Sequence of parameter values. + parameters (Sequence[Any]): Sequence of parameter values. - :rtype: str - :returns: A formatted query string. - :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError` - if a parameter used in the operation is not found in the - ``parameters`` argument. + Returns: + str: A formatted query string. + + Raises: + google.cloud.bigquery.dbapi.ProgrammingError: + if a parameter used in the operation is not found in the + ``parameters`` argument. """ formatted_params = ["?" for _ in parameters] @@ -319,17 +324,18 @@ def _format_operation_dict(operation, parameters): The input operation will be a query like ``SELECT %(namedparam)s`` and the output will be a query like ``SELECT @namedparam``. - :type operation: str - :param operation: A Google BigQuery query string. + Args: + operation (str): A Google BigQuery query string. + + parameters (Mapping[str, Any]): Dictionary of parameter values. - :type parameters: Mapping[str, Any] - :param parameters: Dictionary of parameter values. + Returns: + str: A formatted query string. - :rtype: str - :returns: A formatted query string. - :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError` - if a parameter used in the operation is not found in the - ``parameters`` argument. + Raises: + google.cloud.bigquery.dbapi.ProgrammingError: + if a parameter used in the operation is not found in the + ``parameters`` argument. """ formatted_params = {} for name in parameters: @@ -345,17 +351,19 @@ def _format_operation_dict(operation, parameters): def _format_operation(operation, parameters=None): """Formats parameters in operation in way BigQuery expects. - :type: str - :param operation: A Google BigQuery query string. + Args: + operation (str): A Google BigQuery query string. + + parameters (Union[Mapping[str, Any], Sequence[Any]]): + Optional parameter values. - :type: Mapping[str, Any] or Sequence[Any] - :param parameters: Optional parameter values. + Returns: + str: A formatted query string. - :rtype: str - :returns: A formatted query string. - :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError` - if a parameter used in the operation is not found in the - ``parameters`` argument. + Raises: + google.cloud.bigquery.dbapi.ProgrammingError: + if a parameter used in the operation is not found in the + ``parameters`` argument. """ if parameters is None: return operation diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index cfc5a3797c70..96724c9f805b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -81,11 +81,11 @@ def _error_result_to_exception(error_result): .. _troubleshooting errors: https://cloud.google.com/bigquery\ /troubleshooting-errors - :type error_result: Mapping[str, str] - :param error_result: The error result from BigQuery. + Args: + error_result (Mapping[str, str]): The error result from BigQuery. - :rtype google.cloud.exceptions.GoogleCloudError: - :returns: The mapped exception. + Returns: + google.cloud.exceptions.GoogleCloudError: The mapped exception. """ reason = error_result.get("reason") status_code = _ERROR_REASON_TO_EXCEPTION.get( @@ -341,7 +341,7 @@ def parent_job_id(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.parent_job_id Returns: - Optional[str] + Optional[str]: parent job id. """ return _helpers._get_sub_prop(self._properties, ["statistics", "parentJobId"]) @@ -371,8 +371,8 @@ def num_child_jobs(self): def project(self): """Project bound to the job. - :rtype: str - :returns: the project (derived from the client). + Returns: + str: the project (derived from the client). """ return _helpers._get_sub_prop(self._properties, ["jobReference", "projectId"]) @@ -384,13 +384,14 @@ def location(self): def _require_client(self, client): """Check client or verify over-ride. - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. - :rtype: :class:`google.cloud.bigquery.client.Client` - :returns: The client passed in or the currently bound client. + Returns: + google.cloud.bigquery.client.Client: + The client passed in or the currently bound client. """ if client is None: client = self._client @@ -398,10 +399,10 @@ def _require_client(self, client): @property def job_type(self): - """Type of job + """Type of job. - :rtype: str - :returns: one of 'load', 'copy', 'extract', 'query' + Returns: + str: one of 'load', 'copy', 'extract', 'query'. """ return self._JOB_TYPE @@ -409,8 +410,8 @@ def job_type(self): def path(self): """URL path for the job's APIs. - :rtype: str - :returns: the path based on project and job ID. + Returns: + str: the path based on project and job ID. """ return "/projects/%s/jobs/%s" % (self.project, self.job_id) @@ -423,8 +424,8 @@ def labels(self): def etag(self): """ETag for the job resource. - :rtype: str, or ``NoneType`` - :returns: the ETag (None until set from the server). + Returns: + Optional[str]: the ETag (None until set from the server). """ return self._properties.get("etag") @@ -432,8 +433,8 @@ def etag(self): def self_link(self): """URL for the job resource. - :rtype: str, or ``NoneType`` - :returns: the URL (None until set from the server). + Returns: + Optional[str]: the URL (None until set from the server). """ return self._properties.get("selfLink") @@ -441,8 +442,8 @@ def self_link(self): def user_email(self): """E-mail address of user who submitted the job. - :rtype: str, or ``NoneType`` - :returns: the URL (None until set from the server). + Returns: + Optional[str]: the URL (None until set from the server). """ return self._properties.get("user_email") @@ -450,8 +451,9 @@ def user_email(self): def created(self): """Datetime at which the job was created. - :rtype: ``datetime.datetime``, or ``NoneType`` - :returns: the creation time (None until set from the server). + Returns: + Optional[datetime.datetime]: + the creation time (None until set from the server). """ statistics = self._properties.get("statistics") if statistics is not None: @@ -463,8 +465,9 @@ def created(self): def started(self): """Datetime at which the job was started. - :rtype: ``datetime.datetime``, or ``NoneType`` - :returns: the start time (None until set from the server). + Returns: + Optional[datetime.datetime]: + the start time (None until set from the server). """ statistics = self._properties.get("statistics") if statistics is not None: @@ -476,8 +479,9 @@ def started(self): def ended(self): """Datetime at which the job finished. - :rtype: ``datetime.datetime``, or ``NoneType`` - :returns: the end time (None until set from the server). + Returns: + Optional[datetime.datetime]: + the end time (None until set from the server). """ statistics = self._properties.get("statistics") if statistics is not None: @@ -494,8 +498,8 @@ def _job_statistics(self): def error_result(self): """Error information about the job as a whole. - :rtype: mapping, or ``NoneType`` - :returns: the error information (None until set from the server). + Returns: + Optional[Mapping]: the error information (None until set from the server). """ status = self._properties.get("status") if status is not None: @@ -505,8 +509,9 @@ def error_result(self): def errors(self): """Information about individual errors generated by the job. - :rtype: list of mappings, or ``NoneType`` - :returns: the error information (None until set from the server). + Returns: + Optional[List[Mapping]]: + the error information (None until set from the server). """ status = self._properties.get("status") if status is not None: @@ -516,8 +521,9 @@ def errors(self): def state(self): """Status of the job. - :rtype: str, or ``NoneType`` - :returns: the state (None until set from the server). + Returns: + Optional[str]: + the state (None until set from the server). """ status = self._properties.get("status") if status is not None: @@ -534,8 +540,8 @@ def _copy_configuration_properties(self, configuration): def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` - :type api_response: dict - :param api_response: response returned from an API call + Args: + api_response (Dict): response returned from an API call. """ cleaned = api_response.copy() self._scrub_local_properties(cleaned) @@ -559,14 +565,18 @@ def _set_properties(self, api_response): def _get_resource_config(cls, resource): """Helper for :meth:`from_api_repr` - :type resource: dict - :param resource: resource for the job + Args: + resource (Dict): resource for the job. + + Returns: + (str, Dict): + tuple (string, dict), where the first element is the + job ID and the second contains job-specific configuration. - :rtype: dict - :returns: tuple (string, dict), where the first element is the - job ID and the second contains job-specific configuration. - :raises: :class:`KeyError` if the resource has no identifier, or - is missing the appropriate configuration. + Raises: + KeyError: + If the resource has no identifier, or + is missing the appropriate configuration. """ if "jobReference" not in resource or "jobId" not in resource["jobReference"]: raise KeyError( @@ -626,16 +636,15 @@ def exists(self, client=None, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. - :rtype: bool - :returns: Boolean indicating existence of the job. + Returns: + bool: Boolean indicating existence of the job. """ client = self._require_client(client) @@ -658,13 +667,12 @@ def reload(self, client=None, retry=DEFAULT_RETRY): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. """ client = self._require_client(client) @@ -683,13 +691,13 @@ def cancel(self, client=None): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. - :rtype: bool - :returns: Boolean indicating that the cancel request was sent. + Returns: + bool: Boolean indicating that the cancel request was sent. """ client = self._require_client(client) @@ -732,11 +740,11 @@ def _set_future_result(self): def done(self, retry=DEFAULT_RETRY): """Refresh the job and checks if it is complete. - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + Args: + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. - :rtype: bool - :returns: True if the job is complete, False otherwise. + Returns: + bool: True if the job is complete, False otherwise. """ # Do not refresh is the state is already done, as the job will not # change once complete. @@ -747,21 +755,21 @@ def done(self, retry=DEFAULT_RETRY): def result(self, timeout=None, retry=DEFAULT_RETRY): """Start the job and wait for it to complete and get the result. - :type timeout: float - :param timeout: - How long (in seconds) to wait for job to complete before raising - a :class:`concurrent.futures.TimeoutError`. + Args: + timeout (float): + How long (in seconds) to wait for job to complete before raising + a :class:`concurrent.futures.TimeoutError`. - :type retry: :class:`google.api_core.retry.Retry` - :param retry: (Optional) How to retry the RPC. + retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. - :rtype: _AsyncJob - :returns: This instance. + Returns: + _AsyncJob: This instance. - :raises: - :class:`~google.cloud.exceptions.GoogleCloudError` if the job - failed or :class:`concurrent.futures.TimeoutError` if the job did - not complete in the given timeout. + Raises: + google.cloud.exceptions.GoogleCloudError: + if the job failed. + concurrent.futures.TimeoutError: + if the job did not complete in the given timeout. """ if self.state is None: self._begin(retry=retry) @@ -775,8 +783,8 @@ def cancelled(self): cancelled in the API. This method is here to satisfy the interface for :class:`google.api_core.future.Future`. - :rtype: bool - :returns: False + Returns: + bool: False """ return ( self.error_result is not None @@ -787,7 +795,7 @@ def cancelled(self): class _JobConfig(object): """Abstract base class for job configuration objects. - Arguments: + Args: job_type (str): The key to use for the job configuration. """ @@ -830,10 +838,10 @@ def _get_sub_prop(self, key, default=None): _helpers._get_sub_prop( self._properties, ['query', 'destinationTable']) - Arguments: + Args: key (str): - Key for the value to get in the - ``self._properties[self._job_type]`` dictionary. + Key for the value to get in the + ``self._properties[self._job_type]`` dictionary. default (object): (Optional) Default value to return if the key is not found. Defaults to :data:`None`. @@ -859,10 +867,10 @@ def _set_sub_prop(self, key, value): _helper._set_sub_prop( self._properties, ['query', 'useLegacySql'], False) - Arguments: + Args: key (str): - Key to set in the ``self._properties[self._job_type]`` - dictionary. + Key to set in the ``self._properties[self._job_type]`` + dictionary. value (object): Value to set. """ _helpers._set_sub_prop(self._properties, [self._job_type, key], value) @@ -881,18 +889,18 @@ def _del_sub_prop(self, key): _helper._del_sub_prop( self._properties, ['query', 'useLegacySql']) - Arguments: + Args: key (str): - Key to remove in the ``self._properties[self._job_type]`` - dictionary. + Key to remove in the ``self._properties[self._job_type]`` + dictionary. """ _helpers._del_sub_prop(self._properties, [self._job_type, key]) def to_api_repr(self): """Build an API representation of the job config. - :rtype: dict - :returns: A dictionary in the format used by the BigQuery API. + Returns: + Dict: A dictionary in the format used by the BigQuery API. """ return copy.deepcopy(self._properties) @@ -903,12 +911,12 @@ def _fill_from_default(self, default_job_config): config. The merge is done at the top-level as well as for keys one level below the job type. - Arguments: + Args: default_job_config (google.cloud.bigquery.job._JobConfig): The default job config that will be used to fill in self. Returns: - google.cloud.bigquery.job._JobConfig A new (merged) job config. + google.cloud.bigquery.job._JobConfig: A new (merged) job config. """ if self._job_type != default_job_config._job_type: raise TypeError( @@ -934,13 +942,13 @@ def _fill_from_default(self, default_job_config): def from_api_repr(cls, resource): """Factory: construct a job configuration given its API representation - :type resource: dict - :param resource: - An extract job configuration in the same representation as is - returned from the API. + Args: + resource (Dict): + An extract job configuration in the same representation as is + returned from the API. - :rtype: :class:`google.cloud.bigquery.job._JobConfig` - :returns: Configuration parsed from ``resource``. + Returns: + google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. """ config = cls() config._properties = copy.deepcopy(resource) @@ -1335,21 +1343,19 @@ class LoadJob(_AsyncJob): Can load from Google Cloud Storage URIs or from a file. - :type job_id: str - :param job_id: the job's ID + Args: + job_id (str): the job's ID - :type source_uris: sequence of string or ``NoneType`` - :param source_uris: - URIs of one or more data files to be loaded. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris - for supported URI formats. Pass None for jobs that load from a file. + source_uris (Optional[Sequence[str]]): + URIs of one or more data files to be loaded. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris + for supported URI formats. Pass None for jobs that load from a file. - :type destination: :class:`google.cloud.bigquery.table.TableReference` - :param destination: reference to table into which data is to be loaded. + destination (google.cloud.bigquery.table.TableReference): reference to table into which data is to be loaded. - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration + for the dataset (which requires a project). """ _JOB_TYPE = "load" @@ -1486,7 +1492,7 @@ def destination_encryption_configuration(self): @property def destination_table_description(self): - """Union[str, None] name given to destination table. + """Optional[str] name given to destination table. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description @@ -1495,7 +1501,7 @@ def destination_table_description(self): @property def destination_table_friendly_name(self): - """Union[str, None] name given to destination table. + """Optional[str] name given to destination table. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name @@ -1541,9 +1547,11 @@ def schema_update_options(self): def input_file_bytes(self): """Count of bytes loaded from source files. - :rtype: int, or ``NoneType`` - :returns: the count (None until set from the server). - :raises: ValueError for invalid value types. + Returns: + Optional[int]: the count (None until set from the server). + + Raises: + ValueError: for invalid value types. """ return _helpers._int_or_none( _helpers._get_sub_prop( @@ -1555,8 +1563,8 @@ def input_file_bytes(self): def input_files(self): """Count of source files. - :rtype: int, or ``NoneType`` - :returns: the count (None until set from the server). + Returns: + Optional[int]: the count (None until set from the server). """ return _helpers._int_or_none( _helpers._get_sub_prop( @@ -1568,8 +1576,8 @@ def input_files(self): def output_bytes(self): """Count of bytes saved to destination table. - :rtype: int, or ``NoneType`` - :returns: the count (None until set from the server). + Returns: + Optional[int]: the count (None until set from the server). """ return _helpers._int_or_none( _helpers._get_sub_prop( @@ -1581,8 +1589,8 @@ def output_bytes(self): def output_rows(self): """Count of rows saved to destination table. - :rtype: int, or ``NoneType`` - :returns: the count (None until set from the server). + Returns: + Optional[int]: the count (None until set from the server). """ return _helpers._int_or_none( _helpers._get_sub_prop( @@ -1619,15 +1627,15 @@ def from_api_repr(cls, resource, client): This method assumes that the project found in the resource matches the client's project. - :type resource: dict - :param resource: dataset job representation returned from the API + Args: + resource (Dict): dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: Client which holds credentials and project - configuration for the dataset. + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.LoadJob` - :returns: Job parsed from ``resource``. + Returns: + google.cloud.bigquery.job.LoadJob: Job parsed from ``resource``. """ config_resource = resource.get("configuration", {}) config = LoadJobConfig.from_api_repr(config_resource) @@ -1709,22 +1717,19 @@ def destination_encryption_configuration(self, value): class CopyJob(_AsyncJob): """Asynchronous job: copy data into a table from other tables. - :type job_id: str - :param job_id: the job's ID, within the project belonging to ``client``. + Args: + job_id (str): the job's ID, within the project belonging to ``client``. - :type sources: list of :class:`google.cloud.bigquery.table.TableReference` - :param sources: Table from which data is to be loaded. + sources (List[google.cloud.bigquery.table.TableReference]): Table from which data is to be loaded. - :type destination: :class:`google.cloud.bigquery.table.TableReference` - :param destination: Table into which data is to be loaded. + destination (google.cloud.bigquery.table.TableReference): Table into which data is to be loaded. - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration + for the dataset (which requires a project). - :type job_config: :class:`~google.cloud.bigquery.job.CopyJobConfig` - :param job_config: - (Optional) Extra configuration options for the copy job. + job_config (google.cloud.bigquery.job.CopyJobConfig): + (Optional) Extra configuration options for the copy job. """ _JOB_TYPE = "copy" @@ -1808,15 +1813,15 @@ def from_api_repr(cls, resource, client): This method assumes that the project found in the resource matches the client's project. - :type resource: dict - :param resource: dataset job representation returned from the API + Args: + resource (Dict): dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: Client which holds credentials and project - configuration for the dataset. + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.CopyJob` - :returns: Job parsed from ``resource``. + Returns: + google.cloud.bigquery.job.CopyJob: Job parsed from ``resource``. """ job_id, config_resource = cls._get_resource_config(resource) config = CopyJobConfig.from_api_repr(config_resource) @@ -1906,24 +1911,21 @@ def print_header(self, value): class ExtractJob(_AsyncJob): """Asynchronous job: extract data from a table into Cloud Storage. - :type job_id: str - :param job_id: the job's ID + Args: + job_id (str): the job's ID. - :type source: :class:`google.cloud.bigquery.table.TableReference` - :param source: Table into which data is to be loaded. + source (google.cloud.bigquery.table.TableReference): + Table into which data is to be loaded. - :type destination_uris: list of string - :param destination_uris: - URIs describing where the extracted data will be written in Cloud - Storage, using the format ``gs:///``. + destination_uris (List[str]): + URIs describing where the extracted data will be written in Cloud + Storage, using the format ``gs:///``. - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: - A client which holds credentials and project configuration. + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration. - :type job_config: :class:`~google.cloud.bigquery.job.ExtractJobConfig` - :param job_config: - (Optional) Extra configuration options for the extract job. + job_config (google.cloud.bigquery.job.ExtractJobConfig): + (Optional) Extra configuration options for the extract job. """ _JOB_TYPE = "extract" @@ -1974,11 +1976,12 @@ def destination_uri_file_counts(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts Returns: - a list of integer counts, each representing the number of files - per destination URI or URI pattern specified in the extract - configuration. These values will be in the same order as the URIs - specified in the 'destinationUris' field. Returns None if job is - not yet complete. + List[int]: + A list of integer counts, each representing the number of files + per destination URI or URI pattern specified in the extract + configuration. These values will be in the same order as the URIs + specified in the 'destinationUris' field. Returns None if job is + not yet complete. """ counts = self._job_statistics().get("destinationUriFileCounts") if counts is not None: @@ -2018,15 +2021,15 @@ def from_api_repr(cls, resource, client): This method assumes that the project found in the resource matches the client's project. - :type resource: dict - :param resource: dataset job representation returned from the API + Args: + resource (Dict): dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: Client which holds credentials and project - configuration for the dataset. + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.ExtractJob` - :returns: Job parsed from ``resource``. + Returns: + google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``. """ job_id, config_resource = cls._get_resource_config(resource) config = ExtractJobConfig.from_api_repr(config_resource) @@ -2425,7 +2428,7 @@ def time_partitioning(self, value): @property def clustering_fields(self): - """Union[List[str], None]: Fields defining clustering for the table + """Optional[List[str]]: Fields defining clustering for the table (Defaults to :data:`None`). @@ -2442,7 +2445,7 @@ def clustering_fields(self): @clustering_fields.setter def clustering_fields(self, value): - """Union[List[str], None]: Fields defining clustering for the table + """Optional[List[str]]: Fields defining clustering for the table (Defaults to :data:`None`). """ @@ -2467,7 +2470,7 @@ def to_api_repr(self): """Build an API representation of the query job config. Returns: - dict: A dictionary in the format used by the BigQuery API. + Dict: A dictionary in the format used by the BigQuery API. """ resource = copy.deepcopy(self._properties) @@ -2486,19 +2489,17 @@ def to_api_repr(self): class QueryJob(_AsyncJob): """Asynchronous job: query tables. - :type job_id: str - :param job_id: the job's ID, within the project belonging to ``client``. + Args: + job_id (str): the job's ID, within the project belonging to ``client``. - :type query: str - :param query: SQL query string + query (str): SQL query string. - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration + for the dataset (which requires a project). - :type job_config: :class:`~google.cloud.bigquery.job.QueryJobConfig` - :param job_config: - (Optional) Extra configuration options for the query job. + job_config (google.cloud.bigquery.job.QueryJobConfig): + (Optional) Extra configuration options for the query job. """ _JOB_TYPE = "query" @@ -2697,15 +2698,15 @@ def _copy_configuration_properties(self, configuration): def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation - :type resource: dict - :param resource: dataset job representation returned from the API + Args: + resource (Dict): dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: Client which holds credentials and project - configuration for the dataset. + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.QueryJob` - :returns: Job parsed from ``resource``. + Returns: + google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``. """ job_id, config = cls._get_resource_config(resource) query = _helpers._get_sub_prop(config, ["query", "query"]) @@ -2720,9 +2721,10 @@ def query_plan(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan - :rtype: list of :class:`QueryPlanEntry` - :returns: mappings describing the query plan, or an empty list - if the query has not yet completed. + Returns: + List[QueryPlanEntry]: + mappings describing the query plan, or an empty list + if the query has not yet completed. """ plan_entries = self._job_statistics().get("queryPlan", ()) return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] @@ -2742,9 +2744,10 @@ def total_bytes_processed(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_processed - :rtype: int or None - :returns: total bytes processed by the job, or None if job is not - yet complete. + Returns: + Optional[int]: + Total bytes processed by the job, or None if job is not + yet complete. """ result = self._job_statistics().get("totalBytesProcessed") if result is not None: @@ -2758,9 +2761,10 @@ def total_bytes_billed(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_billed - :rtype: int or None - :returns: total bytes processed by the job, or None if job is not - yet complete. + Returns: + Optional[int]: + Total bytes processed by the job, or None if job is not + yet complete. """ result = self._job_statistics().get("totalBytesBilled") if result is not None: @@ -2774,9 +2778,10 @@ def billing_tier(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.billing_tier - :rtype: int or None - :returns: billing tier used by the job, or None if job is not - yet complete. + Returns: + Optional[int]: + Billing tier used by the job, or None if job is not + yet complete. """ return self._job_statistics().get("billingTier") @@ -2787,9 +2792,10 @@ def cache_hit(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.cache_hit - :rtype: bool or None - :returns: whether the query results were returned from cache, or None - if job is not yet complete. + Returns: + Optional[bool]: + whether the query results were returned from cache, or None + if job is not yet complete. """ return self._job_statistics().get("cacheHit") @@ -2836,9 +2842,10 @@ def num_dml_affected_rows(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.num_dml_affected_rows - :rtype: int or None - :returns: number of DML rows affected by the job, or None if job is not - yet complete. + Returns: + Optional[int]: + number of DML rows affected by the job, or None if job is not + yet complete. """ result = self._job_statistics().get("numDmlAffectedRows") if result is not None: @@ -2857,9 +2864,10 @@ def statement_type(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.statement_type - :rtype: str or None - :returns: type of statement used by the job, or None if job is not - yet complete. + Returns: + Optional[str]: + type of statement used by the job, or None if job is not + yet complete. """ return self._job_statistics().get("statementType") @@ -2870,9 +2878,10 @@ def referenced_tables(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_tables - :rtype: list of dict - :returns: mappings describing the query plan, or an empty list - if the query has not yet completed. + Returns: + List[Dict]: + mappings describing the query plan, or an empty list + if the query has not yet completed. """ tables = [] datasets_by_project_name = {} @@ -2899,13 +2908,14 @@ def undeclared_query_parameters(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.undeclared_query_parameters - :rtype: - list of - :class:`~google.cloud.bigquery.ArrayQueryParameter`, - :class:`~google.cloud.bigquery.ScalarQueryParameter`, or - :class:`~google.cloud.bigquery.StructQueryParameter` - :returns: undeclared parameters, or an empty list if the query has - not yet completed. + Returns: + List[Union[ \ + google.cloud.bigquery.query.ArrayQueryParameter, \ + google.cloud.bigquery.query.ScalarQueryParameter, \ + google.cloud.bigquery.query.StructQueryParameter \ + ]]: + Undeclared parameters, or an empty list if the query has + not yet completed. """ parameters = [] undeclared = self._job_statistics().get("undeclaredQueryParameters", ()) @@ -2931,9 +2941,10 @@ def estimated_bytes_processed(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.estimated_bytes_processed - :rtype: int or None - :returns: number of DML rows affected by the job, or None if job is not - yet complete. + Returns: + Optional[int]: + number of DML rows affected by the job, or None if job is not + yet complete. """ result = self._job_statistics().get("estimatedBytesProcessed") if result is not None: @@ -2943,8 +2954,8 @@ def estimated_bytes_processed(self): def done(self, retry=DEFAULT_RETRY): """Refresh the job and checks if it is complete. - :rtype: bool - :returns: True if the job is complete, False otherwise. + Returns: + bool: True if the job is complete, False otherwise. """ # Since the API to getQueryResults can hang up to the timeout value # (default of 10 seconds), set the timeout parameter to ensure that @@ -2990,8 +3001,8 @@ def _format_for_exception(query, job_id): query (str): The SQL query to format. job_id (str): The ID of the job that ran the query. - Returns: (str) - A formatted query text. + Returns: + str: A formatted query text. """ template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}" @@ -3026,8 +3037,7 @@ def _begin(self, client=None, retry=DEFAULT_RETRY): How to retry the RPC. Raises: - ValueError: - If the job has already begun. + ValueError: If the job has already begun. """ try: @@ -3122,9 +3132,7 @@ def to_arrow(self, progress_bar_type=None, bqstorage_client=None): ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. - bqstorage_client ( \ - google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ - ): + bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient): **Beta Feature** Optional. A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. @@ -3157,9 +3165,7 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non """Return a pandas DataFrame from a QueryJob Args: - bqstorage_client ( \ - google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ - ): + bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient): **Alpha Feature** Optional. A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. @@ -3173,9 +3179,7 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non **Caution**: There is a known issue reading small anonymous query result tables with the BQ Storage API. Write your query results to a destination table to work around this issue. - dtypes ( \ - Map[str, Union[str, pandas.Series.dtype]] \ - ): + dtypes (Map[str, Union[str, pandas.Series.dtype]]): Optional. A dictionary of column names pandas ``dtype``s. The provided ``dtype`` is used when constructing the series for the column specified. Otherwise, the default pandas behavior @@ -3212,11 +3216,10 @@ def __iter__(self): class QueryPlanEntryStep(object): """Map a single step in a query plan entry. - :type kind: str - :param kind: step type + Args: + kind (str): step type. - :type substeps: - :param substeps: names of substeps + substeps (List): names of substeps. """ def __init__(self, kind, substeps): @@ -3227,11 +3230,11 @@ def __init__(self, kind, substeps): def from_api_repr(cls, resource): """Factory: construct instance from the JSON repr. - :type resource: dict - :param resource: JSON representation of the entry + Args: + resource (Dict): JSON representation of the entry. - :rtype: :class:`QueryPlanEntryStep` - :return: new instance built from the resource + Returns: + QueryPlanEntryStep: new instance built from the resource. """ return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ())) @@ -3247,7 +3250,6 @@ class QueryPlanEntry(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ExplainQueryStage for the underlying API representation within query statistics. - """ def __init__(self): @@ -3259,11 +3261,11 @@ def from_api_repr(cls, resource): Args: resource(Dict[str: object]): - ExplainQueryStage representation returned from API + ExplainQueryStage representation returned from API. Returns: google.cloud.bigquery.QueryPlanEntry: - Query plan entry parsed from ``resource`` + Query plan entry parsed from ``resource``. """ entry = cls() entry._properties = resource @@ -3271,17 +3273,17 @@ def from_api_repr(cls, resource): @property def name(self): - """Union[str, None]: Human-readable name of the stage.""" + """Optional[str]: Human-readable name of the stage.""" return self._properties.get("name") @property def entry_id(self): - """Union[str, None]: Unique ID for the stage within the plan.""" + """Optional[str]: Unique ID for the stage within the plan.""" return self._properties.get("id") @property def start(self): - """Union[Datetime, None]: Datetime when the stage started.""" + """Optional[Datetime]: Datetime when the stage started.""" if self._properties.get("startMs") is None: return None return _helpers._datetime_from_microseconds( @@ -3290,7 +3292,7 @@ def start(self): @property def end(self): - """Union[Datetime, None]: Datetime when the stage ended.""" + """Optional[Datetime]: Datetime when the stage ended.""" if self._properties.get("endMs") is None: return None return _helpers._datetime_from_microseconds( @@ -3309,33 +3311,33 @@ def input_stages(self): @property def parallel_inputs(self): - """Union[int, None]: Number of parallel input segments within + """Optional[int]: Number of parallel input segments within the stage. """ return _helpers._int_or_none(self._properties.get("parallelInputs")) @property def completed_parallel_inputs(self): - """Union[int, None]: Number of parallel input segments completed.""" + """Optional[int]: Number of parallel input segments completed.""" return _helpers._int_or_none(self._properties.get("completedParallelInputs")) @property def wait_ms_avg(self): - """Union[int, None]: Milliseconds the average worker spent waiting to + """Optional[int]: Milliseconds the average worker spent waiting to be scheduled. """ return _helpers._int_or_none(self._properties.get("waitMsAvg")) @property def wait_ms_max(self): - """Union[int, None]: Milliseconds the slowest worker spent waiting to + """Optional[int]: Milliseconds the slowest worker spent waiting to be scheduled. """ return _helpers._int_or_none(self._properties.get("waitMsMax")) @property def wait_ratio_avg(self): - """Union[float, None]: Ratio of time the average worker spent waiting + """Optional[float]: Ratio of time the average worker spent waiting to be scheduled, relative to the longest time spent by any worker in any stage of the overall plan. """ @@ -3343,7 +3345,7 @@ def wait_ratio_avg(self): @property def wait_ratio_max(self): - """Union[float, None]: Ratio of time the slowest worker spent waiting + """Optional[float]: Ratio of time the slowest worker spent waiting to be scheduled, relative to the longest time spent by any worker in any stage of the overall plan. """ @@ -3351,21 +3353,21 @@ def wait_ratio_max(self): @property def read_ms_avg(self): - """Union[int, None]: Milliseconds the average worker spent reading + """Optional[int]: Milliseconds the average worker spent reading input. """ return _helpers._int_or_none(self._properties.get("readMsAvg")) @property def read_ms_max(self): - """Union[int, None]: Milliseconds the slowest worker spent reading + """Optional[int]: Milliseconds the slowest worker spent reading input. """ return _helpers._int_or_none(self._properties.get("readMsMax")) @property def read_ratio_avg(self): - """Union[float, None]: Ratio of time the average worker spent reading + """Optional[float]: Ratio of time the average worker spent reading input, relative to the longest time spent by any worker in any stage of the overall plan. """ @@ -3373,7 +3375,7 @@ def read_ratio_avg(self): @property def read_ratio_max(self): - """Union[float, None]: Ratio of time the slowest worker spent reading + """Optional[float]: Ratio of time the slowest worker spent reading to be scheduled, relative to the longest time spent by any worker in any stage of the overall plan. """ @@ -3381,21 +3383,21 @@ def read_ratio_max(self): @property def compute_ms_avg(self): - """Union[int, None]: Milliseconds the average worker spent on CPU-bound + """Optional[int]: Milliseconds the average worker spent on CPU-bound processing. """ return _helpers._int_or_none(self._properties.get("computeMsAvg")) @property def compute_ms_max(self): - """Union[int, None]: Milliseconds the slowest worker spent on CPU-bound + """Optional[int]: Milliseconds the slowest worker spent on CPU-bound processing. """ return _helpers._int_or_none(self._properties.get("computeMsMax")) @property def compute_ratio_avg(self): - """Union[float, None]: Ratio of time the average worker spent on + """Optional[float]: Ratio of time the average worker spent on CPU-bound processing, relative to the longest time spent by any worker in any stage of the overall plan. """ @@ -3403,7 +3405,7 @@ def compute_ratio_avg(self): @property def compute_ratio_max(self): - """Union[float, None]: Ratio of time the slowest worker spent on + """Optional[float]: Ratio of time the slowest worker spent on CPU-bound processing, relative to the longest time spent by any worker in any stage of the overall plan. """ @@ -3411,21 +3413,21 @@ def compute_ratio_max(self): @property def write_ms_avg(self): - """Union[int, None]: Milliseconds the average worker spent writing + """Optional[int]: Milliseconds the average worker spent writing output data. """ return _helpers._int_or_none(self._properties.get("writeMsAvg")) @property def write_ms_max(self): - """Union[int, None]: Milliseconds the slowest worker spent writing + """Optional[int]: Milliseconds the slowest worker spent writing output data. """ return _helpers._int_or_none(self._properties.get("writeMsMax")) @property def write_ratio_avg(self): - """Union[float, None]: Ratio of time the average worker spent writing + """Optional[float]: Ratio of time the average worker spent writing output data, relative to the longest time spent by any worker in any stage of the overall plan. """ @@ -3433,7 +3435,7 @@ def write_ratio_avg(self): @property def write_ratio_max(self): - """Union[float, None]: Ratio of time the slowest worker spent writing + """Optional[float]: Ratio of time the slowest worker spent writing output data, relative to the longest time spent by any worker in any stage of the overall plan. """ @@ -3441,29 +3443,29 @@ def write_ratio_max(self): @property def records_read(self): - """Union[int, None]: Number of records read by this stage.""" + """Optional[int]: Number of records read by this stage.""" return _helpers._int_or_none(self._properties.get("recordsRead")) @property def records_written(self): - """Union[int, None]: Number of records written by this stage.""" + """Optional[int]: Number of records written by this stage.""" return _helpers._int_or_none(self._properties.get("recordsWritten")) @property def status(self): - """Union[str, None]: status of this stage.""" + """Optional[str]: status of this stage.""" return self._properties.get("status") @property def shuffle_output_bytes(self): - """Union[int, None]: Number of bytes written by this stage to + """Optional[int]: Number of bytes written by this stage to intermediate shuffle. """ return _helpers._int_or_none(self._properties.get("shuffleOutputBytes")) @property def shuffle_output_bytes_spilled(self): - """Union[int, None]: Number of bytes written by this stage to + """Optional[int]: Number of bytes written by this stage to intermediate shuffle and spilled to disk. """ return _helpers._int_or_none(self._properties.get("shuffleOutputBytesSpilled")) @@ -3486,7 +3488,6 @@ class TimelineEntry(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#querytimelinesample for the underlying API representation within query statistics. - """ def __init__(self): @@ -3498,11 +3499,11 @@ def from_api_repr(cls, resource): Args: resource(Dict[str: object]): - QueryTimelineSample representation returned from API + QueryTimelineSample representation returned from API. Returns: google.cloud.bigquery.TimelineEntry: - Timeline sample parsed from ``resource`` + Timeline sample parsed from ``resource``. """ entry = cls() entry._properties = resource @@ -3510,31 +3511,31 @@ def from_api_repr(cls, resource): @property def elapsed_ms(self): - """Union[int, None]: Milliseconds elapsed since start of query + """Optional[int]: Milliseconds elapsed since start of query execution.""" return _helpers._int_or_none(self._properties.get("elapsedMs")) @property def active_units(self): - """Union[int, None]: Current number of input units being processed + """Optional[int]: Current number of input units being processed by workers, reported as largest value since the last sample.""" return _helpers._int_or_none(self._properties.get("activeUnits")) @property def pending_units(self): - """Union[int, None]: Current number of input units remaining for + """Optional[int]: Current number of input units remaining for query stages active at this sample time.""" return _helpers._int_or_none(self._properties.get("pendingUnits")) @property def completed_units(self): - """Union[int, None]: Current number of input units completed by + """Optional[int]: Current number of input units completed by this query.""" return _helpers._int_or_none(self._properties.get("completedUnits")) @property def slot_millis(self): - """Union[int, None]: Cumulative slot-milliseconds consumed by + """Optional[int]: Cumulative slot-milliseconds consumed by this query.""" return _helpers._int_or_none(self._properties.get("totalSlotMs")) @@ -3547,7 +3548,7 @@ def from_api_repr(cls, resource, client): """Construct an UnknownJob from the JSON representation. Args: - resource (dict): JSON representation of a job. + resource (Dict): JSON representation of a job. client (google.cloud.bigquery.client.Client): Client connected to BigQuery API. @@ -3569,8 +3570,7 @@ class ScriptStackFrame(object): evaluation happened. Args: - resource (Map[str, Any]): - JSON representation of object. + resource (Map[str, Any]): JSON representation of object. """ def __init__(self, resource): @@ -3614,8 +3614,7 @@ class ScriptStatistics(object): """Statistics for a child job of a script. Args: - resource (Map[str, Any]): - JSON representation of object. + resource (Map[str, Any]): JSON representation of object. """ def __init__(self, resource): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 59fcd1a59c64..925f3e29d298 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -67,7 +67,8 @@ def from_api_repr(cls, resource): def to_api_repr(self): """Construct JSON API representation for the parameter. - :rtype: dict + Returns: + Dict: JSON representation for the parameter. """ raise NotImplementedError diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 72ff8f71385c..7e36c582c42b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1292,9 +1292,9 @@ class RowIterator(HTTPIterator): Defaults to a sensible value set by the API. extra_params (Dict[str, object]): Extra query string parameters for the API call. - table (Union[ - google.cloud.bigquery.table.Table, - google.cloud.bigquery.table.TableReference, + table (Union[ \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ ]): Optional. The table which these rows belong to, or a reference to it. Used to call the BigQuery Storage API to fetch rows. @@ -1463,9 +1463,7 @@ def to_arrow(self, progress_bar_type=None, bqstorage_client=None): ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. - bqstorage_client ( - google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient - ): + bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient): **Beta Feature** Optional. A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. From 7c2cf9f84251f1b0453e045f8436cfd68805f809 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 28 Oct 2019 09:56:46 -0700 Subject: [PATCH 0695/2016] doc(bigquery): add code sample for scripting (#9537) * doc(bigquery): add code sample for scripting This code sample initiates a scripting job and then demonstrates how to fetch the results and the child jobs. Removes system test for scripting, as it's redundant with the code sample. * blacken * add snippet to query how-to --- .../docs/usage/queries.rst | 13 ++++ .../samples/query_script.py | 69 +++++++++++++++++++ .../samples/tests/test_query_script.py | 28 ++++++++ .../google-cloud-bigquery/tests/system.py | 31 --------- 4 files changed, 110 insertions(+), 31 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/query_script.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_query_script.py diff --git a/packages/google-cloud-bigquery/docs/usage/queries.rst b/packages/google-cloud-bigquery/docs/usage/queries.rst index 1f0720e47f1a..5c9dbe18fa63 100644 --- a/packages/google-cloud-bigquery/docs/usage/queries.rst +++ b/packages/google-cloud-bigquery/docs/usage/queries.rst @@ -48,3 +48,16 @@ See BigQuery documentation for more information on :dedent: 4 :start-after: [START bigquery_query_params_named] :end-before: [END bigquery_query_params_named] + +Run a script +^^^^^^^^^^^^ + +See BigQuery documentation for more information on `scripting in BigQuery +standard SQL +`_. + +.. literalinclude:: ../samples/query_script.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_script] + :end-before: [END bigquery_query_script] diff --git a/packages/google-cloud-bigquery/samples/query_script.py b/packages/google-cloud-bigquery/samples/query_script.py new file mode 100644 index 000000000000..453b7c6f9435 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/query_script.py @@ -0,0 +1,69 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def query_script(client): + # [START bigquery_query_script] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # Run a SQL script. + sql_script = """ + -- Declare a variable to hold names as an array. + DECLARE top_names ARRAY; + + -- Build an array of the top 100 names from the year 2017. + SET top_names = ( + SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100) + FROM `bigquery-public-data.usa_names.usa_1910_2013` + WHERE year = 2000 + ); + + -- Which names appear as words in Shakespeare's plays? + SELECT + name AS shakespeare_name + FROM UNNEST(top_names) AS name + WHERE name IN ( + SELECT word + FROM `bigquery-public-data.samples.shakespeare` + ); + """ + parent_job = client.query(sql_script) + + # Wait for the whole script to finish. + rows_iterable = parent_job.result() + print("Script created {} child jobs.".format(parent_job.num_child_jobs)) + + # Fetch result rows for the final sub-job in the script. + rows = list(rows_iterable) + print( + "{} of the top 100 names from year 2000 also appear in Shakespeare's works.".format( + len(rows) + ) + ) + + # Fetch jobs created by the SQL script. + child_jobs_iterable = client.list_jobs(parent_job=parent_job) + for child_job in child_jobs_iterable: + child_rows = list(child_job.result()) + print( + "Child job with ID {} produced {} row(s).".format( + child_job.job_id, len(child_rows) + ) + ) + + # [END bigquery_query_script] diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_script.py b/packages/google-cloud-bigquery/samples/tests/test_query_script.py new file mode 100644 index 000000000000..70bb9df76fd4 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_query_script.py @@ -0,0 +1,28 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import query_script + + +def test_query_script(capsys, client): + + query_script.query_script(client) + out, _ = capsys.readouterr() + assert "Script created 2 child jobs." in out + assert ( + "53 of the top 100 names from year 2000 also appear in Shakespeare's works." + in out + ) + assert "produced 53 row(s)" in out + assert "produced 1 row(s)" in out diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 09fa7f456214..4816962a70d6 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -431,37 +431,6 @@ def test_list_tables(self): ) self.assertGreater(len(list(iterator)), 0) - def test_listing_scripting_jobs(self): - # Run a SQL script. - sql_script = """ - -- Declare a variable to hold names as an array. - DECLARE top_names ARRAY; - - -- Build an array of the top 100 names from the year 2017. - SET top_names = ( - SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100) - FROM `bigquery-public-data.usa_names.usa_1910_current` - WHERE year = 2017 - ); - - -- Which names appear as words in Shakespeare's plays? - SELECT - name AS shakespeare_name - FROM UNNEST(top_names) AS name - WHERE name IN ( - SELECT word - FROM `bigquery-public-data.samples.shakespeare` - ); - """ - parent_job = Config.CLIENT.query(sql_script, project=Config.CLIENT.project) - parent_job.result() - - # Fetch jobs created by the SQL script. - child_jobs = list(Config.CLIENT.list_jobs(parent_job=parent_job)) - - assert parent_job.num_child_jobs > 0 - assert len(child_jobs) == parent_job.num_child_jobs - def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) From fc61d6a26fd7b7af2ebaf9188fc58d658fb2aa51 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 29 Oct 2019 09:55:05 -0700 Subject: [PATCH 0696/2016] chore(bigquery): format code with latest version of black (#9556) --- .../proto/encryption_config_pb2.py | 2 +- .../cloud/bigquery_v2/proto/model_pb2.py | 24 +++++++++---------- .../bigquery_v2/proto/standard_sql_pb2.py | 6 ++--- .../google/cloud/bigquery_v2/types.py | 6 ++++- .../query_external_sheets_permanent_table.py | 4 ++-- .../query_external_sheets_temporary_table.py | 4 ++-- packages/google-cloud-bigquery/synth.metadata | 10 ++++---- .../tests/unit/test__pandas_helpers.py | 2 +- .../tests/unit/test_job.py | 2 +- 9 files changed, 32 insertions(+), 28 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py index b04cc3d58e9c..f7b26be5547f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py @@ -62,7 +62,7 @@ extension_scope=None, serialized_options=_b("\340A\001"), file=DESCRIPTOR, - ) + ), ], extensions=[], nested_types=[], diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py index 98dfa4b1a22c..3994660ec46d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py @@ -290,7 +290,7 @@ fields=[], extensions=[], nested_types=[], - enum_types=[_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD], + enum_types=[_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD,], serialized_options=None, is_extendable=False, syntax="proto3", @@ -819,7 +819,7 @@ ), ], extensions=[], - nested_types=[_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX], + nested_types=[_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX,], enum_types=[], serialized_options=None, is_extendable=False, @@ -1046,7 +1046,7 @@ ), ], extensions=[], - nested_types=[_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX], + nested_types=[_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX,], enum_types=[], serialized_options=None, is_extendable=False, @@ -1137,11 +1137,11 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, - ) + ), ], extensions=[], nested_types=[ - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT + _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT, ], enum_types=[], serialized_options=None, @@ -1216,7 +1216,7 @@ ), ], extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE], + nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE,], enum_types=[], serialized_options=None, is_extendable=False, @@ -1229,7 +1229,7 @@ index=0, containing_type=None, fields=[], - ) + ), ], serialized_start=3759, serialized_end=4209, @@ -1298,7 +1298,7 @@ ), ], extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE], + nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE,], enum_types=[], serialized_options=None, is_extendable=False, @@ -1372,7 +1372,7 @@ ), ], extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER], + nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER,], enum_types=[], serialized_options=None, is_extendable=False, @@ -1477,7 +1477,7 @@ index=0, containing_type=None, fields=[], - ) + ), ], serialized_start=4212, serialized_end=4617, @@ -1926,7 +1926,7 @@ ), ], extensions=[], - nested_types=[_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY], + nested_types=[_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY,], enum_types=[], serialized_options=None, is_extendable=False, @@ -2128,7 +2128,7 @@ ), ], extensions=[], - nested_types=[_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO], + nested_types=[_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO,], enum_types=[], serialized_options=None, is_extendable=False, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py index 19ca829a4061..3b394b8bf10e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py @@ -161,7 +161,7 @@ ], extensions=[], nested_types=[], - enum_types=[_STANDARDSQLDATATYPE_TYPEKIND], + enum_types=[_STANDARDSQLDATATYPE_TYPEKIND,], serialized_options=None, is_extendable=False, syntax="proto3", @@ -173,7 +173,7 @@ index=0, containing_type=None, fields=[], - ) + ), ], serialized_start=143, serialized_end=602, @@ -261,7 +261,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, - ) + ), ], extensions=[], nested_types=[], diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py index da9287c07824..ee852364a10f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py @@ -29,7 +29,11 @@ from google.protobuf import wrappers_pb2 -_shared_modules = [empty_pb2, timestamp_pb2, wrappers_pb2] +_shared_modules = [ + empty_pb2, + timestamp_pb2, + wrappers_pb2, +] _local_modules = [ encryption_config_pb2, diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py index fd4f7577f1b1..ce9b1c928782 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py @@ -52,8 +52,8 @@ def query_external_sheets_permanent_table(dataset_id): external_config.source_uris = [sheet_url] external_config.options.skip_leading_rows = 1 # Optionally skip header row. external_config.options.range = ( - "us-states!A20:B49" - ) # Optionally set range of the sheet to query from. + "us-states!A20:B49" # Optionally set range of the sheet to query from. + ) table.external_data_configuration = external_config # Create a permanent table linked to the Sheets file. diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py index 9f17e91a46cc..e89b6efab362 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py @@ -49,8 +49,8 @@ def query_external_sheets_temporary_table(): ] external_config.options.skip_leading_rows = 1 # Optionally skip header row. external_config.options.range = ( - "us-states!A20:B49" - ) # Optionally set range of the sheet to query from. + "us-states!A20:B49" # Optionally set range of the sheet to query from. + ) table_id = "us_states" job_config = bigquery.QueryJobConfig() job_config.table_definitions = {table_id: external_config} diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index b85a00155ee8..863d7b1ad9e6 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -1,19 +1,19 @@ { - "updateTime": "2019-09-12T14:51:08.578469Z", + "updateTime": "2019-10-29T12:13:17.119821Z", "sources": [ { "generator": { "name": "artman", - "version": "0.36.2", - "dockerImage": "googleapis/artman@sha256:0e6f3a668cd68afc768ecbe08817cf6e56a0e64fcbdb1c58c3b97492d12418a1" + "version": "0.40.3", + "dockerImage": "googleapis/artman@sha256:c805f50525f5f557886c94ab76f56eaa09cb1da58c3ee95111fd34259376621a" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "1cb29d0fd49437d8e5d7de327e258739e998f01c", - "internalRef": "268598527" + "sha": "532773acbed8d09451dafb3d403ab1823e6a6e1e", + "internalRef": "277177415" } } ], diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 46fb59180740..6f228fafcf8e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -619,7 +619,7 @@ def test_list_columns_and_indexes_without_named_index(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_list_columns_and_indexes_with_named_index_same_as_column_name( - module_under_test + module_under_test, ): df_data = collections.OrderedDict( [ diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 5f3d3ee965b8..84f52627b7f3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -843,7 +843,7 @@ def test__set_future_result_w_done_wo_result_set_w_error(self): set_exception.assert_called_once() args, kw = set_exception.call_args - exception, = args + (exception,) = args self.assertIsInstance(exception, NotFound) self.assertEqual(exception.message, "testing") self.assertEqual(kw, {}) From 036185af6fb35b9ff06adb2ded20a2307d9f78e6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 1 Nov 2019 23:25:04 +0200 Subject: [PATCH 0697/2016] docs(bigquery): document how to achieve higher write limit and add tests (#9574) * test(bigquery): add insert_rows*() tests w/o row IDs * Groom the insert_rows_json() method's docstring * docs: document how to achieve higher insert write limit * Make method names less confusing for insert IDs --- .../docs/usage/tables.rst | 14 +++ .../google/cloud/bigquery/client.py | 35 +++--- ...le_insert_rows_explicit_none_insert_ids.py | 36 ++++++ ...le_insert_rows_explicit_none_insert_ids.py | 33 ++++++ .../tests/unit/test_client.py | 104 ++++++++++++++++++ 5 files changed, 206 insertions(+), 16 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index 6a6cbd356639..d58dcc5d9ac4 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -122,6 +122,20 @@ Insert rows into a table's data with the :start-after: [START bigquery_table_insert_rows] :end-before: [END bigquery_table_insert_rows] +Insert rows into a table's data with the +:func:`~google.cloud.bigquery.client.Client.insert_rows` method, achieving +higher write limit: + +.. literalinclude:: ../samples/table_insert_rows_explicit_none_insert_ids.py + :language: python + :dedent: 4 + :start-after: [START bigquery_table_insert_rows_explicit_none_insert_ids] + :end-before: [END bigquery_table_insert_rows_explicit_none_insert_ids] + +Mind that inserting data with ``None`` row insert IDs can come at the expense of +more duplicate inserts. See also: +`Streaming inserts `_. + Add an empty column to the existing table with the :func:`~google.cloud.bigquery.update_table` method: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 02bfc651af0d..bae4359300f8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2264,29 +2264,32 @@ def insert_rows_json( table (Union[ \ google.cloud.bigquery.table.Table \ google.cloud.bigquery.table.TableReference, \ - str, \ + str \ ]): The destination table for the row data, or a reference to it. json_rows (Sequence[Dict]): Row data to be inserted. Keys must match the table schema fields and values must be JSON-compatible representations. - row_ids (Sequence[str]): - (Optional) Unique ids, one per row being inserted. If omitted, - unique IDs are created. - skip_invalid_rows (bool): - (Optional) Insert all valid rows of a request, even if invalid - rows exist. The default value is False, which causes the entire - request to fail if any invalid rows exist. - ignore_unknown_values (bool): - (Optional) Accept rows that contain values that do not match the - schema. The unknown values are ignored. Default is False, which + row_ids (Optional[Sequence[Optional[str]]]): + Unique IDs, one per row being inserted. An ID can also be + ``None``, indicating that an explicit insert ID should **not** + be used for that row. If the argument is omitted altogether, + unique IDs are created automatically. + skip_invalid_rows (Optional[bool]): + Insert all valid rows of a request, even if invalid rows exist. + The default value is ``False``, which causes the entire request + to fail if any invalid rows exist. + ignore_unknown_values (Optional[bool]): + Accept rows that contain values that do not match the schema. + The unknown values are ignored. Default is ``False``, which treats unknown values as errors. - template_suffix (str): - (Optional) treat ``name`` as a template table and provide a suffix. - BigQuery will create the table `` + `` based - on the schema of the template table. See + template_suffix (Optional[str]): + Treat ``name`` as a template table and provide a suffix. + BigQuery will create the table `` + `` + based on the schema of the template table. See https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables - retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. Returns: Sequence[Mappings]: diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py new file mode 100644 index 000000000000..953e7e210312 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py @@ -0,0 +1,36 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def table_insert_rows_explicit_none_insert_ids(client, table_id): + + # [START bigquery_table_insert_rows_explicit_none_insert_ids] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the model to fetch. + # table_id = "your-project.your_dataset.your_table" + + table = client.get_table(table_id) # Make an API request. + rows_to_insert = [(u"Phred Phlyntstone", 32), (u"Wylma Phlyntstone", 29)] + + errors = client.insert_rows( + table, rows_to_insert, row_ids=[None] * len(rows_to_insert) + ) # Make an API request. + if errors == []: + print("New rows have been added.") + # [END bigquery_table_insert_rows_explicit_none_insert_ids] diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py new file mode 100644 index 000000000000..6a59609baacf --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from google.cloud import bigquery + +from .. import table_insert_rows_explicit_none_insert_ids as mut + + +def test_table_insert_rows_explicit_none_insert_ids(capsys, client, random_table_id): + + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + table = bigquery.Table(random_table_id, schema=schema) + table = client.create_table(table) + + mut.table_insert_rows_explicit_none_insert_ids(client, random_table_id) + out, err = capsys.readouterr() + assert "New rows have been added." in out diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 91b9bc642187..b4e5e96f1e8e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4572,6 +4572,40 @@ def test_insert_rows_w_record_schema(self): method="POST", path="/%s" % PATH, data=SENT ) + def test_insert_rows_w_explicit_none_insert_ids(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + PATH = "projects/{}/datasets/{}/tables/{}/insertAll".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, + ) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection({}) + schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + table = Table(self.TABLE_REF, schema=schema) + ROWS = [ + {"full_name": "Phred Phlyntstone", "age": 32}, + {"full_name": "Bharney Rhubble", "age": 33}, + ] + + def _row_data(row): + row["age"] = str(row["age"]) + return row + + SENT = {"rows": [{"json": _row_data(row), "insertId": None} for row in ROWS]} + + errors = client.insert_rows(table, ROWS, row_ids=[None] * len(ROWS)) + + self.assertEqual(len(errors), 0) + conn.api_request.assert_called_once_with( + method="POST", path="/{}".format(PATH), data=SENT + ) + def test_insert_rows_errors(self): from google.cloud.bigquery.table import Table @@ -4765,6 +4799,55 @@ def test_insert_rows_from_dataframe_many_columns(self): assert len(actual_calls) == 1 assert actual_calls[0] == expected_call + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): + from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.table import Table + + API_PATH = "/projects/{}/datasets/{}/tables/{}/insertAll".format( + self.PROJECT, self.DS_ID, self.TABLE_REF.table_id + ) + + dataframe = pandas.DataFrame( + [ + {"name": u"Little One", "adult": False}, + {"name": u"Young Gun", "adult": True}, + ] + ) + + # create client + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection({}, {}) + + # create table + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("adult", "BOOLEAN", mode="REQUIRED"), + ] + table = Table(self.TABLE_REF, schema=schema) + + error_info = client.insert_rows_from_dataframe( + table, dataframe, row_ids=[None] * len(dataframe) + ) + + self.assertEqual(len(error_info), 1) + assert error_info[0] == [] # no chunk errors + + EXPECTED_SENT_DATA = { + "rows": [ + {"insertId": None, "json": {"name": "Little One", "adult": "false"}}, + {"insertId": None, "json": {"name": "Young Gun", "adult": "true"}}, + ] + } + + actual_calls = conn.api_request.call_args_list + assert len(actual_calls) == 1 + assert actual_calls[0] == mock.call( + method="POST", path=API_PATH, data=EXPECTED_SENT_DATA + ) + def test_insert_rows_json(self): from google.cloud.bigquery.table import Table, SchemaField from google.cloud.bigquery.dataset import DatasetReference @@ -4833,6 +4916,27 @@ def test_insert_rows_json_with_string_id(self): data=expected, ) + def test_insert_rows_json_w_explicit_none_insert_ids(self): + rows = [{"col1": "val1"}, {"col2": "val2"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + conn = client._connection = make_connection({}) + + errors = client.insert_rows_json( + "proj.dset.tbl", rows, row_ids=[None] * len(rows), + ) + + self.assertEqual(len(errors), 0) + expected = {"rows": [{"json": row, "insertId": None} for row in rows]} + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/proj/datasets/dset/tables/tbl/insertAll", + data=expected, + ) + def test_list_partitions(self): from google.cloud.bigquery.table import Table From beedfd915ef44f3ab4bb0e26d4785e171ffc16fc Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sun, 3 Nov 2019 23:08:37 +0200 Subject: [PATCH 0698/2016] feat(bigquery): allow passing schema as a sequence of dicts (#9550) * feat(bigquery): add _to_schema_fields() schema helper * Allow passing schema as dicts _helpers * Allow passing schema as dicts in table.py * Allow passing schema as dicts in job.py * Import SchemaField directly in several tests SchemaField should not be imported from bigquery.table, but directly from where it's defined, so that any changes to the imports in bigquery.table do not cause unnecessary test failures. * Allow passing schema as dicts in pandas helpers * Replace return statement with an else block * Alter the type spec of values in schema field dict * Blacken a few files * Simplify _to_schema_fields() schema helper * Update docstrings for schema parameter --- .../google/cloud/bigquery/_helpers.py | 27 +++- .../google/cloud/bigquery/_pandas_helpers.py | 58 ++++++-- .../google/cloud/bigquery/job.py | 11 +- .../google/cloud/bigquery/schema.py | 35 +++++ .../google/cloud/bigquery/table.py | 36 +++-- .../tests/unit/test__helpers.py | 28 +++- .../tests/unit/test__pandas_helpers.py | 137 ++++++++++++++++++ .../tests/unit/test_client.py | 47 +++--- .../tests/unit/test_job.py | 38 ++++- .../tests/unit/test_schema.py | 66 +++++++++ .../tests/unit/test_table.py | 108 +++++++++++--- 11 files changed, 516 insertions(+), 75 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 266bfc2c666c..98eadb0a2f8e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -224,11 +224,18 @@ def _row_tuple_from_json(row, schema): Args: row (Dict): A JSON response row to be converted. - schema (Tuple): A tuple of :class:`~google.cloud.bigquery.schema.SchemaField`. + schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]): Specification of the field types in ``row``. Returns: Tuple: A tuple of data converted to native types. """ + from google.cloud.bigquery.schema import _to_schema_fields + + schema = _to_schema_fields(schema) + row_data = [] for field, cell in zip(schema, row["f"]): row_data.append(_field_from_json(cell["v"], field)) @@ -236,9 +243,25 @@ def _row_tuple_from_json(row, schema): def _rows_from_json(values, schema): - """Convert JSON row data to rows with appropriate types.""" + """Convert JSON row data to rows with appropriate types. + + Args: + values (Sequence[Dict]): The list of responses (JSON rows) to convert. + schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]): + The table's schema. If any item is a mapping, its content must be + compatible with + :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. + + Returns: + List[:class:`~google.cloud.bigquery.Row`] + """ from google.cloud.bigquery import Row + from google.cloud.bigquery.schema import _to_schema_fields + schema = _to_schema_fields(schema) field_to_index = _field_to_index_mapping(schema) return [Row(_row_tuple_from_json(r, schema), field_to_index) for r in values] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index c7edf2ae51f5..aeb18c2d213d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -239,7 +239,10 @@ def dataframe_to_bq_schema(dataframe, bq_schema): Args: dataframe (pandas.DataFrame): DataFrame for which the client determines the BigQuery schema. - bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + bq_schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]): A BigQuery schema. Use this argument to override the autodetected type for some or all of the DataFrame columns. @@ -249,6 +252,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): any column cannot be determined. """ if bq_schema: + bq_schema = schema._to_schema_fields(bq_schema) for field in bq_schema: if field.field_type in schema._STRUCT_TYPES: raise ValueError( @@ -297,9 +301,12 @@ def dataframe_to_arrow(dataframe, bq_schema): Args: dataframe (pandas.DataFrame): DataFrame to convert to Arrow table. - bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): - Desired BigQuery schema. Number of columns must match number of - columns in the DataFrame. + bq_schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]): + Desired BigQuery schema. The number of columns must match the + number of columns in the DataFrame. Returns: pyarrow.Table: @@ -310,6 +317,8 @@ def dataframe_to_arrow(dataframe, bq_schema): column_and_index_names = set( name for name, _ in list_columns_and_indexes(dataframe) ) + + bq_schema = schema._to_schema_fields(bq_schema) bq_field_names = set(field.name for field in bq_schema) extra_fields = bq_field_names - column_and_index_names @@ -354,7 +363,10 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN Args: dataframe (pandas.DataFrame): DataFrame to convert to Parquet file. - bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + bq_schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]): Desired BigQuery schema. Number of columns must match number of columns in the DataFrame. filepath (str): @@ -368,6 +380,7 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN if pyarrow is None: raise ValueError("pyarrow is required for BigQuery schema conversion.") + bq_schema = schema._to_schema_fields(bq_schema) arrow_table = dataframe_to_arrow(dataframe, bq_schema) pyarrow.parquet.write_table(arrow_table, filepath, compression=parquet_compression) @@ -388,20 +401,24 @@ def _tabledata_list_page_to_arrow(page, column_names, arrow_types): return pyarrow.RecordBatch.from_arrays(arrays, names=column_names) -def download_arrow_tabledata_list(pages, schema): +def download_arrow_tabledata_list(pages, bq_schema): """Use tabledata.list to construct an iterable of RecordBatches. Args: pages (Iterator[:class:`google.api_core.page_iterator.Page`]): An iterator over the result pages. - schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + bq_schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]): A decription of the fields in result pages. Yields: :class:`pyarrow.RecordBatch` The next page of records as a ``pyarrow`` record batch. """ - column_names = bq_to_arrow_schema(schema) or [field.name for field in schema] - arrow_types = [bq_to_arrow_data_type(field) for field in schema] + bq_schema = schema._to_schema_fields(bq_schema) + column_names = bq_to_arrow_schema(bq_schema) or [field.name for field in bq_schema] + arrow_types = [bq_to_arrow_data_type(field) for field in bq_schema] for page in pages: yield _tabledata_list_page_to_arrow(page, column_names, arrow_types) @@ -422,9 +439,26 @@ def _tabledata_list_page_to_dataframe(page, column_names, dtypes): return pandas.DataFrame(columns, columns=column_names) -def download_dataframe_tabledata_list(pages, schema, dtypes): - """Use (slower, but free) tabledata.list to construct a DataFrame.""" - column_names = [field.name for field in schema] +def download_dataframe_tabledata_list(pages, bq_schema, dtypes): + """Use (slower, but free) tabledata.list to construct a DataFrame. + + Args: + pages (Iterator[:class:`google.api_core.page_iterator.Page`]): + An iterator over the result pages. + bq_schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]): + A decription of the fields in result pages. + dtypes(Mapping[str, numpy.dtype]): + The types of columns in result data to hint construction of the + resulting DataFrame. Not all column types have to be specified. + Yields: + :class:`pandas.DataFrame` + The next page of records as a ``pandas.DataFrame`` record batch. + """ + bq_schema = schema._to_schema_fields(bq_schema) + column_names = [field.name for field in bq_schema] for page in pages: yield _tabledata_list_page_to_dataframe(page, column_names, dtypes) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 96724c9f805b..a8d797f4bef5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -38,6 +38,7 @@ from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import _to_schema_fields from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import _table_arg_to_table_ref @@ -1225,8 +1226,10 @@ def range_partitioning(self, value): @property def schema(self): - """List[google.cloud.bigquery.schema.SchemaField]: Schema of the - destination table. + """Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]: Schema of the destination table. See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.schema @@ -1242,8 +1245,8 @@ def schema(self, value): self._del_sub_prop("schema") return - if not all(hasattr(field, "to_api_repr") for field in value): - raise ValueError("Schema items must be fields") + value = _to_schema_fields(value) + _helpers._set_sub_prop( self._properties, ["load", "schema", "fields"], diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index cb94133abdad..d766cb542608 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -14,6 +14,8 @@ """Schemas for BigQuery tables / queries.""" +import collections + from google.cloud.bigquery_v2 import types @@ -256,3 +258,36 @@ def _build_schema_resource(fields): Sequence[Dict]: Mappings describing the schema of the supplied fields. """ return [field.to_api_repr() for field in fields] + + +def _to_schema_fields(schema): + """Coerce `schema` to a list of schema field instances. + + Args: + schema(Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]): + Table schema to convert. If some items are passed as mappings, + their content must be compatible with + :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. + + Returns: + Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`] + + Raises: + Exception: If ``schema`` is not a sequence, or if any item in the + sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` + instance or a compatible mapping representation of the field. + """ + for field in schema: + if not isinstance(field, (SchemaField, collections.Mapping)): + raise ValueError( + "Schema items must either be fields or compatible " + "mapping representations." + ) + + return [ + field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) + for field in schema + ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 7e36c582c42b..2f2ee50cc89e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -51,9 +51,9 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers -from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource +from google.cloud.bigquery.schema import _to_schema_fields from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -305,8 +305,13 @@ class Table(object): A pointer to a table. If ``table_ref`` is a string, it must included a project ID, dataset ID, and table ID, each separated by ``.``. - schema (List[google.cloud.bigquery.schema.SchemaField]): - The table's schema + schema (Optional[Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]]): + The table's schema. If any item is a mapping, its content must be + compatible with + :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. """ _PROPERTY_TO_API_FIELD = { @@ -369,13 +374,17 @@ def require_partition_filter(self, value): @property def schema(self): - """List[google.cloud.bigquery.schema.SchemaField]: Table's schema. + """Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]: + Table's schema. Raises: - TypeError: If 'value' is not a sequence - ValueError: - If any item in the sequence is not a - :class:`~google.cloud.bigquery.schema.SchemaField` + Exception: + If ``schema`` is not a sequence, or if any item in the sequence + is not a :class:`~google.cloud.bigquery.schema.SchemaField` + instance or a compatible mapping representation of the field. """ prop = self._properties.get("schema") if not prop: @@ -387,9 +396,8 @@ def schema(self): def schema(self, value): if value is None: self._properties["schema"] = None - elif not all(isinstance(field, SchemaField) for field in value): - raise ValueError("Schema items must be fields") else: + value = _to_schema_fields(value) self._properties["schema"] = {"fields": _build_schema_resource(value)} @property @@ -1284,6 +1292,13 @@ class RowIterator(HTTPIterator): api_request (Callable[google.cloud._http.JSONConnection.api_request]): The function to use to make API requests. path (str): The method path to query for the list of items. + schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]): + The table's schema. If any item is a mapping, its content must be + compatible with + :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. page_token (str): A token identifying a page in a result set to start fetching results from. max_results (int, optional): The maximum number of results to fetch. @@ -1328,6 +1343,7 @@ def __init__( page_start=_rows_page_start, next_token="pageToken", ) + schema = _to_schema_fields(schema) self._field_to_index = _helpers._field_to_index_mapping(schema) self._page_size = page_size self._preserve_order = False diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 3884695d83af..6d92b4de73ba 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -17,6 +17,8 @@ import decimal import unittest +import mock + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): @@ -412,7 +414,8 @@ class Test_row_tuple_from_json(unittest.TestCase): def _call_fut(self, row, schema): from google.cloud.bigquery._helpers import _row_tuple_from_json - return _row_tuple_from_json(row, schema) + with _field_isinstance_patcher(): + return _row_tuple_from_json(row, schema) def test_w_single_scalar_column(self): # SELECT 1 AS col @@ -529,7 +532,8 @@ class Test_rows_from_json(unittest.TestCase): def _call_fut(self, rows, schema): from google.cloud.bigquery._helpers import _rows_from_json - return _rows_from_json(rows, schema) + with _field_isinstance_patcher(): + return _rows_from_json(rows, schema) def test_w_record_subfield(self): from google.cloud.bigquery.table import Row @@ -1023,3 +1027,23 @@ def __init__(self, mode, name="unknown", field_type="UNKNOWN", fields=()): self.name = name self.field_type = field_type self.fields = fields + + +def _field_isinstance_patcher(): + """A patcher thank makes _Field instances seem like SchemaField instances. + """ + from google.cloud.bigquery.schema import SchemaField + + def fake_isinstance(instance, target_class): + if instance.__class__.__name__ != "_Field": + return isinstance(instance, target_class) # pragma: NO COVER + + # pretend that _Field() instances are actually instances of SchemaField + return target_class is SchemaField or ( + isinstance(target_class, tuple) and SchemaField in target_class + ) + + patcher = mock.patch( + "google.cloud.bigquery.schema.isinstance", side_effect=fake_isinstance + ) + return patcher diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 6f228fafcf8e..56ac62820841 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -701,6 +701,32 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test): assert columns_and_indexes == expected +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_dict_sequence(module_under_test): + df_data = collections.OrderedDict( + [ + ("str_column", [u"hello", u"world"]), + ("int_column", [42, 8]), + ("bool_column", [True, False]), + ] + ) + dataframe = pandas.DataFrame(df_data) + + dict_schema = [ + {"name": "str_column", "type": "STRING", "mode": "NULLABLE"}, + {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, + ] + + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) + + expected_schema = ( + schema.SchemaField("str_column", "STRING", "NULLABLE"), + schema.SchemaField("int_column", "INTEGER", "NULLABLE"), + schema.SchemaField("bool_column", "BOOL", "REQUIRED"), + ) + assert returned_schema == expected_schema + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_multiindex(module_under_test): @@ -856,6 +882,28 @@ def test_dataframe_to_arrow_with_unknown_type(module_under_test): assert arrow_schema[3].name == "field03" +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): + dict_schema = [ + {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, + {"name": "field02", "type": "BOOL", "mode": "NULLABLE"}, + ] + + dataframe = pandas.DataFrame( + {"field01": [u"hello", u"world"], "field02": [True, False]} + ) + + arrow_table = module_under_test.dataframe_to_arrow(dataframe, dict_schema) + arrow_schema = arrow_table.schema + + expected_fields = [ + pyarrow.field("field01", "string", nullable=False), + pyarrow.field("field02", "bool", nullable=True), + ] + assert list(arrow_schema) == expected_fields + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): monkeypatch.setattr(module_under_test, "pyarrow", None) @@ -908,6 +956,36 @@ def test_dataframe_to_parquet_compression_method(module_under_test): assert call_args.kwargs.get("compression") == "ZSTD" +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): + dict_schema = [ + {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, + {"name": "field02", "type": "BOOL", "mode": "NULLABLE"}, + ] + + dataframe = pandas.DataFrame( + {"field01": [u"hello", u"world"], "field02": [True, False]} + ) + + write_table_patch = mock.patch.object( + module_under_test.pyarrow.parquet, "write_table", autospec=True + ) + to_arrow_patch = mock.patch.object( + module_under_test, "dataframe_to_arrow", autospec=True + ) + + with write_table_patch, to_arrow_patch as fake_to_arrow: + module_under_test.dataframe_to_parquet(dataframe, dict_schema, None) + + expected_schema_arg = [ + schema.SchemaField("field01", "STRING", mode="REQUIRED"), + schema.SchemaField("field02", "BOOL", mode="NULLABLE"), + ] + schema_arg = fake_to_arrow.call_args.args[1] + assert schema_arg == expected_schema_arg + + @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( @@ -977,3 +1055,62 @@ def test_download_arrow_tabledata_list_known_field_type(module_under_test): col = result.columns[1] assert type(col) is pyarrow.lib.StringArray assert list(col) == ["2.2", "22.22", "222.222"] + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): + fake_page = api_core.page_iterator.Page( + parent=mock.Mock(), + items=[{"page_data": "foo"}], + item_to_value=api_core.page_iterator._item_to_value_identity, + ) + fake_page._columns = [[1, 10, 100], ["2.2", "22.22", "222.222"]] + pages = [fake_page] + + dict_schema = [ + {"name": "population_size", "type": "INTEGER", "mode": "NULLABLE"}, + {"name": "non_alien_field", "type": "STRING", "mode": "NULLABLE"}, + ] + + results_gen = module_under_test.download_arrow_tabledata_list(pages, dict_schema) + result = next(results_gen) + + assert len(result.columns) == 2 + col = result.columns[0] + assert type(col) is pyarrow.lib.Int64Array + assert list(col) == [1, 10, 100] + col = result.columns[1] + assert type(col) is pyarrow.lib.StringArray + assert list(col) == ["2.2", "22.22", "222.222"] + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_test): + fake_page = api_core.page_iterator.Page( + parent=mock.Mock(), + items=[{"page_data": "foo"}], + item_to_value=api_core.page_iterator._item_to_value_identity, + ) + fake_page._columns = [[1, 10, 100], ["2.2", "22.22", "222.222"]] + pages = [fake_page] + + dict_schema = [ + {"name": "population_size", "type": "INTEGER", "mode": "NULLABLE"}, + {"name": "non_alien_field", "type": "STRING", "mode": "NULLABLE"}, + ] + + results_gen = module_under_test.download_dataframe_tabledata_list( + pages, dict_schema, dtypes={} + ) + result = next(results_gen) + + expected_result = pandas.DataFrame( + collections.OrderedDict( + [ + ("population_size", [1, 10, 100]), + ("non_alien_field", ["2.2", "22.22", "222.222"]), + ] + ) + ) + assert result.equals(expected_result) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index b4e5e96f1e8e..bc56fac34c6a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1138,7 +1138,8 @@ def test_create_table_w_day_partition_and_expire(self): self.assertEqual(got.table_id, self.TABLE_ID) def test_create_table_w_schema_and_query(self): - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) query = "SELECT * from %s:%s" % (self.DS_ID, self.TABLE_ID) @@ -1753,7 +1754,8 @@ def test_update_routine(self): self.assertEqual(req[1]["headers"]["If-Match"], "im-an-etag") def test_update_table(self): - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table path = "projects/%s/datasets/%s/tables/%s" % ( self.PROJECT, @@ -1896,7 +1898,8 @@ def test_update_table_w_query(self): import datetime from google.cloud._helpers import UTC from google.cloud._helpers import _millis - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table path = "projects/%s/datasets/%s/tables/%s" % ( self.PROJECT, @@ -4173,7 +4176,7 @@ def test_insert_rows_w_schema(self): from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _microseconds_from_datetime - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) @@ -4229,7 +4232,8 @@ def test_insert_rows_w_list_of_dictionaries(self): from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud._helpers import _microseconds_from_datetime - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table WHEN_TS = 1437767599.006 WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) @@ -4290,8 +4294,8 @@ def _row_data(row): ) def test_insert_rows_w_list_of_Rows(self): + from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table - from google.cloud.bigquery.table import SchemaField from google.cloud.bigquery.table import Row PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( @@ -4335,7 +4339,8 @@ def _row_data(row): ) def test_insert_rows_w_skip_invalid_and_ignore_unknown(self): - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( self.PROJECT, @@ -4411,7 +4416,8 @@ def _row_data(row): ) def test_insert_rows_w_repeated_fields(self): - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( self.PROJECT, @@ -4504,7 +4510,7 @@ def test_insert_rows_w_repeated_fields(self): ) def test_insert_rows_w_record_schema(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( self.PROJECT, @@ -4633,6 +4639,7 @@ def test_insert_rows_errors(self): def test_insert_rows_w_numeric(self): from google.cloud.bigquery import table + from google.cloud.bigquery.schema import SchemaField project = "PROJECT" ds_id = "DS_ID" @@ -4642,10 +4649,7 @@ def test_insert_rows_w_numeric(self): client = self._make_one(project=project, credentials=creds, _http=http) conn = client._connection = make_connection({}) table_ref = DatasetReference(project, ds_id).table(table_id) - schema = [ - table.SchemaField("account", "STRING"), - table.SchemaField("balance", "NUMERIC"), - ] + schema = [SchemaField("account", "STRING"), SchemaField("balance", "NUMERIC")] insert_table = table.Table(table_ref, schema=schema) rows = [ ("Savings", decimal.Decimal("23.47")), @@ -4677,7 +4681,7 @@ def test_insert_rows_w_numeric(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table API_PATH = "/projects/{}/datasets/{}/tables/{}/insertAll".format( @@ -4753,7 +4757,7 @@ def test_insert_rows_from_dataframe(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe_many_columns(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table API_PATH = "/projects/{}/datasets/{}/tables/{}/insertAll".format( @@ -4849,8 +4853,9 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): ) def test_insert_rows_json(self): - from google.cloud.bigquery.table import Table, SchemaField from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table PROJECT = "PROJECT" DS_ID = "DS_ID" @@ -4982,8 +4987,8 @@ def test_list_partitions_with_string_id(self): def test_list_rows(self): import datetime from google.cloud._helpers import UTC + from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table - from google.cloud.bigquery.table import SchemaField from google.cloud.bigquery.table import Row PATH = "projects/%s/datasets/%s/tables/%s/data" % ( @@ -5083,7 +5088,8 @@ def test_list_rows_empty_table(self): self.assertEqual(rows.total_rows, 0) def test_list_rows_query_params(self): - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table creds = _make_credentials() http = object() @@ -5105,7 +5111,7 @@ def test_list_rows_query_params(self): self.assertEqual(req[1]["query_params"], test[1], "for kwargs %s" % test[0]) def test_list_rows_repeated_fields(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField PATH = "projects/%s/datasets/%s/tables/%s/data" % ( self.PROJECT, @@ -5165,7 +5171,8 @@ def test_list_rows_repeated_fields(self): ) def test_list_rows_w_record_schema(self): - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table PATH = "projects/%s/datasets/%s/tables/%s/data" % ( self.PROJECT, diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 84f52627b7f3..a2aeb5efbc4a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1532,7 +1532,7 @@ def test_schema_hit(self): self.assertEqual(all_props, SchemaField.from_api_repr(all_props_repr)) self.assertEqual(minimal, SchemaField.from_api_repr(minimal_repr)) - def test_schema_setter(self): + def test_schema_setter_fields(self): from google.cloud.bigquery.schema import SchemaField config = self._get_target_class()() @@ -1555,6 +1555,42 @@ def test_schema_setter(self): config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} ) + def test_schema_setter_valid_mappings_list(self): + config = self._get_target_class()() + + schema = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + config.schema = schema + + full_name_repr = { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + } + age_repr = { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + } + self.assertEqual( + config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} + ) + + def test_schema_setter_invalid_mappings_list(self): + config = self._get_target_class()() + + schema = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, + ] + + with self.assertRaises(Exception): + config.schema = schema + def test_schema_setter_unsetting_schema(self): from google.cloud.bigquery.schema import SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 862d8a823e62..fc8a41c68c46 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -568,3 +568,69 @@ def test_w_subfields(self): ], }, ) + + +class Test_to_schema_fields(unittest.TestCase): + @staticmethod + def _call_fut(schema): + from google.cloud.bigquery.schema import _to_schema_fields + + return _to_schema_fields(schema) + + def test_invalid_type(self): + schema = [ + ("full_name", "STRING", "REQUIRED"), + ("address", "STRING", "REQUIRED"), + ] + with self.assertRaises(ValueError): + self._call_fut(schema) + + def test_schema_fields_sequence(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INT64", mode="NULLABLE"), + ] + result = self._call_fut(schema) + self.assertEqual(result, schema) + + def test_invalid_mapping_representation(self): + schema = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "address", "typeooo": "STRING", "mode": "REQUIRED"}, + ] + with self.assertRaises(Exception): + self._call_fut(schema) + + def test_valid_mapping_representation(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + { + "name": "residence", + "type": "STRUCT", + "mode": "NULLABLE", + "fields": [ + {"name": "foo", "type": "DATE", "mode": "NULLABLE"}, + {"name": "bar", "type": "BYTES", "mode": "REQUIRED"}, + ], + }, + ] + + expected_schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField( + "residence", + "STRUCT", + mode="NULLABLE", + fields=[ + SchemaField("foo", "DATE", mode="NULLABLE"), + SchemaField("bar", "BYTES", mode="REQUIRED"), + ], + ), + ] + + result = self._call_fut(schema) + self.assertEqual(result, expected_schema) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index b04a4491e6ca..97a7b4ae745e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -450,7 +450,7 @@ def test_ctor(self): self.assertIsNone(table.clustering_fields) def test_ctor_w_schema(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -556,7 +556,7 @@ def test_num_rows_getter(self): with self.assertRaises(ValueError): getattr(table, "num_rows") - def test_schema_setter_non_list(self): + def test_schema_setter_non_sequence(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) @@ -564,7 +564,7 @@ def test_schema_setter_non_list(self): table.schema = object() def test_schema_setter_invalid_field(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -573,8 +573,8 @@ def test_schema_setter_invalid_field(self): with self.assertRaises(ValueError): table.schema = [full_name, object()] - def test_schema_setter(self): - from google.cloud.bigquery.table import SchemaField + def test_schema_setter_valid_fields(self): + from google.cloud.bigquery.schema import SchemaField dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -584,6 +584,48 @@ def test_schema_setter(self): table.schema = [full_name, age] self.assertEqual(table.schema, [full_name, age]) + def test_schema_setter_invalid_mapping_representation(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"} + invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"} + with self.assertRaises(Exception): + table.schema = [full_name, invalid_field] + + def test_schema_setter_valid_mapping_representation(self): + from google.cloud.bigquery.schema import SchemaField + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"} + job_status = { + "name": "is_employed", + "type": "STRUCT", + "mode": "NULLABLE", + "fields": [ + {"name": "foo", "type": "DATE", "mode": "NULLABLE"}, + {"name": "bar", "type": "BYTES", "mode": "REQUIRED"}, + ], + } + + table.schema = [full_name, job_status] + + expected_schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField( + "is_employed", + "STRUCT", + mode="NULLABLE", + fields=[ + SchemaField("foo", "DATE", mode="NULLABLE"), + SchemaField("bar", "BYTES", mode="REQUIRED"), + ], + ), + ] + self.assertEqual(table.schema, expected_schema) + def test_props_set_by_server(self): import datetime from google.cloud._helpers import UTC @@ -1145,7 +1187,8 @@ def test__row_from_mapping_wo_schema(self): self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) def test__row_from_mapping_w_invalid_schema(self): - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table MAPPING = { "full_name": "Phred Phlyntstone", @@ -1167,7 +1210,8 @@ def test__row_from_mapping_w_invalid_schema(self): self.assertIn("Unknown field mode: BOGUS", str(exc.exception)) def test__row_from_mapping_w_schema(self): - from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table MAPPING = { "full_name": "Phred Phlyntstone", @@ -1497,8 +1541,24 @@ def test_constructor_with_table(self): self.assertIs(iterator._table, table) self.assertEqual(iterator.total_rows, 100) + def test_constructor_with_dict_schema(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ] + + iterator = self._make_one(schema=schema) + + expected_schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INT64", mode="NULLABLE"), + ] + self.assertEqual(iterator.schema, expected_schema) + def test_iterate(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -1529,7 +1589,7 @@ def test_iterate(self): api_request.assert_called_once_with(method="GET", path=path, query_params={}) def test_page_size(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -1555,7 +1615,7 @@ def test_page_size(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -1637,7 +1697,7 @@ def test_to_arrow(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_nulls(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] rows = [ @@ -1670,7 +1730,7 @@ def test_to_arrow_w_nulls(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_unknown_type(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -1708,7 +1768,7 @@ def test_to_arrow_w_unknown_type(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -1870,7 +1930,7 @@ def test_to_arrow_w_bqstorage_no_streams(self): @mock.patch("tqdm.tqdm_notebook") @mock.patch("tqdm.tqdm") def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -1913,7 +1973,7 @@ def test_to_arrow_w_pyarrow_none(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -1945,7 +2005,7 @@ def test_to_dataframe(self): def test_to_dataframe_progress_bar( self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock ): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -1978,7 +2038,7 @@ def test_to_dataframe_progress_bar( @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2003,7 +2063,7 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.tqdm", new=None) def test_to_dataframe_no_tqdm(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2036,7 +2096,7 @@ def test_to_dataframe_no_tqdm(self): @mock.patch("tqdm.tqdm_notebook", new=None) # will raise TypeError on call @mock.patch("tqdm.tqdm", new=None) # will raise TypeError on call def test_to_dataframe_tqdm_error(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2066,7 +2126,7 @@ def test_to_dataframe_tqdm_error(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2101,7 +2161,7 @@ def test_to_dataframe_logs_tabledata_list(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): import datetime - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("start_timestamp", "TIMESTAMP"), @@ -2141,7 +2201,7 @@ def test_to_dataframe_w_various_types_nullable(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_column_dtypes(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("start_timestamp", "TIMESTAMP"), @@ -2179,7 +2239,7 @@ def test_to_dataframe_column_dtypes(self): @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_error_if_pandas_is_none(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2198,7 +2258,7 @@ def test_to_dataframe_error_if_pandas_is_none(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_bqstorage_warning(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField schema = [ SchemaField("name", "STRING", mode="REQUIRED"), From 991c0010c2d9df0ed2057e256848f1c460abf170 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 4 Nov 2019 22:56:02 +0200 Subject: [PATCH 0699/2016] fix(bigquery): use pyarrow fallback for improved schema detection (#9321) * fix(bigquery): use pyarrow fallback in schema autodetect * Improve and refactor pyarrow schema detection Add more pyarrow types, convert to pyarrow only the columns the schema could not be detected for, etc. * Use the word "augment" in helper's name * Fix failed import in one of the tests --- .../google/cloud/bigquery/_pandas_helpers.py | 113 ++++++++++- .../tests/unit/test__pandas_helpers.py | 180 ++++++++++++++++++ .../tests/unit/test_client.py | 16 +- 3 files changed, 296 insertions(+), 13 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index aeb18c2d213d..6e91a9624b06 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -110,8 +110,35 @@ def pyarrow_timestamp(): "TIME": pyarrow_time, "TIMESTAMP": pyarrow_timestamp, } + ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal128 instances. + } + else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER + ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER def bq_to_arrow_struct_data_type(field): @@ -141,10 +168,11 @@ def bq_to_arrow_data_type(field): return pyarrow.list_(inner_type) return None - if field.field_type.upper() in schema._STRUCT_TYPES: + field_type_upper = field.field_type.upper() if field.field_type else "" + if field_type_upper in schema._STRUCT_TYPES: return bq_to_arrow_struct_data_type(field) - data_type_constructor = BQ_TO_ARROW_SCALARS.get(field.field_type.upper()) + data_type_constructor = BQ_TO_ARROW_SCALARS.get(field_type_upper) if data_type_constructor is None: return None return data_type_constructor() @@ -183,9 +211,12 @@ def bq_to_arrow_schema(bq_schema): def bq_to_arrow_array(series, bq_field): arrow_type = bq_to_arrow_data_type(bq_field) + + field_type_upper = bq_field.field_type.upper() if bq_field.field_type else "" + if bq_field.mode.upper() == "REPEATED": return pyarrow.ListArray.from_pandas(series, type=arrow_type) - if bq_field.field_type.upper() in schema._STRUCT_TYPES: + if field_type_upper in schema._STRUCT_TYPES: return pyarrow.StructArray.from_pandas(series, type=arrow_type) return pyarrow.array(series, type=arrow_type) @@ -267,6 +298,8 @@ def dataframe_to_bq_schema(dataframe, bq_schema): bq_schema_unused = set() bq_schema_out = [] + unknown_type_fields = [] + for column, dtype in list_columns_and_indexes(dataframe): # Use provided type from schema, if present. bq_field = bq_schema_index.get(column) @@ -278,12 +311,12 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # Otherwise, try to automatically determine the type based on the # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) - if not bq_type: - warnings.warn(u"Unable to determine type of column '{}'.".format(column)) - return None bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) + if bq_field.field_type is None: + unknown_type_fields.append(bq_field) + # Catch any schema mismatch. The developer explicitly asked to serialize a # column, but it was not found. if bq_schema_unused: @@ -292,7 +325,73 @@ def dataframe_to_bq_schema(dataframe, bq_schema): bq_schema_unused ) ) - return tuple(bq_schema_out) + + # If schema detection was not successful for all columns, also try with + # pyarrow, if available. + if unknown_type_fields: + if not pyarrow: + msg = u"Could not determine the type of columns: {}".format( + ", ".join(field.name for field in unknown_type_fields) + ) + warnings.warn(msg) + return None # We cannot detect the schema in full. + + # The augment_schema() helper itself will also issue unknown type + # warnings if detection still fails for any of the fields. + bq_schema_out = augment_schema(dataframe, bq_schema_out) + + return tuple(bq_schema_out) if bq_schema_out else None + + +def augment_schema(dataframe, current_bq_schema): + """Try to deduce the unknown field types and return an improved schema. + + This function requires ``pyarrow`` to run. If all the missing types still + cannot be detected, ``None`` is returned. If all types are already known, + a shallow copy of the given schema is returned. + + Args: + dataframe (pandas.DataFrame): + DataFrame for which some of the field types are still unknown. + current_bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + A BigQuery schema for ``dataframe``. The types of some or all of + the fields may be ``None``. + Returns: + Optional[Sequence[google.cloud.bigquery.schema.SchemaField]] + """ + augmented_schema = [] + unknown_type_fields = [] + + for field in current_bq_schema: + if field.field_type is not None: + augmented_schema.append(field) + continue + + arrow_table = pyarrow.array(dataframe[field.name]) + detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.type.id) + + if detected_type is None: + unknown_type_fields.append(field) + continue + + new_field = schema.SchemaField( + name=field.name, + field_type=detected_type, + mode=field.mode, + description=field.description, + fields=field.fields, + ) + augmented_schema.append(new_field) + + if unknown_type_fields: + warnings.warn( + u"Pyarrow could not determine the type of columns: {}.".format( + ", ".join(field.name for field in unknown_type_fields) + ) + ) + return None + + return augmented_schema def dataframe_to_arrow(dataframe, bq_schema): diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 56ac62820841..a6ccec2e094f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -16,6 +16,7 @@ import datetime import decimal import functools +import operator import warnings import mock @@ -957,6 +958,185 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): + dataframe = pandas.DataFrame( + data=[ + {"id": 10, "status": u"FOO", "execution_date": datetime.date(2019, 5, 10)}, + {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)}, + ] + ) + + no_pyarrow_patch = mock.patch(module_under_test.__name__ + ".pyarrow", None) + + with no_pyarrow_patch, warnings.catch_warnings(record=True) as warned: + detected_schema = module_under_test.dataframe_to_bq_schema( + dataframe, bq_schema=[] + ) + + assert detected_schema is None + + # a warning should also be issued + expected_warnings = [ + warning for warning in warned if "could not determine" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + msg = str(expected_warnings[0]) + assert "execution_date" in msg and "created_at" in msg + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): + dataframe = pandas.DataFrame( + data=[ + {"id": 10, "status": u"FOO", "created_at": datetime.date(2019, 5, 10)}, + {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)}, + ] + ) + + with warnings.catch_warnings(record=True) as warned: + detected_schema = module_under_test.dataframe_to_bq_schema( + dataframe, bq_schema=[] + ) + + expected_schema = ( + schema.SchemaField("id", "INTEGER", mode="NULLABLE"), + schema.SchemaField("status", "STRING", mode="NULLABLE"), + schema.SchemaField("created_at", "DATE", mode="NULLABLE"), + ) + by_name = operator.attrgetter("name") + assert sorted(detected_schema, key=by_name) == sorted(expected_schema, key=by_name) + + # there should be no relevant warnings + unwanted_warnings = [ + warning for warning in warned if "could not determine" in str(warning).lower() + ] + assert not unwanted_warnings + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): + dataframe = pandas.DataFrame( + data=[ + {"struct_field": {"one": 2}, "status": u"FOO"}, + {"struct_field": {"two": u"222"}, "status": u"BAR"}, + ] + ) + + with warnings.catch_warnings(record=True) as warned: + detected_schema = module_under_test.dataframe_to_bq_schema( + dataframe, bq_schema=[] + ) + + assert detected_schema is None + + # a warning should also be issued + expected_warnings = [ + warning for warning in warned if "could not determine" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + assert "struct_field" in str(expected_warnings[0]) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_augment_schema_type_detection_succeeds(module_under_test): + dataframe = pandas.DataFrame( + data=[ + { + "bool_field": False, + "int_field": 123, + "float_field": 3.141592, + "time_field": datetime.time(17, 59, 47), + "timestamp_field": datetime.datetime(2005, 5, 31, 14, 25, 55), + "date_field": datetime.date(2005, 5, 31), + "bytes_field": b"some bytes", + "string_field": u"some characters", + "numeric_field": decimal.Decimal("123.456"), + } + ] + ) + + # NOTE: In Pandas dataframe, the dtype of Python's datetime instances is + # set to "datetime64[ns]", and pyarrow converts that to pyarrow.TimestampArray. + # We thus cannot expect to get a DATETIME date when converting back to the + # BigQuery type. + + current_schema = ( + schema.SchemaField("bool_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("int_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("float_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("time_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("timestamp_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("date_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), + ) + + with warnings.catch_warnings(record=True) as warned: + augmented_schema = module_under_test.augment_schema(dataframe, current_schema) + + # there should be no relevant warnings + unwanted_warnings = [ + warning for warning in warned if "Pyarrow could not" in str(warning) + ] + assert not unwanted_warnings + + # the augmented schema must match the expected + expected_schema = ( + schema.SchemaField("bool_field", field_type="BOOL", mode="NULLABLE"), + schema.SchemaField("int_field", field_type="INT64", mode="NULLABLE"), + schema.SchemaField("float_field", field_type="FLOAT64", mode="NULLABLE"), + schema.SchemaField("time_field", field_type="TIME", mode="NULLABLE"), + schema.SchemaField("timestamp_field", field_type="TIMESTAMP", mode="NULLABLE"), + schema.SchemaField("date_field", field_type="DATE", mode="NULLABLE"), + schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"), + schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), + schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), + ) + by_name = operator.attrgetter("name") + assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_augment_schema_type_detection_fails(module_under_test): + dataframe = pandas.DataFrame( + data=[ + { + "status": u"FOO", + "struct_field": {"one": 1}, + "struct_field_2": {"foo": u"123"}, + }, + { + "status": u"BAR", + "struct_field": {"two": u"111"}, + "struct_field_2": {"bar": 27}, + }, + ] + ) + current_schema = [ + schema.SchemaField("status", field_type="STRING", mode="NULLABLE"), + schema.SchemaField("struct_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("struct_field_2", field_type=None, mode="NULLABLE"), + ] + + with warnings.catch_warnings(record=True) as warned: + augmented_schema = module_under_test.augment_schema(dataframe, current_schema) + + assert augmented_schema is None + + expected_warnings = [ + warning for warning in warned if "could not determine" in str(warning) + ] + assert len(expected_warnings) == 1 + warning_msg = str(expected_warnings[0]) + assert "pyarrow" in warning_msg.lower() + assert "struct_field" in warning_msg and "struct_field_2" in warning_msg + + @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): dict_schema = [ diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index bc56fac34c6a..e6ed4d1c8072 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4805,7 +4805,7 @@ def test_insert_rows_from_dataframe_many_columns(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): - from google.cloud.bigquery.table import SchemaField + from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table API_PATH = "/projects/{}/datasets/{}/tables/{}/insertAll".format( @@ -5996,8 +5996,7 @@ def test_load_table_from_dataframe_unknown_table(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_no_schema_warning(self): + def test_load_table_from_dataframe_no_schema_warning_wo_pyarrow(self): client = self._make_client() # Pick at least one column type that translates to Pandas dtype @@ -6014,9 +6013,12 @@ def test_load_table_from_dataframe_no_schema_warning(self): "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) + pyarrow_patch_helpers = mock.patch( + "google.cloud.bigquery._pandas_helpers.pyarrow", None + ) catch_warnings = warnings.catch_warnings(record=True) - with get_table_patch, load_patch, pyarrow_patch, catch_warnings as warned: + with get_table_patch, load_patch, pyarrow_patch, pyarrow_patch_helpers, catch_warnings as warned: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION ) @@ -6184,7 +6186,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "unknown_col" in message @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema_missing_types(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6201,10 +6202,13 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) + pyarrow_patch = mock.patch( + "google.cloud.bigquery._pandas_helpers.pyarrow", None + ) schema = (SchemaField("string_col", "STRING"),) job_config = job.LoadJobConfig(schema=schema) - with load_patch as load_table_from_file, warnings.catch_warnings( + with pyarrow_patch, load_patch as load_table_from_file, warnings.catch_warnings( record=True ) as warned: client.load_table_from_dataframe( From f24dedfc601bd9bfc0e84bfc2b3af18b76ceee37 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 5 Nov 2019 14:26:17 -0500 Subject: [PATCH 0700/2016] feat(storage): add opt-in raw download support (#9572) * deps(bigquery): pin to allow g-r-m 0.5.x * deps(storage): pin to require g-r-m >= 0.5.0 --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index e0f3edf19d45..8331d2c07114 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -31,7 +31,7 @@ dependencies = [ 'enum34; python_version < "3.4"', "google-cloud-core >= 1.0.3, < 2.0dev", - "google-resumable-media >= 0.3.1, != 0.4.0, < 0.5.0dev", + "google-resumable-media >= 0.3.1, != 0.4.0, < 0.6.0dev", "protobuf >= 3.6.0", ] extras = { From ae92d24b74e3f86e4a574fc6698411f31478e951 Mon Sep 17 00:00:00 2001 From: Aleksey Vlasenko Date: Tue, 5 Nov 2019 22:13:22 -0800 Subject: [PATCH 0701/2016] feat(bigquery): add `--destination_table` parameter to IPython magic (#9599) --- .../google/cloud/bigquery/magics.py | 52 +++++++++++ .../tests/unit/test_magics.py | 91 +++++++++++++++++++ 2 files changed, 143 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 2a174cefeea3..59265ed6b0c5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -31,6 +31,10 @@ this parameter is used. If an error occurs during the query execution, the corresponding ``QueryJob`` instance (if available) is stored in the variable instead. + * ``--destination_table`` (optional, line argument): + A dataset and table to store the query results. If table does not exists, + it will be created. If table already exists, its data will be overwritten. + Variable should be in a format .. * ``--project `` (optional, line argument): Project to use for running the query. Defaults to the context :attr:`~google.cloud.bigquery.magics.Context.project`. @@ -145,6 +149,7 @@ raise ImportError("This module can only be loaded in IPython.") from google.api_core import client_info +from google.api_core.exceptions import NotFound import google.auth from google.cloud import bigquery from google.cloud.bigquery.dbapi import _helpers @@ -336,12 +341,44 @@ def _run_query(client, query, job_config=None): return query_job +def _create_dataset_if_necessary(client, dataset_id): + """Create a dataset in the current project if it doesn't exist. + + Args: + client (google.cloud.bigquery.client.Client): + Client to bundle configuration needed for API requests. + dataset_id (str): + Dataset id. + """ + dataset_reference = bigquery.dataset.DatasetReference(client.project, dataset_id) + try: + dataset = client.get_dataset(dataset_reference) + return + except NotFound: + pass + dataset = bigquery.Dataset(dataset_reference) + dataset.location = client.location + print("Creating dataset: {}".format(dataset_id)) + dataset = client.create_dataset(dataset) + + @magic_arguments.magic_arguments() @magic_arguments.argument( "destination_var", nargs="?", help=("If provided, save the output to this variable instead of displaying it."), ) +@magic_arguments.argument( + "--destination_table", + type=str, + default=None, + help=( + "If provided, save the output of the query to a new BigQuery table. " + "Variable should be in a format .. " + "If table does not exists, it will be created. " + "If table already exists, its data will be overwritten." + ), +) @magic_arguments.argument( "--project", type=str, @@ -485,6 +522,21 @@ def _cell_magic(line, query): job_config.use_legacy_sql = args.use_legacy_sql job_config.dry_run = args.dry_run + if args.destination_table: + split = args.destination_table.split(".") + if len(split) != 2: + raise ValueError( + "--destination_table should be in a . format." + ) + dataset_id, table_id = split + job_config.allow_large_results = True + dataset_ref = client.dataset(dataset_id) + destination_table_ref = dataset_ref.table(table_id) + job_config.destination = destination_table_ref + job_config.create_disposition = "CREATE_IF_NEEDED" + job_config.write_disposition = "WRITE_TRUNCATE" + _create_dataset_if_necessary(client, dataset_id) + if args.maximum_bytes_billed == "None": job_config.maximum_bytes_billed = 0 elif args.maximum_bytes_billed is not None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index ed253636c468..6ff9819854a8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -39,6 +39,7 @@ from google.cloud import bigquery_storage_v1beta1 except ImportError: # pragma: NO COVER bigquery_storage_v1beta1 = None +from google.cloud import bigquery from google.cloud.bigquery import job from google.cloud.bigquery import table from google.cloud.bigquery import magics @@ -336,6 +337,37 @@ def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): assert "grpcio" in str(exc_context.value) +def test__create_dataset_if_necessary_exists(): + project = "project_id" + dataset_id = "dataset_id" + dataset_reference = bigquery.dataset.DatasetReference(project, dataset_id) + dataset = bigquery.Dataset(dataset_reference) + client_patch = mock.patch( + "google.cloud.bigquery.magics.bigquery.Client", autospec=True + ) + with client_patch as client_mock: + client = client_mock() + client.project = project + client.get_dataset.result_value = dataset + magics._create_dataset_if_necessary(client, dataset_id) + client.create_dataset.assert_not_called() + + +def test__create_dataset_if_necessary_not_exist(): + project = "project_id" + dataset_id = "dataset_id" + client_patch = mock.patch( + "google.cloud.bigquery.magics.bigquery.Client", autospec=True + ) + with client_patch as client_mock: + client = client_mock() + client.location = "us" + client.project = project + client.get_dataset.side_effect = exceptions.NotFound("dataset not found") + magics._create_dataset_if_necessary(client, dataset_id) + client.create_dataset.assert_called_once() + + @pytest.mark.usefixtures("ipython_interactive") def test_extension_load(): ip = IPython.get_ipython() @@ -1199,3 +1231,62 @@ def test_bigquery_magic_omits_tracebacks_from_error_message(): assert "400 Syntax error in SQL query" in output assert "Traceback (most recent call last)" not in output assert "Syntax error" not in captured_io.stdout + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_destination_table_invalid_format(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + + client_patch = mock.patch( + "google.cloud.bigquery.magics.bigquery.Client", autospec=True + ) + + with client_patch, default_patch, pytest.raises(ValueError) as exc_context: + ip.run_cell_magic( + "bigquery", "--destination_table dataset", "SELECT foo FROM WHERE LIMIT bar" + ) + error_msg = str(exc_context.value) + assert ( + "--destination_table should be in a " + ". format." in error_msg + ) + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_destination_table(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + create_dataset_if_necessary_patch = mock.patch( + "google.cloud.bigquery.magics._create_dataset_if_necessary", autospec=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics._run_query", autospec=True + ) + + with create_dataset_if_necessary_patch, run_query_patch as run_query_mock: + ip.run_cell_magic( + "bigquery", + "--destination_table dataset_id.table_id", + "SELECT foo FROM WHERE LIMIT bar", + ) + + job_config_used = run_query_mock.call_args_list[0][1]["job_config"] + assert job_config_used.allow_large_results is True + assert job_config_used.create_disposition == "CREATE_IF_NEEDED" + assert job_config_used.write_disposition == "WRITE_TRUNCATE" + assert job_config_used.destination.dataset_id == "dataset_id" + assert job_config_used.destination.table_id == "table_id" From 13a38e5790a12a1313703887ad8d30a6000cc73e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 5 Nov 2019 22:18:40 -0800 Subject: [PATCH 0702/2016] doc(bigquery): add table create sample using integer range partitioning (#9478) * doc(bigquery): add table create sample using integer range partitioning This code sample doubles as a system test for integer range partitioning features. * blacken * add range partitioned sample to docs --- .../docs/usage/tables.rst | 9 ++++ .../samples/create_table_range_partitioned.py | 45 +++++++++++++++++++ .../test_create_table_range_partitioned.py | 28 ++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/create_table_range_partitioned.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index d58dcc5d9ac4..20ed79a969f2 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -58,6 +58,15 @@ Create an empty table with the :start-after: [START bigquery_create_table] :end-before: [END bigquery_create_table] +Create an integer range partitioned table with the +:func:`~google.cloud.bigquery.client.Client.create_table` method: + +.. literalinclude:: ../samples/create_table_range_partitioned.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_table_range_partitioned] + :end-before: [END bigquery_create_table_range_partitioned] + Load table data from a file with the :func:`~google.cloud.bigquery.client.Client.load_table_from_file` method: diff --git a/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py b/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py new file mode 100644 index 000000000000..f9da09cff847 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py @@ -0,0 +1,45 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_range_partitioned(client, table_id): + + # [START bigquery_create_table_range_partitioned] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + schema = [ + bigquery.SchemaField("full_name", "STRING"), + bigquery.SchemaField("city", "STRING"), + bigquery.SchemaField("zipcode", "INTEGER"), + ] + + table = bigquery.Table(table_id, schema=schema) + table.range_partitioning = bigquery.RangePartitioning( + # To use integer range partitioning, select a top-level REQUIRED / + # NULLABLE column with INTEGER / INT64 data type. + field="zipcode", + range_=bigquery.PartitionRange(start=0, end=100000, interval=10), + ) + table = client.create_table(table) # Make an API request. + print( + "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id) + ) + # [END bigquery_create_table_range_partitioned] + return table diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py b/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py new file mode 100644 index 000000000000..ca186f9a7554 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py @@ -0,0 +1,28 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import create_table_range_partitioned + + +def test_create_table_range_partitioned(capsys, client, random_table_id): + table = create_table_range_partitioned.create_table_range_partitioned( + client, random_table_id + ) + out, _ = capsys.readouterr() + assert "Created table {}".format(random_table_id) in out + assert table.range_partitioning.field == "zipcode" + assert table.range_partitioning.range_.start == 0 + assert table.range_partitioning.range_.end == 100000 + assert table.range_partitioning.range_.interval == 10 From f47c906433b14ad9b1c1cc1f10e18d5907aa12b6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 8 Nov 2019 16:47:59 -0800 Subject: [PATCH 0703/2016] chore(bigquery): fix undelete table system test to use milliseconds in snapshot decorator (#9649) This fixes a flakey system test, where sometimes we sent an invalid timestamp as a snapshot to copy from. --- .../google-cloud-bigquery/docs/snippets.py | 64 ------------------ .../docs/usage/tables.rst | 12 ++++ .../samples/tests/conftest.py | 16 +++++ .../samples/tests/test_undelete_table.py | 26 +++++++ .../samples/undelete_table.py | 67 +++++++++++++++++++ 5 files changed, 121 insertions(+), 64 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/tests/test_undelete_table.py create mode 100644 packages/google-cloud-bigquery/samples/undelete_table.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 83795460a955..4d7ad7506b6a 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -40,7 +40,6 @@ except (ImportError, AttributeError): pyarrow = None -from google.api_core import datetime_helpers from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests @@ -1428,69 +1427,6 @@ def test_extract_table_compressed(client, to_delete): to_delete.insert(0, blob) -def test_undelete_table(client, to_delete): - dataset_id = "undelete_table_dataset_{}".format(_millis()) - table_id = "undelete_table_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = "US" - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - client.create_table(table) - - # [START bigquery_undelete_table] - # TODO(developer): Uncomment the lines below and replace with your values. - # import time - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' # Replace with your dataset ID. - # table_id = 'my_table' # Replace with your table ID. - - table_ref = client.dataset(dataset_id).table(table_id) - - # TODO(developer): Choose an appropriate snapshot point as epoch - # milliseconds. For this example, we choose the current time as we're about - # to delete the table immediately afterwards. - snapshot_epoch = int(time.time() * 1000) - # [END bigquery_undelete_table] - - # Due to very short lifecycle of the table, ensure we're not picking a time - # prior to the table creation due to time drift between backend and client. - table = client.get_table(table_ref) - created_epoch = datetime_helpers.to_microseconds(table.created) - if created_epoch > snapshot_epoch: - snapshot_epoch = created_epoch - - # [START bigquery_undelete_table] - - # "Accidentally" delete the table. - client.delete_table(table_ref) # API request - - # Construct the restore-from table ID using a snapshot decorator. - snapshot_table_id = "{}@{}".format(table_id, snapshot_epoch) - source_table_ref = client.dataset(dataset_id).table(snapshot_table_id) - - # Choose a new table ID for the recovered table data. - recovered_table_id = "{}_recovered".format(table_id) - dest_table_ref = client.dataset(dataset_id).table(recovered_table_id) - - # Construct and run a copy job. - job = client.copy_table( - source_table_ref, - dest_table_ref, - # Location must match that of the source and destination tables. - location="US", - ) # API request - - job.result() # Waits for job to complete. - - print( - "Copied data from deleted table {} to {}".format(table_id, recovered_table_id) - ) - # [END bigquery_undelete_table] - - def test_client_query_legacy_sql(client): """Run a query with Legacy SQL explicitly set""" # [START bigquery_query_legacy] diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index 20ed79a969f2..b6f8dbdde646 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -186,3 +186,15 @@ Delete a table with the :dedent: 4 :start-after: [START bigquery_delete_table] :end-before: [END bigquery_delete_table] + +Restoring a Deleted Table +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Restore a deleted table from a snapshot by using the +:func:`~google.cloud.bigquery.client.Client.copy_table` method: + +.. literalinclude:: ../samples/undelete_table.py + :language: python + :dedent: 4 + :start-after: [START bigquery_undelete_table] + :end-before: [END bigquery_undelete_table] diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index 32b23931aa91..a06bb9c90d1d 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -78,6 +78,22 @@ def table_id(client, dataset_id): client.delete_table(table, not_found_ok=True) +@pytest.fixture +def table_with_schema_id(client, dataset_id): + now = datetime.datetime.now() + table_id = "python_table_with_schema_{}_{}".format( + now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] + ) + schema = [ + bigquery.SchemaField("full_name", "STRING"), + bigquery.SchemaField("age", "INTEGER"), + ] + table = bigquery.Table("{}.{}".format(dataset_id, table_id), schema=schema) + table = client.create_table(table) + yield "{}.{}.{}".format(table.project, table.dataset_id, table.table_id) + client.delete_table(table, not_found_ok=True) + + @pytest.fixture def table_with_data_id(client): return "bigquery-public-data.samples.shakespeare" diff --git a/packages/google-cloud-bigquery/samples/tests/test_undelete_table.py b/packages/google-cloud-bigquery/samples/tests/test_undelete_table.py new file mode 100644 index 000000000000..8fd221a39b30 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_undelete_table.py @@ -0,0 +1,26 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import undelete_table + + +def test_undelete_table(capsys, client, table_with_schema_id, random_table_id): + undelete_table.undelete_table(client, table_with_schema_id, random_table_id) + out, _ = capsys.readouterr() + assert ( + "Copied data from deleted table {} to {}".format( + table_with_schema_id, random_table_id + ) + in out + ) diff --git a/packages/google-cloud-bigquery/samples/undelete_table.py b/packages/google-cloud-bigquery/samples/undelete_table.py new file mode 100644 index 000000000000..2d544cf5aa8c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/undelete_table.py @@ -0,0 +1,67 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.api_core import datetime_helpers + + +def undelete_table(client, table_id, recovered_table_id): + # [START bigquery_undelete_table] + import time + + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Choose a table to recover. + # table_id = "your-project.your_dataset.your_table" + + # TODO(developer): Choose a new table ID for the recovered table data. + # recovery_table_id = "your-project.your_dataset.your_table_recovered" + + # TODO(developer): Choose an appropriate snapshot point as epoch + # milliseconds. For this example, we choose the current time as we're about + # to delete the table immediately afterwards. + snapshot_epoch = int(time.time() * 1000) + + # [START_EXCLUDE] + # Due to very short lifecycle of the table, ensure we're not picking a time + # prior to the table creation due to time drift between backend and client. + table = client.get_table(table_id) + created_epoch = datetime_helpers.to_milliseconds(table.created) + if created_epoch > snapshot_epoch: + snapshot_epoch = created_epoch + # [END_EXCLUDE] + + # "Accidentally" delete the table. + client.delete_table(table_id) # API request + + # Construct the restore-from table ID using a snapshot decorator. + snapshot_table_id = "{}@{}".format(table_id, snapshot_epoch) + + # Construct and run a copy job. + job = client.copy_table( + snapshot_table_id, + recovered_table_id, + # Location must match that of the source and destination tables. + location="US", + ) # API request + + job.result() # Wait for job to complete. + + print( + "Copied data from deleted table {} to {}".format(table_id, recovered_table_id) + ) + # [END bigquery_undelete_table] From ae791930b475bff102bd981c8732c1cd669ef17b Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Mon, 11 Nov 2019 15:15:32 -0800 Subject: [PATCH 0704/2016] docs: add python 2 sunset banner to documentation (#9036) --- .../docs/_static/custom.css | 4 ++ .../docs/_templates/layout.html | 49 +++++++++++++++++++ packages/google-cloud-bigquery/docs/conf.py | 2 +- 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/docs/_static/custom.css create mode 100644 packages/google-cloud-bigquery/docs/_templates/layout.html diff --git a/packages/google-cloud-bigquery/docs/_static/custom.css b/packages/google-cloud-bigquery/docs/_static/custom.css new file mode 100644 index 000000000000..9a6f9f8ddc3a --- /dev/null +++ b/packages/google-cloud-bigquery/docs/_static/custom.css @@ -0,0 +1,4 @@ +div#python2-eol { + border-color: red; + border-width: medium; +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/_templates/layout.html b/packages/google-cloud-bigquery/docs/_templates/layout.html new file mode 100644 index 000000000000..de457b2c2767 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/_templates/layout.html @@ -0,0 +1,49 @@ +{% extends "!layout.html" %} +{%- block content %} +{%- if theme_fixed_sidebar|lower == 'true' %} +
+ {{ sidebar() }} + {%- block document %} +
+ {%- if render_sidebar %} +
+ {%- endif %} + + {%- block relbar_top %} + {%- if theme_show_relbar_top|tobool %} + + {%- endif %} + {% endblock %} + +
+
+ On January 1, 2020 this library will no longer support Python 2 on the latest released version. + Previously released library versions will continue to be available. For more information please + visit Python 2 support on Google Cloud. +
+ {% block body %} {% endblock %} +
+ + {%- block relbar_bottom %} + {%- if theme_show_relbar_bottom|tobool %} + + {%- endif %} + {% endblock %} + + {%- if render_sidebar %} +
+ {%- endif %} +
+ {%- endblock %} +
+
+{%- else %} +{{ super() }} +{%- endif %} +{%- endblock %} diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index b3cfa267dd7d..1b83501d1417 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -168,7 +168,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -# html_static_path = [] +html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied From 8758142ac5ebc7d684fb3efc5b169fe063397819 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 12 Nov 2019 19:24:46 +0200 Subject: [PATCH 0705/2016] chore(bigquery): fix link anchors in external config docstrings (#9627) --- .../google/cloud/bigquery/external_config.py | 85 +++++++------------ 1 file changed, 29 insertions(+), 56 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index c637d37d185c..8355c0f0940d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -68,8 +68,7 @@ def encoding(self): """str: The encoding of the values when the type is not `STRING` See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.encoding - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.encoding """ return self._properties.get("encoding") @@ -83,8 +82,7 @@ def field_name(self): field identifier See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.fieldName - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.fieldName + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.field_name """ return self._properties.get("fieldName") @@ -98,8 +96,7 @@ def only_read_latest(self): column are exposed. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.onlyReadLatest - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.onlyReadLatest + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.only_read_latest """ return self._properties.get("onlyReadLatest") @@ -115,8 +112,7 @@ def qualifier_encoded(self): will handle base64 encoding for you. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.qualifierEncoded - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifierEncoded + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_encoded """ prop = self._properties.get("qualifierEncoded") if prop is None: @@ -132,8 +128,7 @@ def qualifier_string(self): """str: A valid UTF-8 string qualifier See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.qualifierEncoded - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifierEncoded + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_string """ return self._properties.get("qualifierString") @@ -146,8 +141,7 @@ def type_(self): """str: The type to convert the value in cells of this column. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.%28key%29.bigtableOptions.columnFamilies.columns.type - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.type """ return self._properties.get("type") @@ -194,8 +188,7 @@ def encoding(self): """str: The encoding of the values when the type is not `STRING` See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.encoding - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.encoding """ return self._properties.get("encoding") @@ -208,8 +201,7 @@ def family_id(self): """str: Identifier of the column family. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.familyId - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.familyId + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.family_id """ return self._properties.get("familyId") @@ -223,8 +215,7 @@ def only_read_latest(self): for all columns in this column family. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.onlyReadLatest - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.onlyReadLatest + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.only_read_latest """ return self._properties.get("onlyReadLatest") @@ -237,8 +228,7 @@ def type_(self): """str: The type to convert the value in cells of this column family. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.type - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.type """ return self._properties.get("type") @@ -252,8 +242,7 @@ def columns(self): that should be exposed as individual fields. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.columns """ prop = self._properties.get("columns", []) return [BigtableColumn.from_api_repr(col) for col in prop] @@ -307,8 +296,7 @@ def ignore_unspecified_column_families(self): :attr:`column_families` list. Defaults to :data:`False`. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.ignoreUnspecifiedColumnFamilies - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.ignoreUnspecifiedColumnFamilies + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.ignore_unspecified_column_families """ return self._properties.get("ignoreUnspecifiedColumnFamilies") @@ -322,8 +310,7 @@ def read_rowkey_as_string(self): converted to string. Defaults to :data:`False`. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.readRowkeyAsString - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.readRowkeyAsString + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.read_rowkey_as_string """ return self._properties.get("readRowkeyAsString") @@ -337,8 +324,7 @@ def column_families(self): column families to expose in the table schema along with their types. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.column_families """ prop = self._properties.get("columnFamilies", []) return [BigtableColumnFamily.from_api_repr(cf) for cf in prop] @@ -390,8 +376,7 @@ def allow_jagged_rows(self): null values. Defaults to :data:`False`. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowJaggedRows - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowJaggedRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_jagged_rows """ return self._properties.get("allowJaggedRows") @@ -405,8 +390,7 @@ def allow_quoted_newlines(self): characters in a CSV file are allowed. Defaults to :data:`False`. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowQuotedNewlines - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowQuotedNewlines + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_quoted_newlines """ return self._properties.get("allowQuotedNewlines") @@ -419,8 +403,7 @@ def encoding(self): """str: The character encoding of the data. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.encoding - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.encoding """ return self._properties.get("encoding") @@ -433,8 +416,7 @@ def field_delimiter(self): """str: The separator for fields in a CSV file. Defaults to comma (','). See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.fieldDelimiter - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.fieldDelimiter + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.field_delimiter """ return self._properties.get("fieldDelimiter") @@ -447,8 +429,7 @@ def quote_character(self): """str: The value that is used to quote data sections in a CSV file. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.quote - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.quote + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.quote """ return self._properties.get("quote") @@ -461,8 +442,7 @@ def skip_leading_rows(self): """int: The number of rows at the top of a CSV file. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.skipLeadingRows - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.skip_leading_rows """ return _int_or_none(self._properties.get("skipLeadingRows")) @@ -512,8 +492,7 @@ def skip_leading_rows(self): skip when reading the data. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).googleSheetsOptions.skipLeadingRows - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.googleSheetsOptions.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.skip_leading_rows """ return _int_or_none(self._properties.get("skipLeadingRows")) @@ -526,7 +505,7 @@ def range(self): """str: The range of a sheet that BigQuery will query from. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.range """ return _str_or_none(self._properties.get("range")) @@ -586,7 +565,7 @@ def source_format(self): Format of external source. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_format """ return self._properties["sourceFormat"] @@ -601,8 +580,7 @@ def autodetect(self): automatically. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.autodetect """ return self._properties.get("autodetect") @@ -615,8 +593,7 @@ def compression(self): """str: The compression type of the data source. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.compression """ return self._properties.get("compression") @@ -630,8 +607,7 @@ def ignore_unknown_values(self): table schema are ignored. Defaults to :data:`False`. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.ignore_unknown_values """ return self._properties.get("ignoreUnknownValues") @@ -645,8 +621,7 @@ def max_bad_records(self): reading data. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.max_bad_records """ return self._properties.get("maxBadRecords") @@ -659,8 +634,7 @@ def source_uris(self): """List[str]: URIs that point to your data in Google Cloud. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_uris """ return self._properties.get("sourceUris", []) @@ -674,8 +648,7 @@ def schema(self): for the data. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema """ prop = self._properties.get("schema", {}) return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] From ea91ff59217d4bcc4d53918ac31540da2d6616ac Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 13 Nov 2019 01:36:53 +0200 Subject: [PATCH 0706/2016] fix(bigquery): preserve job config passed to Client methods (#9735) This commit assures that Client's methods that accept job config as an argument operate on deep copies, and do not modify the original job config instances passed to them. --- .../google/cloud/bigquery/client.py | 12 +- .../tests/unit/test_client.py | 135 +++++++++++++++++- 2 files changed, 139 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index bae4359300f8..c8df21e91f55 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -187,7 +187,7 @@ def __init__( self._connection = Connection(self, **kw_args) self._location = location - self._default_query_job_config = default_query_job_config + self._default_query_job_config = copy.deepcopy(default_query_job_config) @property def location(self): @@ -1381,6 +1381,7 @@ def load_table_from_uri( destination = _table_arg_to_table_ref(destination, default_project=self.project) if job_config: + job_config = copy.deepcopy(job_config) _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) load_job = job.LoadJob(job_ref, source_uris, destination, self, job_config) @@ -1465,6 +1466,7 @@ def load_table_from_file( destination = _table_arg_to_table_ref(destination, default_project=self.project) job_ref = job._JobReference(job_id, project=project, location=location) if job_config: + job_config = copy.deepcopy(job_config) _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) load_job = job.LoadJob(job_ref, None, destination, self, job_config) job_resource = load_job.to_api_repr() @@ -1969,6 +1971,8 @@ def copy_table( if job_config: _verify_job_config_type(job_config, google.cloud.bigquery.job.CopyJobConfig) + job_config = copy.deepcopy(job_config) + copy_job = job.CopyJob( job_ref, sources, destination, client=self, job_config=job_config ) @@ -2049,6 +2053,8 @@ def extract_table( _verify_job_config_type( job_config, google.cloud.bigquery.job.ExtractJobConfig ) + job_config = copy.deepcopy(job_config) + extract_job = job.ExtractJob( job_ref, source, destination_uris, client=self, job_config=job_config ) @@ -2112,6 +2118,8 @@ def query( if location is None: location = self.location + job_config = copy.deepcopy(job_config) + if self._default_query_job_config: if job_config: _verify_job_config_type( @@ -2129,7 +2137,7 @@ def query( self._default_query_job_config, google.cloud.bigquery.job.QueryJobConfig, ) - job_config = self._default_query_job_config + job_config = copy.deepcopy(self._default_query_job_config) job_ref = job._JobReference(job_id, project=project, location=location) query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index e6ed4d1c8072..da3fb2c56689 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2997,6 +2997,8 @@ def test_load_table_from_uri(self): creds = _make_credentials() http = object() job_config = LoadJobConfig() + original_config_copy = copy.deepcopy(job_config) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) destination = client.dataset(self.DS_ID).table(DESTINATION) @@ -3010,6 +3012,9 @@ def test_load_table_from_uri(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE ) + # the original config object should not have been modified + self.assertEqual(job_config.to_api_repr(), original_config_copy.to_api_repr()) + self.assertIsInstance(job, LoadJob) self.assertIsInstance(job._configuration, LoadJobConfig) self.assertIs(job._client, client) @@ -3496,19 +3501,24 @@ def test_copy_table_w_valid_job_config(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - job_config = CopyJobConfig() conn = client._connection = make_connection(RESOURCE) dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) + job_config = CopyJobConfig() + original_config_copy = copy.deepcopy(job_config) job = client.copy_table(source, destination, job_id=JOB, job_config=job_config) + # Check that copy_table actually starts the job. conn.api_request.assert_called_once_with( method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE ) self.assertIsInstance(job._configuration, CopyJobConfig) + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + def test_extract_table(self): from google.cloud.bigquery.job import ExtractJob @@ -3679,6 +3689,7 @@ def test_extract_table_generated_job_id(self): source = dataset.table(SOURCE) job_config = ExtractJobConfig() job_config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON + original_config_copy = copy.deepcopy(job_config) job = client.extract_table(source, DESTINATION, job_config=job_config) @@ -3695,6 +3706,9 @@ def test_extract_table_generated_job_id(self): self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + def test_extract_table_w_destination_uris(self): from google.cloud.bigquery.job import ExtractJob @@ -3840,6 +3854,7 @@ def test_query_w_explicit_job_config(self): job_config = QueryJobConfig() job_config.use_query_cache = True job_config.maximum_bytes_billed = 2000 + original_config_copy = copy.deepcopy(job_config) client.query( query, job_id=job_id, location=self.LOCATION, job_config=job_config @@ -3850,6 +3865,105 @@ def test_query_w_explicit_job_config(self): method="POST", path="/projects/PROJECT/jobs", data=resource ) + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + + def test_query_preserving_explicit_job_config(self): + job_id = "some-job-id" + query = "select count(*) from persons" + resource = { + "jobReference": { + "jobId": job_id, + "projectId": self.PROJECT, + "location": self.LOCATION, + }, + "configuration": { + "query": { + "query": query, + "useLegacySql": False, + "useQueryCache": True, + "maximumBytesBilled": "2000", + } + }, + } + + creds = _make_credentials() + http = object() + + from google.cloud.bigquery import QueryJobConfig + + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http,) + conn = client._connection = make_connection(resource) + + job_config = QueryJobConfig() + job_config.use_query_cache = True + job_config.maximum_bytes_billed = 2000 + original_config_copy = copy.deepcopy(job_config) + + client.query( + query, job_id=job_id, location=self.LOCATION, job_config=job_config + ) + + # Check that query actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", path="/projects/PROJECT/jobs", data=resource + ) + + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + + def test_query_preserving_explicit_default_job_config(self): + job_id = "some-job-id" + query = "select count(*) from persons" + resource = { + "jobReference": { + "jobId": job_id, + "projectId": self.PROJECT, + "location": self.LOCATION, + }, + "configuration": { + "query": { + "query": query, + "defaultDataset": { + "projectId": self.PROJECT, + "datasetId": "some-dataset", + }, + "useLegacySql": False, + "maximumBytesBilled": "1000", + } + }, + } + + creds = _make_credentials() + http = object() + + from google.cloud.bigquery import QueryJobConfig, DatasetReference + + default_job_config = QueryJobConfig() + default_job_config.default_dataset = DatasetReference( + self.PROJECT, "some-dataset" + ) + default_job_config.maximum_bytes_billed = 1000 + default_config_copy = copy.deepcopy(default_job_config) + + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_query_job_config=default_job_config, + ) + conn = client._connection = make_connection(resource) + + client.query(query, job_id=job_id, location=self.LOCATION, job_config=None) + + # Check that query actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", path="/projects/PROJECT/jobs", data=resource + ) + + # the original default config object should not have been modified + assert default_job_config.to_api_repr() == default_config_copy.to_api_repr() + def test_query_w_invalid_job_config(self): from google.cloud.bigquery import QueryJobConfig, DatasetReference from google.cloud.bigquery import job @@ -5429,22 +5543,24 @@ def test_load_table_from_file_resumable(self): client = self._make_client() file_obj = self._make_file_obj() + job_config = self._make_config() + original_config_copy = copy.deepcopy(job_config) do_upload_patch = self._make_do_upload_patch( client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION ) with do_upload_patch as do_upload: client.load_table_from_file( - file_obj, - self.TABLE_REF, - job_id="job_id", - job_config=self._make_config(), + file_obj, self.TABLE_REF, job_id="job_id", job_config=job_config, ) do_upload.assert_called_once_with( file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES ) + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + def test_load_table_from_file_w_explicit_project(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -5790,6 +5906,7 @@ def test_load_table_from_dataframe_w_custom_job_config(self): job_config = job.LoadJobConfig( write_disposition=job.WriteDisposition.WRITE_TRUNCATE ) + original_config_copy = copy.deepcopy(job_config) get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", @@ -5826,6 +5943,9 @@ def test_load_table_from_dataframe_w_custom_job_config(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.write_disposition == job.WriteDisposition.WRITE_TRUNCATE + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema(self): @@ -6466,6 +6586,7 @@ def test_load_table_from_json_non_default_args(self): ] job_config = job.LoadJobConfig(schema=schema) job_config._properties["load"]["unknown_field"] = "foobar" + original_config_copy = copy.deepcopy(job_config) load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True @@ -6493,13 +6614,15 @@ def test_load_table_from_json_non_default_args(self): ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert job_config.source_format is None # the original was not modified assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON assert sent_config.schema == schema assert not sent_config.autodetect # all properties should have been cloned and sent to the backend assert sent_config._properties.get("load", {}).get("unknown_field") == "foobar" + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + def test_load_table_from_json_w_invalid_job_config(self): from google.cloud.bigquery import job From 2ba5876fa58790f9428aea5ba4d82089224d8db7 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 13 Nov 2019 19:19:24 -0500 Subject: [PATCH 0707/2016] chore(bigquery): release 1.22.0 (#9788) --- packages/google-cloud-bigquery/CHANGELOG.md | 34 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 1560e456a24e..7b76ae652200 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,40 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.22.0 + +11-13-2019 12:23 PST + + +### Implementation Changes +- Preserve job config passed to Client methods. ([#9735](https://github.com/googleapis/google-cloud-python/pull/9735)) +- Use pyarrow fallback for improved schema detection. ([#9321](https://github.com/googleapis/google-cloud-python/pull/9321)) +- Add TypeError if wrong `job_config type` is passed to client job methods. ([#9506](https://github.com/googleapis/google-cloud-python/pull/9506)) +- Fix arrow deprecation warning. ([#9504](https://github.com/googleapis/google-cloud-python/pull/9504)) + +### New Features +- Add `--destination_table` parameter to IPython magic. ([#9599](https://github.com/googleapis/google-cloud-python/pull/9599)) +- Allow passing schema as a sequence of dicts. ([#9550](https://github.com/googleapis/google-cloud-python/pull/9550)) +- Implement defaultEncryptionConfiguration on datasets. ([#9489](https://github.com/googleapis/google-cloud-python/pull/9489)) +- Add range partitioning to tables, load jobs, and query jobs. ([#9477](https://github.com/googleapis/google-cloud-python/pull/9477)) + +### Dependencies +- Pin `google-resumable-media` to includ 0.5.x. ([#9572](https://github.com/googleapis/google-cloud-python/pull/9572)) + +### Documentation +- Fix link anchors in external config docstrings. ([#9627](https://github.com/googleapis/google-cloud-python/pull/9627)) +- Add python 2 sunset banner to documentation. ([#9036](https://github.com/googleapis/google-cloud-python/pull/9036)) +- Add table create sample using integer range partitioning. ([#9478](https://github.com/googleapis/google-cloud-python/pull/9478)) +- Document how to achieve higher write limit and add tests. ([#9574](https://github.com/googleapis/google-cloud-python/pull/9574)) +- Add code sample for scripting. ([#9537](https://github.com/googleapis/google-cloud-python/pull/9537)) +- Rewrite docs in Google style, part 2. ([#9481](https://github.com/googleapis/google-cloud-python/pull/9481)) +- Use multi-regional key path for CMEK in snippets. ([#9523](https://github.com/googleapis/google-cloud-python/pull/9523)) + +### Internal / Testing Changes +- Fix undelete table system test to use milliseconds in snapshot decorator. ([#9649](https://github.com/googleapis/google-cloud-python/pull/9649)) +- Format code with latest version of black. ([#9556](https://github.com/googleapis/google-cloud-python/pull/9556)) +- Remove duplicate test dependencies. ([#9503](https://github.com/googleapis/google-cloud-python/pull/9503)) + ## 1.21.0 10-16-2019 10:33 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 8331d2c07114..45c99e7d9bfc 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.21.0" +version = "1.22.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From f5d93c9a20c78912de6ab12b09110eb5774fe221 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 19 Nov 2019 06:46:57 +0530 Subject: [PATCH 0708/2016] feat(bigquery): add description for routine entities (#9785) * feat(bigquery): add description for routine entities * feat(bigquery): use the Optional shorthand instead of a Union with None --- .../google/cloud/bigquery/dataset.py | 2 +- .../google/cloud/bigquery/model.py | 2 +- .../google/cloud/bigquery/routine.py | 12 +++++++ .../tests/unit/routine/test_routine.py | 32 ++++++++++++++++++- 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 754a2fa00d00..02664d87b153 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -509,7 +509,7 @@ def default_table_expiration_ms(self, value): @property def description(self): - """Union[str, None]: Description of the dataset as set by the user + """Optional[str]: Description of the dataset as set by the user (defaults to :data:`None`). Raises: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index 7bad752ea658..d39ec5f2f60c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -217,7 +217,7 @@ def expires(self, value): @property def description(self): - """Union[str, None]: Description of the model (defaults to + """Optional[str]: Description of the model (defaults to :data:`None`). """ return self._properties.get("description") diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py index 044368e75108..e99d9c6fa162 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py @@ -50,6 +50,7 @@ class Routine(object): "reference": "routineReference", "return_type": "returnType", "type_": "routineType", + "description": "description", } def __init__(self, routine_ref, **kwargs): @@ -239,6 +240,17 @@ def body(self): def body(self, value): self._properties[self._PROPERTY_TO_API_FIELD["body"]] = value + @property + def description(self): + """Optional[str]: Description of the routine (defaults to + :data:`None`). + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["description"]) + + @description.setter + def description(self, value): + self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value + @classmethod def from_api_repr(cls, resource): """Factory: construct a routine given its API representation. diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py index 02d4a2ee2883..02f703535227 100644 --- a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py @@ -73,6 +73,7 @@ def test_ctor_w_properties(target_class): type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 ) type_ = "SCALAR_FUNCTION" + description = "A routine description." actual_routine = target_class( routine_id, @@ -81,6 +82,7 @@ def test_ctor_w_properties(target_class): language=language, return_type=return_type, type_=type_, + description=description, ) ref = RoutineReference.from_string(routine_id) @@ -90,6 +92,7 @@ def test_ctor_w_properties(target_class): assert actual_routine.language == language assert actual_routine.return_type == return_type assert actual_routine.type_ == type_ + assert actual_routine.description == description def test_from_api_repr(target_class): @@ -117,6 +120,7 @@ def test_from_api_repr(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "someNewField": "someValue", + "description": "A routine description.", } actual_routine = target_class.from_api_repr(resource) @@ -148,6 +152,7 @@ def test_from_api_repr(target_class): ) assert actual_routine.type_ == "SCALAR_FUNCTION" assert actual_routine._properties["someNewField"] == "someValue" + assert actual_routine.description == "A routine description." def test_from_api_repr_w_minimal_resource(target_class): @@ -172,6 +177,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.language is None assert actual_routine.return_type is None assert actual_routine.type_ is None + assert actual_routine.description is None def test_from_api_repr_w_unknown_fields(target_class): @@ -202,6 +208,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "language": "SQL", "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", + "description": "A routine description.", }, ["arguments"], {"arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}]}, @@ -213,6 +220,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "language": "SQL", "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", + "description": "A routine description.", }, ["body"], {"definitionBody": "x * 3"}, @@ -224,6 +232,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "language": "SQL", "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", + "description": "A routine description.", }, ["language"], {"language": "SQL"}, @@ -235,6 +244,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "language": "SQL", "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", + "description": "A routine description.", }, ["return_type"], {"returnType": {"typeKind": "INT64"}}, @@ -246,19 +256,33 @@ def test_from_api_repr_w_unknown_fields(target_class): "language": "SQL", "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", + "description": "A routine description.", }, ["type_"], {"routineType": "SCALAR_FUNCTION"}, ), + ( + { + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "definitionBody": "x * 3", + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + "description": "A routine description.", + }, + ["description"], + {"description": "A routine description."}, + ), ( {}, - ["arguments", "language", "body", "type_", "return_type"], + ["arguments", "language", "body", "type_", "return_type", "description"], { "arguments": None, "definitionBody": None, "language": None, "returnType": None, "routineType": None, + "description": None, }, ), ( @@ -299,6 +323,12 @@ def test_set_return_type_w_none(object_under_test): assert object_under_test._properties["returnType"] is None +def test_set_description_w_none(object_under_test): + object_under_test.description = None + assert object_under_test.description is None + assert object_under_test._properties["description"] is None + + def test_repr(target_class): model = target_class("my-proj.my_dset.my_routine") actual_routine = repr(model) From c9eca832e2fe60d3d69fe0ac04652d641bd5ec7a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 19 Nov 2019 02:19:00 +0100 Subject: [PATCH 0709/2016] feat(bigquery): add support for hive partitioning options configuration (#9626) * feat(bigquery): add hive partitioning options to external config * Mark ExternalConfig.options property as optional * Support hive partitioning options in LoadJobConfig * Mark hive partitioning class and propertis as beta --- .../google/cloud/bigquery/external_config.py | 94 ++++++++++++++++++- .../google/cloud/bigquery/job.py | 28 ++++++ .../tests/unit/test_external_config.py | 52 ++++++++++ .../tests/unit/test_job.py | 40 ++++++++ 4 files changed, 213 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 8355c0f0940d..d702d9d83302 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -543,6 +543,76 @@ def from_api_repr(cls, resource): _OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions) +class HivePartitioningOptions(object): + """[Beta] Options that configure hive partitioning. + + .. note:: + **Experimental**. This feature is experimental and might change or + have limited support. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions + """ + + def __init__(self): + self._properties = {} + + @property + def mode(self): + """Optional[str]: When set, what mode of hive partitioning to use when reading data. + + Two modes are supported: "AUTO" and "STRINGS". + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode + """ + return self._properties.get("mode") + + @mode.setter + def mode(self, value): + self._properties["mode"] = value + + @property + def source_uri_prefix(self): + """Optional[str]: When hive partition detection is requested, a common prefix for + all source URIs is required. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.source_uri_prefix + """ + return self._properties.get("sourceUriPrefix") + + @source_uri_prefix.setter + def source_uri_prefix(self, value): + self._properties["sourceUriPrefix"] = value + + def to_api_repr(self): + """Build an API representation of this object. + + Returns: + Dict[str, Any]: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a :class:`~.external_config.HivePartitioningOptions` + instance given its API representation. + + Args: + resource (Dict[str, Any]): + Definition of a :class:`~.external_config.HivePartitioningOptions` + instance in the same representation as is returned from the + API. + + Returns: + HivePartitioningOptions: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + class ExternalConfig(object): """Description of an external data source. @@ -571,7 +641,7 @@ def source_format(self): @property def options(self): - """Dict[str, Any]: Source-specific options.""" + """Optional[Dict[str, Any]]: Source-specific options.""" return self._options @property @@ -601,6 +671,28 @@ def compression(self): def compression(self, value): self._properties["compression"] = value + @property + def hive_partitioning(self): + """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ + it configures hive partitioning support. + + .. note:: + **Experimental**. This feature is experimental and might change or + have limited support. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options + """ + prop = self._properties.get("hivePartitioningOptions") + if prop is None: + return None + return HivePartitioningOptions.from_api_repr(prop) + + @hive_partitioning.setter + def hive_partitioning(self, value): + prop = value.to_api_repr() if value is not None else None + self._properties["hivePartitioningOptions"] = prop + @property def ignore_unknown_values(self): """bool: If :data:`True`, extra values that are not represented in the diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index a8d797f4bef5..e150cc61ef79 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -29,6 +29,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import _query_param_from_api_repr from google.cloud.bigquery.query import ArrayQueryParameter @@ -1138,6 +1139,33 @@ def field_delimiter(self): def field_delimiter(self, value): self._set_sub_prop("fieldDelimiter", value) + @property + def hive_partitioning(self): + """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ + it configures hive partitioning support. + + .. note:: + **Experimental**. This feature is experimental and might change or + have limited support. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options + """ + prop = self._get_sub_prop("hivePartitioningOptions") + if prop is None: + return None + return HivePartitioningOptions.from_api_repr(prop) + + @hive_partitioning.setter + def hive_partitioning(self, value): + if value is not None: + if isinstance(value, HivePartitioningOptions): + value = value.to_api_repr() + else: + raise TypeError("Expected a HivePartitioningOptions instance or None.") + + self._set_sub_prop("hivePartitioningOptions", value) + @property def ignore_unknown_values(self): """bool: Ignore extra values not represented in the table schema. diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index dab4391cbe04..6028d069bcbe 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -173,6 +173,58 @@ def test_to_api_repr_sheets(self): self.assertEqual(got_resource, exp_resource) + def test_from_api_repr_hive_partitioning(self): + resource = _copy_and_update( + self.BASE_RESOURCE, + { + "sourceFormat": "FORMAT_FOO", + "hivePartitioningOptions": { + "sourceUriPrefix": "http://foo/bar", + "mode": "STRINGS", + }, + }, + ) + + ec = external_config.ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, "FORMAT_FOO") + self.assertIsInstance( + ec.hive_partitioning, external_config.HivePartitioningOptions + ) + self.assertEqual(ec.hive_partitioning.source_uri_prefix, "http://foo/bar") + self.assertEqual(ec.hive_partitioning.mode, "STRINGS") + + # converting back to API representation should yield the same result + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + del resource["hivePartitioningOptions"] + ec = external_config.ExternalConfig.from_api_repr(resource) + self.assertIsNone(ec.hive_partitioning) + + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + def test_to_api_repr_hive_partitioning(self): + hive_partitioning = external_config.HivePartitioningOptions() + hive_partitioning.source_uri_prefix = "http://foo/bar" + hive_partitioning.mode = "STRINGS" + + ec = external_config.ExternalConfig("FORMAT_FOO") + ec.hive_partitioning = hive_partitioning + + got_resource = ec.to_api_repr() + + expected_resource = { + "sourceFormat": "FORMAT_FOO", + "hivePartitioningOptions": { + "sourceUriPrefix": "http://foo/bar", + "mode": "STRINGS", + }, + } + self.assertEqual(got_resource, expected_resource) + def test_from_api_repr_csv(self): resource = _copy_and_update( self.BASE_RESOURCE, diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index a2aeb5efbc4a..b7596e4db848 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -1448,6 +1448,46 @@ def test_field_delimiter_setter(self): config.field_delimiter = field_delimiter self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter) + def test_hive_partitioning_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.hive_partitioning) + + def test_hive_partitioning_hit(self): + from google.cloud.bigquery.external_config import HivePartitioningOptions + + config = self._get_target_class()() + config._properties["load"]["hivePartitioningOptions"] = { + "sourceUriPrefix": "http://foo/bar", + "mode": "STRINGS", + } + result = config.hive_partitioning + self.assertIsInstance(result, HivePartitioningOptions) + self.assertEqual(result.source_uri_prefix, "http://foo/bar") + self.assertEqual(result.mode, "STRINGS") + + def test_hive_partitioning_setter(self): + from google.cloud.bigquery.external_config import HivePartitioningOptions + + hive_partitioning = HivePartitioningOptions() + hive_partitioning.source_uri_prefix = "http://foo/bar" + hive_partitioning.mode = "AUTO" + + config = self._get_target_class()() + config.hive_partitioning = hive_partitioning + self.assertEqual( + config._properties["load"]["hivePartitioningOptions"], + {"sourceUriPrefix": "http://foo/bar", "mode": "AUTO"}, + ) + + config.hive_partitioning = None + self.assertIsNone(config._properties["load"]["hivePartitioningOptions"]) + + def test_hive_partitioning_invalid_type(self): + config = self._get_target_class()() + + with self.assertRaises(TypeError): + config.hive_partitioning = {"mode": "AUTO"} + def test_ignore_unknown_values_missing(self): config = self._get_target_class()() self.assertIsNone(config.ignore_unknown_values) From 9579fc0a46bf1bd5066afd056bc0fd9435d054fc Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 20 Nov 2019 05:59:43 +0530 Subject: [PATCH 0710/2016] feat(bigquery): add support of use_avro_logical_types for extract jobs (#9642) * feat(bigquery): add support of use_avro_logical_types for extract jobs * feat(bigquery): cosmetic change * feat(bigquery): docstring change of use_avro_logical_types in LoadJobConfig class --- .../google/cloud/bigquery/job.py | 14 +++++++++++++- .../google-cloud-bigquery/tests/unit/test_job.py | 4 ++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index e150cc61ef79..f0312b0d4219 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1345,7 +1345,7 @@ def time_partitioning(self, value): @property def use_avro_logical_types(self): """bool: For loads of Avro data, governs whether Avro logical types are - converted to their corresponding BigQuery types(e.g. TIMESTAMP) rather than + converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than raw types (e.g. INTEGER). """ return self._get_sub_prop("useAvroLogicalTypes") @@ -1938,6 +1938,18 @@ def print_header(self): def print_header(self, value): self._set_sub_prop("printHeader", value) + @property + def use_avro_logical_types(self): + """bool: For loads of Avro data, governs whether Avro logical types are + converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than + raw types (e.g. INTEGER). + """ + return self._get_sub_prop("useAvroLogicalTypes") + + @use_avro_logical_types.setter + def use_avro_logical_types(self, value): + self._set_sub_prop("useAvroLogicalTypes", bool(value)) + class ExtractJob(_AsyncJob): """Asynchronous job: extract data from a table into Cloud Storage. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index b7596e4db848..52e2abf8f304 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -3036,6 +3036,7 @@ def test_to_api_repr(self): config.field_delimiter = "ignored for avro" config.print_header = False config._properties["extract"]["someNewField"] = "some-value" + config.use_avro_logical_types = True resource = config.to_api_repr() self.assertEqual( resource, @@ -3046,6 +3047,7 @@ def test_to_api_repr(self): "fieldDelimiter": "ignored for avro", "printHeader": False, "someNewField": "some-value", + "useAvroLogicalTypes": True, } }, ) @@ -3060,6 +3062,7 @@ def test_from_api_repr(self): "fieldDelimiter": "\t", "printHeader": True, "someNewField": "some-value", + "useAvroLogicalTypes": False, } } ) @@ -3068,6 +3071,7 @@ def test_from_api_repr(self): self.assertEqual(config.field_delimiter, "\t") self.assertEqual(config.print_header, True) self.assertEqual(config._properties["extract"]["someNewField"], "some-value") + self.assertEqual(config.use_avro_logical_types, False) class TestExtractJob(unittest.TestCase, _Base): From 7c7fb8f6d84fd5c093d79f6ac079fee809c6ee28 Mon Sep 17 00:00:00 2001 From: Alex Ford <30623923+alextford11@users.noreply.github.com> Date: Thu, 21 Nov 2019 20:50:34 +0000 Subject: [PATCH 0711/2016] fix(bigquery): import Mapping from collections.abc not from collections (#9826) * fix(bigquery): import Mapping from collections.abc not from collections * fix(bigquery): importing module not class * Use collections_abc import from six. --- .../google-cloud-bigquery/google/cloud/bigquery/schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index d766cb542608..3878a80a9f94 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -14,7 +14,7 @@ """Schemas for BigQuery tables / queries.""" -import collections +from six.moves import collections_abc from google.cloud.bigquery_v2 import types @@ -281,7 +281,7 @@ def _to_schema_fields(schema): instance or a compatible mapping representation of the field. """ for field in schema: - if not isinstance(field, (SchemaField, collections.Mapping)): + if not isinstance(field, (SchemaField, collections_abc.Mapping)): raise ValueError( "Schema items must either be fields or compatible " "mapping representations." From 7d37ea8ff110791985aa0925302e3b04f5b53d99 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 27 Nov 2019 22:55:43 +0100 Subject: [PATCH 0712/2016] fix(bigquery): add close() method to client for releasing open sockets (#9894) * Add close() method to Client * Add psutil as an extra test dependency * Fix open sockets leak in IPython magics * Move psutil test dependency to noxfile * Wrap entire cell magic into try-finally block A single common cleanup point at the end makes it much less likely to accidentally re-introduce an open socket leak. --- .../google/cloud/bigquery/client.py | 12 ++ .../google/cloud/bigquery/magics.py | 160 ++++++++++-------- packages/google-cloud-bigquery/noxfile.py | 2 +- .../google-cloud-bigquery/tests/system.py | 28 +++ .../tests/unit/test_client.py | 11 ++ .../tests/unit/test_magics.py | 67 ++++++++ 6 files changed, 211 insertions(+), 69 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index c8df21e91f55..e6eaf5fcb3ba 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -194,6 +194,18 @@ def location(self): """Default location for jobs / datasets / tables.""" return self._location + def close(self): + """Close the underlying transport objects, releasing system resources. + + .. note:: + + The client instance can be used for making additional requests even + after closing, in which case the underlying connections are + automatically re-created. + """ + self._http._auth_request.session.close() + self._http.close() + def get_service_account_email(self, project=None): """Get the email address of the project's BigQuery service account diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 59265ed6b0c5..5ca6817a99c6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -137,6 +137,7 @@ import re import ast +import functools import sys import time from concurrent import futures @@ -494,86 +495,91 @@ def _cell_magic(line, query): args.use_bqstorage_api or context.use_bqstorage_api, context.credentials ) - if args.max_results: - max_results = int(args.max_results) - else: - max_results = None + close_transports = functools.partial(_close_transports, client, bqstorage_client) - query = query.strip() + try: + if args.max_results: + max_results = int(args.max_results) + else: + max_results = None + + query = query.strip() + + # Any query that does not contain whitespace (aside from leading and trailing whitespace) + # is assumed to be a table id + if not re.search(r"\s", query): + try: + rows = client.list_rows(query, max_results=max_results) + except Exception as ex: + _handle_error(ex, args.destination_var) + return + + result = rows.to_dataframe(bqstorage_client=bqstorage_client) + if args.destination_var: + IPython.get_ipython().push({args.destination_var: result}) + return + else: + return result + + job_config = bigquery.job.QueryJobConfig() + job_config.query_parameters = params + job_config.use_legacy_sql = args.use_legacy_sql + job_config.dry_run = args.dry_run + + if args.destination_table: + split = args.destination_table.split(".") + if len(split) != 2: + raise ValueError( + "--destination_table should be in a . format." + ) + dataset_id, table_id = split + job_config.allow_large_results = True + dataset_ref = client.dataset(dataset_id) + destination_table_ref = dataset_ref.table(table_id) + job_config.destination = destination_table_ref + job_config.create_disposition = "CREATE_IF_NEEDED" + job_config.write_disposition = "WRITE_TRUNCATE" + _create_dataset_if_necessary(client, dataset_id) + + if args.maximum_bytes_billed == "None": + job_config.maximum_bytes_billed = 0 + elif args.maximum_bytes_billed is not None: + value = int(args.maximum_bytes_billed) + job_config.maximum_bytes_billed = value - # Any query that does not contain whitespace (aside from leading and trailing whitespace) - # is assumed to be a table id - if not re.search(r"\s", query): try: - rows = client.list_rows(query, max_results=max_results) + query_job = _run_query(client, query, job_config=job_config) except Exception as ex: _handle_error(ex, args.destination_var) return - result = rows.to_dataframe(bqstorage_client=bqstorage_client) - if args.destination_var: - IPython.get_ipython().push({args.destination_var: result}) - return - else: - return result - - job_config = bigquery.job.QueryJobConfig() - job_config.query_parameters = params - job_config.use_legacy_sql = args.use_legacy_sql - job_config.dry_run = args.dry_run + if not args.verbose: + display.clear_output() - if args.destination_table: - split = args.destination_table.split(".") - if len(split) != 2: - raise ValueError( - "--destination_table should be in a . format." + if args.dry_run and args.destination_var: + IPython.get_ipython().push({args.destination_var: query_job}) + return + elif args.dry_run: + print( + "Query validated. This query will process {} bytes.".format( + query_job.total_bytes_processed + ) ) - dataset_id, table_id = split - job_config.allow_large_results = True - dataset_ref = client.dataset(dataset_id) - destination_table_ref = dataset_ref.table(table_id) - job_config.destination = destination_table_ref - job_config.create_disposition = "CREATE_IF_NEEDED" - job_config.write_disposition = "WRITE_TRUNCATE" - _create_dataset_if_necessary(client, dataset_id) - - if args.maximum_bytes_billed == "None": - job_config.maximum_bytes_billed = 0 - elif args.maximum_bytes_billed is not None: - value = int(args.maximum_bytes_billed) - job_config.maximum_bytes_billed = value - - try: - query_job = _run_query(client, query, job_config=job_config) - except Exception as ex: - _handle_error(ex, args.destination_var) - return - - if not args.verbose: - display.clear_output() + return query_job - if args.dry_run and args.destination_var: - IPython.get_ipython().push({args.destination_var: query_job}) - return - elif args.dry_run: - print( - "Query validated. This query will process {} bytes.".format( - query_job.total_bytes_processed + if max_results: + result = query_job.result(max_results=max_results).to_dataframe( + bqstorage_client=bqstorage_client ) - ) - return query_job - - if max_results: - result = query_job.result(max_results=max_results).to_dataframe( - bqstorage_client=bqstorage_client - ) - else: - result = query_job.to_dataframe(bqstorage_client=bqstorage_client) + else: + result = query_job.to_dataframe(bqstorage_client=bqstorage_client) - if args.destination_var: - IPython.get_ipython().push({args.destination_var: result}) - else: - return result + if args.destination_var: + IPython.get_ipython().push({args.destination_var: result}) + else: + return result + finally: + close_transports() def _make_bqstorage_client(use_bqstorage_api, credentials): @@ -601,3 +607,21 @@ def _make_bqstorage_client(use_bqstorage_api, credentials): credentials=credentials, client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) + + +def _close_transports(client, bqstorage_client): + """Close the given clients' underlying transport channels. + + Closing the transport is needed to release system resources, namely open + sockets. + + Args: + client (:class:`~google.cloud.bigquery.client.Client`): + bqstorage_client + (Optional[:class:`~google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient`]): + A client for the BigQuery Storage API. + + """ + client.close() + if bqstorage_client is not None: + bqstorage_client.transport.channel.close() diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index a6d8094ebbc3..87809b74a569 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -81,7 +81,7 @@ def system(session): session.install("--pre", "grpcio") # Install all test dependencies, then install local packages in place. - session.install("mock", "pytest") + session.install("mock", "pytest", "psutil") for local_dep in LOCAL_DEPS: session.install("-e", local_dep) session.install("-e", os.path.join("..", "storage")) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 4816962a70d6..bba527178f47 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -27,6 +27,7 @@ import re import six +import psutil import pytest import pytz @@ -203,6 +204,27 @@ def _create_bucket(self, bucket_name, location=None): return bucket + def test_close_releases_open_sockets(self): + current_process = psutil.Process() + conn_count_start = len(current_process.connections()) + + client = Config.CLIENT + client.query( + """ + SELECT + source_year AS year, COUNT(is_male) AS birth_count + FROM `bigquery-public-data.samples.natality` + GROUP BY year + ORDER BY year DESC + LIMIT 15 + """ + ) + + client.close() + + conn_count_end = len(current_process.connections()) + self.assertEqual(conn_count_end, conn_count_start) + def test_create_dataset(self): DATASET_ID = _make_dataset_id("create_dataset") dataset = self.temp_dataset(DATASET_ID) @@ -2417,6 +2439,9 @@ def temp_dataset(self, dataset_id, location=None): @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic(): ip = IPython.get_ipython() + current_process = psutil.Process() + conn_count_start = len(current_process.connections()) + ip.extension_manager.load_extension("google.cloud.bigquery") sql = """ SELECT @@ -2432,6 +2457,8 @@ def test_bigquery_magic(): with io.capture_output() as captured: result = ip.run_cell_magic("bigquery", "", sql) + conn_count_end = len(current_process.connections()) + lines = re.split("\n|\r", captured.stdout) # Removes blanks & terminal code (result of display clearing) updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines)) @@ -2441,6 +2468,7 @@ def test_bigquery_magic(): assert isinstance(result, pandas.DataFrame) assert len(result) == 10 # verify row count assert list(result) == ["url", "view_count"] # verify column names + assert conn_count_end == conn_count_start # system resources are released def _job_done(instance): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index da3fb2c56689..ecde69d2cf97 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1398,6 +1398,17 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): ] ) + def test_close(self): + creds = _make_credentials() + http = mock.Mock() + http._auth_request.session = mock.Mock() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + client.close() + + http.close.assert_called_once() + http._auth_request.session.close.assert_called_once() + def test_get_model(self): path = "projects/%s/datasets/%s/models/%s" % ( self.PROJECT, diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 6ff9819854a8..8e768c1b7d23 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -545,6 +545,7 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): bqstorage_instance_mock = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient, instance=True ) + bqstorage_instance_mock.transport = mock.Mock() bqstorage_mock.return_value = bqstorage_instance_mock bqstorage_client_patch = mock.patch( "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", bqstorage_mock @@ -601,6 +602,7 @@ def test_bigquery_magic_with_bqstorage_from_context(monkeypatch): bqstorage_instance_mock = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient, instance=True ) + bqstorage_instance_mock.transport = mock.Mock() bqstorage_mock.return_value = bqstorage_instance_mock bqstorage_client_patch = mock.patch( "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", bqstorage_mock @@ -728,6 +730,41 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): query_job_mock.result.assert_called_with(max_results=5) +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_max_results_query_job_results_fails(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + client_query_patch = mock.patch( + "google.cloud.bigquery.client.Client.query", autospec=True + ) + close_transports_patch = mock.patch( + "google.cloud.bigquery.magics._close_transports", autospec=True, + ) + + sql = "SELECT 17 AS num" + + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.result.side_effect = [[], OSError] + + with pytest.raises( + OSError + ), client_query_patch as client_query_mock, default_patch, close_transports_patch as close_transports: + client_query_mock.return_value = query_job_mock + ip.run_cell_magic("bigquery", "--max_results=5", sql) + + assert close_transports.called + + def test_bigquery_magic_w_table_id_invalid(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -820,6 +857,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): bqstorage_instance_mock = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient, instance=True ) + bqstorage_instance_mock.transport = mock.Mock() bqstorage_mock.return_value = bqstorage_instance_mock bqstorage_client_patch = mock.patch( "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", bqstorage_mock @@ -1290,3 +1328,32 @@ def test_bigquery_magic_w_destination_table(): assert job_config_used.write_disposition == "WRITE_TRUNCATE" assert job_config_used.destination.dataset_id == "dataset_id" assert job_config_used.destination.table_id == "table_id" + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_create_dataset_fails(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + create_dataset_if_necessary_patch = mock.patch( + "google.cloud.bigquery.magics._create_dataset_if_necessary", + autospec=True, + side_effect=OSError, + ) + close_transports_patch = mock.patch( + "google.cloud.bigquery.magics._close_transports", autospec=True, + ) + + with pytest.raises( + OSError + ), create_dataset_if_necessary_patch, close_transports_patch as close_transports: + ip.run_cell_magic( + "bigquery", + "--destination_table dataset_id.table_id", + "SELECT foo FROM WHERE LIMIT bar", + ) + + assert close_transports.called From f23f921efce492eb635b3c24825d58ded02592a2 Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar <46078689+Emar-Kar@users.noreply.github.com> Date: Mon, 2 Dec 2019 19:53:50 +0300 Subject: [PATCH 0713/2016] refactor(bigquery): update code samples to use strings for table and dataset IDs (#9495) * refactor(bigquery): update code samples * kms_key_name update * refactor(bigquery): kms_key_name update * samples update * docs session fix * docs session fix * update docs * assertion update with re * docs(bigquery): remove location parameter from samples The parameter can sometimes confuse new BigQuery developers. Since location autodetection now works pretty well, the parameter can be removed from code samples for better clarity, except where the samples want to explicitly demonstrate its usage. * Pass location where auto-detection not supported * update QueryJobConfig * unify undelete_table sample * refactor: update test files with the new conditions * refactor: remove extra blank line in test files --- .../google-cloud-bigquery/docs/snippets.py | 399 ------------------ .../docs/usage/encryption.rst | 4 +- .../docs/usage/queries.rst | 4 +- .../samples/client_query.py | 4 +- .../samples/client_query_add_column.py | 52 +++ .../samples/client_query_batch.py | 46 ++ .../samples/client_query_destination_table.py | 40 ++ .../client_query_destination_table_cmek.py | 49 +++ .../client_query_destination_table_legacy.py | 44 ++ .../samples/client_query_dry_run.py | 40 ++ .../samples/client_query_legacy_sql.py | 39 ++ .../samples/client_query_relax_column.py | 55 +++ .../samples/copy_table.py | 8 +- .../samples/copy_table_cmek.py | 47 +++ .../samples/copy_table_multiple_source.py | 35 ++ .../samples/load_table_dataframe.py | 5 +- .../query_external_sheets_permanent_table.py | 1 + .../query_external_sheets_temporary_table.py | 4 +- .../samples/tests/conftest.py | 5 + .../samples/tests/test_add_empty_column.py | 1 - .../samples/tests/test_browse_table_data.py | 1 - .../samples/tests/test_client_list_jobs.py | 1 - .../samples/tests/test_client_query.py | 1 - .../tests/test_client_query_add_column.py | 32 ++ ..._routine.py => test_client_query_batch.py} | 9 +- ...=> test_client_query_destination_table.py} | 10 +- ...est_client_query_destination_table_cmek.py | 26 ++ ..._client_query_destination_table_legacy.py} | 15 +- .../tests/test_client_query_dry_run.py | 25 ++ ...ine.py => test_client_query_legacy_sql.py} | 9 +- .../tests/test_client_query_relax_column.py | 32 ++ .../samples/tests/test_copy_table.py | 1 - .../samples/tests/test_copy_table_cmek.py | 26 ++ .../tests/test_copy_table_multiple_source.py | 55 +++ .../samples/tests/test_create_dataset.py | 1 - .../samples/tests/test_create_job.py | 3 +- .../samples/tests/test_create_table.py | 1 - .../test_create_table_range_partitioned.py | 1 - .../samples/tests/test_dataset_exists.py | 1 - .../tests/test_dataset_label_samples.py | 1 - .../samples/tests/test_delete_dataset.py | 1 - .../samples/tests/test_delete_table.py | 1 - .../samples/tests/test_get_dataset.py | 1 - .../samples/tests/test_get_table.py | 1 - .../samples/tests/test_list_datasets.py | 1 - .../tests/test_list_datasets_by_label.py | 1 - .../samples/tests/test_list_tables.py | 1 - .../tests/test_load_table_dataframe.py | 1 - .../samples/tests/test_model_samples.py | 1 - ...t_query_external_sheets_permanent_table.py | 1 - ...t_query_external_sheets_temporary_table.py | 1 - .../samples/tests/test_query_to_arrow.py | 1 - ...routine_ddl.py => test_routine_samples.py} | 48 ++- .../samples/tests/test_table_exists.py | 1 - .../samples/tests/test_table_insert_rows.py | 1 - ...le_insert_rows_explicit_none_insert_ids.py | 1 - .../tests/test_update_dataset_access.py | 1 - ...update_dataset_default_table_expiration.py | 1 - .../tests/test_update_dataset_description.py | 1 - .../samples/tests/test_update_routine.py | 22 - .../samples/undelete_table.py | 8 +- 61 files changed, 729 insertions(+), 500 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/client_query_add_column.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_batch.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_destination_table.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_dry_run.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_legacy_sql.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_relax_column.py create mode 100644 packages/google-cloud-bigquery/samples/copy_table_cmek.py create mode 100644 packages/google-cloud-bigquery/samples/copy_table_multiple_source.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py rename packages/google-cloud-bigquery/samples/tests/{test_create_routine.py => test_client_query_batch.py} (73%) rename packages/google-cloud-bigquery/samples/tests/{test_list_routines.py => test_client_query_destination_table.py} (69%) create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py rename packages/google-cloud-bigquery/samples/tests/{test_get_routine.py => test_client_query_destination_table_legacy.py} (64%) create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py rename packages/google-cloud-bigquery/samples/tests/{test_delete_routine.py => test_client_query_legacy_sql.py} (74%) create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py rename packages/google-cloud-bigquery/samples/tests/{test_create_routine_ddl.py => test_routine_samples.py} (62%) delete mode 100644 packages/google-cloud-bigquery/samples/tests/test_update_routine.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 4d7ad7506b6a..2d950936a5a6 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -1224,106 +1224,6 @@ def test_load_table_relax_column(client, to_delete): assert table.num_rows > 0 -def test_copy_table_multiple_source(client, to_delete): - dest_dataset_id = "dest_dataset_{}".format(_millis()) - dest_dataset = bigquery.Dataset(client.dataset(dest_dataset_id)) - dest_dataset.location = "US" - dest_dataset = client.create_dataset(dest_dataset) - to_delete.append(dest_dataset) - - source_dataset_id = "source_dataset_{}".format(_millis()) - source_dataset = bigquery.Dataset(client.dataset(source_dataset_id)) - source_dataset.location = "US" - source_dataset = client.create_dataset(source_dataset) - to_delete.append(source_dataset) - - schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - - table_data = {"table1": b"Washington,WA", "table2": b"California,CA"} - for table_id, data in table_data.items(): - table_ref = source_dataset.table(table_id) - job_config = bigquery.LoadJobConfig() - job_config.schema = schema - body = six.BytesIO(data) - client.load_table_from_file( - body, - table_ref, - # Location must match that of the destination dataset. - location="US", - job_config=job_config, - ).result() - - # [START bigquery_copy_table_multiple_source] - # from google.cloud import bigquery - # client = bigquery.Client() - # source_dataset_id = 'my_source_dataset' - # dest_dataset_id = 'my_destination_dataset' - - table1_ref = client.dataset(source_dataset_id).table("table1") - table2_ref = client.dataset(source_dataset_id).table("table2") - dest_table_ref = client.dataset(dest_dataset_id).table("destination_table") - - job = client.copy_table( - [table1_ref, table2_ref], - dest_table_ref, - # Location must match that of the source and destination tables. - location="US", - ) # API request - job.result() # Waits for job to complete. - - assert job.state == "DONE" - dest_table = client.get_table(dest_table_ref) # API request - assert dest_table.num_rows > 0 - # [END bigquery_copy_table_multiple_source] - - assert dest_table.num_rows == 2 - - -def test_copy_table_cmek(client, to_delete): - dataset_id = "copy_table_cmek_{}".format(_millis()) - dest_dataset = bigquery.Dataset(client.dataset(dataset_id)) - dest_dataset.location = "US" - dest_dataset = client.create_dataset(dest_dataset) - to_delete.append(dest_dataset) - - # [START bigquery_copy_table_cmek] - # from google.cloud import bigquery - # client = bigquery.Client() - - source_dataset = bigquery.DatasetReference("bigquery-public-data", "samples") - source_table_ref = source_dataset.table("shakespeare") - - # dataset_id = 'my_dataset' - dest_dataset_ref = client.dataset(dataset_id) - dest_table_ref = dest_dataset_ref.table("destination_table") - - # Set the encryption key to use for the destination. - # TODO: Replace this key with a key you have created in KMS. - kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us", "test", "test" - ) - encryption_config = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name) - job_config = bigquery.CopyJobConfig() - job_config.destination_encryption_configuration = encryption_config - - job = client.copy_table( - source_table_ref, - dest_table_ref, - # Location must match that of the source and destination tables. - location="US", - job_config=job_config, - ) # API request - job.result() # Waits for job to complete. - - assert job.state == "DONE" - dest_table = client.get_table(dest_table_ref) - assert dest_table.encryption_configuration.kms_key_name == kms_key_name - # [END bigquery_copy_table_cmek] - - def test_extract_table(client, to_delete): bucket_name = "extract_shakespeare_{}".format(_millis()) storage_client = storage.Client() @@ -1520,273 +1420,6 @@ def test_manage_job(client): # [END bigquery_get_job] -def test_client_query_destination_table(client, to_delete): - """Run a query""" - dataset_id = "query_destination_table_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - to_delete.append(dataset_ref) - dataset = bigquery.Dataset(dataset_ref) - dataset.location = "US" - client.create_dataset(dataset) - - # [START bigquery_query_destination_table] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'your_dataset_id' - - job_config = bigquery.QueryJobConfig() - # Set the destination table - table_ref = client.dataset(dataset_id).table("your_table_id") - job_config.destination = table_ref - sql = """ - SELECT corpus - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY corpus; - """ - - # Start the query, passing in the extra configuration. - query_job = client.query( - sql, - # Location must match that of the dataset(s) referenced in the query - # and of the destination table. - location="US", - job_config=job_config, - ) # API request - starts the query - - query_job.result() # Waits for the query to finish - print("Query results loaded to table {}".format(table_ref.path)) - # [END bigquery_query_destination_table] - - -def test_client_query_destination_table_legacy(client, to_delete): - dataset_id = "query_destination_table_legacy_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - to_delete.append(dataset_ref) - dataset = bigquery.Dataset(dataset_ref) - dataset.location = "US" - client.create_dataset(dataset) - - # [START bigquery_query_legacy_large_results] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'your_dataset_id' - - job_config = bigquery.QueryJobConfig() - # Set use_legacy_sql to True to use legacy SQL syntax. - job_config.use_legacy_sql = True - # Set the destination table - table_ref = client.dataset(dataset_id).table("your_table_id") - job_config.destination = table_ref - job_config.allow_large_results = True - sql = """ - SELECT corpus - FROM [bigquery-public-data:samples.shakespeare] - GROUP BY corpus; - """ - # Start the query, passing in the extra configuration. - query_job = client.query( - sql, - # Location must match that of the dataset(s) referenced in the query - # and of the destination table. - location="US", - job_config=job_config, - ) # API request - starts the query - - query_job.result() # Waits for the query to finish - print("Query results loaded to table {}".format(table_ref.path)) - # [END bigquery_query_legacy_large_results] - - -def test_client_query_destination_table_cmek(client, to_delete): - """Run a query""" - dataset_id = "query_destination_table_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - to_delete.append(dataset_ref) - dataset = bigquery.Dataset(dataset_ref) - dataset.location = "US" - client.create_dataset(dataset) - - # [START bigquery_query_destination_table_cmek] - # from google.cloud import bigquery - # client = bigquery.Client() - - job_config = bigquery.QueryJobConfig() - - # Set the destination table. Here, dataset_id is a string, such as: - # dataset_id = 'your_dataset_id' - table_ref = client.dataset(dataset_id).table("your_table_id") - job_config.destination = table_ref - - # Set the encryption key to use for the destination. - # TODO: Replace this key with a key you have created in KMS. - kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us", "test", "test" - ) - encryption_config = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name) - job_config.destination_encryption_configuration = encryption_config - - # Start the query, passing in the extra configuration. - query_job = client.query( - "SELECT 17 AS my_col;", - # Location must match that of the dataset(s) referenced in the query - # and of the destination table. - location="US", - job_config=job_config, - ) # API request - starts the query - query_job.result() - - # The destination table is written using the encryption configuration. - table = client.get_table(table_ref) - assert table.encryption_configuration.kms_key_name == kms_key_name - # [END bigquery_query_destination_table_cmek] - - -def test_client_query_batch(client, to_delete): - # [START bigquery_query_batch] - # from google.cloud import bigquery - # client = bigquery.Client() - - job_config = bigquery.QueryJobConfig() - # Run at batch priority, which won't count toward concurrent rate limit. - job_config.priority = bigquery.QueryPriority.BATCH - sql = """ - SELECT corpus - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY corpus; - """ - # Location must match that of the dataset(s) referenced in the query. - location = "US" - - # API request - starts the query - query_job = client.query(sql, location=location, job_config=job_config) - - # Check on the progress by getting the job's updated state. Once the state - # is `DONE`, the results are ready. - query_job = client.get_job( - query_job.job_id, location=location - ) # API request - fetches job - print("Job {} is currently in state {}".format(query_job.job_id, query_job.state)) - # [END bigquery_query_batch] - - -def test_client_query_relax_column(client, to_delete): - dataset_id = "query_relax_column_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - dataset = bigquery.Dataset(dataset_ref) - dataset.location = "US" - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - table_ref = dataset_ref.table("my_table") - schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - table = client.create_table(bigquery.Table(table_ref, schema=schema)) - - # [START bigquery_relax_column_query_append] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - - # Retrieves the destination table and checks the number of required fields - table_id = "my_table" - table_ref = dataset_ref.table(table_id) - table = client.get_table(table_ref) - original_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) - # In this example, the existing table has 2 required fields - print("{} fields in the schema are required.".format(original_required_fields)) - - # Configures the query to append the results to a destination table, - # allowing field relaxation - job_config = bigquery.QueryJobConfig() - job_config.schema_update_options = [ - bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION - ] - job_config.destination = table_ref - job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND - - query_job = client.query( - # In this example, the existing table contains 'full_name' and 'age' as - # required columns, but the query results will omit the second column. - 'SELECT "Beyonce" as full_name;', - # Location must match that of the dataset(s) referenced in the query - # and of the destination table. - location="US", - job_config=job_config, - ) # API request - starts the query - - query_job.result() # Waits for the query to finish - print("Query job {} complete.".format(query_job.job_id)) - - # Checks the updated number of required fields - table = client.get_table(table) - current_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) - print("{} fields in the schema are now required.".format(current_required_fields)) - # [END bigquery_relax_column_query_append] - assert original_required_fields - current_required_fields > 0 - assert len(table.schema) == 2 - assert table.schema[1].mode == "NULLABLE" - assert table.num_rows > 0 - - -def test_client_query_add_column(client, to_delete): - dataset_id = "query_add_column_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - dataset = bigquery.Dataset(dataset_ref) - dataset.location = "US" - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - table_ref = dataset_ref.table("my_table") - schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - table = client.create_table(bigquery.Table(table_ref, schema=schema)) - - # [START bigquery_add_column_query_append] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - - # Retrieves the destination table and checks the length of the schema - table_id = "my_table" - table_ref = dataset_ref.table(table_id) - table = client.get_table(table_ref) - print("Table {} contains {} columns.".format(table_id, len(table.schema))) - - # Configures the query to append the results to a destination table, - # allowing field addition - job_config = bigquery.QueryJobConfig() - job_config.schema_update_options = [ - bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION - ] - job_config.destination = table_ref - job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND - - query_job = client.query( - # In this example, the existing table contains only the 'full_name' and - # 'age' columns, while the results of this query will contain an - # additional 'favorite_color' column. - 'SELECT "Timmy" as full_name, 85 as age, "Blue" as favorite_color;', - # Location must match that of the dataset(s) referenced in the query - # and of the destination table. - location="US", - job_config=job_config, - ) # API request - starts the query - - query_job.result() # Waits for the query to finish - print("Query job {} complete.".format(query_job.job_id)) - - # Checks the updated length of the schema - table = client.get_table(table) - print("Table {} now contains {} columns.".format(table_id, len(table.schema))) - # [END bigquery_add_column_query_append] - assert len(table.schema) == 3 - assert table.num_rows > 0 - - def test_client_query_w_named_params(client, capsys): """Run a query using named query parameters""" @@ -1977,38 +1610,6 @@ def test_client_query_w_struct_params(client, capsys): assert "foo" in out -def test_client_query_dry_run(client): - """Run a dry run query""" - - # [START bigquery_query_dry_run] - # from google.cloud import bigquery - # client = bigquery.Client() - - job_config = bigquery.QueryJobConfig() - job_config.dry_run = True - job_config.use_query_cache = False - query_job = client.query( - ( - "SELECT name, COUNT(*) as name_count " - "FROM `bigquery-public-data.usa_names.usa_1910_2013` " - "WHERE state = 'WA' " - "GROUP BY name" - ), - # Location must match that of the dataset(s) referenced in the query. - location="US", - job_config=job_config, - ) # API request - - # A dry run query completes immediately. - assert query_job.state == "DONE" - assert query_job.dry_run - - print("This query will process {} bytes.".format(query_job.total_bytes_processed)) - # [END bigquery_query_dry_run] - - assert query_job.total_bytes_processed > 0 - - def test_query_no_cache(client): # [START bigquery_query_no_cache] # from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/docs/usage/encryption.rst b/packages/google-cloud-bigquery/docs/usage/encryption.rst index 88d23067995e..b512e6c4d7bf 100644 --- a/packages/google-cloud-bigquery/docs/usage/encryption.rst +++ b/packages/google-cloud-bigquery/docs/usage/encryption.rst @@ -36,7 +36,7 @@ Cloud KMS for the destination table. Copy a table, using a customer-managed encryption key from Cloud KMS for the destination table. -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/copy_table_cmek.py :language: python :dedent: 4 :start-after: [START bigquery_copy_table_cmek] @@ -45,7 +45,7 @@ destination table. Write query results to a table, using a customer-managed encryption key from Cloud KMS for the destination table. -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/client_query_destination_table_cmek.py :language: python :dedent: 4 :start-after: [START bigquery_query_destination_table_cmek] diff --git a/packages/google-cloud-bigquery/docs/usage/queries.rst b/packages/google-cloud-bigquery/docs/usage/queries.rst index 5c9dbe18fa63..9210e04bc6a2 100644 --- a/packages/google-cloud-bigquery/docs/usage/queries.rst +++ b/packages/google-cloud-bigquery/docs/usage/queries.rst @@ -17,7 +17,7 @@ Run a query and wait for it to finish with the Run a dry run query ^^^^^^^^^^^^^^^^^^^ -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/client_query_dry_run.py :language: python :dedent: 4 :start-after: [START bigquery_query_dry_run] @@ -30,7 +30,7 @@ Writing query results to a destination table See BigQuery documentation for more information on `writing query results `_. -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/client_query_destination_table.py :language: python :dedent: 4 :start-after: [START bigquery_query_destination_table] diff --git a/packages/google-cloud-bigquery/samples/client_query.py b/packages/google-cloud-bigquery/samples/client_query.py index 9dccfd38cbcf..5242c854e220 100644 --- a/packages/google-cloud-bigquery/samples/client_query.py +++ b/packages/google-cloud-bigquery/samples/client_query.py @@ -30,9 +30,7 @@ def client_query(client): ORDER BY total_people DESC LIMIT 20 """ - query_job = client.query( - query, location="US" # Must match the destination dataset(s) location. - ) # Make an API request. + query_job = client.query(query) # Make an API request. print("The query data:") for row in query_job: diff --git a/packages/google-cloud-bigquery/samples/client_query_add_column.py b/packages/google-cloud-bigquery/samples/client_query_add_column.py new file mode 100644 index 000000000000..1cde370a35ed --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_add_column.py @@ -0,0 +1,52 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_add_column(client, table_id): + + # [START bigquery_add_column_query_append] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the destination table. + # table_id = "your-project.your_dataset.your_table_name" + + # Retrieves the destination table and checks the length of the schema. + table = client.get_table(table_id) # Make an API request. + print("Table {} contains {} columns".format(table_id, len(table.schema))) + + # Configures the query to append the results to a destination table, + # allowing field addition. + job_config = bigquery.QueryJobConfig(destination=table_id) + job_config.schema_update_options = [ + bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION + ] + job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND + + # Start the query, passing in the extra configuration. + query_job = client.query( + # In this example, the existing table contains only the 'full_name' and + # 'age' columns, while the results of this query will contain an + # additional 'favorite_color' column. + 'SELECT "Timmy" as full_name, 85 as age, "Blue" as favorite_color;', + job_config=job_config, + ) # Make an API request. + query_job.result() # Wait for the job to complete. + + # Checks the updated length of the schema. + table = client.get_table(table_id) # Make an API request. + print("Table {} now contains {} columns".format(table_id, len(table.schema))) + # [END bigquery_add_column_query_append] diff --git a/packages/google-cloud-bigquery/samples/client_query_batch.py b/packages/google-cloud-bigquery/samples/client_query_batch.py new file mode 100644 index 000000000000..af9fcd8a1e40 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_batch.py @@ -0,0 +1,46 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_batch(client): + + # [START bigquery_query_batch] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + job_config = bigquery.QueryJobConfig( + # Run at batch priority, which won't count toward concurrent rate limit. + priority=bigquery.QueryPriority.BATCH + ) + + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + + # Start the query, passing in the extra configuration. + query_job = client.query(sql, job_config=job_config) # Make an API request. + + # Check on the progress by getting the job's updated state. Once the state + # is `DONE`, the results are ready. + query_job = client.get_job( + query_job.job_id, location=query_job.location + ) # Make an API request. + + print("Job {} is currently in state {}".format(query_job.job_id, query_job.state)) + # [END bigquery_query_batch] + return query_job diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table.py b/packages/google-cloud-bigquery/samples/client_query_destination_table.py new file mode 100644 index 000000000000..876df7904d9c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table.py @@ -0,0 +1,40 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_destination_table(client, table_id): + + # [START bigquery_query_destination_table] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the destination table. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.QueryJobConfig(destination=table_id) + + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + + # Start the query, passing in the extra configuration. + query_job = client.query(sql, job_config=job_config) # Make an API request. + query_job.result() # Wait for the job to complete. + + print("Query results loaded to the table {}".format(table_id)) + # [END bigquery_query_destination_table] diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py new file mode 100644 index 000000000000..d3409eecd77d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py @@ -0,0 +1,49 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_destination_table_cmek(client, table_id, kms_key_name): + + # [START bigquery_query_destination_table_cmek] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the destination table. + # table_id = "your-project.your_dataset.your_table_name" + + # Set the encryption key to use for the destination. + # TODO(developer): Replace this key with a key you have created in KMS. + # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # your-project, location, your-ring, your-key + # ) + + job_config = bigquery.QueryJobConfig( + destination=table_id, + destination_encryption_configuration=bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name + ), + ) + + # Start the query, passing in the extra configuration. + query_job = client.query( + "SELECT 17 AS my_col;", job_config=job_config + ) # Make an API request. + query_job.result() # Wait for the job to complete. + + table = client.get_table(table_id) # Make an API request. + if table.encryption_configuration.kms_key_name == kms_key_name: + print("The destination table is written using the encryption configuration") + # [END bigquery_query_destination_table_cmek] diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py new file mode 100644 index 000000000000..8e977a92d996 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py @@ -0,0 +1,44 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_destination_table_legacy(client, table_id): + + # [START bigquery_query_legacy_large_results] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the destination table. + # table_id = "your-project.your_dataset.your_table_name" + + # Set the destination table and use_legacy_sql to True to use + # legacy SQL syntax. + job_config = bigquery.QueryJobConfig( + allow_large_results=True, destination=table_id, use_legacy_sql=True + ) + + sql = """ + SELECT corpus + FROM [bigquery-public-data:samples.shakespeare] + GROUP BY corpus; + """ + + # Start the query, passing in the extra configuration. + query_job = client.query(sql, job_config=job_config) # Make an API request. + query_job.result() # Wait for the job to complete. + + print("Query results loaded to the table {}".format(table_id)) + # [END bigquery_query_legacy_large_results] diff --git a/packages/google-cloud-bigquery/samples/client_query_dry_run.py b/packages/google-cloud-bigquery/samples/client_query_dry_run.py new file mode 100644 index 000000000000..2d09a1c25f4a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_dry_run.py @@ -0,0 +1,40 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_dry_run(client): + + # [START bigquery_query_dry_run] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False) + + # Start the query, passing in the extra configuration. + query_job = client.query( + ( + "SELECT name, COUNT(*) as name_count " + "FROM `bigquery-public-data.usa_names.usa_1910_2013` " + "WHERE state = 'WA' " + "GROUP BY name" + ), + job_config=job_config, + ) # Make an API request. + + # A dry run query completes immediately. + print("This query will process {} bytes.".format(query_job.total_bytes_processed)) + # [END bigquery_query_dry_run] + return query_job diff --git a/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py b/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py new file mode 100644 index 000000000000..c8dae20649e2 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py @@ -0,0 +1,39 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_legacy_sql(client): + + # [START bigquery_query_legacy] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + query = ( + "SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] " + 'WHERE state = "TX" ' + "LIMIT 100" + ) + + # Set use_legacy_sql to True to use legacy SQL syntax. + job_config = bigquery.QueryJobConfig(use_legacy_sql=True) + + # Start the query, passing in the extra configuration. + query_job = client.query(query, job_config=job_config) # Make an API request. + + print("The query data:") + for row in query_job: + print(row) + # [END bigquery_query_legacy] diff --git a/packages/google-cloud-bigquery/samples/client_query_relax_column.py b/packages/google-cloud-bigquery/samples/client_query_relax_column.py new file mode 100644 index 000000000000..d8e5743c1e33 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_relax_column.py @@ -0,0 +1,55 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_relax_column(client, table_id): + + # [START bigquery_relax_column_query_append] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the destination table. + # table_id = "your-project.your_dataset.your_table_name" + + # Retrieves the destination table and checks the number of required fields. + table = client.get_table(table_id) # Make an API request. + original_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) + + # In this example, the existing table has 2 required fields. + print("{} fields in the schema are required.".format(original_required_fields)) + + # Configures the query to append the results to a destination table, + # allowing field relaxation. + job_config = bigquery.QueryJobConfig(destination=table_id) + job_config.schema_update_options = [ + bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION + ] + job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND + + # Start the query, passing in the extra configuration. + query_job = client.query( + # In this example, the existing table contains 'full_name' and 'age' as + # required columns, but the query results will omit the second column. + 'SELECT "Beyonce" as full_name;', + job_config=job_config, + ) # Make an API request. + query_job.result() # Wait for the job to complete. + + # Checks the updated number of required fields. + table = client.get_table(table_id) # Make an API request. + current_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) + print("{} fields in the schema are now required.".format(current_required_fields)) + # [END bigquery_relax_column_query_append] diff --git a/packages/google-cloud-bigquery/samples/copy_table.py b/packages/google-cloud-bigquery/samples/copy_table.py index f6ebd91470eb..20f6776cf87d 100644 --- a/packages/google-cloud-bigquery/samples/copy_table.py +++ b/packages/google-cloud-bigquery/samples/copy_table.py @@ -28,12 +28,8 @@ def copy_table(client, source_table_id, destination_table_id): # TODO(developer): Set destination_table_id to the ID of the destination table. # destination_table_id = "your-project.destination_dataset.destination_table" - job = client.copy_table( - source_table_id, - destination_table_id, - location="US", # Must match the source and destination tables location. - ) - job.result() # Waits for job to complete. + job = client.copy_table(source_table_id, destination_table_id) + job.result() # Wait for the job to complete. print("A copy of the table created.") # [END bigquery_copy_table] diff --git a/packages/google-cloud-bigquery/samples/copy_table_cmek.py b/packages/google-cloud-bigquery/samples/copy_table_cmek.py new file mode 100644 index 000000000000..1e9ee198c821 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/copy_table_cmek.py @@ -0,0 +1,47 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def copy_table_cmek(client, dest_table_id, orig_table_id, kms_key_name): + + # [START bigquery_copy_table_cmek] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dest_table_id to the ID of the destination table. + # dest_table_id = "your-project.your_dataset.your_table_name" + + # TODO(developer): Set orig_table_id to the ID of the original table. + # orig_table_id = "your-project.your_dataset.your_table_name" + + # Set the encryption key to use for the destination. + # TODO(developer): Replace this key with a key you have created in KMS. + # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # your-project, location, your-ring, your-key + # ) + + job_config = bigquery.CopyJobConfig( + destination_encryption_configuration=bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name + ) + ) + job = client.copy_table(orig_table_id, dest_table_id, job_config=job_config) + job.result() # Wait for the job to complete. + + dest_table = client.get_table(dest_table_id) # Make an API request. + if dest_table.encryption_configuration.kms_key_name == kms_key_name: + print("A copy of the table created") + # [END bigquery_copy_table_cmek] diff --git a/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py b/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py new file mode 100644 index 000000000000..532ea0a0ab90 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py @@ -0,0 +1,35 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def copy_table_multiple_source(client, dest_table_id, table_ids): + + # [START bigquery_copy_table_multiple_source] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dest_table_id to the ID of the destination table. + # dest_table_id = "your-project.your_dataset.your_table_name" + + # TODO(developer): Set table_ids to the list of the IDs of the original tables. + # table_ids = ["your-project.your_dataset.your_table_name", ...] + + job = client.copy_table(table_ids, dest_table_id) # Make an API request. + job.result() # Wait for the job to complete. + + print("The tables {} have been appended to {}".format(table_ids, dest_table_id)) + # [END bigquery_copy_table_multiple_source] diff --git a/packages/google-cloud-bigquery/samples/load_table_dataframe.py b/packages/google-cloud-bigquery/samples/load_table_dataframe.py index ea6fe5d02384..8cfb34424457 100644 --- a/packages/google-cloud-bigquery/samples/load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/load_table_dataframe.py @@ -61,10 +61,7 @@ def load_table_dataframe(client, table_id): ) job = client.load_table_from_dataframe( - dataframe, - table_id, - job_config=job_config, - location="US", # Must match the destination dataset location. + dataframe, table_id, job_config=job_config ) # Make an API request. job.result() # Wait for the job to complete. diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py index ce9b1c928782..e9bc908f5b15 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py @@ -61,6 +61,7 @@ def query_external_sheets_permanent_table(dataset_id): # Example query to find states starting with "W". sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id) + query_job = client.query(sql) # Make an API request. # Wait for the query to complete. diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py index e89b6efab362..7b6bde864b09 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py @@ -52,11 +52,11 @@ def query_external_sheets_temporary_table(): "us-states!A20:B49" # Optionally set range of the sheet to query from. ) table_id = "us_states" - job_config = bigquery.QueryJobConfig() - job_config.table_definitions = {table_id: external_config} + job_config = bigquery.QueryJobConfig(table_definitions={table_id: external_config}) # Example query to find states starting with "W". sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id) + query_job = client.query(sql, job_config=job_config) # Make an API request. # Wait for the query to complete. diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index a06bb9c90d1d..6d049e6c2312 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -148,3 +148,8 @@ def model_id(client, dataset_id): client.query(sql).result() return model_id + + +@pytest.fixture +def kms_key_name(): + return "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/test" diff --git a/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py b/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py index e6c56e6cbfbc..de51bfed7672 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py +++ b/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import add_empty_column diff --git a/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py index 0e9cc6055494..db9b867f5ab7 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py +++ b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import browse_table_data diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py b/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py index 011e081fdee4..ada053239802 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import client_list_jobs from .. import create_job diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query.py b/packages/google-cloud-bigquery/samples/tests/test_client_query.py index fd5b8e7edd97..e73e7e5a0eb4 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import client_query diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py new file mode 100644 index 000000000000..67ac328d5518 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py @@ -0,0 +1,32 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + +from .. import client_query_add_column + + +def test_client_query_add_column(capsys, client, random_table_id): + + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + client.create_table(bigquery.Table(random_table_id, schema=schema)) + + client_query_add_column.client_query_add_column(client, random_table_id) + out, err = capsys.readouterr() + assert "Table {} contains 2 columns".format(random_table_id) in out + assert "Table {} now contains 3 columns".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_routine.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py similarity index 73% rename from packages/google-cloud-bigquery/samples/tests/test_create_routine.py rename to packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py index 7220d63542e2..79197e4565c7 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_routine.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py @@ -12,12 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .. import client_query_batch -from .. import create_routine +def test_client_query_batch(capsys, client): -def test_create_routine(capsys, client, random_routine_id): - - create_routine.create_routine(client, random_routine_id) + job = client_query_batch.client_query_batch(client) out, err = capsys.readouterr() - assert "Created routine {}".format(random_routine_id) in out + assert "Job {} is currently in state {}".format(job.job_id, job.state) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_routines.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py similarity index 69% rename from packages/google-cloud-bigquery/samples/tests/test_list_routines.py rename to packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py index e249238e1976..d29aaebd3ce5 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_routines.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py @@ -12,13 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .. import client_query_destination_table -from .. import list_routines +def test_client_query_destination_table(capsys, client, table_id): -def test_list_routines(capsys, client, dataset_id, routine_id): - - list_routines.list_routines(client, dataset_id) + client_query_destination_table.client_query_destination_table(client, table_id) out, err = capsys.readouterr() - assert "Routines contained in dataset {}:".format(dataset_id) in out - assert routine_id in out + assert "Query results loaded to the table {}".format(table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py new file mode 100644 index 000000000000..cd4532be6d1d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py @@ -0,0 +1,26 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_query_destination_table_cmek + + +def test_client_query_destination_table_cmek( + capsys, client, random_table_id, kms_key_name +): + + client_query_destination_table_cmek.client_query_destination_table_cmek( + client, random_table_id, kms_key_name + ) + out, err = capsys.readouterr() + assert "The destination table is written using the encryption configuration" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_routine.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py similarity index 64% rename from packages/google-cloud-bigquery/samples/tests/test_get_routine.py rename to packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py index fa5f3093116c..da62baada213 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_routine.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py @@ -12,16 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .. import client_query_destination_table_legacy -from .. import get_routine +def test_client_query_destination_table_legacy(capsys, client, random_table_id): -def test_get_routine(capsys, client, routine_id): - - get_routine.get_routine(client, routine_id) + client_query_destination_table_legacy.client_query_destination_table_legacy( + client, random_table_id + ) out, err = capsys.readouterr() - assert "Routine '{}':".format(routine_id) in out - assert "Type: 'SCALAR_FUNCTION'" in out - assert "Language: 'SQL'" in out - assert "Name: 'x'" in out - assert "Type: 'type_kind: INT64\n'" in out + assert "Query results loaded to the table {}".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py new file mode 100644 index 000000000000..c39a22767d25 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py @@ -0,0 +1,25 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_query_dry_run + + +def test_client_query_dry_run(capsys, client): + + query_job = client_query_dry_run.client_query_dry_run(client) + out, err = capsys.readouterr() + assert "This query will process" in out + assert query_job.state == "DONE" + assert query_job.dry_run + assert query_job.total_bytes_processed > 0 diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_routine.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py similarity index 74% rename from packages/google-cloud-bigquery/samples/tests/test_delete_routine.py rename to packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py index 9347d1e22dc2..fb6ee60bc6ec 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_delete_routine.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re -from .. import delete_routine +from .. import client_query_legacy_sql -def test_delete_routine(capsys, client, routine_id): +def test_client_query_legacy_sql(capsys, client): - delete_routine.delete_routine(client, routine_id) + client_query_legacy_sql.client_query_legacy_sql(client) out, err = capsys.readouterr() - assert "Deleted routine {}.".format(routine_id) in out + assert re.search(r"(Row[\w(){}:', ]+)$", out) diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py new file mode 100644 index 000000000000..685db9cb1fa0 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py @@ -0,0 +1,32 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + +from .. import client_query_relax_column + + +def test_client_query_relax_column(capsys, client, random_table_id): + + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + client.create_table(bigquery.Table(random_table_id, schema=schema)) + + client_query_relax_column.client_query_relax_column(client, random_table_id) + out, err = capsys.readouterr() + assert "2 fields in the schema are required." in out + assert "0 fields in the schema are now required." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py index 6d7de2d9132c..0138cd8ee1e2 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import copy_table diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py new file mode 100644 index 000000000000..25238071b947 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py @@ -0,0 +1,26 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import copy_table_cmek + + +def test_copy_table_cmek( + capsys, client, random_table_id, table_with_data_id, kms_key_name +): + + copy_table_cmek.copy_table_cmek( + client, random_table_id, table_with_data_id, kms_key_name + ) + out, err = capsys.readouterr() + assert "A copy of the table created" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py new file mode 100644 index 000000000000..755fa2ccb5e9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py @@ -0,0 +1,55 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import six +from google.cloud import bigquery + +from .. import copy_table_multiple_source + + +def test_copy_table_multiple_source(capsys, client, random_table_id, random_dataset_id): + + schema = [ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ] + + dataset = bigquery.Dataset(random_dataset_id) + dataset.location = "US" + dataset = client.create_dataset(dataset) + table_data = {"table1": b"Washington,WA", "table2": b"California,CA"} + for table_id, data in table_data.items(): + table_ref = dataset.table(table_id) + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + body = six.BytesIO(data) + client.load_table_from_file( + body, table_ref, location="US", job_config=job_config + ).result() + + table_ids = [ + "{}.table1".format(random_dataset_id), + "{}.table2".format(random_dataset_id), + ] + + copy_table_multiple_source.copy_table_multiple_source( + client, random_table_id, table_ids + ) + dest_table = client.get_table(random_table_id) + out, err = capsys.readouterr() + assert ( + "The tables {} have been appended to {}".format(table_ids, random_table_id) + in out + ) + assert dest_table.num_rows > 0 diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py index e52e9ddfdced..dfadc67d8468 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import create_dataset diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_job.py b/packages/google-cloud-bigquery/samples/tests/test_create_job.py index 5ead51156606..bbf880cbe402 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_job.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_job.py @@ -12,13 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import create_job def test_create_job(capsys, client): query_job = create_job.create_job(client) - client.cancel_job(query_job.job_id, location="US") + client.cancel_job(query_job.job_id, location=query_job.location) out, err = capsys.readouterr() assert "Started job: {}".format(query_job.job_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table.py b/packages/google-cloud-bigquery/samples/tests/test_create_table.py index f9ebc0e5d70d..093ee6e94277 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import create_table diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py b/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py index ca186f9a7554..ac312b033832 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import create_table_range_partitioned diff --git a/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py b/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py index 203c4b884dc4..a44e60371120 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py +++ b/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from google.cloud import bigquery from .. import dataset_exists diff --git a/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py index 1e526f2339ac..94a2092407b0 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import delete_dataset_labels from .. import get_dataset_labels from .. import label_dataset diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py index 836b3aebb272..2b1b6ad06195 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import delete_dataset diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_table.py b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py index f76ad8624cc6..8f4796623a83 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_delete_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import delete_table diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py index 8682be7ee3e9..dedec1d7b29e 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import get_dataset diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_table.py b/packages/google-cloud-bigquery/samples/tests/test_get_table.py index 8adaa6557954..b950d434aef6 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_table.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from google.cloud import bigquery from .. import get_table diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py b/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py index d8c32e91ee20..4c66a24f9b1a 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import list_datasets diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py b/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py index f414539b00b3..6d04a281ff42 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import list_datasets_by_label diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_tables.py b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py index 61ac04ea26ce..ec1621ac7579 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_tables.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import list_tables diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py index 2151704d3b25..3b7cb16ea692 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - import pytest from .. import load_table_dataframe diff --git a/packages/google-cloud-bigquery/samples/tests/test_model_samples.py b/packages/google-cloud-bigquery/samples/tests/test_model_samples.py index 99d838533917..d7b06a92a3e1 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_model_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_model_samples.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import delete_model from .. import get_model from .. import list_models diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py index a7b5db09e5af..a00930cad881 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import query_external_sheets_permanent_table diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py index 4856b6a49d2b..8274787cb644 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import query_external_sheets_temporary_table diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py index 2fbed807ece4..dd9b3ab508cc 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - import pyarrow from .. import query_to_arrow diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_routine_ddl.py b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py similarity index 62% rename from packages/google-cloud-bigquery/samples/tests/test_create_routine_ddl.py rename to packages/google-cloud-bigquery/samples/tests/test_routine_samples.py index bcb3249d26ef..81d33a0cf5df 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_routine_ddl.py +++ b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py @@ -12,19 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. - from google.cloud import bigquery from google.cloud import bigquery_v2 -from .. import create_routine_ddl + +def test_create_routine(capsys, client, random_routine_id): + from .. import create_routine + + create_routine.create_routine(client, random_routine_id) + out, err = capsys.readouterr() + assert "Created routine {}".format(random_routine_id) in out def test_create_routine_ddl(capsys, client, random_routine_id): + from .. import create_routine_ddl create_routine_ddl.create_routine_ddl(client, random_routine_id) routine = client.get_routine(random_routine_id) out, err = capsys.readouterr() + assert "Created routine {}".format(random_routine_id) in out + return routine assert routine.type_ == "SCALAR_FUNCTION" assert routine.language == "SQL" expected_arguments = [ @@ -55,3 +63,39 @@ def test_create_routine_ddl(capsys, client, random_routine_id): ) ] assert routine.arguments == expected_arguments + + +def test_list_routines(capsys, client, dataset_id, routine_id): + from .. import list_routines + + list_routines.list_routines(client, dataset_id) + out, err = capsys.readouterr() + assert "Routines contained in dataset {}:".format(dataset_id) in out + assert routine_id in out + + +def test_get_routine(capsys, client, routine_id): + from .. import get_routine + + get_routine.get_routine(client, routine_id) + out, err = capsys.readouterr() + assert "Routine '{}':".format(routine_id) in out + assert "Type: 'SCALAR_FUNCTION'" in out + assert "Language: 'SQL'" in out + assert "Name: 'x'" in out + assert "Type: 'type_kind: INT64\n'" in out + + +def test_delete_routine(capsys, client, routine_id): + from .. import delete_routine + + delete_routine.delete_routine(client, routine_id) + out, err = capsys.readouterr() + assert "Deleted routine {}.".format(routine_id) in out + + +def test_update_routine(client, routine_id): + from .. import update_routine + + routine = update_routine.update_routine(client, routine_id) + assert routine.body == "x * 4" diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_exists.py b/packages/google-cloud-bigquery/samples/tests/test_table_exists.py index 232d77fbcb60..ae4fc65f847c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_exists.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_exists.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from google.cloud import bigquery from .. import table_exists diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py index 95d119dbdc93..9c5fd5768cfb 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from google.cloud import bigquery from .. import table_insert_rows diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py index 6a59609baacf..a2a4febd7f75 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from google.cloud import bigquery from .. import table_insert_rows_explicit_none_insert_ids as mut diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py index 679b700731e3..ae33dbfe4a4c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import update_dataset_access diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py index a97de11a2f1a..46e9654209ed 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import update_dataset_default_table_expiration diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py index 63826077b976..c6f8889f50da 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from .. import update_dataset_description diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_routine.py b/packages/google-cloud-bigquery/samples/tests/test_update_routine.py deleted file mode 100644 index 8adfab32e032..000000000000 --- a/packages/google-cloud-bigquery/samples/tests/test_update_routine.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from .. import update_routine - - -def test_update_routine(client, routine_id): - - routine = update_routine.update_routine(client, routine_id) - assert routine.body == "x * 4" diff --git a/packages/google-cloud-bigquery/samples/undelete_table.py b/packages/google-cloud-bigquery/samples/undelete_table.py index 2d544cf5aa8c..9db9712d2a74 100644 --- a/packages/google-cloud-bigquery/samples/undelete_table.py +++ b/packages/google-cloud-bigquery/samples/undelete_table.py @@ -46,7 +46,7 @@ def undelete_table(client, table_id, recovered_table_id): # [END_EXCLUDE] # "Accidentally" delete the table. - client.delete_table(table_id) # API request + client.delete_table(table_id) # Make an API request. # Construct the restore-from table ID using a snapshot decorator. snapshot_table_id = "{}@{}".format(table_id, snapshot_epoch) @@ -55,11 +55,11 @@ def undelete_table(client, table_id, recovered_table_id): job = client.copy_table( snapshot_table_id, recovered_table_id, - # Location must match that of the source and destination tables. + # Must match the source and destination tables location. location="US", - ) # API request + ) # Make an API request. - job.result() # Wait for job to complete. + job.result() # Wait for the job to complete. print( "Copied data from deleted table {} to {}".format(table_id, recovered_table_id) From c67f4f940d8910eea43cd333dd331bb9710c7fd4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Dec 2019 15:25:24 -0500 Subject: [PATCH 0714/2016] chore(bigquery): run unit tests with Python 3.8 (#9880) * chore(bigquery): run unit tests with Python 3.8 Coverage is a bit low because of the missing pyarrow dependency. * doc: add comments about changes for Python 3.8 testing * unit test with fastparquet --- packages/google-cloud-bigquery/noxfile.py | 35 ++++++++---- .../tests/unit/test__pandas_helpers.py | 56 ++++++++++--------- .../tests/unit/test_client.py | 6 ++ .../tests/unit/test_magics.py | 14 +++++ .../tests/unit/test_schema.py | 2 - 5 files changed, 74 insertions(+), 39 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 87809b74a569..8c041fa6a178 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -39,7 +39,22 @@ def default(session): session.install("-e", local_dep) session.install("-e", os.path.join("..", "test_utils")) - dev_install = ".[all]" + + coverage_fail_under = "--cov-fail-under=97" + + # fastparquet is not included in .[all] because, in general, it's redundant + # with pyarrow. We still want to run some unit tests with fastparquet + # serialization, though. + dev_install = ".[all,fastparquet]" + + # There is no pyarrow or fastparquet wheel for Python 3.8. + if session.python == "3.8": + # Since many tests are skipped due to missing dependencies, test + # coverage is much lower in Python 3.8. Remove once we can test with + # pyarrow. + coverage_fail_under = "--cov-fail-under=92" + dev_install = ".[pandas,tqdm]" + session.install("-e", dev_install) # IPython does not support Python 2 after version 5.x @@ -57,19 +72,19 @@ def default(session): "--cov-append", "--cov-config=.coveragerc", "--cov-report=", - "--cov-fail-under=97", + coverage_fail_under, os.path.join("tests", "unit"), *session.posargs ) -@nox.session(python=["2.7", "3.5", "3.6", "3.7"]) +@nox.session(python=["2.7", "3.5", "3.6", "3.7", "3.8"]) def unit(session): """Run the unit test suite.""" default(session) -@nox.session(python=["2.7", "3.6"]) +@nox.session(python=["2.7", "3.7"]) def system(session): """Run the system test suite.""" @@ -100,7 +115,7 @@ def system(session): ) -@nox.session(python=["2.7", "3.6"]) +@nox.session(python=["2.7", "3.7"]) def snippets(session): """Run the snippets test suite.""" @@ -121,7 +136,7 @@ def snippets(session): session.run("py.test", "samples", *session.posargs) -@nox.session(python="3.6") +@nox.session(python="3.7") def cover(session): """Run the final coverage report. @@ -133,7 +148,7 @@ def cover(session): session.run("coverage", "erase") -@nox.session(python="3.6") +@nox.session(python="3.7") def lint(session): """Run linters. @@ -152,7 +167,7 @@ def lint(session): session.run("black", "--check", *BLACK_PATHS) -@nox.session(python="3.6") +@nox.session(python="3.7") def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" @@ -160,7 +175,7 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -@nox.session(python="3.6") +@nox.session(python="3.7") def blacken(session): """Run black. Format code to uniform standard. @@ -169,7 +184,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python="3.6") +@nox.session(python="3.7") def docs(session): """Build the docs.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index a6ccec2e094f..b2d74d54e120 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -31,7 +31,9 @@ import pyarrow import pyarrow.types except ImportError: # pragma: NO COVER - pyarrow = None + # Mock out pyarrow when missing, because methods from pyarrow.types are + # used in test parameterization. + pyarrow = mock.Mock() import pytest import pytz @@ -85,7 +87,7 @@ def all_(*functions): return functools.partial(do_all, functions) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_is_datetime(): assert is_datetime(pyarrow.timestamp("us", tz=None)) assert not is_datetime(pyarrow.timestamp("ms", tz=None)) @@ -249,7 +251,7 @@ def test_all_(): ("UNKNOWN_TYPE", "REPEATED", is_none), ], ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type): field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode) actual = module_under_test.bq_to_arrow_data_type(field) @@ -257,7 +259,7 @@ def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_t @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -301,7 +303,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -345,7 +347,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): assert actual.value_type.equals(expected_value_type) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -442,7 +444,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): series = pandas.Series(rows, dtype="object") bq_field = schema.SchemaField("field_name", bq_type) @@ -452,7 +454,7 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): rows = [[1, 2, 3], [], [4, 5, 6]] series = pandas.Series(rows, dtype="object") @@ -464,7 +466,7 @@ def test_bq_to_arrow_array_w_arrays(module_under_test): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): rows = [ {"int_col": 123, "string_col": "abc"}, @@ -486,7 +488,7 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_special_floats(module_under_test): bq_field = schema.SchemaField("field_name", "FLOAT64") rows = [float("-inf"), float("nan"), float("inf"), None] @@ -503,7 +505,7 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -729,7 +731,7 @@ def test_dataframe_to_bq_schema_dict_sequence(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_multiindex(module_under_test): bq_schema = ( schema.SchemaField("str_index", "STRING"), @@ -796,7 +798,7 @@ def test_dataframe_to_arrow_with_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_required_fields(module_under_test): bq_schema = ( schema.SchemaField("field01", "STRING", mode="REQUIRED"), @@ -851,7 +853,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_unknown_type(module_under_test): bq_schema = ( schema.SchemaField("field00", "UNKNOWN_TYPE"), @@ -884,7 +886,7 @@ def test_dataframe_to_arrow_with_unknown_type(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, @@ -914,7 +916,7 @@ def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -926,7 +928,7 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -938,7 +940,7 @@ def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_compression_method(module_under_test): bq_schema = (schema.SchemaField("field00", "STRING"),) dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) @@ -985,7 +987,7 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1015,7 +1017,7 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1040,7 +1042,7 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1101,7 +1103,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1137,7 +1139,7 @@ def test_augment_schema_type_detection_fails(module_under_test): assert "struct_field" in warning_msg and "struct_field_2" in warning_msg -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, @@ -1166,7 +1168,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1202,7 +1204,7 @@ def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): assert list(col) == [2.2, 22.22, 222.222] -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_tabledata_list_known_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1237,7 +1239,7 @@ def test_download_arrow_tabledata_list_known_field_type(module_under_test): assert list(col) == ["2.2", "22.22", "222.222"] -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1265,7 +1267,7 @@ def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ecde69d2cf97..8a82b2e19aa8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -31,6 +31,10 @@ import pytest import pytz +try: + import fastparquet +except (ImportError, AttributeError): # pragma: NO COVER + fastparquet = None try: import pandas except (ImportError, AttributeError): # pragma: NO COVER @@ -6127,6 +6131,7 @@ def test_load_table_from_dataframe_unknown_table(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(fastparquet is None, "Requires `fastparquet`") def test_load_table_from_dataframe_no_schema_warning_wo_pyarrow(self): client = self._make_client() @@ -6317,6 +6322,7 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "unknown_col" in message @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(fastparquet is None, "Requires `fastparquet`") def test_load_table_from_dataframe_w_partial_schema_missing_types(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 8e768c1b7d23..3f66b2c4b765 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -153,6 +153,7 @@ def test_context_credentials_and_project_can_be_set_explicitly(): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_context_connection_can_be_overriden(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -196,6 +197,7 @@ def test_context_connection_can_be_overriden(): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_context_no_connection(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -326,6 +328,10 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): assert "pyarrow" in error_msg +@pytest.mark.skipif( + bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" +) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -795,6 +801,7 @@ def test_bigquery_magic_w_table_id_invalid(): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_destination_var(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -831,6 +838,10 @@ def test_bigquery_magic_w_table_id_and_destination_var(): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" +) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -1027,6 +1038,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_invalid(): "param_value,expected", [("987654321", "987654321"), ("None", "0")] ) @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, expected): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -1065,6 +1077,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -1100,6 +1113,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index fc8a41c68c46..e1bdd7b2fb73 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -177,7 +177,6 @@ def test_to_standard_sql_simple_type(self): standard_field = field.to_standard_sql() self.assertEqual(standard_field.name, "some_field") self.assertEqual(standard_field.type.type_kind, standard_type) - self.assertFalse(standard_field.type.HasField("sub_type")) def test_to_standard_sql_struct_type(self): from google.cloud.bigquery_v2 import types @@ -316,7 +315,6 @@ def test_to_standard_sql_unknown_type(self): self.assertEqual(standard_field.name, "weird_field") self.assertEqual(standard_field.type.type_kind, sql_type.TYPE_KIND_UNSPECIFIED) - self.assertFalse(standard_field.type.HasField("sub_type")) def test___eq___wrong_type(self): field = self._make_one("test", "STRING") From a526ddad12cc7ec837804a7ac3fd6876ed5d18c5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Dec 2019 16:35:00 -0800 Subject: [PATCH 0715/2016] chore(bigquery): release 1.23.0 (#9961) * chore(bigquery): release 1.23.0 * doc: sentence-case for changelog * fix: add timeouts to unit tests --- packages/google-cloud-bigquery/CHANGELOG.md | 20 +++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- .../tests/unit/test__http.py | 12 +++++++++-- .../tests/unit/test_client.py | 1 + 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 7b76ae652200..3f51c5e88583 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,26 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.23.0 + +12-11-2019 13:31 PST + +### New Features + +- Add `close()` method to client for releasing open sockets. ([#9894](https://github.com/googleapis/google-cloud-python/pull/9894)) +- Add support of `use_avro_logical_types` for extract jobs. ([#9642](https://github.com/googleapis/google-cloud-python/pull/9642)) +- Add support for hive partitioning options configuration. ([#9626](https://github.com/googleapis/google-cloud-python/pull/9626)) +- Add description for routine entities. ([#9785](https://github.com/googleapis/google-cloud-python/pull/9785)) + +### Documentation + +- Update code samples to use strings for table and dataset IDs. ([#9495](https://github.com/googleapis/google-cloud-python/pull/9495)) + +### Internal / Testing Changes + +- Run unit tests with Python 3.8. ([#9880](https://github.com/googleapis/google-cloud-python/pull/9880)) +- Import `Mapping` from `collections.abc` not from `collections`. ([#9826](https://github.com/googleapis/google-cloud-python/pull/9826)) + ## 1.22.0 11-13-2019 12:23 PST diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 45c99e7d9bfc..1b17fa88f368 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.22.0" +version = "1.23.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index 629e8031a52f..1d8313776a0e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -75,7 +75,11 @@ def test_user_agent(self): } expected_uri = conn.build_api_url("/rainbow") http.request.assert_called_once_with( - data=req_data, headers=expected_headers, method="GET", url=expected_uri + data=req_data, + headers=expected_headers, + method="GET", + url=expected_uri, + timeout=None, ) self.assertIn("my-application/1.2.3", conn.user_agent) @@ -104,5 +108,9 @@ def test_extra_headers_replace(self): } expected_uri = conn.build_api_url("/rainbow") http.request.assert_called_once_with( - data=req_data, headers=expected_headers, method="GET", url=expected_uri + data=req_data, + headers=expected_headers, + method="GET", + url=expected_uri, + timeout=None, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 8a82b2e19aa8..e661c86970db 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1552,6 +1552,7 @@ def test_get_table_sets_user_agent(self): "User-Agent": expected_user_agent, }, data=mock.ANY, + timeout=None, ) self.assertIn("my-application/1.2.3", expected_user_agent) From 318e7b68221d282c7e4b3613a8a6b21b575766a7 Mon Sep 17 00:00:00 2001 From: mbarkhau Date: Thu, 12 Dec 2019 16:53:40 +0000 Subject: [PATCH 0716/2016] fix(bigquery): fix typo in import error message (pandas -> pyarrow) (#9955) --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 2f2ee50cc89e..77cb67bfd0fe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -70,7 +70,7 @@ ) _NO_PYARROW_ERROR = ( "The pyarrow library is not installed, please install " - "pandas to use the to_arrow() function." + "pyarrow to use the to_arrow() function." ) _NO_TQDM_ERROR = ( "A progress bar was requested, but there was an error loading the tqdm " From 8dcca217b5d2f8adaba26b7b68f59208b7ac58e3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 12 Dec 2019 12:50:20 -0800 Subject: [PATCH 0717/2016] docs(bigquery): add sample to read from query destination table (#9964) This sample addresses internal issue 134774673, in which we recieved feedback that it's not clear how to paginate through the results of a query. This sample shows that all completed queries have a destination table, which can be read from with tabledata.list. Note: The client library avoids the extra call to `get_table` if query results are read directly from the QueryJob, but it's confusing to show that we're able to get the schema from a private API call to getQueryResults. --- .../samples/query_pagination.py | 53 +++++++++++++++++++ .../samples/tests/test_query_pagination.py | 23 ++++++++ 2 files changed, 76 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/query_pagination.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_query_pagination.py diff --git a/packages/google-cloud-bigquery/samples/query_pagination.py b/packages/google-cloud-bigquery/samples/query_pagination.py new file mode 100644 index 000000000000..b5d1999bfb3c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/query_pagination.py @@ -0,0 +1,53 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def query_pagination(client): + + # [START bigquery_query_pagination] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + query = """ + SELECT name, SUM(number) as total_people + FROM `bigquery-public-data.usa_names.usa_1910_2013` + GROUP BY name + ORDER BY total_people DESC + """ + query_job = client.query(query) # Make an API request. + query_job.result() # Wait for the query to complete. + + # Get the destination table for the query results. + # + # All queries write to a destination table. If a destination table is not + # specified, the BigQuery populates it with a reference to a temporary + # anonymous table after the query completes. + destination = query_job.destination + + # Get the schema (and other properties) for the destination table. + # + # A schema is useful for converting from BigQuery types to Python types. + destination = client.get_table(destination) + + # Download rows. + # + # The client library automatically handles pagination. + print("The query data:") + rows = client.list_rows(destination, max_results=20) + for row in rows: + print("name={}, count={}".format(row["name"], row["total_people"])) + # [END bigquery_query_pagination] diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py b/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py new file mode 100644 index 000000000000..93352fa0bf1d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import query_pagination + + +def test_query_pagination(capsys, client): + + query_pagination.query_pagination(client) + out, _ = capsys.readouterr() + assert "The query data:" in out + assert "name=James, count=4942431" in out From d67a59b0d3945214222d05652e64eb2f2ed6e6a6 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Sat, 14 Dec 2019 04:05:27 +0530 Subject: [PATCH 0718/2016] feat(bigquery): add `iamMember` entity type to allowed access classes (#9973) --- packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 02664d87b153..99c47026fe3a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -123,7 +123,7 @@ class AccessEntry(object): """ ENTITY_TYPES = frozenset( - ["userByEmail", "groupByEmail", "domain", "specialGroup", "view"] + ["userByEmail", "groupByEmail", "domain", "specialGroup", "view", "iamMember"] ) """Allowed entity types.""" From ed8189f9674a41450a236d1a0d4d9e782d6cfe1c Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 16 Dec 2019 23:07:51 +0530 Subject: [PATCH 0719/2016] fix(bigquery): add six dependency in setup file (#9979) --- packages/google-cloud-bigquery/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 1b17fa88f368..5c07de96141b 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -33,6 +33,7 @@ "google-cloud-core >= 1.0.3, < 2.0dev", "google-resumable-media >= 0.3.1, != 0.4.0, < 0.6.0dev", "protobuf >= 3.6.0", + "six >=1.13.0,< 2.0.0dev", ] extras = { "bqstorage": [ From d353b9ac313b247abbe3c23e84314830101de5cc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 16 Dec 2019 10:36:57 -0800 Subject: [PATCH 0720/2016] chore(bigquery): release 1.23.1 (#9981) --- packages/google-cloud-bigquery/CHANGELOG.md | 18 ++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 3f51c5e88583..b5fe8936599d 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.23.1 + +12-16-2019 09:39 PST + + +### Implementation Changes + +- Add `iamMember` entity type to allowed access classes. ([#9973](https://github.com/googleapis/google-cloud-python/pull/9973)) +- Fix typo in import error message (pandas -> pyarrow). ([#9955](https://github.com/googleapis/google-cloud-python/pull/9955)) + +### Dependencies + +- Add `six` as an explicit dependency. ([#9979](https://github.com/googleapis/google-cloud-python/pull/9979)) + +### Documentation + +- Add sample to read from query destination table. ([#9964](https://github.com/googleapis/google-cloud-python/pull/9964)) + ## 1.23.0 12-11-2019 13:31 PST diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 5c07de96141b..42d53301ee0a 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.23.0" +version = "1.23.1" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From e2d16777fccd957d3c118c5d71f7a2b12907c8ee Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 16 Dec 2019 16:50:44 -0800 Subject: [PATCH 0721/2016] feat(bigquery): add `create_bqstorage_client` param to `to_dataframe` and `to_arrow` (#9573) * feat(bigquery): add `create_bqstorage_client` param to `to_dataframe` and `to_arrow` When the `create_bqstorage_client` parameter is set to `True`, the BigQuery client constructs a BigQuery Storage API client for you. This removes the need for boilerplate code to manually construct both clients explitly with the same credentials. Does this make the `bqstorage_client` parameter unnecessary? In most cases, yes, but there are a few cases where we'll want to continue using it. * When partner tools use `to_dataframe`, they should continue to use `bqstorage_client` so that they can set the correct amended user-agent strings. * When a developer needs to override the default API endpoint for the BQ Storage API, they'll need to manually supply a `bqstorage_client`. * test for BQ Storage API usage in samples tests. * fix: close bqstorage client if created by to_dataframe/to_arrow * chore: blacken * doc: update versionadded * doc: update versionadded --- .../google/cloud/bigquery/client.py | 13 ++ .../google/cloud/bigquery/job.py | 40 +++++- .../google/cloud/bigquery/table.py | 136 +++++++++++++----- .../samples/download_public_data.py | 33 +++++ .../samples/download_public_data_sandbox.py | 34 +++++ .../tests/test_download_public_data.py | 34 +++++ .../test_download_public_data_sandbox.py | 34 +++++ .../tests/unit/test_client.py | 25 ++++ .../tests/unit/test_table.py | 119 +++++++++++++++ 9 files changed, 431 insertions(+), 37 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/download_public_data.py create mode 100644 packages/google-cloud-bigquery/samples/download_public_data_sandbox.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_download_public_data.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e6eaf5fcb3ba..f3b7aab40789 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -353,6 +353,19 @@ def dataset(self, dataset_id, project=None): return DatasetReference(project, dataset_id) + def _create_bqstorage_client(self): + """Create a BigQuery Storage API client using this client's credentials. + + Returns: + google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient: + A BigQuery Storage API client. + """ + from google.cloud import bigquery_storage_v1beta1 + + return bigquery_storage_v1beta1.BigQueryStorageClient( + credentials=self._credentials + ) + def create_dataset(self, dataset, exists_ok=False, retry=DEFAULT_RETRY): """API call: create the dataset via a POST request. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index f0312b0d4219..19e4aaf185e4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -3152,7 +3152,12 @@ def result( # If changing the signature of this method, make sure to apply the same # changes to table.RowIterator.to_arrow() - def to_arrow(self, progress_bar_type=None, bqstorage_client=None): + def to_arrow( + self, + progress_bar_type=None, + bqstorage_client=None, + create_bqstorage_client=False, + ): """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -3185,6 +3190,16 @@ def to_arrow(self, progress_bar_type=None, bqstorage_client=None): Reading from a specific partition or snapshot is not currently supported by this method. + create_bqstorage_client (bool): + **Beta Feature** Optional. If ``True``, create a BigQuery + Storage API client using the default API settings. The + BigQuery Storage API is a faster way to fetch rows from + BigQuery. See the ``bqstorage_client`` parameter for more + information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + ..versionadded:: 1.24.0 Returns: pyarrow.Table @@ -3199,12 +3214,20 @@ def to_arrow(self, progress_bar_type=None, bqstorage_client=None): ..versionadded:: 1.17.0 """ return self.result().to_arrow( - progress_bar_type=progress_bar_type, bqstorage_client=bqstorage_client + progress_bar_type=progress_bar_type, + bqstorage_client=bqstorage_client, + create_bqstorage_client=create_bqstorage_client, ) # If changing the signature of this method, make sure to apply the same # changes to table.RowIterator.to_dataframe() - def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): + def to_dataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=False, + ): """Return a pandas DataFrame from a QueryJob Args: @@ -3237,6 +3260,16 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non for details. ..versionadded:: 1.11.0 + create_bqstorage_client (bool): + **Beta Feature** Optional. If ``True``, create a BigQuery + Storage API client using the default API settings. The + BigQuery Storage API is a faster way to fetch rows from + BigQuery. See the ``bqstorage_client`` parameter for more + information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + ..versionadded:: 1.24.0 Returns: A :class:`~pandas.DataFrame` populated with row data and column @@ -3250,6 +3283,7 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non bqstorage_client=bqstorage_client, dtypes=dtypes, progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, ) def __iter__(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 77cb67bfd0fe..a71acf8ecc8a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1456,7 +1456,12 @@ def _to_arrow_iterable(self, bqstorage_client=None): # If changing the signature of this method, make sure to apply the same # changes to job.QueryJob.to_arrow() - def to_arrow(self, progress_bar_type=None, bqstorage_client=None): + def to_arrow( + self, + progress_bar_type=None, + bqstorage_client=None, + create_bqstorage_client=False, + ): """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1489,6 +1494,16 @@ def to_arrow(self, progress_bar_type=None, bqstorage_client=None): Reading from a specific partition or snapshot is not currently supported by this method. + create_bqstorage_client (bool): + **Beta Feature** Optional. If ``True``, create a BigQuery + Storage API client using the default API settings. The + BigQuery Storage API is a faster way to fetch rows from + BigQuery. See the ``bqstorage_client`` parameter for more + information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + ..versionadded:: 1.24.0 Returns: pyarrow.Table @@ -1504,22 +1519,33 @@ def to_arrow(self, progress_bar_type=None, bqstorage_client=None): if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) - progress_bar = self._get_progress_bar(progress_bar_type) + owns_bqstorage_client = False + if not bqstorage_client and create_bqstorage_client: + owns_bqstorage_client = True + bqstorage_client = self.client._create_bqstorage_client() - record_batches = [] - for record_batch in self._to_arrow_iterable(bqstorage_client=bqstorage_client): - record_batches.append(record_batch) + try: + progress_bar = self._get_progress_bar(progress_bar_type) - if progress_bar is not None: - # In some cases, the number of total rows is not populated - # until the first page of rows is fetched. Update the - # progress bar's total to keep an accurate count. - progress_bar.total = progress_bar.total or self.total_rows - progress_bar.update(record_batch.num_rows) + record_batches = [] + for record_batch in self._to_arrow_iterable( + bqstorage_client=bqstorage_client + ): + record_batches.append(record_batch) - if progress_bar is not None: - # Indicate that the download has finished. - progress_bar.close() + if progress_bar is not None: + # In some cases, the number of total rows is not populated + # until the first page of rows is fetched. Update the + # progress bar's total to keep an accurate count. + progress_bar.total = progress_bar.total or self.total_rows + progress_bar.update(record_batch.num_rows) + + if progress_bar is not None: + # Indicate that the download has finished. + progress_bar.close() + finally: + if owns_bqstorage_client: + bqstorage_client.transport.channel.close() if record_batches: return pyarrow.Table.from_batches(record_batches) @@ -1558,14 +1584,20 @@ def _to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): # If changing the signature of this method, make sure to apply the same # changes to job.QueryJob.to_dataframe() - def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): + def to_dataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=False, + ): """Create a pandas DataFrame by loading all pages of a query. Args: bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient): **Beta Feature** Optional. A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows - from BigQuery. This API is a billable API. + from BigQuery. This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. @@ -1602,6 +1634,16 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non progress bar as a graphical dialog box. ..versionadded:: 1.11.0 + create_bqstorage_client (bool): + **Beta Feature** Optional. If ``True``, create a BigQuery + Storage API client using the default API settings. The + BigQuery Storage API is a faster way to fetch rows from + BigQuery. See the ``bqstorage_client`` parameter for more + information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + ..versionadded:: 1.24.0 Returns: pandas.DataFrame: @@ -1621,32 +1663,44 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non if dtypes is None: dtypes = {} - if bqstorage_client and self.max_results is not None: + if ( + bqstorage_client or create_bqstorage_client + ) and self.max_results is not None: warnings.warn( "Cannot use bqstorage_client if max_results is set, " "reverting to fetching data with the tabledata.list endpoint.", stacklevel=2, ) + create_bqstorage_client = False bqstorage_client = None - progress_bar = self._get_progress_bar(progress_bar_type) + owns_bqstorage_client = False + if not bqstorage_client and create_bqstorage_client: + owns_bqstorage_client = True + bqstorage_client = self.client._create_bqstorage_client() - frames = [] - for frame in self._to_dataframe_iterable( - bqstorage_client=bqstorage_client, dtypes=dtypes - ): - frames.append(frame) + try: + progress_bar = self._get_progress_bar(progress_bar_type) - if progress_bar is not None: - # In some cases, the number of total rows is not populated - # until the first page of rows is fetched. Update the - # progress bar's total to keep an accurate count. - progress_bar.total = progress_bar.total or self.total_rows - progress_bar.update(len(frame)) + frames = [] + for frame in self._to_dataframe_iterable( + bqstorage_client=bqstorage_client, dtypes=dtypes + ): + frames.append(frame) + + if progress_bar is not None: + # In some cases, the number of total rows is not populated + # until the first page of rows is fetched. Update the + # progress bar's total to keep an accurate count. + progress_bar.total = progress_bar.total or self.total_rows + progress_bar.update(len(frame)) - if progress_bar is not None: - # Indicate that the download has finished. - progress_bar.close() + if progress_bar is not None: + # Indicate that the download has finished. + progress_bar.close() + finally: + if owns_bqstorage_client: + bqstorage_client.transport.channel.close() # Avoid concatting an empty list. if not frames: @@ -1667,11 +1721,18 @@ class _EmptyRowIterator(object): pages = () total_rows = 0 - def to_arrow(self, progress_bar_type=None): + def to_arrow( + self, + progress_bar_type=None, + bqstorage_client=None, + create_bqstorage_client=False, + ): """[Beta] Create an empty class:`pyarrow.Table`. Args: progress_bar_type (Optional[str]): Ignored. Added for compatibility with RowIterator. + bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. + create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. Returns: pyarrow.Table: An empty :class:`pyarrow.Table`. @@ -1680,13 +1741,20 @@ def to_arrow(self, progress_bar_type=None): raise ValueError(_NO_PYARROW_ERROR) return pyarrow.Table.from_arrays(()) - def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): + def to_dataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=False, + ): """Create an empty dataframe. Args: bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. dtypes (Any): Ignored. Added for compatibility with RowIterator. progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. + create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/packages/google-cloud-bigquery/samples/download_public_data.py b/packages/google-cloud-bigquery/samples/download_public_data.py new file mode 100644 index 000000000000..815d140fc6f1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/download_public_data.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def download_public_data(client): + + # [START bigquery_pandas_public_data] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the fully-qualified table ID in standard + # SQL format, including the project ID and dataset ID. + table_id = "bigquery-public-data.usa_names.usa_1910_current" + + # Use the BigQuery Storage API to speed-up downloads of large tables. + dataframe = client.list_rows(table_id).to_dataframe(create_bqstorage_client=True) + + print(dataframe.info()) + # [END bigquery_pandas_public_data] diff --git a/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py b/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py new file mode 100644 index 000000000000..edb1466e4bd7 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py @@ -0,0 +1,34 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def download_public_data_sandbox(client): + + # [START bigquery_pandas_public_data_sandbox] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # `SELECT *` is an anti-pattern in BigQuery because it is cheaper and + # faster to use the BigQuery Storage API directly, but BigQuery Sandbox + # users can only use the BigQuery Storage API to download query results. + query_string = "SELECT * FROM `bigquery-public-data.usa_names.usa_1910_current`" + + # Use the BigQuery Storage API to speed-up downloads of large tables. + dataframe = client.query(query_string).to_dataframe(create_bqstorage_client=True) + + print(dataframe.info()) + # [END bigquery_pandas_public_data_sandbox] diff --git a/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py b/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py new file mode 100644 index 000000000000..8ee0e6a68c17 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py @@ -0,0 +1,34 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from .. import download_public_data + + +def test_download_public_data(caplog, capsys, client): + # Enable debug-level logging to verify the BigQuery Storage API is used. + caplog.set_level(logging.DEBUG) + + download_public_data.download_public_data(client) + out, _ = capsys.readouterr() + assert "year" in out + assert "gender" in out + assert "name" in out + + assert any( + "Started reading table 'bigquery-public-data.usa_names.usa_1910_current' with BQ Storage API session" + in message + for message in caplog.messages + ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py b/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py new file mode 100644 index 000000000000..74dadc1db3fb --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py @@ -0,0 +1,34 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from .. import download_public_data_sandbox + + +def test_download_public_data_sandbox(caplog, capsys, client): + # Enable debug-level logging to verify the BigQuery Storage API is used. + caplog.set_level(logging.DEBUG) + + download_public_data_sandbox.download_public_data_sandbox(client) + out, err = capsys.readouterr() + assert "year" in out + assert "gender" in out + assert "name" in out + + assert any( + # An anonymous table is used because this sample reads from query results. + ("Started reading table" in message and "BQ Storage API session" in message) + for message in caplog.messages + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index e661c86970db..c4cdb7fdfd2f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -49,6 +49,11 @@ import google.cloud._helpers from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference + +try: + from google.cloud import bigquery_storage_v1beta1 +except (ImportError, AttributeError): # pragma: NO COVER + bigquery_storage_v1beta1 = None from tests.unit.helpers import make_connection @@ -535,6 +540,26 @@ def test_get_dataset(self): ) self.assertEqual(dataset.dataset_id, self.DS_ID) + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_create_bqstorage_client(self): + mock_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + mock_client_instance = object() + mock_client.return_value = mock_client_instance + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + with mock.patch( + "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", mock_client + ): + bqstorage_client = client._create_bqstorage_client() + + self.assertIs(bqstorage_client, mock_client_instance) + mock_client.assert_called_once_with(credentials=creds) + def test_create_dataset_minimal(self): from google.cloud.bigquery.dataset import Dataset diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 97a7b4ae745e..1043df45f9a3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -26,8 +26,12 @@ try: from google.cloud import bigquery_storage_v1beta1 + from google.cloud.bigquery_storage_v1beta1.gapic.transports import ( + big_query_storage_grpc_transport, + ) except ImportError: # pragma: NO COVER bigquery_storage_v1beta1 = None + big_query_storage_grpc_transport = None try: import pandas @@ -1817,6 +1821,9 @@ def test_to_arrow_w_bqstorage(self): bqstorage_client = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient ) + bqstorage_client.transport = mock.create_autospec( + big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + ) streams = [ # Use two streams we want to check frames are read from each stream. {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, @@ -1882,6 +1889,42 @@ def test_to_arrow_w_bqstorage(self): total_rows = expected_num_rows * total_pages self.assertEqual(actual_tbl.num_rows, total_rows) + # Don't close the client if it was passed in. + bqstorage_client.transport.channel.close.assert_not_called() + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_arrow_w_bqstorage_creates_client(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + mock_client = _mock_client() + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + bqstorage_client.transport = mock.create_autospec( + big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + ) + mock_client._create_bqstorage_client.return_value = bqstorage_client + session = bigquery_storage_v1beta1.types.ReadSession() + bqstorage_client.create_read_session.return_value = session + row_iterator = mut.RowIterator( + mock_client, + None, # api_request: ignored + None, # path: ignored + [ + schema.SchemaField("colA", "STRING"), + schema.SchemaField("colC", "STRING"), + schema.SchemaField("colB", "STRING"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + row_iterator.to_arrow(create_bqstorage_client=True) + mock_client._create_bqstorage_client.assert_called_once() + bqstorage_client.transport.channel.close.assert_called_once() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" @@ -2292,6 +2335,76 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self): ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_max_results_w_create_bqstorage_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_dataframe(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "tabledata.list" in str(warning) + ] + self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + mock_client._create_bqstorage_client.assert_not_called() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_creates_client(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + mock_client = _mock_client() + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + bqstorage_client.transport = mock.create_autospec( + big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + ) + mock_client._create_bqstorage_client.return_value = bqstorage_client + session = bigquery_storage_v1beta1.types.ReadSession() + bqstorage_client.create_read_session.return_value = session + row_iterator = mut.RowIterator( + mock_client, + None, # api_request: ignored + None, # path: ignored + [ + schema.SchemaField("colA", "STRING"), + schema.SchemaField("colC", "STRING"), + schema.SchemaField("colB", "STRING"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + row_iterator.to_dataframe(create_bqstorage_client=True) + mock_client._create_bqstorage_client.assert_called_once() + bqstorage_client.transport.channel.close.assert_called_once() + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" @@ -2427,6 +2540,9 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient ) + bqstorage_client.transport = mock.create_autospec( + big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + ) streams = [ # Use two streams we want to check frames are read from each stream. {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, @@ -2481,6 +2597,9 @@ def test_to_dataframe_w_bqstorage_nonempty(self): total_rows = len(page_items) * total_pages self.assertEqual(len(got.index), total_rows) + # Don't close the client if it was passed in. + bqstorage_client.transport.channel.close.assert_not_called() + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" From 3e5811df66383736cfddae9dacde71e495681a44 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 19 Dec 2019 01:45:16 +0000 Subject: [PATCH 0722/2016] feat(bigquery): add timeout parameter to QueryJob.done() method (#9875) * feat(bigquery): add timeout to QueryJob.done() * Add tests for methods that got timeout param In addition, fix the timeout logic in QueryJob.done() - the timeouts are in different units (seconds vs. milliseconds) * Fix lint warning (unused variable) * Adjust timeout exception type in QueryJob.result() * Update dependency pins The new timeout feature requires more recent versions of the API core and google auth dependencies. * Add safety margin on top of server-side timeout If the server-side processing timeout is used (the `timeout_ms` API parameter) as the total timeout, it should be slightly longer than the actual server-side timeout in order to not timeout the connection while there might still be chance that the server-side processing has actually completed. --- .../google/cloud/bigquery/client.py | 7 +- .../google/cloud/bigquery/job.py | 52 +++++-- packages/google-cloud-bigquery/setup.py | 2 + .../google-cloud-bigquery/tests/system.py | 24 ++++ .../tests/unit/test_client.py | 3 + .../tests/unit/test_job.py | 133 ++++++++++++++++-- 6 files changed, 202 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f3b7aab40789..5fd7bceea973 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1081,7 +1081,7 @@ def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False): raise def _get_query_results( - self, job_id, retry, project=None, timeout_ms=None, location=None + self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None, ): """Get the query results object for a query job. @@ -1096,6 +1096,9 @@ def _get_query_results( (Optional) number of milliseconds the the API call should wait for the query to complete before the request times out. location (str): Location of the query job. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before retrying the HTTP request. Returns: google.cloud.bigquery.query._QueryResults: @@ -1122,7 +1125,7 @@ def _get_query_results( # job is complete (from QueryJob.done(), called ultimately from # QueryJob.result()). So we don't need to poll here. resource = self._call_api( - retry, method="GET", path=path, query_params=extra_params + retry, method="GET", path=path, query_params=extra_params, timeout=timeout ) return _QueryResults.from_api_repr(resource) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 19e4aaf185e4..d20e5b5fb11f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -14,10 +14,14 @@ """Define API Jobs.""" +from __future__ import division + +import concurrent.futures import copy import re import threading +import requests import six from six.moves import http_client @@ -50,6 +54,7 @@ _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" _TIMEOUT_BUFFER_SECS = 0.1 +_SERVER_TIMEOUT_MARGIN_SECS = 1.0 _CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) _ERROR_REASON_TO_EXCEPTION = { @@ -663,7 +668,7 @@ def exists(self, client=None, retry=DEFAULT_RETRY): else: return True - def reload(self, client=None, retry=DEFAULT_RETRY): + def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): """API call: refresh job properties via a GET request. See @@ -675,6 +680,9 @@ def reload(self, client=None, retry=DEFAULT_RETRY): ``client`` stored on the current dataset. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before retrying the HTTP request. """ client = self._require_client(client) @@ -683,7 +691,11 @@ def reload(self, client=None, retry=DEFAULT_RETRY): extra_params["location"] = self.location api_response = client._call_api( - retry, method="GET", path=self.path, query_params=extra_params + retry, + method="GET", + path=self.path, + query_params=extra_params, + timeout=timeout, ) self._set_properties(api_response) @@ -2994,9 +3006,16 @@ def estimated_bytes_processed(self): result = int(result) return result - def done(self, retry=DEFAULT_RETRY): + def done(self, retry=DEFAULT_RETRY, timeout=None): """Refresh the job and checks if it is complete. + Args: + retry (Optional[google.api_core.retry.Retry]): + How to retry the call that retrieves query results. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before retrying the HTTP request. + Returns: bool: True if the job is complete, False otherwise. """ @@ -3007,11 +3026,25 @@ def done(self, retry=DEFAULT_RETRY): timeout_ms = None if self._done_timeout is not None: # Subtract a buffer for context switching, network latency, etc. - timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS - timeout = max(min(timeout, 10), 0) - self._done_timeout -= timeout + api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS + api_timeout = max(min(api_timeout, 10), 0) + self._done_timeout -= api_timeout self._done_timeout = max(0, self._done_timeout) - timeout_ms = int(timeout * 1000) + timeout_ms = int(api_timeout * 1000) + + # If the server-side processing timeout (timeout_ms) is specified and + # would be picked as the total request timeout, we want to add a small + # margin to it - we don't want to timeout the connection just as the + # server-side processing might have completed, but instead slightly + # after the server-side deadline. + # However, if `timeout` is specified, and is shorter than the adjusted + # server timeout, the former prevails. + if timeout_ms is not None and timeout_ms > 0: + server_timeout_with_margin = timeout_ms / 1000 + _SERVER_TIMEOUT_MARGIN_SECS + if timeout is not None: + timeout = min(server_timeout_with_margin, timeout) + else: + timeout = server_timeout_with_margin # Do not refresh is the state is already done, as the job will not # change once complete. @@ -3022,13 +3055,14 @@ def done(self, retry=DEFAULT_RETRY): project=self.project, timeout_ms=timeout_ms, location=self.location, + timeout=timeout, ) # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are # correctly populated. if self._query_results.complete: - self.reload(retry=retry) + self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE @@ -3132,6 +3166,8 @@ def result( exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self raise + except requests.exceptions.Timeout as exc: + six.raise_from(concurrent.futures.TimeoutError, exc) # If the query job is complete but there are no query results, this was # special job, such as a DDL query. Return an empty result set to diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 42d53301ee0a..5e2ba87a4b1c 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -30,6 +30,8 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ 'enum34; python_version < "3.4"', + "google-auth >= 1.9.0, < 2.0dev", + "google-api-core >= 1.15.0, < 2.0dev", "google-cloud-core >= 1.0.3, < 2.0dev", "google-resumable-media >= 0.3.1, != 0.4.0, < 0.6.0dev", "protobuf >= 3.6.0", diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index bba527178f47..b431f628d001 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -26,6 +26,7 @@ import uuid import re +import requests import six import psutil import pytest @@ -1893,6 +1894,29 @@ def test_query_iter(self): row_tuples = [r.values() for r in query_job] self.assertEqual(row_tuples, [(1,)]) + def test_querying_data_w_timeout(self): + job_config = bigquery.QueryJobConfig() + job_config.use_query_cache = False + + query_job = Config.CLIENT.query( + """ + SELECT name, SUM(number) AS total_people + FROM `bigquery-public-data.usa_names.usa_1910_current` + GROUP BY name + """, + location="US", + job_config=job_config, + ) + + # Specify a very tight deadline to demonstrate that the timeout + # actually has effect. + with self.assertRaises(requests.exceptions.Timeout): + query_job.done(timeout=0.1) + + # Now wait for the result using a more realistic deadline. + query_job.result(timeout=30) + self.assertTrue(query_job.done(timeout=30)) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_query_results_to_dataframe(self): QUERY = """ diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c4cdb7fdfd2f..c9166bd5d7c0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -226,12 +226,14 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): project="other-project", location=self.LOCATION, timeout_ms=500, + timeout=42, ) conn.api_request.assert_called_once_with( method="GET", path="/projects/other-project/queries/nothere", query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, + timeout=42, ) def test__get_query_results_miss_w_client_location(self): @@ -248,6 +250,7 @@ def test__get_query_results_miss_w_client_location(self): method="GET", path="/projects/PROJECT/queries/nothere", query_params={"maxResults": 0, "location": self.LOCATION}, + timeout=None, ) def test__get_query_results_hit(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 52e2abf8f304..e732bed4dcc6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import concurrent import copy import json import textwrap @@ -19,6 +20,7 @@ import mock import pytest +import requests from six.moves import http_client try: @@ -725,6 +727,7 @@ def test_reload_defaults(self): method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), query_params={"location": self.LOCATION}, + timeout=None, ) self.assertEqual(job._properties, resource) @@ -746,13 +749,14 @@ def test_reload_explicit(self): call_api.return_value = resource retry = DEFAULT_RETRY.with_deadline(1) - job.reload(client=client, retry=retry) + job.reload(client=client, retry=retry, timeout=4.2) call_api.assert_called_once_with( retry, method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), query_params={}, + timeout=4.2, ) self.assertEqual(job._properties, resource) @@ -2489,7 +2493,7 @@ def test_reload_w_bound_client(self): job.reload() conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={} + method="GET", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -2506,7 +2510,7 @@ def test_reload_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={} + method="GET", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -2527,6 +2531,7 @@ def test_reload_w_job_reference(self): method="GET", path="/projects/alternative-project/jobs/{}".format(self.JOB_ID), query_params={"location": "US"}, + timeout=None, ) def test_cancel_w_bound_client(self): @@ -2988,7 +2993,7 @@ def test_reload_w_bound_client(self): job.reload() conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={} + method="GET", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -3007,7 +3012,7 @@ def test_reload_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={} + method="GET", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -3351,7 +3356,7 @@ def test_reload_w_bound_client(self): job.reload() conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={} + method="GET", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -3372,7 +3377,7 @@ def test_reload_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={} + method="GET", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -3901,6 +3906,72 @@ def test_done(self): job = self._get_target_class().from_api_repr(resource, client) self.assertTrue(job.done()) + def test_done_w_timeout(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + + with mock.patch.object( + client, "_get_query_results" + ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: + job.done(timeout=42) + + fake_get_results.assert_called_once() + call_args = fake_get_results.call_args + self.assertEqual(call_args.kwargs.get("timeout"), 42) + + call_args = fake_reload.call_args + self.assertEqual(call_args.kwargs.get("timeout"), 42) + + def test_done_w_timeout_and_shorter_internal_api_timeout(self): + from google.cloud.bigquery.job import _TIMEOUT_BUFFER_SECS + from google.cloud.bigquery.job import _SERVER_TIMEOUT_MARGIN_SECS + + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._done_timeout = 8.8 + + with mock.patch.object( + client, "_get_query_results" + ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: + job.done(timeout=42) + + # The expected timeout used is the job's own done_timeout minus a + # fixed amount (bigquery.job._TIMEOUT_BUFFER_SECS) increased by the + # safety margin on top of server-side processing timeout - that's + # because that final number is smaller than the given timeout (42 seconds). + expected_timeout = 8.8 - _TIMEOUT_BUFFER_SECS + _SERVER_TIMEOUT_MARGIN_SECS + + fake_get_results.assert_called_once() + call_args = fake_get_results.call_args + self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + + call_args = fake_reload.call_args + self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + + def test_done_w_timeout_and_longer_internal_api_timeout(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._done_timeout = 8.8 + + with mock.patch.object( + client, "_get_query_results" + ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: + job.done(timeout=5.5) + + # The expected timeout used is simply the given timeout, as the latter + # is shorter than the job's internal done timeout. + expected_timeout = 5.5 + + fake_get_results.assert_called_once() + call_args = fake_get_results.call_args + self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + + call_args = fake_reload.call_args + self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + def test_query_plan(self): from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.job import QueryPlanEntry @@ -4561,6 +4632,26 @@ def test_result_error(self): expected_line = "{}:{}".format(i, line) assert expected_line in full_text + def test_result_transport_timeout_error(self): + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, query, client) + call_api_patch = mock.patch( + "google.cloud.bigquery.client.Client._call_api", + autospec=True, + side_effect=requests.exceptions.Timeout("Server response took too long."), + ) + + # Make sure that timeout errors get rebranded to concurrent futures timeout. + with call_api_patch, self.assertRaises(concurrent.futures.TimeoutError): + job.result(timeout=1) + def test__begin_error(self): from google.cloud import exceptions @@ -5003,7 +5094,7 @@ def test_reload_w_bound_client(self): self.assertNotEqual(job.destination, table_ref) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={} + method="GET", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -5028,10 +5119,34 @@ def test_reload_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={} + method="GET", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) + def test_reload_w_timeout(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(DEST_TABLE) + config = QueryJobConfig() + config.destination = table_ref + job = self._make_one(self.JOB_ID, None, client, job_config=config) + + job.reload(timeout=4.2) + + self.assertNotEqual(job.destination, table_ref) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=4.2 + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): begun_resource = self._make_resource() From 0d3563a51432c6b0f543e1dd5bf0cc3725420061 Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar <46078689+Emar-Kar@users.noreply.github.com> Date: Thu, 26 Dec 2019 21:10:12 +0300 Subject: [PATCH 0723/2016] refactor(bigquery): update code samples to use strings for table and dataset IDs (#9974) * load_and_query_partitioned_table * remove client_query_legacy_sql from snippets * client_query_w_named_params * client_query_w_positional_params * client_query_w_timestamp_params * client_query_w_array_params * client_query_w_struct_params * query_no_cache * query_external_gcs_temporary_table * unify test_update_table_require_partition_filter * Update test_copy_table_multiple_source.py * Update client_query_add_column.py * Update client_query_relax_column.py * flake8 correction * fix queries.rst file * import reformat + comma deletion --- .../google-cloud-bigquery/docs/snippets.py | 346 ------------------ .../docs/usage/queries.rst | 2 +- .../samples/client_load_partitioned_table.py | 50 +++ .../samples/client_query_add_column.py | 10 +- .../samples/client_query_partitioned_table.py | 51 +++ .../samples/client_query_relax_column.py | 10 +- .../samples/client_query_w_array_params.py | 43 +++ .../samples/client_query_w_named_params.py | 41 +++ .../client_query_w_positional_params.py | 43 +++ .../samples/client_query_w_struct_params.py | 38 ++ .../client_query_w_timestamp_params.py | 41 +++ .../query_external_gcs_temporary_table.py | 44 +++ .../samples/query_no_cache.py | 34 ++ .../test_client_load_partitioned_table.py | 22 ++ .../test_client_query_partitioned_table.py | 26 ++ .../tests/test_client_query_w_array_params.py | 22 ++ .../tests/test_client_query_w_named_params.py | 22 ++ .../test_client_query_w_positional_params.py | 22 ++ .../test_client_query_w_struct_params.py | 23 ++ .../test_client_query_w_timestamp_params.py | 22 ++ .../tests/test_copy_table_multiple_source.py | 13 +- ...test_query_external_gcs_temporary_table.py | 22 ++ .../samples/tests/test_query_no_cache.py | 24 ++ ...t_update_table_require_partition_filter.py | 4 +- 24 files changed, 610 insertions(+), 365 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/client_load_partitioned_table.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_partitioned_table.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_w_array_params.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_w_named_params.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_w_positional_params.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_w_struct_params.py create mode 100644 packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py create mode 100644 packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py create mode 100644 packages/google-cloud-bigquery/samples/query_no_cache.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_partitioned_table.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 2d950936a5a6..bb584fa0494a 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -228,78 +228,6 @@ def test_create_partitioned_table(client, to_delete): assert table.time_partitioning.expiration_ms == 7776000000 -def test_load_and_query_partitioned_table(client, to_delete): - dataset_id = "load_partitioned_table_dataset_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_partitioned] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - table_id = "us_states_by_date" - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - bigquery.SchemaField("date", "DATE"), - ] - job_config.skip_leading_rows = 1 - job_config.time_partitioning = bigquery.TimePartitioning( - type_=bigquery.TimePartitioningType.DAY, - field="date", # name of column to use for partitioning - expiration_ms=7776000000, - ) # 90 days - uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv" - - load_job = client.load_table_from_uri( - uri, dataset_ref.table(table_id), job_config=job_config - ) # API request - - assert load_job.job_type == "load" - - load_job.result() # Waits for table load to complete. - - table = client.get_table(dataset_ref.table(table_id)) - print("Loaded {} rows to table {}".format(table.num_rows, table_id)) - # [END bigquery_load_table_partitioned] - assert table.num_rows == 50 - - project_id = client.project - - # [START bigquery_query_partitioned_table] - import datetime - - # from google.cloud import bigquery - # client = bigquery.Client() - # project_id = 'my-project' - # dataset_id = 'my_dataset' - table_id = "us_states_by_date" - - sql_template = """ - SELECT * - FROM `{}.{}.{}` - WHERE date BETWEEN @start_date AND @end_date - """ - sql = sql_template.format(project_id, dataset_id, table_id) - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = [ - bigquery.ScalarQueryParameter("start_date", "DATE", datetime.date(1800, 1, 1)), - bigquery.ScalarQueryParameter("end_date", "DATE", datetime.date(1899, 12, 31)), - ] - - # API request - query_job = client.query(sql, job_config=job_config) - - rows = list(query_job) - print("{} states were admitted to the US in the 1800s".format(len(rows))) - # [END bigquery_query_partitioned_table] - assert len(rows) == 29 - - @pytest.mark.skip( reason=( "update_table() is flaky " @@ -1327,35 +1255,6 @@ def test_extract_table_compressed(client, to_delete): to_delete.insert(0, blob) -def test_client_query_legacy_sql(client): - """Run a query with Legacy SQL explicitly set""" - # [START bigquery_query_legacy] - # from google.cloud import bigquery - # client = bigquery.Client() - - query = ( - "SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] " - 'WHERE state = "TX" ' - "LIMIT 100" - ) - - # Set use_legacy_sql to True to use legacy SQL syntax. - job_config = bigquery.QueryJobConfig() - job_config.use_legacy_sql = True - - query_job = client.query( - query, - # Location must match that of the dataset(s) referenced in the query. - location="US", - job_config=job_config, - ) # API request - starts the query - - # Print the results. - for row in query_job: # API request - fetches results - print(row) - # [END bigquery_query_legacy] - - def test_client_query_total_rows(client, capsys): """Run a query and just check for how many rows.""" # [START bigquery_query_total_rows] @@ -1420,251 +1319,6 @@ def test_manage_job(client): # [END bigquery_get_job] -def test_client_query_w_named_params(client, capsys): - """Run a query using named query parameters""" - - # [START bigquery_query_params_named] - # from google.cloud import bigquery - # client = bigquery.Client() - - query = """ - SELECT word, word_count - FROM `bigquery-public-data.samples.shakespeare` - WHERE corpus = @corpus - AND word_count >= @min_word_count - ORDER BY word_count DESC; - """ - query_params = [ - bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"), - bigquery.ScalarQueryParameter("min_word_count", "INT64", 250), - ] - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = query_params - query_job = client.query( - query, - # Location must match that of the dataset(s) referenced in the query. - location="US", - job_config=job_config, - ) # API request - starts the query - - # Print the results - for row in query_job: - print("{}: \t{}".format(row.word, row.word_count)) - - assert query_job.state == "DONE" - # [END bigquery_query_params_named] - - out, _ = capsys.readouterr() - assert "the" in out - - -def test_client_query_w_positional_params(client, capsys): - """Run a query using query parameters""" - - # [START bigquery_query_params_positional] - # from google.cloud import bigquery - # client = bigquery.Client() - - query = """ - SELECT word, word_count - FROM `bigquery-public-data.samples.shakespeare` - WHERE corpus = ? - AND word_count >= ? - ORDER BY word_count DESC; - """ - # Set the name to None to use positional parameters. - # Note that you cannot mix named and positional parameters. - query_params = [ - bigquery.ScalarQueryParameter(None, "STRING", "romeoandjuliet"), - bigquery.ScalarQueryParameter(None, "INT64", 250), - ] - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = query_params - query_job = client.query( - query, - # Location must match that of the dataset(s) referenced in the query. - location="US", - job_config=job_config, - ) # API request - starts the query - - # Print the results - for row in query_job: - print("{}: \t{}".format(row.word, row.word_count)) - - assert query_job.state == "DONE" - # [END bigquery_query_params_positional] - - out, _ = capsys.readouterr() - assert "the" in out - - -def test_client_query_w_timestamp_params(client, capsys): - """Run a query using query parameters""" - - # [START bigquery_query_params_timestamps] - # from google.cloud import bigquery - # client = bigquery.Client() - - import datetime - import pytz - - query = "SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);" - query_params = [ - bigquery.ScalarQueryParameter( - "ts_value", - "TIMESTAMP", - datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC), - ) - ] - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = query_params - query_job = client.query( - query, - # Location must match that of the dataset(s) referenced in the query. - location="US", - job_config=job_config, - ) # API request - starts the query - - # Print the results - for row in query_job: - print(row) - - assert query_job.state == "DONE" - # [END bigquery_query_params_timestamps] - - out, _ = capsys.readouterr() - assert "2016, 12, 7, 9, 0" in out - - -def test_client_query_w_array_params(client, capsys): - """Run a query using array query parameters""" - # [START bigquery_query_params_arrays] - # from google.cloud import bigquery - # client = bigquery.Client() - - query = """ - SELECT name, sum(number) as count - FROM `bigquery-public-data.usa_names.usa_1910_2013` - WHERE gender = @gender - AND state IN UNNEST(@states) - GROUP BY name - ORDER BY count DESC - LIMIT 10; - """ - query_params = [ - bigquery.ScalarQueryParameter("gender", "STRING", "M"), - bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]), - ] - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = query_params - query_job = client.query( - query, - # Location must match that of the dataset(s) referenced in the query. - location="US", - job_config=job_config, - ) # API request - starts the query - - # Print the results - for row in query_job: - print("{}: \t{}".format(row.name, row.count)) - - assert query_job.state == "DONE" - # [END bigquery_query_params_arrays] - - out, _ = capsys.readouterr() - assert "James" in out - - -def test_client_query_w_struct_params(client, capsys): - """Run a query using struct query parameters""" - # [START bigquery_query_params_structs] - # from google.cloud import bigquery - # client = bigquery.Client() - - query = "SELECT @struct_value AS s;" - query_params = [ - bigquery.StructQueryParameter( - "struct_value", - bigquery.ScalarQueryParameter("x", "INT64", 1), - bigquery.ScalarQueryParameter("y", "STRING", "foo"), - ) - ] - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = query_params - query_job = client.query( - query, - # Location must match that of the dataset(s) referenced in the query. - location="US", - job_config=job_config, - ) # API request - starts the query - - # Print the results - for row in query_job: - print(row.s) - - assert query_job.state == "DONE" - # [END bigquery_query_params_structs] - - out, _ = capsys.readouterr() - assert "1" in out - assert "foo" in out - - -def test_query_no_cache(client): - # [START bigquery_query_no_cache] - # from google.cloud import bigquery - # client = bigquery.Client() - - job_config = bigquery.QueryJobConfig() - job_config.use_query_cache = False - sql = """ - SELECT corpus - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY corpus; - """ - query_job = client.query( - sql, - # Location must match that of the dataset(s) referenced in the query. - location="US", - job_config=job_config, - ) # API request - - # Print the results. - for row in query_job: # API request - fetches results - print(row) - # [END bigquery_query_no_cache] - - -def test_query_external_gcs_temporary_table(client): - # [START bigquery_query_external_gcs_temp] - # from google.cloud import bigquery - # client = bigquery.Client() - - # Configure the external data source and query job - external_config = bigquery.ExternalConfig("CSV") - external_config.source_uris = [ - "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - ] - external_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - external_config.options.skip_leading_rows = 1 # optionally skip header row - table_id = "us_states" - job_config = bigquery.QueryJobConfig() - job_config.table_definitions = {table_id: external_config} - - # Example query to find states starting with 'W' - sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id) - - query_job = client.query(sql, job_config=job_config) # API request - - w_states = list(query_job) # Waits for query to finish - print("There are {} states with names starting with W.".format(len(w_states))) - # [END bigquery_query_external_gcs_temp] - assert len(w_states) == 4 - - def test_query_external_gcs_permanent_table(client, to_delete): dataset_id = "query_external_gcs_{}".format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) diff --git a/packages/google-cloud-bigquery/docs/usage/queries.rst b/packages/google-cloud-bigquery/docs/usage/queries.rst index 9210e04bc6a2..fc57e54de9df 100644 --- a/packages/google-cloud-bigquery/docs/usage/queries.rst +++ b/packages/google-cloud-bigquery/docs/usage/queries.rst @@ -43,7 +43,7 @@ Run a query using a named query parameter See BigQuery documentation for more information on `parameterized queries `_. -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/client_query_w_named_params.py :language: python :dedent: 4 :start-after: [START bigquery_query_params_named] diff --git a/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py b/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py new file mode 100644 index 000000000000..3f9f86db9a9a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py @@ -0,0 +1,50 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_load_partitioned_table(client, table_id): + + # [START bigquery_load_table_partitioned] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + bigquery.SchemaField("date", "DATE"), + ], + skip_leading_rows=1, + time_partitioning=bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", # Name of the column to use for partitioning. + expiration_ms=7776000000, # 90 days. + ), + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv" + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Wait for the job to complete. + + table = client.get_table(table_id) + print("Loaded {} rows to table {}".format(table.num_rows, table_id)) + # [END bigquery_load_table_partitioned] diff --git a/packages/google-cloud-bigquery/samples/client_query_add_column.py b/packages/google-cloud-bigquery/samples/client_query_add_column.py index 1cde370a35ed..c35548d2a361 100644 --- a/packages/google-cloud-bigquery/samples/client_query_add_column.py +++ b/packages/google-cloud-bigquery/samples/client_query_add_column.py @@ -30,11 +30,11 @@ def client_query_add_column(client, table_id): # Configures the query to append the results to a destination table, # allowing field addition. - job_config = bigquery.QueryJobConfig(destination=table_id) - job_config.schema_update_options = [ - bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION - ] - job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND + job_config = bigquery.QueryJobConfig( + destination=table_id, + schema_update_options=[bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION], + write_disposition=bigquery.WriteDisposition.WRITE_APPEND, + ) # Start the query, passing in the extra configuration. query_job = client.query( diff --git a/packages/google-cloud-bigquery/samples/client_query_partitioned_table.py b/packages/google-cloud-bigquery/samples/client_query_partitioned_table.py new file mode 100644 index 000000000000..71ec3a0e7086 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_partitioned_table.py @@ -0,0 +1,51 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_partitioned_table(client, table_id): + + # [START bigquery_query_partitioned_table] + import datetime + + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to query from. + # table_id = "your-project.your_dataset.your_table_name" + + sql = """ + SELECT * + FROM `{}` + WHERE date BETWEEN @start_date AND @end_date + """.format( + table_id + ) + + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter( + "start_date", "DATE", datetime.date(1800, 1, 1) + ), + bigquery.ScalarQueryParameter( + "end_date", "DATE", datetime.date(1899, 12, 31) + ), + ] + ) + query_job = client.query(sql, job_config=job_config) # Make an API request. + + rows = list(query_job) + print("{} states were admitted to the US in the 1800s".format(len(rows))) + # [END bigquery_query_partitioned_table] diff --git a/packages/google-cloud-bigquery/samples/client_query_relax_column.py b/packages/google-cloud-bigquery/samples/client_query_relax_column.py index d8e5743c1e33..8ec117e186fc 100644 --- a/packages/google-cloud-bigquery/samples/client_query_relax_column.py +++ b/packages/google-cloud-bigquery/samples/client_query_relax_column.py @@ -33,11 +33,11 @@ def client_query_relax_column(client, table_id): # Configures the query to append the results to a destination table, # allowing field relaxation. - job_config = bigquery.QueryJobConfig(destination=table_id) - job_config.schema_update_options = [ - bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION - ] - job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND + job_config = bigquery.QueryJobConfig( + destination=table_id, + schema_update_options=[bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION], + write_disposition=bigquery.WriteDisposition.WRITE_APPEND, + ) # Start the query, passing in the extra configuration. query_job = client.query( diff --git a/packages/google-cloud-bigquery/samples/client_query_w_array_params.py b/packages/google-cloud-bigquery/samples/client_query_w_array_params.py new file mode 100644 index 000000000000..254173d4c540 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_w_array_params.py @@ -0,0 +1,43 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_w_array_params(client): + + # [START bigquery_query_params_arrays] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + query = """ + SELECT name, sum(number) as count + FROM `bigquery-public-data.usa_names.usa_1910_2013` + WHERE gender = @gender + AND state IN UNNEST(@states) + GROUP BY name + ORDER BY count DESC + LIMIT 10; + """ + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter("gender", "STRING", "M"), + bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]), + ] + ) + query_job = client.query(query, job_config=job_config) # Make an API request. + + for row in query_job: + print("{}: \t{}".format(row.name, row.count)) + # [END bigquery_query_params_arrays] diff --git a/packages/google-cloud-bigquery/samples/client_query_w_named_params.py b/packages/google-cloud-bigquery/samples/client_query_w_named_params.py new file mode 100644 index 000000000000..eba5bc221ff9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_w_named_params.py @@ -0,0 +1,41 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_w_named_params(client): + + # [START bigquery_query_params_named] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + query = """ + SELECT word, word_count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = @corpus + AND word_count >= @min_word_count + ORDER BY word_count DESC; + """ + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"), + bigquery.ScalarQueryParameter("min_word_count", "INT64", 250), + ] + ) + query_job = client.query(query, job_config=job_config) # Make an API request. + + for row in query_job: + print("{}: \t{}".format(row.word, row.word_count)) + # [END bigquery_query_params_named] diff --git a/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py b/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py new file mode 100644 index 000000000000..3f7ce584bcf9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py @@ -0,0 +1,43 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_w_positional_params(client): + + # [START bigquery_query_params_positional] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + query = """ + SELECT word, word_count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = ? + AND word_count >= ? + ORDER BY word_count DESC; + """ + # Set the name to None to use positional parameters. + # Note that you cannot mix named and positional parameters. + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter(None, "STRING", "romeoandjuliet"), + bigquery.ScalarQueryParameter(None, "INT64", 250), + ] + ) + query_job = client.query(query, job_config=job_config) # Make an API request. + + for row in query_job: + print("{}: \t{}".format(row.word, row.word_count)) + # [END bigquery_query_params_positional] diff --git a/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py b/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py new file mode 100644 index 000000000000..7c291447f0cb --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_w_struct_params(client): + + # [START bigquery_query_params_structs] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + query = "SELECT @struct_value AS s;" + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.StructQueryParameter( + "struct_value", + bigquery.ScalarQueryParameter("x", "INT64", 1), + bigquery.ScalarQueryParameter("y", "STRING", "foo"), + ) + ] + ) + query_job = client.query(query, job_config=job_config) # Make an API request. + + for row in query_job: + print(row.s) + # [END bigquery_query_params_structs] diff --git a/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py new file mode 100644 index 000000000000..cc334f7e9625 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py @@ -0,0 +1,41 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_w_timestamp_params(client): + + # [START bigquery_query_params_timestamps] + import datetime + + import pytz + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + query = "SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);" + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter( + "ts_value", + "TIMESTAMP", + datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC), + ) + ] + ) + query_job = client.query(query, job_config=job_config) # Make an API request. + + for row in query_job: + print(row) + # [END bigquery_query_params_timestamps] diff --git a/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py new file mode 100644 index 000000000000..3ef44bd32db1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py @@ -0,0 +1,44 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def query_external_gcs_temporary_table(client): + + # [START bigquery_query_external_gcs_temp] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # Configure the external data source and query job. + external_config = bigquery.ExternalConfig("CSV") + external_config.source_uris = [ + "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + ] + external_config.schema = [ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ] + external_config.options.skip_leading_rows = 1 + table_id = "us_states" + job_config = bigquery.QueryJobConfig(table_definitions={table_id: external_config}) + + # Example query to find states starting with 'W'. + sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id) + + query_job = client.query(sql, job_config=job_config) # Make an API request. + + w_states = list(query_job) # Wait for the job to complete. + print("There are {} states with names starting with W.".format(len(w_states))) + # [END bigquery_query_external_gcs_temp] diff --git a/packages/google-cloud-bigquery/samples/query_no_cache.py b/packages/google-cloud-bigquery/samples/query_no_cache.py new file mode 100644 index 000000000000..3d542a96b7be --- /dev/null +++ b/packages/google-cloud-bigquery/samples/query_no_cache.py @@ -0,0 +1,34 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def query_no_cache(client): + + # [START bigquery_query_no_cache] + from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + job_config = bigquery.QueryJobConfig(use_query_cache=False) + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + query_job = client.query(sql, job_config=job_config) # Make an API request. + + for row in query_job: + print(row) + # [END bigquery_query_no_cache] diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py b/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py new file mode 100644 index 000000000000..4e4c8811181f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_load_partitioned_table + + +def test_client_load_partitioned_table(capsys, client, random_table_id): + + client_load_partitioned_table.client_load_partitioned_table(client, random_table_id) + out, err = capsys.readouterr() + assert "Loaded 50 rows to table {}".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_partitioned_table.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_partitioned_table.py new file mode 100644 index 000000000000..73fd71b54d5d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_partitioned_table.py @@ -0,0 +1,26 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_load_partitioned_table +from .. import client_query_partitioned_table + + +def test_client_query_partitioned_table(capsys, client, random_table_id): + + client_load_partitioned_table.client_load_partitioned_table(client, random_table_id) + client_query_partitioned_table.client_query_partitioned_table( + client, random_table_id + ) + out, err = capsys.readouterr() + assert "29 states were admitted to the US in the 1800s" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py new file mode 100644 index 000000000000..8603e9b8fe3d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_query_w_array_params + + +def test_client_query_w_array_params(capsys, client): + + client_query_w_array_params.client_query_w_array_params(client) + out, err = capsys.readouterr() + assert "James" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py new file mode 100644 index 000000000000..ae4a2fc27db3 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_query_w_named_params + + +def test_client_query_w_named_params(capsys, client): + + client_query_w_named_params.client_query_w_named_params(client) + out, err = capsys.readouterr() + assert "the" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py new file mode 100644 index 000000000000..37c15b67b120 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_query_w_positional_params + + +def test_client_query_w_positional_params(capsys, client): + + client_query_w_positional_params.client_query_w_positional_params(client) + out, err = capsys.readouterr() + assert "the" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py new file mode 100644 index 000000000000..9d0c4282946b --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_query_w_struct_params + + +def test_client_query_w_struct_params(capsys, client): + + client_query_w_struct_params.client_query_w_struct_params(client) + out, err = capsys.readouterr() + assert "1" in out + assert "foo" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py new file mode 100644 index 000000000000..45f7b7518454 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_query_w_timestamp_params + + +def test_client_query_w_timestamp_params(capsys, client): + + client_query_w_timestamp_params.client_query_w_timestamp_params(client) + out, err = capsys.readouterr() + assert "2016, 12, 7, 9, 0" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py index 755fa2ccb5e9..16c1de89627c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py @@ -20,19 +20,18 @@ def test_copy_table_multiple_source(capsys, client, random_table_id, random_dataset_id): - schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - dataset = bigquery.Dataset(random_dataset_id) dataset.location = "US" dataset = client.create_dataset(dataset) table_data = {"table1": b"Washington,WA", "table2": b"California,CA"} for table_id, data in table_data.items(): table_ref = dataset.table(table_id) - job_config = bigquery.LoadJobConfig() - job_config.schema = schema + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ] + ) body = six.BytesIO(data) client.load_table_from_file( body, table_ref, location="US", job_config=job_config diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py new file mode 100644 index 000000000000..ea5b5d4dfcda --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import query_external_gcs_temporary_table + + +def test_query_external_gcs_temporary_table(capsys, client): + + query_external_gcs_temporary_table.query_external_gcs_temporary_table(client) + out, err = capsys.readouterr() + assert "There are 4 states with names starting with W." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py b/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py new file mode 100644 index 000000000000..68f0774d935f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py @@ -0,0 +1,24 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +from .. import query_no_cache + + +def test_query_no_cache(capsys, client): + + query_no_cache.query_no_cache(client) + out, err = capsys.readouterr() + assert re.search(r"(Row[\w(){}:', ]+)$", out) diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py b/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py index 1cbd2b2279b2..7ce6d64c780a 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py @@ -13,13 +13,15 @@ # limitations under the License. from google.cloud import bigquery + from .. import update_table_require_partition_filter def test_update_table_require_partition_filter(capsys, client, random_table_id): + # Make a partitioned table. schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")] - table = bigquery.Table(random_table_id, schema) + table = bigquery.Table(random_table_id, schema=schema) table.time_partitioning = bigquery.TimePartitioning(field="transaction_timestamp") table = client.create_table(table) From a84dca41badf727eedf0bd7c0bef2bd8e83f12ee Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 26 Dec 2019 23:12:44 +0000 Subject: [PATCH 0724/2016] feat(bigquery): add timeout parameter to client's and job's public methods (#10002) * Reverse argument order in job.result() This is for internal consistency with other methods such as reload(). * Add TODO reminder to _AsyncJob.cancel() method * Add timeout argument to public methods AN exception is the Client.load_table_from_file() method, and the methods that depend on it, because adding a timeout requires changes in the google-resumable-media dependency. * Explicitly test timeout parameter * Split timeout in multi-request methods If a method makes multiple requests and is given a timeout, that timeout should represent the total allowed time for all requests combined. * Fix minor styling issue * Add timeout with retry test for Client._call_api() --- .../google/cloud/bigquery/client.py | 322 ++++++++--- .../google/cloud/bigquery/job.py | 112 ++-- packages/google-cloud-bigquery/noxfile.py | 2 +- .../tests/unit/test_client.py | 510 ++++++++++++++---- .../tests/unit/test_job.py | 168 +++++- 5 files changed, 910 insertions(+), 204 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5fd7bceea973..5707d57cdb62 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -22,6 +22,7 @@ except ImportError: # Python 2.7 import collections as collections_abc +import concurrent.futures import copy import functools import gzip @@ -47,6 +48,7 @@ import google.api_core.client_options import google.api_core.exceptions from google.api_core import page_iterator +from google.auth.transport.requests import TimeoutGuard import google.cloud._helpers from google.cloud import exceptions from google.cloud.client import ClientWithProject @@ -206,7 +208,7 @@ def close(self): self._http._auth_request.session.close() self._http.close() - def get_service_account_email(self, project=None): + def get_service_account_email(self, project=None, timeout=None): """Get the email address of the project's BigQuery service account Note: @@ -217,6 +219,8 @@ def get_service_account_email(self, project=None): project (str, optional): Project ID to use for retreiving service account email. Defaults to the client's project. + timeout (Optional[float]): + The number of seconds to wait for the API response. Returns: str: service account email address @@ -232,10 +236,16 @@ def get_service_account_email(self, project=None): if project is None: project = self.project path = "/projects/%s/serviceAccount" % (project,) - api_response = self._connection.api_request(method="GET", path=path) + + # TODO: call thorugh self._call_api() and allow passing in a retry? + api_response = self._connection.api_request( + method="GET", path=path, timeout=timeout + ) return api_response["email"] - def list_projects(self, max_results=None, page_token=None, retry=DEFAULT_RETRY): + def list_projects( + self, max_results=None, page_token=None, retry=DEFAULT_RETRY, timeout=None + ): """List projects for the project associated with this client. See @@ -256,6 +266,10 @@ def list_projects(self, max_results=None, page_token=None, retry=DEFAULT_RETRY): retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + Returns: google.api_core.page_iterator.Iterator: Iterator of :class:`~google.cloud.bigquery.client.Project` @@ -263,7 +277,7 @@ def list_projects(self, max_results=None, page_token=None, retry=DEFAULT_RETRY): """ return page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry), + api_request=functools.partial(self._call_api, retry, timeout=timeout), path="/projects", item_to_value=_item_to_project, items_key="projects", @@ -279,6 +293,7 @@ def list_datasets( max_results=None, page_token=None, retry=DEFAULT_RETRY, + timeout=None, ): """List datasets for the project associated with this client. @@ -307,6 +322,9 @@ def list_datasets( :class:`~google.api_core.page_iterator.HTTPIterator`. retry (google.api_core.retry.Retry): Optional. How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.api_core.page_iterator.Iterator: @@ -325,7 +343,7 @@ def list_datasets( path = "/projects/%s/datasets" % (project,) return page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry), + api_request=functools.partial(self._call_api, retry, timeout=timeout), path=path, item_to_value=_item_to_dataset, items_key="datasets", @@ -366,7 +384,9 @@ def _create_bqstorage_client(self): credentials=self._credentials ) - def create_dataset(self, dataset, exists_ok=False, retry=DEFAULT_RETRY): + def create_dataset( + self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None + ): """API call: create the dataset via a POST request. See @@ -386,6 +406,9 @@ def create_dataset(self, dataset, exists_ok=False, retry=DEFAULT_RETRY): errors when creating the dataset. retry (google.api_core.retry.Retry): Optional. How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.dataset.Dataset: @@ -413,14 +436,18 @@ def create_dataset(self, dataset, exists_ok=False, retry=DEFAULT_RETRY): data["location"] = self.location try: - api_response = self._call_api(retry, method="POST", path=path, data=data) + api_response = self._call_api( + retry, method="POST", path=path, data=data, timeout=timeout + ) return Dataset.from_api_repr(api_response) except google.api_core.exceptions.Conflict: if not exists_ok: raise return self.get_dataset(dataset.reference, retry=retry) - def create_routine(self, routine, exists_ok=False, retry=DEFAULT_RETRY): + def create_routine( + self, routine, exists_ok=False, retry=DEFAULT_RETRY, timeout=None + ): """[Beta] Create a routine via a POST request. See @@ -435,6 +462,9 @@ def create_routine(self, routine, exists_ok=False, retry=DEFAULT_RETRY): errors when creating the routine. retry (google.api_core.retry.Retry): Optional. How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.routine.Routine: @@ -447,7 +477,7 @@ def create_routine(self, routine, exists_ok=False, retry=DEFAULT_RETRY): resource = routine.to_api_repr() try: api_response = self._call_api( - retry, method="POST", path=path, data=resource + retry, method="POST", path=path, data=resource, timeout=timeout ) return Routine.from_api_repr(api_response) except google.api_core.exceptions.Conflict: @@ -455,7 +485,7 @@ def create_routine(self, routine, exists_ok=False, retry=DEFAULT_RETRY): raise return self.get_routine(routine.reference, retry=retry) - def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY): + def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None): """API call: create a table via a PUT request See @@ -476,6 +506,9 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY): errors when creating the table. retry (google.api_core.retry.Retry): Optional. How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.table.Table: @@ -486,7 +519,9 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY): path = "/projects/%s/datasets/%s/tables" % (table.project, table.dataset_id) data = table.to_api_repr() try: - api_response = self._call_api(retry, method="POST", path=path, data=data) + api_response = self._call_api( + retry, method="POST", path=path, data=data, timeout=timeout + ) return Table.from_api_repr(api_response) except google.api_core.exceptions.Conflict: if not exists_ok: @@ -499,7 +534,7 @@ def _call_api(self, retry, **kwargs): call = retry(call) return call() - def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): + def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): """Fetch the dataset referenced by ``dataset_ref`` Args: @@ -513,6 +548,9 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): :func:`~google.cloud.bigquery.dataset.DatasetReference.from_string`. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.dataset.Dataset: @@ -523,10 +561,12 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): dataset_ref, default_project=self.project ) - api_response = self._call_api(retry, method="GET", path=dataset_ref.path) + api_response = self._call_api( + retry, method="GET", path=dataset_ref.path, timeout=timeout + ) return Dataset.from_api_repr(api_response) - def get_model(self, model_ref, retry=DEFAULT_RETRY): + def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): """[Beta] Fetch the model referenced by ``model_ref``. Args: @@ -540,6 +580,9 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY): :func:`google.cloud.bigquery.model.ModelReference.from_string`. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.model.Model: A ``Model`` instance. @@ -549,10 +592,12 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY): model_ref, default_project=self.project ) - api_response = self._call_api(retry, method="GET", path=model_ref.path) + api_response = self._call_api( + retry, method="GET", path=model_ref.path, timeout=timeout + ) return Model.from_api_repr(api_response) - def get_routine(self, routine_ref, retry=DEFAULT_RETRY): + def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): """[Beta] Get the routine referenced by ``routine_ref``. Args: @@ -567,6 +612,9 @@ def get_routine(self, routine_ref, retry=DEFAULT_RETRY): :func:`google.cloud.bigquery.routine.RoutineReference.from_string`. retry (google.api_core.retry.Retry): (Optional) How to retry the API call. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.routine.Routine: @@ -577,10 +625,12 @@ def get_routine(self, routine_ref, retry=DEFAULT_RETRY): routine_ref, default_project=self.project ) - api_response = self._call_api(retry, method="GET", path=routine_ref.path) + api_response = self._call_api( + retry, method="GET", path=routine_ref.path, timeout=timeout + ) return Routine.from_api_repr(api_response) - def get_table(self, table, retry=DEFAULT_RETRY): + def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): """Fetch the table referenced by ``table``. Args: @@ -595,16 +645,21 @@ def get_table(self, table, retry=DEFAULT_RETRY): :func:`google.cloud.bigquery.table.TableReference.from_string`. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.table.Table: A ``Table`` instance. """ table_ref = _table_arg_to_table_ref(table, default_project=self.project) - api_response = self._call_api(retry, method="GET", path=table_ref.path) + api_response = self._call_api( + retry, method="GET", path=table_ref.path, timeout=timeout + ) return Table.from_api_repr(api_response) - def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): + def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): """Change some fields of a dataset. Use ``fields`` to specify which fields to update. At least one field @@ -625,6 +680,9 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): The properties of ``dataset`` to change (e.g. "friendly_name"). retry (google.api_core.retry.Retry, optional): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.dataset.Dataset: @@ -636,11 +694,16 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): else: headers = None api_response = self._call_api( - retry, method="PATCH", path=dataset.path, data=partial, headers=headers + retry, + method="PATCH", + path=dataset.path, + data=partial, + headers=headers, + timeout=timeout, ) return Dataset.from_api_repr(api_response) - def update_model(self, model, fields, retry=DEFAULT_RETRY): + def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): """[Beta] Change some fields of a model. Use ``fields`` to specify which fields to update. At least one field @@ -660,6 +723,9 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY): properties (e.g. "friendly_name"). retry (google.api_core.retry.Retry): (Optional) A description of how to retry the API call. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.model.Model: @@ -671,11 +737,16 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY): else: headers = None api_response = self._call_api( - retry, method="PATCH", path=model.path, data=partial, headers=headers + retry, + method="PATCH", + path=model.path, + data=partial, + headers=headers, + timeout=timeout, ) return Model.from_api_repr(api_response) - def update_routine(self, routine, fields, retry=DEFAULT_RETRY): + def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): """[Beta] Change some fields of a routine. Use ``fields`` to specify which fields to update. At least one field @@ -702,6 +773,9 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY): (e.g. ``type_``). retry (google.api_core.retry.Retry): (Optional) A description of how to retry the API call. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.routine.Routine: @@ -717,11 +791,16 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY): partial["routineReference"] = routine.reference.to_api_repr() api_response = self._call_api( - retry, method="PUT", path=routine.path, data=partial, headers=headers + retry, + method="PUT", + path=routine.path, + data=partial, + headers=headers, + timeout=timeout, ) return Routine.from_api_repr(api_response) - def update_table(self, table, fields, retry=DEFAULT_RETRY): + def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): """Change some fields of a table. Use ``fields`` to specify which fields to update. At least one field @@ -741,6 +820,9 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY): properties (e.g. "friendly_name"). retry (google.api_core.retry.Retry): (Optional) A description of how to retry the API call. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.table.Table: @@ -752,12 +834,22 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY): else: headers = None api_response = self._call_api( - retry, method="PATCH", path=table.path, data=partial, headers=headers + retry, + method="PATCH", + path=table.path, + data=partial, + headers=headers, + timeout=timeout, ) return Table.from_api_repr(api_response) def list_models( - self, dataset, max_results=None, page_token=None, retry=DEFAULT_RETRY + self, + dataset, + max_results=None, + page_token=None, + retry=DEFAULT_RETRY, + timeout=None, ): """[Beta] List models in the dataset. @@ -786,6 +878,9 @@ def list_models( :class:`~google.api_core.page_iterator.HTTPIterator`. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.api_core.page_iterator.Iterator: @@ -804,7 +899,7 @@ def list_models( path = "%s/models" % dataset.path result = page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry), + api_request=functools.partial(self._call_api, retry, timeout=timeout), path=path, item_to_value=_item_to_model, items_key="models", @@ -815,7 +910,12 @@ def list_models( return result def list_routines( - self, dataset, max_results=None, page_token=None, retry=DEFAULT_RETRY + self, + dataset, + max_results=None, + page_token=None, + retry=DEFAULT_RETRY, + timeout=None, ): """[Beta] List routines in the dataset. @@ -844,6 +944,9 @@ def list_routines( :class:`~google.api_core.page_iterator.HTTPIterator`. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.api_core.page_iterator.Iterator: @@ -862,7 +965,7 @@ def list_routines( path = "{}/routines".format(dataset.path) result = page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry), + api_request=functools.partial(self._call_api, retry, timeout=timeout), path=path, item_to_value=_item_to_routine, items_key="routines", @@ -873,7 +976,12 @@ def list_routines( return result def list_tables( - self, dataset, max_results=None, page_token=None, retry=DEFAULT_RETRY + self, + dataset, + max_results=None, + page_token=None, + retry=DEFAULT_RETRY, + timeout=None, ): """List tables in the dataset. @@ -902,6 +1010,9 @@ def list_tables( :class:`~google.api_core.page_iterator.HTTPIterator`. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.api_core.page_iterator.Iterator: @@ -920,7 +1031,7 @@ def list_tables( path = "%s/tables" % dataset.path result = page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry), + api_request=functools.partial(self._call_api, retry, timeout=timeout), path=path, item_to_value=_item_to_table, items_key="tables", @@ -931,7 +1042,12 @@ def list_tables( return result def delete_dataset( - self, dataset, delete_contents=False, retry=DEFAULT_RETRY, not_found_ok=False + self, + dataset, + delete_contents=False, + retry=DEFAULT_RETRY, + timeout=None, + not_found_ok=False, ): """Delete a dataset. @@ -954,6 +1070,9 @@ def delete_dataset( Default is False. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. not_found_ok (bool): Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the dataset. @@ -972,13 +1091,19 @@ def delete_dataset( try: self._call_api( - retry, method="DELETE", path=dataset.path, query_params=params + retry, + method="DELETE", + path=dataset.path, + query_params=params, + timeout=timeout, ) except google.api_core.exceptions.NotFound: if not not_found_ok: raise - def delete_model(self, model, retry=DEFAULT_RETRY, not_found_ok=False): + def delete_model( + self, model, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False + ): """[Beta] Delete a model See @@ -996,6 +1121,9 @@ def delete_model(self, model, retry=DEFAULT_RETRY, not_found_ok=False): :func:`google.cloud.bigquery.model.ModelReference.from_string`. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. not_found_ok (bool): Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the model. @@ -1007,12 +1135,14 @@ def delete_model(self, model, retry=DEFAULT_RETRY, not_found_ok=False): raise TypeError("model must be a Model or a ModelReference") try: - self._call_api(retry, method="DELETE", path=model.path) + self._call_api(retry, method="DELETE", path=model.path, timeout=timeout) except google.api_core.exceptions.NotFound: if not not_found_ok: raise - def delete_routine(self, routine, retry=DEFAULT_RETRY, not_found_ok=False): + def delete_routine( + self, routine, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False + ): """[Beta] Delete a routine. See @@ -1030,6 +1160,9 @@ def delete_routine(self, routine, retry=DEFAULT_RETRY, not_found_ok=False): :func:`google.cloud.bigquery.routine.RoutineReference.from_string`. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. not_found_ok (bool): Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the routine. @@ -1043,12 +1176,14 @@ def delete_routine(self, routine, retry=DEFAULT_RETRY, not_found_ok=False): raise TypeError("routine must be a Routine or a RoutineReference") try: - self._call_api(retry, method="DELETE", path=routine.path) + self._call_api(retry, method="DELETE", path=routine.path, timeout=timeout) except google.api_core.exceptions.NotFound: if not not_found_ok: raise - def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False): + def delete_table( + self, table, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False + ): """Delete a table See @@ -1066,6 +1201,9 @@ def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False): :func:`google.cloud.bigquery.table.TableReference.from_string`. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. not_found_ok (bool): Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the table. @@ -1075,7 +1213,7 @@ def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False): raise TypeError("Unable to get TableReference for table '{}'".format(table)) try: - self._call_api(retry, method="DELETE", path=table.path) + self._call_api(retry, method="DELETE", path=table.path, timeout=timeout) except google.api_core.exceptions.NotFound: if not not_found_ok: raise @@ -1098,7 +1236,7 @@ def _get_query_results( location (str): Location of the query job. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport - before retrying the HTTP request. + before using ``retry``. Returns: google.cloud.bigquery.query._QueryResults: @@ -1155,7 +1293,9 @@ def job_from_resource(self, resource): return job.QueryJob.from_api_repr(resource, self) return job.UnknownJob.from_api_repr(resource, self) - def get_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): + def get_job( + self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None + ): """Fetch a job for the project associated with this client. See @@ -1171,6 +1311,9 @@ def get_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): location (str): Location where the job was run. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: Union[ \ @@ -1195,12 +1338,14 @@ def get_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): path = "/projects/{}/jobs/{}".format(project, job_id) resource = self._call_api( - retry, method="GET", path=path, query_params=extra_params + retry, method="GET", path=path, query_params=extra_params, timeout=timeout ) return self.job_from_resource(resource) - def cancel_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): + def cancel_job( + self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None + ): """Attempt to cancel a job from a job ID. See @@ -1216,6 +1361,9 @@ def cancel_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): location (str): Location where the job was run. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: Union[ \ @@ -1240,7 +1388,7 @@ def cancel_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY): path = "/projects/{}/jobs/{}/cancel".format(project, job_id) resource = self._call_api( - retry, method="POST", path=path, query_params=extra_params + retry, method="POST", path=path, query_params=extra_params, timeout=timeout ) return self.job_from_resource(resource["job"]) @@ -1254,6 +1402,7 @@ def list_jobs( all_users=None, state_filter=None, retry=DEFAULT_RETRY, + timeout=None, min_creation_time=None, max_creation_time=None, ): @@ -1290,6 +1439,9 @@ def list_jobs( * ``"running"`` retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. min_creation_time (Optional[datetime.datetime]): Min value for job creation time. If set, only jobs created after or at this timestamp are returned. If the datetime has @@ -1329,7 +1481,7 @@ def list_jobs( path = "/projects/%s/jobs" % (project,) return page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry), + api_request=functools.partial(self._call_api, retry, timeout=timeout), path=path, item_to_value=_item_to_job, items_key="jobs", @@ -1348,6 +1500,7 @@ def load_table_from_uri( project=None, job_config=None, retry=DEFAULT_RETRY, + timeout=None, ): """Starts a job for loading data into a table from CloudStorage. @@ -1384,6 +1537,9 @@ def load_table_from_uri( (Optional) Extra configuration options for the job. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -1413,7 +1569,7 @@ def load_table_from_uri( _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) load_job = job.LoadJob(job_ref, source_uris, destination, self, job_config) - load_job._begin(retry=retry) + load_job._begin(retry=retry, timeout=timeout) return load_job @@ -1918,6 +2074,7 @@ def copy_table( project=None, job_config=None, retry=DEFAULT_RETRY, + timeout=None, ): """Copy one or more tables to another table. @@ -1961,6 +2118,9 @@ def copy_table( (Optional) Extra configuration options for the job. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.job.CopyJob: A new copy job instance. @@ -2004,7 +2164,7 @@ def copy_table( copy_job = job.CopyJob( job_ref, sources, destination, client=self, job_config=job_config ) - copy_job._begin(retry=retry) + copy_job._begin(retry=retry, timeout=timeout) return copy_job @@ -2018,6 +2178,7 @@ def extract_table( project=None, job_config=None, retry=DEFAULT_RETRY, + timeout=None, ): """Start a job to extract a table into Cloud Storage files. @@ -2052,6 +2213,9 @@ def extract_table( (Optional) Extra configuration options for the job. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Args: source (google.cloud.bigquery.table.TableReference): table to be extracted. @@ -2086,7 +2250,7 @@ def extract_table( extract_job = job.ExtractJob( job_ref, source, destination_uris, client=self, job_config=job_config ) - extract_job._begin(retry=retry) + extract_job._begin(retry=retry, timeout=timeout) return extract_job @@ -2099,6 +2263,7 @@ def query( location=None, project=None, retry=DEFAULT_RETRY, + timeout=None, ): """Run a SQL query. @@ -2129,6 +2294,9 @@ def query( to the client's project. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.job.QueryJob: A new query job instance. @@ -2169,7 +2337,7 @@ def query( job_ref = job._JobReference(job_id, project=project, location=location) query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) - query_job._begin(retry=retry) + query_job._begin(retry=retry, timeout=timeout) return query_job @@ -2290,6 +2458,7 @@ def insert_rows_json( ignore_unknown_values=None, template_suffix=None, retry=DEFAULT_RETRY, + timeout=None, ): """Insert rows into a table without applying local type conversions. @@ -2326,6 +2495,9 @@ def insert_rows_json( https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: Sequence[Mappings]: @@ -2359,7 +2531,11 @@ def insert_rows_json( # We can always retry, because every row has an insert ID. response = self._call_api( - retry, method="POST", path="%s/insertAll" % table.path, data=data + retry, + method="POST", + path="%s/insertAll" % table.path, + data=data, + timeout=timeout, ) errors = [] @@ -2368,7 +2544,7 @@ def insert_rows_json( return errors - def list_partitions(self, table, retry=DEFAULT_RETRY): + def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): """List the partitions in a table. Args: @@ -2380,23 +2556,37 @@ def list_partitions(self, table, retry=DEFAULT_RETRY): The table or reference from which to get partition info retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` is + interpreted as the approximate total time of **all** requests. Returns: List[str]: A list of the partition ids present in the partitioned table """ table = _table_arg_to_table_ref(table, default_project=self.project) - meta_table = self.get_table( - TableReference( - self.dataset(table.dataset_id, project=table.project), - "%s$__PARTITIONS_SUMMARY__" % table.table_id, + + with TimeoutGuard( + timeout, timeout_error_type=concurrent.futures.TimeoutError + ) as guard: + meta_table = self.get_table( + TableReference( + self.dataset(table.dataset_id, project=table.project), + "%s$__PARTITIONS_SUMMARY__" % table.table_id, + ), + retry=retry, + timeout=timeout, ) - ) + timeout = guard.remaining_timeout subset = [col for col in meta_table.schema if col.name == "partition_id"] return [ row[0] - for row in self.list_rows(meta_table, selected_fields=subset, retry=retry) + for row in self.list_rows( + meta_table, selected_fields=subset, retry=retry, timeout=timeout + ) ] def list_rows( @@ -2408,6 +2598,7 @@ def list_rows( start_index=None, page_size=None, retry=DEFAULT_RETRY, + timeout=None, ): """List the rows of the table. @@ -2452,6 +2643,11 @@ def list_rows( to a sensible value set by the API. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` is + interpreted as the approximate total time of **all** requests. Returns: google.cloud.bigquery.table.RowIterator: @@ -2476,7 +2672,11 @@ def list_rows( # No schema, but no selected_fields. Assume the developer wants all # columns, so get the table resource for them rather than failing. elif len(schema) == 0: - table = self.get_table(table.reference, retry=retry) + with TimeoutGuard( + timeout, timeout_error_type=concurrent.futures.TimeoutError + ) as guard: + table = self.get_table(table.reference, retry=retry, timeout=timeout) + timeout = guard.remaining_timeout schema = table.schema params = {} @@ -2487,7 +2687,7 @@ def list_rows( row_iterator = RowIterator( client=self, - api_request=functools.partial(self._call_api, retry), + api_request=functools.partial(self._call_api, retry, timeout=timeout), path="%s/data" % (table.path,), schema=schema, page_token=page_token, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index d20e5b5fb11f..34628350c922 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -26,6 +26,7 @@ from six.moves import http_client import google.api_core.future.polling +from google.auth.transport.requests import TimeoutGuard from google.cloud import exceptions from google.cloud.exceptions import NotFound from google.cloud.bigquery.dataset import Dataset @@ -607,7 +608,7 @@ def to_api_repr(self): _build_resource = to_api_repr # backward-compatibility alias - def _begin(self, client=None, retry=DEFAULT_RETRY): + def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): """API call: begin the job via a POST request See @@ -619,6 +620,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY): associated with the job object or``NoneType`` retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Raises: ValueError: @@ -633,11 +637,11 @@ def _begin(self, client=None, retry=DEFAULT_RETRY): # jobs.insert is idempotent because we ensure that every new # job has an ID. api_response = client._call_api( - retry, method="POST", path=path, data=self.to_api_repr() + retry, method="POST", path=path, data=self.to_api_repr(), timeout=timeout ) self._set_properties(api_response) - def exists(self, client=None, retry=DEFAULT_RETRY): + def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): """API call: test for the existence of the job via a GET request See @@ -649,6 +653,9 @@ def exists(self, client=None, retry=DEFAULT_RETRY): ``client`` stored on the current dataset. retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: bool: Boolean indicating existence of the job. @@ -661,7 +668,11 @@ def exists(self, client=None, retry=DEFAULT_RETRY): try: client._call_api( - retry, method="GET", path=self.path, query_params=extra_params + retry, + method="GET", + path=self.path, + query_params=extra_params, + timeout=timeout, ) except NotFound: return False @@ -682,7 +693,7 @@ def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport - before retrying the HTTP request. + before using ``retry``. """ client = self._require_client(client) @@ -699,7 +710,7 @@ def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): ) self._set_properties(api_response) - def cancel(self, client=None): + def cancel(self, client=None, timeout=None): """API call: cancel job via a POST request See @@ -709,6 +720,8 @@ def cancel(self, client=None): client (Optional[google.cloud.bigquery.client.Client]): the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + timeout (Optional[float]): + The number of seconds to wait for the API response. Returns: bool: Boolean indicating that the cancel request was sent. @@ -719,8 +732,12 @@ def cancel(self, client=None): if self.location: extra_params["location"] = self.location + # TODO: call thorugh client._call_api() and allow passing in a retry? api_response = client._connection.api_request( - method="POST", path="%s/cancel" % (self.path,), query_params=extra_params + method="POST", + path="%s/cancel" % (self.path,), + query_params=extra_params, + timeout=timeout, ) self._set_properties(api_response["job"]) # The Future interface requires that we return True if the *attempt* @@ -751,11 +768,14 @@ def _set_future_result(self): else: self.set_result(self) - def done(self, retry=DEFAULT_RETRY): + def done(self, retry=DEFAULT_RETRY, timeout=None): """Refresh the job and checks if it is complete. Args: retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: bool: True if the job is complete, False otherwise. @@ -763,18 +783,19 @@ def done(self, retry=DEFAULT_RETRY): # Do not refresh is the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: - self.reload(retry=retry) + self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE - def result(self, timeout=None, retry=DEFAULT_RETRY): + def result(self, retry=DEFAULT_RETRY, timeout=None): """Start the job and wait for it to complete and get the result. Args: - timeout (float): - How long (in seconds) to wait for job to complete before raising - a :class:`concurrent.futures.TimeoutError`. - retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` is + interpreted as the approximate total time of **all** requests. Returns: _AsyncJob: This instance. @@ -786,7 +807,11 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): if the job did not complete in the given timeout. """ if self.state is None: - self._begin(retry=retry) + with TimeoutGuard( + timeout, timeout_error_type=concurrent.futures.TimeoutError + ) as guard: + self._begin(retry=retry, timeout=timeout) + timeout = guard.remaining_timeout # TODO: modify PollingFuture so it can pass a retry argument to done(). return super(_AsyncJob, self).result(timeout=timeout) @@ -3014,7 +3039,7 @@ def done(self, retry=DEFAULT_RETRY, timeout=None): How to retry the call that retrieves query results. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport - before retrying the HTTP request. + before using ``retry``. Returns: bool: True if the job is complete, False otherwise. @@ -3100,7 +3125,7 @@ def _format_for_exception(query, job_id): return template.format(job_id=job_id, header=header, ruler=ruler, body=body) - def _begin(self, client=None, retry=DEFAULT_RETRY): + def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): """API call: begin the job via a POST request See @@ -3112,32 +3137,39 @@ def _begin(self, client=None, retry=DEFAULT_RETRY): associated with the job object or``NoneType``. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Raises: ValueError: If the job has already begun. """ try: - super(QueryJob, self)._begin(client=client, retry=retry) + super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout) except exceptions.GoogleCloudError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self raise def result( - self, timeout=None, page_size=None, retry=DEFAULT_RETRY, max_results=None + self, page_size=None, max_results=None, retry=DEFAULT_RETRY, timeout=None ): """Start the job and wait for it to complete and get the result. Args: - timeout (float): - How long (in seconds) to wait for job to complete before - raising a :class:`concurrent.futures.TimeoutError`. - page_size (int): - (Optional) The maximum number of rows in each page of results - from this request. Non-positive values are ignored. - retry (google.api_core.retry.Retry): - (Optional) How to retry the call that retrieves rows. + page_size (Optional[int]): + The maximum number of rows in each page of results from this + request. Non-positive values are ignored. + max_results (Optional[int]): + The maximum total number of rows from this request. + retry (Optional[google.api_core.retry.Retry]): + How to retry the call that retrieves rows. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` is + interpreted as the approximate total time of **all** requests. Returns: google.cloud.bigquery.table.RowIterator: @@ -3155,13 +3187,27 @@ def result( If the job did not complete in the given timeout. """ try: - super(QueryJob, self).result(timeout=timeout) + guard = TimeoutGuard( + timeout, timeout_error_type=concurrent.futures.TimeoutError + ) + with guard: + super(QueryJob, self).result(retry=retry, timeout=timeout) + timeout = guard.remaining_timeout # Return an iterator instead of returning the job. if not self._query_results: - self._query_results = self._client._get_query_results( - self.job_id, retry, project=self.project, location=self.location + guard = TimeoutGuard( + timeout, timeout_error_type=concurrent.futures.TimeoutError ) + with guard: + self._query_results = self._client._get_query_results( + self.job_id, + retry, + project=self.project, + location=self.location, + timeout=timeout, + ) + timeout = guard.remaining_timeout except exceptions.GoogleCloudError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self @@ -3181,7 +3227,11 @@ def result( dest_table = Table(dest_table_ref, schema=schema) dest_table._properties["numRows"] = self._query_results.total_rows rows = self._client.list_rows( - dest_table, page_size=page_size, retry=retry, max_results=max_results + dest_table, + page_size=page_size, + max_results=max_results, + retry=retry, + timeout=timeout, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 8c041fa6a178..69b96b3dc984 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -34,7 +34,7 @@ def default(session): run the tests. """ # Install all test dependencies, then install local packages in-place. - session.install("mock", "pytest", "pytest-cov") + session.install("mock", "pytest", "pytest-cov", "freezegun") for local_dep in LOCAL_DEPS: session.install("-e", local_dep) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c9166bd5d7c0..a5100fe6eaef 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -24,6 +24,7 @@ import unittest import warnings +import freezegun import mock import requests import six @@ -212,6 +213,28 @@ def test_ctor_w_query_job_config(self): self.assertIsInstance(client._default_query_job_config, QueryJobConfig) self.assertTrue(client._default_query_job_config.dry_run) + def test__call_api_applying_custom_retry_on_timeout(self): + from concurrent.futures import TimeoutError + from google.cloud.bigquery.retry import DEFAULT_RETRY + + client = self._make_one() + + api_request_patcher = mock.patch.object( + client._connection, "api_request", side_effect=[TimeoutError, "result"], + ) + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, TimeoutError) + ) + + with api_request_patcher as fake_api_request: + result = client._call_api(retry, foo="bar") + + self.assertEqual(result, "result") + self.assertEqual( + fake_api_request.call_args_list, + [mock.call(foo="bar"), mock.call(foo="bar")], # was retried once + ) + def test__get_query_results_miss_w_explicit_project_and_timeout(self): from google.cloud.exceptions import NotFound @@ -288,9 +311,9 @@ def test_get_service_account_email(self): resource = {"kind": "bigquery#getServiceAccountResponse", "email": email} conn = client._connection = make_connection(resource) - service_account_email = client.get_service_account_email() + service_account_email = client.get_service_account_email(timeout=7.5) - conn.api_request.assert_called_once_with(method="GET", path=path) + conn.api_request.assert_called_once_with(method="GET", path=path, timeout=7.5) self.assertEqual(service_account_email, email) def test_get_service_account_email_w_alternate_project(self): @@ -305,7 +328,7 @@ def test_get_service_account_email_w_alternate_project(self): service_account_email = client.get_service_account_email(project=project) - conn.api_request.assert_called_once_with(method="GET", path=path) + conn.api_request.assert_called_once_with(method="GET", path=path, timeout=None) self.assertEqual(service_account_email, email) def test_list_projects_defaults(self): @@ -351,7 +374,25 @@ def test_list_projects_defaults(self): self.assertEqual(token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params={} + method="GET", path="/projects", query_params={}, timeout=None + ) + + def test_list_projects_w_timeout(self): + PROJECT_1 = "PROJECT_ONE" + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "projects": [], + } + creds = _make_credentials() + client = self._make_one(PROJECT_1, creds) + conn = client._connection = make_connection(DATA) + + iterator = client.list_projects(timeout=7.5) + six.next(iterator.pages) + + conn.api_request.assert_called_once_with( + method="GET", path="/projects", query_params={}, timeout=7.5 ) def test_list_projects_explicit_response_missing_projects_key(self): @@ -373,6 +414,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): method="GET", path="/projects", query_params={"maxResults": 3, "pageToken": TOKEN}, + timeout=None, ) def test_list_datasets_defaults(self): @@ -422,18 +464,21 @@ def test_list_datasets_defaults(self): self.assertEqual(token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={} + method="GET", path="/%s" % PATH, query_params={}, timeout=None ) - def test_list_datasets_w_project(self): + def test_list_datasets_w_project_and_timeout(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection({}) - list(client.list_datasets(project="other-project")) + list(client.list_datasets(project="other-project", timeout=7.5)) conn.api_request.assert_called_once_with( - method="GET", path="/projects/other-project/datasets", query_params={} + method="GET", + path="/projects/other-project/datasets", + query_params={}, + timeout=7.5, ) def test_list_datasets_explicit_response_missing_datasets_key(self): @@ -464,6 +509,7 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): "maxResults": 3, "pageToken": TOKEN, }, + timeout=None, ) def test_dataset_with_specified_project(self): @@ -502,9 +548,11 @@ def test_get_dataset(self): conn = client._connection = make_connection(resource) dataset_ref = client.dataset(self.DS_ID) - dataset = client.get_dataset(dataset_ref) + dataset = client.get_dataset(dataset_ref, timeout=7.5) - conn.api_request.assert_called_once_with(method="GET", path="/%s" % path) + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % path, timeout=7.5 + ) self.assertEqual(dataset.dataset_id, self.DS_ID) # Test retry. @@ -579,7 +627,7 @@ def test_create_dataset_minimal(self): ds_ref = client.dataset(self.DS_ID) before = Dataset(ds_ref) - after = client.create_dataset(before) + after = client.create_dataset(before, timeout=7.5) self.assertEqual(after.dataset_id, self.DS_ID) self.assertEqual(after.project, self.PROJECT) @@ -596,6 +644,7 @@ def test_create_dataset_minimal(self): }, "labels": {}, }, + timeout=7.5, ) def test_create_dataset_w_attrs(self): @@ -670,6 +719,7 @@ def test_create_dataset_w_attrs(self): ], "labels": LABELS, }, + timeout=None, ) def test_create_dataset_w_custom_property(self): @@ -707,6 +757,7 @@ def test_create_dataset_w_custom_property(self): "newAlphaProperty": "unreleased property", "labels": {}, }, + timeout=None, ) def test_create_dataset_w_client_location_wo_dataset_location(self): @@ -747,6 +798,7 @@ def test_create_dataset_w_client_location_wo_dataset_location(self): "labels": {}, "location": self.LOCATION, }, + timeout=None, ) def test_create_dataset_w_client_location_w_dataset_location(self): @@ -789,6 +841,7 @@ def test_create_dataset_w_client_location_w_dataset_location(self): "labels": {}, "location": OTHER_LOCATION, }, + timeout=None, ) def test_create_dataset_w_reference(self): @@ -824,6 +877,7 @@ def test_create_dataset_w_reference(self): "labels": {}, "location": self.LOCATION, }, + timeout=None, ) def test_create_dataset_w_fully_qualified_string(self): @@ -859,6 +913,7 @@ def test_create_dataset_w_fully_qualified_string(self): "labels": {}, "location": self.LOCATION, }, + timeout=None, ) def test_create_dataset_w_string(self): @@ -894,6 +949,7 @@ def test_create_dataset_w_string(self): "labels": {}, "location": self.LOCATION, }, + timeout=None, ) def test_create_dataset_alreadyexists_w_exists_ok_false(self): @@ -946,8 +1002,9 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(self): "labels": {}, "location": self.LOCATION, }, + timeout=None, ), - mock.call(method="GET", path=get_path), + mock.call(method="GET", path=get_path, timeout=None), ] ) @@ -968,12 +1025,13 @@ def test_create_routine_w_minimal_resource(self): full_routine_id = "test-routine-project.test_routines.minimal_routine" routine = Routine(full_routine_id) - actual_routine = client.create_routine(routine) + actual_routine = client.create_routine(routine, timeout=7.5) conn.api_request.assert_called_once_with( method="POST", path="/projects/test-routine-project/datasets/test_routines/routines", data=resource, + timeout=7.5, ) self.assertEqual( actual_routine.reference, RoutineReference.from_string(full_routine_id) @@ -1004,6 +1062,7 @@ def test_create_routine_w_conflict(self): method="POST", path="/projects/test-routine-project/datasets/test_routines/routines", data=resource, + timeout=None, ) def test_create_routine_w_conflict_exists_ok(self): @@ -1035,10 +1094,12 @@ def test_create_routine_w_conflict_exists_ok(self): method="POST", path="/projects/test-routine-project/datasets/test_routines/routines", data=resource, + timeout=None, ), mock.call( method="GET", path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + timeout=None, ), ] ) @@ -1055,7 +1116,7 @@ def test_create_table_w_day_partition(self): table = Table(self.TABLE_REF) table.time_partitioning = TimePartitioning() - got = client.create_table(table) + got = client.create_table(table, timeout=7.5) conn.api_request.assert_called_once_with( method="POST", @@ -1069,6 +1130,7 @@ def test_create_table_w_day_partition(self): "timePartitioning": {"type": "DAY"}, "labels": {}, }, + timeout=7.5, ) self.assertEqual(table.time_partitioning.type_, "DAY") self.assertEqual(got.table_id, self.TABLE_ID) @@ -1101,6 +1163,7 @@ def test_create_table_w_custom_property(self): "newAlphaProperty": "unreleased property", "labels": {}, }, + timeout=None, ) self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") self.assertEqual(got.table_id, self.TABLE_ID) @@ -1135,6 +1198,7 @@ def test_create_table_w_encryption_configuration(self): "labels": {}, "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, }, + timeout=None, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1164,6 +1228,7 @@ def test_create_table_w_day_partition_and_expire(self): "timePartitioning": {"type": "DAY", "expirationMs": "100"}, "labels": {}, }, + timeout=None, ) self.assertEqual(table.time_partitioning.type_, "DAY") self.assertEqual(table.time_partitioning.expiration_ms, 100) @@ -1237,6 +1302,7 @@ def test_create_table_w_schema_and_query(self): "view": {"query": query, "useLegacySql": False}, "labels": {}, }, + timeout=None, ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1284,6 +1350,7 @@ def test_create_table_w_external(self): }, "labels": {}, }, + timeout=None, ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1313,6 +1380,7 @@ def test_create_table_w_reference(self): }, "labels": {}, }, + timeout=None, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1338,6 +1406,7 @@ def test_create_table_w_fully_qualified_string(self): }, "labels": {}, }, + timeout=None, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1361,6 +1430,7 @@ def test_create_table_w_string(self): }, "labels": {}, }, + timeout=None, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1388,6 +1458,7 @@ def test_create_table_alreadyexists_w_exists_ok_false(self): }, "labels": {}, }, + timeout=None, ) def test_create_table_alreadyexists_w_exists_ok_true(self): @@ -1425,8 +1496,9 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): }, "labels": {}, }, + timeout=None, ), - mock.call(method="GET", path=get_path), + mock.call(method="GET", path=get_path, timeout=None), ] ) @@ -1460,9 +1532,11 @@ def test_get_model(self): conn = client._connection = make_connection(resource) model_ref = client.dataset(self.DS_ID).model(self.MODEL_ID) - got = client.get_model(model_ref) + got = client.get_model(model_ref, timeout=7.5) - conn.api_request.assert_called_once_with(method="GET", path="/%s" % path) + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % path, timeout=7.5 + ) self.assertEqual(got.model_id, self.MODEL_ID) def test_get_model_w_string(self): @@ -1486,7 +1560,9 @@ def test_get_model_w_string(self): model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) got = client.get_model(model_id) - conn.api_request.assert_called_once_with(method="GET", path="/%s" % path) + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % path, timeout=None + ) self.assertEqual(got.model_id, self.MODEL_ID) def test_get_routine(self): @@ -1513,11 +1589,12 @@ def test_get_routine(self): client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(resource) - actual_routine = client.get_routine(routine) + actual_routine = client.get_routine(routine, timeout=7.5) conn.api_request.assert_called_once_with( method="GET", path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + timeout=7.5, ) self.assertEqual( actual_routine.reference, @@ -1546,9 +1623,11 @@ def test_get_table(self): client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) resource = self._make_table_resource() conn = client._connection = make_connection(resource) - table = client.get_table(self.TABLE_REF) + table = client.get_table(self.TABLE_REF, timeout=7.5) - conn.api_request.assert_called_once_with(method="GET", path="/%s" % path) + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % path, timeout=7.5 + ) self.assertEqual(table.table_id, self.TABLE_ID) def test_get_table_sets_user_agent(self): @@ -1623,7 +1702,9 @@ def test_update_dataset(self): ds.labels = LABELS ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] ds2 = client.update_dataset( - ds, ["description", "friendly_name", "location", "labels", "access_entries"] + ds, + ["description", "friendly_name", "location", "labels", "access_entries"], + timeout=7.5, ) conn.api_request.assert_called_once_with( method="PATCH", @@ -1636,6 +1717,7 @@ def test_update_dataset(self): }, path="/" + PATH, headers=None, + timeout=7.5, ) self.assertEqual(ds2.description, ds.description) self.assertEqual(ds2.friendly_name, ds.friendly_name) @@ -1671,6 +1753,7 @@ def test_update_dataset_w_custom_property(self): data={"newAlphaProperty": "unreleased property"}, path=path, headers=None, + timeout=None, ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -1713,7 +1796,7 @@ def test_update_model(self): model.labels = {"x": "y"} updated_model = client.update_model( - model, ["description", "friendly_name", "labels", "expires"] + model, ["description", "friendly_name", "labels", "expires"], timeout=7.5 ) sent = { @@ -1723,7 +1806,7 @@ def test_update_model(self): "labels": {"x": "y"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None + method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 ) self.assertEqual(updated_model.model_id, model.model_id) self.assertEqual(updated_model.description, model.description) @@ -1775,6 +1858,7 @@ def test_update_routine(self): actual_routine = client.update_routine( routine, ["arguments", "language", "body", "type_", "return_type", "someNewField"], + timeout=7.5, ) # TODO: routineReference isn't needed when the Routines API supports @@ -1785,6 +1869,7 @@ def test_update_routine(self): data=sent, path="/projects/routines-project/datasets/test_routines/routines/updated_routine", headers=None, + timeout=7.5, ) self.assertEqual(actual_routine.arguments, routine.arguments) self.assertEqual(actual_routine.body, routine.body) @@ -1846,7 +1931,7 @@ def test_update_table(self): table.labels = {"x": "y"} updated_table = client.update_table( - table, ["schema", "description", "friendly_name", "labels"] + table, ["schema", "description", "friendly_name", "labels"], timeout=7.5 ) sent = { @@ -1871,7 +1956,7 @@ def test_update_table(self): "labels": {"x": "y"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None + method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 ) self.assertEqual(updated_table.description, table.description) self.assertEqual(updated_table.friendly_name, table.friendly_name) @@ -1907,6 +1992,7 @@ def test_update_table_w_custom_property(self): path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, headers=None, + timeout=None, ) self.assertEqual( updated_table._properties["newAlphaProperty"], "unreleased property" @@ -1935,6 +2021,7 @@ def test_update_table_only_use_legacy_sql(self): path="/%s" % path, data={"view": {"useLegacySql": True}}, headers=None, + timeout=None, ) self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -2008,6 +2095,7 @@ def test_update_table_w_query(self): "schema": schema_resource, }, headers=None, + timeout=None, ) def test_update_table_w_schema_None(self): @@ -2086,14 +2174,14 @@ def test_update_table_delete_property(self): self.assertEqual(req[1]["data"], sent) self.assertIsNone(table3.description) - def test_list_tables_empty(self): + def test_list_tables_empty_w_timeout(self): path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection({}) dataset = client.dataset(self.DS_ID) - iterator = client.list_tables(dataset) + iterator = client.list_tables(dataset, timeout=7.5) self.assertIs(iterator.dataset, dataset) page = six.next(iterator.pages) tables = list(page) @@ -2102,17 +2190,17 @@ def test_list_tables_empty(self): self.assertEqual(tables, []) self.assertIsNone(token) conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={} + method="GET", path=path, query_params={}, timeout=7.5 ) - def test_list_models_empty(self): + def test_list_models_empty_w_timeout(self): path = "/projects/{}/datasets/{}/models".format(self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection({}) dataset_id = "{}.{}".format(self.PROJECT, self.DS_ID) - iterator = client.list_models(dataset_id) + iterator = client.list_models(dataset_id, timeout=7.5) page = six.next(iterator.pages) models = list(page) token = iterator.next_page_token @@ -2120,7 +2208,7 @@ def test_list_models_empty(self): self.assertEqual(models, []) self.assertIsNone(token) conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={} + method="GET", path=path, query_params={}, timeout=7.5 ) def test_list_models_defaults(self): @@ -2168,7 +2256,7 @@ def test_list_models_defaults(self): self.assertEqual(token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={} + method="GET", path="/%s" % PATH, query_params={}, timeout=None ) def test_list_models_wrong_type(self): @@ -2177,12 +2265,12 @@ def test_list_models_wrong_type(self): with self.assertRaises(TypeError): client.list_models(client.dataset(self.DS_ID).model("foo")) - def test_list_routines_empty(self): + def test_list_routines_empty_w_timeout(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection({}) - iterator = client.list_routines("test-routines.test_routines") + iterator = client.list_routines("test-routines.test_routines", timeout=7.5) page = six.next(iterator.pages) routines = list(page) token = iterator.next_page_token @@ -2193,6 +2281,7 @@ def test_list_routines_empty(self): method="GET", path="/projects/test-routines/datasets/test_routines/routines", query_params={}, + timeout=7.5, ) def test_list_routines_defaults(self): @@ -2244,7 +2333,7 @@ def test_list_routines_defaults(self): self.assertEqual(actual_token, token) conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={} + method="GET", path=path, query_params={}, timeout=None ) def test_list_routines_wrong_type(self): @@ -2305,7 +2394,7 @@ def test_list_tables_defaults(self): self.assertEqual(token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={} + method="GET", path="/%s" % PATH, query_params={}, timeout=None ) def test_list_tables_explicit(self): @@ -2367,6 +2456,7 @@ def test_list_tables_explicit(self): method="GET", path="/%s" % PATH, query_params={"maxResults": 3, "pageToken": TOKEN}, + timeout=None, ) def test_list_tables_wrong_type(self): @@ -2386,9 +2476,9 @@ def test_delete_dataset(self): client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(*([{}] * len(datasets))) for arg in datasets: - client.delete_dataset(arg) + client.delete_dataset(arg, timeout=7.5) conn.api_request.assert_called_with( - method="DELETE", path="/%s" % PATH, query_params={} + method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5 ) def test_delete_dataset_delete_contents(self): @@ -2405,6 +2495,7 @@ def test_delete_dataset_delete_contents(self): method="DELETE", path="/%s" % PATH, query_params={"deleteContents": "true"}, + timeout=None, ) def test_delete_dataset_wrong_type(self): @@ -2425,7 +2516,9 @@ def test_delete_dataset_w_not_found_ok_false(self): with self.assertRaises(google.api_core.exceptions.NotFound): client.delete_dataset(self.DS_ID) - conn.api_request.assert_called_with(method="DELETE", path=path, query_params={}) + conn.api_request.assert_called_with( + method="DELETE", path=path, query_params={}, timeout=None + ) def test_delete_dataset_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) @@ -2438,7 +2531,9 @@ def test_delete_dataset_w_not_found_ok_true(self): client.delete_dataset(self.DS_ID, not_found_ok=True) - conn.api_request.assert_called_with(method="DELETE", path=path, query_params={}) + conn.api_request.assert_called_with( + method="DELETE", path=path, query_params={}, timeout=None + ) def test_delete_model(self): from google.cloud.bigquery.model import Model @@ -2460,8 +2555,10 @@ def test_delete_model(self): conn = client._connection = make_connection(*([{}] * len(models))) for arg in models: - client.delete_model(arg) - conn.api_request.assert_called_with(method="DELETE", path="/%s" % path) + client.delete_model(arg, timeout=7.5) + conn.api_request.assert_called_with( + method="DELETE", path="/%s" % path, timeout=7.5 + ) def test_delete_model_w_wrong_type(self): creds = _make_credentials() @@ -2483,7 +2580,7 @@ def test_delete_model_w_not_found_ok_false(self): with self.assertRaises(google.api_core.exceptions.NotFound): client.delete_model("{}.{}".format(self.DS_ID, self.MODEL_ID)) - conn.api_request.assert_called_with(method="DELETE", path=path) + conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) def test_delete_model_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/models/{}".format( @@ -2500,7 +2597,7 @@ def test_delete_model_w_not_found_ok_true(self): "{}.{}".format(self.DS_ID, self.MODEL_ID), not_found_ok=True ) - conn.api_request.assert_called_with(method="DELETE", path=path) + conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) def test_delete_routine(self): from google.cloud.bigquery.routine import Routine @@ -2518,10 +2615,11 @@ def test_delete_routine(self): conn = client._connection = make_connection(*([{}] * len(routines))) for routine in routines: - client.delete_routine(routine) + client.delete_routine(routine, timeout=7.5) conn.api_request.assert_called_with( method="DELETE", path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + timeout=7.5, ) def test_delete_routine_w_wrong_type(self): @@ -2544,6 +2642,7 @@ def test_delete_routine_w_not_found_ok_false(self): conn.api_request.assert_called_with( method="DELETE", path="/projects/routines-project/datasets/test_routines/routines/test_routine", + timeout=None, ) def test_delete_routine_w_not_found_ok_true(self): @@ -2561,6 +2660,7 @@ def test_delete_routine_w_not_found_ok_true(self): conn.api_request.assert_called_with( method="DELETE", path="/projects/routines-project/datasets/test_routines/routines/test_routine", + timeout=None, ) def test_delete_table(self): @@ -2586,8 +2686,10 @@ def test_delete_table(self): conn = client._connection = make_connection(*([{}] * len(tables))) for arg in tables: - client.delete_table(arg) - conn.api_request.assert_called_with(method="DELETE", path="/%s" % path) + client.delete_table(arg, timeout=7.5) + conn.api_request.assert_called_with( + method="DELETE", path="/%s" % path, timeout=7.5 + ) def test_delete_table_w_wrong_type(self): creds = _make_credentials() @@ -2609,7 +2711,7 @@ def test_delete_table_w_not_found_ok_false(self): with self.assertRaises(google.api_core.exceptions.NotFound): client.delete_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - conn.api_request.assert_called_with(method="DELETE", path=path) + conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) def test_delete_table_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/tables/{}".format( @@ -2626,7 +2728,7 @@ def test_delete_table_w_not_found_ok_true(self): "{}.{}".format(self.DS_ID, self.TABLE_ID), not_found_ok=True ) - conn.api_request.assert_called_with(method="DELETE", path=path) + conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) def test_job_from_resource_unknown_type(self): from google.cloud.bigquery.job import UnknownJob @@ -2653,6 +2755,7 @@ def test_get_job_miss_w_explict_project(self): method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", query_params={"projection": "full", "location": self.LOCATION}, + timeout=None, ) def test_get_job_miss_w_client_location(self): @@ -2671,9 +2774,10 @@ def test_get_job_miss_w_client_location(self): method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", query_params={"projection": "full", "location": self.LOCATION}, + timeout=None, ) - def test_get_job_hit(self): + def test_get_job_hit_w_timeout(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import WriteDisposition @@ -2702,7 +2806,7 @@ def test_get_job_hit(self): client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection(ASYNC_QUERY_DATA) - job = client.get_job(JOB_ID) + job = client.get_job(JOB_ID, timeout=7.5) self.assertIsInstance(job, QueryJob) self.assertEqual(job.job_id, JOB_ID) @@ -2713,6 +2817,7 @@ def test_get_job_hit(self): method="GET", path="/projects/PROJECT/jobs/query_job", query_params={"projection": "full"}, + timeout=7.5, ) def test_cancel_job_miss_w_explict_project(self): @@ -2731,6 +2836,7 @@ def test_cancel_job_miss_w_explict_project(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, + timeout=None, ) def test_cancel_job_miss_w_client_location(self): @@ -2749,6 +2855,7 @@ def test_cancel_job_miss_w_client_location(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, + timeout=None, ) def test_cancel_job_hit(self): @@ -2777,6 +2884,31 @@ def test_cancel_job_hit(self): method="POST", path="/projects/PROJECT/jobs/query_job/cancel", query_params={"projection": "full"}, + timeout=None, + ) + + def test_cancel_job_w_timeout(self): + JOB_ID = "query_job" + QUERY = "SELECT * from test_dataset:test_table" + QUERY_JOB_RESOURCE = { + "id": "{}:{}".format(self.PROJECT, JOB_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "state": "RUNNING", + "configuration": {"query": {"query": QUERY}}, + } + RESOURCE = {"job": QUERY_JOB_RESOURCE} + + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection(RESOURCE) + + client.cancel_job(JOB_ID, timeout=7.5) + + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/{}/jobs/query_job/cancel".format(self.PROJECT), + query_params={"projection": "full"}, + timeout=7.5, ) def test_list_jobs_defaults(self): @@ -2890,7 +3022,10 @@ def test_list_jobs_defaults(self): self.assertEqual(token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={"projection": "full"} + method="GET", + path="/%s" % PATH, + query_params={"projection": "full"}, + timeout=None, ) def test_list_jobs_load_job_wo_sourceUris(self): @@ -2932,7 +3067,10 @@ def test_list_jobs_load_job_wo_sourceUris(self): self.assertEqual(token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={"projection": "full"} + method="GET", + path="/%s" % PATH, + query_params={"projection": "full"}, + timeout=None, ) def test_list_jobs_explicit_missing(self): @@ -2963,6 +3101,7 @@ def test_list_jobs_explicit_missing(self): "allUsers": True, "stateFilter": "done", }, + timeout=None, ) def test_list_jobs_w_project(self): @@ -2976,6 +3115,21 @@ def test_list_jobs_w_project(self): method="GET", path="/projects/other-project/jobs", query_params={"projection": "full"}, + timeout=None, + ) + + def test_list_jobs_w_timeout(self): + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection({}) + + list(client.list_jobs(timeout=7.5)) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/{}/jobs".format(self.PROJECT), + query_params={"projection": "full"}, + timeout=7.5, ) def test_list_jobs_w_time_filter(self): @@ -2999,6 +3153,7 @@ def test_list_jobs_w_time_filter(self): "minCreationTime": "1", "maxCreationTime": str(end_time_millis), }, + timeout=None, ) def test_list_jobs_w_parent_job_filter(self): @@ -3016,6 +3171,7 @@ def test_list_jobs_w_parent_job_filter(self): method="GET", path="/projects/%s/jobs" % self.PROJECT, query_params={"projection": "full", "parentJobId": "parent-job-123"}, + timeout=None, ) conn.api_request.reset_mock() @@ -3048,12 +3204,15 @@ def test_load_table_from_uri(self): destination = client.dataset(self.DS_ID).table(DESTINATION) job = client.load_table_from_uri( - SOURCE_URI, destination, job_id=JOB, job_config=job_config + SOURCE_URI, destination, job_id=JOB, job_config=job_config, timeout=7.5 ) # Check that load_table_from_uri actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=7.5, ) # the original config object should not have been modified @@ -3112,7 +3271,10 @@ def test_load_table_from_uri_w_explicit_project(self): # Check that load_table_from_uri actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/other-project/jobs", data=resource + method="POST", + path="/projects/other-project/jobs", + data=resource, + timeout=None, ) def test_load_table_from_uri_w_client_location(self): @@ -3153,7 +3315,10 @@ def test_load_table_from_uri_w_client_location(self): # Check that load_table_from_uri actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/other-project/jobs", data=resource + method="POST", + path="/projects/other-project/jobs", + data=resource, + timeout=None, ) def test_load_table_from_uri_w_invalid_job_config(self): @@ -3354,11 +3519,14 @@ def test_copy_table(self): source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) - job = client.copy_table(source, destination, job_id=JOB) + job = client.copy_table(source, destination, job_id=JOB, timeout=7.5) # Check that copy_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=7.5, ) self.assertIsInstance(job, CopyJob) @@ -3421,7 +3589,10 @@ def test_copy_table_w_explicit_project(self): # Check that copy_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/other-project/jobs", data=resource + method="POST", + path="/projects/other-project/jobs", + data=resource, + timeout=None, ) def test_copy_table_w_client_location(self): @@ -3468,7 +3639,10 @@ def test_copy_table_w_client_location(self): # Check that copy_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/other-project/jobs", data=resource + method="POST", + path="/projects/other-project/jobs", + data=resource, + timeout=None, ) def test_copy_table_w_source_strings(self): @@ -3556,7 +3730,10 @@ def test_copy_table_w_valid_job_config(self): # Check that copy_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=None, ) self.assertIsInstance(job._configuration, CopyJobConfig) @@ -3589,11 +3766,11 @@ def test_extract_table(self): dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) - job = client.extract_table(source, DESTINATION, job_id=JOB) + job = client.extract_table(source, DESTINATION, job_id=JOB, timeout=7.5) # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=RESOURCE + method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, ) # Check the job resource. @@ -3659,7 +3836,10 @@ def test_extract_table_w_explicit_project(self): # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/other-project/jobs", data=resource + method="POST", + path="/projects/other-project/jobs", + data=resource, + timeout=None, ) def test_extract_table_w_client_location(self): @@ -3700,7 +3880,10 @@ def test_extract_table_w_client_location(self): # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/other-project/jobs", data=resource + method="POST", + path="/projects/other-project/jobs", + data=resource, + timeout=None, ) def test_extract_table_generated_job_id(self): @@ -3743,6 +3926,7 @@ def test_extract_table_generated_job_id(self): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") self.assertIsInstance(req["data"]["jobReference"]["jobId"], six.string_types) + self.assertIsNone(req["timeout"]) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -3787,6 +3971,7 @@ def test_extract_table_w_destination_uris(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") + self.assertIsNone(req["timeout"]) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -3822,12 +4007,34 @@ def test_query_defaults(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") + self.assertIsNone(req["timeout"]) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], six.string_types) sent_config = sent["configuration"]["query"] self.assertEqual(sent_config["query"], QUERY) self.assertFalse(sent_config["useLegacySql"]) + def test_query_w_explicit_timeout(self): + query = "select count(*) from persons" + resource = { + "jobReference": {"projectId": self.PROJECT, "jobId": mock.ANY}, + "configuration": {"query": {"query": query, "useLegacySql": False}}, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(resource) + + client.query(query, timeout=7.5) + + # Check that query actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/{}/jobs".format(self.PROJECT), + data=resource, + timeout=7.5, + ) + def test_query_w_explicit_project(self): job_id = "some-job-id" query = "select count(*) from persons" @@ -3850,7 +4057,10 @@ def test_query_w_explicit_project(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/other-project/jobs", data=resource + method="POST", + path="/projects/other-project/jobs", + data=resource, + timeout=None, ) def test_query_w_explicit_job_config(self): @@ -3906,7 +4116,7 @@ def test_query_w_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource + method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None ) # the original config object should not have been modified @@ -3950,7 +4160,7 @@ def test_query_preserving_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource + method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None ) # the original config object should not have been modified @@ -4002,7 +4212,7 @@ def test_query_preserving_explicit_default_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource + method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None ) # the original default config object should not have been modified @@ -4087,7 +4297,7 @@ def test_query_w_explicit_job_config_override(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource + method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None ) def test_query_w_client_default_config_no_incoming(self): @@ -4128,7 +4338,7 @@ def test_query_w_client_default_config_no_incoming(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource + method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None ) def test_query_w_invalid_default_job_config(self): @@ -4170,7 +4380,10 @@ def test_query_w_client_location(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/other-project/jobs", data=resource + method="POST", + path="/projects/other-project/jobs", + data=resource, + timeout=None, ) def test_query_detect_location(self): @@ -4241,6 +4454,7 @@ def test_query_w_udf_resources(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") + self.assertIsNone(req["timeout"]) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], six.string_types) sent_config = sent["configuration"]["query"] @@ -4296,6 +4510,7 @@ def test_query_w_query_parameters(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") + self.assertIsNone(req["timeout"]) sent = req["data"] self.assertEqual(sent["jobReference"]["jobId"], JOB) sent_config = sent["configuration"]["query"] @@ -4310,6 +4525,31 @@ def test_query_w_query_parameters(self): }, ) + def test_insert_rows_w_timeout(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection({}) + table = Table(self.TABLE_REF) + + ROWS = [ + ("Phred Phlyntstone", 32), + ("Bharney Rhubble", 33), + ] + schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + client.insert_rows(table, ROWS, selected_fields=schema, timeout=7.5) + + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req.get("timeout"), 7.5) + def test_insert_rows_wo_schema(self): from google.cloud.bigquery.table import Table @@ -4384,6 +4624,7 @@ def _row_data(row): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/%s" % PATH) self.assertEqual(req["data"], SENT) + self.assertIsNone(req["timeout"]) def test_insert_rows_w_list_of_dictionaries(self): import datetime @@ -4448,7 +4689,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT + method="POST", path="/%s" % PATH, data=SENT, timeout=None ) def test_insert_rows_w_list_of_Rows(self): @@ -4493,7 +4734,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT + method="POST", path="/%s" % PATH, data=SENT, timeout=None ) def test_insert_rows_w_skip_invalid_and_ignore_unknown(self): @@ -4570,7 +4811,7 @@ def _row_data(row): errors[0]["errors"][0], RESPONSE["insertErrors"][0]["errors"][0] ) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT + method="POST", path="/%s" % PATH, data=SENT, timeout=None ) def test_insert_rows_w_repeated_fields(self): @@ -4664,7 +4905,7 @@ def test_insert_rows_w_repeated_fields(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT + method="POST", path="/%s" % PATH, data=SENT, timeout=None, ) def test_insert_rows_w_record_schema(self): @@ -4733,7 +4974,7 @@ def test_insert_rows_w_record_schema(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT + method="POST", path="/%s" % PATH, data=SENT, timeout=None ) def test_insert_rows_w_explicit_none_insert_ids(self): @@ -4767,7 +5008,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/{}".format(PATH), data=SENT + method="POST", path="/{}".format(PATH), data=SENT, timeout=None, ) def test_insert_rows_errors(self): @@ -4835,6 +5076,7 @@ def test_insert_rows_w_numeric(self): project, ds_id, table_id ), data=sent, + timeout=None, ) @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -4871,7 +5113,7 @@ def test_insert_rows_from_dataframe(self): with mock.patch("uuid.uuid4", side_effect=map(str, range(len(dataframe)))): error_info = client.insert_rows_from_dataframe( - table, dataframe, chunk_size=3 + table, dataframe, chunk_size=3, timeout=7.5 ) self.assertEqual(len(error_info), 2) @@ -4910,7 +5152,9 @@ def test_insert_rows_from_dataframe(self): for call, expected_data in six.moves.zip_longest( actual_calls, EXPECTED_SENT_DATA ): - expected_call = mock.call(method="POST", path=API_PATH, data=expected_data) + expected_call = mock.call( + method="POST", path=API_PATH, data=expected_data, timeout=7.5 + ) assert call == expected_call @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -4955,7 +5199,9 @@ def test_insert_rows_from_dataframe_many_columns(self): } ] } - expected_call = mock.call(method="POST", path=API_PATH, data=EXPECTED_SENT_DATA) + expected_call = mock.call( + method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + ) actual_calls = conn.api_request.call_args_list assert len(actual_calls) == 1 @@ -5007,7 +5253,7 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): actual_calls = conn.api_request.call_args_list assert len(actual_calls) == 1 assert actual_calls[0] == mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA + method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None ) def test_insert_rows_json(self): @@ -5050,11 +5296,11 @@ def test_insert_rows_json(self): } with mock.patch("uuid.uuid4", side_effect=map(str, range(len(ROWS)))): - errors = client.insert_rows_json(table, ROWS) + errors = client.insert_rows_json(table, ROWS, timeout=7.5) self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT + method="POST", path="/%s" % PATH, data=SENT, timeout=7.5, ) def test_insert_rows_json_with_string_id(self): @@ -5077,6 +5323,7 @@ def test_insert_rows_json_with_string_id(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected, + timeout=None, ) def test_insert_rows_json_w_explicit_none_insert_ids(self): @@ -5098,6 +5345,7 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected, + timeout=None, ) def test_list_partitions(self): @@ -5142,6 +5390,43 @@ def test_list_partitions_with_string_id(self): self.assertEqual(len(partition_list), 0) + def test_list_partitions_splitting_timout_between_requests(self): + from google.cloud.bigquery.table import Table + + row_count = 2 + meta_info = _make_list_partitons_meta_info( + self.PROJECT, self.DS_ID, self.TABLE_ID, row_count + ) + + data = { + "totalRows": str(row_count), + "rows": [{"f": [{"v": "20180101"}]}, {"f": [{"v": "20180102"}]}], + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + client._connection = make_connection(meta_info, data) + table = Table(self.TABLE_REF) + + with freezegun.freeze_time("2019-01-01 00:00:00", tick=False) as frozen_time: + + def delayed_get_table(*args, **kwargs): + frozen_time.tick(delta=1.4) + return orig_get_table(*args, **kwargs) + + orig_get_table = client.get_table + client.get_table = mock.Mock(side_effect=delayed_get_table) + + client.list_partitions(table, timeout=5.0) + + client.get_table.assert_called_once() + _, kwargs = client.get_table.call_args + self.assertEqual(kwargs.get("timeout"), 5.0) + + client._connection.api_request.assert_called() + _, kwargs = client._connection.api_request.call_args + self.assertAlmostEqual(kwargs.get("timeout"), 3.6, places=5) + def test_list_rows(self): import datetime from google.cloud._helpers import UTC @@ -5202,7 +5487,7 @@ def _bigquery_timestamp_float_repr(ts_float): joined = SchemaField("joined", "TIMESTAMP", mode="NULLABLE") table = Table(self.TABLE_REF, schema=[full_name, age, joined]) - iterator = client.list_rows(table) + iterator = client.list_rows(table, timeout=7.5) page = six.next(iterator.pages) rows = list(page) total_rows = iterator.total_rows @@ -5218,7 +5503,7 @@ def _bigquery_timestamp_float_repr(ts_float): self.assertEqual(page_token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={} + method="GET", path="/%s" % PATH, query_params={}, timeout=7.5 ) def test_list_rows_empty_table(self): @@ -5326,6 +5611,7 @@ def test_list_rows_repeated_fields(self): method="GET", path="/%s" % PATH, query_params={"selectedFields": "color,struct"}, + timeout=None, ) def test_list_rows_w_record_schema(self): @@ -5392,7 +5678,7 @@ def test_list_rows_w_record_schema(self): self.assertEqual(page_token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={} + method="GET", path="/%s" % PATH, query_params={}, timeout=None ) def test_list_rows_with_missing_schema(self): @@ -5446,19 +5732,61 @@ def test_list_rows_with_missing_schema(self): row_iter = client.list_rows(table) - conn.api_request.assert_called_once_with(method="GET", path=table_path) + conn.api_request.assert_called_once_with( + method="GET", path=table_path, timeout=None + ) conn.api_request.reset_mock() self.assertEqual(row_iter.total_rows, 2, msg=repr(table)) rows = list(row_iter) conn.api_request.assert_called_once_with( - method="GET", path=tabledata_path, query_params={} + method="GET", path=tabledata_path, query_params={}, timeout=None ) self.assertEqual(row_iter.total_rows, 3, msg=repr(table)) self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table)) self.assertEqual(rows[1].age, 31, msg=repr(table)) self.assertIsNone(rows[2].age, msg=repr(table)) + def test_list_rows_splitting_timout_between_requests(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + response = {"totalRows": "0", "rows": []} + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + client._connection = make_connection(response, response) + + table = Table( + self.TABLE_REF, schema=[SchemaField("field_x", "INTEGER", mode="NULLABLE")] + ) + + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False) as frozen_time: + + def delayed_get_table(*args, **kwargs): + frozen_time.tick(delta=1.4) + return table + + client.get_table = mock.Mock(side_effect=delayed_get_table) + + rows_iter = client.list_rows( + "{}.{}.{}".format( + self.TABLE_REF.project, + self.TABLE_REF.dataset_id, + self.TABLE_REF.table_id, + ), + timeout=5.0, + ) + six.next(rows_iter.pages) + + client.get_table.assert_called_once() + _, kwargs = client.get_table.call_args + self.assertEqual(kwargs.get("timeout"), 5.0) + + client._connection.api_request.assert_called_once() + _, kwargs = client._connection.api_request.call_args + self.assertAlmostEqual(kwargs.get("timeout"), 3.6) + def test_list_rows_error(self): creds = _make_credentials() http = object() diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index e732bed4dcc6..b796f3f73675 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -18,6 +18,7 @@ import textwrap import unittest +import freezegun import mock import pytest import requests @@ -626,6 +627,7 @@ def test__begin_defaults(self): method="POST", path="/projects/{}/jobs".format(self.PROJECT), data=resource, + timeout=None, ) self.assertEqual(job._properties, resource) @@ -649,13 +651,14 @@ def test__begin_explicit(self): call_api.return_value = resource retry = DEFAULT_RETRY.with_deadline(1) - job._begin(client=client, retry=retry) + job._begin(client=client, retry=retry, timeout=7.5) call_api.assert_called_once_with( retry, method="POST", path="/projects/{}/jobs".format(self.PROJECT), data=resource, + timeout=7.5, ) self.assertEqual(job._properties, resource) @@ -675,6 +678,7 @@ def test_exists_defaults_miss(self): method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), query_params={"fields": "id", "location": self.LOCATION}, + timeout=None, ) def test_exists_explicit_hit(self): @@ -702,6 +706,24 @@ def test_exists_explicit_hit(self): method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), query_params={"fields": "id"}, + timeout=None, + ) + + def test_exists_w_timeout(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + PATH = "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + job = self._set_properties_job() + call_api = job._client._call_api = mock.Mock() + + job.exists(timeout=7.5) + + call_api.assert_called_once_with( + DEFAULT_RETRY, + method="GET", + path=PATH, + query_params={"fields": "id"}, + timeout=7.5, ) def test_reload_defaults(self): @@ -780,6 +802,7 @@ def test_cancel_defaults(self): method="POST", path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), query_params={"location": self.LOCATION}, + timeout=None, ) self.assertEqual(job._properties, resource) @@ -798,12 +821,13 @@ def test_cancel_explicit(self): client = _make_client(project=other_project) connection = client._connection = _make_connection(response) - self.assertTrue(job.cancel(client=client)) + self.assertTrue(job.cancel(client=client, timeout=7.5)) connection.api_request.assert_called_once_with( method="POST", path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), query_params={}, + timeout=7.5, ) self.assertEqual(job._properties, resource) @@ -874,7 +898,7 @@ def test_done_defaults_wo_state(self): self.assertFalse(job.done()) - reload_.assert_called_once_with(retry=DEFAULT_RETRY) + reload_.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) def test_done_explicit_wo_state(self): from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -884,9 +908,9 @@ def test_done_explicit_wo_state(self): reload_ = job.reload = mock.Mock() retry = DEFAULT_RETRY.with_deadline(1) - self.assertFalse(job.done(retry=retry)) + self.assertFalse(job.done(retry=retry, timeout=7.5)) - reload_.assert_called_once_with(retry=retry) + reload_.assert_called_once_with(retry=retry, timeout=7.5) def test_done_already(self): client = _make_client(project=self.PROJECT) @@ -905,7 +929,7 @@ def test_result_default_wo_state(self, result): self.assertIs(job.result(), result.return_value) - begin.assert_called_once_with(retry=DEFAULT_RETRY) + begin.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) result.assert_called_once_with(timeout=None) @mock.patch("google.api_core.future.polling.PollingFuture.result") @@ -917,7 +941,7 @@ def test_result_w_retry_wo_state(self, result): self.assertIs(job.result(retry=retry), result.return_value) - begin.assert_called_once_with(retry=retry) + begin.assert_called_once_with(retry=retry, timeout=None) result.assert_called_once_with(timeout=None) @mock.patch("google.api_core.future.polling.PollingFuture.result") @@ -933,6 +957,24 @@ def test_result_explicit_w_state(self, result): begin.assert_not_called() result.assert_called_once_with(timeout=timeout) + @mock.patch("google.api_core.future.polling.PollingFuture.result") + def test_result_splitting_timout_between_requests(self, result): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + begin = job._begin = mock.Mock() + retry = mock.Mock() + + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False) as frozen_time: + + def delayed_begin(*args, **kwargs): + frozen_time.tick(delta=0.3) + + begin.side_effect = delayed_begin + job.result(retry=retry, timeout=1.0) + + begin.assert_called_once_with(retry=retry, timeout=1.0) + result.assert_called_once_with(timeout=0.7) + def test_cancelled_wo_error_result(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -2288,6 +2330,7 @@ def test_begin_w_bound_client(self): } }, }, + timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -2325,7 +2368,9 @@ def test_begin_w_autodetect(self): } }, } - conn.api_request.assert_called_once_with(method="POST", path=path, data=sent) + conn.api_request.assert_called_once_with( + method="POST", path=path, data=sent, timeout=None + ) self._verifyResourceProperties(job, resource) def test_begin_w_alternate_client(self): @@ -2449,7 +2494,7 @@ def test_exists_miss_w_bound_client(self): self.assertFalse(job.exists()) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"} + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def test_exists_hit_w_alternate_client(self): @@ -2464,7 +2509,7 @@ def test_exists_hit_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"} + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def test_exists_miss_w_job_reference(self): @@ -2481,6 +2526,7 @@ def test_exists_miss_w_job_reference(self): method="GET", path="/projects/other-project/jobs/my-job-id", query_params={"fields": "id", "location": "US"}, + timeout=None, ) def test_reload_w_bound_client(self): @@ -2545,7 +2591,7 @@ def test_cancel_w_bound_client(self): job.cancel() conn.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={} + method="POST", path=PATH, query_params={}, timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -2563,7 +2609,7 @@ def test_cancel_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={} + method="POST", path=PATH, query_params={}, timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -2584,6 +2630,7 @@ def test_cancel_w_job_reference(self): method="POST", path="/projects/alternative-project/jobs/{}/cancel".format(self.JOB_ID), query_params={"location": "US"}, + timeout=None, ) @@ -2898,6 +2945,7 @@ def test_begin_w_bound_client(self): } }, }, + timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -2946,6 +2994,7 @@ def test_begin_w_alternate_client(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "configuration": {"copy": COPY_CONFIGURATION}, }, + timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -2961,7 +3010,7 @@ def test_exists_miss_w_bound_client(self): self.assertFalse(job.exists()) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"} + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, ) def test_exists_hit_w_alternate_client(self): @@ -2978,7 +3027,7 @@ def test_exists_hit_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"} + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def test_reload_w_bound_client(self): @@ -3259,6 +3308,7 @@ def test_begin_w_bound_client(self): } }, }, + timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -3308,6 +3358,7 @@ def test_begin_w_alternate_client(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "configuration": {"extract": EXTRACT_CONFIGURATION}, }, + timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -3322,7 +3373,7 @@ def test_exists_miss_w_bound_client(self): self.assertFalse(job.exists()) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"} + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, ) def test_exists_hit_w_alternate_client(self): @@ -3339,7 +3390,7 @@ def test_exists_hit_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"} + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def test_reload_w_bound_client(self): @@ -4519,7 +4570,8 @@ def test_result_w_timeout(self): client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) - job.result(timeout=1.0) + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): + job.result(timeout=1.0) self.assertEqual(len(connection.api_request.call_args_list), 3) begin_request = connection.api_request.call_args_list[0] @@ -4534,6 +4586,49 @@ def test_result_w_timeout(self): self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) self.assertEqual(reload_request[1]["method"], "GET") + @mock.patch("google.api_core.future.polling.PollingFuture.result") + def test_result_splitting_timout_between_requests(self, polling_result): + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "5", + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + + connection = _make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + client.list_rows = mock.Mock() + + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False) as frozen_time: + + def delayed_result(*args, **kwargs): + frozen_time.tick(delta=0.8) + + polling_result.side_effect = delayed_result + + def delayed_get_results(*args, **kwargs): + frozen_time.tick(delta=0.5) + return orig_get_results(*args, **kwargs) + + orig_get_results = client._get_query_results + client._get_query_results = mock.Mock(side_effect=delayed_get_results) + job.result(timeout=2.0) + + polling_result.assert_called_once_with(timeout=2.0) + + client._get_query_results.assert_called_once() + _, kwargs = client._get_query_results.call_args + self.assertAlmostEqual(kwargs.get("timeout"), 1.2) + + client.list_rows.assert_called_once() + _, kwargs = client.list_rows.call_args + self.assertAlmostEqual(kwargs.get("timeout"), 0.7) + def test_result_w_page_size(self): # Arrange query_results_resource = { @@ -4580,12 +4675,16 @@ def test_result_w_page_size(self): conn.api_request.assert_has_calls( [ mock.call( - method="GET", path=tabledata_path, query_params={"maxResults": 3} + method="GET", + path=tabledata_path, + query_params={"maxResults": 3}, + timeout=None, ), mock.call( method="GET", path=tabledata_path, query_params={"pageToken": "some-page-token", "maxResults": 3}, + timeout=None, ), ] ) @@ -4687,6 +4786,28 @@ def test__begin_error(self): expected_line = "{}:{}".format(i, line) assert expected_line in full_text + def test__begin_w_timeout(self): + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + job._begin(timeout=7.5) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": {"query": self.QUERY, "useLegacySql": False} + }, + }, + timeout=7.5, + ) + def test_begin_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import QueryJobConfig @@ -4726,6 +4847,7 @@ def test_begin_w_bound_client(self): } }, }, + timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -4795,6 +4917,7 @@ def test_begin_w_alternate_client(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "configuration": {"dryRun": True, "query": QUERY_CONFIGURATION}, }, + timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -4845,6 +4968,7 @@ def test_begin_w_udf(self): } }, }, + timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -4892,6 +5016,7 @@ def test_begin_w_named_query_parameter(self): } }, }, + timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -4935,6 +5060,7 @@ def test_begin_w_positional_query_parameter(self): } }, }, + timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -5011,6 +5137,7 @@ def test_begin_w_table_defs(self): } }, }, + timeout=None, ) self._verifyResourceProperties(job, want_resource) @@ -5043,6 +5170,7 @@ def test_dry_run_query(self): "dryRun": True, }, }, + timeout=None, ) self._verifyResourceProperties(job, RESOURCE) @@ -5055,7 +5183,7 @@ def test_exists_miss_w_bound_client(self): self.assertFalse(job.exists()) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"} + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def test_exists_hit_w_alternate_client(self): @@ -5070,7 +5198,7 @@ def test_exists_hit_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"} + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def test_reload_w_bound_client(self): From 627fb4081bd3312d7dc429cb5ccb4585d4f6ef06 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 27 Dec 2019 14:56:46 -0600 Subject: [PATCH 0725/2016] cleanup(bigquery): remove unused query code sample (#10024) --- .../samples/client_query_partitioned_table.py | 51 ------------------- .../test_client_query_partitioned_table.py | 26 ---------- 2 files changed, 77 deletions(-) delete mode 100644 packages/google-cloud-bigquery/samples/client_query_partitioned_table.py delete mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_partitioned_table.py diff --git a/packages/google-cloud-bigquery/samples/client_query_partitioned_table.py b/packages/google-cloud-bigquery/samples/client_query_partitioned_table.py deleted file mode 100644 index 71ec3a0e7086..000000000000 --- a/packages/google-cloud-bigquery/samples/client_query_partitioned_table.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -def client_query_partitioned_table(client, table_id): - - # [START bigquery_query_partitioned_table] - import datetime - - from google.cloud import bigquery - - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() - - # TODO(developer): Set table_id to the ID of the table to query from. - # table_id = "your-project.your_dataset.your_table_name" - - sql = """ - SELECT * - FROM `{}` - WHERE date BETWEEN @start_date AND @end_date - """.format( - table_id - ) - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - "start_date", "DATE", datetime.date(1800, 1, 1) - ), - bigquery.ScalarQueryParameter( - "end_date", "DATE", datetime.date(1899, 12, 31) - ), - ] - ) - query_job = client.query(sql, job_config=job_config) # Make an API request. - - rows = list(query_job) - print("{} states were admitted to the US in the 1800s".format(len(rows))) - # [END bigquery_query_partitioned_table] diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_partitioned_table.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_partitioned_table.py deleted file mode 100644 index 73fd71b54d5d..000000000000 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_partitioned_table.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .. import client_load_partitioned_table -from .. import client_query_partitioned_table - - -def test_client_query_partitioned_table(capsys, client, random_table_id): - - client_load_partitioned_table.client_load_partitioned_table(client, random_table_id) - client_query_partitioned_table.client_query_partitioned_table( - client, random_table_id - ) - out, err = capsys.readouterr() - assert "29 states were admitted to the US in the 1800s" in out From 29cb76376f4da3cf5fce56e2c04f835f9f68ab45 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 3 Jan 2020 16:10:04 -0600 Subject: [PATCH 0726/2016] doc(bigquery): uncomment `Client` constructor and imports in samples (#10058) * doc(bigquery): uncomment Client constructor in samples Use a session-scope filter to ensure that a client is only constructed once per test run, saving time on authentication. * remove unnecessary TODOs * add drive scope to credentials --- .../samples/add_empty_column.py | 6 ++--- .../samples/browse_table_data.py | 10 ++++---- .../samples/client_list_jobs.py | 10 ++++---- .../samples/client_load_partitioned_table.py | 6 ++--- .../samples/client_query.py | 10 ++++---- .../samples/client_query_add_column.py | 6 ++--- .../samples/client_query_batch.py | 6 ++--- .../samples/client_query_destination_table.py | 6 ++--- .../client_query_destination_table_cmek.py | 6 ++--- .../client_query_destination_table_legacy.py | 6 ++--- .../samples/client_query_dry_run.py | 6 ++--- .../samples/client_query_legacy_sql.py | 6 ++--- .../samples/client_query_relax_column.py | 6 ++--- .../samples/client_query_w_array_params.py | 6 ++--- .../samples/client_query_w_named_params.py | 6 ++--- .../client_query_w_positional_params.py | 6 ++--- .../samples/client_query_w_struct_params.py | 6 ++--- .../client_query_w_timestamp_params.py | 6 ++--- .../samples/copy_table.py | 10 ++++---- .../samples/copy_table_cmek.py | 6 ++--- .../samples/copy_table_multiple_source.py | 10 ++++---- .../samples/create_dataset.py | 6 ++--- .../samples/create_job.py | 6 ++--- .../samples/create_routine.py | 6 ++--- .../samples/create_routine_ddl.py | 10 ++++---- .../samples/create_table.py | 6 ++--- .../samples/create_table_range_partitioned.py | 6 ++--- .../samples/dataset_exists.py | 5 +++- .../samples/delete_dataset.py | 10 ++++---- .../samples/delete_dataset_labels.py | 10 ++++---- .../samples/delete_model.py | 10 ++++---- .../samples/delete_routine.py | 10 ++++---- .../samples/delete_table.py | 10 ++++---- .../samples/download_public_data.py | 10 ++++---- .../samples/download_public_data_sandbox.py | 10 ++++---- .../samples/get_dataset.py | 10 ++++---- .../samples/get_dataset_labels.py | 10 ++++---- .../samples/get_model.py | 10 ++++---- .../samples/get_routine.py | 10 ++++---- .../samples/get_table.py | 10 ++++---- .../samples/label_dataset.py | 10 ++++---- .../samples/list_datasets.py | 10 ++++---- .../samples/list_datasets_by_label.py | 10 ++++---- .../samples/list_models.py | 10 ++++---- .../samples/list_routines.py | 10 ++++---- .../samples/list_tables.py | 10 ++++---- .../samples/load_table_dataframe.py | 6 ++--- .../query_external_gcs_temporary_table.py | 6 ++--- .../query_external_sheets_permanent_table.py | 2 +- .../query_external_sheets_temporary_table.py | 2 +- .../samples/query_no_cache.py | 6 ++--- .../samples/query_pagination.py | 10 ++++---- .../samples/query_script.py | 10 ++++---- .../samples/query_to_arrow.py | 10 ++++---- .../samples/table_exists.py | 5 +++- .../samples/table_insert_rows.py | 10 ++++---- ...le_insert_rows_explicit_none_insert_ids.py | 10 ++++---- .../samples/tests/conftest.py | 22 +++++++++++++---- .../samples/tests/test_add_empty_column.py | 4 ++-- .../samples/tests/test_browse_table_data.py | 4 ++-- .../samples/tests/test_client_list_jobs.py | 4 ++-- .../test_client_load_partitioned_table.py | 4 ++-- .../samples/tests/test_client_query.py | 4 ++-- .../tests/test_client_query_add_column.py | 4 ++-- .../samples/tests/test_client_query_batch.py | 4 ++-- .../test_client_query_destination_table.py | 4 ++-- ...est_client_query_destination_table_cmek.py | 6 ++--- ...t_client_query_destination_table_legacy.py | 4 ++-- .../tests/test_client_query_dry_run.py | 4 ++-- .../tests/test_client_query_legacy_sql.py | 4 ++-- .../tests/test_client_query_relax_column.py | 4 ++-- .../tests/test_client_query_w_array_params.py | 4 ++-- .../tests/test_client_query_w_named_params.py | 4 ++-- .../test_client_query_w_positional_params.py | 4 ++-- .../test_client_query_w_struct_params.py | 4 ++-- .../test_client_query_w_timestamp_params.py | 4 ++-- .../samples/tests/test_copy_table.py | 4 ++-- .../samples/tests/test_copy_table_cmek.py | 8 ++----- .../tests/test_copy_table_multiple_source.py | 6 ++--- .../samples/tests/test_create_dataset.py | 4 ++-- .../samples/tests/test_create_job.py | 3 +-- .../samples/tests/test_create_table.py | 4 ++-- .../test_create_table_range_partitioned.py | 4 ++-- .../samples/tests/test_dataset_exists.py | 6 ++--- .../tests/test_dataset_label_samples.py | 8 +++---- .../samples/tests/test_delete_dataset.py | 4 ++-- .../samples/tests/test_delete_table.py | 4 ++-- .../tests/test_download_public_data.py | 4 ++-- .../test_download_public_data_sandbox.py | 4 ++-- .../samples/tests/test_get_dataset.py | 4 ++-- .../samples/tests/test_get_table.py | 4 ++-- .../samples/tests/test_list_datasets.py | 5 ++-- .../tests/test_list_datasets_by_label.py | 5 ++-- .../samples/tests/test_list_tables.py | 4 ++-- .../tests/test_load_table_dataframe.py | 4 ++-- .../samples/tests/test_model_samples.py | 10 ++++---- ...test_query_external_gcs_temporary_table.py | 4 ++-- .../samples/tests/test_query_no_cache.py | 4 ++-- .../samples/tests/test_query_pagination.py | 4 ++-- .../samples/tests/test_query_script.py | 4 ++-- .../samples/tests/test_query_to_arrow.py | 4 ++-- .../samples/tests/test_routine_samples.py | 24 +++++++++---------- .../samples/tests/test_table_exists.py | 6 ++--- .../samples/tests/test_table_insert_rows.py | 4 ++-- ...le_insert_rows_explicit_none_insert_ids.py | 4 ++-- .../samples/tests/test_undelete_table.py | 4 ++-- .../tests/test_update_dataset_access.py | 4 ++-- ...te_dataset_default_partition_expiration.py | 4 ++-- ...update_dataset_default_table_expiration.py | 4 ++-- .../tests/test_update_dataset_description.py | 4 ++-- ...t_update_table_require_partition_filter.py | 4 ++-- .../samples/undelete_table.py | 9 ++++--- .../samples/update_dataset_access.py | 6 ++--- ...te_dataset_default_partition_expiration.py | 10 ++++---- ...update_dataset_default_table_expiration.py | 10 ++++---- .../samples/update_dataset_description.py | 10 ++++---- .../samples/update_model.py | 10 ++++---- .../samples/update_routine.py | 10 ++++---- .../update_table_require_partition_filter.py | 10 ++++---- 119 files changed, 403 insertions(+), 397 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/add_empty_column.py b/packages/google-cloud-bigquery/samples/add_empty_column.py index bd531898eb29..cd7cf5018e1f 100644 --- a/packages/google-cloud-bigquery/samples/add_empty_column.py +++ b/packages/google-cloud-bigquery/samples/add_empty_column.py @@ -13,13 +13,13 @@ # limitations under the License. -def add_empty_column(client, table_id): +def add_empty_column(table_id): # [START bigquery_add_empty_column] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the table # to add an empty column. diff --git a/packages/google-cloud-bigquery/samples/browse_table_data.py b/packages/google-cloud-bigquery/samples/browse_table_data.py index 78d1d351a7a7..29a1c2ff61e0 100644 --- a/packages/google-cloud-bigquery/samples/browse_table_data.py +++ b/packages/google-cloud-bigquery/samples/browse_table_data.py @@ -13,14 +13,14 @@ # limitations under the License. -def browse_table_data(client, table_id): +def browse_table_data(table_id): # [START bigquery_browse_table] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the table to browse data rows. # table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/client_list_jobs.py b/packages/google-cloud-bigquery/samples/client_list_jobs.py index 08eb4fbd99ef..b2344e23c7f7 100644 --- a/packages/google-cloud-bigquery/samples/client_list_jobs.py +++ b/packages/google-cloud-bigquery/samples/client_list_jobs.py @@ -13,16 +13,16 @@ # limitations under the License. -def client_list_jobs(client): +def client_list_jobs(): # [START bigquery_list_jobs] - # TODO(developer): Import the client library. - # from google.cloud import bigquery + + from google.cloud import bigquery import datetime - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # List the 10 most recent jobs in reverse chronological order. # Omit the max_results parameter to list jobs from the past 6 months. diff --git a/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py b/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py index 3f9f86db9a9a..e4e8a296c9a3 100644 --- a/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py +++ b/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_load_partitioned_table(client, table_id): +def client_load_partitioned_table(table_id): # [START bigquery_load_table_partitioned] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the table to create. # table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/client_query.py b/packages/google-cloud-bigquery/samples/client_query.py index 5242c854e220..7fedc3f90b1e 100644 --- a/packages/google-cloud-bigquery/samples/client_query.py +++ b/packages/google-cloud-bigquery/samples/client_query.py @@ -13,14 +13,14 @@ # limitations under the License. -def client_query(client): +def client_query(): # [START bigquery_query] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() query = """ SELECT name, SUM(number) as total_people diff --git a/packages/google-cloud-bigquery/samples/client_query_add_column.py b/packages/google-cloud-bigquery/samples/client_query_add_column.py index c35548d2a361..ff7d5aa68add 100644 --- a/packages/google-cloud-bigquery/samples/client_query_add_column.py +++ b/packages/google-cloud-bigquery/samples/client_query_add_column.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_add_column(client, table_id): +def client_query_add_column(table_id): # [START bigquery_add_column_query_append] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the destination table. # table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/client_query_batch.py b/packages/google-cloud-bigquery/samples/client_query_batch.py index af9fcd8a1e40..e1680f4a18d2 100644 --- a/packages/google-cloud-bigquery/samples/client_query_batch.py +++ b/packages/google-cloud-bigquery/samples/client_query_batch.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_batch(client): +def client_query_batch(): # [START bigquery_query_batch] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() job_config = bigquery.QueryJobConfig( # Run at batch priority, which won't count toward concurrent rate limit. diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table.py b/packages/google-cloud-bigquery/samples/client_query_destination_table.py index 876df7904d9c..303ce5a0cc36 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_destination_table(client, table_id): +def client_query_destination_table(table_id): # [START bigquery_query_destination_table] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the destination table. # table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py index d3409eecd77d..24d4f22228c7 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_destination_table_cmek(client, table_id, kms_key_name): +def client_query_destination_table_cmek(table_id, kms_key_name): # [START bigquery_query_destination_table_cmek] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the destination table. # table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py index 8e977a92d996..c8fdd606f9f4 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_destination_table_legacy(client, table_id): +def client_query_destination_table_legacy(table_id): # [START bigquery_query_legacy_large_results] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the destination table. # table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/client_query_dry_run.py b/packages/google-cloud-bigquery/samples/client_query_dry_run.py index 2d09a1c25f4a..1f7bd0c9c4e7 100644 --- a/packages/google-cloud-bigquery/samples/client_query_dry_run.py +++ b/packages/google-cloud-bigquery/samples/client_query_dry_run.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_dry_run(client): +def client_query_dry_run(): # [START bigquery_query_dry_run] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False) diff --git a/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py b/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py index c8dae20649e2..3f94657795fb 100644 --- a/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py +++ b/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_legacy_sql(client): +def client_query_legacy_sql(): # [START bigquery_query_legacy] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() query = ( "SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] " diff --git a/packages/google-cloud-bigquery/samples/client_query_relax_column.py b/packages/google-cloud-bigquery/samples/client_query_relax_column.py index 8ec117e186fc..5e2ec8056a00 100644 --- a/packages/google-cloud-bigquery/samples/client_query_relax_column.py +++ b/packages/google-cloud-bigquery/samples/client_query_relax_column.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_relax_column(client, table_id): +def client_query_relax_column(table_id): # [START bigquery_relax_column_query_append] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the destination table. # table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/client_query_w_array_params.py b/packages/google-cloud-bigquery/samples/client_query_w_array_params.py index 254173d4c540..4077be2c7d3e 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_array_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_array_params.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_w_array_params(client): +def client_query_w_array_params(): # [START bigquery_query_params_arrays] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() query = """ SELECT name, sum(number) as count diff --git a/packages/google-cloud-bigquery/samples/client_query_w_named_params.py b/packages/google-cloud-bigquery/samples/client_query_w_named_params.py index eba5bc221ff9..a0de8f63aa99 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_named_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_named_params.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_w_named_params(client): +def client_query_w_named_params(): # [START bigquery_query_params_named] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() query = """ SELECT word, word_count diff --git a/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py b/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py index 3f7ce584bcf9..ee316044bda3 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_w_positional_params(client): +def client_query_w_positional_params(): # [START bigquery_query_params_positional] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() query = """ SELECT word, word_count diff --git a/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py b/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py index 7c291447f0cb..041a3a0e3839 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py @@ -13,13 +13,13 @@ # limitations under the License. -def client_query_w_struct_params(client): +def client_query_w_struct_params(): # [START bigquery_query_params_structs] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() query = "SELECT @struct_value AS s;" job_config = bigquery.QueryJobConfig( diff --git a/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py index cc334f7e9625..ca8eec0b5e89 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_w_timestamp_params(client): +def client_query_w_timestamp_params(): # [START bigquery_query_params_timestamps] import datetime @@ -21,8 +21,8 @@ def client_query_w_timestamp_params(client): import pytz from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() query = "SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);" job_config = bigquery.QueryJobConfig( diff --git a/packages/google-cloud-bigquery/samples/copy_table.py b/packages/google-cloud-bigquery/samples/copy_table.py index 20f6776cf87d..91c58e109cb9 100644 --- a/packages/google-cloud-bigquery/samples/copy_table.py +++ b/packages/google-cloud-bigquery/samples/copy_table.py @@ -13,14 +13,14 @@ # limitations under the License. -def copy_table(client, source_table_id, destination_table_id): +def copy_table(source_table_id, destination_table_id): # [START bigquery_copy_table] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set source_table_id to the ID of the original table. # source_table_id = "your-project.source_dataset.source_table" diff --git a/packages/google-cloud-bigquery/samples/copy_table_cmek.py b/packages/google-cloud-bigquery/samples/copy_table_cmek.py index 1e9ee198c821..52ccb5f7b1df 100644 --- a/packages/google-cloud-bigquery/samples/copy_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/copy_table_cmek.py @@ -13,13 +13,13 @@ # limitations under the License. -def copy_table_cmek(client, dest_table_id, orig_table_id, kms_key_name): +def copy_table_cmek(dest_table_id, orig_table_id, kms_key_name): # [START bigquery_copy_table_cmek] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dest_table_id to the ID of the destination table. # dest_table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py b/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py index 532ea0a0ab90..d86e380d0682 100644 --- a/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py +++ b/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py @@ -13,14 +13,14 @@ # limitations under the License. -def copy_table_multiple_source(client, dest_table_id, table_ids): +def copy_table_multiple_source(dest_table_id, table_ids): # [START bigquery_copy_table_multiple_source] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dest_table_id to the ID of the destination table. # dest_table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/create_dataset.py b/packages/google-cloud-bigquery/samples/create_dataset.py index 3d64473a2321..e47d68a96b2a 100644 --- a/packages/google-cloud-bigquery/samples/create_dataset.py +++ b/packages/google-cloud-bigquery/samples/create_dataset.py @@ -13,13 +13,13 @@ # limitations under the License. -def create_dataset(client, dataset_id): +def create_dataset(dataset_id): # [START bigquery_create_dataset] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset to create. # dataset_id = "{}.your_dataset".format(client.project) diff --git a/packages/google-cloud-bigquery/samples/create_job.py b/packages/google-cloud-bigquery/samples/create_job.py index 4f7f27a8e668..feed04ca00e8 100644 --- a/packages/google-cloud-bigquery/samples/create_job.py +++ b/packages/google-cloud-bigquery/samples/create_job.py @@ -13,13 +13,13 @@ # limitations under the License. -def create_job(client): +def create_job(): # [START bigquery_create_job] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() query_job = client.query( "SELECT country_name from `bigquery-public-data.utility_us.country_code_iso`", diff --git a/packages/google-cloud-bigquery/samples/create_routine.py b/packages/google-cloud-bigquery/samples/create_routine.py index 424ee4ef5553..d9b221a4f62b 100644 --- a/packages/google-cloud-bigquery/samples/create_routine.py +++ b/packages/google-cloud-bigquery/samples/create_routine.py @@ -13,14 +13,14 @@ # limitations under the License. -def create_routine(client, routine_id): +def create_routine(routine_id): # [START bigquery_create_routine] from google.cloud import bigquery from google.cloud import bigquery_v2 - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Choose a fully-qualified ID for the routine. # routine_id = "my-project.my_dataset.my_routine" diff --git a/packages/google-cloud-bigquery/samples/create_routine_ddl.py b/packages/google-cloud-bigquery/samples/create_routine_ddl.py index eb5af0388503..c191bd385041 100644 --- a/packages/google-cloud-bigquery/samples/create_routine_ddl.py +++ b/packages/google-cloud-bigquery/samples/create_routine_ddl.py @@ -13,14 +13,14 @@ # limitations under the License. -def create_routine_ddl(client, routine_id): +def create_routine_ddl(routine_id): # [START bigquery_create_routine_ddl] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Choose a fully-qualified ID for the routine. # routine_id = "my-project.my_dataset.my_routine" diff --git a/packages/google-cloud-bigquery/samples/create_table.py b/packages/google-cloud-bigquery/samples/create_table.py index ae26c57fed00..d62e86681afc 100644 --- a/packages/google-cloud-bigquery/samples/create_table.py +++ b/packages/google-cloud-bigquery/samples/create_table.py @@ -13,13 +13,13 @@ # limitations under the License. -def create_table(client, table_id): +def create_table(table_id): # [START bigquery_create_table] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the table to create. # table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py b/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py index f9da09cff847..260041aa5d5d 100644 --- a/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py +++ b/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py @@ -13,13 +13,13 @@ # limitations under the License. -def create_table_range_partitioned(client, table_id): +def create_table_range_partitioned(table_id): # [START bigquery_create_table_range_partitioned] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the table to create. # table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/dataset_exists.py b/packages/google-cloud-bigquery/samples/dataset_exists.py index b8b53b8a4580..b4db9353b37f 100644 --- a/packages/google-cloud-bigquery/samples/dataset_exists.py +++ b/packages/google-cloud-bigquery/samples/dataset_exists.py @@ -13,11 +13,14 @@ # limitations under the License. -def dataset_exists(client, dataset_id): +def dataset_exists(dataset_id): # [START bigquery_dataset_exists] + from google.cloud import bigquery from google.cloud.exceptions import NotFound + client = bigquery.Client() + # TODO(developer): Set dataset_id to the ID of the dataset to determine existence. # dataset_id = "your-project.your_dataset" diff --git a/packages/google-cloud-bigquery/samples/delete_dataset.py b/packages/google-cloud-bigquery/samples/delete_dataset.py index 8ce95d953392..e25740baaff0 100644 --- a/packages/google-cloud-bigquery/samples/delete_dataset.py +++ b/packages/google-cloud-bigquery/samples/delete_dataset.py @@ -13,14 +13,14 @@ # limitations under the License. -def delete_dataset(client, dataset_id): +def delete_dataset(dataset_id): # [START bigquery_delete_dataset] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set model_id to the ID of the model to fetch. # dataset_id = 'your-project.your_dataset' diff --git a/packages/google-cloud-bigquery/samples/delete_dataset_labels.py b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py index 9e6493694ddc..a52de2967e70 100644 --- a/packages/google-cloud-bigquery/samples/delete_dataset_labels.py +++ b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py @@ -13,14 +13,14 @@ # limitations under the License. -def delete_dataset_labels(client, dataset_id): +def delete_dataset_labels(dataset_id): # [START bigquery_delete_label_dataset] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = "your-project.your_dataset" diff --git a/packages/google-cloud-bigquery/samples/delete_model.py b/packages/google-cloud-bigquery/samples/delete_model.py index b6f32a59ebd9..0190315c6bed 100644 --- a/packages/google-cloud-bigquery/samples/delete_model.py +++ b/packages/google-cloud-bigquery/samples/delete_model.py @@ -13,15 +13,15 @@ # limitations under the License. -def delete_model(client, model_id): +def delete_model(model_id): """Sample ID: go/samples-tracker/1534""" # [START bigquery_delete_model] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set model_id to the ID of the model to fetch. # model_id = 'your-project.your_dataset.your_model' diff --git a/packages/google-cloud-bigquery/samples/delete_routine.py b/packages/google-cloud-bigquery/samples/delete_routine.py index c20b49837b75..679cbee4bc94 100644 --- a/packages/google-cloud-bigquery/samples/delete_routine.py +++ b/packages/google-cloud-bigquery/samples/delete_routine.py @@ -13,14 +13,14 @@ # limitations under the License. -def delete_routine(client, routine_id): +def delete_routine(routine_id): # [START bigquery_delete_routine] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set the fully-qualified ID for the routine. # routine_id = "my-project.my_dataset.my_routine" diff --git a/packages/google-cloud-bigquery/samples/delete_table.py b/packages/google-cloud-bigquery/samples/delete_table.py index b83a92890b09..3d0a6f0babc2 100644 --- a/packages/google-cloud-bigquery/samples/delete_table.py +++ b/packages/google-cloud-bigquery/samples/delete_table.py @@ -13,14 +13,14 @@ # limitations under the License. -def delete_table(client, table_id): +def delete_table(table_id): # [START bigquery_delete_table] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the table to fetch. # table_id = 'your-project.your_dataset.your_table' diff --git a/packages/google-cloud-bigquery/samples/download_public_data.py b/packages/google-cloud-bigquery/samples/download_public_data.py index 815d140fc6f1..d10ed161a5da 100644 --- a/packages/google-cloud-bigquery/samples/download_public_data.py +++ b/packages/google-cloud-bigquery/samples/download_public_data.py @@ -13,14 +13,14 @@ # limitations under the License. -def download_public_data(client): +def download_public_data(): # [START bigquery_pandas_public_data] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the fully-qualified table ID in standard # SQL format, including the project ID and dataset ID. diff --git a/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py b/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py index edb1466e4bd7..afb50b15c3a9 100644 --- a/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py +++ b/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py @@ -13,14 +13,14 @@ # limitations under the License. -def download_public_data_sandbox(client): +def download_public_data_sandbox(): # [START bigquery_pandas_public_data_sandbox] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # `SELECT *` is an anti-pattern in BigQuery because it is cheaper and # faster to use the BigQuery Storage API directly, but BigQuery Sandbox diff --git a/packages/google-cloud-bigquery/samples/get_dataset.py b/packages/google-cloud-bigquery/samples/get_dataset.py index bb3d4a0d4c40..54ba05781dd6 100644 --- a/packages/google-cloud-bigquery/samples/get_dataset.py +++ b/packages/google-cloud-bigquery/samples/get_dataset.py @@ -13,14 +13,14 @@ # limitations under the License. -def get_dataset(client, dataset_id): +def get_dataset(dataset_id): # [START bigquery_get_dataset] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = 'your-project.your_dataset' diff --git a/packages/google-cloud-bigquery/samples/get_dataset_labels.py b/packages/google-cloud-bigquery/samples/get_dataset_labels.py index 411607f84664..18a9ca985f51 100644 --- a/packages/google-cloud-bigquery/samples/get_dataset_labels.py +++ b/packages/google-cloud-bigquery/samples/get_dataset_labels.py @@ -13,14 +13,14 @@ # limitations under the License. -def get_dataset_labels(client, dataset_id): +def get_dataset_labels(dataset_id): # [START bigquery_get_dataset_labels] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = "your-project.your_dataset" diff --git a/packages/google-cloud-bigquery/samples/get_model.py b/packages/google-cloud-bigquery/samples/get_model.py index 0ebd59c9d067..1570ef816895 100644 --- a/packages/google-cloud-bigquery/samples/get_model.py +++ b/packages/google-cloud-bigquery/samples/get_model.py @@ -13,15 +13,15 @@ # limitations under the License. -def get_model(client, model_id): +def get_model(model_id): """Sample ID: go/samples-tracker/1510""" # [START bigquery_get_model] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set model_id to the ID of the model to fetch. # model_id = 'your-project.your_dataset.your_model' diff --git a/packages/google-cloud-bigquery/samples/get_routine.py b/packages/google-cloud-bigquery/samples/get_routine.py index da4e89f57f19..72715ee1bcd7 100644 --- a/packages/google-cloud-bigquery/samples/get_routine.py +++ b/packages/google-cloud-bigquery/samples/get_routine.py @@ -13,14 +13,14 @@ # limitations under the License. -def get_routine(client, routine_id): +def get_routine(routine_id): # [START bigquery_get_routine] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set the fully-qualified ID for the routine. # routine_id = "my-project.my_dataset.my_routine" diff --git a/packages/google-cloud-bigquery/samples/get_table.py b/packages/google-cloud-bigquery/samples/get_table.py index 201b8808a846..0d1d809ba791 100644 --- a/packages/google-cloud-bigquery/samples/get_table.py +++ b/packages/google-cloud-bigquery/samples/get_table.py @@ -13,14 +13,14 @@ # limitations under the License. -def get_table(client, table_id): +def get_table(table_id): # [START bigquery_get_table] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the model to fetch. # table_id = 'your-project.your_dataset.your_table' diff --git a/packages/google-cloud-bigquery/samples/label_dataset.py b/packages/google-cloud-bigquery/samples/label_dataset.py index 019b2aa374a0..bd4cd6721a57 100644 --- a/packages/google-cloud-bigquery/samples/label_dataset.py +++ b/packages/google-cloud-bigquery/samples/label_dataset.py @@ -13,14 +13,14 @@ # limitations under the License. -def label_dataset(client, dataset_id): +def label_dataset(dataset_id): # [START bigquery_label_dataset] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = "your-project.your_dataset" diff --git a/packages/google-cloud-bigquery/samples/list_datasets.py b/packages/google-cloud-bigquery/samples/list_datasets.py index 77ae8c785d22..6a1b93d00bb6 100644 --- a/packages/google-cloud-bigquery/samples/list_datasets.py +++ b/packages/google-cloud-bigquery/samples/list_datasets.py @@ -13,14 +13,14 @@ # limitations under the License. -def list_datasets(client): +def list_datasets(): # [START bigquery_list_datasets] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() datasets = list(client.list_datasets()) # Make an API request. project = client.project diff --git a/packages/google-cloud-bigquery/samples/list_datasets_by_label.py b/packages/google-cloud-bigquery/samples/list_datasets_by_label.py index 9fa939ad0c19..1b310049b4b3 100644 --- a/packages/google-cloud-bigquery/samples/list_datasets_by_label.py +++ b/packages/google-cloud-bigquery/samples/list_datasets_by_label.py @@ -13,14 +13,14 @@ # limitations under the License. -def list_datasets_by_label(client): +def list_datasets_by_label(): # [START bigquery_list_datasets_by_label] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() label_filter = "labels.color:green" datasets = list(client.list_datasets(filter=label_filter)) # Make an API request. diff --git a/packages/google-cloud-bigquery/samples/list_models.py b/packages/google-cloud-bigquery/samples/list_models.py index a2477ffc795b..7251c001a770 100644 --- a/packages/google-cloud-bigquery/samples/list_models.py +++ b/packages/google-cloud-bigquery/samples/list_models.py @@ -13,15 +13,15 @@ # limitations under the License. -def list_models(client, dataset_id): +def list_models(dataset_id): """Sample ID: go/samples-tracker/1512""" # [START bigquery_list_models] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset that contains # the models you are listing. diff --git a/packages/google-cloud-bigquery/samples/list_routines.py b/packages/google-cloud-bigquery/samples/list_routines.py index 5eaad0cec8f4..718d40d680aa 100644 --- a/packages/google-cloud-bigquery/samples/list_routines.py +++ b/packages/google-cloud-bigquery/samples/list_routines.py @@ -13,14 +13,14 @@ # limitations under the License. -def list_routines(client, dataset_id): +def list_routines(dataset_id): # [START bigquery_list_routines] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset that contains # the routines you are listing. diff --git a/packages/google-cloud-bigquery/samples/list_tables.py b/packages/google-cloud-bigquery/samples/list_tables.py index d7576616e191..9ab527a4915f 100644 --- a/packages/google-cloud-bigquery/samples/list_tables.py +++ b/packages/google-cloud-bigquery/samples/list_tables.py @@ -13,14 +13,14 @@ # limitations under the License. -def list_tables(client, dataset_id): +def list_tables(dataset_id): # [START bigquery_list_tables] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset that contains # the tables you are listing. diff --git a/packages/google-cloud-bigquery/samples/load_table_dataframe.py b/packages/google-cloud-bigquery/samples/load_table_dataframe.py index 8cfb34424457..feaa4550bac9 100644 --- a/packages/google-cloud-bigquery/samples/load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/load_table_dataframe.py @@ -13,15 +13,15 @@ # limitations under the License. -def load_table_dataframe(client, table_id): +def load_table_dataframe(table_id): # [START bigquery_load_table_dataframe] from google.cloud import bigquery import pandas - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the table to create. # table_id = "your-project.your_dataset.your_table_name" diff --git a/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py index 3ef44bd32db1..3c3caf695870 100644 --- a/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py @@ -13,13 +13,13 @@ # limitations under the License. -def query_external_gcs_temporary_table(client): +def query_external_gcs_temporary_table(): # [START bigquery_query_external_gcs_temp] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # Configure the external data source and query job. external_config = bigquery.ExternalConfig("CSV") diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py index e9bc908f5b15..915e9acc303a 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py @@ -28,7 +28,7 @@ def query_external_sheets_permanent_table(dataset_id): ] ) - # TODO(developer): Construct a BigQuery client object. + # Construct a BigQuery client object. client = bigquery.Client(credentials=credentials, project=project) # TODO(developer): Set dataset_id to the ID of the dataset to fetch. diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py index 7b6bde864b09..1b70e9531f96 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py @@ -29,7 +29,7 @@ def query_external_sheets_temporary_table(): ] ) - # TODO(developer): Construct a BigQuery client object. + # Construct a BigQuery client object. client = bigquery.Client(credentials=credentials, project=project) # [END bigquery_auth_drive_scope] diff --git a/packages/google-cloud-bigquery/samples/query_no_cache.py b/packages/google-cloud-bigquery/samples/query_no_cache.py index 3d542a96b7be..e380f0b15de0 100644 --- a/packages/google-cloud-bigquery/samples/query_no_cache.py +++ b/packages/google-cloud-bigquery/samples/query_no_cache.py @@ -13,13 +13,13 @@ # limitations under the License. -def query_no_cache(client): +def query_no_cache(): # [START bigquery_query_no_cache] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() job_config = bigquery.QueryJobConfig(use_query_cache=False) sql = """ diff --git a/packages/google-cloud-bigquery/samples/query_pagination.py b/packages/google-cloud-bigquery/samples/query_pagination.py index b5d1999bfb3c..57a4212cf664 100644 --- a/packages/google-cloud-bigquery/samples/query_pagination.py +++ b/packages/google-cloud-bigquery/samples/query_pagination.py @@ -13,14 +13,14 @@ # limitations under the License. -def query_pagination(client): +def query_pagination(): # [START bigquery_query_pagination] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() query = """ SELECT name, SUM(number) as total_people diff --git a/packages/google-cloud-bigquery/samples/query_script.py b/packages/google-cloud-bigquery/samples/query_script.py index 453b7c6f9435..9390d352dd40 100644 --- a/packages/google-cloud-bigquery/samples/query_script.py +++ b/packages/google-cloud-bigquery/samples/query_script.py @@ -13,13 +13,13 @@ # limitations under the License. -def query_script(client): +def query_script(): # [START bigquery_query_script] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # Run a SQL script. sql_script = """ diff --git a/packages/google-cloud-bigquery/samples/query_to_arrow.py b/packages/google-cloud-bigquery/samples/query_to_arrow.py index 4cc69d4e902a..4a57992d13c2 100644 --- a/packages/google-cloud-bigquery/samples/query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/query_to_arrow.py @@ -13,14 +13,14 @@ # limitations under the License. -def query_to_arrow(client): +def query_to_arrow(): # [START bigquery_query_to_arrow] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() sql = """ WITH races AS ( diff --git a/packages/google-cloud-bigquery/samples/table_exists.py b/packages/google-cloud-bigquery/samples/table_exists.py index a011e6e2915d..152d95534add 100644 --- a/packages/google-cloud-bigquery/samples/table_exists.py +++ b/packages/google-cloud-bigquery/samples/table_exists.py @@ -13,11 +13,14 @@ # limitations under the License. -def table_exists(client, table_id): +def table_exists(table_id): # [START bigquery_table_exists] + from google.cloud import bigquery from google.cloud.exceptions import NotFound + client = bigquery.Client() + # TODO(developer): Set table_id to the ID of the table to determine existence. # table_id = "your-project.your_dataset.your_table" diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows.py b/packages/google-cloud-bigquery/samples/table_insert_rows.py index e2f949b635a6..130f9dbbddf2 100644 --- a/packages/google-cloud-bigquery/samples/table_insert_rows.py +++ b/packages/google-cloud-bigquery/samples/table_insert_rows.py @@ -13,14 +13,14 @@ # limitations under the License. -def table_insert_rows(client, table_id): +def table_insert_rows(table_id): # [START bigquery_table_insert_rows] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the model to fetch. # table_id = "your-project.your_dataset.your_table" diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py index 953e7e210312..2410ba1765fc 100644 --- a/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py +++ b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py @@ -13,14 +13,14 @@ # limitations under the License. -def table_insert_rows_explicit_none_insert_ids(client, table_id): +def table_insert_rows_explicit_none_insert_ids(table_id): # [START bigquery_table_insert_rows_explicit_none_insert_ids] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the model to fetch. # table_id = "your-project.your_dataset.your_table" diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index 6d049e6c2312..d80085dd3425 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -15,19 +15,31 @@ import datetime import uuid +import google.auth +import mock import pytest from google.cloud import bigquery from google.cloud import bigquery_v2 -@pytest.fixture(scope="module") +@pytest.fixture(scope="session", autouse=True) def client(): - return bigquery.Client() + credentials, project = google.auth.default( + scopes=[ + "https://www.googleapis.com/auth/drive", + "https://www.googleapis.com/auth/bigquery", + ] + ) + real_client = bigquery.Client(credentials=credentials, project=project) + mock_client = mock.create_autospec(bigquery.Client) + mock_client.return_value = real_client + bigquery.Client = mock_client + return real_client @pytest.fixture -def random_table_id(client, dataset_id): +def random_table_id(dataset_id): now = datetime.datetime.now() random_table_id = "example_table_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -46,7 +58,7 @@ def random_dataset_id(client): @pytest.fixture -def random_routine_id(client, dataset_id): +def random_routine_id(dataset_id): now = datetime.datetime.now() random_routine_id = "example_routine_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -95,7 +107,7 @@ def table_with_schema_id(client, dataset_id): @pytest.fixture -def table_with_data_id(client): +def table_with_data_id(): return "bigquery-public-data.samples.shakespeare" diff --git a/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py b/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py index de51bfed7672..d89fcb6b7022 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py +++ b/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py @@ -15,8 +15,8 @@ from .. import add_empty_column -def test_add_empty_column(capsys, client, table_id): +def test_add_empty_column(capsys, table_id): - add_empty_column.add_empty_column(client, table_id) + add_empty_column.add_empty_column(table_id) out, err = capsys.readouterr() assert "A new column has been added." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py index db9b867f5ab7..a5f647bdbda2 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py +++ b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py @@ -15,9 +15,9 @@ from .. import browse_table_data -def test_browse_table_data(capsys, client, table_with_data_id): +def test_browse_table_data(capsys, table_with_data_id): - browse_table_data.browse_table_data(client, table_with_data_id) + browse_table_data.browse_table_data(table_with_data_id) out, err = capsys.readouterr() assert "Downloaded 164656 rows from table {}".format(table_with_data_id) in out assert "Downloaded 10 rows from table {}".format(table_with_data_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py b/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py index ada053239802..896950a8253f 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py @@ -18,10 +18,10 @@ def test_client_list_jobs(capsys, client): - job = create_job.create_job(client) + job = create_job.create_job() client.cancel_job(job.job_id) job.cancel() - client_list_jobs.client_list_jobs(client) + client_list_jobs.client_list_jobs() out, err = capsys.readouterr() assert "Started job: {}".format(job.job_id) in out assert "Last 10 jobs:" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py b/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py index 4e4c8811181f..f1d72a8587c6 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py @@ -15,8 +15,8 @@ from .. import client_load_partitioned_table -def test_client_load_partitioned_table(capsys, client, random_table_id): +def test_client_load_partitioned_table(capsys, random_table_id): - client_load_partitioned_table.client_load_partitioned_table(client, random_table_id) + client_load_partitioned_table.client_load_partitioned_table(random_table_id) out, err = capsys.readouterr() assert "Loaded 50 rows to table {}".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query.py b/packages/google-cloud-bigquery/samples/tests/test_client_query.py index e73e7e5a0eb4..810c46a17943 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query.py @@ -15,9 +15,9 @@ from .. import client_query -def test_client_query(capsys, client): +def test_client_query(capsys,): - client_query.client_query(client) + client_query.client_query() out, err = capsys.readouterr() assert "The query data:" in out assert "name=James, count=272793" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py index 67ac328d5518..254533f78778 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py @@ -17,7 +17,7 @@ from .. import client_query_add_column -def test_client_query_add_column(capsys, client, random_table_id): +def test_client_query_add_column(capsys, random_table_id, client): schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), @@ -26,7 +26,7 @@ def test_client_query_add_column(capsys, client, random_table_id): client.create_table(bigquery.Table(random_table_id, schema=schema)) - client_query_add_column.client_query_add_column(client, random_table_id) + client_query_add_column.client_query_add_column(random_table_id) out, err = capsys.readouterr() assert "Table {} contains 2 columns".format(random_table_id) in out assert "Table {} now contains 3 columns".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py index 79197e4565c7..c5e19985dda9 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py @@ -15,8 +15,8 @@ from .. import client_query_batch -def test_client_query_batch(capsys, client): +def test_client_query_batch(capsys,): - job = client_query_batch.client_query_batch(client) + job = client_query_batch.client_query_batch() out, err = capsys.readouterr() assert "Job {} is currently in state {}".format(job.job_id, job.state) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py index d29aaebd3ce5..6bcdd498a215 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py @@ -15,8 +15,8 @@ from .. import client_query_destination_table -def test_client_query_destination_table(capsys, client, table_id): +def test_client_query_destination_table(capsys, table_id): - client_query_destination_table.client_query_destination_table(client, table_id) + client_query_destination_table.client_query_destination_table(table_id) out, err = capsys.readouterr() assert "Query results loaded to the table {}".format(table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py index cd4532be6d1d..4f9e3bc9a944 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py @@ -15,12 +15,10 @@ from .. import client_query_destination_table_cmek -def test_client_query_destination_table_cmek( - capsys, client, random_table_id, kms_key_name -): +def test_client_query_destination_table_cmek(capsys, random_table_id, kms_key_name): client_query_destination_table_cmek.client_query_destination_table_cmek( - client, random_table_id, kms_key_name + random_table_id, kms_key_name ) out, err = capsys.readouterr() assert "The destination table is written using the encryption configuration" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py index da62baada213..46077497b1c7 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py @@ -15,10 +15,10 @@ from .. import client_query_destination_table_legacy -def test_client_query_destination_table_legacy(capsys, client, random_table_id): +def test_client_query_destination_table_legacy(capsys, random_table_id): client_query_destination_table_legacy.client_query_destination_table_legacy( - client, random_table_id + random_table_id ) out, err = capsys.readouterr() assert "Query results loaded to the table {}".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py index c39a22767d25..5cbf2e3fae6e 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py @@ -15,9 +15,9 @@ from .. import client_query_dry_run -def test_client_query_dry_run(capsys, client): +def test_client_query_dry_run(capsys,): - query_job = client_query_dry_run.client_query_dry_run(client) + query_job = client_query_dry_run.client_query_dry_run() out, err = capsys.readouterr() assert "This query will process" in out assert query_job.state == "DONE" diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py index fb6ee60bc6ec..ab240fad1a1e 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py @@ -17,8 +17,8 @@ from .. import client_query_legacy_sql -def test_client_query_legacy_sql(capsys, client): +def test_client_query_legacy_sql(capsys,): - client_query_legacy_sql.client_query_legacy_sql(client) + client_query_legacy_sql.client_query_legacy_sql() out, err = capsys.readouterr() assert re.search(r"(Row[\w(){}:', ]+)$", out) diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py index 685db9cb1fa0..0c5b7aa6f982 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py @@ -17,7 +17,7 @@ from .. import client_query_relax_column -def test_client_query_relax_column(capsys, client, random_table_id): +def test_client_query_relax_column(capsys, random_table_id, client): schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), @@ -26,7 +26,7 @@ def test_client_query_relax_column(capsys, client, random_table_id): client.create_table(bigquery.Table(random_table_id, schema=schema)) - client_query_relax_column.client_query_relax_column(client, random_table_id) + client_query_relax_column.client_query_relax_column(random_table_id) out, err = capsys.readouterr() assert "2 fields in the schema are required." in out assert "0 fields in the schema are now required." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py index 8603e9b8fe3d..07e0294e93d1 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py @@ -15,8 +15,8 @@ from .. import client_query_w_array_params -def test_client_query_w_array_params(capsys, client): +def test_client_query_w_array_params(capsys,): - client_query_w_array_params.client_query_w_array_params(client) + client_query_w_array_params.client_query_w_array_params() out, err = capsys.readouterr() assert "James" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py index ae4a2fc27db3..2970dfdc47bd 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py @@ -15,8 +15,8 @@ from .. import client_query_w_named_params -def test_client_query_w_named_params(capsys, client): +def test_client_query_w_named_params(capsys,): - client_query_w_named_params.client_query_w_named_params(client) + client_query_w_named_params.client_query_w_named_params() out, err = capsys.readouterr() assert "the" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py index 37c15b67b120..e41ffa825584 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py @@ -15,8 +15,8 @@ from .. import client_query_w_positional_params -def test_client_query_w_positional_params(capsys, client): +def test_client_query_w_positional_params(capsys,): - client_query_w_positional_params.client_query_w_positional_params(client) + client_query_w_positional_params.client_query_w_positional_params() out, err = capsys.readouterr() assert "the" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py index 9d0c4282946b..03083a3a72c7 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py @@ -15,9 +15,9 @@ from .. import client_query_w_struct_params -def test_client_query_w_struct_params(capsys, client): +def test_client_query_w_struct_params(capsys,): - client_query_w_struct_params.client_query_w_struct_params(client) + client_query_w_struct_params.client_query_w_struct_params() out, err = capsys.readouterr() assert "1" in out assert "foo" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py index 45f7b7518454..9dddcb9a0e5d 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py @@ -15,8 +15,8 @@ from .. import client_query_w_timestamp_params -def test_client_query_w_timestamp_params(capsys, client): +def test_client_query_w_timestamp_params(capsys,): - client_query_w_timestamp_params.client_query_w_timestamp_params(client) + client_query_w_timestamp_params.client_query_w_timestamp_params() out, err = capsys.readouterr() assert "2016, 12, 7, 9, 0" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py index 0138cd8ee1e2..0b95c5443777 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py @@ -15,9 +15,9 @@ from .. import copy_table -def test_copy_table(capsys, client, table_with_data_id, random_table_id): +def test_copy_table(capsys, table_with_data_id, random_table_id, client): - copy_table.copy_table(client, table_with_data_id, random_table_id) + copy_table.copy_table(table_with_data_id, random_table_id) out, err = capsys.readouterr() assert "A copy of the table created." in out assert ( diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py index 25238071b947..ac04675c989d 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py @@ -15,12 +15,8 @@ from .. import copy_table_cmek -def test_copy_table_cmek( - capsys, client, random_table_id, table_with_data_id, kms_key_name -): +def test_copy_table_cmek(capsys, random_table_id, table_with_data_id, kms_key_name): - copy_table_cmek.copy_table_cmek( - client, random_table_id, table_with_data_id, kms_key_name - ) + copy_table_cmek.copy_table_cmek(random_table_id, table_with_data_id, kms_key_name) out, err = capsys.readouterr() assert "A copy of the table created" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py index 16c1de89627c..45c6d34f5a41 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py @@ -18,7 +18,7 @@ from .. import copy_table_multiple_source -def test_copy_table_multiple_source(capsys, client, random_table_id, random_dataset_id): +def test_copy_table_multiple_source(capsys, random_table_id, random_dataset_id, client): dataset = bigquery.Dataset(random_dataset_id) dataset.location = "US" @@ -42,9 +42,7 @@ def test_copy_table_multiple_source(capsys, client, random_table_id, random_data "{}.table2".format(random_dataset_id), ] - copy_table_multiple_source.copy_table_multiple_source( - client, random_table_id, table_ids - ) + copy_table_multiple_source.copy_table_multiple_source(random_table_id, table_ids) dest_table = client.get_table(random_table_id) out, err = capsys.readouterr() assert ( diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py index dfadc67d8468..a000038030e1 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py @@ -15,8 +15,8 @@ from .. import create_dataset -def test_create_dataset(capsys, client, random_dataset_id): +def test_create_dataset(capsys, random_dataset_id): - create_dataset.create_dataset(client, random_dataset_id) + create_dataset.create_dataset(random_dataset_id) out, err = capsys.readouterr() assert "Created dataset {}".format(random_dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_job.py b/packages/google-cloud-bigquery/samples/tests/test_create_job.py index bbf880cbe402..eab4b3e485f9 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_job.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_job.py @@ -16,8 +16,7 @@ def test_create_job(capsys, client): - - query_job = create_job.create_job(client) + query_job = create_job.create_job() client.cancel_job(query_job.job_id, location=query_job.location) out, err = capsys.readouterr() assert "Started job: {}".format(query_job.job_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table.py b/packages/google-cloud-bigquery/samples/tests/test_create_table.py index 093ee6e94277..48e52889acce 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table.py @@ -15,7 +15,7 @@ from .. import create_table -def test_create_table(capsys, client, random_table_id): - create_table.create_table(client, random_table_id) +def test_create_table(capsys, random_table_id): + create_table.create_table(random_table_id) out, err = capsys.readouterr() assert "Created table {}".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py b/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py index ac312b033832..9745966bf02b 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py @@ -15,9 +15,9 @@ from .. import create_table_range_partitioned -def test_create_table_range_partitioned(capsys, client, random_table_id): +def test_create_table_range_partitioned(capsys, random_table_id): table = create_table_range_partitioned.create_table_range_partitioned( - client, random_table_id + random_table_id ) out, _ = capsys.readouterr() assert "Created table {}".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py b/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py index a44e60371120..6bc38b4d27ce 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py +++ b/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py @@ -17,13 +17,13 @@ from .. import dataset_exists -def test_dataset_exists(capsys, client, random_dataset_id): +def test_dataset_exists(capsys, random_dataset_id, client): - dataset_exists.dataset_exists(client, random_dataset_id) + dataset_exists.dataset_exists(random_dataset_id) out, err = capsys.readouterr() assert "Dataset {} is not found".format(random_dataset_id) in out dataset = bigquery.Dataset(random_dataset_id) dataset = client.create_dataset(dataset) - dataset_exists.dataset_exists(client, random_dataset_id) + dataset_exists.dataset_exists(random_dataset_id) out, err = capsys.readouterr() assert "Dataset {} already exists".format(random_dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py index 94a2092407b0..0dbb2a76bdd9 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py @@ -17,17 +17,17 @@ from .. import label_dataset -def test_dataset_label_samples(capsys, client, dataset_id): +def test_dataset_label_samples(capsys, dataset_id): - label_dataset.label_dataset(client, dataset_id) + label_dataset.label_dataset(dataset_id) out, err = capsys.readouterr() assert "Labels added to {}".format(dataset_id) in out - get_dataset_labels.get_dataset_labels(client, dataset_id) + get_dataset_labels.get_dataset_labels(dataset_id) out, err = capsys.readouterr() assert "color: green" in out - dataset = delete_dataset_labels.delete_dataset_labels(client, dataset_id) + dataset = delete_dataset_labels.delete_dataset_labels(dataset_id) out, err = capsys.readouterr() assert "Labels deleted from {}".format(dataset_id) in out assert dataset.labels.get("color") is None diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py index 2b1b6ad06195..1f9b3c823fb9 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py @@ -15,8 +15,8 @@ from .. import delete_dataset -def test_delete_dataset(capsys, client, dataset_id): +def test_delete_dataset(capsys, dataset_id): - delete_dataset.delete_dataset(client, dataset_id) + delete_dataset.delete_dataset(dataset_id) out, err = capsys.readouterr() assert "Deleted dataset '{}'.".format(dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_table.py b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py index 8f4796623a83..7065743b0485 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_delete_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py @@ -15,8 +15,8 @@ from .. import delete_table -def test_delete_table(capsys, client, table_id): +def test_delete_table(capsys, table_id): - delete_table.delete_table(client, table_id) + delete_table.delete_table(table_id) out, err = capsys.readouterr() assert "Deleted table '{}'.".format(table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py b/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py index 8ee0e6a68c17..82297b2032f6 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py +++ b/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py @@ -17,11 +17,11 @@ from .. import download_public_data -def test_download_public_data(caplog, capsys, client): +def test_download_public_data(caplog, capsys): # Enable debug-level logging to verify the BigQuery Storage API is used. caplog.set_level(logging.DEBUG) - download_public_data.download_public_data(client) + download_public_data.download_public_data() out, _ = capsys.readouterr() assert "year" in out assert "gender" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py b/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py index 74dadc1db3fb..e322cb2e54c9 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py +++ b/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py @@ -17,11 +17,11 @@ from .. import download_public_data_sandbox -def test_download_public_data_sandbox(caplog, capsys, client): +def test_download_public_data_sandbox(caplog, capsys): # Enable debug-level logging to verify the BigQuery Storage API is used. caplog.set_level(logging.DEBUG) - download_public_data_sandbox.download_public_data_sandbox(client) + download_public_data_sandbox.download_public_data_sandbox() out, err = capsys.readouterr() assert "year" in out assert "gender" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py index dedec1d7b29e..3afdb00d39bd 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py @@ -15,8 +15,8 @@ from .. import get_dataset -def test_get_dataset(capsys, client, dataset_id): +def test_get_dataset(capsys, dataset_id): - get_dataset.get_dataset(client, dataset_id) + get_dataset.get_dataset(dataset_id) out, err = capsys.readouterr() assert dataset_id in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_table.py b/packages/google-cloud-bigquery/samples/tests/test_get_table.py index b950d434aef6..8bbd0681b584 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_table.py @@ -17,7 +17,7 @@ from .. import get_table -def test_get_table(capsys, client, random_table_id): +def test_get_table(capsys, random_table_id, client): schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), @@ -28,7 +28,7 @@ def test_get_table(capsys, client, random_table_id): table.description = "Sample Table" table = client.create_table(table) - get_table.get_table(client, random_table_id) + get_table.get_table(random_table_id) out, err = capsys.readouterr() assert "Got table '{}'.".format(random_table_id) in out assert "full_name" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py b/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py index 4c66a24f9b1a..1610d0e4a3ba 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py @@ -15,8 +15,7 @@ from .. import list_datasets -def test_list_datasets(capsys, client, dataset_id): - - list_datasets.list_datasets(client) +def test_list_datasets(capsys, dataset_id, client): + list_datasets.list_datasets() out, err = capsys.readouterr() assert "Datasets in project {}:".format(client.project) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py b/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py index 6d04a281ff42..5b375f4f4ee5 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py @@ -15,11 +15,10 @@ from .. import list_datasets_by_label -def test_list_datasets_by_label(capsys, client, dataset_id): - +def test_list_datasets_by_label(capsys, dataset_id, client): dataset = client.get_dataset(dataset_id) dataset.labels = {"color": "green"} dataset = client.update_dataset(dataset, ["labels"]) - list_datasets_by_label.list_datasets_by_label(client) + list_datasets_by_label.list_datasets_by_label() out, err = capsys.readouterr() assert dataset_id in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_tables.py b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py index ec1621ac7579..f9426aa53d21 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_tables.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py @@ -15,9 +15,9 @@ from .. import list_tables -def test_list_tables(capsys, client, dataset_id, table_id): +def test_list_tables(capsys, dataset_id, table_id): - list_tables.list_tables(client, dataset_id) + list_tables.list_tables(dataset_id) out, err = capsys.readouterr() assert "Tables contained in '{}':".format(dataset_id) in out assert table_id in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py index 3b7cb16ea692..17ec1114dc72 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py @@ -21,9 +21,9 @@ pyarrow = pytest.importorskip("pyarrow") -def test_load_table_dataframe(capsys, client, random_table_id): +def test_load_table_dataframe(capsys, random_table_id): - table = load_table_dataframe.load_table_dataframe(client, random_table_id) + table = load_table_dataframe.load_table_dataframe(random_table_id) out, _ = capsys.readouterr() assert "Loaded 4 rows and 3 columns" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_model_samples.py b/packages/google-cloud-bigquery/samples/tests/test_model_samples.py index d7b06a92a3e1..ebefad846642 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_model_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_model_samples.py @@ -18,22 +18,22 @@ from .. import update_model -def test_model_samples(capsys, client, dataset_id, model_id): +def test_model_samples(capsys, dataset_id, model_id): """Since creating a model is a long operation, test all model samples in the same test, following a typical end-to-end flow. """ - get_model.get_model(client, model_id) + get_model.get_model(model_id) out, err = capsys.readouterr() assert model_id in out - list_models.list_models(client, dataset_id) + list_models.list_models(dataset_id) out, err = capsys.readouterr() assert "Models contained in '{}':".format(dataset_id) in out - update_model.update_model(client, model_id) + update_model.update_model(model_id) out, err = capsys.readouterr() assert "This model was modified from a Python program." in out - delete_model.delete_model(client, model_id) + delete_model.delete_model(model_id) out, err = capsys.readouterr() assert "Deleted model '{}'.".format(model_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py index ea5b5d4dfcda..022b327be21b 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py @@ -15,8 +15,8 @@ from .. import query_external_gcs_temporary_table -def test_query_external_gcs_temporary_table(capsys, client): +def test_query_external_gcs_temporary_table(capsys,): - query_external_gcs_temporary_table.query_external_gcs_temporary_table(client) + query_external_gcs_temporary_table.query_external_gcs_temporary_table() out, err = capsys.readouterr() assert "There are 4 states with names starting with W." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py b/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py index 68f0774d935f..df17d0d0b04f 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py @@ -17,8 +17,8 @@ from .. import query_no_cache -def test_query_no_cache(capsys, client): +def test_query_no_cache(capsys,): - query_no_cache.query_no_cache(client) + query_no_cache.query_no_cache() out, err = capsys.readouterr() assert re.search(r"(Row[\w(){}:', ]+)$", out) diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py b/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py index 93352fa0bf1d..7ab049c8ce7c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py @@ -15,9 +15,9 @@ from .. import query_pagination -def test_query_pagination(capsys, client): +def test_query_pagination(capsys,): - query_pagination.query_pagination(client) + query_pagination.query_pagination() out, _ = capsys.readouterr() assert "The query data:" in out assert "name=James, count=4942431" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_script.py b/packages/google-cloud-bigquery/samples/tests/test_query_script.py index 70bb9df76fd4..037664d369ee 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_script.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_script.py @@ -15,9 +15,9 @@ from .. import query_script -def test_query_script(capsys, client): +def test_query_script(capsys,): - query_script.query_script(client) + query_script.query_script() out, _ = capsys.readouterr() assert "Script created 2 child jobs." in out assert ( diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py index dd9b3ab508cc..77d3f7130305 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py @@ -17,9 +17,9 @@ from .. import query_to_arrow -def test_query_to_arrow(capsys, client): +def test_query_to_arrow(capsys,): - arrow_table = query_to_arrow.query_to_arrow(client) + arrow_table = query_to_arrow.query_to_arrow() out, err = capsys.readouterr() assert "Downloaded 8 rows, 2 columns." in out arrow_schema = arrow_table.schema diff --git a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py index 81d33a0cf5df..a4467c59a896 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py @@ -16,18 +16,18 @@ from google.cloud import bigquery_v2 -def test_create_routine(capsys, client, random_routine_id): +def test_create_routine(capsys, random_routine_id): from .. import create_routine - create_routine.create_routine(client, random_routine_id) + create_routine.create_routine(random_routine_id) out, err = capsys.readouterr() assert "Created routine {}".format(random_routine_id) in out -def test_create_routine_ddl(capsys, client, random_routine_id): +def test_create_routine_ddl(capsys, random_routine_id, client): from .. import create_routine_ddl - create_routine_ddl.create_routine_ddl(client, random_routine_id) + create_routine_ddl.create_routine_ddl(random_routine_id) routine = client.get_routine(random_routine_id) out, err = capsys.readouterr() @@ -65,19 +65,19 @@ def test_create_routine_ddl(capsys, client, random_routine_id): assert routine.arguments == expected_arguments -def test_list_routines(capsys, client, dataset_id, routine_id): +def test_list_routines(capsys, dataset_id, routine_id): from .. import list_routines - list_routines.list_routines(client, dataset_id) + list_routines.list_routines(dataset_id) out, err = capsys.readouterr() assert "Routines contained in dataset {}:".format(dataset_id) in out assert routine_id in out -def test_get_routine(capsys, client, routine_id): +def test_get_routine(capsys, routine_id): from .. import get_routine - get_routine.get_routine(client, routine_id) + get_routine.get_routine(routine_id) out, err = capsys.readouterr() assert "Routine '{}':".format(routine_id) in out assert "Type: 'SCALAR_FUNCTION'" in out @@ -86,16 +86,16 @@ def test_get_routine(capsys, client, routine_id): assert "Type: 'type_kind: INT64\n'" in out -def test_delete_routine(capsys, client, routine_id): +def test_delete_routine(capsys, routine_id): from .. import delete_routine - delete_routine.delete_routine(client, routine_id) + delete_routine.delete_routine(routine_id) out, err = capsys.readouterr() assert "Deleted routine {}.".format(routine_id) in out -def test_update_routine(client, routine_id): +def test_update_routine(routine_id): from .. import update_routine - routine = update_routine.update_routine(client, routine_id) + routine = update_routine.update_routine(routine_id) assert routine.body == "x * 4" diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_exists.py b/packages/google-cloud-bigquery/samples/tests/test_table_exists.py index ae4fc65f847c..d1f579a64528 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_exists.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_exists.py @@ -17,13 +17,13 @@ from .. import table_exists -def test_table_exists(capsys, client, random_table_id): +def test_table_exists(capsys, random_table_id, client): - table_exists.table_exists(client, random_table_id) + table_exists.table_exists(random_table_id) out, err = capsys.readouterr() assert "Table {} is not found.".format(random_table_id) in out table = bigquery.Table(random_table_id) table = client.create_table(table) - table_exists.table_exists(client, random_table_id) + table_exists.table_exists(random_table_id) out, err = capsys.readouterr() assert "Table {} already exists.".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py index 9c5fd5768cfb..72b51df9c485 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py @@ -17,7 +17,7 @@ from .. import table_insert_rows -def test_table_insert_rows(capsys, client, random_table_id): +def test_table_insert_rows(capsys, random_table_id, client): schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), @@ -27,6 +27,6 @@ def test_table_insert_rows(capsys, client, random_table_id): table = bigquery.Table(random_table_id, schema=schema) table = client.create_table(table) - table_insert_rows.table_insert_rows(client, random_table_id) + table_insert_rows.table_insert_rows(random_table_id) out, err = capsys.readouterr() assert "New rows have been added." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py index a2a4febd7f75..c6199894a72c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py @@ -17,7 +17,7 @@ from .. import table_insert_rows_explicit_none_insert_ids as mut -def test_table_insert_rows_explicit_none_insert_ids(capsys, client, random_table_id): +def test_table_insert_rows_explicit_none_insert_ids(capsys, random_table_id, client): schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), @@ -27,6 +27,6 @@ def test_table_insert_rows_explicit_none_insert_ids(capsys, client, random_table table = bigquery.Table(random_table_id, schema=schema) table = client.create_table(table) - mut.table_insert_rows_explicit_none_insert_ids(client, random_table_id) + mut.table_insert_rows_explicit_none_insert_ids(random_table_id) out, err = capsys.readouterr() assert "New rows have been added." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_undelete_table.py b/packages/google-cloud-bigquery/samples/tests/test_undelete_table.py index 8fd221a39b30..a070abdbd36b 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_undelete_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_undelete_table.py @@ -15,8 +15,8 @@ from .. import undelete_table -def test_undelete_table(capsys, client, table_with_schema_id, random_table_id): - undelete_table.undelete_table(client, table_with_schema_id, random_table_id) +def test_undelete_table(capsys, table_with_schema_id, random_table_id): + undelete_table.undelete_table(table_with_schema_id, random_table_id) out, _ = capsys.readouterr() assert ( "Copied data from deleted table {} to {}".format( diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py index ae33dbfe4a4c..4c0aa835baf0 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py @@ -15,9 +15,9 @@ from .. import update_dataset_access -def test_update_dataset_access(capsys, client, dataset_id): +def test_update_dataset_access(capsys, dataset_id): - update_dataset_access.update_dataset_access(client, dataset_id) + update_dataset_access.update_dataset_access(dataset_id) out, err = capsys.readouterr() assert ( "Updated dataset '{}' with modified user permissions.".format(dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py index 55fa4b0d96fb..a5a8e6b5202c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py @@ -15,12 +15,12 @@ from .. import update_dataset_default_partition_expiration -def test_update_dataset_default_partition_expiration(capsys, client, dataset_id): +def test_update_dataset_default_partition_expiration(capsys, dataset_id): ninety_days_ms = 90 * 24 * 60 * 60 * 1000 # in milliseconds update_dataset_default_partition_expiration.update_dataset_default_partition_expiration( - client, dataset_id + dataset_id ) out, _ = capsys.readouterr() assert ( diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py index 46e9654209ed..b0f7013228e6 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py @@ -15,12 +15,12 @@ from .. import update_dataset_default_table_expiration -def test_update_dataset_default_table_expiration(capsys, client, dataset_id): +def test_update_dataset_default_table_expiration(capsys, dataset_id): one_day_ms = 24 * 60 * 60 * 1000 # in milliseconds update_dataset_default_table_expiration.update_dataset_default_table_expiration( - client, dataset_id + dataset_id ) out, err = capsys.readouterr() assert ( diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py index c6f8889f50da..e4ff586c7bc2 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py @@ -15,8 +15,8 @@ from .. import update_dataset_description -def test_update_dataset_description(capsys, client, dataset_id): +def test_update_dataset_description(capsys, dataset_id): - update_dataset_description.update_dataset_description(client, dataset_id) + update_dataset_description.update_dataset_description(dataset_id) out, err = capsys.readouterr() assert "Updated description." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py b/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py index 7ce6d64c780a..7e9ca6f2b44f 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py @@ -17,7 +17,7 @@ from .. import update_table_require_partition_filter -def test_update_table_require_partition_filter(capsys, client, random_table_id): +def test_update_table_require_partition_filter(capsys, random_table_id, client): # Make a partitioned table. schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")] @@ -26,7 +26,7 @@ def test_update_table_require_partition_filter(capsys, client, random_table_id): table = client.create_table(table) update_table_require_partition_filter.update_table_require_partition_filter( - client, random_table_id + random_table_id ) out, _ = capsys.readouterr() assert ( diff --git a/packages/google-cloud-bigquery/samples/undelete_table.py b/packages/google-cloud-bigquery/samples/undelete_table.py index 9db9712d2a74..18b15801ffee 100644 --- a/packages/google-cloud-bigquery/samples/undelete_table.py +++ b/packages/google-cloud-bigquery/samples/undelete_table.py @@ -15,15 +15,14 @@ from google.api_core import datetime_helpers -def undelete_table(client, table_id, recovered_table_id): +def undelete_table(table_id, recovered_table_id): # [START bigquery_undelete_table] import time - # TODO(developer): Import the client library. - # from google.cloud import bigquery + from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Choose a table to recover. # table_id = "your-project.your_dataset.your_table" diff --git a/packages/google-cloud-bigquery/samples/update_dataset_access.py b/packages/google-cloud-bigquery/samples/update_dataset_access.py index 134cf1b940cf..6e844cc90799 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_access.py @@ -13,13 +13,13 @@ # limitations under the License. -def update_dataset_access(client, dataset_id): +def update_dataset_access(dataset_id): # [START bigquery_update_dataset_access] from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = 'your-project.your_dataset' diff --git a/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py b/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py index 502d52ff199b..18cfb92db9b4 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py @@ -13,14 +13,14 @@ # limitations under the License. -def update_dataset_default_partition_expiration(client, dataset_id): +def update_dataset_default_partition_expiration(dataset_id): # [START bigquery_update_dataset_partition_expiration] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = 'your-project.your_dataset' diff --git a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py index 8de354b1f21b..b7e5cea9b20d 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py @@ -13,14 +13,14 @@ # limitations under the License. -def update_dataset_default_table_expiration(client, dataset_id): +def update_dataset_default_table_expiration(dataset_id): # [START bigquery_update_dataset_expiration] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = 'your-project.your_dataset' diff --git a/packages/google-cloud-bigquery/samples/update_dataset_description.py b/packages/google-cloud-bigquery/samples/update_dataset_description.py index 08eed8da2b64..0732b1c618e8 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_description.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_description.py @@ -13,14 +13,14 @@ # limitations under the License. -def update_dataset_description(client, dataset_id): +def update_dataset_description(dataset_id): # [START bigquery_update_dataset_description] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set dataset_id to the ID of the dataset to fetch. # dataset_id = 'your-project.your_dataset' diff --git a/packages/google-cloud-bigquery/samples/update_model.py b/packages/google-cloud-bigquery/samples/update_model.py index 7583c410e1ef..db262d8cc43c 100644 --- a/packages/google-cloud-bigquery/samples/update_model.py +++ b/packages/google-cloud-bigquery/samples/update_model.py @@ -13,15 +13,15 @@ # limitations under the License. -def update_model(client, model_id): +def update_model(model_id): """Sample ID: go/samples-tracker/1533""" # [START bigquery_update_model_description] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set model_id to the ID of the model to fetch. # model_id = 'your-project.your_dataset.your_model' diff --git a/packages/google-cloud-bigquery/samples/update_routine.py b/packages/google-cloud-bigquery/samples/update_routine.py index 4489d68f7ee4..61c6855b5041 100644 --- a/packages/google-cloud-bigquery/samples/update_routine.py +++ b/packages/google-cloud-bigquery/samples/update_routine.py @@ -13,14 +13,14 @@ # limitations under the License. -def update_routine(client, routine_id): +def update_routine(routine_id): # [START bigquery_update_routine] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set the fully-qualified ID for the routine. # routine_id = "my-project.my_dataset.my_routine" diff --git a/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py b/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py index 4c6be2d2cedc..cf1d532774b2 100644 --- a/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py +++ b/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py @@ -13,14 +13,14 @@ # limitations under the License. -def update_table_require_partition_filter(client, table_id): +def update_table_require_partition_filter(table_id): # [START bigquery_update_table_require_partition_filter] - # TODO(developer): Import the client library. - # from google.cloud import bigquery - # TODO(developer): Construct a BigQuery client object. - # client = bigquery.Client() + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() # TODO(developer): Set table_id to the ID of the model to fetch. # table_id = 'your-project.your_dataset.your_table' From 2a0407719cdab689eca23612eb82f2372208684d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 6 Jan 2020 09:55:37 +0000 Subject: [PATCH 0727/2016] feat(bigquery): add retry parameter to public methods where missing (#10026) * Add retry to Client.get_service_account_email() * Add retry to job.cancel() --- .../google/cloud/bigquery/client.py | 13 ++++--- .../google/cloud/bigquery/job.py | 12 +++--- .../tests/unit/test_client.py | 36 ++++++++++++++++++ .../tests/unit/test_job.py | 37 +++++++++++++++++++ 4 files changed, 87 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5707d57cdb62..34dceaeecd4a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -208,7 +208,9 @@ def close(self): self._http._auth_request.session.close() self._http.close() - def get_service_account_email(self, project=None, timeout=None): + def get_service_account_email( + self, project=None, retry=DEFAULT_RETRY, timeout=None + ): """Get the email address of the project's BigQuery service account Note: @@ -219,8 +221,10 @@ def get_service_account_email(self, project=None, timeout=None): project (str, optional): Project ID to use for retreiving service account email. Defaults to the client's project. + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): - The number of seconds to wait for the API response. + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: str: service account email address @@ -237,10 +241,7 @@ def get_service_account_email(self, project=None, timeout=None): project = self.project path = "/projects/%s/serviceAccount" % (project,) - # TODO: call thorugh self._call_api() and allow passing in a retry? - api_response = self._connection.api_request( - method="GET", path=path, timeout=timeout - ) + api_response = self._call_api(retry, method="GET", path=path, timeout=timeout) return api_response["email"] def list_projects( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 34628350c922..096651351173 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -710,7 +710,7 @@ def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): ) self._set_properties(api_response) - def cancel(self, client=None, timeout=None): + def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): """API call: cancel job via a POST request See @@ -720,8 +720,10 @@ def cancel(self, client=None, timeout=None): client (Optional[google.cloud.bigquery.client.Client]): the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): - The number of seconds to wait for the API response. + The number of seconds to wait for the underlying HTTP transport + before using ``retry`` Returns: bool: Boolean indicating that the cancel request was sent. @@ -732,10 +734,10 @@ def cancel(self, client=None, timeout=None): if self.location: extra_params["location"] = self.location - # TODO: call thorugh client._call_api() and allow passing in a retry? - api_response = client._connection.api_request( + api_response = client._call_api( + retry, method="POST", - path="%s/cancel" % (self.path,), + path="{}/cancel".format(self.path), query_params=extra_params, timeout=timeout, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index a5100fe6eaef..8d55e59599c4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -331,6 +331,42 @@ def test_get_service_account_email_w_alternate_project(self): conn.api_request.assert_called_once_with(method="GET", path=path, timeout=None) self.assertEqual(service_account_email, email) + def test_get_service_account_email_w_custom_retry(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + api_path = "/projects/{}/serviceAccount".format(self.PROJECT) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + resource = { + "kind": "bigquery#getServiceAccountResponse", + "email": "bq-123@bigquery-encryption.iam.gserviceaccount.com", + } + api_request_patcher = mock.patch.object( + client._connection, "api_request", side_effect=[ValueError, resource], + ) + + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, ValueError) + ) + + with api_request_patcher as fake_api_request: + service_account_email = client.get_service_account_email( + retry=retry, timeout=7.5 + ) + + self.assertEqual( + service_account_email, "bq-123@bigquery-encryption.iam.gserviceaccount.com" + ) + self.assertEqual( + fake_api_request.call_args_list, + [ + mock.call(method="GET", path=api_path, timeout=7.5), + mock.call(method="GET", path=api_path, timeout=7.5), # was retried once + ], + ) + def test_list_projects_defaults(self): from google.cloud.bigquery.client import Project diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index b796f3f73675..6b0d4b8fb352 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -831,6 +831,43 @@ def test_cancel_explicit(self): ) self.assertEqual(job._properties, resource) + def test_cancel_w_custom_retry(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + api_path = "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID) + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + response = {"job": resource} + job = self._set_properties_job() + + api_request_patcher = mock.patch.object( + job._client._connection, "api_request", side_effect=[ValueError, response], + ) + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, ValueError) + ) + + with api_request_patcher as fake_api_request: + result = job.cancel(retry=retry, timeout=7.5) + + self.assertTrue(result) + self.assertEqual(job._properties, resource) + self.assertEqual( + fake_api_request.call_args_list, + [ + mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), + mock.call( + method="POST", path=api_path, query_params={}, timeout=7.5, + ), # was retried once + ], + ) + def test__set_future_result_wo_done(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) From f90f2e584ee5aa58329ec9751b68093d5c06e0e2 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 8 Jan 2020 07:16:38 -0800 Subject: [PATCH 0728/2016] chore(bigquery): use 3.6 for blacken session (#10012) * [CHANGE ME] Re-generated bigquery to pick up changes in the API or client library generator. * chore: use 3.6 for blacken session * chore: blacken * chore: add comment about why 3.6 is used * chore: revert changes to metadata Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> --- packages/google-cloud-bigquery/noxfile.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 69b96b3dc984..17a2dee417c0 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -175,10 +175,14 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -@nox.session(python="3.7") +@nox.session(python="3.6") def blacken(session): """Run black. Format code to uniform standard. + + This currently uses Python 3.6 due to the automated Kokoro run of synthtool. + That run uses an image that doesn't have 3.6 installed. Before updating this + check the state of the `gcp_ubuntu_config` we use for that Kokoro run. """ session.install("black") session.run("black", *BLACK_PATHS) From aa116df5cfaf54f83cb2531d0240ce64d3610b83 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Thu, 9 Jan 2020 17:49:04 +0530 Subject: [PATCH 0729/2016] fix(bigquery): fix minimum version of dependency (#10016) --- packages/google-cloud-bigquery/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 5e2ba87a4b1c..a543ca416af1 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -32,8 +32,8 @@ 'enum34; python_version < "3.4"', "google-auth >= 1.9.0, < 2.0dev", "google-api-core >= 1.15.0, < 2.0dev", - "google-cloud-core >= 1.0.3, < 2.0dev", - "google-resumable-media >= 0.3.1, != 0.4.0, < 0.6.0dev", + "google-cloud-core >= 1.1.0, < 2.0dev", + "google-resumable-media >= 0.5.0, < 0.6dev", "protobuf >= 3.6.0", "six >=1.13.0,< 2.0.0dev", ] From 6bd23f6316a95d6171df43c1a923fc1f31a8f7ef Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Sat, 11 Jan 2020 02:54:40 +0530 Subject: [PATCH 0730/2016] feat(bigquery): add `RowIterator.to_dataframe_iterable` method to get pandas DataFrame per page (#10017) * feat(bigquery): make rowIterator._to_dataframe_iterable public * feat(bigquery): cosmetic changes and unittest change * feat(bigquery): change as per comment --- .../google/cloud/bigquery/table.py | 39 +++++++++++- .../tests/unit/test_table.py | 62 +++++++++++++++++++ 2 files changed, 98 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a71acf8ecc8a..585676490c38 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1554,11 +1554,44 @@ def to_arrow( arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) - def _to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): + def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): """Create an iterable of pandas DataFrames, to process the table as a stream. - See ``to_dataframe`` for argument descriptions. + Args: + bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient): + **Beta Feature** Optional. A BigQuery Storage API client. If + supplied, use the faster BigQuery Storage API to fetch rows + from BigQuery. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + + **Caution**: There is a known issue reading small anonymous + query result tables with the BQ Storage API. When a problem + is encountered reading a table, the tabledata.list method + from the BigQuery API is used, instead. + dtypes (Map[str, Union[str, pandas.Series.dtype]]): + Optional. A dictionary of column names pandas ``dtype``s. The + provided ``dtype`` is used when constructing the series for + the column specified. Otherwise, the default pandas behavior + is used. + + Returns: + pandas.DataFrame: + A generator of :class:`~pandas.DataFrame`. + + Raises: + ValueError: + If the :mod:`pandas` library cannot be imported. """ + if pandas is None: + raise ValueError(_NO_PANDAS_ERROR) + if dtypes is None: + dtypes = {} + column_names = [field.name for field in self._schema] bqstorage_download = functools.partial( _pandas_helpers.download_dataframe_bqstorage, @@ -1683,7 +1716,7 @@ def to_dataframe( progress_bar = self._get_progress_bar(progress_bar_type) frames = [] - for frame in self._to_dataframe_iterable( + for frame in self.to_dataframe_iterable( bqstorage_client=bqstorage_client, dtypes=dtypes ): frames.append(frame) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 1043df45f9a3..73fe1c10d49b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2014,6 +2014,68 @@ def test_to_arrow_w_pyarrow_none(self): with self.assertRaises(ValueError): row_iterator.to_arrow() + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_iterable(self): + from google.cloud.bigquery.schema import SchemaField + import types + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + path = "/foo" + api_request = mock.Mock( + side_effect=[ + { + "rows": [{"f": [{"v": "Bengt"}, {"v": "32"}]}], + "pageToken": "NEXTPAGE", + }, + {"rows": [{"f": [{"v": "Sven"}, {"v": "33"}]}]}, + ] + ) + + row_iterator = self._make_one( + _mock_client(), api_request, path, schema, page_size=1, max_results=5 + ) + dfs = row_iterator.to_dataframe_iterable() + + self.assertIsInstance(dfs, types.GeneratorType) + + df_1 = next(dfs) + self.assertIsInstance(df_1, pandas.DataFrame) + self.assertEqual(df_1.name.dtype.name, "object") + self.assertEqual(df_1.age.dtype.name, "int64") + self.assertEqual(len(df_1), 1) # verify the number of rows + self.assertEqual( + df_1["name"][0], "Bengt" + ) # verify the first value of 'name' column + self.assertEqual(df_1["age"][0], 32) # verify the first value of 'age' column + + df_2 = next(dfs) + self.assertEqual(len(df_2), 1) # verify the number of rows + self.assertEqual(df_2["name"][0], "Sven") + self.assertEqual(df_2["age"][0], 33) + + @mock.patch("google.cloud.bigquery.table.pandas", new=None) + def test_to_dataframe_iterable_error_if_pandas_is_none(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + with pytest.raises(ValueError, match="pandas"): + row_iterator.to_dataframe_iterable() + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): from google.cloud.bigquery.schema import SchemaField From f05e760cc2cb2c8b800bf09a2910b6f11e9db1e1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Sat, 11 Jan 2020 11:41:29 -0600 Subject: [PATCH 0731/2016] fix(bigquery): write pandas `datetime[ns]` columns to BigQuery TIMESTAMP columns (#10028) * fix(bigquery): write pandas datetime[ns] columns to BigQuery TIMESTAMP columns Also: * Enable TIMESTAMP and DATETIME unit tests for `_pandas_helpers`. * Add more data types to load dataframe sample. * blacken * lint * update client tests * doc: show timezone conversions for timestamp columns Pandas doesn't automatically convert datetime objects to UTC time, so show how to do this in the code sample. * doc: update comments to indicate desired use of TIMESTAMP * fix: add missing client fixture --- .../google/cloud/bigquery/_pandas_helpers.py | 6 +- .../samples/load_table_dataframe.py | 53 ++++++++++-- .../tests/test_load_table_dataframe.py | 47 ++++++++++- .../google-cloud-bigquery/tests/system.py | 5 +- .../tests/unit/test__pandas_helpers.py | 84 ++++++++++++------- .../tests/unit/test_client.py | 4 +- 6 files changed, 157 insertions(+), 42 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 6e91a9624b06..645478ff6d4b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -52,7 +52,9 @@ _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", - "datetime64[ns]": "DATETIME", + # BigQuery does not support uploading DATETIME values from Parquet files. + # See: https://github.com/googleapis/google-cloud-python/issues/9996 + "datetime64[ns]": "TIMESTAMP", "float32": "FLOAT", "float64": "FLOAT", "int8": "INTEGER", @@ -218,7 +220,7 @@ def bq_to_arrow_array(series, bq_field): return pyarrow.ListArray.from_pandas(series, type=arrow_type) if field_type_upper in schema._STRUCT_TYPES: return pyarrow.StructArray.from_pandas(series, type=arrow_type) - return pyarrow.array(series, type=arrow_type) + return pyarrow.Array.from_pandas(series, type=arrow_type) def get_column_or_index(dataframe, name): diff --git a/packages/google-cloud-bigquery/samples/load_table_dataframe.py b/packages/google-cloud-bigquery/samples/load_table_dataframe.py index feaa4550bac9..91dd6e9f09fc 100644 --- a/packages/google-cloud-bigquery/samples/load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/load_table_dataframe.py @@ -16,9 +16,11 @@ def load_table_dataframe(table_id): # [START bigquery_load_table_dataframe] - from google.cloud import bigquery + import datetime + from google.cloud import bigquery import pandas + import pytz # Construct a BigQuery client object. client = bigquery.Client() @@ -27,16 +29,55 @@ def load_table_dataframe(table_id): # table_id = "your-project.your_dataset.your_table_name" records = [ - {"title": u"The Meaning of Life", "release_year": 1983}, - {"title": u"Monty Python and the Holy Grail", "release_year": 1975}, - {"title": u"Life of Brian", "release_year": 1979}, - {"title": u"And Now for Something Completely Different", "release_year": 1971}, + { + "title": u"The Meaning of Life", + "release_year": 1983, + "length_minutes": 112.5, + "release_date": pytz.timezone("Europe/Paris") + .localize(datetime.datetime(1983, 5, 9, 13, 0, 0)) + .astimezone(pytz.utc), + # Assume UTC timezone when a datetime object contains no timezone. + "dvd_release": datetime.datetime(2002, 1, 22, 7, 0, 0), + }, + { + "title": u"Monty Python and the Holy Grail", + "release_year": 1975, + "length_minutes": 91.5, + "release_date": pytz.timezone("Europe/London") + .localize(datetime.datetime(1975, 4, 9, 23, 59, 2)) + .astimezone(pytz.utc), + "dvd_release": datetime.datetime(2002, 7, 16, 9, 0, 0), + }, + { + "title": u"Life of Brian", + "release_year": 1979, + "length_minutes": 94.25, + "release_date": pytz.timezone("America/New_York") + .localize(datetime.datetime(1979, 8, 17, 23, 59, 5)) + .astimezone(pytz.utc), + "dvd_release": datetime.datetime(2008, 1, 14, 8, 0, 0), + }, + { + "title": u"And Now for Something Completely Different", + "release_year": 1971, + "length_minutes": 88.0, + "release_date": pytz.timezone("Europe/London") + .localize(datetime.datetime(1971, 9, 28, 23, 59, 7)) + .astimezone(pytz.utc), + "dvd_release": datetime.datetime(2003, 10, 22, 10, 0, 0), + }, ] dataframe = pandas.DataFrame( records, # In the loaded table, the column order reflects the order of the # columns in the DataFrame. - columns=["title", "release_year"], + columns=[ + "title", + "release_year", + "length_minutes", + "release_date", + "dvd_release", + ], # Optionally, set a named index, which can also be written to the # BigQuery table. index=pandas.Index( diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py index 17ec1114dc72..2286660469ff 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py @@ -21,11 +21,52 @@ pyarrow = pytest.importorskip("pyarrow") -def test_load_table_dataframe(capsys, random_table_id): +def test_load_table_dataframe(capsys, client, random_table_id): table = load_table_dataframe.load_table_dataframe(random_table_id) out, _ = capsys.readouterr() - assert "Loaded 4 rows and 3 columns" in out + expected_column_names = [ + "wikidata_id", + "title", + "release_year", + "length_minutes", + "release_date", + "dvd_release", + ] + assert "Loaded 4 rows and {} columns".format(len(expected_column_names)) in out column_names = [field.name for field in table.schema] - assert column_names == ["wikidata_id", "title", "release_year"] + assert column_names == expected_column_names + column_types = [field.field_type for field in table.schema] + assert column_types == [ + "STRING", + "STRING", + "INTEGER", + "FLOAT", + "TIMESTAMP", + "TIMESTAMP", + ] + + df = client.list_rows(table).to_dataframe() + df.sort_values("release_year", inplace=True) + assert df["title"].tolist() == [ + u"And Now for Something Completely Different", + u"Monty Python and the Holy Grail", + u"Life of Brian", + u"The Meaning of Life", + ] + assert df["release_year"].tolist() == [1971, 1975, 1979, 1983] + assert df["length_minutes"].tolist() == [88.0, 91.5, 94.25, 112.5] + assert df["release_date"].tolist() == [ + pandas.Timestamp("1971-09-28T22:59:07+00:00"), + pandas.Timestamp("1975-04-09T22:59:02+00:00"), + pandas.Timestamp("1979-08-18T03:59:05+00:00"), + pandas.Timestamp("1983-05-09T11:00:00+00:00"), + ] + assert df["dvd_release"].tolist() == [ + pandas.Timestamp("2003-10-22T10:00:00+00:00"), + pandas.Timestamp("2002-07-16T09:00:00+00:00"), + pandas.Timestamp("2008-01-14T08:00:00+00:00"), + pandas.Timestamp("2002-01-22T07:00:00+00:00"), + ] + assert df["wikidata_id"].tolist() == [u"Q16403", u"Q25043", u"Q24953", u"Q24980"] diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index b431f628d001..cd72352c29fd 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -719,7 +719,10 @@ def test_load_table_from_dataframe_w_automatic_schema(self): ( bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("ts_col", "TIMESTAMP"), - bigquery.SchemaField("dt_col", "DATETIME"), + # BigQuery does not support uploading DATETIME values from + # Parquet files. See: + # https://github.com/googleapis/google-cloud-python/issues/9996 + bigquery.SchemaField("dt_col", "TIMESTAMP"), bigquery.SchemaField("float32_col", "FLOAT"), bigquery.SchemaField("float64_col", "FLOAT"), bigquery.SchemaField("int8_col", "INTEGER"), diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index b2d74d54e120..6adf098c03c8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -92,6 +92,7 @@ def test_is_datetime(): assert is_datetime(pyarrow.timestamp("us", tz=None)) assert not is_datetime(pyarrow.timestamp("ms", tz=None)) assert not is_datetime(pyarrow.timestamp("us", tz="UTC")) + assert not is_datetime(pyarrow.timestamp("ns", tz="UTC")) assert not is_datetime(pyarrow.string()) @@ -386,20 +387,15 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ), ("BOOLEAN", [True, None, False, None]), ("BOOL", [False, None, True, None]), - # TODO: Once https://issues.apache.org/jira/browse/ARROW-5450 is - # resolved, test with TIMESTAMP column. Conversion from pyarrow - # TimestampArray to list of Python objects fails with OverflowError: - # Python int too large to convert to C long. - # - # ( - # "TIMESTAMP", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), - # datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - # ], - # ), + ( + "TIMESTAMP", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + ], + ), ( "DATE", [ @@ -418,20 +414,16 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): datetime.time(12, 0, 0), ], ), - # TODO: Once https://issues.apache.org/jira/browse/ARROW-5450 is - # resolved, test with DATETIME column. Conversion from pyarrow - # TimestampArray to list of Python objects fails with OverflowError: - # Python int too large to convert to C long. - # - # ( - # "DATETIME", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # datetime.datetime(1970, 1, 1, 0, 0, 0), - # ], - # ), + ( + "DATETIME", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + None, + datetime.datetime(1970, 1, 1, 0, 0, 0), + datetime.datetime(1999, 3, 14, 15, 9, 26, 535898), + ], + ), ( "GEOGRAPHY", [ @@ -453,6 +445,42 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): assert rows == roundtrip +@pytest.mark.parametrize( + "bq_type,rows", + [ + ( + "TIMESTAMP", + [ + "1971-09-28T23:59:07+00:00", + "1975-04-09T23:59:02+00:00", + "1979-08-17T23:59:05+00:00", + "NaT", + "1983-05-09T13:00:00+00:00", + ], + ), + ( + "DATETIME", + [ + "1971-09-28T23:59:07", + "1975-04-09T23:59:02", + "1979-08-17T23:59:05", + "NaT", + "1983-05-09T13:00:00", + ], + ), + ], +) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): + rows = [pandas.Timestamp(row) for row in rows] + series = pandas.Series(rows) + bq_field = schema.SchemaField("field_name", bq_type) + arrow_array = module_under_test.bq_to_arrow_array(series, bq_field) + roundtrip = arrow_array.to_pandas() + assert series.equals(roundtrip) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 8d55e59599c4..b87ea52a057d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -6425,7 +6425,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): SchemaField("int_col", "INTEGER"), SchemaField("float_col", "FLOAT"), SchemaField("bool_col", "BOOLEAN"), - SchemaField("dt_col", "DATETIME"), + SchemaField("dt_col", "TIMESTAMP"), SchemaField("ts_col", "TIMESTAMP"), ) @@ -6671,7 +6671,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): SchemaField("int_as_float_col", "INTEGER"), SchemaField("float_col", "FLOAT"), SchemaField("bool_col", "BOOLEAN"), - SchemaField("dt_col", "DATETIME"), + SchemaField("dt_col", "TIMESTAMP"), SchemaField("ts_col", "TIMESTAMP"), SchemaField("string_col", "STRING"), SchemaField("bytes_col", "BYTES"), From 3b3cca71099693e7399f0af6e315dcec74c207a5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 15 Jan 2020 08:42:51 -0600 Subject: [PATCH 0732/2016] refactor(bigquery): `to_dataframe` uses faster `to_arrow` + `to_pandas` when `pyarrow` is available (#10027) * fix(bigquery): to_dataframe uses 2x faster to_arrow + to_pandas when pyarrow is available * fix: skip to_arrow tests when pyarrow is missing * test: update test to work around numpy array encoding of nested arrays * test: add test for tabledata.list with no rows * test: boost test coverage * chore: fix lint --- .../google/cloud/bigquery/table.py | 63 ++-- .../google-cloud-bigquery/tests/system.py | 7 +- .../tests/unit/test_table.py | 278 +++++++++++++++--- 3 files changed, 286 insertions(+), 62 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 585676490c38..555f529f3670 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1519,6 +1519,17 @@ def to_arrow( if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) + if ( + bqstorage_client or create_bqstorage_client + ) and self.max_results is not None: + warnings.warn( + "Cannot use bqstorage_client if max_results is set, " + "reverting to fetching data with the tabledata.list endpoint.", + stacklevel=2, + ) + create_bqstorage_client = False + bqstorage_client = None + owns_bqstorage_client = False if not bqstorage_client and create_bqstorage_client: owns_bqstorage_client = True @@ -1707,33 +1718,39 @@ def to_dataframe( create_bqstorage_client = False bqstorage_client = None - owns_bqstorage_client = False - if not bqstorage_client and create_bqstorage_client: - owns_bqstorage_client = True - bqstorage_client = self.client._create_bqstorage_client() - - try: - progress_bar = self._get_progress_bar(progress_bar_type) + if pyarrow is not None: + # If pyarrow is available, calling to_arrow, then converting to a + # pandas dataframe is about 2x faster. This is because pandas.concat is + # rarely no-copy, whereas pyarrow.Table.from_batches + to_pandas is + # usually no-copy. + record_batch = self.to_arrow( + progress_bar_type=progress_bar_type, + bqstorage_client=bqstorage_client, + create_bqstorage_client=create_bqstorage_client, + ) + df = record_batch.to_pandas() + for column in dtypes: + df[column] = pandas.Series(df[column], dtype=dtypes[column]) + return df - frames = [] - for frame in self.to_dataframe_iterable( - bqstorage_client=bqstorage_client, dtypes=dtypes - ): - frames.append(frame) + # The bqstorage_client is only used if pyarrow is available, so the + # rest of this method only needs to account for tabledata.list. + progress_bar = self._get_progress_bar(progress_bar_type) - if progress_bar is not None: - # In some cases, the number of total rows is not populated - # until the first page of rows is fetched. Update the - # progress bar's total to keep an accurate count. - progress_bar.total = progress_bar.total or self.total_rows - progress_bar.update(len(frame)) + frames = [] + for frame in self.to_dataframe_iterable(dtypes=dtypes): + frames.append(frame) if progress_bar is not None: - # Indicate that the download has finished. - progress_bar.close() - finally: - if owns_bqstorage_client: - bqstorage_client.transport.channel.close() + # In some cases, the number of total rows is not populated + # until the first page of rows is fetched. Update the + # progress bar's total to keep an accurate count. + progress_bar.total = progress_bar.total or self.total_rows + progress_bar.update(len(frame)) + + if progress_bar is not None: + # Indicate that the download has finished. + progress_bar.close() # Avoid concatting an empty list. if not frames: diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index cd72352c29fd..4a1c032717f5 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -2372,7 +2372,12 @@ def test_nested_table_to_dataframe(self): row = df.iloc[0] # verify the row content self.assertEqual(row["string_col"], "Some value") - self.assertEqual(row["record_col"], record) + expected_keys = tuple(sorted(record.keys())) + row_keys = tuple(sorted(row["record_col"].keys())) + self.assertEqual(row_keys, expected_keys) + # Can't compare numpy arrays, which pyarrow encodes the embedded + # repeated column to, so convert to list. + self.assertEqual(list(row["record_col"]["nested_repeated"]), [0, 1, 2]) # verify that nested data can be accessed with indices/keys self.assertEqual(row["record_col"]["nested_repeated"][0], 0) self.assertEqual( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 73fe1c10d49b..6e8958cdc46c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1809,6 +1809,46 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_arrow_max_results_w_create_bqstorage_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_arrow(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "tabledata.list" in str(warning) + ] + self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + mock_client._create_bqstorage_client.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" @@ -1856,7 +1896,7 @@ def test_to_arrow_w_bqstorage(self): mock_page = mock.create_autospec(reader.ReadRowsPage) mock_page.to_arrow.return_value = pyarrow.RecordBatch.from_arrays( - page_items, arrow_schema + page_items, schema=arrow_schema ) mock_pages = (mock_page, mock_page, mock_page) type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) @@ -2057,6 +2097,80 @@ def test_to_dataframe_iterable(self): self.assertEqual(df_2["name"][0], "Sven") self.assertEqual(df_2["age"][0], 33) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_iterable_w_bqstorage(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud.bigquery_storage_v1beta1 import reader + + arrow_fields = [ + pyarrow.field("colA", pyarrow.int64()), + # Not alphabetical to test column order. + pyarrow.field("colC", pyarrow.float64()), + pyarrow.field("colB", pyarrow.utf8()), + ] + arrow_schema = pyarrow.schema(arrow_fields) + + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + bqstorage_client.transport = mock.create_autospec( + big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + ) + streams = [ + # Use two streams we want to check frames are read from each stream. + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, + ] + session = bigquery_storage_v1beta1.types.ReadSession( + streams=streams, + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, + ) + bqstorage_client.create_read_session.return_value = session + + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) + bqstorage_client.read_rows.return_value = mock_rowstream + + mock_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_rowstream.rows.return_value = mock_rows + page_dataframe = pandas.DataFrame( + {"colA": [1, -1], "colC": [2.0, 4.0], "colB": ["abc", "def"]}, + ) + mock_page = mock.create_autospec(reader.ReadRowsPage) + mock_page.to_dataframe.return_value = page_dataframe + mock_pages = (mock_page, mock_page, mock_page) + type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) + + schema = [ + schema.SchemaField("colA", "IGNORED"), + schema.SchemaField("colC", "IGNORED"), + schema.SchemaField("colB", "IGNORED"), + ] + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=schema, + ) + + got = list( + row_iterator.to_dataframe_iterable(bqstorage_client=bqstorage_client) + ) + + # Have expected number of rows? + total_pages = len(streams) * len(mock_pages) + self.assertEqual(len(got), total_pages) + + # Don't close the client if it was passed in. + bqstorage_client.transport.channel.close.assert_not_called() + @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_iterable_error_if_pandas_is_none(self): from google.cloud.bigquery.schema import SchemaField @@ -2140,6 +2254,45 @@ def test_to_dataframe_progress_bar( progress_bar_mock().close.assert_called_once() self.assertEqual(len(df), 4) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(tqdm is None, "Requires `tqdm`") + @mock.patch("tqdm.tqdm_gui") + @mock.patch("tqdm.tqdm_notebook") + @mock.patch("tqdm.tqdm") + def test_to_dataframe_progress_bar_wo_pyarrow( + self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock + ): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + + progress_bars = ( + ("tqdm", tqdm_mock), + ("tqdm_notebook", tqdm_notebook_mock), + ("tqdm_gui", tqdm_gui_mock), + ) + + for progress_bar_type, progress_bar_mock in progress_bars: + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + with mock.patch("google.cloud.bigquery.table.pyarrow", None): + df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) + + progress_bar_mock.assert_called() + progress_bar_mock().update.assert_called() + progress_bar_mock().close.assert_called_once() + self.assertEqual(len(df), 4) + @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): @@ -2246,6 +2399,47 @@ def test_to_dataframe_w_empty_results(self): self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_w_empty_results_wo_pyarrow(self): + from google.cloud.bigquery.schema import SchemaField + + with mock.patch("google.cloud.bigquery.table.pyarrow", None): + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + api_request = mock.Mock(return_value={"rows": []}) + row_iterator = self._make_one(_mock_client(), api_request, schema=schema) + + df = row_iterator.to_dataframe() + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 0) # verify the number of rows + self.assertEqual(list(df), ["name", "age"]) # verify the column names + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_w_no_results_wo_pyarrow(self): + from google.cloud.bigquery.schema import SchemaField + + with mock.patch("google.cloud.bigquery.table.pyarrow", None): + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + api_request = mock.Mock(return_value={"rows": []}) + row_iterator = self._make_one(_mock_client(), api_request, schema=schema) + + def empty_iterable(dtypes=None): + return [] + + row_iterator.to_dataframe_iterable = empty_iterable + + df = row_iterator.to_dataframe() + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 0) # verify the number of rows + self.assertEqual(list(df), ["name", "age"]) # verify the column names + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_logs_tabledata_list(self): from google.cloud.bigquery.table import Table @@ -2278,9 +2472,9 @@ def test_to_dataframe_w_various_types_nullable(self): ] row_data = [ [None, None, None, None, None, None], - ["1.4338368E9", "420", "1.1", "Cash", "true", "1999-12-01"], - ["1.3878117E9", "2580", "17.7", "Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", "Credit", "true", "1981-11-04"], + ["1.4338368E9", "420", "1.1", u"Cash", "true", "1999-12-01"], + ["1.3878117E9", "2580", "17.7", u"Cash", "false", "1953-06-14"], + ["1.3855653E9", "2280", "4.4", u"Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -2300,7 +2494,7 @@ def test_to_dataframe_w_various_types_nullable(self): else: self.assertIsInstance(row.start_timestamp, pandas.Timestamp) self.assertIsInstance(row.seconds, float) - self.assertIsInstance(row.payment_type, str) + self.assertIsInstance(row.payment_type, six.string_types) self.assertIsInstance(row.complete, bool) self.assertIsInstance(row.date, datetime.date) @@ -2318,9 +2512,9 @@ def test_to_dataframe_column_dtypes(self): SchemaField("date", "DATE"), ] row_data = [ - ["1.4338368E9", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"], - ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], + ["1.4338368E9", "420", "1.1", "1.77", u"Cash", "true", "1999-12-01"], + ["1.3878117E9", "2580", "17.7", "28.5", u"Cash", "false", "1953-06-14"], + ["1.3855653E9", "2280", "4.4", "7.1", u"Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -2486,9 +2680,9 @@ def test_to_dataframe_w_bqstorage_no_streams(self): api_request=None, path=None, schema=[ - schema.SchemaField("colA", "IGNORED"), - schema.SchemaField("colC", "IGNORED"), - schema.SchemaField("colB", "IGNORED"), + schema.SchemaField("colA", "INTEGER"), + schema.SchemaField("colC", "FLOAT"), + schema.SchemaField("colB", "STRING"), ], table=mut.TableReference.from_string("proj.dset.tbl"), ) @@ -2560,10 +2754,11 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): mock_pages = mock.PropertyMock(return_value=()) type(mock_rows).pages = mock_pages + # Schema is required when there are no record batches in the stream. schema = [ - schema.SchemaField("colA", "IGNORED"), - schema.SchemaField("colC", "IGNORED"), - schema.SchemaField("colB", "IGNORED"), + schema.SchemaField("colA", "INTEGER"), + schema.SchemaField("colC", "FLOAT"), + schema.SchemaField("colB", "STRING"), ] row_iterator = mut.RowIterator( @@ -2622,14 +2817,15 @@ def test_to_dataframe_w_bqstorage_nonempty(self): mock_rows = mock.create_autospec(reader.ReadRowsIterable) mock_rowstream.rows.return_value = mock_rows page_items = [ - {"colA": 1, "colB": "abc", "colC": 2.0}, - {"colA": -1, "colB": "def", "colC": 4.0}, + pyarrow.array([1, -1]), + pyarrow.array([2.0, 4.0]), + pyarrow.array(["abc", "def"]), ] - - mock_page = mock.create_autospec(reader.ReadRowsPage) - mock_page.to_dataframe.return_value = pandas.DataFrame( - page_items, columns=["colA", "colB", "colC"] + page_record_batch = pyarrow.RecordBatch.from_arrays( + page_items, schema=arrow_schema ) + mock_page = mock.create_autospec(reader.ReadRowsPage) + mock_page.to_arrow.return_value = page_record_batch mock_pages = (mock_page, mock_page, mock_page) type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) @@ -2656,7 +2852,7 @@ def test_to_dataframe_w_bqstorage_nonempty(self): # Have expected number of rows? total_pages = len(streams) * len(mock_pages) - total_rows = len(page_items) * total_pages + total_rows = len(page_items[0]) * total_pages self.assertEqual(len(got.index), total_rows) # Don't close the client if it was passed in. @@ -2695,11 +2891,14 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): mock_rows = mock.create_autospec(reader.ReadRowsIterable) mock_rowstream.rows.return_value = mock_rows - page_data_frame = pandas.DataFrame( - [{"colA": 1}, {"colA": -1}], columns=["colA"] + page_items = [ + pyarrow.array([1, -1]), + ] + page_record_batch = pyarrow.RecordBatch.from_arrays( + page_items, schema=arrow_schema ) mock_page = mock.create_autospec(reader.ReadRowsPage) - mock_page.to_dataframe.return_value = page_data_frame + mock_page.to_arrow.return_value = page_record_batch mock_pages = (mock_page, mock_page, mock_page) type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) @@ -2711,7 +2910,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertEqual(list(got), ["colA"]) total_pages = len(streams) * len(mock_pages) - total_rows = len(page_data_frame) * total_pages + total_rows = len(page_items[0]) * total_pages self.assertEqual(len(got.index), total_rows) self.assertTrue(got.index.is_unique) @@ -2757,14 +2956,15 @@ def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): page_items = [-1, 0, 1] type(mock_page).num_items = mock.PropertyMock(return_value=len(page_items)) - def blocking_to_dataframe(*args, **kwargs): - # Sleep for longer than the waiting interval. This ensures the - # progress_queue gets written to more than once because it gives - # the worker->progress updater time to sum intermediate updates. + def blocking_to_arrow(*args, **kwargs): + # Sleep for longer than the waiting interval so that we know we're + # only reading one page per loop at most. time.sleep(2 * mut._PROGRESS_INTERVAL) - return pandas.DataFrame({"testcol": page_items}) + return pyarrow.RecordBatch.from_arrays( + [pyarrow.array(page_items)], schema=arrow_schema + ) - mock_page.to_dataframe.side_effect = blocking_to_dataframe + mock_page.to_arrow.side_effect = blocking_to_arrow mock_pages = (mock_page, mock_page, mock_page, mock_page, mock_page) type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) @@ -2790,7 +2990,7 @@ def blocking_to_dataframe(*args, **kwargs): progress_updates = [ args[0] for args, kwargs in tqdm_mock().update.call_args_list ] - # Should have sent >1 update due to delay in blocking_to_dataframe. + # Should have sent >1 update due to delay in blocking_to_arrow. self.assertGreater(len(progress_updates), 1) self.assertEqual(sum(progress_updates), expected_total_rows) tqdm_mock().close.assert_called_once() @@ -2830,18 +3030,20 @@ def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) bqstorage_client.create_read_session.return_value = session + page_items = [ + pyarrow.array([1, -1]), + pyarrow.array([2.0, 4.0]), + pyarrow.array(["abc", "def"]), + ] - def blocking_to_dataframe(*args, **kwargs): + def blocking_to_arrow(*args, **kwargs): # Sleep for longer than the waiting interval so that we know we're # only reading one page per loop at most. time.sleep(2 * mut._PROGRESS_INTERVAL) - return pandas.DataFrame( - {"colA": [1, -1], "colB": ["abc", "def"], "colC": [2.0, 4.0]}, - columns=["colA", "colB", "colC"], - ) + return pyarrow.RecordBatch.from_arrays(page_items, schema=arrow_schema) mock_page = mock.create_autospec(reader.ReadRowsPage) - mock_page.to_dataframe.side_effect = blocking_to_dataframe + mock_page.to_arrow.side_effect = blocking_to_arrow mock_rows = mock.create_autospec(reader.ReadRowsIterable) mock_pages = mock.PropertyMock(return_value=(mock_page, mock_page, mock_page)) type(mock_rows).pages = mock_pages From 664cd4a911b4deb910a34b841c0aa3fd1382edbc Mon Sep 17 00:00:00 2001 From: Gurov Ilya Date: Fri, 17 Jan 2020 18:33:05 +0300 Subject: [PATCH 0733/2016] feat(bigquery): check `json_rows` arg type in `insert_rows_json()` (#10162) * feat(bigquery): check json_rows arg type in insert_rows_json() * Spelling --- .../google/cloud/bigquery/client.py | 2 ++ .../tests/unit/test_client.py | 25 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 34dceaeecd4a..d37d8ac19e21 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2506,6 +2506,8 @@ def insert_rows_json( identifies the row, and the "errors" key contains a list of the mappings describing one or more problems with the row. """ + if not isinstance(json_rows, collections_abc.Sequence): + raise TypeError("json_rows argument should be a sequence of dicts") # Convert table to just a reference because unlike insert_rows, # insert_rows_json doesn't need the table schema. It's not doing any # type conversions. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index b87ea52a057d..cce4bc532074 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5384,6 +5384,31 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): timeout=None, ) + def test_insert_rows_w_wrong_arg(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + PROJECT = "PROJECT" + DS_ID = "DS_ID" + TABLE_ID = "TABLE_ID" + ROW = {"full_name": "Bhettye Rhubble", "age": "27", "joined": None} + + creds = _make_credentials() + client = self._make_one(project=PROJECT, credentials=creds, _http=object()) + client._connection = make_connection({}) + + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField("joined", "TIMESTAMP", mode="NULLABLE"), + ] + table = Table(table_ref, schema=schema) + + with self.assertRaises(TypeError): + client.insert_rows_json(table, ROW) + def test_list_partitions(self): from google.cloud.bigquery.table import Table From a1019daf0d4d9bdcc2789735b5132618b13a3f70 Mon Sep 17 00:00:00 2001 From: Gurov Ilya Date: Thu, 23 Jan 2020 19:54:09 +0300 Subject: [PATCH 0734/2016] feat(bigquery): check `rows` argument type in `insert_rows()` (#10174) * feat(bigquery): check rows arg type in insert_rows() * add class marking * add Iterator into if statement to pass islices * add Iterator into if statement to pass islices * black reformat --- .../google/cloud/bigquery/client.py | 16 ++++++++++++---- .../tests/unit/test_client.py | 15 +++++++++++++-- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index d37d8ac19e21..83e6bf8045ed 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1220,7 +1220,7 @@ def delete_table( raise def _get_query_results( - self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None, + self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None ): """Get the query results object for a query job. @@ -2355,7 +2355,7 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): str, \ ]): The destination table for the row data, or a reference to it. - rows (Union[Sequence[Tuple], Sequence[dict]]): + rows (Union[Sequence[Tuple], Sequence[Dict]]): Row data to be inserted. If a list of tuples is given, each tuple should contain data for each schema field on the current table and in the same order as the schema fields. If @@ -2376,8 +2376,11 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): the mappings describing one or more problems with the row. Raises: - ValueError: if table's schema is not set + ValueError: if table's schema is not set or `rows` is not a `Sequence`. """ + if not isinstance(rows, (collections_abc.Sequence, collections_abc.Iterator)): + raise TypeError("rows argument should be a sequence of dicts or tuples") + table = _table_arg_to_table(table, default_project=self.project) if not isinstance(table, Table): @@ -2505,8 +2508,13 @@ def insert_rows_json( One mapping per row with insert errors: the "index" key identifies the row, and the "errors" key contains a list of the mappings describing one or more problems with the row. + + Raises: + TypeError: if `json_rows` is not a `Sequence`. """ - if not isinstance(json_rows, collections_abc.Sequence): + if not isinstance( + json_rows, (collections_abc.Sequence, collections_abc.Iterator) + ): raise TypeError("json_rows argument should be a sequence of dicts") # Convert table to just a reference because unlike insert_rows, # insert_rows_json doesn't need the table schema. It's not doing any diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index cce4bc532074..6b40d8a020a4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5048,6 +5048,7 @@ def _row_data(row): ) def test_insert_rows_errors(self): + from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table ROWS = [ @@ -5058,6 +5059,7 @@ def test_insert_rows_errors(self): ] creds = _make_credentials() http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) # table ref with no selected fields @@ -5068,10 +5070,19 @@ def test_insert_rows_errors(self): with self.assertRaises(ValueError): client.insert_rows(Table(self.TABLE_REF), ROWS) - # neither Table nor tableReference + # neither Table nor TableReference with self.assertRaises(TypeError): client.insert_rows(1, ROWS) + schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + ] + table = Table(self.TABLE_REF, schema=schema) + + # rows is just a dict + with self.assertRaises(TypeError): + client.insert_rows(table, {"full_name": "value"}) + def test_insert_rows_w_numeric(self): from google.cloud.bigquery import table from google.cloud.bigquery.schema import SchemaField @@ -5853,7 +5864,7 @@ def test_list_rows_error(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - # neither Table nor tableReference + # neither Table nor TableReference with self.assertRaises(TypeError): client.list_rows(1) From 87cbc3a3c3a155f4fe64ed0df1a3aedfca90ffcf Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 27 Jan 2020 01:31:47 -0600 Subject: [PATCH 0735/2016] deprecate(bigquery): deprecate `client.dataset()` in favor of `DatasetReference` (#7753) * deprecate(bigquery): deprecate `client.dataset()` in favor of DatasetReference Now that all client methods that take a `DatasetReference` or `TableReference` also take a string, the `client.dataset()` method is unnecessary and confusing. * fix: remove unnecessary textwrap * Update bigquery/google/cloud/bigquery/client.py Co-Authored-By: Peter Lamut * docs: use sphinx deprecation directive Co-authored-by: Peter Lamut --- .../google/cloud/bigquery/client.py | 26 +++- .../google/cloud/bigquery/magics.py | 3 +- .../tests/unit/test_client.py | 126 +++++++++++------- 3 files changed, 104 insertions(+), 51 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 83e6bf8045ed..5da12990b390 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -354,7 +354,20 @@ def list_datasets( ) def dataset(self, dataset_id, project=None): - """Construct a reference to a dataset. + """Deprecated: Construct a reference to a dataset. + + .. deprecated:: 1.24.0 + Construct a + :class:`~google.cloud.bigquery.dataset.DatasetReference` using its + constructor or use a string where previously a reference object + was used. + + As of ``google-cloud-bigquery`` version 1.7.0, all client methods + that take a + :class:`~google.cloud.bigquery.dataset.DatasetReference` or + :class:`~google.cloud.bigquery.table.TableReference` also take a + string in standard SQL format, e.g. ``project.dataset_id`` or + ``project.dataset_id.table_id``. Args: dataset_id (str): ID of the dataset. @@ -370,6 +383,13 @@ def dataset(self, dataset_id, project=None): if project is None: project = self.project + warnings.warn( + "Client.dataset is deprecated and will be removed in a future version. " + "Use a string like 'my_project.my_dataset' or a " + "cloud.google.bigquery.DatasetReference object, instead.", + PendingDeprecationWarning, + stacklevel=2, + ) return DatasetReference(project, dataset_id) def _create_bqstorage_client(self): @@ -419,7 +439,7 @@ def create_dataset( >>> from google.cloud import bigquery >>> client = bigquery.Client() - >>> dataset = bigquery.Dataset(client.dataset('my_dataset')) + >>> dataset = bigquery.Dataset('my_project.my_dataset') >>> dataset = client.create_dataset(dataset) """ @@ -2584,7 +2604,7 @@ def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): ) as guard: meta_table = self.get_table( TableReference( - self.dataset(table.dataset_id, project=table.project), + DatasetReference(table.project, table.dataset_id), "%s$__PARTITIONS_SUMMARY__" % table.table_id, ), retry=retry, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 5ca6817a99c6..39608b19fcde 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -153,6 +153,7 @@ from google.api_core.exceptions import NotFound import google.auth from google.cloud import bigquery +import google.cloud.bigquery.dataset from google.cloud.bigquery.dbapi import _helpers import six @@ -534,7 +535,7 @@ def _cell_magic(line, query): ) dataset_id, table_id = split job_config.allow_large_results = True - dataset_ref = client.dataset(dataset_id) + dataset_ref = bigquery.dataset.DatasetReference(client.project, dataset_id) destination_table_ref = dataset_ref.table(table_id) job_config.destination = destination_table_ref job_config.create_disposition = "CREATE_IF_NEEDED" diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 6b40d8a020a4..952c876dff39 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -554,7 +554,19 @@ def test_dataset_with_specified_project(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - dataset = client.dataset(self.DS_ID, self.PROJECT) + catch_warnings = warnings.catch_warnings(record=True) + + with catch_warnings as warned: + dataset = client.dataset(self.DS_ID, self.PROJECT) + + matches = [ + warning + for warning in warned + if warning.category in (DeprecationWarning, PendingDeprecationWarning) + and "Client.dataset" in str(warning) + and "my_project.my_dataset" in str(warning) + ] + assert matches, "A Client.dataset deprecation warning was not raised." self.assertIsInstance(dataset, DatasetReference) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) @@ -565,7 +577,19 @@ def test_dataset_with_default_project(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - dataset = client.dataset(self.DS_ID) + catch_warnings = warnings.catch_warnings(record=True) + + with catch_warnings as warned: + dataset = client.dataset(self.DS_ID) + + matches = [ + warning + for warning in warned + if warning.category in (DeprecationWarning, PendingDeprecationWarning) + and "Client.dataset" in str(warning) + and "my_project.my_dataset" in str(warning) + ] + assert matches, "A Client.dataset deprecation warning was not raised." self.assertIsInstance(dataset, DatasetReference) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) @@ -582,7 +606,7 @@ def test_get_dataset(self): "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, } conn = client._connection = make_connection(resource) - dataset_ref = client.dataset(self.DS_ID) + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) dataset = client.get_dataset(dataset_ref, timeout=7.5) @@ -660,7 +684,7 @@ def test_create_dataset_minimal(self): client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(RESOURCE) - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) after = client.create_dataset(before, timeout=7.5) @@ -716,7 +740,7 @@ def test_create_dataset_w_attrs(self): AccessEntry(None, "view", VIEW), ] - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) before.access_entries = entries before.description = DESCRIPTION @@ -772,7 +796,7 @@ def test_create_dataset_w_custom_property(self): client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(resource) - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) before._properties["newAlphaProperty"] = "unreleased property" @@ -812,7 +836,7 @@ def test_create_dataset_w_client_location_wo_dataset_location(self): ) conn = client._connection = make_connection(RESOURCE) - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) after = client.create_dataset(before) @@ -854,7 +878,7 @@ def test_create_dataset_w_client_location_w_dataset_location(self): ) conn = client._connection = make_connection(RESOURCE) - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) before.location = OTHER_LOCATION @@ -894,7 +918,7 @@ def test_create_dataset_w_reference(self): ) conn = client._connection = make_connection(resource) - dataset = client.create_dataset(client.dataset(self.DS_ID)) + dataset = client.create_dataset(DatasetReference(self.PROJECT, self.DS_ID)) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) @@ -1567,7 +1591,7 @@ def test_get_model(self): } conn = client._connection = make_connection(resource) - model_ref = client.dataset(self.DS_ID).model(self.MODEL_ID) + model_ref = DatasetReference(self.PROJECT, self.DS_ID).model(self.MODEL_ID) got = client.get_model(model_ref, timeout=7.5) conn.api_request.assert_called_once_with( @@ -1705,7 +1729,9 @@ def test_update_dataset_w_invalid_field(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(ValueError): - client.update_dataset(Dataset(client.dataset(self.DS_ID)), ["foo"]) + client.update_dataset( + Dataset("{}.{}".format(self.PROJECT, self.DS_ID)), ["foo"] + ) def test_update_dataset(self): from google.cloud.bigquery.dataset import Dataset, AccessEntry @@ -1730,7 +1756,7 @@ def test_update_dataset(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(RESOURCE, RESOURCE) - ds = Dataset(client.dataset(self.DS_ID)) + ds = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) ds.description = DESCRIPTION ds.friendly_name = FRIENDLY_NAME ds.location = LOCATION @@ -1780,7 +1806,7 @@ def test_update_dataset_w_custom_property(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(resource) - dataset = Dataset(client.dataset(self.DS_ID)) + dataset = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) dataset._properties["newAlphaProperty"] = "unreleased property" dataset = client.update_dataset(dataset, ["newAlphaProperty"]) @@ -2216,7 +2242,7 @@ def test_list_tables_empty_w_timeout(self): client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection({}) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) iterator = client.list_tables(dataset, timeout=7.5) self.assertIs(iterator.dataset, dataset) page = six.next(iterator.pages) @@ -2277,7 +2303,7 @@ def test_list_models_defaults(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(DATA) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) iterator = client.list_models(dataset) self.assertIs(iterator.dataset, dataset) @@ -2299,7 +2325,7 @@ def test_list_models_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.list_models(client.dataset(self.DS_ID).model("foo")) + client.list_models(DatasetReference(self.PROJECT, self.DS_ID).model("foo")) def test_list_routines_empty_w_timeout(self): creds = _make_credentials() @@ -2352,7 +2378,7 @@ def test_list_routines_defaults(self): creds = _make_credentials() client = self._make_one(project=project_id, credentials=creds) conn = client._connection = make_connection(resource) - dataset = client.dataset(dataset_id) + dataset = DatasetReference(client.project, dataset_id) iterator = client.list_routines(dataset) self.assertIs(iterator.dataset, dataset) @@ -2376,7 +2402,9 @@ def test_list_routines_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.list_routines(client.dataset(self.DS_ID).table("foo")) + client.list_routines( + DatasetReference(self.PROJECT, self.DS_ID).table("foo") + ) def test_list_tables_defaults(self): from google.cloud.bigquery.table import TableListItem @@ -2414,7 +2442,7 @@ def test_list_tables_defaults(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(DATA) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) iterator = client.list_tables(dataset) self.assertIs(iterator.dataset, dataset) @@ -2468,7 +2496,7 @@ def test_list_tables_explicit(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(DATA) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) iterator = client.list_tables( # Test with string for dataset ID. @@ -2499,7 +2527,7 @@ def test_list_tables_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.list_tables(client.dataset(self.DS_ID).table("foo")) + client.list_tables(DatasetReference(self.PROJECT, self.DS_ID).table("foo")) def test_delete_dataset(self): from google.cloud.bigquery.dataset import Dataset @@ -2524,7 +2552,7 @@ def test_delete_dataset_delete_contents(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection({}, {}) - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) for arg in (ds_ref, Dataset(ds_ref)): client.delete_dataset(arg, delete_contents=True) conn.api_request.assert_called_with( @@ -2538,7 +2566,9 @@ def test_delete_dataset_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_dataset(client.dataset(self.DS_ID).table("foo")) + client.delete_dataset( + DatasetReference(self.PROJECT, self.DS_ID).table("foo") + ) def test_delete_dataset_w_not_found_ok_false(self): path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) @@ -2585,7 +2615,7 @@ def test_delete_model(self): model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) models = ( model_id, - client.dataset(self.DS_ID).model(self.MODEL_ID), + DatasetReference(self.PROJECT, self.DS_ID).model(self.MODEL_ID), Model(model_id), ) conn = client._connection = make_connection(*([{}] * len(models))) @@ -2600,7 +2630,7 @@ def test_delete_model_w_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_model(client.dataset(self.DS_ID)) + client.delete_model(DatasetReference(self.PROJECT, self.DS_ID)) def test_delete_model_w_not_found_ok_false(self): path = "/projects/{}/datasets/{}/models/{}".format( @@ -2662,7 +2692,7 @@ def test_delete_routine_w_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_routine(client.dataset(self.DS_ID)) + client.delete_routine(DatasetReference(self.PROJECT, self.DS_ID)) def test_delete_routine_w_not_found_ok_false(self): creds = _make_credentials() @@ -2731,7 +2761,7 @@ def test_delete_table_w_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_table(client.dataset(self.DS_ID)) + client.delete_table(DatasetReference(self.PROJECT, self.DS_ID)) def test_delete_table_w_not_found_ok_false(self): path = "/projects/{}/datasets/{}/tables/{}".format( @@ -3237,7 +3267,7 @@ def test_load_table_from_uri(self): client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) - destination = client.dataset(self.DS_ID).table(DESTINATION) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION) job = client.load_table_from_uri( SOURCE_URI, destination, job_id=JOB, job_config=job_config, timeout=7.5 @@ -3295,7 +3325,7 @@ def test_load_table_from_uri_w_explicit_project(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(resource) - destination = client.dataset(self.DS_ID).table(destination_id) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(destination_id) client.load_table_from_uri( source_uri, @@ -3368,7 +3398,7 @@ def test_load_table_from_uri_w_invalid_job_config(self): http = object() job_config = job.CopyJobConfig() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - destination = client.dataset(self.DS_ID).table(DESTINATION) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION) with self.assertRaises(TypeError) as exc: client.load_table_from_uri( @@ -3551,7 +3581,7 @@ def test_copy_table(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) @@ -3611,7 +3641,7 @@ def test_copy_table_w_explicit_project(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(resource) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(source_id) destination = dataset.table(destination_id) @@ -3689,23 +3719,25 @@ def test_copy_table_w_source_strings(self): sources = [ "dataset_wo_proj.some_table", "other_project.other_dataset.other_table", - client.dataset("dataset_from_ref").table("table_from_ref"), + DatasetReference(client.project, "dataset_from_ref").table( + "table_from_ref" + ), ] destination = "some_project.some_dataset.destination_table" job = client.copy_table(sources, destination) expected_sources = [ - client.dataset("dataset_wo_proj").table("some_table"), - client.dataset("other_dataset", project="other_project").table( - "other_table" + DatasetReference(client.project, "dataset_wo_proj").table("some_table"), + DatasetReference("other_project", "other_dataset").table("other_table"), + DatasetReference(client.project, "dataset_from_ref").table( + "table_from_ref" ), - client.dataset("dataset_from_ref").table("table_from_ref"), ] self.assertEqual(list(job.sources), expected_sources) - expected_destination = client.dataset( - "some_dataset", project="some_project" - ).table("destination_table") + expected_destination = DatasetReference("some_project", "some_dataset").table( + "destination_table" + ) self.assertEqual(job.destination, expected_destination) def test_copy_table_w_invalid_job_config(self): @@ -3719,7 +3751,7 @@ def test_copy_table_w_invalid_job_config(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) job_config = job.ExtractJobConfig() - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) with self.assertRaises(TypeError) as exc: @@ -3756,7 +3788,7 @@ def test_copy_table_w_valid_job_config(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) @@ -3799,7 +3831,7 @@ def test_extract_table(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) job = client.extract_table(source, DESTINATION, job_id=JOB, timeout=7.5) @@ -3826,7 +3858,7 @@ def test_extract_table_w_invalid_job_config(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) job_config = job.LoadJobConfig() with self.assertRaises(TypeError) as exc: @@ -3859,7 +3891,7 @@ def test_extract_table_w_explicit_project(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(resource) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(source_id) client.extract_table( @@ -3948,7 +3980,7 @@ def test_extract_table_generated_job_id(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) job_config = ExtractJobConfig() job_config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON @@ -3997,7 +4029,7 @@ def test_extract_table_w_destination_uris(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) job = client.extract_table(source, [DESTINATION1, DESTINATION2], job_id=JOB) From 26601101939050822db70da87ef3169f0470db36 Mon Sep 17 00:00:00 2001 From: Gurov Ilya Date: Wed, 29 Jan 2020 15:56:52 +0300 Subject: [PATCH 0736/2016] docs(bigquery): typo fix (#10209) --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 096651351173..5861febe830d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -3073,7 +3073,7 @@ def done(self, retry=DEFAULT_RETRY, timeout=None): else: timeout = server_timeout_with_margin - # Do not refresh is the state is already done, as the job will not + # Do not refresh if the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: self._query_results = self._client._get_query_results( From ee20134acc4234936200f74fe340418e3a261dd1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 30 Jan 2020 06:59:24 +0000 Subject: [PATCH 0737/2016] chore(bigquery): adjust test assertions to new default timeout (#10222) --- .../google-cloud-bigquery/tests/unit/test__http.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index 1d8313776a0e..4da805d48c78 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -19,6 +19,12 @@ class TestConnection(unittest.TestCase): + @staticmethod + def _get_default_timeout(): + from google.cloud.bigquery._http import _http + + return _http._DEFAULT_TIMEOUT + @staticmethod def _get_target_class(): from google.cloud.bigquery._http import Connection @@ -79,7 +85,7 @@ def test_user_agent(self): headers=expected_headers, method="GET", url=expected_uri, - timeout=None, + timeout=self._get_default_timeout(), ) self.assertIn("my-application/1.2.3", conn.user_agent) @@ -112,5 +118,5 @@ def test_extra_headers_replace(self): headers=expected_headers, method="GET", url=expected_uri, - timeout=None, + timeout=self._get_default_timeout(), ) From f24f8c5ea29ad35a05451084b7beee8868381f44 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 31 Jan 2020 07:12:03 +0000 Subject: [PATCH 0738/2016] test(bigquery): add tests for concatenating categorical columns (#10180) --- .../tests/unit/test_table.py | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 6e8958cdc46c..079ec6e000d3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -3242,6 +3242,174 @@ def test_to_dataframe_w_bqstorage_snapshot(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud.bigquery_storage_v1beta1 import reader + + arrow_fields = [ + # Not alphabetical to test column order. + pyarrow.field("col_str", pyarrow.utf8()), + # The backend returns strings, and without other info, pyarrow contains + # string data in categorical columns, too (and not maybe the Dictionary + # type that corresponds to pandas.Categorical). + pyarrow.field("col_category", pyarrow.utf8()), + ] + arrow_schema = pyarrow.schema(arrow_fields) + + # create a mock BQ storage client + bqstorage_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + bqstorage_client.transport = mock.create_autospec( + big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + ) + session = bigquery_storage_v1beta1.types.ReadSession( + streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}], + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, + ) + bqstorage_client.create_read_session.return_value = session + + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) + bqstorage_client.read_rows.return_value = mock_rowstream + + # prepare the iterator over mocked rows + mock_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_rowstream.rows.return_value = mock_rows + page_items = [ + [ + pyarrow.array(["foo", "bar", "baz"]), # col_str + pyarrow.array(["low", "medium", "low"]), # col_category + ], + [ + pyarrow.array(["foo_page2", "bar_page2", "baz_page2"]), # col_str + pyarrow.array(["medium", "high", "low"]), # col_category + ], + ] + + mock_pages = [] + + for record_list in page_items: + page_record_batch = pyarrow.RecordBatch.from_arrays( + record_list, schema=arrow_schema + ) + mock_page = mock.create_autospec(reader.ReadRowsPage) + mock_page.to_arrow.return_value = page_record_batch + mock_pages.append(mock_page) + + type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) + + schema = [ + schema.SchemaField("col_str", "IGNORED"), + schema.SchemaField("col_category", "IGNORED"), + ] + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=schema, + ) + + # run the method under test + got = row_iterator.to_dataframe( + bqstorage_client=bqstorage_client, + dtypes={ + "col_category": pandas.core.dtypes.dtypes.CategoricalDtype( + categories=["low", "medium", "high"], ordered=False, + ), + }, + ) + + # Are the columns in the expected order? + column_names = ["col_str", "col_category"] + self.assertEqual(list(got), column_names) + + # Have expected number of rows? + total_pages = len(mock_pages) # we have a single stream, thus these two equal + total_rows = len(page_items[0][0]) * total_pages + self.assertEqual(len(got.index), total_rows) + + # Are column types correct? + expected_dtypes = [ + pandas.core.dtypes.dtypes.np.dtype("O"), # the default for string data + pandas.core.dtypes.dtypes.CategoricalDtype( + categories=["low", "medium", "high"], ordered=False, + ), + ] + self.assertEqual(list(got.dtypes), expected_dtypes) + + # And the data in the categorical column? + self.assertEqual( + list(got["col_category"]), + ["low", "medium", "low", "medium", "high", "low"], + ) + + # Don't close the client if it was passed in. + bqstorage_client.transport.channel.close.assert_not_called() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_concat_categorical_dtype_wo_pyarrow(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("col_str", "STRING"), + SchemaField("col_category", "STRING"), + ] + row_data = [ + [u"foo", u"low"], + [u"bar", u"medium"], + [u"baz", u"low"], + [u"foo_page2", u"medium"], + [u"bar_page2", u"high"], + [u"baz_page2", u"low"], + ] + path = "/foo" + + rows = [{"f": [{"v": field} for field in row]} for row in row_data[:3]] + rows_page2 = [{"f": [{"v": field} for field in row]} for row in row_data[3:]] + api_request = mock.Mock( + side_effect=[{"rows": rows, "pageToken": "NEXTPAGE"}, {"rows": rows_page2}] + ) + + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + with mock.patch("google.cloud.bigquery.table.pyarrow", None): + got = row_iterator.to_dataframe( + dtypes={ + "col_category": pandas.core.dtypes.dtypes.CategoricalDtype( + categories=["low", "medium", "high"], ordered=False, + ), + }, + ) + + self.assertIsInstance(got, pandas.DataFrame) + self.assertEqual(len(got), 6) # verify the number of rows + expected_columns = [field.name for field in schema] + self.assertEqual(list(got), expected_columns) # verify the column names + + # Are column types correct? + expected_dtypes = [ + pandas.core.dtypes.dtypes.np.dtype("O"), # the default for string data + pandas.core.dtypes.dtypes.CategoricalDtype( + categories=["low", "medium", "high"], ordered=False, + ), + ] + self.assertEqual(list(got.dtypes), expected_dtypes) + + # And the data in the categorical column? + self.assertEqual( + list(got["col_category"]), + ["low", "medium", "low", "medium", "high", "low"], + ) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From 73ee8476482576d04b1506cb36f55f15ff3a1558 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 31 Jan 2020 08:18:29 +0000 Subject: [PATCH 0739/2016] fix(bigquery): fix inserting missing repeated fields (#10196) * fix(bigquery): do not insert missing fields as explicit None * Omit all None values from JSON request body * Add an extra test for all missing row values * Flatten a block of code a bit --- .../google/cloud/bigquery/_helpers.py | 11 +++---- .../tests/unit/test__helpers.py | 29 +++++++++++++++++-- .../tests/unit/test_client.py | 20 +++++++------ 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 98eadb0a2f8e..21a8e3636d24 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -424,11 +424,12 @@ def _record_field_to_json(fields, row_value): for subindex, subfield in enumerate(fields): subname = subfield.name - if isdict: - subvalue = row_value.get(subname) - else: - subvalue = row_value[subindex] - record[subname] = _field_to_json(subfield, subvalue) + subvalue = row_value.get(subname) if isdict else row_value[subindex] + + # None values are unconditionally omitted + if subvalue is not None: + record[subname] = _field_to_json(subfield, subvalue) + return record diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 6d92b4de73ba..fa6d27c981d8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -856,14 +856,39 @@ def test_w_non_empty_dict(self): converted = self._call_fut(fields, original) self.assertEqual(converted, {"one": "42", "two": "two"}) - def test_w_missing_nullable(self): + def test_w_some_missing_nullables(self): fields = [ _make_field("INT64", name="one", mode="NULLABLE"), _make_field("STRING", name="two", mode="NULLABLE"), ] original = {"one": 42} converted = self._call_fut(fields, original) - self.assertEqual(converted, {"one": "42", "two": None}) + + # missing fields should not be converted to an explicit None + self.assertEqual(converted, {"one": "42"}) + + def test_w_all_missing_nullables(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = {} + converted = self._call_fut(fields, original) + + # we should get an empty dict, not None + self.assertEqual(converted, {}) + + def test_w_explicit_none_value(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + _make_field("BOOL", name="three", mode="REPEATED"), + ] + original = {"three": None, "one": 42, "two": None} + converted = self._call_fut(fields, original) + + # None values should be dropped regardless of the field type + self.assertEqual(converted, {"one": "42"}) class Test_field_to_json(unittest.TestCase): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 952c876dff39..2227183a9236 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4668,10 +4668,13 @@ def test_insert_rows_w_schema(self): ] def _row_data(row): + result = {"full_name": row[0], "age": str(row[1])} joined = row[2] - if isinstance(row[2], datetime.datetime): + if isinstance(joined, datetime.datetime): joined = _microseconds_from_datetime(joined) * 1e-6 - return {"full_name": row[0], "age": str(row[1]), "joined": joined} + if joined is not None: + result["joined"] = joined + return result SENT = { "rows": [ @@ -4740,7 +4743,10 @@ def test_insert_rows_w_list_of_dictionaries(self): def _row_data(row): joined = row["joined"] - if isinstance(joined, datetime.datetime): + if joined is None: + row = copy.deepcopy(row) + del row["joined"] + elif isinstance(joined, datetime.datetime): row["joined"] = _microseconds_from_datetime(joined) * 1e-6 row["age"] = str(row["age"]) return row @@ -4959,9 +4965,8 @@ def test_insert_rows_w_repeated_fields(self): }, { "json": { - "color": None, "items": [], - "structs": [{"score": None, "times": [], "distances": [3.5]}], + "structs": [{"times": [], "distances": [3.5]}], }, "insertId": "1", }, @@ -5028,10 +5033,7 @@ def test_insert_rows_w_record_schema(self): }, "insertId": "1", }, - { - "json": {"full_name": "Wylma Phlyntstone", "phone": None}, - "insertId": "2", - }, + {"json": {"full_name": "Wylma Phlyntstone"}, "insertId": "2"}, ] } From 7d6f64cbb01726842d6db95c0978e7d316083921 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 3 Feb 2020 14:55:21 +0530 Subject: [PATCH 0740/2016] refactor(bigquery): update code samples of load table file and uri (#10175) * refactor(bigquery): update code samples of load table file and uri * refactor(bigquery): add uri for load orc and avro data * refactor(bigquery): fix lint and docs * refactor(bigquery): update copyright to 2020 --- .../google-cloud-bigquery/docs/snippets.py | 263 ------------------ .../docs/usage/encryption.rst | 2 +- .../docs/usage/tables.rst | 30 +- .../samples/load_table_file.py | 43 +++ .../samples/load_table_uri_avro.py | 38 +++ .../samples/load_table_uri_cmek.py | 60 ++++ .../samples/load_table_uri_csv.py | 46 +++ .../samples/load_table_uri_json.py | 46 +++ .../samples/load_table_uri_orc.py | 38 +++ .../samples/load_table_uri_parquet.py | 37 +++ .../samples/tests/test_load_table_file.py | 39 +++ .../samples/tests/test_load_table_uri_avro.py | 21 ++ .../samples/tests/test_load_table_uri_cmek.py | 22 ++ .../samples/tests/test_load_table_uri_csv.py | 22 ++ .../samples/tests/test_load_table_uri_json.py | 22 ++ .../samples/tests/test_load_table_uri_orc.py | 22 ++ .../tests/test_load_table_uri_parquet.py | 22 ++ 17 files changed, 505 insertions(+), 268 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/load_table_file.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_avro.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_cmek.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_csv.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_json.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_orc.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_parquet.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_file.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_avro.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index bb584fa0494a..4981a1e18100 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -581,269 +581,6 @@ def test_manage_views(client, to_delete): # [END bigquery_grant_view_access] -def test_load_table_from_file(client, to_delete): - """Upload table data from a CSV file.""" - dataset_id = "load_table_from_file_dataset_{}".format(_millis()) - table_id = "load_table_from_file_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = "US" - client.create_dataset(dataset) - to_delete.append(dataset) - snippets_dir = os.path.abspath(os.path.dirname(__file__)) - filename = os.path.join( - snippets_dir, "..", "..", "bigquery", "tests", "data", "people.csv" - ) - - # [START bigquery_load_from_file] - # from google.cloud import bigquery - # client = bigquery.Client() - # filename = '/path/to/file.csv' - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - dataset_ref = client.dataset(dataset_id) - table_ref = dataset_ref.table(table_id) - job_config = bigquery.LoadJobConfig() - job_config.source_format = bigquery.SourceFormat.CSV - job_config.skip_leading_rows = 1 - job_config.autodetect = True - - with open(filename, "rb") as source_file: - job = client.load_table_from_file(source_file, table_ref, job_config=job_config) - - job.result() # Waits for table load to complete. - - print("Loaded {} rows into {}:{}.".format(job.output_rows, dataset_id, table_id)) - # [END bigquery_load_from_file] - - table = client.get_table(table_ref) - rows = list(client.list_rows(table)) # API request - - assert len(rows) == 2 - # Order is not preserved, so compare individually - row1 = bigquery.Row(("Wylma Phlyntstone", 29), {"full_name": 0, "age": 1}) - assert row1 in rows - row2 = bigquery.Row(("Phred Phlyntstone", 32), {"full_name": 0, "age": 1}) - assert row2 in rows - - -def test_load_table_from_uri_avro(client, to_delete, capsys): - dataset_id = "load_table_from_uri_avro_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_avro] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.source_format = bigquery.SourceFormat.AVRO - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" - - load_job = client.load_table_from_uri( - uri, dataset_ref.table("us_states"), job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_avro] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." in out - - -def test_load_table_from_uri_csv(client, to_delete, capsys): - dataset_id = "load_table_from_uri_csv_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_csv] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - job_config.skip_leading_rows = 1 - # The source format defaults to CSV, so the line below is optional. - job_config.source_format = bigquery.SourceFormat.CSV - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - - load_job = client.load_table_from_uri( - uri, dataset_ref.table("us_states"), job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_csv] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." in out - - -def test_load_table_from_uri_json(client, to_delete, capsys): - dataset_id = "load_table_from_uri_json_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = "US" - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_json] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" - - load_job = client.load_table_from_uri( - uri, - dataset_ref.table("us_states"), - location="US", # Location must match that of the destination dataset. - job_config=job_config, - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_json] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." in out - - -def test_load_table_from_uri_cmek(client, to_delete): - dataset_id = "load_table_from_uri_cmek_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = "US" - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_json_cmek] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.autodetect = True - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - - # Set the encryption key to use for the destination. - # TODO: Replace this key with a key you have created in KMS. - kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us", "test", "test" - ) - encryption_config = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name) - job_config.destination_encryption_configuration = encryption_config - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" - - load_job = client.load_table_from_uri( - uri, - dataset_ref.table("us_states"), - location="US", # Location must match that of the destination dataset. - job_config=job_config, - ) # API request - - assert load_job.job_type == "load" - - load_job.result() # Waits for table load to complete. - - assert load_job.state == "DONE" - table = client.get_table(dataset_ref.table("us_states")) - assert table.encryption_configuration.kms_key_name == kms_key_name - # [END bigquery_load_table_gcs_json_cmek] - - -def test_load_table_from_uri_parquet(client, to_delete, capsys): - dataset_id = "load_table_from_uri_parquet_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_parquet] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.source_format = bigquery.SourceFormat.PARQUET - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" - - load_job = client.load_table_from_uri( - uri, dataset_ref.table("us_states"), job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_parquet] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." in out - - -def test_load_table_from_uri_orc(client, to_delete, capsys): - dataset_id = "load_table_from_uri_orc_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_orc] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.source_format = bigquery.SourceFormat.ORC - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" - - load_job = client.load_table_from_uri( - uri, dataset_ref.table("us_states"), job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_orc] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." in out - - def test_load_table_from_uri_autodetect(client, to_delete, capsys): """Load table from a GCS URI using various formats and auto-detected schema Each file format has its own tested load from URI sample. Because most of diff --git a/packages/google-cloud-bigquery/docs/usage/encryption.rst b/packages/google-cloud-bigquery/docs/usage/encryption.rst index b512e6c4d7bf..6652f05658c6 100644 --- a/packages/google-cloud-bigquery/docs/usage/encryption.rst +++ b/packages/google-cloud-bigquery/docs/usage/encryption.rst @@ -27,7 +27,7 @@ Change the key used to encrypt a table. Load a file from Cloud Storage, using a customer-managed encryption key from Cloud KMS for the destination table. -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/load_table_uri_cmek.py :language: python :dedent: 4 :start-after: [START bigquery_load_table_gcs_json_cmek] diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index b6f8dbdde646..45145cd19004 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -70,7 +70,7 @@ Create an integer range partitioned table with the Load table data from a file with the :func:`~google.cloud.bigquery.client.Client.load_table_from_file` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/load_table_file.py :language: python :dedent: 4 :start-after: [START bigquery_load_from_file] @@ -79,7 +79,7 @@ Load table data from a file with the Load a CSV file from Cloud Storage with the :func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/load_table_uri_csv.py :language: python :dedent: 4 :start-after: [START bigquery_load_table_gcs_csv] @@ -90,7 +90,7 @@ See also: `Loading CSV data from Cloud Storage Load a JSON file from Cloud Storage: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/load_table_uri_json.py :language: python :dedent: 4 :start-after: [START bigquery_load_table_gcs_json] @@ -101,7 +101,7 @@ See also: `Loading JSON data from Cloud Storage Load a Parquet file from Cloud Storage: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/load_table_uri_parquet.py :language: python :dedent: 4 :start-after: [START bigquery_load_table_gcs_parquet] @@ -110,6 +110,28 @@ Load a Parquet file from Cloud Storage: See also: `Loading Parquet data from Cloud Storage `_. +Load an Avro file from Cloud Storage: + +.. literalinclude:: ../samples/load_table_uri_avro.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_avro] + :end-before: [END bigquery_load_table_gcs_avro] + +See also: `Loading Avro data from Cloud Storage +`_. + +Load an ORC file from Cloud Storage: + +.. literalinclude:: ../samples/load_table_uri_orc.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_orc] + :end-before: [END bigquery_load_table_gcs_orc] + +See also: `Loading ORC data from Cloud Storage +`_. + Updating a Table ^^^^^^^^^^^^^^^^ diff --git a/packages/google-cloud-bigquery/samples/load_table_file.py b/packages/google-cloud-bigquery/samples/load_table_file.py new file mode 100644 index 000000000000..b7e45dac3a30 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_file.py @@ -0,0 +1,43 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_file(file_path, table_id): + + # [START bigquery_load_from_file] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.CSV, skip_leading_rows=1, autodetect=True, + ) + + with open(file_path, "rb") as source_file: + job = client.load_table_from_file(source_file, table_id, job_config=job_config) + + job.result() # Waits for the job to complete. + + table = client.get_table(table_id) # Make an API request. + print( + "Loaded {} rows and {} columns to {}".format( + table.num_rows, len(table.schema), table_id + ) + ) + # [END bigquery_load_from_file] + return table diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_avro.py b/packages/google-cloud-bigquery/samples/load_table_uri_avro.py new file mode 100644 index 000000000000..5c25eed226b6 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_avro.py @@ -0,0 +1,38 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_avro(table_id): + + # [START bigquery_load_table_gcs_avro] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.AVRO) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_avro] diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_cmek.py b/packages/google-cloud-bigquery/samples/load_table_uri_cmek.py new file mode 100644 index 000000000000..8bd84993c293 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_cmek.py @@ -0,0 +1,60 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_cmek(table_id, kms_key_name): + + # [START bigquery_load_table_gcs_json_cmek] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # "cloud-samples-tests", "us", "test", "test" + # ) + + job_config = bigquery.LoadJobConfig( + autodetect=True, + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + destination_encryption_configuration=bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name + ), + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" + + load_job = client.load_table_from_uri( + uri, + table_id, + location="US", # Must match the destination dataset location. + job_config=job_config, + ) # Make an API request. + + assert load_job.job_type == "load" + + load_job.result() # Waits for the job to complete. + + assert load_job.state == "DONE" + table = client.get_table(table_id) + + if table.encryption_configuration.kms_key_name == kms_key_name: + print("A table loaded with encryption configuration key") + + # [END bigquery_load_table_gcs_json_cmek] diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_csv.py b/packages/google-cloud-bigquery/samples/load_table_uri_csv.py new file mode 100644 index 000000000000..0736a560cc75 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_csv.py @@ -0,0 +1,46 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_csv(table_id): + + # [START bigquery_load_table_gcs_csv] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + skip_leading_rows=1, + # The source format defaults to CSV, so the line below is optional. + source_format=bigquery.SourceFormat.CSV, + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) # Make an API request. + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_csv] diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_json.py b/packages/google-cloud-bigquery/samples/load_table_uri_json.py new file mode 100644 index 000000000000..3c21972c80a0 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_json.py @@ -0,0 +1,46 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_json(table_id): + # [START bigquery_load_table_gcs_json] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" + + load_job = client.load_table_from_uri( + uri, + table_id, + location="US", # Must match the destination dataset location. + job_config=job_config, + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_json] diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_orc.py b/packages/google-cloud-bigquery/samples/load_table_uri_orc.py new file mode 100644 index 000000000000..3ab6ff45aa0a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_orc.py @@ -0,0 +1,38 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_orc(table_id): + + # [START bigquery_load_table_gcs_orc] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.ORC) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_orc] diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_parquet.py b/packages/google-cloud-bigquery/samples/load_table_uri_parquet.py new file mode 100644 index 000000000000..3dce5e8efda8 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_parquet.py @@ -0,0 +1,37 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_parquet(table_id): + # [START bigquery_load_table_gcs_parquet] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.PARQUET,) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_parquet] diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_file.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_file.py new file mode 100644 index 000000000000..a7ebe768201a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_file.py @@ -0,0 +1,39 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from google.cloud import bigquery + +from .. import load_table_file + + +def test_load_table_file(capsys, random_table_id, client): + + samples_test_dir = os.path.abspath(os.path.dirname(__file__)) + file_path = os.path.join( + samples_test_dir, "..", "..", "tests", "data", "people.csv" + ) + table = load_table_file.load_table_file(file_path, random_table_id) + + out, _ = capsys.readouterr() + assert "Loaded 2 rows and 2 columns" in out + + rows = list(client.list_rows(table)) # Make an API request. + assert len(rows) == 2 + # Order is not preserved, so compare individually + row1 = bigquery.Row(("Wylma Phlyntstone", 29), {"full_name": 0, "age": 1}) + assert row1 in rows + row2 = bigquery.Row(("Phred Phlyntstone", 32), {"full_name": 0, "age": 1}) + assert row2 in rows diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_avro.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_avro.py new file mode 100644 index 000000000000..0be29d6b35ae --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_avro.py @@ -0,0 +1,21 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_avro + + +def test_load_table_uri_avro(capsys, random_table_id): + load_table_uri_avro.load_table_uri_avro(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py new file mode 100644 index 000000000000..c15dad9a754f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py @@ -0,0 +1,22 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_cmek + + +def test_load_table_uri_cmek(capsys, random_table_id, kms_key_name): + + load_table_uri_cmek.load_table_uri_cmek(random_table_id, kms_key_name) + out, _ = capsys.readouterr() + assert "A table loaded with encryption configuration key" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py new file mode 100644 index 000000000000..fbcc69358466 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py @@ -0,0 +1,22 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_csv + + +def test_load_table_uri_csv(capsys, random_table_id): + + load_table_uri_csv.load_table_uri_csv(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py new file mode 100644 index 000000000000..e054cb07ac3f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py @@ -0,0 +1,22 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_json + + +def test_load_table_uri_json(capsys, random_table_id): + + load_table_uri_json.load_table_uri_json(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py new file mode 100644 index 000000000000..96dc72022b0a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py @@ -0,0 +1,22 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_orc + + +def test_load_table_uri_orc(capsys, random_table_id): + + load_table_uri_orc.load_table_uri_orc(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py new file mode 100644 index 000000000000..81ba3fcef604 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py @@ -0,0 +1,22 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_parquet + + +def test_load_table_uri_json(capsys, random_table_id): + + load_table_uri_parquet.load_table_uri_parquet(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out From db801aaa38f601537fee290824996ef297d1cf70 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 3 Feb 2020 01:27:55 -0800 Subject: [PATCH 0741/2016] chore(bigquery): bump copyright year to 2020, tweak docstring formatting (via synth) #10225 --- .../google/cloud/bigquery_v2/gapic/enums.py | 2 +- .../cloud/bigquery_v2/proto/model_pb2.py | 34 +- .../bigquery_v2/proto/standard_sql_pb2.py | 4 +- .../google/cloud/bigquery_v2/types.py | 2 +- packages/google-cloud-bigquery/synth.metadata | 748 +++++++++++++++++- 5 files changed, 766 insertions(+), 24 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py index 5d95f2590785..97059414f368 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py index 3994660ec46d..0b4e9d23ed26 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py @@ -3310,8 +3310,8 @@ dict( DESCRIPTOR=_MODEL_REGRESSIONMETRICS, __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics for regression and explicit feedback type matrix - factorization models. + __doc__="""Evaluation metrics for regression and explicit feedback + type matrix factorization models. Attributes: @@ -3335,12 +3335,12 @@ dict( DESCRIPTOR=_MODEL_AGGREGATECLASSIFICATIONMETRICS, __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Aggregate metrics for classification/classifier models. For multi-class - models, the metrics are either macro-averaged or micro-averaged. When - macro-averaged, the metrics are calculated for each label and then an - unweighted average is taken of those values. When micro-averaged, the - metric is calculated globally by counting the total number of correctly - predicted rows. + __doc__="""Aggregate metrics for classification/classifier models. + For multi-class models, the metrics are either macro-averaged or + micro-averaged. When macro-averaged, the metrics are calculated for each + label and then an unweighted average is taken of those values. When + micro-averaged, the metric is calculated globally by counting the total + number of correctly predicted rows. Attributes: @@ -3414,7 +3414,8 @@ ), DESCRIPTOR=_MODEL_BINARYCLASSIFICATIONMETRICS, __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics for binary classification/classifier models. + __doc__="""Evaluation metrics for binary classification/classifier + models. Attributes: @@ -3493,7 +3494,8 @@ ), DESCRIPTOR=_MODEL_MULTICLASSCLASSIFICATIONMETRICS, __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics for multi-class classification/classifier models. + __doc__="""Evaluation metrics for multi-class + classification/classifier models. Attributes: @@ -3527,7 +3529,8 @@ dict( DESCRIPTOR=_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT, __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Represents the count of a single category within the cluster. + __doc__="""Represents the count of a single category within the + cluster. Attributes: @@ -3612,9 +3615,9 @@ dict( DESCRIPTOR=_MODEL_EVALUATIONMETRICS, __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics of a model. These are either computed on all training - data or just the eval data based on whether eval data was used during - training. These are not present for imported models. + __doc__="""Evaluation metrics of a model. These are either computed + on all training data or just the eval data based on whether eval data + was used during training. These are not present for imported models. Attributes: @@ -3774,7 +3777,8 @@ ), DESCRIPTOR=_MODEL_TRAININGRUN, __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Information about a single training query run for the model. + __doc__="""Information about a single training query run for the + model. Attributes: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py index 3b394b8bf10e..ca02014057d2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py @@ -308,8 +308,8 @@ dict( DESCRIPTOR=_STANDARDSQLDATATYPE, __module__="google.cloud.bigquery_v2.proto.standard_sql_pb2", - __doc__="""The type of a variable, e.g., a function argument. Examples: INT64: - {type\_kind="INT64"} ARRAY: {type\_kind="ARRAY", + __doc__="""The type of a variable, e.g., a function argument. + Examples: INT64: {type\_kind="INT64"} ARRAY: {type\_kind="ARRAY", array\_element\_type="STRING"} STRUCT: {type\_kind="STRUCT", struct\_type={fields=[ {name="x", type={type\_kind="STRING"}}, {name="y", type={type\_kind="ARRAY", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py index ee852364a10f..7d4f9b7326e4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 863d7b1ad9e6..ef9fc79c57d3 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -1,19 +1,20 @@ { - "updateTime": "2019-10-29T12:13:17.119821Z", + "updateTime": "2020-01-29T13:17:11.693204Z", "sources": [ { "generator": { "name": "artman", - "version": "0.40.3", - "dockerImage": "googleapis/artman@sha256:c805f50525f5f557886c94ab76f56eaa09cb1da58c3ee95111fd34259376621a" + "version": "0.44.4", + "dockerImage": "googleapis/artman@sha256:19e945954fc960a4bdfee6cb34695898ab21a8cf0bac063ee39b91f00a1faec8" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "532773acbed8d09451dafb3d403ab1823e6a6e1e", - "internalRef": "277177415" + "sha": "cf3b61102ed5f36b827bc82ec39be09525f018c8", + "internalRef": "292034635", + "log": "cf3b61102ed5f36b827bc82ec39be09525f018c8\n Fix to protos for v1p1beta1 release of Cloud Security Command Center\n\nPiperOrigin-RevId: 292034635\n\n4e1cfaa7c0fede9e65d64213ca3da1b1255816c0\nUpdate the public proto to support UTF-8 encoded id for CatalogService API, increase the ListCatalogItems deadline to 300s and some minor documentation change\n\nPiperOrigin-RevId: 292030970\n\n9c483584f8fd5a1b862ae07973f4cc7bb3e46648\nasset: add annotations to v1p1beta1\n\nPiperOrigin-RevId: 292009868\n\ne19209fac29731d0baf6d9ac23da1164f7bdca24\nAdd the google.rpc.context.AttributeContext message to the open source\ndirectories.\n\nPiperOrigin-RevId: 291999930\n\nae5662960573f279502bf98a108a35ba1175e782\noslogin API: move file level option on top of the file to avoid protobuf.js bug.\n\nPiperOrigin-RevId: 291990506\n\neba3897fff7c49ed85d3c47fc96fe96e47f6f684\nAdd cc_proto_library and cc_grpc_library targets for Spanner and IAM protos.\n\nPiperOrigin-RevId: 291988651\n\n8e981acfd9b97ea2f312f11bbaa7b6c16e412dea\nBeta launch for PersonDetection and FaceDetection features.\n\nPiperOrigin-RevId: 291821782\n\n994e067fae3b21e195f7da932b08fff806d70b5d\nasset: add annotations to v1p2beta1\n\nPiperOrigin-RevId: 291815259\n\n244e1d2c89346ca2e0701b39e65552330d68545a\nAdd Playable Locations service\n\nPiperOrigin-RevId: 291806349\n\n909f8f67963daf45dd88d020877fb9029b76788d\nasset: add annotations to v1beta2\n\nPiperOrigin-RevId: 291805301\n\n3c39a1d6e23c1ef63c7fba4019c25e76c40dfe19\nKMS: add file-level message for CryptoKeyPath, it is defined in gapic yaml but not\nin proto files.\n\nPiperOrigin-RevId: 291420695\n\nc6f3f350b8387f8d1b85ed4506f30187ebaaddc3\ncontaineranalysis: update v1beta1 and bazel build with annotations\n\nPiperOrigin-RevId: 291401900\n\n92887d74b44e4e636252b7b8477d0d2570cd82db\nfix: fix the location of grpc config file.\n\nPiperOrigin-RevId: 291396015\n\ne26cab8afd19d396b929039dac5d874cf0b5336c\nexpr: add default_host and method_signature annotations to CelService\n\nPiperOrigin-RevId: 291240093\n\n06093ae3952441c34ec176d1f7431b8765cec0be\nirm: fix v1alpha2 bazel build by adding missing proto imports\n\nPiperOrigin-RevId: 291227940\n\na8a2514af326e4673063f9a3c9d0ef1091c87e6c\nAdd proto annotation for cloud/irm API\n\nPiperOrigin-RevId: 291217859\n\n8d16f76de065f530d395a4c7eabbf766d6a120fd\nGenerate Memcache v1beta2 API protos and gRPC ServiceConfig files\n\nPiperOrigin-RevId: 291008516\n\n3af1dabd93df9a9f17bf3624d3b875c11235360b\ngrafeas: Add containeranalysis default_host to Grafeas service\n\nPiperOrigin-RevId: 290965849\n\nbe2663fa95e31cba67d0cd62611a6674db9f74b7\nfix(google/maps/roads): add missing opening bracket\n\nPiperOrigin-RevId: 290964086\n\nfacc26550a0af0696e0534bc9cae9df14275aa7c\nUpdating v2 protos with the latest inline documentation (in comments) and adding a per-service .yaml file.\n\nPiperOrigin-RevId: 290952261\n\ncda99c1f7dc5e4ca9b1caeae1dc330838cbc1461\nChange api_name to 'asset' for v1p1beta1\n\nPiperOrigin-RevId: 290800639\n\n94e9e90c303a820ce40643d9129e7f0d2054e8a1\nAdds Google Maps Road service\n\nPiperOrigin-RevId: 290795667\n\na3b23dcb2eaecce98c600c7d009451bdec52dbda\nrpc: new message ErrorInfo, other comment updates\n\nPiperOrigin-RevId: 290781668\n\n26420ef4e46c37f193c0fbe53d6ebac481de460e\nAdd proto definition for Org Policy v1.\n\nPiperOrigin-RevId: 290771923\n\n7f0dab8177cf371ae019a082e2512de7ac102888\nPublish Routes Preferred API v1 service definitions.\n\nPiperOrigin-RevId: 290326986\n\nad6e508d0728e1d1bca6e3f328cd562718cb772d\nFix: Qualify resource type references with \"jobs.googleapis.com/\"\n\nPiperOrigin-RevId: 290285762\n\n58e770d568a2b78168ddc19a874178fee8265a9d\ncts client library\n\nPiperOrigin-RevId: 290146169\n\naf9daa4c3b4c4a8b7133b81588dd9ffd37270af2\nAdd more programming language options to public proto\n\nPiperOrigin-RevId: 290144091\n\nd9f2bbf2df301ef84641d4cec7c828736a0bd907\ntalent: add missing resource.proto dep to Bazel build target\n\nPiperOrigin-RevId: 290143164\n\n3b3968237451d027b42471cd28884a5a1faed6c7\nAnnotate Talent API.\nAdd gRPC service config for retry.\nUpdate bazel file with google.api.resource dependency.\n\nPiperOrigin-RevId: 290125172\n\n0735b4b096872960568d1f366bfa75b7b0e1f1a3\nWeekly library update.\n\nPiperOrigin-RevId: 289939042\n\n8760d3d9a4543d7f9c0d1c7870aca08b116e4095\nWeekly library update.\n\nPiperOrigin-RevId: 289939020\n\n8607df842f782a901805187e02fff598145b0b0e\nChange Talent API timeout to 30s.\n\nPiperOrigin-RevId: 289912621\n\n908155991fe32570653bcb72ecfdcfc896642f41\nAdd Recommendations AI V1Beta1\n\nPiperOrigin-RevId: 289901914\n\n5c9a8c2bebd8b71aa66d1cc473edfaac837a2c78\nAdding no-arg method signatures for ListBillingAccounts and ListServices\n\nPiperOrigin-RevId: 289891136\n\n50b0e8286ac988b0593bd890eb31fef6ea2f5767\nlongrunning: add grpc service config and default_host annotation to operations.proto\n\nPiperOrigin-RevId: 289876944\n\n6cac27dabe51c54807b0401698c32d34998948a9\n Updating default deadline for Cloud Security Command Center's v1 APIs.\n\nPiperOrigin-RevId: 289875412\n\nd99df0d67057a233c711187e0689baa4f8e6333d\nFix: Correct spelling in C# namespace option\n\nPiperOrigin-RevId: 289709813\n\n2fa8d48165cc48e35b0c62e6f7bdade12229326c\nfeat: Publish Recommender v1 to GitHub.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289619243\n\n9118db63d1ab493a2e44a3b4973fde810a835c49\nfirestore: don't retry reads that fail with Aborted\n\nFor transaction reads that fail with ABORTED, we need to rollback and start a new transaction. Our current configuration makes it so that GAPIC retries ABORTED reads multiple times without making any progress. Instead, we should retry at the transaction level.\n\nPiperOrigin-RevId: 289532382\n\n1dbfd3fe4330790b1e99c0bb20beb692f1e20b8a\nFix bazel build\nAdd other langauges (Java was already there) for bigquery/storage/v1alpha2 api.\n\nPiperOrigin-RevId: 289519766\n\nc06599cdd7d11f8d3fd25f8d3249e5bb1a3d5d73\nInitial commit of google.cloud.policytroubleshooter API, The API helps in troubleshooting GCP policies. Refer https://cloud.google.com/iam/docs/troubleshooting-access for more information\n\nPiperOrigin-RevId: 289491444\n\nfce7d80fa16ea241e87f7bc33d68595422e94ecd\nDo not pass samples option for Artman config of recommender v1 API.\n\nPiperOrigin-RevId: 289477403\n\nef179e8c61436297e6bb124352e47e45c8c80cb1\nfix: Address missing Bazel dependency.\n\nBazel builds stopped working in 06ec6d5 because\nthe google/longrunning/operations.proto file took\nan import from google/api/client.proto, but that\nimport was not added to BUILD.bazel.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289446074\n\n8841655b242c84fd691d77d7bcf21b61044f01ff\nMigrate Data Labeling v1beta1 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289446026\n\n06ec6d5d053fff299eaa6eaa38afdd36c5e2fc68\nAdd annotations to google.longrunning.v1\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289413169\n\n0480cf40be1d3cc231f4268a2fdb36a8dd60e641\nMigrate IAM Admin v1 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289411084\n\n1017173e9adeb858587639af61889ad970c728b1\nSpecify a C# namespace for BigQuery Connection v1beta1\n\nPiperOrigin-RevId: 289396763\n\nb08714b378e8e5b0c4ecdde73f92c36d6303b4b6\nfix: Integrate latest proto-docs-plugin fix.\nFixes dialogflow v2\n\nPiperOrigin-RevId: 289189004\n\n51217a67e79255ee1f2e70a6a3919df082513327\nCreate BUILD file for recommender v1\n\nPiperOrigin-RevId: 289183234\n\nacacd87263c0a60e458561b8b8ce9f67c760552a\nGenerate recommender v1 API protos and gRPC ServiceConfig files\n\nPiperOrigin-RevId: 289177510\n\n9d2f7133b97720b1fa3601f6dcd30760ba6d8a1e\nFix kokoro build script\n\nPiperOrigin-RevId: 289166315\n\nc43a67530d2a47a0220cad20ca8de39b3fbaf2c5\ncloudtasks: replace missing RPC timeout config for v2beta2 and v2beta3\n\nPiperOrigin-RevId: 289162391\n\n4cefc229a9197236fc0adf02d69b71c0c5cf59de\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 289158456\n\n56f263fe959c50786dab42e3c61402d32d1417bd\nCatalog API: Adding config necessary to build client libraries\n\nPiperOrigin-RevId: 289149879\n\n4543762b23a57fc3c53d409efc3a9affd47b6ab3\nFix Bazel build\nbilling/v1 and dialogflow/v2 remain broken (not bazel-related issues).\nBilling has wrong configuration, dialogflow failure is caused by a bug in documentation plugin.\n\nPiperOrigin-RevId: 289140194\n\nc9dce519127b97e866ca133a01157f4ce27dcceb\nUpdate Bigtable docs\n\nPiperOrigin-RevId: 289114419\n\n802c5c5f2bf94c3facb011267d04e71942e0d09f\nMigrate DLP to proto annotations (but not GAPIC v2).\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289102579\n\n6357f30f2ec3cff1d8239d18b707ff9d438ea5da\nRemove gRPC configuration file that was in the wrong place.\n\nPiperOrigin-RevId: 289096111\n\n360a8792ed62f944109d7e22d613a04a010665b4\n Protos for v1p1beta1 release of Cloud Security Command Center\n\nPiperOrigin-RevId: 289011995\n\na79211c20c4f2807eec524d00123bf7c06ad3d6e\nRoll back containeranalysis v1 to GAPIC v1.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288999068\n\n9e60345ba603e03484a8aaa33ce5ffa19c1c652b\nPublish Routes Preferred API v1 proto definitions.\n\nPiperOrigin-RevId: 288941399\n\nd52885b642ad2aa1f42b132ee62dbf49a73e1e24\nMigrate the service management API to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288909426\n\n6ace586805c08896fef43e28a261337fcf3f022b\ncloudtasks: replace missing RPC timeout config\n\nPiperOrigin-RevId: 288783603\n\n51d906cabee4876b12497054b15b05d4a50ad027\nImport of Grafeas from Github.\n\nUpdate BUILD.bazel accordingly.\n\nPiperOrigin-RevId: 288783426\n\n5ef42bcd363ba0440f0ee65b3c80b499e9067ede\nMigrate Recommender v1beta1 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288713066\n\n94f986afd365b7d7e132315ddcd43d7af0e652fb\nMigrate Container Analysis v1 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288708382\n\n7a751a279184970d3b6ba90e4dd4d22a382a0747\nRemove Container Analysis v1alpha1 (nobody publishes it).\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288707473\n\n3c0d9c71242e70474b2b640e15bb0a435fd06ff0\nRemove specious annotation from BigQuery Data Transfer before\nanyone accidentally does anything that uses it.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288701604\n\n1af307a4764bd415ef942ac5187fa1def043006f\nMigrate BigQuery Connection to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288698681\n\n08b488e0660c59842a7dee0e3e2b65d9e3a514a9\nExposing cloud_catalog.proto (This API is already available through REST)\n\nPiperOrigin-RevId: 288625007\n\na613482977e11ac09fa47687a5d1b5a01efcf794\nUpdate the OS Login v1beta API description to render better in the UI.\n\nPiperOrigin-RevId: 288547940\n\n5e182b8d9943f1b17008d69d4c7e865dc83641a7\nUpdate the OS Login API description to render better in the UI.\n\nPiperOrigin-RevId: 288546443\n\ncb79155f596e0396dd900da93872be7066f6340d\nFix: Add a resource annotation for Agent\nFix: Correct the service name in annotations for Intent and SessionEntityType\n\nPiperOrigin-RevId: 288441307\n\nf7f6e9daec3315fd47cb638789bd8415bf4a27cc\nAdded cloud asset api v1p1beta1\n\nPiperOrigin-RevId: 288427239\n\nf2880f5b342c6345f3dcaad24fcb3c6ca9483654\nBilling account API: Adding config necessary to build client libraries\n\nPiperOrigin-RevId: 288351810\n\ndc250ffe071729f8f8bef9d6fd0fbbeb0254c666\nFix: Remove incorrect resource annotations in requests\n\nPiperOrigin-RevId: 288321208\n\n91ef2d9dd69807b0b79555f22566fb2d81e49ff9\nAdd GAPIC annotations to Cloud KMS (but do not migrate the GAPIC config yet).\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 287999179\n\n4d45a6399e9444fbddaeb1c86aabfde210723714\nRefreshing Cloud Billing API protos.\n\nThis exposes the following API methods:\n- UpdateBillingAccount\n- CreateBillingAccount\n- GetIamPolicy\n- SetIamPolicy\n- TestIamPermissions\n\nThere are also some new fields to support the management of sub-accounts.\n\nPiperOrigin-RevId: 287908369\n\nec285d3d230810147ebbf8d5b691ee90320c6d2d\nHide not yet implemented update_transforms message\n\nPiperOrigin-RevId: 287608953\n\na202fb3b91cd0e4231be878b0348afd17067cbe2\nBigQuery Storage Write API v1alpha2 clients. The service is enabled by whitelist only.\n\nPiperOrigin-RevId: 287379998\n\n650d7f1f8adb0cfaf37b3ce2241c3168f24efd4d\nUpdate Readme.md to match latest Bazel updates\n090d98aea20270e3be4b64240775588f7ce50ff8\ndocs(bigtable): Fix library release level listed in generated documentation\n\nPiperOrigin-RevId: 287308849\n\n2c28f646ca77b1d57550368be22aa388adde2e66\nfirestore: retry reads that fail with contention\n\nPiperOrigin-RevId: 287250665\n\nfd3091fbe9b2083cabc53dc50c78035658bfc4eb\nSync timeout in grpc config back to 10s for tasks API with github googelapis gapic config.\n\nPiperOrigin-RevId: 287207067\n\n49dd7d856a6f77c0cf7e5cb3334423e5089a9e8a\nbazel: Integrate bazel-2.0.0 compatibility fixes\n\nPiperOrigin-RevId: 287205644\n\n46e52fd64973e815cae61e78b14608fe7aa7b1df\nbazel: Integrate bazel build file generator\n\nTo generate/update BUILD.bazel files for any particular client or a batch of clients:\n```\nbazel run //:build_gen -- --src=google/example/library\n```\n\nPiperOrigin-RevId: 286958627\n\n1a380ea21dea9b6ac6ad28c60ad96d9d73574e19\nBigQuery Storage Read API v1beta2 clients.\n\nPiperOrigin-RevId: 286616241\n\n5f3f1d0f1c06b6475a17d995e4f7a436ca67ec9e\nAdd Artman config for secretmanager.\n\nPiperOrigin-RevId: 286598440\n\n50af0530730348f1e3697bf3c70261f7daaf2981\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 286491002\n\n91818800384f4ed26961aea268910b1a2ec58cc8\nFor Data Catalog API,\n1. Add support for marking a tag template field as required when creating a new tag template.\n2. Add support for updating a tag template field from required to optional.\n\nPiperOrigin-RevId: 286490262\n\nff4a2047b3d66f38c9b22197c370ed0d02fc0238\nWeekly library update.\n\nPiperOrigin-RevId: 286484215\n\n192c14029861752a911ed434fd6ee5b850517cd9\nWeekly library update.\n\nPiperOrigin-RevId: 286484165\n\nd9e328eaf790d4e4346fbbf32858160f497a03e0\nFix bazel build (versions 1.x)\n\nBump gapic-generator and resource names plugins to the latest version.\n\nPiperOrigin-RevId: 286469287\n\n0ca305403dcc50e31ad9477c9b6241ddfd2056af\nsecretmanager client package name option updates for java and go\n\nPiperOrigin-RevId: 286439553\n\nade4803e8a1a9e3efd249c8c86895d2f12eb2aaa\niam credentials: publish v1 protos containing annotations\n\nPiperOrigin-RevId: 286418383\n\n03e5708e5f8d1909dcb74b25520309e59ebf24be\nsecuritycenter: add missing proto deps for Bazel build\n\nPiperOrigin-RevId: 286417075\n\n8b991eb3eb82483b0ca1f1361a9c8e5b375c4747\nAdd secretmanager client package name options.\n\nPiperOrigin-RevId: 286415883\n\nd400cb8d45df5b2ae796b909f098a215b2275c1d\ndialogflow: add operation_info annotations to BatchUpdateEntities and BatchDeleteEntities.\n\nPiperOrigin-RevId: 286312673\n\nf2b25232db397ebd4f67eb901a2a4bc99f7cc4c6\nIncreased the default timeout time for all the Cloud Security Command Center client libraries.\n\nPiperOrigin-RevId: 286263771\n\ncb2f1eefd684c7efd56fd375cde8d4084a20439e\nExposing new Resource fields in the SecurityCenterProperties proto, added more comments to the filter logic for these Resource fields, and updated the response proto for the ListFindings API with the new Resource fields.\n\nPiperOrigin-RevId: 286263092\n\n73cebb20432b387c3d8879bb161b517d60cf2552\nUpdate v1beta2 clusters and jobs to include resource ids in GRPC header.\n\nPiperOrigin-RevId: 286261392\n\n1b4e453d51c0bd77e7b73896cdd8357d62768d83\nsecuritycenter: publish v1beta1 protos with annotations\n\nPiperOrigin-RevId: 286228860\n\na985eeda90ae98e8519d2320bee4dec148eb8ccb\nAdd default retry configurations for speech_v1p1beta1.\n\nSettings are copied from speech_gapic.legacy.yaml. The Python client library is being generated with timeouts that are too low. See https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2578\n\nPiperOrigin-RevId: 286191318\n\n3352100a15ede383f5ab3c34599f7a10a3d066fe\nMake importing rule with the same name (but different aliases) from different repositories possible.\n\nThis is needed to allow monolitic gapic-generator and microgenerators coexist during transition period.\n\nTo plug a microgenerator:\n\n1) Add corresponding rules bidnings under `switched_rules_by_language` in repository_rules.bzl:\n rules[\"go_gapic_library2\"] = _switch(\n go and grpc and gapic,\n \"@gapic_generator_go//rules_go_gapic/go_gapic.bzl\",\n \"go_gapic_library\",\n )\n\n2) Import microgenerator in WORKSPACE (the above example assumes that the generator was imported under name \"gapic_generator_go\").\n\n3) To migrate an API from monolith to micro generator (this is done per API and per language) modify the corresponding load statement in the API's BUILD.bazel file. For example, for the example above, to migrate to go microgenerator modify the go-specific load statement in BUILD.bazel file of a specific API (which you want to migrate) to the following:\n\nload(\n \"@com_google_googleapis_imports//:imports.bzl\",\n \"go_gapic_assembly_pkg\",\n go_gapic_library = \"go_gapic_library2\",\n \"go_proto_library\",\n \"go_test\",\n)\n\nPiperOrigin-RevId: 286065440\n\n6ad2bb13bc4b0f3f785517f0563118f6ca52ddfd\nUpdated v1beta1 protos for the client:\n- added support for GenericSignedAttestation which has a generic Signature\n- added support for CVSSv3 and WindowsDetail in Vulnerability\n- documentation updates\n\nPiperOrigin-RevId: 286008145\n\nfe1962e49999a832eed8162c45f23096336a9ced\nAdMob API v1 20191210\n\nBasic account info, mediation and network report available. See https://developers.google.com/admob/api/release-notes for more details.\n\nPiperOrigin-RevId: 285894502\n\n41fc1403738b61427f3a798ca9750ef47eb9c0f2\nAnnotate the required fields for the Monitoring Dashboards API\n\nPiperOrigin-RevId: 285824386\n\n27d0e0f202cbe91bf155fcf36824a87a5764ef1e\nRemove inappropriate resource_reference annotations for UpdateWorkflowTemplateRequest.template.\n\nPiperOrigin-RevId: 285802643\n\ne5c4d3a2b5b5bef0a30df39ebb27711dc98dee64\nAdd Artman BUILD.bazel file for the Monitoring Dashboards API\n\nPiperOrigin-RevId: 285445602\n\n2085a0d3c76180ee843cf2ecef2b94ca5266be31\nFix path in the artman config for Monitoring Dashboard API.\n\nPiperOrigin-RevId: 285233245\n\n2da72dfe71e4cca80902f9e3e125c40f02c2925b\nAdd Artman and GAPIC configs for the Monitoring Dashboards API.\n\nPiperOrigin-RevId: 285211544\n\n9f6eeebf1f30f51ffa02acea5a71680fe592348e\nAdd annotations to Dataproc v1. (Also forwarding comment changes from internal source control.)\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 285197557\n\n19c4589a3cb44b3679f7b3fba88365b3d055d5f8\noslogin: fix v1beta retry configuration\n\nPiperOrigin-RevId: 285013366\n\nee3f02926d0f8a0bc13f8d716581aad20f575751\nAdd Monitoring Dashboards API protocol buffers to Google Cloud Monitoring API.\n\nPiperOrigin-RevId: 284982647\n\ne47fdd266542386e5e7346697f90476e96dc7ee8\nbigquery datatransfer: Remove non-publicly available DataSourceService.\n\nPiperOrigin-RevId: 284822593\n\n6156f433fd1d9d5e4a448d6c6da7f637921d92ea\nAdds OSConfig v1beta protos and initial client library config\n\nPiperOrigin-RevId: 284799663\n\n6cc9499e225a4f6a5e34fe07e390f67055d7991c\nAdd datetime.proto to google/type/BUILD.bazel\n\nPiperOrigin-RevId: 284643689\n\nfe7dd5277e39ffe0075729c61e8d118d7527946d\nCosmetic changes to proto comment as part of testing internal release instructions.\n\nPiperOrigin-RevId: 284608712\n\n68d109adad726b89f74276d2f4b2ba6aac6ec04a\nAdd annotations to securitycenter v1, but leave GAPIC v1 in place.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 284580511\n\ndf8a1707a910fc17c71407a75547992fd1864c51\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 284568564\n\na69a974976221ce3bb944901b739418b85d6408c\nclient library update\n\nPiperOrigin-RevId: 284463979\n\na4adac3a12aca6e3a792c9c35ee850435fe7cf7e\nAdded DateTime, TimeZone, and Month proto files to google/type\n\nPiperOrigin-RevId: 284277770\n\ned5dec392906078db4f7745fe4f11d34dd401ae9\nchange common resources from message-level annotations to file-level annotations.\n\nPiperOrigin-RevId: 284236794\n\na00e2c575ef1b637667b4ebe96b8c228b2ddb273\nbigquerydatatransfer: change resource type TransferRun to Run to be consistent with gapic configs\nbigquerydatatransfer: add missing patterns for DataSource, TransferConfig and Run (to allow the location segment)\nbigquerydatatransfer: add file-level Parent resource type (to allow the location segement)\nbigquerydatatransfer: update grpc service config with correct retry delays\n\nPiperOrigin-RevId: 284234378\n\nb10e4547017ca529ac8d183e839f3c272e1c13de\ncloud asset: replace required fields for batchgetassethistory. Correct the time out duration.\n\nPiperOrigin-RevId: 284059574\n\n6690161e3dcc3367639a2ec10db67bf1cf392550\nAdd default retry configurations for speech_v1.\n\nSettings are copied from speech_gapic.legacy.yaml. The Python client library is being generated with timeouts that are too low. See https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2578\n\nPiperOrigin-RevId: 284035915\n\n9b2635ef91e114f0357bdb87652c26a8f59316d5\ncloudtasks: fix gapic v2 config\n\nPiperOrigin-RevId: 284020555\n\ne5676ba8b863951a8ed0bfd6046e1db38062743c\nReinstate resource name handling in GAPIC config for Asset v1.\n\nPiperOrigin-RevId: 283993903\n\nf337f7fb702c85833b7b6ca56afaf9a1bf32c096\nOSConfig AgentEndpoint: add LookupEffectiveGuestPolicy rpc\n\nPiperOrigin-RevId: 283989762\n\nc0ac9b55f2e2efd0ee525b3a6591a1b09330e55a\nInclude real time feed api into v1 version\n\nPiperOrigin-RevId: 283845474\n\n2427a3a0f6f4222315362d973d91a082a3a884a7\nfirestore admin: update v1 protos with annotations & retry config\n\nPiperOrigin-RevId: 283826605\n\n555e844dbe04af50a8f55fe1217fa9d39a0a80b2\nchore: publish retry configs for iam admin, cloud asset, and remoteworkers\n\nPiperOrigin-RevId: 283801979\n\n6311dc536668849142d1fe5cd9fc46da66d1f77f\nfirestore: update v1beta1 protos with annotations and retry config\n\nPiperOrigin-RevId: 283794315\n\nda0edeeef953b05eb1524d514d2e9842ac2df0fd\nfeat: publish several retry config files for client generation\n\nPiperOrigin-RevId: 283614497\n\n59a78053537e06190f02d0a7ffb792c34e185c5a\nRemoving TODO comment\n\nPiperOrigin-RevId: 283592535\n\n8463992271d162e2aff1d5da5b78db11f2fb5632\nFix bazel build\n\nPiperOrigin-RevId: 283589351\n\n3bfcb3d8df10dfdba58f864d3bdb8ccd69364669\nPublic client library for bebop_jobs_api_20191118_1_RC3 release.\n\nPiperOrigin-RevId: 283568877\n\n27ab0db61021d267c452b34d149161a7bf0d9f57\nfirestore: publish annotated protos and new retry config\n\nPiperOrigin-RevId: 283565148\n\n38dc36a2a43cbab4a2a9183a43dd0441670098a9\nfeat: add http annotations for operations calls\n\nPiperOrigin-RevId: 283384331\n\n366caab94906975af0e17822e372f1d34e319d51\ndatastore: add a legacy artman config for PHP generation\n\nPiperOrigin-RevId: 283378578\n\n82944da21578a53b74e547774cf62ed31a05b841\nMigrate container v1beta1 to GAPIC v2.\n\nPiperOrigin-RevId: 283342796\n\n584dcde5826dd11ebe222016b7b208a4e1196f4b\nRemove resource name annotation for UpdateKeyRequest.key, because it's the resource, not a name.\n\nPiperOrigin-RevId: 283167368\n\n6ab0171e3688bfdcf3dbc4056e2df6345e843565\nAdded resource annotation for Key message.\n\nPiperOrigin-RevId: 283066965\n\n86c1a2db1707a25cec7d92f8850cc915163ec3c3\nExpose Admin API methods for Key manipulation.\n\nPiperOrigin-RevId: 282988776\n\n3ddad085965896ffb205d44cb0c0616fe3def10b\nC++ targets: correct deps so they build, rename them from trace* to cloudtrace*\nto match the proto names.\n\nPiperOrigin-RevId: 282857635\n\ne9389365a971ad6457ceb9646c595e79dfdbdea5\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 282810797\n\ne42eaaa9abed3c4d63d64f790bd3191448dbbca6\nPut back C++ targets for cloud trace v2 api.\n\nPiperOrigin-RevId: 282803841\n\nd8896a3d8a191702a9e39f29cf4c2e16fa05f76d\nAdd initial BUILD.bazel for secretmanager.googleapis.com\n\nPiperOrigin-RevId: 282674885\n\n2cc56cb83ea3e59a6364e0392c29c9e23ad12c3a\nCreate sample for list recommendations\n\nPiperOrigin-RevId: 282665402\n\nf88e2ca65790e3b44bb3455e4779b41de1bf7136\nbump Go to ga\n\nPiperOrigin-RevId: 282651105\n\naac86d932b3cefd7d746f19def6935d16d6235e0\nDocumentation update. Add location_id in preparation for regionalization.\n\nPiperOrigin-RevId: 282586371\n\n5b501cd384f6b842486bd41acce77854876158e7\nMigrate Datastore Admin to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 282570874\n\n6a16d474d5be201b20a27646e2009c4dfde30452\nMigrate Datastore to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 282564329\n\n74bd9b95ac8c70b883814e4765a725cffe43d77c\nmark Go lib ga\n\nPiperOrigin-RevId: 282562558\n\nf7b3d434f44f6a77cf6c37cae5474048a0639298\nAdd secretmanager.googleapis.com protos\n\nPiperOrigin-RevId: 282546399\n\nc34a911aaa0660a45f5a556578f764f135e6e060\niot: bump Go GAPIC to GA release level\n\nPiperOrigin-RevId: 282494787\n\n79b7f1c5ba86859dbf70aa6cd546057c1002cdc0\nPut back C++ targets.\nPrevious change overrode custom C++ targets made by external teams. This PR puts those targets back.\n\nPiperOrigin-RevId: 282458292\n\n06a840781d2dc1b0a28e03e30fb4b1bfb0b29d1e\nPopulate BAZEL.build files for around 100 APIs (all APIs we publish) in all 7 langauges.\n\nPiperOrigin-RevId: 282449910\n\n777b580a046c4fa84a35e1d00658b71964120bb0\nCreate BUILD file for recommender v1beta1\n\nPiperOrigin-RevId: 282068850\n\n48b385b6ef71dfe2596490ea34c9a9a434e74243\nGenerate recommender v1beta1 gRPC ServiceConfig file\n\nPiperOrigin-RevId: 282067795\n\n8395b0f1435a4d7ce8737b3b55392627758bd20c\nfix: Set timeout to 25s, because Tasks fails for any deadline above 30s.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 282017295\n\n3ba7ddc4b2acf532bdfb0004ca26311053c11c30\nfix: Shift Ruby and PHP to legacy GAPIC YAMLs for back-compat.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281852671\n\nad6f0c002194c3ec6c13d592d911d122d2293931\nRemove unneeded yaml files\n\nPiperOrigin-RevId: 281835839\n\n1f42588e4373750588152cdf6f747de1cadbcbef\nrefactor: Migrate Tasks beta 2 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281769558\n\n902b51f2073e9958a2aba441f7f7ac54ea00966d\nrefactor: Migrate Tasks to GAPIC v2 (for real this time).\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281769522\n\n17561f59970eede87f61ef6e9c322fa1198a2f4d\nMigrate Tasks Beta 3 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281769519\n\nf95883b15a1ddd58eb7e3583fdefe7b00505faa3\nRegenerate recommender v1beta1 protos and sanitized yaml\n\nPiperOrigin-RevId: 281765245\n\n9a52df54c626b36699a058013d1735a166933167\nadd gRPC ServiceConfig for grafeas v1\n\nPiperOrigin-RevId: 281762754\n\n7a79d682ef40c5ca39c3fca1c0901a8e90021f8a\nfix: Roll back Tasks GAPIC v2 while we investigate C# issue.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281758548\n\n3fc31491640a90f029f284289e7e97f78f442233\nMigrate Tasks to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281751187\n\n5bc0fecee454f857cec042fb99fe2d22e1bff5bc\nfix: adds operation HTTP rules back to v1p1beta1 config\n\nPiperOrigin-RevId: 281635572\n\n5364a19284a1333b3ffe84e4e78a1919363d9f9c\nbazel: Fix build\n\n1) Update to latest gapic-generator (has iam resource names fix for java).\n2) Fix non-trivial issues with oslogin (resources defined in sibling package to the one they are used from) and monitoring.\n3) Fix trivial missing dependencies in proto_library targets for other apis.\n\nThis is to prepare the repository to being populated with BUILD.bazel files for all supported apis (101 API) in all 7 languages.\n\nPiperOrigin-RevId: 281618750\n\n0aa77cbe45538d5e5739eb637db3f2940b912789\nUpdating common proto files in google/type/ with their latest versions.\n\nPiperOrigin-RevId: 281603926\n\nd47e1b4485b3effbb2298eb10dd13a544c0f66dc\nfix: replace Speech Recognize RPC retry_codes_name for non-standard assignment\n\nPiperOrigin-RevId: 281594037\n\n16543773103e2619d2b5f52456264de5bb9be104\nRegenerating public protos for datacatalog, also adding gRPC service config.\n\nPiperOrigin-RevId: 281423227\n\n328ebe76adb06128d12547ed70107fb841aebf4e\nChange custom data type from String to google.protobuf.Struct to be consistent with other docs such as\nhttps://developers.google.com/actions/smarthome/develop/process-intents#response_format\n\nPiperOrigin-RevId: 281402467\n\n5af83f47b9656261cafcf88b0b3334521ab266b3\n(internal change without visible public changes)\n\nPiperOrigin-RevId: 281334391\n\nc53ed56649583a149382bd88d3c427be475b91b6\nFix typo in protobuf docs.\n\nPiperOrigin-RevId: 281293109\n\nd8dd7fe8d5304f7bd1c52207703d7f27d5328c5a\nFix build by adding missing deps.\n\nPiperOrigin-RevId: 281088257\n\n3ef5ffd7351809d75c1332d2eaad1f24d9c318e4\nMigrate Error Reporting v1beta1 to proto annotations / GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281075722\n\n418ee8e24a56b5959e1c1defa4b6c97f883be379\nTrace v2: Add remaining proto annotations, migrate to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281068859\n\nc89394342a9ef70acaf73a6959e04b943fbc817b\nThis change updates an outdated comment for the feature importance proto field since they are no longer in [0, 1] for online predictions.\n\nPiperOrigin-RevId: 280761373\n\n1ec8b8e2c3c8f41d7d2b22c594c025276d6a4ae6\nCode refactoring\n\nPiperOrigin-RevId: 280760149\n\n427a22b04039f93b769d89accd6f487413f667c1\nImport automl operation protos.\n\nPiperOrigin-RevId: 280703572\n\n45749a04dac104e986f6cc47da3baf7c8bb6f9b0\nfix: bigqueryconnection_gapic.yaml to reflect proto annotations\n\n* remove connection_credential resource\n* make CreateCredentialRequest.connection_id optional\n* shuffle field ordering in CreateCredential flattening\n\nPiperOrigin-RevId: 280685438\n\n8385366aa1e5d7796793db02a9c5e167d1fd8f17\nRevert the Trace v2 GAPIC for now.\nCommitter: @lukesneeringer\n\nPiperOrigin-RevId: 280669295\n\n5c8ab2c072d557c2f4c4e54b544394e2d62202d5\nMigrate Trace v1 and Trace v2 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 280667429\n\nf6808ff4e8b966cd571e99279d4a2780ed97dff2\nRename the `endpoint_urls` field to `endpoint_uris` to be consistent with\nGoogle API nomenclature.\n\nPiperOrigin-RevId: 280581337\n\n1935fb8889686f5c9d107f11b3c6870fc3aa7cdc\nComment updates\n\nPiperOrigin-RevId: 280451656\n\n0797fd5b9029d630e68a0899734715d62ad38e33\nComment updates\n\nPiperOrigin-RevId: 280451600\n\n9bc8d07b8b749e791d16c8d559526928ceaf1994\nRollback of \"Migrate Cloud Error Reporting to proto annotations & GAPIC v2.\"\n\nPiperOrigin-RevId: 280445975\n\nf8720321aecf4aab42e03602ac2c67f9777d9170\nfix: bigtable retry config in GAPIC v2\n\nPiperOrigin-RevId: 280434856\n\nb11664ba64f92d96d748e0dd9724d006dcafd120\nMigrate Cloud Error Reporting to proto annotations & GAPIC v2.\n\nPiperOrigin-RevId: 280432937\n\n4f747bda9b099b4426f495985680d16d0227fa5f\n1. Change DataCatalog package name in java from com.google.cloud.datacatalog to com.google.cloud.datacatalog.v1beta1 (API version is included in the package). *This is a breaking change.*\n\n2. Add API for Taxonomies (PolicyTagManager and PolicyTagManagerSerialization services).\n\n3. Minor changes to documentation.\n\nPiperOrigin-RevId: 280394936\n\nbc76ffd87360ce1cd34e3a6eac28afd5e1efda76\nUse rules_proto bzl files to load proto_library\n\nThis makes googleapis forward compatible with Bazel incompatible change https://github.com/bazelbuild/bazel/issues/8922.\n\nThis CL was created by adding @rules_proto to the WORKSPACE file and then running:\n\nfind . -name BUILD.bazel | \\\n while read build; do \\\n buildifier --lint=fix --warnings=load $build; \\\n done\n\nSince buildifier cannot be told not to reformat the BUILD file, some files are reformatted.\n\nPiperOrigin-RevId: 280356106\n\n218164b3deba1075979c9dca5f71461379e42dd1\nMake the `permissions` argument in TestIamPermissions required.\n\nPiperOrigin-RevId: 280279014\n\ndec8fd8ea5dc464496606189ba4b8949188639c8\nUpdating Cloud Billing Budget API documentation for clarity.\n\nPiperOrigin-RevId: 280225437\n\na667ffab90deb5e2669eb40ec7b61ec96a3d0454\nIntroduced detailed status message for CreateTimeSeries: CreateTimeSeriesSummary replaces CreateTimeSeriesError, which is now deprecated and unused.\n\nPiperOrigin-RevId: 280221707\n\nbe0a25eceec8916633447a37af0ecea801b85186\nMigrate Bigtable API to GAPIC v2 config.\n\nPiperOrigin-RevId: 280199643\n\n88bbf96b90089994ed16208a0f38cdd07f743742\nFix location of monitoring.yaml in Artman config for monitoring v3.\n\nPiperOrigin-RevId: 280134477\n\ndbaa01a20303758eed0c5a95ad2239ea306ad9a5\nUpdate namespace for PHP.\n\nPiperOrigin-RevId: 280085199\n\nf73b3796a635b2026a590d5133af7fa1f0eb807b\nStandardize pub/sub client default settings across clients:\n- Add retry codes for streaming pull\n- Decrease publish's max_rpc_timeout (mini-timeout) from 10 mins to 1 min\n- Decrease publish's total timeout from 10 mins to 1 min\n- Increase publish batching threshold from 10 to 100 elements\n- Increase publish batching size threshold from 1 KiB to 1 MiB\n\nPiperOrigin-RevId: 280044012\n\n822172613e1d93bede3beaf78b123c42a5876e2b\nReplace local_repository with http_archive in WORKSPACE\n\nPiperOrigin-RevId: 280039052\n\n6a8c7914d1b79bd832b5157a09a9332e8cbd16d4\nAdded notification_supported_by_agent to indicate whether the agent is sending notifications to Google or not.\n\nPiperOrigin-RevId: 279991530\n\n675de3dc9ab98cc1cf54216ad58c933ede54e915\nAdd an endpoint_urls field to the instance admin proto and adds a field_mask field to the GetInstanceRequest.\n\nPiperOrigin-RevId: 279982263\n\nf69562be0608904932bdcfbc5ad8b9a22d9dceb8\nAdds some clarification to IAM Policy public proto comments about the policy versioning compliance check for etag-less SetIamPolicy requests.\n\nPiperOrigin-RevId: 279774957\n\n4e86b2538758e3155e867d1cb4155ee91de7c6e9\nDocumentation update. Add the new action for sending metrics to Stackdriver.\n\nPiperOrigin-RevId: 279768476\n\neafaf30b7a3af0bc72f323fe6a6827327d3cad75\nfix: Restore deleted field to avoid a breaking change.\n\nPiperOrigin-RevId: 279760458\n\ned13a73f3054a29b764f104feaa503820b75140a\nAdd GAPIC annotations to the GKE API.\n\nPiperOrigin-RevId: 279734275\n\n6b125955bf0d6377b96f205e5d187e9d524b7ea2\nUpdate timeouts to 1 hour for default and streaming RPCs.\n\nPiperOrigin-RevId: 279657866\n\n989b304c8a6cfe72bdd7cb264e0d71b784db9421\nAdd Service Monitoring (Service and ServiceLevelObjective) protocol buffers to Google Cloud Monitoring API.\n\nPiperOrigin-RevId: 279649144\n\n1ef3bed9594674bb571ce20418af307505e3f609\nUpdating configs for AgentEndpoint to fix the client library generation.\n\nPiperOrigin-RevId: 279518887\n\n34e661f58d58fa57da8ed113a3d8bb3de26b307d\nUpdate v1beta2 clusters and jobs to include resource ids in GRPC header.\n\nPiperOrigin-RevId: 279417429\n\n248abde06efb7e5a3d81b84de02c8272122b0c3b\nIntegrate GAPIC Python Bazel Extensions\n\nAlso configure python build for the following clients as an example:\n\ndiaglogflow/v2\nlanguage/v1\ntexttospeech/v1\nfirestore/v1beta1\npubsub/v1\n\nPiperOrigin-RevId: 279406526\n\n7ffbf721e29b8806e0c8947c5dd0cdddc02de72a\nOSConfig Agentendpoint: Rename ReportTaskStart to StartNextTask\n\nPiperOrigin-RevId: 279389774\n\n2642d8688bab8981c8a5153b7578f9ff8460a37c\nAgentendpoint API: minor doc updates, addition of exclusive_packages|patches to PatchConfigs.\n\nPiperOrigin-RevId: 279326626\n\nd323b287c782802242005072d15f1474d7d10819\nDocumentation changes.\n\nPiperOrigin-RevId: 279234903\n\n29927f71d92d59551a42272ab7c6e97e8413af78\nPublishing Billing Budgets v1alpha1 API.\n\nPiperOrigin-RevId: 279176561\n\nff413d36f8358818d76fa92006f2d8f608843093\nAdding gRPC service config for Billing Budgets API.\n\nPiperOrigin-RevId: 279175129\n\n3eb91187709cc96bb890c110f518505f65ffd95d\nagentendpoint: removes all gapic languages except Go from artman config\n\nPiperOrigin-RevId: 279173968\n\na34950f968c7944a1036551b545557edcc18c767\nFix bazel build.\n\nUpdate gapic-generator and protoc-java-resource-name plugin dependencies to the latest versions.\n\nThe following clients remain broken because of bugs in gapic-generator and/or corresponding configs\n\ngoogle/cloud/iot/v1\ngoogle/cloud/oslogin/v1\ngoogle/spanner/admin/instance/v1\ngoogle/cloud/oslogin/v1\n\nPiperOrigin-RevId: 279171061\n\n0ed34e9fdf601dfc37eb24c40e17495b86771ff4\nAdds agentendpoint protos and initial client library config\n\nPiperOrigin-RevId: 279147036\n\ncad1d3b365a90c2a9f014b84a2a1acb55c15480f\nUpdates to MediaCard\n\nPiperOrigin-RevId: 279100776\n\n05556c26b633c153f2eca62aeafbcd62705f41b7\nUpdates to MediaCard\n\nPiperOrigin-RevId: 279100278\n\n2275670a746ab2bc03ebba0d914b45320ea15af4\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278922329\n\n5691fcb7c1a926b52577aa1834f31d9c50efda54\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278731899\n\ncb542d6f5f1c9431ec4181d9cfd7f8d8c953e60b\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278688708\n\n311e73f017a474c9a41f2a41b00d5d704ff191c5\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278658917\n\n521ce65c04266df83dde9e2cfd8b2caf057cab45\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278656745\n\nf06bab1c11b7a6dcd15c50525da44c4b2ff3ef3d\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278627678\n\n8c6569ced063c08a48272de2e887860d0c40d388\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278552094\n\n21262f41c4445d24bf441e2a5c250a4207348008\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278486499\n\ndf366ed5ee26ebb73511127b4c329a98ecdd1f7b\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278469200\n\n58bc0f51b1270975b532f5847d9e9e0ff5cdc592\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278368388\n\ne0935db8bfe6fd901ee5d2104b0e1865682899f7\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278368327\n\naf4a739e9d810eb033903f1aa44c615ab729760d\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278132545\n\naac770126e2def40dcc387f50e8007b21c869e58\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278016738\n\n271fed175d16501fb988e02b891166e9718ff141\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277992079\n\n597951d86beb120bc18428f70ffe0d5b97c70620\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277991975\n\nbba93d7148ff203d400a4929cd0fbc7dafd8dae2\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277920288\n\n5b86376273637f5ce3844f29bf8cb1c4aceaea2d\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277850256\n\n8bc65fb6973a281e8fb9e5c12080644a550322c9\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277813826\n\n30a6ca0f1a98f1777c94fc22094c892c2a43e0ef\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277811318\n\n6bef7bd6184390a4e7aa8f09382d7d97afeccfc4\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277789040\n\naa33c92d79760f2a03ba9b42f855f7a821ed9147\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277759754\n\na4933867265e2b1cbc70f876a4312a92116c36ad\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277759298\n\nb21f96290006525e039b9bd1acddeeae407ae1ff\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277750396\n\n93661a24048eb64755fbbeedd7f6a207d1b4d8dc\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277748718\n\nc0e494ca955a4fdd9ad460a5890a354ec3a3a0ff\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277673798\n\n4e952e7e2bb0dd2ef389d552d48f44c8dc4b5f8f\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277595731\n\n78883c8de959f7a9870c332ab0e3d788b13dd763\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277528057\n\n7c4cf35d5fe3b8ad664bd219edd6d9f28a788b64\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277334937\n\nf28342c58c1df57c92e967961e1eaa641d447dde\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277311984\n\n" } } ], @@ -28,5 +29,742 @@ "config": "google/cloud/bigquery/artman_bigquery_v2.yaml" } } + ], + "newFiles": [ + { + "path": ".coveragerc" + }, + { + "path": ".flake8" + }, + { + "path": ".gitignore" + }, + { + "path": ".repo-metadata.json" + }, + { + "path": "CHANGELOG.md" + }, + { + "path": "LICENSE" + }, + { + "path": "MANIFEST.in" + }, + { + "path": "README.rst" + }, + { + "path": "benchmark/README.md" + }, + { + "path": "benchmark/benchmark.py" + }, + { + "path": "benchmark/queries.json" + }, + { + "path": "docs/.gitignore" + }, + { + "path": "docs/README.rst" + }, + { + "path": "docs/_static/custom.css" + }, + { + "path": "docs/_templates/layout.html" + }, + { + "path": "docs/changelog.md" + }, + { + "path": "docs/conf.py" + }, + { + "path": "docs/dbapi.rst" + }, + { + "path": "docs/gapic/v2/enums.rst" + }, + { + "path": "docs/gapic/v2/types.rst" + }, + { + "path": "docs/generated/google.cloud.bigquery.magics.html" + }, + { + "path": "docs/index.rst" + }, + { + "path": "docs/magics.rst" + }, + { + "path": "docs/reference.rst" + }, + { + "path": "docs/snippets.py" + }, + { + "path": "docs/usage.html" + }, + { + "path": "docs/usage/client.rst" + }, + { + "path": "docs/usage/datasets.rst" + }, + { + "path": "docs/usage/encryption.rst" + }, + { + "path": "docs/usage/index.rst" + }, + { + "path": "docs/usage/jobs.rst" + }, + { + "path": "docs/usage/pandas.rst" + }, + { + "path": "docs/usage/queries.rst" + }, + { + "path": "docs/usage/tables.rst" + }, + { + "path": "google/__init__.py" + }, + { + "path": "google/cloud/__init__.py" + }, + { + "path": "google/cloud/bigquery/__init__.py" + }, + { + "path": "google/cloud/bigquery/_helpers.py" + }, + { + "path": "google/cloud/bigquery/_http.py" + }, + { + "path": "google/cloud/bigquery/_pandas_helpers.py" + }, + { + "path": "google/cloud/bigquery/client.py" + }, + { + "path": "google/cloud/bigquery/dataset.py" + }, + { + "path": "google/cloud/bigquery/dbapi/__init__.py" + }, + { + "path": "google/cloud/bigquery/dbapi/_helpers.py" + }, + { + "path": "google/cloud/bigquery/dbapi/connection.py" + }, + { + "path": "google/cloud/bigquery/dbapi/cursor.py" + }, + { + "path": "google/cloud/bigquery/dbapi/exceptions.py" + }, + { + "path": "google/cloud/bigquery/dbapi/types.py" + }, + { + "path": "google/cloud/bigquery/encryption_configuration.py" + }, + { + "path": "google/cloud/bigquery/enums.py" + }, + { + "path": "google/cloud/bigquery/external_config.py" + }, + { + "path": "google/cloud/bigquery/job.py" + }, + { + "path": "google/cloud/bigquery/magics.py" + }, + { + "path": "google/cloud/bigquery/model.py" + }, + { + "path": "google/cloud/bigquery/query.py" + }, + { + "path": "google/cloud/bigquery/retry.py" + }, + { + "path": "google/cloud/bigquery/routine.py" + }, + { + "path": "google/cloud/bigquery/schema.py" + }, + { + "path": "google/cloud/bigquery/table.py" + }, + { + "path": "google/cloud/bigquery_v2/__init__.py" + }, + { + "path": "google/cloud/bigquery_v2/gapic/__init__.py" + }, + { + "path": "google/cloud/bigquery_v2/gapic/enums.py" + }, + { + "path": "google/cloud/bigquery_v2/proto/__init__.py" + }, + { + "path": "google/cloud/bigquery_v2/proto/encryption_config.proto" + }, + { + "path": "google/cloud/bigquery_v2/proto/encryption_config_pb2.py" + }, + { + "path": "google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py" + }, + { + "path": "google/cloud/bigquery_v2/proto/location_metadata.proto" + }, + { + "path": "google/cloud/bigquery_v2/proto/location_metadata_pb2.py" + }, + { + "path": "google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py" + }, + { + "path": "google/cloud/bigquery_v2/proto/model.proto" + }, + { + "path": "google/cloud/bigquery_v2/proto/model_pb2.py" + }, + { + "path": "google/cloud/bigquery_v2/proto/model_pb2_grpc.py" + }, + { + "path": "google/cloud/bigquery_v2/proto/model_reference.proto" + }, + { + "path": "google/cloud/bigquery_v2/proto/model_reference_pb2.py" + }, + { + "path": "google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py" + }, + { + "path": "google/cloud/bigquery_v2/proto/standard_sql.proto" + }, + { + "path": "google/cloud/bigquery_v2/proto/standard_sql_pb2.py" + }, + { + "path": "google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py" + }, + { + "path": "google/cloud/bigquery_v2/types.py" + }, + { + "path": "noxfile.py" + }, + { + "path": "pylint.config.py" + }, + { + "path": "samples/__init__.py" + }, + { + "path": "samples/add_empty_column.py" + }, + { + "path": "samples/browse_table_data.py" + }, + { + "path": "samples/client_list_jobs.py" + }, + { + "path": "samples/client_load_partitioned_table.py" + }, + { + "path": "samples/client_query.py" + }, + { + "path": "samples/client_query_add_column.py" + }, + { + "path": "samples/client_query_batch.py" + }, + { + "path": "samples/client_query_destination_table.py" + }, + { + "path": "samples/client_query_destination_table_cmek.py" + }, + { + "path": "samples/client_query_destination_table_legacy.py" + }, + { + "path": "samples/client_query_dry_run.py" + }, + { + "path": "samples/client_query_legacy_sql.py" + }, + { + "path": "samples/client_query_relax_column.py" + }, + { + "path": "samples/client_query_w_array_params.py" + }, + { + "path": "samples/client_query_w_named_params.py" + }, + { + "path": "samples/client_query_w_positional_params.py" + }, + { + "path": "samples/client_query_w_struct_params.py" + }, + { + "path": "samples/client_query_w_timestamp_params.py" + }, + { + "path": "samples/copy_table.py" + }, + { + "path": "samples/copy_table_cmek.py" + }, + { + "path": "samples/copy_table_multiple_source.py" + }, + { + "path": "samples/create_dataset.py" + }, + { + "path": "samples/create_job.py" + }, + { + "path": "samples/create_routine.py" + }, + { + "path": "samples/create_routine_ddl.py" + }, + { + "path": "samples/create_table.py" + }, + { + "path": "samples/create_table_range_partitioned.py" + }, + { + "path": "samples/dataset_exists.py" + }, + { + "path": "samples/delete_dataset.py" + }, + { + "path": "samples/delete_dataset_labels.py" + }, + { + "path": "samples/delete_model.py" + }, + { + "path": "samples/delete_routine.py" + }, + { + "path": "samples/delete_table.py" + }, + { + "path": "samples/download_public_data.py" + }, + { + "path": "samples/download_public_data_sandbox.py" + }, + { + "path": "samples/get_dataset.py" + }, + { + "path": "samples/get_dataset_labels.py" + }, + { + "path": "samples/get_model.py" + }, + { + "path": "samples/get_routine.py" + }, + { + "path": "samples/get_table.py" + }, + { + "path": "samples/label_dataset.py" + }, + { + "path": "samples/list_datasets.py" + }, + { + "path": "samples/list_datasets_by_label.py" + }, + { + "path": "samples/list_models.py" + }, + { + "path": "samples/list_routines.py" + }, + { + "path": "samples/list_tables.py" + }, + { + "path": "samples/load_table_dataframe.py" + }, + { + "path": "samples/query_external_gcs_temporary_table.py" + }, + { + "path": "samples/query_external_sheets_permanent_table.py" + }, + { + "path": "samples/query_external_sheets_temporary_table.py" + }, + { + "path": "samples/query_no_cache.py" + }, + { + "path": "samples/query_pagination.py" + }, + { + "path": "samples/query_script.py" + }, + { + "path": "samples/query_to_arrow.py" + }, + { + "path": "samples/table_exists.py" + }, + { + "path": "samples/table_insert_rows.py" + }, + { + "path": "samples/table_insert_rows_explicit_none_insert_ids.py" + }, + { + "path": "samples/tests/__init__.py" + }, + { + "path": "samples/tests/conftest.py" + }, + { + "path": "samples/tests/test_add_empty_column.py" + }, + { + "path": "samples/tests/test_browse_table_data.py" + }, + { + "path": "samples/tests/test_client_list_jobs.py" + }, + { + "path": "samples/tests/test_client_load_partitioned_table.py" + }, + { + "path": "samples/tests/test_client_query.py" + }, + { + "path": "samples/tests/test_client_query_add_column.py" + }, + { + "path": "samples/tests/test_client_query_batch.py" + }, + { + "path": "samples/tests/test_client_query_destination_table.py" + }, + { + "path": "samples/tests/test_client_query_destination_table_cmek.py" + }, + { + "path": "samples/tests/test_client_query_destination_table_legacy.py" + }, + { + "path": "samples/tests/test_client_query_dry_run.py" + }, + { + "path": "samples/tests/test_client_query_legacy_sql.py" + }, + { + "path": "samples/tests/test_client_query_relax_column.py" + }, + { + "path": "samples/tests/test_client_query_w_array_params.py" + }, + { + "path": "samples/tests/test_client_query_w_named_params.py" + }, + { + "path": "samples/tests/test_client_query_w_positional_params.py" + }, + { + "path": "samples/tests/test_client_query_w_struct_params.py" + }, + { + "path": "samples/tests/test_client_query_w_timestamp_params.py" + }, + { + "path": "samples/tests/test_copy_table.py" + }, + { + "path": "samples/tests/test_copy_table_cmek.py" + }, + { + "path": "samples/tests/test_copy_table_multiple_source.py" + }, + { + "path": "samples/tests/test_create_dataset.py" + }, + { + "path": "samples/tests/test_create_job.py" + }, + { + "path": "samples/tests/test_create_table.py" + }, + { + "path": "samples/tests/test_create_table_range_partitioned.py" + }, + { + "path": "samples/tests/test_dataset_exists.py" + }, + { + "path": "samples/tests/test_dataset_label_samples.py" + }, + { + "path": "samples/tests/test_delete_dataset.py" + }, + { + "path": "samples/tests/test_delete_table.py" + }, + { + "path": "samples/tests/test_download_public_data.py" + }, + { + "path": "samples/tests/test_download_public_data_sandbox.py" + }, + { + "path": "samples/tests/test_get_dataset.py" + }, + { + "path": "samples/tests/test_get_table.py" + }, + { + "path": "samples/tests/test_list_datasets.py" + }, + { + "path": "samples/tests/test_list_datasets_by_label.py" + }, + { + "path": "samples/tests/test_list_tables.py" + }, + { + "path": "samples/tests/test_load_table_dataframe.py" + }, + { + "path": "samples/tests/test_model_samples.py" + }, + { + "path": "samples/tests/test_query_external_gcs_temporary_table.py" + }, + { + "path": "samples/tests/test_query_external_sheets_permanent_table.py" + }, + { + "path": "samples/tests/test_query_external_sheets_temporary_table.py" + }, + { + "path": "samples/tests/test_query_no_cache.py" + }, + { + "path": "samples/tests/test_query_pagination.py" + }, + { + "path": "samples/tests/test_query_script.py" + }, + { + "path": "samples/tests/test_query_to_arrow.py" + }, + { + "path": "samples/tests/test_routine_samples.py" + }, + { + "path": "samples/tests/test_table_exists.py" + }, + { + "path": "samples/tests/test_table_insert_rows.py" + }, + { + "path": "samples/tests/test_table_insert_rows_explicit_none_insert_ids.py" + }, + { + "path": "samples/tests/test_undelete_table.py" + }, + { + "path": "samples/tests/test_update_dataset_access.py" + }, + { + "path": "samples/tests/test_update_dataset_default_partition_expiration.py" + }, + { + "path": "samples/tests/test_update_dataset_default_table_expiration.py" + }, + { + "path": "samples/tests/test_update_dataset_description.py" + }, + { + "path": "samples/tests/test_update_table_require_partition_filter.py" + }, + { + "path": "samples/undelete_table.py" + }, + { + "path": "samples/update_dataset_access.py" + }, + { + "path": "samples/update_dataset_default_partition_expiration.py" + }, + { + "path": "samples/update_dataset_default_table_expiration.py" + }, + { + "path": "samples/update_dataset_description.py" + }, + { + "path": "samples/update_model.py" + }, + { + "path": "samples/update_routine.py" + }, + { + "path": "samples/update_table_require_partition_filter.py" + }, + { + "path": "setup.cfg" + }, + { + "path": "setup.py" + }, + { + "path": "synth.metadata" + }, + { + "path": "synth.py" + }, + { + "path": "tests/__init__.py" + }, + { + "path": "tests/data/characters.json" + }, + { + "path": "tests/data/characters.jsonl" + }, + { + "path": "tests/data/colors.avro" + }, + { + "path": "tests/data/people.csv" + }, + { + "path": "tests/data/schema.json" + }, + { + "path": "tests/scrub_datasets.py" + }, + { + "path": "tests/system.py" + }, + { + "path": "tests/unit/__init__.py" + }, + { + "path": "tests/unit/enums/__init__.py" + }, + { + "path": "tests/unit/enums/test_standard_sql_data_types.py" + }, + { + "path": "tests/unit/helpers.py" + }, + { + "path": "tests/unit/model/__init__.py" + }, + { + "path": "tests/unit/model/test_model.py" + }, + { + "path": "tests/unit/model/test_model_reference.py" + }, + { + "path": "tests/unit/routine/__init__.py" + }, + { + "path": "tests/unit/routine/test_routine.py" + }, + { + "path": "tests/unit/routine/test_routine_argument.py" + }, + { + "path": "tests/unit/routine/test_routine_reference.py" + }, + { + "path": "tests/unit/test__helpers.py" + }, + { + "path": "tests/unit/test__http.py" + }, + { + "path": "tests/unit/test__pandas_helpers.py" + }, + { + "path": "tests/unit/test_client.py" + }, + { + "path": "tests/unit/test_dataset.py" + }, + { + "path": "tests/unit/test_dbapi__helpers.py" + }, + { + "path": "tests/unit/test_dbapi_connection.py" + }, + { + "path": "tests/unit/test_dbapi_cursor.py" + }, + { + "path": "tests/unit/test_dbapi_types.py" + }, + { + "path": "tests/unit/test_encryption_configuration.py" + }, + { + "path": "tests/unit/test_external_config.py" + }, + { + "path": "tests/unit/test_job.py" + }, + { + "path": "tests/unit/test_magics.py" + }, + { + "path": "tests/unit/test_query.py" + }, + { + "path": "tests/unit/test_retry.py" + }, + { + "path": "tests/unit/test_schema.py" + }, + { + "path": "tests/unit/test_signature_compatibility.py" + }, + { + "path": "tests/unit/test_table.py" + } ] } \ No newline at end of file From d92510642288e4f6e89f9640b9b2c3c8f6792df6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 3 Feb 2020 13:40:10 +0000 Subject: [PATCH 0742/2016] chore(bigquery): release 1.24.0 (#10322) --- packages/google-cloud-bigquery/CHANGELOG.md | 40 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index b5fe8936599d..0da745204cec 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,46 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 1.24.0 + +02-03-2020 01:38 PST + +### Implementation Changes + +- Fix inserting missing repeated fields. ([#10196](https://github.com/googleapis/google-cloud-python/pull/10196)) +- Deprecate `client.dataset()` in favor of `DatasetReference`. ([#7753](https://github.com/googleapis/google-cloud-python/pull/7753)) +- Use faster `to_arrow` + `to_pandas` in `to_dataframe()` when `pyarrow` is available. ([#10027](https://github.com/googleapis/google-cloud-python/pull/10027)) +- Write pandas `datetime[ns]` columns to BigQuery TIMESTAMP columns. ([#10028](https://github.com/googleapis/google-cloud-python/pull/10028)) + +### New Features + +- Check `rows` argument type in `insert_rows()`. ([#10174](https://github.com/googleapis/google-cloud-python/pull/10174)) +- Check `json_rows` arg type in `insert_rows_json()`. ([#10162](https://github.com/googleapis/google-cloud-python/pull/10162)) +- Make `RowIterator.to_dataframe_iterable()` method public. ([#10017](https://github.com/googleapis/google-cloud-python/pull/10017)) +- Add retry parameter to public methods where missing. ([#10026](https://github.com/googleapis/google-cloud-python/pull/10026)) +- Add timeout parameter to Client and Job public methods. ([#10002](https://github.com/googleapis/google-cloud-python/pull/10002)) +- Add timeout parameter to `QueryJob.done()` method. ([#9875](https://github.com/googleapis/google-cloud-python/pull/9875)) +- Add `create_bqstorage_client` parameter to `to_dataframe()` and `to_arrow()` methods. ([#9573](https://github.com/googleapis/google-cloud-python/pull/9573)) + +### Dependencies + +- Fix minimum versions of `google-cloud-core` and `google-resumable-media` dependencies. ([#10016](https://github.com/googleapis/google-cloud-python/pull/10016)) + +### Documentation + +- Fix a comment typo in `job.py`. ([#10209](https://github.com/googleapis/google-cloud-python/pull/10209)) +- Update code samples of load table file and load table URI. ([#10175](https://github.com/googleapis/google-cloud-python/pull/10175)) +- Uncomment `Client` constructor and imports in samples. ([#10058](https://github.com/googleapis/google-cloud-python/pull/10058)) +- Remove unused query code sample. ([#10024](https://github.com/googleapis/google-cloud-python/pull/10024)) +- Update code samples to use strings for table and dataset IDs. ([#9974](https://github.com/googleapis/google-cloud-python/pull/9974)) + +### Internal / Testing Changes + +- Bump copyright year to 2020, tweak docstring formatting (via synth). [#10225](https://github.com/googleapis/google-cloud-python/pull/10225) +- Add tests for concatenating categorical columns. ([#10180](https://github.com/googleapis/google-cloud-python/pull/10180)) +- Adjust test assertions to the new default timeout. ([#10222](https://github.com/googleapis/google-cloud-python/pull/10222)) +- Use Python 3.6 for the nox blacken session (via synth). ([#10012](https://github.com/googleapis/google-cloud-python/pull/10012)) + ## 1.23.1 12-16-2019 09:39 PST diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index a543ca416af1..378c4fc1b4ce 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.23.1" +version = "1.24.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From a73097671dec82b020d28168c3d650c240624294 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 6 Feb 2020 07:33:12 +0000 Subject: [PATCH 0743/2016] chore: add split repo templates (#1) * chore: add split repo templates * Adjust synth.py to include templated files * Re-run synth with including template files * Add test_utils and adjust noxfile * Prevent synth from overriding the custom noxfile * Adjust paths to data file in snippets tests * Add missing grpcio import in snippets nox session --- packages/google-cloud-bigquery/.coveragerc | 3 + packages/google-cloud-bigquery/.flake8 | 1 + .../.github/CONTRIBUTING.md | 28 + .../.github/ISSUE_TEMPLATE/bug_report.md | 44 + .../.github/ISSUE_TEMPLATE/feature_request.md | 18 + .../.github/ISSUE_TEMPLATE/support_request.md | 7 + .../.github/PULL_REQUEST_TEMPLATE.md | 7 + .../.github/release-please.yml | 1 + packages/google-cloud-bigquery/.gitignore | 59 +- .../google-cloud-bigquery/.kokoro/build.sh | 39 + .../.kokoro/continuous/common.cfg | 27 + .../.kokoro/continuous/continuous.cfg | 1 + .../.kokoro/docs/common.cfg | 48 ++ .../.kokoro/docs/docs.cfg | 1 + .../.kokoro/presubmit/common.cfg | 27 + .../.kokoro/presubmit/presubmit.cfg | 1 + .../.kokoro/publish-docs.sh | 57 ++ .../google-cloud-bigquery/.kokoro/release.sh | 34 + .../.kokoro/release/common.cfg | 64 ++ .../.kokoro/release/release.cfg | 1 + .../.kokoro/trampoline.sh | 23 + .../google-cloud-bigquery/.repo-metadata.json | 2 +- .../google-cloud-bigquery/CODE_OF_CONDUCT.md | 44 + .../google-cloud-bigquery/CONTRIBUTING.rst | 279 +++++++ packages/google-cloud-bigquery/MANIFEST.in | 1 + packages/google-cloud-bigquery/docs/conf.py | 83 +- .../google-cloud-bigquery/docs/snippets.py | 8 +- packages/google-cloud-bigquery/noxfile.py | 27 +- packages/google-cloud-bigquery/renovate.json | 5 + packages/google-cloud-bigquery/setup.cfg | 1 + packages/google-cloud-bigquery/setup.py | 2 +- packages/google-cloud-bigquery/synth.metadata | 751 +----------------- packages/google-cloud-bigquery/synth.py | 12 +- .../test_utils/credentials.json.enc | 49 ++ .../scripts/circleci/get_tagged_package.py | 64 ++ .../scripts/circleci/twine_upload.sh | 36 + .../test_utils/scripts/get_target_packages.py | 268 +++++++ .../scripts/get_target_packages_kokoro.py | 98 +++ .../test_utils/scripts/run_emulator.py | 199 +++++ .../test_utils/scripts/update_docs.sh | 93 +++ .../google-cloud-bigquery/test_utils/setup.py | 64 ++ .../test_utils/test_utils/__init__.py | 0 .../test_utils/test_utils/imports.py | 38 + .../test_utils/test_utils/retry.py | 207 +++++ .../test_utils/test_utils/system.py | 81 ++ .../test_utils/test_utils/vpcsc_config.py | 118 +++ 46 files changed, 2207 insertions(+), 814 deletions(-) create mode 100644 packages/google-cloud-bigquery/.github/CONTRIBUTING.md create mode 100644 packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md create mode 100644 packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/feature_request.md create mode 100644 packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/support_request.md create mode 100644 packages/google-cloud-bigquery/.github/PULL_REQUEST_TEMPLATE.md create mode 100644 packages/google-cloud-bigquery/.github/release-please.yml create mode 100755 packages/google-cloud-bigquery/.kokoro/build.sh create mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/continuous.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/docs/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/docs/docs.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg create mode 100755 packages/google-cloud-bigquery/.kokoro/publish-docs.sh create mode 100755 packages/google-cloud-bigquery/.kokoro/release.sh create mode 100644 packages/google-cloud-bigquery/.kokoro/release/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/release/release.cfg create mode 100755 packages/google-cloud-bigquery/.kokoro/trampoline.sh create mode 100644 packages/google-cloud-bigquery/CODE_OF_CONDUCT.md create mode 100644 packages/google-cloud-bigquery/CONTRIBUTING.rst create mode 100644 packages/google-cloud-bigquery/renovate.json create mode 100644 packages/google-cloud-bigquery/test_utils/credentials.json.enc create mode 100644 packages/google-cloud-bigquery/test_utils/scripts/circleci/get_tagged_package.py create mode 100755 packages/google-cloud-bigquery/test_utils/scripts/circleci/twine_upload.sh create mode 100644 packages/google-cloud-bigquery/test_utils/scripts/get_target_packages.py create mode 100644 packages/google-cloud-bigquery/test_utils/scripts/get_target_packages_kokoro.py create mode 100644 packages/google-cloud-bigquery/test_utils/scripts/run_emulator.py create mode 100755 packages/google-cloud-bigquery/test_utils/scripts/update_docs.sh create mode 100644 packages/google-cloud-bigquery/test_utils/setup.py create mode 100644 packages/google-cloud-bigquery/test_utils/test_utils/__init__.py create mode 100644 packages/google-cloud-bigquery/test_utils/test_utils/imports.py create mode 100644 packages/google-cloud-bigquery/test_utils/test_utils/retry.py create mode 100644 packages/google-cloud-bigquery/test_utils/test_utils/system.py create mode 100644 packages/google-cloud-bigquery/test_utils/test_utils/vpcsc_config.py diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index 098720f672e1..b178b094aa1d 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -1,3 +1,4 @@ +# Generated by synthtool. DO NOT EDIT! [run] branch = True @@ -14,3 +15,5 @@ exclude_lines = omit = */gapic/*.py */proto/*.py + */core/*.py + */site-packages/*.py \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.flake8 b/packages/google-cloud-bigquery/.flake8 index 61766fa84d02..0268ecc9c55c 100644 --- a/packages/google-cloud-bigquery/.flake8 +++ b/packages/google-cloud-bigquery/.flake8 @@ -1,3 +1,4 @@ +# Generated by synthtool. DO NOT EDIT! [flake8] ignore = E203, E266, E501, W503 exclude = diff --git a/packages/google-cloud-bigquery/.github/CONTRIBUTING.md b/packages/google-cloud-bigquery/.github/CONTRIBUTING.md new file mode 100644 index 000000000000..939e5341e74d --- /dev/null +++ b/packages/google-cloud-bigquery/.github/CONTRIBUTING.md @@ -0,0 +1,28 @@ +# How to Contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution; +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to to see +your current agreements on file or to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. + +## Community Guidelines + +This project follows [Google's Open Source Community +Guidelines](https://opensource.google.com/conduct/). diff --git a/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md b/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000000..222dc82a48a5 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,44 @@ +--- +name: Bug report +about: Create a report to help us improve + +--- + +Thanks for stopping by to let us know something could be better! + +**PLEASE READ**: If you have a support contract with Google, please create an issue in the [support console](https://cloud.google.com/support/) instead of filing on GitHub. This will ensure a timely response. + +Please run down the following list and make sure you've tried the usual "quick fixes": + + - Search the issues already opened: https://github.com/googleapis/python-bigquery/issues + - Search the issues on our "catch-all" repository: https://github.com/googleapis/google-cloud-python + - Search StackOverflow: http://stackoverflow.com/questions/tagged/google-cloud-platform+python + +If you are still having issues, please be sure to include as much information as possible: + +#### Environment details + + - OS type and version: + - Python version: `python --version` + - pip version: `pip --version` + - `google-cloud-bigquery` version: `pip show google-cloud-bigquery` + +#### Steps to reproduce + + 1. ? + 2. ? + +#### Code example + +```python +# example +``` + +#### Stack trace +``` +# example +``` + +Making sure to follow these steps will guarantee the quickest resolution possible. + +Thanks! diff --git a/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/feature_request.md b/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 000000000000..6365857f33c6 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,18 @@ +--- +name: Feature request +about: Suggest an idea for this library + +--- + +Thanks for stopping by to let us know something could be better! + +**PLEASE READ**: If you have a support contract with Google, please create an issue in the [support console](https://cloud.google.com/support/) instead of filing on GitHub. This will ensure a timely response. + + **Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + **Describe the solution you'd like** +A clear and concise description of what you want to happen. + **Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + **Additional context** +Add any other context or screenshots about the feature request here. diff --git a/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/support_request.md b/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/support_request.md new file mode 100644 index 000000000000..995869032125 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/support_request.md @@ -0,0 +1,7 @@ +--- +name: Support request +about: If you have a support contract with Google, please create an issue in the Google Cloud Support console. + +--- + +**PLEASE READ**: If you have a support contract with Google, please create an issue in the [support console](https://cloud.google.com/support/) instead of filing on GitHub. This will ensure a timely response. diff --git a/packages/google-cloud-bigquery/.github/PULL_REQUEST_TEMPLATE.md b/packages/google-cloud-bigquery/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 000000000000..65ceeeb5e490 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,7 @@ +Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: +- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea +- [ ] Ensure the tests and linter pass +- [ ] Code coverage does not decrease (if any source code was changed) +- [ ] Appropriate docs were updated (if necessary) + +Fixes # 🦕 diff --git a/packages/google-cloud-bigquery/.github/release-please.yml b/packages/google-cloud-bigquery/.github/release-please.yml new file mode 100644 index 000000000000..4507ad0598a5 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/release-please.yml @@ -0,0 +1 @@ +releaseType: python diff --git a/packages/google-cloud-bigquery/.gitignore b/packages/google-cloud-bigquery/.gitignore index 9e3a5f25770c..3fb06e09ce74 100644 --- a/packages/google-cloud-bigquery/.gitignore +++ b/packages/google-cloud-bigquery/.gitignore @@ -1 +1,58 @@ -docs/_build \ No newline at end of file +*.py[cod] +*.sw[op] + +# C extensions +*.so + +# Packages +*.egg +*.egg-info +dist +build +eggs +parts +bin +var +sdist +develop-eggs +.installed.cfg +lib +lib64 +__pycache__ + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.nox +.cache +.pytest_cache + + +# Mac +.DS_Store + +# JetBrains +.idea + +# VS Code +.vscode + +# emacs +*~ + +# Built documentation +docs/_build +bigquery/docs/generated + +# Virtual environment +env/ +coverage.xml + +# System test environment variables. +system_tests/local_test_setup + +# Make sure a generated file isn't accidentally committed. +pylintrc +pylintrc.test \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/build.sh b/packages/google-cloud-bigquery/.kokoro/build.sh new file mode 100755 index 000000000000..d3749e290e28 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/build.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eo pipefail + +cd github/python-bigquery + +# Disable buffering, so that the logs stream through. +export PYTHONUNBUFFERED=1 + +# Debug: show build environment +env | grep KOKORO + +# Setup service account credentials. +export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json + +# Setup project id. +export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") + +# Remove old nox +python3.6 -m pip uninstall --yes --quiet nox-automation + +# Install nox +python3.6 -m pip install --upgrade --quiet nox +python3.6 -m nox --version + +python3.6 -m nox diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/common.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/common.cfg new file mode 100644 index 000000000000..1f46f62708d9 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/continuous/common.cfg @@ -0,0 +1,27 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Download resources for system tests (service account key, etc.) +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/google-cloud-python" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline.sh" + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" +} +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/build.sh" +} diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/continuous.cfg new file mode 100644 index 000000000000..8f43917d92fe --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/continuous/continuous.cfg @@ -0,0 +1 @@ +# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/docs/common.cfg b/packages/google-cloud-bigquery/.kokoro/docs/common.cfg new file mode 100644 index 000000000000..229abf075515 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/docs/common.cfg @@ -0,0 +1,48 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline.sh" + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" +} +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/publish-docs.sh" +} + +env_vars: { + key: "STAGING_BUCKET" + value: "docs-staging" +} + +# Fetch the token needed for reporting release status to GitHub +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "yoshi-automation-github-key" + } + } +} + +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "docuploader_service_account" + } + } +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/docs/docs.cfg b/packages/google-cloud-bigquery/.kokoro/docs/docs.cfg new file mode 100644 index 000000000000..8f43917d92fe --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/docs/docs.cfg @@ -0,0 +1 @@ +# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/common.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/common.cfg new file mode 100644 index 000000000000..1f46f62708d9 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/common.cfg @@ -0,0 +1,27 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Download resources for system tests (service account key, etc.) +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/google-cloud-python" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline.sh" + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" +} +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/build.sh" +} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg new file mode 100644 index 000000000000..8f43917d92fe --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg @@ -0,0 +1 @@ +# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh new file mode 100755 index 000000000000..de3549ef89ca --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash + +set -eo pipefail + +# Disable buffering, so that the logs stream through. +export PYTHONUNBUFFERED=1 + +cd github/python-bigquery + +# Remove old nox +python3.6 -m pip uninstall --yes --quiet nox-automation + +# Install nox +python3.6 -m pip install --upgrade --quiet nox +python3.6 -m nox --version + +# build docs +nox -s docs + +python3 -m pip install gcp-docuploader + +# install a json parser +sudo apt-get update +sudo apt-get -y install software-properties-common +sudo add-apt-repository universe +sudo apt-get update +sudo apt-get -y install jq + +# create metadata +python3 -m docuploader create-metadata \ + --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ + --version=$(python3 setup.py --version) \ + --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ + --distribution-name=$(python3 setup.py --name) \ + --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ + --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ + --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) + +cat docs.metadata + +# upload docs +python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket docs-staging diff --git a/packages/google-cloud-bigquery/.kokoro/release.sh b/packages/google-cloud-bigquery/.kokoro/release.sh new file mode 100755 index 000000000000..55233bd89166 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/release.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash + +set -eo pipefail + +# Start the releasetool reporter +python3 -m pip install gcp-releasetool +python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source /tmp/publisher-script + +# Ensure that we have the latest versions of Twine, Wheel, and Setuptools. +python3 -m pip install --upgrade twine wheel setuptools + +# Disable buffering, so that the logs stream through. +export PYTHONUNBUFFERED=1 + +# Move into the package, build the distribution and upload. +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google_cloud_pypi_password") +cd github/python-bigquery +python3 setup.py sdist bdist_wheel +twine upload --username gcloudpypi --password "${TWINE_PASSWORD}" dist/* diff --git a/packages/google-cloud-bigquery/.kokoro/release/common.cfg b/packages/google-cloud-bigquery/.kokoro/release/common.cfg new file mode 100644 index 000000000000..661a044811ee --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/release/common.cfg @@ -0,0 +1,64 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline.sh" + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" +} +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/release.sh" +} + +# Fetch the token needed for reporting release status to GitHub +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "yoshi-automation-github-key" + } + } +} + +# Fetch PyPI password +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "google_cloud_pypi_password" + } + } +} + +# Fetch magictoken to use with Magic Github Proxy +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "releasetool-magictoken" + } + } +} + +# Fetch api key to use with Magic Github Proxy +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "magic-github-proxy-api-key" + } + } +} diff --git a/packages/google-cloud-bigquery/.kokoro/release/release.cfg b/packages/google-cloud-bigquery/.kokoro/release/release.cfg new file mode 100644 index 000000000000..8f43917d92fe --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/release/release.cfg @@ -0,0 +1 @@ +# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/trampoline.sh b/packages/google-cloud-bigquery/.kokoro/trampoline.sh new file mode 100755 index 000000000000..e8c4251f3ed4 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/trampoline.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eo pipefail + +python3 "${KOKORO_GFILE_DIR}/trampoline_v1.py" || ret_code=$? + +chmod +x ${KOKORO_GFILE_DIR}/trampoline_cleanup.sh +${KOKORO_GFILE_DIR}/trampoline_cleanup.sh || true + +exit ${ret_code} diff --git a/packages/google-cloud-bigquery/.repo-metadata.json b/packages/google-cloud-bigquery/.repo-metadata.json index 5b4734b8e389..f50dbbeb2b51 100644 --- a/packages/google-cloud-bigquery/.repo-metadata.json +++ b/packages/google-cloud-bigquery/.repo-metadata.json @@ -6,7 +6,7 @@ "issue_tracker": "https://issuetracker.google.com/savedsearches/559654", "release_level": "ga", "language": "python", - "repo": "googleapis/google-cloud-python", + "repo": "googleapis/python-bigquery", "distribution_name": "google-cloud-bigquery", "api_id": "bigquery.googleapis.com", "requires_billing": false diff --git a/packages/google-cloud-bigquery/CODE_OF_CONDUCT.md b/packages/google-cloud-bigquery/CODE_OF_CONDUCT.md new file mode 100644 index 000000000000..b3d1f6029849 --- /dev/null +++ b/packages/google-cloud-bigquery/CODE_OF_CONDUCT.md @@ -0,0 +1,44 @@ + +# Contributor Code of Conduct + +As contributors and maintainers of this project, +and in the interest of fostering an open and welcoming community, +we pledge to respect all people who contribute through reporting issues, +posting feature requests, updating documentation, +submitting pull requests or patches, and other activities. + +We are committed to making participation in this project +a harassment-free experience for everyone, +regardless of level of experience, gender, gender identity and expression, +sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery +* Personal attacks +* Trolling or insulting/derogatory comments +* Public or private harassment +* Publishing other's private information, +such as physical or electronic +addresses, without explicit permission +* Other unethical or unprofessional conduct. + +Project maintainers have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct. +By adopting this Code of Conduct, +project maintainers commit themselves to fairly and consistently +applying these principles to every aspect of managing this project. +Project maintainers who do not follow or enforce the Code of Conduct +may be permanently removed from the project team. + +This code of conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +Instances of abusive, harassing, or otherwise unacceptable behavior +may be reported by opening an issue +or contacting one or more of the project maintainers. + +This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.2.0, +available at [http://contributor-covenant.org/version/1/2/0/](http://contributor-covenant.org/version/1/2/0/) diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst new file mode 100644 index 000000000000..c812edbd1d4d --- /dev/null +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -0,0 +1,279 @@ +.. Generated by synthtool. DO NOT EDIT! +############ +Contributing +############ + +#. **Please sign one of the contributor license agreements below.** +#. Fork the repo, develop and test your code changes, add docs. +#. Make sure that your commit messages clearly describe the changes. +#. Send a pull request. (Please Read: `Faster Pull Request Reviews`_) + +.. _Faster Pull Request Reviews: https://github.com/kubernetes/community/blob/master/contributors/guide/pull-requests.md#best-practices-for-faster-reviews + +.. contents:: Here are some guidelines for hacking on the Google Cloud Client libraries. + +*************** +Adding Features +*************** + +In order to add a feature: + +- The feature must be documented in both the API and narrative + documentation. + +- The feature must work fully on the following CPython versions: 2.7, + 3.5, 3.6, and 3.7 on both UNIX and Windows. + +- The feature must not add unnecessary dependencies (where + "unnecessary" is of course subjective, but new dependencies should + be discussed). + +**************************** +Using a Development Checkout +**************************** + +You'll have to create a development environment using a Git checkout: + +- While logged into your GitHub account, navigate to the + ``python-bigquery`` `repo`_ on GitHub. + +- Fork and clone the ``python-bigquery`` repository to your GitHub account by + clicking the "Fork" button. + +- Clone your fork of ``python-bigquery`` from your GitHub account to your local + computer, substituting your account username and specifying the destination + as ``hack-on-python-bigquery``. E.g.:: + + $ cd ${HOME} + $ git clone git@github.com:USERNAME/python-bigquery.git hack-on-python-bigquery + $ cd hack-on-python-bigquery + # Configure remotes such that you can pull changes from the googleapis/python-bigquery + # repository into your local repository. + $ git remote add upstream git@github.com:googleapis/python-bigquery.git + # fetch and merge changes from upstream into master + $ git fetch upstream + $ git merge upstream/master + +Now your local repo is set up such that you will push changes to your GitHub +repo, from which you can submit a pull request. + +To work on the codebase and run the tests, we recommend using ``nox``, +but you can also use a ``virtualenv`` of your own creation. + +.. _repo: https://github.com/googleapis/python-bigquery + +Using ``nox`` +============= + +We use `nox `__ to instrument our tests. + +- To test your changes, run unit tests with ``nox``:: + + $ nox -s unit-2.7 + $ nox -s unit-3.7 + $ ... + + .. note:: + + The unit tests and system tests are described in the + ``noxfile.py`` files in each directory. + +.. nox: https://pypi.org/project/nox/ + +Note on Editable Installs / Develop Mode +======================================== + +- As mentioned previously, using ``setuptools`` in `develop mode`_ + or a ``pip`` `editable install`_ is not possible with this + library. This is because this library uses `namespace packages`_. + For context see `Issue #2316`_ and the relevant `PyPA issue`_. + + Since ``editable`` / ``develop`` mode can't be used, packages + need to be installed directly. Hence your changes to the source + tree don't get incorporated into the **already installed** + package. + +.. _namespace packages: https://www.python.org/dev/peps/pep-0420/ +.. _Issue #2316: https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2316 +.. _PyPA issue: https://github.com/pypa/packaging-problems/issues/12 +.. _develop mode: https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode +.. _editable install: https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs + +***************************************** +I'm getting weird errors... Can you help? +***************************************** + +If the error mentions ``Python.h`` not being found, +install ``python-dev`` and try again. +On Debian/Ubuntu:: + + $ sudo apt-get install python-dev + +************ +Coding Style +************ + +- PEP8 compliance, with exceptions defined in the linter configuration. + If you have ``nox`` installed, you can test that you have not introduced + any non-compliant code via:: + + $ nox -s lint + +- In order to make ``nox -s lint`` run faster, you can set some environment + variables:: + + export GOOGLE_CLOUD_TESTING_REMOTE="upstream" + export GOOGLE_CLOUD_TESTING_BRANCH="master" + + By doing this, you are specifying the location of the most up-to-date + version of ``python-bigquery``. The the suggested remote name ``upstream`` + should point to the official ``googleapis`` checkout and the + the branch should be the main branch on that remote (``master``). + +Exceptions to PEP8: + +- Many unit tests use a helper method, ``_call_fut`` ("FUT" is short for + "Function-Under-Test"), which is PEP8-incompliant, but more readable. + Some also use a local variable, ``MUT`` (short for "Module-Under-Test"). + +******************** +Running System Tests +******************** + +- To run system tests, you can execute:: + + $ nox -s system-3.7 + $ nox -s system-2.7 + + .. note:: + + System tests are only configured to run under Python 2.7 and + Python 3.7. For expediency, we do not run them in older versions + of Python 3. + + This alone will not run the tests. You'll need to change some local + auth settings and change some configuration in your project to + run all the tests. + +- System tests will be run against an actual project and + so you'll need to provide some environment variables to facilitate + authentication to your project: + + - ``GOOGLE_APPLICATION_CREDENTIALS``: The path to a JSON key file; + Such a file can be downloaded directly from the developer's console by clicking + "Generate new JSON key". See private key + `docs `__ + for more details. + +- Once you have downloaded your json keys, set the environment variable + ``GOOGLE_APPLICATION_CREDENTIALS`` to the absolute path of the json file:: + + $ export GOOGLE_APPLICATION_CREDENTIALS="/Users//path/to/app_credentials.json" + + +************* +Test Coverage +************* + +- The codebase *must* have 100% test statement coverage after each commit. + You can test coverage via ``nox -s cover``. + +****************************************************** +Documentation Coverage and Building HTML Documentation +****************************************************** + +If you fix a bug, and the bug requires an API or behavior modification, all +documentation in this package which references that API or behavior must be +changed to reflect the bug fix, ideally in the same commit that fixes the bug +or adds the feature. + +Build the docs via: + + $ nox -s docs + +******************************************** +Note About ``README`` as it pertains to PyPI +******************************************** + +The `description on PyPI`_ for the project comes directly from the +``README``. Due to the reStructuredText (``rst``) parser used by +PyPI, relative links which will work on GitHub (e.g. ``CONTRIBUTING.rst`` +instead of +``https://github.com/googleapis/python-bigquery/blob/master/CONTRIBUTING.rst``) +may cause problems creating links or rendering the description. + +.. _description on PyPI: https://pypi.org/project/google-cloud-bigquery + + +************************* +Supported Python Versions +************************* + +We support: + +- `Python 3.5`_ +- `Python 3.6`_ +- `Python 3.7`_ + +.. _Python 3.5: https://docs.python.org/3.5/ +.. _Python 3.6: https://docs.python.org/3.6/ +.. _Python 3.7: https://docs.python.org/3.7/ + + +Supported versions can be found in our ``noxfile.py`` `config`_. + +.. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py + +We explicitly decided not to support `Python 2.5`_ due to `decreased usage`_ +and lack of continuous integration `support`_. + +.. _Python 2.5: https://docs.python.org/2.5/ +.. _decreased usage: https://caremad.io/2013/10/a-look-at-pypi-downloads/ +.. _support: https://blog.travis-ci.com/2013-11-18-upcoming-build-environment-updates/ + +We have `dropped 2.6`_ as a supported version as well since Python 2.6 is no +longer supported by the core development team. + +Python 2.7 support is deprecated. All code changes should maintain Python 2.7 compatibility until January 1, 2020. + +We also explicitly decided to support Python 3 beginning with version +3.5. Reasons for this include: + +- Encouraging use of newest versions of Python 3 +- Taking the lead of `prominent`_ open-source `projects`_ +- `Unicode literal support`_ which allows for a cleaner codebase that + works in both Python 2 and Python 3 + +.. _prominent: https://docs.djangoproject.com/en/1.9/faq/install/#what-python-version-can-i-use-with-django +.. _projects: http://flask.pocoo.org/docs/0.10/python3/ +.. _Unicode literal support: https://www.python.org/dev/peps/pep-0414/ +.. _dropped 2.6: https://github.com/googleapis/google-cloud-python/issues/995 + +********** +Versioning +********** + +This library follows `Semantic Versioning`_. + +.. _Semantic Versioning: http://semver.org/ + +Some packages are currently in major version zero (``0.y.z``), which means that +anything may change at any time and the public API should not be considered +stable. + +****************************** +Contributor License Agreements +****************************** + +Before we can accept your pull requests you'll need to sign a Contributor +License Agreement (CLA): + +- **If you are an individual writing original source code** and **you own the + intellectual property**, then you'll need to sign an + `individual CLA `__. +- **If you work for a company that wants to allow you to contribute your work**, + then you'll need to sign a + `corporate CLA `__. + +You can sign these electronically (just scroll to the bottom). After that, +we'll be able to accept your pull requests. diff --git a/packages/google-cloud-bigquery/MANIFEST.in b/packages/google-cloud-bigquery/MANIFEST.in index 9cbf175afe6b..cd011be27a0e 100644 --- a/packages/google-cloud-bigquery/MANIFEST.in +++ b/packages/google-cloud-bigquery/MANIFEST.in @@ -1,3 +1,4 @@ +# Generated by synthtool. DO NOT EDIT! include README.rst LICENSE recursive-include google *.json *.proto recursive-include tests * diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 1b83501d1417..d7dae6960f6a 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -1,17 +1,4 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. # # google-cloud-bigquery documentation build configuration file # @@ -26,23 +13,19 @@ import sys import os -import shutil - -from sphinx.util import logging - -logger = logging.getLogger(__name__) +import shlex # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("..")) -__version__ = "0.1.0" +__version__ = "" # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +needs_sphinx = "1.6.3" # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -53,6 +36,7 @@ "sphinx.ext.intersphinx", "sphinx.ext.coverage", "sphinx.ext.napoleon", + "sphinx.ext.todo", "sphinx.ext.viewcode", ] @@ -61,14 +45,17 @@ autodoc_default_flags = ["members"] autosummary_generate = True + # Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates", os.path.join("..", "..", "docs", "_templates")] +templates_path = ["_templates"] # Allow markdown includes (so releases.md can include CHANGLEOG.md) # http://www.sphinx-doc.org/en/master/markdown.html source_parsers = {".md": "recommonmark.parser.CommonMarkParser"} # The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] source_suffix = [".rst", ".md"] # The encoding of source files. @@ -79,7 +66,7 @@ # General information about the project. project = u"google-cloud-bigquery" -copyright = u"2015, Google" +copyright = u"2019, Google" author = u"Google APIs" # The version info for the project you're documenting, acts as replacement for @@ -135,6 +122,7 @@ # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True + # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for @@ -144,7 +132,15 @@ # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -# html_theme_options = {} +html_theme_options = { + "description": "Google Cloud Client Libraries for google-cloud-bigquery", + "github_user": "googleapis", + "github_repo": "python-bigquery", + "github_banner": True, + "font_family": "'Roboto', Georgia, sans", + "head_font_family": "'Roboto', Georgia, serif", + "code_font_family": "'Roboto Mono', 'Consolas', monospace", +} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] @@ -233,6 +229,18 @@ # Output file base name for HTML help builder. htmlhelp_basename = "google-cloud-bigquery-doc" +# -- Options for warnings ------------------------------------------------------ + + +suppress_warnings = [ + # Temporarily suppress this to avoid "more than one target found for + # cross-reference" warning, which are intractable for us to avoid while in + # a mono-repo. + # See https://github.com/sphinx-doc/sphinx/blob + # /2a65ffeef5c107c19084fabdd706cdff3f52d93c/sphinx/domains/python.py#L843 + "ref.python" +] + # -- Options for LaTeX output --------------------------------------------- latex_elements = { @@ -279,6 +287,7 @@ # If false, no module index is generated. # latex_domain_indices = True + # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples @@ -296,6 +305,7 @@ # If true, show URL addresses after external links. # man_show_urls = False + # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples @@ -308,6 +318,7 @@ u"google-cloud-bigquery Documentation", author, "google-cloud-bigquery", + "google-cloud-bigquery Library", "APIs", ) ] @@ -324,13 +335,16 @@ # If true, do not generate a @detailmenu in the "Top" node's menu. # texinfo_no_detailmenu = False + # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - "gax": ("https://gax-python.readthedocs.org/en/latest/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), "python": ("http://python.readthedocs.org/en/latest/", None), + "google-auth": ("https://google-auth.readthedocs.io/en/stable", None), + "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None), + "grpc": ("https://grpc.io/grpc/python/", None), } + # Napoleon settings napoleon_google_docstring = True napoleon_numpy_docstring = True @@ -342,22 +356,3 @@ napoleon_use_ivar = False napoleon_use_param = True napoleon_use_rtype = True - -# Static HTML pages, e.g. to support redirects -# See: https://tech.signavio.com/2017/managing-sphinx-redirects -# HTML pages to be copied from source to target -static_html_pages = ["usage.html", "generated/google.cloud.bigquery.magics.html"] - - -def copy_static_html_pages(app, exception): - if exception is None and app.builder.name == "html": - for static_html_page in static_html_pages: - target_path = app.outdir + "/" + static_html_page - src_path = app.srcdir + "/" + static_html_page - if os.path.isfile(src_path): - logger.info("Copying static html: %s -> %s", src_path, target_path) - shutil.copyfile(src_path, target_path) - - -def setup(app): - app.connect("build-finished", copy_static_html_pages) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 4981a1e18100..9fa2a21a6c25 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -753,9 +753,7 @@ def test_load_table_add_column(client, to_delete): to_delete.append(dataset) snippets_dir = os.path.abspath(os.path.dirname(__file__)) - filepath = os.path.join( - snippets_dir, "..", "..", "bigquery", "tests", "data", "people.csv" - ) + filepath = os.path.join(snippets_dir, "..", "tests", "data", "people.csv") table_ref = dataset_ref.table("my_table") old_schema = [bigquery.SchemaField("full_name", "STRING", mode="REQUIRED")] table = client.create_table(bigquery.Table(table_ref, schema=old_schema)) @@ -821,9 +819,7 @@ def test_load_table_relax_column(client, to_delete): to_delete.append(dataset) snippets_dir = os.path.abspath(os.path.dirname(__file__)) - filepath = os.path.join( - snippets_dir, "..", "..", "bigquery", "tests", "data", "people.csv" - ) + filepath = os.path.join(snippets_dir, "..", "tests", "data", "people.csv") table_ref = dataset_ref.table("my_table") old_schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 17a2dee417c0..f7e59e56059d 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -20,8 +20,6 @@ import nox -LOCAL_DEPS = (os.path.join("..", "api_core[grpc]"), os.path.join("..", "core")) - BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") @@ -35,10 +33,8 @@ def default(session): """ # Install all test dependencies, then install local packages in-place. session.install("mock", "pytest", "pytest-cov", "freezegun") - for local_dep in LOCAL_DEPS: - session.install("-e", local_dep) - - session.install("-e", os.path.join("..", "test_utils")) + session.install("grpcio") + session.install("-e", "test_utils") coverage_fail_under = "--cov-fail-under=97" @@ -97,10 +93,8 @@ def system(session): # Install all test dependencies, then install local packages in place. session.install("mock", "pytest", "psutil") - for local_dep in LOCAL_DEPS: - session.install("-e", local_dep) - session.install("-e", os.path.join("..", "storage")) - session.install("-e", os.path.join("..", "test_utils")) + session.install("google-cloud-storage") + session.install("-e", "test_utils") session.install("-e", ".[all]") # IPython does not support Python 2 after version 5.x @@ -125,10 +119,9 @@ def snippets(session): # Install all test dependencies, then install local packages in place. session.install("mock", "pytest") - for local_dep in LOCAL_DEPS: - session.install("-e", local_dep) - session.install("-e", os.path.join("..", "storage")) - session.install("-e", os.path.join("..", "test_utils")) + session.install("google-cloud-storage") + session.install("grpcio") + session.install("-e", "test_utils") session.install("-e", ".[all]") # Run py.test against the snippets tests. @@ -157,8 +150,6 @@ def lint(session): """ session.install("black", "flake8") - for local_dep in LOCAL_DEPS: - session.install("-e", local_dep) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") @@ -193,9 +184,7 @@ def docs(session): """Build the docs.""" session.install("ipython", "recommonmark", "sphinx", "sphinx_rtd_theme") - for local_dep in LOCAL_DEPS: - session.install("-e", local_dep) - session.install("-e", os.path.join("..", "storage")) + session.install("google-cloud-storage") session.install("-e", ".[all]") shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) diff --git a/packages/google-cloud-bigquery/renovate.json b/packages/google-cloud-bigquery/renovate.json new file mode 100644 index 000000000000..4fa949311b20 --- /dev/null +++ b/packages/google-cloud-bigquery/renovate.json @@ -0,0 +1,5 @@ +{ + "extends": [ + "config:base", ":preserveSemverRanges" + ] +} diff --git a/packages/google-cloud-bigquery/setup.cfg b/packages/google-cloud-bigquery/setup.cfg index 2a9acf13daa9..3bd555500e37 100644 --- a/packages/google-cloud-bigquery/setup.cfg +++ b/packages/google-cloud-bigquery/setup.cfg @@ -1,2 +1,3 @@ +# Generated by synthtool. DO NOT EDIT! [bdist_wheel] universal = 1 diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 378c4fc1b4ce..6324c8b250ba 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -95,7 +95,7 @@ author="Google LLC", author_email="googleapis-packages@google.com", license="Apache 2.0", - url="https://github.com/GoogleCloudPlatform/google-cloud-python", + url="https://github.com/googleapis/python-bigquery", classifiers=[ release_status, "Intended Audience :: Developers", diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index ef9fc79c57d3..86ecc1ffa60c 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -1,5 +1,5 @@ { - "updateTime": "2020-01-29T13:17:11.693204Z", + "updateTime": "2020-02-04T11:46:46.343511Z", "sources": [ { "generator": { @@ -12,9 +12,15 @@ "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "cf3b61102ed5f36b827bc82ec39be09525f018c8", - "internalRef": "292034635", - "log": "cf3b61102ed5f36b827bc82ec39be09525f018c8\n Fix to protos for v1p1beta1 release of Cloud Security Command Center\n\nPiperOrigin-RevId: 292034635\n\n4e1cfaa7c0fede9e65d64213ca3da1b1255816c0\nUpdate the public proto to support UTF-8 encoded id for CatalogService API, increase the ListCatalogItems deadline to 300s and some minor documentation change\n\nPiperOrigin-RevId: 292030970\n\n9c483584f8fd5a1b862ae07973f4cc7bb3e46648\nasset: add annotations to v1p1beta1\n\nPiperOrigin-RevId: 292009868\n\ne19209fac29731d0baf6d9ac23da1164f7bdca24\nAdd the google.rpc.context.AttributeContext message to the open source\ndirectories.\n\nPiperOrigin-RevId: 291999930\n\nae5662960573f279502bf98a108a35ba1175e782\noslogin API: move file level option on top of the file to avoid protobuf.js bug.\n\nPiperOrigin-RevId: 291990506\n\neba3897fff7c49ed85d3c47fc96fe96e47f6f684\nAdd cc_proto_library and cc_grpc_library targets for Spanner and IAM protos.\n\nPiperOrigin-RevId: 291988651\n\n8e981acfd9b97ea2f312f11bbaa7b6c16e412dea\nBeta launch for PersonDetection and FaceDetection features.\n\nPiperOrigin-RevId: 291821782\n\n994e067fae3b21e195f7da932b08fff806d70b5d\nasset: add annotations to v1p2beta1\n\nPiperOrigin-RevId: 291815259\n\n244e1d2c89346ca2e0701b39e65552330d68545a\nAdd Playable Locations service\n\nPiperOrigin-RevId: 291806349\n\n909f8f67963daf45dd88d020877fb9029b76788d\nasset: add annotations to v1beta2\n\nPiperOrigin-RevId: 291805301\n\n3c39a1d6e23c1ef63c7fba4019c25e76c40dfe19\nKMS: add file-level message for CryptoKeyPath, it is defined in gapic yaml but not\nin proto files.\n\nPiperOrigin-RevId: 291420695\n\nc6f3f350b8387f8d1b85ed4506f30187ebaaddc3\ncontaineranalysis: update v1beta1 and bazel build with annotations\n\nPiperOrigin-RevId: 291401900\n\n92887d74b44e4e636252b7b8477d0d2570cd82db\nfix: fix the location of grpc config file.\n\nPiperOrigin-RevId: 291396015\n\ne26cab8afd19d396b929039dac5d874cf0b5336c\nexpr: add default_host and method_signature annotations to CelService\n\nPiperOrigin-RevId: 291240093\n\n06093ae3952441c34ec176d1f7431b8765cec0be\nirm: fix v1alpha2 bazel build by adding missing proto imports\n\nPiperOrigin-RevId: 291227940\n\na8a2514af326e4673063f9a3c9d0ef1091c87e6c\nAdd proto annotation for cloud/irm API\n\nPiperOrigin-RevId: 291217859\n\n8d16f76de065f530d395a4c7eabbf766d6a120fd\nGenerate Memcache v1beta2 API protos and gRPC ServiceConfig files\n\nPiperOrigin-RevId: 291008516\n\n3af1dabd93df9a9f17bf3624d3b875c11235360b\ngrafeas: Add containeranalysis default_host to Grafeas service\n\nPiperOrigin-RevId: 290965849\n\nbe2663fa95e31cba67d0cd62611a6674db9f74b7\nfix(google/maps/roads): add missing opening bracket\n\nPiperOrigin-RevId: 290964086\n\nfacc26550a0af0696e0534bc9cae9df14275aa7c\nUpdating v2 protos with the latest inline documentation (in comments) and adding a per-service .yaml file.\n\nPiperOrigin-RevId: 290952261\n\ncda99c1f7dc5e4ca9b1caeae1dc330838cbc1461\nChange api_name to 'asset' for v1p1beta1\n\nPiperOrigin-RevId: 290800639\n\n94e9e90c303a820ce40643d9129e7f0d2054e8a1\nAdds Google Maps Road service\n\nPiperOrigin-RevId: 290795667\n\na3b23dcb2eaecce98c600c7d009451bdec52dbda\nrpc: new message ErrorInfo, other comment updates\n\nPiperOrigin-RevId: 290781668\n\n26420ef4e46c37f193c0fbe53d6ebac481de460e\nAdd proto definition for Org Policy v1.\n\nPiperOrigin-RevId: 290771923\n\n7f0dab8177cf371ae019a082e2512de7ac102888\nPublish Routes Preferred API v1 service definitions.\n\nPiperOrigin-RevId: 290326986\n\nad6e508d0728e1d1bca6e3f328cd562718cb772d\nFix: Qualify resource type references with \"jobs.googleapis.com/\"\n\nPiperOrigin-RevId: 290285762\n\n58e770d568a2b78168ddc19a874178fee8265a9d\ncts client library\n\nPiperOrigin-RevId: 290146169\n\naf9daa4c3b4c4a8b7133b81588dd9ffd37270af2\nAdd more programming language options to public proto\n\nPiperOrigin-RevId: 290144091\n\nd9f2bbf2df301ef84641d4cec7c828736a0bd907\ntalent: add missing resource.proto dep to Bazel build target\n\nPiperOrigin-RevId: 290143164\n\n3b3968237451d027b42471cd28884a5a1faed6c7\nAnnotate Talent API.\nAdd gRPC service config for retry.\nUpdate bazel file with google.api.resource dependency.\n\nPiperOrigin-RevId: 290125172\n\n0735b4b096872960568d1f366bfa75b7b0e1f1a3\nWeekly library update.\n\nPiperOrigin-RevId: 289939042\n\n8760d3d9a4543d7f9c0d1c7870aca08b116e4095\nWeekly library update.\n\nPiperOrigin-RevId: 289939020\n\n8607df842f782a901805187e02fff598145b0b0e\nChange Talent API timeout to 30s.\n\nPiperOrigin-RevId: 289912621\n\n908155991fe32570653bcb72ecfdcfc896642f41\nAdd Recommendations AI V1Beta1\n\nPiperOrigin-RevId: 289901914\n\n5c9a8c2bebd8b71aa66d1cc473edfaac837a2c78\nAdding no-arg method signatures for ListBillingAccounts and ListServices\n\nPiperOrigin-RevId: 289891136\n\n50b0e8286ac988b0593bd890eb31fef6ea2f5767\nlongrunning: add grpc service config and default_host annotation to operations.proto\n\nPiperOrigin-RevId: 289876944\n\n6cac27dabe51c54807b0401698c32d34998948a9\n Updating default deadline for Cloud Security Command Center's v1 APIs.\n\nPiperOrigin-RevId: 289875412\n\nd99df0d67057a233c711187e0689baa4f8e6333d\nFix: Correct spelling in C# namespace option\n\nPiperOrigin-RevId: 289709813\n\n2fa8d48165cc48e35b0c62e6f7bdade12229326c\nfeat: Publish Recommender v1 to GitHub.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289619243\n\n9118db63d1ab493a2e44a3b4973fde810a835c49\nfirestore: don't retry reads that fail with Aborted\n\nFor transaction reads that fail with ABORTED, we need to rollback and start a new transaction. Our current configuration makes it so that GAPIC retries ABORTED reads multiple times without making any progress. Instead, we should retry at the transaction level.\n\nPiperOrigin-RevId: 289532382\n\n1dbfd3fe4330790b1e99c0bb20beb692f1e20b8a\nFix bazel build\nAdd other langauges (Java was already there) for bigquery/storage/v1alpha2 api.\n\nPiperOrigin-RevId: 289519766\n\nc06599cdd7d11f8d3fd25f8d3249e5bb1a3d5d73\nInitial commit of google.cloud.policytroubleshooter API, The API helps in troubleshooting GCP policies. Refer https://cloud.google.com/iam/docs/troubleshooting-access for more information\n\nPiperOrigin-RevId: 289491444\n\nfce7d80fa16ea241e87f7bc33d68595422e94ecd\nDo not pass samples option for Artman config of recommender v1 API.\n\nPiperOrigin-RevId: 289477403\n\nef179e8c61436297e6bb124352e47e45c8c80cb1\nfix: Address missing Bazel dependency.\n\nBazel builds stopped working in 06ec6d5 because\nthe google/longrunning/operations.proto file took\nan import from google/api/client.proto, but that\nimport was not added to BUILD.bazel.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289446074\n\n8841655b242c84fd691d77d7bcf21b61044f01ff\nMigrate Data Labeling v1beta1 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289446026\n\n06ec6d5d053fff299eaa6eaa38afdd36c5e2fc68\nAdd annotations to google.longrunning.v1\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289413169\n\n0480cf40be1d3cc231f4268a2fdb36a8dd60e641\nMigrate IAM Admin v1 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289411084\n\n1017173e9adeb858587639af61889ad970c728b1\nSpecify a C# namespace for BigQuery Connection v1beta1\n\nPiperOrigin-RevId: 289396763\n\nb08714b378e8e5b0c4ecdde73f92c36d6303b4b6\nfix: Integrate latest proto-docs-plugin fix.\nFixes dialogflow v2\n\nPiperOrigin-RevId: 289189004\n\n51217a67e79255ee1f2e70a6a3919df082513327\nCreate BUILD file for recommender v1\n\nPiperOrigin-RevId: 289183234\n\nacacd87263c0a60e458561b8b8ce9f67c760552a\nGenerate recommender v1 API protos and gRPC ServiceConfig files\n\nPiperOrigin-RevId: 289177510\n\n9d2f7133b97720b1fa3601f6dcd30760ba6d8a1e\nFix kokoro build script\n\nPiperOrigin-RevId: 289166315\n\nc43a67530d2a47a0220cad20ca8de39b3fbaf2c5\ncloudtasks: replace missing RPC timeout config for v2beta2 and v2beta3\n\nPiperOrigin-RevId: 289162391\n\n4cefc229a9197236fc0adf02d69b71c0c5cf59de\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 289158456\n\n56f263fe959c50786dab42e3c61402d32d1417bd\nCatalog API: Adding config necessary to build client libraries\n\nPiperOrigin-RevId: 289149879\n\n4543762b23a57fc3c53d409efc3a9affd47b6ab3\nFix Bazel build\nbilling/v1 and dialogflow/v2 remain broken (not bazel-related issues).\nBilling has wrong configuration, dialogflow failure is caused by a bug in documentation plugin.\n\nPiperOrigin-RevId: 289140194\n\nc9dce519127b97e866ca133a01157f4ce27dcceb\nUpdate Bigtable docs\n\nPiperOrigin-RevId: 289114419\n\n802c5c5f2bf94c3facb011267d04e71942e0d09f\nMigrate DLP to proto annotations (but not GAPIC v2).\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 289102579\n\n6357f30f2ec3cff1d8239d18b707ff9d438ea5da\nRemove gRPC configuration file that was in the wrong place.\n\nPiperOrigin-RevId: 289096111\n\n360a8792ed62f944109d7e22d613a04a010665b4\n Protos for v1p1beta1 release of Cloud Security Command Center\n\nPiperOrigin-RevId: 289011995\n\na79211c20c4f2807eec524d00123bf7c06ad3d6e\nRoll back containeranalysis v1 to GAPIC v1.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288999068\n\n9e60345ba603e03484a8aaa33ce5ffa19c1c652b\nPublish Routes Preferred API v1 proto definitions.\n\nPiperOrigin-RevId: 288941399\n\nd52885b642ad2aa1f42b132ee62dbf49a73e1e24\nMigrate the service management API to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288909426\n\n6ace586805c08896fef43e28a261337fcf3f022b\ncloudtasks: replace missing RPC timeout config\n\nPiperOrigin-RevId: 288783603\n\n51d906cabee4876b12497054b15b05d4a50ad027\nImport of Grafeas from Github.\n\nUpdate BUILD.bazel accordingly.\n\nPiperOrigin-RevId: 288783426\n\n5ef42bcd363ba0440f0ee65b3c80b499e9067ede\nMigrate Recommender v1beta1 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288713066\n\n94f986afd365b7d7e132315ddcd43d7af0e652fb\nMigrate Container Analysis v1 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288708382\n\n7a751a279184970d3b6ba90e4dd4d22a382a0747\nRemove Container Analysis v1alpha1 (nobody publishes it).\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288707473\n\n3c0d9c71242e70474b2b640e15bb0a435fd06ff0\nRemove specious annotation from BigQuery Data Transfer before\nanyone accidentally does anything that uses it.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288701604\n\n1af307a4764bd415ef942ac5187fa1def043006f\nMigrate BigQuery Connection to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 288698681\n\n08b488e0660c59842a7dee0e3e2b65d9e3a514a9\nExposing cloud_catalog.proto (This API is already available through REST)\n\nPiperOrigin-RevId: 288625007\n\na613482977e11ac09fa47687a5d1b5a01efcf794\nUpdate the OS Login v1beta API description to render better in the UI.\n\nPiperOrigin-RevId: 288547940\n\n5e182b8d9943f1b17008d69d4c7e865dc83641a7\nUpdate the OS Login API description to render better in the UI.\n\nPiperOrigin-RevId: 288546443\n\ncb79155f596e0396dd900da93872be7066f6340d\nFix: Add a resource annotation for Agent\nFix: Correct the service name in annotations for Intent and SessionEntityType\n\nPiperOrigin-RevId: 288441307\n\nf7f6e9daec3315fd47cb638789bd8415bf4a27cc\nAdded cloud asset api v1p1beta1\n\nPiperOrigin-RevId: 288427239\n\nf2880f5b342c6345f3dcaad24fcb3c6ca9483654\nBilling account API: Adding config necessary to build client libraries\n\nPiperOrigin-RevId: 288351810\n\ndc250ffe071729f8f8bef9d6fd0fbbeb0254c666\nFix: Remove incorrect resource annotations in requests\n\nPiperOrigin-RevId: 288321208\n\n91ef2d9dd69807b0b79555f22566fb2d81e49ff9\nAdd GAPIC annotations to Cloud KMS (but do not migrate the GAPIC config yet).\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 287999179\n\n4d45a6399e9444fbddaeb1c86aabfde210723714\nRefreshing Cloud Billing API protos.\n\nThis exposes the following API methods:\n- UpdateBillingAccount\n- CreateBillingAccount\n- GetIamPolicy\n- SetIamPolicy\n- TestIamPermissions\n\nThere are also some new fields to support the management of sub-accounts.\n\nPiperOrigin-RevId: 287908369\n\nec285d3d230810147ebbf8d5b691ee90320c6d2d\nHide not yet implemented update_transforms message\n\nPiperOrigin-RevId: 287608953\n\na202fb3b91cd0e4231be878b0348afd17067cbe2\nBigQuery Storage Write API v1alpha2 clients. The service is enabled by whitelist only.\n\nPiperOrigin-RevId: 287379998\n\n650d7f1f8adb0cfaf37b3ce2241c3168f24efd4d\nUpdate Readme.md to match latest Bazel updates\n090d98aea20270e3be4b64240775588f7ce50ff8\ndocs(bigtable): Fix library release level listed in generated documentation\n\nPiperOrigin-RevId: 287308849\n\n2c28f646ca77b1d57550368be22aa388adde2e66\nfirestore: retry reads that fail with contention\n\nPiperOrigin-RevId: 287250665\n\nfd3091fbe9b2083cabc53dc50c78035658bfc4eb\nSync timeout in grpc config back to 10s for tasks API with github googelapis gapic config.\n\nPiperOrigin-RevId: 287207067\n\n49dd7d856a6f77c0cf7e5cb3334423e5089a9e8a\nbazel: Integrate bazel-2.0.0 compatibility fixes\n\nPiperOrigin-RevId: 287205644\n\n46e52fd64973e815cae61e78b14608fe7aa7b1df\nbazel: Integrate bazel build file generator\n\nTo generate/update BUILD.bazel files for any particular client or a batch of clients:\n```\nbazel run //:build_gen -- --src=google/example/library\n```\n\nPiperOrigin-RevId: 286958627\n\n1a380ea21dea9b6ac6ad28c60ad96d9d73574e19\nBigQuery Storage Read API v1beta2 clients.\n\nPiperOrigin-RevId: 286616241\n\n5f3f1d0f1c06b6475a17d995e4f7a436ca67ec9e\nAdd Artman config for secretmanager.\n\nPiperOrigin-RevId: 286598440\n\n50af0530730348f1e3697bf3c70261f7daaf2981\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 286491002\n\n91818800384f4ed26961aea268910b1a2ec58cc8\nFor Data Catalog API,\n1. Add support for marking a tag template field as required when creating a new tag template.\n2. Add support for updating a tag template field from required to optional.\n\nPiperOrigin-RevId: 286490262\n\nff4a2047b3d66f38c9b22197c370ed0d02fc0238\nWeekly library update.\n\nPiperOrigin-RevId: 286484215\n\n192c14029861752a911ed434fd6ee5b850517cd9\nWeekly library update.\n\nPiperOrigin-RevId: 286484165\n\nd9e328eaf790d4e4346fbbf32858160f497a03e0\nFix bazel build (versions 1.x)\n\nBump gapic-generator and resource names plugins to the latest version.\n\nPiperOrigin-RevId: 286469287\n\n0ca305403dcc50e31ad9477c9b6241ddfd2056af\nsecretmanager client package name option updates for java and go\n\nPiperOrigin-RevId: 286439553\n\nade4803e8a1a9e3efd249c8c86895d2f12eb2aaa\niam credentials: publish v1 protos containing annotations\n\nPiperOrigin-RevId: 286418383\n\n03e5708e5f8d1909dcb74b25520309e59ebf24be\nsecuritycenter: add missing proto deps for Bazel build\n\nPiperOrigin-RevId: 286417075\n\n8b991eb3eb82483b0ca1f1361a9c8e5b375c4747\nAdd secretmanager client package name options.\n\nPiperOrigin-RevId: 286415883\n\nd400cb8d45df5b2ae796b909f098a215b2275c1d\ndialogflow: add operation_info annotations to BatchUpdateEntities and BatchDeleteEntities.\n\nPiperOrigin-RevId: 286312673\n\nf2b25232db397ebd4f67eb901a2a4bc99f7cc4c6\nIncreased the default timeout time for all the Cloud Security Command Center client libraries.\n\nPiperOrigin-RevId: 286263771\n\ncb2f1eefd684c7efd56fd375cde8d4084a20439e\nExposing new Resource fields in the SecurityCenterProperties proto, added more comments to the filter logic for these Resource fields, and updated the response proto for the ListFindings API with the new Resource fields.\n\nPiperOrigin-RevId: 286263092\n\n73cebb20432b387c3d8879bb161b517d60cf2552\nUpdate v1beta2 clusters and jobs to include resource ids in GRPC header.\n\nPiperOrigin-RevId: 286261392\n\n1b4e453d51c0bd77e7b73896cdd8357d62768d83\nsecuritycenter: publish v1beta1 protos with annotations\n\nPiperOrigin-RevId: 286228860\n\na985eeda90ae98e8519d2320bee4dec148eb8ccb\nAdd default retry configurations for speech_v1p1beta1.\n\nSettings are copied from speech_gapic.legacy.yaml. The Python client library is being generated with timeouts that are too low. See https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2578\n\nPiperOrigin-RevId: 286191318\n\n3352100a15ede383f5ab3c34599f7a10a3d066fe\nMake importing rule with the same name (but different aliases) from different repositories possible.\n\nThis is needed to allow monolitic gapic-generator and microgenerators coexist during transition period.\n\nTo plug a microgenerator:\n\n1) Add corresponding rules bidnings under `switched_rules_by_language` in repository_rules.bzl:\n rules[\"go_gapic_library2\"] = _switch(\n go and grpc and gapic,\n \"@gapic_generator_go//rules_go_gapic/go_gapic.bzl\",\n \"go_gapic_library\",\n )\n\n2) Import microgenerator in WORKSPACE (the above example assumes that the generator was imported under name \"gapic_generator_go\").\n\n3) To migrate an API from monolith to micro generator (this is done per API and per language) modify the corresponding load statement in the API's BUILD.bazel file. For example, for the example above, to migrate to go microgenerator modify the go-specific load statement in BUILD.bazel file of a specific API (which you want to migrate) to the following:\n\nload(\n \"@com_google_googleapis_imports//:imports.bzl\",\n \"go_gapic_assembly_pkg\",\n go_gapic_library = \"go_gapic_library2\",\n \"go_proto_library\",\n \"go_test\",\n)\n\nPiperOrigin-RevId: 286065440\n\n6ad2bb13bc4b0f3f785517f0563118f6ca52ddfd\nUpdated v1beta1 protos for the client:\n- added support for GenericSignedAttestation which has a generic Signature\n- added support for CVSSv3 and WindowsDetail in Vulnerability\n- documentation updates\n\nPiperOrigin-RevId: 286008145\n\nfe1962e49999a832eed8162c45f23096336a9ced\nAdMob API v1 20191210\n\nBasic account info, mediation and network report available. See https://developers.google.com/admob/api/release-notes for more details.\n\nPiperOrigin-RevId: 285894502\n\n41fc1403738b61427f3a798ca9750ef47eb9c0f2\nAnnotate the required fields for the Monitoring Dashboards API\n\nPiperOrigin-RevId: 285824386\n\n27d0e0f202cbe91bf155fcf36824a87a5764ef1e\nRemove inappropriate resource_reference annotations for UpdateWorkflowTemplateRequest.template.\n\nPiperOrigin-RevId: 285802643\n\ne5c4d3a2b5b5bef0a30df39ebb27711dc98dee64\nAdd Artman BUILD.bazel file for the Monitoring Dashboards API\n\nPiperOrigin-RevId: 285445602\n\n2085a0d3c76180ee843cf2ecef2b94ca5266be31\nFix path in the artman config for Monitoring Dashboard API.\n\nPiperOrigin-RevId: 285233245\n\n2da72dfe71e4cca80902f9e3e125c40f02c2925b\nAdd Artman and GAPIC configs for the Monitoring Dashboards API.\n\nPiperOrigin-RevId: 285211544\n\n9f6eeebf1f30f51ffa02acea5a71680fe592348e\nAdd annotations to Dataproc v1. (Also forwarding comment changes from internal source control.)\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 285197557\n\n19c4589a3cb44b3679f7b3fba88365b3d055d5f8\noslogin: fix v1beta retry configuration\n\nPiperOrigin-RevId: 285013366\n\nee3f02926d0f8a0bc13f8d716581aad20f575751\nAdd Monitoring Dashboards API protocol buffers to Google Cloud Monitoring API.\n\nPiperOrigin-RevId: 284982647\n\ne47fdd266542386e5e7346697f90476e96dc7ee8\nbigquery datatransfer: Remove non-publicly available DataSourceService.\n\nPiperOrigin-RevId: 284822593\n\n6156f433fd1d9d5e4a448d6c6da7f637921d92ea\nAdds OSConfig v1beta protos and initial client library config\n\nPiperOrigin-RevId: 284799663\n\n6cc9499e225a4f6a5e34fe07e390f67055d7991c\nAdd datetime.proto to google/type/BUILD.bazel\n\nPiperOrigin-RevId: 284643689\n\nfe7dd5277e39ffe0075729c61e8d118d7527946d\nCosmetic changes to proto comment as part of testing internal release instructions.\n\nPiperOrigin-RevId: 284608712\n\n68d109adad726b89f74276d2f4b2ba6aac6ec04a\nAdd annotations to securitycenter v1, but leave GAPIC v1 in place.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 284580511\n\ndf8a1707a910fc17c71407a75547992fd1864c51\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 284568564\n\na69a974976221ce3bb944901b739418b85d6408c\nclient library update\n\nPiperOrigin-RevId: 284463979\n\na4adac3a12aca6e3a792c9c35ee850435fe7cf7e\nAdded DateTime, TimeZone, and Month proto files to google/type\n\nPiperOrigin-RevId: 284277770\n\ned5dec392906078db4f7745fe4f11d34dd401ae9\nchange common resources from message-level annotations to file-level annotations.\n\nPiperOrigin-RevId: 284236794\n\na00e2c575ef1b637667b4ebe96b8c228b2ddb273\nbigquerydatatransfer: change resource type TransferRun to Run to be consistent with gapic configs\nbigquerydatatransfer: add missing patterns for DataSource, TransferConfig and Run (to allow the location segment)\nbigquerydatatransfer: add file-level Parent resource type (to allow the location segement)\nbigquerydatatransfer: update grpc service config with correct retry delays\n\nPiperOrigin-RevId: 284234378\n\nb10e4547017ca529ac8d183e839f3c272e1c13de\ncloud asset: replace required fields for batchgetassethistory. Correct the time out duration.\n\nPiperOrigin-RevId: 284059574\n\n6690161e3dcc3367639a2ec10db67bf1cf392550\nAdd default retry configurations for speech_v1.\n\nSettings are copied from speech_gapic.legacy.yaml. The Python client library is being generated with timeouts that are too low. See https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2578\n\nPiperOrigin-RevId: 284035915\n\n9b2635ef91e114f0357bdb87652c26a8f59316d5\ncloudtasks: fix gapic v2 config\n\nPiperOrigin-RevId: 284020555\n\ne5676ba8b863951a8ed0bfd6046e1db38062743c\nReinstate resource name handling in GAPIC config for Asset v1.\n\nPiperOrigin-RevId: 283993903\n\nf337f7fb702c85833b7b6ca56afaf9a1bf32c096\nOSConfig AgentEndpoint: add LookupEffectiveGuestPolicy rpc\n\nPiperOrigin-RevId: 283989762\n\nc0ac9b55f2e2efd0ee525b3a6591a1b09330e55a\nInclude real time feed api into v1 version\n\nPiperOrigin-RevId: 283845474\n\n2427a3a0f6f4222315362d973d91a082a3a884a7\nfirestore admin: update v1 protos with annotations & retry config\n\nPiperOrigin-RevId: 283826605\n\n555e844dbe04af50a8f55fe1217fa9d39a0a80b2\nchore: publish retry configs for iam admin, cloud asset, and remoteworkers\n\nPiperOrigin-RevId: 283801979\n\n6311dc536668849142d1fe5cd9fc46da66d1f77f\nfirestore: update v1beta1 protos with annotations and retry config\n\nPiperOrigin-RevId: 283794315\n\nda0edeeef953b05eb1524d514d2e9842ac2df0fd\nfeat: publish several retry config files for client generation\n\nPiperOrigin-RevId: 283614497\n\n59a78053537e06190f02d0a7ffb792c34e185c5a\nRemoving TODO comment\n\nPiperOrigin-RevId: 283592535\n\n8463992271d162e2aff1d5da5b78db11f2fb5632\nFix bazel build\n\nPiperOrigin-RevId: 283589351\n\n3bfcb3d8df10dfdba58f864d3bdb8ccd69364669\nPublic client library for bebop_jobs_api_20191118_1_RC3 release.\n\nPiperOrigin-RevId: 283568877\n\n27ab0db61021d267c452b34d149161a7bf0d9f57\nfirestore: publish annotated protos and new retry config\n\nPiperOrigin-RevId: 283565148\n\n38dc36a2a43cbab4a2a9183a43dd0441670098a9\nfeat: add http annotations for operations calls\n\nPiperOrigin-RevId: 283384331\n\n366caab94906975af0e17822e372f1d34e319d51\ndatastore: add a legacy artman config for PHP generation\n\nPiperOrigin-RevId: 283378578\n\n82944da21578a53b74e547774cf62ed31a05b841\nMigrate container v1beta1 to GAPIC v2.\n\nPiperOrigin-RevId: 283342796\n\n584dcde5826dd11ebe222016b7b208a4e1196f4b\nRemove resource name annotation for UpdateKeyRequest.key, because it's the resource, not a name.\n\nPiperOrigin-RevId: 283167368\n\n6ab0171e3688bfdcf3dbc4056e2df6345e843565\nAdded resource annotation for Key message.\n\nPiperOrigin-RevId: 283066965\n\n86c1a2db1707a25cec7d92f8850cc915163ec3c3\nExpose Admin API methods for Key manipulation.\n\nPiperOrigin-RevId: 282988776\n\n3ddad085965896ffb205d44cb0c0616fe3def10b\nC++ targets: correct deps so they build, rename them from trace* to cloudtrace*\nto match the proto names.\n\nPiperOrigin-RevId: 282857635\n\ne9389365a971ad6457ceb9646c595e79dfdbdea5\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 282810797\n\ne42eaaa9abed3c4d63d64f790bd3191448dbbca6\nPut back C++ targets for cloud trace v2 api.\n\nPiperOrigin-RevId: 282803841\n\nd8896a3d8a191702a9e39f29cf4c2e16fa05f76d\nAdd initial BUILD.bazel for secretmanager.googleapis.com\n\nPiperOrigin-RevId: 282674885\n\n2cc56cb83ea3e59a6364e0392c29c9e23ad12c3a\nCreate sample for list recommendations\n\nPiperOrigin-RevId: 282665402\n\nf88e2ca65790e3b44bb3455e4779b41de1bf7136\nbump Go to ga\n\nPiperOrigin-RevId: 282651105\n\naac86d932b3cefd7d746f19def6935d16d6235e0\nDocumentation update. Add location_id in preparation for regionalization.\n\nPiperOrigin-RevId: 282586371\n\n5b501cd384f6b842486bd41acce77854876158e7\nMigrate Datastore Admin to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 282570874\n\n6a16d474d5be201b20a27646e2009c4dfde30452\nMigrate Datastore to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 282564329\n\n74bd9b95ac8c70b883814e4765a725cffe43d77c\nmark Go lib ga\n\nPiperOrigin-RevId: 282562558\n\nf7b3d434f44f6a77cf6c37cae5474048a0639298\nAdd secretmanager.googleapis.com protos\n\nPiperOrigin-RevId: 282546399\n\nc34a911aaa0660a45f5a556578f764f135e6e060\niot: bump Go GAPIC to GA release level\n\nPiperOrigin-RevId: 282494787\n\n79b7f1c5ba86859dbf70aa6cd546057c1002cdc0\nPut back C++ targets.\nPrevious change overrode custom C++ targets made by external teams. This PR puts those targets back.\n\nPiperOrigin-RevId: 282458292\n\n06a840781d2dc1b0a28e03e30fb4b1bfb0b29d1e\nPopulate BAZEL.build files for around 100 APIs (all APIs we publish) in all 7 langauges.\n\nPiperOrigin-RevId: 282449910\n\n777b580a046c4fa84a35e1d00658b71964120bb0\nCreate BUILD file for recommender v1beta1\n\nPiperOrigin-RevId: 282068850\n\n48b385b6ef71dfe2596490ea34c9a9a434e74243\nGenerate recommender v1beta1 gRPC ServiceConfig file\n\nPiperOrigin-RevId: 282067795\n\n8395b0f1435a4d7ce8737b3b55392627758bd20c\nfix: Set timeout to 25s, because Tasks fails for any deadline above 30s.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 282017295\n\n3ba7ddc4b2acf532bdfb0004ca26311053c11c30\nfix: Shift Ruby and PHP to legacy GAPIC YAMLs for back-compat.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281852671\n\nad6f0c002194c3ec6c13d592d911d122d2293931\nRemove unneeded yaml files\n\nPiperOrigin-RevId: 281835839\n\n1f42588e4373750588152cdf6f747de1cadbcbef\nrefactor: Migrate Tasks beta 2 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281769558\n\n902b51f2073e9958a2aba441f7f7ac54ea00966d\nrefactor: Migrate Tasks to GAPIC v2 (for real this time).\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281769522\n\n17561f59970eede87f61ef6e9c322fa1198a2f4d\nMigrate Tasks Beta 3 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281769519\n\nf95883b15a1ddd58eb7e3583fdefe7b00505faa3\nRegenerate recommender v1beta1 protos and sanitized yaml\n\nPiperOrigin-RevId: 281765245\n\n9a52df54c626b36699a058013d1735a166933167\nadd gRPC ServiceConfig for grafeas v1\n\nPiperOrigin-RevId: 281762754\n\n7a79d682ef40c5ca39c3fca1c0901a8e90021f8a\nfix: Roll back Tasks GAPIC v2 while we investigate C# issue.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281758548\n\n3fc31491640a90f029f284289e7e97f78f442233\nMigrate Tasks to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281751187\n\n5bc0fecee454f857cec042fb99fe2d22e1bff5bc\nfix: adds operation HTTP rules back to v1p1beta1 config\n\nPiperOrigin-RevId: 281635572\n\n5364a19284a1333b3ffe84e4e78a1919363d9f9c\nbazel: Fix build\n\n1) Update to latest gapic-generator (has iam resource names fix for java).\n2) Fix non-trivial issues with oslogin (resources defined in sibling package to the one they are used from) and monitoring.\n3) Fix trivial missing dependencies in proto_library targets for other apis.\n\nThis is to prepare the repository to being populated with BUILD.bazel files for all supported apis (101 API) in all 7 languages.\n\nPiperOrigin-RevId: 281618750\n\n0aa77cbe45538d5e5739eb637db3f2940b912789\nUpdating common proto files in google/type/ with their latest versions.\n\nPiperOrigin-RevId: 281603926\n\nd47e1b4485b3effbb2298eb10dd13a544c0f66dc\nfix: replace Speech Recognize RPC retry_codes_name for non-standard assignment\n\nPiperOrigin-RevId: 281594037\n\n16543773103e2619d2b5f52456264de5bb9be104\nRegenerating public protos for datacatalog, also adding gRPC service config.\n\nPiperOrigin-RevId: 281423227\n\n328ebe76adb06128d12547ed70107fb841aebf4e\nChange custom data type from String to google.protobuf.Struct to be consistent with other docs such as\nhttps://developers.google.com/actions/smarthome/develop/process-intents#response_format\n\nPiperOrigin-RevId: 281402467\n\n5af83f47b9656261cafcf88b0b3334521ab266b3\n(internal change without visible public changes)\n\nPiperOrigin-RevId: 281334391\n\nc53ed56649583a149382bd88d3c427be475b91b6\nFix typo in protobuf docs.\n\nPiperOrigin-RevId: 281293109\n\nd8dd7fe8d5304f7bd1c52207703d7f27d5328c5a\nFix build by adding missing deps.\n\nPiperOrigin-RevId: 281088257\n\n3ef5ffd7351809d75c1332d2eaad1f24d9c318e4\nMigrate Error Reporting v1beta1 to proto annotations / GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281075722\n\n418ee8e24a56b5959e1c1defa4b6c97f883be379\nTrace v2: Add remaining proto annotations, migrate to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 281068859\n\nc89394342a9ef70acaf73a6959e04b943fbc817b\nThis change updates an outdated comment for the feature importance proto field since they are no longer in [0, 1] for online predictions.\n\nPiperOrigin-RevId: 280761373\n\n1ec8b8e2c3c8f41d7d2b22c594c025276d6a4ae6\nCode refactoring\n\nPiperOrigin-RevId: 280760149\n\n427a22b04039f93b769d89accd6f487413f667c1\nImport automl operation protos.\n\nPiperOrigin-RevId: 280703572\n\n45749a04dac104e986f6cc47da3baf7c8bb6f9b0\nfix: bigqueryconnection_gapic.yaml to reflect proto annotations\n\n* remove connection_credential resource\n* make CreateCredentialRequest.connection_id optional\n* shuffle field ordering in CreateCredential flattening\n\nPiperOrigin-RevId: 280685438\n\n8385366aa1e5d7796793db02a9c5e167d1fd8f17\nRevert the Trace v2 GAPIC for now.\nCommitter: @lukesneeringer\n\nPiperOrigin-RevId: 280669295\n\n5c8ab2c072d557c2f4c4e54b544394e2d62202d5\nMigrate Trace v1 and Trace v2 to GAPIC v2.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 280667429\n\nf6808ff4e8b966cd571e99279d4a2780ed97dff2\nRename the `endpoint_urls` field to `endpoint_uris` to be consistent with\nGoogle API nomenclature.\n\nPiperOrigin-RevId: 280581337\n\n1935fb8889686f5c9d107f11b3c6870fc3aa7cdc\nComment updates\n\nPiperOrigin-RevId: 280451656\n\n0797fd5b9029d630e68a0899734715d62ad38e33\nComment updates\n\nPiperOrigin-RevId: 280451600\n\n9bc8d07b8b749e791d16c8d559526928ceaf1994\nRollback of \"Migrate Cloud Error Reporting to proto annotations & GAPIC v2.\"\n\nPiperOrigin-RevId: 280445975\n\nf8720321aecf4aab42e03602ac2c67f9777d9170\nfix: bigtable retry config in GAPIC v2\n\nPiperOrigin-RevId: 280434856\n\nb11664ba64f92d96d748e0dd9724d006dcafd120\nMigrate Cloud Error Reporting to proto annotations & GAPIC v2.\n\nPiperOrigin-RevId: 280432937\n\n4f747bda9b099b4426f495985680d16d0227fa5f\n1. Change DataCatalog package name in java from com.google.cloud.datacatalog to com.google.cloud.datacatalog.v1beta1 (API version is included in the package). *This is a breaking change.*\n\n2. Add API for Taxonomies (PolicyTagManager and PolicyTagManagerSerialization services).\n\n3. Minor changes to documentation.\n\nPiperOrigin-RevId: 280394936\n\nbc76ffd87360ce1cd34e3a6eac28afd5e1efda76\nUse rules_proto bzl files to load proto_library\n\nThis makes googleapis forward compatible with Bazel incompatible change https://github.com/bazelbuild/bazel/issues/8922.\n\nThis CL was created by adding @rules_proto to the WORKSPACE file and then running:\n\nfind . -name BUILD.bazel | \\\n while read build; do \\\n buildifier --lint=fix --warnings=load $build; \\\n done\n\nSince buildifier cannot be told not to reformat the BUILD file, some files are reformatted.\n\nPiperOrigin-RevId: 280356106\n\n218164b3deba1075979c9dca5f71461379e42dd1\nMake the `permissions` argument in TestIamPermissions required.\n\nPiperOrigin-RevId: 280279014\n\ndec8fd8ea5dc464496606189ba4b8949188639c8\nUpdating Cloud Billing Budget API documentation for clarity.\n\nPiperOrigin-RevId: 280225437\n\na667ffab90deb5e2669eb40ec7b61ec96a3d0454\nIntroduced detailed status message for CreateTimeSeries: CreateTimeSeriesSummary replaces CreateTimeSeriesError, which is now deprecated and unused.\n\nPiperOrigin-RevId: 280221707\n\nbe0a25eceec8916633447a37af0ecea801b85186\nMigrate Bigtable API to GAPIC v2 config.\n\nPiperOrigin-RevId: 280199643\n\n88bbf96b90089994ed16208a0f38cdd07f743742\nFix location of monitoring.yaml in Artman config for monitoring v3.\n\nPiperOrigin-RevId: 280134477\n\ndbaa01a20303758eed0c5a95ad2239ea306ad9a5\nUpdate namespace for PHP.\n\nPiperOrigin-RevId: 280085199\n\nf73b3796a635b2026a590d5133af7fa1f0eb807b\nStandardize pub/sub client default settings across clients:\n- Add retry codes for streaming pull\n- Decrease publish's max_rpc_timeout (mini-timeout) from 10 mins to 1 min\n- Decrease publish's total timeout from 10 mins to 1 min\n- Increase publish batching threshold from 10 to 100 elements\n- Increase publish batching size threshold from 1 KiB to 1 MiB\n\nPiperOrigin-RevId: 280044012\n\n822172613e1d93bede3beaf78b123c42a5876e2b\nReplace local_repository with http_archive in WORKSPACE\n\nPiperOrigin-RevId: 280039052\n\n6a8c7914d1b79bd832b5157a09a9332e8cbd16d4\nAdded notification_supported_by_agent to indicate whether the agent is sending notifications to Google or not.\n\nPiperOrigin-RevId: 279991530\n\n675de3dc9ab98cc1cf54216ad58c933ede54e915\nAdd an endpoint_urls field to the instance admin proto and adds a field_mask field to the GetInstanceRequest.\n\nPiperOrigin-RevId: 279982263\n\nf69562be0608904932bdcfbc5ad8b9a22d9dceb8\nAdds some clarification to IAM Policy public proto comments about the policy versioning compliance check for etag-less SetIamPolicy requests.\n\nPiperOrigin-RevId: 279774957\n\n4e86b2538758e3155e867d1cb4155ee91de7c6e9\nDocumentation update. Add the new action for sending metrics to Stackdriver.\n\nPiperOrigin-RevId: 279768476\n\neafaf30b7a3af0bc72f323fe6a6827327d3cad75\nfix: Restore deleted field to avoid a breaking change.\n\nPiperOrigin-RevId: 279760458\n\ned13a73f3054a29b764f104feaa503820b75140a\nAdd GAPIC annotations to the GKE API.\n\nPiperOrigin-RevId: 279734275\n\n6b125955bf0d6377b96f205e5d187e9d524b7ea2\nUpdate timeouts to 1 hour for default and streaming RPCs.\n\nPiperOrigin-RevId: 279657866\n\n989b304c8a6cfe72bdd7cb264e0d71b784db9421\nAdd Service Monitoring (Service and ServiceLevelObjective) protocol buffers to Google Cloud Monitoring API.\n\nPiperOrigin-RevId: 279649144\n\n1ef3bed9594674bb571ce20418af307505e3f609\nUpdating configs for AgentEndpoint to fix the client library generation.\n\nPiperOrigin-RevId: 279518887\n\n34e661f58d58fa57da8ed113a3d8bb3de26b307d\nUpdate v1beta2 clusters and jobs to include resource ids in GRPC header.\n\nPiperOrigin-RevId: 279417429\n\n248abde06efb7e5a3d81b84de02c8272122b0c3b\nIntegrate GAPIC Python Bazel Extensions\n\nAlso configure python build for the following clients as an example:\n\ndiaglogflow/v2\nlanguage/v1\ntexttospeech/v1\nfirestore/v1beta1\npubsub/v1\n\nPiperOrigin-RevId: 279406526\n\n7ffbf721e29b8806e0c8947c5dd0cdddc02de72a\nOSConfig Agentendpoint: Rename ReportTaskStart to StartNextTask\n\nPiperOrigin-RevId: 279389774\n\n2642d8688bab8981c8a5153b7578f9ff8460a37c\nAgentendpoint API: minor doc updates, addition of exclusive_packages|patches to PatchConfigs.\n\nPiperOrigin-RevId: 279326626\n\nd323b287c782802242005072d15f1474d7d10819\nDocumentation changes.\n\nPiperOrigin-RevId: 279234903\n\n29927f71d92d59551a42272ab7c6e97e8413af78\nPublishing Billing Budgets v1alpha1 API.\n\nPiperOrigin-RevId: 279176561\n\nff413d36f8358818d76fa92006f2d8f608843093\nAdding gRPC service config for Billing Budgets API.\n\nPiperOrigin-RevId: 279175129\n\n3eb91187709cc96bb890c110f518505f65ffd95d\nagentendpoint: removes all gapic languages except Go from artman config\n\nPiperOrigin-RevId: 279173968\n\na34950f968c7944a1036551b545557edcc18c767\nFix bazel build.\n\nUpdate gapic-generator and protoc-java-resource-name plugin dependencies to the latest versions.\n\nThe following clients remain broken because of bugs in gapic-generator and/or corresponding configs\n\ngoogle/cloud/iot/v1\ngoogle/cloud/oslogin/v1\ngoogle/spanner/admin/instance/v1\ngoogle/cloud/oslogin/v1\n\nPiperOrigin-RevId: 279171061\n\n0ed34e9fdf601dfc37eb24c40e17495b86771ff4\nAdds agentendpoint protos and initial client library config\n\nPiperOrigin-RevId: 279147036\n\ncad1d3b365a90c2a9f014b84a2a1acb55c15480f\nUpdates to MediaCard\n\nPiperOrigin-RevId: 279100776\n\n05556c26b633c153f2eca62aeafbcd62705f41b7\nUpdates to MediaCard\n\nPiperOrigin-RevId: 279100278\n\n2275670a746ab2bc03ebba0d914b45320ea15af4\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278922329\n\n5691fcb7c1a926b52577aa1834f31d9c50efda54\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278731899\n\ncb542d6f5f1c9431ec4181d9cfd7f8d8c953e60b\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278688708\n\n311e73f017a474c9a41f2a41b00d5d704ff191c5\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278658917\n\n521ce65c04266df83dde9e2cfd8b2caf057cab45\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278656745\n\nf06bab1c11b7a6dcd15c50525da44c4b2ff3ef3d\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278627678\n\n8c6569ced063c08a48272de2e887860d0c40d388\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278552094\n\n21262f41c4445d24bf441e2a5c250a4207348008\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278486499\n\ndf366ed5ee26ebb73511127b4c329a98ecdd1f7b\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278469200\n\n58bc0f51b1270975b532f5847d9e9e0ff5cdc592\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278368388\n\ne0935db8bfe6fd901ee5d2104b0e1865682899f7\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278368327\n\naf4a739e9d810eb033903f1aa44c615ab729760d\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278132545\n\naac770126e2def40dcc387f50e8007b21c869e58\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 278016738\n\n271fed175d16501fb988e02b891166e9718ff141\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277992079\n\n597951d86beb120bc18428f70ffe0d5b97c70620\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277991975\n\nbba93d7148ff203d400a4929cd0fbc7dafd8dae2\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277920288\n\n5b86376273637f5ce3844f29bf8cb1c4aceaea2d\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277850256\n\n8bc65fb6973a281e8fb9e5c12080644a550322c9\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277813826\n\n30a6ca0f1a98f1777c94fc22094c892c2a43e0ef\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277811318\n\n6bef7bd6184390a4e7aa8f09382d7d97afeccfc4\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277789040\n\naa33c92d79760f2a03ba9b42f855f7a821ed9147\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277759754\n\na4933867265e2b1cbc70f876a4312a92116c36ad\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277759298\n\nb21f96290006525e039b9bd1acddeeae407ae1ff\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277750396\n\n93661a24048eb64755fbbeedd7f6a207d1b4d8dc\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277748718\n\nc0e494ca955a4fdd9ad460a5890a354ec3a3a0ff\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277673798\n\n4e952e7e2bb0dd2ef389d552d48f44c8dc4b5f8f\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277595731\n\n78883c8de959f7a9870c332ab0e3d788b13dd763\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277528057\n\n7c4cf35d5fe3b8ad664bd219edd6d9f28a788b64\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277334937\n\nf28342c58c1df57c92e967961e1eaa641d447dde\nSynchronize new proto/yaml changes.\n\nPiperOrigin-RevId: 277311984\n\n" + "sha": "69d9945330a5721cd679f17331a78850e2618226", + "internalRef": "293080182" + } + }, + { + "template": { + "name": "python_split_library", + "origin": "synthtool.gcp", + "version": "2019.10.17" } } ], @@ -29,742 +35,5 @@ "config": "google/cloud/bigquery/artman_bigquery_v2.yaml" } } - ], - "newFiles": [ - { - "path": ".coveragerc" - }, - { - "path": ".flake8" - }, - { - "path": ".gitignore" - }, - { - "path": ".repo-metadata.json" - }, - { - "path": "CHANGELOG.md" - }, - { - "path": "LICENSE" - }, - { - "path": "MANIFEST.in" - }, - { - "path": "README.rst" - }, - { - "path": "benchmark/README.md" - }, - { - "path": "benchmark/benchmark.py" - }, - { - "path": "benchmark/queries.json" - }, - { - "path": "docs/.gitignore" - }, - { - "path": "docs/README.rst" - }, - { - "path": "docs/_static/custom.css" - }, - { - "path": "docs/_templates/layout.html" - }, - { - "path": "docs/changelog.md" - }, - { - "path": "docs/conf.py" - }, - { - "path": "docs/dbapi.rst" - }, - { - "path": "docs/gapic/v2/enums.rst" - }, - { - "path": "docs/gapic/v2/types.rst" - }, - { - "path": "docs/generated/google.cloud.bigquery.magics.html" - }, - { - "path": "docs/index.rst" - }, - { - "path": "docs/magics.rst" - }, - { - "path": "docs/reference.rst" - }, - { - "path": "docs/snippets.py" - }, - { - "path": "docs/usage.html" - }, - { - "path": "docs/usage/client.rst" - }, - { - "path": "docs/usage/datasets.rst" - }, - { - "path": "docs/usage/encryption.rst" - }, - { - "path": "docs/usage/index.rst" - }, - { - "path": "docs/usage/jobs.rst" - }, - { - "path": "docs/usage/pandas.rst" - }, - { - "path": "docs/usage/queries.rst" - }, - { - "path": "docs/usage/tables.rst" - }, - { - "path": "google/__init__.py" - }, - { - "path": "google/cloud/__init__.py" - }, - { - "path": "google/cloud/bigquery/__init__.py" - }, - { - "path": "google/cloud/bigquery/_helpers.py" - }, - { - "path": "google/cloud/bigquery/_http.py" - }, - { - "path": "google/cloud/bigquery/_pandas_helpers.py" - }, - { - "path": "google/cloud/bigquery/client.py" - }, - { - "path": "google/cloud/bigquery/dataset.py" - }, - { - "path": "google/cloud/bigquery/dbapi/__init__.py" - }, - { - "path": "google/cloud/bigquery/dbapi/_helpers.py" - }, - { - "path": "google/cloud/bigquery/dbapi/connection.py" - }, - { - "path": "google/cloud/bigquery/dbapi/cursor.py" - }, - { - "path": "google/cloud/bigquery/dbapi/exceptions.py" - }, - { - "path": "google/cloud/bigquery/dbapi/types.py" - }, - { - "path": "google/cloud/bigquery/encryption_configuration.py" - }, - { - "path": "google/cloud/bigquery/enums.py" - }, - { - "path": "google/cloud/bigquery/external_config.py" - }, - { - "path": "google/cloud/bigquery/job.py" - }, - { - "path": "google/cloud/bigquery/magics.py" - }, - { - "path": "google/cloud/bigquery/model.py" - }, - { - "path": "google/cloud/bigquery/query.py" - }, - { - "path": "google/cloud/bigquery/retry.py" - }, - { - "path": "google/cloud/bigquery/routine.py" - }, - { - "path": "google/cloud/bigquery/schema.py" - }, - { - "path": "google/cloud/bigquery/table.py" - }, - { - "path": "google/cloud/bigquery_v2/__init__.py" - }, - { - "path": "google/cloud/bigquery_v2/gapic/__init__.py" - }, - { - "path": "google/cloud/bigquery_v2/gapic/enums.py" - }, - { - "path": "google/cloud/bigquery_v2/proto/__init__.py" - }, - { - "path": "google/cloud/bigquery_v2/proto/encryption_config.proto" - }, - { - "path": "google/cloud/bigquery_v2/proto/encryption_config_pb2.py" - }, - { - "path": "google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py" - }, - { - "path": "google/cloud/bigquery_v2/proto/location_metadata.proto" - }, - { - "path": "google/cloud/bigquery_v2/proto/location_metadata_pb2.py" - }, - { - "path": "google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py" - }, - { - "path": "google/cloud/bigquery_v2/proto/model.proto" - }, - { - "path": "google/cloud/bigquery_v2/proto/model_pb2.py" - }, - { - "path": "google/cloud/bigquery_v2/proto/model_pb2_grpc.py" - }, - { - "path": "google/cloud/bigquery_v2/proto/model_reference.proto" - }, - { - "path": "google/cloud/bigquery_v2/proto/model_reference_pb2.py" - }, - { - "path": "google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py" - }, - { - "path": "google/cloud/bigquery_v2/proto/standard_sql.proto" - }, - { - "path": "google/cloud/bigquery_v2/proto/standard_sql_pb2.py" - }, - { - "path": "google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py" - }, - { - "path": "google/cloud/bigquery_v2/types.py" - }, - { - "path": "noxfile.py" - }, - { - "path": "pylint.config.py" - }, - { - "path": "samples/__init__.py" - }, - { - "path": "samples/add_empty_column.py" - }, - { - "path": "samples/browse_table_data.py" - }, - { - "path": "samples/client_list_jobs.py" - }, - { - "path": "samples/client_load_partitioned_table.py" - }, - { - "path": "samples/client_query.py" - }, - { - "path": "samples/client_query_add_column.py" - }, - { - "path": "samples/client_query_batch.py" - }, - { - "path": "samples/client_query_destination_table.py" - }, - { - "path": "samples/client_query_destination_table_cmek.py" - }, - { - "path": "samples/client_query_destination_table_legacy.py" - }, - { - "path": "samples/client_query_dry_run.py" - }, - { - "path": "samples/client_query_legacy_sql.py" - }, - { - "path": "samples/client_query_relax_column.py" - }, - { - "path": "samples/client_query_w_array_params.py" - }, - { - "path": "samples/client_query_w_named_params.py" - }, - { - "path": "samples/client_query_w_positional_params.py" - }, - { - "path": "samples/client_query_w_struct_params.py" - }, - { - "path": "samples/client_query_w_timestamp_params.py" - }, - { - "path": "samples/copy_table.py" - }, - { - "path": "samples/copy_table_cmek.py" - }, - { - "path": "samples/copy_table_multiple_source.py" - }, - { - "path": "samples/create_dataset.py" - }, - { - "path": "samples/create_job.py" - }, - { - "path": "samples/create_routine.py" - }, - { - "path": "samples/create_routine_ddl.py" - }, - { - "path": "samples/create_table.py" - }, - { - "path": "samples/create_table_range_partitioned.py" - }, - { - "path": "samples/dataset_exists.py" - }, - { - "path": "samples/delete_dataset.py" - }, - { - "path": "samples/delete_dataset_labels.py" - }, - { - "path": "samples/delete_model.py" - }, - { - "path": "samples/delete_routine.py" - }, - { - "path": "samples/delete_table.py" - }, - { - "path": "samples/download_public_data.py" - }, - { - "path": "samples/download_public_data_sandbox.py" - }, - { - "path": "samples/get_dataset.py" - }, - { - "path": "samples/get_dataset_labels.py" - }, - { - "path": "samples/get_model.py" - }, - { - "path": "samples/get_routine.py" - }, - { - "path": "samples/get_table.py" - }, - { - "path": "samples/label_dataset.py" - }, - { - "path": "samples/list_datasets.py" - }, - { - "path": "samples/list_datasets_by_label.py" - }, - { - "path": "samples/list_models.py" - }, - { - "path": "samples/list_routines.py" - }, - { - "path": "samples/list_tables.py" - }, - { - "path": "samples/load_table_dataframe.py" - }, - { - "path": "samples/query_external_gcs_temporary_table.py" - }, - { - "path": "samples/query_external_sheets_permanent_table.py" - }, - { - "path": "samples/query_external_sheets_temporary_table.py" - }, - { - "path": "samples/query_no_cache.py" - }, - { - "path": "samples/query_pagination.py" - }, - { - "path": "samples/query_script.py" - }, - { - "path": "samples/query_to_arrow.py" - }, - { - "path": "samples/table_exists.py" - }, - { - "path": "samples/table_insert_rows.py" - }, - { - "path": "samples/table_insert_rows_explicit_none_insert_ids.py" - }, - { - "path": "samples/tests/__init__.py" - }, - { - "path": "samples/tests/conftest.py" - }, - { - "path": "samples/tests/test_add_empty_column.py" - }, - { - "path": "samples/tests/test_browse_table_data.py" - }, - { - "path": "samples/tests/test_client_list_jobs.py" - }, - { - "path": "samples/tests/test_client_load_partitioned_table.py" - }, - { - "path": "samples/tests/test_client_query.py" - }, - { - "path": "samples/tests/test_client_query_add_column.py" - }, - { - "path": "samples/tests/test_client_query_batch.py" - }, - { - "path": "samples/tests/test_client_query_destination_table.py" - }, - { - "path": "samples/tests/test_client_query_destination_table_cmek.py" - }, - { - "path": "samples/tests/test_client_query_destination_table_legacy.py" - }, - { - "path": "samples/tests/test_client_query_dry_run.py" - }, - { - "path": "samples/tests/test_client_query_legacy_sql.py" - }, - { - "path": "samples/tests/test_client_query_relax_column.py" - }, - { - "path": "samples/tests/test_client_query_w_array_params.py" - }, - { - "path": "samples/tests/test_client_query_w_named_params.py" - }, - { - "path": "samples/tests/test_client_query_w_positional_params.py" - }, - { - "path": "samples/tests/test_client_query_w_struct_params.py" - }, - { - "path": "samples/tests/test_client_query_w_timestamp_params.py" - }, - { - "path": "samples/tests/test_copy_table.py" - }, - { - "path": "samples/tests/test_copy_table_cmek.py" - }, - { - "path": "samples/tests/test_copy_table_multiple_source.py" - }, - { - "path": "samples/tests/test_create_dataset.py" - }, - { - "path": "samples/tests/test_create_job.py" - }, - { - "path": "samples/tests/test_create_table.py" - }, - { - "path": "samples/tests/test_create_table_range_partitioned.py" - }, - { - "path": "samples/tests/test_dataset_exists.py" - }, - { - "path": "samples/tests/test_dataset_label_samples.py" - }, - { - "path": "samples/tests/test_delete_dataset.py" - }, - { - "path": "samples/tests/test_delete_table.py" - }, - { - "path": "samples/tests/test_download_public_data.py" - }, - { - "path": "samples/tests/test_download_public_data_sandbox.py" - }, - { - "path": "samples/tests/test_get_dataset.py" - }, - { - "path": "samples/tests/test_get_table.py" - }, - { - "path": "samples/tests/test_list_datasets.py" - }, - { - "path": "samples/tests/test_list_datasets_by_label.py" - }, - { - "path": "samples/tests/test_list_tables.py" - }, - { - "path": "samples/tests/test_load_table_dataframe.py" - }, - { - "path": "samples/tests/test_model_samples.py" - }, - { - "path": "samples/tests/test_query_external_gcs_temporary_table.py" - }, - { - "path": "samples/tests/test_query_external_sheets_permanent_table.py" - }, - { - "path": "samples/tests/test_query_external_sheets_temporary_table.py" - }, - { - "path": "samples/tests/test_query_no_cache.py" - }, - { - "path": "samples/tests/test_query_pagination.py" - }, - { - "path": "samples/tests/test_query_script.py" - }, - { - "path": "samples/tests/test_query_to_arrow.py" - }, - { - "path": "samples/tests/test_routine_samples.py" - }, - { - "path": "samples/tests/test_table_exists.py" - }, - { - "path": "samples/tests/test_table_insert_rows.py" - }, - { - "path": "samples/tests/test_table_insert_rows_explicit_none_insert_ids.py" - }, - { - "path": "samples/tests/test_undelete_table.py" - }, - { - "path": "samples/tests/test_update_dataset_access.py" - }, - { - "path": "samples/tests/test_update_dataset_default_partition_expiration.py" - }, - { - "path": "samples/tests/test_update_dataset_default_table_expiration.py" - }, - { - "path": "samples/tests/test_update_dataset_description.py" - }, - { - "path": "samples/tests/test_update_table_require_partition_filter.py" - }, - { - "path": "samples/undelete_table.py" - }, - { - "path": "samples/update_dataset_access.py" - }, - { - "path": "samples/update_dataset_default_partition_expiration.py" - }, - { - "path": "samples/update_dataset_default_table_expiration.py" - }, - { - "path": "samples/update_dataset_description.py" - }, - { - "path": "samples/update_model.py" - }, - { - "path": "samples/update_routine.py" - }, - { - "path": "samples/update_table_require_partition_filter.py" - }, - { - "path": "setup.cfg" - }, - { - "path": "setup.py" - }, - { - "path": "synth.metadata" - }, - { - "path": "synth.py" - }, - { - "path": "tests/__init__.py" - }, - { - "path": "tests/data/characters.json" - }, - { - "path": "tests/data/characters.jsonl" - }, - { - "path": "tests/data/colors.avro" - }, - { - "path": "tests/data/people.csv" - }, - { - "path": "tests/data/schema.json" - }, - { - "path": "tests/scrub_datasets.py" - }, - { - "path": "tests/system.py" - }, - { - "path": "tests/unit/__init__.py" - }, - { - "path": "tests/unit/enums/__init__.py" - }, - { - "path": "tests/unit/enums/test_standard_sql_data_types.py" - }, - { - "path": "tests/unit/helpers.py" - }, - { - "path": "tests/unit/model/__init__.py" - }, - { - "path": "tests/unit/model/test_model.py" - }, - { - "path": "tests/unit/model/test_model_reference.py" - }, - { - "path": "tests/unit/routine/__init__.py" - }, - { - "path": "tests/unit/routine/test_routine.py" - }, - { - "path": "tests/unit/routine/test_routine_argument.py" - }, - { - "path": "tests/unit/routine/test_routine_reference.py" - }, - { - "path": "tests/unit/test__helpers.py" - }, - { - "path": "tests/unit/test__http.py" - }, - { - "path": "tests/unit/test__pandas_helpers.py" - }, - { - "path": "tests/unit/test_client.py" - }, - { - "path": "tests/unit/test_dataset.py" - }, - { - "path": "tests/unit/test_dbapi__helpers.py" - }, - { - "path": "tests/unit/test_dbapi_connection.py" - }, - { - "path": "tests/unit/test_dbapi_cursor.py" - }, - { - "path": "tests/unit/test_dbapi_types.py" - }, - { - "path": "tests/unit/test_encryption_configuration.py" - }, - { - "path": "tests/unit/test_external_config.py" - }, - { - "path": "tests/unit/test_job.py" - }, - { - "path": "tests/unit/test_magics.py" - }, - { - "path": "tests/unit/test_query.py" - }, - { - "path": "tests/unit/test_retry.py" - }, - { - "path": "tests/unit/test_schema.py" - }, - { - "path": "tests/unit/test_signature_compatibility.py" - }, - { - "path": "tests/unit/test_table.py" - } ] } \ No newline at end of file diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index a20426d3910f..ad6f942139aa 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -14,11 +14,13 @@ """This script is used to synthesize generated parts of this library.""" +import os + import synthtool as s from synthtool import gcp gapic = gcp.GAPICGenerator() - +common = gcp.CommonTemplates() version = 'v2' library = gapic.py_library( @@ -57,4 +59,12 @@ # Format quoted strings as plain text. s.replace("google/cloud/bigquery_v2/proto/*.py", "[“”]", '``') +# ---------------------------------------------------------------------------- +# Add templated files +# ---------------------------------------------------------------------------- +templated_files = common.py_library(cov_level=100) +# we do not want to override the custom noxfile with the generated one +os.remove(os.path.join(templated_files, "noxfile.py")) +s.move(templated_files) + s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/packages/google-cloud-bigquery/test_utils/credentials.json.enc b/packages/google-cloud-bigquery/test_utils/credentials.json.enc new file mode 100644 index 000000000000..f073c7e4f774 --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/credentials.json.enc @@ -0,0 +1,49 @@ +U2FsdGVkX1/vVm/dOEg1DCACYbdOcL+ey6+64A+DZGZVgF8Z/3skK6rpPocu6GOA +UZAqASsBH9QifDf8cKVXQXVYpYq6HSv2O0w7vOmVorZO9GYPo98s9/8XO+4ty/AU +aB6TD68frBAYv4cT/l5m7aYdzfzMTy0EOXoleZT09JYP3B5FV3KCO114FzMXGwrj +HXsR6E5SyUUlUnWPC3eD3aqmovay0gxOKYO3ZwjFK1nlbN/8q6/8nwBCf/Bg6SHV +V93pNxdolRlJev9kgKz4RN1z4jGCy5PAndhSLE82NFIs9LoAiEOU5YeMlN+Ulqus +J92nh+ptUe9a4pJGbAuveUWO7zdS1QyXvTMUcmmSfXCNm/eIQjNuu5+rHtIjWKh8 +Ilwj2w1aTfSptQEhk/kwRgFz/d11vfwJzvwTmCxO6zyOeL0VUWLqdCBGgG5As9He +/RenF8PZ1O0WbTt7fns5oTlTk/MUo+0xJ1xqvu/y45LaqqcBAnEdrWKmtM3dJHWv +ufQku+kD+83F/VwBnQdvgMHu6KZEs6LRrNo58r4QuK6fS7VCACdzxID1RM2cL7kT +6BFRlyGj1aigmjne9g9M9Jx4R+mZDpPU1WDzzG71J4qCUwaX8Dfwutuv4uiFvzwq +NUF0wLJJPtKWmtW+hnZ/fhHQGCRsOpZzFnqp6Zv7J7k6esqxMgIjfal7Djk5Acy8 +j3iVvm6CYmKMVqzL62JHYS9Ye83tzBCaR8hpnJQKgH3FSOFY8HSwrtQSIsl/hSeF +41sgnz0Y+/gkzNeU18qFk+eCZmvljyu+JK0nPYUgpOCJYVBNQpNHz5PUyiAEKhtM +IOSdjPRW1Y+Xf4RroJnLPoF24Ijwrow5LCm9hBRY6TPPMMmnIXCd23xcLJ1rMj6g +x4ZikElans+cwuc9wtbb7w01DcpTwQ1+eIV1qV+KIgpnLjRGLhZD4etobBsrwYu/ +vnIwy2QHCKENPb8sbdgp7x2mF7VSX0/7tf+9+i70EBiMzpOKBkiZhtLzm6hOBkEy +ODaWrx4lTTwbSw8Rmtf58APhPFMsjHoNsjiUoK249Y8Y2Ff4fMfqYsXu6VC1n/At +CuWYHc3EfBwFcLJS+RQB9kFk/4FygFBWq4Kj0MqoRruLbKmoGeJKH9q35W0f0NCD +j+iHt3014kMGiuyJe1UDQ6fvEihFFdHuDivFpPAXDt4PTY/WtpDhaGMx23kb54pK +jkAuxpznAB1lK3u9bGRXDasGeHIrNtIlPvgkrWHXvoBVqM7zry8TGtoxp3E3I42Z +cUfDWfB9GqVdrOwvrTzyZsl2uShRkAJaZFZj5aMyYxiptp4gM8CwWiNtOd2EwtRO +LxZX4M02PQFIqXV3FSDA0q6EwglUrTZdAlYeOEkopaKCtG31dEPOSQG3NGJAEYso +Cxm99H7970dp0OAgpNSgRbcWDbhVbQXnRzvFGqLeH6a9dQ/a8uD3s8Qm9Du/kB6d +XxTRe2OGxzcD0AgI8GClE4rIZHCLbcwuJRp0EYcN+pgY80O4U98fZ5RYpU6OYbU/ +MEiaBYFKtZtGkV6AQD568V7hHJWqc5DDfVHUQ/aeQwnKi2vnU66u+nnV2rZxXxLP ++dqeLRpul+wKa5b/Z5SfQ14Ff8s7aVyxaogGpyggyPL1vyq4KWZ6Or/wEE5hgNO4 +kBh6ht0QT1Hti8XY2JK1M+Jgbjgcg4jkHBGVqegrG1Rvcc2A4TYKwx+QMSBhyxrU +5qhROjS4lTcC42hQslMUkUwc4U/Y91XdFbOOnaAkwzI36NRYL0pmgZnYxGJZeRvr +E5foOhnOEVSFGdOkLfFh+FkWZQf56Lmn8Gg2wHE3dZTxLHibiUYfkgOr1uEosq29 +D1NstvlJURPQ0Q+8QQNWcl9nEZHMAjOmnL1hbx+QfuC6seucp+sXGzdZByMLZbvT +tG8KNL293CmyQowgf9MXToWYnwRkcvqfTaKyor2Ggze3JtoFW4t0j4DI1XPciZFX +XmfApHrzdB/bZadzxyaZ2NE0CuH9zDelwI6rz38xsN5liYnp5qmNKVCZVOHccXa6 +J8x365m5/VaaA2RrtdPqKxn8VaKy7+T690QgMXVGM4PbzQzQxHuSleklocqlP+sB +jSMXCZY+ng/i4UmRO9noiyW3UThYh0hIdMYs12EmmI9cnF/OuYZpl30fmqwV+VNM +td5B2fYvAvvsjiX60SFCn3DATP1GrPMBlZSmhhP3GYS+xrWt3Xxta9qIX2BEF1Gg +twnZZRjoULSRFUYPfJPEOfEH2UQwm84wxx/GezVE+S/RpBlatPOgCiLnNNaLfdTC +mTG9qY9elJv3GGQO8Lqgf4i8blExs05lSPk1BDhzTB6H9TLz+Ge0/l1QxKf3gPXU +aImK1azieXMXHECkdKxrzmehwu1dZ/oYOLc/OFQCETwSRoLPFOFpYUpizwmVVHR6 +uLSfRptte4ZOU3zHfpd/0+J4tkwHwEkGzsmMdqudlm7qME6upuIplyVBH8JiXzUK +n1RIH/OPmVEluAnexWRLZNdk7MrakIO4XACVbICENiYQgAIErP568An6twWEGDbZ +bEN64E3cVDTDRPRAunIhhsEaapcxpFEPWlHorxv36nMUt0R0h0bJlCu5QdzckfcX +ZrRuu1kl76ZfbSE8T0G4/rBb9gsU4Gn3WyvLIO3MgFBuxR68ZwcR8LpEUd8qp38H +NG4cxPmN1nGKo663Z+xI2Gt5up4gpl+fOt4mXqxY386rB7yHaOfElMG5TUYdrS9w +1xbbCVgeJ6zxX+NFlndG33cSAPprhw+C18eUu6ZU63WZcYFo3GfK6rs3lvYtofvE +8DxztdTidQedNVNE+63YCjhxd/cZUI5n/UpgYkr9owp7hNGJiR3tdoNLR2gcoGqL +qWhH928k2aSgF2j97LZ2OqoPCp0tUB7ho4jD2u4Ik3GLVNlCc3dCvWRvpHtDTQDv +tujESMfHUc9I2r4S/PD3bku/ABGwa977Yp1PjzJGr9RajA5is5n6GVpyynwjtKG4 +iyyITpdwpCgr8pueTBLwZnas3slmiMOog/E4PmPgctHzvC+vhQijhUtw5zSsmv0l +bZlw/mVhp5Ta7dTcLBKR8DA3m3vTbaEGkz0xpfQr7GfiSMRbJyvIw88pDK0gyTMD diff --git a/packages/google-cloud-bigquery/test_utils/scripts/circleci/get_tagged_package.py b/packages/google-cloud-bigquery/test_utils/scripts/circleci/get_tagged_package.py new file mode 100644 index 000000000000..c148b9dc2370 --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/scripts/circleci/get_tagged_package.py @@ -0,0 +1,64 @@ +# Copyright 2016 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Helper to determine package from tag. +Get the current package directory corresponding to the Circle Tag. +""" + +from __future__ import print_function + +import os +import re +import sys + + +TAG_RE = re.compile(r""" + ^ + (?P + (([a-z]+)[_-])*) # pkg-name-with-hyphens-or-underscores (empty allowed) + ([0-9]+)\.([0-9]+)\.([0-9]+) # Version x.y.z (x, y, z all ints) + $ +""", re.VERBOSE) +TAG_ENV = 'CIRCLE_TAG' +ERROR_MSG = '%s env. var. not set' % (TAG_ENV,) +BAD_TAG_MSG = 'Invalid tag name: %s. Expected pkg-name-x.y.z' +CIRCLE_CI_SCRIPTS_DIR = os.path.dirname(__file__) +ROOT_DIR = os.path.realpath( + os.path.join(CIRCLE_CI_SCRIPTS_DIR, '..', '..', '..')) + + +def main(): + """Get the current package directory. + Prints the package directory out so callers can consume it. + """ + if TAG_ENV not in os.environ: + print(ERROR_MSG, file=sys.stderr) + sys.exit(1) + + tag_name = os.environ[TAG_ENV] + match = TAG_RE.match(tag_name) + if match is None: + print(BAD_TAG_MSG % (tag_name,), file=sys.stderr) + sys.exit(1) + + pkg_name = match.group('pkg') + if pkg_name is None: + print(ROOT_DIR) + else: + pkg_dir = pkg_name.rstrip('-').replace('-', '_') + print(os.path.join(ROOT_DIR, pkg_dir)) + + +if __name__ == '__main__': + main() diff --git a/packages/google-cloud-bigquery/test_utils/scripts/circleci/twine_upload.sh b/packages/google-cloud-bigquery/test_utils/scripts/circleci/twine_upload.sh new file mode 100755 index 000000000000..23a4738e90b9 --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/scripts/circleci/twine_upload.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright 2016 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ev + +# If this is not a CircleCI tag, no-op. +if [[ -z "$CIRCLE_TAG" ]]; then + echo "This is not a release tag. Doing nothing." + exit 0 +fi + +# H/T: http://stackoverflow.com/a/246128/1068170 +SCRIPT="$(dirname "${BASH_SOURCE[0]}")/get_tagged_package.py" +# Determine the package directory being deploying on this tag. +PKG_DIR="$(python ${SCRIPT})" + +# Ensure that we have the latest versions of Twine, Wheel, and Setuptools. +python3 -m pip install --upgrade twine wheel setuptools + +# Move into the package, build the distribution and upload. +cd ${PKG_DIR} +python3 setup.py sdist bdist_wheel +twine upload dist/* diff --git a/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages.py b/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages.py new file mode 100644 index 000000000000..1d51830cc23a --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages.py @@ -0,0 +1,268 @@ +# Copyright 2017 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Print a list of packages which require testing.""" + +import os +import re +import subprocess +import warnings + + +CURRENT_DIR = os.path.realpath(os.path.dirname(__file__)) +BASE_DIR = os.path.realpath(os.path.join(CURRENT_DIR, '..', '..')) +GITHUB_REPO = os.environ.get('GITHUB_REPO', 'google-cloud-python') +CI = os.environ.get('CI', '') +CI_BRANCH = os.environ.get('CIRCLE_BRANCH') +CI_PR = os.environ.get('CIRCLE_PR_NUMBER') +CIRCLE_TAG = os.environ.get('CIRCLE_TAG') +head_hash, head_name = subprocess.check_output(['git', 'show-ref', 'HEAD'] +).strip().decode('ascii').split() +rev_parse = subprocess.check_output( + ['git', 'rev-parse', '--abbrev-ref', 'HEAD'] +).strip().decode('ascii') +MAJOR_DIV = '#' * 78 +MINOR_DIV = '#' + '-' * 77 + +# NOTE: This reg-ex is copied from ``get_tagged_packages``. +TAG_RE = re.compile(r""" + ^ + (?P + (([a-z]+)-)*) # pkg-name-with-hyphens- (empty allowed) + ([0-9]+)\.([0-9]+)\.([0-9]+) # Version x.y.z (x, y, z all ints) + $ +""", re.VERBOSE) + +# This is the current set of dependencies by package. +# As of this writing, the only "real" dependency is that of error_reporting +# (on logging), the rest are just system test dependencies. +PKG_DEPENDENCIES = { + 'logging': {'pubsub'}, +} + + +def get_baseline(): + """Return the baseline commit. + + On a pull request, or on a branch, return the common parent revision + with the master branch. + + Locally, return a value pulled from environment variables, or None if + the environment variables are not set. + + On a push to master, return None. This will effectively cause everything + to be considered to be affected. + """ + + # If this is a pull request or branch, return the tip for master. + # We will test only packages which have changed since that point. + ci_non_master = (CI == 'true') and any([CI_BRANCH != 'master', CI_PR]) + + if ci_non_master: + + repo_url = 'git@github.com:GoogleCloudPlatform/{}'.format(GITHUB_REPO) + subprocess.run(['git', 'remote', 'add', 'baseline', repo_url], + stderr=subprocess.DEVNULL) + subprocess.run(['git', 'pull', 'baseline'], stderr=subprocess.DEVNULL) + + if CI_PR is None and CI_BRANCH is not None: + output = subprocess.check_output([ + 'git', 'merge-base', '--fork-point', + 'baseline/master', CI_BRANCH]) + return output.strip().decode('ascii') + + return 'baseline/master' + + # If environment variables are set identifying what the master tip is, + # use that. + if os.environ.get('GOOGLE_CLOUD_TESTING_REMOTE', ''): + remote = os.environ['GOOGLE_CLOUD_TESTING_REMOTE'] + branch = os.environ.get('GOOGLE_CLOUD_TESTING_BRANCH', 'master') + return '%s/%s' % (remote, branch) + + # If we are not in CI and we got this far, issue a warning. + if not CI: + warnings.warn('No baseline could be determined; this means tests ' + 'will run for every package. If this is local ' + 'development, set the $GOOGLE_CLOUD_TESTING_REMOTE ' + 'environment variable.') + + # That is all we can do; return None. + return None + + +def get_changed_files(): + """Return a list of files that have been changed since the baseline. + + If there is no base, return None. + """ + # Get the baseline, and fail quickly if there is no baseline. + baseline = get_baseline() + print('# Baseline commit: {}'.format(baseline)) + if not baseline: + return None + + # Return a list of altered files. + try: + return subprocess.check_output([ + 'git', 'diff', '--name-only', '{}..HEAD'.format(baseline), + ], stderr=subprocess.DEVNULL).decode('utf8').strip().split('\n') + except subprocess.CalledProcessError: + warnings.warn('Unable to perform git diff; falling back to assuming ' + 'all packages have changed.') + return None + + +def reverse_map(dict_of_sets): + """Reverse a map of one-to-many. + + So the map:: + + { + 'A': {'B', 'C'}, + 'B': {'C'}, + } + + becomes + + { + 'B': {'A'}, + 'C': {'A', 'B'}, + } + + Args: + dict_of_sets (dict[set]): A dictionary of sets, mapping + one value to many. + + Returns: + dict[set]: The reversed map. + """ + result = {} + for key, values in dict_of_sets.items(): + for value in values: + result.setdefault(value, set()).add(key) + + return result + +def get_changed_packages(file_list): + """Return a list of changed packages based on the provided file list. + + If the file list is None, then all packages should be considered to be + altered. + """ + # Determine a complete list of packages. + all_packages = set() + for file_ in os.listdir(BASE_DIR): + abs_file = os.path.realpath(os.path.join(BASE_DIR, file_)) + nox_file = os.path.join(abs_file, 'nox.py') + if os.path.isdir(abs_file) and os.path.isfile(nox_file): + all_packages.add(file_) + + # If ther is no file list, send down the full package set. + if file_list is None: + return all_packages + + # Create a set based on the list of changed files. + answer = set() + reverse_deps = reverse_map(PKG_DEPENDENCIES) + for file_ in file_list: + # Ignore root directory changes (setup.py, .gitignore, etc.). + if os.path.sep not in file_: + continue + + # Ignore changes that are not in a package (usually this will be docs). + package = file_.split(os.path.sep, 1)[0] + if package not in all_packages: + continue + + # If there is a change in core, short-circuit now and return + # everything. + if package in ('core',): + return all_packages + + # Add the package, as well as any dependencies this package has. + # NOTE: For now, dependencies only go down one level. + answer.add(package) + answer = answer.union(reverse_deps.get(package, set())) + + # We got this far without being short-circuited; return the final answer. + return answer + + +def get_tagged_package(): + """Return the package corresponding to the current tag. + + If there is not tag, will return :data:`None`. + """ + if CIRCLE_TAG is None: + return + + match = TAG_RE.match(CIRCLE_TAG) + if match is None: + return + + pkg_name = match.group('pkg') + if pkg_name == '': + # NOTE: This corresponds to the "umbrella" tag. + return + + return pkg_name.rstrip('-').replace('-', '_') + + +def get_target_packages(): + """Return a list of target packages to be run in the current build. + + If in a tag build, will run only the package(s) that are tagged, otherwise + will run the packages that have file changes in them (or packages that + depend on those). + """ + tagged_package = get_tagged_package() + if tagged_package is None: + file_list = get_changed_files() + print(MAJOR_DIV) + print('# Changed files:') + print(MINOR_DIV) + for file_ in file_list or (): + print('# {}'.format(file_)) + for package in sorted(get_changed_packages(file_list)): + yield package + else: + yield tagged_package + + +def main(): + print(MAJOR_DIV) + print('# Environment') + print(MINOR_DIV) + print('# CircleCI: {}'.format(CI)) + print('# CircleCI branch: {}'.format(CI_BRANCH)) + print('# CircleCI pr: {}'.format(CI_PR)) + print('# CircleCI tag: {}'.format(CIRCLE_TAG)) + print('# HEAD ref: {}'.format(head_hash)) + print('# {}'.format(head_name)) + print('# Git branch: {}'.format(rev_parse)) + print(MAJOR_DIV) + + packages = list(get_target_packages()) + + print(MAJOR_DIV) + print('# Target packages:') + print(MINOR_DIV) + for package in packages: + print(package) + print(MAJOR_DIV) + + +if __name__ == '__main__': + main() diff --git a/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages_kokoro.py b/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages_kokoro.py new file mode 100644 index 000000000000..27d3a0c940ea --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages_kokoro.py @@ -0,0 +1,98 @@ +# Copyright 2017 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Print a list of packages which require testing.""" + +import pathlib +import subprocess + +import ci_diff_helper +import requests + + +def print_environment(environment): + print("-> CI environment:") + print('Branch', environment.branch) + print('PR', environment.pr) + print('In PR', environment.in_pr) + print('Repo URL', environment.repo_url) + if environment.in_pr: + print('PR Base', environment.base) + + +def get_base(environment): + if environment.in_pr: + return environment.base + else: + # If we're not in a PR, just calculate the changes between this commit + # and its parent. + return 'HEAD~1' + + +def get_changed_files_from_base(base): + return subprocess.check_output([ + 'git', 'diff', '--name-only', f'{base}..HEAD', + ], stderr=subprocess.DEVNULL).decode('utf8').strip().split('\n') + + +_URL_TEMPLATE = ( + 'https://api.github.com/repos/googleapis/google-cloud-python/pulls/' + '{}/files' +) + + +def get_changed_files_from_pr(pr): + url = _URL_TEMPLATE.format(pr) + while url is not None: + response = requests.get(url) + for info in response.json(): + yield info['filename'] + url = response.links.get('next', {}).get('url') + + +def determine_changed_packages(changed_files): + packages = [ + path.parent for path in pathlib.Path('.').glob('*/noxfile.py') + ] + + changed_packages = set() + for file in changed_files: + file = pathlib.Path(file) + for package in packages: + if package in file.parents: + changed_packages.add(package) + + return changed_packages + + +def main(): + environment = ci_diff_helper.get_config() + print_environment(environment) + base = get_base(environment) + + if environment.in_pr: + changed_files = list(get_changed_files_from_pr(environment.pr)) + else: + changed_files = get_changed_files_from_base(base) + + packages = determine_changed_packages(changed_files) + + print(f"Comparing against {base}.") + print("-> Changed packages:") + + for package in packages: + print(package) + + +main() diff --git a/packages/google-cloud-bigquery/test_utils/scripts/run_emulator.py b/packages/google-cloud-bigquery/test_utils/scripts/run_emulator.py new file mode 100644 index 000000000000..287b08640691 --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/scripts/run_emulator.py @@ -0,0 +1,199 @@ +# Copyright 2016 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run system tests locally with the emulator. + +First makes system calls to spawn the emulator and get the local environment +variable needed for it. Then calls the system tests. +""" + + +import argparse +import os +import subprocess + +import psutil + +from google.cloud.environment_vars import BIGTABLE_EMULATOR +from google.cloud.environment_vars import GCD_DATASET +from google.cloud.environment_vars import GCD_HOST +from google.cloud.environment_vars import PUBSUB_EMULATOR +from run_system_test import run_module_tests + + +BIGTABLE = 'bigtable' +DATASTORE = 'datastore' +PUBSUB = 'pubsub' +PACKAGE_INFO = { + BIGTABLE: (BIGTABLE_EMULATOR,), + DATASTORE: (GCD_DATASET, GCD_HOST), + PUBSUB: (PUBSUB_EMULATOR,), +} +EXTRA = { + DATASTORE: ('--no-legacy',), +} +_DS_READY_LINE = '[datastore] Dev App Server is now running.\n' +_PS_READY_LINE_PREFIX = '[pubsub] INFO: Server started, listening on ' +_BT_READY_LINE_PREFIX = '[bigtable] Cloud Bigtable emulator running on ' + + +def get_parser(): + """Get simple ``argparse`` parser to determine package. + + :rtype: :class:`argparse.ArgumentParser` + :returns: The parser for this script. + """ + parser = argparse.ArgumentParser( + description='Run google-cloud system tests against local emulator.') + parser.add_argument('--package', dest='package', + choices=sorted(PACKAGE_INFO.keys()), + default=DATASTORE, help='Package to be tested.') + return parser + + +def get_start_command(package): + """Get command line arguments for starting emulator. + + :type package: str + :param package: The package to start an emulator for. + + :rtype: tuple + :returns: The arguments to be used, in a tuple. + """ + result = ('gcloud', 'beta', 'emulators', package, 'start') + extra = EXTRA.get(package, ()) + return result + extra + + +def get_env_init_command(package): + """Get command line arguments for getting emulator env. info. + + :type package: str + :param package: The package to get environment info for. + + :rtype: tuple + :returns: The arguments to be used, in a tuple. + """ + result = ('gcloud', 'beta', 'emulators', package, 'env-init') + extra = EXTRA.get(package, ()) + return result + extra + + +def datastore_wait_ready(popen): + """Wait until the datastore emulator is ready to use. + + :type popen: :class:`subprocess.Popen` + :param popen: An open subprocess to interact with. + """ + emulator_ready = False + while not emulator_ready: + emulator_ready = popen.stderr.readline() == _DS_READY_LINE + + +def wait_ready_prefix(popen, prefix): + """Wait until the a process encounters a line with matching prefix. + + :type popen: :class:`subprocess.Popen` + :param popen: An open subprocess to interact with. + + :type prefix: str + :param prefix: The prefix to match + """ + emulator_ready = False + while not emulator_ready: + emulator_ready = popen.stderr.readline().startswith(prefix) + + +def wait_ready(package, popen): + """Wait until the emulator is ready to use. + + :type package: str + :param package: The package to check if ready. + + :type popen: :class:`subprocess.Popen` + :param popen: An open subprocess to interact with. + + :raises: :class:`KeyError` if the ``package`` is not among + ``datastore``, ``pubsub`` or ``bigtable``. + """ + if package == DATASTORE: + datastore_wait_ready(popen) + elif package == PUBSUB: + wait_ready_prefix(popen, _PS_READY_LINE_PREFIX) + elif package == BIGTABLE: + wait_ready_prefix(popen, _BT_READY_LINE_PREFIX) + else: + raise KeyError('Package not supported', package) + + +def cleanup(pid): + """Cleanup a process (including all of its children). + + :type pid: int + :param pid: Process ID. + """ + proc = psutil.Process(pid) + for child_proc in proc.children(recursive=True): + try: + child_proc.kill() + child_proc.terminate() + except psutil.NoSuchProcess: + pass + proc.terminate() + proc.kill() + + +def run_tests_in_emulator(package): + """Spawn an emulator instance and run the system tests. + + :type package: str + :param package: The package to run system tests against. + """ + # Make sure this package has environment vars to replace. + env_vars = PACKAGE_INFO[package] + + start_command = get_start_command(package) + # Ignore stdin and stdout, don't pollute the user's output with them. + proc_start = subprocess.Popen(start_command, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + try: + wait_ready(package, proc_start) + env_init_command = get_env_init_command(package) + proc_env = subprocess.Popen(env_init_command, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + env_status = proc_env.wait() + if env_status != 0: + raise RuntimeError(env_status, proc_env.stderr.read()) + env_lines = proc_env.stdout.read().strip().split('\n') + # Set environment variables before running the system tests. + for env_var in env_vars: + line_prefix = 'export ' + env_var + '=' + value, = [line.split(line_prefix, 1)[1] for line in env_lines + if line.startswith(line_prefix)] + os.environ[env_var] = value + run_module_tests(package, + ignore_requirements=True) + finally: + cleanup(proc_start.pid) + + +def main(): + """Main method to run this script.""" + parser = get_parser() + args = parser.parse_args() + run_tests_in_emulator(args.package) + + +if __name__ == '__main__': + main() diff --git a/packages/google-cloud-bigquery/test_utils/scripts/update_docs.sh b/packages/google-cloud-bigquery/test_utils/scripts/update_docs.sh new file mode 100755 index 000000000000..8cbab9f0dad0 --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/scripts/update_docs.sh @@ -0,0 +1,93 @@ +#!/bin/bash + +# Copyright 2016 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ev + +GH_OWNER='GoogleCloudPlatform' +GH_PROJECT_NAME='google-cloud-python' + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Function to build the docs. +function build_docs { + rm -rf docs/_build/ + rm -f docs/bigquery/generated/*.rst + # -W -> warnings as errors + # -T -> show full traceback on exception + # -N -> no color + sphinx-build \ + -W -T -N \ + -b html \ + -d docs/_build/doctrees \ + docs/ \ + docs/_build/html/ + return $? +} + +# Only update docs if we are on CircleCI. +if [[ "${CIRCLE_BRANCH}" == "master" ]] && [[ -z "${CIRCLE_PR_NUMBER}" ]]; then + echo "Building new docs on a merged commit." +elif [[ "$1" == "kokoro" ]]; then + echo "Building and publishing docs on Kokoro." +elif [[ -n "${CIRCLE_TAG}" ]]; then + echo "Building new docs on a tag (but will not deploy)." + build_docs + exit $? +else + echo "Not on master nor a release tag." + echo "Building new docs for testing purposes, but not deploying." + build_docs + exit $? +fi + +# Adding GitHub pages branch. `git submodule add` checks it +# out at HEAD. +GH_PAGES_DIR='ghpages' +git submodule add -q -b gh-pages \ + "git@github.com:${GH_OWNER}/${GH_PROJECT_NAME}" ${GH_PAGES_DIR} + +# Determine if we are building a new tag or are building docs +# for master. Then build new docs in docs/_build from master. +if [[ -n "${CIRCLE_TAG}" ]]; then + # Sphinx will use the package version by default. + build_docs +else + SPHINX_RELEASE=$(git log -1 --pretty=%h) build_docs +fi + +# Update gh-pages with the created docs. +cd ${GH_PAGES_DIR} +git rm -fr latest/ +cp -R ../docs/_build/html/ latest/ + +# Update the files push to gh-pages. +git add . +git status + +# If there are no changes, just exit cleanly. +if [[ -z "$(git status --porcelain)" ]]; then + echo "Nothing to commit. Exiting without pushing changes." + exit +fi + +# Commit to gh-pages branch to apply changes. +git config --global user.email "dpebot@google.com" +git config --global user.name "dpebot" +git commit -m "Update docs after merge to master." + +# NOTE: This may fail if two docs updates (on merges to master) +# happen in close proximity. +git push -q origin HEAD:gh-pages diff --git a/packages/google-cloud-bigquery/test_utils/setup.py b/packages/google-cloud-bigquery/test_utils/setup.py new file mode 100644 index 000000000000..8e9222a7f862 --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/setup.py @@ -0,0 +1,64 @@ +# Copyright 2017 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from setuptools import find_packages +from setuptools import setup + + +PACKAGE_ROOT = os.path.abspath(os.path.dirname(__file__)) + + +# NOTE: This is duplicated throughout and we should try to +# consolidate. +SETUP_BASE = { + 'author': 'Google Cloud Platform', + 'author_email': 'googleapis-publisher@google.com', + 'scripts': [], + 'url': 'https://github.com/GoogleCloudPlatform/google-cloud-python', + 'license': 'Apache 2.0', + 'platforms': 'Posix; MacOS X; Windows', + 'include_package_data': True, + 'zip_safe': False, + 'classifiers': [ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Topic :: Internet', + ], +} + + +REQUIREMENTS = [ + 'google-auth >= 0.4.0', + 'six', +] + +setup( + name='google-cloud-testutils', + version='0.24.0', + description='System test utilities for google-cloud-python', + packages=find_packages(), + install_requires=REQUIREMENTS, + python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*', + **SETUP_BASE +) diff --git a/packages/google-cloud-bigquery/test_utils/test_utils/__init__.py b/packages/google-cloud-bigquery/test_utils/test_utils/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/test_utils/test_utils/imports.py b/packages/google-cloud-bigquery/test_utils/test_utils/imports.py new file mode 100644 index 000000000000..5991af7fc465 --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/test_utils/imports.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mock +import six + + +def maybe_fail_import(predicate): + """Create and return a patcher that conditionally makes an import fail. + + Args: + predicate (Callable[[...], bool]): A callable that, if it returns `True`, + triggers an `ImportError`. It must accept the same arguments as the + built-in `__import__` function. + https://docs.python.org/3/library/functions.html#__import__ + + Returns: + A mock patcher object that can be used to enable patched import behavior. + """ + orig_import = six.moves.builtins.__import__ + + def custom_import(name, globals=None, locals=None, fromlist=(), level=0): + if predicate(name, globals, locals, fromlist, level): + raise ImportError + return orig_import(name, globals, locals, fromlist, level) + + return mock.patch.object(six.moves.builtins, "__import__", new=custom_import) diff --git a/packages/google-cloud-bigquery/test_utils/test_utils/retry.py b/packages/google-cloud-bigquery/test_utils/test_utils/retry.py new file mode 100644 index 000000000000..e61c001a03e1 --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/test_utils/retry.py @@ -0,0 +1,207 @@ +# Copyright 2016 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +from functools import wraps + +import six + +MAX_TRIES = 4 +DELAY = 1 +BACKOFF = 2 + + +def _retry_all(_): + """Retry all caught exceptions.""" + return True + + +class BackoffFailed(Exception): + """Retry w/ backoffs did not complete successfully.""" + + +class RetryBase(object): + """Base for retrying calling a decorated function w/ exponential backoff. + + :type max_tries: int + :param max_tries: Number of times to try (not retry) before giving up. + + :type delay: int + :param delay: Initial delay between retries in seconds. + + :type backoff: int + :param backoff: Backoff multiplier e.g. value of 2 will double the + delay each retry. + + :type logger: logging.Logger instance + :param logger: Logger to use. If None, print. + """ + def __init__(self, max_tries=MAX_TRIES, delay=DELAY, backoff=BACKOFF, + logger=None): + self.max_tries = max_tries + self.delay = delay + self.backoff = backoff + self.logger = logger.warning if logger else six.print_ + + +class RetryErrors(RetryBase): + """Decorator for retrying given exceptions in testing. + + :type exception: Exception or tuple of Exceptions + :param exception: The exception to check or may be a tuple of + exceptions to check. + + :type error_predicate: function, takes caught exception, returns bool + :param error_predicate: Predicate evaluating whether to retry after a + caught exception. + + :type max_tries: int + :param max_tries: Number of times to try (not retry) before giving up. + + :type delay: int + :param delay: Initial delay between retries in seconds. + + :type backoff: int + :param backoff: Backoff multiplier e.g. value of 2 will double the + delay each retry. + + :type logger: logging.Logger instance + :param logger: Logger to use. If None, print. + """ + def __init__(self, exception, error_predicate=_retry_all, + max_tries=MAX_TRIES, delay=DELAY, backoff=BACKOFF, + logger=None): + super(RetryErrors, self).__init__(max_tries, delay, backoff, logger) + self.exception = exception + self.error_predicate = error_predicate + + def __call__(self, to_wrap): + @wraps(to_wrap) + def wrapped_function(*args, **kwargs): + tries = 0 + while tries < self.max_tries: + try: + return to_wrap(*args, **kwargs) + except self.exception as caught_exception: + + if not self.error_predicate(caught_exception): + raise + + delay = self.delay * self.backoff**tries + msg = ("%s, Trying again in %d seconds..." % + (caught_exception, delay)) + self.logger(msg) + + time.sleep(delay) + tries += 1 + return to_wrap(*args, **kwargs) + + return wrapped_function + + +class RetryResult(RetryBase): + """Decorator for retrying based on non-error result. + + :type result_predicate: function, takes result, returns bool + :param result_predicate: Predicate evaluating whether to retry after a + result is returned. + + :type max_tries: int + :param max_tries: Number of times to try (not retry) before giving up. + + :type delay: int + :param delay: Initial delay between retries in seconds. + + :type backoff: int + :param backoff: Backoff multiplier e.g. value of 2 will double the + delay each retry. + + :type logger: logging.Logger instance + :param logger: Logger to use. If None, print. + """ + def __init__(self, result_predicate, + max_tries=MAX_TRIES, delay=DELAY, backoff=BACKOFF, + logger=None): + super(RetryResult, self).__init__(max_tries, delay, backoff, logger) + self.result_predicate = result_predicate + + def __call__(self, to_wrap): + @wraps(to_wrap) + def wrapped_function(*args, **kwargs): + tries = 0 + while tries < self.max_tries: + result = to_wrap(*args, **kwargs) + if self.result_predicate(result): + return result + + delay = self.delay * self.backoff**tries + msg = "%s. Trying again in %d seconds..." % ( + self.result_predicate.__name__, delay,) + self.logger(msg) + + time.sleep(delay) + tries += 1 + raise BackoffFailed() + + return wrapped_function + + +class RetryInstanceState(RetryBase): + """Decorator for retrying based on instance state. + + :type instance_predicate: function, takes instance, returns bool + :param instance_predicate: Predicate evaluating whether to retry after an + API-invoking method is called. + + :type max_tries: int + :param max_tries: Number of times to try (not retry) before giving up. + + :type delay: int + :param delay: Initial delay between retries in seconds. + + :type backoff: int + :param backoff: Backoff multiplier e.g. value of 2 will double the + delay each retry. + + :type logger: logging.Logger instance + :param logger: Logger to use. If None, print. + """ + def __init__(self, instance_predicate, + max_tries=MAX_TRIES, delay=DELAY, backoff=BACKOFF, + logger=None): + super(RetryInstanceState, self).__init__( + max_tries, delay, backoff, logger) + self.instance_predicate = instance_predicate + + def __call__(self, to_wrap): + instance = to_wrap.__self__ # only instance methods allowed + + @wraps(to_wrap) + def wrapped_function(*args, **kwargs): + tries = 0 + while tries < self.max_tries: + result = to_wrap(*args, **kwargs) + if self.instance_predicate(instance): + return result + + delay = self.delay * self.backoff**tries + msg = "%s. Trying again in %d seconds..." % ( + self.instance_predicate.__name__, delay,) + self.logger(msg) + + time.sleep(delay) + tries += 1 + raise BackoffFailed() + + return wrapped_function diff --git a/packages/google-cloud-bigquery/test_utils/test_utils/system.py b/packages/google-cloud-bigquery/test_utils/test_utils/system.py new file mode 100644 index 000000000000..590dc62a06e6 --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/test_utils/system.py @@ -0,0 +1,81 @@ +# Copyright 2014 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import os +import sys +import time + +import google.auth.credentials +from google.auth.environment_vars import CREDENTIALS as TEST_CREDENTIALS + + +# From shell environ. May be None. +CREDENTIALS = os.getenv(TEST_CREDENTIALS) + +ENVIRON_ERROR_MSG = """\ +To run the system tests, you need to set some environment variables. +Please check the CONTRIBUTING guide for instructions. +""" + + +class EmulatorCreds(google.auth.credentials.Credentials): + """A mock credential object. + + Used to avoid unnecessary token refreshing or reliance on the network + while an emulator is running. + """ + + def __init__(self): # pylint: disable=super-init-not-called + self.token = b'seekrit' + self.expiry = None + + @property + def valid(self): + """Would-be validity check of the credentials. + + Always is :data:`True`. + """ + return True + + def refresh(self, unused_request): # pylint: disable=unused-argument + """Off-limits implementation for abstract method.""" + raise RuntimeError('Should never be refreshed.') + + +def check_environ(): + err_msg = None + if CREDENTIALS is None: + err_msg = '\nMissing variables: ' + TEST_CREDENTIALS + elif not os.path.isfile(CREDENTIALS): + err_msg = '\nThe %s path %r is not a file.' % (TEST_CREDENTIALS, + CREDENTIALS) + + if err_msg is not None: + msg = ENVIRON_ERROR_MSG + err_msg + print(msg, file=sys.stderr) + sys.exit(1) + + +def unique_resource_id(delimiter='_'): + """A unique identifier for a resource. + + Intended to help locate resources created in particular + testing environments and at particular times. + """ + build_id = os.getenv('CIRCLE_BUILD_NUM', '') + if build_id == '': + return '%s%d' % (delimiter, 1000 * time.time()) + else: + return '%s%s%s%d' % (delimiter, build_id, delimiter, time.time()) diff --git a/packages/google-cloud-bigquery/test_utils/test_utils/vpcsc_config.py b/packages/google-cloud-bigquery/test_utils/test_utils/vpcsc_config.py new file mode 100644 index 000000000000..36b15d6be991 --- /dev/null +++ b/packages/google-cloud-bigquery/test_utils/test_utils/vpcsc_config.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest + + +INSIDE_VPCSC_ENVVAR = "GOOGLE_CLOUD_TESTS_IN_VPCSC" +PROJECT_INSIDE_ENVVAR = "PROJECT_ID" +PROJECT_OUTSIDE_ENVVAR = "GOOGLE_CLOUD_TESTS_VPCSC_OUTSIDE_PERIMETER_PROJECT" +BUCKET_OUTSIDE_ENVVAR = "GOOGLE_CLOUD_TESTS_VPCSC_OUTSIDE_PERIMETER_BUCKET" + + +class VPCSCTestConfig(object): + """System test utility for VPCSC detection. + + See: https://cloud.google.com/vpc-service-controls/docs/ + """ + + @property + def inside_vpcsc(self): + """Test whether the test environment is configured to run inside VPCSC. + + Returns: + bool: + true if the environment is configured to run inside VPCSC, + else false. + """ + return INSIDE_VPCSC_ENVVAR in os.environ + + @property + def project_inside(self): + """Project ID for testing outside access. + + Returns: + str: project ID used for testing outside access; None if undefined. + """ + return os.environ.get(PROJECT_INSIDE_ENVVAR, None) + + @property + def project_outside(self): + """Project ID for testing inside access. + + Returns: + str: project ID used for testing inside access; None if undefined. + """ + return os.environ.get(PROJECT_OUTSIDE_ENVVAR, None) + + @property + def bucket_outside(self): + """GCS bucket for testing inside access. + + Returns: + str: bucket ID used for testing inside access; None if undefined. + """ + return os.environ.get(BUCKET_OUTSIDE_ENVVAR, None) + + def skip_if_inside_vpcsc(self, testcase): + """Test decorator: skip if running inside VPCSC.""" + reason = ( + "Running inside VPCSC. " + "Unset the {} environment variable to enable this test." + ).format(INSIDE_VPCSC_ENVVAR) + skip = pytest.mark.skipif(self.inside_vpcsc, reason=reason) + return skip(testcase) + + def skip_unless_inside_vpcsc(self, testcase): + """Test decorator: skip if running outside VPCSC.""" + reason = ( + "Running outside VPCSC. " + "Set the {} environment variable to enable this test." + ).format(INSIDE_VPCSC_ENVVAR) + skip = pytest.mark.skipif(not self.inside_vpcsc, reason=reason) + return skip(testcase) + + def skip_unless_inside_project(self, testcase): + """Test decorator: skip if inside project env var not set.""" + reason = ( + "Project ID for running inside VPCSC not set. " + "Set the {} environment variable to enable this test." + ).format(PROJECT_INSIDE_ENVVAR) + skip = pytest.mark.skipif(self.project_inside is None, reason=reason) + return skip(testcase) + + def skip_unless_outside_project(self, testcase): + """Test decorator: skip if outside project env var not set.""" + reason = ( + "Project ID for running outside VPCSC not set. " + "Set the {} environment variable to enable this test." + ).format(PROJECT_OUTSIDE_ENVVAR) + skip = pytest.mark.skipif(self.project_outside is None, reason=reason) + return skip(testcase) + + def skip_unless_outside_bucket(self, testcase): + """Test decorator: skip if outside bucket env var not set.""" + reason = ( + "Bucket ID for running outside VPCSC not set. " + "Set the {} environment variable to enable this test." + ).format(BUCKET_OUTSIDE_ENVVAR) + skip = pytest.mark.skipif(self.bucket_outside is None, reason=reason) + return skip(testcase) + + +vpcsc_config = VPCSCTestConfig() From 27a6edfa8c6d0005029dc387761124f54d79fcef Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 6 Feb 2020 17:46:32 +0000 Subject: [PATCH 0744/2016] refactor: noxfile exclusion from synth changes (#30) --- packages/google-cloud-bigquery/synth.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index ad6f942139aa..ec487eb39254 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -14,8 +14,6 @@ """This script is used to synthesize generated parts of this library.""" -import os - import synthtool as s from synthtool import gcp @@ -63,8 +61,6 @@ # Add templated files # ---------------------------------------------------------------------------- templated_files = common.py_library(cov_level=100) -# we do not want to override the custom noxfile with the generated one -os.remove(os.path.join(templated_files, "noxfile.py")) -s.move(templated_files) +s.move(templated_files, excludes=["noxfile.py"]) s.shell.run(["nox", "-s", "blacken"], hide_output=False) From 36191d7015a7eb8e61ba88647fdc1410c6683306 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 10 Feb 2020 20:33:21 +0000 Subject: [PATCH 0745/2016] docs: add limitation note for loading REPEATED fields (#35) --- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5da12990b390..188fb19cbc63 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1712,6 +1712,14 @@ def load_table_from_dataframe( Similar to :meth:`load_table_from_uri`, this method creates, starts and returns a :class:`~google.cloud.bigquery.job.LoadJob`. + .. note:: + + Due to the way REPEATED fields are encoded in the ``parquet`` file + format, a mismatch with the existing table schema can occur, and + 100% compatibility cannot be guaranteed for REPEATED fields. + + https://github.com/googleapis/python-bigquery/issues/17 + Arguments: dataframe (pandas.DataFrame): A :class:`~pandas.DataFrame` containing the data to load. From 1de426f934e6267c1749583de1c218bf2c112e6e Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 14 Feb 2020 12:35:48 -0800 Subject: [PATCH 0746/2016] fix: allow partial streaming_buffer statistics (#37) * fix: allow partial streaming_buffer statistics. Previously, the BQ backend would supply all fields of the streamingBuffer statistics or none. This is no longer the case, so we relax construction to not depend on all values being present. Related: internal issue b/148720220 --- .../google/cloud/bigquery/table.py | 17 +++++++++----- .../tests/unit/test_table.py | 23 +++++++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 555f529f3670..1da0617207ab 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1139,12 +1139,17 @@ class StreamingBuffer(object): """ def __init__(self, resource): - self.estimated_bytes = int(resource["estimatedBytes"]) - self.estimated_rows = int(resource["estimatedRows"]) - # time is in milliseconds since the epoch. - self.oldest_entry_time = google.cloud._helpers._datetime_from_microseconds( - 1000.0 * int(resource["oldestEntryTime"]) - ) + self.estimated_bytes = None + if "estimatedBytes" in resource: + self.estimated_bytes = int(resource["estimatedBytes"]) + self.estimated_rows = None + if "estimatedRows" in resource: + self.estimated_rows = int(resource["estimatedRows"]) + self.oldest_entry_time = None + if "oldestEntryTime" in resource: + self.oldest_entry_time = google.cloud._helpers._datetime_from_microseconds( + 1000.0 * int(resource["oldestEntryTime"]) + ) class Row(object): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 079ec6e000d3..c1611c08487c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -855,6 +855,29 @@ def test_from_api_repr_w_properties(self): table = klass.from_api_repr(RESOURCE) self._verifyResourceProperties(table, RESOURCE) + def test_from_api_repr_w_partial_streamingbuffer(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + + RESOURCE = self._make_resource() + self.OLDEST_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + RESOURCE["streamingBuffer"] = {"oldestEntryTime": _millis(self.OLDEST_TIME)} + klass = self._get_target_class() + table = klass.from_api_repr(RESOURCE) + self.assertIsNotNone(table.streaming_buffer) + self.assertIsNone(table.streaming_buffer.estimated_rows) + self.assertIsNone(table.streaming_buffer.estimated_bytes) + self.assertEqual(table.streaming_buffer.oldest_entry_time, self.OLDEST_TIME) + # Another partial construction + RESOURCE["streamingBuffer"] = {"estimatedRows": 1} + klass = self._get_target_class() + table = klass.from_api_repr(RESOURCE) + self.assertIsNotNone(table.streaming_buffer) + self.assertEqual(table.streaming_buffer.estimated_rows, 1) + self.assertIsNone(table.streaming_buffer.estimated_bytes) + self.assertIsNone(table.streaming_buffer.oldest_entry_time) + def test_from_api_with_encryption(self): self._setUpConstants() RESOURCE = { From 7ac3d178af4adb4bbc758ba0d0daad2171b94b49 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 25 Feb 2020 18:47:00 +0000 Subject: [PATCH 0747/2016] feat: add BigQuery storage client support to DB API (#36) * feat: add BigQueryStorageClient support to DB API * Use BigQuery Storage client in Cursor if available * Skip BQ storage unit tests in Python 3.8 * Add system tests for Cursor w/ BQ storage client * Add test for Connection ctor w/o BQ storage client * Refactor exception handling in Cursor._try_fetch() * Add explicit check against None Co-Authored-By: Tres Seaver * Remove redundand word in a comment in cursor.py Co-authored-by: Tres Seaver --- .../google/cloud/bigquery/dbapi/_helpers.py | 20 ++ .../google/cloud/bigquery/dbapi/connection.py | 39 +++- .../google/cloud/bigquery/dbapi/cursor.py | 68 +++++++ packages/google-cloud-bigquery/noxfile.py | 5 +- .../google-cloud-bigquery/tests/system.py | 100 ++++++++++ .../tests/unit/test_dbapi__helpers.py | 34 ++++ .../tests/unit/test_dbapi_connection.py | 49 ++++- .../tests/unit/test_dbapi_cursor.py | 182 ++++++++++++++++++ 8 files changed, 487 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 651880feac90..6558177d7bb1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -24,6 +24,7 @@ import six from google.cloud import bigquery +from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import exceptions @@ -218,3 +219,22 @@ def array_like(value): return isinstance(value, collections_abc.Sequence) and not isinstance( value, (six.text_type, six.binary_type, bytearray) ) + + +def to_bq_table_rows(rows_iterable): + """Convert table rows to BigQuery table Row instances. + + Args: + rows_iterable (Iterable[Mapping]): + An iterable of row data items to convert to ``Row`` instances. + + Returns: + Iterable[google.cloud.bigquery.table.Row] + """ + + def to_table_row(row): + values = tuple(row.values()) + keys_to_index = {key: i for i, key in enumerate(row.keys())} + return table.Row(values, keys_to_index) + + return (to_table_row(row_data) for row_data in rows_iterable) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py index ee7d0dc3cc59..b8eaf2f9b91d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py @@ -23,10 +23,24 @@ class Connection(object): Args: client (google.cloud.bigquery.Client): A client used to connect to BigQuery. + bqstorage_client(\ + Optional[google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient] \ + ): + [Beta] An alternative client that uses the faster BigQuery Storage + API to fetch rows from BigQuery. If both clients are given, + ``bqstorage_client`` is used first to fetch query results, + with a fallback on ``client``, if necessary. + + .. note:: + There is a known issue with the BigQuery Storage API with small + anonymous result sets, which results in such fallback. + + https://github.com/googleapis/python-bigquery-storage/issues/2 """ - def __init__(self, client): + def __init__(self, client, bqstorage_client=None): self._client = client + self._bqstorage_client = bqstorage_client def close(self): """No-op.""" @@ -43,17 +57,30 @@ def cursor(self): return cursor.Cursor(self) -def connect(client=None): +def connect(client=None, bqstorage_client=None): """Construct a DB-API connection to Google BigQuery. Args: - client (google.cloud.bigquery.Client): - (Optional) A client used to connect to BigQuery. If not passed, a - client is created using default options inferred from the environment. + client (Optional[google.cloud.bigquery.Client]): + A client used to connect to BigQuery. If not passed, a client is + created using default options inferred from the environment. + bqstorage_client(\ + Optional[google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient] \ + ): + [Beta] An alternative client that uses the faster BigQuery Storage + API to fetch rows from BigQuery. If both clients are given, + ``bqstorage_client`` is used first to fetch query results, + with a fallback on ``client``, if necessary. + + .. note:: + There is a known issue with the BigQuery Storage API with small + anonymous result sets, which results in such fallback. + + https://github.com/googleapis/python-bigquery-storage/issues/2 Returns: google.cloud.bigquery.dbapi.Connection: A new DB-API connection to BigQuery. """ if client is None: client = bigquery.Client() - return Connection(client) + return Connection(client, bqstorage_client) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index a3e6ea5be87e..eb73b3d562e5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -21,6 +21,8 @@ except ImportError: # Python 2.7 import collections as collections_abc +import logging + import six from google.cloud.bigquery import job @@ -28,6 +30,9 @@ from google.cloud.bigquery.dbapi import exceptions import google.cloud.exceptions + +_LOGGER = logging.getLogger(__name__) + # Per PEP 249: A 7-item sequence containing information describing one result # column. The first two items (name and type_code) are mandatory, the other # five are optional and are set to None if no meaningful values can be @@ -212,6 +217,30 @@ def _try_fetch(self, size=None): if self._query_data is None: client = self.connection._client + bqstorage_client = self.connection._bqstorage_client + + if bqstorage_client is not None: + try: + rows_iterable = self._bqstorage_fetch(bqstorage_client) + self._query_data = _helpers.to_bq_table_rows(rows_iterable) + return + except google.api_core.exceptions.GoogleAPICallError as exc: + # NOTE: Forbidden is a subclass of GoogleAPICallError + if isinstance(exc, google.api_core.exceptions.Forbidden): + # Don't hide errors such as insufficient permissions to create + # a read session, or the API is not enabled. Both of those are + # clearly problems if the developer has explicitly asked for + # BigQuery Storage API support. + raise + + # There is an issue with reading from small anonymous + # query results tables. If such an error occurs, we silence + # it in order to try again with the tabledata.list API. + _LOGGER.debug( + "Error fetching data with BigQuery Storage API, " + "falling back to tabledata.list API." + ) + rows_iter = client.list_rows( self._query_job.destination, selected_fields=self._query_job._query_results.schema, @@ -219,6 +248,45 @@ def _try_fetch(self, size=None): ) self._query_data = iter(rows_iter) + def _bqstorage_fetch(self, bqstorage_client): + """Start fetching data with the BigQuery Storage API. + + The method assumes that the data about the relevant query job already + exists internally. + + Args: + bqstorage_client(\ + google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ + ): + A client tha know how to talk to the BigQuery Storage API. + + Returns: + Iterable[Mapping]: + A sequence of rows, represented as dictionaries. + """ + # NOTE: Given that BQ storage client instance is passed in, it means + # that bigquery_storage_v1beta1 library is available (no ImportError). + from google.cloud import bigquery_storage_v1beta1 + + table_reference = self._query_job.destination + + read_session = bqstorage_client.create_read_session( + table_reference.to_bqstorage(), + "projects/{}".format(table_reference.project), + # a single stream only, as DB API is not well-suited for multithreading + requested_streams=1, + ) + + if not read_session.streams: + return iter([]) # empty table, nothing to read + + read_position = bigquery_storage_v1beta1.types.StreamPosition( + stream=read_session.streams[0], + ) + read_rows_stream = bqstorage_client.read_rows(read_position) + rows_iterable = read_rows_stream.rows(read_session) + return rows_iterable + def fetchone(self): """Fetch a single row from the results of the last ``execute*()`` call. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index f7e59e56059d..32782d0a0ded 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -48,7 +48,7 @@ def default(session): # Since many tests are skipped due to missing dependencies, test # coverage is much lower in Python 3.8. Remove once we can test with # pyarrow. - coverage_fail_under = "--cov-fail-under=92" + coverage_fail_under = "--cov-fail-under=91" dev_install = ".[pandas,tqdm]" session.install("-e", dev_install) @@ -70,7 +70,7 @@ def default(session): "--cov-report=", coverage_fail_under, os.path.join("tests", "unit"), - *session.posargs + *session.posargs, ) @@ -94,6 +94,7 @@ def system(session): # Install all test dependencies, then install local packages in place. session.install("mock", "pytest", "psutil") session.install("google-cloud-storage") + session.install("fastavro") session.install("-e", "test_utils") session.install("-e", ".[all]") diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 4a1c032717f5..c611d8e7e35a 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -36,6 +36,12 @@ from google.cloud import bigquery_storage_v1beta1 except ImportError: # pragma: NO COVER bigquery_storage_v1beta1 = None + +try: + import fastavro # to parse BQ storage client results +except ImportError: # pragma: NO COVER + fastavro = None + try: import pandas except ImportError: # pragma: NO COVER @@ -1543,6 +1549,100 @@ def test_dbapi_fetchall(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)]) + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_dbapi_fetch_w_bqstorage_client_small_result_set(self): + bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + credentials=Config.CLIENT._credentials + ) + cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() + + # Reading small result sets causes an issue with BQ storage client, + # and the DB API should transparently fall back to the default client. + cursor.execute( + """ + SELECT id, `by`, time_ts + FROM `bigquery-public-data.hacker_news.comments` + ORDER BY `id` ASC + LIMIT 10 + """ + ) + + result_rows = [cursor.fetchone(), cursor.fetchone(), cursor.fetchone()] + + field_name = operator.itemgetter(0) + fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] + + expected_data = [ + [ + ("by", "sama"), + ("id", 15), + ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)), + ], + [ + ("by", "pg"), + ("id", 17), + ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)), + ], + [ + ("by", "pg"), + ("id", 22), + ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)), + ], + ] + self.assertEqual(fetched_data, expected_data) + + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(fastavro is None, "Requires `fastavro`") + def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): + bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + credentials=Config.CLIENT._credentials + ) + cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() + + # Pick a large enouhg LIMIT value to assure that the fallback to the + # default client is not needed due to the result set being too small + # (a known issue that causes problems when reding such result sets with + # BQ storage client). + cursor.execute( + """ + SELECT id, `by`, time_ts + FROM `bigquery-public-data.hacker_news.comments` + ORDER BY `id` ASC + LIMIT 100000 + """ + ) + + result_rows = [cursor.fetchone(), cursor.fetchone(), cursor.fetchone()] + + field_name = operator.itemgetter(0) + fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] + + # Since DB API is not thread safe, only a single result stream should be + # requested by the BQ storage client, meaning that results should arrive + # in the sorted order. + expected_data = [ + [ + ("by", "sama"), + ("id", 15), + ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)), + ], + [ + ("by", "pg"), + ("id", 17), + ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)), + ], + [ + ("by", "pg"), + ("id", 22), + ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)), + ], + ] + self.assertEqual(fetched_data, expected_data) + def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile from google.cloud.bigquery.job import CreateDisposition diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 45c690ede363..8f98d0c530ff 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -15,9 +15,11 @@ import datetime import decimal import math +import operator as op import unittest import google.cloud._helpers +from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions @@ -185,3 +187,35 @@ def test_to_query_parameters_w_list_dict_param(self): def test_to_query_parameters_none_argument(self): query_parameters = _helpers.to_query_parameters(None) self.assertEqual(query_parameters, []) + + +class TestToBqTableRows(unittest.TestCase): + def test_empty_iterable(self): + rows_iterable = iter([]) + result = _helpers.to_bq_table_rows(rows_iterable) + self.assertEqual(list(result), []) + + def test_non_empty_iterable(self): + rows_iterable = [ + dict(one=1.1, four=1.4, two=1.2, three=1.3), + dict(one=2.1, four=2.4, two=2.2, three=2.3), + ] + + result = _helpers.to_bq_table_rows(rows_iterable) + + rows = list(result) + self.assertEqual(len(rows), 2) + + row_1, row_2 = rows + self.assertIsInstance(row_1, table.Row) + self.assertIsInstance(row_2, table.Row) + + field_value = op.itemgetter(1) + + items = sorted(row_1.items(), key=field_value) + expected_items = [("one", 1.1), ("two", 1.2), ("three", 1.3), ("four", 1.4)] + self.assertEqual(items, expected_items) + + items = sorted(row_2.items(), key=field_value) + expected_items = [("one", 2.1), ("two", 2.2), ("three", 2.3), ("four", 2.4)] + self.assertEqual(items, expected_items) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 19acec05bd34..595afd0fe66c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -16,6 +16,11 @@ import mock +try: + from google.cloud import bigquery_storage_v1beta1 +except ImportError: # pragma: NO COVER + bigquery_storage_v1beta1 = None + class TestConnection(unittest.TestCase): @staticmethod @@ -27,19 +32,41 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - def _mock_client(self, rows=None, schema=None): + def _mock_client(self): from google.cloud.bigquery import client mock_client = mock.create_autospec(client.Client) return mock_client - def test_ctor(self): + def _mock_bqstorage_client(self): + from google.cloud.bigquery_storage_v1beta1 import client + + mock_client = mock.create_autospec(client.BigQueryStorageClient) + return mock_client + + def test_ctor_wo_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection mock_client = self._mock_client() connection = self._make_one(client=mock_client) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) + self.assertIsNone(connection._bqstorage_client) + + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ctor_w_bqstorage_client(self): + from google.cloud.bigquery.dbapi import Connection + + mock_client = self._mock_client() + mock_bqstorage_client = self._mock_bqstorage_client() + connection = self._make_one( + client=mock_client, bqstorage_client=mock_bqstorage_client, + ) + self.assertIsInstance(connection, Connection) + self.assertIs(connection._client, mock_client) + self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @mock.patch("google.cloud.bigquery.Client", autospec=True) def test_connect_wo_client(self, mock_client): @@ -49,6 +76,7 @@ def test_connect_wo_client(self, mock_client): connection = connect() self.assertIsInstance(connection, Connection) self.assertIsNotNone(connection._client) + self.assertIsNone(connection._bqstorage_client) def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect @@ -58,6 +86,23 @@ def test_connect_w_client(self): connection = connect(client=mock_client) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) + self.assertIsNone(connection._bqstorage_client) + + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_connect_w_both_clients(self): + from google.cloud.bigquery.dbapi import connect + from google.cloud.bigquery.dbapi import Connection + + mock_client = self._mock_client() + mock_bqstorage_client = self._mock_bqstorage_client() + connection = connect( + client=mock_client, bqstorage_client=mock_bqstorage_client, + ) + self.assertIsInstance(connection, Connection) + self.assertIs(connection._client, mock_client) + self.assertIs(connection._bqstorage_client, mock_bqstorage_client) def test_close(self): connection = self._make_one(client=self._mock_client()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 4ccd5e71af72..e53cc158a4c4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -12,9 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import operator as op import unittest import mock +import six + +from google.api_core import exceptions + +try: + from google.cloud import bigquery_storage_v1beta1 +except ImportError: # pragma: NO COVER + bigquery_storage_v1beta1 = None class TestCursor(unittest.TestCase): @@ -44,6 +53,29 @@ def _mock_client(self, rows=None, schema=None, num_dml_affected_rows=None): mock_client.list_rows.return_value = rows return mock_client + def _mock_bqstorage_client(self, rows=None, stream_count=0): + from google.cloud.bigquery_storage_v1beta1 import client + from google.cloud.bigquery_storage_v1beta1 import types + + if rows is None: + rows = [] + + mock_client = mock.create_autospec(client.BigQueryStorageClient) + + mock_read_session = mock.MagicMock( + streams=[ + types.Stream(name="streams/stream_{}".format(i)) + for i in range(stream_count) + ] + ) + mock_client.create_read_session.return_value = mock_read_session + + mock_rows_stream = mock.MagicMock() + mock_rows_stream.rows.return_value = iter(rows) + mock_client.read_rows.return_value = mock_rows_stream + + return mock_client + def _mock_job(self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import job @@ -180,6 +212,156 @@ def test_fetchall_w_row(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_fetchall_w_bqstorage_client_fetch_success(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery import table + + # use unordered data to also test any non-determenistic key order in dicts + row_data = [ + table.Row([1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), + table.Row([2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), + ] + bqstorage_streamed_rows = [ + {"bar": 1.2, "foo": 1.1, "quux": 1.4, "baz": 1.3}, + {"bar": 2.2, "foo": 2.1, "quux": 2.4, "baz": 2.3}, + ] + + mock_client = self._mock_client(rows=row_data) + mock_bqstorage_client = self._mock_bqstorage_client( + stream_count=1, rows=bqstorage_streamed_rows, + ) + + connection = dbapi.connect( + client=mock_client, bqstorage_client=mock_bqstorage_client, + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar FROM some_table") + + rows = cursor.fetchall() + + # the default client was not used + mock_client.list_rows.assert_not_called() + + # check the data returned + field_value = op.itemgetter(1) + sorted_row_data = [sorted(row.items(), key=field_value) for row in rows] + expected_row_data = [ + [("foo", 1.1), ("bar", 1.2), ("baz", 1.3), ("quux", 1.4)], + [("foo", 2.1), ("bar", 2.2), ("baz", 2.3), ("quux", 2.4)], + ] + + self.assertEqual(sorted_row_data, expected_row_data) + + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_fetchall_w_bqstorage_client_fetch_no_rows(self): + from google.cloud.bigquery import dbapi + + mock_client = self._mock_client(rows=[]) + mock_bqstorage_client = self._mock_bqstorage_client(stream_count=0) + + connection = dbapi.connect( + client=mock_client, bqstorage_client=mock_bqstorage_client, + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar FROM some_table") + + rows = cursor.fetchall() + + # # the default client was not used + mock_client.list_rows.assert_not_called() + + # check the data returned + self.assertEqual(rows, []) + + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery import table + + row_data = [table.Row([1.1, 1.2], {"foo": 0, "bar": 1})] + + mock_client = self._mock_client(rows=row_data) + mock_bqstorage_client = self._mock_bqstorage_client( + stream_count=1, rows=row_data, + ) + no_access_error = exceptions.Forbidden("invalid credentials") + mock_bqstorage_client.create_read_session.side_effect = no_access_error + + connection = dbapi.connect( + client=mock_client, bqstorage_client=mock_bqstorage_client, + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar FROM some_table") + + with six.assertRaisesRegex(self, exceptions.Forbidden, "invalid credentials"): + cursor.fetchall() + + # the default client was not used + mock_client.list_rows.assert_not_called() + + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_fetchall_w_bqstorage_client_fetch_error_fallback_on_client(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery import table + + # use unordered data to also test any non-determenistic key order in dicts + row_data = [ + table.Row([1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), + table.Row([2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), + ] + bqstorage_streamed_rows = [ + {"bar": 1.2, "foo": 1.1, "quux": 1.4, "baz": 1.3}, + {"bar": 2.2, "foo": 2.1, "quux": 2.4, "baz": 2.3}, + ] + + mock_client = self._mock_client(rows=row_data) + mock_bqstorage_client = self._mock_bqstorage_client( + stream_count=1, rows=bqstorage_streamed_rows, + ) + request_error = exceptions.BadRequest("BQ storage what??") + mock_bqstorage_client.create_read_session.side_effect = request_error + + connection = dbapi.connect( + client=mock_client, bqstorage_client=mock_bqstorage_client, + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar FROM some_table") + + logger_patcher = mock.patch("google.cloud.bigquery.dbapi.cursor._LOGGER") + with logger_patcher as mock_logger: + rows = cursor.fetchall() + + # both client were used + mock_bqstorage_client.create_read_session.assert_called() + mock_client.list_rows.assert_called() + + # fallback to default API should have been logged + relevant_calls = [ + call + for call in mock_logger.debug.call_args_list + if call.args and "tabledata.list API" in call.args[0] + ] + self.assertTrue(relevant_calls) + + # check the data returned + field_value = op.itemgetter(1) + sorted_row_data = [sorted(row.items(), key=field_value) for row in rows] + expected_row_data = [ + [("foo", 1.1), ("bar", 1.2), ("baz", 1.3), ("quux", 1.4)], + [("foo", 2.1), ("bar", 2.2), ("baz", 2.3), ("quux", 2.4)], + ] + + self.assertEqual(sorted_row_data, expected_row_data) + def test_execute_custom_job_id(self): from google.cloud.bigquery.dbapi import connect From b14ca1ee5b531a62274e8d2190dbf559e26aaf96 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 4 Mar 2020 13:18:37 +0530 Subject: [PATCH 0748/2016] chore(bigquery): bump pyarrow version to 0.16.0 (#51) * chore(bigquery): bump pyarrow version to 0.16.0 * chore(bigquery): version restrict to 2.0dev --- packages/google-cloud-bigquery/setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 6324c8b250ba..0e920a04e8e1 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -40,9 +40,7 @@ extras = { "bqstorage": [ "google-cloud-bigquery-storage >= 0.6.0, <2.0.0dev", - # Bad Linux release for 0.14.0. - # https://issues.apache.org/jira/browse/ARROW-5868 - "pyarrow>=0.13.0, != 0.14.0", + "pyarrow>=0.16.0, < 2.0dev", ], "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. From b3e767b9edd0881a681f8e2bab3896a55b298732 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 4 Mar 2020 10:41:22 +0000 Subject: [PATCH 0749/2016] chore: declare full Python 3.8 support (#47) * chore: include arrow and fastparquet in Python 3.8 * Declare Python 3.8 support in package metadata * Bump Python 3.7 to 3.8 in several nox sessions. --- packages/google-cloud-bigquery/noxfile.py | 20 ++++++-------------- packages/google-cloud-bigquery/setup.py | 1 + 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 32782d0a0ded..94902187471b 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -43,14 +43,6 @@ def default(session): # serialization, though. dev_install = ".[all,fastparquet]" - # There is no pyarrow or fastparquet wheel for Python 3.8. - if session.python == "3.8": - # Since many tests are skipped due to missing dependencies, test - # coverage is much lower in Python 3.8. Remove once we can test with - # pyarrow. - coverage_fail_under = "--cov-fail-under=91" - dev_install = ".[pandas,tqdm]" - session.install("-e", dev_install) # IPython does not support Python 2 after version 5.x @@ -80,7 +72,7 @@ def unit(session): default(session) -@nox.session(python=["2.7", "3.7"]) +@nox.session(python=["2.7", "3.8"]) def system(session): """Run the system test suite.""" @@ -110,7 +102,7 @@ def system(session): ) -@nox.session(python=["2.7", "3.7"]) +@nox.session(python=["2.7", "3.8"]) def snippets(session): """Run the snippets test suite.""" @@ -130,7 +122,7 @@ def snippets(session): session.run("py.test", "samples", *session.posargs) -@nox.session(python="3.7") +@nox.session(python="3.8") def cover(session): """Run the final coverage report. @@ -142,7 +134,7 @@ def cover(session): session.run("coverage", "erase") -@nox.session(python="3.7") +@nox.session(python="3.8") def lint(session): """Run linters. @@ -159,7 +151,7 @@ def lint(session): session.run("black", "--check", *BLACK_PATHS) -@nox.session(python="3.7") +@nox.session(python="3.8") def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" @@ -180,7 +172,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python="3.7") +@nox.session(python="3.8") def docs(session): """Build the docs.""" diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 0e920a04e8e1..4519472d323b 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -105,6 +105,7 @@ "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Operating System :: OS Independent", "Topic :: Internet", ], From fa4912063d2993af50e6c38c2d580a28cc025c7e Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 4 Mar 2020 17:33:21 +0530 Subject: [PATCH 0750/2016] fix(bigquery): fix start index with page size for list rows (#27) Co-authored-by: Peter Lamut --- .../google/cloud/bigquery/table.py | 2 + .../tests/unit/test_client.py | 65 +++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 1da0617207ab..f7b575536354 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1367,6 +1367,8 @@ def _get_next_page_response(self): """ params = self._get_query_params() if self._page_size is not None: + if self.page_number and "startIndex" in params: + del params["startIndex"] params["maxResults"] = self._page_size return self.api_request( method=self._HTTP_METHOD, path=self.path, query_params=params diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 2227183a9236..a82445876f26 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5612,6 +5612,71 @@ def _bigquery_timestamp_float_repr(ts_float): method="GET", path="/%s" % PATH, query_params={}, timeout=7.5 ) + def test_list_rows_w_start_index_w_page_size(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import Row + + PATH = "projects/%s/datasets/%s/tables/%s/data" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + + page_1 = { + "totalRows": 4, + "pageToken": "some-page-token", + "rows": [ + {"f": [{"v": "Phred Phlyntstone"}]}, + {"f": [{"v": "Bharney Rhubble"}]}, + ], + } + page_2 = { + "totalRows": 4, + "rows": [ + {"f": [{"v": "Wylma Phlyntstone"}]}, + {"f": [{"v": "Bhettye Rhubble"}]}, + ], + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(page_1, page_2) + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + table = Table(self.TABLE_REF, schema=[full_name]) + iterator = client.list_rows(table, max_results=4, page_size=2, start_index=1) + pages = iterator.pages + rows = list(six.next(pages)) + extra_params = iterator.extra_params + f2i = {"full_name": 0} + self.assertEqual(len(rows), 2) + self.assertEqual(rows[0], Row(("Phred Phlyntstone",), f2i)) + self.assertEqual(rows[1], Row(("Bharney Rhubble",), f2i)) + + rows = list(six.next(pages)) + + self.assertEqual(len(rows), 2) + self.assertEqual(rows[0], Row(("Wylma Phlyntstone",), f2i)) + self.assertEqual(rows[1], Row(("Bhettye Rhubble",), f2i)) + self.assertEqual(extra_params, {"startIndex": 1}) + + conn.api_request.assert_has_calls( + [ + mock.call( + method="GET", + path="/%s" % PATH, + query_params={"startIndex": 1, "maxResults": 2}, + timeout=None, + ), + mock.call( + method="GET", + path="/%s" % PATH, + query_params={"pageToken": "some-page-token", "maxResults": 2}, + timeout=None, + ), + ] + ) + def test_list_rows_empty_table(self): response = {"totalRows": "0", "rows": []} creds = _make_credentials() From a1ceca0bfee2346933b03930ddd3307117b0816a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 9 Mar 2020 07:29:27 +0000 Subject: [PATCH 0751/2016] docs: use timeout in create dataset sample (#52) * docs: use timeout in create dataset sample * Emphasize "explicit" in comment timeout --- packages/google-cloud-bigquery/samples/create_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/create_dataset.py b/packages/google-cloud-bigquery/samples/create_dataset.py index e47d68a96b2a..6af3c67eb623 100644 --- a/packages/google-cloud-bigquery/samples/create_dataset.py +++ b/packages/google-cloud-bigquery/samples/create_dataset.py @@ -30,9 +30,9 @@ def create_dataset(dataset_id): # TODO(developer): Specify the geographic location where the dataset should reside. dataset.location = "US" - # Send the dataset to the API for creation. + # Send the dataset to the API for creation, with an explicit timeout. # Raises google.api_core.exceptions.Conflict if the Dataset already # exists within the project. - dataset = client.create_dataset(dataset) # Make an API request. + dataset = client.create_dataset(dataset, timeout=30) # Make an API request. print("Created dataset {}.{}".format(client.project, dataset.dataset_id)) # [END bigquery_create_dataset] From b4acb5a5cec4f92e1905de46d942478a02c29f42 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 9 Mar 2020 18:49:49 +0000 Subject: [PATCH 0752/2016] fix: distinguish server timeouts from transport timeouts (#43) * fix: distinguish transport and query timeouts A transport layer timeout is made independent of the query timeout, i.e. the maximum time to wait for the query to complete. The query timeout is used by the blocking poll so that the backend does not block for too long when polling for job completion, but the transport can have different timeout requirements, and we do not want it to be raising sometimes unnecessary timeout errors. * Apply timeout to each of the underlying requests As job methods do not split the timeout anymore between all requests a method might make, the Client methods are adjusted in the same way. --- .../google/cloud/bigquery/client.py | 37 +++----- .../google/cloud/bigquery/job.py | 62 ++++--------- .../tests/unit/test_client.py | 78 ---------------- .../tests/unit/test_job.py | 88 ------------------- 4 files changed, 32 insertions(+), 233 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 188fb19cbc63..343217ae6858 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -22,7 +22,6 @@ except ImportError: # Python 2.7 import collections as collections_abc -import concurrent.futures import copy import functools import gzip @@ -48,7 +47,6 @@ import google.api_core.client_options import google.api_core.exceptions from google.api_core import page_iterator -from google.auth.transport.requests import TimeoutGuard import google.cloud._helpers from google.cloud import exceptions from google.cloud.client import ClientWithProject @@ -2598,27 +2596,22 @@ def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - If multiple requests are made under the hood, ``timeout`` is - interpreted as the approximate total time of **all** requests. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. Returns: List[str]: A list of the partition ids present in the partitioned table """ table = _table_arg_to_table_ref(table, default_project=self.project) - - with TimeoutGuard( - timeout, timeout_error_type=concurrent.futures.TimeoutError - ) as guard: - meta_table = self.get_table( - TableReference( - DatasetReference(table.project, table.dataset_id), - "%s$__PARTITIONS_SUMMARY__" % table.table_id, - ), - retry=retry, - timeout=timeout, - ) - timeout = guard.remaining_timeout + meta_table = self.get_table( + TableReference( + DatasetReference(table.project, table.dataset_id), + "%s$__PARTITIONS_SUMMARY__" % table.table_id, + ), + retry=retry, + timeout=timeout, + ) subset = [col for col in meta_table.schema if col.name == "partition_id"] return [ @@ -2685,8 +2678,8 @@ def list_rows( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - If multiple requests are made under the hood, ``timeout`` is - interpreted as the approximate total time of **all** requests. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. Returns: google.cloud.bigquery.table.RowIterator: @@ -2711,11 +2704,7 @@ def list_rows( # No schema, but no selected_fields. Assume the developer wants all # columns, so get the table resource for them rather than failing. elif len(schema) == 0: - with TimeoutGuard( - timeout, timeout_error_type=concurrent.futures.TimeoutError - ) as guard: - table = self.get_table(table.reference, retry=retry, timeout=timeout) - timeout = guard.remaining_timeout + table = self.get_table(table.reference, retry=retry, timeout=timeout) schema = table.schema params = {} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 5861febe830d..ab2eaede5986 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -26,7 +26,6 @@ from six.moves import http_client import google.api_core.future.polling -from google.auth.transport.requests import TimeoutGuard from google.cloud import exceptions from google.cloud.exceptions import NotFound from google.cloud.bigquery.dataset import Dataset @@ -55,7 +54,6 @@ _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" _TIMEOUT_BUFFER_SECS = 0.1 -_SERVER_TIMEOUT_MARGIN_SECS = 1.0 _CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) _ERROR_REASON_TO_EXCEPTION = { @@ -796,8 +794,8 @@ def result(self, retry=DEFAULT_RETRY, timeout=None): timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - If multiple requests are made under the hood, ``timeout`` is - interpreted as the approximate total time of **all** requests. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. Returns: _AsyncJob: This instance. @@ -809,11 +807,7 @@ def result(self, retry=DEFAULT_RETRY, timeout=None): if the job did not complete in the given timeout. """ if self.state is None: - with TimeoutGuard( - timeout, timeout_error_type=concurrent.futures.TimeoutError - ) as guard: - self._begin(retry=retry, timeout=timeout) - timeout = guard.remaining_timeout + self._begin(retry=retry, timeout=timeout) # TODO: modify PollingFuture so it can pass a retry argument to done(). return super(_AsyncJob, self).result(timeout=timeout) @@ -2602,6 +2596,7 @@ def __init__(self, job_id, query, client, job_config=None): self._configuration = job_config self._query_results = None self._done_timeout = None + self._transport_timeout = None @property def allow_large_results(self): @@ -3059,19 +3054,9 @@ def done(self, retry=DEFAULT_RETRY, timeout=None): self._done_timeout = max(0, self._done_timeout) timeout_ms = int(api_timeout * 1000) - # If the server-side processing timeout (timeout_ms) is specified and - # would be picked as the total request timeout, we want to add a small - # margin to it - we don't want to timeout the connection just as the - # server-side processing might have completed, but instead slightly - # after the server-side deadline. - # However, if `timeout` is specified, and is shorter than the adjusted - # server timeout, the former prevails. - if timeout_ms is not None and timeout_ms > 0: - server_timeout_with_margin = timeout_ms / 1000 + _SERVER_TIMEOUT_MARGIN_SECS - if timeout is not None: - timeout = min(server_timeout_with_margin, timeout) - else: - timeout = server_timeout_with_margin + # If an explicit timeout is not given, fall back to the transport timeout + # stored in _blocking_poll() in the process of polling for job completion. + transport_timeout = timeout if timeout is not None else self._transport_timeout # Do not refresh if the state is already done, as the job will not # change once complete. @@ -3082,19 +3067,20 @@ def done(self, retry=DEFAULT_RETRY, timeout=None): project=self.project, timeout_ms=timeout_ms, location=self.location, - timeout=timeout, + timeout=transport_timeout, ) # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are # correctly populated. if self._query_results.complete: - self.reload(retry=retry, timeout=timeout) + self.reload(retry=retry, timeout=transport_timeout) return self.state == _DONE_STATE def _blocking_poll(self, timeout=None): self._done_timeout = timeout + self._transport_timeout = timeout super(QueryJob, self)._blocking_poll(timeout=timeout) @staticmethod @@ -3170,8 +3156,8 @@ def result( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - If multiple requests are made under the hood, ``timeout`` is - interpreted as the approximate total time of **all** requests. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. Returns: google.cloud.bigquery.table.RowIterator: @@ -3189,27 +3175,17 @@ def result( If the job did not complete in the given timeout. """ try: - guard = TimeoutGuard( - timeout, timeout_error_type=concurrent.futures.TimeoutError - ) - with guard: - super(QueryJob, self).result(retry=retry, timeout=timeout) - timeout = guard.remaining_timeout + super(QueryJob, self).result(retry=retry, timeout=timeout) # Return an iterator instead of returning the job. if not self._query_results: - guard = TimeoutGuard( - timeout, timeout_error_type=concurrent.futures.TimeoutError + self._query_results = self._client._get_query_results( + self.job_id, + retry, + project=self.project, + location=self.location, + timeout=timeout, ) - with guard: - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - location=self.location, - timeout=timeout, - ) - timeout = guard.remaining_timeout except exceptions.GoogleCloudError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index a82445876f26..b8dfbbad14f4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -24,7 +24,6 @@ import unittest import warnings -import freezegun import mock import requests import six @@ -5496,43 +5495,6 @@ def test_list_partitions_with_string_id(self): self.assertEqual(len(partition_list), 0) - def test_list_partitions_splitting_timout_between_requests(self): - from google.cloud.bigquery.table import Table - - row_count = 2 - meta_info = _make_list_partitons_meta_info( - self.PROJECT, self.DS_ID, self.TABLE_ID, row_count - ) - - data = { - "totalRows": str(row_count), - "rows": [{"f": [{"v": "20180101"}]}, {"f": [{"v": "20180102"}]}], - } - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = make_connection(meta_info, data) - table = Table(self.TABLE_REF) - - with freezegun.freeze_time("2019-01-01 00:00:00", tick=False) as frozen_time: - - def delayed_get_table(*args, **kwargs): - frozen_time.tick(delta=1.4) - return orig_get_table(*args, **kwargs) - - orig_get_table = client.get_table - client.get_table = mock.Mock(side_effect=delayed_get_table) - - client.list_partitions(table, timeout=5.0) - - client.get_table.assert_called_once() - _, kwargs = client.get_table.call_args - self.assertEqual(kwargs.get("timeout"), 5.0) - - client._connection.api_request.assert_called() - _, kwargs = client._connection.api_request.call_args - self.assertAlmostEqual(kwargs.get("timeout"), 3.6, places=5) - def test_list_rows(self): import datetime from google.cloud._helpers import UTC @@ -5918,46 +5880,6 @@ def test_list_rows_with_missing_schema(self): self.assertEqual(rows[1].age, 31, msg=repr(table)) self.assertIsNone(rows[2].age, msg=repr(table)) - def test_list_rows_splitting_timout_between_requests(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table - - response = {"totalRows": "0", "rows": []} - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = make_connection(response, response) - - table = Table( - self.TABLE_REF, schema=[SchemaField("field_x", "INTEGER", mode="NULLABLE")] - ) - - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False) as frozen_time: - - def delayed_get_table(*args, **kwargs): - frozen_time.tick(delta=1.4) - return table - - client.get_table = mock.Mock(side_effect=delayed_get_table) - - rows_iter = client.list_rows( - "{}.{}.{}".format( - self.TABLE_REF.project, - self.TABLE_REF.dataset_id, - self.TABLE_REF.table_id, - ), - timeout=5.0, - ) - six.next(rows_iter.pages) - - client.get_table.assert_called_once() - _, kwargs = client.get_table.call_args - self.assertEqual(kwargs.get("timeout"), 5.0) - - client._connection.api_request.assert_called_once() - _, kwargs = client._connection.api_request.call_args - self.assertAlmostEqual(kwargs.get("timeout"), 3.6) - def test_list_rows_error(self): creds = _make_credentials() http = object() diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 6b0d4b8fb352..3e642142d041 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -994,24 +994,6 @@ def test_result_explicit_w_state(self, result): begin.assert_not_called() result.assert_called_once_with(timeout=timeout) - @mock.patch("google.api_core.future.polling.PollingFuture.result") - def test_result_splitting_timout_between_requests(self, result): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - begin = job._begin = mock.Mock() - retry = mock.Mock() - - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False) as frozen_time: - - def delayed_begin(*args, **kwargs): - frozen_time.tick(delta=0.3) - - begin.side_effect = delayed_begin - job.result(retry=retry, timeout=1.0) - - begin.assert_called_once_with(retry=retry, timeout=1.0) - result.assert_called_once_with(timeout=0.7) - def test_cancelled_wo_error_result(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -4011,33 +3993,6 @@ def test_done_w_timeout(self): call_args = fake_reload.call_args self.assertEqual(call_args.kwargs.get("timeout"), 42) - def test_done_w_timeout_and_shorter_internal_api_timeout(self): - from google.cloud.bigquery.job import _TIMEOUT_BUFFER_SECS - from google.cloud.bigquery.job import _SERVER_TIMEOUT_MARGIN_SECS - - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._done_timeout = 8.8 - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=42) - - # The expected timeout used is the job's own done_timeout minus a - # fixed amount (bigquery.job._TIMEOUT_BUFFER_SECS) increased by the - # safety margin on top of server-side processing timeout - that's - # because that final number is smaller than the given timeout (42 seconds). - expected_timeout = 8.8 - _TIMEOUT_BUFFER_SECS + _SERVER_TIMEOUT_MARGIN_SECS - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) - - call_args = fake_reload.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) - def test_done_w_timeout_and_longer_internal_api_timeout(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=False) @@ -4623,49 +4578,6 @@ def test_result_w_timeout(self): self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) self.assertEqual(reload_request[1]["method"], "GET") - @mock.patch("google.api_core.future.polling.PollingFuture.result") - def test_result_splitting_timout_between_requests(self, polling_result): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "5", - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - - connection = _make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - client.list_rows = mock.Mock() - - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False) as frozen_time: - - def delayed_result(*args, **kwargs): - frozen_time.tick(delta=0.8) - - polling_result.side_effect = delayed_result - - def delayed_get_results(*args, **kwargs): - frozen_time.tick(delta=0.5) - return orig_get_results(*args, **kwargs) - - orig_get_results = client._get_query_results - client._get_query_results = mock.Mock(side_effect=delayed_get_results) - job.result(timeout=2.0) - - polling_result.assert_called_once_with(timeout=2.0) - - client._get_query_results.assert_called_once() - _, kwargs = client._get_query_results.call_args - self.assertAlmostEqual(kwargs.get("timeout"), 1.2) - - client.list_rows.assert_called_once() - _, kwargs = client.list_rows.call_args - self.assertAlmostEqual(kwargs.get("timeout"), 0.7) - def test_result_w_page_size(self): # Arrange query_results_resource = { From 8fad17d69a3307cf19adfe3c41c330b4b2f912c4 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 17 Mar 2020 09:37:20 +0100 Subject: [PATCH 0753/2016] chore: deprecate pandas code paths that don't use pyarrow (#48) * feat: deprecate code paths without pyarrow * Emit warning if loading dataframe data w/o pyarrow * Issue a warning in table.to dataframe() w/o pyarrow --- .../google/cloud/bigquery/__init__.py | 3 ++ .../google/cloud/bigquery/client.py | 10 +++++ .../google/cloud/bigquery/exceptions.py | 17 ++++++++ .../google/cloud/bigquery/table.py | 9 +++++ .../tests/unit/test_client.py | 40 ++++++++++++++++++- .../tests/unit/test_table.py | 32 +++++++++++++++ 6 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 3982c1175850..63d71694cb5c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -38,6 +38,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums from google.cloud.bigquery.enums import StandardSqlDataTypes +from google.cloud.bigquery.exceptions import PyarrowMissingWarning from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -142,6 +143,8 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", + # Errors and warnings + "PyarrowMissingWarning", ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 343217ae6858..6fe474218461 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -59,6 +59,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.exceptions import PyarrowMissingWarning from google.cloud.bigquery import job from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference @@ -1848,6 +1849,15 @@ def load_table_from_dataframe( parquet_compression=parquet_compression, ) else: + if not pyarrow: + warnings.warn( + "Loading dataframe data without pyarrow installed is " + "deprecated and will become unsupported in the future. " + "Please install the pyarrow package.", + PyarrowMissingWarning, + stacklevel=2, + ) + if job_config.schema: warnings.warn( "job_config.schema is set, but not used to assist in " diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py new file mode 100644 index 000000000000..93490ef97668 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py @@ -0,0 +1,17 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class PyarrowMissingWarning(DeprecationWarning): + pass diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f7b575536354..e674f237d8d7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -54,6 +54,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery.exceptions import PyarrowMissingWarning from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -1739,6 +1740,14 @@ def to_dataframe( for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) return df + else: + warnings.warn( + "Converting to a dataframe without pyarrow installed is " + "often slower and will become unsupported in the future. " + "Please install the pyarrow package.", + PyarrowMissingWarning, + stacklevel=2, + ) # The bqstorage_client is only used if pyarrow is available, so the # rest of this method only needs to account for tabledata.list. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index b8dfbbad14f4..e4bc6af75ccb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -6580,6 +6580,42 @@ def test_load_table_from_dataframe_unknown_table(self): job_config=mock.ANY, ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(fastparquet is None, "Requires `fastparquet`") + def test_load_table_from_dataframe_no_pyarrow_warning(self): + from google.cloud.bigquery.client import PyarrowMissingWarning + + client = self._make_client() + + # Pick at least one column type that translates to Pandas dtype + # "object". A string column matches that. + records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] + dataframe = pandas.DataFrame(records) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) + pyarrow_patch_helpers = mock.patch( + "google.cloud.bigquery._pandas_helpers.pyarrow", None + ) + catch_warnings = warnings.catch_warnings(record=True) + + with get_table_patch, load_patch, pyarrow_patch, pyarrow_patch_helpers, catch_warnings as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + matches = [ + warning for warning in warned if warning.category is PyarrowMissingWarning + ] + assert matches, "A missing pyarrow deprecation warning was not raised." + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(fastparquet is None, "Requires `fastparquet`") def test_load_table_from_dataframe_no_schema_warning_wo_pyarrow(self): @@ -6854,7 +6890,9 @@ def test_load_table_from_dataframe_w_schema_wo_pyarrow(self): assert warned # there should be at least one warning for warning in warned: assert "pyarrow" in str(warning) - assert warning.category in (DeprecationWarning, PendingDeprecationWarning) + assert issubclass( + warning.category, (DeprecationWarning, PendingDeprecationWarning) + ) load_table_from_file.assert_called_once_with( client, diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index c1611c08487c..5bcd60986d45 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2239,6 +2239,38 @@ def test_to_dataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "int64") + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_warning_wo_pyarrow(self): + from google.cloud.bigquery.client import PyarrowMissingWarning + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + no_pyarrow_patch = mock.patch("google.cloud.bigquery.table.pyarrow", new=None) + catch_warnings = warnings.catch_warnings(record=True) + + with no_pyarrow_patch, catch_warnings as warned: + df = row_iterator.to_dataframe() + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 2) + matches = [ + warning for warning in warned if warning.category is PyarrowMissingWarning + ] + self.assertTrue( + matches, msg="A missing pyarrow deprecation warning was not raised." + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") From c2a1a19828cd5e0c8be9175eed465235bd890146 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 26 Mar 2020 08:47:07 +0100 Subject: [PATCH 0754/2016] chore: temporarily adjust failing system test (#69) --- .../google-cloud-bigquery/tests/system.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index c611d8e7e35a..98a1edaa5c83 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -858,11 +858,16 @@ def test_load_table_from_dataframe_w_required(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_explicit_schema(self): # Schema with all scalar types. + # TODO: Uploading DATETIME columns currently fails, thus that field type + # is temporarily removed from the test. + # See: + # https://github.com/googleapis/python-bigquery/issues/61 + # https://issuetracker.google.com/issues/151765076 scalars_schema = ( bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("bytes_col", "BYTES"), bigquery.SchemaField("date_col", "DATE"), - bigquery.SchemaField("dt_col", "DATETIME"), + # bigquery.SchemaField("dt_col", "DATETIME"), bigquery.SchemaField("float_col", "FLOAT"), bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), @@ -888,14 +893,14 @@ def test_load_table_from_dataframe_w_explicit_schema(self): "date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], ), - ( - "dt_col", - [ - datetime.datetime(1, 1, 1, 0, 0, 0), - None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - ], - ), + # ( + # "dt_col", + # [ + # datetime.datetime(1, 1, 1, 0, 0, 0), + # None, + # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + # ], + # ), ("float_col", [float("-inf"), float("nan"), float("inf")]), ( "geo_col", From fba561521bb62b6454dfd7cc1339fb6c3f677b53 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 26 Mar 2020 09:59:18 +0100 Subject: [PATCH 0755/2016] docs: include details of inherited class members (#64) * docs: include details of inherited class members * Clarify result() return type for special queries --- packages/google-cloud-bigquery/docs/conf.py | 2 +- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index d7dae6960f6a..32456a89cd78 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -42,7 +42,7 @@ # autodoc/autosummary flags autoclass_content = "both" -autodoc_default_flags = ["members"] +autodoc_default_flags = ["members", "inherited-members"] autosummary_generate = True diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index ab2eaede5986..4f3103bb5f21 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -3168,6 +3168,9 @@ def result( set** (this is distinct from the total number of rows in the current page: ``iterator.page.num_items``). + If the query is a special query that produces no results, e.g. + a DDL query, an ``_EmptyRowIterator`` instance is returned. + Raises: google.cloud.exceptions.GoogleCloudError: If the job failed. From 9a1849d089367be34a167cec23622073d1b623d3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 26 Mar 2020 10:42:08 +0100 Subject: [PATCH 0756/2016] fix: improve cell magic error message on missing query (#58) * fix: improve cell magic error message on missing query * Remove possibly confusing wording from docstring --- .../google/cloud/bigquery/magics.py | 9 ++++++++- .../tests/unit/test_magics.py | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 39608b19fcde..5872d0cfc3d0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -66,7 +66,9 @@ the variable name (ex. ``$my_dict_var``). See ``In[6]`` and ``In[7]`` in the Examples section below. * ```` (required, cell argument): - SQL query to run. + SQL query to run. If the query does not contain any whitespace (aside + from leading and trailing whitespace), it is assumed to represent a + fully-qualified table ID, and the latter's data will be fetched. Returns: A :class:`pandas.DataFrame` with the query results. @@ -506,6 +508,11 @@ def _cell_magic(line, query): query = query.strip() + if not query: + error = ValueError("Query is missing.") + _handle_error(error, args.destination_var) + return + # Any query that does not contain whitespace (aside from leading and trailing whitespace) # is assumed to be a table id if not re.search(r"\s", query): diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 3f66b2c4b765..fd9d1d7007f9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -800,6 +800,22 @@ def test_bigquery_magic_w_table_id_invalid(): assert "Traceback (most recent call last)" not in output +def test_bigquery_magic_w_missing_query(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + cell_body = " \n \n \t\t \n " + + with io.capture_output() as captured_io: + ip.run_cell_magic("bigquery", "df", cell_body) + + output = captured_io.stderr + assert "Could not save output to variable" in output + assert "Query is missing" in output + assert "Traceback (most recent call last)" not in output + + @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_destination_var(): From fc60107383b21e91ce5fae5638d229f382c0ca5e Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Thu, 26 Mar 2020 15:54:51 +0530 Subject: [PATCH 0757/2016] feat(bigquery): add create job method (#32) * feat(bigquery): add create job method * feat(bigquery): Addressed comments and add unit test * feat(bigquery): make copy of job config for query job Co-authored-by: Peter Lamut --- .../google/cloud/bigquery/client.py | 66 +++++++ .../tests/unit/test_client.py | 170 ++++++++++++++++++ 2 files changed, 236 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 6fe474218461..a9c77d5e1970 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -51,9 +51,11 @@ from google.cloud import exceptions from google.cloud.client import ClientWithProject +from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._helpers import _verify_job_config_type +from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset @@ -1313,6 +1315,70 @@ def job_from_resource(self, resource): return job.QueryJob.from_api_repr(resource, self) return job.UnknownJob.from_api_repr(resource, self) + def create_job(self, job_config, retry=DEFAULT_RETRY): + """Create a new job. + Arguments: + job_config (dict): configuration job representation returned from the API. + + Keyword Arguments: + retry (google.api_core.retry.Retry): + (Optional) How to retry the RPC. + + Returns: + Union[ \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]: + A new job instance. + """ + + if "load" in job_config: + load_job_config = google.cloud.bigquery.job.LoadJobConfig.from_api_repr( + job_config + ) + destination = _get_sub_prop(job_config, ["load", "destinationTable"]) + source_uris = _get_sub_prop(job_config, ["load", "sourceUris"]) + return self.load_table_from_uri( + source_uris, destination, job_config=load_job_config, retry=retry + ) + elif "copy" in job_config: + copy_job_config = google.cloud.bigquery.job.CopyJobConfig.from_api_repr( + job_config + ) + destination = _get_sub_prop(job_config, ["copy", "destinationTable"]) + sources = [] + source_configs = _get_sub_prop(job_config, ["copy", "sourceTables"]) + + if source_configs is None: + source_configs = [_get_sub_prop(job_config, ["copy", "sourceTable"])] + for source_config in source_configs: + table_ref = TableReference.from_api_repr(source_config) + sources.append(table_ref) + return self.copy_table( + sources, destination, job_config=copy_job_config, retry=retry + ) + elif "extract" in job_config: + extract_job_config = google.cloud.bigquery.job.ExtractJobConfig.from_api_repr( + job_config + ) + source = _get_sub_prop(job_config, ["extract", "sourceTable"]) + destination_uris = _get_sub_prop(job_config, ["extract", "destinationUris"]) + return self.extract_table( + source, destination_uris, job_config=extract_job_config, retry=retry + ) + elif "query" in job_config: + copy_config = copy.deepcopy(job_config) + _del_sub_prop(copy_config, ["query", "destinationTable"]) + query_job_config = google.cloud.bigquery.job.QueryJobConfig.from_api_repr( + copy_config + ) + query = _get_sub_prop(copy_config, ["query", "query"]) + return self.query(query, job_config=query_job_config, retry=retry) + else: + raise TypeError("Invalid job configuration received.") + def get_job( self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None ): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index e4bc6af75ccb..fddfa4b1b8ab 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2795,6 +2795,176 @@ def test_delete_table_w_not_found_ok_true(self): conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + def _create_job_helper(self, job_config, client_method): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + client._connection = make_connection() + rf1 = mock.Mock() + get_config_patch = mock.patch( + "google.cloud.bigquery.job._JobConfig.from_api_repr", return_value=rf1, + ) + load_patch = mock.patch(client_method, autospec=True) + + with load_patch as client_method, get_config_patch: + client.create_job(job_config=job_config) + client_method.assert_called_once() + + def test_create_job_load_config(self): + configuration = { + "load": { + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": "source_table", + }, + "sourceUris": ["gs://test_bucket/src_object*"], + } + } + + self._create_job_helper( + configuration, "google.cloud.bigquery.client.Client.load_table_from_uri" + ) + + def test_create_job_copy_config(self): + configuration = { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": "source_table", + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": "destination_table", + }, + } + } + + self._create_job_helper( + configuration, "google.cloud.bigquery.client.Client.copy_table", + ) + + def test_create_job_copy_config_w_single_source(self): + configuration = { + "copy": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": "source_table", + }, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": "destination_table", + }, + } + } + + self._create_job_helper( + configuration, "google.cloud.bigquery.client.Client.copy_table", + ) + + def test_create_job_extract_config(self): + configuration = { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": "source_table", + }, + "destinationUris": ["gs://test_bucket/dst_object*"], + } + } + self._create_job_helper( + configuration, "google.cloud.bigquery.client.Client.extract_table", + ) + + def test_create_job_query_config(self): + configuration = { + "query": {"query": "query", "destinationTable": {"tableId": "table_id"}} + } + self._create_job_helper( + configuration, "google.cloud.bigquery.client.Client.query", + ) + + def test_create_job_query_config_w_rateLimitExceeded_error(self): + from google.cloud.exceptions import Forbidden + from google.cloud.bigquery.retry import DEFAULT_RETRY + + query = "select count(*) from persons" + configuration = { + "query": { + "query": query, + "useLegacySql": False, + "destinationTable": {"tableId": "table_id"}, + } + } + resource = { + "jobReference": {"projectId": self.PROJECT, "jobId": mock.ANY}, + "configuration": { + "query": { + "query": query, + "useLegacySql": False, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": "query_destination_table", + }, + } + }, + } + data_without_destination = { + "jobReference": {"projectId": self.PROJECT, "jobId": mock.ANY}, + "configuration": {"query": {"query": query, "useLegacySql": False}}, + } + + creds = _make_credentials() + http = object() + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, Forbidden) + ) + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + api_request_patcher = mock.patch.object( + client._connection, + "api_request", + side_effect=[ + Forbidden("", errors=[{"reason": "rateLimitExceeded"}]), + resource, + ], + ) + + with api_request_patcher as fake_api_request: + job = client.create_job(job_config=configuration, retry=retry) + + self.assertEqual(job.destination.table_id, "query_destination_table") + self.assertEqual(len(fake_api_request.call_args_list), 2) # was retried once + self.assertEqual( + fake_api_request.call_args_list[1], + mock.call( + method="POST", + path="/projects/PROJECT/jobs", + data=data_without_destination, + timeout=None, + ), + ) + + def test_create_job_w_invalid_job_config(self): + configuration = {"unknown": {}} + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with self.assertRaises(TypeError) as exc: + client.create_job(job_config=configuration) + + self.assertIn("Invalid job configuration", exc.exception.args[0]) + def test_job_from_resource_unknown_type(self): from google.cloud.bigquery.job import UnknownJob From b87b6219e109a17ec865804fcedb50820055f8c1 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Thu, 26 Mar 2020 17:19:40 +0530 Subject: [PATCH 0758/2016] fix(bigquery): fix repr of model reference (#66) --- packages/google-cloud-bigquery/google/cloud/bigquery/model.py | 2 +- packages/google-cloud-bigquery/tests/unit/model/test_model.py | 2 +- .../tests/unit/model/test_model_reference.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index d39ec5f2f60c..a2510e86cf80 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -430,6 +430,6 @@ def __hash__(self): return hash(self._key()) def __repr__(self): - return "ModelReference(project='{}', dataset_id='{}', project_id='{}')".format( + return "ModelReference(project_id='{}', dataset_id='{}', model_id='{}')".format( self.project, self.dataset_id, self.model_id ) diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model.py b/packages/google-cloud-bigquery/tests/unit/model/test_model.py index bbb93ef9e897..90fc09e66ab9 100644 --- a/packages/google-cloud-bigquery/tests/unit/model/test_model.py +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model.py @@ -316,5 +316,5 @@ def test_repr(target_class): got = repr(model) assert got == ( "Model(reference=ModelReference(" - "project='my-proj', dataset_id='my_dset', project_id='my_model'))" + "project_id='my-proj', dataset_id='my_dset', model_id='my_model'))" ) diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py b/packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py index ff1d1df7d499..39dabb55db68 100644 --- a/packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model_reference.py @@ -136,5 +136,5 @@ def test_repr(target_class): got = repr(model) assert ( got - == "ModelReference(project='my-proj', dataset_id='my_dset', project_id='my_model')" + == "ModelReference(project_id='my-proj', dataset_id='my_dset', model_id='my_model')" ) From d5c90dd6da4cf704ceae35e6e031b6f9098d400d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 10 Apr 2020 10:26:08 +0200 Subject: [PATCH 0759/2016] docs: fix warnings occurring in Sphinx 3.0+ (#76) --- packages/google-cloud-bigquery/docs/conf.py | 5 +---- packages/google-cloud-bigquery/docs/gapic/v2/types.rst | 3 ++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 32456a89cd78..87501ce663df 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -38,6 +38,7 @@ "sphinx.ext.napoleon", "sphinx.ext.todo", "sphinx.ext.viewcode", + "recommonmark", ] # autodoc/autosummary flags @@ -49,10 +50,6 @@ # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] -# Allow markdown includes (so releases.md can include CHANGLEOG.md) -# http://www.sphinx-doc.org/en/master/markdown.html -source_parsers = {".md": "recommonmark.parser.CommonMarkParser"} - # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] diff --git a/packages/google-cloud-bigquery/docs/gapic/v2/types.rst b/packages/google-cloud-bigquery/docs/gapic/v2/types.rst index 97938768a690..99b954eca86d 100644 --- a/packages/google-cloud-bigquery/docs/gapic/v2/types.rst +++ b/packages/google-cloud-bigquery/docs/gapic/v2/types.rst @@ -2,4 +2,5 @@ Types for BigQuery API Client ============================= .. automodule:: google.cloud.bigquery_v2.types - :members: \ No newline at end of file + :members: + :noindex: \ No newline at end of file From 9c83172704f4ab55cdc8d18d07ffa4d903320243 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 20 Apr 2020 18:26:18 +0200 Subject: [PATCH 0760/2016] chore: restrict llvmlite version for Python 2.7/3.5 compatibility (#79) --- packages/google-cloud-bigquery/setup.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 4519472d323b..7a13476168aa 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -50,7 +50,14 @@ "pyarrow>=0.4.1, != 0.14.0" ], "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], - "fastparquet": ["fastparquet", "python-snappy"], + "fastparquet": [ + "fastparquet", + "python-snappy", + # llvmlite >= 0.32.0 cannot be installed on Python 3.5 and below + # (building the wheel fails), thus needs to be restricted. + # See: https://github.com/googleapis/python-bigquery/issues/78 + "llvmlite <= 0.31.0", + ], } all_extras = [] From 54371fbb796e8bd29c427003c1d25392c4f08a2e Mon Sep 17 00:00:00 2001 From: Vadym Matsishevskyi <25311427+vam-google@users.noreply.github.com> Date: Wed, 22 Apr 2020 01:49:43 -0700 Subject: [PATCH 0761/2016] chore: migrate synth.py from artman to bazel (#80) --- packages/google-cloud-bigquery/synth.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index ec487eb39254..d26c614898c3 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -17,16 +17,14 @@ import synthtool as s from synthtool import gcp -gapic = gcp.GAPICGenerator() +gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() version = 'v2' library = gapic.py_library( - 'bigquery', - version, - config_path='/google/cloud/bigquery/' - 'artman_bigquery_v2.yaml', - artman_output_name='bigquery-v2', + service='bigquery', + version=version, + bazel_target=f"//google/cloud/bigquery/{version}:bigquery-{version}-py", include_protos=True, ) From f710f1b55803119a02625f550a3856646cd1e811 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 29 Apr 2020 08:39:43 +0200 Subject: [PATCH 0762/2016] chore: assure bqstorage extra installs grpcio (#87) Due to a bug in pip, this does not happen automatically, thus an explicit grpcio pin is needed. --- packages/google-cloud-bigquery/setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 7a13476168aa..f6afaa46e300 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -40,6 +40,11 @@ extras = { "bqstorage": [ "google-cloud-bigquery-storage >= 0.6.0, <2.0.0dev", + # Due to an issue in pip's dependency resolver, the `grpc` extra is not + # installed, even though `google-cloud-bigquery-storage` specifies it + # as `google-api-core[grpc]`. We thus need to explicitly specify it here. + # See: https://github.com/googleapis/python-bigquery/issues/83 + "grpcio >= 1.8.2, < 2.0dev", "pyarrow>=0.16.0, < 2.0dev", ], "pandas": ["pandas>=0.17.1"], From bcb2749ea9a49ccfa8958e0dd7a32de40d3571c1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 29 Apr 2020 09:15:46 +0200 Subject: [PATCH 0763/2016] test: drop per-language version coverage levels (#46) --- packages/google-cloud-bigquery/noxfile.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 94902187471b..d51d99babf86 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -36,14 +36,10 @@ def default(session): session.install("grpcio") session.install("-e", "test_utils") - coverage_fail_under = "--cov-fail-under=97" - # fastparquet is not included in .[all] because, in general, it's redundant # with pyarrow. We still want to run some unit tests with fastparquet # serialization, though. - dev_install = ".[all,fastparquet]" - - session.install("-e", dev_install) + session.install("-e", ".[all,fastparquet]") # IPython does not support Python 2 after version 5.x if session.python == "2.7": @@ -60,7 +56,7 @@ def default(session): "--cov-append", "--cov-config=.coveragerc", "--cov-report=", - coverage_fail_under, + "--cov-fail-under=0", os.path.join("tests", "unit"), *session.posargs, ) From 3de926eebe4dd240267c813a861bac174ada7935 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 30 Apr 2020 09:55:15 +0200 Subject: [PATCH 0764/2016] chore: remove local test_utils directory (#89) --- packages/google-cloud-bigquery/noxfile.py | 6 +- .../test_utils/credentials.json.enc | 49 ---- .../scripts/circleci/get_tagged_package.py | 64 ----- .../scripts/circleci/twine_upload.sh | 36 --- .../test_utils/scripts/get_target_packages.py | 268 ------------------ .../scripts/get_target_packages_kokoro.py | 98 ------- .../test_utils/scripts/run_emulator.py | 199 ------------- .../test_utils/scripts/update_docs.sh | 93 ------ .../google-cloud-bigquery/test_utils/setup.py | 64 ----- .../test_utils/test_utils/__init__.py | 0 .../test_utils/test_utils/imports.py | 38 --- .../test_utils/test_utils/retry.py | 207 -------------- .../test_utils/test_utils/system.py | 81 ------ .../test_utils/test_utils/vpcsc_config.py | 118 -------- 14 files changed, 3 insertions(+), 1318 deletions(-) delete mode 100644 packages/google-cloud-bigquery/test_utils/credentials.json.enc delete mode 100644 packages/google-cloud-bigquery/test_utils/scripts/circleci/get_tagged_package.py delete mode 100755 packages/google-cloud-bigquery/test_utils/scripts/circleci/twine_upload.sh delete mode 100644 packages/google-cloud-bigquery/test_utils/scripts/get_target_packages.py delete mode 100644 packages/google-cloud-bigquery/test_utils/scripts/get_target_packages_kokoro.py delete mode 100644 packages/google-cloud-bigquery/test_utils/scripts/run_emulator.py delete mode 100755 packages/google-cloud-bigquery/test_utils/scripts/update_docs.sh delete mode 100644 packages/google-cloud-bigquery/test_utils/setup.py delete mode 100644 packages/google-cloud-bigquery/test_utils/test_utils/__init__.py delete mode 100644 packages/google-cloud-bigquery/test_utils/test_utils/imports.py delete mode 100644 packages/google-cloud-bigquery/test_utils/test_utils/retry.py delete mode 100644 packages/google-cloud-bigquery/test_utils/test_utils/system.py delete mode 100644 packages/google-cloud-bigquery/test_utils/test_utils/vpcsc_config.py diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index d51d99babf86..ba4adbfd259a 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -34,7 +34,7 @@ def default(session): # Install all test dependencies, then install local packages in-place. session.install("mock", "pytest", "pytest-cov", "freezegun") session.install("grpcio") - session.install("-e", "test_utils") + session.install("git+https://github.com/googleapis/python-test-utils") # fastparquet is not included in .[all] because, in general, it's redundant # with pyarrow. We still want to run some unit tests with fastparquet @@ -83,7 +83,7 @@ def system(session): session.install("mock", "pytest", "psutil") session.install("google-cloud-storage") session.install("fastavro") - session.install("-e", "test_utils") + session.install("git+https://github.com/googleapis/python-test-utils") session.install("-e", ".[all]") # IPython does not support Python 2 after version 5.x @@ -110,7 +110,7 @@ def snippets(session): session.install("mock", "pytest") session.install("google-cloud-storage") session.install("grpcio") - session.install("-e", "test_utils") + session.install("git+https://github.com/googleapis/python-test-utils") session.install("-e", ".[all]") # Run py.test against the snippets tests. diff --git a/packages/google-cloud-bigquery/test_utils/credentials.json.enc b/packages/google-cloud-bigquery/test_utils/credentials.json.enc deleted file mode 100644 index f073c7e4f774..000000000000 --- a/packages/google-cloud-bigquery/test_utils/credentials.json.enc +++ /dev/null @@ -1,49 +0,0 @@ -U2FsdGVkX1/vVm/dOEg1DCACYbdOcL+ey6+64A+DZGZVgF8Z/3skK6rpPocu6GOA -UZAqASsBH9QifDf8cKVXQXVYpYq6HSv2O0w7vOmVorZO9GYPo98s9/8XO+4ty/AU -aB6TD68frBAYv4cT/l5m7aYdzfzMTy0EOXoleZT09JYP3B5FV3KCO114FzMXGwrj -HXsR6E5SyUUlUnWPC3eD3aqmovay0gxOKYO3ZwjFK1nlbN/8q6/8nwBCf/Bg6SHV -V93pNxdolRlJev9kgKz4RN1z4jGCy5PAndhSLE82NFIs9LoAiEOU5YeMlN+Ulqus -J92nh+ptUe9a4pJGbAuveUWO7zdS1QyXvTMUcmmSfXCNm/eIQjNuu5+rHtIjWKh8 -Ilwj2w1aTfSptQEhk/kwRgFz/d11vfwJzvwTmCxO6zyOeL0VUWLqdCBGgG5As9He -/RenF8PZ1O0WbTt7fns5oTlTk/MUo+0xJ1xqvu/y45LaqqcBAnEdrWKmtM3dJHWv -ufQku+kD+83F/VwBnQdvgMHu6KZEs6LRrNo58r4QuK6fS7VCACdzxID1RM2cL7kT -6BFRlyGj1aigmjne9g9M9Jx4R+mZDpPU1WDzzG71J4qCUwaX8Dfwutuv4uiFvzwq -NUF0wLJJPtKWmtW+hnZ/fhHQGCRsOpZzFnqp6Zv7J7k6esqxMgIjfal7Djk5Acy8 -j3iVvm6CYmKMVqzL62JHYS9Ye83tzBCaR8hpnJQKgH3FSOFY8HSwrtQSIsl/hSeF -41sgnz0Y+/gkzNeU18qFk+eCZmvljyu+JK0nPYUgpOCJYVBNQpNHz5PUyiAEKhtM -IOSdjPRW1Y+Xf4RroJnLPoF24Ijwrow5LCm9hBRY6TPPMMmnIXCd23xcLJ1rMj6g -x4ZikElans+cwuc9wtbb7w01DcpTwQ1+eIV1qV+KIgpnLjRGLhZD4etobBsrwYu/ -vnIwy2QHCKENPb8sbdgp7x2mF7VSX0/7tf+9+i70EBiMzpOKBkiZhtLzm6hOBkEy -ODaWrx4lTTwbSw8Rmtf58APhPFMsjHoNsjiUoK249Y8Y2Ff4fMfqYsXu6VC1n/At -CuWYHc3EfBwFcLJS+RQB9kFk/4FygFBWq4Kj0MqoRruLbKmoGeJKH9q35W0f0NCD -j+iHt3014kMGiuyJe1UDQ6fvEihFFdHuDivFpPAXDt4PTY/WtpDhaGMx23kb54pK -jkAuxpznAB1lK3u9bGRXDasGeHIrNtIlPvgkrWHXvoBVqM7zry8TGtoxp3E3I42Z -cUfDWfB9GqVdrOwvrTzyZsl2uShRkAJaZFZj5aMyYxiptp4gM8CwWiNtOd2EwtRO -LxZX4M02PQFIqXV3FSDA0q6EwglUrTZdAlYeOEkopaKCtG31dEPOSQG3NGJAEYso -Cxm99H7970dp0OAgpNSgRbcWDbhVbQXnRzvFGqLeH6a9dQ/a8uD3s8Qm9Du/kB6d -XxTRe2OGxzcD0AgI8GClE4rIZHCLbcwuJRp0EYcN+pgY80O4U98fZ5RYpU6OYbU/ -MEiaBYFKtZtGkV6AQD568V7hHJWqc5DDfVHUQ/aeQwnKi2vnU66u+nnV2rZxXxLP -+dqeLRpul+wKa5b/Z5SfQ14Ff8s7aVyxaogGpyggyPL1vyq4KWZ6Or/wEE5hgNO4 -kBh6ht0QT1Hti8XY2JK1M+Jgbjgcg4jkHBGVqegrG1Rvcc2A4TYKwx+QMSBhyxrU -5qhROjS4lTcC42hQslMUkUwc4U/Y91XdFbOOnaAkwzI36NRYL0pmgZnYxGJZeRvr -E5foOhnOEVSFGdOkLfFh+FkWZQf56Lmn8Gg2wHE3dZTxLHibiUYfkgOr1uEosq29 -D1NstvlJURPQ0Q+8QQNWcl9nEZHMAjOmnL1hbx+QfuC6seucp+sXGzdZByMLZbvT -tG8KNL293CmyQowgf9MXToWYnwRkcvqfTaKyor2Ggze3JtoFW4t0j4DI1XPciZFX -XmfApHrzdB/bZadzxyaZ2NE0CuH9zDelwI6rz38xsN5liYnp5qmNKVCZVOHccXa6 -J8x365m5/VaaA2RrtdPqKxn8VaKy7+T690QgMXVGM4PbzQzQxHuSleklocqlP+sB -jSMXCZY+ng/i4UmRO9noiyW3UThYh0hIdMYs12EmmI9cnF/OuYZpl30fmqwV+VNM -td5B2fYvAvvsjiX60SFCn3DATP1GrPMBlZSmhhP3GYS+xrWt3Xxta9qIX2BEF1Gg -twnZZRjoULSRFUYPfJPEOfEH2UQwm84wxx/GezVE+S/RpBlatPOgCiLnNNaLfdTC -mTG9qY9elJv3GGQO8Lqgf4i8blExs05lSPk1BDhzTB6H9TLz+Ge0/l1QxKf3gPXU -aImK1azieXMXHECkdKxrzmehwu1dZ/oYOLc/OFQCETwSRoLPFOFpYUpizwmVVHR6 -uLSfRptte4ZOU3zHfpd/0+J4tkwHwEkGzsmMdqudlm7qME6upuIplyVBH8JiXzUK -n1RIH/OPmVEluAnexWRLZNdk7MrakIO4XACVbICENiYQgAIErP568An6twWEGDbZ -bEN64E3cVDTDRPRAunIhhsEaapcxpFEPWlHorxv36nMUt0R0h0bJlCu5QdzckfcX -ZrRuu1kl76ZfbSE8T0G4/rBb9gsU4Gn3WyvLIO3MgFBuxR68ZwcR8LpEUd8qp38H -NG4cxPmN1nGKo663Z+xI2Gt5up4gpl+fOt4mXqxY386rB7yHaOfElMG5TUYdrS9w -1xbbCVgeJ6zxX+NFlndG33cSAPprhw+C18eUu6ZU63WZcYFo3GfK6rs3lvYtofvE -8DxztdTidQedNVNE+63YCjhxd/cZUI5n/UpgYkr9owp7hNGJiR3tdoNLR2gcoGqL -qWhH928k2aSgF2j97LZ2OqoPCp0tUB7ho4jD2u4Ik3GLVNlCc3dCvWRvpHtDTQDv -tujESMfHUc9I2r4S/PD3bku/ABGwa977Yp1PjzJGr9RajA5is5n6GVpyynwjtKG4 -iyyITpdwpCgr8pueTBLwZnas3slmiMOog/E4PmPgctHzvC+vhQijhUtw5zSsmv0l -bZlw/mVhp5Ta7dTcLBKR8DA3m3vTbaEGkz0xpfQr7GfiSMRbJyvIw88pDK0gyTMD diff --git a/packages/google-cloud-bigquery/test_utils/scripts/circleci/get_tagged_package.py b/packages/google-cloud-bigquery/test_utils/scripts/circleci/get_tagged_package.py deleted file mode 100644 index c148b9dc2370..000000000000 --- a/packages/google-cloud-bigquery/test_utils/scripts/circleci/get_tagged_package.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2016 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Helper to determine package from tag. -Get the current package directory corresponding to the Circle Tag. -""" - -from __future__ import print_function - -import os -import re -import sys - - -TAG_RE = re.compile(r""" - ^ - (?P - (([a-z]+)[_-])*) # pkg-name-with-hyphens-or-underscores (empty allowed) - ([0-9]+)\.([0-9]+)\.([0-9]+) # Version x.y.z (x, y, z all ints) - $ -""", re.VERBOSE) -TAG_ENV = 'CIRCLE_TAG' -ERROR_MSG = '%s env. var. not set' % (TAG_ENV,) -BAD_TAG_MSG = 'Invalid tag name: %s. Expected pkg-name-x.y.z' -CIRCLE_CI_SCRIPTS_DIR = os.path.dirname(__file__) -ROOT_DIR = os.path.realpath( - os.path.join(CIRCLE_CI_SCRIPTS_DIR, '..', '..', '..')) - - -def main(): - """Get the current package directory. - Prints the package directory out so callers can consume it. - """ - if TAG_ENV not in os.environ: - print(ERROR_MSG, file=sys.stderr) - sys.exit(1) - - tag_name = os.environ[TAG_ENV] - match = TAG_RE.match(tag_name) - if match is None: - print(BAD_TAG_MSG % (tag_name,), file=sys.stderr) - sys.exit(1) - - pkg_name = match.group('pkg') - if pkg_name is None: - print(ROOT_DIR) - else: - pkg_dir = pkg_name.rstrip('-').replace('-', '_') - print(os.path.join(ROOT_DIR, pkg_dir)) - - -if __name__ == '__main__': - main() diff --git a/packages/google-cloud-bigquery/test_utils/scripts/circleci/twine_upload.sh b/packages/google-cloud-bigquery/test_utils/scripts/circleci/twine_upload.sh deleted file mode 100755 index 23a4738e90b9..000000000000 --- a/packages/google-cloud-bigquery/test_utils/scripts/circleci/twine_upload.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -# Copyright 2016 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -ev - -# If this is not a CircleCI tag, no-op. -if [[ -z "$CIRCLE_TAG" ]]; then - echo "This is not a release tag. Doing nothing." - exit 0 -fi - -# H/T: http://stackoverflow.com/a/246128/1068170 -SCRIPT="$(dirname "${BASH_SOURCE[0]}")/get_tagged_package.py" -# Determine the package directory being deploying on this tag. -PKG_DIR="$(python ${SCRIPT})" - -# Ensure that we have the latest versions of Twine, Wheel, and Setuptools. -python3 -m pip install --upgrade twine wheel setuptools - -# Move into the package, build the distribution and upload. -cd ${PKG_DIR} -python3 setup.py sdist bdist_wheel -twine upload dist/* diff --git a/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages.py b/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages.py deleted file mode 100644 index 1d51830cc23a..000000000000 --- a/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages.py +++ /dev/null @@ -1,268 +0,0 @@ -# Copyright 2017 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Print a list of packages which require testing.""" - -import os -import re -import subprocess -import warnings - - -CURRENT_DIR = os.path.realpath(os.path.dirname(__file__)) -BASE_DIR = os.path.realpath(os.path.join(CURRENT_DIR, '..', '..')) -GITHUB_REPO = os.environ.get('GITHUB_REPO', 'google-cloud-python') -CI = os.environ.get('CI', '') -CI_BRANCH = os.environ.get('CIRCLE_BRANCH') -CI_PR = os.environ.get('CIRCLE_PR_NUMBER') -CIRCLE_TAG = os.environ.get('CIRCLE_TAG') -head_hash, head_name = subprocess.check_output(['git', 'show-ref', 'HEAD'] -).strip().decode('ascii').split() -rev_parse = subprocess.check_output( - ['git', 'rev-parse', '--abbrev-ref', 'HEAD'] -).strip().decode('ascii') -MAJOR_DIV = '#' * 78 -MINOR_DIV = '#' + '-' * 77 - -# NOTE: This reg-ex is copied from ``get_tagged_packages``. -TAG_RE = re.compile(r""" - ^ - (?P - (([a-z]+)-)*) # pkg-name-with-hyphens- (empty allowed) - ([0-9]+)\.([0-9]+)\.([0-9]+) # Version x.y.z (x, y, z all ints) - $ -""", re.VERBOSE) - -# This is the current set of dependencies by package. -# As of this writing, the only "real" dependency is that of error_reporting -# (on logging), the rest are just system test dependencies. -PKG_DEPENDENCIES = { - 'logging': {'pubsub'}, -} - - -def get_baseline(): - """Return the baseline commit. - - On a pull request, or on a branch, return the common parent revision - with the master branch. - - Locally, return a value pulled from environment variables, or None if - the environment variables are not set. - - On a push to master, return None. This will effectively cause everything - to be considered to be affected. - """ - - # If this is a pull request or branch, return the tip for master. - # We will test only packages which have changed since that point. - ci_non_master = (CI == 'true') and any([CI_BRANCH != 'master', CI_PR]) - - if ci_non_master: - - repo_url = 'git@github.com:GoogleCloudPlatform/{}'.format(GITHUB_REPO) - subprocess.run(['git', 'remote', 'add', 'baseline', repo_url], - stderr=subprocess.DEVNULL) - subprocess.run(['git', 'pull', 'baseline'], stderr=subprocess.DEVNULL) - - if CI_PR is None and CI_BRANCH is not None: - output = subprocess.check_output([ - 'git', 'merge-base', '--fork-point', - 'baseline/master', CI_BRANCH]) - return output.strip().decode('ascii') - - return 'baseline/master' - - # If environment variables are set identifying what the master tip is, - # use that. - if os.environ.get('GOOGLE_CLOUD_TESTING_REMOTE', ''): - remote = os.environ['GOOGLE_CLOUD_TESTING_REMOTE'] - branch = os.environ.get('GOOGLE_CLOUD_TESTING_BRANCH', 'master') - return '%s/%s' % (remote, branch) - - # If we are not in CI and we got this far, issue a warning. - if not CI: - warnings.warn('No baseline could be determined; this means tests ' - 'will run for every package. If this is local ' - 'development, set the $GOOGLE_CLOUD_TESTING_REMOTE ' - 'environment variable.') - - # That is all we can do; return None. - return None - - -def get_changed_files(): - """Return a list of files that have been changed since the baseline. - - If there is no base, return None. - """ - # Get the baseline, and fail quickly if there is no baseline. - baseline = get_baseline() - print('# Baseline commit: {}'.format(baseline)) - if not baseline: - return None - - # Return a list of altered files. - try: - return subprocess.check_output([ - 'git', 'diff', '--name-only', '{}..HEAD'.format(baseline), - ], stderr=subprocess.DEVNULL).decode('utf8').strip().split('\n') - except subprocess.CalledProcessError: - warnings.warn('Unable to perform git diff; falling back to assuming ' - 'all packages have changed.') - return None - - -def reverse_map(dict_of_sets): - """Reverse a map of one-to-many. - - So the map:: - - { - 'A': {'B', 'C'}, - 'B': {'C'}, - } - - becomes - - { - 'B': {'A'}, - 'C': {'A', 'B'}, - } - - Args: - dict_of_sets (dict[set]): A dictionary of sets, mapping - one value to many. - - Returns: - dict[set]: The reversed map. - """ - result = {} - for key, values in dict_of_sets.items(): - for value in values: - result.setdefault(value, set()).add(key) - - return result - -def get_changed_packages(file_list): - """Return a list of changed packages based on the provided file list. - - If the file list is None, then all packages should be considered to be - altered. - """ - # Determine a complete list of packages. - all_packages = set() - for file_ in os.listdir(BASE_DIR): - abs_file = os.path.realpath(os.path.join(BASE_DIR, file_)) - nox_file = os.path.join(abs_file, 'nox.py') - if os.path.isdir(abs_file) and os.path.isfile(nox_file): - all_packages.add(file_) - - # If ther is no file list, send down the full package set. - if file_list is None: - return all_packages - - # Create a set based on the list of changed files. - answer = set() - reverse_deps = reverse_map(PKG_DEPENDENCIES) - for file_ in file_list: - # Ignore root directory changes (setup.py, .gitignore, etc.). - if os.path.sep not in file_: - continue - - # Ignore changes that are not in a package (usually this will be docs). - package = file_.split(os.path.sep, 1)[0] - if package not in all_packages: - continue - - # If there is a change in core, short-circuit now and return - # everything. - if package in ('core',): - return all_packages - - # Add the package, as well as any dependencies this package has. - # NOTE: For now, dependencies only go down one level. - answer.add(package) - answer = answer.union(reverse_deps.get(package, set())) - - # We got this far without being short-circuited; return the final answer. - return answer - - -def get_tagged_package(): - """Return the package corresponding to the current tag. - - If there is not tag, will return :data:`None`. - """ - if CIRCLE_TAG is None: - return - - match = TAG_RE.match(CIRCLE_TAG) - if match is None: - return - - pkg_name = match.group('pkg') - if pkg_name == '': - # NOTE: This corresponds to the "umbrella" tag. - return - - return pkg_name.rstrip('-').replace('-', '_') - - -def get_target_packages(): - """Return a list of target packages to be run in the current build. - - If in a tag build, will run only the package(s) that are tagged, otherwise - will run the packages that have file changes in them (or packages that - depend on those). - """ - tagged_package = get_tagged_package() - if tagged_package is None: - file_list = get_changed_files() - print(MAJOR_DIV) - print('# Changed files:') - print(MINOR_DIV) - for file_ in file_list or (): - print('# {}'.format(file_)) - for package in sorted(get_changed_packages(file_list)): - yield package - else: - yield tagged_package - - -def main(): - print(MAJOR_DIV) - print('# Environment') - print(MINOR_DIV) - print('# CircleCI: {}'.format(CI)) - print('# CircleCI branch: {}'.format(CI_BRANCH)) - print('# CircleCI pr: {}'.format(CI_PR)) - print('# CircleCI tag: {}'.format(CIRCLE_TAG)) - print('# HEAD ref: {}'.format(head_hash)) - print('# {}'.format(head_name)) - print('# Git branch: {}'.format(rev_parse)) - print(MAJOR_DIV) - - packages = list(get_target_packages()) - - print(MAJOR_DIV) - print('# Target packages:') - print(MINOR_DIV) - for package in packages: - print(package) - print(MAJOR_DIV) - - -if __name__ == '__main__': - main() diff --git a/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages_kokoro.py b/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages_kokoro.py deleted file mode 100644 index 27d3a0c940ea..000000000000 --- a/packages/google-cloud-bigquery/test_utils/scripts/get_target_packages_kokoro.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2017 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Print a list of packages which require testing.""" - -import pathlib -import subprocess - -import ci_diff_helper -import requests - - -def print_environment(environment): - print("-> CI environment:") - print('Branch', environment.branch) - print('PR', environment.pr) - print('In PR', environment.in_pr) - print('Repo URL', environment.repo_url) - if environment.in_pr: - print('PR Base', environment.base) - - -def get_base(environment): - if environment.in_pr: - return environment.base - else: - # If we're not in a PR, just calculate the changes between this commit - # and its parent. - return 'HEAD~1' - - -def get_changed_files_from_base(base): - return subprocess.check_output([ - 'git', 'diff', '--name-only', f'{base}..HEAD', - ], stderr=subprocess.DEVNULL).decode('utf8').strip().split('\n') - - -_URL_TEMPLATE = ( - 'https://api.github.com/repos/googleapis/google-cloud-python/pulls/' - '{}/files' -) - - -def get_changed_files_from_pr(pr): - url = _URL_TEMPLATE.format(pr) - while url is not None: - response = requests.get(url) - for info in response.json(): - yield info['filename'] - url = response.links.get('next', {}).get('url') - - -def determine_changed_packages(changed_files): - packages = [ - path.parent for path in pathlib.Path('.').glob('*/noxfile.py') - ] - - changed_packages = set() - for file in changed_files: - file = pathlib.Path(file) - for package in packages: - if package in file.parents: - changed_packages.add(package) - - return changed_packages - - -def main(): - environment = ci_diff_helper.get_config() - print_environment(environment) - base = get_base(environment) - - if environment.in_pr: - changed_files = list(get_changed_files_from_pr(environment.pr)) - else: - changed_files = get_changed_files_from_base(base) - - packages = determine_changed_packages(changed_files) - - print(f"Comparing against {base}.") - print("-> Changed packages:") - - for package in packages: - print(package) - - -main() diff --git a/packages/google-cloud-bigquery/test_utils/scripts/run_emulator.py b/packages/google-cloud-bigquery/test_utils/scripts/run_emulator.py deleted file mode 100644 index 287b08640691..000000000000 --- a/packages/google-cloud-bigquery/test_utils/scripts/run_emulator.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright 2016 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Run system tests locally with the emulator. - -First makes system calls to spawn the emulator and get the local environment -variable needed for it. Then calls the system tests. -""" - - -import argparse -import os -import subprocess - -import psutil - -from google.cloud.environment_vars import BIGTABLE_EMULATOR -from google.cloud.environment_vars import GCD_DATASET -from google.cloud.environment_vars import GCD_HOST -from google.cloud.environment_vars import PUBSUB_EMULATOR -from run_system_test import run_module_tests - - -BIGTABLE = 'bigtable' -DATASTORE = 'datastore' -PUBSUB = 'pubsub' -PACKAGE_INFO = { - BIGTABLE: (BIGTABLE_EMULATOR,), - DATASTORE: (GCD_DATASET, GCD_HOST), - PUBSUB: (PUBSUB_EMULATOR,), -} -EXTRA = { - DATASTORE: ('--no-legacy',), -} -_DS_READY_LINE = '[datastore] Dev App Server is now running.\n' -_PS_READY_LINE_PREFIX = '[pubsub] INFO: Server started, listening on ' -_BT_READY_LINE_PREFIX = '[bigtable] Cloud Bigtable emulator running on ' - - -def get_parser(): - """Get simple ``argparse`` parser to determine package. - - :rtype: :class:`argparse.ArgumentParser` - :returns: The parser for this script. - """ - parser = argparse.ArgumentParser( - description='Run google-cloud system tests against local emulator.') - parser.add_argument('--package', dest='package', - choices=sorted(PACKAGE_INFO.keys()), - default=DATASTORE, help='Package to be tested.') - return parser - - -def get_start_command(package): - """Get command line arguments for starting emulator. - - :type package: str - :param package: The package to start an emulator for. - - :rtype: tuple - :returns: The arguments to be used, in a tuple. - """ - result = ('gcloud', 'beta', 'emulators', package, 'start') - extra = EXTRA.get(package, ()) - return result + extra - - -def get_env_init_command(package): - """Get command line arguments for getting emulator env. info. - - :type package: str - :param package: The package to get environment info for. - - :rtype: tuple - :returns: The arguments to be used, in a tuple. - """ - result = ('gcloud', 'beta', 'emulators', package, 'env-init') - extra = EXTRA.get(package, ()) - return result + extra - - -def datastore_wait_ready(popen): - """Wait until the datastore emulator is ready to use. - - :type popen: :class:`subprocess.Popen` - :param popen: An open subprocess to interact with. - """ - emulator_ready = False - while not emulator_ready: - emulator_ready = popen.stderr.readline() == _DS_READY_LINE - - -def wait_ready_prefix(popen, prefix): - """Wait until the a process encounters a line with matching prefix. - - :type popen: :class:`subprocess.Popen` - :param popen: An open subprocess to interact with. - - :type prefix: str - :param prefix: The prefix to match - """ - emulator_ready = False - while not emulator_ready: - emulator_ready = popen.stderr.readline().startswith(prefix) - - -def wait_ready(package, popen): - """Wait until the emulator is ready to use. - - :type package: str - :param package: The package to check if ready. - - :type popen: :class:`subprocess.Popen` - :param popen: An open subprocess to interact with. - - :raises: :class:`KeyError` if the ``package`` is not among - ``datastore``, ``pubsub`` or ``bigtable``. - """ - if package == DATASTORE: - datastore_wait_ready(popen) - elif package == PUBSUB: - wait_ready_prefix(popen, _PS_READY_LINE_PREFIX) - elif package == BIGTABLE: - wait_ready_prefix(popen, _BT_READY_LINE_PREFIX) - else: - raise KeyError('Package not supported', package) - - -def cleanup(pid): - """Cleanup a process (including all of its children). - - :type pid: int - :param pid: Process ID. - """ - proc = psutil.Process(pid) - for child_proc in proc.children(recursive=True): - try: - child_proc.kill() - child_proc.terminate() - except psutil.NoSuchProcess: - pass - proc.terminate() - proc.kill() - - -def run_tests_in_emulator(package): - """Spawn an emulator instance and run the system tests. - - :type package: str - :param package: The package to run system tests against. - """ - # Make sure this package has environment vars to replace. - env_vars = PACKAGE_INFO[package] - - start_command = get_start_command(package) - # Ignore stdin and stdout, don't pollute the user's output with them. - proc_start = subprocess.Popen(start_command, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - try: - wait_ready(package, proc_start) - env_init_command = get_env_init_command(package) - proc_env = subprocess.Popen(env_init_command, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - env_status = proc_env.wait() - if env_status != 0: - raise RuntimeError(env_status, proc_env.stderr.read()) - env_lines = proc_env.stdout.read().strip().split('\n') - # Set environment variables before running the system tests. - for env_var in env_vars: - line_prefix = 'export ' + env_var + '=' - value, = [line.split(line_prefix, 1)[1] for line in env_lines - if line.startswith(line_prefix)] - os.environ[env_var] = value - run_module_tests(package, - ignore_requirements=True) - finally: - cleanup(proc_start.pid) - - -def main(): - """Main method to run this script.""" - parser = get_parser() - args = parser.parse_args() - run_tests_in_emulator(args.package) - - -if __name__ == '__main__': - main() diff --git a/packages/google-cloud-bigquery/test_utils/scripts/update_docs.sh b/packages/google-cloud-bigquery/test_utils/scripts/update_docs.sh deleted file mode 100755 index 8cbab9f0dad0..000000000000 --- a/packages/google-cloud-bigquery/test_utils/scripts/update_docs.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/bash - -# Copyright 2016 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -ev - -GH_OWNER='GoogleCloudPlatform' -GH_PROJECT_NAME='google-cloud-python' - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -# Function to build the docs. -function build_docs { - rm -rf docs/_build/ - rm -f docs/bigquery/generated/*.rst - # -W -> warnings as errors - # -T -> show full traceback on exception - # -N -> no color - sphinx-build \ - -W -T -N \ - -b html \ - -d docs/_build/doctrees \ - docs/ \ - docs/_build/html/ - return $? -} - -# Only update docs if we are on CircleCI. -if [[ "${CIRCLE_BRANCH}" == "master" ]] && [[ -z "${CIRCLE_PR_NUMBER}" ]]; then - echo "Building new docs on a merged commit." -elif [[ "$1" == "kokoro" ]]; then - echo "Building and publishing docs on Kokoro." -elif [[ -n "${CIRCLE_TAG}" ]]; then - echo "Building new docs on a tag (but will not deploy)." - build_docs - exit $? -else - echo "Not on master nor a release tag." - echo "Building new docs for testing purposes, but not deploying." - build_docs - exit $? -fi - -# Adding GitHub pages branch. `git submodule add` checks it -# out at HEAD. -GH_PAGES_DIR='ghpages' -git submodule add -q -b gh-pages \ - "git@github.com:${GH_OWNER}/${GH_PROJECT_NAME}" ${GH_PAGES_DIR} - -# Determine if we are building a new tag or are building docs -# for master. Then build new docs in docs/_build from master. -if [[ -n "${CIRCLE_TAG}" ]]; then - # Sphinx will use the package version by default. - build_docs -else - SPHINX_RELEASE=$(git log -1 --pretty=%h) build_docs -fi - -# Update gh-pages with the created docs. -cd ${GH_PAGES_DIR} -git rm -fr latest/ -cp -R ../docs/_build/html/ latest/ - -# Update the files push to gh-pages. -git add . -git status - -# If there are no changes, just exit cleanly. -if [[ -z "$(git status --porcelain)" ]]; then - echo "Nothing to commit. Exiting without pushing changes." - exit -fi - -# Commit to gh-pages branch to apply changes. -git config --global user.email "dpebot@google.com" -git config --global user.name "dpebot" -git commit -m "Update docs after merge to master." - -# NOTE: This may fail if two docs updates (on merges to master) -# happen in close proximity. -git push -q origin HEAD:gh-pages diff --git a/packages/google-cloud-bigquery/test_utils/setup.py b/packages/google-cloud-bigquery/test_utils/setup.py deleted file mode 100644 index 8e9222a7f862..000000000000 --- a/packages/google-cloud-bigquery/test_utils/setup.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2017 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from setuptools import find_packages -from setuptools import setup - - -PACKAGE_ROOT = os.path.abspath(os.path.dirname(__file__)) - - -# NOTE: This is duplicated throughout and we should try to -# consolidate. -SETUP_BASE = { - 'author': 'Google Cloud Platform', - 'author_email': 'googleapis-publisher@google.com', - 'scripts': [], - 'url': 'https://github.com/GoogleCloudPlatform/google-cloud-python', - 'license': 'Apache 2.0', - 'platforms': 'Posix; MacOS X; Windows', - 'include_package_data': True, - 'zip_safe': False, - 'classifiers': [ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Topic :: Internet', - ], -} - - -REQUIREMENTS = [ - 'google-auth >= 0.4.0', - 'six', -] - -setup( - name='google-cloud-testutils', - version='0.24.0', - description='System test utilities for google-cloud-python', - packages=find_packages(), - install_requires=REQUIREMENTS, - python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*', - **SETUP_BASE -) diff --git a/packages/google-cloud-bigquery/test_utils/test_utils/__init__.py b/packages/google-cloud-bigquery/test_utils/test_utils/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/packages/google-cloud-bigquery/test_utils/test_utils/imports.py b/packages/google-cloud-bigquery/test_utils/test_utils/imports.py deleted file mode 100644 index 5991af7fc465..000000000000 --- a/packages/google-cloud-bigquery/test_utils/test_utils/imports.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import mock -import six - - -def maybe_fail_import(predicate): - """Create and return a patcher that conditionally makes an import fail. - - Args: - predicate (Callable[[...], bool]): A callable that, if it returns `True`, - triggers an `ImportError`. It must accept the same arguments as the - built-in `__import__` function. - https://docs.python.org/3/library/functions.html#__import__ - - Returns: - A mock patcher object that can be used to enable patched import behavior. - """ - orig_import = six.moves.builtins.__import__ - - def custom_import(name, globals=None, locals=None, fromlist=(), level=0): - if predicate(name, globals, locals, fromlist, level): - raise ImportError - return orig_import(name, globals, locals, fromlist, level) - - return mock.patch.object(six.moves.builtins, "__import__", new=custom_import) diff --git a/packages/google-cloud-bigquery/test_utils/test_utils/retry.py b/packages/google-cloud-bigquery/test_utils/test_utils/retry.py deleted file mode 100644 index e61c001a03e1..000000000000 --- a/packages/google-cloud-bigquery/test_utils/test_utils/retry.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright 2016 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time -from functools import wraps - -import six - -MAX_TRIES = 4 -DELAY = 1 -BACKOFF = 2 - - -def _retry_all(_): - """Retry all caught exceptions.""" - return True - - -class BackoffFailed(Exception): - """Retry w/ backoffs did not complete successfully.""" - - -class RetryBase(object): - """Base for retrying calling a decorated function w/ exponential backoff. - - :type max_tries: int - :param max_tries: Number of times to try (not retry) before giving up. - - :type delay: int - :param delay: Initial delay between retries in seconds. - - :type backoff: int - :param backoff: Backoff multiplier e.g. value of 2 will double the - delay each retry. - - :type logger: logging.Logger instance - :param logger: Logger to use. If None, print. - """ - def __init__(self, max_tries=MAX_TRIES, delay=DELAY, backoff=BACKOFF, - logger=None): - self.max_tries = max_tries - self.delay = delay - self.backoff = backoff - self.logger = logger.warning if logger else six.print_ - - -class RetryErrors(RetryBase): - """Decorator for retrying given exceptions in testing. - - :type exception: Exception or tuple of Exceptions - :param exception: The exception to check or may be a tuple of - exceptions to check. - - :type error_predicate: function, takes caught exception, returns bool - :param error_predicate: Predicate evaluating whether to retry after a - caught exception. - - :type max_tries: int - :param max_tries: Number of times to try (not retry) before giving up. - - :type delay: int - :param delay: Initial delay between retries in seconds. - - :type backoff: int - :param backoff: Backoff multiplier e.g. value of 2 will double the - delay each retry. - - :type logger: logging.Logger instance - :param logger: Logger to use. If None, print. - """ - def __init__(self, exception, error_predicate=_retry_all, - max_tries=MAX_TRIES, delay=DELAY, backoff=BACKOFF, - logger=None): - super(RetryErrors, self).__init__(max_tries, delay, backoff, logger) - self.exception = exception - self.error_predicate = error_predicate - - def __call__(self, to_wrap): - @wraps(to_wrap) - def wrapped_function(*args, **kwargs): - tries = 0 - while tries < self.max_tries: - try: - return to_wrap(*args, **kwargs) - except self.exception as caught_exception: - - if not self.error_predicate(caught_exception): - raise - - delay = self.delay * self.backoff**tries - msg = ("%s, Trying again in %d seconds..." % - (caught_exception, delay)) - self.logger(msg) - - time.sleep(delay) - tries += 1 - return to_wrap(*args, **kwargs) - - return wrapped_function - - -class RetryResult(RetryBase): - """Decorator for retrying based on non-error result. - - :type result_predicate: function, takes result, returns bool - :param result_predicate: Predicate evaluating whether to retry after a - result is returned. - - :type max_tries: int - :param max_tries: Number of times to try (not retry) before giving up. - - :type delay: int - :param delay: Initial delay between retries in seconds. - - :type backoff: int - :param backoff: Backoff multiplier e.g. value of 2 will double the - delay each retry. - - :type logger: logging.Logger instance - :param logger: Logger to use. If None, print. - """ - def __init__(self, result_predicate, - max_tries=MAX_TRIES, delay=DELAY, backoff=BACKOFF, - logger=None): - super(RetryResult, self).__init__(max_tries, delay, backoff, logger) - self.result_predicate = result_predicate - - def __call__(self, to_wrap): - @wraps(to_wrap) - def wrapped_function(*args, **kwargs): - tries = 0 - while tries < self.max_tries: - result = to_wrap(*args, **kwargs) - if self.result_predicate(result): - return result - - delay = self.delay * self.backoff**tries - msg = "%s. Trying again in %d seconds..." % ( - self.result_predicate.__name__, delay,) - self.logger(msg) - - time.sleep(delay) - tries += 1 - raise BackoffFailed() - - return wrapped_function - - -class RetryInstanceState(RetryBase): - """Decorator for retrying based on instance state. - - :type instance_predicate: function, takes instance, returns bool - :param instance_predicate: Predicate evaluating whether to retry after an - API-invoking method is called. - - :type max_tries: int - :param max_tries: Number of times to try (not retry) before giving up. - - :type delay: int - :param delay: Initial delay between retries in seconds. - - :type backoff: int - :param backoff: Backoff multiplier e.g. value of 2 will double the - delay each retry. - - :type logger: logging.Logger instance - :param logger: Logger to use. If None, print. - """ - def __init__(self, instance_predicate, - max_tries=MAX_TRIES, delay=DELAY, backoff=BACKOFF, - logger=None): - super(RetryInstanceState, self).__init__( - max_tries, delay, backoff, logger) - self.instance_predicate = instance_predicate - - def __call__(self, to_wrap): - instance = to_wrap.__self__ # only instance methods allowed - - @wraps(to_wrap) - def wrapped_function(*args, **kwargs): - tries = 0 - while tries < self.max_tries: - result = to_wrap(*args, **kwargs) - if self.instance_predicate(instance): - return result - - delay = self.delay * self.backoff**tries - msg = "%s. Trying again in %d seconds..." % ( - self.instance_predicate.__name__, delay,) - self.logger(msg) - - time.sleep(delay) - tries += 1 - raise BackoffFailed() - - return wrapped_function diff --git a/packages/google-cloud-bigquery/test_utils/test_utils/system.py b/packages/google-cloud-bigquery/test_utils/test_utils/system.py deleted file mode 100644 index 590dc62a06e6..000000000000 --- a/packages/google-cloud-bigquery/test_utils/test_utils/system.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright 2014 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function -import os -import sys -import time - -import google.auth.credentials -from google.auth.environment_vars import CREDENTIALS as TEST_CREDENTIALS - - -# From shell environ. May be None. -CREDENTIALS = os.getenv(TEST_CREDENTIALS) - -ENVIRON_ERROR_MSG = """\ -To run the system tests, you need to set some environment variables. -Please check the CONTRIBUTING guide for instructions. -""" - - -class EmulatorCreds(google.auth.credentials.Credentials): - """A mock credential object. - - Used to avoid unnecessary token refreshing or reliance on the network - while an emulator is running. - """ - - def __init__(self): # pylint: disable=super-init-not-called - self.token = b'seekrit' - self.expiry = None - - @property - def valid(self): - """Would-be validity check of the credentials. - - Always is :data:`True`. - """ - return True - - def refresh(self, unused_request): # pylint: disable=unused-argument - """Off-limits implementation for abstract method.""" - raise RuntimeError('Should never be refreshed.') - - -def check_environ(): - err_msg = None - if CREDENTIALS is None: - err_msg = '\nMissing variables: ' + TEST_CREDENTIALS - elif not os.path.isfile(CREDENTIALS): - err_msg = '\nThe %s path %r is not a file.' % (TEST_CREDENTIALS, - CREDENTIALS) - - if err_msg is not None: - msg = ENVIRON_ERROR_MSG + err_msg - print(msg, file=sys.stderr) - sys.exit(1) - - -def unique_resource_id(delimiter='_'): - """A unique identifier for a resource. - - Intended to help locate resources created in particular - testing environments and at particular times. - """ - build_id = os.getenv('CIRCLE_BUILD_NUM', '') - if build_id == '': - return '%s%d' % (delimiter, 1000 * time.time()) - else: - return '%s%s%s%d' % (delimiter, build_id, delimiter, time.time()) diff --git a/packages/google-cloud-bigquery/test_utils/test_utils/vpcsc_config.py b/packages/google-cloud-bigquery/test_utils/test_utils/vpcsc_config.py deleted file mode 100644 index 36b15d6be991..000000000000 --- a/packages/google-cloud-bigquery/test_utils/test_utils/vpcsc_config.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import pytest - - -INSIDE_VPCSC_ENVVAR = "GOOGLE_CLOUD_TESTS_IN_VPCSC" -PROJECT_INSIDE_ENVVAR = "PROJECT_ID" -PROJECT_OUTSIDE_ENVVAR = "GOOGLE_CLOUD_TESTS_VPCSC_OUTSIDE_PERIMETER_PROJECT" -BUCKET_OUTSIDE_ENVVAR = "GOOGLE_CLOUD_TESTS_VPCSC_OUTSIDE_PERIMETER_BUCKET" - - -class VPCSCTestConfig(object): - """System test utility for VPCSC detection. - - See: https://cloud.google.com/vpc-service-controls/docs/ - """ - - @property - def inside_vpcsc(self): - """Test whether the test environment is configured to run inside VPCSC. - - Returns: - bool: - true if the environment is configured to run inside VPCSC, - else false. - """ - return INSIDE_VPCSC_ENVVAR in os.environ - - @property - def project_inside(self): - """Project ID for testing outside access. - - Returns: - str: project ID used for testing outside access; None if undefined. - """ - return os.environ.get(PROJECT_INSIDE_ENVVAR, None) - - @property - def project_outside(self): - """Project ID for testing inside access. - - Returns: - str: project ID used for testing inside access; None if undefined. - """ - return os.environ.get(PROJECT_OUTSIDE_ENVVAR, None) - - @property - def bucket_outside(self): - """GCS bucket for testing inside access. - - Returns: - str: bucket ID used for testing inside access; None if undefined. - """ - return os.environ.get(BUCKET_OUTSIDE_ENVVAR, None) - - def skip_if_inside_vpcsc(self, testcase): - """Test decorator: skip if running inside VPCSC.""" - reason = ( - "Running inside VPCSC. " - "Unset the {} environment variable to enable this test." - ).format(INSIDE_VPCSC_ENVVAR) - skip = pytest.mark.skipif(self.inside_vpcsc, reason=reason) - return skip(testcase) - - def skip_unless_inside_vpcsc(self, testcase): - """Test decorator: skip if running outside VPCSC.""" - reason = ( - "Running outside VPCSC. " - "Set the {} environment variable to enable this test." - ).format(INSIDE_VPCSC_ENVVAR) - skip = pytest.mark.skipif(not self.inside_vpcsc, reason=reason) - return skip(testcase) - - def skip_unless_inside_project(self, testcase): - """Test decorator: skip if inside project env var not set.""" - reason = ( - "Project ID for running inside VPCSC not set. " - "Set the {} environment variable to enable this test." - ).format(PROJECT_INSIDE_ENVVAR) - skip = pytest.mark.skipif(self.project_inside is None, reason=reason) - return skip(testcase) - - def skip_unless_outside_project(self, testcase): - """Test decorator: skip if outside project env var not set.""" - reason = ( - "Project ID for running outside VPCSC not set. " - "Set the {} environment variable to enable this test." - ).format(PROJECT_OUTSIDE_ENVVAR) - skip = pytest.mark.skipif(self.project_outside is None, reason=reason) - return skip(testcase) - - def skip_unless_outside_bucket(self, testcase): - """Test decorator: skip if outside bucket env var not set.""" - reason = ( - "Bucket ID for running outside VPCSC not set. " - "Set the {} environment variable to enable this test." - ).format(BUCKET_OUTSIDE_ENVVAR) - skip = pytest.mark.skipif(self.bucket_outside is None, reason=reason) - return skip(testcase) - - -vpcsc_config = VPCSCTestConfig() From 960d01367ec3fa4da98244e4e8c486cbf446718d Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Mon, 4 May 2020 03:15:23 -0700 Subject: [PATCH 0765/2016] chore: install google-cloud-testutils by package name (#90) --- packages/google-cloud-bigquery/noxfile.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index ba4adbfd259a..98f7350709a3 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -107,10 +107,9 @@ def snippets(session): session.skip("Credentials must be set via environment variable.") # Install all test dependencies, then install local packages in place. - session.install("mock", "pytest") + session.install("mock", "pytest", "google-cloud-testutils") session.install("google-cloud-storage") session.install("grpcio") - session.install("git+https://github.com/googleapis/python-test-utils") session.install("-e", ".[all]") # Run py.test against the snippets tests. From 5bcf920fbd12a0fc783ec77f30e2714e1e64eef9 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 11 May 2020 20:07:04 +0530 Subject: [PATCH 0766/2016] feat(bigquery): add support of model for extract job (#71) * feat(bigquery): add support of model for extract job * feat(bigquery): nit * feat(bigquery): add source model for create job method * feat(bigquery): nits * feat(bigquery): nit --- .../google/cloud/bigquery/client.py | 38 ++++- .../google/cloud/bigquery/job.py | 37 +++-- .../google/cloud/bigquery/model.py | 12 ++ .../tests/unit/test_client.py | 149 ++++++++++++++++++ .../tests/unit/test_job.py | 36 ++++- 5 files changed, 252 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a9c77d5e1970..da5b30a3506d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -65,6 +65,7 @@ from google.cloud.bigquery import job from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference +from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.routine import Routine @@ -1364,9 +1365,17 @@ def create_job(self, job_config, retry=DEFAULT_RETRY): job_config ) source = _get_sub_prop(job_config, ["extract", "sourceTable"]) + source_type = "Table" + if not source: + source = _get_sub_prop(job_config, ["extract", "sourceModel"]) + source_type = "Model" destination_uris = _get_sub_prop(job_config, ["extract", "destinationUris"]) return self.extract_table( - source, destination_uris, job_config=extract_job_config, retry=retry + source, + destination_uris, + job_config=extract_job_config, + retry=retry, + source_type=source_type, ) elif "query" in job_config: copy_config = copy.deepcopy(job_config) @@ -2282,6 +2291,7 @@ def extract_table( job_config=None, retry=DEFAULT_RETRY, timeout=None, + source_type="Table", ): """Start a job to extract a table into Cloud Storage files. @@ -2292,9 +2302,11 @@ def extract_table( source (Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.model.Model, \ + google.cloud.bigquery.model.ModelReference, \ src, \ ]): - Table to be extracted. + Table or Model to be extracted. destination_uris (Union[str, Sequence[str]]): URIs of Cloud Storage file(s) into which table data is to be extracted; in format @@ -2319,9 +2331,9 @@ def extract_table( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - Args: - source (google.cloud.bigquery.table.TableReference): table to be extracted. - + source_type (str): + (Optional) Type of source to be extracted.``Table`` or ``Model``. + Defaults to ``Table``. Returns: google.cloud.bigquery.job.ExtractJob: A new extract job instance. @@ -2329,7 +2341,9 @@ def extract_table( TypeError: If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.ExtractJobConfig` class. - """ + ValueError: + If ``source_type`` is not among ``Table``,``Model``. + """ job_id = _make_job_id(job_id, job_id_prefix) if project is None: @@ -2339,7 +2353,17 @@ def extract_table( location = self.location job_ref = job._JobReference(job_id, project=project, location=location) - source = _table_arg_to_table_ref(source, default_project=self.project) + src = source_type.lower() + if src == "table": + source = _table_arg_to_table_ref(source, default_project=self.project) + elif src == "model": + source = _model_arg_to_model_ref(source, default_project=self.project) + else: + raise ValueError( + "Cannot pass `{}` as a ``source_type``, pass Table or Model".format( + source_type + ) + ) if isinstance(destination_uris, six.string_types): destination_uris = [destination_uris] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 4f3103bb5f21..25dd446e8f99 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1990,8 +1990,11 @@ class ExtractJob(_AsyncJob): Args: job_id (str): the job's ID. - source (google.cloud.bigquery.table.TableReference): - Table into which data is to be loaded. + source (Union[ \ + google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.model.ModelReference \ + ]): + Table or Model from which data is to be loaded or extracted. destination_uris (List[str]): URIs describing where the extracted data will be written in Cloud @@ -2067,14 +2070,20 @@ def destination_uri_file_counts(self): def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" + configuration = self._configuration.to_api_repr() source_ref = { "projectId": self.source.project, "datasetId": self.source.dataset_id, - "tableId": self.source.table_id, } - configuration = self._configuration.to_api_repr() - _helpers._set_sub_prop(configuration, ["extract", "sourceTable"], source_ref) + source = "sourceTable" + if isinstance(self.source, TableReference): + source_ref["tableId"] = self.source.table_id + else: + source_ref["modelId"] = self.source.model_id + source = "sourceModel" + + _helpers._set_sub_prop(configuration, ["extract", source], source_ref) _helpers._set_sub_prop( configuration, ["extract", "destinationUris"], self.destination_uris ) @@ -2112,10 +2121,20 @@ def from_api_repr(cls, resource, client): source_config = _helpers._get_sub_prop( config_resource, ["extract", "sourceTable"] ) - dataset = DatasetReference( - source_config["projectId"], source_config["datasetId"] - ) - source = dataset.table(source_config["tableId"]) + if source_config: + dataset = DatasetReference( + source_config["projectId"], source_config["datasetId"] + ) + source = dataset.table(source_config["tableId"]) + else: + source_config = _helpers._get_sub_prop( + config_resource, ["extract", "sourceModel"] + ) + dataset = DatasetReference( + source_config["projectId"], source_config["datasetId"] + ) + source = dataset.model(source_config["modelId"]) + destination_uris = _helpers._get_sub_prop( config_resource, ["extract", "destinationUris"] ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index a2510e86cf80..eb459f57a638 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -433,3 +433,15 @@ def __repr__(self): return "ModelReference(project_id='{}', dataset_id='{}', model_id='{}')".format( self.project, self.dataset_id, self.model_id ) + + +def _model_arg_to_model_ref(value, default_project=None): + """Helper to convert a string or Model to ModelReference. + + This function keeps ModelReference and other kinds of objects unchanged. + """ + if isinstance(value, six.string_types): + return ModelReference.from_string(value, default_project=default_project) + if isinstance(value, Model): + return value.reference + return value diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index fddfa4b1b8ab..6edb2e168668 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2884,6 +2884,21 @@ def test_create_job_extract_config(self): configuration, "google.cloud.bigquery.client.Client.extract_table", ) + def test_create_job_extract_config_for_model(self): + configuration = { + "extract": { + "sourceModel": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": "source_model", + }, + "destinationUris": ["gs://test_bucket/dst_object*"], + } + } + self._create_job_helper( + configuration, "google.cloud.bigquery.client.Client.extract_table", + ) + def test_create_job_query_config(self): configuration = { "query": {"query": "query", "destinationTable": {"tableId": "table_id"}} @@ -4217,6 +4232,140 @@ def test_extract_table_w_destination_uris(self): self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION1, DESTINATION2]) + def test_extract_table_for_source_type_model(self): + from google.cloud.bigquery.job import ExtractJob + + JOB = "job_id" + SOURCE = "source_model" + DESTINATION = "gs://bucket_name/object_name" + RESOURCE = { + "jobReference": {"projectId": self.PROJECT, "jobId": JOB}, + "configuration": { + "extract": { + "sourceModel": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": SOURCE, + }, + "destinationUris": [DESTINATION], + } + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RESOURCE) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = dataset.model(SOURCE) + + job = client.extract_table( + source, DESTINATION, job_id=JOB, timeout=7.5, source_type="Model" + ) + + # Check that extract_table actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, + ) + + # Check the job resource. + self.assertIsInstance(job, ExtractJob) + self.assertIs(job._client, client) + self.assertEqual(job.job_id, JOB) + self.assertEqual(job.source, source) + self.assertEqual(list(job.destination_uris), [DESTINATION]) + + def test_extract_table_for_source_type_model_w_string_model_id(self): + JOB = "job_id" + source_id = "source_model" + DESTINATION = "gs://bucket_name/object_name" + RESOURCE = { + "jobReference": {"projectId": self.PROJECT, "jobId": JOB}, + "configuration": { + "extract": { + "sourceModel": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": source_id, + }, + "destinationUris": [DESTINATION], + } + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RESOURCE) + + client.extract_table( + # Test with string for model ID. + "{}.{}".format(self.DS_ID, source_id), + DESTINATION, + job_id=JOB, + timeout=7.5, + source_type="Model", + ) + + # Check that extract_table actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, + ) + + def test_extract_table_for_source_type_model_w_model_object(self): + from google.cloud.bigquery.model import Model + + JOB = "job_id" + DESTINATION = "gs://bucket_name/object_name" + model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) + model = Model(model_id) + RESOURCE = { + "jobReference": {"projectId": self.PROJECT, "jobId": JOB}, + "configuration": { + "extract": { + "sourceModel": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + }, + "destinationUris": [DESTINATION], + } + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RESOURCE) + + client.extract_table( + # Test with Model class object. + model, + DESTINATION, + job_id=JOB, + timeout=7.5, + source_type="Model", + ) + + # Check that extract_table actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, + ) + + def test_extract_table_for_invalid_source_type_model(self): + JOB = "job_id" + SOURCE = "source_model" + DESTINATION = "gs://bucket_name/object_name" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = dataset.model(SOURCE) + + with self.assertRaises(ValueError) as exc: + client.extract_table( + source, DESTINATION, job_id=JOB, timeout=7.5, source_type="foo" + ) + + self.assertIn("Cannot pass", exc.exception.args[0]) + def test_query_defaults(self): from google.cloud.bigquery.job import QueryJob diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 3e642142d041..d97efd946396 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -3176,10 +3176,16 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.destination_uris, config["destinationUris"]) - table_ref = config["sourceTable"] - self.assertEqual(job.source.project, table_ref["projectId"]) - self.assertEqual(job.source.dataset_id, table_ref["datasetId"]) - self.assertEqual(job.source.table_id, table_ref["tableId"]) + if "sourceTable" in config: + table_ref = config["sourceTable"] + self.assertEqual(job.source.project, table_ref["projectId"]) + self.assertEqual(job.source.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.source.table_id, table_ref["tableId"]) + else: + model_ref = config["sourceModel"] + self.assertEqual(job.source.project, model_ref["projectId"]) + self.assertEqual(job.source.dataset_id, model_ref["datasetId"]) + self.assertEqual(job.source.model_id, model_ref["modelId"]) if "compression" in config: self.assertEqual(job.compression, config["compression"]) @@ -3281,6 +3287,28 @@ def test_from_api_repr_bare(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_for_model(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceModel": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": "model_id", + }, + "destinationUris": [self.DESTINATION_URI], + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import Compression From aa757d3a5b88b7c9c20916ceb50c45cd432c5433 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Mon, 11 May 2020 12:28:08 -0700 Subject: [PATCH 0767/2016] docs(samples): deprecate client.dataset method (#98) * docs(samples): deprecate client.dataset method * docs(samples): linted * docs(samples): updates dataset reference --- .../google-cloud-bigquery/docs/snippets.py | 51 +++++++++++++------ 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 9fa2a21a6c25..607f5a58418f 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -124,7 +124,8 @@ def test_create_client_default_credentials(): def test_create_table_nested_repeated_schema(client, to_delete): dataset_id = "create_table_nested_repeated_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) @@ -163,7 +164,9 @@ def test_create_table_nested_repeated_schema(client, to_delete): def test_create_table_cmek(client, to_delete): dataset_id = "create_table_cmek_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) @@ -172,7 +175,7 @@ def test_create_table_cmek(client, to_delete): # client = bigquery.Client() # dataset_id = 'my_dataset' - table_ref = client.dataset(dataset_id).table("my_table") + table_ref = dataset.table("my_table") table = bigquery.Table(table_ref) # Set the encryption key to use for the table. @@ -192,7 +195,8 @@ def test_create_table_cmek(client, to_delete): def test_create_partitioned_table(client, to_delete): dataset_id = "create_table_partitioned_{}".format(_millis()) - dataset_ref = bigquery.Dataset(client.dataset(dataset_id)) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) dataset = client.create_dataset(dataset_ref) to_delete.append(dataset) @@ -592,7 +596,9 @@ def test_load_table_from_uri_autodetect(client, to_delete, capsys): format-specific code section will be tested in this test. """ dataset_id = "load_table_from_uri_auto_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) @@ -603,7 +609,7 @@ def test_load_table_from_uri_autodetect(client, to_delete, capsys): # client = bigquery.Client() # dataset_id = 'my_dataset' - dataset_ref = client.dataset(dataset_id) + dataset_ref = bigquery.DatasetReference(project, dataset_id) job_config = bigquery.LoadJobConfig() job_config.autodetect = True # [END bigquery_load_table_gcs_csv_autodetect] @@ -655,7 +661,9 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys): format-specific code section will be tested in this test. """ dataset_id = "load_table_from_uri_trunc_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) @@ -746,7 +754,8 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys): def test_load_table_add_column(client, to_delete): dataset_id = "load_table_add_column_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) dataset = bigquery.Dataset(dataset_ref) dataset.location = "US" dataset = client.create_dataset(dataset) @@ -812,7 +821,8 @@ def test_load_table_add_column(client, to_delete): def test_load_table_relax_column(client, to_delete): dataset_id = "load_table_relax_column_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) dataset = bigquery.Dataset(dataset_ref) dataset.location = "US" dataset = client.create_dataset(dataset) @@ -900,7 +910,7 @@ def test_extract_table(client, to_delete): table_id = "shakespeare" destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv") - dataset_ref = client.dataset(dataset_id, project=project) + dataset_ref = bigquery.DatasetReference(project, dataset_id) table_ref = dataset_ref.table(table_id) extract_job = client.extract_table( @@ -927,6 +937,8 @@ def test_extract_table_json(client, to_delete): storage_client = storage.Client() bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name) to_delete.append(bucket) + project = "bigquery-public-data" + dataset_id = "samples" # [START bigquery_extract_table_json] # from google.cloud import bigquery @@ -934,7 +946,7 @@ def test_extract_table_json(client, to_delete): # bucket_name = 'my-bucket' destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.json") - dataset_ref = client.dataset("samples", project="bigquery-public-data") + dataset_ref = bigquery.DatasetReference(project, dataset_id) table_ref = dataset_ref.table("shakespeare") job_config = bigquery.job.ExtractJobConfig() job_config.destination_format = bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON @@ -960,6 +972,8 @@ def test_extract_table_compressed(client, to_delete): storage_client = storage.Client() bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name) to_delete.append(bucket) + project = "bigquery-public-data" + dataset_id = "samples" # [START bigquery_extract_table_compressed] # from google.cloud import bigquery @@ -967,7 +981,7 @@ def test_extract_table_compressed(client, to_delete): # bucket_name = 'my-bucket' destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv.gz") - dataset_ref = client.dataset("samples", project="bigquery-public-data") + dataset_ref = bigquery.DatasetReference(project, dataset_id) table_ref = dataset_ref.table("shakespeare") job_config = bigquery.job.ExtractJobConfig() job_config.compression = bigquery.Compression.GZIP @@ -1054,7 +1068,9 @@ def test_manage_job(client): def test_query_external_gcs_permanent_table(client, to_delete): dataset_id = "query_external_gcs_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) @@ -1064,7 +1080,7 @@ def test_query_external_gcs_permanent_table(client, to_delete): # dataset_id = 'my_dataset' # Configure the external data source - dataset_ref = client.dataset(dataset_id) + dataset_ref = bigquery.DatasetReference(project, dataset_id) table_id = "us_states" schema = [ bigquery.SchemaField("name", "STRING"), @@ -1097,7 +1113,8 @@ def test_ddl_create_view(client, to_delete, capsys): project = client.project dataset_id = "ddl_view_{}".format(_millis()) table_id = "new_view" - dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) @@ -1175,8 +1192,10 @@ def test_list_rows_as_dataframe(client): # [START bigquery_list_rows_dataframe] # from google.cloud import bigquery # client = bigquery.Client() + project = "bigquery-public-data" + dataset_id = "samples" - dataset_ref = client.dataset("samples", project="bigquery-public-data") + dataset_ref = bigquery.DatasetReference(project, dataset_id) table_ref = dataset_ref.table("shakespeare") table = client.get_table(table_ref) From 0e80c13bb58c81851ed6ac4941fe157785016524 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 13 May 2020 03:36:23 +0200 Subject: [PATCH 0768/2016] chore: fix lint error in job.py module (#103) --- packages/google-cloud-bigquery/google/cloud/bigquery/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 25dd446e8f99..52683f23463e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -3116,7 +3116,7 @@ def _format_for_exception(query, job_id): template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}" lines = query.splitlines() - max_line_len = max(len(l) for l in lines) + max_line_len = max(len(line) for line in lines) header = "-----Query Job SQL Follows-----" header = "{:^{total_width}}".format(header, total_width=max_line_len + 5) From 38ba535c9a160334a4178f37f90e24f864c46b2b Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 13 May 2020 12:10:14 +0530 Subject: [PATCH 0769/2016] refactor(bigquery): update code samples of load table autodetect and truncate (#28) Co-authored-by: Peter Lamut --- .../google-cloud-bigquery/docs/snippets.py | 168 ------------------ .../docs/usage/tables.rst | 59 ++++++ .../samples/load_table_uri_autodetect_csv.py | 45 +++++ .../samples/load_table_uri_autodetect_json.py | 42 +++++ .../samples/load_table_uri_truncate_avro.py | 55 ++++++ .../samples/load_table_uri_truncate_csv.py | 56 ++++++ .../samples/load_table_uri_truncate_json.py | 55 ++++++ .../samples/load_table_uri_truncate_orc.py | 55 ++++++ .../load_table_uri_truncate_parquet.py | 55 ++++++ .../test_load_table_uri_autodetect_csv.py | 22 +++ .../test_load_table_uri_autodetect_json.py | 22 +++ .../test_load_table_uri_truncate_avro.py | 21 +++ .../tests/test_load_table_uri_truncate_csv.py | 21 +++ .../test_load_table_uri_truncate_json.py | 21 +++ .../tests/test_load_table_uri_truncate_orc.py | 21 +++ .../test_load_table_uri_truncate_parquet.py | 21 +++ 16 files changed, 571 insertions(+), 168 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_avro.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_csv.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_json.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_orc.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_parquet.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 607f5a58418f..0b68d59d3858 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -25,7 +25,6 @@ import time import pytest -import six try: import fastparquet @@ -585,173 +584,6 @@ def test_manage_views(client, to_delete): # [END bigquery_grant_view_access] -def test_load_table_from_uri_autodetect(client, to_delete, capsys): - """Load table from a GCS URI using various formats and auto-detected schema - Each file format has its own tested load from URI sample. Because most of - the code is common for autodetect, append, and truncate, this sample - includes snippets for all supported formats but only calls a single load - job. - This code snippet is made up of shared code, then format-specific code, - followed by more shared code. Note that only the last format in the - format-specific code section will be tested in this test. - """ - dataset_id = "load_table_from_uri_auto_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - # Shared code - # [START bigquery_load_table_gcs_csv_autodetect] - # [START bigquery_load_table_gcs_json_autodetect] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = bigquery.DatasetReference(project, dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.autodetect = True - # [END bigquery_load_table_gcs_csv_autodetect] - # [END bigquery_load_table_gcs_json_autodetect] - - # Format-specific code - # [START bigquery_load_table_gcs_csv_autodetect] - job_config.skip_leading_rows = 1 - # The source format defaults to CSV, so the line below is optional. - job_config.source_format = bigquery.SourceFormat.CSV - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - # [END bigquery_load_table_gcs_csv_autodetect] - # unset csv-specific attribute - del job_config._properties["load"]["skipLeadingRows"] - - # [START bigquery_load_table_gcs_json_autodetect] - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" - # [END bigquery_load_table_gcs_json_autodetect] - - # Shared code - # [START bigquery_load_table_gcs_csv_autodetect] - # [START bigquery_load_table_gcs_json_autodetect] - load_job = client.load_table_from_uri( - uri, dataset_ref.table("us_states"), job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_csv_autodetect] - # [END bigquery_load_table_gcs_json_autodetect] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." in out - - -def test_load_table_from_uri_truncate(client, to_delete, capsys): - """Replaces table data with data from a GCS URI using various formats - Each file format has its own tested load from URI sample. Because most of - the code is common for autodetect, append, and truncate, this sample - includes snippets for all supported formats but only calls a single load - job. - This code snippet is made up of shared code, then format-specific code, - followed by more shared code. Note that only the last format in the - format-specific code section will be tested in this test. - """ - dataset_id = "load_table_from_uri_trunc_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - table_ref = dataset.table("us_states") - body = six.BytesIO(b"Washington,WA") - client.load_table_from_file(body, table_ref, job_config=job_config).result() - previous_rows = client.get_table(table_ref).num_rows - assert previous_rows > 0 - - # Shared code - # [START bigquery_load_table_gcs_avro_truncate] - # [START bigquery_load_table_gcs_csv_truncate] - # [START bigquery_load_table_gcs_json_truncate] - # [START bigquery_load_table_gcs_parquet_truncate] - # [START bigquery_load_table_gcs_orc_truncate] - # from google.cloud import bigquery - # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('existing_table') - - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE - # [END bigquery_load_table_gcs_avro_truncate] - # [END bigquery_load_table_gcs_csv_truncate] - # [END bigquery_load_table_gcs_json_truncate] - # [END bigquery_load_table_gcs_parquet_truncate] - # [END bigquery_load_table_gcs_orc_truncate] - - # Format-specific code - # [START bigquery_load_table_gcs_avro_truncate] - job_config.source_format = bigquery.SourceFormat.AVRO - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" - # [END bigquery_load_table_gcs_avro_truncate] - - # [START bigquery_load_table_gcs_csv_truncate] - job_config.skip_leading_rows = 1 - # The source format defaults to CSV, so the line below is optional. - job_config.source_format = bigquery.SourceFormat.CSV - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - # [END bigquery_load_table_gcs_csv_truncate] - # unset csv-specific attribute - del job_config._properties["load"]["skipLeadingRows"] - - # [START bigquery_load_table_gcs_json_truncate] - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" - # [END bigquery_load_table_gcs_json_truncate] - - # [START bigquery_load_table_gcs_parquet_truncate] - job_config.source_format = bigquery.SourceFormat.PARQUET - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" - # [END bigquery_load_table_gcs_parquet_truncate] - - # [START bigquery_load_table_gcs_orc_truncate] - job_config.source_format = bigquery.SourceFormat.ORC - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" - # [END bigquery_load_table_gcs_orc_truncate] - - # Shared code - # [START bigquery_load_table_gcs_avro_truncate] - # [START bigquery_load_table_gcs_csv_truncate] - # [START bigquery_load_table_gcs_json_truncate] - # [START bigquery_load_table_gcs_parquet_truncate] - # [START bigquery_load_table_gcs_orc_truncate] - load_job = client.load_table_from_uri( - uri, table_ref, job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(table_ref) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_avro_truncate] - # [END bigquery_load_table_gcs_csv_truncate] - # [END bigquery_load_table_gcs_json_truncate] - # [END bigquery_load_table_gcs_parquet_truncate] - # [END bigquery_load_table_gcs_orc_truncate] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." in out - - def test_load_table_add_column(client, to_delete): dataset_id = "load_table_add_column_{}".format(_millis()) project = client.project diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index 45145cd19004..27af7c7dfd3d 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -132,6 +132,22 @@ Load an ORC file from Cloud Storage: See also: `Loading ORC data from Cloud Storage `_. +Load a CSV file from Cloud Storage and auto-detect schema: + +.. literalinclude:: ../samples/load_table_uri_autodetect_csv.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_csv_autodetect] + :end-before: [END bigquery_load_table_gcs_csv_autodetect] + +Load a JSON file from Cloud Storage and auto-detect schema: + +.. literalinclude:: ../samples/load_table_uri_autodetect_json.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_json_autodetect] + :end-before: [END bigquery_load_table_gcs_json_autodetect] + Updating a Table ^^^^^^^^^^^^^^^^ @@ -220,3 +236,46 @@ Restore a deleted table from a snapshot by using the :dedent: 4 :start-after: [START bigquery_undelete_table] :end-before: [END bigquery_undelete_table] + +Overwrite a Table +^^^^^^^^^^^^^^^^^ + +Replace the table data with an Avro file from Cloud Storage: + +.. literalinclude:: ../samples/load_table_uri_truncate_avro.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_avro_truncate] + :end-before: [END bigquery_load_table_gcs_avro_truncate] + +Replace the table data with a CSV file from Cloud Storage: + +.. literalinclude:: ../samples/load_table_uri_truncate_csv.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_csv_truncate] + :end-before: [END bigquery_load_table_gcs_csv_truncate] + +Replace the table data with a JSON file from Cloud Storage: + +.. literalinclude:: ../samples/load_table_uri_truncate_json.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_json_truncate] + :end-before: [END bigquery_load_table_gcs_json_truncate] + +Replace the table data with an ORC file from Cloud Storage: + +.. literalinclude:: ../samples/load_table_uri_truncate_orc.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_orc_truncate] + :end-before: [END bigquery_load_table_gcs_orc_truncate] + +Replace the table data with a Parquet file from Cloud Storage: + +.. literalinclude:: ../samples/load_table_uri_truncate_parquet.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_parquet_truncate] + :end-before: [END bigquery_load_table_gcs_parquet_truncate] \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py new file mode 100644 index 000000000000..09a5d708d437 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py @@ -0,0 +1,45 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_autodetect_csv(table_id): + + # [START bigquery_load_table_gcs_csv_autodetect] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # "cloud-samples-tests", "us", "test", "test" + # ) + job_config = bigquery.LoadJobConfig( + autodetect=True, + skip_leading_rows=1, + # The source format defaults to CSV, so the line below is optional. + source_format=bigquery.SourceFormat.CSV, + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + load_job.result() # Waits for the job to complete. + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_csv_autodetect] diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py new file mode 100644 index 000000000000..61b7aab1287d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py @@ -0,0 +1,42 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_autodetect_json(table_id): + + # [START bigquery_load_table_gcs_json_autodetect] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # "cloud-samples-tests", "us", "test", "test" + # ) + job_config = bigquery.LoadJobConfig( + autodetect=True, source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + load_job.result() # Waits for the job to complete. + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_json_autodetect] diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py new file mode 100644 index 000000000000..98a791477dd1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_avro(table_id): + + # [START bigquery_load_table_gcs_avro_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.AVRO, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_avro_truncate] diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py new file mode 100644 index 000000000000..73de7a8c17cf --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py @@ -0,0 +1,56 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_csv(table_id): + + # [START bigquery_load_table_gcs_csv_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.CSV, + skip_leading_rows=1, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_csv_truncate] diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py new file mode 100644 index 000000000000..a30fae73629e --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_json(table_id): + + # [START bigquery_load_table_gcs_json_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_json_truncate] diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py new file mode 100644 index 000000000000..18f963be2e7d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_orc(table_id): + + # [START bigquery_load_table_gcs_orc_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.ORC, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_orc_truncate] diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py new file mode 100644 index 000000000000..28692d840d37 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_parquet(table_id): + + # [START bigquery_load_table_gcs_parquet_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.PARQUET, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_parquet_truncate] diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py new file mode 100644 index 000000000000..a407197834f0 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py @@ -0,0 +1,22 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_autodetect_csv + + +def test_load_table_uri_autodetect_csv(capsys, random_table_id): + + load_table_uri_autodetect_csv.load_table_uri_autodetect_csv(random_table_id) + out, err = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py new file mode 100644 index 000000000000..df14d26ed2c4 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py @@ -0,0 +1,22 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_autodetect_json + + +def test_load_table_uri_autodetect_csv(capsys, random_table_id): + + load_table_uri_autodetect_json.load_table_uri_autodetect_json(random_table_id) + out, err = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_avro.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_avro.py new file mode 100644 index 000000000000..ba680cabd49f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_avro.py @@ -0,0 +1,21 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_avro + + +def test_load_table_uri_truncate_avro(capsys, random_table_id): + load_table_uri_truncate_avro.load_table_uri_truncate_avro(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_csv.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_csv.py new file mode 100644 index 000000000000..5c1da7dcec62 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_csv.py @@ -0,0 +1,21 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_csv + + +def test_load_table_uri_truncate_csv(capsys, random_table_id): + load_table_uri_truncate_csv.load_table_uri_truncate_csv(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_json.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_json.py new file mode 100644 index 000000000000..180ca7f40b2e --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_json.py @@ -0,0 +1,21 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_json + + +def test_load_table_uri_truncate_json(capsys, random_table_id): + load_table_uri_truncate_json.load_table_uri_truncate_json(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_orc.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_orc.py new file mode 100644 index 000000000000..322bf31276c3 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_orc.py @@ -0,0 +1,21 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_orc + + +def test_load_table_uri_truncate_orc(capsys, random_table_id): + load_table_uri_truncate_orc.load_table_uri_truncate_orc(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_parquet.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_parquet.py new file mode 100644 index 000000000000..ca901defa815 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_parquet.py @@ -0,0 +1,21 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_parquet + + +def test_load_table_uri_truncate_parquet(capsys, random_table_id): + load_table_uri_truncate_parquet.load_table_uri_truncate_parquet(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out From ad4921db9bc3aed696e0e6c9545d50ba008806d9 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 13 May 2020 13:00:11 +0530 Subject: [PATCH 0770/2016] feat(bigquery): unit and system test for dataframe with int column with Nan values (#39) * feat(bigquery): add unit and system tests for int columns * feat(bigquery): cosmetic changes * feat(bigquery): use pkg_resources for comparison * feat(bigquery): nit --- .../google-cloud-bigquery/tests/system.py | 64 +++++++++++++ .../tests/unit/test_client.py | 96 +++++++++++++++++++ 2 files changed, 160 insertions(+) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 98a1edaa5c83..b86684675da0 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -31,6 +31,7 @@ import psutil import pytest import pytz +import pkg_resources try: from google.cloud import bigquery_storage_v1beta1 @@ -125,6 +126,9 @@ (TooManyRequests, InternalServerError, ServiceUnavailable) ) +PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0") +PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + def _has_rows(result): return len(result) > 0 @@ -742,6 +746,66 @@ def test_load_table_from_dataframe_w_automatic_schema(self): ) self.assertEqual(table.num_rows, 3) + @unittest.skipIf( + pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION, + "Only `pandas version >=1.0.0` is supported", + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_nullable_int64_datatype(self): + """Test that a DataFrame containing column with None-type values and int64 datatype + can be uploaded if a BigQuery schema is specified. + + https://github.com/googleapis/python-bigquery/issues/22 + """ + + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( + Config.CLIENT.project, dataset_id + ) + table_schema = (bigquery.SchemaField("x", "INTEGER", mode="NULLABLE"),) + table = retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + df_data = collections.OrderedDict( + [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) + load_job.result() + table = Config.CLIENT.get_table(table_id) + self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),)) + self.assertEqual(table.num_rows, 4) + + @unittest.skipIf( + pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION, + "Only `pandas version >=1.0.0` is supported", + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): + """Test that a DataFrame containing column with None-type values and int64 datatype + can be uploaded without specifying a schema. + + https://github.com/googleapis/python-bigquery/issues/22 + """ + + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( + Config.CLIENT.project, dataset_id + ) + df_data = collections.OrderedDict( + [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) + load_job.result() + table = Config.CLIENT.get_table(table_id) + self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),)) + self.assertEqual(table.num_rows, 4) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 6edb2e168668..f1dc4e816765 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -30,6 +30,7 @@ from six.moves import http_client import pytest import pytz +import pkg_resources try: import fastparquet @@ -56,6 +57,9 @@ bigquery_storage_v1beta1 = None from tests.unit.helpers import make_connection +PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") +PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + def _make_credentials(): import google.auth.credentials @@ -6973,6 +6977,98 @@ def test_load_table_from_dataframe_no_schema_warning_wo_pyarrow(self): ] assert matches, "A missing schema deprecation warning was not raised." + @unittest.skipIf( + pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, + "Only `pandas version >=1.0.0` supported", + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_nullable_int64_datatype(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64") + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock(schema=[SchemaField("x", "INT64", "NULLABLE")]), + ) + + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert tuple(sent_config.schema) == ( + SchemaField("x", "INT64", "NULLABLE", None), + ) + + @unittest.skipIf( + pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, + "Only `pandas version >=1.0.0` supported", + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64") + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert tuple(sent_config.schema) == ( + SchemaField("x", "INT64", "NULLABLE", None), + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields_error(self): From b78654490cb2dfe20d7a142783568fc31dff7cc5 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Wed, 13 May 2020 01:37:48 -0700 Subject: [PATCH 0771/2016] chore: use google-cloud-testutils in noxfile (#101) * chore: use google-cloud-testutils in noxfile * chore: blacken Co-authored-by: Peter Lamut --- packages/google-cloud-bigquery/noxfile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 98f7350709a3..b2d26568ccaf 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -32,9 +32,10 @@ def default(session): run the tests. """ # Install all test dependencies, then install local packages in-place. - session.install("mock", "pytest", "pytest-cov", "freezegun") + session.install( + "mock", "pytest", "google-cloud-testutils", "pytest-cov", "freezegun" + ) session.install("grpcio") - session.install("git+https://github.com/googleapis/python-test-utils") # fastparquet is not included in .[all] because, in general, it's redundant # with pyarrow. We still want to run some unit tests with fastparquet @@ -80,10 +81,9 @@ def system(session): session.install("--pre", "grpcio") # Install all test dependencies, then install local packages in place. - session.install("mock", "pytest", "psutil") + session.install("mock", "pytest", "psutil", "google-cloud-testutils") session.install("google-cloud-storage") session.install("fastavro") - session.install("git+https://github.com/googleapis/python-test-utils") session.install("-e", ".[all]") # IPython does not support Python 2 after version 5.x From 8986cd92005ad224562d42c4d90b284190524093 Mon Sep 17 00:00:00 2001 From: shollyman Date: Wed, 13 May 2020 18:40:25 -0700 Subject: [PATCH 0772/2016] feat: add HOUR support for time partitioning interval (#91) * feat: add HOUR support for time partitioning interval --- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 5 +++-- packages/google-cloud-bigquery/tests/unit/test_table.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index e674f237d8d7..e66d24e74983 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -583,8 +583,6 @@ def partitioning_type(self): """Union[str, None]: Time partitioning of the table if it is partitioned (Defaults to :data:`None`). - The only partitioning type that is currently supported is - :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`. """ warnings.warn( "This method will be deprecated in future versions. Please use " @@ -1980,6 +1978,9 @@ class TimePartitioningType(object): DAY = "DAY" """str: Generates one partition per day.""" + HOUR = "HOUR" + """str: Generates one partition per hour.""" + class TimePartitioning(object): """Configures time-based partitioning for a table. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 5bcd60986d45..72275fc536c1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1024,11 +1024,11 @@ def test_time_partitioning_setter(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - time_partitioning = TimePartitioning(type_=TimePartitioningType.DAY) + time_partitioning = TimePartitioning(type_=TimePartitioningType.HOUR) table.time_partitioning = time_partitioning - self.assertEqual(table.time_partitioning.type_, TimePartitioningType.DAY) + self.assertEqual(table.time_partitioning.type_, TimePartitioningType.HOUR) # Both objects point to the same properties dict self.assertIs( table._properties["timePartitioning"], time_partitioning._properties From 4fa043cacc87a1ca2c56d6e66b735b746a942011 Mon Sep 17 00:00:00 2001 From: Spencer McCreary Date: Fri, 15 May 2020 05:50:06 -0400 Subject: [PATCH 0773/2016] feat: make AccessEntry objects hashable (#93) * feat: add a __hash__ implementation to AccessEntry * use internal * feat: unit tests for hashable AccessEntry * fix: black lint issue Co-authored-by: Peter Lamut --- .../google/cloud/bigquery/dataset.py | 51 ++++++++++++++----- .../tests/unit/test_dataset.py | 14 +++++ 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 99c47026fe3a..40489a38b20e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -20,6 +20,7 @@ import copy import google.cloud._helpers + from google.cloud.bigquery import _helpers from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.routine import RoutineReference @@ -145,38 +146,60 @@ def __init__(self, role, entity_type, entity_id): "Role must be set for entity " "type %r" % (entity_type,) ) - self.role = role - self.entity_type = entity_type - self.entity_id = entity_id + self._role = role + self._entity_type = entity_type + self._entity_id = entity_id + + @property + def role(self): + """str: The role of the entry.""" + return self._role + + @property + def entity_type(self): + """str: The entity_type of the entry.""" + return self._entity_type + + @property + def entity_id(self): + """str: The entity_id of the entry.""" + return self._entity_id def __eq__(self, other): if not isinstance(other, AccessEntry): return NotImplemented - return ( - self.role == other.role - and self.entity_type == other.entity_type - and self.entity_id == other.entity_id - ) + return self._key() == other._key() def __ne__(self, other): return not self == other def __repr__(self): return "" % ( - self.role, - self.entity_type, - self.entity_id, + self._role, + self._entity_type, + self._entity_id, ) + def _key(self): + """ A tuple key that uniquely describes this field. + Used to compute this instance's hashcode and evaluate equality. + Returns: + Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`. + """ + return (self._role, self._entity_type, self._entity_id) + + def __hash__(self): + return hash(self._key()) + def to_api_repr(self): """Construct the API resource representation of this access entry Returns: Dict[str, object]: Access entry represented as an API resource """ - resource = {self.entity_type: self.entity_id} - if self.role is not None: - resource["role"] = self.role + resource = {self._entity_type: self._entity_id} + if self._role is not None: + resource["role"] = self._role return resource @classmethod diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index ac13e00932ba..e4977a2703f8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -84,6 +84,20 @@ def test__eq___type_mismatch(self): self.assertNotEqual(entry, object()) self.assertEqual(entry, mock.ANY) + def test___hash__set_equality(self): + entry1 = self._make_one("OWNER", "userByEmail", "silly@example.com") + entry2 = self._make_one("OWNER", "userByEmail", "phred@example.com") + set_one = {entry1, entry2} + set_two = {entry1, entry2} + self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + entry1 = self._make_one("OWNER", "userByEmail", "silly@example.com") + entry2 = self._make_one("OWNER", "userByEmail", "phred@example.com") + set_one = {entry1} + set_two = {entry2} + self.assertNotEqual(set_one, set_two) + def test_to_api_repr(self): entry = self._make_one("OWNER", "userByEmail", "salmon@example.com") resource = entry.to_api_repr() From ddb8fc7ddd32a5d2daf56ca2f7eb9935468af294 Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 18 May 2020 11:00:20 -0700 Subject: [PATCH 0774/2016] feat: add support for policy tags (#77) * feat: add support for policy tags in schema * blacken * add more unit coverage * more test cleanup * more tests * formatting * more testing of names setter * address reviewer comments * docstrings migrate from unions -> optional * stashing changes * revision to list-based representation, update tests * changes to equality and testing, towards satisfying coverage * cleanup * return copy * address api repr feedback * make PolicyTagList fully immutable * update docstring * simplify to_api_repr * remove stale doc comments Co-authored-by: Peter Lamut --- .../google/cloud/bigquery/schema.py | 118 +++++++++++++++++- .../google-cloud-bigquery/tests/system.py | 51 ++++++++ .../tests/unit/test_schema.py | 114 ++++++++++++++++- 3 files changed, 278 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 3878a80a9f94..0eaf1201b594 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -62,14 +62,26 @@ class SchemaField(object): fields (Tuple[google.cloud.bigquery.schema.SchemaField]): subfields (requires ``field_type`` of 'RECORD'). + + policy_tags (Optional[PolicyTagList]): The policy tag list for the field. + """ - def __init__(self, name, field_type, mode="NULLABLE", description=None, fields=()): + def __init__( + self, + name, + field_type, + mode="NULLABLE", + description=None, + fields=(), + policy_tags=None, + ): self._name = name self._field_type = field_type self._mode = mode self._description = description self._fields = tuple(fields) + self._policy_tags = policy_tags @classmethod def from_api_repr(cls, api_repr): @@ -87,12 +99,14 @@ def from_api_repr(cls, api_repr): mode = api_repr.get("mode", "NULLABLE") description = api_repr.get("description") fields = api_repr.get("fields", ()) + return cls( field_type=api_repr["type"].upper(), fields=[cls.from_api_repr(f) for f in fields], mode=mode.upper(), description=description, name=api_repr["name"], + policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")), ) @property @@ -136,6 +150,13 @@ def fields(self): """ return self._fields + @property + def policy_tags(self): + """Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list + definition for this field. + """ + return self._policy_tags + def to_api_repr(self): """Return a dictionary representing this schema field. @@ -155,6 +176,10 @@ def to_api_repr(self): if self.field_type.upper() in _STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] + # If this contains a policy tag definition, include that as well: + if self.policy_tags is not None: + answer["policyTags"] = self.policy_tags.to_api_repr() + # Done; return the serialized dictionary. return answer @@ -172,6 +197,7 @@ def _key(self): self._mode.upper(), self._description, self._fields, + self._policy_tags, ) def to_standard_sql(self): @@ -244,7 +270,10 @@ def _parse_schema_resource(info): mode = r_field.get("mode", "NULLABLE") description = r_field.get("description") sub_fields = _parse_schema_resource(r_field) - schema.append(SchemaField(name, field_type, mode, description, sub_fields)) + policy_tags = PolicyTagList.from_api_repr(r_field.get("policyTags")) + schema.append( + SchemaField(name, field_type, mode, description, sub_fields, policy_tags) + ) return schema @@ -291,3 +320,88 @@ def _to_schema_fields(schema): field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) for field in schema ] + + +class PolicyTagList(object): + """Define Policy Tags for a column. + + Args: + names ( + Optional[Tuple[str]]): list of policy tags to associate with + the column. Policy tag identifiers are of the form + `projects/*/locations/*/taxonomies/*/policyTags/*`. + """ + + def __init__(self, names=()): + self._properties = {} + self._properties["names"] = tuple(names) + + @property + def names(self): + """Tuple[str]: Policy tags associated with this definition. + """ + return self._properties.get("names", ()) + + def _key(self): + """A tuple key that uniquely describes this PolicyTagList. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + Tuple: The contents of this :class:`~google.cloud.bigquery.schema.PolicyTagList`. + """ + return tuple(sorted(self._properties.items())) + + def __eq__(self, other): + if not isinstance(other, PolicyTagList): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + return "PolicyTagList{}".format(self._key()) + + @classmethod + def from_api_repr(cls, api_repr): + """Return a :class:`PolicyTagList` object deserialized from a dict. + + This method creates a new ``PolicyTagList`` instance that points to + the ``api_repr`` parameter as its internal properties dict. This means + that when a ``PolicyTagList`` instance is stored as a property of + another object, any changes made at the higher level will also appear + here. + + Args: + api_repr (Mapping[str, str]): + The serialized representation of the PolicyTagList, such as + what is output by :meth:`to_api_repr`. + + Returns: + Optional[google.cloud.bigquery.schema.PolicyTagList]: + The ``PolicyTagList`` object or None. + """ + if api_repr is None: + return None + names = api_repr.get("names", ()) + return cls(names=names) + + def to_api_repr(self): + """Return a dictionary representing this object. + + This method returns the properties dict of the ``PolicyTagList`` + instance rather than making a copy. This means that when a + ``PolicyTagList`` instance is stored as a property of another + object, any changes made at the higher level will also appear here. + + Returns: + dict: + A dictionary representing the PolicyTagList object in + serialized form. + """ + answer = {"names": [name for name in self.names]} + return answer diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index b86684675da0..49e45c772735 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -339,6 +339,57 @@ def test_create_table(self): self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_id) + def test_create_table_with_policy(self): + from google.cloud.bigquery.schema import PolicyTagList + + dataset = self.temp_dataset(_make_dataset_id("create_table_with_policy")) + table_id = "test_table" + policy_1 = PolicyTagList( + names=[ + "projects/{}/locations/us/taxonomies/1/policyTags/2".format( + Config.CLIENT.project + ), + ] + ) + policy_2 = PolicyTagList( + names=[ + "projects/{}/locations/us/taxonomies/3/policyTags/4".format( + Config.CLIENT.project + ), + ] + ) + + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField( + "secret_int", "INTEGER", mode="REQUIRED", policy_tags=policy_1 + ), + ] + table_arg = Table(dataset.table(table_id), schema=schema) + self.assertFalse(_table_exists(table_arg)) + + table = retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertEqual(policy_1, table.schema[1].policy_tags) + + # Amend the schema to replace the policy tags + new_schema = table.schema[:] + old_field = table.schema[1] + new_schema[1] = bigquery.SchemaField( + name=old_field.name, + field_type=old_field.field_type, + mode=old_field.mode, + description=old_field.description, + fields=old_field.fields, + policy_tags=policy_2, + ) + + table.schema = new_schema + table2 = Config.CLIENT.update_table(table, ["schema"]) + self.assertEqual(policy_2, table2.schema[1].policy_tags) + def test_create_table_w_time_partitioning_w_clustering_fields(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index e1bdd7b2fb73..9f7ee7bb3af4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -63,11 +63,38 @@ def test_constructor_subfields(self): self.assertIs(field._fields[0], sub_field1) self.assertIs(field._fields[1], sub_field2) + def test_constructor_with_policy_tags(self): + from google.cloud.bigquery.schema import PolicyTagList + + policy = PolicyTagList(names=("foo", "bar")) + field = self._make_one( + "test", "STRING", mode="REQUIRED", description="Testing", policy_tags=policy + ) + self.assertEqual(field._name, "test") + self.assertEqual(field._field_type, "STRING") + self.assertEqual(field._mode, "REQUIRED") + self.assertEqual(field._description, "Testing") + self.assertEqual(field._fields, ()) + self.assertEqual(field._policy_tags, policy) + def test_to_api_repr(self): - field = self._make_one("foo", "INTEGER", "NULLABLE") + from google.cloud.bigquery.schema import PolicyTagList + + policy = PolicyTagList(names=("foo", "bar")) + self.assertEqual( + policy.to_api_repr(), {"names": ["foo", "bar"]}, + ) + + field = self._make_one("foo", "INTEGER", "NULLABLE", policy_tags=policy) self.assertEqual( field.to_api_repr(), - {"mode": "NULLABLE", "name": "foo", "type": "INTEGER", "description": None}, + { + "mode": "NULLABLE", + "name": "foo", + "type": "INTEGER", + "description": None, + "policyTags": {"names": ["foo", "bar"]}, + }, ) def test_to_api_repr_with_subfield(self): @@ -111,6 +138,23 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].field_type, "INTEGER") self.assertEqual(field.fields[0].mode, "NULLABLE") + def test_from_api_repr_policy(self): + field = self._get_target_class().from_api_repr( + { + "fields": [{"mode": "nullable", "name": "bar", "type": "integer"}], + "name": "foo", + "type": "record", + "policyTags": {"names": ["one", "two"]}, + } + ) + self.assertEqual(field.name, "foo") + self.assertEqual(field.field_type, "RECORD") + self.assertEqual(field.policy_tags.names, ("one", "two")) + self.assertEqual(len(field.fields), 1) + self.assertEqual(field.fields[0].name, "bar") + self.assertEqual(field.fields[0].field_type, "INTEGER") + self.assertEqual(field.fields[0].mode, "NULLABLE") + def test_from_api_repr_defaults(self): field = self._get_target_class().from_api_repr( {"name": "foo", "type": "record"} @@ -408,7 +452,7 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one("field1", "STRING") - expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, ())" + expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)" self.assertEqual(repr(field1), expected) @@ -632,3 +676,67 @@ def test_valid_mapping_representation(self): result = self._call_fut(schema) self.assertEqual(result, expected_schema) + + +class TestPolicyTags(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.schema import PolicyTagList + + return PolicyTagList + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_constructor(self): + empty_policy_tags = self._make_one() + self.assertIsNotNone(empty_policy_tags.names) + self.assertEqual(len(empty_policy_tags.names), 0) + policy_tags = self._make_one(["foo", "bar"]) + self.assertEqual(policy_tags.names, ("foo", "bar")) + + def test_from_api_repr(self): + klass = self._get_target_class() + api_repr = {"names": ["foo"]} + policy_tags = klass.from_api_repr(api_repr) + self.assertEqual(policy_tags.to_api_repr(), api_repr) + + # Ensure the None case correctly returns None, rather + # than an empty instance. + policy_tags2 = klass.from_api_repr(None) + self.assertIsNone(policy_tags2) + + def test_to_api_repr(self): + taglist = self._make_one(names=["foo", "bar"]) + self.assertEqual( + taglist.to_api_repr(), {"names": ["foo", "bar"]}, + ) + taglist2 = self._make_one(names=("foo", "bar")) + self.assertEqual( + taglist2.to_api_repr(), {"names": ["foo", "bar"]}, + ) + + def test___eq___wrong_type(self): + policy = self._make_one(names=["foo"]) + other = object() + self.assertNotEqual(policy, other) + self.assertEqual(policy, mock.ANY) + + def test___eq___names_mismatch(self): + policy = self._make_one(names=["foo", "bar"]) + other = self._make_one(names=["bar", "baz"]) + self.assertNotEqual(policy, other) + + def test___hash__set_equality(self): + policy1 = self._make_one(["foo", "bar"]) + policy2 = self._make_one(["bar", "baz"]) + set_one = {policy1, policy2} + set_two = {policy1, policy2} + self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + policy1 = self._make_one(["foo", "bar"]) + policy2 = self._make_one(["bar", "baz"]) + set_one = {policy1} + set_two = {policy2} + self.assertNotEqual(set_one, set_two) From 47faf37ea2868f7a68738532453ca8011a612738 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Tue, 19 May 2020 12:15:35 -0700 Subject: [PATCH 0775/2016] docs(samples): deprecate client.dataset in samples (#111) --- .../google-cloud-bigquery/docs/snippets.py | 80 ++++++++++++------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 0b68d59d3858..bc6b580201b9 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -132,7 +132,8 @@ def test_create_table_nested_repeated_schema(client, to_delete): # [START bigquery_nested_repeated_schema] # from google.cloud import bigquery # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') + # project = client.project + # dataset_ref = bigquery.DatasetReference(project, 'my_dataset') schema = [ bigquery.SchemaField("id", "STRING", mode="NULLABLE"), @@ -202,7 +203,8 @@ def test_create_partitioned_table(client, to_delete): # [START bigquery_create_table_partitioned] # from google.cloud import bigquery # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') + # project = client.project + # dataset_ref = bigquery.DatasetReference(project, 'my_dataset') table_ref = dataset_ref.table("my_partitioned_table") schema = [ @@ -240,7 +242,9 @@ def test_create_partitioned_table(client, to_delete): def test_manage_table_labels(client, to_delete): dataset_id = "label_table_dataset_{}".format(_millis()) table_id = "label_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) @@ -250,7 +254,9 @@ def test_manage_table_labels(client, to_delete): # [START bigquery_label_table] # from google.cloud import bigquery # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('my_table') + # project = client.project + # dataset_ref = bigquery.DatasetReference(project, dataset_id) + # table_ref = dataset_ref.table('my_table') # table = client.get_table(table_ref) # API request assert table.labels == {} @@ -268,7 +274,8 @@ def test_manage_table_labels(client, to_delete): # dataset_id = 'my_dataset' # table_id = 'my_table' - dataset_ref = client.dataset(dataset_id) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) table_ref = dataset_ref.table(table_id) table = client.get_table(table_ref) # API Request @@ -286,7 +293,9 @@ def test_manage_table_labels(client, to_delete): # [START bigquery_delete_label_table] # from google.cloud import bigquery # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('my_table') + # project = client.project + # dataset_ref = bigquery.DatasetReference(project, dataset_id) + # table_ref = dataset_ref.table('my_table') # table = client.get_table(table_ref) # API request # This example table starts with one label @@ -310,7 +319,9 @@ def test_update_table_description(client, to_delete): """Update a table's description.""" dataset_id = "update_table_description_dataset_{}".format(_millis()) table_id = "update_table_description_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) @@ -321,7 +332,9 @@ def test_update_table_description(client, to_delete): # [START bigquery_update_table_description] # from google.cloud import bigquery # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('my_table') + # project = client.project + # dataset_ref = bigquery.DatasetReference(project, dataset_id) + # table_ref = dataset_ref.table('my_table') # table = client.get_table(table_ref) # API request assert table.description == "Original description." @@ -343,7 +356,9 @@ def test_update_table_expiration(client, to_delete): """Update a table's expiration time.""" dataset_id = "update_table_expiration_dataset_{}".format(_millis()) table_id = "update_table_expiration_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) @@ -356,7 +371,9 @@ def test_update_table_expiration(client, to_delete): # from google.cloud import bigquery # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('my_table') + # project = client.project + # dataset_ref = bigquery.DatasetReference(project, dataset_id) + # table_ref = dataset_ref.table('my_table') # table = client.get_table(table_ref) # API request assert table.expires is None @@ -382,7 +399,9 @@ def test_relax_column(client, to_delete): """Updates a schema field from required to nullable.""" dataset_id = "relax_column_dataset_{}".format(_millis()) table_id = "relax_column_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = bigquery.Dataset(dataset_ref) dataset = client.create_dataset(dataset) to_delete.append(dataset) @@ -396,7 +415,9 @@ def test_relax_column(client, to_delete): bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] - table_ref = client.dataset(dataset_id).table(table_id) + + dataset_ref = bigquery.DatasetReference(project, dataset_id) + table_ref = dataset_ref.table(table_id) table = bigquery.Table(table_ref, schema=original_schema) table = client.create_table(table) assert all(field.mode == "REQUIRED" for field in table.schema) @@ -424,7 +445,9 @@ def test_update_table_cmek(client, to_delete): """Patch a table's metadata.""" dataset_id = "update_table_cmek_{}".format(_millis()) table_id = "update_table_cmek_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) @@ -468,7 +491,7 @@ def test_update_table_cmek(client, to_delete): def test_manage_views(client, to_delete): project = client.project source_dataset_id = "source_dataset_{}".format(_millis()) - source_dataset_ref = client.dataset(source_dataset_id) + source_dataset_ref = bigquery.DatasetReference(project, source_dataset_id) source_dataset = bigquery.Dataset(source_dataset_ref) source_dataset = client.create_dataset(source_dataset) to_delete.append(source_dataset) @@ -487,7 +510,7 @@ def test_manage_views(client, to_delete): load_job.result() shared_dataset_id = "shared_dataset_{}".format(_millis()) - shared_dataset_ref = client.dataset(shared_dataset_id) + shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) shared_dataset = bigquery.Dataset(shared_dataset_ref) shared_dataset = client.create_dataset(shared_dataset) to_delete.append(shared_dataset) @@ -498,7 +521,7 @@ def test_manage_views(client, to_delete): # project = 'my-project' # source_dataset_id = 'my_source_dataset' # source_table_id = 'us_states' - # shared_dataset_ref = client.dataset('my_shared_dataset') + # shared_dataset_ref = bigquery.DatasetReference(project, 'my_shared_dataset') # This example shows how to create a shared view of a source table of # US States. The source table contains all 50 states, while the view will @@ -518,7 +541,7 @@ def test_manage_views(client, to_delete): # project = 'my-project' # source_dataset_id = 'my_source_dataset' # source_table_id = 'us_states' - # shared_dataset_ref = client.dataset('my_shared_dataset') + # shared_dataset_ref = bigquery.DatasetReference(project, 'my_shared_dataset') # This example shows how to update a shared view of a source table of # US States. The view's query will be updated to contain only states with @@ -534,8 +557,9 @@ def test_manage_views(client, to_delete): # from google.cloud import bigquery # client = bigquery.Client() # shared_dataset_id = 'my_shared_dataset' - - view_ref = client.dataset(shared_dataset_id).table("my_shared_view") + project = client.project + shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) + view_ref = shared_dataset_ref.table("my_shared_view") view = client.get_table(view_ref) # API Request # Display view properties @@ -552,9 +576,9 @@ def test_manage_views(client, to_delete): # Assign access controls to the dataset containing the view # shared_dataset_id = 'my_shared_dataset' # analyst_group_email = 'data_analysts@example.com' - shared_dataset = client.get_dataset( - client.dataset(shared_dataset_id) - ) # API request + project = client.project + shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) + shared_dataset = client.get_dataset(shared_dataset_ref) # API request access_entries = shared_dataset.access_entries access_entries.append( bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) @@ -567,9 +591,9 @@ def test_manage_views(client, to_delete): # Authorize the view to access the source dataset # project = 'my-project' # source_dataset_id = 'my_source_dataset' - source_dataset = client.get_dataset( - client.dataset(source_dataset_id) - ) # API request + project = client.project + source_dataset_ref = bigquery.DatasetReference(project, source_dataset_id) + source_dataset = client.get_dataset(source_dataset_ref) # API request view_reference = { "projectId": project, "datasetId": shared_dataset_id, @@ -602,7 +626,8 @@ def test_load_table_add_column(client, to_delete): # [START bigquery_add_column_load_append] # from google.cloud import bigquery # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') + # project = client.project + # dataset_ref = bigquery.DatasetReference(project, 'my_dataset') # filepath = 'path/to/your_file.csv' # Retrieves the destination table and checks the length of the schema @@ -673,7 +698,8 @@ def test_load_table_relax_column(client, to_delete): # [START bigquery_relax_column_load_append] # from google.cloud import bigquery # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') + # project = client.project + # dataset_ref = bigquery.DatasetReference(project, 'my_dataset') # filepath = 'path/to/your_file.csv' # Retrieves the destination table and checks the number of required fields From 8f330b56eb0f2f5e3ccdd94faaffa3f15fbb46cf Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Thu, 21 May 2020 13:35:39 -0700 Subject: [PATCH 0776/2016] test(system): update tests to use DatasetReference (#114) --- .../google-cloud-bigquery/tests/system.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 49e45c772735..b47195084404 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -247,12 +247,14 @@ def test_create_dataset(self): def test_get_dataset(self): dataset_id = _make_dataset_id("get_dataset") client = Config.CLIENT - dataset_arg = Dataset(client.dataset(dataset_id)) + project = client.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset_arg = Dataset(dataset_ref) dataset_arg.friendly_name = "Friendly" dataset_arg.description = "Description" dataset = retry_403(client.create_dataset)(dataset_arg) self.to_delete.append(dataset) - dataset_ref = client.dataset(dataset_id) + dataset_ref = bigquery.DatasetReference(project, dataset_id) # Get with a reference. got = client.get_dataset(dataset_ref) @@ -416,7 +418,8 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): def test_delete_dataset_with_string(self): dataset_id = _make_dataset_id("delete_table_true") - dataset_ref = Config.CLIENT.dataset(dataset_id) + project = Config.CLIENT.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) self.assertTrue(_dataset_exists(dataset_ref)) Config.CLIENT.delete_dataset(dataset_id) @@ -424,9 +427,9 @@ def test_delete_dataset_with_string(self): def test_delete_dataset_delete_contents_true(self): dataset_id = _make_dataset_id("delete_table_true") - dataset = retry_403(Config.CLIENT.create_dataset)( - Dataset(Config.CLIENT.dataset(dataset_id)) - ) + project = Config.CLIENT.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) table_id = "test_table" table_arg = Table(dataset.table(table_id), schema=SCHEMA) @@ -1363,7 +1366,9 @@ def test_extract_table(self): source_blob_name = "person_ages.csv" dataset_id = _make_dataset_id("load_gcs_then_extract") table_id = "test_table" - table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) + project = Config.CLIENT.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + table_ref = dataset_ref.table(table_id) table = Table(table_ref) self.to_delete.insert(0, table) bucket = self._create_bucket(bucket_name) @@ -1546,8 +1551,10 @@ def test_query_w_wrong_config(self): rows = list(Config.CLIENT.query("SELECT 1;").result()) assert rows[0][0] == 1 + project = Config.CLIENT.project + dataset_ref = bigquery.DatasetReference(project, "dset") bad_config = LoadJobConfig() - bad_config.destination = Config.CLIENT.dataset("dset").table("tbl") + bad_config.destination = dataset_ref.table("tbl") with self.assertRaises(Exception): Config.CLIENT.query(good_query, job_config=bad_config).result() @@ -2678,7 +2685,9 @@ def test_list_rows_max_results_w_bqstorage(self): self.assertEqual(len(dataframe.index), 100) def temp_dataset(self, dataset_id, location=None): - dataset = Dataset(Config.CLIENT.dataset(dataset_id)) + project = Config.CLIENT.project + dataset_ref = bigquery.DatasetReference(project, dataset_id) + dataset = Dataset(dataset_ref) if location: dataset.location = location dataset = retry_403(Config.CLIENT.create_dataset)(dataset) From 3bc92ebf9c52674977740077e44826068324bcfb Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 4 Jun 2020 09:02:34 +0200 Subject: [PATCH 0777/2016] docs: explain the limitation of IPython magic dict arguments (#119) --- .../google-cloud-bigquery/google/cloud/bigquery/magics.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 5872d0cfc3d0..4f2a16ccafd3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -65,6 +65,14 @@ serializable. The variable reference is indicated by a ``$`` before the variable name (ex. ``$my_dict_var``). See ``In[6]`` and ``In[7]`` in the Examples section below. + + .. note:: + + Due to the way IPython argument parser works, negative numbers in + dictionaries are incorrectly "recognized" as additional arguments, + resulting in an error ("unrecognized arguments"). To get around this, + pass such dictionary as a JSON string variable. + * ```` (required, cell argument): SQL query to run. If the query does not contain any whitespace (aside from leading and trailing whitespace), it is assumed to represent a From 53e07b95b3e247999b49340eb7585287d0a02e4c Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Sat, 6 Jun 2020 14:48:46 +0530 Subject: [PATCH 0778/2016] feat(bigquery): expose start index parameter for query result (#121) * feat(bigquery): expose start index parameter for query result * feat(bigquery): nit --- .../google/cloud/bigquery/job.py | 10 ++++- .../google-cloud-bigquery/tests/system.py | 12 ++++++ .../tests/unit/test_job.py | 40 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 52683f23463e..7a1178a8c933 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -3160,7 +3160,12 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): raise def result( - self, page_size=None, max_results=None, retry=DEFAULT_RETRY, timeout=None + self, + page_size=None, + max_results=None, + retry=DEFAULT_RETRY, + timeout=None, + start_index=None, ): """Start the job and wait for it to complete and get the result. @@ -3177,6 +3182,8 @@ def result( before using ``retry``. If multiple requests are made under the hood, ``timeout`` applies to each individual request. + start_index (Optional[int]): + The zero-based index of the starting row to read. Returns: google.cloud.bigquery.table.RowIterator: @@ -3230,6 +3237,7 @@ def result( dest_table, page_size=page_size, max_results=max_results, + start_index=start_index, retry=retry, timeout=timeout, ) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index b47195084404..66d7ee259bee 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1577,6 +1577,18 @@ def test_query_w_page_size(self): iterator = query_job.result(page_size=page_size) self.assertEqual(next(iterator.pages).num_items, page_size) + def test_query_w_start_index(self): + start_index = 164652 + query_job = Config.CLIENT.query( + "SELECT word FROM `bigquery-public-data.samples.shakespeare`;", + job_id_prefix="test_query_w_start_index_", + ) + result1 = query_job.result(start_index=start_index) + total_rows = result1.total_rows + + self.assertEqual(result1.extra_params["startIndex"], start_index) + self.assertEqual(len(list(result1)), total_rows - start_index) + def test_query_statistics(self): """ A system test to exercise some of the extended query statistics. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index d97efd946396..c89cad7490bb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -4666,6 +4666,46 @@ def test_result_w_page_size(self): ] ) + def test_result_with_start_index(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "5", + } + tabledata_resource = { + "totalRows": "5", + "pageToken": None, + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + {"f": [{"v": "jkl"}]}, + ], + } + connection = _make_connection(query_resource, tabledata_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + start_index = 1 + + result = job.result(start_index=start_index) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 5) + + rows = list(result) + + self.assertEqual(len(rows), 4) + self.assertEqual(len(connection.api_request.call_args_list), 2) + tabledata_list_request = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request[1]["query_params"]["startIndex"], start_index + ) + def test_result_error(self): from google.cloud import exceptions From 056d3357ae91edeba2731cf61e554bed95214fd6 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 9 Jun 2020 08:45:09 -0700 Subject: [PATCH 0779/2016] chore: release 1.25.0 (#44) * updated CHANGELOG.md [ci skip] * updated setup.cfg [ci skip] * updated setup.py Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 23 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 0da745204cec..7506ed4380d4 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,29 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [1.25.0](https://www.github.com/googleapis/python-bigquery/compare/v1.24.0...v1.25.0) (2020-06-06) + + +### Features + +* add BigQuery storage client support to DB API ([#36](https://www.github.com/googleapis/python-bigquery/issues/36)) ([ba9b2f8](https://www.github.com/googleapis/python-bigquery/commit/ba9b2f87e36320d80f6f6460b77e6daddb0fa214)) +* **bigquery:** add create job method ([#32](https://www.github.com/googleapis/python-bigquery/issues/32)) ([2abdef8](https://www.github.com/googleapis/python-bigquery/commit/2abdef82bed31601d1ca1aa92a10fea1e09f5297)) +* **bigquery:** add support of model for extract job ([#71](https://www.github.com/googleapis/python-bigquery/issues/71)) ([4a7a514](https://www.github.com/googleapis/python-bigquery/commit/4a7a514659a9f6f9bbd8af46bab3f8782d6b4b98)) +* add HOUR support for time partitioning interval ([#91](https://www.github.com/googleapis/python-bigquery/issues/91)) ([0dd90b9](https://www.github.com/googleapis/python-bigquery/commit/0dd90b90e3714c1d18f8a404917a9454870e338a)) +* add support for policy tags ([#77](https://www.github.com/googleapis/python-bigquery/issues/77)) ([38a5c01](https://www.github.com/googleapis/python-bigquery/commit/38a5c01ca830daf165592357c45f2fb4016aad23)) +* make AccessEntry objects hashable ([#93](https://www.github.com/googleapis/python-bigquery/issues/93)) ([23a173b](https://www.github.com/googleapis/python-bigquery/commit/23a173bc5a25c0c8200adc5af62eb05624c9099e)) +* **bigquery:** expose start index parameter for query result ([#121](https://www.github.com/googleapis/python-bigquery/issues/121)) ([be86de3](https://www.github.com/googleapis/python-bigquery/commit/be86de330a3c3801653a0ccef90e3d9bdb3eee7a)) +* **bigquery:** unit and system test for dataframe with int column with Nan values ([#39](https://www.github.com/googleapis/python-bigquery/issues/39)) ([5fd840e](https://www.github.com/googleapis/python-bigquery/commit/5fd840e9d4c592c4f736f2fd4792c9670ba6795e)) + + +### Bug Fixes + +* allow partial streaming_buffer statistics ([#37](https://www.github.com/googleapis/python-bigquery/issues/37)) ([645f0fd](https://www.github.com/googleapis/python-bigquery/commit/645f0fdb35ee0e81ee70f7459e796a42a1f03210)) +* distinguish server timeouts from transport timeouts ([#43](https://www.github.com/googleapis/python-bigquery/issues/43)) ([a17be5f](https://www.github.com/googleapis/python-bigquery/commit/a17be5f01043f32d9fbfb2ddf456031ea9205c8f)) +* improve cell magic error message on missing query ([#58](https://www.github.com/googleapis/python-bigquery/issues/58)) ([6182cf4](https://www.github.com/googleapis/python-bigquery/commit/6182cf48aef8f463bb96891cfc44a96768121dbc)) +* **bigquery:** fix repr of model reference ([#66](https://www.github.com/googleapis/python-bigquery/issues/66)) ([26c6204](https://www.github.com/googleapis/python-bigquery/commit/26c62046f4ec8880cf6561cc90a8b821dcc84ec5)) +* **bigquery:** fix start index with page size for list rows ([#27](https://www.github.com/googleapis/python-bigquery/issues/27)) ([400673b](https://www.github.com/googleapis/python-bigquery/commit/400673b5d0f2a6a3d828fdaad9d222ca967ffeff)) + ## 1.24.0 02-03-2020 01:38 PST diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index f6afaa46e300..3ec2ba0bd8de 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.24.0" +version = "1.25.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From bcc9ff3eb422539cf148fad9dc92176517198597 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 10 Jun 2020 23:20:02 +0200 Subject: [PATCH 0780/2016] feat: use BigQuery Storage client by default (#55) * feat: use BigQuery Storage client by default * Use BQ Storage API by default in cell magic * Add raise_on_closed helper decorator to DB API * Use BigQuery Storage API by default in DB API * Use BQ Storage v1 stable version in main client * Use BQ Storage v1 stable in BigQuery cell magic * Use BQ Storage v1 stable in DB API * Support both v1 stable and beta1 BQ Storage client * Fix some typos and redundant Beta mark * Use ARROW as data format in DB API cursor * feat: add HOUR support for time partitioning interval (#91) * feat: add HOUR support for time partitioning interval * Bump BQ storage pin to stable version. Co-authored-by: shollyman --- .../google/cloud/bigquery/_pandas_helpers.py | 79 +++- .../google/cloud/bigquery/client.py | 18 +- .../google/cloud/bigquery/dbapi/_helpers.py | 47 ++- .../google/cloud/bigquery/dbapi/connection.py | 79 +++- .../google/cloud/bigquery/dbapi/cursor.py | 64 ++- .../google/cloud/bigquery/job.py | 38 +- .../google/cloud/bigquery/magics.py | 64 +-- .../google/cloud/bigquery/table.py | 115 +++--- packages/google-cloud-bigquery/setup.py | 2 +- .../google-cloud-bigquery/tests/system.py | 160 +++++++- .../tests/unit/helpers.py | 7 + .../tests/unit/test_client.py | 36 +- .../tests/unit/test_dbapi__helpers.py | 114 +++++- .../tests/unit/test_dbapi_connection.py | 98 ++++- .../tests/unit/test_dbapi_cursor.py | 160 +++++++- .../tests/unit/test_job.py | 51 +-- .../tests/unit/test_magics.py | 141 +++---- .../tests/unit/test_table.py | 377 +++++++++++------- 18 files changed, 1195 insertions(+), 455 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 645478ff6d4b..f5f9d4a99a49 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -22,9 +22,9 @@ from six.moves import queue try: - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage_v1 except ImportError: # pragma: NO COVER - bigquery_storage_v1beta1 = None + bigquery_storage_v1 = None try: import pandas @@ -577,8 +577,19 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page): def _download_table_bqstorage_stream( download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): - position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) - rowstream = bqstorage_client.read_rows(position).rows(session) + # Passing a BQ Storage client in implies that the BigQuery Storage library + # is available and can be imported. + from google.cloud import bigquery_storage_v1beta1 + + # We want to preserve comaptibility with the v1beta1 BQ Storage clients, + # thus adjust constructing the rowstream if needed. + # The assumption is that the caller provides a BQ Storage `session` that is + # compatible with the version of the BQ Storage client passed in. + if isinstance(bqstorage_client, bigquery_storage_v1beta1.BigQueryStorageClient): + position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) + rowstream = bqstorage_client.read_rows(position).rows(session) + else: + rowstream = bqstorage_client.read_rows(stream.name).rows(session) for page in rowstream.pages: if download_state.done: @@ -610,6 +621,12 @@ def _download_table_bqstorage( page_to_item=None, ): """Use (faster, but billable) BQ Storage API to construct DataFrame.""" + + # Passing a BQ Storage client in implies that the BigQuery Storage library + # is available and can be imported. + from google.cloud import bigquery_storage_v1 + from google.cloud import bigquery_storage_v1beta1 + if "$" in table.table_id: raise ValueError( "Reading from a specific partition is not currently supported." @@ -617,22 +634,44 @@ def _download_table_bqstorage( if "@" in table.table_id: raise ValueError("Reading from a specific snapshot is not currently supported.") - read_options = bigquery_storage_v1beta1.types.TableReadOptions() - if selected_fields is not None: - for field in selected_fields: - read_options.selected_fields.append(field.name) - - requested_streams = 0 - if preserve_order: - requested_streams = 1 - - session = bqstorage_client.create_read_session( - table.to_bqstorage(), - "projects/{}".format(project_id), - format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, - read_options=read_options, - requested_streams=requested_streams, - ) + requested_streams = 1 if preserve_order else 0 + + # We want to preserve comaptibility with the v1beta1 BQ Storage clients, + # thus adjust the session creation if needed. + if isinstance(bqstorage_client, bigquery_storage_v1beta1.BigQueryStorageClient): + warnings.warn( + "Support for BigQuery Storage v1beta1 clients is deprecated, please " + "consider upgrading the client to BigQuery Storage v1 stable version.", + category=DeprecationWarning, + ) + read_options = bigquery_storage_v1beta1.types.TableReadOptions() + + if selected_fields is not None: + for field in selected_fields: + read_options.selected_fields.append(field.name) + + session = bqstorage_client.create_read_session( + table.to_bqstorage(v1beta1=True), + "projects/{}".format(project_id), + format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, + read_options=read_options, + requested_streams=requested_streams, + ) + else: + requested_session = bigquery_storage_v1.types.ReadSession( + table=table.to_bqstorage(), + data_format=bigquery_storage_v1.enums.DataFormat.ARROW, + ) + if selected_fields is not None: + for field in selected_fields: + requested_session.read_options.selected_fields.append(field.name) + + session = bqstorage_client.create_read_session( + parent="projects/{}".format(project_id), + read_session=requested_session, + max_stream_count=requested_streams, + ) + _LOGGER.debug( "Started reading table '{}.{}.{}' with BQ Storage API session '{}'.".format( table.project, table.dataset_id, table.table_id, session.name diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index da5b30a3506d..8e265d971069 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -397,15 +397,23 @@ def dataset(self, dataset_id, project=None): def _create_bqstorage_client(self): """Create a BigQuery Storage API client using this client's credentials. + If a client cannot be created due to missing dependencies, raise a + warning and return ``None``. + Returns: - google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient: + Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]: A BigQuery Storage API client. """ - from google.cloud import bigquery_storage_v1beta1 + try: + from google.cloud import bigquery_storage_v1 + except ImportError: + warnings.warn( + "Cannot create BigQuery Storage client, the dependency " + "google-cloud-bigquery-storage is not installed." + ) + return None - return bigquery_storage_v1beta1.BigQueryStorageClient( - credentials=self._credentials - ) + return bigquery_storage_v1.BigQueryReadClient(credentials=self._credentials) def create_dataset( self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 6558177d7bb1..1bcf45f3163b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -19,6 +19,7 @@ import datetime import decimal +import functools import numbers import six @@ -233,8 +234,52 @@ def to_bq_table_rows(rows_iterable): """ def to_table_row(row): - values = tuple(row.values()) + # NOTE: We fetch ARROW values, thus we need to convert them to Python + # objects with as_py(). + values = tuple(value.as_py() for value in row.values()) keys_to_index = {key: i for i, key in enumerate(row.keys())} return table.Row(values, keys_to_index) return (to_table_row(row_data) for row_data in rows_iterable) + + +def raise_on_closed( + exc_msg, exc_class=exceptions.ProgrammingError, closed_attr_name="_closed" +): + """Make public instance methods raise an error if the instance is closed.""" + + def _raise_on_closed(method): + """Make a non-static method raise an error if its containing instance is closed. + """ + + def with_closed_check(self, *args, **kwargs): + if getattr(self, closed_attr_name): + raise exc_class(exc_msg) + return method(self, *args, **kwargs) + + functools.update_wrapper(with_closed_check, method) + return with_closed_check + + def decorate_public_methods(klass): + """Apply ``_raise_on_closed()`` decorator to public instance methods. + """ + for name in dir(klass): + if name.startswith("_"): + continue + + member = getattr(klass, name) + if not callable(member): + continue + + # We need to check for class/static methods directly in the instance + # __dict__, not via the retrieved attribute (`member`), as the + # latter is already a callable *produced* by one of these descriptors. + if isinstance(klass.__dict__[name], (staticmethod, classmethod)): + continue + + member = _raise_on_closed(member) + setattr(klass, name, member) + + return klass + + return decorate_public_methods diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py index b8eaf2f9b91d..23e966486ea3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py @@ -14,22 +14,30 @@ """Connection for the Google BigQuery DB-API.""" +import weakref + from google.cloud import bigquery from google.cloud.bigquery.dbapi import cursor +from google.cloud.bigquery.dbapi import _helpers +@_helpers.raise_on_closed("Operating on a closed connection.") class Connection(object): """DB-API Connection to Google BigQuery. Args: - client (google.cloud.bigquery.Client): A client used to connect to BigQuery. + client (Optional[google.cloud.bigquery.Client]): + A REST API client used to connect to BigQuery. If not passed, a + client is created using default options inferred from the environment. bqstorage_client(\ - Optional[google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient] \ + Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient] \ ): - [Beta] An alternative client that uses the faster BigQuery Storage - API to fetch rows from BigQuery. If both clients are given, - ``bqstorage_client`` is used first to fetch query results, - with a fallback on ``client``, if necessary. + A client that uses the faster BigQuery Storage API to fetch rows from + BigQuery. If not passed, it is created using the same credentials + as ``client``. + + When fetching query results, ``bqstorage_client`` is used first, with + a fallback on ``client``, if necessary. .. note:: There is a known issue with the BigQuery Storage API with small @@ -38,15 +46,46 @@ class Connection(object): https://github.com/googleapis/python-bigquery-storage/issues/2 """ - def __init__(self, client, bqstorage_client=None): + def __init__(self, client=None, bqstorage_client=None): + if client is None: + client = bigquery.Client() + self._owns_client = True + else: + self._owns_client = False + + if bqstorage_client is None: + # A warning is already raised by the factory if instantiation fails. + bqstorage_client = client._create_bqstorage_client() + self._owns_bqstorage_client = bqstorage_client is not None + else: + self._owns_bqstorage_client = False + self._client = client self._bqstorage_client = bqstorage_client + self._closed = False + self._cursors_created = weakref.WeakSet() + def close(self): - """No-op.""" + """Close the connection and any cursors created from it. + + Any BigQuery clients explicitly passed to the constructor are *not* + closed, only those created by the connection instance itself. + """ + self._closed = True + + if self._owns_client: + self._client.close() + + if self._owns_bqstorage_client: + # There is no close() on the BQ Storage client itself. + self._bqstorage_client.transport.channel.close() + + for cursor_ in self._cursors_created: + cursor_.close() def commit(self): - """No-op.""" + """No-op, but for consistency raise an error if connection is closed.""" def cursor(self): """Return a new cursor object. @@ -54,7 +93,9 @@ def cursor(self): Returns: google.cloud.bigquery.dbapi.Cursor: A DB-API cursor that uses this connection. """ - return cursor.Cursor(self) + new_cursor = cursor.Cursor(self) + self._cursors_created.add(new_cursor) + return new_cursor def connect(client=None, bqstorage_client=None): @@ -62,15 +103,17 @@ def connect(client=None, bqstorage_client=None): Args: client (Optional[google.cloud.bigquery.Client]): - A client used to connect to BigQuery. If not passed, a client is - created using default options inferred from the environment. + A REST API client used to connect to BigQuery. If not passed, a + client is created using default options inferred from the environment. bqstorage_client(\ - Optional[google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient] \ + Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient] \ ): - [Beta] An alternative client that uses the faster BigQuery Storage - API to fetch rows from BigQuery. If both clients are given, - ``bqstorage_client`` is used first to fetch query results, - with a fallback on ``client``, if necessary. + A client that uses the faster BigQuery Storage API to fetch rows from + BigQuery. If not passed, it is created using the same credentials + as ``client``. + + When fetching query results, ``bqstorage_client`` is used first, with + a fallback on ``client``, if necessary. .. note:: There is a known issue with the BigQuery Storage API with small @@ -81,6 +124,4 @@ def connect(client=None, bqstorage_client=None): Returns: google.cloud.bigquery.dbapi.Connection: A new DB-API connection to BigQuery. """ - if client is None: - client = bigquery.Client() return Connection(client, bqstorage_client) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index eb73b3d562e5..c72116d07178 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -15,6 +15,7 @@ """Cursor for the Google BigQuery DB-API.""" import collections +import warnings try: from collections import abc as collections_abc @@ -51,6 +52,7 @@ ) +@_helpers.raise_on_closed("Operating on a closed cursor.") class Cursor(object): """DB-API Cursor to Google BigQuery. @@ -73,9 +75,11 @@ def __init__(self, connection): self.arraysize = None self._query_data = None self._query_job = None + self._closed = False def close(self): - """No-op.""" + """Mark the cursor as closed, preventing its further use.""" + self._closed = True def _set_description(self, schema): """Set description from schema. @@ -256,7 +260,7 @@ def _bqstorage_fetch(self, bqstorage_client): Args: bqstorage_client(\ - google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ + google.cloud.bigquery_storage_v1.BigQueryReadClient \ ): A client tha know how to talk to the BigQuery Storage API. @@ -264,26 +268,56 @@ def _bqstorage_fetch(self, bqstorage_client): Iterable[Mapping]: A sequence of rows, represented as dictionaries. """ - # NOTE: Given that BQ storage client instance is passed in, it means - # that bigquery_storage_v1beta1 library is available (no ImportError). + # Hitting this code path with a BQ Storage client instance implies that + # bigquery_storage_v1* can indeed be imported here without errors. + from google.cloud import bigquery_storage_v1 from google.cloud import bigquery_storage_v1beta1 table_reference = self._query_job.destination - read_session = bqstorage_client.create_read_session( - table_reference.to_bqstorage(), - "projects/{}".format(table_reference.project), - # a single stream only, as DB API is not well-suited for multithreading - requested_streams=1, + is_v1beta1_client = isinstance( + bqstorage_client, bigquery_storage_v1beta1.BigQueryStorageClient ) + # We want to preserve compatibility with the v1beta1 BQ Storage clients, + # thus adjust the session creation if needed. + if is_v1beta1_client: + warnings.warn( + "Support for BigQuery Storage v1beta1 clients is deprecated, please " + "consider upgrading the client to BigQuery Storage v1 stable version.", + category=DeprecationWarning, + ) + read_session = bqstorage_client.create_read_session( + table_reference.to_bqstorage(v1beta1=True), + "projects/{}".format(table_reference.project), + # a single stream only, as DB API is not well-suited for multithreading + requested_streams=1, + format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, + ) + else: + requested_session = bigquery_storage_v1.types.ReadSession( + table=table_reference.to_bqstorage(), + data_format=bigquery_storage_v1.enums.DataFormat.ARROW, + ) + read_session = bqstorage_client.create_read_session( + parent="projects/{}".format(table_reference.project), + read_session=requested_session, + # a single stream only, as DB API is not well-suited for multithreading + max_stream_count=1, + ) + if not read_session.streams: return iter([]) # empty table, nothing to read - read_position = bigquery_storage_v1beta1.types.StreamPosition( - stream=read_session.streams[0], - ) - read_rows_stream = bqstorage_client.read_rows(read_position) + if is_v1beta1_client: + read_position = bigquery_storage_v1beta1.types.StreamPosition( + stream=read_session.streams[0], + ) + read_rows_stream = bqstorage_client.read_rows(read_position) + else: + stream_name = read_session.streams[0].name + read_rows_stream = bqstorage_client.read_rows(stream_name) + rows_iterable = read_rows_stream.rows(read_session) return rows_iterable @@ -353,10 +387,10 @@ def fetchall(self): return list(self._query_data) def setinputsizes(self, sizes): - """No-op.""" + """No-op, but for consistency raise an error if cursor is closed.""" def setoutputsize(self, size, column=None): - """No-op.""" + """No-op, but for consistency raise an error if cursor is closed.""" def _format_operation_list(operation, parameters): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 7a1178a8c933..0040d585ae9a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -3250,7 +3250,7 @@ def to_arrow( self, progress_bar_type=None, bqstorage_client=None, - create_bqstorage_client=False, + create_bqstorage_client=True, ): """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -3274,10 +3274,10 @@ def to_arrow( ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. - bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient): - **Beta Feature** Optional. A BigQuery Storage API client. If - supplied, use the faster BigQuery Storage API to fetch rows - from BigQuery. This API is a billable API. + bqstorage_client (google.cloud.bigquery_storage_v1.BigQueryReadClient): + Optional. A BigQuery Storage API client. If supplied, use the + faster BigQuery Storage API to fetch rows from BigQuery. + This API is a billable API. This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. @@ -3285,11 +3285,10 @@ def to_arrow( Reading from a specific partition or snapshot is not currently supported by this method. create_bqstorage_client (bool): - **Beta Feature** Optional. If ``True``, create a BigQuery - Storage API client using the default API settings. The - BigQuery Storage API is a faster way to fetch rows from - BigQuery. See the ``bqstorage_client`` parameter for more - information. + Optional. If ``True`` (default), create a BigQuery Storage API + client using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. This argument does nothing if ``bqstorage_client`` is supplied. @@ -3320,15 +3319,15 @@ def to_dataframe( bqstorage_client=None, dtypes=None, progress_bar_type=None, - create_bqstorage_client=False, + create_bqstorage_client=True, ): """Return a pandas DataFrame from a QueryJob Args: - bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient): - **Alpha Feature** Optional. A BigQuery Storage API client. If - supplied, use the faster BigQuery Storage API to fetch rows - from BigQuery. This API is a billable API. + bqstorage_client (google.cloud.bigquery_storage_v1.BigQueryReadClient): + Optional. A BigQuery Storage API client. If supplied, use the + faster BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. This method requires the ``fastavro`` and ``google-cloud-bigquery-storage`` libraries. @@ -3355,11 +3354,10 @@ def to_dataframe( ..versionadded:: 1.11.0 create_bqstorage_client (bool): - **Beta Feature** Optional. If ``True``, create a BigQuery - Storage API client using the default API settings. The - BigQuery Storage API is a faster way to fetch rows from - BigQuery. See the ``bqstorage_client`` parameter for more - information. + Optional. If ``True`` (default), create a BigQuery Storage API + client using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. This argument does nothing if ``bqstorage_client`` is supplied. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 4f2a16ccafd3..40dda3d13bc1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -39,10 +39,9 @@ Project to use for running the query. Defaults to the context :attr:`~google.cloud.bigquery.magics.Context.project`. * ``--use_bqstorage_api`` (optional, line argument): - Downloads the DataFrame using the BigQuery Storage API. To use this - option, install the ``google-cloud-bigquery-storage`` and ``fastavro`` - packages, and `enable the BigQuery Storage API - `_. + [Deprecated] Not used anymore, as BigQuery Storage API is used by default. + * ``--use_rest_api`` (optional, line argument): + Use the BigQuery REST API instead of the Storage API. * ``--use_legacy_sql`` (optional, line argument): Runs the query using Legacy SQL syntax. Defaults to Standard SQL if this argument not used. @@ -150,6 +149,7 @@ import functools import sys import time +import warnings from concurrent import futures try: @@ -182,7 +182,6 @@ def __init__(self): self._credentials = None self._project = None self._connection = None - self._use_bqstorage_api = None self._default_query_job_config = bigquery.QueryJobConfig() @property @@ -245,21 +244,6 @@ def project(self): def project(self, value): self._project = value - @property - def use_bqstorage_api(self): - """bool: [Beta] Set to True to use the BigQuery Storage API to - download query results - - To use this option, install the ``google-cloud-bigquery-storage`` and - ``fastavro`` packages, and `enable the BigQuery Storage API - `_. - """ - return self._use_bqstorage_api - - @use_bqstorage_api.setter - def use_bqstorage_api(self, value): - self._use_bqstorage_api = value - @property def default_query_job_config(self): """google.cloud.bigquery.job.QueryJobConfig: Default job @@ -434,11 +418,21 @@ def _create_dataset_if_necessary(client, dataset_id): @magic_arguments.argument( "--use_bqstorage_api", action="store_true", + default=None, + help=( + "[Deprecated] The BigQuery Storage API is already used by default to " + "download large query results, and this option has no effect. " + "If you want to switch to the classic REST API instead, use the " + "--use_rest_api option." + ), +) +@magic_arguments.argument( + "--use_rest_api", + action="store_true", default=False, help=( - "[Beta] Use the BigQuery Storage API to download large query results. " - "To use this option, install the google-cloud-bigquery-storage and " - "fastavro packages, and enable the BigQuery Storage API." + "Use the classic REST API instead of the BigQuery Storage API to " + "download query results." ), ) @magic_arguments.argument( @@ -481,6 +475,14 @@ def _cell_magic(line, query): """ args = magic_arguments.parse_argstring(_cell_magic, line) + if args.use_bqstorage_api is not None: + warnings.warn( + "Deprecated option --use_bqstorage_api, the BigQuery " + "Storage API is already used by default.", + category=DeprecationWarning, + ) + use_bqstorage_api = not args.use_rest_api + params = [] if args.params is not None: try: @@ -502,9 +504,7 @@ def _cell_magic(line, query): ) if context._connection: client._connection = context._connection - bqstorage_client = _make_bqstorage_client( - args.use_bqstorage_api or context.use_bqstorage_api, context.credentials - ) + bqstorage_client = _make_bqstorage_client(use_bqstorage_api, context.credentials) close_transports = functools.partial(_close_transports, client, bqstorage_client) @@ -603,11 +603,13 @@ def _make_bqstorage_client(use_bqstorage_api, credentials): return None try: - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage_v1 except ImportError as err: customized_error = ImportError( - "Install the google-cloud-bigquery-storage and pyarrow packages " - "to use the BigQuery Storage API." + "The default BigQuery Storage API client cannot be used, install " + "the missing google-cloud-bigquery-storage and pyarrow packages " + "to use it. Alternatively, use the classic REST API by specifying " + "the --use_rest_api magic option." ) six.raise_from(customized_error, err) @@ -619,7 +621,7 @@ def _make_bqstorage_client(use_bqstorage_api, credentials): ) six.raise_from(customized_error, err) - return bigquery_storage_v1beta1.BigQueryStorageClient( + return bigquery_storage_v1.BigQueryReadClient( credentials=credentials, client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) @@ -634,7 +636,7 @@ def _close_transports(client, bqstorage_client): Args: client (:class:`~google.cloud.bigquery.client.Client`): bqstorage_client - (Optional[:class:`~google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient`]): + (Optional[:class:`~google.cloud.bigquery_storage_v1.BigQueryReadClient`]): A client for the BigQuery Storage API. """ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index e66d24e74983..3022ddbd5f2c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -26,6 +26,7 @@ import six try: + # Needed for the to_bqstorage() method. from google.cloud import bigquery_storage_v1beta1 except ImportError: # pragma: NO COVER bigquery_storage_v1beta1 = None @@ -226,7 +227,7 @@ def to_api_repr(self): "tableId": self._table_id, } - def to_bqstorage(self): + def to_bqstorage(self, v1beta1=False): """Construct a BigQuery Storage API representation of this table. Install the ``google-cloud-bigquery-storage`` package to use this @@ -235,35 +236,41 @@ def to_bqstorage(self): If the ``table_id`` contains a partition identifier (e.g. ``my_table$201812``) or a snapshot identifier (e.g. ``mytable@1234567890``), it is ignored. Use - :class:`google.cloud.bigquery_storage_v1beta1.types.TableReadOptions` + :class:`google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions` to filter rows by partition. Use - :class:`google.cloud.bigquery_storage_v1beta1.types.TableModifiers` + :class:`google.cloud.bigquery_storage_v1.types.ReadSession.TableModifiers` to select a specific snapshot to read from. + Args: + v1beta1 (Optiona[bool]): + If :data:`True`, return representation compatible with BigQuery + Storage ``v1beta1`` version. Defaults to :data:`False`. + Returns: - google.cloud.bigquery_storage_v1beta1.types.TableReference: + Union[str, google.cloud.bigquery_storage_v1beta1.types.TableReference:]: A reference to this table in the BigQuery Storage API. Raises: ValueError: - If the :mod:`google.cloud.bigquery_storage_v1beta1` module - cannot be imported. + If ``v1beta1`` compatibility is requested, but the + :mod:`google.cloud.bigquery_storage_v1beta1` module cannot be imported. """ - if bigquery_storage_v1beta1 is None: + if v1beta1 and bigquery_storage_v1beta1 is None: raise ValueError(_NO_BQSTORAGE_ERROR) - table_ref = bigquery_storage_v1beta1.types.TableReference() - table_ref.project_id = self._project - table_ref.dataset_id = self._dataset_id - table_id = self._table_id - - if "@" in table_id: - table_id = table_id.split("@")[0] + table_id, _, _ = self._table_id.partition("@") + table_id, _, _ = table_id.partition("$") - if "$" in table_id: - table_id = table_id.split("$")[0] - - table_ref.table_id = table_id + if v1beta1: + table_ref = bigquery_storage_v1beta1.types.TableReference( + project_id=self._project, + dataset_id=self._dataset_id, + table_id=table_id, + ) + else: + table_ref = "projects/{}/datasets/{}/tables/{}".format( + self._project, self._dataset_id, table_id, + ) return table_ref @@ -868,14 +875,19 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) - def to_bqstorage(self): + def to_bqstorage(self, v1beta1=False): """Construct a BigQuery Storage API representation of this table. + Args: + v1beta1 (Optiona[bool]): + If :data:`True`, return representation compatible with BigQuery + Storage ``v1beta1`` version. Defaults to :data:`False`. + Returns: - google.cloud.bigquery_storage_v1beta1.types.TableReference: + Union[str, google.cloud.bigquery_storage_v1beta1.types.TableReference:]: A reference to this table in the BigQuery Storage API. """ - return self.reference.to_bqstorage() + return self.reference.to_bqstorage(v1beta1=v1beta1) def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" @@ -1083,14 +1095,19 @@ def from_string(cls, full_table_id): {"tableReference": TableReference.from_string(full_table_id).to_api_repr()} ) - def to_bqstorage(self): + def to_bqstorage(self, v1beta1=False): """Construct a BigQuery Storage API representation of this table. + Args: + v1beta1 (Optiona[bool]): + If :data:`True`, return representation compatible with BigQuery + Storage ``v1beta1`` version. Defaults to :data:`False`. + Returns: - google.cloud.bigquery_storage_v1beta1.types.TableReference: + Union[str, google.cloud.bigquery_storage_v1beta1.types.TableReference:]: A reference to this table in the BigQuery Storage API. """ - return self.reference.to_bqstorage() + return self.reference.to_bqstorage(v1beta1=v1beta1) def _row_from_mapping(mapping, schema): @@ -1466,7 +1483,7 @@ def to_arrow( self, progress_bar_type=None, bqstorage_client=None, - create_bqstorage_client=False, + create_bqstorage_client=True, ): """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1490,10 +1507,10 @@ def to_arrow( ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. - bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient): - **Beta Feature** Optional. A BigQuery Storage API client. If - supplied, use the faster BigQuery Storage API to fetch rows - from BigQuery. This API is a billable API. + bqstorage_client (google.cloud.bigquery_storage_v1.BigQueryReadClient): + Optional. A BigQuery Storage API client. If supplied, use the + faster BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. @@ -1501,11 +1518,10 @@ def to_arrow( Reading from a specific partition or snapshot is not currently supported by this method. create_bqstorage_client (bool): - **Beta Feature** Optional. If ``True``, create a BigQuery - Storage API client using the default API settings. The - BigQuery Storage API is a faster way to fetch rows from - BigQuery. See the ``bqstorage_client`` parameter for more - information. + Optional. If ``True`` (default), create a BigQuery Storage API + client using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. This argument does nothing if ``bqstorage_client`` is supplied. @@ -1575,10 +1591,9 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): """Create an iterable of pandas DataFrames, to process the table as a stream. Args: - bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient): - **Beta Feature** Optional. A BigQuery Storage API client. If - supplied, use the faster BigQuery Storage API to fetch rows - from BigQuery. + bqstorage_client (google.cloud.bigquery_storage_v1.BigQueryReadClient): + Optional. A BigQuery Storage API client. If supplied, use the + faster BigQuery Storage API to fetch rows from BigQuery. This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. @@ -1639,15 +1654,14 @@ def to_dataframe( bqstorage_client=None, dtypes=None, progress_bar_type=None, - create_bqstorage_client=False, + create_bqstorage_client=True, ): """Create a pandas DataFrame by loading all pages of a query. Args: - bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient): - **Beta Feature** Optional. A BigQuery Storage API client. If - supplied, use the faster BigQuery Storage API to fetch rows - from BigQuery. + bqstorage_client (google.cloud.bigquery_storage_v1.BigQueryReadClient): + Optional. A BigQuery Storage API client. If supplied, use the + faster BigQuery Storage API to fetch rows from BigQuery. This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. @@ -1685,11 +1699,10 @@ def to_dataframe( ..versionadded:: 1.11.0 create_bqstorage_client (bool): - **Beta Feature** Optional. If ``True``, create a BigQuery - Storage API client using the default API settings. The - BigQuery Storage API is a faster way to fetch rows from - BigQuery. See the ``bqstorage_client`` parameter for more - information. + Optional. If ``True`` (default), create a BigQuery Storage API + client using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. This argument does nothing if ``bqstorage_client`` is supplied. @@ -1704,7 +1717,7 @@ def to_dataframe( Raises: ValueError: If the :mod:`pandas` library cannot be imported, or the - :mod:`google.cloud.bigquery_storage_v1beta1` module is + :mod:`google.cloud.bigquery_storage_v1` module is required but cannot be imported. """ @@ -1789,7 +1802,7 @@ def to_arrow( self, progress_bar_type=None, bqstorage_client=None, - create_bqstorage_client=False, + create_bqstorage_client=True, ): """[Beta] Create an empty class:`pyarrow.Table`. @@ -1810,7 +1823,7 @@ def to_dataframe( bqstorage_client=None, dtypes=None, progress_bar_type=None, - create_bqstorage_client=False, + create_bqstorage_client=True, ): """Create an empty dataframe. diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 3ec2ba0bd8de..422584d123d9 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -39,7 +39,7 @@ ] extras = { "bqstorage": [ - "google-cloud-bigquery-storage >= 0.6.0, <2.0.0dev", + "google-cloud-bigquery-storage >= 1.0.0, <2.0.0dev", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 66d7ee259bee..3b874300f3e9 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -34,8 +34,10 @@ import pkg_resources try: + from google.cloud import bigquery_storage_v1 from google.cloud import bigquery_storage_v1beta1 except ImportError: # pragma: NO COVER + bigquery_storage_v1 = None bigquery_storage_v1beta1 = None try: @@ -1689,10 +1691,10 @@ def test_dbapi_fetchall(self): self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)]) @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_dbapi_fetch_w_bqstorage_client_small_result_set(self): - bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + bqstorage_client = bigquery_storage_v1.BigQueryReadClient( credentials=Config.CLIENT._credentials ) cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() @@ -1733,10 +1735,60 @@ def test_dbapi_fetch_w_bqstorage_client_small_result_set(self): self.assertEqual(fetched_data, expected_data) @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) - @unittest.skipIf(fastavro is None, "Requires `fastavro`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): + bqstorage_client = bigquery_storage_v1.BigQueryReadClient( + credentials=Config.CLIENT._credentials + ) + cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() + + # Pick a large enough LIMIT value to assure that the fallback to the + # default client is not needed due to the result set being too small + # (a known issue that causes problems when reading such result sets with + # BQ storage client). + cursor.execute( + """ + SELECT id, `by`, time_ts + FROM `bigquery-public-data.hacker_news.comments` + ORDER BY `id` ASC + LIMIT 100000 + """ + ) + + result_rows = [cursor.fetchone(), cursor.fetchone(), cursor.fetchone()] + + field_name = operator.itemgetter(0) + fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] + + # Since DB API is not thread safe, only a single result stream should be + # requested by the BQ storage client, meaning that results should arrive + # in the sorted order. + expected_data = [ + [ + ("by", "sama"), + ("id", 15), + ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)), + ], + [ + ("by", "pg"), + ("id", 17), + ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)), + ], + [ + ("by", "pg"), + ("id", 22), + ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)), + ], + ] + self.assertEqual(fetched_data, expected_data) + + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_dbapi_fetch_w_bqstorage_client_v1beta1_large_result_set(self): bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( credentials=Config.CLIENT._credentials ) @@ -1782,6 +1834,36 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): ] self.assertEqual(fetched_data, expected_data) + @unittest.skipIf( + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_dbapi_connection_does_not_leak_sockets(self): + current_process = psutil.Process() + conn_count_start = len(current_process.connections()) + + # Provide no explicit clients, so that the connection will create and own them. + connection = dbapi.connect() + cursor = connection.cursor() + + # Pick a large enough LIMIT value to assure that the fallback to the + # default client is not needed due to the result set being too small + # (a known issue that causes problems when reding such result sets with + # BQ storage client). + cursor.execute( + """ + SELECT id, `by`, time_ts + FROM `bigquery-public-data.hacker_news.comments` + ORDER BY `id` ASC + LIMIT 100000 + """ + ) + rows = cursor.fetchall() + self.assertEqual(len(rows), 100000) + + connection.close() + conn_count_end = len(current_process.connections()) + self.assertEqual(conn_count_end, conn_count_start) + def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile from google.cloud.bigquery.job import CreateDisposition @@ -2187,7 +2269,7 @@ def test_query_results_to_dataframe(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_query_results_to_dataframe_w_bqstorage(self): dest_dataset = self.temp_dataset(_make_dataset_id("bqstorage_to_dataframe_")) @@ -2199,6 +2281,60 @@ def test_query_results_to_dataframe_w_bqstorage(self): LIMIT 10 """ + bqstorage_client = bigquery_storage_v1.BigQueryReadClient( + credentials=Config.CLIENT._credentials + ) + + job_configs = ( + # There is a known issue reading small anonymous query result + # tables with the BQ Storage API. Writing to a destination + # table works around this issue. + bigquery.QueryJobConfig( + destination=dest_ref, write_disposition="WRITE_TRUNCATE" + ), + # Check that the client is able to work around the issue with + # reading small anonymous query result tables by falling back to + # the tabledata.list API. + None, + ) + + for job_config in job_configs: + df = ( + Config.CLIENT.query(query, job_config=job_config) + .result() + .to_dataframe(bqstorage_client) + ) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 10) # verify the number of rows + column_names = ["id", "author", "time_ts", "dead"] + self.assertEqual(list(df), column_names) + exp_datatypes = { + "id": int, + "author": six.text_type, + "time_ts": pandas.Timestamp, + "dead": bool, + } + for index, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + self.assertIsInstance(row[col], exp_datatypes[col]) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_query_results_to_dataframe_w_bqstorage_v1beta1(self): + dest_dataset = self.temp_dataset(_make_dataset_id("bqstorage_to_dataframe_")) + dest_ref = dest_dataset.table("query_results") + + query = """ + SELECT id, author, time_ts, dead + FROM `bigquery-public-data.hacker_news.comments` + LIMIT 10 + """ + bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( credentials=Config.CLIENT._credentials ) @@ -2485,7 +2621,7 @@ def _fetch_dataframe(self, query): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat @@ -2521,7 +2657,7 @@ def test_nested_table_to_arrow(self): job_config.schema = schema # Load a table using a local JSON file from memory. Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + bqstorage_client = bigquery_storage_v1.BigQueryReadClient( credentials=Config.CLIENT._credentials ) @@ -2677,13 +2813,13 @@ def test_list_rows_page_size(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_list_rows_max_results_w_bqstorage(self): table_ref = DatasetReference("bigquery-public-data", "utility_us").table( "country_code_iso" ) - bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + bqstorage_client = bigquery_storage_v1.BigQueryReadClient( credentials=Config.CLIENT._credentials ) @@ -2741,7 +2877,11 @@ def test_bigquery_magic(): assert isinstance(result, pandas.DataFrame) assert len(result) == 10 # verify row count assert list(result) == ["url", "view_count"] # verify column names - assert conn_count_end == conn_count_start # system resources are released + + # NOTE: For some reason, the number of open sockets is sometimes one *less* + # than expected when running system tests on Kokoro, thus using the <= assertion. + # That's still fine, however, since the sockets are apparently not leaked. + assert conn_count_end <= conn_count_start # system resources are released def _job_done(instance): diff --git a/packages/google-cloud-bigquery/tests/unit/helpers.py b/packages/google-cloud-bigquery/tests/unit/helpers.py index 5b731a763a99..eea345e8979b 100644 --- a/packages/google-cloud-bigquery/tests/unit/helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/helpers.py @@ -22,3 +22,10 @@ def make_connection(*responses): mock_conn.user_agent = "testing 1.2.3" mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] return mock_conn + + +def _to_pyarrow(value): + """Convert Python value to pyarrow value.""" + import pyarrow + + return pyarrow.array([value])[0] diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f1dc4e816765..0e083d43f74e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -52,9 +52,10 @@ from google.cloud.bigquery.dataset import DatasetReference try: - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage_v1 except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage_v1beta1 = None + bigquery_storage_v1 = None +from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -655,25 +656,46 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_create_bqstorage_client(self): - mock_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + mock_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) mock_client_instance = object() mock_client.return_value = mock_client_instance creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with mock.patch( - "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", mock_client + "google.cloud.bigquery_storage_v1.BigQueryReadClient", mock_client ): bqstorage_client = client._create_bqstorage_client() self.assertIs(bqstorage_client, mock_client_instance) mock_client.assert_called_once_with(credentials=creds) + def test_create_bqstorage_client_missing_dependency(self): + client = self._make_one() + + def fail_bqstorage_import(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage_v1" in name or ( + fromlist is not None and "bigquery_storage_v1" in fromlist + ) + + no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) + + with no_bqstorage, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._create_bqstorage_client() + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning + for warning in warned + if "not installed" in str(warning) + and "google-cloud-bigquery-storage" in str(warning) + ] + assert matching_warnings, "Missing dependency warning not raised." + def test_create_dataset_minimal(self): from google.cloud.bigquery.dataset import Dataset diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 8f98d0c530ff..08dd6dcfaa7a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -18,10 +18,18 @@ import operator as op import unittest +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + +import six + import google.cloud._helpers from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions +from tests.unit.helpers import _to_pyarrow class TestQueryParameters(unittest.TestCase): @@ -195,10 +203,21 @@ def test_empty_iterable(self): result = _helpers.to_bq_table_rows(rows_iterable) self.assertEqual(list(result), []) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_non_empty_iterable(self): rows_iterable = [ - dict(one=1.1, four=1.4, two=1.2, three=1.3), - dict(one=2.1, four=2.4, two=2.2, three=2.3), + dict( + one=_to_pyarrow(1.1), + four=_to_pyarrow(1.4), + two=_to_pyarrow(1.2), + three=_to_pyarrow(1.3), + ), + dict( + one=_to_pyarrow(2.1), + four=_to_pyarrow(2.4), + two=_to_pyarrow(2.2), + three=_to_pyarrow(2.3), + ), ] result = _helpers.to_bq_table_rows(rows_iterable) @@ -219,3 +238,94 @@ def test_non_empty_iterable(self): items = sorted(row_2.items(), key=field_value) expected_items = [("one", 2.1), ("two", 2.2), ("three", 2.3), ("four", 2.4)] self.assertEqual(items, expected_items) + + +class TestRaiseOnClosedDecorator(unittest.TestCase): + def _make_class(self): + class Foo(object): + + class_member = "class member" + + def __init__(self): + self._closed = False + self.instance_member = "instance member" + + def instance_method(self): + return self.instance_member + + @classmethod + def class_method(cls): # pragma: NO COVER + return cls.class_member + + @staticmethod + def static_method(): # pragma: NO COVER + return "static return value" + + def _private_method(self): + return self.instance_member + + return Foo + + def test_preserves_method_names(self): + klass = self._make_class() + decorated_class = _helpers.raise_on_closed("I'm closed!")(klass) + instance = decorated_class() + + self.assertEqual(instance.instance_method.__name__, "instance_method") + self.assertEqual(instance.class_method.__name__, "class_method") + self.assertEqual(instance.static_method.__name__, "static_method") + self.assertEqual(instance._private_method.__name__, "_private_method") + + def test_methods_on_not_closed_instance(self): + klass = self._make_class() + decorated_class = _helpers.raise_on_closed("I'm closed!")(klass) + instance = decorated_class() + instance._closed = False + + self.assertEqual(instance.instance_method(), "instance member") + self.assertEqual(instance.class_method(), "class member") + self.assertEqual(instance.static_method(), "static return value") + self.assertEqual(instance._private_method(), "instance member") + + def test_public_instance_methods_on_closed_instance(self): + klass = self._make_class() + decorated_class = _helpers.raise_on_closed("I'm closed!")(klass) + instance = decorated_class() + instance._closed = True + + with six.assertRaisesRegex(self, exceptions.ProgrammingError, "I'm closed!"): + instance.instance_method() + + def test_methods_wo_public_instance_methods_on_closed_instance(self): + klass = self._make_class() + decorated_class = _helpers.raise_on_closed("I'm closed!")(klass) + instance = decorated_class() + instance._closed = True + + # no errors expected + self.assertEqual(instance.class_method(), "class member") + self.assertEqual(instance.static_method(), "static return value") + self.assertEqual(instance._private_method(), "instance member") + + def test_custom_class_closed_attribute(self): + klass = self._make_class() + decorated_class = _helpers.raise_on_closed( + "I'm closed!", closed_attr_name="_really_closed" + )(klass) + instance = decorated_class() + instance._closed = False + instance._really_closed = True + + with six.assertRaisesRegex(self, exceptions.ProgrammingError, "I'm closed!"): + instance.instance_method() + + def test_custom_on_closed_error_type(self): + klass = self._make_class() + decorated_class = _helpers.raise_on_closed( + "I'm closed!", exc_class=RuntimeError + )(klass) + instance = decorated_class() + instance._closed = True + + with six.assertRaisesRegex(self, RuntimeError, "I'm closed!"): + instance.instance_method() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 595afd0fe66c..96ec41c51555 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import gc import unittest import mock +import six try: - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage_v1 except ImportError: # pragma: NO COVER - bigquery_storage_v1beta1 = None + bigquery_storage_v1 = None class TestConnection(unittest.TestCase): @@ -39,22 +41,27 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): - from google.cloud.bigquery_storage_v1beta1 import client + from google.cloud.bigquery_storage_v1 import client - mock_client = mock.create_autospec(client.BigQueryStorageClient) + mock_client = mock.create_autospec(client.BigQueryReadClient) + mock_client.transport = mock.Mock(spec=["channel"]) + mock_client.transport.channel = mock.Mock(spec=["close"]) return mock_client def test_ctor_wo_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection mock_client = self._mock_client() + mock_bqstorage_client = self._mock_bqstorage_client() + mock_client._create_bqstorage_client.return_value = mock_bqstorage_client + connection = self._make_one(client=mock_client) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) - self.assertIsNone(connection._bqstorage_client) + self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_ctor_w_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection @@ -76,20 +83,23 @@ def test_connect_wo_client(self, mock_client): connection = connect() self.assertIsInstance(connection, Connection) self.assertIsNotNone(connection._client) - self.assertIsNone(connection._bqstorage_client) + self.assertIsNotNone(connection._bqstorage_client) def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection mock_client = self._mock_client() + mock_bqstorage_client = self._mock_bqstorage_client() + mock_client._create_bqstorage_client.return_value = mock_bqstorage_client + connection = connect(client=mock_client) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) - self.assertIsNone(connection._bqstorage_client) + self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_connect_w_both_clients(self): from google.cloud.bigquery.dbapi import connect @@ -104,11 +114,77 @@ def test_connect_w_both_clients(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) - def test_close(self): + def test_raises_error_if_closed(self): + from google.cloud.bigquery.dbapi.exceptions import ProgrammingError + + connection = self._make_one(client=self._mock_client()) + + connection.close() + + for method in ("close", "commit", "cursor"): + with six.assertRaisesRegex( + self, ProgrammingError, r"Operating on a closed connection\." + ): + getattr(connection, method)() + + def test_close_closes_all_created_bigquery_clients(self): + client = self._mock_client() + bqstorage_client = self._mock_bqstorage_client() + + client_patcher = mock.patch( + "google.cloud.bigquery.dbapi.connection.bigquery.Client", + return_value=client, + ) + bqstorage_client_patcher = mock.patch.object( + client, "_create_bqstorage_client", return_value=bqstorage_client, + ) + + with client_patcher, bqstorage_client_patcher: + connection = self._make_one(client=None, bqstorage_client=None) + + connection.close() + + self.assertTrue(client.close.called) + self.assertTrue(bqstorage_client.transport.channel.close.called) + + def test_close_does_not_close_bigquery_clients_passed_to_it(self): + client = self._mock_client() + bqstorage_client = self._mock_bqstorage_client() + connection = self._make_one(client=client, bqstorage_client=bqstorage_client) + + connection.close() + + self.assertFalse(client.close.called) + self.assertFalse(bqstorage_client.transport.channel.called) + + def test_close_closes_all_created_cursors(self): connection = self._make_one(client=self._mock_client()) - # close() is a no-op, there is nothing to test. + cursor_1 = connection.cursor() + cursor_2 = connection.cursor() + self.assertFalse(cursor_1._closed) + self.assertFalse(cursor_2._closed) + connection.close() + self.assertTrue(cursor_1._closed) + self.assertTrue(cursor_2._closed) + + def test_does_not_keep_cursor_instances_alive(self): + from google.cloud.bigquery.dbapi import Cursor + + connection = self._make_one(client=self._mock_client()) + cursor_1 = connection.cursor() # noqa + cursor_2 = connection.cursor() + cursor_3 = connection.cursor() # noqa + + del cursor_2 + + # Connections should not hold strong references to the Cursor instances + # they created, unnecessarily keeping them alive. + gc.collect() + cursors = [obj for obj in gc.get_objects() if isinstance(obj, Cursor)] + self.assertEqual(len(cursors), 2) + def test_commit(self): connection = self._make_one(client=self._mock_client()) # commit() is a no-op, there is nothing to test. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index e53cc158a4c4..caec4b1bd38b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -14,17 +14,27 @@ import operator as op import unittest +import warnings import mock import six +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + from google.api_core import exceptions try: + from google.cloud import bigquery_storage_v1 from google.cloud import bigquery_storage_v1beta1 except ImportError: # pragma: NO COVER + bigquery_storage_v1 = None bigquery_storage_v1beta1 = None +from tests.unit.helpers import _to_pyarrow + class TestCursor(unittest.TestCase): @staticmethod @@ -51,25 +61,40 @@ def _mock_client(self, rows=None, schema=None, num_dml_affected_rows=None): num_dml_affected_rows=num_dml_affected_rows, ) mock_client.list_rows.return_value = rows + + # Assure that the REST client gets used, not the BQ Storage client. + mock_client._create_bqstorage_client.return_value = None + return mock_client - def _mock_bqstorage_client(self, rows=None, stream_count=0): - from google.cloud.bigquery_storage_v1beta1 import client - from google.cloud.bigquery_storage_v1beta1 import types + def _mock_bqstorage_client(self, rows=None, stream_count=0, v1beta1=False): + from google.cloud.bigquery_storage_v1 import client + from google.cloud.bigquery_storage_v1 import types + from google.cloud.bigquery_storage_v1beta1 import types as types_v1beta1 if rows is None: rows = [] - mock_client = mock.create_autospec(client.BigQueryStorageClient) + if v1beta1: + mock_client = mock.create_autospec( + bigquery_storage_v1beta1.BigQueryStorageClient + ) + mock_read_session = mock.MagicMock( + streams=[ + types_v1beta1.Stream(name="streams/stream_{}".format(i)) + for i in range(stream_count) + ] + ) + else: + mock_client = mock.create_autospec(client.BigQueryReadClient) + mock_read_session = mock.MagicMock( + streams=[ + types.ReadStream(name="streams/stream_{}".format(i)) + for i in range(stream_count) + ] + ) - mock_read_session = mock.MagicMock( - streams=[ - types.Stream(name="streams/stream_{}".format(i)) - for i in range(stream_count) - ] - ) mock_client.create_read_session.return_value = mock_read_session - mock_rows_stream = mock.MagicMock() mock_rows_stream.rows.return_value = iter(rows) mock_client.read_rows.return_value = mock_rows_stream @@ -88,6 +113,9 @@ def _mock_job(self, total_rows=0, schema=None, num_dml_affected_rows=None): schema=schema, num_dml_affected_rows=num_dml_affected_rows, ) + mock_job.destination.to_bqstorage.return_value = ( + "projects/P/datasets/DS/tables/T" + ) if num_dml_affected_rows is None: mock_job.statement_type = None # API sends back None for SELECT @@ -122,6 +150,31 @@ def test_close(self): # close() is a no-op, there is nothing to test. cursor.close() + def test_raises_error_if_closed(self): + from google.cloud.bigquery.dbapi import connect + from google.cloud.bigquery.dbapi.exceptions import ProgrammingError + + connection = connect(self._mock_client()) + cursor = connection.cursor() + cursor.close() + + method_names = ( + "close", + "execute", + "executemany", + "fetchall", + "fetchmany", + "fetchone", + "setinputsizes", + "setoutputsize", + ) + + for method in method_names: + with six.assertRaisesRegex( + self, ProgrammingError, r"Operating on a closed cursor\." + ): + getattr(cursor, method)() + def test_fetchone_wo_execute_raises_error(self): from google.cloud.bigquery import dbapi @@ -213,8 +266,9 @@ def test_fetchall_w_row(self): self.assertEqual(rows[0], (1,)) @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -225,8 +279,18 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): table.Row([2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), ] bqstorage_streamed_rows = [ - {"bar": 1.2, "foo": 1.1, "quux": 1.4, "baz": 1.3}, - {"bar": 2.2, "foo": 2.1, "quux": 2.4, "baz": 2.3}, + { + "bar": _to_pyarrow(1.2), + "foo": _to_pyarrow(1.1), + "quux": _to_pyarrow(1.4), + "baz": _to_pyarrow(1.3), + }, + { + "bar": _to_pyarrow(2.2), + "foo": _to_pyarrow(2.1), + "quux": _to_pyarrow(2.4), + "baz": _to_pyarrow(2.3), + }, ] mock_client = self._mock_client(rows=row_data) @@ -258,6 +322,70 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_fetchall_w_bqstorage_client_v1beta1_fetch_success(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery import table + + # use unordered data to also test any non-determenistic key order in dicts + row_data = [ + table.Row([1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), + table.Row([2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), + ] + bqstorage_streamed_rows = [ + { + "bar": _to_pyarrow(1.2), + "foo": _to_pyarrow(1.1), + "quux": _to_pyarrow(1.4), + "baz": _to_pyarrow(1.3), + }, + { + "bar": _to_pyarrow(2.2), + "foo": _to_pyarrow(2.1), + "quux": _to_pyarrow(2.4), + "baz": _to_pyarrow(2.3), + }, + ] + + mock_client = self._mock_client(rows=row_data) + mock_bqstorage_client = self._mock_bqstorage_client( + stream_count=1, rows=bqstorage_streamed_rows, v1beta1=True + ) + + connection = dbapi.connect( + client=mock_client, bqstorage_client=mock_bqstorage_client, + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar FROM some_table") + + with warnings.catch_warnings(record=True) as warned: + rows = cursor.fetchall() + + # a deprecation warning should have been emitted + expected_warnings = [ + warning + for warning in warned + if issubclass(warning.category, DeprecationWarning) + and "v1beta1" in str(warning) + ] + self.assertEqual(len(expected_warnings), 1, "Deprecation warning not raised.") + + # the default client was not used + mock_client.list_rows.assert_not_called() + + # check the data returned + field_value = op.itemgetter(1) + sorted_row_data = [sorted(row.items(), key=field_value) for row in rows] + expected_row_data = [ + [("foo", 1.1), ("bar", 1.2), ("baz", 1.3), ("quux", 1.4)], + [("foo", 2.1), ("bar", 2.2), ("baz", 2.3), ("quux", 2.4)], + ] + + self.assertEqual(sorted_row_data, expected_row_data) + + @unittest.skipIf( + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi @@ -279,7 +407,7 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): self.assertEqual(rows, []) @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi @@ -307,7 +435,7 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): mock_client.list_rows.assert_not_called() @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_fetchall_w_bqstorage_client_fetch_error_fallback_on_client(self): from google.cloud.bigquery import dbapi diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index c89cad7490bb..23991b9ec931 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -34,9 +34,9 @@ except ImportError: # pragma: NO COVER pyarrow = None try: - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage_v1 except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage_v1beta1 = None + bigquery_storage_v1 = None try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -5437,7 +5437,7 @@ def test_to_dataframe_ddl_query(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_bqstorage(self): query_resource = { @@ -5455,10 +5455,8 @@ def test_to_dataframe_bqstorage(self): client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - session = bigquery_storage_v1beta1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + session = bigquery_storage_v1.types.ReadSession() session.avro_schema.schema = json.dumps( { "type": "record", @@ -5473,13 +5471,17 @@ def test_to_dataframe_bqstorage(self): job.to_dataframe(bqstorage_client=bqstorage_client) + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage_v1.types.ReadSession( + table=destination_table, + data_format=bigquery_storage_v1.enums.DataFormat.ARROW, + ) bqstorage_client.create_read_session.assert_called_once_with( - mock.ANY, - "projects/{}".format(self.PROJECT), - format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, - read_options=mock.ANY, - # Use default number of streams for best performance. - requested_streams=0, + parent="projects/{}".format(self.PROJECT), + read_session=expected_session, + max_stream_count=0, # Use default number of streams for best performance. ) @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -5949,7 +5951,7 @@ def test__contains_order_by(query, expected): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif( - bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.parametrize( "query", @@ -5985,10 +5987,8 @@ def test_to_dataframe_bqstorage_preserve_order(query): connection = _make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(job_resource, client) - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - session = bigquery_storage_v1beta1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + session = bigquery_storage_v1.types.ReadSession() session.avro_schema.schema = json.dumps( { "type": "record", @@ -6003,11 +6003,14 @@ def test_to_dataframe_bqstorage_preserve_order(query): job.to_dataframe(bqstorage_client=bqstorage_client) + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **job_resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage_v1.types.ReadSession( + table=destination_table, data_format=bigquery_storage_v1.enums.DataFormat.ARROW, + ) bqstorage_client.create_read_session.assert_called_once_with( - mock.ANY, - "projects/test-project", - format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, - read_options=mock.ANY, - # Use a single stream to preserve row order. - requested_streams=1, + parent="projects/test-project", + read_session=expected_session, + max_stream_count=1, # Use a single stream to preserve row order. ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index fd9d1d7007f9..a42592e3c560 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -15,6 +15,7 @@ import copy import re from concurrent import futures +import warnings import mock import pytest @@ -36,9 +37,9 @@ import google.auth.credentials try: - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage_v1 except ImportError: # pragma: NO COVER - bigquery_storage_v1beta1 = None + bigquery_storage_v1 = None from google.cloud import bigquery from google.cloud.bigquery import job from google.cloud.bigquery import table @@ -74,8 +75,8 @@ def missing_bq_storage(): def fail_if(name, globals, locals, fromlist, level): # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage_v1beta1" in name or ( - fromlist is not None and "bigquery_storage_v1beta1" in fromlist + return "bigquery_storage_v1" in name or ( + fromlist is not None and "bigquery_storage_v1" in fromlist ) return maybe_fail_import(predicate=fail_if) @@ -305,14 +306,14 @@ def test__make_bqstorage_client_false(): @pytest.mark.skipif( - bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" ) def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) got = magics._make_bqstorage_client(True, credentials_mock) - assert isinstance(got, bigquery_storage_v1beta1.BigQueryStorageClient) + assert isinstance(got, bigquery_storage_v1.BigQueryReadClient) def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): @@ -329,7 +330,7 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): @pytest.mark.skipif( - bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): @@ -386,13 +387,31 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_without_optional_arguments(missing_bq_storage): +@pytest.mark.skipif( + bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") - magics.context.credentials = mock.create_autospec( + mock_credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(magics.context, "credentials", mock_credentials) + + # Mock out the BigQuery Storage API. + bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage_v1.BigQueryReadClient, instance=True + ) + bqstorage_instance_mock.transport = mock.Mock() + bqstorage_mock.return_value = bqstorage_instance_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock + ) + sql = "SELECT 17 AS num" result = pandas.DataFrame([17], columns=["num"]) run_query_patch = mock.patch( @@ -403,11 +422,11 @@ def test_bigquery_magic_without_optional_arguments(missing_bq_storage): ) query_job_mock.to_dataframe.return_value = result - # Shouldn't fail when BigQuery Storage client isn't installed. - with run_query_patch as run_query_mock, missing_bq_storage: + with run_query_patch as run_query_mock, bqstorage_client_patch: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "", sql) + assert bqstorage_mock.called # BQ storage client was used assert isinstance(return_value, pandas.DataFrame) assert len(return_value) == len(result) # verify row count assert list(return_value) == list(result) # verify column names @@ -530,7 +549,7 @@ def test_bigquery_magic_clears_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" ) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): ip = IPython.get_ipython() @@ -542,19 +561,16 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): # Set up the context with monkeypatch so that it's reset for subsequent # tests. monkeypatch.setattr(magics.context, "credentials", mock_credentials) - monkeypatch.setattr(magics.context, "use_bqstorage_api", False) # Mock out the BigQuery Storage API. - bqstorage_mock = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) bqstorage_instance_mock = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient, instance=True + bigquery_storage_v1.BigQueryReadClient, instance=True ) bqstorage_instance_mock.transport = mock.Mock() bqstorage_mock.return_value = bqstorage_instance_mock bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", bqstorage_mock + "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock ) sql = "SELECT 17 AS num" @@ -566,67 +582,20 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch: + with run_query_patch as run_query_mock, bqstorage_client_patch, warnings.catch_warnings( + record=True + ) as warned: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "--use_bqstorage_api", sql) - assert len(bqstorage_mock.call_args_list) == 1 - kwargs = bqstorage_mock.call_args_list[0].kwargs - assert kwargs.get("credentials") is mock_credentials - client_info = kwargs.get("client_info") - assert client_info is not None - assert client_info.user_agent == "ipython-" + IPython.__version__ - - query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock - ) - - assert isinstance(return_value, pandas.DataFrame) - - -@pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" -) -def test_bigquery_magic_with_bqstorage_from_context(monkeypatch): - ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") - mock_credentials = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - - # Set up the context with monkeypatch so that it's reset for subsequent - # tests. - monkeypatch.setattr(magics.context, "credentials", mock_credentials) - monkeypatch.setattr(magics.context, "use_bqstorage_api", True) + # Deprecation warning should have been issued. + def warning_match(warning): + message = str(warning).lower() + return "deprecated" in message and "use_bqstorage_api" in message - # Mock out the BigQuery Storage API. - bqstorage_mock = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - bqstorage_instance_mock = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient, instance=True - ) - bqstorage_instance_mock.transport = mock.Mock() - bqstorage_mock.return_value = bqstorage_instance_mock - bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", bqstorage_mock - ) - - sql = "SELECT 17 AS num" - result = pandas.DataFrame([17], columns=["num"]) - run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True - ) - query_job_mock = mock.create_autospec( - google.cloud.bigquery.job.QueryJob, instance=True - ) - query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch: - run_query_mock.return_value = query_job_mock - - return_value = ip.run_cell_magic("bigquery", "", sql) + expected_warnings = list(filter(warning_match, warned)) + assert len(expected_warnings) == 1 assert len(bqstorage_mock.call_args_list) == 1 kwargs = bqstorage_mock.call_args_list[0].kwargs @@ -644,9 +613,9 @@ def test_bigquery_magic_with_bqstorage_from_context(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" ) -def test_bigquery_magic_without_bqstorage(monkeypatch): +def test_bigquery_magic_with_rest_client_requested(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( @@ -658,11 +627,9 @@ def test_bigquery_magic_without_bqstorage(monkeypatch): monkeypatch.setattr(magics.context, "credentials", mock_credentials) # Mock out the BigQuery Storage API. - bqstorage_mock = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", bqstorage_mock + "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock ) sql = "SELECT 17 AS num" @@ -677,7 +644,7 @@ def test_bigquery_magic_without_bqstorage(monkeypatch): with run_query_patch as run_query_mock, bqstorage_client_patch: run_query_mock.return_value = query_job_mock - return_value = ip.run_cell_magic("bigquery", "", sql) + return_value = ip.run_cell_magic("bigquery", "--use_rest_api", sql) bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with(bqstorage_client=None) @@ -855,7 +822,7 @@ def test_bigquery_magic_w_table_id_and_destination_var(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_bqstorage_client(): @@ -878,16 +845,14 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): "google.cloud.bigquery.magics.bigquery.Client", autospec=True ) - bqstorage_mock = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) bqstorage_instance_mock = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient, instance=True + bigquery_storage_v1.BigQueryReadClient, instance=True ) bqstorage_instance_mock.transport = mock.Mock() bqstorage_mock.return_value = bqstorage_instance_mock bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient", bqstorage_mock + "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock ) table_id = "bigquery-public-data.samples.shakespeare" @@ -895,7 +860,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): with default_patch, client_patch as client_mock, bqstorage_client_patch: client_mock().list_rows.return_value = row_iterator_mock - ip.run_cell_magic("bigquery", "--use_bqstorage_api --max_results=5", table_id) + ip.run_cell_magic("bigquery", "--max_results=5", table_id) row_iterator_mock.to_dataframe.assert_called_once_with( bqstorage_client=bqstorage_instance_mock ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 72275fc536c1..cbce25b006ff 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -25,13 +25,19 @@ import google.api_core.exceptions try: + from google.cloud import bigquery_storage_v1 from google.cloud import bigquery_storage_v1beta1 + from google.cloud.bigquery_storage_v1.gapic.transports import ( + big_query_read_grpc_transport, + ) from google.cloud.bigquery_storage_v1beta1.gapic.transports import ( - big_query_storage_grpc_transport, + big_query_storage_grpc_transport as big_query_storage_grpc_transport_v1beta1, ) except ImportError: # pragma: NO COVER + bigquery_storage_v1 = None bigquery_storage_v1beta1 = None - big_query_storage_grpc_transport = None + big_query_read_grpc_transport = None + big_query_storage_grpc_transport_v1beta1 = None try: import pandas @@ -1492,7 +1498,7 @@ def test_to_dataframe_error_if_pandas_is_none(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): row_iterator = self._make_one() - df = row_iterator.to_dataframe() + df = row_iterator.to_dataframe(create_bqstorage_client=False) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows @@ -1687,7 +1693,7 @@ def test_to_arrow(self): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one(_mock_client(), api_request, path, schema) - tbl = row_iterator.to_arrow() + tbl = row_iterator.to_arrow(create_bqstorage_client=False) self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) @@ -1737,7 +1743,7 @@ def test_to_arrow_w_nulls(self): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one(_mock_client(), api_request, path, schema) - tbl = row_iterator.to_arrow() + tbl = row_iterator.to_arrow(create_bqstorage_client=False) self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 4) @@ -1772,7 +1778,7 @@ def test_to_arrow_w_unknown_type(self): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one(_mock_client(), api_request, path, schema) - tbl = row_iterator.to_arrow() + tbl = row_iterator.to_arrow(create_bqstorage_client=False) self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) @@ -1815,7 +1821,7 @@ def test_to_arrow_w_empty_table(self): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one(_mock_client(), api_request, path, schema) - tbl = row_iterator.to_arrow() + tbl = row_iterator.to_arrow(create_bqstorage_client=False) self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 0) @@ -1834,7 +1840,7 @@ def test_to_arrow_w_empty_table(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_max_results_w_create_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -1874,25 +1880,23 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - from google.cloud.bigquery_storage_v1beta1 import reader + from google.cloud.bigquery_storage_v1 import reader - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) bqstorage_client.transport = mock.create_autospec( - big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + big_query_read_grpc_transport.BigQueryReadGrpcTransport ) streams = [ # Use two streams we want to check frames are read from each stream. {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1beta1.types.ReadSession(streams=streams) + session = bigquery_storage_v1.types.ReadSession(streams=streams) arrow_schema = pyarrow.schema( [ pyarrow.field("colA", pyarrow.int64()), @@ -1957,21 +1961,19 @@ def test_to_arrow_w_bqstorage(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut mock_client = _mock_client() - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) bqstorage_client.transport = mock.create_autospec( - big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + big_query_read_grpc_transport.BigQueryReadGrpcTransport ) mock_client._create_bqstorage_client.return_value = bqstorage_client - session = bigquery_storage_v1beta1.types.ReadSession() + session = bigquery_storage_v1.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( mock_client, @@ -1990,16 +1992,14 @@ def test_to_arrow_w_bqstorage_creates_client(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - session = bigquery_storage_v1beta1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + session = bigquery_storage_v1.types.ReadSession() arrow_schema = pyarrow.schema( [ pyarrow.field("colA", pyarrow.string()), @@ -2059,7 +2059,9 @@ def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_moc for progress_bar_type, progress_bar_mock in progress_bars: row_iterator = self._make_one(_mock_client(), api_request, path, schema) - tbl = row_iterator.to_arrow(progress_bar_type=progress_bar_type) + tbl = row_iterator.to_arrow( + progress_bar_type=progress_bar_type, create_bqstorage_client=False, + ) progress_bar_mock.assert_called() progress_bar_mock().update.assert_called() @@ -2122,13 +2124,13 @@ def test_to_dataframe_iterable(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - from google.cloud.bigquery_storage_v1beta1 import reader + from google.cloud.bigquery_storage_v1 import reader arrow_fields = [ pyarrow.field("colA", pyarrow.int64()), @@ -2138,18 +2140,16 @@ def test_to_dataframe_iterable_w_bqstorage(self): ] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) bqstorage_client.transport = mock.create_autospec( - big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + big_query_read_grpc_transport.BigQueryReadGrpcTransport ) streams = [ # Use two streams we want to check frames are read from each stream. {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1beta1.types.ReadSession( + session = bigquery_storage_v1.types.ReadSession( streams=streams, arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -2231,7 +2231,7 @@ def test_to_dataframe(self): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one(_mock_client(), api_request, path, schema) - df = row_iterator.to_dataframe() + df = row_iterator.to_dataframe(create_bqstorage_client=False) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 4) # verify the number of rows @@ -2302,7 +2302,9 @@ def test_to_dataframe_progress_bar( for progress_bar_type, progress_bar_mock in progress_bars: row_iterator = self._make_one(_mock_client(), api_request, path, schema) - df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) + df = row_iterator.to_dataframe( + progress_bar_type=progress_bar_type, create_bqstorage_client=False, + ) progress_bar_mock.assert_called() progress_bar_mock().update.assert_called() @@ -2368,7 +2370,7 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): row_iterator = self._make_one(_mock_client(), api_request, path, schema) with warnings.catch_warnings(record=True) as warned: - df = row_iterator.to_dataframe() + df = row_iterator.to_dataframe(create_bqstorage_client=False) self.assertEqual(len(warned), 0) self.assertEqual(len(df), 4) @@ -2393,7 +2395,9 @@ def test_to_dataframe_no_tqdm(self): row_iterator = self._make_one(_mock_client(), api_request, path, schema) with warnings.catch_warnings(record=True) as warned: - df = row_iterator.to_dataframe(progress_bar_type="tqdm") + df = row_iterator.to_dataframe( + progress_bar_type="tqdm", create_bqstorage_client=False, + ) self.assertEqual(len(warned), 1) for warning in warned: @@ -2428,7 +2432,9 @@ def test_to_dataframe_tqdm_error(self): row_iterator = self._make_one(_mock_client(), api_request, path, schema) with warnings.catch_warnings(record=True) as warned: - df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) + df = row_iterator.to_dataframe( + progress_bar_type=progress_bar_type, create_bqstorage_client=False, + ) self.assertEqual(len(df), 4) # all should be well @@ -2448,7 +2454,7 @@ def test_to_dataframe_w_empty_results(self): api_request = mock.Mock(return_value={"rows": []}) row_iterator = self._make_one(_mock_client(), api_request, schema=schema) - df = row_iterator.to_dataframe() + df = row_iterator.to_dataframe(create_bqstorage_client=False) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows @@ -2506,7 +2512,7 @@ def test_to_dataframe_logs_tabledata_list(self): ) with mock.patch("google.cloud.bigquery.table._LOGGER", mock_logger): - row_iterator.to_dataframe() + row_iterator.to_dataframe(create_bqstorage_client=False) mock_logger.debug.assert_any_call( "Started reading table 'debug-proj.debug_dset.debug_tbl' with tabledata.list." @@ -2536,7 +2542,7 @@ def test_to_dataframe_w_various_types_nullable(self): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one(_mock_client(), api_request, path, schema) - df = row_iterator.to_dataframe() + df = row_iterator.to_dataframe(create_bqstorage_client=False) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 4) # verify the number of rows @@ -2576,7 +2582,9 @@ def test_to_dataframe_column_dtypes(self): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one(_mock_client(), api_request, path, schema) - df = row_iterator.to_dataframe(dtypes={"km": "float16"}) + df = row_iterator.to_dataframe( + dtypes={"km": "float16"}, create_bqstorage_client=False, + ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 3) # verify the number of rows @@ -2685,21 +2693,19 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut mock_client = _mock_client() - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) bqstorage_client.transport = mock.create_autospec( - big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + big_query_read_grpc_transport.BigQueryReadGrpcTransport ) mock_client._create_bqstorage_client.return_value = bqstorage_client - session = bigquery_storage_v1beta1.types.ReadSession() + session = bigquery_storage_v1.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( mock_client, @@ -2718,12 +2724,41 @@ def test_to_dataframe_w_bqstorage_creates_client(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + session = bigquery_storage_v1.types.ReadSession() + bqstorage_client.create_read_session.return_value = session + + row_iterator = mut.RowIterator( + _mock_client(), + api_request=None, + path=None, + schema=[ + schema.SchemaField("colA", "INTEGER"), + schema.SchemaField("colC", "FLOAT"), + schema.SchemaField("colB", "STRING"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + + got = row_iterator.to_dataframe(bqstorage_client) + column_names = ["colA", "colC", "colB"] + self.assertEqual(list(got), column_names) + self.assertTrue(got.empty) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_dataframe_w_bqstorage_v1beta1_no_streams(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + bqstorage_client = mock.create_autospec( bigquery_storage_v1beta1.BigQueryStorageClient ) @@ -2748,17 +2783,15 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertTrue(got.empty) @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): from google.cloud.bigquery.table import Table - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - session = bigquery_storage_v1beta1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + session = bigquery_storage_v1.types.ReadSession() session.name = "projects/test-proj/locations/us/sessions/SOMESESSION" bqstorage_client.create_read_session.return_value = session mock_logger = mock.create_autospec(logging.Logger) @@ -2776,13 +2809,13 @@ def test_to_dataframe_w_bqstorage_logs_session(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - from google.cloud.bigquery_storage_v1beta1 import reader + from google.cloud.bigquery_storage_v1 import reader arrow_fields = [ pyarrow.field("colA", pyarrow.int64()), @@ -2792,10 +2825,8 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): ] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - session = bigquery_storage_v1beta1.types.ReadSession( + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + session = bigquery_storage_v1.types.ReadSession( streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}], arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -2833,10 +2864,90 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud.bigquery_storage_v1 import reader + + arrow_fields = [ + pyarrow.field("colA", pyarrow.int64()), + # Not alphabetical to test column order. + pyarrow.field("colC", pyarrow.float64()), + pyarrow.field("colB", pyarrow.utf8()), + ] + arrow_schema = pyarrow.schema(arrow_fields) + + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_client.transport = mock.create_autospec( + big_query_read_grpc_transport.BigQueryReadGrpcTransport + ) + streams = [ + # Use two streams we want to check frames are read from each stream. + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, + ] + session = bigquery_storage_v1.types.ReadSession( + streams=streams, + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, + ) + bqstorage_client.create_read_session.return_value = session + + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) + bqstorage_client.read_rows.return_value = mock_rowstream + + mock_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_rowstream.rows.return_value = mock_rows + page_items = [ + pyarrow.array([1, -1]), + pyarrow.array([2.0, 4.0]), + pyarrow.array(["abc", "def"]), + ] + page_record_batch = pyarrow.RecordBatch.from_arrays( + page_items, schema=arrow_schema + ) + mock_page = mock.create_autospec(reader.ReadRowsPage) + mock_page.to_arrow.return_value = page_record_batch + mock_pages = (mock_page, mock_page, mock_page) + type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) + + schema = [ + schema.SchemaField("colA", "IGNORED"), + schema.SchemaField("colC", "IGNORED"), + schema.SchemaField("colB", "IGNORED"), + ] + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=schema, + ) + + got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + + # Are the columns in the expected order? + column_names = ["colA", "colC", "colB"] + self.assertEqual(list(got), column_names) + + # Have expected number of rows? + total_pages = len(streams) * len(mock_pages) + total_rows = len(page_items[0]) * total_pages + self.assertEqual(len(got.index), total_rows) + + # Don't close the client if it was passed in. + bqstorage_client.transport.channel.close.assert_not_called() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_w_bqstorage_v1beta1_nonempty(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1beta1 import reader @@ -2853,7 +2964,7 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bigquery_storage_v1beta1.BigQueryStorageClient ) bqstorage_client.transport = mock.create_autospec( - big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + big_query_storage_grpc_transport_v1beta1.BigQueryStorageGrpcTransport ) streams = [ # Use two streams we want to check frames are read from each stream. @@ -2899,7 +3010,17 @@ def test_to_dataframe_w_bqstorage_nonempty(self): selected_fields=schema, ) - got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + with warnings.catch_warnings(record=True) as warned: + got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + + # Was a deprecation warning emitted? + expected_warnings = [ + warning + for warning in warned + if issubclass(warning.category, DeprecationWarning) + and "v1beta1" in str(warning) + ] + self.assertEqual(len(expected_warnings), 1, "Deprecation warning not raised.") # Are the columns in the expected order? column_names = ["colA", "colC", "colB"] @@ -2915,13 +3036,13 @@ def test_to_dataframe_w_bqstorage_nonempty(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - from google.cloud.bigquery_storage_v1beta1 import reader + from google.cloud.bigquery_storage_v1 import reader arrow_fields = [pyarrow.field("colA", pyarrow.int64())] arrow_schema = pyarrow.schema(arrow_fields) @@ -2930,14 +3051,12 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1beta1.types.ReadSession( + session = bigquery_storage_v1.types.ReadSession( streams=streams, arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) bqstorage_client.create_read_session.return_value = session mock_rowstream = mock.create_autospec(reader.ReadRowsStream) @@ -2971,7 +3090,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(tqdm is None, "Requires `tqdm`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -2979,7 +3098,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - from google.cloud.bigquery_storage_v1beta1 import reader + from google.cloud.bigquery_storage_v1 import reader # Speed up testing. mut._PROGRESS_INTERVAL = 0.01 @@ -2987,16 +3106,14 @@ def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): arrow_fields = [pyarrow.field("testcol", pyarrow.int64())] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) streams = [ # Use two streams we want to check that progress bar updates are # sent from each stream. {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1beta1.types.ReadSession( + session = bigquery_storage_v1.types.ReadSession( streams=streams, arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -3052,13 +3169,13 @@ def blocking_to_arrow(*args, **kwargs): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - from google.cloud.bigquery_storage_v1beta1 import reader + from google.cloud.bigquery_storage_v1 import reader # Speed up testing. mut._PROGRESS_INTERVAL = 0.01 @@ -3071,16 +3188,15 @@ def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): ] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - session = bigquery_storage_v1beta1.types.ReadSession( + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + session = bigquery_storage_v1.types.ReadSession( streams=[ - # Use two streams because one will fail with a - # KeyboardInterrupt, and we want to check that the other stream + # Use multiple streams because one will fail with a + # KeyboardInterrupt, and we want to check that the other streams # ends early. {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/9999"}, ], arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -3112,6 +3228,7 @@ def blocking_to_arrow(*args, **kwargs): mock_cancelled_rowstream.rows.return_value = mock_cancelled_rows bqstorage_client.read_rows.side_effect = ( + mock_rowstream, mock_cancelled_rowstream, mock_rowstream, ) @@ -3140,15 +3257,13 @@ def blocking_to_arrow(*args, **kwargs): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_fallback_to_tabledata_list(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) bqstorage_client.create_read_session.side_effect = google.api_core.exceptions.InternalServerError( "can't read with bqstorage_client" ) @@ -3201,7 +3316,9 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): table=mut.Table("proj.dset.tbl"), ) - df = row_iterator.to_dataframe(bqstorage_client=None) + df = row_iterator.to_dataframe( + bqstorage_client=None, create_bqstorage_client=False, + ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) @@ -3211,14 +3328,12 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) bqstorage_client.create_read_session.side_effect = google.api_core.exceptions.Forbidden( "TEST BigQuery Storage API not enabled. TEST" ) @@ -3231,38 +3346,14 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): with pytest.raises(google.api_core.exceptions.Forbidden): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_to_dataframe_w_bqstorage_raises_import_error(self): - from google.cloud.bigquery import table as mut - - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - path = "/foo" - api_request = mock.Mock(return_value={"rows": []}) - row_iterator = mut.RowIterator( - _mock_client(), api_request, path, [], table=mut.Table("proj.dset.tbl") - ) - - with mock.patch.object(mut, "bigquery_storage_v1beta1", None), pytest.raises( - ValueError - ) as exc_context: - row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - assert mut._NO_BQSTORAGE_ERROR in str(exc_context.value) - @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_partition(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) row_iterator = mut.RowIterator( _mock_client(), @@ -3276,15 +3367,13 @@ def test_to_dataframe_w_bqstorage_partition(self): row_iterator.to_dataframe(bqstorage_client) @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_snapshot(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) row_iterator = mut.RowIterator( _mock_client(), @@ -3299,13 +3388,13 @@ def test_to_dataframe_w_bqstorage_snapshot(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - from google.cloud.bigquery_storage_v1beta1 import reader + from google.cloud.bigquery_storage_v1 import reader arrow_fields = [ # Not alphabetical to test column order. @@ -3318,13 +3407,11 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): arrow_schema = pyarrow.schema(arrow_fields) # create a mock BQ storage client - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) + bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) bqstorage_client.transport = mock.create_autospec( - big_query_storage_grpc_transport.BigQueryStorageGrpcTransport + big_query_read_grpc_transport.BigQueryReadGrpcTransport ) - session = bigquery_storage_v1beta1.types.ReadSession( + session = bigquery_storage_v1.types.ReadSession( streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}], arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -3761,10 +3848,32 @@ def test_set_expiration_w_none(self): assert time_partitioning._properties["expirationMs"] is None +@pytest.mark.skipif( + bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" +) +@pytest.mark.parametrize( + "table_path", + ( + "my-project.my_dataset.my_table", + "my-project.my_dataset.my_table$20181225", + "my-project.my_dataset.my_table@1234567890", + "my-project.my_dataset.my_table$20181225@1234567890", + ), +) +def test_table_reference_to_bqstorage_v1_stable(table_path): + from google.cloud.bigquery import table as mut + + expected = "projects/my-project/datasets/my_dataset/tables/my_table" + + for klass in (mut.TableReference, mut.Table, mut.TableListItem): + got = klass.from_string(table_path).to_bqstorage() + assert got == expected + + @pytest.mark.skipif( bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" ) -def test_table_reference_to_bqstorage(): +def test_table_reference_to_bqstorage_v1beta1(): from google.cloud.bigquery import table as mut # Can't use parametrized pytest because bigquery_storage_v1beta1 may not be @@ -3782,14 +3891,14 @@ def test_table_reference_to_bqstorage(): classes = (mut.TableReference, mut.Table, mut.TableListItem) for case, cls in itertools.product(cases, classes): - got = cls.from_string(case).to_bqstorage() + got = cls.from_string(case).to_bqstorage(v1beta1=True) assert got == expected @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) -def test_table_reference_to_bqstorage_raises_import_error(): +def test_table_reference_to_bqstorage_v1beta1_raises_import_error(): from google.cloud.bigquery import table as mut classes = (mut.TableReference, mut.Table, mut.TableListItem) @@ -3797,5 +3906,5 @@ def test_table_reference_to_bqstorage_raises_import_error(): with mock.patch.object(mut, "bigquery_storage_v1beta1", None), pytest.raises( ValueError ) as exc_context: - cls.from_string("my-project.my_dataset.my_table").to_bqstorage() + cls.from_string("my-project.my_dataset.my_table").to_bqstorage(v1beta1=True) assert mut._NO_BQSTORAGE_ERROR in str(exc_context.value) From e4546e33e8230ae36b1eea2da265a978c8d842a4 Mon Sep 17 00:00:00 2001 From: shollyman Date: Wed, 10 Jun 2020 23:21:03 -0700 Subject: [PATCH 0781/2016] doc: update wording in RowIterator docstrings to reduce confusion (#127) Goal: Talking about partition filtration can be problematic, so refer to Storage API capabilities more obliquely. Particularly, partition filtration is possible via a query or a direct storage API read, so don't use as severe of language when describing helper mechanisms such as to_dataframe() on row iterators. Fixes: https://github.com/googleapis/python-bigquery-storage/issues/22 Co-authored-by: Peter Lamut --- .../google/cloud/bigquery/table.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 3022ddbd5f2c..b39d28a205d9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1515,8 +1515,9 @@ def to_arrow( This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. - Reading from a specific partition or snapshot is not - currently supported by this method. + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. create_bqstorage_client (bool): Optional. If ``True`` (default), create a BigQuery Storage API client using the default API settings. The BigQuery Storage API @@ -1598,8 +1599,9 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. - Reading from a specific partition or snapshot is not - currently supported by this method. + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. **Caution**: There is a known issue reading small anonymous query result tables with the BQ Storage API. When a problem @@ -1666,8 +1668,9 @@ def to_dataframe( This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. - Reading from a specific partition or snapshot is not - currently supported by this method. + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. **Caution**: There is a known issue reading small anonymous query result tables with the BQ Storage API. When a problem From 0287b9612517f1e523c6ad1ef07bff3fdefea4c4 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 15 Jun 2020 14:10:27 +0530 Subject: [PATCH 0782/2016] docs(bigquery): add client thread-safety documentation (#132) * docs(bigquery): add client thread-safety documentation * docs(bigquery): nit * docs(bigquery): nit --- packages/google-cloud-bigquery/docs/index.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/google-cloud-bigquery/docs/index.rst b/packages/google-cloud-bigquery/docs/index.rst index ec360de69770..62a82e0e95c5 100644 --- a/packages/google-cloud-bigquery/docs/index.rst +++ b/packages/google-cloud-bigquery/docs/index.rst @@ -1,5 +1,14 @@ .. include:: README.rst +.. note:: + + Because the BigQuery client uses the third-party :mod:`requests` library + by default and the BigQuery-Storage client uses :mod:`grpcio` library, + both are safe to share instances across threads. In multiprocessing + scenarios, the best practice is to create client instances *after* + :class:`multiprocessing.Pool` or :class:`multiprocessing.Process` invokes + :func:`os.fork`. + More Examples ~~~~~~~~~~~~~ From c1a62e2e64656584f5026489f93816fdf2e1cec0 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 15 Jun 2020 12:01:03 +0200 Subject: [PATCH 0783/2016] chore: remove workarounds for BQ Storage issue with small result sets (#133) * chore: remove workarounds for BQ Storage issue with small result sets * Fix two typos in docstrings --- .../google/cloud/bigquery/dbapi/connection.py | 24 +-- .../google/cloud/bigquery/dbapi/cursor.py | 23 +-- .../google/cloud/bigquery/job.py | 3 - .../google/cloud/bigquery/table.py | 44 +---- .../google-cloud-bigquery/tests/system.py | 162 ++++-------------- .../tests/unit/test_dbapi_cursor.py | 56 ------ .../tests/unit/test_job.py | 10 +- .../tests/unit/test_table.py | 57 ------ 8 files changed, 54 insertions(+), 325 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py index 23e966486ea3..464b0fd066bf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py @@ -34,16 +34,10 @@ class Connection(object): ): A client that uses the faster BigQuery Storage API to fetch rows from BigQuery. If not passed, it is created using the same credentials - as ``client``. + as ``client`` (provided that BigQuery Storage dependencies are installed). - When fetching query results, ``bqstorage_client`` is used first, with - a fallback on ``client``, if necessary. - - .. note:: - There is a known issue with the BigQuery Storage API with small - anonymous result sets, which results in such fallback. - - https://github.com/googleapis/python-bigquery-storage/issues/2 + If both clients are available, ``bqstorage_client`` is used for + fetching query results. """ def __init__(self, client=None, bqstorage_client=None): @@ -110,16 +104,10 @@ def connect(client=None, bqstorage_client=None): ): A client that uses the faster BigQuery Storage API to fetch rows from BigQuery. If not passed, it is created using the same credentials - as ``client``. - - When fetching query results, ``bqstorage_client`` is used first, with - a fallback on ``client``, if necessary. - - .. note:: - There is a known issue with the BigQuery Storage API with small - anonymous result sets, which results in such fallback. + as ``client`` (provided that BigQuery Storage dependencies are installed). - https://github.com/googleapis/python-bigquery-storage/issues/2 + If both clients are available, ``bqstorage_client`` is used for + fetching query results. Returns: google.cloud.bigquery.dbapi.Connection: A new DB-API connection to BigQuery. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index c72116d07178..40de336bd7cd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -224,26 +224,9 @@ def _try_fetch(self, size=None): bqstorage_client = self.connection._bqstorage_client if bqstorage_client is not None: - try: - rows_iterable = self._bqstorage_fetch(bqstorage_client) - self._query_data = _helpers.to_bq_table_rows(rows_iterable) - return - except google.api_core.exceptions.GoogleAPICallError as exc: - # NOTE: Forbidden is a subclass of GoogleAPICallError - if isinstance(exc, google.api_core.exceptions.Forbidden): - # Don't hide errors such as insufficient permissions to create - # a read session, or the API is not enabled. Both of those are - # clearly problems if the developer has explicitly asked for - # BigQuery Storage API support. - raise - - # There is an issue with reading from small anonymous - # query results tables. If such an error occurs, we silence - # it in order to try again with the tabledata.list API. - _LOGGER.debug( - "Error fetching data with BigQuery Storage API, " - "falling back to tabledata.list API." - ) + rows_iterable = self._bqstorage_fetch(bqstorage_client) + self._query_data = _helpers.to_bq_table_rows(rows_iterable) + return rows_iter = client.list_rows( self._query_job.destination, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 0040d585ae9a..b0d2e751731a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -3335,9 +3335,6 @@ def to_dataframe( Reading from a specific partition or snapshot is not currently supported by this method. - **Caution**: There is a known issue reading small anonymous - query result tables with the BQ Storage API. Write your query - results to a destination table to work around this issue. dtypes (Map[str, Union[str, pandas.Series.dtype]]): Optional. A dictionary of column names pandas ``dtype``s. The provided ``dtype`` is used when constructing the series for diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index b39d28a205d9..5b13cc52a026 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1431,30 +1431,10 @@ def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): if bqstorage_client is not None: - try: - # Iterate over the stream so that read errors are raised (and - # the method can then fallback to tabledata.list). - for item in bqstorage_download(): - yield item - return - except google.api_core.exceptions.Forbidden: - # Don't hide errors such as insufficient permissions to create - # a read session, or the API is not enabled. Both of those are - # clearly problems if the developer has explicitly asked for - # BigQuery Storage API support. - raise - except google.api_core.exceptions.GoogleAPICallError: - # There is a known issue with reading from small anonymous - # query results tables, so some errors are expected. Rather - # than throw those errors, try reading the DataFrame again, but - # with the tabledata.list API. - pass - - _LOGGER.debug( - "Started reading table '{}.{}.{}' with tabledata.list.".format( - self._table.project, self._table.dataset_id, self._table.table_id - ) - ) + for item in bqstorage_download(): + yield item + return + for item in tabledata_list_download(): yield item @@ -1599,14 +1579,10 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. - This method only exposes a subset of the capabilities of the - BigQuery Storage API. For full access to all features + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features (projections, filters, snapshots) use the Storage API directly. - **Caution**: There is a known issue reading small anonymous - query result tables with the BQ Storage API. When a problem - is encountered reading a table, the tabledata.list method - from the BigQuery API is used, instead. dtypes (Map[str, Union[str, pandas.Series.dtype]]): Optional. A dictionary of column names pandas ``dtype``s. The provided ``dtype`` is used when constructing the series for @@ -1668,14 +1644,10 @@ def to_dataframe( This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. - This method only exposes a subset of the capabilities of the - BigQuery Storage API. For full access to all features + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features (projections, filters, snapshots) use the Storage API directly. - **Caution**: There is a known issue reading small anonymous - query result tables with the BQ Storage API. When a problem - is encountered reading a table, the tabledata.list method - from the BigQuery API is used, instead. dtypes (Map[str, Union[str, pandas.Series.dtype]]): Optional. A dictionary of column names pandas ``dtype``s. The provided ``dtype`` is used when constructing the series for diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 3b874300f3e9..965c34331f2b 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1690,50 +1690,6 @@ def test_dbapi_fetchall(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)]) - @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_dbapi_fetch_w_bqstorage_client_small_result_set(self): - bqstorage_client = bigquery_storage_v1.BigQueryReadClient( - credentials=Config.CLIENT._credentials - ) - cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() - - # Reading small result sets causes an issue with BQ storage client, - # and the DB API should transparently fall back to the default client. - cursor.execute( - """ - SELECT id, `by`, time_ts - FROM `bigquery-public-data.hacker_news.comments` - ORDER BY `id` ASC - LIMIT 10 - """ - ) - - result_rows = [cursor.fetchone(), cursor.fetchone(), cursor.fetchone()] - - field_name = operator.itemgetter(0) - fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] - - expected_data = [ - [ - ("by", "sama"), - ("id", 15), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)), - ], - [ - ("by", "pg"), - ("id", 17), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)), - ], - [ - ("by", "pg"), - ("id", 22), - ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)), - ], - ] - self.assertEqual(fetched_data, expected_data) - @unittest.skipIf( bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @@ -1744,10 +1700,6 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): ) cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() - # Pick a large enough LIMIT value to assure that the fallback to the - # default client is not needed due to the result set being too small - # (a known issue that causes problems when reading such result sets with - # BQ storage client). cursor.execute( """ SELECT id, `by`, time_ts @@ -1794,10 +1746,6 @@ def test_dbapi_fetch_w_bqstorage_client_v1beta1_large_result_set(self): ) cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() - # Pick a large enouhg LIMIT value to assure that the fallback to the - # default client is not needed due to the result set being too small - # (a known issue that causes problems when reding such result sets with - # BQ storage client). cursor.execute( """ SELECT id, `by`, time_ts @@ -1845,10 +1793,6 @@ def test_dbapi_connection_does_not_leak_sockets(self): connection = dbapi.connect() cursor = connection.cursor() - # Pick a large enough LIMIT value to assure that the fallback to the - # default client is not needed due to the result set being too small - # (a known issue that causes problems when reding such result sets with - # BQ storage client). cursor.execute( """ SELECT id, `by`, time_ts @@ -2272,9 +2216,6 @@ def test_query_results_to_dataframe(self): bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_query_results_to_dataframe_w_bqstorage(self): - dest_dataset = self.temp_dataset(_make_dataset_id("bqstorage_to_dataframe_")) - dest_ref = dest_dataset.table("query_results") - query = """ SELECT id, author, time_ts, dead FROM `bigquery-public-data.hacker_news.comments` @@ -2285,50 +2226,29 @@ def test_query_results_to_dataframe_w_bqstorage(self): credentials=Config.CLIENT._credentials ) - job_configs = ( - # There is a known issue reading small anonymous query result - # tables with the BQ Storage API. Writing to a destination - # table works around this issue. - bigquery.QueryJobConfig( - destination=dest_ref, write_disposition="WRITE_TRUNCATE" - ), - # Check that the client is able to work around the issue with - # reading small anonymous query result tables by falling back to - # the tabledata.list API. - None, - ) - - for job_config in job_configs: - df = ( - Config.CLIENT.query(query, job_config=job_config) - .result() - .to_dataframe(bqstorage_client) - ) + df = Config.CLIENT.query(query).result().to_dataframe(bqstorage_client) - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 10) # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] - self.assertEqual(list(df), column_names) - exp_datatypes = { - "id": int, - "author": six.text_type, - "time_ts": pandas.Timestamp, - "dead": bool, - } - for index, row in df.iterrows(): - for col in column_names: - # all the schema fields are nullable, so None is acceptable - if not row[col] is None: - self.assertIsInstance(row[col], exp_datatypes[col]) + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 10) # verify the number of rows + column_names = ["id", "author", "time_ts", "dead"] + self.assertEqual(list(df), column_names) + exp_datatypes = { + "id": int, + "author": six.text_type, + "time_ts": pandas.Timestamp, + "dead": bool, + } + for index, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + self.assertIsInstance(row[col], exp_datatypes[col]) @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" ) def test_query_results_to_dataframe_w_bqstorage_v1beta1(self): - dest_dataset = self.temp_dataset(_make_dataset_id("bqstorage_to_dataframe_")) - dest_ref = dest_dataset.table("query_results") - query = """ SELECT id, author, time_ts, dead FROM `bigquery-public-data.hacker_news.comments` @@ -2339,41 +2259,23 @@ def test_query_results_to_dataframe_w_bqstorage_v1beta1(self): credentials=Config.CLIENT._credentials ) - job_configs = ( - # There is a known issue reading small anonymous query result - # tables with the BQ Storage API. Writing to a destination - # table works around this issue. - bigquery.QueryJobConfig( - destination=dest_ref, write_disposition="WRITE_TRUNCATE" - ), - # Check that the client is able to work around the issue with - # reading small anonymous query result tables by falling back to - # the tabledata.list API. - None, - ) - - for job_config in job_configs: - df = ( - Config.CLIENT.query(query, job_config=job_config) - .result() - .to_dataframe(bqstorage_client) - ) + df = Config.CLIENT.query(query).result().to_dataframe(bqstorage_client) - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 10) # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] - self.assertEqual(list(df), column_names) - exp_datatypes = { - "id": int, - "author": six.text_type, - "time_ts": pandas.Timestamp, - "dead": bool, - } - for index, row in df.iterrows(): - for col in column_names: - # all the schema fields are nullable, so None is acceptable - if not row[col] is None: - self.assertIsInstance(row[col], exp_datatypes[col]) + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 10) # verify the number of rows + column_names = ["id", "author", "time_ts", "dead"] + self.assertEqual(list(df), column_names) + exp_datatypes = { + "id": int, + "author": six.text_type, + "time_ts": pandas.Timestamp, + "dead": bool, + } + for index, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + self.assertIsInstance(row[col], exp_datatypes[col]) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index caec4b1bd38b..129ce28ad22e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -434,62 +434,6 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): # the default client was not used mock_client.list_rows.assert_not_called() - @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_fetchall_w_bqstorage_client_fetch_error_fallback_on_client(self): - from google.cloud.bigquery import dbapi - from google.cloud.bigquery import table - - # use unordered data to also test any non-determenistic key order in dicts - row_data = [ - table.Row([1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), - table.Row([2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), - ] - bqstorage_streamed_rows = [ - {"bar": 1.2, "foo": 1.1, "quux": 1.4, "baz": 1.3}, - {"bar": 2.2, "foo": 2.1, "quux": 2.4, "baz": 2.3}, - ] - - mock_client = self._mock_client(rows=row_data) - mock_bqstorage_client = self._mock_bqstorage_client( - stream_count=1, rows=bqstorage_streamed_rows, - ) - request_error = exceptions.BadRequest("BQ storage what??") - mock_bqstorage_client.create_read_session.side_effect = request_error - - connection = dbapi.connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, - ) - cursor = connection.cursor() - cursor.execute("SELECT foo, bar FROM some_table") - - logger_patcher = mock.patch("google.cloud.bigquery.dbapi.cursor._LOGGER") - with logger_patcher as mock_logger: - rows = cursor.fetchall() - - # both client were used - mock_bqstorage_client.create_read_session.assert_called() - mock_client.list_rows.assert_called() - - # fallback to default API should have been logged - relevant_calls = [ - call - for call in mock_logger.debug.call_args_list - if call.args and "tabledata.list API" in call.args[0] - ] - self.assertTrue(relevant_calls) - - # check the data returned - field_value = op.itemgetter(1) - sorted_row_data = [sorted(row.items(), key=field_value) for row in rows] - expected_row_data = [ - [("foo", 1.1), ("bar", 1.2), ("baz", 1.3), ("quux", 1.4)], - [("foo", 2.1), ("bar", 2.2), ("baz", 2.3), ("quux", 2.4)], - ] - - self.assertEqual(sorted_row_data, expected_row_data) - def test_execute_custom_job_id(self): from google.cloud.bigquery.dbapi import connect diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 23991b9ec931..9eec9fda31ef 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -5344,7 +5344,7 @@ def test_to_arrow(self): client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) - tbl = job.to_arrow() + tbl = job.to_arrow(create_bqstorage_client=False) self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) @@ -5412,7 +5412,7 @@ def test_to_dataframe(self): client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) - df = job.to_dataframe() + df = job.to_dataframe(create_bqstorage_client=False) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 4) # verify the number of rows @@ -5518,7 +5518,7 @@ def test_to_dataframe_column_dtypes(self): client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) - df = job.to_dataframe(dtypes={"km": "float16"}) + df = job.to_dataframe(dtypes={"km": "float16"}, create_bqstorage_client=False) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 3) # verify the number of rows @@ -5558,10 +5558,10 @@ def test_to_dataframe_with_progress_bar(self, tqdm_mock): client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) - job.to_dataframe(progress_bar_type=None) + job.to_dataframe(progress_bar_type=None, create_bqstorage_client=False) tqdm_mock.assert_not_called() - job.to_dataframe(progress_bar_type="tqdm") + job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) tqdm_mock.assert_called() def test_iter(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index cbce25b006ff..94a32661731e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2501,23 +2501,6 @@ def empty_iterable(dtypes=None): self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_logs_tabledata_list(self): - from google.cloud.bigquery.table import Table - - mock_logger = mock.create_autospec(logging.Logger) - api_request = mock.Mock(return_value={"rows": []}) - row_iterator = self._make_one( - _mock_client(), api_request, table=Table("debug-proj.debug_dset.debug_tbl") - ) - - with mock.patch("google.cloud.bigquery.table._LOGGER", mock_logger): - row_iterator.to_dataframe(create_bqstorage_client=False) - - mock_logger.debug.assert_any_call( - "Started reading table 'debug-proj.debug_dset.debug_tbl' with tabledata.list." - ) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): import datetime @@ -3255,46 +3238,6 @@ def blocking_to_arrow(*args, **kwargs): # should have been set. self.assertLessEqual(mock_page.to_dataframe.call_count, 2) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_to_dataframe_w_bqstorage_fallback_to_tabledata_list(self): - from google.cloud.bigquery import schema - from google.cloud.bigquery import table as mut - - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.create_read_session.side_effect = google.api_core.exceptions.InternalServerError( - "can't read with bqstorage_client" - ) - iterator_schema = [ - schema.SchemaField("name", "STRING", mode="REQUIRED"), - schema.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - rows = [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - path = "/foo" - api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = mut.RowIterator( - _mock_client(), - api_request, - path, - iterator_schema, - table=mut.Table("proj.dset.tbl"), - ) - - df = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 4) # verify the number of rows - self.assertEqual(list(df), ["name", "age"]) # verify the column names - self.assertEqual(df.name.dtype.name, "object") - self.assertEqual(df.age.dtype.name, "int64") - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): from google.cloud.bigquery import schema From 78620c953f9289822cbfdfbdb78e8473e324be90 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 16 Jun 2020 08:35:48 +0200 Subject: [PATCH 0784/2016] chore: restrict `rsa` version to <=4.0 for Python 2.7 compatibility (#135) * chore: restrict `rsa` version to <=4.0 for Python 2.7 compatibility * Restrict RSA dependency version for Python 2 only --- packages/google-cloud-bigquery/setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 422584d123d9..ad2bcf68d938 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -36,6 +36,10 @@ "google-resumable-media >= 0.5.0, < 0.6dev", "protobuf >= 3.6.0", "six >=1.13.0,< 2.0.0dev", + # rsa >= 4.1 is not compatible with Python 2 + # https://github.com/sybrenstuvel/python-rsa/issues/152 + 'rsa <4.1; python_version < "3"', + 'rsa >=3.1.4, <5; python_version >= "3"', ] extras = { "bqstorage": [ From 52c99207416b3deef2f47d43fa43e1e2eb32046a Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Fri, 19 Jun 2020 17:43:58 +0530 Subject: [PATCH 0785/2016] docs(bigquery): fix the broken docs (#139) * docs(bigquery): fix the broken docs * docs(bigquery): add autodoc_default_options into conf.py file --- packages/google-cloud-bigquery/docs/conf.py | 2 +- .../google-cloud-bigquery/google/cloud/bigquery/dataset.py | 2 +- packages/google-cloud-bigquery/synth.py | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 87501ce663df..30dcac56443e 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -43,7 +43,7 @@ # autodoc/autosummary flags autoclass_content = "both" -autodoc_default_flags = ["members", "inherited-members"] +autodoc_default_options = {"members": True, "inherited-members": True} autosummary_generate = True diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 40489a38b20e..c804c1c172ef 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -84,7 +84,7 @@ class AccessEntry(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets. - Attributes: + Args: role (str): Role granted to the entity. The following string values are supported: `'READER'`, `'WRITER'`, `'OWNER'`. It may also be diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index d26c614898c3..7fba81a5cabc 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -61,4 +61,10 @@ templated_files = common.py_library(cov_level=100) s.move(templated_files, excludes=["noxfile.py"]) +s.replace( + "docs/conf.py", + r'\{"members": True\}', + '{"members": True, "inherited-members": True}' +) + s.shell.run(["nox", "-s", "blacken"], hide_output=False) From f575274516c6abc4cabc61a29f7fe0c7aeec3460 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 24 Jun 2020 00:59:39 +0200 Subject: [PATCH 0786/2016] fix: dry run queries with DB API cursor (#128) * fix: dry run queries with DB API cursor * Fix a merge errors with master * Return no rows on dry run instead of processed bytes count --- .../google/cloud/bigquery/dbapi/cursor.py | 50 +++++-- .../google-cloud-bigquery/tests/system.py | 16 +++ .../tests/unit/test_dbapi_cursor.py | 123 ++++++++++++++++-- 3 files changed, 164 insertions(+), 25 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 40de336bd7cd..7a10637f0669 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -15,6 +15,7 @@ """Cursor for the Google BigQuery DB-API.""" import collections +import copy import warnings try: @@ -93,18 +94,16 @@ def _set_description(self, schema): return self.description = tuple( - [ - Column( - name=field.name, - type_code=field.field_type, - display_size=None, - internal_size=None, - precision=None, - scale=None, - null_ok=field.is_nullable, - ) - for field in schema - ] + Column( + name=field.name, + type_code=field.field_type, + display_size=None, + internal_size=None, + precision=None, + scale=None, + null_ok=field.is_nullable, + ) + for field in schema ) def _set_rowcount(self, query_results): @@ -173,12 +172,24 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None): formatted_operation = _format_operation(operation, parameters=parameters) query_parameters = _helpers.to_query_parameters(parameters) - config = job_config or job.QueryJobConfig(use_legacy_sql=False) + if client._default_query_job_config: + if job_config: + config = job_config._fill_from_default(client._default_query_job_config) + else: + config = copy.deepcopy(client._default_query_job_config) + else: + config = job_config or job.QueryJobConfig(use_legacy_sql=False) + config.query_parameters = query_parameters self._query_job = client.query( formatted_operation, job_config=config, job_id=job_id ) + if self._query_job.dry_run: + self._set_description(schema=None) + self.rowcount = 0 + return + # Wait for the query to finish. try: self._query_job.result() @@ -211,6 +222,10 @@ def _try_fetch(self, size=None): "No query results: execute() must be called before fetch." ) + if self._query_job.dry_run: + self._query_data = iter([]) + return + is_dml = ( self._query_job.statement_type and self._query_job.statement_type.upper() != "SELECT" @@ -307,6 +322,9 @@ def _bqstorage_fetch(self, bqstorage_client): def fetchone(self): """Fetch a single row from the results of the last ``execute*()`` call. + .. note:: + If a dry run query was executed, no rows are returned. + Returns: Tuple: A tuple representing a row or ``None`` if no more data is @@ -324,6 +342,9 @@ def fetchone(self): def fetchmany(self, size=None): """Fetch multiple results from the last ``execute*()`` call. + .. note:: + If a dry run query was executed, no rows are returned. + .. note:: The size parameter is not used for the request/response size. Set the ``arraysize`` attribute before calling ``execute()`` to @@ -360,6 +381,9 @@ def fetchmany(self, size=None): def fetchall(self): """Fetch all remaining results from the last ``execute*()`` call. + .. note:: + If a dry run query was executed, no rows are returned. + Returns: List[Tuple]: A list of all the rows in the results. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 965c34331f2b..14d3f49a1b13 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1782,6 +1782,22 @@ def test_dbapi_fetch_w_bqstorage_client_v1beta1_large_result_set(self): ] self.assertEqual(fetched_data, expected_data) + def test_dbapi_dry_run_query(self): + from google.cloud.bigquery.job import QueryJobConfig + + query = """ + SELECT country_name + FROM `bigquery-public-data.utility_us.country_code_iso` + WHERE country_name LIKE 'U%' + """ + + Config.CURSOR.execute(query, job_config=QueryJobConfig(dry_run=True)) + self.assertEqual(Config.CURSOR.rowcount, 0, "expected no rows") + + rows = Config.CURSOR.fetchall() + + self.assertEqual(list(rows), []) + @unittest.skipIf( bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 129ce28ad22e..bd1d9dc0aaed 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -46,7 +46,15 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - def _mock_client(self, rows=None, schema=None, num_dml_affected_rows=None): + def _mock_client( + self, + rows=None, + schema=None, + num_dml_affected_rows=None, + default_query_job_config=None, + dry_run_job=False, + total_bytes_processed=0, + ): from google.cloud.bigquery import client if rows is None: @@ -59,8 +67,11 @@ def _mock_client(self, rows=None, schema=None, num_dml_affected_rows=None): total_rows=total_rows, schema=schema, num_dml_affected_rows=num_dml_affected_rows, + dry_run=dry_run_job, + total_bytes_processed=total_bytes_processed, ) mock_client.list_rows.return_value = rows + mock_client._default_query_job_config = default_query_job_config # Assure that the REST client gets used, not the BQ Storage client. mock_client._create_bqstorage_client.return_value = None @@ -95,27 +106,41 @@ def _mock_bqstorage_client(self, rows=None, stream_count=0, v1beta1=False): ) mock_client.create_read_session.return_value = mock_read_session + mock_rows_stream = mock.MagicMock() mock_rows_stream.rows.return_value = iter(rows) mock_client.read_rows.return_value = mock_rows_stream return mock_client - def _mock_job(self, total_rows=0, schema=None, num_dml_affected_rows=None): + def _mock_job( + self, + total_rows=0, + schema=None, + num_dml_affected_rows=None, + dry_run=False, + total_bytes_processed=0, + ): from google.cloud.bigquery import job mock_job = mock.create_autospec(job.QueryJob) mock_job.error_result = None mock_job.state = "DONE" - mock_job.result.return_value = mock_job - mock_job._query_results = self._mock_results( - total_rows=total_rows, - schema=schema, - num_dml_affected_rows=num_dml_affected_rows, - ) - mock_job.destination.to_bqstorage.return_value = ( - "projects/P/datasets/DS/tables/T" - ) + mock_job.dry_run = dry_run + + if dry_run: + mock_job.result.side_effect = exceptions.NotFound + mock_job.total_bytes_processed = total_bytes_processed + else: + mock_job.result.return_value = mock_job + mock_job._query_results = self._mock_results( + total_rows=total_rows, + schema=schema, + num_dml_affected_rows=num_dml_affected_rows, + ) + mock_job.destination.to_bqstorage.return_value = ( + "projects/P/datasets/DS/tables/T" + ) if num_dml_affected_rows is None: mock_job.statement_type = None # API sends back None for SELECT @@ -445,7 +470,27 @@ def test_execute_custom_job_id(self): self.assertEqual(args[0], "SELECT 1;") self.assertEqual(kwargs["job_id"], "foo") - def test_execute_custom_job_config(self): + def test_execute_w_default_config(self): + from google.cloud.bigquery.dbapi import connect + from google.cloud.bigquery import job + + default_config = job.QueryJobConfig(use_legacy_sql=False, flatten_results=True) + client = self._mock_client( + rows=[], num_dml_affected_rows=0, default_query_job_config=default_config + ) + connection = connect(client) + cursor = connection.cursor() + + cursor.execute("SELECT 1;", job_id="foo") + + _, kwargs = client.query.call_args + used_config = kwargs["job_config"] + expected_config = job.QueryJobConfig( + use_legacy_sql=False, flatten_results=True, query_parameters=[] + ) + self.assertEqual(used_config._properties, expected_config._properties) + + def test_execute_custom_job_config_wo_default_config(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery import job @@ -459,6 +504,29 @@ def test_execute_custom_job_config(self): self.assertEqual(kwargs["job_id"], "foo") self.assertEqual(kwargs["job_config"], config) + def test_execute_custom_job_config_w_default_config(self): + from google.cloud.bigquery.dbapi import connect + from google.cloud.bigquery import job + + default_config = job.QueryJobConfig(use_legacy_sql=False, flatten_results=True) + client = self._mock_client( + rows=[], num_dml_affected_rows=0, default_query_job_config=default_config + ) + connection = connect(client) + cursor = connection.cursor() + config = job.QueryJobConfig(use_legacy_sql=True) + + cursor.execute("SELECT 1;", job_id="foo", job_config=config) + + _, kwargs = client.query.call_args + used_config = kwargs["job_config"] + expected_config = job.QueryJobConfig( + use_legacy_sql=True, # the config passed to execute() prevails + flatten_results=True, # from the default + query_parameters=[], + ) + self.assertEqual(used_config._properties, expected_config._properties) + def test_execute_w_dml(self): from google.cloud.bigquery.dbapi import connect @@ -514,6 +582,35 @@ def test_execute_w_query(self): row = cursor.fetchone() self.assertIsNone(row) + def test_execute_w_query_dry_run(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery import dbapi + + connection = dbapi.connect( + self._mock_client( + rows=[("hello", "world", 1), ("howdy", "y'all", 2)], + schema=[ + SchemaField("a", "STRING", mode="NULLABLE"), + SchemaField("b", "STRING", mode="REQUIRED"), + SchemaField("c", "INTEGER", mode="NULLABLE"), + ], + dry_run_job=True, + total_bytes_processed=12345, + ) + ) + cursor = connection.cursor() + + cursor.execute( + "SELECT a, b, c FROM hello_world WHERE d > 3;", + job_config=QueryJobConfig(dry_run=True), + ) + + self.assertEqual(cursor.rowcount, 0) + self.assertIsNone(cursor.description) + rows = cursor.fetchall() + self.assertEqual(list(rows), []) + def test_execute_raises_if_result_raises(self): import google.cloud.exceptions @@ -523,8 +620,10 @@ def test_execute_raises_if_result_raises(self): from google.cloud.bigquery.dbapi import exceptions job = mock.create_autospec(job.QueryJob) + job.dry_run = None job.result.side_effect = google.cloud.exceptions.GoogleCloudError("") client = mock.create_autospec(client.Client) + client._default_query_job_config = None client.query.return_value = job connection = connect(client) cursor = connection.cursor() From 51797b19d01d5d875dfface15eedfcf1f0062321 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 29 Jun 2020 15:09:37 +0530 Subject: [PATCH 0787/2016] feat(bigquery): expose date_as_object parameter to users (#150) * feat(bigquery): expose date_as_object parameter for users * feat(bigquery): nit * feat(bigquery): add unit test for date as object without pyarrow * feat(bigquery): docs fixed * feat(bigquery): nit * feat(bigquery): nit --- .../google/cloud/bigquery/job.py | 14 +++- .../google/cloud/bigquery/table.py | 17 ++++- .../tests/unit/test_job.py | 73 ++++++++++++++++++- 3 files changed, 96 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index b0d2e751731a..930dc413df3e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -3320,6 +3320,7 @@ def to_dataframe( dtypes=None, progress_bar_type=None, create_bqstorage_client=True, + date_as_object=True, ): """Return a pandas DataFrame from a QueryJob @@ -3350,9 +3351,9 @@ def to_dataframe( for details. ..versionadded:: 1.11.0 - create_bqstorage_client (bool): - Optional. If ``True`` (default), create a BigQuery Storage API - client using the default API settings. The BigQuery Storage API + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API is a faster way to fetch rows from BigQuery. See the ``bqstorage_client`` parameter for more information. @@ -3360,6 +3361,12 @@ def to_dataframe( ..versionadded:: 1.24.0 + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + ..versionadded:: 1.26.0 + Returns: A :class:`~pandas.DataFrame` populated with row data and column headers from the query results. The column headers are derived @@ -3373,6 +3380,7 @@ def to_dataframe( dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, ) def __iter__(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 5b13cc52a026..5f557d28a10d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1633,6 +1633,7 @@ def to_dataframe( dtypes=None, progress_bar_type=None, create_bqstorage_client=True, + date_as_object=True, ): """Create a pandas DataFrame by loading all pages of a query. @@ -1673,9 +1674,9 @@ def to_dataframe( progress bar as a graphical dialog box. ..versionadded:: 1.11.0 - create_bqstorage_client (bool): - Optional. If ``True`` (default), create a BigQuery Storage API - client using the default API settings. The BigQuery Storage API + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API is a faster way to fetch rows from BigQuery. See the ``bqstorage_client`` parameter for more information. @@ -1683,6 +1684,12 @@ def to_dataframe( ..versionadded:: 1.24.0 + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + ..versionadded:: 1.26.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -1722,7 +1729,7 @@ def to_dataframe( bqstorage_client=bqstorage_client, create_bqstorage_client=create_bqstorage_client, ) - df = record_batch.to_pandas() + df = record_batch.to_pandas(date_as_object=date_as_object) for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) return df @@ -1799,6 +1806,7 @@ def to_dataframe( dtypes=None, progress_bar_type=None, create_bqstorage_client=True, + date_as_object=True, ): """Create an empty dataframe. @@ -1807,6 +1815,7 @@ def to_dataframe( dtypes (Any): Ignored. Added for compatibility with RowIterator. progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + date_as_object (bool): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 9eec9fda31ef..733445337509 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -5504,7 +5504,15 @@ def test_to_dataframe_column_dtypes(self): }, } row_data = [ - ["1.4338368E9", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"], + [ + "1.4338368E9", + "420", + "1.1", + "1.77", + "Cto_dataframeash", + "true", + "1999-12-01", + ], ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], ] @@ -5533,6 +5541,69 @@ def test_to_dataframe_column_dtypes(self): self.assertEqual(df.complete.dtype.name, "bool") self.assertEqual(df.date.dtype.name, "object") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_column_date_dtypes(self): + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "totalRows": "1", + "schema": {"fields": [{"name": "date", "type": "DATE"}]}, + } + row_data = [ + ["1999-12-01"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + query_resource["rows"] = rows + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource + ) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] + self.assertEqual(list(df), exp_columns) # verify the column names + + self.assertEqual(df.date.dtype.name, "datetime64[ns]") + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_column_date_dtypes_wo_pyarrow(self): + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "totalRows": "1", + "schema": {"fields": [{"name": "date", "type": "DATE"}]}, + } + row_data = [ + ["1999-12-01"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + query_resource["rows"] = rows + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource + ) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + with mock.patch("google.cloud.bigquery.table.pyarrow", None): + df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] + self.assertEqual(list(df), exp_columns) # verify the column names + + self.assertEqual(df.date.dtype.name, "object") + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm") From 1251ab20441548f250c4878e350d8f193b0852d8 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 30 Jun 2020 18:56:04 +0530 Subject: [PATCH 0788/2016] docs(bigquery): consistent use of optional keyword (#153) * docs(bigquery): consistent use of optional keyword * docs(bigquery): nit --- .../google/cloud/bigquery/_helpers.py | 10 +- .../google/cloud/bigquery/_http.py | 2 +- .../google/cloud/bigquery/_pandas_helpers.py | 7 +- .../google/cloud/bigquery/client.py | 449 +++++++++--------- .../google/cloud/bigquery/dataset.py | 6 +- .../google/cloud/bigquery/job.py | 136 +++--- .../google/cloud/bigquery/magics.py | 18 +- .../google/cloud/bigquery/model.py | 12 +- .../google/cloud/bigquery/query.py | 62 +-- .../google/cloud/bigquery/routine.py | 8 +- .../google/cloud/bigquery/schema.py | 16 +- .../google/cloud/bigquery/table.py | 79 ++- 12 files changed, 392 insertions(+), 413 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 21a8e3636d24..d814eec8ca69 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -471,7 +471,7 @@ def _get_sub_prop(container, keys, default=None): This method works like ``dict.get(key)``, but for nested values. - Arguments: + Args: container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): @@ -479,8 +479,8 @@ def _get_sub_prop(container, keys, default=None): the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. - default (object): - (Optional) Value to returned if any of the keys are not found. + default (Optional[object]): + Value to returned if any of the keys are not found. Defaults to ``None``. Examples: @@ -514,7 +514,7 @@ def _get_sub_prop(container, keys, default=None): def _set_sub_prop(container, keys, value): """Set a nested value in a dictionary. - Arguments: + Args: container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): @@ -557,7 +557,7 @@ def _set_sub_prop(container, keys, value): def _del_sub_prop(container, keys): """Remove a nested key fro a dictionary. - Arguments: + Args: container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index 2ff4effefb76..8ee633e64147 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -25,7 +25,7 @@ class Connection(_http.JSONConnection): Args: client (google.cloud.bigquery.client.Client): The client that owns the current connection. - client_info (google.api_core.client_info.ClientInfo): (Optional) instance used to generate user agent. + client_info (Optional[google.api_core.client_info.ClientInfo]): Instance used to generate user agent. """ DEFAULT_API_ENDPOINT = "https://bigquery.googleapis.com" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index f5f9d4a99a49..bced246e8f81 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -472,10 +472,9 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN columns in the DataFrame. filepath (str): Path to write Parquet file to. - parquet_compression (str): - (optional) The compression codec to use by the the - ``pyarrow.parquet.write_table`` serializing method. Defaults to - "SNAPPY". + parquet_compression (Optional[str]): + The compression codec to use by the the ``pyarrow.parquet.write_table`` + serializing method. Defaults to "SNAPPY". https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table """ if pyarrow is None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 8e265d971069..eceedcd67a4d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -123,35 +123,35 @@ class Client(ClientWithProject): """Client to bundle configuration needed for API requests. Args: - project (str): + project (Optional[str]): Project ID for the project which the client acts on behalf of. Will be passed when creating a dataset / job. If not passed, falls back to the default inferred from the environment. - credentials (google.auth.credentials.Credentials): - (Optional) The OAuth2 Credentials to use for this client. If not - passed (and if no ``_http`` object is passed), falls back to the + credentials (Optional[google.auth.credentials.Credentials]): + The OAuth2 Credentials to use for this client. If not passed + (and if no ``_http`` object is passed), falls back to the default inferred from the environment. - _http (requests.Session): - (Optional) HTTP object to make requests. Can be any object that + _http (Optional[requests.Session]): + HTTP object to make requests. Can be any object that defines ``request()`` with the same interface as :meth:`requests.Session.request`. If not passed, an ``_http`` object is created that is bound to the ``credentials`` for the current object. This parameter should be considered private, and could change in the future. - location (str): - (Optional) Default location for jobs / datasets / tables. - default_query_job_config (google.cloud.bigquery.job.QueryJobConfig): - (Optional) Default ``QueryJobConfig``. + location (Optional[str]): + Default location for jobs / datasets / tables. + default_query_job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): + Default ``QueryJobConfig``. Will be merged into job configs passed into the ``query`` method. - client_info (google.api_core.client_info.ClientInfo): + client_info (Optional[google.api_core.client_info.ClientInfo]): The client info used to send a user-agent string along with API requests. If ``None``, then default info will be used. Generally, you only need to set this if you're developing your own library or partner tool. - client_options (Union[google.api_core.client_options.ClientOptions, Dict]): - (Optional) Client options used to set user options on the client. - API Endpoint should be set through client_options. + client_options (Optional[Union[google.api_core.client_options.ClientOptions, Dict]]): + Client options used to set user options on the client. API Endpoint + should be set through client_options. Raises: google.auth.exceptions.DefaultCredentialsError: @@ -220,7 +220,7 @@ def get_service_account_email( encrypted by a key in KMS. Args: - project (str, optional): + project (Optional[str]): Project ID to use for retreiving service account email. Defaults to the client's project. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. @@ -255,19 +255,18 @@ def list_projects( https://cloud.google.com/bigquery/docs/reference/rest/v2/projects/list Args: - max_results (int): - (Optional) maximum number of projects to return, - If not passed, defaults to a value set by the API. - - page_token (str): - (Optional) Token representing a cursor into the projects. If - not passed, the API will return the first page of projects. - The token marks the beginning of the iterator to be returned - and the value of the ``page_token`` can be accessed at - ``next_page_token`` of the + max_results (Optional[int]): + Maximum number of projects to return, If not passed, + defaults to a value set by the API. + + page_token (Optional[str]): + Token representing a cursor into the projects. If not passed, + the API will return the first page of projects. The token marks + the beginning of the iterator to be returned and the value of + the ``page_token`` can be accessed at ``next_page_token`` of the :class:`~google.api_core.page_iterator.HTTPIterator`. - retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport @@ -304,27 +303,25 @@ def list_datasets( https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list Args: - project (str): - Optional. Project ID to use for retreiving datasets. Defaults - to the client's project. - include_all (bool): - Optional. True if results include hidden datasets. Defaults - to False. - filter (str): - Optional. An expression for filtering the results by label. + project (Optional[str]): + Project ID to use for retreiving datasets. Defaults to the + client's project. + include_all (Optional[bool]): + True if results include hidden datasets. Defaults to False. + filter (Optional[str]): + An expression for filtering the results by label. For syntax, see https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#body.QUERY_PARAMETERS.filter - max_results (int): - Optional. Maximum number of datasets to return. - page_token (str): - Optional. Token representing a cursor into the datasets. If - not passed, the API will return the first page of datasets. - The token marks the beginning of the iterator to be returned - and the value of the ``page_token`` can be accessed at - ``next_page_token`` of the + max_results (Optional[int]): + Maximum number of datasets to return. + page_token (Optional[str]): + Token representing a cursor into the datasets. If not passed, + the API will return the first page of datasets. The token marks + the beginning of the iterator to be returned and the value of + the ``page_token`` can be accessed at ``next_page_token`` of the :class:`~google.api_core.page_iterator.HTTPIterator`. - retry (google.api_core.retry.Retry): - Optional. How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -374,9 +371,8 @@ def dataset(self, dataset_id, project=None): Args: dataset_id (str): ID of the dataset. - project (str): - (Optional) project ID for the dataset (defaults to - the project of the client). + project (Optional[str]): + Project ID for the dataset (defaults to the project of the client). Returns: google.cloud.bigquery.dataset.DatasetReference: @@ -432,11 +428,11 @@ def create_dataset( A :class:`~google.cloud.bigquery.dataset.Dataset` to create. If ``dataset`` is a reference, an empty dataset is created with the specified ID and client's default location. - exists_ok (bool): + exists_ok (Optional[bool]): Defaults to ``False``. If ``True``, ignore "already exists" errors when creating the dataset. - retry (google.api_core.retry.Retry): - Optional. How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -488,11 +484,11 @@ def create_routine( routine (google.cloud.bigquery.routine.Routine): A :class:`~google.cloud.bigquery.routine.Routine` to create. The dataset that the routine belongs to must already exist. - exists_ok (bool): + exists_ok (Optional[bool]): Defaults to ``False``. If ``True``, ignore "already exists" errors when creating the routine. - retry (google.api_core.retry.Retry): - Optional. How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -532,11 +528,11 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None If ``table`` is a reference, an empty table is created with the specified ID. The dataset that the table belongs to must already exist. - exists_ok (bool): + exists_ok (Optional[bool]): Defaults to ``False``. If ``True``, ignore "already exists" errors when creating the table. - retry (google.api_core.retry.Retry): - Optional. How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -577,8 +573,8 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): If a string is passed in, this method attempts to create a dataset reference from a string using :func:`~google.cloud.bigquery.dataset.DatasetReference.from_string`. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -609,8 +605,8 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): If a string is passed in, this method attempts to create a model reference from a string using :func:`google.cloud.bigquery.model.ModelReference.from_string`. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -641,8 +637,8 @@ def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): a string is passed in, this method attempts to create a reference from a string using :func:`google.cloud.bigquery.routine.RoutineReference.from_string`. - retry (google.api_core.retry.Retry): - (Optional) How to retry the API call. + retry (Optional[google.api_core.retry.Retry]): + How to retry the API call. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -674,8 +670,8 @@ def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -709,7 +705,7 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): The dataset to update. fields (Sequence[str]): The properties of ``dataset`` to change (e.g. "friendly_name"). - retry (google.api_core.retry.Retry, optional): + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport @@ -752,8 +748,8 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): fields (Sequence[str]): The fields of ``model`` to change, spelled as the Model properties (e.g. "friendly_name"). - retry (google.api_core.retry.Retry): - (Optional) A description of how to retry the API call. + retry (Optional[google.api_core.retry.Retry]): + A description of how to retry the API call. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -802,8 +798,8 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): The fields of ``routine`` to change, spelled as the :class:`~google.cloud.bigquery.routine.Routine` properties (e.g. ``type_``). - retry (google.api_core.retry.Retry): - (Optional) A description of how to retry the API call. + retry (Optional[google.api_core.retry.Retry]): + A description of how to retry the API call. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -849,8 +845,8 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): fields (Sequence[str]): The fields of ``table`` to change, spelled as the Table properties (e.g. "friendly_name"). - retry (google.api_core.retry.Retry): - (Optional) A description of how to retry the API call. + retry (Optional[google.api_core.retry.Retry]): + A description of how to retry the API call. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -897,18 +893,17 @@ def list_models( BigQuery API. If a string is passed in, this method attempts to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. - max_results (int): - (Optional) Maximum number of models to return. If not passed, - defaults to a value set by the API. - page_token (str): - (Optional) Token representing a cursor into the models. If - not passed, the API will return the first page of models. The - token marks the beginning of the iterator to be returned and - the value of the ``page_token`` can be accessed at - ``next_page_token`` of the + max_results (Optional[int]): + Maximum number of models to return. If not passed, defaults to a + value set by the API. + page_token (Optional[str]): + Token representing a cursor into the models. If not passed, + the API will return the first page of models. The token marks + the beginning of the iterator to be returned and the value of + the ``page_token`` can be accessed at ``next_page_token`` of the :class:`~google.api_core.page_iterator.HTTPIterator`. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -963,18 +958,17 @@ def list_routines( BigQuery API. If a string is passed in, this method attempts to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. - max_results (int): - (Optional) Maximum number of routines to return. If not passed, - defaults to a value set by the API. - page_token (str): - (Optional) Token representing a cursor into the routines. If - not passed, the API will return the first page of routines. The - token marks the beginning of the iterator to be returned and - the value of the ``page_token`` can be accessed at - ``next_page_token`` of the + max_results (Optional[int]): + Maximum number of routines to return. If not passed, defaults + to a value set by the API. + page_token (Optional[str]): + Token representing a cursor into the routines. If not passed, + the API will return the first page of routines. The token marks + the beginning of the iterator to be returned and the value of the + ``page_token`` can be accessed at ``next_page_token`` of the :class:`~google.api_core.page_iterator.HTTPIterator`. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1029,18 +1023,17 @@ def list_tables( BigQuery API. If a string is passed in, this method attempts to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. - max_results (int): - (Optional) Maximum number of tables to return. If not passed, - defaults to a value set by the API. - page_token (str): - (Optional) Token representing a cursor into the tables. If - not passed, the API will return the first page of tables. The - token marks the beginning of the iterator to be returned and - the value of the ``page_token`` can be accessed at - ``next_page_token`` of the + max_results (Optional[int]): + Maximum number of tables to return. If not passed, defaults + to a value set by the API. + page_token (Optional[str]): + Token representing a cursor into the tables. If not passed, + the API will return the first page of tables. The token marks + the beginning of the iterator to be returned and the value of + the ``page_token`` can be accessed at ``next_page_token`` of the :class:`~google.api_core.page_iterator.HTTPIterator`. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1085,7 +1078,7 @@ def delete_dataset( See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete - Args + Args: dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ @@ -1095,16 +1088,16 @@ def delete_dataset( in, this method attempts to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. - delete_contents (boolean): - (Optional) If True, delete all the tables in the dataset. If - False and the dataset contains tables, the request will fail. + delete_contents (Optional[bool]): + If True, delete all the tables in the dataset. If False and + the dataset contains tables, the request will fail. Default is False. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok (bool): + not_found_ok (Optional[bool]): Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the dataset. """ @@ -1150,12 +1143,12 @@ def delete_model( this method attempts to create a model reference from a string using :func:`google.cloud.bigquery.model.ModelReference.from_string`. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok (bool): + not_found_ok (Optional[bool]): Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the model. """ @@ -1180,7 +1173,7 @@ def delete_routine( https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/delete Args: - model (Union[ \ + routine (Union[ \ google.cloud.bigquery.routine.Routine, \ google.cloud.bigquery.routine.RoutineReference, \ str, \ @@ -1189,12 +1182,12 @@ def delete_routine( in, this method attempts to create a routine reference from a string using :func:`google.cloud.bigquery.routine.RoutineReference.from_string`. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok (bool): + not_found_ok (Optional[bool]): Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the routine. """ @@ -1230,12 +1223,12 @@ def delete_table( this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok (bool): + not_found_ok (Optional[bool]): Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the table. """ @@ -1254,17 +1247,16 @@ def _get_query_results( ): """Get the query results object for a query job. - Arguments: + Args: job_id (str): Name of the query job. retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. - project (str): - (Optional) project ID for the query job (defaults to the - project of the client). - timeout_ms (int): - (Optional) number of milliseconds the the API call should - wait for the query to complete before the request times out. - location (str): Location of the query job. + How to retry the RPC. + project (Optional[str]): + Project ID for the query job (defaults to the project of the client). + timeout_ms (Optional[int]): + Number of milliseconds the the API call should wait for the query + to complete before the request times out. + location (Optional[str]): Location of the query job. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1326,12 +1318,12 @@ def job_from_resource(self, resource): def create_job(self, job_config, retry=DEFAULT_RETRY): """Create a new job. - Arguments: + Args: job_config (dict): configuration job representation returned from the API. Keyword Arguments: - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. Returns: Union[ \ @@ -1404,16 +1396,15 @@ def get_job( See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - Arguments: + Args: job_id (str): Unique job identifier. Keyword Arguments: - project (str): - (Optional) ID of the project which ownsthe job (defaults to - the client's project). - location (str): Location where the job was run. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + project (Optional[str]): + ID of the project which owns the job (defaults to the client's project). + location (Optional[str]): Location where the job was run. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1458,12 +1449,11 @@ def cancel_job( job_id (str): Unique job identifier. Keyword Arguments: - project (str): - (Optional) ID of the project which owns the job (defaults to - the client's project). - location (str): Location where the job was run. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + project (Optional[str]): + ID of the project which owns the job (defaults to the client's project). + location (Optional[str]): Location where the job was run. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1610,7 +1600,7 @@ def load_table_from_uri( See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload - Arguments: + Args: source_uris (Union[str, Sequence[str]]): URIs of data files to be loaded; in format ``gs:///``. @@ -1625,21 +1615,20 @@ def load_table_from_uri( :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: - job_id (str): (Optional) Name of the job. - job_id_prefix (str): - (Optional) the user-provided prefix for a randomly generated - job ID. This parameter will be ignored if a ``job_id`` is - also given. - location (str): + job_id (Optional[str]): Name of the job. + job_id_prefix (Optional[str]): + The user-provided prefix for a randomly generated job ID. + This parameter will be ignored if a ``job_id`` is also given. + location (Optional[str]): Location where to run the job. Must match the location of the destination table. - project (str): + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config (google.cloud.bigquery.job.LoadJobConfig): - (Optional) Extra configuration options for the job. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): + Extra configuration options for the job. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1694,7 +1683,7 @@ def load_table_from_file( Similar to :meth:`load_table_from_uri`, this method creates, starts and returns a :class:`~google.cloud.bigquery.job.LoadJob`. - Arguments: + Args: file_obj (file): A file handle opened in binary mode for reading. destination (Union[ \ google.cloud.bigquery.table.Table, \ @@ -1707,27 +1696,26 @@ def load_table_from_file( :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: - rewind (bool): + rewind (Optional[bool]): If True, seek to the beginning of the file handle before reading the file. - size (int): + size (Optional[int]): The number of bytes to read from the file handle. If size is ``None`` or large, resumable upload will be used. Otherwise, multipart upload will be used. - num_retries (int): Number of upload retries. Defaults to 6. - job_id (str): (Optional) Name of the job. - job_id_prefix (str): - (Optional) the user-provided prefix for a randomly generated - job ID. This parameter will be ignored if a ``job_id`` is - also given. - location (str): + num_retries (Optional[int]): Number of upload retries. Defaults to 6. + job_id (Optional[str]): Name of the job. + job_id_prefix (Optional[str]): + The user-provided prefix for a randomly generated job ID. + This parameter will be ignored if a ``job_id`` is also given. + location (Optional[str]): Location where to run the job. Must match the location of the destination table. - project (str): + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config (google.cloud.bigquery.job.LoadJobConfig): - (Optional) Extra configuration options for the job. + job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): + Extra configuration options for the job. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -1802,7 +1790,7 @@ def load_table_from_dataframe( https://github.com/googleapis/python-bigquery/issues/17 - Arguments: + Args: dataframe (pandas.DataFrame): A :class:`~pandas.DataFrame` containing the data to load. destination (google.cloud.bigquery.table.TableReference): @@ -1823,7 +1811,7 @@ def load_table_from_dataframe( The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location (str): + location (Optional[str]): Location where to run the job. Must match the location of the destination table. project (Optional[str]): @@ -1838,7 +1826,7 @@ def load_table_from_dataframe( column names matching those of the dataframe. The BigQuery schema is used to determine the correct data type conversion. Indexes are not loaded. Requires the :mod:`pyarrow` library. - parquet_compression (str): + parquet_compression (Optional[str]): [Beta] The compression method to use if intermittently serializing ``dataframe`` to a parquet file. @@ -2013,20 +2001,19 @@ def load_table_from_json( Keyword Arguments: num_retries (Optional[int]): Number of upload retries. - job_id (str): (Optional) Name of the job. - job_id_prefix (str): - (Optional) the user-provided prefix for a randomly generated - job ID. This parameter will be ignored if a ``job_id`` is - also given. - location (str): + job_id (Optional[str]): Name of the job. + job_id_prefix (Optional[str]): + The user-provided prefix for a randomly generated job ID. + This parameter will be ignored if a ``job_id`` is also given. + location (Optional[str]): Location where to run the job. Must match the location of the destination table. - project (str): + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config (google.cloud.bigquery.job.LoadJobConfig): - (Optional) Extra configuration options for the job. The - ``source_format`` setting is always set to + job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): + Extra configuration options for the job. The ``source_format`` + setting is always set to :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`. Returns: @@ -2223,21 +2210,20 @@ def copy_table( Table into which data is to be copied. Keyword Arguments: - job_id (str): (Optional) The ID of the job. - job_id_prefix (str) - (Optional) the user-provided prefix for a randomly generated - job ID. This parameter will be ignored if a ``job_id`` is - also given. - location (str): + job_id (Optional[str]): The ID of the job. + job_id_prefix (Optional[str]): + The user-provided prefix for a randomly generated job ID. + This parameter will be ignored if a ``job_id`` is also given. + location (Optional[str]): Location where to run the job. Must match the location of any source table as well as the destination table. - project (str): + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config (google.cloud.bigquery.job.CopyJobConfig): - (Optional) Extra configuration options for the job. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]): + Extra configuration options for the job. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -2321,27 +2307,25 @@ def extract_table( ``gs:///``. Keyword Arguments: - job_id (str): (Optional) The ID of the job. - job_id_prefix (str) - (Optional) the user-provided prefix for a randomly generated - job ID. This parameter will be ignored if a ``job_id`` is - also given. - location (str): + job_id (Optional[str]): The ID of the job. + job_id_prefix (Optional[str]): + The user-provided prefix for a randomly generated job ID. + This parameter will be ignored if a ``job_id`` is also given. + location (Optional[str]): Location where to run the job. Must match the location of the source table. - project (str): + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config (google.cloud.bigquery.job.ExtractJobConfig): - (Optional) Extra configuration options for the job. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]): + Extra configuration options for the job. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - source_type (str): - (Optional) Type of source to be extracted.``Table`` or ``Model``. - Defaults to ``Table``. + source_type (Optional[str]): + Type of source to be extracted.``Table`` or ``Model``. Defaults to ``Table``. Returns: google.cloud.bigquery.job.ExtractJob: A new extract job instance. @@ -2411,24 +2395,24 @@ def query( dialect. Use the ``job_config`` parameter to change dialects. Keyword Arguments: - job_config (google.cloud.bigquery.job.QueryJobConfig): - (Optional) Extra configuration options for the job. + job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): + Extra configuration options for the job. To override any options that were previously set in the ``default_query_job_config`` given to the ``Client`` constructor, manually set those options to ``None``, or whatever value is preferred. - job_id (str): (Optional) ID to use for the query job. - job_id_prefix (str): - (Optional) The prefix to use for a randomly generated job ID. - This parameter will be ignored if a ``job_id`` is also given. - location (str): + job_id (Optional[str]): ID to use for the query job. + job_id_prefix (Optional[str]): + The prefix to use for a randomly generated job ID. This parameter + will be ignored if a ``job_id`` is also given. + location (Optional[str]): Location where to run the job. Must match the location of the any table used in the query as well as the destination table. - project (str): + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -2699,8 +2683,8 @@ def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): str, \ ]): The table or reference from which to get partition info - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -2766,23 +2750,22 @@ def list_rows( selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): The fields to return. If not supplied, data for all columns are downloaded. - max_results (int): - (Optional) maximum number of rows to return. - page_token (str): - (Optional) Token representing a cursor into the table's rows. + max_results (Optional[int]): + Maximum number of rows to return. + page_token (Optional[str]): + Token representing a cursor into the table's rows. If not passed, the API will return the first page of the rows. The token marks the beginning of the iterator to be returned and the value of the ``page_token`` can be accessed at ``next_page_token`` of the :class:`~google.cloud.bigquery.table.RowIterator`. - start_index (int): - (Optional) The zero-based index of the starting row to read. - page_size (int): - Optional. The maximum number of rows in each page of results - from this request. Non-positive values are ignored. Defaults - to a sensible value set by the API. - retry (google.api_core.retry.Retry): - (Optional) How to retry the RPC. + start_index (Optional[int]): + The zero-based index of the starting row to read. + page_size (Optional[int]): + The maximum number of rows in each page of results from this request. + Non-positive values are ignored. Defaults to a sensible value set by the API. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index c804c1c172ef..9a80f30b5fa7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -295,9 +295,9 @@ def from_string(cls, dataset_id, default_project=None): A dataset ID in standard SQL format. If ``default_project`` is not specified, this must include both the project ID and the dataset ID, separated by ``.``. - default_project (str): - Optional. The project ID to use when ``dataset_id`` does not - include a project ID. + default_project (Optional[str]): + The project ID to use when ``dataset_id`` does not include a + project ID. Returns: DatasetReference: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 930dc413df3e..753307b2a7f4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -267,7 +267,7 @@ class SchemaUpdateOption(object): class _JobReference(object): """A reference to a job. - Arguments: + Args: job_id (str): ID of the job to run. project (str): ID of the project where the job runs. location (str): Location of where the job runs. @@ -311,7 +311,7 @@ def _from_api_repr(cls, resource): class _AsyncJob(google.api_core.future.polling.PollingFuture): """Base class for asynchronous jobs. - Arguments: + Args: job_id (Union[str, _JobReference]): Job's ID in the project associated with the client or a fully-qualified job reference. @@ -650,7 +650,7 @@ def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): the client to use. If not passed, falls back to the ``client`` stored on the current dataset. - retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -688,7 +688,7 @@ def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): the client to use. If not passed, falls back to the ``client`` stored on the current dataset. - retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -772,7 +772,7 @@ def done(self, retry=DEFAULT_RETRY, timeout=None): """Refresh the job and checks if it is complete. Args: - retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -790,7 +790,7 @@ def result(self, retry=DEFAULT_RETRY, timeout=None): """Start the job and wait for it to complete and get the result. Args: - retry (google.api_core.retry.Retry): (Optional) How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -877,8 +877,8 @@ def _get_sub_prop(self, key, default=None): key (str): Key for the value to get in the ``self._properties[self._job_type]`` dictionary. - default (object): - (Optional) Default value to return if the key is not found. + default (Optional[object]): + Default value to return if the key is not found. Defaults to :data:`None`. Returns: @@ -1003,9 +1003,9 @@ def __init__(self, **kwargs): @property def allow_jagged_rows(self): - """bool: Allow missing trailing optional columns (CSV only). + """Optional[bool]: Allow missing trailing optional columns (CSV only). - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_jagged_rows """ return self._get_sub_prop("allowJaggedRows") @@ -1016,9 +1016,9 @@ def allow_jagged_rows(self, value): @property def allow_quoted_newlines(self): - """bool: Allow quoted data containing newline characters (CSV only). + """Optional[bool]: Allow quoted data containing newline characters (CSV only). - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_quoted_newlines """ return self._get_sub_prop("allowQuotedNewlines") @@ -1029,9 +1029,9 @@ def allow_quoted_newlines(self, value): @property def autodetect(self): - """bool: Automatically infer the schema from a sample of the data. + """Optional[bool]: Automatically infer the schema from a sample of the data. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.autodetect """ return self._get_sub_prop("autodetect") @@ -1042,7 +1042,7 @@ def autodetect(self, value): @property def clustering_fields(self): - """Union[List[str], None]: Fields defining clustering for the table + """Optional[List[str]]: Fields defining clustering for the table (Defaults to :data:`None`). @@ -1059,7 +1059,7 @@ def clustering_fields(self): @clustering_fields.setter def clustering_fields(self, value): - """Union[List[str], None]: Fields defining clustering for the table + """Optional[List[str]]: Fields defining clustering for the table (Defaults to :data:`None`). """ @@ -1070,10 +1070,10 @@ def clustering_fields(self, value): @property def create_disposition(self): - """google.cloud.bigquery.job.CreateDisposition: Specifies behavior + """Optional[google.cloud.bigquery.job.CreateDisposition]: Specifies behavior for creating tables. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_disposition """ return self._get_sub_prop("createDisposition") @@ -1084,13 +1084,13 @@ def create_disposition(self, value): @property def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom encryption configuration for the destination table. Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` if using default encryption. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_encryption_configuration """ prop = self._get_sub_prop("destinationEncryptionConfiguration") @@ -1109,7 +1109,7 @@ def destination_encryption_configuration(self, value): @property def destination_table_description(self): - """Union[str, None] name given to destination table. + """Optional[str]: Name given to destination table. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description @@ -1128,7 +1128,7 @@ def destination_table_description(self, value): @property def destination_table_friendly_name(self): - """Union[str, None] name given to destination table. + """Optional[str]: Name given to destination table. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name @@ -1147,10 +1147,10 @@ def destination_table_friendly_name(self, value): @property def encoding(self): - """google.cloud.bigquery.job.Encoding: The character encoding of the + """Optional[google.cloud.bigquery.job.Encoding]: The character encoding of the data. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.encoding """ return self._get_sub_prop("encoding") @@ -1161,9 +1161,9 @@ def encoding(self, value): @property def field_delimiter(self): - """str: The separator for fields in a CSV file. + """Optional[str]: The separator for fields in a CSV file. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.field_delimiter """ return self._get_sub_prop("fieldDelimiter") @@ -1181,7 +1181,7 @@ def hive_partitioning(self): **Experimental**. This feature is experimental and might change or have limited support. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options """ prop = self._get_sub_prop("hivePartitioningOptions") @@ -1201,9 +1201,9 @@ def hive_partitioning(self, value): @property def ignore_unknown_values(self): - """bool: Ignore extra values not represented in the table schema. + """Optional[bool]: Ignore extra values not represented in the table schema. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.ignore_unknown_values """ return self._get_sub_prop("ignoreUnknownValues") @@ -1214,9 +1214,9 @@ def ignore_unknown_values(self, value): @property def max_bad_records(self): - """int: Number of invalid rows to ignore. + """Optional[int]: Number of invalid rows to ignore. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.max_bad_records """ return _helpers._int_or_none(self._get_sub_prop("maxBadRecords")) @@ -1227,9 +1227,9 @@ def max_bad_records(self, value): @property def null_marker(self): - """str: Represents a null value (CSV only). + """Optional[str]: Represents a null value (CSV only). - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_marker """ return self._get_sub_prop("nullMarker") @@ -1240,9 +1240,9 @@ def null_marker(self, value): @property def quote_character(self): - """str: Character used to quote data sections (CSV only). + """Optional[str]: Character used to quote data sections (CSV only). - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.quote """ return self._get_sub_prop("quote") @@ -1287,12 +1287,12 @@ def range_partitioning(self, value): @property def schema(self): - """Sequence[Union[ \ + """Optional[Sequence[Union[ \ :class:`~google.cloud.bigquery.schema.SchemaField`, \ Mapping[str, Any] \ - ]]: Schema of the destination table. + ]]]: Schema of the destination table. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.schema """ schema = _helpers._get_sub_prop(self._properties, ["load", "schema", "fields"]) @@ -1316,7 +1316,7 @@ def schema(self, value): @property def schema_update_options(self): - """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies + """Optional[List[google.cloud.bigquery.job.SchemaUpdateOption]]: Specifies updates to the destination table schema to allow as a side effect of the load job. """ @@ -1328,9 +1328,9 @@ def schema_update_options(self, values): @property def skip_leading_rows(self): - """int: Number of rows to skip when reading data (CSV only). + """Optional[int]: Number of rows to skip when reading data (CSV only). - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.skip_leading_rows """ return _helpers._int_or_none(self._get_sub_prop("skipLeadingRows")) @@ -1341,9 +1341,9 @@ def skip_leading_rows(self, value): @property def source_format(self): - """google.cloud.bigquery.job.SourceFormat: File format of the data. + """Optional[google.cloud.bigquery.job.SourceFormat]: File format of the data. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_format """ return self._get_sub_prop("sourceFormat") @@ -1354,7 +1354,7 @@ def source_format(self, value): @property def time_partitioning(self): - """google.cloud.bigquery.table.TimePartitioning: Specifies time-based + """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based partitioning for the destination table. Only specify at most one of @@ -1377,7 +1377,7 @@ def time_partitioning(self, value): @property def use_avro_logical_types(self): - """bool: For loads of Avro data, governs whether Avro logical types are + """Optional[bool]: For loads of Avro data, governs whether Avro logical types are converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than raw types (e.g. INTEGER). """ @@ -1389,10 +1389,10 @@ def use_avro_logical_types(self, value): @property def write_disposition(self): - """google.cloud.bigquery.job.WriteDisposition: Action that occurs if + """Optional[google.cloud.bigquery.job.WriteDisposition]: Action that occurs if the destination table already exists. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.write_disposition """ return self._get_sub_prop("writeDisposition") @@ -1792,8 +1792,8 @@ class CopyJob(_AsyncJob): A client which holds credentials and project configuration for the dataset (which requires a project). - job_config (google.cloud.bigquery.job.CopyJobConfig): - (Optional) Extra configuration options for the copy job. + job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]): + Extra configuration options for the copy job. """ _JOB_TYPE = "copy" @@ -2003,8 +2003,8 @@ class ExtractJob(_AsyncJob): client (google.cloud.bigquery.client.Client): A client which holds credentials and project configuration. - job_config (google.cloud.bigquery.job.ExtractJobConfig): - (Optional) Extra configuration options for the extract job. + job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]): + Extra configuration options for the extract job. """ _JOB_TYPE = "extract" @@ -2593,8 +2593,8 @@ class QueryJob(_AsyncJob): A client which holds credentials and project configuration for the dataset (which requires a project). - job_config (google.cloud.bigquery.job.QueryJobConfig): - (Optional) Extra configuration options for the query job. + job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): + Extra configuration options for the query job. """ _JOB_TYPE = "query" @@ -3274,19 +3274,19 @@ def to_arrow( ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. - bqstorage_client (google.cloud.bigquery_storage_v1.BigQueryReadClient): - Optional. A BigQuery Storage API client. If supplied, use the - faster BigQuery Storage API to fetch rows from BigQuery. - This API is a billable API. + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This API + is a billable API. This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. Reading from a specific partition or snapshot is not currently supported by this method. - create_bqstorage_client (bool): - Optional. If ``True`` (default), create a BigQuery Storage API - client using the default API settings. The BigQuery Storage API + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API is a faster way to fetch rows from BigQuery. See the ``bqstorage_client`` parameter for more information. @@ -3325,9 +3325,9 @@ def to_dataframe( """Return a pandas DataFrame from a QueryJob Args: - bqstorage_client (google.cloud.bigquery_storage_v1.BigQueryReadClient): - Optional. A BigQuery Storage API client. If supplied, use the - faster BigQuery Storage API to fetch rows from BigQuery. This + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. This method requires the ``fastavro`` and @@ -3336,11 +3336,11 @@ def to_dataframe( Reading from a specific partition or snapshot is not currently supported by this method. - dtypes (Map[str, Union[str, pandas.Series.dtype]]): - Optional. A dictionary of column names pandas ``dtype``s. The - provided ``dtype`` is used when constructing the series for - the column specified. Otherwise, the default pandas behavior - is used. + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + progress_bar_type (Optional[str]): If set, use the `tqdm `_ library to display a progress bar while the data downloads. Install the diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py index 40dda3d13bc1..7128e32bfea1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py @@ -26,31 +26,31 @@ Parameters: - * ```` (optional, line argument): + * ```` (Optional[line argument]): variable to store the query results. The results are not displayed if this parameter is used. If an error occurs during the query execution, the corresponding ``QueryJob`` instance (if available) is stored in the variable instead. - * ``--destination_table`` (optional, line argument): + * ``--destination_table`` (Optional[line argument]): A dataset and table to store the query results. If table does not exists, it will be created. If table already exists, its data will be overwritten. Variable should be in a format .. - * ``--project `` (optional, line argument): + * ``--project `` (Optional[line argument]): Project to use for running the query. Defaults to the context :attr:`~google.cloud.bigquery.magics.Context.project`. - * ``--use_bqstorage_api`` (optional, line argument): + * ``--use_bqstorage_api`` (Optional[line argument]): [Deprecated] Not used anymore, as BigQuery Storage API is used by default. - * ``--use_rest_api`` (optional, line argument): + * ``--use_rest_api`` (Optional[line argument]): Use the BigQuery REST API instead of the Storage API. - * ``--use_legacy_sql`` (optional, line argument): + * ``--use_legacy_sql`` (Optional[line argument]): Runs the query using Legacy SQL syntax. Defaults to Standard SQL if this argument not used. - * ``--verbose`` (optional, line argument): + * ``--verbose`` (Optional[line argument]): If this flag is used, information including the query job ID and the amount of time for the query to complete will not be cleared after the query is finished. By default, this information will be displayed but will be cleared after the query is finished. - * ``--params `` (optional, line argument): + * ``--params `` (Optional[line argument]): If present, the argument following the ``--params`` flag must be either: @@ -304,7 +304,7 @@ def _run_query(client, query, job_config=None): query (str): SQL query to be executed. Defaults to the standard SQL dialect. Use the ``job_config`` parameter to change dialects. - job_config (google.cloud.bigquery.job.QueryJobConfig, optional): + job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): Extra configuration options for the job. Returns: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index eb459f57a638..d3fe8a9379af 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -228,7 +228,7 @@ def description(self, value): @property def friendly_name(self): - """Union[str, None]: Title of the table (defaults to :data:`None`). + """Optional[str]: Title of the table (defaults to :data:`None`). Raises: ValueError: For invalid value types. @@ -241,7 +241,7 @@ def friendly_name(self, value): @property def labels(self): - """Dict[str, str]: Labels for the table. + """Optional[Dict[str, str]]: Labels for the table. This method always returns a dict. To change a model's labels, modify the dict, then call ``Client.update_model``. To delete a @@ -257,7 +257,7 @@ def labels(self, value): @property def encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom encryption configuration for the model. Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` @@ -383,9 +383,9 @@ def from_string(cls, model_id, default_project=None): A model ID in standard SQL format. If ``default_project`` is not specified, this must included a project ID, dataset ID, and model ID, each separated by ``.``. - default_project (str): - Optional. The project ID to use when ``model_id`` does not - include a project ID. + default_project (Optional[str]): + The project ID to use when ``model_id`` does not include + a project ID. Returns: google.cloud.bigquery.model.ModelReference: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 925f3e29d298..0f4c806864ba 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -27,11 +27,11 @@ class UDFResource(object): """Describe a single user-defined function (UDF) resource. Args: - udf_type (str): the type of the resource ('inlineCode' or 'resourceUri') + udf_type (str): The type of the resource ('inlineCode' or 'resourceUri') - value (str): the inline code or resource URI. + value (str): The inline code or resource URI. - See + See: https://cloud.google.com/bigquery/user-defined-functions#api """ @@ -82,12 +82,12 @@ class ScalarQueryParameter(_AbstractQueryParameter): parameter can only be addressed via position (``?``). type_ (str): - name of parameter type. One of 'STRING', 'INT64', + Name of parameter type. One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. value (Union[str, int, float, decimal.Decimal, bool, - datetime.datetime, datetime.date]): the scalar parameter value. + datetime.datetime, datetime.date]): The scalar parameter value. """ def __init__(self, name, type_, value): @@ -101,16 +101,16 @@ def positional(cls, type_, value): Args: type_ (str): - name of parameter type. One of 'STRING', 'INT64', + Name of parameter type. One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, - datetime.date]): the scalar parameter value. + datetime.date]): The scalar parameter value. Returns: - google.cloud.bigquery.query.ScalarQueryParameter: instance without name + google.cloud.bigquery.query.ScalarQueryParameter: Instance without name """ return cls(None, type_, value) @@ -122,7 +122,7 @@ def from_api_repr(cls, resource): resource (Dict): JSON mapping of parameter Returns: - google.cloud.bigquery.query.ScalarQueryParameter: instance + google.cloud.bigquery.query.ScalarQueryParameter: Instance """ name = resource.get("name") type_ = resource["parameterType"]["type"] @@ -186,10 +186,10 @@ class ArrayQueryParameter(_AbstractQueryParameter): parameter can only be addressed via position (``?``). array_type (str): - name of type of array elements. One of `'STRING'`, `'INT64'`, + Name of type of array elements. One of `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. - values (List[appropriate scalar type]): the parameter array values. + values (List[appropriate scalar type]): The parameter array values. """ def __init__(self, name, array_type, values): @@ -203,13 +203,13 @@ def positional(cls, array_type, values): Args: array_type (str): - name of type of array elements. One of `'STRING'`, `'INT64'`, + Name of type of array elements. One of `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. - values (List[appropriate scalar type]): the parameter array values. + values (List[appropriate scalar type]): The parameter array values. Returns: - google.cloud.bigquery.query.ArrayQueryParameter: instance without name + google.cloud.bigquery.query.ArrayQueryParameter: Instance without name """ return cls(None, array_type, values) @@ -250,7 +250,7 @@ def from_api_repr(cls, resource): resource (Dict): JSON mapping of parameter Returns: - google.cloud.bigquery.query.ArrayQueryParameter: instance + google.cloud.bigquery.query.ArrayQueryParameter: Instance """ array_type = resource["parameterType"]["arrayType"]["type"] if array_type == "STRUCT": @@ -316,7 +316,7 @@ class StructQueryParameter(_AbstractQueryParameter): google.cloud.bigquery.query.ScalarQueryParameter, google.cloud.bigquery.query.ArrayQueryParameter, google.cloud.bigquery.query.StructQueryParameter - ]]): the sub-parameters for the struct + ]]): The sub-parameters for the struct """ def __init__(self, name, *sub_params): @@ -343,10 +343,10 @@ def positional(cls, *sub_params): google.cloud.bigquery.query.ScalarQueryParameter, google.cloud.bigquery.query.ArrayQueryParameter, google.cloud.bigquery.query.StructQueryParameter - ]]): the sub-parameters for the struct + ]]): The sub-parameters for the struct Returns: - google.cloud.bigquery.query.StructQueryParameter: instance without name + google.cloud.bigquery.query.StructQueryParameter: Instance without name """ return cls(None, *sub_params) @@ -358,7 +358,7 @@ def from_api_repr(cls, resource): resource (Dict): JSON mapping of parameter Returns: - google.cloud.bigquery.query.StructQueryParameter: instance + google.cloud.bigquery.query.StructQueryParameter: Instance """ name = resource.get("name") instance = cls(name) @@ -473,7 +473,7 @@ def project(self): def cache_hit(self): """Query results served from cache. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.cache_hit Returns: @@ -487,7 +487,7 @@ def cache_hit(self): def complete(self): """Server completed query. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.job_complete Returns: @@ -501,7 +501,7 @@ def complete(self): def errors(self): """Errors generated by the query. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.errors Returns: @@ -515,7 +515,7 @@ def errors(self): def job_id(self): """Job ID of the query job these results are from. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.job_reference Returns: @@ -527,7 +527,7 @@ def job_id(self): def page_token(self): """Token for fetching next bach of results. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.page_token Returns: @@ -539,7 +539,7 @@ def page_token(self): def total_rows(self): """Total number of rows returned by the query. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_rows Returns: @@ -553,7 +553,7 @@ def total_rows(self): def total_bytes_processed(self): """Total number of bytes processed by the query. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_bytes_processed Returns: @@ -567,7 +567,7 @@ def total_bytes_processed(self): def num_dml_affected_rows(self): """Total number of rows affected by a DML query. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.num_dml_affected_rows Returns: @@ -581,7 +581,7 @@ def num_dml_affected_rows(self): def rows(self): """Query results. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.rows Returns: @@ -594,7 +594,7 @@ def rows(self): def schema(self): """Schema for query results. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.schema Returns: @@ -607,7 +607,7 @@ def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` Args: - api_response (Dict): response returned from an API call + api_response (Dict): Response returned from an API call """ job_id_present = ( "jobReference" in api_response @@ -622,7 +622,7 @@ def _set_properties(self, api_response): def _query_param_from_api_repr(resource): - """Helper: construct concrete query parameter from JSON resource.""" + """Helper: Construct concrete query parameter from JSON resource.""" qp_type = resource["parameterType"] if "arrayType" in qp_type: klass = ArrayQueryParameter diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py index e99d9c6fa162..03423c01b71a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py @@ -288,7 +288,7 @@ def __repr__(self): class RoutineArgument(object): """Input/output argument of a function or a stored procedure. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#argument Args: @@ -411,7 +411,7 @@ def __repr__(self): class RoutineReference(object): """A pointer to a routine. - See + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#routinereference """ @@ -467,8 +467,8 @@ def from_string(cls, routine_id, default_project=None): A routine ID in standard SQL format. If ``default_project`` is not specified, this must included a project ID, dataset ID, and routine ID, each separated by ``.``. - default_project (str): - Optional. The project ID to use when ``routine_id`` does not + default_project (Optional[str]): + The project ID to use when ``routine_id`` does not include a project ID. Returns: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 0eaf1201b594..c1b2588be811 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -50,18 +50,18 @@ class SchemaField(object): """Describe a single field within a table schema. Args: - name (str): the name of the field. + name (str): The name of the field. - field_type (str): the type of the field. See + field_type (str): The type of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type - mode (str): the mode of the field. See + mode (Optional[str]): The mode of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode - description (Optional[str]): description for the field. + description (Optional[str]): Description for the field. - fields (Tuple[google.cloud.bigquery.schema.SchemaField]): - subfields (requires ``field_type`` of 'RECORD'). + fields (Optional[Tuple[google.cloud.bigquery.schema.SchemaField]]): + Subfields (requires ``field_type`` of 'RECORD'). policy_tags (Optional[PolicyTagList]): The policy tag list for the field. @@ -125,7 +125,7 @@ def field_type(self): @property def mode(self): - """str: The mode of the field. + """Optional[str]: The mode of the field. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode @@ -144,7 +144,7 @@ def description(self): @property def fields(self): - """tuple: Subfields contained in this field. + """Optional[tuple]: Subfields contained in this field. Must be empty unset if ``field_type`` is not 'RECORD'. """ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 5f557d28a10d..5766f5fbe4b7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -166,8 +166,8 @@ def from_string(cls, table_id, default_project=None): A table ID in standard SQL format. If ``default_project`` is not specified, this must included a project ID, dataset ID, and table ID, each separated by ``.``. - default_project (str): - Optional. The project ID to use when ``table_id`` does not + default_project (Optional[str]): + The project ID to use when ``table_id`` does not include a project ID. Returns: @@ -1322,20 +1322,20 @@ class RowIterator(HTTPIterator): :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. page_token (str): A token identifying a page in a result set to start fetching results from. - max_results (int, optional): The maximum number of results to fetch. - page_size (int, optional): The maximum number of rows in each page + max_results (Optional[int]): The maximum number of results to fetch. + page_size (Optional[int]): The maximum number of rows in each page of results from this request. Non-positive values are ignored. Defaults to a sensible value set by the API. - extra_params (Dict[str, object]): + extra_params (Optional[Dict[str, object]]): Extra query string parameters for the API call. - table (Union[ \ + table (Optional[Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ - ]): - Optional. The table which these rows belong to, or a reference to - it. Used to call the BigQuery Storage API to fetch rows. - selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): - Optional. A subset of columns to select from this table. + ]]): + The table which these rows belong to, or a reference to it. Used to + call the BigQuery Storage API to fetch rows. + selected_fields (Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]): + A subset of columns to select from this table. """ @@ -1487,10 +1487,9 @@ def to_arrow( ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. - bqstorage_client (google.cloud.bigquery_storage_v1.BigQueryReadClient): - Optional. A BigQuery Storage API client. If supplied, use the - faster BigQuery Storage API to fetch rows from BigQuery. This - API is a billable API. + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster BigQuery + Storage API to fetch rows from BigQuery. This API is a billable API. This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. @@ -1498,11 +1497,11 @@ def to_arrow( This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features (projections, filters, snapshots) use the Storage API directly. - create_bqstorage_client (bool): - Optional. If ``True`` (default), create a BigQuery Storage API - client using the default API settings. The BigQuery Storage API - is a faster way to fetch rows from BigQuery. See the - ``bqstorage_client`` parameter for more information. + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client using + the default API settings. The BigQuery Storage API is a faster way + to fetch rows from BigQuery. See the ``bqstorage_client`` parameter + for more information. This argument does nothing if ``bqstorage_client`` is supplied. @@ -1572,9 +1571,9 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): """Create an iterable of pandas DataFrames, to process the table as a stream. Args: - bqstorage_client (google.cloud.bigquery_storage_v1.BigQueryReadClient): - Optional. A BigQuery Storage API client. If supplied, use the - faster BigQuery Storage API to fetch rows from BigQuery. + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. @@ -1583,11 +1582,10 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): BigQuery Storage API. For full access to all features (projections, filters, snapshots) use the Storage API directly. - dtypes (Map[str, Union[str, pandas.Series.dtype]]): - Optional. A dictionary of column names pandas ``dtype``s. The - provided ``dtype`` is used when constructing the series for - the column specified. Otherwise, the default pandas behavior - is used. + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. Returns: pandas.DataFrame: @@ -1638,9 +1636,9 @@ def to_dataframe( """Create a pandas DataFrame by loading all pages of a query. Args: - bqstorage_client (google.cloud.bigquery_storage_v1.BigQueryReadClient): - Optional. A BigQuery Storage API client. If supplied, use the - faster BigQuery Storage API to fetch rows from BigQuery. + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This method requires the ``pyarrow`` and ``google-cloud-bigquery-storage`` libraries. @@ -1649,11 +1647,10 @@ def to_dataframe( BigQuery Storage API. For full access to all features (projections, filters, snapshots) use the Storage API directly. - dtypes (Map[str, Union[str, pandas.Series.dtype]]): - Optional. A dictionary of column names pandas ``dtype``s. The - provided ``dtype`` is used when constructing the series for - the column specified. Otherwise, the default pandas behavior - is used. + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. progress_bar_type (Optional[str]): If set, use the `tqdm `_ library to display a progress bar while the data downloads. Install the @@ -1789,7 +1786,7 @@ def to_arrow( """[Beta] Create an empty class:`pyarrow.Table`. Args: - progress_bar_type (Optional[str]): Ignored. Added for compatibility with RowIterator. + progress_bar_type (str): Ignored. Added for compatibility with RowIterator. bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. @@ -1983,19 +1980,19 @@ class TimePartitioning(object): """Configures time-based partitioning for a table. Args: - type_ (google.cloud.bigquery.table.TimePartitioningType, optional): + type_ (Optional[google.cloud.bigquery.table.TimePartitioningType]): Specifies the type of time partitioning to perform. Defaults to :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`, which is the only currently supported type. - field (str, optional): + field (Optional[str]): If set, the table is partitioned by this field. If not set, the table is partitioned by pseudo column ``_PARTITIONTIME``. The field must be a top-level ``TIMESTAMP`` or ``DATE`` field. Its mode must be ``NULLABLE`` or ``REQUIRED``. - expiration_ms(int, optional): + expiration_ms(Optional[int]): Number of milliseconds for which to keep the storage for a partition. - require_partition_filter (bool, optional): + require_partition_filter (Optional[bool]): DEPRECATED: Use :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`, instead. From e3fdd9b09891cd046256a5731509edd4cfcef7ba Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 1 Jul 2020 14:36:27 -0700 Subject: [PATCH 0789/2016] chore: update protobuf version (#145) --- packages/google-cloud-bigquery/.coveragerc | 16 + packages/google-cloud-bigquery/.flake8 | 18 + .../.github/ISSUE_TEMPLATE/bug_report.md | 3 +- packages/google-cloud-bigquery/.gitignore | 2 + .../.kokoro/publish-docs.sh | 2 - .../google-cloud-bigquery/.kokoro/release.sh | 2 - .../.kokoro/samples/lint/common.cfg | 34 + .../.kokoro/samples/lint/continuous.cfg | 6 + .../.kokoro/samples/lint/periodic.cfg | 6 + .../.kokoro/samples/lint/presubmit.cfg | 6 + .../.kokoro/samples/python3.6/common.cfg | 34 + .../.kokoro/samples/python3.6/continuous.cfg | 7 + .../.kokoro/samples/python3.6/periodic.cfg | 6 + .../.kokoro/samples/python3.6/presubmit.cfg | 6 + .../.kokoro/samples/python3.7/common.cfg | 34 + .../.kokoro/samples/python3.7/continuous.cfg | 6 + .../.kokoro/samples/python3.7/periodic.cfg | 6 + .../.kokoro/samples/python3.7/presubmit.cfg | 6 + .../.kokoro/samples/python3.8/common.cfg | 34 + .../.kokoro/samples/python3.8/continuous.cfg | 6 + .../.kokoro/samples/python3.8/periodic.cfg | 6 + .../.kokoro/samples/python3.8/presubmit.cfg | 6 + .../.kokoro/test-samples.sh | 104 +++ .../google-cloud-bigquery/CONTRIBUTING.rst | 15 +- packages/google-cloud-bigquery/MANIFEST.in | 19 + .../docs/_static/custom.css | 2 +- .../docs/_templates/layout.html | 5 +- packages/google-cloud-bigquery/docs/conf.py | 2 +- .../google/cloud/bigquery_v2/gapic/enums.py | 10 +- .../proto/encryption_config_pb2.py | 28 +- .../proto/encryption_config_pb2_grpc.py | 1 + .../cloud/bigquery_v2/proto/model_pb2.py | 779 +++++++++++------- .../cloud/bigquery_v2/proto/model_pb2_grpc.py | 136 ++- .../bigquery_v2/proto/model_reference_pb2.py | 43 +- .../proto/model_reference_pb2_grpc.py | 1 + .../bigquery_v2/proto/standard_sql_pb2.py | 173 ++-- .../proto/standard_sql_pb2_grpc.py | 1 + .../scripts/decrypt-secrets.sh | 33 + .../scripts/readme-gen/readme_gen.py | 66 ++ .../readme-gen/templates/README.tmpl.rst | 87 ++ .../readme-gen/templates/auth.tmpl.rst | 9 + .../templates/auth_api_key.tmpl.rst | 14 + .../templates/install_deps.tmpl.rst | 29 + .../templates/install_portaudio.tmpl.rst | 35 + packages/google-cloud-bigquery/setup.cfg | 16 + packages/google-cloud-bigquery/synth.metadata | 24 +- packages/google-cloud-bigquery/synth.py | 4 +- .../google-cloud-bigquery/testing/.gitignore | 3 + 48 files changed, 1463 insertions(+), 428 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/lint/continuous.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/lint/periodic.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/lint/presubmit.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.6/continuous.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.6/presubmit.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.7/continuous.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.7/presubmit.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.8/continuous.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.8/presubmit.cfg create mode 100755 packages/google-cloud-bigquery/.kokoro/test-samples.sh create mode 100755 packages/google-cloud-bigquery/scripts/decrypt-secrets.sh create mode 100644 packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py create mode 100644 packages/google-cloud-bigquery/scripts/readme-gen/templates/README.tmpl.rst create mode 100644 packages/google-cloud-bigquery/scripts/readme-gen/templates/auth.tmpl.rst create mode 100644 packages/google-cloud-bigquery/scripts/readme-gen/templates/auth_api_key.tmpl.rst create mode 100644 packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst create mode 100644 packages/google-cloud-bigquery/scripts/readme-gen/templates/install_portaudio.tmpl.rst create mode 100644 packages/google-cloud-bigquery/testing/.gitignore diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index b178b094aa1d..dd39c8546c41 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -1,3 +1,19 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Generated by synthtool. DO NOT EDIT! [run] branch = True diff --git a/packages/google-cloud-bigquery/.flake8 b/packages/google-cloud-bigquery/.flake8 index 0268ecc9c55c..ed9316381c9c 100644 --- a/packages/google-cloud-bigquery/.flake8 +++ b/packages/google-cloud-bigquery/.flake8 @@ -1,3 +1,19 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Generated by synthtool. DO NOT EDIT! [flake8] ignore = E203, E266, E501, W503 @@ -5,6 +21,8 @@ exclude = # Exclude generated code. **/proto/** **/gapic/** + **/services/** + **/types/** *_pb2.py # Standard linting exemptions. diff --git a/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md b/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md index 222dc82a48a5..5b5339350a60 100644 --- a/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md +++ b/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md @@ -11,8 +11,7 @@ Thanks for stopping by to let us know something could be better! Please run down the following list and make sure you've tried the usual "quick fixes": - Search the issues already opened: https://github.com/googleapis/python-bigquery/issues - - Search the issues on our "catch-all" repository: https://github.com/googleapis/google-cloud-python - - Search StackOverflow: http://stackoverflow.com/questions/tagged/google-cloud-platform+python + - Search StackOverflow: https://stackoverflow.com/questions/tagged/google-cloud-platform+python If you are still having issues, please be sure to include as much information as possible: diff --git a/packages/google-cloud-bigquery/.gitignore b/packages/google-cloud-bigquery/.gitignore index 3fb06e09ce74..b87e1ed580d9 100644 --- a/packages/google-cloud-bigquery/.gitignore +++ b/packages/google-cloud-bigquery/.gitignore @@ -10,6 +10,7 @@ dist build eggs +.eggs parts bin var @@ -49,6 +50,7 @@ bigquery/docs/generated # Virtual environment env/ coverage.xml +sponge_log.xml # System test environment variables. system_tests/local_test_setup diff --git a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh index de3549ef89ca..309212789828 100755 --- a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh +++ b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh @@ -13,8 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -#!/bin/bash - set -eo pipefail # Disable buffering, so that the logs stream through. diff --git a/packages/google-cloud-bigquery/.kokoro/release.sh b/packages/google-cloud-bigquery/.kokoro/release.sh index 55233bd89166..0e58f0640fdf 100755 --- a/packages/google-cloud-bigquery/.kokoro/release.sh +++ b/packages/google-cloud-bigquery/.kokoro/release.sh @@ -13,8 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -#!/bin/bash - set -eo pipefail # Start the releasetool reporter diff --git a/packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg new file mode 100644 index 000000000000..3e41df313979 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg @@ -0,0 +1,34 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "lint" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/lint/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/lint/continuous.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/lint/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/lint/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/lint/periodic.cfg new file mode 100644 index 000000000000..50fec9649732 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/lint/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/lint/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/lint/presubmit.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/lint/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg new file mode 100644 index 000000000000..a56768eae259 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg @@ -0,0 +1,34 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.6" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/continuous.cfg new file mode 100644 index 000000000000..7218af1499e5 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/continuous.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg new file mode 100644 index 000000000000..50fec9649732 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/presubmit.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg new file mode 100644 index 000000000000..c93747180b2e --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg @@ -0,0 +1,34 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.7" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/continuous.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg new file mode 100644 index 000000000000..50fec9649732 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/presubmit.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg new file mode 100644 index 000000000000..9808f15e32a9 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg @@ -0,0 +1,34 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.8" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/continuous.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg new file mode 100644 index 000000000000..50fec9649732 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/presubmit.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples.sh b/packages/google-cloud-bigquery/.kokoro/test-samples.sh new file mode 100755 index 000000000000..905732a405e3 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/test-samples.sh @@ -0,0 +1,104 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# `-e` enables the script to automatically fail when a command fails +# `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero +set -eo pipefail +# Enables `**` to include files nested inside sub-folders +shopt -s globstar + +cd github/python-bigquery + +# Run periodic samples tests at latest release +if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then + LATEST_RELEASE=$(git describe --abbrev=0 --tags) + git checkout $LATEST_RELEASE +fi + +# Disable buffering, so that the logs stream through. +export PYTHONUNBUFFERED=1 + +# Debug: show build environment +env | grep KOKORO + +# Install nox +python3.6 -m pip install --upgrade --quiet nox + +# Use secrets acessor service account to get secrets +if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then + gcloud auth activate-service-account \ + --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \ + --project="cloud-devrel-kokoro-resources" +fi + +# This script will create 3 files: +# - testing/test-env.sh +# - testing/service-account.json +# - testing/client-secrets.json +./scripts/decrypt-secrets.sh + +source ./testing/test-env.sh +export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json + +# For cloud-run session, we activate the service account for gcloud sdk. +gcloud auth activate-service-account \ + --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" + +export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json + +echo -e "\n******************** TESTING PROJECTS ********************" + +# Switch to 'fail at end' to allow all tests to complete before exiting. +set +e +# Use RTN to return a non-zero value if the test fails. +RTN=0 +ROOT=$(pwd) +# Find all requirements.txt in the samples directory (may break on whitespace). +for file in samples/**/requirements.txt; do + cd "$ROOT" + # Navigate to the project folder. + file=$(dirname "$file") + cd "$file" + + echo "------------------------------------------------------------" + echo "- testing $file" + echo "------------------------------------------------------------" + + # Use nox to execute the tests for the project. + python3.6 -m nox -s "$RUN_TESTS_SESSION" + EXIT=$? + + # If this is a periodic build, send the test log to the Build Cop Bot. + # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/buildcop. + if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then + chmod +x $KOKORO_GFILE_DIR/linux_amd64/buildcop + $KOKORO_GFILE_DIR/linux_amd64/buildcop + fi + + if [[ $EXIT -ne 0 ]]; then + RTN=1 + echo -e "\n Testing failed: Nox returned a non-zero exit code. \n" + else + echo -e "\n Testing completed.\n" + fi + +done +cd "$ROOT" + +# Workaround for Kokoro permissions issue: delete secrets +rm testing/{test-env.sh,client-secrets.json,service-account.json} + +exit "$RTN" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index c812edbd1d4d..3366287d68be 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: 2.7, - 3.5, 3.6, and 3.7 on both UNIX and Windows. + 3.5, 3.6, 3.7 and 3.8 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -214,26 +214,18 @@ We support: - `Python 3.5`_ - `Python 3.6`_ - `Python 3.7`_ +- `Python 3.8`_ .. _Python 3.5: https://docs.python.org/3.5/ .. _Python 3.6: https://docs.python.org/3.6/ .. _Python 3.7: https://docs.python.org/3.7/ +.. _Python 3.8: https://docs.python.org/3.8/ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py -We explicitly decided not to support `Python 2.5`_ due to `decreased usage`_ -and lack of continuous integration `support`_. - -.. _Python 2.5: https://docs.python.org/2.5/ -.. _decreased usage: https://caremad.io/2013/10/a-look-at-pypi-downloads/ -.. _support: https://blog.travis-ci.com/2013-11-18-upcoming-build-environment-updates/ - -We have `dropped 2.6`_ as a supported version as well since Python 2.6 is no -longer supported by the core development team. - Python 2.7 support is deprecated. All code changes should maintain Python 2.7 compatibility until January 1, 2020. We also explicitly decided to support Python 3 beginning with version @@ -247,7 +239,6 @@ We also explicitly decided to support Python 3 beginning with version .. _prominent: https://docs.djangoproject.com/en/1.9/faq/install/#what-python-version-can-i-use-with-django .. _projects: http://flask.pocoo.org/docs/0.10/python3/ .. _Unicode literal support: https://www.python.org/dev/peps/pep-0414/ -.. _dropped 2.6: https://github.com/googleapis/google-cloud-python/issues/995 ********** Versioning diff --git a/packages/google-cloud-bigquery/MANIFEST.in b/packages/google-cloud-bigquery/MANIFEST.in index cd011be27a0e..e9e29d12033d 100644 --- a/packages/google-cloud-bigquery/MANIFEST.in +++ b/packages/google-cloud-bigquery/MANIFEST.in @@ -1,6 +1,25 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Generated by synthtool. DO NOT EDIT! include README.rst LICENSE recursive-include google *.json *.proto recursive-include tests * global-exclude *.py[co] global-exclude __pycache__ + +# Exclude scripts for samples readmegen +prune scripts/readme-gen \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/_static/custom.css b/packages/google-cloud-bigquery/docs/_static/custom.css index 9a6f9f8ddc3a..0abaf229fce3 100644 --- a/packages/google-cloud-bigquery/docs/_static/custom.css +++ b/packages/google-cloud-bigquery/docs/_static/custom.css @@ -1,4 +1,4 @@ div#python2-eol { border-color: red; border-width: medium; -} \ No newline at end of file +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/_templates/layout.html b/packages/google-cloud-bigquery/docs/_templates/layout.html index de457b2c2767..6316a537f72b 100644 --- a/packages/google-cloud-bigquery/docs/_templates/layout.html +++ b/packages/google-cloud-bigquery/docs/_templates/layout.html @@ -1,3 +1,4 @@ + {% extends "!layout.html" %} {%- block content %} {%- if theme_fixed_sidebar|lower == 'true' %} @@ -20,8 +21,8 @@
- On January 1, 2020 this library will no longer support Python 2 on the latest released version. - Previously released library versions will continue to be available. For more information please + As of January 1, 2020 this library no longer supports Python 2 on the latest released version. + Library versions released prior to that date will continue to be available. For more information please visit Python 2 support on Google Cloud.
{% block body %} {% endblock %} diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 30dcac56443e..332b81b10268 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -337,7 +337,7 @@ intersphinx_mapping = { "python": ("http://python.readthedocs.org/en/latest/", None), "google-auth": ("https://google-auth.readthedocs.io/en/stable", None), - "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None), + "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), "grpc": ("https://grpc.io/grpc/python/", None), } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py index 97059414f368..10d7c2517296 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py @@ -30,7 +30,7 @@ class DataSplitMethod(enum.IntEnum): CUSTOM (int): Splits data with the user provided tags. SEQUENTIAL (int): Splits data sequentially. NO_SPLIT (int): Data split will be skipped. - AUTO_SPLIT (int): Splits data automatically: Uses NO\_SPLIT if the data size is small. + AUTO_SPLIT (int): Splits data automatically: Uses NO_SPLIT if the data size is small. Otherwise uses RANDOM. """ @@ -125,7 +125,7 @@ class KmeansInitializationMethod(enum.IntEnum): KMEANS_INITIALIZATION_METHOD_UNSPECIFIED (int) RANDOM (int): Initializes the centroids randomly. CUSTOM (int): Initializes the centroids using data specified in - kmeans\_initialization\_column. + kmeans_initialization_column. """ KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0 @@ -150,9 +150,9 @@ class TypeKind(enum.IntEnum): DATETIME (int): Encoded as RFC 3339 full-date "T" partial-time: 1985-04-12T23:20:50.52 GEOGRAPHY (int): Encoded as WKT NUMERIC (int): Encoded as a decimal string. - ARRAY (int): Encoded as a list with types matching Type.array\_type. - STRUCT (int): Encoded as a list with fields of type Type.struct\_type[i]. List is used - because a JSON object cannot have duplicate field names. + ARRAY (int): Encoded as a list with types matching Type.array_type. + STRUCT (int): Encoded as a list with fields of type Type.struct_type[i]. List is + used because a JSON object cannot have duplicate field names. """ TYPE_KIND_UNSPECIFIED = 0 diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py index f7b26be5547f..5ae21ea6f49f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py @@ -1,10 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/encryption_config.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection @@ -24,12 +21,9 @@ name="google/cloud/bigquery_v2/proto/encryption_config.proto", package="google.cloud.bigquery.v2", syntax="proto3", - serialized_options=_b( - "\n\034com.google.cloud.bigquery.v2B\025EncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" - ), - serialized_pb=_b( - '\n6google/cloud/bigquery_v2/proto/encryption_config.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"R\n\x17\x45ncryptionConfiguration\x12\x37\n\x0ckms_key_name\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.StringValueB\x03\xe0\x41\x01\x42w\n\x1c\x63om.google.cloud.bigquery.v2B\x15\x45ncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' - ), + serialized_options=b"\n\034com.google.cloud.bigquery.v2B\025EncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", + create_key=_descriptor._internal_create_key, + serialized_pb=b'\n6google/cloud/bigquery_v2/proto/encryption_config.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"R\n\x17\x45ncryptionConfiguration\x12\x37\n\x0ckms_key_name\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.StringValueB\x03\xe0\x41\x01\x42w\n\x1c\x63om.google.cloud.bigquery.v2B\x15\x45ncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', dependencies=[ google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR, @@ -44,6 +38,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="kms_key_name", @@ -60,8 +55,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\001"), + serialized_options=b"\340A\001", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -85,10 +81,10 @@ EncryptionConfiguration = _reflection.GeneratedProtocolMessageType( "EncryptionConfiguration", (_message.Message,), - dict( - DESCRIPTOR=_ENCRYPTIONCONFIGURATION, - __module__="google.cloud.bigquery_v2.proto.encryption_config_pb2", - __doc__="""Encryption configuration. + { + "DESCRIPTOR": _ENCRYPTIONCONFIGURATION, + "__module__": "google.cloud.bigquery_v2.proto.encryption_config_pb2", + "__doc__": """Encryption configuration. Attributes: kms_key_name: @@ -98,7 +94,7 @@ to this encryption key. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.EncryptionConfiguration) - ), + }, ) _sym_db.RegisterMessage(EncryptionConfiguration) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py index 07cb78fe03a9..8a9393943bdf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py @@ -1,2 +1,3 @@ # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" import grpc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py index 0b4e9d23ed26..7b66be8f7131 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py @@ -1,10 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection @@ -36,12 +33,9 @@ name="google/cloud/bigquery_v2/proto/model.proto", package="google.cloud.bigquery.v2", syntax="proto3", - serialized_options=_b( - "\n\034com.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" - ), - serialized_pb=_b( - '\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x36google/cloud/bigquery_v2/proto/encryption_config.proto\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"\x9b\x35\n\x05Model\x12\x11\n\x04\x65tag\x18\x01 \x01(\tB\x03\xe0\x41\x03\x12\x46\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReferenceB\x03\xe0\x41\x02\x12\x1a\n\rcreation_time\x18\x05 \x01(\x03\x42\x03\xe0\x41\x03\x12\x1f\n\x12last_modified_time\x18\x06 \x01(\x03\x42\x03\xe0\x41\x03\x12\x18\n\x0b\x64\x65scription\x18\x0c \x01(\tB\x03\xe0\x41\x01\x12\x1a\n\rfriendly_name\x18\x0e \x01(\tB\x03\xe0\x41\x01\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x1c\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x42\x03\xe0\x41\x01\x12\x15\n\x08location\x18\r \x01(\tB\x03\xe0\x41\x03\x12S\n\x18\x65ncryption_configuration\x18\x11 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.EncryptionConfiguration\x12\x42\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelTypeB\x03\xe0\x41\x03\x12G\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRunB\x03\xe0\x41\x03\x12H\n\x0f\x66\x65\x61ture_columns\x18\n \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x12\x46\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x1aq\n\x0bKmeansEnums"b\n\x1aKmeansInitializationMethod\x12,\n(KMEANS_INITIALIZATION_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x9f\x06\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x12\x16\n\x0epositive_label\x18\x03 \x01(\t\x12\x16\n\x0enegative_label\x18\x04 \x01(\t\x1a\xec\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\t \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\xcb\x06\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12K\n\x08\x63lusters\x18\x03 \x03(\x0b\x32\x39.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster\x1a\xef\x04\n\x07\x43luster\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12^\n\x0e\x66\x65\x61ture_values\x18\x02 \x03(\x0b\x32\x46.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue\x12*\n\x05\x63ount\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\xc2\x03\n\x0c\x46\x65\x61tureValue\x12\x16\n\x0e\x66\x65\x61ture_column\x18\x01 \x01(\t\x12\x37\n\x0fnumerical_value\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValueH\x00\x12t\n\x11\x63\x61tegorical_value\x18\x03 \x01(\x0b\x32W.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValueH\x00\x1a\xe1\x01\n\x10\x43\x61tegoricalValue\x12~\n\x0f\x63\x61tegory_counts\x18\x01 \x03(\x0b\x32\x65.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount\x1aM\n\rCategoryCount\x12\x10\n\x08\x63\x61tegory\x18\x01 \x01(\t\x12*\n\x05\x63ount\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64ValueB\x07\n\x05value\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\xab\x0f\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\x9d\t\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 \x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x12\x11\n\tmodel_uri\x18\x16 \x01(\t\x12S\n\x15optimization_strategy\x18\x17 \x01(\x0e\x32\x34.google.cloud.bigquery.v2.Model.OptimizationStrategy\x12l\n\x1ckmeans_initialization_method\x18! \x01(\x0e\x32\x46.google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod\x12$\n\x1ckmeans_initialization_column\x18" \x01(\t\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"s\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03\x12\x0e\n\nTENSORFLOW\x10\x06"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"n\n\x14OptimizationStrategy\x12%\n!OPTIMIZATION_STRATEGY_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x42\x41TCH_GRADIENT_DESCENT\x10\x01\x12\x13\n\x0fNORMAL_EQUATION\x10\x02"Z\n\x0fGetModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x91\x01\n\x11PatchModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x12\x33\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.ModelB\x03\xe0\x41\x02"]\n\x12\x44\x65leteModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x8c\x01\n\x11ListModelsRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 \x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\xfa\x05\n\x0cModelService\x12y\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"!\xda\x41\x1eproject_id,dataset_id,model_id\x12\x8d\x01\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"$\xda\x41!project_id,dataset_id,max_results\x12\x83\x01\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\'\xda\x41$project_id,dataset_id,model_id,model\x12v\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"!\xda\x41\x1eproject_id,dataset_id,model_id\x1a\xe0\x01\xca\x41\x17\x62igquery.googleapis.com\xd2\x41\xc2\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-onlyBl\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' - ), + serialized_options=b"\n\034com.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", + create_key=_descriptor._internal_create_key, + serialized_pb=b'\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x36google/cloud/bigquery_v2/proto/encryption_config.proto\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"\x9b\x35\n\x05Model\x12\x11\n\x04\x65tag\x18\x01 \x01(\tB\x03\xe0\x41\x03\x12\x46\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReferenceB\x03\xe0\x41\x02\x12\x1a\n\rcreation_time\x18\x05 \x01(\x03\x42\x03\xe0\x41\x03\x12\x1f\n\x12last_modified_time\x18\x06 \x01(\x03\x42\x03\xe0\x41\x03\x12\x18\n\x0b\x64\x65scription\x18\x0c \x01(\tB\x03\xe0\x41\x01\x12\x1a\n\rfriendly_name\x18\x0e \x01(\tB\x03\xe0\x41\x01\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x1c\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x42\x03\xe0\x41\x01\x12\x15\n\x08location\x18\r \x01(\tB\x03\xe0\x41\x03\x12S\n\x18\x65ncryption_configuration\x18\x11 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.EncryptionConfiguration\x12\x42\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelTypeB\x03\xe0\x41\x03\x12G\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRunB\x03\xe0\x41\x03\x12H\n\x0f\x66\x65\x61ture_columns\x18\n \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x12\x46\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x1aq\n\x0bKmeansEnums"b\n\x1aKmeansInitializationMethod\x12,\n(KMEANS_INITIALIZATION_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x9f\x06\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x12\x16\n\x0epositive_label\x18\x03 \x01(\t\x12\x16\n\x0enegative_label\x18\x04 \x01(\t\x1a\xec\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\t \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\xcb\x06\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12K\n\x08\x63lusters\x18\x03 \x03(\x0b\x32\x39.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster\x1a\xef\x04\n\x07\x43luster\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12^\n\x0e\x66\x65\x61ture_values\x18\x02 \x03(\x0b\x32\x46.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue\x12*\n\x05\x63ount\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\xc2\x03\n\x0c\x46\x65\x61tureValue\x12\x16\n\x0e\x66\x65\x61ture_column\x18\x01 \x01(\t\x12\x37\n\x0fnumerical_value\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValueH\x00\x12t\n\x11\x63\x61tegorical_value\x18\x03 \x01(\x0b\x32W.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValueH\x00\x1a\xe1\x01\n\x10\x43\x61tegoricalValue\x12~\n\x0f\x63\x61tegory_counts\x18\x01 \x03(\x0b\x32\x65.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount\x1aM\n\rCategoryCount\x12\x10\n\x08\x63\x61tegory\x18\x01 \x01(\t\x12*\n\x05\x63ount\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64ValueB\x07\n\x05value\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\xab\x0f\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\x9d\t\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 \x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x12\x11\n\tmodel_uri\x18\x16 \x01(\t\x12S\n\x15optimization_strategy\x18\x17 \x01(\x0e\x32\x34.google.cloud.bigquery.v2.Model.OptimizationStrategy\x12l\n\x1ckmeans_initialization_method\x18! \x01(\x0e\x32\x46.google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod\x12$\n\x1ckmeans_initialization_column\x18" \x01(\t\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"s\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03\x12\x0e\n\nTENSORFLOW\x10\x06"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"n\n\x14OptimizationStrategy\x12%\n!OPTIMIZATION_STRATEGY_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x42\x41TCH_GRADIENT_DESCENT\x10\x01\x12\x13\n\x0fNORMAL_EQUATION\x10\x02"Z\n\x0fGetModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x91\x01\n\x11PatchModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x12\x33\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.ModelB\x03\xe0\x41\x02"]\n\x12\x44\x65leteModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x8c\x01\n\x11ListModelsRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 \x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\xfa\x05\n\x0cModelService\x12y\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"!\xda\x41\x1eproject_id,dataset_id,model_id\x12\x8d\x01\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"$\xda\x41!project_id,dataset_id,max_results\x12\x83\x01\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\'\xda\x41$project_id,dataset_id,model_id,model\x12v\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"!\xda\x41\x1eproject_id,dataset_id,model_id\x1a\xe0\x01\xca\x41\x17\x62igquery.googleapis.com\xd2\x41\xc2\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-onlyBl\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', dependencies=[ google_dot_api_dot_client__pb2.DESCRIPTOR, google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, @@ -61,6 +55,7 @@ full_name="google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod", filename=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, values=[ _descriptor.EnumValueDescriptor( name="KMEANS_INITIALIZATION_METHOD_UNSPECIFIED", @@ -68,12 +63,23 @@ number=0, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="RANDOM", index=1, number=1, serialized_options=None, type=None + name="RANDOM", + index=1, + number=1, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="CUSTOM", index=2, number=2, serialized_options=None, type=None + name="CUSTOM", + index=2, + number=2, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), ], containing_type=None, @@ -88,6 +94,7 @@ full_name="google.cloud.bigquery.v2.Model.ModelType", filename=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, values=[ _descriptor.EnumValueDescriptor( name="MODEL_TYPE_UNSPECIFIED", @@ -95,6 +102,7 @@ number=0, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( name="LINEAR_REGRESSION", @@ -102,6 +110,7 @@ number=1, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( name="LOGISTIC_REGRESSION", @@ -109,12 +118,23 @@ number=2, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="KMEANS", index=3, number=3, serialized_options=None, type=None + name="KMEANS", + index=3, + number=3, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="TENSORFLOW", index=4, number=6, serialized_options=None, type=None + name="TENSORFLOW", + index=4, + number=6, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), ], containing_type=None, @@ -129,6 +149,7 @@ full_name="google.cloud.bigquery.v2.Model.LossType", filename=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, values=[ _descriptor.EnumValueDescriptor( name="LOSS_TYPE_UNSPECIFIED", @@ -136,6 +157,7 @@ number=0, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( name="MEAN_SQUARED_LOSS", @@ -143,9 +165,15 @@ number=1, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="MEAN_LOG_LOSS", index=2, number=2, serialized_options=None, type=None + name="MEAN_LOG_LOSS", + index=2, + number=2, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), ], containing_type=None, @@ -160,6 +188,7 @@ full_name="google.cloud.bigquery.v2.Model.DistanceType", filename=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, values=[ _descriptor.EnumValueDescriptor( name="DISTANCE_TYPE_UNSPECIFIED", @@ -167,12 +196,23 @@ number=0, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="EUCLIDEAN", index=1, number=1, serialized_options=None, type=None + name="EUCLIDEAN", + index=1, + number=1, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="COSINE", index=2, number=2, serialized_options=None, type=None + name="COSINE", + index=2, + number=2, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), ], containing_type=None, @@ -187,6 +227,7 @@ full_name="google.cloud.bigquery.v2.Model.DataSplitMethod", filename=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, values=[ _descriptor.EnumValueDescriptor( name="DATA_SPLIT_METHOD_UNSPECIFIED", @@ -194,21 +235,47 @@ number=0, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="RANDOM", index=1, number=1, serialized_options=None, type=None + name="RANDOM", + index=1, + number=1, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="CUSTOM", index=2, number=2, serialized_options=None, type=None + name="CUSTOM", + index=2, + number=2, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="SEQUENTIAL", index=3, number=3, serialized_options=None, type=None + name="SEQUENTIAL", + index=3, + number=3, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="NO_SPLIT", index=4, number=4, serialized_options=None, type=None + name="NO_SPLIT", + index=4, + number=4, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="AUTO_SPLIT", index=5, number=5, serialized_options=None, type=None + name="AUTO_SPLIT", + index=5, + number=5, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), ], containing_type=None, @@ -223,6 +290,7 @@ full_name="google.cloud.bigquery.v2.Model.LearnRateStrategy", filename=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, values=[ _descriptor.EnumValueDescriptor( name="LEARN_RATE_STRATEGY_UNSPECIFIED", @@ -230,12 +298,23 @@ number=0, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="LINE_SEARCH", index=1, number=1, serialized_options=None, type=None + name="LINE_SEARCH", + index=1, + number=1, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="CONSTANT", index=2, number=2, serialized_options=None, type=None + name="CONSTANT", + index=2, + number=2, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), ], containing_type=None, @@ -250,6 +329,7 @@ full_name="google.cloud.bigquery.v2.Model.OptimizationStrategy", filename=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, values=[ _descriptor.EnumValueDescriptor( name="OPTIMIZATION_STRATEGY_UNSPECIFIED", @@ -257,6 +337,7 @@ number=0, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( name="BATCH_GRADIENT_DESCENT", @@ -264,6 +345,7 @@ number=1, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( name="NORMAL_EQUATION", @@ -271,6 +353,7 @@ number=2, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), ], containing_type=None, @@ -287,6 +370,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[], extensions=[], nested_types=[], @@ -306,6 +390,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="mean_absolute_error", @@ -324,6 +409,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="mean_squared_error", @@ -342,6 +428,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="mean_squared_log_error", @@ -360,6 +447,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="median_absolute_error", @@ -378,6 +466,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="r_squared", @@ -396,6 +485,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -416,6 +506,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="precision", @@ -434,6 +525,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="recall", @@ -452,6 +544,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="accuracy", @@ -470,6 +563,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="threshold", @@ -488,6 +582,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="f1_score", @@ -506,6 +601,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="log_loss", @@ -524,6 +620,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="roc_auc", @@ -542,6 +639,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -562,6 +660,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="positive_class_threshold", @@ -580,6 +679,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="true_positives", @@ -598,6 +698,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="false_positives", @@ -616,6 +717,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="true_negatives", @@ -634,6 +736,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="false_negatives", @@ -652,6 +755,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="precision", @@ -670,6 +774,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="recall", @@ -688,6 +793,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="f1_score", @@ -706,6 +812,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="accuracy", @@ -724,6 +831,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -744,6 +852,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="aggregate_classification_metrics", @@ -762,6 +871,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="binary_confusion_matrix_list", @@ -780,6 +890,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="positive_label", @@ -790,7 +901,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -798,6 +909,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="negative_label", @@ -808,7 +920,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -816,6 +928,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -836,6 +949,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="predicted_label", @@ -846,7 +960,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -854,6 +968,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="item_count", @@ -872,6 +987,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -892,6 +1008,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="actual_label", @@ -902,7 +1019,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -910,6 +1027,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="entries", @@ -928,6 +1046,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -948,6 +1067,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="confidence_threshold", @@ -966,6 +1086,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="rows", @@ -984,6 +1105,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -1007,6 +1129,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="aggregate_classification_metrics", @@ -1025,6 +1148,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="confusion_matrix_list", @@ -1043,6 +1167,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -1063,6 +1188,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="category", @@ -1073,7 +1199,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -1081,6 +1207,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="count", @@ -1099,6 +1226,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -1119,6 +1247,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="category_counts", @@ -1137,6 +1266,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -1159,6 +1289,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="feature_column", @@ -1169,7 +1300,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -1177,6 +1308,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="numerical_value", @@ -1195,6 +1327,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="categorical_value", @@ -1213,6 +1346,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -1228,6 +1362,7 @@ full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.value", index=0, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[], ), ], @@ -1241,6 +1376,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="centroid_id", @@ -1259,6 +1395,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="feature_values", @@ -1277,6 +1414,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="count", @@ -1295,6 +1433,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -1315,6 +1454,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="davies_bouldin_index", @@ -1333,6 +1473,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="mean_squared_distance", @@ -1351,6 +1492,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="clusters", @@ -1369,6 +1511,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -1389,6 +1532,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="regression_metrics", @@ -1407,6 +1551,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="binary_classification_metrics", @@ -1425,6 +1570,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="multi_class_classification_metrics", @@ -1443,6 +1589,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="clustering_metrics", @@ -1461,6 +1608,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -1476,6 +1624,7 @@ full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.metrics", index=0, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[], ), ], @@ -1489,6 +1638,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="key", @@ -1499,7 +1649,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -1507,6 +1657,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="value", @@ -1525,12 +1676,13 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], nested_types=[], enum_types=[], - serialized_options=_b("8\001"), + serialized_options=b"8\001", is_extendable=False, syntax="proto3", extension_ranges=[], @@ -1545,6 +1697,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="max_iterations", @@ -1563,6 +1716,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="loss_type", @@ -1581,6 +1735,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="learn_rate", @@ -1599,6 +1754,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="l1_regularization", @@ -1617,6 +1773,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="l2_regularization", @@ -1635,6 +1792,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="min_relative_progress", @@ -1653,6 +1811,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="warm_start", @@ -1671,6 +1830,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="early_stop", @@ -1689,6 +1849,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="input_label_columns", @@ -1707,6 +1868,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="data_split_method", @@ -1725,6 +1887,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="data_split_eval_fraction", @@ -1743,6 +1906,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="data_split_column", @@ -1753,7 +1917,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -1761,6 +1925,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="learn_rate_strategy", @@ -1779,6 +1944,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="initial_learn_rate", @@ -1797,6 +1963,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="label_class_weights", @@ -1815,6 +1982,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="distance_type", @@ -1833,6 +2001,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="num_clusters", @@ -1851,6 +2020,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="model_uri", @@ -1861,7 +2031,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -1869,6 +2039,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="optimization_strategy", @@ -1887,6 +2058,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="kmeans_initialization_method", @@ -1905,6 +2077,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="kmeans_initialization_column", @@ -1915,7 +2088,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -1923,6 +2096,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -1943,6 +2117,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="centroid_id", @@ -1961,6 +2136,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="cluster_radius", @@ -1979,6 +2155,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="cluster_size", @@ -1997,6 +2174,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -2017,6 +2195,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="index", @@ -2035,6 +2214,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="duration_ms", @@ -2053,6 +2233,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="training_loss", @@ -2071,6 +2252,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="eval_loss", @@ -2089,6 +2271,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="learn_rate", @@ -2107,6 +2290,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="cluster_infos", @@ -2125,6 +2309,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -2145,6 +2330,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="training_options", @@ -2163,6 +2349,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="start_time", @@ -2181,6 +2368,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="results", @@ -2199,6 +2387,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="evaluation_metrics", @@ -2217,6 +2406,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -2240,6 +2430,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="key", @@ -2250,7 +2441,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -2258,6 +2449,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="value", @@ -2268,7 +2460,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -2276,12 +2468,13 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], nested_types=[], enum_types=[], - serialized_options=_b("8\001"), + serialized_options=b"8\001", is_extendable=False, syntax="proto3", extension_ranges=[], @@ -2296,6 +2489,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="etag", @@ -2306,14 +2500,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\003"), + serialized_options=b"\340A\003", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="model_reference", @@ -2330,8 +2525,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="creation_time", @@ -2348,8 +2544,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\003"), + serialized_options=b"\340A\003", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="last_modified_time", @@ -2366,8 +2563,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\003"), + serialized_options=b"\340A\003", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="description", @@ -2378,14 +2576,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\001"), + serialized_options=b"\340A\001", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="friendly_name", @@ -2396,14 +2595,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\001"), + serialized_options=b"\340A\001", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="labels", @@ -2422,6 +2622,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="expiration_time", @@ -2438,8 +2639,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\001"), + serialized_options=b"\340A\001", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="location", @@ -2450,14 +2652,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\003"), + serialized_options=b"\340A\003", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="encryption_configuration", @@ -2476,6 +2679,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="model_type", @@ -2492,8 +2696,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\003"), + serialized_options=b"\340A\003", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="training_runs", @@ -2510,8 +2715,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\003"), + serialized_options=b"\340A\003", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="feature_columns", @@ -2528,8 +2734,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\003"), + serialized_options=b"\340A\003", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="label_columns", @@ -2546,8 +2753,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\003"), + serialized_options=b"\340A\003", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -2586,6 +2794,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="project_id", @@ -2596,14 +2805,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="dataset_id", @@ -2614,14 +2824,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="model_id", @@ -2632,14 +2843,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -2661,6 +2873,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="project_id", @@ -2671,14 +2884,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="dataset_id", @@ -2689,14 +2903,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="model_id", @@ -2707,14 +2922,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="model", @@ -2731,8 +2947,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -2754,6 +2971,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="project_id", @@ -2764,14 +2982,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="dataset_id", @@ -2782,14 +3001,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="model_id", @@ -2800,14 +3020,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -2829,6 +3050,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="project_id", @@ -2839,14 +3061,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="dataset_id", @@ -2857,14 +3080,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="max_results", @@ -2883,6 +3107,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="page_token", @@ -2893,7 +3118,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -2901,6 +3126,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -2922,6 +3148,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="models", @@ -2940,6 +3167,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="next_page_token", @@ -2950,7 +3178,7 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, @@ -2958,6 +3186,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -3294,25 +3523,24 @@ Model = _reflection.GeneratedProtocolMessageType( "Model", (_message.Message,), - dict( - KmeansEnums=_reflection.GeneratedProtocolMessageType( + { + "KmeansEnums": _reflection.GeneratedProtocolMessageType( "KmeansEnums", (_message.Message,), - dict( - DESCRIPTOR=_MODEL_KMEANSENUMS, - __module__="google.cloud.bigquery_v2.proto.model_pb2" + { + "DESCRIPTOR": _MODEL_KMEANSENUMS, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2" # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.KmeansEnums) - ), + }, ), - RegressionMetrics=_reflection.GeneratedProtocolMessageType( + "RegressionMetrics": _reflection.GeneratedProtocolMessageType( "RegressionMetrics", (_message.Message,), - dict( - DESCRIPTOR=_MODEL_REGRESSIONMETRICS, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics for regression and explicit feedback - type matrix factorization models. - + { + "DESCRIPTOR": _MODEL_REGRESSIONMETRICS, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Evaluation metrics for regression and explicit feedback type matrix + factorization models. Attributes: mean_absolute_error: @@ -3327,22 +3555,21 @@ R^2 score. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.RegressionMetrics) - ), + }, ), - AggregateClassificationMetrics=_reflection.GeneratedProtocolMessageType( + "AggregateClassificationMetrics": _reflection.GeneratedProtocolMessageType( "AggregateClassificationMetrics", (_message.Message,), - dict( - DESCRIPTOR=_MODEL_AGGREGATECLASSIFICATIONMETRICS, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Aggregate metrics for classification/classifier models. - For multi-class models, the metrics are either macro-averaged or - micro-averaged. When macro-averaged, the metrics are calculated for each - label and then an unweighted average is taken of those values. When - micro-averaged, the metric is calculated globally by counting the total + { + "DESCRIPTOR": _MODEL_AGGREGATECLASSIFICATIONMETRICS, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Aggregate metrics for classification/classifier models. For multi- + class models, the metrics are either macro-averaged or micro-averaged. + When macro-averaged, the metrics are calculated for each label and + then an unweighted average is taken of those values. When micro- + averaged, the metric is calculated globally by counting the total number of correctly predicted rows. - Attributes: precision: Precision is the fraction of actual positive predictions that @@ -3371,20 +3598,19 @@ averaged metric. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.AggregateClassificationMetrics) - ), + }, ), - BinaryClassificationMetrics=_reflection.GeneratedProtocolMessageType( + "BinaryClassificationMetrics": _reflection.GeneratedProtocolMessageType( "BinaryClassificationMetrics", (_message.Message,), - dict( - BinaryConfusionMatrix=_reflection.GeneratedProtocolMessageType( + { + "BinaryConfusionMatrix": _reflection.GeneratedProtocolMessageType( "BinaryConfusionMatrix", (_message.Message,), - dict( - DESCRIPTOR=_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Confusion matrix for binary classification models. - + { + "DESCRIPTOR": _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Confusion matrix for binary classification models. Attributes: positive_class_threshold: @@ -3410,13 +3636,11 @@ The fraction of predictions given the correct label. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix) - ), + }, ), - DESCRIPTOR=_MODEL_BINARYCLASSIFICATIONMETRICS, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics for binary classification/classifier - models. - + "DESCRIPTOR": _MODEL_BINARYCLASSIFICATIONMETRICS, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Evaluation metrics for binary classification/classifier models. Attributes: aggregate_classification_metrics: @@ -3429,44 +3653,42 @@ Label representing the negative class. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics) - ), + }, ), - MultiClassClassificationMetrics=_reflection.GeneratedProtocolMessageType( + "MultiClassClassificationMetrics": _reflection.GeneratedProtocolMessageType( "MultiClassClassificationMetrics", (_message.Message,), - dict( - ConfusionMatrix=_reflection.GeneratedProtocolMessageType( + { + "ConfusionMatrix": _reflection.GeneratedProtocolMessageType( "ConfusionMatrix", (_message.Message,), - dict( - Entry=_reflection.GeneratedProtocolMessageType( + { + "Entry": _reflection.GeneratedProtocolMessageType( "Entry", (_message.Message,), - dict( - DESCRIPTOR=_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""A single entry in the confusion matrix. - + { + "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """A single entry in the confusion matrix. Attributes: predicted_label: - The predicted label. For confidence\_threshold > 0, we will + The predicted label. For confidence_threshold > 0, we will also add an entry indicating the number of items under the confidence threshold. item_count: Number of items being predicted as this label. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry) - ), + }, ), - Row=_reflection.GeneratedProtocolMessageType( + "Row": _reflection.GeneratedProtocolMessageType( "Row", (_message.Message,), - dict( - DESCRIPTOR=_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""A single row in the confusion matrix. - + { + "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """A single row in the confusion matrix. Attributes: actual_label: @@ -3475,12 +3697,11 @@ Info describing predicted label distribution. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row) - ), + }, ), - DESCRIPTOR=_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Confusion matrix for multi-class classification models. - + "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Confusion matrix for multi-class classification models. Attributes: confidence_threshold: @@ -3490,13 +3711,11 @@ One row per actual label. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix) - ), + }, ), - DESCRIPTOR=_MODEL_MULTICLASSCLASSIFICATIONMETRICS, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics for multi-class - classification/classifier models. - + "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Evaluation metrics for multi-class classification/classifier models. Attributes: aggregate_classification_metrics: @@ -3505,33 +3724,31 @@ Confusion matrix at different thresholds. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics) - ), + }, ), - ClusteringMetrics=_reflection.GeneratedProtocolMessageType( + "ClusteringMetrics": _reflection.GeneratedProtocolMessageType( "ClusteringMetrics", (_message.Message,), - dict( - Cluster=_reflection.GeneratedProtocolMessageType( + { + "Cluster": _reflection.GeneratedProtocolMessageType( "Cluster", (_message.Message,), - dict( - FeatureValue=_reflection.GeneratedProtocolMessageType( + { + "FeatureValue": _reflection.GeneratedProtocolMessageType( "FeatureValue", (_message.Message,), - dict( - CategoricalValue=_reflection.GeneratedProtocolMessageType( + { + "CategoricalValue": _reflection.GeneratedProtocolMessageType( "CategoricalValue", (_message.Message,), - dict( - CategoryCount=_reflection.GeneratedProtocolMessageType( + { + "CategoryCount": _reflection.GeneratedProtocolMessageType( "CategoryCount", (_message.Message,), - dict( - DESCRIPTOR=_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Represents the count of a single category within the - cluster. - + { + "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Represents the count of a single category within the cluster. Attributes: category: @@ -3541,27 +3758,25 @@ cluster. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount) - ), + }, ), - DESCRIPTOR=_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Representative value of a categorical feature. - + "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Representative value of a categorical feature. Attributes: category_counts: Counts of all categories for the categorical feature. If there are more than ten categories, we return top ten (by count) and - return one more CategoryCount with category "*OTHER*" and + return one more CategoryCount with category ``*OTHER*`` and count as aggregate counts of remaining categories. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue) - ), + }, ), - DESCRIPTOR=_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Representative value of a single feature within the cluster. - + "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Representative value of a single feature within the cluster. Attributes: feature_column: @@ -3573,12 +3788,11 @@ The categorical feature value. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue) - ), + }, ), - DESCRIPTOR=_MODEL_CLUSTERINGMETRICS_CLUSTER, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Message containing the information about one cluster. - + "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Message containing the information about one cluster. Attributes: centroid_id: @@ -3590,12 +3804,11 @@ cluster. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster) - ), + }, ), - DESCRIPTOR=_MODEL_CLUSTERINGMETRICS, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics for clustering models. - + "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Evaluation metrics for clustering models. Attributes: davies_bouldin_index: @@ -3607,18 +3820,17 @@ [Beta] Information for all clusters. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics) - ), + }, ), - EvaluationMetrics=_reflection.GeneratedProtocolMessageType( + "EvaluationMetrics": _reflection.GeneratedProtocolMessageType( "EvaluationMetrics", (_message.Message,), - dict( - DESCRIPTOR=_MODEL_EVALUATIONMETRICS, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Evaluation metrics of a model. These are either computed - on all training data or just the eval data based on whether eval data - was used during training. These are not present for imported models. - + { + "DESCRIPTOR": _MODEL_EVALUATIONMETRICS, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Evaluation metrics of a model. These are either computed on all + training data or just the eval data based on whether eval data was + used during training. These are not present for imported models. Attributes: regression_metrics: @@ -3632,28 +3844,28 @@ Populated for clustering models. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.EvaluationMetrics) - ), + }, ), - TrainingRun=_reflection.GeneratedProtocolMessageType( + "TrainingRun": _reflection.GeneratedProtocolMessageType( "TrainingRun", (_message.Message,), - dict( - TrainingOptions=_reflection.GeneratedProtocolMessageType( + { + "TrainingOptions": _reflection.GeneratedProtocolMessageType( "TrainingOptions", (_message.Message,), - dict( - LabelClassWeightsEntry=_reflection.GeneratedProtocolMessageType( + { + "LabelClassWeightsEntry": _reflection.GeneratedProtocolMessageType( "LabelClassWeightsEntry", (_message.Message,), - dict( - DESCRIPTOR=_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY, - __module__="google.cloud.bigquery_v2.proto.model_pb2" + { + "DESCRIPTOR": _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2" # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry) - ), + }, ), - DESCRIPTOR=_MODEL_TRAININGRUN_TRAININGOPTIONS, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Protocol buffer. + "DESCRIPTOR": _MODEL_TRAININGRUN_TRAININGOPTIONS, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Protocol buffer. Attributes: max_iterations: @@ -3669,31 +3881,31 @@ l2_regularization: L2 regularization coefficient. min_relative_progress: - When early\_stop is true, stops training when accuracy - improvement is less than 'min\_relative\_progress'. Used only + When early_stop is true, stops training when accuracy + improvement is less than ‘min_relative_progress’. Used only for iterative training algorithms. warm_start: Whether to train a model from the last checkpoint. early_stop: - Whether to stop early when the loss doesn't improve - significantly any more (compared to min\_relative\_progress). + Whether to stop early when the loss doesn’t improve + significantly any more (compared to min_relative_progress). Used only for iterative training algorithms. input_label_columns: Name of input label columns in training data. data_split_method: - The data split type for training and evaluation, e.g. RANDOM. + The data split type for training and evaluation, e.g. RANDOM. data_split_eval_fraction: The fraction of evaluation data over the whole input data. The rest of data will be used as training data. The format should be double. Accurate to two decimal places. Default value is 0.2. data_split_column: - The column to split data with. This column won't be used as a - feature. 1. When data\_split\_method is CUSTOM, the + The column to split data with. This column won’t be used as a + feature. 1. When data_split_method is CUSTOM, the corresponding column should be boolean. The rows with true value tag are eval data, and the false are training data. 2. - When data\_split\_method is SEQ, the first - DATA\_SPLIT\_EVAL\_FRACTION rows (from smallest to largest) in + When data_split_method is SEQ, the first + DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) in the corresponding column are used as training data, and the rest are eval data. It respects the order in Orderable data types: @@ -3722,23 +3934,22 @@ algorithm. kmeans_initialization_column: The column used to provide the initial centroids for kmeans - algorithm when kmeans\_initialization\_method is CUSTOM. + algorithm when kmeans_initialization_method is CUSTOM. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions) - ), + }, ), - IterationResult=_reflection.GeneratedProtocolMessageType( + "IterationResult": _reflection.GeneratedProtocolMessageType( "IterationResult", (_message.Message,), - dict( - ClusterInfo=_reflection.GeneratedProtocolMessageType( + { + "ClusterInfo": _reflection.GeneratedProtocolMessageType( "ClusterInfo", (_message.Message,), - dict( - DESCRIPTOR=_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Information about a single cluster for clustering model. - + { + "DESCRIPTOR": _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Information about a single cluster for clustering model. Attributes: centroid_id: @@ -3751,12 +3962,11 @@ cluster. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo) - ), + }, ), - DESCRIPTOR=_MODEL_TRAININGRUN_ITERATIONRESULT, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Information about a single iteration of the training run. - + "DESCRIPTOR": _MODEL_TRAININGRUN_ITERATIONRESULT, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Information about a single iteration of the training run. Attributes: index: @@ -3773,13 +3983,11 @@ Information about top clusters for clustering models. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult) - ), + }, ), - DESCRIPTOR=_MODEL_TRAININGRUN, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Information about a single training query run for the - model. - + "DESCRIPTOR": _MODEL_TRAININGRUN, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Information about a single training query run for the model. Attributes: training_options: @@ -3789,26 +3997,26 @@ The start time of this training run. results: Output of each iteration run, results.size() <= - max\_iterations. + max_iterations. evaluation_metrics: The evaluation metrics over training/eval data that were computed at the end of training. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun) - ), + }, ), - LabelsEntry=_reflection.GeneratedProtocolMessageType( + "LabelsEntry": _reflection.GeneratedProtocolMessageType( "LabelsEntry", (_message.Message,), - dict( - DESCRIPTOR=_MODEL_LABELSENTRY, - __module__="google.cloud.bigquery_v2.proto.model_pb2" + { + "DESCRIPTOR": _MODEL_LABELSENTRY, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2" # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.LabelsEntry) - ), + }, ), - DESCRIPTOR=_MODEL, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Protocol buffer. + "DESCRIPTOR": _MODEL, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Protocol buffer. Attributes: etag: @@ -3851,17 +4059,17 @@ Output only. Type of the model resource. training_runs: Output only. Information for all training runs in increasing - order of start\_time. + order of start_time. feature_columns: Output only. Input feature columns that were used to train this model. label_columns: Output only. Label columns that were used to train this model. - The output of the model will have a "predicted\_" prefix to + The output of the model will have a ``predicted\_`` prefix to these columns. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model) - ), + }, ) _sym_db.RegisterMessage(Model) _sym_db.RegisterMessage(Model.KmeansEnums) @@ -3891,10 +4099,10 @@ GetModelRequest = _reflection.GeneratedProtocolMessageType( "GetModelRequest", (_message.Message,), - dict( - DESCRIPTOR=_GETMODELREQUEST, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Protocol buffer. + { + "DESCRIPTOR": _GETMODELREQUEST, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Protocol buffer. Attributes: project_id: @@ -3905,17 +4113,17 @@ Required. Model ID of the requested model. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.GetModelRequest) - ), + }, ) _sym_db.RegisterMessage(GetModelRequest) PatchModelRequest = _reflection.GeneratedProtocolMessageType( "PatchModelRequest", (_message.Message,), - dict( - DESCRIPTOR=_PATCHMODELREQUEST, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Protocol buffer. + { + "DESCRIPTOR": _PATCHMODELREQUEST, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Protocol buffer. Attributes: project_id: @@ -3930,17 +4138,17 @@ set to default value. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.PatchModelRequest) - ), + }, ) _sym_db.RegisterMessage(PatchModelRequest) DeleteModelRequest = _reflection.GeneratedProtocolMessageType( "DeleteModelRequest", (_message.Message,), - dict( - DESCRIPTOR=_DELETEMODELREQUEST, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Protocol buffer. + { + "DESCRIPTOR": _DELETEMODELREQUEST, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Protocol buffer. Attributes: project_id: @@ -3951,17 +4159,17 @@ Required. Model ID of the model to delete. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.DeleteModelRequest) - ), + }, ) _sym_db.RegisterMessage(DeleteModelRequest) ListModelsRequest = _reflection.GeneratedProtocolMessageType( "ListModelsRequest", (_message.Message,), - dict( - DESCRIPTOR=_LISTMODELSREQUEST, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Protocol buffer. + { + "DESCRIPTOR": _LISTMODELSREQUEST, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Protocol buffer. Attributes: project_id: @@ -3977,28 +4185,28 @@ page of results """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsRequest) - ), + }, ) _sym_db.RegisterMessage(ListModelsRequest) ListModelsResponse = _reflection.GeneratedProtocolMessageType( "ListModelsResponse", (_message.Message,), - dict( - DESCRIPTOR=_LISTMODELSRESPONSE, - __module__="google.cloud.bigquery_v2.proto.model_pb2", - __doc__="""Protocol buffer. + { + "DESCRIPTOR": _LISTMODELSRESPONSE, + "__module__": "google.cloud.bigquery_v2.proto.model_pb2", + "__doc__": """Protocol buffer. Attributes: models: Models in the requested dataset. Only the following fields are - populated: model\_reference, model\_type, creation\_time, - last\_modified\_time and labels. + populated: model_reference, model_type, creation_time, + last_modified_time and labels. next_page_token: A token to request the next page of results. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsResponse) - ), + }, ) _sym_db.RegisterMessage(ListModelsResponse) @@ -4036,9 +4244,8 @@ full_name="google.cloud.bigquery.v2.ModelService", file=DESCRIPTOR, index=0, - serialized_options=_b( - "\312A\027bigquery.googleapis.com\322A\302\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-only" - ), + serialized_options=b"\312A\027bigquery.googleapis.com\322A\302\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-only", + create_key=_descriptor._internal_create_key, serialized_start=7804, serialized_end=8566, methods=[ @@ -4049,7 +4256,8 @@ containing_service=None, input_type=_GETMODELREQUEST, output_type=_MODEL, - serialized_options=_b("\332A\036project_id,dataset_id,model_id"), + serialized_options=b"\332A\036project_id,dataset_id,model_id", + create_key=_descriptor._internal_create_key, ), _descriptor.MethodDescriptor( name="ListModels", @@ -4058,7 +4266,8 @@ containing_service=None, input_type=_LISTMODELSREQUEST, output_type=_LISTMODELSRESPONSE, - serialized_options=_b("\332A!project_id,dataset_id,max_results"), + serialized_options=b"\332A!project_id,dataset_id,max_results", + create_key=_descriptor._internal_create_key, ), _descriptor.MethodDescriptor( name="PatchModel", @@ -4067,7 +4276,8 @@ containing_service=None, input_type=_PATCHMODELREQUEST, output_type=_MODEL, - serialized_options=_b("\332A$project_id,dataset_id,model_id,model"), + serialized_options=b"\332A$project_id,dataset_id,model_id,model", + create_key=_descriptor._internal_create_key, ), _descriptor.MethodDescriptor( name="DeleteModel", @@ -4076,7 +4286,8 @@ containing_service=None, input_type=_DELETEMODELREQUEST, output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, - serialized_options=_b("\332A\036project_id,dataset_id,model_id"), + serialized_options=b"\332A\036project_id,dataset_id,model_id", + create_key=_descriptor._internal_create_key, ), ], ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py index 5abcdf0f2606..13db95717f60 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py @@ -1,4 +1,5 @@ # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" import grpc from google.cloud.bigquery_v2.proto import ( @@ -8,15 +9,14 @@ class ModelServiceStub(object): - # missing associated documentation comment in .proto file - pass + """Missing associated documentation comment in .proto file.""" def __init__(self, channel): """Constructor. - Args: - channel: A grpc.Channel. - """ + Args: + channel: A grpc.Channel. + """ self.GetModel = channel.unary_unary( "/google.cloud.bigquery.v2.ModelService/GetModel", request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.GetModelRequest.SerializeToString, @@ -40,34 +40,33 @@ def __init__(self, channel): class ModelServiceServicer(object): - # missing associated documentation comment in .proto file - pass + """Missing associated documentation comment in .proto file.""" def GetModel(self, request, context): """Gets the specified model resource by model ID. - """ + """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details("Method not implemented!") raise NotImplementedError("Method not implemented!") def ListModels(self, request, context): """Lists all models in the specified dataset. Requires the READER dataset - role. - """ + role. + """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details("Method not implemented!") raise NotImplementedError("Method not implemented!") def PatchModel(self, request, context): """Patch specific fields in the specified model. - """ + """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details("Method not implemented!") raise NotImplementedError("Method not implemented!") def DeleteModel(self, request, context): """Deletes the model specified by modelId from the dataset. - """ + """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details("Method not implemented!") raise NotImplementedError("Method not implemented!") @@ -100,3 +99,116 @@ def add_ModelServiceServicer_to_server(servicer, server): "google.cloud.bigquery.v2.ModelService", rpc_method_handlers ) server.add_generic_rpc_handlers((generic_handler,)) + + +# This class is part of an EXPERIMENTAL API. +class ModelService(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def GetModel( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, + target, + "/google.cloud.bigquery.v2.ModelService/GetModel", + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.GetModelRequest.SerializeToString, + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, + options, + channel_credentials, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def ListModels( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, + target, + "/google.cloud.bigquery.v2.ModelService/ListModels", + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsRequest.SerializeToString, + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsResponse.FromString, + options, + channel_credentials, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def PatchModel( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, + target, + "/google.cloud.bigquery.v2.ModelService/PatchModel", + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.PatchModelRequest.SerializeToString, + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, + options, + channel_credentials, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def DeleteModel( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, + target, + "/google.cloud.bigquery.v2.ModelService/DeleteModel", + google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.DeleteModelRequest.SerializeToString, + google_dot_protobuf_dot_empty__pb2.Empty.FromString, + options, + channel_credentials, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py index 01e6e29522a5..2411c48632c8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py @@ -1,10 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model_reference.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection @@ -23,12 +20,9 @@ name="google/cloud/bigquery_v2/proto/model_reference.proto", package="google.cloud.bigquery.v2", syntax="proto3", - serialized_options=_b( - "\n\034com.google.cloud.bigquery.v2B\023ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" - ), - serialized_pb=_b( - '\n4google/cloud/bigquery_v2/proto/model_reference.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"Y\n\x0eModelReference\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x42u\n\x1c\x63om.google.cloud.bigquery.v2B\x13ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' - ), + serialized_options=b"\n\034com.google.cloud.bigquery.v2B\023ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", + create_key=_descriptor._internal_create_key, + serialized_pb=b'\n4google/cloud/bigquery_v2/proto/model_reference.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"Y\n\x0eModelReference\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x42u\n\x1c\x63om.google.cloud.bigquery.v2B\x13ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', dependencies=[ google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, google_dot_api_dot_annotations__pb2.DESCRIPTOR, @@ -42,6 +36,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="project_id", @@ -52,14 +47,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="dataset_id", @@ -70,14 +66,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="model_id", @@ -88,14 +85,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -116,11 +114,10 @@ ModelReference = _reflection.GeneratedProtocolMessageType( "ModelReference", (_message.Message,), - dict( - DESCRIPTOR=_MODELREFERENCE, - __module__="google.cloud.bigquery_v2.proto.model_reference_pb2", - __doc__="""Id path of a model. - + { + "DESCRIPTOR": _MODELREFERENCE, + "__module__": "google.cloud.bigquery_v2.proto.model_reference_pb2", + "__doc__": """Id path of a model. Attributes: project_id: @@ -129,11 +126,11 @@ Required. The ID of the dataset containing this model. model_id: Required. The ID of the model. The ID must contain only - letters (a-z, A-Z), numbers (0-9), or underscores (\_). The + letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ModelReference) - ), + }, ) _sym_db.RegisterMessage(ModelReference) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py index 07cb78fe03a9..8a9393943bdf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py @@ -1,2 +1,3 @@ # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" import grpc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py index ca02014057d2..bfe77f934338 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py @@ -1,10 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/standard_sql.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection @@ -23,12 +20,9 @@ name="google/cloud/bigquery_v2/proto/standard_sql.proto", package="google.cloud.bigquery.v2", syntax="proto3", - serialized_options=_b( - "\n\034com.google.cloud.bigquery.v2B\020StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" - ), - serialized_pb=_b( - '\n1google/cloud/bigquery_v2/proto/standard_sql.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"\xcb\x03\n\x13StandardSqlDataType\x12N\n\ttype_kind\x18\x01 \x01(\x0e\x32\x36.google.cloud.bigquery.v2.StandardSqlDataType.TypeKindB\x03\xe0\x41\x02\x12K\n\x12\x61rray_element_type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeH\x00\x12\x46\n\x0bstruct_type\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.v2.StandardSqlStructTypeH\x00"\xc2\x01\n\x08TypeKind\x12\x19\n\x15TYPE_KIND_UNSPECIFIED\x10\x00\x12\t\n\x05INT64\x10\x02\x12\x08\n\x04\x42OOL\x10\x05\x12\x0b\n\x07\x46LOAT64\x10\x07\x12\n\n\x06STRING\x10\x08\x12\t\n\x05\x42YTES\x10\t\x12\r\n\tTIMESTAMP\x10\x13\x12\x08\n\x04\x44\x41TE\x10\n\x12\x08\n\x04TIME\x10\x14\x12\x0c\n\x08\x44\x41TETIME\x10\x15\x12\r\n\tGEOGRAPHY\x10\x16\x12\x0b\n\x07NUMERIC\x10\x17\x12\t\n\x05\x41RRAY\x10\x10\x12\n\n\x06STRUCT\x10\x11\x42\n\n\x08sub_type"g\n\x10StandardSqlField\x12\x11\n\x04name\x18\x01 \x01(\tB\x03\xe0\x41\x01\x12@\n\x04type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeB\x03\xe0\x41\x01"S\n\x15StandardSqlStructType\x12:\n\x06\x66ields\x18\x01 \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldBr\n\x1c\x63om.google.cloud.bigquery.v2B\x10StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' - ), + serialized_options=b"\n\034com.google.cloud.bigquery.v2B\020StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", + create_key=_descriptor._internal_create_key, + serialized_pb=b'\n1google/cloud/bigquery_v2/proto/standard_sql.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"\xcb\x03\n\x13StandardSqlDataType\x12N\n\ttype_kind\x18\x01 \x01(\x0e\x32\x36.google.cloud.bigquery.v2.StandardSqlDataType.TypeKindB\x03\xe0\x41\x02\x12K\n\x12\x61rray_element_type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeH\x00\x12\x46\n\x0bstruct_type\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.v2.StandardSqlStructTypeH\x00"\xc2\x01\n\x08TypeKind\x12\x19\n\x15TYPE_KIND_UNSPECIFIED\x10\x00\x12\t\n\x05INT64\x10\x02\x12\x08\n\x04\x42OOL\x10\x05\x12\x0b\n\x07\x46LOAT64\x10\x07\x12\n\n\x06STRING\x10\x08\x12\t\n\x05\x42YTES\x10\t\x12\r\n\tTIMESTAMP\x10\x13\x12\x08\n\x04\x44\x41TE\x10\n\x12\x08\n\x04TIME\x10\x14\x12\x0c\n\x08\x44\x41TETIME\x10\x15\x12\r\n\tGEOGRAPHY\x10\x16\x12\x0b\n\x07NUMERIC\x10\x17\x12\t\n\x05\x41RRAY\x10\x10\x12\n\n\x06STRUCT\x10\x11\x42\n\n\x08sub_type"g\n\x10StandardSqlField\x12\x11\n\x04name\x18\x01 \x01(\tB\x03\xe0\x41\x01\x12@\n\x04type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeB\x03\xe0\x41\x01"S\n\x15StandardSqlStructType\x12:\n\x06\x66ields\x18\x01 \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldBr\n\x1c\x63om.google.cloud.bigquery.v2B\x10StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', dependencies=[ google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, google_dot_api_dot_annotations__pb2.DESCRIPTOR, @@ -41,6 +35,7 @@ full_name="google.cloud.bigquery.v2.StandardSqlDataType.TypeKind", filename=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, values=[ _descriptor.EnumValueDescriptor( name="TYPE_KIND_UNSPECIFIED", @@ -48,45 +43,111 @@ number=0, serialized_options=None, type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="INT64", index=1, number=2, serialized_options=None, type=None + name="INT64", + index=1, + number=2, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="BOOL", index=2, number=5, serialized_options=None, type=None + name="BOOL", + index=2, + number=5, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="FLOAT64", index=3, number=7, serialized_options=None, type=None + name="FLOAT64", + index=3, + number=7, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="STRING", index=4, number=8, serialized_options=None, type=None + name="STRING", + index=4, + number=8, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="BYTES", index=5, number=9, serialized_options=None, type=None + name="BYTES", + index=5, + number=9, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="TIMESTAMP", index=6, number=19, serialized_options=None, type=None + name="TIMESTAMP", + index=6, + number=19, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="DATE", index=7, number=10, serialized_options=None, type=None + name="DATE", + index=7, + number=10, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="TIME", index=8, number=20, serialized_options=None, type=None + name="TIME", + index=8, + number=20, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="DATETIME", index=9, number=21, serialized_options=None, type=None + name="DATETIME", + index=9, + number=21, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="GEOGRAPHY", index=10, number=22, serialized_options=None, type=None + name="GEOGRAPHY", + index=10, + number=22, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="NUMERIC", index=11, number=23, serialized_options=None, type=None + name="NUMERIC", + index=11, + number=23, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="ARRAY", index=12, number=16, serialized_options=None, type=None + name="ARRAY", + index=12, + number=16, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), _descriptor.EnumValueDescriptor( - name="STRUCT", index=13, number=17, serialized_options=None, type=None + name="STRUCT", + index=13, + number=17, + serialized_options=None, + type=None, + create_key=_descriptor._internal_create_key, ), ], containing_type=None, @@ -103,6 +164,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="type_kind", @@ -119,8 +181,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\002"), + serialized_options=b"\340A\002", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="array_element_type", @@ -139,6 +202,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="struct_type", @@ -157,6 +221,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -172,6 +237,7 @@ full_name="google.cloud.bigquery.v2.StandardSqlDataType.sub_type", index=0, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[], ), ], @@ -186,6 +252,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="name", @@ -196,14 +263,15 @@ cpp_type=9, label=1, has_default_value=False, - default_value=_b("").decode("utf-8"), + default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\001"), + serialized_options=b"\340A\001", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="type", @@ -220,8 +288,9 @@ containing_type=None, is_extension=False, extension_scope=None, - serialized_options=_b("\340A\001"), + serialized_options=b"\340A\001", file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -243,6 +312,7 @@ filename=None, file=DESCRIPTOR, containing_type=None, + create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="fields", @@ -261,6 +331,7 @@ extension_scope=None, serialized_options=None, file=DESCRIPTOR, + create_key=_descriptor._internal_create_key, ), ], extensions=[], @@ -305,39 +376,37 @@ StandardSqlDataType = _reflection.GeneratedProtocolMessageType( "StandardSqlDataType", (_message.Message,), - dict( - DESCRIPTOR=_STANDARDSQLDATATYPE, - __module__="google.cloud.bigquery_v2.proto.standard_sql_pb2", - __doc__="""The type of a variable, e.g., a function argument. - Examples: INT64: {type\_kind="INT64"} ARRAY: {type\_kind="ARRAY", - array\_element\_type="STRING"} STRUCT: - {type\_kind="STRUCT", struct\_type={fields=[ {name="x", - type={type\_kind="STRING"}}, {name="y", type={type\_kind="ARRAY", - array\_element\_type="DATE"}} ]}} - + { + "DESCRIPTOR": _STANDARDSQLDATATYPE, + "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2", + "__doc__": """The type of a variable, e.g., a function argument. Examples: INT64: + {type_kind=``INT64``} ARRAY: {type_kind=``ARRAY``, + array_element_type=``STRING``} STRUCT: + {type_kind=``STRUCT``, struct_type={fields=[ {name=``x``, + type={type_kind=``STRING``}}, {name=``y``, type={type_kind=``ARRAY``, + array_element_type=``DATE``}} ]}} Attributes: type_kind: Required. The top level type of this field. Can be any - standard SQL data type (e.g., "INT64", "DATE", "ARRAY"). + standard SQL data type (e.g., ``INT64``, ``DATE``, ``ARRAY``). array_element_type: - The type of the array's elements, if type\_kind = "ARRAY". + The type of the array’s elements, if type_kind = ``ARRAY``. struct_type: - The fields of this struct, in order, if type\_kind = "STRUCT". + The fields of this struct, in order, if type_kind = ``STRUCT``. """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlDataType) - ), + }, ) _sym_db.RegisterMessage(StandardSqlDataType) StandardSqlField = _reflection.GeneratedProtocolMessageType( "StandardSqlField", (_message.Message,), - dict( - DESCRIPTOR=_STANDARDSQLFIELD, - __module__="google.cloud.bigquery_v2.proto.standard_sql_pb2", - __doc__="""A field or a column. - + { + "DESCRIPTOR": _STANDARDSQLFIELD, + "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2", + "__doc__": """A field or a column. Attributes: name: @@ -347,21 +416,21 @@ Optional. The type of this parameter. Absent if not explicitly specified (e.g., CREATE FUNCTION statement can omit the return type; in this case the output parameter does not have this - "type" field). + ``type`` field). """, # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlField) - ), + }, ) _sym_db.RegisterMessage(StandardSqlField) StandardSqlStructType = _reflection.GeneratedProtocolMessageType( "StandardSqlStructType", (_message.Message,), - dict( - DESCRIPTOR=_STANDARDSQLSTRUCTTYPE, - __module__="google.cloud.bigquery_v2.proto.standard_sql_pb2" + { + "DESCRIPTOR": _STANDARDSQLSTRUCTTYPE, + "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2" # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlStructType) - ), + }, ) _sym_db.RegisterMessage(StandardSqlStructType) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py index 07cb78fe03a9..8a9393943bdf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py @@ -1,2 +1,3 @@ # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" import grpc diff --git a/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh b/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh new file mode 100755 index 000000000000..ff599eb2af25 --- /dev/null +++ b/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Copyright 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +ROOT=$( dirname "$DIR" ) + +# Work from the project root. +cd $ROOT + +# Use SECRET_MANAGER_PROJECT if set, fallback to cloud-devrel-kokoro-resources. +PROJECT_ID="${SECRET_MANAGER_PROJECT:-cloud-devrel-kokoro-resources}" + +gcloud secrets versions access latest --secret="python-docs-samples-test-env" \ + > testing/test-env.sh +gcloud secrets versions access latest \ + --secret="python-docs-samples-service-account" \ + > testing/service-account.json +gcloud secrets versions access latest \ + --secret="python-docs-samples-client-secrets" \ + > testing/client-secrets.json \ No newline at end of file diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py b/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py new file mode 100644 index 000000000000..d309d6e97518 --- /dev/null +++ b/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Generates READMEs using configuration defined in yaml.""" + +import argparse +import io +import os +import subprocess + +import jinja2 +import yaml + + +jinja_env = jinja2.Environment( + trim_blocks=True, + loader=jinja2.FileSystemLoader( + os.path.abspath(os.path.join(os.path.dirname(__file__), 'templates')))) + +README_TMPL = jinja_env.get_template('README.tmpl.rst') + + +def get_help(file): + return subprocess.check_output(['python', file, '--help']).decode() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('source') + parser.add_argument('--destination', default='README.rst') + + args = parser.parse_args() + + source = os.path.abspath(args.source) + root = os.path.dirname(source) + destination = os.path.join(root, args.destination) + + jinja_env.globals['get_help'] = get_help + + with io.open(source, 'r') as f: + config = yaml.load(f) + + # This allows get_help to execute in the right directory. + os.chdir(root) + + output = README_TMPL.render(config) + + with io.open(destination, 'w') as f: + f.write(output) + + +if __name__ == '__main__': + main() diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/templates/README.tmpl.rst b/packages/google-cloud-bigquery/scripts/readme-gen/templates/README.tmpl.rst new file mode 100644 index 000000000000..4fd239765b0a --- /dev/null +++ b/packages/google-cloud-bigquery/scripts/readme-gen/templates/README.tmpl.rst @@ -0,0 +1,87 @@ +{# The following line is a lie. BUT! Once jinja2 is done with it, it will + become truth! #} +.. This file is automatically generated. Do not edit this file directly. + +{{product.name}} Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor={{folder}}/README.rst + + +This directory contains samples for {{product.name}}. {{product.description}} + +{{description}} + +.. _{{product.name}}: {{product.url}} + +{% if required_api_url %} +To run the sample, you need to enable the API at: {{required_api_url}} +{% endif %} + +{% if required_role %} +To run the sample, you need to have `{{required_role}}` role. +{% endif %} + +{{other_required_steps}} + +{% if setup %} +Setup +------------------------------------------------------------------------------- + +{% for section in setup %} + +{% include section + '.tmpl.rst' %} + +{% endfor %} +{% endif %} + +{% if samples %} +Samples +------------------------------------------------------------------------------- + +{% for sample in samples %} +{{sample.name}} ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +{% if not sample.hide_cloudshell_button %} +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor={{folder}}/{{sample.file}},{{folder}}/README.rst +{% endif %} + + +{{sample.description}} + +To run this sample: + +.. code-block:: bash + + $ python {{sample.file}} +{% if sample.show_help %} + + {{get_help(sample.file)|indent}} +{% endif %} + + +{% endfor %} +{% endif %} + +{% if cloud_client_library %} + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. _browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + +{% endif %} + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/templates/auth.tmpl.rst b/packages/google-cloud-bigquery/scripts/readme-gen/templates/auth.tmpl.rst new file mode 100644 index 000000000000..1446b94a5e3a --- /dev/null +++ b/packages/google-cloud-bigquery/scripts/readme-gen/templates/auth.tmpl.rst @@ -0,0 +1,9 @@ +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/templates/auth_api_key.tmpl.rst b/packages/google-cloud-bigquery/scripts/readme-gen/templates/auth_api_key.tmpl.rst new file mode 100644 index 000000000000..11957ce2714a --- /dev/null +++ b/packages/google-cloud-bigquery/scripts/readme-gen/templates/auth_api_key.tmpl.rst @@ -0,0 +1,14 @@ +Authentication +++++++++++++++ + +Authentication for this service is done via an `API Key`_. To obtain an API +Key: + +1. Open the `Cloud Platform Console`_ +2. Make sure that billing is enabled for your project. +3. From the **Credentials** page, create a new **API Key** or use an existing + one for your project. + +.. _API Key: + https://developers.google.com/api-client-library/python/guide/aaa_apikeys +.. _Cloud Console: https://console.cloud.google.com/project?_ diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst b/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst new file mode 100644 index 000000000000..a0406dba8c84 --- /dev/null +++ b/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -0,0 +1,29 @@ +Install Dependencies +++++++++++++++++++++ + +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_portaudio.tmpl.rst b/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_portaudio.tmpl.rst new file mode 100644 index 000000000000..5ea33d18c00c --- /dev/null +++ b/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_portaudio.tmpl.rst @@ -0,0 +1,35 @@ +Install PortAudio ++++++++++++++++++ + +Install `PortAudio`_. This is required by the `PyAudio`_ library to stream +audio from your computer's microphone. PyAudio depends on PortAudio for cross-platform compatibility, and is installed differently depending on the +platform. + +* For Mac OS X, you can use `Homebrew`_:: + + brew install portaudio + + **Note**: if you encounter an error when running `pip install` that indicates + it can't find `portaudio.h`, try running `pip install` with the following + flags:: + + pip install --global-option='build_ext' \ + --global-option='-I/usr/local/include' \ + --global-option='-L/usr/local/lib' \ + pyaudio + +* For Debian / Ubuntu Linux:: + + apt-get install portaudio19-dev python-all-dev + +* Windows may work without having to install PortAudio explicitly (it will get + installed with PyAudio). + +For more details, see the `PyAudio installation`_ page. + + +.. _PyAudio: https://people.csail.mit.edu/hubert/pyaudio/ +.. _PortAudio: http://www.portaudio.com/ +.. _PyAudio installation: + https://people.csail.mit.edu/hubert/pyaudio/#downloads +.. _Homebrew: http://brew.sh diff --git a/packages/google-cloud-bigquery/setup.cfg b/packages/google-cloud-bigquery/setup.cfg index 3bd555500e37..c3a2b39f6528 100644 --- a/packages/google-cloud-bigquery/setup.cfg +++ b/packages/google-cloud-bigquery/setup.cfg @@ -1,3 +1,19 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Generated by synthtool. DO NOT EDIT! [bdist_wheel] universal = 1 diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 86ecc1ffa60c..f131790f2b51 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -1,26 +1,25 @@ { - "updateTime": "2020-02-04T11:46:46.343511Z", "sources": [ { - "generator": { - "name": "artman", - "version": "0.44.4", - "dockerImage": "googleapis/artman@sha256:19e945954fc960a4bdfee6cb34695898ab21a8cf0bac063ee39b91f00a1faec8" + "git": { + "name": ".", + "remote": "git@github.com:googleapis/python-bigquery", + "sha": "0946a5c460b0d675f6dbe4f053a7801edba36443" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "69d9945330a5721cd679f17331a78850e2618226", - "internalRef": "293080182" + "sha": "e6ab0a55f2195169feded73dd684574dd4bd9dfa", + "internalRef": "319180144" } }, { - "template": { - "name": "python_split_library", - "origin": "synthtool.gcp", - "version": "2019.10.17" + "git": { + "name": "synthtool", + "remote": "https://github.com/googleapis/synthtool.git", + "sha": "303271797a360f8a439203413f13a160f2f5b3b4" } } ], @@ -31,8 +30,7 @@ "apiName": "bigquery", "apiVersion": "v2", "language": "python", - "generator": "gapic", - "config": "google/cloud/bigquery/artman_bigquery_v2.yaml" + "generator": "bazel" } } ] diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index 7fba81a5cabc..2bc3798ea444 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -59,7 +59,9 @@ # Add templated files # ---------------------------------------------------------------------------- templated_files = common.py_library(cov_level=100) -s.move(templated_files, excludes=["noxfile.py"]) + +# BigQuery has a custom multiprocessing note +s.move(templated_files, excludes=["noxfile.py", "docs/multiprocessing.rst"]) s.replace( "docs/conf.py", diff --git a/packages/google-cloud-bigquery/testing/.gitignore b/packages/google-cloud-bigquery/testing/.gitignore new file mode 100644 index 000000000000..b05fbd630881 --- /dev/null +++ b/packages/google-cloud-bigquery/testing/.gitignore @@ -0,0 +1,3 @@ +test-env.sh +service-account.json +client-secrets.json \ No newline at end of file From e5a64a655b3fe4fc13dfcd8dfeb4ab5848b9d32a Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 13 Jul 2020 18:54:47 +0530 Subject: [PATCH 0790/2016] feat(bigquery): add __eq__ method for class PartitionRange and RangePartitioning (#162) * feat(bigquery): add __eq__ method for class PartitionRange and RangePartitioning * feat(bigquery): change class object to unhashable * feat(bigquery): change the assertion --- .../google/cloud/bigquery/table.py | 20 +++++ .../tests/unit/test_table.py | 82 +++++++++++++++++++ 2 files changed, 102 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 5766f5fbe4b7..f1575ffb2af1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1891,10 +1891,20 @@ def interval(self, value): def _key(self): return tuple(sorted(self._properties.items())) + def __eq__(self, other): + if not isinstance(other, PartitionRange): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + def __repr__(self): key_vals = ["{}={}".format(key, val) for key, val in self._key()] return "PartitionRange({})".format(", ".join(key_vals)) + __hash__ = None + class RangePartitioning(object): """Range-based partitioning configuration for a table. @@ -1961,10 +1971,20 @@ def field(self, value): def _key(self): return (("field", self.field), ("range_", self.range_)) + def __eq__(self, other): + if not isinstance(other, RangePartitioning): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + def __repr__(self): key_vals = ["{}={}".format(key, repr(val)) for key, val in self._key()] return "RangePartitioning({})".format(", ".join(key_vals)) + __hash__ = None + class TimePartitioningType(object): """Specifies the type of time partitioning to perform.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 94a32661731e..3aabebb77f11 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -3525,6 +3525,37 @@ def test_constructor_w_resource(self): assert object_under_test.end == 1234567890 assert object_under_test.interval == 1000000 + def test___eq___start_mismatch(self): + object_under_test = self._make_one(start=1, end=10, interval=2) + other = self._make_one(start=2, end=10, interval=2) + self.assertNotEqual(object_under_test, other) + + def test___eq___end__mismatch(self): + object_under_test = self._make_one(start=1, end=10, interval=2) + other = self._make_one(start=1, end=11, interval=2) + self.assertNotEqual(object_under_test, other) + + def test___eq___interval__mismatch(self): + object_under_test = self._make_one(start=1, end=10, interval=2) + other = self._make_one(start=1, end=11, interval=3) + self.assertNotEqual(object_under_test, other) + + def test___eq___hit(self): + object_under_test = self._make_one(start=1, end=10, interval=2) + other = self._make_one(start=1, end=10, interval=2) + self.assertEqual(object_under_test, other) + + def test__eq___type_mismatch(self): + object_under_test = self._make_one(start=1, end=10, interval=2) + self.assertNotEqual(object_under_test, object()) + self.assertEqual(object_under_test, mock.ANY) + + def test_unhashable_object(self): + object_under_test1 = self._make_one(start=1, end=10, interval=2) + + with six.assertRaisesRegex(self, TypeError, r".*unhashable type.*"): + hash(object_under_test1) + def test_repr(self): object_under_test = self._make_one(start=1, end=10, interval=2) assert repr(object_under_test) == "PartitionRange(end=10, interval=2, start=1)" @@ -3574,6 +3605,57 @@ def test_range_w_wrong_type(self): with pytest.raises(ValueError, match="PartitionRange"): object_under_test.range_ = object() + def test___eq___field_mismatch(self): + from google.cloud.bigquery.table import PartitionRange + + object_under_test = self._make_one( + range_=PartitionRange(start=1, end=10, interval=2), field="integer_col" + ) + other = self._make_one( + range_=PartitionRange(start=1, end=10, interval=2), field="float_col" + ) + self.assertNotEqual(object_under_test, other) + + def test___eq___range__mismatch(self): + from google.cloud.bigquery.table import PartitionRange + + object_under_test = self._make_one( + range_=PartitionRange(start=1, end=10, interval=2), field="integer_col" + ) + other = self._make_one( + range_=PartitionRange(start=2, end=20, interval=2), field="float_col" + ) + self.assertNotEqual(object_under_test, other) + + def test___eq___hit(self): + from google.cloud.bigquery.table import PartitionRange + + object_under_test = self._make_one( + range_=PartitionRange(start=1, end=10, interval=2), field="integer_col" + ) + other = self._make_one( + range_=PartitionRange(start=1, end=10, interval=2), field="integer_col" + ) + self.assertEqual(object_under_test, other) + + def test__eq___type_mismatch(self): + from google.cloud.bigquery.table import PartitionRange + + object_under_test = self._make_one( + range_=PartitionRange(start=1, end=10, interval=2), field="integer_col" + ) + self.assertNotEqual(object_under_test, object()) + self.assertEqual(object_under_test, mock.ANY) + + def test_unhashable_object(self): + from google.cloud.bigquery.table import PartitionRange + + object_under_test1 = self._make_one( + range_=PartitionRange(start=1, end=10, interval=2), field="integer_col" + ) + with six.assertRaisesRegex(self, TypeError, r".*unhashable type.*"): + hash(object_under_test1) + def test_repr(self): from google.cloud.bigquery.table import PartitionRange From 8947537c34eb123e8a833e8571c7845781215e8e Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 15 Jul 2020 10:08:46 +0530 Subject: [PATCH 0791/2016] chore(deps): remove redundant dependencies (#164) * chore(deps): remove redundant dependencies * chore(deps): bump api core version --- packages/google-cloud-bigquery/setup.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index ad2bcf68d938..ea88debf76bd 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -30,16 +30,10 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ 'enum34; python_version < "3.4"', - "google-auth >= 1.9.0, < 2.0dev", - "google-api-core >= 1.15.0, < 2.0dev", + "google-api-core >= 1.21.0, < 2.0dev", "google-cloud-core >= 1.1.0, < 2.0dev", "google-resumable-media >= 0.5.0, < 0.6dev", - "protobuf >= 3.6.0", "six >=1.13.0,< 2.0.0dev", - # rsa >= 4.1 is not compatible with Python 2 - # https://github.com/sybrenstuvel/python-rsa/issues/152 - 'rsa <4.1; python_version < "3"', - 'rsa >=3.1.4, <5; python_version >= "3"', ] extras = { "bqstorage": [ From ab25985d4760bed0fd03dc28ae18a8e8c09810bd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 17 Jul 2020 07:42:04 -0500 Subject: [PATCH 0792/2016] fix: omit `NaN` values when uploading from `insert_rows_from_dataframe` (#170) * fix: omit `NaN` values when uploading from `insert_rows_from_dataframe` NaN values are most often used to indicate a NULL value in pandas. Also, even when a column is a floating point column, the BigQuery streaming API JSON parser doesn't seem to be able to handle NaN literals. * doc: update docstring to indicate missing NaNs --- .../google/cloud/bigquery/_pandas_helpers.py | 12 ++++ .../google/cloud/bigquery/client.py | 9 ++- .../google-cloud-bigquery/tests/system.py | 30 ++++++-- .../tests/unit/test_client.py | 68 +++++++++++++++++++ 4 files changed, 110 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index bced246e8f81..ff6525399a52 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -19,6 +19,7 @@ import logging import warnings +import six from six.moves import queue try: @@ -780,3 +781,14 @@ def download_dataframe_bqstorage( selected_fields=selected_fields, page_to_item=page_to_item, ) + + +def dataframe_to_json_generator(dataframe): + for row in dataframe.itertuples(index=False, name=None): + output = {} + for column, value in six.moves.zip(dataframe.columns, row): + # Omit NaN values. + if value != value: + continue + output[column] = value + yield output diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index eceedcd67a4d..20a485698df3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2535,7 +2535,9 @@ def insert_rows_from_dataframe( ]): The destination table for the row data, or a reference to it. dataframe (pandas.DataFrame): - A :class:`~pandas.DataFrame` containing the data to load. + A :class:`~pandas.DataFrame` containing the data to load. Any + ``NaN`` values present in the dataframe are omitted from the + streaming API request(s). selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): The fields to return. Required if ``table`` is a :class:`~google.cloud.bigquery.table.TableReference`. @@ -2559,10 +2561,7 @@ def insert_rows_from_dataframe( insert_results = [] chunk_count = int(math.ceil(len(dataframe) / chunk_size)) - rows_iter = ( - dict(six.moves.zip(dataframe.columns, row)) - for row in dataframe.itertuples(index=False, name=None) - ) + rows_iter = _pandas_helpers.dataframe_to_json_generator(dataframe) for _ in range(chunk_count): rows_chunk = itertools.islice(rows_iter, chunk_size) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 14d3f49a1b13..cd5454a876ba 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -2335,6 +2335,14 @@ def test_insert_rows_from_dataframe(self): "string_col": "another string", "int_col": 50, }, + { + "float_col": 6.66, + "bool_col": True, + # Include a NaN value, because pandas often uses NaN as a + # NULL value indicator. + "string_col": float("NaN"), + "int_col": 60, + }, ] ) @@ -2344,14 +2352,28 @@ def test_insert_rows_from_dataframe(self): table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - Config.CLIENT.insert_rows_from_dataframe(table, dataframe, chunk_size=3) + chunk_errors = Config.CLIENT.insert_rows_from_dataframe( + table, dataframe, chunk_size=3 + ) + for errors in chunk_errors: + assert not errors - retry = RetryResult(_has_rows, max_tries=8) - rows = retry(self._fetch_single_page)(table) + # Use query to fetch rows instead of listing directly from the table so + # that we get values from the streaming buffer. + rows = list( + Config.CLIENT.query( + "SELECT * FROM `{}.{}.{}`".format( + table.project, table.dataset_id, table.table_id + ) + ) + ) sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) row_tuples = [r.values() for r in sorted_rows] - expected = [tuple(data_row) for data_row in dataframe.itertuples(index=False)] + expected = [ + tuple(None if col != col else col for col in data_row) + for data_row in dataframe.itertuples(index=False) + ] assert len(row_tuples) == len(expected) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 0e083d43f74e..2c4c1342c7a7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5582,6 +5582,74 @@ def test_insert_rows_from_dataframe(self): ) assert call == expected_call + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_insert_rows_from_dataframe_nan(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + API_PATH = "/projects/{}/datasets/{}/tables/{}/insertAll".format( + self.PROJECT, self.DS_ID, self.TABLE_REF.table_id + ) + + dataframe = pandas.DataFrame( + { + "str_col": ["abc", "def", float("NaN"), "jkl"], + "int_col": [1, float("NaN"), 3, 4], + "float_col": [float("NaN"), 0.25, 0.5, 0.125], + } + ) + + # create client + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection({}, {}) + + # create table + schema = [ + SchemaField("str_col", "STRING"), + SchemaField("int_col", "INTEGER"), + SchemaField("float_col", "FLOAT"), + ] + table = Table(self.TABLE_REF, schema=schema) + + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(dataframe)))): + error_info = client.insert_rows_from_dataframe( + table, dataframe, chunk_size=3, timeout=7.5 + ) + + self.assertEqual(len(error_info), 2) + for chunk_errors in error_info: + assert chunk_errors == [] + + EXPECTED_SENT_DATA = [ + { + "rows": [ + {"insertId": "0", "json": {"str_col": "abc", "int_col": 1}}, + {"insertId": "1", "json": {"str_col": "def", "float_col": 0.25}}, + {"insertId": "2", "json": {"int_col": 3, "float_col": 0.5}}, + ] + }, + { + "rows": [ + { + "insertId": "3", + "json": {"str_col": "jkl", "int_col": 4, "float_col": 0.125}, + } + ] + }, + ] + + actual_calls = conn.api_request.call_args_list + + for call, expected_data in six.moves.zip_longest( + actual_calls, EXPECTED_SENT_DATA + ): + expected_call = mock.call( + method="POST", path=API_PATH, data=expected_data, timeout=7.5 + ) + assert call == expected_call + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe_many_columns(self): from google.cloud.bigquery.schema import SchemaField From 14fc2bbe1ee37c655c68f82c2e306803ca5b19fa Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 20 Jul 2020 19:20:00 +0530 Subject: [PATCH 0793/2016] docs(bigquery): add docstring for conflict exception (#171) * docs(bigquery): add docstring for conflict exception * docs(bigquery): nit --- .../google/cloud/bigquery/client.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 20a485698df3..a3d1b8846c0a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -441,6 +441,10 @@ def create_dataset( google.cloud.bigquery.dataset.Dataset: A new ``Dataset`` returned from the API. + Raises: + google.cloud.exceptions.Conflict: + If the dataset already exists. + Example: >>> from google.cloud import bigquery @@ -496,6 +500,10 @@ def create_routine( Returns: google.cloud.bigquery.routine.Routine: A new ``Routine`` returned from the service. + + Raises: + google.cloud.exceptions.Conflict: + If the routine already exists. """ reference = routine.reference path = "/projects/{}/datasets/{}/routines".format( @@ -540,6 +548,10 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None Returns: google.cloud.bigquery.table.Table: A new ``Table`` returned from the service. + + Raises: + google.cloud.exceptions.Conflict: + If the table already exists. """ table = _table_arg_to_table(table, default_project=self.project) From 002ec290c71fa40a1018cdf7c53e3e92a2622f76 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 21 Jul 2020 14:40:50 -0700 Subject: [PATCH 0794/2016] chore: release 1.26.0 (#130) * updated CHANGELOG.md [ci skip] * updated setup.cfg [ci skip] * updated setup.py Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 24 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 7506ed4380d4..6da5b01a098d 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,30 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [1.26.0](https://www.github.com/googleapis/python-bigquery/compare/v1.25.0...v1.26.0) (2020-07-20) + + +### Features + +* use BigQuery Storage client by default ([#55](https://www.github.com/googleapis/python-bigquery/issues/55)) ([e75ff82](https://www.github.com/googleapis/python-bigquery/commit/e75ff8297c65981545b097f75a17cf9e78ac6772)), closes [#91](https://www.github.com/googleapis/python-bigquery/issues/91) +* **bigquery:** add __eq__ method for class PartitionRange and RangePartitioning ([#162](https://www.github.com/googleapis/python-bigquery/issues/162)) ([0d2a88d](https://www.github.com/googleapis/python-bigquery/commit/0d2a88d8072154cfc9152afd6d26a60ddcdfbc73)) +* **bigquery:** expose date_as_object parameter to users ([#150](https://www.github.com/googleapis/python-bigquery/issues/150)) ([a2d5ce9](https://www.github.com/googleapis/python-bigquery/commit/a2d5ce9e97992318d7dc85c51c053cab74e25a11)) +* **bigquery:** expose date_as_object parameter to users ([#150](https://www.github.com/googleapis/python-bigquery/issues/150)) ([cbd831e](https://www.github.com/googleapis/python-bigquery/commit/cbd831e08024a67148723afd49e1db085e0a862c)) + + +### Bug Fixes + +* dry run queries with DB API cursor ([#128](https://www.github.com/googleapis/python-bigquery/issues/128)) ([bc33a67](https://www.github.com/googleapis/python-bigquery/commit/bc33a678a765f0232615aa2038b8cc67c88468a0)) +* omit `NaN` values when uploading from `insert_rows_from_dataframe` ([#170](https://www.github.com/googleapis/python-bigquery/issues/170)) ([f9f2f45](https://www.github.com/googleapis/python-bigquery/commit/f9f2f45bc009c03cd257441bd4b6beb1754e2177)) + + +### Documentation + +* **bigquery:** add client thread-safety documentation ([#132](https://www.github.com/googleapis/python-bigquery/issues/132)) ([fce76b3](https://www.github.com/googleapis/python-bigquery/commit/fce76b3776472b1da798df862a3405e659e35bab)) +* **bigquery:** add docstring for conflict exception ([#171](https://www.github.com/googleapis/python-bigquery/issues/171)) ([9c3409b](https://www.github.com/googleapis/python-bigquery/commit/9c3409bb06218bf499620544f8e92802df0cce47)) +* **bigquery:** consistent use of optional keyword ([#153](https://www.github.com/googleapis/python-bigquery/issues/153)) ([79d8c61](https://www.github.com/googleapis/python-bigquery/commit/79d8c61064cca18b596a24b6f738c7611721dd5c)) +* **bigquery:** fix the broken docs ([#139](https://www.github.com/googleapis/python-bigquery/issues/139)) ([3235255](https://www.github.com/googleapis/python-bigquery/commit/3235255cc5f483949f34d2e8ef13b372e8713782)) + ## [1.25.0](https://www.github.com/googleapis/python-bigquery/compare/v1.24.0...v1.25.0) (2020-06-06) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index ea88debf76bd..497853be01d1 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.25.0" +version = "1.26.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 03a910f9d353d08322ca493d2f601965f1e4a803 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 10 Aug 2016 14:49:53 -0700 Subject: [PATCH 0795/2016] Add veneer samples for BigQuery. [(#444)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/444) Put them in their own directory, since they duplicate the content from the existing samples that use the generated libraries. --- .../samples/snippets/README.md | 5 ++ .../samples/snippets/async_query.py | 83 +++++++++++++++++++ .../samples/snippets/async_query_test.py | 27 ++++++ .../samples/snippets/requirements.txt | 1 + .../samples/snippets/sync_query.py | 66 +++++++++++++++ .../samples/snippets/sync_query_test.py | 27 ++++++ 6 files changed, 209 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/README.md create mode 100755 packages/google-cloud-bigquery/samples/snippets/async_query.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/async_query_test.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/requirements.txt create mode 100755 packages/google-cloud-bigquery/samples/snippets/sync_query.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/sync_query_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/README.md b/packages/google-cloud-bigquery/samples/snippets/README.md new file mode 100644 index 000000000000..9c1f9b9fbe5a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/README.md @@ -0,0 +1,5 @@ +# BigQuery Google Cloud Client Library Samples + + + + diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query.py b/packages/google-cloud-bigquery/samples/snippets/async_query.py new file mode 100755 index 000000000000..0ca324240893 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/async_query.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Command-line application to perform asynchronous queries in BigQuery. + +For more information, see the README.md under /bigquery. + +Example invocation: + $ python async_query.py \ + 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' +""" + +import argparse +import time +import uuid + +from gcloud import bigquery + + +def async_query(query): + client = bigquery.Client() + query_job = client.run_async_query(str(uuid.uuid4()), query) + query_job.use_legacy_sql = False + query_job.begin() + + wait_for_job(query_job) + + # Manually construct the QueryResults. + # TODO: The client library will provide a helper method that does this. + # https://github.com/GoogleCloudPlatform/gcloud-python/issues/2083 + query_results = bigquery.query.QueryResults('', client) + query_results._properties['jobReference'] = { + 'jobId': query_job.name, + 'projectId': query_job.project + } + + # Drain the query results by requesting a page at a time. + page_token = None + + while True: + rows, total_rows, page_token = query_results.fetch_data( + max_results=10, + page_token=page_token) + + for row in rows: + print(row) + + if not page_token: + break + + +def wait_for_job(job): + while True: + job.reload() # Refreshes the state via a GET request. + if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.error_result) + return + time.sleep(1) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('query', help='BigQuery SQL Query.') + + args = parser.parse_args() + + async_query(args.query) diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query_test.py b/packages/google-cloud-bigquery/samples/snippets/async_query_test.py new file mode 100644 index 000000000000..810c538a6da7 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/async_query_test.py @@ -0,0 +1,27 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from async_query import async_query + + +def test_async_query(cloud_config, capsys): + query = ( + 'SELECT corpus FROM `publicdata.samples.shakespeare` ' + 'GROUP BY corpus;') + + async_query(query) + + out, _ = capsys.readouterr() + + assert 'romeoandjuliet' in out diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt new file mode 100644 index 000000000000..2beeafe63a8a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -0,0 +1 @@ +gcloud==0.18.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query.py b/packages/google-cloud-bigquery/samples/snippets/sync_query.py new file mode 100755 index 000000000000..59007b537833 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Command-line application to perform synchronous queries in BigQuery. + +For more information, see the README.md under /bigquery. + +Example invocation: + $ python sync_query.py \ + 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' +""" + +import argparse + +# [START sync_query] +from gcloud import bigquery + + +def sync_query(query): + client = bigquery.Client() + query_results = client.run_sync_query(query) + + # Use standard SQL syntax for queries. + # See: https://cloud.google.com/bigquery/sql-reference/ + query_results.use_legacy_sql = False + + query_results.run() + + # Drain the query results by requesting a page at a time. + page_token = None + + while True: + rows, total_rows, page_token = query_results.fetch_data( + max_results=10, + page_token=page_token) + + for row in rows: + print(row) + + if not page_token: + break +# [END sync_query] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('query', help='BigQuery SQL Query.') + + args = parser.parse_args() + + sync_query(args.query) diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py b/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py new file mode 100644 index 000000000000..6f6b4f5fa8e8 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py @@ -0,0 +1,27 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from sync_query import sync_query + + +def test_sync_query(cloud_config, capsys): + query = ( + 'SELECT corpus FROM `publicdata.samples.shakespeare` ' + 'GROUP BY corpus;') + + sync_query(query) + + out, _ = capsys.readouterr() + + assert 'romeoandjuliet' in out From e8f008407ac3b75d19312331ed8dad9a9fd0daf9 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Thu, 18 Aug 2016 16:19:00 -0700 Subject: [PATCH 0796/2016] Add data loading samples Change-Id: Ie78c06c7375f11dc2a98e2e8cf601f3820c6f0c1 --- .../samples/snippets/export_data_to_gcs.py | 77 ++++++++++++++++++ .../snippets/export_data_to_gcs_test.py | 30 +++++++ .../samples/snippets/load_data_from_file.py | 79 +++++++++++++++++++ .../snippets/load_data_from_file_test.py | 34 ++++++++ .../samples/snippets/load_data_from_gcs.py | 77 ++++++++++++++++++ .../snippets/load_data_from_gcs_test.py | 31 ++++++++ .../samples/snippets/resources/data.csv | 1 + .../samples/snippets/resources/data.json | 1 + .../samples/snippets/resources/schema.json | 1 + .../samples/snippets/stream_data.py | 69 ++++++++++++++++ .../samples/snippets/stream_data_test.py | 29 +++++++ 11 files changed, 429 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/resources/data.csv create mode 100644 packages/google-cloud-bigquery/samples/snippets/resources/data.json create mode 100644 packages/google-cloud-bigquery/samples/snippets/resources/schema.json create mode 100644 packages/google-cloud-bigquery/samples/snippets/stream_data.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/stream_data_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py new file mode 100644 index 000000000000..c9771ea1b0d5 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Exports data from BigQuery to an object in Google Cloud Storage. + +For more information, see the README.md under /bigquery. + +Example invocation: + $ python export_data_to_gcs.py example_dataset example_table \ + gs://example-bucket/example-data.csv + +The dataset and table should already exist. +""" + +import argparse +import time +import uuid + +from gcloud import bigquery + + +def export_data_to_gcs(dataset_name, table_name, destination): + bigquery_client = bigquery.Client() + dataset = bigquery_client.dataset(dataset_name) + table = dataset.table(table_name) + job_name = str(uuid.uuid4()) + + job = bigquery_client.extract_table_to_storage( + job_name, table, destination) + + job.begin() + + wait_for_job(job) + + print('Exported {}:{} to {}'.format( + dataset_name, table_name, destination)) + + +def wait_for_job(job): + while True: + job.reload() + if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.error_result) + return + time.sleep(1) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('dataset_name') + parser.add_argument('table_name') + parser.add_argument( + 'destination', help='The desintation Google Cloud Storage object.' + 'Must be in the format gs://bucket_name/object_name') + + args = parser.parse_args() + + export_data_to_gcs( + args.dataset_name, + args.table_name, + args.destination) diff --git a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py new file mode 100644 index 000000000000..e260e47b4f74 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py @@ -0,0 +1,30 @@ +# Copyright 2015, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import export_data_to_gcs + + +DATASET_ID = 'test_dataset' +TABLE_ID = 'test_import_table' + + +def test_export_data_to_gcs(cloud_config, capsys): + export_data_to_gcs.export_data_to_gcs( + DATASET_ID, + TABLE_ID, + 'gs://{}/test-export-data-to-gcs.csv'.format( + cloud_config.storage_bucket)) + + out, _ = capsys.readouterr() + + assert 'Exported' in out diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py new file mode 100644 index 000000000000..cbb015347352 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Loads data into BigQuery from a local file. + +For more information, see the README.md under /bigquery. + +Example invocation: + $ python load_data_from_file.py example_dataset example_table \ + example-data.csv + +The dataset and table should already exist. +""" + +import argparse +import time +from gcloud import bigquery + + +def load_data_from_file(dataset_name, table_name, source_file_name): + bigquery_client = bigquery.Client() + dataset = bigquery_client.dataset(dataset_name) + table = dataset.table(table_name) + + # Reload the table to get the schema. + table.reload() + + with open(source_file_name, 'rb') as source_file: + # This example uses CSV, but you can use other formats. + # See https://cloud.google.com/bigquery/loading-data + job = table.upload_from_file( + source_file, source_format='text/csv') + + job.begin() + + wait_for_job(job) + + print('Loaded {} rows into {}:{}.'.format( + job.output_rows, dataset_name, table_name)) + + +def wait_for_job(job): + while True: + job.reload() + if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.error_result) + return + time.sleep(1) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('dataset_name') + parser.add_argument('table_name') + parser.add_argument( + 'source_file_name', help='Path to a .csv file to upload.') + + args = parser.parse_args() + + load_data_from_file( + args.dataset_name, + args.table_name, + args.source_file_name) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py new file mode 100644 index 000000000000..9adc99dfeb77 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py @@ -0,0 +1,34 @@ +# Copyright 2015, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import load_data_from_file +import pytest + +DATASET_ID = 'test_dataset' +TABLE_ID = 'test_import_table' + + +@pytest.mark.xfail( + strict=True, + reason='https://github.com/GoogleCloudPlatform/gcloud-python/issues/2133') +def test_load_table(resource, capsys): + data_path = resource('data.csv') + + load_data_from_file.load_data_from_file( + DATASET_ID, + TABLE_ID, + data_path) + + out, _ = capsys.readouterr() + + assert 'Loaded 1 rows' in out diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py new file mode 100644 index 000000000000..1a577be649ca --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Loads data into BigQuery from an object in Google Cloud Storage. + +For more information, see the README.md under /bigquery. + +Example invocation: + $ python load_data_from_gcs.py example_dataset example_table \ + gs://example-bucket/example-data.csv + +The dataset and table should already exist. +""" + +import argparse +import time +import uuid + +from gcloud import bigquery + + +def load_data_from_gcs(dataset_name, table_name, source): + bigquery_client = bigquery.Client() + dataset = bigquery_client.dataset(dataset_name) + table = dataset.table(table_name) + job_name = str(uuid.uuid4()) + + job = bigquery_client.load_table_from_storage( + job_name, table, source) + + job.begin() + + wait_for_job(job) + + print('Loaded {} rows into {}:{}.'.format( + job.output_rows, dataset_name, table_name)) + + +def wait_for_job(job): + while True: + job.reload() + if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.error_result) + return + time.sleep(1) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('dataset_name') + parser.add_argument('table_name') + parser.add_argument( + 'source', help='The Google Cloud Storage object to load. Must be in ' + 'the format gs://bucket_name/object_name') + + args = parser.parse_args() + + load_data_from_gcs( + args.dataset_name, + args.table_name, + args.source) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py new file mode 100644 index 000000000000..2d1c66162c0b --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py @@ -0,0 +1,31 @@ +# Copyright 2015, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import load_data_from_gcs + +DATASET_ID = 'test_dataset' +TABLE_ID = 'test_import_table' + + +def test_load_table(cloud_config, capsys): + cloud_storage_input_uri = 'gs://{}/data.csv'.format( + cloud_config.storage_bucket) + + load_data_from_gcs.load_data_from_gcs( + DATASET_ID, + TABLE_ID, + cloud_storage_input_uri) + + out, _ = capsys.readouterr() + + assert 'Loaded 1 rows' in out diff --git a/packages/google-cloud-bigquery/samples/snippets/resources/data.csv b/packages/google-cloud-bigquery/samples/snippets/resources/data.csv new file mode 100644 index 000000000000..230a96b559df --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/resources/data.csv @@ -0,0 +1 @@ +Gandalf, 2000, 140.0, 1 diff --git a/packages/google-cloud-bigquery/samples/snippets/resources/data.json b/packages/google-cloud-bigquery/samples/snippets/resources/data.json new file mode 100644 index 000000000000..b8eef90c5911 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/resources/data.json @@ -0,0 +1 @@ +{"Name": "Gandalf", "Age": 2000, "Weight": 140.0, "IsMagic": true} diff --git a/packages/google-cloud-bigquery/samples/snippets/resources/schema.json b/packages/google-cloud-bigquery/samples/snippets/resources/schema.json new file mode 100644 index 000000000000..a48971ef857b --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/resources/schema.json @@ -0,0 +1 @@ +[{"type": "STRING", "name": "Name"}, {"type": "INTEGER", "name": "Age"}, {"type": "FLOAT", "name": "Weight"}, {"type": "BOOLEAN", "name": "IsMagic"}] \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/snippets/stream_data.py b/packages/google-cloud-bigquery/samples/snippets/stream_data.py new file mode 100644 index 000000000000..5df6be114446 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/stream_data.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Loads a single row of data directly into BigQuery. + +For more information, see the README.md under /bigquery. + +Example invocation: + $ python stream_data.py example_dataset example_table \ + '["Gandalf", 2000]' + +The dataset and table should already exist. +""" + +import argparse +import json +from pprint import pprint + +from gcloud import bigquery + + +def stream_data(dataset_name, table_name, json_data): + bigquery_client = bigquery.Client() + dataset = bigquery_client.dataset(dataset_name) + table = dataset.table(table_name) + data = json.loads(json_data) + + # Reload the table to get the schema. + table.reload() + + rows = [data] + errors = table.insert_data(rows) + + if not errors: + print('Loaded 1 row into {}:{}'.format(dataset_name, table_name)) + else: + print('Errors:') + pprint(errors) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('dataset_name') + parser.add_argument('table_name') + parser.add_argument( + 'json_data', + help='The row to load into BigQuery as an array in JSON format.') + + args = parser.parse_args() + + stream_data( + args.dataset_name, + args.table_name, + args.json_data) diff --git a/packages/google-cloud-bigquery/samples/snippets/stream_data_test.py b/packages/google-cloud-bigquery/samples/snippets/stream_data_test.py new file mode 100644 index 000000000000..f7d244b3adb2 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/stream_data_test.py @@ -0,0 +1,29 @@ +# Copyright 2015, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import stream_data + + +DATASET_ID = 'test_dataset' +TABLE_ID = 'test_import_table' + + +def test_stream_data(resource, capsys): + stream_data.stream_data( + DATASET_ID, + TABLE_ID, + '["Gandalf", 2000]') + + out, _ = capsys.readouterr() + + assert 'Loaded 1 row' in out From 3bd2a733d1deae325c91cd87acb5c509d70e0167 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 19 Aug 2016 12:39:59 -0700 Subject: [PATCH 0797/2016] Add bigquery snippets Change-Id: I148b9f444ad5e481d4f091b53121873b51de191e --- .../samples/snippets/snippets.py | 172 ++++++++++++++++++ .../samples/snippets/snippets_test.py | 75 ++++++++ 2 files changed, 247 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/snippets.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/snippets_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py new file mode 100644 index 000000000000..7c7a90c30dd1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Samples that demonstrate basic operations in the BigQuery API. + +For more information, see the README.md under /bigquery. + +Example invocation: + $ python snippets.py list-datasets + +The dataset and table should already exist. +""" + +import argparse + +from gcloud import bigquery + + +def list_projects(): + raise NotImplementedError( + 'https://github.com/GoogleCloudPlatform/gcloud-python/issues/2143') + + +def list_datasets(project=None): + """Lists all datasets in a given project. + + If no project is specified, then the currently active project is used + """ + bigquery_client = bigquery.Client(project=project) + + datasets = [] + page_token = None + + while True: + results, page_token = bigquery_client.list_datasets( + page_token=page_token) + datasets.extend(results) + + if not page_token: + break + + for dataset in datasets: + print(dataset.name) + + +def list_tables(dataset_name, project=None): + """Lists all of the tables in a given dataset. + + If no project is specified, then the currently active project is used. + """ + bigquery_client = bigquery.Client(project=project) + dataset = bigquery_client.dataset(dataset_name) + + if not dataset.exists(): + print('Dataset {} does not exist.'.format(dataset_name)) + + tables = [] + page_token = None + + while True: + results, page_token = dataset.list_tables(page_token=page_token) + tables.extend(results) + + if not page_token: + break + + for table in tables: + print(table.name) + + +def list_rows(dataset_name, table_name, project=None): + """Prints rows in the given table. + + Will print 25 rows at most for brevity as tables can contain large amounts + of rows. + + If no project is specified, then the currently active project is used. + """ + bigquery_client = bigquery.Client(project=project) + dataset = bigquery_client.dataset(dataset_name) + table = dataset.table(table_name) + + if not table.exists(): + print('Table {}:{} does not exist.'.format(dataset_name, table_name)) + + # Reload the table so that the schema is available. + table.reload() + + rows = [] + page_token = None + + while len(rows) < 25: + results, total_rows, page_token = table.fetch_data( + max_results=25, page_token=page_token) + rows.extend(results) + + if not page_token: + break + + # Use format to create a simple table. + format_string = '{:<16} ' * len(table.schema) + + # Print schema field names + field_names = [field.name for field in table.schema] + print(format_string.format(*field_names)) + + for row in rows: + print(format_string.format(*row)) + + +def delete_table(dataset_name, table_name, project=None): + """Deletes a table in a given dataset. + + If no project is specified, then the currently active project is used. + """ + bigquery_client = bigquery.Client(project=project) + dataset = bigquery_client.dataset(dataset_name) + table = dataset.table(table_name) + + table.delete() + + print('Table {}:{} deleted.'.format(dataset_name, table_name)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('--project', default=None) + + subparsers = parser.add_subparsers(dest='command') + + list_datasets_parser = subparsers.add_parser( + 'list-datasets', help=list_datasets.__doc__) + + list_tables_parser = subparsers.add_parser( + 'list-tables', help=list_tables.__doc__) + list_tables_parser.add_argument('dataset_name') + + list_rows_parser = subparsers.add_parser( + 'list-rows', help=list_rows.__doc__) + list_rows_parser.add_argument('dataset_name') + list_rows_parser.add_argument('table_name') + + delete_table_parser = subparsers.add_parser( + 'delete-table', help=delete_table.__doc__) + delete_table_parser.add_argument('dataset_name') + delete_table_parser.add_argument('table_name') + + args = parser.parse_args() + + if args.command == 'list-datasets': + list_datasets(args.project) + elif args.command == 'list-tables': + list_tables(args.dataset_name, args.project) + elif args.command == 'list-rows': + list_rows(args.dataset_name, args.table_name, args.project) + elif args.command == 'delete-table': + delete_table(args.dataset_name, args.table_name, args.project) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py new file mode 100644 index 000000000000..e45484ef24ac --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py @@ -0,0 +1,75 @@ +# Copyright 2015, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from gcloud import bigquery +import pytest +import snippets + + +DATASET_ID = 'test_dataset' +TABLE_ID = 'test_import_table' + + +@pytest.mark.xfail( + strict=True, + reason='https://github.com/GoogleCloudPlatform/gcloud-python/issues/2143') +def test_list_projects(): + snippets.list_projects() + # No need to check the ouput, lack of exception is enough. + + +def test_list_datasets(capsys): + # Requires the dataset to have been created in the test project. + snippets.list_datasets() + + out, _ = capsys.readouterr() + + assert DATASET_ID in out + + +def test_list_tables(capsys): + # Requires teh dataset and table to have been created in the test project. + snippets.list_tables(DATASET_ID) + + out, _ = capsys.readouterr() + + assert TABLE_ID in out + + +def test_list_rows(capsys): + # Requires the dataset and table to have been created in the test project. + + # Check for the schema. It's okay if the table is empty as long as there + # aren't any errors. + + snippets.list_rows(DATASET_ID, TABLE_ID) + + out, _ = capsys.readouterr() + + assert 'Name' in out + assert 'Age' in out + + +def test_delete_table(capsys): + # Create a table to delete + bigquery_client = bigquery.Client() + dataset = bigquery_client.dataset(DATASET_ID) + table = dataset.table('test_delete_table') + + if not table.exists(): + table.schema = [bigquery.SchemaField('id', 'INTEGER')] + table.create() + + snippets.delete_table(DATASET_ID, table.name) + + assert not table.exists() From 2d447bdd9173608afa1b06bc8d1ee683bc8b63b0 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 19 Aug 2016 12:45:19 -0700 Subject: [PATCH 0798/2016] Remove unnecessary fixture Change-Id: I5e3f3faecfaf05749e7cd931d1d81758d3032651 --- .../google-cloud-bigquery/samples/snippets/stream_data_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/stream_data_test.py b/packages/google-cloud-bigquery/samples/snippets/stream_data_test.py index f7d244b3adb2..25982b88ad50 100644 --- a/packages/google-cloud-bigquery/samples/snippets/stream_data_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/stream_data_test.py @@ -18,7 +18,7 @@ TABLE_ID = 'test_import_table' -def test_stream_data(resource, capsys): +def test_stream_data(capsys): stream_data.stream_data( DATASET_ID, TABLE_ID, From 26c0953465d7f27498e8bb7d16722b189ba4c964 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 19 Aug 2016 14:42:54 -0700 Subject: [PATCH 0799/2016] Address review comments Change-Id: I957fa992850ed3305dbcad5cd73ed61a7eedb63c --- packages/google-cloud-bigquery/samples/snippets/snippets.py | 2 ++ .../google-cloud-bigquery/samples/snippets/snippets_test.py | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index 7c7a90c30dd1..bcf534c8d341 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -66,6 +66,7 @@ def list_tables(dataset_name, project=None): if not dataset.exists(): print('Dataset {} does not exist.'.format(dataset_name)) + return tables = [] page_token = None @@ -95,6 +96,7 @@ def list_rows(dataset_name, table_name, project=None): if not table.exists(): print('Table {}:{} does not exist.'.format(dataset_name, table_name)) + return # Reload the table so that the schema is available. table.reload() diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py index e45484ef24ac..34faa277f0fc 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py @@ -1,9 +1,10 @@ -# Copyright 2015, Google, Inc. +# Copyright 2016 Google Inc. All Rights Reserved. +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, From 9cad0759c0a1a86ba190bfab2b98fdeebde0d16d Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 19 Aug 2016 14:55:30 -0700 Subject: [PATCH 0800/2016] Fix lint Change-Id: If7b90925c51b739a293fad737c8ab6919b3f7eae --- .../samples/snippets/load_data_from_file_test.py | 3 ++- .../google-cloud-bigquery/samples/snippets/snippets_test.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py index 9adc99dfeb77..eccefe038433 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py @@ -11,9 +11,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import load_data_from_file import pytest +import load_data_from_file + DATASET_ID = 'test_dataset' TABLE_ID = 'test_import_table' diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py index 34faa277f0fc..0a52922b9ac7 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py @@ -14,6 +14,7 @@ from gcloud import bigquery import pytest + import snippets From efbc35affa6b896bacf7d3a8cb2f858280e44d67 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 22 Aug 2016 10:34:15 -0700 Subject: [PATCH 0801/2016] Address post-review comments for BigQuery samples Change-Id: I74627bc2744d3b0946d34c1d2a04c424ed708e87 --- packages/google-cloud-bigquery/samples/snippets/snippets.py | 3 +++ .../google-cloud-bigquery/samples/snippets/snippets_test.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index bcf534c8d341..49272965b416 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -104,6 +104,9 @@ def list_rows(dataset_name, table_name, project=None): rows = [] page_token = None + # Load at most 25 results. You can change this to `while True` and change + # the max_results argument to load more rows from BigQuery, but note + # that this can take some time. It's preferred to use a query. while len(rows) < 25: results, total_rows, page_token = table.fetch_data( max_results=25, page_token=page_token) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py index 0a52922b9ac7..ed2f47639fc3 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py @@ -40,7 +40,7 @@ def test_list_datasets(capsys): def test_list_tables(capsys): - # Requires teh dataset and table to have been created in the test project. + # Requires the dataset and table to have been created in the test project. snippets.list_tables(DATASET_ID) out, _ = capsys.readouterr() From a38a3678c1fcccdffa527f2a44353cc6cb5543ef Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 12 Sep 2016 12:59:26 -0700 Subject: [PATCH 0802/2016] Add bigquery create and copy table examples [(#514)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/514) * Add bigquery create table sample * Add copy table sample * Fix test table ids --- .../snippets/export_data_to_gcs_test.py | 2 +- .../samples/snippets/snippets.py | 88 +++++++++++++++++++ .../samples/snippets/snippets_test.py | 39 +++++++- 3 files changed, 126 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py index e260e47b4f74..acbbe50e55e0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py @@ -15,7 +15,7 @@ DATASET_ID = 'test_dataset' -TABLE_ID = 'test_import_table' +TABLE_ID = 'test_table' def test_export_data_to_gcs(cloud_config, capsys): diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index 49272965b416..d201a84cba80 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -25,8 +25,11 @@ """ import argparse +import time +import uuid from gcloud import bigquery +import gcloud.bigquery.job def list_projects(): @@ -82,6 +85,32 @@ def list_tables(dataset_name, project=None): print(table.name) +def create_table(dataset_name, table_name, project=None): + """Creates a simple table in the given dataset. + + If no project is specified, then the currently active project is used. + """ + bigquery_client = bigquery.Client(project=project) + dataset = bigquery_client.dataset(dataset_name) + + if not dataset.exists(): + print('Dataset {} does not exist.'.format(dataset_name)) + return + + table = dataset.table(table_name) + + # Set the table schema + table.schema = ( + bigquery.SchemaField('Name', 'STRING'), + bigquery.SchemaField('Age', 'INTEGER'), + bigquery.SchemaField('Weight', 'FLOAT'), + ) + + table.create() + + print('Created table {} in dataset {}.'.format(table_name, dataset_name)) + + def list_rows(dataset_name, table_name, project=None): """Prints rows in the given table. @@ -126,6 +155,50 @@ def list_rows(dataset_name, table_name, project=None): print(format_string.format(*row)) +def copy_table(dataset_name, table_name, new_table_name, project=None): + """Copies a table. + + If no project is specified, then the currently active project is used. + """ + bigquery_client = bigquery.Client(project=project) + dataset = bigquery_client.dataset(dataset_name) + table = dataset.table(table_name) + + # This sample shows the destination table in the same dataset and project, + # however, it's possible to copy across datasets and projects. You can + # also copy muliple source tables into a single destination table by + # providing addtional arguments to `copy_table`. + destination_table = dataset.table(new_table_name) + + # Create a job to copy the table to the destination table. + job_id = str(uuid.uuid4()) + job = bigquery_client.copy_table( + job_id, destination_table, table) + + # Create the table if it doesn't exist. + job.create_disposition = ( + gcloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED) + + # Start the job. + job.begin() + + # Wait for the the job to finish. + print('Waiting for job to finish...') + wait_for_job(job) + + print('Table {} copied to {}.'.format(table_name, new_table_name)) + + +def wait_for_job(job): + while True: + job.reload() # Refreshes the state via a GET request. + if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.error_result) + return + time.sleep(1) + + def delete_table(dataset_name, table_name, project=None): """Deletes a table in a given dataset. @@ -155,11 +228,22 @@ def delete_table(dataset_name, table_name, project=None): 'list-tables', help=list_tables.__doc__) list_tables_parser.add_argument('dataset_name') + create_table_parser = subparsers.add_parser( + 'create-table', help=create_table.__doc__) + create_table_parser.add_argument('dataset_name') + create_table_parser.add_argument('table_name') + list_rows_parser = subparsers.add_parser( 'list-rows', help=list_rows.__doc__) list_rows_parser.add_argument('dataset_name') list_rows_parser.add_argument('table_name') + copy_table_parser = subparsers.add_parser( + 'copy-table', help=copy_table.__doc__) + copy_table_parser.add_argument('dataset_name') + copy_table_parser.add_argument('table_name') + copy_table_parser.add_argument('new_table_name') + delete_table_parser = subparsers.add_parser( 'delete-table', help=delete_table.__doc__) delete_table_parser.add_argument('dataset_name') @@ -171,7 +255,11 @@ def delete_table(dataset_name, table_name, project=None): list_datasets(args.project) elif args.command == 'list-tables': list_tables(args.dataset_name, args.project) + elif args.command == 'create-table': + create_table(args.dataset_name, args.table_name, args.project) elif args.command == 'list-rows': list_rows(args.dataset_name, args.table_name, args.project) + elif args.command == 'copy-table': + copy_table(args.dataset_name, args.table_name, args.new_table_name) elif args.command == 'delete-table': delete_table(args.dataset_name, args.table_name, args.project) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py index ed2f47639fc3..372cbc834bf6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py @@ -19,7 +19,7 @@ DATASET_ID = 'test_dataset' -TABLE_ID = 'test_import_table' +TABLE_ID = 'test_table' @pytest.mark.xfail( @@ -62,7 +62,42 @@ def test_list_rows(capsys): assert 'Age' in out -def test_delete_table(capsys): +@pytest.fixture +def temporary_table(): + """Fixture that returns a factory for tables that do not yet exist and + will be automatically deleted after the test.""" + bigquery_client = bigquery.Client() + dataset = bigquery_client.dataset(DATASET_ID) + tables = [] + + def factory(table_name): + new_table = dataset.table(table_name) + if new_table.exists(): + new_table.delete() + tables.append(new_table) + return new_table + + yield factory + + for table in tables: + if table.exists(): + table.delete() + + +def test_create_table(temporary_table): + new_table = temporary_table('test_create_table') + snippets.create_table(DATASET_ID, new_table.name) + assert new_table.exists() + + +@pytest.mark.slow +def test_copy_table(temporary_table): + new_table = temporary_table('test_copy_table') + snippets.copy_table(DATASET_ID, TABLE_ID, new_table.name) + assert new_table.exists() + + +def test_delete_table(): # Create a table to delete bigquery_client = bigquery.Client() dataset = bigquery_client.dataset(DATASET_ID) From 296b4841fa7a0db068797708073f63bed1b05cf1 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Fri, 23 Sep 2016 09:48:46 -0700 Subject: [PATCH 0803/2016] Auto-update dependencies. [(#540)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/540) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 2beeafe63a8a..dfb42aaaaaf2 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1 +1 @@ -gcloud==0.18.1 +gcloud==0.18.2 From cbdd098f1f48a1c53e77977d839275a8b5751296 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Mon, 26 Sep 2016 11:34:45 -0700 Subject: [PATCH 0804/2016] Auto-update dependencies. [(#542)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/542) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index dfb42aaaaaf2..97a207d3aad0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1 +1 @@ -gcloud==0.18.2 +gcloud==0.18.3 From d26fcf4b32f757a7b9696661a2541b565bd548b1 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Thu, 29 Sep 2016 20:51:47 -0700 Subject: [PATCH 0805/2016] Move to google-cloud [(#544)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/544) --- .../google-cloud-bigquery/samples/snippets/async_query.py | 2 +- .../samples/snippets/export_data_to_gcs.py | 2 +- .../samples/snippets/load_data_from_file.py | 2 +- .../samples/snippets/load_data_from_gcs.py | 2 +- .../samples/snippets/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/snippets.py | 8 ++++---- .../samples/snippets/snippets_test.py | 2 +- .../google-cloud-bigquery/samples/snippets/stream_data.py | 2 +- .../google-cloud-bigquery/samples/snippets/sync_query.py | 2 +- 9 files changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query.py b/packages/google-cloud-bigquery/samples/snippets/async_query.py index 0ca324240893..37192d156599 100755 --- a/packages/google-cloud-bigquery/samples/snippets/async_query.py +++ b/packages/google-cloud-bigquery/samples/snippets/async_query.py @@ -27,7 +27,7 @@ import time import uuid -from gcloud import bigquery +from google.cloud import bigquery def async_query(query): diff --git a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py index c9771ea1b0d5..e9037ee01580 100644 --- a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py +++ b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py @@ -29,7 +29,7 @@ import time import uuid -from gcloud import bigquery +from google.cloud import bigquery def export_data_to_gcs(dataset_name, table_name, destination): diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py index cbb015347352..b4f851f1adeb 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py @@ -27,7 +27,7 @@ import argparse import time -from gcloud import bigquery +from google.cloud import bigquery def load_data_from_file(dataset_name, table_name, source_file_name): diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py index 1a577be649ca..4aa435fad007 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py @@ -29,7 +29,7 @@ import time import uuid -from gcloud import bigquery +from google.cloud import bigquery def load_data_from_gcs(dataset_name, table_name, source): diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 97a207d3aad0..11a303264935 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1 +1 @@ -gcloud==0.18.3 +google-cloud-bigquery==0.20.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index d201a84cba80..0e1f5d4b5a87 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -28,8 +28,8 @@ import time import uuid -from gcloud import bigquery -import gcloud.bigquery.job +from google.cloud import bigquery +import google.cloud.bigquery.job def list_projects(): @@ -145,7 +145,7 @@ def list_rows(dataset_name, table_name, project=None): break # Use format to create a simple table. - format_string = '{:<16} ' * len(table.schema) + format_string = '{!s:<16} ' * len(table.schema) # Print schema field names field_names = [field.name for field in table.schema] @@ -177,7 +177,7 @@ def copy_table(dataset_name, table_name, new_table_name, project=None): # Create the table if it doesn't exist. job.create_disposition = ( - gcloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED) + google.cloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED) # Start the job. job.begin() diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py index 372cbc834bf6..35f79af7b57b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from gcloud import bigquery +from google.cloud import bigquery import pytest import snippets diff --git a/packages/google-cloud-bigquery/samples/snippets/stream_data.py b/packages/google-cloud-bigquery/samples/snippets/stream_data.py index 5df6be114446..a90d432bf3e4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/stream_data.py +++ b/packages/google-cloud-bigquery/samples/snippets/stream_data.py @@ -29,7 +29,7 @@ import json from pprint import pprint -from gcloud import bigquery +from google.cloud import bigquery def stream_data(dataset_name, table_name, json_data): diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query.py b/packages/google-cloud-bigquery/samples/snippets/sync_query.py index 59007b537833..f21270ed0706 100755 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query.py +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query.py @@ -26,7 +26,7 @@ import argparse # [START sync_query] -from gcloud import bigquery +from google.cloud import bigquery def sync_query(query): From a6abfb753711f31e6a573f6253a5f0fdcd205a42 Mon Sep 17 00:00:00 2001 From: Jason Dobry Date: Wed, 5 Oct 2016 09:56:04 -0700 Subject: [PATCH 0806/2016] Add new "quickstart" samples [(#547)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/547) --- .../samples/snippets/quickstart.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/quickstart.py diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart.py b/packages/google-cloud-bigquery/samples/snippets/quickstart.py new file mode 100644 index 000000000000..2c9923f6eb80 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def run_quickstart(): + # [START bigquery_quickstart] + # Imports the Google Cloud client library + from google.cloud import bigquery + + # Instantiates a client + bigquery_client = bigquery.Client() + + # The name for the new dataset + dataset_name = 'my_new_dataset' + + # Prepares the new dataset + dataset = bigquery_client.dataset(dataset_name) + + # Creates the new dataset + dataset.create() + + print('Dataset {} created.'.format(dataset.name)) + # [END bigquery_quickstart] + + +if __name__ == '__main__': + run_quickstart() From 7ad0dc1305acced7d9c0683a48b18b81170bc742 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 5 Oct 2016 11:25:06 -0700 Subject: [PATCH 0807/2016] BigQuery - use new QueryJob.results() method. [(#555)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/555) This method was added in https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2083. I can remove my hack now. --- .../samples/snippets/async_query.py | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query.py b/packages/google-cloud-bigquery/samples/snippets/async_query.py index 37192d156599..2531c61528df 100755 --- a/packages/google-cloud-bigquery/samples/snippets/async_query.py +++ b/packages/google-cloud-bigquery/samples/snippets/async_query.py @@ -30,6 +30,16 @@ from google.cloud import bigquery +def wait_for_job(job): + while True: + job.reload() # Refreshes the state via a GET request. + if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.error_result) + return + time.sleep(1) + + def async_query(query): client = bigquery.Client() query_job = client.run_async_query(str(uuid.uuid4()), query) @@ -38,16 +48,8 @@ def async_query(query): wait_for_job(query_job) - # Manually construct the QueryResults. - # TODO: The client library will provide a helper method that does this. - # https://github.com/GoogleCloudPlatform/gcloud-python/issues/2083 - query_results = bigquery.query.QueryResults('', client) - query_results._properties['jobReference'] = { - 'jobId': query_job.name, - 'projectId': query_job.project - } - # Drain the query results by requesting a page at a time. + query_results = query_job.results() page_token = None while True: @@ -62,16 +64,6 @@ def async_query(query): break -def wait_for_job(job): - while True: - job.reload() # Refreshes the state via a GET request. - if job.state == 'DONE': - if job.error_result: - raise RuntimeError(job.error_result) - return - time.sleep(1) - - if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, From 8fc67f7994b7ea9569c1761a22ea84b1d357cfb3 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Wed, 12 Oct 2016 10:48:57 -0700 Subject: [PATCH 0808/2016] Quickstart tests [(#569)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/569) * Add tests for quickstarts * Update secrets --- .../samples/snippets/quickstart_test.py | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/quickstart_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py new file mode 100644 index 000000000000..f5842960ce6e --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py @@ -0,0 +1,45 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + +import quickstart + + +# Must match the dataset listed in quickstart.py (there's no easy way to +# extract this). +DATASET_ID = 'my_new_dataset' + + +@pytest.fixture +def temporary_dataset(): + """Fixture that ensures the test dataset does not exist before or + after a test.""" + bigquery_client = bigquery.Client() + dataset = bigquery_client.dataset(DATASET_ID) + + if dataset.exists(): + dataset.delete() + + yield + + if dataset.exists(): + dataset.delete() + + +def test_quickstart(capsys, temporary_dataset): + quickstart.run_quickstart() + out, _ = capsys.readouterr() + assert DATASET_ID in out From 2fe77eec7931e4577e5fc02d14802cd0dbec822b Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 24 Oct 2016 11:03:17 -0700 Subject: [PATCH 0809/2016] Generate readmes for most service samples [(#599)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/599) --- .../samples/snippets/README.md | 5 - .../samples/snippets/README.rst | 332 ++++++++++++++++++ .../samples/snippets/README.rst.in | 43 +++ .../samples/snippets/async_query.py | 2 +- .../samples/snippets/sync_query.py | 2 +- 5 files changed, 377 insertions(+), 7 deletions(-) delete mode 100644 packages/google-cloud-bigquery/samples/snippets/README.md create mode 100644 packages/google-cloud-bigquery/samples/snippets/README.rst create mode 100644 packages/google-cloud-bigquery/samples/snippets/README.rst.in diff --git a/packages/google-cloud-bigquery/samples/snippets/README.md b/packages/google-cloud-bigquery/samples/snippets/README.md deleted file mode 100644 index 9c1f9b9fbe5a..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# BigQuery Google Cloud Client Library Samples - - - - diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst new file mode 100644 index 000000000000..3fbb1716f3f1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -0,0 +1,332 @@ +.. This file is automatically generated. Do not edit this file directly. + +Google BigQuery Python Samples +=============================================================================== + +This directory contains samples for Google BigQuery. `Google BigQuery`_ is Google's fully managed, petabyte scale, low cost analytics data warehouse. BigQuery is NoOps—there is no infrastructure to manage and you don't need a database administrator—so you can focus on analyzing data to find meaningful insights, use familiar SQL, and take advantage of our pay-as-you-go model. + + + + +.. _Google BigQuery: https://cloud.google.com/bigquery/docs + +Setup +------------------------------------------------------------------------------- + + +Authentication +++++++++++++++ + +Authentication is typically done through `Application Default Credentials`_, +which means you do not have to change the code to authenticate as long as +your environment has credentials. You have a few options for setting up +authentication: + +#. When running locally, use the `Google Cloud SDK`_ + + .. code-block:: bash + + gcloud beta auth application-default login + + +#. When running on App Engine or Compute Engine, credentials are already + set-up. However, you may need to configure your Compute Engine instance + with `additional scopes`_. + +#. You can create a `Service Account key file`_. This file can be used to + authenticate to Google Cloud Platform services from any environment. To use + the file, set the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable to + the path to the key file, for example: + + .. code-block:: bash + + export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account.json + +.. _Application Default Credentials: https://cloud.google.com/docs/authentication#getting_credentials_for_server-centric_flow +.. _additional scopes: https://cloud.google.com/compute/docs/authentication#using +.. _Service Account key file: https://developers.google.com/identity/protocols/OAuth2ServiceAccount#creatinganaccount + +Install Dependencies +++++++++++++++++++++ + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + +Samples +------------------------------------------------------------------------------- + +Quickstart ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +To run this sample: + +.. code-block:: bash + + $ python quickstart.py + + +Sync query ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +To run this sample: + +.. code-block:: bash + + $ python sync_query.py + + usage: sync_query.py [-h] query + + Command-line application to perform synchronous queries in BigQuery. + + For more information, see the README.md under /bigquery. + + Example invocation: + $ python sync_query.py \ + 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' + + positional arguments: + query BigQuery SQL Query. + + optional arguments: + -h, --help show this help message and exit + + +Async query ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +To run this sample: + +.. code-block:: bash + + $ python async_query.py + + usage: async_query.py [-h] query + + Command-line application to perform asynchronous queries in BigQuery. + + For more information, see the README.md under /bigquery. + + Example invocation: + $ python async_query.py 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' + + positional arguments: + query BigQuery SQL Query. + + optional arguments: + -h, --help show this help message and exit + + +Snippets ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +To run this sample: + +.. code-block:: bash + + $ python snippets.py + + usage: snippets.py [-h] [--project PROJECT] + {list-datasets,list-tables,create-table,list-rows,copy-table,delete-table} + ... + + Samples that demonstrate basic operations in the BigQuery API. + + For more information, see the README.md under /bigquery. + + Example invocation: + $ python snippets.py list-datasets + + The dataset and table should already exist. + + positional arguments: + {list-datasets,list-tables,create-table,list-rows,copy-table,delete-table} + list-datasets Lists all datasets in a given project. If no project + is specified, then the currently active project is + used + list-tables Lists all of the tables in a given dataset. If no + project is specified, then the currently active + project is used. + create-table Creates a simple table in the given dataset. If no + project is specified, then the currently active + project is used. + list-rows Prints rows in the given table. Will print 25 rows at + most for brevity as tables can contain large amounts + of rows. If no project is specified, then the + currently active project is used. + copy-table Copies a table. If no project is specified, then the + currently active project is used. + delete-table Deletes a table in a given dataset. If no project is + specified, then the currently active project is used. + + optional arguments: + -h, --help show this help message and exit + --project PROJECT + + +Load data from a file ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +To run this sample: + +.. code-block:: bash + + $ python load_data_from_file.py + + usage: load_data_from_file.py [-h] dataset_name table_name source_file_name + + Loads data into BigQuery from a local file. + + For more information, see the README.md under /bigquery. + + Example invocation: + $ python load_data_from_file.py example_dataset example_table example-data.csv + + The dataset and table should already exist. + + positional arguments: + dataset_name + table_name + source_file_name Path to a .csv file to upload. + + optional arguments: + -h, --help show this help message and exit + + +Load data from Cloud Storage ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +To run this sample: + +.. code-block:: bash + + $ python load_data_from_gcs.py + + usage: load_data_from_gcs.py [-h] dataset_name table_name source + + Loads data into BigQuery from an object in Google Cloud Storage. + + For more information, see the README.md under /bigquery. + + Example invocation: + $ python load_data_from_gcs.py example_dataset example_table gs://example-bucket/example-data.csv + + The dataset and table should already exist. + + positional arguments: + dataset_name + table_name + source The Google Cloud Storage object to load. Must be in the format + gs://bucket_name/object_name + + optional arguments: + -h, --help show this help message and exit + + +Load streaming data ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +To run this sample: + +.. code-block:: bash + + $ python stream_data.py + + usage: stream_data.py [-h] dataset_name table_name json_data + + Loads a single row of data directly into BigQuery. + + For more information, see the README.md under /bigquery. + + Example invocation: + $ python stream_data.py example_dataset example_table '["Gandalf", 2000]' + + The dataset and table should already exist. + + positional arguments: + dataset_name + table_name + json_data The row to load into BigQuery as an array in JSON format. + + optional arguments: + -h, --help show this help message and exit + + +Export data to Cloud Storage ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +To run this sample: + +.. code-block:: bash + + $ python export_data_to_gcs.py + + usage: export_data_to_gcs.py [-h] dataset_name table_name destination + + Exports data from BigQuery to an object in Google Cloud Storage. + + For more information, see the README.md under /bigquery. + + Example invocation: + $ python export_data_to_gcs.py example_dataset example_table gs://example-bucket/example-data.csv + + The dataset and table should already exist. + + positional arguments: + dataset_name + table_name + destination The desintation Google Cloud Storage object.Must be in the + format gs://bucket_name/object_name + + optional arguments: + -h, --help show this help message and exit + + + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst.in b/packages/google-cloud-bigquery/samples/snippets/README.rst.in new file mode 100644 index 000000000000..49143f062e6c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst.in @@ -0,0 +1,43 @@ +# This file is used to generate README.rst + +product: + name: Google BigQuery + short_name: BigQuery + url: https://cloud.google.com/bigquery/docs + description: > + `Google BigQuery`_ is Google's fully managed, petabyte scale, low cost + analytics data warehouse. BigQuery is NoOps—there is no infrastructure to + manage and you don't need a database administrator—so you can focus on + analyzing data to find meaningful insights, use familiar SQL, and take + advantage of our pay-as-you-go model. + +setup: +- auth +- install_deps + +samples: +- name: Quickstart + file: quickstart.py +- name: Sync query + file: sync_query.py + show_help: true +- name: Async query + file: async_query.py + show_help: true +- name: Snippets + file: snippets.py + show_help: true +- name: Load data from a file + file: load_data_from_file.py + show_help: true +- name: Load data from Cloud Storage + file: load_data_from_gcs.py + show_help: true +- name: Load streaming data + file: stream_data.py + show_help: true +- name: Export data to Cloud Storage + file: export_data_to_gcs.py + show_help: true + +cloud_client_library: true diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query.py b/packages/google-cloud-bigquery/samples/snippets/async_query.py index 2531c61528df..aa3397351175 100755 --- a/packages/google-cloud-bigquery/samples/snippets/async_query.py +++ b/packages/google-cloud-bigquery/samples/snippets/async_query.py @@ -19,7 +19,7 @@ For more information, see the README.md under /bigquery. Example invocation: - $ python async_query.py \ + $ python async_query.py \\ 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' """ diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query.py b/packages/google-cloud-bigquery/samples/snippets/sync_query.py index f21270ed0706..37c8fea8a653 100755 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query.py +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query.py @@ -19,7 +19,7 @@ For more information, see the README.md under /bigquery. Example invocation: - $ python sync_query.py \ + $ python sync_query.py \\ 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' """ From 8eb92396483b9f10112b85b6bf6b7c4521da44fc Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 25 Oct 2016 10:54:45 -0700 Subject: [PATCH 0810/2016] Generate most non-appengine readmes Change-Id: I3779282126cdd05b047194d356932b9995484115 --- packages/google-cloud-bigquery/samples/snippets/README.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index 3fbb1716f3f1..be037a3b884e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -128,7 +128,8 @@ To run this sample: For more information, see the README.md under /bigquery. Example invocation: - $ python async_query.py 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' + $ python async_query.py \ + 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' positional arguments: query BigQuery SQL Query. From e5d5b2ec1e93522233433fa1c0019e56ae794916 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Thu, 27 Oct 2016 12:16:03 -0700 Subject: [PATCH 0811/2016] Add missing bigquery samples [(#622)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/622) --- .../samples/snippets/snippets.py | 46 +++++++++++++++++-- .../samples/snippets/snippets_test.py | 26 +++++++++-- 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index 0e1f5d4b5a87..f7ab33731058 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -33,14 +33,27 @@ def list_projects(): - raise NotImplementedError( - 'https://github.com/GoogleCloudPlatform/gcloud-python/issues/2143') + bigquery_client = bigquery.Client() + + projects = [] + page_token = None + + while True: + results, page_token = bigquery_client.list_projects( + page_token=page_token) + projects.extend(results) + + if not page_token: + break + + for project in projects: + print(project.project_id) def list_datasets(project=None): """Lists all datasets in a given project. - If no project is specified, then the currently active project is used + If no project is specified, then the currently active project is used. """ bigquery_client = bigquery.Client(project=project) @@ -59,6 +72,20 @@ def list_datasets(project=None): print(dataset.name) +def create_dataset(dataset_name, project=None): + """Craetes a dataset in a given project. + + If no project is specified, then the currently active project is used. + """ + bigquery_client = bigquery.Client(project=project) + + dataset = bigquery_client.dataset(dataset_name) + + dataset.create() + + print('Created dataset {}.'.format(dataset_name)) + + def list_tables(dataset_name, project=None): """Lists all of the tables in a given dataset. @@ -221,9 +248,16 @@ def delete_table(dataset_name, table_name, project=None): subparsers = parser.add_subparsers(dest='command') + list_projects_parser = subparsers.add_parser( + 'list-projects', help=list_projects.__doc__) + list_datasets_parser = subparsers.add_parser( 'list-datasets', help=list_datasets.__doc__) + create_dataset_parser = subparsers.add_parser( + 'list-datasets', help=list_datasets.__doc__) + create_dataset_parser.add_argument('dataset_name') + list_tables_parser = subparsers.add_parser( 'list-tables', help=list_tables.__doc__) list_tables_parser.add_argument('dataset_name') @@ -251,8 +285,12 @@ def delete_table(dataset_name, table_name, project=None): args = parser.parse_args() - if args.command == 'list-datasets': + if args.command == 'list-projects': + list_projects() + elif args.command == 'list-datasets': list_datasets(args.project) + elif args.command == 'create-dataset': + create_dataset(args.dataset_name, args.project) elif args.command == 'list-tables': list_tables(args.dataset_name, args.project) elif args.command == 'create-table': diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py index 35f79af7b57b..af368d9a4a2a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py @@ -22,9 +22,6 @@ TABLE_ID = 'test_table' -@pytest.mark.xfail( - strict=True, - reason='https://github.com/GoogleCloudPlatform/gcloud-python/issues/2143') def test_list_projects(): snippets.list_projects() # No need to check the ouput, lack of exception is enough. @@ -39,6 +36,29 @@ def test_list_datasets(capsys): assert DATASET_ID in out +@pytest.fixture +def cleanup_dataset(): + dataset_name = 'test_temporary_dataset' + bigquery_client = bigquery.Client() + dataset = bigquery_client.dataset(dataset_name) + + if dataset.exists(): + dataset.delete() + + yield dataset_name + + if dataset.exists(): + dataset.delete() + + +def test_create_dataset(capsys, cleanup_dataset): + snippets.create_dataset(cleanup_dataset) + + out, _ = capsys.readouterr() + + assert cleanup_dataset in out + + def test_list_tables(capsys): # Requires the dataset and table to have been created in the test project. snippets.list_tables(DATASET_ID) From 64ba3f2420dbfb3c642a068fcfcd86f986d6a099 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 15 Nov 2016 14:58:27 -0800 Subject: [PATCH 0812/2016] Update samples to support latest Google Cloud Python [(#656)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/656) --- .../samples/snippets/requirements.txt | 2 +- .../samples/snippets/snippets.py | 55 +++---------------- 2 files changed, 8 insertions(+), 49 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 11a303264935..d9f269ca7b63 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-bigquery==0.20.0 +google-cloud-bigquery==0.21.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index f7ab33731058..b02ac8ce61fb 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -35,18 +35,7 @@ def list_projects(): bigquery_client = bigquery.Client() - projects = [] - page_token = None - - while True: - results, page_token = bigquery_client.list_projects( - page_token=page_token) - projects.extend(results) - - if not page_token: - break - - for project in projects: + for project in bigquery_client.list_projects(): print(project.project_id) @@ -57,18 +46,7 @@ def list_datasets(project=None): """ bigquery_client = bigquery.Client(project=project) - datasets = [] - page_token = None - - while True: - results, page_token = bigquery_client.list_datasets( - page_token=page_token) - datasets.extend(results) - - if not page_token: - break - - for dataset in datasets: + for dataset in bigquery_client.list_datasets(): print(dataset.name) @@ -98,17 +76,7 @@ def list_tables(dataset_name, project=None): print('Dataset {} does not exist.'.format(dataset_name)) return - tables = [] - page_token = None - - while True: - results, page_token = dataset.list_tables(page_token=page_token) - tables.extend(results) - - if not page_token: - break - - for table in tables: + for table in dataset.list_tables(): print(table.name) @@ -157,19 +125,10 @@ def list_rows(dataset_name, table_name, project=None): # Reload the table so that the schema is available. table.reload() - rows = [] - page_token = None - - # Load at most 25 results. You can change this to `while True` and change - # the max_results argument to load more rows from BigQuery, but note - # that this can take some time. It's preferred to use a query. - while len(rows) < 25: - results, total_rows, page_token = table.fetch_data( - max_results=25, page_token=page_token) - rows.extend(results) - - if not page_token: - break + # Load at most 25 results. You can change the max_results argument to load + # more rows from BigQuery, but note that this can take some time. It's + # preferred to use a query. + rows = list(table.fetch_data(max_results=25)) # Use format to create a simple table. format_string = '{!s:<16} ' * len(table.schema) From 70118da97b71a9194114f3643d5c917155006caa Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 15 Nov 2016 15:05:13 -0800 Subject: [PATCH 0813/2016] Update readmes Change-Id: Ie385fd8105325c6f2754b737e0f11c84254bcb47 --- .../google-cloud-bigquery/samples/snippets/README.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index be037a3b884e..1824609ddb7c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -150,7 +150,7 @@ To run this sample: $ python snippets.py usage: snippets.py [-h] [--project PROJECT] - {list-datasets,list-tables,create-table,list-rows,copy-table,delete-table} + {list-projects,list-datasets,list-tables,create-table,list-rows,copy-table,delete-table} ... Samples that demonstrate basic operations in the BigQuery API. @@ -163,10 +163,14 @@ To run this sample: The dataset and table should already exist. positional arguments: - {list-datasets,list-tables,create-table,list-rows,copy-table,delete-table} + {list-projects,list-datasets,list-tables,create-table,list-rows,copy-table,delete-table} + list-projects list-datasets Lists all datasets in a given project. If no project is specified, then the currently active project is - used + used. + list-datasets Lists all datasets in a given project. If no project + is specified, then the currently active project is + used. list-tables Lists all of the tables in a given dataset. If no project is specified, then the currently active project is used. From 14fbe2da6e94ec91b9861f2f7dbd62b2e05bfa91 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Wed, 30 Nov 2016 09:49:50 -0800 Subject: [PATCH 0814/2016] Fix bigquery load from file sample Change-Id: I5bcb3d41bed33f638e5dba13da3335adfbdc2ead --- .../samples/snippets/load_data_from_file.py | 2 -- .../samples/snippets/load_data_from_file_test.py | 5 ----- 2 files changed, 7 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py index b4f851f1adeb..671752531fff 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py @@ -44,8 +44,6 @@ def load_data_from_file(dataset_name, table_name, source_file_name): job = table.upload_from_file( source_file, source_format='text/csv') - job.begin() - wait_for_job(job) print('Loaded {} rows into {}:{}.'.format( diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py index eccefe038433..434bbd1d84bc 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py @@ -11,17 +11,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest - import load_data_from_file DATASET_ID = 'test_dataset' TABLE_ID = 'test_import_table' -@pytest.mark.xfail( - strict=True, - reason='https://github.com/GoogleCloudPlatform/gcloud-python/issues/2133') def test_load_table(resource, capsys): data_path = resource('data.csv') From 4d159ec6ffbfaca2d2228778c685dc4b6193033c Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Wed, 30 Nov 2016 09:55:03 -0800 Subject: [PATCH 0815/2016] Fix lint Change-Id: If721ff085eb502b679a8decd3a39ac112425a114 --- .../samples/snippets/load_data_from_file.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py index 671752531fff..80018f3fc888 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py @@ -27,6 +27,7 @@ import argparse import time + from google.cloud import bigquery From 8ac186017beeb9478c9c25f8bdd043e9de6585ed Mon Sep 17 00:00:00 2001 From: Ryan Matsumoto Date: Wed, 30 Nov 2016 13:18:30 -0800 Subject: [PATCH 0816/2016] Adjusted error handling based on Googler feedback [(#693)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/693) --- .../samples/snippets/load_data_from_gcs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py index 4aa435fad007..7c576e20ad67 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py @@ -54,7 +54,7 @@ def wait_for_job(job): job.reload() if job.state == 'DONE': if job.error_result: - raise RuntimeError(job.error_result) + raise RuntimeError(job.errors) return time.sleep(1) From e4b2f4d90dde4d48cf88a3f8546b4b002216828a Mon Sep 17 00:00:00 2001 From: Ryan Matsumoto Date: Wed, 30 Nov 2016 14:14:36 -0800 Subject: [PATCH 0817/2016] Changed error handling for all other big query samples as well [(#694)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/694) --- packages/google-cloud-bigquery/samples/snippets/async_query.py | 2 +- .../samples/snippets/export_data_to_gcs.py | 2 +- .../samples/snippets/load_data_from_file.py | 2 +- packages/google-cloud-bigquery/samples/snippets/snippets.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query.py b/packages/google-cloud-bigquery/samples/snippets/async_query.py index aa3397351175..f90c8f2558ef 100755 --- a/packages/google-cloud-bigquery/samples/snippets/async_query.py +++ b/packages/google-cloud-bigquery/samples/snippets/async_query.py @@ -35,7 +35,7 @@ def wait_for_job(job): job.reload() # Refreshes the state via a GET request. if job.state == 'DONE': if job.error_result: - raise RuntimeError(job.error_result) + raise RuntimeError(job.errors) return time.sleep(1) diff --git a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py index e9037ee01580..3aee442c14b5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py +++ b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py @@ -54,7 +54,7 @@ def wait_for_job(job): job.reload() if job.state == 'DONE': if job.error_result: - raise RuntimeError(job.error_result) + raise RuntimeError(job.errors) return time.sleep(1) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py index 80018f3fc888..0bbdd7ba7d80 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py @@ -56,7 +56,7 @@ def wait_for_job(job): job.reload() if job.state == 'DONE': if job.error_result: - raise RuntimeError(job.error_result) + raise RuntimeError(job.errors) return time.sleep(1) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index b02ac8ce61fb..f4294123d9d4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -180,7 +180,7 @@ def wait_for_job(job): job.reload() # Refreshes the state via a GET request. if job.state == 'DONE': if job.error_result: - raise RuntimeError(job.error_result) + raise RuntimeError(job.errors) return time.sleep(1) From dd04f9ed81b298052503b6479d9692fc348cc8de Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 6 Dec 2016 13:56:45 -0800 Subject: [PATCH 0818/2016] Add bigquery shakespeare example [(#604)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/604) --- .../samples/snippets/requirements.txt | 2 +- .../samples/snippets/simple_app.py | 59 +++++++++++++++++++ .../samples/snippets/simple_app_test.py | 21 +++++++ 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/simple_app.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/simple_app_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index d9f269ca7b63..7523b0dacfb2 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-bigquery==0.21.0 +google-cloud-bigquery==0.22.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app.py b/packages/google-cloud-bigquery/samples/snippets/simple_app.py new file mode 100644 index 000000000000..7180c4fda62a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Simple application that performs a query with BigQuery.""" +# [START all] +# [START create_client] +from google.cloud import bigquery + + +def query_shakespeare(): + client = bigquery.Client() + # [END create_client] + # [START run_query] + query_results = client.run_sync_query(""" + SELECT + APPROX_TOP_COUNT(corpus, 10) as title, + COUNT(*) as unique_words + FROM `publicdata.samples.shakespeare`;""") + + # Use standard SQL syntax for queries. + # See: https://cloud.google.com/bigquery/sql-reference/ + query_results.use_legacy_sql = False + + query_results.run() + # [END run_query] + + # [START print_results] + # Drain the query results by requesting a page at a time. + page_token = None + + while True: + rows, total_rows, page_token = query_results.fetch_data( + max_results=10, + page_token=page_token) + + for row in rows: + print(row) + + if not page_token: + break + # [END print_results] + + +if __name__ == '__main__': + query_shakespeare() +# [END all] diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py b/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py new file mode 100644 index 000000000000..3733bf6ef3fe --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py @@ -0,0 +1,21 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import simple_app + + +def test_query_shakespeare(capsys): + simple_app.query_shakespeare() + out, _ = capsys.readouterr() + assert 'hamlet' in out From 34f41752b247a23f790dbc0e7811daa01e6902ff Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 6 Dec 2016 15:01:30 -0800 Subject: [PATCH 0819/2016] BigQuery parameterized query sample [(#699)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/699) Available in latest version of google-cloud-bigquery (0.22.0), so upgrades the requirement, too. --- .../samples/snippets/sync_query_params.py | 120 ++++++++++++++++++ .../snippets/sync_query_params_test.py | 31 +++++ 2 files changed, 151 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/sync_query_params.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query_params.py b/packages/google-cloud-bigquery/samples/snippets/sync_query_params.py new file mode 100644 index 000000000000..41ad940524e0 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query_params.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Command-line app to perform synchronous queries with parameters in BigQuery. + +For more information, see the README.md under /bigquery. + +Example invocation: + $ python sync_query_params.py --use-named-params 'romeoandjuliet' 100 + $ python sync_query_params.py --use-positional-params 'romeoandjuliet' 100 +""" + +import argparse + +from google.cloud import bigquery + + +def print_results(query_results): + """Print the query results by requesting a page at a time.""" + page_token = None + + while True: + rows, total_rows, page_token = query_results.fetch_data( + max_results=10, + page_token=page_token) + + for row in rows: + print(row) + + if not page_token: + break + + +def sync_query_positional_params(corpus, min_word_count): + client = bigquery.Client() + query_results = client.run_sync_query( + """SELECT word, word_count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = ? + AND word_count >= ? + ORDER BY word_count DESC; + """, + query_parameters=( + bigquery.ScalarQueryParameter( + # Set the name to None to use positional parameters (? symbol + # in the query). Note that you cannot mix named and positional + # parameters. + None, + 'STRING', + corpus), + bigquery.ScalarQueryParameter(None, 'INT64', min_word_count))) + + # Only standard SQL syntax supports parameters in queries. + # See: https://cloud.google.com/bigquery/sql-reference/ + query_results.use_legacy_sql = False + query_results.run() + print_results(query_results) + + +def sync_query_named_params(corpus, min_word_count): + client = bigquery.Client() + query_results = client.run_sync_query( + """SELECT word, word_count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = @corpus + AND word_count >= @min_word_count + ORDER BY word_count DESC; + """, + query_parameters=( + bigquery.ScalarQueryParameter('corpus', 'STRING', corpus), + bigquery.ScalarQueryParameter( + 'min_word_count', + 'INT64', + min_word_count))) + query_results.use_legacy_sql = False + query_results.run() + print_results(query_results) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument( + 'corpus', + help='Corpus to search from Shakespeare dataset.') + parser.add_argument( + 'min_word_count', + help='Minimum count of words to query.', + type=int) + + params_type_parser = parser.add_mutually_exclusive_group(required=False) + params_type_parser.add_argument( + '--use-named-params', + dest='use_named_params', + action='store_true') + params_type_parser.add_argument( + '--use-positional-params', + dest='use_named_params', + action='store_false') + parser.set_defaults(use_named_params=False) + args = parser.parse_args() + + if args.use_named_params: + sync_query_named_params(args.corpus, args.min_word_count) + else: + sync_query_positional_params(args.corpus, args.min_word_count) diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py b/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py new file mode 100644 index 000000000000..270dfc62a580 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py @@ -0,0 +1,31 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sync_query_params + + +def test_sync_query_named_params(cloud_config, capsys): + sync_query_params.sync_query_named_params( + corpus='romeoandjuliet', + min_word_count=100) + out, _ = capsys.readouterr() + assert 'love' in out + + +def test_sync_query_positional_params(cloud_config, capsys): + sync_query_params.sync_query_positional_params( + corpus='romeoandjuliet', + min_word_count=100) + out, _ = capsys.readouterr() + assert 'love' in out From 06352b92d4e8b3e9e2c18d37a5ce1dbe102332f0 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 13 Dec 2016 09:54:02 -0800 Subject: [PATCH 0820/2016] Auto-update dependencies. [(#715)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/715) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 7523b0dacfb2..ba11329ac713 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-bigquery==0.22.0 +google-cloud-bigquery==0.22.1 From 9c40d81a0b755feeb97ac11b9f84e185ac4c50c0 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 4 Apr 2017 16:08:30 -0700 Subject: [PATCH 0821/2016] Remove cloud config fixture [(#887)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/887) * Remove cloud config fixture * Fix client secrets * Fix bigtable instance --- .../samples/snippets/async_query_test.py | 2 +- .../samples/snippets/export_data_to_gcs_test.py | 9 +++++---- .../samples/snippets/load_data_from_gcs_test.py | 8 +++++--- .../samples/snippets/sync_query_params_test.py | 4 ++-- .../samples/snippets/sync_query_test.py | 2 +- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query_test.py b/packages/google-cloud-bigquery/samples/snippets/async_query_test.py index 810c538a6da7..85ce3fce963e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/async_query_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/async_query_test.py @@ -15,7 +15,7 @@ from async_query import async_query -def test_async_query(cloud_config, capsys): +def test_async_query(capsys): query = ( 'SELECT corpus FROM `publicdata.samples.shakespeare` ' 'GROUP BY corpus;') diff --git a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py index acbbe50e55e0..a41cfd226fc7 100644 --- a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py @@ -11,19 +11,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import export_data_to_gcs +import os +import export_data_to_gcs +BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] DATASET_ID = 'test_dataset' TABLE_ID = 'test_table' -def test_export_data_to_gcs(cloud_config, capsys): +def test_export_data_to_gcs(capsys): export_data_to_gcs.export_data_to_gcs( DATASET_ID, TABLE_ID, - 'gs://{}/test-export-data-to-gcs.csv'.format( - cloud_config.storage_bucket)) + 'gs://{}/test-export-data-to-gcs.csv'.format(BUCKET)) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py index 2d1c66162c0b..dbd39fc5ceca 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py @@ -11,15 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os + import load_data_from_gcs +BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] DATASET_ID = 'test_dataset' TABLE_ID = 'test_import_table' -def test_load_table(cloud_config, capsys): - cloud_storage_input_uri = 'gs://{}/data.csv'.format( - cloud_config.storage_bucket) +def test_load_table(capsys): + cloud_storage_input_uri = 'gs://{}/data.csv'.format(BUCKET) load_data_from_gcs.load_data_from_gcs( DATASET_ID, diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py b/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py index 270dfc62a580..d87fe8231f4a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py @@ -15,7 +15,7 @@ import sync_query_params -def test_sync_query_named_params(cloud_config, capsys): +def test_sync_query_named_params(capsys): sync_query_params.sync_query_named_params( corpus='romeoandjuliet', min_word_count=100) @@ -23,7 +23,7 @@ def test_sync_query_named_params(cloud_config, capsys): assert 'love' in out -def test_sync_query_positional_params(cloud_config, capsys): +def test_sync_query_positional_params(capsys): sync_query_params.sync_query_positional_params( corpus='romeoandjuliet', min_word_count=100) diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py b/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py index 6f6b4f5fa8e8..26c8973e4bfe 100644 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py @@ -15,7 +15,7 @@ from sync_query import sync_query -def test_sync_query(cloud_config, capsys): +def test_sync_query(capsys): query = ( 'SELECT corpus FROM `publicdata.samples.shakespeare` ' 'GROUP BY corpus;') From 083acbe1680af21e276cf16f041a5736c52f0515 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Wed, 5 Apr 2017 15:21:33 -0700 Subject: [PATCH 0822/2016] Remove resource [(#890)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/890) * Remove resource fixture * Remove remote resource --- .../samples/snippets/load_data_from_file_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py index 434bbd1d84bc..960fe62c9d7c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py @@ -11,14 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os + import load_data_from_file +RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') DATASET_ID = 'test_dataset' TABLE_ID = 'test_import_table' -def test_load_table(resource, capsys): - data_path = resource('data.csv') +def test_load_table(capsys): + data_path = os.path.join(RESOURCES, 'data.csv') load_data_from_file.load_data_from_file( DATASET_ID, From 6d1661f2946a9335ad616470d7435e0e94416d57 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 19 Dec 2016 12:25:25 -0800 Subject: [PATCH 0823/2016] BigQuery: named parameter query samples - query using an array value as a parameter. - query using timestamps in named parameters. See: https://cloud.google.com/bigquery/querying-data#using_timestamps_in_parameterized_queries - query using a struct in query parameters. --- .../samples/snippets/requirements.txt | 1 + .../samples/snippets/sync_query_params.py | 145 ++++++++++++++---- .../snippets/sync_query_params_test.py | 21 ++- 3 files changed, 136 insertions(+), 31 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index ba11329ac713..c1f420a87aeb 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1 +1,2 @@ google-cloud-bigquery==0.22.1 +pytz==2016.10 diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query_params.py b/packages/google-cloud-bigquery/samples/snippets/sync_query_params.py index 41ad940524e0..1d0f3298ab4b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query_params.py +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query_params.py @@ -24,8 +24,10 @@ """ import argparse +import datetime from google.cloud import bigquery +import pytz def print_results(query_results): @@ -46,13 +48,14 @@ def print_results(query_results): def sync_query_positional_params(corpus, min_word_count): client = bigquery.Client() + query = """SELECT word, word_count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = ? + AND word_count >= ? + ORDER BY word_count DESC; + """ query_results = client.run_sync_query( - """SELECT word, word_count - FROM `bigquery-public-data.samples.shakespeare` - WHERE corpus = ? - AND word_count >= ? - ORDER BY word_count DESC; - """, + query, query_parameters=( bigquery.ScalarQueryParameter( # Set the name to None to use positional parameters (? symbol @@ -72,13 +75,14 @@ def sync_query_positional_params(corpus, min_word_count): def sync_query_named_params(corpus, min_word_count): client = bigquery.Client() + query = """SELECT word, word_count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = @corpus + AND word_count >= @min_word_count + ORDER BY word_count DESC; + """ query_results = client.run_sync_query( - """SELECT word, word_count - FROM `bigquery-public-data.samples.shakespeare` - WHERE corpus = @corpus - AND word_count >= @min_word_count - ORDER BY word_count DESC; - """, + query, query_parameters=( bigquery.ScalarQueryParameter('corpus', 'STRING', corpus), bigquery.ScalarQueryParameter( @@ -90,31 +94,118 @@ def sync_query_named_params(corpus, min_word_count): print_results(query_results) +def sync_query_array_params(gender, states): + client = bigquery.Client() + query = """SELECT name, sum(number) as count + FROM `bigquery-public-data.usa_names.usa_1910_2013` + WHERE gender = @gender + AND state IN UNNEST(@states) + GROUP BY name + ORDER BY count DESC + LIMIT 10; + """ + query_results = client.run_sync_query( + query, + query_parameters=( + bigquery.ScalarQueryParameter('gender', 'STRING', gender), + bigquery.ArrayQueryParameter('states', 'STRING', states))) + query_results.use_legacy_sql = False + query_results.run() + print_results(query_results) + + +def sync_query_timestamp_params(year, month, day, hour, minute): + client = bigquery.Client() + query = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);' + query_results = client.run_sync_query( + query, + query_parameters=[ + bigquery.ScalarQueryParameter( + 'ts_value', + 'TIMESTAMP', + datetime.datetime( + year, month, day, hour, minute, tzinfo=pytz.UTC))]) + query_results.use_legacy_sql = False + query_results.run() + print_results(query_results) + + +def sync_query_struct_params(x, y): + client = bigquery.Client() + query = 'SELECT @struct_value AS s;' + query_results = client.run_sync_query( + query, + query_parameters=[ + bigquery.StructQueryParameter( + 'struct_value', + bigquery.ScalarQueryParameter('x', 'INT64', x), + bigquery.ScalarQueryParameter('y', 'STRING', y))]) + query_results.use_legacy_sql = False + query_results.run() + print_results(query_results) + + if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument( + subparsers = parser.add_subparsers(dest='sample', help='samples') + named_parser = subparsers.add_parser( + 'named', + help='Run a query with named parameters.') + named_parser.add_argument( 'corpus', help='Corpus to search from Shakespeare dataset.') - parser.add_argument( + named_parser.add_argument( 'min_word_count', help='Minimum count of words to query.', type=int) - - params_type_parser = parser.add_mutually_exclusive_group(required=False) - params_type_parser.add_argument( - '--use-named-params', - dest='use_named_params', - action='store_true') - params_type_parser.add_argument( - '--use-positional-params', - dest='use_named_params', - action='store_false') - parser.set_defaults(use_named_params=False) + positional_parser = subparsers.add_parser( + 'positional', + help='Run a query with positional parameters.') + positional_parser.add_argument( + 'corpus', + help='Corpus to search from Shakespeare dataset.') + positional_parser.add_argument( + 'min_word_count', + help='Minimum count of words to query.', + type=int) + array_parser = subparsers.add_parser( + 'array', + help='Run a query with an array parameter.') + array_parser.add_argument( + 'gender', + choices=['F', 'M'], + help='Gender of baby in the Social Security baby names database.') + array_parser.add_argument( + 'states', + help='U.S. States to consider for popular baby names.', + nargs='+') + timestamp_parser = subparsers.add_parser( + 'timestamp', + help='Run a query with a timestamp parameter.') + timestamp_parser.add_argument('year', type=int) + timestamp_parser.add_argument('month', type=int) + timestamp_parser.add_argument('day', type=int) + timestamp_parser.add_argument('hour', type=int) + timestamp_parser.add_argument('minute', type=int) + struct_parser = subparsers.add_parser( + 'struct', + help='Run a query with a struct parameter.') + struct_parser.add_argument('x', help='Integer for x', type=int) + struct_parser.add_argument('y', help='String for y') args = parser.parse_args() - if args.use_named_params: + if args.sample == 'named': sync_query_named_params(args.corpus, args.min_word_count) - else: + elif args.sample == 'positional': sync_query_positional_params(args.corpus, args.min_word_count) + elif args.sample == 'array': + sync_query_array_params(args.gender, args.states) + elif args.sample == 'timestamp': + sync_query_timestamp_params( + args.year, args.month, args.day, args.hour, args.minute) + elif args.sample == 'struct': + sync_query_struct_params(args.x, args.y) + else: + print('Unexpected value for sample') diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py b/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py index d87fe8231f4a..c20fe830037d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py @@ -16,11 +16,11 @@ def test_sync_query_named_params(capsys): - sync_query_params.sync_query_named_params( - corpus='romeoandjuliet', - min_word_count=100) + sync_query_params.sync_query_array_params( + gender='M', + states=['WA', 'WI', 'WV', 'WY']) out, _ = capsys.readouterr() - assert 'love' in out + assert 'James' in out def test_sync_query_positional_params(capsys): @@ -29,3 +29,16 @@ def test_sync_query_positional_params(capsys): min_word_count=100) out, _ = capsys.readouterr() assert 'love' in out + + +def test_sync_query_struct_params(capsys): + sync_query_params.sync_query_struct_params(765, "hello world") + out, _ = capsys.readouterr() + assert '765' in out + assert 'hello world' in out + + +def test_sync_query_timestamp_params(capsys): + sync_query_params.sync_query_timestamp_params(2016, 12, 7, 8, 0) + out, _ = capsys.readouterr() + assert '2016-12-07 09:00:00' in out From 542a37388fec1f3754da79dac91247301f03eef2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 21 Apr 2017 16:51:55 -0700 Subject: [PATCH 0824/2016] Rename query file and use async method. Update to latest version of google-cloud-bigquery. --- .../{sync_query_params.py => query_params.py} | 98 ++++++++++++------- ...ry_params_test.py => query_params_test.py} | 28 ++++-- .../samples/snippets/requirements.txt | 2 +- 3 files changed, 84 insertions(+), 44 deletions(-) rename packages/google-cloud-bigquery/samples/snippets/{sync_query_params.py => query_params.py} (72%) rename packages/google-cloud-bigquery/samples/snippets/{sync_query_params_test.py => query_params_test.py} (60%) diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query_params.py b/packages/google-cloud-bigquery/samples/snippets/query_params.py similarity index 72% rename from packages/google-cloud-bigquery/samples/snippets/sync_query_params.py rename to packages/google-cloud-bigquery/samples/snippets/query_params.py index 1d0f3298ab4b..891f0ef79487 100644 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query_params.py +++ b/packages/google-cloud-bigquery/samples/snippets/query_params.py @@ -14,22 +14,34 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Command-line app to perform synchronous queries with parameters in BigQuery. +"""Command-line app to perform queries with parameters in BigQuery. For more information, see the README.md under /bigquery. Example invocation: - $ python sync_query_params.py --use-named-params 'romeoandjuliet' 100 - $ python sync_query_params.py --use-positional-params 'romeoandjuliet' 100 + $ python query_params.py --use-named-params 'romeoandjuliet' 100 + $ python query_params.py --use-positional-params 'romeoandjuliet' 100 """ import argparse import datetime +import time +import uuid from google.cloud import bigquery import pytz +def wait_for_job(job): + while True: + job.reload() # Refreshes the state via a GET request. + if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.errors) + return + time.sleep(1) + + def print_results(query_results): """Print the query results by requesting a page at a time.""" page_token = None @@ -46,7 +58,7 @@ def print_results(query_results): break -def sync_query_positional_params(corpus, min_word_count): +def query_positional_params(corpus, min_word_count): client = bigquery.Client() query = """SELECT word, word_count FROM `bigquery-public-data.samples.shakespeare` @@ -54,7 +66,8 @@ def sync_query_positional_params(corpus, min_word_count): AND word_count >= ? ORDER BY word_count DESC; """ - query_results = client.run_sync_query( + query_job = client.run_async_query( + str(uuid.uuid4()), query, query_parameters=( bigquery.ScalarQueryParameter( @@ -68,12 +81,15 @@ def sync_query_positional_params(corpus, min_word_count): # Only standard SQL syntax supports parameters in queries. # See: https://cloud.google.com/bigquery/sql-reference/ - query_results.use_legacy_sql = False - query_results.run() - print_results(query_results) + query_job.use_legacy_sql = False + + # Start the query and wait for the job to complete. + query_job.begin() + wait_for_job(query_job) + print_results(query_job.results()) -def sync_query_named_params(corpus, min_word_count): +def query_named_params(corpus, min_word_count): client = bigquery.Client() query = """SELECT word, word_count FROM `bigquery-public-data.samples.shakespeare` @@ -81,7 +97,8 @@ def sync_query_named_params(corpus, min_word_count): AND word_count >= @min_word_count ORDER BY word_count DESC; """ - query_results = client.run_sync_query( + query_job = client.run_async_query( + str(uuid.uuid4()), query, query_parameters=( bigquery.ScalarQueryParameter('corpus', 'STRING', corpus), @@ -89,12 +106,15 @@ def sync_query_named_params(corpus, min_word_count): 'min_word_count', 'INT64', min_word_count))) - query_results.use_legacy_sql = False - query_results.run() - print_results(query_results) + query_job.use_legacy_sql = False + # Start the query and wait for the job to complete. + query_job.begin() + wait_for_job(query_job) + print_results(query_job.results()) -def sync_query_array_params(gender, states): + +def query_array_params(gender, states): client = bigquery.Client() query = """SELECT name, sum(number) as count FROM `bigquery-public-data.usa_names.usa_1910_2013` @@ -104,20 +124,25 @@ def sync_query_array_params(gender, states): ORDER BY count DESC LIMIT 10; """ - query_results = client.run_sync_query( + query_job = client.run_async_query( + str(uuid.uuid4()), query, query_parameters=( bigquery.ScalarQueryParameter('gender', 'STRING', gender), bigquery.ArrayQueryParameter('states', 'STRING', states))) - query_results.use_legacy_sql = False - query_results.run() - print_results(query_results) + query_job.use_legacy_sql = False + + # Start the query and wait for the job to complete. + query_job.begin() + wait_for_job(query_job) + print_results(query_job.results()) -def sync_query_timestamp_params(year, month, day, hour, minute): +def query_timestamp_params(year, month, day, hour, minute): client = bigquery.Client() query = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);' - query_results = client.run_sync_query( + query_job = client.run_async_query( + str(uuid.uuid4()), query, query_parameters=[ bigquery.ScalarQueryParameter( @@ -125,24 +150,31 @@ def sync_query_timestamp_params(year, month, day, hour, minute): 'TIMESTAMP', datetime.datetime( year, month, day, hour, minute, tzinfo=pytz.UTC))]) - query_results.use_legacy_sql = False - query_results.run() - print_results(query_results) + query_job.use_legacy_sql = False + # Start the query and wait for the job to complete. + query_job.begin() + wait_for_job(query_job) + print_results(query_job.results()) -def sync_query_struct_params(x, y): + +def query_struct_params(x, y): client = bigquery.Client() query = 'SELECT @struct_value AS s;' - query_results = client.run_sync_query( + query_job = client.run_async_query( + str(uuid.uuid4()), query, query_parameters=[ bigquery.StructQueryParameter( 'struct_value', bigquery.ScalarQueryParameter('x', 'INT64', x), bigquery.ScalarQueryParameter('y', 'STRING', y))]) - query_results.use_legacy_sql = False - query_results.run() - print_results(query_results) + query_job.use_legacy_sql = False + + # Start the query and wait for the job to complete. + query_job.begin() + wait_for_job(query_job) + print_results(query_job.results()) if __name__ == '__main__': @@ -197,15 +229,15 @@ def sync_query_struct_params(x, y): args = parser.parse_args() if args.sample == 'named': - sync_query_named_params(args.corpus, args.min_word_count) + query_named_params(args.corpus, args.min_word_count) elif args.sample == 'positional': - sync_query_positional_params(args.corpus, args.min_word_count) + query_positional_params(args.corpus, args.min_word_count) elif args.sample == 'array': - sync_query_array_params(args.gender, args.states) + query_array_params(args.gender, args.states) elif args.sample == 'timestamp': - sync_query_timestamp_params( + query_timestamp_params( args.year, args.month, args.day, args.hour, args.minute) elif args.sample == 'struct': - sync_query_struct_params(args.x, args.y) + query_struct_params(args.x, args.y) else: print('Unexpected value for sample') diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py b/packages/google-cloud-bigquery/samples/snippets/query_params_test.py similarity index 60% rename from packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py rename to packages/google-cloud-bigquery/samples/snippets/query_params_test.py index c20fe830037d..66f4951ddb86 100644 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query_params_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/query_params_test.py @@ -12,33 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sync_query_params +import query_params -def test_sync_query_named_params(capsys): - sync_query_params.sync_query_array_params( +def test_query_array_params(capsys): + query_params.query_array_params( gender='M', states=['WA', 'WI', 'WV', 'WY']) out, _ = capsys.readouterr() assert 'James' in out -def test_sync_query_positional_params(capsys): - sync_query_params.sync_query_positional_params( +def test_query_named_params(capsys): + query_params.query_named_params( corpus='romeoandjuliet', min_word_count=100) out, _ = capsys.readouterr() assert 'love' in out -def test_sync_query_struct_params(capsys): - sync_query_params.sync_query_struct_params(765, "hello world") +def test_query_positional_params(capsys): + query_params.query_positional_params( + corpus='romeoandjuliet', + min_word_count=100) + out, _ = capsys.readouterr() + assert 'love' in out + + +def test_query_struct_params(capsys): + query_params.query_struct_params(765, "hello world") out, _ = capsys.readouterr() assert '765' in out assert 'hello world' in out -def test_sync_query_timestamp_params(capsys): - sync_query_params.sync_query_timestamp_params(2016, 12, 7, 8, 0) +def test_query_timestamp_params(capsys): + query_params.query_timestamp_params(2016, 12, 7, 8, 0) out, _ = capsys.readouterr() - assert '2016-12-07 09:00:00' in out + assert '2016, 12, 7, 9, 0' in out diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index c1f420a87aeb..b969cd8cd973 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==0.22.1 +google-cloud-bigquery==0.24.0 pytz==2016.10 From 3db03f4491076de93637a03b69fcac5fbecb142e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 24 Apr 2017 15:10:27 -0700 Subject: [PATCH 0825/2016] Indent queries. --- .../samples/snippets/query_params.py | 51 +++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/query_params.py b/packages/google-cloud-bigquery/samples/snippets/query_params.py index 891f0ef79487..1025fd26b002 100644 --- a/packages/google-cloud-bigquery/samples/snippets/query_params.py +++ b/packages/google-cloud-bigquery/samples/snippets/query_params.py @@ -60,12 +60,13 @@ def print_results(query_results): def query_positional_params(corpus, min_word_count): client = bigquery.Client() - query = """SELECT word, word_count - FROM `bigquery-public-data.samples.shakespeare` - WHERE corpus = ? - AND word_count >= ? - ORDER BY word_count DESC; - """ + query = """ + SELECT word, word_count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = ? + AND word_count >= ? + ORDER BY word_count DESC; + """ query_job = client.run_async_query( str(uuid.uuid4()), query, @@ -74,9 +75,7 @@ def query_positional_params(corpus, min_word_count): # Set the name to None to use positional parameters (? symbol # in the query). Note that you cannot mix named and positional # parameters. - None, - 'STRING', - corpus), + None, 'STRING', corpus), bigquery.ScalarQueryParameter(None, 'INT64', min_word_count))) # Only standard SQL syntax supports parameters in queries. @@ -91,21 +90,20 @@ def query_positional_params(corpus, min_word_count): def query_named_params(corpus, min_word_count): client = bigquery.Client() - query = """SELECT word, word_count - FROM `bigquery-public-data.samples.shakespeare` - WHERE corpus = @corpus - AND word_count >= @min_word_count - ORDER BY word_count DESC; - """ + query = """ + SELECT word, word_count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = @corpus + AND word_count >= @min_word_count + ORDER BY word_count DESC; + """ query_job = client.run_async_query( str(uuid.uuid4()), query, query_parameters=( bigquery.ScalarQueryParameter('corpus', 'STRING', corpus), bigquery.ScalarQueryParameter( - 'min_word_count', - 'INT64', - min_word_count))) + 'min_word_count', 'INT64', min_word_count))) query_job.use_legacy_sql = False # Start the query and wait for the job to complete. @@ -116,14 +114,15 @@ def query_named_params(corpus, min_word_count): def query_array_params(gender, states): client = bigquery.Client() - query = """SELECT name, sum(number) as count - FROM `bigquery-public-data.usa_names.usa_1910_2013` - WHERE gender = @gender - AND state IN UNNEST(@states) - GROUP BY name - ORDER BY count DESC - LIMIT 10; - """ + query = """ + SELECT name, sum(number) as count + FROM `bigquery-public-data.usa_names.usa_1910_2013` + WHERE gender = @gender + AND state IN UNNEST(@states) + GROUP BY name + ORDER BY count DESC + LIMIT 10; + """ query_job = client.run_async_query( str(uuid.uuid4()), query, From cf9c9cff75d1a8a1111a601edf87901d8e905603 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 25 Apr 2017 09:32:44 -0700 Subject: [PATCH 0826/2016] Auto-update dependencies. [(#916)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/916) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index b969cd8cd973..393886fbe0b1 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-cloud-bigquery==0.24.0 -pytz==2016.10 +pytz==2017.2 From 0b9792d0da4a17cd331b4cf65af2c84c8f003512 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Thu, 27 Apr 2017 09:54:41 -0700 Subject: [PATCH 0827/2016] Re-generate all readmes --- packages/google-cloud-bigquery/samples/snippets/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index 1824609ddb7c..4d108b1e5df6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -26,7 +26,7 @@ authentication: .. code-block:: bash - gcloud beta auth application-default login + gcloud auth application-default login #. When running on App Engine or Compute Engine, credentials are already From aafd02e3e3de8537e42d0ee953ccfb0187660695 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 3 May 2017 10:52:35 -0700 Subject: [PATCH 0828/2016] BigQuery: user credentials to run a query. [(#925)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/925) * BigQuery: user credentials to run a query. * BigQuery user creds sample: add tests. Mocks out user credentials using the Application Default Credentials, but uses the same scopes. --- .../samples/snippets/requirements.txt | 1 + .../samples/snippets/user_credentials.py | 92 +++++++++++++++++++ .../samples/snippets/user_credentials_test.py | 41 +++++++++ 3 files changed, 134 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/user_credentials.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 393886fbe0b1..224e1463adc3 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,3 @@ google-cloud-bigquery==0.24.0 +google-auth-oauthlib==0.0.1 pytz==2017.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py new file mode 100644 index 000000000000..a239b741ecb5 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Command-line application to run a query using user credentials. + +You must supply a client secrets file, which would normally be bundled with +your application. +""" + +import argparse +import time +import uuid + +from google.cloud import bigquery +from google_auth_oauthlib import flow + + +def wait_for_job(job): + while True: + job.reload() # Refreshes the state via a GET request. + if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.errors) + return + time.sleep(1) + + +def run_query(credentials, project, query): + client = bigquery.Client(project=project, credentials=credentials) + query_job = client.run_async_query(str(uuid.uuid4()), query) + query_job.use_legacy_sql = False + query_job.begin() + + wait_for_job(query_job) + + # Drain the query results by requesting a page at a time. + query_results = query_job.results() + page_token = None + + while True: + rows, total_rows, page_token = query_results.fetch_data( + max_results=10, + page_token=page_token) + + for row in rows: + print(row) + + if not page_token: + break + + +def authenticate_and_query(project, query, launch_browser=True): + appflow = flow.InstalledAppFlow.from_client_secrets_file( + 'client_secrets.json', + scopes=['https://www.googleapis.com/auth/bigquery']) + + if launch_browser: + appflow.run_local_server() + else: + appflow.run_console() + + run_query(appflow.credentials, project, query) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument( + '--launch-browser', + help='Use a local server flow to authenticate. ', + action='store_true') + parser.add_argument('project', help='Project to use for BigQuery billing.') + parser.add_argument('query', help='BigQuery SQL Query.') + + args = parser.parse_args() + + authenticate_and_query( + args.project, args.query, launch_browser=args.launch_browser) diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py new file mode 100644 index 000000000000..02acc19c38b1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py @@ -0,0 +1,41 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import google.auth +import mock +import pytest + +from user_credentials import authenticate_and_query + + +PROJECT = os.environ['GCLOUD_PROJECT'] + + +@pytest.fixture +def mock_flow(): + flow_patch = mock.patch( + 'google_auth_oauthlib.flow.InstalledAppFlow', autospec=True) + + with flow_patch as flow_mock: + flow_mock.from_client_secrets_file.return_value = flow_mock + flow_mock.credentials = google.auth.default()[0] + yield flow_mock + + +def test_auth_query_console(mock_flow, capsys): + authenticate_and_query(PROJECT, 'SELECT 1+1;', launch_browser=False) + out, _ = capsys.readouterr() + assert '2' in out From f7ac6f826619122e32e7487e1dcad4cafcf82cf7 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Fri, 12 May 2017 09:22:05 -0700 Subject: [PATCH 0829/2016] Auto-update dependencies. [(#942)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/942) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 224e1463adc3..5874270b3bd5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ google-cloud-bigquery==0.24.0 -google-auth-oauthlib==0.0.1 +google-auth-oauthlib==0.1.0 pytz==2017.2 From 3acc3ad9c0c90c0c7a3d9f483e720f9153579163 Mon Sep 17 00:00:00 2001 From: Bill Prin Date: Tue, 23 May 2017 17:01:25 -0700 Subject: [PATCH 0830/2016] Fix README rst links [(#962)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/962) * Fix README rst links * Update all READMEs --- packages/google-cloud-bigquery/samples/snippets/README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index 4d108b1e5df6..1f5b08cabfbe 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -326,11 +326,11 @@ This sample uses the `Google Cloud Client Library for Python`_. You can read the documentation for more details on API usage and use GitHub to `browse the source`_ and `report issues`_. -.. Google Cloud Client Library for Python: +.. _Google Cloud Client Library for Python: https://googlecloudplatform.github.io/google-cloud-python/ -.. browse the source: +.. _browse the source: https://github.com/GoogleCloudPlatform/google-cloud-python -.. report issues: +.. _report issues: https://github.com/GoogleCloudPlatform/google-cloud-python/issues From 00b5ed5e88a94ab2166b22c18f079f7d33be3956 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 27 Jun 2017 12:41:15 -0700 Subject: [PATCH 0831/2016] Auto-update dependencies. [(#1004)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1004) * Auto-update dependencies. * Fix natural language samples * Fix pubsub iam samples * Fix language samples * Fix bigquery samples --- .../samples/snippets/async_query.py | 17 +++-------------- .../samples/snippets/query_params.py | 17 ++++------------- .../samples/snippets/query_params_test.py | 4 ++-- .../samples/snippets/requirements.txt | 2 +- .../samples/snippets/resources/data.csv | 2 +- .../samples/snippets/simple_app.py | 15 +++------------ .../samples/snippets/sync_query.py | 15 +++------------ .../samples/snippets/user_credentials.py | 15 +++------------ 8 files changed, 20 insertions(+), 67 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query.py b/packages/google-cloud-bigquery/samples/snippets/async_query.py index f90c8f2558ef..4f7b2330bf32 100755 --- a/packages/google-cloud-bigquery/samples/snippets/async_query.py +++ b/packages/google-cloud-bigquery/samples/snippets/async_query.py @@ -48,20 +48,9 @@ def async_query(query): wait_for_job(query_job) - # Drain the query results by requesting a page at a time. - query_results = query_job.results() - page_token = None - - while True: - rows, total_rows, page_token = query_results.fetch_data( - max_results=10, - page_token=page_token) - - for row in rows: - print(row) - - if not page_token: - break + rows = query_job.results().fetch_data(max_results=10) + for row in rows: + print(row) if __name__ == '__main__': diff --git a/packages/google-cloud-bigquery/samples/snippets/query_params.py b/packages/google-cloud-bigquery/samples/snippets/query_params.py index 1025fd26b002..192558d91431 100644 --- a/packages/google-cloud-bigquery/samples/snippets/query_params.py +++ b/packages/google-cloud-bigquery/samples/snippets/query_params.py @@ -43,19 +43,10 @@ def wait_for_job(job): def print_results(query_results): - """Print the query results by requesting a page at a time.""" - page_token = None - - while True: - rows, total_rows, page_token = query_results.fetch_data( - max_results=10, - page_token=page_token) - - for row in rows: - print(row) - - if not page_token: - break + """Print the rows in the query's results.""" + rows = query_results.fetch_data(max_results=10) + for row in rows: + print(row) def query_positional_params(corpus, min_word_count): diff --git a/packages/google-cloud-bigquery/samples/snippets/query_params_test.py b/packages/google-cloud-bigquery/samples/snippets/query_params_test.py index 66f4951ddb86..f4b4931372b5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/query_params_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/query_params_test.py @@ -28,7 +28,7 @@ def test_query_named_params(capsys): corpus='romeoandjuliet', min_word_count=100) out, _ = capsys.readouterr() - assert 'love' in out + assert 'the' in out def test_query_positional_params(capsys): @@ -36,7 +36,7 @@ def test_query_positional_params(capsys): corpus='romeoandjuliet', min_word_count=100) out, _ = capsys.readouterr() - assert 'love' in out + assert 'the' in out def test_query_struct_params(capsys): diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 5874270b3bd5..3e0afe76a430 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ -google-cloud-bigquery==0.24.0 +google-cloud-bigquery==0.25.0 google-auth-oauthlib==0.1.0 pytz==2017.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/resources/data.csv b/packages/google-cloud-bigquery/samples/snippets/resources/data.csv index 230a96b559df..affe39ef89de 100644 --- a/packages/google-cloud-bigquery/samples/snippets/resources/data.csv +++ b/packages/google-cloud-bigquery/samples/snippets/resources/data.csv @@ -1 +1 @@ -Gandalf, 2000, 140.0, 1 +Gandalf,2000,140.0,1 diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app.py b/packages/google-cloud-bigquery/samples/snippets/simple_app.py index 7180c4fda62a..31059c9f7751 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app.py @@ -38,19 +38,10 @@ def query_shakespeare(): # [END run_query] # [START print_results] - # Drain the query results by requesting a page at a time. - page_token = None + rows = query_results.fetch_data(max_results=10) - while True: - rows, total_rows, page_token = query_results.fetch_data( - max_results=10, - page_token=page_token) - - for row in rows: - print(row) - - if not page_token: - break + for row in rows: + print(row) # [END print_results] diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query.py b/packages/google-cloud-bigquery/samples/snippets/sync_query.py index 37c8fea8a653..1f494f843431 100755 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query.py +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query.py @@ -39,19 +39,10 @@ def sync_query(query): query_results.run() - # Drain the query results by requesting a page at a time. - page_token = None + rows = query_results.fetch_data(max_results=10) - while True: - rows, total_rows, page_token = query_results.fetch_data( - max_results=10, - page_token=page_token) - - for row in rows: - print(row) - - if not page_token: - break + for row in rows: + print(row) # [END sync_query] diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py index a239b741ecb5..017c87ffd453 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py @@ -46,20 +46,11 @@ def run_query(credentials, project, query): wait_for_job(query_job) - # Drain the query results by requesting a page at a time. query_results = query_job.results() - page_token = None + rows = query_results.fetch_data(max_results=10) - while True: - rows, total_rows, page_token = query_results.fetch_data( - max_results=10, - page_token=page_token) - - for row in rows: - print(row) - - if not page_token: - break + for row in rows: + print(row) def authenticate_and_query(project, query, launch_browser=True): From 9783b9da7dffe64cdcdacdcee964e6e9a3d80e63 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 1 May 2017 15:44:01 -0700 Subject: [PATCH 0832/2016] BigQuery: add auth samples for service accounts. --- .../samples/snippets/.gitignore | 2 + .../samples/snippets/auth_snippets.py | 62 +++++++++++++++++++ .../samples/snippets/auth_snippets_test.py | 33 ++++++++++ 3 files changed, 97 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/.gitignore create mode 100644 packages/google-cloud-bigquery/samples/snippets/auth_snippets.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/auth_snippets_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/.gitignore b/packages/google-cloud-bigquery/samples/snippets/.gitignore new file mode 100644 index 000000000000..0dc05ffadec2 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/.gitignore @@ -0,0 +1,2 @@ +client_secrets.json +service_account.json diff --git a/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py b/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py new file mode 100644 index 000000000000..9a0c490d9c36 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py @@ -0,0 +1,62 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Demonstrates how to authenticate to Google BigQuery using the Google Cloud +Client Libraries.""" + +import argparse + + +def implicit(): + from google.cloud import bigquery + + # If you don't specify credentials when constructing the client, the + # client library will look for credentials in the environment. + bigquery_client = bigquery.Client() + + # Make an authenticated API request + datasets = list(bigquery_client.list_datasets()) + print(datasets) + + +def explicit(): + from google.cloud import bigquery + + # Explicitly use service account credentials by specifying the private key + # file. All clients in google-cloud-python have this helper, see + # https://google-cloud-python.readthedocs.io/en/latest/core/modules.html + # #google.cloud.client.Client.from_service_account_json + bigquery_client = bigquery.Client.from_service_account_json( + 'service_account.json') + + # Make an authenticated API request + buckets = list(bigquery_client.list_datasets()) + print(buckets) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + + subparsers = parser.add_subparsers(dest='command') + subparsers.add_parser('implicit', help=implicit.__doc__) + subparsers.add_parser('explicit', help=explicit.__doc__) + + args = parser.parse_args() + + if args.command == 'implicit': + implicit() + elif args.command == 'explicit': + explicit() diff --git a/packages/google-cloud-bigquery/samples/snippets/auth_snippets_test.py b/packages/google-cloud-bigquery/samples/snippets/auth_snippets_test.py new file mode 100644 index 000000000000..5b5f2cac00cd --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/auth_snippets_test.py @@ -0,0 +1,33 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import mock + +import auth_snippets + + +def test_implicit(): + auth_snippets.implicit() + + +def test_explicit(): + with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS']) as creds_file: + creds_file_data = creds_file.read() + + open_mock = mock.mock_open(read_data=creds_file_data) + + with mock.patch('io.open', open_mock): + auth_snippets.explicit() From 52309c21385468304936f8392ff3399662acb88b Mon Sep 17 00:00:00 2001 From: PicardParis Date: Mon, 31 Jul 2017 23:13:35 +0200 Subject: [PATCH 0833/2016] Fix SQL query to return 10 rows & simplify [(#1041)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1041) - Original query returned a single nested row (instead of 10 rows as apparently expected in section print_results) - Standard SQL specified directly in query - Removed parameter max_results to avoid redundancy w/ query --- .../samples/snippets/simple_app.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app.py b/packages/google-cloud-bigquery/samples/snippets/simple_app.py index 31059c9f7751..6db8568398d6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app.py @@ -24,21 +24,20 @@ def query_shakespeare(): client = bigquery.Client() # [END create_client] # [START run_query] - query_results = client.run_sync_query(""" - SELECT - APPROX_TOP_COUNT(corpus, 10) as title, - COUNT(*) as unique_words - FROM `publicdata.samples.shakespeare`;""") - - # Use standard SQL syntax for queries. # See: https://cloud.google.com/bigquery/sql-reference/ - query_results.use_legacy_sql = False + query_results = client.run_sync_query(""" + #standardSQL + SELECT corpus AS title, COUNT(*) AS unique_words + FROM `publicdata.samples.shakespeare` + GROUP BY title + ORDER BY unique_words DESC + LIMIT 10""") query_results.run() # [END run_query] # [START print_results] - rows = query_results.fetch_data(max_results=10) + rows = query_results.fetch_data() for row in rows: print(row) From c9181d785e624bbe9ed7c0a44d2328087c2cdc86 Mon Sep 17 00:00:00 2001 From: PicardParis Date: Tue, 1 Aug 2017 18:04:30 +0200 Subject: [PATCH 0834/2016] Fix test randomness [(#1043)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1043) --- .../samples/snippets/async_query_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query_test.py b/packages/google-cloud-bigquery/samples/snippets/async_query_test.py index 85ce3fce963e..10213e215872 100644 --- a/packages/google-cloud-bigquery/samples/snippets/async_query_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/async_query_test.py @@ -16,12 +16,13 @@ def test_async_query(capsys): + # Query only outputs the first 10 rows, sort results to avoid randomness query = ( 'SELECT corpus FROM `publicdata.samples.shakespeare` ' - 'GROUP BY corpus;') + 'GROUP BY corpus ORDER BY corpus') async_query(query) out, _ = capsys.readouterr() - assert 'romeoandjuliet' in out + assert 'antonyandcleopatra' in out From 1baee001f17a114f9c1fde7b92bb541e928a0834 Mon Sep 17 00:00:00 2001 From: PicardParis Date: Tue, 1 Aug 2017 18:04:41 +0200 Subject: [PATCH 0835/2016] Fix test randomness [(#1044)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1044) --- .../samples/snippets/sync_query_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py b/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py index 26c8973e4bfe..566994477b5c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py @@ -16,12 +16,13 @@ def test_sync_query(capsys): + # Query only outputs the first 10 rows, sort results to avoid randomness query = ( 'SELECT corpus FROM `publicdata.samples.shakespeare` ' - 'GROUP BY corpus;') + 'GROUP BY corpus ORDER BY corpus') sync_query(query) out, _ = capsys.readouterr() - assert 'romeoandjuliet' in out + assert 'antonyandcleopatra' in out From 8b290cc00010305e193b2687717b50c26c861b5d Mon Sep 17 00:00:00 2001 From: DPE bot Date: Mon, 7 Aug 2017 10:04:55 -0700 Subject: [PATCH 0836/2016] Auto-update dependencies. [(#1055)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1055) * Auto-update dependencies. * Explicitly use latest bigtable client Change-Id: Id71e9e768f020730e4ca9514a0d7ebaa794e7d9e * Revert language update for now Change-Id: I8867f154e9a5aae00d0047c9caf880e5e8f50c53 * Remove pdb. smh Change-Id: I5ff905fadc026eebbcd45512d4e76e003e3b2b43 --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 3e0afe76a430..fe1ea7ea253e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ -google-cloud-bigquery==0.25.0 +google-cloud-bigquery==0.26.0 google-auth-oauthlib==0.1.0 pytz==2017.2 From 20a7098b0dbdc07eadff17eaac5e2df767220a6c Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 8 Aug 2017 09:40:00 -0700 Subject: [PATCH 0837/2016] Update readme references. Resolves 1059 Change-Id: I05013e56fae4e801cd6682ec2ec12459baea9dc8 --- .../samples/snippets/README.rst | 14 +++++++------- .../samples/snippets/async_query.py | 2 +- .../samples/snippets/export_data_to_gcs.py | 2 +- .../samples/snippets/load_data_from_file.py | 2 +- .../samples/snippets/load_data_from_gcs.py | 2 +- .../samples/snippets/query_params.py | 2 +- .../samples/snippets/snippets.py | 2 +- .../samples/snippets/stream_data.py | 2 +- .../samples/snippets/sync_query.py | 2 +- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index 1f5b08cabfbe..a68ff1f7624d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -97,7 +97,7 @@ To run this sample: Command-line application to perform synchronous queries in BigQuery. - For more information, see the README.md under /bigquery. + For more information, see the README.rst. Example invocation: $ python sync_query.py \ @@ -125,7 +125,7 @@ To run this sample: Command-line application to perform asynchronous queries in BigQuery. - For more information, see the README.md under /bigquery. + For more information, see the README.rst. Example invocation: $ python async_query.py \ @@ -155,7 +155,7 @@ To run this sample: Samples that demonstrate basic operations in the BigQuery API. - For more information, see the README.md under /bigquery. + For more information, see the README.rst. Example invocation: $ python snippets.py list-datasets @@ -206,7 +206,7 @@ To run this sample: Loads data into BigQuery from a local file. - For more information, see the README.md under /bigquery. + For more information, see the README.rst. Example invocation: $ python load_data_from_file.py example_dataset example_table example-data.csv @@ -237,7 +237,7 @@ To run this sample: Loads data into BigQuery from an object in Google Cloud Storage. - For more information, see the README.md under /bigquery. + For more information, see the README.rst. Example invocation: $ python load_data_from_gcs.py example_dataset example_table gs://example-bucket/example-data.csv @@ -269,7 +269,7 @@ To run this sample: Loads a single row of data directly into BigQuery. - For more information, see the README.md under /bigquery. + For more information, see the README.rst. Example invocation: $ python stream_data.py example_dataset example_table '["Gandalf", 2000]' @@ -300,7 +300,7 @@ To run this sample: Exports data from BigQuery to an object in Google Cloud Storage. - For more information, see the README.md under /bigquery. + For more information, see the README.rst. Example invocation: $ python export_data_to_gcs.py example_dataset example_table gs://example-bucket/example-data.csv diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query.py b/packages/google-cloud-bigquery/samples/snippets/async_query.py index 4f7b2330bf32..895f294bd239 100755 --- a/packages/google-cloud-bigquery/samples/snippets/async_query.py +++ b/packages/google-cloud-bigquery/samples/snippets/async_query.py @@ -16,7 +16,7 @@ """Command-line application to perform asynchronous queries in BigQuery. -For more information, see the README.md under /bigquery. +For more information, see the README.rst. Example invocation: $ python async_query.py \\ diff --git a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py index 3aee442c14b5..b93ea71ef26a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py +++ b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py @@ -16,7 +16,7 @@ """Exports data from BigQuery to an object in Google Cloud Storage. -For more information, see the README.md under /bigquery. +For more information, see the README.rst. Example invocation: $ python export_data_to_gcs.py example_dataset example_table \ diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py index 0bbdd7ba7d80..5823003a1175 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py @@ -16,7 +16,7 @@ """Loads data into BigQuery from a local file. -For more information, see the README.md under /bigquery. +For more information, see the README.rst. Example invocation: $ python load_data_from_file.py example_dataset example_table \ diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py index 7c576e20ad67..21d96169ffb2 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py @@ -16,7 +16,7 @@ """Loads data into BigQuery from an object in Google Cloud Storage. -For more information, see the README.md under /bigquery. +For more information, see the README.rst. Example invocation: $ python load_data_from_gcs.py example_dataset example_table \ diff --git a/packages/google-cloud-bigquery/samples/snippets/query_params.py b/packages/google-cloud-bigquery/samples/snippets/query_params.py index 192558d91431..96dbb59b9fc5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/query_params.py +++ b/packages/google-cloud-bigquery/samples/snippets/query_params.py @@ -16,7 +16,7 @@ """Command-line app to perform queries with parameters in BigQuery. -For more information, see the README.md under /bigquery. +For more information, see the README.rst. Example invocation: $ python query_params.py --use-named-params 'romeoandjuliet' 100 diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index f4294123d9d4..f62c074a70fd 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -16,7 +16,7 @@ """Samples that demonstrate basic operations in the BigQuery API. -For more information, see the README.md under /bigquery. +For more information, see the README.rst. Example invocation: $ python snippets.py list-datasets diff --git a/packages/google-cloud-bigquery/samples/snippets/stream_data.py b/packages/google-cloud-bigquery/samples/snippets/stream_data.py index a90d432bf3e4..7d9970c3aa34 100644 --- a/packages/google-cloud-bigquery/samples/snippets/stream_data.py +++ b/packages/google-cloud-bigquery/samples/snippets/stream_data.py @@ -16,7 +16,7 @@ """Loads a single row of data directly into BigQuery. -For more information, see the README.md under /bigquery. +For more information, see the README.rst. Example invocation: $ python stream_data.py example_dataset example_table \ diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query.py b/packages/google-cloud-bigquery/samples/snippets/sync_query.py index 1f494f843431..34d3fbc8a2bf 100755 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query.py +++ b/packages/google-cloud-bigquery/samples/snippets/sync_query.py @@ -16,7 +16,7 @@ """Command-line application to perform synchronous queries in BigQuery. -For more information, see the README.md under /bigquery. +For more information, see the README.rst. Example invocation: $ python sync_query.py \\ From b1cab98990409f574c6c286f0ae98587ad85e0cf Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 8 Aug 2017 09:46:29 -0700 Subject: [PATCH 0838/2016] Use futures API to wait for jobs to finish. Removes unused sync_query.py sample. --- .../samples/snippets/async_query.py | 64 --------------- .../samples/snippets/export_data_to_gcs.py | 14 +--- .../samples/snippets/load_data_from_file.py | 13 +-- .../samples/snippets/load_data_from_gcs.py | 14 +--- .../samples/snippets/query.py | 81 +++++++++++++++++++ .../samples/snippets/query_params.py | 68 ++++++++-------- .../{async_query_test.py => query_test.py} | 30 +++++-- .../samples/snippets/snippets.py | 18 +---- .../samples/snippets/sync_query.py | 57 ------------- .../samples/snippets/sync_query_test.py | 28 ------- .../samples/snippets/user_credentials.py | 24 ++---- 11 files changed, 151 insertions(+), 260 deletions(-) delete mode 100755 packages/google-cloud-bigquery/samples/snippets/async_query.py create mode 100755 packages/google-cloud-bigquery/samples/snippets/query.py rename packages/google-cloud-bigquery/samples/snippets/{async_query_test.py => query_test.py} (53%) delete mode 100755 packages/google-cloud-bigquery/samples/snippets/sync_query.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/sync_query_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query.py b/packages/google-cloud-bigquery/samples/snippets/async_query.py deleted file mode 100755 index 895f294bd239..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/async_query.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Command-line application to perform asynchronous queries in BigQuery. - -For more information, see the README.rst. - -Example invocation: - $ python async_query.py \\ - 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' -""" - -import argparse -import time -import uuid - -from google.cloud import bigquery - - -def wait_for_job(job): - while True: - job.reload() # Refreshes the state via a GET request. - if job.state == 'DONE': - if job.error_result: - raise RuntimeError(job.errors) - return - time.sleep(1) - - -def async_query(query): - client = bigquery.Client() - query_job = client.run_async_query(str(uuid.uuid4()), query) - query_job.use_legacy_sql = False - query_job.begin() - - wait_for_job(query_job) - - rows = query_job.results().fetch_data(max_results=10) - for row in rows: - print(row) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('query', help='BigQuery SQL Query.') - - args = parser.parse_args() - - async_query(args.query) diff --git a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py index b93ea71ef26a..41b011ca06d8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py +++ b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py @@ -26,7 +26,6 @@ """ import argparse -import time import uuid from google.cloud import bigquery @@ -42,23 +41,12 @@ def export_data_to_gcs(dataset_name, table_name, destination): job_name, table, destination) job.begin() - - wait_for_job(job) + job.result() # Wait for job to complete print('Exported {}:{} to {}'.format( dataset_name, table_name, destination)) -def wait_for_job(job): - while True: - job.reload() - if job.state == 'DONE': - if job.error_result: - raise RuntimeError(job.errors) - return - time.sleep(1) - - if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py index 5823003a1175..9e0bf9f4d130 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py @@ -26,7 +26,6 @@ """ import argparse -import time from google.cloud import bigquery @@ -45,22 +44,12 @@ def load_data_from_file(dataset_name, table_name, source_file_name): job = table.upload_from_file( source_file, source_format='text/csv') - wait_for_job(job) + job.result() # Wait for job to complete print('Loaded {} rows into {}:{}.'.format( job.output_rows, dataset_name, table_name)) -def wait_for_job(job): - while True: - job.reload() - if job.state == 'DONE': - if job.error_result: - raise RuntimeError(job.errors) - return - time.sleep(1) - - if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py index 21d96169ffb2..b0db3a01139d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py @@ -26,7 +26,6 @@ """ import argparse -import time import uuid from google.cloud import bigquery @@ -42,23 +41,12 @@ def load_data_from_gcs(dataset_name, table_name, source): job_name, table, source) job.begin() - - wait_for_job(job) + job.result() # Wait for job to complete print('Loaded {} rows into {}:{}.'.format( job.output_rows, dataset_name, table_name)) -def wait_for_job(job): - while True: - job.reload() - if job.state == 'DONE': - if job.error_result: - raise RuntimeError(job.errors) - return - time.sleep(1) - - if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, diff --git a/packages/google-cloud-bigquery/samples/snippets/query.py b/packages/google-cloud-bigquery/samples/snippets/query.py new file mode 100755 index 000000000000..f01f912cce67 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/query.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Command-line application to perform queries in BigQuery. + +For more information, see the README.rst. + +Example invocation: + $ python query.py '#standardSQL + SELECT corpus + FROM `publicdata.samples.shakespeare` + GROUP BY corpus + ORDER BY corpus' +""" + +import argparse +import uuid + +from google.cloud import bigquery + + +def query(query): + client = bigquery.Client() + query_job = client.run_async_query(str(uuid.uuid4()), query) + + query_job.begin() + query_job.result() # Wait for job to complete. + + # Print the results. + destination_table = query_job.destination + destination_table.reload() + for row in destination_table.fetch_data(): + print(row) + + +def query_standard_sql(query): + client = bigquery.Client() + query_job = client.run_async_query(str(uuid.uuid4()), query) + # Set use_legacy_sql to False to use standard SQL syntax. See: + # https://cloud.google.com/bigquery/docs/reference/standard-sql/enabling-standard-sql + query_job.use_legacy_sql = False + + query_job.begin() + query_job.result() # Wait for job to complete. + + # Print the results. + destination_table = query_job.destination + destination_table.reload() + for row in destination_table.fetch_data(): + print(row) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('query', help='BigQuery SQL Query.') + parser.add_argument( + '--use_standard_sql', + action='store_true', + help='Use standard SQL syntax.') + + args = parser.parse_args() + + if args.use_standard_sql: + query_standard_sql(args.query) + else: + query(args.query) diff --git a/packages/google-cloud-bigquery/samples/snippets/query_params.py b/packages/google-cloud-bigquery/samples/snippets/query_params.py index 96dbb59b9fc5..435af29d3b56 100644 --- a/packages/google-cloud-bigquery/samples/snippets/query_params.py +++ b/packages/google-cloud-bigquery/samples/snippets/query_params.py @@ -25,30 +25,12 @@ import argparse import datetime -import time import uuid from google.cloud import bigquery import pytz -def wait_for_job(job): - while True: - job.reload() # Refreshes the state via a GET request. - if job.state == 'DONE': - if job.error_result: - raise RuntimeError(job.errors) - return - time.sleep(1) - - -def print_results(query_results): - """Print the rows in the query's results.""" - rows = query_results.fetch_data(max_results=10) - for row in rows: - print(row) - - def query_positional_params(corpus, min_word_count): client = bigquery.Client() query = """ @@ -73,10 +55,14 @@ def query_positional_params(corpus, min_word_count): # See: https://cloud.google.com/bigquery/sql-reference/ query_job.use_legacy_sql = False - # Start the query and wait for the job to complete. query_job.begin() - wait_for_job(query_job) - print_results(query_job.results()) + query_job.result() # Wait for job to complete + + # Print the results. + destination_table = query_job.destination + destination_table.reload() + for row in destination_table.fetch_data(): + print(row) def query_named_params(corpus, min_word_count): @@ -97,10 +83,14 @@ def query_named_params(corpus, min_word_count): 'min_word_count', 'INT64', min_word_count))) query_job.use_legacy_sql = False - # Start the query and wait for the job to complete. query_job.begin() - wait_for_job(query_job) - print_results(query_job.results()) + query_job.result() # Wait for job to complete + + # Print the results. + destination_table = query_job.destination + destination_table.reload() + for row in destination_table.fetch_data(): + print(row) def query_array_params(gender, states): @@ -122,10 +112,14 @@ def query_array_params(gender, states): bigquery.ArrayQueryParameter('states', 'STRING', states))) query_job.use_legacy_sql = False - # Start the query and wait for the job to complete. query_job.begin() - wait_for_job(query_job) - print_results(query_job.results()) + query_job.result() # Wait for job to complete + + # Print the results. + destination_table = query_job.destination + destination_table.reload() + for row in destination_table.fetch_data(): + print(row) def query_timestamp_params(year, month, day, hour, minute): @@ -142,10 +136,14 @@ def query_timestamp_params(year, month, day, hour, minute): year, month, day, hour, minute, tzinfo=pytz.UTC))]) query_job.use_legacy_sql = False - # Start the query and wait for the job to complete. query_job.begin() - wait_for_job(query_job) - print_results(query_job.results()) + query_job.result() # Wait for job to complete + + # Print the results. + destination_table = query_job.destination + destination_table.reload() + for row in destination_table.fetch_data(): + print(row) def query_struct_params(x, y): @@ -161,10 +159,14 @@ def query_struct_params(x, y): bigquery.ScalarQueryParameter('y', 'STRING', y))]) query_job.use_legacy_sql = False - # Start the query and wait for the job to complete. query_job.begin() - wait_for_job(query_job) - print_results(query_job.results()) + query_job.result() # Wait for job to complete + + # Print the results. + destination_table = query_job.destination + destination_table.reload() + for row in destination_table.fetch_data(): + print(row) if __name__ == '__main__': diff --git a/packages/google-cloud-bigquery/samples/snippets/async_query_test.py b/packages/google-cloud-bigquery/samples/snippets/query_test.py similarity index 53% rename from packages/google-cloud-bigquery/samples/snippets/async_query_test.py rename to packages/google-cloud-bigquery/samples/snippets/query_test.py index 10213e215872..fa698e146a84 100644 --- a/packages/google-cloud-bigquery/samples/snippets/async_query_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/query_test.py @@ -12,16 +12,34 @@ # See the License for the specific language governing permissions and # limitations under the License. -from async_query import async_query +import query -def test_async_query(capsys): +def test_query(capsys): # Query only outputs the first 10 rows, sort results to avoid randomness - query = ( - 'SELECT corpus FROM `publicdata.samples.shakespeare` ' - 'GROUP BY corpus ORDER BY corpus') + query_string = '''#standardSQL + SELECT corpus + FROM `publicdata.samples.shakespeare` + GROUP BY corpus + ORDER BY corpus + LIMIT 10;''' - async_query(query) + query.query(query_string) + + out, _ = capsys.readouterr() + + assert 'antonyandcleopatra' in out + + +def test_query_standard_sql(capsys): + # Query only outputs the first 10 rows, sort results to avoid randomness + query_string = '''SELECT corpus + FROM `publicdata.samples.shakespeare` + GROUP BY corpus + ORDER BY corpus + LIMIT 10;''' + + query.query_standard_sql(query_string) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index f62c074a70fd..ef6af0abead8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -25,7 +25,6 @@ """ import argparse -import time import uuid from google.cloud import bigquery @@ -165,26 +164,13 @@ def copy_table(dataset_name, table_name, new_table_name, project=None): job.create_disposition = ( google.cloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED) - # Start the job. - job.begin() - - # Wait for the the job to finish. + job.begin() # Start the job. print('Waiting for job to finish...') - wait_for_job(job) + job.result() print('Table {} copied to {}.'.format(table_name, new_table_name)) -def wait_for_job(job): - while True: - job.reload() # Refreshes the state via a GET request. - if job.state == 'DONE': - if job.error_result: - raise RuntimeError(job.errors) - return - time.sleep(1) - - def delete_table(dataset_name, table_name, project=None): """Deletes a table in a given dataset. diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query.py b/packages/google-cloud-bigquery/samples/snippets/sync_query.py deleted file mode 100755 index 34d3fbc8a2bf..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Command-line application to perform synchronous queries in BigQuery. - -For more information, see the README.rst. - -Example invocation: - $ python sync_query.py \\ - 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' -""" - -import argparse - -# [START sync_query] -from google.cloud import bigquery - - -def sync_query(query): - client = bigquery.Client() - query_results = client.run_sync_query(query) - - # Use standard SQL syntax for queries. - # See: https://cloud.google.com/bigquery/sql-reference/ - query_results.use_legacy_sql = False - - query_results.run() - - rows = query_results.fetch_data(max_results=10) - - for row in rows: - print(row) -# [END sync_query] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('query', help='BigQuery SQL Query.') - - args = parser.parse_args() - - sync_query(args.query) diff --git a/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py b/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py deleted file mode 100644 index 566994477b5c..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/sync_query_test.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from sync_query import sync_query - - -def test_sync_query(capsys): - # Query only outputs the first 10 rows, sort results to avoid randomness - query = ( - 'SELECT corpus FROM `publicdata.samples.shakespeare` ' - 'GROUP BY corpus ORDER BY corpus') - - sync_query(query) - - out, _ = capsys.readouterr() - - assert 'antonyandcleopatra' in out diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py index 017c87ffd453..ca585c0a14e4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py @@ -21,35 +21,23 @@ """ import argparse -import time import uuid from google.cloud import bigquery from google_auth_oauthlib import flow -def wait_for_job(job): - while True: - job.reload() # Refreshes the state via a GET request. - if job.state == 'DONE': - if job.error_result: - raise RuntimeError(job.errors) - return - time.sleep(1) - - def run_query(credentials, project, query): client = bigquery.Client(project=project, credentials=credentials) query_job = client.run_async_query(str(uuid.uuid4()), query) - query_job.use_legacy_sql = False - query_job.begin() - wait_for_job(query_job) - - query_results = query_job.results() - rows = query_results.fetch_data(max_results=10) + query_job.begin() + query_job.result() # Wait for the job to complete. - for row in rows: + # Print the results. + destination_table = query_job.destination + destination_table.reload() + for row in destination_table.fetch_data(): print(row) From 807f6a781f716b981e66ea21486d5d7236cda6d0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 8 Aug 2017 12:11:16 -0700 Subject: [PATCH 0839/2016] BQ: Use futures API for quickstart. --- .../samples/snippets/simple_app.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app.py b/packages/google-cloud-bigquery/samples/snippets/simple_app.py index 6db8568398d6..9bca432ef4ad 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app.py @@ -17,6 +17,8 @@ """Simple application that performs a query with BigQuery.""" # [START all] # [START create_client] +import uuid + from google.cloud import bigquery @@ -24,8 +26,7 @@ def query_shakespeare(): client = bigquery.Client() # [END create_client] # [START run_query] - # See: https://cloud.google.com/bigquery/sql-reference/ - query_results = client.run_sync_query(""" + query_job = client.run_async_query(str(uuid.uuid4()), """ #standardSQL SELECT corpus AS title, COUNT(*) AS unique_words FROM `publicdata.samples.shakespeare` @@ -33,13 +34,14 @@ def query_shakespeare(): ORDER BY unique_words DESC LIMIT 10""") - query_results.run() + query_job.begin() + query_job.result() # Wait for job to complete. # [END run_query] # [START print_results] - rows = query_results.fetch_data() - - for row in rows: + destination_table = query_job.destination + destination_table.reload() + for row in destination_table.fetch_data(): print(row) # [END print_results] From 3c4e6c5e2e612c4177c14c3dcd56dc6f408aefcd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 21 Aug 2017 15:24:48 -0700 Subject: [PATCH 0840/2016] BigQuery : max_results changes page size not full list size Also, running a query is not "preferred" if you do want all the data. If you do run a query, you end up reading data from the destination table anyway. --- .../google-cloud-bigquery/samples/snippets/snippets.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index ef6af0abead8..bb6b1c01f712 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -25,6 +25,7 @@ """ import argparse +import itertools import uuid from google.cloud import bigquery @@ -124,10 +125,9 @@ def list_rows(dataset_name, table_name, project=None): # Reload the table so that the schema is available. table.reload() - # Load at most 25 results. You can change the max_results argument to load - # more rows from BigQuery, but note that this can take some time. It's - # preferred to use a query. - rows = list(table.fetch_data(max_results=25)) + # Load at most 25 results per page. You can change the max_results + # argument to load more rows from BigQuery at a time. + rows = list(itertools.islice(table.fetch_data(max_results=25), 25)) # Use format to create a simple table. format_string = '{!s:<16} ' * len(table.schema) From 4ee258721bfa367503c59e44a5815c0cabdfe6ad Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 22 Aug 2017 09:50:12 -0700 Subject: [PATCH 0841/2016] Remove max_results argument from table.fetch_data() call. The sample uses islice instead, which demonstrates that fetch_data returns an iterable. --- packages/google-cloud-bigquery/samples/snippets/snippets.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index bb6b1c01f712..33494cfaa31c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -125,9 +125,8 @@ def list_rows(dataset_name, table_name, project=None): # Reload the table so that the schema is available. table.reload() - # Load at most 25 results per page. You can change the max_results - # argument to load more rows from BigQuery at a time. - rows = list(itertools.islice(table.fetch_data(max_results=25), 25)) + # Load at most 25 results. + rows = list(itertools.islice(table.fetch_data(), 25)) # Use format to create a simple table. format_string = '{!s:<16} ' * len(table.schema) From 564c151c74f32f1b84cdfb91cfe024168ac54f5e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 22 Aug 2017 10:08:26 -0700 Subject: [PATCH 0842/2016] Remove unnecessary list() call --- packages/google-cloud-bigquery/samples/snippets/snippets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index 33494cfaa31c..401d18bbdfb4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -126,7 +126,7 @@ def list_rows(dataset_name, table_name, project=None): table.reload() # Load at most 25 results. - rows = list(itertools.islice(table.fetch_data(), 25)) + rows = itertools.islice(table.fetch_data(), 25) # Use format to create a simple table. format_string = '{!s:<16} ' * len(table.schema) From 69e65aafc3203f52f2b9b72d9bf19630b2cebf1d Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 29 Aug 2017 16:53:02 -0700 Subject: [PATCH 0843/2016] Auto-update dependencies. [(#1093)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1093) * Auto-update dependencies. * Fix storage notification poll sample Change-Id: I6afbc79d15e050531555e4c8e51066996717a0f3 * Fix spanner samples Change-Id: I40069222c60d57e8f3d3878167591af9130895cb * Drop coverage because it's not useful Change-Id: Iae399a7083d7866c3c7b9162d0de244fbff8b522 * Try again to fix flaky logging test Change-Id: I6225c074701970c17c426677ef1935bb6d7e36b4 --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index fe1ea7ea253e..e72d04ef4e70 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ -google-cloud-bigquery==0.26.0 +google-cloud-bigquery==0.27.0 google-auth-oauthlib==0.1.0 pytz==2017.2 From 81e7ee68f5fe47ad7bcf91eb6237d7721a61db12 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 19 Sep 2017 09:33:49 -0700 Subject: [PATCH 0844/2016] BigQuery : add sample for writing query results to a destination table. [(#1101)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1101) See: https://cloud.google.com/bigquery/docs/writing-results --- .../samples/snippets/query.py | 33 +++++++++++++++++++ .../samples/snippets/query_test.py | 20 +++++++++++ 2 files changed, 53 insertions(+) diff --git a/packages/google-cloud-bigquery/samples/snippets/query.py b/packages/google-cloud-bigquery/samples/snippets/query.py index f01f912cce67..93b13b84873a 100755 --- a/packages/google-cloud-bigquery/samples/snippets/query.py +++ b/packages/google-cloud-bigquery/samples/snippets/query.py @@ -63,6 +63,30 @@ def query_standard_sql(query): print(row) +def query_destination_table(query, dest_dataset_id, dest_table_id): + client = bigquery.Client() + query_job = client.run_async_query(str(uuid.uuid4()), query) + + # Allow for query results larger than the maximum response size. + query_job.allow_large_results = True + + # When large results are allowed, a destination table must be set. + dest_dataset = client.dataset(dest_dataset_id) + dest_table = dest_dataset.table(dest_table_id) + query_job.destination = dest_table + + # Allow the results table to be overwritten. + query_job.write_disposition = 'WRITE_TRUNCATE' + + query_job.begin() + query_job.result() # Wait for job to complete. + + # Verify that the results were written to the destination table. + dest_table.reload() # Get the table metadata, such as the schema. + for row in dest_table.fetch_data(): + print(row) + + if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, @@ -72,10 +96,19 @@ def query_standard_sql(query): '--use_standard_sql', action='store_true', help='Use standard SQL syntax.') + parser.add_argument( + '--destination_table', + type=str, + help=( + 'Destination table to use for results. ' + 'Example: my_dataset.my_table')) args = parser.parse_args() if args.use_standard_sql: query_standard_sql(args.query) + elif args.destination_table: + dataset, table = args.destination_table.split('.') + query_destination_table(args.query, dataset, table) else: query(args.query) diff --git a/packages/google-cloud-bigquery/samples/snippets/query_test.py b/packages/google-cloud-bigquery/samples/snippets/query_test.py index fa698e146a84..9d6c912b4e5f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/query_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/query_test.py @@ -15,6 +15,10 @@ import query +DATASET_ID = 'test_dataset' +TABLE_ID = 'test_destination_table' + + def test_query(capsys): # Query only outputs the first 10 rows, sort results to avoid randomness query_string = '''#standardSQL @@ -44,3 +48,19 @@ def test_query_standard_sql(capsys): out, _ = capsys.readouterr() assert 'antonyandcleopatra' in out + + +def test_query_destination_table(capsys): + # Query only outputs the first 10 rows, sort results to avoid randomness + query_string = '''#standardSQL + SELECT corpus + FROM `publicdata.samples.shakespeare` + GROUP BY corpus + ORDER BY corpus + LIMIT 10;''' + + query.query_destination_table(query_string, DATASET_ID, TABLE_ID) + + out, _ = capsys.readouterr() + + assert 'antonyandcleopatra' in out From 7bebb1db9cbfc9f6ecbfee1875c721e07ab1c88e Mon Sep 17 00:00:00 2001 From: DPE bot Date: Wed, 20 Sep 2017 09:09:43 -0700 Subject: [PATCH 0845/2016] Auto-update dependencies. [(#1128)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1128) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index e72d04ef4e70..e90122cb65f1 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ google-cloud-bigquery==0.27.0 -google-auth-oauthlib==0.1.0 +google-auth-oauthlib==0.1.1 pytz==2017.2 From b2aa3249a1e070522bcb64ba51eaad1da7cd78ad Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 31 Oct 2017 09:58:29 -0700 Subject: [PATCH 0846/2016] BigQuery: Updates samples for BigQuery Beta 2 (do not merge until release) [(#1178)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1178) * updates client.extract_table() sample * updates client.load_table_from_file() sample * updates client.load_table_from_uri() sample * updates parameterized query samples * updates query samples * updates dataset quickstart * fixes typo in sample * updates simple app sample * updates snippets * updates stream data sample * updates user credentials sample * removes job_id generation * Update BQ lib to 0.28.0. Use max_results in list_rows. * updates public dataset queries to use 'bigquery-public-data' instead of outdated 'publicdata' * updates readme and help sections --- .../samples/snippets/README.rst | 111 ++++++++----- .../samples/snippets/README.rst.in | 10 +- .../samples/snippets/export_data_to_gcs.py | 28 ++-- .../samples/snippets/load_data_from_file.py | 29 ++-- .../samples/snippets/load_data_from_gcs.py | 26 ++-- .../samples/snippets/query.py | 47 +++--- .../samples/snippets/query_params.py | 146 ++++++++---------- .../samples/snippets/query_test.py | 6 +- .../samples/snippets/quickstart.py | 11 +- .../samples/snippets/quickstart_test.py | 19 ++- .../samples/snippets/requirements.txt | 2 +- .../samples/snippets/simple_app.py | 15 +- .../samples/snippets/snippets.py | 129 +++++++--------- .../samples/snippets/snippets_test.py | 72 +++++---- .../samples/snippets/stream_data.py | 24 +-- .../samples/snippets/user_credentials.py | 10 +- 16 files changed, 347 insertions(+), 338 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index a68ff1f7624d..1a3b889a302d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -70,6 +70,18 @@ Install Dependencies Samples ------------------------------------------------------------------------------- +Simple Application ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +To run this sample: + +.. code-block:: bash + + $ python simple_app.py + + Quickstart +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -82,7 +94,7 @@ To run this sample: $ python quickstart.py -Sync query +Query +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -91,26 +103,35 @@ To run this sample: .. code-block:: bash - $ python sync_query.py + $ python query.py - usage: sync_query.py [-h] query + usage: query.py [-h] [--use_standard_sql] + [--destination_table DESTINATION_TABLE] + query - Command-line application to perform synchronous queries in BigQuery. + Command-line application to perform queries in BigQuery. For more information, see the README.rst. Example invocation: - $ python sync_query.py \ - 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' + $ python query.py '#standardSQL + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus + ORDER BY corpus' positional arguments: - query BigQuery SQL Query. + query BigQuery SQL Query. optional arguments: - -h, --help show this help message and exit + -h, --help show this help message and exit + --use_standard_sql Use standard SQL syntax. + --destination_table DESTINATION_TABLE + Destination table to use for results. Example: + my_dataset.my_table -Async query +Parameterized Query +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -119,23 +140,29 @@ To run this sample: .. code-block:: bash - $ python async_query.py + $ python query_params.py - usage: async_query.py [-h] query + usage: query_params.py [-h] {named,positional,array,timestamp,struct} ... - Command-line application to perform asynchronous queries in BigQuery. + Command-line app to perform queries with parameters in BigQuery. For more information, see the README.rst. Example invocation: - $ python async_query.py \ - 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' + $ python query_params.py named 'romeoandjuliet' 100 + $ python query_params.py positional 'romeoandjuliet' 100 positional arguments: - query BigQuery SQL Query. + {named,positional,array,timestamp,struct} + samples + named Run a query with named parameters. + positional Run a query with positional parameters. + array Run a query with an array parameter. + timestamp Run a query with a timestamp parameter. + struct Run a query with a struct parameter. optional arguments: - -h, --help show this help message and exit + -h, --help show this help message and exit Snippets @@ -202,20 +229,21 @@ To run this sample: $ python load_data_from_file.py - usage: load_data_from_file.py [-h] dataset_name table_name source_file_name + usage: load_data_from_file.py [-h] dataset_id table_id source_file_name Loads data into BigQuery from a local file. For more information, see the README.rst. Example invocation: - $ python load_data_from_file.py example_dataset example_table example-data.csv + $ python load_data_from_file.py example_dataset example_table \ + example-data.csv The dataset and table should already exist. positional arguments: - dataset_name - table_name + dataset_id + table_id source_file_name Path to a .csv file to upload. optional arguments: @@ -233,25 +261,26 @@ To run this sample: $ python load_data_from_gcs.py - usage: load_data_from_gcs.py [-h] dataset_name table_name source + usage: load_data_from_gcs.py [-h] dataset_id table_id source Loads data into BigQuery from an object in Google Cloud Storage. For more information, see the README.rst. Example invocation: - $ python load_data_from_gcs.py example_dataset example_table gs://example-bucket/example-data.csv + $ python load_data_from_gcs.py example_dataset example_table \ + gs://example-bucket/example-data.csv The dataset and table should already exist. positional arguments: - dataset_name - table_name - source The Google Cloud Storage object to load. Must be in the format - gs://bucket_name/object_name + dataset_id + table_id + source The Google Cloud Storage object to load. Must be in the format + gs://bucket_name/object_name optional arguments: - -h, --help show this help message and exit + -h, --help show this help message and exit Load streaming data @@ -265,24 +294,25 @@ To run this sample: $ python stream_data.py - usage: stream_data.py [-h] dataset_name table_name json_data + usage: stream_data.py [-h] dataset_id table_id json_data Loads a single row of data directly into BigQuery. For more information, see the README.rst. Example invocation: - $ python stream_data.py example_dataset example_table '["Gandalf", 2000]' + $ python stream_data.py example_dataset example_table \ + '["Gandalf", 2000]' The dataset and table should already exist. positional arguments: - dataset_name - table_name - json_data The row to load into BigQuery as an array in JSON format. + dataset_id + table_id + json_data The row to load into BigQuery as an array in JSON format. optional arguments: - -h, --help show this help message and exit + -h, --help show this help message and exit Export data to Cloud Storage @@ -296,25 +326,26 @@ To run this sample: $ python export_data_to_gcs.py - usage: export_data_to_gcs.py [-h] dataset_name table_name destination + usage: export_data_to_gcs.py [-h] dataset_id table_id destination Exports data from BigQuery to an object in Google Cloud Storage. For more information, see the README.rst. Example invocation: - $ python export_data_to_gcs.py example_dataset example_table gs://example-bucket/example-data.csv + $ python export_data_to_gcs.py example_dataset example_table \ + gs://example-bucket/example-data.csv The dataset and table should already exist. positional arguments: - dataset_name - table_name - destination The desintation Google Cloud Storage object.Must be in the - format gs://bucket_name/object_name + dataset_id + table_id + destination The destination Google Cloud Storage object. Must be in the + format gs://bucket_name/object_name optional arguments: - -h, --help show this help message and exit + -h, --help show this help message and exit diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst.in b/packages/google-cloud-bigquery/samples/snippets/README.rst.in index 49143f062e6c..61c66ab43c1d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst.in +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst.in @@ -16,13 +16,15 @@ setup: - install_deps samples: +- name: Simple Application + file: simple_app.py - name: Quickstart file: quickstart.py -- name: Sync query - file: sync_query.py +- name: Query + file: query.py show_help: true -- name: Async query - file: async_query.py +- name: Parameterized Query + file: query_params.py show_help: true - name: Snippets file: snippets.py diff --git a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py index 41b011ca06d8..5993ef0f6a20 100644 --- a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py +++ b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py @@ -19,47 +19,43 @@ For more information, see the README.rst. Example invocation: - $ python export_data_to_gcs.py example_dataset example_table \ + $ python export_data_to_gcs.py example_dataset example_table \\ gs://example-bucket/example-data.csv The dataset and table should already exist. """ import argparse -import uuid from google.cloud import bigquery -def export_data_to_gcs(dataset_name, table_name, destination): +def export_data_to_gcs(dataset_id, table_id, destination): bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) - job_name = str(uuid.uuid4()) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) - job = bigquery_client.extract_table_to_storage( - job_name, table, destination) + job = bigquery_client.extract_table(table_ref, destination) - job.begin() - job.result() # Wait for job to complete + job.result() # Waits for job to complete print('Exported {}:{} to {}'.format( - dataset_name, table_name, destination)) + dataset_id, table_id, destination)) if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_name') - parser.add_argument('table_name') + parser.add_argument('dataset_id') + parser.add_argument('table_id') parser.add_argument( - 'destination', help='The desintation Google Cloud Storage object.' + 'destination', help='The destination Google Cloud Storage object. ' 'Must be in the format gs://bucket_name/object_name') args = parser.parse_args() export_data_to_gcs( - args.dataset_name, - args.table_name, + args.dataset_id, + args.table_id, args.destination) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py index 9e0bf9f4d130..e311daa1e624 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py @@ -19,7 +19,7 @@ For more information, see the README.rst. Example invocation: - $ python load_data_from_file.py example_dataset example_table \ + $ python load_data_from_file.py example_dataset example_table \\ example-data.csv The dataset and table should already exist. @@ -30,38 +30,37 @@ from google.cloud import bigquery -def load_data_from_file(dataset_name, table_name, source_file_name): +def load_data_from_file(dataset_id, table_id, source_file_name): bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) - - # Reload the table to get the schema. - table.reload() + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) with open(source_file_name, 'rb') as source_file: # This example uses CSV, but you can use other formats. # See https://cloud.google.com/bigquery/loading-data - job = table.upload_from_file( - source_file, source_format='text/csv') + job_config = bigquery.LoadJobConfig() + job_config.source_format = 'text/csv' + job = bigquery_client.load_table_from_file( + source_file, table_ref, job_config=job_config) - job.result() # Wait for job to complete + job.result() # Waits for job to complete print('Loaded {} rows into {}:{}.'.format( - job.output_rows, dataset_name, table_name)) + job.output_rows, dataset_id, table_id)) if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_name') - parser.add_argument('table_name') + parser.add_argument('dataset_id') + parser.add_argument('table_id') parser.add_argument( 'source_file_name', help='Path to a .csv file to upload.') args = parser.parse_args() load_data_from_file( - args.dataset_name, - args.table_name, + args.dataset_id, + args.table_id, args.source_file_name) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py index b0db3a01139d..285e6d1b22a2 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py @@ -19,40 +19,36 @@ For more information, see the README.rst. Example invocation: - $ python load_data_from_gcs.py example_dataset example_table \ + $ python load_data_from_gcs.py example_dataset example_table \\ gs://example-bucket/example-data.csv The dataset and table should already exist. """ import argparse -import uuid from google.cloud import bigquery -def load_data_from_gcs(dataset_name, table_name, source): +def load_data_from_gcs(dataset_id, table_id, source): bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) - job_name = str(uuid.uuid4()) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) - job = bigquery_client.load_table_from_storage( - job_name, table, source) + job = bigquery_client.load_table_from_uri(source, table_ref) - job.begin() - job.result() # Wait for job to complete + job.result() # Waits for job to complete print('Loaded {} rows into {}:{}.'.format( - job.output_rows, dataset_name, table_name)) + job.output_rows, dataset_id, table_id)) if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_name') - parser.add_argument('table_name') + parser.add_argument('dataset_id') + parser.add_argument('table_id') parser.add_argument( 'source', help='The Google Cloud Storage object to load. Must be in ' 'the format gs://bucket_name/object_name') @@ -60,6 +56,6 @@ def load_data_from_gcs(dataset_name, table_name, source): args = parser.parse_args() load_data_from_gcs( - args.dataset_name, - args.table_name, + args.dataset_id, + args.table_id, args.source) diff --git a/packages/google-cloud-bigquery/samples/snippets/query.py b/packages/google-cloud-bigquery/samples/snippets/query.py index 93b13b84873a..19605bbad4d9 100755 --- a/packages/google-cloud-bigquery/samples/snippets/query.py +++ b/packages/google-cloud-bigquery/samples/snippets/query.py @@ -21,69 +21,58 @@ Example invocation: $ python query.py '#standardSQL SELECT corpus - FROM `publicdata.samples.shakespeare` + FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus ORDER BY corpus' """ import argparse -import uuid from google.cloud import bigquery def query(query): client = bigquery.Client() - query_job = client.run_async_query(str(uuid.uuid4()), query) - - query_job.begin() - query_job.result() # Wait for job to complete. + query_job = client.query(query) # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + for row in query_job.result(): # Waits for job to complete. print(row) def query_standard_sql(query): client = bigquery.Client() - query_job = client.run_async_query(str(uuid.uuid4()), query) - # Set use_legacy_sql to False to use standard SQL syntax. See: - # https://cloud.google.com/bigquery/docs/reference/standard-sql/enabling-standard-sql - query_job.use_legacy_sql = False + job_config = bigquery.QueryJobConfig() - query_job.begin() - query_job.result() # Wait for job to complete. + # Set use_legacy_sql to False to use standard SQL syntax. + # Note that queries are treated as standard SQL by default. + job_config.use_legacy_sql = False + query_job = client.query(query, job_config=job_config) # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + for row in query_job.result(): # Waits for job to complete. print(row) def query_destination_table(query, dest_dataset_id, dest_table_id): client = bigquery.Client() - query_job = client.run_async_query(str(uuid.uuid4()), query) + job_config = bigquery.QueryJobConfig() # Allow for query results larger than the maximum response size. - query_job.allow_large_results = True + job_config.allow_large_results = True # When large results are allowed, a destination table must be set. - dest_dataset = client.dataset(dest_dataset_id) - dest_table = dest_dataset.table(dest_table_id) - query_job.destination = dest_table + dest_dataset_ref = client.dataset(dest_dataset_id) + dest_table_ref = dest_dataset_ref.table(dest_table_id) + job_config.destination = dest_table_ref # Allow the results table to be overwritten. - query_job.write_disposition = 'WRITE_TRUNCATE' + job_config.write_disposition = 'WRITE_TRUNCATE' - query_job.begin() - query_job.result() # Wait for job to complete. + query_job = client.query(query, job_config=job_config) - # Verify that the results were written to the destination table. - dest_table.reload() # Get the table metadata, such as the schema. - for row in dest_table.fetch_data(): + # Print the results. + for row in query_job.result(): # Waits for job to complete. print(row) diff --git a/packages/google-cloud-bigquery/samples/snippets/query_params.py b/packages/google-cloud-bigquery/samples/snippets/query_params.py index 435af29d3b56..594f51c758d0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/query_params.py +++ b/packages/google-cloud-bigquery/samples/snippets/query_params.py @@ -19,13 +19,12 @@ For more information, see the README.rst. Example invocation: - $ python query_params.py --use-named-params 'romeoandjuliet' 100 - $ python query_params.py --use-positional-params 'romeoandjuliet' 100 + $ python query_params.py named 'romeoandjuliet' 100 + $ python query_params.py positional 'romeoandjuliet' 100 """ import argparse import datetime -import uuid from google.cloud import bigquery import pytz @@ -40,28 +39,23 @@ def query_positional_params(corpus, min_word_count): AND word_count >= ? ORDER BY word_count DESC; """ - query_job = client.run_async_query( - str(uuid.uuid4()), - query, - query_parameters=( - bigquery.ScalarQueryParameter( - # Set the name to None to use positional parameters (? symbol - # in the query). Note that you cannot mix named and positional - # parameters. - None, 'STRING', corpus), - bigquery.ScalarQueryParameter(None, 'INT64', min_word_count))) - - # Only standard SQL syntax supports parameters in queries. - # See: https://cloud.google.com/bigquery/sql-reference/ - query_job.use_legacy_sql = False - - query_job.begin() + # Set the name to None to use positional parameters (? symbol in the + # query). Note that you cannot mix named and positional parameters. + # See: https://cloud.google.com/bigquery/docs/parameterized-queries/ + query_params = [ + bigquery.ScalarQueryParameter(None, 'STRING', corpus), + bigquery.ScalarQueryParameter(None, 'INT64', min_word_count) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query(query, job_config=job_config) + query_job.result() # Wait for job to complete # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + destination_table_ref = query_job.destination + table = client.get_table(destination_table_ref) + for row in client.list_rows(table): print(row) @@ -74,22 +68,21 @@ def query_named_params(corpus, min_word_count): AND word_count >= @min_word_count ORDER BY word_count DESC; """ - query_job = client.run_async_query( - str(uuid.uuid4()), - query, - query_parameters=( - bigquery.ScalarQueryParameter('corpus', 'STRING', corpus), - bigquery.ScalarQueryParameter( - 'min_word_count', 'INT64', min_word_count))) - query_job.use_legacy_sql = False - - query_job.begin() + query_params = [ + bigquery.ScalarQueryParameter('corpus', 'STRING', corpus), + bigquery.ScalarQueryParameter( + 'min_word_count', 'INT64', min_word_count) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query(query, job_config=job_config) + query_job.result() # Wait for job to complete # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + destination_table_ref = query_job.destination + table = client.get_table(destination_table_ref) + for row in client.list_rows(table): print(row) @@ -104,68 +97,65 @@ def query_array_params(gender, states): ORDER BY count DESC LIMIT 10; """ - query_job = client.run_async_query( - str(uuid.uuid4()), - query, - query_parameters=( - bigquery.ScalarQueryParameter('gender', 'STRING', gender), - bigquery.ArrayQueryParameter('states', 'STRING', states))) - query_job.use_legacy_sql = False - - query_job.begin() + query_params = [ + bigquery.ScalarQueryParameter('gender', 'STRING', gender), + bigquery.ArrayQueryParameter('states', 'STRING', states) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query(query, job_config=job_config) + query_job.result() # Wait for job to complete # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + destination_table_ref = query_job.destination + table = client.get_table(destination_table_ref) + for row in client.list_rows(table): print(row) def query_timestamp_params(year, month, day, hour, minute): client = bigquery.Client() query = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);' - query_job = client.run_async_query( - str(uuid.uuid4()), - query, - query_parameters=[ - bigquery.ScalarQueryParameter( - 'ts_value', - 'TIMESTAMP', - datetime.datetime( - year, month, day, hour, minute, tzinfo=pytz.UTC))]) - query_job.use_legacy_sql = False - - query_job.begin() - query_job.result() # Wait for job to complete + query_params = [ + bigquery.ScalarQueryParameter( + 'ts_value', + 'TIMESTAMP', + datetime.datetime(year, month, day, hour, minute, tzinfo=pytz.UTC)) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query(query, job_config=job_config) + + query_job.result() # Waits for job to complete # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + destination_table_ref = query_job.destination + table = client.get_table(destination_table_ref) + for row in client.list_rows(table): print(row) def query_struct_params(x, y): client = bigquery.Client() query = 'SELECT @struct_value AS s;' - query_job = client.run_async_query( - str(uuid.uuid4()), - query, - query_parameters=[ - bigquery.StructQueryParameter( - 'struct_value', - bigquery.ScalarQueryParameter('x', 'INT64', x), - bigquery.ScalarQueryParameter('y', 'STRING', y))]) - query_job.use_legacy_sql = False - - query_job.begin() - query_job.result() # Wait for job to complete + query_params = [ + bigquery.StructQueryParameter( + 'struct_value', + bigquery.ScalarQueryParameter('x', 'INT64', x), + bigquery.ScalarQueryParameter('y', 'STRING', y) + ) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query(query, job_config=job_config) + + query_job.result() # Waits for job to complete # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + destination_table_ref = query_job.destination + table = client.get_table(destination_table_ref) + for row in client.list_rows(table): print(row) diff --git a/packages/google-cloud-bigquery/samples/snippets/query_test.py b/packages/google-cloud-bigquery/samples/snippets/query_test.py index 9d6c912b4e5f..3d456cb59679 100644 --- a/packages/google-cloud-bigquery/samples/snippets/query_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/query_test.py @@ -23,7 +23,7 @@ def test_query(capsys): # Query only outputs the first 10 rows, sort results to avoid randomness query_string = '''#standardSQL SELECT corpus - FROM `publicdata.samples.shakespeare` + FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus ORDER BY corpus LIMIT 10;''' @@ -38,7 +38,7 @@ def test_query(capsys): def test_query_standard_sql(capsys): # Query only outputs the first 10 rows, sort results to avoid randomness query_string = '''SELECT corpus - FROM `publicdata.samples.shakespeare` + FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus ORDER BY corpus LIMIT 10;''' @@ -54,7 +54,7 @@ def test_query_destination_table(capsys): # Query only outputs the first 10 rows, sort results to avoid randomness query_string = '''#standardSQL SELECT corpus - FROM `publicdata.samples.shakespeare` + FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus ORDER BY corpus LIMIT 10;''' diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart.py b/packages/google-cloud-bigquery/samples/snippets/quickstart.py index 2c9923f6eb80..10ae58e84caa 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart.py @@ -24,15 +24,16 @@ def run_quickstart(): bigquery_client = bigquery.Client() # The name for the new dataset - dataset_name = 'my_new_dataset' + dataset_id = 'my_new_dataset' - # Prepares the new dataset - dataset = bigquery_client.dataset(dataset_name) + # Prepares a reference to the new dataset + dataset_ref = bigquery_client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) # Creates the new dataset - dataset.create() + dataset = bigquery_client.create_dataset(dataset) - print('Dataset {} created.'.format(dataset.name)) + print('Dataset {} created.'.format(dataset.dataset_id)) # [END bigquery_quickstart] diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py index f5842960ce6e..02931086a11e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py @@ -13,6 +13,7 @@ # limitations under the License. from google.cloud import bigquery +from google.cloud.exceptions import NotFound import pytest import quickstart @@ -28,15 +29,23 @@ def temporary_dataset(): """Fixture that ensures the test dataset does not exist before or after a test.""" bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(DATASET_ID) + dataset_ref = bigquery_client.dataset(DATASET_ID) - if dataset.exists(): - dataset.delete() + if dataset_exists(dataset_ref, bigquery_client): + bigquery_client.delete_dataset(dataset_ref) yield - if dataset.exists(): - dataset.delete() + if dataset_exists(dataset_ref, bigquery_client): + bigquery_client.delete_dataset(dataset_ref) + + +def dataset_exists(dataset, client): + try: + client.get_dataset(dataset) + return True + except NotFound: + return False def test_quickstart(capsys, temporary_dataset): diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index e90122cb65f1..0401c4221481 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ -google-cloud-bigquery==0.27.0 +google-cloud-bigquery==0.28.0 google-auth-oauthlib==0.1.1 pytz==2017.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app.py b/packages/google-cloud-bigquery/samples/snippets/simple_app.py index 9bca432ef4ad..5d0d04e666d9 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app.py @@ -17,8 +17,6 @@ """Simple application that performs a query with BigQuery.""" # [START all] # [START create_client] -import uuid - from google.cloud import bigquery @@ -26,23 +24,20 @@ def query_shakespeare(): client = bigquery.Client() # [END create_client] # [START run_query] - query_job = client.run_async_query(str(uuid.uuid4()), """ + query_job = client.query(""" #standardSQL SELECT corpus AS title, COUNT(*) AS unique_words - FROM `publicdata.samples.shakespeare` + FROM `bigquery-public-data.samples.shakespeare` GROUP BY title ORDER BY unique_words DESC LIMIT 10""") - query_job.begin() - query_job.result() # Wait for job to complete. + results = query_job.result() # Waits for job to complete. # [END run_query] # [START print_results] - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): - print(row) + for row in results: + print("{}: {}".format(row.title, row.unique_words)) # [END print_results] diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py index 401d18bbdfb4..ee75f7fc2697 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets.py @@ -25,11 +25,8 @@ """ import argparse -import itertools -import uuid from google.cloud import bigquery -import google.cloud.bigquery.job def list_projects(): @@ -47,52 +44,45 @@ def list_datasets(project=None): bigquery_client = bigquery.Client(project=project) for dataset in bigquery_client.list_datasets(): - print(dataset.name) + print(dataset.dataset_id) -def create_dataset(dataset_name, project=None): +def create_dataset(dataset_id, project=None): """Craetes a dataset in a given project. If no project is specified, then the currently active project is used. """ bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) + dataset_ref = bigquery_client.dataset(dataset_id) - dataset.create() + dataset = bigquery_client.create_dataset(bigquery.Dataset(dataset_ref)) - print('Created dataset {}.'.format(dataset_name)) + print('Created dataset {}.'.format(dataset.dataset_id)) -def list_tables(dataset_name, project=None): +def list_tables(dataset_id, project=None): """Lists all of the tables in a given dataset. If no project is specified, then the currently active project is used. """ bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) + dataset_ref = bigquery_client.dataset(dataset_id) - if not dataset.exists(): - print('Dataset {} does not exist.'.format(dataset_name)) - return + for table in bigquery_client.list_dataset_tables(dataset_ref): + print(table.table_id) - for table in dataset.list_tables(): - print(table.name) - -def create_table(dataset_name, table_name, project=None): +def create_table(dataset_id, table_id, project=None): """Creates a simple table in the given dataset. If no project is specified, then the currently active project is used. """ bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) - - if not dataset.exists(): - print('Dataset {} does not exist.'.format(dataset_name)) - return + dataset_ref = bigquery_client.dataset(dataset_id) - table = dataset.table(table_name) + table_ref = dataset_ref.table(table_id) + table = bigquery.Table(table_ref) # Set the table schema table.schema = ( @@ -101,12 +91,12 @@ def create_table(dataset_name, table_name, project=None): bigquery.SchemaField('Weight', 'FLOAT'), ) - table.create() + table = bigquery_client.create_table(table) - print('Created table {} in dataset {}.'.format(table_name, dataset_name)) + print('Created table {} in dataset {}.'.format(table_id, dataset_id)) -def list_rows(dataset_name, table_name, project=None): +def list_rows(dataset_id, table_id, project=None): """Prints rows in the given table. Will print 25 rows at most for brevity as tables can contain large amounts @@ -115,18 +105,14 @@ def list_rows(dataset_name, table_name, project=None): If no project is specified, then the currently active project is used. """ bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) - if not table.exists(): - print('Table {}:{} does not exist.'.format(dataset_name, table_name)) - return - - # Reload the table so that the schema is available. - table.reload() + # Get the table from the API so that the schema is available. + table = bigquery_client.get_table(table_ref) # Load at most 25 results. - rows = itertools.islice(table.fetch_data(), 25) + rows = bigquery_client.list_rows(table, max_results=25) # Use format to create a simple table. format_string = '{!s:<16} ' * len(table.schema) @@ -139,49 +125,50 @@ def list_rows(dataset_name, table_name, project=None): print(format_string.format(*row)) -def copy_table(dataset_name, table_name, new_table_name, project=None): +def copy_table(dataset_id, table_id, new_table_id, project=None): """Copies a table. If no project is specified, then the currently active project is used. """ bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) # This sample shows the destination table in the same dataset and project, # however, it's possible to copy across datasets and projects. You can - # also copy muliple source tables into a single destination table by + # also copy multiple source tables into a single destination table by # providing addtional arguments to `copy_table`. - destination_table = dataset.table(new_table_name) + destination_table_ref = dataset_ref.table(new_table_id) # Create a job to copy the table to the destination table. - job_id = str(uuid.uuid4()) - job = bigquery_client.copy_table( - job_id, destination_table, table) + # Start by creating a job configuration + job_config = bigquery.CopyJobConfig() + + # Configure the job to create the table if it doesn't exist. + job_config.create_disposition = ( + bigquery.job.CreateDisposition.CREATE_IF_NEEDED) - # Create the table if it doesn't exist. - job.create_disposition = ( - google.cloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED) + copy_job = bigquery_client.copy_table( + table_ref, destination_table_ref, job_config=job_config) - job.begin() # Start the job. print('Waiting for job to finish...') - job.result() + copy_job.result() - print('Table {} copied to {}.'.format(table_name, new_table_name)) + print('Table {} copied to {}.'.format(table_id, new_table_id)) -def delete_table(dataset_name, table_name, project=None): +def delete_table(dataset_id, table_id, project=None): """Deletes a table in a given dataset. If no project is specified, then the currently active project is used. """ bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) - table.delete() + bigquery_client.delete_table(table_ref) - print('Table {}:{} deleted.'.format(dataset_name, table_name)) + print('Table {}:{} deleted.'.format(dataset_id, table_id)) if __name__ == '__main__': @@ -200,32 +187,32 @@ def delete_table(dataset_name, table_name, project=None): create_dataset_parser = subparsers.add_parser( 'list-datasets', help=list_datasets.__doc__) - create_dataset_parser.add_argument('dataset_name') + create_dataset_parser.add_argument('dataset_id') list_tables_parser = subparsers.add_parser( 'list-tables', help=list_tables.__doc__) - list_tables_parser.add_argument('dataset_name') + list_tables_parser.add_argument('dataset_id') create_table_parser = subparsers.add_parser( 'create-table', help=create_table.__doc__) - create_table_parser.add_argument('dataset_name') - create_table_parser.add_argument('table_name') + create_table_parser.add_argument('dataset_id') + create_table_parser.add_argument('table_id') list_rows_parser = subparsers.add_parser( 'list-rows', help=list_rows.__doc__) - list_rows_parser.add_argument('dataset_name') - list_rows_parser.add_argument('table_name') + list_rows_parser.add_argument('dataset_id') + list_rows_parser.add_argument('table_id') copy_table_parser = subparsers.add_parser( 'copy-table', help=copy_table.__doc__) - copy_table_parser.add_argument('dataset_name') - copy_table_parser.add_argument('table_name') - copy_table_parser.add_argument('new_table_name') + copy_table_parser.add_argument('dataset_id') + copy_table_parser.add_argument('table_id') + copy_table_parser.add_argument('new_table_id') delete_table_parser = subparsers.add_parser( 'delete-table', help=delete_table.__doc__) - delete_table_parser.add_argument('dataset_name') - delete_table_parser.add_argument('table_name') + delete_table_parser.add_argument('dataset_id') + delete_table_parser.add_argument('table_id') args = parser.parse_args() @@ -234,14 +221,14 @@ def delete_table(dataset_name, table_name, project=None): elif args.command == 'list-datasets': list_datasets(args.project) elif args.command == 'create-dataset': - create_dataset(args.dataset_name, args.project) + create_dataset(args.dataset_id, args.project) elif args.command == 'list-tables': - list_tables(args.dataset_name, args.project) + list_tables(args.dataset_id, args.project) elif args.command == 'create-table': - create_table(args.dataset_name, args.table_name, args.project) + create_table(args.dataset_id, args.table_id, args.project) elif args.command == 'list-rows': - list_rows(args.dataset_name, args.table_name, args.project) + list_rows(args.dataset_id, args.table_id, args.project) elif args.command == 'copy-table': - copy_table(args.dataset_name, args.table_name, args.new_table_name) + copy_table(args.dataset_id, args.table_id, args.new_table_id) elif args.command == 'delete-table': - delete_table(args.dataset_name, args.table_name, args.project) + delete_table(args.dataset_id, args.table_id, args.project) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py index af368d9a4a2a..5f666ccc6c85 100644 --- a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py @@ -13,6 +13,7 @@ # limitations under the License. from google.cloud import bigquery +from google.cloud.exceptions import NotFound import pytest import snippets @@ -38,17 +39,25 @@ def test_list_datasets(capsys): @pytest.fixture def cleanup_dataset(): - dataset_name = 'test_temporary_dataset' + dataset_id = 'test_temporary_dataset' bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(dataset_name) + dataset_ref = bigquery_client.dataset(dataset_id) - if dataset.exists(): - dataset.delete() + if dataset_exists(dataset_ref, bigquery_client): + bigquery_client.delete_dataset(dataset_ref) - yield dataset_name + yield dataset_id - if dataset.exists(): - dataset.delete() + if dataset_exists(dataset_ref, bigquery_client): + bigquery_client.delete_dataset(dataset_ref) + + +def dataset_exists(dataset, client): + try: + client.get_dataset(dataset) + return True + except NotFound: + return False def test_create_dataset(capsys, cleanup_dataset): @@ -87,46 +96,57 @@ def temporary_table(): """Fixture that returns a factory for tables that do not yet exist and will be automatically deleted after the test.""" bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(DATASET_ID) + dataset_ref = bigquery_client.dataset(DATASET_ID) tables = [] - def factory(table_name): - new_table = dataset.table(table_name) - if new_table.exists(): - new_table.delete() - tables.append(new_table) - return new_table + def factory(table_id): + new_table_ref = dataset_ref.table(table_id) + if table_exists(new_table_ref, bigquery_client): + bigquery_client.delete_table(new_table_ref) + tables.append(new_table_ref) + return new_table_ref yield factory for table in tables: - if table.exists(): - table.delete() + if table_exists(table, bigquery_client): + bigquery_client.delete_table(table) + + +def table_exists(table, client): + try: + client.get_table(table) + return True + except NotFound: + return False def test_create_table(temporary_table): + bigquery_client = bigquery.Client() new_table = temporary_table('test_create_table') - snippets.create_table(DATASET_ID, new_table.name) - assert new_table.exists() + snippets.create_table(DATASET_ID, new_table.table_id) + assert table_exists(new_table, bigquery_client) @pytest.mark.slow def test_copy_table(temporary_table): + bigquery_client = bigquery.Client() new_table = temporary_table('test_copy_table') - snippets.copy_table(DATASET_ID, TABLE_ID, new_table.name) - assert new_table.exists() + snippets.copy_table(DATASET_ID, TABLE_ID, new_table.table_id) + assert table_exists(new_table, bigquery_client) def test_delete_table(): # Create a table to delete bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(DATASET_ID) - table = dataset.table('test_delete_table') + dataset_ref = bigquery_client.dataset(DATASET_ID) + table_ref = dataset_ref.table('test_delete_table') + table = bigquery.Table(table_ref) - if not table.exists(): + if not table_exists(table, bigquery_client): table.schema = [bigquery.SchemaField('id', 'INTEGER')] - table.create() + table = bigquery_client.create_table(table) - snippets.delete_table(DATASET_ID, table.name) + snippets.delete_table(DATASET_ID, table.table_id) - assert not table.exists() + assert not table_exists(table, bigquery_client) diff --git a/packages/google-cloud-bigquery/samples/snippets/stream_data.py b/packages/google-cloud-bigquery/samples/snippets/stream_data.py index 7d9970c3aa34..c54960048206 100644 --- a/packages/google-cloud-bigquery/samples/snippets/stream_data.py +++ b/packages/google-cloud-bigquery/samples/snippets/stream_data.py @@ -19,7 +19,7 @@ For more information, see the README.rst. Example invocation: - $ python stream_data.py example_dataset example_table \ + $ python stream_data.py example_dataset example_table \\ '["Gandalf", 2000]' The dataset and table should already exist. @@ -32,20 +32,20 @@ from google.cloud import bigquery -def stream_data(dataset_name, table_name, json_data): +def stream_data(dataset_id, table_id, json_data): bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) data = json.loads(json_data) - # Reload the table to get the schema. - table.reload() + # Get the table from the API so that the schema is available. + table = bigquery_client.get_table(table_ref) rows = [data] - errors = table.insert_data(rows) + errors = bigquery_client.create_rows(table, rows) if not errors: - print('Loaded 1 row into {}:{}'.format(dataset_name, table_name)) + print('Loaded 1 row into {}:{}'.format(dataset_id, table_id)) else: print('Errors:') pprint(errors) @@ -55,8 +55,8 @@ def stream_data(dataset_name, table_name, json_data): parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_name') - parser.add_argument('table_name') + parser.add_argument('dataset_id') + parser.add_argument('table_id') parser.add_argument( 'json_data', help='The row to load into BigQuery as an array in JSON format.') @@ -64,6 +64,6 @@ def stream_data(dataset_name, table_name, json_data): args = parser.parse_args() stream_data( - args.dataset_name, - args.table_name, + args.dataset_id, + args.table_id, args.json_data) diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py index ca585c0a14e4..a170b66291e9 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py @@ -21,7 +21,6 @@ """ import argparse -import uuid from google.cloud import bigquery from google_auth_oauthlib import flow @@ -29,15 +28,10 @@ def run_query(credentials, project, query): client = bigquery.Client(project=project, credentials=credentials) - query_job = client.run_async_query(str(uuid.uuid4()), query) - - query_job.begin() - query_job.result() # Wait for the job to complete. + query_job = client.query(query) # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + for row in query_job.result(): # Wait for the job to complete. print(row) From d8c0212507669c12b95a9f1e9f03420db7229475 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Wed, 1 Nov 2017 12:30:10 -0700 Subject: [PATCH 0847/2016] Auto-update dependencies. [(#1186)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1186) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 0401c4221481..d73dc929dc14 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ google-cloud-bigquery==0.28.0 google-auth-oauthlib==0.1.1 -pytz==2017.2 +pytz==2017.3 From 0362a92314f33b811d5858e819bf2f6690ac5d7c Mon Sep 17 00:00:00 2001 From: DPE bot Date: Thu, 16 Nov 2017 09:28:12 -0800 Subject: [PATCH 0848/2016] Auto-update dependencies. [(#1227)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1227) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index d73dc929dc14..c2b4ac2d5adc 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ google-cloud-bigquery==0.28.0 -google-auth-oauthlib==0.1.1 +google-auth-oauthlib==0.2.0 pytz==2017.3 From 1dbbc500423fb0056ebe06d8bcf9fbaaef528063 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 1 Dec 2017 09:35:09 -0800 Subject: [PATCH 0849/2016] /s/buckets/datasets in BigQuery auth sample [(#1242)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1242) --- .../google-cloud-bigquery/samples/snippets/auth_snippets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py b/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py index 9a0c490d9c36..670c6a1ec443 100644 --- a/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py @@ -41,8 +41,8 @@ def explicit(): 'service_account.json') # Make an authenticated API request - buckets = list(bigquery_client.list_datasets()) - print(buckets) + datasets = list(bigquery_client.list_datasets()) + print(datasets) if __name__ == '__main__': From 4d4c0246e38b1a40eefc3351bfd6d2af9f166f0d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 7 Dec 2017 09:46:43 -0800 Subject: [PATCH 0850/2016] Fix broken link to core Client service account helper. [(#1256)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1256) --- .../google-cloud-bigquery/samples/snippets/auth_snippets.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py b/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py index 670c6a1ec443..1dc6fddd204b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py +++ b/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py @@ -35,8 +35,7 @@ def explicit(): # Explicitly use service account credentials by specifying the private key # file. All clients in google-cloud-python have this helper, see - # https://google-cloud-python.readthedocs.io/en/latest/core/modules.html - # #google.cloud.client.Client.from_service_account_json + # https://googlecloudplatform.github.io/google-cloud-python/latest/core/auth.html#service-accounts bigquery_client = bigquery.Client.from_service_account_json( 'service_account.json') From 1fd638901ed3f9f34683524a590d745c5d8a4a22 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Thu, 7 Dec 2017 10:34:29 -0800 Subject: [PATCH 0851/2016] Added "Open in Cloud Shell" buttons to README files [(#1254)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1254) --- .../samples/snippets/README.rst | 168 ++++++++++-------- .../samples/snippets/README.rst.in | 2 + 2 files changed, 100 insertions(+), 70 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index 1a3b889a302d..b0e5810a32ff 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -3,6 +3,10 @@ Google BigQuery Python Samples =============================================================================== +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/README.rst + + This directory contains samples for Google BigQuery. `Google BigQuery`_ is Google's fully managed, petabyte scale, low cost analytics data warehouse. BigQuery is NoOps—there is no infrastructure to manage and you don't need a database administrator—so you can focus on analyzing data to find meaningful insights, use familiar SQL, and take advantage of our pay-as-you-go model. @@ -17,39 +21,20 @@ Setup Authentication ++++++++++++++ -Authentication is typically done through `Application Default Credentials`_, -which means you do not have to change the code to authenticate as long as -your environment has credentials. You have a few options for setting up -authentication: - -#. When running locally, use the `Google Cloud SDK`_ - - .. code-block:: bash - - gcloud auth application-default login - - -#. When running on App Engine or Compute Engine, credentials are already - set-up. However, you may need to configure your Compute Engine instance - with `additional scopes`_. - -#. You can create a `Service Account key file`_. This file can be used to - authenticate to Google Cloud Platform services from any environment. To use - the file, set the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable to - the path to the key file, for example: - - .. code-block:: bash - - export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account.json +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. -.. _Application Default Credentials: https://cloud.google.com/docs/authentication#getting_credentials_for_server-centric_flow -.. _additional scopes: https://cloud.google.com/compute/docs/authentication#using -.. _Service Account key file: https://developers.google.com/identity/protocols/OAuth2ServiceAccount#creatinganaccount +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started Install Dependencies ++++++++++++++++++++ -#. Install `pip`_ and `virtualenv`_ if you do not already have them. +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup #. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. @@ -73,6 +58,10 @@ Samples Simple Application +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/simple_app.py;bigquery/cloud-client/README.rst + + To run this sample: @@ -85,6 +74,10 @@ To run this sample: Quickstart +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/quickstart.py;bigquery/cloud-client/README.rst + + To run this sample: @@ -97,6 +90,10 @@ To run this sample: Query +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/query.py;bigquery/cloud-client/README.rst + + To run this sample: @@ -106,23 +103,23 @@ To run this sample: $ python query.py usage: query.py [-h] [--use_standard_sql] - [--destination_table DESTINATION_TABLE] - query - + [--destination_table DESTINATION_TABLE] + query + Command-line application to perform queries in BigQuery. - + For more information, see the README.rst. - + Example invocation: $ python query.py '#standardSQL SELECT corpus FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus ORDER BY corpus' - + positional arguments: query BigQuery SQL Query. - + optional arguments: -h, --help show this help message and exit --use_standard_sql Use standard SQL syntax. @@ -131,9 +128,14 @@ To run this sample: my_dataset.my_table + Parameterized Query +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/query_params.py;bigquery/cloud-client/README.rst + + To run this sample: @@ -143,15 +145,15 @@ To run this sample: $ python query_params.py usage: query_params.py [-h] {named,positional,array,timestamp,struct} ... - + Command-line app to perform queries with parameters in BigQuery. - + For more information, see the README.rst. - + Example invocation: $ python query_params.py named 'romeoandjuliet' 100 $ python query_params.py positional 'romeoandjuliet' 100 - + positional arguments: {named,positional,array,timestamp,struct} samples @@ -160,14 +162,19 @@ To run this sample: array Run a query with an array parameter. timestamp Run a query with a timestamp parameter. struct Run a query with a struct parameter. - + optional arguments: -h, --help show this help message and exit + Snippets +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/snippets.py;bigquery/cloud-client/README.rst + + To run this sample: @@ -179,16 +186,16 @@ To run this sample: usage: snippets.py [-h] [--project PROJECT] {list-projects,list-datasets,list-tables,create-table,list-rows,copy-table,delete-table} ... - + Samples that demonstrate basic operations in the BigQuery API. - + For more information, see the README.rst. - + Example invocation: $ python snippets.py list-datasets - + The dataset and table should already exist. - + positional arguments: {list-projects,list-datasets,list-tables,create-table,list-rows,copy-table,delete-table} list-projects @@ -212,15 +219,20 @@ To run this sample: currently active project is used. delete-table Deletes a table in a given dataset. If no project is specified, then the currently active project is used. - + optional arguments: -h, --help show this help message and exit --project PROJECT + Load data from a file +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/load_data_from_file.py;bigquery/cloud-client/README.rst + + To run this sample: @@ -230,29 +242,34 @@ To run this sample: $ python load_data_from_file.py usage: load_data_from_file.py [-h] dataset_id table_id source_file_name - + Loads data into BigQuery from a local file. - + For more information, see the README.rst. - + Example invocation: $ python load_data_from_file.py example_dataset example_table \ example-data.csv - + The dataset and table should already exist. - + positional arguments: dataset_id table_id source_file_name Path to a .csv file to upload. - + optional arguments: -h, --help show this help message and exit + Load data from Cloud Storage +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/load_data_from_gcs.py;bigquery/cloud-client/README.rst + + To run this sample: @@ -262,30 +279,35 @@ To run this sample: $ python load_data_from_gcs.py usage: load_data_from_gcs.py [-h] dataset_id table_id source - + Loads data into BigQuery from an object in Google Cloud Storage. - + For more information, see the README.rst. - + Example invocation: $ python load_data_from_gcs.py example_dataset example_table \ gs://example-bucket/example-data.csv - + The dataset and table should already exist. - + positional arguments: dataset_id table_id source The Google Cloud Storage object to load. Must be in the format gs://bucket_name/object_name - + optional arguments: -h, --help show this help message and exit + Load streaming data +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/stream_data.py;bigquery/cloud-client/README.rst + + To run this sample: @@ -295,29 +317,34 @@ To run this sample: $ python stream_data.py usage: stream_data.py [-h] dataset_id table_id json_data - + Loads a single row of data directly into BigQuery. - + For more information, see the README.rst. - + Example invocation: $ python stream_data.py example_dataset example_table \ '["Gandalf", 2000]' - + The dataset and table should already exist. - + positional arguments: dataset_id table_id json_data The row to load into BigQuery as an array in JSON format. - + optional arguments: -h, --help show this help message and exit + Export data to Cloud Storage +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/export_data_to_gcs.py;bigquery/cloud-client/README.rst + + To run this sample: @@ -327,29 +354,30 @@ To run this sample: $ python export_data_to_gcs.py usage: export_data_to_gcs.py [-h] dataset_id table_id destination - + Exports data from BigQuery to an object in Google Cloud Storage. - + For more information, see the README.rst. - + Example invocation: $ python export_data_to_gcs.py example_dataset example_table \ gs://example-bucket/example-data.csv - + The dataset and table should already exist. - + positional arguments: dataset_id table_id destination The destination Google Cloud Storage object. Must be in the format gs://bucket_name/object_name - + optional arguments: -h, --help show this help message and exit + The client library ------------------------------------------------------------------------------- diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst.in b/packages/google-cloud-bigquery/samples/snippets/README.rst.in index 61c66ab43c1d..55572c0709ee 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst.in +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst.in @@ -43,3 +43,5 @@ samples: show_help: true cloud_client_library: true + +folder: bigquery/cloud-client \ No newline at end of file From de8f449e67f75a3e0fa50bbb7da7a33601868c42 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 11 Dec 2017 10:43:12 -0800 Subject: [PATCH 0852/2016] Move imports into bigquery samples Change-Id: I0be8d3e0778352a8b814258f83d13d131cb5054e --- .../samples/snippets/user_credentials.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py index a170b66291e9..9ab062fb3fe7 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py @@ -22,11 +22,10 @@ import argparse -from google.cloud import bigquery -from google_auth_oauthlib import flow - def run_query(credentials, project, query): + from google.cloud import bigquery + client = bigquery.Client(project=project, credentials=credentials) query_job = client.query(query) @@ -36,6 +35,8 @@ def run_query(credentials, project, query): def authenticate_and_query(project, query, launch_browser=True): + from google_auth_oauthlib import flow + appflow = flow.InstalledAppFlow.from_client_secrets_file( 'client_secrets.json', scopes=['https://www.googleapis.com/auth/bigquery']) From ac229437e5201c14689eda3e0f8dd05d891a6d36 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 1 Dec 2017 17:07:38 -0800 Subject: [PATCH 0853/2016] BigQuery: rewrite simple app tutorial. - Add region tags for needed dependencies. - Use more relevant query from public datasets. --- .../samples/snippets/requirements.txt | 2 ++ .../samples/snippets/simple_app.py | 23 +++++++++++-------- .../samples/snippets/simple_app_test.py | 6 ++--- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index c2b4ac2d5adc..618a85dfd39e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,5 @@ +# [START bigquery_simple_app_pkgs] google-cloud-bigquery==0.28.0 +# [END bigquery_simple_app_pkgs] google-auth-oauthlib==0.2.0 pytz==2017.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app.py b/packages/google-cloud-bigquery/samples/snippets/simple_app.py index 5d0d04e666d9..9f266ed7aac4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app.py @@ -16,20 +16,25 @@ """Simple application that performs a query with BigQuery.""" # [START all] -# [START create_client] +# [START bigquery_simple_app_deps] from google.cloud import bigquery +# [END bigquery_simple_app_deps] -def query_shakespeare(): +def query_stackoverflow(): + # [START create_client] client = bigquery.Client() # [END create_client] # [START run_query] query_job = client.query(""" - #standardSQL - SELECT corpus AS title, COUNT(*) AS unique_words - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY title - ORDER BY unique_words DESC + SELECT + CONCAT( + 'https://stackoverflow.com/questions/', + CAST(id as STRING)) as url, + view_count + FROM `bigquery-public-data.stackoverflow.posts_questions` + WHERE tags like '%google-bigquery%' + ORDER BY view_count DESC LIMIT 10""") results = query_job.result() # Waits for job to complete. @@ -37,10 +42,10 @@ def query_shakespeare(): # [START print_results] for row in results: - print("{}: {}".format(row.title, row.unique_words)) + print("{} : {} views".format(row.url, row.view_count)) # [END print_results] if __name__ == '__main__': - query_shakespeare() + query_stackoverflow() # [END all] diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py b/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py index 3733bf6ef3fe..33f9f1adf69a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py @@ -15,7 +15,7 @@ import simple_app -def test_query_shakespeare(capsys): - simple_app.query_shakespeare() +def test_query_stackoverflow(capsys): + simple_app.query_stackoverflow() out, _ = capsys.readouterr() - assert 'hamlet' in out + assert 'views' in out From 3918cc512cbaaaacd9557eedc0e71fdc824be02c Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 12 Dec 2017 09:26:42 -0800 Subject: [PATCH 0854/2016] Auto-update dependencies. [(#1272)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1272) * Auto-update dependencies. * Update requirements.txt --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 618a85dfd39e..c2b4ac2d5adc 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,3 @@ -# [START bigquery_simple_app_pkgs] google-cloud-bigquery==0.28.0 -# [END bigquery_simple_app_pkgs] google-auth-oauthlib==0.2.0 pytz==2017.3 From ccd3016b7ac7fbc08f3ed4740151188fc0919c2a Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 9 Jan 2018 09:06:58 -0800 Subject: [PATCH 0855/2016] Auto-update dependencies. [(#1307)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1307) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index c2b4ac2d5adc..659797618770 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ -google-cloud-bigquery==0.28.0 +google-cloud-bigquery==0.29.0 google-auth-oauthlib==0.2.0 pytz==2017.3 From e24830d23f4255c4625f1a021baf079edf83e07d Mon Sep 17 00:00:00 2001 From: DPE bot Date: Fri, 9 Feb 2018 10:46:48 -0800 Subject: [PATCH 0856/2016] Auto-update dependencies. [(#1355)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1355) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 659797618770..8ec538738ec4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ -google-cloud-bigquery==0.29.0 +google-cloud-bigquery==0.30.0 google-auth-oauthlib==0.2.0 -pytz==2017.3 +pytz==2018.3 From 0123157c7524846de7350e05953f456263747a7f Mon Sep 17 00:00:00 2001 From: DPE bot Date: Mon, 5 Mar 2018 12:28:55 -0800 Subject: [PATCH 0857/2016] Auto-update dependencies. [(#1377)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1377) * Auto-update dependencies. * Update requirements.txt --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 8ec538738ec4..62490b384052 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ -google-cloud-bigquery==0.30.0 +google-cloud-bigquery==0.31.0 google-auth-oauthlib==0.2.0 pytz==2018.3 From 1e7f396e349921dd641ee68e76f39298e851c11d Mon Sep 17 00:00:00 2001 From: chenyumic Date: Fri, 6 Apr 2018 22:57:36 -0700 Subject: [PATCH 0858/2016] Regenerate the README files and fix the Open in Cloud Shell link for some samples [(#1441)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1441) --- .../samples/snippets/README.rst | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index b0e5810a32ff..60b51a7cac72 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -12,7 +12,7 @@ This directory contains samples for Google BigQuery. `Google BigQuery`_ is Googl -.. _Google BigQuery: https://cloud.google.com/bigquery/docs +.. _Google BigQuery: https://cloud.google.com/bigquery/docs Setup ------------------------------------------------------------------------------- @@ -59,7 +59,7 @@ Simple Application +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/simple_app.py;bigquery/cloud-client/README.rst + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/simple_app.py,bigquery/cloud-client/README.rst @@ -75,7 +75,7 @@ Quickstart +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/quickstart.py;bigquery/cloud-client/README.rst + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/quickstart.py,bigquery/cloud-client/README.rst @@ -91,7 +91,7 @@ Query +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/query.py;bigquery/cloud-client/README.rst + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/query.py,bigquery/cloud-client/README.rst @@ -133,7 +133,7 @@ Parameterized Query +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/query_params.py;bigquery/cloud-client/README.rst + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/query_params.py,bigquery/cloud-client/README.rst @@ -172,7 +172,7 @@ Snippets +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/snippets.py;bigquery/cloud-client/README.rst + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/snippets.py,bigquery/cloud-client/README.rst @@ -230,7 +230,7 @@ Load data from a file +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/load_data_from_file.py;bigquery/cloud-client/README.rst + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/load_data_from_file.py,bigquery/cloud-client/README.rst @@ -267,7 +267,7 @@ Load data from Cloud Storage +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/load_data_from_gcs.py;bigquery/cloud-client/README.rst + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/load_data_from_gcs.py,bigquery/cloud-client/README.rst @@ -305,7 +305,7 @@ Load streaming data +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/stream_data.py;bigquery/cloud-client/README.rst + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/stream_data.py,bigquery/cloud-client/README.rst @@ -342,7 +342,7 @@ Export data to Cloud Storage +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/export_data_to_gcs.py;bigquery/cloud-client/README.rst + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/export_data_to_gcs.py,bigquery/cloud-client/README.rst From 483620b35a175e2a8c2a510c110b093e8a82c0cb Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 17 Apr 2018 15:03:48 -0700 Subject: [PATCH 0859/2016] BigQuery: Remove unused samples [(#1455)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1455) * updates simple app region tags to standard * removes samples redundant with google-cloud-python * adds user_credentials.py to readme generator * shows command help in user_credentials section --- .../samples/snippets/README.rst | 283 +----------------- .../samples/snippets/README.rst.in | 28 +- .../samples/snippets/auth_snippets.py | 61 ---- .../samples/snippets/auth_snippets_test.py | 33 -- .../samples/snippets/export_data_to_gcs.py | 61 ---- .../snippets/export_data_to_gcs_test.py | 31 -- .../samples/snippets/load_data_from_file.py | 66 ---- .../snippets/load_data_from_file_test.py | 33 -- .../samples/snippets/load_data_from_gcs.py | 61 ---- .../snippets/load_data_from_gcs_test.py | 33 -- .../samples/snippets/query.py | 103 ------- .../samples/snippets/query_params.py | 225 -------------- .../samples/snippets/query_params_test.py | 52 ---- .../samples/snippets/query_test.py | 66 ---- .../samples/snippets/simple_app.py | 16 +- .../samples/snippets/snippets.py | 234 --------------- .../samples/snippets/snippets_test.py | 152 ---------- .../samples/snippets/stream_data.py | 69 ----- .../samples/snippets/stream_data_test.py | 29 -- 19 files changed, 26 insertions(+), 1610 deletions(-) delete mode 100644 packages/google-cloud-bigquery/samples/snippets/auth_snippets.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/auth_snippets_test.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py delete mode 100755 packages/google-cloud-bigquery/samples/snippets/query.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/query_params.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/query_params_test.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/query_test.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/snippets.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/snippets_test.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/stream_data.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/stream_data_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index 60b51a7cac72..cca0ff3e5054 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -55,22 +55,6 @@ Install Dependencies Samples ------------------------------------------------------------------------------- -Simple Application -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -.. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/simple_app.py,bigquery/cloud-client/README.rst - - - - -To run this sample: - -.. code-block:: bash - - $ python simple_app.py - - Quickstart +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -87,92 +71,11 @@ To run this sample: $ python quickstart.py -Query -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -.. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/query.py,bigquery/cloud-client/README.rst - - - - -To run this sample: - -.. code-block:: bash - - $ python query.py - - usage: query.py [-h] [--use_standard_sql] - [--destination_table DESTINATION_TABLE] - query - - Command-line application to perform queries in BigQuery. - - For more information, see the README.rst. - - Example invocation: - $ python query.py '#standardSQL - SELECT corpus - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY corpus - ORDER BY corpus' - - positional arguments: - query BigQuery SQL Query. - - optional arguments: - -h, --help show this help message and exit - --use_standard_sql Use standard SQL syntax. - --destination_table DESTINATION_TABLE - Destination table to use for results. Example: - my_dataset.my_table - - - -Parameterized Query -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -.. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/query_params.py,bigquery/cloud-client/README.rst - - - - -To run this sample: - -.. code-block:: bash - - $ python query_params.py - - usage: query_params.py [-h] {named,positional,array,timestamp,struct} ... - - Command-line app to perform queries with parameters in BigQuery. - - For more information, see the README.rst. - - Example invocation: - $ python query_params.py named 'romeoandjuliet' 100 - $ python query_params.py positional 'romeoandjuliet' 100 - - positional arguments: - {named,positional,array,timestamp,struct} - samples - named Run a query with named parameters. - positional Run a query with positional parameters. - array Run a query with an array parameter. - timestamp Run a query with a timestamp parameter. - struct Run a query with a struct parameter. - - optional arguments: - -h, --help show this help message and exit - - - -Snippets +Simple Application +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/snippets.py,bigquery/cloud-client/README.rst + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/simple_app.py,bigquery/cloud-client/README.rst @@ -181,56 +84,14 @@ To run this sample: .. code-block:: bash - $ python snippets.py - - usage: snippets.py [-h] [--project PROJECT] - {list-projects,list-datasets,list-tables,create-table,list-rows,copy-table,delete-table} - ... - - Samples that demonstrate basic operations in the BigQuery API. - - For more information, see the README.rst. - - Example invocation: - $ python snippets.py list-datasets - - The dataset and table should already exist. - - positional arguments: - {list-projects,list-datasets,list-tables,create-table,list-rows,copy-table,delete-table} - list-projects - list-datasets Lists all datasets in a given project. If no project - is specified, then the currently active project is - used. - list-datasets Lists all datasets in a given project. If no project - is specified, then the currently active project is - used. - list-tables Lists all of the tables in a given dataset. If no - project is specified, then the currently active - project is used. - create-table Creates a simple table in the given dataset. If no - project is specified, then the currently active - project is used. - list-rows Prints rows in the given table. Will print 25 rows at - most for brevity as tables can contain large amounts - of rows. If no project is specified, then the - currently active project is used. - copy-table Copies a table. If no project is specified, then the - currently active project is used. - delete-table Deletes a table in a given dataset. If no project is - specified, then the currently active project is used. - - optional arguments: - -h, --help show this help message and exit - --project PROJECT - + $ python simple_app.py -Load data from a file +User Credentials +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/load_data_from_file.py,bigquery/cloud-client/README.rst + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/user_credentials.py,bigquery/cloud-client/README.rst @@ -239,140 +100,22 @@ To run this sample: .. code-block:: bash - $ python load_data_from_file.py - - usage: load_data_from_file.py [-h] dataset_id table_id source_file_name - - Loads data into BigQuery from a local file. + $ python user_credentials.py - For more information, see the README.rst. + usage: user_credentials.py [-h] [--launch-browser] project query - Example invocation: - $ python load_data_from_file.py example_dataset example_table \ - example-data.csv + Command-line application to run a query using user credentials. - The dataset and table should already exist. + You must supply a client secrets file, which would normally be bundled with + your application. positional arguments: - dataset_id - table_id - source_file_name Path to a .csv file to upload. + project Project to use for BigQuery billing. + query BigQuery SQL Query. optional arguments: -h, --help show this help message and exit - - - -Load data from Cloud Storage -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -.. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/load_data_from_gcs.py,bigquery/cloud-client/README.rst - - - - -To run this sample: - -.. code-block:: bash - - $ python load_data_from_gcs.py - - usage: load_data_from_gcs.py [-h] dataset_id table_id source - - Loads data into BigQuery from an object in Google Cloud Storage. - - For more information, see the README.rst. - - Example invocation: - $ python load_data_from_gcs.py example_dataset example_table \ - gs://example-bucket/example-data.csv - - The dataset and table should already exist. - - positional arguments: - dataset_id - table_id - source The Google Cloud Storage object to load. Must be in the format - gs://bucket_name/object_name - - optional arguments: - -h, --help show this help message and exit - - - -Load streaming data -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -.. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/stream_data.py,bigquery/cloud-client/README.rst - - - - -To run this sample: - -.. code-block:: bash - - $ python stream_data.py - - usage: stream_data.py [-h] dataset_id table_id json_data - - Loads a single row of data directly into BigQuery. - - For more information, see the README.rst. - - Example invocation: - $ python stream_data.py example_dataset example_table \ - '["Gandalf", 2000]' - - The dataset and table should already exist. - - positional arguments: - dataset_id - table_id - json_data The row to load into BigQuery as an array in JSON format. - - optional arguments: - -h, --help show this help message and exit - - - -Export data to Cloud Storage -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -.. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/export_data_to_gcs.py,bigquery/cloud-client/README.rst - - - - -To run this sample: - -.. code-block:: bash - - $ python export_data_to_gcs.py - - usage: export_data_to_gcs.py [-h] dataset_id table_id destination - - Exports data from BigQuery to an object in Google Cloud Storage. - - For more information, see the README.rst. - - Example invocation: - $ python export_data_to_gcs.py example_dataset example_table \ - gs://example-bucket/example-data.csv - - The dataset and table should already exist. - - positional arguments: - dataset_id - table_id - destination The destination Google Cloud Storage object. Must be in the - format gs://bucket_name/object_name - - optional arguments: - -h, --help show this help message and exit + --launch-browser Use a local server flow to authenticate. diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst.in b/packages/google-cloud-bigquery/samples/snippets/README.rst.in index 55572c0709ee..008b5179565d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst.in +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst.in @@ -4,7 +4,7 @@ product: name: Google BigQuery short_name: BigQuery url: https://cloud.google.com/bigquery/docs - description: > + description: > `Google BigQuery`_ is Google's fully managed, petabyte scale, low cost analytics data warehouse. BigQuery is NoOps—there is no infrastructure to manage and you don't need a database administrator—so you can focus on @@ -16,30 +16,12 @@ setup: - install_deps samples: -- name: Simple Application - file: simple_app.py - name: Quickstart file: quickstart.py -- name: Query - file: query.py - show_help: true -- name: Parameterized Query - file: query_params.py - show_help: true -- name: Snippets - file: snippets.py - show_help: true -- name: Load data from a file - file: load_data_from_file.py - show_help: true -- name: Load data from Cloud Storage - file: load_data_from_gcs.py - show_help: true -- name: Load streaming data - file: stream_data.py - show_help: true -- name: Export data to Cloud Storage - file: export_data_to_gcs.py +- name: Simple Application + file: simple_app.py +- name: User Credentials + file: user_credentials.py show_help: true cloud_client_library: true diff --git a/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py b/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py deleted file mode 100644 index 1dc6fddd204b..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/auth_snippets.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Demonstrates how to authenticate to Google BigQuery using the Google Cloud -Client Libraries.""" - -import argparse - - -def implicit(): - from google.cloud import bigquery - - # If you don't specify credentials when constructing the client, the - # client library will look for credentials in the environment. - bigquery_client = bigquery.Client() - - # Make an authenticated API request - datasets = list(bigquery_client.list_datasets()) - print(datasets) - - -def explicit(): - from google.cloud import bigquery - - # Explicitly use service account credentials by specifying the private key - # file. All clients in google-cloud-python have this helper, see - # https://googlecloudplatform.github.io/google-cloud-python/latest/core/auth.html#service-accounts - bigquery_client = bigquery.Client.from_service_account_json( - 'service_account.json') - - # Make an authenticated API request - datasets = list(bigquery_client.list_datasets()) - print(datasets) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - - subparsers = parser.add_subparsers(dest='command') - subparsers.add_parser('implicit', help=implicit.__doc__) - subparsers.add_parser('explicit', help=explicit.__doc__) - - args = parser.parse_args() - - if args.command == 'implicit': - implicit() - elif args.command == 'explicit': - explicit() diff --git a/packages/google-cloud-bigquery/samples/snippets/auth_snippets_test.py b/packages/google-cloud-bigquery/samples/snippets/auth_snippets_test.py deleted file mode 100644 index 5b5f2cac00cd..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/auth_snippets_test.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import mock - -import auth_snippets - - -def test_implicit(): - auth_snippets.implicit() - - -def test_explicit(): - with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS']) as creds_file: - creds_file_data = creds_file.read() - - open_mock = mock.mock_open(read_data=creds_file_data) - - with mock.patch('io.open', open_mock): - auth_snippets.explicit() diff --git a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py deleted file mode 100644 index 5993ef0f6a20..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Exports data from BigQuery to an object in Google Cloud Storage. - -For more information, see the README.rst. - -Example invocation: - $ python export_data_to_gcs.py example_dataset example_table \\ - gs://example-bucket/example-data.csv - -The dataset and table should already exist. -""" - -import argparse - -from google.cloud import bigquery - - -def export_data_to_gcs(dataset_id, table_id, destination): - bigquery_client = bigquery.Client() - dataset_ref = bigquery_client.dataset(dataset_id) - table_ref = dataset_ref.table(table_id) - - job = bigquery_client.extract_table(table_ref, destination) - - job.result() # Waits for job to complete - - print('Exported {}:{} to {}'.format( - dataset_id, table_id, destination)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_id') - parser.add_argument('table_id') - parser.add_argument( - 'destination', help='The destination Google Cloud Storage object. ' - 'Must be in the format gs://bucket_name/object_name') - - args = parser.parse_args() - - export_data_to_gcs( - args.dataset_id, - args.table_id, - args.destination) diff --git a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py b/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py deleted file mode 100644 index a41cfd226fc7..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/export_data_to_gcs_test.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2015, Google, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import export_data_to_gcs - -BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] -DATASET_ID = 'test_dataset' -TABLE_ID = 'test_table' - - -def test_export_data_to_gcs(capsys): - export_data_to_gcs.export_data_to_gcs( - DATASET_ID, - TABLE_ID, - 'gs://{}/test-export-data-to-gcs.csv'.format(BUCKET)) - - out, _ = capsys.readouterr() - - assert 'Exported' in out diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py deleted file mode 100644 index e311daa1e624..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Loads data into BigQuery from a local file. - -For more information, see the README.rst. - -Example invocation: - $ python load_data_from_file.py example_dataset example_table \\ - example-data.csv - -The dataset and table should already exist. -""" - -import argparse - -from google.cloud import bigquery - - -def load_data_from_file(dataset_id, table_id, source_file_name): - bigquery_client = bigquery.Client() - dataset_ref = bigquery_client.dataset(dataset_id) - table_ref = dataset_ref.table(table_id) - - with open(source_file_name, 'rb') as source_file: - # This example uses CSV, but you can use other formats. - # See https://cloud.google.com/bigquery/loading-data - job_config = bigquery.LoadJobConfig() - job_config.source_format = 'text/csv' - job = bigquery_client.load_table_from_file( - source_file, table_ref, job_config=job_config) - - job.result() # Waits for job to complete - - print('Loaded {} rows into {}:{}.'.format( - job.output_rows, dataset_id, table_id)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_id') - parser.add_argument('table_id') - parser.add_argument( - 'source_file_name', help='Path to a .csv file to upload.') - - args = parser.parse_args() - - load_data_from_file( - args.dataset_id, - args.table_id, - args.source_file_name) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py deleted file mode 100644 index 960fe62c9d7c..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_file_test.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2015, Google, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import load_data_from_file - -RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') -DATASET_ID = 'test_dataset' -TABLE_ID = 'test_import_table' - - -def test_load_table(capsys): - data_path = os.path.join(RESOURCES, 'data.csv') - - load_data_from_file.load_data_from_file( - DATASET_ID, - TABLE_ID, - data_path) - - out, _ = capsys.readouterr() - - assert 'Loaded 1 rows' in out diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py deleted file mode 100644 index 285e6d1b22a2..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Loads data into BigQuery from an object in Google Cloud Storage. - -For more information, see the README.rst. - -Example invocation: - $ python load_data_from_gcs.py example_dataset example_table \\ - gs://example-bucket/example-data.csv - -The dataset and table should already exist. -""" - -import argparse - -from google.cloud import bigquery - - -def load_data_from_gcs(dataset_id, table_id, source): - bigquery_client = bigquery.Client() - dataset_ref = bigquery_client.dataset(dataset_id) - table_ref = dataset_ref.table(table_id) - - job = bigquery_client.load_table_from_uri(source, table_ref) - - job.result() # Waits for job to complete - - print('Loaded {} rows into {}:{}.'.format( - job.output_rows, dataset_id, table_id)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_id') - parser.add_argument('table_id') - parser.add_argument( - 'source', help='The Google Cloud Storage object to load. Must be in ' - 'the format gs://bucket_name/object_name') - - args = parser.parse_args() - - load_data_from_gcs( - args.dataset_id, - args.table_id, - args.source) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py b/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py deleted file mode 100644 index dbd39fc5ceca..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/load_data_from_gcs_test.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2015, Google, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import load_data_from_gcs - -BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] -DATASET_ID = 'test_dataset' -TABLE_ID = 'test_import_table' - - -def test_load_table(capsys): - cloud_storage_input_uri = 'gs://{}/data.csv'.format(BUCKET) - - load_data_from_gcs.load_data_from_gcs( - DATASET_ID, - TABLE_ID, - cloud_storage_input_uri) - - out, _ = capsys.readouterr() - - assert 'Loaded 1 rows' in out diff --git a/packages/google-cloud-bigquery/samples/snippets/query.py b/packages/google-cloud-bigquery/samples/snippets/query.py deleted file mode 100755 index 19605bbad4d9..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/query.py +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Command-line application to perform queries in BigQuery. - -For more information, see the README.rst. - -Example invocation: - $ python query.py '#standardSQL - SELECT corpus - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY corpus - ORDER BY corpus' -""" - -import argparse - -from google.cloud import bigquery - - -def query(query): - client = bigquery.Client() - query_job = client.query(query) - - # Print the results. - for row in query_job.result(): # Waits for job to complete. - print(row) - - -def query_standard_sql(query): - client = bigquery.Client() - job_config = bigquery.QueryJobConfig() - - # Set use_legacy_sql to False to use standard SQL syntax. - # Note that queries are treated as standard SQL by default. - job_config.use_legacy_sql = False - query_job = client.query(query, job_config=job_config) - - # Print the results. - for row in query_job.result(): # Waits for job to complete. - print(row) - - -def query_destination_table(query, dest_dataset_id, dest_table_id): - client = bigquery.Client() - job_config = bigquery.QueryJobConfig() - - # Allow for query results larger than the maximum response size. - job_config.allow_large_results = True - - # When large results are allowed, a destination table must be set. - dest_dataset_ref = client.dataset(dest_dataset_id) - dest_table_ref = dest_dataset_ref.table(dest_table_id) - job_config.destination = dest_table_ref - - # Allow the results table to be overwritten. - job_config.write_disposition = 'WRITE_TRUNCATE' - - query_job = client.query(query, job_config=job_config) - - # Print the results. - for row in query_job.result(): # Waits for job to complete. - print(row) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('query', help='BigQuery SQL Query.') - parser.add_argument( - '--use_standard_sql', - action='store_true', - help='Use standard SQL syntax.') - parser.add_argument( - '--destination_table', - type=str, - help=( - 'Destination table to use for results. ' - 'Example: my_dataset.my_table')) - - args = parser.parse_args() - - if args.use_standard_sql: - query_standard_sql(args.query) - elif args.destination_table: - dataset, table = args.destination_table.split('.') - query_destination_table(args.query, dataset, table) - else: - query(args.query) diff --git a/packages/google-cloud-bigquery/samples/snippets/query_params.py b/packages/google-cloud-bigquery/samples/snippets/query_params.py deleted file mode 100644 index 594f51c758d0..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/query_params.py +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Command-line app to perform queries with parameters in BigQuery. - -For more information, see the README.rst. - -Example invocation: - $ python query_params.py named 'romeoandjuliet' 100 - $ python query_params.py positional 'romeoandjuliet' 100 -""" - -import argparse -import datetime - -from google.cloud import bigquery -import pytz - - -def query_positional_params(corpus, min_word_count): - client = bigquery.Client() - query = """ - SELECT word, word_count - FROM `bigquery-public-data.samples.shakespeare` - WHERE corpus = ? - AND word_count >= ? - ORDER BY word_count DESC; - """ - # Set the name to None to use positional parameters (? symbol in the - # query). Note that you cannot mix named and positional parameters. - # See: https://cloud.google.com/bigquery/docs/parameterized-queries/ - query_params = [ - bigquery.ScalarQueryParameter(None, 'STRING', corpus), - bigquery.ScalarQueryParameter(None, 'INT64', min_word_count) - ] - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = query_params - query_job = client.query(query, job_config=job_config) - - query_job.result() # Wait for job to complete - - # Print the results. - destination_table_ref = query_job.destination - table = client.get_table(destination_table_ref) - for row in client.list_rows(table): - print(row) - - -def query_named_params(corpus, min_word_count): - client = bigquery.Client() - query = """ - SELECT word, word_count - FROM `bigquery-public-data.samples.shakespeare` - WHERE corpus = @corpus - AND word_count >= @min_word_count - ORDER BY word_count DESC; - """ - query_params = [ - bigquery.ScalarQueryParameter('corpus', 'STRING', corpus), - bigquery.ScalarQueryParameter( - 'min_word_count', 'INT64', min_word_count) - ] - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = query_params - query_job = client.query(query, job_config=job_config) - - query_job.result() # Wait for job to complete - - # Print the results. - destination_table_ref = query_job.destination - table = client.get_table(destination_table_ref) - for row in client.list_rows(table): - print(row) - - -def query_array_params(gender, states): - client = bigquery.Client() - query = """ - SELECT name, sum(number) as count - FROM `bigquery-public-data.usa_names.usa_1910_2013` - WHERE gender = @gender - AND state IN UNNEST(@states) - GROUP BY name - ORDER BY count DESC - LIMIT 10; - """ - query_params = [ - bigquery.ScalarQueryParameter('gender', 'STRING', gender), - bigquery.ArrayQueryParameter('states', 'STRING', states) - ] - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = query_params - query_job = client.query(query, job_config=job_config) - - query_job.result() # Wait for job to complete - - # Print the results. - destination_table_ref = query_job.destination - table = client.get_table(destination_table_ref) - for row in client.list_rows(table): - print(row) - - -def query_timestamp_params(year, month, day, hour, minute): - client = bigquery.Client() - query = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);' - query_params = [ - bigquery.ScalarQueryParameter( - 'ts_value', - 'TIMESTAMP', - datetime.datetime(year, month, day, hour, minute, tzinfo=pytz.UTC)) - ] - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = query_params - query_job = client.query(query, job_config=job_config) - - query_job.result() # Waits for job to complete - - # Print the results. - destination_table_ref = query_job.destination - table = client.get_table(destination_table_ref) - for row in client.list_rows(table): - print(row) - - -def query_struct_params(x, y): - client = bigquery.Client() - query = 'SELECT @struct_value AS s;' - query_params = [ - bigquery.StructQueryParameter( - 'struct_value', - bigquery.ScalarQueryParameter('x', 'INT64', x), - bigquery.ScalarQueryParameter('y', 'STRING', y) - ) - ] - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = query_params - query_job = client.query(query, job_config=job_config) - - query_job.result() # Waits for job to complete - - # Print the results. - destination_table_ref = query_job.destination - table = client.get_table(destination_table_ref) - for row in client.list_rows(table): - print(row) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - subparsers = parser.add_subparsers(dest='sample', help='samples') - named_parser = subparsers.add_parser( - 'named', - help='Run a query with named parameters.') - named_parser.add_argument( - 'corpus', - help='Corpus to search from Shakespeare dataset.') - named_parser.add_argument( - 'min_word_count', - help='Minimum count of words to query.', - type=int) - positional_parser = subparsers.add_parser( - 'positional', - help='Run a query with positional parameters.') - positional_parser.add_argument( - 'corpus', - help='Corpus to search from Shakespeare dataset.') - positional_parser.add_argument( - 'min_word_count', - help='Minimum count of words to query.', - type=int) - array_parser = subparsers.add_parser( - 'array', - help='Run a query with an array parameter.') - array_parser.add_argument( - 'gender', - choices=['F', 'M'], - help='Gender of baby in the Social Security baby names database.') - array_parser.add_argument( - 'states', - help='U.S. States to consider for popular baby names.', - nargs='+') - timestamp_parser = subparsers.add_parser( - 'timestamp', - help='Run a query with a timestamp parameter.') - timestamp_parser.add_argument('year', type=int) - timestamp_parser.add_argument('month', type=int) - timestamp_parser.add_argument('day', type=int) - timestamp_parser.add_argument('hour', type=int) - timestamp_parser.add_argument('minute', type=int) - struct_parser = subparsers.add_parser( - 'struct', - help='Run a query with a struct parameter.') - struct_parser.add_argument('x', help='Integer for x', type=int) - struct_parser.add_argument('y', help='String for y') - args = parser.parse_args() - - if args.sample == 'named': - query_named_params(args.corpus, args.min_word_count) - elif args.sample == 'positional': - query_positional_params(args.corpus, args.min_word_count) - elif args.sample == 'array': - query_array_params(args.gender, args.states) - elif args.sample == 'timestamp': - query_timestamp_params( - args.year, args.month, args.day, args.hour, args.minute) - elif args.sample == 'struct': - query_struct_params(args.x, args.y) - else: - print('Unexpected value for sample') diff --git a/packages/google-cloud-bigquery/samples/snippets/query_params_test.py b/packages/google-cloud-bigquery/samples/snippets/query_params_test.py deleted file mode 100644 index f4b4931372b5..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/query_params_test.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import query_params - - -def test_query_array_params(capsys): - query_params.query_array_params( - gender='M', - states=['WA', 'WI', 'WV', 'WY']) - out, _ = capsys.readouterr() - assert 'James' in out - - -def test_query_named_params(capsys): - query_params.query_named_params( - corpus='romeoandjuliet', - min_word_count=100) - out, _ = capsys.readouterr() - assert 'the' in out - - -def test_query_positional_params(capsys): - query_params.query_positional_params( - corpus='romeoandjuliet', - min_word_count=100) - out, _ = capsys.readouterr() - assert 'the' in out - - -def test_query_struct_params(capsys): - query_params.query_struct_params(765, "hello world") - out, _ = capsys.readouterr() - assert '765' in out - assert 'hello world' in out - - -def test_query_timestamp_params(capsys): - query_params.query_timestamp_params(2016, 12, 7, 8, 0) - out, _ = capsys.readouterr() - assert '2016, 12, 7, 9, 0' in out diff --git a/packages/google-cloud-bigquery/samples/snippets/query_test.py b/packages/google-cloud-bigquery/samples/snippets/query_test.py deleted file mode 100644 index 3d456cb59679..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/query_test.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import query - - -DATASET_ID = 'test_dataset' -TABLE_ID = 'test_destination_table' - - -def test_query(capsys): - # Query only outputs the first 10 rows, sort results to avoid randomness - query_string = '''#standardSQL - SELECT corpus - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY corpus - ORDER BY corpus - LIMIT 10;''' - - query.query(query_string) - - out, _ = capsys.readouterr() - - assert 'antonyandcleopatra' in out - - -def test_query_standard_sql(capsys): - # Query only outputs the first 10 rows, sort results to avoid randomness - query_string = '''SELECT corpus - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY corpus - ORDER BY corpus - LIMIT 10;''' - - query.query_standard_sql(query_string) - - out, _ = capsys.readouterr() - - assert 'antonyandcleopatra' in out - - -def test_query_destination_table(capsys): - # Query only outputs the first 10 rows, sort results to avoid randomness - query_string = '''#standardSQL - SELECT corpus - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY corpus - ORDER BY corpus - LIMIT 10;''' - - query.query_destination_table(query_string, DATASET_ID, TABLE_ID) - - out, _ = capsys.readouterr() - - assert 'antonyandcleopatra' in out diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app.py b/packages/google-cloud-bigquery/samples/snippets/simple_app.py index 9f266ed7aac4..a09e97f12467 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app.py @@ -15,17 +15,17 @@ # limitations under the License. """Simple application that performs a query with BigQuery.""" -# [START all] +# [START bigquery_simple_app_all] # [START bigquery_simple_app_deps] from google.cloud import bigquery # [END bigquery_simple_app_deps] def query_stackoverflow(): - # [START create_client] + # [START bigquery_simple_app_client] client = bigquery.Client() - # [END create_client] - # [START run_query] + # [END bigquery_simple_app_client] + # [START bigquery_simple_app_query] query_job = client.query(""" SELECT CONCAT( @@ -38,14 +38,14 @@ def query_stackoverflow(): LIMIT 10""") results = query_job.result() # Waits for job to complete. - # [END run_query] + # [END bigquery_simple_app_query] - # [START print_results] + # [START bigquery_simple_app_print] for row in results: print("{} : {} views".format(row.url, row.view_count)) - # [END print_results] + # [END bigquery_simple_app_print] if __name__ == '__main__': query_stackoverflow() -# [END all] +# [END bigquery_simple_app_all] diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets.py b/packages/google-cloud-bigquery/samples/snippets/snippets.py deleted file mode 100644 index ee75f7fc2697..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/snippets.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Samples that demonstrate basic operations in the BigQuery API. - -For more information, see the README.rst. - -Example invocation: - $ python snippets.py list-datasets - -The dataset and table should already exist. -""" - -import argparse - -from google.cloud import bigquery - - -def list_projects(): - bigquery_client = bigquery.Client() - - for project in bigquery_client.list_projects(): - print(project.project_id) - - -def list_datasets(project=None): - """Lists all datasets in a given project. - - If no project is specified, then the currently active project is used. - """ - bigquery_client = bigquery.Client(project=project) - - for dataset in bigquery_client.list_datasets(): - print(dataset.dataset_id) - - -def create_dataset(dataset_id, project=None): - """Craetes a dataset in a given project. - - If no project is specified, then the currently active project is used. - """ - bigquery_client = bigquery.Client(project=project) - - dataset_ref = bigquery_client.dataset(dataset_id) - - dataset = bigquery_client.create_dataset(bigquery.Dataset(dataset_ref)) - - print('Created dataset {}.'.format(dataset.dataset_id)) - - -def list_tables(dataset_id, project=None): - """Lists all of the tables in a given dataset. - - If no project is specified, then the currently active project is used. - """ - bigquery_client = bigquery.Client(project=project) - dataset_ref = bigquery_client.dataset(dataset_id) - - for table in bigquery_client.list_dataset_tables(dataset_ref): - print(table.table_id) - - -def create_table(dataset_id, table_id, project=None): - """Creates a simple table in the given dataset. - - If no project is specified, then the currently active project is used. - """ - bigquery_client = bigquery.Client(project=project) - dataset_ref = bigquery_client.dataset(dataset_id) - - table_ref = dataset_ref.table(table_id) - table = bigquery.Table(table_ref) - - # Set the table schema - table.schema = ( - bigquery.SchemaField('Name', 'STRING'), - bigquery.SchemaField('Age', 'INTEGER'), - bigquery.SchemaField('Weight', 'FLOAT'), - ) - - table = bigquery_client.create_table(table) - - print('Created table {} in dataset {}.'.format(table_id, dataset_id)) - - -def list_rows(dataset_id, table_id, project=None): - """Prints rows in the given table. - - Will print 25 rows at most for brevity as tables can contain large amounts - of rows. - - If no project is specified, then the currently active project is used. - """ - bigquery_client = bigquery.Client(project=project) - dataset_ref = bigquery_client.dataset(dataset_id) - table_ref = dataset_ref.table(table_id) - - # Get the table from the API so that the schema is available. - table = bigquery_client.get_table(table_ref) - - # Load at most 25 results. - rows = bigquery_client.list_rows(table, max_results=25) - - # Use format to create a simple table. - format_string = '{!s:<16} ' * len(table.schema) - - # Print schema field names - field_names = [field.name for field in table.schema] - print(format_string.format(*field_names)) - - for row in rows: - print(format_string.format(*row)) - - -def copy_table(dataset_id, table_id, new_table_id, project=None): - """Copies a table. - - If no project is specified, then the currently active project is used. - """ - bigquery_client = bigquery.Client(project=project) - dataset_ref = bigquery_client.dataset(dataset_id) - table_ref = dataset_ref.table(table_id) - - # This sample shows the destination table in the same dataset and project, - # however, it's possible to copy across datasets and projects. You can - # also copy multiple source tables into a single destination table by - # providing addtional arguments to `copy_table`. - destination_table_ref = dataset_ref.table(new_table_id) - - # Create a job to copy the table to the destination table. - # Start by creating a job configuration - job_config = bigquery.CopyJobConfig() - - # Configure the job to create the table if it doesn't exist. - job_config.create_disposition = ( - bigquery.job.CreateDisposition.CREATE_IF_NEEDED) - - copy_job = bigquery_client.copy_table( - table_ref, destination_table_ref, job_config=job_config) - - print('Waiting for job to finish...') - copy_job.result() - - print('Table {} copied to {}.'.format(table_id, new_table_id)) - - -def delete_table(dataset_id, table_id, project=None): - """Deletes a table in a given dataset. - - If no project is specified, then the currently active project is used. - """ - bigquery_client = bigquery.Client(project=project) - dataset_ref = bigquery_client.dataset(dataset_id) - table_ref = dataset_ref.table(table_id) - - bigquery_client.delete_table(table_ref) - - print('Table {}:{} deleted.'.format(dataset_id, table_id)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('--project', default=None) - - subparsers = parser.add_subparsers(dest='command') - - list_projects_parser = subparsers.add_parser( - 'list-projects', help=list_projects.__doc__) - - list_datasets_parser = subparsers.add_parser( - 'list-datasets', help=list_datasets.__doc__) - - create_dataset_parser = subparsers.add_parser( - 'list-datasets', help=list_datasets.__doc__) - create_dataset_parser.add_argument('dataset_id') - - list_tables_parser = subparsers.add_parser( - 'list-tables', help=list_tables.__doc__) - list_tables_parser.add_argument('dataset_id') - - create_table_parser = subparsers.add_parser( - 'create-table', help=create_table.__doc__) - create_table_parser.add_argument('dataset_id') - create_table_parser.add_argument('table_id') - - list_rows_parser = subparsers.add_parser( - 'list-rows', help=list_rows.__doc__) - list_rows_parser.add_argument('dataset_id') - list_rows_parser.add_argument('table_id') - - copy_table_parser = subparsers.add_parser( - 'copy-table', help=copy_table.__doc__) - copy_table_parser.add_argument('dataset_id') - copy_table_parser.add_argument('table_id') - copy_table_parser.add_argument('new_table_id') - - delete_table_parser = subparsers.add_parser( - 'delete-table', help=delete_table.__doc__) - delete_table_parser.add_argument('dataset_id') - delete_table_parser.add_argument('table_id') - - args = parser.parse_args() - - if args.command == 'list-projects': - list_projects() - elif args.command == 'list-datasets': - list_datasets(args.project) - elif args.command == 'create-dataset': - create_dataset(args.dataset_id, args.project) - elif args.command == 'list-tables': - list_tables(args.dataset_id, args.project) - elif args.command == 'create-table': - create_table(args.dataset_id, args.table_id, args.project) - elif args.command == 'list-rows': - list_rows(args.dataset_id, args.table_id, args.project) - elif args.command == 'copy-table': - copy_table(args.dataset_id, args.table_id, args.new_table_id) - elif args.command == 'delete-table': - delete_table(args.dataset_id, args.table_id, args.project) diff --git a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py b/packages/google-cloud-bigquery/samples/snippets/snippets_test.py deleted file mode 100644 index 5f666ccc6c85..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/snippets_test.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from google.cloud import bigquery -from google.cloud.exceptions import NotFound -import pytest - -import snippets - - -DATASET_ID = 'test_dataset' -TABLE_ID = 'test_table' - - -def test_list_projects(): - snippets.list_projects() - # No need to check the ouput, lack of exception is enough. - - -def test_list_datasets(capsys): - # Requires the dataset to have been created in the test project. - snippets.list_datasets() - - out, _ = capsys.readouterr() - - assert DATASET_ID in out - - -@pytest.fixture -def cleanup_dataset(): - dataset_id = 'test_temporary_dataset' - bigquery_client = bigquery.Client() - dataset_ref = bigquery_client.dataset(dataset_id) - - if dataset_exists(dataset_ref, bigquery_client): - bigquery_client.delete_dataset(dataset_ref) - - yield dataset_id - - if dataset_exists(dataset_ref, bigquery_client): - bigquery_client.delete_dataset(dataset_ref) - - -def dataset_exists(dataset, client): - try: - client.get_dataset(dataset) - return True - except NotFound: - return False - - -def test_create_dataset(capsys, cleanup_dataset): - snippets.create_dataset(cleanup_dataset) - - out, _ = capsys.readouterr() - - assert cleanup_dataset in out - - -def test_list_tables(capsys): - # Requires the dataset and table to have been created in the test project. - snippets.list_tables(DATASET_ID) - - out, _ = capsys.readouterr() - - assert TABLE_ID in out - - -def test_list_rows(capsys): - # Requires the dataset and table to have been created in the test project. - - # Check for the schema. It's okay if the table is empty as long as there - # aren't any errors. - - snippets.list_rows(DATASET_ID, TABLE_ID) - - out, _ = capsys.readouterr() - - assert 'Name' in out - assert 'Age' in out - - -@pytest.fixture -def temporary_table(): - """Fixture that returns a factory for tables that do not yet exist and - will be automatically deleted after the test.""" - bigquery_client = bigquery.Client() - dataset_ref = bigquery_client.dataset(DATASET_ID) - tables = [] - - def factory(table_id): - new_table_ref = dataset_ref.table(table_id) - if table_exists(new_table_ref, bigquery_client): - bigquery_client.delete_table(new_table_ref) - tables.append(new_table_ref) - return new_table_ref - - yield factory - - for table in tables: - if table_exists(table, bigquery_client): - bigquery_client.delete_table(table) - - -def table_exists(table, client): - try: - client.get_table(table) - return True - except NotFound: - return False - - -def test_create_table(temporary_table): - bigquery_client = bigquery.Client() - new_table = temporary_table('test_create_table') - snippets.create_table(DATASET_ID, new_table.table_id) - assert table_exists(new_table, bigquery_client) - - -@pytest.mark.slow -def test_copy_table(temporary_table): - bigquery_client = bigquery.Client() - new_table = temporary_table('test_copy_table') - snippets.copy_table(DATASET_ID, TABLE_ID, new_table.table_id) - assert table_exists(new_table, bigquery_client) - - -def test_delete_table(): - # Create a table to delete - bigquery_client = bigquery.Client() - dataset_ref = bigquery_client.dataset(DATASET_ID) - table_ref = dataset_ref.table('test_delete_table') - table = bigquery.Table(table_ref) - - if not table_exists(table, bigquery_client): - table.schema = [bigquery.SchemaField('id', 'INTEGER')] - table = bigquery_client.create_table(table) - - snippets.delete_table(DATASET_ID, table.table_id) - - assert not table_exists(table, bigquery_client) diff --git a/packages/google-cloud-bigquery/samples/snippets/stream_data.py b/packages/google-cloud-bigquery/samples/snippets/stream_data.py deleted file mode 100644 index c54960048206..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/stream_data.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Loads a single row of data directly into BigQuery. - -For more information, see the README.rst. - -Example invocation: - $ python stream_data.py example_dataset example_table \\ - '["Gandalf", 2000]' - -The dataset and table should already exist. -""" - -import argparse -import json -from pprint import pprint - -from google.cloud import bigquery - - -def stream_data(dataset_id, table_id, json_data): - bigquery_client = bigquery.Client() - dataset_ref = bigquery_client.dataset(dataset_id) - table_ref = dataset_ref.table(table_id) - data = json.loads(json_data) - - # Get the table from the API so that the schema is available. - table = bigquery_client.get_table(table_ref) - - rows = [data] - errors = bigquery_client.create_rows(table, rows) - - if not errors: - print('Loaded 1 row into {}:{}'.format(dataset_id, table_id)) - else: - print('Errors:') - pprint(errors) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_id') - parser.add_argument('table_id') - parser.add_argument( - 'json_data', - help='The row to load into BigQuery as an array in JSON format.') - - args = parser.parse_args() - - stream_data( - args.dataset_id, - args.table_id, - args.json_data) diff --git a/packages/google-cloud-bigquery/samples/snippets/stream_data_test.py b/packages/google-cloud-bigquery/samples/snippets/stream_data_test.py deleted file mode 100644 index 25982b88ad50..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/stream_data_test.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2015, Google, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import stream_data - - -DATASET_ID = 'test_dataset' -TABLE_ID = 'test_import_table' - - -def test_stream_data(capsys): - stream_data.stream_data( - DATASET_ID, - TABLE_ID, - '["Gandalf", 2000]') - - out, _ = capsys.readouterr() - - assert 'Loaded 1 row' in out From 0634cb54d5793b6c4520d1bca1b800fc1e61611f Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 17 Apr 2018 17:47:05 -0700 Subject: [PATCH 0860/2016] BigQuery: Removes unused API samples [(#1459)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1459) * BigQuery: Removes unused API samples * removes unused sample data files --- .../google-cloud-bigquery/samples/snippets/resources/data.csv | 1 - .../google-cloud-bigquery/samples/snippets/resources/data.json | 1 - .../google-cloud-bigquery/samples/snippets/resources/schema.json | 1 - 3 files changed, 3 deletions(-) delete mode 100644 packages/google-cloud-bigquery/samples/snippets/resources/data.csv delete mode 100644 packages/google-cloud-bigquery/samples/snippets/resources/data.json delete mode 100644 packages/google-cloud-bigquery/samples/snippets/resources/schema.json diff --git a/packages/google-cloud-bigquery/samples/snippets/resources/data.csv b/packages/google-cloud-bigquery/samples/snippets/resources/data.csv deleted file mode 100644 index affe39ef89de..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/resources/data.csv +++ /dev/null @@ -1 +0,0 @@ -Gandalf,2000,140.0,1 diff --git a/packages/google-cloud-bigquery/samples/snippets/resources/data.json b/packages/google-cloud-bigquery/samples/snippets/resources/data.json deleted file mode 100644 index b8eef90c5911..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/resources/data.json +++ /dev/null @@ -1 +0,0 @@ -{"Name": "Gandalf", "Age": 2000, "Weight": 140.0, "IsMagic": true} diff --git a/packages/google-cloud-bigquery/samples/snippets/resources/schema.json b/packages/google-cloud-bigquery/samples/snippets/resources/schema.json deleted file mode 100644 index a48971ef857b..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/resources/schema.json +++ /dev/null @@ -1 +0,0 @@ -[{"type": "STRING", "name": "Name"}, {"type": "INTEGER", "name": "Age"}, {"type": "FLOAT", "name": "Weight"}, {"type": "BOOLEAN", "name": "IsMagic"}] \ No newline at end of file From 96247d445d643780164491ca5afb99a07f49995d Mon Sep 17 00:00:00 2001 From: Frank Natividad Date: Thu, 26 Apr 2018 10:26:41 -0700 Subject: [PATCH 0861/2016] Update READMEs to fix numbering and add git clone [(#1464)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1464) --- .../google-cloud-bigquery/samples/snippets/README.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index cca0ff3e5054..02bc856f9786 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -31,10 +31,16 @@ credentials for applications. Install Dependencies ++++++++++++++++++++ +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + #. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. - .. _Python Development Environment Setup Guide: - https://cloud.google.com/python/setup + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup #. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. From 88ff65f645cb5dd61be52c43b6ef8c14c7c3c36f Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 18 May 2018 12:50:17 -0700 Subject: [PATCH 0862/2016] BigQuery: Moves BigQuery tutorial for Dataproc to python-docs-samples [(#1494)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1494) --- .../samples/snippets/natality_tutorial.py | 81 +++++++++++++++++++ .../snippets/natality_tutorial_test.py | 42 ++++++++++ 2 files changed, 123 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py new file mode 100644 index 000000000000..5bfa8f1d27a9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def run_natality_tutorial(): + # [START bigquery_query_natality_tutorial] + """Create a Google BigQuery linear regression input table. + + In the code below, the following actions are taken: + * A new dataset is created "natality_regression." + * A query is run against the public dataset, + bigquery-public-data.samples.natality, selecting only the data of + interest to the regression, the output of which is stored in a new + "regression_input" table. + * The output table is moved over the wire to the user's default project via + the built-in BigQuery Connector for Spark that bridges BigQuery and + Cloud Dataproc. + """ + + from google.cloud import bigquery + + # Create a new Google BigQuery client using Google Cloud Platform project + # defaults. + client = bigquery.Client() + + # Prepare a reference to a new dataset for storing the query results. + dataset_ref = client.dataset('natality_regression') + dataset = bigquery.Dataset(dataset_ref) + + # Create the new BigQuery dataset. + dataset = client.create_dataset(dataset) + + # In the new BigQuery dataset, create a reference to a new table for + # storing the query results. + table_ref = dataset.table('regression_input') + + # Configure the query job. + job_config = bigquery.QueryJobConfig() + + # Set the destination table to the table reference created above. + job_config.destination = table_ref + + # Set up a query in Standard SQL, which is the default for the BigQuery + # Python client library. + # The query selects the fields of interest. + query = """ + SELECT + weight_pounds, mother_age, father_age, gestation_weeks, + weight_gain_pounds, apgar_5min + FROM + `bigquery-public-data.samples.natality` + WHERE + weight_pounds IS NOT NULL + AND mother_age IS NOT NULL + AND father_age IS NOT NULL + AND gestation_weeks IS NOT NULL + AND weight_gain_pounds IS NOT NULL + AND apgar_5min IS NOT NULL + """ + + # Run the query. + query_job = client.query(query, job_config=job_config) + query_job.result() # Waits for the query to finish + # [END bigquery_query_natality_tutorial] + + +if __name__ == '__main__': + run_natality_tutorial() diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py new file mode 100644 index 000000000000..5165f7244f17 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py @@ -0,0 +1,42 @@ +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +from google.cloud import exceptions + +import natality_tutorial + + +def dataset_exists(dataset, client): + try: + client.get_dataset(dataset) + return True + except exceptions.NotFound: + return False + + +def test_natality_tutorial(): + client = bigquery.Client() + dataset_ref = client.dataset('natality_regression') + assert not dataset_exists(dataset_ref, client) + + natality_tutorial.run_natality_tutorial() + + assert dataset_exists(dataset_ref, client) + + table = client.get_table( + bigquery.Table(dataset_ref.table('regression_input'))) + assert table.num_rows > 0 + + client.delete_dataset(dataset_ref, delete_contents=True) From a56300b55ff752e0531b4ff923861027324af557 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Thu, 21 Jun 2018 12:42:25 -0700 Subject: [PATCH 0863/2016] BigQuery: Adds authorized view tutorial [(#1535)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1535) --- .../snippets/authorized_view_tutorial.py | 108 ++++++++++++++++++ .../snippets/authorized_view_tutorial_test.py | 58 ++++++++++ .../samples/snippets/requirements.txt | 2 +- 3 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py new file mode 100644 index 000000000000..a6dff0612008 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python + +# Copyright 2018 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def run_authorized_view_tutorial(): + # Note to user: This is a group email for testing purposes. Replace with + # your own group email address when running this code. + analyst_group_email = 'example-analyst-group@google.com' + + # [START bigquery_authorized_view_tutorial] + # Create a source dataset + # [START bigquery_avt_create_source_dataset] + from google.cloud import bigquery + + client = bigquery.Client() + source_dataset_id = 'github_source_data' + + source_dataset = bigquery.Dataset(client.dataset(source_dataset_id)) + # Specify the geographic location where the dataset should reside. + source_dataset.location = 'US' + source_dataset = client.create_dataset(source_dataset) # API request + # [END bigquery_avt_create_source_dataset] + + # Populate a source table + # [START bigquery_avt_create_source_table] + source_table_id = 'github_contributors' + job_config = bigquery.QueryJobConfig() + job_config.destination = source_dataset.table(source_table_id) + sql = """ + SELECT commit, author, committer, repo_name + FROM `bigquery-public-data.github_repos.commits` + LIMIT 1000 + """ + query_job = client.query( + sql, + # Location must match that of the dataset(s) referenced in the query + # and of the destination table. + location='US', + job_config=job_config) # API request - starts the query + + query_job.result() # Waits for the query to finish + # [END bigquery_avt_create_source_table] + + # Create a separate dataset to store your view + # [START bigquery_avt_create_shared_dataset] + shared_dataset_id = 'shared_views' + shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id)) + shared_dataset.location = 'US' + shared_dataset = client.create_dataset(shared_dataset) # API request + # [END bigquery_avt_create_shared_dataset] + + # Create the view in the new dataset + # [START bigquery_avt_create_view] + shared_view_id = 'github_analyst_view' + view = bigquery.Table(shared_dataset.table(shared_view_id)) + sql_template = """ + SELECT + commit, author.name as author, + committer.name as committer, repo_name + FROM + `{}.{}.{}` + """ + view.view_query = sql_template.format( + client.project, source_dataset_id, source_table_id) + view = client.create_table(view) # API request + # [END bigquery_avt_create_view] + + # Assign access controls to the dataset containing the view + # [START bigquery_avt_shared_dataset_access] + # analyst_group_email = 'data_analysts@example.com' + access_entries = shared_dataset.access_entries + access_entries.append( + bigquery.AccessEntry('READER', 'groupByEmail', analyst_group_email) + ) + shared_dataset.access_entries = access_entries + shared_dataset = client.update_dataset( + shared_dataset, ['access_entries']) # API request + # [END bigquery_avt_shared_dataset_access] + + # Authorize the view to access the source dataset + # [START bigquery_avt_source_dataset_access] + access_entries = source_dataset.access_entries + access_entries.append( + bigquery.AccessEntry(None, 'view', view.reference.to_api_repr()) + ) + source_dataset.access_entries = access_entries + source_dataset = client.update_dataset( + source_dataset, ['access_entries']) # API request + # [START bigquery_avt_source_dataset_access] + # [END bigquery_authorized_view_tutorial] + return (source_dataset, shared_dataset) + + +if __name__ == '__main__': + run_authorized_view_tutorial() diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py new file mode 100644 index 000000000000..fbc34771c7c2 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py @@ -0,0 +1,58 @@ +# Copyright 2018 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + +import authorized_view_tutorial + + +@pytest.fixture(scope='module') +def client(): + return bigquery.Client() + + +@pytest.fixture +def to_delete(client): + doomed = [] + yield doomed + for item in doomed: + if isinstance(item, (bigquery.Dataset, bigquery.DatasetReference)): + client.delete_dataset(item, delete_contents=True) + elif isinstance(item, (bigquery.Table, bigquery.TableReference)): + client.delete_table(item) + else: + item.delete() + + +def test_authorized_view_tutorial(client, to_delete): + source_dataset, shared_dataset = ( + authorized_view_tutorial.run_authorized_view_tutorial()) + to_delete.extend([source_dataset, shared_dataset]) + + analyst_email = 'example-analyst-group@google.com' + analyst_entries = [entry for entry in shared_dataset.access_entries + if entry.entity_id == analyst_email] + assert len(analyst_entries) == 1 + assert analyst_entries[0].role == 'READER' + + authorized_view_entries = [entry for entry in source_dataset.access_entries + if entry.entity_type == 'view'] + expected_view_ref = { + 'projectId': client.project, + 'datasetId': 'shared_views', + 'tableId': 'github_analyst_view', + } + assert len(authorized_view_entries) == 1 + assert authorized_view_entries[0].entity_id == expected_view_ref diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 62490b384052..97152c191364 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ -google-cloud-bigquery==0.31.0 +google-cloud-bigquery==1.3.0 google-auth-oauthlib==0.2.0 pytz==2018.3 From 07916623f3664352596155f190ba4841cbaa09b3 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Thu, 21 Jun 2018 12:53:52 -0700 Subject: [PATCH 0864/2016] Region tag end typo [(#1536)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1536) --- .../samples/snippets/authorized_view_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py index a6dff0612008..c1465174b86d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py @@ -99,7 +99,7 @@ def run_authorized_view_tutorial(): source_dataset.access_entries = access_entries source_dataset = client.update_dataset( source_dataset, ['access_entries']) # API request - # [START bigquery_avt_source_dataset_access] + # [END bigquery_avt_source_dataset_access] # [END bigquery_authorized_view_tutorial] return (source_dataset, shared_dataset) From 118706f103a2efdad2247e4e0dcb463a9b6b964a Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Thu, 12 Jul 2018 10:41:41 -0700 Subject: [PATCH 0865/2016] Code snippets for Jupyter data visualization tutorial [(#1560)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1560) --- .../samples/snippets/jupyter_tutorial_test.py | 165 ++++++++++++++++++ .../samples/snippets/requirements.txt | 5 +- 2 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py new file mode 100644 index 000000000000..a10f1a61b805 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py @@ -0,0 +1,165 @@ +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import IPython +from IPython.terminal import interactiveshell +from IPython.testing import tools +import matplotlib +import pytest + + +# Ignore semicolon lint warning because semicolons are used in notebooks +# flake8: noqa E703 + + +@pytest.fixture(scope='session') +def ipython(): + config = tools.default_config() + config.TerminalInteractiveShell.simple_prompt = True + shell = interactiveshell.TerminalInteractiveShell.instance(config=config) + return shell + + +@pytest.fixture() +def ipython_interactive(request, ipython): + """Activate IPython's builtin hooks + + for the duration of the test scope. + """ + with ipython.builtin_trap: + yield ipython + + +def _strip_region_tags(sample_text): + """Remove blank lines and region tags from sample text""" + magic_lines = [line for line in sample_text.split('\n') + if len(line) > 0 and '# [' not in line] + return '\n'.join(magic_lines) + + +def test_jupyter_tutorial(ipython): + matplotlib.use('agg') + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + + sample = """ + # [START bigquery_jupyter_magic_gender_by_year] + %%bigquery + SELECT + source_year AS year, + COUNT(is_male) AS birth_count + FROM `bigquery-public-data.samples.natality` + GROUP BY year + ORDER BY year DESC + LIMIT 15 + # [END bigquery_jupyter_magic_gender_by_year] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + sample = """ + # [START bigquery_jupyter_magic_gender_by_year_var] + %%bigquery total_births + SELECT + source_year AS year, + COUNT(is_male) AS birth_count + FROM `bigquery-public-data.samples.natality` + GROUP BY year + ORDER BY year DESC + LIMIT 15 + # [END bigquery_jupyter_magic_gender_by_year_var] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + assert 'total_births' in ip.user_ns # verify that variable exists + total_births = ip.user_ns['total_births'] + # [START bigquery_jupyter_plot_births_by_year] + total_births.plot(kind='bar', x='year', y='birth_count'); + # [END bigquery_jupyter_plot_births_by_year] + + sample = """ + # [START bigquery_jupyter_magic_gender_by_weekday] + %%bigquery births_by_weekday + SELECT + wday, + SUM(CASE WHEN is_male THEN 1 ELSE 0 END) AS male_births, + SUM(CASE WHEN is_male THEN 0 ELSE 1 END) AS female_births + FROM `bigquery-public-data.samples.natality` + WHERE wday IS NOT NULL + GROUP BY wday + ORDER BY wday ASC + # [END bigquery_jupyter_magic_gender_by_weekday] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + assert 'births_by_weekday' in ip.user_ns # verify that variable exists + births_by_weekday = ip.user_ns['births_by_weekday'] + # [START bigquery_jupyter_plot_births_by_weekday] + births_by_weekday.plot(x='wday'); + # [END bigquery_jupyter_plot_births_by_weekday] + + # [START bigquery_jupyter_import_and_client] + from google.cloud import bigquery + client = bigquery.Client() + # [END bigquery_jupyter_import_and_client] + + # [START bigquery_jupyter_query_plurality_by_year] + sql = """ + SELECT + plurality, + COUNT(1) AS count, + year + FROM + `bigquery-public-data.samples.natality` + WHERE + NOT IS_NAN(plurality) AND plurality > 1 + GROUP BY + plurality, year + ORDER BY + count DESC + """ + df = client.query(sql).to_dataframe() + df.head() + # [END bigquery_jupyter_query_plurality_by_year] + + # [START bigquery_jupyter_plot_plurality_by_year] + pivot_table = df.pivot(index='year', columns='plurality', values='count') + pivot_table.plot(kind='bar', stacked=True, figsize=(15, 7)); + # [END bigquery_jupyter_plot_plurality_by_year] + + # [START bigquery_jupyter_query_weight_by_gestation] + sql = """ + SELECT + gestation_weeks, + AVG(weight_pounds) AS ave_weight + FROM + `bigquery-public-data.samples.natality` + WHERE + NOT IS_NAN(gestation_weeks) AND gestation_weeks <> 99 + GROUP BY + gestation_weeks + ORDER BY + gestation_weeks + """ + df = client.query(sql).to_dataframe() + # [END bigquery_jupyter_query_weight_by_gestation] + + # [START bigquery_jupyter_plot_weight_by_gestation] + ax = df.plot( + kind='bar', x='gestation_weeks', y='ave_weight', figsize=(15, 7)) + ax.set_title('Average Weight by Gestation Weeks') + ax.set_xlabel('Gestation Weeks') + ax.set_ylabel('Average Weight'); + # [END bigquery_jupyter_plot_weight_by_gestation] diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 97152c191364..7b65d6ac812a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,6 @@ -google-cloud-bigquery==1.3.0 +google-cloud-bigquery[pandas]==1.3.0 google-auth-oauthlib==0.2.0 +ipython==5.5; python_version < "3" +ipython; python_version > "3" +matplotlib pytz==2018.3 From 9323968ef8f49cb83e136375259e356152e50225 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 17 Jul 2018 15:00:57 -0700 Subject: [PATCH 0866/2016] updates last query per Steve's feedback [(#1578)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1578) --- .../samples/snippets/jupyter_tutorial_test.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py index a10f1a61b805..0affbabcb032 100644 --- a/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py @@ -139,11 +139,11 @@ def test_jupyter_tutorial(ipython): pivot_table.plot(kind='bar', stacked=True, figsize=(15, 7)); # [END bigquery_jupyter_plot_plurality_by_year] - # [START bigquery_jupyter_query_weight_by_gestation] + # [START bigquery_jupyter_query_births_by_gestation] sql = """ SELECT gestation_weeks, - AVG(weight_pounds) AS ave_weight + COUNT(1) AS count FROM `bigquery-public-data.samples.natality` WHERE @@ -154,12 +154,11 @@ def test_jupyter_tutorial(ipython): gestation_weeks """ df = client.query(sql).to_dataframe() - # [END bigquery_jupyter_query_weight_by_gestation] + # [END bigquery_jupyter_query_births_by_gestation] - # [START bigquery_jupyter_plot_weight_by_gestation] - ax = df.plot( - kind='bar', x='gestation_weeks', y='ave_weight', figsize=(15, 7)) - ax.set_title('Average Weight by Gestation Weeks') + # [START bigquery_jupyter_plot_births_by_gestation] + ax = df.plot(kind='bar', x='gestation_weeks', y='count', figsize=(15,7)) + ax.set_title('Count of Births by Gestation Weeks') ax.set_xlabel('Gestation Weeks') - ax.set_ylabel('Average Weight'); - # [END bigquery_jupyter_plot_weight_by_gestation] + ax.set_ylabel('Count'); + # [END bigquery_jupyter_plot_births_by_gestation] From 63d25f8bd49ca34736fe4add1ff5a9d3fbdce3c8 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 17 Jul 2018 16:22:27 -0700 Subject: [PATCH 0867/2016] updates tutorial to add datasets to to_delete before running [(#1579)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1579) --- .../samples/snippets/authorized_view_tutorial.py | 1 - .../samples/snippets/authorized_view_tutorial_test.py | 10 +++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py index c1465174b86d..abe4e0cb131a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py @@ -101,7 +101,6 @@ def run_authorized_view_tutorial(): source_dataset, ['access_entries']) # API request # [END bigquery_avt_source_dataset_access] # [END bigquery_authorized_view_tutorial] - return (source_dataset, shared_dataset) if __name__ == '__main__': diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py index fbc34771c7c2..954c47072c35 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py @@ -37,10 +37,14 @@ def to_delete(client): def test_authorized_view_tutorial(client, to_delete): - source_dataset, shared_dataset = ( - authorized_view_tutorial.run_authorized_view_tutorial()) - to_delete.extend([source_dataset, shared_dataset]) + source_dataset_ref = client.dataset('github_source_data') + shared_dataset_ref = client.dataset('shared_views') + to_delete.extend([source_dataset_ref, shared_dataset_ref]) + authorized_view_tutorial.run_authorized_view_tutorial() + + source_dataset = client.get_dataset(source_dataset_ref) + shared_dataset = client.get_dataset(shared_dataset_ref) analyst_email = 'example-analyst-group@google.com' analyst_entries = [entry for entry in shared_dataset.access_entries if entry.entity_id == analyst_email] From f61e86f9dc61f90c318c2eb2f980872c0c8cf7f4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 31 Aug 2018 11:01:23 -0700 Subject: [PATCH 0868/2016] Remove ipython Python 2 modifier from requirements.txt [(#1675)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1675) If ipython has tagged their packages correctly, then the modifier is not necessary. Sending a PR to check. Bug 113341391. --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 7b65d6ac812a..e90d0443c40d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,5 @@ google-cloud-bigquery[pandas]==1.3.0 google-auth-oauthlib==0.2.0 -ipython==5.5; python_version < "3" -ipython; python_version > "3" +ipython matplotlib pytz==2018.3 From 98b3d9fdc4132062148e2cba8797702df629aab3 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 20 Nov 2018 15:40:29 -0800 Subject: [PATCH 0869/2016] Auto-update dependencies. [(#1846)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1846) ACK, merging. --- .../samples/snippets/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index e90d0443c40d..4f8e5641f8ed 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-bigquery[pandas]==1.3.0 +google-cloud-bigquery[pandas]==1.7.0 google-auth-oauthlib==0.2.0 -ipython -matplotlib -pytz==2018.3 +ipython==7.1.1 +matplotlib==3.0.2 +pytz==2018.7 From 692f63c45dd7545e69887b7aa8c1fb0893d4a221 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 2 Jan 2019 11:58:00 -0800 Subject: [PATCH 0870/2016] Refactor BQ user credentials sample to use region tags [(#1952)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1952) Also add comments for the needed values and use a concrete value where possible according to the sample rubric. --- .../samples/snippets/user_credentials.py | 50 +++++++++++++------ .../samples/snippets/user_credentials_test.py | 7 +-- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py index 9ab062fb3fe7..300d9112d706 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py @@ -23,20 +23,15 @@ import argparse -def run_query(credentials, project, query): - from google.cloud import bigquery - - client = bigquery.Client(project=project, credentials=credentials) - query_job = client.query(query) - - # Print the results. - for row in query_job.result(): # Wait for the job to complete. - print(row) - - -def authenticate_and_query(project, query, launch_browser=True): +def main(project, launch_browser=True): + # [START bigquery_auth_user_flow] from google_auth_oauthlib import flow + # TODO: Use a local server in the auth flow? This is recommended but does + # not work if accessing the application remotely, such as over SSH or + # from a remote Jupyter notebook. + # launch_browser = True + appflow = flow.InstalledAppFlow.from_client_secrets_file( 'client_secrets.json', scopes=['https://www.googleapis.com/auth/bigquery']) @@ -46,7 +41,31 @@ def authenticate_and_query(project, query, launch_browser=True): else: appflow.run_console() - run_query(appflow.credentials, project, query) + credentials = appflow.credentials + # [END bigquery_auth_user_flow] + + # [START bigquery_auth_user_query] + from google.cloud import bigquery + + # TODO: This project will be billed for the query processing. The user must + # have the bigquery.jobs.create permission on this project to run a + # query. See: + # https://cloud.google.com/bigquery/docs/access-control#permissions + # project = 'user-project-id' + + client = bigquery.Client(project=project, credentials=credentials) + + query_string = """SELECT name, SUM(number) as total + FROM `bigquery-public-data.usa_names.usa_1910_current` + WHERE name = 'William' + GROUP BY name; + """ + query_job = client.query(query_string) + + # Print the results. + for row in query_job.result(): # Wait for the job to complete. + print("{}: {}".format(row['name'], row['total'])) + # [END bigquery_auth_user_query] if __name__ == '__main__': @@ -58,9 +77,8 @@ def authenticate_and_query(project, query, launch_browser=True): help='Use a local server flow to authenticate. ', action='store_true') parser.add_argument('project', help='Project to use for BigQuery billing.') - parser.add_argument('query', help='BigQuery SQL Query.') args = parser.parse_args() - authenticate_and_query( - args.project, args.query, launch_browser=args.launch_browser) + main( + args.project, launch_browser=args.launch_browser) diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py index 02acc19c38b1..009b9be7f955 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py @@ -18,7 +18,7 @@ import mock import pytest -from user_credentials import authenticate_and_query +from user_credentials import main PROJECT = os.environ['GCLOUD_PROJECT'] @@ -36,6 +36,7 @@ def mock_flow(): def test_auth_query_console(mock_flow, capsys): - authenticate_and_query(PROJECT, 'SELECT 1+1;', launch_browser=False) + main(PROJECT, launch_browser=False) out, _ = capsys.readouterr() - assert '2' in out + # Fun fact: William P. Wood was the 1st director of the US Secret Service. + assert 'William' in out From cf155e26c40767ddb48c3f88d94bc33652652f53 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 8 Jan 2019 13:42:50 -0800 Subject: [PATCH 0871/2016] Make TODO clearer what action to take. [(#1963)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1963) * Make TODO clearer what action to take. Describe the variables to set explicitly (rather than "this"), as suggested in internal CL 227863277. * Use standard 'uncomment the variable below' for TODO * Move variable below TODO. --- .../samples/snippets/user_credentials.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py index 300d9112d706..14ca76e29dd0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py @@ -27,10 +27,13 @@ def main(project, launch_browser=True): # [START bigquery_auth_user_flow] from google_auth_oauthlib import flow - # TODO: Use a local server in the auth flow? This is recommended but does - # not work if accessing the application remotely, such as over SSH or - # from a remote Jupyter notebook. + # TODO: Uncomment the line below to set the `launch_browser` variable. # launch_browser = True + # + # The `launch_browser` boolean variable indicates if a local server in the + # auth flow. A value of `True` is recommended, but a local server does not + # work if accessing the application remotely, such as over SSH or from a + # remote Jupyter notebook. appflow = flow.InstalledAppFlow.from_client_secrets_file( 'client_secrets.json', @@ -47,11 +50,13 @@ def main(project, launch_browser=True): # [START bigquery_auth_user_query] from google.cloud import bigquery - # TODO: This project will be billed for the query processing. The user must - # have the bigquery.jobs.create permission on this project to run a - # query. See: - # https://cloud.google.com/bigquery/docs/access-control#permissions + # TODO: Uncomment the line below to set the `project` variable. # project = 'user-project-id' + # + # The `project` variable defines the project to be billed for query + # processing. The user must have the bigquery.jobs.create permission on + # this project to run a query. See: + # https://cloud.google.com/bigquery/docs/access-control#permissions client = bigquery.Client(project=project, credentials=credentials) From 2690769a3569c8e258d4d2f88790a9a8bfd5a164 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 9 Jan 2019 10:18:27 -0800 Subject: [PATCH 0872/2016] Add missing explanation about local_server callback URL Per feedback on CL 227863277 --- .../samples/snippets/user_credentials.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py index 14ca76e29dd0..4917fdd3a414 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py @@ -30,10 +30,10 @@ def main(project, launch_browser=True): # TODO: Uncomment the line below to set the `launch_browser` variable. # launch_browser = True # - # The `launch_browser` boolean variable indicates if a local server in the - # auth flow. A value of `True` is recommended, but a local server does not - # work if accessing the application remotely, such as over SSH or from a - # remote Jupyter notebook. + # The `launch_browser` boolean variable indicates if a local server is used + # as the callback URL in the auth flow. A value of `True` is recommended, + # but a local server does not work if accessing the application remotely, + # such as over SSH or from a remote Jupyter notebook. appflow = flow.InstalledAppFlow.from_client_secrets_file( 'client_secrets.json', From fe0e459112eb8d8416e26c5380dfa815f4e45139 Mon Sep 17 00:00:00 2001 From: DPEBot Date: Wed, 6 Feb 2019 12:06:35 -0800 Subject: [PATCH 0873/2016] Auto-update dependencies. [(#1980)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1980) * Auto-update dependencies. * Update requirements.txt * Update requirements.txt --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 4f8e5641f8ed..bfdc27e7d821 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-bigquery[pandas]==1.7.0 +google-cloud-bigquery[pandas]==1.9.0 google-auth-oauthlib==0.2.0 -ipython==7.1.1 +ipython==7.2.0 matplotlib==3.0.2 -pytz==2018.7 +pytz==2018.9 From 10022cee2dae6fa0ddcace2d289fdf2828368f7e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 5 Aug 2019 15:17:40 -0700 Subject: [PATCH 0874/2016] =?UTF-8?q?BigQuery:=20Add=20sample=20for=20expl?= =?UTF-8?q?icitly=20creating=20client=20from=20service=20acco=E2=80=A6=20[?= =?UTF-8?q?(#2312)](https://github.com/GoogleCloudPlatform/python-docs-sam?= =?UTF-8?q?ples/issues/2312)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * BigQuery: Add sample for explicitly creating client from service account credentials. * Flake8 * Move imports to improve indentation. * Combine auth and client functions. * flake8 --- .../snippets/authenticate_service_account.py | 43 +++++++++++++++++++ .../authenticate_service_account_test.py | 33 ++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py new file mode 100644 index 000000000000..03cb06fd16b5 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py @@ -0,0 +1,43 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + + +def main(): + key_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") + + # [START bigquery_client_json_credentials] + from google.cloud import bigquery + from google.oauth2 import service_account + + # TODO(developer): Set key_path to the path to the service account key + # file. + # key_path = "path/to/service_account.json" + + credentials = service_account.Credentials.from_service_account_file( + key_path, + scopes=["https://www.googleapis.com/auth/cloud-platform"], + ) + + client = bigquery.Client( + credentials=credentials, + project=credentials.project_id, + ) + # [END bigquery_client_json_credentials] + return client + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py new file mode 100644 index 000000000000..8c436ec206fa --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import google.auth + +import authenticate_service_account + + +def mock_credentials(*args, **kwargs): + credentials, _ = google.auth.default( + ["https://www.googleapis.com/auth/cloud-platform"] + ) + return credentials + + +def test_main(monkeypatch): + monkeypatch.setattr( + 'google.oauth2.service_account.Credentials.from_service_account_file', + mock_credentials, + ) + client = authenticate_service_account.main() + assert client is not None From b8e43c4c079cea80bc54eaf07012e92c5bb59f05 Mon Sep 17 00:00:00 2001 From: Gus Class Date: Wed, 23 Oct 2019 16:27:00 -0700 Subject: [PATCH 0875/2016] Adds updates including compute [(#2436)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2436) * Adds updates including compute * Python 2 compat pytest * Fixing weird \r\n issue from GH merge * Put asset tests back in * Re-add pod operator test * Hack parameter for k8s pod operator --- .../samples/snippets/requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index bfdc27e7d821..70d644472562 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-bigquery[pandas]==1.9.0 -google-auth-oauthlib==0.2.0 -ipython==7.2.0 -matplotlib==3.0.2 -pytz==2018.9 +google-cloud-bigquery[pandas]==1.20.0 +google-auth-oauthlib==0.4.0 +ipython==7.8.0 +matplotlib==3.1.1 +pytz==2019.2 From d35ba4d31b7dc23c27c4bfd212c9f36a58ed64ef Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent <31518063+kurtisvg@users.noreply.github.com> Date: Wed, 1 Apr 2020 19:11:50 -0700 Subject: [PATCH 0876/2016] Simplify noxfile setup. [(#2806)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2806) * chore(deps): update dependency requests to v2.23.0 * Simplify noxfile and add version control. * Configure appengine/standard to only test Python 2.7. * Update Kokokro configs to match noxfile. * Add requirements-test to each folder. * Remove Py2 versions from everything execept appengine/standard. * Remove conftest.py. * Remove appengine/standard/conftest.py * Remove 'no-sucess-flaky-report' from pytest.ini. * Add GAE SDK back to appengine/standard tests. * Fix typo. * Roll pytest to python 2 version. * Add a bunch of testing requirements. * Remove typo. * Add appengine lib directory back in. * Add some additional requirements. * Fix issue with flake8 args. * Even more requirements. * Readd appengine conftest.py. * Add a few more requirements. * Even more Appengine requirements. * Add webtest for appengine/standard/mailgun. * Add some additional requirements. * Add workaround for issue with mailjet-rest. * Add responses for appengine/standard/mailjet. Co-authored-by: Renovate Bot --- .../samples/snippets/requirements-test.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/requirements-test.txt diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt new file mode 100644 index 000000000000..41c4d5110536 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -0,0 +1,2 @@ +pytest==5.3.2 +mock==3.0.5 From aad9a273755a4b75334ce2ca2685a6071390d65c Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 17 Apr 2020 16:53:37 -0700 Subject: [PATCH 0877/2016] fix: address issues with concurrent BigQuery tests [(#3426)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3426) Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- .../snippets/authorized_view_tutorial.py | 19 ++++++++- .../snippets/authorized_view_tutorial_test.py | 28 +++++++------ .../samples/snippets/natality_tutorial.py | 12 ++++-- .../snippets/natality_tutorial_test.py | 38 ++++++++++-------- .../samples/snippets/quickstart.py | 8 +++- .../samples/snippets/quickstart_test.py | 40 +++++++++---------- 6 files changed, 88 insertions(+), 57 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py index abe4e0cb131a..edf83cf7facb 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py @@ -15,7 +15,7 @@ # limitations under the License. -def run_authorized_view_tutorial(): +def run_authorized_view_tutorial(override_values={}): # Note to user: This is a group email for testing purposes. Replace with # your own group email address when running this code. analyst_group_email = 'example-analyst-group@google.com' @@ -28,6 +28,14 @@ def run_authorized_view_tutorial(): client = bigquery.Client() source_dataset_id = 'github_source_data' + # [END bigquery_authorized_view_tutorial] + # [END bigquery_avt_create_source_dataset] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + source_dataset_id = override_values.get("source_dataset_id", source_dataset_id) + # [START bigquery_authorized_view_tutorial] + # [START bigquery_avt_create_source_dataset] + source_dataset = bigquery.Dataset(client.dataset(source_dataset_id)) # Specify the geographic location where the dataset should reside. source_dataset.location = 'US' @@ -57,6 +65,15 @@ def run_authorized_view_tutorial(): # Create a separate dataset to store your view # [START bigquery_avt_create_shared_dataset] shared_dataset_id = 'shared_views' + + # [END bigquery_authorized_view_tutorial] + # [END bigquery_avt_create_shared_dataset] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + shared_dataset_id = override_values.get("shared_dataset_id", shared_dataset_id) + # [START bigquery_authorized_view_tutorial] + # [START bigquery_avt_create_shared_dataset] + shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id)) shared_dataset.location = 'US' shared_dataset = client.create_dataset(shared_dataset) # API request diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py index 954c47072c35..03079085f065 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import uuid + from google.cloud import bigquery import pytest @@ -24,24 +26,24 @@ def client(): @pytest.fixture -def to_delete(client): +def datasets_to_delete(client): doomed = [] yield doomed for item in doomed: - if isinstance(item, (bigquery.Dataset, bigquery.DatasetReference)): - client.delete_dataset(item, delete_contents=True) - elif isinstance(item, (bigquery.Table, bigquery.TableReference)): - client.delete_table(item) - else: - item.delete() + client.delete_dataset(item, delete_contents=True) -def test_authorized_view_tutorial(client, to_delete): - source_dataset_ref = client.dataset('github_source_data') - shared_dataset_ref = client.dataset('shared_views') - to_delete.extend([source_dataset_ref, shared_dataset_ref]) +def test_authorized_view_tutorial(client, datasets_to_delete): + override_values = { + "source_dataset_id": "github_source_data_{}".format(str(uuid.uuid4()).replace("-", "_")), + "shared_dataset_id": "shared_views_{}".format(str(uuid.uuid4()).replace("-", "_")), + } + source_dataset_ref = client.dataset(override_values["source_dataset_id"]) + shared_dataset_ref = client.dataset(override_values["shared_dataset_id"]) + datasets_to_delete.extend([override_values["source_dataset_id"], + override_values["shared_dataset_id"]]) - authorized_view_tutorial.run_authorized_view_tutorial() + authorized_view_tutorial.run_authorized_view_tutorial(override_values) source_dataset = client.get_dataset(source_dataset_ref) shared_dataset = client.get_dataset(shared_dataset_ref) @@ -55,7 +57,7 @@ def test_authorized_view_tutorial(client, to_delete): if entry.entity_type == 'view'] expected_view_ref = { 'projectId': client.project, - 'datasetId': 'shared_views', + 'datasetId': override_values["shared_dataset_id"], 'tableId': 'github_analyst_view', } assert len(authorized_view_entries) == 1 diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py index 5bfa8f1d27a9..6a097add3b09 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py @@ -15,7 +15,7 @@ # limitations under the License. -def run_natality_tutorial(): +def run_natality_tutorial(override_values={}): # [START bigquery_query_natality_tutorial] """Create a Google BigQuery linear regression input table. @@ -37,8 +37,14 @@ def run_natality_tutorial(): client = bigquery.Client() # Prepare a reference to a new dataset for storing the query results. - dataset_ref = client.dataset('natality_regression') - dataset = bigquery.Dataset(dataset_ref) + dataset_id = 'natality_regression' + # [END bigquery_query_natality_tutorial] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + dataset_id = override_values.get("dataset_id", dataset_id) + # [START bigquery_query_natality_tutorial] + + dataset = bigquery.Dataset(client.dataset(dataset_id)) # Create the new BigQuery dataset. dataset = client.create_dataset(dataset) diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py index 5165f7244f17..785df59df81a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py @@ -12,31 +12,35 @@ # See the License for the specific language governing permissions and # limitations under the License. +import uuid + from google.cloud import bigquery -from google.cloud import exceptions +import pytest import natality_tutorial -def dataset_exists(dataset, client): - try: - client.get_dataset(dataset) - return True - except exceptions.NotFound: - return False +@pytest.fixture(scope='module') +def client(): + return bigquery.Client() -def test_natality_tutorial(): - client = bigquery.Client() - dataset_ref = client.dataset('natality_regression') - assert not dataset_exists(dataset_ref, client) +@pytest.fixture +def datasets_to_delete(client): + doomed = [] + yield doomed + for item in doomed: + client.delete_dataset(item, delete_contents=True) - natality_tutorial.run_natality_tutorial() - assert dataset_exists(dataset_ref, client) +def test_natality_tutorial(client, datasets_to_delete): + override_values = { + "dataset_id": "natality_regression_{}".format(str(uuid.uuid4()).replace("-", "_")), + } + datasets_to_delete.append(override_values["dataset_id"]) - table = client.get_table( - bigquery.Table(dataset_ref.table('regression_input'))) - assert table.num_rows > 0 + natality_tutorial.run_natality_tutorial(override_values) - client.delete_dataset(dataset_ref, delete_contents=True) + table_ref = bigquery.Dataset(client.dataset(override_values["dataset_id"])).table("regression_input") + table = client.get_table(table_ref) + assert table.num_rows > 0 diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart.py b/packages/google-cloud-bigquery/samples/snippets/quickstart.py index 10ae58e84caa..cb6dcc303df2 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart.py @@ -15,7 +15,7 @@ # limitations under the License. -def run_quickstart(): +def run_quickstart(override_values={}): # [START bigquery_quickstart] # Imports the Google Cloud client library from google.cloud import bigquery @@ -26,6 +26,12 @@ def run_quickstart(): # The name for the new dataset dataset_id = 'my_new_dataset' + # [END bigquery_quickstart] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + dataset_id = override_values.get("dataset_id", dataset_id) + # [START bigquery_quickstart] + # Prepares a reference to the new dataset dataset_ref = bigquery_client.dataset(dataset_id) dataset = bigquery.Dataset(dataset_ref) diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py index 02931086a11e..2b461a8f27b6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import uuid + from google.cloud import bigquery -from google.cloud.exceptions import NotFound import pytest import quickstart @@ -24,31 +25,26 @@ DATASET_ID = 'my_new_dataset' -@pytest.fixture -def temporary_dataset(): - """Fixture that ensures the test dataset does not exist before or - after a test.""" - bigquery_client = bigquery.Client() - dataset_ref = bigquery_client.dataset(DATASET_ID) - - if dataset_exists(dataset_ref, bigquery_client): - bigquery_client.delete_dataset(dataset_ref) +@pytest.fixture(scope='module') +def client(): + return bigquery.Client() - yield - if dataset_exists(dataset_ref, bigquery_client): - bigquery_client.delete_dataset(dataset_ref) +@pytest.fixture +def datasets_to_delete(client): + doomed = [] + yield doomed + for item in doomed: + client.delete_dataset(item, delete_contents=True) -def dataset_exists(dataset, client): - try: - client.get_dataset(dataset) - return True - except NotFound: - return False +def test_quickstart(capsys, client, datasets_to_delete): + override_values = { + "dataset_id": "my_new_dataset_{}".format(str(uuid.uuid4()).replace("-", "_")), + } + datasets_to_delete.append(override_values["dataset_id"]) -def test_quickstart(capsys, temporary_dataset): - quickstart.run_quickstart() + quickstart.run_quickstart(override_values) out, _ = capsys.readouterr() - assert DATASET_ID in out + assert override_values["dataset_id"] in out From 7a0c92934a3b3cd5098a71ddef23f6901b05a1f1 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 18 Apr 2020 07:00:37 +0200 Subject: [PATCH 0878/2016] chore(deps): update dependency pytz to v2019.3 [(#3190)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3190) Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 70d644472562..9b40cd64aead 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -2,4 +2,4 @@ google-cloud-bigquery[pandas]==1.20.0 google-auth-oauthlib==0.4.0 ipython==7.8.0 matplotlib==3.1.1 -pytz==2019.2 +pytz==2019.3 From 0da589bf34b4e009a97fd4a385b076eeb83e4e1a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 18 Apr 2020 07:09:05 +0200 Subject: [PATCH 0879/2016] chore(deps): update dependency matplotlib to v3.2.1 [(#3180)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3180) Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 9b40cd64aead..4cbdfb2de742 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,6 @@ google-cloud-bigquery[pandas]==1.20.0 google-auth-oauthlib==0.4.0 ipython==7.8.0 -matplotlib==3.1.1 +matplotlib==3.2.1 pytz==2019.3 + From 9ba656252cc9f9a3fcacab17e175fe6212aeed56 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 18 Apr 2020 07:14:29 +0200 Subject: [PATCH 0880/2016] chore(deps): update dependency ipython to v7.13.0 [(#3177)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3177) Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 4cbdfb2de742..ba7b7bda73a6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,5 @@ google-cloud-bigquery[pandas]==1.20.0 google-auth-oauthlib==0.4.0 -ipython==7.8.0 +ipython==7.13.0 matplotlib==3.2.1 pytz==2019.3 - From e4209266c1f6f702c4534f8996b35da1ad14b28d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 18 Apr 2020 07:41:12 +0200 Subject: [PATCH 0881/2016] chore(deps): update dependency google-cloud-bigquery to v1.24.0 [(#3049)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3049) * chore(deps): update dependency google-cloud-bigquery to v1.24.0 * chore(deps): update ipython version * fix: fix requirements order * explicitly add grpc to resolve errors * adjust arguments * undo mistake * bump auth version Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Co-authored-by: Leah Cole Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Co-authored-by: Christopher Wilcox --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index ba7b7bda73a6..9b737b7a0c5f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-bigquery[pandas]==1.20.0 -google-auth-oauthlib==0.4.0 +google-cloud-bigquery[pandas]==1.24.0 +google-auth-oauthlib==0.4.1 ipython==7.13.0 matplotlib==3.2.1 pytz==2019.3 From d673646535c71120105cca1ca3346ba1b5d5241c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 28 Apr 2020 15:05:51 +0200 Subject: [PATCH 0882/2016] chore(deps): update dependency pytz to v2020 [(#3566)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3566) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 9b737b7a0c5f..650c6f4ab4cf 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -2,4 +2,4 @@ google-cloud-bigquery[pandas]==1.24.0 google-auth-oauthlib==0.4.1 ipython==7.13.0 matplotlib==3.2.1 -pytz==2019.3 +pytz==2020.1 From 21cc0dabb4b2b07e5163b2b24e18974e7480f31d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 5 May 2020 01:21:57 +0200 Subject: [PATCH 0883/2016] chore(deps): update dependency ipython to v7.14.0 [(#3670)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3670) Co-authored-by: Takashi Matsuo --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 650c6f4ab4cf..a1eabc954b36 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery[pandas]==1.24.0 google-auth-oauthlib==0.4.1 -ipython==7.13.0 +ipython==7.14.0 matplotlib==3.2.1 pytz==2020.1 From bf35feb5eaf24b206643046c1aa5a03016c18bb8 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Tue, 26 May 2020 14:42:53 -0700 Subject: [PATCH 0884/2016] [bigquery] opt in to use the build specific projects [(#3883)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3883) fixes #3882 --- .../samples/snippets/README.rst | 8 +++- .../samples/snippets/README.rst.in | 4 +- .../samples/snippets/noxfile_config.py | 37 +++++++++++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/noxfile_config.py diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index 02bc856f9786..9446125cd8a7 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -14,6 +14,11 @@ This directory contains samples for Google BigQuery. `Google BigQuery`_ is Googl .. _Google BigQuery: https://cloud.google.com/bigquery/docs + +To run the sample, you need to have `BigQuery Admin` role. + + + Setup ------------------------------------------------------------------------------- @@ -108,7 +113,7 @@ To run this sample: $ python user_credentials.py - usage: user_credentials.py [-h] [--launch-browser] project query + usage: user_credentials.py [-h] [--launch-browser] project Command-line application to run a query using user credentials. @@ -117,7 +122,6 @@ To run this sample: positional arguments: project Project to use for BigQuery billing. - query BigQuery SQL Query. optional arguments: -h, --help show this help message and exit diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst.in b/packages/google-cloud-bigquery/samples/snippets/README.rst.in index 008b5179565d..74b7fa940376 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst.in +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst.in @@ -11,6 +11,8 @@ product: analyzing data to find meaningful insights, use familiar SQL, and take advantage of our pay-as-you-go model. +required_role: BigQuery Admin + setup: - auth - install_deps @@ -26,4 +28,4 @@ samples: cloud_client_library: true -folder: bigquery/cloud-client \ No newline at end of file +folder: bigquery/cloud-client diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py b/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py new file mode 100644 index 000000000000..950c3a070bd0 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py @@ -0,0 +1,37 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be inported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + 'ignored_versions': ["2.7"], + + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + # 'gcloud_project_env': 'GCLOUD_PROJECT', + 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + 'envs': {}, +} From 339e65fddc5bd8ec5b936381b802af0efccd1c01 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 30 May 2020 10:59:14 +0200 Subject: [PATCH 0885/2016] chore(deps): update dependency ipython to v7.15.0 [(#3917)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3917) Co-authored-by: Takashi Matsuo --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index a1eabc954b36..acad65571938 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery[pandas]==1.24.0 google-auth-oauthlib==0.4.1 -ipython==7.14.0 +ipython==7.15.0 matplotlib==3.2.1 pytz==2020.1 From cbe9e2d6e800144dedb9f129d8e221e5d9dd160c Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent <31518063+kurtisvg@users.noreply.github.com> Date: Tue, 9 Jun 2020 14:34:27 -0700 Subject: [PATCH 0886/2016] Replace GCLOUD_PROJECT with GOOGLE_CLOUD_PROJECT. [(#4022)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4022) --- .../google-cloud-bigquery/samples/snippets/noxfile_config.py | 2 +- .../samples/snippets/user_credentials_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py b/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py index 950c3a070bd0..cfd0d439150c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py @@ -28,7 +28,7 @@ # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string # to use your own Cloud project. - # 'gcloud_project_env': 'GCLOUD_PROJECT', + # 'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT', 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', # A dictionary you want to inject into your test. Don't put any diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py index 009b9be7f955..494903c75a11 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py @@ -21,7 +21,7 @@ from user_credentials import main -PROJECT = os.environ['GCLOUD_PROJECT'] +PROJECT = os.environ['GOOGLE_CLOUD_PROJECT'] @pytest.fixture From d70d11a732c7c346c4c21ff8320fccfef3032c73 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 11 Jun 2020 06:36:11 +0200 Subject: [PATCH 0887/2016] Update dependency google-cloud-bigquery to v1.25.0 [(#4024)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4024) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==1.24.0` -> `==1.25.0` | --- ### Release Notes
googleapis/python-bigquery ### [`v1.25.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​1250-httpswwwgithubcomgoogleapispython-bigquerycomparev1240v1250-2020-06-06) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v1.24.0...v1.25.0) ##### Features - add BigQuery storage client support to DB API ([#​36](https://www.github.com/googleapis/python-bigquery/issues/36)) ([ba9b2f8](https://www.github.com/googleapis/python-bigquery/commit/ba9b2f87e36320d80f6f6460b77e6daddb0fa214)) - **bigquery:** add create job method ([#​32](https://www.github.com/googleapis/python-bigquery/issues/32)) ([2abdef8](https://www.github.com/googleapis/python-bigquery/commit/2abdef82bed31601d1ca1aa92a10fea1e09f5297)) - **bigquery:** add support of model for extract job ([#​71](https://www.github.com/googleapis/python-bigquery/issues/71)) ([4a7a514](https://www.github.com/googleapis/python-bigquery/commit/4a7a514659a9f6f9bbd8af46bab3f8782d6b4b98)) - add HOUR support for time partitioning interval ([#​91](https://www.github.com/googleapis/python-bigquery/issues/91)) ([0dd90b9](https://www.github.com/googleapis/python-bigquery/commit/0dd90b90e3714c1d18f8a404917a9454870e338a)) - add support for policy tags ([#​77](https://www.github.com/googleapis/python-bigquery/issues/77)) ([38a5c01](https://www.github.com/googleapis/python-bigquery/commit/38a5c01ca830daf165592357c45f2fb4016aad23)) - make AccessEntry objects hashable ([#​93](https://www.github.com/googleapis/python-bigquery/issues/93)) ([23a173b](https://www.github.com/googleapis/python-bigquery/commit/23a173bc5a25c0c8200adc5af62eb05624c9099e)) - **bigquery:** expose start index parameter for query result ([#​121](https://www.github.com/googleapis/python-bigquery/issues/121)) ([be86de3](https://www.github.com/googleapis/python-bigquery/commit/be86de330a3c3801653a0ccef90e3d9bdb3eee7a)) - **bigquery:** unit and system test for dataframe with int column with Nan values ([#​39](https://www.github.com/googleapis/python-bigquery/issues/39)) ([5fd840e](https://www.github.com/googleapis/python-bigquery/commit/5fd840e9d4c592c4f736f2fd4792c9670ba6795e)) ##### Bug Fixes - allow partial streaming_buffer statistics ([#​37](https://www.github.com/googleapis/python-bigquery/issues/37)) ([645f0fd](https://www.github.com/googleapis/python-bigquery/commit/645f0fdb35ee0e81ee70f7459e796a42a1f03210)) - distinguish server timeouts from transport timeouts ([#​43](https://www.github.com/googleapis/python-bigquery/issues/43)) ([a17be5f](https://www.github.com/googleapis/python-bigquery/commit/a17be5f01043f32d9fbfb2ddf456031ea9205c8f)) - improve cell magic error message on missing query ([#​58](https://www.github.com/googleapis/python-bigquery/issues/58)) ([6182cf4](https://www.github.com/googleapis/python-bigquery/commit/6182cf48aef8f463bb96891cfc44a96768121dbc)) - **bigquery:** fix repr of model reference ([#​66](https://www.github.com/googleapis/python-bigquery/issues/66)) ([26c6204](https://www.github.com/googleapis/python-bigquery/commit/26c62046f4ec8880cf6561cc90a8b821dcc84ec5)) - **bigquery:** fix start index with page size for list rows ([#​27](https://www.github.com/googleapis/python-bigquery/issues/27)) ([400673b](https://www.github.com/googleapis/python-bigquery/commit/400673b5d0f2a6a3d828fdaad9d222ca967ffeff))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Never, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#GoogleCloudPlatform/python-docs-samples). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index acad65571938..b5ec18a01f34 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery[pandas]==1.24.0 +google-cloud-bigquery[pandas]==1.25.0 google-auth-oauthlib==0.4.1 ipython==7.15.0 matplotlib==3.2.1 From 892a81470a1a72e7302b4c017e699fcf632afb9d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Jun 2020 19:04:04 +0200 Subject: [PATCH 0888/2016] chore(deps): update dependency matplotlib to v3.2.2 [(#4118)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4118) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [matplotlib](https://matplotlib.org) ([source](https://togithub.com/matplotlib/matplotlib)) | patch | `==3.2.1` -> `==3.2.2` | --- ### Release Notes
matplotlib/matplotlib ### [`v3.2.2`](https://togithub.com/matplotlib/matplotlib/releases/v3.2.2) [Compare Source](https://togithub.com/matplotlib/matplotlib/compare/v3.2.1...v3.2.2) This is the second bugfix release of the 3.2.x series. This release contains several critical bug-fixes: - support fractional HiDPI scaling with Qt backends - support new Python and fix syntax errors in legacy Python - support new Qt 5 and fix support for Qt 4 - fix animation writer fallback - fix figure resizing - fix handling of large arcs - fix issues with tight layout - fix saving figures after closing windows or under certain size conditions - fix scatter when specifying a single color - fix several memory leaks - fix unexpected autoscaling behavior - fix various issues with usetex - various minor bug and documentation fixes
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Never, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#GoogleCloudPlatform/python-docs-samples). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index b5ec18a01f34..93f36e038026 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery[pandas]==1.25.0 google-auth-oauthlib==0.4.1 ipython==7.15.0 -matplotlib==3.2.1 +matplotlib==3.2.2 pytz==2020.1 From 517205981bd42f7773f3833cc3d99e99584ec4c5 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 27 Jun 2020 02:20:03 +0200 Subject: [PATCH 0889/2016] Update dependency ipython to v7.16.1 [(#4189)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4189) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [ipython](https://ipython.org) ([source](https://togithub.com/ipython/ipython)) | minor | `==7.15.0` -> `==7.16.1` | --- ### Release Notes
ipython/ipython ### [`v7.16.1`](https://togithub.com/ipython/ipython/compare/7.16.0...7.16.1) [Compare Source](https://togithub.com/ipython/ipython/compare/7.16.0...7.16.1) ### [`v7.16.0`](https://togithub.com/ipython/ipython/compare/7.15.0...7.16.0) [Compare Source](https://togithub.com/ipython/ipython/compare/7.15.0...7.16.0)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Never, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#GoogleCloudPlatform/python-docs-samples). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 93f36e038026..c44fe09a0a7a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery[pandas]==1.25.0 google-auth-oauthlib==0.4.1 -ipython==7.15.0 +ipython==7.16.1 matplotlib==3.2.2 pytz==2020.1 From c226e123ee0ca361434631dab5078485435d4bdd Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 13 Jul 2020 00:46:30 +0200 Subject: [PATCH 0890/2016] chore(deps): update dependency pytest to v5.4.3 [(#4279)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4279) * chore(deps): update dependency pytest to v5.4.3 * specify pytest for python 2 in appengine Co-authored-by: Leah Cole --- .../samples/snippets/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 41c4d5110536..1a71e3e3296a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==5.3.2 +pytest==5.4.3 mock==3.0.5 From afbd725e061acc4376ca48921ad1edb6a4bf01c1 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 13 Jul 2020 07:10:34 +0200 Subject: [PATCH 0891/2016] chore(deps): update dependency mock to v4 [(#4287)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4287) * chore(deps): update dependency mock to v4 * specify mock version for appengine python 2 Co-authored-by: Leah Cole --- .../samples/snippets/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 1a71e3e3296a..676ff949e8ae 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ pytest==5.4.3 -mock==3.0.5 +mock==4.0.2 From d422ff84a03830ec6e33c629984e03e1a1f10234 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 17 Jul 2020 19:58:32 +0200 Subject: [PATCH 0892/2016] chore(deps): update dependency matplotlib to v3.3.0 [(#4310)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4310) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index c44fe09a0a7a..fe0f79e2139f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery[pandas]==1.25.0 google-auth-oauthlib==0.4.1 ipython==7.16.1 -matplotlib==3.2.2 +matplotlib==3.3.0 pytz==2020.1 From 8a748308981ba9c91f6c3bda7d668a4c3eb51dc3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 21 Jul 2020 16:39:20 +0200 Subject: [PATCH 0893/2016] chore: update templates --- .../google-cloud-bigquery/.github/CODEOWNERS | 9 + .../samples/AUTHORING_GUIDE.md | 1 + .../samples/CONTRIBUTING.md | 1 + .../samples/snippets/README.rst | 29 ++- .../snippets/authenticate_service_account.py | 8 +- .../authenticate_service_account_test.py | 2 +- .../snippets/authorized_view_tutorial.py | 34 +-- .../snippets/authorized_view_tutorial_test.py | 37 +-- .../samples/snippets/jupyter_tutorial_test.py | 38 +-- .../samples/snippets/natality_tutorial.py | 6 +- .../snippets/natality_tutorial_test.py | 10 +- .../samples/snippets/noxfile.py | 222 ++++++++++++++++++ .../samples/snippets/noxfile_config.py | 8 +- .../samples/snippets/quickstart.py | 6 +- .../samples/snippets/quickstart_test.py | 4 +- .../samples/snippets/simple_app.py | 9 +- .../samples/snippets/simple_app_test.py | 2 +- .../samples/snippets/user_credentials.py | 24 +- .../samples/snippets/user_credentials_test.py | 7 +- packages/google-cloud-bigquery/synth.metadata | 13 +- packages/google-cloud-bigquery/synth.py | 10 +- 21 files changed, 376 insertions(+), 104 deletions(-) create mode 100644 packages/google-cloud-bigquery/.github/CODEOWNERS create mode 100644 packages/google-cloud-bigquery/samples/AUTHORING_GUIDE.md create mode 100644 packages/google-cloud-bigquery/samples/CONTRIBUTING.md create mode 100644 packages/google-cloud-bigquery/samples/snippets/noxfile.py diff --git a/packages/google-cloud-bigquery/.github/CODEOWNERS b/packages/google-cloud-bigquery/.github/CODEOWNERS new file mode 100644 index 000000000000..309a57710ab1 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/CODEOWNERS @@ -0,0 +1,9 @@ +# Code owners file. +# This file controls who is tagged for review for any given pull request. +# +# For syntax help see: +# https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax + + +/samples/ @shollyman @googleapis/python-samples-owners + diff --git a/packages/google-cloud-bigquery/samples/AUTHORING_GUIDE.md b/packages/google-cloud-bigquery/samples/AUTHORING_GUIDE.md new file mode 100644 index 000000000000..55c97b32f4c1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/AUTHORING_GUIDE.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/AUTHORING_GUIDE.md \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/CONTRIBUTING.md b/packages/google-cloud-bigquery/samples/CONTRIBUTING.md new file mode 100644 index 000000000000..34c882b6f1a3 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/CONTRIBUTING.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/CONTRIBUTING.md \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index 9446125cd8a7..7c3e19e68b16 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -1,3 +1,4 @@ + .. This file is automatically generated. Do not edit this file directly. Google BigQuery Python Samples @@ -15,14 +16,11 @@ This directory contains samples for Google BigQuery. `Google BigQuery`_ is Googl .. _Google BigQuery: https://cloud.google.com/bigquery/docs -To run the sample, you need to have `BigQuery Admin` role. - - - Setup ------------------------------------------------------------------------------- + Authentication ++++++++++++++ @@ -33,6 +31,9 @@ credentials for applications. .. _Authentication Getting Started Guide: https://cloud.google.com/docs/authentication/getting-started + + + Install Dependencies ++++++++++++++++++++ @@ -47,7 +48,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. +#. Create a virtualenv. Samples are compatible with Python 3.6+. .. code-block:: bash @@ -63,9 +64,15 @@ Install Dependencies .. _pip: https://pip.pypa.io/ .. _virtualenv: https://virtualenv.pypa.io/ + + + + + Samples ------------------------------------------------------------------------------- + Quickstart +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -82,6 +89,8 @@ To run this sample: $ python quickstart.py + + Simple Application +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -98,6 +107,8 @@ To run this sample: $ python simple_app.py + + User Credentials +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -113,6 +124,7 @@ To run this sample: $ python user_credentials.py + usage: user_credentials.py [-h] [--launch-browser] project Command-line application to run a query using user credentials. @@ -131,6 +143,10 @@ To run this sample: + + + + The client library ------------------------------------------------------------------------------- @@ -146,4 +162,5 @@ to `browse the source`_ and `report issues`_. https://github.com/GoogleCloudPlatform/google-cloud-python/issues -.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ diff --git a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py index 03cb06fd16b5..58cd2b542fe7 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py +++ b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py @@ -27,14 +27,10 @@ def main(): # key_path = "path/to/service_account.json" credentials = service_account.Credentials.from_service_account_file( - key_path, - scopes=["https://www.googleapis.com/auth/cloud-platform"], + key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"], ) - client = bigquery.Client( - credentials=credentials, - project=credentials.project_id, - ) + client = bigquery.Client(credentials=credentials, project=credentials.project_id,) # [END bigquery_client_json_credentials] return client diff --git a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py index 8c436ec206fa..131c69d2c658 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py @@ -26,7 +26,7 @@ def mock_credentials(*args, **kwargs): def test_main(monkeypatch): monkeypatch.setattr( - 'google.oauth2.service_account.Credentials.from_service_account_file', + "google.oauth2.service_account.Credentials.from_service_account_file", mock_credentials, ) client = authenticate_service_account.main() diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py index edf83cf7facb..6b5cc378f00b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py @@ -18,7 +18,7 @@ def run_authorized_view_tutorial(override_values={}): # Note to user: This is a group email for testing purposes. Replace with # your own group email address when running this code. - analyst_group_email = 'example-analyst-group@google.com' + analyst_group_email = "example-analyst-group@google.com" # [START bigquery_authorized_view_tutorial] # Create a source dataset @@ -26,7 +26,7 @@ def run_authorized_view_tutorial(override_values={}): from google.cloud import bigquery client = bigquery.Client() - source_dataset_id = 'github_source_data' + source_dataset_id = "github_source_data" # [END bigquery_authorized_view_tutorial] # [END bigquery_avt_create_source_dataset] @@ -38,13 +38,13 @@ def run_authorized_view_tutorial(override_values={}): source_dataset = bigquery.Dataset(client.dataset(source_dataset_id)) # Specify the geographic location where the dataset should reside. - source_dataset.location = 'US' + source_dataset.location = "US" source_dataset = client.create_dataset(source_dataset) # API request # [END bigquery_avt_create_source_dataset] # Populate a source table # [START bigquery_avt_create_source_table] - source_table_id = 'github_contributors' + source_table_id = "github_contributors" job_config = bigquery.QueryJobConfig() job_config.destination = source_dataset.table(source_table_id) sql = """ @@ -56,15 +56,16 @@ def run_authorized_view_tutorial(override_values={}): sql, # Location must match that of the dataset(s) referenced in the query # and of the destination table. - location='US', - job_config=job_config) # API request - starts the query + location="US", + job_config=job_config, + ) # API request - starts the query query_job.result() # Waits for the query to finish # [END bigquery_avt_create_source_table] # Create a separate dataset to store your view # [START bigquery_avt_create_shared_dataset] - shared_dataset_id = 'shared_views' + shared_dataset_id = "shared_views" # [END bigquery_authorized_view_tutorial] # [END bigquery_avt_create_shared_dataset] @@ -75,13 +76,13 @@ def run_authorized_view_tutorial(override_values={}): # [START bigquery_avt_create_shared_dataset] shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id)) - shared_dataset.location = 'US' + shared_dataset.location = "US" shared_dataset = client.create_dataset(shared_dataset) # API request # [END bigquery_avt_create_shared_dataset] # Create the view in the new dataset # [START bigquery_avt_create_view] - shared_view_id = 'github_analyst_view' + shared_view_id = "github_analyst_view" view = bigquery.Table(shared_dataset.table(shared_view_id)) sql_template = """ SELECT @@ -91,7 +92,8 @@ def run_authorized_view_tutorial(override_values={}): `{}.{}.{}` """ view.view_query = sql_template.format( - client.project, source_dataset_id, source_table_id) + client.project, source_dataset_id, source_table_id + ) view = client.create_table(view) # API request # [END bigquery_avt_create_view] @@ -100,25 +102,27 @@ def run_authorized_view_tutorial(override_values={}): # analyst_group_email = 'data_analysts@example.com' access_entries = shared_dataset.access_entries access_entries.append( - bigquery.AccessEntry('READER', 'groupByEmail', analyst_group_email) + bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) ) shared_dataset.access_entries = access_entries shared_dataset = client.update_dataset( - shared_dataset, ['access_entries']) # API request + shared_dataset, ["access_entries"] + ) # API request # [END bigquery_avt_shared_dataset_access] # Authorize the view to access the source dataset # [START bigquery_avt_source_dataset_access] access_entries = source_dataset.access_entries access_entries.append( - bigquery.AccessEntry(None, 'view', view.reference.to_api_repr()) + bigquery.AccessEntry(None, "view", view.reference.to_api_repr()) ) source_dataset.access_entries = access_entries source_dataset = client.update_dataset( - source_dataset, ['access_entries']) # API request + source_dataset, ["access_entries"] + ) # API request # [END bigquery_avt_source_dataset_access] # [END bigquery_authorized_view_tutorial] -if __name__ == '__main__': +if __name__ == "__main__": run_authorized_view_tutorial() diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py index 03079085f065..4c74020bd117 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py @@ -20,7 +20,7 @@ import authorized_view_tutorial -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def client(): return bigquery.Client() @@ -35,30 +35,39 @@ def datasets_to_delete(client): def test_authorized_view_tutorial(client, datasets_to_delete): override_values = { - "source_dataset_id": "github_source_data_{}".format(str(uuid.uuid4()).replace("-", "_")), - "shared_dataset_id": "shared_views_{}".format(str(uuid.uuid4()).replace("-", "_")), + "source_dataset_id": "github_source_data_{}".format( + str(uuid.uuid4()).replace("-", "_") + ), + "shared_dataset_id": "shared_views_{}".format( + str(uuid.uuid4()).replace("-", "_") + ), } source_dataset_ref = client.dataset(override_values["source_dataset_id"]) shared_dataset_ref = client.dataset(override_values["shared_dataset_id"]) - datasets_to_delete.extend([override_values["source_dataset_id"], - override_values["shared_dataset_id"]]) + datasets_to_delete.extend( + [override_values["source_dataset_id"], override_values["shared_dataset_id"]] + ) authorized_view_tutorial.run_authorized_view_tutorial(override_values) source_dataset = client.get_dataset(source_dataset_ref) shared_dataset = client.get_dataset(shared_dataset_ref) - analyst_email = 'example-analyst-group@google.com' - analyst_entries = [entry for entry in shared_dataset.access_entries - if entry.entity_id == analyst_email] + analyst_email = "example-analyst-group@google.com" + analyst_entries = [ + entry + for entry in shared_dataset.access_entries + if entry.entity_id == analyst_email + ] assert len(analyst_entries) == 1 - assert analyst_entries[0].role == 'READER' + assert analyst_entries[0].role == "READER" - authorized_view_entries = [entry for entry in source_dataset.access_entries - if entry.entity_type == 'view'] + authorized_view_entries = [ + entry for entry in source_dataset.access_entries if entry.entity_type == "view" + ] expected_view_ref = { - 'projectId': client.project, - 'datasetId': override_values["shared_dataset_id"], - 'tableId': 'github_analyst_view', + "projectId": client.project, + "datasetId": override_values["shared_dataset_id"], + "tableId": "github_analyst_view", } assert len(authorized_view_entries) == 1 assert authorized_view_entries[0].entity_id == expected_view_ref diff --git a/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py index 0affbabcb032..353590b82864 100644 --- a/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py @@ -22,7 +22,7 @@ # flake8: noqa E703 -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def ipython(): config = tools.default_config() config.TerminalInteractiveShell.simple_prompt = True @@ -42,15 +42,16 @@ def ipython_interactive(request, ipython): def _strip_region_tags(sample_text): """Remove blank lines and region tags from sample text""" - magic_lines = [line for line in sample_text.split('\n') - if len(line) > 0 and '# [' not in line] - return '\n'.join(magic_lines) + magic_lines = [ + line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line + ] + return "\n".join(magic_lines) def test_jupyter_tutorial(ipython): - matplotlib.use('agg') + matplotlib.use("agg") ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension("google.cloud.bigquery") sample = """ # [START bigquery_jupyter_magic_gender_by_year] @@ -82,10 +83,10 @@ def test_jupyter_tutorial(ipython): result = ip.run_cell(_strip_region_tags(sample)) result.raise_error() # Throws an exception if the cell failed. - assert 'total_births' in ip.user_ns # verify that variable exists - total_births = ip.user_ns['total_births'] + assert "total_births" in ip.user_ns # verify that variable exists + total_births = ip.user_ns["total_births"] # [START bigquery_jupyter_plot_births_by_year] - total_births.plot(kind='bar', x='year', y='birth_count'); + total_births.plot(kind="bar", x="year", y="birth_count") # [END bigquery_jupyter_plot_births_by_year] sample = """ @@ -104,14 +105,15 @@ def test_jupyter_tutorial(ipython): result = ip.run_cell(_strip_region_tags(sample)) result.raise_error() # Throws an exception if the cell failed. - assert 'births_by_weekday' in ip.user_ns # verify that variable exists - births_by_weekday = ip.user_ns['births_by_weekday'] + assert "births_by_weekday" in ip.user_ns # verify that variable exists + births_by_weekday = ip.user_ns["births_by_weekday"] # [START bigquery_jupyter_plot_births_by_weekday] - births_by_weekday.plot(x='wday'); + births_by_weekday.plot(x="wday") # [END bigquery_jupyter_plot_births_by_weekday] # [START bigquery_jupyter_import_and_client] from google.cloud import bigquery + client = bigquery.Client() # [END bigquery_jupyter_import_and_client] @@ -135,8 +137,8 @@ def test_jupyter_tutorial(ipython): # [END bigquery_jupyter_query_plurality_by_year] # [START bigquery_jupyter_plot_plurality_by_year] - pivot_table = df.pivot(index='year', columns='plurality', values='count') - pivot_table.plot(kind='bar', stacked=True, figsize=(15, 7)); + pivot_table = df.pivot(index="year", columns="plurality", values="count") + pivot_table.plot(kind="bar", stacked=True, figsize=(15, 7)) # [END bigquery_jupyter_plot_plurality_by_year] # [START bigquery_jupyter_query_births_by_gestation] @@ -157,8 +159,8 @@ def test_jupyter_tutorial(ipython): # [END bigquery_jupyter_query_births_by_gestation] # [START bigquery_jupyter_plot_births_by_gestation] - ax = df.plot(kind='bar', x='gestation_weeks', y='count', figsize=(15,7)) - ax.set_title('Count of Births by Gestation Weeks') - ax.set_xlabel('Gestation Weeks') - ax.set_ylabel('Count'); + ax = df.plot(kind="bar", x="gestation_weeks", y="count", figsize=(15, 7)) + ax.set_title("Count of Births by Gestation Weeks") + ax.set_xlabel("Gestation Weeks") + ax.set_ylabel("Count") # [END bigquery_jupyter_plot_births_by_gestation] diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py index 6a097add3b09..b2b607b0daf5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py @@ -37,7 +37,7 @@ def run_natality_tutorial(override_values={}): client = bigquery.Client() # Prepare a reference to a new dataset for storing the query results. - dataset_id = 'natality_regression' + dataset_id = "natality_regression" # [END bigquery_query_natality_tutorial] # To facilitate testing, we replace values with alternatives # provided by the testing harness. @@ -51,7 +51,7 @@ def run_natality_tutorial(override_values={}): # In the new BigQuery dataset, create a reference to a new table for # storing the query results. - table_ref = dataset.table('regression_input') + table_ref = dataset.table("regression_input") # Configure the query job. job_config = bigquery.QueryJobConfig() @@ -83,5 +83,5 @@ def run_natality_tutorial(override_values={}): # [END bigquery_query_natality_tutorial] -if __name__ == '__main__': +if __name__ == "__main__": run_natality_tutorial() diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py index 785df59df81a..fae72fa46852 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py @@ -20,7 +20,7 @@ import natality_tutorial -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def client(): return bigquery.Client() @@ -35,12 +35,16 @@ def datasets_to_delete(client): def test_natality_tutorial(client, datasets_to_delete): override_values = { - "dataset_id": "natality_regression_{}".format(str(uuid.uuid4()).replace("-", "_")), + "dataset_id": "natality_regression_{}".format( + str(uuid.uuid4()).replace("-", "_") + ), } datasets_to_delete.append(override_values["dataset_id"]) natality_tutorial.run_natality_tutorial(override_values) - table_ref = bigquery.Dataset(client.dataset(override_values["dataset_id"])).table("regression_input") + table_ref = bigquery.Dataset(client.dataset(override_values["dataset_id"])).table( + "regression_input" + ) table = client.get_table(table_ref) assert table.num_rows > 0 diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py new file mode 100644 index 000000000000..5660f08be441 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -0,0 +1,222 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +from pathlib import Path +import sys + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +# Copy `noxfile_config.py` to your directory and modify it instead. + + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} + + +try: + # Ensure we can import noxfile_config in the project's directory. + sys.path.append(".") + from noxfile_config import TEST_CONFIG_OVERRIDE +except ImportError as e: + print("No user noxfile_config found: detail: {}".format(e)) + TEST_CONFIG_OVERRIDE = {} + +# Update the TEST_CONFIG with the user supplied values. +TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) + + +def get_pytest_env_vars(): + """Returns a dict for pytest invocation.""" + ret = {} + + # Override the GCLOUD_PROJECT and the alias. + env_key = TEST_CONFIG["gcloud_project_env"] + # This should error out if not set. + ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] + + # Apply user supplied envs. + ret.update(TEST_CONFIG["envs"]) + return ret + + +# DO NOT EDIT - automatically generated. +# All versions used to tested samples. +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] + +# Any default versions that should be ignored. +IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] + +TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) + +INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +# +# Style Checks +# + + +def _determine_local_import_names(start_dir): + """Determines all import names that should be considered "local". + + This is used when running the linter to insure that import order is + properly checked. + """ + file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] + return [ + basename + for basename, extension in file_ext_pairs + if extension == ".py" + or os.path.isdir(os.path.join(start_dir, basename)) + and basename not in ("__pycache__") + ] + + +# Linting with flake8. +# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--import-order-style=google", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session): + session.install("flake8", "flake8-import-order") + + local_names = _determine_local_import_names(".") + args = FLAKE8_COMMON_ARGS + [ + "--application-import-names", + ",".join(local_names), + ".", + ] + session.run("flake8", *args) + + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests(session, post_install=None): + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars() + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session): + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) + + +# +# Readmegen +# + + +def _get_repo_root(): + """ Returns the root folder of the project. """ + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session, path): + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py b/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py index cfd0d439150c..6ecfa40e0b8f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py @@ -22,16 +22,14 @@ TEST_CONFIG_OVERRIDE = { # You can opt out from the test for specific Python versions. - 'ignored_versions': ["2.7"], - + "ignored_versions": ["2.7"], # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string # to use your own Cloud project. # 'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT', - 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', - + "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. - 'envs': {}, + "envs": {}, } diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart.py b/packages/google-cloud-bigquery/samples/snippets/quickstart.py index cb6dcc303df2..56d6fd843c3a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart.py @@ -24,7 +24,7 @@ def run_quickstart(override_values={}): bigquery_client = bigquery.Client() # The name for the new dataset - dataset_id = 'my_new_dataset' + dataset_id = "my_new_dataset" # [END bigquery_quickstart] # To facilitate testing, we replace values with alternatives @@ -39,9 +39,9 @@ def run_quickstart(override_values={}): # Creates the new dataset dataset = bigquery_client.create_dataset(dataset) - print('Dataset {} created.'.format(dataset.dataset_id)) + print("Dataset {} created.".format(dataset.dataset_id)) # [END bigquery_quickstart] -if __name__ == '__main__': +if __name__ == "__main__": run_quickstart() diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py index 2b461a8f27b6..a5e3a13e3c81 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py @@ -22,10 +22,10 @@ # Must match the dataset listed in quickstart.py (there's no easy way to # extract this). -DATASET_ID = 'my_new_dataset' +DATASET_ID = "my_new_dataset" -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def client(): return bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app.py b/packages/google-cloud-bigquery/samples/snippets/simple_app.py index a09e97f12467..c21ae86f4a94 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app.py @@ -18,6 +18,7 @@ # [START bigquery_simple_app_all] # [START bigquery_simple_app_deps] from google.cloud import bigquery + # [END bigquery_simple_app_deps] @@ -26,7 +27,8 @@ def query_stackoverflow(): client = bigquery.Client() # [END bigquery_simple_app_client] # [START bigquery_simple_app_query] - query_job = client.query(""" + query_job = client.query( + """ SELECT CONCAT( 'https://stackoverflow.com/questions/', @@ -35,7 +37,8 @@ def query_stackoverflow(): FROM `bigquery-public-data.stackoverflow.posts_questions` WHERE tags like '%google-bigquery%' ORDER BY view_count DESC - LIMIT 10""") + LIMIT 10""" + ) results = query_job.result() # Waits for job to complete. # [END bigquery_simple_app_query] @@ -46,6 +49,6 @@ def query_stackoverflow(): # [END bigquery_simple_app_print] -if __name__ == '__main__': +if __name__ == "__main__": query_stackoverflow() # [END bigquery_simple_app_all] diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py b/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py index 33f9f1adf69a..5c608e1fdc53 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py @@ -18,4 +18,4 @@ def test_query_stackoverflow(capsys): simple_app.query_stackoverflow() out, _ = capsys.readouterr() - assert 'views' in out + assert "views" in out diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py index 4917fdd3a414..6089d9fd9da1 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py @@ -36,8 +36,8 @@ def main(project, launch_browser=True): # such as over SSH or from a remote Jupyter notebook. appflow = flow.InstalledAppFlow.from_client_secrets_file( - 'client_secrets.json', - scopes=['https://www.googleapis.com/auth/bigquery']) + "client_secrets.json", scopes=["https://www.googleapis.com/auth/bigquery"] + ) if launch_browser: appflow.run_local_server() @@ -69,21 +69,21 @@ def main(project, launch_browser=True): # Print the results. for row in query_job.result(): # Wait for the job to complete. - print("{}: {}".format(row['name'], row['total'])) + print("{}: {}".format(row["name"], row["total"])) # [END bigquery_auth_user_query] -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) parser.add_argument( - '--launch-browser', - help='Use a local server flow to authenticate. ', - action='store_true') - parser.add_argument('project', help='Project to use for BigQuery billing.') + "--launch-browser", + help="Use a local server flow to authenticate. ", + action="store_true", + ) + parser.add_argument("project", help="Project to use for BigQuery billing.") args = parser.parse_args() - main( - args.project, launch_browser=args.launch_browser) + main(args.project, launch_browser=args.launch_browser) diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py index 494903c75a11..829502d25fc7 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py @@ -21,13 +21,12 @@ from user_credentials import main -PROJECT = os.environ['GOOGLE_CLOUD_PROJECT'] +PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] @pytest.fixture def mock_flow(): - flow_patch = mock.patch( - 'google_auth_oauthlib.flow.InstalledAppFlow', autospec=True) + flow_patch = mock.patch("google_auth_oauthlib.flow.InstalledAppFlow", autospec=True) with flow_patch as flow_mock: flow_mock.from_client_secrets_file.return_value = flow_mock @@ -39,4 +38,4 @@ def test_auth_query_console(mock_flow, capsys): main(PROJECT, launch_browser=False) out, _ = capsys.readouterr() # Fun fact: William P. Wood was the 1st director of the US Secret Service. - assert 'William' in out + assert "William" in out diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index f131790f2b51..b7e46157bd72 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -3,23 +3,22 @@ { "git": { "name": ".", - "remote": "git@github.com:googleapis/python-bigquery", - "sha": "0946a5c460b0d675f6dbe4f053a7801edba36443" + "remote": "git@github.com:googleapis/python-bigquery.git", + "sha": "416c0daf40e481c80fb5327b48baa915f0e7aa2f" } }, { "git": { - "name": "googleapis", - "remote": "https://github.com/googleapis/googleapis.git", - "sha": "e6ab0a55f2195169feded73dd684574dd4bd9dfa", - "internalRef": "319180144" + "name": "synthtool", + "remote": "https://github.com/googleapis/synthtool.git", + "sha": "b8ff6a41e195cda91bbfb20d9f11d5e58b7cc218" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "303271797a360f8a439203413f13a160f2f5b3b4" + "sha": "b8ff6a41e195cda91bbfb20d9f11d5e58b7cc218" } } ], diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index 2bc3798ea444..5125c398ece1 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -16,6 +16,7 @@ import synthtool as s from synthtool import gcp +from synthtool.languages import python gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() @@ -58,11 +59,18 @@ # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- -templated_files = common.py_library(cov_level=100) +templated_files = common.py_library(cov_level=100, samples=True) # BigQuery has a custom multiprocessing note s.move(templated_files, excludes=["noxfile.py", "docs/multiprocessing.rst"]) +# ---------------------------------------------------------------------------- +# Samples templates +# ---------------------------------------------------------------------------- + +python.py_samples() + + s.replace( "docs/conf.py", r'\{"members": True\}', From edd5c1f91408e4c552f9d3c2bcc679c39e7b1f0c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 21 Jul 2020 16:51:19 +0200 Subject: [PATCH 0894/2016] chore: change project env variable to use in tests --- .../google-cloud-bigquery/samples/snippets/noxfile_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py b/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py index 6ecfa40e0b8f..7d2e02346071 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile_config.py @@ -27,8 +27,8 @@ # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string # to use your own Cloud project. - # 'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT', - "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. "envs": {}, From beaa34f9d6f37fdce9cc479a9349f3a6cd0db598 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 21 Jul 2020 16:54:19 +0200 Subject: [PATCH 0895/2016] chore: fix samples tests dependencies --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index fe0f79e2139f..5de21f7e9450 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery[pandas]==1.25.0 +google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.25.0 google-auth-oauthlib==0.4.1 ipython==7.16.1 matplotlib==3.3.0 From b7b420245b7827c5f5761ba29b9f7eb624be9a81 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 21 Jul 2020 17:01:38 +0200 Subject: [PATCH 0896/2016] chore: exclude orphan samples doc files from TOC --- packages/google-cloud-bigquery/docs/conf.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 332b81b10268..251e1f4caed8 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -90,7 +90,12 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ["_build"] +exclude_patterns = [ + "_build", + "samples/AUTHORING_GUIDE.md", + "samples/CONTRIBUTING.md", + "samples/snippets/README.rst", +] # The reST default role (used for this markup: `text`) to use for all # documents. From 01708b456cb55218bd595123dac515b2e65c35d2 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 22 Jul 2020 08:36:02 +0200 Subject: [PATCH 0897/2016] chore: skip samples/snippets in top level snippets test --- packages/google-cloud-bigquery/noxfile.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index b2d26568ccaf..bb6a10e1e2cf 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -113,8 +113,10 @@ def snippets(session): session.install("-e", ".[all]") # Run py.test against the snippets tests. + # Skip tests in samples/snippets, as those are run in a different session + # using the nox config from that directory. session.run("py.test", os.path.join("docs", "snippets.py"), *session.posargs) - session.run("py.test", "samples", *session.posargs) + session.run("py.test", "samples", "--ignore=samples/snippets", *session.posargs) @nox.session(python="3.8") From 551f2a1f43dc50a59b420688b10f0bb9974d76c8 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 23 Jul 2020 17:23:45 +0200 Subject: [PATCH 0898/2016] fix: RowIterator.to_arrow() error when BQ Storage client cannot be created (#181) * fix: to_arrow() when can't create BQ Storage client * Clarify using BQ Storage client by default --- packages/google-cloud-bigquery/CHANGELOG.md | 2 +- .../google/cloud/bigquery/table.py | 2 +- .../tests/unit/test_table.py | 27 +++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 6da5b01a098d..b1adaa8cf1cd 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -9,7 +9,7 @@ ### Features -* use BigQuery Storage client by default ([#55](https://www.github.com/googleapis/python-bigquery/issues/55)) ([e75ff82](https://www.github.com/googleapis/python-bigquery/commit/e75ff8297c65981545b097f75a17cf9e78ac6772)), closes [#91](https://www.github.com/googleapis/python-bigquery/issues/91) +* use BigQuery Storage client by default (if dependencies available) ([#55](https://www.github.com/googleapis/python-bigquery/issues/55)) ([e75ff82](https://www.github.com/googleapis/python-bigquery/commit/e75ff8297c65981545b097f75a17cf9e78ac6772)), closes [#91](https://www.github.com/googleapis/python-bigquery/issues/91) * **bigquery:** add __eq__ method for class PartitionRange and RangePartitioning ([#162](https://www.github.com/googleapis/python-bigquery/issues/162)) ([0d2a88d](https://www.github.com/googleapis/python-bigquery/commit/0d2a88d8072154cfc9152afd6d26a60ddcdfbc73)) * **bigquery:** expose date_as_object parameter to users ([#150](https://www.github.com/googleapis/python-bigquery/issues/150)) ([a2d5ce9](https://www.github.com/googleapis/python-bigquery/commit/a2d5ce9e97992318d7dc85c51c053cab74e25a11)) * **bigquery:** expose date_as_object parameter to users ([#150](https://www.github.com/googleapis/python-bigquery/issues/150)) ([cbd831e](https://www.github.com/googleapis/python-bigquery/commit/cbd831e08024a67148723afd49e1db085e0a862c)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f1575ffb2af1..10b4198d3260 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1534,8 +1534,8 @@ def to_arrow( owns_bqstorage_client = False if not bqstorage_client and create_bqstorage_client: - owns_bqstorage_client = True bqstorage_client = self.client._create_bqstorage_client() + owns_bqstorage_client = bqstorage_client is not None try: progress_bar = self._get_progress_bar(progress_bar_type) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 3aabebb77f11..2c9d0f64e466 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1990,6 +1990,33 @@ def test_to_arrow_w_bqstorage_creates_client(self): mock_client._create_bqstorage_client.assert_called_once() bqstorage_client.transport.channel.close.assert_called_once() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Alice"}, {"v": "98"}]}, + {"f": [{"v": "Bob"}, {"v": "99"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + + mock_client = _mock_client() + mock_client._create_bqstorage_client.return_value = None + row_iterator = self._make_one(mock_client, api_request, path, schema) + + tbl = row_iterator.to_arrow(create_bqstorage_client=True) + + # The client attempted to create a BQ Storage client, and even though + # that was not possible, results were still returned without errors. + mock_client._create_bqstorage_client.assert_called_once() + self.assertIsInstance(tbl, pyarrow.Table) + self.assertEqual(tbl.num_rows, 2) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" From 40dd7f852d4e0691169579c65e942d79cf29ce2b Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 24 Jul 2020 13:18:11 -0400 Subject: [PATCH 0899/2016] chore(packaging): prep for grmp-1.0.0 release (#189) Fixes #188 --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 497853be01d1..0ac3a8598db0 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -32,7 +32,7 @@ 'enum34; python_version < "3.4"', "google-api-core >= 1.21.0, < 2.0dev", "google-cloud-core >= 1.1.0, < 2.0dev", - "google-resumable-media >= 0.5.0, < 0.6dev", + "google-resumable-media >= 0.5.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", ] extras = { From 4c37512b6e567be1d7fc2d6256d9798c418f9d9a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 25 Jul 2020 23:14:21 +0200 Subject: [PATCH 0900/2016] test: adjust assertions about pyarrow column values (#192) --- .../tests/unit/test__pandas_helpers.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 6adf098c03c8..4f4b5f447ab1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -1226,10 +1226,10 @@ def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): assert len(result.columns) == 2 col = result.columns[0] assert type(col) is pyarrow.lib.Int64Array - assert list(col) == [1, 10, 100] + assert col.to_pylist() == [1, 10, 100] col = result.columns[1] assert type(col) is pyarrow.lib.DoubleArray - assert list(col) == [2.2, 22.22, 222.222] + assert col.to_pylist() == [2.2, 22.22, 222.222] @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") @@ -1261,10 +1261,10 @@ def test_download_arrow_tabledata_list_known_field_type(module_under_test): assert len(result.columns) == 2 col = result.columns[0] assert type(col) is pyarrow.lib.Int64Array - assert list(col) == [1, 10, 100] + assert col.to_pylist() == [1, 10, 100] col = result.columns[1] assert type(col) is pyarrow.lib.StringArray - assert list(col) == ["2.2", "22.22", "222.222"] + assert col.to_pylist() == ["2.2", "22.22", "222.222"] @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") @@ -1288,10 +1288,10 @@ def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): assert len(result.columns) == 2 col = result.columns[0] assert type(col) is pyarrow.lib.Int64Array - assert list(col) == [1, 10, 100] + assert col.to_pylist() == [1, 10, 100] col = result.columns[1] assert type(col) is pyarrow.lib.StringArray - assert list(col) == ["2.2", "22.22", "222.222"] + assert col.to_pylist() == ["2.2", "22.22", "222.222"] @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") From 3f2d089679d35c9af069dc442d36c51e60a23661 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 27 Jul 2020 12:40:55 -0700 Subject: [PATCH 0901/2016] chore: release 1.26.1 (#179) * chore: updated CHANGELOG.md [ci skip] * chore: updated setup.cfg [ci skip] * chore: updated setup.py Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Seth Hollyman --- packages/google-cloud-bigquery/CHANGELOG.md | 16 ++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index b1adaa8cf1cd..f7286e9bf902 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,22 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [1.26.1](https://www.github.com/googleapis/python-bigquery/compare/v1.26.0...v1.26.1) (2020-07-25) + +### Documentation + +* Migrated code samples from + https://github.com/GoogleCloudPlatform/python-docs-samples + +### Bug Fixes + +* RowIterator.to_arrow() error when BQ Storage client cannot be created ([#181](https://www.github.com/googleapis/python-bigquery/issues/181)) ([7afa3d7](https://www.github.com/googleapis/python-bigquery/commit/7afa3d70f8564dcdacda2b9acbbd7207b50b186e)) + +### Dependencies + +* Updated version constraints on grmp dependency in anticipation of 1.0.0 release + ([#189](https://github.com/googleapis/python-bigquery/pull/189)) + ## [1.26.0](https://www.github.com/googleapis/python-bigquery/compare/v1.25.0...v1.26.0) (2020-07-20) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 0ac3a8598db0..61e836a737e3 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.26.0" +version = "1.26.1" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 9581598111bcfed6f7751c1ac0cfaeec9be9a950 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 28 Jul 2020 17:05:48 -0400 Subject: [PATCH 0902/2016] tests: avoid use of systest envvars in unit tests (#195) Fixes #194 --- packages/google-cloud-bigquery/tests/unit/test_client.py | 4 ++-- packages/google-cloud-bigquery/tests/unit/test_magics.py | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 2c4c1342c7a7..1f4d584b95d1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -221,7 +221,7 @@ def test__call_api_applying_custom_retry_on_timeout(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY - client = self._make_one() + client = self._make_one(project=self.PROJECT) api_request_patcher = mock.patch.object( client._connection, "api_request", side_effect=[TimeoutError, "result"], @@ -674,7 +674,7 @@ def test_create_bqstorage_client(self): mock_client.assert_called_once_with(credentials=creds) def test_create_bqstorage_client_missing_dependency(self): - client = self._make_one() + client = self._make_one(project=self.PROJECT) def fail_bqstorage_import(name, globals, locals, fromlist, level): # NOTE: *very* simplified, assuming a straightforward absolute import diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index a42592e3c560..bd52f3f5306e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -772,9 +772,16 @@ def test_bigquery_magic_w_missing_query(): ip.extension_manager.load_extension("google.cloud.bigquery") magics.context._project = None + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + cell_body = " \n \n \t\t \n " - with io.capture_output() as captured_io: + with io.capture_output() as captured_io, default_patch: ip.run_cell_magic("bigquery", "df", cell_body) output = captured_io.stderr From f9968d2d68def5ffa791a44b66e5964f61d8f2a0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 28 Jul 2020 16:44:03 -0500 Subject: [PATCH 0903/2016] doc: update CHANGELOG for version 1.10.0 (#167) I kept getting frustrated that I couldn't find when `exists_ok` was added without looking at the commit history. --- packages/google-cloud-bigquery/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index f7286e9bf902..a209dbaf5b18 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -482,7 +482,7 @@ ### New Features -- Add options to ignore errors when creating/deleting datasets/tables. ([#7491](https://github.com/googleapis/google-cloud-python/pull/7491)) +- Add `exists_ok` and `not_found_ok` options to ignore errors when creating/deleting datasets/tables. ([#7491](https://github.com/googleapis/google-cloud-python/pull/7491)) - Accept a string in Table and Dataset constructors. ([#7483](https://github.com/googleapis/google-cloud-python/pull/7483)) ### Documentation From f5bb04d5954a86c5e0efeffae53eca9c572124bf Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 29 Jul 2020 00:26:04 +0200 Subject: [PATCH 0904/2016] chore(deps): update dependency google-cloud-bigquery to v1.26.1 (#180) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==1.25.0` -> `==1.26.1` | --- ### Release Notes
googleapis/python-bigquery ### [`v1.26.1`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​1261-httpswwwgithubcomgoogleapispython-bigquerycomparev1260v1261-2020-07-25) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v1.26.0...v1.26.1) ### [`v1.26.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​1260-httpswwwgithubcomgoogleapispython-bigquerycomparev1250v1260-2020-07-20) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v1.25.0...v1.26.0) ##### Features - use BigQuery Storage client by default (if dependencies available) ([#​55](https://www.github.com/googleapis/python-bigquery/issues/55)) ([e75ff82](https://www.github.com/googleapis/python-bigquery/commit/e75ff8297c65981545b097f75a17cf9e78ac6772)), closes [#​91](https://www.github.com/googleapis/python-bigquery/issues/91) - **bigquery:** add **eq** method for class PartitionRange and RangePartitioning ([#​162](https://www.github.com/googleapis/python-bigquery/issues/162)) ([0d2a88d](https://www.github.com/googleapis/python-bigquery/commit/0d2a88d8072154cfc9152afd6d26a60ddcdfbc73)) - **bigquery:** expose date_as_object parameter to users ([#​150](https://www.github.com/googleapis/python-bigquery/issues/150)) ([a2d5ce9](https://www.github.com/googleapis/python-bigquery/commit/a2d5ce9e97992318d7dc85c51c053cab74e25a11)) - **bigquery:** expose date_as_object parameter to users ([#​150](https://www.github.com/googleapis/python-bigquery/issues/150)) ([cbd831e](https://www.github.com/googleapis/python-bigquery/commit/cbd831e08024a67148723afd49e1db085e0a862c)) ##### Bug Fixes - dry run queries with DB API cursor ([#​128](https://www.github.com/googleapis/python-bigquery/issues/128)) ([bc33a67](https://www.github.com/googleapis/python-bigquery/commit/bc33a678a765f0232615aa2038b8cc67c88468a0)) - omit `NaN` values when uploading from `insert_rows_from_dataframe` ([#​170](https://www.github.com/googleapis/python-bigquery/issues/170)) ([f9f2f45](https://www.github.com/googleapis/python-bigquery/commit/f9f2f45bc009c03cd257441bd4b6beb1754e2177)) ##### Documentation - **bigquery:** add client thread-safety documentation ([#​132](https://www.github.com/googleapis/python-bigquery/issues/132)) ([fce76b3](https://www.github.com/googleapis/python-bigquery/commit/fce76b3776472b1da798df862a3405e659e35bab)) - **bigquery:** add docstring for conflict exception ([#​171](https://www.github.com/googleapis/python-bigquery/issues/171)) ([9c3409b](https://www.github.com/googleapis/python-bigquery/commit/9c3409bb06218bf499620544f8e92802df0cce47)) - **bigquery:** consistent use of optional keyword ([#​153](https://www.github.com/googleapis/python-bigquery/issues/153)) ([79d8c61](https://www.github.com/googleapis/python-bigquery/commit/79d8c61064cca18b596a24b6f738c7611721dd5c)) - **bigquery:** fix the broken docs ([#​139](https://www.github.com/googleapis/python-bigquery/issues/139)) ([3235255](https://www.github.com/googleapis/python-bigquery/commit/3235255cc5f483949f34d2e8ef13b372e8713782))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 5de21f7e9450..f67eb3587526 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.25.0 +google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.26.1 google-auth-oauthlib==0.4.1 ipython==7.16.1 matplotlib==3.3.0 From 10c6e20228632ce0fa621693a0be7fba20a58598 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 29 Jul 2020 05:06:04 +0200 Subject: [PATCH 0905/2016] chore(deps): update dependency llvmlite to <=0.33.0 for python >= 3.6 (#185) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Update | Change | |---|---|---| | [llvmlite](http://llvmlite.pydata.org) ([source](https://togithub.com/numba/llvmlite)) | minor | ` <= 0.31.0` -> `<=0.33.0` | --- ### Release Notes
numba/llvmlite ### [`v0.33.0`](https://togithub.com/numba/llvmlite/blob/master/CHANGE_LOG#v0330-June-10-2020) [Compare Source](https://togithub.com/numba/llvmlite/compare/v0.32.1...v0.33.0) This release upgrades to LLVM 9 and drops support for older LLVM versions. Pull requests: - PR [#​593](https://togithub.com/numba/llvmlite/issues/593): Fix CUDA with LLVM9 - PR [#​592](https://togithub.com/numba/llvmlite/issues/592): Fix meta.yaml - PR [#​591](https://togithub.com/numba/llvmlite/issues/591): buildscripts: Unpin wheel - PR [#​590](https://togithub.com/numba/llvmlite/issues/590): add python_requires to setup.py - PR [#​582](https://togithub.com/numba/llvmlite/issues/582): Adds override for LLVM version check, re-formats docs. - PR [#​581](https://togithub.com/numba/llvmlite/issues/581): Add FAQ entry on LLVM version support. - PR [#​580](https://togithub.com/numba/llvmlite/issues/580): Trove classifiers may be out of date. - PR [#​577](https://togithub.com/numba/llvmlite/issues/577): llvmlite wheel building fixes - PR [#​575](https://togithub.com/numba/llvmlite/issues/575): Update the release date - PR [#​548](https://togithub.com/numba/llvmlite/issues/548): Upgrade to LLVM9 - PR [#​521](https://togithub.com/numba/llvmlite/issues/521): Allow instructions to be removed from blocks Authors: - Graham Markall - Jan Vesely - Siu Kwan Lam (core dev) - Stuart Archibald (core dev) - Tim Babb - Valentin Haenel (core dev) ### [`v0.32.1`](https://togithub.com/numba/llvmlite/blob/master/CHANGE_LOG#v0321-May-7-2020) [Compare Source](https://togithub.com/numba/llvmlite/compare/v0.32.0...v0.32.1) This is a small patch release that addresses some packaging issues: Pull requests: - PR 580: Trove classifiers may be out of date. - PR 581: Add FAQ entry on LLVM version support. - PR 582: Adds override for LLVM version check, re-formats docs. Authors: - Stuart Archibald (core dev) - Valentin Haenel (core dev) ### [`v0.32.0`](https://togithub.com/numba/llvmlite/blob/master/CHANGE_LOG#v0320-Apr-16-2020) [Compare Source](https://togithub.com/numba/llvmlite/compare/v0.31.0...v0.32.0) The main changes in this release are the removal of specific code for Python 2 and Python <3.6, and making the code base PEP8 compliant. Pull requests: - PR [#​577](https://togithub.com/numba/llvmlite/issues/577): llvmlite wheel building fixes - PR [#​560](https://togithub.com/numba/llvmlite/issues/560): ENH: Better error message - PR [#​558](https://togithub.com/numba/llvmlite/issues/558): update install docs - PR [#​556](https://togithub.com/numba/llvmlite/issues/556): binding: Allow empty features list - PR [#​555](https://togithub.com/numba/llvmlite/issues/555): travis: Cleanup - PR [#​554](https://togithub.com/numba/llvmlite/issues/554): azure-pipelines: Bump VM images. - PR [#​552](https://togithub.com/numba/llvmlite/issues/552): Add paragraph on installing from sdist and on non-traditional platforms. - PR [#​551](https://togithub.com/numba/llvmlite/issues/551): Remove python 2, python < 3.6, fix up, add flake8 - PR [#​549](https://togithub.com/numba/llvmlite/issues/549): Miscalled method and missing parameter in the documentation - PR [#​547](https://togithub.com/numba/llvmlite/issues/547): Permit building on Visual Studio 2017 - PR [#​543](https://togithub.com/numba/llvmlite/issues/543): Update error message in LLVM version check. - PR [#​540](https://togithub.com/numba/llvmlite/issues/540): update to final release date for 0.31.0 Authors: - Arik Funke - Eric Larson - Jan Vesely - Shan Sikdar - Siu Kwan Lam (core dev) - Stan Seibert (core dev) - Stuart Archibald (core dev) - Vladislav Hrčka
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#googleapis/python-bigquery). --- packages/google-cloud-bigquery/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 61e836a737e3..f391143d344a 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -59,7 +59,8 @@ # llvmlite >= 0.32.0 cannot be installed on Python 3.5 and below # (building the wheel fails), thus needs to be restricted. # See: https://github.com/googleapis/python-bigquery/issues/78 - "llvmlite <= 0.31.0", + "llvmlite <= 0.33.0;python_version>='3.6'", + "llvmlite <= 0.31.0;python_version<'3.6'", ], } From 3638fa79cf4066cc53cbdbef5442e65d4440a449 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Thu, 30 Jul 2020 10:55:33 -0700 Subject: [PATCH 0906/2016] feat: add support for getting and setting table IAM policy (#144) --- .../google/cloud/bigquery/client.py | 58 +++++ .../google/cloud/bigquery/iam.py | 38 ++++ .../google-cloud-bigquery/tests/system.py | 49 ++++ .../tests/unit/test_client.py | 210 ++++++++++++++++++ 4 files changed, 355 insertions(+) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/iam.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a3d1b8846c0a..651f0263e446 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -46,6 +46,7 @@ import google.api_core.client_options import google.api_core.exceptions +from google.api_core.iam import Policy from google.api_core import page_iterator import google.cloud._helpers from google.cloud import exceptions @@ -605,6 +606,63 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): ) return Dataset.from_api_repr(api_response) + def get_iam_policy( + self, table, requested_policy_version=1, retry=DEFAULT_RETRY, timeout=None, + ): + if not isinstance(table, (Table, TableReference)): + raise TypeError("table must be a Table or TableReference") + + if requested_policy_version != 1: + raise ValueError("only IAM policy version 1 is supported") + + body = {"options": {"requestedPolicyVersion": 1}} + + path = "{}:getIamPolicy".format(table.path) + + response = self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) + + return Policy.from_api_repr(response) + + def set_iam_policy( + self, table, policy, updateMask=None, retry=DEFAULT_RETRY, timeout=None, + ): + if not isinstance(table, (Table, TableReference)): + raise TypeError("table must be a Table or TableReference") + + if not isinstance(policy, (Policy)): + raise TypeError("policy must be a Policy") + + body = {"policy": policy.to_api_repr()} + + if updateMask is not None: + body["updateMask"] = updateMask + + path = "{}:setIamPolicy".format(table.path) + + response = self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) + + return Policy.from_api_repr(response) + + def test_iam_permissions( + self, table, permissions, retry=DEFAULT_RETRY, timeout=None, + ): + if not isinstance(table, (Table, TableReference)): + raise TypeError("table must be a Table or TableReference") + + body = {"permissions": permissions} + + path = "{}:testIamPermissions".format(table.path) + + response = self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) + + return response + def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): """[Beta] Fetch the model referenced by ``model_ref``. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/iam.py b/packages/google-cloud-bigquery/google/cloud/bigquery/iam.py new file mode 100644 index 000000000000..df9db36b756d --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/iam.py @@ -0,0 +1,38 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""BigQuery API IAM policy definitions + +For all allowed roles and permissions, see: + +https://cloud.google.com/bigquery/docs/access-control +""" + +# BigQuery-specific IAM roles available for tables and views + +BIGQUERY_DATA_EDITOR_ROLE = "roles/bigquery.dataEditor" +"""When applied to a table or view, this role provides permissions to +read and update data and metadata for the table or view.""" + +BIGQUERY_DATA_OWNER_ROLE = "roles/bigquery.dataOwner" +"""When applied to a table or view, this role provides permissions to +read and update data and metadata for the table or view, share the +table/view, and delete the table/view.""" + +BIGQUERY_DATA_VIEWER_ROLE = "roles/bigquery.dataViewer" +"""When applied to a table or view, this role provides permissions to +read data and metadata from the table or view.""" + +BIGQUERY_METADATA_VIEWER_ROLE = "roles/bigquery.metadataViewer" +"""When applied to a table or view, this role provides persmissions to +read metadata from the table or view.""" diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index cd5454a876ba..50e2dc7dea80 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -71,6 +71,7 @@ from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests +from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset @@ -1407,6 +1408,54 @@ def test_copy_table(self): got_rows = self._fetch_single_page(dest_table) self.assertTrue(len(got_rows) > 0) + def test_get_set_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_id = "test_table" + table_ref = Table(dataset.table(table_id)) + self.assertFalse(_table_exists(table_ref)) + + table = retry_403(Config.CLIENT.create_table)(table_ref) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + + member = "serviceAccount:{}".format(Config.CLIENT.get_service_account_email()) + BINDING = { + "role": BIGQUERY_DATA_VIEWER_ROLE, + "members": {member}, + } + + policy = Config.CLIENT.get_iam_policy(table) + self.assertIsInstance(policy, Policy) + self.assertEqual(policy.bindings, []) + + policy.bindings.append(BINDING) + returned_policy = Config.CLIENT.set_iam_policy(table, policy) + self.assertEqual(returned_policy.bindings, policy.bindings) + + def test_test_iam_permissions(self): + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_id = "test_table" + table_ref = Table(dataset.table(table_id)) + self.assertFalse(_table_exists(table_ref)) + + table = retry_403(Config.CLIENT.create_table)(table_ref) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + + # Test some default permissions. + permissions = [ + "bigquery.tables.get", + "bigquery.tables.getData", + "bigquery.tables.update", + ] + + response = Config.CLIENT.test_iam_permissions(table, [permissions]) + self.assertEqual(set(response["permissions"]), set(permissions)) + def test_job_cancel(self): DATASET_ID = _make_dataset_id("job_cancel") JOB_ID_PREFIX = "fetch_" + DATASET_ID diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 1f4d584b95d1..5687a27ec6d6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1748,6 +1748,216 @@ def test_get_table_sets_user_agent(self): ) self.assertIn("my-application/1.2.3", expected_user_agent) + def test_get_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy + + PATH = "/projects/{}/datasets/{}/tables/{}:getIamPolicy".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, + ) + BODY = {"options": {"requestedPolicyVersion": 1}} + ETAG = "CARDI" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + RETURNED = { + "resourceId": PATH, + "etag": ETAG, + "version": VERSION, + "bindings": [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ], + } + EXPECTED = { + binding["role"]: set(binding["members"]) for binding in RETURNED["bindings"] + } + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + policy = client.get_iam_policy(self.TABLE_REF, timeout=7.5) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + + self.assertIsInstance(policy, Policy) + self.assertEqual(policy.etag, RETURNED["etag"]) + self.assertEqual(policy.version, RETURNED["version"]) + self.assertEqual(dict(policy), EXPECTED) + + def test_get_iam_policy_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + table_resource_string = "projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, + ) + + with self.assertRaises(TypeError): + client.get_iam_policy(table_resource_string) + + def test_get_iam_policy_w_invalid_version(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with self.assertRaises(ValueError): + client.get_iam_policy(self.TABLE_REF, requested_policy_version=2) + + def test_set_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + ETAG = "foo" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + BINDINGS = [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ] + MASK = "bindings,etag" + RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS} + + policy = Policy() + for binding in BINDINGS: + policy[binding["role"]] = binding["members"] + + BODY = {"policy": policy.to_api_repr(), "updateMask": MASK} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + returned_policy = client.set_iam_policy( + self.TABLE_REF, policy, updateMask=MASK, timeout=7.5 + ) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + self.assertEqual(returned_policy.etag, ETAG) + self.assertEqual(returned_policy.version, VERSION) + self.assertEqual(dict(returned_policy), dict(policy)) + + def test_set_iam_policy_no_mask(self): + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + RETURNED = {"etag": "foo", "version": 1, "bindings": []} + + policy = Policy() + BODY = {"policy": policy.to_api_repr()} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + client.set_iam_policy(self.TABLE_REF, policy, timeout=7.5) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + + def test_set_iam_policy_invalid_policy(self): + from google.api_core.iam import Policy + + policy = Policy() + invalid_policy_repr = policy.to_api_repr() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with self.assertRaises(TypeError): + client.set_iam_policy(self.TABLE_REF, invalid_policy_repr) + + def test_set_iam_policy_w_invalid_table(self): + from google.api_core.iam import Policy + + policy = Policy() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + + with self.assertRaises(TypeError): + client.set_iam_policy(table_resource_string, policy) + + def test_test_iam_permissions(self): + PATH = "/projects/%s/datasets/%s/tables/%s:testIamPermissions" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + BODY = {"permissions": PERMISSIONS} + RETURNED = {"permissions": PERMISSIONS} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + client.test_iam_permissions(self.TABLE_REF, PERMISSIONS, timeout=7.5) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + + def test_test_iam_permissions_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + + with self.assertRaises(TypeError): + client.test_iam_permissions(table_resource_string, PERMISSIONS) + def test_update_dataset_w_invalid_field(self): from google.cloud.bigquery.dataset import Dataset From 08bd9f2cc1fb4fc8c3fa3c941a7136b972229091 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 30 Jul 2020 22:08:47 +0200 Subject: [PATCH 0907/2016] fix: raise error if inserting rows with unknown fields (#163) Co-authored-by: Tres Seaver --- .../google/cloud/bigquery/_helpers.py | 31 +++++++++++++- .../tests/unit/test__helpers.py | 40 +++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index d814eec8ca69..47851d42cdf4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -19,6 +19,7 @@ import datetime import decimal import re +import six from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -419,9 +420,23 @@ def _record_field_to_json(fields, row_value): Returns: Mapping[str, Any]: A JSON-serializable dictionary. """ - record = {} isdict = isinstance(row_value, dict) + # If row is passed as a tuple, make the length sanity check to avoid either + # uninformative index errors a few lines below or silently omitting some of + # the values from the result (we cannot know exactly which fields are missing + # or redundant, since we don't have their names). + if not isdict and len(row_value) != len(fields): + msg = "The number of row fields ({}) does not match schema length ({}).".format( + len(row_value), len(fields) + ) + raise ValueError(msg) + + record = {} + + if isdict: + processed_fields = set() + for subindex, subfield in enumerate(fields): subname = subfield.name subvalue = row_value.get(subname) if isdict else row_value[subindex] @@ -430,6 +445,20 @@ def _record_field_to_json(fields, row_value): if subvalue is not None: record[subname] = _field_to_json(subfield, subvalue) + if isdict: + processed_fields.add(subname) + + # Unknown fields should not be silently dropped, include them. Since there + # is no schema information available for them, include them as strings + # to make them JSON-serializable. + if isdict: + not_processed = set(row_value.keys()) - processed_fields + + for field_name in not_processed: + value = row_value[field_name] + if value is not None: + record[field_name] = six.text_type(value) + return record diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index fa6d27c981d8..28ebe81443af 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -18,6 +18,7 @@ import unittest import mock +import six class Test_not_null(unittest.TestCase): @@ -847,6 +848,26 @@ def test_w_non_empty_list(self): converted = self._call_fut(fields, original) self.assertEqual(converted, {"one": "42", "two": "two"}) + def test_w_list_missing_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = [42] + + with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + self._call_fut(fields, original) + + def test_w_list_too_many_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = [42, "two", "three"] + + with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + self._call_fut(fields, original) + def test_w_non_empty_dict(self): fields = [ _make_field("INT64", name="one", mode="NULLABLE"), @@ -890,6 +911,25 @@ def test_w_explicit_none_value(self): # None values should be dropped regardless of the field type self.assertEqual(converted, {"one": "42"}) + def test_w_dict_unknown_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = { + "whoami": datetime.date(2020, 7, 20), + "one": 111, + "two": "222", + "void": None, + } + + converted = self._call_fut(fields, original) + + # Unknown fields should be included (if not None), but converted as strings. + self.assertEqual( + converted, {"whoami": "2020-07-20", "one": "111", "two": "222"}, + ) + class Test_field_to_json(unittest.TestCase): def _call_fut(self, field, value): From 06e614868c9f21dc888a1b0098c58bedb80ffaf9 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 30 Jul 2020 17:54:05 -0400 Subject: [PATCH 0908/2016] tests: remove warning spew (#197) Fixes: #196 --- .../tests/unit/test__pandas_helpers.py | 21 +++++++-- .../tests/unit/test_client.py | 25 ++++++---- .../tests/unit/test_job.py | 46 +++++++++++++----- .../tests/unit/test_magics.py | 6 +-- .../tests/unit/test_table.py | 47 ++++++++++++++++--- 5 files changed, 112 insertions(+), 33 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 4f4b5f447ab1..e229e04a2581 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -20,6 +20,7 @@ import warnings import mock +import six try: import pandas @@ -299,7 +300,10 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): ) ) assert pyarrow.types.is_struct(actual) - assert actual.num_children == len(fields) + try: + assert actual.num_fields == len(fields) + except AttributeError: # py27 + assert actual.num_children == len(fields) assert actual.equals(expected) @@ -344,7 +348,10 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): ) assert pyarrow.types.is_list(actual) assert pyarrow.types.is_struct(actual.value_type) - assert actual.value_type.num_children == len(fields) + try: + assert actual.value_type.num_fields == len(fields) + except AttributeError: # py27 + assert actual.value_type.num_children == len(fields) assert actual.value_type.equals(expected_value_type) @@ -542,9 +549,17 @@ def test_bq_to_arrow_schema_w_unknown_type(module_under_test): # instead. schema.SchemaField("field3", "UNKNOWN_TYPE"), ) - actual = module_under_test.bq_to_arrow_schema(fields) + with warnings.catch_warnings(record=True) as warned: + actual = module_under_test.bq_to_arrow_schema(fields) assert actual is None + if six.PY3: + assert len(warned) == 1 + warning = warned[0] + assert "field3" in str(warning) + else: + assert len(warned) == 0 + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_get_column_or_index_not_found(module_under_test): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 5687a27ec6d6..1987082815cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -221,7 +221,8 @@ def test__call_api_applying_custom_retry_on_timeout(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY - client = self._make_one(project=self.PROJECT) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) api_request_patcher = mock.patch.object( client._connection, "api_request", side_effect=[TimeoutError, "result"], @@ -674,7 +675,8 @@ def test_create_bqstorage_client(self): mock_client.assert_called_once_with(credentials=creds) def test_create_bqstorage_client_missing_dependency(self): - client = self._make_one(project=self.PROJECT) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) def fail_bqstorage_import(name, globals, locals, fromlist, level): # NOTE: *very* simplified, assuming a straightforward absolute import @@ -7680,17 +7682,24 @@ def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): ) with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch as to_parquet_spy: - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - location=self.LOCATION, - parquet_compression="gzip", - ) + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + location=self.LOCATION, + parquet_compression="gzip", + ) call_args = to_parquet_spy.call_args assert call_args is not None assert call_args.kwargs.get("compression") == "gzip" + assert len(warned) == 2 + warning = warned[0] + assert "Loading dataframe data without pyarrow" in str(warning) + warning = warned[1] + assert "Please install the pyarrow package" in str(warning) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 733445337509..9cd3631e1a84 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -17,6 +17,7 @@ import json import textwrap import unittest +import warnings import freezegun import mock @@ -1834,26 +1835,34 @@ def test_time_partitioning_hit(self): "expirationMs": str(year_ms), "requirePartitionFilter": False, } - expected = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) + with warnings.catch_warnings(record=True) as warned: + expected = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) self.assertEqual(config.time_partitioning, expected) + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + def test_time_partitioning_setter(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType field = "creation_date" year_ms = 86400 * 1000 * 365 - time_partitioning = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) + + with warnings.catch_warnings(record=True) as warned: + time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) + config = self._get_target_class()() config.time_partitioning = time_partitioning expected = { @@ -1864,6 +1873,10 @@ def test_time_partitioning_setter(self): } self.assertEqual(config._properties["load"]["timePartitioning"], expected) + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + def test_time_partitioning_setter_w_none(self): from google.cloud.bigquery.table import TimePartitioningType @@ -5595,7 +5608,10 @@ def test_to_dataframe_column_date_dtypes_wo_pyarrow(self): job = self._make_one(self.JOB_ID, self.QUERY, client) with mock.patch("google.cloud.bigquery.table.pyarrow", None): - df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) + with warnings.catch_warnings(record=True) as warned: + df = job.to_dataframe( + date_as_object=False, create_bqstorage_client=False + ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 1) # verify the number of rows @@ -5604,6 +5620,10 @@ def test_to_dataframe_column_date_dtypes_wo_pyarrow(self): self.assertEqual(df.date.dtype.name, "object") + assert len(warned) == 1 + warning = warned[0] + assert "without pyarrow" in str(warning) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm") diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index bd52f3f5306e..7b07626ad9aa 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -399,7 +399,7 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): # Set up the context with monkeypatch so that it's reset for subsequent # tests. - monkeypatch.setattr(magics.context, "credentials", mock_credentials) + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) @@ -560,7 +560,7 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): # Set up the context with monkeypatch so that it's reset for subsequent # tests. - monkeypatch.setattr(magics.context, "credentials", mock_credentials) + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) @@ -624,7 +624,7 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): # Set up the context with monkeypatch so that it's reset for subsequent # tests. - monkeypatch.setattr(magics.context, "credentials", mock_credentials) + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 2c9d0f64e466..28575bd43081 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1778,7 +1778,8 @@ def test_to_arrow_w_unknown_type(self): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one(_mock_client(), api_request, path, schema) - tbl = row_iterator.to_arrow(create_bqstorage_client=False) + with warnings.catch_warnings(record=True) as warned: + tbl = row_iterator.to_arrow(create_bqstorage_client=False) self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) @@ -1799,6 +1800,10 @@ def test_to_arrow_w_unknown_type(self): self.assertEqual(ages, [33, 29]) self.assertEqual(sports, ["volleyball", "basketball"]) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("sport" in str(warning)) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): from google.cloud.bigquery.schema import SchemaField @@ -2370,13 +2375,18 @@ def test_to_dataframe_progress_bar_wo_pyarrow( for progress_bar_type, progress_bar_mock in progress_bars: row_iterator = self._make_one(_mock_client(), api_request, path, schema) with mock.patch("google.cloud.bigquery.table.pyarrow", None): - df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) progress_bar_mock.assert_called() progress_bar_mock().update.assert_called() progress_bar_mock().close.assert_called_once() self.assertEqual(len(df), 4) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): @@ -2499,12 +2509,17 @@ def test_to_dataframe_w_empty_results_wo_pyarrow(self): api_request = mock.Mock(return_value={"rows": []}) row_iterator = self._make_one(_mock_client(), api_request, schema=schema) - df = row_iterator.to_dataframe() + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe() self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_no_results_wo_pyarrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2522,12 +2537,17 @@ def empty_iterable(dtypes=None): row_iterator.to_dataframe_iterable = empty_iterable - df = row_iterator.to_dataframe() + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe() self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): import datetime @@ -2787,11 +2807,19 @@ def test_to_dataframe_w_bqstorage_v1beta1_no_streams(self): table=mut.TableReference.from_string("proj.dset.tbl"), ) - got = row_iterator.to_dataframe(bqstorage_client) + with warnings.catch_warnings(record=True) as warned: + got = row_iterator.to_dataframe(bqstorage_client) + column_names = ["colA", "colC", "colB"] self.assertEqual(list(got), column_names) self.assertTrue(got.empty) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue( + "Support for BigQuery Storage v1beta1 clients is deprecated" in str(warning) + ) + @unittest.skipIf( bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @@ -3493,7 +3521,10 @@ def test_to_dataframe_concat_categorical_dtype_wo_pyarrow(self): row_iterator = self._make_one(_mock_client(), api_request, path, schema) - with mock.patch("google.cloud.bigquery.table.pyarrow", None): + mock_pyarrow = mock.patch("google.cloud.bigquery.table.pyarrow", None) + catch_warnings = warnings.catch_warnings(record=True) + + with mock_pyarrow, catch_warnings as warned: got = row_iterator.to_dataframe( dtypes={ "col_category": pandas.core.dtypes.dtypes.CategoricalDtype( @@ -3522,6 +3553,10 @@ def test_to_dataframe_concat_categorical_dtype_wo_pyarrow(self): ["low", "medium", "low", "medium", "high", "low"], ) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From 292d8ff3636ead11e5536eef5cd74107cbe09edc Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 3 Aug 2020 10:17:00 +0530 Subject: [PATCH 0909/2016] feat: add support and tests for struct fields (#146) * feat(bigquery): add support and tests for struct fields * feat(bigquery): bump pyarrow version for python3 * feat(bigquery): nit --- .../google/cloud/bigquery/_pandas_helpers.py | 15 ++--- packages/google-cloud-bigquery/setup.py | 6 +- .../google-cloud-bigquery/tests/system.py | 44 ++++++++++++++ .../tests/unit/test_client.py | 58 +++++++++++++++---- 4 files changed, 102 insertions(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index ff6525399a52..953b7d0fe199 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -287,13 +287,14 @@ def dataframe_to_bq_schema(dataframe, bq_schema): """ if bq_schema: bq_schema = schema._to_schema_fields(bq_schema) - for field in bq_schema: - if field.field_type in schema._STRUCT_TYPES: - raise ValueError( - "Uploading dataframes with struct (record) column types " - "is not supported. See: " - "https://github.com/googleapis/google-cloud-python/issues/8191" - ) + if six.PY2: + for field in bq_schema: + if field.field_type in schema._STRUCT_TYPES: + raise ValueError( + "Uploading dataframes with struct (record) column types " + "is not supported under Python2. See: " + "https://github.com/googleapis/python-bigquery/issues/21" + ) bq_schema_index = {field.name: field for field in bq_schema} bq_schema_unused = set(bq_schema_index.keys()) else: diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index f391143d344a..b00b2cbe5fb4 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -47,10 +47,8 @@ ], "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. - 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': [ - # Bad Linux release for 0.14.0. - # https://issues.apache.org/jira/browse/ARROW-5868 - "pyarrow>=0.4.1, != 0.14.0" + 'pyarrow: platform_system != "Windows" or python_version >= "3.5"': [ + "pyarrow>=0.17.0" ], "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], "fastparquet": [ diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 50e2dc7dea80..be79a6d20780 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -131,6 +131,8 @@ PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0") PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0") +PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version def _has_rows(result): @@ -1075,6 +1077,48 @@ def test_load_table_from_dataframe_w_explicit_schema(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 3) + @unittest.skipIf( + pyarrow is None or PYARROW_INSTALLED_VERSION < PYARROW_MINIMUM_VERSION, + "Only `pyarrow version >=0.17.0` is supported", + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_w_struct_datatype(self): + """Test that a DataFrame with struct datatype can be uploaded if a + BigQuery schema is specified. + + https://github.com/googleapis/python-bigquery/issues/21 + """ + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_struct_datatype".format( + Config.CLIENT.project, dataset_id + ) + table_schema = [ + bigquery.SchemaField( + "bar", + "RECORD", + fields=[ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + mode="REQUIRED", + ), + ] + table = retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + df_data = [{"id": 1, "age": 21}, {"id": 2, "age": 22}, {"id": 2, "age": 23}] + dataframe = pandas.DataFrame(data={"bar": df_data}, columns=["bar"]) + + load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) + load_job.result() + + table = Config.CLIENT.get_table(table_id) + self.assertEqual(table.schema, table_schema) + self.assertEqual(table.num_rows, 3) + def test_load_table_from_json_basic_use(self): table_schema = ( bigquery.SchemaField("name", "STRING", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 1987082815cb..8b63f7e57dd0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -7373,19 +7373,22 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_struct_fields_error(self): + def test_load_table_from_dataframe_struct_fields(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField client = self._make_client() - records = [{"float_column": 3.14, "struct_column": [{"foo": 1}, {"bar": -1}]}] - dataframe = pandas.DataFrame(data=records) + records = [(3.14, {"foo": 1, "bar": 1})] + dataframe = pandas.DataFrame( + data=records, columns=["float_column", "struct_column"] + ) schema = [ SchemaField("float_column", "FLOAT"), SchemaField( - "agg_col", + "struct_column", "RECORD", fields=[SchemaField("foo", "INTEGER"), SchemaField("bar", "INTEGER")], ), @@ -7396,14 +7399,49 @@ def test_load_table_from_dataframe_struct_fields_error(self): "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - with pytest.raises(ValueError) as exc_info, load_patch: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + if six.PY2: + with pytest.raises(ValueError) as exc_info, load_patch: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + job_config=job_config, + location=self.LOCATION, + ) + + err_msg = str(exc_info.value) + assert "struct" in err_msg + assert "not support" in err_msg + + else: + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + job_config=job_config, + location=self.LOCATION, + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, ) - err_msg = str(exc_info.value) - assert "struct" in err_msg - assert "not support" in err_msg + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") From 8c3c11e9253f9e987caa4f696be09e44f41447d4 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Tue, 4 Aug 2020 09:39:42 -0700 Subject: [PATCH 0910/2016] testing: split system tests into separate builds (#207) --- packages/google-cloud-bigquery/.gitignore | 3 +- .../google-cloud-bigquery/.kokoro/build.sh | 8 +- .../.kokoro/docker/docs/Dockerfile | 98 ++++ .../.kokoro/docker/docs/fetch_gpg_keys.sh | 45 ++ .../.kokoro/docs/common.cfg | 21 +- .../.kokoro/docs/docs-presubmit.cfg | 17 + .../.kokoro/presubmit/presubmit.cfg | 8 +- .../.kokoro/presubmit/system-2.7.cfg | 7 + .../.kokoro/presubmit/system-3.8.cfg | 7 + .../.kokoro/publish-docs.sh | 39 +- .../.kokoro/trampoline_v2.sh | 487 ++++++++++++++++++ packages/google-cloud-bigquery/.trampolinerc | 51 ++ packages/google-cloud-bigquery/docs/conf.py | 4 + .../proto/encryption_config_pb2.py | 2 +- .../cloud/bigquery_v2/proto/model_pb2.py | 2 +- .../bigquery_v2/proto/model_reference_pb2.py | 2 +- .../bigquery_v2/proto/standard_sql_pb2.py | 2 +- packages/google-cloud-bigquery/noxfile.py | 37 ++ packages/google-cloud-bigquery/synth.metadata | 18 +- packages/google-cloud-bigquery/synth.py | 2 +- 20 files changed, 823 insertions(+), 37 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile create mode 100755 packages/google-cloud-bigquery/.kokoro/docker/docs/fetch_gpg_keys.sh create mode 100644 packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/system-2.7.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/system-3.8.cfg create mode 100755 packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh create mode 100644 packages/google-cloud-bigquery/.trampolinerc diff --git a/packages/google-cloud-bigquery/.gitignore b/packages/google-cloud-bigquery/.gitignore index b87e1ed580d9..b9daa52f118d 100644 --- a/packages/google-cloud-bigquery/.gitignore +++ b/packages/google-cloud-bigquery/.gitignore @@ -46,6 +46,7 @@ pip-log.txt # Built documentation docs/_build bigquery/docs/generated +docs.metadata # Virtual environment env/ @@ -57,4 +58,4 @@ system_tests/local_test_setup # Make sure a generated file isn't accidentally committed. pylintrc -pylintrc.test \ No newline at end of file +pylintrc.test diff --git a/packages/google-cloud-bigquery/.kokoro/build.sh b/packages/google-cloud-bigquery/.kokoro/build.sh index d3749e290e28..0e71e2aca650 100755 --- a/packages/google-cloud-bigquery/.kokoro/build.sh +++ b/packages/google-cloud-bigquery/.kokoro/build.sh @@ -36,4 +36,10 @@ python3.6 -m pip uninstall --yes --quiet nox-automation python3.6 -m pip install --upgrade --quiet nox python3.6 -m nox --version -python3.6 -m nox +# If NOX_SESSION is set, it only runs the specified session, +# otherwise run all the sessions. +if [[ -n "${NOX_SESSION:-}" ]]; then + python3.6 -m nox -s "${NOX_SESSION:-}" +else + python3.6 -m nox +fi diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile new file mode 100644 index 000000000000..412b0b56a921 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile @@ -0,0 +1,98 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ubuntu:20.04 + +ENV DEBIAN_FRONTEND noninteractive + +# Ensure local Python is preferred over distribution Python. +ENV PATH /usr/local/bin:$PATH + +# Install dependencies. +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + apt-transport-https \ + build-essential \ + ca-certificates \ + curl \ + dirmngr \ + git \ + gpg-agent \ + graphviz \ + libbz2-dev \ + libdb5.3-dev \ + libexpat1-dev \ + libffi-dev \ + liblzma-dev \ + libreadline-dev \ + libsnappy-dev \ + libssl-dev \ + libsqlite3-dev \ + portaudio19-dev \ + redis-server \ + software-properties-common \ + ssh \ + sudo \ + tcl \ + tcl-dev \ + tk \ + tk-dev \ + uuid-dev \ + wget \ + zlib1g-dev \ + && add-apt-repository universe \ + && apt-get update \ + && apt-get -y install jq \ + && apt-get clean autoclean \ + && apt-get autoremove -y \ + && rm -rf /var/lib/apt/lists/* \ + && rm -f /var/cache/apt/archives/*.deb + + +COPY fetch_gpg_keys.sh /tmp +# Install the desired versions of Python. +RUN set -ex \ + && export GNUPGHOME="$(mktemp -d)" \ + && echo "disable-ipv6" >> "${GNUPGHOME}/dirmngr.conf" \ + && /tmp/fetch_gpg_keys.sh \ + && for PYTHON_VERSION in 3.7.8 3.8.5; do \ + wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \ + && wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \ + && gpg --batch --verify python-${PYTHON_VERSION}.tar.xz.asc python-${PYTHON_VERSION}.tar.xz \ + && rm -r python-${PYTHON_VERSION}.tar.xz.asc \ + && mkdir -p /usr/src/python-${PYTHON_VERSION} \ + && tar -xJC /usr/src/python-${PYTHON_VERSION} --strip-components=1 -f python-${PYTHON_VERSION}.tar.xz \ + && rm python-${PYTHON_VERSION}.tar.xz \ + && cd /usr/src/python-${PYTHON_VERSION} \ + && ./configure \ + --enable-shared \ + # This works only on Python 2.7 and throws a warning on every other + # version, but seems otherwise harmless. + --enable-unicode=ucs4 \ + --with-system-ffi \ + --without-ensurepip \ + && make -j$(nproc) \ + && make install \ + && ldconfig \ + ; done \ + && rm -rf "${GNUPGHOME}" \ + && rm -rf /usr/src/python* \ + && rm -rf ~/.cache/ + +RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ + && python3.7 /tmp/get-pip.py \ + && python3.8 /tmp/get-pip.py \ + && rm /tmp/get-pip.py + +CMD ["python3.7"] diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/fetch_gpg_keys.sh b/packages/google-cloud-bigquery/.kokoro/docker/docs/fetch_gpg_keys.sh new file mode 100755 index 000000000000..d653dd868e4b --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/fetch_gpg_keys.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A script to fetch gpg keys with retry. +# Avoid jinja parsing the file. +# + +function retry { + if [[ "${#}" -le 1 ]]; then + echo "Usage: ${0} retry_count commands.." + exit 1 + fi + local retries=${1} + local command="${@:2}" + until [[ "${retries}" -le 0 ]]; do + $command && return 0 + if [[ $? -ne 0 ]]; then + echo "command failed, retrying" + ((retries--)) + fi + done + return 1 +} + +# 3.6.9, 3.7.5 (Ned Deily) +retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ + 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D + +# 3.8.0 (Łukasz Langa) +retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ + E3FF2839C048B25C084DEBE9B26995E310250568 + +# diff --git a/packages/google-cloud-bigquery/.kokoro/docs/common.cfg b/packages/google-cloud-bigquery/.kokoro/docs/common.cfg index 229abf075515..8f9807f722a4 100644 --- a/packages/google-cloud-bigquery/.kokoro/docs/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/docs/common.cfg @@ -11,12 +11,12 @@ action { gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" # Configure the docker image for kokoro-trampoline. env_vars: { key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" + value: "gcr.io/cloud-devrel-kokoro-resources/python-lib-docs" } env_vars: { key: "TRAMPOLINE_BUILD_FILE" @@ -28,6 +28,23 @@ env_vars: { value: "docs-staging" } +env_vars: { + key: "V2_STAGING_BUCKET" + value: "docs-staging-v2-staging" +} + +# It will upload the docker image after successful builds. +env_vars: { + key: "TRAMPOLINE_IMAGE_UPLOAD" + value: "true" +} + +# It will always build the docker image. +env_vars: { + key: "TRAMPOLINE_DOCKERFILE" + value: ".kokoro/docker/docs/Dockerfile" +} + # Fetch the token needed for reporting release status to GitHub before_action { fetch_keystore { diff --git a/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg new file mode 100644 index 000000000000..1118107829b7 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg @@ -0,0 +1,17 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "STAGING_BUCKET" + value: "gcloud-python-test" +} + +env_vars: { + key: "V2_STAGING_BUCKET" + value: "gcloud-python-test" +} + +# We only upload the image in the main `docs` build. +env_vars: { + key: "TRAMPOLINE_IMAGE_UPLOAD" + value: "false" +} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg index 8f43917d92fe..b158096f0ae2 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg @@ -1 +1,7 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file +# Format: //devtools/kokoro/config/proto/build.proto + +# Disable system tests. +env_vars: { + key: "RUN_SYSTEM_TESTS" + value: "false" +} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/system-2.7.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/system-2.7.cfg new file mode 100644 index 000000000000..3b6523a197cc --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/system-2.7.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "system-2.7" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.8.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.8.cfg new file mode 100644 index 000000000000..f4bcee3db0f0 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "system-3.8" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh index 309212789828..8acb14e802b0 100755 --- a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh +++ b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh @@ -18,26 +18,16 @@ set -eo pipefail # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 -cd github/python-bigquery - -# Remove old nox -python3.6 -m pip uninstall --yes --quiet nox-automation +export PATH="${HOME}/.local/bin:${PATH}" # Install nox -python3.6 -m pip install --upgrade --quiet nox -python3.6 -m nox --version +python3 -m pip install --user --upgrade --quiet nox +python3 -m nox --version # build docs nox -s docs -python3 -m pip install gcp-docuploader - -# install a json parser -sudo apt-get update -sudo apt-get -y install software-properties-common -sudo add-apt-repository universe -sudo apt-get update -sudo apt-get -y install jq +python3 -m pip install --user gcp-docuploader # create metadata python3 -m docuploader create-metadata \ @@ -52,4 +42,23 @@ python3 -m docuploader create-metadata \ cat docs.metadata # upload docs -python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket docs-staging +python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" + + +# docfx yaml files +nox -s docfx + +# create metadata. +python3 -m docuploader create-metadata \ + --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ + --version=$(python3 setup.py --version) \ + --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ + --distribution-name=$(python3 setup.py --name) \ + --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ + --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ + --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) + +cat docs.metadata + +# upload docs +python3 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" diff --git a/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh b/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh new file mode 100755 index 000000000000..719bcd5ba84d --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh @@ -0,0 +1,487 @@ +#!/usr/bin/env bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# trampoline_v2.sh +# +# This script does 3 things. +# +# 1. Prepare the Docker image for the test +# 2. Run the Docker with appropriate flags to run the test +# 3. Upload the newly built Docker image +# +# in a way that is somewhat compatible with trampoline_v1. +# +# To run this script, first download few files from gcs to /dev/shm. +# (/dev/shm is passed into the container as KOKORO_GFILE_DIR). +# +# gsutil cp gs://cloud-devrel-kokoro-resources/python-docs-samples/secrets_viewer_service_account.json /dev/shm +# gsutil cp gs://cloud-devrel-kokoro-resources/python-docs-samples/automl_secrets.txt /dev/shm +# +# Then run the script. +# .kokoro/trampoline_v2.sh +# +# These environment variables are required: +# TRAMPOLINE_IMAGE: The docker image to use. +# TRAMPOLINE_DOCKERFILE: The location of the Dockerfile. +# +# You can optionally change these environment variables: +# TRAMPOLINE_IMAGE_UPLOAD: +# (true|false): Whether to upload the Docker image after the +# successful builds. +# TRAMPOLINE_BUILD_FILE: The script to run in the docker container. +# TRAMPOLINE_WORKSPACE: The workspace path in the docker container. +# Defaults to /workspace. +# Potentially there are some repo specific envvars in .trampolinerc in +# the project root. + + +set -euo pipefail + +TRAMPOLINE_VERSION="2.0.5" + +if command -v tput >/dev/null && [[ -n "${TERM:-}" ]]; then + readonly IO_COLOR_RED="$(tput setaf 1)" + readonly IO_COLOR_GREEN="$(tput setaf 2)" + readonly IO_COLOR_YELLOW="$(tput setaf 3)" + readonly IO_COLOR_RESET="$(tput sgr0)" +else + readonly IO_COLOR_RED="" + readonly IO_COLOR_GREEN="" + readonly IO_COLOR_YELLOW="" + readonly IO_COLOR_RESET="" +fi + +function function_exists { + [ $(LC_ALL=C type -t $1)"" == "function" ] +} + +# Logs a message using the given color. The first argument must be one +# of the IO_COLOR_* variables defined above, such as +# "${IO_COLOR_YELLOW}". The remaining arguments will be logged in the +# given color. The log message will also have an RFC-3339 timestamp +# prepended (in UTC). You can disable the color output by setting +# TERM=vt100. +function log_impl() { + local color="$1" + shift + local timestamp="$(date -u "+%Y-%m-%dT%H:%M:%SZ")" + echo "================================================================" + echo "${color}${timestamp}:" "$@" "${IO_COLOR_RESET}" + echo "================================================================" +} + +# Logs the given message with normal coloring and a timestamp. +function log() { + log_impl "${IO_COLOR_RESET}" "$@" +} + +# Logs the given message in green with a timestamp. +function log_green() { + log_impl "${IO_COLOR_GREEN}" "$@" +} + +# Logs the given message in yellow with a timestamp. +function log_yellow() { + log_impl "${IO_COLOR_YELLOW}" "$@" +} + +# Logs the given message in red with a timestamp. +function log_red() { + log_impl "${IO_COLOR_RED}" "$@" +} + +readonly tmpdir=$(mktemp -d -t ci-XXXXXXXX) +readonly tmphome="${tmpdir}/h" +mkdir -p "${tmphome}" + +function cleanup() { + rm -rf "${tmpdir}" +} +trap cleanup EXIT + +RUNNING_IN_CI="${RUNNING_IN_CI:-false}" + +# The workspace in the container, defaults to /workspace. +TRAMPOLINE_WORKSPACE="${TRAMPOLINE_WORKSPACE:-/workspace}" + +pass_down_envvars=( + # TRAMPOLINE_V2 variables. + # Tells scripts whether they are running as part of CI or not. + "RUNNING_IN_CI" + # Indicates which CI system we're in. + "TRAMPOLINE_CI" + # Indicates the version of the script. + "TRAMPOLINE_VERSION" +) + +log_yellow "Building with Trampoline ${TRAMPOLINE_VERSION}" + +# Detect which CI systems we're in. If we're in any of the CI systems +# we support, `RUNNING_IN_CI` will be true and `TRAMPOLINE_CI` will be +# the name of the CI system. Both envvars will be passing down to the +# container for telling which CI system we're in. +if [[ -n "${KOKORO_BUILD_ID:-}" ]]; then + # descriptive env var for indicating it's on CI. + RUNNING_IN_CI="true" + TRAMPOLINE_CI="kokoro" + if [[ "${TRAMPOLINE_USE_LEGACY_SERVICE_ACCOUNT:-}" == "true" ]]; then + if [[ ! -f "${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json" ]]; then + log_red "${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json does not exist. Did you forget to mount cloud-devrel-kokoro-resources/trampoline? Aborting." + exit 1 + fi + # This service account will be activated later. + TRAMPOLINE_SERVICE_ACCOUNT="${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json" + else + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + gcloud auth list + fi + log_yellow "Configuring Container Registry access" + gcloud auth configure-docker --quiet + fi + pass_down_envvars+=( + # KOKORO dynamic variables. + "KOKORO_BUILD_NUMBER" + "KOKORO_BUILD_ID" + "KOKORO_JOB_NAME" + "KOKORO_GIT_COMMIT" + "KOKORO_GITHUB_COMMIT" + "KOKORO_GITHUB_PULL_REQUEST_NUMBER" + "KOKORO_GITHUB_PULL_REQUEST_COMMIT" + # For Build Cop Bot + "KOKORO_GITHUB_COMMIT_URL" + "KOKORO_GITHUB_PULL_REQUEST_URL" + ) +elif [[ "${TRAVIS:-}" == "true" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="travis" + pass_down_envvars+=( + "TRAVIS_BRANCH" + "TRAVIS_BUILD_ID" + "TRAVIS_BUILD_NUMBER" + "TRAVIS_BUILD_WEB_URL" + "TRAVIS_COMMIT" + "TRAVIS_COMMIT_MESSAGE" + "TRAVIS_COMMIT_RANGE" + "TRAVIS_JOB_NAME" + "TRAVIS_JOB_NUMBER" + "TRAVIS_JOB_WEB_URL" + "TRAVIS_PULL_REQUEST" + "TRAVIS_PULL_REQUEST_BRANCH" + "TRAVIS_PULL_REQUEST_SHA" + "TRAVIS_PULL_REQUEST_SLUG" + "TRAVIS_REPO_SLUG" + "TRAVIS_SECURE_ENV_VARS" + "TRAVIS_TAG" + ) +elif [[ -n "${GITHUB_RUN_ID:-}" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="github-workflow" + pass_down_envvars+=( + "GITHUB_WORKFLOW" + "GITHUB_RUN_ID" + "GITHUB_RUN_NUMBER" + "GITHUB_ACTION" + "GITHUB_ACTIONS" + "GITHUB_ACTOR" + "GITHUB_REPOSITORY" + "GITHUB_EVENT_NAME" + "GITHUB_EVENT_PATH" + "GITHUB_SHA" + "GITHUB_REF" + "GITHUB_HEAD_REF" + "GITHUB_BASE_REF" + ) +elif [[ "${CIRCLECI:-}" == "true" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="circleci" + pass_down_envvars+=( + "CIRCLE_BRANCH" + "CIRCLE_BUILD_NUM" + "CIRCLE_BUILD_URL" + "CIRCLE_COMPARE_URL" + "CIRCLE_JOB" + "CIRCLE_NODE_INDEX" + "CIRCLE_NODE_TOTAL" + "CIRCLE_PREVIOUS_BUILD_NUM" + "CIRCLE_PROJECT_REPONAME" + "CIRCLE_PROJECT_USERNAME" + "CIRCLE_REPOSITORY_URL" + "CIRCLE_SHA1" + "CIRCLE_STAGE" + "CIRCLE_USERNAME" + "CIRCLE_WORKFLOW_ID" + "CIRCLE_WORKFLOW_JOB_ID" + "CIRCLE_WORKFLOW_UPSTREAM_JOB_IDS" + "CIRCLE_WORKFLOW_WORKSPACE_ID" + ) +fi + +# Configure the service account for pulling the docker image. +function repo_root() { + local dir="$1" + while [[ ! -d "${dir}/.git" ]]; do + dir="$(dirname "$dir")" + done + echo "${dir}" +} + +# Detect the project root. In CI builds, we assume the script is in +# the git tree and traverse from there, otherwise, traverse from `pwd` +# to find `.git` directory. +if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then + PROGRAM_PATH="$(realpath "$0")" + PROGRAM_DIR="$(dirname "${PROGRAM_PATH}")" + PROJECT_ROOT="$(repo_root "${PROGRAM_DIR}")" +else + PROJECT_ROOT="$(repo_root $(pwd))" +fi + +log_yellow "Changing to the project root: ${PROJECT_ROOT}." +cd "${PROJECT_ROOT}" + +# To support relative path for `TRAMPOLINE_SERVICE_ACCOUNT`, we need +# to use this environment variable in `PROJECT_ROOT`. +if [[ -n "${TRAMPOLINE_SERVICE_ACCOUNT:-}" ]]; then + + mkdir -p "${tmpdir}/gcloud" + gcloud_config_dir="${tmpdir}/gcloud" + + log_yellow "Using isolated gcloud config: ${gcloud_config_dir}." + export CLOUDSDK_CONFIG="${gcloud_config_dir}" + + log_yellow "Using ${TRAMPOLINE_SERVICE_ACCOUNT} for authentication." + gcloud auth activate-service-account \ + --key-file "${TRAMPOLINE_SERVICE_ACCOUNT}" + log_yellow "Configuring Container Registry access" + gcloud auth configure-docker --quiet +fi + +required_envvars=( + # The basic trampoline configurations. + "TRAMPOLINE_IMAGE" + "TRAMPOLINE_BUILD_FILE" +) + +if [[ -f "${PROJECT_ROOT}/.trampolinerc" ]]; then + source "${PROJECT_ROOT}/.trampolinerc" +fi + +log_yellow "Checking environment variables." +for e in "${required_envvars[@]}" +do + if [[ -z "${!e:-}" ]]; then + log "Missing ${e} env var. Aborting." + exit 1 + fi +done + +# We want to support legacy style TRAMPOLINE_BUILD_FILE used with V1 +# script: e.g. "github/repo-name/.kokoro/run_tests.sh" +TRAMPOLINE_BUILD_FILE="${TRAMPOLINE_BUILD_FILE#github/*/}" +log_yellow "Using TRAMPOLINE_BUILD_FILE: ${TRAMPOLINE_BUILD_FILE}" + +# ignore error on docker operations and test execution +set +e + +log_yellow "Preparing Docker image." +# We only download the docker image in CI builds. +if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then + # Download the docker image specified by `TRAMPOLINE_IMAGE` + + # We may want to add --max-concurrent-downloads flag. + + log_yellow "Start pulling the Docker image: ${TRAMPOLINE_IMAGE}." + if docker pull "${TRAMPOLINE_IMAGE}"; then + log_green "Finished pulling the Docker image: ${TRAMPOLINE_IMAGE}." + has_image="true" + else + log_red "Failed pulling the Docker image: ${TRAMPOLINE_IMAGE}." + has_image="false" + fi +else + # For local run, check if we have the image. + if docker images "${TRAMPOLINE_IMAGE}:latest" | grep "${TRAMPOLINE_IMAGE}"; then + has_image="true" + else + has_image="false" + fi +fi + + +# The default user for a Docker container has uid 0 (root). To avoid +# creating root-owned files in the build directory we tell docker to +# use the current user ID. +user_uid="$(id -u)" +user_gid="$(id -g)" +user_name="$(id -un)" + +# To allow docker in docker, we add the user to the docker group in +# the host os. +docker_gid=$(cut -d: -f3 < <(getent group docker)) + +update_cache="false" +if [[ "${TRAMPOLINE_DOCKERFILE:-none}" != "none" ]]; then + # Build the Docker image from the source. + context_dir=$(dirname "${TRAMPOLINE_DOCKERFILE}") + docker_build_flags=( + "-f" "${TRAMPOLINE_DOCKERFILE}" + "-t" "${TRAMPOLINE_IMAGE}" + "--build-arg" "UID=${user_uid}" + "--build-arg" "USERNAME=${user_name}" + ) + if [[ "${has_image}" == "true" ]]; then + docker_build_flags+=("--cache-from" "${TRAMPOLINE_IMAGE}") + fi + + log_yellow "Start building the docker image." + if [[ "${TRAMPOLINE_VERBOSE:-false}" == "true" ]]; then + echo "docker build" "${docker_build_flags[@]}" "${context_dir}" + fi + + # ON CI systems, we want to suppress docker build logs, only + # output the logs when it fails. + if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then + if docker build "${docker_build_flags[@]}" "${context_dir}" \ + > "${tmpdir}/docker_build.log" 2>&1; then + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + cat "${tmpdir}/docker_build.log" + fi + + log_green "Finished building the docker image." + update_cache="true" + else + log_red "Failed to build the Docker image, aborting." + log_yellow "Dumping the build logs:" + cat "${tmpdir}/docker_build.log" + exit 1 + fi + else + if docker build "${docker_build_flags[@]}" "${context_dir}"; then + log_green "Finished building the docker image." + update_cache="true" + else + log_red "Failed to build the Docker image, aborting." + exit 1 + fi + fi +else + if [[ "${has_image}" != "true" ]]; then + log_red "We do not have ${TRAMPOLINE_IMAGE} locally, aborting." + exit 1 + fi +fi + +# We use an array for the flags so they are easier to document. +docker_flags=( + # Remove the container after it exists. + "--rm" + + # Use the host network. + "--network=host" + + # Run in priviledged mode. We are not using docker for sandboxing or + # isolation, just for packaging our dev tools. + "--privileged" + + # Run the docker script with the user id. Because the docker image gets to + # write in ${PWD} you typically want this to be your user id. + # To allow docker in docker, we need to use docker gid on the host. + "--user" "${user_uid}:${docker_gid}" + + # Pass down the USER. + "--env" "USER=${user_name}" + + # Mount the project directory inside the Docker container. + "--volume" "${PROJECT_ROOT}:${TRAMPOLINE_WORKSPACE}" + "--workdir" "${TRAMPOLINE_WORKSPACE}" + "--env" "PROJECT_ROOT=${TRAMPOLINE_WORKSPACE}" + + # Mount the temporary home directory. + "--volume" "${tmphome}:/h" + "--env" "HOME=/h" + + # Allow docker in docker. + "--volume" "/var/run/docker.sock:/var/run/docker.sock" + + # Mount the /tmp so that docker in docker can mount the files + # there correctly. + "--volume" "/tmp:/tmp" + # Pass down the KOKORO_GFILE_DIR and KOKORO_KEYSTORE_DIR + # TODO(tmatsuo): This part is not portable. + "--env" "TRAMPOLINE_SECRET_DIR=/secrets" + "--volume" "${KOKORO_GFILE_DIR:-/dev/shm}:/secrets/gfile" + "--env" "KOKORO_GFILE_DIR=/secrets/gfile" + "--volume" "${KOKORO_KEYSTORE_DIR:-/dev/shm}:/secrets/keystore" + "--env" "KOKORO_KEYSTORE_DIR=/secrets/keystore" +) + +# Add an option for nicer output if the build gets a tty. +if [[ -t 0 ]]; then + docker_flags+=("-it") +fi + +# Passing down env vars +for e in "${pass_down_envvars[@]}" +do + if [[ -n "${!e:-}" ]]; then + docker_flags+=("--env" "${e}=${!e}") + fi +done + +# If arguments are given, all arguments will become the commands run +# in the container, otherwise run TRAMPOLINE_BUILD_FILE. +if [[ $# -ge 1 ]]; then + log_yellow "Running the given commands '" "${@:1}" "' in the container." + readonly commands=("${@:1}") + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + echo docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" "${commands[@]}" + fi + docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" "${commands[@]}" +else + log_yellow "Running the tests in a Docker container." + docker_flags+=("--entrypoint=${TRAMPOLINE_BUILD_FILE}") + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + echo docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" + fi + docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" +fi + + +test_retval=$? + +if [[ ${test_retval} -eq 0 ]]; then + log_green "Build finished with ${test_retval}" +else + log_red "Build finished with ${test_retval}" +fi + +# Only upload it when the test passes. +if [[ "${update_cache}" == "true" ]] && \ + [[ $test_retval == 0 ]] && \ + [[ "${TRAMPOLINE_IMAGE_UPLOAD:-false}" == "true" ]]; then + log_yellow "Uploading the Docker image." + if docker push "${TRAMPOLINE_IMAGE}"; then + log_green "Finished uploading the Docker image." + else + log_red "Failed uploading the Docker image." + fi + # Call trampoline_after_upload_hook if it's defined. + if function_exists trampoline_after_upload_hook; then + trampoline_after_upload_hook + fi + +fi + +exit "${test_retval}" diff --git a/packages/google-cloud-bigquery/.trampolinerc b/packages/google-cloud-bigquery/.trampolinerc new file mode 100644 index 000000000000..995ee29111e1 --- /dev/null +++ b/packages/google-cloud-bigquery/.trampolinerc @@ -0,0 +1,51 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Template for .trampolinerc + +# Add required env vars here. +required_envvars+=( + "STAGING_BUCKET" + "V2_STAGING_BUCKET" +) + +# Add env vars which are passed down into the container here. +pass_down_envvars+=( + "STAGING_BUCKET" + "V2_STAGING_BUCKET" +) + +# Prevent unintentional override on the default image. +if [[ "${TRAMPOLINE_IMAGE_UPLOAD:-false}" == "true" ]] && \ + [[ -z "${TRAMPOLINE_IMAGE:-}" ]]; then + echo "Please set TRAMPOLINE_IMAGE if you want to upload the Docker image." + exit 1 +fi + +# Define the default value if it makes sense. +if [[ -z "${TRAMPOLINE_IMAGE_UPLOAD:-}" ]]; then + TRAMPOLINE_IMAGE_UPLOAD="" +fi + +if [[ -z "${TRAMPOLINE_IMAGE:-}" ]]; then + TRAMPOLINE_IMAGE="" +fi + +if [[ -z "${TRAMPOLINE_DOCKERFILE:-}" ]]; then + TRAMPOLINE_DOCKERFILE="" +fi + +if [[ -z "${TRAMPOLINE_BUILD_FILE:-}" ]]; then + TRAMPOLINE_BUILD_FILE="" +fi diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 251e1f4caed8..155606c9792f 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -20,6 +20,10 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("..")) +# For plugins that can not read conf.py. +# See also: https://github.com/docascode/sphinx-docfx-yaml/issues/85 +sys.path.insert(0, os.path.abspath(".")) + __version__ = "" # -- General configuration ------------------------------------------------ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py index 5ae21ea6f49f..5147743b61dd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/encryption_config.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py index 7b66be8f7131..f485c45684f0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py index 2411c48632c8..07d7e4c4b0a8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model_reference.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py index bfe77f934338..15f6715a253d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/standard_sql.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index bb6a10e1e2cf..4664278f173d 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -73,6 +73,10 @@ def unit(session): def system(session): """Run the system test suite.""" + # Check the value of `RUN_SYSTEM_TESTS` env var. It defaults to true. + if os.environ.get("RUN_SYSTEM_TESTS", "true") == "false": + session.skip("RUN_SYSTEM_TESTS is set to false, skipping") + # Sanity check: Only run system tests if the environment variable is set. if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable.") @@ -190,3 +194,36 @@ def docs(session): os.path.join("docs", ""), os.path.join("docs", "_build", "html", ""), ) + + +@nox.session(python="3.8") +def docfx(session): + """Build the docfx yaml files for this library.""" + + session.install("-e", ".") + session.install("sphinx", "alabaster", "recommonmark", "sphinx-docfx-yaml") + + shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run( + "sphinx-build", + "-T", # show full traceback on exception + "-N", # no colors + "-D", + ( + "extensions=sphinx.ext.autodoc," + "sphinx.ext.autosummary," + "docfx_yaml.extension," + "sphinx.ext.intersphinx," + "sphinx.ext.coverage," + "sphinx.ext.napoleon," + "sphinx.ext.todo," + "sphinx.ext.viewcode," + "recommonmark" + ), + "-b", + "html", + "-d", + os.path.join("docs", "_build", "doctrees", ""), + os.path.join("docs", ""), + os.path.join("docs", "_build", "html", ""), + ) diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index b7e46157bd72..46c63367de1d 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -3,22 +3,16 @@ { "git": { "name": ".", - "remote": "git@github.com:googleapis/python-bigquery.git", - "sha": "416c0daf40e481c80fb5327b48baa915f0e7aa2f" + "remote": "git@github.com:tmatsuo/python-bigquery.git", + "sha": "5ed817523a85a6f332951e10c0bf7dbb86d7e1cf" } }, { "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "b8ff6a41e195cda91bbfb20d9f11d5e58b7cc218" - } - }, - { - "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "b8ff6a41e195cda91bbfb20d9f11d5e58b7cc218" + "name": "googleapis", + "remote": "https://github.com/googleapis/googleapis.git", + "sha": "868615a5c1c1059c636bb3d82a555edb1d5a251e", + "internalRef": "324294521" } } ], diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index 5125c398ece1..ac20c9aec32a 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -59,7 +59,7 @@ # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- -templated_files = common.py_library(cov_level=100, samples=True) +templated_files = common.py_library(cov_level=100, samples=True, split_system_tests=True) # BigQuery has a custom multiprocessing note s.move(templated_files, excludes=["noxfile.py", "docs/multiprocessing.rst"]) From af3a50c279aa0f3f27215476d081021e38a45cd5 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 5 Aug 2020 23:53:41 +0200 Subject: [PATCH 0911/2016] chore(deps): pin ipython to v7.17.0 for samples, python >= 3.7 (#208) Co-authored-by: Tres Seaver Co-authored-by: Takashi Matsuo --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index f67eb3587526..ea84f6bacbe1 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,6 @@ google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.26.1 google-auth-oauthlib==0.4.1 -ipython==7.16.1 +ipython==7.16.1; python_version < '3.7' +ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.0 pytz==2020.1 From 90a8e7d69335afb1dcd615e76cf0c5f16d12d458 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 10 Aug 2020 15:50:26 -0400 Subject: [PATCH 0912/2016] chore: fix docs build broken by Sphinx 3.2.0 (#218) Closes #217. --- .../google-cloud-bigquery/google/cloud/bigquery/query.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 0f4c806864ba..f2ed6337e697 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -86,8 +86,8 @@ class ScalarQueryParameter(_AbstractQueryParameter): 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - value (Union[str, int, float, decimal.Decimal, bool, - datetime.datetime, datetime.date]): The scalar parameter value. + value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + The scalar parameter value. """ def __init__(self, name, type_, value): @@ -105,9 +105,8 @@ def positional(cls, type_, value): 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - value (Union[str, int, float, decimal.Decimal, bool, - datetime.datetime, - datetime.date]): The scalar parameter value. + value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + The scalar parameter value. Returns: google.cloud.bigquery.query.ScalarQueryParameter: Instance without name From c2e88a33d7d5b34cdac8384ccb36aacfa1d85e09 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Fri, 14 Aug 2020 10:24:19 +0530 Subject: [PATCH 0913/2016] feat(bigquery): add client_options to base class (#216) * feat(bigquery): add client_options to base class * chore: bump g-c-c to 1.4.1 --- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 5 ++++- packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 651f0263e446..52ddffe7de14 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -177,7 +177,10 @@ def __init__( client_options=None, ): super(Client, self).__init__( - project=project, credentials=credentials, _http=_http + project=project, + credentials=credentials, + client_options=client_options, + _http=_http, ) kw_args = {"client_info": client_info} diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index b00b2cbe5fb4..fe6143557f52 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -31,7 +31,7 @@ dependencies = [ 'enum34; python_version < "3.4"', "google-api-core >= 1.21.0, < 2.0dev", - "google-cloud-core >= 1.1.0, < 2.0dev", + "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.5.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", ] From 7918e975cdd6535609c5b695e72612c9f488d6e0 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 15 Aug 2020 14:20:04 +0200 Subject: [PATCH 0914/2016] fix: converting to dataframe with out of bounds timestamps (#209) Fixes #168. This PR fixes the problem when converting query results to Pandas with `pyarrow` when data contains timestamps that would fall out of `pyarrow`'s nanoseconds precision. The fix requires `pyarrow>=1.0.0`, thus it only works on Python 3. ### PR checklist - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- .../google/cloud/bigquery/table.py | 31 ++++++++- packages/google-cloud-bigquery/setup.py | 4 +- .../tests/unit/test_table.py | 63 +++++++++++++++++++ 3 files changed, 96 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 10b4198d3260..d9e5f7773af1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -21,6 +21,7 @@ import functools import logging import operator +import pytz import warnings import six @@ -1726,7 +1727,35 @@ def to_dataframe( bqstorage_client=bqstorage_client, create_bqstorage_client=create_bqstorage_client, ) - df = record_batch.to_pandas(date_as_object=date_as_object) + + # When converting timestamp values to nanosecond precision, the result + # can be out of pyarrow bounds. To avoid the error when converting to + # Pandas, we set the timestamp_as_object parameter to True, if necessary. + # + # NOTE: Python 3+ only, as timestamp_as_object parameter is only supported + # in pyarrow>=1.0, but the latter is not compatible with Python 2. + if six.PY2: + extra_kwargs = {} + else: + types_to_check = { + pyarrow.timestamp("us"), + pyarrow.timestamp("us", tz=pytz.UTC), + } + + for column in record_batch: + if column.type in types_to_check: + try: + column.cast("timestamp[ns]") + except pyarrow.lib.ArrowInvalid: + timestamp_as_object = True + break + else: + timestamp_as_object = False + + extra_kwargs = {"timestamp_as_object": timestamp_as_object} + + df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) + for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) return df diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index fe6143557f52..389517277179 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -48,7 +48,9 @@ "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. 'pyarrow: platform_system != "Windows" or python_version >= "3.5"': [ - "pyarrow>=0.17.0" + "pyarrow>=1.0.0, <2.0dev; python_version>='3.4'", + # Pyarrow >= 0.17.0 is not compatible with Python 2 anymore. + "pyarrow < 0.17.0; python_version < '3.0'", ], "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], "fastparquet": [ diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 28575bd43081..80223e8e1396 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime as dt import itertools import logging import time @@ -2271,6 +2272,68 @@ def test_to_dataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "int64") + @pytest.mark.xfail( + six.PY2, + reason=( + "Requires pyarrow>-1.0 to work, but the latter is not compatible " + "with Python 2 anymore." + ), + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField("some_timestamp", "TIMESTAMP")] + rows = [ + {"f": [{"v": "81953424000.0"}]}, # 4567-01-01 00:00:00 UTC + {"f": [{"v": "253402214400.0"}]}, # 9999-12-31 00:00:00 UTC + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe(create_bqstorage_client=False) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 2) # verify the number of rows + self.assertEqual(list(df.columns), ["some_timestamp"]) + self.assertEqual( + list(df["some_timestamp"]), + [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], + ) + + @pytest.mark.xfail( + six.PY2, + reason=( + "Requires pyarrow>-1.0 to work, but the latter is not compatible " + "with Python 2 anymore." + ), + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField("some_datetime", "DATETIME")] + rows = [ + {"f": [{"v": "4567-01-01T00:00:00"}]}, + {"f": [{"v": "9999-12-31T00:00:00"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe(create_bqstorage_client=False) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 2) # verify the number of rows + self.assertEqual(list(df.columns), ["some_datetime"]) + self.assertEqual( + list(df["some_datetime"]), + [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_warning_wo_pyarrow(self): from google.cloud.bigquery.client import PyarrowMissingWarning From c4aaefc4725a76eae65d3cbd00aeb75472b1c8a4 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 17 Aug 2020 15:32:14 -0400 Subject: [PATCH 0915/2016] chore: release 1.27.0 (#210) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 15 +++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index a209dbaf5b18..fb41f761b708 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [1.27.0](https://www.github.com/googleapis/python-bigquery/compare/v1.26.1...v1.27.0) (2020-08-15) + + +### Features + +* add support and tests for struct fields ([#146](https://www.github.com/googleapis/python-bigquery/issues/146)) ([fee2ba8](https://www.github.com/googleapis/python-bigquery/commit/fee2ba80e338d093ee61565359268da91a5c9913)) +* add support for getting and setting table IAM policy ([#144](https://www.github.com/googleapis/python-bigquery/issues/144)) ([f59fc9a](https://www.github.com/googleapis/python-bigquery/commit/f59fc9a482d9f9ae63e2b2bfc80b9a3481d09bde)) +* **bigquery:** add client_options to base class ([#216](https://www.github.com/googleapis/python-bigquery/issues/216)) ([478597a](https://www.github.com/googleapis/python-bigquery/commit/478597a38167fa57b60ae7f65b581f3fe75ddc7c)) + + +### Bug Fixes + +* converting to dataframe with out of bounds timestamps ([#209](https://www.github.com/googleapis/python-bigquery/issues/209)) ([8209203](https://www.github.com/googleapis/python-bigquery/commit/8209203e967f0624ad306166c0af6f6f1027c550)), closes [#168](https://www.github.com/googleapis/python-bigquery/issues/168) +* raise error if inserting rows with unknown fields ([#163](https://www.github.com/googleapis/python-bigquery/issues/163)) ([8fe7254](https://www.github.com/googleapis/python-bigquery/commit/8fe725429541eed34ddc01cffc8b1ee846c14162)) + ### [1.26.1](https://www.github.com/googleapis/python-bigquery/compare/v1.26.0...v1.26.1) (2020-07-25) ### Documentation diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 389517277179..0a8f6685e62e 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.26.1" +version = "1.27.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 27f21ce4e037a73f2ce8f889a781636cb0f7b021 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 17 Aug 2020 22:08:06 +0200 Subject: [PATCH 0916/2016] chore(deps): update dependency matplotlib to v3.3.1 (#224) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [matplotlib](https://matplotlib.org) ([source](https://togithub.com/matplotlib/matplotlib)) | patch | `==3.3.0` -> `==3.3.1` | --- ### Release Notes
matplotlib/matplotlib ### [`v3.3.1`](https://togithub.com/matplotlib/matplotlib/releases/v3.3.1) [Compare Source](https://togithub.com/matplotlib/matplotlib/compare/v3.3.0...v3.3.1) This is the first bugfix release of the 3.3.x series. This release contains several critical bug-fixes: - fix docstring import issues when running Python with optimization - fix `hist` with categorical data, such as with Pandas - fix install on BSD systems - fix nbagg compatibility with Chrome 84+ - fix ordering of scatter marker size in 3D plots - fix performance regression when plotting `Path`s - fix reading from URL in `imread` - fix several regressions with new date epoch handling - fix some bad constrained and tight layout interactions with colorbars - fix use of customized toolbars in TkAgg and WXAgg backends
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index ea84f6bacbe1..7fe8391191b6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -2,5 +2,5 @@ google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.26.1 google-auth-oauthlib==0.4.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.0 +matplotlib==3.3.1 pytz==2020.1 From f6370ee0659e0884dbd0be9cb3004cc6ddadbbbc Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 17 Aug 2020 22:35:57 +0200 Subject: [PATCH 0917/2016] chore(deps): update dependency llvmlite to <=0.34.0 (#223) Co-authored-by: Tres Seaver --- packages/google-cloud-bigquery/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 0a8f6685e62e..77f32044b7d1 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -59,8 +59,8 @@ # llvmlite >= 0.32.0 cannot be installed on Python 3.5 and below # (building the wheel fails), thus needs to be restricted. # See: https://github.com/googleapis/python-bigquery/issues/78 - "llvmlite <= 0.33.0;python_version>='3.6'", - "llvmlite <= 0.31.0;python_version<'3.6'", + "llvmlite<=0.34.0;python_version>='3.6'", + "llvmlite<=0.31.0;python_version<'3.6'", ], } From 3a644e99f252030497323769a9772de1455c526a Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 18 Aug 2020 07:01:24 -0400 Subject: [PATCH 0918/2016] fix: tweak pyarrow extra to soothe PyPI (#230) Release-As: 1.27.1 --- packages/google-cloud-bigquery/setup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 77f32044b7d1..32d150774c54 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -47,8 +47,11 @@ ], "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. - 'pyarrow: platform_system != "Windows" or python_version >= "3.5"': [ - "pyarrow>=1.0.0, <2.0dev; python_version>='3.4'", + 'pyarrow: platform_system == "Windows"': [ + "pyarrow>=1.0.0, <2.0dev; python_version>='3.5'", + ], + 'pyarrow: platform_system != "Windows"': [ + "pyarrow>=1.0.0, <2.0dev; python_version>='3.5'", # Pyarrow >= 0.17.0 is not compatible with Python 2 anymore. "pyarrow < 0.17.0; python_version < '3.0'", ], From 618896b7b25446d5423db55a82daf2451e8cd230 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 18 Aug 2020 11:28:06 +0000 Subject: [PATCH 0919/2016] chore: release 1.27.1 (#231) :robot: I have created a release \*beep\* \*boop\* --- ### [1.27.1](https://www.github.com/googleapis/python-bigquery/compare/v1.27.0...v1.27.1) (2020-08-18) ### Bug Fixes * tweak pyarrow extra to soothe PyPI ([#230](https://www.github.com/googleapis/python-bigquery/issues/230)) ([c15efbd](https://www.github.com/googleapis/python-bigquery/commit/c15efbd1ee4488898fc862768eef701443f492f6)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index fb41f761b708..41bbda18ab25 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [1.27.1](https://www.github.com/googleapis/python-bigquery/compare/v1.27.0...v1.27.1) (2020-08-18) + + +### Bug Fixes + +* tweak pyarrow extra to soothe PyPI ([#230](https://www.github.com/googleapis/python-bigquery/issues/230)) ([c15efbd](https://www.github.com/googleapis/python-bigquery/commit/c15efbd1ee4488898fc862768eef701443f492f6)) + ## [1.27.0](https://www.github.com/googleapis/python-bigquery/compare/v1.26.1...v1.27.0) (2020-08-15) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 32d150774c54..02a89f607295 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.27.0" +version = "1.27.1" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 73e39842f9caa67a169b4493341f6b1ce13f9938 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 18 Aug 2020 16:15:17 -0400 Subject: [PATCH 0920/2016] fix: rationalize platform constraints for 'pyarrow' extra (#235) Release-As: 1.27.2 --- packages/google-cloud-bigquery/setup.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 02a89f607295..22bc6a8740a8 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -47,13 +47,10 @@ ], "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. - 'pyarrow: platform_system == "Windows"': [ - "pyarrow>=1.0.0, <2.0dev; python_version>='3.5'", - ], - 'pyarrow: platform_system != "Windows"': [ - "pyarrow>=1.0.0, <2.0dev; python_version>='3.5'", + "pyarrow": [ + "pyarrow >= 1.0.0, < 2.0dev; python_version >= '3.5'", # Pyarrow >= 0.17.0 is not compatible with Python 2 anymore. - "pyarrow < 0.17.0; python_version < '3.0'", + "pyarrow < 0.17.0; python_version < '3.0' and platform_system != 'Windows'", ], "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], "fastparquet": [ From 50ab65428b025450f083e8455c4916d8b4ab7f5d Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 18 Aug 2020 20:44:02 +0000 Subject: [PATCH 0921/2016] chore: release 1.27.2 (#236) :robot: I have created a release \*beep\* \*boop\* --- ### [1.27.2](https://www.github.com/googleapis/python-bigquery/compare/v1.27.1...v1.27.2) (2020-08-18) ### Bug Fixes * rationalize platform constraints for 'pyarrow' extra ([#235](https://www.github.com/googleapis/python-bigquery/issues/235)) ([c9a0567](https://www.github.com/googleapis/python-bigquery/commit/c9a0567f59491b769a9e2fd535430423e39d4fa8)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 41bbda18ab25..5ef22e8d765a 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [1.27.2](https://www.github.com/googleapis/python-bigquery/compare/v1.27.1...v1.27.2) (2020-08-18) + + +### Bug Fixes + +* rationalize platform constraints for 'pyarrow' extra ([#235](https://www.github.com/googleapis/python-bigquery/issues/235)) ([c9a0567](https://www.github.com/googleapis/python-bigquery/commit/c9a0567f59491b769a9e2fd535430423e39d4fa8)) + ### [1.27.1](https://www.github.com/googleapis/python-bigquery/compare/v1.27.0...v1.27.1) (2020-08-18) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 22bc6a8740a8..18bb789263c9 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.27.1" +version = "1.27.2" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From b663517fc4f2e3c4a170f09f54ed9f7951e592b1 Mon Sep 17 00:00:00 2001 From: Aravin <34178459+aravinsiva@users.noreply.github.com> Date: Fri, 21 Aug 2020 14:49:13 -0400 Subject: [PATCH 0922/2016] feat: add opentelemetry tracing (#215) * testing first trace export * instrumention client.py * instrumenting job.py and adding documentation * reconfiguring imports * quick cleanup of unused variable * adding more attributes in module and limiting complexity of instrumentation * adding tests, nox and correct attribute additions in client & job * adding tests, nox and correct attribute additions in client & job (left out of last commit) * linting * reformatting noxfile.[y * addressing suggested changes * adding suggested changes * removing print statements * setting same version across all OT [ackages and other reccommended changes * suggested changes * fixing packages issue in nox and updating documentation * fixing module install issue * restructuring design for testing adding first layer of tests (some still failing) * adding reamining client tests and all job tests * fixing linting issues * fixing trace not defined issue * fixing lint issues * fixing documentation issues and python2 testing issue * linting and fixing coverage issues * adding suggested changes * linting * adding Shawn's suggested changes * fixing _default_span_attribute_bug * reverting uneccesxsary changes * adding more tests for all job_ref parameters * removing dependecny, ordering imports and other changes * addressing Shawn concerns * adding test and suggested changes * adding opentelemetry to setup.py and other suggested changes * adding reasoning for not adding to [all] * linting * adding Tim suggested changes Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/README.rst | 38 + .../google/cloud/bigquery/client.py | 317 +++++--- .../google/cloud/bigquery/job.py | 71 +- .../cloud/bigquery/opentelemetry_tracing.py | 122 ++++ packages/google-cloud-bigquery/noxfile.py | 5 + packages/google-cloud-bigquery/setup.py | 11 +- .../tests/unit/test_client.py | 689 ++++++++++++++---- .../tests/unit/test_job.py | 360 +++++++-- .../tests/unit/test_opentelemetry_tracing.py | 212 ++++++ 9 files changed, 1506 insertions(+), 319 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 8f73576d6f7d..c6bc17834f35 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -102,3 +102,41 @@ Perform a query for row in rows: print(row.name) + +Instrumenting With OpenTelemetry +-------------------------------- + +This application uses `OpenTelemetry`_ to output tracing data from +API calls to BigQuery. To enable OpenTelemetry tracing in +the BigQuery client the following PyPI packages need to be installed: + +.. _OpenTelemetry: https://opentelemetry.io + +.. code-block:: console + + pip install google-cloud-bigquery[opentelemetry] opentelemetry-exporter-google-cloud + +After installation, OpenTelemetry can be used in the BigQuery +client and in BigQuery jobs. First, however, an exporter must be +specified for where the trace data will be outputted to. An +example of this can be found here: + +.. code-block:: python + + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchExportSpanProcessor + from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter + trace.set_tracer_provider(TracerProvider()) + trace.get_tracer_provider().add_span_processor( + BatchExportSpanProcessor(CloudTraceSpanExporter()) + ) + +In this example all tracing data will be published to the Google +`Cloud Trace`_ console. For more information on OpenTelemetry, please consult the `OpenTelemetry documentation`_. + +.. _OpenTelemetry documentation: https://opentelemetry-python.readthedocs.io +.. _Cloud Trace: https://cloud.google.com/trace + + + diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 52ddffe7de14..fbbfda05108b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -63,6 +63,7 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.exceptions import PyarrowMissingWarning +from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference @@ -246,8 +247,15 @@ def get_service_account_email( if project is None: project = self.project path = "/projects/%s/serviceAccount" % (project,) - - api_response = self._call_api(retry, method="GET", path=path, timeout=timeout) + span_attributes = {"path": path} + with create_span( + name="BigQuery.getServiceAccountEmail", + attributes=span_attributes, + client=self, + ): + api_response = self._call_api( + retry, method="GET", path=path, timeout=timeout + ) return api_response["email"] def list_projects( @@ -471,9 +479,13 @@ def create_dataset( data["location"] = self.location try: - api_response = self._call_api( - retry, method="POST", path=path, data=data, timeout=timeout - ) + span_attributes = {"path": path} + with create_span( + name="BigQuery.createDataset", attributes=span_attributes, client=self + ): + api_response = self._call_api( + retry, method="POST", path=path, data=data, timeout=timeout + ) return Dataset.from_api_repr(api_response) except google.api_core.exceptions.Conflict: if not exists_ok: @@ -515,9 +527,13 @@ def create_routine( ) resource = routine.to_api_repr() try: - api_response = self._call_api( - retry, method="POST", path=path, data=resource, timeout=timeout - ) + span_attributes = {"path": path} + with create_span( + name="BigQuery.createRoutine", attributes=span_attributes, client=self + ): + api_response = self._call_api( + retry, method="POST", path=path, data=resource, timeout=timeout + ) return Routine.from_api_repr(api_response) except google.api_core.exceptions.Conflict: if not exists_ok: @@ -558,13 +574,17 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None If the table already exists. """ table = _table_arg_to_table(table, default_project=self.project) - - path = "/projects/%s/datasets/%s/tables" % (table.project, table.dataset_id) + dataset_id = table.dataset_id + path = "/projects/%s/datasets/%s/tables" % (table.project, dataset_id) data = table.to_api_repr() try: - api_response = self._call_api( - retry, method="POST", path=path, data=data, timeout=timeout - ) + span_attributes = {"path": path, "dataset_id": dataset_id} + with create_span( + name="BigQuery.createTable", attributes=span_attributes, client=self + ): + api_response = self._call_api( + retry, method="POST", path=path, data=data, timeout=timeout + ) return Table.from_api_repr(api_response) except google.api_core.exceptions.Conflict: if not exists_ok: @@ -603,10 +623,14 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): dataset_ref = DatasetReference.from_string( dataset_ref, default_project=self.project ) - - api_response = self._call_api( - retry, method="GET", path=dataset_ref.path, timeout=timeout - ) + path = dataset_ref.path + span_attributes = {"path": path} + with create_span( + name="BigQuery.getDataset", attributes=span_attributes, client=self + ): + api_response = self._call_api( + retry, method="GET", path=path, timeout=timeout + ) return Dataset.from_api_repr(api_response) def get_iam_policy( @@ -621,10 +645,13 @@ def get_iam_policy( body = {"options": {"requestedPolicyVersion": 1}} path = "{}:getIamPolicy".format(table.path) - - response = self._call_api( - retry, method="POST", path=path, data=body, timeout=timeout, - ) + span_attributes = {"path": path} + with create_span( + name="BigQuery.getIamPolicy", attributes=span_attributes, client=self + ): + response = self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) return Policy.from_api_repr(response) @@ -643,10 +670,13 @@ def set_iam_policy( body["updateMask"] = updateMask path = "{}:setIamPolicy".format(table.path) - - response = self._call_api( - retry, method="POST", path=path, data=body, timeout=timeout, - ) + span_attributes = {"path": path} + with create_span( + name="BigQuery.setIamPolicy", attributes=span_attributes, client=self + ): + response = self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) return Policy.from_api_repr(response) @@ -659,10 +689,13 @@ def test_iam_permissions( body = {"permissions": permissions} path = "{}:testIamPermissions".format(table.path) - - response = self._call_api( - retry, method="POST", path=path, data=body, timeout=timeout, - ) + span_attributes = {"path": path} + with create_span( + name="BigQuery.testIamPermissions", attributes=span_attributes, client=self + ): + response = self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) return response @@ -691,10 +724,14 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): model_ref = ModelReference.from_string( model_ref, default_project=self.project ) - - api_response = self._call_api( - retry, method="GET", path=model_ref.path, timeout=timeout - ) + path = model_ref.path + span_attributes = {"path": path} + with create_span( + name="BigQuery.getModel", attributes=span_attributes, client=self + ): + api_response = self._call_api( + retry, method="GET", path=path, timeout=timeout + ) return Model.from_api_repr(api_response) def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): @@ -724,10 +761,14 @@ def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): routine_ref = RoutineReference.from_string( routine_ref, default_project=self.project ) - - api_response = self._call_api( - retry, method="GET", path=routine_ref.path, timeout=timeout - ) + path = routine_ref.path + span_attributes = {"path": path} + with create_span( + name="BigQuery.getRoutine", attributes=span_attributes, client=self + ): + api_response = self._call_api( + retry, method="GET", path=path, timeout=timeout + ) return Routine.from_api_repr(api_response) def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): @@ -754,9 +795,14 @@ def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): A ``Table`` instance. """ table_ref = _table_arg_to_table_ref(table, default_project=self.project) - api_response = self._call_api( - retry, method="GET", path=table_ref.path, timeout=timeout - ) + path = table_ref.path + span_attributes = {"path": path} + with create_span( + name="BigQuery.getTable", attributes=span_attributes, client=self + ): + api_response = self._call_api( + retry, method="GET", path=path, timeout=timeout + ) return Table.from_api_repr(api_response) def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): @@ -793,14 +839,20 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): headers = {"If-Match": dataset.etag} else: headers = None - api_response = self._call_api( - retry, - method="PATCH", - path=dataset.path, - data=partial, - headers=headers, - timeout=timeout, - ) + path = dataset.path + span_attributes = {"path": path, "fields": fields} + + with create_span( + name="BigQuery.updateDataset", attributes=span_attributes, client=self + ): + api_response = self._call_api( + retry, + method="PATCH", + path=path, + data=partial, + headers=headers, + timeout=timeout, + ) return Dataset.from_api_repr(api_response) def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): @@ -836,14 +888,20 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): headers = {"If-Match": model.etag} else: headers = None - api_response = self._call_api( - retry, - method="PATCH", - path=model.path, - data=partial, - headers=headers, - timeout=timeout, - ) + path = model.path + span_attributes = {"path": path, "fields": fields} + + with create_span( + name="BigQuery.updateModel", attributes=span_attributes, client=self + ): + api_response = self._call_api( + retry, + method="PATCH", + path=path, + data=partial, + headers=headers, + timeout=timeout, + ) return Model.from_api_repr(api_response) def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): @@ -890,14 +948,20 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): # TODO: remove when routines update supports partial requests. partial["routineReference"] = routine.reference.to_api_repr() - api_response = self._call_api( - retry, - method="PUT", - path=routine.path, - data=partial, - headers=headers, - timeout=timeout, - ) + path = routine.path + span_attributes = {"path": path, "fields": fields} + + with create_span( + name="BigQuery.updateRoutine", attributes=span_attributes, client=self + ): + api_response = self._call_api( + retry, + method="PUT", + path=path, + data=partial, + headers=headers, + timeout=timeout, + ) return Routine.from_api_repr(api_response) def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): @@ -933,14 +997,21 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): headers = {"If-Match": table.etag} else: headers = None - api_response = self._call_api( - retry, - method="PATCH", - path=table.path, - data=partial, - headers=headers, - timeout=timeout, - ) + + path = table.path + span_attributes = {"path": path, "fields": fields} + + with create_span( + name="BigQuery.updateTable", attributes=span_attributes, client=self + ): + api_response = self._call_api( + retry, + method="PATCH", + path=path, + data=partial, + headers=headers, + timeout=timeout, + ) return Table.from_api_repr(api_response) def list_models( @@ -1183,17 +1254,24 @@ def delete_dataset( raise TypeError("dataset must be a Dataset or a DatasetReference") params = {} + path = dataset.path if delete_contents: params["deleteContents"] = "true" + span_attributes = {"path": path, "deleteContents": delete_contents} + else: + span_attributes = {"path": path} try: - self._call_api( - retry, - method="DELETE", - path=dataset.path, - query_params=params, - timeout=timeout, - ) + with create_span( + name="BigQuery.deleteDataset", attributes=span_attributes, client=self + ): + self._call_api( + retry, + method="DELETE", + path=path, + query_params=params, + timeout=timeout, + ) except google.api_core.exceptions.NotFound: if not not_found_ok: raise @@ -1231,8 +1309,13 @@ def delete_model( if not isinstance(model, (Model, ModelReference)): raise TypeError("model must be a Model or a ModelReference") + path = model.path try: - self._call_api(retry, method="DELETE", path=model.path, timeout=timeout) + span_attributes = {"path": path} + with create_span( + name="BigQuery.deleteModel", attributes=span_attributes, client=self + ): + self._call_api(retry, method="DELETE", path=path, timeout=timeout) except google.api_core.exceptions.NotFound: if not not_found_ok: raise @@ -1268,12 +1351,17 @@ def delete_routine( routine = RoutineReference.from_string( routine, default_project=self.project ) + path = routine.path if not isinstance(routine, (Routine, RoutineReference)): raise TypeError("routine must be a Routine or a RoutineReference") try: - self._call_api(retry, method="DELETE", path=routine.path, timeout=timeout) + span_attributes = {"path": path} + with create_span( + name="BigQuery.deleteRoutine", attributes=span_attributes, client=self + ): + self._call_api(retry, method="DELETE", path=path, timeout=timeout) except google.api_core.exceptions.NotFound: if not not_found_ok: raise @@ -1310,7 +1398,12 @@ def delete_table( raise TypeError("Unable to get TableReference for table '{}'".format(table)) try: - self._call_api(retry, method="DELETE", path=table.path, timeout=timeout) + path = table.path + span_attributes = {"path": path} + with create_span( + name="BigQuery.deleteTable", attributes=span_attributes, client=self + ): + self._call_api(retry, method="DELETE", path=path, timeout=timeout) except google.api_core.exceptions.NotFound: if not not_found_ok: raise @@ -1358,9 +1451,18 @@ def _get_query_results( # This call is typically made in a polling loop that checks whether the # job is complete (from QueryJob.done(), called ultimately from # QueryJob.result()). So we don't need to poll here. - resource = self._call_api( - retry, method="GET", path=path, query_params=extra_params, timeout=timeout - ) + span_attributes = {"path": path} + + with create_span( + name="BigQuery.getQueryResults", attributes=span_attributes, client=self + ): + resource = self._call_api( + retry, + method="GET", + path=path, + query_params=extra_params, + timeout=timeout, + ) return _QueryResults.from_api_repr(resource) def job_from_resource(self, resource): @@ -1504,9 +1606,18 @@ def get_job( path = "/projects/{}/jobs/{}".format(project, job_id) - resource = self._call_api( - retry, method="GET", path=path, query_params=extra_params, timeout=timeout - ) + span_attributes = {"path": path, "job_id": job_id, "location": location} + + with create_span( + name="BigQuery.getJob", attributes=span_attributes, client=self + ): + resource = self._call_api( + retry, + method="GET", + path=path, + query_params=extra_params, + timeout=timeout, + ) return self.job_from_resource(resource) @@ -1553,9 +1664,18 @@ def cancel_job( path = "/projects/{}/jobs/{}/cancel".format(project, job_id) - resource = self._call_api( - retry, method="POST", path=path, query_params=extra_params, timeout=timeout - ) + span_attributes = {"path": path, "job_id": job_id, "location": location} + + with create_span( + name="BigQuery.cancelJob", attributes=span_attributes, client=self + ): + resource = self._call_api( + retry, + method="POST", + path=path, + query_params=extra_params, + timeout=timeout, + ) return self.job_from_resource(resource["job"]) @@ -2730,14 +2850,15 @@ def insert_rows_json( if template_suffix is not None: data["templateSuffix"] = template_suffix + path = "%s/insertAll" % table.path # We can always retry, because every row has an insert ID. - response = self._call_api( - retry, - method="POST", - path="%s/insertAll" % table.path, - data=data, - timeout=timeout, - ) + span_attributes = {"path": path} + with create_span( + name="BigQuery.insertRowsJson", attributes=span_attributes, client=self + ): + response = self._call_api( + retry, method="POST", path=path, data=data, timeout=timeout, + ) errors = [] for error in response.get("insertErrors", ()): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 753307b2a7f4..a8e0c25edaf9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -34,6 +34,7 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import HivePartitioningOptions +from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import _query_param_from_api_repr from google.cloud.bigquery.query import ArrayQueryParameter @@ -634,9 +635,17 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): # jobs.insert is idempotent because we ensure that every new # job has an ID. - api_response = client._call_api( - retry, method="POST", path=path, data=self.to_api_repr(), timeout=timeout - ) + span_attributes = {"path": path} + with create_span( + name="BigQuery.job.begin", attributes=span_attributes, job_ref=self + ): + api_response = client._call_api( + retry, + method="POST", + path=path, + data=self.to_api_repr(), + timeout=timeout, + ) self._set_properties(api_response) def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): @@ -665,13 +674,17 @@ def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): extra_params["location"] = self.location try: - client._call_api( - retry, - method="GET", - path=self.path, - query_params=extra_params, - timeout=timeout, - ) + span_attributes = {"path": self.path} + with create_span( + name="BigQuery.job.exists", attributes=span_attributes, job_ref=self + ): + client._call_api( + retry, + method="GET", + path=self.path, + query_params=extra_params, + timeout=timeout, + ) except NotFound: return False else: @@ -698,14 +711,17 @@ def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): extra_params = {} if self.location: extra_params["location"] = self.location - - api_response = client._call_api( - retry, - method="GET", - path=self.path, - query_params=extra_params, - timeout=timeout, - ) + span_attributes = {"path": self.path} + with create_span( + name="BigQuery.job.reload", attributes=span_attributes, job_ref=self + ): + api_response = client._call_api( + retry, + method="GET", + path=self.path, + query_params=extra_params, + timeout=timeout, + ) self._set_properties(api_response) def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): @@ -732,13 +748,18 @@ def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): if self.location: extra_params["location"] = self.location - api_response = client._call_api( - retry, - method="POST", - path="{}/cancel".format(self.path), - query_params=extra_params, - timeout=timeout, - ) + path = "{}/cancel".format(self.path) + span_attributes = {"path": path} + with create_span( + name="BigQuery.job.cancel", attributes=span_attributes, job_ref=self + ): + api_response = client._call_api( + retry, + method="POST", + path=path, + query_params=extra_params, + timeout=timeout, + ) self._set_properties(api_response["job"]) # The Future interface requires that we return True if the *attempt* # to cancel was successful. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py new file mode 100644 index 000000000000..f7375c3466a4 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -0,0 +1,122 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from contextlib import contextmanager +from google.api_core.exceptions import GoogleAPICallError + +logger = logging.getLogger(__name__) +try: + from opentelemetry import trace + from opentelemetry.instrumentation.utils import http_status_to_canonical_code + from opentelemetry.trace.status import Status + + HAS_OPENTELEMETRY = True + +except ImportError: + logger.info( + "This service is instrumented using OpenTelemetry." + "OpenTelemetry could not be imported; please" + "add opentelemetry-api and opentelemetry-instrumentation" + "packages in order to get BigQuery Tracing data." + ) + + HAS_OPENTELEMETRY = False + +_default_attributes = { + "db.system": "BigQuery" +} # static, default values assigned to all spans + + +@contextmanager +def create_span(name, attributes=None, client=None, job_ref=None): + """Creates a ContextManager for a Span to be exported to the configured exporter. + If no configuration exists yields None. + + Args: + name (str): Name that will be set for the span being created + attributes (Optional[dict]): + Additional attributes that pertain to + the specific API call (i.e. not a default attribute) + client (Optional[google.cloud.bigquery.client.Client]): + Pass in a Client object to extract any attributes that may be + relevant to it and add them to the created spans. + job_ref (Optional[google.cloud.bigquery.job._AsyncJob]) + Pass in a _AsyncJob object to extract any attributes that may be + relevant to it and add them to the created spans. + + Yields: + opentelemetry.trace.Span: Yields the newly created Span. + + Raises: + google.api_core.exceptions.GoogleAPICallError: + Raised if a span could not be yielded or issue with call to + OpenTelemetry. + """ + final_attributes = _get_final_span_attributes(attributes, client, job_ref) + if not HAS_OPENTELEMETRY: + yield None + return + tracer = trace.get_tracer(__name__) + + # yield new span value + with tracer.start_as_current_span(name=name, attributes=final_attributes) as span: + try: + yield span + except GoogleAPICallError as error: + if error.code is not None: + span.set_status(Status(http_status_to_canonical_code(error.code))) + raise + + +def _get_final_span_attributes(attributes=None, client=None, job_ref=None): + final_attributes = {} + final_attributes.update(_default_attributes.copy()) + if client: + client_attributes = _set_client_attributes(client) + final_attributes.update(client_attributes) + if job_ref: + job_attributes = _set_job_attributes(job_ref) + final_attributes.update(job_attributes) + if attributes: + final_attributes.update(attributes) + return final_attributes + + +def _set_client_attributes(client): + return {"db.name": client.project, "location": client.location} + + +def _set_job_attributes(job_ref): + job_attributes = { + "db.name": job_ref.project, + "location": job_ref.location, + "num_child_jobs": job_ref.num_child_jobs, + "job_id": job_ref.job_id, + "parent_job_id": job_ref.parent_job_id, + "state": job_ref.state, + } + + job_attributes["hasErrors"] = job_ref.error_result is not None + + if job_ref.created is not None: + job_attributes["timeCreated"] = job_ref.created.isoformat() + + if job_ref.started is not None: + job_attributes["timeStarted"] = job_ref.started.isoformat() + + if job_ref.ended is not None: + job_attributes["timeEnded"] = job_ref.ended.isoformat() + + return job_attributes diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 4664278f173d..5db14c31f349 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -48,6 +48,11 @@ def default(session): else: session.install("ipython") + # opentelemetry was not added to [all] because opentelemetry does not support Python 2. + # Exporter does not need to be in nox thus it has been added to README documentation + if session.python != "2.7": + session.install("-e", ".[opentelemetry]") + # Run py.test against the unit tests. session.run( "py.test", diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 18bb789263c9..f30968364e08 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -62,15 +62,24 @@ "llvmlite<=0.34.0;python_version>='3.6'", "llvmlite<=0.31.0;python_version<'3.6'", ], + "opentelemetry": [ + "opentelemetry-api==0.9b0", + "opentelemetry-sdk==0.9b0", + "opentelemetry-instrumentation==0.9b0 ", + ], } all_extras = [] for extra in extras: - if extra == "fastparquet": + if extra in ( # Skip fastparquet from "all" because it is redundant with pyarrow and # creates a dependency on pre-release versions of numpy. See: # https://github.com/googleapis/google-cloud-python/issues/8549 + "fastparquet", + # Skip opentelemetry because the library is not compatible with Python 2. + "opentelemetry", + ): continue all_extras.extend(extras[extra]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 8b63f7e57dd0..271640dd5592 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -40,6 +40,16 @@ import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import opentelemetry + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import SimpleExportSpanProcessor + from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, + ) +except (ImportError, AttributeError): + opentelemetry = None try: import pyarrow except (ImportError, AttributeError): # pragma: NO COVER @@ -246,20 +256,25 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection() - + path = "/projects/other-project/queries/nothere" with self.assertRaises(NotFound): - client._get_query_results( - "nothere", - None, - project="other-project", - location=self.LOCATION, - timeout_ms=500, - timeout=42, - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client._get_query_results( + "nothere", + None, + project="other-project", + location=self.LOCATION, + timeout_ms=500, + timeout=42, + ) + + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with( method="GET", - path="/projects/other-project/queries/nothere", + path=path, query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, timeout=42, ) @@ -315,9 +330,12 @@ def test_get_service_account_email(self): email = "bq-123@bigquery-encryption.iam.gserviceaccount.com" resource = {"kind": "bigquery#getServiceAccountResponse", "email": email} conn = client._connection = make_connection(resource) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + service_account_email = client.get_service_account_email(timeout=7.5) - service_account_email = client.get_service_account_email(timeout=7.5) - + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with(method="GET", path=path, timeout=7.5) self.assertEqual(service_account_email, email) @@ -330,9 +348,12 @@ def test_get_service_account_email_w_alternate_project(self): email = "bq-123@bigquery-encryption.iam.gserviceaccount.com" resource = {"kind": "bigquery#getServiceAccountResponse", "email": email} conn = client._connection = make_connection(resource) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + service_account_email = client.get_service_account_email(project=project) - service_account_email = client.get_service_account_email(project=project) - + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with(method="GET", path=path, timeout=None) self.assertEqual(service_account_email, email) @@ -357,10 +378,14 @@ def test_get_service_account_email_w_custom_retry(self): ) with api_request_patcher as fake_api_request: - service_account_email = client.get_service_account_email( - retry=retry, timeout=7.5 - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + service_account_email = client.get_service_account_email( + retry=retry, timeout=7.5 + ) + final_attributes.assert_called_once_with({"path": api_path}, client, None) self.assertEqual( service_account_email, "bq-123@bigquery-encryption.iam.gserviceaccount.com" ) @@ -612,8 +637,12 @@ def test_get_dataset(self): } conn = client._connection = make_connection(resource) dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.get_dataset(dataset_ref, timeout=7.5) - dataset = client.get_dataset(dataset_ref, timeout=7.5) + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( method="GET", path="/%s" % path, timeout=7.5 @@ -625,35 +654,61 @@ def test_get_dataset(self): # Not a cloud API exception (missing 'errors' field). client._connection = make_connection(Exception(""), resource) with self.assertRaises(Exception): - client.get_dataset(dataset_ref) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.get_dataset(dataset_ref) + + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) # Zero-length errors field. client._connection = make_connection(ServerError(""), resource) with self.assertRaises(ServerError): - client.get_dataset(dataset_ref) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.get_dataset(dataset_ref) + + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) # Non-retryable reason. client._connection = make_connection( ServerError("", errors=[{"reason": "serious"}]), resource ) with self.assertRaises(ServerError): - client.get_dataset(dataset_ref) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.get_dataset(dataset_ref) + + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) # Retryable reason, but retry is disabled. client._connection = make_connection( ServerError("", errors=[{"reason": "backendError"}]), resource ) with self.assertRaises(ServerError): - client.get_dataset(dataset_ref, retry=None) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.get_dataset(dataset_ref, retry=None) + + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) # Retryable reason, default retry: success. client._connection = make_connection( ServerError("", errors=[{"reason": "backendError"}]), resource ) - dataset = client.get_dataset( - # Test with a string for dataset ID. - dataset_ref.dataset_id - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.get_dataset( + # Test with a string for dataset ID. + dataset_ref.dataset_id + ) + + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) + self.assertEqual(dataset.dataset_id, self.DS_ID) @unittest.skipIf( @@ -713,8 +768,12 @@ def test_create_dataset_minimal(self): ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + after = client.create_dataset(before, timeout=7.5) - after = client.create_dataset(before, timeout=7.5) + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) self.assertEqual(after.dataset_id, self.DS_ID) self.assertEqual(after.project, self.PROJECT) @@ -775,8 +834,12 @@ def test_create_dataset_w_attrs(self): before.default_table_expiration_ms = 3600 before.location = LOCATION before.labels = LABELS + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + after = client.create_dataset(before) - after = client.create_dataset(before) + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) self.assertEqual(after.dataset_id, self.DS_ID) self.assertEqual(after.project, self.PROJECT) @@ -826,8 +889,12 @@ def test_create_dataset_w_custom_property(self): ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) before._properties["newAlphaProperty"] = "unreleased property" + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + after = client.create_dataset(before) - after = client.create_dataset(before) + final_attributes.assert_called_once_with({"path": path}, client, None) self.assertEqual(after.dataset_id, self.DS_ID) self.assertEqual(after.project, self.PROJECT) @@ -865,8 +932,12 @@ def test_create_dataset_w_client_location_wo_dataset_location(self): ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + after = client.create_dataset(before) - after = client.create_dataset(before) + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) self.assertEqual(after.dataset_id, self.DS_ID) self.assertEqual(after.project, self.PROJECT) @@ -908,8 +979,12 @@ def test_create_dataset_w_client_location_w_dataset_location(self): ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) before.location = OTHER_LOCATION + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + after = client.create_dataset(before) - after = client.create_dataset(before) + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) self.assertEqual(after.dataset_id, self.DS_ID) self.assertEqual(after.project, self.PROJECT) @@ -944,8 +1019,12 @@ def test_create_dataset_w_reference(self): project=self.PROJECT, credentials=creds, location=self.LOCATION ) conn = client._connection = make_connection(resource) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset(DatasetReference(self.PROJECT, self.DS_ID)) - dataset = client.create_dataset(DatasetReference(self.PROJECT, self.DS_ID)) + final_attributes.assert_called_once_with({"path": path}, client, None) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) @@ -980,8 +1059,12 @@ def test_create_dataset_w_fully_qualified_string(self): project=self.PROJECT, credentials=creds, location=self.LOCATION ) conn = client._connection = make_connection(resource) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset("{}.{}".format(self.PROJECT, self.DS_ID)) - dataset = client.create_dataset("{}.{}".format(self.PROJECT, self.DS_ID)) + final_attributes.assert_called_once_with({"path": path}, client, None) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) @@ -1016,8 +1099,12 @@ def test_create_dataset_w_string(self): project=self.PROJECT, credentials=creds, location=self.LOCATION ) conn = client._connection = make_connection(resource) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset(self.DS_ID) - dataset = client.create_dataset(self.DS_ID) + final_attributes.assert_called_once_with({"path": path}, client, None) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) @@ -1067,8 +1154,12 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(self): conn = client._connection = make_connection( google.api_core.exceptions.AlreadyExists("dataset already exists"), resource ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset(self.DS_ID, exists_ok=True) - dataset = client.create_dataset(self.DS_ID, exists_ok=True) + final_attributes.assert_called_with({"path": get_path}, client, None) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) @@ -1100,6 +1191,7 @@ def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import RoutineReference creds = _make_credentials() + path = "/projects/test-routine-project/datasets/test_routines/routines" resource = { "routineReference": { "projectId": "test-routine-project", @@ -1111,14 +1203,15 @@ def test_create_routine_w_minimal_resource(self): conn = client._connection = make_connection(resource) full_routine_id = "test-routine-project.test_routines.minimal_routine" routine = Routine(full_routine_id) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + actual_routine = client.create_routine(routine, timeout=7.5) - actual_routine = client.create_routine(routine, timeout=7.5) + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/projects/test-routine-project/datasets/test_routines/routines", - data=resource, - timeout=7.5, + method="POST", path=path, data=resource, timeout=7.5, ) self.assertEqual( actual_routine.reference, RoutineReference.from_string(full_routine_id) @@ -1132,12 +1225,54 @@ def test_create_routine_w_conflict(self): conn = client._connection = make_connection( google.api_core.exceptions.AlreadyExists("routine already exists") ) + path = "/projects/test-routine-project/datasets/test_routines/routines" + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.create_routine(routine) + + final_attributes.assert_called_once_with({"path": path}, client, None) + + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } + } + conn.api_request.assert_called_once_with( + method="POST", path=path, data=resource, timeout=None, + ) + + @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") + def test_span_status_is_set(self): + from google.cloud.bigquery.routine import Routine + + tracer_provider = TracerProvider() + memory_exporter = InMemorySpanExporter() + span_processor = SimpleExportSpanProcessor(memory_exporter) + tracer_provider.add_span_processor(span_processor) + trace.set_tracer_provider(tracer_provider) + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists") + ) + path = "/projects/test-routine-project/datasets/test_routines/routines" full_routine_id = "test-routine-project.test_routines.minimal_routine" routine = Routine(full_routine_id) with pytest.raises(google.api_core.exceptions.AlreadyExists): client.create_routine(routine) + span_list = memory_exporter.get_finished_spans() + self.assertTrue(span_list[0].status is not None) + resource = { "routineReference": { "projectId": "test-routine-project", @@ -1146,10 +1281,7 @@ def test_create_routine_w_conflict(self): } } conn.api_request.assert_called_once_with( - method="POST", - path="/projects/test-routine-project/datasets/test_routines/routines", - data=resource, - timeout=None, + method="POST", path=path, data=resource, timeout=None, ) def test_create_routine_w_conflict_exists_ok(self): @@ -1164,25 +1296,28 @@ def test_create_routine_w_conflict_exists_ok(self): "routineId": "minimal_routine", } } + path = "/projects/test-routine-project/datasets/test_routines/routines" + conn = client._connection = make_connection( google.api_core.exceptions.AlreadyExists("routine already exists"), resource ) full_routine_id = "test-routine-project.test_routines.minimal_routine" routine = Routine(full_routine_id) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + actual_routine = client.create_routine(routine, exists_ok=True) - actual_routine = client.create_routine(routine, exists_ok=True) + final_attributes.assert_called_with( + {"path": "%s/minimal_routine" % path}, client, None + ) self.assertEqual(actual_routine.project, "test-routine-project") self.assertEqual(actual_routine.dataset_id, "test_routines") self.assertEqual(actual_routine.routine_id, "minimal_routine") conn.api_request.assert_has_calls( [ - mock.call( - method="POST", - path="/projects/test-routine-project/datasets/test_routines/routines", - data=resource, - timeout=None, - ), + mock.call(method="POST", path=path, data=resource, timeout=None,), mock.call( method="GET", path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", @@ -1202,8 +1337,14 @@ def test_create_table_w_day_partition(self): conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF) table.time_partitioning = TimePartitioning() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table(table, timeout=7.5) - got = client.create_table(table, timeout=7.5) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", @@ -1235,8 +1376,14 @@ def test_create_table_w_custom_property(self): conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF) table._properties["newAlphaProperty"] = "unreleased property" + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table(table) - got = client.create_table(table) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", @@ -1270,8 +1417,14 @@ def test_create_table_w_encryption_configuration(self): table.encryption_configuration = EncryptionConfiguration( kms_key_name=self.KMS_KEY_NAME ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table(table) - got = client.create_table(table) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", @@ -1300,8 +1453,14 @@ def test_create_table_w_day_partition_and_expire(self): conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF) table.time_partitioning = TimePartitioning(expiration_ms=100) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table(table) - got = client.create_table(table) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", @@ -1359,7 +1518,14 @@ def test_create_table_w_schema_and_query(self): table = Table(self.TABLE_REF, schema=schema) table.view_query = query - got = client.create_table(table) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table(table) + + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", @@ -1420,7 +1586,14 @@ def test_create_table_w_external(self): ec.autodetect = True table.external_data_configuration = ec - got = client.create_table(table) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table(table) + + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", @@ -1454,7 +1627,16 @@ def test_create_table_w_reference(self): resource = self._make_table_resource() conn = client._connection = make_connection(resource) - got = client.create_table(self.TABLE_REF) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table(self.TABLE_REF) + + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, + ) conn.api_request.assert_called_once_with( method="POST", @@ -1477,9 +1659,17 @@ def test_create_table_w_fully_qualified_string(self): client = self._make_one(project=self.PROJECT, credentials=creds) resource = self._make_table_resource() conn = client._connection = make_connection(resource) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table( + "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.TABLE_ID) + ) - got = client.create_table( - "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.TABLE_ID) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, ) conn.api_request.assert_called_once_with( @@ -1503,8 +1693,16 @@ def test_create_table_w_string(self): client = self._make_one(project=self.PROJECT, credentials=creds) resource = self._make_table_resource() conn = client._connection = make_connection(resource) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - got = client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, + ) conn.api_request.assert_called_once_with( method="POST", @@ -1532,7 +1730,14 @@ def test_create_table_alreadyexists_w_exists_ok_false(self): ) with pytest.raises(google.api_core.exceptions.AlreadyExists): - client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) + + final_attributes.assert_called_with( + {"path": post_path, "dataset_id": self.TABLE_REF.dataset_id}, client, None, + ) conn.api_request.assert_called_once_with( method="POST", @@ -1562,9 +1767,14 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): google.api_core.exceptions.AlreadyExists("table already exists"), resource ) - got = client.create_table( - "{}.{}".format(self.DS_ID, self.TABLE_ID), exists_ok=True - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table( + "{}.{}".format(self.DS_ID, self.TABLE_ID), exists_ok=True + ) + + final_attributes.assert_called_with({"path": get_path}, client, None) self.assertEqual(got.project, self.PROJECT) self.assertEqual(got.dataset_id, self.DS_ID) @@ -1619,7 +1829,12 @@ def test_get_model(self): conn = client._connection = make_connection(resource) model_ref = DatasetReference(self.PROJECT, self.DS_ID).model(self.MODEL_ID) - got = client.get_model(model_ref, timeout=7.5) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.get_model(model_ref, timeout=7.5) + + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( method="GET", path="/%s" % path, timeout=7.5 @@ -1645,7 +1860,12 @@ def test_get_model_w_string(self): conn = client._connection = make_connection(resource) model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) - got = client.get_model(model_id) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.get_model(model_id) + + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( method="GET", path="/%s" % path, timeout=None @@ -1673,15 +1893,20 @@ def test_get_routine(self): }, "routineType": "SCALAR_FUNCTION", } + path = "/projects/test-routine-project/datasets/test_routines/routines/minimal_routine" + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(resource) - actual_routine = client.get_routine(routine, timeout=7.5) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + actual_routine = client.get_routine(routine, timeout=7.5) + + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with( - method="GET", - path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", - timeout=7.5, + method="GET", path=path, timeout=7.5, ) self.assertEqual( actual_routine.reference, @@ -1710,7 +1935,12 @@ def test_get_table(self): client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) resource = self._make_table_resource() conn = client._connection = make_connection(resource) - table = client.get_table(self.TABLE_REF, timeout=7.5) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + table = client.get_table(self.TABLE_REF, timeout=7.5) + + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( method="GET", path="/%s" % path, timeout=7.5 @@ -1786,8 +2016,12 @@ def test_get_iam_policy(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RETURNED) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + policy = client.get_iam_policy(self.TABLE_REF, timeout=7.5) - policy = client.get_iam_policy(self.TABLE_REF, timeout=7.5) + final_attributes.assert_called_once_with({"path": PATH}, client, None) conn.api_request.assert_called_once_with( method="POST", path=PATH, data=BODY, timeout=7.5 @@ -1856,9 +2090,14 @@ def test_set_iam_policy(self): client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RETURNED) - returned_policy = client.set_iam_policy( - self.TABLE_REF, policy, updateMask=MASK, timeout=7.5 - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + returned_policy = client.set_iam_policy( + self.TABLE_REF, policy, updateMask=MASK, timeout=7.5 + ) + + final_attributes.assert_called_once_with({"path": PATH}, client, None) conn.api_request.assert_called_once_with( method="POST", path=PATH, data=BODY, timeout=7.5 @@ -1884,8 +2123,12 @@ def test_set_iam_policy_no_mask(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RETURNED) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.set_iam_policy(self.TABLE_REF, policy, timeout=7.5) - client.set_iam_policy(self.TABLE_REF, policy, timeout=7.5) + final_attributes.assert_called_once_with({"path": PATH}, client, None) conn.api_request.assert_called_once_with( method="POST", path=PATH, data=BODY, timeout=7.5 @@ -1937,8 +2180,12 @@ def test_test_iam_permissions(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RETURNED) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.test_iam_permissions(self.TABLE_REF, PERMISSIONS, timeout=7.5) - client.test_iam_permissions(self.TABLE_REF, PERMISSIONS, timeout=7.5) + final_attributes.assert_called_once_with({"path": PATH}, client, None) conn.api_request.assert_called_once_with( method="POST", path=PATH, data=BODY, timeout=7.5 @@ -2000,11 +2247,23 @@ def test_update_dataset(self): ds.default_table_expiration_ms = EXP ds.labels = LABELS ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] - ds2 = client.update_dataset( - ds, - ["description", "friendly_name", "location", "labels", "access_entries"], - timeout=7.5, + fields = [ + "description", + "friendly_name", + "location", + "labels", + "access_entries", + ] + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + ds2 = client.update_dataset(ds, fields=fields, timeout=7.5,) + + final_attributes.assert_called_once_with( + {"path": "/%s" % PATH, "fields": fields}, client, None ) + conn.api_request.assert_called_once_with( method="PATCH", data={ @@ -2046,7 +2305,15 @@ def test_update_dataset_w_custom_property(self): dataset = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) dataset._properties["newAlphaProperty"] = "unreleased property" - dataset = client.update_dataset(dataset, ["newAlphaProperty"]) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.update_dataset(dataset, ["newAlphaProperty"]) + + final_attributes.assert_called_once_with( + {"path": path, "fields": ["newAlphaProperty"]}, client, None + ) + conn.api_request.assert_called_once_with( method="PATCH", data={"newAlphaProperty": "unreleased property"}, @@ -2093,9 +2360,14 @@ def test_update_model(self): model.friendly_name = title model.expires = expires model.labels = {"x": "y"} + fields = ["description", "friendly_name", "labels", "expires"] + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + updated_model = client.update_model(model, fields, timeout=7.5) - updated_model = client.update_model( - model, ["description", "friendly_name", "labels", "expires"], timeout=7.5 + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": fields}, client, None ) sent = { @@ -2153,11 +2425,22 @@ def test_update_routine(self): routine.language = "SQL" routine.type_ = "SCALAR_FUNCTION" routine._properties["someNewField"] = "someValue" + fields = [ + "arguments", + "language", + "body", + "type_", + "return_type", + "someNewField", + ] - actual_routine = client.update_routine( - routine, - ["arguments", "language", "body", "type_", "return_type", "someNewField"], - timeout=7.5, + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + actual_routine = client.update_routine(routine, fields, timeout=7.5,) + + final_attributes.assert_called_once_with( + {"path": routine.path, "fields": fields}, client, None ) # TODO: routineReference isn't needed when the Routines API supports @@ -2177,7 +2460,15 @@ def test_update_routine(self): # ETag becomes If-Match header. routine._properties["etag"] = "im-an-etag" - client.update_routine(routine, []) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.update_routine(routine, []) + + final_attributes.assert_called_once_with( + {"path": routine.path, "fields": []}, client, None + ) + req = conn.api_request.call_args self.assertEqual(req[1]["headers"]["If-Match"], "im-an-etag") @@ -2228,9 +2519,15 @@ def test_update_table(self): table.description = description table.friendly_name = title table.labels = {"x": "y"} + fields = ["schema", "description", "friendly_name", "labels"] + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + updated_table = client.update_table(table, fields, timeout=7.5) + span_path = "/%s" % path - updated_table = client.update_table( - table, ["schema", "description", "friendly_name", "labels"], timeout=7.5 + final_attributes.assert_called_once_with( + {"path": span_path, "fields": fields}, client, None ) sent = { @@ -2264,7 +2561,15 @@ def test_update_table(self): # ETag becomes If-Match header. table._properties["etag"] = "etag" - client.update_table(table, []) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.update_table(table, []) + + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": []}, client, None + ) + req = conn.api_request.call_args self.assertEqual(req[1]["headers"]["If-Match"], "etag") @@ -2284,7 +2589,14 @@ def test_update_table_w_custom_property(self): table = Table(self.TABLE_REF) table._properties["newAlphaProperty"] = "unreleased property" - updated_table = client.update_table(table, ["newAlphaProperty"]) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + updated_table = client.update_table(table, ["newAlphaProperty"]) + + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["newAlphaProperty"]}, client, None, + ) conn.api_request.assert_called_once_with( method="PATCH", @@ -2312,8 +2624,14 @@ def test_update_table_only_use_legacy_sql(self): conn = client._connection = make_connection(resource) table = Table(self.TABLE_REF) table.view_use_legacy_sql = True + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + updated_table = client.update_table(table, ["view_use_legacy_sql"]) - updated_table = client.update_table(table, ["view_use_legacy_sql"]) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["view_use_legacy_sql"]}, client, None, + ) conn.api_request.assert_called_once_with( method="PATCH", @@ -2376,8 +2694,14 @@ def test_update_table_w_query(self): table.view_query = query table.view_use_legacy_sql = True updated_properties = ["schema", "view_query", "expires", "view_use_legacy_sql"] + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + updated_table = client.update_table(table, updated_properties) - updated_table = client.update_table(table, updated_properties) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": updated_properties}, client, None, + ) self.assertEqual(updated_table.schema, table.schema) self.assertEqual(updated_table.view_query, table.view_query) @@ -2420,17 +2744,30 @@ def test_update_table_w_schema_None(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(resource1, resource2) - table = client.get_table( - # Test with string for table ID - "{}.{}.{}".format( - self.TABLE_REF.project, - self.TABLE_REF.dataset_id, - self.TABLE_REF.table_id, + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + table = client.get_table( + # Test with string for table ID + "{}.{}.{}".format( + self.TABLE_REF.project, + self.TABLE_REF.dataset_id, + self.TABLE_REF.table_id, + ) ) - ) + + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) + table.schema = None - updated_table = client.update_table(table, ["schema"]) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + updated_table = client.update_table(table, ["schema"]) + + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["schema"]}, client, None + ) self.assertEqual(len(conn.api_request.call_args_list), 2) req = conn.api_request.call_args_list[1] @@ -2460,11 +2797,30 @@ def test_update_table_delete_property(self): table = Table(self.TABLE_REF) table.description = description table.friendly_name = title - table2 = client.update_table(table, ["description", "friendly_name"]) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + table2 = client.update_table(table, ["description", "friendly_name"]) + + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["description", "friendly_name"]}, + client, + None, + ) + self.assertEqual(table2.description, table.description) table2.description = None - table3 = client.update_table(table2, ["description"]) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + table3 = client.update_table(table2, ["description"]) + + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["description"]}, client, None + ) + self.assertEqual(len(conn.api_request.call_args_list), 2) req = conn.api_request.call_args_list[1] self.assertEqual(req[1]["method"], "PATCH") @@ -2777,7 +3133,15 @@ def test_delete_dataset(self): client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(*([{}] * len(datasets))) for arg in datasets: - client.delete_dataset(arg, timeout=7.5) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_dataset(arg, timeout=7.5) + + final_attributes.assert_called_once_with( + {"path": "/%s" % PATH}, client, None + ) + conn.api_request.assert_called_with( method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5 ) @@ -2791,7 +3155,14 @@ def test_delete_dataset_delete_contents(self): conn = client._connection = make_connection({}, {}) ds_ref = DatasetReference(self.PROJECT, self.DS_ID) for arg in (ds_ref, Dataset(ds_ref)): - client.delete_dataset(arg, delete_contents=True) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_dataset(arg, delete_contents=True) + + final_attributes.assert_called_once_with( + {"path": "/%s" % PATH, "deleteContents": True}, client, None + ) conn.api_request.assert_called_with( method="DELETE", path="/%s" % PATH, @@ -2817,7 +3188,12 @@ def test_delete_dataset_w_not_found_ok_false(self): ) with self.assertRaises(google.api_core.exceptions.NotFound): - client.delete_dataset(self.DS_ID) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_dataset(self.DS_ID) + + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( method="DELETE", path=path, query_params={}, timeout=None @@ -2832,7 +3208,12 @@ def test_delete_dataset_w_not_found_ok_true(self): google.api_core.exceptions.NotFound("dataset not found") ) - client.delete_dataset(self.DS_ID, not_found_ok=True) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_dataset(self.DS_ID, not_found_ok=True) + + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( method="DELETE", path=path, query_params={}, timeout=None @@ -2858,7 +3239,14 @@ def test_delete_model(self): conn = client._connection = make_connection(*([{}] * len(models))) for arg in models: - client.delete_model(arg, timeout=7.5) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_model(arg, timeout=7.5) + + final_attributes.assert_called_once_with( + {"path": "/%s" % path}, client, None + ) conn.api_request.assert_called_with( method="DELETE", path="/%s" % path, timeout=7.5 ) @@ -2895,10 +3283,14 @@ def test_delete_model_w_not_found_ok_true(self): conn = client._connection = make_connection( google.api_core.exceptions.NotFound("model not found") ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_model( + "{}.{}".format(self.DS_ID, self.MODEL_ID), not_found_ok=True + ) - client.delete_model( - "{}.{}".format(self.DS_ID, self.MODEL_ID), not_found_ok=True - ) + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) @@ -2914,15 +3306,20 @@ def test_delete_routine(self): ] creds = _make_credentials() http = object() + path = "/projects/test-routine-project/datasets/test_routines/routines/minimal_routine" client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(*([{}] * len(routines))) for routine in routines: - client.delete_routine(routine, timeout=7.5) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_routine(routine, timeout=7.5) + + final_attributes.assert_called_once_with({"path": path}, client, None) + conn.api_request.assert_called_with( - method="DELETE", - path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", - timeout=7.5, + method="DELETE", path=path, timeout=7.5, ) def test_delete_routine_w_wrong_type(self): @@ -2938,14 +3335,18 @@ def test_delete_routine_w_not_found_ok_false(self): conn = client._connection = make_connection( google.api_core.exceptions.NotFound("routine not found") ) + path = "/projects/routines-project/datasets/test_routines/routines/test_routine" with self.assertRaises(google.api_core.exceptions.NotFound): - client.delete_routine("routines-project.test_routines.test_routine") + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_routine("routines-project.test_routines.test_routine") + + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", - path="/projects/routines-project/datasets/test_routines/routines/test_routine", - timeout=None, + method="DELETE", path=path, timeout=None, ) def test_delete_routine_w_not_found_ok_true(self): @@ -2955,15 +3356,19 @@ def test_delete_routine_w_not_found_ok_true(self): conn = client._connection = make_connection( google.api_core.exceptions.NotFound("routine not found") ) + path = "/projects/routines-project/datasets/test_routines/routines/test_routine" - client.delete_routine( - "routines-project.test_routines.test_routine", not_found_ok=True - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_routine( + "routines-project.test_routines.test_routine", not_found_ok=True + ) + + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", - path="/projects/routines-project/datasets/test_routines/routines/test_routine", - timeout=None, + method="DELETE", path=path, timeout=None, ) def test_delete_table(self): @@ -2989,7 +3394,15 @@ def test_delete_table(self): conn = client._connection = make_connection(*([{}] * len(tables))) for arg in tables: - client.delete_table(arg, timeout=7.5) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_table(arg, timeout=7.5) + + final_attributes.assert_called_once_with( + {"path": "/%s" % path}, client, None + ) + conn.api_request.assert_called_with( method="DELETE", path="/%s" % path, timeout=7.5 ) @@ -3012,7 +3425,12 @@ def test_delete_table_w_not_found_ok_false(self): ) with self.assertRaises(google.api_core.exceptions.NotFound): - client.delete_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) + + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) @@ -3027,9 +3445,14 @@ def test_delete_table_w_not_found_ok_true(self): google.api_core.exceptions.NotFound("table not found") ) - client.delete_table( - "{}.{}".format(self.DS_ID, self.TABLE_ID), not_found_ok=True - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.delete_table( + "{}.{}".format(self.DS_ID, self.TABLE_ID), not_found_ok=True + ) + + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 9cd3631e1a84..d5497ffa8d0e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -620,15 +620,16 @@ def test__begin_defaults(self): builder.return_value = resource call_api = job._client._call_api = mock.Mock() call_api.return_value = resource + path = "/projects/{}/jobs".format(self.PROJECT) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() - job._begin() + final_attributes.assert_called_with({"path": path}, None, job) call_api.assert_called_once_with( - DEFAULT_RETRY, - method="POST", - path="/projects/{}/jobs".format(self.PROJECT), - data=resource, - timeout=None, + DEFAULT_RETRY, method="POST", path=path, data=resource, timeout=None, ) self.assertEqual(job._properties, resource) @@ -651,15 +652,16 @@ def test__begin_explicit(self): call_api = client._call_api = mock.Mock() call_api.return_value = resource retry = DEFAULT_RETRY.with_deadline(1) + path = "/projects/{}/jobs".format(self.PROJECT) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client, retry=retry, timeout=7.5) - job._begin(client=client, retry=retry, timeout=7.5) + final_attributes.assert_called_with({"path": path}, None, job) call_api.assert_called_once_with( - retry, - method="POST", - path="/projects/{}/jobs".format(self.PROJECT), - data=resource, - timeout=7.5, + retry, method="POST", path=path, data=resource, timeout=7.5, ) self.assertEqual(job._properties, resource) @@ -672,7 +674,16 @@ def test_exists_defaults_miss(self): call_api = job._client._call_api = mock.Mock() call_api.side_effect = NotFound("testing") - self.assertFalse(job.exists()) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + None, + job, + ) call_api.assert_called_once_with( DEFAULT_RETRY, @@ -699,8 +710,16 @@ def test_exists_explicit_hit(self): call_api = client._call_api = mock.Mock() call_api.return_value = resource retry = DEFAULT_RETRY.with_deadline(1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client, retry=retry)) - self.assertTrue(job.exists(client=client, retry=retry)) + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + None, + job, + ) call_api.assert_called_once_with( retry, @@ -716,8 +735,12 @@ def test_exists_w_timeout(self): PATH = "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) job = self._set_properties_job() call_api = job._client._call_api = mock.Mock() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.exists(timeout=7.5) - job.exists(timeout=7.5) + final_attributes.assert_called_with({"path": PATH}, None, job) call_api.assert_called_once_with( DEFAULT_RETRY, @@ -742,8 +765,16 @@ def test_reload_defaults(self): job._properties["jobReference"]["location"] = self.LOCATION call_api = job._client._call_api = mock.Mock() call_api.return_value = resource + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() - job.reload() + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + None, + job, + ) call_api.assert_called_once_with( DEFAULT_RETRY, @@ -771,8 +802,16 @@ def test_reload_explicit(self): call_api = client._call_api = mock.Mock() call_api.return_value = resource retry = DEFAULT_RETRY.with_deadline(1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client, retry=retry, timeout=4.2) - job.reload(client=client, retry=retry, timeout=4.2) + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + None, + job, + ) call_api.assert_called_once_with( retry, @@ -796,8 +835,16 @@ def test_cancel_defaults(self): job = self._set_properties_job() job._properties["jobReference"]["location"] = self.LOCATION connection = job._client._connection = _make_connection(response) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.cancel()) - self.assertTrue(job.cancel()) + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID)}, + None, + job, + ) connection.api_request.assert_called_once_with( method="POST", @@ -821,8 +868,16 @@ def test_cancel_explicit(self): job = self._set_properties_job() client = _make_client(project=other_project) connection = client._connection = _make_connection(response) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.cancel(client=client, timeout=7.5)) - self.assertTrue(job.cancel(client=client, timeout=7.5)) + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID)}, + None, + job, + ) connection.api_request.assert_called_once_with( method="POST", @@ -855,7 +910,12 @@ def test_cancel_w_custom_retry(self): ) with api_request_patcher as fake_api_request: - result = job.cancel(retry=retry, timeout=7.5) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + result = job.cancel(retry=retry, timeout=7.5) + + final_attributes.assert_called_with({"path": api_path}, None, job) self.assertTrue(result) self.assertEqual(job._properties, resource) @@ -2343,12 +2403,17 @@ def test_begin_w_bound_client(self): conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + path = "/projects/{}/jobs".format(self.PROJECT) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() - job._begin() + final_attributes.assert_called_with({"path": path}, None, job) conn.api_request.assert_called_once_with( method="POST", - path="/projects/{}/jobs".format(self.PROJECT), + path=path, data={ "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "configuration": { @@ -2384,7 +2449,12 @@ def test_begin_w_autodetect(self): job = self._make_one( self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config ) - job._begin() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": path}, None, job) sent = { "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, @@ -2478,8 +2548,12 @@ def test_begin_w_alternate_client(self): config.use_avro_logical_types = True config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) - job._begin(client=client2) + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() self.assertEqual(len(conn2.api_request.call_args_list), 1) @@ -2504,8 +2578,13 @@ def test_begin_w_job_reference(self): conn = _make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - - load_job._begin() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job._begin() + final_attributes.assert_called_with( + {"path": "/projects/alternative-project/jobs"}, None, load_job + ) conn.api_request.assert_called_once() _, request = conn.api_request.call_args @@ -2522,8 +2601,16 @@ def test_exists_miss_w_bound_client(self): conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) - self.assertFalse(job.exists()) + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + None, + job, + ) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={"fields": "id"}, timeout=None @@ -2536,8 +2623,16 @@ def test_exists_hit_w_alternate_client(self): conn2 = _make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) - self.assertTrue(job.exists(client=client2)) + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + None, + job, + ) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -2551,8 +2646,14 @@ def test_exists_miss_w_job_reference(self): conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(load_job.exists()) - self.assertFalse(load_job.exists()) + final_attributes.assert_called_with( + {"path": "/projects/other-project/jobs/my-job-id"}, None, load_job + ) conn.api_request.assert_called_once_with( method="GET", @@ -2567,8 +2668,12 @@ def test_reload_w_bound_client(self): conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() - job.reload() + final_attributes.assert_called_with({"path": PATH}, None, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={}, timeout=None @@ -2583,8 +2688,12 @@ def test_reload_w_alternate_client(self): conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) - job.reload(client=client2) + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -2602,8 +2711,16 @@ def test_reload_w_job_reference(self): conn = _make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job.reload() - load_job.reload() + final_attributes.assert_called_with( + {"path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID)}, + None, + load_job, + ) conn.api_request.assert_called_once_with( method="GET", @@ -2619,8 +2736,12 @@ def test_cancel_w_bound_client(self): conn = _make_connection(RESPONSE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.cancel() - job.cancel() + final_attributes.assert_called_with({"path": PATH}, None, job) conn.api_request.assert_called_once_with( method="POST", path=PATH, query_params={}, timeout=None, @@ -2636,8 +2757,12 @@ def test_cancel_w_alternate_client(self): conn2 = _make_connection(RESPONSE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.cancel(client=client2) - job.cancel(client=client2) + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -2655,9 +2780,20 @@ def test_cancel_w_job_reference(self): conn = _make_connection({"job": resource}) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job.cancel() - load_job.cancel() - + final_attributes.assert_called_with( + { + "path": "/projects/alternative-project/jobs/{}/cancel".format( + self.JOB_ID + ) + }, + None, + load_job, + ) conn.api_request.assert_called_once_with( method="POST", path="/projects/alternative-project/jobs/{}/cancel".format(self.JOB_ID), @@ -2952,8 +3088,12 @@ def test_begin_w_bound_client(self): source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() - job._begin() + final_attributes.assert_called_with({"path": PATH}, None, job) conn.api_request.assert_called_once_with( method="POST", @@ -3016,7 +3156,12 @@ def test_begin_w_alternate_client(self): config.create_disposition = CreateDisposition.CREATE_NEVER config.write_disposition = WriteDisposition.WRITE_TRUNCATE job = self._make_one(self.JOB_ID, [source], destination, client1, config) - job._begin(client=client2) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -3038,8 +3183,12 @@ def test_exists_miss_w_bound_client(self): source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) - self.assertFalse(job.exists()) + final_attributes.assert_called_with({"path": PATH}, None, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, @@ -3054,8 +3203,12 @@ def test_exists_hit_w_alternate_client(self): source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) - self.assertTrue(job.exists(client=client2)) + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -3070,8 +3223,12 @@ def test_reload_w_bound_client(self): source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() - job.reload() + final_attributes.assert_called_with({"path": PATH}, None, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={}, timeout=None @@ -3088,8 +3245,12 @@ def test_reload_w_alternate_client(self): source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) - job.reload(client=client2) + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -3349,8 +3510,12 @@ def test_begin_w_bound_client(self): source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() - job._begin() + final_attributes.assert_called_with({"path": PATH}, None, job) conn.api_request.assert_called_once_with( method="POST", @@ -3407,8 +3572,12 @@ def test_begin_w_alternate_client(self): job = self._make_one( self.JOB_ID, source, [self.DESTINATION_URI], client1, config ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) - job._begin(client=client2) + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -3429,8 +3598,12 @@ def test_exists_miss_w_bound_client(self): job = self._make_one( self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) - self.assertFalse(job.exists()) + final_attributes.assert_called_with({"path": PATH}, None, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, @@ -3445,8 +3618,12 @@ def test_exists_hit_w_alternate_client(self): job = self._make_one( self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) - self.assertTrue(job.exists(client=client2)) + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -3463,9 +3640,12 @@ def test_reload_w_bound_client(self): source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() - job.reload() - + final_attributes.assert_called_with({"path": PATH}, None, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={}, timeout=None ) @@ -3483,8 +3663,12 @@ def test_reload_w_alternate_client(self): source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) - job.reload(client=client2) + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -4823,8 +5007,12 @@ def test__begin_w_timeout(self): conn = _make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(timeout=7.5) - job._begin(timeout=7.5) + final_attributes.assert_called_with({"path": PATH}, None, job) conn.api_request.assert_called_once_with( method="POST", @@ -4856,8 +5044,12 @@ def test_begin_w_bound_client(self): config = QueryJobConfig() config.default_dataset = DatasetReference(self.PROJECT, DS_ID) job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() - job._begin() + final_attributes.assert_called_with({"path": PATH}, None, job) self.assertIsNone(job.default_dataset) self.assertEqual(job.udf_resources, []) @@ -4936,8 +5128,12 @@ def test_begin_w_alternate_client(self): config.maximum_bytes_billed = 123456 config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_RELAXATION] job = self._make_one(self.JOB_ID, self.QUERY, client1, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) - job._begin(client=client2) + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -4978,8 +5174,12 @@ def test_begin_w_udf(self): config.udf_resources = udf_resources config.use_legacy_sql = True job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() - job._begin() + final_attributes.assert_called_with({"path": PATH}, None, job) self.assertEqual(job.udf_resources, udf_resources) conn.api_request.assert_called_once_with( @@ -5028,8 +5228,12 @@ def test_begin_w_named_query_parameter(self): jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() - job._begin() + final_attributes.assert_called_with({"path": PATH}, None, job) self.assertEqual(job.query_parameters, query_parameters) conn.api_request.assert_called_once_with( @@ -5072,8 +5276,12 @@ def test_begin_w_positional_query_parameter(self): jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() - job._begin() + final_attributes.assert_called_with({"path": PATH}, None, job) self.assertEqual(job.query_parameters, query_parameters) conn.api_request.assert_called_once_with( @@ -5148,8 +5356,12 @@ def test_begin_w_table_defs(self): config.table_definitions = {bt_table: bt_config, csv_table: csv_config} config.use_legacy_sql = True job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() - job._begin() + final_attributes.assert_called_with({"path": PATH}, None, job) conn.api_request.assert_called_once_with( method="POST", @@ -5187,8 +5399,12 @@ def test_dry_run_query(self): config = QueryJobConfig() config.dry_run = True job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() - job._begin() + final_attributes.assert_called_with({"path": PATH}, None, job) self.assertEqual(job.udf_resources, []) conn.api_request.assert_called_once_with( method="POST", @@ -5209,8 +5425,12 @@ def test_exists_miss_w_bound_client(self): conn = _make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) - self.assertFalse(job.exists()) + final_attributes.assert_called_with({"path": PATH}, None, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={"fields": "id"}, timeout=None @@ -5223,8 +5443,12 @@ def test_exists_hit_w_alternate_client(self): conn2 = _make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) - self.assertTrue(job.exists(client=client2)) + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -5246,8 +5470,12 @@ def test_reload_w_bound_client(self): config = QueryJobConfig() config.destination = table_ref job = self._make_one(self.JOB_ID, None, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() - job.reload() + final_attributes.assert_called_with({"path": PATH}, None, job) self.assertNotEqual(job.destination, table_ref) @@ -5272,8 +5500,12 @@ def test_reload_w_alternate_client(self): conn2 = _make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) - job.reload(client=client2) + final_attributes.assert_called_with({"path": PATH}, None, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -5296,8 +5528,12 @@ def test_reload_w_timeout(self): config = QueryJobConfig() config.destination = table_ref job = self._make_one(self.JOB_ID, None, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(timeout=4.2) - job.reload(timeout=4.2) + final_attributes.assert_called_with({"path": PATH}, None, job) self.assertNotEqual(job.destination, table_ref) diff --git a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py new file mode 100644 index 000000000000..1c35b0a82443 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py @@ -0,0 +1,212 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import sys + +import mock + +try: + import opentelemetry + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import SimpleExportSpanProcessor + from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, + ) +except ImportError: + opentelemetry = None +import pytest +from six.moves import reload_module + +from google.cloud.bigquery import opentelemetry_tracing + +TEST_SPAN_NAME = "bar" +TEST_SPAN_ATTRIBUTES = {"foo": "baz"} + + +@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") +@pytest.fixture +def setup(): + reload_module(opentelemetry_tracing) + tracer_provider = TracerProvider() + memory_exporter = InMemorySpanExporter() + span_processor = SimpleExportSpanProcessor(memory_exporter) + tracer_provider.add_span_processor(span_processor) + trace.set_tracer_provider(tracer_provider) + yield memory_exporter + + +@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") +def test_opentelemetry_not_installed(setup, monkeypatch): + monkeypatch.setitem(sys.modules, "opentelemetry", None) + reload_module(opentelemetry_tracing) + with opentelemetry_tracing.create_span("No-op for opentelemetry") as span: + assert span is None + + +@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") +def test_opentelemetry_success(setup): + expected_attributes = {"foo": "baz", "db.system": "BigQuery"} + + with opentelemetry_tracing.create_span( + TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, client=None, job_ref=None + ) as span: + assert span is not None + assert span.name == TEST_SPAN_NAME + assert span.attributes == expected_attributes + + +@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") +def test_default_client_attributes(setup): + expected_attributes = { + "foo": "baz", + "db.system": "BigQuery", + "db.name": "test_project", + "location": "test_location", + } + with mock.patch("google.cloud.bigquery.client.Client") as test_client: + test_client.project = "test_project" + test_client.location = "test_location" + with opentelemetry_tracing.create_span( + TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, client=test_client + ) as span: + assert span is not None + assert span.name == TEST_SPAN_NAME + assert span.attributes == expected_attributes + + +@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") +def test_default_job_attributes(setup): + import google.cloud._helpers + + time_created = datetime.datetime( + 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + started_time = datetime.datetime( + 2011, 10, 1, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + ended_time = datetime.datetime( + 2011, 10, 2, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + error_result = [ + {"errorResult1": "some_error_result1", "errorResult2": "some_error_result2"} + ] + + expected_attributes = { + "db.system": "BigQuery", + "db.name": "test_project_id", + "location": "test_location", + "num_child_jobs": "0", + "job_id": "test_job_id", + "foo": "baz", + "parent_job_id": "parent_job_id", + "timeCreated": time_created.isoformat(), + "timeStarted": started_time.isoformat(), + "timeEnded": ended_time.isoformat(), + "hasErrors": True, + "state": "some_job_state", + } + with mock.patch("google.cloud.bigquery.job._AsyncJob") as test_job_ref: + test_job_ref.job_id = "test_job_id" + test_job_ref.location = "test_location" + test_job_ref.project = "test_project_id" + test_job_ref.num_child_jobs = "0" + test_job_ref.parent_job_id = "parent_job_id" + test_job_ref.created = time_created + test_job_ref.started = started_time + test_job_ref.ended = ended_time + test_job_ref.error_result = error_result + test_job_ref.state = "some_job_state" + + with opentelemetry_tracing.create_span( + TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job_ref + ) as span: + assert span is not None + assert span.name == TEST_SPAN_NAME + assert span.attributes == expected_attributes + + +@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") +def test_default_no_data_leakage(setup): + import google.auth.credentials + from google.cloud.bigquery import client + from google.cloud.bigquery import job + + mock_credentials = mock.Mock(spec=google.auth.credentials.Credentials) + test_client = client.Client( + project="test_project", credentials=mock_credentials, location="test_location" + ) + + expected_attributes = { + "foo": "baz", + "db.system": "BigQuery", + "db.name": "test_project", + "location": "test_location", + } + with opentelemetry_tracing.create_span( + TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, client=test_client + ) as span: + assert span.name == TEST_SPAN_NAME + assert span.attributes == expected_attributes + + test_job_reference = job._JobReference( + job_id="test_job_id", project="test_project_id", location="test_location" + ) + test_client = client.Client( + project="test_project", credentials=mock_credentials, location="test_location" + ) + test_job = job._AsyncJob(job_id=test_job_reference, client=test_client) + + expected_attributes = { + "db.system": "BigQuery", + "db.name": "test_project_id", + "location": "test_location", + "num_child_jobs": 0, + "job_id": "test_job_id", + "foo": "baz", + "hasErrors": False, + } + + with opentelemetry_tracing.create_span( + TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job + ) as span: + assert span.name == TEST_SPAN_NAME + assert span.attributes == expected_attributes + + +@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") +def test_span_creation_error(setup): + import google.auth.credentials + from google.cloud.bigquery import client + from google.api_core.exceptions import GoogleAPICallError, InvalidArgument + + mock_credentials = mock.Mock(spec=google.auth.credentials.Credentials) + test_client = client.Client( + project="test_project", credentials=mock_credentials, location="test_location" + ) + + expected_attributes = { + "foo": "baz", + "db.system": "BigQuery", + "db.name": "test_project", + "location": "test_location", + } + with pytest.raises(GoogleAPICallError): + with opentelemetry_tracing.create_span( + TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, client=test_client + ) as span: + assert span.name == TEST_SPAN_NAME + assert span.attributes == expected_attributes + raise InvalidArgument("test_error") From 546e80572006bdc47320f896502bbf1efa5418a3 Mon Sep 17 00:00:00 2001 From: Aravin <34178459+aravinsiva@users.noreply.github.com> Date: Mon, 24 Aug 2020 12:49:54 -0400 Subject: [PATCH 0923/2016] fix: fix dependency issue in fastavro (#241) * fixing dependency version to last version that supports Python 2.7 * Update noxfile.py Removing fastavro dependency. Co-authored-by: Tim Swast Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/noxfile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 5db14c31f349..a27a5939e339 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -92,7 +92,6 @@ def system(session): # Install all test dependencies, then install local packages in place. session.install("mock", "pytest", "psutil", "google-cloud-testutils") session.install("google-cloud-storage") - session.install("fastavro") session.install("-e", ".[all]") # IPython does not support Python 2 after version 5.x From 56b08624a1d32ddbf887dc3bdb9d2a676b37c9c8 Mon Sep 17 00:00:00 2001 From: Aravin <34178459+aravinsiva@users.noreply.github.com> Date: Mon, 24 Aug 2020 14:31:18 -0400 Subject: [PATCH 0924/2016] feat: add instrumentation to list methods (#239) * testing first trace export * instrumention client.py * instrumenting job.py and adding documentation * reconfiguring imports * quick cleanup of unused variable * adding more attributes in module and limiting complexity of instrumentation * adding tests, nox and correct attribute additions in client & job * adding tests, nox and correct attribute additions in client & job (left out of last commit) * linting * reformatting noxfile.[y * addressing suggested changes * adding suggested changes * removing print statements * setting same version across all OT [ackages and other reccommended changes * suggested changes * fixing packages issue in nox and updating documentation * fixing module install issue * restructuring design for testing adding first layer of tests (some still failing) * adding reamining client tests and all job tests * fixing linting issues * fixing trace not defined issue * fixing lint issues * fixing documentation issues and python2 testing issue * linting and fixing coverage issues * adding suggested changes * linting * adding Shawn's suggested changes * fixing _default_span_attribute_bug * reverting uneccesxsary changes * adding more tests for all job_ref parameters * removing dependecny, ordering imports and other changes * addressing Shawn concerns * adding test and suggested changes * adding opentelemetry to setup.py and other suggested changes * adding reasoning for not adding to [all] * linting * adding nested functions * adding test * adding Tim suggested changes * adding full tests * linting and fixing missing test Co-authored-by: Tim Swast --- .../google/cloud/bigquery/client.py | 63 ++++++++- .../tests/unit/test_client.py | 122 +++++++++++++++--- 2 files changed, 162 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index fbbfda05108b..e7f8c5c66e7b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -289,9 +289,17 @@ def list_projects( Iterator of :class:`~google.cloud.bigquery.client.Project` accessible to the current client. """ + span_attributes = {"path": "/projects"} + + def api_request(*args, **kwargs): + with create_span( + name="BigQuery.listProjects", attributes=span_attributes, client=self + ): + return self._call_api(retry, *args, timeout=timeout, **kwargs) + return page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry, timeout=timeout), + api_request=api_request, path="/projects", item_to_value=_item_to_project, items_key="projects", @@ -353,9 +361,18 @@ def list_datasets( # and converting it into a string here. extra_params["filter"] = filter path = "/projects/%s/datasets" % (project,) + + span_attributes = {"path": path} + + def api_request(*args, **kwargs): + with create_span( + name="BigQuery.listDatasets", attributes=span_attributes, client=self + ): + return self._call_api(retry, *args, timeout=timeout, **kwargs) + return page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry, timeout=timeout), + api_request=api_request, path=path, item_to_value=_item_to_dataset, items_key="datasets", @@ -1067,9 +1084,17 @@ def list_models( raise TypeError("dataset must be a Dataset, DatasetReference, or string") path = "%s/models" % dataset.path + span_attributes = {"path": path} + + def api_request(*args, **kwargs): + with create_span( + name="BigQuery.listModels", attributes=span_attributes, client=self + ): + return self._call_api(retry, *args, timeout=timeout, **kwargs) + result = page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry, timeout=timeout), + api_request=api_request, path=path, item_to_value=_item_to_model, items_key="models", @@ -1132,9 +1157,18 @@ def list_routines( raise TypeError("dataset must be a Dataset, DatasetReference, or string") path = "{}/routines".format(dataset.path) + + span_attributes = {"path": path} + + def api_request(*args, **kwargs): + with create_span( + name="BigQuery.listRoutines", attributes=span_attributes, client=self + ): + return self._call_api(retry, *args, timeout=timeout, **kwargs) + result = page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry, timeout=timeout), + api_request=api_request, path=path, item_to_value=_item_to_routine, items_key="routines", @@ -1197,9 +1231,17 @@ def list_tables( raise TypeError("dataset must be a Dataset, DatasetReference, or string") path = "%s/tables" % dataset.path + span_attributes = {"path": path} + + def api_request(*args, **kwargs): + with create_span( + name="BigQuery.listTables", attributes=span_attributes, client=self + ): + return self._call_api(retry, *args, timeout=timeout, **kwargs) + result = page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry, timeout=timeout), + api_request=api_request, path=path, item_to_value=_item_to_table, items_key="tables", @@ -1765,9 +1807,18 @@ def list_jobs( project = self.project path = "/projects/%s/jobs" % (project,) + + span_attributes = {"path": path} + + def api_request(*args, **kwargs): + with create_span( + name="BigQuery.listJobs", attributes=span_attributes, client=self + ): + return self._call_api(retry, *args, timeout=timeout, **kwargs) + return page_iterator.HTTPIterator( client=self, - api_request=functools.partial(self._call_api, retry, timeout=timeout), + api_request=api_request, path=path, item_to_value=_item_to_job, items_key="jobs", diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 271640dd5592..01bb1f2e1719 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -425,9 +425,14 @@ def test_list_projects_defaults(self): creds = _make_credentials() client = self._make_one(PROJECT_1, creds) conn = client._connection = make_connection(DATA) - iterator = client.list_projects() - page = six.next(iterator.pages) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) projects = list(page) token = iterator.next_page_token @@ -455,7 +460,13 @@ def test_list_projects_w_timeout(self): conn = client._connection = make_connection(DATA) iterator = client.list_projects(timeout=7.5) - six.next(iterator.pages) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) conn.api_request.assert_called_once_with( method="GET", path="/projects", query_params={}, timeout=7.5 @@ -469,7 +480,13 @@ def test_list_projects_explicit_response_missing_projects_key(self): conn = client._connection = make_connection(DATA) iterator = client.list_projects(max_results=3, page_token=TOKEN) - page = six.next(iterator.pages) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) projects = list(page) token = iterator.next_page_token @@ -518,7 +535,12 @@ def test_list_datasets_defaults(self): conn = client._connection = make_connection(DATA) iterator = client.list_datasets() - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) datasets = list(page) token = iterator.next_page_token @@ -538,7 +560,14 @@ def test_list_datasets_w_project_and_timeout(self): client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection({}) - list(client.list_datasets(project="other-project", timeout=7.5)) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + list(client.list_datasets(project="other-project", timeout=7.5)) + + final_attributes.assert_called_once_with( + {"path": "/projects/other-project/datasets"}, client, None + ) conn.api_request.assert_called_once_with( method="GET", @@ -559,7 +588,12 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): iterator = client.list_datasets( include_all=True, filter=FILTER, max_results=3, page_token=TOKEN ) - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) datasets = list(page) token = iterator.next_page_token @@ -2838,7 +2872,12 @@ def test_list_tables_empty_w_timeout(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) iterator = client.list_tables(dataset, timeout=7.5) self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": path}, client, None) tables = list(page) token = iterator.next_page_token @@ -2856,7 +2895,12 @@ def test_list_models_empty_w_timeout(self): dataset_id = "{}.{}".format(self.PROJECT, self.DS_ID) iterator = client.list_models(dataset_id, timeout=7.5) - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": path}, client, None) models = list(page) token = iterator.next_page_token @@ -2900,7 +2944,12 @@ def test_list_models_defaults(self): iterator = client.list_models(dataset) self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) models = list(page) token = iterator.next_page_token @@ -2926,7 +2975,16 @@ def test_list_routines_empty_w_timeout(self): conn = client._connection = make_connection({}) iterator = client.list_routines("test-routines.test_routines", timeout=7.5) - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with( + {"path": "/projects/test-routines/datasets/test_routines/routines"}, + client, + None, + ) routines = list(page) token = iterator.next_page_token @@ -2975,7 +3033,12 @@ def test_list_routines_defaults(self): iterator = client.list_routines(dataset) self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": path}, client, None) routines = list(page) actual_token = iterator.next_page_token @@ -3039,7 +3102,12 @@ def test_list_tables_defaults(self): iterator = client.list_tables(dataset) self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) tables = list(page) token = iterator.next_page_token @@ -3098,7 +3166,12 @@ def test_list_tables_explicit(self): page_token=TOKEN, ) self.assertEqual(iterator.dataset, dataset) - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) tables = list(page) token = iterator.next_page_token @@ -3921,7 +3994,12 @@ def test_list_jobs_defaults(self): conn = client._connection = make_connection(DATA) iterator = client.list_jobs() - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) jobs = list(page) token = iterator.next_page_token @@ -3966,7 +4044,12 @@ def test_list_jobs_load_job_wo_sourceUris(self): conn = client._connection = make_connection(DATA) iterator = client.list_jobs() - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) jobs = list(page) token = iterator.next_page_token @@ -3995,7 +4078,12 @@ def test_list_jobs_explicit_missing(self): iterator = client.list_jobs( max_results=1000, page_token=TOKEN, all_users=True, state_filter="done" ) - page = six.next(iterator.pages) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = six.next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) jobs = list(page) token = iterator.next_page_token From 30c5cdb49658668ad909f8176d95e3eb40c30958 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 27 Aug 2020 10:31:07 -0500 Subject: [PATCH 0925/2016] chore: use specific version for black (#247) --- packages/google-cloud-bigquery/noxfile.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index a27a5939e339..7f2dee34c8b2 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -20,6 +20,7 @@ import nox +BLACK_VERSION = "black==19.10b0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") @@ -147,7 +148,7 @@ def lint(session): serious code quality issues. """ - session.install("black", "flake8") + session.install("flake8", BLACK_VERSION) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") @@ -173,7 +174,7 @@ def blacken(session): That run uses an image that doesn't have 3.6 installed. Before updating this check the state of the `gcp_ubuntu_config` we use for that Kokoro run. """ - session.install("black") + session.install(BLACK_VERSION) session.run("black", *BLACK_PATHS) From bd78f8a948f64919fdc81b93037aa64bd9240039 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 31 Aug 2020 16:42:32 +0530 Subject: [PATCH 0926/2016] chore: bump pyarrow to 1.0.0 (#250) * chore: bump pyarrow to 1.0.0 * chore: add condition for python3 --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index f30968364e08..d23f77b1cc37 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -43,7 +43,7 @@ # as `google-api-core[grpc]`. We thus need to explicitly specify it here. # See: https://github.com/googleapis/python-bigquery/issues/83 "grpcio >= 1.8.2, < 2.0dev", - "pyarrow>=0.16.0, < 2.0dev", + "pyarrow >= 1.0.0, < 2.0dev; python_version >= '3.5'", ], "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. From eff1f0bc24496e49f0de59cd7ff6dca06fb6ba4f Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Thu, 3 Sep 2020 00:32:13 +0530 Subject: [PATCH 0927/2016] tests: fix flaky systest (#232) Avoid sharing dataset ids between separate test cases. --- packages/google-cloud-bigquery/tests/system.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index be79a6d20780..0048c13e0f6c 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -422,7 +422,7 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): self.assertEqual(table.clustering_fields, ["user_email", "store_code"]) def test_delete_dataset_with_string(self): - dataset_id = _make_dataset_id("delete_table_true") + dataset_id = _make_dataset_id("delete_table_true_with_string") project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) @@ -431,7 +431,7 @@ def test_delete_dataset_with_string(self): self.assertFalse(_dataset_exists(dataset_ref)) def test_delete_dataset_delete_contents_true(self): - dataset_id = _make_dataset_id("delete_table_true") + dataset_id = _make_dataset_id("delete_table_true_with_content") project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) From b30d33a13c478badb47b0930502fbb8a186c110d Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 7 Sep 2020 10:24:32 +0530 Subject: [PATCH 0928/2016] feat: expose require_partition_filter for hive_partition (#257) --- .../google/cloud/bigquery/external_config.py | 15 +++++++++++++++ .../tests/unit/test_external_config.py | 4 ++++ 2 files changed, 19 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index d702d9d83302..112dfdba4e5a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -586,6 +586,21 @@ def source_uri_prefix(self): def source_uri_prefix(self, value): self._properties["sourceUriPrefix"] = value + @property + def require_partition_filter(self): + """Optional[bool]: If set to true, queries over the partitioned table require a + partition filter that can be used for partition elimination to be + specified. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode + """ + return self._properties.get("requirePartitionFilter") + + @require_partition_filter.setter + def require_partition_filter(self, value): + self._properties["requirePartitionFilter"] = value + def to_api_repr(self): """Build an API representation of this object. diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 6028d069bcbe..4b6ef51189ce 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -181,6 +181,7 @@ def test_from_api_repr_hive_partitioning(self): "hivePartitioningOptions": { "sourceUriPrefix": "http://foo/bar", "mode": "STRINGS", + "requirePartitionFilter": True, }, }, ) @@ -194,6 +195,7 @@ def test_from_api_repr_hive_partitioning(self): ) self.assertEqual(ec.hive_partitioning.source_uri_prefix, "http://foo/bar") self.assertEqual(ec.hive_partitioning.mode, "STRINGS") + self.assertEqual(ec.hive_partitioning.require_partition_filter, True) # converting back to API representation should yield the same result got_resource = ec.to_api_repr() @@ -210,6 +212,7 @@ def test_to_api_repr_hive_partitioning(self): hive_partitioning = external_config.HivePartitioningOptions() hive_partitioning.source_uri_prefix = "http://foo/bar" hive_partitioning.mode = "STRINGS" + hive_partitioning.require_partition_filter = False ec = external_config.ExternalConfig("FORMAT_FOO") ec.hive_partitioning = hive_partitioning @@ -221,6 +224,7 @@ def test_to_api_repr_hive_partitioning(self): "hivePartitioningOptions": { "sourceUriPrefix": "http://foo/bar", "mode": "STRINGS", + "requirePartitionFilter": False, }, } self.assertEqual(got_resource, expected_resource) From ddc19c8d06efa22391e818a3ef36835f6e24830e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 9 Sep 2020 22:21:31 +0200 Subject: [PATCH 0929/2016] feat: add custom cell magic parser to handle complex `--params` values (#213) * chore: Move cell magic code into its own directory * Add custom argument parser for cell magic * Add AST node visitor * Use a custom parser for cell magic arguments * Improve cell magic parser test coverage * Generalize valid option values The parser should accept as wide a range of values as possible and let the code that delas with the semantics to decide whether the values are good or not. * Fix recognizing --params option in state 3 The --params option spec must be followed by a non-alphanumeric character, otherwise it's a different option spec (e.g. --paramsX). * Fix typo in comment * Cover missing parser code path with a test * Preserve the cell magic context's import path The context still needs to be importable from the old path * Clarify lexer states * Replace re.scanner with finditer() * Fix typo in docstring * Simplify string literal in a single line Apparently black just places all implicitly concatenated string literals in a single line when short enough without replacing them with a single string literal. * Explain the visitors module. * Pass pos as a positional arg to finditer() This is necessary to retain Python 2 compatibility. * Resolve coverage complaint about a code path The tokens are designed in a way that the scanner *always* returns some match, even if just UNKNOWN or EOL. The "no matches" code path can thus never be taken, but the coverage check can't know that. --- .../google-cloud-bigquery/docs/magics.rst | 2 +- .../google/cloud/bigquery/__init__.py | 2 +- .../google/cloud/bigquery/magics/__init__.py | 20 + .../magics/line_arg_parser/__init__.py | 34 ++ .../magics/line_arg_parser/exceptions.py | 25 + .../bigquery/magics/line_arg_parser/lexer.py | 268 ++++++++++ .../bigquery/magics/line_arg_parser/parser.py | 484 ++++++++++++++++++ .../magics/line_arg_parser/visitors.py | 159 ++++++ .../cloud/bigquery/{ => magics}/magics.py | 70 ++- .../tests/unit/line_arg_parser/__init__.py | 13 + .../tests/unit/line_arg_parser/test_lexer.py | 32 ++ .../tests/unit/line_arg_parser/test_parser.py | 204 ++++++++ .../unit/line_arg_parser/test_visitors.py | 34 ++ .../tests/unit/test_magics.py | 362 +++++++++++-- 14 files changed, 1644 insertions(+), 65 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/magics/__init__.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/__init__.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/exceptions.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/parser.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/visitors.py rename packages/google-cloud-bigquery/google/cloud/bigquery/{ => magics}/magics.py (91%) create mode 100644 packages/google-cloud-bigquery/tests/unit/line_arg_parser/__init__.py create mode 100644 packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_lexer.py create mode 100644 packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py create mode 100644 packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_visitors.py diff --git a/packages/google-cloud-bigquery/docs/magics.rst b/packages/google-cloud-bigquery/docs/magics.rst index 732c27af94dd..bcaad8fa3683 100644 --- a/packages/google-cloud-bigquery/docs/magics.rst +++ b/packages/google-cloud-bigquery/docs/magics.rst @@ -1,5 +1,5 @@ IPython Magics for BigQuery =========================== -.. automodule:: google.cloud.bigquery.magics +.. automodule:: google.cloud.bigquery.magics.magics :members: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 63d71694cb5c..89c5a36240db 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -150,7 +150,7 @@ def load_ipython_extension(ipython): """Called by IPython when this module is loaded as an IPython extension.""" - from google.cloud.bigquery.magics import _cell_magic + from google.cloud.bigquery.magics.magics import _cell_magic ipython.register_magic_function( _cell_magic, magic_kind="cell", magic_name="bigquery" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/__init__.py new file mode 100644 index 000000000000..d228a35bb134 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud.bigquery.magics.magics import context + + +# For backwards compatibility we need to make the context available in the path +# google.cloud.bigquery.magics.context +__all__ = ("context",) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/__init__.py new file mode 100644 index 000000000000..9471446c516b --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/__init__.py @@ -0,0 +1,34 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud.bigquery.magics.line_arg_parser.exceptions import ParseError +from google.cloud.bigquery.magics.line_arg_parser.exceptions import ( + DuplicateQueryParamsError, + QueryParamsParseError, +) +from google.cloud.bigquery.magics.line_arg_parser.lexer import Lexer +from google.cloud.bigquery.magics.line_arg_parser.lexer import TokenType +from google.cloud.bigquery.magics.line_arg_parser.parser import Parser +from google.cloud.bigquery.magics.line_arg_parser.visitors import QueryParamsExtractor + + +__all__ = ( + "DuplicateQueryParamsError", + "Lexer", + "Parser", + "ParseError", + "QueryParamsExtractor", + "QueryParamsParseError", + "TokenType", +) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/exceptions.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/exceptions.py new file mode 100644 index 000000000000..6b2081186cab --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/exceptions.py @@ -0,0 +1,25 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class ParseError(Exception): + pass + + +class QueryParamsParseError(ParseError): + """Raised when --params option is syntactically incorrect.""" + + +class DuplicateQueryParamsError(ParseError): + pass diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py new file mode 100644 index 000000000000..17e1ffdae0bf --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -0,0 +1,268 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import namedtuple +from collections import OrderedDict +import itertools +import re + +import enum + + +Token = namedtuple("Token", ("type_", "lexeme", "pos")) +StateTransition = namedtuple("StateTransition", ("new_state", "total_offset")) + +# Pattern matching is done with regexes, and the order in which the token patterns are +# defined is important. +# +# Suppose we had the following token definitions: +# * INT - a token matching integers, +# * FLOAT - a token matching floating point numbers, +# * DOT - a token matching a single literal dot character, i.e. "." +# +# The FLOAT token would have to be defined first, since we would want the input "1.23" +# to be tokenized as a single FLOAT token, and *not* three tokens (INT, DOT, INT). +# +# Sometimes, however, different tokens match too similar patterns, and it is not +# possible to define them in order that would avoid any ambiguity. One such case are +# the OPT_VAL and PY_NUMBER tokens, as both can match an integer literal, say "42". +# +# In order to avoid the dilemmas, the lexer implements a concept of STATES. States are +# used to split token definitions into subgroups, and in each lexer state only a single +# subgroup is used for tokenizing the input. Lexer states can therefore be though of as +# token namespaces. +# +# For example, while parsing the value of the "--params" option, we do not want to +# "recognize" it as a single OPT_VAL token, but instead want to parse it as a Python +# dictionary and verify its syntactial correctness. On the other hand, while parsing +# the value of an option other than "--params", we do not really care about its +# structure, and thus do not want to use any of the "Python tokens" for pattern matching. +# +# Since token definition order is important, an OrderedDict is needed with tightly +# controlled member definitions (i.e. passed as a sequence, and *not* via kwargs). +token_types = OrderedDict( + [ + ( + "state_parse_pos_args", + OrderedDict( + [ + ( + "GOTO_PARSE_NON_PARAMS_OPTIONS", + r"(?P(?=--))", # double dash - starting the options list + ), + ( + "DEST_VAR", + r"(?P[^\d\W]\w*)", # essentially a Python ID + ), + ] + ), + ), + ( + "state_parse_non_params_options", + OrderedDict( + [ + ( + "GOTO_PARSE_PARAMS_OPTION", + r"(?P(?=--params(?:\s|=|--|$)))", # the --params option + ), + ("OPTION_SPEC", r"(?P--\w+)"), + ("OPTION_EQ", r"(?P=)"), + ("OPT_VAL", r"(?P\S+?(?=\s|--|$))"), + ] + ), + ), + ( + "state_parse_params_option", + OrderedDict( + [ + ( + "PY_STRING", + r"(?P(?:{})|(?:{}))".format( + r"'(?:[^'\\]|\.)*'", + r'"(?:[^"\\]|\.)*"', # single and double quoted strings + ), + ), + ("PARAMS_OPT_SPEC", r"(?P--params(?=\s|=|--|$))"), + ("PARAMS_OPT_EQ", r"(?P=)"), + ( + "GOTO_PARSE_NON_PARAMS_OPTIONS", + r"(?P(?=--\w+))", # found another option spec + ), + ("PY_BOOL", r"(?PTrue|False)"), + ("DOLLAR_PY_ID", r"(?P\$[^\d\W]\w*)"), + ( + "PY_NUMBER", + r"(?P-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)", + ), + ("SQUOTE", r"(?P')"), + ("DQUOTE", r'(?P")'), + ("COLON", r"(?P:)"), + ("COMMA", r"(?P,)"), + ("LCURL", r"(?P\{)"), + ("RCURL", r"(?P})"), + ("LSQUARE", r"(?P\[)"), + ("RSQUARE", r"(?P])"), + ("LPAREN", r"(?P\()"), + ("RPAREN", r"(?P\))"), + ] + ), + ), + ( + "common", + OrderedDict( + [ + ("WS", r"(?P\s+)"), + ("EOL", r"(?P$)"), + ( + # anything not a whitespace or matched by something else + "UNKNOWN", + r"(?P\S+)", + ), + ] + ), + ), + ] +) + + +# The _generate_next_value_() enum hook is only available in Python 3.6+, thus we +# need to do some acrobatics to implement an "auto str enum" base class. Implementation +# based on the recipe provided by the very author of the Enum library: +# https://stackoverflow.com/a/32313954/5040035 +class StrEnumMeta(enum.EnumMeta): + @classmethod + def __prepare__(metacls, name, bases, **kwargs): + # Having deterministic enum members definition order is nice. + return OrderedDict() + + def __new__(metacls, name, bases, oldclassdict): + # Scan through the declared enum members and convert any value that is a plain + # empty tuple into a `str` of the name instead. + newclassdict = enum._EnumDict() + for key, val in oldclassdict.items(): + if val == (): + val = key + newclassdict[key] = val + return super(StrEnumMeta, metacls).__new__(metacls, name, bases, newclassdict) + + +# The @six.add_metaclass decorator does not work, Enum complains about _sunder_ names, +# and we cannot use class syntax directly, because the Python 3 version would cause +# a syntax error under Python 2. +AutoStrEnum = StrEnumMeta( + "AutoStrEnum", + (str, enum.Enum), + {"__doc__": "Base enum class for for name=value str enums."}, +) + +TokenType = AutoStrEnum( + "TokenType", + [ + (name, name) + for name in itertools.chain.from_iterable(token_types.values()) + if not name.startswith("GOTO_") + ], +) + + +class LexerState(AutoStrEnum): + PARSE_POS_ARGS = () # parsing positional arguments + PARSE_NON_PARAMS_OPTIONS = () # parsing options other than "--params" + PARSE_PARAMS_OPTION = () # parsing the "--params" option + STATE_END = () + + +class Lexer(object): + """Lexical analyzer for tokenizing the cell magic input line.""" + + _GRAND_PATTERNS = { + LexerState.PARSE_POS_ARGS: re.compile( + "|".join( + itertools.chain( + token_types["state_parse_pos_args"].values(), + token_types["common"].values(), + ) + ) + ), + LexerState.PARSE_NON_PARAMS_OPTIONS: re.compile( + "|".join( + itertools.chain( + token_types["state_parse_non_params_options"].values(), + token_types["common"].values(), + ) + ) + ), + LexerState.PARSE_PARAMS_OPTION: re.compile( + "|".join( + itertools.chain( + token_types["state_parse_params_option"].values(), + token_types["common"].values(), + ) + ) + ), + } + + def __init__(self, input_text): + self._text = input_text + + def __iter__(self): + # Since re.scanner does not seem to support manipulating inner scanner states, + # we need to implement lexer state transitions manually using special + # non-capturing lookahead token patterns to signal when a state transition + # should be made. + # Since we don't have "nested" states, we don't really need a stack and + # this simple mechanism is sufficient. + state = LexerState.PARSE_POS_ARGS + offset = 0 # the number of characters processed so far + + while state != LexerState.STATE_END: + token_stream = self._find_state_tokens(state, offset) + + for maybe_token in token_stream: # pragma: NO COVER + if isinstance(maybe_token, StateTransition): + state = maybe_token.new_state + offset = maybe_token.total_offset + break + + if maybe_token.type_ != TokenType.WS: + yield maybe_token + + if maybe_token.type_ == TokenType.EOL: + state = LexerState.STATE_END + break + + def _find_state_tokens(self, state, current_offset): + """Scan the input for current state's tokens starting at ``current_offset``. + + Args: + state (LexerState): The current lexer state. + current_offset (int): The offset in the input text, i.e. the number + of characters already scanned so far. + + Yields: + The next ``Token`` or ``StateTransition`` instance. + """ + pattern = self._GRAND_PATTERNS[state] + scanner = pattern.finditer(self._text, current_offset) + + for match in scanner: # pragma: NO COVER + token_type = match.lastgroup + + if token_type.startswith("GOTO_"): + yield StateTransition( + new_state=getattr(LexerState, token_type[5:]), # w/o "GOTO_" prefix + total_offset=match.start(), + ) + + yield Token(token_type, match.group(), match.start()) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/parser.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/parser.py new file mode 100644 index 000000000000..b9da20cd7862 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/parser.py @@ -0,0 +1,484 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud.bigquery.magics.line_arg_parser import DuplicateQueryParamsError +from google.cloud.bigquery.magics.line_arg_parser import ParseError +from google.cloud.bigquery.magics.line_arg_parser import QueryParamsParseError +from google.cloud.bigquery.magics.line_arg_parser import TokenType + + +class ParseNode(object): + """A base class for nodes in the input parsed to an abstract syntax tree.""" + + +class InputLine(ParseNode): + def __init__(self, destination_var, option_list): + self.destination_var = destination_var + self.option_list = option_list + + +class DestinationVar(ParseNode): + def __init__(self, token): + # token type is DEST_VAR + self.token = token + self.name = token.lexeme if token is not None else None + + +class CmdOptionList(ParseNode): + def __init__(self, option_nodes): + self.options = [node for node in option_nodes] # shallow copy + + +class CmdOption(ParseNode): + def __init__(self, name, value): + self.name = name # string + self.value = value # CmdOptionValue node + + +class ParamsOption(CmdOption): + def __init__(self, value): + super(ParamsOption, self).__init__("params", value) + + +class CmdOptionValue(ParseNode): + def __init__(self, token): + # token type is OPT_VAL + self.token = token + self.value = token.lexeme + + +class PyVarExpansion(ParseNode): + def __init__(self, token): + self.token = token + self.raw_value = token.lexeme + + +class PyDict(ParseNode): + def __init__(self, dict_items): + self.items = [item for item in dict_items] # shallow copy + + +class PyDictItem(ParseNode): + def __init__(self, key, value): + self.key = key + self.value = value + + +class PyDictKey(ParseNode): + def __init__(self, token): + self.token = token + self.key_value = token.lexeme + + +class PyScalarValue(ParseNode): + def __init__(self, token, raw_value): + self.token = token + self.raw_value = raw_value + + +class PyTuple(ParseNode): + def __init__(self, tuple_items): + self.items = [item for item in tuple_items] # shallow copy + + +class PyList(ParseNode): + def __init__(self, list_items): + self.items = [item for item in list_items] # shallow copy + + +class Parser(object): + """Parser for the tokenized cell magic input line. + + The parser recognizes a simplified subset of Python grammar, specifically + a dictionary representation in typical use cases when the "--params" option + is used with the %%bigquery cell magic. + + The grammar (terminal symbols are CAPITALIZED): + + input_line : destination_var option_list + destination_var : DEST_VAR | EMPTY + option_list : (OPTION_SPEC [OPTION_EQ] option_value)* + (params_option | EMPTY) + (OPTION_SPEC [OPTION_EQ] option_value)* + + option_value : OPT_VAL | EMPTY + + # DOLLAR_PY_ID can occur if a variable passed to --params does not exist + # and is thus not expanded to a dict. + params_option : PARAMS_OPT_SPEC [PARAMS_OPT_EQ] \ + (DOLLAR_PY_ID | PY_STRING | py_dict) + + py_dict : LCURL dict_items RCURL + dict_items : dict_item | (dict_item COMMA dict_items) + dict_item : (dict_key COLON py_value) | EMPTY + + # dict items are actually @parameter names in the cell body (i.e. the query), + # thus restricting them to strings. + dict_key : PY_STRING + + py_value : PY_BOOL + | PY_NUMBER + | PY_STRING + | py_tuple + | py_list + | py_dict + + py_tuple : LPAREN collection_items RPAREN + py_list : LSQUARE collection_items RSQUARE + collection_items : collection_item | (collection_item COMMA collection_items) + collection_item : py_value | EMPTY + + Args: + lexer (line_arg_parser.lexer.Lexer): + An iterable producing a tokenized cell magic argument line. + """ + + def __init__(self, lexer): + self._lexer = lexer + self._tokens_iter = iter(self._lexer) + self.get_next_token() + + def get_next_token(self): + """Obtain the next token from the token stream and store it as current.""" + token = next(self._tokens_iter) + self._current_token = token + + def consume(self, expected_type, exc_type=ParseError): + """Move to the next token in token stream if it matches the expected type. + + Args: + expected_type (lexer.TokenType): The expected token type to be consumed. + exc_type (Optional[ParseError]): The type of the exception to raise. Should be + the ``ParseError`` class or one of its subclasses. Defaults to + ``ParseError``. + + Raises: + ParseError: If the current token does not match the expected type. + """ + if self._current_token.type_ == expected_type: + if expected_type != TokenType.EOL: + self.get_next_token() + else: + if self._current_token.type_ == TokenType.EOL: + msg = "Unexpected end of input, expected {}.".format(expected_type) + else: + msg = "Expected token type {}, but found {} at position {}.".format( + expected_type, self._current_token.lexeme, self._current_token.pos + ) + self.error(message=msg, exc_type=exc_type) + + def error(self, message="Syntax error.", exc_type=ParseError): + """Raise an error with the given message. + + Args: + expected_type (lexer.TokenType): The expected token type to be consumed. + exc_type (Optional[ParseError]): The type of the exception to raise. Should be + the ``ParseError`` class or one of its subclasses. Defaults to + ``ParseError``. + + Raises: + ParseError: If the current token does not match the expected type. + """ + raise exc_type(message) + + def input_line(self): + """The top level method for parsing the cell magic arguments line. + + Implements the following grammar production rule: + + input_line : destination_var option_list + """ + dest_var = self.destination_var() + options = self.option_list() + + token = self._current_token + + if token.type_ != TokenType.EOL: + msg = "Unexpected input at position {}: {}".format(token.pos, token.lexeme) + self.error(msg) + + return InputLine(dest_var, options) + + def destination_var(self): + """Implementation of the ``destination_var`` grammar production rule. + + Production: + + destination_var : DEST_VAR | EMPTY + """ + token = self._current_token + + if token.type_ == TokenType.DEST_VAR: + self.consume(TokenType.DEST_VAR) + result = DestinationVar(token) + elif token.type_ == TokenType.UNKNOWN: + msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme) + self.error(msg) + else: + result = DestinationVar(None) + + return result + + def option_list(self): + """Implementation of the ``option_list`` grammar production rule. + + Production: + + option_list : (OPTION_SPEC [OPTION_EQ] option_value)* + (params_option | EMPTY) + (OPTION_SPEC [OPTION_EQ] option_value)* + """ + all_options = [] + + def parse_nonparams_options(): + while self._current_token.type_ == TokenType.OPTION_SPEC: + token = self._current_token + self.consume(TokenType.OPTION_SPEC) + + opt_name = token.lexeme[2:] # cut off the "--" prefix + + # skip the optional "=" character + if self._current_token.type_ == TokenType.OPTION_EQ: + self.consume(TokenType.OPTION_EQ) + + opt_value = self.option_value() + option = CmdOption(opt_name, opt_value) + all_options.append(option) + + parse_nonparams_options() + + token = self._current_token + + if token.type_ == TokenType.PARAMS_OPT_SPEC: + option = self.params_option() + all_options.append(option) + + parse_nonparams_options() + + if self._current_token.type_ == TokenType.PARAMS_OPT_SPEC: + self.error( + message="Duplicate --params option", exc_type=DuplicateQueryParamsError + ) + + return CmdOptionList(all_options) + + def option_value(self): + """Implementation of the ``option_value`` grammar production rule. + + Production: + + option_value : OPT_VAL | EMPTY + """ + token = self._current_token + + if token.type_ == TokenType.OPT_VAL: + self.consume(TokenType.OPT_VAL) + result = CmdOptionValue(token) + elif token.type_ == TokenType.UNKNOWN: + msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme) + self.error(msg) + else: + result = None + + return result + + def params_option(self): + """Implementation of the ``params_option`` grammar production rule. + + Production: + + params_option : PARAMS_OPT_SPEC [PARAMS_OPT_EQ] \ + (DOLLAR_PY_ID | PY_STRING | py_dict) + """ + self.consume(TokenType.PARAMS_OPT_SPEC) + + # skip the optional "=" character + if self._current_token.type_ == TokenType.PARAMS_OPT_EQ: + self.consume(TokenType.PARAMS_OPT_EQ) + + if self._current_token.type_ == TokenType.DOLLAR_PY_ID: + token = self._current_token + self.consume(TokenType.DOLLAR_PY_ID) + opt_value = PyVarExpansion(token) + elif self._current_token.type_ == TokenType.PY_STRING: + token = self._current_token + self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError) + opt_value = PyScalarValue(token, token.lexeme) + else: + opt_value = self.py_dict() + + result = ParamsOption(opt_value) + + return result + + def py_dict(self): + """Implementation of the ``py_dict`` grammar production rule. + + Production: + + py_dict : LCURL dict_items RCURL + """ + self.consume(TokenType.LCURL, exc_type=QueryParamsParseError) + dict_items = self.dict_items() + self.consume(TokenType.RCURL, exc_type=QueryParamsParseError) + + return PyDict(dict_items) + + def dict_items(self): + """Implementation of the ``dict_items`` grammar production rule. + + Production: + + dict_items : dict_item | (dict_item COMMA dict_items) + """ + result = [] + + item = self.dict_item() + if item is not None: + result.append(item) + + while self._current_token.type_ == TokenType.COMMA: + self.consume(TokenType.COMMA, exc_type=QueryParamsParseError) + item = self.dict_item() + if item is not None: + result.append(item) + + return result + + def dict_item(self): + """Implementation of the ``dict_item`` grammar production rule. + + Production: + + dict_item : (dict_key COLON py_value) | EMPTY + """ + token = self._current_token + + if token.type_ == TokenType.PY_STRING: + key = self.dict_key() + self.consume(TokenType.COLON, exc_type=QueryParamsParseError) + value = self.py_value() + result = PyDictItem(key, value) + elif token.type_ == TokenType.UNKNOWN: + msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme) + self.error(msg, exc_type=QueryParamsParseError) + else: + result = None + + return result + + def dict_key(self): + """Implementation of the ``dict_key`` grammar production rule. + + Production: + + dict_key : PY_STRING + """ + token = self._current_token + self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError) + return PyDictKey(token) + + def py_value(self): + """Implementation of the ``py_value`` grammar production rule. + + Production: + + py_value : PY_BOOL | PY_NUMBER | PY_STRING | py_tuple | py_list | py_dict + """ + token = self._current_token + + if token.type_ == TokenType.PY_BOOL: + self.consume(TokenType.PY_BOOL, exc_type=QueryParamsParseError) + return PyScalarValue(token, token.lexeme) + elif token.type_ == TokenType.PY_NUMBER: + self.consume(TokenType.PY_NUMBER, exc_type=QueryParamsParseError) + return PyScalarValue(token, token.lexeme) + elif token.type_ == TokenType.PY_STRING: + self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError) + return PyScalarValue(token, token.lexeme) + elif token.type_ == TokenType.LPAREN: + tuple_node = self.py_tuple() + return tuple_node + elif token.type_ == TokenType.LSQUARE: + list_node = self.py_list() + return list_node + elif token.type_ == TokenType.LCURL: + dict_node = self.py_dict() + return dict_node + else: + msg = "Unexpected token type {} at position {}.".format( + token.type_, token.pos + ) + self.error(msg, exc_type=QueryParamsParseError) + + def py_tuple(self): + """Implementation of the ``py_tuple`` grammar production rule. + + Production: + + py_tuple : LPAREN collection_items RPAREN + """ + self.consume(TokenType.LPAREN, exc_type=QueryParamsParseError) + items = self.collection_items() + self.consume(TokenType.RPAREN, exc_type=QueryParamsParseError) + + return PyTuple(items) + + def py_list(self): + """Implementation of the ``py_list`` grammar production rule. + + Production: + + py_list : LSQUARE collection_items RSQUARE + """ + self.consume(TokenType.LSQUARE, exc_type=QueryParamsParseError) + items = self.collection_items() + self.consume(TokenType.RSQUARE, exc_type=QueryParamsParseError) + + return PyList(items) + + def collection_items(self): + """Implementation of the ``collection_items`` grammar production rule. + + Production: + + collection_items : collection_item | (collection_item COMMA collection_items) + """ + result = [] + + item = self.collection_item() + if item is not None: + result.append(item) + + while self._current_token.type_ == TokenType.COMMA: + self.consume(TokenType.COMMA, exc_type=QueryParamsParseError) + item = self.collection_item() + if item is not None: + result.append(item) + + return result + + def collection_item(self): + """Implementation of the ``collection_item`` grammar production rule. + + Production: + + collection_item : py_value | EMPTY + """ + if self._current_token.type_ not in {TokenType.RPAREN, TokenType.RSQUARE}: + result = self.py_value() + else: + result = None # end of list/tuple items + + return result diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/visitors.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/visitors.py new file mode 100644 index 000000000000..cbe236c06d91 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/visitors.py @@ -0,0 +1,159 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains classes that traverse AST and convert it to something else. + +If the parser successfully accepts a valid input (the bigquery cell magic arguments), +the result is an Abstract Syntax Tree (AST) that represents the input as a tree +with notes containing various useful metadata. + +Node visitors can process such tree and convert it to something else that can +be used for further processing, for example: + + * An optimized version of the tree with redundancy removed/simplified (not used here). + * The same tree, but with semantic errors checked, because an otherwise syntactically + valid input might still contain errors (not used here, semantic errors are detected + elsewhere). + * A form that can be directly handed to the code that operates on the input. The + ``QueryParamsExtractor`` class, for instance, splits the input arguments into + the "--params <...>" part and everything else. + The "everything else" part can be then parsed by the default Jupyter argument parser, + while the --params option is processed separately by the Python evaluator. + +More info on the visitor design pattern: +https://en.wikipedia.org/wiki/Visitor_pattern + +""" + +from __future__ import print_function + + +class NodeVisitor(object): + """Base visitor class implementing the dispatch machinery.""" + + def visit(self, node): + method_name = "visit_{}".format(type(node).__name__) + visitor_method = getattr(self, method_name, self.method_missing) + return visitor_method(node) + + def method_missing(self, node): + raise Exception("No visit_{} method".format(type(node).__name__)) + + +class QueryParamsExtractor(NodeVisitor): + """A visitor that extracts the "--params <...>" part from input line arguments.""" + + def visit_InputLine(self, node): + params_dict_parts = [] + other_parts = [] + + dest_var_parts = self.visit(node.destination_var) + params, other_options = self.visit(node.option_list) + + if dest_var_parts: + other_parts.extend(dest_var_parts) + + if dest_var_parts and other_options: + other_parts.append(" ") + other_parts.extend(other_options) + + params_dict_parts.extend(params) + + return "".join(params_dict_parts), "".join(other_parts) + + def visit_DestinationVar(self, node): + return [node.name] if node.name is not None else [] + + def visit_CmdOptionList(self, node): + params_opt_parts = [] + other_parts = [] + + for i, opt in enumerate(node.options): + option_parts = self.visit(opt) + list_to_extend = params_opt_parts if opt.name == "params" else other_parts + + if list_to_extend: + list_to_extend.append(" ") + list_to_extend.extend(option_parts) + + return params_opt_parts, other_parts + + def visit_CmdOption(self, node): + result = ["--{}".format(node.name)] + + if node.value is not None: + result.append(" ") + value_parts = self.visit(node.value) + result.extend(value_parts) + + return result + + def visit_CmdOptionValue(self, node): + return [node.value] + + def visit_ParamsOption(self, node): + value_parts = self.visit(node.value) + return value_parts + + def visit_PyVarExpansion(self, node): + return [node.raw_value] + + def visit_PyDict(self, node): + result = ["{"] + + for i, item in enumerate(node.items): + if i > 0: + result.append(", ") + item_parts = self.visit(item) + result.extend(item_parts) + + result.append("}") + return result + + def visit_PyDictItem(self, node): + result = self.visit(node.key) # key parts + result.append(": ") + value_parts = self.visit(node.value) + result.extend(value_parts) + return result + + def visit_PyDictKey(self, node): + return [node.key_value] + + def visit_PyScalarValue(self, node): + return [node.raw_value] + + def visit_PyTuple(self, node): + result = ["("] + + for i, item in enumerate(node.items): + if i > 0: + result.append(", ") + item_parts = self.visit(item) + result.extend(item_parts) + + result.append(")") + return result + + def visit_PyList(self, node): + result = ["["] + + for i, item in enumerate(node.items): + if i > 0: + result.append(", ") + item_parts = self.visit(item) + result.extend(item_parts) + + result.append("]") + return result diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py similarity index 91% rename from packages/google-cloud-bigquery/google/cloud/bigquery/magics.py rename to packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 7128e32bfea1..4842c76803a2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -65,13 +65,6 @@ the variable name (ex. ``$my_dict_var``). See ``In[6]`` and ``In[7]`` in the Examples section below. - .. note:: - - Due to the way IPython argument parser works, negative numbers in - dictionaries are incorrectly "recognized" as additional arguments, - resulting in an error ("unrecognized arguments"). To get around this, - pass such dictionary as a JSON string variable. - * ```` (required, cell argument): SQL query to run. If the query does not contain any whitespace (aside from leading and trailing whitespace), it is assumed to represent a @@ -159,13 +152,15 @@ except ImportError: # pragma: NO COVER raise ImportError("This module can only be loaded in IPython.") +import six + from google.api_core import client_info from google.api_core.exceptions import NotFound import google.auth from google.cloud import bigquery import google.cloud.bigquery.dataset from google.cloud.bigquery.dbapi import _helpers -import six +from google.cloud.bigquery.magics import line_arg_parser as lap IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) @@ -473,7 +468,27 @@ def _cell_magic(line, query): Returns: pandas.DataFrame: the query results. """ - args = magic_arguments.parse_argstring(_cell_magic, line) + # The built-in parser does not recognize Python structures such as dicts, thus + # we extract the "--params" option and inteprpret it separately. + try: + params_option_value, rest_of_args = _split_args_line(line) + except lap.exceptions.QueryParamsParseError as exc: + rebranded_error = SyntaxError( + "--params is not a correctly formatted JSON string or a JSON " + "serializable dictionary" + ) + six.raise_from(rebranded_error, exc) + except lap.exceptions.DuplicateQueryParamsError as exc: + rebranded_error = ValueError("Duplicate --params option.") + six.raise_from(rebranded_error, exc) + except lap.exceptions.ParseError as exc: + rebranded_error = ValueError( + "Unrecognized input, are option values correct? " + "Error details: {}".format(exc.args[0]) + ) + six.raise_from(rebranded_error, exc) + + args = magic_arguments.parse_argstring(_cell_magic, rest_of_args) if args.use_bqstorage_api is not None: warnings.warn( @@ -484,16 +499,16 @@ def _cell_magic(line, query): use_bqstorage_api = not args.use_rest_api params = [] - if args.params is not None: - try: - params = _helpers.to_query_parameters( - ast.literal_eval("".join(args.params)) - ) - except Exception: - raise SyntaxError( - "--params is not a correctly formatted JSON string or a JSON " - "serializable dictionary" + if params_option_value: + # A non-existing params variable is not expanded and ends up in the input + # in its raw form, e.g. "$query_params". + if params_option_value.startswith("$"): + msg = 'Parameter expansion failed, undefined variable "{}".'.format( + params_option_value[1:] ) + raise NameError(msg) + + params = _helpers.to_query_parameters(ast.literal_eval(params_option_value)) project = args.project or context.project client = bigquery.Client( @@ -598,6 +613,25 @@ def _cell_magic(line, query): close_transports() +def _split_args_line(line): + """Split out the --params option value from the input line arguments. + + Args: + line (str): The line arguments passed to the cell magic. + + Returns: + Tuple[str, str] + """ + lexer = lap.Lexer(line) + scanner = lap.Parser(lexer) + tree = scanner.input_line() + + extractor = lap.QueryParamsExtractor() + params_option_value, rest_of_args = extractor.visit(tree) + + return params_option_value, rest_of_args + + def _make_bqstorage_client(use_bqstorage_api, credentials): if not use_bqstorage_api: return None diff --git a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/__init__.py b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/__init__.py new file mode 100644 index 000000000000..c6334245aea5 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_lexer.py b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_lexer.py new file mode 100644 index 000000000000..22fa96f228eb --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_lexer.py @@ -0,0 +1,32 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +@pytest.fixture(scope="session") +def lexer_class(): + from google.cloud.bigquery.magics.line_arg_parser.lexer import Lexer + + return Lexer + + +def test_empy_input(lexer_class): + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + + lexer = lexer_class("") + tokens = list(lexer) + + assert tokens == [Token(TokenType.EOL, lexeme="", pos=0)] diff --git a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py new file mode 100644 index 000000000000..3edff88e9675 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py @@ -0,0 +1,204 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +@pytest.fixture(scope="session") +def parser_class(): + from google.cloud.bigquery.magics.line_arg_parser.parser import Parser + + return Parser + + +def test_consume_expected_eol(parser_class): + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + + # A simple iterable of Tokens is sufficient. + fake_lexer = [Token(TokenType.EOL, lexeme="", pos=0)] + parser = parser_class(fake_lexer) + + parser.consume(TokenType.EOL) # no error + + +def test_consume_unexpected_eol(parser_class): + from google.cloud.bigquery.magics.line_arg_parser import ParseError + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + + # A simple iterable of Tokens is sufficient. + fake_lexer = [Token(TokenType.EOL, lexeme="", pos=0)] + parser = parser_class(fake_lexer) + + with pytest.raises(ParseError, match=r"Unexpected end of input.*expected COLON.*"): + parser.consume(TokenType.COLON) + + +def test_input_line_unexpected_input(parser_class): + from google.cloud.bigquery.magics.line_arg_parser import ParseError + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + + # A simple iterable of Tokens is sufficient. + fake_lexer = [ + Token(TokenType.DEST_VAR, lexeme="results", pos=0), + Token(TokenType.UNKNOWN, lexeme="boo!", pos=8), + Token(TokenType.EOL, lexeme="", pos=12), + ] + parser = parser_class(fake_lexer) + + with pytest.raises(ParseError, match=r"Unexpected input.*position 8.*boo!.*"): + parser.input_line() + + +def test_destination_var_unexpected_input(parser_class): + from google.cloud.bigquery.magics.line_arg_parser import ParseError + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + + # A simple iterable of Tokens is sufficient. + fake_lexer = [ + Token(TokenType.UNKNOWN, lexeme="@!#", pos=2), + Token(TokenType.EOL, lexeme="", pos=5), + ] + parser = parser_class(fake_lexer) + + with pytest.raises(ParseError, match=r"Unknown.*position 2.*@!#.*"): + parser.destination_var() + + +def test_option_value_unexpected_input(parser_class): + from google.cloud.bigquery.magics.line_arg_parser import ParseError + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + + # A simple iterable of Tokens is sufficient. + fake_lexer = [ + Token(TokenType.UNKNOWN, lexeme="@!#", pos=8), + Token(TokenType.OPTION_SPEC, lexeme="--foo", pos=13), + ] + parser = parser_class(fake_lexer) + + with pytest.raises(ParseError, match=r"Unknown input.*position 8.*@!#.*"): + parser.option_value() + + +def test_dict_items_empty_dict(parser_class): + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + + # A simple iterable of Tokens is sufficient. + fake_lexer = [Token(TokenType.RCURL, lexeme="}", pos=22)] + parser = parser_class(fake_lexer) + + result = parser.dict_items() + + assert result == [] + + +def test_dict_items_trailing_comma(parser_class): + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + + # A simple iterable of Tokens is sufficient. + fake_lexer = [ + Token(TokenType.PY_STRING, lexeme="'age'", pos=10), + Token(TokenType.COLON, lexeme=":", pos=17), + Token(TokenType.PY_NUMBER, lexeme="18", pos=19), + Token(TokenType.COMMA, lexeme=",", pos=21), + Token(TokenType.RCURL, lexeme="}", pos=22), + ] + parser = parser_class(fake_lexer) + + result = parser.dict_items() + + assert len(result) == 1 + dict_item = result[0] + assert dict_item.key.key_value == "'age'" + assert dict_item.value.raw_value == "18" + + +def test_dict_item_unknown_input(parser_class): + from google.cloud.bigquery.magics.line_arg_parser import ParseError + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + + # A simple iterable of Tokens is sufficient. + fake_lexer = [Token(TokenType.UNKNOWN, lexeme="#/%", pos=35)] + parser = parser_class(fake_lexer) + + with pytest.raises(ParseError, match=r"Unknown.*position 35.*#/%.*"): + parser.dict_item() + + +def test_pyvalue_list_containing_dict(parser_class): + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + from google.cloud.bigquery.magics.line_arg_parser.parser import PyDict + from google.cloud.bigquery.magics.line_arg_parser.parser import PyList + + # A simple iterable of Tokens is sufficient. + fake_lexer = [ + Token(TokenType.LSQUARE, lexeme="[", pos=21), + Token(TokenType.LCURL, lexeme="{", pos=22), + Token(TokenType.PY_STRING, lexeme="'age'", pos=23), + Token(TokenType.COLON, lexeme=":", pos=28), + Token(TokenType.PY_NUMBER, lexeme="18", pos=30), + Token(TokenType.RCURL, lexeme="}", pos=32), + Token(TokenType.COMMA, lexeme=",", pos=33), # trailing comma + Token(TokenType.RSQUARE, lexeme="]", pos=34), + Token(TokenType.EOL, lexeme="", pos=40), + ] + parser = parser_class(fake_lexer) + + result = parser.py_value() + + assert isinstance(result, PyList) + assert len(result.items) == 1 + + element = result.items[0] + assert isinstance(element, PyDict) + assert len(element.items) == 1 + + dict_item = element.items[0] + assert dict_item.key.key_value == "'age'" + assert dict_item.value.raw_value == "18" + + +def test_pyvalue_invalid_token(parser_class): + from google.cloud.bigquery.magics.line_arg_parser import ParseError + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + + # A simple iterable of Tokens is sufficient. + fake_lexer = [Token(TokenType.OPTION_SPEC, lexeme="--verbose", pos=75)] + parser = parser_class(fake_lexer) + + error_pattern = r"Unexpected token.*OPTION_SPEC.*position 75.*" + with pytest.raises(ParseError, match=error_pattern): + parser.py_value() + + +def test_collection_items_empty(parser_class): + from google.cloud.bigquery.magics.line_arg_parser import TokenType + from google.cloud.bigquery.magics.line_arg_parser.lexer import Token + + # A simple iterable of Tokens is sufficient. + fake_lexer = [Token(TokenType.RPAREN, lexeme=")", pos=30)] + parser = parser_class(fake_lexer) + + result = parser.collection_items() + + assert result == [] diff --git a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_visitors.py b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_visitors.py new file mode 100644 index 000000000000..51d4f837a91c --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_visitors.py @@ -0,0 +1,34 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +@pytest.fixture +def base_visitor(): + from google.cloud.bigquery.magics.line_arg_parser.visitors import NodeVisitor + + return NodeVisitor() + + +def test_unknown_node(base_visitor): + from google.cloud.bigquery.magics.line_arg_parser.parser import ParseNode + + class UnknownNode(ParseNode): + pass + + node = UnknownNode() + + with pytest.raises(Exception, match=r"No visit_UnknownNode method"): + base_visitor.visit(node) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 7b07626ad9aa..73e44f311d8f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -43,7 +43,7 @@ from google.cloud import bigquery from google.cloud.bigquery import job from google.cloud.bigquery import table -from google.cloud.bigquery import magics +from google.cloud.bigquery.magics import magics from tests.unit.helpers import make_connection from test_utils.imports import maybe_fail_import @@ -69,6 +69,21 @@ def ipython_interactive(request, ipython): yield ipython +@pytest.fixture() +def ipython_ns_cleanup(): + """A helper to clean up user namespace after the test + + for the duration of the test scope. + """ + names_to_clean = [] # pairs (IPython_instance, name_to_clean) + + yield names_to_clean + + for ip, name in names_to_clean: + if name in ip.user_ns: + del ip.user_ns[name] + + @pytest.fixture(scope="session") def missing_bq_storage(): """Provide a patcher that can make the bigquery storage import to fail.""" @@ -256,7 +271,7 @@ def test__run_query(): ] client_patch = mock.patch( - "google.cloud.bigquery.magics.bigquery.Client", autospec=True + "google.cloud.bigquery.magics.magics.bigquery.Client", autospec=True ) with client_patch as client_mock, io.capture_output() as captured: client_mock().query(sql).result.side_effect = responses @@ -284,7 +299,7 @@ def test__run_query_dry_run_without_errors_is_silent(): sql = "SELECT 17" client_patch = mock.patch( - "google.cloud.bigquery.magics.bigquery.Client", autospec=True + "google.cloud.bigquery.magics.magics.bigquery.Client", autospec=True ) job_config = job.QueryJobConfig() @@ -350,7 +365,7 @@ def test__create_dataset_if_necessary_exists(): dataset_reference = bigquery.dataset.DatasetReference(project, dataset_id) dataset = bigquery.Dataset(dataset_reference) client_patch = mock.patch( - "google.cloud.bigquery.magics.bigquery.Client", autospec=True + "google.cloud.bigquery.magics.magics.bigquery.Client", autospec=True ) with client_patch as client_mock: client = client_mock() @@ -364,7 +379,7 @@ def test__create_dataset_if_necessary_not_exist(): project = "project_id" dataset_id = "dataset_id" client_patch = mock.patch( - "google.cloud.bigquery.magics.bigquery.Client", autospec=True + "google.cloud.bigquery.magics.magics.bigquery.Client", autospec=True ) with client_patch as client_mock: client = client_mock() @@ -382,7 +397,7 @@ def test_extension_load(): # verify that the magic is registered and has the correct source magic = ip.magics_manager.magics["cell"].get("bigquery") - assert magic.__module__ == "google.cloud.bigquery.magics" + assert magic.__module__ == "google.cloud.bigquery.magics.magics" @pytest.mark.usefixtures("ipython_interactive") @@ -415,7 +430,7 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): sql = "SELECT 17 AS num" result = pandas.DataFrame([17], columns=["num"]) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True @@ -445,7 +460,7 @@ def test_bigquery_magic_default_connection_user_agent(): "google.auth.default", return_value=(credentials_mock, "general-project") ) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) @@ -466,7 +481,7 @@ def test_bigquery_magic_with_legacy_sql(): ) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) with run_query_patch as run_query_mock: ip.run_cell_magic("bigquery", "--use_legacy_sql", "SELECT 17 AS num") @@ -477,19 +492,21 @@ def test_bigquery_magic_with_legacy_sql(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_result_saved_to_variable(): +def test_bigquery_magic_with_result_saved_to_variable(ipython_ns_cleanup): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + ipython_ns_cleanup.append((ip, "df")) + sql = "SELECT 17 AS num" result = pandas.DataFrame([17], columns=["num"]) assert "df" not in ip.user_ns run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True @@ -516,10 +533,10 @@ def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): ) clear_patch = mock.patch( - "google.cloud.bigquery.magics.display.clear_output", autospec=True + "google.cloud.bigquery.magics.magics.display.clear_output", autospec=True, ) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) with clear_patch as clear_mock, run_query_patch: ip.run_cell_magic("bigquery", "--verbose", "SELECT 17 as num") @@ -536,10 +553,10 @@ def test_bigquery_magic_clears_display_in_verbose_mode(): ) clear_patch = mock.patch( - "google.cloud.bigquery.magics.display.clear_output", autospec=True + "google.cloud.bigquery.magics.magics.display.clear_output", autospec=True, ) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) with clear_patch as clear_mock, run_query_patch: ip.run_cell_magic("bigquery", "", "SELECT 17 as num") @@ -576,7 +593,7 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): sql = "SELECT 17 AS num" result = pandas.DataFrame([17], columns=["num"]) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True @@ -635,7 +652,7 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): sql = "SELECT 17 AS num" result = pandas.DataFrame([17], columns=["num"]) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True @@ -719,7 +736,7 @@ def test_bigquery_magic_w_max_results_query_job_results_fails(): "google.cloud.bigquery.client.Client.query", autospec=True ) close_transports_patch = mock.patch( - "google.cloud.bigquery.magics._close_transports", autospec=True, + "google.cloud.bigquery.magics.magics._close_transports", autospec=True, ) sql = "SELECT 17 AS num" @@ -751,7 +768,7 @@ def test_bigquery_magic_w_table_id_invalid(): ) list_rows_patch = mock.patch( - "google.cloud.bigquery.magics.bigquery.Client.list_rows", + "google.cloud.bigquery.magics.magics.bigquery.Client.list_rows", autospec=True, side_effect=exceptions.BadRequest("Not a valid table ID"), ) @@ -792,11 +809,13 @@ def test_bigquery_magic_w_missing_query(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_w_table_id_and_destination_var(): +def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context._project = None + ipython_ns_cleanup.append((ip, "df")) + credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -809,7 +828,7 @@ def test_bigquery_magic_w_table_id_and_destination_var(): ) client_patch = mock.patch( - "google.cloud.bigquery.magics.bigquery.Client", autospec=True + "google.cloud.bigquery.magics.magics.bigquery.Client", autospec=True ) table_id = "bigquery-public-data.samples.shakespeare" @@ -849,7 +868,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): ) client_patch = mock.patch( - "google.cloud.bigquery.magics.bigquery.Client", autospec=True + "google.cloud.bigquery.magics.magics.bigquery.Client", autospec=True ) bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) @@ -882,7 +901,7 @@ def test_bigquery_magic_dryrun_option_sets_job_config(): ) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) sql = "SELECT 17 AS num" @@ -905,7 +924,7 @@ def test_bigquery_magic_dryrun_option_returns_query_job(): google.cloud.bigquery.job.QueryJob, instance=True ) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) sql = "SELECT 17 AS num" @@ -919,15 +938,17 @@ def test_bigquery_magic_dryrun_option_returns_query_job(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_dryrun_option_variable_error_message(): +def test_bigquery_magic_dryrun_option_variable_error_message(ipython_ns_cleanup): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + ipython_ns_cleanup.append((ip, "q_job")) + run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", + "google.cloud.bigquery.magics.magics._run_query", autospec=True, side_effect=exceptions.BadRequest("Syntax error in SQL query"), ) @@ -944,7 +965,7 @@ def test_bigquery_magic_dryrun_option_variable_error_message(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(): +def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(ipython_ns_cleanup): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( @@ -954,9 +975,11 @@ def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(): google.cloud.bigquery.job.QueryJob, instance=True ) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + ipython_ns_cleanup.append((ip, "q_job")) + sql = "SELECT 17 AS num" assert "q_job" not in ip.user_ns @@ -972,13 +995,15 @@ def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_saves_query_job_to_variable_on_error(): +def test_bigquery_magic_saves_query_job_to_variable_on_error(ipython_ns_cleanup): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + ipython_ns_cleanup.append((ip, "result")) + client_query_patch = mock.patch( "google.cloud.bigquery.client.Client.query", autospec=True ) @@ -1151,7 +1176,7 @@ def test_bigquery_magic_with_project(): "google.auth.default", return_value=(credentials_mock, "general-project") ) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) with run_query_patch as run_query_mock, default_patch: ip.run_cell_magic("bigquery", "--project=specific-project", "SELECT 17 as num") @@ -1162,30 +1187,65 @@ def test_bigquery_magic_with_project(): assert magics.context.project == "general-project" +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_multiple_options(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock, default_patch: + ip.run_cell_magic( + "bigquery", + "--project=specific-project --use_legacy_sql --maximum_bytes_billed 1024", + "SELECT 17 as num", + ) + + args, kwargs = run_query_mock.call_args + client_used = args[0] + assert client_used.project == "specific-project" + + job_config_used = kwargs["job_config"] + assert job_config_used.use_legacy_sql + assert job_config_used.maximum_bytes_billed == 1024 + + @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_string_params(): +def test_bigquery_magic_with_string_params(ipython_ns_cleanup): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + ipython_ns_cleanup.append((ip, "params_dict_df")) + sql = "SELECT @num AS num" result = pandas.DataFrame([17], columns=["num"]) - assert "params_string_df" not in ip.user_ns + + assert "params_dict_df" not in ip.user_ns run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: run_query_mock.return_value = query_job_mock - ip.run_cell_magic("bigquery", 'params_string_df --params {"num":17}', sql) + ip.run_cell_magic("bigquery", "params_string_df --params='{\"num\":17}'", sql) run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) @@ -1197,19 +1257,24 @@ def test_bigquery_magic_with_string_params(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params(): +def test_bigquery_magic_with_dict_params(ipython_ns_cleanup): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - sql = "SELECT @num AS num" - result = pandas.DataFrame([17], columns=["num"]) + ipython_ns_cleanup.append((ip, "params_dict_df")) + + sql = "SELECT @num AS num, @tricky_value as tricky_value" + result = pandas.DataFrame( + [(False, '--params "value"')], columns=["valid", "tricky_value"] + ) + assert "params_dict_df" not in ip.user_ns run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True @@ -1218,7 +1283,7 @@ def test_bigquery_magic_with_dict_params(): with run_query_patch as run_query_mock: run_query_mock.return_value = query_job_mock - params = {"num": 17} + params = {"valid": False, "tricky_value": '--params "value"'} # Insert dictionary into user namespace so that it can be expanded ip.user_ns["params"] = params ip.run_cell_magic("bigquery", "params_dict_df --params $params", sql) @@ -1230,6 +1295,194 @@ def test_bigquery_magic_with_dict_params(): assert len(df) == len(result) # verify row count assert list(df) == list(result) # verify column names + assert not df["valid"][0] + assert df["tricky_value"][0] == '--params "value"' + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_with_dict_params_nonexisting(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + sql = "SELECT @foo AS foo" + + with pytest.raises(NameError, match=r".*undefined variable.*unknown_name.*"): + ip.run_cell_magic("bigquery", "params_dict_df --params $unknown_name", sql) + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_with_dict_params_incorrect_syntax(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + sql = "SELECT @foo AS foo" + + with pytest.raises(SyntaxError, match=r".*--params.*"): + cell_magic_args = "params_dict_df --params {'foo': 1; 'bar': 2}" + ip.run_cell_magic("bigquery", cell_magic_args, sql) + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_with_dict_params_duplicate(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + sql = "SELECT @foo AS foo" + + with pytest.raises(ValueError, match=r"Duplicate --params option\."): + cell_magic_args = ( + "params_dict_df --params {'foo': 1} --verbose --params {'bar': 2} " + ) + ip.run_cell_magic("bigquery", cell_magic_args, sql) + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_with_option_value_incorrect(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + sql = "SELECT @foo AS foo" + + with pytest.raises(ValueError, match=r".*invalid literal.*\[PLENTY!\].*"): + cell_magic_args = "params_dict_df --max_results [PLENTY!]" + ip.run_cell_magic("bigquery", cell_magic_args, sql) + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_with_dict_params_negative_value(ipython_ns_cleanup): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + ipython_ns_cleanup.append((ip, "params_dict_df")) + + sql = "SELECT @num AS num" + result = pandas.DataFrame([-17], columns=["num"]) + + assert "params_dict_df" not in ip.user_ns + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + params = {"num": -17} + # Insert dictionary into user namespace so that it can be expanded + ip.user_ns["params"] = params + ip.run_cell_magic("bigquery", "params_dict_df --params $params", sql) + + run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=-17), mock.ANY) + + assert "params_dict_df" in ip.user_ns # verify that the variable exists + df = ip.user_ns["params_dict_df"] + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names + assert df["num"][0] == -17 + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + ipython_ns_cleanup.append((ip, "params_dict_df")) + + sql = "SELECT @num AS num" + result = pandas.DataFrame(["foo bar", "baz quux"], columns=["array_data"]) + + assert "params_dict_df" not in ip.user_ns + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + params = {"array_data": ["foo bar", "baz quux"]} + # Insert dictionary into user namespace so that it can be expanded + ip.user_ns["params"] = params + ip.run_cell_magic("bigquery", "params_dict_df --params $params", sql) + + run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=-17), mock.ANY) + + assert "params_dict_df" in ip.user_ns # verify that the variable exists + df = ip.user_ns["params_dict_df"] + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names + assert list(df["array_data"]) == ["foo bar", "baz quux"] + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + ipython_ns_cleanup.append((ip, "params_dict_df")) + + sql = "SELECT @num AS num" + result = pandas.DataFrame(["foo bar", "baz quux"], columns=["array_data"]) + + assert "params_dict_df" not in ip.user_ns + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + params = {"array_data": ("foo bar", "baz quux")} + # Insert dictionary into user namespace so that it can be expanded + ip.user_ns["params"] = params + ip.run_cell_magic("bigquery", "params_dict_df --params $params", sql) + + run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=-17), mock.ANY) + + assert "params_dict_df" in ip.user_ns # verify that the variable exists + df = ip.user_ns["params_dict_df"] + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names + assert list(df["array_data"]) == ["foo bar", "baz quux"] + @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1246,6 +1499,24 @@ def test_bigquery_magic_with_improperly_formatted_params(): ip.run_cell_magic("bigquery", "--params {17}", sql) +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_with_invalid_multiple_option_values(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + sql = "SELECT @foo AS foo" + + exc_pattern = r".*[Uu]nrecognized input.*option values correct\?.*567.*" + + with pytest.raises(ValueError, match=exc_pattern): + cell_magic_args = "params_dict_df --max_results 10 567" + ip.run_cell_magic("bigquery", cell_magic_args, sql) + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_omits_tracebacks_from_error_message(): ip = IPython.get_ipython() @@ -1259,7 +1530,7 @@ def test_bigquery_magic_omits_tracebacks_from_error_message(): ) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", + "google.cloud.bigquery.magics.magics._run_query", autospec=True, side_effect=exceptions.BadRequest("Syntax error in SQL query"), ) @@ -1287,7 +1558,7 @@ def test_bigquery_magic_w_destination_table_invalid_format(): ) client_patch = mock.patch( - "google.cloud.bigquery.magics.bigquery.Client", autospec=True + "google.cloud.bigquery.magics.magics.bigquery.Client", autospec=True ) with client_patch, default_patch, pytest.raises(ValueError) as exc_context: @@ -1310,11 +1581,12 @@ def test_bigquery_magic_w_destination_table(): ) create_dataset_if_necessary_patch = mock.patch( - "google.cloud.bigquery.magics._create_dataset_if_necessary", autospec=True + "google.cloud.bigquery.magics.magics._create_dataset_if_necessary", + autospec=True, ) run_query_patch = mock.patch( - "google.cloud.bigquery.magics._run_query", autospec=True + "google.cloud.bigquery.magics.magics._run_query", autospec=True ) with create_dataset_if_necessary_patch, run_query_patch as run_query_mock: @@ -1341,12 +1613,12 @@ def test_bigquery_magic_create_dataset_fails(): ) create_dataset_if_necessary_patch = mock.patch( - "google.cloud.bigquery.magics._create_dataset_if_necessary", + "google.cloud.bigquery.magics.magics._create_dataset_if_necessary", autospec=True, side_effect=OSError, ) close_transports_patch = mock.patch( - "google.cloud.bigquery.magics._close_transports", autospec=True, + "google.cloud.bigquery.magics.magics._close_transports", autospec=True, ) with pytest.raises( From ca853c250db415a08da5a9842222e9531c5d4d79 Mon Sep 17 00:00:00 2001 From: Aravin <34178459+aravinsiva@users.noreply.github.com> Date: Fri, 11 Sep 2020 10:31:35 -0400 Subject: [PATCH 0930/2016] refactor: restructure opentelemetry instrumentation (#244) * restructing instrumentation format * adding suggested changes * renaming to span_name * liniting * adding tests for create span in call_api * swapping test names Co-authored-by: Tim Swast --- .../google/cloud/bigquery/client.py | 466 ++++++++++-------- .../google/cloud/bigquery/job.py | 84 ++-- .../tests/unit/test_client.py | 48 ++ .../tests/unit/test_job.py | 193 ++++---- 4 files changed, 445 insertions(+), 346 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e7f8c5c66e7b..86275487b341 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -248,14 +248,14 @@ def get_service_account_email( project = self.project path = "/projects/%s/serviceAccount" % (project,) span_attributes = {"path": path} - with create_span( - name="BigQuery.getServiceAccountEmail", - attributes=span_attributes, - client=self, - ): - api_response = self._call_api( - retry, method="GET", path=path, timeout=timeout - ) + api_response = self._call_api( + retry, + span_name="BigQuery.getServiceAccountEmail", + span_attributes=span_attributes, + method="GET", + path=path, + timeout=timeout, + ) return api_response["email"] def list_projects( @@ -292,10 +292,14 @@ def list_projects( span_attributes = {"path": "/projects"} def api_request(*args, **kwargs): - with create_span( - name="BigQuery.listProjects", attributes=span_attributes, client=self - ): - return self._call_api(retry, *args, timeout=timeout, **kwargs) + return self._call_api( + retry, + span_name="BigQuery.listProjects", + span_attributes=span_attributes, + *args, + timeout=timeout, + **kwargs + ) return page_iterator.HTTPIterator( client=self, @@ -365,10 +369,15 @@ def list_datasets( span_attributes = {"path": path} def api_request(*args, **kwargs): - with create_span( - name="BigQuery.listDatasets", attributes=span_attributes, client=self - ): - return self._call_api(retry, *args, timeout=timeout, **kwargs) + + return self._call_api( + retry, + span_name="BigQuery.listDatasets", + span_attributes=span_attributes, + *args, + timeout=timeout, + **kwargs + ) return page_iterator.HTTPIterator( client=self, @@ -497,12 +506,16 @@ def create_dataset( try: span_attributes = {"path": path} - with create_span( - name="BigQuery.createDataset", attributes=span_attributes, client=self - ): - api_response = self._call_api( - retry, method="POST", path=path, data=data, timeout=timeout - ) + + api_response = self._call_api( + retry, + span_name="BigQuery.createDataset", + span_attributes=span_attributes, + method="POST", + path=path, + data=data, + timeout=timeout, + ) return Dataset.from_api_repr(api_response) except google.api_core.exceptions.Conflict: if not exists_ok: @@ -545,12 +558,15 @@ def create_routine( resource = routine.to_api_repr() try: span_attributes = {"path": path} - with create_span( - name="BigQuery.createRoutine", attributes=span_attributes, client=self - ): - api_response = self._call_api( - retry, method="POST", path=path, data=resource, timeout=timeout - ) + api_response = self._call_api( + retry, + span_name="BigQuery.createRoutine", + span_attributes=span_attributes, + method="POST", + path=path, + data=resource, + timeout=timeout, + ) return Routine.from_api_repr(api_response) except google.api_core.exceptions.Conflict: if not exists_ok: @@ -596,22 +612,33 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None data = table.to_api_repr() try: span_attributes = {"path": path, "dataset_id": dataset_id} - with create_span( - name="BigQuery.createTable", attributes=span_attributes, client=self - ): - api_response = self._call_api( - retry, method="POST", path=path, data=data, timeout=timeout - ) + api_response = self._call_api( + retry, + span_name="BigQuery.createTable", + span_attributes=span_attributes, + method="POST", + path=path, + data=data, + timeout=timeout, + ) return Table.from_api_repr(api_response) except google.api_core.exceptions.Conflict: if not exists_ok: raise return self.get_table(table.reference, retry=retry) - def _call_api(self, retry, **kwargs): + def _call_api( + self, retry, span_name=None, span_attributes=None, job_ref=None, **kwargs + ): + call = functools.partial(self._connection.api_request, **kwargs) if retry: call = retry(call) + if span_name is not None: + with create_span( + name=span_name, attributes=span_attributes, client=self, job_ref=job_ref + ): + return call() return call() def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): @@ -642,12 +669,14 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): ) path = dataset_ref.path span_attributes = {"path": path} - with create_span( - name="BigQuery.getDataset", attributes=span_attributes, client=self - ): - api_response = self._call_api( - retry, method="GET", path=path, timeout=timeout - ) + api_response = self._call_api( + retry, + span_name="BigQuery.getDataset", + span_attributes=span_attributes, + method="GET", + path=path, + timeout=timeout, + ) return Dataset.from_api_repr(api_response) def get_iam_policy( @@ -663,12 +692,15 @@ def get_iam_policy( path = "{}:getIamPolicy".format(table.path) span_attributes = {"path": path} - with create_span( - name="BigQuery.getIamPolicy", attributes=span_attributes, client=self - ): - response = self._call_api( - retry, method="POST", path=path, data=body, timeout=timeout, - ) + response = self._call_api( + retry, + span_name="BigQuery.getIamPolicy", + span_attributes=span_attributes, + method="POST", + path=path, + data=body, + timeout=timeout, + ) return Policy.from_api_repr(response) @@ -688,12 +720,16 @@ def set_iam_policy( path = "{}:setIamPolicy".format(table.path) span_attributes = {"path": path} - with create_span( - name="BigQuery.setIamPolicy", attributes=span_attributes, client=self - ): - response = self._call_api( - retry, method="POST", path=path, data=body, timeout=timeout, - ) + + response = self._call_api( + retry, + span_name="BigQuery.setIamPolicy", + span_attributes=span_attributes, + method="POST", + path=path, + data=body, + timeout=timeout, + ) return Policy.from_api_repr(response) @@ -707,12 +743,15 @@ def test_iam_permissions( path = "{}:testIamPermissions".format(table.path) span_attributes = {"path": path} - with create_span( - name="BigQuery.testIamPermissions", attributes=span_attributes, client=self - ): - response = self._call_api( - retry, method="POST", path=path, data=body, timeout=timeout, - ) + response = self._call_api( + retry, + span_name="BigQuery.testIamPermissions", + span_attributes=span_attributes, + method="POST", + path=path, + data=body, + timeout=timeout, + ) return response @@ -743,12 +782,15 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): ) path = model_ref.path span_attributes = {"path": path} - with create_span( - name="BigQuery.getModel", attributes=span_attributes, client=self - ): - api_response = self._call_api( - retry, method="GET", path=path, timeout=timeout - ) + + api_response = self._call_api( + retry, + span_name="BigQuery.getModel", + span_attributes=span_attributes, + method="GET", + path=path, + timeout=timeout, + ) return Model.from_api_repr(api_response) def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): @@ -780,12 +822,14 @@ def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): ) path = routine_ref.path span_attributes = {"path": path} - with create_span( - name="BigQuery.getRoutine", attributes=span_attributes, client=self - ): - api_response = self._call_api( - retry, method="GET", path=path, timeout=timeout - ) + api_response = self._call_api( + retry, + span_name="BigQuery.getRoutine", + span_attributes=span_attributes, + method="GET", + path=path, + timeout=timeout, + ) return Routine.from_api_repr(api_response) def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): @@ -814,12 +858,14 @@ def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): table_ref = _table_arg_to_table_ref(table, default_project=self.project) path = table_ref.path span_attributes = {"path": path} - with create_span( - name="BigQuery.getTable", attributes=span_attributes, client=self - ): - api_response = self._call_api( - retry, method="GET", path=path, timeout=timeout - ) + api_response = self._call_api( + retry, + span_name="BigQuery.getTable", + span_attributes=span_attributes, + method="GET", + path=path, + timeout=timeout, + ) return Table.from_api_repr(api_response) def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): @@ -859,17 +905,16 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): path = dataset.path span_attributes = {"path": path, "fields": fields} - with create_span( - name="BigQuery.updateDataset", attributes=span_attributes, client=self - ): - api_response = self._call_api( - retry, - method="PATCH", - path=path, - data=partial, - headers=headers, - timeout=timeout, - ) + api_response = self._call_api( + retry, + span_name="BigQuery.updateDataset", + span_attributes=span_attributes, + method="PATCH", + path=path, + data=partial, + headers=headers, + timeout=timeout, + ) return Dataset.from_api_repr(api_response) def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): @@ -908,17 +953,16 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): path = model.path span_attributes = {"path": path, "fields": fields} - with create_span( - name="BigQuery.updateModel", attributes=span_attributes, client=self - ): - api_response = self._call_api( - retry, - method="PATCH", - path=path, - data=partial, - headers=headers, - timeout=timeout, - ) + api_response = self._call_api( + retry, + span_name="BigQuery.updateModel", + span_attributes=span_attributes, + method="PATCH", + path=path, + data=partial, + headers=headers, + timeout=timeout, + ) return Model.from_api_repr(api_response) def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): @@ -968,17 +1012,16 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): path = routine.path span_attributes = {"path": path, "fields": fields} - with create_span( - name="BigQuery.updateRoutine", attributes=span_attributes, client=self - ): - api_response = self._call_api( - retry, - method="PUT", - path=path, - data=partial, - headers=headers, - timeout=timeout, - ) + api_response = self._call_api( + retry, + span_name="BigQuery.updateRoutine", + span_attributes=span_attributes, + method="PUT", + path=path, + data=partial, + headers=headers, + timeout=timeout, + ) return Routine.from_api_repr(api_response) def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): @@ -1018,17 +1061,16 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): path = table.path span_attributes = {"path": path, "fields": fields} - with create_span( - name="BigQuery.updateTable", attributes=span_attributes, client=self - ): - api_response = self._call_api( - retry, - method="PATCH", - path=path, - data=partial, - headers=headers, - timeout=timeout, - ) + api_response = self._call_api( + retry, + span_name="BigQuery.updateTable", + span_attributes=span_attributes, + method="PATCH", + path=path, + data=partial, + headers=headers, + timeout=timeout, + ) return Table.from_api_repr(api_response) def list_models( @@ -1087,10 +1129,14 @@ def list_models( span_attributes = {"path": path} def api_request(*args, **kwargs): - with create_span( - name="BigQuery.listModels", attributes=span_attributes, client=self - ): - return self._call_api(retry, *args, timeout=timeout, **kwargs) + return self._call_api( + retry, + span_name="BigQuery.listModels", + span_attributes=span_attributes, + *args, + timeout=timeout, + **kwargs + ) result = page_iterator.HTTPIterator( client=self, @@ -1161,10 +1207,14 @@ def list_routines( span_attributes = {"path": path} def api_request(*args, **kwargs): - with create_span( - name="BigQuery.listRoutines", attributes=span_attributes, client=self - ): - return self._call_api(retry, *args, timeout=timeout, **kwargs) + return self._call_api( + retry, + span_name="BigQuery.listRoutines", + span_attributes=span_attributes, + *args, + timeout=timeout, + **kwargs + ) result = page_iterator.HTTPIterator( client=self, @@ -1234,10 +1284,14 @@ def list_tables( span_attributes = {"path": path} def api_request(*args, **kwargs): - with create_span( - name="BigQuery.listTables", attributes=span_attributes, client=self - ): - return self._call_api(retry, *args, timeout=timeout, **kwargs) + return self._call_api( + retry, + span_name="BigQuery.listTables", + span_attributes=span_attributes, + *args, + timeout=timeout, + **kwargs + ) result = page_iterator.HTTPIterator( client=self, @@ -1304,16 +1358,15 @@ def delete_dataset( span_attributes = {"path": path} try: - with create_span( - name="BigQuery.deleteDataset", attributes=span_attributes, client=self - ): - self._call_api( - retry, - method="DELETE", - path=path, - query_params=params, - timeout=timeout, - ) + self._call_api( + retry, + span_name="BigQuery.deleteDataset", + span_attributes=span_attributes, + method="DELETE", + path=path, + query_params=params, + timeout=timeout, + ) except google.api_core.exceptions.NotFound: if not not_found_ok: raise @@ -1354,10 +1407,14 @@ def delete_model( path = model.path try: span_attributes = {"path": path} - with create_span( - name="BigQuery.deleteModel", attributes=span_attributes, client=self - ): - self._call_api(retry, method="DELETE", path=path, timeout=timeout) + self._call_api( + retry, + span_name="BigQuery.deleteModel", + span_attributes=span_attributes, + method="DELETE", + path=path, + timeout=timeout, + ) except google.api_core.exceptions.NotFound: if not not_found_ok: raise @@ -1400,10 +1457,14 @@ def delete_routine( try: span_attributes = {"path": path} - with create_span( - name="BigQuery.deleteRoutine", attributes=span_attributes, client=self - ): - self._call_api(retry, method="DELETE", path=path, timeout=timeout) + self._call_api( + retry, + span_name="BigQuery.deleteRoutine", + span_attributes=span_attributes, + method="DELETE", + path=path, + timeout=timeout, + ) except google.api_core.exceptions.NotFound: if not not_found_ok: raise @@ -1442,10 +1503,14 @@ def delete_table( try: path = table.path span_attributes = {"path": path} - with create_span( - name="BigQuery.deleteTable", attributes=span_attributes, client=self - ): - self._call_api(retry, method="DELETE", path=path, timeout=timeout) + self._call_api( + retry, + span_name="BigQuery.deleteTable", + span_attributes=span_attributes, + method="DELETE", + path=path, + timeout=timeout, + ) except google.api_core.exceptions.NotFound: if not not_found_ok: raise @@ -1494,17 +1559,15 @@ def _get_query_results( # job is complete (from QueryJob.done(), called ultimately from # QueryJob.result()). So we don't need to poll here. span_attributes = {"path": path} - - with create_span( - name="BigQuery.getQueryResults", attributes=span_attributes, client=self - ): - resource = self._call_api( - retry, - method="GET", - path=path, - query_params=extra_params, - timeout=timeout, - ) + resource = self._call_api( + retry, + span_name="BigQuery.getQueryResults", + span_attributes=span_attributes, + method="GET", + path=path, + query_params=extra_params, + timeout=timeout, + ) return _QueryResults.from_api_repr(resource) def job_from_resource(self, resource): @@ -1650,16 +1713,15 @@ def get_job( span_attributes = {"path": path, "job_id": job_id, "location": location} - with create_span( - name="BigQuery.getJob", attributes=span_attributes, client=self - ): - resource = self._call_api( - retry, - method="GET", - path=path, - query_params=extra_params, - timeout=timeout, - ) + resource = self._call_api( + retry, + span_name="BigQuery.getJob", + span_attributes=span_attributes, + method="GET", + path=path, + query_params=extra_params, + timeout=timeout, + ) return self.job_from_resource(resource) @@ -1708,16 +1770,15 @@ def cancel_job( span_attributes = {"path": path, "job_id": job_id, "location": location} - with create_span( - name="BigQuery.cancelJob", attributes=span_attributes, client=self - ): - resource = self._call_api( - retry, - method="POST", - path=path, - query_params=extra_params, - timeout=timeout, - ) + resource = self._call_api( + retry, + span_name="BigQuery.cancelJob", + span_attributes=span_attributes, + method="POST", + path=path, + query_params=extra_params, + timeout=timeout, + ) return self.job_from_resource(resource["job"]) @@ -1811,10 +1872,14 @@ def list_jobs( span_attributes = {"path": path} def api_request(*args, **kwargs): - with create_span( - name="BigQuery.listJobs", attributes=span_attributes, client=self - ): - return self._call_api(retry, *args, timeout=timeout, **kwargs) + return self._call_api( + retry, + span_name="BigQuery.listJobs", + span_attributes=span_attributes, + *args, + timeout=timeout, + **kwargs + ) return page_iterator.HTTPIterator( client=self, @@ -2904,12 +2969,15 @@ def insert_rows_json( path = "%s/insertAll" % table.path # We can always retry, because every row has an insert ID. span_attributes = {"path": path} - with create_span( - name="BigQuery.insertRowsJson", attributes=span_attributes, client=self - ): - response = self._call_api( - retry, method="POST", path=path, data=data, timeout=timeout, - ) + response = self._call_api( + retry, + span_name="BigQuery.insertRowsJson", + span_attributes=span_attributes, + method="POST", + path=path, + data=data, + timeout=timeout, + ) errors = [] for error in response.get("insertErrors", ()): @@ -3066,10 +3134,10 @@ def list_rows( def _schema_from_json_file_object(self, file_obj): """Helper function for schema_from_json that takes a - file object that describes a table schema. + file object that describes a table schema. - Returns: - List of schema field objects. + Returns: + List of schema field objects. """ json_data = json.load(file_obj) return [SchemaField.from_api_repr(field) for field in json_data] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index a8e0c25edaf9..20bce597a1b2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -34,7 +34,6 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import HivePartitioningOptions -from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import _query_param_from_api_repr from google.cloud.bigquery.query import ArrayQueryParameter @@ -636,16 +635,16 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): # jobs.insert is idempotent because we ensure that every new # job has an ID. span_attributes = {"path": path} - with create_span( - name="BigQuery.job.begin", attributes=span_attributes, job_ref=self - ): - api_response = client._call_api( - retry, - method="POST", - path=path, - data=self.to_api_repr(), - timeout=timeout, - ) + api_response = client._call_api( + retry, + span_name="BigQuery.job.begin", + span_attributes=span_attributes, + job_ref=self, + method="POST", + path=path, + data=self.to_api_repr(), + timeout=timeout, + ) self._set_properties(api_response) def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): @@ -675,16 +674,17 @@ def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): try: span_attributes = {"path": self.path} - with create_span( - name="BigQuery.job.exists", attributes=span_attributes, job_ref=self - ): - client._call_api( - retry, - method="GET", - path=self.path, - query_params=extra_params, - timeout=timeout, - ) + + client._call_api( + retry, + span_name="BigQuery.job.exists", + span_attributes=span_attributes, + job_ref=self, + method="GET", + path=self.path, + query_params=extra_params, + timeout=timeout, + ) except NotFound: return False else: @@ -712,16 +712,17 @@ def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): if self.location: extra_params["location"] = self.location span_attributes = {"path": self.path} - with create_span( - name="BigQuery.job.reload", attributes=span_attributes, job_ref=self - ): - api_response = client._call_api( - retry, - method="GET", - path=self.path, - query_params=extra_params, - timeout=timeout, - ) + + api_response = client._call_api( + retry, + span_name="BigQuery.job.reload", + span_attributes=span_attributes, + job_ref=self, + method="GET", + path=self.path, + query_params=extra_params, + timeout=timeout, + ) self._set_properties(api_response) def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): @@ -750,16 +751,17 @@ def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): path = "{}/cancel".format(self.path) span_attributes = {"path": path} - with create_span( - name="BigQuery.job.cancel", attributes=span_attributes, job_ref=self - ): - api_response = client._call_api( - retry, - method="POST", - path=path, - query_params=extra_params, - timeout=timeout, - ) + + api_response = client._call_api( + retry, + span_name="BigQuery.job.cancel", + span_attributes=span_attributes, + job_ref=self, + method="POST", + path=path, + query_params=extra_params, + timeout=timeout, + ) self._set_properties(api_response["job"]) # The Future interface requires that we return True if the *attempt* # to cancel was successful. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 01bb1f2e1719..d354735a13bb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -250,6 +250,54 @@ def test__call_api_applying_custom_retry_on_timeout(self): [mock.call(foo="bar"), mock.call(foo="bar")], # was retried once ) + def test__call_api_span_creator_not_called(self): + from concurrent.futures import TimeoutError + from google.cloud.bigquery.retry import DEFAULT_RETRY + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + api_request_patcher = mock.patch.object( + client._connection, "api_request", side_effect=[TimeoutError, "result"], + ) + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, TimeoutError) + ) + + with api_request_patcher: + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client._call_api(retry) + + final_attributes.assert_not_called() + + def test__call_api_span_creator_called(self): + from concurrent.futures import TimeoutError + from google.cloud.bigquery.retry import DEFAULT_RETRY + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + api_request_patcher = mock.patch.object( + client._connection, "api_request", side_effect=[TimeoutError, "result"], + ) + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, TimeoutError) + ) + + with api_request_patcher: + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client._call_api( + retry, + span_name="test_name", + span_attributes={"test_attribute": "test_attribute-value"}, + ) + + final_attributes.assert_called_once() + def test__get_query_results_miss_w_explicit_project_and_timeout(self): from google.cloud.exceptions import NotFound diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index d5497ffa8d0e..fb6a46bd616c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -621,15 +621,17 @@ def test__begin_defaults(self): call_api = job._client._call_api = mock.Mock() call_api.return_value = resource path = "/projects/{}/jobs".format(self.PROJECT) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": path}, None, job) + job._begin() call_api.assert_called_once_with( - DEFAULT_RETRY, method="POST", path=path, data=resource, timeout=None, + DEFAULT_RETRY, + span_name="BigQuery.job.begin", + span_attributes={"path": path}, + job_ref=job, + method="POST", + path=path, + data=resource, + timeout=None, ) self.assertEqual(job._properties, resource) @@ -653,15 +655,17 @@ def test__begin_explicit(self): call_api.return_value = resource retry = DEFAULT_RETRY.with_deadline(1) path = "/projects/{}/jobs".format(self.PROJECT) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client, retry=retry, timeout=7.5) - - final_attributes.assert_called_with({"path": path}, None, job) + job._begin(client=client, retry=retry, timeout=7.5) call_api.assert_called_once_with( - retry, method="POST", path=path, data=resource, timeout=7.5, + retry, + span_name="BigQuery.job.begin", + span_attributes={"path": path}, + job_ref=job, + method="POST", + path=path, + data=resource, + timeout=7.5, ) self.assertEqual(job._properties, resource) @@ -673,20 +677,15 @@ def test_exists_defaults_miss(self): job._properties["jobReference"]["location"] = self.LOCATION call_api = job._client._call_api = mock.Mock() call_api.side_effect = NotFound("testing") - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - None, - job, - ) + self.assertFalse(job.exists()) call_api.assert_called_once_with( DEFAULT_RETRY, + span_name="BigQuery.job.exists", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), query_params={"fields": "id", "location": self.LOCATION}, @@ -710,19 +709,15 @@ def test_exists_explicit_hit(self): call_api = client._call_api = mock.Mock() call_api.return_value = resource retry = DEFAULT_RETRY.with_deadline(1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client, retry=retry)) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - None, - job, - ) + self.assertTrue(job.exists(client=client, retry=retry)) call_api.assert_called_once_with( retry, + span_name="BigQuery.job.exists", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), query_params={"fields": "id"}, @@ -735,15 +730,13 @@ def test_exists_w_timeout(self): PATH = "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) job = self._set_properties_job() call_api = job._client._call_api = mock.Mock() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.exists(timeout=7.5) - - final_attributes.assert_called_with({"path": PATH}, None, job) + job.exists(timeout=7.5) call_api.assert_called_once_with( DEFAULT_RETRY, + span_name="BigQuery.job.exists", + span_attributes={"path": PATH}, + job_ref=job, method="GET", path=PATH, query_params={"fields": "id"}, @@ -765,19 +758,15 @@ def test_reload_defaults(self): job._properties["jobReference"]["location"] = self.LOCATION call_api = job._client._call_api = mock.Mock() call_api.return_value = resource - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - None, - job, - ) + job.reload() call_api.assert_called_once_with( DEFAULT_RETRY, + span_name="BigQuery.job.reload", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), query_params={"location": self.LOCATION}, @@ -802,19 +791,15 @@ def test_reload_explicit(self): call_api = client._call_api = mock.Mock() call_api.return_value = resource retry = DEFAULT_RETRY.with_deadline(1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client, retry=retry, timeout=4.2) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - None, - job, - ) + job.reload(client=client, retry=retry, timeout=4.2) call_api.assert_called_once_with( retry, + span_name="BigQuery.job.reload", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), query_params={}, @@ -840,11 +825,7 @@ def test_cancel_defaults(self): ) as final_attributes: self.assertTrue(job.cancel()) - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID)}, - None, - job, - ) + final_attributes.assert_called() connection.api_request.assert_called_once_with( method="POST", @@ -875,7 +856,7 @@ def test_cancel_explicit(self): final_attributes.assert_called_with( {"path": "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID)}, - None, + client, job, ) @@ -915,7 +896,7 @@ def test_cancel_w_custom_retry(self): ) as final_attributes: result = job.cancel(retry=retry, timeout=7.5) - final_attributes.assert_called_with({"path": api_path}, None, job) + final_attributes.assert_called() self.assertTrue(result) self.assertEqual(job._properties, resource) @@ -2409,7 +2390,7 @@ def test_begin_w_bound_client(self): ) as final_attributes: job._begin() - final_attributes.assert_called_with({"path": path}, None, job) + final_attributes.assert_called_with({"path": path}, client, job) conn.api_request.assert_called_once_with( method="POST", @@ -2454,7 +2435,7 @@ def test_begin_w_autodetect(self): ) as final_attributes: job._begin() - final_attributes.assert_called_with({"path": path}, None, job) + final_attributes.assert_called_with({"path": path}, client, job) sent = { "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, @@ -2553,7 +2534,7 @@ def test_begin_w_alternate_client(self): ) as final_attributes: job._begin(client=client2) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() self.assertEqual(len(conn2.api_request.call_args_list), 1) @@ -2583,7 +2564,7 @@ def test_begin_w_job_reference(self): ) as final_attributes: load_job._begin() final_attributes.assert_called_with( - {"path": "/projects/alternative-project/jobs"}, None, load_job + {"path": "/projects/alternative-project/jobs"}, client, load_job ) conn.api_request.assert_called_once() @@ -2608,7 +2589,7 @@ def test_exists_miss_w_bound_client(self): final_attributes.assert_called_with( {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - None, + client, job, ) @@ -2630,7 +2611,7 @@ def test_exists_hit_w_alternate_client(self): final_attributes.assert_called_with( {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - None, + client2, job, ) @@ -2652,7 +2633,7 @@ def test_exists_miss_w_job_reference(self): self.assertFalse(load_job.exists()) final_attributes.assert_called_with( - {"path": "/projects/other-project/jobs/my-job-id"}, None, load_job + {"path": "/projects/other-project/jobs/my-job-id"}, client, load_job ) conn.api_request.assert_called_once_with( @@ -2673,7 +2654,7 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={}, timeout=None @@ -2693,7 +2674,7 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -2718,7 +2699,7 @@ def test_reload_w_job_reference(self): final_attributes.assert_called_with( {"path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID)}, - None, + client, load_job, ) @@ -2741,7 +2722,7 @@ def test_cancel_w_bound_client(self): ) as final_attributes: job.cancel() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( method="POST", path=PATH, query_params={}, timeout=None, @@ -2762,7 +2743,7 @@ def test_cancel_w_alternate_client(self): ) as final_attributes: job.cancel(client=client2) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -2791,7 +2772,7 @@ def test_cancel_w_job_reference(self): self.JOB_ID ) }, - None, + client, load_job, ) conn.api_request.assert_called_once_with( @@ -3093,7 +3074,7 @@ def test_begin_w_bound_client(self): ) as final_attributes: job._begin() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( method="POST", @@ -3161,7 +3142,7 @@ def test_begin_w_alternate_client(self): ) as final_attributes: job._begin(client=client2) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -3188,7 +3169,7 @@ def test_exists_miss_w_bound_client(self): ) as final_attributes: self.assertFalse(job.exists()) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, @@ -3208,7 +3189,7 @@ def test_exists_hit_w_alternate_client(self): ) as final_attributes: self.assertTrue(job.exists(client=client2)) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -3228,7 +3209,7 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={}, timeout=None @@ -3250,7 +3231,7 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -3515,7 +3496,7 @@ def test_begin_w_bound_client(self): ) as final_attributes: job._begin() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( method="POST", @@ -3577,7 +3558,7 @@ def test_begin_w_alternate_client(self): ) as final_attributes: job._begin(client=client2) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -3603,7 +3584,7 @@ def test_exists_miss_w_bound_client(self): ) as final_attributes: self.assertFalse(job.exists()) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, @@ -3623,7 +3604,7 @@ def test_exists_hit_w_alternate_client(self): ) as final_attributes: self.assertTrue(job.exists(client=client2)) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -3645,7 +3626,7 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={}, timeout=None ) @@ -3668,7 +3649,7 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -5012,7 +4993,7 @@ def test__begin_w_timeout(self): ) as final_attributes: job._begin(timeout=7.5) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( method="POST", @@ -5049,7 +5030,7 @@ def test_begin_w_bound_client(self): ) as final_attributes: job._begin() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) self.assertIsNone(job.default_dataset) self.assertEqual(job.udf_resources, []) @@ -5133,7 +5114,7 @@ def test_begin_w_alternate_client(self): ) as final_attributes: job._begin(client=client2) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -5179,7 +5160,7 @@ def test_begin_w_udf(self): ) as final_attributes: job._begin() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) self.assertEqual(job.udf_resources, udf_resources) conn.api_request.assert_called_once_with( @@ -5233,7 +5214,7 @@ def test_begin_w_named_query_parameter(self): ) as final_attributes: job._begin() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) self.assertEqual(job.query_parameters, query_parameters) conn.api_request.assert_called_once_with( @@ -5281,7 +5262,7 @@ def test_begin_w_positional_query_parameter(self): ) as final_attributes: job._begin() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) self.assertEqual(job.query_parameters, query_parameters) conn.api_request.assert_called_once_with( @@ -5361,7 +5342,7 @@ def test_begin_w_table_defs(self): ) as final_attributes: job._begin() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( method="POST", @@ -5404,7 +5385,7 @@ def test_dry_run_query(self): ) as final_attributes: job._begin() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) self.assertEqual(job.udf_resources, []) conn.api_request.assert_called_once_with( method="POST", @@ -5430,7 +5411,7 @@ def test_exists_miss_w_bound_client(self): ) as final_attributes: self.assertFalse(job.exists()) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( method="GET", path=PATH, query_params={"fields": "id"}, timeout=None @@ -5448,7 +5429,7 @@ def test_exists_hit_w_alternate_client(self): ) as final_attributes: self.assertTrue(job.exists(client=client2)) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -5475,7 +5456,7 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) self.assertNotEqual(job.destination, table_ref) @@ -5505,7 +5486,7 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client2, job) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( @@ -5533,7 +5514,7 @@ def test_reload_w_timeout(self): ) as final_attributes: job.reload(timeout=4.2) - final_attributes.assert_called_with({"path": PATH}, None, job) + final_attributes.assert_called_with({"path": PATH}, client, job) self.assertNotEqual(job.destination, table_ref) From cf7dcc4a34c2759d1b2f153a74e0997a49f5864b Mon Sep 17 00:00:00 2001 From: Stephanie Wang Date: Mon, 14 Sep 2020 17:42:53 -0400 Subject: [PATCH 0931/2016] chore: update CODEOWNERS (#259) --- packages/google-cloud-bigquery/.github/CODEOWNERS | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/CODEOWNERS b/packages/google-cloud-bigquery/.github/CODEOWNERS index 309a57710ab1..10f4ee7c0f7a 100644 --- a/packages/google-cloud-bigquery/.github/CODEOWNERS +++ b/packages/google-cloud-bigquery/.github/CODEOWNERS @@ -4,6 +4,9 @@ # For syntax help see: # https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax +# The @googleapis/api-bigquery is the default owner for changes in this repo +* @googleapis/api-bigquery -/samples/ @shollyman @googleapis/python-samples-owners +# The python-samples-reviewers team is the default owner for samples changes +/samples/ @googleapis/python-samples-owners From 681bf7c0b71f7e70c320d7962eb2f4cb22237d6c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 15 Sep 2020 10:10:53 -0500 Subject: [PATCH 0932/2016] docs: recommend insert_rows_json to avoid call to tables.get (#258) * docs: recommend insert_rows_json to avoid call to tables.get Since tables.get has a much lower QPS than tabledata.insertAll, we want to avoid recommending a pattern that requires fetching a table schema. If developers convert to a dictionary of the correct JSON format, no table schema is required. * update comments --- .../samples/table_insert_rows.py | 13 ++++++++----- .../table_insert_rows_explicit_none_insert_ids.py | 15 +++++++++------ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows.py b/packages/google-cloud-bigquery/samples/table_insert_rows.py index 130f9dbbddf2..24d73987175b 100644 --- a/packages/google-cloud-bigquery/samples/table_insert_rows.py +++ b/packages/google-cloud-bigquery/samples/table_insert_rows.py @@ -16,19 +16,22 @@ def table_insert_rows(table_id): # [START bigquery_table_insert_rows] - from google.cloud import bigquery # Construct a BigQuery client object. client = bigquery.Client() - # TODO(developer): Set table_id to the ID of the model to fetch. + # TODO(developer): Set table_id to the ID of table to append to. # table_id = "your-project.your_dataset.your_table" - table = client.get_table(table_id) # Make an API request. - rows_to_insert = [(u"Phred Phlyntstone", 32), (u"Wylma Phlyntstone", 29)] + rows_to_insert = [ + {u"full_name": u"Phred Phlyntstone", u"age": 32}, + {u"full_name": u"Wylma Phlyntstone", u"age": 29}, + ] - errors = client.insert_rows(table, rows_to_insert) # Make an API request. + errors = client.insert_rows_json(table_id, rows_to_insert) # Make an API request. if errors == []: print("New rows have been added.") + else: + print("Encountered errors while inserting rows: {}".format(errors)) # [END bigquery_table_insert_rows] diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py index 2410ba1765fc..d91792b82233 100644 --- a/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py +++ b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py @@ -16,21 +16,24 @@ def table_insert_rows_explicit_none_insert_ids(table_id): # [START bigquery_table_insert_rows_explicit_none_insert_ids] - from google.cloud import bigquery # Construct a BigQuery client object. client = bigquery.Client() - # TODO(developer): Set table_id to the ID of the model to fetch. + # TODO(developer): Set table_id to the ID of table to append to. # table_id = "your-project.your_dataset.your_table" - table = client.get_table(table_id) # Make an API request. - rows_to_insert = [(u"Phred Phlyntstone", 32), (u"Wylma Phlyntstone", 29)] + rows_to_insert = [ + {u"full_name": u"Phred Phlyntstone", u"age": 32}, + {u"full_name": u"Wylma Phlyntstone", u"age": 29}, + ] - errors = client.insert_rows( - table, rows_to_insert, row_ids=[None] * len(rows_to_insert) + errors = client.insert_rows_json( + table_id, rows_to_insert, row_ids=[None] * len(rows_to_insert) ) # Make an API request. if errors == []: print("New rows have been added.") + else: + print("Encountered errors while inserting rows: {}".format(errors)) # [END bigquery_table_insert_rows_explicit_none_insert_ids] From ff9928cbe2684975ddc8d5bd4a169930146463ed Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Fri, 18 Sep 2020 19:41:28 +0530 Subject: [PATCH 0933/2016] fix: validate job_config.source_format in load_table_from_dataframe (#262) * fix: address job_congig.source_format * fix: nit --- .../google/cloud/bigquery/client.py | 10 ++- .../tests/unit/test_client.py | 78 ++++++++++++++++++- 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 86275487b341..d2aa45999809 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2174,7 +2174,15 @@ def load_table_from_dataframe( else: job_config = job.LoadJobConfig() - job_config.source_format = job.SourceFormat.PARQUET + if job_config.source_format: + if job_config.source_format != job.SourceFormat.PARQUET: + raise ValueError( + "Got unexpected source_format: '{}'. Currently, only PARQUET is supported".format( + job_config.source_format + ) + ) + else: + job_config.source_format = job.SourceFormat.PARQUET if location is None: location = self.location diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d354735a13bb..00bc47017fc5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -7544,7 +7544,7 @@ def test_load_table_from_dataframe_w_client_location(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_custom_job_config(self): + def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7553,7 +7553,7 @@ def test_load_table_from_dataframe_w_custom_job_config(self): records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) job_config = job.LoadJobConfig( - write_disposition=job.WriteDisposition.WRITE_TRUNCATE + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, ) original_config_copy = copy.deepcopy(job_config) @@ -7595,6 +7595,80 @@ def test_load_table_from_dataframe_w_custom_job_config(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, + source_format=job.SourceFormat.PARQUET, + ) + original_config_copy = copy.deepcopy(job_config) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch as get_table: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + # no need to fetch and inspect table schema for WRITE_TRUNCATE jobs + assert not get_table.called + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.write_disposition == job.WriteDisposition.WRITE_TRUNCATE + + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self): + from google.cloud.bigquery import job + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, + source_format=job.SourceFormat.ORC, + ) + + with pytest.raises(ValueError) as exc: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + assert "Got unexpected source_format:" in str(exc.value) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema(self): From f913514d8f01283f44fe34388199c906d771f082 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 21 Sep 2020 14:00:02 -0700 Subject: [PATCH 0934/2016] chore(CI): add snippet bot to Kokoro (via synth) (#256) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/b8e5112c-3774-4b12-81b9-e691b2c52110/targets - [ ] To automatically regenerate this PR, check this box. --- .../.github/snippet-bot.yml | 0 packages/google-cloud-bigquery/synth.metadata | 101 +++++++++++++++++- 2 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 packages/google-cloud-bigquery/.github/snippet-bot.yml diff --git a/packages/google-cloud-bigquery/.github/snippet-bot.yml b/packages/google-cloud-bigquery/.github/snippet-bot.yml new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 46c63367de1d..efee17785517 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -3,8 +3,8 @@ { "git": { "name": ".", - "remote": "git@github.com:tmatsuo/python-bigquery.git", - "sha": "5ed817523a85a6f332951e10c0bf7dbb86d7e1cf" + "remote": "https://github.com/googleapis/python-bigquery.git", + "sha": "a125160696d1453b04a66c967819f90e70e03a52" } }, { @@ -14,6 +14,20 @@ "sha": "868615a5c1c1059c636bb3d82a555edb1d5a251e", "internalRef": "324294521" } + }, + { + "git": { + "name": "synthtool", + "remote": "https://github.com/googleapis/synthtool.git", + "sha": "32c758f11b8c578f515a746c9d263b82a615a77c" + } + }, + { + "git": { + "name": "synthtool", + "remote": "https://github.com/googleapis/synthtool.git", + "sha": "32c758f11b8c578f515a746c9d263b82a615a77c" + } } ], "destinations": [ @@ -26,5 +40,88 @@ "generator": "bazel" } } + ], + "generatedFiles": [ + ".coveragerc", + ".flake8", + ".github/CONTRIBUTING.md", + ".github/ISSUE_TEMPLATE/bug_report.md", + ".github/ISSUE_TEMPLATE/feature_request.md", + ".github/ISSUE_TEMPLATE/support_request.md", + ".github/PULL_REQUEST_TEMPLATE.md", + ".github/release-please.yml", + ".github/snippet-bot.yml", + ".gitignore", + ".kokoro/build.sh", + ".kokoro/continuous/common.cfg", + ".kokoro/continuous/continuous.cfg", + ".kokoro/docker/docs/Dockerfile", + ".kokoro/docker/docs/fetch_gpg_keys.sh", + ".kokoro/docs/common.cfg", + ".kokoro/docs/docs-presubmit.cfg", + ".kokoro/docs/docs.cfg", + ".kokoro/presubmit/common.cfg", + ".kokoro/presubmit/presubmit.cfg", + ".kokoro/presubmit/system-2.7.cfg", + ".kokoro/presubmit/system-3.8.cfg", + ".kokoro/publish-docs.sh", + ".kokoro/release.sh", + ".kokoro/release/common.cfg", + ".kokoro/release/release.cfg", + ".kokoro/samples/lint/common.cfg", + ".kokoro/samples/lint/continuous.cfg", + ".kokoro/samples/lint/periodic.cfg", + ".kokoro/samples/lint/presubmit.cfg", + ".kokoro/samples/python3.6/common.cfg", + ".kokoro/samples/python3.6/continuous.cfg", + ".kokoro/samples/python3.6/periodic.cfg", + ".kokoro/samples/python3.6/presubmit.cfg", + ".kokoro/samples/python3.7/common.cfg", + ".kokoro/samples/python3.7/continuous.cfg", + ".kokoro/samples/python3.7/periodic.cfg", + ".kokoro/samples/python3.7/presubmit.cfg", + ".kokoro/samples/python3.8/common.cfg", + ".kokoro/samples/python3.8/continuous.cfg", + ".kokoro/samples/python3.8/periodic.cfg", + ".kokoro/samples/python3.8/presubmit.cfg", + ".kokoro/test-samples.sh", + ".kokoro/trampoline.sh", + ".kokoro/trampoline_v2.sh", + ".trampolinerc", + "CODE_OF_CONDUCT.md", + "CONTRIBUTING.rst", + "LICENSE", + "MANIFEST.in", + "docs/_static/custom.css", + "docs/_templates/layout.html", + "docs/conf.py", + "google/cloud/bigquery_v2/gapic/enums.py", + "google/cloud/bigquery_v2/proto/encryption_config.proto", + "google/cloud/bigquery_v2/proto/encryption_config_pb2.py", + "google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py", + "google/cloud/bigquery_v2/proto/model.proto", + "google/cloud/bigquery_v2/proto/model_pb2.py", + "google/cloud/bigquery_v2/proto/model_pb2_grpc.py", + "google/cloud/bigquery_v2/proto/model_reference.proto", + "google/cloud/bigquery_v2/proto/model_reference_pb2.py", + "google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py", + "google/cloud/bigquery_v2/proto/standard_sql.proto", + "google/cloud/bigquery_v2/proto/standard_sql_pb2.py", + "google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py", + "google/cloud/bigquery_v2/types.py", + "renovate.json", + "samples/AUTHORING_GUIDE.md", + "samples/CONTRIBUTING.md", + "samples/snippets/README.rst", + "samples/snippets/noxfile.py", + "scripts/decrypt-secrets.sh", + "scripts/readme-gen/readme_gen.py", + "scripts/readme-gen/templates/README.tmpl.rst", + "scripts/readme-gen/templates/auth.tmpl.rst", + "scripts/readme-gen/templates/auth_api_key.tmpl.rst", + "scripts/readme-gen/templates/install_deps.tmpl.rst", + "scripts/readme-gen/templates/install_portaudio.tmpl.rst", + "setup.cfg", + "testing/.gitignore" ] } \ No newline at end of file From 6105936af4ed254525646d4f69ff3378eeaff6ef Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 22 Sep 2020 13:04:03 -0500 Subject: [PATCH 0935/2016] fix: update minimum dependency versions (#263) This PR updates the minimum dependency versions to match those that I found to be actually runnable. Updates tests to use constraint files so that at least one test session uses these minimum versions. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! - Based on internal Python Client Library Testing Improvements docs. - In response to internal bug 166792569 covering insufficient pyarrow minimum dependency. - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- packages/google-cloud-bigquery/noxfile.py | 74 ++++++++++++------- .../samples/snippets/jupyter_tutorial_test.py | 8 +- .../tests/test_download_public_data.py | 4 + .../test_download_public_data_sandbox.py | 4 + .../samples/tests/test_query_to_arrow.py | 4 +- packages/google-cloud-bigquery/setup.py | 23 +++--- .../testing/constraints-2.7.txt | 9 +++ .../testing/constraints-3.5.txt | 12 +++ .../testing/constraints-3.6.txt | 0 .../testing/constraints-3.7.txt | 0 .../testing/constraints-3.8.txt | 0 .../google-cloud-bigquery/tests/system.py | 19 +++-- .../tests/unit/test__pandas_helpers.py | 20 +++++ .../tests/unit/test_client.py | 2 +- .../tests/unit/test_dbapi_connection.py | 14 +++- .../tests/unit/test_magics.py | 17 ++--- .../tests/unit/test_table.py | 12 ++- 17 files changed, 155 insertions(+), 67 deletions(-) create mode 100644 packages/google-cloud-bigquery/testing/constraints-2.7.txt create mode 100644 packages/google-cloud-bigquery/testing/constraints-3.5.txt create mode 100644 packages/google-cloud-bigquery/testing/constraints-3.6.txt create mode 100644 packages/google-cloud-bigquery/testing/constraints-3.7.txt create mode 100644 packages/google-cloud-bigquery/testing/constraints-3.8.txt diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 7f2dee34c8b2..90f023addf63 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -14,6 +14,7 @@ from __future__ import absolute_import +import pathlib import os import shutil @@ -22,6 +23,7 @@ BLACK_VERSION = "black==19.10b0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") +CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() def default(session): @@ -32,27 +34,33 @@ def default(session): Python corresponding to the ``nox`` binary the ``PATH`` can run the tests. """ + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + # Install all test dependencies, then install local packages in-place. session.install( - "mock", "pytest", "google-cloud-testutils", "pytest-cov", "freezegun" + "mock", + "pytest", + "google-cloud-testutils", + "pytest-cov", + "freezegun", + "-c", + constraints_path, ) - session.install("grpcio") - - # fastparquet is not included in .[all] because, in general, it's redundant - # with pyarrow. We still want to run some unit tests with fastparquet - # serialization, though. - session.install("-e", ".[all,fastparquet]") - # IPython does not support Python 2 after version 5.x if session.python == "2.7": - session.install("ipython==5.5") + # The [all] extra is not installable on Python 2.7. + session.install("-e", ".[pandas,pyarrow]", "-c", constraints_path) + elif session.python == "3.5": + session.install("-e", ".[all]", "-c", constraints_path) else: - session.install("ipython") + # fastparquet is not included in .[all] because, in general, it's + # redundant with pyarrow. We still want to run some unit tests with + # fastparquet serialization, though. + session.install("-e", ".[all,fastparquet]", "-c", constraints_path) - # opentelemetry was not added to [all] because opentelemetry does not support Python 2. - # Exporter does not need to be in nox thus it has been added to README documentation - if session.python != "2.7": - session.install("-e", ".[opentelemetry]") + session.install("ipython", "-c", constraints_path) # Run py.test against the unit tests. session.run( @@ -79,6 +87,10 @@ def unit(session): def system(session): """Run the system test suite.""" + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + # Check the value of `RUN_SYSTEM_TESTS` env var. It defaults to true. if os.environ.get("RUN_SYSTEM_TESTS", "true") == "false": session.skip("RUN_SYSTEM_TESTS is set to false, skipping") @@ -88,18 +100,21 @@ def system(session): session.skip("Credentials must be set via environment variable.") # Use pre-release gRPC for system tests. - session.install("--pre", "grpcio") + session.install("--pre", "grpcio", "-c", constraints_path) # Install all test dependencies, then install local packages in place. - session.install("mock", "pytest", "psutil", "google-cloud-testutils") - session.install("google-cloud-storage") - session.install("-e", ".[all]") + session.install( + "mock", "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path + ) + session.install("google-cloud-storage", "-c", constraints_path) - # IPython does not support Python 2 after version 5.x if session.python == "2.7": - session.install("ipython==5.5") + # The [all] extra is not installable on Python 2.7. + session.install("-e", ".[pandas]", "-c", constraints_path) else: - session.install("ipython") + session.install("-e", ".[all]", "-c", constraints_path) + + session.install("ipython", "-c", constraints_path) # Run py.test against the system tests. session.run( @@ -111,15 +126,24 @@ def system(session): def snippets(session): """Run the snippets test suite.""" + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + # Sanity check: Only run snippets tests if the environment variable is set. if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable.") # Install all test dependencies, then install local packages in place. - session.install("mock", "pytest", "google-cloud-testutils") - session.install("google-cloud-storage") - session.install("grpcio") - session.install("-e", ".[all]") + session.install("mock", "pytest", "google-cloud-testutils", "-c", constraints_path) + session.install("google-cloud-storage", "-c", constraints_path) + session.install("grpcio", "-c", constraints_path) + + if session.python == "2.7": + # The [all] extra is not installable on Python 2.7. + session.install("-e", ".[pandas]", "-c", constraints_path) + else: + session.install("-e", ".[all]", "-c", constraints_path) # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session diff --git a/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py index 353590b82864..7fe1cde854f8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py @@ -11,12 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import IPython -from IPython.terminal import interactiveshell -from IPython.testing import tools -import matplotlib import pytest +IPython = pytest.importorskip("IPython") +interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") +tools = pytest.importorskip("IPython.testing.tools") +matplotlib = pytest.importorskip("matplotlib") # Ignore semicolon lint warning because semicolons are used in notebooks # flake8: noqa E703 diff --git a/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py b/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py index 82297b2032f6..2412c147f18a 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py +++ b/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py @@ -14,8 +14,12 @@ import logging +import pytest + from .. import download_public_data +pytest.importorskip("google.cloud.bigquery_storage_v1") + def test_download_public_data(caplog, capsys): # Enable debug-level logging to verify the BigQuery Storage API is used. diff --git a/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py b/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py index e322cb2e54c9..08e1aab73fad 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py +++ b/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py @@ -14,8 +14,12 @@ import logging +import pytest + from .. import download_public_data_sandbox +pytest.importorskip("google.cloud.bigquery_storage_v1") + def test_download_public_data_sandbox(caplog, capsys): # Enable debug-level logging to verify the BigQuery Storage API is used. diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py index 77d3f7130305..f14ce55613ac 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pyarrow +import pytest from .. import query_to_arrow +pyarrow = pytest.importorskip("pyarrow") + def test_query_to_arrow(capsys,): diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index d23f77b1cc37..6e66ae4cd76d 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -32,7 +32,7 @@ 'enum34; python_version < "3.4"', "google-api-core >= 1.21.0, < 2.0dev", "google-cloud-core >= 1.4.1, < 2.0dev", - "google-resumable-media >= 0.5.0, < 2.0dev", + "google-resumable-media >= 0.6.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", ] extras = { @@ -41,18 +41,19 @@ # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. - # See: https://github.com/googleapis/python-bigquery/issues/83 - "grpcio >= 1.8.2, < 2.0dev", - "pyarrow >= 1.0.0, < 2.0dev; python_version >= '3.5'", + # See: https://github.com/googleapis/python-bigquery/issues/83 The + # grpc.Channel.close() method isn't added until 1.32.0. + # https://github.com/grpc/grpc/pull/15254 + "grpcio >= 1.32.0, < 2.0dev", + "pyarrow >= 1.0.0, < 2.0dev", ], - "pandas": ["pandas>=0.17.1"], - # Exclude PyArrow dependency from Windows Python 2.7. + "pandas": ["pandas>=0.23.0"], "pyarrow": [ - "pyarrow >= 1.0.0, < 2.0dev; python_version >= '3.5'", - # Pyarrow >= 0.17.0 is not compatible with Python 2 anymore. - "pyarrow < 0.17.0; python_version < '3.0' and platform_system != 'Windows'", + # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword. + "pyarrow >= 1.0.0, < 2.0de ; python_version>='3.5'", + "pyarrow >= 0.16.0, < 0.17.0dev ; python_version<'3.5'", ], - "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], + "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "fastparquet": [ "fastparquet", "python-snappy", @@ -77,8 +78,6 @@ # creates a dependency on pre-release versions of numpy. See: # https://github.com/googleapis/google-cloud-python/issues/8549 "fastparquet", - # Skip opentelemetry because the library is not compatible with Python 2. - "opentelemetry", ): continue all_extras.extend(extras[extra]) diff --git a/packages/google-cloud-bigquery/testing/constraints-2.7.txt b/packages/google-cloud-bigquery/testing/constraints-2.7.txt new file mode 100644 index 000000000000..fafbaa27f0e3 --- /dev/null +++ b/packages/google-cloud-bigquery/testing/constraints-2.7.txt @@ -0,0 +1,9 @@ +google-api-core==1.21.0 +google-cloud-core==1.4.1 +google-cloud-storage==1.30.0 +google-resumable-media==0.6.0 +ipython==5.5 +pandas==0.23.0 +pyarrow==0.16.0 +six==1.13.0 +tqdm==4.7.4 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/testing/constraints-3.5.txt b/packages/google-cloud-bigquery/testing/constraints-3.5.txt new file mode 100644 index 000000000000..a262dbe5f2a4 --- /dev/null +++ b/packages/google-cloud-bigquery/testing/constraints-3.5.txt @@ -0,0 +1,12 @@ +google-api-core==1.21.0 +google-cloud-bigquery-storage==1.0.0 +google-cloud-core==1.4.1 +google-resumable-media==0.6.0 +google-cloud-storage==1.30.0 +grpcio==1.32.0 +ipython==5.5 +# pandas 0.23.0 is the first version to work with pyarrow to_pandas. +pandas==0.23.0 +pyarrow==1.0.0 +six==1.13.0 +tqdm==4.7.4 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.8.txt b/packages/google-cloud-bigquery/testing/constraints-3.8.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 0048c13e0f6c..02cc8e139302 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -64,6 +64,7 @@ from google.api_core.exceptions import PreconditionFailed from google.api_core.exceptions import BadRequest +from google.api_core.exceptions import ClientError from google.api_core.exceptions import Conflict from google.api_core.exceptions import Forbidden from google.api_core.exceptions import GoogleAPICallError @@ -130,9 +131,17 @@ ) PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0") -PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0") -PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version + +if pandas: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + PANDAS_INSTALLED_VERSION = None + +if pyarrow: + PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version +else: + PYARROW_INSTALLED_VERSION = None def _has_rows(result): @@ -1312,9 +1321,9 @@ def test_load_table_from_file_w_explicit_location(self): self.assertEqual("EU", load_job.location) # Cannot cancel the job from the US. - with self.assertRaises(NotFound): + with self.assertRaises(ClientError): client.cancel_job(job_id, location="US") - with self.assertRaises(NotFound): + with self.assertRaises(ClientError): load_job_us.cancel() # Can list the table rows. @@ -2897,7 +2906,7 @@ def test_bigquery_magic(): LIMIT 10 """ with io.capture_output() as captured: - result = ip.run_cell_magic("bigquery", "", sql) + result = ip.run_cell_magic("bigquery", "--use_rest_api", sql) conn_count_end = len(current_process.connections()) diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index e229e04a2581..f4355072aed0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -773,6 +773,26 @@ def test_dataframe_to_bq_schema_dict_sequence(module_under_test): assert returned_schema == expected_schema +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(not six.PY2, reason="Requires Python 2.7") +def test_dataframe_to_bq_schema_w_struct_raises_py27(module_under_test): + dataframe = pandas.DataFrame( + data=[{"struct_field": {"int_col": 1}}, {"struct_field": {"int_col": 2}}] + ) + bq_schema = [ + schema.SchemaField( + "struct_field", + field_type="STRUCT", + fields=[schema.SchemaField("int_col", field_type="INT64")], + ), + ] + + with pytest.raises(ValueError) as excinfo: + module_under_test.dataframe_to_bq_schema(dataframe, bq_schema=bq_schema) + + assert "struct (record) column types is not supported" in str(excinfo.value) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_multiindex(module_under_test): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 00bc47017fc5..c4c604ed0bc3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -56,7 +56,7 @@ pyarrow = None import google.api_core.exceptions -from google.api_core.gapic_v1 import client_info +from google.api_core import client_info import google.cloud._helpers from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 96ec41c51555..0f1be45ee562 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -41,9 +41,11 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): - from google.cloud.bigquery_storage_v1 import client - - mock_client = mock.create_autospec(client.BigQueryReadClient) + if bigquery_storage_v1 is None: + return None + mock_client = mock.create_autospec( + bigquery_storage_v1.client.BigQueryReadClient + ) mock_client.transport = mock.Mock(spec=["channel"]) mock_client.transport.channel = mock.Mock(spec=["close"]) return mock_client @@ -127,6 +129,9 @@ def test_raises_error_if_closed(self): ): getattr(connection, method)() + @unittest.skipIf( + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + ) def test_close_closes_all_created_bigquery_clients(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() @@ -147,6 +152,9 @@ def test_close_closes_all_created_bigquery_clients(self): self.assertTrue(client.close.called) self.assertTrue(bqstorage_client.transport.channel.close.called) + @unittest.skipIf( + bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 73e44f311d8f..c4527c837f07 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -25,21 +25,10 @@ import pandas except ImportError: # pragma: NO COVER pandas = None -try: - import IPython - from IPython.utils import io - from IPython.testing import tools - from IPython.terminal import interactiveshell -except ImportError: # pragma: NO COVER - IPython = None from google.api_core import exceptions import google.auth.credentials -try: - from google.cloud import bigquery_storage_v1 -except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None from google.cloud import bigquery from google.cloud.bigquery import job from google.cloud.bigquery import table @@ -48,7 +37,11 @@ from test_utils.imports import maybe_fail_import -pytestmark = pytest.mark.skipif(IPython is None, reason="Requires `ipython`") +IPython = pytest.importorskip("IPython") +io = pytest.importorskip("IPython.utils.io") +tools = pytest.importorskip("IPython.testing.tools") +interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") +bigquery_storage_v1 = pytest.importorskip("google.cloud.bigquery_storage_v1") @pytest.fixture(scope="session") diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 80223e8e1396..10bedfee126e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2472,7 +2472,10 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): with warnings.catch_warnings(record=True) as warned: df = row_iterator.to_dataframe(create_bqstorage_client=False) - self.assertEqual(len(warned), 0) + user_warnings = [ + warning for warning in warned if warning.category is UserWarning + ] + self.assertEqual(len(user_warnings), 0) self.assertEqual(len(df), 4) @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -2499,9 +2502,10 @@ def test_to_dataframe_no_tqdm(self): progress_bar_type="tqdm", create_bqstorage_client=False, ) - self.assertEqual(len(warned), 1) - for warning in warned: - self.assertIs(warning.category, UserWarning) + user_warnings = [ + warning for warning in warned if warning.category is UserWarning + ] + self.assertEqual(len(user_warnings), 1) # Even though the progress bar won't show, downloading the dataframe # should still work. From 3fa2f8a41d20113979be85f8cde715343ec93fe9 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 22 Sep 2020 18:30:11 +0000 Subject: [PATCH 0936/2016] chore: release 1.28.0 (#238) :robot: I have created a release \*beep\* \*boop\* --- ## [1.28.0](https://www.github.com/googleapis/python-bigquery/compare/v1.27.2...v1.28.0) (2020-09-22) ### Features * add custom cell magic parser to handle complex `--params` values ([#213](https://www.github.com/googleapis/python-bigquery/issues/213)) ([dcfbac2](https://www.github.com/googleapis/python-bigquery/commit/dcfbac267fbf66d189b0cc7e76f4712122a74b7b)) * add instrumentation to list methods ([#239](https://www.github.com/googleapis/python-bigquery/issues/239)) ([fa9f9ca](https://www.github.com/googleapis/python-bigquery/commit/fa9f9ca491c3f9954287102c567ec483aa6151d4)) * add opentelemetry tracing ([#215](https://www.github.com/googleapis/python-bigquery/issues/215)) ([a04996c](https://www.github.com/googleapis/python-bigquery/commit/a04996c537e9d8847411fcbb1b05da5f175b339e)) * expose require_partition_filter for hive_partition ([#257](https://www.github.com/googleapis/python-bigquery/issues/257)) ([aa1613c](https://www.github.com/googleapis/python-bigquery/commit/aa1613c1bf48c7efb999cb8b8c422c80baf1950b)) ### Bug Fixes * fix dependency issue in fastavro ([#241](https://www.github.com/googleapis/python-bigquery/issues/241)) ([2874abf](https://www.github.com/googleapis/python-bigquery/commit/2874abf4827f1ea529519d4b138511d31f732a50)) * update minimum dependency versions ([#263](https://www.github.com/googleapis/python-bigquery/issues/263)) ([1be66ce](https://www.github.com/googleapis/python-bigquery/commit/1be66ce94a32b1f924bdda05d068c2977631af9e)) * validate job_config.source_format in load_table_from_dataframe ([#262](https://www.github.com/googleapis/python-bigquery/issues/262)) ([6160fee](https://www.github.com/googleapis/python-bigquery/commit/6160fee4b1a79b0ea9031cc18caf6322fe4c4084)) ### Documentation * recommend insert_rows_json to avoid call to tables.get ([#258](https://www.github.com/googleapis/python-bigquery/issues/258)) ([ae647eb](https://www.github.com/googleapis/python-bigquery/commit/ae647ebd68deff6e30ca2cffb5b7422c6de4940b)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 22 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 5ef22e8d765a..e8d367f73292 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,28 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [1.28.0](https://www.github.com/googleapis/python-bigquery/compare/v1.27.2...v1.28.0) (2020-09-22) + + +### Features + +* add custom cell magic parser to handle complex `--params` values ([#213](https://www.github.com/googleapis/python-bigquery/issues/213)) ([dcfbac2](https://www.github.com/googleapis/python-bigquery/commit/dcfbac267fbf66d189b0cc7e76f4712122a74b7b)) +* add instrumentation to list methods ([#239](https://www.github.com/googleapis/python-bigquery/issues/239)) ([fa9f9ca](https://www.github.com/googleapis/python-bigquery/commit/fa9f9ca491c3f9954287102c567ec483aa6151d4)) +* add opentelemetry tracing ([#215](https://www.github.com/googleapis/python-bigquery/issues/215)) ([a04996c](https://www.github.com/googleapis/python-bigquery/commit/a04996c537e9d8847411fcbb1b05da5f175b339e)) +* expose require_partition_filter for hive_partition ([#257](https://www.github.com/googleapis/python-bigquery/issues/257)) ([aa1613c](https://www.github.com/googleapis/python-bigquery/commit/aa1613c1bf48c7efb999cb8b8c422c80baf1950b)) + + +### Bug Fixes + +* fix dependency issue in fastavro ([#241](https://www.github.com/googleapis/python-bigquery/issues/241)) ([2874abf](https://www.github.com/googleapis/python-bigquery/commit/2874abf4827f1ea529519d4b138511d31f732a50)) +* update minimum dependency versions ([#263](https://www.github.com/googleapis/python-bigquery/issues/263)) ([1be66ce](https://www.github.com/googleapis/python-bigquery/commit/1be66ce94a32b1f924bdda05d068c2977631af9e)) +* validate job_config.source_format in load_table_from_dataframe ([#262](https://www.github.com/googleapis/python-bigquery/issues/262)) ([6160fee](https://www.github.com/googleapis/python-bigquery/commit/6160fee4b1a79b0ea9031cc18caf6322fe4c4084)) + + +### Documentation + +* recommend insert_rows_json to avoid call to tables.get ([#258](https://www.github.com/googleapis/python-bigquery/issues/258)) ([ae647eb](https://www.github.com/googleapis/python-bigquery/commit/ae647ebd68deff6e30ca2cffb5b7422c6de4940b)) + ### [1.27.2](https://www.github.com/googleapis/python-bigquery/compare/v1.27.1...v1.27.2) (2020-08-18) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 6e66ae4cd76d..73d9a03cad05 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.27.2" +version = "1.28.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 055a9f6b4beca3c7bd928a3917bd48d7d0a0c5ad Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 25 Sep 2020 11:36:05 -0700 Subject: [PATCH 0937/2016] chore: add CI secrets manager (via synth) (#271) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/5da1d2aa-a963-44d1-952a-3ed428de6719/targets - [ ] To automatically regenerate this PR, check this box. Source-Link: https://github.com/googleapis/synthtool/commit/27f4406999b1eee29e04b09b2423a8e4646c7e24 Source-Link: https://github.com/googleapis/synthtool/commit/dba48bb9bc6959c232bec9150ac6313b608fe7bd Source-Link: https://github.com/googleapis/synthtool/commit/257fda18168bedb76985024bd198ed1725485488 Source-Link: https://github.com/googleapis/synthtool/commit/ffcee7952b74f647cbb3ef021d95422f10816fca Source-Link: https://github.com/googleapis/synthtool/commit/d302f93d7f47e2852e585ac35ab2d15585717ec0 --- .../.kokoro/populate-secrets.sh | 43 ++++++++++++++++ .../.kokoro/release/common.cfg | 50 +++++-------------- .../.kokoro/trampoline.sh | 15 ++++-- packages/google-cloud-bigquery/docs/conf.py | 3 +- .../scripts/decrypt-secrets.sh | 15 +++++- packages/google-cloud-bigquery/synth.metadata | 7 +-- 6 files changed, 86 insertions(+), 47 deletions(-) create mode 100755 packages/google-cloud-bigquery/.kokoro/populate-secrets.sh diff --git a/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh b/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh new file mode 100755 index 000000000000..f52514257ef0 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright 2020 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eo pipefail + +function now { date +"%Y-%m-%d %H:%M:%S" | tr -d '\n' ;} +function msg { println "$*" >&2 ;} +function println { printf '%s\n' "$(now) $*" ;} + + +# Populates requested secrets set in SECRET_MANAGER_KEYS from service account: +# kokoro-trampoline@cloud-devrel-kokoro-resources.iam.gserviceaccount.com +SECRET_LOCATION="${KOKORO_GFILE_DIR}/secret_manager" +msg "Creating folder on disk for secrets: ${SECRET_LOCATION}" +mkdir -p ${SECRET_LOCATION} +for key in $(echo ${SECRET_MANAGER_KEYS} | sed "s/,/ /g") +do + msg "Retrieving secret ${key}" + docker run --entrypoint=gcloud \ + --volume=${KOKORO_GFILE_DIR}:${KOKORO_GFILE_DIR} \ + gcr.io/google.com/cloudsdktool/cloud-sdk \ + secrets versions access latest \ + --project cloud-devrel-kokoro-resources \ + --secret ${key} > \ + "${SECRET_LOCATION}/${key}" + if [[ $? == 0 ]]; then + msg "Secret written to ${SECRET_LOCATION}/${key}" + else + msg "Error retrieving secret ${key}" + fi +done diff --git a/packages/google-cloud-bigquery/.kokoro/release/common.cfg b/packages/google-cloud-bigquery/.kokoro/release/common.cfg index 661a044811ee..18b417709bde 100644 --- a/packages/google-cloud-bigquery/.kokoro/release/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/release/common.cfg @@ -23,42 +23,18 @@ env_vars: { value: "github/python-bigquery/.kokoro/release.sh" } -# Fetch the token needed for reporting release status to GitHub -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "yoshi-automation-github-key" - } - } -} - -# Fetch PyPI password -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "google_cloud_pypi_password" - } - } -} - -# Fetch magictoken to use with Magic Github Proxy -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "releasetool-magictoken" - } - } +# Fetch PyPI password +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "google_cloud_pypi_password" + } + } } -# Fetch api key to use with Magic Github Proxy -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "magic-github-proxy-api-key" - } - } -} +# Tokens needed to report release status back to GitHub +env_vars: { + key: "SECRET_MANAGER_KEYS" + value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/trampoline.sh b/packages/google-cloud-bigquery/.kokoro/trampoline.sh index e8c4251f3ed4..f39236e943a8 100755 --- a/packages/google-cloud-bigquery/.kokoro/trampoline.sh +++ b/packages/google-cloud-bigquery/.kokoro/trampoline.sh @@ -15,9 +15,14 @@ set -eo pipefail -python3 "${KOKORO_GFILE_DIR}/trampoline_v1.py" || ret_code=$? +# Always run the cleanup script, regardless of the success of bouncing into +# the container. +function cleanup() { + chmod +x ${KOKORO_GFILE_DIR}/trampoline_cleanup.sh + ${KOKORO_GFILE_DIR}/trampoline_cleanup.sh + echo "cleanup"; +} +trap cleanup EXIT -chmod +x ${KOKORO_GFILE_DIR}/trampoline_cleanup.sh -${KOKORO_GFILE_DIR}/trampoline_cleanup.sh || true - -exit ${ret_code} +$(dirname $0)/populate-secrets.sh # Secret Manager secrets. +python3 "${KOKORO_GFILE_DIR}/trampoline_v1.py" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 155606c9792f..b38bdd1fffd6 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -29,7 +29,7 @@ # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -needs_sphinx = "1.6.3" +needs_sphinx = "1.5.5" # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -39,6 +39,7 @@ "sphinx.ext.autosummary", "sphinx.ext.intersphinx", "sphinx.ext.coverage", + "sphinx.ext.doctest", "sphinx.ext.napoleon", "sphinx.ext.todo", "sphinx.ext.viewcode", diff --git a/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh b/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh index ff599eb2af25..21f6d2a26d90 100755 --- a/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh +++ b/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh @@ -20,14 +20,27 @@ ROOT=$( dirname "$DIR" ) # Work from the project root. cd $ROOT +# Prevent it from overriding files. +# We recommend that sample authors use their own service account files and cloud project. +# In that case, they are supposed to prepare these files by themselves. +if [[ -f "testing/test-env.sh" ]] || \ + [[ -f "testing/service-account.json" ]] || \ + [[ -f "testing/client-secrets.json" ]]; then + echo "One or more target files exist, aborting." + exit 1 +fi + # Use SECRET_MANAGER_PROJECT if set, fallback to cloud-devrel-kokoro-resources. PROJECT_ID="${SECRET_MANAGER_PROJECT:-cloud-devrel-kokoro-resources}" gcloud secrets versions access latest --secret="python-docs-samples-test-env" \ + --project="${PROJECT_ID}" \ > testing/test-env.sh gcloud secrets versions access latest \ --secret="python-docs-samples-service-account" \ + --project="${PROJECT_ID}" \ > testing/service-account.json gcloud secrets versions access latest \ --secret="python-docs-samples-client-secrets" \ - > testing/client-secrets.json \ No newline at end of file + --project="${PROJECT_ID}" \ + > testing/client-secrets.json diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index efee17785517..e786fb3649bc 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "a125160696d1453b04a66c967819f90e70e03a52" + "sha": "b716e1c8ecd90142b498b95e7f8830835529cf4a" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "32c758f11b8c578f515a746c9d263b82a615a77c" + "sha": "27f4406999b1eee29e04b09b2423a8e4646c7e24" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "32c758f11b8c578f515a746c9d263b82a615a77c" + "sha": "27f4406999b1eee29e04b09b2423a8e4646c7e24" } } ], @@ -60,6 +60,7 @@ ".kokoro/docs/common.cfg", ".kokoro/docs/docs-presubmit.cfg", ".kokoro/docs/docs.cfg", + ".kokoro/populate-secrets.sh", ".kokoro/presubmit/common.cfg", ".kokoro/presubmit/presubmit.cfg", ".kokoro/presubmit/system-2.7.cfg", From 04b147cd1c003c7e2d17713d90608903ad21e757 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 25 Sep 2020 13:20:06 -0700 Subject: [PATCH 0938/2016] chore: update protoc-generated comments (via synth) (#270) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/5da1d2aa-a963-44d1-952a-3ed428de6719/targets - [ ] To automatically regenerate this PR, check this box. PiperOrigin-RevId: 327026955 Source-Link: https://github.com/googleapis/googleapis/commit/0dc0a6c0f1a9f979bc0690f0caa5fbafa3000c2c --- .../google/cloud/bigquery_v2/proto/encryption_config_pb2.py | 2 +- .../google/cloud/bigquery_v2/proto/model_pb2.py | 2 +- .../google/cloud/bigquery_v2/proto/model_reference_pb2.py | 2 +- .../google/cloud/bigquery_v2/proto/standard_sql_pb2.py | 2 +- packages/google-cloud-bigquery/synth.metadata | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py index 5147743b61dd..5ae21ea6f49f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/encryption_config.proto - +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py index f485c45684f0..7b66be8f7131 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model.proto - +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py index 07d7e4c4b0a8..2411c48632c8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model_reference.proto - +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py index 15f6715a253d..bfe77f934338 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/standard_sql.proto - +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index e786fb3649bc..7fdc4fb28c99 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -11,8 +11,8 @@ "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "868615a5c1c1059c636bb3d82a555edb1d5a251e", - "internalRef": "324294521" + "sha": "0dc0a6c0f1a9f979bc0690f0caa5fbafa3000c2c", + "internalRef": "327026955" } }, { From 3f21b433de3b30982c2ed9b4105e8ec3428631ea Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 30 Sep 2020 23:47:22 +0200 Subject: [PATCH 0939/2016] chore: Prepare for 2.0 release (#278) * Remove BQ Storage v1beta1 compatibility code * Adjust code to new BQ Storage 2.0 * Remove Python 2/3 compatibility code * Bump test coverage to 100% * Update supported Python versions in README * Add UPGRADING guide. * Regenerate bigquery_v2 code with microgenerator * Adjust hand-written unit tests to regened BQ v2 * Adjust samples to BQ v2 regenerated code * Adjust system tests to regenerated BQ v2 * Skip failing generated unit test The assertion seems to fail for a banal reason, i.e. an extra newline in the string representation. * Delete Kokoro config for Python 2.7 * Fix docs build * Undelete failing test, but mark as skipped * Fix namespace name in docstrings and comments * Define minimum dependency versions for Python 3.6 * Exclude autogenerated docs from docs index * Exclude generated services from the library There are currently no public API endpoints for the ModelServiceClient, thus there is no point in generating that code in the first place. * Bump minumum proto-plus version to 1.10.0 The old pin (1.4.0) does not work, tests detected some problem. * Include generated types in the docs and rebuild * Ignore skipped test in coverage check * Explain moved enums in UPGRADING guide --- .../.kokoro/presubmit/presubmit.cfg | 8 +- .../.kokoro/presubmit/system-2.7.cfg | 7 - .../.kokoro/samples/python3.6/common.cfg | 6 + .../.kokoro/samples/python3.7/common.cfg | 6 + .../.kokoro/samples/python3.8/common.cfg | 6 + .../google-cloud-bigquery/CONTRIBUTING.rst | 19 - packages/google-cloud-bigquery/README.rst | 11 +- packages/google-cloud-bigquery/UPGRADING.md | 59 ++ .../google-cloud-bigquery/docs/UPGRADING.md | 1 + .../docs/bigquery_v2/services.rst | 6 + .../docs/bigquery_v2/types.rst | 5 + packages/google-cloud-bigquery/docs/conf.py | 1 + .../docs/gapic/v2/enums.rst | 8 - .../docs/gapic/v2/types.rst | 6 - packages/google-cloud-bigquery/docs/index.rst | 10 + .../google-cloud-bigquery/docs/reference.rst | 4 +- .../google/cloud/bigquery/_pandas_helpers.py | 77 +- .../google/cloud/bigquery/client.py | 12 +- .../google/cloud/bigquery/dbapi/_helpers.py | 5 +- .../google/cloud/bigquery/dbapi/connection.py | 2 +- .../google/cloud/bigquery/dbapi/cursor.py | 61 +- .../google/cloud/bigquery/enums.py | 6 +- .../google/cloud/bigquery/magics/magics.py | 8 +- .../google/cloud/bigquery/model.py | 13 +- .../google/cloud/bigquery/routine.py | 18 +- .../google/cloud/bigquery/schema.py | 52 +- .../google/cloud/bigquery/table.py | 102 +- .../google/cloud/bigquery_v2/__init__.py | 41 +- .../cloud/bigquery_v2/gapic/__init__.py | 0 .../google/cloud/bigquery_v2/gapic/enums.py | 171 ---- .../cloud/bigquery_v2/proto/__init__.py | 0 .../proto/encryption_config_pb2_grpc.py | 3 - .../proto/location_metadata_pb2.py | 98 -- .../proto/location_metadata_pb2_grpc.py | 2 - .../cloud/bigquery_v2/proto/model_pb2_grpc.py | 214 ---- .../proto/model_reference_pb2_grpc.py | 3 - .../proto/standard_sql_pb2_grpc.py | 3 - .../google/cloud/bigquery_v2/py.typed | 2 + .../google/cloud/bigquery_v2/types.py | 58 -- .../cloud/bigquery_v2/types/__init__.py | 47 + .../bigquery_v2/types/encryption_config.py | 44 + .../google/cloud/bigquery_v2/types/model.py | 966 ++++++++++++++++++ .../bigquery_v2/types/model_reference.py | 49 + .../cloud/bigquery_v2/types/standard_sql.py | 106 ++ packages/google-cloud-bigquery/noxfile.py | 33 +- .../samples/create_routine.py | 2 +- .../samples/tests/conftest.py | 2 +- .../samples/tests/test_routine_samples.py | 8 +- .../scripts/fixup_bigquery_v2_keywords.py | 181 ++++ packages/google-cloud-bigquery/setup.py | 32 +- packages/google-cloud-bigquery/synth.metadata | 105 +- packages/google-cloud-bigquery/synth.py | 80 +- .../testing/constraints-2.7.txt | 9 - .../testing/constraints-3.5.txt | 12 - .../testing/constraints-3.6.txt | 16 + .../google-cloud-bigquery/tests/system.py | 107 +- .../enums/test_standard_sql_data_types.py | 7 +- .../tests/unit/model/test_model.py | 6 +- .../tests/unit/routine/test_routine.py | 9 +- .../unit/routine/test_routine_argument.py | 6 +- .../tests/unit/test__pandas_helpers.py | 40 +- .../tests/unit/test_client.py | 111 +- .../tests/unit/test_dbapi_connection.py | 36 +- .../tests/unit/test_dbapi_cursor.py | 108 +- .../tests/unit/test_job.py | 24 +- .../tests/unit/test_magics.py | 48 +- .../tests/unit/test_opentelemetry_tracing.py | 2 +- .../tests/unit/test_schema.py | 42 +- .../tests/unit/test_table.py | 304 ++---- 69 files changed, 1974 insertions(+), 1682 deletions(-) delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/system-2.7.cfg create mode 100644 packages/google-cloud-bigquery/UPGRADING.md create mode 120000 packages/google-cloud-bigquery/docs/UPGRADING.md create mode 100644 packages/google-cloud-bigquery/docs/bigquery_v2/services.rst create mode 100644 packages/google-cloud-bigquery/docs/bigquery_v2/types.rst delete mode 100644 packages/google-cloud-bigquery/docs/gapic/v2/enums.rst delete mode 100644 packages/google-cloud-bigquery/docs/gapic/v2/types.rst delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/__init__.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/__init__.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/py.typed delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py create mode 100644 packages/google-cloud-bigquery/scripts/fixup_bigquery_v2_keywords.py delete mode 100644 packages/google-cloud-bigquery/testing/constraints-2.7.txt delete mode 100644 packages/google-cloud-bigquery/testing/constraints-3.5.txt diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg index b158096f0ae2..8f43917d92fe 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg @@ -1,7 +1 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Disable system tests. -env_vars: { - key: "RUN_SYSTEM_TESTS" - value: "false" -} +# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/system-2.7.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/system-2.7.cfg deleted file mode 100644 index 3b6523a197cc..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/system-2.7.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "system-2.7" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg index a56768eae259..f3b930960b41 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.6" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py36" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg index c93747180b2e..fc06545655d4 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.7" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py37" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg index 9808f15e32a9..2b0bf59b3ef5 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.8" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py38" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index 3366287d68be..b3b802b49516 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -80,25 +80,6 @@ We use `nox `__ to instrument our tests. .. nox: https://pypi.org/project/nox/ -Note on Editable Installs / Develop Mode -======================================== - -- As mentioned previously, using ``setuptools`` in `develop mode`_ - or a ``pip`` `editable install`_ is not possible with this - library. This is because this library uses `namespace packages`_. - For context see `Issue #2316`_ and the relevant `PyPA issue`_. - - Since ``editable`` / ``develop`` mode can't be used, packages - need to be installed directly. Hence your changes to the source - tree don't get incorporated into the **already installed** - package. - -.. _namespace packages: https://www.python.org/dev/peps/pep-0420/ -.. _Issue #2316: https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2316 -.. _PyPA issue: https://github.com/pypa/packaging-problems/issues/12 -.. _develop mode: https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode -.. _editable install: https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs - ***************************************** I'm getting weird errors... Can you help? ***************************************** diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index c6bc17834f35..c7d50d7291a1 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -52,11 +52,14 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.5 +Python >= 3.6 -Deprecated Python Versions -^^^^^^^^^^^^^^^^^^^^^^^^^^ -Python == 2.7. Python 2.7 support will be removed on January 1, 2020. +Unsupported Python Versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Python == 2.7, Python == 3.5. + +The last version of this library compatible with Python 2.7 and 3.5 is +`google-cloud-bigquery==1.28.0`. Mac/Linux diff --git a/packages/google-cloud-bigquery/UPGRADING.md b/packages/google-cloud-bigquery/UPGRADING.md new file mode 100644 index 000000000000..a4ba0efd2cab --- /dev/null +++ b/packages/google-cloud-bigquery/UPGRADING.md @@ -0,0 +1,59 @@ + + + +# 2.0.0 Migration Guide + +The 2.0 release of the `google-cloud-bigquery` client drops support for Python +versions below 3.6. The client surface itself has not changed, but the 1.x series +will not be receiving any more feature updates or bug fixes. You are thus +encouraged to upgrade to the 2.x series. + +If you experience issues or have questions, please file an +[issue](https://github.com/googleapis/python-bigquery/issues). + + +## Supported Python Versions + +> **WARNING**: Breaking change + +The 2.0.0 release requires Python 3.6+. + + +## Supported BigQuery Storage Clients + +The 2.0.0 release requires BigQuery Storage `>= 2.0.0`, which dropped support +for `v1beta1` and `v1beta2` versions of the BigQuery Storage API. If you want to +use a BigQuery Storage client, it must be the one supporting the `v1` API version. + + +## Changed GAPIC Enums Path + +> **WARNING**: Breaking change + +Generated GAPIC enum types have been moved under `types`. Import paths need to be +adjusted. + +**Before:** +```py +from google.cloud.bigquery_v2.gapic import enums + +distance_type = enums.Model.DistanceType.COSINE +``` + +**After:** +```py +from google.cloud.bigquery_v2 import types + +distance_type = types.Model.DistanceType.COSINE +``` \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/UPGRADING.md b/packages/google-cloud-bigquery/docs/UPGRADING.md new file mode 120000 index 000000000000..01097c8c0fb8 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/UPGRADING.md @@ -0,0 +1 @@ +../UPGRADING.md \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/bigquery_v2/services.rst b/packages/google-cloud-bigquery/docs/bigquery_v2/services.rst new file mode 100644 index 000000000000..65fbb438c70b --- /dev/null +++ b/packages/google-cloud-bigquery/docs/bigquery_v2/services.rst @@ -0,0 +1,6 @@ +Services for Google Cloud Bigquery v2 API +========================================= + +.. automodule:: google.cloud.bigquery_v2.services.model_service + :members: + :inherited-members: diff --git a/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst b/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst new file mode 100644 index 000000000000..f4380995849e --- /dev/null +++ b/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst @@ -0,0 +1,5 @@ +Types for Google Cloud Bigquery v2 API +====================================== + +.. automodule:: google.cloud.bigquery_v2.types + :members: diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index b38bdd1fffd6..ee59f3492a90 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -100,6 +100,7 @@ "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", + "bigquery_v2/services.rst", # generated by the code generator ] # The reST default role (used for this markup: `text`) to use for all diff --git a/packages/google-cloud-bigquery/docs/gapic/v2/enums.rst b/packages/google-cloud-bigquery/docs/gapic/v2/enums.rst deleted file mode 100644 index 0e0f05adaea5..000000000000 --- a/packages/google-cloud-bigquery/docs/gapic/v2/enums.rst +++ /dev/null @@ -1,8 +0,0 @@ -Enums for BigQuery API Client -============================= - -.. autoclass:: google.cloud.bigquery_v2.gapic.enums.Model - :members: - -.. autoclass:: google.cloud.bigquery_v2.gapic.enums.StandardSqlDataType - :members: diff --git a/packages/google-cloud-bigquery/docs/gapic/v2/types.rst b/packages/google-cloud-bigquery/docs/gapic/v2/types.rst deleted file mode 100644 index 99b954eca86d..000000000000 --- a/packages/google-cloud-bigquery/docs/gapic/v2/types.rst +++ /dev/null @@ -1,6 +0,0 @@ -Types for BigQuery API Client -============================= - -.. automodule:: google.cloud.bigquery_v2.types - :members: - :noindex: \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/index.rst b/packages/google-cloud-bigquery/docs/index.rst index 62a82e0e95c5..3f8ba23046aa 100644 --- a/packages/google-cloud-bigquery/docs/index.rst +++ b/packages/google-cloud-bigquery/docs/index.rst @@ -27,6 +27,16 @@ API Reference reference dbapi +Migration Guide +--------------- + +See the guide below for instructions on migrating to the 2.x release of this library. + +.. toctree:: + :maxdepth: 2 + + UPGRADING + Changelog --------- diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 981059de5226..21dd8e43d42e 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -182,6 +182,7 @@ Encryption Configuration encryption_configuration.EncryptionConfiguration + Additional Types ================ @@ -190,5 +191,4 @@ Protocol buffer classes for working with the Models API. .. toctree:: :maxdepth: 2 - gapic/v2/enums - gapic/v2/types + bigquery_v2/types diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 953b7d0fe199..57c8f95f6f9e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -22,11 +22,6 @@ import six from six.moves import queue -try: - from google.cloud import bigquery_storage_v1 -except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None - try: import pandas except ImportError: # pragma: NO COVER @@ -287,14 +282,6 @@ def dataframe_to_bq_schema(dataframe, bq_schema): """ if bq_schema: bq_schema = schema._to_schema_fields(bq_schema) - if six.PY2: - for field in bq_schema: - if field.field_type in schema._STRUCT_TYPES: - raise ValueError( - "Uploading dataframes with struct (record) column types " - "is not supported under Python2. See: " - "https://github.com/googleapis/python-bigquery/issues/21" - ) bq_schema_index = {field.name: field for field in bq_schema} bq_schema_unused = set(bq_schema_index.keys()) else: @@ -578,19 +565,7 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page): def _download_table_bqstorage_stream( download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): - # Passing a BQ Storage client in implies that the BigQuery Storage library - # is available and can be imported. - from google.cloud import bigquery_storage_v1beta1 - - # We want to preserve comaptibility with the v1beta1 BQ Storage clients, - # thus adjust constructing the rowstream if needed. - # The assumption is that the caller provides a BQ Storage `session` that is - # compatible with the version of the BQ Storage client passed in. - if isinstance(bqstorage_client, bigquery_storage_v1beta1.BigQueryStorageClient): - position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) - rowstream = bqstorage_client.read_rows(position).rows(session) - else: - rowstream = bqstorage_client.read_rows(stream.name).rows(session) + rowstream = bqstorage_client.read_rows(stream.name).rows(session) for page in rowstream.pages: if download_state.done: @@ -625,8 +600,7 @@ def _download_table_bqstorage( # Passing a BQ Storage client in implies that the BigQuery Storage library # is available and can be imported. - from google.cloud import bigquery_storage_v1 - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage if "$" in table.table_id: raise ValueError( @@ -637,41 +611,18 @@ def _download_table_bqstorage( requested_streams = 1 if preserve_order else 0 - # We want to preserve comaptibility with the v1beta1 BQ Storage clients, - # thus adjust the session creation if needed. - if isinstance(bqstorage_client, bigquery_storage_v1beta1.BigQueryStorageClient): - warnings.warn( - "Support for BigQuery Storage v1beta1 clients is deprecated, please " - "consider upgrading the client to BigQuery Storage v1 stable version.", - category=DeprecationWarning, - ) - read_options = bigquery_storage_v1beta1.types.TableReadOptions() - - if selected_fields is not None: - for field in selected_fields: - read_options.selected_fields.append(field.name) - - session = bqstorage_client.create_read_session( - table.to_bqstorage(v1beta1=True), - "projects/{}".format(project_id), - format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, - read_options=read_options, - requested_streams=requested_streams, - ) - else: - requested_session = bigquery_storage_v1.types.ReadSession( - table=table.to_bqstorage(), - data_format=bigquery_storage_v1.enums.DataFormat.ARROW, - ) - if selected_fields is not None: - for field in selected_fields: - requested_session.read_options.selected_fields.append(field.name) - - session = bqstorage_client.create_read_session( - parent="projects/{}".format(project_id), - read_session=requested_session, - max_stream_count=requested_streams, - ) + requested_session = bigquery_storage.types.ReadSession( + table=table.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW + ) + if selected_fields is not None: + for field in selected_fields: + requested_session.read_options.selected_fields.append(field.name) + + session = bqstorage_client.create_read_session( + parent="projects/{}".format(project_id), + read_session=requested_session, + max_stream_count=requested_streams, + ) _LOGGER.debug( "Started reading table '{}.{}.{}' with BQ Storage API session '{}'.".format( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index d2aa45999809..fcb18385d17f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -17,11 +17,7 @@ from __future__ import absolute_import from __future__ import division -try: - from collections import abc as collections_abc -except ImportError: # Python 2.7 - import collections as collections_abc - +from collections import abc as collections_abc import copy import functools import gzip @@ -435,11 +431,11 @@ def _create_bqstorage_client(self): warning and return ``None``. Returns: - Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]: + Optional[google.cloud.bigquery_storage.BigQueryReadClient]: A BigQuery Storage API client. """ try: - from google.cloud import bigquery_storage_v1 + from google.cloud import bigquery_storage except ImportError: warnings.warn( "Cannot create BigQuery Storage client, the dependency " @@ -447,7 +443,7 @@ def _create_bqstorage_client(self): ) return None - return bigquery_storage_v1.BigQueryReadClient(credentials=self._credentials) + return bigquery_storage.BigQueryReadClient(credentials=self._credentials) def create_dataset( self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 1bcf45f3163b..fdf4e17c30c1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -12,11 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - from collections import abc as collections_abc -except ImportError: # Python 2.7 - import collections as collections_abc +from collections import abc as collections_abc import datetime import decimal import functools diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py index 464b0fd066bf..300c77dc9f82 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py @@ -73,7 +73,7 @@ def close(self): if self._owns_bqstorage_client: # There is no close() on the BQ Storage client itself. - self._bqstorage_client.transport.channel.close() + self._bqstorage_client._transport.grpc_channel.close() for cursor_ in self._cursors_created: cursor_.close() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 7a10637f0669..63264e9abc5a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -15,14 +15,8 @@ """Cursor for the Google BigQuery DB-API.""" import collections +from collections import abc as collections_abc import copy -import warnings - -try: - from collections import abc as collections_abc -except ImportError: # Python 2.7 - import collections as collections_abc - import logging import six @@ -267,54 +261,27 @@ def _bqstorage_fetch(self, bqstorage_client): A sequence of rows, represented as dictionaries. """ # Hitting this code path with a BQ Storage client instance implies that - # bigquery_storage_v1* can indeed be imported here without errors. - from google.cloud import bigquery_storage_v1 - from google.cloud import bigquery_storage_v1beta1 + # bigquery_storage can indeed be imported here without errors. + from google.cloud import bigquery_storage table_reference = self._query_job.destination - is_v1beta1_client = isinstance( - bqstorage_client, bigquery_storage_v1beta1.BigQueryStorageClient + requested_session = bigquery_storage.types.ReadSession( + table=table_reference.to_bqstorage(), + data_format=bigquery_storage.types.DataFormat.ARROW, + ) + read_session = bqstorage_client.create_read_session( + parent="projects/{}".format(table_reference.project), + read_session=requested_session, + # a single stream only, as DB API is not well-suited for multithreading + max_stream_count=1, ) - - # We want to preserve compatibility with the v1beta1 BQ Storage clients, - # thus adjust the session creation if needed. - if is_v1beta1_client: - warnings.warn( - "Support for BigQuery Storage v1beta1 clients is deprecated, please " - "consider upgrading the client to BigQuery Storage v1 stable version.", - category=DeprecationWarning, - ) - read_session = bqstorage_client.create_read_session( - table_reference.to_bqstorage(v1beta1=True), - "projects/{}".format(table_reference.project), - # a single stream only, as DB API is not well-suited for multithreading - requested_streams=1, - format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, - ) - else: - requested_session = bigquery_storage_v1.types.ReadSession( - table=table_reference.to_bqstorage(), - data_format=bigquery_storage_v1.enums.DataFormat.ARROW, - ) - read_session = bqstorage_client.create_read_session( - parent="projects/{}".format(table_reference.project), - read_session=requested_session, - # a single stream only, as DB API is not well-suited for multithreading - max_stream_count=1, - ) if not read_session.streams: return iter([]) # empty table, nothing to read - if is_v1beta1_client: - read_position = bigquery_storage_v1beta1.types.StreamPosition( - stream=read_session.streams[0], - ) - read_rows_stream = bqstorage_client.read_rows(read_position) - else: - stream_name = read_session.streams[0].name - read_rows_stream = bqstorage_client.read_rows(stream_name) + stream_name = read_session.streams[0].name + read_rows_stream = bqstorage_client.read_rows(stream_name) rows_iterable = read_rows_stream.rows(read_session) return rows_iterable diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 29fe543f6505..3247372e3660 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -17,7 +17,7 @@ import enum import six -from google.cloud.bigquery_v2.gapic import enums as gapic_enums +from google.cloud.bigquery_v2 import types as gapic_types _SQL_SCALAR_TYPES = frozenset( @@ -46,13 +46,13 @@ def _make_sql_scalars_enum(): "StandardSqlDataTypes", ( (member.name, member.value) - for member in gapic_enums.StandardSqlDataType.TypeKind + for member in gapic_types.StandardSqlDataType.TypeKind if member.name in _SQL_SCALAR_TYPES ), ) # make sure the docstring for the new enum is also correct - orig_doc = gapic_enums.StandardSqlDataType.TypeKind.__doc__ + orig_doc = gapic_types.StandardSqlDataType.TypeKind.__doc__ skip_pattern = re.compile( "|".join(_SQL_NONSCALAR_TYPES) + "|because a JSON object" # the second description line of STRUCT member diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 4842c76803a2..22175ee45e4c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -637,7 +637,7 @@ def _make_bqstorage_client(use_bqstorage_api, credentials): return None try: - from google.cloud import bigquery_storage_v1 + from google.cloud import bigquery_storage except ImportError as err: customized_error = ImportError( "The default BigQuery Storage API client cannot be used, install " @@ -655,7 +655,7 @@ def _make_bqstorage_client(use_bqstorage_api, credentials): ) six.raise_from(customized_error, err) - return bigquery_storage_v1.BigQueryReadClient( + return bigquery_storage.BigQueryReadClient( credentials=credentials, client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) @@ -670,10 +670,10 @@ def _close_transports(client, bqstorage_client): Args: client (:class:`~google.cloud.bigquery.client.Client`): bqstorage_client - (Optional[:class:`~google.cloud.bigquery_storage_v1.BigQueryReadClient`]): + (Optional[:class:`~google.cloud.bigquery_storage.BigQueryReadClient`]): A client for the BigQuery Storage API. """ client.close() if bqstorage_client is not None: - bqstorage_client.transport.channel.close() + bqstorage_client._transport.grpc_channel.close() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index d3fe8a9379af..092d98c2e9b2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -55,7 +55,7 @@ class Model(object): def __init__(self, model_ref): # Use _proto on read-only properties to use it's built-in type # conversion. - self._proto = types.Model() + self._proto = types.Model()._pb # Use _properties on read-write properties to match the REST API # semantics. The BigQuery API makes a distinction between an unset @@ -151,13 +151,13 @@ def modified(self): @property def model_type(self): - """google.cloud.bigquery_v2.gapic.enums.Model.ModelType: Type of the + """google.cloud.bigquery_v2.types.Model.ModelType: Type of the model resource. Read-only. The value is one of elements of the - :class:`~google.cloud.bigquery_v2.gapic.enums.Model.ModelType` + :class:`~google.cloud.bigquery_v2.types.Model.ModelType` enumeration. """ return self._proto.model_type @@ -306,7 +306,7 @@ def from_api_repr(cls, resource): training_run["startTime"] = datetime_helpers.to_rfc3339(start_time) this._proto = json_format.ParseDict( - resource, types.Model(), ignore_unknown_fields=True + resource, types.Model()._pb, ignore_unknown_fields=True ) return this @@ -326,7 +326,7 @@ class ModelReference(object): """ def __init__(self): - self._proto = types.ModelReference() + self._proto = types.ModelReference()._pb self._properties = {} @property @@ -370,8 +370,9 @@ def from_api_repr(cls, resource): # field values. ref._properties = resource ref._proto = json_format.ParseDict( - resource, types.ModelReference(), ignore_unknown_fields=True + resource, types.ModelReference()._pb, ignore_unknown_fields=True ) + return ref @classmethod diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py index 03423c01b71a..6a0ed9fb0f92 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py @@ -189,14 +189,17 @@ def return_type(self): resource = self._properties.get(self._PROPERTY_TO_API_FIELD["return_type"]) if not resource: return resource + output = google.cloud.bigquery_v2.types.StandardSqlDataType() - output = json_format.ParseDict(resource, output, ignore_unknown_fields=True) - return output + raw_protobuf = json_format.ParseDict( + resource, output._pb, ignore_unknown_fields=True + ) + return type(output).wrap(raw_protobuf) @return_type.setter def return_type(self, value): if value: - resource = json_format.MessageToDict(value) + resource = json_format.MessageToDict(value._pb) else: resource = None self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource @@ -357,14 +360,17 @@ def data_type(self): resource = self._properties.get(self._PROPERTY_TO_API_FIELD["data_type"]) if not resource: return resource + output = google.cloud.bigquery_v2.types.StandardSqlDataType() - output = json_format.ParseDict(resource, output, ignore_unknown_fields=True) - return output + raw_protobuf = json_format.ParseDict( + resource, output._pb, ignore_unknown_fields=True + ) + return type(output).wrap(raw_protobuf) @data_type.setter def data_type(self, value): if value: - resource = json_format.MessageToDict(value) + resource = json_format.MessageToDict(value._pb) else: resource = None self._properties[self._PROPERTY_TO_API_FIELD["data_type"]] = resource diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index c1b2588be811..8ae0a3a85e93 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -25,22 +25,22 @@ # https://cloud.google.com/bigquery/data-types#legacy_sql_data_types # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types LEGACY_TO_STANDARD_TYPES = { - "STRING": types.StandardSqlDataType.STRING, - "BYTES": types.StandardSqlDataType.BYTES, - "INTEGER": types.StandardSqlDataType.INT64, - "INT64": types.StandardSqlDataType.INT64, - "FLOAT": types.StandardSqlDataType.FLOAT64, - "FLOAT64": types.StandardSqlDataType.FLOAT64, - "NUMERIC": types.StandardSqlDataType.NUMERIC, - "BOOLEAN": types.StandardSqlDataType.BOOL, - "BOOL": types.StandardSqlDataType.BOOL, - "GEOGRAPHY": types.StandardSqlDataType.GEOGRAPHY, - "RECORD": types.StandardSqlDataType.STRUCT, - "STRUCT": types.StandardSqlDataType.STRUCT, - "TIMESTAMP": types.StandardSqlDataType.TIMESTAMP, - "DATE": types.StandardSqlDataType.DATE, - "TIME": types.StandardSqlDataType.TIME, - "DATETIME": types.StandardSqlDataType.DATETIME, + "STRING": types.StandardSqlDataType.TypeKind.STRING, + "BYTES": types.StandardSqlDataType.TypeKind.BYTES, + "INTEGER": types.StandardSqlDataType.TypeKind.INT64, + "INT64": types.StandardSqlDataType.TypeKind.INT64, + "FLOAT": types.StandardSqlDataType.TypeKind.FLOAT64, + "FLOAT64": types.StandardSqlDataType.TypeKind.FLOAT64, + "NUMERIC": types.StandardSqlDataType.TypeKind.NUMERIC, + "BOOLEAN": types.StandardSqlDataType.TypeKind.BOOL, + "BOOL": types.StandardSqlDataType.TypeKind.BOOL, + "GEOGRAPHY": types.StandardSqlDataType.TypeKind.GEOGRAPHY, + "RECORD": types.StandardSqlDataType.TypeKind.STRUCT, + "STRUCT": types.StandardSqlDataType.TypeKind.STRUCT, + "TIMESTAMP": types.StandardSqlDataType.TypeKind.TIMESTAMP, + "DATE": types.StandardSqlDataType.TypeKind.DATE, + "TIME": types.StandardSqlDataType.TypeKind.TIME, + "DATETIME": types.StandardSqlDataType.TypeKind.DATETIME, # no direct conversion from ARRAY, the latter is represented by mode="REPEATED" } """String names of the legacy SQL types to integer codes of Standard SQL types.""" @@ -209,26 +209,34 @@ def to_standard_sql(self): sql_type = types.StandardSqlDataType() if self.mode == "REPEATED": - sql_type.type_kind = types.StandardSqlDataType.ARRAY + sql_type.type_kind = types.StandardSqlDataType.TypeKind.ARRAY else: sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get( - self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED + self.field_type, + types.StandardSqlDataType.TypeKind.TYPE_KIND_UNSPECIFIED, ) - if sql_type.type_kind == types.StandardSqlDataType.ARRAY: # noqa: E721 + if sql_type.type_kind == types.StandardSqlDataType.TypeKind.ARRAY: # noqa: E721 array_element_type = LEGACY_TO_STANDARD_TYPES.get( - self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED + self.field_type, + types.StandardSqlDataType.TypeKind.TYPE_KIND_UNSPECIFIED, ) sql_type.array_element_type.type_kind = array_element_type # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type - if array_element_type == types.StandardSqlDataType.STRUCT: # noqa: E721 + if ( + array_element_type + == types.StandardSqlDataType.TypeKind.STRUCT # noqa: E721 + ): sql_type.array_element_type.struct_type.fields.extend( field.to_standard_sql() for field in self.fields ) - elif sql_type.type_kind == types.StandardSqlDataType.STRUCT: # noqa: E721 + elif ( + sql_type.type_kind + == types.StandardSqlDataType.TypeKind.STRUCT # noqa: E721 + ): sql_type.struct_type.fields.extend( field.to_standard_sql() for field in self.fields ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d9e5f7773af1..902a7040a315 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -26,12 +26,6 @@ import six -try: - # Needed for the to_bqstorage() method. - from google.cloud import bigquery_storage_v1beta1 -except ImportError: # pragma: NO COVER - bigquery_storage_v1beta1 = None - try: import pandas except ImportError: # pragma: NO COVER @@ -228,7 +222,7 @@ def to_api_repr(self): "tableId": self._table_id, } - def to_bqstorage(self, v1beta1=False): + def to_bqstorage(self): """Construct a BigQuery Storage API representation of this table. Install the ``google-cloud-bigquery-storage`` package to use this @@ -237,41 +231,21 @@ def to_bqstorage(self, v1beta1=False): If the ``table_id`` contains a partition identifier (e.g. ``my_table$201812``) or a snapshot identifier (e.g. ``mytable@1234567890``), it is ignored. Use - :class:`google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions` + :class:`google.cloud.bigquery_storage.types.ReadSession.TableReadOptions` to filter rows by partition. Use - :class:`google.cloud.bigquery_storage_v1.types.ReadSession.TableModifiers` + :class:`google.cloud.bigquery_storage.types.ReadSession.TableModifiers` to select a specific snapshot to read from. - Args: - v1beta1 (Optiona[bool]): - If :data:`True`, return representation compatible with BigQuery - Storage ``v1beta1`` version. Defaults to :data:`False`. - Returns: - Union[str, google.cloud.bigquery_storage_v1beta1.types.TableReference:]: - A reference to this table in the BigQuery Storage API. - - Raises: - ValueError: - If ``v1beta1`` compatibility is requested, but the - :mod:`google.cloud.bigquery_storage_v1beta1` module cannot be imported. + str: A reference to this table in the BigQuery Storage API. """ - if v1beta1 and bigquery_storage_v1beta1 is None: - raise ValueError(_NO_BQSTORAGE_ERROR) table_id, _, _ = self._table_id.partition("@") table_id, _, _ = table_id.partition("$") - if v1beta1: - table_ref = bigquery_storage_v1beta1.types.TableReference( - project_id=self._project, - dataset_id=self._dataset_id, - table_id=table_id, - ) - else: - table_ref = "projects/{}/datasets/{}/tables/{}".format( - self._project, self._dataset_id, table_id, - ) + table_ref = "projects/{}/datasets/{}/tables/{}".format( + self._project, self._dataset_id, table_id, + ) return table_ref @@ -876,19 +850,13 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) - def to_bqstorage(self, v1beta1=False): + def to_bqstorage(self): """Construct a BigQuery Storage API representation of this table. - Args: - v1beta1 (Optiona[bool]): - If :data:`True`, return representation compatible with BigQuery - Storage ``v1beta1`` version. Defaults to :data:`False`. - Returns: - Union[str, google.cloud.bigquery_storage_v1beta1.types.TableReference:]: - A reference to this table in the BigQuery Storage API. + str: A reference to this table in the BigQuery Storage API. """ - return self.reference.to_bqstorage(v1beta1=v1beta1) + return self.reference.to_bqstorage() def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" @@ -1096,19 +1064,13 @@ def from_string(cls, full_table_id): {"tableReference": TableReference.from_string(full_table_id).to_api_repr()} ) - def to_bqstorage(self, v1beta1=False): + def to_bqstorage(self): """Construct a BigQuery Storage API representation of this table. - Args: - v1beta1 (Optiona[bool]): - If :data:`True`, return representation compatible with BigQuery - Storage ``v1beta1`` version. Defaults to :data:`False`. - Returns: - Union[str, google.cloud.bigquery_storage_v1beta1.types.TableReference:]: - A reference to this table in the BigQuery Storage API. + str: A reference to this table in the BigQuery Storage API. """ - return self.reference.to_bqstorage(v1beta1=v1beta1) + return self.reference.to_bqstorage() def _row_from_mapping(mapping, schema): @@ -1559,7 +1521,7 @@ def to_arrow( progress_bar.close() finally: if owns_bqstorage_client: - bqstorage_client.transport.channel.close() + bqstorage_client._transport.grpc_channel.close() if record_batches: return pyarrow.Table.from_batches(record_batches) @@ -1731,28 +1693,22 @@ def to_dataframe( # When converting timestamp values to nanosecond precision, the result # can be out of pyarrow bounds. To avoid the error when converting to # Pandas, we set the timestamp_as_object parameter to True, if necessary. - # - # NOTE: Python 3+ only, as timestamp_as_object parameter is only supported - # in pyarrow>=1.0, but the latter is not compatible with Python 2. - if six.PY2: - extra_kwargs = {} + types_to_check = { + pyarrow.timestamp("us"), + pyarrow.timestamp("us", tz=pytz.UTC), + } + + for column in record_batch: + if column.type in types_to_check: + try: + column.cast("timestamp[ns]") + except pyarrow.lib.ArrowInvalid: + timestamp_as_object = True + break else: - types_to_check = { - pyarrow.timestamp("us"), - pyarrow.timestamp("us", tz=pytz.UTC), - } - - for column in record_batch: - if column.type in types_to_check: - try: - column.cast("timestamp[ns]") - except pyarrow.lib.ArrowInvalid: - timestamp_as_object = True - break - else: - timestamp_as_object = False - - extra_kwargs = {"timestamp_as_object": timestamp_as_object} + timestamp_as_object = False + + extra_kwargs = {"timestamp_as_object": timestamp_as_object} df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py index e582214329f3..c1989c3b093f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py @@ -1,33 +1,44 @@ # -*- coding: utf-8 -*- -# -# Copyright 2018 Google LLC + +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# https://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# -from __future__ import absolute_import - -import pkg_resources - -__version__ = pkg_resources.get_distribution("google-cloud-bigquery").version # noqa -from google.cloud.bigquery_v2 import types -from google.cloud.bigquery_v2.gapic import enums +from .types.encryption_config import EncryptionConfiguration +from .types.model import DeleteModelRequest +from .types.model import GetModelRequest +from .types.model import ListModelsRequest +from .types.model import ListModelsResponse +from .types.model import Model +from .types.model import PatchModelRequest +from .types.model_reference import ModelReference +from .types.standard_sql import StandardSqlDataType +from .types.standard_sql import StandardSqlField +from .types.standard_sql import StandardSqlStructType __all__ = ( - # google.cloud.bigquery_v2 - "__version__", - "types", - # google.cloud.bigquery_v2 - "enums", + "DeleteModelRequest", + "EncryptionConfiguration", + "GetModelRequest", + "ListModelsRequest", + "ListModelsResponse", + "Model", + "ModelReference", + "PatchModelRequest", + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py deleted file mode 100644 index 10d7c2517296..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic/enums.py +++ /dev/null @@ -1,171 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Wrappers for protocol buffer enum types.""" - -import enum - - -class Model(object): - class DataSplitMethod(enum.IntEnum): - """ - Indicates the method to split input data into multiple tables. - - Attributes: - DATA_SPLIT_METHOD_UNSPECIFIED (int) - RANDOM (int): Splits data randomly. - CUSTOM (int): Splits data with the user provided tags. - SEQUENTIAL (int): Splits data sequentially. - NO_SPLIT (int): Data split will be skipped. - AUTO_SPLIT (int): Splits data automatically: Uses NO_SPLIT if the data size is small. - Otherwise uses RANDOM. - """ - - DATA_SPLIT_METHOD_UNSPECIFIED = 0 - RANDOM = 1 - CUSTOM = 2 - SEQUENTIAL = 3 - NO_SPLIT = 4 - AUTO_SPLIT = 5 - - class DistanceType(enum.IntEnum): - """ - Distance metric used to compute the distance between two points. - - Attributes: - DISTANCE_TYPE_UNSPECIFIED (int) - EUCLIDEAN (int): Eculidean distance. - COSINE (int): Cosine distance. - """ - - DISTANCE_TYPE_UNSPECIFIED = 0 - EUCLIDEAN = 1 - COSINE = 2 - - class LearnRateStrategy(enum.IntEnum): - """ - Indicates the learning rate optimization strategy to use. - - Attributes: - LEARN_RATE_STRATEGY_UNSPECIFIED (int) - LINE_SEARCH (int): Use line search to determine learning rate. - CONSTANT (int): Use a constant learning rate. - """ - - LEARN_RATE_STRATEGY_UNSPECIFIED = 0 - LINE_SEARCH = 1 - CONSTANT = 2 - - class LossType(enum.IntEnum): - """ - Loss metric to evaluate model training performance. - - Attributes: - LOSS_TYPE_UNSPECIFIED (int) - MEAN_SQUARED_LOSS (int): Mean squared loss, used for linear regression. - MEAN_LOG_LOSS (int): Mean log loss, used for logistic regression. - """ - - LOSS_TYPE_UNSPECIFIED = 0 - MEAN_SQUARED_LOSS = 1 - MEAN_LOG_LOSS = 2 - - class ModelType(enum.IntEnum): - """ - Indicates the type of the Model. - - Attributes: - MODEL_TYPE_UNSPECIFIED (int) - LINEAR_REGRESSION (int): Linear regression model. - LOGISTIC_REGRESSION (int): Logistic regression based classification model. - KMEANS (int): K-means clustering model. - TENSORFLOW (int): [Beta] An imported TensorFlow model. - """ - - MODEL_TYPE_UNSPECIFIED = 0 - LINEAR_REGRESSION = 1 - LOGISTIC_REGRESSION = 2 - KMEANS = 3 - TENSORFLOW = 6 - - class OptimizationStrategy(enum.IntEnum): - """ - Indicates the optimization strategy used for training. - - Attributes: - OPTIMIZATION_STRATEGY_UNSPECIFIED (int) - BATCH_GRADIENT_DESCENT (int): Uses an iterative batch gradient descent algorithm. - NORMAL_EQUATION (int): Uses a normal equation to solve linear regression problem. - """ - - OPTIMIZATION_STRATEGY_UNSPECIFIED = 0 - BATCH_GRADIENT_DESCENT = 1 - NORMAL_EQUATION = 2 - - class KmeansEnums(object): - class KmeansInitializationMethod(enum.IntEnum): - """ - Indicates the method used to initialize the centroids for KMeans - clustering algorithm. - - Attributes: - KMEANS_INITIALIZATION_METHOD_UNSPECIFIED (int) - RANDOM (int): Initializes the centroids randomly. - CUSTOM (int): Initializes the centroids using data specified in - kmeans_initialization_column. - """ - - KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0 - RANDOM = 1 - CUSTOM = 2 - - -class StandardSqlDataType(object): - class TypeKind(enum.IntEnum): - """ - Attributes: - TYPE_KIND_UNSPECIFIED (int): Invalid type. - INT64 (int): Encoded as a string in decimal format. - BOOL (int): Encoded as a boolean "false" or "true". - FLOAT64 (int): Encoded as a number, or string "NaN", "Infinity" or "-Infinity". - STRING (int): Encoded as a string value. - BYTES (int): Encoded as a base64 string per RFC 4648, section 4. - TIMESTAMP (int): Encoded as an RFC 3339 timestamp with mandatory "Z" time zone string: - 1985-04-12T23:20:50.52Z - DATE (int): Encoded as RFC 3339 full-date format string: 1985-04-12 - TIME (int): Encoded as RFC 3339 partial-time format string: 23:20:50.52 - DATETIME (int): Encoded as RFC 3339 full-date "T" partial-time: 1985-04-12T23:20:50.52 - GEOGRAPHY (int): Encoded as WKT - NUMERIC (int): Encoded as a decimal string. - ARRAY (int): Encoded as a list with types matching Type.array_type. - STRUCT (int): Encoded as a list with fields of type Type.struct_type[i]. List is - used because a JSON object cannot have duplicate field names. - """ - - TYPE_KIND_UNSPECIFIED = 0 - INT64 = 2 - BOOL = 5 - FLOAT64 = 7 - STRING = 8 - BYTES = 9 - TIMESTAMP = 19 - DATE = 10 - TIME = 20 - DATETIME = 21 - GEOGRAPHY = 22 - NUMERIC = 23 - ARRAY = 16 - STRUCT = 17 diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py deleted file mode 100644 index 8a9393943bdf..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2.py deleted file mode 100644 index 6dd9da52e4ed..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2.py +++ /dev/null @@ -1,98 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/location_metadata.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/location_metadata.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=_b( - "\n\034com.google.cloud.bigquery.v2B\025LocationMetadataProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" - ), - serialized_pb=_b( - '\n6google/cloud/bigquery_v2/proto/location_metadata.proto\x12\x18google.cloud.bigquery.v2\x1a\x1cgoogle/api/annotations.proto".\n\x10LocationMetadata\x12\x1a\n\x12legacy_location_id\x18\x01 \x01(\tBw\n\x1c\x63om.google.cloud.bigquery.v2B\x15LocationMetadataProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' - ), - dependencies=[google_dot_api_dot_annotations__pb2.DESCRIPTOR], -) - - -_LOCATIONMETADATA = _descriptor.Descriptor( - name="LocationMetadata", - full_name="google.cloud.bigquery.v2.LocationMetadata", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="legacy_location_id", - full_name="google.cloud.bigquery.v2.LocationMetadata.legacy_location_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ) - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=114, - serialized_end=160, -) - -DESCRIPTOR.message_types_by_name["LocationMetadata"] = _LOCATIONMETADATA -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -LocationMetadata = _reflection.GeneratedProtocolMessageType( - "LocationMetadata", - (_message.Message,), - dict( - DESCRIPTOR=_LOCATIONMETADATA, - __module__="google.cloud.bigquery_v2.proto.location_metadata_pb2", - __doc__="""BigQuery-specific metadata about a location. This will be set on - google.cloud.location.Location.metadata in Cloud Location API responses. - - - Attributes: - legacy_location_id: - The legacy BigQuery location ID, e.g. ``EU`` for the ``europe`` - location. This is for any API consumers that need the legacy - ``US`` and ``EU`` locations. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.LocationMetadata) - ), -) -_sym_db.RegisterMessage(LocationMetadata) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py deleted file mode 100644 index 07cb78fe03a9..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py +++ /dev/null @@ -1,2 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -import grpc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py deleted file mode 100644 index 13db95717f60..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2_grpc.py +++ /dev/null @@ -1,214 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -from google.cloud.bigquery_v2.proto import ( - model_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2, -) -from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 - - -class ModelServiceStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.GetModel = channel.unary_unary( - "/google.cloud.bigquery.v2.ModelService/GetModel", - request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.GetModelRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, - ) - self.ListModels = channel.unary_unary( - "/google.cloud.bigquery.v2.ModelService/ListModels", - request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsResponse.FromString, - ) - self.PatchModel = channel.unary_unary( - "/google.cloud.bigquery.v2.ModelService/PatchModel", - request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.PatchModelRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, - ) - self.DeleteModel = channel.unary_unary( - "/google.cloud.bigquery.v2.ModelService/DeleteModel", - request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.DeleteModelRequest.SerializeToString, - response_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, - ) - - -class ModelServiceServicer(object): - """Missing associated documentation comment in .proto file.""" - - def GetModel(self, request, context): - """Gets the specified model resource by model ID. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def ListModels(self, request, context): - """Lists all models in the specified dataset. Requires the READER dataset - role. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def PatchModel(self, request, context): - """Patch specific fields in the specified model. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def DeleteModel(self, request, context): - """Deletes the model specified by modelId from the dataset. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - -def add_ModelServiceServicer_to_server(servicer, server): - rpc_method_handlers = { - "GetModel": grpc.unary_unary_rpc_method_handler( - servicer.GetModel, - request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.GetModelRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.SerializeToString, - ), - "ListModels": grpc.unary_unary_rpc_method_handler( - servicer.ListModels, - request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsResponse.SerializeToString, - ), - "PatchModel": grpc.unary_unary_rpc_method_handler( - servicer.PatchModel, - request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.PatchModelRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.SerializeToString, - ), - "DeleteModel": grpc.unary_unary_rpc_method_handler( - servicer.DeleteModel, - request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.DeleteModelRequest.FromString, - response_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - "google.cloud.bigquery.v2.ModelService", rpc_method_handlers - ) - server.add_generic_rpc_handlers((generic_handler,)) - - -# This class is part of an EXPERIMENTAL API. -class ModelService(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def GetModel( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.v2.ModelService/GetModel", - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.GetModelRequest.SerializeToString, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def ListModels( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.v2.ModelService/ListModels", - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsRequest.SerializeToString, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsResponse.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def PatchModel( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.v2.ModelService/PatchModel", - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.PatchModelRequest.SerializeToString, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def DeleteModel( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.v2.ModelService/DeleteModel", - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.DeleteModelRequest.SerializeToString, - google_dot_protobuf_dot_empty__pb2.Empty.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py deleted file mode 100644 index 8a9393943bdf..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py deleted file mode 100644 index 8a9393943bdf..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/py.typed b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/py.typed new file mode 100644 index 000000000000..e73777993c34 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-cloud-bigquery package uses inline types. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py deleted file mode 100644 index 7d4f9b7326e4..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from __future__ import absolute_import -import sys - -from google.api_core.protobuf_helpers import get_messages - -from google.cloud.bigquery_v2.proto import encryption_config_pb2 -from google.cloud.bigquery_v2.proto import model_pb2 -from google.cloud.bigquery_v2.proto import model_reference_pb2 -from google.cloud.bigquery_v2.proto import standard_sql_pb2 -from google.protobuf import empty_pb2 -from google.protobuf import timestamp_pb2 -from google.protobuf import wrappers_pb2 - - -_shared_modules = [ - empty_pb2, - timestamp_pb2, - wrappers_pb2, -] - -_local_modules = [ - encryption_config_pb2, - model_pb2, - model_reference_pb2, - standard_sql_pb2, -] - -names = [] - -for module in _shared_modules: # pragma: NO COVER - for name, message in get_messages(module).items(): - setattr(sys.modules[__name__], name, message) - names.append(name) -for module in _local_modules: - for name, message in get_messages(module).items(): - message.__module__ = "google.cloud.bigquery_v2.types" - setattr(sys.modules[__name__], name, message) - names.append(name) - - -__all__ = tuple(sorted(names)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py new file mode 100644 index 000000000000..a8839c74e5f9 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .encryption_config import EncryptionConfiguration +from .model_reference import ModelReference +from .standard_sql import ( + StandardSqlDataType, + StandardSqlField, + StandardSqlStructType, +) +from .model import ( + Model, + GetModelRequest, + PatchModelRequest, + DeleteModelRequest, + ListModelsRequest, + ListModelsResponse, +) + + +__all__ = ( + "EncryptionConfiguration", + "ModelReference", + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", + "Model", + "GetModelRequest", + "PatchModelRequest", + "DeleteModelRequest", + "ListModelsRequest", + "ListModelsResponse", +) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py new file mode 100644 index 000000000000..6fb90f3409c3 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +from google.protobuf import wrappers_pb2 as wrappers # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", manifest={"EncryptionConfiguration",}, +) + + +class EncryptionConfiguration(proto.Message): + r""" + + Attributes: + kms_key_name (~.wrappers.StringValue): + Optional. Describes the Cloud KMS encryption + key that will be used to protect destination + BigQuery table. The BigQuery Service Account + associated with your project requires access to + this encryption key. + """ + + kms_key_name = proto.Field(proto.MESSAGE, number=1, message=wrappers.StringValue,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py new file mode 100644 index 000000000000..a00720d48d68 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -0,0 +1,966 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +from google.cloud.bigquery_v2.types import encryption_config +from google.cloud.bigquery_v2.types import model_reference as gcb_model_reference +from google.cloud.bigquery_v2.types import standard_sql +from google.protobuf import timestamp_pb2 as timestamp # type: ignore +from google.protobuf import wrappers_pb2 as wrappers # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", + manifest={ + "Model", + "GetModelRequest", + "PatchModelRequest", + "DeleteModelRequest", + "ListModelsRequest", + "ListModelsResponse", + }, +) + + +class Model(proto.Message): + r""" + + Attributes: + etag (str): + Output only. A hash of this resource. + model_reference (~.gcb_model_reference.ModelReference): + Required. Unique identifier for this model. + creation_time (int): + Output only. The time when this model was + created, in millisecs since the epoch. + last_modified_time (int): + Output only. The time when this model was + last modified, in millisecs since the epoch. + description (str): + Optional. A user-friendly description of this + model. + friendly_name (str): + Optional. A descriptive name for this model. + labels (Sequence[~.gcb_model.Model.LabelsEntry]): + The labels associated with this model. You + can use these to organize and group your models. + Label keys and values can be no longer than 63 + characters, can only contain lowercase letters, + numeric characters, underscores and dashes. + International characters are allowed. Label + values are optional. Label keys must start with + a letter and each label in the list must have a + different key. + expiration_time (int): + Optional. The time when this model expires, + in milliseconds since the epoch. If not present, + the model will persist indefinitely. Expired + models will be deleted and their storage + reclaimed. The defaultTableExpirationMs + property of the encapsulating dataset can be + used to set a default expirationTime on newly + created models. + location (str): + Output only. The geographic location where + the model resides. This value is inherited from + the dataset. + encryption_configuration (~.encryption_config.EncryptionConfiguration): + Custom encryption configuration (e.g., Cloud + KMS keys). This shows the encryption + configuration of the model data while stored in + BigQuery storage. + model_type (~.gcb_model.Model.ModelType): + Output only. Type of the model resource. + training_runs (Sequence[~.gcb_model.Model.TrainingRun]): + Output only. Information for all training runs in increasing + order of start_time. + feature_columns (Sequence[~.standard_sql.StandardSqlField]): + Output only. Input feature columns that were + used to train this model. + label_columns (Sequence[~.standard_sql.StandardSqlField]): + Output only. Label columns that were used to train this + model. The output of the model will have a `predicted_` + prefix to these columns. + """ + + class ModelType(proto.Enum): + r"""Indicates the type of the Model.""" + MODEL_TYPE_UNSPECIFIED = 0 + LINEAR_REGRESSION = 1 + LOGISTIC_REGRESSION = 2 + KMEANS = 3 + TENSORFLOW = 6 + + class LossType(proto.Enum): + r"""Loss metric to evaluate model training performance.""" + LOSS_TYPE_UNSPECIFIED = 0 + MEAN_SQUARED_LOSS = 1 + MEAN_LOG_LOSS = 2 + + class DistanceType(proto.Enum): + r"""Distance metric used to compute the distance between two + points. + """ + DISTANCE_TYPE_UNSPECIFIED = 0 + EUCLIDEAN = 1 + COSINE = 2 + + class DataSplitMethod(proto.Enum): + r"""Indicates the method to split input data into multiple + tables. + """ + DATA_SPLIT_METHOD_UNSPECIFIED = 0 + RANDOM = 1 + CUSTOM = 2 + SEQUENTIAL = 3 + NO_SPLIT = 4 + AUTO_SPLIT = 5 + + class LearnRateStrategy(proto.Enum): + r"""Indicates the learning rate optimization strategy to use.""" + LEARN_RATE_STRATEGY_UNSPECIFIED = 0 + LINE_SEARCH = 1 + CONSTANT = 2 + + class OptimizationStrategy(proto.Enum): + r"""Indicates the optimization strategy used for training.""" + OPTIMIZATION_STRATEGY_UNSPECIFIED = 0 + BATCH_GRADIENT_DESCENT = 1 + NORMAL_EQUATION = 2 + + class KmeansEnums(proto.Message): + r"""""" + + class KmeansInitializationMethod(proto.Enum): + r"""Indicates the method used to initialize the centroids for + KMeans clustering algorithm. + """ + KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0 + RANDOM = 1 + CUSTOM = 2 + + class RegressionMetrics(proto.Message): + r"""Evaluation metrics for regression and explicit feedback type + matrix factorization models. + + Attributes: + mean_absolute_error (~.wrappers.DoubleValue): + Mean absolute error. + mean_squared_error (~.wrappers.DoubleValue): + Mean squared error. + mean_squared_log_error (~.wrappers.DoubleValue): + Mean squared log error. + median_absolute_error (~.wrappers.DoubleValue): + Median absolute error. + r_squared (~.wrappers.DoubleValue): + R^2 score. + """ + + mean_absolute_error = proto.Field( + proto.MESSAGE, number=1, message=wrappers.DoubleValue, + ) + + mean_squared_error = proto.Field( + proto.MESSAGE, number=2, message=wrappers.DoubleValue, + ) + + mean_squared_log_error = proto.Field( + proto.MESSAGE, number=3, message=wrappers.DoubleValue, + ) + + median_absolute_error = proto.Field( + proto.MESSAGE, number=4, message=wrappers.DoubleValue, + ) + + r_squared = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,) + + class AggregateClassificationMetrics(proto.Message): + r"""Aggregate metrics for classification/classifier models. For + multi-class models, the metrics are either macro-averaged or + micro-averaged. When macro-averaged, the metrics are calculated + for each label and then an unweighted average is taken of those + values. When micro-averaged, the metric is calculated globally + by counting the total number of correctly predicted rows. + + Attributes: + precision (~.wrappers.DoubleValue): + Precision is the fraction of actual positive + predictions that had positive actual labels. For + multiclass this is a macro-averaged metric + treating each class as a binary classifier. + recall (~.wrappers.DoubleValue): + Recall is the fraction of actual positive + labels that were given a positive prediction. + For multiclass this is a macro-averaged metric. + accuracy (~.wrappers.DoubleValue): + Accuracy is the fraction of predictions given + the correct label. For multiclass this is a + micro-averaged metric. + threshold (~.wrappers.DoubleValue): + Threshold at which the metrics are computed. + For binary classification models this is the + positive class threshold. For multi-class + classfication models this is the confidence + threshold. + f1_score (~.wrappers.DoubleValue): + The F1 score is an average of recall and + precision. For multiclass this is a macro- + averaged metric. + log_loss (~.wrappers.DoubleValue): + Logarithmic Loss. For multiclass this is a + macro-averaged metric. + roc_auc (~.wrappers.DoubleValue): + Area Under a ROC Curve. For multiclass this + is a macro-averaged metric. + """ + + precision = proto.Field(proto.MESSAGE, number=1, message=wrappers.DoubleValue,) + + recall = proto.Field(proto.MESSAGE, number=2, message=wrappers.DoubleValue,) + + accuracy = proto.Field(proto.MESSAGE, number=3, message=wrappers.DoubleValue,) + + threshold = proto.Field(proto.MESSAGE, number=4, message=wrappers.DoubleValue,) + + f1_score = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,) + + log_loss = proto.Field(proto.MESSAGE, number=6, message=wrappers.DoubleValue,) + + roc_auc = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,) + + class BinaryClassificationMetrics(proto.Message): + r"""Evaluation metrics for binary classification/classifier + models. + + Attributes: + aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics): + Aggregate classification metrics. + binary_confusion_matrix_list (Sequence[~.gcb_model.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]): + Binary confusion matrix at multiple + thresholds. + positive_label (str): + Label representing the positive class. + negative_label (str): + Label representing the negative class. + """ + + class BinaryConfusionMatrix(proto.Message): + r"""Confusion matrix for binary classification models. + + Attributes: + positive_class_threshold (~.wrappers.DoubleValue): + Threshold value used when computing each of + the following metric. + true_positives (~.wrappers.Int64Value): + Number of true samples predicted as true. + false_positives (~.wrappers.Int64Value): + Number of false samples predicted as true. + true_negatives (~.wrappers.Int64Value): + Number of true samples predicted as false. + false_negatives (~.wrappers.Int64Value): + Number of false samples predicted as false. + precision (~.wrappers.DoubleValue): + The fraction of actual positive predictions + that had positive actual labels. + recall (~.wrappers.DoubleValue): + The fraction of actual positive labels that + were given a positive prediction. + f1_score (~.wrappers.DoubleValue): + The equally weighted average of recall and + precision. + accuracy (~.wrappers.DoubleValue): + The fraction of predictions given the correct + label. + """ + + positive_class_threshold = proto.Field( + proto.MESSAGE, number=1, message=wrappers.DoubleValue, + ) + + true_positives = proto.Field( + proto.MESSAGE, number=2, message=wrappers.Int64Value, + ) + + false_positives = proto.Field( + proto.MESSAGE, number=3, message=wrappers.Int64Value, + ) + + true_negatives = proto.Field( + proto.MESSAGE, number=4, message=wrappers.Int64Value, + ) + + false_negatives = proto.Field( + proto.MESSAGE, number=5, message=wrappers.Int64Value, + ) + + precision = proto.Field( + proto.MESSAGE, number=6, message=wrappers.DoubleValue, + ) + + recall = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,) + + f1_score = proto.Field( + proto.MESSAGE, number=8, message=wrappers.DoubleValue, + ) + + accuracy = proto.Field( + proto.MESSAGE, number=9, message=wrappers.DoubleValue, + ) + + aggregate_classification_metrics = proto.Field( + proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", + ) + + binary_confusion_matrix_list = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.BinaryClassificationMetrics.BinaryConfusionMatrix", + ) + + positive_label = proto.Field(proto.STRING, number=3) + + negative_label = proto.Field(proto.STRING, number=4) + + class MultiClassClassificationMetrics(proto.Message): + r"""Evaluation metrics for multi-class classification/classifier + models. + + Attributes: + aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics): + Aggregate classification metrics. + confusion_matrix_list (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix]): + Confusion matrix at different thresholds. + """ + + class ConfusionMatrix(proto.Message): + r"""Confusion matrix for multi-class classification models. + + Attributes: + confidence_threshold (~.wrappers.DoubleValue): + Confidence threshold used when computing the + entries of the confusion matrix. + rows (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]): + One row per actual label. + """ + + class Entry(proto.Message): + r"""A single entry in the confusion matrix. + + Attributes: + predicted_label (str): + The predicted label. For confidence_threshold > 0, we will + also add an entry indicating the number of items under the + confidence threshold. + item_count (~.wrappers.Int64Value): + Number of items being predicted as this + label. + """ + + predicted_label = proto.Field(proto.STRING, number=1) + + item_count = proto.Field( + proto.MESSAGE, number=2, message=wrappers.Int64Value, + ) + + class Row(proto.Message): + r"""A single row in the confusion matrix. + + Attributes: + actual_label (str): + The original label of this row. + entries (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]): + Info describing predicted label distribution. + """ + + actual_label = proto.Field(proto.STRING, number=1) + + entries = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry", + ) + + confidence_threshold = proto.Field( + proto.MESSAGE, number=1, message=wrappers.DoubleValue, + ) + + rows = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.MultiClassClassificationMetrics.ConfusionMatrix.Row", + ) + + aggregate_classification_metrics = proto.Field( + proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", + ) + + confusion_matrix_list = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.MultiClassClassificationMetrics.ConfusionMatrix", + ) + + class ClusteringMetrics(proto.Message): + r"""Evaluation metrics for clustering models. + + Attributes: + davies_bouldin_index (~.wrappers.DoubleValue): + Davies-Bouldin index. + mean_squared_distance (~.wrappers.DoubleValue): + Mean of squared distances between each sample + to its cluster centroid. + clusters (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster]): + [Beta] Information for all clusters. + """ + + class Cluster(proto.Message): + r"""Message containing the information about one cluster. + + Attributes: + centroid_id (int): + Centroid id. + feature_values (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue]): + Values of highly variant features for this + cluster. + count (~.wrappers.Int64Value): + Count of training data rows that were + assigned to this cluster. + """ + + class FeatureValue(proto.Message): + r"""Representative value of a single feature within the cluster. + + Attributes: + feature_column (str): + The feature column name. + numerical_value (~.wrappers.DoubleValue): + The numerical feature value. This is the + centroid value for this feature. + categorical_value (~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): + The categorical feature value. + """ + + class CategoricalValue(proto.Message): + r"""Representative value of a categorical feature. + + Attributes: + category_counts (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): + Counts of all categories for the categorical feature. If + there are more than ten categories, we return top ten (by + count) and return one more CategoryCount with category + "*OTHER*" and count as aggregate counts of remaining + categories. + """ + + class CategoryCount(proto.Message): + r"""Represents the count of a single category within the cluster. + + Attributes: + category (str): + The name of category. + count (~.wrappers.Int64Value): + The count of training samples matching the + category within the cluster. + """ + + category = proto.Field(proto.STRING, number=1) + + count = proto.Field( + proto.MESSAGE, number=2, message=wrappers.Int64Value, + ) + + category_counts = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", + ) + + feature_column = proto.Field(proto.STRING, number=1) + + numerical_value = proto.Field( + proto.MESSAGE, + number=2, + oneof="value", + message=wrappers.DoubleValue, + ) + + categorical_value = proto.Field( + proto.MESSAGE, + number=3, + oneof="value", + message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", + ) + + centroid_id = proto.Field(proto.INT64, number=1) + + feature_values = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.ClusteringMetrics.Cluster.FeatureValue", + ) + + count = proto.Field(proto.MESSAGE, number=3, message=wrappers.Int64Value,) + + davies_bouldin_index = proto.Field( + proto.MESSAGE, number=1, message=wrappers.DoubleValue, + ) + + mean_squared_distance = proto.Field( + proto.MESSAGE, number=2, message=wrappers.DoubleValue, + ) + + clusters = proto.RepeatedField( + proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster", + ) + + class EvaluationMetrics(proto.Message): + r"""Evaluation metrics of a model. These are either computed on + all training data or just the eval data based on whether eval + data was used during training. These are not present for + imported models. + + Attributes: + regression_metrics (~.gcb_model.Model.RegressionMetrics): + Populated for regression models and explicit + feedback type matrix factorization models. + binary_classification_metrics (~.gcb_model.Model.BinaryClassificationMetrics): + Populated for binary + classification/classifier models. + multi_class_classification_metrics (~.gcb_model.Model.MultiClassClassificationMetrics): + Populated for multi-class + classification/classifier models. + clustering_metrics (~.gcb_model.Model.ClusteringMetrics): + Populated for clustering models. + """ + + regression_metrics = proto.Field( + proto.MESSAGE, number=1, oneof="metrics", message="Model.RegressionMetrics", + ) + + binary_classification_metrics = proto.Field( + proto.MESSAGE, + number=2, + oneof="metrics", + message="Model.BinaryClassificationMetrics", + ) + + multi_class_classification_metrics = proto.Field( + proto.MESSAGE, + number=3, + oneof="metrics", + message="Model.MultiClassClassificationMetrics", + ) + + clustering_metrics = proto.Field( + proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics", + ) + + class TrainingRun(proto.Message): + r"""Information about a single training query run for the model. + + Attributes: + training_options (~.gcb_model.Model.TrainingRun.TrainingOptions): + Options that were used for this training run, + includes user specified and default options that + were used. + start_time (~.timestamp.Timestamp): + The start time of this training run. + results (Sequence[~.gcb_model.Model.TrainingRun.IterationResult]): + Output of each iteration run, results.size() <= + max_iterations. + evaluation_metrics (~.gcb_model.Model.EvaluationMetrics): + The evaluation metrics over training/eval + data that were computed at the end of training. + """ + + class TrainingOptions(proto.Message): + r""" + + Attributes: + max_iterations (int): + The maximum number of iterations in training. + Used only for iterative training algorithms. + loss_type (~.gcb_model.Model.LossType): + Type of loss function used during training + run. + learn_rate (float): + Learning rate in training. Used only for + iterative training algorithms. + l1_regularization (~.wrappers.DoubleValue): + L1 regularization coefficient. + l2_regularization (~.wrappers.DoubleValue): + L2 regularization coefficient. + min_relative_progress (~.wrappers.DoubleValue): + When early_stop is true, stops training when accuracy + improvement is less than 'min_relative_progress'. Used only + for iterative training algorithms. + warm_start (~.wrappers.BoolValue): + Whether to train a model from the last + checkpoint. + early_stop (~.wrappers.BoolValue): + Whether to stop early when the loss doesn't improve + significantly any more (compared to min_relative_progress). + Used only for iterative training algorithms. + input_label_columns (Sequence[str]): + Name of input label columns in training data. + data_split_method (~.gcb_model.Model.DataSplitMethod): + The data split type for training and + evaluation, e.g. RANDOM. + data_split_eval_fraction (float): + The fraction of evaluation data over the + whole input data. The rest of data will be used + as training data. The format should be double. + Accurate to two decimal places. + Default value is 0.2. + data_split_column (str): + The column to split data with. This column won't be used as + a feature. + + 1. When data_split_method is CUSTOM, the corresponding + column should be boolean. The rows with true value tag + are eval data, and the false are training data. + 2. When data_split_method is SEQ, the first + DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) + in the corresponding column are used as training data, + and the rest are eval data. It respects the order in + Orderable data types: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties + learn_rate_strategy (~.gcb_model.Model.LearnRateStrategy): + The strategy to determine learn rate for the + current iteration. + initial_learn_rate (float): + Specifies the initial learning rate for the + line search learn rate strategy. + label_class_weights (Sequence[~.gcb_model.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]): + Weights associated with each label class, for + rebalancing the training data. Only applicable + for classification models. + distance_type (~.gcb_model.Model.DistanceType): + Distance type for clustering models. + num_clusters (int): + Number of clusters for clustering models. + model_uri (str): + [Beta] Google Cloud Storage URI from which the model was + imported. Only applicable for imported models. + optimization_strategy (~.gcb_model.Model.OptimizationStrategy): + Optimization strategy for training linear + regression models. + kmeans_initialization_method (~.gcb_model.Model.KmeansEnums.KmeansInitializationMethod): + The method used to initialize the centroids + for kmeans algorithm. + kmeans_initialization_column (str): + The column used to provide the initial centroids for kmeans + algorithm when kmeans_initialization_method is CUSTOM. + """ + + max_iterations = proto.Field(proto.INT64, number=1) + + loss_type = proto.Field(proto.ENUM, number=2, enum="Model.LossType",) + + learn_rate = proto.Field(proto.DOUBLE, number=3) + + l1_regularization = proto.Field( + proto.MESSAGE, number=4, message=wrappers.DoubleValue, + ) + + l2_regularization = proto.Field( + proto.MESSAGE, number=5, message=wrappers.DoubleValue, + ) + + min_relative_progress = proto.Field( + proto.MESSAGE, number=6, message=wrappers.DoubleValue, + ) + + warm_start = proto.Field( + proto.MESSAGE, number=7, message=wrappers.BoolValue, + ) + + early_stop = proto.Field( + proto.MESSAGE, number=8, message=wrappers.BoolValue, + ) + + input_label_columns = proto.RepeatedField(proto.STRING, number=9) + + data_split_method = proto.Field( + proto.ENUM, number=10, enum="Model.DataSplitMethod", + ) + + data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11) + + data_split_column = proto.Field(proto.STRING, number=12) + + learn_rate_strategy = proto.Field( + proto.ENUM, number=13, enum="Model.LearnRateStrategy", + ) + + initial_learn_rate = proto.Field(proto.DOUBLE, number=16) + + label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17) + + distance_type = proto.Field( + proto.ENUM, number=20, enum="Model.DistanceType", + ) + + num_clusters = proto.Field(proto.INT64, number=21) + + model_uri = proto.Field(proto.STRING, number=22) + + optimization_strategy = proto.Field( + proto.ENUM, number=23, enum="Model.OptimizationStrategy", + ) + + kmeans_initialization_method = proto.Field( + proto.ENUM, + number=33, + enum="Model.KmeansEnums.KmeansInitializationMethod", + ) + + kmeans_initialization_column = proto.Field(proto.STRING, number=34) + + class IterationResult(proto.Message): + r"""Information about a single iteration of the training run. + + Attributes: + index (~.wrappers.Int32Value): + Index of the iteration, 0 based. + duration_ms (~.wrappers.Int64Value): + Time taken to run the iteration in + milliseconds. + training_loss (~.wrappers.DoubleValue): + Loss computed on the training data at the end + of iteration. + eval_loss (~.wrappers.DoubleValue): + Loss computed on the eval data at the end of + iteration. + learn_rate (float): + Learn rate used for this iteration. + cluster_infos (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ClusterInfo]): + Information about top clusters for clustering + models. + """ + + class ClusterInfo(proto.Message): + r"""Information about a single cluster for clustering model. + + Attributes: + centroid_id (int): + Centroid id. + cluster_radius (~.wrappers.DoubleValue): + Cluster radius, the average distance from + centroid to each point assigned to the cluster. + cluster_size (~.wrappers.Int64Value): + Cluster size, the total number of points + assigned to the cluster. + """ + + centroid_id = proto.Field(proto.INT64, number=1) + + cluster_radius = proto.Field( + proto.MESSAGE, number=2, message=wrappers.DoubleValue, + ) + + cluster_size = proto.Field( + proto.MESSAGE, number=3, message=wrappers.Int64Value, + ) + + index = proto.Field(proto.MESSAGE, number=1, message=wrappers.Int32Value,) + + duration_ms = proto.Field( + proto.MESSAGE, number=4, message=wrappers.Int64Value, + ) + + training_loss = proto.Field( + proto.MESSAGE, number=5, message=wrappers.DoubleValue, + ) + + eval_loss = proto.Field( + proto.MESSAGE, number=6, message=wrappers.DoubleValue, + ) + + learn_rate = proto.Field(proto.DOUBLE, number=7) + + cluster_infos = proto.RepeatedField( + proto.MESSAGE, + number=8, + message="Model.TrainingRun.IterationResult.ClusterInfo", + ) + + training_options = proto.Field( + proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions", + ) + + start_time = proto.Field(proto.MESSAGE, number=8, message=timestamp.Timestamp,) + + results = proto.RepeatedField( + proto.MESSAGE, number=6, message="Model.TrainingRun.IterationResult", + ) + + evaluation_metrics = proto.Field( + proto.MESSAGE, number=7, message="Model.EvaluationMetrics", + ) + + etag = proto.Field(proto.STRING, number=1) + + model_reference = proto.Field( + proto.MESSAGE, number=2, message=gcb_model_reference.ModelReference, + ) + + creation_time = proto.Field(proto.INT64, number=5) + + last_modified_time = proto.Field(proto.INT64, number=6) + + description = proto.Field(proto.STRING, number=12) + + friendly_name = proto.Field(proto.STRING, number=14) + + labels = proto.MapField(proto.STRING, proto.STRING, number=15) + + expiration_time = proto.Field(proto.INT64, number=16) + + location = proto.Field(proto.STRING, number=13) + + encryption_configuration = proto.Field( + proto.MESSAGE, number=17, message=encryption_config.EncryptionConfiguration, + ) + + model_type = proto.Field(proto.ENUM, number=7, enum=ModelType,) + + training_runs = proto.RepeatedField(proto.MESSAGE, number=9, message=TrainingRun,) + + feature_columns = proto.RepeatedField( + proto.MESSAGE, number=10, message=standard_sql.StandardSqlField, + ) + + label_columns = proto.RepeatedField( + proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, + ) + + +class GetModelRequest(proto.Message): + r""" + + Attributes: + project_id (str): + Required. Project ID of the requested model. + dataset_id (str): + Required. Dataset ID of the requested model. + model_id (str): + Required. Model ID of the requested model. + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + model_id = proto.Field(proto.STRING, number=3) + + +class PatchModelRequest(proto.Message): + r""" + + Attributes: + project_id (str): + Required. Project ID of the model to patch. + dataset_id (str): + Required. Dataset ID of the model to patch. + model_id (str): + Required. Model ID of the model to patch. + model (~.gcb_model.Model): + Required. Patched model. + Follows RFC5789 patch semantics. Missing fields + are not updated. To clear a field, explicitly + set to default value. + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + model_id = proto.Field(proto.STRING, number=3) + + model = proto.Field(proto.MESSAGE, number=4, message=Model,) + + +class DeleteModelRequest(proto.Message): + r""" + + Attributes: + project_id (str): + Required. Project ID of the model to delete. + dataset_id (str): + Required. Dataset ID of the model to delete. + model_id (str): + Required. Model ID of the model to delete. + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + model_id = proto.Field(proto.STRING, number=3) + + +class ListModelsRequest(proto.Message): + r""" + + Attributes: + project_id (str): + Required. Project ID of the models to list. + dataset_id (str): + Required. Dataset ID of the models to list. + max_results (~.wrappers.UInt32Value): + The maximum number of results to return in a + single response page. Leverage the page tokens + to iterate through the entire collection. + page_token (str): + Page token, returned by a previous call to + request the next page of results + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + max_results = proto.Field(proto.MESSAGE, number=3, message=wrappers.UInt32Value,) + + page_token = proto.Field(proto.STRING, number=4) + + +class ListModelsResponse(proto.Message): + r""" + + Attributes: + models (Sequence[~.gcb_model.Model]): + Models in the requested dataset. Only the following fields + are populated: model_reference, model_type, creation_time, + last_modified_time and labels. + next_page_token (str): + A token to request the next page of results. + """ + + @property + def raw_page(self): + return self + + models = proto.RepeatedField(proto.MESSAGE, number=1, message=Model,) + + next_page_token = proto.Field(proto.STRING, number=2) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py new file mode 100644 index 000000000000..e3891d6c17ca --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", manifest={"ModelReference",}, +) + + +class ModelReference(proto.Message): + r"""Id path of a model. + + Attributes: + project_id (str): + Required. The ID of the project containing + this model. + dataset_id (str): + Required. The ID of the dataset containing + this model. + model_id (str): + Required. The ID of the model. The ID must contain only + letters (a-z, A-Z), numbers (0-9), or underscores (_). The + maximum length is 1,024 characters. + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + model_id = proto.Field(proto.STRING, number=3) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py new file mode 100644 index 000000000000..72f12f2840f4 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", + manifest={"StandardSqlDataType", "StandardSqlField", "StandardSqlStructType",}, +) + + +class StandardSqlDataType(proto.Message): + r"""The type of a variable, e.g., a function argument. Examples: INT64: + {type_kind="INT64"} ARRAY: {type_kind="ARRAY", + array_element_type="STRING"} STRUCT: + {type_kind="STRUCT", struct_type={fields=[ {name="x", + type={type_kind="STRING"}}, {name="y", type={type_kind="ARRAY", + array_element_type="DATE"}} ]}} + + Attributes: + type_kind (~.standard_sql.StandardSqlDataType.TypeKind): + Required. The top level type of this field. + Can be any standard SQL data type (e.g., + "INT64", "DATE", "ARRAY"). + array_element_type (~.standard_sql.StandardSqlDataType): + The type of the array's elements, if type_kind = "ARRAY". + struct_type (~.standard_sql.StandardSqlStructType): + The fields of this struct, in order, if type_kind = + "STRUCT". + """ + + class TypeKind(proto.Enum): + r"""""" + TYPE_KIND_UNSPECIFIED = 0 + INT64 = 2 + BOOL = 5 + FLOAT64 = 7 + STRING = 8 + BYTES = 9 + TIMESTAMP = 19 + DATE = 10 + TIME = 20 + DATETIME = 21 + GEOGRAPHY = 22 + NUMERIC = 23 + ARRAY = 16 + STRUCT = 17 + + type_kind = proto.Field(proto.ENUM, number=1, enum=TypeKind,) + + array_element_type = proto.Field( + proto.MESSAGE, number=2, oneof="sub_type", message="StandardSqlDataType", + ) + + struct_type = proto.Field( + proto.MESSAGE, number=3, oneof="sub_type", message="StandardSqlStructType", + ) + + +class StandardSqlField(proto.Message): + r"""A field or a column. + + Attributes: + name (str): + Optional. The name of this field. Can be + absent for struct fields. + type (~.standard_sql.StandardSqlDataType): + Optional. The type of this parameter. Absent + if not explicitly specified (e.g., CREATE + FUNCTION statement can omit the return type; in + this case the output parameter does not have + this "type" field). + """ + + name = proto.Field(proto.STRING, number=1) + + type = proto.Field(proto.MESSAGE, number=2, message=StandardSqlDataType,) + + +class StandardSqlStructType(proto.Message): + r""" + + Attributes: + fields (Sequence[~.standard_sql.StandardSqlField]): + + """ + + fields = proto.RepeatedField(proto.MESSAGE, number=1, message=StandardSqlField,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 90f023addf63..42d8f93565d5 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -49,16 +49,10 @@ def default(session): constraints_path, ) - if session.python == "2.7": - # The [all] extra is not installable on Python 2.7. - session.install("-e", ".[pandas,pyarrow]", "-c", constraints_path) - elif session.python == "3.5": - session.install("-e", ".[all]", "-c", constraints_path) - else: - # fastparquet is not included in .[all] because, in general, it's - # redundant with pyarrow. We still want to run some unit tests with - # fastparquet serialization, though. - session.install("-e", ".[all,fastparquet]", "-c", constraints_path) + # fastparquet is not included in .[all] because, in general, it's + # redundant with pyarrow. We still want to run some unit tests with + # fastparquet serialization, though. + session.install("-e", ".[all,fastparquet]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) @@ -77,13 +71,13 @@ def default(session): ) -@nox.session(python=["2.7", "3.5", "3.6", "3.7", "3.8"]) +@nox.session(python=["3.6", "3.7", "3.8"]) def unit(session): """Run the unit test suite.""" default(session) -@nox.session(python=["2.7", "3.8"]) +@nox.session(python=["3.8"]) def system(session): """Run the system test suite.""" @@ -108,12 +102,7 @@ def system(session): ) session.install("google-cloud-storage", "-c", constraints_path) - if session.python == "2.7": - # The [all] extra is not installable on Python 2.7. - session.install("-e", ".[pandas]", "-c", constraints_path) - else: - session.install("-e", ".[all]", "-c", constraints_path) - + session.install("-e", ".[all]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) # Run py.test against the system tests. @@ -122,7 +111,7 @@ def system(session): ) -@nox.session(python=["2.7", "3.8"]) +@nox.session(python=["3.8"]) def snippets(session): """Run the snippets test suite.""" @@ -139,11 +128,7 @@ def snippets(session): session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) - if session.python == "2.7": - # The [all] extra is not installable on Python 2.7. - session.install("-e", ".[pandas]", "-c", constraints_path) - else: - session.install("-e", ".[all]", "-c", constraints_path) + session.install("-e", ".[all]", "-c", constraints_path) # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session diff --git a/packages/google-cloud-bigquery/samples/create_routine.py b/packages/google-cloud-bigquery/samples/create_routine.py index d9b221a4f62b..012c7927a1aa 100644 --- a/packages/google-cloud-bigquery/samples/create_routine.py +++ b/packages/google-cloud-bigquery/samples/create_routine.py @@ -34,7 +34,7 @@ def create_routine(routine_id): bigquery.RoutineArgument( name="x", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ) ], diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index d80085dd3425..0fdacaaec6e0 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -126,7 +126,7 @@ def routine_id(client, dataset_id): bigquery.RoutineArgument( name="x", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ) ] diff --git a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py index a4467c59a896..59ec1fae94ae 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py @@ -39,21 +39,21 @@ def test_create_routine_ddl(capsys, random_routine_id, client): bigquery.RoutineArgument( name="arr", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.ARRAY, + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.ARRAY, array_element_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.STRUCT, + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.STRUCT, struct_type=bigquery_v2.types.StandardSqlStructType( fields=[ bigquery_v2.types.StandardSqlField( name="name", type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.STRING + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.STRING ), ), bigquery_v2.types.StandardSqlField( name="val", type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ), ] diff --git a/packages/google-cloud-bigquery/scripts/fixup_bigquery_v2_keywords.py b/packages/google-cloud-bigquery/scripts/fixup_bigquery_v2_keywords.py new file mode 100644 index 000000000000..82b46d64e49e --- /dev/null +++ b/packages/google-cloud-bigquery/scripts/fixup_bigquery_v2_keywords.py @@ -0,0 +1,181 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import os +import libcst as cst +import pathlib +import sys +from typing import (Any, Callable, Dict, List, Sequence, Tuple) + + +def partition( + predicate: Callable[[Any], bool], + iterator: Sequence[Any] +) -> Tuple[List[Any], List[Any]]: + """A stable, out-of-place partition.""" + results = ([], []) + + for i in iterator: + results[int(predicate(i))].append(i) + + # Returns trueList, falseList + return results[1], results[0] + + +class bigqueryCallTransformer(cst.CSTTransformer): + CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata') + METHOD_TO_PARAMS: Dict[str, Tuple[str]] = { + 'delete_model': ('project_id', 'dataset_id', 'model_id', ), + 'get_model': ('project_id', 'dataset_id', 'model_id', ), + 'list_models': ('project_id', 'dataset_id', 'max_results', 'page_token', ), + 'patch_model': ('project_id', 'dataset_id', 'model_id', 'model', ), + + } + + def leave_Call(self, original: cst.Call, updated: cst.Call) -> cst.CSTNode: + try: + key = original.func.attr.value + kword_params = self.METHOD_TO_PARAMS[key] + except (AttributeError, KeyError): + # Either not a method from the API or too convoluted to be sure. + return updated + + # If the existing code is valid, keyword args come after positional args. + # Therefore, all positional args must map to the first parameters. + args, kwargs = partition(lambda a: not bool(a.keyword), updated.args) + if any(k.keyword.value == "request" for k in kwargs): + # We've already fixed this file, don't fix it again. + return updated + + kwargs, ctrl_kwargs = partition( + lambda a: not a.keyword.value in self.CTRL_PARAMS, + kwargs + ) + + args, ctrl_args = args[:len(kword_params)], args[len(kword_params):] + ctrl_kwargs.extend(cst.Arg(value=a.value, keyword=cst.Name(value=ctrl)) + for a, ctrl in zip(ctrl_args, self.CTRL_PARAMS)) + + request_arg = cst.Arg( + value=cst.Dict([ + cst.DictElement( + cst.SimpleString("'{}'".format(name)), + cst.Element(value=arg.value) + ) + # Note: the args + kwargs looks silly, but keep in mind that + # the control parameters had to be stripped out, and that + # those could have been passed positionally or by keyword. + for name, arg in zip(kword_params, args + kwargs)]), + keyword=cst.Name("request") + ) + + return updated.with_changes( + args=[request_arg] + ctrl_kwargs + ) + + +def fix_files( + in_dir: pathlib.Path, + out_dir: pathlib.Path, + *, + transformer=bigqueryCallTransformer(), +): + """Duplicate the input dir to the output dir, fixing file method calls. + + Preconditions: + * in_dir is a real directory + * out_dir is a real, empty directory + """ + pyfile_gen = ( + pathlib.Path(os.path.join(root, f)) + for root, _, files in os.walk(in_dir) + for f in files if os.path.splitext(f)[1] == ".py" + ) + + for fpath in pyfile_gen: + with open(fpath, 'r') as f: + src = f.read() + + # Parse the code and insert method call fixes. + tree = cst.parse_module(src) + updated = tree.visit(transformer) + + # Create the path and directory structure for the new file. + updated_path = out_dir.joinpath(fpath.relative_to(in_dir)) + updated_path.parent.mkdir(parents=True, exist_ok=True) + + # Generate the updated source file at the corresponding path. + with open(updated_path, 'w') as f: + f.write(updated.code) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="""Fix up source that uses the bigquery client library. + +The existing sources are NOT overwritten but are copied to output_dir with changes made. + +Note: This tool operates at a best-effort level at converting positional + parameters in client method calls to keyword based parameters. + Cases where it WILL FAIL include + A) * or ** expansion in a method call. + B) Calls via function or method alias (includes free function calls) + C) Indirect or dispatched calls (e.g. the method is looked up dynamically) + + These all constitute false negatives. The tool will also detect false + positives when an API method shares a name with another method. +""") + parser.add_argument( + '-d', + '--input-directory', + required=True, + dest='input_dir', + help='the input directory to walk for python files to fix up', + ) + parser.add_argument( + '-o', + '--output-directory', + required=True, + dest='output_dir', + help='the directory to output files fixed via un-flattening', + ) + args = parser.parse_args() + input_dir = pathlib.Path(args.input_dir) + output_dir = pathlib.Path(args.output_dir) + if not input_dir.is_dir(): + print( + f"input directory '{input_dir}' does not exist or is not a directory", + file=sys.stderr, + ) + sys.exit(-1) + + if not output_dir.is_dir(): + print( + f"output directory '{output_dir}' does not exist or is not a directory", + file=sys.stderr, + ) + sys.exit(-1) + + if os.listdir(output_dir): + print( + f"output directory '{output_dir}' is not empty", + file=sys.stderr, + ) + sys.exit(-1) + + fix_files(input_dir, output_dir) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 73d9a03cad05..2cb57aad2b4a 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,22 +22,23 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.28.0" +version = "2.0.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - 'enum34; python_version < "3.4"', - "google-api-core >= 1.21.0, < 2.0dev", + "google-api-core[grpc] >= 1.22.2, < 2.0.0dev", + "proto-plus >= 1.10.0", + "libcst >= 0.2.5", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", ] extras = { "bqstorage": [ - "google-cloud-bigquery-storage >= 1.0.0, <2.0.0dev", + "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. @@ -50,19 +51,10 @@ "pandas": ["pandas>=0.23.0"], "pyarrow": [ # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword. - "pyarrow >= 1.0.0, < 2.0de ; python_version>='3.5'", - "pyarrow >= 0.16.0, < 0.17.0dev ; python_version<'3.5'", + "pyarrow >= 1.0.0, < 2.0dev", ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], - "fastparquet": [ - "fastparquet", - "python-snappy", - # llvmlite >= 0.32.0 cannot be installed on Python 3.5 and below - # (building the wheel fails), thus needs to be restricted. - # See: https://github.com/googleapis/python-bigquery/issues/78 - "llvmlite<=0.34.0;python_version>='3.6'", - "llvmlite<=0.31.0;python_version<'3.6'", - ], + "fastparquet": ["fastparquet", "python-snappy", "llvmlite>=0.34.0"], "opentelemetry": [ "opentelemetry-api==0.9b0", "opentelemetry-sdk==0.9b0", @@ -95,7 +87,9 @@ # Only include packages under the 'google' namespace. Do not include tests, # benchmarks, etc. packages = [ - package for package in setuptools.find_packages() if package.startswith("google") + package + for package in setuptools.PEP420PackageFinder.find() + if package.startswith("google") ] # Determine which namespaces are needed. @@ -118,10 +112,7 @@ "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", @@ -133,7 +124,8 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*", + python_requires=">=3.6", + scripts=["scripts/fixup_bigquery_v2_keywords.py"], include_package_data=True, zip_safe=False, ) diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 7fdc4fb28c99..c47ff1e5170d 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -3,30 +3,15 @@ { "git": { "name": ".", - "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "b716e1c8ecd90142b498b95e7f8830835529cf4a" - } - }, - { - "git": { - "name": "googleapis", - "remote": "https://github.com/googleapis/googleapis.git", - "sha": "0dc0a6c0f1a9f979bc0690f0caa5fbafa3000c2c", - "internalRef": "327026955" + "remote": "git@github.com:plamut/python-bigquery.git", + "sha": "64d666033446f9af669bb8eb9170b8f62d6308e4" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "27f4406999b1eee29e04b09b2423a8e4646c7e24" - } - }, - { - "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "27f4406999b1eee29e04b09b2423a8e4646c7e24" + "sha": "8a7a3021fe97aa0a3641db642fe2b767f1c8110f" } } ], @@ -40,89 +25,5 @@ "generator": "bazel" } } - ], - "generatedFiles": [ - ".coveragerc", - ".flake8", - ".github/CONTRIBUTING.md", - ".github/ISSUE_TEMPLATE/bug_report.md", - ".github/ISSUE_TEMPLATE/feature_request.md", - ".github/ISSUE_TEMPLATE/support_request.md", - ".github/PULL_REQUEST_TEMPLATE.md", - ".github/release-please.yml", - ".github/snippet-bot.yml", - ".gitignore", - ".kokoro/build.sh", - ".kokoro/continuous/common.cfg", - ".kokoro/continuous/continuous.cfg", - ".kokoro/docker/docs/Dockerfile", - ".kokoro/docker/docs/fetch_gpg_keys.sh", - ".kokoro/docs/common.cfg", - ".kokoro/docs/docs-presubmit.cfg", - ".kokoro/docs/docs.cfg", - ".kokoro/populate-secrets.sh", - ".kokoro/presubmit/common.cfg", - ".kokoro/presubmit/presubmit.cfg", - ".kokoro/presubmit/system-2.7.cfg", - ".kokoro/presubmit/system-3.8.cfg", - ".kokoro/publish-docs.sh", - ".kokoro/release.sh", - ".kokoro/release/common.cfg", - ".kokoro/release/release.cfg", - ".kokoro/samples/lint/common.cfg", - ".kokoro/samples/lint/continuous.cfg", - ".kokoro/samples/lint/periodic.cfg", - ".kokoro/samples/lint/presubmit.cfg", - ".kokoro/samples/python3.6/common.cfg", - ".kokoro/samples/python3.6/continuous.cfg", - ".kokoro/samples/python3.6/periodic.cfg", - ".kokoro/samples/python3.6/presubmit.cfg", - ".kokoro/samples/python3.7/common.cfg", - ".kokoro/samples/python3.7/continuous.cfg", - ".kokoro/samples/python3.7/periodic.cfg", - ".kokoro/samples/python3.7/presubmit.cfg", - ".kokoro/samples/python3.8/common.cfg", - ".kokoro/samples/python3.8/continuous.cfg", - ".kokoro/samples/python3.8/periodic.cfg", - ".kokoro/samples/python3.8/presubmit.cfg", - ".kokoro/test-samples.sh", - ".kokoro/trampoline.sh", - ".kokoro/trampoline_v2.sh", - ".trampolinerc", - "CODE_OF_CONDUCT.md", - "CONTRIBUTING.rst", - "LICENSE", - "MANIFEST.in", - "docs/_static/custom.css", - "docs/_templates/layout.html", - "docs/conf.py", - "google/cloud/bigquery_v2/gapic/enums.py", - "google/cloud/bigquery_v2/proto/encryption_config.proto", - "google/cloud/bigquery_v2/proto/encryption_config_pb2.py", - "google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py", - "google/cloud/bigquery_v2/proto/model.proto", - "google/cloud/bigquery_v2/proto/model_pb2.py", - "google/cloud/bigquery_v2/proto/model_pb2_grpc.py", - "google/cloud/bigquery_v2/proto/model_reference.proto", - "google/cloud/bigquery_v2/proto/model_reference_pb2.py", - "google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py", - "google/cloud/bigquery_v2/proto/standard_sql.proto", - "google/cloud/bigquery_v2/proto/standard_sql_pb2.py", - "google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py", - "google/cloud/bigquery_v2/types.py", - "renovate.json", - "samples/AUTHORING_GUIDE.md", - "samples/CONTRIBUTING.md", - "samples/snippets/README.rst", - "samples/snippets/noxfile.py", - "scripts/decrypt-secrets.sh", - "scripts/readme-gen/readme_gen.py", - "scripts/readme-gen/templates/README.tmpl.rst", - "scripts/readme-gen/templates/auth.tmpl.rst", - "scripts/readme-gen/templates/auth_api_key.tmpl.rst", - "scripts/readme-gen/templates/install_deps.tmpl.rst", - "scripts/readme-gen/templates/install_portaudio.tmpl.rst", - "setup.cfg", - "testing/.gitignore" ] } \ No newline at end of file diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index ac20c9aec32a..501380be2966 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -20,56 +20,73 @@ gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() -version = 'v2' +version = "v2" library = gapic.py_library( - service='bigquery', + service="bigquery", version=version, bazel_target=f"//google/cloud/bigquery/{version}:bigquery-{version}-py", include_protos=True, ) s.move( - [ - library / "google/cloud/bigquery_v2/gapic/enums.py", - library / "google/cloud/bigquery_v2/types.py", - library / "google/cloud/bigquery_v2/proto/location*", - library / "google/cloud/bigquery_v2/proto/encryption_config*", - library / "google/cloud/bigquery_v2/proto/model*", - library / "google/cloud/bigquery_v2/proto/standard_sql*", + library, + excludes=[ + "docs/index.rst", + "README.rst", + "noxfile.py", + "setup.py", + library / f"google/cloud/bigquery/__init__.py", + library / f"google/cloud/bigquery/py.typed", + # There are no public API endpoints for the generated ModelServiceClient, + # thus there's no point in generating it and its tests. + library / f"google/cloud/bigquery_{version}/services/**", + library / f"tests/unit/gapic/bigquery_{version}/**", ], ) -# Fix up proto docs that are missing summary line. -s.replace( - "google/cloud/bigquery_v2/proto/model_pb2.py", - '"""Attributes:', - '"""Protocol buffer.\n\n Attributes:', -) -s.replace( - "google/cloud/bigquery_v2/proto/encryption_config_pb2.py", - '"""Attributes:', - '"""Encryption configuration.\n\n Attributes:', -) - -# Remove non-ascii characters from docstrings for Python 2.7. -# Format quoted strings as plain text. -s.replace("google/cloud/bigquery_v2/proto/*.py", "[“”]", '``') - # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- -templated_files = common.py_library(cov_level=100, samples=True, split_system_tests=True) +templated_files = common.py_library( + cov_level=100, + samples=True, + microgenerator=True, + split_system_tests=True, +) # BigQuery has a custom multiprocessing note -s.move(templated_files, excludes=["noxfile.py", "docs/multiprocessing.rst"]) +s.move( + templated_files, + excludes=["noxfile.py", "docs/multiprocessing.rst", ".coveragerc"] +) # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- -python.py_samples() +# python.py_samples() # TODO: why doesn't this work here with Bazel? + +# Do not expose ModelServiceClient, as there is no public API endpoint for the +# models service. +s.replace( + "google/cloud/bigquery_v2/__init__.py", + r"from \.services\.model_service import ModelServiceClient", + "", +) +s.replace( + "google/cloud/bigquery_v2/__init__.py", + r"""["']ModelServiceClient["'],""", + "", +) +# Adjust Model docstring so that Sphinx does not think that "predicted_" is +# a reference to something, issuing a false warning. +s.replace( + "google/cloud/bigquery_v2/types/model.py", + r'will have a "predicted_"', + "will have a `predicted_`", +) s.replace( "docs/conf.py", @@ -77,4 +94,11 @@ '{"members": True, "inherited-members": True}' ) +# Tell Sphinx to ingore autogenerated docs files. +s.replace( + "docs/conf.py", + r'"samples/snippets/README\.rst",', + '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', +) + s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/packages/google-cloud-bigquery/testing/constraints-2.7.txt b/packages/google-cloud-bigquery/testing/constraints-2.7.txt deleted file mode 100644 index fafbaa27f0e3..000000000000 --- a/packages/google-cloud-bigquery/testing/constraints-2.7.txt +++ /dev/null @@ -1,9 +0,0 @@ -google-api-core==1.21.0 -google-cloud-core==1.4.1 -google-cloud-storage==1.30.0 -google-resumable-media==0.6.0 -ipython==5.5 -pandas==0.23.0 -pyarrow==0.16.0 -six==1.13.0 -tqdm==4.7.4 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/testing/constraints-3.5.txt b/packages/google-cloud-bigquery/testing/constraints-3.5.txt deleted file mode 100644 index a262dbe5f2a4..000000000000 --- a/packages/google-cloud-bigquery/testing/constraints-3.5.txt +++ /dev/null @@ -1,12 +0,0 @@ -google-api-core==1.21.0 -google-cloud-bigquery-storage==1.0.0 -google-cloud-core==1.4.1 -google-resumable-media==0.6.0 -google-cloud-storage==1.30.0 -grpcio==1.32.0 -ipython==5.5 -# pandas 0.23.0 is the first version to work with pyarrow to_pandas. -pandas==0.23.0 -pyarrow==1.0.0 -six==1.13.0 -tqdm==4.7.4 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index e69de29bb2d1..a9f4faa92a31 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -0,0 +1,16 @@ +fastparquet==0.4.1 +google-api-core==1.22.2 +google-cloud-bigquery-storage==2.0.0 +google-cloud-core==1.4.1 +google-resumable-media==0.6.0 +grpcio==1.32.0 +ipython==5.5 +libcst==0.2.5 +llvmlite==0.34.0 +# pandas 0.23.0 is the first version to work with pyarrow to_pandas. +pandas==0.23.0 +proto-plus==1.10.0 +pyarrow==1.0.0 +python-snappy==0.5.4 +six==1.13.0 +tqdm==4.7.4 diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 02cc8e139302..68fcb918c936 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -34,11 +34,9 @@ import pkg_resources try: - from google.cloud import bigquery_storage_v1 - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None - bigquery_storage_v1beta1 = None + bigquery_storage = None try: import fastavro # to parse BQ storage client results @@ -1793,57 +1791,11 @@ def test_dbapi_fetchall(self): self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)]) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): - bqstorage_client = bigquery_storage_v1.BigQueryReadClient( - credentials=Config.CLIENT._credentials - ) - cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() - - cursor.execute( - """ - SELECT id, `by`, time_ts - FROM `bigquery-public-data.hacker_news.comments` - ORDER BY `id` ASC - LIMIT 100000 - """ - ) - - result_rows = [cursor.fetchone(), cursor.fetchone(), cursor.fetchone()] - - field_name = operator.itemgetter(0) - fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] - - # Since DB API is not thread safe, only a single result stream should be - # requested by the BQ storage client, meaning that results should arrive - # in the sorted order. - expected_data = [ - [ - ("by", "sama"), - ("id", 15), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)), - ], - [ - ("by", "pg"), - ("id", 17), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)), - ], - [ - ("by", "pg"), - ("id", 22), - ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)), - ], - ] - self.assertEqual(fetched_data, expected_data) - - @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_dbapi_fetch_w_bqstorage_client_v1beta1_large_result_set(self): - bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials ) cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() @@ -1901,7 +1853,7 @@ def test_dbapi_dry_run_query(self): self.assertEqual(list(rows), []) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_dbapi_connection_does_not_leak_sockets(self): current_process = psutil.Process() @@ -2331,7 +2283,7 @@ def test_query_results_to_dataframe(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_query_results_to_dataframe_w_bqstorage(self): query = """ @@ -2340,40 +2292,7 @@ def test_query_results_to_dataframe_w_bqstorage(self): LIMIT 10 """ - bqstorage_client = bigquery_storage_v1.BigQueryReadClient( - credentials=Config.CLIENT._credentials - ) - - df = Config.CLIENT.query(query).result().to_dataframe(bqstorage_client) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 10) # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] - self.assertEqual(list(df), column_names) - exp_datatypes = { - "id": int, - "author": six.text_type, - "time_ts": pandas.Timestamp, - "dead": bool, - } - for index, row in df.iterrows(): - for col in column_names: - # all the schema fields are nullable, so None is acceptable - if not row[col] is None: - self.assertIsInstance(row[col], exp_datatypes[col]) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_query_results_to_dataframe_w_bqstorage_v1beta1(self): - query = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` - LIMIT 10 - """ - - bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials ) @@ -2569,7 +2488,7 @@ def test_create_routine(self): routine_name = "test_routine" dataset = self.temp_dataset(_make_dataset_id("create_routine")) float64_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.FLOAT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.FLOAT64 ) routine = bigquery.Routine( dataset.routine(routine_name), @@ -2584,7 +2503,7 @@ def test_create_routine(self): bigquery.RoutineArgument( name="arr", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.ARRAY, + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.ARRAY, array_element_type=float64_type, ), ) @@ -2663,7 +2582,7 @@ def _fetch_dataframe(self, query): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat @@ -2699,7 +2618,7 @@ def test_nested_table_to_arrow(self): job_config.schema = schema # Load a table using a local JSON file from memory. Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - bqstorage_client = bigquery_storage_v1.BigQueryReadClient( + bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials ) @@ -2855,13 +2774,13 @@ def test_list_rows_page_size(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_list_rows_max_results_w_bqstorage(self): table_ref = DatasetReference("bigquery-public-data", "utility_us").table( "country_code_iso" ) - bqstorage_client = bigquery_storage_v1.BigQueryReadClient( + bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials ) diff --git a/packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py b/packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py index 6fa4f057fb98..7f62c46fd34d 100644 --- a/packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py +++ b/packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py @@ -32,7 +32,7 @@ def enum_under_test(): @pytest.fixture def gapic_enum(): """The referential autogenerated enum the enum under test is based on.""" - from google.cloud.bigquery_v2.gapic.enums import StandardSqlDataType + from google.cloud.bigquery_v2.types import StandardSqlDataType return StandardSqlDataType.TypeKind @@ -61,7 +61,10 @@ def test_standard_sql_types_enum_members(enum_under_test, gapic_enum): assert name not in enum_under_test.__members__ -def test_standard_sql_types_enum_docstring(enum_under_test, gapic_enum): +@pytest.mark.skip(reason="Code generator issue, the docstring is not generated.") +def test_standard_sql_types_enum_docstring( + enum_under_test, gapic_enum +): # pragma: NO COVER assert "STRUCT (int):" not in enum_under_test.__doc__ assert "BOOL (int):" in enum_under_test.__doc__ assert "TIME (int):" in enum_under_test.__doc__ diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model.py b/packages/google-cloud-bigquery/tests/unit/model/test_model.py index 90fc09e66ab9..2c0079429864 100644 --- a/packages/google-cloud-bigquery/tests/unit/model/test_model.py +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model.py @@ -19,7 +19,7 @@ import pytest import google.cloud._helpers -from google.cloud.bigquery_v2.gapic import enums +from google.cloud.bigquery_v2 import types KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" @@ -117,7 +117,7 @@ def test_from_api_repr(target_class): assert got.expires == expiration_time assert got.description == u"A friendly description." assert got.friendly_name == u"A friendly name." - assert got.model_type == enums.Model.ModelType.LOGISTIC_REGRESSION + assert got.model_type == types.Model.ModelType.LOGISTIC_REGRESSION assert got.labels == {"greeting": u"こんにちは"} assert got.encryption_configuration.kms_key_name == KMS_KEY_NAME assert got.training_runs[0].training_options.initial_learn_rate == 1.0 @@ -162,7 +162,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert got.expires is None assert got.description is None assert got.friendly_name is None - assert got.model_type == enums.Model.ModelType.MODEL_TYPE_UNSPECIFIED + assert got.model_type == types.Model.ModelType.MODEL_TYPE_UNSPECIFIED assert got.labels == {} assert got.encryption_configuration is None assert len(got.training_runs) == 0 diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py index 02f703535227..b02ace1db813 100644 --- a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright 2019 Google LLC # @@ -63,14 +62,14 @@ def test_ctor_w_properties(target_class): RoutineArgument( name="x", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ) ] body = "x * 3" language = "SQL" return_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) type_ = "SCALAR_FUNCTION" description = "A routine description." @@ -141,14 +140,14 @@ def test_from_api_repr(target_class): RoutineArgument( name="x", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ) ] assert actual_routine.body == "42" assert actual_routine.language == "SQL" assert actual_routine.return_type == bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) assert actual_routine.type_ == "SCALAR_FUNCTION" assert actual_routine._properties["someNewField"] == "someValue" diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py index 7d17b5fc703f..e3bda95391fa 100644 --- a/packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py @@ -28,7 +28,7 @@ def target_class(): def test_ctor(target_class): data_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) actual_arg = target_class( name="field_name", kind="FIXED_TYPE", mode="IN", data_type=data_type @@ -51,7 +51,7 @@ def test_from_api_repr(target_class): assert actual_arg.kind == "FIXED_TYPE" assert actual_arg.mode == "IN" assert actual_arg.data_type == bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) @@ -72,7 +72,7 @@ def test_from_api_repr_w_unknown_fields(target_class): def test_eq(target_class): data_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) arg = target_class( name="field_name", kind="FIXED_TYPE", mode="IN", data_type=data_type diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index f4355072aed0..c1073066d479 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -20,7 +20,6 @@ import warnings import mock -import six try: import pandas @@ -300,10 +299,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): ) ) assert pyarrow.types.is_struct(actual) - try: - assert actual.num_fields == len(fields) - except AttributeError: # py27 - assert actual.num_children == len(fields) + assert actual.num_fields == len(fields) assert actual.equals(expected) @@ -348,10 +344,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): ) assert pyarrow.types.is_list(actual) assert pyarrow.types.is_struct(actual.value_type) - try: - assert actual.value_type.num_fields == len(fields) - except AttributeError: # py27 - assert actual.value_type.num_children == len(fields) + assert actual.value_type.num_fields == len(fields) assert actual.value_type.equals(expected_value_type) @@ -553,12 +546,9 @@ def test_bq_to_arrow_schema_w_unknown_type(module_under_test): actual = module_under_test.bq_to_arrow_schema(fields) assert actual is None - if six.PY3: - assert len(warned) == 1 - warning = warned[0] - assert "field3" in str(warning) - else: - assert len(warned) == 0 + assert len(warned) == 1 + warning = warned[0] + assert "field3" in str(warning) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -773,26 +763,6 @@ def test_dataframe_to_bq_schema_dict_sequence(module_under_test): assert returned_schema == expected_schema -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(not six.PY2, reason="Requires Python 2.7") -def test_dataframe_to_bq_schema_w_struct_raises_py27(module_under_test): - dataframe = pandas.DataFrame( - data=[{"struct_field": {"int_col": 1}}, {"struct_field": {"int_col": 2}}] - ) - bq_schema = [ - schema.SchemaField( - "struct_field", - field_type="STRUCT", - fields=[schema.SchemaField("int_col", field_type="INT64")], - ), - ] - - with pytest.raises(ValueError) as excinfo: - module_under_test.dataframe_to_bq_schema(dataframe, bq_schema=bq_schema) - - assert "struct (record) column types is not supported" in str(excinfo.value) - - @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_multiindex(module_under_test): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c4c604ed0bc3..f44201ab854d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -48,7 +48,7 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( InMemorySpanExporter, ) -except (ImportError, AttributeError): +except (ImportError, AttributeError): # pragma: NO COVER opentelemetry = None try: import pyarrow @@ -62,9 +62,9 @@ from google.cloud.bigquery.dataset import DatasetReference try: - from google.cloud import bigquery_storage_v1 + from google.cloud import bigquery_storage except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage_v1 = None + bigquery_storage = None from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection @@ -794,17 +794,17 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_create_bqstorage_client(self): - mock_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() mock_client.return_value = mock_client_instance creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with mock.patch( - "google.cloud.bigquery_storage_v1.BigQueryReadClient", mock_client + "google.cloud.bigquery_storage.BigQueryReadClient", mock_client ): bqstorage_client = client._create_bqstorage_client() @@ -817,8 +817,8 @@ def test_create_bqstorage_client_missing_dependency(self): def fail_bqstorage_import(name, globals, locals, fromlist, level): # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage_v1" in name or ( - fromlist is not None and "bigquery_storage_v1" in fromlist + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist ) no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) @@ -2499,7 +2499,7 @@ def test_update_routine(self): RoutineArgument( name="x", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ) ] @@ -8032,49 +8032,35 @@ def test_load_table_from_dataframe_struct_fields(self): "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - if six.PY2: - with pytest.raises(ValueError) as exc_info, load_patch: - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - job_config=job_config, - location=self.LOCATION, - ) - - err_msg = str(exc_info.value) - assert "struct" in err_msg - assert "not support" in err_msg - - else: - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - with load_patch as load_table_from_file, get_table_patch: - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - job_config=job_config, - location=self.LOCATION, - ) - - load_table_from_file.assert_called_once_with( - client, - mock.ANY, + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, - num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, - job_id=mock.ANY, - job_id_prefix=None, + job_config=job_config, location=self.LOCATION, - project=None, - job_config=mock.ANY, ) - sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema == schema + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -8671,14 +8657,9 @@ def test_schema_from_json_with_file_path(self): client = self._make_client() mock_file_path = "/mocked/file.json" - if six.PY2: - open_patch = mock.patch( - "__builtin__.open", mock.mock_open(read_data=file_content) - ) - else: - open_patch = mock.patch( - "builtins.open", new=mock.mock_open(read_data=file_content) - ) + open_patch = mock.patch( + "builtins.open", new=mock.mock_open(read_data=file_content) + ) with open_patch as _mock_file: actual = client.schema_from_json(mock_file_path) @@ -8720,12 +8701,7 @@ def test_schema_from_json_with_file_object(self): ] client = self._make_client() - - if six.PY2: - fake_file = io.BytesIO(file_content) - else: - fake_file = io.StringIO(file_content) - + fake_file = io.StringIO(file_content) actual = client.schema_from_json(fake_file) assert expected == actual @@ -8762,11 +8738,7 @@ def test_schema_to_json_with_file_path(self): client = self._make_client() mock_file_path = "/mocked/file.json" - - if six.PY2: - open_patch = mock.patch("__builtin__.open", mock.mock_open()) - else: - open_patch = mock.patch("builtins.open", mock.mock_open()) + open_patch = mock.patch("builtins.open", mock.mock_open()) with open_patch as mock_file, mock.patch("json.dump") as mock_dump: client.schema_to_json(schema_list, mock_file_path) @@ -8808,10 +8780,7 @@ def test_schema_to_json_with_file_object(self): SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), ] - if six.PY2: - fake_file = io.BytesIO() - else: - fake_file = io.StringIO() + fake_file = io.StringIO() client = self._make_client() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 0f1be45ee562..30fb1292e7d0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -19,9 +19,9 @@ import six try: - from google.cloud import bigquery_storage_v1 + from google.cloud import bigquery_storage except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None + bigquery_storage = None class TestConnection(unittest.TestCase): @@ -41,29 +41,26 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): - if bigquery_storage_v1 is None: - return None - mock_client = mock.create_autospec( - bigquery_storage_v1.client.BigQueryReadClient - ) - mock_client.transport = mock.Mock(spec=["channel"]) - mock_client.transport.channel = mock.Mock(spec=["close"]) + # Assumption: bigquery_storage exists. It's the test's responisbility to + # not use this helper or skip itself if bqstroage is not installed. + mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + mock_client._transport = mock.Mock(spec=["channel"]) + mock_client._transport.grpc_channel = mock.Mock(spec=["close"]) return mock_client def test_ctor_wo_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection mock_client = self._mock_client() - mock_bqstorage_client = self._mock_bqstorage_client() - mock_client._create_bqstorage_client.return_value = mock_bqstorage_client + mock_client._create_bqstorage_client.return_value = None connection = self._make_one(client=mock_client) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) - self.assertIs(connection._bqstorage_client, mock_bqstorage_client) + self.assertIs(connection._bqstorage_client, None) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_ctor_w_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection @@ -87,6 +84,9 @@ def test_connect_wo_client(self, mock_client): self.assertIsNotNone(connection._client) self.assertIsNotNone(connection._bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -101,7 +101,7 @@ def test_connect_w_client(self): self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_connect_w_both_clients(self): from google.cloud.bigquery.dbapi import connect @@ -130,7 +130,7 @@ def test_raises_error_if_closed(self): getattr(connection, method)() @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_close_closes_all_created_bigquery_clients(self): client = self._mock_client() @@ -150,10 +150,10 @@ def test_close_closes_all_created_bigquery_clients(self): connection.close() self.assertTrue(client.close.called) - self.assertTrue(bqstorage_client.transport.channel.close.called) + self.assertTrue(bqstorage_client._transport.grpc_channel.close.called) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): client = self._mock_client() @@ -163,7 +163,7 @@ def test_close_does_not_close_bigquery_clients_passed_to_it(self): connection.close() self.assertFalse(client.close.called) - self.assertFalse(bqstorage_client.transport.channel.called) + self.assertFalse(bqstorage_client._transport.grpc_channel.close.called) def test_close_closes_all_created_cursors(self): connection = self._make_one(client=self._mock_client()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index bd1d9dc0aaed..9a1a6b1e877a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -14,7 +14,6 @@ import operator as op import unittest -import warnings import mock import six @@ -27,11 +26,9 @@ from google.api_core import exceptions try: - from google.cloud import bigquery_storage_v1 - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None - bigquery_storage_v1beta1 = None + bigquery_storage = None from tests.unit.helpers import _to_pyarrow @@ -78,32 +75,17 @@ def _mock_client( return mock_client - def _mock_bqstorage_client(self, rows=None, stream_count=0, v1beta1=False): - from google.cloud.bigquery_storage_v1 import client - from google.cloud.bigquery_storage_v1 import types - from google.cloud.bigquery_storage_v1beta1 import types as types_v1beta1 - + def _mock_bqstorage_client(self, rows=None, stream_count=0): if rows is None: rows = [] - if v1beta1: - mock_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - mock_read_session = mock.MagicMock( - streams=[ - types_v1beta1.Stream(name="streams/stream_{}".format(i)) - for i in range(stream_count) - ] - ) - else: - mock_client = mock.create_autospec(client.BigQueryReadClient) - mock_read_session = mock.MagicMock( - streams=[ - types.ReadStream(name="streams/stream_{}".format(i)) - for i in range(stream_count) - ] - ) + mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + mock_read_session = mock.MagicMock( + streams=[ + bigquery_storage.types.ReadStream(name="streams/stream_{}".format(i)) + for i in range(stream_count) + ] + ) mock_client.create_read_session.return_value = mock_read_session @@ -291,7 +273,7 @@ def test_fetchall_w_row(self): self.assertEqual(rows[0], (1,)) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): @@ -345,71 +327,7 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): self.assertEqual(sorted_row_data, expected_row_data) @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_fetchall_w_bqstorage_client_v1beta1_fetch_success(self): - from google.cloud.bigquery import dbapi - from google.cloud.bigquery import table - - # use unordered data to also test any non-determenistic key order in dicts - row_data = [ - table.Row([1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), - table.Row([2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), - ] - bqstorage_streamed_rows = [ - { - "bar": _to_pyarrow(1.2), - "foo": _to_pyarrow(1.1), - "quux": _to_pyarrow(1.4), - "baz": _to_pyarrow(1.3), - }, - { - "bar": _to_pyarrow(2.2), - "foo": _to_pyarrow(2.1), - "quux": _to_pyarrow(2.4), - "baz": _to_pyarrow(2.3), - }, - ] - - mock_client = self._mock_client(rows=row_data) - mock_bqstorage_client = self._mock_bqstorage_client( - stream_count=1, rows=bqstorage_streamed_rows, v1beta1=True - ) - - connection = dbapi.connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, - ) - cursor = connection.cursor() - cursor.execute("SELECT foo, bar FROM some_table") - - with warnings.catch_warnings(record=True) as warned: - rows = cursor.fetchall() - - # a deprecation warning should have been emitted - expected_warnings = [ - warning - for warning in warned - if issubclass(warning.category, DeprecationWarning) - and "v1beta1" in str(warning) - ] - self.assertEqual(len(expected_warnings), 1, "Deprecation warning not raised.") - - # the default client was not used - mock_client.list_rows.assert_not_called() - - # check the data returned - field_value = op.itemgetter(1) - sorted_row_data = [sorted(row.items(), key=field_value) for row in rows] - expected_row_data = [ - [("foo", 1.1), ("bar", 1.2), ("baz", 1.3), ("quux", 1.4)], - [("foo", 2.1), ("bar", 2.2), ("baz", 2.3), ("quux", 2.4)], - ] - - self.assertEqual(sorted_row_data, expected_row_data) - - @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi @@ -432,7 +350,7 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): self.assertEqual(rows, []) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index fb6a46bd616c..fb042e18cc96 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -35,9 +35,9 @@ except ImportError: # pragma: NO COVER pyarrow = None try: - from google.cloud import bigquery_storage_v1 + from google.cloud import bigquery_storage except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage_v1 = None + bigquery_storage = None try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -5667,7 +5667,7 @@ def test_to_dataframe_ddl_query(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_bqstorage(self): query_resource = { @@ -5685,8 +5685,8 @@ def test_to_dataframe_bqstorage(self): client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() session.avro_schema.schema = json.dumps( { "type": "record", @@ -5704,9 +5704,9 @@ def test_to_dataframe_bqstorage(self): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **resource["configuration"]["query"]["destinationTable"] ) - expected_session = bigquery_storage_v1.types.ReadSession( + expected_session = bigquery_storage.types.ReadSession( table=destination_table, - data_format=bigquery_storage_v1.enums.DataFormat.ARROW, + data_format=bigquery_storage.types.DataFormat.ARROW, ) bqstorage_client.create_read_session.assert_called_once_with( parent="projects/{}".format(self.PROJECT), @@ -6259,7 +6259,7 @@ def test__contains_order_by(query, expected): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.parametrize( "query", @@ -6295,8 +6295,8 @@ def test_to_dataframe_bqstorage_preserve_order(query): connection = _make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(job_resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() session.avro_schema.schema = json.dumps( { "type": "record", @@ -6314,8 +6314,8 @@ def test_to_dataframe_bqstorage_preserve_order(query): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **job_resource["configuration"]["query"]["destinationTable"] ) - expected_session = bigquery_storage_v1.types.ReadSession( - table=destination_table, data_format=bigquery_storage_v1.enums.DataFormat.ARROW, + expected_session = bigquery_storage.types.ReadSession( + table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, ) bqstorage_client.create_read_session.assert_called_once_with( parent="projects/test-project", diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index c4527c837f07..20be6b7552bf 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -41,7 +41,7 @@ io = pytest.importorskip("IPython.utils.io") tools = pytest.importorskip("IPython.testing.tools") interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") -bigquery_storage_v1 = pytest.importorskip("google.cloud.bigquery_storage_v1") +bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") @pytest.fixture(scope="session") @@ -83,8 +83,8 @@ def missing_bq_storage(): def fail_if(name, globals, locals, fromlist, level): # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage_v1" in name or ( - fromlist is not None and "bigquery_storage_v1" in fromlist + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist ) return maybe_fail_import(predicate=fail_if) @@ -314,14 +314,14 @@ def test__make_bqstorage_client_false(): @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) got = magics._make_bqstorage_client(True, credentials_mock) - assert isinstance(got, bigquery_storage_v1.BigQueryReadClient) + assert isinstance(got, bigquery_storage.BigQueryReadClient) def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): @@ -338,7 +338,7 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): @@ -396,7 +396,7 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() @@ -410,14 +410,14 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. - bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_instance_mock = mock.create_autospec( - bigquery_storage_v1.BigQueryReadClient, instance=True + bigquery_storage.BigQueryReadClient, instance=True ) - bqstorage_instance_mock.transport = mock.Mock() + bqstorage_instance_mock._transport = mock.Mock() bqstorage_mock.return_value = bqstorage_instance_mock bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock ) sql = "SELECT 17 AS num" @@ -559,7 +559,7 @@ def test_bigquery_magic_clears_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): ip = IPython.get_ipython() @@ -573,14 +573,14 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. - bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_instance_mock = mock.create_autospec( - bigquery_storage_v1.BigQueryReadClient, instance=True + bigquery_storage.BigQueryReadClient, instance=True ) - bqstorage_instance_mock.transport = mock.Mock() + bqstorage_instance_mock._transport = mock.Mock() bqstorage_mock.return_value = bqstorage_instance_mock bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock ) sql = "SELECT 17 AS num" @@ -623,7 +623,7 @@ def warning_match(warning): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) def test_bigquery_magic_with_rest_client_requested(monkeypatch): ip = IPython.get_ipython() @@ -637,9 +637,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. - bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock ) sql = "SELECT 17 AS num" @@ -841,7 +841,7 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_bqstorage_client(): @@ -864,14 +864,14 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): "google.cloud.bigquery.magics.magics.bigquery.Client", autospec=True ) - bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_instance_mock = mock.create_autospec( - bigquery_storage_v1.BigQueryReadClient, instance=True + bigquery_storage.BigQueryReadClient, instance=True ) - bqstorage_instance_mock.transport = mock.Mock() + bqstorage_instance_mock._transport = mock.Mock() bqstorage_mock.return_value = bqstorage_instance_mock bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock ) table_id = "bigquery-public-data.samples.shakespeare" diff --git a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py index 1c35b0a82443..09afa7531438 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py @@ -25,7 +25,7 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( InMemorySpanExporter, ) -except ImportError: +except ImportError: # pragma: NO COVER opentelemetry = None import pytest from six.moves import reload_module diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 9f7ee7bb3af4..71bf6b5aeadb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -206,15 +206,15 @@ def test_to_standard_sql_simple_type(self): sql_type = self._get_standard_sql_data_type_class() examples = ( # a few legacy types - ("INTEGER", sql_type.INT64), - ("FLOAT", sql_type.FLOAT64), - ("BOOLEAN", sql_type.BOOL), - ("DATETIME", sql_type.DATETIME), + ("INTEGER", sql_type.TypeKind.INT64), + ("FLOAT", sql_type.TypeKind.FLOAT64), + ("BOOLEAN", sql_type.TypeKind.BOOL), + ("DATETIME", sql_type.TypeKind.DATETIME), # a few standard types - ("INT64", sql_type.INT64), - ("FLOAT64", sql_type.FLOAT64), - ("BOOL", sql_type.BOOL), - ("GEOGRAPHY", sql_type.GEOGRAPHY), + ("INT64", sql_type.TypeKind.INT64), + ("FLOAT64", sql_type.TypeKind.FLOAT64), + ("BOOL", sql_type.TypeKind.BOOL), + ("GEOGRAPHY", sql_type.TypeKind.GEOGRAPHY), ) for legacy_type, standard_type in examples: field = self._make_one("some_field", legacy_type) @@ -258,26 +258,26 @@ def test_to_standard_sql_struct_type(self): # level 2 fields sub_sub_field_date = types.StandardSqlField( - name="date_field", type=sql_type(type_kind=sql_type.DATE) + name="date_field", type=sql_type(type_kind=sql_type.TypeKind.DATE) ) sub_sub_field_time = types.StandardSqlField( - name="time_field", type=sql_type(type_kind=sql_type.TIME) + name="time_field", type=sql_type(type_kind=sql_type.TypeKind.TIME) ) # level 1 fields sub_field_struct = types.StandardSqlField( - name="last_used", type=sql_type(type_kind=sql_type.STRUCT) + name="last_used", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) ) sub_field_struct.type.struct_type.fields.extend( [sub_sub_field_date, sub_sub_field_time] ) sub_field_bytes = types.StandardSqlField( - name="image_content", type=sql_type(type_kind=sql_type.BYTES) + name="image_content", type=sql_type(type_kind=sql_type.TypeKind.BYTES) ) # level 0 (top level) expected_result = types.StandardSqlField( - name="image_usage", type=sql_type(type_kind=sql_type.STRUCT) + name="image_usage", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) ) expected_result.type.struct_type.fields.extend( [sub_field_bytes, sub_field_struct] @@ -304,8 +304,8 @@ def test_to_standard_sql_array_type_simple(self): sql_type = self._get_standard_sql_data_type_class() # construct expected result object - expected_sql_type = sql_type(type_kind=sql_type.ARRAY) - expected_sql_type.array_element_type.type_kind = sql_type.INT64 + expected_sql_type = sql_type(type_kind=sql_type.TypeKind.ARRAY) + expected_sql_type.array_element_type.type_kind = sql_type.TypeKind.INT64 expected_result = types.StandardSqlField( name="valid_numbers", type=expected_sql_type ) @@ -323,19 +323,19 @@ def test_to_standard_sql_array_type_struct(self): # define person STRUCT name_field = types.StandardSqlField( - name="name", type=sql_type(type_kind=sql_type.STRING) + name="name", type=sql_type(type_kind=sql_type.TypeKind.STRING) ) age_field = types.StandardSqlField( - name="age", type=sql_type(type_kind=sql_type.INT64) + name="age", type=sql_type(type_kind=sql_type.TypeKind.INT64) ) person_struct = types.StandardSqlField( - name="person_info", type=sql_type(type_kind=sql_type.STRUCT) + name="person_info", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) ) person_struct.type.struct_type.fields.extend([name_field, age_field]) # define expected result - an ARRAY of person structs expected_sql_type = sql_type( - type_kind=sql_type.ARRAY, array_element_type=person_struct.type + type_kind=sql_type.TypeKind.ARRAY, array_element_type=person_struct.type ) expected_result = types.StandardSqlField( name="known_people", type=expected_sql_type @@ -358,7 +358,9 @@ def test_to_standard_sql_unknown_type(self): standard_field = field.to_standard_sql() self.assertEqual(standard_field.name, "weird_field") - self.assertEqual(standard_field.type.type_kind, sql_type.TYPE_KIND_UNSPECIFIED) + self.assertEqual( + standard_field.type.type_kind, sql_type.TypeKind.TYPE_KIND_UNSPECIFIED + ) def test___eq___wrong_type(self): field = self._make_one("test", "STRING") diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 10bedfee126e..12169658e7b3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -13,7 +13,6 @@ # limitations under the License. import datetime as dt -import itertools import logging import time import unittest @@ -26,19 +25,13 @@ import google.api_core.exceptions try: - from google.cloud import bigquery_storage_v1 - from google.cloud import bigquery_storage_v1beta1 - from google.cloud.bigquery_storage_v1.gapic.transports import ( - big_query_read_grpc_transport, - ) - from google.cloud.bigquery_storage_v1beta1.gapic.transports import ( - big_query_storage_grpc_transport as big_query_storage_grpc_transport_v1beta1, + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, ) except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None - bigquery_storage_v1beta1 = None + bigquery_storage = None big_query_read_grpc_transport = None - big_query_storage_grpc_transport_v1beta1 = None try: import pandas @@ -1846,7 +1839,7 @@ def test_to_arrow_w_empty_table(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_max_results_w_create_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -1886,15 +1879,15 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) streams = [ @@ -1902,7 +1895,7 @@ def test_to_arrow_w_bqstorage(self): {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1.types.ReadSession(streams=streams) + session = bigquery_storage.types.ReadSession(streams=streams) arrow_schema = pyarrow.schema( [ pyarrow.field("colA", pyarrow.int64()), @@ -1963,23 +1956,23 @@ def test_to_arrow_w_bqstorage(self): self.assertEqual(actual_tbl.num_rows, total_rows) # Don't close the client if it was passed in. - bqstorage_client.transport.channel.close.assert_not_called() + bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut mock_client = _mock_client() - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) mock_client._create_bqstorage_client.return_value = bqstorage_client - session = bigquery_storage_v1.types.ReadSession() + session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( mock_client, @@ -1994,7 +1987,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): ) row_iterator.to_arrow(create_bqstorage_client=True) mock_client._create_bqstorage_client.assert_called_once() - bqstorage_client.transport.channel.close.assert_called_once() + bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): @@ -2025,14 +2018,14 @@ def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() arrow_schema = pyarrow.schema( [ pyarrow.field("colA", pyarrow.string()), @@ -2157,7 +2150,7 @@ def test_to_dataframe_iterable(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): @@ -2173,8 +2166,8 @@ def test_to_dataframe_iterable_w_bqstorage(self): ] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) streams = [ @@ -2182,7 +2175,7 @@ def test_to_dataframe_iterable_w_bqstorage(self): {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1.types.ReadSession( + session = bigquery_storage.types.ReadSession( streams=streams, arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -2225,7 +2218,7 @@ def test_to_dataframe_iterable_w_bqstorage(self): self.assertEqual(len(got), total_pages) # Don't close the client if it was passed in. - bqstorage_client.transport.channel.close.assert_not_called() + bqstorage_client._transport.grpc_channel.close.assert_not_called() @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_iterable_error_if_pandas_is_none(self): @@ -2790,19 +2783,19 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut mock_client = _mock_client() - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) mock_client._create_bqstorage_client.return_value = bqstorage_client - session = bigquery_storage_v1.types.ReadSession() + session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( mock_client, @@ -2817,18 +2810,18 @@ def test_to_dataframe_w_bqstorage_creates_client(self): ) row_iterator.to_dataframe(create_bqstorage_client=True) mock_client._create_bqstorage_client.assert_called_once() - bqstorage_client.transport.channel.close.assert_called_once() + bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( @@ -2848,55 +2841,16 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_to_dataframe_w_bqstorage_v1beta1_no_streams(self): - from google.cloud.bigquery import schema - from google.cloud.bigquery import table as mut - - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - session = bigquery_storage_v1beta1.types.ReadSession() - bqstorage_client.create_read_session.return_value = session - - row_iterator = mut.RowIterator( - _mock_client(), - api_request=None, - path=None, - schema=[ - schema.SchemaField("colA", "INTEGER"), - schema.SchemaField("colC", "FLOAT"), - schema.SchemaField("colB", "STRING"), - ], - table=mut.TableReference.from_string("proj.dset.tbl"), - ) - - with warnings.catch_warnings(record=True) as warned: - got = row_iterator.to_dataframe(bqstorage_client) - - column_names = ["colA", "colC", "colB"] - self.assertEqual(list(got), column_names) - self.assertTrue(got.empty) - - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue( - "Support for BigQuery Storage v1beta1 clients is deprecated" in str(warning) - ) - @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): from google.cloud.bigquery.table import Table - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() session.name = "projects/test-proj/locations/us/sessions/SOMESESSION" bqstorage_client.create_read_session.return_value = session mock_logger = mock.create_autospec(logging.Logger) @@ -2914,7 +2868,7 @@ def test_to_dataframe_w_bqstorage_logs_session(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): @@ -2930,8 +2884,8 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): ] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession( streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}], arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -2969,7 +2923,7 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): @@ -2985,8 +2939,8 @@ def test_to_dataframe_w_bqstorage_nonempty(self): ] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) streams = [ @@ -2994,7 +2948,7 @@ def test_to_dataframe_w_bqstorage_nonempty(self): {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1.types.ReadSession( + session = bigquery_storage.types.ReadSession( streams=streams, arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -3045,103 +2999,11 @@ def test_to_dataframe_w_bqstorage_nonempty(self): self.assertEqual(len(got.index), total_rows) # Don't close the client if it was passed in. - bqstorage_client.transport.channel.close.assert_not_called() - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_to_dataframe_w_bqstorage_v1beta1_nonempty(self): - from google.cloud.bigquery import schema - from google.cloud.bigquery import table as mut - from google.cloud.bigquery_storage_v1beta1 import reader - - arrow_fields = [ - pyarrow.field("colA", pyarrow.int64()), - # Not alphabetical to test column order. - pyarrow.field("colC", pyarrow.float64()), - pyarrow.field("colB", pyarrow.utf8()), - ] - arrow_schema = pyarrow.schema(arrow_fields) - - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - bqstorage_client.transport = mock.create_autospec( - big_query_storage_grpc_transport_v1beta1.BigQueryStorageGrpcTransport - ) - streams = [ - # Use two streams we want to check frames are read from each stream. - {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, - {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, - ] - session = bigquery_storage_v1beta1.types.ReadSession( - streams=streams, - arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, - ) - bqstorage_client.create_read_session.return_value = session - - mock_rowstream = mock.create_autospec(reader.ReadRowsStream) - bqstorage_client.read_rows.return_value = mock_rowstream - - mock_rows = mock.create_autospec(reader.ReadRowsIterable) - mock_rowstream.rows.return_value = mock_rows - page_items = [ - pyarrow.array([1, -1]), - pyarrow.array([2.0, 4.0]), - pyarrow.array(["abc", "def"]), - ] - page_record_batch = pyarrow.RecordBatch.from_arrays( - page_items, schema=arrow_schema - ) - mock_page = mock.create_autospec(reader.ReadRowsPage) - mock_page.to_arrow.return_value = page_record_batch - mock_pages = (mock_page, mock_page, mock_page) - type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) - - schema = [ - schema.SchemaField("colA", "IGNORED"), - schema.SchemaField("colC", "IGNORED"), - schema.SchemaField("colB", "IGNORED"), - ] - - row_iterator = mut.RowIterator( - _mock_client(), - None, # api_request: ignored - None, # path: ignored - schema, - table=mut.TableReference.from_string("proj.dset.tbl"), - selected_fields=schema, - ) - - with warnings.catch_warnings(record=True) as warned: - got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - - # Was a deprecation warning emitted? - expected_warnings = [ - warning - for warning in warned - if issubclass(warning.category, DeprecationWarning) - and "v1beta1" in str(warning) - ] - self.assertEqual(len(expected_warnings), 1, "Deprecation warning not raised.") - - # Are the columns in the expected order? - column_names = ["colA", "colC", "colB"] - self.assertEqual(list(got), column_names) - - # Have expected number of rows? - total_pages = len(streams) * len(mock_pages) - total_rows = len(page_items[0]) * total_pages - self.assertEqual(len(got.index), total_rows) - - # Don't close the client if it was passed in. - bqstorage_client.transport.channel.close.assert_not_called() + bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): @@ -3156,12 +3018,12 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1.types.ReadSession( + session = bigquery_storage.types.ReadSession( streams=streams, arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_client.create_read_session.return_value = session mock_rowstream = mock.create_autospec(reader.ReadRowsStream) @@ -3195,7 +3057,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(tqdm is None, "Requires `tqdm`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -3211,14 +3073,14 @@ def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): arrow_fields = [pyarrow.field("testcol", pyarrow.int64())] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) streams = [ # Use two streams we want to check that progress bar updates are # sent from each stream. {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1.types.ReadSession( + session = bigquery_storage.types.ReadSession( streams=streams, arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -3274,7 +3136,7 @@ def blocking_to_arrow(*args, **kwargs): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): @@ -3293,8 +3155,8 @@ def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): ] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession( streams=[ # Use multiple streams because one will fail with a # KeyboardInterrupt, and we want to check that the other streams @@ -3393,12 +3255,12 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_client.create_read_session.side_effect = google.api_core.exceptions.Forbidden( "TEST BigQuery Storage API not enabled. TEST" ) @@ -3412,13 +3274,13 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_partition(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) row_iterator = mut.RowIterator( _mock_client(), @@ -3432,13 +3294,13 @@ def test_to_dataframe_w_bqstorage_partition(self): row_iterator.to_dataframe(bqstorage_client) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_snapshot(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) row_iterator = mut.RowIterator( _mock_client(), @@ -3453,7 +3315,7 @@ def test_to_dataframe_w_bqstorage_snapshot(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): @@ -3472,11 +3334,11 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): arrow_schema = pyarrow.schema(arrow_fields) # create a mock BQ storage client - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) - session = bigquery_storage_v1.types.ReadSession( + session = bigquery_storage.types.ReadSession( streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}], arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -3560,7 +3422,7 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): ) # Don't close the client if it was passed in. - bqstorage_client.transport.channel.close.assert_not_called() + bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_concat_categorical_dtype_wo_pyarrow(self): @@ -4003,7 +3865,7 @@ def test_set_expiration_w_none(self): @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.parametrize( "table_path", @@ -4022,43 +3884,3 @@ def test_table_reference_to_bqstorage_v1_stable(table_path): for klass in (mut.TableReference, mut.Table, mut.TableListItem): got = klass.from_string(table_path).to_bqstorage() assert got == expected - - -@pytest.mark.skipif( - bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" -) -def test_table_reference_to_bqstorage_v1beta1(): - from google.cloud.bigquery import table as mut - - # Can't use parametrized pytest because bigquery_storage_v1beta1 may not be - # available. - expected = bigquery_storage_v1beta1.types.TableReference( - project_id="my-project", dataset_id="my_dataset", table_id="my_table" - ) - cases = ( - "my-project.my_dataset.my_table", - "my-project.my_dataset.my_table$20181225", - "my-project.my_dataset.my_table@1234567890", - "my-project.my_dataset.my_table$20181225@1234567890", - ) - - classes = (mut.TableReference, mut.Table, mut.TableListItem) - - for case, cls in itertools.product(cases, classes): - got = cls.from_string(case).to_bqstorage(v1beta1=True) - assert got == expected - - -@unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" -) -def test_table_reference_to_bqstorage_v1beta1_raises_import_error(): - from google.cloud.bigquery import table as mut - - classes = (mut.TableReference, mut.Table, mut.TableListItem) - for cls in classes: - with mock.patch.object(mut, "bigquery_storage_v1beta1", None), pytest.raises( - ValueError - ) as exc_context: - cls.from_string("my-project.my_dataset.my_table").to_bqstorage(v1beta1=True) - assert mut._NO_BQSTORAGE_ERROR in str(exc_context.value) From 923213523072a55b7ff9f444a4e64c7a66abd843 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 1 Oct 2020 00:22:05 +0200 Subject: [PATCH 0940/2016] chore: Release v2.0.0 (#284) This pull request was generated using releasetool. --- packages/google-cloud-bigquery/CHANGELOG.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index e8d367f73292..3dac7a0f10ef 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 2.0.0 + +09-30-2020 14:51 PDT + + +### Implementation Changes + +- Transition the library to microgenerator. ([#278](https://github.com/googleapis/python-bigquery/pull/278)) + This is a **breaking change** that **drops support for Python 2.7 and 3.5** and brings a few other changes. + See [migration guide](https://googleapis.dev/python/bigquery/latest/UPGRADING.html) for more info. + + + +### Internal / Testing Changes + +- Update protoc-generated comments (via synth). ([#270](https://github.com/googleapis/python-bigquery/pull/270)) +- Add CI secrets manager (via synth). ([#271](https://github.com/googleapis/python-bigquery/pull/271)) + ## [1.28.0](https://www.github.com/googleapis/python-bigquery/compare/v1.27.2...v1.28.0) (2020-09-22) From af3099adde4213e880cf1aad5b0642dbc3498a33 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 1 Oct 2020 18:49:59 +0200 Subject: [PATCH 0941/2016] chore(deps): update dependency google-cloud-bigquery to v2 (#287) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 7fe8391191b6..6edca4f10341 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.26.1 +google-cloud-bigquery[pandas,bqstorage,pyarrow]==2.0.0 google-auth-oauthlib==0.4.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From cd3b6f495806cf1030ccbb0c274a9f39ef9d543b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 6 Oct 2020 11:58:59 -0500 Subject: [PATCH 0942/2016] test: update tests to support latest google-cloud-core (#276) `google-cloud-core` version 1.4.2 populates `prettyPrint=false` by default. Update the connection tests to expect a value for `prettyPrint`. --- .../tests/unit/test__http.py | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index 4da805d48c78..691c4c80242a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -35,15 +35,33 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_build_api_url_no_extra_query_params(self): + from six.moves.urllib.parse import parse_qsl + from six.moves.urllib.parse import urlsplit + conn = self._make_one(object()) - URI = "/".join([conn.DEFAULT_API_ENDPOINT, "bigquery", conn.API_VERSION, "foo"]) - self.assertEqual(conn.build_api_url("/foo"), URI) + uri = conn.build_api_url("/foo") + scheme, netloc, path, qs, _ = urlsplit(uri) + self.assertEqual("%s://%s" % (scheme, netloc), conn.API_BASE_URL) + self.assertEqual(path, "/".join(["", "bigquery", conn.API_VERSION, "foo"])) + parms = dict(parse_qsl(qs)) + pretty_print = parms.pop("prettyPrint", "false") + self.assertEqual(pretty_print, "false") + self.assertEqual(parms, {}) def test_build_api_url_w_custom_endpoint(self): - custom_endpoint = "https://www.foo-googleapis.com" + from six.moves.urllib.parse import parse_qsl + from six.moves.urllib.parse import urlsplit + + custom_endpoint = "https://foo-bigquery.googleapis.com" conn = self._make_one(object(), api_endpoint=custom_endpoint) - URI = "/".join([custom_endpoint, "bigquery", conn.API_VERSION, "foo"]) - self.assertEqual(conn.build_api_url("/foo"), URI) + uri = conn.build_api_url("/foo") + scheme, netloc, path, qs, _ = urlsplit(uri) + self.assertEqual("%s://%s" % (scheme, netloc), custom_endpoint) + self.assertEqual(path, "/".join(["", "bigquery", conn.API_VERSION, "foo"])) + parms = dict(parse_qsl(qs)) + pretty_print = parms.pop("prettyPrint", "false") + self.assertEqual(pretty_print, "false") + self.assertEqual(parms, {}) def test_build_api_url_w_extra_query_params(self): from six.moves.urllib.parse import parse_qsl From 478dce6a9dc9e57fb02cad38ad9d869cab5cfe58 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 6 Oct 2020 12:57:37 -0500 Subject: [PATCH 0943/2016] feat: add constants for MONTH and YEAR time partitioning types (#283) Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com> --- .../google/cloud/bigquery/table.py | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 902a7040a315..a72bacb7419e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1980,6 +1980,12 @@ class TimePartitioningType(object): HOUR = "HOUR" """str: Generates one partition per hour.""" + MONTH = "MONTH" + """str: Generates one partition per month.""" + + YEAR = "YEAR" + """str: Generates one partition per year.""" + class TimePartitioning(object): """Configures time-based partitioning for a table. @@ -1987,13 +1993,24 @@ class TimePartitioning(object): Args: type_ (Optional[google.cloud.bigquery.table.TimePartitioningType]): Specifies the type of time partitioning to perform. Defaults to - :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`, - which is the only currently supported type. + :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`. + + Supported values are: + + * :attr:`~google.cloud.bigquery.table.TimePartitioningType.HOUR` + * :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY` + * :attr:`~google.cloud.bigquery.table.TimePartitioningType.MONTH` + * :attr:`~google.cloud.bigquery.table.TimePartitioningType.YEAR` + field (Optional[str]): If set, the table is partitioned by this field. If not set, the table is partitioned by pseudo column ``_PARTITIONTIME``. The field - must be a top-level ``TIMESTAMP`` or ``DATE`` field. Its mode must - be ``NULLABLE`` or ``REQUIRED``. + must be a top-level ``TIMESTAMP``, ``DATETIME``, or ``DATE`` + field. Its mode must be ``NULLABLE`` or ``REQUIRED``. + + See the `time-unit column-partitioned tables guide + `_ + in the BigQuery documentation. expiration_ms(Optional[int]): Number of milliseconds for which to keep the storage for a partition. From da9edcd06e544df8a99a0f6a75a6294d47fa3890 Mon Sep 17 00:00:00 2001 From: Ryan Yuan Date: Wed, 7 Oct 2020 06:00:18 +1100 Subject: [PATCH 0944/2016] docs(samples): add create_table_clustered code snippet (#291) * docs(samples): add create_table_clustered code snippet * docs(samples): add create_table_clustered code snippet * fix unit test and lint Co-authored-by: Tim Swast --- .../docs/usage/tables.rst | 9 ++++ .../samples/create_table_clustered.py | 42 +++++++++++++++++++ .../tests/test_create_table_clustered.py | 22 ++++++++++ 3 files changed, 73 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/create_table_clustered.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_create_table_clustered.py diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index 27af7c7dfd3d..7afca05e2b9c 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -58,6 +58,15 @@ Create an empty table with the :start-after: [START bigquery_create_table] :end-before: [END bigquery_create_table] +Create a clustered table with the +:func:`~google.cloud.bigquery.client.Client.create_table` method: + +.. literalinclude:: ../samples/create_table_clustered.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_table_clustered] + :end-before: [END bigquery_create_table_clustered] + Create an integer range partitioned table with the :func:`~google.cloud.bigquery.client.Client.create_table` method: diff --git a/packages/google-cloud-bigquery/samples/create_table_clustered.py b/packages/google-cloud-bigquery/samples/create_table_clustered.py new file mode 100644 index 000000000000..2b45b747e8e4 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/create_table_clustered.py @@ -0,0 +1,42 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_clustered(table_id): + + # [START bigquery_create_table_clustered] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + schema = [ + bigquery.SchemaField("full_name", "STRING"), + bigquery.SchemaField("city", "STRING"), + bigquery.SchemaField("zipcode", "INTEGER"), + ] + + table = bigquery.Table(table_id, schema=schema) + table.clustering_fields = ["city", "zipcode"] + table = client.create_table(table) # Make an API request. + print( + "Created clustered table {}.{}.{}".format( + table.project, table.dataset_id, table.table_id + ) + ) + # [END bigquery_create_table_clustered] + return table diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table_clustered.py b/packages/google-cloud-bigquery/samples/tests/test_create_table_clustered.py new file mode 100644 index 000000000000..8eab5d48b5e9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table_clustered.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import create_table_clustered + + +def test_create_table_clustered(capsys, random_table_id): + table = create_table_clustered.create_table_clustered(random_table_id) + out, _ = capsys.readouterr() + assert "Created clustered table {}".format(random_table_id) in out + assert table.clustering_fields == ["city", "zipcode"] From b68873e61a6939ca877986bdfad49027719b0673 Mon Sep 17 00:00:00 2001 From: Avihay Kain <2963806+grooveygr@users.noreply.github.com> Date: Wed, 7 Oct 2020 17:38:43 +0300 Subject: [PATCH 0945/2016] perf: remove redundant array deepcopy (#26) * perf(bigquery): remove redundant array deepcopy deepcopy can be a very costly operation when considering large arrays with complex nested objects. refactor helpers to allow recursive conversion without copying arrays. * add check to ignore REPEATED mode * Update google/cloud/bigquery/_helpers.py Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Co-authored-by: Tres Seaver Co-authored-by: Tim Swast Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> --- .../google/cloud/bigquery/_helpers.py | 39 ++++++++++++++----- .../tests/unit/test__helpers.py | 35 +++++++++++++++++ 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 47851d42cdf4..b59b3d794a95 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -15,7 +15,6 @@ """Shared helper functions for BigQuery API classes.""" import base64 -import copy import datetime import decimal import re @@ -397,13 +396,9 @@ def _repeated_field_to_json(field, row_value): Returns: List[Any]: A list of JSON-serializable objects. """ - # Remove the REPEATED, but keep the other fields. This allows us to process - # each item as if it were a top-level field. - item_field = copy.deepcopy(field) - item_field._mode = "NULLABLE" values = [] for item in row_value: - values.append(_field_to_json(item_field, item)) + values.append(_single_field_to_json(field, item)) return values @@ -462,6 +457,33 @@ def _record_field_to_json(fields, row_value): return record +def _single_field_to_json(field, row_value): + """Convert a single field into JSON-serializable values. + + Ignores mode so that this can function for ARRAY / REPEATING fields + without requiring a deepcopy of the field. See: + https://github.com/googleapis/python-bigquery/issues/6 + + Args: + field (google.cloud.bigquery.schema.SchemaField): + The SchemaField to use for type conversion and field name. + + row_value (Any): + Scalar or Struct to be inserted. The type + is inferred from the SchemaField's field_type. + + Returns: + Any: A JSON-serializable object. + """ + if row_value is None: + return None + + if field.field_type == "RECORD": + return _record_field_to_json(field.fields, row_value) + + return _scalar_field_to_json(field, row_value) + + def _field_to_json(field, row_value): """Convert a field into JSON-serializable values. @@ -483,10 +505,7 @@ def _field_to_json(field, row_value): if field.mode == "REPEATED": return _repeated_field_to_json(field, row_value) - if field.field_type == "RECORD": - return _record_field_to_json(field.fields, row_value) - - return _scalar_field_to_json(field, row_value) + return _single_field_to_json(field, row_value) def _snake_to_camel_case(value): diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 28ebe81443af..16c4fb8a52f6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -806,6 +806,41 @@ def test_w_known_field_type(self): self.assertEqual(converted, str(original)) +class Test_single_field_to_json(unittest.TestCase): + def _call_fut(self, field, value): + from google.cloud.bigquery._helpers import _single_field_to_json + + return _single_field_to_json(field, value) + + def test_w_none(self): + field = _make_field("INT64") + original = None + converted = self._call_fut(field, original) + self.assertIsNone(converted) + + def test_w_record(self): + subfields = [ + _make_field("INT64", name="one"), + _make_field("STRING", name="two"), + ] + field = _make_field("RECORD", fields=subfields) + original = {"one": 42, "two": "two"} + converted = self._call_fut(field, original) + self.assertEqual(converted, {"one": "42", "two": "two"}) + + def test_w_scalar(self): + field = _make_field("INT64") + original = 42 + converted = self._call_fut(field, original) + self.assertEqual(converted, str(original)) + + def test_w_scalar_ignores_mode(self): + field = _make_field("STRING", mode="REPEATED") + original = "hello world" + converted = self._call_fut(field, original) + self.assertEqual(converted, original) + + class Test_repeated_field_to_json(unittest.TestCase): def _call_fut(self, field, value): from google.cloud.bigquery._helpers import _repeated_field_to_json From f19a51ac4db880755e5b1914d79079209301564e Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 7 Oct 2020 10:25:05 -0700 Subject: [PATCH 0946/2016] chore(python): skip reporting coverage for namespace package (#279) * chore(python): remove note about editable installs `pip install -e .` is supported and is how we install the library for tests. Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Tue Sep 22 12:06:12 2020 -0600 Source-Repo: googleapis/synthtool Source-Sha: a651c5fb763c69a921aecdd3e1d8dc51dbf20f8d Source-Link: https://github.com/googleapis/synthtool/commit/a651c5fb763c69a921aecdd3e1d8dc51dbf20f8d * chore(python): skip reporting coverage for namespace package Source-Author: Tres Seaver Source-Date: Wed Sep 23 10:58:13 2020 -0400 Source-Repo: googleapis/synthtool Source-Sha: f3c04883d6c43261ff13db1f52d03a283be06871 Source-Link: https://github.com/googleapis/synthtool/commit/f3c04883d6c43261ff13db1f52d03a283be06871 Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/.coveragerc | 5 ++++- packages/google-cloud-bigquery/synth.metadata | 14 +++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index dd39c8546c41..0d8e6297dc9c 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -17,6 +17,8 @@ # Generated by synthtool. DO NOT EDIT! [run] branch = True +omit = + google/cloud/__init__.py [report] fail_under = 100 @@ -32,4 +34,5 @@ omit = */gapic/*.py */proto/*.py */core/*.py - */site-packages/*.py \ No newline at end of file + */site-packages/*.py + google/cloud/__init__.py diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index c47ff1e5170d..d40e66dac1d2 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -3,15 +3,23 @@ { "git": { "name": ".", - "remote": "git@github.com:plamut/python-bigquery.git", - "sha": "64d666033446f9af669bb8eb9170b8f62d6308e4" + "remote": "https://github.com/googleapis/python-bigquery.git", + "sha": "fbbe0cb0ea22161d81f1e5504bb89b55e4198634" + } + }, + { + "git": { + "name": "googleapis", + "remote": "https://github.com/googleapis/googleapis.git", + "sha": "0dc0a6c0f1a9f979bc0690f0caa5fbafa3000c2c", + "internalRef": "327026955" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "8a7a3021fe97aa0a3641db642fe2b767f1c8110f" + "sha": "f3c04883d6c43261ff13db1f52d03a283be06871" } } ], From 4977544c69b3a9c8b2766be9782fc49151d4abc5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 8 Oct 2020 10:19:04 -0500 Subject: [PATCH 0947/2016] fix: remove unnecessary dependency on libcst (#308) * fix: remove unnecessary dependency on libcst * remove scripts Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com> --- .../scripts/fixup_bigquery_v2_keywords.py | 181 ------------------ packages/google-cloud-bigquery/setup.py | 2 - packages/google-cloud-bigquery/synth.py | 1 + 3 files changed, 1 insertion(+), 183 deletions(-) delete mode 100644 packages/google-cloud-bigquery/scripts/fixup_bigquery_v2_keywords.py diff --git a/packages/google-cloud-bigquery/scripts/fixup_bigquery_v2_keywords.py b/packages/google-cloud-bigquery/scripts/fixup_bigquery_v2_keywords.py deleted file mode 100644 index 82b46d64e49e..000000000000 --- a/packages/google-cloud-bigquery/scripts/fixup_bigquery_v2_keywords.py +++ /dev/null @@ -1,181 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import os -import libcst as cst -import pathlib -import sys -from typing import (Any, Callable, Dict, List, Sequence, Tuple) - - -def partition( - predicate: Callable[[Any], bool], - iterator: Sequence[Any] -) -> Tuple[List[Any], List[Any]]: - """A stable, out-of-place partition.""" - results = ([], []) - - for i in iterator: - results[int(predicate(i))].append(i) - - # Returns trueList, falseList - return results[1], results[0] - - -class bigqueryCallTransformer(cst.CSTTransformer): - CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata') - METHOD_TO_PARAMS: Dict[str, Tuple[str]] = { - 'delete_model': ('project_id', 'dataset_id', 'model_id', ), - 'get_model': ('project_id', 'dataset_id', 'model_id', ), - 'list_models': ('project_id', 'dataset_id', 'max_results', 'page_token', ), - 'patch_model': ('project_id', 'dataset_id', 'model_id', 'model', ), - - } - - def leave_Call(self, original: cst.Call, updated: cst.Call) -> cst.CSTNode: - try: - key = original.func.attr.value - kword_params = self.METHOD_TO_PARAMS[key] - except (AttributeError, KeyError): - # Either not a method from the API or too convoluted to be sure. - return updated - - # If the existing code is valid, keyword args come after positional args. - # Therefore, all positional args must map to the first parameters. - args, kwargs = partition(lambda a: not bool(a.keyword), updated.args) - if any(k.keyword.value == "request" for k in kwargs): - # We've already fixed this file, don't fix it again. - return updated - - kwargs, ctrl_kwargs = partition( - lambda a: not a.keyword.value in self.CTRL_PARAMS, - kwargs - ) - - args, ctrl_args = args[:len(kword_params)], args[len(kword_params):] - ctrl_kwargs.extend(cst.Arg(value=a.value, keyword=cst.Name(value=ctrl)) - for a, ctrl in zip(ctrl_args, self.CTRL_PARAMS)) - - request_arg = cst.Arg( - value=cst.Dict([ - cst.DictElement( - cst.SimpleString("'{}'".format(name)), - cst.Element(value=arg.value) - ) - # Note: the args + kwargs looks silly, but keep in mind that - # the control parameters had to be stripped out, and that - # those could have been passed positionally or by keyword. - for name, arg in zip(kword_params, args + kwargs)]), - keyword=cst.Name("request") - ) - - return updated.with_changes( - args=[request_arg] + ctrl_kwargs - ) - - -def fix_files( - in_dir: pathlib.Path, - out_dir: pathlib.Path, - *, - transformer=bigqueryCallTransformer(), -): - """Duplicate the input dir to the output dir, fixing file method calls. - - Preconditions: - * in_dir is a real directory - * out_dir is a real, empty directory - """ - pyfile_gen = ( - pathlib.Path(os.path.join(root, f)) - for root, _, files in os.walk(in_dir) - for f in files if os.path.splitext(f)[1] == ".py" - ) - - for fpath in pyfile_gen: - with open(fpath, 'r') as f: - src = f.read() - - # Parse the code and insert method call fixes. - tree = cst.parse_module(src) - updated = tree.visit(transformer) - - # Create the path and directory structure for the new file. - updated_path = out_dir.joinpath(fpath.relative_to(in_dir)) - updated_path.parent.mkdir(parents=True, exist_ok=True) - - # Generate the updated source file at the corresponding path. - with open(updated_path, 'w') as f: - f.write(updated.code) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="""Fix up source that uses the bigquery client library. - -The existing sources are NOT overwritten but are copied to output_dir with changes made. - -Note: This tool operates at a best-effort level at converting positional - parameters in client method calls to keyword based parameters. - Cases where it WILL FAIL include - A) * or ** expansion in a method call. - B) Calls via function or method alias (includes free function calls) - C) Indirect or dispatched calls (e.g. the method is looked up dynamically) - - These all constitute false negatives. The tool will also detect false - positives when an API method shares a name with another method. -""") - parser.add_argument( - '-d', - '--input-directory', - required=True, - dest='input_dir', - help='the input directory to walk for python files to fix up', - ) - parser.add_argument( - '-o', - '--output-directory', - required=True, - dest='output_dir', - help='the directory to output files fixed via un-flattening', - ) - args = parser.parse_args() - input_dir = pathlib.Path(args.input_dir) - output_dir = pathlib.Path(args.output_dir) - if not input_dir.is_dir(): - print( - f"input directory '{input_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if not output_dir.is_dir(): - print( - f"output directory '{output_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if os.listdir(output_dir): - print( - f"output directory '{output_dir}' is not empty", - file=sys.stderr, - ) - sys.exit(-1) - - fix_files(input_dir, output_dir) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 2cb57aad2b4a..109fcb10ce8e 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -31,7 +31,6 @@ dependencies = [ "google-api-core[grpc] >= 1.22.2, < 2.0.0dev", "proto-plus >= 1.10.0", - "libcst >= 0.2.5", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", @@ -125,7 +124,6 @@ install_requires=dependencies, extras_require=extras, python_requires=">=3.6", - scripts=["scripts/fixup_bigquery_v2_keywords.py"], include_package_data=True, zip_safe=False, ) diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index 501380be2966..97466d0f42b1 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -36,6 +36,7 @@ "README.rst", "noxfile.py", "setup.py", + "scripts/fixup_bigquery_v2_keywords.py", library / f"google/cloud/bigquery/__init__.py", library / f"google/cloud/bigquery/py.typed", # There are no public API endpoints for the generated ModelServiceClient, From fe9cd815462d1d52a901dc75bd42ff21d151dd58 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 8 Oct 2020 15:44:05 +0000 Subject: [PATCH 0948/2016] chore: release 2.1.0 (#301) :robot: I have created a release \*beep\* \*boop\* --- ## [2.1.0](https://www.github.com/googleapis/python-bigquery/compare/v2.0.0...v2.1.0) (2020-10-08) ### Features * add constants for MONTH and YEAR time partitioning types ([#283](https://www.github.com/googleapis/python-bigquery/issues/283)) ([9090e1c](https://www.github.com/googleapis/python-bigquery/commit/9090e1ccd8825a97835325b4829f6e7ecfd9ea88)) ### Bug Fixes * remove unnecessary dependency on libcst ([#308](https://www.github.com/googleapis/python-bigquery/issues/308)) ([c055930](https://www.github.com/googleapis/python-bigquery/commit/c05593094c1405f752b2c51b15202a6dbb5cb83f)) ### Performance Improvements * remove redundant array deepcopy ([#26](https://www.github.com/googleapis/python-bigquery/issues/26)) ([b54f867](https://www.github.com/googleapis/python-bigquery/commit/b54f86769c982ce5c8fcbf3889f82450428bb40c)) ### Documentation * **samples:** add create_table_clustered code snippet ([#291](https://www.github.com/googleapis/python-bigquery/issues/291)) ([d1eb8b3](https://www.github.com/googleapis/python-bigquery/commit/d1eb8b3dcc789916c5d3ba8464f62b1f8bef35ff)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 22 +++++++++++++++++++++ packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 3dac7a0f10ef..ad6c9551f449 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,28 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.1.0](https://www.github.com/googleapis/python-bigquery/compare/v2.0.0...v2.1.0) (2020-10-08) + + +### Features + +* add constants for MONTH and YEAR time partitioning types ([#283](https://www.github.com/googleapis/python-bigquery/issues/283)) ([9090e1c](https://www.github.com/googleapis/python-bigquery/commit/9090e1ccd8825a97835325b4829f6e7ecfd9ea88)) + + +### Bug Fixes + +* remove unnecessary dependency on libcst ([#308](https://www.github.com/googleapis/python-bigquery/issues/308)) ([c055930](https://www.github.com/googleapis/python-bigquery/commit/c05593094c1405f752b2c51b15202a6dbb5cb83f)) + + +### Performance Improvements + +* remove redundant array deepcopy ([#26](https://www.github.com/googleapis/python-bigquery/issues/26)) ([b54f867](https://www.github.com/googleapis/python-bigquery/commit/b54f86769c982ce5c8fcbf3889f82450428bb40c)) + + +### Documentation + +* **samples:** add create_table_clustered code snippet ([#291](https://www.github.com/googleapis/python-bigquery/issues/291)) ([d1eb8b3](https://www.github.com/googleapis/python-bigquery/commit/d1eb8b3dcc789916c5d3ba8464f62b1f8bef35ff)) + ## 2.0.0 09-30-2020 14:51 PDT diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 109fcb10ce8e..14b38b63e4dd 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "2.0.0" +version = "2.1.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 8f1d979a38a6148dfb003909a3b7e39f32789cf7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 9 Oct 2020 11:57:40 -0500 Subject: [PATCH 0949/2016] docs: update snippets samples to support version 2.0 (#309) * docs: update snippets samples to support version 2.0 For some reason, old versions of the google-cloud-bigquery-storage library were still getting used. This pins those dependencies directly, instead. Also, updates the samples to remove warnings about `client.dataset`. * blacken --- .../samples/snippets/authorized_view_tutorial.py | 8 ++++++-- .../samples/snippets/authorized_view_tutorial_test.py | 10 +++++++--- .../samples/snippets/natality_tutorial.py | 4 +++- .../samples/snippets/natality_tutorial_test.py | 4 ++-- .../samples/snippets/quickstart.py | 4 ++-- .../samples/snippets/requirements.txt | 6 +++++- 6 files changed, 25 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py index 6b5cc378f00b..b6a20c6ec27d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py @@ -27,16 +27,18 @@ def run_authorized_view_tutorial(override_values={}): client = bigquery.Client() source_dataset_id = "github_source_data" + source_dataset_id_full = "{}.{}".format(client.project, source_dataset_id) # [END bigquery_authorized_view_tutorial] # [END bigquery_avt_create_source_dataset] # To facilitate testing, we replace values with alternatives # provided by the testing harness. source_dataset_id = override_values.get("source_dataset_id", source_dataset_id) + source_dataset_id_full = "{}.{}".format(client.project, source_dataset_id) # [START bigquery_authorized_view_tutorial] # [START bigquery_avt_create_source_dataset] - source_dataset = bigquery.Dataset(client.dataset(source_dataset_id)) + source_dataset = bigquery.Dataset(source_dataset_id_full) # Specify the geographic location where the dataset should reside. source_dataset.location = "US" source_dataset = client.create_dataset(source_dataset) # API request @@ -66,16 +68,18 @@ def run_authorized_view_tutorial(override_values={}): # Create a separate dataset to store your view # [START bigquery_avt_create_shared_dataset] shared_dataset_id = "shared_views" + shared_dataset_id_full = "{}.{}".format(client.project, shared_dataset_id) # [END bigquery_authorized_view_tutorial] # [END bigquery_avt_create_shared_dataset] # To facilitate testing, we replace values with alternatives # provided by the testing harness. shared_dataset_id = override_values.get("shared_dataset_id", shared_dataset_id) + shared_dataset_id_full = "{}.{}".format(client.project, shared_dataset_id) # [START bigquery_authorized_view_tutorial] # [START bigquery_avt_create_shared_dataset] - shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id)) + shared_dataset = bigquery.Dataset(shared_dataset_id_full) shared_dataset.location = "US" shared_dataset = client.create_dataset(shared_dataset) # API request # [END bigquery_avt_create_shared_dataset] diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py index 4c74020bd117..eb247c5ebc7b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py @@ -30,7 +30,7 @@ def datasets_to_delete(client): doomed = [] yield doomed for item in doomed: - client.delete_dataset(item, delete_contents=True) + client.delete_dataset(item, delete_contents=True, not_found_ok=True) def test_authorized_view_tutorial(client, datasets_to_delete): @@ -42,8 +42,12 @@ def test_authorized_view_tutorial(client, datasets_to_delete): str(uuid.uuid4()).replace("-", "_") ), } - source_dataset_ref = client.dataset(override_values["source_dataset_id"]) - shared_dataset_ref = client.dataset(override_values["shared_dataset_id"]) + source_dataset_ref = "{}.{}".format( + client.project, override_values["source_dataset_id"] + ) + shared_dataset_ref = "{}.{}".format( + client.project, override_values["shared_dataset_id"] + ) datasets_to_delete.extend( [override_values["source_dataset_id"], override_values["shared_dataset_id"]] ) diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py index b2b607b0daf5..a8d90501ac79 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py @@ -38,13 +38,15 @@ def run_natality_tutorial(override_values={}): # Prepare a reference to a new dataset for storing the query results. dataset_id = "natality_regression" + dataset_id_full = "{}.{}".format(client.project, dataset_id) # [END bigquery_query_natality_tutorial] # To facilitate testing, we replace values with alternatives # provided by the testing harness. dataset_id = override_values.get("dataset_id", dataset_id) + dataset_id_full = "{}.{}".format(client.project, dataset_id) # [START bigquery_query_natality_tutorial] - dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset = bigquery.Dataset(dataset_id_full) # Create the new BigQuery dataset. dataset = client.create_dataset(dataset) diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py index fae72fa46852..d9c89bef25d3 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py @@ -43,8 +43,8 @@ def test_natality_tutorial(client, datasets_to_delete): natality_tutorial.run_natality_tutorial(override_values) - table_ref = bigquery.Dataset(client.dataset(override_values["dataset_id"])).table( - "regression_input" + table_ref = "{}.{}.{}".format( + client.project, override_values["dataset_id"], "regression_input" ) table = client.get_table(table_ref) assert table.num_rows > 0 diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart.py b/packages/google-cloud-bigquery/samples/snippets/quickstart.py index 56d6fd843c3a..1b0ef5b3ad85 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart.py @@ -33,8 +33,8 @@ def run_quickstart(override_values={}): # [START bigquery_quickstart] # Prepares a reference to the new dataset - dataset_ref = bigquery_client.dataset(dataset_id) - dataset = bigquery.Dataset(dataset_ref) + dataset_id_full = "{}.{}".format(bigquery_client.project, dataset_id) + dataset = bigquery.Dataset(dataset_id_full) # Creates the new dataset dataset = bigquery_client.create_dataset(dataset) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 6edca4f10341..76c333b467e1 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,10 @@ -google-cloud-bigquery[pandas,bqstorage,pyarrow]==2.0.0 +google-cloud-bigquery==2.0.0 +google-cloud-bigquery-storage==2.0.0 google-auth-oauthlib==0.4.1 +grpcio==1.32.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.1 +pandas==1.1.3 +pyarrow==1.0.1 pytz==2020.1 From c717e5efbf353fd0adcdde01b0174c65782d8f80 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 9 Oct 2020 20:10:31 +0200 Subject: [PATCH 0950/2016] chore(deps): update dependency google-cloud-bigquery to v2.1.0 (#312) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 76c333b467e1..daabdf745306 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.0.0 +google-cloud-bigquery==2.1.0 google-cloud-bigquery-storage==2.0.0 google-auth-oauthlib==0.4.1 grpcio==1.32.0 From b1a8bb8615ffe8af19276310f2b4bdce88d3dc3b Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 9 Oct 2020 13:04:02 -0700 Subject: [PATCH 0951/2016] chore: start tracking obsolete files (#310) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/4563ab42-f1d6-4a7f-8e48-cc92dfba56b5/targets - [ ] To automatically regenerate this PR, check this box. PiperOrigin-RevId: 334645418 Source-Link: https://github.com/googleapis/googleapis/commit/c941026e5e3d600817a20e9ab4d4be03dff21a68 --- .../.kokoro/presubmit/presubmit.cfg | 8 +- .../.kokoro/samples/python3.6/common.cfg | 6 -- .../.kokoro/samples/python3.7/common.cfg | 6 -- .../.kokoro/samples/python3.8/common.cfg | 6 -- packages/google-cloud-bigquery/mypy.ini | 3 + packages/google-cloud-bigquery/synth.metadata | 94 ++++++++++++++++++- 6 files changed, 101 insertions(+), 22 deletions(-) create mode 100644 packages/google-cloud-bigquery/mypy.ini diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg index 8f43917d92fe..b158096f0ae2 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg @@ -1 +1,7 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file +# Format: //devtools/kokoro/config/proto/build.proto + +# Disable system tests. +env_vars: { + key: "RUN_SYSTEM_TESTS" + value: "false" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg index f3b930960b41..a56768eae259 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg @@ -13,12 +13,6 @@ env_vars: { value: "py-3.6" } -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py36" -} - env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg index fc06545655d4..c93747180b2e 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg @@ -13,12 +13,6 @@ env_vars: { value: "py-3.7" } -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py37" -} - env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg index 2b0bf59b3ef5..9808f15e32a9 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg @@ -13,12 +13,6 @@ env_vars: { value: "py-3.8" } -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py38" -} - env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/packages/google-cloud-bigquery/mypy.ini b/packages/google-cloud-bigquery/mypy.ini new file mode 100644 index 000000000000..4505b485436b --- /dev/null +++ b/packages/google-cloud-bigquery/mypy.ini @@ -0,0 +1,3 @@ +[mypy] +python_version = 3.6 +namespace_packages = True diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index d40e66dac1d2..00881063059a 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,15 +4,22 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "fbbe0cb0ea22161d81f1e5504bb89b55e4198634" + "sha": "31644d380b35a76a9147801a4b6b0271c246fd0c" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "0dc0a6c0f1a9f979bc0690f0caa5fbafa3000c2c", - "internalRef": "327026955" + "sha": "c941026e5e3d600817a20e9ab4d4be03dff21a68", + "internalRef": "334645418" + } + }, + { + "git": { + "name": "synthtool", + "remote": "https://github.com/googleapis/synthtool.git", + "sha": "f3c04883d6c43261ff13db1f52d03a283be06871" } }, { @@ -33,5 +40,86 @@ "generator": "bazel" } } + ], + "generatedFiles": [ + ".flake8", + ".github/CONTRIBUTING.md", + ".github/ISSUE_TEMPLATE/bug_report.md", + ".github/ISSUE_TEMPLATE/feature_request.md", + ".github/ISSUE_TEMPLATE/support_request.md", + ".github/PULL_REQUEST_TEMPLATE.md", + ".github/release-please.yml", + ".github/snippet-bot.yml", + ".gitignore", + ".kokoro/build.sh", + ".kokoro/continuous/common.cfg", + ".kokoro/continuous/continuous.cfg", + ".kokoro/docker/docs/Dockerfile", + ".kokoro/docker/docs/fetch_gpg_keys.sh", + ".kokoro/docs/common.cfg", + ".kokoro/docs/docs-presubmit.cfg", + ".kokoro/docs/docs.cfg", + ".kokoro/populate-secrets.sh", + ".kokoro/presubmit/common.cfg", + ".kokoro/presubmit/presubmit.cfg", + ".kokoro/presubmit/system-3.8.cfg", + ".kokoro/publish-docs.sh", + ".kokoro/release.sh", + ".kokoro/release/common.cfg", + ".kokoro/release/release.cfg", + ".kokoro/samples/lint/common.cfg", + ".kokoro/samples/lint/continuous.cfg", + ".kokoro/samples/lint/periodic.cfg", + ".kokoro/samples/lint/presubmit.cfg", + ".kokoro/samples/python3.6/common.cfg", + ".kokoro/samples/python3.6/continuous.cfg", + ".kokoro/samples/python3.6/periodic.cfg", + ".kokoro/samples/python3.6/presubmit.cfg", + ".kokoro/samples/python3.7/common.cfg", + ".kokoro/samples/python3.7/continuous.cfg", + ".kokoro/samples/python3.7/periodic.cfg", + ".kokoro/samples/python3.7/presubmit.cfg", + ".kokoro/samples/python3.8/common.cfg", + ".kokoro/samples/python3.8/continuous.cfg", + ".kokoro/samples/python3.8/periodic.cfg", + ".kokoro/samples/python3.8/presubmit.cfg", + ".kokoro/test-samples.sh", + ".kokoro/trampoline.sh", + ".kokoro/trampoline_v2.sh", + ".trampolinerc", + "CODE_OF_CONDUCT.md", + "CONTRIBUTING.rst", + "LICENSE", + "MANIFEST.in", + "docs/_static/custom.css", + "docs/_templates/layout.html", + "docs/bigquery_v2/services.rst", + "docs/bigquery_v2/types.rst", + "docs/conf.py", + "google/cloud/bigquery_v2/__init__.py", + "google/cloud/bigquery_v2/proto/encryption_config.proto", + "google/cloud/bigquery_v2/proto/model.proto", + "google/cloud/bigquery_v2/proto/model_reference.proto", + "google/cloud/bigquery_v2/proto/standard_sql.proto", + "google/cloud/bigquery_v2/py.typed", + "google/cloud/bigquery_v2/types/__init__.py", + "google/cloud/bigquery_v2/types/encryption_config.py", + "google/cloud/bigquery_v2/types/model.py", + "google/cloud/bigquery_v2/types/model_reference.py", + "google/cloud/bigquery_v2/types/standard_sql.py", + "mypy.ini", + "renovate.json", + "samples/AUTHORING_GUIDE.md", + "samples/CONTRIBUTING.md", + "scripts/decrypt-secrets.sh", + "scripts/fixup_bigquery_v2_keywords.py", + "scripts/readme-gen/readme_gen.py", + "scripts/readme-gen/templates/README.tmpl.rst", + "scripts/readme-gen/templates/auth.tmpl.rst", + "scripts/readme-gen/templates/auth_api_key.tmpl.rst", + "scripts/readme-gen/templates/install_deps.tmpl.rst", + "scripts/readme-gen/templates/install_portaudio.tmpl.rst", + "setup.cfg", + "testing/.gitignore" ] } \ No newline at end of file From b8e03dcd7c4fa494c7fcdbf785586a34e43cc89f Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 12 Oct 2020 10:24:16 -0400 Subject: [PATCH 0952/2016] fix: use version.py instead of pkg_resources.get_distribution (#307) * fix: use version.py instead of pkg_resources.get_distribution * fix: nit --- .../google/cloud/bigquery/__init__.py | 4 ++-- .../google/cloud/bigquery/version.py | 15 +++++++++++++++ packages/google-cloud-bigquery/setup.py | 7 ++++++- 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/version.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 89c5a36240db..e83e70084c78 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -28,9 +28,9 @@ """ -from pkg_resources import get_distribution +from google.cloud.bigquery import version as bigquery_version -__version__ = get_distribution("google-cloud-bigquery").version +__version__ = bigquery_version.__version__ from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py new file mode 100644 index 000000000000..8b5d3328c28b --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -0,0 +1,15 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "2.1.0" diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 14b38b63e4dd..be7296081e63 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "2.1.0" + # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' @@ -83,6 +83,11 @@ with io.open(readme_filename, encoding="utf-8") as readme_file: readme = readme_file.read() +version = {} +with open(os.path.join(package_root, "google/cloud/bigquery/version.py")) as fp: + exec(fp.read(), version) +version = version["__version__"] + # Only include packages under the 'google' namespace. Do not include tests, # benchmarks, etc. packages = [ From 2c0a9ed8e21c4a5a8d95eed25710619f4628c2df Mon Sep 17 00:00:00 2001 From: Carlos de la Guardia Date: Mon, 12 Oct 2020 17:10:07 -0500 Subject: [PATCH 0953/2016] deps: require pyarrow for pandas support (#314) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [X] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [X] Ensure the tests and linter pass - [X] Code coverage does not decrease (if any source code was changed) - [X] Appropriate docs were updated (if necessary) Fixes #265 🦕 --- .../google-cloud-bigquery/docs/snippets.py | 4 - .../google/cloud/bigquery/__init__.py | 3 - .../google/cloud/bigquery/client.py | 43 +--- .../google/cloud/bigquery/exceptions.py | 17 -- .../google/cloud/bigquery/table.py | 92 ++----- packages/google-cloud-bigquery/noxfile.py | 5 +- packages/google-cloud-bigquery/setup.py | 12 +- .../testing/constraints-3.6.txt | 1 - .../tests/unit/test__pandas_helpers.py | 8 + .../tests/unit/test_client.py | 144 +---------- .../tests/unit/test_job.py | 39 --- .../tests/unit/test_table.py | 232 ++++-------------- 12 files changed, 97 insertions(+), 503 deletions(-) delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index bc6b580201b9..8c106e63d54b 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -26,10 +26,6 @@ import pytest -try: - import fastparquet -except (ImportError, AttributeError): - fastparquet = None try: import pandas except (ImportError, AttributeError): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index e83e70084c78..b8d1cc4d7a0c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -38,7 +38,6 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums from google.cloud.bigquery.enums import StandardSqlDataTypes -from google.cloud.bigquery.exceptions import PyarrowMissingWarning from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -143,8 +142,6 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", - # Errors and warnings - "PyarrowMissingWarning", ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index fcb18385d17f..2afffab80832 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -58,7 +58,6 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.exceptions import PyarrowMissingWarning from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.model import Model @@ -2135,29 +2134,31 @@ def load_table_from_dataframe( [Beta] The compression method to use if intermittently serializing ``dataframe`` to a parquet file. - If ``pyarrow`` and job config schema are used, the argument - is directly passed as the ``compression`` argument to the - underlying ``pyarrow.parquet.write_table()`` method (the - default value "snappy" gets converted to uppercase). + The argument is directly passed as the ``compression`` + argument to the underlying ``pyarrow.parquet.write_table()`` + method (the default value "snappy" gets converted to uppercase). https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table - If either ``pyarrow`` or job config schema are missing, the - argument is directly passed as the ``compression`` argument - to the underlying ``DataFrame.to_parquet()`` method. + If the job config schema is missing, the argument is directly + passed as the ``compression`` argument to the underlying + ``DataFrame.to_parquet()`` method. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet Returns: google.cloud.bigquery.job.LoadJob: A new load job. Raises: - ImportError: + ValueError: If a usable parquet engine cannot be found. This method - requires :mod:`pyarrow` or :mod:`fastparquet` to be - installed. + requires :mod:`pyarrow` to be installed. TypeError: If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ + if pyarrow is None: + # pyarrow is now the only supported parquet engine. + raise ValueError("This method requires pyarrow to be installed") + job_id = _make_job_id(job_id, job_id_prefix) if job_config: @@ -2222,7 +2223,7 @@ def load_table_from_dataframe( os.close(tmpfd) try: - if pyarrow and job_config.schema: + if job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() @@ -2233,24 +2234,6 @@ def load_table_from_dataframe( parquet_compression=parquet_compression, ) else: - if not pyarrow: - warnings.warn( - "Loading dataframe data without pyarrow installed is " - "deprecated and will become unsupported in the future. " - "Please install the pyarrow package.", - PyarrowMissingWarning, - stacklevel=2, - ) - - if job_config.schema: - warnings.warn( - "job_config.schema is set, but not used to assist in " - "identifying correct types for data serialization. " - "Please install the pyarrow package.", - PendingDeprecationWarning, - stacklevel=2, - ) - dataframe.to_parquet(tmppath, compression=parquet_compression) with open(tmppath, "rb") as parquet_file: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py deleted file mode 100644 index 93490ef97668..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class PyarrowMissingWarning(DeprecationWarning): - pass diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a72bacb7419e..01e8815da4c1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -50,7 +50,6 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields -from google.cloud.bigquery.exceptions import PyarrowMissingWarning from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -1679,75 +1678,38 @@ def to_dataframe( create_bqstorage_client = False bqstorage_client = None - if pyarrow is not None: - # If pyarrow is available, calling to_arrow, then converting to a - # pandas dataframe is about 2x faster. This is because pandas.concat is - # rarely no-copy, whereas pyarrow.Table.from_batches + to_pandas is - # usually no-copy. - record_batch = self.to_arrow( - progress_bar_type=progress_bar_type, - bqstorage_client=bqstorage_client, - create_bqstorage_client=create_bqstorage_client, - ) + record_batch = self.to_arrow( + progress_bar_type=progress_bar_type, + bqstorage_client=bqstorage_client, + create_bqstorage_client=create_bqstorage_client, + ) + + # When converting timestamp values to nanosecond precision, the result + # can be out of pyarrow bounds. To avoid the error when converting to + # Pandas, we set the timestamp_as_object parameter to True, if necessary. + types_to_check = { + pyarrow.timestamp("us"), + pyarrow.timestamp("us", tz=pytz.UTC), + } - # When converting timestamp values to nanosecond precision, the result - # can be out of pyarrow bounds. To avoid the error when converting to - # Pandas, we set the timestamp_as_object parameter to True, if necessary. - types_to_check = { - pyarrow.timestamp("us"), - pyarrow.timestamp("us", tz=pytz.UTC), - } - - for column in record_batch: - if column.type in types_to_check: - try: - column.cast("timestamp[ns]") - except pyarrow.lib.ArrowInvalid: - timestamp_as_object = True - break - else: - timestamp_as_object = False - - extra_kwargs = {"timestamp_as_object": timestamp_as_object} - - df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) - - for column in dtypes: - df[column] = pandas.Series(df[column], dtype=dtypes[column]) - return df + for column in record_batch: + if column.type in types_to_check: + try: + column.cast("timestamp[ns]") + except pyarrow.lib.ArrowInvalid: + timestamp_as_object = True + break else: - warnings.warn( - "Converting to a dataframe without pyarrow installed is " - "often slower and will become unsupported in the future. " - "Please install the pyarrow package.", - PyarrowMissingWarning, - stacklevel=2, - ) + timestamp_as_object = False - # The bqstorage_client is only used if pyarrow is available, so the - # rest of this method only needs to account for tabledata.list. - progress_bar = self._get_progress_bar(progress_bar_type) + extra_kwargs = {"timestamp_as_object": timestamp_as_object} - frames = [] - for frame in self.to_dataframe_iterable(dtypes=dtypes): - frames.append(frame) + df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) - if progress_bar is not None: - # In some cases, the number of total rows is not populated - # until the first page of rows is fetched. Update the - # progress bar's total to keep an accurate count. - progress_bar.total = progress_bar.total or self.total_rows - progress_bar.update(len(frame)) - - if progress_bar is not None: - # Indicate that the download has finished. - progress_bar.close() - - # Avoid concatting an empty list. - if not frames: - column_names = [field.name for field in self._schema] - return pandas.DataFrame(columns=column_names) - return pandas.concat(frames, ignore_index=True) + for column in dtypes: + df[column] = pandas.Series(df[column], dtype=dtypes[column]) + + return df class _EmptyRowIterator(object): diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 42d8f93565d5..db1dcffdef84 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -49,10 +49,7 @@ def default(session): constraints_path, ) - # fastparquet is not included in .[all] because, in general, it's - # redundant with pyarrow. We still want to run some unit tests with - # fastparquet serialization, though. - session.install("-e", ".[all,fastparquet]", "-c", constraints_path) + session.install("-e", ".[all]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index be7296081e63..abd5cef95310 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -47,13 +47,12 @@ "grpcio >= 1.32.0, < 2.0dev", "pyarrow >= 1.0.0, < 2.0dev", ], - "pandas": ["pandas>=0.23.0"], - "pyarrow": [ + "pandas": [ + "pandas>=0.23.0", # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword. "pyarrow >= 1.0.0, < 2.0dev", ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], - "fastparquet": ["fastparquet", "python-snappy", "llvmlite>=0.34.0"], "opentelemetry": [ "opentelemetry-api==0.9b0", "opentelemetry-sdk==0.9b0", @@ -64,13 +63,6 @@ all_extras = [] for extra in extras: - if extra in ( - # Skip fastparquet from "all" because it is redundant with pyarrow and - # creates a dependency on pre-release versions of numpy. See: - # https://github.com/googleapis/google-cloud-python/issues/8549 - "fastparquet", - ): - continue all_extras.extend(extras[extra]) extras["all"] = all_extras diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index a9f4faa92a31..7988049419a8 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -1,4 +1,3 @@ -fastparquet==0.4.1 google-api-core==1.22.2 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index c1073066d479..bdb1c56ea2f7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -1329,3 +1329,11 @@ def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_tes ) ) assert result.equals(expected_result) + + with pytest.raises(StopIteration): + result = next(results_gen) + + +def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): + dataframe = module_under_test._tabledata_list_page_to_dataframe([], [], {}) + assert isinstance(dataframe, pandas.DataFrame) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f44201ab854d..737c1aef7530 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -32,10 +32,6 @@ import pytz import pkg_resources -try: - import fastparquet -except (ImportError, AttributeError): # pragma: NO COVER - fastparquet = None try: import pandas except (ImportError, AttributeError): # pragma: NO COVER @@ -7838,80 +7834,6 @@ def test_load_table_from_dataframe_unknown_table(self): job_config=mock.ANY, ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(fastparquet is None, "Requires `fastparquet`") - def test_load_table_from_dataframe_no_pyarrow_warning(self): - from google.cloud.bigquery.client import PyarrowMissingWarning - - client = self._make_client() - - # Pick at least one column type that translates to Pandas dtype - # "object". A string column matches that. - records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] - dataframe = pandas.DataFrame(records) - - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) - pyarrow_patch_helpers = mock.patch( - "google.cloud.bigquery._pandas_helpers.pyarrow", None - ) - catch_warnings = warnings.catch_warnings(record=True) - - with get_table_patch, load_patch, pyarrow_patch, pyarrow_patch_helpers, catch_warnings as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, location=self.LOCATION - ) - - matches = [ - warning for warning in warned if warning.category is PyarrowMissingWarning - ] - assert matches, "A missing pyarrow deprecation warning was not raised." - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(fastparquet is None, "Requires `fastparquet`") - def test_load_table_from_dataframe_no_schema_warning_wo_pyarrow(self): - client = self._make_client() - - # Pick at least one column type that translates to Pandas dtype - # "object". A string column matches that. - records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] - dataframe = pandas.DataFrame(records) - - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) - pyarrow_patch_helpers = mock.patch( - "google.cloud.bigquery._pandas_helpers.pyarrow", None - ) - catch_warnings = warnings.catch_warnings(record=True) - - with get_table_patch, load_patch, pyarrow_patch, pyarrow_patch_helpers, catch_warnings as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, location=self.LOCATION - ) - - matches = [ - warning - for warning in warned - if warning.category in (DeprecationWarning, PendingDeprecationWarning) - and "could not be detected" in str(warning) - and "please provide a schema" in str(warning) - ] - assert matches, "A missing schema deprecation warning was not raised." - @unittest.skipIf( pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", @@ -8182,7 +8104,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "unknown_col" in message @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(fastparquet is None, "Requires `fastparquet`") def test_load_table_from_dataframe_w_partial_schema_missing_types(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -8236,55 +8157,6 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.schema is None - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_schema_wo_pyarrow(self): - from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES - from google.cloud.bigquery import job - from google.cloud.bigquery.schema import SchemaField - - client = self._make_client() - records = [{"name": u"Monty", "age": 100}, {"name": u"Python", "age": 60}] - dataframe = pandas.DataFrame(records, columns=["name", "age"]) - schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER")) - job_config = job.LoadJobConfig(schema=schema) - - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) - - with load_patch as load_table_from_file, pyarrow_patch, warnings.catch_warnings( - record=True - ) as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION - ) - - assert warned # there should be at least one warning - for warning in warned: - assert "pyarrow" in str(warning) - assert issubclass( - warning.category, (DeprecationWarning, PendingDeprecationWarning) - ) - - load_table_from_file.assert_called_once_with( - client, - mock.ANY, - self.TABLE_REF, - num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, - job_id=mock.ANY, - job_id_prefix=None, - location=self.LOCATION, - project=None, - job_config=mock.ANY, - ) - - sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET - assert tuple(sent_config.schema) == schema - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): @@ -8320,7 +8192,7 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): + def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): client = self._make_client() records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) @@ -8338,8 +8210,8 @@ def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): dataframe, "to_parquet", wraps=dataframe.to_parquet ) - with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch as to_parquet_spy: - with warnings.catch_warnings(record=True) as warned: + with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch: + with pytest.raises(ValueError): client.load_table_from_dataframe( dataframe, self.TABLE_REF, @@ -8347,16 +8219,6 @@ def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): parquet_compression="gzip", ) - call_args = to_parquet_spy.call_args - assert call_args is not None - assert call_args.kwargs.get("compression") == "gzip" - - assert len(warned) == 2 - warning = warned[0] - assert "Loading dataframe data without pyarrow" in str(warning) - warning = warned[1] - assert "Please install the pyarrow package" in str(warning) - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index fb042e18cc96..d21489616653 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -5802,45 +5802,6 @@ def test_to_dataframe_column_date_dtypes(self): self.assertEqual(df.date.dtype.name, "datetime64[ns]") - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_column_date_dtypes_wo_pyarrow(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "1", - "schema": {"fields": [{"name": "date", "type": "DATE"}]}, - } - row_data = [ - ["1999-12-01"], - ] - rows = [{"f": [{"v": field} for field in row]} for row in row_data] - query_resource["rows"] = rows - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, query_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - with mock.patch("google.cloud.bigquery.table.pyarrow", None): - with warnings.catch_warnings(record=True) as warned: - df = job.to_dataframe( - date_as_object=False, create_bqstorage_client=False - ) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 1) # verify the number of rows - exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] - self.assertEqual(list(df), exp_columns) # verify the column names - - self.assertEqual(df.date.dtype.name, "object") - - assert len(warned) == 1 - warning = warned[0] - assert "without pyarrow" in str(warning) - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm") diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 12169658e7b3..fe17d285221b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2148,6 +2148,49 @@ def test_to_dataframe_iterable(self): self.assertEqual(df_2["name"][0], "Sven") self.assertEqual(df_2["age"][0], 33) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_iterable_with_dtypes(self): + from google.cloud.bigquery.schema import SchemaField + import types + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + path = "/foo" + api_request = mock.Mock( + side_effect=[ + { + "rows": [{"f": [{"v": "Bengt"}, {"v": "32"}]}], + "pageToken": "NEXTPAGE", + }, + {"rows": [{"f": [{"v": "Sven"}, {"v": "33"}]}]}, + ] + ) + + row_iterator = self._make_one( + _mock_client(), api_request, path, schema, page_size=1, max_results=5 + ) + dfs = row_iterator.to_dataframe_iterable(dtypes={"age": "int32"}) + + self.assertIsInstance(dfs, types.GeneratorType) + + df_1 = next(dfs) + self.assertIsInstance(df_1, pandas.DataFrame) + self.assertEqual(df_1.name.dtype.name, "object") + self.assertEqual(df_1.age.dtype.name, "int32") + self.assertEqual(len(df_1), 1) # verify the number of rows + self.assertEqual( + df_1["name"][0], "Bengt" + ) # verify the first value of 'name' column + self.assertEqual(df_1["age"][0], 32) # verify the first value of 'age' column + + df_2 = next(dfs) + self.assertEqual(len(df_2), 1) # verify the number of rows + self.assertEqual(df_2["name"][0], "Sven") + self.assertEqual(df_2["age"][0], 33) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" @@ -2327,38 +2370,6 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_warning_wo_pyarrow(self): - from google.cloud.bigquery.client import PyarrowMissingWarning - from google.cloud.bigquery.schema import SchemaField - - schema = [ - SchemaField("name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - rows = [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - ] - path = "/foo" - api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = self._make_one(_mock_client(), api_request, path, schema) - - no_pyarrow_patch = mock.patch("google.cloud.bigquery.table.pyarrow", new=None) - catch_warnings = warnings.catch_warnings(record=True) - - with no_pyarrow_patch, catch_warnings as warned: - df = row_iterator.to_dataframe() - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 2) - matches = [ - warning for warning in warned if warning.category is PyarrowMissingWarning - ] - self.assertTrue( - matches, msg="A missing pyarrow deprecation warning was not raised." - ) - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") @@ -2399,50 +2410,6 @@ def test_to_dataframe_progress_bar( progress_bar_mock().close.assert_called_once() self.assertEqual(len(df), 4) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm_gui") - @mock.patch("tqdm.tqdm_notebook") - @mock.patch("tqdm.tqdm") - def test_to_dataframe_progress_bar_wo_pyarrow( - self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock - ): - from google.cloud.bigquery.schema import SchemaField - - schema = [ - SchemaField("name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - rows = [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - path = "/foo" - api_request = mock.Mock(return_value={"rows": rows}) - - progress_bars = ( - ("tqdm", tqdm_mock), - ("tqdm_notebook", tqdm_notebook_mock), - ("tqdm_gui", tqdm_gui_mock), - ) - - for progress_bar_type, progress_bar_mock in progress_bars: - row_iterator = self._make_one(_mock_client(), api_request, path, schema) - with mock.patch("google.cloud.bigquery.table.pyarrow", None): - with warnings.catch_warnings(record=True) as warned: - df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) - - progress_bar_mock.assert_called() - progress_bar_mock().update.assert_called() - progress_bar_mock().close.assert_called_once() - self.assertEqual(len(df), 4) - - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue("without pyarrow" in str(warning)) - @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): @@ -2557,57 +2524,6 @@ def test_to_dataframe_w_empty_results(self): self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_w_empty_results_wo_pyarrow(self): - from google.cloud.bigquery.schema import SchemaField - - with mock.patch("google.cloud.bigquery.table.pyarrow", None): - schema = [ - SchemaField("name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - api_request = mock.Mock(return_value={"rows": []}) - row_iterator = self._make_one(_mock_client(), api_request, schema=schema) - - with warnings.catch_warnings(record=True) as warned: - df = row_iterator.to_dataframe() - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 0) # verify the number of rows - self.assertEqual(list(df), ["name", "age"]) # verify the column names - - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue("without pyarrow" in str(warning)) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_w_no_results_wo_pyarrow(self): - from google.cloud.bigquery.schema import SchemaField - - with mock.patch("google.cloud.bigquery.table.pyarrow", None): - schema = [ - SchemaField("name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - api_request = mock.Mock(return_value={"rows": []}) - row_iterator = self._make_one(_mock_client(), api_request, schema=schema) - - def empty_iterable(dtypes=None): - return [] - - row_iterator.to_dataframe_iterable = empty_iterable - - with warnings.catch_warnings(record=True) as warned: - df = row_iterator.to_dataframe() - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 0) # verify the number of rows - self.assertEqual(list(df), ["name", "age"]) # verify the column names - - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue("without pyarrow" in str(warning)) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): import datetime @@ -3424,68 +3340,6 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_concat_categorical_dtype_wo_pyarrow(self): - from google.cloud.bigquery.schema import SchemaField - - schema = [ - SchemaField("col_str", "STRING"), - SchemaField("col_category", "STRING"), - ] - row_data = [ - [u"foo", u"low"], - [u"bar", u"medium"], - [u"baz", u"low"], - [u"foo_page2", u"medium"], - [u"bar_page2", u"high"], - [u"baz_page2", u"low"], - ] - path = "/foo" - - rows = [{"f": [{"v": field} for field in row]} for row in row_data[:3]] - rows_page2 = [{"f": [{"v": field} for field in row]} for row in row_data[3:]] - api_request = mock.Mock( - side_effect=[{"rows": rows, "pageToken": "NEXTPAGE"}, {"rows": rows_page2}] - ) - - row_iterator = self._make_one(_mock_client(), api_request, path, schema) - - mock_pyarrow = mock.patch("google.cloud.bigquery.table.pyarrow", None) - catch_warnings = warnings.catch_warnings(record=True) - - with mock_pyarrow, catch_warnings as warned: - got = row_iterator.to_dataframe( - dtypes={ - "col_category": pandas.core.dtypes.dtypes.CategoricalDtype( - categories=["low", "medium", "high"], ordered=False, - ), - }, - ) - - self.assertIsInstance(got, pandas.DataFrame) - self.assertEqual(len(got), 6) # verify the number of rows - expected_columns = [field.name for field in schema] - self.assertEqual(list(got), expected_columns) # verify the column names - - # Are column types correct? - expected_dtypes = [ - pandas.core.dtypes.dtypes.np.dtype("O"), # the default for string data - pandas.core.dtypes.dtypes.CategoricalDtype( - categories=["low", "medium", "high"], ordered=False, - ), - ] - self.assertEqual(list(got.dtypes), expected_dtypes) - - # And the data in the categorical column? - self.assertEqual( - list(got["col_category"]), - ["low", "medium", "low", "medium", "high", "low"], - ) - - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue("without pyarrow" in str(warning)) - class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From 566eb3eeba7a5b28546827dae1b2c0335b3f99e9 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 13 Oct 2020 02:53:19 -0400 Subject: [PATCH 0954/2016] feat: add method api_repr for table list item (#299) --- .../google/cloud/bigquery/table.py | 8 ++++++++ .../google-cloud-bigquery/tests/unit/test_table.py | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 01e8815da4c1..2214d0172061 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1071,6 +1071,14 @@ def to_bqstorage(self): """ return self.reference.to_bqstorage() + def to_api_repr(self): + """Constructs the API resource of this table + + Returns: + Dict[str, object]: Table represented as an API resource + """ + return copy.deepcopy(self._properties) + def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index fe17d285221b..37660552182d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1433,6 +1433,17 @@ def test_labels_update_in_place(self): labels["foo"] = "bar" # update in place self.assertEqual(table.labels, {"foo": "bar"}) + def test_to_api_repr(self): + resource = { + "tableReference": { + "projectId": "testproject", + "datasetId": "testdataset", + "tableId": "testtable", + } + } + table = self._make_one(resource) + self.assertEqual(table.to_api_repr(), resource) + class TestRow(unittest.TestCase): def test_row(self): From c777a1093cc99cd672d02c4e4b503f75365bc547 Mon Sep 17 00:00:00 2001 From: Ryan Yuan Date: Wed, 14 Oct 2020 08:02:02 +1100 Subject: [PATCH 0955/2016] docs: update clustering field docstrings (#286) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #285 🦕 --- .../google-cloud-bigquery/google/cloud/bigquery/job.py | 8 ++++---- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 20bce597a1b2..70db69e713bf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -1073,8 +1073,8 @@ def clustering_fields(self): .. note:: - As of 2018-06-29, clustering fields cannot be set on a table - which does not also have time partioning defined. + BigQuery supports clustering for both partitioned and + non-partitioned tables. """ prop = self._get_sub_prop("clustering") if prop is not None: @@ -2554,8 +2554,8 @@ def clustering_fields(self): .. note:: - As of 2018-06-29, clustering fields cannot be set on a table - which does not also have time partioning defined. + BigQuery supports clustering for both partitioned and + non-partitioned tables. """ prop = self._get_sub_prop("clustering") if prop is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 2214d0172061..5474f643e05b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -625,8 +625,8 @@ def clustering_fields(self): .. note:: - As of 2018-06-29, clustering fields cannot be set on a table - which does not also have time partioning defined. + BigQuery supports clustering for both partitioned and + non-partitioned tables. """ prop = self._properties.get("clustering") if prop is not None: @@ -1030,8 +1030,8 @@ def clustering_fields(self): .. note:: - As of 2018-06-29, clustering fields cannot be set on a table - which does not also have time partioning defined. + BigQuery supports clustering for both partitioned and + non-partitioned tables. """ prop = self._properties.get("clustering") if prop is not None: From 198b56a72dc6b1e7028f9ba6547380ec0101bd19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20BAPTISTE?= Date: Tue, 13 Oct 2020 23:26:05 +0200 Subject: [PATCH 0956/2016] fix: make TimePartitioning repr evaluable (#110) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #109 🦕 --- .../google/cloud/bigquery/table.py | 15 ++++++++++++++- .../tests/unit/test_table.py | 4 ++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 5474f643e05b..d6d966eee9b0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -2114,7 +2114,20 @@ def to_api_repr(self): return self._properties def _key(self): - return tuple(sorted(self._properties.items())) + # because we are only "renaming" top level keys shallow copy is sufficient here. + properties = self._properties.copy() + # calling repr for non built-in type objects. + properties["type_"] = repr(properties.pop("type")) + if "field" in properties: + # calling repr for non built-in type objects. + properties["field"] = repr(properties["field"]) + if "requirePartitionFilter" in properties: + properties["require_partition_filter"] = properties.pop( + "requirePartitionFilter" + ) + if "expirationMs" in properties: + properties["expiration_ms"] = properties.pop("expirationMs") + return tuple(sorted(properties.items())) def __eq__(self, other): if not isinstance(other, TimePartitioning): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 37660552182d..e21453b9f8f7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -3711,7 +3711,7 @@ def test___hash__not_equals(self): def test___repr___minimal(self): time_partitioning = self._make_one() - expected = "TimePartitioning(type=DAY)" + expected = "TimePartitioning(type_='DAY')" self.assertEqual(repr(time_partitioning), expected) def test___repr___explicit(self): @@ -3720,7 +3720,7 @@ def test___repr___explicit(self): time_partitioning = self._make_one( type_=TimePartitioningType.DAY, field="name", expiration_ms=10000 ) - expected = "TimePartitioning(" "expirationMs=10000," "field=name," "type=DAY)" + expected = "TimePartitioning(expiration_ms=10000,field='name',type_='DAY')" self.assertEqual(repr(time_partitioning), expected) def test_set_expiration_w_none(self): From d858b1013d118f77def294ccd9ac4e7efb3e6e85 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 13 Oct 2020 23:41:46 +0200 Subject: [PATCH 0957/2016] chore(deps): update dependency matplotlib to v3.3.2 (#260) Co-authored-by: Tim Swast --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index daabdf745306..7d001fa2f547 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -4,7 +4,7 @@ google-auth-oauthlib==0.4.1 grpcio==1.32.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.1 +matplotlib==3.3.2 pandas==1.1.3 pyarrow==1.0.1 pytz==2020.1 From f3d36d12e0c7da72b6f91b459a96a6ea0931146c Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 14 Oct 2020 01:42:40 -0400 Subject: [PATCH 0958/2016] perf: add size parameter for load table from dataframe and json methods (#280) * feat: add size parameter for load from dataframe and json * pref: calculate length of encoded string --- .../google/cloud/bigquery/client.py | 7 +++++-- .../tests/unit/test_client.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2afffab80832..b7e082daa3f0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2237,11 +2237,13 @@ def load_table_from_dataframe( dataframe.to_parquet(tmppath, compression=parquet_compression) with open(tmppath, "rb") as parquet_file: + file_size = os.path.getsize(tmppath) return self.load_table_from_file( parquet_file, destination, num_retries=num_retries, rewind=True, + size=file_size, job_id=job_id, job_id_prefix=job_id_prefix, location=location, @@ -2343,11 +2345,12 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) data_str = u"\n".join(json.dumps(item) for item in json_rows) - data_file = io.BytesIO(data_str.encode()) - + encoded_str = data_str.encode() + data_file = io.BytesIO(encoded_str) return self.load_table_from_file( data_file, destination, + size=len(encoded_str), num_retries=num_retries, job_id=job_id, job_id_prefix=job_id_prefix, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 737c1aef7530..52e00d7c7465 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -7482,6 +7482,7 @@ def test_load_table_from_dataframe(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=None, @@ -7525,6 +7526,7 @@ def test_load_table_from_dataframe_w_client_location(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7577,6 +7579,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7631,6 +7634,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7723,6 +7727,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7782,6 +7787,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7827,6 +7833,7 @@ def test_load_table_from_dataframe_unknown_table(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=None, @@ -7867,6 +7874,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7913,6 +7921,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7973,6 +7982,7 @@ def test_load_table_from_dataframe_struct_fields(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8046,6 +8056,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8139,6 +8150,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8251,6 +8263,7 @@ def test_load_table_from_dataframe_w_nulls(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8302,6 +8315,7 @@ def test_load_table_from_json_basic_use(self): client, mock.ANY, self.TABLE_REF, + size=mock.ANY, num_retries=_DEFAULT_NUM_RETRIES, job_id=mock.ANY, job_id_prefix=None, @@ -8353,6 +8367,7 @@ def test_load_table_from_json_non_default_args(self): client, mock.ANY, self.TABLE_REF, + size=mock.ANY, num_retries=_DEFAULT_NUM_RETRIES, job_id=mock.ANY, job_id_prefix=None, From 55d8fb4a7d387bb81a2628c9342cab06c803f0bb Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 14 Oct 2020 10:05:18 -0400 Subject: [PATCH 0959/2016] feat: add to_api_repr method to Model (#326) --- .../google/cloud/bigquery/model.py | 8 ++++ .../tests/unit/model/test_model.py | 44 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index 092d98c2e9b2..1143b71f9825 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -317,6 +317,14 @@ def _build_resource(self, filter_fields): def __repr__(self): return "Model(reference={})".format(repr(self.reference)) + def to_api_repr(self): + """Construct the API resource representation of this model. + + Returns: + Dict[str, object]: Model reference represented as an API resource + """ + return json_format.MessageToDict(self._proto) + class ModelReference(object): """ModelReferences are pointers to models. diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model.py b/packages/google-cloud-bigquery/tests/unit/model/test_model.py index 2c0079429864..9fa29a49649b 100644 --- a/packages/google-cloud-bigquery/tests/unit/model/test_model.py +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model.py @@ -318,3 +318,47 @@ def test_repr(target_class): "Model(reference=ModelReference(" "project_id='my-proj', dataset_id='my_dset', model_id='my_model'))" ) + + +def test_to_api_repr(target_class): + from google.protobuf import json_format + + model = target_class("my-proj.my_dset.my_model") + resource = { + "etag": "abcdefg", + "modelReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + }, + "creationTime": "1274284800000", + "lastModifiedTime": "1317484800000", + "modelType": "LOGISTIC_REGRESSION", + "trainingRuns": [ + { + "trainingOptions": {"initialLearnRate": 1.0}, + "startTime": "2010-05-19T16:00:00Z", + }, + { + "trainingOptions": {"initialLearnRate": 0.5}, + "startTime": "2011-10-01T16:00:00Z", + }, + { + "trainingOptions": {"initialLearnRate": 0.25}, + "startTime": "2012-12-21T16:00:00Z", + }, + ], + "description": "A friendly description.", + "location": "US", + "friendlyName": "A friendly name.", + "labels": {"greeting": "こんにちは"}, + "expirationTime": "1356105600000", + "encryptionConfiguration": { + "kmsKeyName": "projects/1/locations/us/keyRings/1/cryptoKeys/1" + }, + } + model._proto = json_format.ParseDict( + resource, types.Model()._pb, ignore_unknown_fields=True + ) + got = model.to_api_repr() + assert got == resource From f79292ab77bf490199507c1942dcddac0d7cc4bb Mon Sep 17 00:00:00 2001 From: Carlos de la Guardia Date: Wed, 14 Oct 2020 13:42:39 -0500 Subject: [PATCH 0960/2016] feat: allow client options to be set in magics context (#322) * feat: allow client options to be set in magics context * add separate client options for storage client --- .../google/cloud/bigquery/magics/magics.py | 98 ++++++++++++++++++- .../tests/unit/test_magics.py | 98 ++++++++++++++++++- 2 files changed, 188 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 22175ee45e4c..5645a84a5224 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -139,6 +139,7 @@ import re import ast +import copy import functools import sys import time @@ -155,6 +156,7 @@ import six from google.api_core import client_info +from google.api_core import client_options from google.api_core.exceptions import NotFound import google.auth from google.cloud import bigquery @@ -178,11 +180,13 @@ def __init__(self): self._project = None self._connection = None self._default_query_job_config = bigquery.QueryJobConfig() + self._bigquery_client_options = client_options.ClientOptions() + self._bqstorage_client_options = client_options.ClientOptions() @property def credentials(self): """google.auth.credentials.Credentials: Credentials to use for queries - performed through IPython magics + performed through IPython magics. Note: These credentials do not need to be explicitly defined if you are @@ -217,7 +221,7 @@ def credentials(self, value): @property def project(self): """str: Default project to use for queries performed through IPython - magics + magics. Note: The project does not need to be explicitly defined if you have an @@ -239,6 +243,54 @@ def project(self): def project(self, value): self._project = value + @property + def bigquery_client_options(self): + """google.api_core.client_options.ClientOptions: client options to be + used through IPython magics. + + Note:: + The client options do not need to be explicitly defined if no + special network connections are required. Normally you would be + using the https://bigquery.googleapis.com/ end point. + + Example: + Manually setting the endpoint: + + >>> from google.cloud.bigquery import magics + >>> client_options = {} + >>> client_options['api_endpoint'] = "https://some.special.url" + >>> magics.context.bigquery_client_options = client_options + """ + return self._bigquery_client_options + + @bigquery_client_options.setter + def bigquery_client_options(self, value): + self._bigquery_client_options = value + + @property + def bqstorage_client_options(self): + """google.api_core.client_options.ClientOptions: client options to be + used through IPython magics for the storage client. + + Note:: + The client options do not need to be explicitly defined if no + special network connections are required. Normally you would be + using the https://bigquerystorage.googleapis.com/ end point. + + Example: + Manually setting the endpoint: + + >>> from google.cloud.bigquery import magics + >>> client_options = {} + >>> client_options['api_endpoint'] = "https://some.special.url" + >>> magics.context.bqstorage_client_options = client_options + """ + return self._bqstorage_client_options + + @bqstorage_client_options.setter + def bqstorage_client_options(self, value): + self._bqstorage_client_options = value + @property def default_query_job_config(self): """google.cloud.bigquery.job.QueryJobConfig: Default job @@ -410,6 +462,24 @@ def _create_dataset_if_necessary(client, dataset_id): "Standard SQL if this argument is not used." ), ) +@magic_arguments.argument( + "--bigquery_api_endpoint", + type=str, + default=None, + help=( + "The desired API endpoint, e.g., bigquery.googlepis.com. Defaults to this " + "option's value in the context bigquery_client_options." + ), +) +@magic_arguments.argument( + "--bqstorage_api_endpoint", + type=str, + default=None, + help=( + "The desired API endpoint, e.g., bigquerystorage.googlepis.com. Defaults to " + "this option's value in the context bqstorage_client_options." + ), +) @magic_arguments.argument( "--use_bqstorage_api", action="store_true", @@ -511,15 +581,34 @@ def _cell_magic(line, query): params = _helpers.to_query_parameters(ast.literal_eval(params_option_value)) project = args.project or context.project + + bigquery_client_options = copy.deepcopy(context.bigquery_client_options) + if args.bigquery_api_endpoint: + if isinstance(bigquery_client_options, dict): + bigquery_client_options["api_endpoint"] = args.bigquery_api_endpoint + else: + bigquery_client_options.api_endpoint = args.bigquery_api_endpoint + client = bigquery.Client( project=project, credentials=context.credentials, default_query_job_config=context.default_query_job_config, client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), + client_options=bigquery_client_options, ) if context._connection: client._connection = context._connection - bqstorage_client = _make_bqstorage_client(use_bqstorage_api, context.credentials) + + bqstorage_client_options = copy.deepcopy(context.bqstorage_client_options) + if args.bqstorage_api_endpoint: + if isinstance(bqstorage_client_options, dict): + bqstorage_client_options["api_endpoint"] = args.bqstorage_api_endpoint + else: + bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint + + bqstorage_client = _make_bqstorage_client( + use_bqstorage_api, context.credentials, bqstorage_client_options, + ) close_transports = functools.partial(_close_transports, client, bqstorage_client) @@ -632,7 +721,7 @@ def _split_args_line(line): return params_option_value, rest_of_args -def _make_bqstorage_client(use_bqstorage_api, credentials): +def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): if not use_bqstorage_api: return None @@ -658,6 +747,7 @@ def _make_bqstorage_client(use_bqstorage_api, credentials): return bigquery_storage.BigQueryReadClient( credentials=credentials, client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), + client_options=client_options, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 20be6b7552bf..30ca4d70c3e4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -309,7 +309,7 @@ def test__make_bqstorage_client_false(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(False, credentials_mock) + got = magics._make_bqstorage_client(False, credentials_mock, {}) assert got is None @@ -320,7 +320,7 @@ def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(True, credentials_mock) + got = magics._make_bqstorage_client(True, credentials_mock, {}) assert isinstance(got, bigquery_storage.BigQueryReadClient) @@ -330,7 +330,7 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): ) with pytest.raises(ImportError) as exc_context, missing_bq_storage: - magics._make_bqstorage_client(True, credentials_mock) + magics._make_bqstorage_client(True, credentials_mock, {}) error_msg = str(exc_context.value) assert "google-cloud-bigquery-storage" in error_msg @@ -347,7 +347,7 @@ def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): ) with pytest.raises(ImportError) as exc_context, missing_grpcio_lib: - magics._make_bqstorage_client(True, credentials_mock) + magics._make_bqstorage_client(True, credentials_mock, {}) assert "grpcio" in str(exc_context.value) @@ -1180,6 +1180,96 @@ def test_bigquery_magic_with_project(): assert magics.context.project == "general-project" +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_bigquery_api_endpoint(ipython_ns_cleanup): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._connection = None + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic( + "bigquery", + "--bigquery_api_endpoint=https://bigquery_api.endpoint.com", + "SELECT 17 as num", + ) + + connection_used = run_query_mock.call_args_list[0][0][0]._connection + assert connection_used.API_BASE_URL == "https://bigquery_api.endpoint.com" + # context client options should not change + assert magics.context.bigquery_client_options.api_endpoint is None + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_bigquery_api_endpoint_context_dict(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._connection = None + magics.context.bigquery_client_options = {} + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic( + "bigquery", + "--bigquery_api_endpoint=https://bigquery_api.endpoint.com", + "SELECT 17 as num", + ) + + connection_used = run_query_mock.call_args_list[0][0][0]._connection + assert connection_used.API_BASE_URL == "https://bigquery_api.endpoint.com" + # context client options should not change + assert magics.context.bigquery_client_options == {} + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_bqstorage_api_endpoint(ipython_ns_cleanup): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._connection = None + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic( + "bigquery", + "--bqstorage_api_endpoint=https://bqstorage_api.endpoint.com", + "SELECT 17 as num", + ) + + client_used = run_query_mock.mock_calls[1][2]["bqstorage_client"] + assert client_used._transport._host == "https://bqstorage_api.endpoint.com" + # context client options should not change + assert magics.context.bqstorage_client_options.api_endpoint is None + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_bqstorage_api_endpoint_context_dict(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._connection = None + magics.context.bqstorage_client_options = {} + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic( + "bigquery", + "--bqstorage_api_endpoint=https://bqstorage_api.endpoint.com", + "SELECT 17 as num", + ) + + client_used = run_query_mock.mock_calls[1][2]["bqstorage_client"] + assert client_used._transport._host == "https://bqstorage_api.endpoint.com" + # context client options should not change + assert magics.context.bqstorage_client_options == {} + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_multiple_options(): ip = IPython.get_ipython() From 6d33954b6dbf6bcd00abeb56d412d0c36ae6f89f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 16 Oct 2020 04:55:34 -0500 Subject: [PATCH 0961/2016] refactor: update Job classes to use common _properties pattern (#323) Instead of mixing _properties and plain-old Python objects, always use _properties as the source of truth. This has the side-effect of properly reloading the whole job resource. Previously some properties were not reloaded. --- .../google/cloud/bigquery/job.py | 364 +++++++++--------- .../tests/unit/test_client.py | 69 +++- .../tests/unit/test_job.py | 62 +-- 3 files changed, 259 insertions(+), 236 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 70db69e713bf..766db1d4214d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -35,6 +35,7 @@ from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery import _helpers +from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import _query_param_from_api_repr from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter @@ -47,8 +48,9 @@ from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import _table_arg_to_table_ref -from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableListItem +from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioning _DONE_STATE = "DONE" @@ -461,11 +463,11 @@ def created(self): Optional[datetime.datetime]: the creation time (None until set from the server). """ - statistics = self._properties.get("statistics") - if statistics is not None: - millis = statistics.get("creationTime") - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) + millis = _helpers._get_sub_prop( + self._properties, ["statistics", "creationTime"] + ) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) @property def started(self): @@ -475,11 +477,9 @@ def started(self): Optional[datetime.datetime]: the start time (None until set from the server). """ - statistics = self._properties.get("statistics") - if statistics is not None: - millis = statistics.get("startTime") - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) + millis = _helpers._get_sub_prop(self._properties, ["statistics", "startTime"]) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) @property def ended(self): @@ -489,11 +489,9 @@ def ended(self): Optional[datetime.datetime]: the end time (None until set from the server). """ - statistics = self._properties.get("statistics") - if statistics is not None: - millis = statistics.get("endTime") - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) + millis = _helpers._get_sub_prop(self._properties, ["statistics", "endTime"]) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) def _job_statistics(self): """Helper for job-type specific statistics-based properties.""" @@ -535,14 +533,6 @@ def state(self): if status is not None: return status.get("state") - def _scrub_local_properties(self, cleaned): - """Helper: handle subclass properties in cleaned.""" - pass - - def _copy_configuration_properties(self, configuration): - """Helper: assign subclass configuration properties in cleaned.""" - raise NotImplementedError("Abstract") - def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -550,7 +540,6 @@ def _set_properties(self, api_response): api_response (Dict): response returned from an API call. """ cleaned = api_response.copy() - self._scrub_local_properties(cleaned) statistics = cleaned.get("statistics", {}) if "creationTime" in statistics: @@ -560,25 +549,24 @@ def _set_properties(self, api_response): if "endTime" in statistics: statistics["endTime"] = float(statistics["endTime"]) + # Save configuration to keep reference same in self._configuration. + cleaned_config = cleaned.pop("configuration", {}) + configuration = self._properties.pop("configuration", {}) self._properties.clear() self._properties.update(cleaned) - self._copy_configuration_properties(cleaned.get("configuration", {})) + self._properties["configuration"] = configuration + self._properties["configuration"].update(cleaned_config) # For Future interface self._set_future_result() @classmethod - def _get_resource_config(cls, resource): + def _check_resource_config(cls, resource): """Helper for :meth:`from_api_repr` Args: resource (Dict): resource for the job. - Returns: - (str, Dict): - tuple (string, dict), where the first element is the - job ID and the second contains job-specific configuration. - Raises: KeyError: If the resource has no identifier, or @@ -589,7 +577,6 @@ def _get_resource_config(cls, resource): "Resource lacks required identity information: " '["jobReference"]["jobId"]' ) - job_id = resource["jobReference"]["jobId"] if ( "configuration" not in resource or cls._JOB_TYPE not in resource["configuration"] @@ -598,7 +585,6 @@ def _get_resource_config(cls, resource): "Resource lacks required configuration: " '["configuration"]["%s"]' % cls._JOB_TYPE ) - return job_id, resource["configuration"] def to_api_repr(self): """Generate a resource for the job.""" @@ -1002,15 +988,15 @@ def from_api_repr(cls, resource): Args: resource (Dict): - An extract job configuration in the same representation as is - returned from the API. + A job configuration in the same representation as is returned + from the API. Returns: google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. """ - config = cls() - config._properties = copy.deepcopy(resource) - return config + job_config = cls() + job_config._properties = resource + return job_config class LoadJobConfig(_JobConfig): @@ -1450,12 +1436,23 @@ class LoadJob(_AsyncJob): def __init__(self, job_id, source_uris, destination, client, job_config=None): super(LoadJob, self).__init__(job_id, client) - if job_config is None: + if not job_config: job_config = LoadJobConfig() - self.source_uris = source_uris - self._destination = destination self._configuration = job_config + self._properties["configuration"] = job_config._properties + + if source_uris is not None: + _helpers._set_sub_prop( + self._properties, ["configuration", "load", "sourceUris"], source_uris + ) + + if destination is not None: + _helpers._set_sub_prop( + self._properties, + ["configuration", "load", "destinationTable"], + destination.to_api_repr(), + ) @property def destination(self): @@ -1464,7 +1461,20 @@ def destination(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_table """ - return self._destination + dest_config = _helpers._get_sub_prop( + self._properties, ["configuration", "load", "destinationTable"] + ) + return TableReference.from_api_repr(dest_config) + + @property + def source_uris(self): + """Optional[Sequence[str]]: URIs of data files to be loaded. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris + for supported URI formats. None for jobs that load from a file. + """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "load", "sourceUris"] + ) @property def allow_jagged_rows(self): @@ -1687,24 +1697,12 @@ def output_rows(self): def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" - configuration = self._configuration.to_api_repr() - if self.source_uris is not None: - _helpers._set_sub_prop( - configuration, ["load", "sourceUris"], self.source_uris - ) - _helpers._set_sub_prop( - configuration, ["load", "destinationTable"], self.destination.to_api_repr() - ) - + # Exclude statistics, if set. return { "jobReference": self._properties["jobReference"], - "configuration": configuration, + "configuration": self._properties["configuration"], } - def _copy_configuration_properties(self, configuration): - """Helper: assign subclass configuration properties in cleaned.""" - self._configuration._properties = copy.deepcopy(configuration) - @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -1724,16 +1722,9 @@ def from_api_repr(cls, resource, client): Returns: google.cloud.bigquery.job.LoadJob: Job parsed from ``resource``. """ - config_resource = resource.get("configuration", {}) - config = LoadJobConfig.from_api_repr(config_resource) - # A load job requires a destination table. - dest_config = config_resource["load"]["destinationTable"] - ds_ref = DatasetReference(dest_config["projectId"], dest_config["datasetId"]) - destination = TableReference(ds_ref, dest_config["tableId"]) - # sourceUris will be absent if this is a file upload. - source_uris = _helpers._get_sub_prop(config_resource, ["load", "sourceUris"]) + cls._check_resource_config(resource) job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, source_uris, destination, client, config) + job = cls(job_ref, None, None, client) job._set_properties(resource) return job @@ -1824,12 +1815,59 @@ class CopyJob(_AsyncJob): def __init__(self, job_id, sources, destination, client, job_config=None): super(CopyJob, self).__init__(job_id, client) - if job_config is None: + if not job_config: job_config = CopyJobConfig() - self.destination = destination - self.sources = sources self._configuration = job_config + self._properties["configuration"] = job_config._properties + + if destination: + _helpers._set_sub_prop( + self._properties, + ["configuration", "copy", "destinationTable"], + destination.to_api_repr(), + ) + + if sources: + source_resources = [source.to_api_repr() for source in sources] + _helpers._set_sub_prop( + self._properties, + ["configuration", "copy", "sourceTables"], + source_resources, + ) + + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: Table into which data + is to be loaded. + """ + return TableReference.from_api_repr( + _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "destinationTable"], + ) + ) + + @property + def sources(self): + """List[google.cloud.bigquery.table.TableReference]): Table(s) from + which data is to be loaded. + """ + source_configs = _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "sourceTables"] + ) + if source_configs is None: + single = _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "sourceTable"] + ) + if single is None: + raise KeyError("Resource missing 'sourceTables' / 'sourceTable'") + source_configs = [single] + + sources = [] + for source_config in source_configs: + table_ref = TableReference.from_api_repr(source_config) + sources.append(table_ref) + return sources @property def create_disposition(self): @@ -1860,40 +1898,15 @@ def destination_encryption_configuration(self): def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" - - source_refs = [ - { - "projectId": table.project, - "datasetId": table.dataset_id, - "tableId": table.table_id, - } - for table in self.sources - ] - - configuration = self._configuration.to_api_repr() - _helpers._set_sub_prop(configuration, ["copy", "sourceTables"], source_refs) - _helpers._set_sub_prop( - configuration, - ["copy", "destinationTable"], - { - "projectId": self.destination.project, - "datasetId": self.destination.dataset_id, - "tableId": self.destination.table_id, - }, - ) - + # Exclude statistics, if set. return { "jobReference": self._properties["jobReference"], - "configuration": configuration, + "configuration": self._properties["configuration"], } - def _copy_configuration_properties(self, configuration): - """Helper: assign subclass configuration properties in cleaned.""" - self._configuration._properties = copy.deepcopy(configuration) - @classmethod def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation + """Factory: construct a job given its API representation .. note: @@ -1902,7 +1915,6 @@ def from_api_repr(cls, resource, client): Args: resource (Dict): dataset job representation returned from the API - client (google.cloud.bigquery.client.Client): Client which holds credentials and project configuration for the dataset. @@ -1910,22 +1922,9 @@ def from_api_repr(cls, resource, client): Returns: google.cloud.bigquery.job.CopyJob: Job parsed from ``resource``. """ - job_id, config_resource = cls._get_resource_config(resource) - config = CopyJobConfig.from_api_repr(config_resource) - # Copy required fields to the job. - copy_resource = config_resource["copy"] - destination = TableReference.from_api_repr(copy_resource["destinationTable"]) - sources = [] - source_configs = copy_resource.get("sourceTables") - if source_configs is None: - single = copy_resource.get("sourceTable") - if single is None: - raise KeyError("Resource missing 'sourceTables' / 'sourceTable'") - source_configs = [single] - for source_config in source_configs: - table_ref = TableReference.from_api_repr(source_config) - sources.append(table_ref) - job = cls(job_id, sources, destination, client=client, job_config=config) + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client=client) job._set_properties(resource) return job @@ -2038,10 +2037,61 @@ def __init__(self, job_id, source, destination_uris, client, job_config=None): if job_config is None: job_config = ExtractJobConfig() - self.source = source - self.destination_uris = destination_uris + self._properties["configuration"] = job_config._properties self._configuration = job_config + if source: + source_ref = { + "projectId": source.project, + "datasetId": source.dataset_id, + } + + if isinstance(source, (Table, TableListItem, TableReference)): + source_ref["tableId"] = source.table_id + source_key = "sourceTable" + else: + source_ref["modelId"] = source.model_id + source_key = "sourceModel" + + _helpers._set_sub_prop( + self._properties, ["configuration", "extract", source_key], source_ref + ) + + if destination_uris: + _helpers._set_sub_prop( + self._properties, + ["configuration", "extract", "destinationUris"], + destination_uris, + ) + + @property + def source(self): + """Union[ \ + google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.model.ModelReference \ + ]: Table or Model from which data is to be loaded or extracted. + """ + source_config = _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "sourceTable"] + ) + if source_config: + return TableReference.from_api_repr(source_config) + else: + source_config = _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "sourceModel"] + ) + return ModelReference.from_api_repr(source_config) + + @property + def destination_uris(self): + """List[str]: URIs describing where the extracted data will be + written in Cloud Storage, using the format + ``gs:///``. + """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "destinationUris"] + ) + @property def compression(self): """See @@ -2092,34 +2142,12 @@ def destination_uri_file_counts(self): def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" - - configuration = self._configuration.to_api_repr() - source_ref = { - "projectId": self.source.project, - "datasetId": self.source.dataset_id, - } - - source = "sourceTable" - if isinstance(self.source, TableReference): - source_ref["tableId"] = self.source.table_id - else: - source_ref["modelId"] = self.source.model_id - source = "sourceModel" - - _helpers._set_sub_prop(configuration, ["extract", source], source_ref) - _helpers._set_sub_prop( - configuration, ["extract", "destinationUris"], self.destination_uris - ) - + # Exclude statistics, if set. return { "jobReference": self._properties["jobReference"], - "configuration": configuration, + "configuration": self._properties["configuration"], } - def _copy_configuration_properties(self, configuration): - """Helper: assign subclass configuration properties in cleaned.""" - self._configuration._properties = copy.deepcopy(configuration) - @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -2139,30 +2167,9 @@ def from_api_repr(cls, resource, client): Returns: google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``. """ - job_id, config_resource = cls._get_resource_config(resource) - config = ExtractJobConfig.from_api_repr(config_resource) - source_config = _helpers._get_sub_prop( - config_resource, ["extract", "sourceTable"] - ) - if source_config: - dataset = DatasetReference( - source_config["projectId"], source_config["datasetId"] - ) - source = dataset.table(source_config["tableId"]) - else: - source_config = _helpers._get_sub_prop( - config_resource, ["extract", "sourceModel"] - ) - dataset = DatasetReference( - source_config["projectId"], source_config["datasetId"] - ) - source = dataset.model(source_config["modelId"]) - - destination_uris = _helpers._get_sub_prop( - config_resource, ["extract", "destinationUris"] - ) - - job = cls(job_id, source, destination_uris, client=client, job_config=config) + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client=client) job._set_properties(resource) return job @@ -2631,11 +2638,14 @@ def __init__(self, job_id, query, client, job_config=None): if job_config.use_legacy_sql is None: job_config.use_legacy_sql = False - _helpers._set_sub_prop( - self._properties, ["configuration", "query", "query"], query - ) - + self._properties["configuration"] = job_config._properties self._configuration = job_config + + if query: + _helpers._set_sub_prop( + self._properties, ["configuration", "query", "query"], query + ) + self._query_results = None self._done_timeout = None self._transport_timeout = None @@ -2799,19 +2809,13 @@ def schema_update_options(self): def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" + # Use to_api_repr to allow for some configuration properties to be set + # automatically. configuration = self._configuration.to_api_repr() - - resource = { + return { "jobReference": self._properties["jobReference"], "configuration": configuration, } - configuration["query"]["query"] = self.query - - return resource - - def _copy_configuration_properties(self, configuration): - """Helper: assign subclass configuration properties in cleaned.""" - self._configuration._properties = copy.deepcopy(configuration) @classmethod def from_api_repr(cls, resource, client): @@ -2827,9 +2831,9 @@ def from_api_repr(cls, resource, client): Returns: google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``. """ - job_id, config = cls._get_resource_config(resource) - query = _helpers._get_sub_prop(config, ["query", "query"]) - job = cls(job_id, query, client=client) + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, client=client) job._set_properties(resource) return job diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 52e00d7c7465..bc2658961aac 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4266,7 +4266,7 @@ def test_load_table_from_uri(self): self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.source_uris), [SOURCE_URI]) - self.assertIs(job.destination, destination) + self.assertEqual(job.destination, destination) conn = client._connection = make_connection(RESOURCE) @@ -4275,7 +4275,7 @@ def test_load_table_from_uri(self): self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.source_uris), [SOURCE_URI]) - self.assertIs(job.destination, destination) + self.assertEqual(job.destination, destination) def test_load_table_from_uri_w_explicit_project(self): job_id = "this-is-a-job-id" @@ -4576,16 +4576,67 @@ def test_copy_table(self): self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.sources), [source]) - self.assertIs(job.destination, destination) + self.assertEqual(job.destination, destination) - conn = client._connection = make_connection(RESOURCE) - source2 = dataset.table(SOURCE + "2") - job = client.copy_table([source, source2], destination, job_id=JOB) + def test_copy_table_w_multiple_sources(self): + from google.cloud.bigquery.job import CopyJob + from google.cloud.bigquery.table import TableReference + + job_id = "job_name" + source_id = "my-project.my_dataset.source_table" + source_id2 = "my-project.my_dataset.source_table2" + destination_id = "my-other-project.another_dataset.destination_table" + expected_resource = { + "jobReference": {"projectId": self.PROJECT, "jobId": job_id}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": "my-project", + "datasetId": "my_dataset", + "tableId": "source_table", + }, + { + "projectId": "my-project", + "datasetId": "my_dataset", + "tableId": "source_table2", + }, + ], + "destinationTable": { + "projectId": "my-other-project", + "datasetId": "another_dataset", + "tableId": "destination_table", + }, + } + }, + } + returned_resource = expected_resource.copy() + returned_resource["statistics"] = {} + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(returned_resource) + + job = client.copy_table([source_id, source_id2], destination_id, job_id=job_id) + + # Check that copy_table actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=expected_resource, + timeout=None, + ) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) - self.assertEqual(job.job_id, JOB) - self.assertEqual(list(job.sources), [source, source2]) - self.assertIs(job.destination, destination) + self.assertEqual(job.job_id, job_id) + self.assertEqual( + list(sorted(job.sources, key=lambda tbl: tbl.table_id)), + [ + TableReference.from_string(source_id), + TableReference.from_string(source_id2), + ], + ) + self.assertEqual(job.destination, TableReference.from_string(destination_id)) def test_copy_table_w_explicit_project(self): job_id = "this-is-a-job-id" diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index d21489616653..75212ae95537 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -455,28 +455,9 @@ def test_state(self): status["state"] = state self.assertEqual(job.state, state) - def test__scrub_local_properties(self): - before = {"foo": "bar"} - resource = before.copy() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._scrub_local_properties(resource) # no raise - self.assertEqual(resource, before) - - def test__copy_configuration_properties(self): - before = {"foo": "bar"} - resource = before.copy() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - with self.assertRaises(NotImplementedError): - job._copy_configuration_properties(resource) - self.assertEqual(resource, before) - def _set_properties_job(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._scrub_local_properties = mock.Mock() - job._copy_configuration_properties = mock.Mock() job._set_future_result = mock.Mock() job._properties = { "jobReference": job._properties["jobReference"], @@ -493,9 +474,6 @@ def test__set_properties_no_stats(self): self.assertEqual(job._properties, resource) - job._scrub_local_properties.assert_called_once_with(resource) - job._copy_configuration_properties.assert_called_once_with(config) - def test__set_properties_w_creation_time(self): now, millis = self._datetime_and_millis() config = {"test": True} @@ -509,9 +487,6 @@ def test__set_properties_w_creation_time(self): cleaned["statistics"]["creationTime"] = float(millis) self.assertEqual(job._properties, cleaned) - job._scrub_local_properties.assert_called_once_with(resource) - job._copy_configuration_properties.assert_called_once_with(config) - def test__set_properties_w_start_time(self): now, millis = self._datetime_and_millis() config = {"test": True} @@ -525,9 +500,6 @@ def test__set_properties_w_start_time(self): cleaned["statistics"]["startTime"] = float(millis) self.assertEqual(job._properties, cleaned) - job._scrub_local_properties.assert_called_once_with(resource) - job._copy_configuration_properties.assert_called_once_with(config) - def test__set_properties_w_end_time(self): now, millis = self._datetime_and_millis() config = {"test": True} @@ -541,38 +513,35 @@ def test__set_properties_w_end_time(self): cleaned["statistics"]["endTime"] = float(millis) self.assertEqual(job._properties, cleaned) - job._scrub_local_properties.assert_called_once_with(resource) - job._copy_configuration_properties.assert_called_once_with(config) - - def test__get_resource_config_missing_job_ref(self): + def test__check_resource_config_missing_job_ref(self): resource = {} klass = self._make_derived_class() with self.assertRaises(KeyError): - klass._get_resource_config(resource) + klass._check_resource_config(resource) - def test__get_resource_config_missing_job_id(self): + def test__check_resource_config_missing_job_id(self): resource = {"jobReference": {}} klass = self._make_derived_class() with self.assertRaises(KeyError): - klass._get_resource_config(resource) + klass._check_resource_config(resource) - def test__get_resource_config_missing_configuration(self): + def test__check_resource_config_missing_configuration(self): resource = {"jobReference": {"jobId": self.JOB_ID}} klass = self._make_derived_class() with self.assertRaises(KeyError): - klass._get_resource_config(resource) + klass._check_resource_config(resource) - def test__get_resource_config_missing_config_type(self): + def test__check_resource_config_missing_config_type(self): resource = {"jobReference": {"jobId": self.JOB_ID}, "configuration": {}} klass = self._make_derived_class() with self.assertRaises(KeyError): - klass._get_resource_config(resource) + klass._check_resource_config(resource) - def test__get_resource_config_ok(self): + def test__check_resource_config_ok(self): derived_config = {"foo": "bar"} resource = { "jobReference": {"jobId": self.JOB_ID}, @@ -580,10 +549,8 @@ def test__get_resource_config_ok(self): } klass = self._make_derived_class() - job_id, config = klass._get_resource_config(resource) - - self.assertEqual(job_id, self.JOB_ID) - self.assertEqual(config, {"derived": derived_config}) + # Should not throw. + klass._check_resource_config(resource) def test__build_resource(self): client = _make_client(project=self.PROJECT) @@ -2093,7 +2060,7 @@ def _verifyResourceProperties(self, job, resource): def test_ctor(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - self.assertIs(job.destination, self.TABLE_REF) + self.assertEqual(job.destination, self.TABLE_REF) self.assertEqual(list(job.source_uris), [self.SOURCE1]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) @@ -2907,7 +2874,7 @@ def test_ctor(self): source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) - self.assertIs(job.destination, destination) + self.assertEqual(job.destination, destination) self.assertEqual(job.sources, [source]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) @@ -3041,8 +3008,9 @@ def test_from_api_repr_wo_sources(self): }, } klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) + _ = job.sources def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition From 497ea31ef1015b749e6e0d3d576cf431ea0b64a6 Mon Sep 17 00:00:00 2001 From: Kumar Anirudha <5357586+anistark@users.noreply.github.com> Date: Fri, 16 Oct 2020 20:02:03 +0530 Subject: [PATCH 0962/2016] deps: add protobuf dependency (#306) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #305 --- packages/google-cloud-bigquery/setup.py | 1 + packages/google-cloud-bigquery/testing/constraints-3.6.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index abd5cef95310..c7410601e8d7 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -34,6 +34,7 @@ "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", + "protobuf >= 3.12.0", ] extras = { "bqstorage": [ diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index 7988049419a8..cea0ed84e467 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -8,6 +8,7 @@ libcst==0.2.5 llvmlite==0.34.0 # pandas 0.23.0 is the first version to work with pyarrow to_pandas. pandas==0.23.0 +protobuf == 3.12.0 proto-plus==1.10.0 pyarrow==1.0.0 python-snappy==0.5.4 From 6321ee009f82dc7d21cd0c780e508a0a3b499905 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 16 Oct 2020 19:04:02 -0400 Subject: [PATCH 0963/2016] tests: split out snippets builds (#219) @tmatsuo Emulating PR #207. I don't know if I'm missing anything: e.g., I don't quite understand what the `split_system_tests=True` does in the `synth.py` there. Toward #191 --- .../.kokoro/presubmit/presubmit.cfg | 4 ++++ .../.kokoro/presubmit/snippets-2.7.cfg | 7 +++++++ .../.kokoro/presubmit/snippets-3.8.cfg | 7 +++++++ packages/google-cloud-bigquery/noxfile.py | 10 +++++++--- 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/snippets-2.7.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.8.cfg diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg index b158096f0ae2..17d071cae8b0 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg @@ -5,3 +5,7 @@ env_vars: { key: "RUN_SYSTEM_TESTS" value: "false" } +env_vars: { + key: "RUN_SNIPPETS_TESTS" + value: "false" +} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-2.7.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-2.7.cfg new file mode 100644 index 000000000000..3bd6134d2bb4 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-2.7.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "snippets-2.7" +} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.8.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.8.cfg new file mode 100644 index 000000000000..840d9e716650 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "snippets-3.8" +} diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index db1dcffdef84..441782583f43 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -112,14 +112,18 @@ def system(session): def snippets(session): """Run the snippets test suite.""" - constraints_path = str( - CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" - ) + # Check the value of `RUN_SNIPPETS_TESTS` env var. It defaults to true. + if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false": + session.skip("RUN_SNIPPETS_TESTS is set to false, skipping") # Sanity check: Only run snippets tests if the environment variable is set. if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable.") + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + # Install all test dependencies, then install local packages in place. session.install("mock", "pytest", "google-cloud-testutils", "-c", constraints_path) session.install("google-cloud-storage", "-c", constraints_path) From 55ff7ec8e5a791add1fd50f9c276c3034c38eaa5 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 19 Oct 2020 09:45:12 -0400 Subject: [PATCH 0964/2016] feat: add timeout paramter to load_table_from_file and it dependent methods (#327) --- .../google/cloud/bigquery/client.py | 71 +++++++++++++++---- .../tests/unit/test_client.py | 45 ++++++++---- 2 files changed, 92 insertions(+), 24 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index b7e082daa3f0..cce393d6cc15 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1591,7 +1591,7 @@ def job_from_resource(self, resource): return job.QueryJob.from_api_repr(resource, self) return job.UnknownJob.from_api_repr(resource, self) - def create_job(self, job_config, retry=DEFAULT_RETRY): + def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): """Create a new job. Args: job_config (dict): configuration job representation returned from the API. @@ -1599,6 +1599,9 @@ def create_job(self, job_config, retry=DEFAULT_RETRY): Keyword Arguments: retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: Union[ \ @@ -1617,7 +1620,11 @@ def create_job(self, job_config, retry=DEFAULT_RETRY): destination = _get_sub_prop(job_config, ["load", "destinationTable"]) source_uris = _get_sub_prop(job_config, ["load", "sourceUris"]) return self.load_table_from_uri( - source_uris, destination, job_config=load_job_config, retry=retry + source_uris, + destination, + job_config=load_job_config, + retry=retry, + timeout=timeout, ) elif "copy" in job_config: copy_job_config = google.cloud.bigquery.job.CopyJobConfig.from_api_repr( @@ -1633,7 +1640,11 @@ def create_job(self, job_config, retry=DEFAULT_RETRY): table_ref = TableReference.from_api_repr(source_config) sources.append(table_ref) return self.copy_table( - sources, destination, job_config=copy_job_config, retry=retry + sources, + destination, + job_config=copy_job_config, + retry=retry, + timeout=timeout, ) elif "extract" in job_config: extract_job_config = google.cloud.bigquery.job.ExtractJobConfig.from_api_repr( @@ -1650,6 +1661,7 @@ def create_job(self, job_config, retry=DEFAULT_RETRY): destination_uris, job_config=extract_job_config, retry=retry, + timeout=timeout, source_type=source_type, ) elif "query" in job_config: @@ -1659,7 +1671,9 @@ def create_job(self, job_config, retry=DEFAULT_RETRY): copy_config ) query = _get_sub_prop(copy_config, ["query", "query"]) - return self.query(query, job_config=query_job_config, retry=retry) + return self.query( + query, job_config=query_job_config, retry=retry, timeout=timeout + ) else: raise TypeError("Invalid job configuration received.") @@ -1981,6 +1995,7 @@ def load_table_from_file( location=None, project=None, job_config=None, + timeout=None, ): """Upload the contents of this table from a file-like object. @@ -2020,6 +2035,9 @@ def load_table_from_file( to the client's project. job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): Extra configuration options for the job. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -2058,11 +2076,11 @@ def load_table_from_file( try: if size is None or size >= _MAX_MULTIPART_SIZE: response = self._do_resumable_upload( - file_obj, job_resource, num_retries + file_obj, job_resource, num_retries, timeout ) else: response = self._do_multipart_upload( - file_obj, job_resource, size, num_retries + file_obj, job_resource, size, num_retries, timeout ) except resumable_media.InvalidResponse as exc: raise exceptions.from_http_response(exc.response) @@ -2080,6 +2098,7 @@ def load_table_from_dataframe( project=None, job_config=None, parquet_compression="snappy", + timeout=None, ): """Upload the contents of a table from a pandas DataFrame. @@ -2143,6 +2162,9 @@ def load_table_from_dataframe( passed as the ``compression`` argument to the underlying ``DataFrame.to_parquet()`` method. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -2249,6 +2271,7 @@ def load_table_from_dataframe( location=location, project=project, job_config=job_config, + timeout=timeout, ) finally: @@ -2264,6 +2287,7 @@ def load_table_from_json( location=None, project=None, job_config=None, + timeout=None, ): """Upload the contents of a table from a JSON string or dict. @@ -2313,6 +2337,9 @@ def load_table_from_json( Extra configuration options for the job. The ``source_format`` setting is always set to :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -2357,9 +2384,10 @@ def load_table_from_json( location=location, project=project, job_config=job_config, + timeout=timeout, ) - def _do_resumable_upload(self, stream, metadata, num_retries): + def _do_resumable_upload(self, stream, metadata, num_retries, timeout): """Perform a resumable upload. Args: @@ -2371,13 +2399,17 @@ def _do_resumable_upload(self, stream, metadata, num_retries): Number of upload retries. (Deprecated: This argument will be removed in a future release.) + timeout (float): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + Returns: requests.Response: The "200 OK" response object returned after the final chunk is uploaded. """ upload, transport = self._initiate_resumable_upload( - stream, metadata, num_retries + stream, metadata, num_retries, timeout ) while not upload.finished: @@ -2385,7 +2417,7 @@ def _do_resumable_upload(self, stream, metadata, num_retries): return response - def _initiate_resumable_upload(self, stream, metadata, num_retries): + def _initiate_resumable_upload(self, stream, metadata, num_retries, timeout): """Initiate a resumable upload. Args: @@ -2397,6 +2429,10 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries): Number of upload retries. (Deprecated: This argument will be removed in a future release.) + timeout (float): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + Returns: Tuple: Pair of @@ -2419,12 +2455,17 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries): ) upload.initiate( - transport, stream, metadata, _GENERIC_CONTENT_TYPE, stream_final=False + transport, + stream, + metadata, + _GENERIC_CONTENT_TYPE, + stream_final=False, + timeout=timeout, ) return upload, transport - def _do_multipart_upload(self, stream, metadata, size, num_retries): + def _do_multipart_upload(self, stream, metadata, size, num_retries, timeout): """Perform a multipart upload. Args: @@ -2441,6 +2482,10 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries): Number of upload retries. (Deprecated: This argument will be removed in a future release.) + timeout (float): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + Returns: requests.Response: The "200 OK" response object returned after the multipart @@ -2466,7 +2511,9 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries): max_retries=num_retries ) - response = upload.transmit(self._http, data, metadata, _GENERIC_CONTENT_TYPE) + response = upload.transmit( + self._http, data, metadata, _GENERIC_CONTENT_TYPE, timeout=timeout + ) return response diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index bc2658961aac..2001ad42b59b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4425,7 +4425,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job.to_api_repr() upload, transport = client._initiate_resumable_upload( - stream, metadata, num_retries + stream, metadata, num_retries, None ) # Check the returned values. @@ -4492,7 +4492,9 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job.to_api_repr() size = len(data) - response = client._do_multipart_upload(stream, metadata, size, num_retries) + response = client._do_multipart_upload( + stream, metadata, size, num_retries, None + ) # Check the mocks and the returned value. self.assertIs(response, fake_transport.request.return_value) @@ -7251,7 +7253,7 @@ def test_load_table_from_file_resumable(self): ) do_upload.assert_called_once_with( - file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES + file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, None ) # the original config object should not have been modified @@ -7280,7 +7282,7 @@ def test_load_table_from_file_w_explicit_project(self): expected_resource["jobReference"]["location"] = self.LOCATION expected_resource["jobReference"]["projectId"] = "other-project" do_upload.assert_called_once_with( - file_obj, expected_resource, _DEFAULT_NUM_RETRIES + file_obj, expected_resource, _DEFAULT_NUM_RETRIES, None ) def test_load_table_from_file_w_client_location(self): @@ -7310,7 +7312,7 @@ def test_load_table_from_file_w_client_location(self): expected_resource["jobReference"]["location"] = self.LOCATION expected_resource["jobReference"]["projectId"] = "other-project" do_upload.assert_called_once_with( - file_obj, expected_resource, _DEFAULT_NUM_RETRIES + file_obj, expected_resource, _DEFAULT_NUM_RETRIES, None ) def test_load_table_from_file_resumable_metadata(self): @@ -7368,7 +7370,7 @@ def test_load_table_from_file_resumable_metadata(self): ) do_upload.assert_called_once_with( - file_obj, expected_config, _DEFAULT_NUM_RETRIES + file_obj, expected_config, _DEFAULT_NUM_RETRIES, None ) def test_load_table_from_file_multipart(self): @@ -7392,7 +7394,11 @@ def test_load_table_from_file_multipart(self): ) do_upload.assert_called_once_with( - file_obj, self.EXPECTED_CONFIGURATION, file_obj_size, _DEFAULT_NUM_RETRIES + file_obj, + self.EXPECTED_CONFIGURATION, + file_obj_size, + _DEFAULT_NUM_RETRIES, + None, ) def test_load_table_from_file_with_retries(self): @@ -7413,7 +7419,7 @@ def test_load_table_from_file_with_retries(self): ) do_upload.assert_called_once_with( - file_obj, self.EXPECTED_CONFIGURATION, num_retries + file_obj, self.EXPECTED_CONFIGURATION, num_retries, None ) def test_load_table_from_file_with_rewind(self): @@ -7446,7 +7452,7 @@ def test_load_table_from_file_with_readable_gzip(self): ) do_upload.assert_called_once_with( - gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES + gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, None ) def test_load_table_from_file_with_writable_gzip(self): @@ -7539,6 +7545,7 @@ def test_load_table_from_dataframe(self): location=None, project=None, job_config=mock.ANY, + timeout=None, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -7583,6 +7590,7 @@ def test_load_table_from_dataframe_w_client_location(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -7636,6 +7644,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7691,6 +7700,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7784,6 +7794,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7844,6 +7855,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7890,6 +7902,7 @@ def test_load_table_from_dataframe_unknown_table(self): location=None, project=None, job_config=mock.ANY, + timeout=None, ) @unittest.skipIf( @@ -7931,6 +7944,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7978,6 +7992,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8039,6 +8054,7 @@ def test_load_table_from_dataframe_struct_fields(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8113,6 +8129,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8207,6 +8224,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) assert warned # there should be at least one warning @@ -8320,6 +8338,7 @@ def test_load_table_from_dataframe_w_nulls(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8373,6 +8392,7 @@ def test_load_table_from_json_basic_use(self): location=client.location, project=client.project, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8425,6 +8445,7 @@ def test_load_table_from_json_non_default_args(self): location="EU", project="project-x", job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8499,7 +8520,7 @@ def test__do_resumable_upload(self): client = self._make_client(transport) result = client._do_resumable_upload( - file_obj, self.EXPECTED_CONFIGURATION, None + file_obj, self.EXPECTED_CONFIGURATION, None, None ) content = result.content.decode("utf-8") @@ -8522,7 +8543,7 @@ def test__do_multipart_upload(self): file_obj_len = len(file_obj.getvalue()) client._do_multipart_upload( - file_obj, self.EXPECTED_CONFIGURATION, file_obj_len, None + file_obj, self.EXPECTED_CONFIGURATION, file_obj_len, None, None ) # Verify that configuration data was passed in with the initial @@ -8550,7 +8571,7 @@ def test__do_multipart_upload_wrong_size(self): file_obj_len = len(file_obj.getvalue()) with pytest.raises(ValueError): - client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None) + client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None, None) def test_schema_from_json_with_file_path(self): from google.cloud.bigquery.schema import SchemaField From 14ff6436f6e2023adb404e5573678b32f15ddf96 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 19 Oct 2020 07:51:53 -0700 Subject: [PATCH 0965/2016] feat: add support for listing arima, automl, boosted tree, DNN, and matrix factorization models (#328) * changes without context autosynth cannot find the source of changes triggered by earlier changes in this repository, or by version upgrades to tools such as linters. * chore: update proto definitions for bigquery/v2 to support BQML statistics PiperOrigin-RevId: 337113354 Source-Author: Google APIs Source-Date: Wed Oct 14 10:04:20 2020 -0700 Source-Repo: googleapis/googleapis Source-Sha: 215c12ade72d9d9616457d9b8b2f8a37f38e79f3 Source-Link: https://github.com/googleapis/googleapis/commit/215c12ade72d9d9616457d9b8b2f8a37f38e79f3 * fix: manually revert `type` to `type_` breaking change This should allow us to merge the fixes for `list_models` and avoid a breaking change until `proto-plus` becomes acceptable for our use. * feat: add BIGNUMERIC to data type enums Co-authored-by: Tim Swast --- .../google/cloud/bigquery/enums.py | 2 + .../google/cloud/bigquery_v2/__init__.py | 2 + .../bigquery_v2/proto/encryption_config.proto | 3 +- .../cloud/bigquery_v2/proto/model.proto | 576 ++++++++++++++++- .../bigquery_v2/proto/model_reference.proto | 3 +- .../bigquery_v2/proto/standard_sql.proto | 6 +- .../bigquery_v2/proto/table_reference.proto | 39 ++ .../cloud/bigquery_v2/types/__init__.py | 2 + .../google/cloud/bigquery_v2/types/model.py | 603 +++++++++++++++++- .../cloud/bigquery_v2/types/standard_sql.py | 1 + .../bigquery_v2/types/table_reference.py | 51 ++ packages/google-cloud-bigquery/synth.metadata | 9 +- 12 files changed, 1282 insertions(+), 15 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/table_reference.proto create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 3247372e3660..eb33e42763b0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -33,6 +33,7 @@ "DATETIME", "GEOGRAPHY", "NUMERIC", + "BIGNUMERIC", ) ) @@ -81,6 +82,7 @@ class SqlTypeNames(str, enum.Enum): FLOAT = "FLOAT" FLOAT64 = "FLOAT" NUMERIC = "NUMERIC" + BIGNUMERIC = "BIGNUMERIC" BOOLEAN = "BOOLEAN" BOOL = "BOOLEAN" GEOGRAPHY = "GEOGRAPHY" # NOTE: not available in legacy types diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py index c1989c3b093f..ebcc26befdc1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py @@ -27,6 +27,7 @@ from .types.standard_sql import StandardSqlDataType from .types.standard_sql import StandardSqlField from .types.standard_sql import StandardSqlStructType +from .types.table_reference import TableReference __all__ = ( @@ -41,4 +42,5 @@ "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "TableReference", ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto index 54445f0fa770..1c0512a17ae6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto @@ -1,4 +1,4 @@ -// Copyright 2019 Google LLC. +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// syntax = "proto3"; diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto index 13d980774413..2d400dddd44e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto @@ -1,4 +1,4 @@ -// Copyright 2019 Google LLC. +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// syntax = "proto3"; @@ -22,6 +21,7 @@ import "google/api/field_behavior.proto"; import "google/cloud/bigquery/v2/encryption_config.proto"; import "google/cloud/bigquery/v2/model_reference.proto"; import "google/cloud/bigquery/v2/standard_sql.proto"; +import "google/cloud/bigquery/v2/table_reference.proto"; import "google/protobuf/empty.proto"; import "google/protobuf/timestamp.proto"; import "google/protobuf/wrappers.proto"; @@ -62,6 +62,32 @@ service ModelService { } message Model { + message SeasonalPeriod { + enum SeasonalPeriodType { + SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0; + + // No seasonality + NO_SEASONALITY = 1; + + // Daily period, 24 hours. + DAILY = 2; + + // Weekly period, 7 days. + WEEKLY = 3; + + // Monthly period, 30 days or irregular. + MONTHLY = 4; + + // Quarterly period, 90 days or irregular. + QUARTERLY = 5; + + // Yearly period, 365 days or irregular. + YEARLY = 6; + } + + + } + message KmeansEnums { // Indicates the method used to initialize the centroids for KMeans // clustering algorithm. @@ -74,6 +100,9 @@ message Model { // Initializes the centroids using data specified in // kmeans_initialization_column. CUSTOM = 2; + + // Initializes with kmeans++. + KMEANS_PLUS_PLUS = 3; } @@ -280,6 +309,73 @@ message Model { repeated Cluster clusters = 3; } + // Evaluation metrics used by weighted-ALS models specified by + // feedback_type=implicit. + message RankingMetrics { + // Calculates a precision per user for all the items by ranking them and + // then averages all the precisions across all the users. + google.protobuf.DoubleValue mean_average_precision = 1; + + // Similar to the mean squared error computed in regression and explicit + // recommendation models except instead of computing the rating directly, + // the output from evaluate is computed against a preference which is 1 or 0 + // depending on if the rating exists or not. + google.protobuf.DoubleValue mean_squared_error = 2; + + // A metric to determine the goodness of a ranking calculated from the + // predicted confidence by comparing it to an ideal rank measured by the + // original ratings. + google.protobuf.DoubleValue normalized_discounted_cumulative_gain = 3; + + // Determines the goodness of a ranking by computing the percentile rank + // from the predicted confidence and dividing it by the original rank. + google.protobuf.DoubleValue average_rank = 4; + } + + // Model evaluation metrics for ARIMA forecasting models. + message ArimaForecastingMetrics { + // Model evaluation metrics for a single ARIMA forecasting model. + message ArimaSingleModelForecastingMetrics { + // Non-seasonal order. + ArimaOrder non_seasonal_order = 1; + + // Arima fitting metrics. + ArimaFittingMetrics arima_fitting_metrics = 2; + + // Is arima model fitted with drift or not. It is always false when d + // is not 1. + bool has_drift = 3; + + // The id to indicate different time series. + string time_series_id = 4; + + // Seasonal periods. Repeated because multiple periods are supported + // for one time series. + repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 5; + } + + // Non-seasonal order. + repeated ArimaOrder non_seasonal_order = 1; + + // Arima model fitting metrics. + repeated ArimaFittingMetrics arima_fitting_metrics = 2; + + // Seasonal periods. Repeated because multiple periods are supported for one + // time series. + repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 3; + + // Whether Arima model fitted with drift or not. It is always false when d + // is not 1. + repeated bool has_drift = 4; + + // Id to differentiate different time series for the large-scale case. + repeated string time_series_id = 5; + + // Repeated as there can be many metric sets (one for each model) in + // auto-arima and the large-scale case. + repeated ArimaSingleModelForecastingMetrics arima_single_model_forecasting_metrics = 6; + } + // Evaluation metrics of a model. These are either computed on all training // data or just the eval data based on whether eval data was used during // training. These are not present for imported models. @@ -297,7 +393,71 @@ message Model { // Populated for clustering models. ClusteringMetrics clustering_metrics = 4; + + // Populated for implicit feedback type matrix factorization models. + RankingMetrics ranking_metrics = 5; + + // Populated for ARIMA models. + ArimaForecastingMetrics arima_forecasting_metrics = 6; + } + } + + // Data split result. This contains references to the training and evaluation + // data tables that were used to train the model. + message DataSplitResult { + // Table reference of the training data after split. + TableReference training_table = 1; + + // Table reference of the evaluation data after split. + TableReference evaluation_table = 2; + } + + // Arima order, can be used for both non-seasonal and seasonal parts. + message ArimaOrder { + // Order of the autoregressive part. + int64 p = 1; + + // Order of the differencing part. + int64 d = 2; + + // Order of the moving-average part. + int64 q = 3; + } + + // ARIMA model fitting metrics. + message ArimaFittingMetrics { + // Log-likelihood. + double log_likelihood = 1; + + // AIC. + double aic = 2; + + // Variance. + double variance = 3; + } + + // Global explanations containing the top most important features + // after training. + message GlobalExplanation { + // Explanation for a single feature. + message Explanation { + // Full name of the feature. For non-numerical features, will be + // formatted like .. Overall size of + // feature name will always be truncated to first 120 characters. + string feature_name = 1; + + // Attribution of feature. + google.protobuf.DoubleValue attribution = 2; } + + // A list of the top global explanations. Sorted by absolute value of + // attribution in descending order. + repeated Explanation explanations = 1; + + // Class label for this set of global explanations. Will be empty/null for + // binary logistic and linear regression models. Sorted alphabetically in + // descending order. + string class_label = 2; } // Information about a single training query run for the model. @@ -367,6 +527,12 @@ message Model { // training data. Only applicable for classification models. map label_class_weights = 17; + // User column specified for matrix factorization models. + string user_column = 18; + + // Item column specified for matrix factorization models. + string item_column = 19; + // Distance type for clustering models. DistanceType distance_type = 20; @@ -380,12 +546,83 @@ message Model { // Optimization strategy for training linear regression models. OptimizationStrategy optimization_strategy = 23; + // Hidden units for dnn models. + repeated int64 hidden_units = 24; + + // Batch size for dnn models. + int64 batch_size = 25; + + // Dropout probability for dnn models. + google.protobuf.DoubleValue dropout = 26; + + // Maximum depth of a tree for boosted tree models. + int64 max_tree_depth = 27; + + // Subsample fraction of the training data to grow tree to prevent + // overfitting for boosted tree models. + double subsample = 28; + + // Minimum split loss for boosted tree models. + google.protobuf.DoubleValue min_split_loss = 29; + + // Num factors specified for matrix factorization models. + int64 num_factors = 30; + + // Feedback type that specifies which algorithm to run for matrix + // factorization. + FeedbackType feedback_type = 31; + + // Hyperparameter for matrix factoration when implicit feedback type is + // specified. + google.protobuf.DoubleValue wals_alpha = 32; + // The method used to initialize the centroids for kmeans algorithm. KmeansEnums.KmeansInitializationMethod kmeans_initialization_method = 33; // The column used to provide the initial centroids for kmeans algorithm // when kmeans_initialization_method is CUSTOM. string kmeans_initialization_column = 34; + + // Column to be designated as time series timestamp for ARIMA model. + string time_series_timestamp_column = 35; + + // Column to be designated as time series data for ARIMA model. + string time_series_data_column = 36; + + // Whether to enable auto ARIMA or not. + bool auto_arima = 37; + + // A specification of the non-seasonal part of the ARIMA model: the three + // components (p, d, q) are the AR order, the degree of differencing, and + // the MA order. + ArimaOrder non_seasonal_order = 38; + + // The data frequency of a time series. + DataFrequency data_frequency = 39; + + // Include drift when fitting an ARIMA model. + bool include_drift = 41; + + // The geographical region based on which the holidays are considered in + // time series modeling. If a valid value is specified, then holiday + // effects modeling is enabled. + HolidayRegion holiday_region = 42; + + // The id column that will be used to indicate different time series to + // forecast in parallel. + string time_series_id_column = 43; + + // The number of periods ahead that need to be forecasted. + int64 horizon = 44; + + // Whether to preserve the input structs in output feature names. + // Suppose there is a struct A with field b. + // When false (default), the output feature name is A_b. + // When true, the output feature name is A.b. + bool preserve_input_structs = 45; + + // The max value of non-seasonal p and q. + int64 auto_arima_max_order = 46; } // Information about a single iteration of the training run. @@ -403,6 +640,53 @@ message Model { google.protobuf.Int64Value cluster_size = 3; } + // (Auto-)arima fitting result. Wrap everything in ArimaResult for easier + // refactoring if we want to use model-specific iteration results. + message ArimaResult { + // Arima coefficients. + message ArimaCoefficients { + // Auto-regressive coefficients, an array of double. + repeated double auto_regressive_coefficients = 1; + + // Moving-average coefficients, an array of double. + repeated double moving_average_coefficients = 2; + + // Intercept coefficient, just a double not an array. + double intercept_coefficient = 3; + } + + // Arima model information. + message ArimaModelInfo { + // Non-seasonal order. + ArimaOrder non_seasonal_order = 1; + + // Arima coefficients. + ArimaCoefficients arima_coefficients = 2; + + // Arima fitting metrics. + ArimaFittingMetrics arima_fitting_metrics = 3; + + // Whether Arima model fitted with drift or not. It is always false + // when d is not 1. + bool has_drift = 4; + + // The id to indicate different time series. + string time_series_id = 5; + + // Seasonal periods. Repeated because multiple periods are supported + // for one time series. + repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 6; + } + + // This message is repeated because there are multiple arima models + // fitted in auto-arima. For non-auto-arima model, its size is one. + repeated ArimaModelInfo arima_model_info = 1; + + // Seasonal periods. Repeated because multiple periods are supported for + // one time series. + repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 2; + } + // Index of the iteration, 0 based. google.protobuf.Int32Value index = 1; @@ -420,6 +704,8 @@ message Model { // Information about top clusters for clustering models. repeated ClusterInfo cluster_infos = 8; + + ArimaResult arima_result = 9; } // Options that were used for this training run, includes @@ -435,6 +721,15 @@ message Model { // The evaluation metrics over training/eval data that were computed at the // end of training. EvaluationMetrics evaluation_metrics = 7; + + // Data split result of the training run. Only set when the input data is + // actually split. + DataSplitResult data_split_result = 9; + + // Global explanations for important features of the model. For multi-class + // models, there is one entry for each label class. For other models, there + // is only one entry in the list. + repeated GlobalExplanation global_explanations = 10; } // Indicates the type of the Model. @@ -450,8 +745,32 @@ message Model { // K-means clustering model. KMEANS = 3; + // Matrix factorization model. + MATRIX_FACTORIZATION = 4; + + // [Beta] DNN classifier model. + DNN_CLASSIFIER = 5; + // [Beta] An imported TensorFlow model. TENSORFLOW = 6; + + // [Beta] DNN regressor model. + DNN_REGRESSOR = 7; + + // [Beta] Boosted tree regressor model. + BOOSTED_TREE_REGRESSOR = 9; + + // [Beta] Boosted tree classifier model. + BOOSTED_TREE_CLASSIFIER = 10; + + // [Beta] ARIMA model. + ARIMA = 11; + + // [Beta] AutoML Tables regression model. + AUTOML_REGRESSOR = 12; + + // [Beta] AutoML Tables classification model. + AUTOML_CLASSIFIER = 13; } // Loss metric to evaluate model training performance. @@ -497,6 +816,243 @@ message Model { AUTO_SPLIT = 5; } + // Type of supported data frequency for time series forecasting models. + enum DataFrequency { + DATA_FREQUENCY_UNSPECIFIED = 0; + + // Automatically inferred from timestamps. + AUTO_FREQUENCY = 1; + + // Yearly data. + YEARLY = 2; + + // Quarterly data. + QUARTERLY = 3; + + // Monthly data. + MONTHLY = 4; + + // Weekly data. + WEEKLY = 5; + + // Daily data. + DAILY = 6; + + // Hourly data. + HOURLY = 7; + } + + // Type of supported holiday regions for time series forecasting models. + enum HolidayRegion { + // Holiday region unspecified. + HOLIDAY_REGION_UNSPECIFIED = 0; + + // Global. + GLOBAL = 1; + + // North America. + NA = 2; + + // Japan and Asia Pacific: Korea, Greater China, India, Australia, and New + // Zealand. + JAPAC = 3; + + // Europe, the Middle East and Africa. + EMEA = 4; + + // Latin America and the Caribbean. + LAC = 5; + + // United Arab Emirates + AE = 6; + + // Argentina + AR = 7; + + // Austria + AT = 8; + + // Australia + AU = 9; + + // Belgium + BE = 10; + + // Brazil + BR = 11; + + // Canada + CA = 12; + + // Switzerland + CH = 13; + + // Chile + CL = 14; + + // China + CN = 15; + + // Colombia + CO = 16; + + // Czechoslovakia + CS = 17; + + // Czech Republic + CZ = 18; + + // Germany + DE = 19; + + // Denmark + DK = 20; + + // Algeria + DZ = 21; + + // Ecuador + EC = 22; + + // Estonia + EE = 23; + + // Egypt + EG = 24; + + // Spain + ES = 25; + + // Finland + FI = 26; + + // France + FR = 27; + + // Great Britain (United Kingdom) + GB = 28; + + // Greece + GR = 29; + + // Hong Kong + HK = 30; + + // Hungary + HU = 31; + + // Indonesia + ID = 32; + + // Ireland + IE = 33; + + // Israel + IL = 34; + + // India + IN = 35; + + // Iran + IR = 36; + + // Italy + IT = 37; + + // Japan + JP = 38; + + // Korea (South) + KR = 39; + + // Latvia + LV = 40; + + // Morocco + MA = 41; + + // Mexico + MX = 42; + + // Malaysia + MY = 43; + + // Nigeria + NG = 44; + + // Netherlands + NL = 45; + + // Norway + NO = 46; + + // New Zealand + NZ = 47; + + // Peru + PE = 48; + + // Philippines + PH = 49; + + // Pakistan + PK = 50; + + // Poland + PL = 51; + + // Portugal + PT = 52; + + // Romania + RO = 53; + + // Serbia + RS = 54; + + // Russian Federation + RU = 55; + + // Saudi Arabia + SA = 56; + + // Sweden + SE = 57; + + // Singapore + SG = 58; + + // Slovenia + SI = 59; + + // Slovakia + SK = 60; + + // Thailand + TH = 61; + + // Turkey + TR = 62; + + // Taiwan + TW = 63; + + // Ukraine + UA = 64; + + // United States + US = 65; + + // Venezuela + VE = 66; + + // Viet Nam + VN = 67; + + // South Africa + ZA = 68; + } + // Indicates the learning rate optimization strategy to use. enum LearnRateStrategy { LEARN_RATE_STRATEGY_UNSPECIFIED = 0; @@ -519,6 +1075,17 @@ message Model { NORMAL_EQUATION = 2; } + // Indicates the training algorithm to use for matrix factorization models. + enum FeedbackType { + FEEDBACK_TYPE_UNSPECIFIED = 0; + + // Use weighted-als for implicit feedback problems. + IMPLICIT = 1; + + // Use nonweighted-als for explicit feedback problems. + EXPLICIT = 2; + } + // Output only. A hash of this resource. string etag = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; @@ -558,8 +1125,9 @@ message Model { // Custom encryption configuration (e.g., Cloud KMS keys). This shows the // encryption configuration of the model data while stored in BigQuery - // storage. - google.cloud.bigquery.v2.EncryptionConfiguration encryption_configuration = 17; + // storage. This field can be used with PatchModel to update encryption key + // for an already encrypted model. + EncryptionConfiguration encryption_configuration = 17; // Output only. Type of the model resource. ModelType model_type = 7 [(google.api.field_behavior) = OUTPUT_ONLY]; diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto index fadd175146b0..c3d1a49a8bdd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto @@ -1,4 +1,4 @@ -// Copyright 2019 Google LLC. +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// syntax = "proto3"; diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto index ff69dfc4eb30..1514eccbb81a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto @@ -1,4 +1,4 @@ -// Copyright 2019 Google LLC. +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// syntax = "proto3"; @@ -73,6 +72,9 @@ message StandardSqlDataType { // Encoded as a decimal string. NUMERIC = 23; + // Encoded as a decimal string. + BIGNUMERIC = 24; + // Encoded as a list with types matching Type.array_type. ARRAY = 16; diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/table_reference.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/table_reference.proto new file mode 100644 index 000000000000..ba02f80c4dec --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/table_reference.proto @@ -0,0 +1,39 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.bigquery.v2; + +import "google/api/field_behavior.proto"; +import "google/api/annotations.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; +option java_outer_classname = "TableReferenceProto"; +option java_package = "com.google.cloud.bigquery.v2"; + +message TableReference { + // Required. The ID of the project containing this table. + string project_id = 1 [(google.api.field_behavior) = REQUIRED]; + + // Required. The ID of the dataset containing this table. + string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; + + // Required. The ID of the table. The ID must contain only + // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum + // length is 1,024 characters. Certain operations allow + // suffixing of the table ID with a partition decorator, such as + // `sample_table$20190123`. + string table_id = 3 [(google.api.field_behavior) = REQUIRED]; +} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py index a8839c74e5f9..1e354641a12e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py @@ -22,6 +22,7 @@ StandardSqlField, StandardSqlStructType, ) +from .table_reference import TableReference from .model import ( Model, GetModelRequest, @@ -38,6 +39,7 @@ "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "TableReference", "Model", "GetModelRequest", "PatchModelRequest", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index a00720d48d68..3a7bbf43b50b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -21,6 +21,7 @@ from google.cloud.bigquery_v2.types import encryption_config from google.cloud.bigquery_v2.types import model_reference as gcb_model_reference from google.cloud.bigquery_v2.types import standard_sql +from google.cloud.bigquery_v2.types import table_reference from google.protobuf import timestamp_pb2 as timestamp # type: ignore from google.protobuf import wrappers_pb2 as wrappers # type: ignore @@ -84,7 +85,9 @@ class Model(proto.Message): Custom encryption configuration (e.g., Cloud KMS keys). This shows the encryption configuration of the model data while stored in - BigQuery storage. + BigQuery storage. This field can be used with + PatchModel to update encryption key for an + already encrypted model. model_type (~.gcb_model.Model.ModelType): Output only. Type of the model resource. training_runs (Sequence[~.gcb_model.Model.TrainingRun]): @@ -105,7 +108,15 @@ class ModelType(proto.Enum): LINEAR_REGRESSION = 1 LOGISTIC_REGRESSION = 2 KMEANS = 3 + MATRIX_FACTORIZATION = 4 + DNN_CLASSIFIER = 5 TENSORFLOW = 6 + DNN_REGRESSOR = 7 + BOOSTED_TREE_REGRESSOR = 9 + BOOSTED_TREE_CLASSIFIER = 10 + ARIMA = 11 + AUTOML_REGRESSOR = 12 + AUTOML_CLASSIFIER = 13 class LossType(proto.Enum): r"""Loss metric to evaluate model training performance.""" @@ -132,6 +143,93 @@ class DataSplitMethod(proto.Enum): NO_SPLIT = 4 AUTO_SPLIT = 5 + class DataFrequency(proto.Enum): + r"""Type of supported data frequency for time series forecasting + models. + """ + DATA_FREQUENCY_UNSPECIFIED = 0 + AUTO_FREQUENCY = 1 + YEARLY = 2 + QUARTERLY = 3 + MONTHLY = 4 + WEEKLY = 5 + DAILY = 6 + HOURLY = 7 + + class HolidayRegion(proto.Enum): + r"""Type of supported holiday regions for time series forecasting + models. + """ + HOLIDAY_REGION_UNSPECIFIED = 0 + GLOBAL = 1 + NA = 2 + JAPAC = 3 + EMEA = 4 + LAC = 5 + AE = 6 + AR = 7 + AT = 8 + AU = 9 + BE = 10 + BR = 11 + CA = 12 + CH = 13 + CL = 14 + CN = 15 + CO = 16 + CS = 17 + CZ = 18 + DE = 19 + DK = 20 + DZ = 21 + EC = 22 + EE = 23 + EG = 24 + ES = 25 + FI = 26 + FR = 27 + GB = 28 + GR = 29 + HK = 30 + HU = 31 + ID = 32 + IE = 33 + IL = 34 + IN = 35 + IR = 36 + IT = 37 + JP = 38 + KR = 39 + LV = 40 + MA = 41 + MX = 42 + MY = 43 + NG = 44 + NL = 45 + NO = 46 + NZ = 47 + PE = 48 + PH = 49 + PK = 50 + PL = 51 + PT = 52 + RO = 53 + RS = 54 + RU = 55 + SA = 56 + SE = 57 + SG = 58 + SI = 59 + SK = 60 + TH = 61 + TR = 62 + TW = 63 + UA = 64 + US = 65 + VE = 66 + VN = 67 + ZA = 68 + class LearnRateStrategy(proto.Enum): r"""Indicates the learning rate optimization strategy to use.""" LEARN_RATE_STRATEGY_UNSPECIFIED = 0 @@ -144,6 +242,27 @@ class OptimizationStrategy(proto.Enum): BATCH_GRADIENT_DESCENT = 1 NORMAL_EQUATION = 2 + class FeedbackType(proto.Enum): + r"""Indicates the training algorithm to use for matrix + factorization models. + """ + FEEDBACK_TYPE_UNSPECIFIED = 0 + IMPLICIT = 1 + EXPLICIT = 2 + + class SeasonalPeriod(proto.Message): + r"""""" + + class SeasonalPeriodType(proto.Enum): + r"""""" + SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0 + NO_SEASONALITY = 1 + DAILY = 2 + WEEKLY = 3 + MONTHLY = 4 + QUARTERLY = 5 + YEARLY = 6 + class KmeansEnums(proto.Message): r"""""" @@ -154,6 +273,7 @@ class KmeansInitializationMethod(proto.Enum): KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0 RANDOM = 1 CUSTOM = 2 + KMEANS_PLUS_PLUS = 3 class RegressionMetrics(proto.Message): r"""Evaluation metrics for regression and explicit feedback type @@ -529,6 +649,129 @@ class CategoryCount(proto.Message): proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster", ) + class RankingMetrics(proto.Message): + r"""Evaluation metrics used by weighted-ALS models specified by + feedback_type=implicit. + + Attributes: + mean_average_precision (~.wrappers.DoubleValue): + Calculates a precision per user for all the + items by ranking them and then averages all the + precisions across all the users. + mean_squared_error (~.wrappers.DoubleValue): + Similar to the mean squared error computed in + regression and explicit recommendation models + except instead of computing the rating directly, + the output from evaluate is computed against a + preference which is 1 or 0 depending on if the + rating exists or not. + normalized_discounted_cumulative_gain (~.wrappers.DoubleValue): + A metric to determine the goodness of a + ranking calculated from the predicted confidence + by comparing it to an ideal rank measured by the + original ratings. + average_rank (~.wrappers.DoubleValue): + Determines the goodness of a ranking by + computing the percentile rank from the predicted + confidence and dividing it by the original rank. + """ + + mean_average_precision = proto.Field( + proto.MESSAGE, number=1, message=wrappers.DoubleValue, + ) + + mean_squared_error = proto.Field( + proto.MESSAGE, number=2, message=wrappers.DoubleValue, + ) + + normalized_discounted_cumulative_gain = proto.Field( + proto.MESSAGE, number=3, message=wrappers.DoubleValue, + ) + + average_rank = proto.Field( + proto.MESSAGE, number=4, message=wrappers.DoubleValue, + ) + + class ArimaForecastingMetrics(proto.Message): + r"""Model evaluation metrics for ARIMA forecasting models. + + Attributes: + non_seasonal_order (Sequence[~.gcb_model.Model.ArimaOrder]): + Non-seasonal order. + arima_fitting_metrics (Sequence[~.gcb_model.Model.ArimaFittingMetrics]): + Arima model fitting metrics. + seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. Repeated because multiple + periods are supported for one time series. + has_drift (Sequence[bool]): + Whether Arima model fitted with drift or not. + It is always false when d is not 1. + time_series_id (Sequence[str]): + Id to differentiate different time series for + the large-scale case. + arima_single_model_forecasting_metrics (Sequence[~.gcb_model.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]): + Repeated as there can be many metric sets + (one for each model) in auto-arima and the + large-scale case. + """ + + class ArimaSingleModelForecastingMetrics(proto.Message): + r"""Model evaluation metrics for a single ARIMA forecasting + model. + + Attributes: + non_seasonal_order (~.gcb_model.Model.ArimaOrder): + Non-seasonal order. + arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics): + Arima fitting metrics. + has_drift (bool): + Is arima model fitted with drift or not. It + is always false when d is not 1. + time_series_id (str): + The id to indicate different time series. + seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. Repeated because multiple + periods are supported for one time series. + """ + + non_seasonal_order = proto.Field( + proto.MESSAGE, number=1, message="Model.ArimaOrder", + ) + + arima_fitting_metrics = proto.Field( + proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", + ) + + has_drift = proto.Field(proto.BOOL, number=3) + + time_series_id = proto.Field(proto.STRING, number=4) + + seasonal_periods = proto.RepeatedField( + proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + + non_seasonal_order = proto.RepeatedField( + proto.MESSAGE, number=1, message="Model.ArimaOrder", + ) + + arima_fitting_metrics = proto.RepeatedField( + proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", + ) + + seasonal_periods = proto.RepeatedField( + proto.ENUM, number=3, enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + + has_drift = proto.RepeatedField(proto.BOOL, number=4) + + time_series_id = proto.RepeatedField(proto.STRING, number=5) + + arima_single_model_forecasting_metrics = proto.RepeatedField( + proto.MESSAGE, + number=6, + message="Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics", + ) + class EvaluationMetrics(proto.Message): r"""Evaluation metrics of a model. These are either computed on all training data or just the eval data based on whether eval @@ -547,6 +790,11 @@ class EvaluationMetrics(proto.Message): classification/classifier models. clustering_metrics (~.gcb_model.Model.ClusteringMetrics): Populated for clustering models. + ranking_metrics (~.gcb_model.Model.RankingMetrics): + Populated for implicit feedback type matrix + factorization models. + arima_forecasting_metrics (~.gcb_model.Model.ArimaForecastingMetrics): + Populated for ARIMA models. """ regression_metrics = proto.Field( @@ -571,6 +819,116 @@ class EvaluationMetrics(proto.Message): proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics", ) + ranking_metrics = proto.Field( + proto.MESSAGE, number=5, oneof="metrics", message="Model.RankingMetrics", + ) + + arima_forecasting_metrics = proto.Field( + proto.MESSAGE, + number=6, + oneof="metrics", + message="Model.ArimaForecastingMetrics", + ) + + class DataSplitResult(proto.Message): + r"""Data split result. This contains references to the training + and evaluation data tables that were used to train the model. + + Attributes: + training_table (~.table_reference.TableReference): + Table reference of the training data after + split. + evaluation_table (~.table_reference.TableReference): + Table reference of the evaluation data after + split. + """ + + training_table = proto.Field( + proto.MESSAGE, number=1, message=table_reference.TableReference, + ) + + evaluation_table = proto.Field( + proto.MESSAGE, number=2, message=table_reference.TableReference, + ) + + class ArimaOrder(proto.Message): + r"""Arima order, can be used for both non-seasonal and seasonal + parts. + + Attributes: + p (int): + Order of the autoregressive part. + d (int): + Order of the differencing part. + q (int): + Order of the moving-average part. + """ + + p = proto.Field(proto.INT64, number=1) + + d = proto.Field(proto.INT64, number=2) + + q = proto.Field(proto.INT64, number=3) + + class ArimaFittingMetrics(proto.Message): + r"""ARIMA model fitting metrics. + + Attributes: + log_likelihood (float): + Log-likelihood. + aic (float): + AIC. + variance (float): + Variance. + """ + + log_likelihood = proto.Field(proto.DOUBLE, number=1) + + aic = proto.Field(proto.DOUBLE, number=2) + + variance = proto.Field(proto.DOUBLE, number=3) + + class GlobalExplanation(proto.Message): + r"""Global explanations containing the top most important + features after training. + + Attributes: + explanations (Sequence[~.gcb_model.Model.GlobalExplanation.Explanation]): + A list of the top global explanations. Sorted + by absolute value of attribution in descending + order. + class_label (str): + Class label for this set of global + explanations. Will be empty/null for binary + logistic and linear regression models. Sorted + alphabetically in descending order. + """ + + class Explanation(proto.Message): + r"""Explanation for a single feature. + + Attributes: + feature_name (str): + Full name of the feature. For non-numerical features, will + be formatted like .. + Overall size of feature name will always be truncated to + first 120 characters. + attribution (~.wrappers.DoubleValue): + Attribution of feature. + """ + + feature_name = proto.Field(proto.STRING, number=1) + + attribution = proto.Field( + proto.MESSAGE, number=2, message=wrappers.DoubleValue, + ) + + explanations = proto.RepeatedField( + proto.MESSAGE, number=1, message="Model.GlobalExplanation.Explanation", + ) + + class_label = proto.Field(proto.STRING, number=2) + class TrainingRun(proto.Message): r"""Information about a single training query run for the model. @@ -587,6 +945,14 @@ class TrainingRun(proto.Message): evaluation_metrics (~.gcb_model.Model.EvaluationMetrics): The evaluation metrics over training/eval data that were computed at the end of training. + data_split_result (~.gcb_model.Model.DataSplitResult): + Data split result of the training run. Only + set when the input data is actually split. + global_explanations (Sequence[~.gcb_model.Model.GlobalExplanation]): + Global explanations for important features of + the model. For multi-class models, there is one + entry for each label class. For other models, + there is only one entry in the list. """ class TrainingOptions(proto.Message): @@ -651,6 +1017,12 @@ class TrainingOptions(proto.Message): Weights associated with each label class, for rebalancing the training data. Only applicable for classification models. + user_column (str): + User column specified for matrix + factorization models. + item_column (str): + Item column specified for matrix + factorization models. distance_type (~.gcb_model.Model.DistanceType): Distance type for clustering models. num_clusters (int): @@ -661,12 +1033,71 @@ class TrainingOptions(proto.Message): optimization_strategy (~.gcb_model.Model.OptimizationStrategy): Optimization strategy for training linear regression models. + hidden_units (Sequence[int]): + Hidden units for dnn models. + batch_size (int): + Batch size for dnn models. + dropout (~.wrappers.DoubleValue): + Dropout probability for dnn models. + max_tree_depth (int): + Maximum depth of a tree for boosted tree + models. + subsample (float): + Subsample fraction of the training data to + grow tree to prevent overfitting for boosted + tree models. + min_split_loss (~.wrappers.DoubleValue): + Minimum split loss for boosted tree models. + num_factors (int): + Num factors specified for matrix + factorization models. + feedback_type (~.gcb_model.Model.FeedbackType): + Feedback type that specifies which algorithm + to run for matrix factorization. + wals_alpha (~.wrappers.DoubleValue): + Hyperparameter for matrix factoration when + implicit feedback type is specified. kmeans_initialization_method (~.gcb_model.Model.KmeansEnums.KmeansInitializationMethod): The method used to initialize the centroids for kmeans algorithm. kmeans_initialization_column (str): The column used to provide the initial centroids for kmeans algorithm when kmeans_initialization_method is CUSTOM. + time_series_timestamp_column (str): + Column to be designated as time series + timestamp for ARIMA model. + time_series_data_column (str): + Column to be designated as time series data + for ARIMA model. + auto_arima (bool): + Whether to enable auto ARIMA or not. + non_seasonal_order (~.gcb_model.Model.ArimaOrder): + A specification of the non-seasonal part of + the ARIMA model: the three components (p, d, q) + are the AR order, the degree of differencing, + and the MA order. + data_frequency (~.gcb_model.Model.DataFrequency): + The data frequency of a time series. + include_drift (bool): + Include drift when fitting an ARIMA model. + holiday_region (~.gcb_model.Model.HolidayRegion): + The geographical region based on which the + holidays are considered in time series modeling. + If a valid value is specified, then holiday + effects modeling is enabled. + time_series_id_column (str): + The id column that will be used to indicate + different time series to forecast in parallel. + horizon (int): + The number of periods ahead that need to be + forecasted. + preserve_input_structs (bool): + Whether to preserve the input structs in output feature + names. Suppose there is a struct A with field b. When false + (default), the output feature name is A_b. When true, the + output feature name is A.b. + auto_arima_max_order (int): + The max value of non-seasonal p and q. """ max_iterations = proto.Field(proto.INT64, number=1) @@ -713,6 +1144,10 @@ class TrainingOptions(proto.Message): label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17) + user_column = proto.Field(proto.STRING, number=18) + + item_column = proto.Field(proto.STRING, number=19) + distance_type = proto.Field( proto.ENUM, number=20, enum="Model.DistanceType", ) @@ -725,6 +1160,32 @@ class TrainingOptions(proto.Message): proto.ENUM, number=23, enum="Model.OptimizationStrategy", ) + hidden_units = proto.RepeatedField(proto.INT64, number=24) + + batch_size = proto.Field(proto.INT64, number=25) + + dropout = proto.Field( + proto.MESSAGE, number=26, message=wrappers.DoubleValue, + ) + + max_tree_depth = proto.Field(proto.INT64, number=27) + + subsample = proto.Field(proto.DOUBLE, number=28) + + min_split_loss = proto.Field( + proto.MESSAGE, number=29, message=wrappers.DoubleValue, + ) + + num_factors = proto.Field(proto.INT64, number=30) + + feedback_type = proto.Field( + proto.ENUM, number=31, enum="Model.FeedbackType", + ) + + wals_alpha = proto.Field( + proto.MESSAGE, number=32, message=wrappers.DoubleValue, + ) + kmeans_initialization_method = proto.Field( proto.ENUM, number=33, @@ -733,6 +1194,34 @@ class TrainingOptions(proto.Message): kmeans_initialization_column = proto.Field(proto.STRING, number=34) + time_series_timestamp_column = proto.Field(proto.STRING, number=35) + + time_series_data_column = proto.Field(proto.STRING, number=36) + + auto_arima = proto.Field(proto.BOOL, number=37) + + non_seasonal_order = proto.Field( + proto.MESSAGE, number=38, message="Model.ArimaOrder", + ) + + data_frequency = proto.Field( + proto.ENUM, number=39, enum="Model.DataFrequency", + ) + + include_drift = proto.Field(proto.BOOL, number=41) + + holiday_region = proto.Field( + proto.ENUM, number=42, enum="Model.HolidayRegion", + ) + + time_series_id_column = proto.Field(proto.STRING, number=43) + + horizon = proto.Field(proto.INT64, number=44) + + preserve_input_structs = proto.Field(proto.BOOL, number=45) + + auto_arima_max_order = proto.Field(proto.INT64, number=46) + class IterationResult(proto.Message): r"""Information about a single iteration of the training run. @@ -753,6 +1242,8 @@ class IterationResult(proto.Message): cluster_infos (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ClusterInfo]): Information about top clusters for clustering models. + arima_result (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult): + """ class ClusterInfo(proto.Message): @@ -779,6 +1270,102 @@ class ClusterInfo(proto.Message): proto.MESSAGE, number=3, message=wrappers.Int64Value, ) + class ArimaResult(proto.Message): + r"""(Auto-)arima fitting result. Wrap everything in ArimaResult + for easier refactoring if we want to use model-specific + iteration results. + + Attributes: + arima_model_info (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]): + This message is repeated because there are + multiple arima models fitted in auto-arima. For + non-auto-arima model, its size is one. + seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. Repeated because multiple + periods are supported for one time series. + """ + + class ArimaCoefficients(proto.Message): + r"""Arima coefficients. + + Attributes: + auto_regressive_coefficients (Sequence[float]): + Auto-regressive coefficients, an array of + double. + moving_average_coefficients (Sequence[float]): + Moving-average coefficients, an array of + double. + intercept_coefficient (float): + Intercept coefficient, just a double not an + array. + """ + + auto_regressive_coefficients = proto.RepeatedField( + proto.DOUBLE, number=1 + ) + + moving_average_coefficients = proto.RepeatedField( + proto.DOUBLE, number=2 + ) + + intercept_coefficient = proto.Field(proto.DOUBLE, number=3) + + class ArimaModelInfo(proto.Message): + r"""Arima model information. + + Attributes: + non_seasonal_order (~.gcb_model.Model.ArimaOrder): + Non-seasonal order. + arima_coefficients (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients): + Arima coefficients. + arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics): + Arima fitting metrics. + has_drift (bool): + Whether Arima model fitted with drift or not. + It is always false when d is not 1. + time_series_id (str): + The id to indicate different time series. + seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. Repeated because multiple + periods are supported for one time series. + """ + + non_seasonal_order = proto.Field( + proto.MESSAGE, number=1, message="Model.ArimaOrder", + ) + + arima_coefficients = proto.Field( + proto.MESSAGE, + number=2, + message="Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients", + ) + + arima_fitting_metrics = proto.Field( + proto.MESSAGE, number=3, message="Model.ArimaFittingMetrics", + ) + + has_drift = proto.Field(proto.BOOL, number=4) + + time_series_id = proto.Field(proto.STRING, number=5) + + seasonal_periods = proto.RepeatedField( + proto.ENUM, + number=6, + enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + + arima_model_info = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo", + ) + + seasonal_periods = proto.RepeatedField( + proto.ENUM, + number=2, + enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + index = proto.Field(proto.MESSAGE, number=1, message=wrappers.Int32Value,) duration_ms = proto.Field( @@ -801,6 +1388,12 @@ class ClusterInfo(proto.Message): message="Model.TrainingRun.IterationResult.ClusterInfo", ) + arima_result = proto.Field( + proto.MESSAGE, + number=9, + message="Model.TrainingRun.IterationResult.ArimaResult", + ) + training_options = proto.Field( proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions", ) @@ -815,6 +1408,14 @@ class ClusterInfo(proto.Message): proto.MESSAGE, number=7, message="Model.EvaluationMetrics", ) + data_split_result = proto.Field( + proto.MESSAGE, number=9, message="Model.DataSplitResult", + ) + + global_explanations = proto.RepeatedField( + proto.MESSAGE, number=10, message="Model.GlobalExplanation", + ) + etag = proto.Field(proto.STRING, number=1) model_reference = proto.Field( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py index 72f12f2840f4..1a32a3c759dd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py @@ -58,6 +58,7 @@ class TypeKind(proto.Enum): DATETIME = 21 GEOGRAPHY = 22 NUMERIC = 23 + BIGNUMERIC = 24 ARRAY = 16 STRUCT = 17 diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py new file mode 100644 index 000000000000..d213e8bb6d82 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", manifest={"TableReference",}, +) + + +class TableReference(proto.Message): + r""" + + Attributes: + project_id (str): + Required. The ID of the project containing + this table. + dataset_id (str): + Required. The ID of the dataset containing + this table. + table_id (str): + Required. The ID of the table. The ID must contain only + letters (a-z, A-Z), numbers (0-9), or underscores (_). The + maximum length is 1,024 characters. Certain operations allow + suffixing of the table ID with a partition decorator, such + as ``sample_table$20190123``. + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + table_id = proto.Field(proto.STRING, number=3) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 00881063059a..db77e463dd2b 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,15 +4,15 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "31644d380b35a76a9147801a4b6b0271c246fd0c" + "sha": "5178b55682f5e264bfc082cde26acb1fdc953a18" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "c941026e5e3d600817a20e9ab4d4be03dff21a68", - "internalRef": "334645418" + "sha": "215c12ade72d9d9616457d9b8b2f8a37f38e79f3", + "internalRef": "337113354" } }, { @@ -101,18 +101,19 @@ "google/cloud/bigquery_v2/proto/model.proto", "google/cloud/bigquery_v2/proto/model_reference.proto", "google/cloud/bigquery_v2/proto/standard_sql.proto", + "google/cloud/bigquery_v2/proto/table_reference.proto", "google/cloud/bigquery_v2/py.typed", "google/cloud/bigquery_v2/types/__init__.py", "google/cloud/bigquery_v2/types/encryption_config.py", "google/cloud/bigquery_v2/types/model.py", "google/cloud/bigquery_v2/types/model_reference.py", "google/cloud/bigquery_v2/types/standard_sql.py", + "google/cloud/bigquery_v2/types/table_reference.py", "mypy.ini", "renovate.json", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "scripts/decrypt-secrets.sh", - "scripts/fixup_bigquery_v2_keywords.py", "scripts/readme-gen/readme_gen.py", "scripts/readme-gen/templates/README.tmpl.rst", "scripts/readme-gen/templates/auth.tmpl.rst", From d66b33f5abe117f90472e2bc2ac9f67920e2b0ab Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 19 Oct 2020 15:04:03 +0000 Subject: [PATCH 0966/2016] chore: release 2.2.0 (#321) :robot: I have created a release \*beep\* \*boop\* --- ## [2.2.0](https://www.github.com/googleapis/python-bigquery/compare/v2.1.0...v2.2.0) (2020-10-19) ### Features * add method api_repr for table list item ([#299](https://www.github.com/googleapis/python-bigquery/issues/299)) ([07c70f0](https://www.github.com/googleapis/python-bigquery/commit/07c70f0292f9212f0c968cd5c9206e8b0409c0da)) * add support for listing arima, automl, boosted tree, DNN, and matrix factorization models ([#328](https://www.github.com/googleapis/python-bigquery/issues/328)) ([502a092](https://www.github.com/googleapis/python-bigquery/commit/502a0926018abf058cb84bd18043c25eba15a2cc)) * add timeout paramter to load_table_from_file and it dependent methods ([#327](https://www.github.com/googleapis/python-bigquery/issues/327)) ([b0dd892](https://www.github.com/googleapis/python-bigquery/commit/b0dd892176e31ac25fddd15554b5bfa054299d4d)) * add to_api_repr method to Model ([#326](https://www.github.com/googleapis/python-bigquery/issues/326)) ([fb401bd](https://www.github.com/googleapis/python-bigquery/commit/fb401bd94477323bba68cf252dd88166495daf54)) * allow client options to be set in magics context ([#322](https://www.github.com/googleapis/python-bigquery/issues/322)) ([5178b55](https://www.github.com/googleapis/python-bigquery/commit/5178b55682f5e264bfc082cde26acb1fdc953a18)) ### Bug Fixes * make TimePartitioning repr evaluable ([#110](https://www.github.com/googleapis/python-bigquery/issues/110)) ([20f473b](https://www.github.com/googleapis/python-bigquery/commit/20f473bfff5ae98377f5d9cdf18bfe5554d86ff4)), closes [#109](https://www.github.com/googleapis/python-bigquery/issues/109) * use version.py instead of pkg_resources.get_distribution ([#307](https://www.github.com/googleapis/python-bigquery/issues/307)) ([b8f502b](https://www.github.com/googleapis/python-bigquery/commit/b8f502b14f21d1815697e4d57cf1225dfb4a7c5e)) ### Performance Improvements * add size parameter for load table from dataframe and json methods ([#280](https://www.github.com/googleapis/python-bigquery/issues/280)) ([3be78b7](https://www.github.com/googleapis/python-bigquery/commit/3be78b737add7111e24e912cd02fc6df75a07de6)) ### Documentation * update clustering field docstrings ([#286](https://www.github.com/googleapis/python-bigquery/issues/286)) ([5ea1ece](https://www.github.com/googleapis/python-bigquery/commit/5ea1ece2d911cdd1f3d9549ee01559ce8ed8269a)), closes [#285](https://www.github.com/googleapis/python-bigquery/issues/285) * update snippets samples to support version 2.0 ([#309](https://www.github.com/googleapis/python-bigquery/issues/309)) ([61634be](https://www.github.com/googleapis/python-bigquery/commit/61634be9bf9e3df7589fc1bfdbda87288859bb13)) ### Dependencies * add protobuf dependency ([#306](https://www.github.com/googleapis/python-bigquery/issues/306)) ([cebb5e0](https://www.github.com/googleapis/python-bigquery/commit/cebb5e0e911e8c9059bc8c9e7fce4440e518bff3)), closes [#305](https://www.github.com/googleapis/python-bigquery/issues/305) * require pyarrow for pandas support ([#314](https://www.github.com/googleapis/python-bigquery/issues/314)) ([801e4c0](https://www.github.com/googleapis/python-bigquery/commit/801e4c0574b7e421aa3a28cafec6fd6bcce940dd)), closes [#265](https://www.github.com/googleapis/python-bigquery/issues/265) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 34 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index ad6c9551f449..384704bbf6fb 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,40 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.2.0](https://www.github.com/googleapis/python-bigquery/compare/v2.1.0...v2.2.0) (2020-10-19) + + +### Features + +* add method api_repr for table list item ([#299](https://www.github.com/googleapis/python-bigquery/issues/299)) ([07c70f0](https://www.github.com/googleapis/python-bigquery/commit/07c70f0292f9212f0c968cd5c9206e8b0409c0da)) +* add support for listing arima, automl, boosted tree, DNN, and matrix factorization models ([#328](https://www.github.com/googleapis/python-bigquery/issues/328)) ([502a092](https://www.github.com/googleapis/python-bigquery/commit/502a0926018abf058cb84bd18043c25eba15a2cc)) +* add timeout paramter to load_table_from_file and it dependent methods ([#327](https://www.github.com/googleapis/python-bigquery/issues/327)) ([b0dd892](https://www.github.com/googleapis/python-bigquery/commit/b0dd892176e31ac25fddd15554b5bfa054299d4d)) +* add to_api_repr method to Model ([#326](https://www.github.com/googleapis/python-bigquery/issues/326)) ([fb401bd](https://www.github.com/googleapis/python-bigquery/commit/fb401bd94477323bba68cf252dd88166495daf54)) +* allow client options to be set in magics context ([#322](https://www.github.com/googleapis/python-bigquery/issues/322)) ([5178b55](https://www.github.com/googleapis/python-bigquery/commit/5178b55682f5e264bfc082cde26acb1fdc953a18)) + + +### Bug Fixes + +* make TimePartitioning repr evaluable ([#110](https://www.github.com/googleapis/python-bigquery/issues/110)) ([20f473b](https://www.github.com/googleapis/python-bigquery/commit/20f473bfff5ae98377f5d9cdf18bfe5554d86ff4)), closes [#109](https://www.github.com/googleapis/python-bigquery/issues/109) +* use version.py instead of pkg_resources.get_distribution ([#307](https://www.github.com/googleapis/python-bigquery/issues/307)) ([b8f502b](https://www.github.com/googleapis/python-bigquery/commit/b8f502b14f21d1815697e4d57cf1225dfb4a7c5e)) + + +### Performance Improvements + +* add size parameter for load table from dataframe and json methods ([#280](https://www.github.com/googleapis/python-bigquery/issues/280)) ([3be78b7](https://www.github.com/googleapis/python-bigquery/commit/3be78b737add7111e24e912cd02fc6df75a07de6)) + + +### Documentation + +* update clustering field docstrings ([#286](https://www.github.com/googleapis/python-bigquery/issues/286)) ([5ea1ece](https://www.github.com/googleapis/python-bigquery/commit/5ea1ece2d911cdd1f3d9549ee01559ce8ed8269a)), closes [#285](https://www.github.com/googleapis/python-bigquery/issues/285) +* update snippets samples to support version 2.0 ([#309](https://www.github.com/googleapis/python-bigquery/issues/309)) ([61634be](https://www.github.com/googleapis/python-bigquery/commit/61634be9bf9e3df7589fc1bfdbda87288859bb13)) + + +### Dependencies + +* add protobuf dependency ([#306](https://www.github.com/googleapis/python-bigquery/issues/306)) ([cebb5e0](https://www.github.com/googleapis/python-bigquery/commit/cebb5e0e911e8c9059bc8c9e7fce4440e518bff3)), closes [#305](https://www.github.com/googleapis/python-bigquery/issues/305) +* require pyarrow for pandas support ([#314](https://www.github.com/googleapis/python-bigquery/issues/314)) ([801e4c0](https://www.github.com/googleapis/python-bigquery/commit/801e4c0574b7e421aa3a28cafec6fd6bcce940dd)), closes [#265](https://www.github.com/googleapis/python-bigquery/issues/265) + ## [2.1.0](https://www.github.com/googleapis/python-bigquery/compare/v2.0.0...v2.1.0) (2020-10-08) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 8b5d3328c28b..bd0f8e5c7d25 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.1.0" +__version__ = "2.2.0" From 6861ee9d58bbe74ebc83f53cfd1926a4855b11fa Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 20 Oct 2020 15:07:55 -0400 Subject: [PATCH 0967/2016] fix: create_job method accepts dictionary arguments (#300) * fix: broken create_job method * fix: changes in unit tests * fix: fix sourceTable thing * fix: handle sourceTable passed in job resource * fix: remove delete destination table from query * fix: revert destination table for query --- .../google/cloud/bigquery/client.py | 12 ++-- .../tests/unit/test_client.py | 57 +++++++++---------- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index cce393d6cc15..e4b5b22ab0d8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -48,11 +48,11 @@ from google.cloud import exceptions from google.cloud.client import ClientWithProject +from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._helpers import _verify_job_config_type -from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset @@ -1619,6 +1619,7 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): ) destination = _get_sub_prop(job_config, ["load", "destinationTable"]) source_uris = _get_sub_prop(job_config, ["load", "sourceUris"]) + destination = TableReference.from_api_repr(destination) return self.load_table_from_uri( source_uris, destination, @@ -1631,9 +1632,9 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): job_config ) destination = _get_sub_prop(job_config, ["copy", "destinationTable"]) + destination = TableReference.from_api_repr(destination) sources = [] source_configs = _get_sub_prop(job_config, ["copy", "sourceTables"]) - if source_configs is None: source_configs = [_get_sub_prop(job_config, ["copy", "sourceTable"])] for source_config in source_configs: @@ -1651,10 +1652,13 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): job_config ) source = _get_sub_prop(job_config, ["extract", "sourceTable"]) - source_type = "Table" - if not source: + if source: + source_type = "Table" + source = TableReference.from_api_repr(source) + else: source = _get_sub_prop(job_config, ["extract", "sourceModel"]) source_type = "Model" + source = ModelReference.from_api_repr(source) destination_uris = _get_sub_prop(job_config, ["extract", "destinationUris"]) return self.extract_table( source, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 2001ad42b59b..e507834f6097 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -3573,21 +3573,28 @@ def test_delete_table_w_not_found_ok_true(self): conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) - def _create_job_helper(self, job_config, client_method): + def _create_job_helper(self, job_config): + from google.cloud.bigquery import _helpers + creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = make_connection() - rf1 = mock.Mock() - get_config_patch = mock.patch( - "google.cloud.bigquery.job._JobConfig.from_api_repr", return_value=rf1, - ) - load_patch = mock.patch(client_method, autospec=True) + RESOURCE = { + "jobReference": {"projectId": self.PROJECT, "jobId": mock.ANY}, + "configuration": job_config, + } + conn = client._connection = make_connection(RESOURCE) + client.create_job(job_config=job_config) + if "query" in job_config: + _helpers._del_sub_prop(job_config, ["query", "destinationTable"]) - with load_patch as client_method, get_config_patch: - client.create_job(job_config=job_config) - client_method.assert_called_once() + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=None, + ) def test_create_job_load_config(self): configuration = { @@ -3601,9 +3608,7 @@ def test_create_job_load_config(self): } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.load_table_from_uri" - ) + self._create_job_helper(configuration) def test_create_job_copy_config(self): configuration = { @@ -3623,9 +3628,7 @@ def test_create_job_copy_config(self): } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.copy_table", - ) + self._create_job_helper(configuration) def test_create_job_copy_config_w_single_source(self): configuration = { @@ -3643,9 +3646,7 @@ def test_create_job_copy_config_w_single_source(self): } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.copy_table", - ) + self._create_job_helper(configuration) def test_create_job_extract_config(self): configuration = { @@ -3658,9 +3659,7 @@ def test_create_job_extract_config(self): "destinationUris": ["gs://test_bucket/dst_object*"], } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.extract_table", - ) + self._create_job_helper(configuration) def test_create_job_extract_config_for_model(self): configuration = { @@ -3673,17 +3672,17 @@ def test_create_job_extract_config_for_model(self): "destinationUris": ["gs://test_bucket/dst_object*"], } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.extract_table", - ) + self._create_job_helper(configuration) def test_create_job_query_config(self): configuration = { - "query": {"query": "query", "destinationTable": {"tableId": "table_id"}} + "query": { + "query": "query", + "destinationTable": {"tableId": "table_id"}, + "useLegacySql": False, + } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.query", - ) + self._create_job_helper(configuration) def test_create_job_query_config_w_rateLimitExceeded_error(self): from google.cloud.exceptions import Forbidden From a12d97eba5f6f012f0e4fdc48bcc3cefb9ccbb26 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 23 Oct 2020 20:47:13 +0200 Subject: [PATCH 0968/2016] chore(deps): update dependency grpcio to v1.33.1 (#338) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 7d001fa2f547..544e92eb11f8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.1.0 google-cloud-bigquery-storage==2.0.0 google-auth-oauthlib==0.4.1 -grpcio==1.32.0 +grpcio==1.33.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.2 From 21817dafe01b94bb9c5d63b2ed67f8697838fcdd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 26 Oct 2020 10:04:34 -0500 Subject: [PATCH 0969/2016] test: make `_AsyncJob` tests mock at a lower layer (#340) This is intented to make the `_AsyncJob` tests more robust to changes in retry behavior. It also more explicitly tests the retry behavior by observing API calls rather than calls to certain methods. --- .../google/cloud/bigquery/client.py | 1 - .../google/cloud/bigquery/job.py | 7 +- .../tests/unit/test_job.py | 121 +++++++++++++----- 3 files changed, 95 insertions(+), 34 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e4b5b22ab0d8..57df9455e45c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -625,7 +625,6 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None def _call_api( self, retry, span_name=None, span_attributes=None, job_ref=None, **kwargs ): - call = functools.partial(self._connection.api_request, **kwargs) if retry: call = retry(call) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 766db1d4214d..6cb138acfc03 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -529,9 +529,8 @@ def state(self): Optional[str]: the state (None until set from the server). """ - status = self._properties.get("status") - if status is not None: - return status.get("state") + status = self._properties.get("status", {}) + return status.get("state") def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -588,7 +587,7 @@ def _check_resource_config(cls, resource): def to_api_repr(self): """Generate a resource for the job.""" - raise NotImplementedError("Abstract") + return copy.deepcopy(self._properties) _build_resource = to_api_repr # backward-compatibility alias diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 75212ae95537..f577b08bd969 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -20,6 +20,8 @@ import warnings import freezegun +from google.api_core import exceptions +import google.api_core.retry import mock import pytest import requests @@ -70,6 +72,12 @@ def _make_connection(*responses): return mock_conn +def _make_retriable_exception(): + return exceptions.TooManyRequests( + "retriable exception", errors=[{"reason": "rateLimitExceeded"}] + ) + + def _make_job_resource( creation_time_ms=1437767599006, started_time_ms=1437767600007, @@ -84,6 +92,7 @@ def _make_job_resource( user_email="bq-user@example.com", ): resource = { + "status": {"state": "PENDING"}, "configuration": {job_type: {}}, "statistics": {"creationTime": creation_time_ms, job_type: {}}, "etag": etag, @@ -97,9 +106,11 @@ def _make_job_resource( if started or ended: resource["statistics"]["startTime"] = started_time_ms + resource["status"]["state"] = "RUNNING" if ended: resource["statistics"]["endTime"] = ended_time_ms + resource["status"]["state"] = "DONE" if job_type == "query": resource["configuration"]["query"]["destinationTable"] = { @@ -555,14 +566,14 @@ def test__check_resource_config_ok(self): def test__build_resource(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - with self.assertRaises(NotImplementedError): - job._build_resource() + resource = job._build_resource() + assert resource["jobReference"]["jobId"] == self.JOB_ID def test_to_api_repr(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - with self.assertRaises(NotImplementedError): - job.to_api_repr() + resource = job.to_api_repr() + assert resource["jobReference"]["jobId"] == self.JOB_ID def test__begin_already(self): job = self._set_properties_job() @@ -965,43 +976,95 @@ def test_done_already(self): self.assertTrue(job.done()) - @mock.patch("google.api_core.future.polling.PollingFuture.result") - def test_result_default_wo_state(self, result): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - client = _make_client(project=self.PROJECT) + def test_result_default_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + _make_retriable_exception(), + begun_job_resource, + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - begin = job._begin = mock.Mock() - self.assertIs(job.result(), result.return_value) + self.assertIs(job.result(), job) - begin.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) - result.assert_called_once_with(timeout=None) + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) - @mock.patch("google.api_core.future.polling.PollingFuture.result") - def test_result_w_retry_wo_state(self, result): - client = _make_client(project=self.PROJECT) + def test_result_w_retry_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + exceptions.NotFound("not normally retriable"), + begun_job_resource, + # The call to done() / reload() does not get the custom retry + # policy passed to it, so we don't throw a non-retriable + # exception here. See: + # https://github.com/googleapis/python-bigquery/issues/24 + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - begin = job._begin = mock.Mock() - retry = mock.Mock() + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - self.assertIs(job.result(retry=retry), result.return_value) + self.assertIs(job.result(retry=custom_retry), job) - begin.assert_called_once_with(retry=retry, timeout=None) - result.assert_called_once_with(timeout=None) + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) - @mock.patch("google.api_core.future.polling.PollingFuture.result") - def test_result_explicit_w_state(self, result): - client = _make_client(project=self.PROJECT) + def test_result_explicit_w_state(self): + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - begin = job._begin = mock.Mock() + # Use _set_properties() instead of directly modifying _properties so + # that the result state is set properly. + job_resource = job._properties + job_resource["status"] = {"state": "DONE"} + job._set_properties(job_resource) timeout = 1 - self.assertIs(job.result(timeout=timeout), result.return_value) + self.assertIs(job.result(timeout=timeout), job) - begin.assert_not_called() - result.assert_called_once_with(timeout=timeout) + conn.api_request.assert_not_called() def test_cancelled_wo_error_result(self): client = _make_client(project=self.PROJECT) From a311ef1e001a5ecf708619a7229d2bcefa88aefc Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 27 Oct 2020 10:34:55 -0400 Subject: [PATCH 0970/2016] docs: add documents for QueryPlanEntry and QueryPlanEntryStep (#344) --- packages/google-cloud-bigquery/docs/reference.rst | 2 ++ .../google-cloud-bigquery/google/cloud/bigquery/job.py | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 21dd8e43d42e..3643831cbc2f 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -59,6 +59,8 @@ Job-Related Types job.CreateDisposition job.DestinationFormat job.Encoding + job.QueryPlanEntry + job.QueryPlanEntryStep job.QueryPriority job.SourceFormat job.WriteDisposition diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 6cb138acfc03..977d7a559f92 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -2844,7 +2844,7 @@ def query_plan(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan Returns: - List[QueryPlanEntry]: + List[google.cloud.bigquery.job.QueryPlanEntry]: mappings describing the query plan, or an empty list if the query has not yet completed. """ @@ -3418,7 +3418,6 @@ class QueryPlanEntryStep(object): Args: kind (str): step type. - substeps (List): names of substeps. """ @@ -3434,7 +3433,8 @@ def from_api_repr(cls, resource): resource (Dict): JSON representation of the entry. Returns: - QueryPlanEntryStep: new instance built from the resource. + google.cloud.bigquery.job.QueryPlanEntryStep: + New instance built from the resource. """ return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ())) @@ -3464,7 +3464,7 @@ def from_api_repr(cls, resource): ExplainQueryStage representation returned from API. Returns: - google.cloud.bigquery.QueryPlanEntry: + google.cloud.bigquery.job.QueryPlanEntry: Query plan entry parsed from ``resource``. """ entry = cls() From 03464fcb4c553c8cc5cb69fad3b098b95349f50c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 18:56:58 +0100 Subject: [PATCH 0971/2016] chore(deps): update dependency google-auth-oauthlib to v0.4.2 (#349) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 544e92eb11f8..fab7974941f6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.1.0 google-cloud-bigquery-storage==2.0.0 -google-auth-oauthlib==0.4.1 +google-auth-oauthlib==0.4.2 grpcio==1.33.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From 9e7436290263b0c5c2bc5ec16988a78387121775 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 19:16:06 +0100 Subject: [PATCH 0972/2016] chore(deps): update dependency grpcio to v1.33.2 (#350) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [grpcio](https://grpc.io) | patch | `==1.33.1` -> `==1.33.2` | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index fab7974941f6..3bcab1ace3c2 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.1.0 google-cloud-bigquery-storage==2.0.0 google-auth-oauthlib==0.4.2 -grpcio==1.33.1 +grpcio==1.33.2 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.2 From 71a2b735799c582dffc2441e824a3e06a8b7a915 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 19:36:05 +0100 Subject: [PATCH 0973/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.0.1 (#337) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | patch | `==2.0.0` -> `==2.0.1` | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.0.1`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​201-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev200v201-2020-10-21) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.0.0...v2.0.1)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 3bcab1ace3c2..411a86daedb0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.1.0 -google-cloud-bigquery-storage==2.0.0 +google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.33.2 ipython==7.16.1; python_version < '3.7' From cf3772e2f6b2b31eacaad1503f90260e157ac668 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 19:48:03 +0100 Subject: [PATCH 0974/2016] chore(deps): update dependency google-cloud-bigquery to v2.2.0 (#333) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==2.1.0` -> `==2.2.0` | --- ### Release Notes
googleapis/python-bigquery ### [`v2.2.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​220-httpswwwgithubcomgoogleapispython-bigquerycomparev210v220-2020-10-19) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.1.0...v2.2.0) ##### Features - add method api_repr for table list item ([#​299](https://www.github.com/googleapis/python-bigquery/issues/299)) ([07c70f0](https://www.github.com/googleapis/python-bigquery/commit/07c70f0292f9212f0c968cd5c9206e8b0409c0da)) - add support for listing arima, automl, boosted tree, DNN, and matrix factorization models ([#​328](https://www.github.com/googleapis/python-bigquery/issues/328)) ([502a092](https://www.github.com/googleapis/python-bigquery/commit/502a0926018abf058cb84bd18043c25eba15a2cc)) - add timeout paramter to load_table_from_file and it dependent methods ([#​327](https://www.github.com/googleapis/python-bigquery/issues/327)) ([b0dd892](https://www.github.com/googleapis/python-bigquery/commit/b0dd892176e31ac25fddd15554b5bfa054299d4d)) - add to_api_repr method to Model ([#​326](https://www.github.com/googleapis/python-bigquery/issues/326)) ([fb401bd](https://www.github.com/googleapis/python-bigquery/commit/fb401bd94477323bba68cf252dd88166495daf54)) - allow client options to be set in magics context ([#​322](https://www.github.com/googleapis/python-bigquery/issues/322)) ([5178b55](https://www.github.com/googleapis/python-bigquery/commit/5178b55682f5e264bfc082cde26acb1fdc953a18)) ##### Bug Fixes - make TimePartitioning repr evaluable ([#​110](https://www.github.com/googleapis/python-bigquery/issues/110)) ([20f473b](https://www.github.com/googleapis/python-bigquery/commit/20f473bfff5ae98377f5d9cdf18bfe5554d86ff4)), closes [#​109](https://www.github.com/googleapis/python-bigquery/issues/109) - use version.py instead of pkg_resources.get_distribution ([#​307](https://www.github.com/googleapis/python-bigquery/issues/307)) ([b8f502b](https://www.github.com/googleapis/python-bigquery/commit/b8f502b14f21d1815697e4d57cf1225dfb4a7c5e)) ##### Performance Improvements - add size parameter for load table from dataframe and json methods ([#​280](https://www.github.com/googleapis/python-bigquery/issues/280)) ([3be78b7](https://www.github.com/googleapis/python-bigquery/commit/3be78b737add7111e24e912cd02fc6df75a07de6)) ##### Documentation - update clustering field docstrings ([#​286](https://www.github.com/googleapis/python-bigquery/issues/286)) ([5ea1ece](https://www.github.com/googleapis/python-bigquery/commit/5ea1ece2d911cdd1f3d9549ee01559ce8ed8269a)), closes [#​285](https://www.github.com/googleapis/python-bigquery/issues/285) - update snippets samples to support version 2.0 ([#​309](https://www.github.com/googleapis/python-bigquery/issues/309)) ([61634be](https://www.github.com/googleapis/python-bigquery/commit/61634be9bf9e3df7589fc1bfdbda87288859bb13)) ##### Dependencies - add protobuf dependency ([#​306](https://www.github.com/googleapis/python-bigquery/issues/306)) ([cebb5e0](https://www.github.com/googleapis/python-bigquery/commit/cebb5e0e911e8c9059bc8c9e7fce4440e518bff3)), closes [#​305](https://www.github.com/googleapis/python-bigquery/issues/305) - require pyarrow for pandas support ([#​314](https://www.github.com/googleapis/python-bigquery/issues/314)) ([801e4c0](https://www.github.com/googleapis/python-bigquery/commit/801e4c0574b7e421aa3a28cafec6fd6bcce940dd)), closes [#​265](https://www.github.com/googleapis/python-bigquery/issues/265)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 411a86daedb0..c5ab3ef3d002 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.1.0 +google-cloud-bigquery==2.2.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.33.2 From 233b3a41d4fc769a3eb700606685c4947e93aa54 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Oct 2020 16:32:02 -0500 Subject: [PATCH 0975/2016] feat: add `reload` argument to `*Job.done()` functions (#341) This enables checking the job status without making an API call. It also fixes an inconsistency in `QueryJob`, where a job can be reported as "done" without having the results of a `getQueryResults` API call. Follow-up to https://github.com/googleapis/python-bigquery/pull/340 --- .../google/cloud/bigquery/job.py | 67 +++---- .../tests/unit/test_job.py | 101 ++++++++-- .../tests/unit/test_magics.py | 173 ++++++++++-------- 3 files changed, 218 insertions(+), 123 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 977d7a559f92..204c5f7744b9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -767,7 +767,7 @@ def _set_future_result(self): # set, do not call set_result/set_exception again. # Note: self._result_set is set to True in set_result and # set_exception, in case those methods are invoked directly. - if self.state != _DONE_STATE or self._result_set: + if not self.done(reload=False) or self._result_set: return if self.error_result is not None: @@ -776,21 +776,24 @@ def _set_future_result(self): else: self.set_result(self) - def done(self, retry=DEFAULT_RETRY, timeout=None): - """Refresh the job and checks if it is complete. + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + """Checks if the job is complete. Args: retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + reload (Optional[bool]): + If ``True``, make an API call to refresh the job state of + unfinished jobs before checking. Default ``True``. Returns: bool: True if the job is complete, False otherwise. """ # Do not refresh is the state is already done, as the job will not # change once complete. - if self.state != _DONE_STATE: + if self.state != _DONE_STATE and reload: self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE @@ -3073,7 +3076,7 @@ def estimated_bytes_processed(self): result = int(result) return result - def done(self, retry=DEFAULT_RETRY, timeout=None): + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): """Refresh the job and checks if it is complete. Args: @@ -3082,10 +3085,25 @@ def done(self, retry=DEFAULT_RETRY, timeout=None): timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + reload (Optional[bool]): + If ``True``, make an API call to refresh the job state of + unfinished jobs before checking. Default ``True``. Returns: bool: True if the job is complete, False otherwise. """ + is_done = ( + # Only consider a QueryJob complete when we know we have the final + # query results available. + self._query_results is not None + and self._query_results.complete + and self.state == _DONE_STATE + ) + # Do not refresh if the state is already done, as the job will not + # change once complete. + if not reload or is_done: + return is_done + # Since the API to getQueryResults can hang up to the timeout value # (default of 10 seconds), set the timeout parameter to ensure that # the timeout from the futures API is respected. See: @@ -3103,23 +3121,20 @@ def done(self, retry=DEFAULT_RETRY, timeout=None): # stored in _blocking_poll() in the process of polling for job completion. transport_timeout = timeout if timeout is not None else self._transport_timeout - # Do not refresh if the state is already done, as the job will not - # change once complete. - if self.state != _DONE_STATE: - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - timeout_ms=timeout_ms, - location=self.location, - timeout=transport_timeout, - ) + self._query_results = self._client._get_query_results( + self.job_id, + retry, + project=self.project, + timeout_ms=timeout_ms, + location=self.location, + timeout=transport_timeout, + ) - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if self._query_results.complete: - self.reload(retry=retry, timeout=transport_timeout) + # Only reload the job once we know the query is complete. + # This will ensure that fields such as the destination table are + # correctly populated. + if self._query_results.complete and self.state != _DONE_STATE: + self.reload(retry=retry, timeout=transport_timeout) return self.state == _DONE_STATE @@ -3231,16 +3246,6 @@ def result( """ try: super(QueryJob, self).result(retry=retry, timeout=timeout) - - # Return an iterator instead of returning the job. - if not self._query_results: - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - location=self.location, - timeout=timeout, - ) except exceptions.GoogleCloudError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index f577b08bd969..2d1e8fec8076 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -45,6 +45,8 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None +import google.cloud.bigquery.query + def _make_credentials(): import google.auth.credentials @@ -3942,10 +3944,6 @@ def _make_resource(self, started=False, ended=False): resource = super(TestQueryJob, self)._make_resource(started, ended) config = resource["configuration"]["query"] config["query"] = self.QUERY - - if ended: - resource["status"] = {"state": "DONE"} - return resource def _verifyBooleanResourceProperties(self, job, config): @@ -4211,6 +4209,9 @@ def test_done(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": resource["jobReference"]} + ) self.assertTrue(job.done()) def test_done_w_timeout(self): @@ -4668,28 +4669,39 @@ def test_result(self): from google.cloud.bigquery.table import RowIterator query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "2", } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } tabledata_resource = { - # Explicitly set totalRows to be different from the query response. - # to test update during iteration. + # Explicitly set totalRows to be different from the initial + # response to test update during iteration. "totalRows": "1", "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - connection = _make_connection(query_resource, tabledata_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) + conn = _make_connection( + query_resource, query_resource_done, job_resource_done, tabledata_resource + ) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) result = job.result() self.assertIsInstance(result, RowIterator) self.assertEqual(result.total_rows, 2) - rows = list(result) self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") @@ -4697,6 +4709,70 @@ def test_result(self): # on the response from tabledata.list. self.assertEqual(result.total_rows, 1) + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call, tabledata_call] + ) + + def test_result_with_done_job_calls_get_query_results(self): + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "1", + } + job_resource = self._make_resource(started=True, ended=True) + job_resource["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + tabledata_resource = { + "totalRows": "1", + "pageToken": None, + "rows": [{"f": [{"v": "abc"}]}], + } + conn = _make_connection(query_resource_done, tabledata_resource) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + result = job.result() + + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls([query_results_call, tabledata_call]) + def test_result_with_max_results(self): from google.cloud.bigquery.table import RowIterator @@ -4938,6 +5014,9 @@ def test_result_error(self): "errors": [error_result], "state": "DONE", } + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": job._properties["jobReference"]} + ) job._set_future_result() with self.assertRaises(exceptions.GoogleCloudError) as exc_info: diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 30ca4d70c3e4..b2877845af36 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -19,7 +19,6 @@ import mock import pytest -import six try: import pandas @@ -101,27 +100,38 @@ def fail_if(name, globals, locals, fromlist, level): return maybe_fail_import(predicate=fail_if) -JOB_REFERENCE_RESOURCE = {"projectId": "its-a-project-eh", "jobId": "some-random-id"} +PROJECT_ID = "its-a-project-eh" +JOB_ID = "some-random-id" +JOB_REFERENCE_RESOURCE = {"projectId": PROJECT_ID, "jobId": JOB_ID} +DATASET_ID = "dest_dataset" +TABLE_ID = "dest_table" TABLE_REFERENCE_RESOURCE = { - "projectId": "its-a-project-eh", - "datasetId": "ds", - "tableId": "persons", + "projectId": PROJECT_ID, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, } +QUERY_STRING = "SELECT 42 AS the_answer FROM `life.the_universe.and_everything`;" QUERY_RESOURCE = { "jobReference": JOB_REFERENCE_RESOURCE, "configuration": { "query": { "destinationTable": TABLE_REFERENCE_RESOURCE, - "query": "SELECT 42 FROM `life.the_universe.and_everything`;", + "query": QUERY_STRING, "queryParameters": [], "useLegacySql": False, } }, "status": {"state": "DONE"}, } +QUERY_RESULTS_RESOURCE = { + "jobReference": JOB_REFERENCE_RESOURCE, + "totalRows": 1, + "jobComplete": True, + "schema": {"fields": [{"name": "the_answer", "type": "INTEGER"}]}, +} -def test_context_credentials_auto_set_w_application_default_credentials(): +def test_context_with_default_credentials(): """When Application Default Credentials are set, the context credentials will be created the first time it is called """ @@ -142,6 +152,50 @@ def test_context_credentials_auto_set_w_application_default_credentials(): assert default_mock.call_count == 2 +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_context_with_default_connection(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._credentials = None + magics.context._project = None + magics.context._connection = None + + default_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + credentials_patch = mock.patch( + "google.auth.default", return_value=(default_credentials, "project-from-env") + ) + default_conn = make_connection(QUERY_RESOURCE, QUERY_RESULTS_RESOURCE) + conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) + list_rows_patch = mock.patch( + "google.cloud.bigquery.client.Client.list_rows", + return_value=google.cloud.bigquery.table._EmptyRowIterator(), + ) + + with conn_patch as conn, credentials_patch, list_rows_patch as list_rows: + conn.return_value = default_conn + ip.run_cell_magic("bigquery", "", QUERY_STRING) + + # Check that query actually starts the job. + conn.assert_called() + list_rows.assert_called() + begin_call = mock.call( + method="POST", + path="/projects/project-from-env/jobs", + data=mock.ANY, + timeout=None, + ) + query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT_ID}/queries/{JOB_ID}", + query_params=mock.ANY, + timeout=mock.ANY, + ) + default_conn.api_request.assert_has_calls([begin_call, query_results_call]) + + def test_context_credentials_and_project_can_be_set_explicitly(): project1 = "one-project-55564" project2 = "other-project-52569" @@ -163,93 +217,47 @@ def test_context_credentials_and_project_can_be_set_explicitly(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_context_connection_can_be_overriden(): +def test_context_with_custom_connection(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context._project = None magics.context._credentials = None - - credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - project = "project-123" - default_patch = mock.patch( - "google.auth.default", return_value=(credentials_mock, project) - ) - job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE) - job_reference["projectId"] = project - - query = "select * from persons" - resource = copy.deepcopy(QUERY_RESOURCE) - resource["jobReference"] = job_reference - resource["configuration"]["query"]["query"] = query - data = {"jobReference": job_reference, "totalRows": 0, "rows": []} - - conn = magics.context._connection = make_connection(resource, data) - list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", - return_value=google.cloud.bigquery.table._EmptyRowIterator(), + context_conn = magics.context._connection = make_connection( + QUERY_RESOURCE, QUERY_RESULTS_RESOURCE ) - with list_rows_patch as list_rows, default_patch: - ip.run_cell_magic("bigquery", "", query) - # Check that query actually starts the job. - list_rows.assert_called() - assert len(conn.api_request.call_args_list) == 2 - _, req = conn.api_request.call_args_list[0] - assert req["method"] == "POST" - assert req["path"] == "/projects/{}/jobs".format(project) - sent = req["data"] - assert isinstance(sent["jobReference"]["jobId"], six.string_types) - sent_config = sent["configuration"]["query"] - assert sent_config["query"] == query - - -@pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_context_no_connection(): - ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") - magics.context._project = None - magics.context._credentials = None - magics.context._connection = None - - credentials_mock = mock.create_autospec( + default_credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - project = "project-123" - default_patch = mock.patch( - "google.auth.default", return_value=(credentials_mock, project) + credentials_patch = mock.patch( + "google.auth.default", return_value=(default_credentials, "project-from-env") ) - job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE) - job_reference["projectId"] = project - - query = "select * from persons" - resource = copy.deepcopy(QUERY_RESOURCE) - resource["jobReference"] = job_reference - resource["configuration"]["query"]["query"] = query - data = {"jobReference": job_reference, "totalRows": 0, "rows": []} - - conn_mock = make_connection(resource, data, data, data) + default_conn = make_connection() conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) - with conn_patch as conn, list_rows_patch as list_rows, default_patch: - conn.return_value = conn_mock - ip.run_cell_magic("bigquery", "", query) - # Check that query actually starts the job. + with conn_patch as conn, credentials_patch, list_rows_patch as list_rows: + conn.return_value = default_conn + ip.run_cell_magic("bigquery", "", QUERY_STRING) + list_rows.assert_called() - assert len(conn_mock.api_request.call_args_list) == 2 - _, req = conn_mock.api_request.call_args_list[0] - assert req["method"] == "POST" - assert req["path"] == "/projects/{}/jobs".format(project) - sent = req["data"] - assert isinstance(sent["jobReference"]["jobId"], six.string_types) - sent_config = sent["configuration"]["query"] - assert sent_config["query"] == query + default_conn.api_request.assert_not_called() + begin_call = mock.call( + method="POST", + path="/projects/project-from-env/jobs", + data=mock.ANY, + timeout=None, + ) + query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT_ID}/queries/{JOB_ID}", + query_params=mock.ANY, + timeout=mock.ANY, + ) + context_conn.api_request.assert_has_calls([begin_call, query_results_call]) def test__run_query(): @@ -1060,6 +1068,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex resource = copy.deepcopy(QUERY_RESOURCE) resource["jobReference"] = job_reference resource["configuration"]["query"]["query"] = query + query_results = {"jobReference": job_reference, "totalRows": 0, "jobComplete": True} data = {"jobReference": job_reference, "totalRows": 0, "rows": []} credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -1067,7 +1076,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex default_patch = mock.patch( "google.auth.default", return_value=(credentials_mock, "general-project") ) - conn = magics.context._connection = make_connection(resource, data) + conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), @@ -1098,6 +1107,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): resource = copy.deepcopy(QUERY_RESOURCE) resource["jobReference"] = job_reference resource["configuration"]["query"]["query"] = query + query_results = {"jobReference": job_reference, "totalRows": 0, "jobComplete": True} data = {"jobReference": job_reference, "totalRows": 0, "rows": []} credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -1105,7 +1115,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): default_patch = mock.patch( "google.auth.default", return_value=(credentials_mock, "general-project") ) - conn = magics.context._connection = make_connection(resource, data) + conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), @@ -1136,6 +1146,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): resource = copy.deepcopy(QUERY_RESOURCE) resource["jobReference"] = job_reference resource["configuration"]["query"]["query"] = query + query_results = {"jobReference": job_reference, "totalRows": 0, "jobComplete": True} data = {"jobReference": job_reference, "totalRows": 0, "rows": []} credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -1143,7 +1154,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): default_patch = mock.patch( "google.auth.default", return_value=(credentials_mock, "general-project") ) - conn = magics.context._connection = make_connection(resource, data) + conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), From 17360fd7e1e142940050b3cebe8f1f75f40d254f Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Thu, 29 Oct 2020 10:16:07 -0400 Subject: [PATCH 0976/2016] fix(dbapi): avoid running % format with no query parameters (#348) * fix: aviod running %format when no query params * fix: nit * fix: change in unit test --- .../google/cloud/bigquery/dbapi/cursor.py | 2 +- .../google-cloud-bigquery/tests/unit/test_dbapi_cursor.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 63264e9abc5a..597313fd6263 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -441,7 +441,7 @@ def _format_operation(operation, parameters=None): if a parameter used in the operation is not found in the ``parameters`` argument. """ - if parameters is None: + if parameters is None or len(parameters) == 0: return operation if isinstance(parameters, collections_abc.Mapping): diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 9a1a6b1e877a..5c3bfcae9198 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -601,3 +601,9 @@ def test__format_operation_w_too_short_sequence(self): "SELECT %s, %s;", ("hello",), ) + + def test__format_operation_w_empty_dict(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation("SELECT '%f'", {}) + self.assertEqual(formatted_operation, "SELECT '%f'") From d5c509e8f814fc89e999cbbf6ce9ba226af12f36 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 31 Oct 2020 00:28:57 +0100 Subject: [PATCH 0977/2016] chore(deps): update dependency pandas to v1.1.4 (#355) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index c5ab3ef3d002..bf895a1ae751 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -5,6 +5,6 @@ grpcio==1.33.2 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.2 -pandas==1.1.3 +pandas==1.1.4 pyarrow==1.0.1 pytz==2020.1 From a215a0c3619e7975758d337eaf8813a9b6322dfd Mon Sep 17 00:00:00 2001 From: Ilya Gurov Date: Tue, 3 Nov 2020 18:43:15 +0300 Subject: [PATCH 0978/2016] feat: pass retry from Job.result() to Job.done() (#41) * feat(bigquery): pass retry from Job.result() to Job.done(). * fix merge conflicts * drop the comment * use kwargs sentinel * check the mock retry * update dependencies * use kwargs pattern * feat: added unit test for retry * feat: added more exceptions Co-authored-by: Tim Swast Co-authored-by: HemangChothani --- .../google/cloud/bigquery/job.py | 16 ++--- packages/google-cloud-bigquery/setup.py | 2 +- .../testing/constraints-3.6.txt | 2 +- .../tests/unit/test_job.py | 67 +++++++++++++++++-- 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py index 204c5f7744b9..e2e7e839a125 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py @@ -819,8 +819,9 @@ def result(self, retry=DEFAULT_RETRY, timeout=None): """ if self.state is None: self._begin(retry=retry, timeout=timeout) - # TODO: modify PollingFuture so it can pass a retry argument to done(). - return super(_AsyncJob, self).result(timeout=timeout) + + kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry} + return super(_AsyncJob, self).result(timeout=timeout, **kwargs) def cancelled(self): """Check if the job has been cancelled. @@ -1845,7 +1846,7 @@ def destination(self): """ return TableReference.from_api_repr( _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "destinationTable"], + self._properties, ["configuration", "copy", "destinationTable"] ) ) @@ -2043,10 +2044,7 @@ def __init__(self, job_id, source, destination_uris, client, job_config=None): self._configuration = job_config if source: - source_ref = { - "projectId": source.project, - "datasetId": source.dataset_id, - } + source_ref = {"projectId": source.project, "datasetId": source.dataset_id} if isinstance(source, (Table, TableListItem, TableReference)): source_ref["tableId"] = source.table_id @@ -3138,10 +3136,10 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): return self.state == _DONE_STATE - def _blocking_poll(self, timeout=None): + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout - super(QueryJob, self)._blocking_poll(timeout=timeout) + super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs) @staticmethod def _format_for_exception(query, job_id): diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index c7410601e8d7..548ceac09392 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -29,7 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-api-core[grpc] >= 1.22.2, < 2.0.0dev", + "google-api-core[grpc] >= 1.23.0, < 2.0.0dev", "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index cea0ed84e467..91a507a5c2ce 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -1,4 +1,4 @@ -google-api-core==1.22.2 +google-api-core==1.23.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py index 2d1e8fec8076..8590e05765ce 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job.py @@ -864,7 +864,7 @@ def test_cancel_w_custom_retry(self): job = self._set_properties_job() api_request_patcher = mock.patch.object( - job._client._connection, "api_request", side_effect=[ValueError, response], + job._client._connection, "api_request", side_effect=[ValueError, response] ) retry = DEFAULT_RETRY.with_deadline(1).with_predicate( lambda exc: isinstance(exc, ValueError) @@ -885,7 +885,7 @@ def test_cancel_w_custom_retry(self): [ mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), mock.call( - method="POST", path=api_path, query_params={}, timeout=7.5, + method="POST", path=api_path, query_params={}, timeout=7.5 ), # was retried once ], ) @@ -1034,7 +1034,6 @@ def test_result_w_retry_wo_state(self): custom_predicate = mock.Mock() custom_predicate.return_value = True custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - self.assertIs(job.result(retry=custom_retry), job) begin_call = mock.call( @@ -2757,7 +2756,7 @@ def test_cancel_w_bound_client(self): final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None, + method="POST", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -2779,7 +2778,7 @@ def test_cancel_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None, + method="POST", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -3205,7 +3204,7 @@ def test_exists_miss_w_bound_client(self): final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def test_exists_hit_w_alternate_client(self): @@ -3620,7 +3619,7 @@ def test_exists_miss_w_bound_client(self): final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def test_exists_hit_w_alternate_client(self): @@ -4812,6 +4811,60 @@ def test_result_with_max_results(self): tabledata_list_request[1]["query_params"]["maxResults"], max_results ) + def test_result_w_retry(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + + connection = _make_connection( + exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + query_resource_done, + exceptions.NotFound("not normally retriable"), + job_resource_done, + ) + client = _make_client(self.PROJECT, connection=connection) + job = self._get_target_class().from_api_repr(job_resource, client) + + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) + + self.assertIsInstance(job.result(retry=custom_retry), RowIterator) + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + + connection.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call] + ) + def test_result_w_empty_schema(self): from google.cloud.bigquery.table import _EmptyRowIterator From e52d56dd3356e4afdacbef64189e07d24e4118f0 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 3 Nov 2020 16:58:03 +0100 Subject: [PATCH 0979/2016] fix: add missing spaces in opentelemetry log message (#360) Currently this log message renders like this: ``` This service is instrumented using OpenTelemetry.OpenTelemetry could not be imported; pleaseadd opentelemetry-api and opentelemetry-instrumentationpackages in order to get BigQuery Tracing data. ``` where it should be ``` This service is instrumented using OpenTelemetry. OpenTelemetry could not be imported; please add opentelemetry-api and opentelemetry-instrumentation packages in order to get BigQuery Tracing data." ``` --- .../google/cloud/bigquery/opentelemetry_tracing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py index f7375c3466a4..b9d18efade0c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -26,9 +26,9 @@ except ImportError: logger.info( - "This service is instrumented using OpenTelemetry." - "OpenTelemetry could not be imported; please" - "add opentelemetry-api and opentelemetry-instrumentation" + "This service is instrumented using OpenTelemetry. " + "OpenTelemetry could not be imported; please " + "add opentelemetry-api and opentelemetry-instrumentation " "packages in order to get BigQuery Tracing data." ) From 9b4f4fee3db7fb70de00c53a8d3fe738bd08d0c5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 4 Nov 2020 09:09:35 -0600 Subject: [PATCH 0980/2016] refactor: break job into multiple modules (#361) Original paths are retained for backwards compatibility. --- .../google/cloud/bigquery/enums.py | 139 + .../google/cloud/bigquery/job.py | 3846 ---------- .../google/cloud/bigquery/job/__init__.py | 77 + .../google/cloud/bigquery/job/base.py | 912 +++ .../google/cloud/bigquery/job/copy_.py | 223 + .../google/cloud/bigquery/job/extract.py | 266 + .../google/cloud/bigquery/job/load.py | 758 ++ .../google/cloud/bigquery/job/query.py | 1644 +++++ .../tests/unit/job/__init__.py | 13 + .../tests/unit/job/helpers.py | 198 + .../tests/unit/job/test_base.py | 1105 +++ .../tests/unit/job/test_copy.py | 477 ++ .../tests/unit/job/test_extract.py | 437 ++ .../tests/unit/job/test_load.py | 838 +++ .../tests/unit/job/test_load_config.py | 710 ++ .../tests/unit/job/test_query.py | 1811 +++++ .../tests/unit/job/test_query_config.py | 255 + .../tests/unit/job/test_query_pandas.py | 450 ++ .../tests/unit/job/test_query_stats.py | 356 + .../tests/unit/test_job.py | 6448 ----------------- 20 files changed, 10669 insertions(+), 10294 deletions(-) delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/job.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/__init__.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/helpers.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/test_base.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/test_copy.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/test_extract.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/test_load.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/test_load_config.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/test_query.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/test_query_config.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py delete mode 100644 packages/google-cloud-bigquery/tests/unit/test_job.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index eb33e42763b0..3f72333afbeb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -20,6 +20,124 @@ from google.cloud.bigquery_v2 import types as gapic_types +class Compression(object): + """The compression type to use for exported files. The default value is + :attr:`NONE`. + + :attr:`DEFLATE` and :attr:`SNAPPY` are + only supported for Avro. + """ + + GZIP = "GZIP" + """Specifies GZIP format.""" + + DEFLATE = "DEFLATE" + """Specifies DEFLATE format.""" + + SNAPPY = "SNAPPY" + """Specifies SNAPPY format.""" + + NONE = "NONE" + """Specifies no compression.""" + + +class CreateDisposition(object): + """Specifies whether the job is allowed to create new tables. The default + value is :attr:`CREATE_IF_NEEDED`. + + Creation, truncation and append actions occur as one atomic update + upon job completion. + """ + + CREATE_IF_NEEDED = "CREATE_IF_NEEDED" + """If the table does not exist, BigQuery creates the table.""" + + CREATE_NEVER = "CREATE_NEVER" + """The table must already exist. If it does not, a 'notFound' error is + returned in the job result.""" + + +class DestinationFormat(object): + """The exported file format. The default value is :attr:`CSV`. + + Tables with nested or repeated fields cannot be exported as CSV. + """ + + CSV = "CSV" + """Specifies CSV format.""" + + NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" + """Specifies newline delimited JSON format.""" + + AVRO = "AVRO" + """Specifies Avro format.""" + + +class Encoding(object): + """The character encoding of the data. The default is :attr:`UTF_8`. + + BigQuery decodes the data after the raw, binary data has been + split using the values of the quote and fieldDelimiter properties. + """ + + UTF_8 = "UTF-8" + """Specifies UTF-8 encoding.""" + + ISO_8859_1 = "ISO-8859-1" + """Specifies ISO-8859-1 encoding.""" + + +class QueryPriority(object): + """Specifies a priority for the query. The default value is + :attr:`INTERACTIVE`. + """ + + INTERACTIVE = "INTERACTIVE" + """Specifies interactive priority.""" + + BATCH = "BATCH" + """Specifies batch priority.""" + + +class SchemaUpdateOption(object): + """Specifies an update to the destination table schema as a side effect of + a load job. + """ + + ALLOW_FIELD_ADDITION = "ALLOW_FIELD_ADDITION" + """Allow adding a nullable field to the schema.""" + + ALLOW_FIELD_RELAXATION = "ALLOW_FIELD_RELAXATION" + """Allow relaxing a required field in the original schema to nullable.""" + + +class SourceFormat(object): + """The format of the data files. The default value is :attr:`CSV`. + + Note that the set of allowed values for loading data is different + than the set used for external data sources (see + :class:`~google.cloud.bigquery.external_config.ExternalSourceFormat`). + """ + + CSV = "CSV" + """Specifies CSV format.""" + + DATASTORE_BACKUP = "DATASTORE_BACKUP" + """Specifies datastore backup format""" + + NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" + """Specifies newline delimited JSON format.""" + + AVRO = "AVRO" + """Specifies Avro format.""" + + PARQUET = "PARQUET" + """Specifies Parquet format.""" + + ORC = "ORC" + """Specifies Orc format.""" + + _SQL_SCALAR_TYPES = frozenset( ( "INT64", @@ -92,3 +210,24 @@ class SqlTypeNames(str, enum.Enum): DATE = "DATE" TIME = "TIME" DATETIME = "DATETIME" + + +class WriteDisposition(object): + """Specifies the action that occurs if destination table already exists. + + The default value is :attr:`WRITE_APPEND`. + + Each action is atomic and only occurs if BigQuery is able to complete + the job successfully. Creation, truncation and append actions occur as one + atomic update upon job completion. + """ + + WRITE_APPEND = "WRITE_APPEND" + """If the table already exists, BigQuery appends the data to the table.""" + + WRITE_TRUNCATE = "WRITE_TRUNCATE" + """If the table already exists, BigQuery overwrites the table data.""" + + WRITE_EMPTY = "WRITE_EMPTY" + """If the table already exists and contains data, a 'duplicate' error is + returned in the job result.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job.py deleted file mode 100644 index e2e7e839a125..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job.py +++ /dev/null @@ -1,3846 +0,0 @@ -# Copyright 2015 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Define API Jobs.""" - -from __future__ import division - -import concurrent.futures -import copy -import re -import threading - -import requests -import six -from six.moves import http_client - -import google.api_core.future.polling -from google.cloud import exceptions -from google.cloud.exceptions import NotFound -from google.cloud.bigquery.dataset import Dataset -from google.cloud.bigquery.dataset import DatasetListItem -from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration -from google.cloud.bigquery.external_config import ExternalConfig -from google.cloud.bigquery.external_config import HivePartitioningOptions -from google.cloud.bigquery import _helpers -from google.cloud.bigquery.model import ModelReference -from google.cloud.bigquery.query import _query_param_from_api_repr -from google.cloud.bigquery.query import ArrayQueryParameter -from google.cloud.bigquery.query import ScalarQueryParameter -from google.cloud.bigquery.query import StructQueryParameter -from google.cloud.bigquery.query import UDFResource -from google.cloud.bigquery.retry import DEFAULT_RETRY -from google.cloud.bigquery.routine import RoutineReference -from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery.schema import _to_schema_fields -from google.cloud.bigquery.table import _EmptyRowIterator -from google.cloud.bigquery.table import RangePartitioning -from google.cloud.bigquery.table import _table_arg_to_table_ref -from google.cloud.bigquery.table import Table -from google.cloud.bigquery.table import TableListItem -from google.cloud.bigquery.table import TableReference -from google.cloud.bigquery.table import TimePartitioning - -_DONE_STATE = "DONE" -_STOPPED_REASON = "stopped" -_TIMEOUT_BUFFER_SECS = 0.1 -_CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) - -_ERROR_REASON_TO_EXCEPTION = { - "accessDenied": http_client.FORBIDDEN, - "backendError": http_client.INTERNAL_SERVER_ERROR, - "billingNotEnabled": http_client.FORBIDDEN, - "billingTierLimitExceeded": http_client.BAD_REQUEST, - "blocked": http_client.FORBIDDEN, - "duplicate": http_client.CONFLICT, - "internalError": http_client.INTERNAL_SERVER_ERROR, - "invalid": http_client.BAD_REQUEST, - "invalidQuery": http_client.BAD_REQUEST, - "notFound": http_client.NOT_FOUND, - "notImplemented": http_client.NOT_IMPLEMENTED, - "quotaExceeded": http_client.FORBIDDEN, - "rateLimitExceeded": http_client.FORBIDDEN, - "resourceInUse": http_client.BAD_REQUEST, - "resourcesExceeded": http_client.BAD_REQUEST, - "responseTooLarge": http_client.FORBIDDEN, - "stopped": http_client.OK, - "tableUnavailable": http_client.BAD_REQUEST, -} - - -def _error_result_to_exception(error_result): - """Maps BigQuery error reasons to an exception. - - The reasons and their matching HTTP status codes are documented on - the `troubleshooting errors`_ page. - - .. _troubleshooting errors: https://cloud.google.com/bigquery\ - /troubleshooting-errors - - Args: - error_result (Mapping[str, str]): The error result from BigQuery. - - Returns: - google.cloud.exceptions.GoogleCloudError: The mapped exception. - """ - reason = error_result.get("reason") - status_code = _ERROR_REASON_TO_EXCEPTION.get( - reason, http_client.INTERNAL_SERVER_ERROR - ) - return exceptions.from_http_status( - status_code, error_result.get("message", ""), errors=[error_result] - ) - - -def _contains_order_by(query): - """Do we need to preserve the order of the query results? - - This function has known false positives, such as with ordered window - functions: - - .. code-block:: sql - - SELECT SUM(x) OVER ( - window_name - PARTITION BY... - ORDER BY... - window_frame_clause) - FROM ... - - This false positive failure case means the behavior will be correct, but - downloading results with the BigQuery Storage API may be slower than it - otherwise would. This is preferable to the false negative case, where - results are expected to be in order but are not (due to parallel reads). - """ - return query and _CONTAINS_ORDER_BY.search(query) - - -class Compression(object): - """The compression type to use for exported files. The default value is - :attr:`NONE`. - - :attr:`DEFLATE` and :attr:`SNAPPY` are - only supported for Avro. - """ - - GZIP = "GZIP" - """Specifies GZIP format.""" - - DEFLATE = "DEFLATE" - """Specifies DEFLATE format.""" - - SNAPPY = "SNAPPY" - """Specifies SNAPPY format.""" - - NONE = "NONE" - """Specifies no compression.""" - - -class CreateDisposition(object): - """Specifies whether the job is allowed to create new tables. The default - value is :attr:`CREATE_IF_NEEDED`. - - Creation, truncation and append actions occur as one atomic update - upon job completion. - """ - - CREATE_IF_NEEDED = "CREATE_IF_NEEDED" - """If the table does not exist, BigQuery creates the table.""" - - CREATE_NEVER = "CREATE_NEVER" - """The table must already exist. If it does not, a 'notFound' error is - returned in the job result.""" - - -class DestinationFormat(object): - """The exported file format. The default value is :attr:`CSV`. - - Tables with nested or repeated fields cannot be exported as CSV. - """ - - CSV = "CSV" - """Specifies CSV format.""" - - NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" - """Specifies newline delimited JSON format.""" - - AVRO = "AVRO" - """Specifies Avro format.""" - - -class Encoding(object): - """The character encoding of the data. The default is :attr:`UTF_8`. - - BigQuery decodes the data after the raw, binary data has been - split using the values of the quote and fieldDelimiter properties. - """ - - UTF_8 = "UTF-8" - """Specifies UTF-8 encoding.""" - - ISO_8859_1 = "ISO-8859-1" - """Specifies ISO-8859-1 encoding.""" - - -class QueryPriority(object): - """Specifies a priority for the query. The default value is - :attr:`INTERACTIVE`. - """ - - INTERACTIVE = "INTERACTIVE" - """Specifies interactive priority.""" - - BATCH = "BATCH" - """Specifies batch priority.""" - - -class SourceFormat(object): - """The format of the data files. The default value is :attr:`CSV`. - - Note that the set of allowed values for loading data is different - than the set used for external data sources (see - :class:`~google.cloud.bigquery.external_config.ExternalSourceFormat`). - """ - - CSV = "CSV" - """Specifies CSV format.""" - - DATASTORE_BACKUP = "DATASTORE_BACKUP" - """Specifies datastore backup format""" - - NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" - """Specifies newline delimited JSON format.""" - - AVRO = "AVRO" - """Specifies Avro format.""" - - PARQUET = "PARQUET" - """Specifies Parquet format.""" - - ORC = "ORC" - """Specifies Orc format.""" - - -class WriteDisposition(object): - """Specifies the action that occurs if destination table already exists. - - The default value is :attr:`WRITE_APPEND`. - - Each action is atomic and only occurs if BigQuery is able to complete - the job successfully. Creation, truncation and append actions occur as one - atomic update upon job completion. - """ - - WRITE_APPEND = "WRITE_APPEND" - """If the table already exists, BigQuery appends the data to the table.""" - - WRITE_TRUNCATE = "WRITE_TRUNCATE" - """If the table already exists, BigQuery overwrites the table data.""" - - WRITE_EMPTY = "WRITE_EMPTY" - """If the table already exists and contains data, a 'duplicate' error is - returned in the job result.""" - - -class SchemaUpdateOption(object): - """Specifies an update to the destination table schema as a side effect of - a load job. - """ - - ALLOW_FIELD_ADDITION = "ALLOW_FIELD_ADDITION" - """Allow adding a nullable field to the schema.""" - - ALLOW_FIELD_RELAXATION = "ALLOW_FIELD_RELAXATION" - """Allow relaxing a required field in the original schema to nullable.""" - - -class _JobReference(object): - """A reference to a job. - - Args: - job_id (str): ID of the job to run. - project (str): ID of the project where the job runs. - location (str): Location of where the job runs. - """ - - def __init__(self, job_id, project, location): - self._properties = {"jobId": job_id, "projectId": project} - # The location field must not be populated if it is None. - if location: - self._properties["location"] = location - - @property - def job_id(self): - """str: ID of the job.""" - return self._properties.get("jobId") - - @property - def project(self): - """str: ID of the project where the job runs.""" - return self._properties.get("projectId") - - @property - def location(self): - """str: Location where the job runs.""" - return self._properties.get("location") - - def _to_api_repr(self): - """Returns the API resource representation of the job reference.""" - return copy.deepcopy(self._properties) - - @classmethod - def _from_api_repr(cls, resource): - """Returns a job reference for an API resource representation.""" - job_id = resource.get("jobId") - project = resource.get("projectId") - location = resource.get("location") - job_ref = cls(job_id, project, location) - return job_ref - - -class _AsyncJob(google.api_core.future.polling.PollingFuture): - """Base class for asynchronous jobs. - - Args: - job_id (Union[str, _JobReference]): - Job's ID in the project associated with the client or a - fully-qualified job reference. - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project configuration. - """ - - def __init__(self, job_id, client): - super(_AsyncJob, self).__init__() - - # The job reference can be either a plain job ID or the full resource. - # Populate the properties dictionary consistently depending on what has - # been passed in. - job_ref = job_id - if not isinstance(job_id, _JobReference): - job_ref = _JobReference(job_id, client.project, None) - self._properties = {"jobReference": job_ref._to_api_repr()} - - self._client = client - self._result_set = False - self._completion_lock = threading.Lock() - - @property - def job_id(self): - """str: ID of the job.""" - return _helpers._get_sub_prop(self._properties, ["jobReference", "jobId"]) - - @property - def parent_job_id(self): - """Return the ID of the parent job. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.parent_job_id - - Returns: - Optional[str]: parent job id. - """ - return _helpers._get_sub_prop(self._properties, ["statistics", "parentJobId"]) - - @property - def script_statistics(self): - resource = _helpers._get_sub_prop( - self._properties, ["statistics", "scriptStatistics"] - ) - if resource is None: - return None - return ScriptStatistics(resource) - - @property - def num_child_jobs(self): - """The number of child jobs executed. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.num_child_jobs - - Returns: - int - """ - count = _helpers._get_sub_prop(self._properties, ["statistics", "numChildJobs"]) - return int(count) if count is not None else 0 - - @property - def project(self): - """Project bound to the job. - - Returns: - str: the project (derived from the client). - """ - return _helpers._get_sub_prop(self._properties, ["jobReference", "projectId"]) - - @property - def location(self): - """str: Location where the job runs.""" - return _helpers._get_sub_prop(self._properties, ["jobReference", "location"]) - - def _require_client(self, client): - """Check client or verify over-ride. - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - Returns: - google.cloud.bigquery.client.Client: - The client passed in or the currently bound client. - """ - if client is None: - client = self._client - return client - - @property - def job_type(self): - """Type of job. - - Returns: - str: one of 'load', 'copy', 'extract', 'query'. - """ - return self._JOB_TYPE - - @property - def path(self): - """URL path for the job's APIs. - - Returns: - str: the path based on project and job ID. - """ - return "/projects/%s/jobs/%s" % (self.project, self.job_id) - - @property - def labels(self): - """Dict[str, str]: Labels for the job.""" - return self._properties.setdefault("labels", {}) - - @property - def etag(self): - """ETag for the job resource. - - Returns: - Optional[str]: the ETag (None until set from the server). - """ - return self._properties.get("etag") - - @property - def self_link(self): - """URL for the job resource. - - Returns: - Optional[str]: the URL (None until set from the server). - """ - return self._properties.get("selfLink") - - @property - def user_email(self): - """E-mail address of user who submitted the job. - - Returns: - Optional[str]: the URL (None until set from the server). - """ - return self._properties.get("user_email") - - @property - def created(self): - """Datetime at which the job was created. - - Returns: - Optional[datetime.datetime]: - the creation time (None until set from the server). - """ - millis = _helpers._get_sub_prop( - self._properties, ["statistics", "creationTime"] - ) - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) - - @property - def started(self): - """Datetime at which the job was started. - - Returns: - Optional[datetime.datetime]: - the start time (None until set from the server). - """ - millis = _helpers._get_sub_prop(self._properties, ["statistics", "startTime"]) - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) - - @property - def ended(self): - """Datetime at which the job finished. - - Returns: - Optional[datetime.datetime]: - the end time (None until set from the server). - """ - millis = _helpers._get_sub_prop(self._properties, ["statistics", "endTime"]) - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) - - def _job_statistics(self): - """Helper for job-type specific statistics-based properties.""" - statistics = self._properties.get("statistics", {}) - return statistics.get(self._JOB_TYPE, {}) - - @property - def error_result(self): - """Error information about the job as a whole. - - Returns: - Optional[Mapping]: the error information (None until set from the server). - """ - status = self._properties.get("status") - if status is not None: - return status.get("errorResult") - - @property - def errors(self): - """Information about individual errors generated by the job. - - Returns: - Optional[List[Mapping]]: - the error information (None until set from the server). - """ - status = self._properties.get("status") - if status is not None: - return status.get("errors") - - @property - def state(self): - """Status of the job. - - Returns: - Optional[str]: - the state (None until set from the server). - """ - status = self._properties.get("status", {}) - return status.get("state") - - def _set_properties(self, api_response): - """Update properties from resource in body of ``api_response`` - - Args: - api_response (Dict): response returned from an API call. - """ - cleaned = api_response.copy() - - statistics = cleaned.get("statistics", {}) - if "creationTime" in statistics: - statistics["creationTime"] = float(statistics["creationTime"]) - if "startTime" in statistics: - statistics["startTime"] = float(statistics["startTime"]) - if "endTime" in statistics: - statistics["endTime"] = float(statistics["endTime"]) - - # Save configuration to keep reference same in self._configuration. - cleaned_config = cleaned.pop("configuration", {}) - configuration = self._properties.pop("configuration", {}) - self._properties.clear() - self._properties.update(cleaned) - self._properties["configuration"] = configuration - self._properties["configuration"].update(cleaned_config) - - # For Future interface - self._set_future_result() - - @classmethod - def _check_resource_config(cls, resource): - """Helper for :meth:`from_api_repr` - - Args: - resource (Dict): resource for the job. - - Raises: - KeyError: - If the resource has no identifier, or - is missing the appropriate configuration. - """ - if "jobReference" not in resource or "jobId" not in resource["jobReference"]: - raise KeyError( - "Resource lacks required identity information: " - '["jobReference"]["jobId"]' - ) - if ( - "configuration" not in resource - or cls._JOB_TYPE not in resource["configuration"] - ): - raise KeyError( - "Resource lacks required configuration: " - '["configuration"]["%s"]' % cls._JOB_TYPE - ) - - def to_api_repr(self): - """Generate a resource for the job.""" - return copy.deepcopy(self._properties) - - _build_resource = to_api_repr # backward-compatibility alias - - def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: begin the job via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - The client to use. If not passed, falls back to the ``client`` - associated with the job object or``NoneType`` - retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - - Raises: - ValueError: - If the job has already begun. - """ - if self.state is not None: - raise ValueError("Job already begun.") - - client = self._require_client(client) - path = "/projects/%s/jobs" % (self.project,) - - # jobs.insert is idempotent because we ensure that every new - # job has an ID. - span_attributes = {"path": path} - api_response = client._call_api( - retry, - span_name="BigQuery.job.begin", - span_attributes=span_attributes, - job_ref=self, - method="POST", - path=path, - data=self.to_api_repr(), - timeout=timeout, - ) - self._set_properties(api_response) - - def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: test for the existence of the job via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - - Returns: - bool: Boolean indicating existence of the job. - """ - client = self._require_client(client) - - extra_params = {"fields": "id"} - if self.location: - extra_params["location"] = self.location - - try: - span_attributes = {"path": self.path} - - client._call_api( - retry, - span_name="BigQuery.job.exists", - span_attributes=span_attributes, - job_ref=self, - method="GET", - path=self.path, - query_params=extra_params, - timeout=timeout, - ) - except NotFound: - return False - else: - return True - - def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: refresh job properties via a GET request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - """ - client = self._require_client(client) - - extra_params = {} - if self.location: - extra_params["location"] = self.location - span_attributes = {"path": self.path} - - api_response = client._call_api( - retry, - span_name="BigQuery.job.reload", - span_attributes=span_attributes, - job_ref=self, - method="GET", - path=self.path, - query_params=extra_params, - timeout=timeout, - ) - self._set_properties(api_response) - - def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: cancel job via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry`` - - Returns: - bool: Boolean indicating that the cancel request was sent. - """ - client = self._require_client(client) - - extra_params = {} - if self.location: - extra_params["location"] = self.location - - path = "{}/cancel".format(self.path) - span_attributes = {"path": path} - - api_response = client._call_api( - retry, - span_name="BigQuery.job.cancel", - span_attributes=span_attributes, - job_ref=self, - method="POST", - path=path, - query_params=extra_params, - timeout=timeout, - ) - self._set_properties(api_response["job"]) - # The Future interface requires that we return True if the *attempt* - # to cancel was successful. - return True - - # The following methods implement the PollingFuture interface. Note that - # the methods above are from the pre-Future interface and are left for - # compatibility. The only "overloaded" method is :meth:`cancel`, which - # satisfies both interfaces. - - def _set_future_result(self): - """Set the result or exception from the job if it is complete.""" - # This must be done in a lock to prevent the polling thread - # and main thread from both executing the completion logic - # at the same time. - with self._completion_lock: - # If the operation isn't complete or if the result has already been - # set, do not call set_result/set_exception again. - # Note: self._result_set is set to True in set_result and - # set_exception, in case those methods are invoked directly. - if not self.done(reload=False) or self._result_set: - return - - if self.error_result is not None: - exception = _error_result_to_exception(self.error_result) - self.set_exception(exception) - else: - self.set_result(self) - - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): - """Checks if the job is complete. - - Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - reload (Optional[bool]): - If ``True``, make an API call to refresh the job state of - unfinished jobs before checking. Default ``True``. - - Returns: - bool: True if the job is complete, False otherwise. - """ - # Do not refresh is the state is already done, as the job will not - # change once complete. - if self.state != _DONE_STATE and reload: - self.reload(retry=retry, timeout=timeout) - return self.state == _DONE_STATE - - def result(self, retry=DEFAULT_RETRY, timeout=None): - """Start the job and wait for it to complete and get the result. - - Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - If multiple requests are made under the hood, ``timeout`` - applies to each individual request. - - Returns: - _AsyncJob: This instance. - - Raises: - google.cloud.exceptions.GoogleCloudError: - if the job failed. - concurrent.futures.TimeoutError: - if the job did not complete in the given timeout. - """ - if self.state is None: - self._begin(retry=retry, timeout=timeout) - - kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry} - return super(_AsyncJob, self).result(timeout=timeout, **kwargs) - - def cancelled(self): - """Check if the job has been cancelled. - - This always returns False. It's not possible to check if a job was - cancelled in the API. This method is here to satisfy the interface - for :class:`google.api_core.future.Future`. - - Returns: - bool: False - """ - return ( - self.error_result is not None - and self.error_result.get("reason") == _STOPPED_REASON - ) - - -class _JobConfig(object): - """Abstract base class for job configuration objects. - - Args: - job_type (str): The key to use for the job configuration. - """ - - def __init__(self, job_type, **kwargs): - self._job_type = job_type - self._properties = {job_type: {}} - for prop, val in kwargs.items(): - setattr(self, prop, val) - - @property - def labels(self): - """Dict[str, str]: Labels for the job. - - This method always returns a dict. To change a job's labels, - modify the dict, then call ``Client.update_job``. To delete a - label, set its value to :data:`None` before updating. - - Raises: - ValueError: If ``value`` type is invalid. - """ - return self._properties.setdefault("labels", {}) - - @labels.setter - def labels(self, value): - if not isinstance(value, dict): - raise ValueError("Pass a dict") - self._properties["labels"] = value - - def _get_sub_prop(self, key, default=None): - """Get a value in the ``self._properties[self._job_type]`` dictionary. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access - those properties:: - - self._get_sub_prop('destinationTable') - - This is equivalent to using the ``_helpers._get_sub_prop`` function:: - - _helpers._get_sub_prop( - self._properties, ['query', 'destinationTable']) - - Args: - key (str): - Key for the value to get in the - ``self._properties[self._job_type]`` dictionary. - default (Optional[object]): - Default value to return if the key is not found. - Defaults to :data:`None`. - - Returns: - object: The value if present or the default. - """ - return _helpers._get_sub_prop( - self._properties, [self._job_type, key], default=default - ) - - def _set_sub_prop(self, key, value): - """Set a value in the ``self._properties[self._job_type]`` dictionary. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to set - those properties:: - - self._set_sub_prop('useLegacySql', False) - - This is equivalent to using the ``_helper._set_sub_prop`` function:: - - _helper._set_sub_prop( - self._properties, ['query', 'useLegacySql'], False) - - Args: - key (str): - Key to set in the ``self._properties[self._job_type]`` - dictionary. - value (object): Value to set. - """ - _helpers._set_sub_prop(self._properties, [self._job_type, key], value) - - def _del_sub_prop(self, key): - """Remove ``key`` from the ``self._properties[self._job_type]`` dict. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear - those properties:: - - self._del_sub_prop('useLegacySql') - - This is equivalent to using the ``_helper._del_sub_prop`` function:: - - _helper._del_sub_prop( - self._properties, ['query', 'useLegacySql']) - - Args: - key (str): - Key to remove in the ``self._properties[self._job_type]`` - dictionary. - """ - _helpers._del_sub_prop(self._properties, [self._job_type, key]) - - def to_api_repr(self): - """Build an API representation of the job config. - - Returns: - Dict: A dictionary in the format used by the BigQuery API. - """ - return copy.deepcopy(self._properties) - - def _fill_from_default(self, default_job_config): - """Merge this job config with a default job config. - - The keys in this object take precedence over the keys in the default - config. The merge is done at the top-level as well as for keys one - level below the job type. - - Args: - default_job_config (google.cloud.bigquery.job._JobConfig): - The default job config that will be used to fill in self. - - Returns: - google.cloud.bigquery.job._JobConfig: A new (merged) job config. - """ - if self._job_type != default_job_config._job_type: - raise TypeError( - "attempted to merge two incompatible job types: " - + repr(self._job_type) - + ", " - + repr(default_job_config._job_type) - ) - - new_job_config = self.__class__() - - default_job_properties = copy.deepcopy(default_job_config._properties) - for key in self._properties: - if key != self._job_type: - default_job_properties[key] = self._properties[key] - - default_job_properties[self._job_type].update(self._properties[self._job_type]) - new_job_config._properties = default_job_properties - - return new_job_config - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a job configuration given its API representation - - Args: - resource (Dict): - A job configuration in the same representation as is returned - from the API. - - Returns: - google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. - """ - job_config = cls() - job_config._properties = resource - return job_config - - -class LoadJobConfig(_JobConfig): - """Configuration options for load jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. - """ - - def __init__(self, **kwargs): - super(LoadJobConfig, self).__init__("load", **kwargs) - - @property - def allow_jagged_rows(self): - """Optional[bool]: Allow missing trailing optional columns (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_jagged_rows - """ - return self._get_sub_prop("allowJaggedRows") - - @allow_jagged_rows.setter - def allow_jagged_rows(self, value): - self._set_sub_prop("allowJaggedRows", value) - - @property - def allow_quoted_newlines(self): - """Optional[bool]: Allow quoted data containing newline characters (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_quoted_newlines - """ - return self._get_sub_prop("allowQuotedNewlines") - - @allow_quoted_newlines.setter - def allow_quoted_newlines(self, value): - self._set_sub_prop("allowQuotedNewlines", value) - - @property - def autodetect(self): - """Optional[bool]: Automatically infer the schema from a sample of the data. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.autodetect - """ - return self._get_sub_prop("autodetect") - - @autodetect.setter - def autodetect(self, value): - self._set_sub_prop("autodetect", value) - - @property - def clustering_fields(self): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - - Clustering fields are immutable after table creation. - - .. note:: - - BigQuery supports clustering for both partitioned and - non-partitioned tables. - """ - prop = self._get_sub_prop("clustering") - if prop is not None: - return list(prop.get("fields", ())) - - @clustering_fields.setter - def clustering_fields(self, value): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - """ - if value is not None: - self._set_sub_prop("clustering", {"fields": value}) - else: - self._del_sub_prop("clustering") - - @property - def create_disposition(self): - """Optional[google.cloud.bigquery.job.CreateDisposition]: Specifies behavior - for creating tables. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_disposition - """ - return self._get_sub_prop("createDisposition") - - @create_disposition.setter - def create_disposition(self, value): - self._set_sub_prop("createDisposition", value) - - @property - def destination_encryption_configuration(self): - """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_encryption_configuration - """ - prop = self._get_sub_prop("destinationEncryptionConfiguration") - if prop is not None: - prop = EncryptionConfiguration.from_api_repr(prop) - return prop - - @destination_encryption_configuration.setter - def destination_encryption_configuration(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("destinationEncryptionConfiguration", api_repr) - else: - self._del_sub_prop("destinationEncryptionConfiguration") - - @property - def destination_table_description(self): - """Optional[str]: Name given to destination table. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description - """ - prop = self._get_sub_prop("destinationTableProperties") - if prop is not None: - return prop["description"] - - @destination_table_description.setter - def destination_table_description(self, value): - keys = [self._job_type, "destinationTableProperties", "description"] - if value is not None: - _helpers._set_sub_prop(self._properties, keys, value) - else: - _helpers._del_sub_prop(self._properties, keys) - - @property - def destination_table_friendly_name(self): - """Optional[str]: Name given to destination table. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name - """ - prop = self._get_sub_prop("destinationTableProperties") - if prop is not None: - return prop["friendlyName"] - - @destination_table_friendly_name.setter - def destination_table_friendly_name(self, value): - keys = [self._job_type, "destinationTableProperties", "friendlyName"] - if value is not None: - _helpers._set_sub_prop(self._properties, keys, value) - else: - _helpers._del_sub_prop(self._properties, keys) - - @property - def encoding(self): - """Optional[google.cloud.bigquery.job.Encoding]: The character encoding of the - data. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.encoding - """ - return self._get_sub_prop("encoding") - - @encoding.setter - def encoding(self, value): - self._set_sub_prop("encoding", value) - - @property - def field_delimiter(self): - """Optional[str]: The separator for fields in a CSV file. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.field_delimiter - """ - return self._get_sub_prop("fieldDelimiter") - - @field_delimiter.setter - def field_delimiter(self, value): - self._set_sub_prop("fieldDelimiter", value) - - @property - def hive_partitioning(self): - """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ - it configures hive partitioning support. - - .. note:: - **Experimental**. This feature is experimental and might change or - have limited support. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options - """ - prop = self._get_sub_prop("hivePartitioningOptions") - if prop is None: - return None - return HivePartitioningOptions.from_api_repr(prop) - - @hive_partitioning.setter - def hive_partitioning(self, value): - if value is not None: - if isinstance(value, HivePartitioningOptions): - value = value.to_api_repr() - else: - raise TypeError("Expected a HivePartitioningOptions instance or None.") - - self._set_sub_prop("hivePartitioningOptions", value) - - @property - def ignore_unknown_values(self): - """Optional[bool]: Ignore extra values not represented in the table schema. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.ignore_unknown_values - """ - return self._get_sub_prop("ignoreUnknownValues") - - @ignore_unknown_values.setter - def ignore_unknown_values(self, value): - self._set_sub_prop("ignoreUnknownValues", value) - - @property - def max_bad_records(self): - """Optional[int]: Number of invalid rows to ignore. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.max_bad_records - """ - return _helpers._int_or_none(self._get_sub_prop("maxBadRecords")) - - @max_bad_records.setter - def max_bad_records(self, value): - self._set_sub_prop("maxBadRecords", value) - - @property - def null_marker(self): - """Optional[str]: Represents a null value (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_marker - """ - return self._get_sub_prop("nullMarker") - - @null_marker.setter - def null_marker(self, value): - self._set_sub_prop("nullMarker", value) - - @property - def quote_character(self): - """Optional[str]: Character used to quote data sections (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.quote - """ - return self._get_sub_prop("quote") - - @quote_character.setter - def quote_character(self, value): - self._set_sub_prop("quote", value) - - @property - def range_partitioning(self): - """Optional[google.cloud.bigquery.table.RangePartitioning]: - Configures range-based partitioning for destination table. - - .. note:: - **Beta**. The integer range partitioning feature is in a - pre-release state and might change or have limited support. - - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - - Raises: - ValueError: - If the value is not - :class:`~google.cloud.bigquery.table.RangePartitioning` or - :data:`None`. - """ - resource = self._get_sub_prop("rangePartitioning") - if resource is not None: - return RangePartitioning(_properties=resource) - - @range_partitioning.setter - def range_partitioning(self, value): - resource = value - if isinstance(value, RangePartitioning): - resource = value._properties - elif value is not None: - raise ValueError( - "Expected value to be RangePartitioning or None, got {}.".format(value) - ) - self._set_sub_prop("rangePartitioning", resource) - - @property - def schema(self): - """Optional[Sequence[Union[ \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - Mapping[str, Any] \ - ]]]: Schema of the destination table. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.schema - """ - schema = _helpers._get_sub_prop(self._properties, ["load", "schema", "fields"]) - if schema is None: - return - return [SchemaField.from_api_repr(field) for field in schema] - - @schema.setter - def schema(self, value): - if value is None: - self._del_sub_prop("schema") - return - - value = _to_schema_fields(value) - - _helpers._set_sub_prop( - self._properties, - ["load", "schema", "fields"], - [field.to_api_repr() for field in value], - ) - - @property - def schema_update_options(self): - """Optional[List[google.cloud.bigquery.job.SchemaUpdateOption]]: Specifies - updates to the destination table schema to allow as a side effect of - the load job. - """ - return self._get_sub_prop("schemaUpdateOptions") - - @schema_update_options.setter - def schema_update_options(self, values): - self._set_sub_prop("schemaUpdateOptions", values) - - @property - def skip_leading_rows(self): - """Optional[int]: Number of rows to skip when reading data (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.skip_leading_rows - """ - return _helpers._int_or_none(self._get_sub_prop("skipLeadingRows")) - - @skip_leading_rows.setter - def skip_leading_rows(self, value): - self._set_sub_prop("skipLeadingRows", str(value)) - - @property - def source_format(self): - """Optional[google.cloud.bigquery.job.SourceFormat]: File format of the data. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_format - """ - return self._get_sub_prop("sourceFormat") - - @source_format.setter - def source_format(self, value): - self._set_sub_prop("sourceFormat", value) - - @property - def time_partitioning(self): - """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based - partitioning for the destination table. - - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - """ - prop = self._get_sub_prop("timePartitioning") - if prop is not None: - prop = TimePartitioning.from_api_repr(prop) - return prop - - @time_partitioning.setter - def time_partitioning(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("timePartitioning", api_repr) - else: - self._del_sub_prop("timePartitioning") - - @property - def use_avro_logical_types(self): - """Optional[bool]: For loads of Avro data, governs whether Avro logical types are - converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than - raw types (e.g. INTEGER). - """ - return self._get_sub_prop("useAvroLogicalTypes") - - @use_avro_logical_types.setter - def use_avro_logical_types(self, value): - self._set_sub_prop("useAvroLogicalTypes", bool(value)) - - @property - def write_disposition(self): - """Optional[google.cloud.bigquery.job.WriteDisposition]: Action that occurs if - the destination table already exists. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.write_disposition - """ - return self._get_sub_prop("writeDisposition") - - @write_disposition.setter - def write_disposition(self, value): - self._set_sub_prop("writeDisposition", value) - - -class LoadJob(_AsyncJob): - """Asynchronous job for loading data into a table. - - Can load from Google Cloud Storage URIs or from a file. - - Args: - job_id (str): the job's ID - - source_uris (Optional[Sequence[str]]): - URIs of one or more data files to be loaded. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris - for supported URI formats. Pass None for jobs that load from a file. - - destination (google.cloud.bigquery.table.TableReference): reference to table into which data is to be loaded. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration - for the dataset (which requires a project). - """ - - _JOB_TYPE = "load" - - def __init__(self, job_id, source_uris, destination, client, job_config=None): - super(LoadJob, self).__init__(job_id, client) - - if not job_config: - job_config = LoadJobConfig() - - self._configuration = job_config - self._properties["configuration"] = job_config._properties - - if source_uris is not None: - _helpers._set_sub_prop( - self._properties, ["configuration", "load", "sourceUris"], source_uris - ) - - if destination is not None: - _helpers._set_sub_prop( - self._properties, - ["configuration", "load", "destinationTable"], - destination.to_api_repr(), - ) - - @property - def destination(self): - """google.cloud.bigquery.table.TableReference: table where loaded rows are written - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_table - """ - dest_config = _helpers._get_sub_prop( - self._properties, ["configuration", "load", "destinationTable"] - ) - return TableReference.from_api_repr(dest_config) - - @property - def source_uris(self): - """Optional[Sequence[str]]: URIs of data files to be loaded. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris - for supported URI formats. None for jobs that load from a file. - """ - return _helpers._get_sub_prop( - self._properties, ["configuration", "load", "sourceUris"] - ) - - @property - def allow_jagged_rows(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`. - """ - return self._configuration.allow_jagged_rows - - @property - def allow_quoted_newlines(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`. - """ - return self._configuration.allow_quoted_newlines - - @property - def autodetect(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`. - """ - return self._configuration.autodetect - - @property - def create_disposition(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`. - """ - return self._configuration.create_disposition - - @property - def encoding(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`. - """ - return self._configuration.encoding - - @property - def field_delimiter(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`. - """ - return self._configuration.field_delimiter - - @property - def ignore_unknown_values(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`. - """ - return self._configuration.ignore_unknown_values - - @property - def max_bad_records(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`. - """ - return self._configuration.max_bad_records - - @property - def null_marker(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`. - """ - return self._configuration.null_marker - - @property - def quote_character(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`. - """ - return self._configuration.quote_character - - @property - def skip_leading_rows(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`. - """ - return self._configuration.skip_leading_rows - - @property - def source_format(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`. - """ - return self._configuration.source_format - - @property - def write_disposition(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`. - """ - return self._configuration.write_disposition - - @property - def schema(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.schema`. - """ - return self._configuration.schema - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) - or :data:`None` if using default encryption. - - See - :attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`. - """ - return self._configuration.destination_encryption_configuration - - @property - def destination_table_description(self): - """Optional[str] name given to destination table. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description - """ - return self._configuration.destination_table_description - - @property - def destination_table_friendly_name(self): - """Optional[str] name given to destination table. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name - """ - return self._configuration.destination_table_friendly_name - - @property - def range_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - """ - return self._configuration.range_partitioning - - @property - def time_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`. - """ - return self._configuration.time_partitioning - - @property - def use_avro_logical_types(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`. - """ - return self._configuration.use_avro_logical_types - - @property - def clustering_fields(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`. - """ - return self._configuration.clustering_fields - - @property - def schema_update_options(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`. - """ - return self._configuration.schema_update_options - - @property - def input_file_bytes(self): - """Count of bytes loaded from source files. - - Returns: - Optional[int]: the count (None until set from the server). - - Raises: - ValueError: for invalid value types. - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "inputFileBytes"] - ) - ) - - @property - def input_files(self): - """Count of source files. - - Returns: - Optional[int]: the count (None until set from the server). - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "inputFiles"] - ) - ) - - @property - def output_bytes(self): - """Count of bytes saved to destination table. - - Returns: - Optional[int]: the count (None until set from the server). - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "outputBytes"] - ) - ) - - @property - def output_rows(self): - """Count of rows saved to destination table. - - Returns: - Optional[int]: the count (None until set from the server). - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "outputRows"] - ) - ) - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Exclude statistics, if set. - return { - "jobReference": self._properties["jobReference"], - "configuration": self._properties["configuration"], - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - .. note: - - This method assumes that the project found in the resource matches - the client's project. - - Args: - resource (Dict): dataset job representation returned from the API - - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.LoadJob: Job parsed from ``resource``. - """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, None, client) - job._set_properties(resource) - return job - - -class CopyJobConfig(_JobConfig): - """Configuration options for copy jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. - """ - - def __init__(self, **kwargs): - super(CopyJobConfig, self).__init__("copy", **kwargs) - - @property - def create_disposition(self): - """google.cloud.bigquery.job.CreateDisposition: Specifies behavior - for creating tables. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.create_disposition - """ - return self._get_sub_prop("createDisposition") - - @create_disposition.setter - def create_disposition(self, value): - self._set_sub_prop("createDisposition", value) - - @property - def write_disposition(self): - """google.cloud.bigquery.job.WriteDisposition: Action that occurs if - the destination table already exists. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.write_disposition - """ - return self._get_sub_prop("writeDisposition") - - @write_disposition.setter - def write_disposition(self, value): - self._set_sub_prop("writeDisposition", value) - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.destination_encryption_configuration - """ - prop = self._get_sub_prop("destinationEncryptionConfiguration") - if prop is not None: - prop = EncryptionConfiguration.from_api_repr(prop) - return prop - - @destination_encryption_configuration.setter - def destination_encryption_configuration(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("destinationEncryptionConfiguration", api_repr) - - -class CopyJob(_AsyncJob): - """Asynchronous job: copy data into a table from other tables. - - Args: - job_id (str): the job's ID, within the project belonging to ``client``. - - sources (List[google.cloud.bigquery.table.TableReference]): Table from which data is to be loaded. - - destination (google.cloud.bigquery.table.TableReference): Table into which data is to be loaded. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration - for the dataset (which requires a project). - - job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]): - Extra configuration options for the copy job. - """ - - _JOB_TYPE = "copy" - - def __init__(self, job_id, sources, destination, client, job_config=None): - super(CopyJob, self).__init__(job_id, client) - - if not job_config: - job_config = CopyJobConfig() - - self._configuration = job_config - self._properties["configuration"] = job_config._properties - - if destination: - _helpers._set_sub_prop( - self._properties, - ["configuration", "copy", "destinationTable"], - destination.to_api_repr(), - ) - - if sources: - source_resources = [source.to_api_repr() for source in sources] - _helpers._set_sub_prop( - self._properties, - ["configuration", "copy", "sourceTables"], - source_resources, - ) - - @property - def destination(self): - """google.cloud.bigquery.table.TableReference: Table into which data - is to be loaded. - """ - return TableReference.from_api_repr( - _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "destinationTable"] - ) - ) - - @property - def sources(self): - """List[google.cloud.bigquery.table.TableReference]): Table(s) from - which data is to be loaded. - """ - source_configs = _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "sourceTables"] - ) - if source_configs is None: - single = _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "sourceTable"] - ) - if single is None: - raise KeyError("Resource missing 'sourceTables' / 'sourceTable'") - source_configs = [single] - - sources = [] - for source_config in source_configs: - table_ref = TableReference.from_api_repr(source_config) - sources.append(table_ref) - return sources - - @property - def create_disposition(self): - """See - :attr:`google.cloud.bigquery.job.CopyJobConfig.create_disposition`. - """ - return self._configuration.create_disposition - - @property - def write_disposition(self): - """See - :attr:`google.cloud.bigquery.job.CopyJobConfig.write_disposition`. - """ - return self._configuration.write_disposition - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - :attr:`google.cloud.bigquery.job.CopyJobConfig.destination_encryption_configuration`. - """ - return self._configuration.destination_encryption_configuration - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Exclude statistics, if set. - return { - "jobReference": self._properties["jobReference"], - "configuration": self._properties["configuration"], - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - .. note: - - This method assumes that the project found in the resource matches - the client's project. - - Args: - resource (Dict): dataset job representation returned from the API - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.CopyJob: Job parsed from ``resource``. - """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, None, client=client) - job._set_properties(resource) - return job - - -class ExtractJobConfig(_JobConfig): - """Configuration options for extract jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. - """ - - def __init__(self, **kwargs): - super(ExtractJobConfig, self).__init__("extract", **kwargs) - - @property - def compression(self): - """google.cloud.bigquery.job.Compression: Compression type to use for - exported files. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.compression - """ - return self._get_sub_prop("compression") - - @compression.setter - def compression(self, value): - self._set_sub_prop("compression", value) - - @property - def destination_format(self): - """google.cloud.bigquery.job.DestinationFormat: Exported file format. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.destination_format - """ - return self._get_sub_prop("destinationFormat") - - @destination_format.setter - def destination_format(self, value): - self._set_sub_prop("destinationFormat", value) - - @property - def field_delimiter(self): - """str: Delimiter to use between fields in the exported data. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.field_delimiter - """ - return self._get_sub_prop("fieldDelimiter") - - @field_delimiter.setter - def field_delimiter(self, value): - self._set_sub_prop("fieldDelimiter", value) - - @property - def print_header(self): - """bool: Print a header row in the exported data. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.print_header - """ - return self._get_sub_prop("printHeader") - - @print_header.setter - def print_header(self, value): - self._set_sub_prop("printHeader", value) - - @property - def use_avro_logical_types(self): - """bool: For loads of Avro data, governs whether Avro logical types are - converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than - raw types (e.g. INTEGER). - """ - return self._get_sub_prop("useAvroLogicalTypes") - - @use_avro_logical_types.setter - def use_avro_logical_types(self, value): - self._set_sub_prop("useAvroLogicalTypes", bool(value)) - - -class ExtractJob(_AsyncJob): - """Asynchronous job: extract data from a table into Cloud Storage. - - Args: - job_id (str): the job's ID. - - source (Union[ \ - google.cloud.bigquery.table.TableReference, \ - google.cloud.bigquery.model.ModelReference \ - ]): - Table or Model from which data is to be loaded or extracted. - - destination_uris (List[str]): - URIs describing where the extracted data will be written in Cloud - Storage, using the format ``gs:///``. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration. - - job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]): - Extra configuration options for the extract job. - """ - - _JOB_TYPE = "extract" - - def __init__(self, job_id, source, destination_uris, client, job_config=None): - super(ExtractJob, self).__init__(job_id, client) - - if job_config is None: - job_config = ExtractJobConfig() - - self._properties["configuration"] = job_config._properties - self._configuration = job_config - - if source: - source_ref = {"projectId": source.project, "datasetId": source.dataset_id} - - if isinstance(source, (Table, TableListItem, TableReference)): - source_ref["tableId"] = source.table_id - source_key = "sourceTable" - else: - source_ref["modelId"] = source.model_id - source_key = "sourceModel" - - _helpers._set_sub_prop( - self._properties, ["configuration", "extract", source_key], source_ref - ) - - if destination_uris: - _helpers._set_sub_prop( - self._properties, - ["configuration", "extract", "destinationUris"], - destination_uris, - ) - - @property - def source(self): - """Union[ \ - google.cloud.bigquery.table.TableReference, \ - google.cloud.bigquery.model.ModelReference \ - ]: Table or Model from which data is to be loaded or extracted. - """ - source_config = _helpers._get_sub_prop( - self._properties, ["configuration", "extract", "sourceTable"] - ) - if source_config: - return TableReference.from_api_repr(source_config) - else: - source_config = _helpers._get_sub_prop( - self._properties, ["configuration", "extract", "sourceModel"] - ) - return ModelReference.from_api_repr(source_config) - - @property - def destination_uris(self): - """List[str]: URIs describing where the extracted data will be - written in Cloud Storage, using the format - ``gs:///``. - """ - return _helpers._get_sub_prop( - self._properties, ["configuration", "extract", "destinationUris"] - ) - - @property - def compression(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`. - """ - return self._configuration.compression - - @property - def destination_format(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`. - """ - return self._configuration.destination_format - - @property - def field_delimiter(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. - """ - return self._configuration.field_delimiter - - @property - def print_header(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`. - """ - return self._configuration.print_header - - @property - def destination_uri_file_counts(self): - """Return file counts from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts - - Returns: - List[int]: - A list of integer counts, each representing the number of files - per destination URI or URI pattern specified in the extract - configuration. These values will be in the same order as the URIs - specified in the 'destinationUris' field. Returns None if job is - not yet complete. - """ - counts = self._job_statistics().get("destinationUriFileCounts") - if counts is not None: - return [int(count) for count in counts] - return None - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Exclude statistics, if set. - return { - "jobReference": self._properties["jobReference"], - "configuration": self._properties["configuration"], - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - .. note: - - This method assumes that the project found in the resource matches - the client's project. - - Args: - resource (Dict): dataset job representation returned from the API - - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``. - """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, None, client=client) - job._set_properties(resource) - return job - - -def _from_api_repr_query_parameters(resource): - return [_query_param_from_api_repr(mapping) for mapping in resource] - - -def _to_api_repr_query_parameters(value): - return [query_parameter.to_api_repr() for query_parameter in value] - - -def _from_api_repr_udf_resources(resource): - udf_resources = [] - for udf_mapping in resource: - for udf_type, udf_value in udf_mapping.items(): - udf_resources.append(UDFResource(udf_type, udf_value)) - return udf_resources - - -def _to_api_repr_udf_resources(value): - return [{udf_resource.udf_type: udf_resource.value} for udf_resource in value] - - -def _from_api_repr_table_defs(resource): - return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()} - - -def _to_api_repr_table_defs(value): - return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} - - -class QueryJobConfig(_JobConfig): - """Configuration options for query jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. - """ - - def __init__(self, **kwargs): - super(QueryJobConfig, self).__init__("query", **kwargs) - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_encryption_configuration - """ - prop = self._get_sub_prop("destinationEncryptionConfiguration") - if prop is not None: - prop = EncryptionConfiguration.from_api_repr(prop) - return prop - - @destination_encryption_configuration.setter - def destination_encryption_configuration(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("destinationEncryptionConfiguration", api_repr) - - @property - def allow_large_results(self): - """bool: Allow large query results tables (legacy SQL, only) - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.allow_large_results - """ - return self._get_sub_prop("allowLargeResults") - - @allow_large_results.setter - def allow_large_results(self, value): - self._set_sub_prop("allowLargeResults", value) - - @property - def create_disposition(self): - """google.cloud.bigquery.job.CreateDisposition: Specifies behavior - for creating tables. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.create_disposition - """ - return self._get_sub_prop("createDisposition") - - @create_disposition.setter - def create_disposition(self, value): - self._set_sub_prop("createDisposition", value) - - @property - def default_dataset(self): - """google.cloud.bigquery.dataset.DatasetReference: the default dataset - to use for unqualified table names in the query or :data:`None` if not - set. - - The ``default_dataset`` setter accepts: - - - a :class:`~google.cloud.bigquery.dataset.Dataset`, or - - a :class:`~google.cloud.bigquery.dataset.DatasetReference`, or - - a :class:`str` of the fully-qualified dataset ID in standard SQL - format. The value must included a project ID and dataset ID - separated by ``.``. For example: ``your-project.your_dataset``. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.default_dataset - """ - prop = self._get_sub_prop("defaultDataset") - if prop is not None: - prop = DatasetReference.from_api_repr(prop) - return prop - - @default_dataset.setter - def default_dataset(self, value): - if value is None: - self._set_sub_prop("defaultDataset", None) - return - - if isinstance(value, six.string_types): - value = DatasetReference.from_string(value) - - if isinstance(value, (Dataset, DatasetListItem)): - value = value.reference - - resource = value.to_api_repr() - self._set_sub_prop("defaultDataset", resource) - - @property - def destination(self): - """google.cloud.bigquery.table.TableReference: table where results are - written or :data:`None` if not set. - - The ``destination`` setter accepts: - - - a :class:`~google.cloud.bigquery.table.Table`, or - - a :class:`~google.cloud.bigquery.table.TableReference`, or - - a :class:`str` of the fully-qualified table ID in standard SQL - format. The value must included a project ID, dataset ID, and table - ID, each separated by ``.``. For example: - ``your-project.your_dataset.your_table``. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table - """ - prop = self._get_sub_prop("destinationTable") - if prop is not None: - prop = TableReference.from_api_repr(prop) - return prop - - @destination.setter - def destination(self, value): - if value is None: - self._set_sub_prop("destinationTable", None) - return - - value = _table_arg_to_table_ref(value) - resource = value.to_api_repr() - self._set_sub_prop("destinationTable", resource) - - @property - def dry_run(self): - """bool: :data:`True` if this query should be a dry run to estimate - costs. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.dry_run - """ - return self._properties.get("dryRun") - - @dry_run.setter - def dry_run(self, value): - self._properties["dryRun"] = value - - @property - def flatten_results(self): - """bool: Flatten nested/repeated fields in results. (Legacy SQL only) - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.flatten_results - """ - return self._get_sub_prop("flattenResults") - - @flatten_results.setter - def flatten_results(self, value): - self._set_sub_prop("flattenResults", value) - - @property - def maximum_billing_tier(self): - """int: Deprecated. Changes the billing tier to allow high-compute - queries. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_billing_tier - """ - return self._get_sub_prop("maximumBillingTier") - - @maximum_billing_tier.setter - def maximum_billing_tier(self, value): - self._set_sub_prop("maximumBillingTier", value) - - @property - def maximum_bytes_billed(self): - """int: Maximum bytes to be billed for this job or :data:`None` if not set. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_bytes_billed - """ - return _helpers._int_or_none(self._get_sub_prop("maximumBytesBilled")) - - @maximum_bytes_billed.setter - def maximum_bytes_billed(self, value): - self._set_sub_prop("maximumBytesBilled", str(value)) - - @property - def priority(self): - """google.cloud.bigquery.job.QueryPriority: Priority of the query. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.priority - """ - return self._get_sub_prop("priority") - - @priority.setter - def priority(self, value): - self._set_sub_prop("priority", value) - - @property - def query_parameters(self): - """List[Union[google.cloud.bigquery.query.ArrayQueryParameter, \ - google.cloud.bigquery.query.ScalarQueryParameter, \ - google.cloud.bigquery.query.StructQueryParameter]]: list of parameters - for parameterized query (empty by default) - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query_parameters - """ - prop = self._get_sub_prop("queryParameters", default=[]) - return _from_api_repr_query_parameters(prop) - - @query_parameters.setter - def query_parameters(self, values): - self._set_sub_prop("queryParameters", _to_api_repr_query_parameters(values)) - - @property - def range_partitioning(self): - """Optional[google.cloud.bigquery.table.RangePartitioning]: - Configures range-based partitioning for destination table. - - .. note:: - **Beta**. The integer range partitioning feature is in a - pre-release state and might change or have limited support. - - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - - Raises: - ValueError: - If the value is not - :class:`~google.cloud.bigquery.table.RangePartitioning` or - :data:`None`. - """ - resource = self._get_sub_prop("rangePartitioning") - if resource is not None: - return RangePartitioning(_properties=resource) - - @range_partitioning.setter - def range_partitioning(self, value): - resource = value - if isinstance(value, RangePartitioning): - resource = value._properties - elif value is not None: - raise ValueError( - "Expected value to be RangePartitioning or None, got {}.".format(value) - ) - self._set_sub_prop("rangePartitioning", resource) - - @property - def udf_resources(self): - """List[google.cloud.bigquery.query.UDFResource]: user - defined function resources (empty by default) - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.user_defined_function_resources - """ - prop = self._get_sub_prop("userDefinedFunctionResources", default=[]) - return _from_api_repr_udf_resources(prop) - - @udf_resources.setter - def udf_resources(self, values): - self._set_sub_prop( - "userDefinedFunctionResources", _to_api_repr_udf_resources(values) - ) - - @property - def use_legacy_sql(self): - """bool: Use legacy SQL syntax. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_legacy_sql - """ - return self._get_sub_prop("useLegacySql") - - @use_legacy_sql.setter - def use_legacy_sql(self, value): - self._set_sub_prop("useLegacySql", value) - - @property - def use_query_cache(self): - """bool: Look for the query result in the cache. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_query_cache - """ - return self._get_sub_prop("useQueryCache") - - @use_query_cache.setter - def use_query_cache(self, value): - self._set_sub_prop("useQueryCache", value) - - @property - def write_disposition(self): - """google.cloud.bigquery.job.WriteDisposition: Action that occurs if - the destination table already exists. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.write_disposition - """ - return self._get_sub_prop("writeDisposition") - - @write_disposition.setter - def write_disposition(self, value): - self._set_sub_prop("writeDisposition", value) - - @property - def table_definitions(self): - """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: - Definitions for external tables or :data:`None` if not set. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.external_table_definitions - """ - prop = self._get_sub_prop("tableDefinitions") - if prop is not None: - prop = _from_api_repr_table_defs(prop) - return prop - - @table_definitions.setter - def table_definitions(self, values): - self._set_sub_prop("tableDefinitions", _to_api_repr_table_defs(values)) - - @property - def time_partitioning(self): - """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies - time-based partitioning for the destination table. - - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - - Raises: - ValueError: - If the value is not - :class:`~google.cloud.bigquery.table.TimePartitioning` or - :data:`None`. - """ - prop = self._get_sub_prop("timePartitioning") - if prop is not None: - prop = TimePartitioning.from_api_repr(prop) - return prop - - @time_partitioning.setter - def time_partitioning(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("timePartitioning", api_repr) - - @property - def clustering_fields(self): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - - Clustering fields are immutable after table creation. - - .. note:: - - BigQuery supports clustering for both partitioned and - non-partitioned tables. - """ - prop = self._get_sub_prop("clustering") - if prop is not None: - return list(prop.get("fields", ())) - - @clustering_fields.setter - def clustering_fields(self, value): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - """ - if value is not None: - self._set_sub_prop("clustering", {"fields": value}) - else: - self._del_sub_prop("clustering") - - @property - def schema_update_options(self): - """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies - updates to the destination table schema to allow as a side effect of - the query job. - """ - return self._get_sub_prop("schemaUpdateOptions") - - @schema_update_options.setter - def schema_update_options(self, values): - self._set_sub_prop("schemaUpdateOptions", values) - - def to_api_repr(self): - """Build an API representation of the query job config. - - Returns: - Dict: A dictionary in the format used by the BigQuery API. - """ - resource = copy.deepcopy(self._properties) - - # Query parameters have an addition property associated with them - # to indicate if the query is using named or positional parameters. - query_parameters = resource["query"].get("queryParameters") - if query_parameters: - if query_parameters[0].get("name") is None: - resource["query"]["parameterMode"] = "POSITIONAL" - else: - resource["query"]["parameterMode"] = "NAMED" - - return resource - - -class QueryJob(_AsyncJob): - """Asynchronous job: query tables. - - Args: - job_id (str): the job's ID, within the project belonging to ``client``. - - query (str): SQL query string. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration - for the dataset (which requires a project). - - job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): - Extra configuration options for the query job. - """ - - _JOB_TYPE = "query" - _UDF_KEY = "userDefinedFunctionResources" - - def __init__(self, job_id, query, client, job_config=None): - super(QueryJob, self).__init__(job_id, client) - - if job_config is None: - job_config = QueryJobConfig() - if job_config.use_legacy_sql is None: - job_config.use_legacy_sql = False - - self._properties["configuration"] = job_config._properties - self._configuration = job_config - - if query: - _helpers._set_sub_prop( - self._properties, ["configuration", "query", "query"], query - ) - - self._query_results = None - self._done_timeout = None - self._transport_timeout = None - - @property - def allow_large_results(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. - """ - return self._configuration.allow_large_results - - @property - def create_disposition(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.create_disposition`. - """ - return self._configuration.create_disposition - - @property - def default_dataset(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.default_dataset`. - """ - return self._configuration.default_dataset - - @property - def destination(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.destination`. - """ - return self._configuration.destination - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - :attr:`google.cloud.bigquery.job.QueryJobConfig.destination_encryption_configuration`. - """ - return self._configuration.destination_encryption_configuration - - @property - def dry_run(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.dry_run`. - """ - return self._configuration.dry_run - - @property - def flatten_results(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.flatten_results`. - """ - return self._configuration.flatten_results - - @property - def priority(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.priority`. - """ - return self._configuration.priority - - @property - def query(self): - """str: The query text used in this query job. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query - """ - return _helpers._get_sub_prop( - self._properties, ["configuration", "query", "query"] - ) - - @property - def query_parameters(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.query_parameters`. - """ - return self._configuration.query_parameters - - @property - def udf_resources(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.udf_resources`. - """ - return self._configuration.udf_resources - - @property - def use_legacy_sql(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. - """ - return self._configuration.use_legacy_sql - - @property - def use_query_cache(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. - """ - return self._configuration.use_query_cache - - @property - def write_disposition(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.write_disposition`. - """ - return self._configuration.write_disposition - - @property - def maximum_billing_tier(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. - """ - return self._configuration.maximum_billing_tier - - @property - def maximum_bytes_billed(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. - """ - return self._configuration.maximum_bytes_billed - - @property - def range_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`. - """ - return self._configuration.range_partitioning - - @property - def table_definitions(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.table_definitions`. - """ - return self._configuration.table_definitions - - @property - def time_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.time_partitioning`. - """ - return self._configuration.time_partitioning - - @property - def clustering_fields(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`. - """ - return self._configuration.clustering_fields - - @property - def schema_update_options(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.schema_update_options`. - """ - return self._configuration.schema_update_options - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Use to_api_repr to allow for some configuration properties to be set - # automatically. - configuration = self._configuration.to_api_repr() - return { - "jobReference": self._properties["jobReference"], - "configuration": configuration, - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - Args: - resource (Dict): dataset job representation returned from the API - - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``. - """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, client=client) - job._set_properties(resource) - return job - - @property - def query_plan(self): - """Return query plan from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan - - Returns: - List[google.cloud.bigquery.job.QueryPlanEntry]: - mappings describing the query plan, or an empty list - if the query has not yet completed. - """ - plan_entries = self._job_statistics().get("queryPlan", ()) - return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] - - @property - def timeline(self): - """List(TimelineEntry): Return the query execution timeline - from job statistics. - """ - raw = self._job_statistics().get("timeline", ()) - return [TimelineEntry.from_api_repr(entry) for entry in raw] - - @property - def total_bytes_processed(self): - """Return total bytes processed from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_processed - - Returns: - Optional[int]: - Total bytes processed by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get("totalBytesProcessed") - if result is not None: - result = int(result) - return result - - @property - def total_bytes_billed(self): - """Return total bytes billed from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_billed - - Returns: - Optional[int]: - Total bytes processed by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get("totalBytesBilled") - if result is not None: - result = int(result) - return result - - @property - def billing_tier(self): - """Return billing tier from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.billing_tier - - Returns: - Optional[int]: - Billing tier used by the job, or None if job is not - yet complete. - """ - return self._job_statistics().get("billingTier") - - @property - def cache_hit(self): - """Return whether or not query results were served from cache. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.cache_hit - - Returns: - Optional[bool]: - whether the query results were returned from cache, or None - if job is not yet complete. - """ - return self._job_statistics().get("cacheHit") - - @property - def ddl_operation_performed(self): - """Optional[str]: Return the DDL operation performed. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_operation_performed - - """ - return self._job_statistics().get("ddlOperationPerformed") - - @property - def ddl_target_routine(self): - """Optional[google.cloud.bigquery.routine.RoutineReference]: Return the DDL target routine, present - for CREATE/DROP FUNCTION/PROCEDURE queries. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_routine - """ - prop = self._job_statistics().get("ddlTargetRoutine") - if prop is not None: - prop = RoutineReference.from_api_repr(prop) - return prop - - @property - def ddl_target_table(self): - """Optional[google.cloud.bigquery.table.TableReference]: Return the DDL target table, present - for CREATE/DROP TABLE/VIEW queries. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_table - """ - prop = self._job_statistics().get("ddlTargetTable") - if prop is not None: - prop = TableReference.from_api_repr(prop) - return prop - - @property - def num_dml_affected_rows(self): - """Return the number of DML rows affected by the job. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.num_dml_affected_rows - - Returns: - Optional[int]: - number of DML rows affected by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get("numDmlAffectedRows") - if result is not None: - result = int(result) - return result - - @property - def slot_millis(self): - """Union[int, None]: Slot-milliseconds used by this query job.""" - return _helpers._int_or_none(self._job_statistics().get("totalSlotMs")) - - @property - def statement_type(self): - """Return statement type from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.statement_type - - Returns: - Optional[str]: - type of statement used by the job, or None if job is not - yet complete. - """ - return self._job_statistics().get("statementType") - - @property - def referenced_tables(self): - """Return referenced tables from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_tables - - Returns: - List[Dict]: - mappings describing the query plan, or an empty list - if the query has not yet completed. - """ - tables = [] - datasets_by_project_name = {} - - for table in self._job_statistics().get("referencedTables", ()): - - t_project = table["projectId"] - - ds_id = table["datasetId"] - t_dataset = datasets_by_project_name.get((t_project, ds_id)) - if t_dataset is None: - t_dataset = DatasetReference(t_project, ds_id) - datasets_by_project_name[(t_project, ds_id)] = t_dataset - - t_name = table["tableId"] - tables.append(t_dataset.table(t_name)) - - return tables - - @property - def undeclared_query_parameters(self): - """Return undeclared query parameters from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.undeclared_query_parameters - - Returns: - List[Union[ \ - google.cloud.bigquery.query.ArrayQueryParameter, \ - google.cloud.bigquery.query.ScalarQueryParameter, \ - google.cloud.bigquery.query.StructQueryParameter \ - ]]: - Undeclared parameters, or an empty list if the query has - not yet completed. - """ - parameters = [] - undeclared = self._job_statistics().get("undeclaredQueryParameters", ()) - - for parameter in undeclared: - p_type = parameter["parameterType"] - - if "arrayType" in p_type: - klass = ArrayQueryParameter - elif "structTypes" in p_type: - klass = StructQueryParameter - else: - klass = ScalarQueryParameter - - parameters.append(klass.from_api_repr(parameter)) - - return parameters - - @property - def estimated_bytes_processed(self): - """Return the estimated number of bytes processed by the query. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.estimated_bytes_processed - - Returns: - Optional[int]: - number of DML rows affected by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get("estimatedBytesProcessed") - if result is not None: - result = int(result) - return result - - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): - """Refresh the job and checks if it is complete. - - Args: - retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves query results. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - reload (Optional[bool]): - If ``True``, make an API call to refresh the job state of - unfinished jobs before checking. Default ``True``. - - Returns: - bool: True if the job is complete, False otherwise. - """ - is_done = ( - # Only consider a QueryJob complete when we know we have the final - # query results available. - self._query_results is not None - and self._query_results.complete - and self.state == _DONE_STATE - ) - # Do not refresh if the state is already done, as the job will not - # change once complete. - if not reload or is_done: - return is_done - - # Since the API to getQueryResults can hang up to the timeout value - # (default of 10 seconds), set the timeout parameter to ensure that - # the timeout from the futures API is respected. See: - # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 - timeout_ms = None - if self._done_timeout is not None: - # Subtract a buffer for context switching, network latency, etc. - api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS - api_timeout = max(min(api_timeout, 10), 0) - self._done_timeout -= api_timeout - self._done_timeout = max(0, self._done_timeout) - timeout_ms = int(api_timeout * 1000) - - # If an explicit timeout is not given, fall back to the transport timeout - # stored in _blocking_poll() in the process of polling for job completion. - transport_timeout = timeout if timeout is not None else self._transport_timeout - - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - timeout_ms=timeout_ms, - location=self.location, - timeout=transport_timeout, - ) - - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if self._query_results.complete and self.state != _DONE_STATE: - self.reload(retry=retry, timeout=transport_timeout) - - return self.state == _DONE_STATE - - def _blocking_poll(self, timeout=None, **kwargs): - self._done_timeout = timeout - self._transport_timeout = timeout - super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs) - - @staticmethod - def _format_for_exception(query, job_id): - """Format a query for the output in exception message. - - Args: - query (str): The SQL query to format. - job_id (str): The ID of the job that ran the query. - - Returns: - str: A formatted query text. - """ - template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}" - - lines = query.splitlines() - max_line_len = max(len(line) for line in lines) - - header = "-----Query Job SQL Follows-----" - header = "{:^{total_width}}".format(header, total_width=max_line_len + 5) - - # Print out a "ruler" above and below the SQL so we can judge columns. - # Left pad for the line numbers (4 digits plus ":"). - ruler = " |" + " . |" * (max_line_len // 10) - - # Put line numbers next to the SQL. - body = "\n".join( - "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1) - ) - - return template.format(job_id=job_id, header=header, ruler=ruler, body=body) - - def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: begin the job via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - The client to use. If not passed, falls back to the ``client`` - associated with the job object or``NoneType``. - retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - - Raises: - ValueError: If the job has already begun. - """ - - try: - super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout) - except exceptions.GoogleCloudError as exc: - exc.message += self._format_for_exception(self.query, self.job_id) - exc.query_job = self - raise - - def result( - self, - page_size=None, - max_results=None, - retry=DEFAULT_RETRY, - timeout=None, - start_index=None, - ): - """Start the job and wait for it to complete and get the result. - - Args: - page_size (Optional[int]): - The maximum number of rows in each page of results from this - request. Non-positive values are ignored. - max_results (Optional[int]): - The maximum total number of rows from this request. - retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - If multiple requests are made under the hood, ``timeout`` - applies to each individual request. - start_index (Optional[int]): - The zero-based index of the starting row to read. - - Returns: - google.cloud.bigquery.table.RowIterator: - Iterator of row data - :class:`~google.cloud.bigquery.table.Row`-s. During each - page, the iterator will have the ``total_rows`` attribute - set, which counts the total number of rows **in the result - set** (this is distinct from the total number of rows in the - current page: ``iterator.page.num_items``). - - If the query is a special query that produces no results, e.g. - a DDL query, an ``_EmptyRowIterator`` instance is returned. - - Raises: - google.cloud.exceptions.GoogleCloudError: - If the job failed. - concurrent.futures.TimeoutError: - If the job did not complete in the given timeout. - """ - try: - super(QueryJob, self).result(retry=retry, timeout=timeout) - except exceptions.GoogleCloudError as exc: - exc.message += self._format_for_exception(self.query, self.job_id) - exc.query_job = self - raise - except requests.exceptions.Timeout as exc: - six.raise_from(concurrent.futures.TimeoutError, exc) - - # If the query job is complete but there are no query results, this was - # special job, such as a DDL query. Return an empty result set to - # indicate success and avoid calling tabledata.list on a table which - # can't be read (such as a view table). - if self._query_results.total_rows is None: - return _EmptyRowIterator() - - schema = self._query_results.schema - dest_table_ref = self.destination - dest_table = Table(dest_table_ref, schema=schema) - dest_table._properties["numRows"] = self._query_results.total_rows - rows = self._client.list_rows( - dest_table, - page_size=page_size, - max_results=max_results, - start_index=start_index, - retry=retry, - timeout=timeout, - ) - rows._preserve_order = _contains_order_by(self.query) - return rows - - # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_arrow() - def to_arrow( - self, - progress_bar_type=None, - bqstorage_client=None, - create_bqstorage_client=True, - ): - """[Beta] Create a class:`pyarrow.Table` by loading all pages of a - table or query. - - Args: - progress_bar_type (Optional[str]): - If set, use the `tqdm `_ library to - display a progress bar while the data downloads. Install the - ``tqdm`` package to use this feature. - - Possible values of ``progress_bar_type`` include: - - ``None`` - No progress bar. - ``'tqdm'`` - Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. - ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a - progress bar as a Jupyter notebook widget. - ``'tqdm_gui'`` - Use the :func:`tqdm.tqdm_gui` function to display a - progress bar as a graphical dialog box. - bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): - A BigQuery Storage API client. If supplied, use the faster - BigQuery Storage API to fetch rows from BigQuery. This API - is a billable API. - - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. - - Reading from a specific partition or snapshot is not - currently supported by this method. - create_bqstorage_client (Optional[bool]): - If ``True`` (default), create a BigQuery Storage API client - using the default API settings. The BigQuery Storage API - is a faster way to fetch rows from BigQuery. See the - ``bqstorage_client`` parameter for more information. - - This argument does nothing if ``bqstorage_client`` is supplied. - - ..versionadded:: 1.24.0 - - Returns: - pyarrow.Table - A :class:`pyarrow.Table` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. - - Raises: - ValueError: - If the :mod:`pyarrow` library cannot be imported. - - ..versionadded:: 1.17.0 - """ - return self.result().to_arrow( - progress_bar_type=progress_bar_type, - bqstorage_client=bqstorage_client, - create_bqstorage_client=create_bqstorage_client, - ) - - # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_dataframe() - def to_dataframe( - self, - bqstorage_client=None, - dtypes=None, - progress_bar_type=None, - create_bqstorage_client=True, - date_as_object=True, - ): - """Return a pandas DataFrame from a QueryJob - - Args: - bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): - A BigQuery Storage API client. If supplied, use the faster - BigQuery Storage API to fetch rows from BigQuery. This - API is a billable API. - - This method requires the ``fastavro`` and - ``google-cloud-bigquery-storage`` libraries. - - Reading from a specific partition or snapshot is not - currently supported by this method. - - dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): - A dictionary of column names pandas ``dtype``s. The provided - ``dtype`` is used when constructing the series for the column - specified. Otherwise, the default pandas behavior is used. - - progress_bar_type (Optional[str]): - If set, use the `tqdm `_ library to - display a progress bar while the data downloads. Install the - ``tqdm`` package to use this feature. - - See - :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` - for details. - - ..versionadded:: 1.11.0 - create_bqstorage_client (Optional[bool]): - If ``True`` (default), create a BigQuery Storage API client - using the default API settings. The BigQuery Storage API - is a faster way to fetch rows from BigQuery. See the - ``bqstorage_client`` parameter for more information. - - This argument does nothing if ``bqstorage_client`` is supplied. - - ..versionadded:: 1.24.0 - - date_as_object (Optional[bool]): - If ``True`` (default), cast dates to objects. If ``False``, convert - to datetime64[ns] dtype. - - ..versionadded:: 1.26.0 - - Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. - - Raises: - ValueError: If the `pandas` library cannot be imported. - """ - return self.result().to_dataframe( - bqstorage_client=bqstorage_client, - dtypes=dtypes, - progress_bar_type=progress_bar_type, - create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, - ) - - def __iter__(self): - return iter(self.result()) - - -class QueryPlanEntryStep(object): - """Map a single step in a query plan entry. - - Args: - kind (str): step type. - substeps (List): names of substeps. - """ - - def __init__(self, kind, substeps): - self.kind = kind - self.substeps = list(substeps) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct instance from the JSON repr. - - Args: - resource (Dict): JSON representation of the entry. - - Returns: - google.cloud.bigquery.job.QueryPlanEntryStep: - New instance built from the resource. - """ - return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ())) - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return self.kind == other.kind and self.substeps == other.substeps - - -class QueryPlanEntry(object): - """QueryPlanEntry represents a single stage of a query execution plan. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ExplainQueryStage - for the underlying API representation within query statistics. - """ - - def __init__(self): - self._properties = {} - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct instance from the JSON repr. - - Args: - resource(Dict[str: object]): - ExplainQueryStage representation returned from API. - - Returns: - google.cloud.bigquery.job.QueryPlanEntry: - Query plan entry parsed from ``resource``. - """ - entry = cls() - entry._properties = resource - return entry - - @property - def name(self): - """Optional[str]: Human-readable name of the stage.""" - return self._properties.get("name") - - @property - def entry_id(self): - """Optional[str]: Unique ID for the stage within the plan.""" - return self._properties.get("id") - - @property - def start(self): - """Optional[Datetime]: Datetime when the stage started.""" - if self._properties.get("startMs") is None: - return None - return _helpers._datetime_from_microseconds( - int(self._properties.get("startMs")) * 1000.0 - ) - - @property - def end(self): - """Optional[Datetime]: Datetime when the stage ended.""" - if self._properties.get("endMs") is None: - return None - return _helpers._datetime_from_microseconds( - int(self._properties.get("endMs")) * 1000.0 - ) - - @property - def input_stages(self): - """List(int): Entry IDs for stages that were inputs for this stage.""" - if self._properties.get("inputStages") is None: - return [] - return [ - _helpers._int_or_none(entry) - for entry in self._properties.get("inputStages") - ] - - @property - def parallel_inputs(self): - """Optional[int]: Number of parallel input segments within - the stage. - """ - return _helpers._int_or_none(self._properties.get("parallelInputs")) - - @property - def completed_parallel_inputs(self): - """Optional[int]: Number of parallel input segments completed.""" - return _helpers._int_or_none(self._properties.get("completedParallelInputs")) - - @property - def wait_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent waiting to - be scheduled. - """ - return _helpers._int_or_none(self._properties.get("waitMsAvg")) - - @property - def wait_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent waiting to - be scheduled. - """ - return _helpers._int_or_none(self._properties.get("waitMsMax")) - - @property - def wait_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent waiting - to be scheduled, relative to the longest time spent by any worker in - any stage of the overall plan. - """ - return self._properties.get("waitRatioAvg") - - @property - def wait_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent waiting - to be scheduled, relative to the longest time spent by any worker in - any stage of the overall plan. - """ - return self._properties.get("waitRatioMax") - - @property - def read_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent reading - input. - """ - return _helpers._int_or_none(self._properties.get("readMsAvg")) - - @property - def read_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent reading - input. - """ - return _helpers._int_or_none(self._properties.get("readMsMax")) - - @property - def read_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent reading - input, relative to the longest time spent by any worker in any stage - of the overall plan. - """ - return self._properties.get("readRatioAvg") - - @property - def read_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent reading - to be scheduled, relative to the longest time spent by any worker in - any stage of the overall plan. - """ - return self._properties.get("readRatioMax") - - @property - def compute_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent on CPU-bound - processing. - """ - return _helpers._int_or_none(self._properties.get("computeMsAvg")) - - @property - def compute_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent on CPU-bound - processing. - """ - return _helpers._int_or_none(self._properties.get("computeMsMax")) - - @property - def compute_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent on - CPU-bound processing, relative to the longest time spent by any - worker in any stage of the overall plan. - """ - return self._properties.get("computeRatioAvg") - - @property - def compute_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent on - CPU-bound processing, relative to the longest time spent by any - worker in any stage of the overall plan. - """ - return self._properties.get("computeRatioMax") - - @property - def write_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent writing - output data. - """ - return _helpers._int_or_none(self._properties.get("writeMsAvg")) - - @property - def write_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent writing - output data. - """ - return _helpers._int_or_none(self._properties.get("writeMsMax")) - - @property - def write_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent writing - output data, relative to the longest time spent by any worker in any - stage of the overall plan. - """ - return self._properties.get("writeRatioAvg") - - @property - def write_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent writing - output data, relative to the longest time spent by any worker in any - stage of the overall plan. - """ - return self._properties.get("writeRatioMax") - - @property - def records_read(self): - """Optional[int]: Number of records read by this stage.""" - return _helpers._int_or_none(self._properties.get("recordsRead")) - - @property - def records_written(self): - """Optional[int]: Number of records written by this stage.""" - return _helpers._int_or_none(self._properties.get("recordsWritten")) - - @property - def status(self): - """Optional[str]: status of this stage.""" - return self._properties.get("status") - - @property - def shuffle_output_bytes(self): - """Optional[int]: Number of bytes written by this stage to - intermediate shuffle. - """ - return _helpers._int_or_none(self._properties.get("shuffleOutputBytes")) - - @property - def shuffle_output_bytes_spilled(self): - """Optional[int]: Number of bytes written by this stage to - intermediate shuffle and spilled to disk. - """ - return _helpers._int_or_none(self._properties.get("shuffleOutputBytesSpilled")) - - @property - def steps(self): - """List(QueryPlanEntryStep): List of step operations performed by - each worker in the stage. - """ - return [ - QueryPlanEntryStep.from_api_repr(step) - for step in self._properties.get("steps", []) - ] - - -class TimelineEntry(object): - """TimelineEntry represents progress of a query job at a particular - point in time. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#querytimelinesample - for the underlying API representation within query statistics. - """ - - def __init__(self): - self._properties = {} - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct instance from the JSON repr. - - Args: - resource(Dict[str: object]): - QueryTimelineSample representation returned from API. - - Returns: - google.cloud.bigquery.TimelineEntry: - Timeline sample parsed from ``resource``. - """ - entry = cls() - entry._properties = resource - return entry - - @property - def elapsed_ms(self): - """Optional[int]: Milliseconds elapsed since start of query - execution.""" - return _helpers._int_or_none(self._properties.get("elapsedMs")) - - @property - def active_units(self): - """Optional[int]: Current number of input units being processed - by workers, reported as largest value since the last sample.""" - return _helpers._int_or_none(self._properties.get("activeUnits")) - - @property - def pending_units(self): - """Optional[int]: Current number of input units remaining for - query stages active at this sample time.""" - return _helpers._int_or_none(self._properties.get("pendingUnits")) - - @property - def completed_units(self): - """Optional[int]: Current number of input units completed by - this query.""" - return _helpers._int_or_none(self._properties.get("completedUnits")) - - @property - def slot_millis(self): - """Optional[int]: Cumulative slot-milliseconds consumed by - this query.""" - return _helpers._int_or_none(self._properties.get("totalSlotMs")) - - -class UnknownJob(_AsyncJob): - """A job whose type cannot be determined.""" - - @classmethod - def from_api_repr(cls, resource, client): - """Construct an UnknownJob from the JSON representation. - - Args: - resource (Dict): JSON representation of a job. - client (google.cloud.bigquery.client.Client): - Client connected to BigQuery API. - - Returns: - UnknownJob: Job corresponding to the resource. - """ - job_ref_properties = resource.get("jobReference", {"projectId": client.project}) - job_ref = _JobReference._from_api_repr(job_ref_properties) - job = cls(job_ref, client) - # Populate the job reference with the project, even if it has been - # redacted, because we know it should equal that of the request. - resource["jobReference"] = job_ref_properties - job._properties = resource - return job - - -class ScriptStackFrame(object): - """Stack frame showing the line/column/procedure name where the current - evaluation happened. - - Args: - resource (Map[str, Any]): JSON representation of object. - """ - - def __init__(self, resource): - self._properties = resource - - @property - def procedure_id(self): - """Optional[str]: Name of the active procedure. - - Omitted if in a top-level script. - """ - return self._properties.get("procedureId") - - @property - def text(self): - """str: Text of the current statement/expression.""" - return self._properties.get("text") - - @property - def start_line(self): - """int: One-based start line.""" - return _helpers._int_or_none(self._properties.get("startLine")) - - @property - def start_column(self): - """int: One-based start column.""" - return _helpers._int_or_none(self._properties.get("startColumn")) - - @property - def end_line(self): - """int: One-based end line.""" - return _helpers._int_or_none(self._properties.get("endLine")) - - @property - def end_column(self): - """int: One-based end column.""" - return _helpers._int_or_none(self._properties.get("endColumn")) - - -class ScriptStatistics(object): - """Statistics for a child job of a script. - - Args: - resource (Map[str, Any]): JSON representation of object. - """ - - def __init__(self, resource): - self._properties = resource - - @property - def stack_frames(self): - """List[ScriptStackFrame]: Stack trace where the current evaluation - happened. - - Shows line/column/procedure name of each frame on the stack at the - point where the current evaluation happened. - - The leaf frame is first, the primary script is last. - """ - return [ - ScriptStackFrame(frame) for frame in self._properties.get("stackFrames", []) - ] - - @property - def evaluation_kind(self): - """str: Indicates the type of child job. - - Possible values include ``STATEMENT`` and ``EXPRESSION``. - """ - return self._properties.get("evaluationKind") diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py new file mode 100644 index 000000000000..26ecf8d3cf42 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py @@ -0,0 +1,77 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Define API Jobs.""" + +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _error_result_to_exception +from google.cloud.bigquery.job.base import _DONE_STATE +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference +from google.cloud.bigquery.job.base import ScriptStatistics +from google.cloud.bigquery.job.base import ScriptStackFrame +from google.cloud.bigquery.job.base import UnknownJob +from google.cloud.bigquery.job.copy_ import CopyJob +from google.cloud.bigquery.job.copy_ import CopyJobConfig +from google.cloud.bigquery.job.extract import ExtractJob +from google.cloud.bigquery.job.extract import ExtractJobConfig +from google.cloud.bigquery.job.load import LoadJob +from google.cloud.bigquery.job.load import LoadJobConfig +from google.cloud.bigquery.job.query import _contains_order_by +from google.cloud.bigquery.job.query import QueryJob +from google.cloud.bigquery.job.query import QueryJobConfig +from google.cloud.bigquery.job.query import QueryPlanEntry +from google.cloud.bigquery.job.query import QueryPlanEntryStep +from google.cloud.bigquery.job.query import TimelineEntry +from google.cloud.bigquery.enums import Compression +from google.cloud.bigquery.enums import CreateDisposition +from google.cloud.bigquery.enums import DestinationFormat +from google.cloud.bigquery.enums import Encoding +from google.cloud.bigquery.enums import QueryPriority +from google.cloud.bigquery.enums import SchemaUpdateOption +from google.cloud.bigquery.enums import SourceFormat +from google.cloud.bigquery.enums import WriteDisposition + + +# Include classes previously in job.py for backwards compatibility. +__all__ = [ + "_AsyncJob", + "_error_result_to_exception", + "_DONE_STATE", + "_JobConfig", + "_JobReference", + "ScriptStatistics", + "ScriptStackFrame", + "UnknownJob", + "CopyJob", + "CopyJobConfig", + "ExtractJob", + "ExtractJobConfig", + "LoadJob", + "LoadJobConfig", + "_contains_order_by", + "QueryJob", + "QueryJobConfig", + "QueryPlanEntry", + "QueryPlanEntryStep", + "TimelineEntry", + "Compression", + "CreateDisposition", + "DestinationFormat", + "Encoding", + "QueryPriority", + "SchemaUpdateOption", + "SourceFormat", + "WriteDisposition", +] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py new file mode 100644 index 000000000000..2f4ae1460a46 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -0,0 +1,912 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base classes and helpers for job classes.""" + +import copy +import threading + +from google.api_core import exceptions +import google.api_core.future.polling +from six.moves import http_client + +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.retry import DEFAULT_RETRY + + +_DONE_STATE = "DONE" +_STOPPED_REASON = "stopped" +_ERROR_REASON_TO_EXCEPTION = { + "accessDenied": http_client.FORBIDDEN, + "backendError": http_client.INTERNAL_SERVER_ERROR, + "billingNotEnabled": http_client.FORBIDDEN, + "billingTierLimitExceeded": http_client.BAD_REQUEST, + "blocked": http_client.FORBIDDEN, + "duplicate": http_client.CONFLICT, + "internalError": http_client.INTERNAL_SERVER_ERROR, + "invalid": http_client.BAD_REQUEST, + "invalidQuery": http_client.BAD_REQUEST, + "notFound": http_client.NOT_FOUND, + "notImplemented": http_client.NOT_IMPLEMENTED, + "quotaExceeded": http_client.FORBIDDEN, + "rateLimitExceeded": http_client.FORBIDDEN, + "resourceInUse": http_client.BAD_REQUEST, + "resourcesExceeded": http_client.BAD_REQUEST, + "responseTooLarge": http_client.FORBIDDEN, + "stopped": http_client.OK, + "tableUnavailable": http_client.BAD_REQUEST, +} + + +def _error_result_to_exception(error_result): + """Maps BigQuery error reasons to an exception. + + The reasons and their matching HTTP status codes are documented on + the `troubleshooting errors`_ page. + + .. _troubleshooting errors: https://cloud.google.com/bigquery\ + /troubleshooting-errors + + Args: + error_result (Mapping[str, str]): The error result from BigQuery. + + Returns: + google.cloud.exceptions.GoogleAPICallError: The mapped exception. + """ + reason = error_result.get("reason") + status_code = _ERROR_REASON_TO_EXCEPTION.get( + reason, http_client.INTERNAL_SERVER_ERROR + ) + return exceptions.from_http_status( + status_code, error_result.get("message", ""), errors=[error_result] + ) + + +class _JobReference(object): + """A reference to a job. + + Args: + job_id (str): ID of the job to run. + project (str): ID of the project where the job runs. + location (str): Location of where the job runs. + """ + + def __init__(self, job_id, project, location): + self._properties = {"jobId": job_id, "projectId": project} + # The location field must not be populated if it is None. + if location: + self._properties["location"] = location + + @property + def job_id(self): + """str: ID of the job.""" + return self._properties.get("jobId") + + @property + def project(self): + """str: ID of the project where the job runs.""" + return self._properties.get("projectId") + + @property + def location(self): + """str: Location where the job runs.""" + return self._properties.get("location") + + def _to_api_repr(self): + """Returns the API resource representation of the job reference.""" + return copy.deepcopy(self._properties) + + @classmethod + def _from_api_repr(cls, resource): + """Returns a job reference for an API resource representation.""" + job_id = resource.get("jobId") + project = resource.get("projectId") + location = resource.get("location") + job_ref = cls(job_id, project, location) + return job_ref + + +class _AsyncJob(google.api_core.future.polling.PollingFuture): + """Base class for asynchronous jobs. + + Args: + job_id (Union[str, _JobReference]): + Job's ID in the project associated with the client or a + fully-qualified job reference. + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project configuration. + """ + + def __init__(self, job_id, client): + super(_AsyncJob, self).__init__() + + # The job reference can be either a plain job ID or the full resource. + # Populate the properties dictionary consistently depending on what has + # been passed in. + job_ref = job_id + if not isinstance(job_id, _JobReference): + job_ref = _JobReference(job_id, client.project, None) + self._properties = {"jobReference": job_ref._to_api_repr()} + + self._client = client + self._result_set = False + self._completion_lock = threading.Lock() + + @property + def job_id(self): + """str: ID of the job.""" + return _helpers._get_sub_prop(self._properties, ["jobReference", "jobId"]) + + @property + def parent_job_id(self): + """Return the ID of the parent job. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.parent_job_id + + Returns: + Optional[str]: parent job id. + """ + return _helpers._get_sub_prop(self._properties, ["statistics", "parentJobId"]) + + @property + def script_statistics(self): + resource = _helpers._get_sub_prop( + self._properties, ["statistics", "scriptStatistics"] + ) + if resource is None: + return None + return ScriptStatistics(resource) + + @property + def num_child_jobs(self): + """The number of child jobs executed. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.num_child_jobs + + Returns: + int + """ + count = _helpers._get_sub_prop(self._properties, ["statistics", "numChildJobs"]) + return int(count) if count is not None else 0 + + @property + def project(self): + """Project bound to the job. + + Returns: + str: the project (derived from the client). + """ + return _helpers._get_sub_prop(self._properties, ["jobReference", "projectId"]) + + @property + def location(self): + """str: Location where the job runs.""" + return _helpers._get_sub_prop(self._properties, ["jobReference", "location"]) + + def _require_client(self, client): + """Check client or verify over-ride. + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + Returns: + google.cloud.bigquery.client.Client: + The client passed in or the currently bound client. + """ + if client is None: + client = self._client + return client + + @property + def job_type(self): + """Type of job. + + Returns: + str: one of 'load', 'copy', 'extract', 'query'. + """ + return self._JOB_TYPE + + @property + def path(self): + """URL path for the job's APIs. + + Returns: + str: the path based on project and job ID. + """ + return "/projects/%s/jobs/%s" % (self.project, self.job_id) + + @property + def labels(self): + """Dict[str, str]: Labels for the job.""" + return self._properties.setdefault("labels", {}) + + @property + def etag(self): + """ETag for the job resource. + + Returns: + Optional[str]: the ETag (None until set from the server). + """ + return self._properties.get("etag") + + @property + def self_link(self): + """URL for the job resource. + + Returns: + Optional[str]: the URL (None until set from the server). + """ + return self._properties.get("selfLink") + + @property + def user_email(self): + """E-mail address of user who submitted the job. + + Returns: + Optional[str]: the URL (None until set from the server). + """ + return self._properties.get("user_email") + + @property + def created(self): + """Datetime at which the job was created. + + Returns: + Optional[datetime.datetime]: + the creation time (None until set from the server). + """ + millis = _helpers._get_sub_prop( + self._properties, ["statistics", "creationTime"] + ) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) + + @property + def started(self): + """Datetime at which the job was started. + + Returns: + Optional[datetime.datetime]: + the start time (None until set from the server). + """ + millis = _helpers._get_sub_prop(self._properties, ["statistics", "startTime"]) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) + + @property + def ended(self): + """Datetime at which the job finished. + + Returns: + Optional[datetime.datetime]: + the end time (None until set from the server). + """ + millis = _helpers._get_sub_prop(self._properties, ["statistics", "endTime"]) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) + + def _job_statistics(self): + """Helper for job-type specific statistics-based properties.""" + statistics = self._properties.get("statistics", {}) + return statistics.get(self._JOB_TYPE, {}) + + @property + def error_result(self): + """Error information about the job as a whole. + + Returns: + Optional[Mapping]: the error information (None until set from the server). + """ + status = self._properties.get("status") + if status is not None: + return status.get("errorResult") + + @property + def errors(self): + """Information about individual errors generated by the job. + + Returns: + Optional[List[Mapping]]: + the error information (None until set from the server). + """ + status = self._properties.get("status") + if status is not None: + return status.get("errors") + + @property + def state(self): + """Status of the job. + + Returns: + Optional[str]: + the state (None until set from the server). + """ + status = self._properties.get("status", {}) + return status.get("state") + + def _set_properties(self, api_response): + """Update properties from resource in body of ``api_response`` + + Args: + api_response (Dict): response returned from an API call. + """ + cleaned = api_response.copy() + + statistics = cleaned.get("statistics", {}) + if "creationTime" in statistics: + statistics["creationTime"] = float(statistics["creationTime"]) + if "startTime" in statistics: + statistics["startTime"] = float(statistics["startTime"]) + if "endTime" in statistics: + statistics["endTime"] = float(statistics["endTime"]) + + # Save configuration to keep reference same in self._configuration. + cleaned_config = cleaned.pop("configuration", {}) + configuration = self._properties.pop("configuration", {}) + self._properties.clear() + self._properties.update(cleaned) + self._properties["configuration"] = configuration + self._properties["configuration"].update(cleaned_config) + + # For Future interface + self._set_future_result() + + @classmethod + def _check_resource_config(cls, resource): + """Helper for :meth:`from_api_repr` + + Args: + resource (Dict): resource for the job. + + Raises: + KeyError: + If the resource has no identifier, or + is missing the appropriate configuration. + """ + if "jobReference" not in resource or "jobId" not in resource["jobReference"]: + raise KeyError( + "Resource lacks required identity information: " + '["jobReference"]["jobId"]' + ) + if ( + "configuration" not in resource + or cls._JOB_TYPE not in resource["configuration"] + ): + raise KeyError( + "Resource lacks required configuration: " + '["configuration"]["%s"]' % cls._JOB_TYPE + ) + + def to_api_repr(self): + """Generate a resource for the job.""" + return copy.deepcopy(self._properties) + + _build_resource = to_api_repr # backward-compatibility alias + + def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: begin the job via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + The client to use. If not passed, falls back to the ``client`` + associated with the job object or``NoneType`` + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + + Raises: + ValueError: + If the job has already begun. + """ + if self.state is not None: + raise ValueError("Job already begun.") + + client = self._require_client(client) + path = "/projects/%s/jobs" % (self.project,) + + # jobs.insert is idempotent because we ensure that every new + # job has an ID. + span_attributes = {"path": path} + api_response = client._call_api( + retry, + span_name="BigQuery.job.begin", + span_attributes=span_attributes, + job_ref=self, + method="POST", + path=path, + data=self.to_api_repr(), + timeout=timeout, + ) + self._set_properties(api_response) + + def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: test for the existence of the job via a GET request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + + Returns: + bool: Boolean indicating existence of the job. + """ + client = self._require_client(client) + + extra_params = {"fields": "id"} + if self.location: + extra_params["location"] = self.location + + try: + span_attributes = {"path": self.path} + + client._call_api( + retry, + span_name="BigQuery.job.exists", + span_attributes=span_attributes, + job_ref=self, + method="GET", + path=self.path, + query_params=extra_params, + timeout=timeout, + ) + except exceptions.NotFound: + return False + else: + return True + + def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: refresh job properties via a GET request. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + """ + client = self._require_client(client) + + extra_params = {} + if self.location: + extra_params["location"] = self.location + span_attributes = {"path": self.path} + + api_response = client._call_api( + retry, + span_name="BigQuery.job.reload", + span_attributes=span_attributes, + job_ref=self, + method="GET", + path=self.path, + query_params=extra_params, + timeout=timeout, + ) + self._set_properties(api_response) + + def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: cancel job via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry`` + + Returns: + bool: Boolean indicating that the cancel request was sent. + """ + client = self._require_client(client) + + extra_params = {} + if self.location: + extra_params["location"] = self.location + + path = "{}/cancel".format(self.path) + span_attributes = {"path": path} + + api_response = client._call_api( + retry, + span_name="BigQuery.job.cancel", + span_attributes=span_attributes, + job_ref=self, + method="POST", + path=path, + query_params=extra_params, + timeout=timeout, + ) + self._set_properties(api_response["job"]) + # The Future interface requires that we return True if the *attempt* + # to cancel was successful. + return True + + # The following methods implement the PollingFuture interface. Note that + # the methods above are from the pre-Future interface and are left for + # compatibility. The only "overloaded" method is :meth:`cancel`, which + # satisfies both interfaces. + + def _set_future_result(self): + """Set the result or exception from the job if it is complete.""" + # This must be done in a lock to prevent the polling thread + # and main thread from both executing the completion logic + # at the same time. + with self._completion_lock: + # If the operation isn't complete or if the result has already been + # set, do not call set_result/set_exception again. + # Note: self._result_set is set to True in set_result and + # set_exception, in case those methods are invoked directly. + if not self.done(reload=False) or self._result_set: + return + + if self.error_result is not None: + exception = _error_result_to_exception(self.error_result) + self.set_exception(exception) + else: + self.set_result(self) + + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + """Checks if the job is complete. + + Args: + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + reload (Optional[bool]): + If ``True``, make an API call to refresh the job state of + unfinished jobs before checking. Default ``True``. + + Returns: + bool: True if the job is complete, False otherwise. + """ + # Do not refresh is the state is already done, as the job will not + # change once complete. + if self.state != _DONE_STATE and reload: + self.reload(retry=retry, timeout=timeout) + return self.state == _DONE_STATE + + def result(self, retry=DEFAULT_RETRY, timeout=None): + """Start the job and wait for it to complete and get the result. + + Args: + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. + + Returns: + _AsyncJob: This instance. + + Raises: + google.cloud.exceptions.GoogleAPICallError: + if the job failed. + concurrent.futures.TimeoutError: + if the job did not complete in the given timeout. + """ + if self.state is None: + self._begin(retry=retry, timeout=timeout) + + kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry} + return super(_AsyncJob, self).result(timeout=timeout, **kwargs) + + def cancelled(self): + """Check if the job has been cancelled. + + This always returns False. It's not possible to check if a job was + cancelled in the API. This method is here to satisfy the interface + for :class:`google.api_core.future.Future`. + + Returns: + bool: False + """ + return ( + self.error_result is not None + and self.error_result.get("reason") == _STOPPED_REASON + ) + + +class _JobConfig(object): + """Abstract base class for job configuration objects. + + Args: + job_type (str): The key to use for the job configuration. + """ + + def __init__(self, job_type, **kwargs): + self._job_type = job_type + self._properties = {job_type: {}} + for prop, val in kwargs.items(): + setattr(self, prop, val) + + @property + def labels(self): + """Dict[str, str]: Labels for the job. + + This method always returns a dict. To change a job's labels, + modify the dict, then call ``Client.update_job``. To delete a + label, set its value to :data:`None` before updating. + + Raises: + ValueError: If ``value`` type is invalid. + """ + return self._properties.setdefault("labels", {}) + + @labels.setter + def labels(self, value): + if not isinstance(value, dict): + raise ValueError("Pass a dict") + self._properties["labels"] = value + + def _get_sub_prop(self, key, default=None): + """Get a value in the ``self._properties[self._job_type]`` dictionary. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access + those properties:: + + self._get_sub_prop('destinationTable') + + This is equivalent to using the ``_helpers._get_sub_prop`` function:: + + _helpers._get_sub_prop( + self._properties, ['query', 'destinationTable']) + + Args: + key (str): + Key for the value to get in the + ``self._properties[self._job_type]`` dictionary. + default (Optional[object]): + Default value to return if the key is not found. + Defaults to :data:`None`. + + Returns: + object: The value if present or the default. + """ + return _helpers._get_sub_prop( + self._properties, [self._job_type, key], default=default + ) + + def _set_sub_prop(self, key, value): + """Set a value in the ``self._properties[self._job_type]`` dictionary. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to set + those properties:: + + self._set_sub_prop('useLegacySql', False) + + This is equivalent to using the ``_helper._set_sub_prop`` function:: + + _helper._set_sub_prop( + self._properties, ['query', 'useLegacySql'], False) + + Args: + key (str): + Key to set in the ``self._properties[self._job_type]`` + dictionary. + value (object): Value to set. + """ + _helpers._set_sub_prop(self._properties, [self._job_type, key], value) + + def _del_sub_prop(self, key): + """Remove ``key`` from the ``self._properties[self._job_type]`` dict. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear + those properties:: + + self._del_sub_prop('useLegacySql') + + This is equivalent to using the ``_helper._del_sub_prop`` function:: + + _helper._del_sub_prop( + self._properties, ['query', 'useLegacySql']) + + Args: + key (str): + Key to remove in the ``self._properties[self._job_type]`` + dictionary. + """ + _helpers._del_sub_prop(self._properties, [self._job_type, key]) + + def to_api_repr(self): + """Build an API representation of the job config. + + Returns: + Dict: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + def _fill_from_default(self, default_job_config): + """Merge this job config with a default job config. + + The keys in this object take precedence over the keys in the default + config. The merge is done at the top-level as well as for keys one + level below the job type. + + Args: + default_job_config (google.cloud.bigquery.job._JobConfig): + The default job config that will be used to fill in self. + + Returns: + google.cloud.bigquery.job._JobConfig: A new (merged) job config. + """ + if self._job_type != default_job_config._job_type: + raise TypeError( + "attempted to merge two incompatible job types: " + + repr(self._job_type) + + ", " + + repr(default_job_config._job_type) + ) + + new_job_config = self.__class__() + + default_job_properties = copy.deepcopy(default_job_config._properties) + for key in self._properties: + if key != self._job_type: + default_job_properties[key] = self._properties[key] + + default_job_properties[self._job_type].update(self._properties[self._job_type]) + new_job_config._properties = default_job_properties + + return new_job_config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + Args: + resource (Dict): + A job configuration in the same representation as is returned + from the API. + + Returns: + google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. + """ + job_config = cls() + job_config._properties = resource + return job_config + + +class ScriptStackFrame(object): + """Stack frame showing the line/column/procedure name where the current + evaluation happened. + + Args: + resource (Map[str, Any]): JSON representation of object. + """ + + def __init__(self, resource): + self._properties = resource + + @property + def procedure_id(self): + """Optional[str]: Name of the active procedure. + + Omitted if in a top-level script. + """ + return self._properties.get("procedureId") + + @property + def text(self): + """str: Text of the current statement/expression.""" + return self._properties.get("text") + + @property + def start_line(self): + """int: One-based start line.""" + return _helpers._int_or_none(self._properties.get("startLine")) + + @property + def start_column(self): + """int: One-based start column.""" + return _helpers._int_or_none(self._properties.get("startColumn")) + + @property + def end_line(self): + """int: One-based end line.""" + return _helpers._int_or_none(self._properties.get("endLine")) + + @property + def end_column(self): + """int: One-based end column.""" + return _helpers._int_or_none(self._properties.get("endColumn")) + + +class ScriptStatistics(object): + """Statistics for a child job of a script. + + Args: + resource (Map[str, Any]): JSON representation of object. + """ + + def __init__(self, resource): + self._properties = resource + + @property + def stack_frames(self): + """List[ScriptStackFrame]: Stack trace where the current evaluation + happened. + + Shows line/column/procedure name of each frame on the stack at the + point where the current evaluation happened. + + The leaf frame is first, the primary script is last. + """ + return [ + ScriptStackFrame(frame) for frame in self._properties.get("stackFrames", []) + ] + + @property + def evaluation_kind(self): + """str: Indicates the type of child job. + + Possible values include ``STATEMENT`` and ``EXPRESSION``. + """ + return self._properties.get("evaluationKind") + + +class UnknownJob(_AsyncJob): + """A job whose type cannot be determined.""" + + @classmethod + def from_api_repr(cls, resource, client): + """Construct an UnknownJob from the JSON representation. + + Args: + resource (Dict): JSON representation of a job. + client (google.cloud.bigquery.client.Client): + Client connected to BigQuery API. + + Returns: + UnknownJob: Job corresponding to the resource. + """ + job_ref_properties = resource.get("jobReference", {"projectId": client.project}) + job_ref = _JobReference._from_api_repr(job_ref_properties) + job = cls(job_ref, client) + # Populate the job reference with the project, even if it has been + # redacted, because we know it should equal that of the request. + resource["jobReference"] = job_ref_properties + job._properties = resource + return job diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py new file mode 100644 index 000000000000..95f4b613b5cb --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py @@ -0,0 +1,223 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes for copy jobs.""" + +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.table import TableReference + +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference + + +class CopyJobConfig(_JobConfig): + """Configuration options for copy jobs. + + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. + """ + + def __init__(self, **kwargs): + super(CopyJobConfig, self).__init__("copy", **kwargs) + + @property + def create_disposition(self): + """google.cloud.bigquery.job.CreateDisposition: Specifies behavior + for creating tables. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.create_disposition + """ + return self._get_sub_prop("createDisposition") + + @create_disposition.setter + def create_disposition(self, value): + self._set_sub_prop("createDisposition", value) + + @property + def write_disposition(self): + """google.cloud.bigquery.job.WriteDisposition: Action that occurs if + the destination table already exists. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.write_disposition + """ + return self._get_sub_prop("writeDisposition") + + @write_disposition.setter + def write_disposition(self, value): + self._set_sub_prop("writeDisposition", value) + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.destination_encryption_configuration + """ + prop = self._get_sub_prop("destinationEncryptionConfiguration") + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @destination_encryption_configuration.setter + def destination_encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("destinationEncryptionConfiguration", api_repr) + + +class CopyJob(_AsyncJob): + """Asynchronous job: copy data into a table from other tables. + + Args: + job_id (str): the job's ID, within the project belonging to ``client``. + + sources (List[google.cloud.bigquery.table.TableReference]): Table from which data is to be loaded. + + destination (google.cloud.bigquery.table.TableReference): Table into which data is to be loaded. + + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration + for the dataset (which requires a project). + + job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]): + Extra configuration options for the copy job. + """ + + _JOB_TYPE = "copy" + + def __init__(self, job_id, sources, destination, client, job_config=None): + super(CopyJob, self).__init__(job_id, client) + + if not job_config: + job_config = CopyJobConfig() + + self._configuration = job_config + self._properties["configuration"] = job_config._properties + + if destination: + _helpers._set_sub_prop( + self._properties, + ["configuration", "copy", "destinationTable"], + destination.to_api_repr(), + ) + + if sources: + source_resources = [source.to_api_repr() for source in sources] + _helpers._set_sub_prop( + self._properties, + ["configuration", "copy", "sourceTables"], + source_resources, + ) + + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: Table into which data + is to be loaded. + """ + return TableReference.from_api_repr( + _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "destinationTable"] + ) + ) + + @property + def sources(self): + """List[google.cloud.bigquery.table.TableReference]): Table(s) from + which data is to be loaded. + """ + source_configs = _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "sourceTables"] + ) + if source_configs is None: + single = _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "sourceTable"] + ) + if single is None: + raise KeyError("Resource missing 'sourceTables' / 'sourceTable'") + source_configs = [single] + + sources = [] + for source_config in source_configs: + table_ref = TableReference.from_api_repr(source_config) + sources.append(table_ref) + return sources + + @property + def create_disposition(self): + """See + :attr:`google.cloud.bigquery.job.CopyJobConfig.create_disposition`. + """ + return self._configuration.create_disposition + + @property + def write_disposition(self): + """See + :attr:`google.cloud.bigquery.job.CopyJobConfig.write_disposition`. + """ + return self._configuration.write_disposition + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See + :attr:`google.cloud.bigquery.job.CopyJobConfig.destination_encryption_configuration`. + """ + return self._configuration.destination_encryption_configuration + + def to_api_repr(self): + """Generate a resource for :meth:`_begin`.""" + # Exclude statistics, if set. + return { + "jobReference": self._properties["jobReference"], + "configuration": self._properties["configuration"], + } + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + .. note: + + This method assumes that the project found in the resource matches + the client's project. + + Args: + resource (Dict): dataset job representation returned from the API + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. + + Returns: + google.cloud.bigquery.job.CopyJob: Job parsed from ``resource``. + """ + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client=client) + job._set_properties(resource) + return job diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py new file mode 100644 index 000000000000..a6e262a32cfe --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py @@ -0,0 +1,266 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes for extract (export) jobs.""" + +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.model import ModelReference +from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableListItem +from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference + + +class ExtractJobConfig(_JobConfig): + """Configuration options for extract jobs. + + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. + """ + + def __init__(self, **kwargs): + super(ExtractJobConfig, self).__init__("extract", **kwargs) + + @property + def compression(self): + """google.cloud.bigquery.job.Compression: Compression type to use for + exported files. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.compression + """ + return self._get_sub_prop("compression") + + @compression.setter + def compression(self, value): + self._set_sub_prop("compression", value) + + @property + def destination_format(self): + """google.cloud.bigquery.job.DestinationFormat: Exported file format. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.destination_format + """ + return self._get_sub_prop("destinationFormat") + + @destination_format.setter + def destination_format(self, value): + self._set_sub_prop("destinationFormat", value) + + @property + def field_delimiter(self): + """str: Delimiter to use between fields in the exported data. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.field_delimiter + """ + return self._get_sub_prop("fieldDelimiter") + + @field_delimiter.setter + def field_delimiter(self, value): + self._set_sub_prop("fieldDelimiter", value) + + @property + def print_header(self): + """bool: Print a header row in the exported data. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.print_header + """ + return self._get_sub_prop("printHeader") + + @print_header.setter + def print_header(self, value): + self._set_sub_prop("printHeader", value) + + @property + def use_avro_logical_types(self): + """bool: For loads of Avro data, governs whether Avro logical types are + converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than + raw types (e.g. INTEGER). + """ + return self._get_sub_prop("useAvroLogicalTypes") + + @use_avro_logical_types.setter + def use_avro_logical_types(self, value): + self._set_sub_prop("useAvroLogicalTypes", bool(value)) + + +class ExtractJob(_AsyncJob): + """Asynchronous job: extract data from a table into Cloud Storage. + + Args: + job_id (str): the job's ID. + + source (Union[ \ + google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.model.ModelReference \ + ]): + Table or Model from which data is to be loaded or extracted. + + destination_uris (List[str]): + URIs describing where the extracted data will be written in Cloud + Storage, using the format ``gs:///``. + + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration. + + job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]): + Extra configuration options for the extract job. + """ + + _JOB_TYPE = "extract" + + def __init__(self, job_id, source, destination_uris, client, job_config=None): + super(ExtractJob, self).__init__(job_id, client) + + if job_config is None: + job_config = ExtractJobConfig() + + self._properties["configuration"] = job_config._properties + self._configuration = job_config + + if source: + source_ref = {"projectId": source.project, "datasetId": source.dataset_id} + + if isinstance(source, (Table, TableListItem, TableReference)): + source_ref["tableId"] = source.table_id + source_key = "sourceTable" + else: + source_ref["modelId"] = source.model_id + source_key = "sourceModel" + + _helpers._set_sub_prop( + self._properties, ["configuration", "extract", source_key], source_ref + ) + + if destination_uris: + _helpers._set_sub_prop( + self._properties, + ["configuration", "extract", "destinationUris"], + destination_uris, + ) + + @property + def source(self): + """Union[ \ + google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.model.ModelReference \ + ]: Table or Model from which data is to be loaded or extracted. + """ + source_config = _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "sourceTable"] + ) + if source_config: + return TableReference.from_api_repr(source_config) + else: + source_config = _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "sourceModel"] + ) + return ModelReference.from_api_repr(source_config) + + @property + def destination_uris(self): + """List[str]: URIs describing where the extracted data will be + written in Cloud Storage, using the format + ``gs:///``. + """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "destinationUris"] + ) + + @property + def compression(self): + """See + :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`. + """ + return self._configuration.compression + + @property + def destination_format(self): + """See + :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`. + """ + return self._configuration.destination_format + + @property + def field_delimiter(self): + """See + :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. + """ + return self._configuration.field_delimiter + + @property + def print_header(self): + """See + :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`. + """ + return self._configuration.print_header + + @property + def destination_uri_file_counts(self): + """Return file counts from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts + + Returns: + List[int]: + A list of integer counts, each representing the number of files + per destination URI or URI pattern specified in the extract + configuration. These values will be in the same order as the URIs + specified in the 'destinationUris' field. Returns None if job is + not yet complete. + """ + counts = self._job_statistics().get("destinationUriFileCounts") + if counts is not None: + return [int(count) for count in counts] + return None + + def to_api_repr(self): + """Generate a resource for :meth:`_begin`.""" + # Exclude statistics, if set. + return { + "jobReference": self._properties["jobReference"], + "configuration": self._properties["configuration"], + } + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + .. note: + + This method assumes that the project found in the resource matches + the client's project. + + Args: + resource (Dict): dataset job representation returned from the API + + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. + + Returns: + google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``. + """ + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client=client) + job._set_properties(resource) + return job diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py new file mode 100644 index 000000000000..e784af0a6bb8 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -0,0 +1,758 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes for load jobs.""" + +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.external_config import HivePartitioningOptions +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery.table import RangePartitioning +from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import TimePartitioning + +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference + + +class LoadJobConfig(_JobConfig): + """Configuration options for load jobs. + + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. + """ + + def __init__(self, **kwargs): + super(LoadJobConfig, self).__init__("load", **kwargs) + + @property + def allow_jagged_rows(self): + """Optional[bool]: Allow missing trailing optional columns (CSV only). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_jagged_rows + """ + return self._get_sub_prop("allowJaggedRows") + + @allow_jagged_rows.setter + def allow_jagged_rows(self, value): + self._set_sub_prop("allowJaggedRows", value) + + @property + def allow_quoted_newlines(self): + """Optional[bool]: Allow quoted data containing newline characters (CSV only). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_quoted_newlines + """ + return self._get_sub_prop("allowQuotedNewlines") + + @allow_quoted_newlines.setter + def allow_quoted_newlines(self, value): + self._set_sub_prop("allowQuotedNewlines", value) + + @property + def autodetect(self): + """Optional[bool]: Automatically infer the schema from a sample of the data. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.autodetect + """ + return self._get_sub_prop("autodetect") + + @autodetect.setter + def autodetect(self, value): + self._set_sub_prop("autodetect", value) + + @property + def clustering_fields(self): + """Optional[List[str]]: Fields defining clustering for the table + + (Defaults to :data:`None`). + + Clustering fields are immutable after table creation. + + .. note:: + + BigQuery supports clustering for both partitioned and + non-partitioned tables. + """ + prop = self._get_sub_prop("clustering") + if prop is not None: + return list(prop.get("fields", ())) + + @clustering_fields.setter + def clustering_fields(self, value): + """Optional[List[str]]: Fields defining clustering for the table + + (Defaults to :data:`None`). + """ + if value is not None: + self._set_sub_prop("clustering", {"fields": value}) + else: + self._del_sub_prop("clustering") + + @property + def create_disposition(self): + """Optional[google.cloud.bigquery.job.CreateDisposition]: Specifies behavior + for creating tables. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_disposition + """ + return self._get_sub_prop("createDisposition") + + @create_disposition.setter + def create_disposition(self, value): + self._set_sub_prop("createDisposition", value) + + @property + def destination_encryption_configuration(self): + """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_encryption_configuration + """ + prop = self._get_sub_prop("destinationEncryptionConfiguration") + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @destination_encryption_configuration.setter + def destination_encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("destinationEncryptionConfiguration", api_repr) + else: + self._del_sub_prop("destinationEncryptionConfiguration") + + @property + def destination_table_description(self): + """Optional[str]: Name given to destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description + """ + prop = self._get_sub_prop("destinationTableProperties") + if prop is not None: + return prop["description"] + + @destination_table_description.setter + def destination_table_description(self, value): + keys = [self._job_type, "destinationTableProperties", "description"] + if value is not None: + _helpers._set_sub_prop(self._properties, keys, value) + else: + _helpers._del_sub_prop(self._properties, keys) + + @property + def destination_table_friendly_name(self): + """Optional[str]: Name given to destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name + """ + prop = self._get_sub_prop("destinationTableProperties") + if prop is not None: + return prop["friendlyName"] + + @destination_table_friendly_name.setter + def destination_table_friendly_name(self, value): + keys = [self._job_type, "destinationTableProperties", "friendlyName"] + if value is not None: + _helpers._set_sub_prop(self._properties, keys, value) + else: + _helpers._del_sub_prop(self._properties, keys) + + @property + def encoding(self): + """Optional[google.cloud.bigquery.job.Encoding]: The character encoding of the + data. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.encoding + """ + return self._get_sub_prop("encoding") + + @encoding.setter + def encoding(self, value): + self._set_sub_prop("encoding", value) + + @property + def field_delimiter(self): + """Optional[str]: The separator for fields in a CSV file. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.field_delimiter + """ + return self._get_sub_prop("fieldDelimiter") + + @field_delimiter.setter + def field_delimiter(self, value): + self._set_sub_prop("fieldDelimiter", value) + + @property + def hive_partitioning(self): + """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ + it configures hive partitioning support. + + .. note:: + **Experimental**. This feature is experimental and might change or + have limited support. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options + """ + prop = self._get_sub_prop("hivePartitioningOptions") + if prop is None: + return None + return HivePartitioningOptions.from_api_repr(prop) + + @hive_partitioning.setter + def hive_partitioning(self, value): + if value is not None: + if isinstance(value, HivePartitioningOptions): + value = value.to_api_repr() + else: + raise TypeError("Expected a HivePartitioningOptions instance or None.") + + self._set_sub_prop("hivePartitioningOptions", value) + + @property + def ignore_unknown_values(self): + """Optional[bool]: Ignore extra values not represented in the table schema. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.ignore_unknown_values + """ + return self._get_sub_prop("ignoreUnknownValues") + + @ignore_unknown_values.setter + def ignore_unknown_values(self, value): + self._set_sub_prop("ignoreUnknownValues", value) + + @property + def max_bad_records(self): + """Optional[int]: Number of invalid rows to ignore. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.max_bad_records + """ + return _helpers._int_or_none(self._get_sub_prop("maxBadRecords")) + + @max_bad_records.setter + def max_bad_records(self, value): + self._set_sub_prop("maxBadRecords", value) + + @property + def null_marker(self): + """Optional[str]: Represents a null value (CSV only). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_marker + """ + return self._get_sub_prop("nullMarker") + + @null_marker.setter + def null_marker(self, value): + self._set_sub_prop("nullMarker", value) + + @property + def quote_character(self): + """Optional[str]: Character used to quote data sections (CSV only). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.quote + """ + return self._get_sub_prop("quote") + + @quote_character.setter + def quote_character(self, value): + self._set_sub_prop("quote", value) + + @property + def range_partitioning(self): + """Optional[google.cloud.bigquery.table.RangePartitioning]: + Configures range-based partitioning for destination table. + + .. note:: + **Beta**. The integer range partitioning feature is in a + pre-release state and might change or have limited support. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.RangePartitioning` or + :data:`None`. + """ + resource = self._get_sub_prop("rangePartitioning") + if resource is not None: + return RangePartitioning(_properties=resource) + + @range_partitioning.setter + def range_partitioning(self, value): + resource = value + if isinstance(value, RangePartitioning): + resource = value._properties + elif value is not None: + raise ValueError( + "Expected value to be RangePartitioning or None, got {}.".format(value) + ) + self._set_sub_prop("rangePartitioning", resource) + + @property + def schema(self): + """Optional[Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]]: Schema of the destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.schema + """ + schema = _helpers._get_sub_prop(self._properties, ["load", "schema", "fields"]) + if schema is None: + return + return [SchemaField.from_api_repr(field) for field in schema] + + @schema.setter + def schema(self, value): + if value is None: + self._del_sub_prop("schema") + return + + value = _to_schema_fields(value) + + _helpers._set_sub_prop( + self._properties, + ["load", "schema", "fields"], + [field.to_api_repr() for field in value], + ) + + @property + def schema_update_options(self): + """Optional[List[google.cloud.bigquery.job.SchemaUpdateOption]]: Specifies + updates to the destination table schema to allow as a side effect of + the load job. + """ + return self._get_sub_prop("schemaUpdateOptions") + + @schema_update_options.setter + def schema_update_options(self, values): + self._set_sub_prop("schemaUpdateOptions", values) + + @property + def skip_leading_rows(self): + """Optional[int]: Number of rows to skip when reading data (CSV only). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.skip_leading_rows + """ + return _helpers._int_or_none(self._get_sub_prop("skipLeadingRows")) + + @skip_leading_rows.setter + def skip_leading_rows(self, value): + self._set_sub_prop("skipLeadingRows", str(value)) + + @property + def source_format(self): + """Optional[google.cloud.bigquery.job.SourceFormat]: File format of the data. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_format + """ + return self._get_sub_prop("sourceFormat") + + @source_format.setter + def source_format(self, value): + self._set_sub_prop("sourceFormat", value) + + @property + def time_partitioning(self): + """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based + partitioning for the destination table. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + """ + prop = self._get_sub_prop("timePartitioning") + if prop is not None: + prop = TimePartitioning.from_api_repr(prop) + return prop + + @time_partitioning.setter + def time_partitioning(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("timePartitioning", api_repr) + else: + self._del_sub_prop("timePartitioning") + + @property + def use_avro_logical_types(self): + """Optional[bool]: For loads of Avro data, governs whether Avro logical types are + converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than + raw types (e.g. INTEGER). + """ + return self._get_sub_prop("useAvroLogicalTypes") + + @use_avro_logical_types.setter + def use_avro_logical_types(self, value): + self._set_sub_prop("useAvroLogicalTypes", bool(value)) + + @property + def write_disposition(self): + """Optional[google.cloud.bigquery.job.WriteDisposition]: Action that occurs if + the destination table already exists. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.write_disposition + """ + return self._get_sub_prop("writeDisposition") + + @write_disposition.setter + def write_disposition(self, value): + self._set_sub_prop("writeDisposition", value) + + +class LoadJob(_AsyncJob): + """Asynchronous job for loading data into a table. + + Can load from Google Cloud Storage URIs or from a file. + + Args: + job_id (str): the job's ID + + source_uris (Optional[Sequence[str]]): + URIs of one or more data files to be loaded. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris + for supported URI formats. Pass None for jobs that load from a file. + + destination (google.cloud.bigquery.table.TableReference): reference to table into which data is to be loaded. + + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration + for the dataset (which requires a project). + """ + + _JOB_TYPE = "load" + + def __init__(self, job_id, source_uris, destination, client, job_config=None): + super(LoadJob, self).__init__(job_id, client) + + if not job_config: + job_config = LoadJobConfig() + + self._configuration = job_config + self._properties["configuration"] = job_config._properties + + if source_uris is not None: + _helpers._set_sub_prop( + self._properties, ["configuration", "load", "sourceUris"], source_uris + ) + + if destination is not None: + _helpers._set_sub_prop( + self._properties, + ["configuration", "load", "destinationTable"], + destination.to_api_repr(), + ) + + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: table where loaded rows are written + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_table + """ + dest_config = _helpers._get_sub_prop( + self._properties, ["configuration", "load", "destinationTable"] + ) + return TableReference.from_api_repr(dest_config) + + @property + def source_uris(self): + """Optional[Sequence[str]]: URIs of data files to be loaded. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris + for supported URI formats. None for jobs that load from a file. + """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "load", "sourceUris"] + ) + + @property + def allow_jagged_rows(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`. + """ + return self._configuration.allow_jagged_rows + + @property + def allow_quoted_newlines(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`. + """ + return self._configuration.allow_quoted_newlines + + @property + def autodetect(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`. + """ + return self._configuration.autodetect + + @property + def create_disposition(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`. + """ + return self._configuration.create_disposition + + @property + def encoding(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`. + """ + return self._configuration.encoding + + @property + def field_delimiter(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`. + """ + return self._configuration.field_delimiter + + @property + def ignore_unknown_values(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`. + """ + return self._configuration.ignore_unknown_values + + @property + def max_bad_records(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`. + """ + return self._configuration.max_bad_records + + @property + def null_marker(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`. + """ + return self._configuration.null_marker + + @property + def quote_character(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`. + """ + return self._configuration.quote_character + + @property + def skip_leading_rows(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`. + """ + return self._configuration.skip_leading_rows + + @property + def source_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`. + """ + return self._configuration.source_format + + @property + def write_disposition(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`. + """ + return self._configuration.write_disposition + + @property + def schema(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.schema`. + """ + return self._configuration.schema + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) + or :data:`None` if using default encryption. + + See + :attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`. + """ + return self._configuration.destination_encryption_configuration + + @property + def destination_table_description(self): + """Optional[str] name given to destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description + """ + return self._configuration.destination_table_description + + @property + def destination_table_friendly_name(self): + """Optional[str] name given to destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name + """ + return self._configuration.destination_table_friendly_name + + @property + def range_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + """ + return self._configuration.range_partitioning + + @property + def time_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`. + """ + return self._configuration.time_partitioning + + @property + def use_avro_logical_types(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`. + """ + return self._configuration.use_avro_logical_types + + @property + def clustering_fields(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`. + """ + return self._configuration.clustering_fields + + @property + def schema_update_options(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`. + """ + return self._configuration.schema_update_options + + @property + def input_file_bytes(self): + """Count of bytes loaded from source files. + + Returns: + Optional[int]: the count (None until set from the server). + + Raises: + ValueError: for invalid value types. + """ + return _helpers._int_or_none( + _helpers._get_sub_prop( + self._properties, ["statistics", "load", "inputFileBytes"] + ) + ) + + @property + def input_files(self): + """Count of source files. + + Returns: + Optional[int]: the count (None until set from the server). + """ + return _helpers._int_or_none( + _helpers._get_sub_prop( + self._properties, ["statistics", "load", "inputFiles"] + ) + ) + + @property + def output_bytes(self): + """Count of bytes saved to destination table. + + Returns: + Optional[int]: the count (None until set from the server). + """ + return _helpers._int_or_none( + _helpers._get_sub_prop( + self._properties, ["statistics", "load", "outputBytes"] + ) + ) + + @property + def output_rows(self): + """Count of rows saved to destination table. + + Returns: + Optional[int]: the count (None until set from the server). + """ + return _helpers._int_or_none( + _helpers._get_sub_prop( + self._properties, ["statistics", "load", "outputRows"] + ) + ) + + def to_api_repr(self): + """Generate a resource for :meth:`_begin`.""" + # Exclude statistics, if set. + return { + "jobReference": self._properties["jobReference"], + "configuration": self._properties["configuration"], + } + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + .. note: + + This method assumes that the project found in the resource matches + the client's project. + + Args: + resource (Dict): dataset job representation returned from the API + + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. + + Returns: + google.cloud.bigquery.job.LoadJob: Job parsed from ``resource``. + """ + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client) + job._set_properties(resource) + return job diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py new file mode 100644 index 000000000000..e25077360da5 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -0,0 +1,1644 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes for query jobs.""" + +import concurrent.futures +import copy +import re + +from google.api_core import exceptions +import requests +import six + +from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetListItem +from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.query import _query_param_from_api_repr +from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import UDFResource +from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.routine import RoutineReference +from google.cloud.bigquery.table import _EmptyRowIterator +from google.cloud.bigquery.table import RangePartitioning +from google.cloud.bigquery.table import _table_arg_to_table_ref +from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import TimePartitioning + +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _DONE_STATE +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference + + +_CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) +_TIMEOUT_BUFFER_SECS = 0.1 + + +def _contains_order_by(query): + """Do we need to preserve the order of the query results? + + This function has known false positives, such as with ordered window + functions: + + .. code-block:: sql + + SELECT SUM(x) OVER ( + window_name + PARTITION BY... + ORDER BY... + window_frame_clause) + FROM ... + + This false positive failure case means the behavior will be correct, but + downloading results with the BigQuery Storage API may be slower than it + otherwise would. This is preferable to the false negative case, where + results are expected to be in order but are not (due to parallel reads). + """ + return query and _CONTAINS_ORDER_BY.search(query) + + +def _from_api_repr_query_parameters(resource): + return [_query_param_from_api_repr(mapping) for mapping in resource] + + +def _to_api_repr_query_parameters(value): + return [query_parameter.to_api_repr() for query_parameter in value] + + +def _from_api_repr_udf_resources(resource): + udf_resources = [] + for udf_mapping in resource: + for udf_type, udf_value in udf_mapping.items(): + udf_resources.append(UDFResource(udf_type, udf_value)) + return udf_resources + + +def _to_api_repr_udf_resources(value): + return [{udf_resource.udf_type: udf_resource.value} for udf_resource in value] + + +def _from_api_repr_table_defs(resource): + return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()} + + +def _to_api_repr_table_defs(value): + return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} + + +class QueryJobConfig(_JobConfig): + """Configuration options for query jobs. + + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. + """ + + def __init__(self, **kwargs): + super(QueryJobConfig, self).__init__("query", **kwargs) + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_encryption_configuration + """ + prop = self._get_sub_prop("destinationEncryptionConfiguration") + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @destination_encryption_configuration.setter + def destination_encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("destinationEncryptionConfiguration", api_repr) + + @property + def allow_large_results(self): + """bool: Allow large query results tables (legacy SQL, only) + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.allow_large_results + """ + return self._get_sub_prop("allowLargeResults") + + @allow_large_results.setter + def allow_large_results(self, value): + self._set_sub_prop("allowLargeResults", value) + + @property + def create_disposition(self): + """google.cloud.bigquery.job.CreateDisposition: Specifies behavior + for creating tables. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.create_disposition + """ + return self._get_sub_prop("createDisposition") + + @create_disposition.setter + def create_disposition(self, value): + self._set_sub_prop("createDisposition", value) + + @property + def default_dataset(self): + """google.cloud.bigquery.dataset.DatasetReference: the default dataset + to use for unqualified table names in the query or :data:`None` if not + set. + + The ``default_dataset`` setter accepts: + + - a :class:`~google.cloud.bigquery.dataset.Dataset`, or + - a :class:`~google.cloud.bigquery.dataset.DatasetReference`, or + - a :class:`str` of the fully-qualified dataset ID in standard SQL + format. The value must included a project ID and dataset ID + separated by ``.``. For example: ``your-project.your_dataset``. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.default_dataset + """ + prop = self._get_sub_prop("defaultDataset") + if prop is not None: + prop = DatasetReference.from_api_repr(prop) + return prop + + @default_dataset.setter + def default_dataset(self, value): + if value is None: + self._set_sub_prop("defaultDataset", None) + return + + if isinstance(value, six.string_types): + value = DatasetReference.from_string(value) + + if isinstance(value, (Dataset, DatasetListItem)): + value = value.reference + + resource = value.to_api_repr() + self._set_sub_prop("defaultDataset", resource) + + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: table where results are + written or :data:`None` if not set. + + The ``destination`` setter accepts: + + - a :class:`~google.cloud.bigquery.table.Table`, or + - a :class:`~google.cloud.bigquery.table.TableReference`, or + - a :class:`str` of the fully-qualified table ID in standard SQL + format. The value must included a project ID, dataset ID, and table + ID, each separated by ``.``. For example: + ``your-project.your_dataset.your_table``. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table + """ + prop = self._get_sub_prop("destinationTable") + if prop is not None: + prop = TableReference.from_api_repr(prop) + return prop + + @destination.setter + def destination(self, value): + if value is None: + self._set_sub_prop("destinationTable", None) + return + + value = _table_arg_to_table_ref(value) + resource = value.to_api_repr() + self._set_sub_prop("destinationTable", resource) + + @property + def dry_run(self): + """bool: :data:`True` if this query should be a dry run to estimate + costs. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.dry_run + """ + return self._properties.get("dryRun") + + @dry_run.setter + def dry_run(self, value): + self._properties["dryRun"] = value + + @property + def flatten_results(self): + """bool: Flatten nested/repeated fields in results. (Legacy SQL only) + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.flatten_results + """ + return self._get_sub_prop("flattenResults") + + @flatten_results.setter + def flatten_results(self, value): + self._set_sub_prop("flattenResults", value) + + @property + def maximum_billing_tier(self): + """int: Deprecated. Changes the billing tier to allow high-compute + queries. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_billing_tier + """ + return self._get_sub_prop("maximumBillingTier") + + @maximum_billing_tier.setter + def maximum_billing_tier(self, value): + self._set_sub_prop("maximumBillingTier", value) + + @property + def maximum_bytes_billed(self): + """int: Maximum bytes to be billed for this job or :data:`None` if not set. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_bytes_billed + """ + return _helpers._int_or_none(self._get_sub_prop("maximumBytesBilled")) + + @maximum_bytes_billed.setter + def maximum_bytes_billed(self, value): + self._set_sub_prop("maximumBytesBilled", str(value)) + + @property + def priority(self): + """google.cloud.bigquery.job.QueryPriority: Priority of the query. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.priority + """ + return self._get_sub_prop("priority") + + @priority.setter + def priority(self, value): + self._set_sub_prop("priority", value) + + @property + def query_parameters(self): + """List[Union[google.cloud.bigquery.query.ArrayQueryParameter, \ + google.cloud.bigquery.query.ScalarQueryParameter, \ + google.cloud.bigquery.query.StructQueryParameter]]: list of parameters + for parameterized query (empty by default) + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query_parameters + """ + prop = self._get_sub_prop("queryParameters", default=[]) + return _from_api_repr_query_parameters(prop) + + @query_parameters.setter + def query_parameters(self, values): + self._set_sub_prop("queryParameters", _to_api_repr_query_parameters(values)) + + @property + def range_partitioning(self): + """Optional[google.cloud.bigquery.table.RangePartitioning]: + Configures range-based partitioning for destination table. + + .. note:: + **Beta**. The integer range partitioning feature is in a + pre-release state and might change or have limited support. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.RangePartitioning` or + :data:`None`. + """ + resource = self._get_sub_prop("rangePartitioning") + if resource is not None: + return RangePartitioning(_properties=resource) + + @range_partitioning.setter + def range_partitioning(self, value): + resource = value + if isinstance(value, RangePartitioning): + resource = value._properties + elif value is not None: + raise ValueError( + "Expected value to be RangePartitioning or None, got {}.".format(value) + ) + self._set_sub_prop("rangePartitioning", resource) + + @property + def udf_resources(self): + """List[google.cloud.bigquery.query.UDFResource]: user + defined function resources (empty by default) + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.user_defined_function_resources + """ + prop = self._get_sub_prop("userDefinedFunctionResources", default=[]) + return _from_api_repr_udf_resources(prop) + + @udf_resources.setter + def udf_resources(self, values): + self._set_sub_prop( + "userDefinedFunctionResources", _to_api_repr_udf_resources(values) + ) + + @property + def use_legacy_sql(self): + """bool: Use legacy SQL syntax. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_legacy_sql + """ + return self._get_sub_prop("useLegacySql") + + @use_legacy_sql.setter + def use_legacy_sql(self, value): + self._set_sub_prop("useLegacySql", value) + + @property + def use_query_cache(self): + """bool: Look for the query result in the cache. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_query_cache + """ + return self._get_sub_prop("useQueryCache") + + @use_query_cache.setter + def use_query_cache(self, value): + self._set_sub_prop("useQueryCache", value) + + @property + def write_disposition(self): + """google.cloud.bigquery.job.WriteDisposition: Action that occurs if + the destination table already exists. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.write_disposition + """ + return self._get_sub_prop("writeDisposition") + + @write_disposition.setter + def write_disposition(self, value): + self._set_sub_prop("writeDisposition", value) + + @property + def table_definitions(self): + """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: + Definitions for external tables or :data:`None` if not set. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.external_table_definitions + """ + prop = self._get_sub_prop("tableDefinitions") + if prop is not None: + prop = _from_api_repr_table_defs(prop) + return prop + + @table_definitions.setter + def table_definitions(self, values): + self._set_sub_prop("tableDefinitions", _to_api_repr_table_defs(values)) + + @property + def time_partitioning(self): + """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies + time-based partitioning for the destination table. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.TimePartitioning` or + :data:`None`. + """ + prop = self._get_sub_prop("timePartitioning") + if prop is not None: + prop = TimePartitioning.from_api_repr(prop) + return prop + + @time_partitioning.setter + def time_partitioning(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("timePartitioning", api_repr) + + @property + def clustering_fields(self): + """Optional[List[str]]: Fields defining clustering for the table + + (Defaults to :data:`None`). + + Clustering fields are immutable after table creation. + + .. note:: + + BigQuery supports clustering for both partitioned and + non-partitioned tables. + """ + prop = self._get_sub_prop("clustering") + if prop is not None: + return list(prop.get("fields", ())) + + @clustering_fields.setter + def clustering_fields(self, value): + """Optional[List[str]]: Fields defining clustering for the table + + (Defaults to :data:`None`). + """ + if value is not None: + self._set_sub_prop("clustering", {"fields": value}) + else: + self._del_sub_prop("clustering") + + @property + def schema_update_options(self): + """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies + updates to the destination table schema to allow as a side effect of + the query job. + """ + return self._get_sub_prop("schemaUpdateOptions") + + @schema_update_options.setter + def schema_update_options(self, values): + self._set_sub_prop("schemaUpdateOptions", values) + + def to_api_repr(self): + """Build an API representation of the query job config. + + Returns: + Dict: A dictionary in the format used by the BigQuery API. + """ + resource = copy.deepcopy(self._properties) + + # Query parameters have an addition property associated with them + # to indicate if the query is using named or positional parameters. + query_parameters = resource["query"].get("queryParameters") + if query_parameters: + if query_parameters[0].get("name") is None: + resource["query"]["parameterMode"] = "POSITIONAL" + else: + resource["query"]["parameterMode"] = "NAMED" + + return resource + + +class QueryJob(_AsyncJob): + """Asynchronous job: query tables. + + Args: + job_id (str): the job's ID, within the project belonging to ``client``. + + query (str): SQL query string. + + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration + for the dataset (which requires a project). + + job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): + Extra configuration options for the query job. + """ + + _JOB_TYPE = "query" + _UDF_KEY = "userDefinedFunctionResources" + + def __init__(self, job_id, query, client, job_config=None): + super(QueryJob, self).__init__(job_id, client) + + if job_config is None: + job_config = QueryJobConfig() + if job_config.use_legacy_sql is None: + job_config.use_legacy_sql = False + + self._properties["configuration"] = job_config._properties + self._configuration = job_config + + if query: + _helpers._set_sub_prop( + self._properties, ["configuration", "query", "query"], query + ) + + self._query_results = None + self._done_timeout = None + self._transport_timeout = None + + @property + def allow_large_results(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. + """ + return self._configuration.allow_large_results + + @property + def create_disposition(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.create_disposition`. + """ + return self._configuration.create_disposition + + @property + def default_dataset(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.default_dataset`. + """ + return self._configuration.default_dataset + + @property + def destination(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.destination`. + """ + return self._configuration.destination + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See + :attr:`google.cloud.bigquery.job.QueryJobConfig.destination_encryption_configuration`. + """ + return self._configuration.destination_encryption_configuration + + @property + def dry_run(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.dry_run`. + """ + return self._configuration.dry_run + + @property + def flatten_results(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.flatten_results`. + """ + return self._configuration.flatten_results + + @property + def priority(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.priority`. + """ + return self._configuration.priority + + @property + def query(self): + """str: The query text used in this query job. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query + """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "query", "query"] + ) + + @property + def query_parameters(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.query_parameters`. + """ + return self._configuration.query_parameters + + @property + def udf_resources(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.udf_resources`. + """ + return self._configuration.udf_resources + + @property + def use_legacy_sql(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + """ + return self._configuration.use_legacy_sql + + @property + def use_query_cache(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. + """ + return self._configuration.use_query_cache + + @property + def write_disposition(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.write_disposition`. + """ + return self._configuration.write_disposition + + @property + def maximum_billing_tier(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. + """ + return self._configuration.maximum_billing_tier + + @property + def maximum_bytes_billed(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. + """ + return self._configuration.maximum_bytes_billed + + @property + def range_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`. + """ + return self._configuration.range_partitioning + + @property + def table_definitions(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.table_definitions`. + """ + return self._configuration.table_definitions + + @property + def time_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.time_partitioning`. + """ + return self._configuration.time_partitioning + + @property + def clustering_fields(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`. + """ + return self._configuration.clustering_fields + + @property + def schema_update_options(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.schema_update_options`. + """ + return self._configuration.schema_update_options + + def to_api_repr(self): + """Generate a resource for :meth:`_begin`.""" + # Use to_api_repr to allow for some configuration properties to be set + # automatically. + configuration = self._configuration.to_api_repr() + return { + "jobReference": self._properties["jobReference"], + "configuration": configuration, + } + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + Args: + resource (Dict): dataset job representation returned from the API + + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. + + Returns: + google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``. + """ + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, client=client) + job._set_properties(resource) + return job + + @property + def query_plan(self): + """Return query plan from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan + + Returns: + List[google.cloud.bigquery.job.QueryPlanEntry]: + mappings describing the query plan, or an empty list + if the query has not yet completed. + """ + plan_entries = self._job_statistics().get("queryPlan", ()) + return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] + + @property + def timeline(self): + """List(TimelineEntry): Return the query execution timeline + from job statistics. + """ + raw = self._job_statistics().get("timeline", ()) + return [TimelineEntry.from_api_repr(entry) for entry in raw] + + @property + def total_bytes_processed(self): + """Return total bytes processed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_processed + + Returns: + Optional[int]: + Total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get("totalBytesProcessed") + if result is not None: + result = int(result) + return result + + @property + def total_bytes_billed(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_billed + + Returns: + Optional[int]: + Total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get("totalBytesBilled") + if result is not None: + result = int(result) + return result + + @property + def billing_tier(self): + """Return billing tier from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.billing_tier + + Returns: + Optional[int]: + Billing tier used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get("billingTier") + + @property + def cache_hit(self): + """Return whether or not query results were served from cache. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.cache_hit + + Returns: + Optional[bool]: + whether the query results were returned from cache, or None + if job is not yet complete. + """ + return self._job_statistics().get("cacheHit") + + @property + def ddl_operation_performed(self): + """Optional[str]: Return the DDL operation performed. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_operation_performed + + """ + return self._job_statistics().get("ddlOperationPerformed") + + @property + def ddl_target_routine(self): + """Optional[google.cloud.bigquery.routine.RoutineReference]: Return the DDL target routine, present + for CREATE/DROP FUNCTION/PROCEDURE queries. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_routine + """ + prop = self._job_statistics().get("ddlTargetRoutine") + if prop is not None: + prop = RoutineReference.from_api_repr(prop) + return prop + + @property + def ddl_target_table(self): + """Optional[google.cloud.bigquery.table.TableReference]: Return the DDL target table, present + for CREATE/DROP TABLE/VIEW queries. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_table + """ + prop = self._job_statistics().get("ddlTargetTable") + if prop is not None: + prop = TableReference.from_api_repr(prop) + return prop + + @property + def num_dml_affected_rows(self): + """Return the number of DML rows affected by the job. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.num_dml_affected_rows + + Returns: + Optional[int]: + number of DML rows affected by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get("numDmlAffectedRows") + if result is not None: + result = int(result) + return result + + @property + def slot_millis(self): + """Union[int, None]: Slot-milliseconds used by this query job.""" + return _helpers._int_or_none(self._job_statistics().get("totalSlotMs")) + + @property + def statement_type(self): + """Return statement type from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.statement_type + + Returns: + Optional[str]: + type of statement used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get("statementType") + + @property + def referenced_tables(self): + """Return referenced tables from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_tables + + Returns: + List[Dict]: + mappings describing the query plan, or an empty list + if the query has not yet completed. + """ + tables = [] + datasets_by_project_name = {} + + for table in self._job_statistics().get("referencedTables", ()): + + t_project = table["projectId"] + + ds_id = table["datasetId"] + t_dataset = datasets_by_project_name.get((t_project, ds_id)) + if t_dataset is None: + t_dataset = DatasetReference(t_project, ds_id) + datasets_by_project_name[(t_project, ds_id)] = t_dataset + + t_name = table["tableId"] + tables.append(t_dataset.table(t_name)) + + return tables + + @property + def undeclared_query_parameters(self): + """Return undeclared query parameters from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.undeclared_query_parameters + + Returns: + List[Union[ \ + google.cloud.bigquery.query.ArrayQueryParameter, \ + google.cloud.bigquery.query.ScalarQueryParameter, \ + google.cloud.bigquery.query.StructQueryParameter \ + ]]: + Undeclared parameters, or an empty list if the query has + not yet completed. + """ + parameters = [] + undeclared = self._job_statistics().get("undeclaredQueryParameters", ()) + + for parameter in undeclared: + p_type = parameter["parameterType"] + + if "arrayType" in p_type: + klass = ArrayQueryParameter + elif "structTypes" in p_type: + klass = StructQueryParameter + else: + klass = ScalarQueryParameter + + parameters.append(klass.from_api_repr(parameter)) + + return parameters + + @property + def estimated_bytes_processed(self): + """Return the estimated number of bytes processed by the query. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.estimated_bytes_processed + + Returns: + Optional[int]: + number of DML rows affected by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get("estimatedBytesProcessed") + if result is not None: + result = int(result) + return result + + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + """Refresh the job and checks if it is complete. + + Args: + retry (Optional[google.api_core.retry.Retry]): + How to retry the call that retrieves query results. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + reload (Optional[bool]): + If ``True``, make an API call to refresh the job state of + unfinished jobs before checking. Default ``True``. + + Returns: + bool: True if the job is complete, False otherwise. + """ + is_done = ( + # Only consider a QueryJob complete when we know we have the final + # query results available. + self._query_results is not None + and self._query_results.complete + and self.state == _DONE_STATE + ) + # Do not refresh if the state is already done, as the job will not + # change once complete. + if not reload or is_done: + return is_done + + # Since the API to getQueryResults can hang up to the timeout value + # (default of 10 seconds), set the timeout parameter to ensure that + # the timeout from the futures API is respected. See: + # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 + timeout_ms = None + if self._done_timeout is not None: + # Subtract a buffer for context switching, network latency, etc. + api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS + api_timeout = max(min(api_timeout, 10), 0) + self._done_timeout -= api_timeout + self._done_timeout = max(0, self._done_timeout) + timeout_ms = int(api_timeout * 1000) + + # If an explicit timeout is not given, fall back to the transport timeout + # stored in _blocking_poll() in the process of polling for job completion. + transport_timeout = timeout if timeout is not None else self._transport_timeout + + self._query_results = self._client._get_query_results( + self.job_id, + retry, + project=self.project, + timeout_ms=timeout_ms, + location=self.location, + timeout=transport_timeout, + ) + + # Only reload the job once we know the query is complete. + # This will ensure that fields such as the destination table are + # correctly populated. + if self._query_results.complete and self.state != _DONE_STATE: + self.reload(retry=retry, timeout=transport_timeout) + + return self.state == _DONE_STATE + + def _blocking_poll(self, timeout=None, **kwargs): + self._done_timeout = timeout + self._transport_timeout = timeout + super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs) + + @staticmethod + def _format_for_exception(query, job_id): + """Format a query for the output in exception message. + + Args: + query (str): The SQL query to format. + job_id (str): The ID of the job that ran the query. + + Returns: + str: A formatted query text. + """ + template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}" + + lines = query.splitlines() + max_line_len = max(len(line) for line in lines) + + header = "-----Query Job SQL Follows-----" + header = "{:^{total_width}}".format(header, total_width=max_line_len + 5) + + # Print out a "ruler" above and below the SQL so we can judge columns. + # Left pad for the line numbers (4 digits plus ":"). + ruler = " |" + " . |" * (max_line_len // 10) + + # Put line numbers next to the SQL. + body = "\n".join( + "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1) + ) + + return template.format(job_id=job_id, header=header, ruler=ruler, body=body) + + def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: begin the job via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + The client to use. If not passed, falls back to the ``client`` + associated with the job object or``NoneType``. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + + Raises: + ValueError: If the job has already begun. + """ + + try: + super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout) + except exceptions.GoogleAPICallError as exc: + exc.message += self._format_for_exception(self.query, self.job_id) + exc.query_job = self + raise + + def result( + self, + page_size=None, + max_results=None, + retry=DEFAULT_RETRY, + timeout=None, + start_index=None, + ): + """Start the job and wait for it to complete and get the result. + + Args: + page_size (Optional[int]): + The maximum number of rows in each page of results from this + request. Non-positive values are ignored. + max_results (Optional[int]): + The maximum total number of rows from this request. + retry (Optional[google.api_core.retry.Retry]): + How to retry the call that retrieves rows. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. + start_index (Optional[int]): + The zero-based index of the starting row to read. + + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. During each + page, the iterator will have the ``total_rows`` attribute + set, which counts the total number of rows **in the result + set** (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). + + If the query is a special query that produces no results, e.g. + a DDL query, an ``_EmptyRowIterator`` instance is returned. + + Raises: + google.cloud.exceptions.GoogleAPICallError: + If the job failed. + concurrent.futures.TimeoutError: + If the job did not complete in the given timeout. + """ + try: + super(QueryJob, self).result(retry=retry, timeout=timeout) + except exceptions.GoogleAPICallError as exc: + exc.message += self._format_for_exception(self.query, self.job_id) + exc.query_job = self + raise + except requests.exceptions.Timeout as exc: + six.raise_from(concurrent.futures.TimeoutError, exc) + + # If the query job is complete but there are no query results, this was + # special job, such as a DDL query. Return an empty result set to + # indicate success and avoid calling tabledata.list on a table which + # can't be read (such as a view table). + if self._query_results.total_rows is None: + return _EmptyRowIterator() + + schema = self._query_results.schema + dest_table_ref = self.destination + dest_table = Table(dest_table_ref, schema=schema) + dest_table._properties["numRows"] = self._query_results.total_rows + rows = self._client.list_rows( + dest_table, + page_size=page_size, + max_results=max_results, + start_index=start_index, + retry=retry, + timeout=timeout, + ) + rows._preserve_order = _contains_order_by(self.query) + return rows + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_arrow() + def to_arrow( + self, + progress_bar_type=None, + bqstorage_client=None, + create_bqstorage_client=True, + ): + """[Beta] Create a class:`pyarrow.Table` by loading all pages of a + table or query. + + Args: + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This API + is a billable API. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + ..versionadded:: 1.24.0 + + Returns: + pyarrow.Table + A :class:`pyarrow.Table` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: + If the :mod:`pyarrow` library cannot be imported. + + ..versionadded:: 1.17.0 + """ + return self.result().to_arrow( + progress_bar_type=progress_bar_type, + bqstorage_client=bqstorage_client, + create_bqstorage_client=create_bqstorage_client, + ) + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_dataframe() + def to_dataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=True, + date_as_object=True, + ): + """Return a pandas DataFrame from a QueryJob + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. + + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. + + ..versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + ..versionadded:: 1.24.0 + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + ..versionadded:: 1.26.0 + + Returns: + A :class:`~pandas.DataFrame` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: If the `pandas` library cannot be imported. + """ + return self.result().to_dataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + ) + + def __iter__(self): + return iter(self.result()) + + +class QueryPlanEntryStep(object): + """Map a single step in a query plan entry. + + Args: + kind (str): step type. + substeps (List): names of substeps. + """ + + def __init__(self, kind, substeps): + self.kind = kind + self.substeps = list(substeps) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + Args: + resource (Dict): JSON representation of the entry. + + Returns: + google.cloud.bigquery.job.QueryPlanEntryStep: + New instance built from the resource. + """ + return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ())) + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self.kind == other.kind and self.substeps == other.substeps + + +class QueryPlanEntry(object): + """QueryPlanEntry represents a single stage of a query execution plan. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ExplainQueryStage + for the underlying API representation within query statistics. + """ + + def __init__(self): + self._properties = {} + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: object]): + ExplainQueryStage representation returned from API. + + Returns: + google.cloud.bigquery.job.QueryPlanEntry: + Query plan entry parsed from ``resource``. + """ + entry = cls() + entry._properties = resource + return entry + + @property + def name(self): + """Optional[str]: Human-readable name of the stage.""" + return self._properties.get("name") + + @property + def entry_id(self): + """Optional[str]: Unique ID for the stage within the plan.""" + return self._properties.get("id") + + @property + def start(self): + """Optional[Datetime]: Datetime when the stage started.""" + if self._properties.get("startMs") is None: + return None + return _helpers._datetime_from_microseconds( + int(self._properties.get("startMs")) * 1000.0 + ) + + @property + def end(self): + """Optional[Datetime]: Datetime when the stage ended.""" + if self._properties.get("endMs") is None: + return None + return _helpers._datetime_from_microseconds( + int(self._properties.get("endMs")) * 1000.0 + ) + + @property + def input_stages(self): + """List(int): Entry IDs for stages that were inputs for this stage.""" + if self._properties.get("inputStages") is None: + return [] + return [ + _helpers._int_or_none(entry) + for entry in self._properties.get("inputStages") + ] + + @property + def parallel_inputs(self): + """Optional[int]: Number of parallel input segments within + the stage. + """ + return _helpers._int_or_none(self._properties.get("parallelInputs")) + + @property + def completed_parallel_inputs(self): + """Optional[int]: Number of parallel input segments completed.""" + return _helpers._int_or_none(self._properties.get("completedParallelInputs")) + + @property + def wait_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent waiting to + be scheduled. + """ + return _helpers._int_or_none(self._properties.get("waitMsAvg")) + + @property + def wait_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent waiting to + be scheduled. + """ + return _helpers._int_or_none(self._properties.get("waitMsMax")) + + @property + def wait_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent waiting + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. + """ + return self._properties.get("waitRatioAvg") + + @property + def wait_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent waiting + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. + """ + return self._properties.get("waitRatioMax") + + @property + def read_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent reading + input. + """ + return _helpers._int_or_none(self._properties.get("readMsAvg")) + + @property + def read_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent reading + input. + """ + return _helpers._int_or_none(self._properties.get("readMsMax")) + + @property + def read_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent reading + input, relative to the longest time spent by any worker in any stage + of the overall plan. + """ + return self._properties.get("readRatioAvg") + + @property + def read_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent reading + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. + """ + return self._properties.get("readRatioMax") + + @property + def compute_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent on CPU-bound + processing. + """ + return _helpers._int_or_none(self._properties.get("computeMsAvg")) + + @property + def compute_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent on CPU-bound + processing. + """ + return _helpers._int_or_none(self._properties.get("computeMsMax")) + + @property + def compute_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent on + CPU-bound processing, relative to the longest time spent by any + worker in any stage of the overall plan. + """ + return self._properties.get("computeRatioAvg") + + @property + def compute_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent on + CPU-bound processing, relative to the longest time spent by any + worker in any stage of the overall plan. + """ + return self._properties.get("computeRatioMax") + + @property + def write_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent writing + output data. + """ + return _helpers._int_or_none(self._properties.get("writeMsAvg")) + + @property + def write_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent writing + output data. + """ + return _helpers._int_or_none(self._properties.get("writeMsMax")) + + @property + def write_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent writing + output data, relative to the longest time spent by any worker in any + stage of the overall plan. + """ + return self._properties.get("writeRatioAvg") + + @property + def write_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent writing + output data, relative to the longest time spent by any worker in any + stage of the overall plan. + """ + return self._properties.get("writeRatioMax") + + @property + def records_read(self): + """Optional[int]: Number of records read by this stage.""" + return _helpers._int_or_none(self._properties.get("recordsRead")) + + @property + def records_written(self): + """Optional[int]: Number of records written by this stage.""" + return _helpers._int_or_none(self._properties.get("recordsWritten")) + + @property + def status(self): + """Optional[str]: status of this stage.""" + return self._properties.get("status") + + @property + def shuffle_output_bytes(self): + """Optional[int]: Number of bytes written by this stage to + intermediate shuffle. + """ + return _helpers._int_or_none(self._properties.get("shuffleOutputBytes")) + + @property + def shuffle_output_bytes_spilled(self): + """Optional[int]: Number of bytes written by this stage to + intermediate shuffle and spilled to disk. + """ + return _helpers._int_or_none(self._properties.get("shuffleOutputBytesSpilled")) + + @property + def steps(self): + """List(QueryPlanEntryStep): List of step operations performed by + each worker in the stage. + """ + return [ + QueryPlanEntryStep.from_api_repr(step) + for step in self._properties.get("steps", []) + ] + + +class TimelineEntry(object): + """TimelineEntry represents progress of a query job at a particular + point in time. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#querytimelinesample + for the underlying API representation within query statistics. + """ + + def __init__(self): + self._properties = {} + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: object]): + QueryTimelineSample representation returned from API. + + Returns: + google.cloud.bigquery.TimelineEntry: + Timeline sample parsed from ``resource``. + """ + entry = cls() + entry._properties = resource + return entry + + @property + def elapsed_ms(self): + """Optional[int]: Milliseconds elapsed since start of query + execution.""" + return _helpers._int_or_none(self._properties.get("elapsedMs")) + + @property + def active_units(self): + """Optional[int]: Current number of input units being processed + by workers, reported as largest value since the last sample.""" + return _helpers._int_or_none(self._properties.get("activeUnits")) + + @property + def pending_units(self): + """Optional[int]: Current number of input units remaining for + query stages active at this sample time.""" + return _helpers._int_or_none(self._properties.get("pendingUnits")) + + @property + def completed_units(self): + """Optional[int]: Current number of input units completed by + this query.""" + return _helpers._int_or_none(self._properties.get("completedUnits")) + + @property + def slot_millis(self): + """Optional[int]: Cumulative slot-milliseconds consumed by + this query.""" + return _helpers._int_or_none(self._properties.get("totalSlotMs")) diff --git a/packages/google-cloud-bigquery/tests/unit/job/__init__.py b/packages/google-cloud-bigquery/tests/unit/job/__init__.py new file mode 100644 index 000000000000..c6334245aea5 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/packages/google-cloud-bigquery/tests/unit/job/helpers.py b/packages/google-cloud-bigquery/tests/unit/job/helpers.py new file mode 100644 index 000000000000..f928054f6127 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/helpers.py @@ -0,0 +1,198 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import mock +from google.api_core import exceptions + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(project="test-project", connection=None): + from google.cloud.bigquery.client import Client + + if connection is None: + connection = _make_connection() + + client = Client(project=project, credentials=_make_credentials(), _http=object()) + client._connection = connection + return client + + +def _make_connection(*responses): + import google.cloud.bigquery._http + from google.cloud.exceptions import NotFound + + mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) + mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] + return mock_conn + + +def _make_retriable_exception(): + return exceptions.TooManyRequests( + "retriable exception", errors=[{"reason": "rateLimitExceeded"}] + ) + + +def _make_job_resource( + creation_time_ms=1437767599006, + started_time_ms=1437767600007, + ended_time_ms=1437767601008, + started=False, + ended=False, + etag="abc-def-hjk", + endpoint="https://bigquery.googleapis.com", + job_type="load", + job_id="a-random-id", + project_id="some-project", + user_email="bq-user@example.com", +): + resource = { + "status": {"state": "PENDING"}, + "configuration": {job_type: {}}, + "statistics": {"creationTime": creation_time_ms, job_type: {}}, + "etag": etag, + "id": "{}:{}".format(project_id, job_id), + "jobReference": {"projectId": project_id, "jobId": job_id}, + "selfLink": "{}/bigquery/v2/projects/{}/jobs/{}".format( + endpoint, project_id, job_id + ), + "user_email": user_email, + } + + if started or ended: + resource["statistics"]["startTime"] = started_time_ms + resource["status"]["state"] = "RUNNING" + + if ended: + resource["statistics"]["endTime"] = ended_time_ms + resource["status"]["state"] = "DONE" + + if job_type == "query": + resource["configuration"]["query"]["destinationTable"] = { + "projectId": project_id, + "datasetId": "_temp_dataset", + "tableId": "_temp_table", + } + + return resource + + +class _Base(unittest.TestCase): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import TableReference + + ENDPOINT = "https://bigquery.googleapis.com" + PROJECT = "project" + SOURCE1 = "http://example.com/source1.csv" + DS_ID = "dataset_id" + DS_REF = DatasetReference(PROJECT, DS_ID) + TABLE_ID = "table_id" + TABLE_REF = TableReference(DS_REF, TABLE_ID) + JOB_ID = "JOB_ID" + JOB_TYPE = "unknown" + KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def _setUpConstants(self): + import datetime + from google.cloud._helpers import UTC + + self.WHEN_TS = 1437767599.006 + self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.ETAG = "ETAG" + self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) + self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format( + self.ENDPOINT, self.PROJECT, self.JOB_ID + ) + self.USER_EMAIL = "phred@example.com" + + def _table_ref(self, table_id): + from google.cloud.bigquery.table import TableReference + + return TableReference(self.DS_REF, table_id) + + def _make_resource(self, started=False, ended=False): + self._setUpConstants() + return _make_job_resource( + creation_time_ms=int(self.WHEN_TS * 1000), + started_time_ms=int(self.WHEN_TS * 1000), + ended_time_ms=int(self.WHEN_TS * 1000) + 1000000, + started=started, + ended=ended, + etag=self.ETAG, + endpoint=self.ENDPOINT, + job_type=self.JOB_TYPE, + job_id=self.JOB_ID, + project_id=self.PROJECT, + user_email=self.USER_EMAIL, + ) + + def _verifyInitialReadonlyProperties(self, job): + # root elements of resource + self.assertIsNone(job.etag) + self.assertIsNone(job.self_link) + self.assertIsNone(job.user_email) + + # derived from resource['statistics'] + self.assertIsNone(job.created) + self.assertIsNone(job.started) + self.assertIsNone(job.ended) + + # derived from resource['status'] + self.assertIsNone(job.error_result) + self.assertIsNone(job.errors) + self.assertIsNone(job.state) + + def _verifyReadonlyResourceProperties(self, job, resource): + from datetime import timedelta + + statistics = resource.get("statistics", {}) + + if "creationTime" in statistics: + self.assertEqual(job.created, self.WHEN) + else: + self.assertIsNone(job.created) + + if "startTime" in statistics: + self.assertEqual(job.started, self.WHEN) + else: + self.assertIsNone(job.started) + + if "endTime" in statistics: + self.assertEqual(job.ended, self.WHEN + timedelta(seconds=1000)) + else: + self.assertIsNone(job.ended) + + if "etag" in resource: + self.assertEqual(job.etag, self.ETAG) + else: + self.assertIsNone(job.etag) + + if "selfLink" in resource: + self.assertEqual(job.self_link, self.RESOURCE_URL) + else: + self.assertIsNone(job.self_link) + + if "user_email" in resource: + self.assertEqual(job.user_email, self.USER_EMAIL) + else: + self.assertIsNone(job.user_email) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py new file mode 100644 index 000000000000..90d4388b8fd9 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -0,0 +1,1105 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import unittest + +from google.api_core import exceptions +import google.api_core.retry +import mock +from six.moves import http_client + +from .helpers import _make_client +from .helpers import _make_connection +from .helpers import _make_retriable_exception +from .helpers import _make_job_resource + + +class Test__error_result_to_exception(unittest.TestCase): + def _call_fut(self, *args, **kwargs): + from google.cloud.bigquery import job + + return job._error_result_to_exception(*args, **kwargs) + + def test_simple(self): + error_result = {"reason": "invalid", "message": "bad request"} + exception = self._call_fut(error_result) + self.assertEqual(exception.code, http_client.BAD_REQUEST) + self.assertTrue(exception.message.startswith("bad request")) + self.assertIn(error_result, exception.errors) + + def test_missing_reason(self): + error_result = {} + exception = self._call_fut(error_result) + self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR) + + +class Test_JobReference(unittest.TestCase): + JOB_ID = "job-id" + PROJECT = "test-project-123" + LOCATION = "us-central" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._JobReference + + def _make_one(self, job_id, project, location): + return self._get_target_class()(job_id, project, location) + + def test_ctor(self): + job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) + + self.assertEqual(job_ref.job_id, self.JOB_ID) + self.assertEqual(job_ref.project, self.PROJECT) + self.assertEqual(job_ref.location, self.LOCATION) + + def test__to_api_repr(self): + job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) + + self.assertEqual( + job_ref._to_api_repr(), + { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": self.LOCATION, + }, + ) + + def test_from_api_repr(self): + api_repr = { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": self.LOCATION, + } + + job_ref = self._get_target_class()._from_api_repr(api_repr) + + self.assertEqual(job_ref.job_id, self.JOB_ID) + self.assertEqual(job_ref.project, self.PROJECT) + self.assertEqual(job_ref.location, self.LOCATION) + + +class Test_AsyncJob(unittest.TestCase): + JOB_ID = "job-id" + PROJECT = "test-project-123" + LOCATION = "us-central" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._AsyncJob + + def _make_one(self, job_id, client): + return self._get_target_class()(job_id, client) + + def _make_derived_class(self): + class Derived(self._get_target_class()): + _JOB_TYPE = "derived" + + return Derived + + def _make_derived(self, job_id, client): + return self._make_derived_class()(job_id, client) + + @staticmethod + def _job_reference(job_id, project, location): + from google.cloud.bigquery import job + + return job._JobReference(job_id, project, location) + + def test_ctor_w_bare_job_id(self): + import threading + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertEqual(job.job_id, self.JOB_ID) + self.assertEqual(job.project, self.PROJECT) + self.assertIsNone(job.location) + self.assertIs(job._client, client) + self.assertEqual( + job._properties, + {"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}}, + ) + self.assertIsInstance(job._completion_lock, type(threading.Lock())) + self.assertEqual( + job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + ) + + def test_ctor_w_job_ref(self): + import threading + + other_project = "other-project-234" + client = _make_client(project=other_project) + job_ref = self._job_reference(self.JOB_ID, self.PROJECT, self.LOCATION) + job = self._make_one(job_ref, client) + + self.assertEqual(job.job_id, self.JOB_ID) + self.assertEqual(job.project, self.PROJECT) + self.assertEqual(job.location, self.LOCATION) + self.assertIs(job._client, client) + self.assertEqual( + job._properties, + { + "jobReference": { + "projectId": self.PROJECT, + "location": self.LOCATION, + "jobId": self.JOB_ID, + } + }, + ) + self.assertFalse(job._result_set) + self.assertIsInstance(job._completion_lock, type(threading.Lock())) + self.assertEqual( + job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + ) + + def test__require_client_w_none(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIs(job._require_client(None), client) + + def test__require_client_w_other(self): + client = _make_client(project=self.PROJECT) + other = object() + job = self._make_one(self.JOB_ID, client) + + self.assertIs(job._require_client(other), other) + + def test_job_type(self): + client = _make_client(project=self.PROJECT) + derived = self._make_derived(self.JOB_ID, client) + + self.assertEqual(derived.job_type, "derived") + + def test_parent_job_id(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIsNone(job.parent_job_id) + job._properties["statistics"] = {"parentJobId": "parent-job-123"} + self.assertEqual(job.parent_job_id, "parent-job-123") + + def test_script_statistics(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIsNone(job.script_statistics) + job._properties["statistics"] = { + "scriptStatistics": { + "evaluationKind": "EXPRESSION", + "stackFrames": [ + { + "startLine": 5, + "startColumn": 29, + "endLine": 9, + "endColumn": 14, + "text": "QUERY TEXT", + } + ], + } + } + script_stats = job.script_statistics + self.assertEqual(script_stats.evaluation_kind, "EXPRESSION") + stack_frames = script_stats.stack_frames + self.assertEqual(len(stack_frames), 1) + stack_frame = stack_frames[0] + self.assertIsNone(stack_frame.procedure_id) + self.assertEqual(stack_frame.start_line, 5) + self.assertEqual(stack_frame.start_column, 29) + self.assertEqual(stack_frame.end_line, 9) + self.assertEqual(stack_frame.end_column, 14) + self.assertEqual(stack_frame.text, "QUERY TEXT") + + def test_num_child_jobs(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertEqual(job.num_child_jobs, 0) + job._properties["statistics"] = {"numChildJobs": "17"} + self.assertEqual(job.num_child_jobs, 17) + + def test_labels_miss(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertEqual(job.labels, {}) + + def test_labels_update_in_place(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + labels = job.labels + labels["foo"] = "bar" # update in place + self.assertEqual(job.labels, {"foo": "bar"}) + + def test_labels_hit(self): + labels = {"foo": "bar"} + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["labels"] = labels + self.assertEqual(job.labels, labels) + + def test_etag(self): + etag = "ETAG-123" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.etag) + job._properties["etag"] = etag + self.assertEqual(job.etag, etag) + + def test_self_link(self): + self_link = "https://api.example.com/123" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.self_link) + job._properties["selfLink"] = self_link + self.assertEqual(job.self_link, self_link) + + def test_user_email(self): + user_email = "user@example.com" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.user_email) + job._properties["user_email"] = user_email + self.assertEqual(job.user_email, user_email) + + @staticmethod + def _datetime_and_millis(): + import datetime + import pytz + from google.cloud._helpers import _millis + + now = datetime.datetime.utcnow().replace( + microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision + ) + return now, _millis(now) + + def test_created(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.created) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.created) + stats["creationTime"] = millis + self.assertEqual(job.created, now) + + def test_started(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.started) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.started) + stats["startTime"] = millis + self.assertEqual(job.started, now) + + def test_ended(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.ended) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.ended) + stats["endTime"] = millis + self.assertEqual(job.ended, now) + + def test__job_statistics(self): + statistics = {"foo": "bar"} + client = _make_client(project=self.PROJECT) + derived = self._make_derived(self.JOB_ID, client) + self.assertEqual(derived._job_statistics(), {}) + stats = derived._properties["statistics"] = {} + self.assertEqual(derived._job_statistics(), {}) + stats["derived"] = statistics + self.assertEqual(derived._job_statistics(), statistics) + + def test_error_result(self): + error_result = { + "debugInfo": "DEBUG INFO", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.error_result) + status = job._properties["status"] = {} + self.assertIsNone(job.error_result) + status["errorResult"] = error_result + self.assertEqual(job.error_result, error_result) + + def test_errors(self): + errors = [ + { + "debugInfo": "DEBUG INFO", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", + } + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.errors) + status = job._properties["status"] = {} + self.assertIsNone(job.errors) + status["errors"] = errors + self.assertEqual(job.errors, errors) + + def test_state(self): + state = "STATE" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.state) + status = job._properties["status"] = {} + self.assertIsNone(job.state) + status["state"] = state + self.assertEqual(job.state, state) + + def _set_properties_job(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._set_future_result = mock.Mock() + job._properties = { + "jobReference": job._properties["jobReference"], + "foo": "bar", + } + return job + + def test__set_properties_no_stats(self): + config = {"test": True} + resource = {"configuration": config} + job = self._set_properties_job() + + job._set_properties(resource) + + self.assertEqual(job._properties, resource) + + def test__set_properties_w_creation_time(self): + now, millis = self._datetime_and_millis() + config = {"test": True} + stats = {"creationTime": str(millis)} + resource = {"configuration": config, "statistics": stats} + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned["statistics"]["creationTime"] = float(millis) + self.assertEqual(job._properties, cleaned) + + def test__set_properties_w_start_time(self): + now, millis = self._datetime_and_millis() + config = {"test": True} + stats = {"startTime": str(millis)} + resource = {"configuration": config, "statistics": stats} + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned["statistics"]["startTime"] = float(millis) + self.assertEqual(job._properties, cleaned) + + def test__set_properties_w_end_time(self): + now, millis = self._datetime_and_millis() + config = {"test": True} + stats = {"endTime": str(millis)} + resource = {"configuration": config, "statistics": stats} + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned["statistics"]["endTime"] = float(millis) + self.assertEqual(job._properties, cleaned) + + def test__check_resource_config_missing_job_ref(self): + resource = {} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_missing_job_id(self): + resource = {"jobReference": {}} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_missing_configuration(self): + resource = {"jobReference": {"jobId": self.JOB_ID}} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_missing_config_type(self): + resource = {"jobReference": {"jobId": self.JOB_ID}, "configuration": {}} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_ok(self): + derived_config = {"foo": "bar"} + resource = { + "jobReference": {"jobId": self.JOB_ID}, + "configuration": {"derived": derived_config}, + } + klass = self._make_derived_class() + + # Should not throw. + klass._check_resource_config(resource) + + def test__build_resource(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + resource = job._build_resource() + assert resource["jobReference"]["jobId"] == self.JOB_ID + + def test_to_api_repr(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + resource = job.to_api_repr() + assert resource["jobReference"]["jobId"] == self.JOB_ID + + def test__begin_already(self): + job = self._set_properties_job() + job._properties["status"] = {"state": "WHATEVER"} + + with self.assertRaises(ValueError): + job._begin() + + def test__begin_defaults(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + builder = job.to_api_repr = mock.Mock() + builder.return_value = resource + call_api = job._client._call_api = mock.Mock() + call_api.return_value = resource + path = "/projects/{}/jobs".format(self.PROJECT) + job._begin() + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.begin", + span_attributes={"path": path}, + job_ref=job, + method="POST", + path=path, + data=resource, + timeout=None, + ) + self.assertEqual(job._properties, resource) + + def test__begin_explicit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + builder = job.to_api_repr = mock.Mock() + builder.return_value = resource + client = _make_client(project=other_project) + call_api = client._call_api = mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + path = "/projects/{}/jobs".format(self.PROJECT) + job._begin(client=client, retry=retry, timeout=7.5) + + call_api.assert_called_once_with( + retry, + span_name="BigQuery.job.begin", + span_attributes={"path": path}, + job_ref=job, + method="POST", + path=path, + data=resource, + timeout=7.5, + ) + self.assertEqual(job._properties, resource) + + def test_exists_defaults_miss(self): + from google.cloud.exceptions import NotFound + from google.cloud.bigquery.retry import DEFAULT_RETRY + + job = self._set_properties_job() + job._properties["jobReference"]["location"] = self.LOCATION + call_api = job._client._call_api = mock.Mock() + call_api.side_effect = NotFound("testing") + self.assertFalse(job.exists()) + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.exists", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"fields": "id", "location": self.LOCATION}, + timeout=None, + ) + + def test_exists_explicit_hit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + client = _make_client(project=other_project) + call_api = client._call_api = mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + self.assertTrue(job.exists(client=client, retry=retry)) + + call_api.assert_called_once_with( + retry, + span_name="BigQuery.job.exists", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"fields": "id"}, + timeout=None, + ) + + def test_exists_w_timeout(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + PATH = "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + job = self._set_properties_job() + call_api = job._client._call_api = mock.Mock() + job.exists(timeout=7.5) + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.exists", + span_attributes={"path": PATH}, + job_ref=job, + method="GET", + path=PATH, + query_params={"fields": "id"}, + timeout=7.5, + ) + + def test_reload_defaults(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + job._properties["jobReference"]["location"] = self.LOCATION + call_api = job._client._call_api = mock.Mock() + call_api.return_value = resource + job.reload() + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.reload", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"location": self.LOCATION}, + timeout=None, + ) + self.assertEqual(job._properties, resource) + + def test_reload_explicit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + client = _make_client(project=other_project) + call_api = client._call_api = mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + job.reload(client=client, retry=retry, timeout=4.2) + + call_api.assert_called_once_with( + retry, + span_name="BigQuery.job.reload", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={}, + timeout=4.2, + ) + self.assertEqual(job._properties, resource) + + def test_cancel_defaults(self): + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + response = {"job": resource} + job = self._set_properties_job() + job._properties["jobReference"]["location"] = self.LOCATION + connection = job._client._connection = _make_connection(response) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.cancel()) + + final_attributes.assert_called() + + connection.api_request.assert_called_once_with( + method="POST", + path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), + query_params={"location": self.LOCATION}, + timeout=None, + ) + self.assertEqual(job._properties, resource) + + def test_cancel_explicit(self): + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + response = {"job": resource} + job = self._set_properties_job() + client = _make_client(project=other_project) + connection = client._connection = _make_connection(response) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.cancel(client=client, timeout=7.5)) + + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID)}, + client, + job, + ) + + connection.api_request.assert_called_once_with( + method="POST", + path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), + query_params={}, + timeout=7.5, + ) + self.assertEqual(job._properties, resource) + + def test_cancel_w_custom_retry(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + api_path = "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID) + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + response = {"job": resource} + job = self._set_properties_job() + + api_request_patcher = mock.patch.object( + job._client._connection, "api_request", side_effect=[ValueError, response] + ) + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, ValueError) + ) + + with api_request_patcher as fake_api_request: + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + result = job.cancel(retry=retry, timeout=7.5) + + final_attributes.assert_called() + + self.assertTrue(result) + self.assertEqual(job._properties, resource) + self.assertEqual( + fake_api_request.call_args_list, + [ + mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), + mock.call( + method="POST", path=api_path, query_params={}, timeout=7.5 + ), # was retried once + ], + ) + + def test__set_future_result_wo_done(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_not_called() + + def test__set_future_result_w_result_set(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"state": "DONE"} + job._result_set = True + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_not_called() + + def test__set_future_result_w_done_wo_result_set_w_error(self): + from google.cloud.exceptions import NotFound + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = { + "state": "DONE", + "errorResult": {"reason": "notFound", "message": "testing"}, + } + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_called_once() + args, kw = set_exception.call_args + (exception,) = args + self.assertIsInstance(exception, NotFound) + self.assertEqual(exception.message, "testing") + self.assertEqual(kw, {}) + set_result.assert_not_called() + + def test__set_future_result_w_done_wo_result_set_wo_error(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"state": "DONE"} + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_called_once_with(job) + + def test_done_defaults_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + reload_ = job.reload = mock.Mock() + + self.assertFalse(job.done()) + + reload_.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) + + def test_done_explicit_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + reload_ = job.reload = mock.Mock() + retry = DEFAULT_RETRY.with_deadline(1) + + self.assertFalse(job.done(retry=retry, timeout=7.5)) + + reload_.assert_called_once_with(retry=retry, timeout=7.5) + + def test_done_already(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"state": "DONE"} + + self.assertTrue(job.done()) + + def test_result_default_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + _make_retriable_exception(), + begun_job_resource, + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, client) + + self.assertIs(job.result(), job) + + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) + + def test_result_w_retry_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + exceptions.NotFound("not normally retriable"), + begun_job_resource, + # The call to done() / reload() does not get the custom retry + # policy passed to it, so we don't throw a non-retriable + # exception here. See: + # https://github.com/googleapis/python-bigquery/issues/24 + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, client) + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) + self.assertIs(job.result(retry=custom_retry), job) + + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) + + def test_result_explicit_w_state(self): + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, client) + # Use _set_properties() instead of directly modifying _properties so + # that the result state is set properly. + job_resource = job._properties + job_resource["status"] = {"state": "DONE"} + job._set_properties(job_resource) + timeout = 1 + + self.assertIs(job.result(timeout=timeout), job) + + conn.api_request.assert_not_called() + + def test_cancelled_wo_error_result(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertFalse(job.cancelled()) + + def test_cancelled_w_error_result_not_stopped(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"errorResult": {"reason": "other"}} + + self.assertFalse(job.cancelled()) + + def test_cancelled_w_error_result_w_stopped(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"errorResult": {"reason": "stopped"}} + + self.assertTrue(job.cancelled()) + + +class Test_JobConfig(unittest.TestCase): + JOB_TYPE = "testing" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._JobConfig + + def _make_one(self, job_type=JOB_TYPE): + return self._get_target_class()(job_type) + + def test_ctor(self): + job_config = self._make_one() + self.assertEqual(job_config._job_type, self.JOB_TYPE) + self.assertEqual(job_config._properties, {self.JOB_TYPE: {}}) + + def test_fill_from_default(self): + from google.cloud.bigquery import QueryJobConfig + + job_config = QueryJobConfig() + job_config.dry_run = True + job_config.maximum_bytes_billed = 1000 + + default_job_config = QueryJobConfig() + default_job_config.use_query_cache = True + default_job_config.maximum_bytes_billed = 2000 + + final_job_config = job_config._fill_from_default(default_job_config) + self.assertTrue(final_job_config.dry_run) + self.assertTrue(final_job_config.use_query_cache) + self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + + def test_fill_from_default_conflict(self): + from google.cloud.bigquery import QueryJobConfig + + basic_job_config = QueryJobConfig() + conflicting_job_config = self._make_one("conflicting_job_type") + self.assertNotEqual( + basic_job_config._job_type, conflicting_job_config._job_type + ) + + with self.assertRaises(TypeError): + basic_job_config._fill_from_default(conflicting_job_config) + + @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") + def test__get_sub_prop_wo_default(self, _get_sub_prop): + job_config = self._make_one() + key = "key" + self.assertIs(job_config._get_sub_prop(key), _get_sub_prop.return_value) + _get_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], default=None + ) + + @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") + def test__get_sub_prop_w_default(self, _get_sub_prop): + job_config = self._make_one() + key = "key" + default = "default" + self.assertIs( + job_config._get_sub_prop(key, default=default), _get_sub_prop.return_value + ) + _get_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], default=default + ) + + @mock.patch("google.cloud.bigquery._helpers._set_sub_prop") + def test__set_sub_prop(self, _set_sub_prop): + job_config = self._make_one() + key = "key" + value = "value" + job_config._set_sub_prop(key, value) + _set_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], value + ) + + def test_to_api_repr(self): + job_config = self._make_one() + expected = job_config._properties = {self.JOB_TYPE: {"foo": "bar"}} + found = job_config.to_api_repr() + self.assertEqual(found, expected) + self.assertIsNot(found, expected) # copied + + # 'from_api_repr' cannot be tested on '_JobConfig', because it presumes + # the ctor can be called w/o arguments + + def test_labels_miss(self): + job_config = self._make_one() + self.assertEqual(job_config.labels, {}) + + def test_labels_update_in_place(self): + job_config = self._make_one() + labels = job_config.labels + labels["foo"] = "bar" # update in place + self.assertEqual(job_config.labels, {"foo": "bar"}) + + def test_labels_hit(self): + labels = {"foo": "bar"} + job_config = self._make_one() + job_config._properties["labels"] = labels + self.assertEqual(job_config.labels, labels) + + def test_labels_setter_invalid(self): + labels = object() + job_config = self._make_one() + with self.assertRaises(ValueError): + job_config.labels = labels + + def test_labels_setter(self): + labels = {"foo": "bar"} + job_config = self._make_one() + job_config.labels = labels + self.assertEqual(job_config._properties["labels"], labels) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py new file mode 100644 index 000000000000..fb0c87391f78 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py @@ -0,0 +1,477 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mock + +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestCopyJobConfig(_Base): + JOB_TYPE = "copy" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import CopyJobConfig + + return CopyJobConfig + + def test_ctor_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import WriteDisposition + + create_disposition = CreateDisposition.CREATE_NEVER + write_disposition = WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()( + create_disposition=create_disposition, write_disposition=write_disposition + ) + + self.assertEqual(config.create_disposition, create_disposition) + self.assertEqual(config.write_disposition, write_disposition) + + def test_to_api_repr_with_encryption(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + config = self._make_one() + config.destination_encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME + ) + resource = config.to_api_repr() + self.assertEqual( + resource, + { + "copy": { + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + } + } + }, + ) + + def test_to_api_repr_with_encryption_none(self): + config = self._make_one() + config.destination_encryption_configuration = None + resource = config.to_api_repr() + self.assertEqual( + resource, {"copy": {"destinationEncryptionConfiguration": None}} + ) + + +class TestCopyJob(_Base): + JOB_TYPE = "copy" + SOURCE_TABLE = "source_table" + DESTINATION_TABLE = "destination_table" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import CopyJob + + return CopyJob + + def _make_resource(self, started=False, ended=False): + resource = super(TestCopyJob, self)._make_resource(started, ended) + config = resource["configuration"]["copy"] + config["sourceTables"] = [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ] + config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + } + + return resource + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get("configuration", {}).get("copy") + + table_ref = config["destinationTable"] + self.assertEqual(job.destination.project, table_ref["projectId"]) + self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.destination.table_id, table_ref["tableId"]) + + sources = config.get("sourceTables") + if sources is None: + sources = [config["sourceTable"]] + self.assertEqual(len(sources), len(job.sources)) + for table_ref, table in zip(sources, job.sources): + self.assertEqual(table.project, table_ref["projectId"]) + self.assertEqual(table.dataset_id, table_ref["datasetId"]) + self.assertEqual(table.table_id, table_ref["tableId"]) + + if "createDisposition" in config: + self.assertEqual(job.create_disposition, config["createDisposition"]) + else: + self.assertIsNone(job.create_disposition) + + if "writeDisposition" in config: + self.assertEqual(job.write_disposition, config["writeDisposition"]) + else: + self.assertIsNone(job.write_disposition) + + if "destinationEncryptionConfiguration" in config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) + else: + self.assertIsNone(job.destination_encryption_configuration) + + def test_ctor(self): + client = _make_client(project=self.PROJECT) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + self.assertEqual(job.destination, destination) + self.assertEqual(job.sources, [source]) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + # set/read from resource['configuration']['copy'] + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.write_disposition) + self.assertIsNone(job.destination_encryption_configuration) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_sourcetable(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_wo_sources(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + } + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + with self.assertRaises(KeyError): + _ = job.sources + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + copy_config = RESOURCE["configuration"]["copy"] + copy_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_bound_client(self): + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.job import CopyJobConfig + + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import WriteDisposition + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource(ended=True) + COPY_CONFIGURATION = { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + "createDisposition": CreateDisposition.CREATE_NEVER, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + } + RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + config = CopyJobConfig() + config.create_disposition = CreateDisposition.CREATE_NEVER + config.write_disposition = WriteDisposition.WRITE_TRUNCATE + job = self._make_one(self.JOB_ID, [source], destination, client1, config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"copy": COPY_CONFIGURATION}, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_reload_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_extract.py b/packages/google-cloud-bigquery/tests/unit/job/test_extract.py new file mode 100644 index 000000000000..4c9411d0d154 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/test_extract.py @@ -0,0 +1,437 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mock + +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestExtractJobConfig(_Base): + JOB_TYPE = "extract" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import ExtractJobConfig + + return ExtractJobConfig + + def test_ctor_w_properties(self): + config = self._get_target_class()(field_delimiter="\t", print_header=True) + + self.assertEqual(config.field_delimiter, "\t") + self.assertTrue(config.print_header) + + def test_to_api_repr(self): + from google.cloud.bigquery import job + + config = self._make_one() + config.compression = job.Compression.SNAPPY + config.destination_format = job.DestinationFormat.AVRO + config.field_delimiter = "ignored for avro" + config.print_header = False + config._properties["extract"]["someNewField"] = "some-value" + config.use_avro_logical_types = True + resource = config.to_api_repr() + self.assertEqual( + resource, + { + "extract": { + "compression": "SNAPPY", + "destinationFormat": "AVRO", + "fieldDelimiter": "ignored for avro", + "printHeader": False, + "someNewField": "some-value", + "useAvroLogicalTypes": True, + } + }, + ) + + def test_from_api_repr(self): + cls = self._get_target_class() + config = cls.from_api_repr( + { + "extract": { + "compression": "NONE", + "destinationFormat": "CSV", + "fieldDelimiter": "\t", + "printHeader": True, + "someNewField": "some-value", + "useAvroLogicalTypes": False, + } + } + ) + self.assertEqual(config.compression, "NONE") + self.assertEqual(config.destination_format, "CSV") + self.assertEqual(config.field_delimiter, "\t") + self.assertEqual(config.print_header, True) + self.assertEqual(config._properties["extract"]["someNewField"], "some-value") + self.assertEqual(config.use_avro_logical_types, False) + + +class TestExtractJob(_Base): + JOB_TYPE = "extract" + SOURCE_TABLE = "source_table" + DESTINATION_URI = "gs://bucket_name/object_name" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import ExtractJob + + return ExtractJob + + def _make_resource(self, started=False, ended=False): + resource = super(TestExtractJob, self)._make_resource(started, ended) + config = resource["configuration"]["extract"] + config["sourceTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + config["destinationUris"] = [self.DESTINATION_URI] + return resource + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get("configuration", {}).get("extract") + + self.assertEqual(job.destination_uris, config["destinationUris"]) + + if "sourceTable" in config: + table_ref = config["sourceTable"] + self.assertEqual(job.source.project, table_ref["projectId"]) + self.assertEqual(job.source.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.source.table_id, table_ref["tableId"]) + else: + model_ref = config["sourceModel"] + self.assertEqual(job.source.project, model_ref["projectId"]) + self.assertEqual(job.source.dataset_id, model_ref["datasetId"]) + self.assertEqual(job.source.model_id, model_ref["modelId"]) + + if "compression" in config: + self.assertEqual(job.compression, config["compression"]) + else: + self.assertIsNone(job.compression) + + if "destinationFormat" in config: + self.assertEqual(job.destination_format, config["destinationFormat"]) + else: + self.assertIsNone(job.destination_format) + + if "fieldDelimiter" in config: + self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) + else: + self.assertIsNone(job.field_delimiter) + + if "printHeader" in config: + self.assertEqual(job.print_header, config["printHeader"]) + else: + self.assertIsNone(job.print_header) + + def test_ctor(self): + from google.cloud.bigquery.table import Table + + client = _make_client(project=self.PROJECT) + source = Table(self.TABLE_REF) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) + self.assertEqual(job.source.project, self.PROJECT) + self.assertEqual(job.source.dataset_id, self.DS_ID) + self.assertEqual(job.source.table_id, self.TABLE_ID) + self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + # set/read from resource['configuration']['extract'] + self.assertIsNone(job.compression) + self.assertIsNone(job.destination_format) + self.assertIsNone(job.field_delimiter) + self.assertIsNone(job.print_header) + + def test_destination_uri_file_counts(self): + file_counts = 23 + client = _make_client(project=self.PROJECT) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client + ) + self.assertIsNone(job.destination_uri_file_counts) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats = statistics["extract"] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats["destinationUriFileCounts"] = [str(file_counts)] + self.assertEqual(job.destination_uri_file_counts, [file_counts]) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationUris": [self.DESTINATION_URI], + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_for_model(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceModel": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": "model_id", + }, + "destinationUris": [self.DESTINATION_URI], + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import Compression + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + extract_config = RESOURCE["configuration"]["extract"] + extract_config["compression"] = Compression.GZIP + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationUris": [self.DESTINATION_URI], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import Compression + from google.cloud.bigquery.job import DestinationFormat + from google.cloud.bigquery.job import ExtractJobConfig + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource(ended=True) + EXTRACT_CONFIGURATION = { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationUris": [self.DESTINATION_URI], + "compression": Compression.GZIP, + "destinationFormat": DestinationFormat.NEWLINE_DELIMITED_JSON, + "fieldDelimiter": "|", + "printHeader": False, + } + RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + config = ExtractJobConfig() + config.compression = Compression.GZIP + config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON + config.field_delimiter = "|" + config.print_header = False + job = self._make_one( + self.JOB_ID, source, [self.DESTINATION_URI], client1, config + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"extract": EXTRACT_CONFIGURATION}, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_reload_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + from google.cloud.bigquery.dataset import DatasetReference + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py new file mode 100644 index 000000000000..70e7860a7904 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -0,0 +1,838 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +import mock + +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestLoadJob(_Base): + JOB_TYPE = "load" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import LoadJob + + return LoadJob + + def _setUpConstants(self): + super(TestLoadJob, self)._setUpConstants() + self.INPUT_FILES = 2 + self.INPUT_BYTES = 12345 + self.OUTPUT_BYTES = 23456 + self.OUTPUT_ROWS = 345 + + def _make_resource(self, started=False, ended=False): + resource = super(TestLoadJob, self)._make_resource(started, ended) + config = resource["configuration"]["load"] + config["sourceUris"] = [self.SOURCE1] + config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + } + + if ended: + resource["status"] = {"state": "DONE"} + resource["statistics"]["load"]["inputFiles"] = self.INPUT_FILES + resource["statistics"]["load"]["inputFileBytes"] = self.INPUT_BYTES + resource["statistics"]["load"]["outputBytes"] = self.OUTPUT_BYTES + resource["statistics"]["load"]["outputRows"] = self.OUTPUT_ROWS + + return resource + + def _verifyBooleanConfigProperties(self, job, config): + if "allowJaggedRows" in config: + self.assertEqual(job.allow_jagged_rows, config["allowJaggedRows"]) + else: + self.assertIsNone(job.allow_jagged_rows) + if "allowQuotedNewlines" in config: + self.assertEqual(job.allow_quoted_newlines, config["allowQuotedNewlines"]) + else: + self.assertIsNone(job.allow_quoted_newlines) + if "autodetect" in config: + self.assertEqual(job.autodetect, config["autodetect"]) + else: + self.assertIsNone(job.autodetect) + if "ignoreUnknownValues" in config: + self.assertEqual(job.ignore_unknown_values, config["ignoreUnknownValues"]) + else: + self.assertIsNone(job.ignore_unknown_values) + if "useAvroLogicalTypes" in config: + self.assertEqual(job.use_avro_logical_types, config["useAvroLogicalTypes"]) + else: + self.assertIsNone(job.use_avro_logical_types) + + def _verifyEnumConfigProperties(self, job, config): + if "createDisposition" in config: + self.assertEqual(job.create_disposition, config["createDisposition"]) + else: + self.assertIsNone(job.create_disposition) + if "encoding" in config: + self.assertEqual(job.encoding, config["encoding"]) + else: + self.assertIsNone(job.encoding) + if "sourceFormat" in config: + self.assertEqual(job.source_format, config["sourceFormat"]) + else: + self.assertIsNone(job.source_format) + if "writeDisposition" in config: + self.assertEqual(job.write_disposition, config["writeDisposition"]) + else: + self.assertIsNone(job.write_disposition) + if "schemaUpdateOptions" in config: + self.assertEqual(job.schema_update_options, config["schemaUpdateOptions"]) + else: + self.assertIsNone(job.schema_update_options) + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get("configuration", {}).get("load") + + self._verifyBooleanConfigProperties(job, config) + self._verifyEnumConfigProperties(job, config) + + self.assertEqual(job.source_uris, config["sourceUris"]) + + table_ref = config["destinationTable"] + self.assertEqual(job.destination.project, table_ref["projectId"]) + self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.destination.table_id, table_ref["tableId"]) + + if "fieldDelimiter" in config: + self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) + else: + self.assertIsNone(job.field_delimiter) + if "maxBadRecords" in config: + self.assertEqual(job.max_bad_records, config["maxBadRecords"]) + else: + self.assertIsNone(job.max_bad_records) + if "nullMarker" in config: + self.assertEqual(job.null_marker, config["nullMarker"]) + else: + self.assertIsNone(job.null_marker) + if "quote" in config: + self.assertEqual(job.quote_character, config["quote"]) + else: + self.assertIsNone(job.quote_character) + if "skipLeadingRows" in config: + self.assertEqual(str(job.skip_leading_rows), config["skipLeadingRows"]) + else: + self.assertIsNone(job.skip_leading_rows) + + if "destinationEncryptionConfiguration" in config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) + else: + self.assertIsNone(job.destination_encryption_configuration) + + def test_ctor(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + self.assertEqual(job.destination, self.TABLE_REF) + self.assertEqual(list(job.source_uris), [self.SOURCE1]) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + # derived from resource['statistics']['load'] + self.assertIsNone(job.input_file_bytes) + self.assertIsNone(job.input_files) + self.assertIsNone(job.output_bytes) + self.assertIsNone(job.output_rows) + + # set/read from resource['configuration']['load'] + self.assertIsNone(job.schema) + self.assertIsNone(job.allow_jagged_rows) + self.assertIsNone(job.allow_quoted_newlines) + self.assertIsNone(job.autodetect) + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.encoding) + self.assertIsNone(job.field_delimiter) + self.assertIsNone(job.ignore_unknown_values) + self.assertIsNone(job.max_bad_records) + self.assertIsNone(job.null_marker) + self.assertIsNone(job.quote_character) + self.assertIsNone(job.skip_leading_rows) + self.assertIsNone(job.source_format) + self.assertIsNone(job.write_disposition) + self.assertIsNone(job.destination_encryption_configuration) + self.assertIsNone(job.destination_table_description) + self.assertIsNone(job.destination_table_friendly_name) + self.assertIsNone(job.range_partitioning) + self.assertIsNone(job.time_partitioning) + self.assertIsNone(job.use_avro_logical_types) + self.assertIsNone(job.clustering_fields) + self.assertIsNone(job.schema_update_options) + + def test_ctor_w_config(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.job import LoadJobConfig + + client = _make_client(project=self.PROJECT) + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config + ) + self.assertEqual(job.schema, [full_name, age]) + config.destination_table_description = "Description" + expected = {"description": "Description"} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + friendly_name = "Friendly Name" + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name + } + self.assertEqual(config.destination_table_friendly_name, friendly_name) + + def test_ctor_w_job_reference(self): + from google.cloud.bigquery import job + + client = _make_client(project=self.PROJECT) + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + self.assertEqual(load_job.project, "alternative-project") + self.assertEqual(load_job.location, "US") + + def test_done(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + self.assertTrue(job.done()) + + def test_result(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + result = job.result() + + self.assertIs(result, job) + + def test_result_invokes_begin(self): + begun_resource = self._make_resource() + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection(begun_resource, done_resource) + client = _make_client(self.PROJECT) + client._connection = connection + + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job.result() + + self.assertEqual(len(connection.api_request.call_args_list), 2) + begin_request, reload_request = connection.api_request.call_args_list + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(reload_request[1]["method"], "GET") + + def test_schema_setter_non_list(self): + from google.cloud.bigquery.job import LoadJobConfig + + config = LoadJobConfig() + with self.assertRaises(TypeError): + config.schema = object() + + def test_schema_setter_invalid_field(self): + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.schema import SchemaField + + config = LoadJobConfig() + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + with self.assertRaises(ValueError): + config.schema = [full_name, object()] + + def test_schema_setter(self): + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.schema import SchemaField + + config = LoadJobConfig() + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config.schema = [full_name, age] + self.assertEqual(config.schema, [full_name, age]) + + def test_props_set_by_server(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + + CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC) + STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC) + ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC) + FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) + URL = "http://example.com/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + EMAIL = "phred@example.com" + ERROR_RESULT = { + "debugInfo": "DEBUG", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", + } + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job._properties["etag"] = "ETAG" + job._properties["id"] = FULL_JOB_ID + job._properties["selfLink"] = URL + job._properties["user_email"] = EMAIL + + statistics = job._properties["statistics"] = {} + statistics["creationTime"] = _millis(CREATED) + statistics["startTime"] = _millis(STARTED) + statistics["endTime"] = _millis(ENDED) + + self.assertEqual(job.etag, "ETAG") + self.assertEqual(job.self_link, URL) + self.assertEqual(job.user_email, EMAIL) + + self.assertEqual(job.created, CREATED) + self.assertEqual(job.started, STARTED) + self.assertEqual(job.ended, ENDED) + + # running jobs have no load stats not yet set. + self.assertIsNone(job.output_bytes) + + load_stats = statistics["load"] = {} + load_stats["inputFileBytes"] = 12345 + load_stats["inputFiles"] = 1 + load_stats["outputBytes"] = 23456 + load_stats["outputRows"] = 345 + + self.assertEqual(job.input_file_bytes, 12345) + self.assertEqual(job.input_files, 1) + self.assertEqual(job.output_bytes, 23456) + self.assertEqual(job.output_rows, 345) + + status = job._properties["status"] = {} + + self.assertIsNone(job.error_result) + self.assertIsNone(job.errors) + self.assertIsNone(job.state) + + status["errorResult"] = ERROR_RESULT + status["errors"] = [ERROR_RESULT] + status["state"] = "STATE" + + self.assertEqual(job.error_result, ERROR_RESULT) + self.assertEqual(job.errors, [ERROR_RESULT]) + self.assertEqual(job.state, "STATE") + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.JOB_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.FULL_JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.FULL_JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + load_config = RESOURCE["configuration"]["load"] + load_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_already_running(self): + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job._properties["status"] = {"state": "RUNNING"} + + with self.assertRaises(ValueError): + job._begin() + + def test_begin_w_bound_client(self): + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + path = "/projects/{}/jobs".format(self.PROJECT) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": path}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_autodetect(self): + from google.cloud.bigquery.job import LoadJobConfig + + path = "/projects/{}/jobs".format(self.PROJECT) + resource = self._make_resource() + resource["configuration"]["load"]["autodetect"] = True + # Ensure None for missing server-set props + del resource["statistics"]["creationTime"] + del resource["etag"] + del resource["selfLink"] + del resource["user_email"] + conn = _make_connection(resource) + client = _make_client(project=self.PROJECT, connection=conn) + config = LoadJobConfig() + config.autodetect = True + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": path}, client, job) + + sent = { + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "autodetect": True, + } + }, + } + conn.api_request.assert_called_once_with( + method="POST", path=path, data=sent, timeout=None + ) + self._verifyResourceProperties(job, resource) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SchemaUpdateOption + from google.cloud.bigquery.job import WriteDisposition + from google.cloud.bigquery.schema import SchemaField + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource(ended=True) + LOAD_CONFIGURATION = { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "allowJaggedRows": True, + "allowQuotedNewlines": True, + "createDisposition": CreateDisposition.CREATE_NEVER, + "encoding": "ISO-8559-1", + "fieldDelimiter": "|", + "ignoreUnknownValues": True, + "maxBadRecords": 100, + "nullMarker": r"\N", + "quote": "'", + "skipLeadingRows": "1", + "sourceFormat": "CSV", + "useAvroLogicalTypes": True, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + }, + ] + }, + "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + } + RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1, config + ) + config.allow_jagged_rows = True + config.allow_quoted_newlines = True + config.create_disposition = CreateDisposition.CREATE_NEVER + config.encoding = "ISO-8559-1" + config.field_delimiter = "|" + config.ignore_unknown_values = True + config.max_bad_records = 100 + config.null_marker = r"\N" + config.quote_character = "'" + config.skip_leading_rows = 1 + config.source_format = "CSV" + config.use_avro_logical_types = True + config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + self.assertEqual(len(conn2.api_request.call_args_list), 1) + req = conn2.api_request.call_args_list[0] + self.assertEqual(req[1]["method"], "POST") + self.assertEqual(req[1]["path"], PATH) + SENT = { + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"load": LOAD_CONFIGURATION}, + } + self.maxDiff = None + self.assertEqual(req[1]["data"], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_job_reference(self): + from google.cloud.bigquery import job + + resource = self._make_resource() + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + conn = _make_connection(resource) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job._begin() + final_attributes.assert_called_with( + {"path": "/projects/alternative-project/jobs"}, client, load_job + ) + + conn.api_request.assert_called_once() + _, request = conn.api_request.call_args + self.assertEqual(request["method"], "POST") + self.assertEqual(request["path"], "/projects/alternative-project/jobs") + self.assertEqual( + request["data"]["jobReference"]["projectId"], "alternative-project" + ) + self.assertEqual(request["data"]["jobReference"]["location"], "US") + self.assertEqual(request["data"]["jobReference"]["jobId"], self.JOB_ID) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + client, + job, + ) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + client2, + job, + ) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_miss_w_job_reference(self): + from google.cloud.bigquery import job + + job_ref = job._JobReference("my-job-id", "other-project", "US") + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(load_job.exists()) + + final_attributes.assert_called_with( + {"path": "/projects/other-project/jobs/my-job-id"}, client, load_job + ) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/other-project/jobs/my-job-id", + query_params={"fields": "id", "location": "US"}, + timeout=None, + ) + + def test_reload_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_job_reference(self): + from google.cloud.bigquery import job + + resource = self._make_resource(ended=True) + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + conn = _make_connection(resource) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job.reload() + + final_attributes.assert_called_with( + {"path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID)}, + client, + load_job, + ) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/alternative-project/jobs/{}".format(self.JOB_ID), + query_params={"location": "US"}, + timeout=None, + ) + + def test_cancel_w_bound_client(self): + PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource(ended=True) + RESPONSE = {"job": RESOURCE} + conn = _make_connection(RESPONSE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.cancel() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_cancel_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource(ended=True) + RESPONSE = {"job": RESOURCE} + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESPONSE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.cancel(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_cancel_w_job_reference(self): + from google.cloud.bigquery import job + + resource = self._make_resource(ended=True) + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + conn = _make_connection({"job": resource}) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job.cancel() + + final_attributes.assert_called_with( + { + "path": "/projects/alternative-project/jobs/{}/cancel".format( + self.JOB_ID + ) + }, + client, + load_job, + ) + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/alternative-project/jobs/{}/cancel".format(self.JOB_ID), + query_params={"location": "US"}, + timeout=None, + ) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py new file mode 100644 index 000000000000..c18f51bff5c8 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -0,0 +1,710 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings + +import pytest + +from .helpers import _Base + + +class TestLoadJobConfig(_Base): + JOB_TYPE = "load" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import LoadJobConfig + + return LoadJobConfig + + def test_ctor_w_properties(self): + config = self._get_target_class()( + allow_jagged_rows=True, allow_quoted_newlines=True + ) + + self.assertTrue(config.allow_jagged_rows) + self.assertTrue(config.allow_quoted_newlines) + + def test_allow_jagged_rows_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.allow_jagged_rows) + + def test_allow_jagged_rows_hit(self): + config = self._get_target_class()() + config._properties["load"]["allowJaggedRows"] = True + self.assertTrue(config.allow_jagged_rows) + + def test_allow_jagged_rows_setter(self): + config = self._get_target_class()() + config.allow_jagged_rows = True + self.assertTrue(config._properties["load"]["allowJaggedRows"]) + + def test_allow_quoted_newlines_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.allow_quoted_newlines) + + def test_allow_quoted_newlines_hit(self): + config = self._get_target_class()() + config._properties["load"]["allowQuotedNewlines"] = True + self.assertTrue(config.allow_quoted_newlines) + + def test_allow_quoted_newlines_setter(self): + config = self._get_target_class()() + config.allow_quoted_newlines = True + self.assertTrue(config._properties["load"]["allowQuotedNewlines"]) + + def test_autodetect_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.autodetect) + + def test_autodetect_hit(self): + config = self._get_target_class()() + config._properties["load"]["autodetect"] = True + self.assertTrue(config.autodetect) + + def test_autodetect_setter(self): + config = self._get_target_class()() + config.autodetect = True + self.assertTrue(config._properties["load"]["autodetect"]) + + def test_clustering_fields_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.clustering_fields) + + def test_clustering_fields_hit(self): + config = self._get_target_class()() + fields = ["email", "postal_code"] + config._properties["load"]["clustering"] = {"fields": fields} + self.assertEqual(config.clustering_fields, fields) + + def test_clustering_fields_setter(self): + fields = ["email", "postal_code"] + config = self._get_target_class()() + config.clustering_fields = fields + self.assertEqual(config._properties["load"]["clustering"], {"fields": fields}) + + def test_clustering_fields_setter_w_none(self): + config = self._get_target_class()() + fields = ["email", "postal_code"] + config._properties["load"]["clustering"] = {"fields": fields} + config.clustering_fields = None + self.assertIsNone(config.clustering_fields) + self.assertNotIn("clustering", config._properties["load"]) + + def test_create_disposition_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.create_disposition) + + def test_create_disposition_hit(self): + from google.cloud.bigquery.job import CreateDisposition + + disposition = CreateDisposition.CREATE_IF_NEEDED + config = self._get_target_class()() + config._properties["load"]["createDisposition"] = disposition + self.assertEqual(config.create_disposition, disposition) + + def test_create_disposition_setter(self): + from google.cloud.bigquery.job import CreateDisposition + + disposition = CreateDisposition.CREATE_IF_NEEDED + config = self._get_target_class()() + config.create_disposition = disposition + self.assertEqual(config._properties["load"]["createDisposition"], disposition) + + def test_destination_encryption_configuration_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_encryption_configuration) + + def test_destination_encryption_configuration_hit(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + kms_key_name = "kms-key-name" + encryption_configuration = EncryptionConfiguration(kms_key_name) + config = self._get_target_class()() + config._properties["load"]["destinationEncryptionConfiguration"] = { + "kmsKeyName": kms_key_name + } + self.assertEqual( + config.destination_encryption_configuration, encryption_configuration + ) + + def test_destination_encryption_configuration_setter(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + kms_key_name = "kms-key-name" + encryption_configuration = EncryptionConfiguration(kms_key_name) + config = self._get_target_class()() + config.destination_encryption_configuration = encryption_configuration + expected = {"kmsKeyName": kms_key_name} + self.assertEqual( + config._properties["load"]["destinationEncryptionConfiguration"], expected + ) + + def test_destination_encryption_configuration_setter_w_none(self): + kms_key_name = "kms-key-name" + config = self._get_target_class()() + config._properties["load"]["destinationEncryptionConfiguration"] = { + "kmsKeyName": kms_key_name + } + config.destination_encryption_configuration = None + self.assertIsNone(config.destination_encryption_configuration) + self.assertNotIn( + "destinationEncryptionConfiguration", config._properties["load"] + ) + + def test_destination_table_description_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_table_description) + + def test_destination_table_description_hit(self): + description = "Description" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "description": description + } + self.assertEqual(config.destination_table_description, description) + + def test_destination_table_description_setter(self): + description = "Description" + config = self._get_target_class()() + config.destination_table_description = description + expected = {"description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_description_setter_w_fn_already(self): + description = "Description" + friendly_name = "Friendly Name" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name + } + config.destination_table_description = description + expected = {"friendlyName": friendly_name, "description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_description_w_none(self): + description = "Description" + friendly_name = "Friendly Name" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "description": description, + "friendlyName": friendly_name, + } + config.destination_table_description = None + expected = {"friendlyName": friendly_name} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_friendly_name_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_table_friendly_name) + + def test_destination_table_friendly_name_hit(self): + friendly_name = "Friendly Name" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name + } + self.assertEqual(config.destination_table_friendly_name, friendly_name) + + def test_destination_table_friendly_name_setter(self): + friendly_name = "Friendly Name" + config = self._get_target_class()() + config.destination_table_friendly_name = friendly_name + expected = {"friendlyName": friendly_name} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_friendly_name_setter_w_descr_already(self): + friendly_name = "Friendly Name" + description = "Description" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "description": description + } + config.destination_table_friendly_name = friendly_name + expected = {"friendlyName": friendly_name, "description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_friendly_name_w_none(self): + friendly_name = "Friendly Name" + description = "Description" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "description": description, + "friendlyName": friendly_name, + } + config.destination_table_friendly_name = None + expected = {"description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_encoding_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.encoding) + + def test_encoding_hit(self): + from google.cloud.bigquery.job import Encoding + + encoding = Encoding.UTF_8 + config = self._get_target_class()() + config._properties["load"]["encoding"] = encoding + self.assertEqual(config.encoding, encoding) + + def test_encoding_setter(self): + from google.cloud.bigquery.job import Encoding + + encoding = Encoding.UTF_8 + config = self._get_target_class()() + config.encoding = encoding + self.assertEqual(config._properties["load"]["encoding"], encoding) + + def test_field_delimiter_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.field_delimiter) + + def test_field_delimiter_hit(self): + field_delimiter = "|" + config = self._get_target_class()() + config._properties["load"]["fieldDelimiter"] = field_delimiter + self.assertEqual(config.field_delimiter, field_delimiter) + + def test_field_delimiter_setter(self): + field_delimiter = "|" + config = self._get_target_class()() + config.field_delimiter = field_delimiter + self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter) + + def test_hive_partitioning_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.hive_partitioning) + + def test_hive_partitioning_hit(self): + from google.cloud.bigquery.external_config import HivePartitioningOptions + + config = self._get_target_class()() + config._properties["load"]["hivePartitioningOptions"] = { + "sourceUriPrefix": "http://foo/bar", + "mode": "STRINGS", + } + result = config.hive_partitioning + self.assertIsInstance(result, HivePartitioningOptions) + self.assertEqual(result.source_uri_prefix, "http://foo/bar") + self.assertEqual(result.mode, "STRINGS") + + def test_hive_partitioning_setter(self): + from google.cloud.bigquery.external_config import HivePartitioningOptions + + hive_partitioning = HivePartitioningOptions() + hive_partitioning.source_uri_prefix = "http://foo/bar" + hive_partitioning.mode = "AUTO" + + config = self._get_target_class()() + config.hive_partitioning = hive_partitioning + self.assertEqual( + config._properties["load"]["hivePartitioningOptions"], + {"sourceUriPrefix": "http://foo/bar", "mode": "AUTO"}, + ) + + config.hive_partitioning = None + self.assertIsNone(config._properties["load"]["hivePartitioningOptions"]) + + def test_hive_partitioning_invalid_type(self): + config = self._get_target_class()() + + with self.assertRaises(TypeError): + config.hive_partitioning = {"mode": "AUTO"} + + def test_ignore_unknown_values_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.ignore_unknown_values) + + def test_ignore_unknown_values_hit(self): + config = self._get_target_class()() + config._properties["load"]["ignoreUnknownValues"] = True + self.assertTrue(config.ignore_unknown_values) + + def test_ignore_unknown_values_setter(self): + config = self._get_target_class()() + config.ignore_unknown_values = True + self.assertTrue(config._properties["load"]["ignoreUnknownValues"]) + + def test_max_bad_records_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.max_bad_records) + + def test_max_bad_records_hit(self): + max_bad_records = 13 + config = self._get_target_class()() + config._properties["load"]["maxBadRecords"] = max_bad_records + self.assertEqual(config.max_bad_records, max_bad_records) + + def test_max_bad_records_setter(self): + max_bad_records = 13 + config = self._get_target_class()() + config.max_bad_records = max_bad_records + self.assertEqual(config._properties["load"]["maxBadRecords"], max_bad_records) + + def test_null_marker_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.null_marker) + + def test_null_marker_hit(self): + null_marker = "XXX" + config = self._get_target_class()() + config._properties["load"]["nullMarker"] = null_marker + self.assertEqual(config.null_marker, null_marker) + + def test_null_marker_setter(self): + null_marker = "XXX" + config = self._get_target_class()() + config.null_marker = null_marker + self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + + def test_quote_character_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.quote_character) + + def test_quote_character_hit(self): + quote_character = "'" + config = self._get_target_class()() + config._properties["load"]["quote"] = quote_character + self.assertEqual(config.quote_character, quote_character) + + def test_quote_character_setter(self): + quote_character = "'" + config = self._get_target_class()() + config.quote_character = quote_character + self.assertEqual(config._properties["load"]["quote"], quote_character) + + def test_schema_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.schema) + + def test_schema_hit(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + all_props_repr = { + "mode": "REQUIRED", + "name": "foo", + "type": "INTEGER", + "description": "Foo", + } + minimal_repr = {"name": "bar", "type": "STRING"} + config._properties["load"]["schema"] = { + "fields": [all_props_repr, minimal_repr] + } + all_props, minimal = config.schema + self.assertEqual(all_props, SchemaField.from_api_repr(all_props_repr)) + self.assertEqual(minimal, SchemaField.from_api_repr(minimal_repr)) + + def test_schema_setter_fields(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config.schema = [full_name, age] + full_name_repr = { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + } + age_repr = { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + } + self.assertEqual( + config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} + ) + + def test_schema_setter_valid_mappings_list(self): + config = self._get_target_class()() + + schema = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + config.schema = schema + + full_name_repr = { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + } + age_repr = { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + } + self.assertEqual( + config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} + ) + + def test_schema_setter_invalid_mappings_list(self): + config = self._get_target_class()() + + schema = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, + ] + + with self.assertRaises(Exception): + config.schema = schema + + def test_schema_setter_unsetting_schema(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + config._properties["load"]["schema"] = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + config.schema = None + self.assertNotIn("schema", config._properties["load"]) + config.schema = None # no error, idempotent operation + + def test_schema_update_options_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.schema_update_options) + + def test_schema_update_options_hit(self): + from google.cloud.bigquery.job import SchemaUpdateOption + + options = [ + SchemaUpdateOption.ALLOW_FIELD_ADDITION, + SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + ] + config = self._get_target_class()() + config._properties["load"]["schemaUpdateOptions"] = options + self.assertEqual(config.schema_update_options, options) + + def test_schema_update_options_setter(self): + from google.cloud.bigquery.job import SchemaUpdateOption + + options = [ + SchemaUpdateOption.ALLOW_FIELD_ADDITION, + SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + ] + config = self._get_target_class()() + config.schema_update_options = options + self.assertEqual(config._properties["load"]["schemaUpdateOptions"], options) + + def test_skip_leading_rows_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.skip_leading_rows) + + def test_skip_leading_rows_hit_w_str(self): + skip_leading_rows = 1 + config = self._get_target_class()() + config._properties["load"]["skipLeadingRows"] = str(skip_leading_rows) + self.assertEqual(config.skip_leading_rows, skip_leading_rows) + + def test_skip_leading_rows_hit_w_integer(self): + skip_leading_rows = 1 + config = self._get_target_class()() + config._properties["load"]["skipLeadingRows"] = skip_leading_rows + self.assertEqual(config.skip_leading_rows, skip_leading_rows) + + def test_skip_leading_rows_setter(self): + skip_leading_rows = 1 + config = self._get_target_class()() + config.skip_leading_rows = skip_leading_rows + self.assertEqual( + config._properties["load"]["skipLeadingRows"], str(skip_leading_rows) + ) + + def test_source_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.source_format) + + def test_source_format_hit(self): + from google.cloud.bigquery.job import SourceFormat + + source_format = SourceFormat.CSV + config = self._get_target_class()() + config._properties["load"]["sourceFormat"] = source_format + self.assertEqual(config.source_format, source_format) + + def test_source_format_setter(self): + from google.cloud.bigquery.job import SourceFormat + + source_format = SourceFormat.CSV + config = self._get_target_class()() + config.source_format = source_format + self.assertEqual(config._properties["load"]["sourceFormat"], source_format) + + def test_range_partitioning_w_none(self): + object_under_test = self._get_target_class()() + assert object_under_test.range_partitioning is None + + def test_range_partitioning_w_value(self): + object_under_test = self._get_target_class()() + object_under_test._properties["load"]["rangePartitioning"] = { + "field": "column_one", + "range": {"start": 1, "end": 1000, "interval": 10}, + } + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter(self): + from google.cloud.bigquery.table import PartitionRange + from google.cloud.bigquery.table import RangePartitioning + + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = RangePartitioning( + field="column_one", range_=PartitionRange(start=1, end=1000, interval=10) + ) + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter_w_none(self): + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = None + assert object_under_test.range_partitioning is None + + def test_range_partitioning_setter_w_wrong_type(self): + object_under_test = self._get_target_class()() + with pytest.raises(ValueError, match="RangePartitioning"): + object_under_test.range_partitioning = object() + + def test_time_partitioning_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.time_partitioning) + + def test_time_partitioning_hit(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType + + field = "creation_date" + year_ms = 86400 * 1000 * 365 + config = self._get_target_class()() + config._properties["load"]["timePartitioning"] = { + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, + } + with warnings.catch_warnings(record=True) as warned: + expected = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) + self.assertEqual(config.time_partitioning, expected) + + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + + def test_time_partitioning_setter(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType + + field = "creation_date" + year_ms = 86400 * 1000 * 365 + + with warnings.catch_warnings(record=True) as warned: + time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) + + config = self._get_target_class()() + config.time_partitioning = time_partitioning + expected = { + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, + } + self.assertEqual(config._properties["load"]["timePartitioning"], expected) + + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + + def test_time_partitioning_setter_w_none(self): + from google.cloud.bigquery.table import TimePartitioningType + + field = "creation_date" + year_ms = 86400 * 1000 * 365 + config = self._get_target_class()() + config._properties["load"]["timePartitioning"] = { + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, + } + config.time_partitioning = None + self.assertIsNone(config.time_partitioning) + self.assertNotIn("timePartitioning", config._properties["load"]) + + def test_use_avro_logical_types(self): + config = self._get_target_class()() + self.assertIsNone(config.use_avro_logical_types) + + def test_use_avro_logical_types_setter(self): + config = self._get_target_class()() + config.use_avro_logical_types = True + self.assertTrue(config._properties["load"]["useAvroLogicalTypes"]) + + def test_write_disposition_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.write_disposition) + + def test_write_disposition_hit(self): + from google.cloud.bigquery.job import WriteDisposition + + write_disposition = WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()() + config._properties["load"]["writeDisposition"] = write_disposition + self.assertEqual(config.write_disposition, write_disposition) + + def test_write_disposition_setter(self): + from google.cloud.bigquery.job import WriteDisposition + + write_disposition = WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()() + config.write_disposition = write_disposition + self.assertEqual( + config._properties["load"]["writeDisposition"], write_disposition + ) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py new file mode 100644 index 000000000000..c0b90d8ea699 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -0,0 +1,1811 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import concurrent +import copy +import textwrap + +import freezegun +from google.api_core import exceptions +import google.api_core.retry +import mock +import requests +from six.moves import http_client + +import google.cloud.bigquery.query +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestQueryJob(_Base): + JOB_TYPE = "query" + QUERY = "select count(*) from persons" + DESTINATION_TABLE = "destination_table" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryJob + + return QueryJob + + def _make_resource(self, started=False, ended=False): + resource = super(TestQueryJob, self)._make_resource(started, ended) + config = resource["configuration"]["query"] + config["query"] = self.QUERY + return resource + + def _verifyBooleanResourceProperties(self, job, config): + + if "allowLargeResults" in config: + self.assertEqual(job.allow_large_results, config["allowLargeResults"]) + else: + self.assertIsNone(job.allow_large_results) + if "flattenResults" in config: + self.assertEqual(job.flatten_results, config["flattenResults"]) + else: + self.assertIsNone(job.flatten_results) + if "useQueryCache" in config: + self.assertEqual(job.use_query_cache, config["useQueryCache"]) + else: + self.assertIsNone(job.use_query_cache) + if "useLegacySql" in config: + self.assertEqual(job.use_legacy_sql, config["useLegacySql"]) + else: + self.assertIsNone(job.use_legacy_sql) + + def _verifyIntegerResourceProperties(self, job, config): + if "maximumBillingTier" in config: + self.assertEqual(job.maximum_billing_tier, config["maximumBillingTier"]) + else: + self.assertIsNone(job.maximum_billing_tier) + if "maximumBytesBilled" in config: + self.assertEqual( + str(job.maximum_bytes_billed), config["maximumBytesBilled"] + ) + self.assertIsInstance(job.maximum_bytes_billed, int) + else: + self.assertIsNone(job.maximum_bytes_billed) + + def _verify_udf_resources(self, job, config): + udf_resources = config.get("userDefinedFunctionResources", ()) + self.assertEqual(len(job.udf_resources), len(udf_resources)) + for found, expected in zip(job.udf_resources, udf_resources): + if "resourceUri" in expected: + self.assertEqual(found.udf_type, "resourceUri") + self.assertEqual(found.value, expected["resourceUri"]) + else: + self.assertEqual(found.udf_type, "inlineCode") + self.assertEqual(found.value, expected["inlineCode"]) + + def _verifyQueryParameters(self, job, config): + query_parameters = config.get("queryParameters", ()) + self.assertEqual(len(job.query_parameters), len(query_parameters)) + for found, expected in zip(job.query_parameters, query_parameters): + self.assertEqual(found.to_api_repr(), expected) + + def _verify_table_definitions(self, job, config): + table_defs = config.get("tableDefinitions") + if job.table_definitions is None: + self.assertIsNone(table_defs) + else: + self.assertEqual(len(job.table_definitions), len(table_defs)) + for found_key, found_ec in job.table_definitions.items(): + expected_ec = table_defs.get(found_key) + self.assertIsNotNone(expected_ec) + self.assertEqual(found_ec.to_api_repr(), expected_ec) + + def _verify_configuration_properties(self, job, configuration): + if "dryRun" in configuration: + self.assertEqual(job.dry_run, configuration["dryRun"]) + else: + self.assertIsNone(job.dry_run) + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + configuration = resource.get("configuration", {}) + self._verify_configuration_properties(job, configuration) + + query_config = resource.get("configuration", {}).get("query") + self._verifyBooleanResourceProperties(job, query_config) + self._verifyIntegerResourceProperties(job, query_config) + self._verify_udf_resources(job, query_config) + self._verifyQueryParameters(job, query_config) + self._verify_table_definitions(job, query_config) + + self.assertEqual(job.query, query_config["query"]) + if "createDisposition" in query_config: + self.assertEqual(job.create_disposition, query_config["createDisposition"]) + else: + self.assertIsNone(job.create_disposition) + if "defaultDataset" in query_config: + ds_ref = job.default_dataset + ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id} + self.assertEqual(ds_ref, query_config["defaultDataset"]) + else: + self.assertIsNone(job.default_dataset) + if "destinationTable" in query_config: + table = job.destination + tb_ref = { + "projectId": table.project, + "datasetId": table.dataset_id, + "tableId": table.table_id, + } + self.assertEqual(tb_ref, query_config["destinationTable"]) + else: + self.assertIsNone(job.destination) + if "priority" in query_config: + self.assertEqual(job.priority, query_config["priority"]) + else: + self.assertIsNone(job.priority) + if "writeDisposition" in query_config: + self.assertEqual(job.write_disposition, query_config["writeDisposition"]) + else: + self.assertIsNone(job.write_disposition) + if "destinationEncryptionConfiguration" in query_config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + query_config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) + else: + self.assertIsNone(job.destination_encryption_configuration) + if "schemaUpdateOptions" in query_config: + self.assertEqual( + job.schema_update_options, query_config["schemaUpdateOptions"] + ) + else: + self.assertIsNone(job.schema_update_options) + + def test_ctor_defaults(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.query, self.QUERY) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + self.assertFalse(job.use_legacy_sql) + + # set/read from resource['configuration']['query'] + self.assertIsNone(job.allow_large_results) + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.default_dataset) + self.assertIsNone(job.destination) + self.assertIsNone(job.flatten_results) + self.assertIsNone(job.priority) + self.assertIsNone(job.use_query_cache) + self.assertIsNone(job.dry_run) + self.assertIsNone(job.write_disposition) + self.assertIsNone(job.maximum_billing_tier) + self.assertIsNone(job.maximum_bytes_billed) + self.assertIsNone(job.table_definitions) + self.assertIsNone(job.destination_encryption_configuration) + self.assertIsNone(job.range_partitioning) + self.assertIsNone(job.time_partitioning) + self.assertIsNone(job.clustering_fields) + self.assertIsNone(job.schema_update_options) + + def test_ctor_w_udf_resources(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource + + RESOURCE_URI = "gs://some-bucket/js/lib.js" + udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] + client = _make_client(project=self.PROJECT) + config = QueryJobConfig() + config.udf_resources = udf_resources + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + self.assertEqual(job.udf_resources, udf_resources) + + def test_ctor_w_query_parameters(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] + client = _make_client(project=self.PROJECT) + config = QueryJobConfig(query_parameters=query_parameters) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + self.assertEqual(job.query_parameters, query_parameters) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import SchemaUpdateOption + from google.cloud.bigquery.job import WriteDisposition + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + query_config = RESOURCE["configuration"]["query"] + query_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED + query_config["writeDisposition"] = WriteDisposition.WRITE_TRUNCATE + query_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + } + query_config["schemaUpdateOptions"] = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_cancelled(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["status"] = { + "state": "DONE", + "errorResult": {"reason": "stopped"}, + } + + self.assertTrue(job.cancelled()) + + def test_done(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": resource["jobReference"]} + ) + self.assertTrue(job.done()) + + def test_done_w_timeout(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + + with mock.patch.object( + client, "_get_query_results" + ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: + job.done(timeout=42) + + fake_get_results.assert_called_once() + call_args = fake_get_results.call_args + self.assertEqual(call_args.kwargs.get("timeout"), 42) + + call_args = fake_reload.call_args + self.assertEqual(call_args.kwargs.get("timeout"), 42) + + def test_done_w_timeout_and_longer_internal_api_timeout(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._done_timeout = 8.8 + + with mock.patch.object( + client, "_get_query_results" + ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: + job.done(timeout=5.5) + + # The expected timeout used is simply the given timeout, as the latter + # is shorter than the job's internal done timeout. + expected_timeout = 5.5 + + fake_get_results.assert_called_once() + call_args = fake_get_results.call_args + self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + + call_args = fake_reload.call_args + self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + + def test_query_plan(self): + from google.cloud._helpers import _RFC3339_MICROS + from google.cloud.bigquery.job import QueryPlanEntry + from google.cloud.bigquery.job import QueryPlanEntryStep + + plan_entries = [ + { + "name": "NAME", + "id": "1234", + "inputStages": ["88", "101"], + "startMs": "1522540800000", + "endMs": "1522540804000", + "parallelInputs": "1000", + "completedParallelInputs": "5", + "waitMsAvg": "33", + "waitMsMax": "400", + "waitRatioAvg": 2.71828, + "waitRatioMax": 3.14159, + "readMsAvg": "45", + "readMsMax": "90", + "readRatioAvg": 1.41421, + "readRatioMax": 1.73205, + "computeMsAvg": "55", + "computeMsMax": "99", + "computeRatioAvg": 0.69315, + "computeRatioMax": 1.09861, + "writeMsAvg": "203", + "writeMsMax": "340", + "writeRatioAvg": 3.32193, + "writeRatioMax": 2.30258, + "recordsRead": "100", + "recordsWritten": "1", + "status": "STATUS", + "shuffleOutputBytes": "1024", + "shuffleOutputBytesSpilled": "1", + "steps": [{"kind": "KIND", "substeps": ["SUBSTEP1", "SUBSTEP2"]}], + } + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.query_plan, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.query_plan, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.query_plan, []) + + query_stats["queryPlan"] = plan_entries + + self.assertEqual(len(job.query_plan), len(plan_entries)) + for found, expected in zip(job.query_plan, plan_entries): + self.assertIsInstance(found, QueryPlanEntry) + self.assertEqual(found.name, expected["name"]) + self.assertEqual(found.entry_id, expected["id"]) + self.assertEqual(len(found.input_stages), len(expected["inputStages"])) + for f_id in found.input_stages: + self.assertIn(f_id, [int(e) for e in expected["inputStages"]]) + self.assertEqual( + found.start.strftime(_RFC3339_MICROS), "2018-04-01T00:00:00.000000Z" + ) + self.assertEqual( + found.end.strftime(_RFC3339_MICROS), "2018-04-01T00:00:04.000000Z" + ) + self.assertEqual(found.parallel_inputs, int(expected["parallelInputs"])) + self.assertEqual( + found.completed_parallel_inputs, + int(expected["completedParallelInputs"]), + ) + self.assertEqual(found.wait_ms_avg, int(expected["waitMsAvg"])) + self.assertEqual(found.wait_ms_max, int(expected["waitMsMax"])) + self.assertEqual(found.wait_ratio_avg, expected["waitRatioAvg"]) + self.assertEqual(found.wait_ratio_max, expected["waitRatioMax"]) + self.assertEqual(found.read_ms_avg, int(expected["readMsAvg"])) + self.assertEqual(found.read_ms_max, int(expected["readMsMax"])) + self.assertEqual(found.read_ratio_avg, expected["readRatioAvg"]) + self.assertEqual(found.read_ratio_max, expected["readRatioMax"]) + self.assertEqual(found.compute_ms_avg, int(expected["computeMsAvg"])) + self.assertEqual(found.compute_ms_max, int(expected["computeMsMax"])) + self.assertEqual(found.compute_ratio_avg, expected["computeRatioAvg"]) + self.assertEqual(found.compute_ratio_max, expected["computeRatioMax"]) + self.assertEqual(found.write_ms_avg, int(expected["writeMsAvg"])) + self.assertEqual(found.write_ms_max, int(expected["writeMsMax"])) + self.assertEqual(found.write_ratio_avg, expected["writeRatioAvg"]) + self.assertEqual(found.write_ratio_max, expected["writeRatioMax"]) + self.assertEqual(found.records_read, int(expected["recordsRead"])) + self.assertEqual(found.records_written, int(expected["recordsWritten"])) + self.assertEqual(found.status, expected["status"]) + self.assertEqual( + found.shuffle_output_bytes, int(expected["shuffleOutputBytes"]) + ) + self.assertEqual( + found.shuffle_output_bytes_spilled, + int(expected["shuffleOutputBytesSpilled"]), + ) + + self.assertEqual(len(found.steps), len(expected["steps"])) + for f_step, e_step in zip(found.steps, expected["steps"]): + self.assertIsInstance(f_step, QueryPlanEntryStep) + self.assertEqual(f_step.kind, e_step["kind"]) + self.assertEqual(f_step.substeps, e_step["substeps"]) + + def test_total_bytes_processed(self): + total_bytes = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.total_bytes_processed) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats["totalBytesProcessed"] = str(total_bytes) + self.assertEqual(job.total_bytes_processed, total_bytes) + + def test_total_bytes_billed(self): + total_bytes = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.total_bytes_billed) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats["totalBytesBilled"] = str(total_bytes) + self.assertEqual(job.total_bytes_billed, total_bytes) + + def test_billing_tier(self): + billing_tier = 1 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.billing_tier) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.billing_tier) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.billing_tier) + + query_stats["billingTier"] = billing_tier + self.assertEqual(job.billing_tier, billing_tier) + + def test_cache_hit(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.cache_hit) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.cache_hit) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.cache_hit) + + query_stats["cacheHit"] = True + self.assertTrue(job.cache_hit) + + def test_ddl_operation_performed(self): + op = "SKIP" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_operation_performed) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.ddl_operation_performed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.ddl_operation_performed) + + query_stats["ddlOperationPerformed"] = op + self.assertEqual(job.ddl_operation_performed, op) + + def test_ddl_target_routine(self): + from google.cloud.bigquery.routine import RoutineReference + + ref_routine = { + "projectId": self.PROJECT, + "datasetId": "ddl_ds", + "routineId": "targetroutine", + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_target_routine) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.ddl_target_routine) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.ddl_target_routine) + + query_stats["ddlTargetRoutine"] = ref_routine + self.assertIsInstance(job.ddl_target_routine, RoutineReference) + self.assertEqual(job.ddl_target_routine.routine_id, "targetroutine") + self.assertEqual(job.ddl_target_routine.dataset_id, "ddl_ds") + self.assertEqual(job.ddl_target_routine.project, self.PROJECT) + + def test_ddl_target_table(self): + from google.cloud.bigquery.table import TableReference + + ref_table = { + "projectId": self.PROJECT, + "datasetId": "ddl_ds", + "tableId": "targettable", + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_target_table) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.ddl_target_table) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.ddl_target_table) + + query_stats["ddlTargetTable"] = ref_table + self.assertIsInstance(job.ddl_target_table, TableReference) + self.assertEqual(job.ddl_target_table.table_id, "targettable") + self.assertEqual(job.ddl_target_table.dataset_id, "ddl_ds") + self.assertEqual(job.ddl_target_table.project, self.PROJECT) + + def test_num_dml_affected_rows(self): + num_rows = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.num_dml_affected_rows) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats["numDmlAffectedRows"] = str(num_rows) + self.assertEqual(job.num_dml_affected_rows, num_rows) + + def test_slot_millis(self): + millis = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.slot_millis) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.slot_millis) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.slot_millis) + + query_stats["totalSlotMs"] = millis + self.assertEqual(job.slot_millis, millis) + + def test_statement_type(self): + statement_type = "SELECT" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.statement_type) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.statement_type) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.statement_type) + + query_stats["statementType"] = statement_type + self.assertEqual(job.statement_type, statement_type) + + def test_referenced_tables(self): + from google.cloud.bigquery.table import TableReference + + ref_tables_resource = [ + {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local1"}, + {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local2"}, + { + "projectId": "other-project-123", + "datasetId": "other-dataset", + "tableId": "other-table", + }, + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.referenced_tables, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats["referencedTables"] = ref_tables_resource + + local1, local2, remote = job.referenced_tables + + self.assertIsInstance(local1, TableReference) + self.assertEqual(local1.table_id, "local1") + self.assertEqual(local1.dataset_id, "dataset") + self.assertEqual(local1.project, self.PROJECT) + + self.assertIsInstance(local2, TableReference) + self.assertEqual(local2.table_id, "local2") + self.assertEqual(local2.dataset_id, "dataset") + self.assertEqual(local2.project, self.PROJECT) + + self.assertIsInstance(remote, TableReference) + self.assertEqual(remote.table_id, "other-table") + self.assertEqual(remote.dataset_id, "other-dataset") + self.assertEqual(remote.project, "other-project-123") + + def test_timeline(self): + timeline_resource = [ + { + "elapsedMs": 1, + "activeUnits": 22, + "pendingUnits": 33, + "completedUnits": 44, + "totalSlotMs": 101, + } + ] + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.timeline, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.timeline, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.timeline, []) + + query_stats["timeline"] = timeline_resource + + self.assertEqual(len(job.timeline), len(timeline_resource)) + self.assertEqual(job.timeline[0].elapsed_ms, 1) + self.assertEqual(job.timeline[0].active_units, 22) + self.assertEqual(job.timeline[0].pending_units, 33) + self.assertEqual(job.timeline[0].completed_units, 44) + self.assertEqual(job.timeline[0].slot_millis, 101) + + def test_undeclared_query_parameters(self): + from google.cloud.bigquery.query import ArrayQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import StructQueryParameter + + undeclared = [ + { + "name": "my_scalar", + "parameterType": {"type": "STRING"}, + "parameterValue": {"value": "value"}, + }, + { + "name": "my_array", + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": { + "arrayValues": [{"value": "1066"}, {"value": "1745"}] + }, + }, + { + "name": "my_struct", + "parameterType": { + "type": "STRUCT", + "structTypes": [{"name": "count", "type": {"type": "INT64"}}], + }, + "parameterValue": {"structValues": {"count": {"value": "123"}}}, + }, + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.undeclared_query_parameters, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.undeclared_query_parameters, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.undeclared_query_parameters, []) + + query_stats["undeclaredQueryParameters"] = undeclared + + scalar, array, struct = job.undeclared_query_parameters + + self.assertIsInstance(scalar, ScalarQueryParameter) + self.assertEqual(scalar.name, "my_scalar") + self.assertEqual(scalar.type_, "STRING") + self.assertEqual(scalar.value, "value") + + self.assertIsInstance(array, ArrayQueryParameter) + self.assertEqual(array.name, "my_array") + self.assertEqual(array.array_type, "INT64") + self.assertEqual(array.values, [1066, 1745]) + + self.assertIsInstance(struct, StructQueryParameter) + self.assertEqual(struct.name, "my_struct") + self.assertEqual(struct.struct_types, {"count": "INT64"}) + self.assertEqual(struct.struct_values, {"count": 123}) + + def test_estimated_bytes_processed(self): + est_bytes = 123456 + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.estimated_bytes_processed) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.estimated_bytes_processed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.estimated_bytes_processed) + + query_stats["estimatedBytesProcessed"] = str(est_bytes) + self.assertEqual(job.estimated_bytes_processed, est_bytes) + + def test_result(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + tabledata_resource = { + # Explicitly set totalRows to be different from the initial + # response to test update during iteration. + "totalRows": "1", + "pageToken": None, + "rows": [{"f": [{"v": "abc"}]}], + } + conn = _make_connection( + query_resource, query_resource_done, job_resource_done, tabledata_resource + ) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + result = job.result() + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 2) + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + # Test that the total_rows property has changed during iteration, based + # on the response from tabledata.list. + self.assertEqual(result.total_rows, 1) + + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call, tabledata_call] + ) + + def test_result_with_done_job_calls_get_query_results(self): + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "1", + } + job_resource = self._make_resource(started=True, ended=True) + job_resource["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + tabledata_resource = { + "totalRows": "1", + "pageToken": None, + "rows": [{"f": [{"v": "abc"}]}], + } + conn = _make_connection(query_resource_done, tabledata_resource) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + result = job.result() + + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls([query_results_call, tabledata_call]) + + def test_result_with_max_results(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "5", + } + tabledata_resource = { + "totalRows": "5", + "pageToken": None, + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + ], + } + connection = _make_connection(query_resource, tabledata_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + max_results = 3 + + result = job.result(max_results=max_results) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 5) + + rows = list(result) + + self.assertEqual(len(rows), 3) + self.assertEqual(len(connection.api_request.call_args_list), 2) + tabledata_list_request = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request[1]["query_params"]["maxResults"], max_results + ) + + def test_result_w_retry(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + + connection = _make_connection( + exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + query_resource_done, + exceptions.NotFound("not normally retriable"), + job_resource_done, + ) + client = _make_client(self.PROJECT, connection=connection) + job = self._get_target_class().from_api_repr(job_resource, client) + + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry( + initial=0.001, + maximum=0.001, + multiplier=1.0, + deadline=0.001, + predicate=custom_predicate, + ) + + self.assertIsInstance(job.result(retry=custom_retry), RowIterator) + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + + connection.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call] + ) + + def test_result_w_empty_schema(self): + from google.cloud.bigquery.table import _EmptyRowIterator + + # Destination table may have no schema for some DDL and DML queries. + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": []}, + } + connection = _make_connection(query_resource, query_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + result = job.result() + + self.assertIsInstance(result, _EmptyRowIterator) + self.assertEqual(list(result), []) + + def test_result_invokes_begins(self): + begun_resource = self._make_resource() + incomplete_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + query_resource = copy.deepcopy(incomplete_resource) + query_resource["jobComplete"] = True + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, + incomplete_resource, + query_resource, + done_resource, + query_resource, + ) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + job.result() + + self.assertEqual(len(connection.api_request.call_args_list), 4) + begin_request = connection.api_request.call_args_list[0] + query_request = connection.api_request.call_args_list[2] + reload_request = connection.api_request.call_args_list[3] + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(query_request[1]["method"], "GET") + self.assertEqual(reload_request[1]["method"], "GET") + + def test_result_w_timeout(self): + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): + job.result(timeout=1.0) + + self.assertEqual(len(connection.api_request.call_args_list), 3) + begin_request = connection.api_request.call_args_list[0] + query_request = connection.api_request.call_args_list[1] + reload_request = connection.api_request.call_args_list[2] + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(query_request[1]["method"], "GET") + self.assertEqual( + query_request[1]["path"], + "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), + ) + self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) + self.assertEqual(reload_request[1]["method"], "GET") + + def test_result_w_page_size(self): + # Arrange + query_results_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "4", + } + job_resource = self._make_resource(started=True, ended=True) + q_config = job_resource["configuration"]["query"] + q_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + } + tabledata_resource = { + "totalRows": 4, + "pageToken": "some-page-token", + "rows": [ + {"f": [{"v": "row1"}]}, + {"f": [{"v": "row2"}]}, + {"f": [{"v": "row3"}]}, + ], + } + tabledata_resource_page_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} + conn = _make_connection( + query_results_resource, tabledata_resource, tabledata_resource_page_2 + ) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + # Act + result = job.result(page_size=3) + + # Assert + actual_rows = list(result) + self.assertEqual(len(actual_rows), 4) + + tabledata_path = "/projects/%s/datasets/%s/tables/%s/data" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + conn.api_request.assert_has_calls( + [ + mock.call( + method="GET", + path=tabledata_path, + query_params={"maxResults": 3}, + timeout=None, + ), + mock.call( + method="GET", + path=tabledata_path, + query_params={"pageToken": "some-page-token", "maxResults": 3}, + timeout=None, + ), + ] + ) + + def test_result_with_start_index(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "5", + } + tabledata_resource = { + "totalRows": "5", + "pageToken": None, + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + {"f": [{"v": "jkl"}]}, + ], + } + connection = _make_connection(query_resource, tabledata_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + start_index = 1 + + result = job.result(start_index=start_index) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 5) + + rows = list(result) + + self.assertEqual(len(rows), 4) + self.assertEqual(len(connection.api_request.call_args_list), 2) + tabledata_list_request = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request[1]["query_params"]["startIndex"], start_index + ) + + def test_result_error(self): + from google.cloud import exceptions + + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, query, client) + error_result = { + "debugInfo": "DEBUG", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "invalid", + } + job._properties["status"] = { + "errorResult": error_result, + "errors": [error_result], + "state": "DONE", + } + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": job._properties["jobReference"]} + ) + job._set_future_result() + + with self.assertRaises(exceptions.GoogleCloudError) as exc_info: + job.result() + + self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) + self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + + exc_job_instance = getattr(exc_info.exception, "query_job", None) + self.assertIs(exc_job_instance, job) + + full_text = str(exc_info.exception) + assert job.job_id in full_text + assert "Query Job SQL Follows" in full_text + + for i, line in enumerate(query.splitlines(), start=1): + expected_line = "{}:{}".format(i, line) + assert expected_line in full_text + + def test_result_transport_timeout_error(self): + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, query, client) + call_api_patch = mock.patch( + "google.cloud.bigquery.client.Client._call_api", + autospec=True, + side_effect=requests.exceptions.Timeout("Server response took too long."), + ) + + # Make sure that timeout errors get rebranded to concurrent futures timeout. + with call_api_patch, self.assertRaises(concurrent.futures.TimeoutError): + job.result(timeout=1) + + def test__begin_error(self): + from google.cloud import exceptions + + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, query, client) + call_api_patch = mock.patch( + "google.cloud.bigquery.client.Client._call_api", + autospec=True, + side_effect=exceptions.BadRequest("Syntax error in SQL query"), + ) + + with call_api_patch, self.assertRaises(exceptions.GoogleCloudError) as exc_info: + job.result() + + self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) + self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + + exc_job_instance = getattr(exc_info.exception, "query_job", None) + self.assertIs(exc_job_instance, job) + + full_text = str(exc_info.exception) + assert job.job_id in full_text + assert "Query Job SQL Follows" in full_text + + for i, line in enumerate(query.splitlines(), start=1): + expected_line = "{}:{}".format(i, line) + assert expected_line in full_text + + def test__begin_w_timeout(self): + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, self.QUERY, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(timeout=7.5) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": {"query": self.QUERY, "useLegacySql": False} + }, + }, + timeout=7.5, + ) + + def test_begin_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + DS_ID = "DATASET" + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + + config = QueryJobConfig() + config.default_dataset = DatasetReference(self.PROJECT, DS_ID) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertIsNone(job.default_dataset) + self.assertEqual(job.udf_resources, []) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "defaultDataset": { + "projectId": self.PROJECT, + "datasetId": DS_ID, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.job import QueryPriority + from google.cloud.bigquery.job import SchemaUpdateOption + from google.cloud.bigquery.job import WriteDisposition + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + TABLE = "TABLE" + DS_ID = "DATASET" + RESOURCE = self._make_resource(ended=True) + QUERY_CONFIGURATION = { + "query": self.QUERY, + "allowLargeResults": True, + "createDisposition": CreateDisposition.CREATE_NEVER, + "defaultDataset": {"projectId": self.PROJECT, "datasetId": DS_ID}, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": DS_ID, + "tableId": TABLE, + }, + "flattenResults": True, + "priority": QueryPriority.INTERACTIVE, + "useQueryCache": True, + "useLegacySql": True, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + "maximumBillingTier": 4, + "maximumBytesBilled": "123456", + "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_RELAXATION], + } + RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION + RESOURCE["configuration"]["dryRun"] = True + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(TABLE) + + config = QueryJobConfig() + config.allow_large_results = True + config.create_disposition = CreateDisposition.CREATE_NEVER + config.default_dataset = dataset_ref + config.destination = table_ref + config.dry_run = True + config.flatten_results = True + config.maximum_billing_tier = 4 + config.priority = QueryPriority.INTERACTIVE + config.use_legacy_sql = True + config.use_query_cache = True + config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.maximum_bytes_billed = 123456 + config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_RELAXATION] + job = self._make_one(self.JOB_ID, self.QUERY, client1, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"dryRun": True, "query": QUERY_CONFIGURATION}, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_udf(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource + + RESOURCE_URI = "gs://some-bucket/js/lib.js" + INLINE_UDF_CODE = 'var someCode = "here";' + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + RESOURCE["configuration"]["query"]["userDefinedFunctionResources"] = [ + {"resourceUri": RESOURCE_URI}, + {"inlineCode": INLINE_UDF_CODE}, + ] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + udf_resources = [ + UDFResource("resourceUri", RESOURCE_URI), + UDFResource("inlineCode", INLINE_UDF_CODE), + ] + config = QueryJobConfig() + config.udf_resources = udf_resources + config.use_legacy_sql = True + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertEqual(job.udf_resources, udf_resources) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": True, + "userDefinedFunctionResources": [ + {"resourceUri": RESOURCE_URI}, + {"inlineCode": INLINE_UDF_CODE}, + ], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_named_query_parameter(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + config = RESOURCE["configuration"]["query"] + config["parameterMode"] = "NAMED" + config["queryParameters"] = [ + { + "name": "foo", + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, + } + ] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertEqual(job.query_parameters, query_parameters) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "parameterMode": "NAMED", + "queryParameters": config["queryParameters"], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_positional_query_parameter(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter.positional("INT64", 123)] + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + config = RESOURCE["configuration"]["query"] + config["parameterMode"] = "POSITIONAL" + config["queryParameters"] = [ + {"parameterType": {"type": "INT64"}, "parameterValue": {"value": "123"}} + ] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertEqual(job.query_parameters, query_parameters) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "parameterMode": "POSITIONAL", + "queryParameters": config["queryParameters"], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_table_defs(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.external_config import ExternalConfig + from google.cloud.bigquery.external_config import BigtableColumn + from google.cloud.bigquery.external_config import BigtableColumnFamily + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + + bt_config = ExternalConfig("BIGTABLE") + bt_config.ignore_unknown_values = True + bt_config.options.read_rowkey_as_string = True + cf = BigtableColumnFamily() + cf.family_id = "cf" + col = BigtableColumn() + col.field_name = "fn" + cf.columns = [col] + bt_config.options.column_families = [cf] + BT_CONFIG_RESOURCE = { + "sourceFormat": "BIGTABLE", + "ignoreUnknownValues": True, + "bigtableOptions": { + "readRowkeyAsString": True, + "columnFamilies": [ + {"familyId": "cf", "columns": [{"fieldName": "fn"}]} + ], + }, + } + CSV_CONFIG_RESOURCE = { + "sourceFormat": "CSV", + "maxBadRecords": 8, + "csvOptions": {"allowJaggedRows": True}, + } + csv_config = ExternalConfig("CSV") + csv_config.max_bad_records = 8 + csv_config.options.allow_jagged_rows = True + bt_table = "bigtable-table" + csv_table = "csv-table" + RESOURCE["configuration"]["query"]["tableDefinitions"] = { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + } + want_resource = copy.deepcopy(RESOURCE) + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + config = QueryJobConfig() + config.table_definitions = {bt_table: bt_config, csv_table: csv_config} + config.use_legacy_sql = True + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": True, + "tableDefinitions": { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, want_resource) + + def test_dry_run_query(self): + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + RESOURCE["configuration"]["dryRun"] = True + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + config = QueryJobConfig() + config.dry_run = True + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + self.assertEqual(job.udf_resources, []) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": {"query": self.QUERY, "useLegacySql": False}, + "dryRun": True, + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, self.QUERY, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, self.QUERY, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_reload_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(DEST_TABLE) + config = QueryJobConfig() + config.destination = table_ref + job = self._make_one(self.JOB_ID, None, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertNotEqual(job.destination, table_ref) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" + RESOURCE = self._make_resource() + q_config = RESOURCE["configuration"]["query"] + q_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": DS_ID, + "tableId": DEST_TABLE, + } + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, self.QUERY, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_timeout(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(DEST_TABLE) + config = QueryJobConfig() + config.destination = table_ref + job = self._make_one(self.JOB_ID, None, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(timeout=4.2) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertNotEqual(job.destination, table_ref) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=4.2 + ) + + def test_iter(self): + import types + + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "totalRows": "0", + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + self.assertIsInstance(iter(job), types.GeneratorType) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py new file mode 100644 index 000000000000..db03d6a3b246 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py @@ -0,0 +1,255 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from .helpers import _Base + + +class TestQueryJobConfig(_Base): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryJobConfig + + return QueryJobConfig + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + config = self._make_one() + self.assertEqual(config._properties, {"query": {}}) + + def test_ctor_w_none(self): + config = self._make_one() + config.default_dataset = None + config.destination = None + self.assertIsNone(config.default_dataset) + self.assertIsNone(config.destination) + + def test_ctor_w_properties(self): + config = self._get_target_class()(use_query_cache=False, use_legacy_sql=True) + + self.assertFalse(config.use_query_cache) + self.assertTrue(config.use_legacy_sql) + + def test_ctor_w_string_default_dataset(self): + from google.cloud.bigquery import dataset + + default_dataset = "default-proj.default_dset" + config = self._get_target_class()(default_dataset=default_dataset) + expected = dataset.DatasetReference.from_string(default_dataset) + self.assertEqual(config.default_dataset, expected) + + def test_ctor_w_string_destinaton(self): + from google.cloud.bigquery import table + + destination = "dest-proj.dest_dset.dest_tbl" + config = self._get_target_class()(destination=destination) + expected = table.TableReference.from_string(destination) + self.assertEqual(config.destination, expected) + + def test_default_dataset_w_string(self): + from google.cloud.bigquery import dataset + + default_dataset = "default-proj.default_dset" + config = self._make_one() + config.default_dataset = default_dataset + expected = dataset.DatasetReference.from_string(default_dataset) + self.assertEqual(config.default_dataset, expected) + + def test_default_dataset_w_dataset(self): + from google.cloud.bigquery import dataset + + default_dataset = "default-proj.default_dset" + expected = dataset.DatasetReference.from_string(default_dataset) + config = self._make_one() + config.default_dataset = dataset.Dataset(expected) + self.assertEqual(config.default_dataset, expected) + + def test_destinaton_w_string(self): + from google.cloud.bigquery import table + + destination = "dest-proj.dest_dset.dest_tbl" + config = self._make_one() + config.destination = destination + expected = table.TableReference.from_string(destination) + self.assertEqual(config.destination, expected) + + def test_range_partitioning_w_none(self): + object_under_test = self._get_target_class()() + assert object_under_test.range_partitioning is None + + def test_range_partitioning_w_value(self): + object_under_test = self._get_target_class()() + object_under_test._properties["query"]["rangePartitioning"] = { + "field": "column_one", + "range": {"start": 1, "end": 1000, "interval": 10}, + } + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter(self): + from google.cloud.bigquery.table import PartitionRange + from google.cloud.bigquery.table import RangePartitioning + + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = RangePartitioning( + field="column_one", range_=PartitionRange(start=1, end=1000, interval=10) + ) + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter_w_none(self): + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = None + assert object_under_test.range_partitioning is None + + def test_range_partitioning_setter_w_wrong_type(self): + object_under_test = self._get_target_class()() + with pytest.raises(ValueError, match="RangePartitioning"): + object_under_test.range_partitioning = object() + + def test_time_partitioning(self): + from google.cloud.bigquery import table + + time_partitioning = table.TimePartitioning( + type_=table.TimePartitioningType.DAY, field="name" + ) + config = self._make_one() + config.time_partitioning = time_partitioning + # TimePartitioning should be configurable after assigning + time_partitioning.expiration_ms = 10000 + + self.assertEqual(config.time_partitioning.type_, table.TimePartitioningType.DAY) + self.assertEqual(config.time_partitioning.field, "name") + self.assertEqual(config.time_partitioning.expiration_ms, 10000) + + config.time_partitioning = None + self.assertIsNone(config.time_partitioning) + + def test_clustering_fields(self): + fields = ["email", "postal_code"] + config = self._get_target_class()() + config.clustering_fields = fields + self.assertEqual(config.clustering_fields, fields) + + config.clustering_fields = None + self.assertIsNone(config.clustering_fields) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + config = klass.from_api_repr({}) + self.assertIsNone(config.dry_run) + self.assertIsNone(config.use_legacy_sql) + self.assertIsNone(config.default_dataset) + self.assertIsNone(config.destination) + self.assertIsNone(config.destination_encryption_configuration) + + def test_from_api_repr_normal(self): + from google.cloud.bigquery.dataset import DatasetReference + + resource = { + "query": { + "useLegacySql": True, + "query": "no property for me", + "defaultDataset": { + "projectId": "someproject", + "datasetId": "somedataset", + }, + "someNewProperty": "I should be saved, too.", + }, + "dryRun": True, + } + klass = self._get_target_class() + + config = klass.from_api_repr(resource) + + self.assertTrue(config.use_legacy_sql) + self.assertEqual( + config.default_dataset, DatasetReference("someproject", "somedataset") + ) + self.assertTrue(config.dry_run) + # Make sure unknown properties propagate. + self.assertEqual(config._properties["query"]["query"], "no property for me") + self.assertEqual( + config._properties["query"]["someNewProperty"], "I should be saved, too." + ) + + def test_to_api_repr_normal(self): + from google.cloud.bigquery.dataset import DatasetReference + + config = self._make_one() + config.use_legacy_sql = True + config.default_dataset = DatasetReference("someproject", "somedataset") + config.dry_run = False + config._properties["someNewProperty"] = "Woohoo, alpha stuff." + + resource = config.to_api_repr() + + self.assertFalse(resource["dryRun"]) + self.assertTrue(resource["query"]["useLegacySql"]) + self.assertEqual( + resource["query"]["defaultDataset"]["projectId"], "someproject" + ) + self.assertEqual( + resource["query"]["defaultDataset"]["datasetId"], "somedataset" + ) + # Make sure unknown properties propagate. + self.assertEqual(resource["someNewProperty"], "Woohoo, alpha stuff.") + + def test_to_api_repr_with_encryption(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + config = self._make_one() + config.destination_encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME + ) + resource = config.to_api_repr() + self.assertEqual( + resource, + { + "query": { + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + } + } + }, + ) + + def test_to_api_repr_with_encryption_none(self): + config = self._make_one() + config.destination_encryption_configuration = None + resource = config.to_api_repr() + self.assertEqual( + resource, {"query": {"destinationEncryptionConfiguration": None}} + ) + + def test_from_api_repr_with_encryption(self): + resource = { + "query": { + "destinationEncryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME} + } + } + klass = self._get_target_class() + config = klass.from_api_repr(resource) + self.assertEqual( + config.destination_encryption_configuration.kms_key_name, self.KMS_KEY_NAME + ) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py new file mode 100644 index 000000000000..37f4a6dec10e --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -0,0 +1,450 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import json + +import mock +import pytest + +try: + import pandas +except (ImportError, AttributeError): # pragma: NO COVER + pandas = None +try: + import pyarrow +except (ImportError, AttributeError): # pragma: NO COVER + pyarrow = None +try: + from google.cloud import bigquery_storage +except (ImportError, AttributeError): # pragma: NO COVER + bigquery_storage = None +try: + from tqdm import tqdm +except (ImportError, AttributeError): # pragma: NO COVER + tqdm = None + +from .helpers import _make_client +from .helpers import _make_connection +from .helpers import _make_job_resource + + +@pytest.mark.parametrize( + "query,expected", + ( + (None, False), + ("", False), + ("select name, age from table", False), + ("select name, age from table LIMIT 10;", False), + ("select name, age from table order by other_column;", True), + ("Select name, age From table Order By other_column", True), + ("SELECT name, age FROM table ORDER BY other_column;", True), + ("select name, age from table order\nby other_column", True), + ("Select name, age From table Order\nBy other_column;", True), + ("SELECT name, age FROM table ORDER\nBY other_column", True), + ("SelecT name, age froM table OrdeR \n\t BY other_column;", True), + ), +) +def test__contains_order_by(query, expected): + from google.cloud.bigquery import job as mut + + if expected: + assert mut._contains_order_by(query) + else: + assert not mut._contains_order_by(query) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +@pytest.mark.parametrize( + "query", + ( + "select name, age from table order by other_column;", + "Select name, age From table Order By other_column;", + "SELECT name, age FROM table ORDER BY other_column;", + "select name, age from table order\nby other_column;", + "Select name, age From table Order\nBy other_column;", + "SELECT name, age FROM table ORDER\nBY other_column;", + "SelecT name, age froM table OrdeR \n\t BY other_column;", + ), +) +def test_to_dataframe_bqstorage_preserve_order(query): + from google.cloud.bigquery.job import QueryJob as target_class + + job_resource = _make_job_resource( + project_id="test-project", job_type="query", ended=True + ) + job_resource["configuration"]["query"]["query"] = query + job_resource["status"] = {"state": "DONE"} + get_query_results_resource = { + "jobComplete": True, + "jobReference": {"projectId": "test-project", "jobId": "test-job"}, + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "totalRows": "4", + } + connection = _make_connection(get_query_results_resource, job_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(job_resource, client) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [ + {"name": "name", "type": ["null", "string"]}, + {"name": "age", "type": ["null", "long"]}, + ], + } + ) + bqstorage_client.create_read_session.return_value = session + + job.to_dataframe(bqstorage_client=bqstorage_client) + + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **job_resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage.types.ReadSession( + table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + ) + bqstorage_client.create_read_session.assert_called_once_with( + parent="projects/test-project", + read_session=expected_session, + max_stream_count=1, # Use a single stream to preserve row order. + ) + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_to_arrow(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + { + "name": "spouse_1", + "type": "RECORD", + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ], + }, + { + "name": "spouse_2", + "type": "RECORD", + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ], + }, + ] + }, + } + tabledata_resource = { + "rows": [ + { + "f": [ + {"v": {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}}, + {"v": {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}}, + ] + }, + { + "f": [ + {"v": {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}}, + {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, + ] + }, + ] + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + tbl = job.to_arrow(create_bqstorage_client=False) + + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + + # Check the schema. + assert tbl.schema[0].name == "spouse_1" + assert tbl.schema[0].type[0].name == "name" + assert tbl.schema[0].type[1].name == "age" + assert pyarrow.types.is_struct(tbl.schema[0].type) + assert pyarrow.types.is_string(tbl.schema[0].type[0].type) + assert pyarrow.types.is_int64(tbl.schema[0].type[1].type) + assert tbl.schema[1].name == "spouse_2" + assert tbl.schema[1].type[0].name == "name" + assert tbl.schema[1].type[1].name == "age" + assert pyarrow.types.is_struct(tbl.schema[1].type) + assert pyarrow.types.is_string(tbl.schema[1].type[0].type) + assert pyarrow.types.is_int64(tbl.schema[1].type[1].type) + + # Check the data. + tbl_data = tbl.to_pydict() + spouse_1 = tbl_data["spouse_1"] + assert spouse_1 == [ + {"name": "Phred Phlyntstone", "age": 32}, + {"name": "Bhettye Rhubble", "age": 27}, + ] + spouse_2 = tbl_data["spouse_2"] + assert spouse_2 == [ + {"name": "Wylma Phlyntstone", "age": 29}, + {"name": "Bharney Rhubble", "age": 33}, + ] + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + } + tabledata_resource = { + "rows": [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + df = job.to_dataframe(create_bqstorage_client=False) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 4 # verify the number of rows + assert list(df) == ["name", "age"] # verify the column names + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe_ddl_query(): + from google.cloud.bigquery.job import QueryJob as target_class + + # Destination table may have no schema for some DDL and DML queries. + resource = _make_job_resource(job_type="query", ended=True) + query_resource = { + "jobComplete": True, + "jobReference": resource["jobReference"], + "schema": {"fields": []}, + } + connection = _make_connection(query_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(resource, client) + + df = job.to_dataframe() + + assert len(df) == 0 + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_to_dataframe_bqstorage(): + from google.cloud.bigquery.job import QueryJob as target_class + + resource = _make_job_resource(job_type="query", ended=True) + query_resource = { + "jobComplete": True, + "jobReference": resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + } + connection = _make_connection(query_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(resource, client) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [ + {"name": "name", "type": ["null", "string"]}, + {"name": "age", "type": ["null", "long"]}, + ], + } + ) + bqstorage_client.create_read_session.return_value = session + + job.to_dataframe(bqstorage_client=bqstorage_client) + + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage.types.ReadSession( + table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + ) + bqstorage_client.create_read_session.assert_called_once_with( + parent=f"projects/{client.project}", + read_session=expected_session, + max_stream_count=0, # Use default number of streams for best performance. + ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe_column_dtypes(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + {"name": "start_timestamp", "type": "TIMESTAMP"}, + {"name": "seconds", "type": "INT64"}, + {"name": "miles", "type": "FLOAT64"}, + {"name": "km", "type": "FLOAT64"}, + {"name": "payment_type", "type": "STRING"}, + {"name": "complete", "type": "BOOL"}, + {"name": "date", "type": "DATE"}, + ] + }, + } + row_data = [ + [ + "1.4338368E9", + "420", + "1.1", + "1.77", + "Cto_dataframeash", + "true", + "1999-12-01", + ], + ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], + ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + query_resource["rows"] = rows + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + df = job.to_dataframe(dtypes={"km": "float16"}, create_bqstorage_client=False) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 3 # verify the number of rows + exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] + assert list(df) == exp_columns # verify the column names + + assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" + assert df.seconds.dtype.name == "int64" + assert df.miles.dtype.name == "float64" + assert df.km.dtype.name == "float16" + assert df.payment_type.dtype.name == "object" + assert df.complete.dtype.name == "bool" + assert df.date.dtype.name == "object" + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe_column_date_dtypes(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "1", + "schema": {"fields": [{"name": "date", "type": "DATE"}]}, + } + row_data = [ + ["1999-12-01"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + query_resource["rows"] = rows + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 1 # verify the number of rows + exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] + assert list(df) == exp_columns # verify the column names + assert df.date.dtype.name == "datetime64[ns]" + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +@mock.patch("tqdm.tqdm") +def test_to_dataframe_with_progress_bar(tqdm_mock): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource, query_resource, + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + job.to_dataframe(progress_bar_type=None, create_bqstorage_client=False) + tqdm_mock.assert_not_called() + + job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) + tqdm_mock.assert_called() diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py new file mode 100644 index 000000000000..09a0efc45866 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py @@ -0,0 +1,356 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .helpers import _Base + + +class TestQueryPlanEntryStep(_Base): + KIND = "KIND" + SUBSTEPS = ("SUB1", "SUB2") + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntryStep + + return QueryPlanEntryStep + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step.kind, self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + step = klass.from_api_repr({}) + self.assertIsNone(step.kind) + self.assertEqual(step.substeps, []) + + def test_from_api_repr_normal(self): + resource = {"kind": self.KIND, "substeps": self.SUBSTEPS} + klass = self._get_target_class() + step = klass.from_api_repr(resource) + self.assertEqual(step.kind, self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test___eq___mismatched_type(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertNotEqual(step, object()) + + def test___eq___mismatch_kind(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one("OTHER", self.SUBSTEPS) + self.assertNotEqual(step, other) + + def test___eq___mismatch_substeps(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, ()) + self.assertNotEqual(step, other) + + def test___eq___hit(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step, other) + + def test___eq___wrong_type(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertFalse(step == "hello") + + +class TestQueryPlanEntry(_Base): + NAME = "NAME" + ENTRY_ID = 1234 + START_MS = 1522540800000 + END_MS = 1522540804000 + INPUT_STAGES = (88, 101) + PARALLEL_INPUTS = 1000 + COMPLETED_PARALLEL_INPUTS = 5 + WAIT_MS_AVG = 33 + WAIT_MS_MAX = 400 + WAIT_RATIO_AVG = 2.71828 + WAIT_RATIO_MAX = 3.14159 + READ_MS_AVG = 45 + READ_MS_MAX = 90 + READ_RATIO_AVG = 1.41421 + READ_RATIO_MAX = 1.73205 + COMPUTE_MS_AVG = 55 + COMPUTE_MS_MAX = 99 + COMPUTE_RATIO_AVG = 0.69315 + COMPUTE_RATIO_MAX = 1.09861 + WRITE_MS_AVG = 203 + WRITE_MS_MAX = 340 + WRITE_RATIO_AVG = 3.32193 + WRITE_RATIO_MAX = 2.30258 + RECORDS_READ = 100 + RECORDS_WRITTEN = 1 + STATUS = "STATUS" + SHUFFLE_OUTPUT_BYTES = 1024 + SHUFFLE_OUTPUT_BYTES_SPILLED = 1 + + START_RFC3339_MICROS = "2018-04-01T00:00:00.000000Z" + END_RFC3339_MICROS = "2018-04-01T00:00:04.000000Z" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntry + + return QueryPlanEntry + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + + self.assertIsNone(entry.name) + self.assertIsNone(entry.entry_id) + self.assertEqual(entry.input_stages, []) + self.assertIsNone(entry.start) + self.assertIsNone(entry.end) + self.assertIsNone(entry.parallel_inputs) + self.assertIsNone(entry.completed_parallel_inputs) + self.assertIsNone(entry.wait_ms_avg) + self.assertIsNone(entry.wait_ms_max) + self.assertIsNone(entry.wait_ratio_avg) + self.assertIsNone(entry.wait_ratio_max) + self.assertIsNone(entry.read_ms_avg) + self.assertIsNone(entry.read_ms_max) + self.assertIsNone(entry.read_ratio_avg) + self.assertIsNone(entry.read_ratio_max) + self.assertIsNone(entry.compute_ms_avg) + self.assertIsNone(entry.compute_ms_max) + self.assertIsNone(entry.compute_ratio_avg) + self.assertIsNone(entry.compute_ratio_max) + self.assertIsNone(entry.write_ms_avg) + self.assertIsNone(entry.write_ms_max) + self.assertIsNone(entry.write_ratio_avg) + self.assertIsNone(entry.write_ratio_max) + self.assertIsNone(entry.records_read) + self.assertIsNone(entry.records_written) + self.assertIsNone(entry.status) + self.assertIsNone(entry.shuffle_output_bytes) + self.assertIsNone(entry.shuffle_output_bytes_spilled) + self.assertEqual(entry.steps, []) + + def test_from_api_repr_normal(self): + from google.cloud.bigquery.job import QueryPlanEntryStep + + steps = [ + QueryPlanEntryStep( + kind=TestQueryPlanEntryStep.KIND, + substeps=TestQueryPlanEntryStep.SUBSTEPS, + ) + ] + resource = { + "name": self.NAME, + "id": self.ENTRY_ID, + "inputStages": self.INPUT_STAGES, + "startMs": self.START_MS, + "endMs": self.END_MS, + "waitMsAvg": self.WAIT_MS_AVG, + "waitMsMax": self.WAIT_MS_MAX, + "waitRatioAvg": self.WAIT_RATIO_AVG, + "waitRatioMax": self.WAIT_RATIO_MAX, + "readMsAvg": self.READ_MS_AVG, + "readMsMax": self.READ_MS_MAX, + "readRatioAvg": self.READ_RATIO_AVG, + "readRatioMax": self.READ_RATIO_MAX, + "computeMsAvg": self.COMPUTE_MS_AVG, + "computeMsMax": self.COMPUTE_MS_MAX, + "computeRatioAvg": self.COMPUTE_RATIO_AVG, + "computeRatioMax": self.COMPUTE_RATIO_MAX, + "writeMsAvg": self.WRITE_MS_AVG, + "writeMsMax": self.WRITE_MS_MAX, + "writeRatioAvg": self.WRITE_RATIO_AVG, + "writeRatioMax": self.WRITE_RATIO_MAX, + "recordsRead": self.RECORDS_READ, + "recordsWritten": self.RECORDS_WRITTEN, + "status": self.STATUS, + "shuffleOutputBytes": self.SHUFFLE_OUTPUT_BYTES, + "shuffleOutputBytesSpilled": self.SHUFFLE_OUTPUT_BYTES_SPILLED, + "steps": [ + { + "kind": TestQueryPlanEntryStep.KIND, + "substeps": TestQueryPlanEntryStep.SUBSTEPS, + } + ], + } + klass = self._get_target_class() + + entry = klass.from_api_repr(resource) + self.assertEqual(entry.name, self.NAME) + self.assertEqual(entry.entry_id, self.ENTRY_ID) + self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) + self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) + self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) + self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) + self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) + self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) + self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) + self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) + self.assertEqual(entry.records_read, self.RECORDS_READ) + self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) + self.assertEqual(entry.status, self.STATUS) + self.assertEqual(entry.steps, steps) + + def test_start(self): + from google.cloud._helpers import _RFC3339_MICROS + + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + self.assertEqual(entry.start, None) + + entry._properties["startMs"] = self.START_MS + self.assertEqual( + entry.start.strftime(_RFC3339_MICROS), self.START_RFC3339_MICROS + ) + + def test_end(self): + from google.cloud._helpers import _RFC3339_MICROS + + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + self.assertEqual(entry.end, None) + + entry._properties["endMs"] = self.END_MS + self.assertEqual(entry.end.strftime(_RFC3339_MICROS), self.END_RFC3339_MICROS) + + +class TestScriptStackFrame(_Base): + def _make_one(self, resource): + from google.cloud.bigquery.job import ScriptStackFrame + + return ScriptStackFrame(resource) + + def test_procedure_id(self): + frame = self._make_one({"procedureId": "some-procedure"}) + self.assertEqual(frame.procedure_id, "some-procedure") + del frame._properties["procedureId"] + self.assertIsNone(frame.procedure_id) + + def test_start_line(self): + frame = self._make_one({"startLine": 5}) + self.assertEqual(frame.start_line, 5) + frame._properties["startLine"] = "5" + self.assertEqual(frame.start_line, 5) + + def test_start_column(self): + frame = self._make_one({"startColumn": 29}) + self.assertEqual(frame.start_column, 29) + frame._properties["startColumn"] = "29" + self.assertEqual(frame.start_column, 29) + + def test_end_line(self): + frame = self._make_one({"endLine": 9}) + self.assertEqual(frame.end_line, 9) + frame._properties["endLine"] = "9" + self.assertEqual(frame.end_line, 9) + + def test_end_column(self): + frame = self._make_one({"endColumn": 14}) + self.assertEqual(frame.end_column, 14) + frame._properties["endColumn"] = "14" + self.assertEqual(frame.end_column, 14) + + def test_text(self): + frame = self._make_one({"text": "QUERY TEXT"}) + self.assertEqual(frame.text, "QUERY TEXT") + + +class TestScriptStatistics(_Base): + def _make_one(self, resource): + from google.cloud.bigquery.job import ScriptStatistics + + return ScriptStatistics(resource) + + def test_evalutation_kind(self): + stats = self._make_one({"evaluationKind": "EXPRESSION"}) + self.assertEqual(stats.evaluation_kind, "EXPRESSION") + self.assertEqual(stats.stack_frames, []) + + def test_stack_frames(self): + stats = self._make_one( + { + "stackFrames": [ + { + "procedureId": "some-procedure", + "startLine": 5, + "startColumn": 29, + "endLine": 9, + "endColumn": 14, + "text": "QUERY TEXT", + }, + {}, + ] + } + ) + stack_frames = stats.stack_frames + self.assertEqual(len(stack_frames), 2) + stack_frame = stack_frames[0] + self.assertEqual(stack_frame.procedure_id, "some-procedure") + self.assertEqual(stack_frame.start_line, 5) + self.assertEqual(stack_frame.start_column, 29) + self.assertEqual(stack_frame.end_line, 9) + self.assertEqual(stack_frame.end_column, 14) + self.assertEqual(stack_frame.text, "QUERY TEXT") + stack_frame = stack_frames[1] + self.assertIsNone(stack_frame.procedure_id) + self.assertIsNone(stack_frame.start_line) + self.assertIsNone(stack_frame.start_column) + self.assertIsNone(stack_frame.end_line) + self.assertIsNone(stack_frame.end_column) + self.assertIsNone(stack_frame.text) + + +class TestTimelineEntry(_Base): + ELAPSED_MS = 101 + ACTIVE_UNITS = 50 + PENDING_UNITS = 98 + COMPLETED_UNITS = 520 + SLOT_MILLIS = 12029 + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import TimelineEntry + + return TimelineEntry + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + entry = klass.from_api_repr({}) + self.assertIsNone(entry.elapsed_ms) + self.assertIsNone(entry.active_units) + self.assertIsNone(entry.pending_units) + self.assertIsNone(entry.completed_units) + self.assertIsNone(entry.slot_millis) + + def test_from_api_repr_normal(self): + resource = { + "elapsedMs": self.ELAPSED_MS, + "activeUnits": self.ACTIVE_UNITS, + "pendingUnits": self.PENDING_UNITS, + "completedUnits": self.COMPLETED_UNITS, + "totalSlotMs": self.SLOT_MILLIS, + } + klass = self._get_target_class() + + entry = klass.from_api_repr(resource) + self.assertEqual(entry.elapsed_ms, self.ELAPSED_MS) + self.assertEqual(entry.active_units, self.ACTIVE_UNITS) + self.assertEqual(entry.pending_units, self.PENDING_UNITS) + self.assertEqual(entry.completed_units, self.COMPLETED_UNITS) + self.assertEqual(entry.slot_millis, self.SLOT_MILLIS) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job.py b/packages/google-cloud-bigquery/tests/unit/test_job.py deleted file mode 100644 index 8590e05765ce..000000000000 --- a/packages/google-cloud-bigquery/tests/unit/test_job.py +++ /dev/null @@ -1,6448 +0,0 @@ -# Copyright 2015 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import concurrent -import copy -import json -import textwrap -import unittest -import warnings - -import freezegun -from google.api_core import exceptions -import google.api_core.retry -import mock -import pytest -import requests -from six.moves import http_client - -try: - import pandas -except (ImportError, AttributeError): # pragma: NO COVER - pandas = None - -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None -try: - from tqdm import tqdm -except (ImportError, AttributeError): # pragma: NO COVER - tqdm = None - -import google.cloud.bigquery.query - - -def _make_credentials(): - import google.auth.credentials - - return mock.Mock(spec=google.auth.credentials.Credentials) - - -def _make_client(project="test-project", connection=None): - from google.cloud.bigquery.client import Client - - if connection is None: - connection = _make_connection() - - client = Client(project=project, credentials=_make_credentials(), _http=object()) - client._connection = connection - return client - - -def _make_connection(*responses): - import google.cloud.bigquery._http - from google.cloud.exceptions import NotFound - - mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) - mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] - return mock_conn - - -def _make_retriable_exception(): - return exceptions.TooManyRequests( - "retriable exception", errors=[{"reason": "rateLimitExceeded"}] - ) - - -def _make_job_resource( - creation_time_ms=1437767599006, - started_time_ms=1437767600007, - ended_time_ms=1437767601008, - started=False, - ended=False, - etag="abc-def-hjk", - endpoint="https://bigquery.googleapis.com", - job_type="load", - job_id="a-random-id", - project_id="some-project", - user_email="bq-user@example.com", -): - resource = { - "status": {"state": "PENDING"}, - "configuration": {job_type: {}}, - "statistics": {"creationTime": creation_time_ms, job_type: {}}, - "etag": etag, - "id": "{}:{}".format(project_id, job_id), - "jobReference": {"projectId": project_id, "jobId": job_id}, - "selfLink": "{}/bigquery/v2/projects/{}/jobs/{}".format( - endpoint, project_id, job_id - ), - "user_email": user_email, - } - - if started or ended: - resource["statistics"]["startTime"] = started_time_ms - resource["status"]["state"] = "RUNNING" - - if ended: - resource["statistics"]["endTime"] = ended_time_ms - resource["status"]["state"] = "DONE" - - if job_type == "query": - resource["configuration"]["query"]["destinationTable"] = { - "projectId": project_id, - "datasetId": "_temp_dataset", - "tableId": "_temp_table", - } - - return resource - - -class Test__error_result_to_exception(unittest.TestCase): - def _call_fut(self, *args, **kwargs): - from google.cloud.bigquery import job - - return job._error_result_to_exception(*args, **kwargs) - - def test_simple(self): - error_result = {"reason": "invalid", "message": "bad request"} - exception = self._call_fut(error_result) - self.assertEqual(exception.code, http_client.BAD_REQUEST) - self.assertTrue(exception.message.startswith("bad request")) - self.assertIn(error_result, exception.errors) - - def test_missing_reason(self): - error_result = {} - exception = self._call_fut(error_result) - self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR) - - -class Test_JobReference(unittest.TestCase): - JOB_ID = "job-id" - PROJECT = "test-project-123" - LOCATION = "us-central" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery import job - - return job._JobReference - - def _make_one(self, job_id, project, location): - return self._get_target_class()(job_id, project, location) - - def test_ctor(self): - job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) - - self.assertEqual(job_ref.job_id, self.JOB_ID) - self.assertEqual(job_ref.project, self.PROJECT) - self.assertEqual(job_ref.location, self.LOCATION) - - def test__to_api_repr(self): - job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) - - self.assertEqual( - job_ref._to_api_repr(), - { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": self.LOCATION, - }, - ) - - def test_from_api_repr(self): - api_repr = { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": self.LOCATION, - } - - job_ref = self._get_target_class()._from_api_repr(api_repr) - - self.assertEqual(job_ref.job_id, self.JOB_ID) - self.assertEqual(job_ref.project, self.PROJECT) - self.assertEqual(job_ref.location, self.LOCATION) - - -class Test_AsyncJob(unittest.TestCase): - JOB_ID = "job-id" - PROJECT = "test-project-123" - LOCATION = "us-central" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery import job - - return job._AsyncJob - - def _make_one(self, job_id, client): - return self._get_target_class()(job_id, client) - - def _make_derived_class(self): - class Derived(self._get_target_class()): - _JOB_TYPE = "derived" - - return Derived - - def _make_derived(self, job_id, client): - return self._make_derived_class()(job_id, client) - - @staticmethod - def _job_reference(job_id, project, location): - from google.cloud.bigquery import job - - return job._JobReference(job_id, project, location) - - def test_ctor_w_bare_job_id(self): - import threading - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertEqual(job.job_id, self.JOB_ID) - self.assertEqual(job.project, self.PROJECT) - self.assertIsNone(job.location) - self.assertIs(job._client, client) - self.assertEqual( - job._properties, - {"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}}, - ) - self.assertIsInstance(job._completion_lock, type(threading.Lock())) - self.assertEqual( - job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - ) - - def test_ctor_w_job_ref(self): - import threading - - other_project = "other-project-234" - client = _make_client(project=other_project) - job_ref = self._job_reference(self.JOB_ID, self.PROJECT, self.LOCATION) - job = self._make_one(job_ref, client) - - self.assertEqual(job.job_id, self.JOB_ID) - self.assertEqual(job.project, self.PROJECT) - self.assertEqual(job.location, self.LOCATION) - self.assertIs(job._client, client) - self.assertEqual( - job._properties, - { - "jobReference": { - "projectId": self.PROJECT, - "location": self.LOCATION, - "jobId": self.JOB_ID, - } - }, - ) - self.assertFalse(job._result_set) - self.assertIsInstance(job._completion_lock, type(threading.Lock())) - self.assertEqual( - job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - ) - - def test__require_client_w_none(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertIs(job._require_client(None), client) - - def test__require_client_w_other(self): - client = _make_client(project=self.PROJECT) - other = object() - job = self._make_one(self.JOB_ID, client) - - self.assertIs(job._require_client(other), other) - - def test_job_type(self): - client = _make_client(project=self.PROJECT) - derived = self._make_derived(self.JOB_ID, client) - - self.assertEqual(derived.job_type, "derived") - - def test_parent_job_id(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertIsNone(job.parent_job_id) - job._properties["statistics"] = {"parentJobId": "parent-job-123"} - self.assertEqual(job.parent_job_id, "parent-job-123") - - def test_script_statistics(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertIsNone(job.script_statistics) - job._properties["statistics"] = { - "scriptStatistics": { - "evaluationKind": "EXPRESSION", - "stackFrames": [ - { - "startLine": 5, - "startColumn": 29, - "endLine": 9, - "endColumn": 14, - "text": "QUERY TEXT", - } - ], - } - } - script_stats = job.script_statistics - self.assertEqual(script_stats.evaluation_kind, "EXPRESSION") - stack_frames = script_stats.stack_frames - self.assertEqual(len(stack_frames), 1) - stack_frame = stack_frames[0] - self.assertIsNone(stack_frame.procedure_id) - self.assertEqual(stack_frame.start_line, 5) - self.assertEqual(stack_frame.start_column, 29) - self.assertEqual(stack_frame.end_line, 9) - self.assertEqual(stack_frame.end_column, 14) - self.assertEqual(stack_frame.text, "QUERY TEXT") - - def test_num_child_jobs(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertEqual(job.num_child_jobs, 0) - job._properties["statistics"] = {"numChildJobs": "17"} - self.assertEqual(job.num_child_jobs, 17) - - def test_labels_miss(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertEqual(job.labels, {}) - - def test_labels_update_in_place(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - labels = job.labels - labels["foo"] = "bar" # update in place - self.assertEqual(job.labels, {"foo": "bar"}) - - def test_labels_hit(self): - labels = {"foo": "bar"} - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["labels"] = labels - self.assertEqual(job.labels, labels) - - def test_etag(self): - etag = "ETAG-123" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.etag) - job._properties["etag"] = etag - self.assertEqual(job.etag, etag) - - def test_self_link(self): - self_link = "https://api.example.com/123" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.self_link) - job._properties["selfLink"] = self_link - self.assertEqual(job.self_link, self_link) - - def test_user_email(self): - user_email = "user@example.com" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.user_email) - job._properties["user_email"] = user_email - self.assertEqual(job.user_email, user_email) - - @staticmethod - def _datetime_and_millis(): - import datetime - import pytz - from google.cloud._helpers import _millis - - now = datetime.datetime.utcnow().replace( - microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision - ) - return now, _millis(now) - - def test_created(self): - now, millis = self._datetime_and_millis() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.created) - stats = job._properties["statistics"] = {} - self.assertIsNone(job.created) - stats["creationTime"] = millis - self.assertEqual(job.created, now) - - def test_started(self): - now, millis = self._datetime_and_millis() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.started) - stats = job._properties["statistics"] = {} - self.assertIsNone(job.started) - stats["startTime"] = millis - self.assertEqual(job.started, now) - - def test_ended(self): - now, millis = self._datetime_and_millis() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.ended) - stats = job._properties["statistics"] = {} - self.assertIsNone(job.ended) - stats["endTime"] = millis - self.assertEqual(job.ended, now) - - def test__job_statistics(self): - statistics = {"foo": "bar"} - client = _make_client(project=self.PROJECT) - derived = self._make_derived(self.JOB_ID, client) - self.assertEqual(derived._job_statistics(), {}) - stats = derived._properties["statistics"] = {} - self.assertEqual(derived._job_statistics(), {}) - stats["derived"] = statistics - self.assertEqual(derived._job_statistics(), statistics) - - def test_error_result(self): - error_result = { - "debugInfo": "DEBUG INFO", - "location": "LOCATION", - "message": "MESSAGE", - "reason": "REASON", - } - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.error_result) - status = job._properties["status"] = {} - self.assertIsNone(job.error_result) - status["errorResult"] = error_result - self.assertEqual(job.error_result, error_result) - - def test_errors(self): - errors = [ - { - "debugInfo": "DEBUG INFO", - "location": "LOCATION", - "message": "MESSAGE", - "reason": "REASON", - } - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.errors) - status = job._properties["status"] = {} - self.assertIsNone(job.errors) - status["errors"] = errors - self.assertEqual(job.errors, errors) - - def test_state(self): - state = "STATE" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.state) - status = job._properties["status"] = {} - self.assertIsNone(job.state) - status["state"] = state - self.assertEqual(job.state, state) - - def _set_properties_job(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._set_future_result = mock.Mock() - job._properties = { - "jobReference": job._properties["jobReference"], - "foo": "bar", - } - return job - - def test__set_properties_no_stats(self): - config = {"test": True} - resource = {"configuration": config} - job = self._set_properties_job() - - job._set_properties(resource) - - self.assertEqual(job._properties, resource) - - def test__set_properties_w_creation_time(self): - now, millis = self._datetime_and_millis() - config = {"test": True} - stats = {"creationTime": str(millis)} - resource = {"configuration": config, "statistics": stats} - job = self._set_properties_job() - - job._set_properties(resource) - - cleaned = copy.deepcopy(resource) - cleaned["statistics"]["creationTime"] = float(millis) - self.assertEqual(job._properties, cleaned) - - def test__set_properties_w_start_time(self): - now, millis = self._datetime_and_millis() - config = {"test": True} - stats = {"startTime": str(millis)} - resource = {"configuration": config, "statistics": stats} - job = self._set_properties_job() - - job._set_properties(resource) - - cleaned = copy.deepcopy(resource) - cleaned["statistics"]["startTime"] = float(millis) - self.assertEqual(job._properties, cleaned) - - def test__set_properties_w_end_time(self): - now, millis = self._datetime_and_millis() - config = {"test": True} - stats = {"endTime": str(millis)} - resource = {"configuration": config, "statistics": stats} - job = self._set_properties_job() - - job._set_properties(resource) - - cleaned = copy.deepcopy(resource) - cleaned["statistics"]["endTime"] = float(millis) - self.assertEqual(job._properties, cleaned) - - def test__check_resource_config_missing_job_ref(self): - resource = {} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_missing_job_id(self): - resource = {"jobReference": {}} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_missing_configuration(self): - resource = {"jobReference": {"jobId": self.JOB_ID}} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_missing_config_type(self): - resource = {"jobReference": {"jobId": self.JOB_ID}, "configuration": {}} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_ok(self): - derived_config = {"foo": "bar"} - resource = { - "jobReference": {"jobId": self.JOB_ID}, - "configuration": {"derived": derived_config}, - } - klass = self._make_derived_class() - - # Should not throw. - klass._check_resource_config(resource) - - def test__build_resource(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - resource = job._build_resource() - assert resource["jobReference"]["jobId"] == self.JOB_ID - - def test_to_api_repr(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - resource = job.to_api_repr() - assert resource["jobReference"]["jobId"] == self.JOB_ID - - def test__begin_already(self): - job = self._set_properties_job() - job._properties["status"] = {"state": "WHATEVER"} - - with self.assertRaises(ValueError): - job._begin() - - def test__begin_defaults(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - builder = job.to_api_repr = mock.Mock() - builder.return_value = resource - call_api = job._client._call_api = mock.Mock() - call_api.return_value = resource - path = "/projects/{}/jobs".format(self.PROJECT) - job._begin() - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.begin", - span_attributes={"path": path}, - job_ref=job, - method="POST", - path=path, - data=resource, - timeout=None, - ) - self.assertEqual(job._properties, resource) - - def test__begin_explicit(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - builder = job.to_api_repr = mock.Mock() - builder.return_value = resource - client = _make_client(project=other_project) - call_api = client._call_api = mock.Mock() - call_api.return_value = resource - retry = DEFAULT_RETRY.with_deadline(1) - path = "/projects/{}/jobs".format(self.PROJECT) - job._begin(client=client, retry=retry, timeout=7.5) - - call_api.assert_called_once_with( - retry, - span_name="BigQuery.job.begin", - span_attributes={"path": path}, - job_ref=job, - method="POST", - path=path, - data=resource, - timeout=7.5, - ) - self.assertEqual(job._properties, resource) - - def test_exists_defaults_miss(self): - from google.cloud.exceptions import NotFound - from google.cloud.bigquery.retry import DEFAULT_RETRY - - job = self._set_properties_job() - job._properties["jobReference"]["location"] = self.LOCATION - call_api = job._client._call_api = mock.Mock() - call_api.side_effect = NotFound("testing") - self.assertFalse(job.exists()) - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.exists", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={"fields": "id", "location": self.LOCATION}, - timeout=None, - ) - - def test_exists_explicit_hit(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - client = _make_client(project=other_project) - call_api = client._call_api = mock.Mock() - call_api.return_value = resource - retry = DEFAULT_RETRY.with_deadline(1) - self.assertTrue(job.exists(client=client, retry=retry)) - - call_api.assert_called_once_with( - retry, - span_name="BigQuery.job.exists", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={"fields": "id"}, - timeout=None, - ) - - def test_exists_w_timeout(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - PATH = "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - job = self._set_properties_job() - call_api = job._client._call_api = mock.Mock() - job.exists(timeout=7.5) - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.exists", - span_attributes={"path": PATH}, - job_ref=job, - method="GET", - path=PATH, - query_params={"fields": "id"}, - timeout=7.5, - ) - - def test_reload_defaults(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - job._properties["jobReference"]["location"] = self.LOCATION - call_api = job._client._call_api = mock.Mock() - call_api.return_value = resource - job.reload() - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.reload", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={"location": self.LOCATION}, - timeout=None, - ) - self.assertEqual(job._properties, resource) - - def test_reload_explicit(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - client = _make_client(project=other_project) - call_api = client._call_api = mock.Mock() - call_api.return_value = resource - retry = DEFAULT_RETRY.with_deadline(1) - job.reload(client=client, retry=retry, timeout=4.2) - - call_api.assert_called_once_with( - retry, - span_name="BigQuery.job.reload", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={}, - timeout=4.2, - ) - self.assertEqual(job._properties, resource) - - def test_cancel_defaults(self): - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - response = {"job": resource} - job = self._set_properties_job() - job._properties["jobReference"]["location"] = self.LOCATION - connection = job._client._connection = _make_connection(response) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.cancel()) - - final_attributes.assert_called() - - connection.api_request.assert_called_once_with( - method="POST", - path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), - query_params={"location": self.LOCATION}, - timeout=None, - ) - self.assertEqual(job._properties, resource) - - def test_cancel_explicit(self): - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - response = {"job": resource} - job = self._set_properties_job() - client = _make_client(project=other_project) - connection = client._connection = _make_connection(response) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.cancel(client=client, timeout=7.5)) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID)}, - client, - job, - ) - - connection.api_request.assert_called_once_with( - method="POST", - path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), - query_params={}, - timeout=7.5, - ) - self.assertEqual(job._properties, resource) - - def test_cancel_w_custom_retry(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - api_path = "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID) - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - response = {"job": resource} - job = self._set_properties_job() - - api_request_patcher = mock.patch.object( - job._client._connection, "api_request", side_effect=[ValueError, response] - ) - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, ValueError) - ) - - with api_request_patcher as fake_api_request: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - result = job.cancel(retry=retry, timeout=7.5) - - final_attributes.assert_called() - - self.assertTrue(result) - self.assertEqual(job._properties, resource) - self.assertEqual( - fake_api_request.call_args_list, - [ - mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), - mock.call( - method="POST", path=api_path, query_params={}, timeout=7.5 - ), # was retried once - ], - ) - - def test__set_future_result_wo_done(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_not_called() - set_result.assert_not_called() - - def test__set_future_result_w_result_set(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - job._result_set = True - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_not_called() - set_result.assert_not_called() - - def test__set_future_result_w_done_wo_result_set_w_error(self): - from google.cloud.exceptions import NotFound - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = { - "state": "DONE", - "errorResult": {"reason": "notFound", "message": "testing"}, - } - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_called_once() - args, kw = set_exception.call_args - (exception,) = args - self.assertIsInstance(exception, NotFound) - self.assertEqual(exception.message, "testing") - self.assertEqual(kw, {}) - set_result.assert_not_called() - - def test__set_future_result_w_done_wo_result_set_wo_error(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_not_called() - set_result.assert_called_once_with(job) - - def test_done_defaults_wo_state(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - reload_ = job.reload = mock.Mock() - - self.assertFalse(job.done()) - - reload_.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) - - def test_done_explicit_wo_state(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - reload_ = job.reload = mock.Mock() - retry = DEFAULT_RETRY.with_deadline(1) - - self.assertFalse(job.done(retry=retry, timeout=7.5)) - - reload_.assert_called_once_with(retry=retry, timeout=7.5) - - def test_done_already(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - - self.assertTrue(job.done()) - - def test_result_default_wo_state(self): - begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True - ) - done_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True - ) - conn = _make_connection( - _make_retriable_exception(), - begun_job_resource, - _make_retriable_exception(), - done_job_resource, - ) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) - - self.assertIs(job.result(), job) - - begin_call = mock.call( - method="POST", - path=f"/projects/{self.PROJECT}/jobs", - data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls( - [begin_call, begin_call, reload_call, reload_call] - ) - - def test_result_w_retry_wo_state(self): - begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True - ) - done_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True - ) - conn = _make_connection( - exceptions.NotFound("not normally retriable"), - begun_job_resource, - # The call to done() / reload() does not get the custom retry - # policy passed to it, so we don't throw a non-retriable - # exception here. See: - # https://github.com/googleapis/python-bigquery/issues/24 - _make_retriable_exception(), - done_job_resource, - ) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - self.assertIs(job.result(retry=custom_retry), job) - - begin_call = mock.call( - method="POST", - path=f"/projects/{self.PROJECT}/jobs", - data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls( - [begin_call, begin_call, reload_call, reload_call] - ) - - def test_result_explicit_w_state(self): - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) - # Use _set_properties() instead of directly modifying _properties so - # that the result state is set properly. - job_resource = job._properties - job_resource["status"] = {"state": "DONE"} - job._set_properties(job_resource) - timeout = 1 - - self.assertIs(job.result(timeout=timeout), job) - - conn.api_request.assert_not_called() - - def test_cancelled_wo_error_result(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertFalse(job.cancelled()) - - def test_cancelled_w_error_result_not_stopped(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"errorResult": {"reason": "other"}} - - self.assertFalse(job.cancelled()) - - def test_cancelled_w_error_result_w_stopped(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"errorResult": {"reason": "stopped"}} - - self.assertTrue(job.cancelled()) - - -class Test_JobConfig(unittest.TestCase): - JOB_TYPE = "testing" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery import job - - return job._JobConfig - - def _make_one(self, job_type=JOB_TYPE): - return self._get_target_class()(job_type) - - def test_ctor(self): - job_config = self._make_one() - self.assertEqual(job_config._job_type, self.JOB_TYPE) - self.assertEqual(job_config._properties, {self.JOB_TYPE: {}}) - - def test_fill_from_default(self): - from google.cloud.bigquery import QueryJobConfig - - job_config = QueryJobConfig() - job_config.dry_run = True - job_config.maximum_bytes_billed = 1000 - - default_job_config = QueryJobConfig() - default_job_config.use_query_cache = True - default_job_config.maximum_bytes_billed = 2000 - - final_job_config = job_config._fill_from_default(default_job_config) - self.assertTrue(final_job_config.dry_run) - self.assertTrue(final_job_config.use_query_cache) - self.assertEqual(final_job_config.maximum_bytes_billed, 1000) - - def test_fill_from_default_conflict(self): - from google.cloud.bigquery import QueryJobConfig - - basic_job_config = QueryJobConfig() - conflicting_job_config = self._make_one("conflicting_job_type") - self.assertNotEqual( - basic_job_config._job_type, conflicting_job_config._job_type - ) - - with self.assertRaises(TypeError): - basic_job_config._fill_from_default(conflicting_job_config) - - @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") - def test__get_sub_prop_wo_default(self, _get_sub_prop): - job_config = self._make_one() - key = "key" - self.assertIs(job_config._get_sub_prop(key), _get_sub_prop.return_value) - _get_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], default=None - ) - - @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") - def test__get_sub_prop_w_default(self, _get_sub_prop): - job_config = self._make_one() - key = "key" - default = "default" - self.assertIs( - job_config._get_sub_prop(key, default=default), _get_sub_prop.return_value - ) - _get_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], default=default - ) - - @mock.patch("google.cloud.bigquery._helpers._set_sub_prop") - def test__set_sub_prop(self, _set_sub_prop): - job_config = self._make_one() - key = "key" - value = "value" - job_config._set_sub_prop(key, value) - _set_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], value - ) - - def test_to_api_repr(self): - job_config = self._make_one() - expected = job_config._properties = {self.JOB_TYPE: {"foo": "bar"}} - found = job_config.to_api_repr() - self.assertEqual(found, expected) - self.assertIsNot(found, expected) # copied - - # 'from_api_repr' cannot be tested on '_JobConfig', because it presumes - # the ctor can be called w/o arguments - - def test_labels_miss(self): - job_config = self._make_one() - self.assertEqual(job_config.labels, {}) - - def test_labels_update_in_place(self): - job_config = self._make_one() - labels = job_config.labels - labels["foo"] = "bar" # update in place - self.assertEqual(job_config.labels, {"foo": "bar"}) - - def test_labels_hit(self): - labels = {"foo": "bar"} - job_config = self._make_one() - job_config._properties["labels"] = labels - self.assertEqual(job_config.labels, labels) - - def test_labels_setter_invalid(self): - labels = object() - job_config = self._make_one() - with self.assertRaises(ValueError): - job_config.labels = labels - - def test_labels_setter(self): - labels = {"foo": "bar"} - job_config = self._make_one() - job_config.labels = labels - self.assertEqual(job_config._properties["labels"], labels) - - -class _Base(object): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.table import TableReference - - ENDPOINT = "https://bigquery.googleapis.com" - PROJECT = "project" - SOURCE1 = "http://example.com/source1.csv" - DS_ID = "dataset_id" - DS_REF = DatasetReference(PROJECT, DS_ID) - TABLE_ID = "table_id" - TABLE_REF = TableReference(DS_REF, TABLE_ID) - JOB_ID = "JOB_ID" - KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def _setUpConstants(self): - import datetime - from google.cloud._helpers import UTC - - self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) - self.ETAG = "ETAG" - self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) - self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format( - self.ENDPOINT, self.PROJECT, self.JOB_ID - ) - self.USER_EMAIL = "phred@example.com" - - def _table_ref(self, table_id): - from google.cloud.bigquery.table import TableReference - - return TableReference(self.DS_REF, table_id) - - def _make_resource(self, started=False, ended=False): - self._setUpConstants() - return _make_job_resource( - creation_time_ms=int(self.WHEN_TS * 1000), - started_time_ms=int(self.WHEN_TS * 1000), - ended_time_ms=int(self.WHEN_TS * 1000) + 1000000, - started=started, - ended=ended, - etag=self.ETAG, - endpoint=self.ENDPOINT, - job_type=self.JOB_TYPE, - job_id=self.JOB_ID, - project_id=self.PROJECT, - user_email=self.USER_EMAIL, - ) - - def _verifyInitialReadonlyProperties(self, job): - # root elements of resource - self.assertIsNone(job.etag) - self.assertIsNone(job.self_link) - self.assertIsNone(job.user_email) - - # derived from resource['statistics'] - self.assertIsNone(job.created) - self.assertIsNone(job.started) - self.assertIsNone(job.ended) - - # derived from resource['status'] - self.assertIsNone(job.error_result) - self.assertIsNone(job.errors) - self.assertIsNone(job.state) - - def _verifyReadonlyResourceProperties(self, job, resource): - from datetime import timedelta - - statistics = resource.get("statistics", {}) - - if "creationTime" in statistics: - self.assertEqual(job.created, self.WHEN) - else: - self.assertIsNone(job.created) - - if "startTime" in statistics: - self.assertEqual(job.started, self.WHEN) - else: - self.assertIsNone(job.started) - - if "endTime" in statistics: - self.assertEqual(job.ended, self.WHEN + timedelta(seconds=1000)) - else: - self.assertIsNone(job.ended) - - if "etag" in resource: - self.assertEqual(job.etag, self.ETAG) - else: - self.assertIsNone(job.etag) - - if "selfLink" in resource: - self.assertEqual(job.self_link, self.RESOURCE_URL) - else: - self.assertIsNone(job.self_link) - - if "user_email" in resource: - self.assertEqual(job.user_email, self.USER_EMAIL) - else: - self.assertIsNone(job.user_email) - - -class TestLoadJobConfig(unittest.TestCase, _Base): - JOB_TYPE = "load" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import LoadJobConfig - - return LoadJobConfig - - def test_ctor_w_properties(self): - config = self._get_target_class()( - allow_jagged_rows=True, allow_quoted_newlines=True - ) - - self.assertTrue(config.allow_jagged_rows) - self.assertTrue(config.allow_quoted_newlines) - - def test_allow_jagged_rows_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.allow_jagged_rows) - - def test_allow_jagged_rows_hit(self): - config = self._get_target_class()() - config._properties["load"]["allowJaggedRows"] = True - self.assertTrue(config.allow_jagged_rows) - - def test_allow_jagged_rows_setter(self): - config = self._get_target_class()() - config.allow_jagged_rows = True - self.assertTrue(config._properties["load"]["allowJaggedRows"]) - - def test_allow_quoted_newlines_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.allow_quoted_newlines) - - def test_allow_quoted_newlines_hit(self): - config = self._get_target_class()() - config._properties["load"]["allowQuotedNewlines"] = True - self.assertTrue(config.allow_quoted_newlines) - - def test_allow_quoted_newlines_setter(self): - config = self._get_target_class()() - config.allow_quoted_newlines = True - self.assertTrue(config._properties["load"]["allowQuotedNewlines"]) - - def test_autodetect_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.autodetect) - - def test_autodetect_hit(self): - config = self._get_target_class()() - config._properties["load"]["autodetect"] = True - self.assertTrue(config.autodetect) - - def test_autodetect_setter(self): - config = self._get_target_class()() - config.autodetect = True - self.assertTrue(config._properties["load"]["autodetect"]) - - def test_clustering_fields_miss(self): - config = self._get_target_class()() - self.assertIsNone(config.clustering_fields) - - def test_clustering_fields_hit(self): - config = self._get_target_class()() - fields = ["email", "postal_code"] - config._properties["load"]["clustering"] = {"fields": fields} - self.assertEqual(config.clustering_fields, fields) - - def test_clustering_fields_setter(self): - fields = ["email", "postal_code"] - config = self._get_target_class()() - config.clustering_fields = fields - self.assertEqual(config._properties["load"]["clustering"], {"fields": fields}) - - def test_clustering_fields_setter_w_none(self): - config = self._get_target_class()() - fields = ["email", "postal_code"] - config._properties["load"]["clustering"] = {"fields": fields} - config.clustering_fields = None - self.assertIsNone(config.clustering_fields) - self.assertNotIn("clustering", config._properties["load"]) - - def test_create_disposition_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.create_disposition) - - def test_create_disposition_hit(self): - from google.cloud.bigquery.job import CreateDisposition - - disposition = CreateDisposition.CREATE_IF_NEEDED - config = self._get_target_class()() - config._properties["load"]["createDisposition"] = disposition - self.assertEqual(config.create_disposition, disposition) - - def test_create_disposition_setter(self): - from google.cloud.bigquery.job import CreateDisposition - - disposition = CreateDisposition.CREATE_IF_NEEDED - config = self._get_target_class()() - config.create_disposition = disposition - self.assertEqual(config._properties["load"]["createDisposition"], disposition) - - def test_destination_encryption_configuration_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.destination_encryption_configuration) - - def test_destination_encryption_configuration_hit(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - kms_key_name = "kms-key-name" - encryption_configuration = EncryptionConfiguration(kms_key_name) - config = self._get_target_class()() - config._properties["load"]["destinationEncryptionConfiguration"] = { - "kmsKeyName": kms_key_name - } - self.assertEqual( - config.destination_encryption_configuration, encryption_configuration - ) - - def test_destination_encryption_configuration_setter(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - kms_key_name = "kms-key-name" - encryption_configuration = EncryptionConfiguration(kms_key_name) - config = self._get_target_class()() - config.destination_encryption_configuration = encryption_configuration - expected = {"kmsKeyName": kms_key_name} - self.assertEqual( - config._properties["load"]["destinationEncryptionConfiguration"], expected - ) - - def test_destination_encryption_configuration_setter_w_none(self): - kms_key_name = "kms-key-name" - config = self._get_target_class()() - config._properties["load"]["destinationEncryptionConfiguration"] = { - "kmsKeyName": kms_key_name - } - config.destination_encryption_configuration = None - self.assertIsNone(config.destination_encryption_configuration) - self.assertNotIn( - "destinationEncryptionConfiguration", config._properties["load"] - ) - - def test_destination_table_description_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.destination_table_description) - - def test_destination_table_description_hit(self): - description = "Description" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description - } - self.assertEqual(config.destination_table_description, description) - - def test_destination_table_description_setter(self): - description = "Description" - config = self._get_target_class()() - config.destination_table_description = description - expected = {"description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_description_setter_w_fn_already(self): - description = "Description" - friendly_name = "Friendly Name" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "friendlyName": friendly_name - } - config.destination_table_description = description - expected = {"friendlyName": friendly_name, "description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_description_w_none(self): - description = "Description" - friendly_name = "Friendly Name" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description, - "friendlyName": friendly_name, - } - config.destination_table_description = None - expected = {"friendlyName": friendly_name} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_friendly_name_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.destination_table_friendly_name) - - def test_destination_table_friendly_name_hit(self): - friendly_name = "Friendly Name" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "friendlyName": friendly_name - } - self.assertEqual(config.destination_table_friendly_name, friendly_name) - - def test_destination_table_friendly_name_setter(self): - friendly_name = "Friendly Name" - config = self._get_target_class()() - config.destination_table_friendly_name = friendly_name - expected = {"friendlyName": friendly_name} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_friendly_name_setter_w_descr_already(self): - friendly_name = "Friendly Name" - description = "Description" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description - } - config.destination_table_friendly_name = friendly_name - expected = {"friendlyName": friendly_name, "description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_friendly_name_w_none(self): - friendly_name = "Friendly Name" - description = "Description" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description, - "friendlyName": friendly_name, - } - config.destination_table_friendly_name = None - expected = {"description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_encoding_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.encoding) - - def test_encoding_hit(self): - from google.cloud.bigquery.job import Encoding - - encoding = Encoding.UTF_8 - config = self._get_target_class()() - config._properties["load"]["encoding"] = encoding - self.assertEqual(config.encoding, encoding) - - def test_encoding_setter(self): - from google.cloud.bigquery.job import Encoding - - encoding = Encoding.UTF_8 - config = self._get_target_class()() - config.encoding = encoding - self.assertEqual(config._properties["load"]["encoding"], encoding) - - def test_field_delimiter_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.field_delimiter) - - def test_field_delimiter_hit(self): - field_delimiter = "|" - config = self._get_target_class()() - config._properties["load"]["fieldDelimiter"] = field_delimiter - self.assertEqual(config.field_delimiter, field_delimiter) - - def test_field_delimiter_setter(self): - field_delimiter = "|" - config = self._get_target_class()() - config.field_delimiter = field_delimiter - self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter) - - def test_hive_partitioning_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.hive_partitioning) - - def test_hive_partitioning_hit(self): - from google.cloud.bigquery.external_config import HivePartitioningOptions - - config = self._get_target_class()() - config._properties["load"]["hivePartitioningOptions"] = { - "sourceUriPrefix": "http://foo/bar", - "mode": "STRINGS", - } - result = config.hive_partitioning - self.assertIsInstance(result, HivePartitioningOptions) - self.assertEqual(result.source_uri_prefix, "http://foo/bar") - self.assertEqual(result.mode, "STRINGS") - - def test_hive_partitioning_setter(self): - from google.cloud.bigquery.external_config import HivePartitioningOptions - - hive_partitioning = HivePartitioningOptions() - hive_partitioning.source_uri_prefix = "http://foo/bar" - hive_partitioning.mode = "AUTO" - - config = self._get_target_class()() - config.hive_partitioning = hive_partitioning - self.assertEqual( - config._properties["load"]["hivePartitioningOptions"], - {"sourceUriPrefix": "http://foo/bar", "mode": "AUTO"}, - ) - - config.hive_partitioning = None - self.assertIsNone(config._properties["load"]["hivePartitioningOptions"]) - - def test_hive_partitioning_invalid_type(self): - config = self._get_target_class()() - - with self.assertRaises(TypeError): - config.hive_partitioning = {"mode": "AUTO"} - - def test_ignore_unknown_values_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.ignore_unknown_values) - - def test_ignore_unknown_values_hit(self): - config = self._get_target_class()() - config._properties["load"]["ignoreUnknownValues"] = True - self.assertTrue(config.ignore_unknown_values) - - def test_ignore_unknown_values_setter(self): - config = self._get_target_class()() - config.ignore_unknown_values = True - self.assertTrue(config._properties["load"]["ignoreUnknownValues"]) - - def test_max_bad_records_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.max_bad_records) - - def test_max_bad_records_hit(self): - max_bad_records = 13 - config = self._get_target_class()() - config._properties["load"]["maxBadRecords"] = max_bad_records - self.assertEqual(config.max_bad_records, max_bad_records) - - def test_max_bad_records_setter(self): - max_bad_records = 13 - config = self._get_target_class()() - config.max_bad_records = max_bad_records - self.assertEqual(config._properties["load"]["maxBadRecords"], max_bad_records) - - def test_null_marker_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.null_marker) - - def test_null_marker_hit(self): - null_marker = "XXX" - config = self._get_target_class()() - config._properties["load"]["nullMarker"] = null_marker - self.assertEqual(config.null_marker, null_marker) - - def test_null_marker_setter(self): - null_marker = "XXX" - config = self._get_target_class()() - config.null_marker = null_marker - self.assertEqual(config._properties["load"]["nullMarker"], null_marker) - - def test_quote_character_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.quote_character) - - def test_quote_character_hit(self): - quote_character = "'" - config = self._get_target_class()() - config._properties["load"]["quote"] = quote_character - self.assertEqual(config.quote_character, quote_character) - - def test_quote_character_setter(self): - quote_character = "'" - config = self._get_target_class()() - config.quote_character = quote_character - self.assertEqual(config._properties["load"]["quote"], quote_character) - - def test_schema_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.schema) - - def test_schema_hit(self): - from google.cloud.bigquery.schema import SchemaField - - config = self._get_target_class()() - all_props_repr = { - "mode": "REQUIRED", - "name": "foo", - "type": "INTEGER", - "description": "Foo", - } - minimal_repr = {"name": "bar", "type": "STRING"} - config._properties["load"]["schema"] = { - "fields": [all_props_repr, minimal_repr] - } - all_props, minimal = config.schema - self.assertEqual(all_props, SchemaField.from_api_repr(all_props_repr)) - self.assertEqual(minimal, SchemaField.from_api_repr(minimal_repr)) - - def test_schema_setter_fields(self): - from google.cloud.bigquery.schema import SchemaField - - config = self._get_target_class()() - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config.schema = [full_name, age] - full_name_repr = { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - } - age_repr = { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - } - self.assertEqual( - config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} - ) - - def test_schema_setter_valid_mappings_list(self): - config = self._get_target_class()() - - schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - config.schema = schema - - full_name_repr = { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - } - age_repr = { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - } - self.assertEqual( - config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} - ) - - def test_schema_setter_invalid_mappings_list(self): - config = self._get_target_class()() - - schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, - ] - - with self.assertRaises(Exception): - config.schema = schema - - def test_schema_setter_unsetting_schema(self): - from google.cloud.bigquery.schema import SchemaField - - config = self._get_target_class()() - config._properties["load"]["schema"] = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - - config.schema = None - self.assertNotIn("schema", config._properties["load"]) - config.schema = None # no error, idempotent operation - - def test_schema_update_options_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.schema_update_options) - - def test_schema_update_options_hit(self): - from google.cloud.bigquery.job import SchemaUpdateOption - - options = [ - SchemaUpdateOption.ALLOW_FIELD_ADDITION, - SchemaUpdateOption.ALLOW_FIELD_RELAXATION, - ] - config = self._get_target_class()() - config._properties["load"]["schemaUpdateOptions"] = options - self.assertEqual(config.schema_update_options, options) - - def test_schema_update_options_setter(self): - from google.cloud.bigquery.job import SchemaUpdateOption - - options = [ - SchemaUpdateOption.ALLOW_FIELD_ADDITION, - SchemaUpdateOption.ALLOW_FIELD_RELAXATION, - ] - config = self._get_target_class()() - config.schema_update_options = options - self.assertEqual(config._properties["load"]["schemaUpdateOptions"], options) - - def test_skip_leading_rows_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.skip_leading_rows) - - def test_skip_leading_rows_hit_w_str(self): - skip_leading_rows = 1 - config = self._get_target_class()() - config._properties["load"]["skipLeadingRows"] = str(skip_leading_rows) - self.assertEqual(config.skip_leading_rows, skip_leading_rows) - - def test_skip_leading_rows_hit_w_integer(self): - skip_leading_rows = 1 - config = self._get_target_class()() - config._properties["load"]["skipLeadingRows"] = skip_leading_rows - self.assertEqual(config.skip_leading_rows, skip_leading_rows) - - def test_skip_leading_rows_setter(self): - skip_leading_rows = 1 - config = self._get_target_class()() - config.skip_leading_rows = skip_leading_rows - self.assertEqual( - config._properties["load"]["skipLeadingRows"], str(skip_leading_rows) - ) - - def test_source_format_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.source_format) - - def test_source_format_hit(self): - from google.cloud.bigquery.job import SourceFormat - - source_format = SourceFormat.CSV - config = self._get_target_class()() - config._properties["load"]["sourceFormat"] = source_format - self.assertEqual(config.source_format, source_format) - - def test_source_format_setter(self): - from google.cloud.bigquery.job import SourceFormat - - source_format = SourceFormat.CSV - config = self._get_target_class()() - config.source_format = source_format - self.assertEqual(config._properties["load"]["sourceFormat"], source_format) - - def test_range_partitioning_w_none(self): - object_under_test = self._get_target_class()() - assert object_under_test.range_partitioning is None - - def test_range_partitioning_w_value(self): - object_under_test = self._get_target_class()() - object_under_test._properties["load"]["rangePartitioning"] = { - "field": "column_one", - "range": {"start": 1, "end": 1000, "interval": 10}, - } - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter(self): - from google.cloud.bigquery.table import PartitionRange - from google.cloud.bigquery.table import RangePartitioning - - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = RangePartitioning( - field="column_one", range_=PartitionRange(start=1, end=1000, interval=10) - ) - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter_w_none(self): - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = None - assert object_under_test.range_partitioning is None - - def test_range_partitioning_setter_w_wrong_type(self): - object_under_test = self._get_target_class()() - with pytest.raises(ValueError, match="RangePartitioning"): - object_under_test.range_partitioning = object() - - def test_time_partitioning_miss(self): - config = self._get_target_class()() - self.assertIsNone(config.time_partitioning) - - def test_time_partitioning_hit(self): - from google.cloud.bigquery.table import TimePartitioning - from google.cloud.bigquery.table import TimePartitioningType - - field = "creation_date" - year_ms = 86400 * 1000 * 365 - config = self._get_target_class()() - config._properties["load"]["timePartitioning"] = { - "type": TimePartitioningType.DAY, - "field": field, - "expirationMs": str(year_ms), - "requirePartitionFilter": False, - } - with warnings.catch_warnings(record=True) as warned: - expected = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) - self.assertEqual(config.time_partitioning, expected) - - assert len(warned) == 1 - warning = warned[0] - assert "TimePartitioning.require_partition_filter" in str(warning) - - def test_time_partitioning_setter(self): - from google.cloud.bigquery.table import TimePartitioning - from google.cloud.bigquery.table import TimePartitioningType - - field = "creation_date" - year_ms = 86400 * 1000 * 365 - - with warnings.catch_warnings(record=True) as warned: - time_partitioning = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) - - config = self._get_target_class()() - config.time_partitioning = time_partitioning - expected = { - "type": TimePartitioningType.DAY, - "field": field, - "expirationMs": str(year_ms), - "requirePartitionFilter": False, - } - self.assertEqual(config._properties["load"]["timePartitioning"], expected) - - assert len(warned) == 1 - warning = warned[0] - assert "TimePartitioning.require_partition_filter" in str(warning) - - def test_time_partitioning_setter_w_none(self): - from google.cloud.bigquery.table import TimePartitioningType - - field = "creation_date" - year_ms = 86400 * 1000 * 365 - config = self._get_target_class()() - config._properties["load"]["timePartitioning"] = { - "type": TimePartitioningType.DAY, - "field": field, - "expirationMs": str(year_ms), - "requirePartitionFilter": False, - } - config.time_partitioning = None - self.assertIsNone(config.time_partitioning) - self.assertNotIn("timePartitioning", config._properties["load"]) - - def test_use_avro_logical_types(self): - config = self._get_target_class()() - self.assertIsNone(config.use_avro_logical_types) - - def test_use_avro_logical_types_setter(self): - config = self._get_target_class()() - config.use_avro_logical_types = True - self.assertTrue(config._properties["load"]["useAvroLogicalTypes"]) - - def test_write_disposition_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.write_disposition) - - def test_write_disposition_hit(self): - from google.cloud.bigquery.job import WriteDisposition - - write_disposition = WriteDisposition.WRITE_TRUNCATE - config = self._get_target_class()() - config._properties["load"]["writeDisposition"] = write_disposition - self.assertEqual(config.write_disposition, write_disposition) - - def test_write_disposition_setter(self): - from google.cloud.bigquery.job import WriteDisposition - - write_disposition = WriteDisposition.WRITE_TRUNCATE - config = self._get_target_class()() - config.write_disposition = write_disposition - self.assertEqual( - config._properties["load"]["writeDisposition"], write_disposition - ) - - -class TestLoadJob(unittest.TestCase, _Base): - JOB_TYPE = "load" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import LoadJob - - return LoadJob - - def _setUpConstants(self): - super(TestLoadJob, self)._setUpConstants() - self.INPUT_FILES = 2 - self.INPUT_BYTES = 12345 - self.OUTPUT_BYTES = 23456 - self.OUTPUT_ROWS = 345 - - def _make_resource(self, started=False, ended=False): - resource = super(TestLoadJob, self)._make_resource(started, ended) - config = resource["configuration"]["load"] - config["sourceUris"] = [self.SOURCE1] - config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - } - - if ended: - resource["status"] = {"state": "DONE"} - resource["statistics"]["load"]["inputFiles"] = self.INPUT_FILES - resource["statistics"]["load"]["inputFileBytes"] = self.INPUT_BYTES - resource["statistics"]["load"]["outputBytes"] = self.OUTPUT_BYTES - resource["statistics"]["load"]["outputRows"] = self.OUTPUT_ROWS - - return resource - - def _verifyBooleanConfigProperties(self, job, config): - if "allowJaggedRows" in config: - self.assertEqual(job.allow_jagged_rows, config["allowJaggedRows"]) - else: - self.assertIsNone(job.allow_jagged_rows) - if "allowQuotedNewlines" in config: - self.assertEqual(job.allow_quoted_newlines, config["allowQuotedNewlines"]) - else: - self.assertIsNone(job.allow_quoted_newlines) - if "autodetect" in config: - self.assertEqual(job.autodetect, config["autodetect"]) - else: - self.assertIsNone(job.autodetect) - if "ignoreUnknownValues" in config: - self.assertEqual(job.ignore_unknown_values, config["ignoreUnknownValues"]) - else: - self.assertIsNone(job.ignore_unknown_values) - if "useAvroLogicalTypes" in config: - self.assertEqual(job.use_avro_logical_types, config["useAvroLogicalTypes"]) - else: - self.assertIsNone(job.use_avro_logical_types) - - def _verifyEnumConfigProperties(self, job, config): - if "createDisposition" in config: - self.assertEqual(job.create_disposition, config["createDisposition"]) - else: - self.assertIsNone(job.create_disposition) - if "encoding" in config: - self.assertEqual(job.encoding, config["encoding"]) - else: - self.assertIsNone(job.encoding) - if "sourceFormat" in config: - self.assertEqual(job.source_format, config["sourceFormat"]) - else: - self.assertIsNone(job.source_format) - if "writeDisposition" in config: - self.assertEqual(job.write_disposition, config["writeDisposition"]) - else: - self.assertIsNone(job.write_disposition) - if "schemaUpdateOptions" in config: - self.assertEqual(job.schema_update_options, config["schemaUpdateOptions"]) - else: - self.assertIsNone(job.schema_update_options) - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - config = resource.get("configuration", {}).get("load") - - self._verifyBooleanConfigProperties(job, config) - self._verifyEnumConfigProperties(job, config) - - self.assertEqual(job.source_uris, config["sourceUris"]) - - table_ref = config["destinationTable"] - self.assertEqual(job.destination.project, table_ref["projectId"]) - self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) - self.assertEqual(job.destination.table_id, table_ref["tableId"]) - - if "fieldDelimiter" in config: - self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) - else: - self.assertIsNone(job.field_delimiter) - if "maxBadRecords" in config: - self.assertEqual(job.max_bad_records, config["maxBadRecords"]) - else: - self.assertIsNone(job.max_bad_records) - if "nullMarker" in config: - self.assertEqual(job.null_marker, config["nullMarker"]) - else: - self.assertIsNone(job.null_marker) - if "quote" in config: - self.assertEqual(job.quote_character, config["quote"]) - else: - self.assertIsNone(job.quote_character) - if "skipLeadingRows" in config: - self.assertEqual(str(job.skip_leading_rows), config["skipLeadingRows"]) - else: - self.assertIsNone(job.skip_leading_rows) - - if "destinationEncryptionConfiguration" in config: - self.assertIsNotNone(job.destination_encryption_configuration) - self.assertEqual( - job.destination_encryption_configuration.kms_key_name, - config["destinationEncryptionConfiguration"]["kmsKeyName"], - ) - else: - self.assertIsNone(job.destination_encryption_configuration) - - def test_ctor(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - self.assertEqual(job.destination, self.TABLE_REF) - self.assertEqual(list(job.source_uris), [self.SOURCE1]) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - # derived from resource['statistics']['load'] - self.assertIsNone(job.input_file_bytes) - self.assertIsNone(job.input_files) - self.assertIsNone(job.output_bytes) - self.assertIsNone(job.output_rows) - - # set/read from resource['configuration']['load'] - self.assertIsNone(job.schema) - self.assertIsNone(job.allow_jagged_rows) - self.assertIsNone(job.allow_quoted_newlines) - self.assertIsNone(job.autodetect) - self.assertIsNone(job.create_disposition) - self.assertIsNone(job.encoding) - self.assertIsNone(job.field_delimiter) - self.assertIsNone(job.ignore_unknown_values) - self.assertIsNone(job.max_bad_records) - self.assertIsNone(job.null_marker) - self.assertIsNone(job.quote_character) - self.assertIsNone(job.skip_leading_rows) - self.assertIsNone(job.source_format) - self.assertIsNone(job.write_disposition) - self.assertIsNone(job.destination_encryption_configuration) - self.assertIsNone(job.destination_table_description) - self.assertIsNone(job.destination_table_friendly_name) - self.assertIsNone(job.range_partitioning) - self.assertIsNone(job.time_partitioning) - self.assertIsNone(job.use_avro_logical_types) - self.assertIsNone(job.clustering_fields) - self.assertIsNone(job.schema_update_options) - - def test_ctor_w_config(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.job import LoadJobConfig - - client = _make_client(project=self.PROJECT) - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config = LoadJobConfig() - config.schema = [full_name, age] - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config - ) - self.assertEqual(job.schema, [full_name, age]) - config.destination_table_description = "Description" - expected = {"description": "Description"} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - friendly_name = "Friendly Name" - config._properties["load"]["destinationTableProperties"] = { - "friendlyName": friendly_name - } - self.assertEqual(config.destination_table_friendly_name, friendly_name) - - def test_ctor_w_job_reference(self): - from google.cloud.bigquery import job - - client = _make_client(project=self.PROJECT) - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - self.assertEqual(load_job.project, "alternative-project") - self.assertEqual(load_job.location, "US") - - def test_done(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - self.assertTrue(job.done()) - - def test_result(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - result = job.result() - - self.assertIs(result, job) - - def test_result_invokes_begin(self): - begun_resource = self._make_resource() - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, done_resource) - client = _make_client(self.PROJECT) - client._connection = connection - - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - job.result() - - self.assertEqual(len(connection.api_request.call_args_list), 2) - begin_request, reload_request = connection.api_request.call_args_list - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(reload_request[1]["method"], "GET") - - def test_schema_setter_non_list(self): - from google.cloud.bigquery.job import LoadJobConfig - - config = LoadJobConfig() - with self.assertRaises(TypeError): - config.schema = object() - - def test_schema_setter_invalid_field(self): - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.schema import SchemaField - - config = LoadJobConfig() - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - with self.assertRaises(ValueError): - config.schema = [full_name, object()] - - def test_schema_setter(self): - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.schema import SchemaField - - config = LoadJobConfig() - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config.schema = [full_name, age] - self.assertEqual(config.schema, [full_name, age]) - - def test_props_set_by_server(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - - CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC) - STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC) - ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC) - FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) - URL = "http://example.com/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - EMAIL = "phred@example.com" - ERROR_RESULT = { - "debugInfo": "DEBUG", - "location": "LOCATION", - "message": "MESSAGE", - "reason": "REASON", - } - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - job._properties["etag"] = "ETAG" - job._properties["id"] = FULL_JOB_ID - job._properties["selfLink"] = URL - job._properties["user_email"] = EMAIL - - statistics = job._properties["statistics"] = {} - statistics["creationTime"] = _millis(CREATED) - statistics["startTime"] = _millis(STARTED) - statistics["endTime"] = _millis(ENDED) - - self.assertEqual(job.etag, "ETAG") - self.assertEqual(job.self_link, URL) - self.assertEqual(job.user_email, EMAIL) - - self.assertEqual(job.created, CREATED) - self.assertEqual(job.started, STARTED) - self.assertEqual(job.ended, ENDED) - - # running jobs have no load stats not yet set. - self.assertIsNone(job.output_bytes) - - load_stats = statistics["load"] = {} - load_stats["inputFileBytes"] = 12345 - load_stats["inputFiles"] = 1 - load_stats["outputBytes"] = 23456 - load_stats["outputRows"] = 345 - - self.assertEqual(job.input_file_bytes, 12345) - self.assertEqual(job.input_files, 1) - self.assertEqual(job.output_bytes, 23456) - self.assertEqual(job.output_rows, 345) - - status = job._properties["status"] = {} - - self.assertIsNone(job.error_result) - self.assertIsNone(job.errors) - self.assertIsNone(job.state) - - status["errorResult"] = ERROR_RESULT - status["errors"] = [ERROR_RESULT] - status["state"] = "STATE" - - self.assertEqual(job.error_result, ERROR_RESULT) - self.assertEqual(job.errors, [ERROR_RESULT]) - self.assertEqual(job.state, "STATE") - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.JOB_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.FULL_JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_with_encryption(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.FULL_JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - - client = _make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - load_config = RESOURCE["configuration"]["load"] - load_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_already_running(self): - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - job._properties["status"] = {"state": "RUNNING"} - - with self.assertRaises(ValueError): - job._begin() - - def test_begin_w_bound_client(self): - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - path = "/projects/{}/jobs".format(self.PROJECT) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": path}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=path, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_autodetect(self): - from google.cloud.bigquery.job import LoadJobConfig - - path = "/projects/{}/jobs".format(self.PROJECT) - resource = self._make_resource() - resource["configuration"]["load"]["autodetect"] = True - # Ensure None for missing server-set props - del resource["statistics"]["creationTime"] - del resource["etag"] - del resource["selfLink"] - del resource["user_email"] - conn = _make_connection(resource) - client = _make_client(project=self.PROJECT, connection=conn) - config = LoadJobConfig() - config.autodetect = True - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": path}, client, job) - - sent = { - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "autodetect": True, - } - }, - } - conn.api_request.assert_called_once_with( - method="POST", path=path, data=sent, timeout=None - ) - self._verifyResourceProperties(job, resource) - - def test_begin_w_alternate_client(self): - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.job import SchemaUpdateOption - from google.cloud.bigquery.job import WriteDisposition - from google.cloud.bigquery.schema import SchemaField - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource(ended=True) - LOAD_CONFIGURATION = { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "allowJaggedRows": True, - "allowQuotedNewlines": True, - "createDisposition": CreateDisposition.CREATE_NEVER, - "encoding": "ISO-8559-1", - "fieldDelimiter": "|", - "ignoreUnknownValues": True, - "maxBadRecords": 100, - "nullMarker": r"\N", - "quote": "'", - "skipLeadingRows": "1", - "sourceFormat": "CSV", - "useAvroLogicalTypes": True, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - "schema": { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, - ] - }, - "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], - } - RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config = LoadJobConfig() - config.schema = [full_name, age] - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1, config - ) - config.allow_jagged_rows = True - config.allow_quoted_newlines = True - config.create_disposition = CreateDisposition.CREATE_NEVER - config.encoding = "ISO-8559-1" - config.field_delimiter = "|" - config.ignore_unknown_values = True - config.max_bad_records = 100 - config.null_marker = r"\N" - config.quote_character = "'" - config.skip_leading_rows = 1 - config.source_format = "CSV" - config.use_avro_logical_types = True - config.write_disposition = WriteDisposition.WRITE_TRUNCATE - config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - self.assertEqual(len(conn2.api_request.call_args_list), 1) - req = conn2.api_request.call_args_list[0] - self.assertEqual(req[1]["method"], "POST") - self.assertEqual(req[1]["path"], PATH) - SENT = { - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"load": LOAD_CONFIGURATION}, - } - self.maxDiff = None - self.assertEqual(req[1]["data"], SENT) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_job_reference(self): - from google.cloud.bigquery import job - - resource = self._make_resource() - resource["jobReference"]["projectId"] = "alternative-project" - resource["jobReference"]["location"] = "US" - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - load_job._begin() - final_attributes.assert_called_with( - {"path": "/projects/alternative-project/jobs"}, client, load_job - ) - - conn.api_request.assert_called_once() - _, request = conn.api_request.call_args - self.assertEqual(request["method"], "POST") - self.assertEqual(request["path"], "/projects/alternative-project/jobs") - self.assertEqual( - request["data"]["jobReference"]["projectId"], "alternative-project" - ) - self.assertEqual(request["data"]["jobReference"]["location"], "US") - self.assertEqual(request["data"]["jobReference"]["jobId"], self.JOB_ID) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - client, - job, - ) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - client2, - job, - ) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_miss_w_job_reference(self): - from google.cloud.bigquery import job - - job_ref = job._JobReference("my-job-id", "other-project", "US") - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(load_job.exists()) - - final_attributes.assert_called_with( - {"path": "/projects/other-project/jobs/my-job-id"}, client, load_job - ) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/other-project/jobs/my-job-id", - query_params={"fields": "id", "location": "US"}, - timeout=None, - ) - - def test_reload_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_job_reference(self): - from google.cloud.bigquery import job - - resource = self._make_resource(ended=True) - resource["jobReference"]["projectId"] = "alternative-project" - resource["jobReference"]["location"] = "US" - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - load_job.reload() - - final_attributes.assert_called_with( - {"path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID)}, - client, - load_job, - ) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/alternative-project/jobs/{}".format(self.JOB_ID), - query_params={"location": "US"}, - timeout=None, - ) - - def test_cancel_w_bound_client(self): - PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource(ended=True) - RESPONSE = {"job": RESOURCE} - conn = _make_connection(RESPONSE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.cancel() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_cancel_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource(ended=True) - RESPONSE = {"job": RESOURCE} - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESPONSE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.cancel(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_cancel_w_job_reference(self): - from google.cloud.bigquery import job - - resource = self._make_resource(ended=True) - resource["jobReference"]["projectId"] = "alternative-project" - resource["jobReference"]["location"] = "US" - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection({"job": resource}) - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - load_job.cancel() - - final_attributes.assert_called_with( - { - "path": "/projects/alternative-project/jobs/{}/cancel".format( - self.JOB_ID - ) - }, - client, - load_job, - ) - conn.api_request.assert_called_once_with( - method="POST", - path="/projects/alternative-project/jobs/{}/cancel".format(self.JOB_ID), - query_params={"location": "US"}, - timeout=None, - ) - - -class TestCopyJobConfig(unittest.TestCase, _Base): - JOB_TYPE = "copy" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import CopyJobConfig - - return CopyJobConfig - - def test_ctor_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import WriteDisposition - - create_disposition = CreateDisposition.CREATE_NEVER - write_disposition = WriteDisposition.WRITE_TRUNCATE - config = self._get_target_class()( - create_disposition=create_disposition, write_disposition=write_disposition - ) - - self.assertEqual(config.create_disposition, create_disposition) - self.assertEqual(config.write_disposition, write_disposition) - - def test_to_api_repr_with_encryption(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - config = self._make_one() - config.destination_encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME - ) - resource = config.to_api_repr() - self.assertEqual( - resource, - { - "copy": { - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - } - } - }, - ) - - def test_to_api_repr_with_encryption_none(self): - config = self._make_one() - config.destination_encryption_configuration = None - resource = config.to_api_repr() - self.assertEqual( - resource, {"copy": {"destinationEncryptionConfiguration": None}} - ) - - -class TestCopyJob(unittest.TestCase, _Base): - JOB_TYPE = "copy" - SOURCE_TABLE = "source_table" - DESTINATION_TABLE = "destination_table" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import CopyJob - - return CopyJob - - def _make_resource(self, started=False, ended=False): - resource = super(TestCopyJob, self)._make_resource(started, ended) - config = resource["configuration"]["copy"] - config["sourceTables"] = [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ] - config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - } - - return resource - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - config = resource.get("configuration", {}).get("copy") - - table_ref = config["destinationTable"] - self.assertEqual(job.destination.project, table_ref["projectId"]) - self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) - self.assertEqual(job.destination.table_id, table_ref["tableId"]) - - sources = config.get("sourceTables") - if sources is None: - sources = [config["sourceTable"]] - self.assertEqual(len(sources), len(job.sources)) - for table_ref, table in zip(sources, job.sources): - self.assertEqual(table.project, table_ref["projectId"]) - self.assertEqual(table.dataset_id, table_ref["datasetId"]) - self.assertEqual(table.table_id, table_ref["tableId"]) - - if "createDisposition" in config: - self.assertEqual(job.create_disposition, config["createDisposition"]) - else: - self.assertIsNone(job.create_disposition) - - if "writeDisposition" in config: - self.assertEqual(job.write_disposition, config["writeDisposition"]) - else: - self.assertIsNone(job.write_disposition) - - if "destinationEncryptionConfiguration" in config: - self.assertIsNotNone(job.destination_encryption_configuration) - self.assertEqual( - job.destination_encryption_configuration.kms_key_name, - config["destinationEncryptionConfiguration"]["kmsKeyName"], - ) - else: - self.assertIsNone(job.destination_encryption_configuration) - - def test_ctor(self): - client = _make_client(project=self.PROJECT) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - self.assertEqual(job.destination, destination) - self.assertEqual(job.sources, [source]) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - # set/read from resource['configuration']['copy'] - self.assertIsNone(job.create_disposition) - self.assertIsNone(job.write_disposition) - self.assertIsNone(job.destination_encryption_configuration) - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_with_encryption(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_sourcetable(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_wo_sources(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - } - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - with self.assertRaises(KeyError): - _ = job.sources - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - - client = _make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - copy_config = RESOURCE["configuration"]["copy"] - copy_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_bound_client(self): - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_alternate_client(self): - from google.cloud.bigquery.job import CopyJobConfig - - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import WriteDisposition - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource(ended=True) - COPY_CONFIGURATION = { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - "createDisposition": CreateDisposition.CREATE_NEVER, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - } - RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - config = CopyJobConfig() - config.create_disposition = CreateDisposition.CREATE_NEVER - config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job = self._make_one(self.JOB_ID, [source], destination, client1, config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"copy": COPY_CONFIGURATION}, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_reload_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - -class TestExtractJobConfig(unittest.TestCase, _Base): - JOB_TYPE = "extract" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import ExtractJobConfig - - return ExtractJobConfig - - def test_ctor_w_properties(self): - config = self._get_target_class()(field_delimiter="\t", print_header=True) - - self.assertEqual(config.field_delimiter, "\t") - self.assertTrue(config.print_header) - - def test_to_api_repr(self): - from google.cloud.bigquery import job - - config = self._make_one() - config.compression = job.Compression.SNAPPY - config.destination_format = job.DestinationFormat.AVRO - config.field_delimiter = "ignored for avro" - config.print_header = False - config._properties["extract"]["someNewField"] = "some-value" - config.use_avro_logical_types = True - resource = config.to_api_repr() - self.assertEqual( - resource, - { - "extract": { - "compression": "SNAPPY", - "destinationFormat": "AVRO", - "fieldDelimiter": "ignored for avro", - "printHeader": False, - "someNewField": "some-value", - "useAvroLogicalTypes": True, - } - }, - ) - - def test_from_api_repr(self): - cls = self._get_target_class() - config = cls.from_api_repr( - { - "extract": { - "compression": "NONE", - "destinationFormat": "CSV", - "fieldDelimiter": "\t", - "printHeader": True, - "someNewField": "some-value", - "useAvroLogicalTypes": False, - } - } - ) - self.assertEqual(config.compression, "NONE") - self.assertEqual(config.destination_format, "CSV") - self.assertEqual(config.field_delimiter, "\t") - self.assertEqual(config.print_header, True) - self.assertEqual(config._properties["extract"]["someNewField"], "some-value") - self.assertEqual(config.use_avro_logical_types, False) - - -class TestExtractJob(unittest.TestCase, _Base): - JOB_TYPE = "extract" - SOURCE_TABLE = "source_table" - DESTINATION_URI = "gs://bucket_name/object_name" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import ExtractJob - - return ExtractJob - - def _make_resource(self, started=False, ended=False): - resource = super(TestExtractJob, self)._make_resource(started, ended) - config = resource["configuration"]["extract"] - config["sourceTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - config["destinationUris"] = [self.DESTINATION_URI] - return resource - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - config = resource.get("configuration", {}).get("extract") - - self.assertEqual(job.destination_uris, config["destinationUris"]) - - if "sourceTable" in config: - table_ref = config["sourceTable"] - self.assertEqual(job.source.project, table_ref["projectId"]) - self.assertEqual(job.source.dataset_id, table_ref["datasetId"]) - self.assertEqual(job.source.table_id, table_ref["tableId"]) - else: - model_ref = config["sourceModel"] - self.assertEqual(job.source.project, model_ref["projectId"]) - self.assertEqual(job.source.dataset_id, model_ref["datasetId"]) - self.assertEqual(job.source.model_id, model_ref["modelId"]) - - if "compression" in config: - self.assertEqual(job.compression, config["compression"]) - else: - self.assertIsNone(job.compression) - - if "destinationFormat" in config: - self.assertEqual(job.destination_format, config["destinationFormat"]) - else: - self.assertIsNone(job.destination_format) - - if "fieldDelimiter" in config: - self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) - else: - self.assertIsNone(job.field_delimiter) - - if "printHeader" in config: - self.assertEqual(job.print_header, config["printHeader"]) - else: - self.assertIsNone(job.print_header) - - def test_ctor(self): - from google.cloud.bigquery.table import Table - - client = _make_client(project=self.PROJECT) - source = Table(self.TABLE_REF) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) - self.assertEqual(job.source.project, self.PROJECT) - self.assertEqual(job.source.dataset_id, self.DS_ID) - self.assertEqual(job.source.table_id, self.TABLE_ID) - self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - # set/read from resource['configuration']['extract'] - self.assertIsNone(job.compression) - self.assertIsNone(job.destination_format) - self.assertIsNone(job.field_delimiter) - self.assertIsNone(job.print_header) - - def test_destination_uri_file_counts(self): - file_counts = 23 - client = _make_client(project=self.PROJECT) - job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client - ) - self.assertIsNone(job.destination_uri_file_counts) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.destination_uri_file_counts) - - extract_stats = statistics["extract"] = {} - self.assertIsNone(job.destination_uri_file_counts) - - extract_stats["destinationUriFileCounts"] = [str(file_counts)] - self.assertEqual(job.destination_uri_file_counts, [file_counts]) - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "extract": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationUris": [self.DESTINATION_URI], - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_for_model(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "extract": { - "sourceModel": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": "model_id", - }, - "destinationUris": [self.DESTINATION_URI], - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import Compression - - client = _make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - extract_config = RESOURCE["configuration"]["extract"] - extract_config["compression"] = Compression.GZIP - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "extract": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationUris": [self.DESTINATION_URI], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_alternate_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import Compression - from google.cloud.bigquery.job import DestinationFormat - from google.cloud.bigquery.job import ExtractJobConfig - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource(ended=True) - EXTRACT_CONFIGURATION = { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationUris": [self.DESTINATION_URI], - "compression": Compression.GZIP, - "destinationFormat": DestinationFormat.NEWLINE_DELIMITED_JSON, - "fieldDelimiter": "|", - "printHeader": False, - } - RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - config = ExtractJobConfig() - config.compression = Compression.GZIP - config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON - config.field_delimiter = "|" - config.print_header = False - job = self._make_one( - self.JOB_ID, source, [self.DESTINATION_URI], client1, config - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"extract": EXTRACT_CONFIGURATION}, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_reload_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - from google.cloud.bigquery.dataset import DatasetReference - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - -class TestQueryJobConfig(unittest.TestCase, _Base): - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryJobConfig - - return QueryJobConfig - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - config = self._make_one() - self.assertEqual(config._properties, {"query": {}}) - - def test_ctor_w_none(self): - config = self._make_one() - config.default_dataset = None - config.destination = None - self.assertIsNone(config.default_dataset) - self.assertIsNone(config.destination) - - def test_ctor_w_properties(self): - config = self._get_target_class()(use_query_cache=False, use_legacy_sql=True) - - self.assertFalse(config.use_query_cache) - self.assertTrue(config.use_legacy_sql) - - def test_ctor_w_string_default_dataset(self): - from google.cloud.bigquery import dataset - - default_dataset = "default-proj.default_dset" - config = self._get_target_class()(default_dataset=default_dataset) - expected = dataset.DatasetReference.from_string(default_dataset) - self.assertEqual(config.default_dataset, expected) - - def test_ctor_w_string_destinaton(self): - from google.cloud.bigquery import table - - destination = "dest-proj.dest_dset.dest_tbl" - config = self._get_target_class()(destination=destination) - expected = table.TableReference.from_string(destination) - self.assertEqual(config.destination, expected) - - def test_default_dataset_w_string(self): - from google.cloud.bigquery import dataset - - default_dataset = "default-proj.default_dset" - config = self._make_one() - config.default_dataset = default_dataset - expected = dataset.DatasetReference.from_string(default_dataset) - self.assertEqual(config.default_dataset, expected) - - def test_default_dataset_w_dataset(self): - from google.cloud.bigquery import dataset - - default_dataset = "default-proj.default_dset" - expected = dataset.DatasetReference.from_string(default_dataset) - config = self._make_one() - config.default_dataset = dataset.Dataset(expected) - self.assertEqual(config.default_dataset, expected) - - def test_destinaton_w_string(self): - from google.cloud.bigquery import table - - destination = "dest-proj.dest_dset.dest_tbl" - config = self._make_one() - config.destination = destination - expected = table.TableReference.from_string(destination) - self.assertEqual(config.destination, expected) - - def test_range_partitioning_w_none(self): - object_under_test = self._get_target_class()() - assert object_under_test.range_partitioning is None - - def test_range_partitioning_w_value(self): - object_under_test = self._get_target_class()() - object_under_test._properties["query"]["rangePartitioning"] = { - "field": "column_one", - "range": {"start": 1, "end": 1000, "interval": 10}, - } - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter(self): - from google.cloud.bigquery.table import PartitionRange - from google.cloud.bigquery.table import RangePartitioning - - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = RangePartitioning( - field="column_one", range_=PartitionRange(start=1, end=1000, interval=10) - ) - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter_w_none(self): - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = None - assert object_under_test.range_partitioning is None - - def test_range_partitioning_setter_w_wrong_type(self): - object_under_test = self._get_target_class()() - with pytest.raises(ValueError, match="RangePartitioning"): - object_under_test.range_partitioning = object() - - def test_time_partitioning(self): - from google.cloud.bigquery import table - - time_partitioning = table.TimePartitioning( - type_=table.TimePartitioningType.DAY, field="name" - ) - config = self._make_one() - config.time_partitioning = time_partitioning - # TimePartitioning should be configurable after assigning - time_partitioning.expiration_ms = 10000 - - self.assertEqual(config.time_partitioning.type_, table.TimePartitioningType.DAY) - self.assertEqual(config.time_partitioning.field, "name") - self.assertEqual(config.time_partitioning.expiration_ms, 10000) - - config.time_partitioning = None - self.assertIsNone(config.time_partitioning) - - def test_clustering_fields(self): - fields = ["email", "postal_code"] - config = self._get_target_class()() - config.clustering_fields = fields - self.assertEqual(config.clustering_fields, fields) - - config.clustering_fields = None - self.assertIsNone(config.clustering_fields) - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - config = klass.from_api_repr({}) - self.assertIsNone(config.dry_run) - self.assertIsNone(config.use_legacy_sql) - self.assertIsNone(config.default_dataset) - self.assertIsNone(config.destination) - self.assertIsNone(config.destination_encryption_configuration) - - def test_from_api_repr_normal(self): - from google.cloud.bigquery.dataset import DatasetReference - - resource = { - "query": { - "useLegacySql": True, - "query": "no property for me", - "defaultDataset": { - "projectId": "someproject", - "datasetId": "somedataset", - }, - "someNewProperty": "I should be saved, too.", - }, - "dryRun": True, - } - klass = self._get_target_class() - - config = klass.from_api_repr(resource) - - self.assertTrue(config.use_legacy_sql) - self.assertEqual( - config.default_dataset, DatasetReference("someproject", "somedataset") - ) - self.assertTrue(config.dry_run) - # Make sure unknown properties propagate. - self.assertEqual(config._properties["query"]["query"], "no property for me") - self.assertEqual( - config._properties["query"]["someNewProperty"], "I should be saved, too." - ) - - def test_to_api_repr_normal(self): - from google.cloud.bigquery.dataset import DatasetReference - - config = self._make_one() - config.use_legacy_sql = True - config.default_dataset = DatasetReference("someproject", "somedataset") - config.dry_run = False - config._properties["someNewProperty"] = "Woohoo, alpha stuff." - - resource = config.to_api_repr() - - self.assertFalse(resource["dryRun"]) - self.assertTrue(resource["query"]["useLegacySql"]) - self.assertEqual( - resource["query"]["defaultDataset"]["projectId"], "someproject" - ) - self.assertEqual( - resource["query"]["defaultDataset"]["datasetId"], "somedataset" - ) - # Make sure unknown properties propagate. - self.assertEqual(resource["someNewProperty"], "Woohoo, alpha stuff.") - - def test_to_api_repr_with_encryption(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - config = self._make_one() - config.destination_encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME - ) - resource = config.to_api_repr() - self.assertEqual( - resource, - { - "query": { - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - } - } - }, - ) - - def test_to_api_repr_with_encryption_none(self): - config = self._make_one() - config.destination_encryption_configuration = None - resource = config.to_api_repr() - self.assertEqual( - resource, {"query": {"destinationEncryptionConfiguration": None}} - ) - - def test_from_api_repr_with_encryption(self): - resource = { - "query": { - "destinationEncryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME} - } - } - klass = self._get_target_class() - config = klass.from_api_repr(resource) - self.assertEqual( - config.destination_encryption_configuration.kms_key_name, self.KMS_KEY_NAME - ) - - -class TestQueryJob(unittest.TestCase, _Base): - JOB_TYPE = "query" - QUERY = "select count(*) from persons" - DESTINATION_TABLE = "destination_table" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryJob - - return QueryJob - - def _make_resource(self, started=False, ended=False): - resource = super(TestQueryJob, self)._make_resource(started, ended) - config = resource["configuration"]["query"] - config["query"] = self.QUERY - return resource - - def _verifyBooleanResourceProperties(self, job, config): - - if "allowLargeResults" in config: - self.assertEqual(job.allow_large_results, config["allowLargeResults"]) - else: - self.assertIsNone(job.allow_large_results) - if "flattenResults" in config: - self.assertEqual(job.flatten_results, config["flattenResults"]) - else: - self.assertIsNone(job.flatten_results) - if "useQueryCache" in config: - self.assertEqual(job.use_query_cache, config["useQueryCache"]) - else: - self.assertIsNone(job.use_query_cache) - if "useLegacySql" in config: - self.assertEqual(job.use_legacy_sql, config["useLegacySql"]) - else: - self.assertIsNone(job.use_legacy_sql) - - def _verifyIntegerResourceProperties(self, job, config): - if "maximumBillingTier" in config: - self.assertEqual(job.maximum_billing_tier, config["maximumBillingTier"]) - else: - self.assertIsNone(job.maximum_billing_tier) - if "maximumBytesBilled" in config: - self.assertEqual( - str(job.maximum_bytes_billed), config["maximumBytesBilled"] - ) - self.assertIsInstance(job.maximum_bytes_billed, int) - else: - self.assertIsNone(job.maximum_bytes_billed) - - def _verify_udf_resources(self, job, config): - udf_resources = config.get("userDefinedFunctionResources", ()) - self.assertEqual(len(job.udf_resources), len(udf_resources)) - for found, expected in zip(job.udf_resources, udf_resources): - if "resourceUri" in expected: - self.assertEqual(found.udf_type, "resourceUri") - self.assertEqual(found.value, expected["resourceUri"]) - else: - self.assertEqual(found.udf_type, "inlineCode") - self.assertEqual(found.value, expected["inlineCode"]) - - def _verifyQueryParameters(self, job, config): - query_parameters = config.get("queryParameters", ()) - self.assertEqual(len(job.query_parameters), len(query_parameters)) - for found, expected in zip(job.query_parameters, query_parameters): - self.assertEqual(found.to_api_repr(), expected) - - def _verify_table_definitions(self, job, config): - table_defs = config.get("tableDefinitions") - if job.table_definitions is None: - self.assertIsNone(table_defs) - else: - self.assertEqual(len(job.table_definitions), len(table_defs)) - for found_key, found_ec in job.table_definitions.items(): - expected_ec = table_defs.get(found_key) - self.assertIsNotNone(expected_ec) - self.assertEqual(found_ec.to_api_repr(), expected_ec) - - def _verify_configuration_properties(self, job, configuration): - if "dryRun" in configuration: - self.assertEqual(job.dry_run, configuration["dryRun"]) - else: - self.assertIsNone(job.dry_run) - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - configuration = resource.get("configuration", {}) - self._verify_configuration_properties(job, configuration) - - query_config = resource.get("configuration", {}).get("query") - self._verifyBooleanResourceProperties(job, query_config) - self._verifyIntegerResourceProperties(job, query_config) - self._verify_udf_resources(job, query_config) - self._verifyQueryParameters(job, query_config) - self._verify_table_definitions(job, query_config) - - self.assertEqual(job.query, query_config["query"]) - if "createDisposition" in query_config: - self.assertEqual(job.create_disposition, query_config["createDisposition"]) - else: - self.assertIsNone(job.create_disposition) - if "defaultDataset" in query_config: - ds_ref = job.default_dataset - ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id} - self.assertEqual(ds_ref, query_config["defaultDataset"]) - else: - self.assertIsNone(job.default_dataset) - if "destinationTable" in query_config: - table = job.destination - tb_ref = { - "projectId": table.project, - "datasetId": table.dataset_id, - "tableId": table.table_id, - } - self.assertEqual(tb_ref, query_config["destinationTable"]) - else: - self.assertIsNone(job.destination) - if "priority" in query_config: - self.assertEqual(job.priority, query_config["priority"]) - else: - self.assertIsNone(job.priority) - if "writeDisposition" in query_config: - self.assertEqual(job.write_disposition, query_config["writeDisposition"]) - else: - self.assertIsNone(job.write_disposition) - if "destinationEncryptionConfiguration" in query_config: - self.assertIsNotNone(job.destination_encryption_configuration) - self.assertEqual( - job.destination_encryption_configuration.kms_key_name, - query_config["destinationEncryptionConfiguration"]["kmsKeyName"], - ) - else: - self.assertIsNone(job.destination_encryption_configuration) - if "schemaUpdateOptions" in query_config: - self.assertEqual( - job.schema_update_options, query_config["schemaUpdateOptions"] - ) - else: - self.assertIsNone(job.schema_update_options) - - def test_ctor_defaults(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.query, self.QUERY) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - self.assertFalse(job.use_legacy_sql) - - # set/read from resource['configuration']['query'] - self.assertIsNone(job.allow_large_results) - self.assertIsNone(job.create_disposition) - self.assertIsNone(job.default_dataset) - self.assertIsNone(job.destination) - self.assertIsNone(job.flatten_results) - self.assertIsNone(job.priority) - self.assertIsNone(job.use_query_cache) - self.assertIsNone(job.dry_run) - self.assertIsNone(job.write_disposition) - self.assertIsNone(job.maximum_billing_tier) - self.assertIsNone(job.maximum_bytes_billed) - self.assertIsNone(job.table_definitions) - self.assertIsNone(job.destination_encryption_configuration) - self.assertIsNone(job.range_partitioning) - self.assertIsNone(job.time_partitioning) - self.assertIsNone(job.clustering_fields) - self.assertIsNone(job.schema_update_options) - - def test_ctor_w_udf_resources(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import UDFResource - - RESOURCE_URI = "gs://some-bucket/js/lib.js" - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - client = _make_client(project=self.PROJECT) - config = QueryJobConfig() - config.udf_resources = udf_resources - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - self.assertEqual(job.udf_resources, udf_resources) - - def test_ctor_w_query_parameters(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] - client = _make_client(project=self.PROJECT) - config = QueryJobConfig(query_parameters=query_parameters) - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - self.assertEqual(job.query_parameters, query_parameters) - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"query": {"query": self.QUERY}}, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_with_encryption(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import SchemaUpdateOption - from google.cloud.bigquery.job import WriteDisposition - - client = _make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - query_config = RESOURCE["configuration"]["query"] - query_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED - query_config["writeDisposition"] = WriteDisposition.WRITE_TRUNCATE - query_config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - } - query_config["schemaUpdateOptions"] = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_cancelled(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - job._properties["status"] = { - "state": "DONE", - "errorResult": {"reason": "stopped"}, - } - - self.assertTrue(job.cancelled()) - - def test_done(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( - {"jobComplete": True, "jobReference": resource["jobReference"]} - ) - self.assertTrue(job.done()) - - def test_done_w_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=42) - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args - self.assertEqual(call_args.kwargs.get("timeout"), 42) - - call_args = fake_reload.call_args - self.assertEqual(call_args.kwargs.get("timeout"), 42) - - def test_done_w_timeout_and_longer_internal_api_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._done_timeout = 8.8 - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=5.5) - - # The expected timeout used is simply the given timeout, as the latter - # is shorter than the job's internal done timeout. - expected_timeout = 5.5 - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) - - call_args = fake_reload.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) - - def test_query_plan(self): - from google.cloud._helpers import _RFC3339_MICROS - from google.cloud.bigquery.job import QueryPlanEntry - from google.cloud.bigquery.job import QueryPlanEntryStep - - plan_entries = [ - { - "name": "NAME", - "id": "1234", - "inputStages": ["88", "101"], - "startMs": "1522540800000", - "endMs": "1522540804000", - "parallelInputs": "1000", - "completedParallelInputs": "5", - "waitMsAvg": "33", - "waitMsMax": "400", - "waitRatioAvg": 2.71828, - "waitRatioMax": 3.14159, - "readMsAvg": "45", - "readMsMax": "90", - "readRatioAvg": 1.41421, - "readRatioMax": 1.73205, - "computeMsAvg": "55", - "computeMsMax": "99", - "computeRatioAvg": 0.69315, - "computeRatioMax": 1.09861, - "writeMsAvg": "203", - "writeMsMax": "340", - "writeRatioAvg": 3.32193, - "writeRatioMax": 2.30258, - "recordsRead": "100", - "recordsWritten": "1", - "status": "STATUS", - "shuffleOutputBytes": "1024", - "shuffleOutputBytesSpilled": "1", - "steps": [{"kind": "KIND", "substeps": ["SUBSTEP1", "SUBSTEP2"]}], - } - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.query_plan, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.query_plan, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.query_plan, []) - - query_stats["queryPlan"] = plan_entries - - self.assertEqual(len(job.query_plan), len(plan_entries)) - for found, expected in zip(job.query_plan, plan_entries): - self.assertIsInstance(found, QueryPlanEntry) - self.assertEqual(found.name, expected["name"]) - self.assertEqual(found.entry_id, expected["id"]) - self.assertEqual(len(found.input_stages), len(expected["inputStages"])) - for f_id in found.input_stages: - self.assertIn(f_id, [int(e) for e in expected["inputStages"]]) - self.assertEqual( - found.start.strftime(_RFC3339_MICROS), "2018-04-01T00:00:00.000000Z" - ) - self.assertEqual( - found.end.strftime(_RFC3339_MICROS), "2018-04-01T00:00:04.000000Z" - ) - self.assertEqual(found.parallel_inputs, int(expected["parallelInputs"])) - self.assertEqual( - found.completed_parallel_inputs, - int(expected["completedParallelInputs"]), - ) - self.assertEqual(found.wait_ms_avg, int(expected["waitMsAvg"])) - self.assertEqual(found.wait_ms_max, int(expected["waitMsMax"])) - self.assertEqual(found.wait_ratio_avg, expected["waitRatioAvg"]) - self.assertEqual(found.wait_ratio_max, expected["waitRatioMax"]) - self.assertEqual(found.read_ms_avg, int(expected["readMsAvg"])) - self.assertEqual(found.read_ms_max, int(expected["readMsMax"])) - self.assertEqual(found.read_ratio_avg, expected["readRatioAvg"]) - self.assertEqual(found.read_ratio_max, expected["readRatioMax"]) - self.assertEqual(found.compute_ms_avg, int(expected["computeMsAvg"])) - self.assertEqual(found.compute_ms_max, int(expected["computeMsMax"])) - self.assertEqual(found.compute_ratio_avg, expected["computeRatioAvg"]) - self.assertEqual(found.compute_ratio_max, expected["computeRatioMax"]) - self.assertEqual(found.write_ms_avg, int(expected["writeMsAvg"])) - self.assertEqual(found.write_ms_max, int(expected["writeMsMax"])) - self.assertEqual(found.write_ratio_avg, expected["writeRatioAvg"]) - self.assertEqual(found.write_ratio_max, expected["writeRatioMax"]) - self.assertEqual(found.records_read, int(expected["recordsRead"])) - self.assertEqual(found.records_written, int(expected["recordsWritten"])) - self.assertEqual(found.status, expected["status"]) - self.assertEqual( - found.shuffle_output_bytes, int(expected["shuffleOutputBytes"]) - ) - self.assertEqual( - found.shuffle_output_bytes_spilled, - int(expected["shuffleOutputBytesSpilled"]), - ) - - self.assertEqual(len(found.steps), len(expected["steps"])) - for f_step, e_step in zip(found.steps, expected["steps"]): - self.assertIsInstance(f_step, QueryPlanEntryStep) - self.assertEqual(f_step.kind, e_step["kind"]) - self.assertEqual(f_step.substeps, e_step["substeps"]) - - def test_total_bytes_processed(self): - total_bytes = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.total_bytes_processed) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.total_bytes_processed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.total_bytes_processed) - - query_stats["totalBytesProcessed"] = str(total_bytes) - self.assertEqual(job.total_bytes_processed, total_bytes) - - def test_total_bytes_billed(self): - total_bytes = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.total_bytes_billed) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.total_bytes_billed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.total_bytes_billed) - - query_stats["totalBytesBilled"] = str(total_bytes) - self.assertEqual(job.total_bytes_billed, total_bytes) - - def test_billing_tier(self): - billing_tier = 1 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.billing_tier) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.billing_tier) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.billing_tier) - - query_stats["billingTier"] = billing_tier - self.assertEqual(job.billing_tier, billing_tier) - - def test_cache_hit(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.cache_hit) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.cache_hit) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.cache_hit) - - query_stats["cacheHit"] = True - self.assertTrue(job.cache_hit) - - def test_ddl_operation_performed(self): - op = "SKIP" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.ddl_operation_performed) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.ddl_operation_performed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.ddl_operation_performed) - - query_stats["ddlOperationPerformed"] = op - self.assertEqual(job.ddl_operation_performed, op) - - def test_ddl_target_routine(self): - from google.cloud.bigquery.routine import RoutineReference - - ref_routine = { - "projectId": self.PROJECT, - "datasetId": "ddl_ds", - "routineId": "targetroutine", - } - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.ddl_target_routine) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.ddl_target_routine) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.ddl_target_routine) - - query_stats["ddlTargetRoutine"] = ref_routine - self.assertIsInstance(job.ddl_target_routine, RoutineReference) - self.assertEqual(job.ddl_target_routine.routine_id, "targetroutine") - self.assertEqual(job.ddl_target_routine.dataset_id, "ddl_ds") - self.assertEqual(job.ddl_target_routine.project, self.PROJECT) - - def test_ddl_target_table(self): - from google.cloud.bigquery.table import TableReference - - ref_table = { - "projectId": self.PROJECT, - "datasetId": "ddl_ds", - "tableId": "targettable", - } - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.ddl_target_table) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.ddl_target_table) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.ddl_target_table) - - query_stats["ddlTargetTable"] = ref_table - self.assertIsInstance(job.ddl_target_table, TableReference) - self.assertEqual(job.ddl_target_table.table_id, "targettable") - self.assertEqual(job.ddl_target_table.dataset_id, "ddl_ds") - self.assertEqual(job.ddl_target_table.project, self.PROJECT) - - def test_num_dml_affected_rows(self): - num_rows = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.num_dml_affected_rows) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.num_dml_affected_rows) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.num_dml_affected_rows) - - query_stats["numDmlAffectedRows"] = str(num_rows) - self.assertEqual(job.num_dml_affected_rows, num_rows) - - def test_slot_millis(self): - millis = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.slot_millis) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.slot_millis) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.slot_millis) - - query_stats["totalSlotMs"] = millis - self.assertEqual(job.slot_millis, millis) - - def test_statement_type(self): - statement_type = "SELECT" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.statement_type) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.statement_type) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.statement_type) - - query_stats["statementType"] = statement_type - self.assertEqual(job.statement_type, statement_type) - - def test_referenced_tables(self): - from google.cloud.bigquery.table import TableReference - - ref_tables_resource = [ - {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local1"}, - {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local2"}, - { - "projectId": "other-project-123", - "datasetId": "other-dataset", - "tableId": "other-table", - }, - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.referenced_tables, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.referenced_tables, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.referenced_tables, []) - - query_stats["referencedTables"] = ref_tables_resource - - local1, local2, remote = job.referenced_tables - - self.assertIsInstance(local1, TableReference) - self.assertEqual(local1.table_id, "local1") - self.assertEqual(local1.dataset_id, "dataset") - self.assertEqual(local1.project, self.PROJECT) - - self.assertIsInstance(local2, TableReference) - self.assertEqual(local2.table_id, "local2") - self.assertEqual(local2.dataset_id, "dataset") - self.assertEqual(local2.project, self.PROJECT) - - self.assertIsInstance(remote, TableReference) - self.assertEqual(remote.table_id, "other-table") - self.assertEqual(remote.dataset_id, "other-dataset") - self.assertEqual(remote.project, "other-project-123") - - def test_timeline(self): - timeline_resource = [ - { - "elapsedMs": 1, - "activeUnits": 22, - "pendingUnits": 33, - "completedUnits": 44, - "totalSlotMs": 101, - } - ] - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.timeline, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.timeline, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.timeline, []) - - query_stats["timeline"] = timeline_resource - - self.assertEqual(len(job.timeline), len(timeline_resource)) - self.assertEqual(job.timeline[0].elapsed_ms, 1) - self.assertEqual(job.timeline[0].active_units, 22) - self.assertEqual(job.timeline[0].pending_units, 33) - self.assertEqual(job.timeline[0].completed_units, 44) - self.assertEqual(job.timeline[0].slot_millis, 101) - - def test_undeclared_query_parameters(self): - from google.cloud.bigquery.query import ArrayQueryParameter - from google.cloud.bigquery.query import ScalarQueryParameter - from google.cloud.bigquery.query import StructQueryParameter - - undeclared = [ - { - "name": "my_scalar", - "parameterType": {"type": "STRING"}, - "parameterValue": {"value": "value"}, - }, - { - "name": "my_array", - "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, - "parameterValue": { - "arrayValues": [{"value": "1066"}, {"value": "1745"}] - }, - }, - { - "name": "my_struct", - "parameterType": { - "type": "STRUCT", - "structTypes": [{"name": "count", "type": {"type": "INT64"}}], - }, - "parameterValue": {"structValues": {"count": {"value": "123"}}}, - }, - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.undeclared_query_parameters, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.undeclared_query_parameters, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.undeclared_query_parameters, []) - - query_stats["undeclaredQueryParameters"] = undeclared - - scalar, array, struct = job.undeclared_query_parameters - - self.assertIsInstance(scalar, ScalarQueryParameter) - self.assertEqual(scalar.name, "my_scalar") - self.assertEqual(scalar.type_, "STRING") - self.assertEqual(scalar.value, "value") - - self.assertIsInstance(array, ArrayQueryParameter) - self.assertEqual(array.name, "my_array") - self.assertEqual(array.array_type, "INT64") - self.assertEqual(array.values, [1066, 1745]) - - self.assertIsInstance(struct, StructQueryParameter) - self.assertEqual(struct.name, "my_struct") - self.assertEqual(struct.struct_types, {"count": "INT64"}) - self.assertEqual(struct.struct_values, {"count": 123}) - - def test_estimated_bytes_processed(self): - est_bytes = 123456 - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.estimated_bytes_processed) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.estimated_bytes_processed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.estimated_bytes_processed) - - query_stats["estimatedBytesProcessed"] = str(est_bytes) - self.assertEqual(job.estimated_bytes_processed, est_bytes) - - def test_result(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) - job_resource_done["configuration"]["query"]["destinationTable"] = { - "projectId": "dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - tabledata_resource = { - # Explicitly set totalRows to be different from the initial - # response to test update during iteration. - "totalRows": "1", - "pageToken": None, - "rows": [{"f": [{"v": "abc"}]}], - } - conn = _make_connection( - query_resource, query_resource_done, job_resource_done, tabledata_resource - ) - client = _make_client(self.PROJECT, connection=conn) - job = self._get_target_class().from_api_repr(job_resource, client) - - result = job.result() - - self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 2) - rows = list(result) - self.assertEqual(len(rows), 1) - self.assertEqual(rows[0].col1, "abc") - # Test that the total_rows property has changed during iteration, based - # on the response from tabledata.list. - self.assertEqual(result.total_rows, 1) - - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - tabledata_call = mock.call( - method="GET", - path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call, tabledata_call] - ) - - def test_result_with_done_job_calls_get_query_results(self): - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "1", - } - job_resource = self._make_resource(started=True, ended=True) - job_resource["configuration"]["query"]["destinationTable"] = { - "projectId": "dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - tabledata_resource = { - "totalRows": "1", - "pageToken": None, - "rows": [{"f": [{"v": "abc"}]}], - } - conn = _make_connection(query_resource_done, tabledata_resource) - client = _make_client(self.PROJECT, connection=conn) - job = self._get_target_class().from_api_repr(job_resource, client) - - result = job.result() - - rows = list(result) - self.assertEqual(len(rows), 1) - self.assertEqual(rows[0].col1, "abc") - - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, - timeout=None, - ) - tabledata_call = mock.call( - method="GET", - path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls([query_results_call, tabledata_call]) - - def test_result_with_max_results(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "5", - } - tabledata_resource = { - "totalRows": "5", - "pageToken": None, - "rows": [ - {"f": [{"v": "abc"}]}, - {"f": [{"v": "def"}]}, - {"f": [{"v": "ghi"}]}, - ], - } - connection = _make_connection(query_resource, tabledata_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - max_results = 3 - - result = job.result(max_results=max_results) - - self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 5) - - rows = list(result) - - self.assertEqual(len(rows), 3) - self.assertEqual(len(connection.api_request.call_args_list), 2) - tabledata_list_request = connection.api_request.call_args_list[1] - self.assertEqual( - tabledata_list_request[1]["query_params"]["maxResults"], max_results - ) - - def test_result_w_retry(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) - job_resource_done["configuration"]["query"]["destinationTable"] = { - "projectId": "dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - - connection = _make_connection( - exceptions.NotFound("not normally retriable"), - query_resource, - exceptions.NotFound("not normally retriable"), - query_resource_done, - exceptions.NotFound("not normally retriable"), - job_resource_done, - ) - client = _make_client(self.PROJECT, connection=connection) - job = self._get_target_class().from_api_repr(job_resource, client) - - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - - self.assertIsInstance(job.result(retry=custom_retry), RowIterator) - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - - connection.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call] - ) - - def test_result_w_empty_schema(self): - from google.cloud.bigquery.table import _EmptyRowIterator - - # Destination table may have no schema for some DDL and DML queries. - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": []}, - } - connection = _make_connection(query_resource, query_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - result = job.result() - - self.assertIsInstance(result, _EmptyRowIterator) - self.assertEqual(list(result), []) - - def test_result_invokes_begins(self): - begun_resource = self._make_resource() - incomplete_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - query_resource = copy.deepcopy(incomplete_resource) - query_resource["jobComplete"] = True - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, - incomplete_resource, - query_resource, - done_resource, - query_resource, - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - job.result() - - self.assertEqual(len(connection.api_request.call_args_list), 4) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[2] - reload_request = connection.api_request.call_args_list[3] - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual(reload_request[1]["method"], "GET") - - def test_result_w_timeout(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): - job.result(timeout=1.0) - - self.assertEqual(len(connection.api_request.call_args_list), 3) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[1] - reload_request = connection.api_request.call_args_list[2] - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual( - query_request[1]["path"], - "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), - ) - self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) - self.assertEqual(reload_request[1]["method"], "GET") - - def test_result_w_page_size(self): - # Arrange - query_results_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "4", - } - job_resource = self._make_resource(started=True, ended=True) - q_config = job_resource["configuration"]["query"] - q_config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - } - tabledata_resource = { - "totalRows": 4, - "pageToken": "some-page-token", - "rows": [ - {"f": [{"v": "row1"}]}, - {"f": [{"v": "row2"}]}, - {"f": [{"v": "row3"}]}, - ], - } - tabledata_resource_page_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} - conn = _make_connection( - query_results_resource, tabledata_resource, tabledata_resource_page_2 - ) - client = _make_client(self.PROJECT, connection=conn) - job = self._get_target_class().from_api_repr(job_resource, client) - - # Act - result = job.result(page_size=3) - - # Assert - actual_rows = list(result) - self.assertEqual(len(actual_rows), 4) - - tabledata_path = "/projects/%s/datasets/%s/tables/%s/data" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - conn.api_request.assert_has_calls( - [ - mock.call( - method="GET", - path=tabledata_path, - query_params={"maxResults": 3}, - timeout=None, - ), - mock.call( - method="GET", - path=tabledata_path, - query_params={"pageToken": "some-page-token", "maxResults": 3}, - timeout=None, - ), - ] - ) - - def test_result_with_start_index(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "5", - } - tabledata_resource = { - "totalRows": "5", - "pageToken": None, - "rows": [ - {"f": [{"v": "abc"}]}, - {"f": [{"v": "def"}]}, - {"f": [{"v": "ghi"}]}, - {"f": [{"v": "jkl"}]}, - ], - } - connection = _make_connection(query_resource, tabledata_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - start_index = 1 - - result = job.result(start_index=start_index) - - self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 5) - - rows = list(result) - - self.assertEqual(len(rows), 4) - self.assertEqual(len(connection.api_request.call_args_list), 2) - tabledata_list_request = connection.api_request.call_args_list[1] - self.assertEqual( - tabledata_list_request[1]["query_params"]["startIndex"], start_index - ) - - def test_result_error(self): - from google.cloud import exceptions - - query = textwrap.dedent( - """ - SELECT foo, bar - FROM table_baz - WHERE foo == bar""" - ) - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, query, client) - error_result = { - "debugInfo": "DEBUG", - "location": "LOCATION", - "message": "MESSAGE", - "reason": "invalid", - } - job._properties["status"] = { - "errorResult": error_result, - "errors": [error_result], - "state": "DONE", - } - job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( - {"jobComplete": True, "jobReference": job._properties["jobReference"]} - ) - job._set_future_result() - - with self.assertRaises(exceptions.GoogleCloudError) as exc_info: - job.result() - - self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) - self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) - - exc_job_instance = getattr(exc_info.exception, "query_job", None) - self.assertIs(exc_job_instance, job) - - full_text = str(exc_info.exception) - assert job.job_id in full_text - assert "Query Job SQL Follows" in full_text - - for i, line in enumerate(query.splitlines(), start=1): - expected_line = "{}:{}".format(i, line) - assert expected_line in full_text - - def test_result_transport_timeout_error(self): - query = textwrap.dedent( - """ - SELECT foo, bar - FROM table_baz - WHERE foo == bar""" - ) - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, query, client) - call_api_patch = mock.patch( - "google.cloud.bigquery.client.Client._call_api", - autospec=True, - side_effect=requests.exceptions.Timeout("Server response took too long."), - ) - - # Make sure that timeout errors get rebranded to concurrent futures timeout. - with call_api_patch, self.assertRaises(concurrent.futures.TimeoutError): - job.result(timeout=1) - - def test__begin_error(self): - from google.cloud import exceptions - - query = textwrap.dedent( - """ - SELECT foo, bar - FROM table_baz - WHERE foo == bar""" - ) - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, query, client) - call_api_patch = mock.patch( - "google.cloud.bigquery.client.Client._call_api", - autospec=True, - side_effect=exceptions.BadRequest("Syntax error in SQL query"), - ) - - with call_api_patch, self.assertRaises(exceptions.GoogleCloudError) as exc_info: - job.result() - - self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) - self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) - - exc_job_instance = getattr(exc_info.exception, "query_job", None) - self.assertIs(exc_job_instance, job) - - full_text = str(exc_info.exception) - assert job.job_id in full_text - assert "Query Job SQL Follows" in full_text - - for i, line in enumerate(query.splitlines(), start=1): - expected_line = "{}:{}".format(i, line) - assert expected_line in full_text - - def test__begin_w_timeout(self): - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, self.QUERY, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(timeout=7.5) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": {"query": self.QUERY, "useLegacySql": False} - }, - }, - timeout=7.5, - ) - - def test_begin_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - DS_ID = "DATASET" - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - - config = QueryJobConfig() - config.default_dataset = DatasetReference(self.PROJECT, DS_ID) - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertIsNone(job.default_dataset) - self.assertEqual(job.udf_resources, []) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": False, - "defaultDataset": { - "projectId": self.PROJECT, - "datasetId": DS_ID, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_alternate_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.job import QueryPriority - from google.cloud.bigquery.job import SchemaUpdateOption - from google.cloud.bigquery.job import WriteDisposition - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - TABLE = "TABLE" - DS_ID = "DATASET" - RESOURCE = self._make_resource(ended=True) - QUERY_CONFIGURATION = { - "query": self.QUERY, - "allowLargeResults": True, - "createDisposition": CreateDisposition.CREATE_NEVER, - "defaultDataset": {"projectId": self.PROJECT, "datasetId": DS_ID}, - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": DS_ID, - "tableId": TABLE, - }, - "flattenResults": True, - "priority": QueryPriority.INTERACTIVE, - "useQueryCache": True, - "useLegacySql": True, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - "maximumBillingTier": 4, - "maximumBytesBilled": "123456", - "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_RELAXATION], - } - RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION - RESOURCE["configuration"]["dryRun"] = True - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) - table_ref = dataset_ref.table(TABLE) - - config = QueryJobConfig() - config.allow_large_results = True - config.create_disposition = CreateDisposition.CREATE_NEVER - config.default_dataset = dataset_ref - config.destination = table_ref - config.dry_run = True - config.flatten_results = True - config.maximum_billing_tier = 4 - config.priority = QueryPriority.INTERACTIVE - config.use_legacy_sql = True - config.use_query_cache = True - config.write_disposition = WriteDisposition.WRITE_TRUNCATE - config.maximum_bytes_billed = 123456 - config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_RELAXATION] - job = self._make_one(self.JOB_ID, self.QUERY, client1, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"dryRun": True, "query": QUERY_CONFIGURATION}, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_udf(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import UDFResource - - RESOURCE_URI = "gs://some-bucket/js/lib.js" - INLINE_UDF_CODE = 'var someCode = "here";' - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - RESOURCE["configuration"]["query"]["userDefinedFunctionResources"] = [ - {"resourceUri": RESOURCE_URI}, - {"inlineCode": INLINE_UDF_CODE}, - ] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - udf_resources = [ - UDFResource("resourceUri", RESOURCE_URI), - UDFResource("inlineCode", INLINE_UDF_CODE), - ] - config = QueryJobConfig() - config.udf_resources = udf_resources - config.use_legacy_sql = True - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertEqual(job.udf_resources, udf_resources) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": True, - "userDefinedFunctionResources": [ - {"resourceUri": RESOURCE_URI}, - {"inlineCode": INLINE_UDF_CODE}, - ], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_named_query_parameter(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - config = RESOURCE["configuration"]["query"] - config["parameterMode"] = "NAMED" - config["queryParameters"] = [ - { - "name": "foo", - "parameterType": {"type": "INT64"}, - "parameterValue": {"value": "123"}, - } - ] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - jconfig = QueryJobConfig() - jconfig.query_parameters = query_parameters - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertEqual(job.query_parameters, query_parameters) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": False, - "parameterMode": "NAMED", - "queryParameters": config["queryParameters"], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_positional_query_parameter(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter.positional("INT64", 123)] - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - config = RESOURCE["configuration"]["query"] - config["parameterMode"] = "POSITIONAL" - config["queryParameters"] = [ - {"parameterType": {"type": "INT64"}, "parameterValue": {"value": "123"}} - ] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - jconfig = QueryJobConfig() - jconfig.query_parameters = query_parameters - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertEqual(job.query_parameters, query_parameters) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": False, - "parameterMode": "POSITIONAL", - "queryParameters": config["queryParameters"], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_table_defs(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.external_config import ExternalConfig - from google.cloud.bigquery.external_config import BigtableColumn - from google.cloud.bigquery.external_config import BigtableColumnFamily - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - - bt_config = ExternalConfig("BIGTABLE") - bt_config.ignore_unknown_values = True - bt_config.options.read_rowkey_as_string = True - cf = BigtableColumnFamily() - cf.family_id = "cf" - col = BigtableColumn() - col.field_name = "fn" - cf.columns = [col] - bt_config.options.column_families = [cf] - BT_CONFIG_RESOURCE = { - "sourceFormat": "BIGTABLE", - "ignoreUnknownValues": True, - "bigtableOptions": { - "readRowkeyAsString": True, - "columnFamilies": [ - {"familyId": "cf", "columns": [{"fieldName": "fn"}]} - ], - }, - } - CSV_CONFIG_RESOURCE = { - "sourceFormat": "CSV", - "maxBadRecords": 8, - "csvOptions": {"allowJaggedRows": True}, - } - csv_config = ExternalConfig("CSV") - csv_config.max_bad_records = 8 - csv_config.options.allow_jagged_rows = True - bt_table = "bigtable-table" - csv_table = "csv-table" - RESOURCE["configuration"]["query"]["tableDefinitions"] = { - bt_table: BT_CONFIG_RESOURCE, - csv_table: CSV_CONFIG_RESOURCE, - } - want_resource = copy.deepcopy(RESOURCE) - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - config = QueryJobConfig() - config.table_definitions = {bt_table: bt_config, csv_table: csv_config} - config.use_legacy_sql = True - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": True, - "tableDefinitions": { - bt_table: BT_CONFIG_RESOURCE, - csv_table: CSV_CONFIG_RESOURCE, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, want_resource) - - def test_dry_run_query(self): - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - RESOURCE["configuration"]["dryRun"] = True - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - config = QueryJobConfig() - config.dry_run = True - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - self.assertEqual(job.udf_resources, []) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": {"query": self.QUERY, "useLegacySql": False}, - "dryRun": True, - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, self.QUERY, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, self.QUERY, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_reload_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - DS_ID = "DATASET" - DEST_TABLE = "dest_table" - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) - table_ref = dataset_ref.table(DEST_TABLE) - config = QueryJobConfig() - config.destination = table_ref - job = self._make_one(self.JOB_ID, None, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertNotEqual(job.destination, table_ref) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - DS_ID = "DATASET" - DEST_TABLE = "dest_table" - RESOURCE = self._make_resource() - q_config = RESOURCE["configuration"]["query"] - q_config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": DS_ID, - "tableId": DEST_TABLE, - } - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, self.QUERY, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_timeout(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - DS_ID = "DATASET" - DEST_TABLE = "dest_table" - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) - table_ref = dataset_ref.table(DEST_TABLE) - config = QueryJobConfig() - config.destination = table_ref - job = self._make_one(self.JOB_ID, None, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(timeout=4.2) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertNotEqual(job.destination, table_ref) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=4.2 - ) - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_to_arrow(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - { - "name": "spouse_1", - "type": "RECORD", - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ], - }, - { - "name": "spouse_2", - "type": "RECORD", - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ], - }, - ] - }, - } - tabledata_resource = { - "rows": [ - { - "f": [ - {"v": {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}}, - {"v": {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}}, - ] - }, - { - "f": [ - {"v": {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}}, - {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, - ] - }, - ] - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, tabledata_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - tbl = job.to_arrow(create_bqstorage_client=False) - - self.assertIsInstance(tbl, pyarrow.Table) - self.assertEqual(tbl.num_rows, 2) - - # Check the schema. - self.assertEqual(tbl.schema[0].name, "spouse_1") - self.assertEqual(tbl.schema[0].type[0].name, "name") - self.assertEqual(tbl.schema[0].type[1].name, "age") - self.assertTrue(pyarrow.types.is_struct(tbl.schema[0].type)) - self.assertTrue(pyarrow.types.is_string(tbl.schema[0].type[0].type)) - self.assertTrue(pyarrow.types.is_int64(tbl.schema[0].type[1].type)) - self.assertEqual(tbl.schema[1].name, "spouse_2") - self.assertEqual(tbl.schema[1].type[0].name, "name") - self.assertEqual(tbl.schema[1].type[1].name, "age") - self.assertTrue(pyarrow.types.is_struct(tbl.schema[1].type)) - self.assertTrue(pyarrow.types.is_string(tbl.schema[1].type[0].type)) - self.assertTrue(pyarrow.types.is_int64(tbl.schema[1].type[1].type)) - - # Check the data. - tbl_data = tbl.to_pydict() - spouse_1 = tbl_data["spouse_1"] - self.assertEqual( - spouse_1, - [ - {"name": "Phred Phlyntstone", "age": 32}, - {"name": "Bhettye Rhubble", "age": 27}, - ], - ) - spouse_2 = tbl_data["spouse_2"] - self.assertEqual( - spouse_2, - [ - {"name": "Wylma Phlyntstone", "age": 29}, - {"name": "Bharney Rhubble", "age": 33}, - ], - ) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ] - }, - } - tabledata_resource = { - "rows": [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, tabledata_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - df = job.to_dataframe(create_bqstorage_client=False) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 4) # verify the number of rows - self.assertEqual(list(df), ["name", "age"]) # verify the column names - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_ddl_query(self): - # Destination table may have no schema for some DDL and DML queries. - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": []}, - } - connection = _make_connection(query_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - df = job.to_dataframe() - - self.assertEqual(len(df), 0) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_to_dataframe_bqstorage(self): - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ] - }, - } - connection = _make_connection(query_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) - session = bigquery_storage.types.ReadSession() - session.avro_schema.schema = json.dumps( - { - "type": "record", - "name": "__root__", - "fields": [ - {"name": "name", "type": ["null", "string"]}, - {"name": "age", "type": ["null", "long"]}, - ], - } - ) - bqstorage_client.create_read_session.return_value = session - - job.to_dataframe(bqstorage_client=bqstorage_client) - - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **resource["configuration"]["query"]["destinationTable"] - ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, - data_format=bigquery_storage.types.DataFormat.ARROW, - ) - bqstorage_client.create_read_session.assert_called_once_with( - parent="projects/{}".format(self.PROJECT), - read_session=expected_session, - max_stream_count=0, # Use default number of streams for best performance. - ) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_column_dtypes(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - {"name": "start_timestamp", "type": "TIMESTAMP"}, - {"name": "seconds", "type": "INT64"}, - {"name": "miles", "type": "FLOAT64"}, - {"name": "km", "type": "FLOAT64"}, - {"name": "payment_type", "type": "STRING"}, - {"name": "complete", "type": "BOOL"}, - {"name": "date", "type": "DATE"}, - ] - }, - } - row_data = [ - [ - "1.4338368E9", - "420", - "1.1", - "1.77", - "Cto_dataframeash", - "true", - "1999-12-01", - ], - ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], - ] - rows = [{"f": [{"v": field} for field in row]} for row in row_data] - query_resource["rows"] = rows - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, query_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - df = job.to_dataframe(dtypes={"km": "float16"}, create_bqstorage_client=False) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 3) # verify the number of rows - exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] - self.assertEqual(list(df), exp_columns) # verify the column names - - self.assertEqual(df.start_timestamp.dtype.name, "datetime64[ns, UTC]") - self.assertEqual(df.seconds.dtype.name, "int64") - self.assertEqual(df.miles.dtype.name, "float64") - self.assertEqual(df.km.dtype.name, "float16") - self.assertEqual(df.payment_type.dtype.name, "object") - self.assertEqual(df.complete.dtype.name, "bool") - self.assertEqual(df.date.dtype.name, "object") - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_column_date_dtypes(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "1", - "schema": {"fields": [{"name": "date", "type": "DATE"}]}, - } - row_data = [ - ["1999-12-01"], - ] - rows = [{"f": [{"v": field} for field in row]} for row in row_data] - query_resource["rows"] = rows - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, query_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 1) # verify the number of rows - exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] - self.assertEqual(list(df), exp_columns) # verify the column names - - self.assertEqual(df.date.dtype.name, "datetime64[ns]") - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm") - def test_to_dataframe_with_progress_bar(self, tqdm_mock): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}] - }, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, - query_resource, - done_resource, - query_resource, - query_resource, - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - job.to_dataframe(progress_bar_type=None, create_bqstorage_client=False) - tqdm_mock.assert_not_called() - - job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - tqdm_mock.assert_called() - - def test_iter(self): - import types - - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "0", - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - self.assertIsInstance(iter(job), types.GeneratorType) - - -class TestQueryPlanEntryStep(unittest.TestCase, _Base): - KIND = "KIND" - SUBSTEPS = ("SUB1", "SUB2") - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryPlanEntryStep - - return QueryPlanEntryStep - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - self.assertEqual(step.kind, self.KIND) - self.assertEqual(step.substeps, list(self.SUBSTEPS)) - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - step = klass.from_api_repr({}) - self.assertIsNone(step.kind) - self.assertEqual(step.substeps, []) - - def test_from_api_repr_normal(self): - resource = {"kind": self.KIND, "substeps": self.SUBSTEPS} - klass = self._get_target_class() - step = klass.from_api_repr(resource) - self.assertEqual(step.kind, self.KIND) - self.assertEqual(step.substeps, list(self.SUBSTEPS)) - - def test___eq___mismatched_type(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - self.assertNotEqual(step, object()) - - def test___eq___mismatch_kind(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - other = self._make_one("OTHER", self.SUBSTEPS) - self.assertNotEqual(step, other) - - def test___eq___mismatch_substeps(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - other = self._make_one(self.KIND, ()) - self.assertNotEqual(step, other) - - def test___eq___hit(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - other = self._make_one(self.KIND, self.SUBSTEPS) - self.assertEqual(step, other) - - def test___eq___wrong_type(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - self.assertFalse(step == "hello") - - -class TestQueryPlanEntry(unittest.TestCase, _Base): - NAME = "NAME" - ENTRY_ID = 1234 - START_MS = 1522540800000 - END_MS = 1522540804000 - INPUT_STAGES = (88, 101) - PARALLEL_INPUTS = 1000 - COMPLETED_PARALLEL_INPUTS = 5 - WAIT_MS_AVG = 33 - WAIT_MS_MAX = 400 - WAIT_RATIO_AVG = 2.71828 - WAIT_RATIO_MAX = 3.14159 - READ_MS_AVG = 45 - READ_MS_MAX = 90 - READ_RATIO_AVG = 1.41421 - READ_RATIO_MAX = 1.73205 - COMPUTE_MS_AVG = 55 - COMPUTE_MS_MAX = 99 - COMPUTE_RATIO_AVG = 0.69315 - COMPUTE_RATIO_MAX = 1.09861 - WRITE_MS_AVG = 203 - WRITE_MS_MAX = 340 - WRITE_RATIO_AVG = 3.32193 - WRITE_RATIO_MAX = 2.30258 - RECORDS_READ = 100 - RECORDS_WRITTEN = 1 - STATUS = "STATUS" - SHUFFLE_OUTPUT_BYTES = 1024 - SHUFFLE_OUTPUT_BYTES_SPILLED = 1 - - START_RFC3339_MICROS = "2018-04-01T00:00:00.000000Z" - END_RFC3339_MICROS = "2018-04-01T00:00:04.000000Z" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryPlanEntry - - return QueryPlanEntry - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - - entry = klass.from_api_repr({}) - - self.assertIsNone(entry.name) - self.assertIsNone(entry.entry_id) - self.assertEqual(entry.input_stages, []) - self.assertIsNone(entry.start) - self.assertIsNone(entry.end) - self.assertIsNone(entry.parallel_inputs) - self.assertIsNone(entry.completed_parallel_inputs) - self.assertIsNone(entry.wait_ms_avg) - self.assertIsNone(entry.wait_ms_max) - self.assertIsNone(entry.wait_ratio_avg) - self.assertIsNone(entry.wait_ratio_max) - self.assertIsNone(entry.read_ms_avg) - self.assertIsNone(entry.read_ms_max) - self.assertIsNone(entry.read_ratio_avg) - self.assertIsNone(entry.read_ratio_max) - self.assertIsNone(entry.compute_ms_avg) - self.assertIsNone(entry.compute_ms_max) - self.assertIsNone(entry.compute_ratio_avg) - self.assertIsNone(entry.compute_ratio_max) - self.assertIsNone(entry.write_ms_avg) - self.assertIsNone(entry.write_ms_max) - self.assertIsNone(entry.write_ratio_avg) - self.assertIsNone(entry.write_ratio_max) - self.assertIsNone(entry.records_read) - self.assertIsNone(entry.records_written) - self.assertIsNone(entry.status) - self.assertIsNone(entry.shuffle_output_bytes) - self.assertIsNone(entry.shuffle_output_bytes_spilled) - self.assertEqual(entry.steps, []) - - def test_from_api_repr_normal(self): - from google.cloud.bigquery.job import QueryPlanEntryStep - - steps = [ - QueryPlanEntryStep( - kind=TestQueryPlanEntryStep.KIND, - substeps=TestQueryPlanEntryStep.SUBSTEPS, - ) - ] - resource = { - "name": self.NAME, - "id": self.ENTRY_ID, - "inputStages": self.INPUT_STAGES, - "startMs": self.START_MS, - "endMs": self.END_MS, - "waitMsAvg": self.WAIT_MS_AVG, - "waitMsMax": self.WAIT_MS_MAX, - "waitRatioAvg": self.WAIT_RATIO_AVG, - "waitRatioMax": self.WAIT_RATIO_MAX, - "readMsAvg": self.READ_MS_AVG, - "readMsMax": self.READ_MS_MAX, - "readRatioAvg": self.READ_RATIO_AVG, - "readRatioMax": self.READ_RATIO_MAX, - "computeMsAvg": self.COMPUTE_MS_AVG, - "computeMsMax": self.COMPUTE_MS_MAX, - "computeRatioAvg": self.COMPUTE_RATIO_AVG, - "computeRatioMax": self.COMPUTE_RATIO_MAX, - "writeMsAvg": self.WRITE_MS_AVG, - "writeMsMax": self.WRITE_MS_MAX, - "writeRatioAvg": self.WRITE_RATIO_AVG, - "writeRatioMax": self.WRITE_RATIO_MAX, - "recordsRead": self.RECORDS_READ, - "recordsWritten": self.RECORDS_WRITTEN, - "status": self.STATUS, - "shuffleOutputBytes": self.SHUFFLE_OUTPUT_BYTES, - "shuffleOutputBytesSpilled": self.SHUFFLE_OUTPUT_BYTES_SPILLED, - "steps": [ - { - "kind": TestQueryPlanEntryStep.KIND, - "substeps": TestQueryPlanEntryStep.SUBSTEPS, - } - ], - } - klass = self._get_target_class() - - entry = klass.from_api_repr(resource) - self.assertEqual(entry.name, self.NAME) - self.assertEqual(entry.entry_id, self.ENTRY_ID) - self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) - self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) - self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) - self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) - self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) - self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) - self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) - self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) - self.assertEqual(entry.records_read, self.RECORDS_READ) - self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) - self.assertEqual(entry.status, self.STATUS) - self.assertEqual(entry.steps, steps) - - def test_start(self): - from google.cloud._helpers import _RFC3339_MICROS - - klass = self._get_target_class() - - entry = klass.from_api_repr({}) - self.assertEqual(entry.start, None) - - entry._properties["startMs"] = self.START_MS - self.assertEqual( - entry.start.strftime(_RFC3339_MICROS), self.START_RFC3339_MICROS - ) - - def test_end(self): - from google.cloud._helpers import _RFC3339_MICROS - - klass = self._get_target_class() - - entry = klass.from_api_repr({}) - self.assertEqual(entry.end, None) - - entry._properties["endMs"] = self.END_MS - self.assertEqual(entry.end.strftime(_RFC3339_MICROS), self.END_RFC3339_MICROS) - - -class TestScriptStackFrame(unittest.TestCase, _Base): - def _make_one(self, resource): - from google.cloud.bigquery.job import ScriptStackFrame - - return ScriptStackFrame(resource) - - def test_procedure_id(self): - frame = self._make_one({"procedureId": "some-procedure"}) - self.assertEqual(frame.procedure_id, "some-procedure") - del frame._properties["procedureId"] - self.assertIsNone(frame.procedure_id) - - def test_start_line(self): - frame = self._make_one({"startLine": 5}) - self.assertEqual(frame.start_line, 5) - frame._properties["startLine"] = "5" - self.assertEqual(frame.start_line, 5) - - def test_start_column(self): - frame = self._make_one({"startColumn": 29}) - self.assertEqual(frame.start_column, 29) - frame._properties["startColumn"] = "29" - self.assertEqual(frame.start_column, 29) - - def test_end_line(self): - frame = self._make_one({"endLine": 9}) - self.assertEqual(frame.end_line, 9) - frame._properties["endLine"] = "9" - self.assertEqual(frame.end_line, 9) - - def test_end_column(self): - frame = self._make_one({"endColumn": 14}) - self.assertEqual(frame.end_column, 14) - frame._properties["endColumn"] = "14" - self.assertEqual(frame.end_column, 14) - - def test_text(self): - frame = self._make_one({"text": "QUERY TEXT"}) - self.assertEqual(frame.text, "QUERY TEXT") - - -class TestScriptStatistics(unittest.TestCase, _Base): - def _make_one(self, resource): - from google.cloud.bigquery.job import ScriptStatistics - - return ScriptStatistics(resource) - - def test_evalutation_kind(self): - stats = self._make_one({"evaluationKind": "EXPRESSION"}) - self.assertEqual(stats.evaluation_kind, "EXPRESSION") - self.assertEqual(stats.stack_frames, []) - - def test_stack_frames(self): - stats = self._make_one( - { - "stackFrames": [ - { - "procedureId": "some-procedure", - "startLine": 5, - "startColumn": 29, - "endLine": 9, - "endColumn": 14, - "text": "QUERY TEXT", - }, - {}, - ] - } - ) - stack_frames = stats.stack_frames - self.assertEqual(len(stack_frames), 2) - stack_frame = stack_frames[0] - self.assertEqual(stack_frame.procedure_id, "some-procedure") - self.assertEqual(stack_frame.start_line, 5) - self.assertEqual(stack_frame.start_column, 29) - self.assertEqual(stack_frame.end_line, 9) - self.assertEqual(stack_frame.end_column, 14) - self.assertEqual(stack_frame.text, "QUERY TEXT") - stack_frame = stack_frames[1] - self.assertIsNone(stack_frame.procedure_id) - self.assertIsNone(stack_frame.start_line) - self.assertIsNone(stack_frame.start_column) - self.assertIsNone(stack_frame.end_line) - self.assertIsNone(stack_frame.end_column) - self.assertIsNone(stack_frame.text) - - -class TestTimelineEntry(unittest.TestCase, _Base): - ELAPSED_MS = 101 - ACTIVE_UNITS = 50 - PENDING_UNITS = 98 - COMPLETED_UNITS = 520 - SLOT_MILLIS = 12029 - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import TimelineEntry - - return TimelineEntry - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - entry = klass.from_api_repr({}) - self.assertIsNone(entry.elapsed_ms) - self.assertIsNone(entry.active_units) - self.assertIsNone(entry.pending_units) - self.assertIsNone(entry.completed_units) - self.assertIsNone(entry.slot_millis) - - def test_from_api_repr_normal(self): - resource = { - "elapsedMs": self.ELAPSED_MS, - "activeUnits": self.ACTIVE_UNITS, - "pendingUnits": self.PENDING_UNITS, - "completedUnits": self.COMPLETED_UNITS, - "totalSlotMs": self.SLOT_MILLIS, - } - klass = self._get_target_class() - - entry = klass.from_api_repr(resource) - self.assertEqual(entry.elapsed_ms, self.ELAPSED_MS) - self.assertEqual(entry.active_units, self.ACTIVE_UNITS) - self.assertEqual(entry.pending_units, self.PENDING_UNITS) - self.assertEqual(entry.completed_units, self.COMPLETED_UNITS) - self.assertEqual(entry.slot_millis, self.SLOT_MILLIS) - - -@pytest.mark.parametrize( - "query,expected", - ( - (None, False), - ("", False), - ("select name, age from table", False), - ("select name, age from table LIMIT 10;", False), - ("select name, age from table order by other_column;", True), - ("Select name, age From table Order By other_column", True), - ("SELECT name, age FROM table ORDER BY other_column;", True), - ("select name, age from table order\nby other_column", True), - ("Select name, age From table Order\nBy other_column;", True), - ("SELECT name, age FROM table ORDER\nBY other_column", True), - ("SelecT name, age froM table OrdeR \n\t BY other_column;", True), - ), -) -def test__contains_order_by(query, expected): - from google.cloud.bigquery import job as mut - - if expected: - assert mut._contains_order_by(query) - else: - assert not mut._contains_order_by(query) - - -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) -@pytest.mark.parametrize( - "query", - ( - "select name, age from table order by other_column;", - "Select name, age From table Order By other_column;", - "SELECT name, age FROM table ORDER BY other_column;", - "select name, age from table order\nby other_column;", - "Select name, age From table Order\nBy other_column;", - "SELECT name, age FROM table ORDER\nBY other_column;", - "SelecT name, age froM table OrdeR \n\t BY other_column;", - ), -) -def test_to_dataframe_bqstorage_preserve_order(query): - from google.cloud.bigquery.job import QueryJob as target_class - - job_resource = _make_job_resource( - project_id="test-project", job_type="query", ended=True - ) - job_resource["configuration"]["query"]["query"] = query - job_resource["status"] = {"state": "DONE"} - get_query_results_resource = { - "jobComplete": True, - "jobReference": {"projectId": "test-project", "jobId": "test-job"}, - "schema": { - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ] - }, - "totalRows": "4", - } - connection = _make_connection(get_query_results_resource, job_resource) - client = _make_client(connection=connection) - job = target_class.from_api_repr(job_resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) - session = bigquery_storage.types.ReadSession() - session.avro_schema.schema = json.dumps( - { - "type": "record", - "name": "__root__", - "fields": [ - {"name": "name", "type": ["null", "string"]}, - {"name": "age", "type": ["null", "long"]}, - ], - } - ) - bqstorage_client.create_read_session.return_value = session - - job.to_dataframe(bqstorage_client=bqstorage_client) - - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **job_resource["configuration"]["query"]["destinationTable"] - ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, - ) - bqstorage_client.create_read_session.assert_called_once_with( - parent="projects/test-project", - read_session=expected_session, - max_stream_count=1, # Use a single stream to preserve row order. - ) From 03c0e4b0622f44b57563b979a17c4269070f739f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 4 Nov 2020 13:42:01 -0600 Subject: [PATCH 0981/2016] perf: use `jobs.getQueryResults` to download result sets (#363) * refactor: break job into multiple modules Original paths are retained for backwards compatibility. * perf: use `jobs.getQueryResults` to download result sets Replaces `tabledata.list` when `RowIterator` is used for query results. This likely also fixes a few edge cases around BigQuery scripting jobs. * revert unnecessary changes to _get_query_results * simplify RowIterator. no need to hack Table object * fix tests for bqstorage warning * populate location --- .../google/cloud/bigquery/_pandas_helpers.py | 16 +-- .../google/cloud/bigquery/client.py | 104 ++++++++++++-- .../google/cloud/bigquery/job/query.py | 14 +- .../google/cloud/bigquery/table.py | 17 ++- .../tests/unit/job/helpers.py | 10 +- .../tests/unit/job/test_base.py | 42 ++++-- .../tests/unit/job/test_query.py | 133 ++++++++++-------- .../tests/unit/test__pandas_helpers.py | 18 +-- .../tests/unit/test_client.py | 12 +- .../tests/unit/test_magics.py | 10 +- .../tests/unit/test_table.py | 11 +- 11 files changed, 256 insertions(+), 131 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 57c8f95f6f9e..7774ce26bd8d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -474,7 +474,7 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN pyarrow.parquet.write_table(arrow_table, filepath, compression=parquet_compression) -def _tabledata_list_page_to_arrow(page, column_names, arrow_types): +def _row_iterator_page_to_arrow(page, column_names, arrow_types): # Iterate over the page to force the API request to get the page data. try: next(iter(page)) @@ -490,8 +490,8 @@ def _tabledata_list_page_to_arrow(page, column_names, arrow_types): return pyarrow.RecordBatch.from_arrays(arrays, names=column_names) -def download_arrow_tabledata_list(pages, bq_schema): - """Use tabledata.list to construct an iterable of RecordBatches. +def download_arrow_row_iterator(pages, bq_schema): + """Use HTTP JSON RowIterator to construct an iterable of RecordBatches. Args: pages (Iterator[:class:`google.api_core.page_iterator.Page`]): @@ -510,10 +510,10 @@ def download_arrow_tabledata_list(pages, bq_schema): arrow_types = [bq_to_arrow_data_type(field) for field in bq_schema] for page in pages: - yield _tabledata_list_page_to_arrow(page, column_names, arrow_types) + yield _row_iterator_page_to_arrow(page, column_names, arrow_types) -def _tabledata_list_page_to_dataframe(page, column_names, dtypes): +def _row_iterator_page_to_dataframe(page, column_names, dtypes): # Iterate over the page to force the API request to get the page data. try: next(iter(page)) @@ -528,8 +528,8 @@ def _tabledata_list_page_to_dataframe(page, column_names, dtypes): return pandas.DataFrame(columns, columns=column_names) -def download_dataframe_tabledata_list(pages, bq_schema, dtypes): - """Use (slower, but free) tabledata.list to construct a DataFrame. +def download_dataframe_row_iterator(pages, bq_schema, dtypes): + """Use HTTP JSON RowIterator to construct a DataFrame. Args: pages (Iterator[:class:`google.api_core.page_iterator.Page`]): @@ -549,7 +549,7 @@ def download_dataframe_tabledata_list(pages, bq_schema, dtypes): bq_schema = schema._to_schema_fields(bq_schema) column_names = [field.name for field in bq_schema] for page in pages: - yield _tabledata_list_page_to_dataframe(page, column_names, dtypes) + yield _row_iterator_page_to_dataframe(page, column_names, dtypes) def _bqstorage_page_to_arrow(page): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 57df9455e45c..cd1474336ea0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -80,18 +80,19 @@ _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 _BASE_UPLOAD_TEMPLATE = ( - u"https://bigquery.googleapis.com/upload/bigquery/v2/projects/" - u"{project}/jobs?uploadType=" + "https://bigquery.googleapis.com/upload/bigquery/v2/projects/" + "{project}/jobs?uploadType=" ) -_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u"multipart" -_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u"resumable" -_GENERIC_CONTENT_TYPE = u"*/*" +_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart" +_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable" +_GENERIC_CONTENT_TYPE = "*/*" _READ_LESS_THAN_SIZE = ( "Size {:d} was specified but the file-like object only had " "{:d} bytes remaining." ) _NEED_TABLE_ARGUMENT = ( "The table argument should be a table ID string, Table, or TableReference" ) +_LIST_ROWS_FROM_QUERY_RESULTS_FIELDS = "jobReference,totalRows,pageToken,rows" class Project(object): @@ -293,7 +294,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) return page_iterator.HTTPIterator( @@ -371,7 +372,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) return page_iterator.HTTPIterator( @@ -1129,7 +1130,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) result = page_iterator.HTTPIterator( @@ -1207,7 +1208,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) result = page_iterator.HTTPIterator( @@ -1284,7 +1285,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) result = page_iterator.HTTPIterator( @@ -1510,7 +1511,7 @@ def delete_table( raise def _get_query_results( - self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None + self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None, ): """Get the query results object for a query job. @@ -1890,7 +1891,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) return page_iterator.HTTPIterator( @@ -2374,7 +2375,7 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) - data_str = u"\n".join(json.dumps(item) for item in json_rows) + data_str = "\n".join(json.dumps(item) for item in json_rows) encoded_str = data_str.encode() data_file = io.BytesIO(encoded_str) return self.load_table_from_file( @@ -3169,6 +3170,83 @@ def list_rows( # Pass in selected_fields separately from schema so that full # tables can be fetched without a column filter. selected_fields=selected_fields, + total_rows=getattr(table, "num_rows", None), + ) + return row_iterator + + def _list_rows_from_query_results( + self, + job_id, + location, + project, + schema, + total_rows=None, + destination=None, + max_results=None, + start_index=None, + page_size=None, + retry=DEFAULT_RETRY, + timeout=None, + ): + """List the rows of a completed query. + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults + Args: + job_id (str): + ID of a query job. + location (str): Location of the query job. + project (str): + ID of the project where the query job was run. + schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + The fields expected in these query results. Used to convert + from JSON to expected Python types. + total_rows (Optional[int]): + Total number of rows in the query results. + destination (Optional[Union[ \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableListItem, \ + google.cloud.bigquery.table.TableReference, \ + str, \ + ]]): + Destination table reference. Used to fetch the query results + with the BigQuery Storage API. + max_results (Optional[int]): + Maximum number of rows to return across the whole iterator. + start_index (Optional[int]): + The zero-based index of the starting row to read. + page_size (Optional[int]): + The maximum number of rows in each page of results from this request. + Non-positive values are ignored. Defaults to a sensible value set by the API. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. + """ + params = { + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": location, + } + + if start_index is not None: + params["startIndex"] = start_index + + row_iterator = RowIterator( + client=self, + api_request=functools.partial(self._call_api, retry, timeout=timeout), + path=f"/projects/{project}/queries/{job_id}", + schema=schema, + max_results=max_results, + page_size=page_size, + table=destination, + extra_params=params, + total_rows=total_rows, ) return row_iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index e25077360da5..1e2002eabc17 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -38,7 +38,6 @@ from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import _table_arg_to_table_ref -from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioning @@ -1159,12 +1158,13 @@ def result( if self._query_results.total_rows is None: return _EmptyRowIterator() - schema = self._query_results.schema - dest_table_ref = self.destination - dest_table = Table(dest_table_ref, schema=schema) - dest_table._properties["numRows"] = self._query_results.total_rows - rows = self._client.list_rows( - dest_table, + rows = self._client._list_rows_from_query_results( + self._query_results.job_id, + self.location, + self._query_results.project, + self._query_results.schema, + total_rows=self._query_results.total_rows, + destination=self.destination, page_size=page_size, max_results=max_results, start_index=start_index, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d6d966eee9b0..e46b7e3cd1ea 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1306,6 +1306,8 @@ class RowIterator(HTTPIterator): call the BigQuery Storage API to fetch rows. selected_fields (Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]): A subset of columns to select from this table. + total_rows (Optional[int]): + Total number of rows in the table. """ @@ -1321,6 +1323,7 @@ def __init__( extra_params=None, table=None, selected_fields=None, + total_rows=None, ): super(RowIterator, self).__init__( client, @@ -1342,7 +1345,7 @@ def __init__( self._schema = schema self._selected_fields = selected_fields self._table = table - self._total_rows = getattr(table, "num_rows", None) + self._total_rows = total_rows def _get_next_page_response(self): """Requests the next page from the path provided. @@ -1419,7 +1422,7 @@ def _to_arrow_iterable(self, bqstorage_client=None): selected_fields=self._selected_fields, ) tabledata_list_download = functools.partial( - _pandas_helpers.download_arrow_tabledata_list, iter(self.pages), self.schema + _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema ) return self._to_page_iterable( bqstorage_download, @@ -1496,7 +1499,7 @@ def to_arrow( ) and self.max_results is not None: warnings.warn( "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the tabledata.list endpoint.", + "reverting to fetching data with the REST endpoint.", stacklevel=2, ) create_bqstorage_client = False @@ -1582,7 +1585,7 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): selected_fields=self._selected_fields, ) tabledata_list_download = functools.partial( - _pandas_helpers.download_dataframe_tabledata_list, + _pandas_helpers.download_dataframe_row_iterator, iter(self.pages), self.schema, dtypes, @@ -1680,7 +1683,7 @@ def to_dataframe( ) and self.max_results is not None: warnings.warn( "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the tabledata.list endpoint.", + "reverting to fetching data with the REST endpoint.", stacklevel=2, ) create_bqstorage_client = False @@ -2167,7 +2170,7 @@ def _item_to_row(iterator, resource): ) -def _tabledata_list_page_columns(schema, response): +def _row_iterator_page_columns(schema, response): """Make a generator of all the columns in a page from tabledata.list. This enables creating a :class:`pandas.DataFrame` and other @@ -2197,7 +2200,7 @@ def _rows_page_start(iterator, page, response): """ # Make a (lazy) copy of the page in column-oriented format for use in data # science packages. - page._columns = _tabledata_list_page_columns(iterator._schema, response) + page._columns = _row_iterator_page_columns(iterator._schema, response) total_rows = response.get("totalRows") if total_rows is not None: diff --git a/packages/google-cloud-bigquery/tests/unit/job/helpers.py b/packages/google-cloud-bigquery/tests/unit/job/helpers.py index f928054f6127..ea071c5acd59 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/job/helpers.py @@ -60,6 +60,7 @@ def _make_job_resource( endpoint="https://bigquery.googleapis.com", job_type="load", job_id="a-random-id", + location="US", project_id="some-project", user_email="bq-user@example.com", ): @@ -69,7 +70,11 @@ def _make_job_resource( "statistics": {"creationTime": creation_time_ms, job_type: {}}, "etag": etag, "id": "{}:{}".format(project_id, job_id), - "jobReference": {"projectId": project_id, "jobId": job_id}, + "jobReference": { + "projectId": project_id, + "jobId": job_id, + "location": location, + }, "selfLink": "{}/bigquery/v2/projects/{}/jobs/{}".format( endpoint, project_id, job_id ), @@ -130,7 +135,7 @@ def _table_ref(self, table_id): return TableReference(self.DS_REF, table_id) - def _make_resource(self, started=False, ended=False): + def _make_resource(self, started=False, ended=False, location="US"): self._setUpConstants() return _make_job_resource( creation_time_ms=int(self.WHEN_TS * 1000), @@ -144,6 +149,7 @@ def _make_resource(self, started=False, ended=False): job_id=self.JOB_ID, project_id=self.PROJECT, user_email=self.USER_EMAIL, + location=location, ) def _verifyInitialReadonlyProperties(self, job): diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 90d4388b8fd9..12e2d4b8b44a 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -882,10 +882,14 @@ def test_done_already(self): def test_result_default_wo_state(self): begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True + job_id=self.JOB_ID, project_id=self.PROJECT, location="US", started=True ) done_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + job_id=self.JOB_ID, + project_id=self.PROJECT, + location="US", + started=True, + ended=True, ) conn = _make_connection( _make_retriable_exception(), @@ -907,7 +911,7 @@ def test_result_default_wo_state(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "US"}, timeout=None, ) conn.api_request.assert_has_calls( @@ -916,38 +920,48 @@ def test_result_default_wo_state(self): def test_result_w_retry_wo_state(self): begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True + job_id=self.JOB_ID, project_id=self.PROJECT, location="EU", started=True ) done_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + job_id=self.JOB_ID, + project_id=self.PROJECT, + location="EU", + started=True, + ended=True, ) conn = _make_connection( exceptions.NotFound("not normally retriable"), begun_job_resource, - # The call to done() / reload() does not get the custom retry - # policy passed to it, so we don't throw a non-retriable - # exception here. See: - # https://github.com/googleapis/python-bigquery/issues/24 - _make_retriable_exception(), + exceptions.NotFound("not normally retriable"), done_job_resource, ) client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) + job = self._make_one( + self._job_reference(self.JOB_ID, self.PROJECT, "EU"), client + ) custom_predicate = mock.Mock() custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) + custom_retry = google.api_core.retry.Retry( + predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.001, + ) self.assertIs(job.result(retry=custom_retry), job) begin_call = mock.call( method="POST", path=f"/projects/{self.PROJECT}/jobs", - data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + data={ + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": "EU", + } + }, timeout=None, ) reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "EU"}, timeout=None, ) conn.api_request.assert_has_calls( diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index c0b90d8ea699..daaf2e557108 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -23,6 +23,7 @@ import requests from six.moves import http_client +from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery.query from .helpers import _Base from .helpers import _make_client @@ -40,8 +41,10 @@ def _get_target_class(): return QueryJob - def _make_resource(self, started=False, ended=False): - resource = super(TestQueryJob, self)._make_resource(started, ended) + def _make_resource(self, started=False, ended=False, location="US"): + resource = super(TestQueryJob, self)._make_resource( + started, ended, location=location + ) config = resource["configuration"]["query"] config["query"] = self.QUERY return resource @@ -770,22 +773,30 @@ def test_result(self): query_resource = { "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "jobReference": { + "projectId": self.PROJECT, + "jobId": self.JOB_ID, + "location": "EU", + }, } query_resource_done = { "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "jobReference": { + "projectId": self.PROJECT, + "jobId": self.JOB_ID, + "location": "EU", + }, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "2", } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, location="EU") + job_resource_done = self._make_resource(started=True, ended=True, location="EU") job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", "tableId": "dest_table", } - tabledata_resource = { + query_page_resource = { # Explicitly set totalRows to be different from the initial # response to test update during iteration. "totalRows": "1", @@ -793,7 +804,7 @@ def test_result(self): "rows": [{"f": [{"v": "abc"}]}], } conn = _make_connection( - query_resource, query_resource_done, job_resource_done, tabledata_resource + query_resource, query_resource_done, job_resource_done, query_page_resource ) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -809,26 +820,30 @@ def test_result(self): # on the response from tabledata.list. self.assertEqual(result.total_rows, 1) + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, + path=query_results_path, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "EU"}, timeout=None, ) - tabledata_call = mock.call( + query_page_call = mock.call( method="GET", - path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, + path=query_results_path, + query_params={ + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "EU", + }, timeout=None, ) conn.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call, tabledata_call] + [query_results_call, query_results_call, reload_call, query_page_call] ) def test_result_with_done_job_calls_get_query_results(self): @@ -838,18 +853,18 @@ def test_result_with_done_job_calls_get_query_results(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "1", } - job_resource = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, ended=True, location="EU") job_resource["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", "tableId": "dest_table", } - tabledata_resource = { + results_page_resource = { "totalRows": "1", "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - conn = _make_connection(query_resource_done, tabledata_resource) + conn = _make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -859,19 +874,23 @@ def test_result_with_done_job_calls_get_query_results(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, + path=query_results_path, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) - tabledata_call = mock.call( + query_results_page_call = mock.call( method="GET", - path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, + path=query_results_path, + query_params={ + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "EU", + }, timeout=None, ) - conn.api_request.assert_has_calls([query_results_call, tabledata_call]) + conn.api_request.assert_has_calls([query_results_call, query_results_page_call]) def test_result_with_max_results(self): from google.cloud.bigquery.table import RowIterator @@ -882,7 +901,7 @@ def test_result_with_max_results(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", } - tabledata_resource = { + query_page_resource = { "totalRows": "5", "pageToken": None, "rows": [ @@ -891,7 +910,7 @@ def test_result_with_max_results(self): {"f": [{"v": "ghi"}]}, ], } - connection = _make_connection(query_resource, tabledata_resource) + connection = _make_connection(query_resource, query_page_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -907,9 +926,9 @@ def test_result_with_max_results(self): self.assertEqual(len(rows), 3) self.assertEqual(len(connection.api_request.call_args_list), 2) - tabledata_list_request = connection.api_request.call_args_list[1] + query_page_request = connection.api_request.call_args_list[1] self.assertEqual( - tabledata_list_request[1]["query_params"]["maxResults"], max_results + query_page_request[1]["query_params"]["maxResults"], max_results ) def test_result_w_retry(self): @@ -925,8 +944,10 @@ def test_result_w_retry(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "2", } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, location="asia-northeast1") + job_resource_done = self._make_resource( + started=True, ended=True, location="asia-northeast1" + ) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -958,13 +979,13 @@ def test_result_w_retry(self): query_results_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, + query_params={"maxResults": 0, "location": "asia-northeast1"}, timeout=None, ) reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "asia-northeast1"}, timeout=None, ) @@ -1059,14 +1080,14 @@ def test_result_w_page_size(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "4", } - job_resource = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, ended=True, location="US") q_config = job_resource["configuration"]["query"] q_config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, "tableId": self.TABLE_ID, } - tabledata_resource = { + query_page_resource = { "totalRows": 4, "pageToken": "some-page-token", "rows": [ @@ -1075,9 +1096,9 @@ def test_result_w_page_size(self): {"f": [{"v": "row3"}]}, ], } - tabledata_resource_page_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} + query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} conn = _make_connection( - query_results_resource, tabledata_resource, tabledata_resource_page_2 + query_results_resource, query_page_resource, query_page_resource_2 ) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1089,27 +1110,29 @@ def test_result_w_page_size(self): actual_rows = list(result) self.assertEqual(len(actual_rows), 4) - tabledata_path = "/projects/%s/datasets/%s/tables/%s/data" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" + query_page_1_call = mock.call( + method="GET", + path=query_results_path, + query_params={ + "maxResults": 3, + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "US", + }, + timeout=None, ) - conn.api_request.assert_has_calls( - [ - mock.call( - method="GET", - path=tabledata_path, - query_params={"maxResults": 3}, - timeout=None, - ), - mock.call( - method="GET", - path=tabledata_path, - query_params={"pageToken": "some-page-token", "maxResults": 3}, - timeout=None, - ), - ] + query_page_2_call = mock.call( + method="GET", + path=query_results_path, + query_params={ + "pageToken": "some-page-token", + "maxResults": 3, + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "US", + }, + timeout=None, ) + conn.api_request.assert_has_calls([query_page_1_call, query_page_2_call]) def test_result_with_start_index(self): from google.cloud.bigquery.table import RowIterator diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index bdb1c56ea2f7..ef0c40e1aee1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -1202,7 +1202,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): +def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1216,7 +1216,7 @@ def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): schema.SchemaField("alien_field", "ALIEN_FLOAT_TYPE"), ] - results_gen = module_under_test.download_arrow_tabledata_list(pages, bq_schema) + results_gen = module_under_test.download_arrow_row_iterator(pages, bq_schema) with warnings.catch_warnings(record=True) as warned: result = next(results_gen) @@ -1238,7 +1238,7 @@ def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_arrow_tabledata_list_known_field_type(module_under_test): +def test_download_arrow_row_iterator_known_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1252,7 +1252,7 @@ def test_download_arrow_tabledata_list_known_field_type(module_under_test): schema.SchemaField("non_alien_field", "STRING"), ] - results_gen = module_under_test.download_arrow_tabledata_list(pages, bq_schema) + results_gen = module_under_test.download_arrow_row_iterator(pages, bq_schema) with warnings.catch_warnings(record=True) as warned: result = next(results_gen) @@ -1273,7 +1273,7 @@ def test_download_arrow_tabledata_list_known_field_type(module_under_test): @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): +def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1287,7 +1287,7 @@ def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): {"name": "non_alien_field", "type": "STRING", "mode": "NULLABLE"}, ] - results_gen = module_under_test.download_arrow_tabledata_list(pages, dict_schema) + results_gen = module_under_test.download_arrow_row_iterator(pages, dict_schema) result = next(results_gen) assert len(result.columns) == 2 @@ -1301,7 +1301,7 @@ def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_test): +def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1315,7 +1315,7 @@ def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_tes {"name": "non_alien_field", "type": "STRING", "mode": "NULLABLE"}, ] - results_gen = module_under_test.download_dataframe_tabledata_list( + results_gen = module_under_test.download_dataframe_row_iterator( pages, dict_schema, dtypes={} ) result = next(results_gen) @@ -1335,5 +1335,5 @@ def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_tes def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): - dataframe = module_under_test._tabledata_list_page_to_dataframe([], [], {}) + dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index e507834f6097..ca2f7ea66d00 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -6786,12 +6786,17 @@ def _bigquery_timestamp_float_repr(ts_float): age = SchemaField("age", "INTEGER", mode="NULLABLE") joined = SchemaField("joined", "TIMESTAMP", mode="NULLABLE") table = Table(self.TABLE_REF, schema=[full_name, age, joined]) + table._properties["numRows"] = 7 iterator = client.list_rows(table, timeout=7.5) + + # Check that initial total_rows is populated from the table. + self.assertEqual(iterator.total_rows, 7) page = six.next(iterator.pages) rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token + + # Check that total_rows is updated based on API response. + self.assertEqual(iterator.total_rows, ROWS) f2i = {"full_name": 0, "age": 1, "joined": 2} self.assertEqual(len(rows), 4) @@ -6799,8 +6804,7 @@ def _bigquery_timestamp_float_repr(ts_float): self.assertEqual(rows[1], Row(("Bharney Rhubble", 33, WHEN_1), f2i)) self.assertEqual(rows[2], Row(("Wylma Phlyntstone", 29, WHEN_2), f2i)) self.assertEqual(rows[3], Row(("Bhettye Rhubble", None, None), f2i)) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) + self.assertEqual(iterator.next_page_token, TOKEN) conn.api_request.assert_called_once_with( method="GET", path="/%s" % PATH, query_params={}, timeout=7.5 diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index b2877845af36..a7cf92919dd6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -170,7 +170,7 @@ def test_context_with_default_connection(): default_conn = make_connection(QUERY_RESOURCE, QUERY_RESULTS_RESOURCE) conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) @@ -235,7 +235,7 @@ def test_context_with_custom_connection(): default_conn = make_connection() conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) @@ -1078,7 +1078,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex ) conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) with list_rows_patch, default_patch: @@ -1117,7 +1117,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): ) conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) with list_rows_patch, default_patch: @@ -1156,7 +1156,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): ) conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) with list_rows_patch, default_patch: diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index e21453b9f8f7..e232f32e68c4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1572,10 +1572,7 @@ def test_constructor_with_table(self): from google.cloud.bigquery.table import Table table = Table("proj.dset.tbl") - table._properties["numRows"] = 100 - - iterator = self._make_one(table=table) - + iterator = self._make_one(table=table, total_rows=100) self.assertIs(iterator._table, table) self.assertEqual(iterator.total_rows, 100) @@ -1883,7 +1880,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): for warning in warned if warning.category is UserWarning and "cannot use bqstorage_client" in str(warning).lower() - and "tabledata.list" in str(warning) + and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") mock_client._create_bqstorage_client.assert_not_called() @@ -2667,7 +2664,7 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self): for warning in warned if warning.category is UserWarning and "cannot use bqstorage_client" in str(warning).lower() - and "tabledata.list" in str(warning) + and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") @@ -2703,7 +2700,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): for warning in warned if warning.category is UserWarning and "cannot use bqstorage_client" in str(warning).lower() - and "tabledata.list" in str(warning) + and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") mock_client._create_bqstorage_client.assert_not_called() From e7655011fc1f7f50e8ed555e90608d9471d7c4e5 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 5 Nov 2020 11:13:26 -0600 Subject: [PATCH 0982/2016] chore: release 2.3.0 (#351) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 25 +++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 384704bbf6fb..cdcfbe81f320 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,31 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.3.0](https://www.github.com/googleapis/python-bigquery/compare/v2.2.0...v2.3.0) (2020-11-04) + + +### Features + +* add `reload` argument to `*Job.done()` functions ([#341](https://www.github.com/googleapis/python-bigquery/issues/341)) ([e51fd45](https://www.github.com/googleapis/python-bigquery/commit/e51fd45fdb0481ac5d59cc0edbfa0750928b2596)) +* pass retry from Job.result() to Job.done() ([#41](https://www.github.com/googleapis/python-bigquery/issues/41)) ([284e17a](https://www.github.com/googleapis/python-bigquery/commit/284e17a17adf6844a17db2c6fed54a649b1f997e)) + + +### Bug Fixes + +* add missing spaces in opentelemetry log message ([#360](https://www.github.com/googleapis/python-bigquery/issues/360)) ([4f326b1](https://www.github.com/googleapis/python-bigquery/commit/4f326b1ca4411cfbf5ded86955a963d3e05a409f)) +* **dbapi:** avoid running % format with no query parameters ([#348](https://www.github.com/googleapis/python-bigquery/issues/348)) ([5dd1a5e](https://www.github.com/googleapis/python-bigquery/commit/5dd1a5e77f13b8e576e917069e247c5390a81900)) +* create_job method accepts dictionary arguments ([#300](https://www.github.com/googleapis/python-bigquery/issues/300)) ([155bacc](https://www.github.com/googleapis/python-bigquery/commit/155bacc156f181384ca6dba699ab83d0398176d1)) + + +### Performance Improvements + +* use `jobs.getQueryResults` to download result sets ([#363](https://www.github.com/googleapis/python-bigquery/issues/363)) ([0c3476d](https://www.github.com/googleapis/python-bigquery/commit/0c3476d56380d70115f6fd765bf5c5261967052f)) + + +### Documentation + +* add documents for QueryPlanEntry and QueryPlanEntryStep ([#344](https://www.github.com/googleapis/python-bigquery/issues/344)) ([dca2e4c](https://www.github.com/googleapis/python-bigquery/commit/dca2e4ca7c2ae183ac4bb60f653d425a43a86bea)) + ## [2.2.0](https://www.github.com/googleapis/python-bigquery/compare/v2.1.0...v2.2.0) (2020-10-19) From 63d6a736d301348404805f5901d701925a73943c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 5 Nov 2020 11:42:06 -0600 Subject: [PATCH 0983/2016] chore: release v2.3.1 (#370) Follow-up to failed #351 release --- packages/google-cloud-bigquery/CHANGELOG.md | 8 ++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index cdcfbe81f320..787ba7557558 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 2.3.1 + +11-05-2020 09:27 PST + +### Internal / Testing Changes + +- update `google.cloud.bigquery.__version__` + ## [2.3.0](https://www.github.com/googleapis/python-bigquery/compare/v2.2.0...v2.3.0) (2020-11-04) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index bd0f8e5c7d25..474ccbcf2e6e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.2.0" +__version__ = "2.3.1" From b829d35776e7837ba057fadbebffe9a2c526c3d6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 10 Nov 2020 11:11:21 -0600 Subject: [PATCH 0984/2016] perf: use `getQueryResults` from DB-API (#375) I suspect `list_rows` (`tabledata.list`) was being called directly due to no `page_size` parameter on `QueryJob.result` at the time. --- .../google/cloud/bigquery/dbapi/cursor.py | 7 +------ .../google-cloud-bigquery/tests/unit/test_dbapi_cursor.py | 8 ++++++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 597313fd6263..74f8aec4ed46 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -229,7 +229,6 @@ def _try_fetch(self, size=None): return if self._query_data is None: - client = self.connection._client bqstorage_client = self.connection._bqstorage_client if bqstorage_client is not None: @@ -237,11 +236,7 @@ def _try_fetch(self, size=None): self._query_data = _helpers.to_bq_table_rows(rows_iterable) return - rows_iter = client.list_rows( - self._query_job.destination, - selected_fields=self._query_job._query_results.schema, - page_size=self.arraysize, - ) + rows_iter = self._query_job.result(page_size=self.arraysize) self._query_data = iter(rows_iter) def _bqstorage_fetch(self, bqstorage_client): diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 5c3bfcae9198..f55b3fd3f804 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -66,8 +66,8 @@ def _mock_client( num_dml_affected_rows=num_dml_affected_rows, dry_run=dry_run_job, total_bytes_processed=total_bytes_processed, + rows=rows, ) - mock_client.list_rows.return_value = rows mock_client._default_query_job_config = default_query_job_config # Assure that the REST client gets used, not the BQ Storage client. @@ -102,9 +102,13 @@ def _mock_job( num_dml_affected_rows=None, dry_run=False, total_bytes_processed=0, + rows=None, ): from google.cloud.bigquery import job + if rows is None: + rows = [] + mock_job = mock.create_autospec(job.QueryJob) mock_job.error_result = None mock_job.state = "DONE" @@ -114,7 +118,7 @@ def _mock_job( mock_job.result.side_effect = exceptions.NotFound mock_job.total_bytes_processed = total_bytes_processed else: - mock_job.result.return_value = mock_job + mock_job.result.return_value = rows mock_job._query_results = self._mock_results( total_rows=total_rows, schema=schema, From b9e12ebd4855478303a950eae1aeff6375b7efd9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 10 Nov 2020 11:48:10 -0600 Subject: [PATCH 0985/2016] deps: expand pyarrow dependencies to include version 2 (#368) Pyarrow 2.0 includes several bug fixes. The wire format remains the same, so it continues to be compatible with the BigQuery Storage API. --- packages/google-cloud-bigquery/setup.py | 4 ++-- .../tests/unit/test_table.py | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 548ceac09392..48c4a7518c16 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -46,12 +46,12 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.32.0, < 2.0dev", - "pyarrow >= 1.0.0, < 2.0dev", + "pyarrow >= 1.0.0, < 3.0dev", ], "pandas": [ "pandas>=0.23.0", # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword. - "pyarrow >= 1.0.0, < 2.0dev", + "pyarrow >= 1.0.0, < 3.0dev", ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index e232f32e68c4..eccc46a7ae94 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -19,6 +19,7 @@ import warnings import mock +import pkg_resources import pytest import six @@ -41,8 +42,11 @@ try: import pyarrow import pyarrow.types + + PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) except ImportError: # pragma: NO COVER pyarrow = None + PYARROW_VERSION = pkg_resources.parse_version("0.0.1") try: from tqdm import tqdm @@ -52,6 +56,9 @@ from google.cloud.bigquery.dataset import DatasetReference +PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") + + def _mock_client(): from google.cloud.bigquery import client @@ -2339,12 +2346,19 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): df = row_iterator.to_dataframe(create_bqstorage_client=False) + tzinfo = None + if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION: + tzinfo = dt.timezone.utc + self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows self.assertEqual(list(df.columns), ["some_timestamp"]) self.assertEqual( list(df["some_timestamp"]), - [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], + [ + dt.datetime(4567, 1, 1, tzinfo=tzinfo), + dt.datetime(9999, 12, 31, tzinfo=tzinfo), + ], ) @pytest.mark.xfail( From c03bf72fe8cd4dc8c61cfe8a44d62ef33eb3f1f4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 10 Nov 2020 13:33:59 -0600 Subject: [PATCH 0986/2016] perf: cache first page of `jobs.getQueryResults` rows (#374) Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com> --- .../google/cloud/bigquery/client.py | 4 +- .../google/cloud/bigquery/job/query.py | 85 ++++++++++++------- .../google/cloud/bigquery/table.py | 11 ++- .../tests/unit/job/test_query.py | 55 +++++++++--- .../tests/unit/job/test_query_pandas.py | 16 +--- .../tests/unit/test_client.py | 4 +- 6 files changed, 115 insertions(+), 60 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index cd1474336ea0..c67ef54e0f41 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1534,7 +1534,7 @@ def _get_query_results( A new ``_QueryResults`` instance. """ - extra_params = {"maxResults": 0} + extra_params = {} if project is None: project = self.project @@ -3187,6 +3187,7 @@ def _list_rows_from_query_results( page_size=None, retry=DEFAULT_RETRY, timeout=None, + first_page_response=None, ): """List the rows of a completed query. See @@ -3247,6 +3248,7 @@ def _list_rows_from_query_results( table=destination, extra_params=params, total_rows=total_rows, + first_page_response=first_page_response, ) return row_iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 1e2002eabc17..6c9221043fdd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -990,48 +990,22 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): Returns: bool: True if the job is complete, False otherwise. """ - is_done = ( - # Only consider a QueryJob complete when we know we have the final - # query results available. - self._query_results is not None - and self._query_results.complete - and self.state == _DONE_STATE - ) # Do not refresh if the state is already done, as the job will not # change once complete. + is_done = self.state == _DONE_STATE if not reload or is_done: return is_done - # Since the API to getQueryResults can hang up to the timeout value - # (default of 10 seconds), set the timeout parameter to ensure that - # the timeout from the futures API is respected. See: - # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 - timeout_ms = None - if self._done_timeout is not None: - # Subtract a buffer for context switching, network latency, etc. - api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS - api_timeout = max(min(api_timeout, 10), 0) - self._done_timeout -= api_timeout - self._done_timeout = max(0, self._done_timeout) - timeout_ms = int(api_timeout * 1000) + self._reload_query_results(retry=retry, timeout=timeout) # If an explicit timeout is not given, fall back to the transport timeout # stored in _blocking_poll() in the process of polling for job completion. transport_timeout = timeout if timeout is not None else self._transport_timeout - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - timeout_ms=timeout_ms, - location=self.location, - timeout=transport_timeout, - ) - # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are # correctly populated. - if self._query_results.complete and self.state != _DONE_STATE: + if self._query_results.complete: self.reload(retry=retry, timeout=transport_timeout) return self.state == _DONE_STATE @@ -1098,6 +1072,45 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): exc.query_job = self raise + def _reload_query_results(self, retry=DEFAULT_RETRY, timeout=None): + """Refresh the cached query results. + + Args: + retry (Optional[google.api_core.retry.Retry]): + How to retry the call that retrieves query results. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + """ + if self._query_results and self._query_results.complete: + return + + # Since the API to getQueryResults can hang up to the timeout value + # (default of 10 seconds), set the timeout parameter to ensure that + # the timeout from the futures API is respected. See: + # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 + timeout_ms = None + if self._done_timeout is not None: + # Subtract a buffer for context switching, network latency, etc. + api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS + api_timeout = max(min(api_timeout, 10), 0) + self._done_timeout -= api_timeout + self._done_timeout = max(0, self._done_timeout) + timeout_ms = int(api_timeout * 1000) + + # If an explicit timeout is not given, fall back to the transport timeout + # stored in _blocking_poll() in the process of polling for job completion. + transport_timeout = timeout if timeout is not None else self._transport_timeout + + self._query_results = self._client._get_query_results( + self.job_id, + retry, + project=self.project, + timeout_ms=timeout_ms, + location=self.location, + timeout=transport_timeout, + ) + def result( self, page_size=None, @@ -1144,6 +1157,11 @@ def result( """ try: super(QueryJob, self).result(retry=retry, timeout=timeout) + + # Since the job could already be "done" (e.g. got a finished job + # via client.get_job), the superclass call to done() might not + # set the self._query_results cache. + self._reload_query_results(retry=retry, timeout=timeout) except exceptions.GoogleAPICallError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self @@ -1158,10 +1176,14 @@ def result( if self._query_results.total_rows is None: return _EmptyRowIterator() + first_page_response = None + if max_results is None and page_size is None and start_index is None: + first_page_response = self._query_results._properties + rows = self._client._list_rows_from_query_results( - self._query_results.job_id, + self.job_id, self.location, - self._query_results.project, + self.project, self._query_results.schema, total_rows=self._query_results.total_rows, destination=self.destination, @@ -1170,6 +1192,7 @@ def result( start_index=start_index, retry=retry, timeout=timeout, + first_page_response=first_page_response, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index e46b7e3cd1ea..c14a8adc46d5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1308,7 +1308,9 @@ class RowIterator(HTTPIterator): A subset of columns to select from this table. total_rows (Optional[int]): Total number of rows in the table. - + first_page_response (Optional[dict]): + API response for the first page of results. These are returned when + the first page is requested. """ def __init__( @@ -1324,6 +1326,7 @@ def __init__( table=None, selected_fields=None, total_rows=None, + first_page_response=None, ): super(RowIterator, self).__init__( client, @@ -1346,6 +1349,7 @@ def __init__( self._selected_fields = selected_fields self._table = table self._total_rows = total_rows + self._first_page_response = first_page_response def _get_next_page_response(self): """Requests the next page from the path provided. @@ -1354,6 +1358,11 @@ def _get_next_page_response(self): Dict[str, object]: The parsed JSON response of the next page's contents. """ + if self._first_page_response: + response = self._first_page_response + self._first_page_response = None + return response + params = self._get_query_params() if self._page_size is not None: if self.page_number and "startIndex" in params: diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index daaf2e557108..41e31f4694e2 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -787,7 +787,9 @@ def test_result(self): "location": "EU", }, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", + "totalRows": "3", + "rows": [{"f": [{"v": "abc"}]}], + "pageToken": "next-page", } job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") @@ -799,9 +801,9 @@ def test_result(self): query_page_resource = { # Explicitly set totalRows to be different from the initial # response to test update during iteration. - "totalRows": "1", + "totalRows": "2", "pageToken": None, - "rows": [{"f": [{"v": "abc"}]}], + "rows": [{"f": [{"v": "def"}]}], } conn = _make_connection( query_resource, query_resource_done, job_resource_done, query_page_resource @@ -812,19 +814,20 @@ def test_result(self): result = job.result() self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 2) + self.assertEqual(result.total_rows, 3) rows = list(result) - self.assertEqual(len(rows), 1) + self.assertEqual(len(rows), 2) self.assertEqual(rows[0].col1, "abc") + self.assertEqual(rows[1].col1, "def") # Test that the total_rows property has changed during iteration, based # on the response from tabledata.list. - self.assertEqual(result.total_rows, 1) + self.assertEqual(result.total_rows, 2) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", path=query_results_path, - query_params={"maxResults": 0, "location": "EU"}, + query_params={"location": "EU"}, timeout=None, ) reload_call = mock.call( @@ -839,6 +842,7 @@ def test_result(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", + "pageToken": "next-page", }, timeout=None, ) @@ -851,7 +855,9 @@ def test_result_with_done_job_calls_get_query_results(self): "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "1", + "totalRows": "2", + "rows": [{"f": [{"v": "abc"}]}], + "pageToken": "next-page", } job_resource = self._make_resource(started=True, ended=True, location="EU") job_resource["configuration"]["query"]["destinationTable"] = { @@ -860,9 +866,9 @@ def test_result_with_done_job_calls_get_query_results(self): "tableId": "dest_table", } results_page_resource = { - "totalRows": "1", + "totalRows": "2", "pageToken": None, - "rows": [{"f": [{"v": "abc"}]}], + "rows": [{"f": [{"v": "def"}]}], } conn = _make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) @@ -871,14 +877,15 @@ def test_result_with_done_job_calls_get_query_results(self): result = job.result() rows = list(result) - self.assertEqual(len(rows), 1) + self.assertEqual(len(rows), 2) self.assertEqual(rows[0].col1, "abc") + self.assertEqual(rows[1].col1, "def") query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", path=query_results_path, - query_params={"maxResults": 0, "location": "EU"}, + query_params={"location": "EU"}, timeout=None, ) query_results_page_call = mock.call( @@ -887,6 +894,7 @@ def test_result_with_done_job_calls_get_query_results(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", + "pageToken": "next-page", }, timeout=None, ) @@ -900,6 +908,12 @@ def test_result_with_max_results(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", + # These rows are discarded because max_results is set. + "rows": [ + {"f": [{"v": "xyz"}]}, + {"f": [{"v": "uvw"}]}, + {"f": [{"v": "rst"}]}, + ], } query_page_resource = { "totalRows": "5", @@ -925,6 +939,7 @@ def test_result_with_max_results(self): rows = list(result) self.assertEqual(len(rows), 3) + self.assertEqual(rows[0].col1, "abc") self.assertEqual(len(connection.api_request.call_args_list), 2) query_page_request = connection.api_request.call_args_list[1] self.assertEqual( @@ -979,7 +994,7 @@ def test_result_w_retry(self): query_results_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0, "location": "asia-northeast1"}, + query_params={"location": "asia-northeast1"}, timeout=None, ) reload_call = mock.call( @@ -1079,6 +1094,12 @@ def test_result_w_page_size(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "4", + # These rows are discarded because page_size is set. + "rows": [ + {"f": [{"v": "xyz"}]}, + {"f": [{"v": "uvw"}]}, + {"f": [{"v": "rst"}]}, + ], } job_resource = self._make_resource(started=True, ended=True, location="US") q_config = job_resource["configuration"]["query"] @@ -1109,6 +1130,7 @@ def test_result_w_page_size(self): # Assert actual_rows = list(result) self.assertEqual(len(actual_rows), 4) + self.assertEqual(actual_rows[0].col1, "row1") query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_page_1_call = mock.call( @@ -1142,6 +1164,12 @@ def test_result_with_start_index(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", + # These rows are discarded because start_index is set. + "rows": [ + {"f": [{"v": "xyz"}]}, + {"f": [{"v": "uvw"}]}, + {"f": [{"v": "rst"}]}, + ], } tabledata_resource = { "totalRows": "5", @@ -1168,6 +1196,7 @@ def test_result_with_start_index(self): rows = list(result) self.assertEqual(len(rows), 4) + self.assertEqual(rows[0].col1, "abc") self.assertEqual(len(connection.api_request.call_args_list), 2) tabledata_list_request = connection.api_request.call_args_list[1] self.assertEqual( diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 37f4a6dec10e..b0a652b783f6 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -161,8 +161,6 @@ def test_to_arrow(): }, ] }, - } - tabledata_resource = { "rows": [ { "f": [ @@ -176,13 +174,11 @@ def test_to_arrow(): {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, ] }, - ] + ], } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, tabledata_resource - ) + connection = _make_connection(begun_resource, query_resource, done_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -234,20 +230,16 @@ def test_to_dataframe(): {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, ] }, - } - tabledata_resource = { "rows": [ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] + ], } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, tabledata_resource - ) + connection = _make_connection(begun_resource, query_resource, done_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ca2f7ea66d00..dd57ee79814d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -319,7 +319,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): conn.api_request.assert_called_once_with( method="GET", path=path, - query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, + query_params={"timeoutMs": 500, "location": self.LOCATION}, timeout=42, ) @@ -336,7 +336,7 @@ def test__get_query_results_miss_w_client_location(self): conn.api_request.assert_called_once_with( method="GET", path="/projects/PROJECT/queries/nothere", - query_params={"maxResults": 0, "location": self.LOCATION}, + query_params={"location": self.LOCATION}, timeout=None, ) From 2664db8c52c1aa43eba3c46c002d0465ea7e1f16 Mon Sep 17 00:00:00 2001 From: Carlos de la Guardia Date: Tue, 10 Nov 2020 16:02:15 -0600 Subject: [PATCH 0987/2016] feat: allow routine references (#378) * feat: allow routine references in dataset access property * build: black formatting --- .../google/cloud/bigquery/dataset.py | 46 +++++++++++++------ .../tests/unit/test_dataset.py | 26 +++++++++++ 2 files changed, 58 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 9a80f30b5fa7..ce07c8048558 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -79,8 +79,9 @@ class AccessEntry(object): """Represents grant of an access role to an entity. An entry must have exactly one of the allowed :attr:`ENTITY_TYPES`. If - anything but ``view`` is set, a ``role`` is also required. ``role`` is - omitted for a ``view``, because ``view`` s are always read-only. + anything but ``view`` or ``routine`` are set, a ``role`` is also required. + ``role`` is omitted for ``view`` and ``routine``, because they are always + read-only. See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets. @@ -88,17 +89,17 @@ class AccessEntry(object): role (str): Role granted to the entity. The following string values are supported: `'READER'`, `'WRITER'`, `'OWNER'`. It may also be - :data:`None` if the ``entity_type`` is ``view``. + :data:`None` if the ``entity_type`` is ``view`` or ``routine``. entity_type (str): Type of entity being granted the role. One of :attr:`ENTITY_TYPES`. entity_id (Union[str, Dict[str, str]]): - If the ``entity_type`` is not 'view', the ``entity_id`` is the - ``str`` ID of the entity being granted the role. If the - ``entity_type`` is 'view', the ``entity_id`` is a ``dict`` - representing the view from a different dataset to grant access to - in the following format:: + If the ``entity_type`` is not 'view' or 'routine', the ``entity_id`` + is the ``str`` ID of the entity being granted the role. If the + ``entity_type`` is 'view' or 'routine', the ``entity_id`` is a ``dict`` + representing the view or routine from a different dataset to grant + access to in the following format for views:: { 'projectId': string, @@ -106,11 +107,19 @@ class AccessEntry(object): 'tableId': string } + For routines:: + + { + 'projectId': string, + 'datasetId': string, + 'routineId': string + } + Raises: ValueError: If the ``entity_type`` is not among :attr:`ENTITY_TYPES`, or if a - ``view`` has ``role`` set, or a non ``view`` **does not** have a - ``role`` set. + ``view`` or a ``routine`` has ``role`` set, or a non ``view`` and + non ``routine`` **does not** have a ``role`` set. Examples: >>> entry = AccessEntry('OWNER', 'userByEmail', 'user@example.com') @@ -124,7 +133,15 @@ class AccessEntry(object): """ ENTITY_TYPES = frozenset( - ["userByEmail", "groupByEmail", "domain", "specialGroup", "view", "iamMember"] + [ + "userByEmail", + "groupByEmail", + "domain", + "specialGroup", + "view", + "iamMember", + "routine", + ] ) """Allowed entity types.""" @@ -135,10 +152,11 @@ def __init__(self, role, entity_type, entity_id): ", ".join(self.ENTITY_TYPES), ) raise ValueError(message) - if entity_type == "view": + if entity_type in ("view", "routine"): if role is not None: raise ValueError( - "Role must be None for a view. Received " "role: %r" % (role,) + "Role must be None for a %r. Received " + "role: %r" % (entity_type, role) ) else: if role is None: @@ -409,7 +427,7 @@ def access_entries(self): entries. ``role`` augments the entity type and must be present **unless** the - entity type is ``view``. + entity type is ``view`` or ``routine``. Raises: TypeError: If 'value' is not a sequence diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index e4977a2703f8..b3a53a08dcb9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -53,6 +53,21 @@ def test_ctor_view_success(self): self.assertEqual(entry.entity_type, entity_type) self.assertEqual(entry.entity_id, entity_id) + def test_ctor_routine_with_role(self): + role = "READER" + entity_type = "routine" + with self.assertRaises(ValueError): + self._make_one(role, entity_type, None) + + def test_ctor_routine_success(self): + role = None + entity_type = "routine" + entity_id = object() + entry = self._make_one(role, entity_type, entity_id) + self.assertEqual(entry.role, role) + self.assertEqual(entry.entity_type, entity_type) + self.assertEqual(entry.entity_id, entity_id) + def test_ctor_nonview_without_role(self): role = None entity_type = "userByEmail" @@ -115,6 +130,17 @@ def test_to_api_repr_view(self): exp_resource = {"view": view} self.assertEqual(resource, exp_resource) + def test_to_api_repr_routine(self): + routine = { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + entry = self._make_one(None, "routine", routine) + resource = entry.to_api_repr() + exp_resource = {"routine": routine} + self.assertEqual(resource, exp_resource) + def test_from_api_repr(self): resource = {"role": "OWNER", "userByEmail": "salmon@example.com"} entry = self._get_target_class().from_api_repr(resource) From f9d504812674d2bae30495f84e4d016c665f1972 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Nov 2020 15:03:44 -0600 Subject: [PATCH 0988/2016] perf: avoid extra API calls from `to_dataframe` if all rows are cached (#384) Follow-up to previous PR, which cached the first page of `getQueryResults`. If the first page is the only page (no `pageToken`), then it is unnecessary to make extra API calls from `to_dataframe` or `to_arrow` to the BigQuery Storage API. --- .../google/cloud/bigquery/table.py | 56 +++++++++++++------ .../tests/unit/job/test_query_pandas.py | 28 ++++++++-- 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index c14a8adc46d5..1ee36c7eabe1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1351,6 +1351,41 @@ def __init__( self._total_rows = total_rows self._first_page_response = first_page_response + def _is_completely_cached(self): + """Check if all results are completely cached. + + This is useful to know, because we can avoid alternative download + mechanisms. + """ + if self._first_page_response is None or self.next_page_token: + return False + + return self._first_page_response.get(self._next_token) is None + + def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): + """Returns if the BigQuery Storage API can be used. + + Returns: + bool + True if the BigQuery Storage client can be used or created. + """ + using_bqstorage_api = bqstorage_client or create_bqstorage_client + if not using_bqstorage_api: + return False + + if self._is_completely_cached(): + return False + + if self.max_results is not None: + warnings.warn( + "Cannot use bqstorage_client if max_results is set, " + "reverting to fetching data with the REST endpoint.", + stacklevel=2, + ) + return False + + return True + def _get_next_page_response(self): """Requests the next page from the path provided. @@ -1412,6 +1447,9 @@ def _get_progress_bar(self, progress_bar_type): def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): + if not self._validate_bqstorage(bqstorage_client, False): + bqstorage_client = None + if bqstorage_client is not None: for item in bqstorage_download(): yield item @@ -1503,14 +1541,7 @@ def to_arrow( if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) - if ( - bqstorage_client or create_bqstorage_client - ) and self.max_results is not None: - warnings.warn( - "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the REST endpoint.", - stacklevel=2, - ) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1687,14 +1718,7 @@ def to_dataframe( if dtypes is None: dtypes = {} - if ( - bqstorage_client or create_bqstorage_client - ) and self.max_results is not None: - warnings.warn( - "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the REST endpoint.", - stacklevel=2, - ) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index b0a652b783f6..a481bff696f8 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -99,6 +99,7 @@ def test_to_dataframe_bqstorage_preserve_order(query): ] }, "totalRows": "4", + "pageToken": "next-page", } connection = _make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) @@ -133,7 +134,16 @@ def test_to_dataframe_bqstorage_preserve_order(query): @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") -def test_to_arrow(): +@pytest.mark.parametrize( + "method_kwargs", + [ + {"create_bqstorage_client": False}, + # Since all rows are contained in the first page of results, the BigQuery + # Storage API won't actually be used. + {"create_bqstorage_client": True}, + ], +) +def test_to_arrow(method_kwargs): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") @@ -182,7 +192,7 @@ def test_to_arrow(): client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - tbl = job.to_arrow(create_bqstorage_client=False) + tbl = job.to_arrow(**method_kwargs) assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 @@ -216,7 +226,16 @@ def test_to_arrow(): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_to_dataframe(): +@pytest.mark.parametrize( + "method_kwargs", + [ + {"create_bqstorage_client": False}, + # Since all rows are contained in the first page of results, the BigQuery + # Storage API won't actually be used. + {"create_bqstorage_client": True}, + ], +) +def test_to_dataframe(method_kwargs): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") @@ -243,7 +262,7 @@ def test_to_dataframe(): client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - df = job.to_dataframe(create_bqstorage_client=False) + df = job.to_dataframe(**method_kwargs) assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows @@ -288,6 +307,7 @@ def test_to_dataframe_bqstorage(): {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, ] }, + "pageToken": "next-page", } connection = _make_connection(query_resource) client = _make_client(connection=connection) From c0fef2bd0d295e07cd6bdaf74653d8e58a57ac32 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 12 Nov 2020 17:22:09 -0600 Subject: [PATCH 0989/2016] fix(dbapi): allow rows to be fetched from scripts (#387) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `is_dml` logic is not needed now that we moved to `getQueryResults` instead of `tabledata.list` (https://github.com/googleapis/python-bigquery/pull/375). Previously, the destination table of a DML query would return a non-null value that was unreadable or would return nonsense with DML (and some DDL) queries. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Towards #377 🦕 --- .../google/cloud/bigquery/dbapi/cursor.py | 8 ---- .../google-cloud-bigquery/tests/system.py | 43 ++++++++++++++++++- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 74f8aec4ed46..f48b47c1240d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -220,14 +220,6 @@ def _try_fetch(self, size=None): self._query_data = iter([]) return - is_dml = ( - self._query_job.statement_type - and self._query_job.statement_type.upper() != "SELECT" - ) - if is_dml: - self._query_data = iter([]) - return - if self._query_data is None: bqstorage_client = self.connection._bqstorage_client diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 68fcb918c936..51a47c0b791f 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -180,6 +180,7 @@ class Config(object): CLIENT = None CURSOR = None + DATASET = None def setUpModule(): @@ -189,7 +190,9 @@ def setUpModule(): class TestBigQuery(unittest.TestCase): def setUp(self): - self.to_delete = [] + Config.DATASET = _make_dataset_id("bq_system_tests") + dataset = Config.CLIENT.create_dataset(Config.DATASET) + self.to_delete = [dataset] def tearDown(self): def _still_in_use(bad_request): @@ -1790,6 +1793,44 @@ def test_dbapi_fetchall(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)]) + def test_dbapi_fetchall_from_script(self): + query = """ + CREATE TEMP TABLE Example + ( + x INT64, + y STRING + ); + + INSERT INTO Example + VALUES (5, 'foo'), + (6, 'bar'), + (7, 'baz'); + + SELECT * + FROM Example + ORDER BY x ASC; + """ + + Config.CURSOR.execute(query) + self.assertEqual(Config.CURSOR.rowcount, 3, "expected 3 rows") + rows = Config.CURSOR.fetchall() + row_tuples = [r.values() for r in rows] + self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) + + def test_dbapi_create_view(self): + + query = """ + CREATE VIEW {}.dbapi_create_view + AS SELECT name, SUM(number) AS total + FROM `bigquery-public-data.usa_names.usa_1910_2013` + GROUP BY name; + """.format( + Config.DATASET + ) + + Config.CURSOR.execute(query) + self.assertEqual(Config.CURSOR.rowcount, 0, "expected 0 rows") + @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) From b7c634ab41c8ae98f858b1c775b8ec430ee1e958 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 16 Nov 2020 11:46:28 -0500 Subject: [PATCH 0990/2016] feat: add progress bar to `QueryJob.to_dataframe` and `to_arrow` (#352) * feat: add progress bar for to_arrow method * feat: add progress bar for to_dataframe * feat: add default progress bar and unit test * feat: nit * feat: result timout for without queryplan --- .../google/cloud/bigquery/_tqdm_helpers.py | 94 +++++++ .../google/cloud/bigquery/job/query.py | 7 +- .../google/cloud/bigquery/table.py | 41 +-- .../tests/unit/job/test_query_pandas.py | 261 ++++++++++++++++++ .../tests/unit/test_table.py | 4 +- 5 files changed, 367 insertions(+), 40 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py new file mode 100644 index 000000000000..bdecefe4ad72 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py @@ -0,0 +1,94 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared helper functions for tqdm progress bar.""" + +import concurrent.futures +import time +import warnings + +try: + import tqdm +except ImportError: # pragma: NO COVER + tqdm = None + +_NO_TQDM_ERROR = ( + "A progress bar was requested, but there was an error loading the tqdm " + "library. Please install tqdm to use the progress bar functionality." +) + +_PROGRESS_BAR_UPDATE_INTERVAL = 0.5 + + +def get_progress_bar(progress_bar_type, description, total, unit): + """Construct a tqdm progress bar object, if tqdm is .""" + if tqdm is None: + if progress_bar_type is not None: + warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) + return None + + try: + if progress_bar_type == "tqdm": + return tqdm.tqdm(desc=description, total=total, unit=unit) + elif progress_bar_type == "tqdm_notebook": + return tqdm.tqdm_notebook(desc=description, total=total, unit=unit) + elif progress_bar_type == "tqdm_gui": + return tqdm.tqdm_gui(desc=description, total=total, unit=unit) + except (KeyError, TypeError): + # Protect ourselves from any tqdm errors. In case of + # unexpected tqdm behavior, just fall back to showing + # no progress bar. + warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) + return None + + +def wait_for_query(query_job, progress_bar_type=None): + """Return query result and display a progress bar while the query running, if tqdm is installed.""" + if progress_bar_type is None: + return query_job.result() + + default_total = 1 + current_stage = None + start_time = time.time() + progress_bar = get_progress_bar( + progress_bar_type, "Query is running", default_total, "query" + ) + i = 0 + while True: + if query_job.query_plan: + default_total = len(query_job.query_plan) + current_stage = query_job.query_plan[i] + progress_bar.total = len(query_job.query_plan) + progress_bar.set_description( + "Query executing stage {} and status {} : {:0.2f}s".format( + current_stage.name, current_stage.status, time.time() - start_time, + ), + ) + try: + query_result = query_job.result(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + progress_bar.update(default_total) + progress_bar.set_description( + "Query complete after {:0.2f}s".format(time.time() - start_time), + ) + break + except concurrent.futures.TimeoutError: + query_job.reload() # Refreshes the state via a GET request. + if current_stage: + if current_stage.status == "COMPLETE": + if i < default_total - 1: + progress_bar.update(i + 1) + i += 1 + continue + progress_bar.close() + return query_result diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 6c9221043fdd..7a1a749547d1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -40,6 +40,7 @@ from google.cloud.bigquery.table import _table_arg_to_table_ref from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioning +from google.cloud.bigquery._tqdm_helpers import wait_for_query from google.cloud.bigquery.job.base import _AsyncJob from google.cloud.bigquery.job.base import _DONE_STATE @@ -1259,7 +1260,8 @@ def to_arrow( ..versionadded:: 1.17.0 """ - return self.result().to_arrow( + query_result = wait_for_query(self, progress_bar_type) + return query_result.to_arrow( progress_bar_type=progress_bar_type, bqstorage_client=bqstorage_client, create_bqstorage_client=create_bqstorage_client, @@ -1328,7 +1330,8 @@ def to_dataframe( Raises: ValueError: If the `pandas` library cannot be imported. """ - return self.result().to_dataframe( + query_result = wait_for_query(self, progress_bar_type) + return query_result.to_dataframe( bqstorage_client=bqstorage_client, dtypes=dtypes, progress_bar_type=progress_bar_type, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 1ee36c7eabe1..4bfedd7581df 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -36,11 +36,6 @@ except ImportError: # pragma: NO COVER pyarrow = None -try: - import tqdm -except ImportError: # pragma: NO COVER - tqdm = None - import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator @@ -50,6 +45,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery._tqdm_helpers import get_progress_bar from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -68,10 +64,7 @@ "The pyarrow library is not installed, please install " "pyarrow to use the to_arrow() function." ) -_NO_TQDM_ERROR = ( - "A progress bar was requested, but there was an error loading the tqdm " - "library. Please install tqdm to use the progress bar functionality." -) + _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' @@ -1418,32 +1411,6 @@ def total_rows(self): """int: The total number of rows in the table.""" return self._total_rows - def _get_progress_bar(self, progress_bar_type): - """Construct a tqdm progress bar object, if tqdm is installed.""" - if tqdm is None: - if progress_bar_type is not None: - warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) - return None - - description = "Downloading" - unit = "rows" - - try: - if progress_bar_type == "tqdm": - return tqdm.tqdm(desc=description, total=self.total_rows, unit=unit) - elif progress_bar_type == "tqdm_notebook": - return tqdm.tqdm_notebook( - desc=description, total=self.total_rows, unit=unit - ) - elif progress_bar_type == "tqdm_gui": - return tqdm.tqdm_gui(desc=description, total=self.total_rows, unit=unit) - except (KeyError, TypeError): - # Protect ourselves from any tqdm errors. In case of - # unexpected tqdm behavior, just fall back to showing - # no progress bar. - warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) - return None - def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): @@ -1551,7 +1518,9 @@ def to_arrow( owns_bqstorage_client = bqstorage_client is not None try: - progress_bar = self._get_progress_bar(progress_bar_type) + progress_bar = get_progress_bar( + progress_bar_type, "Downloading", self.total_rows, "rows" + ) record_batches = [] for record_batch in self._to_arrow_iterable( diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index a481bff696f8..f9d823eb08fc 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import concurrent.futures import copy import json @@ -225,6 +226,154 @@ def test_to_arrow(method_kwargs): ] +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_arrow_w_tqdm_w_query_plan(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL + + begun_resource = _make_job_resource(job_type="query") + rows = [ + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + ] + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + job._properties["statistics"] = { + "query": { + "queryPlan": [ + {"name": "S00: Input", "id": "0", "status": "COMPLETE"}, + {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, + ] + }, + } + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[ + concurrent.futures.TimeoutError, + concurrent.futures.TimeoutError, + row_iterator, + ], + ) + + with result_patch as result_patch_tqdm, reload_patch: + tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) + + assert result_patch_tqdm.call_count == 3 + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_arrow_w_tqdm_w_pending_status(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL + + begun_resource = _make_job_resource(job_type="query") + rows = [ + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + ] + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + job._properties["statistics"] = { + "query": { + "queryPlan": [ + {"name": "S00: Input", "id": "0", "status": "PENDING"}, + {"name": "S00: Input", "id": "1", "status": "COMPLETE"}, + ] + }, + } + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[concurrent.futures.TimeoutError, row_iterator], + ) + + with result_patch as result_patch_tqdm, reload_patch: + tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) + + assert result_patch_tqdm.call_count == 2 + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_arrow_w_tqdm_wo_query_plan(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + + begun_resource = _make_job_resource(job_type="query") + rows = [ + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + ] + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[concurrent.futures.TimeoutError, row_iterator], + ) + + with result_patch as result_patch_tqdm, reload_patch: + tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) + + assert result_patch_tqdm.call_count == 2 + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + result_patch_tqdm.assert_called() + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.parametrize( "method_kwargs", @@ -460,3 +609,115 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) tqdm_mock.assert_called() + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_dataframe_w_tqdm_pending(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL + + begun_resource = _make_job_resource(job_type="query") + schema = [ + SchemaField("name", "STRING", mode="NULLABLE"), + SchemaField("age", "INTEGER", mode="NULLABLE"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + job._properties["statistics"] = { + "query": { + "queryPlan": [ + {"name": "S00: Input", "id": "0", "status": "PRNDING"}, + {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, + ] + }, + } + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[concurrent.futures.TimeoutError, row_iterator], + ) + + with result_patch as result_patch_tqdm, reload_patch: + df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) + + assert result_patch_tqdm.call_count == 2 + assert isinstance(df, pandas.DataFrame) + assert len(df) == 4 # verify the number of rows + assert list(df) == ["name", "age"] # verify the column names + result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_dataframe_w_tqdm(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL + + begun_resource = _make_job_resource(job_type="query") + schema = [ + SchemaField("name", "STRING", mode="NULLABLE"), + SchemaField("age", "INTEGER", mode="NULLABLE"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + job._properties["statistics"] = { + "query": { + "queryPlan": [ + {"name": "S00: Input", "id": "0", "status": "COMPLETE"}, + {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, + ] + }, + } + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[ + concurrent.futures.TimeoutError, + concurrent.futures.TimeoutError, + row_iterator, + ], + ) + + with result_patch as result_patch_tqdm, reload_patch: + df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) + + assert result_patch_tqdm.call_count == 3 + assert isinstance(df, pandas.DataFrame) + assert len(df) == 4 # verify the number of rows + assert list(df), ["name", "age"] # verify the column names + result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index eccc46a7ae94..be67eafcda35 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2433,7 +2433,7 @@ def test_to_dataframe_progress_bar( self.assertEqual(len(df), 4) @unittest.skipIf(pandas is None, "Requires `pandas`") - @mock.patch("google.cloud.bigquery.table.tqdm", new=None) + @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): from google.cloud.bigquery.schema import SchemaField @@ -2461,7 +2461,7 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): self.assertEqual(len(df), 4) @unittest.skipIf(pandas is None, "Requires `pandas`") - @mock.patch("google.cloud.bigquery.table.tqdm", new=None) + @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) def test_to_dataframe_no_tqdm(self): from google.cloud.bigquery.schema import SchemaField From 6e03ef705bcb4497a377898971c7e594280bc72c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 16 Nov 2020 14:39:01 -0600 Subject: [PATCH 0991/2016] test: fix DML system tests (#388) --- packages/google-cloud-bigquery/tests/system.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 51a47c0b791f..e347c8a7093f 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -249,7 +249,7 @@ def test_close_releases_open_sockets(self): client.close() conn_count_end = len(current_process.connections()) - self.assertEqual(conn_count_end, conn_count_start) + self.assertLessEqual(conn_count_end, conn_count_start) def test_create_dataset(self): DATASET_ID = _make_dataset_id("create_dataset") @@ -1972,7 +1972,9 @@ def test_query_w_dml(self): def test_dbapi_w_dml(self): dataset_name = _make_dataset_id("dml_dbapi") table_name = "test_table" - self._load_table_for_dml([("Hello World",)], dataset_name, table_name) + self._load_table_for_dml( + [("こんにちは",), ("Hello World",), ("Howdy!",)], dataset_name, table_name + ) query_template = """UPDATE {}.{} SET greeting = 'Guten Tag' WHERE greeting = 'Hello World' @@ -1983,7 +1985,6 @@ def test_dbapi_w_dml(self): job_id="test_dbapi_w_dml_{}".format(str(uuid.uuid4())), ) self.assertEqual(Config.CURSOR.rowcount, 1) - self.assertIsNone(Config.CURSOR.fetchone()) def test_query_w_query_params(self): from google.cloud.bigquery.job import QueryJobConfig From f2cf855cc303cf7d97d2a50398c20af0bd2048e2 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 16 Nov 2020 16:05:35 -0600 Subject: [PATCH 0992/2016] chore: release 2.4.0 (#381) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/CHANGELOG.md | 25 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 787ba7557558..03a4659269f7 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,31 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.4.0](https://www.github.com/googleapis/python-bigquery/compare/v2.3.1...v2.4.0) (2020-11-16) + + +### Features + +* add progress bar to `QueryJob.to_dataframe` and `to_arrow` ([#352](https://www.github.com/googleapis/python-bigquery/issues/352)) ([dc78edd](https://www.github.com/googleapis/python-bigquery/commit/dc78eddde7a6a312c8fed7bace7d64036837ab1a)) +* allow routine references ([#378](https://www.github.com/googleapis/python-bigquery/issues/378)) ([f9480dc](https://www.github.com/googleapis/python-bigquery/commit/f9480dc2a1bc58367083176bd74725aa8b903301)) + + +### Bug Fixes + +* **dbapi:** allow rows to be fetched from scripts ([#387](https://www.github.com/googleapis/python-bigquery/issues/387)) ([b899ad1](https://www.github.com/googleapis/python-bigquery/commit/b899ad12e17cb87c58d3ae46b4388d917c5743f2)), closes [#377](https://www.github.com/googleapis/python-bigquery/issues/377) + + +### Performance Improvements + +* avoid extra API calls from `to_dataframe` if all rows are cached ([#384](https://www.github.com/googleapis/python-bigquery/issues/384)) ([c52b317](https://www.github.com/googleapis/python-bigquery/commit/c52b31789998fc0dfde07c3296650c85104d719d)) +* cache first page of `jobs.getQueryResults` rows ([#374](https://www.github.com/googleapis/python-bigquery/issues/374)) ([86f6a51](https://www.github.com/googleapis/python-bigquery/commit/86f6a516d1c7c5dc204ab085ea2578793e6561ff)) +* use `getQueryResults` from DB-API ([#375](https://www.github.com/googleapis/python-bigquery/issues/375)) ([30de15f](https://www.github.com/googleapis/python-bigquery/commit/30de15f7255de5ea221df4e8db7991d279e0ea28)) + + +### Dependencies + +* expand pyarrow dependencies to include version 2 ([#368](https://www.github.com/googleapis/python-bigquery/issues/368)) ([cd9febd](https://www.github.com/googleapis/python-bigquery/commit/cd9febd20c34983781386c3bf603e5fca7135695)) + ## 2.3.1 11-05-2020 09:27 PST diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 474ccbcf2e6e..fe11624d91dd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.3.1" +__version__ = "2.4.0" From 003d42ea5989064b7c2c7f77e35f3f87589f1eb8 Mon Sep 17 00:00:00 2001 From: Ryan Yuan Date: Wed, 18 Nov 2020 03:04:07 +1100 Subject: [PATCH 0993/2016] docs(samples): add more clustering code snippets (#330) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add sample code for creating a clustered table from a query result. File: samples/client_query_destination_table_clustered.py Section: [https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_from_a_query_result](https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_from_a_query_result) Add sample code for creating a clustered table when you load data. File: samples/load_table_clustered.py Section: [https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_when_you_load_data](https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_when_you_load_data) Fixes #329 🦕 --- .../docs/usage/tables.rst | 17 ++++++ .../google/cloud/bigquery/__init__.py | 4 +- ...lient_query_destination_table_clustered.py | 43 +++++++++++++++ .../samples/load_table_clustered.py | 55 +++++++++++++++++++ ...lient_query_destination_table_clustered.py | 27 +++++++++ .../tests/test_load_table_clustered.py | 27 +++++++++ 6 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py create mode 100644 packages/google-cloud-bigquery/samples/load_table_clustered.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index 7afca05e2b9c..d924fe214a71 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -85,6 +85,23 @@ Load table data from a file with the :start-after: [START bigquery_load_from_file] :end-before: [END bigquery_load_from_file] +Creating a clustered table from a query result: + +.. literalinclude:: ../samples/client_query_destination_table_clustered.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_clustered_table] + :end-before: [END bigquery_query_clustered_table] + +Creating a clustered table when you load data with the +:func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method: + +.. literalinclude:: ../samples/load_table_clustered.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_clustered] + :end-before: [END bigquery_load_table_clustered] + Load a CSV file from Cloud Storage with the :func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index b8d1cc4d7a0c..41f987228ee2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -37,6 +37,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums +from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions @@ -137,8 +138,9 @@ "Encoding", "QueryPriority", "SchemaUpdateOption", - "StandardSqlDataTypes", "SourceFormat", + "SqlTypeNames", + "StandardSqlDataTypes", "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py new file mode 100644 index 000000000000..5a109ed104a0 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py @@ -0,0 +1,43 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_destination_table_clustered(table_id): + + # [START bigquery_query_clustered_table] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the destination table. + # table_id = "your-project.your_dataset.your_table_name" + + sql = "SELECT * FROM `bigquery-public-data.samples.shakespeare`" + cluster_fields = ["corpus"] + + job_config = bigquery.QueryJobConfig( + clustering_fields=cluster_fields, destination=table_id + ) + + # Start the query, passing in the extra configuration. + query_job = client.query(sql, job_config=job_config) # Make an API request. + query_job.result() # Wait for the job to complete. + + table = client.get_table(table_id) # Make an API request. + if table.clustering_fields == cluster_fields: + print( + "The destination table is written using the cluster_fields configuration." + ) + # [END bigquery_query_clustered_table] diff --git a/packages/google-cloud-bigquery/samples/load_table_clustered.py b/packages/google-cloud-bigquery/samples/load_table_clustered.py new file mode 100644 index 000000000000..20d412cb3878 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/load_table_clustered.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_clustered(table_id): + + # [START bigquery_load_table_clustered] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + skip_leading_rows=1, + source_format=bigquery.SourceFormat.CSV, + schema=[ + bigquery.SchemaField("timestamp", bigquery.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("origin", bigquery.SqlTypeNames.STRING), + bigquery.SchemaField("destination", bigquery.SqlTypeNames.STRING), + bigquery.SchemaField("amount", bigquery.SqlTypeNames.NUMERIC), + ], + time_partitioning=bigquery.TimePartitioning(field="timestamp"), + clustering_fields=["origin", "destination"], + ) + + job = client.load_table_from_uri( + ["gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv"], + table_id, + job_config=job_config, + ) + + job.result() # Waits for the job to complete. + + table = client.get_table(table_id) # Make an API request. + print( + "Loaded {} rows and {} columns to {}".format( + table.num_rows, len(table.schema), table_id + ) + ) + # [END bigquery_load_table_clustered] + return table diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py new file mode 100644 index 000000000000..b4bdd588c57e --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py @@ -0,0 +1,27 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_query_destination_table_clustered + + +def test_client_query_destination_table_clustered(capsys, random_table_id): + + client_query_destination_table_clustered.client_query_destination_table_clustered( + random_table_id + ) + out, err = capsys.readouterr() + assert ( + "The destination table is written using the cluster_fields configuration." + in out + ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py new file mode 100644 index 000000000000..bafdc2051f5d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py @@ -0,0 +1,27 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_clustered + + +def test_load_table_clustered(capsys, random_table_id, client): + + table = load_table_clustered.load_table_clustered(random_table_id) + + out, _ = capsys.readouterr() + assert "rows and 4 columns" in out + + rows = list(client.list_rows(table)) # Make an API request. + assert len(rows) > 0 + assert table.clustering_fields == ["origin", "destination"] From 382528cd3577632b0ca6e5583cdd9907d51ca107 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 17 Nov 2020 23:38:45 +0100 Subject: [PATCH 0994/2016] chore(deps): update dependency matplotlib to v3.3.3 (#385) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index bf895a1ae751..2c1fb38c9c0f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -4,7 +4,7 @@ google-auth-oauthlib==0.4.2 grpcio==1.33.2 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.2 +matplotlib==3.3.3 pandas==1.1.4 pyarrow==1.0.1 pytz==2020.1 From 1132dad16d6e1d9ae74f52aeba1a1af9f3eb02c8 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 17 Nov 2020 23:54:04 +0100 Subject: [PATCH 0995/2016] chore(deps): update dependency google-cloud-bigquery to v2.3.1 (#371) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==2.2.0` -> `==2.3.1` | --- ### Release Notes
googleapis/python-bigquery ### [`v2.3.1`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​231) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.2.0...v2.3.1) 11-05-2020 09:27 PST ##### Internal / Testing Changes - update `google.cloud.bigquery.__version__`
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 2c1fb38c9c0f..509a61ade9ca 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.2.0 +google-cloud-bigquery==2.3.1 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.33.2 From 9106d5a99bf54bfbc1b58b9bbaa1233ef4810a33 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 23 Nov 2020 14:37:45 -0500 Subject: [PATCH 0996/2016] fix: avoid floating point for timestamp in `insert_rows` (#393) * fix: timestamp precision in insert_rows * fix: remove floating point coversion and add datetime format * fix: add formatted string in unit tests --- .../google/cloud/bigquery/_helpers.py | 9 +++------ .../tests/unit/test__helpers.py | 10 +++++++--- .../tests/unit/test_client.py | 16 ++++++++-------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index b59b3d794a95..35129d8440f8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -23,7 +23,7 @@ from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds -from google.cloud._helpers import _microseconds_from_datetime +from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes @@ -313,12 +313,9 @@ def _timestamp_to_json_parameter(value): def _timestamp_to_json_row(value): - """Coerce 'value' to an JSON-compatible representation. - - This version returns floating-point seconds value used in row data. - """ + """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): - value = _microseconds_from_datetime(value) * 1e-6 + value = value.strftime(_RFC3339_MICROS) return value diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 16c4fb8a52f6..a5258150168e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -728,10 +728,14 @@ def test_w_string(self): self.assertEqual(self._call_fut(ZULU), ZULU) def test_w_datetime(self): - from google.cloud._helpers import _microseconds_from_datetime - when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) - self.assertEqual(self._call_fut(when), _microseconds_from_datetime(when) / 1e6) + self.assertEqual(self._call_fut(when), "2016-12-20T15:58:27.339328Z") + + def test_w_datetime_w_utc_zone(self): + from google.cloud._helpers import UTC + + when = datetime.datetime(2020, 11, 17, 1, 6, 52, 353795, tzinfo=UTC) + self.assertEqual(self._call_fut(when), "2020-11-17T01:06:52.353795Z") class Test_datetime_to_json(unittest.TestCase): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index dd57ee79814d..4fba1150c987 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5804,7 +5804,7 @@ def test_insert_rows_w_schema(self): import datetime from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 - from google.cloud._helpers import _microseconds_from_datetime + from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.schema import SchemaField WHEN_TS = 1437767599.006 @@ -5834,7 +5834,7 @@ def _row_data(row): result = {"full_name": row[0], "age": str(row[1])} joined = row[2] if isinstance(joined, datetime.datetime): - joined = _microseconds_from_datetime(joined) * 1e-6 + joined = joined.strftime(_RFC3339_MICROS) if joined is not None: result["joined"] = joined return result @@ -5864,7 +5864,7 @@ def test_insert_rows_w_list_of_dictionaries(self): import datetime from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 - from google.cloud._helpers import _microseconds_from_datetime + from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -5910,7 +5910,7 @@ def _row_data(row): row = copy.deepcopy(row) del row["joined"] elif isinstance(joined, datetime.datetime): - row["joined"] = _microseconds_from_datetime(joined) * 1e-6 + row["joined"] = joined.strftime(_RFC3339_MICROS) row["age"] = str(row["age"]) return row @@ -6109,16 +6109,16 @@ def test_insert_rows_w_repeated_fields(self): { "score": "12", "times": [ - 1543665600.0, # 2018-12-01 12:00 UTC - 1543669200.0, # 2018-12-01 13:00 UTC + "2018-12-01T12:00:00.000000Z", + "2018-12-01T13:00:00.000000Z", ], "distances": [1.25, 2.5], }, { "score": "13", "times": [ - 1543752000.0, # 2018-12-02 12:00 UTC - 1543755600.0, # 2018-12-02 13:00 UTC + "2018-12-02T12:00:00.000000Z", + "2018-12-02T13:00:00.000000Z", ], "distances": [-1.25, -2.5], }, From 72dea80f0bc7008b4229a3491a4ea1b724b75a53 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 24 Nov 2020 16:28:22 -0500 Subject: [PATCH 0997/2016] deps: update required version of opentelementry for opentelemetry-exporter-google-cloud (#398) --- packages/google-cloud-bigquery/setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 48c4a7518c16..5f4e506eb08a 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -55,9 +55,9 @@ ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ - "opentelemetry-api==0.9b0", - "opentelemetry-sdk==0.9b0", - "opentelemetry-instrumentation==0.9b0 ", + "opentelemetry-api==0.11b0", + "opentelemetry-sdk==0.11b0", + "opentelemetry-instrumentation==0.11b0", ], } From 33b39200a9539d85a696ff227ff721eeaa22b64e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 24 Nov 2020 16:44:02 -0600 Subject: [PATCH 0998/2016] perf: don't fetch rows when waiting for query to finish (#400) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When there are large result sets, fetching rows while waiting for the query to finish can cause the API to hang indefinitely. (This may be due to an interaction between connection timeout and API timeout.) This reverts commit 86f6a516d1c7c5dc204ab085ea2578793e6561ff (#374). Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes https://github.com/pydata/pandas-gbq/issues/343 Fixes #394 🦕 --- .../google/cloud/bigquery/client.py | 4 +- .../google/cloud/bigquery/job/query.py | 5 -- .../tests/unit/job/test_query.py | 55 +++++------------- .../tests/unit/job/test_query_pandas.py | 44 ++++++-------- .../tests/unit/test_client.py | 4 +- .../tests/unit/test_table.py | 57 +++++++++++++++++++ 6 files changed, 89 insertions(+), 80 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index c67ef54e0f41..cd1474336ea0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1534,7 +1534,7 @@ def _get_query_results( A new ``_QueryResults`` instance. """ - extra_params = {} + extra_params = {"maxResults": 0} if project is None: project = self.project @@ -3187,7 +3187,6 @@ def _list_rows_from_query_results( page_size=None, retry=DEFAULT_RETRY, timeout=None, - first_page_response=None, ): """List the rows of a completed query. See @@ -3248,7 +3247,6 @@ def _list_rows_from_query_results( table=destination, extra_params=params, total_rows=total_rows, - first_page_response=first_page_response, ) return row_iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 7a1a749547d1..9e890861376e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1177,10 +1177,6 @@ def result( if self._query_results.total_rows is None: return _EmptyRowIterator() - first_page_response = None - if max_results is None and page_size is None and start_index is None: - first_page_response = self._query_results._properties - rows = self._client._list_rows_from_query_results( self.job_id, self.location, @@ -1193,7 +1189,6 @@ def result( start_index=start_index, retry=retry, timeout=timeout, - first_page_response=first_page_response, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 41e31f4694e2..daaf2e557108 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -787,9 +787,7 @@ def test_result(self): "location": "EU", }, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "3", - "rows": [{"f": [{"v": "abc"}]}], - "pageToken": "next-page", + "totalRows": "2", } job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") @@ -801,9 +799,9 @@ def test_result(self): query_page_resource = { # Explicitly set totalRows to be different from the initial # response to test update during iteration. - "totalRows": "2", + "totalRows": "1", "pageToken": None, - "rows": [{"f": [{"v": "def"}]}], + "rows": [{"f": [{"v": "abc"}]}], } conn = _make_connection( query_resource, query_resource_done, job_resource_done, query_page_resource @@ -814,20 +812,19 @@ def test_result(self): result = job.result() self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 3) + self.assertEqual(result.total_rows, 2) rows = list(result) - self.assertEqual(len(rows), 2) + self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") - self.assertEqual(rows[1].col1, "def") # Test that the total_rows property has changed during iteration, based # on the response from tabledata.list. - self.assertEqual(result.total_rows, 2) + self.assertEqual(result.total_rows, 1) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", path=query_results_path, - query_params={"location": "EU"}, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) reload_call = mock.call( @@ -842,7 +839,6 @@ def test_result(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", - "pageToken": "next-page", }, timeout=None, ) @@ -855,9 +851,7 @@ def test_result_with_done_job_calls_get_query_results(self): "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - "rows": [{"f": [{"v": "abc"}]}], - "pageToken": "next-page", + "totalRows": "1", } job_resource = self._make_resource(started=True, ended=True, location="EU") job_resource["configuration"]["query"]["destinationTable"] = { @@ -866,9 +860,9 @@ def test_result_with_done_job_calls_get_query_results(self): "tableId": "dest_table", } results_page_resource = { - "totalRows": "2", + "totalRows": "1", "pageToken": None, - "rows": [{"f": [{"v": "def"}]}], + "rows": [{"f": [{"v": "abc"}]}], } conn = _make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) @@ -877,15 +871,14 @@ def test_result_with_done_job_calls_get_query_results(self): result = job.result() rows = list(result) - self.assertEqual(len(rows), 2) + self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") - self.assertEqual(rows[1].col1, "def") query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", path=query_results_path, - query_params={"location": "EU"}, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) query_results_page_call = mock.call( @@ -894,7 +887,6 @@ def test_result_with_done_job_calls_get_query_results(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", - "pageToken": "next-page", }, timeout=None, ) @@ -908,12 +900,6 @@ def test_result_with_max_results(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", - # These rows are discarded because max_results is set. - "rows": [ - {"f": [{"v": "xyz"}]}, - {"f": [{"v": "uvw"}]}, - {"f": [{"v": "rst"}]}, - ], } query_page_resource = { "totalRows": "5", @@ -939,7 +925,6 @@ def test_result_with_max_results(self): rows = list(result) self.assertEqual(len(rows), 3) - self.assertEqual(rows[0].col1, "abc") self.assertEqual(len(connection.api_request.call_args_list), 2) query_page_request = connection.api_request.call_args_list[1] self.assertEqual( @@ -994,7 +979,7 @@ def test_result_w_retry(self): query_results_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"location": "asia-northeast1"}, + query_params={"maxResults": 0, "location": "asia-northeast1"}, timeout=None, ) reload_call = mock.call( @@ -1094,12 +1079,6 @@ def test_result_w_page_size(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "4", - # These rows are discarded because page_size is set. - "rows": [ - {"f": [{"v": "xyz"}]}, - {"f": [{"v": "uvw"}]}, - {"f": [{"v": "rst"}]}, - ], } job_resource = self._make_resource(started=True, ended=True, location="US") q_config = job_resource["configuration"]["query"] @@ -1130,7 +1109,6 @@ def test_result_w_page_size(self): # Assert actual_rows = list(result) self.assertEqual(len(actual_rows), 4) - self.assertEqual(actual_rows[0].col1, "row1") query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_page_1_call = mock.call( @@ -1164,12 +1142,6 @@ def test_result_with_start_index(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", - # These rows are discarded because start_index is set. - "rows": [ - {"f": [{"v": "xyz"}]}, - {"f": [{"v": "uvw"}]}, - {"f": [{"v": "rst"}]}, - ], } tabledata_resource = { "totalRows": "5", @@ -1196,7 +1168,6 @@ def test_result_with_start_index(self): rows = list(result) self.assertEqual(len(rows), 4) - self.assertEqual(rows[0].col1, "abc") self.assertEqual(len(connection.api_request.call_args_list), 2) tabledata_list_request = connection.api_request.call_args_list[1] self.assertEqual( diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index f9d823eb08fc..cdd6f2b3cf88 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -100,7 +100,6 @@ def test_to_dataframe_bqstorage_preserve_order(query): ] }, "totalRows": "4", - "pageToken": "next-page", } connection = _make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) @@ -135,16 +134,7 @@ def test_to_dataframe_bqstorage_preserve_order(query): @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") -@pytest.mark.parametrize( - "method_kwargs", - [ - {"create_bqstorage_client": False}, - # Since all rows are contained in the first page of results, the BigQuery - # Storage API won't actually be used. - {"create_bqstorage_client": True}, - ], -) -def test_to_arrow(method_kwargs): +def test_to_arrow(): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") @@ -172,6 +162,8 @@ def test_to_arrow(method_kwargs): }, ] }, + } + tabledata_resource = { "rows": [ { "f": [ @@ -185,15 +177,17 @@ def test_to_arrow(method_kwargs): {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, ] }, - ], + ] } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - tbl = job.to_arrow(**method_kwargs) + tbl = job.to_arrow(create_bqstorage_client=False) assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 @@ -375,16 +369,7 @@ def test_to_arrow_w_tqdm_wo_query_plan(): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.parametrize( - "method_kwargs", - [ - {"create_bqstorage_client": False}, - # Since all rows are contained in the first page of results, the BigQuery - # Storage API won't actually be used. - {"create_bqstorage_client": True}, - ], -) -def test_to_dataframe(method_kwargs): +def test_to_dataframe(): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") @@ -398,20 +383,24 @@ def test_to_dataframe(method_kwargs): {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, ] }, + } + tabledata_resource = { "rows": [ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ], + ] } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - df = job.to_dataframe(**method_kwargs) + df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows @@ -456,7 +445,6 @@ def test_to_dataframe_bqstorage(): {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, ] }, - "pageToken": "next-page", } connection = _make_connection(query_resource) client = _make_client(connection=connection) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 4fba1150c987..c4bdea2f81df 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -319,7 +319,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): conn.api_request.assert_called_once_with( method="GET", path=path, - query_params={"timeoutMs": 500, "location": self.LOCATION}, + query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, timeout=42, ) @@ -336,7 +336,7 @@ def test__get_query_results_miss_w_client_location(self): conn.api_request.assert_called_once_with( method="GET", path="/projects/PROJECT/queries/nothere", - query_params={"location": self.LOCATION}, + query_params={"maxResults": 0, "location": self.LOCATION}, timeout=None, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index be67eafcda35..1dd5fab46e8d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1630,6 +1630,40 @@ def test_iterate(self): api_request.assert_called_once_with(method="GET", path=path, query_params={}) + def test_iterate_with_cached_first_page(self): + from google.cloud.bigquery.schema import SchemaField + + first_page = { + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + ], + "pageToken": "next-page", + } + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one( + _mock_client(), api_request, path, schema, first_page_response=first_page + ) + rows = list(row_iterator) + self.assertEqual(len(rows), 4) + self.assertEqual(rows[0].age, 27) + self.assertEqual(rows[1].age, 28) + self.assertEqual(rows[2].age, 32) + self.assertEqual(rows[3].age, 33) + + api_request.assert_called_once_with( + method="GET", path=path, query_params={"pageToken": "next-page"} + ) + def test_page_size(self): from google.cloud.bigquery.schema import SchemaField @@ -1655,6 +1689,29 @@ def test_page_size(self): query_params={"maxResults": row_iterator._page_size}, ) + def test__is_completely_cached_returns_false_without_first_page(self): + iterator = self._make_one(first_page_response=None) + self.assertFalse(iterator._is_completely_cached()) + + def test__is_completely_cached_returns_false_with_page_token(self): + first_page = {"pageToken": "next-page"} + iterator = self._make_one(first_page_response=first_page) + self.assertFalse(iterator._is_completely_cached()) + + def test__is_completely_cached_returns_true(self): + first_page = {"rows": []} + iterator = self._make_one(first_page_response=first_page) + self.assertTrue(iterator._is_completely_cached()) + + def test__validate_bqstorage_returns_false_when_completely_cached(self): + first_page = {"rows": []} + iterator = self._make_one(first_page_response=first_page) + self.assertFalse( + iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField From 6e9da701ec94a5e6bf0405d817bb4cc5d8bd4c9b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Nov 2020 20:42:04 +0100 Subject: [PATCH 0999/2016] chore(deps): update dependency google-cloud-bigquery to v2.4.0 (#391) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==2.3.1` -> `==2.4.0` | --- ### Release Notes
googleapis/python-bigquery ### [`v2.4.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​240-httpswwwgithubcomgoogleapispython-bigquerycomparev231v240-2020-11-16) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.3.1...v2.4.0) ##### Features - add progress bar to `QueryJob.to_dataframe` and `to_arrow` ([#​352](https://www.github.com/googleapis/python-bigquery/issues/352)) ([dc78edd](https://www.github.com/googleapis/python-bigquery/commit/dc78eddde7a6a312c8fed7bace7d64036837ab1a)) - allow routine references ([#​378](https://www.github.com/googleapis/python-bigquery/issues/378)) ([f9480dc](https://www.github.com/googleapis/python-bigquery/commit/f9480dc2a1bc58367083176bd74725aa8b903301)) ##### Bug Fixes - **dbapi:** allow rows to be fetched from scripts ([#​387](https://www.github.com/googleapis/python-bigquery/issues/387)) ([b899ad1](https://www.github.com/googleapis/python-bigquery/commit/b899ad12e17cb87c58d3ae46b4388d917c5743f2)), closes [#​377](https://www.github.com/googleapis/python-bigquery/issues/377) ##### Performance Improvements - avoid extra API calls from `to_dataframe` if all rows are cached ([#​384](https://www.github.com/googleapis/python-bigquery/issues/384)) ([c52b317](https://www.github.com/googleapis/python-bigquery/commit/c52b31789998fc0dfde07c3296650c85104d719d)) - cache first page of `jobs.getQueryResults` rows ([#​374](https://www.github.com/googleapis/python-bigquery/issues/374)) ([86f6a51](https://www.github.com/googleapis/python-bigquery/commit/86f6a516d1c7c5dc204ab085ea2578793e6561ff)) - use `getQueryResults` from DB-API ([#​375](https://www.github.com/googleapis/python-bigquery/issues/375)) ([30de15f](https://www.github.com/googleapis/python-bigquery/commit/30de15f7255de5ea221df4e8db7991d279e0ea28)) ##### Dependencies - expand pyarrow dependencies to include version 2 ([#​368](https://www.github.com/googleapis/python-bigquery/issues/368)) ([cd9febd](https://www.github.com/googleapis/python-bigquery/commit/cd9febd20c34983781386c3bf603e5fca7135695))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 509a61ade9ca..36363a377f68 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.3.1 +google-cloud-bigquery==2.4.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.33.2 From d491cd015d53cd89af37dbbe24be27bd2ac4731f Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Nov 2020 20:56:03 +0100 Subject: [PATCH 1000/2016] chore(deps): update dependency pyarrow to v2 (#380) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [pyarrow](https://arrow.apache.org/) | major | `==1.0.1` -> `==2.0.0` | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 36363a377f68..f47f2228eaa4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -6,5 +6,5 @@ ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 pandas==1.1.4 -pyarrow==1.0.1 +pyarrow==2.0.0 pytz==2020.1 From 37c4ba6b03a481a3ac3ff667519d6d8cb228faaa Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Nov 2020 21:16:14 +0100 Subject: [PATCH 1001/2016] chore(deps): update dependency pytz to v2020.4 (#356) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index f47f2228eaa4..eeb94db5a648 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -7,4 +7,4 @@ ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 pandas==1.1.4 pyarrow==2.0.0 -pytz==2020.1 +pytz==2020.4 From 03443b2789d9cc0b3c39b5af2a7e9f5ec5858e29 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Mon, 30 Nov 2020 13:55:22 -0800 Subject: [PATCH 1002/2016] feat: add support for unrecognized model types (#401) * feat: add support for unrecognized model types * refactor Co-authored-by: Tim Swast --- .../google/cloud/bigquery/model.py | 12 +++++++++--- .../tests/unit/model/test_model.py | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index 1143b71f9825..0f5d8f83b8c4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -305,9 +305,15 @@ def from_api_repr(cls, resource): start_time = datetime_helpers.from_microseconds(1e3 * float(start_time)) training_run["startTime"] = datetime_helpers.to_rfc3339(start_time) - this._proto = json_format.ParseDict( - resource, types.Model()._pb, ignore_unknown_fields=True - ) + try: + this._proto = json_format.ParseDict( + resource, types.Model()._pb, ignore_unknown_fields=True + ) + except json_format.ParseError: + resource["modelType"] = "MODEL_TYPE_UNSPECIFIED" + this._proto = json_format.ParseDict( + resource, types.Model()._pb, ignore_unknown_fields=True + ) return this def _build_resource(self, filter_fields): diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model.py b/packages/google-cloud-bigquery/tests/unit/model/test_model.py index 9fa29a49649b..8f0bf58d5750 100644 --- a/packages/google-cloud-bigquery/tests/unit/model/test_model.py +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model.py @@ -186,6 +186,23 @@ def test_from_api_repr_w_unknown_fields(target_class): assert got._properties is resource +def test_from_api_repr_w_unknown_type(target_class): + from google.cloud.bigquery import ModelReference + + resource = { + "modelReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + }, + "modelType": "BE_A_GOOD_ROLE_MODEL", + } + got = target_class.from_api_repr(resource) + assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") + assert got.model_type == 0 + assert got._properties is resource + + @pytest.mark.parametrize( "resource,filter_fields,expected", [ From e00f24601c854a09c6cc0d82497982c7f9645611 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 30 Nov 2020 16:10:04 -0600 Subject: [PATCH 1003/2016] feat: add `TableReference.__str__` to get table ID in standard SQL (#405) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the natural inverse of the `TableReference.from_string` method. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #354 🦕 --- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 8 ++++++-- packages/google-cloud-bigquery/tests/unit/test_table.py | 8 ++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 4bfedd7581df..f30c057734f5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -262,6 +262,9 @@ def __ne__(self, other): def __hash__(self): return hash(self._key()) + def __str__(self): + return f"{self.project}.{self.dataset_id}.{self.table_id}" + def __repr__(self): from google.cloud.bigquery.dataset import DatasetReference @@ -475,7 +478,7 @@ def full_table_id(self): """Union[str, None]: ID for the table (:data:`None` until set from the server). - In the format ``project_id:dataset_id.table_id``. + In the format ``project-id:dataset_id.table_id``. """ return self._properties.get("id") @@ -484,7 +487,8 @@ def table_type(self): """Union[str, None]: The type of the table (:data:`None` until set from the server). - Possible values are ``'TABLE'``, ``'VIEW'``, or ``'EXTERNAL'``. + Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'`` or + ``'EXTERNAL'``. """ return self._properties.get("type") diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 1dd5fab46e8d..67874ff91217 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -272,6 +272,11 @@ def test___repr__(self): ) self.assertEqual(repr(table1), expected) + def test___str__(self): + dataset = DatasetReference("project1", "dataset1") + table1 = self._make_one(dataset, "table1") + self.assertEqual(str(table1), "project1.dataset1.table1") + class TestTable(unittest.TestCase, _SchemaBase): @@ -813,6 +818,9 @@ def test_from_string(self): self.assertEqual(got.project, "string-project") self.assertEqual(got.dataset_id, "string_dataset") self.assertEqual(got.table_id, "string_table") + self.assertEqual( + str(got.reference), "string-project.string_dataset.string_table" + ) def test_from_string_legacy_string(self): cls = self._get_target_class() From 11db1fb8668cb24435955e651ca2a29b32b65955 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 2 Dec 2020 11:28:48 -0500 Subject: [PATCH 1004/2016] feat: add progress bar for magics (#396) * feat: add progress bar for magics * feat: remove default progress bar * feat: add default tqdm value in magic --- .../google/cloud/bigquery/_tqdm_helpers.py | 5 +- .../google/cloud/bigquery/magics/magics.py | 38 +++++++++- .../tests/unit/test_magics.py | 71 ++++++++++++++++++- 3 files changed, 107 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py index bdecefe4ad72..2fcf2a981802 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py @@ -55,15 +55,14 @@ def get_progress_bar(progress_bar_type, description, total, unit): def wait_for_query(query_job, progress_bar_type=None): """Return query result and display a progress bar while the query running, if tqdm is installed.""" - if progress_bar_type is None: - return query_job.result() - default_total = 1 current_stage = None start_time = time.time() progress_bar = get_progress_bar( progress_bar_type, "Query is running", default_total, "query" ) + if progress_bar is None: + return query_job.result() i = 0 while True: if query_job.query_plan: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 5645a84a5224..f04a6364a2b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -182,6 +182,7 @@ def __init__(self): self._default_query_job_config = bigquery.QueryJobConfig() self._bigquery_client_options = client_options.ClientOptions() self._bqstorage_client_options = client_options.ClientOptions() + self._progress_bar_type = "tqdm" @property def credentials(self): @@ -313,6 +314,26 @@ def default_query_job_config(self): def default_query_job_config(self, value): self._default_query_job_config = value + @property + def progress_bar_type(self): + """str: Default progress bar type to use to display progress bar while + executing queries through IPython magics. + + Note:: + Install the ``tqdm`` package to use this feature. + + Example: + Manually setting the progress_bar_type: + + >>> from google.cloud.bigquery import magics + >>> magics.context.progress_bar_type = "tqdm" + """ + return self._progress_bar_type + + @progress_bar_type.setter + def progress_bar_type(self, value): + self._progress_bar_type = value + context = Context() @@ -524,6 +545,15 @@ def _create_dataset_if_necessary(client, dataset_id): "name (ex. $my_dict_var)." ), ) +@magic_arguments.argument( + "--progress_bar_type", + type=str, + default=None, + help=( + "Sets progress bar type to display a progress bar while executing the query." + "Defaults to use tqdm. Install the ``tqdm`` package to use this feature." + ), +) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -687,12 +717,16 @@ def _cell_magic(line, query): ) return query_job + progress_bar = context.progress_bar_type or args.progress_bar_type + if max_results: result = query_job.result(max_results=max_results).to_dataframe( - bqstorage_client=bqstorage_client + bqstorage_client=bqstorage_client, progress_bar_type=progress_bar ) else: - result = query_job.to_dataframe(bqstorage_client=bqstorage_client) + result = query_job.to_dataframe( + bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + ) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index a7cf92919dd6..ff41fe7206c1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -623,7 +623,7 @@ def warning_match(warning): assert client_info.user_agent == "ipython-" + IPython.__version__ query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock + bqstorage_client=bqstorage_instance_mock, progress_bar_type="tqdm" ) assert isinstance(return_value, pandas.DataFrame) @@ -665,7 +665,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): return_value = ip.run_cell_magic("bigquery", "--use_rest_api", sql) bqstorage_mock.assert_not_called() - query_job_mock.to_dataframe.assert_called_once_with(bqstorage_client=None) + query_job_mock.to_dataframe.assert_called_once_with( + bqstorage_client=None, progress_bar_type="tqdm" + ) assert isinstance(return_value, pandas.DataFrame) @@ -1167,6 +1169,71 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): assert sent_config["maximumBytesBilled"] == "10203" +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + magics.context.progress_bar_type = "tqdm_gui" + + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) + + # Mock out the BigQuery Storage API. + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock + ) + + sql = "SELECT 17 AS num" + result = pandas.DataFrame([17], columns=["num"]) + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock, bqstorage_client_patch: + run_query_mock.return_value = query_job_mock + + return_value = ip.run_cell_magic("bigquery", "--use_rest_api", sql) + + bqstorage_mock.assert_not_called() + query_job_mock.to_dataframe.assert_called_once_with( + bqstorage_client=None, progress_bar_type=magics.context.progress_bar_type + ) + + assert isinstance(return_value, pandas.DataFrame) + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_progress_bar_type(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.progress_bar_type = None + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic( + "bigquery", "--progress_bar_type=tqdm_gui", "SELECT 17 as num" + ) + + progress_bar_used = run_query_mock.mock_calls[1][2]["progress_bar_type"] + assert progress_bar_used == "tqdm_gui" + # context progress bar type should not change + assert magics.context.progress_bar_type is None + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_project(): ip = IPython.get_ipython() From 056386f4a425a2514c45f8f5553ce984806ab5dc Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 2 Dec 2020 13:36:57 -0600 Subject: [PATCH 1005/2016] chore: release 2.5.0 (#406) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 29 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 03a4659269f7..c71f85d0c78c 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,35 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.5.0](https://www.github.com/googleapis/python-bigquery/compare/v2.4.0...v2.5.0) (2020-12-02) + + +### Features + +* add `TableReference.__str__` to get table ID in standard SQL ([#405](https://www.github.com/googleapis/python-bigquery/issues/405)) ([53dff2a](https://www.github.com/googleapis/python-bigquery/commit/53dff2ad3889af04369a22437e6ab9b92c5755b6)), closes [#354](https://www.github.com/googleapis/python-bigquery/issues/354) +* add progress bar for magics ([#396](https://www.github.com/googleapis/python-bigquery/issues/396)) ([04d0273](https://www.github.com/googleapis/python-bigquery/commit/04d027317a99e3f353e0b7a18076da9b6ba4d8d3)) +* add support for unrecognized model types ([#401](https://www.github.com/googleapis/python-bigquery/issues/401)) ([168f035](https://www.github.com/googleapis/python-bigquery/commit/168f0354c4815bd1aeadbd4e388dcc9b32f97d6b)) + + +### Bug Fixes + +* avoid floating point for timestamp in `insert_rows` ([#393](https://www.github.com/googleapis/python-bigquery/issues/393)) ([a1949ae](https://www.github.com/googleapis/python-bigquery/commit/a1949ae20ec4f9c771b0cffbcd70792dd6a30dbf)) + + +### Performance Improvements + +* don't fetch rows when waiting for query to finish ([#400](https://www.github.com/googleapis/python-bigquery/issues/400)) ([730df17](https://www.github.com/googleapis/python-bigquery/commit/730df17ae1ab0b0bb2454f3c134c8f62665bc51b)), closes [#374](https://www.github.com/googleapis/python-bigquery/issues/374) [#394](https://www.github.com/googleapis/python-bigquery/issues/394) + + +### Documentation + +* **samples:** add more clustering code snippets ([#330](https://www.github.com/googleapis/python-bigquery/issues/330)) ([809e4a2](https://www.github.com/googleapis/python-bigquery/commit/809e4a27b94ba30c10e0c9a7e89576a9de9fda2b)), closes [#329](https://www.github.com/googleapis/python-bigquery/issues/329) + + +### Dependencies + +* update required version of opentelementry for opentelemetry-exporter-google-cloud ([#398](https://www.github.com/googleapis/python-bigquery/issues/398)) ([673a9cb](https://www.github.com/googleapis/python-bigquery/commit/673a9cb51c577c1dd016e76f3634b1e9e21482c5)) + ## [2.4.0](https://www.github.com/googleapis/python-bigquery/compare/v2.3.1...v2.4.0) (2020-11-16) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index fe11624d91dd..5836d8051156 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.4.0" +__version__ = "2.5.0" From 3e8020e7082ceb1ccf98ddc0a1bc60dbbc05a519 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 3 Dec 2020 09:32:02 -0600 Subject: [PATCH 1006/2016] feat: add support for materialized views (#408) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #407 🦕 --- .../google/cloud/bigquery/table.py | 95 ++++++++++++++++--- .../samples/snippets/materialized_view.py | 86 +++++++++++++++++ .../snippets/materialized_view_test.py | 93 ++++++++++++++++++ .../tests/unit/test_table.py | 61 ++++++++++-- 4 files changed, 317 insertions(+), 18 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/materialized_view.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f30c057734f5..6daccf518957 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -293,15 +293,18 @@ class Table(object): """ _PROPERTY_TO_API_FIELD = { - "friendly_name": "friendlyName", + "encryption_configuration": "encryptionConfiguration", "expires": "expirationTime", - "time_partitioning": "timePartitioning", - "partitioning_type": "timePartitioning", + "external_data_configuration": "externalDataConfiguration", + "friendly_name": "friendlyName", + "mview_enable_refresh": "materializedView", + "mview_query": "materializedView", + "mview_refresh_interval": "materializedView", "partition_expiration": "timePartitioning", + "partitioning_type": "timePartitioning", + "time_partitioning": "timePartitioning", "view_use_legacy_sql": "view", "view_query": "view", - "external_data_configuration": "externalDataConfiguration", - "encryption_configuration": "encryptionConfiguration", "require_partition_filter": "requirePartitionFilter", } @@ -714,18 +717,14 @@ def view_query(self): Raises: ValueError: For invalid value types. """ - view = self._properties.get("view") - if view is not None: - return view.get("query") + return _helpers._get_sub_prop(self._properties, ["view", "query"]) @view_query.setter def view_query(self, value): if not isinstance(value, six.string_types): raise ValueError("Pass a string") - view = self._properties.get("view") - if view is None: - view = self._properties["view"] = {} - view["query"] = value + _helpers._set_sub_prop(self._properties, ["view", "query"], value) + view = self._properties["view"] # The service defaults useLegacySql to True, but this # client uses Standard SQL by default. if view.get("useLegacySql") is None: @@ -746,6 +745,78 @@ def view_use_legacy_sql(self, value): self._properties["view"] = {} self._properties["view"]["useLegacySql"] = value + @property + def mview_query(self): + """Optional[str]: SQL query defining the table as a materialized + view (defaults to :data:`None`). + """ + return _helpers._get_sub_prop(self._properties, ["materializedView", "query"]) + + @mview_query.setter + def mview_query(self, value): + _helpers._set_sub_prop( + self._properties, ["materializedView", "query"], str(value) + ) + + @mview_query.deleter + def mview_query(self): + """Delete SQL query defining the table as a materialized view.""" + self._properties.pop("materializedView", None) + + @property + def mview_last_refresh_time(self): + """Optional[datetime.datetime]: Datetime at which the materialized view was last + refreshed (:data:`None` until set from the server). + """ + refresh_time = _helpers._get_sub_prop( + self._properties, ["materializedView", "lastRefreshTime"] + ) + if refresh_time is not None: + # refresh_time will be in milliseconds. + return google.cloud._helpers._datetime_from_microseconds( + 1000 * int(refresh_time) + ) + + @property + def mview_enable_refresh(self): + """Optional[bool]: Enable automatic refresh of the materialized view + when the base table is updated. The default value is :data:`True`. + """ + return _helpers._get_sub_prop( + self._properties, ["materializedView", "enableRefresh"] + ) + + @mview_enable_refresh.setter + def mview_enable_refresh(self, value): + return _helpers._set_sub_prop( + self._properties, ["materializedView", "enableRefresh"], value + ) + + @property + def mview_refresh_interval(self): + """Optional[datetime.timedelta]: The maximum frequency at which this + materialized view will be refreshed. The default value is 1800000 + milliseconds (30 minutes). + """ + refresh_interval = _helpers._get_sub_prop( + self._properties, ["materializedView", "refreshIntervalMs"] + ) + if refresh_interval is not None: + return datetime.timedelta(milliseconds=int(refresh_interval)) + + @mview_refresh_interval.setter + def mview_refresh_interval(self, value): + if value is None: + refresh_interval_ms = None + else: + refresh_interval_ms = str(value // datetime.timedelta(milliseconds=1)) + + _helpers._set_sub_prop( + self._properties, + ["materializedView", "refreshIntervalMs"], + refresh_interval_ms, + ) + @property def streaming_buffer(self): """google.cloud.bigquery.StreamingBuffer: Information about a table's diff --git a/packages/google-cloud-bigquery/samples/snippets/materialized_view.py b/packages/google-cloud-bigquery/samples/snippets/materialized_view.py new file mode 100644 index 000000000000..d925ec23033e --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/materialized_view.py @@ -0,0 +1,86 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_materialized_view(override_values={}): + # [START bigquery_create_materialized_view] + from google.cloud import bigquery + + bigquery_client = bigquery.Client() + + view_id = "my-project.my_dataset.my_materialized_view" + base_table_id = "my-project.my_dataset.my_base_table" + # [END bigquery_create_materialized_view] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + base_table_id = override_values.get("base_table_id", view_id) + # [START bigquery_create_materialized_view] + view = bigquery.Table(view_id) + view.mview_query = f""" + SELECT product_id, SUM(clicks) AS sum_clicks + FROM `{base_table_id}` + GROUP BY 1 + """ + + # Make an API request to create the materialized view. + view = bigquery_client.create_table(view) + print(f"Created {view.table_type}: {str(view.reference)}") + # [END bigquery_create_materialized_view] + return view + + +def update_materialized_view(override_values={}): + # [START bigquery_update_materialized_view] + import datetime + from google.cloud import bigquery + + bigquery_client = bigquery.Client() + + view_id = "my-project.my_dataset.my_materialized_view" + # [END bigquery_update_materialized_view] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + # [START bigquery_update_materialized_view] + view = bigquery.Table(view_id) + view.mview_enable_refresh = True + view.mview_refresh_interval = datetime.timedelta(hours=1) + + # Make an API request to update the materialized view. + view = bigquery_client.update_table( + view, + # Pass in a list of any fields you need to modify. + ["mview_enable_refresh", "mview_refresh_interval"], + ) + print(f"Updated {view.table_type}: {str(view.reference)}") + # [END bigquery_update_materialized_view] + return view + + +def delete_materialized_view(override_values={}): + # [START bigquery_delete_materialized_view] + from google.cloud import bigquery + + bigquery_client = bigquery.Client() + + view_id = "my-project.my_dataset.my_materialized_view" + # [END bigquery_delete_materialized_view] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + # [START bigquery_delete_materialized_view] + # Make an API request to delete the materialized view. + bigquery_client.delete_table(view_id) + # [END bigquery_delete_materialized_view] diff --git a/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py b/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py new file mode 100644 index 000000000000..fc3db533ca97 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py @@ -0,0 +1,93 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import uuid + +from google.api_core import exceptions +from google.cloud import bigquery +import pytest + +import materialized_view + + +def temp_suffix(): + return str(uuid.uuid4()).replace("-", "_") + + +@pytest.fixture(scope="module") +def bigquery_client(): + bigquery_client = bigquery.Client() + return bigquery_client + + +@pytest.fixture(autouse=True) +def bigquery_client_patch(monkeypatch, bigquery_client): + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) + + +@pytest.fixture(scope="module") +def project_id(bigquery_client): + return bigquery_client.project + + +@pytest.fixture(scope="module") +def dataset_id(bigquery_client): + dataset_id = f"mvdataset_{temp_suffix()}" + bigquery_client.create_dataset(dataset_id) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture(scope="module") +def base_table_id(bigquery_client, project_id, dataset_id): + base_table_id = f"{project_id}.{dataset_id}.base_{temp_suffix()}" + # Schema from materialized views guide: + # https://cloud.google.com/bigquery/docs/materialized-views#create + base_table = bigquery.Table(base_table_id) + base_table.schema = [ + bigquery.SchemaField("product_id", bigquery.SqlTypeNames.INT64), + bigquery.SchemaField("clicks", bigquery.SqlTypeNames.INT64), + ] + bigquery_client.create_table(base_table) + yield base_table_id + bigquery_client.delete_table(base_table_id) + + +@pytest.fixture(scope="module") +def view_id(bigquery_client, project_id, dataset_id): + view_id = f"{project_id}.{dataset_id}.mview_{temp_suffix()}" + yield view_id + bigquery_client.delete_table(view_id, not_found_ok=True) + + +def test_materialized_view(capsys, bigquery_client, base_table_id, view_id): + override_values = { + "base_table_id": base_table_id, + "view_id": view_id, + } + view = materialized_view.create_materialized_view(override_values) + assert base_table_id in view.mview_query + out, _ = capsys.readouterr() + assert view_id in out + + view = materialized_view.update_materialized_view(override_values) + assert view.mview_enable_refresh + assert view.mview_refresh_interval == datetime.timedelta(hours=1) + out, _ = capsys.readouterr() + assert view_id in out + + materialized_view.delete_materialized_view(override_values) + with pytest.raises(exceptions.NotFound): + bigquery_client.get_table(view_id) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 67874ff91217..c1876adaadfd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime as dt +import datetime import logging import time import unittest @@ -21,6 +21,7 @@ import mock import pkg_resources import pytest +import pytz import six import google.api_core.exceptions @@ -292,6 +293,13 @@ def _get_target_class(): return Table def _make_one(self, *args, **kw): + from google.cloud.bigquery.dataset import DatasetReference + + if len(args) == 0: + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + args = (table_ref,) + return self._get_target_class()(*args, **kw) def _setUpConstants(self): @@ -812,6 +820,48 @@ def test_labels_setter_bad_value(self): with self.assertRaises(ValueError): table.labels = 12345 + def test_mview_query(self): + table = self._make_one() + self.assertIsNone(table.mview_query) + table.mview_query = "SELECT name, SUM(number) FROM dset.tbl GROUP BY 1" + self.assertEqual( + table.mview_query, "SELECT name, SUM(number) FROM dset.tbl GROUP BY 1" + ) + del table.mview_query + self.assertIsNone(table.mview_query) + + def test_mview_last_refresh_time(self): + table = self._make_one() + self.assertIsNone(table.mview_last_refresh_time) + table._properties["materializedView"] = { + "lastRefreshTime": "1606751842496", + } + self.assertEqual( + table.mview_last_refresh_time, + datetime.datetime(2020, 11, 30, 15, 57, 22, 496000, tzinfo=pytz.utc), + ) + + def test_mview_enable_refresh(self): + table = self._make_one() + self.assertIsNone(table.mview_enable_refresh) + table.mview_enable_refresh = True + self.assertTrue(table.mview_enable_refresh) + table.mview_enable_refresh = False + self.assertFalse(table.mview_enable_refresh) + table.mview_enable_refresh = None + self.assertIsNone(table.mview_enable_refresh) + + def test_mview_refresh_interval(self): + table = self._make_one() + self.assertIsNone(table.mview_refresh_interval) + table.mview_refresh_interval = datetime.timedelta(minutes=30) + self.assertEqual(table.mview_refresh_interval, datetime.timedelta(minutes=30)) + self.assertEqual( + table._properties["materializedView"]["refreshIntervalMs"], "1800000" + ) + table.mview_refresh_interval = None + self.assertIsNone(table.mview_refresh_interval) + def test_from_string(self): cls = self._get_target_class() got = cls.from_string("string-project.string_dataset.string_table") @@ -1286,7 +1336,6 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _setUpConstants(self): - import datetime from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.125 @@ -2413,7 +2462,7 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): tzinfo = None if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION: - tzinfo = dt.timezone.utc + tzinfo = datetime.timezone.utc self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows @@ -2421,8 +2470,8 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): self.assertEqual( list(df["some_timestamp"]), [ - dt.datetime(4567, 1, 1, tzinfo=tzinfo), - dt.datetime(9999, 12, 31, tzinfo=tzinfo), + datetime.datetime(4567, 1, 1, tzinfo=tzinfo), + datetime.datetime(9999, 12, 31, tzinfo=tzinfo), ], ) @@ -2454,7 +2503,7 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): self.assertEqual(list(df.columns), ["some_datetime"]) self.assertEqual( list(df["some_datetime"]), - [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], + [datetime.datetime(4567, 1, 1), datetime.datetime(9999, 12, 31)], ) @unittest.skipIf(pandas is None, "Requires `pandas`") From ecafefc55b39a702200a816e9b8ece6bc73378a5 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 3 Dec 2020 19:21:10 +0100 Subject: [PATCH 1007/2016] chore(deps): update dependency grpcio to v1.34.0 (#411) Co-authored-by: Takashi Matsuo --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index eeb94db5a648..d11397bc6936 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.4.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 -grpcio==1.33.2 +grpcio==1.34.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 From c441b056c77b52eb2c903d917a26a352526b5fd6 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Fri, 4 Dec 2020 15:42:46 -0500 Subject: [PATCH 1008/2016] fix: preserve timestamp microsecond precision with rows from REST API (#402) * feat: add formatOption default tru for tablelist and query result * feat: remove float point serialize * fix: lint * feat: remove comments --- .../google/cloud/bigquery/_helpers.py | 4 +- .../google/cloud/bigquery/client.py | 2 + .../tests/unit/job/test_query.py | 4 + .../tests/unit/job/test_query_pandas.py | 6 +- .../tests/unit/test__helpers.py | 8 +- .../tests/unit/test_client.py | 74 ++++++++++--------- .../tests/unit/test_table.py | 24 ++++-- 7 files changed, 69 insertions(+), 53 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 35129d8440f8..6f6a63ea5d03 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -81,8 +81,8 @@ def _bytes_from_json(value, field): def _timestamp_from_json(value, field): """Coerce 'value' to a datetime, if set or not nullable.""" if _not_null(value, field): - # value will be a float in seconds, to microsecond precision, in UTC. - return _datetime_from_microseconds(1e6 * float(value)) + # value will be a integer in seconds, to microsecond precision, in UTC. + return _datetime_from_microseconds(int(value)) def _timestamp_query_param_from_json(value, field): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index cd1474336ea0..168054623ad7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3157,6 +3157,7 @@ def list_rows( if start_index is not None: params["startIndex"] = start_index + params["formatOptions.useInt64Timestamp"] = True row_iterator = RowIterator( client=self, api_request=functools.partial(self._call_api, retry, timeout=timeout), @@ -3237,6 +3238,7 @@ def _list_rows_from_query_results( if start_index is not None: params["startIndex"] = start_index + params["formatOptions.useInt64Timestamp"] = True row_iterator = RowIterator( client=self, api_request=functools.partial(self._call_api, retry, timeout=timeout), diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index daaf2e557108..0567b59cdb45 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -839,6 +839,7 @@ def test_result(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", + "formatOptions.useInt64Timestamp": True, }, timeout=None, ) @@ -887,6 +888,7 @@ def test_result_with_done_job_calls_get_query_results(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", + "formatOptions.useInt64Timestamp": True, }, timeout=None, ) @@ -1118,6 +1120,7 @@ def test_result_w_page_size(self): "maxResults": 3, "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "US", + "formatOptions.useInt64Timestamp": True, }, timeout=None, ) @@ -1129,6 +1132,7 @@ def test_result_w_page_size(self): "maxResults": 3, "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "US", + "formatOptions.useInt64Timestamp": True, }, timeout=None, ) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index cdd6f2b3cf88..d1600ad4376d 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -501,7 +501,7 @@ def test_to_dataframe_column_dtypes(): } row_data = [ [ - "1.4338368E9", + "1433836800000000", "420", "1.1", "1.77", @@ -509,8 +509,8 @@ def test_to_dataframe_column_dtypes(): "true", "1999-12-01", ], - ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], + ["1387811700000000", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], + ["1385565300000000", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] query_resource["rows"] = rows diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index a5258150168e..5907a36785ad 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -190,18 +190,18 @@ def test_w_none_required(self): with self.assertRaises(TypeError): self._call_fut(None, _Field("REQUIRED")) - def test_w_string_value(self): + def test_w_string_int_value(self): from google.cloud._helpers import _EPOCH - coerced = self._call_fut("1.234567", object()) + coerced = self._call_fut("1234567", object()) self.assertEqual( coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) ) - def test_w_float_value(self): + def test_w_int_value(self): from google.cloud._helpers import _EPOCH - coerced = self._call_fut(1.234567, object()) + coerced = self._call_fut(1234567, object()) self.assertEqual( coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c4bdea2f81df..f28455cf8b56 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -6739,42 +6739,21 @@ def test_list_rows(self): self.DS_ID, self.TABLE_ID, ) - WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) - WHEN_1 = WHEN + datetime.timedelta(seconds=1) - WHEN_2 = WHEN + datetime.timedelta(seconds=2) + WHEN_TS = 1437767599006000 + + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS / 1e6).replace(tzinfo=UTC) + WHEN_1 = WHEN + datetime.timedelta(microseconds=1) + WHEN_2 = WHEN + datetime.timedelta(microseconds=2) ROWS = 1234 TOKEN = "TOKEN" - def _bigquery_timestamp_float_repr(ts_float): - # Preserve microsecond precision for E+09 timestamps - return "%0.15E" % (ts_float,) - DATA = { "totalRows": str(ROWS), "pageToken": TOKEN, "rows": [ - { - "f": [ - {"v": "Phred Phlyntstone"}, - {"v": "32"}, - {"v": _bigquery_timestamp_float_repr(WHEN_TS)}, - ] - }, - { - "f": [ - {"v": "Bharney Rhubble"}, - {"v": "33"}, - {"v": _bigquery_timestamp_float_repr(WHEN_TS + 1)}, - ] - }, - { - "f": [ - {"v": "Wylma Phlyntstone"}, - {"v": "29"}, - {"v": _bigquery_timestamp_float_repr(WHEN_TS + 2)}, - ] - }, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}, {"v": WHEN_TS}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}, {"v": WHEN_TS + 1}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}, {"v": WHEN_TS + 2}]}, {"f": [{"v": "Bhettye Rhubble"}, {"v": None}, {"v": None}]}, ], } @@ -6807,7 +6786,10 @@ def _bigquery_timestamp_float_repr(ts_float): self.assertEqual(iterator.next_page_token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=7.5 + method="GET", + path="/%s" % PATH, + query_params={"formatOptions.useInt64Timestamp": True}, + timeout=7.5, ) def test_list_rows_w_start_index_w_page_size(self): @@ -6856,20 +6838,30 @@ def test_list_rows_w_start_index_w_page_size(self): self.assertEqual(len(rows), 2) self.assertEqual(rows[0], Row(("Wylma Phlyntstone",), f2i)) self.assertEqual(rows[1], Row(("Bhettye Rhubble",), f2i)) - self.assertEqual(extra_params, {"startIndex": 1}) + self.assertEqual( + extra_params, {"startIndex": 1, "formatOptions.useInt64Timestamp": True} + ) conn.api_request.assert_has_calls( [ mock.call( method="GET", path="/%s" % PATH, - query_params={"startIndex": 1, "maxResults": 2}, + query_params={ + "startIndex": 1, + "maxResults": 2, + "formatOptions.useInt64Timestamp": True, + }, timeout=None, ), mock.call( method="GET", path="/%s" % PATH, - query_params={"pageToken": "some-page-token", "maxResults": 2}, + query_params={ + "pageToken": "some-page-token", + "maxResults": 2, + "formatOptions.useInt64Timestamp": True, + }, timeout=None, ), ] @@ -6920,6 +6912,7 @@ def test_list_rows_query_params(self): iterator = client.list_rows(table, **test[0]) six.next(iterator.pages) req = conn.api_request.call_args_list[i] + test[1]["formatOptions.useInt64Timestamp"] = True self.assertEqual(req[1]["query_params"], test[1], "for kwargs %s" % test[0]) def test_list_rows_repeated_fields(self): @@ -6979,7 +6972,10 @@ def test_list_rows_repeated_fields(self): conn.api_request.assert_called_once_with( method="GET", path="/%s" % PATH, - query_params={"selectedFields": "color,struct"}, + query_params={ + "selectedFields": "color,struct", + "formatOptions.useInt64Timestamp": True, + }, timeout=None, ) @@ -7047,7 +7043,10 @@ def test_list_rows_w_record_schema(self): self.assertEqual(page_token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", + path="/%s" % PATH, + query_params={"formatOptions.useInt64Timestamp": True}, + timeout=None, ) def test_list_rows_with_missing_schema(self): @@ -7109,7 +7108,10 @@ def test_list_rows_with_missing_schema(self): rows = list(row_iter) conn.api_request.assert_called_once_with( - method="GET", path=tabledata_path, query_params={}, timeout=None + method="GET", + path=tabledata_path, + query_params={"formatOptions.useInt64Timestamp": True}, + timeout=None, ) self.assertEqual(row_iter.total_rows, 3, msg=repr(table)) self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table)) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index c1876adaadfd..0e7b0bb4d890 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2451,8 +2451,8 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): schema = [SchemaField("some_timestamp", "TIMESTAMP")] rows = [ - {"f": [{"v": "81953424000.0"}]}, # 4567-01-01 00:00:00 UTC - {"f": [{"v": "253402214400.0"}]}, # 9999-12-31 00:00:00 UTC + {"f": [{"v": "81953424000000000"}]}, # 4567-01-01 00:00:00 UTC + {"f": [{"v": "253402214400000000"}]}, # 9999-12-31 00:00:00 UTC ] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) @@ -2675,9 +2675,9 @@ def test_to_dataframe_w_various_types_nullable(self): ] row_data = [ [None, None, None, None, None, None], - ["1.4338368E9", "420", "1.1", u"Cash", "true", "1999-12-01"], - ["1.3878117E9", "2580", "17.7", u"Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", u"Credit", "true", "1981-11-04"], + ["1433836800000000", "420", "1.1", u"Cash", "true", "1999-12-01"], + ["1387811700000000", "2580", "17.7", u"Cash", "false", "1953-06-14"], + ["1385565300000000", "2280", "4.4", u"Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -2715,9 +2715,17 @@ def test_to_dataframe_column_dtypes(self): SchemaField("date", "DATE"), ] row_data = [ - ["1.4338368E9", "420", "1.1", "1.77", u"Cash", "true", "1999-12-01"], - ["1.3878117E9", "2580", "17.7", "28.5", u"Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", "7.1", u"Credit", "true", "1981-11-04"], + ["1433836800000000", "420", "1.1", "1.77", u"Cash", "true", "1999-12-01"], + [ + "1387811700000000", + "2580", + "17.7", + "28.5", + u"Cash", + "false", + "1953-06-14", + ], + ["1385565300000000", "2280", "4.4", "7.1", u"Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" From b279f8c3ddbdea5877087e42de9a07563b3c23ee Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 4 Dec 2020 13:58:04 -0700 Subject: [PATCH 1009/2016] chore: require samples checks (#409) Make samples kokoro sessions required --- .../.github/sync-repo-settings.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 packages/google-cloud-bigquery/.github/sync-repo-settings.yaml diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml new file mode 100644 index 000000000000..b18fb9c29d68 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -0,0 +1,14 @@ +# https://github.com/googleapis/repo-automation-bots/tree/master/packages/sync-repo-settings +# Rules for master branch protection +branchProtectionRules: +# Identifies the protection rule pattern. Name of the branch to be protected. +# Defaults to `master` +- pattern: master + requiredStatusCheckContexts: + - 'Kokoro' + - 'Kokoro snippets-3.8' + - 'cla/google' + - 'Samples - Lint' + - 'Samples - Python 3.6' + - 'Samples - Python 3.7' + - 'Samples - Python 3.8' From a184b18c1525dfd7baf7c3f2f61e01128d80dcaa Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 4 Dec 2020 16:04:09 -0600 Subject: [PATCH 1010/2016] feat: convert `BIGNUMERIC` values to decimal objects (#414) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Towards #367 🦕 --- .../google/cloud/bigquery/_helpers.py | 2 + .../tests/unit/test_client.py | 88 ++++++++++++++----- 2 files changed, 67 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 6f6a63ea5d03..716c8a394316 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -188,6 +188,7 @@ def _record_from_json(value, field): "FLOAT": _float_from_json, "FLOAT64": _float_from_json, "NUMERIC": _decimal_from_json, + "BIGNUMERIC": _decimal_from_json, "BOOLEAN": _bool_from_json, "BOOL": _bool_from_json, "STRING": _string_from_json, @@ -347,6 +348,7 @@ def _time_to_json(value): "FLOAT": _float_to_json, "FLOAT64": _float_to_json, "NUMERIC": _decimal_to_json, + "BIGNUMERIC": _decimal_to_json, "BOOLEAN": _bool_to_json, "BOOL": _bool_to_json, "BYTES": _bytes_to_json, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f28455cf8b56..0e68b2538d5d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -6290,38 +6290,43 @@ def test_insert_rows_w_numeric(self): creds = _make_credentials() http = object() client = self._make_one(project=project, credentials=creds, _http=http) - conn = client._connection = make_connection({}) table_ref = DatasetReference(project, ds_id).table(table_id) - schema = [SchemaField("account", "STRING"), SchemaField("balance", "NUMERIC")] - insert_table = table.Table(table_ref, schema=schema) rows = [ ("Savings", decimal.Decimal("23.47")), ("Checking", decimal.Decimal("1.98")), ("Mortgage", decimal.Decimal("-12345678909.87654321")), ] + schemas = [ + [SchemaField("account", "STRING"), SchemaField("balance", "NUMERIC")], + [SchemaField("account", "STRING"), SchemaField("balance", "BIGNUMERIC")], + ] - with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))): - errors = client.insert_rows(insert_table, rows) + for schema in schemas: + conn = client._connection = make_connection({}) - self.assertEqual(len(errors), 0) - rows_json = [ - {"account": "Savings", "balance": "23.47"}, - {"account": "Checking", "balance": "1.98"}, - {"account": "Mortgage", "balance": "-12345678909.87654321"}, - ] - sent = { - "rows": [ - {"json": row, "insertId": str(i)} for i, row in enumerate(rows_json) + insert_table = table.Table(table_ref, schema=schema) + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))): + errors = client.insert_rows(insert_table, rows) + + self.assertEqual(len(errors), 0) + rows_json = [ + {"account": "Savings", "balance": "23.47"}, + {"account": "Checking", "balance": "1.98"}, + {"account": "Mortgage", "balance": "-12345678909.87654321"}, ] - } - conn.api_request.assert_called_once_with( - method="POST", - path="/projects/{}/datasets/{}/tables/{}/insertAll".format( - project, ds_id, table_id - ), - data=sent, - timeout=None, - ) + sent = { + "rows": [ + {"json": row, "insertId": str(i)} for i, row in enumerate(rows_json) + ] + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/{}/datasets/{}/tables/{}/insertAll".format( + project, ds_id, table_id + ), + data=sent, + timeout=None, + ) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe(self): @@ -6915,6 +6920,43 @@ def test_list_rows_query_params(self): test[1]["formatOptions.useInt64Timestamp"] = True self.assertEqual(req[1]["query_params"], test[1], "for kwargs %s" % test[0]) + def test_list_rows_w_numeric(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + resource = { + "totalRows": 3, + "rows": [ + {"f": [{"v": "-1.23456789"}, {"v": "-123456789.987654321"}]}, + {"f": [{"v": None}, {"v": "3.141592653589793238462643383279502884"}]}, + {"f": [{"v": "2718281828459045235360287471.352662497"}, {"v": None}]}, + ], + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + client._connection = make_connection(resource) + schema = [ + SchemaField("num", "NUMERIC"), + SchemaField("bignum", "BIGNUMERIC"), + ] + table = Table(self.TABLE_REF, schema=schema) + + iterator = client.list_rows(table) + rows = list(iterator) + + self.assertEqual(len(rows), 3) + self.assertEqual(rows[0]["num"], decimal.Decimal("-1.23456789")) + self.assertEqual(rows[0]["bignum"], decimal.Decimal("-123456789.987654321")) + self.assertIsNone(rows[1]["num"]) + self.assertEqual( + rows[1]["bignum"], decimal.Decimal("3.141592653589793238462643383279502884") + ) + self.assertEqual( + rows[2]["num"], decimal.Decimal("2718281828459045235360287471.352662497") + ) + self.assertIsNone(rows[2]["bignum"]) + def test_list_rows_repeated_fields(self): from google.cloud.bigquery.schema import SchemaField From 002c43ee32a497aacbd887f84b4f4b82c6a6ae95 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 5 Dec 2020 06:50:06 +0100 Subject: [PATCH 1011/2016] chore(deps): update dependency google-cloud-bigquery to v2.5.0 (#410) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==2.4.0` -> `==2.5.0` | --- ### Release Notes
googleapis/python-bigquery ### [`v2.5.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​250-httpswwwgithubcomgoogleapispython-bigquerycomparev240v250-2020-12-02) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.4.0...v2.5.0) ##### Features - add `TableReference.__str__` to get table ID in standard SQL ([#​405](https://www.github.com/googleapis/python-bigquery/issues/405)) ([53dff2a](https://www.github.com/googleapis/python-bigquery/commit/53dff2ad3889af04369a22437e6ab9b92c5755b6)), closes [#​354](https://www.github.com/googleapis/python-bigquery/issues/354) - add progress bar for magics ([#​396](https://www.github.com/googleapis/python-bigquery/issues/396)) ([04d0273](https://www.github.com/googleapis/python-bigquery/commit/04d027317a99e3f353e0b7a18076da9b6ba4d8d3)) - add support for unrecognized model types ([#​401](https://www.github.com/googleapis/python-bigquery/issues/401)) ([168f035](https://www.github.com/googleapis/python-bigquery/commit/168f0354c4815bd1aeadbd4e388dcc9b32f97d6b)) ##### Bug Fixes - avoid floating point for timestamp in `insert_rows` ([#​393](https://www.github.com/googleapis/python-bigquery/issues/393)) ([a1949ae](https://www.github.com/googleapis/python-bigquery/commit/a1949ae20ec4f9c771b0cffbcd70792dd6a30dbf)) ##### Performance Improvements - don't fetch rows when waiting for query to finish ([#​400](https://www.github.com/googleapis/python-bigquery/issues/400)) ([730df17](https://www.github.com/googleapis/python-bigquery/commit/730df17ae1ab0b0bb2454f3c134c8f62665bc51b)), closes [#​374](https://www.github.com/googleapis/python-bigquery/issues/374) [#​394](https://www.github.com/googleapis/python-bigquery/issues/394) ##### Documentation - **samples:** add more clustering code snippets ([#​330](https://www.github.com/googleapis/python-bigquery/issues/330)) ([809e4a2](https://www.github.com/googleapis/python-bigquery/commit/809e4a27b94ba30c10e0c9a7e89576a9de9fda2b)), closes [#​329](https://www.github.com/googleapis/python-bigquery/issues/329) ##### Dependencies - update required version of opentelementry for opentelemetry-exporter-google-cloud ([#​398](https://www.github.com/googleapis/python-bigquery/issues/398)) ([673a9cb](https://www.github.com/googleapis/python-bigquery/commit/673a9cb51c577c1dd016e76f3634b1e9e21482c5))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index d11397bc6936..3eecbf546c70 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.4.0 +google-cloud-bigquery==2.5.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.34.0 From 5f502173aa4227674661633c43d9d75ed6e96885 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 7 Dec 2020 12:38:45 -0600 Subject: [PATCH 1012/2016] docs: update intersphinx links (#404) --- .../.kokoro/docs/common.cfg | 2 +- .../.kokoro/samples/python3.6/common.cfg | 6 + .../.kokoro/samples/python3.7/common.cfg | 6 + .../.kokoro/samples/python3.8/common.cfg | 6 + .../.kokoro/test-samples.sh | 8 +- .../google-cloud-bigquery/CODE_OF_CONDUCT.md | 123 +++++++++++++----- .../docs/bigquery_v2/types.rst | 1 + packages/google-cloud-bigquery/docs/conf.py | 7 +- .../google/cloud/bigquery_v2/types/model.py | 4 +- .../cloud/bigquery_v2/types/standard_sql.py | 4 +- .../samples/snippets/noxfile.py | 26 +++- packages/google-cloud-bigquery/synth.metadata | 94 +------------ packages/google-cloud-bigquery/synth.py | 19 ++- 13 files changed, 170 insertions(+), 136 deletions(-) diff --git a/packages/google-cloud-bigquery/.kokoro/docs/common.cfg b/packages/google-cloud-bigquery/.kokoro/docs/common.cfg index 8f9807f722a4..0c99ae611dfe 100644 --- a/packages/google-cloud-bigquery/.kokoro/docs/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/docs/common.cfg @@ -30,7 +30,7 @@ env_vars: { env_vars: { key: "V2_STAGING_BUCKET" - value: "docs-staging-v2-staging" + value: "docs-staging-v2" } # It will upload the docker image after successful builds. diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg index a56768eae259..f3b930960b41 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.6" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py36" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg index c93747180b2e..fc06545655d4 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.7" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py37" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg index 9808f15e32a9..2b0bf59b3ef5 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.8" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py38" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples.sh b/packages/google-cloud-bigquery/.kokoro/test-samples.sh index 905732a405e3..c5653a81d9c6 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples.sh @@ -28,6 +28,12 @@ if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then git checkout $LATEST_RELEASE fi +# Exit early if samples directory doesn't exist +if [ ! -d "./samples" ]; then + echo "No tests run. `./samples` not found" + exit 0 +fi + # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 @@ -101,4 +107,4 @@ cd "$ROOT" # Workaround for Kokoro permissions issue: delete secrets rm testing/{test-env.sh,client-secrets.json,service-account.json} -exit "$RTN" \ No newline at end of file +exit "$RTN" diff --git a/packages/google-cloud-bigquery/CODE_OF_CONDUCT.md b/packages/google-cloud-bigquery/CODE_OF_CONDUCT.md index b3d1f6029849..039f43681204 100644 --- a/packages/google-cloud-bigquery/CODE_OF_CONDUCT.md +++ b/packages/google-cloud-bigquery/CODE_OF_CONDUCT.md @@ -1,44 +1,95 @@ -# Contributor Code of Conduct +# Code of Conduct -As contributors and maintainers of this project, -and in the interest of fostering an open and welcoming community, -we pledge to respect all people who contribute through reporting issues, -posting feature requests, updating documentation, -submitting pull requests or patches, and other activities. +## Our Pledge -We are committed to making participation in this project -a harassment-free experience for everyone, -regardless of level of experience, gender, gender identity and expression, -sexual orientation, disability, personal appearance, -body size, race, ethnicity, age, religion, or nationality. +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of +experience, education, socio-economic status, nationality, personal appearance, +race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members Examples of unacceptable behavior by participants include: -* The use of sexualized language or imagery -* Personal attacks -* Trolling or insulting/derogatory comments -* Public or private harassment -* Publishing other's private information, -such as physical or electronic -addresses, without explicit permission -* Other unethical or unprofessional conduct. +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject -comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct. -By adopting this Code of Conduct, -project maintainers commit themselves to fairly and consistently -applying these principles to every aspect of managing this project. -Project maintainers who do not follow or enforce the Code of Conduct -may be permanently removed from the project team. - -This code of conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. - -Instances of abusive, harassing, or otherwise unacceptable behavior -may be reported by opening an issue -or contacting one or more of the project maintainers. - -This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.2.0, -available at [http://contributor-covenant.org/version/1/2/0/](http://contributor-covenant.org/version/1/2/0/) +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, or to ban temporarily or permanently any +contributor for other behaviors that they deem inappropriate, threatening, +offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +This Code of Conduct also applies outside the project spaces when the Project +Steward has a reasonable belief that an individual's behavior may have a +negative impact on the project or its community. + +## Conflict Resolution + +We do not believe that all conflict is bad; healthy debate and disagreement +often yield positive results. However, it is never okay to be disrespectful or +to engage in behavior that violates the project’s code of conduct. + +If you see someone violating the code of conduct, you are encouraged to address +the behavior directly with those involved. Many issues can be resolved quickly +and easily, and this gives people more control over the outcome of their +dispute. If you are unable to resolve the matter for any reason, or if the +behavior is threatening or harassing, report it. We are dedicated to providing +an environment where participants feel welcome and safe. + + +Reports should be directed to *googleapis-stewards@google.com*, the +Project Steward(s) for *Google Cloud Client Libraries*. It is the Project Steward’s duty to +receive and address reported violations of the code of conduct. They will then +work with a committee consisting of representatives from the Open Source +Programs Office and the Google Open Source Strategy team. If for any reason you +are uncomfortable reaching out to the Project Steward, please email +opensource@google.com. + +We will investigate every complaint, but you may not receive a direct response. +We will use our discretion in determining when and how to follow up on reported +incidents, which may range from not taking action to permanent expulsion from +the project and project-sponsored spaces. We will notify the accused of the +report and provide them an opportunity to discuss it before any action is taken. +The identity of the reporter will be omitted from the details of the report +supplied to the accused. In potentially harmful situations, such as ongoing +harassment or threats to anyone's safety, we may take action without notice. + +## Attribution + +This Code of Conduct is adapted from the Contributor Covenant, version 1.4, +available at +https://www.contributor-covenant.org/version/1/4/code-of-conduct.html \ No newline at end of file diff --git a/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst b/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst index f4380995849e..41b906514840 100644 --- a/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst +++ b/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst @@ -3,3 +3,4 @@ Types for Google Cloud Bigquery v2 API .. automodule:: google.cloud.bigquery_v2.types :members: + :show-inheritance: diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index ee59f3492a90..37e0c46af9d4 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -346,10 +346,11 @@ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - "python": ("http://python.readthedocs.org/en/latest/", None), - "google-auth": ("https://google-auth.readthedocs.io/en/stable", None), + "python": ("https://python.readthedocs.org/en/latest/", None), + "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), - "grpc": ("https://grpc.io/grpc/python/", None), + "grpc": ("https://grpc.github.io/grpc/python/", None), + "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index 3a7bbf43b50b..c3530dec2bdb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -1495,7 +1495,7 @@ class PatchModelRequest(proto.Message): model_id = proto.Field(proto.STRING, number=3) - model = proto.Field(proto.MESSAGE, number=4, message=Model,) + model = proto.Field(proto.MESSAGE, number=4, message="Model",) class DeleteModelRequest(proto.Message): @@ -1559,7 +1559,7 @@ class ListModelsResponse(proto.Message): def raw_page(self): return self - models = proto.RepeatedField(proto.MESSAGE, number=1, message=Model,) + models = proto.RepeatedField(proto.MESSAGE, number=1, message="Model",) next_page_token = proto.Field(proto.STRING, number=2) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py index 1a32a3c759dd..80e4632f7e15 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py @@ -90,7 +90,7 @@ class StandardSqlField(proto.Message): name = proto.Field(proto.STRING, number=1) - type = proto.Field(proto.MESSAGE, number=2, message=StandardSqlDataType,) + type = proto.Field(proto.MESSAGE, number=2, message="StandardSqlDataType",) class StandardSqlStructType(proto.Message): @@ -101,7 +101,7 @@ class StandardSqlStructType(proto.Message): """ - fields = proto.RepeatedField(proto.MESSAGE, number=1, message=StandardSqlField,) + fields = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 5660f08be441..ab2c49227c3b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -38,6 +38,9 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. "ignored_versions": ["2.7"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string @@ -130,7 +133,10 @@ def _determine_local_import_names(start_dir): @nox.session def lint(session): - session.install("flake8", "flake8-import-order") + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8", "flake8-import-order") + else: + session.install("flake8", "flake8-import-order", "flake8-annotations") local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ @@ -141,6 +147,19 @@ def lint(session): session.run("flake8", *args) +# +# Black +# + + +@nox.session +def blacken(session): + session.install("black") + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + # # Sample Tests # @@ -199,6 +218,11 @@ def _get_repo_root(): break if Path(p / ".git").exists(): return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) p = p.parent raise Exception("Unable to detect repository root.") diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index db77e463dd2b..6b785486026a 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -3,30 +3,30 @@ { "git": { "name": ".", - "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "5178b55682f5e264bfc082cde26acb1fdc953a18" + "remote": "git@github.com:tswast/python-bigquery.git", + "sha": "5a422eb20c57dae66c5716fd319b66432d3edce6" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "215c12ade72d9d9616457d9b8b2f8a37f38e79f3", - "internalRef": "337113354" + "sha": "df4fd38d040c5c8a0869936205bca13fb64b2cff", + "internalRef": "344443035" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "f3c04883d6c43261ff13db1f52d03a283be06871" + "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "f3c04883d6c43261ff13db1f52d03a283be06871" + "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5" } } ], @@ -40,87 +40,5 @@ "generator": "bazel" } } - ], - "generatedFiles": [ - ".flake8", - ".github/CONTRIBUTING.md", - ".github/ISSUE_TEMPLATE/bug_report.md", - ".github/ISSUE_TEMPLATE/feature_request.md", - ".github/ISSUE_TEMPLATE/support_request.md", - ".github/PULL_REQUEST_TEMPLATE.md", - ".github/release-please.yml", - ".github/snippet-bot.yml", - ".gitignore", - ".kokoro/build.sh", - ".kokoro/continuous/common.cfg", - ".kokoro/continuous/continuous.cfg", - ".kokoro/docker/docs/Dockerfile", - ".kokoro/docker/docs/fetch_gpg_keys.sh", - ".kokoro/docs/common.cfg", - ".kokoro/docs/docs-presubmit.cfg", - ".kokoro/docs/docs.cfg", - ".kokoro/populate-secrets.sh", - ".kokoro/presubmit/common.cfg", - ".kokoro/presubmit/presubmit.cfg", - ".kokoro/presubmit/system-3.8.cfg", - ".kokoro/publish-docs.sh", - ".kokoro/release.sh", - ".kokoro/release/common.cfg", - ".kokoro/release/release.cfg", - ".kokoro/samples/lint/common.cfg", - ".kokoro/samples/lint/continuous.cfg", - ".kokoro/samples/lint/periodic.cfg", - ".kokoro/samples/lint/presubmit.cfg", - ".kokoro/samples/python3.6/common.cfg", - ".kokoro/samples/python3.6/continuous.cfg", - ".kokoro/samples/python3.6/periodic.cfg", - ".kokoro/samples/python3.6/presubmit.cfg", - ".kokoro/samples/python3.7/common.cfg", - ".kokoro/samples/python3.7/continuous.cfg", - ".kokoro/samples/python3.7/periodic.cfg", - ".kokoro/samples/python3.7/presubmit.cfg", - ".kokoro/samples/python3.8/common.cfg", - ".kokoro/samples/python3.8/continuous.cfg", - ".kokoro/samples/python3.8/periodic.cfg", - ".kokoro/samples/python3.8/presubmit.cfg", - ".kokoro/test-samples.sh", - ".kokoro/trampoline.sh", - ".kokoro/trampoline_v2.sh", - ".trampolinerc", - "CODE_OF_CONDUCT.md", - "CONTRIBUTING.rst", - "LICENSE", - "MANIFEST.in", - "docs/_static/custom.css", - "docs/_templates/layout.html", - "docs/bigquery_v2/services.rst", - "docs/bigquery_v2/types.rst", - "docs/conf.py", - "google/cloud/bigquery_v2/__init__.py", - "google/cloud/bigquery_v2/proto/encryption_config.proto", - "google/cloud/bigquery_v2/proto/model.proto", - "google/cloud/bigquery_v2/proto/model_reference.proto", - "google/cloud/bigquery_v2/proto/standard_sql.proto", - "google/cloud/bigquery_v2/proto/table_reference.proto", - "google/cloud/bigquery_v2/py.typed", - "google/cloud/bigquery_v2/types/__init__.py", - "google/cloud/bigquery_v2/types/encryption_config.py", - "google/cloud/bigquery_v2/types/model.py", - "google/cloud/bigquery_v2/types/model_reference.py", - "google/cloud/bigquery_v2/types/standard_sql.py", - "google/cloud/bigquery_v2/types/table_reference.py", - "mypy.ini", - "renovate.json", - "samples/AUTHORING_GUIDE.md", - "samples/CONTRIBUTING.md", - "scripts/decrypt-secrets.sh", - "scripts/readme-gen/readme_gen.py", - "scripts/readme-gen/templates/README.tmpl.rst", - "scripts/readme-gen/templates/auth.tmpl.rst", - "scripts/readme-gen/templates/auth_api_key.tmpl.rst", - "scripts/readme-gen/templates/install_deps.tmpl.rst", - "scripts/readme-gen/templates/install_portaudio.tmpl.rst", - "setup.cfg", - "testing/.gitignore" ] } \ No newline at end of file diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index 97466d0f42b1..341c5832f49d 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -59,14 +59,21 @@ # BigQuery has a custom multiprocessing note s.move( templated_files, - excludes=["noxfile.py", "docs/multiprocessing.rst", ".coveragerc"] + excludes=[ + "noxfile.py", + "docs/multiprocessing.rst", + ".coveragerc", + # Include custom SNIPPETS_TESTS job for performance. + # https://github.com/googleapis/python-bigquery/issues/191 + ".kokoro/presubmit/presubmit.cfg", + ] ) # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- -# python.py_samples() # TODO: why doesn't this work here with Bazel? +python.py_samples() # Do not expose ModelServiceClient, as there is no public API endpoint for the # models service. @@ -95,6 +102,14 @@ '{"members": True, "inherited-members": True}' ) +# Avoid breaking change due to change in field renames. +# https://github.com/googleapis/python-bigquery/issues/319 +s.replace( + "google/cloud/bigquery_v2/types/standard_sql.py", + r"type_ ", + "type " +) + # Tell Sphinx to ingore autogenerated docs files. s.replace( "docs/conf.py", From 715aca4576600d426f2a7088e3195d9e3bfc53c7 Mon Sep 17 00:00:00 2001 From: Carlos de la Guardia Date: Mon, 7 Dec 2020 13:42:59 -0600 Subject: [PATCH 1013/2016] feat: support CSV format in `load_table_from_dataframe` pandas connector (#399) * WIP: support alternative serialization formats for load_table_from_dataframe * fix: address review comments * docs: make clear repeated fields are not supportedin csv --- .../google/cloud/bigquery/client.py | 82 +++++++---- .../google-cloud-bigquery/tests/system.py | 134 ++++++++++++++++++ .../tests/unit/test_client.py | 50 +++++++ 3 files changed, 239 insertions(+), 27 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 168054623ad7..c7cd694c685b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2111,9 +2111,12 @@ def load_table_from_dataframe( .. note:: - Due to the way REPEATED fields are encoded in the ``parquet`` file - format, a mismatch with the existing table schema can occur, and - 100% compatibility cannot be guaranteed for REPEATED fields. + REPEATED fields are NOT supported when using the CSV source format. + They are supported when using the PARQUET source format, but + due to the way they are encoded in the ``parquet`` file, + a mismatch with the existing table schema can occur, so + 100% compatibility cannot be guaranteed for REPEATED fields when + using the parquet format. https://github.com/googleapis/python-bigquery/issues/17 @@ -2153,6 +2156,14 @@ def load_table_from_dataframe( column names matching those of the dataframe. The BigQuery schema is used to determine the correct data type conversion. Indexes are not loaded. Requires the :mod:`pyarrow` library. + + By default, this method uses the parquet source format. To + override this, supply a value for + :attr:`~google.cloud.bigquery.job.LoadJobConfig.source_format` + with the format name. Currently only + :attr:`~google.cloud.bigquery.job.SourceFormat.CSV` and + :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are + supported. parquet_compression (Optional[str]): [Beta] The compression method to use if intermittently serializing ``dataframe`` to a parquet file. @@ -2181,10 +2192,6 @@ def load_table_from_dataframe( If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ - if pyarrow is None: - # pyarrow is now the only supported parquet engine. - raise ValueError("This method requires pyarrow to be installed") - job_id = _make_job_id(job_id, job_id_prefix) if job_config: @@ -2197,15 +2204,20 @@ def load_table_from_dataframe( else: job_config = job.LoadJobConfig() - if job_config.source_format: - if job_config.source_format != job.SourceFormat.PARQUET: - raise ValueError( - "Got unexpected source_format: '{}'. Currently, only PARQUET is supported".format( - job_config.source_format - ) - ) - else: + supported_formats = {job.SourceFormat.CSV, job.SourceFormat.PARQUET} + if job_config.source_format is None: + # default value job_config.source_format = job.SourceFormat.PARQUET + if job_config.source_format not in supported_formats: + raise ValueError( + "Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported".format( + job_config.source_format + ) + ) + + if pyarrow is None and job_config.source_format == job.SourceFormat.PARQUET: + # pyarrow is now the only supported parquet engine. + raise ValueError("This method requires pyarrow to be installed") if location is None: location = self.location @@ -2245,27 +2257,43 @@ def load_table_from_dataframe( stacklevel=2, ) - tmpfd, tmppath = tempfile.mkstemp(suffix="_job_{}.parquet".format(job_id[:8])) + tmpfd, tmppath = tempfile.mkstemp( + suffix="_job_{}.{}".format(job_id[:8], job_config.source_format.lower()) + ) os.close(tmpfd) try: - if job_config.schema: - if parquet_compression == "snappy": # adjust the default value - parquet_compression = parquet_compression.upper() - _pandas_helpers.dataframe_to_parquet( - dataframe, - job_config.schema, + if job_config.source_format == job.SourceFormat.PARQUET: + + if job_config.schema: + if parquet_compression == "snappy": # adjust the default value + parquet_compression = parquet_compression.upper() + + _pandas_helpers.dataframe_to_parquet( + dataframe, + job_config.schema, + tmppath, + parquet_compression=parquet_compression, + ) + else: + dataframe.to_parquet(tmppath, compression=parquet_compression) + + else: + + dataframe.to_csv( tmppath, - parquet_compression=parquet_compression, + index=False, + header=False, + encoding="utf-8", + float_format="%.17g", + date_format="%Y-%m-%d %H:%M:%S.%f", ) - else: - dataframe.to_parquet(tmppath, compression=parquet_compression) - with open(tmppath, "rb") as parquet_file: + with open(tmppath, "rb") as tmpfile: file_size = os.path.getsize(tmppath) return self.load_table_from_file( - parquet_file, + tmpfile, destination, num_retries=num_retries, rewind=True, diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index e347c8a7093f..d481967d8088 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1165,6 +1165,140 @@ def test_load_table_from_json_basic_use(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 2) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): + from google.cloud.bigquery.job import SourceFormat + + table_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + df_data = collections.OrderedDict( + [ + ("bool_col", [True, None, False]), + ("bytes_col", ["abc", None, "def"]), + ( + "date_col", + [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], + ), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ("str_col", [u"abc", None, u"def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + ), + ] + ) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) + + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( + Config.CLIENT.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig( + schema=table_schema, source_format=SourceFormat.CSV + ) + load_job = Config.CLIENT.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = Config.CLIENT.get_table(table_id) + self.assertEqual(tuple(table.schema), table_schema) + self.assertEqual(table.num_rows, 3) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats(self): + from google.cloud.bigquery.job import SourceFormat + + table_schema = (bigquery.SchemaField("float_col", "FLOAT"),) + df_data = collections.OrderedDict( + [ + ( + "float_col", + [ + 0.14285714285714285, + 0.51428571485748, + 0.87128748, + 1.807960649, + 2.0679610649, + 2.4406779661016949, + 3.7148514257, + 3.8571428571428572, + 1.51251252e40, + ], + ), + ] + ) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) + + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( + Config.CLIENT.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig( + schema=table_schema, source_format=SourceFormat.CSV + ) + load_job = Config.CLIENT.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = Config.CLIENT.get_table(table_id) + rows = self._fetch_single_page(table) + floats = [r.values()[0] for r in rows] + self.assertEqual(tuple(table.schema), table_schema) + self.assertEqual(table.num_rows, 9) + self.assertEqual(floats, df_data["float_col"]) + def test_load_table_from_json_schema_autodetect(self): json_rows = [ {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False}, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 0e68b2538d5d..e5ead0cccf33 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -8410,6 +8410,56 @@ def test_load_table_from_dataframe_w_invaild_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_with_csv_source_format(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, + source_format=job.SourceFormat.CSV, + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=None, + project=None, + job_config=mock.ANY, + timeout=None, + ) + + sent_file = load_table_from_file.mock_calls[0][1][1] + assert sent_file.closed + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.CSV + def test_load_table_from_json_basic_use(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job From cb334b91247a8aa23ea5592e2048816425a312de Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 7 Dec 2020 13:19:42 -0700 Subject: [PATCH 1014/2016] chore: release 2.6.0 (#412) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 19 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index c71f85d0c78c..c0233bbd4ba8 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.6.0](https://www.github.com/googleapis/python-bigquery/compare/v2.5.0...v2.6.0) (2020-12-07) + + +### Features + +* add support for materialized views ([#408](https://www.github.com/googleapis/python-bigquery/issues/408)) ([57ffc66](https://www.github.com/googleapis/python-bigquery/commit/57ffc665319331e0a00583d5d652fd14a510cf2a)), closes [#407](https://www.github.com/googleapis/python-bigquery/issues/407) +* convert `BIGNUMERIC` values to decimal objects ([#414](https://www.github.com/googleapis/python-bigquery/issues/414)) ([d472d2d](https://www.github.com/googleapis/python-bigquery/commit/d472d2d2b33e40b954652d31476dea8c90e6a2dc)), closes [#367](https://www.github.com/googleapis/python-bigquery/issues/367) +* support CSV format in `load_table_from_dataframe` pandas connector ([#399](https://www.github.com/googleapis/python-bigquery/issues/399)) ([0046742](https://www.github.com/googleapis/python-bigquery/commit/0046742abdd2b5eab3c3e935316f91e7eef44d44)) + + +### Bug Fixes + +* preserve timestamp microsecond precision with rows from REST API ([#402](https://www.github.com/googleapis/python-bigquery/issues/402)) ([04510a7](https://www.github.com/googleapis/python-bigquery/commit/04510a7dc7570466550bbdf500d7020bef2af44d)) + + +### Documentation + +* update intersphinx links ([#404](https://www.github.com/googleapis/python-bigquery/issues/404)) ([a9d8ae8](https://www.github.com/googleapis/python-bigquery/commit/a9d8ae8a920dec655b77dca9d9128e569f1d07a7)) + ## [2.5.0](https://www.github.com/googleapis/python-bigquery/compare/v2.4.0...v2.5.0) (2020-12-02) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 5836d8051156..ae34a9fbeb85 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.5.0" +__version__ = "2.6.0" From d289aeff6bed65ae7b3d806854e7b3f78f3d5ad9 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 8 Dec 2020 11:52:38 -0700 Subject: [PATCH 1015/2016] ci: skip docfx in main 'Kokoro' presubmit (#423) * ci: skip docfx in main 'Kokoro' presubmit * fix: specify default sessions in noxfile * add conditional for cd to project root --- packages/google-cloud-bigquery/.kokoro/build.sh | 6 +++++- .../.kokoro/docs/docs-presubmit.cfg | 11 +++++++++++ packages/google-cloud-bigquery/noxfile.py | 12 ++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.kokoro/build.sh b/packages/google-cloud-bigquery/.kokoro/build.sh index 0e71e2aca650..cb81a05f883e 100755 --- a/packages/google-cloud-bigquery/.kokoro/build.sh +++ b/packages/google-cloud-bigquery/.kokoro/build.sh @@ -15,7 +15,11 @@ set -eo pipefail -cd github/python-bigquery +if [[ -z "${PROJECT_ROOT:-}" ]]; then + PROJECT_ROOT="github/python-bigquery" +fi + +cd "${PROJECT_ROOT}" # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 diff --git a/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg index 1118107829b7..5c216b4bcfd4 100644 --- a/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg +++ b/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg @@ -15,3 +15,14 @@ env_vars: { key: "TRAMPOLINE_IMAGE_UPLOAD" value: "false" } + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/build.sh" +} + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "docs docfx" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 441782583f43..95818d3c8e9d 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -25,6 +25,18 @@ BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() +# 'docfx' is excluded since it only needs to run in 'docs-presubmit' +nox.options.sessions = [ + "unit", + "system", + "snippets", + "cover", + "lint", + "lint_setup_py", + "blacken", + "docs", +] + def default(session): """Default unit test session. From 91dcd1a8e76e3b3a7d4440d46478d83be97b8275 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 8 Dec 2020 13:16:28 -0600 Subject: [PATCH 1016/2016] docs: add examples of `fields` argument to update methods (#418) --- .../google/cloud/bigquery/client.py | 60 ++++++++++++++++--- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index c7cd694c685b..28cac64adfc5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -881,7 +881,22 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): dataset (google.cloud.bigquery.dataset.Dataset): The dataset to update. fields (Sequence[str]): - The properties of ``dataset`` to change (e.g. "friendly_name"). + The properties of ``dataset`` to change. These are strings + corresponding to the properties of + :class:`~google.cloud.bigquery.dataset.Dataset`. + + For example, to update the default expiration times, specify + both properties in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_dataset( + dataset, + [ + "default_partition_expiration_ms", + "default_table_expiration_ms", + ] + ) retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -928,8 +943,18 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): Args: model (google.cloud.bigquery.model.Model): The model to update. fields (Sequence[str]): - The fields of ``model`` to change, spelled as the Model - properties (e.g. "friendly_name"). + The properties of ``model`` to change. These are strings + corresponding to the properties of + :class:`~google.cloud.bigquery.model.Model`. + + For example, to update the descriptive properties of the model, + specify them in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_model( + model, ["description", "friendly_name"] + ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -980,11 +1005,20 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): occurred since the read. Args: - routine (google.cloud.bigquery.routine.Routine): The routine to update. + routine (google.cloud.bigquery.routine.Routine): + The routine to update. fields (Sequence[str]): The fields of ``routine`` to change, spelled as the - :class:`~google.cloud.bigquery.routine.Routine` properties - (e.g. ``type_``). + :class:`~google.cloud.bigquery.routine.Routine` properties. + + For example, to update the description property of the routine, + specify it in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_routine( + routine, ["description"] + ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1035,8 +1069,18 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): Args: table (google.cloud.bigquery.table.Table): The table to update. fields (Sequence[str]): - The fields of ``table`` to change, spelled as the Table - properties (e.g. "friendly_name"). + The fields of ``table`` to change, spelled as the + :class:`~google.cloud.bigquery.table.Table` properties. + + For example, to update the descriptive properties of the table, + specify them in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_table( + table, + ["description", "friendly_name"] + ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): From 7a0c0417574fff788b16e86c838bd0574aa51155 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 8 Dec 2020 21:54:28 +0100 Subject: [PATCH 1017/2016] chore(deps): update dependency google-cloud-bigquery to v2.6.0 (#419) Co-authored-by: Tim Swast --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 3eecbf546c70..f9211d66c00b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.5.0 +google-cloud-bigquery==2.6.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.34.0 From 75b6b659d1ff995d9173ecec441f31edaef84972 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 8 Dec 2020 22:08:03 +0100 Subject: [PATCH 1018/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.1.0 (#369) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | minor | `==2.0.1` -> `==2.1.0` | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.1.0`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​210-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev201v210-2020-11-04) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.0.1...v2.1.0) ##### Features - add public transport property and path formatting methods to client ([#​80](https://www.github.com/googleapis/python-bigquery-storage/issues/80)) ([fbbb439](https://www.github.com/googleapis/python-bigquery-storage/commit/fbbb439b8c77fa9367a4b5bea725dd0b0f26b769)) ##### Documentation - add intersphinx to proto-plus library ([#​86](https://www.github.com/googleapis/python-bigquery-storage/issues/86)) ([4cd35d2](https://www.github.com/googleapis/python-bigquery-storage/commit/4cd35d21de4486f659b7efc4ff4dcb9b4eee6c9e)) - show inheritance in types reference ([#​91](https://www.github.com/googleapis/python-bigquery-storage/issues/91)) ([e5fd4e6](https://www.github.com/googleapis/python-bigquery-storage/commit/e5fd4e62de2768a49d633dc3a81e03d64df9fe1f)) ##### [2.0.1](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.0.0...v2.0.1) (2020-10-21) ##### Bug Fixes - don't fail with 429 when downloading wide tables ([#​79](https://www.github.com/googleapis/python-bigquery-storage/issues/79)) ([45faf97](https://www.github.com/googleapis/python-bigquery-storage/commit/45faf9712b25bd63d962ca7e5afc8b8d3a0d8353)) ##### Documentation - update to_dataframe sample to latest dependencies ([#​72](https://www.github.com/googleapis/python-bigquery-storage/issues/72)) ([a7fe762](https://www.github.com/googleapis/python-bigquery-storage/commit/a7fe7626312a5b9fe1e7bd0e0fe5601ae97605c7))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index f9211d66c00b..6000a4d24db6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.6.0 -google-cloud-bigquery-storage==2.0.1 +google-cloud-bigquery-storage==2.1.0 google-auth-oauthlib==0.4.2 grpcio==1.34.0 ipython==7.16.1; python_version < '3.7' From 4fb8a7e3d80b13a19b4bb0585454af8fd8ec38d6 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 9 Dec 2020 15:27:19 -0500 Subject: [PATCH 1019/2016] fix: handle null values in array query parameters (#426) --- .../google/cloud/bigquery/_helpers.py | 2 +- .../google-cloud-bigquery/tests/unit/test_query.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 716c8a394316..100136108e90 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -40,7 +40,7 @@ def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" - return value is not None or field.mode != "NULLABLE" + return value is not None or (field is not None and field.mode != "NULLABLE") def _int_from_json(value, field): diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index a7c639ed1e77..cf268daf193a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -383,6 +383,16 @@ def test_from_api_repr_wo_values(self): self.assertEqual(param.array_type, "INT64") self.assertEqual(param.values, []) + def test_from_api_repr_w_none_values(self): + RESOURCE = { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": None}]}, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.array_type, "INT64") + self.assertEqual(param.values, [1, None]) + def test_from_api_repr_w_struct_type(self): from google.cloud.bigquery.query import StructQueryParameter From 61b1a9e3796ebc10a928bdd4f177ee57dd295534 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 9 Dec 2020 21:29:10 +0100 Subject: [PATCH 1020/2016] chore(deps): update dependency pandas to v1.1.5 (#417) Co-authored-by: Tim Swast --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 6000a4d24db6..1d3cace2bba5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -5,6 +5,6 @@ grpcio==1.34.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 -pandas==1.1.4 +pandas==1.1.5 pyarrow==2.0.0 pytz==2020.4 From 875f870a1bc9d03d5582d46d0002a46caa61a2bd Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 9 Dec 2020 14:56:52 -0600 Subject: [PATCH 1021/2016] chore: release 2.6.1 (#424) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index c0233bbd4ba8..d01f62ff64eb 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.6.1](https://www.github.com/googleapis/python-bigquery/compare/v2.6.0...v2.6.1) (2020-12-09) + + +### Bug Fixes + +* handle null values in array query parameters ([#426](https://www.github.com/googleapis/python-bigquery/issues/426)) ([78fde4a](https://www.github.com/googleapis/python-bigquery/commit/78fde4a92e61a89d0b490b93acc90fff9635d1bf)) + + +### Documentation + +* add examples of `fields` argument to update methods ([#418](https://www.github.com/googleapis/python-bigquery/issues/418)) ([8c7e02b](https://www.github.com/googleapis/python-bigquery/commit/8c7e02b0de2c92ee965414e7c430eb57d1877326)) + ## [2.6.0](https://www.github.com/googleapis/python-bigquery/compare/v2.5.0...v2.6.0) (2020-12-07) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index ae34a9fbeb85..410cd066ed8d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.6.0" +__version__ = "2.6.1" From 3f0d83cb0a9929de7ede64a696bda07b8cd0206d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 9 Dec 2020 15:59:52 -0600 Subject: [PATCH 1022/2016] docs: add GEOGRAPHY data type code samples (#428) * docs: add GEOGRAPHY data type code samples These are added to a separate directory in order to isolate the GeoJSON and WKT dependencies from the other code samples. * skip geography samples in snippets session --- packages/google-cloud-bigquery/noxfile.py | 8 +- .../samples/geography/__init__.py | 13 + .../samples/geography/conftest.py | 55 ++++ .../samples/geography/insert_geojson.py | 49 ++++ .../samples/geography/insert_geojson_test.py | 20 ++ .../samples/geography/insert_wkt.py | 49 ++++ .../samples/geography/insert_wkt_test.py | 20 ++ .../samples/geography/noxfile.py | 246 ++++++++++++++++++ .../samples/geography/noxfile_config.py | 35 +++ .../samples/geography/requirements-test.txt | 2 + .../samples/geography/requirements.txt | 3 + .../google-cloud-bigquery/tests/system.py | 9 +- 12 files changed, 502 insertions(+), 7 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/geography/__init__.py create mode 100644 packages/google-cloud-bigquery/samples/geography/conftest.py create mode 100644 packages/google-cloud-bigquery/samples/geography/insert_geojson.py create mode 100644 packages/google-cloud-bigquery/samples/geography/insert_geojson_test.py create mode 100644 packages/google-cloud-bigquery/samples/geography/insert_wkt.py create mode 100644 packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py create mode 100644 packages/google-cloud-bigquery/samples/geography/noxfile.py create mode 100644 packages/google-cloud-bigquery/samples/geography/noxfile_config.py create mode 100644 packages/google-cloud-bigquery/samples/geography/requirements-test.txt create mode 100644 packages/google-cloud-bigquery/samples/geography/requirements.txt diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 95818d3c8e9d..8523eabb552f 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -147,7 +147,13 @@ def snippets(session): # Skip tests in samples/snippets, as those are run in a different session # using the nox config from that directory. session.run("py.test", os.path.join("docs", "snippets.py"), *session.posargs) - session.run("py.test", "samples", "--ignore=samples/snippets", *session.posargs) + session.run( + "py.test", + "samples", + "--ignore=samples/snippets", + "--ignore=samples/geography", + *session.posargs, + ) @nox.session(python="3.8") diff --git a/packages/google-cloud-bigquery/samples/geography/__init__.py b/packages/google-cloud-bigquery/samples/geography/__init__.py new file mode 100644 index 000000000000..c6334245aea5 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/packages/google-cloud-bigquery/samples/geography/conftest.py b/packages/google-cloud-bigquery/samples/geography/conftest.py new file mode 100644 index 000000000000..265900f5a264 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/conftest.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import uuid + +from google.cloud import bigquery +import pytest + + +def temp_suffix(): + now = datetime.datetime.now() + return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" + + +@pytest.fixture(scope="session") +def bigquery_client(): + bigquery_client = bigquery.Client() + return bigquery_client + + +@pytest.fixture(scope="session") +def project_id(bigquery_client): + return bigquery_client.project + + +@pytest.fixture +def dataset_id(bigquery_client): + dataset_id = f"geography_{temp_suffix()}" + bigquery_client.create_dataset(dataset_id) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture +def table_id(bigquery_client, project_id, dataset_id): + table_id = f"{project_id}.{dataset_id}.geography_{temp_suffix()}" + table = bigquery.Table(table_id) + table.schema = [ + bigquery.SchemaField("geo", bigquery.SqlTypeNames.GEOGRAPHY), + ] + bigquery_client.create_table(table) + yield table_id + bigquery_client.delete_table(table_id) diff --git a/packages/google-cloud-bigquery/samples/geography/insert_geojson.py b/packages/google-cloud-bigquery/samples/geography/insert_geojson.py new file mode 100644 index 000000000000..23f249c15beb --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/insert_geojson.py @@ -0,0 +1,49 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def insert_geojson(override_values={}): + # [START bigquery_insert_geojson] + import geojson + from google.cloud import bigquery + + bigquery_client = bigquery.Client() + + # This example uses a table containing a column named "geo" with the + # GEOGRAPHY data type. + table_id = "my-project.my_dataset.my_table" + # [END bigquery_insert_geojson] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + table_id = override_values.get("table_id", table_id) + # [START bigquery_insert_geojson] + + # Use the python-geojson library to generate GeoJSON of a line from LAX to + # JFK airports. Alternatively, you may define GeoJSON data directly, but it + # must be converted to a string before loading it into BigQuery. + my_geography = geojson.LineString([(-118.4085, 33.9416), (-73.7781, 40.6413)]) + rows = [ + # Convert GeoJSON data into a string. + {"geo": geojson.dumps(my_geography)} + ] + + # table already exists and has a column + # named "geo" with data type GEOGRAPHY. + errors = bigquery_client.insert_rows_json(table_id, rows) + if errors: + raise RuntimeError(f"row insert failed: {errors}") + else: + print(f"wrote 1 row to {table_id}") + # [END bigquery_insert_geojson] + return errors diff --git a/packages/google-cloud-bigquery/samples/geography/insert_geojson_test.py b/packages/google-cloud-bigquery/samples/geography/insert_geojson_test.py new file mode 100644 index 000000000000..5ef15ee13864 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/insert_geojson_test.py @@ -0,0 +1,20 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import insert_geojson + + +def test_insert_geojson(table_id): + errors = insert_geojson.insert_geojson(override_values={"table_id": table_id}) + assert not errors diff --git a/packages/google-cloud-bigquery/samples/geography/insert_wkt.py b/packages/google-cloud-bigquery/samples/geography/insert_wkt.py new file mode 100644 index 000000000000..1f3d575464bf --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/insert_wkt.py @@ -0,0 +1,49 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def insert_wkt(override_values={}): + # [START bigquery_insert_geography_wkt] + from google.cloud import bigquery + import shapely + import shapely.wkt + + bigquery_client = bigquery.Client() + + # This example uses a table containing a column named "geo" with the + # GEOGRAPHY data type. + table_id = "my-project.my_dataset.my_table" + # [END bigquery_insert_geography_wkt] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + table_id = override_values.get("table_id", table_id) + # [START bigquery_insert_geography_wkt] + + # Use the Shapely library to generate WKT of a line from LAX to + # JFK airports. Alternatively, you may define WKT data directly. + my_geography = shapely.LineString([(-118.4085, 33.9416), (-73.7781, 40.6413)]) + rows = [ + # Convert data into a WKT string. + {"geo": shapely.wkt.dumps(my_geography)}, + ] + + # table already exists and has a column + # named "geo" with data type GEOGRAPHY. + errors = bigquery_client.insert_rows_json(table_id, rows) + if errors: + raise RuntimeError(f"row insert failed: {errors}") + else: + print(f"wrote 1 row to {table_id}") + # [END bigquery_insert_geography_wkt] + return errors diff --git a/packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py b/packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py new file mode 100644 index 000000000000..5ef15ee13864 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py @@ -0,0 +1,20 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import insert_geojson + + +def test_insert_geojson(table_id): + errors = insert_geojson.insert_geojson(override_values={"table_id": table_id}) + assert not errors diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py new file mode 100644 index 000000000000..ab2c49227c3b --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -0,0 +1,246 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +from pathlib import Path +import sys + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +# Copy `noxfile_config.py` to your directory and modify it instead. + + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} + + +try: + # Ensure we can import noxfile_config in the project's directory. + sys.path.append(".") + from noxfile_config import TEST_CONFIG_OVERRIDE +except ImportError as e: + print("No user noxfile_config found: detail: {}".format(e)) + TEST_CONFIG_OVERRIDE = {} + +# Update the TEST_CONFIG with the user supplied values. +TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) + + +def get_pytest_env_vars(): + """Returns a dict for pytest invocation.""" + ret = {} + + # Override the GCLOUD_PROJECT and the alias. + env_key = TEST_CONFIG["gcloud_project_env"] + # This should error out if not set. + ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] + + # Apply user supplied envs. + ret.update(TEST_CONFIG["envs"]) + return ret + + +# DO NOT EDIT - automatically generated. +# All versions used to tested samples. +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] + +# Any default versions that should be ignored. +IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] + +TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) + +INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +# +# Style Checks +# + + +def _determine_local_import_names(start_dir): + """Determines all import names that should be considered "local". + + This is used when running the linter to insure that import order is + properly checked. + """ + file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] + return [ + basename + for basename, extension in file_ext_pairs + if extension == ".py" + or os.path.isdir(os.path.join(start_dir, basename)) + and basename not in ("__pycache__") + ] + + +# Linting with flake8. +# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--import-order-style=google", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session): + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8", "flake8-import-order") + else: + session.install("flake8", "flake8-import-order", "flake8-annotations") + + local_names = _determine_local_import_names(".") + args = FLAKE8_COMMON_ARGS + [ + "--application-import-names", + ",".join(local_names), + ".", + ] + session.run("flake8", *args) + + +# +# Black +# + + +@nox.session +def blacken(session): + session.install("black") + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests(session, post_install=None): + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars() + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session): + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) + + +# +# Readmegen +# + + +def _get_repo_root(): + """ Returns the root folder of the project. """ + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session, path): + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile_config.py b/packages/google-cloud-bigquery/samples/geography/noxfile_config.py new file mode 100644 index 000000000000..7d2e02346071 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/noxfile_config.py @@ -0,0 +1,35 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be inported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt new file mode 100644 index 000000000000..676ff949e8ae --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -0,0 +1,2 @@ +pytest==5.4.3 +mock==4.0.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt new file mode 100644 index 000000000000..9bd6638d719c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -0,0 +1,3 @@ +geojson==2.5.0 +google-cloud-bigquery==2.6.0 +Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index d481967d8088..185722e83e16 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -2414,9 +2414,8 @@ def test_querying_data_w_timeout(self): query_job = Config.CLIENT.query( """ - SELECT name, SUM(number) AS total_people - FROM `bigquery-public-data.usa_names.usa_1910_current` - GROUP BY name + SELECT COUNT(*) + FROM UNNEST(GENERATE_ARRAY(1,1000000)), UNNEST(GENERATE_ARRAY(1, 10000)) """, location="US", job_config=job_config, @@ -2427,9 +2426,7 @@ def test_querying_data_w_timeout(self): with self.assertRaises(requests.exceptions.Timeout): query_job.done(timeout=0.1) - # Now wait for the result using a more realistic deadline. - query_job.result(timeout=30) - self.assertTrue(query_job.done(timeout=30)) + Config.CLIENT.cancel_job(query_job.job_id, location=query_job.location) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_query_results_to_dataframe(self): From 42f98726092c3734563e73b8733dba77b70ccca4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 9 Dec 2020 16:57:16 -0600 Subject: [PATCH 1023/2016] docs: fix Shapely import in GEOGRAPHY sample (#431) --- .../google-cloud-bigquery/samples/geography/insert_wkt.py | 6 ++++-- .../samples/geography/insert_wkt_test.py | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/insert_wkt.py b/packages/google-cloud-bigquery/samples/geography/insert_wkt.py index 1f3d575464bf..d7d3accdef2a 100644 --- a/packages/google-cloud-bigquery/samples/geography/insert_wkt.py +++ b/packages/google-cloud-bigquery/samples/geography/insert_wkt.py @@ -16,7 +16,7 @@ def insert_wkt(override_values={}): # [START bigquery_insert_geography_wkt] from google.cloud import bigquery - import shapely + import shapely.geometry import shapely.wkt bigquery_client = bigquery.Client() @@ -32,7 +32,9 @@ def insert_wkt(override_values={}): # Use the Shapely library to generate WKT of a line from LAX to # JFK airports. Alternatively, you may define WKT data directly. - my_geography = shapely.LineString([(-118.4085, 33.9416), (-73.7781, 40.6413)]) + my_geography = shapely.geometry.LineString( + [(-118.4085, 33.9416), (-73.7781, 40.6413)] + ) rows = [ # Convert data into a WKT string. {"geo": shapely.wkt.dumps(my_geography)}, diff --git a/packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py b/packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py index 5ef15ee13864..8bcb62cec1a0 100644 --- a/packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py +++ b/packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import insert_geojson +from . import insert_wkt -def test_insert_geojson(table_id): - errors = insert_geojson.insert_geojson(override_values={"table_id": table_id}) +def test_insert_wkt(table_id): + errors = insert_wkt.insert_wkt(override_values={"table_id": table_id}) assert not errors From 681266477bca07227f61991b1c6c30a6eeeab927 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 11 Dec 2020 00:04:26 +0100 Subject: [PATCH 1024/2016] chore(deps): update dependency google-cloud-bigquery to v2.6.1 (#430) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 9bd6638d719c..3ea0e6e06914 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.6.0 +google-cloud-bigquery==2.6.1 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 1d3cace2bba5..0b9b69487d67 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.6.0 +google-cloud-bigquery==2.6.1 google-cloud-bigquery-storage==2.1.0 google-auth-oauthlib==0.4.2 grpcio==1.34.0 From dcfc8f6f6d92c0ae364adf4ba9f14e2dcce0b103 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 10 Dec 2020 17:05:08 -0600 Subject: [PATCH 1025/2016] docs: move and refresh view samples (#420) docs: restore old view snippets remove relative imports docs: fix missing space in comment sort imports --- .../samples/snippets/conftest.py | 27 +++ .../samples/snippets/materialized_view.py | 2 +- .../snippets/materialized_view_test.py | 14 +- .../samples/snippets/view.py | 164 ++++++++++++++++++ .../samples/snippets/view_test.py | 117 +++++++++++++ 5 files changed, 311 insertions(+), 13 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/conftest.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/view.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/view_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/conftest.py b/packages/google-cloud-bigquery/samples/snippets/conftest.py new file mode 100644 index 000000000000..d22a3331812d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/conftest.py @@ -0,0 +1,27 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + + +@pytest.fixture(scope="session") +def bigquery_client(): + bigquery_client = bigquery.Client() + return bigquery_client + + +@pytest.fixture(scope="session") +def project_id(bigquery_client): + return bigquery_client.project diff --git a/packages/google-cloud-bigquery/samples/snippets/materialized_view.py b/packages/google-cloud-bigquery/samples/snippets/materialized_view.py index d925ec23033e..429bd98b4fa1 100644 --- a/packages/google-cloud-bigquery/samples/snippets/materialized_view.py +++ b/packages/google-cloud-bigquery/samples/snippets/materialized_view.py @@ -25,7 +25,7 @@ def create_materialized_view(override_values={}): # To facilitate testing, we replace values with alternatives # provided by the testing harness. view_id = override_values.get("view_id", view_id) - base_table_id = override_values.get("base_table_id", view_id) + base_table_id = override_values.get("base_table_id", base_table_id) # [START bigquery_create_materialized_view] view = bigquery.Table(view_id) view.mview_query = f""" diff --git a/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py b/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py index fc3db533ca97..75c6b2106f39 100644 --- a/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py @@ -23,13 +23,8 @@ def temp_suffix(): - return str(uuid.uuid4()).replace("-", "_") - - -@pytest.fixture(scope="module") -def bigquery_client(): - bigquery_client = bigquery.Client() - return bigquery_client + now = datetime.datetime.now() + return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" @pytest.fixture(autouse=True) @@ -37,11 +32,6 @@ def bigquery_client_patch(monkeypatch, bigquery_client): monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) -@pytest.fixture(scope="module") -def project_id(bigquery_client): - return bigquery_client.project - - @pytest.fixture(scope="module") def dataset_id(bigquery_client): dataset_id = f"mvdataset_{temp_suffix()}" diff --git a/packages/google-cloud-bigquery/samples/snippets/view.py b/packages/google-cloud-bigquery/samples/snippets/view.py new file mode 100644 index 000000000000..ad3f117177cb --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/view.py @@ -0,0 +1,164 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_view(override_values={}): + # [START bigquery_create_view] + from google.cloud import bigquery + + client = bigquery.Client() + + view_id = "my-project.my_dataset.my_view" + source_id = "my-project.my_dataset.my_table" + # [END bigquery_create_view] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + source_id = override_values.get("source_id", source_id) + # [START bigquery_create_view] + view = bigquery.Table(view_id) + + # The source table in this example is created from a CSV file in Google + # Cloud Storage located at + # `gs://cloud-samples-data/bigquery/us-states/us-states.csv`. It contains + # 50 US states, while the view returns only those states with names + # starting with the letter 'W'. + view.view_query = f"SELECT name, post_abbr FROM `{source_id}` WHERE name LIKE 'W%'" + + # Make an API request to create the view. + view = client.create_table(view) + print(f"Created {view.table_type}: {str(view.reference)}") + # [END bigquery_create_view] + return view + + +def get_view(override_values={}): + # [START bigquery_get_view] + from google.cloud import bigquery + + client = bigquery.Client() + + view_id = "my-project.my_dataset.my_view" + # [END bigquery_get_view] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + # [START bigquery_get_view] + # Make an API request to get the table resource. + view = client.get_table(view_id) + + # Display view properties + print(f"Retrieved {view.table_type}: {str(view.reference)}") + print(f"View Query:\n{view.view_query}") + # [END bigquery_get_view] + return view + + +def update_view(override_values={}): + # [START bigquery_update_view_query] + from google.cloud import bigquery + + client = bigquery.Client() + + view_id = "my-project.my_dataset.my_view" + source_id = "my-project.my_dataset.my_table" + # [END bigquery_update_view_query] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + source_id = override_values.get("source_id", source_id) + # [START bigquery_update_view_query] + view = bigquery.Table(view_id) + + # The source table in this example is created from a CSV file in Google + # Cloud Storage located at + # `gs://cloud-samples-data/bigquery/us-states/us-states.csv`. It contains + # 50 US states, while the view returns only those states with names + # starting with the letter 'M'. + view.view_query = f"SELECT name, post_abbr FROM `{source_id}` WHERE name LIKE 'M%'" + + # Make an API request to update the query property of the view. + view = client.update_table(view, ["view_query"]) + print(f"Updated {view.table_type}: {str(view.reference)}") + # [END bigquery_update_view_query] + return view + + +def grant_access(override_values={}): + # [START bigquery_grant_view_access] + from google.cloud import bigquery + + client = bigquery.Client() + + # To use a view, the analyst requires ACLs to both the view and the source + # table. Create an authorized view to allow an analyst to use a view + # without direct access permissions to the source table. + view_dataset_id = "my-project.my_view_dataset" + # [END bigquery_grant_view_access] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_dataset_id = override_values.get("view_dataset_id", view_dataset_id) + # [START bigquery_grant_view_access] + # Make an API request to get the view dataset ACLs. + view_dataset = client.get_dataset(view_dataset_id) + + analyst_group_email = "data_analysts@example.com" + # [END bigquery_grant_view_access] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + analyst_group_email = override_values.get( + "analyst_group_email", analyst_group_email + ) + # [START bigquery_grant_view_access] + access_entries = view_dataset.access_entries + access_entries.append( + bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) + ) + view_dataset.access_entries = access_entries + + # Make an API request to update the ACLs property of the view dataset. + view_dataset = client.update_dataset(view_dataset, ["access_entries"]) + print(f"Access to view: {view_dataset.access_entries}") + + # Group members of "data_analysts@example.com" now have access to the view, + # but they require access to the source table to use it. To remove this + # restriction, authorize the view to access the source dataset. + source_dataset_id = "my-project.my_source_dataset" + # [END bigquery_grant_view_access] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + source_dataset_id = override_values.get("source_dataset_id", source_dataset_id) + # [START bigquery_grant_view_access] + # Make an API request to set the source dataset ACLs. + source_dataset = client.get_dataset(source_dataset_id) + + view_reference = { + "projectId": "my-project", + "datasetId": "my_view_dataset", + "tableId": "my_authorized_view", + } + # [END bigquery_grant_view_access] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_reference = override_values.get("view_reference", view_reference) + # [START bigquery_grant_view_access] + access_entries = source_dataset.access_entries + access_entries.append(bigquery.AccessEntry(None, "view", view_reference)) + source_dataset.access_entries = access_entries + + # Make an API request to update the ACLs property of the source dataset. + source_dataset = client.update_dataset(source_dataset, ["access_entries"]) + print(f"Access to source: {source_dataset.access_entries}") + # [END bigquery_grant_view_access] + return view_dataset, source_dataset diff --git a/packages/google-cloud-bigquery/samples/snippets/view_test.py b/packages/google-cloud-bigquery/samples/snippets/view_test.py new file mode 100644 index 000000000000..77105b61ae6f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/view_test.py @@ -0,0 +1,117 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import uuid + +from google.cloud import bigquery +import pytest + +import view + + +def temp_suffix(): + now = datetime.datetime.now() + return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" + + +@pytest.fixture(autouse=True) +def bigquery_client_patch(monkeypatch, bigquery_client): + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) + + +@pytest.fixture(scope="module") +def view_dataset_id(bigquery_client, project_id): + dataset_id = f"{project_id}.view_{temp_suffix()}" + bigquery_client.create_dataset(dataset_id) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture(scope="module") +def view_id(bigquery_client, view_dataset_id): + view_id = f"{view_dataset_id}.my_view" + yield view_id + bigquery_client.delete_table(view_id, not_found_ok=True) + + +@pytest.fixture(scope="module") +def source_dataset_id(bigquery_client, project_id): + dataset_id = f"{project_id}.view_{temp_suffix()}" + bigquery_client.create_dataset(dataset_id) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture(scope="module") +def source_table_id(bigquery_client, source_dataset_id): + source_table_id = f"{source_dataset_id}.us_states" + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + skip_leading_rows=1, + ) + load_job = bigquery_client.load_table_from_uri( + "gs://cloud-samples-data/bigquery/us-states/us-states.csv", + source_table_id, + job_config=job_config, + ) + load_job.result() + yield source_table_id + bigquery_client.delete_table(source_table_id, not_found_ok=True) + + +def test_view(capsys, view_id, view_dataset_id, source_table_id, source_dataset_id): + override_values = { + "view_id": view_id, + "source_id": source_table_id, + } + got = view.create_view(override_values) + assert source_table_id in got.view_query + out, _ = capsys.readouterr() + assert view_id in out + + got = view.get_view(override_values) + assert source_table_id in got.view_query + assert "'W%'" in got.view_query + out, _ = capsys.readouterr() + assert view_id in out + assert source_table_id in out + assert "'W%'" in out + + got = view.update_view(override_values) + assert source_table_id in got.view_query + assert "'M%'" in got.view_query + out, _ = capsys.readouterr() + assert view_id in out + + project_id, dataset_id, table_id = view_id.split(".") + override_values = { + "analyst_group_email": "cloud-dpes-bigquery@google.com", + "view_dataset_id": view_dataset_id, + "source_dataset_id": source_dataset_id, + "view_reference": { + "projectId": project_id, + "datasetId": dataset_id, + "tableId": table_id, + }, + } + view_dataset, source_dataset = view.grant_access(override_values) + assert len(view_dataset.access_entries) != 0 + assert len(source_dataset.access_entries) != 0 + out, _ = capsys.readouterr() + assert "cloud-dpes-bigquery@google.com" in out + assert table_id in out From b6598a15b96181adac05e830afc13105209a4f99 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 22 Dec 2020 16:20:48 -0600 Subject: [PATCH 1026/2016] test: add session to test with nightly dependencies (#449) This should catch errors introduced in the next versions of dependency packages. --- .../.kokoro/presubmit/prerelease-deps-3.8.cfg | 7 ++++ packages/google-cloud-bigquery/noxfile.py | 32 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg new file mode 100644 index 000000000000..f06806baf188 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 8523eabb552f..f3326d01bade 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -168,6 +168,38 @@ def cover(session): session.run("coverage", "erase") +@nox.session(python="3.8") +def prerelease_deps(session): + """Run all tests with prerelease versions of dependencies installed. + + https://github.com/googleapis/python-bigquery/issues/95 + """ + # PyArrow prerelease packages are published to an alternative PyPI host. + # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages + session.install( + "--extra-index-url", "https://pypi.fury.io/arrow-nightlies/", "--pre", "pyarrow" + ) + session.install("--pre", "grpcio", "pandas") + session.install( + "mock", + "pytest", + "google-cloud-testutils", + "pytest-cov", + "freezegun", + "IPython", + ) + session.install("-e", ".[all]") + + # Print out prerelease package versions. + session.run("python", "-c", "import grpc; print(grpc.__version__)") + session.run("python", "-c", "import pandas; print(pandas.__version__)") + session.run("python", "-c", "import pyarrow; print(pyarrow.__version__)") + + # Run all tests, except a few samples tests which require extra dependencies. + session.run("py.test", "tests") + session.run("py.test", "samples/tests") + + @nox.session(python="3.8") def lint(session): """Run linters. From 054f9e4f4ca13d74e8d77277ff7558141e58908a Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 5 Jan 2021 15:37:44 -0700 Subject: [PATCH 1027/2016] chore: add constraints file (#456) * chore: add constraints file * chore: add constraints file * chore: add constraints file * chore: add constraints file --- .../testing/constraints-3.10.txt | 0 .../testing/constraints-3.11.txt | 0 .../testing/constraints-3.6.txt | 33 ++++++++++++++----- .../testing/constraints-3.9.txt | 0 4 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 packages/google-cloud-bigquery/testing/constraints-3.10.txt create mode 100644 packages/google-cloud-bigquery/testing/constraints-3.11.txt create mode 100644 packages/google-cloud-bigquery/testing/constraints-3.9.txt diff --git a/packages/google-cloud-bigquery/testing/constraints-3.10.txt b/packages/google-cloud-bigquery/testing/constraints-3.10.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.11.txt b/packages/google-cloud-bigquery/testing/constraints-3.11.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index 91a507a5c2ce..fe2bcfda78fe 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -1,16 +1,31 @@ +# This constraints file is used to check that lower bounds +# are correct in setup.py +# List *all* library dependencies and extras in this file. +# Pin the version to the lower bound. +# +# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", +# Then this file should have foo==1.14.0 google-api-core==1.23.0 -google-cloud-bigquery-storage==2.0.0 +proto-plus==1.10.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 +six==1.13.0 +protobuf==3.12.0 +google-cloud-bigquery-storage==2.0.0 grpcio==1.32.0 -ipython==5.5 -libcst==0.2.5 -llvmlite==0.34.0 -# pandas 0.23.0 is the first version to work with pyarrow to_pandas. +pyarrow==1.0.0 pandas==0.23.0 -protobuf == 3.12.0 -proto-plus==1.10.0 pyarrow==1.0.0 -python-snappy==0.5.4 -six==1.13.0 tqdm==4.7.4 +opentelemetry-api==0.11b0 +opentelemetry-sdk==0.11b0 +opentelemetry-instrumentation==0.11b0 +google-cloud-bigquery-storage==2.0.0 +grpcio==1.32.0 +pyarrow==1.0.0 +opentelemetry-api==0.11b0 +opentelemetry-sdk==0.11b0 +opentelemetry-instrumentation==0.11b0 +pandas==0.23.0 +pyarrow==1.0.0 +tqdm==4.7.4 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/testing/constraints-3.9.txt b/packages/google-cloud-bigquery/testing/constraints-3.9.txt new file mode 100644 index 000000000000..e69de29bb2d1 From 85d26a5c5cb27bad424d06e2b8c335e819d45411 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 7 Jan 2021 00:06:40 +0100 Subject: [PATCH 1028/2016] chore(deps): update dependency pytz to v2020.5 (#452) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 0b9b69487d67..5cda3421480f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -7,4 +7,4 @@ ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 pandas==1.1.5 pyarrow==2.0.0 -pytz==2020.4 +pytz==2020.5 From 91cd515b24263ab3af3f2bad39f5f102a0a6ca8e Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 7 Jan 2021 00:26:11 +0100 Subject: [PATCH 1029/2016] chore(deps): update dependency pandas to v1.2.0 (#454) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [pandas](https://pandas.pydata.org) ([source](https://togithub.com/pandas-dev/pandas)) | minor | `==1.1.5` -> `==1.2.0` | --- ### Release Notes
pandas-dev/pandas ### [`v1.2.0`](https://togithub.com/pandas-dev/pandas/releases/v1.2.0) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.1.5...v1.2.0) This release includes some new features, bug fixes, and performance improvements. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.0/whatsnew/v1.2.0.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install -c conda-forge pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues).
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 5cda3421480f..208eb4526658 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -5,6 +5,7 @@ grpcio==1.34.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 -pandas==1.1.5 +pandas==1.1.5; python_version < '3.7' +pandas==1.2.0; python_version >= '3.7' pyarrow==2.0.0 pytz==2020.5 From bde77d4ed55c9e0fe316aa8d2ad96ac231a4c60f Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 8 Jan 2021 13:29:22 -0700 Subject: [PATCH 1030/2016] ci: use python3 instead of python3.6 in build.sh (#425) * ci: skip docfx in main 'Kokoro' presubmit * fix: specify default sessions in noxfile * fix: use python3 instead of 3.6 * fix: add NOX_SESSION to pass down envvars * fix: remove quotes arround sessions Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/.kokoro/build.sh | 10 +++++----- packages/google-cloud-bigquery/.trampolinerc | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/.kokoro/build.sh b/packages/google-cloud-bigquery/.kokoro/build.sh index cb81a05f883e..058f363e14c0 100755 --- a/packages/google-cloud-bigquery/.kokoro/build.sh +++ b/packages/google-cloud-bigquery/.kokoro/build.sh @@ -34,16 +34,16 @@ export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") # Remove old nox -python3.6 -m pip uninstall --yes --quiet nox-automation +python3 -m pip uninstall --yes --quiet nox-automation # Install nox -python3.6 -m pip install --upgrade --quiet nox -python3.6 -m nox --version +python3 -m pip install --upgrade --quiet nox +python3 -m nox --version # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. if [[ -n "${NOX_SESSION:-}" ]]; then - python3.6 -m nox -s "${NOX_SESSION:-}" + python3 -m nox -s ${NOX_SESSION:-} else - python3.6 -m nox + python3 -m nox fi diff --git a/packages/google-cloud-bigquery/.trampolinerc b/packages/google-cloud-bigquery/.trampolinerc index 995ee29111e1..c7d663ae9c57 100644 --- a/packages/google-cloud-bigquery/.trampolinerc +++ b/packages/google-cloud-bigquery/.trampolinerc @@ -18,12 +18,14 @@ required_envvars+=( "STAGING_BUCKET" "V2_STAGING_BUCKET" + "NOX_SESSION" ) # Add env vars which are passed down into the container here. pass_down_envvars+=( "STAGING_BUCKET" "V2_STAGING_BUCKET" + "NOX_SESSION" ) # Prevent unintentional override on the default image. From 91c6990102e6cdbfc6b9bd5f8543d5002851fc8d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 8 Jan 2021 22:20:21 +0100 Subject: [PATCH 1031/2016] chore: remove six dependency (#461) * chore: remove six dependency * Remove now-redundant self argument --- .../google/cloud/bigquery/_helpers.py | 3 +- .../google/cloud/bigquery/_pandas_helpers.py | 5 +- .../google/cloud/bigquery/client.py | 5 +- .../google/cloud/bigquery/dataset.py | 15 ++-- .../google/cloud/bigquery/dbapi/_helpers.py | 10 ++- .../google/cloud/bigquery/dbapi/cursor.py | 4 +- .../google/cloud/bigquery/enums.py | 4 +- .../google/cloud/bigquery/job/base.py | 40 +++++------ .../google/cloud/bigquery/job/query.py | 5 +- .../google/cloud/bigquery/magics/magics.py | 12 ++-- .../google/cloud/bigquery/model.py | 5 +- .../google/cloud/bigquery/routine.py | 3 +- .../google/cloud/bigquery/schema.py | 4 +- .../google/cloud/bigquery/table.py | 18 +++-- .../samples/load_table_uri_truncate_avro.py | 4 +- .../samples/load_table_uri_truncate_csv.py | 4 +- .../samples/load_table_uri_truncate_json.py | 4 +- .../samples/load_table_uri_truncate_orc.py | 4 +- .../load_table_uri_truncate_parquet.py | 4 +- .../tests/test_copy_table_multiple_source.py | 4 +- packages/google-cloud-bigquery/setup.py | 1 - .../google-cloud-bigquery/tests/system.py | 26 ++++--- .../tests/unit/job/test_base.py | 6 +- .../tests/unit/job/test_query.py | 6 +- .../tests/unit/test__helpers.py | 5 +- .../tests/unit/test__http.py | 12 ++-- .../tests/unit/test_client.py | 72 +++++++++---------- .../tests/unit/test_dbapi__helpers.py | 8 +-- .../tests/unit/test_dbapi_connection.py | 5 +- .../tests/unit/test_dbapi_cursor.py | 7 +- .../tests/unit/test_opentelemetry_tracing.py | 6 +- .../tests/unit/test_table.py | 27 ++----- 32 files changed, 150 insertions(+), 188 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 100136108e90..6b66a3020aca 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -18,7 +18,6 @@ import datetime import decimal import re -import six from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -451,7 +450,7 @@ def _record_field_to_json(fields, row_value): for field_name in not_processed: value = row_value[field_name] if value is not None: - record[field_name] = six.text_type(value) + record[field_name] = str(value) return record diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 7774ce26bd8d..162c58b4b500 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -17,10 +17,9 @@ import concurrent.futures import functools import logging +import queue import warnings -import six -from six.moves import queue try: import pandas @@ -738,7 +737,7 @@ def download_dataframe_bqstorage( def dataframe_to_json_generator(dataframe): for row in dataframe.itertuples(index=False, name=None): output = {} - for column, value in six.moves.zip(dataframe.columns, row): + for column, value in zip(dataframe.columns, row): # Omit NaN values. if value != value: continue diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 28cac64adfc5..19693c9ff2ee 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -34,7 +34,6 @@ import pyarrow except ImportError: # pragma: NO COVER pyarrow = None -import six from google import resumable_media from google.resumable_media.requests import MultipartUpload @@ -2017,7 +2016,7 @@ def load_table_from_uri( job_ref = job._JobReference(job_id, project=project, location=location) - if isinstance(source_uris, six.string_types): + if isinstance(source_uris, str): source_uris = [source_uris] destination = _table_arg_to_table_ref(destination, default_project=self.project) @@ -2779,7 +2778,7 @@ def extract_table( ) ) - if isinstance(destination_uris, six.string_types): + if isinstance(destination_uris, str): destination_uris = [destination_uris] if job_config: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index ce07c8048558..2d3a4755f260 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -16,7 +16,6 @@ from __future__ import absolute_import -import six import copy import google.cloud._helpers @@ -260,9 +259,9 @@ class DatasetReference(object): """ def __init__(self, project, dataset_id): - if not isinstance(project, six.string_types): + if not isinstance(project, str): raise ValueError("Pass a string for project") - if not isinstance(dataset_id, six.string_types): + if not isinstance(dataset_id, str): raise ValueError("Pass a string for dataset_id") self._project = project self._dataset_id = dataset_id @@ -407,7 +406,7 @@ class Dataset(object): } def __init__(self, dataset_ref): - if isinstance(dataset_ref, six.string_types): + if isinstance(dataset_ref, str): dataset_ref = DatasetReference.from_string(dataset_ref) self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}} @@ -544,7 +543,7 @@ def default_table_expiration_ms(self): @default_table_expiration_ms.setter def default_table_expiration_ms(self, value): - if not isinstance(value, six.integer_types) and value is not None: + if not isinstance(value, int) and value is not None: raise ValueError("Pass an integer, or None") self._properties["defaultTableExpirationMs"] = _helpers._str_or_none(value) @@ -560,7 +559,7 @@ def description(self): @description.setter def description(self, value): - if not isinstance(value, six.string_types) and value is not None: + if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") self._properties["description"] = value @@ -576,7 +575,7 @@ def friendly_name(self): @friendly_name.setter def friendly_name(self, value): - if not isinstance(value, six.string_types) and value is not None: + if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") self._properties["friendlyName"] = value @@ -592,7 +591,7 @@ def location(self): @location.setter def location(self, value): - if not isinstance(value, six.string_types) and value is not None: + if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") self._properties["location"] = value diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index fdf4e17c30c1..95b5869e509a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -19,8 +19,6 @@ import functools import numbers -import six - from google.cloud import bigquery from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import exceptions @@ -132,7 +130,7 @@ def to_query_parameters_dict(parameters): """ result = [] - for name, value in six.iteritems(parameters): + for name, value in parameters.items(): if isinstance(value, collections_abc.Mapping): raise NotImplementedError( "STRUCT-like parameter values are not supported " @@ -187,9 +185,9 @@ def bigquery_scalar_type(value): return "FLOAT64" elif isinstance(value, decimal.Decimal): return "NUMERIC" - elif isinstance(value, six.text_type): + elif isinstance(value, str): return "STRING" - elif isinstance(value, six.binary_type): + elif isinstance(value, bytes): return "BYTES" elif isinstance(value, datetime.datetime): return "DATETIME" if value.tzinfo is None else "TIMESTAMP" @@ -215,7 +213,7 @@ def array_like(value): bool: ``True`` if the value is considered array-like, ``False`` otherwise. """ return isinstance(value, collections_abc.Sequence) and not isinstance( - value, (six.text_type, six.binary_type, bytearray) + value, (str, bytes, bytearray) ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index f48b47c1240d..e90bcc2c0d6a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -19,8 +19,6 @@ import copy import logging -import six - from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions @@ -289,7 +287,7 @@ def fetchone(self): """ self._try_fetch() try: - return six.next(self._query_data) + return next(self._query_data) except StopIteration: return None diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 3f72333afbeb..2268808fdb5f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -15,7 +15,7 @@ import re import enum -import six +import itertools from google.cloud.bigquery_v2 import types as gapic_types @@ -178,7 +178,7 @@ def _make_sql_scalars_enum(): ) new_doc = "\n".join( - six.moves.filterfalse(skip_pattern.search, orig_doc.splitlines()) + itertools.filterfalse(skip_pattern.search, orig_doc.splitlines()) ) new_enum.__doc__ = "An Enum of scalar SQL types.\n" + new_doc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 2f4ae1460a46..3c601f072592 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -15,11 +15,11 @@ """Base classes and helpers for job classes.""" import copy +import http import threading from google.api_core import exceptions import google.api_core.future.polling -from six.moves import http_client from google.cloud.bigquery import _helpers from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -28,24 +28,24 @@ _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" _ERROR_REASON_TO_EXCEPTION = { - "accessDenied": http_client.FORBIDDEN, - "backendError": http_client.INTERNAL_SERVER_ERROR, - "billingNotEnabled": http_client.FORBIDDEN, - "billingTierLimitExceeded": http_client.BAD_REQUEST, - "blocked": http_client.FORBIDDEN, - "duplicate": http_client.CONFLICT, - "internalError": http_client.INTERNAL_SERVER_ERROR, - "invalid": http_client.BAD_REQUEST, - "invalidQuery": http_client.BAD_REQUEST, - "notFound": http_client.NOT_FOUND, - "notImplemented": http_client.NOT_IMPLEMENTED, - "quotaExceeded": http_client.FORBIDDEN, - "rateLimitExceeded": http_client.FORBIDDEN, - "resourceInUse": http_client.BAD_REQUEST, - "resourcesExceeded": http_client.BAD_REQUEST, - "responseTooLarge": http_client.FORBIDDEN, - "stopped": http_client.OK, - "tableUnavailable": http_client.BAD_REQUEST, + "accessDenied": http.client.FORBIDDEN, + "backendError": http.client.INTERNAL_SERVER_ERROR, + "billingNotEnabled": http.client.FORBIDDEN, + "billingTierLimitExceeded": http.client.BAD_REQUEST, + "blocked": http.client.FORBIDDEN, + "duplicate": http.client.CONFLICT, + "internalError": http.client.INTERNAL_SERVER_ERROR, + "invalid": http.client.BAD_REQUEST, + "invalidQuery": http.client.BAD_REQUEST, + "notFound": http.client.NOT_FOUND, + "notImplemented": http.client.NOT_IMPLEMENTED, + "quotaExceeded": http.client.FORBIDDEN, + "rateLimitExceeded": http.client.FORBIDDEN, + "resourceInUse": http.client.BAD_REQUEST, + "resourcesExceeded": http.client.BAD_REQUEST, + "responseTooLarge": http.client.FORBIDDEN, + "stopped": http.client.OK, + "tableUnavailable": http.client.BAD_REQUEST, } @@ -66,7 +66,7 @@ def _error_result_to_exception(error_result): """ reason = error_result.get("reason") status_code = _ERROR_REASON_TO_EXCEPTION.get( - reason, http_client.INTERNAL_SERVER_ERROR + reason, http.client.INTERNAL_SERVER_ERROR ) return exceptions.from_http_status( status_code, error_result.get("message", ""), errors=[error_result] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 9e890861376e..d87f87f52835 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -20,7 +20,6 @@ from google.api_core import exceptions import requests -import six from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem @@ -192,7 +191,7 @@ def default_dataset(self, value): self._set_sub_prop("defaultDataset", None) return - if isinstance(value, six.string_types): + if isinstance(value, str): value = DatasetReference.from_string(value) if isinstance(value, (Dataset, DatasetListItem)): @@ -1168,7 +1167,7 @@ def result( exc.query_job = self raise except requests.exceptions.Timeout as exc: - six.raise_from(concurrent.futures.TimeoutError, exc) + raise concurrent.futures.TimeoutError from exc # If the query job is complete but there are no query results, this was # special job, such as a DDL query. Return an empty result set to diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index f04a6364a2b6..8f343ddcc96a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -153,8 +153,6 @@ except ImportError: # pragma: NO COVER raise ImportError("This module can only be loaded in IPython.") -import six - from google.api_core import client_info from google.api_core import client_options from google.api_core.exceptions import NotFound @@ -577,16 +575,16 @@ def _cell_magic(line, query): "--params is not a correctly formatted JSON string or a JSON " "serializable dictionary" ) - six.raise_from(rebranded_error, exc) + raise rebranded_error from exc except lap.exceptions.DuplicateQueryParamsError as exc: rebranded_error = ValueError("Duplicate --params option.") - six.raise_from(rebranded_error, exc) + raise rebranded_error from exc except lap.exceptions.ParseError as exc: rebranded_error = ValueError( "Unrecognized input, are option values correct? " "Error details: {}".format(exc.args[0]) ) - six.raise_from(rebranded_error, exc) + raise rebranded_error from exc args = magic_arguments.parse_argstring(_cell_magic, rest_of_args) @@ -768,7 +766,7 @@ def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): "to use it. Alternatively, use the classic REST API by specifying " "the --use_rest_api magic option." ) - six.raise_from(customized_error, err) + raise customized_error from err try: from google.api_core.gapic_v1 import client_info as gapic_client_info @@ -776,7 +774,7 @@ def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): customized_error = ImportError( "Install the grpcio package to use the BigQuery Storage API." ) - six.raise_from(customized_error, err) + raise customized_error from err return bigquery_storage.BigQueryReadClient( credentials=credentials, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index 0f5d8f83b8c4..55846bd1a696 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -19,7 +19,6 @@ import copy from google.protobuf import json_format -import six import google.cloud._helpers from google.api_core import datetime_helpers @@ -63,7 +62,7 @@ def __init__(self, model_ref): # buffer classes do not. self._properties = {} - if isinstance(model_ref, six.string_types): + if isinstance(model_ref, str): model_ref = ModelReference.from_string(model_ref) if model_ref: @@ -455,7 +454,7 @@ def _model_arg_to_model_ref(value, default_project=None): This function keeps ModelReference and other kinds of objects unchanged. """ - if isinstance(value, six.string_types): + if isinstance(value, str): return ModelReference.from_string(value, default_project=default_project) if isinstance(value, Model): return value.reference diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py index 6a0ed9fb0f92..f26f2088630d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py @@ -17,7 +17,6 @@ """Define resources for the BigQuery Routines API.""" from google.protobuf import json_format -import six import google.cloud._helpers from google.cloud.bigquery import _helpers @@ -54,7 +53,7 @@ class Routine(object): } def __init__(self, routine_ref, **kwargs): - if isinstance(routine_ref, six.string_types): + if isinstance(routine_ref, str): routine_ref = RoutineReference.from_string(routine_ref) self._properties = {"routineReference": routine_ref.to_api_repr()} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 8ae0a3a85e93..c76aded02118 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -14,7 +14,7 @@ """Schemas for BigQuery tables / queries.""" -from six.moves import collections_abc +import collections from google.cloud.bigquery_v2 import types @@ -318,7 +318,7 @@ def _to_schema_fields(schema): instance or a compatible mapping representation of the field. """ for field in schema: - if not isinstance(field, (SchemaField, collections_abc.Mapping)): + if not isinstance(field, (SchemaField, collections.abc.Mapping)): raise ValueError( "Schema items must either be fields or compatible " "mapping representations." diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 6daccf518957..a2366b806eff 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -24,8 +24,6 @@ import pytz import warnings -import six - try: import pandas except ImportError: # pragma: NO COVER @@ -657,7 +655,7 @@ def description(self): @description.setter def description(self, value): - if not isinstance(value, six.string_types) and value is not None: + if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") self._properties["description"] = value @@ -694,7 +692,7 @@ def friendly_name(self): @friendly_name.setter def friendly_name(self, value): - if not isinstance(value, six.string_types) and value is not None: + if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") self._properties["friendlyName"] = value @@ -721,7 +719,7 @@ def view_query(self): @view_query.setter def view_query(self, value): - if not isinstance(value, six.string_types): + if not isinstance(value, str): raise ValueError("Pass a string") _helpers._set_sub_prop(self._properties, ["view", "query"], value) view = self._properties["view"] @@ -1244,7 +1242,7 @@ def keys(self): >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).keys()) ['x', 'y'] """ - return six.iterkeys(self._xxx_field_to_index) + return self._xxx_field_to_index.keys() def items(self): """Return items as ``(key, value)`` pairs. @@ -1258,7 +1256,7 @@ def items(self): >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).items()) [('x', 'a'), ('y', 'b')] """ - for key, index in six.iteritems(self._xxx_field_to_index): + for key, index in self._xxx_field_to_index.items(): yield (key, copy.deepcopy(self._xxx_values[index])) def get(self, key, default=None): @@ -1308,7 +1306,7 @@ def __len__(self): return len(self._xxx_values) def __getitem__(self, key): - if isinstance(key, six.string_types): + if isinstance(key, str): value = self._xxx_field_to_index.get(key) if value is None: raise KeyError("no row field {!r}".format(key)) @@ -2293,7 +2291,7 @@ def _table_arg_to_table_ref(value, default_project=None): This function keeps TableReference and other kinds of objects unchanged. """ - if isinstance(value, six.string_types): + if isinstance(value, str): value = TableReference.from_string(value, default_project=default_project) if isinstance(value, (Table, TableListItem)): value = value.reference @@ -2305,7 +2303,7 @@ def _table_arg_to_table(value, default_project=None): This function keeps Table and other kinds of objects unchanged. """ - if isinstance(value, six.string_types): + if isinstance(value, str): value = TableReference.from_string(value, default_project=default_project) if isinstance(value, TableReference): value = Table(value) diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py index 98a791477dd1..1aa0aa49c2e4 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py @@ -16,7 +16,7 @@ def load_table_uri_truncate_avro(table_id): # [START bigquery_load_table_gcs_avro_truncate] - import six + import io from google.cloud import bigquery @@ -33,7 +33,7 @@ def load_table_uri_truncate_avro(table_id): ], ) - body = six.BytesIO(b"Washington,WA") + body = io.BytesIO(b"Washington,WA") client.load_table_from_file(body, table_id, job_config=job_config).result() previous_rows = client.get_table(table_id).num_rows assert previous_rows > 0 diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py index 73de7a8c17cf..198cdc281c12 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py @@ -16,7 +16,7 @@ def load_table_uri_truncate_csv(table_id): # [START bigquery_load_table_gcs_csv_truncate] - import six + import io from google.cloud import bigquery @@ -33,7 +33,7 @@ def load_table_uri_truncate_csv(table_id): ], ) - body = six.BytesIO(b"Washington,WA") + body = io.BytesIO(b"Washington,WA") client.load_table_from_file(body, table_id, job_config=job_config).result() previous_rows = client.get_table(table_id).num_rows assert previous_rows > 0 diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py index a30fae73629e..d67d93e7b0c4 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py @@ -16,7 +16,7 @@ def load_table_uri_truncate_json(table_id): # [START bigquery_load_table_gcs_json_truncate] - import six + import io from google.cloud import bigquery @@ -33,7 +33,7 @@ def load_table_uri_truncate_json(table_id): ], ) - body = six.BytesIO(b"Washington,WA") + body = io.BytesIO(b"Washington,WA") client.load_table_from_file(body, table_id, job_config=job_config).result() previous_rows = client.get_table(table_id).num_rows assert previous_rows > 0 diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py index 18f963be2e7d..90543b791a37 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py @@ -16,7 +16,7 @@ def load_table_uri_truncate_orc(table_id): # [START bigquery_load_table_gcs_orc_truncate] - import six + import io from google.cloud import bigquery @@ -33,7 +33,7 @@ def load_table_uri_truncate_orc(table_id): ], ) - body = six.BytesIO(b"Washington,WA") + body = io.BytesIO(b"Washington,WA") client.load_table_from_file(body, table_id, job_config=job_config).result() previous_rows = client.get_table(table_id).num_rows assert previous_rows > 0 diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py index 28692d840d37..e036fc180a99 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py @@ -16,7 +16,7 @@ def load_table_uri_truncate_parquet(table_id): # [START bigquery_load_table_gcs_parquet_truncate] - import six + import io from google.cloud import bigquery @@ -33,7 +33,7 @@ def load_table_uri_truncate_parquet(table_id): ], ) - body = six.BytesIO(b"Washington,WA") + body = io.BytesIO(b"Washington,WA") client.load_table_from_file(body, table_id, job_config=job_config).result() previous_rows = client.get_table(table_id).num_rows assert previous_rows > 0 diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py index 45c6d34f5a41..5bc4668b08fb 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import six +import io from google.cloud import bigquery from .. import copy_table_multiple_source @@ -32,7 +32,7 @@ def test_copy_table_multiple_source(capsys, random_table_id, random_dataset_id, bigquery.SchemaField("post_abbr", "STRING"), ] ) - body = six.BytesIO(data) + body = io.BytesIO(data) client.load_table_from_file( body, table_ref, location="US", job_config=job_config ).result() diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 5f4e506eb08a..fcafddbd2d6d 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -33,7 +33,6 @@ "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", - "six >=1.13.0,< 2.0.0dev", "protobuf >= 3.12.0", ] extras = { diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 185722e83e16..bfe54b7df360 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -18,6 +18,7 @@ import csv import datetime import decimal +import io import json import operator import os @@ -27,7 +28,6 @@ import re import requests -import six import psutil import pytest import pytz @@ -54,7 +54,7 @@ pyarrow = None try: import IPython - from IPython.utils import io + from IPython.utils import io as ipython_io from IPython.testing import tools from IPython.terminal import interactiveshell except ImportError: # pragma: NO COVER @@ -219,7 +219,7 @@ def test_get_service_account_email(self): got = client.get_service_account_email() - self.assertIsInstance(got, six.text_type) + self.assertIsInstance(got, str) self.assertIn("@", got) def _create_bucket(self, bucket_name, location=None): @@ -598,7 +598,7 @@ def test_update_table_schema(self): @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) - page = six.next(iterator.pages) + page = next(iterator.pages) return list(page) def _create_table_many_columns(self, rowcount): @@ -1415,7 +1415,7 @@ def test_load_table_from_file_w_explicit_location(self): self._create_bucket(bucket_name, location="eu") # Create a temporary dataset & table in the EU. - table_bytes = six.BytesIO(b"a,3\nb,2\nc,1\n") + table_bytes = io.BytesIO(b"a,3\nb,2\nc,1\n") client = Config.CLIENT dataset = self.temp_dataset(_make_dataset_id("eu_load_file"), location="EU") table_ref = dataset.table("letters") @@ -2444,7 +2444,7 @@ def test_query_results_to_dataframe(self): self.assertEqual(list(df), column_names) # verify the column names exp_datatypes = { "id": int, - "author": six.text_type, + "author": str, "time_ts": pandas.Timestamp, "dead": bool, } @@ -2477,7 +2477,7 @@ def test_query_results_to_dataframe_w_bqstorage(self): self.assertEqual(list(df), column_names) exp_datatypes = { "id": int, - "author": six.text_type, + "author": str, "time_ts": pandas.Timestamp, "dead": bool, } @@ -2572,9 +2572,7 @@ def test_insert_rows_from_dataframe(self): assert len(row_tuples) == len(expected) for row, expected_row in zip(row_tuples, expected): - six.assertCountEqual( - self, row, expected_row - ) # column order does not matter + self.assertCountEqual(row, expected_row) # column order does not matter def test_insert_rows_nested_nested(self): # See #2951 @@ -2780,7 +2778,7 @@ def test_nested_table_to_arrow(self): {"string_col": "Some value", "record_col": record, "float_col": 3.14} ] rows = [json.dumps(row) for row in to_insert] - body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) + body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("nested_df")) table = dataset.table(table_id) @@ -2858,7 +2856,7 @@ def test_nested_table_to_dataframe(self): } ] rows = [json.dumps(row) for row in to_insert] - body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) + body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("nested_df")) table = dataset.table(table_id) @@ -2923,7 +2921,7 @@ def test_list_rows_page_size(self): schema = [SF("string_col", "STRING", mode="NULLABLE")] to_insert = [{"string_col": "item%d" % i} for i in range(num_items)] rows = [json.dumps(row) for row in to_insert] - body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) + body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("nested_df")) @@ -2997,7 +2995,7 @@ def test_bigquery_magic(): ORDER BY view_count DESC LIMIT 10 """ - with io.capture_output() as captured: + with ipython_io.capture_output() as captured: result = ip.run_cell_magic("bigquery", "--use_rest_api", sql) conn_count_end = len(current_process.connections()) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 12e2d4b8b44a..478e30e6f1ff 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -13,12 +13,12 @@ # limitations under the License. import copy +import http import unittest from google.api_core import exceptions import google.api_core.retry import mock -from six.moves import http_client from .helpers import _make_client from .helpers import _make_connection @@ -35,14 +35,14 @@ def _call_fut(self, *args, **kwargs): def test_simple(self): error_result = {"reason": "invalid", "message": "bad request"} exception = self._call_fut(error_result) - self.assertEqual(exception.code, http_client.BAD_REQUEST) + self.assertEqual(exception.code, http.client.BAD_REQUEST) self.assertTrue(exception.message.startswith("bad request")) self.assertIn(error_result, exception.errors) def test_missing_reason(self): error_result = {} exception = self._call_fut(error_result) - self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR) + self.assertEqual(exception.code, http.client.INTERNAL_SERVER_ERROR) class Test_JobReference(unittest.TestCase): diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 0567b59cdb45..579a841d1caa 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -14,6 +14,7 @@ import concurrent import copy +import http import textwrap import freezegun @@ -21,7 +22,6 @@ import google.api_core.retry import mock import requests -from six.moves import http_client from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery.query @@ -1210,7 +1210,7 @@ def test_result_error(self): job.result() self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) - self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + self.assertEqual(exc_info.exception.code, http.client.BAD_REQUEST) exc_job_instance = getattr(exc_info.exception, "query_job", None) self.assertIs(exc_job_instance, job) @@ -1265,7 +1265,7 @@ def test__begin_error(self): job.result() self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) - self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + self.assertEqual(exc_info.exception.code, http.client.BAD_REQUEST) exc_job_instance = getattr(exc_info.exception, "query_job", None) self.assertIs(exc_job_instance, job) diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 5907a36785ad..8948d41522d7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -18,7 +18,6 @@ import unittest import mock -import six class Test_not_null(unittest.TestCase): @@ -894,7 +893,7 @@ def test_w_list_missing_fields(self): ] original = [42] - with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + with self.assertRaisesRegex(ValueError, r".*not match schema length.*"): self._call_fut(fields, original) def test_w_list_too_many_fields(self): @@ -904,7 +903,7 @@ def test_w_list_too_many_fields(self): ] original = [42, "two", "three"] - with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + with self.assertRaisesRegex(ValueError, r".*not match schema length.*"): self._call_fut(fields, original) def test_w_non_empty_dict(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index 691c4c80242a..78e59cb306d9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -35,8 +35,8 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_build_api_url_no_extra_query_params(self): - from six.moves.urllib.parse import parse_qsl - from six.moves.urllib.parse import urlsplit + from urllib.parse import parse_qsl + from urllib.parse import urlsplit conn = self._make_one(object()) uri = conn.build_api_url("/foo") @@ -49,8 +49,8 @@ def test_build_api_url_no_extra_query_params(self): self.assertEqual(parms, {}) def test_build_api_url_w_custom_endpoint(self): - from six.moves.urllib.parse import parse_qsl - from six.moves.urllib.parse import urlsplit + from urllib.parse import parse_qsl + from urllib.parse import urlsplit custom_endpoint = "https://foo-bigquery.googleapis.com" conn = self._make_one(object(), api_endpoint=custom_endpoint) @@ -64,8 +64,8 @@ def test_build_api_url_w_custom_endpoint(self): self.assertEqual(parms, {}) def test_build_api_url_w_extra_query_params(self): - from six.moves.urllib.parse import parse_qsl - from six.moves.urllib.parse import urlsplit + from urllib.parse import parse_qsl + from urllib.parse import urlsplit conn = self._make_one(object()) uri = conn.build_api_url("/foo", {"bar": "baz"}) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index e5ead0cccf33..98dec00f9408 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -18,7 +18,9 @@ import decimal import email import gzip +import http.client import io +import itertools import json import operator import unittest @@ -26,8 +28,6 @@ import mock import requests -import six -from six.moves import http_client import pytest import pytz import pkg_resources @@ -474,7 +474,7 @@ def test_list_projects_defaults(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/projects"}, client, None) projects = list(page) @@ -508,7 +508,7 @@ def test_list_projects_w_timeout(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - six.next(iterator.pages) + next(iterator.pages) final_attributes.assert_called_once_with({"path": "/projects"}, client, None) @@ -528,7 +528,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/projects"}, client, None) projects = list(page) @@ -582,7 +582,7 @@ def test_list_datasets_defaults(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) datasets = list(page) @@ -635,7 +635,7 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) datasets = list(page) @@ -2919,7 +2919,7 @@ def test_list_tables_empty_w_timeout(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": path}, client, None) tables = list(page) @@ -2942,7 +2942,7 @@ def test_list_models_empty_w_timeout(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": path}, client, None) models = list(page) @@ -2991,7 +2991,7 @@ def test_list_models_defaults(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) models = list(page) @@ -3022,7 +3022,7 @@ def test_list_routines_empty_w_timeout(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with( {"path": "/projects/test-routines/datasets/test_routines/routines"}, @@ -3080,7 +3080,7 @@ def test_list_routines_defaults(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": path}, client, None) routines = list(page) @@ -3149,7 +3149,7 @@ def test_list_tables_defaults(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) tables = list(page) @@ -3213,7 +3213,7 @@ def test_list_tables_explicit(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) tables = list(page) @@ -4040,7 +4040,7 @@ def test_list_jobs_defaults(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) jobs = list(page) @@ -4090,7 +4090,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) jobs = list(page) @@ -4124,7 +4124,7 @@ def test_list_jobs_explicit_missing(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) jobs = list(page) @@ -4412,7 +4412,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): # Create mocks to be checked for doing transport. resumable_url = "http://test.invalid?upload_id=hey-you" response_headers = {"location": resumable_url} - fake_transport = self._mock_transport(http_client.OK, response_headers) + fake_transport = self._mock_transport(http.client.OK, response_headers) client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() @@ -4479,7 +4479,7 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.job import SourceFormat - fake_transport = self._mock_transport(http_client.OK, {}) + fake_transport = self._mock_transport(http.client.OK, {}) client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() @@ -5022,7 +5022,7 @@ def test_extract_table_generated_job_id(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsInstance(req["data"]["jobReference"]["jobId"], six.string_types) + self.assertIsInstance(req["data"]["jobReference"]["jobId"], str) self.assertIsNone(req["timeout"]) # Check the job resource. @@ -5227,7 +5227,7 @@ def test_query_defaults(self): job = client.query(QUERY) self.assertIsInstance(job, QueryJob) - self.assertIsInstance(job.job_id, six.string_types) + self.assertIsInstance(job.job_id, str) self.assertIs(job._client, client) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, []) @@ -5240,7 +5240,7 @@ def test_query_defaults(self): self.assertEqual(req["path"], "/projects/PROJECT/jobs") self.assertIsNone(req["timeout"]) sent = req["data"] - self.assertIsInstance(sent["jobReference"]["jobId"], six.string_types) + self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] self.assertEqual(sent_config["query"], QUERY) self.assertFalse(sent_config["useLegacySql"]) @@ -5687,7 +5687,7 @@ def test_query_w_udf_resources(self): self.assertEqual(req["path"], "/projects/PROJECT/jobs") self.assertIsNone(req["timeout"]) sent = req["data"] - self.assertIsInstance(sent["jobReference"]["jobId"], six.string_types) + self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] self.assertEqual(sent_config["query"], QUERY) self.assertTrue(sent_config["useLegacySql"]) @@ -6398,7 +6398,7 @@ def test_insert_rows_from_dataframe(self): actual_calls = conn.api_request.call_args_list - for call, expected_data in six.moves.zip_longest( + for call, expected_data in itertools.zip_longest( actual_calls, EXPECTED_SENT_DATA ): expected_call = mock.call( @@ -6466,7 +6466,7 @@ def test_insert_rows_from_dataframe_nan(self): actual_calls = conn.api_request.call_args_list - for call, expected_data in six.moves.zip_longest( + for call, expected_data in itertools.zip_longest( actual_calls, EXPECTED_SENT_DATA ): expected_call = mock.call( @@ -6776,7 +6776,7 @@ def test_list_rows(self): # Check that initial total_rows is populated from the table. self.assertEqual(iterator.total_rows, 7) - page = six.next(iterator.pages) + page = next(iterator.pages) rows = list(page) # Check that total_rows is updated based on API response. @@ -6831,14 +6831,14 @@ def test_list_rows_w_start_index_w_page_size(self): table = Table(self.TABLE_REF, schema=[full_name]) iterator = client.list_rows(table, max_results=4, page_size=2, start_index=1) pages = iterator.pages - rows = list(six.next(pages)) + rows = list(next(pages)) extra_params = iterator.extra_params f2i = {"full_name": 0} self.assertEqual(len(rows), 2) self.assertEqual(rows[0], Row(("Phred Phlyntstone",), f2i)) self.assertEqual(rows[1], Row(("Bharney Rhubble",), f2i)) - rows = list(six.next(pages)) + rows = list(next(pages)) self.assertEqual(len(rows), 2) self.assertEqual(rows[0], Row(("Wylma Phlyntstone",), f2i)) @@ -6915,7 +6915,7 @@ def test_list_rows_query_params(self): conn = client._connection = make_connection(*len(tests) * [{}]) for i, test in enumerate(tests): iterator = client.list_rows(table, **test[0]) - six.next(iterator.pages) + next(iterator.pages) req = conn.api_request.call_args_list[i] test[1]["formatOptions.useInt64Timestamp"] = True self.assertEqual(req[1]["query_params"], test[1], "for kwargs %s" % test[0]) @@ -7000,7 +7000,7 @@ def test_list_rows_repeated_fields(self): struct = SchemaField("struct", "RECORD", mode="REPEATED", fields=[index, score]) iterator = client.list_rows(self.TABLE_REF, selected_fields=[color, struct]) - page = six.next(iterator.pages) + page = next(iterator.pages) rows = list(page) total_rows = iterator.total_rows page_token = iterator.next_page_token @@ -7065,7 +7065,7 @@ def test_list_rows_w_record_schema(self): table = Table(self.TABLE_REF, schema=[full_name, phone]) iterator = client.list_rows(table) - page = six.next(iterator.pages) + page = next(iterator.pages) rows = list(page) total_rows = iterator.total_rows page_token = iterator.next_page_token @@ -7241,7 +7241,7 @@ def _make_do_upload_patch(cls, client, method, resource={}, side_effect=None): if side_effect is None: side_effect = [ cls._make_response( - http_client.OK, + http.client.OK, json.dumps(resource), {"Content-Type": "application/json"}, ) @@ -7522,7 +7522,7 @@ def test_load_table_from_file_failure(self): file_obj = self._make_file_obj() response = self._make_response( - content="Someone is already in this spot.", status_code=http_client.CONFLICT + content="Someone is already in this spot.", status_code=http.client.CONFLICT ) do_upload_patch = self._make_do_upload_patch( @@ -8584,7 +8584,7 @@ def _make_resumable_upload_responses(cls, size): resumable_url = "http://test.invalid?upload_id=and-then-there-was-1" initial_response = cls._make_response( - http_client.OK, "", {"location": resumable_url} + http.client.OK, "", {"location": resumable_url} ) data_response = cls._make_response( resumable_media.PERMANENT_REDIRECT, @@ -8592,7 +8592,7 @@ def _make_resumable_upload_responses(cls, size): {"range": "bytes=0-{:d}".format(size - 1)}, ) final_response = cls._make_response( - http_client.OK, + http.client.OK, json.dumps({"size": size}), {"Content-Type": "application/json"}, ) @@ -8634,7 +8634,7 @@ def test__do_resumable_upload(self): ) def test__do_multipart_upload(self): - transport = self._make_transport([self._make_response(http_client.OK)]) + transport = self._make_transport([self._make_response(http.client.OK)]) client = self._make_client(transport) file_obj = self._make_file_obj() file_obj_len = len(file_obj.getvalue()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 08dd6dcfaa7a..fffa46aa8957 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -23,8 +23,6 @@ except ImportError: # pragma: NO COVER pyarrow = None -import six - import google.cloud._helpers from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import _helpers @@ -293,7 +291,7 @@ def test_public_instance_methods_on_closed_instance(self): instance = decorated_class() instance._closed = True - with six.assertRaisesRegex(self, exceptions.ProgrammingError, "I'm closed!"): + with self.assertRaisesRegex(exceptions.ProgrammingError, "I'm closed!"): instance.instance_method() def test_methods_wo_public_instance_methods_on_closed_instance(self): @@ -316,7 +314,7 @@ def test_custom_class_closed_attribute(self): instance._closed = False instance._really_closed = True - with six.assertRaisesRegex(self, exceptions.ProgrammingError, "I'm closed!"): + with self.assertRaisesRegex(exceptions.ProgrammingError, "I'm closed!"): instance.instance_method() def test_custom_on_closed_error_type(self): @@ -327,5 +325,5 @@ def test_custom_on_closed_error_type(self): instance = decorated_class() instance._closed = True - with six.assertRaisesRegex(self, RuntimeError, "I'm closed!"): + with self.assertRaisesRegex(RuntimeError, "I'm closed!"): instance.instance_method() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 30fb1292e7d0..edec559b285b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -16,7 +16,6 @@ import unittest import mock -import six try: from google.cloud import bigquery_storage @@ -124,8 +123,8 @@ def test_raises_error_if_closed(self): connection.close() for method in ("close", "commit", "cursor"): - with six.assertRaisesRegex( - self, ProgrammingError, r"Operating on a closed connection\." + with self.assertRaisesRegex( + ProgrammingError, r"Operating on a closed connection\." ): getattr(connection, method)() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index f55b3fd3f804..cbd6f69098de 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -16,7 +16,6 @@ import unittest import mock -import six try: import pyarrow @@ -181,8 +180,8 @@ def test_raises_error_if_closed(self): ) for method in method_names: - with six.assertRaisesRegex( - self, ProgrammingError, r"Operating on a closed cursor\." + with self.assertRaisesRegex( + ProgrammingError, r"Operating on a closed cursor\." ): getattr(cursor, method)() @@ -375,7 +374,7 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): cursor = connection.cursor() cursor.execute("SELECT foo, bar FROM some_table") - with six.assertRaisesRegex(self, exceptions.Forbidden, "invalid credentials"): + with self.assertRaisesRegex(exceptions.Forbidden, "invalid credentials"): cursor.fetchall() # the default client was not used diff --git a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py index 09afa7531438..5d0cf20539aa 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py @@ -13,6 +13,7 @@ # limitations under the License. import datetime +import importlib import sys import mock @@ -28,7 +29,6 @@ except ImportError: # pragma: NO COVER opentelemetry = None import pytest -from six.moves import reload_module from google.cloud.bigquery import opentelemetry_tracing @@ -39,7 +39,7 @@ @pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") @pytest.fixture def setup(): - reload_module(opentelemetry_tracing) + importlib.reload(opentelemetry_tracing) tracer_provider = TracerProvider() memory_exporter = InMemorySpanExporter() span_processor = SimpleExportSpanProcessor(memory_exporter) @@ -51,7 +51,7 @@ def setup(): @pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") def test_opentelemetry_not_installed(setup, monkeypatch): monkeypatch.setitem(sys.modules, "opentelemetry", None) - reload_module(opentelemetry_tracing) + importlib.reload(opentelemetry_tracing) with opentelemetry_tracing.create_span("No-op for opentelemetry") as span: assert span is None diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 0e7b0bb4d890..3373528e03f8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -22,7 +22,6 @@ import pkg_resources import pytest import pytz -import six import google.api_core.exceptions @@ -1674,16 +1673,16 @@ def test_iterate(self): rows_iter = iter(row_iterator) - val1 = six.next(rows_iter) + val1 = next(rows_iter) self.assertEqual(val1.name, "Phred Phlyntstone") self.assertEqual(row_iterator.num_results, 1) - val2 = six.next(rows_iter) + val2 = next(rows_iter) self.assertEqual(val2.name, "Bharney Rhubble") self.assertEqual(row_iterator.num_results, 2) with self.assertRaises(StopIteration): - six.next(rows_iter) + next(rows_iter) api_request.assert_called_once_with(method="GET", path=path, query_params={}) @@ -2437,13 +2436,6 @@ def test_to_dataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "int64") - @pytest.mark.xfail( - six.PY2, - reason=( - "Requires pyarrow>-1.0 to work, but the latter is not compatible " - "with Python 2 anymore." - ), - ) @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): @@ -2475,13 +2467,6 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): ], ) - @pytest.mark.xfail( - six.PY2, - reason=( - "Requires pyarrow>-1.0 to work, but the latter is not compatible " - "with Python 2 anymore." - ), - ) @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): @@ -2697,7 +2682,7 @@ def test_to_dataframe_w_various_types_nullable(self): else: self.assertIsInstance(row.start_timestamp, pandas.Timestamp) self.assertIsInstance(row.seconds, float) - self.assertIsInstance(row.payment_type, six.string_types) + self.assertIsInstance(row.payment_type, str) self.assertIsInstance(row.complete, bool) self.assertIsInstance(row.date, datetime.date) @@ -3542,7 +3527,7 @@ def test__eq___type_mismatch(self): def test_unhashable_object(self): object_under_test1 = self._make_one(start=1, end=10, interval=2) - with six.assertRaisesRegex(self, TypeError, r".*unhashable type.*"): + with self.assertRaisesRegex(TypeError, r".*unhashable type.*"): hash(object_under_test1) def test_repr(self): @@ -3642,7 +3627,7 @@ def test_unhashable_object(self): object_under_test1 = self._make_one( range_=PartitionRange(start=1, end=10, interval=2), field="integer_col" ) - with six.assertRaisesRegex(self, TypeError, r".*unhashable type.*"): + with self.assertRaisesRegex(TypeError, r".*unhashable type.*"): hash(object_under_test1) def test_repr(self): From 55d8ec0d29152652afe5b7ef0dae4f5096c6cf5e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 8 Jan 2021 16:56:18 -0600 Subject: [PATCH 1032/2016] fix: add minimum timeout to getQueryResults API requests (#444) * fix: add minimum timeout to getQueryResults API requests Since successful responses can still take a long time to download, have a minimum timeout which should accomodate 99.9%+ of responses. I figure it's more important that *any* timeout is set if desired than it is that the specific timeout is used. This is especially true in cases where a short timeout is requested for the purposes of a progress bar. Making forward progress is more important than the progress bar update frequency. * docs: document minimum timeout value * test: remove redundant query timeout test * test: change assertion for done method * chore: remove unused import --- .../google/cloud/bigquery/client.py | 22 +++++++++++-- .../google-cloud-bigquery/tests/system.py | 33 +++++++------------ .../tests/unit/job/test_query.py | 6 ++++ .../tests/unit/test_client.py | 29 ++++++++++++++-- 4 files changed, 64 insertions(+), 26 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 19693c9ff2ee..3541726b83fc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -93,6 +93,14 @@ ) _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS = "jobReference,totalRows,pageToken,rows" +# In microbenchmarks, it's been shown that even in ideal conditions (query +# finished, local data), requests to getQueryResults can take 10+ seconds. +# In less-than-ideal situations, the response can take even longer, as it must +# be able to download a full 100+ MB row in that time. Don't let the +# connection timeout before data can be downloaded. +# https://github.com/googleapis/python-bigquery/issues/438 +_MIN_GET_QUERY_RESULTS_TIMEOUT = 120 + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -1570,7 +1578,9 @@ def _get_query_results( location (Optional[str]): Location of the query job. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. If set, this connection timeout may be + increased to a minimum value. This prevents retries on what + would otherwise be a successful response. Returns: google.cloud.bigquery.query._QueryResults: @@ -1579,6 +1589,9 @@ def _get_query_results( extra_params = {"maxResults": 0} + if timeout is not None: + timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) + if project is None: project = self.project @@ -3293,7 +3306,9 @@ def _list_rows_from_query_results( How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. If set, this connection timeout may be + increased to a minimum value. This prevents retries on what + would otherwise be a successful response. If multiple requests are made under the hood, ``timeout`` applies to each individual request. Returns: @@ -3306,6 +3321,9 @@ def _list_rows_from_query_results( "location": location, } + if timeout is not None: + timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) + if start_index is not None: params["startIndex"] = start_index diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index bfe54b7df360..102c8f78d618 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -27,7 +27,6 @@ import uuid import re -import requests import psutil import pytest import pytz @@ -1798,15 +1797,25 @@ def test_query_w_wrong_config(self): Config.CLIENT.query(good_query, job_config=bad_config).result() def test_query_w_timeout(self): + job_config = bigquery.QueryJobConfig() + job_config.use_query_cache = False + query_job = Config.CLIENT.query( "SELECT * FROM `bigquery-public-data.github_repos.commits`;", job_id_prefix="test_query_w_timeout_", + location="US", + job_config=job_config, ) with self.assertRaises(concurrent.futures.TimeoutError): - # 1 second is much too short for this query. query_job.result(timeout=1) + # Even though the query takes >1 second, the call to getQueryResults + # should succeed. + self.assertFalse(query_job.done(timeout=1)) + + Config.CLIENT.cancel_job(query_job.job_id, location=query_job.location) + def test_query_w_page_size(self): page_size = 45 query_job = Config.CLIENT.query( @@ -2408,26 +2417,6 @@ def test_query_iter(self): row_tuples = [r.values() for r in query_job] self.assertEqual(row_tuples, [(1,)]) - def test_querying_data_w_timeout(self): - job_config = bigquery.QueryJobConfig() - job_config.use_query_cache = False - - query_job = Config.CLIENT.query( - """ - SELECT COUNT(*) - FROM UNNEST(GENERATE_ARRAY(1,1000000)), UNNEST(GENERATE_ARRAY(1, 10000)) - """, - location="US", - job_config=job_config, - ) - - # Specify a very tight deadline to demonstrate that the timeout - # actually has effect. - with self.assertRaises(requests.exceptions.Timeout): - query_job.done(timeout=0.1) - - Config.CLIENT.cancel_job(query_job.job_id, location=query_job.location) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_query_results_to_dataframe(self): QUERY = """ diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 579a841d1caa..a4ab11ab6ad6 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -1046,6 +1046,8 @@ def test_result_invokes_begins(self): self.assertEqual(reload_request[1]["method"], "GET") def test_result_w_timeout(self): + import google.cloud.bigquery.client + begun_resource = self._make_resource() query_resource = { "jobComplete": True, @@ -1072,6 +1074,10 @@ def test_result_w_timeout(self): "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), ) self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) + self.assertEqual( + query_request[1]["timeout"], + google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) self.assertEqual(reload_request[1]["method"], "GET") def test_result_w_page_size(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 98dec00f9408..bf183b5a4e60 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -311,7 +311,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): project="other-project", location=self.LOCATION, timeout_ms=500, - timeout=42, + timeout=420, ) final_attributes.assert_called_once_with({"path": path}, client, None) @@ -320,7 +320,32 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): method="GET", path=path, query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, - timeout=42, + timeout=420, + ) + + def test__get_query_results_miss_w_short_timeout(self): + import google.cloud.bigquery.client + from google.cloud.exceptions import NotFound + + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection() + path = "/projects/other-project/queries/nothere" + with self.assertRaises(NotFound): + client._get_query_results( + "nothere", + None, + project="other-project", + location=self.LOCATION, + timeout_ms=500, + timeout=1, + ) + + conn.api_request.assert_called_once_with( + method="GET", + path=path, + query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, ) def test__get_query_results_miss_w_client_location(self): From cfeab25c685c667c36c311a6f30b22f2cc1a95e2 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 11 Jan 2021 10:18:08 +0100 Subject: [PATCH 1033/2016] chore: Bound maximum supported Python version (#465) * chore: bound maximum supported Python version * Bound supported Python versions claim in README --- packages/google-cloud-bigquery/README.rst | 2 +- packages/google-cloud-bigquery/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index c7d50d7291a1..61192b625ed3 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -52,7 +52,7 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.6 +Python >= 3.6, < 3.9 Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index fcafddbd2d6d..0ea6ccca222d 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -120,7 +120,7 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=3.6", + python_requires=">=3.6, <3.9", include_package_data=True, zip_safe=False, ) From 74653c2b4209fb3dd44570aebd01a56bcceb639a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 11 Jan 2021 10:20:06 +0100 Subject: [PATCH 1034/2016] refactor: simplify AutoStrEnum definition (#458) With now only Python 3.6 supported, we can use the _generate_next_value() hook instead of metaclass magic. --- .../bigquery/magics/line_arg_parser/lexer.py | 41 ++++--------------- 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py index 17e1ffdae0bf..0cb63292c413 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -136,40 +136,17 @@ ) -# The _generate_next_value_() enum hook is only available in Python 3.6+, thus we -# need to do some acrobatics to implement an "auto str enum" base class. Implementation -# based on the recipe provided by the very author of the Enum library: -# https://stackoverflow.com/a/32313954/5040035 -class StrEnumMeta(enum.EnumMeta): - @classmethod - def __prepare__(metacls, name, bases, **kwargs): - # Having deterministic enum members definition order is nice. - return OrderedDict() +class AutoStrEnum(str, enum.Enum): + """Base enum class for for name=value str enums.""" - def __new__(metacls, name, bases, oldclassdict): - # Scan through the declared enum members and convert any value that is a plain - # empty tuple into a `str` of the name instead. - newclassdict = enum._EnumDict() - for key, val in oldclassdict.items(): - if val == (): - val = key - newclassdict[key] = val - return super(StrEnumMeta, metacls).__new__(metacls, name, bases, newclassdict) + def _generate_next_value_(name, start, count, last_values): + return name -# The @six.add_metaclass decorator does not work, Enum complains about _sunder_ names, -# and we cannot use class syntax directly, because the Python 3 version would cause -# a syntax error under Python 2. -AutoStrEnum = StrEnumMeta( - "AutoStrEnum", - (str, enum.Enum), - {"__doc__": "Base enum class for for name=value str enums."}, -) - TokenType = AutoStrEnum( "TokenType", [ - (name, name) + (name, enum.auto()) for name in itertools.chain.from_iterable(token_types.values()) if not name.startswith("GOTO_") ], @@ -177,10 +154,10 @@ def __new__(metacls, name, bases, oldclassdict): class LexerState(AutoStrEnum): - PARSE_POS_ARGS = () # parsing positional arguments - PARSE_NON_PARAMS_OPTIONS = () # parsing options other than "--params" - PARSE_PARAMS_OPTION = () # parsing the "--params" option - STATE_END = () + PARSE_POS_ARGS = enum.auto() # parsing positional arguments + PARSE_NON_PARAMS_OPTIONS = enum.auto() # parsing options other than "--params" + PARSE_PARAMS_OPTION = enum.auto() # parsing the "--params" option + STATE_END = enum.auto() class Lexer(object): From e2a919a43057d707edfe88c699d7c573db5391b4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 11 Jan 2021 11:07:35 -0600 Subject: [PATCH 1035/2016] fix: use debug logging level for OpenTelemetry message (#442) * fix: use debug logging level for OpenTelemetry message * only warn at span creation time * add unit test for skipping warning * refactor: rename _warned_telemetry to indicate private and mutable --- .../cloud/bigquery/opentelemetry_tracing.py | 19 ++++++++++++------- .../tests/unit/test_opentelemetry_tracing.py | 12 ++++++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py index b9d18efade0c..57f258ac4db8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -23,16 +23,11 @@ from opentelemetry.trace.status import Status HAS_OPENTELEMETRY = True + _warned_telemetry = True except ImportError: - logger.info( - "This service is instrumented using OpenTelemetry. " - "OpenTelemetry could not be imported; please " - "add opentelemetry-api and opentelemetry-instrumentation " - "packages in order to get BigQuery Tracing data." - ) - HAS_OPENTELEMETRY = False + _warned_telemetry = False _default_attributes = { "db.system": "BigQuery" @@ -64,8 +59,18 @@ def create_span(name, attributes=None, client=None, job_ref=None): Raised if a span could not be yielded or issue with call to OpenTelemetry. """ + global _warned_telemetry final_attributes = _get_final_span_attributes(attributes, client, job_ref) if not HAS_OPENTELEMETRY: + if not _warned_telemetry: + logger.debug( + "This service is instrumented using OpenTelemetry. " + "OpenTelemetry could not be imported; please " + "add opentelemetry-api and opentelemetry-instrumentation " + "packages in order to get BigQuery Tracing data." + ) + _warned_telemetry = True + yield None return tracer = trace.get_tracer(__name__) diff --git a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py index 5d0cf20539aa..726e3cf6f2d6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py @@ -52,8 +52,20 @@ def setup(): def test_opentelemetry_not_installed(setup, monkeypatch): monkeypatch.setitem(sys.modules, "opentelemetry", None) importlib.reload(opentelemetry_tracing) + assert not opentelemetry_tracing._warned_telemetry with opentelemetry_tracing.create_span("No-op for opentelemetry") as span: assert span is None + assert opentelemetry_tracing._warned_telemetry + + +@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") +def test_opentelemetry_not_installed_doesnt_warn(setup, monkeypatch): + monkeypatch.setitem(sys.modules, "opentelemetry", None) + importlib.reload(opentelemetry_tracing) + opentelemetry_tracing._warned_telemetry = True + with opentelemetry_tracing.create_span("No-op for opentelemetry") as span: + assert span is None + assert opentelemetry_tracing._warned_telemetry @pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") From a140336c338b37c06c54c5b7709c254a3e2d0b22 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 11 Jan 2021 11:40:09 -0600 Subject: [PATCH 1036/2016] chore: release 2.6.2 (#429) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 15 +++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index d01f62ff64eb..4d58072e85b0 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.6.2](https://www.github.com/googleapis/python-bigquery/compare/v2.6.1...v2.6.2) (2021-01-11) + + +### Bug Fixes + +* add minimum timeout to getQueryResults API requests ([#444](https://www.github.com/googleapis/python-bigquery/issues/444)) ([015a73e](https://www.github.com/googleapis/python-bigquery/commit/015a73e1839e3427408ef6e0f879717d9ddbdb61)) +* use debug logging level for OpenTelemetry message ([#442](https://www.github.com/googleapis/python-bigquery/issues/442)) ([7ea6b7c](https://www.github.com/googleapis/python-bigquery/commit/7ea6b7c2469d2415192cfdacc379e38e49d24775)) + + +### Documentation + +* add GEOGRAPHY data type code samples ([#428](https://www.github.com/googleapis/python-bigquery/issues/428)) ([dbc68b3](https://www.github.com/googleapis/python-bigquery/commit/dbc68b3d1f325f80d24a2da5f028b0f653fb0317)) +* fix Shapely import in GEOGRAPHY sample ([#431](https://www.github.com/googleapis/python-bigquery/issues/431)) ([96a1c5b](https://www.github.com/googleapis/python-bigquery/commit/96a1c5b3c72855ba6ae8c88dfd0cdb02d2faf909)) +* move and refresh view samples ([#420](https://www.github.com/googleapis/python-bigquery/issues/420)) ([079b6a1](https://www.github.com/googleapis/python-bigquery/commit/079b6a162f6929bf801366d92f8daeb3318426c4)) + ### [2.6.1](https://www.github.com/googleapis/python-bigquery/compare/v2.6.0...v2.6.1) (2020-12-09) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 410cd066ed8d..9aaeb8bc4c7d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.6.1" +__version__ = "2.6.2" From 992d2369c7318babfd99c993187906927fdd5136 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 11 Jan 2021 22:51:55 +0100 Subject: [PATCH 1037/2016] chore(deps): update dependency google-cloud-bigquery to v2.6.2 (#466) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 3ea0e6e06914..338cf2e894d2 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.6.1 +google-cloud-bigquery==2.6.2 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 208eb4526658..003dc6fb4fc4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.6.1 +google-cloud-bigquery==2.6.2 google-cloud-bigquery-storage==2.1.0 google-auth-oauthlib==0.4.2 grpcio==1.34.0 From 8501884671bd100710f3cd276e50e521b6a64fa6 Mon Sep 17 00:00:00 2001 From: Carlos de la Guardia Date: Wed, 13 Jan 2021 09:55:33 -0600 Subject: [PATCH 1038/2016] feature: raise error for unknown properties in job config (#446) * feature: warn about unknown properties in job config * fix: raise error instead of warning * fix: use hasattr instead of __dict__ * fix bad merge * fix system test that sets wrong property Co-authored-by: Tim Swast --- .../google/cloud/bigquery/job/base.py | 8 ++++++++ packages/google-cloud-bigquery/tests/system.py | 6 ++---- .../google-cloud-bigquery/tests/unit/job/test_base.py | 7 +++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 3c601f072592..930b71e8ae41 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -659,6 +659,14 @@ def __init__(self, job_type, **kwargs): for prop, val in kwargs.items(): setattr(self, prop, val) + def __setattr__(self, name, value): + """Override to be able to raise error if an unknown property is being set""" + if not name.startswith("_") and not hasattr(type(self), name): + raise AttributeError( + "Property {} is unknown for {}.".format(name, type(self)) + ) + super(_JobConfig, self).__setattr__(name, value) + @property def labels(self): """Dict[str, str]: Labels for the job. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 102c8f78d618..447f66b1a95d 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -76,7 +76,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC -from google.cloud.bigquery import dbapi +from google.cloud.bigquery import dbapi, enums from google.cloud import storage from test_utils.retry import RetryErrors @@ -1789,10 +1789,8 @@ def test_query_w_wrong_config(self): rows = list(Config.CLIENT.query("SELECT 1;").result()) assert rows[0][0] == 1 - project = Config.CLIENT.project - dataset_ref = bigquery.DatasetReference(project, "dset") bad_config = LoadJobConfig() - bad_config.destination = dataset_ref.table("tbl") + bad_config.source_format = enums.SourceFormat.CSV with self.assertRaises(Exception): Config.CLIENT.query(good_query, job_config=bad_config).result() diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 478e30e6f1ff..610ad2875d95 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -19,6 +19,7 @@ from google.api_core import exceptions import google.api_core.retry import mock +import pytest from .helpers import _make_client from .helpers import _make_connection @@ -1021,6 +1022,12 @@ def test_ctor(self): self.assertEqual(job_config._job_type, self.JOB_TYPE) self.assertEqual(job_config._properties, {self.JOB_TYPE: {}}) + def test_ctor_with_unknown_property_raises_error(self): + error_text = "Property wrong_name is unknown for" + with pytest.raises(AttributeError, match=error_text): + config = self._make_one() + config.wrong_name = None + def test_fill_from_default(self): from google.cloud.bigquery import QueryJobConfig From 5b5682f025c0eb811f3ba11522084c48926ea6b3 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 13 Jan 2021 23:56:25 +0100 Subject: [PATCH 1039/2016] chore(deps): update dependency grpcio to v1.34.1 (#470) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 003dc6fb4fc4..de882844bb22 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.6.2 google-cloud-bigquery-storage==2.1.0 google-auth-oauthlib==0.4.2 -grpcio==1.34.0 +grpcio==1.34.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 From 81b44dd112b9d8d4cb207df7dc1d44372dc1d4b4 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 14 Jan 2021 16:59:26 +0100 Subject: [PATCH 1040/2016] fix: reading the labels attribute on Job instances (#471) --- .../google/cloud/bigquery/job/base.py | 7 +++---- packages/google-cloud-bigquery/tests/system.py | 17 +++++++++++++++++ .../tests/unit/job/test_base.py | 2 +- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 930b71e8ae41..5ba01aa673d9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -233,7 +233,7 @@ def path(self): @property def labels(self): """Dict[str, str]: Labels for the job.""" - return self._properties.setdefault("labels", {}) + return self._properties.setdefault("configuration", {}).setdefault("labels", {}) @property def etag(self): @@ -671,9 +671,8 @@ def __setattr__(self, name, value): def labels(self): """Dict[str, str]: Labels for the job. - This method always returns a dict. To change a job's labels, - modify the dict, then call ``Client.update_job``. To delete a - label, set its value to :data:`None` before updating. + This method always returns a dict. Once a job has been created on the + server, its labels cannot be modified anymore. Raises: ValueError: If ``value`` type is invalid. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system.py index 447f66b1a95d..0fa5bc41e6fa 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system.py @@ -1667,6 +1667,23 @@ def test_job_cancel(self): # raise an error, and that the job completed (in the `retry()` # above). + def test_job_labels(self): + DATASET_ID = _make_dataset_id("job_cancel") + JOB_ID_PREFIX = "fetch_" + DATASET_ID + QUERY = "SELECT 1 as one" + + self.temp_dataset(DATASET_ID) + + job_config = bigquery.QueryJobConfig( + labels={"custom_label": "label_value", "another_label": "foo123"} + ) + job = Config.CLIENT.query( + QUERY, job_id_prefix=JOB_ID_PREFIX, job_config=job_config + ) + + expected_labels = {"custom_label": "label_value", "another_label": "foo123"} + self.assertEqual(job.labels, expected_labels) + def test_get_failed_job(self): # issue 4246 from google.api_core.exceptions import BadRequest diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 610ad2875d95..44bbc2c773d3 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -251,7 +251,7 @@ def test_labels_hit(self): labels = {"foo": "bar"} client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._properties["labels"] = labels + job._properties.setdefault("configuration", {})["labels"] = labels self.assertEqual(job.labels, labels) def test_etag(self): From d78cf4a3096197ee2777b97136673b699b35b812 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 20 Jan 2021 17:04:02 -0600 Subject: [PATCH 1041/2016] refactor: move system tests into `tests/system` directory (#475) This aligns more closely with the templates from synthtool. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Towards #366 --- packages/google-cloud-bigquery/noxfile.py | 15 ++-- .../tests/system/__init__.py | 13 +++ .../{system.py => system/test_client.py} | 81 ++---------------- .../tests/system/test_magics.py | 83 +++++++++++++++++++ 4 files changed, 111 insertions(+), 81 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/system/__init__.py rename packages/google-cloud-bigquery/tests/{system.py => system/test_client.py} (97%) create mode 100644 packages/google-cloud-bigquery/tests/system/test_magics.py diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index f3326d01bade..e6a739d1e2e9 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -115,9 +115,7 @@ def system(session): session.install("ipython", "-c", constraints_path) # Run py.test against the system tests. - session.run( - "py.test", "--quiet", os.path.join("tests", "system.py"), *session.posargs - ) + session.run("py.test", "--quiet", os.path.join("tests", "system"), *session.posargs) @nox.session(python=["3.8"]) @@ -181,12 +179,14 @@ def prerelease_deps(session): ) session.install("--pre", "grpcio", "pandas") session.install( + "freezegun", + "google-cloud-storage", + "google-cloud-testutils", + "IPython", "mock", + "psutil", "pytest", - "google-cloud-testutils", "pytest-cov", - "freezegun", - "IPython", ) session.install("-e", ".[all]") @@ -196,7 +196,8 @@ def prerelease_deps(session): session.run("python", "-c", "import pyarrow; print(pyarrow.__version__)") # Run all tests, except a few samples tests which require extra dependencies. - session.run("py.test", "tests") + session.run("py.test", "tests/unit") + session.run("py.test", "tests/system") session.run("py.test", "samples/tests") diff --git a/packages/google-cloud-bigquery/tests/system/__init__.py b/packages/google-cloud-bigquery/tests/system/__init__.py new file mode 100644 index 000000000000..4fbd93bb2ca4 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/system/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/packages/google-cloud-bigquery/tests/system.py b/packages/google-cloud-bigquery/tests/system/test_client.py similarity index 97% rename from packages/google-cloud-bigquery/tests/system.py rename to packages/google-cloud-bigquery/tests/system/test_client.py index 0fa5bc41e6fa..aa1a03160b35 100644 --- a/packages/google-cloud-bigquery/tests/system.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -22,13 +22,12 @@ import json import operator import os +import pathlib import time import unittest import uuid -import re import psutil -import pytest import pytz import pkg_resources @@ -51,13 +50,6 @@ import pyarrow.types except ImportError: # pragma: NO COVER pyarrow = None -try: - import IPython - from IPython.utils import io as ipython_io - from IPython.testing import tools - from IPython.terminal import interactiveshell -except ImportError: # pragma: NO COVER - IPython = None from google.api_core.exceptions import PreconditionFailed from google.api_core.exceptions import BadRequest @@ -86,7 +78,7 @@ JOB_TIMEOUT = 120 # 2 minutes -WHERE = os.path.abspath(os.path.dirname(__file__)) +DATA_PATH = pathlib.Path(__file__).parent.parent / "data" # Common table data used for many tests. ROWS = [ @@ -149,10 +141,10 @@ def _make_dataset_id(prefix): return "%s%s" % (prefix, unique_resource_id()) -def _load_json_schema(filename="data/schema.json"): +def _load_json_schema(filename="schema.json"): from google.cloud.bigquery.table import _parse_schema_resource - json_filename = os.path.join(WHERE, filename) + json_filename = DATA_PATH / filename with open(json_filename, "r") as schema_file: return _parse_schema_resource(json.load(schema_file)) @@ -716,7 +708,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): table = Table(table_ref) self.to_delete.insert(0, table) - with open(os.path.join(WHERE, "data", "colors.avro"), "rb") as avrof: + with open(DATA_PATH / "colors.avro", "rb") as avrof: config = bigquery.LoadJobConfig() config.source_format = SourceFormat.AVRO config.write_disposition = WriteDisposition.WRITE_TRUNCATE @@ -1347,7 +1339,7 @@ def test_load_avro_from_uri_then_dump_table(self): ("orange", 590), ("red", 650), ] - with open(os.path.join(WHERE, "data", "colors.avro"), "rb") as f: + with open(DATA_PATH / "colors.avro", "rb") as f: GS_URL = self._write_avro_to_storage( "bq_load_test" + unique_resource_id(), "colors.avro", f ) @@ -2707,7 +2699,7 @@ def test_create_table_rows_fetch_nested_schema(self): to_insert = [] # Data is in "JSON Lines" format, see http://jsonlines.org/ - json_filename = os.path.join(WHERE, "data", "characters.jsonl") + json_filename = DATA_PATH / "characters.jsonl" with open(json_filename) as rows_file: for line in rows_file: to_insert.append(json.loads(line)) @@ -2979,47 +2971,6 @@ def temp_dataset(self, dataset_id, location=None): return dataset -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") -@pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic(): - ip = IPython.get_ipython() - current_process = psutil.Process() - conn_count_start = len(current_process.connections()) - - ip.extension_manager.load_extension("google.cloud.bigquery") - sql = """ - SELECT - CONCAT( - 'https://stackoverflow.com/questions/', - CAST(id as STRING)) as url, - view_count - FROM `bigquery-public-data.stackoverflow.posts_questions` - WHERE tags like '%google-bigquery%' - ORDER BY view_count DESC - LIMIT 10 - """ - with ipython_io.capture_output() as captured: - result = ip.run_cell_magic("bigquery", "--use_rest_api", sql) - - conn_count_end = len(current_process.connections()) - - lines = re.split("\n|\r", captured.stdout) - # Removes blanks & terminal code (result of display clearing) - updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines)) - assert re.match("Executing query with job ID: .*", updates[0]) - assert all(re.match("Query executing: .*s", line) for line in updates[1:-1]) - assert re.match("Query complete after .*s", updates[-1]) - assert isinstance(result, pandas.DataFrame) - assert len(result) == 10 # verify row count - assert list(result) == ["url", "view_count"] # verify column names - - # NOTE: For some reason, the number of open sockets is sometimes one *less* - # than expected when running system tests on Kokoro, thus using the <= assertion. - # That's still fine, however, since the sockets are apparently not leaked. - assert conn_count_end <= conn_count_start # system resources are released - - def _job_done(instance): return instance.state.lower() == "done" @@ -3039,21 +2990,3 @@ def _table_exists(t): return True except NotFound: return False - - -@pytest.fixture(scope="session") -def ipython(): - config = tools.default_config() - config.TerminalInteractiveShell.simple_prompt = True - shell = interactiveshell.TerminalInteractiveShell.instance(config=config) - return shell - - -@pytest.fixture() -def ipython_interactive(request, ipython): - """Activate IPython's builtin hooks - - for the duration of the test scope. - """ - with ipython.builtin_trap: - yield ipython diff --git a/packages/google-cloud-bigquery/tests/system/test_magics.py b/packages/google-cloud-bigquery/tests/system/test_magics.py new file mode 100644 index 000000000000..78c15cb503d2 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/system/test_magics.py @@ -0,0 +1,83 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""System tests for Jupyter/IPython connector.""" + +import re + +import pytest +import psutil + + +IPython = pytest.importorskip("IPython") +io = pytest.importorskip("IPython.utils.io") +pandas = pytest.importorskip("pandas") +tools = pytest.importorskip("IPython.testing.tools") +interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") + + +@pytest.fixture(scope="session") +def ipython(): + config = tools.default_config() + config.TerminalInteractiveShell.simple_prompt = True + shell = interactiveshell.TerminalInteractiveShell.instance(config=config) + return shell + + +@pytest.fixture() +def ipython_interactive(ipython): + """Activate IPython's builtin hooks + + for the duration of the test scope. + """ + with ipython.builtin_trap: + yield ipython + + +def test_bigquery_magic(ipython_interactive): + ip = IPython.get_ipython() + current_process = psutil.Process() + conn_count_start = len(current_process.connections()) + + ip.extension_manager.load_extension("google.cloud.bigquery") + sql = """ + SELECT + CONCAT( + 'https://stackoverflow.com/questions/', + CAST(id as STRING)) as url, + view_count + FROM `bigquery-public-data.stackoverflow.posts_questions` + WHERE tags like '%google-bigquery%' + ORDER BY view_count DESC + LIMIT 10 + """ + with io.capture_output() as captured: + result = ip.run_cell_magic("bigquery", "--use_rest_api", sql) + + conn_count_end = len(current_process.connections()) + + lines = re.split("\n|\r", captured.stdout) + # Removes blanks & terminal code (result of display clearing) + updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines)) + assert re.match("Executing query with job ID: .*", updates[0]) + assert all(re.match("Query executing: .*s", line) for line in updates[1:-1]) + assert re.match("Query complete after .*s", updates[-1]) + assert isinstance(result, pandas.DataFrame) + assert len(result) == 10 # verify row count + assert list(result) == ["url", "view_count"] # verify column names + + # NOTE: For some reason, the number of open sockets is sometimes one *less* + # than expected when running system tests on Kokoro, thus using the <= assertion. + # That's still fine, however, since the sockets are apparently not leaked. + assert conn_count_end <= conn_count_start # system resources are released From 075bf06fa5a3d4696d972722b1c4ff4308221065 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 22 Jan 2021 18:46:27 +0100 Subject: [PATCH 1042/2016] chore(deps): update dependency grpcio to v1.35.0 (#477) @tswast confirmed this is ok to merge and that broken docs-presubmit is unrelateed --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index de882844bb22..603d49a721e7 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.6.2 google-cloud-bigquery-storage==2.1.0 google-auth-oauthlib==0.4.2 -grpcio==1.34.1 +grpcio==1.35.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 From 6d1154efe7c4b55ec037f3d040b4f22fbc58de51 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 22 Jan 2021 23:35:12 +0100 Subject: [PATCH 1043/2016] fix: use explicitly given project over the client's default project for load jobs (#482) * fix: use project parameter if given for load jobs * blacken client tests * Refactor string concatenations in client tests * Silence invalid coverage complaint --- .../google/cloud/bigquery/client.py | 41 ++++- .../tests/unit/test_client.py | 157 +++++++++++++----- 2 files changed, 148 insertions(+), 50 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 3541726b83fc..b270075a9aad 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2136,11 +2136,11 @@ def load_table_from_file( try: if size is None or size >= _MAX_MULTIPART_SIZE: response = self._do_resumable_upload( - file_obj, job_resource, num_retries, timeout + file_obj, job_resource, num_retries, timeout, project=project ) else: response = self._do_multipart_upload( - file_obj, job_resource, size, num_retries, timeout + file_obj, job_resource, size, num_retries, timeout, project=project ) except resumable_media.InvalidResponse as exc: raise exceptions.from_http_response(exc.response) @@ -2475,7 +2475,9 @@ def load_table_from_json( timeout=timeout, ) - def _do_resumable_upload(self, stream, metadata, num_retries, timeout): + def _do_resumable_upload( + self, stream, metadata, num_retries, timeout, project=None + ): """Perform a resumable upload. Args: @@ -2491,13 +2493,17 @@ def _do_resumable_upload(self, stream, metadata, num_retries, timeout): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + project (Optional[str]): + Project ID of the project of where to run the upload. Defaults + to the client's project. + Returns: requests.Response: The "200 OK" response object returned after the final chunk is uploaded. """ upload, transport = self._initiate_resumable_upload( - stream, metadata, num_retries, timeout + stream, metadata, num_retries, timeout, project=project ) while not upload.finished: @@ -2505,7 +2511,9 @@ def _do_resumable_upload(self, stream, metadata, num_retries, timeout): return response - def _initiate_resumable_upload(self, stream, metadata, num_retries, timeout): + def _initiate_resumable_upload( + self, stream, metadata, num_retries, timeout, project=None + ): """Initiate a resumable upload. Args: @@ -2521,6 +2529,10 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries, timeout): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + project (Optional[str]): + Project ID of the project of where to run the upload. Defaults + to the client's project. + Returns: Tuple: Pair of @@ -2532,7 +2544,11 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries, timeout): chunk_size = _DEFAULT_CHUNKSIZE transport = self._http headers = _get_upload_headers(self._connection.user_agent) - upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) + + if project is None: + project = self.project + upload_url = _RESUMABLE_URL_TEMPLATE.format(project=project) + # TODO: modify ResumableUpload to take a retry.Retry object # that it can use for the initial RPC. upload = ResumableUpload(upload_url, chunk_size, headers=headers) @@ -2553,7 +2569,9 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries, timeout): return upload, transport - def _do_multipart_upload(self, stream, metadata, size, num_retries, timeout): + def _do_multipart_upload( + self, stream, metadata, size, num_retries, timeout, project=None + ): """Perform a multipart upload. Args: @@ -2574,6 +2592,10 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries, timeout): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + project (Optional[str]): + Project ID of the project of where to run the upload. Defaults + to the client's project. + Returns: requests.Response: The "200 OK" response object returned after the multipart @@ -2591,7 +2613,10 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries, timeout): headers = _get_upload_headers(self._connection.user_agent) - upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project) + if project is None: + project = self.project + + upload_url = _MULTIPART_URL_TEMPLATE.format(project=project) upload = MultipartUpload(upload_url, headers=headers) if num_retries is not None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index bf183b5a4e60..625256e6e740 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4455,9 +4455,8 @@ def _initiate_resumable_upload_helper(self, num_retries=None): # Check the returned values. self.assertIsInstance(upload, ResumableUpload) upload_url = ( - "https://bigquery.googleapis.com/upload/bigquery/v2/projects/" - + self.PROJECT - + "/jobs?uploadType=resumable" + f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{self.PROJECT}" + "/jobs?uploadType=resumable" ) self.assertEqual(upload.upload_url, upload_url) expected_headers = _get_upload_headers(conn.user_agent) @@ -4498,7 +4497,9 @@ def test__initiate_resumable_upload(self): def test__initiate_resumable_upload_with_retry(self): self._initiate_resumable_upload_helper(num_retries=11) - def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): + def _do_multipart_upload_success_helper( + self, get_boundary, num_retries=None, project=None + ): from google.cloud.bigquery.client import _get_upload_headers from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import LoadJobConfig @@ -4508,6 +4509,9 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() + if project is None: + project = self.PROJECT + # Create some mock arguments. data = b"Bzzzz-zap \x00\x01\xf4" stream = io.BytesIO(data) @@ -4516,8 +4520,9 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job.to_api_repr() size = len(data) + response = client._do_multipart_upload( - stream, metadata, size, num_retries, None + stream, metadata, size, num_retries, None, project=project ) # Check the mocks and the returned value. @@ -4526,35 +4531,39 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): get_boundary.assert_called_once_with() upload_url = ( - "https://bigquery.googleapis.com/upload/bigquery/v2/projects/" - + self.PROJECT - + "/jobs?uploadType=multipart" + f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{project}" + "/jobs?uploadType=multipart" ) payload = ( b"--==0==\r\n" - + b"content-type: application/json; charset=UTF-8\r\n\r\n" - + json.dumps(metadata).encode("utf-8") - + b"\r\n" - + b"--==0==\r\n" - + b"content-type: */*\r\n\r\n" - + data - + b"\r\n" - + b"--==0==--" - ) + b"content-type: application/json; charset=UTF-8\r\n\r\n" + b"%(json_metadata)s" + b"\r\n" + b"--==0==\r\n" + b"content-type: */*\r\n\r\n" + b"%(data)s" + b"\r\n" + b"--==0==--" + ) % {b"json_metadata": json.dumps(metadata).encode("utf-8"), b"data": data} + headers = _get_upload_headers(conn.user_agent) headers["content-type"] = b'multipart/related; boundary="==0=="' fake_transport.request.assert_called_once_with( "POST", upload_url, data=payload, headers=headers, timeout=mock.ANY ) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") def test__do_multipart_upload(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") def test__do_multipart_upload_with_retry(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary, num_retries=8) + @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") + def test__do_multipart_upload_with_custom_project(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary, project="custom-project") + def test_copy_table(self): from google.cloud.bigquery.job import CopyJob @@ -6364,10 +6373,10 @@ def test_insert_rows_from_dataframe(self): dataframe = pandas.DataFrame( [ - {"name": u"Little One", "age": 10, "adult": False}, - {"name": u"Young Gun", "age": 20, "adult": True}, - {"name": u"Dad", "age": 30, "adult": True}, - {"name": u"Stranger", "age": 40, "adult": True}, + {"name": "Little One", "age": 10, "adult": False}, + {"name": "Young Gun", "age": 20, "adult": True}, + {"name": "Dad", "age": 30, "adult": True}, + {"name": "Stranger", "age": 40, "adult": True}, ] ) @@ -6560,8 +6569,8 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): dataframe = pandas.DataFrame( [ - {"name": u"Little One", "adult": False}, - {"name": u"Young Gun", "adult": True}, + {"name": "Little One", "adult": False}, + {"name": "Young Gun", "adult": True}, ] ) @@ -7230,17 +7239,18 @@ class TestClientUpload(object): # `pytest`-style tests rather than `unittest`-style. from google.cloud.bigquery.job import SourceFormat - TABLE_REF = DatasetReference("project_id", "test_dataset").table("test_table") + PROJECT = "project_id" + TABLE_REF = DatasetReference(PROJECT, "test_dataset").table("test_table") LOCATION = "us-central" - @staticmethod - def _make_client(transport=None, location=None): + @classmethod + def _make_client(cls, transport=None, location=None): from google.cloud.bigquery import _http from google.cloud.bigquery import client cl = client.Client( - project="project_id", + project=cls.PROJECT, credentials=_make_credentials(), _http=transport, location=location, @@ -7274,12 +7284,12 @@ def _make_do_upload_patch(cls, client, method, resource={}, side_effect=None): return mock.patch.object(client, method, side_effect=side_effect, autospec=True) EXPECTED_CONFIGURATION = { - "jobReference": {"projectId": "project_id", "jobId": "job_id"}, + "jobReference": {"projectId": PROJECT, "jobId": "job_id"}, "configuration": { "load": { "sourceFormat": SourceFormat.CSV, "destinationTable": { - "projectId": "project_id", + "projectId": PROJECT, "datasetId": "test_dataset", "tableId": "test_table", }, @@ -7325,7 +7335,11 @@ def test_load_table_from_file_resumable(self): ) do_upload.assert_called_once_with( - file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, None + file_obj, + self.EXPECTED_CONFIGURATION, + _DEFAULT_NUM_RETRIES, + None, + project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) # the original config object should not have been modified @@ -7354,7 +7368,11 @@ def test_load_table_from_file_w_explicit_project(self): expected_resource["jobReference"]["location"] = self.LOCATION expected_resource["jobReference"]["projectId"] = "other-project" do_upload.assert_called_once_with( - file_obj, expected_resource, _DEFAULT_NUM_RETRIES, None + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES, + None, + project="other-project", ) def test_load_table_from_file_w_client_location(self): @@ -7384,7 +7402,11 @@ def test_load_table_from_file_w_client_location(self): expected_resource["jobReference"]["location"] = self.LOCATION expected_resource["jobReference"]["projectId"] = "other-project" do_upload.assert_called_once_with( - file_obj, expected_resource, _DEFAULT_NUM_RETRIES, None + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES, + None, + project="other-project", ) def test_load_table_from_file_resumable_metadata(self): @@ -7409,7 +7431,7 @@ def test_load_table_from_file_resumable_metadata(self): config.null_marker = r"\N" expected_config = { - "jobReference": {"projectId": "project_id", "jobId": "job_id"}, + "jobReference": {"projectId": self.PROJECT, "jobId": "job_id"}, "configuration": { "load": { "destinationTable": { @@ -7442,7 +7464,11 @@ def test_load_table_from_file_resumable_metadata(self): ) do_upload.assert_called_once_with( - file_obj, expected_config, _DEFAULT_NUM_RETRIES, None + file_obj, + expected_config, + _DEFAULT_NUM_RETRIES, + None, + project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) def test_load_table_from_file_multipart(self): @@ -7471,6 +7497,7 @@ def test_load_table_from_file_multipart(self): file_obj_size, _DEFAULT_NUM_RETRIES, None, + project=self.PROJECT, ) def test_load_table_from_file_with_retries(self): @@ -7491,7 +7518,11 @@ def test_load_table_from_file_with_retries(self): ) do_upload.assert_called_once_with( - file_obj, self.EXPECTED_CONFIGURATION, num_retries, None + file_obj, + self.EXPECTED_CONFIGURATION, + num_retries, + None, + project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) def test_load_table_from_file_with_rewind(self): @@ -7524,7 +7555,11 @@ def test_load_table_from_file_with_readable_gzip(self): ) do_upload.assert_called_once_with( - gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, None + gzip_file, + self.EXPECTED_CONFIGURATION, + _DEFAULT_NUM_RETRIES, + None, + project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) def test_load_table_from_file_with_writable_gzip(self): @@ -8169,7 +8204,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): dtype="datetime64[ns]", ).dt.tz_localize(pytz.utc), ), - ("string_col", [u"abc", None, u"def"]), + ("string_col", ["abc", None, "def"]), ("bytes_col", [b"abc", b"def", None]), ] ) @@ -8228,7 +8263,7 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): [ ("int_col", [1, 2, 3]), ("int_as_float_col", [1.0, float("nan"), 3.0]), - ("string_col", [u"abc", None, u"def"]), + ("string_col", ["abc", None, "def"]), ] ) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -8263,7 +8298,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): client = self._make_client() df_data = collections.OrderedDict( [ - ("string_col", [u"abc", u"def", u"ghi"]), + ("string_col", ["abc", "def", "ghi"]), ("unknown_col", [b"jkl", None, b"mno"]), ] ) @@ -8317,7 +8352,7 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): from google.cloud.bigquery.schema import SchemaField client = self._make_client() - records = [{"name": u"Monty", "age": 100}, {"name": u"Python", "age": 60}] + records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] dataframe = pandas.DataFrame(records) schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER")) job_config = job.LoadJobConfig(schema=schema) @@ -8658,6 +8693,44 @@ def test__do_resumable_upload(self): timeout=mock.ANY, ) + def test__do_resumable_upload_custom_project(self): + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = self._make_transport( + self._make_resumable_upload_responses(file_obj_len) + ) + client = self._make_client(transport) + + result = client._do_resumable_upload( + file_obj, self.EXPECTED_CONFIGURATION, None, None, project="custom-project", + ) + + content = result.content.decode("utf-8") + assert json.loads(content) == {"size": file_obj_len} + + # Verify that configuration data was passed in with the initial + # request. + transport.request.assert_any_call( + "POST", + mock.ANY, + data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"), + headers=mock.ANY, + timeout=mock.ANY, + ) + + # Check the project ID used in the call to initiate resumable upload. + initiation_url = next( + ( + call.args[1] + for call in transport.request.call_args_list + if call.args[0] == "POST" and "uploadType=resumable" in call.args[1] + ), + None, + ) # pragma: NO COVER + + assert initiation_url is not None + assert "projects/custom-project" in initiation_url + def test__do_multipart_upload(self): transport = self._make_transport([self._make_response(http.client.OK)]) client = self._make_client(transport) From e31b7179e8a6f9e677b47f0adf68f93b06d260b7 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 25 Jan 2021 18:04:04 +0100 Subject: [PATCH 1044/2016] fix: invalid conversion of timezone-aware datetime values to JSON (#480) * fix: correctly convert timezone-aware datetimes * blacken * Remove python-dateutil test dependency * Remove unused dst() methods --- .../google/cloud/bigquery/_helpers.py | 8 ++ .../tests/unit/test__helpers.py | 77 +++++++++++-------- 2 files changed, 51 insertions(+), 34 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 6b66a3020aca..daa14b92a460 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -315,6 +315,10 @@ def _timestamp_to_json_parameter(value): def _timestamp_to_json_row(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): + # For naive datetime objects UTC timezone is assumed, thus we format + # those to string directly without conversion. + if value.tzinfo is not None: + value = value.astimezone(UTC) value = value.strftime(_RFC3339_MICROS) return value @@ -322,6 +326,10 @@ def _timestamp_to_json_row(value): def _datetime_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): + # For naive datetime objects UTC timezone is assumed, thus we format + # those to string directly without conversion. + if value.tzinfo is not None: + value = value.astimezone(UTC) value = value.strftime(_RFC3339_MICROS_NO_ZULU) return value diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 8948d41522d7..0fdf1142f110 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -420,13 +420,13 @@ def _call_fut(self, row, schema): def test_w_single_scalar_column(self): # SELECT 1 AS col col = _Field("REQUIRED", "col", "INTEGER") - row = {u"f": [{u"v": u"1"}]} + row = {"f": [{"v": "1"}]} self.assertEqual(self._call_fut(row, schema=[col]), (1,)) def test_w_single_scalar_geography_column(self): # SELECT 1 AS col col = _Field("REQUIRED", "geo", "GEOGRAPHY") - row = {u"f": [{u"v": u"POINT(1, 2)"}]} + row = {"f": [{"v": "POINT(1, 2)"}]} self.assertEqual(self._call_fut(row, schema=[col]), ("POINT(1, 2)",)) def test_w_single_struct_column(self): @@ -434,13 +434,13 @@ def test_w_single_struct_column(self): sub_1 = _Field("REQUIRED", "sub_1", "INTEGER") sub_2 = _Field("REQUIRED", "sub_2", "INTEGER") col = _Field("REQUIRED", "col", "RECORD", fields=[sub_1, sub_2]) - row = {u"f": [{u"v": {u"f": [{u"v": u"1"}, {u"v": u"2"}]}}]} + row = {"f": [{"v": {"f": [{"v": "1"}, {"v": "2"}]}}]} self.assertEqual(self._call_fut(row, schema=[col]), ({"sub_1": 1, "sub_2": 2},)) def test_w_single_array_column(self): # SELECT [1, 2, 3] as col col = _Field("REPEATED", "col", "INTEGER") - row = {u"f": [{u"v": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]}]} + row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]} self.assertEqual(self._call_fut(row, schema=[col]), ([1, 2, 3],)) def test_w_struct_w_nested_array_column(self): @@ -450,13 +450,13 @@ def test_w_struct_w_nested_array_column(self): third = _Field("REPEATED", "third", "INTEGER") col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third]) row = { - u"f": [ + "f": [ { - u"v": { - u"f": [ - {u"v": [{u"v": u"1"}, {u"v": u"2"}]}, - {u"v": u"3"}, - {u"v": [{u"v": u"4"}, {u"v": u"5"}]}, + "v": { + "f": [ + {"v": [{"v": "1"}, {"v": "2"}]}, + {"v": "3"}, + {"v": [{"v": "4"}, {"v": "5"}]}, ] } } @@ -464,7 +464,7 @@ def test_w_struct_w_nested_array_column(self): } self.assertEqual( self._call_fut(row, schema=[col]), - ({u"first": [1, 2], u"second": 3, u"third": [4, 5]},), + ({"first": [1, 2], "second": 3, "third": [4, 5]},), ) def test_w_array_of_struct(self): @@ -474,11 +474,11 @@ def test_w_array_of_struct(self): third = _Field("REQUIRED", "third", "INTEGER") col = _Field("REPEATED", "col", "RECORD", fields=[first, second, third]) row = { - u"f": [ + "f": [ { - u"v": [ - {u"v": {u"f": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]}}, - {u"v": {u"f": [{u"v": u"4"}, {u"v": u"5"}, {u"v": u"6"}]}}, + "v": [ + {"v": {"f": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}}, + {"v": {"f": [{"v": "4"}, {"v": "5"}, {"v": "6"}]}}, ] } ] @@ -487,8 +487,8 @@ def test_w_array_of_struct(self): self._call_fut(row, schema=[col]), ( [ - {u"first": 1, u"second": 2, u"third": 3}, - {u"first": 4, u"second": 5, u"third": 6}, + {"first": 1, "second": 2, "third": 3}, + {"first": 4, "second": 5, "third": 6}, ], ), ) @@ -499,32 +499,25 @@ def test_w_array_of_struct_w_array(self): second = _Field("REQUIRED", "second", "INTEGER") col = _Field("REPEATED", "col", "RECORD", fields=[first, second]) row = { - u"f": [ + "f": [ { - u"v": [ - { - u"v": { - u"f": [ - {u"v": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]}, - {u"v": u"4"}, - ] - } - }, + "v": [ { - u"v": { - u"f": [ - {u"v": [{u"v": u"5"}, {u"v": u"6"}]}, - {u"v": u"7"}, + "v": { + "f": [ + {"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}, + {"v": "4"}, ] } }, + {"v": {"f": [{"v": [{"v": "5"}, {"v": "6"}]}, {"v": "7"}]}}, ] } ] } self.assertEqual( self._call_fut(row, schema=[col]), - ([{u"first": [1, 2, 3], u"second": 4}, {u"first": [5, 6], u"second": 7}],), + ([{"first": [1, 2, 3], "second": 4}, {"first": [5, 6], "second": 7}],), ) @@ -673,7 +666,7 @@ def test_w_non_bytes(self): def test_w_bytes(self): source = b"source" - expected = u"c291cmNl" + expected = "c291cmNl" converted = self._call_fut(source) self.assertEqual(converted, expected) @@ -726,7 +719,7 @@ def test_w_string(self): ZULU = "2016-12-20 15:58:27.339328+00:00" self.assertEqual(self._call_fut(ZULU), ZULU) - def test_w_datetime(self): + def test_w_datetime_no_zone(self): when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) self.assertEqual(self._call_fut(when), "2016-12-20T15:58:27.339328Z") @@ -736,6 +729,14 @@ def test_w_datetime_w_utc_zone(self): when = datetime.datetime(2020, 11, 17, 1, 6, 52, 353795, tzinfo=UTC) self.assertEqual(self._call_fut(when), "2020-11-17T01:06:52.353795Z") + def test_w_datetime_w_non_utc_zone(self): + class EstZone(datetime.tzinfo): + def utcoffset(self, _): + return datetime.timedelta(minutes=-300) + + when = datetime.datetime(2020, 11, 17, 1, 6, 52, 353795, tzinfo=EstZone()) + self.assertEqual(self._call_fut(when), "2020-11-17T06:06:52.353795Z") + class Test_datetime_to_json(unittest.TestCase): def _call_fut(self, value): @@ -753,6 +754,14 @@ def test_w_datetime(self): when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC) self.assertEqual(self._call_fut(when), "2016-12-03T14:11:27.123456") + def test_w_datetime_w_non_utc_zone(self): + class EstZone(datetime.tzinfo): + def utcoffset(self, _): + return datetime.timedelta(minutes=-300) + + when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=EstZone()) + self.assertEqual(self._call_fut(when), "2016-12-03T19:11:27.123456") + class Test_date_to_json(unittest.TestCase): def _call_fut(self, value): From 9f79073ad3cc556fd1244c705c95dd81fe0cce4e Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 27 Jan 2021 21:44:48 +0100 Subject: [PATCH 1045/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.2.1 (#485) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 603d49a721e7..599b6d52f2e7 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.6.2 -google-cloud-bigquery-storage==2.1.0 +google-cloud-bigquery-storage==2.2.1 google-auth-oauthlib==0.4.2 grpcio==1.35.0 ipython==7.16.1; python_version < '3.7' From ebd810bb4aa40afbe4b170929b3b7af39217794e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 27 Jan 2021 21:54:02 +0100 Subject: [PATCH 1046/2016] deps: declare support for Python 3.9 (#488) Towards #462. With wheels for `pyarrow==3.0.0` released we can now officially support Python 3.9 - well, at least after when we add Python 3.9 checks to Kokoro. PR checklist: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- packages/google-cloud-bigquery/README.rst | 2 +- packages/google-cloud-bigquery/noxfile.py | 22 +++++++++++-------- packages/google-cloud-bigquery/setup.py | 11 ++++------ .../testing/constraints-3.6.txt | 20 +++++------------ 4 files changed, 23 insertions(+), 32 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 61192b625ed3..8454cf9c0475 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -52,7 +52,7 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.6, < 3.9 +Python >= 3.6, < 3.10 Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index e6a739d1e2e9..942525ca933c 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -23,6 +23,10 @@ BLACK_VERSION = "black==19.10b0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") + +DEFAULT_PYTHON_VERSION = "3.8" +SYSTEM_TEST_PYTHON_VERSIONS = ["3.8"] +UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() # 'docfx' is excluded since it only needs to run in 'docs-presubmit' @@ -80,13 +84,13 @@ def default(session): ) -@nox.session(python=["3.6", "3.7", "3.8"]) +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS) def unit(session): """Run the unit test suite.""" default(session) -@nox.session(python=["3.8"]) +@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" @@ -118,7 +122,7 @@ def system(session): session.run("py.test", "--quiet", os.path.join("tests", "system"), *session.posargs) -@nox.session(python=["3.8"]) +@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def snippets(session): """Run the snippets test suite.""" @@ -154,7 +158,7 @@ def snippets(session): ) -@nox.session(python="3.8") +@nox.session(python=DEFAULT_PYTHON_VERSION) def cover(session): """Run the final coverage report. @@ -166,7 +170,7 @@ def cover(session): session.run("coverage", "erase") -@nox.session(python="3.8") +@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def prerelease_deps(session): """Run all tests with prerelease versions of dependencies installed. @@ -201,7 +205,7 @@ def prerelease_deps(session): session.run("py.test", "samples/tests") -@nox.session(python="3.8") +@nox.session(python=DEFAULT_PYTHON_VERSION) def lint(session): """Run linters. @@ -218,7 +222,7 @@ def lint(session): session.run("black", "--check", *BLACK_PATHS) -@nox.session(python="3.8") +@nox.session(python=DEFAULT_PYTHON_VERSION) def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" @@ -239,7 +243,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python="3.8") +@nox.session(python=DEFAULT_PYTHON_VERSION) def docs(session): """Build the docs.""" @@ -262,7 +266,7 @@ def docs(session): ) -@nox.session(python="3.8") +@nox.session(python=DEFAULT_PYTHON_VERSION) def docfx(session): """Build the docfx yaml files for this library.""" diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 0ea6ccca222d..ea2df4843c0c 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -45,13 +45,9 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.32.0, < 2.0dev", - "pyarrow >= 1.0.0, < 3.0dev", - ], - "pandas": [ - "pandas>=0.23.0", - # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword. - "pyarrow >= 1.0.0, < 3.0dev", + "pyarrow >= 1.0.0, < 4.0dev", ], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api==0.11b0", @@ -112,6 +108,7 @@ "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Operating System :: OS Independent", "Topic :: Internet", ], @@ -120,7 +117,7 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=3.6, <3.9", + python_requires=">=3.6, <3.10", include_package_data=True, zip_safe=False, ) diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index fe2bcfda78fe..c4a5c51bee72 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -6,26 +6,16 @@ # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 google-api-core==1.23.0 -proto-plus==1.10.0 +google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 -six==1.13.0 -protobuf==3.12.0 -google-cloud-bigquery-storage==2.0.0 grpcio==1.32.0 -pyarrow==1.0.0 -pandas==0.23.0 -pyarrow==1.0.0 -tqdm==4.7.4 opentelemetry-api==0.11b0 -opentelemetry-sdk==0.11b0 opentelemetry-instrumentation==0.11b0 -google-cloud-bigquery-storage==2.0.0 -grpcio==1.32.0 -pyarrow==1.0.0 -opentelemetry-api==0.11b0 opentelemetry-sdk==0.11b0 -opentelemetry-instrumentation==0.11b0 pandas==0.23.0 +proto-plus==1.10.0 +protobuf==3.12.0 pyarrow==1.0.0 -tqdm==4.7.4 \ No newline at end of file +six==1.13.0 +tqdm==4.7.4 From 49cc2ac4d133becd0eb8fdcde9c78d9617ffd8fc Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 27 Jan 2021 21:08:02 +0000 Subject: [PATCH 1047/2016] chore: release 2.7.0 (#472) :robot: I have created a release \*beep\* \*boop\* --- ## [2.7.0](https://www.github.com/googleapis/python-bigquery/compare/v2.6.2...v2.7.0) (2021-01-27) ### Bug Fixes * invalid conversion of timezone-aware datetime values to JSON ([#480](https://www.github.com/googleapis/python-bigquery/issues/480)) ([61b4385](https://www.github.com/googleapis/python-bigquery/commit/61b438523d305ce66a68fde7cb49e9abbf0a8d1d)) * reading the labels attribute on Job instances ([#471](https://www.github.com/googleapis/python-bigquery/issues/471)) ([80944f0](https://www.github.com/googleapis/python-bigquery/commit/80944f080bcc4fda870a6daf1d884de616d39ae7)) * use explicitly given project over the client's default project for load jobs ([#482](https://www.github.com/googleapis/python-bigquery/issues/482)) ([530e1e8](https://www.github.com/googleapis/python-bigquery/commit/530e1e8d8fe8939e914a78ff1b220907c1b87af7)) ### Dependencies * declare support for Python 3.9 ([#488](https://www.github.com/googleapis/python-bigquery/issues/488)) ([55daa7d](https://www.github.com/googleapis/python-bigquery/commit/55daa7da9857a8a2fb14a80a4efa3f466386a85f)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 14 ++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 4d58072e85b0..a58510c66ab5 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.7.0](https://www.github.com/googleapis/python-bigquery/compare/v2.6.2...v2.7.0) (2021-01-27) + + +### Bug Fixes + +* invalid conversion of timezone-aware datetime values to JSON ([#480](https://www.github.com/googleapis/python-bigquery/issues/480)) ([61b4385](https://www.github.com/googleapis/python-bigquery/commit/61b438523d305ce66a68fde7cb49e9abbf0a8d1d)) +* reading the labels attribute on Job instances ([#471](https://www.github.com/googleapis/python-bigquery/issues/471)) ([80944f0](https://www.github.com/googleapis/python-bigquery/commit/80944f080bcc4fda870a6daf1d884de616d39ae7)) +* use explicitly given project over the client's default project for load jobs ([#482](https://www.github.com/googleapis/python-bigquery/issues/482)) ([530e1e8](https://www.github.com/googleapis/python-bigquery/commit/530e1e8d8fe8939e914a78ff1b220907c1b87af7)) + + +### Dependencies + +* declare support for Python 3.9 ([#488](https://www.github.com/googleapis/python-bigquery/issues/488)) ([55daa7d](https://www.github.com/googleapis/python-bigquery/commit/55daa7da9857a8a2fb14a80a4efa3f466386a85f)) + ### [2.6.2](https://www.github.com/googleapis/python-bigquery/compare/v2.6.1...v2.6.2) (2021-01-11) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 9aaeb8bc4c7d..d962613e0244 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.6.2" +__version__ = "2.7.0" From b7524af27ab58366854181854b339052e3152ad1 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Fri, 29 Jan 2021 00:34:08 -0800 Subject: [PATCH 1048/2016] fix: don't try to close closed cursors (#498) --- .../google/cloud/bigquery/dbapi/connection.py | 3 ++- .../tests/unit/test_dbapi_connection.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py index 300c77dc9f82..459fc82aa4a0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py @@ -76,7 +76,8 @@ def close(self): self._bqstorage_client._transport.grpc_channel.close() for cursor_ in self._cursors_created: - cursor_.close() + if not cursor_._closed: + cursor_.close() def commit(self): """No-op, but for consistency raise an error if connection is closed.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index edec559b285b..74da318bfade 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -176,6 +176,22 @@ def test_close_closes_all_created_cursors(self): self.assertTrue(cursor_1._closed) self.assertTrue(cursor_2._closed) + def test_close_closes_only_open_created_cursors(self): + connection = self._make_one(client=self._mock_client()) + cursor_1 = connection.cursor() + cursor_2 = connection.cursor() + self.assertFalse(cursor_1._closed) + self.assertFalse(cursor_2._closed) + + cursor_1.close() + self.assertTrue(cursor_1._closed) + cursor_1.close = mock.MagicMock() + + connection.close() + + self.assertFalse(cursor_1.close.called) + self.assertTrue(cursor_2._closed) + def test_does_not_keep_cursor_instances_alive(self): from google.cloud.bigquery.dbapi import Cursor From a21a34d0bf7519f3a4fae0bf1e1a9a24c89cdedc Mon Sep 17 00:00:00 2001 From: Justin Beckwith Date: Fri, 29 Jan 2021 08:09:02 -0800 Subject: [PATCH 1049/2016] build: migrate to flakybot (#500) --- packages/google-cloud-bigquery/.kokoro/test-samples.sh | 8 ++++---- packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples.sh b/packages/google-cloud-bigquery/.kokoro/test-samples.sh index c5653a81d9c6..3ce8994cb96f 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples.sh @@ -87,11 +87,11 @@ for file in samples/**/requirements.txt; do python3.6 -m nox -s "$RUN_TESTS_SESSION" EXIT=$? - # If this is a periodic build, send the test log to the Build Cop Bot. - # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/buildcop. + # If this is a periodic build, send the test log to the FlakyBot. + # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then - chmod +x $KOKORO_GFILE_DIR/linux_amd64/buildcop - $KOKORO_GFILE_DIR/linux_amd64/buildcop + chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot + $KOKORO_GFILE_DIR/linux_amd64/flakybot fi if [[ $EXIT -ne 0 ]]; then diff --git a/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh b/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh index 719bcd5ba84d..4af6cdc26dbc 100755 --- a/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh +++ b/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh @@ -159,7 +159,7 @@ if [[ -n "${KOKORO_BUILD_ID:-}" ]]; then "KOKORO_GITHUB_COMMIT" "KOKORO_GITHUB_PULL_REQUEST_NUMBER" "KOKORO_GITHUB_PULL_REQUEST_COMMIT" - # For Build Cop Bot + # For FlakyBot "KOKORO_GITHUB_COMMIT_URL" "KOKORO_GITHUB_PULL_REQUEST_URL" ) From 493d8e8bed94c82083c98435005b310a88666b9d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:37:14 +0100 Subject: [PATCH 1050/2016] chore(deps): update dependency pyarrow to v3 (#490) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 599b6d52f2e7..979506199e02 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -7,5 +7,5 @@ ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==2.0.0 +pyarrow==3.0.0 pytz==2020.5 From d91011dbf1d497800bd8744c6ebb9fef1f795b4d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:37:49 +0100 Subject: [PATCH 1051/2016] chore(deps): update dependency google-cloud-bigquery to v2.7.0 (#491) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 338cf2e894d2..6f9306af2d41 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.6.2 +google-cloud-bigquery==2.7.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 979506199e02..bc3985ebd059 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.6.2 +google-cloud-bigquery==2.7.0 google-cloud-bigquery-storage==2.2.1 google-auth-oauthlib==0.4.2 grpcio==1.35.0 From e5ed8802cf5cfec478b33eef2101c2babb6de647 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:38:33 +0100 Subject: [PATCH 1052/2016] chore(deps): update dependency matplotlib to v3.3.4 (#495) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index bc3985ebd059..00d28fa0b2aa 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -4,7 +4,7 @@ google-auth-oauthlib==0.4.2 grpcio==1.35.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.3 +matplotlib==3.3.4 pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' pyarrow==3.0.0 From 7de20223de820fcc0ffca94e131026cf44b9958a Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 1 Feb 2021 10:41:52 -0800 Subject: [PATCH 1053/2016] chore: update shared templates (#468) * changes without context autosynth cannot find the source of changes triggered by earlier changes in this repository, or by version upgrades to tools such as linters. * chore: add config / docs for 'pre-commit' support Source-Author: Tres Seaver Source-Date: Tue Dec 1 16:01:20 2020 -0500 Source-Repo: googleapis/synthtool Source-Sha: 32af6da519a6b042e3da62008e2a75e991efb6b4 Source-Link: https://github.com/googleapis/synthtool/commit/32af6da519a6b042e3da62008e2a75e991efb6b4 * chore(deps): update precommit hook pre-commit/pre-commit-hooks to v3.3.0 Source-Author: WhiteSource Renovate Source-Date: Wed Dec 2 17:18:24 2020 +0100 Source-Repo: googleapis/synthtool Source-Sha: 69629b64b83c6421d616be2b8e11795738ec8a6c Source-Link: https://github.com/googleapis/synthtool/commit/69629b64b83c6421d616be2b8e11795738ec8a6c * chore: update noxfile.py.j2 * Update noxfile.py.j2 add changes from @glasnt to the template template to ensure that enforcing type hinting doesn't fail for repos with the sample noxfile (aka all samples repos) See https://github.com/GoogleCloudPlatform/python-docs-samples/pull/4869/files for context * fix typo Source-Author: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Source-Date: Thu Dec 3 13:44:30 2020 -0800 Source-Repo: googleapis/synthtool Source-Sha: 18c5dbdb4ac8cf75d4d8174e7b4558f48e76f8a1 Source-Link: https://github.com/googleapis/synthtool/commit/18c5dbdb4ac8cf75d4d8174e7b4558f48e76f8a1 * chore(deps): update precommit hook pre-commit/pre-commit-hooks to v3.4.0 Co-authored-by: Tres Seaver Source-Author: WhiteSource Renovate Source-Date: Wed Dec 16 18:13:24 2020 +0100 Source-Repo: googleapis/synthtool Source-Sha: aa255b15d52b6d8950cca48cfdf58f7d27a60c8a Source-Link: https://github.com/googleapis/synthtool/commit/aa255b15d52b6d8950cca48cfdf58f7d27a60c8a * docs(python): document adding Python 3.9 support, dropping 3.5 support Closes #787 Source-Author: Tres Seaver Source-Date: Thu Dec 17 16:08:02 2020 -0500 Source-Repo: googleapis/synthtool Source-Sha: b670a77a454f415d247907908e8ee7943e06d718 Source-Link: https://github.com/googleapis/synthtool/commit/b670a77a454f415d247907908e8ee7943e06d718 * chore: exclude `.nox` directories from linting The samples tests create `.nox` directories with all dependencies installed. These directories should be excluded from linting. I've tested this change locally, and it significantly speeds up linting on my machine. Source-Author: Tim Swast Source-Date: Tue Dec 22 13:04:04 2020 -0600 Source-Repo: googleapis/synthtool Source-Sha: 373861061648b5fe5e0ac4f8a38b32d639ee93e4 Source-Link: https://github.com/googleapis/synthtool/commit/373861061648b5fe5e0ac4f8a38b32d639ee93e4 * chore(python): fix column sizing issue in docs Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Thu Jan 7 11:58:32 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: f15b57ccfd71106c2299e9b89835fe6e55015662 Source-Link: https://github.com/googleapis/synthtool/commit/f15b57ccfd71106c2299e9b89835fe6e55015662 * chore(python): use 'http' in LICENSE Co-authored-by: Tim Swast Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Thu Jan 7 13:05:12 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 41a4e56982620d3edcf110d76f4fcdfdec471ac8 Source-Link: https://github.com/googleapis/synthtool/commit/41a4e56982620d3edcf110d76f4fcdfdec471ac8 * chore(python): skip docfx in main presubmit * chore(python): skip docfx in main presubmit * fix: properly template the repo name Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Fri Jan 8 10:32:13 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: fb53b6fb373b7c3edf4e55f3e8036bc6d73fa483 Source-Link: https://github.com/googleapis/synthtool/commit/fb53b6fb373b7c3edf4e55f3e8036bc6d73fa483 * chore: add missing quotation mark Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Mon Jan 11 09:43:06 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 16ec872dd898d7de6e1822badfac32484b5d9031 Source-Link: https://github.com/googleapis/synthtool/commit/16ec872dd898d7de6e1822badfac32484b5d9031 --- packages/google-cloud-bigquery/.flake8 | 1 + .../.kokoro/docs/docs-presubmit.cfg | 2 +- .../.pre-commit-config.yaml | 17 ++++ .../google-cloud-bigquery/CONTRIBUTING.rst | 21 +++-- packages/google-cloud-bigquery/LICENSE | 7 +- .../docs/_static/custom.css | 7 +- .../samples/geography/noxfile.py | 19 ++-- .../samples/snippets/noxfile.py | 19 ++-- packages/google-cloud-bigquery/synth.metadata | 92 ++++++++++++++++++- 9 files changed, 154 insertions(+), 31 deletions(-) create mode 100644 packages/google-cloud-bigquery/.pre-commit-config.yaml diff --git a/packages/google-cloud-bigquery/.flake8 b/packages/google-cloud-bigquery/.flake8 index ed9316381c9c..29227d4cf419 100644 --- a/packages/google-cloud-bigquery/.flake8 +++ b/packages/google-cloud-bigquery/.flake8 @@ -26,6 +26,7 @@ exclude = *_pb2.py # Standard linting exemptions. + **/.nox/** __pycache__, .git, *.pyc, diff --git a/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg index 5c216b4bcfd4..08adb2e28baf 100644 --- a/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg +++ b/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg @@ -25,4 +25,4 @@ env_vars: { env_vars: { key: "NOX_SESSION" value: "docs docfx" -} \ No newline at end of file +} diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml new file mode 100644 index 000000000000..a9024b15d725 --- /dev/null +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -0,0 +1,17 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml +- repo: https://github.com/psf/black + rev: 19.10b0 + hooks: + - id: black +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index b3b802b49516..15bcd2e28132 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -21,8 +21,8 @@ In order to add a feature: - The feature must be documented in both the API and narrative documentation. -- The feature must work fully on the following CPython versions: 2.7, - 3.5, 3.6, 3.7 and 3.8 on both UNIX and Windows. +- The feature must work fully on the following CPython versions: + 3.6, 3.7, 3.8 and 3.9 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -111,6 +111,16 @@ Coding Style should point to the official ``googleapis`` checkout and the the branch should be the main branch on that remote (``master``). +- This repository contains configuration for the + `pre-commit `__ tool, which automates checking + our linters during a commit. If you have it installed on your ``$PATH``, + you can enable enforcing those checks via: + +.. code-block:: bash + + $ pre-commit install + pre-commit installed at .git/hooks/pre-commit + Exceptions to PEP8: - Many unit tests use a helper method, ``_call_fut`` ("FUT" is short for @@ -192,25 +202,24 @@ Supported Python Versions We support: -- `Python 3.5`_ - `Python 3.6`_ - `Python 3.7`_ - `Python 3.8`_ +- `Python 3.9`_ -.. _Python 3.5: https://docs.python.org/3.5/ .. _Python 3.6: https://docs.python.org/3.6/ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ +.. _Python 3.9: https://docs.python.org/3.9/ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py -Python 2.7 support is deprecated. All code changes should maintain Python 2.7 compatibility until January 1, 2020. We also explicitly decided to support Python 3 beginning with version -3.5. Reasons for this include: +3.6. Reasons for this include: - Encouraging use of newest versions of Python 3 - Taking the lead of `prominent`_ open-source `projects`_ diff --git a/packages/google-cloud-bigquery/LICENSE b/packages/google-cloud-bigquery/LICENSE index a8ee855de2aa..d64569567334 100644 --- a/packages/google-cloud-bigquery/LICENSE +++ b/packages/google-cloud-bigquery/LICENSE @@ -1,6 +1,7 @@ - Apache License + + Apache License Version 2.0, January 2004 - https://www.apache.org/licenses/ + http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION @@ -192,7 +193,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at - https://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/packages/google-cloud-bigquery/docs/_static/custom.css b/packages/google-cloud-bigquery/docs/_static/custom.css index 0abaf229fce3..bcd37bbd3c4a 100644 --- a/packages/google-cloud-bigquery/docs/_static/custom.css +++ b/packages/google-cloud-bigquery/docs/_static/custom.css @@ -1,4 +1,9 @@ div#python2-eol { border-color: red; border-width: medium; -} \ No newline at end of file +} + +/* Ensure minimum width for 'Parameters' / 'Returns' column */ +dl.field-list > dt { + min-width: 100px +} diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index ab2c49227c3b..bbd25fcdb5e7 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -17,6 +17,7 @@ import os from pathlib import Path import sys +from typing import Callable, Dict, List, Optional import nox @@ -65,7 +66,7 @@ TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) -def get_pytest_env_vars(): +def get_pytest_env_vars() -> Dict[str, str]: """Returns a dict for pytest invocation.""" ret = {} @@ -94,7 +95,7 @@ def get_pytest_env_vars(): # -def _determine_local_import_names(start_dir): +def _determine_local_import_names(start_dir: str) -> List[str]: """Determines all import names that should be considered "local". This is used when running the linter to insure that import order is @@ -132,7 +133,7 @@ def _determine_local_import_names(start_dir): @nox.session -def lint(session): +def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: session.install("flake8", "flake8-import-order") else: @@ -153,7 +154,7 @@ def lint(session): @nox.session -def blacken(session): +def blacken(session: nox.sessions.Session) -> None: session.install("black") python_files = [path for path in os.listdir(".") if path.endswith(".py")] @@ -168,7 +169,9 @@ def blacken(session): PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] -def _session_tests(session, post_install=None): +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): session.install("-r", "requirements.txt") @@ -194,7 +197,7 @@ def _session_tests(session, post_install=None): @nox.session(python=ALL_VERSIONS) -def py(session): +def py(session: nox.sessions.Session) -> None: """Runs py.test for a sample using the specified version of Python.""" if session.python in TESTED_VERSIONS: _session_tests(session) @@ -209,7 +212,7 @@ def py(session): # -def _get_repo_root(): +def _get_repo_root() -> Optional[str]: """ Returns the root folder of the project. """ # Get root of this repository. Assume we don't have directories nested deeper than 10 items. p = Path(os.getcwd()) @@ -232,7 +235,7 @@ def _get_repo_root(): @nox.session @nox.parametrize("path", GENERATED_READMES) -def readmegen(session, path): +def readmegen(session: nox.sessions.Session, path: str) -> None: """(Re-)generates the readme for a sample.""" session.install("jinja2", "pyyaml") dir_ = os.path.dirname(path) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index ab2c49227c3b..bbd25fcdb5e7 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -17,6 +17,7 @@ import os from pathlib import Path import sys +from typing import Callable, Dict, List, Optional import nox @@ -65,7 +66,7 @@ TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) -def get_pytest_env_vars(): +def get_pytest_env_vars() -> Dict[str, str]: """Returns a dict for pytest invocation.""" ret = {} @@ -94,7 +95,7 @@ def get_pytest_env_vars(): # -def _determine_local_import_names(start_dir): +def _determine_local_import_names(start_dir: str) -> List[str]: """Determines all import names that should be considered "local". This is used when running the linter to insure that import order is @@ -132,7 +133,7 @@ def _determine_local_import_names(start_dir): @nox.session -def lint(session): +def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: session.install("flake8", "flake8-import-order") else: @@ -153,7 +154,7 @@ def lint(session): @nox.session -def blacken(session): +def blacken(session: nox.sessions.Session) -> None: session.install("black") python_files = [path for path in os.listdir(".") if path.endswith(".py")] @@ -168,7 +169,9 @@ def blacken(session): PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] -def _session_tests(session, post_install=None): +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): session.install("-r", "requirements.txt") @@ -194,7 +197,7 @@ def _session_tests(session, post_install=None): @nox.session(python=ALL_VERSIONS) -def py(session): +def py(session: nox.sessions.Session) -> None: """Runs py.test for a sample using the specified version of Python.""" if session.python in TESTED_VERSIONS: _session_tests(session) @@ -209,7 +212,7 @@ def py(session): # -def _get_repo_root(): +def _get_repo_root() -> Optional[str]: """ Returns the root folder of the project. """ # Get root of this repository. Assume we don't have directories nested deeper than 10 items. p = Path(os.getcwd()) @@ -232,7 +235,7 @@ def _get_repo_root(): @nox.session @nox.parametrize("path", GENERATED_READMES) -def readmegen(session, path): +def readmegen(session: nox.sessions.Session, path: str) -> None: """(Re-)generates the readme for a sample.""" session.install("jinja2", "pyyaml") dir_ = os.path.dirname(path) diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 6b785486026a..eb9009391246 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -3,8 +3,8 @@ { "git": { "name": ".", - "remote": "git@github.com:tswast/python-bigquery.git", - "sha": "5a422eb20c57dae66c5716fd319b66432d3edce6" + "remote": "https://github.com/googleapis/python-bigquery.git", + "sha": "2788736b80a4c4ac0ae3029aeb28bcefd34f2db7" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5" + "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5" + "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" } } ], @@ -40,5 +40,89 @@ "generator": "bazel" } } + ], + "generatedFiles": [ + ".flake8", + ".github/CONTRIBUTING.md", + ".github/ISSUE_TEMPLATE/bug_report.md", + ".github/ISSUE_TEMPLATE/feature_request.md", + ".github/ISSUE_TEMPLATE/support_request.md", + ".github/PULL_REQUEST_TEMPLATE.md", + ".github/release-please.yml", + ".github/snippet-bot.yml", + ".gitignore", + ".kokoro/build.sh", + ".kokoro/continuous/common.cfg", + ".kokoro/continuous/continuous.cfg", + ".kokoro/docker/docs/Dockerfile", + ".kokoro/docker/docs/fetch_gpg_keys.sh", + ".kokoro/docs/common.cfg", + ".kokoro/docs/docs-presubmit.cfg", + ".kokoro/docs/docs.cfg", + ".kokoro/populate-secrets.sh", + ".kokoro/presubmit/common.cfg", + ".kokoro/presubmit/system-3.8.cfg", + ".kokoro/publish-docs.sh", + ".kokoro/release.sh", + ".kokoro/release/common.cfg", + ".kokoro/release/release.cfg", + ".kokoro/samples/lint/common.cfg", + ".kokoro/samples/lint/continuous.cfg", + ".kokoro/samples/lint/periodic.cfg", + ".kokoro/samples/lint/presubmit.cfg", + ".kokoro/samples/python3.6/common.cfg", + ".kokoro/samples/python3.6/continuous.cfg", + ".kokoro/samples/python3.6/periodic.cfg", + ".kokoro/samples/python3.6/presubmit.cfg", + ".kokoro/samples/python3.7/common.cfg", + ".kokoro/samples/python3.7/continuous.cfg", + ".kokoro/samples/python3.7/periodic.cfg", + ".kokoro/samples/python3.7/presubmit.cfg", + ".kokoro/samples/python3.8/common.cfg", + ".kokoro/samples/python3.8/continuous.cfg", + ".kokoro/samples/python3.8/periodic.cfg", + ".kokoro/samples/python3.8/presubmit.cfg", + ".kokoro/test-samples.sh", + ".kokoro/trampoline.sh", + ".kokoro/trampoline_v2.sh", + ".pre-commit-config.yaml", + ".trampolinerc", + "CODE_OF_CONDUCT.md", + "CONTRIBUTING.rst", + "LICENSE", + "MANIFEST.in", + "docs/_static/custom.css", + "docs/_templates/layout.html", + "docs/bigquery_v2/services.rst", + "docs/bigquery_v2/types.rst", + "docs/conf.py", + "google/cloud/bigquery_v2/__init__.py", + "google/cloud/bigquery_v2/proto/encryption_config.proto", + "google/cloud/bigquery_v2/proto/model.proto", + "google/cloud/bigquery_v2/proto/model_reference.proto", + "google/cloud/bigquery_v2/proto/standard_sql.proto", + "google/cloud/bigquery_v2/proto/table_reference.proto", + "google/cloud/bigquery_v2/py.typed", + "google/cloud/bigquery_v2/types/__init__.py", + "google/cloud/bigquery_v2/types/encryption_config.py", + "google/cloud/bigquery_v2/types/model.py", + "google/cloud/bigquery_v2/types/model_reference.py", + "google/cloud/bigquery_v2/types/standard_sql.py", + "google/cloud/bigquery_v2/types/table_reference.py", + "mypy.ini", + "renovate.json", + "samples/AUTHORING_GUIDE.md", + "samples/CONTRIBUTING.md", + "samples/geography/noxfile.py", + "samples/snippets/noxfile.py", + "scripts/decrypt-secrets.sh", + "scripts/readme-gen/readme_gen.py", + "scripts/readme-gen/templates/README.tmpl.rst", + "scripts/readme-gen/templates/auth.tmpl.rst", + "scripts/readme-gen/templates/auth_api_key.tmpl.rst", + "scripts/readme-gen/templates/install_deps.tmpl.rst", + "scripts/readme-gen/templates/install_portaudio.tmpl.rst", + "setup.cfg", + "testing/.gitignore" ] } \ No newline at end of file From 8ea0471a19a5629b5167eeba71ef6585e700644d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:52:02 +0100 Subject: [PATCH 1054/2016] chore(deps): update dependency pytz to v2021 (#502) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [pytz](http://pythonhosted.org/pytz) | `==2020.5` -> `==2021.1` | [![age](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/compatibility-slim/2020.5)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/confidence-slim/2020.5)](https://docs.renovatebot.com/merge-confidence/) | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 00d28fa0b2aa..7087121b576a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -8,4 +8,4 @@ matplotlib==3.3.4 pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' pyarrow==3.0.0 -pytz==2020.5 +pytz==2021.1 From f728c9b9a42b877b1ee94325e2c6cb91f31994b5 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 3 Feb 2021 09:41:41 -0800 Subject: [PATCH 1055/2016] test: add samples Python 3.9 test session (#506) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * changes without context autosynth cannot find the source of changes triggered by earlier changes in this repository, or by version upgrades to tools such as linters. * feat: sync v1beta1 GKE API fix: deprecate SetLocations; use UpdateCluster feat: support for sysctls config in Linux nodes feat: support for node kubelet config controlling CPU manager policy, CFS quota feat: support for Customer Managed Encryption in nodes feat: support for SSDs as ephemeral storage feat: support for node reservation affinity feat: support for Gvisor in nodes fix: deprecate basic auth fields (removed in 1.19 clusters) feat: support for NodeLocalDNS feat: support for ConfigConnector feat: support for the Compute Engine Persistent Disk CSI driver feat: support for KALM feat: support for private cluster VPC peering and master global access feat: support for CloudRun load balancers feat: support using routes for pod IPs feat: support for Shielded Nodes feat: support for release channels feat: support for Workload Identity feat: support for Cluster Telemetry feat: support for Cloud TPU feat: support for receiving upgrade notifications feat: support for Confidential Nodes feat: support for disabling default sNAT feat: support for selecting Kubernetes datapath model feat: support for encrypting etcd databases feat: support for configuration of master components fix: deprecate Operation.cluster_conditions and operation_conditions; use error feat: support updating NodePool locations feat: support for node Surge Upgrades feat: support for specifying Cluster Autoscaling profile. feat: support for Node Auto Provisioning feat: support for specifying node disk size and type fix: deprecated StatusCondition.code; use canonical_code docs: many minor documentation clarifications docs: some output only fields now annotated as such PiperOrigin-RevId: 344443035 Source-Author: Google APIs Source-Date: Thu Nov 26 11:27:06 2020 -0800 Source-Repo: googleapis/googleapis Source-Sha: df4fd38d040c5c8a0869936205bca13fb64b2cff Source-Link: https://github.com/googleapis/googleapis/commit/df4fd38d040c5c8a0869936205bca13fb64b2cff * chore: add 3.9 to noxfile template Since the python-docs-samples noxfile-template doesn't sync with this, I wanted to make sure the noxfile template matched the most recent change [here](https://github.com/GoogleCloudPlatform/python-docs-samples/pull/4968/files) cc @tmatsuo Source-Author: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Source-Date: Fri Jan 15 17:24:05 2021 -0800 Source-Repo: googleapis/synthtool Source-Sha: 56ddc68f36b32341e9f22c2c59b4ce6aa3ba635f Source-Link: https://github.com/googleapis/synthtool/commit/56ddc68f36b32341e9f22c2c59b4ce6aa3ba635f * build(python): make `NOX_SESSION` optional I added this accidentally in #889. `NOX_SESSION` should be passed down if it is set but not marked required. Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Tue Jan 19 09:38:04 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: ba960d730416fe05c50547e975ce79fcee52c671 Source-Link: https://github.com/googleapis/synthtool/commit/ba960d730416fe05c50547e975ce79fcee52c671 * chore: Add header checker config to python library synth Now that we have it working in [python-docs-samples](https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/.github/header-checker-lint.yml) we should consider adding it to the 🐍 libraries :) Source-Author: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Source-Date: Mon Jan 25 13:24:08 2021 -0800 Source-Repo: googleapis/synthtool Source-Sha: 573f7655311b553a937f9123bee17bf78497db95 Source-Link: https://github.com/googleapis/synthtool/commit/573f7655311b553a937f9123bee17bf78497db95 * build: migrate to flakybot Source-Author: Justin Beckwith Source-Date: Thu Jan 28 22:22:38 2021 -0800 Source-Repo: googleapis/synthtool Source-Sha: d1bb9173100f62c0cfc8f3138b62241e7f47ca6a Source-Link: https://github.com/googleapis/synthtool/commit/d1bb9173100f62c0cfc8f3138b62241e7f47ca6a * remove tarball Co-authored-by: Tim Swast --- .../.github/header-checker-lint.yml | 15 +++++++++++++++ packages/google-cloud-bigquery/.trampolinerc | 1 - .../samples/geography/noxfile.py | 2 +- .../samples/snippets/noxfile.py | 2 +- packages/google-cloud-bigquery/synth.metadata | 7 ++++--- 5 files changed, 21 insertions(+), 6 deletions(-) create mode 100644 packages/google-cloud-bigquery/.github/header-checker-lint.yml diff --git a/packages/google-cloud-bigquery/.github/header-checker-lint.yml b/packages/google-cloud-bigquery/.github/header-checker-lint.yml new file mode 100644 index 000000000000..fc281c05bd55 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/header-checker-lint.yml @@ -0,0 +1,15 @@ +{"allowedCopyrightHolders": ["Google LLC"], + "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"], + "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt"], + "sourceFileExtensions": [ + "ts", + "js", + "java", + "sh", + "Dockerfile", + "yaml", + "py", + "html", + "txt" + ] +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.trampolinerc b/packages/google-cloud-bigquery/.trampolinerc index c7d663ae9c57..383b6ec89fbc 100644 --- a/packages/google-cloud-bigquery/.trampolinerc +++ b/packages/google-cloud-bigquery/.trampolinerc @@ -18,7 +18,6 @@ required_envvars+=( "STAGING_BUCKET" "V2_STAGING_BUCKET" - "NOX_SESSION" ) # Add env vars which are passed down into the container here. diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index bbd25fcdb5e7..f2320ea0001c 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -82,7 +82,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index bbd25fcdb5e7..f2320ea0001c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -82,7 +82,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index eb9009391246..f6dcca132848 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "2788736b80a4c4ac0ae3029aeb28bcefd34f2db7" + "sha": "64e1c0581b915e36756ea465936939390da7d818" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" + "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" + "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" } } ], @@ -48,6 +48,7 @@ ".github/ISSUE_TEMPLATE/feature_request.md", ".github/ISSUE_TEMPLATE/support_request.md", ".github/PULL_REQUEST_TEMPLATE.md", + ".github/header-checker-lint.yml", ".github/release-please.yml", ".github/snippet-bot.yml", ".gitignore", From ebf1273e94c25cb262a940607235754235db8a07 Mon Sep 17 00:00:00 2001 From: arithmetic1728 <58957152+arithmetic1728@users.noreply.github.com> Date: Fri, 5 Feb 2021 07:53:15 -0800 Subject: [PATCH 1056/2016] feat: add mtls support to client (#492) * feat: add mtls feature --- .../google/cloud/bigquery/_http.py | 21 ++++++++++++++-- .../google/cloud/bigquery/client.py | 25 ++++++++++++++----- .../tests/system/test_client.py | 6 +++++ .../tests/unit/helpers.py | 2 ++ .../tests/unit/test__http.py | 14 +++++++++++ .../tests/unit/test_client.py | 23 ++++++++++++++--- 6 files changed, 79 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index 8ee633e64147..ede26cc7024e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -14,11 +14,23 @@ """Create / interact with Google BigQuery connections.""" +import os +import pkg_resources + from google.cloud import _http from google.cloud.bigquery import __version__ +# TODO: Increase the minimum version of google-cloud-core to 1.6.0 +# and remove this logic. See: +# https://github.com/googleapis/python-bigquery/issues/509 +if os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true": # pragma: NO COVER + release = pkg_resources.get_distribution("google-cloud-core").parsed_version + if release < pkg_resources.parse_version("1.6.0"): + raise ImportError("google-cloud-core >= 1.6.0 is required to use mTLS feature") + + class Connection(_http.JSONConnection): """A connection to Google BigQuery via the JSON REST API. @@ -26,13 +38,18 @@ class Connection(_http.JSONConnection): client (google.cloud.bigquery.client.Client): The client that owns the current connection. client_info (Optional[google.api_core.client_info.ClientInfo]): Instance used to generate user agent. + + api_endpoint (str): The api_endpoint to use. If None, the library will decide what endpoint to use. """ DEFAULT_API_ENDPOINT = "https://bigquery.googleapis.com" + DEFAULT_API_MTLS_ENDPOINT = "https://bigquery.mtls.googleapis.com" - def __init__(self, client, client_info=None, api_endpoint=DEFAULT_API_ENDPOINT): + def __init__(self, client, client_info=None, api_endpoint=None): super(Connection, self).__init__(client, client_info) - self.API_BASE_URL = api_endpoint + self.API_BASE_URL = api_endpoint or self.DEFAULT_API_ENDPOINT + self.API_BASE_MTLS_URL = self.DEFAULT_API_MTLS_ENDPOINT + self.ALLOW_AUTO_SWITCH_TO_MTLS_URL = api_endpoint is None self._client_info.gapic_version = __version__ self._client_info.client_library_version = __version__ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index b270075a9aad..f8c0d7c93ea8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -78,10 +78,7 @@ _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 -_BASE_UPLOAD_TEMPLATE = ( - "https://bigquery.googleapis.com/upload/bigquery/v2/projects/" - "{project}/jobs?uploadType=" -) +_BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType=" _MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart" _RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable" _GENERIC_CONTENT_TYPE = "*/*" @@ -2547,7 +2544,15 @@ def _initiate_resumable_upload( if project is None: project = self.project - upload_url = _RESUMABLE_URL_TEMPLATE.format(project=project) + # TODO: Increase the minimum version of google-cloud-core to 1.6.0 + # and remove this logic. See: + # https://github.com/googleapis/python-bigquery/issues/509 + hostname = ( + self._connection.API_BASE_URL + if not hasattr(self._connection, "get_api_base_url_for_mtls") + else self._connection.get_api_base_url_for_mtls() + ) + upload_url = _RESUMABLE_URL_TEMPLATE.format(host=hostname, project=project) # TODO: modify ResumableUpload to take a retry.Retry object # that it can use for the initial RPC. @@ -2616,7 +2621,15 @@ def _do_multipart_upload( if project is None: project = self.project - upload_url = _MULTIPART_URL_TEMPLATE.format(project=project) + # TODO: Increase the minimum version of google-cloud-core to 1.6.0 + # and remove this logic. See: + # https://github.com/googleapis/python-bigquery/issues/509 + hostname = ( + self._connection.API_BASE_URL + if not hasattr(self._connection, "get_api_base_url_for_mtls") + else self._connection.get_api_base_url_for_mtls() + ) + upload_url = _MULTIPART_URL_TEMPLATE.format(host=hostname, project=project) upload = MultipartUpload(upload_url, headers=headers) if num_retries is not None: diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index aa1a03160b35..85c044badd2f 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -28,6 +28,7 @@ import uuid import psutil +import pytest import pytz import pkg_resources @@ -132,6 +133,8 @@ else: PYARROW_INSTALLED_VERSION = None +MTLS_TESTING = os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true" + def _has_rows(result): return len(result) > 0 @@ -2651,6 +2654,9 @@ def test_insert_rows_nested_nested_dictionary(self): expected_rows = [("Some value", record)] self.assertEqual(row_tuples, expected_rows) + @pytest.mark.skipif( + MTLS_TESTING, reason="mTLS testing has no permission to the max-value.js file" + ) def test_create_routine(self): routine_name = "test_routine" dataset = self.temp_dataset(_make_dataset_id("create_routine")) diff --git a/packages/google-cloud-bigquery/tests/unit/helpers.py b/packages/google-cloud-bigquery/tests/unit/helpers.py index eea345e8979b..b51b0bbb7e03 100644 --- a/packages/google-cloud-bigquery/tests/unit/helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/helpers.py @@ -21,6 +21,8 @@ def make_connection(*responses): mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) mock_conn.user_agent = "testing 1.2.3" mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] + mock_conn.API_BASE_URL = "https://bigquery.googleapis.com" + mock_conn.get_api_base_url_for_mtls = mock.Mock(return_value=mock_conn.API_BASE_URL) return mock_conn diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index 78e59cb306d9..09f6d29d71b5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -32,6 +32,9 @@ def _get_target_class(): return Connection def _make_one(self, *args, **kw): + if "api_endpoint" not in kw: + kw["api_endpoint"] = "https://bigquery.googleapis.com" + return self._get_target_class()(*args, **kw) def test_build_api_url_no_extra_query_params(self): @@ -138,3 +141,14 @@ def test_extra_headers_replace(self): url=expected_uri, timeout=self._get_default_timeout(), ) + + def test_ctor_mtls(self): + conn = self._make_one(object(), api_endpoint=None) + self.assertEqual(conn.ALLOW_AUTO_SWITCH_TO_MTLS_URL, True) + self.assertEqual(conn.API_BASE_URL, "https://bigquery.googleapis.com") + self.assertEqual(conn.API_BASE_MTLS_URL, "https://bigquery.mtls.googleapis.com") + + conn = self._make_one(object(), api_endpoint="http://foo") + self.assertEqual(conn.ALLOW_AUTO_SWITCH_TO_MTLS_URL, False) + self.assertEqual(conn.API_BASE_URL, "http://foo") + self.assertEqual(conn.API_BASE_MTLS_URL, "https://bigquery.mtls.googleapis.com") diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 625256e6e740..66add9c0a034 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2057,6 +2057,7 @@ def test_get_table_sets_user_agent(self): url=mock.ANY, method=mock.ANY, headers=mock.ANY, data=mock.ANY ) http.reset_mock() + http.is_mtls = False mock_response.status_code = 200 mock_response.json.return_value = self._make_table_resource() user_agent_override = client_info.ClientInfo(user_agent="my-application/1.2.3") @@ -4425,7 +4426,7 @@ def _mock_transport(self, status_code, headers, content=b""): fake_transport.request.return_value = fake_response return fake_transport - def _initiate_resumable_upload_helper(self, num_retries=None): + def _initiate_resumable_upload_helper(self, num_retries=None, mtls=False): from google.resumable_media.requests import ResumableUpload from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE @@ -4440,6 +4441,8 @@ def _initiate_resumable_upload_helper(self, num_retries=None): fake_transport = self._mock_transport(http.client.OK, response_headers) client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") # Create some mock arguments and call the method under test. data = b"goodbye gudbi gootbee" @@ -4454,8 +4457,10 @@ def _initiate_resumable_upload_helper(self, num_retries=None): # Check the returned values. self.assertIsInstance(upload, ResumableUpload) + + host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" upload_url = ( - f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{self.PROJECT}" + f"{host_name}/upload/bigquery/v2/projects/{self.PROJECT}" "/jobs?uploadType=resumable" ) self.assertEqual(upload.upload_url, upload_url) @@ -4494,11 +4499,14 @@ def _initiate_resumable_upload_helper(self, num_retries=None): def test__initiate_resumable_upload(self): self._initiate_resumable_upload_helper() + def test__initiate_resumable_upload_mtls(self): + self._initiate_resumable_upload_helper(mtls=True) + def test__initiate_resumable_upload_with_retry(self): self._initiate_resumable_upload_helper(num_retries=11) def _do_multipart_upload_success_helper( - self, get_boundary, num_retries=None, project=None + self, get_boundary, num_retries=None, project=None, mtls=False ): from google.cloud.bigquery.client import _get_upload_headers from google.cloud.bigquery.job import LoadJob @@ -4508,6 +4516,8 @@ def _do_multipart_upload_success_helper( fake_transport = self._mock_transport(http.client.OK, {}) client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") if project is None: project = self.PROJECT @@ -4530,8 +4540,9 @@ def _do_multipart_upload_success_helper( self.assertEqual(stream.tell(), size) get_boundary.assert_called_once_with() + host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" upload_url = ( - f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{project}" + f"{host_name}/upload/bigquery/v2/projects/{project}" "/jobs?uploadType=multipart" ) payload = ( @@ -4556,6 +4567,10 @@ def _do_multipart_upload_success_helper( def test__do_multipart_upload(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary) + @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") + def test__do_multipart_upload_mtls(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary, mtls=True) + @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") def test__do_multipart_upload_with_retry(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary, num_retries=8) From 3b6076a6375dbfeedcb9e75fc8de814b6491cd05 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 8 Feb 2021 09:16:42 -0800 Subject: [PATCH 1057/2016] chore(python): include py.typed files in release (#511) A py.typed file must be included in the released package for it to be considered typed by type checkers. https://www.python.org/dev/peps/pep-0561/#packaging-type-information. See https://github.com/googleapis/python-secret-manager/issues/79 Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Fri Feb 5 17:32:06 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 33366574ffb9e11737b3547eb6f020ecae0536e8 Source-Link: https://github.com/googleapis/synthtool/commit/33366574ffb9e11737b3547eb6f020ecae0536e8 --- packages/google-cloud-bigquery/MANIFEST.in | 4 ++-- packages/google-cloud-bigquery/synth.metadata | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/MANIFEST.in b/packages/google-cloud-bigquery/MANIFEST.in index e9e29d12033d..e783f4c6209b 100644 --- a/packages/google-cloud-bigquery/MANIFEST.in +++ b/packages/google-cloud-bigquery/MANIFEST.in @@ -16,10 +16,10 @@ # Generated by synthtool. DO NOT EDIT! include README.rst LICENSE -recursive-include google *.json *.proto +recursive-include google *.json *.proto py.typed recursive-include tests * global-exclude *.py[co] global-exclude __pycache__ # Exclude scripts for samples readmegen -prune scripts/readme-gen \ No newline at end of file +prune scripts/readme-gen diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index f6dcca132848..1c5fecaf8533 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "64e1c0581b915e36756ea465936939390da7d818" + "sha": "1823cadee3acf95c516d0479400e4175349ea199" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" + "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" + "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" } } ], From b8cb85142826bfe185e9446af552aea206b473a3 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 10 Feb 2021 08:28:01 +0000 Subject: [PATCH 1058/2016] chore: release 2.8.0 (#510) :robot: I have created a release \*beep\* \*boop\* --- ## [2.8.0](https://www.github.com/googleapis/python-bigquery/compare/v2.7.0...v2.8.0) (2021-02-08) ### Features * add mtls support to client ([#492](https://www.github.com/googleapis/python-bigquery/issues/492)) ([1823cad](https://www.github.com/googleapis/python-bigquery/commit/1823cadee3acf95c516d0479400e4175349ea199)) ### Bug Fixes * don't try to close closed cursors ([#498](https://www.github.com/googleapis/python-bigquery/issues/498)) ([bf44e7b](https://www.github.com/googleapis/python-bigquery/commit/bf44e7b67d2de41c13053a4550484b9ea049db3e)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index a58510c66ab5..768b7b0361b0 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.8.0](https://www.github.com/googleapis/python-bigquery/compare/v2.7.0...v2.8.0) (2021-02-08) + + +### Features + +* Add mTLS support to client. ([#492](https://www.github.com/googleapis/python-bigquery/issues/492)) ([1823cad](https://www.github.com/googleapis/python-bigquery/commit/1823cadee3acf95c516d0479400e4175349ea199)) + + +### Bug Fixes + +* Don't try to close closed cursors. ([#498](https://www.github.com/googleapis/python-bigquery/issues/498)) ([bf44e7b](https://www.github.com/googleapis/python-bigquery/commit/bf44e7b67d2de41c13053a4550484b9ea049db3e)) + ## [2.7.0](https://www.github.com/googleapis/python-bigquery/compare/v2.6.2...v2.7.0) (2021-01-27) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index d962613e0244..0a9aecb37574 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.7.0" +__version__ = "2.8.0" From bcdc44a4f339d900462665e8db98fe49283e42ac Mon Sep 17 00:00:00 2001 From: arithmetic1728 <58957152+arithmetic1728@users.noreply.github.com> Date: Thu, 11 Feb 2021 13:08:39 -0800 Subject: [PATCH 1059/2016] test: update system test for mtls (#518) --- packages/google-cloud-bigquery/noxfile.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 942525ca933c..df36d237e955 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -113,7 +113,11 @@ def system(session): session.install( "mock", "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path ) - session.install("google-cloud-storage", "-c", constraints_path) + if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true": + # mTLS test requires pyopenssl and latest google-cloud-storage + session.install("google-cloud-storage", "pyopenssl") + else: + session.install("google-cloud-storage", "-c", constraints_path) session.install("-e", ".[all]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) From 12898c6944fd60d75193809358bfca3efb9afea0 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 16 Feb 2021 06:49:16 -0800 Subject: [PATCH 1060/2016] chore: add PARQUET to DestinationFormat enum (#521) --- packages/google-cloud-bigquery/google/cloud/bigquery/enums.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 2268808fdb5f..db463afdce73 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -72,6 +72,9 @@ class DestinationFormat(object): AVRO = "AVRO" """Specifies Avro format.""" + PARQUET = "PARQUET" + """Specifies Parquet format.""" + class Encoding(object): """The character encoding of the data. The default is :attr:`UTF_8`. From 20a54b37557c1933af7cedf1a737404a558aff56 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 Feb 2021 08:58:02 -0600 Subject: [PATCH 1061/2016] docs: clarify `%%bigquery`` magics and fix broken link (#508) --- packages/google-cloud-bigquery/docs/usage/index.rst | 4 ++-- .../google/cloud/bigquery/magics/magics.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/usage/index.rst b/packages/google-cloud-bigquery/docs/usage/index.rst index ff4c9d7f1a8f..1d3cc9f64fdd 100644 --- a/packages/google-cloud-bigquery/docs/usage/index.rst +++ b/packages/google-cloud-bigquery/docs/usage/index.rst @@ -29,7 +29,7 @@ Integrations with Other Libraries pandas -See also, the :mod:`google.cloud.bigquery.magics` module for integrations -with Jupyter. +See also, the :mod:`google.cloud.bigquery.magics.magics` module for +integrations with Jupyter. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 8f343ddcc96a..6ae7cae12345 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -14,6 +14,15 @@ """IPython Magics +To use these magics, you must first register them. Run the ``%load_ext`` magic +in a Jupyter notebook cell. + +.. code:: + + %load_ext google.cloud.bigquery + +This makes the ``%%bigquery`` magic available. + .. function:: %%bigquery IPython cell magic to run a query and display the result as a DataFrame From 31903abf88591b618cd0e5c6456ec451c6ebdab8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 Feb 2021 09:25:22 -0600 Subject: [PATCH 1062/2016] chore: remove redundant view code samples (#437) --- .../google-cloud-bigquery/docs/snippets.py | 126 ------------------ 1 file changed, 126 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 8c106e63d54b..3f9b9a88c7e2 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -478,132 +478,6 @@ def test_update_table_cmek(client, to_delete): # [END bigquery_update_table_cmek] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_manage_views(client, to_delete): - project = client.project - source_dataset_id = "source_dataset_{}".format(_millis()) - source_dataset_ref = bigquery.DatasetReference(project, source_dataset_id) - source_dataset = bigquery.Dataset(source_dataset_ref) - source_dataset = client.create_dataset(source_dataset) - to_delete.append(source_dataset) - - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - job_config.skip_leading_rows = 1 - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - source_table_id = "us_states" - load_job = client.load_table_from_uri( - uri, source_dataset.table(source_table_id), job_config=job_config - ) - load_job.result() - - shared_dataset_id = "shared_dataset_{}".format(_millis()) - shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) - shared_dataset = bigquery.Dataset(shared_dataset_ref) - shared_dataset = client.create_dataset(shared_dataset) - to_delete.append(shared_dataset) - - # [START bigquery_create_view] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = 'my-project' - # source_dataset_id = 'my_source_dataset' - # source_table_id = 'us_states' - # shared_dataset_ref = bigquery.DatasetReference(project, 'my_shared_dataset') - - # This example shows how to create a shared view of a source table of - # US States. The source table contains all 50 states, while the view will - # contain only states with names starting with 'W'. - view_ref = shared_dataset_ref.table("my_shared_view") - view = bigquery.Table(view_ref) - sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"' - view.view_query = sql_template.format(project, source_dataset_id, source_table_id) - view = client.create_table(view) # API request - - print("Successfully created view at {}".format(view.full_table_id)) - # [END bigquery_create_view] - - # [START bigquery_update_view_query] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = 'my-project' - # source_dataset_id = 'my_source_dataset' - # source_table_id = 'us_states' - # shared_dataset_ref = bigquery.DatasetReference(project, 'my_shared_dataset') - - # This example shows how to update a shared view of a source table of - # US States. The view's query will be updated to contain only states with - # names starting with 'M'. - view_ref = shared_dataset_ref.table("my_shared_view") - view = bigquery.Table(view_ref) - sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"' - view.view_query = sql_template.format(project, source_dataset_id, source_table_id) - view = client.update_table(view, ["view_query"]) # API request - # [END bigquery_update_view_query] - - # [START bigquery_get_view] - # from google.cloud import bigquery - # client = bigquery.Client() - # shared_dataset_id = 'my_shared_dataset' - project = client.project - shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) - view_ref = shared_dataset_ref.table("my_shared_view") - view = client.get_table(view_ref) # API Request - - # Display view properties - print("View at {}".format(view.full_table_id)) - print("View Query:\n{}".format(view.view_query)) - # [END bigquery_get_view] - assert view.view_query is not None - - analyst_group_email = "example-analyst-group@google.com" - # [START bigquery_grant_view_access] - # from google.cloud import bigquery - # client = bigquery.Client() - - # Assign access controls to the dataset containing the view - # shared_dataset_id = 'my_shared_dataset' - # analyst_group_email = 'data_analysts@example.com' - project = client.project - shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) - shared_dataset = client.get_dataset(shared_dataset_ref) # API request - access_entries = shared_dataset.access_entries - access_entries.append( - bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) - ) - shared_dataset.access_entries = access_entries - shared_dataset = client.update_dataset( - shared_dataset, ["access_entries"] - ) # API request - - # Authorize the view to access the source dataset - # project = 'my-project' - # source_dataset_id = 'my_source_dataset' - project = client.project - source_dataset_ref = bigquery.DatasetReference(project, source_dataset_id) - source_dataset = client.get_dataset(source_dataset_ref) # API request - view_reference = { - "projectId": project, - "datasetId": shared_dataset_id, - "tableId": "my_shared_view", - } - access_entries = source_dataset.access_entries - access_entries.append(bigquery.AccessEntry(None, "view", view_reference)) - source_dataset.access_entries = access_entries - source_dataset = client.update_dataset( - source_dataset, ["access_entries"] - ) # API request - # [END bigquery_grant_view_access] - - def test_load_table_add_column(client, to_delete): dataset_id = "load_table_add_column_{}".format(_millis()) project = client.project From 5784319ea7ea7e26050d18ef41ac4c108bdbd5bb Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 17 Feb 2021 17:52:56 +0100 Subject: [PATCH 1063/2016] feat: add determinism level for javascript UDFs (#522) * feat: add determinism level for javascript UDFs * Add enum-like class for routine determinism level --- .../google-cloud-bigquery/docs/reference.rst | 1 + .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/enums.py | 17 ++++++++ .../google/cloud/bigquery/routine/__init__.py | 29 +++++++++++++ .../cloud/bigquery/{ => routine}/routine.py | 12 ++++++ .../tests/system/test_client.py | 1 + .../tests/unit/routine/test_routine.py | 41 ++++++++++++++++++- 7 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py rename packages/google-cloud-bigquery/google/cloud/bigquery/{ => routine}/routine.py (97%) diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 3643831cbc2f..6b802e2a567a 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -110,6 +110,7 @@ Routine .. autosummary:: :toctree: generated + routine.DeterminismLevel routine.Routine routine.RoutineArgument routine.RoutineReference diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 41f987228ee2..29d375b03a1d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -70,6 +70,7 @@ from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.routine import DeterminismLevel from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference @@ -134,6 +135,7 @@ "Compression", "CreateDisposition", "DestinationFormat", + "DeterminismLevel", "ExternalSourceFormat", "Encoding", "QueryPriority", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index db463afdce73..e353b3132e81 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -234,3 +234,20 @@ class WriteDisposition(object): WRITE_EMPTY = "WRITE_EMPTY" """If the table already exists and contains data, a 'duplicate' error is returned in the job result.""" + + +class DeterminismLevel: + """Specifies determinism level for JavaScript user-defined functions (UDFs). + + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#DeterminismLevel + """ + + DETERMINISM_LEVEL_UNSPECIFIED = "DETERMINISM_LEVEL_UNSPECIFIED" + """The determinism of the UDF is unspecified.""" + + DETERMINISTIC = "DETERMINISTIC" + """The UDF is deterministic, meaning that 2 function calls with the same inputs + always produce the same result, even across 2 query runs.""" + + NOT_DETERMINISTIC = "NOT_DETERMINISTIC" + """The UDF is not deterministic.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py new file mode 100644 index 000000000000..d1c79b05eee9 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""User-Defined Routines.""" + + +from google.cloud.bigquery.enums import DeterminismLevel +from google.cloud.bigquery.routine.routine import Routine +from google.cloud.bigquery.routine.routine import RoutineArgument +from google.cloud.bigquery.routine.routine import RoutineReference + + +__all__ = ( + "DeterminismLevel", + "Routine", + "RoutineArgument", + "RoutineReference", +) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py similarity index 97% rename from packages/google-cloud-bigquery/google/cloud/bigquery/routine.py rename to packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py index f26f2088630d..103799e8fd4a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py @@ -50,6 +50,7 @@ class Routine(object): "return_type": "returnType", "type_": "routineType", "description": "description", + "determinism_level": "determinismLevel", } def __init__(self, routine_ref, **kwargs): @@ -253,6 +254,17 @@ def description(self): def description(self, value): self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value + @property + def determinism_level(self): + """Optional[str]: (experimental) The determinism level of the JavaScript UDF + if defined. + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["determinism_level"]) + + @determinism_level.setter + def determinism_level(self, value): + self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value + @classmethod def from_api_repr(cls, resource): """Factory: construct a routine given its API representation. diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 85c044badd2f..60c3b3fa8afe 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2682,6 +2682,7 @@ def test_create_routine(self): ) ] routine.body = "return maxValue(arr)" + routine.determinism_level = bigquery.DeterminismLevel.DETERMINISTIC query_string = "SELECT `{}`([-100.0, 3.14, 100.0, 42.0]) as max_value;".format( str(routine.reference) ) diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py index b02ace1db813..0a59e7c5fed7 100644 --- a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py @@ -18,6 +18,7 @@ import pytest import google.cloud._helpers +from google.cloud import bigquery from google.cloud import bigquery_v2 @@ -73,6 +74,7 @@ def test_ctor_w_properties(target_class): ) type_ = "SCALAR_FUNCTION" description = "A routine description." + determinism_level = bigquery.DeterminismLevel.NOT_DETERMINISTIC actual_routine = target_class( routine_id, @@ -82,6 +84,7 @@ def test_ctor_w_properties(target_class): return_type=return_type, type_=type_, description=description, + determinism_level=determinism_level, ) ref = RoutineReference.from_string(routine_id) @@ -92,6 +95,9 @@ def test_ctor_w_properties(target_class): assert actual_routine.return_type == return_type assert actual_routine.type_ == type_ assert actual_routine.description == description + assert ( + actual_routine.determinism_level == bigquery.DeterminismLevel.NOT_DETERMINISTIC + ) def test_from_api_repr(target_class): @@ -120,6 +126,7 @@ def test_from_api_repr(target_class): "routineType": "SCALAR_FUNCTION", "someNewField": "someValue", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC, } actual_routine = target_class.from_api_repr(resource) @@ -152,6 +159,7 @@ def test_from_api_repr(target_class): assert actual_routine.type_ == "SCALAR_FUNCTION" assert actual_routine._properties["someNewField"] == "someValue" assert actual_routine.description == "A routine description." + assert actual_routine.determinism_level == "DETERMINISTIC" def test_from_api_repr_w_minimal_resource(target_class): @@ -177,6 +185,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.return_type is None assert actual_routine.type_ is None assert actual_routine.description is None + assert actual_routine.determinism_level is None def test_from_api_repr_w_unknown_fields(target_class): @@ -208,6 +217,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["arguments"], {"arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}]}, @@ -220,6 +230,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["body"], {"definitionBody": "x * 3"}, @@ -232,6 +243,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["language"], {"language": "SQL"}, @@ -244,6 +256,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["return_type"], {"returnType": {"typeKind": "INT64"}}, @@ -256,6 +269,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["type_"], {"routineType": "SCALAR_FUNCTION"}, @@ -268,13 +282,37 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["description"], {"description": "A routine description."}, ), + ( + { + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "definitionBody": "x * 3", + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, + }, + ["determinism_level"], + { + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED + }, + ), ( {}, - ["arguments", "language", "body", "type_", "return_type", "description"], + [ + "arguments", + "language", + "body", + "type_", + "return_type", + "description", + "determinism_level", + ], { "arguments": None, "definitionBody": None, @@ -282,6 +320,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": None, "routineType": None, "description": None, + "determinismLevel": None, }, ), ( From a4688e6ee7bdf060ff148ec3660f12206c3a48dc Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 17 Feb 2021 18:30:58 +0100 Subject: [PATCH 1064/2016] feat: expose reservation usage stats on jobs (#524) * feat: expose reservation usage stats on jobs * Add ReservationUsage to job types in docs * Remove redundant space in docstring. --- .../google-cloud-bigquery/docs/reference.rst | 1 + .../google/cloud/bigquery/job/__init__.py | 2 ++ .../google/cloud/bigquery/job/base.py | 27 +++++++++++++++++++ .../tests/unit/job/test_base.py | 24 +++++++++++++++++ 4 files changed, 54 insertions(+) diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 6b802e2a567a..52d916f96ac0 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -62,6 +62,7 @@ Job-Related Types job.QueryPlanEntry job.QueryPlanEntryStep job.QueryPriority + job.ReservationUsage job.SourceFormat job.WriteDisposition job.SchemaUpdateOption diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py index 26ecf8d3cf42..4945841d960f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py @@ -19,6 +19,7 @@ from google.cloud.bigquery.job.base import _DONE_STATE from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference +from google.cloud.bigquery.job.base import ReservationUsage from google.cloud.bigquery.job.base import ScriptStatistics from google.cloud.bigquery.job.base import ScriptStackFrame from google.cloud.bigquery.job.base import UnknownJob @@ -51,6 +52,7 @@ "_DONE_STATE", "_JobConfig", "_JobReference", + "ReservationUsage", "ScriptStatistics", "ScriptStackFrame", "UnknownJob", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 5ba01aa673d9..d8f5d6528c56 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -14,6 +14,7 @@ """Base classes and helpers for job classes.""" +from collections import namedtuple import copy import http import threading @@ -73,6 +74,16 @@ def _error_result_to_exception(error_result): ) +ReservationUsage = namedtuple("ReservationUsage", "name slot_ms") +ReservationUsage.__doc__ = "Job resource usage for a reservation." +ReservationUsage.name.__doc__ = ( + 'Reservation name or "unreserved" for on-demand resources usage.' +) +ReservationUsage.slot_ms.__doc__ = ( + "Total slot milliseconds used by the reservation for a particular job." +) + + class _JobReference(object): """A reference to a job. @@ -305,6 +316,22 @@ def _job_statistics(self): statistics = self._properties.get("statistics", {}) return statistics.get(self._JOB_TYPE, {}) + @property + def reservation_usage(self): + """Job resource usage breakdown by reservation. + + Returns: + List[google.cloud.bigquery.job.ReservationUsage]: + Reservation usage stats. Can be empty if not set from the server. + """ + usage_stats_raw = _helpers._get_sub_prop( + self._properties, ["statistics", "reservationUsage"], default=() + ) + return [ + ReservationUsage(name=usage["name"], slot_ms=int(usage["slotMs"])) + for usage in usage_stats_raw + ] + @property def error_result(self): """Error information about the job as a whole. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 44bbc2c773d3..bbeffba50e38 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -319,6 +319,30 @@ def test_ended(self): stats["endTime"] = millis self.assertEqual(job.ended, now) + def test_reservation_usage_no_stats(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["statistics"] = {} + self.assertEqual(job.reservation_usage, []) + + def test_reservation_usage_stats_exist(self): + from google.cloud.bigquery.job import ReservationUsage + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["statistics"] = { + "reservationUsage": [ + {"name": "slot_foo", "slotMs": "42"}, + {"name": "slot_bar", "slotMs": "123"}, + ], + } + + expected = [ + ReservationUsage(name="slot_foo", slot_ms=42), + ReservationUsage(name="slot_bar", slot_ms=123), + ] + self.assertEqual(job.reservation_usage, expected) + def test__job_statistics(self): statistics = {"foo": "bar"} client = _make_client(project=self.PROJECT) From e8367016041dbdfb9fe3a0a87eb1b717b9a5e875 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 17 Feb 2021 12:06:05 -0800 Subject: [PATCH 1065/2016] docs: update python contributing guide (#514) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/9d4e6069-5c18-4f79-97fb-99ebae377691/targets - [ ] To automatically regenerate this PR, check this box. Source-Link: https://github.com/googleapis/synthtool/commit/4679e7e415221f03ff2a71e3ffad75b9ec41d87e PiperOrigin-RevId: 344443035 Source-Link: https://github.com/googleapis/googleapis/commit/df4fd38d040c5c8a0869936205bca13fb64b2cff --- .../google-cloud-bigquery/CONTRIBUTING.rst | 22 +++++++++++++++---- packages/google-cloud-bigquery/synth.metadata | 7 +++--- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index 15bcd2e28132..a0e330e44c1e 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -70,9 +70,14 @@ We use `nox `__ to instrument our tests. - To test your changes, run unit tests with ``nox``:: $ nox -s unit-2.7 - $ nox -s unit-3.7 + $ nox -s unit-3.8 $ ... +- Args to pytest can be passed through the nox command separated by a `--`. For + example, to run a single test:: + + $ nox -s unit-3.8 -- -k + .. note:: The unit tests and system tests are described in the @@ -93,8 +98,12 @@ On Debian/Ubuntu:: ************ Coding Style ************ +- We use the automatic code formatter ``black``. You can run it using + the nox session ``blacken``. This will eliminate many lint errors. Run via:: + + $ nox -s blacken -- PEP8 compliance, with exceptions defined in the linter configuration. +- PEP8 compliance is required, with exceptions defined in the linter configuration. If you have ``nox`` installed, you can test that you have not introduced any non-compliant code via:: @@ -133,13 +142,18 @@ Running System Tests - To run system tests, you can execute:: - $ nox -s system-3.7 + # Run all system tests + $ nox -s system-3.8 $ nox -s system-2.7 + # Run a single system test + $ nox -s system-3.8 -- -k + + .. note:: System tests are only configured to run under Python 2.7 and - Python 3.7. For expediency, we do not run them in older versions + Python 3.8. For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 1c5fecaf8533..f91ffab69131 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "1823cadee3acf95c516d0479400e4175349ea199" + "sha": "1c6681aba872c00afb16a904a2ba9bae8e9618d3" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" + "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" + "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" } } ], @@ -92,6 +92,7 @@ "CONTRIBUTING.rst", "LICENSE", "MANIFEST.in", + "bigquery-v2-py.tar.gz", "docs/_static/custom.css", "docs/_templates/layout.html", "docs/bigquery_v2/services.rst", From 851337ee1cf480b8287b6e2dfe82df2b1165f278 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 18 Feb 2021 08:18:03 -0800 Subject: [PATCH 1066/2016] chore: update automation naming, smaller generated code fixes (#505) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/9d4e6069-5c18-4f79-97fb-99ebae377691/targets - [ ] To automatically regenerate this PR, check this box. PiperOrigin-RevId: 350246057 Source-Link: https://github.com/googleapis/googleapis/commit/520682435235d9c503983a360a2090025aa47cd1 PiperOrigin-RevId: 347055288 Source-Link: https://github.com/googleapis/googleapis/commit/dd372aa22ded7a8ba6f0e03a80e06358a3fa0907 --- packages/google-cloud-bigquery/.coveragerc | 34 +-- packages/google-cloud-bigquery/.gitignore | 4 +- .../google-cloud-bigquery/.kokoro/build.sh | 10 + .../docs/bigquery_v2/services.rst | 6 - .../docs/bigquery_v2/types.rst | 1 + .../cloud/bigquery_v2/types/__init__.py | 1 - .../bigquery_v2/types/encryption_config.py | 2 +- .../google/cloud/bigquery_v2/types/model.py | 216 +++++++++--------- .../cloud/bigquery_v2/types/standard_sql.py | 10 +- packages/google-cloud-bigquery/synth.metadata | 12 +- packages/google-cloud-bigquery/synth.py | 2 + 11 files changed, 144 insertions(+), 154 deletions(-) delete mode 100644 packages/google-cloud-bigquery/docs/bigquery_v2/services.rst diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index 0d8e6297dc9c..23861a8eb51f 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -1,38 +1,18 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Generated by synthtool. DO NOT EDIT! [run] branch = True -omit = - google/cloud/__init__.py [report] fail_under = 100 show_missing = True +omit = + google/cloud/bigquery/__init__.py exclude_lines = # Re-enable the standard pragma pragma: NO COVER # Ignore debug-only repr def __repr__ - # Ignore abstract methods - raise NotImplementedError -omit = - */gapic/*.py - */proto/*.py - */core/*.py - */site-packages/*.py - google/cloud/__init__.py + # Ignore pkg_resources exceptions. + # This is added at the module level as a safeguard for if someone + # generates the code and tries to run it without pip installing. This + # makes it virtually impossible to test properly. + except pkg_resources.DistributionNotFound diff --git a/packages/google-cloud-bigquery/.gitignore b/packages/google-cloud-bigquery/.gitignore index b9daa52f118d..b4243ced74e4 100644 --- a/packages/google-cloud-bigquery/.gitignore +++ b/packages/google-cloud-bigquery/.gitignore @@ -50,8 +50,10 @@ docs.metadata # Virtual environment env/ + +# Test logs coverage.xml -sponge_log.xml +*sponge_log.xml # System test environment variables. system_tests/local_test_setup diff --git a/packages/google-cloud-bigquery/.kokoro/build.sh b/packages/google-cloud-bigquery/.kokoro/build.sh index 058f363e14c0..302cc1e1ac40 100755 --- a/packages/google-cloud-bigquery/.kokoro/build.sh +++ b/packages/google-cloud-bigquery/.kokoro/build.sh @@ -40,6 +40,16 @@ python3 -m pip uninstall --yes --quiet nox-automation python3 -m pip install --upgrade --quiet nox python3 -m nox --version +# If this is a continuous build, send the test log to the FlakyBot. +# See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. +if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then + cleanup() { + chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot + $KOKORO_GFILE_DIR/linux_amd64/flakybot + } + trap cleanup EXIT HUP +fi + # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. if [[ -n "${NOX_SESSION:-}" ]]; then diff --git a/packages/google-cloud-bigquery/docs/bigquery_v2/services.rst b/packages/google-cloud-bigquery/docs/bigquery_v2/services.rst deleted file mode 100644 index 65fbb438c70b..000000000000 --- a/packages/google-cloud-bigquery/docs/bigquery_v2/services.rst +++ /dev/null @@ -1,6 +0,0 @@ -Services for Google Cloud Bigquery v2 API -========================================= - -.. automodule:: google.cloud.bigquery_v2.services.model_service - :members: - :inherited-members: diff --git a/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst b/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst index 41b906514840..c36a83e0b415 100644 --- a/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst +++ b/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst @@ -3,4 +3,5 @@ Types for Google Cloud Bigquery v2 API .. automodule:: google.cloud.bigquery_v2.types :members: + :undoc-members: :show-inheritance: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py index 1e354641a12e..00dc837c9bd4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py @@ -32,7 +32,6 @@ ListModelsResponse, ) - __all__ = ( "EncryptionConfiguration", "ModelReference", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py index 6fb90f3409c3..2d801bde35f3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py @@ -30,7 +30,7 @@ class EncryptionConfiguration(proto.Message): r""" Attributes: - kms_key_name (~.wrappers.StringValue): + kms_key_name (google.protobuf.wrappers_pb2.StringValue): Optional. Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index c3530dec2bdb..8ae158b6409c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -45,7 +45,7 @@ class Model(proto.Message): Attributes: etag (str): Output only. A hash of this resource. - model_reference (~.gcb_model_reference.ModelReference): + model_reference (google.cloud.bigquery_v2.types.ModelReference): Required. Unique identifier for this model. creation_time (int): Output only. The time when this model was @@ -58,7 +58,7 @@ class Model(proto.Message): model. friendly_name (str): Optional. A descriptive name for this model. - labels (Sequence[~.gcb_model.Model.LabelsEntry]): + labels (Sequence[google.cloud.bigquery_v2.types.Model.LabelsEntry]): The labels associated with this model. You can use these to organize and group your models. Label keys and values can be no longer than 63 @@ -81,22 +81,22 @@ class Model(proto.Message): Output only. The geographic location where the model resides. This value is inherited from the dataset. - encryption_configuration (~.encryption_config.EncryptionConfiguration): + encryption_configuration (google.cloud.bigquery_v2.types.EncryptionConfiguration): Custom encryption configuration (e.g., Cloud KMS keys). This shows the encryption configuration of the model data while stored in BigQuery storage. This field can be used with PatchModel to update encryption key for an already encrypted model. - model_type (~.gcb_model.Model.ModelType): + model_type (google.cloud.bigquery_v2.types.Model.ModelType): Output only. Type of the model resource. - training_runs (Sequence[~.gcb_model.Model.TrainingRun]): + training_runs (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun]): Output only. Information for all training runs in increasing order of start_time. - feature_columns (Sequence[~.standard_sql.StandardSqlField]): + feature_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): Output only. Input feature columns that were used to train this model. - label_columns (Sequence[~.standard_sql.StandardSqlField]): + label_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): Output only. Label columns that were used to train this model. The output of the model will have a `predicted_` prefix to these columns. @@ -280,15 +280,15 @@ class RegressionMetrics(proto.Message): matrix factorization models. Attributes: - mean_absolute_error (~.wrappers.DoubleValue): + mean_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): Mean absolute error. - mean_squared_error (~.wrappers.DoubleValue): + mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue): Mean squared error. - mean_squared_log_error (~.wrappers.DoubleValue): + mean_squared_log_error (google.protobuf.wrappers_pb2.DoubleValue): Mean squared log error. - median_absolute_error (~.wrappers.DoubleValue): + median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): Median absolute error. - r_squared (~.wrappers.DoubleValue): + r_squared (google.protobuf.wrappers_pb2.DoubleValue): R^2 score. """ @@ -319,33 +319,33 @@ class AggregateClassificationMetrics(proto.Message): by counting the total number of correctly predicted rows. Attributes: - precision (~.wrappers.DoubleValue): + precision (google.protobuf.wrappers_pb2.DoubleValue): Precision is the fraction of actual positive predictions that had positive actual labels. For multiclass this is a macro-averaged metric treating each class as a binary classifier. - recall (~.wrappers.DoubleValue): + recall (google.protobuf.wrappers_pb2.DoubleValue): Recall is the fraction of actual positive labels that were given a positive prediction. For multiclass this is a macro-averaged metric. - accuracy (~.wrappers.DoubleValue): + accuracy (google.protobuf.wrappers_pb2.DoubleValue): Accuracy is the fraction of predictions given the correct label. For multiclass this is a micro-averaged metric. - threshold (~.wrappers.DoubleValue): + threshold (google.protobuf.wrappers_pb2.DoubleValue): Threshold at which the metrics are computed. For binary classification models this is the positive class threshold. For multi-class classfication models this is the confidence threshold. - f1_score (~.wrappers.DoubleValue): + f1_score (google.protobuf.wrappers_pb2.DoubleValue): The F1 score is an average of recall and precision. For multiclass this is a macro- averaged metric. - log_loss (~.wrappers.DoubleValue): + log_loss (google.protobuf.wrappers_pb2.DoubleValue): Logarithmic Loss. For multiclass this is a macro-averaged metric. - roc_auc (~.wrappers.DoubleValue): + roc_auc (google.protobuf.wrappers_pb2.DoubleValue): Area Under a ROC Curve. For multiclass this is a macro-averaged metric. """ @@ -369,9 +369,9 @@ class BinaryClassificationMetrics(proto.Message): models. Attributes: - aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics): + aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics): Aggregate classification metrics. - binary_confusion_matrix_list (Sequence[~.gcb_model.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]): + binary_confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]): Binary confusion matrix at multiple thresholds. positive_label (str): @@ -384,27 +384,27 @@ class BinaryConfusionMatrix(proto.Message): r"""Confusion matrix for binary classification models. Attributes: - positive_class_threshold (~.wrappers.DoubleValue): + positive_class_threshold (google.protobuf.wrappers_pb2.DoubleValue): Threshold value used when computing each of the following metric. - true_positives (~.wrappers.Int64Value): + true_positives (google.protobuf.wrappers_pb2.Int64Value): Number of true samples predicted as true. - false_positives (~.wrappers.Int64Value): + false_positives (google.protobuf.wrappers_pb2.Int64Value): Number of false samples predicted as true. - true_negatives (~.wrappers.Int64Value): + true_negatives (google.protobuf.wrappers_pb2.Int64Value): Number of true samples predicted as false. - false_negatives (~.wrappers.Int64Value): + false_negatives (google.protobuf.wrappers_pb2.Int64Value): Number of false samples predicted as false. - precision (~.wrappers.DoubleValue): + precision (google.protobuf.wrappers_pb2.DoubleValue): The fraction of actual positive predictions that had positive actual labels. - recall (~.wrappers.DoubleValue): + recall (google.protobuf.wrappers_pb2.DoubleValue): The fraction of actual positive labels that were given a positive prediction. - f1_score (~.wrappers.DoubleValue): + f1_score (google.protobuf.wrappers_pb2.DoubleValue): The equally weighted average of recall and precision. - accuracy (~.wrappers.DoubleValue): + accuracy (google.protobuf.wrappers_pb2.DoubleValue): The fraction of predictions given the correct label. """ @@ -462,9 +462,9 @@ class MultiClassClassificationMetrics(proto.Message): models. Attributes: - aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics): + aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics): Aggregate classification metrics. - confusion_matrix_list (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix]): + confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix]): Confusion matrix at different thresholds. """ @@ -472,10 +472,10 @@ class ConfusionMatrix(proto.Message): r"""Confusion matrix for multi-class classification models. Attributes: - confidence_threshold (~.wrappers.DoubleValue): + confidence_threshold (google.protobuf.wrappers_pb2.DoubleValue): Confidence threshold used when computing the entries of the confusion matrix. - rows (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]): + rows (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]): One row per actual label. """ @@ -487,7 +487,7 @@ class Entry(proto.Message): The predicted label. For confidence_threshold > 0, we will also add an entry indicating the number of items under the confidence threshold. - item_count (~.wrappers.Int64Value): + item_count (google.protobuf.wrappers_pb2.Int64Value): Number of items being predicted as this label. """ @@ -504,7 +504,7 @@ class Row(proto.Message): Attributes: actual_label (str): The original label of this row. - entries (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]): + entries (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]): Info describing predicted label distribution. """ @@ -540,12 +540,12 @@ class ClusteringMetrics(proto.Message): r"""Evaluation metrics for clustering models. Attributes: - davies_bouldin_index (~.wrappers.DoubleValue): + davies_bouldin_index (google.protobuf.wrappers_pb2.DoubleValue): Davies-Bouldin index. - mean_squared_distance (~.wrappers.DoubleValue): + mean_squared_distance (google.protobuf.wrappers_pb2.DoubleValue): Mean of squared distances between each sample to its cluster centroid. - clusters (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster]): + clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]): [Beta] Information for all clusters. """ @@ -555,10 +555,10 @@ class Cluster(proto.Message): Attributes: centroid_id (int): Centroid id. - feature_values (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue]): + feature_values (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue]): Values of highly variant features for this cluster. - count (~.wrappers.Int64Value): + count (google.protobuf.wrappers_pb2.Int64Value): Count of training data rows that were assigned to this cluster. """ @@ -569,10 +569,10 @@ class FeatureValue(proto.Message): Attributes: feature_column (str): The feature column name. - numerical_value (~.wrappers.DoubleValue): + numerical_value (google.protobuf.wrappers_pb2.DoubleValue): The numerical feature value. This is the centroid value for this feature. - categorical_value (~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): + categorical_value (google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): The categorical feature value. """ @@ -580,7 +580,7 @@ class CategoricalValue(proto.Message): r"""Representative value of a categorical feature. Attributes: - category_counts (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): + category_counts (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): Counts of all categories for the categorical feature. If there are more than ten categories, we return top ten (by count) and return one more CategoryCount with category @@ -594,7 +594,7 @@ class CategoryCount(proto.Message): Attributes: category (str): The name of category. - count (~.wrappers.Int64Value): + count (google.protobuf.wrappers_pb2.Int64Value): The count of training samples matching the category within the cluster. """ @@ -654,23 +654,23 @@ class RankingMetrics(proto.Message): feedback_type=implicit. Attributes: - mean_average_precision (~.wrappers.DoubleValue): + mean_average_precision (google.protobuf.wrappers_pb2.DoubleValue): Calculates a precision per user for all the items by ranking them and then averages all the precisions across all the users. - mean_squared_error (~.wrappers.DoubleValue): + mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue): Similar to the mean squared error computed in regression and explicit recommendation models except instead of computing the rating directly, the output from evaluate is computed against a preference which is 1 or 0 depending on if the rating exists or not. - normalized_discounted_cumulative_gain (~.wrappers.DoubleValue): + normalized_discounted_cumulative_gain (google.protobuf.wrappers_pb2.DoubleValue): A metric to determine the goodness of a ranking calculated from the predicted confidence by comparing it to an ideal rank measured by the original ratings. - average_rank (~.wrappers.DoubleValue): + average_rank (google.protobuf.wrappers_pb2.DoubleValue): Determines the goodness of a ranking by computing the percentile rank from the predicted confidence and dividing it by the original rank. @@ -696,11 +696,11 @@ class ArimaForecastingMetrics(proto.Message): r"""Model evaluation metrics for ARIMA forecasting models. Attributes: - non_seasonal_order (Sequence[~.gcb_model.Model.ArimaOrder]): + non_seasonal_order (Sequence[google.cloud.bigquery_v2.types.Model.ArimaOrder]): Non-seasonal order. - arima_fitting_metrics (Sequence[~.gcb_model.Model.ArimaFittingMetrics]): + arima_fitting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics]): Arima model fitting metrics. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. has_drift (Sequence[bool]): @@ -709,7 +709,7 @@ class ArimaForecastingMetrics(proto.Message): time_series_id (Sequence[str]): Id to differentiate different time series for the large-scale case. - arima_single_model_forecasting_metrics (Sequence[~.gcb_model.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]): + arima_single_model_forecasting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]): Repeated as there can be many metric sets (one for each model) in auto-arima and the large-scale case. @@ -720,16 +720,16 @@ class ArimaSingleModelForecastingMetrics(proto.Message): model. Attributes: - non_seasonal_order (~.gcb_model.Model.ArimaOrder): + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): Non-seasonal order. - arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics): + arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics): Arima fitting metrics. has_drift (bool): Is arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): The id to indicate different time series. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. """ @@ -779,21 +779,21 @@ class EvaluationMetrics(proto.Message): imported models. Attributes: - regression_metrics (~.gcb_model.Model.RegressionMetrics): + regression_metrics (google.cloud.bigquery_v2.types.Model.RegressionMetrics): Populated for regression models and explicit feedback type matrix factorization models. - binary_classification_metrics (~.gcb_model.Model.BinaryClassificationMetrics): + binary_classification_metrics (google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics): Populated for binary classification/classifier models. - multi_class_classification_metrics (~.gcb_model.Model.MultiClassClassificationMetrics): + multi_class_classification_metrics (google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics): Populated for multi-class classification/classifier models. - clustering_metrics (~.gcb_model.Model.ClusteringMetrics): + clustering_metrics (google.cloud.bigquery_v2.types.Model.ClusteringMetrics): Populated for clustering models. - ranking_metrics (~.gcb_model.Model.RankingMetrics): + ranking_metrics (google.cloud.bigquery_v2.types.Model.RankingMetrics): Populated for implicit feedback type matrix factorization models. - arima_forecasting_metrics (~.gcb_model.Model.ArimaForecastingMetrics): + arima_forecasting_metrics (google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics): Populated for ARIMA models. """ @@ -835,10 +835,10 @@ class DataSplitResult(proto.Message): and evaluation data tables that were used to train the model. Attributes: - training_table (~.table_reference.TableReference): + training_table (google.cloud.bigquery_v2.types.TableReference): Table reference of the training data after split. - evaluation_table (~.table_reference.TableReference): + evaluation_table (google.cloud.bigquery_v2.types.TableReference): Table reference of the evaluation data after split. """ @@ -893,7 +893,7 @@ class GlobalExplanation(proto.Message): features after training. Attributes: - explanations (Sequence[~.gcb_model.Model.GlobalExplanation.Explanation]): + explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation.Explanation]): A list of the top global explanations. Sorted by absolute value of attribution in descending order. @@ -913,7 +913,7 @@ class Explanation(proto.Message): be formatted like .. Overall size of feature name will always be truncated to first 120 characters. - attribution (~.wrappers.DoubleValue): + attribution (google.protobuf.wrappers_pb2.DoubleValue): Attribution of feature. """ @@ -933,22 +933,22 @@ class TrainingRun(proto.Message): r"""Information about a single training query run for the model. Attributes: - training_options (~.gcb_model.Model.TrainingRun.TrainingOptions): + training_options (google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions): Options that were used for this training run, includes user specified and default options that were used. - start_time (~.timestamp.Timestamp): + start_time (google.protobuf.timestamp_pb2.Timestamp): The start time of this training run. - results (Sequence[~.gcb_model.Model.TrainingRun.IterationResult]): + results (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult]): Output of each iteration run, results.size() <= max_iterations. - evaluation_metrics (~.gcb_model.Model.EvaluationMetrics): + evaluation_metrics (google.cloud.bigquery_v2.types.Model.EvaluationMetrics): The evaluation metrics over training/eval data that were computed at the end of training. - data_split_result (~.gcb_model.Model.DataSplitResult): + data_split_result (google.cloud.bigquery_v2.types.Model.DataSplitResult): Data split result of the training run. Only set when the input data is actually split. - global_explanations (Sequence[~.gcb_model.Model.GlobalExplanation]): + global_explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation]): Global explanations for important features of the model. For multi-class models, there is one entry for each label class. For other models, @@ -962,30 +962,30 @@ class TrainingOptions(proto.Message): max_iterations (int): The maximum number of iterations in training. Used only for iterative training algorithms. - loss_type (~.gcb_model.Model.LossType): + loss_type (google.cloud.bigquery_v2.types.Model.LossType): Type of loss function used during training run. learn_rate (float): Learning rate in training. Used only for iterative training algorithms. - l1_regularization (~.wrappers.DoubleValue): + l1_regularization (google.protobuf.wrappers_pb2.DoubleValue): L1 regularization coefficient. - l2_regularization (~.wrappers.DoubleValue): + l2_regularization (google.protobuf.wrappers_pb2.DoubleValue): L2 regularization coefficient. - min_relative_progress (~.wrappers.DoubleValue): + min_relative_progress (google.protobuf.wrappers_pb2.DoubleValue): When early_stop is true, stops training when accuracy improvement is less than 'min_relative_progress'. Used only for iterative training algorithms. - warm_start (~.wrappers.BoolValue): + warm_start (google.protobuf.wrappers_pb2.BoolValue): Whether to train a model from the last checkpoint. - early_stop (~.wrappers.BoolValue): + early_stop (google.protobuf.wrappers_pb2.BoolValue): Whether to stop early when the loss doesn't improve significantly any more (compared to min_relative_progress). Used only for iterative training algorithms. input_label_columns (Sequence[str]): Name of input label columns in training data. - data_split_method (~.gcb_model.Model.DataSplitMethod): + data_split_method (google.cloud.bigquery_v2.types.Model.DataSplitMethod): The data split type for training and evaluation, e.g. RANDOM. data_split_eval_fraction (float): @@ -1007,13 +1007,13 @@ class TrainingOptions(proto.Message): and the rest are eval data. It respects the order in Orderable data types: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties - learn_rate_strategy (~.gcb_model.Model.LearnRateStrategy): + learn_rate_strategy (google.cloud.bigquery_v2.types.Model.LearnRateStrategy): The strategy to determine learn rate for the current iteration. initial_learn_rate (float): Specifies the initial learning rate for the line search learn rate strategy. - label_class_weights (Sequence[~.gcb_model.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]): + label_class_weights (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]): Weights associated with each label class, for rebalancing the training data. Only applicable for classification models. @@ -1023,21 +1023,21 @@ class TrainingOptions(proto.Message): item_column (str): Item column specified for matrix factorization models. - distance_type (~.gcb_model.Model.DistanceType): + distance_type (google.cloud.bigquery_v2.types.Model.DistanceType): Distance type for clustering models. num_clusters (int): Number of clusters for clustering models. model_uri (str): [Beta] Google Cloud Storage URI from which the model was imported. Only applicable for imported models. - optimization_strategy (~.gcb_model.Model.OptimizationStrategy): + optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy): Optimization strategy for training linear regression models. hidden_units (Sequence[int]): Hidden units for dnn models. batch_size (int): Batch size for dnn models. - dropout (~.wrappers.DoubleValue): + dropout (google.protobuf.wrappers_pb2.DoubleValue): Dropout probability for dnn models. max_tree_depth (int): Maximum depth of a tree for boosted tree @@ -1046,18 +1046,18 @@ class TrainingOptions(proto.Message): Subsample fraction of the training data to grow tree to prevent overfitting for boosted tree models. - min_split_loss (~.wrappers.DoubleValue): + min_split_loss (google.protobuf.wrappers_pb2.DoubleValue): Minimum split loss for boosted tree models. num_factors (int): Num factors specified for matrix factorization models. - feedback_type (~.gcb_model.Model.FeedbackType): + feedback_type (google.cloud.bigquery_v2.types.Model.FeedbackType): Feedback type that specifies which algorithm to run for matrix factorization. - wals_alpha (~.wrappers.DoubleValue): + wals_alpha (google.protobuf.wrappers_pb2.DoubleValue): Hyperparameter for matrix factoration when implicit feedback type is specified. - kmeans_initialization_method (~.gcb_model.Model.KmeansEnums.KmeansInitializationMethod): + kmeans_initialization_method (google.cloud.bigquery_v2.types.Model.KmeansEnums.KmeansInitializationMethod): The method used to initialize the centroids for kmeans algorithm. kmeans_initialization_column (str): @@ -1071,16 +1071,16 @@ class TrainingOptions(proto.Message): for ARIMA model. auto_arima (bool): Whether to enable auto ARIMA or not. - non_seasonal_order (~.gcb_model.Model.ArimaOrder): + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): A specification of the non-seasonal part of the ARIMA model: the three components (p, d, q) are the AR order, the degree of differencing, and the MA order. - data_frequency (~.gcb_model.Model.DataFrequency): + data_frequency (google.cloud.bigquery_v2.types.Model.DataFrequency): The data frequency of a time series. include_drift (bool): Include drift when fitting an ARIMA model. - holiday_region (~.gcb_model.Model.HolidayRegion): + holiday_region (google.cloud.bigquery_v2.types.Model.HolidayRegion): The geographical region based on which the holidays are considered in time series modeling. If a valid value is specified, then holiday @@ -1226,23 +1226,23 @@ class IterationResult(proto.Message): r"""Information about a single iteration of the training run. Attributes: - index (~.wrappers.Int32Value): + index (google.protobuf.wrappers_pb2.Int32Value): Index of the iteration, 0 based. - duration_ms (~.wrappers.Int64Value): + duration_ms (google.protobuf.wrappers_pb2.Int64Value): Time taken to run the iteration in milliseconds. - training_loss (~.wrappers.DoubleValue): + training_loss (google.protobuf.wrappers_pb2.DoubleValue): Loss computed on the training data at the end of iteration. - eval_loss (~.wrappers.DoubleValue): + eval_loss (google.protobuf.wrappers_pb2.DoubleValue): Loss computed on the eval data at the end of iteration. learn_rate (float): Learn rate used for this iteration. - cluster_infos (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ClusterInfo]): + cluster_infos (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ClusterInfo]): Information about top clusters for clustering models. - arima_result (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult): + arima_result (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult): """ @@ -1252,10 +1252,10 @@ class ClusterInfo(proto.Message): Attributes: centroid_id (int): Centroid id. - cluster_radius (~.wrappers.DoubleValue): + cluster_radius (google.protobuf.wrappers_pb2.DoubleValue): Cluster radius, the average distance from centroid to each point assigned to the cluster. - cluster_size (~.wrappers.Int64Value): + cluster_size (google.protobuf.wrappers_pb2.Int64Value): Cluster size, the total number of points assigned to the cluster. """ @@ -1276,11 +1276,11 @@ class ArimaResult(proto.Message): iteration results. Attributes: - arima_model_info (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]): + arima_model_info (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]): This message is repeated because there are multiple arima models fitted in auto-arima. For non-auto-arima model, its size is one. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. """ @@ -1314,18 +1314,18 @@ class ArimaModelInfo(proto.Message): r"""Arima model information. Attributes: - non_seasonal_order (~.gcb_model.Model.ArimaOrder): + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): Non-seasonal order. - arima_coefficients (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients): + arima_coefficients (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients): Arima coefficients. - arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics): + arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics): Arima fitting metrics. has_drift (bool): Whether Arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): The id to indicate different time series. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. """ @@ -1482,7 +1482,7 @@ class PatchModelRequest(proto.Message): Required. Dataset ID of the model to patch. model_id (str): Required. Model ID of the model to patch. - model (~.gcb_model.Model): + model (google.cloud.bigquery_v2.types.Model): Required. Patched model. Follows RFC5789 patch semantics. Missing fields are not updated. To clear a field, explicitly @@ -1525,7 +1525,7 @@ class ListModelsRequest(proto.Message): Required. Project ID of the models to list. dataset_id (str): Required. Dataset ID of the models to list. - max_results (~.wrappers.UInt32Value): + max_results (google.protobuf.wrappers_pb2.UInt32Value): The maximum number of results to return in a single response page. Leverage the page tokens to iterate through the entire collection. @@ -1547,7 +1547,7 @@ class ListModelsResponse(proto.Message): r""" Attributes: - models (Sequence[~.gcb_model.Model]): + models (Sequence[google.cloud.bigquery_v2.types.Model]): Models in the requested dataset. Only the following fields are populated: model_reference, model_type, creation_time, last_modified_time and labels. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py index 80e4632f7e15..3bc6afedce92 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py @@ -33,13 +33,13 @@ class StandardSqlDataType(proto.Message): array_element_type="DATE"}} ]}} Attributes: - type_kind (~.standard_sql.StandardSqlDataType.TypeKind): + type_kind (google.cloud.bigquery_v2.types.StandardSqlDataType.TypeKind): Required. The top level type of this field. Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY"). - array_element_type (~.standard_sql.StandardSqlDataType): + array_element_type (google.cloud.bigquery_v2.types.StandardSqlDataType): The type of the array's elements, if type_kind = "ARRAY". - struct_type (~.standard_sql.StandardSqlStructType): + struct_type (google.cloud.bigquery_v2.types.StandardSqlStructType): The fields of this struct, in order, if type_kind = "STRUCT". """ @@ -80,7 +80,7 @@ class StandardSqlField(proto.Message): name (str): Optional. The name of this field. Can be absent for struct fields. - type (~.standard_sql.StandardSqlDataType): + type (google.cloud.bigquery_v2.types.StandardSqlDataType): Optional. The type of this parameter. Absent if not explicitly specified (e.g., CREATE FUNCTION statement can omit the return type; in @@ -97,7 +97,7 @@ class StandardSqlStructType(proto.Message): r""" Attributes: - fields (Sequence[~.standard_sql.StandardSqlField]): + fields (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): """ diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index f91ffab69131..dc183a72e087 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -11,22 +11,22 @@ "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "df4fd38d040c5c8a0869936205bca13fb64b2cff", - "internalRef": "344443035" + "sha": "e13001be33d69042a9505e698f792587a804a5cf", + "internalRef": "358152223" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" + "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" + "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" } } ], @@ -42,6 +42,7 @@ } ], "generatedFiles": [ + ".coveragerc", ".flake8", ".github/CONTRIBUTING.md", ".github/ISSUE_TEMPLATE/bug_report.md", @@ -95,6 +96,7 @@ "bigquery-v2-py.tar.gz", "docs/_static/custom.css", "docs/_templates/layout.html", + "docs/bigquery_v2/model_service.rst", "docs/bigquery_v2/services.rst", "docs/bigquery_v2/types.rst", "docs/conf.py", @@ -127,4 +129,4 @@ "setup.cfg", "testing/.gitignore" ] -} \ No newline at end of file +} diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index 341c5832f49d..3ab271c96375 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -33,6 +33,8 @@ library, excludes=[ "docs/index.rst", + "docs/bigquery_v2/*_service.rst", + "docs/bigquery_v2/services.rst", "README.rst", "noxfile.py", "setup.py", From 96d450625c6e72dd0ccf08aaeb6cd0073376ad0f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 18 Feb 2021 10:52:56 -0600 Subject: [PATCH 1067/2016] chore: release 2.9.0 (#526) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 14 ++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 768b7b0361b0..51fad831e074 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.9.0](https://www.github.com/googleapis/python-bigquery/compare/v2.8.0...v2.9.0) (2021-02-18) + + +### Features + +* add determinism level for javascript UDFs ([#522](https://www.github.com/googleapis/python-bigquery/issues/522)) ([edd3328](https://www.github.com/googleapis/python-bigquery/commit/edd3328fffa3040b2cd3a3c668c90a0e43e4c94c)) +* expose reservation usage stats on jobs ([#524](https://www.github.com/googleapis/python-bigquery/issues/524)) ([4ffb4e0](https://www.github.com/googleapis/python-bigquery/commit/4ffb4e067abdaa54dad6eff49a7fbdb0fa358637)) + + +### Documentation + +* clarify `%%bigquery`` magics and fix broken link ([#508](https://www.github.com/googleapis/python-bigquery/issues/508)) ([eedf93b](https://www.github.com/googleapis/python-bigquery/commit/eedf93b6636c5ff1bd810c6038cfeaea8ccb64d8)) +* update python contributing guide ([#514](https://www.github.com/googleapis/python-bigquery/issues/514)) ([01e851d](https://www.github.com/googleapis/python-bigquery/commit/01e851d00fc17a780375580776753d78f6d74174)) + ## [2.8.0](https://www.github.com/googleapis/python-bigquery/compare/v2.7.0...v2.8.0) (2021-02-08) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 0a9aecb37574..b2a8c5535e20 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.8.0" +__version__ = "2.9.0" From 9183848f3896722dcfe746f7979e093358fa254d Mon Sep 17 00:00:00 2001 From: Alan Yee Date: Fri, 19 Feb 2021 03:20:15 -0800 Subject: [PATCH 1068/2016] docs(bigquery): Add alternative approach to setting credentials (#517) * docs(bigquery): Add alternative approach to setting credentials * docs(bigquery): Add alternative approach to setting credentials Correction: json object rather than string * Remove trailing space Co-authored-by: Peter Lamut --- .../samples/snippets/authenticate_service_account.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py index 58cd2b542fe7..c07848beea47 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py +++ b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py @@ -30,6 +30,11 @@ def main(): key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"], ) + # Alternatively, use service_account.Credentials.from_service_account_info() + # to set credentials directly via a json object rather than set a filepath + # TODO(developer): Set key_json to the content of the service account key file. + # credentials = service_account.Credentials.from_service_account_info(key_json) + client = bigquery.Client(credentials=credentials, project=credentials.project_id,) # [END bigquery_client_json_credentials] return client From 9ab55f1d13a4ebb3b1a2289e5dcc8e34149a626a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Feb 2021 19:53:11 +0100 Subject: [PATCH 1069/2016] chore(deps): update dependency google-cloud-bigquery to v2.9.0 (#515) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 6f9306af2d41..e9fcfca03605 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.7.0 +google-cloud-bigquery==2.9.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 7087121b576a..9def04cb8771 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.7.0 +google-cloud-bigquery==2.9.0 google-cloud-bigquery-storage==2.2.1 google-auth-oauthlib==0.4.2 grpcio==1.35.0 From d0fc9f73eaf6098c8cd9cf3a9c85f16dd2bc8d5c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Feb 2021 20:06:05 +0100 Subject: [PATCH 1070/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.3.0 (#529) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.2.1` -> `==2.3.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/compatibility-slim/2.2.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/confidence-slim/2.2.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.3.0`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​230-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev221v230-2021-02-18) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.2.1...v2.3.0) ##### Features - add `client_cert_source_for_mtls` argument to transports ([#​135](https://www.github.com/googleapis/python-bigquery-storage/issues/135)) ([072850d](https://www.github.com/googleapis/python-bigquery-storage/commit/072850dd341909fdc22f330117a17e48da12fdd1)) ##### Documentation - update python contributing guide ([#​140](https://www.github.com/googleapis/python-bigquery-storage/issues/140)) ([1671056](https://www.github.com/googleapis/python-bigquery-storage/commit/1671056bfe181660440b1bf4415005e3eed01eb2)) ##### [2.2.1](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.2.0...v2.2.1) (2021-01-25) ##### Documentation - remove required session variable to fix publish ([#​124](https://www.github.com/googleapis/python-bigquery-storage/issues/124)) ([19a105c](https://www.github.com/googleapis/python-bigquery-storage/commit/19a105cb9c868bb1a9e63966609a2488876f511b))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 9def04cb8771..c638178fce46 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.9.0 -google-cloud-bigquery-storage==2.2.1 +google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 grpcio==1.35.0 ipython==7.16.1; python_version < '3.7' From f424c05fc93dcf137337b4f4117ff3c4ede67aa4 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 23 Feb 2021 10:14:26 +0100 Subject: [PATCH 1071/2016] docs: explain retry behavior for DONE jobs (#532) * docs: explain retry behavior for DONE jobs * Update google/cloud/bigquery/job/query.py Co-authored-by: Tim Swast --- .../google/cloud/bigquery/job/base.py | 8 ++++++-- .../google/cloud/bigquery/job/query.py | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index d8f5d6528c56..f24e972c8f21 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -614,7 +614,9 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): """Checks if the job is complete. Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. If the job state is ``DONE``, retrying is aborted + early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -635,7 +637,9 @@ def result(self, retry=DEFAULT_RETRY, timeout=None): """Start the job and wait for it to complete and get the result. Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. If the job state is ``DONE``, retrying is aborted + early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index d87f87f52835..b3ca8d940222 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -979,7 +979,8 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): Args: retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves query results. + How to retry the call that retrieves query results. If the job state is + ``DONE``, retrying is aborted early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1128,7 +1129,9 @@ def result( max_results (Optional[int]): The maximum total number of rows from this request. retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. + How to retry the call that retrieves rows. If the job state is + ``DONE``, retrying is aborted early even if the results are not + available, as this will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. From 1e8f131489744dacc742148bf539375226c60bee Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 23 Feb 2021 18:33:11 +0100 Subject: [PATCH 1072/2016] feat: add BIGNUMERIC support (#527) * feat: add support of BIGNUMERIC * feat: add BIGNUMERIC support * Add bignumeric_type extra * Add additional BIGNUMERIC tests * Prevent import time error if no BIGNUMERIC support * Add/improve a few comments * Add feature flag for BIGNUMERIC suppport Co-authored-by: HemangChothani --- .../google/cloud/bigquery/_pandas_helpers.py | 18 +- .../google/cloud/bigquery/dbapi/_helpers.py | 12 +- .../google/cloud/bigquery/dbapi/types.py | 2 +- .../google/cloud/bigquery/query.py | 8 +- .../google/cloud/bigquery/schema.py | 1 + packages/google-cloud-bigquery/setup.py | 2 + .../tests/system/test_client.py | 162 ++++++++----- .../tests/unit/test__pandas_helpers.py | 228 +++++++++++------- .../tests/unit/test_dbapi__helpers.py | 14 ++ .../tests/unit/test_query.py | 10 + 10 files changed, 305 insertions(+), 152 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 162c58b4b500..7ad416e084ed 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -20,6 +20,7 @@ import queue import warnings +from packaging import version try: import pandas @@ -80,6 +81,10 @@ def pyarrow_numeric(): return pyarrow.decimal128(38, 9) +def pyarrow_bignumeric(): + return pyarrow.decimal256(76, 38) + + def pyarrow_time(): return pyarrow.time64("us") @@ -128,14 +133,23 @@ def pyarrow_timestamp(): pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id: "NUMERIC", - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal128 instances. } + if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): + BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BIGNUMERIC_SUPPORT = True + else: + _BIGNUMERIC_SUPPORT = False + else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER + _BIGNUMERIC_SUPPORT = False # pragma: NO COVER def bq_to_arrow_struct_data_type(field): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 95b5869e509a..6b36d6e433bf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -19,6 +19,11 @@ import functools import numbers +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + from google.cloud import bigquery from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import exceptions @@ -184,7 +189,12 @@ def bigquery_scalar_type(value): elif isinstance(value, numbers.Real): return "FLOAT64" elif isinstance(value, decimal.Decimal): - return "NUMERIC" + # We check for NUMERIC before BIGNUMERIC in order to support pyarrow < 3.0. + scalar_object = pyarrow.scalar(value) + if isinstance(scalar_object, pyarrow.Decimal128Scalar): + return "NUMERIC" + else: + return "BIGNUMERIC" elif isinstance(value, str): return "STRING" elif isinstance(value, bytes): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py index 14917820cd38..20eca9b00c65 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py @@ -78,7 +78,7 @@ def __eq__(self, other): STRING = "STRING" BINARY = _DBAPITypeObject("BYTES", "RECORD", "STRUCT") NUMBER = _DBAPITypeObject( - "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BOOLEAN", "BOOL" + "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BIGNUMERIC", "BOOLEAN", "BOOL" ) DATETIME = _DBAPITypeObject("TIMESTAMP", "DATE", "TIME", "DATETIME") ROWID = "ROWID" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index f2ed6337e697..ecec73e9938b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -83,7 +83,7 @@ class ScalarQueryParameter(_AbstractQueryParameter): type_ (str): Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): @@ -102,7 +102,7 @@ def positional(cls, type_, value): Args: type_ (str): Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): @@ -186,7 +186,7 @@ class ArrayQueryParameter(_AbstractQueryParameter): array_type (str): Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. values (List[appropriate scalar type]): The parameter array values. """ @@ -203,7 +203,7 @@ def positional(cls, array_type, values): Args: array_type (str): Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. values (List[appropriate scalar type]): The parameter array values. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index c76aded02118..9be27f3e8f8f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -32,6 +32,7 @@ "FLOAT": types.StandardSqlDataType.TypeKind.FLOAT64, "FLOAT64": types.StandardSqlDataType.TypeKind.FLOAT64, "NUMERIC": types.StandardSqlDataType.TypeKind.NUMERIC, + "BIGNUMERIC": types.StandardSqlDataType.TypeKind.BIGNUMERIC, "BOOLEAN": types.StandardSqlDataType.TypeKind.BOOL, "BOOL": types.StandardSqlDataType.TypeKind.BOOL, "GEOGRAPHY": types.StandardSqlDataType.TypeKind.GEOGRAPHY, diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index ea2df4843c0c..31b6a3ff7bd4 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -33,6 +33,7 @@ "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", + "packaging >= 14.3", "protobuf >= 3.12.0", ] extras = { @@ -48,6 +49,7 @@ "pyarrow >= 1.0.0, < 4.0dev", ], "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",], + "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api==0.11b0", diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 60c3b3fa8afe..684a42c3028b 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -65,6 +65,7 @@ from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud import bigquery_v2 +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table @@ -891,6 +892,9 @@ def test_load_table_from_dataframe_w_nulls(self): bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED # mode mismatch. See: @@ -902,21 +906,22 @@ def test_load_table_from_dataframe_w_nulls(self): ) num_rows = 100 nulls = [None] * num_rows - df_data = collections.OrderedDict( - [ - ("bool_col", nulls), - ("bytes_col", nulls), - ("date_col", nulls), - ("dt_col", nulls), - ("float_col", nulls), - ("geo_col", nulls), - ("int_col", nulls), - ("num_col", nulls), - ("str_col", nulls), - ("time_col", nulls), - ("ts_col", nulls), - ] - ) + df_data = [ + ("bool_col", nulls), + ("bytes_col", nulls), + ("date_col", nulls), + ("dt_col", nulls), + ("float_col", nulls), + ("geo_col", nulls), + ("int_col", nulls), + ("num_col", nulls), + ("str_col", nulls), + ("time_col", nulls), + ("ts_col", nulls), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append(("bignum_col", nulls)) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") @@ -1003,6 +1008,9 @@ def test_load_table_from_dataframe_w_explicit_schema(self): bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED # mode mismatch. See: @@ -1012,57 +1020,65 @@ def test_load_table_from_dataframe_w_explicit_schema(self): # https://jira.apache.org/jira/browse/ARROW-2587 # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), ) - df_data = collections.OrderedDict( - [ - ("bool_col", [True, None, False]), - ("bytes_col", [b"abc", None, b"def"]), - ( - "date_col", - [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], - ), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), - ("float_col", [float("-inf"), float("nan"), float("inf")]), - ( - "geo_col", - [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - ), - ("int_col", [-9223372036854775808, None, 9223372036854775807]), - ( - "num_col", - [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - ), - ("str_col", [u"abc", None, u"def"]), - ( - "time_col", - [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], - ), + + df_data = [ + ("bool_col", [True, None, False]), + ("bytes_col", [b"abc", None, b"def"]), + ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), + # ( + # "dt_col", + # [ + # datetime.datetime(1, 1, 1, 0, 0, 0), + # None, + # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + # ], + # ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ("str_col", [u"abc", None, u"def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + ), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append( ( - "ts_col", + "bignum_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), ], - ), - ] - ) + ) + ) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") @@ -1172,6 +1188,7 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), @@ -1210,6 +1227,14 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", [u"abc", None, u"def"]), ( "time_col", @@ -2157,6 +2182,10 @@ def test_query_w_query_params(self): pi_numeric_param = ScalarQueryParameter( name="pi_numeric_param", type_="NUMERIC", value=pi_numeric ) + bignum = decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)) + bignum_param = ScalarQueryParameter( + name="bignum_param", type_="BIGNUMERIC", value=bignum + ) truthy = True truthy_param = ScalarQueryParameter(name="truthy", type_="BOOL", value=truthy) beef = b"DEADBEEF" @@ -2302,6 +2331,15 @@ def test_query_w_query_params(self): "query_parameters": [with_friends_param], }, ] + if _BIGNUMERIC_SUPPORT: + examples.append( + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + } + ) + for example in examples: jconfig = QueryJobConfig() jconfig.query_parameters = example["query_parameters"] diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index ef0c40e1aee1..abd72582015e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -39,6 +39,12 @@ from google import api_core from google.cloud.bigquery import schema +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT + + +skip_if_no_bignumeric = pytest.mark.skipif( + not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", +) @pytest.fixture @@ -70,6 +76,15 @@ def is_numeric(type_): )(type_) +def is_bignumeric(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type + return all_( + pyarrow.types.is_decimal, + lambda type_: type_.precision == 76, + lambda type_: type_.scale == 38, + )(type_) + + def is_timestamp(type_): # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type return all_( @@ -120,6 +135,9 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), + pytest.param( + "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, + ), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -198,6 +216,12 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), + pytest.param( + "BIGNUMERIC", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), + marks=skip_if_no_bignumeric, + ), ( "BOOLEAN", "REPEATED", @@ -270,34 +294,41 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BOOLEAN"), - schema.SchemaField("field09", "BOOL"), - schema.SchemaField("field10", "TIMESTAMP"), - schema.SchemaField("field11", "DATE"), - schema.SchemaField("field12", "TIME"), - schema.SchemaField("field13", "DATETIME"), - schema.SchemaField("field14", "GEOGRAPHY"), + schema.SchemaField("field09", "BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", "TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), ) + + if _BIGNUMERIC_SUPPORT: + fields += (schema.SchemaField("field08", "BIGNUMERIC"),) + field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) - expected = pyarrow.struct( - ( - pyarrow.field("field01", pyarrow.string()), - pyarrow.field("field02", pyarrow.binary()), - pyarrow.field("field03", pyarrow.int64()), - pyarrow.field("field04", pyarrow.int64()), - pyarrow.field("field05", pyarrow.float64()), - pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", pyarrow.bool_()), - pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", module_under_test.pyarrow_timestamp()), - pyarrow.field("field11", pyarrow.date32()), - pyarrow.field("field12", module_under_test.pyarrow_time()), - pyarrow.field("field13", module_under_test.pyarrow_datetime()), - pyarrow.field("field14", pyarrow.string()), - ) + + expected = ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) + if _BIGNUMERIC_SUPPORT: + expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) + expected = pyarrow.struct(expected) + assert pyarrow.types.is_struct(actual) assert actual.num_fields == len(fields) assert actual.equals(expected) @@ -314,34 +345,41 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BOOLEAN"), - schema.SchemaField("field09", "BOOL"), - schema.SchemaField("field10", "TIMESTAMP"), - schema.SchemaField("field11", "DATE"), - schema.SchemaField("field12", "TIME"), - schema.SchemaField("field13", "DATETIME"), - schema.SchemaField("field14", "GEOGRAPHY"), + schema.SchemaField("field09", "BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", "TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), ) + + if _BIGNUMERIC_SUPPORT: + fields += (schema.SchemaField("field08", "BIGNUMERIC"),) + field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) - expected_value_type = pyarrow.struct( - ( - pyarrow.field("field01", pyarrow.string()), - pyarrow.field("field02", pyarrow.binary()), - pyarrow.field("field03", pyarrow.int64()), - pyarrow.field("field04", pyarrow.int64()), - pyarrow.field("field05", pyarrow.float64()), - pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", pyarrow.bool_()), - pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", module_under_test.pyarrow_timestamp()), - pyarrow.field("field11", pyarrow.date32()), - pyarrow.field("field12", module_under_test.pyarrow_time()), - pyarrow.field("field13", module_under_test.pyarrow_datetime()), - pyarrow.field("field14", pyarrow.string()), - ) + + expected = ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) + if _BIGNUMERIC_SUPPORT: + expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) + expected_value_type = pyarrow.struct(expected) + assert pyarrow.types.is_list(actual) assert pyarrow.types.is_struct(actual.value_type) assert actual.value_type.num_fields == len(fields) @@ -385,6 +423,16 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), + pytest.param( + "BIGNUMERIC", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("3.141592653589793238462643383279"), + ], + marks=skip_if_no_bignumeric, + ), ("BOOLEAN", [True, None, False, None]), ("BOOL", [False, None, True, None]), ( @@ -841,41 +889,45 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), - schema.SchemaField("field08", "BOOLEAN", mode="REQUIRED"), - schema.SchemaField("field09", "BOOL", mode="REQUIRED"), - schema.SchemaField("field10", "TIMESTAMP", mode="REQUIRED"), - schema.SchemaField("field11", "DATE", mode="REQUIRED"), - schema.SchemaField("field12", "TIME", mode="REQUIRED"), - schema.SchemaField("field13", "DATETIME", mode="REQUIRED"), - schema.SchemaField("field14", "GEOGRAPHY", mode="REQUIRED"), - ) - dataframe = pandas.DataFrame( - { - "field01": ["hello", "world"], - "field02": [b"abd", b"efg"], - "field03": [1, 2], - "field04": [3, 4], - "field05": [1.25, 9.75], - "field06": [-1.75, -3.5], - "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], - "field08": [True, False], - "field09": [False, True], - "field10": [ - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), - ], - "field11": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], - "field12": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], - "field13": [ - datetime.datetime(1970, 1, 1, 0, 0, 0), - datetime.datetime(2012, 12, 21, 9, 7, 42), - ], - "field14": [ - "POINT(30 10)", - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - } + schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), + schema.SchemaField("field10", "BOOL", mode="REQUIRED"), + schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), + schema.SchemaField("field12", "DATE", mode="REQUIRED"), + schema.SchemaField("field13", "TIME", mode="REQUIRED"), + schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), + schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) + if _BIGNUMERIC_SUPPORT: + bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) + + data = { + "field01": ["hello", "world"], + "field02": [b"abd", b"efg"], + "field03": [1, 2], + "field04": [3, 4], + "field05": [1.25, 9.75], + "field06": [-1.75, -3.5], + "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field09": [True, False], + "field10": [False, True], + "field11": [ + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), + ], + "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], + "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], + "field14": [ + datetime.datetime(1970, 1, 1, 0, 0, 0), + datetime.datetime(2012, 12, 21, 9, 7, 42), + ], + "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], + } + if _BIGNUMERIC_SUPPORT: + data["field08"] = [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ] + dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) arrow_schema = arrow_table.schema @@ -1089,6 +1141,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): "bytes_field": b"some bytes", "string_field": u"some characters", "numeric_field": decimal.Decimal("123.456"), + "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), } ] ) @@ -1109,6 +1162,10 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), ) + if _BIGNUMERIC_SUPPORT: + current_schema += ( + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), + ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, current_schema) @@ -1131,6 +1188,13 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), ) + if _BIGNUMERIC_SUPPORT: + expected_schema += ( + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), + ) + by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index fffa46aa8957..c28c014d48f9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -25,6 +25,7 @@ import google.cloud._helpers from google.cloud.bigquery import table +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -51,6 +52,14 @@ def test_scalar_to_query_parameter(self): "TIMESTAMP", ), ] + if _BIGNUMERIC_SUPPORT: + expected_types.append( + ( + decimal.Decimal("1.1234567890123456789012345678901234567890"), + "BIGNUMERIC", + ) + ) + for value, expected_type in expected_types: msg = "value: {} expected_type: {}".format(value, expected_type) parameter = _helpers.scalar_to_query_parameter(value) @@ -104,6 +113,11 @@ def test_array_to_query_parameter_valid_argument(self): ), ] + if _BIGNUMERIC_SUPPORT: + expected_types.append( + ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC") + ) + for values, expected_type in expected_types: msg = "value: {} expected_type: {}".format(values, expected_type) parameter = _helpers.array_to_query_parameter(values) diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index cf268daf193a..ae2c29d09bdb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -166,6 +166,16 @@ def test_to_api_repr_w_numeric(self): param = klass.positional(type_="NUMERIC", value="123456789.123456789") self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_bignumeric(self): + big_num_string = "{d38}.{d38}".format(d38="9" * 38) + EXPECTED = { + "parameterType": {"type": "BIGNUMERIC"}, + "parameterValue": {"value": big_num_string}, + } + klass = self._get_target_class() + param = klass.positional(type_="BIGNUMERIC", value=big_num_string) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_bool(self): EXPECTED = { "parameterType": {"type": "BOOL"}, From cee5f2cd5b0f4db85f3457489d23784b15836b89 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 24 Feb 2021 16:26:00 +0100 Subject: [PATCH 1073/2016] fix: error using empty array of structs parameter (#474) * fix: error using empty array of structs parameter * Add QueryParameterType classes * Use query parameter types with ArrayQueryParameter * Adjust system test to changed ArrayQueryParameter * Clarify a comment about an assertion Co-authored-by: Tim Swast * Clarify when name/descr. is omitted from API repr * Rename subtypes to fields * Add fields property to StructQueryParameterType * Add a check for empty struct fields * Define scalar SQL parameter types as type objects Co-authored-by: Tim Swast --- .../google/cloud/bigquery/__init__.py | 6 + .../google/cloud/bigquery/enums.py | 21 + .../google/cloud/bigquery/query.py | 301 +++++++++++++- .../tests/system/test_client.py | 15 + .../tests/unit/test_query.py | 383 +++++++++++++++++- 5 files changed, 711 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 29d375b03a1d..f609468f5844 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -66,8 +66,11 @@ from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ArrayQueryParameterType from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import StructQueryParameterType from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.routine import DeterminismLevel @@ -93,6 +96,9 @@ "ArrayQueryParameter", "ScalarQueryParameter", "StructQueryParameter", + "ArrayQueryParameterType", + "ScalarQueryParameterType", + "StructQueryParameterType", # Datasets "Dataset", "DatasetReference", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index e353b3132e81..b378f091b28c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -18,6 +18,7 @@ import itertools from google.cloud.bigquery_v2 import types as gapic_types +from google.cloud.bigquery.query import ScalarQueryParameterType class Compression(object): @@ -215,6 +216,26 @@ class SqlTypeNames(str, enum.Enum): DATETIME = "DATETIME" +class SqlParameterScalarTypes: + """Supported scalar SQL query parameter types as type objects.""" + + STRING = ScalarQueryParameterType("STRING") + BYTES = ScalarQueryParameterType("BYTES") + INTEGER = ScalarQueryParameterType("INT64") + INT64 = ScalarQueryParameterType("INT64") + FLOAT = ScalarQueryParameterType("FLOAT64") + FLOAT64 = ScalarQueryParameterType("FLOAT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") + BOOLEAN = ScalarQueryParameterType("BOOL") + BOOL = ScalarQueryParameterType("BOOL") + GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") + DATE = ScalarQueryParameterType("DATE") + TIME = ScalarQueryParameterType("TIME") + DATETIME = ScalarQueryParameterType("DATETIME") + + class WriteDisposition(object): """Specifies the action that occurs if destination table already exists. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index ecec73e9938b..42547cd73d4e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -48,6 +48,239 @@ def __ne__(self, other): return not self == other +class _AbstractQueryParameterType: + """Base class for representing query parameter types. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter#queryparametertype + """ + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.QueryParameterType: Instance + """ + raise NotImplementedError + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + raise NotImplementedError + + +class ScalarQueryParameterType(_AbstractQueryParameterType): + """Type representation for scalar query parameters. + + Args: + type_ (str): + One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', + 'DATETIME', or 'DATE'. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, type_, *, name=None, description=None): + self._type = type_ + self.name = name + self.description = description + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.ScalarQueryParameterType: Instance + """ + type_ = resource["type"] + return cls(type_) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + # Name and description are only used if the type is a field inside a struct + # type, but it's StructQueryParameterType's responsibilty to use these two + # attributes in the API representation when needed. Here we omit them. + return {"type": self._type} + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + return f"{self.__class__.__name__}({self._type!r}{name}{description})" + + +class ArrayQueryParameterType(_AbstractQueryParameterType): + """Type representation for array query parameters. + + Args: + array_type (Union[ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, array_type, *, name=None, description=None): + self._array_type = array_type + self.name = name + self.description = description + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.ArrayQueryParameterType: Instance + """ + array_item_type = resource["arrayType"]["type"] + + if array_item_type in {"STRUCT", "RECORD"}: + klass = StructQueryParameterType + else: + klass = ScalarQueryParameterType + + item_type_instance = klass.from_api_repr(resource["arrayType"]) + return cls(item_type_instance) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + # Name and description are only used if the type is a field inside a struct + # type, but it's StructQueryParameterType's responsibilty to use these two + # attributes in the API representation when needed. Here we omit them. + return { + "type": "ARRAY", + "arrayType": self._array_type.to_api_repr(), + } + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + return f"{self.__class__.__name__}({self._array_type!r}{name}{description})" + + +class StructQueryParameterType(_AbstractQueryParameterType): + """Type representation for struct query parameters. + + Args: + fields (Iterable[Union[ \ + ArrayQueryParameterType, ScalarQueryParameterType, StructQueryParameterType \ + ]]): + An non-empty iterable describing the struct's field types. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, *fields, name=None, description=None): + if not fields: + raise ValueError("Struct type must have at least one field defined.") + + self._fields = fields # fields is a tuple (immutable), no shallow copy needed + self.name = name + self.description = description + + @property + def fields(self): + return self._fields # no copy needed, self._fields is an immutable sequence + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.StructQueryParameterType: Instance + """ + fields = [] + + for struct_field in resource["structTypes"]: + type_repr = struct_field["type"] + if type_repr["type"] in {"STRUCT", "RECORD"}: + klass = StructQueryParameterType + elif type_repr["type"] == "ARRAY": + klass = ArrayQueryParameterType + else: + klass = ScalarQueryParameterType + + type_instance = klass.from_api_repr(type_repr) + type_instance.name = struct_field.get("name") + type_instance.description = struct_field.get("description") + fields.append(type_instance) + + return cls(*fields) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + fields = [] + + for field in self._fields: + item = {"type": field.to_api_repr()} + if field.name is not None: + item["name"] = field.name + if field.description is not None: + item["description"] = field.description + + fields.append(item) + + return { + "type": "STRUCT", + "structTypes": fields, + } + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + items = ", ".join(repr(field) for field in self._fields) + return f"{self.__class__.__name__}({items}{name}{description})" + + class _AbstractQueryParameter(object): """Base class for named / positional query parameters. """ @@ -184,28 +417,43 @@ class ArrayQueryParameter(_AbstractQueryParameter): Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - array_type (str): - Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. If given as a string, it must be one of + `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, + `'TIMESTAMP'`, `'DATE'`, or `'STRUCT'`/`'RECORD'`. + If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty, + the exact item type cannot be deduced, thus a ``StructQueryParameterType`` + instance needs to be passed in. - values (List[appropriate scalar type]): The parameter array values. + values (List[appropriate type]): The parameter array values. """ def __init__(self, name, array_type, values): self.name = name - self.array_type = array_type self.values = values + if isinstance(array_type, str): + if not values and array_type in {"RECORD", "STRUCT"}: + raise ValueError( + "Missing detailed struct item type info for an empty array, " + "please provide a StructQueryParameterType instance." + ) + self.array_type = array_type + @classmethod def positional(cls, array_type, values): """Factory for positional parameters. Args: - array_type (str): - Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. If given as a string, it must be one of + `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, + `'BOOL'`, `'TIMESTAMP'`, `'DATE'`, or `'STRUCT'`/`'RECORD'`. + If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty, + the exact item type cannot be deduced, thus a ``StructQueryParameterType`` + instance needs to be passed in. - values (List[appropriate scalar type]): The parameter array values. + values (List[appropriate type]): The parameter array values. Returns: google.cloud.bigquery.query.ArrayQueryParameter: Instance without name @@ -263,22 +511,40 @@ def to_api_repr(self): Dict: JSON mapping """ values = self.values - if self.array_type == "RECORD" or self.array_type == "STRUCT": + + if self.array_type in {"RECORD", "STRUCT"} or isinstance( + self.array_type, StructQueryParameterType + ): reprs = [value.to_api_repr() for value in values] - a_type = reprs[0]["parameterType"] a_values = [repr_["parameterValue"] for repr_ in reprs] + + if reprs: + a_type = reprs[0]["parameterType"] + else: + # This assertion always evaluates to True because the + # constructor disallows STRUCT/RECORD type defined as a + # string with empty values. + assert isinstance(self.array_type, StructQueryParameterType) + a_type = self.array_type.to_api_repr() else: - a_type = {"type": self.array_type} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) + # Scalar array item type. + if isinstance(self.array_type, str): + a_type = {"type": self.array_type} + else: + a_type = self.array_type.to_api_repr() + + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"]) if converter is not None: values = [converter(value) for value in values] a_values = [{"value": value} for value in values] + resource = { "parameterType": {"type": "ARRAY", "arrayType": a_type}, "parameterValue": {"arrayValues": a_values}, } if self.name is not None: resource["name"] = self.name + return resource def _key(self): @@ -289,7 +555,14 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.query.ArrayQueryParameter`. """ - return (self.name, self.array_type.upper(), self.values) + if isinstance(self.array_type, str): + item_type = self.array_type + elif isinstance(self.array_type, ScalarQueryParameterType): + item_type = self.array_type._type + else: + item_type = "STRUCT" + + return (self.name, item_type.upper(), self.values) def __eq__(self, other): if not isinstance(other, ArrayQueryParameter): diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 684a42c3028b..ed48b0bfea06 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2168,7 +2168,9 @@ def test_query_w_query_params(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter + from google.cloud.bigquery.query import StructQueryParameterType question = "What is the answer to life, the universe, and everything?" question_param = ScalarQueryParameter( @@ -2227,6 +2229,14 @@ def test_query_w_query_params(self): characters_param = ArrayQueryParameter( name=None, array_type="RECORD", values=[phred_param, bharney_param] ) + empty_struct_array_param = ArrayQueryParameter( + name="empty_array_param", + values=[], + array_type=StructQueryParameterType( + ScalarQueryParameterType(name="foo", type_="INT64"), + ScalarQueryParameterType(name="bar", type_="STRING"), + ), + ) hero_param = StructQueryParameter("hero", phred_name_param, phred_age_param) sidekick_param = StructQueryParameter( "sidekick", bharney_name_param, bharney_age_param @@ -2317,6 +2327,11 @@ def test_query_w_query_params(self): ], "query_parameters": [characters_param], }, + { + "sql": "SELECT @empty_array_param", + "expected": [], + "query_parameters": [empty_struct_array_param], + }, { "sql": "SELECT @roles", "expected": { diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index ae2c29d09bdb..c8be2911f2fb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -43,6 +43,318 @@ def test___eq__(self): self.assertNotEqual(udf, wrong_type) +class Test__AbstractQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import _AbstractQueryParameterType + + return _AbstractQueryParameterType + + @classmethod + def _make_one(cls, *args, **kw): + return cls._get_target_class()(*args, **kw) + + def test_from_api_virtual(self): + klass = self._get_target_class() + with self.assertRaises(NotImplementedError): + klass.from_api_repr({}) + + def test_to_api_virtual(self): + param_type = self._make_one() + with self.assertRaises(NotImplementedError): + param_type.to_api_repr() + + +class Test_ScalarQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ScalarQueryParameterType + + return ScalarQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_repr(self): + klass = self._get_target_class() + result = klass.from_api_repr({"type": "BOOLEAN"}) + self.assertEqual(result._type, "BOOLEAN") + self.assertIsNone(result.name) + self.assertIsNone(result.description) + + def test_to_api_repr(self): + param_type = self._make_one("BYTES", name="foo", description="bar") + result = param_type.to_api_repr() + self.assertEqual(result, {"type": "BYTES"}) + + def test_repr_no_optional_attrs(self): + param_type = self._make_one("BYTES") + self.assertEqual(repr(param_type), "ScalarQueryParameterType('BYTES')") + + def test_repr_all_optional_attrs(self): + param_type = self._make_one("BYTES", name="foo", description="this is foo") + self.assertEqual( + repr(param_type), + "ScalarQueryParameterType('BYTES', name='foo', description='this is foo')", + ) + + +class Test_ArrayQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ArrayQueryParameterType + + return ArrayQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_repr(self): + from google.cloud.bigquery.query import StructQueryParameterType + + api_resource = { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + { + "name": "weight", + "type": {"type": "INTEGER"}, + "description": "in kg", + }, + {"name": "last_name", "type": {"type": "STRING"}}, + ], + }, + } + + klass = self._get_target_class() + result = klass.from_api_repr(api_resource) + + self.assertIsNone(result.name) + self.assertIsNone(result.description) + item_type = result._array_type + self.assertIsInstance(item_type, StructQueryParameterType) + + self.assertIsNone(item_type.name) + self.assertIsNone(item_type.description) + + field = item_type.fields[0] + self.assertEqual(field.name, "weight") + self.assertEqual(field.description, "in kg") + self.assertEqual(field._type, "INTEGER") + + field = item_type.fields[1] + self.assertEqual(field.name, "last_name") + self.assertIsNone(field.description) + self.assertEqual(field._type, "STRING") + + def test_to_api_repr(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + array_item_type = StructQueryParameterType( + ScalarQueryParameterType("INTEGER", name="weight", description="in kg"), + ScalarQueryParameterType("STRING", name="last_name"), + ) + param_type = self._make_one(array_item_type, name="foo", description="bar") + + result = param_type.to_api_repr() + + expected_result = { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + { + "name": "weight", + "type": {"type": "INTEGER"}, + "description": "in kg", + }, + {"name": "last_name", "type": {"type": "STRING"}}, + ], + }, + } + self.assertEqual(result, expected_result) + + def test_repr_no_optional_attrs(self): + param_type = self._make_one("BOOLEAN") + self.assertEqual(repr(param_type), "ArrayQueryParameterType('BOOLEAN')") + + def test_repr_all_optional_attrs(self): + param_type = self._make_one("INT64", name="bar", description="this is bar") + self.assertEqual( + repr(param_type), + "ArrayQueryParameterType('INT64', name='bar', description='this is bar')", + ) + + +class Test_StructQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import StructQueryParameterType + + return StructQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_raises_error_without_any_fields(self): + with self.assertRaisesRegex(ValueError, ".*at least one field.*"): + self._make_one() + + def test_from_api_repr(self): + from google.cloud.bigquery.query import ArrayQueryParameterType + from google.cloud.bigquery.query import ScalarQueryParameterType + + api_resource = { + "type": "STRUCT", + "structTypes": [ + { + "name": "age", + "type": {"type": "INTEGER"}, + "description": "in years", + }, + { + "name": "aliases", + "type": {"type": "ARRAY", "arrayType": {"type": "STRING"}}, + }, + { + "description": "a nested struct", + "type": { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "DATE"}, "name": "nested_date"}, + { + "type": {"type": "BOOLEAN"}, + "description": "nested bool field", + }, + ], + }, + }, + ], + } + + klass = self._get_target_class() + result = klass.from_api_repr(api_resource) + + self.assertIsNone(result.name) + self.assertIsNone(result.description) + self.assertEqual(len(result.fields), 3) + + field = result.fields[0] + self.assertIsInstance(field, ScalarQueryParameterType) + self.assertEqual(field.name, "age") + self.assertEqual(field.description, "in years") + + field = result.fields[1] + self.assertIsInstance(field, ArrayQueryParameterType) + self.assertEqual(field.name, "aliases") + self.assertIsNone(field.description) + self.assertIsInstance(field._array_type, ScalarQueryParameterType) + self.assertEqual(field._array_type._type, "STRING") + + field = result.fields[2] + self.assertIsInstance(field, self._get_target_class()) + self.assertIsNone(field.name) + self.assertEqual(field.description, "a nested struct") + + date_field = field.fields[0] + self.assertEqual(date_field._type, "DATE") + self.assertEqual(date_field.name, "nested_date") + self.assertIsNone(date_field.description) + + bool_field = field.fields[1] + self.assertEqual(bool_field._type, "BOOLEAN") + self.assertIsNone(bool_field.name) + self.assertEqual(bool_field.description, "nested bool field") + + def test_to_api_repr(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + int_type = ScalarQueryParameterType("INTEGER", description="in years") + date_type = ScalarQueryParameterType("DATE", name="day_of_birth") + param_type = self._make_one(int_type, date_type, name="foo", description="bar") + + result = param_type.to_api_repr() + + expected_result = { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "INTEGER"}, "description": "in years"}, + {"name": "day_of_birth", "type": {"type": "DATE"}}, + ], + } + self.assertEqual(result, expected_result) + + def test_to_api_repr_nested(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + struct_class = self._get_target_class() + + int_type = ScalarQueryParameterType("INTEGER", description="in years") + nested_struct_type = struct_class( + ScalarQueryParameterType("DATE", name="nested_date"), + ScalarQueryParameterType("BOOLEAN", description="nested bool field"), + name="nested", + ) + param_type = self._make_one( + int_type, nested_struct_type, name="foo", description="bar" + ) + + result = param_type.to_api_repr() + + expected_result = { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "INTEGER"}, "description": "in years"}, + { + "name": "nested", + "type": { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "DATE"}, "name": "nested_date"}, + { + "type": {"type": "BOOLEAN"}, + "description": "nested bool field", + }, + ], + }, + }, + ], + } + self.assertEqual(result, expected_result) + + def test_repr_no_optional_attrs(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + param_type = self._make_one( + ScalarQueryParameterType("BOOLEAN"), ScalarQueryParameterType("STRING") + ) + expected = ( + "StructQueryParameterType(" + "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING')" + ")" + ) + self.assertEqual(repr(param_type), expected) + + def test_repr_all_optional_attrs(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + param_type = self._make_one( + ScalarQueryParameterType("BOOLEAN"), + ScalarQueryParameterType("STRING"), + name="data_record", + description="this is it", + ) + expected = ( + "StructQueryParameterType(" + "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING'), " + "name='data_record', description='this is it'" + ")" + ) + self.assertEqual(repr(param_type), expected) + + class Test__AbstractQueryParameter(unittest.TestCase): @staticmethod def _get_target_class(): @@ -340,6 +652,10 @@ def test_ctor(self): self.assertEqual(param.array_type, "INT64") self.assertEqual(param.values, [1, 2]) + def test_ctor_empty_struct_array_wo_type_info(self): + with self.assertRaisesRegex(ValueError, r"(?i)missing.*struct.*type info.*"): + self._make_one(name="foo", array_type="STRUCT", values=[]) + def test___eq__(self): param = self._make_one(name="foo", array_type="INT64", values=[123]) self.assertEqual(param, param) @@ -467,6 +783,19 @@ def test_to_api_repr_wo_name(self): param = klass.positional(array_type="INT64", values=[1, 2]) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_array_type_as_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + EXPECTED = { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "BOOLEAN"}}, + "parameterValue": {"arrayValues": [{"value": "true"}, {"value": "false"}]}, + } + klass = self._get_target_class() + param = klass.positional( + array_type=ScalarQueryParameterType("BOOLEAN"), values=[True, False], + ) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_unknown_type(self): EXPECTED = { "parameterType": {"type": "ARRAY", "arrayType": {"type": "UNKNOWN"}}, @@ -503,6 +832,31 @@ def test_to_api_repr_w_record_type(self): param = klass.positional(array_type="RECORD", values=[struct]) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_empty_array_of_records_type(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + EXPECTED = { + "parameterType": { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + {"name": "foo", "type": {"type": "STRING"}}, + {"name": "bar", "type": {"type": "INT64"}}, + ], + }, + }, + "parameterValue": {"arrayValues": []}, + } + item_type = StructQueryParameterType( + ScalarQueryParameterType("STRING", name="foo"), + ScalarQueryParameterType("INT64", name="bar"), + ) + klass = self._get_target_class() + param = klass.positional(array_type=item_type, values=[]) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test___eq___wrong_type(self): field = self._make_one("test", "STRING", ["value"]) other = object() @@ -547,11 +901,38 @@ def test___ne___different_values(self): field2 = self._make_one("test", "INT64", [12]) self.assertNotEqual(field1, field2) - def test___repr__(self): + def test___repr__array_type_str(self): field1 = self._make_one("field1", "STRING", ["value"]) expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" self.assertEqual(repr(field1), expected) + def test___repr__array_type_scalar_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + int_items = self._make_one( + "int_items", ScalarQueryParameterType("INTEGER"), [64] + ) + expected = "ArrayQueryParameter('int_items', 'INTEGER', [64])" + self.assertEqual(repr(int_items), expected) + + def test___repr__array_type_struct_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + struct_items = self._make_one( + "struct_items", + StructQueryParameterType( + ScalarQueryParameterType("INTEGER", name="age"), + ScalarQueryParameterType("STRING", name="last_name"), + ), + [{"age": 18, "last_name": "Doe"}], + ) + expected = ( + "ArrayQueryParameter('struct_items', 'STRUCT', " + "[{'age': 18, 'last_name': 'Doe'}])" + ) + self.assertEqual(repr(struct_items), expected) + class Test_StructQueryParameter(unittest.TestCase): @staticmethod From b901a14d2c79fa6fdcdab43dfa506a1d5de35e70 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 25 Feb 2021 08:42:04 -0800 Subject: [PATCH 1074/2016] chore: exclude tarball from code generation (#512) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/ca115e36-5d95-4acd-a2d8-7ac2f22a7261/targets - [x] To automatically regenerate this PR, check this box. --- packages/google-cloud-bigquery/synth.metadata | 3 +-- packages/google-cloud-bigquery/synth.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index dc183a72e087..9412653c65a1 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "1c6681aba872c00afb16a904a2ba9bae8e9618d3" + "sha": "1823cadee3acf95c516d0479400e4175349ea199" } }, { @@ -93,7 +93,6 @@ "CONTRIBUTING.rst", "LICENSE", "MANIFEST.in", - "bigquery-v2-py.tar.gz", "docs/_static/custom.css", "docs/_templates/layout.html", "docs/bigquery_v2/model_service.rst", diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index 3ab271c96375..3c64406001ca 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -32,6 +32,7 @@ s.move( library, excludes=[ + "*.tar.gz", "docs/index.rst", "docs/bigquery_v2/*_service.rst", "docs/bigquery_v2/services.rst", From e0200fa3cc21e06afc43adf706317c60e34db88c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 25 Feb 2021 19:32:01 +0100 Subject: [PATCH 1075/2016] fix: QueryJob.exception() *returns* the errors, not raises them (#467) * fix: QueryJob.exception() should *return* errors * Reload query job on error, raise any reload errors * Catch errors on reloading failed query jobs * Add additional unit test * Increase retry deadline to mitigate test flakiness * Store the more informative exception in done() --- .../google/cloud/bigquery/job/query.py | 26 +++++- .../tests/unit/job/test_base.py | 2 +- .../tests/unit/job/test_query.py | 83 ++++++++++++++++++- 3 files changed, 104 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index b3ca8d940222..5c1118500c27 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -989,7 +989,8 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): unfinished jobs before checking. Default ``True``. Returns: - bool: True if the job is complete, False otherwise. + bool: ``True`` if the job is complete or if fetching its status resulted in + an error, ``False`` otherwise. """ # Do not refresh if the state is already done, as the job will not # change once complete. @@ -997,17 +998,34 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): if not reload or is_done: return is_done - self._reload_query_results(retry=retry, timeout=timeout) - # If an explicit timeout is not given, fall back to the transport timeout # stored in _blocking_poll() in the process of polling for job completion. transport_timeout = timeout if timeout is not None else self._transport_timeout + try: + self._reload_query_results(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + # Reloading also updates error details on self, thus no need for an + # explicit self.set_exception() call if reloading succeeds. + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError: + # Use the query results reload exception, as it generally contains + # much more useful error information. + self.set_exception(exc) + return True + else: + return self.state == _DONE_STATE + # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are # correctly populated. if self._query_results.complete: - self.reload(retry=retry, timeout=transport_timeout) + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + self.set_exception(exc) + return True return self.state == _DONE_STATE diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index bbeffba50e38..405ad6ee58e7 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -967,7 +967,7 @@ def test_result_w_retry_wo_state(self): custom_predicate = mock.Mock() custom_predicate.return_value = True custom_retry = google.api_core.retry.Retry( - predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.001, + predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.1, ) self.assertIs(job.result(retry=custom_retry), job) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index a4ab11ab6ad6..655a121e65f2 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -16,6 +16,7 @@ import copy import http import textwrap +import types import freezegun from google.api_core import exceptions @@ -308,7 +309,7 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) - def test_done(self): + def test_done_job_complete(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -356,6 +357,84 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self): call_args = fake_reload.call_args self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + def test_done_w_query_results_error_reload_ok_job_finished(self): + client = _make_client(project=self.PROJECT) + bad_request_error = exceptions.BadRequest("Error in query") + client._get_query_results = mock.Mock(side_effect=bad_request_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._exception = None + + def fake_reload(self, *args, **kwargs): + self._properties["status"]["state"] = "DONE" + self.set_exception(copy.copy(bad_request_error)) + + fake_reload_method = types.MethodType(fake_reload, job) + + with mock.patch.object(job, "reload", new=fake_reload_method): + is_done = job.done() + + assert is_done + assert isinstance(job._exception, exceptions.BadRequest) + + def test_done_w_query_results_error_reload_ok_job_still_running(self): + client = _make_client(project=self.PROJECT) + retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) + client._get_query_results = mock.Mock(side_effect=retry_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._exception = None + + def fake_reload(self, *args, **kwargs): + self._properties["status"]["state"] = "RUNNING" + + fake_reload_method = types.MethodType(fake_reload, job) + + with mock.patch.object(job, "reload", new=fake_reload_method): + is_done = job.done() + + assert not is_done + assert job._exception is None + + def test_done_w_query_results_error_reload_error(self): + client = _make_client(project=self.PROJECT) + bad_request_error = exceptions.BadRequest("Error in query") + client._get_query_results = mock.Mock(side_effect=bad_request_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + reload_error = exceptions.DataLoss("Oops, sorry!") + job.reload = mock.Mock(side_effect=reload_error) + job._exception = None + + is_done = job.done() + + assert is_done + assert job._exception is bad_request_error + + def test_done_w_job_query_results_ok_reload_error(self): + client = _make_client(project=self.PROJECT) + query_results = google.cloud.bigquery.query._QueryResults( + properties={ + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": "12345"}, + } + ) + client._get_query_results = mock.Mock(return_value=query_results) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) + job.reload = mock.Mock(side_effect=retry_error) + job._exception = None + + is_done = job.done() + + assert is_done + assert job._exception is retry_error + def test_query_plan(self): from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.job import QueryPlanEntry @@ -973,7 +1052,7 @@ def test_result_w_retry(self): initial=0.001, maximum=0.001, multiplier=1.0, - deadline=0.001, + deadline=0.1, predicate=custom_predicate, ) From 797bf3954ecb5c5e5cec27b14bb86e9c8c929d41 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 25 Feb 2021 18:46:03 +0000 Subject: [PATCH 1076/2016] chore: release 2.10.0 (#533) :robot: I have created a release \*beep\* \*boop\* --- ## [2.10.0](https://www.github.com/googleapis/python-bigquery/compare/v2.9.0...v2.10.0) (2021-02-25) ### Features * add BIGNUMERIC support ([#527](https://www.github.com/googleapis/python-bigquery/issues/527)) ([cc3394f](https://www.github.com/googleapis/python-bigquery/commit/cc3394f80934419eb00c2029bb81c92a696e7d88)) ### Bug Fixes * error using empty array of structs parameter ([#474](https://www.github.com/googleapis/python-bigquery/issues/474)) ([c1d15f4](https://www.github.com/googleapis/python-bigquery/commit/c1d15f4e5da4b7e10c00afffd59a5c7f3ded027a)) * QueryJob.exception() *returns* the errors, not raises them ([#467](https://www.github.com/googleapis/python-bigquery/issues/467)) ([d763279](https://www.github.com/googleapis/python-bigquery/commit/d7632799769248b09a8558ba18f5025ebdd9675a)) ### Documentation * **bigquery:** Add alternative approach to setting credentials ([#517](https://www.github.com/googleapis/python-bigquery/issues/517)) ([60fbf28](https://www.github.com/googleapis/python-bigquery/commit/60fbf287b0d34d5db2e61cce7a5b42735ed43d0e)) * explain retry behavior for DONE jobs ([#532](https://www.github.com/googleapis/python-bigquery/issues/532)) ([696c443](https://www.github.com/googleapis/python-bigquery/commit/696c443f0a6740be0767e12b706a7771bc1460c3)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 19 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 51fad831e074..9afd523a4583 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.10.0](https://www.github.com/googleapis/python-bigquery/compare/v2.9.0...v2.10.0) (2021-02-25) + + +### Features + +* add BIGNUMERIC support ([#527](https://www.github.com/googleapis/python-bigquery/issues/527)) ([cc3394f](https://www.github.com/googleapis/python-bigquery/commit/cc3394f80934419eb00c2029bb81c92a696e7d88)) + + +### Bug Fixes + +* error using empty array of structs parameter ([#474](https://www.github.com/googleapis/python-bigquery/issues/474)) ([c1d15f4](https://www.github.com/googleapis/python-bigquery/commit/c1d15f4e5da4b7e10c00afffd59a5c7f3ded027a)) +* QueryJob.exception() *returns* the errors, not raises them ([#467](https://www.github.com/googleapis/python-bigquery/issues/467)) ([d763279](https://www.github.com/googleapis/python-bigquery/commit/d7632799769248b09a8558ba18f5025ebdd9675a)) + + +### Documentation + +* **bigquery:** Add alternative approach to setting credentials ([#517](https://www.github.com/googleapis/python-bigquery/issues/517)) ([60fbf28](https://www.github.com/googleapis/python-bigquery/commit/60fbf287b0d34d5db2e61cce7a5b42735ed43d0e)) +* explain retry behavior for DONE jobs ([#532](https://www.github.com/googleapis/python-bigquery/issues/532)) ([696c443](https://www.github.com/googleapis/python-bigquery/commit/696c443f0a6740be0767e12b706a7771bc1460c3)) + ## [2.9.0](https://www.github.com/googleapis/python-bigquery/compare/v2.8.0...v2.9.0) (2021-02-18) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index b2a8c5535e20..13e710fccca2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.9.0" +__version__ = "2.10.0" From 9412ed6d500201b07e119f94f877a7af173bd76d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Mar 2021 17:36:08 +0100 Subject: [PATCH 1077/2016] chore(deps): update dependency google-cloud-bigquery to v2.10.0 (#535) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e9fcfca03605..8ff7fa850ec2 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.9.0 +google-cloud-bigquery==2.10.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index c638178fce46..150fe2993a34 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.9.0 +google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 grpcio==1.35.0 From 7e86d3fa5652f93c79c78fdf9f388bb8f9f1f3a4 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Mar 2021 17:36:31 +0100 Subject: [PATCH 1078/2016] chore(deps): update dependency grpcio to v1.36.0 (#536) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 150fe2993a34..d645d8a1da45 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 -grpcio==1.35.0 +grpcio==1.36.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4 From 25cd6b7c9310a1ec14ae9d5856f7741415917cbe Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 4 Mar 2021 00:24:05 +0100 Subject: [PATCH 1079/2016] chore(deps): update dependency grpcio to v1.36.1 (#541) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [grpcio](https://grpc.io) | `==1.36.0` -> `==1.36.1` | [![age](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/compatibility-slim/1.36.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/confidence-slim/1.36.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index d645d8a1da45..a80b7fa05962 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 -grpcio==1.36.0 +grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4 From ea22345f27f977bbf31f3f32af19f5f84f23e04f Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 5 Mar 2021 05:06:51 +0100 Subject: [PATCH 1080/2016] chore(deps): update dependency google-auth-oauthlib to v0.4.3 (#542) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index a80b7fa05962..8ccbec38e8bf 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 -google-auth-oauthlib==0.4.2 +google-auth-oauthlib==0.4.3 grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From c6f5302aeb80bc065d7cd9a03440b5263219a1f9 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 9 Mar 2021 11:57:40 -0800 Subject: [PATCH 1081/2016] chore: upgrade gapic-generator-python to 0.42.2 (#543) PiperOrigin-RevId: 361662015 Source-Author: Google APIs Source-Date: Mon Mar 8 14:47:18 2021 -0800 Source-Repo: googleapis/googleapis Source-Sha: 28a591963253d52ce3a25a918cafbdd9928de8cf Source-Link: https://github.com/googleapis/googleapis/commit/28a591963253d52ce3a25a918cafbdd9928de8cf --- .../cloud/bigquery_v2/types/__init__.py | 28 +++++++++---------- packages/google-cloud-bigquery/synth.metadata | 10 +++---- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py index 00dc837c9bd4..b76e65c65b25 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py @@ -16,6 +16,14 @@ # from .encryption_config import EncryptionConfiguration +from .model import ( + DeleteModelRequest, + GetModelRequest, + ListModelsRequest, + ListModelsResponse, + Model, + PatchModelRequest, +) from .model_reference import ModelReference from .standard_sql import ( StandardSqlDataType, @@ -23,26 +31,18 @@ StandardSqlStructType, ) from .table_reference import TableReference -from .model import ( - Model, - GetModelRequest, - PatchModelRequest, - DeleteModelRequest, - ListModelsRequest, - ListModelsResponse, -) __all__ = ( "EncryptionConfiguration", + "DeleteModelRequest", + "GetModelRequest", + "ListModelsRequest", + "ListModelsResponse", + "Model", + "PatchModelRequest", "ModelReference", "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", "TableReference", - "Model", - "GetModelRequest", - "PatchModelRequest", - "DeleteModelRequest", - "ListModelsRequest", - "ListModelsResponse", ) diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 9412653c65a1..cab985521217 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,15 +4,15 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "1823cadee3acf95c516d0479400e4175349ea199" + "sha": "f0259eb7ed4ff254ee238e87651992ff93481dae" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "e13001be33d69042a9505e698f792587a804a5cf", - "internalRef": "358152223" + "sha": "28a591963253d52ce3a25a918cafbdd9928de8cf", + "internalRef": "361662015" } }, { @@ -95,8 +95,6 @@ "MANIFEST.in", "docs/_static/custom.css", "docs/_templates/layout.html", - "docs/bigquery_v2/model_service.rst", - "docs/bigquery_v2/services.rst", "docs/bigquery_v2/types.rst", "docs/conf.py", "google/cloud/bigquery_v2/__init__.py", @@ -128,4 +126,4 @@ "setup.cfg", "testing/.gitignore" ] -} +} \ No newline at end of file From 6da8b5f3c33bf542d8ac144452f00979a2d5e304 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 9 Mar 2021 21:02:56 +0100 Subject: [PATCH 1082/2016] feat: add context manager support to client (#540) --- .../google/cloud/bigquery/client.py | 6 +++++ .../tests/unit/test_client.py | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f8c0d7c93ea8..bdbcb767cb70 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3423,6 +3423,12 @@ def schema_to_json(self, schema_list, destination): with open(destination, mode="w") as file_obj: return self._schema_to_json_file_object(json_schema_list, file_obj) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + # pylint: disable=unused-argument def _item_to_project(iterator, resource): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 66add9c0a034..6c3263ea5ba5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -7218,6 +7218,28 @@ def test_list_rows_error(self): with self.assertRaises(TypeError): client.list_rows(1) + def test_context_manager_enter_returns_itself(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with mock.patch.object(client, "close"), client as context_var: + pass + + self.assertIs(client, context_var) + + def test_context_manager_exit_closes_client(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + fake_close = mock.Mock() + with mock.patch.object(client, "close", fake_close): + with client: + pass + + fake_close.assert_called_once() + class Test_make_job_id(unittest.TestCase): def _call_fut(self, job_id, prefix=None): From 1a686173a43d440db7d09e9eff9bda9611ca3cfd Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 9 Mar 2021 20:16:09 +0000 Subject: [PATCH 1083/2016] chore: release 2.11.0 (#545) :robot: I have created a release \*beep\* \*boop\* --- ## [2.11.0](https://www.github.com/googleapis/python-bigquery/compare/v2.10.0...v2.11.0) (2021-03-09) ### Features * add context manager support to client ([#540](https://www.github.com/googleapis/python-bigquery/issues/540)) ([d5c7e11](https://www.github.com/googleapis/python-bigquery/commit/d5c7e11a1dc2a149d74294bfadbae62d70573e69)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 9afd523a4583..512d38108a57 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.11.0](https://www.github.com/googleapis/python-bigquery/compare/v2.10.0...v2.11.0) (2021-03-09) + + +### Features + +* add context manager support to client ([#540](https://www.github.com/googleapis/python-bigquery/issues/540)) ([d5c7e11](https://www.github.com/googleapis/python-bigquery/commit/d5c7e11a1dc2a149d74294bfadbae62d70573e69)) + ## [2.10.0](https://www.github.com/googleapis/python-bigquery/compare/v2.9.0...v2.10.0) (2021-02-25) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 13e710fccca2..e6e357434ce4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.10.0" +__version__ = "2.11.0" From 4a51b0099911f93872735f8d80869123ce1bab40 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 10 Mar 2021 21:31:51 +0100 Subject: [PATCH 1084/2016] feat: make QueryJob.done() method more performant (#544) --- .../google/cloud/bigquery/job/query.py | 91 +++++++------------ .../tests/unit/job/test_query.py | 54 ++--------- 2 files changed, 45 insertions(+), 100 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 5c1118500c27..491983f8ebfb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -19,6 +19,7 @@ import re from google.api_core import exceptions +from google.api_core.future import polling as polling_future import requests from google.cloud.bigquery.dataset import Dataset @@ -42,7 +43,6 @@ from google.cloud.bigquery._tqdm_helpers import wait_for_query from google.cloud.bigquery.job.base import _AsyncJob -from google.cloud.bigquery.job.base import _DONE_STATE from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference @@ -974,61 +974,6 @@ def estimated_bytes_processed(self): result = int(result) return result - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): - """Refresh the job and checks if it is complete. - - Args: - retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves query results. If the job state is - ``DONE``, retrying is aborted early, as the job will not change anymore. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - reload (Optional[bool]): - If ``True``, make an API call to refresh the job state of - unfinished jobs before checking. Default ``True``. - - Returns: - bool: ``True`` if the job is complete or if fetching its status resulted in - an error, ``False`` otherwise. - """ - # Do not refresh if the state is already done, as the job will not - # change once complete. - is_done = self.state == _DONE_STATE - if not reload or is_done: - return is_done - - # If an explicit timeout is not given, fall back to the transport timeout - # stored in _blocking_poll() in the process of polling for job completion. - transport_timeout = timeout if timeout is not None else self._transport_timeout - - try: - self._reload_query_results(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - # Reloading also updates error details on self, thus no need for an - # explicit self.set_exception() call if reloading succeeds. - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError: - # Use the query results reload exception, as it generally contains - # much more useful error information. - self.set_exception(exc) - return True - else: - return self.state == _DONE_STATE - - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if self._query_results.complete: - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - self.set_exception(exc) - return True - - return self.state == _DONE_STATE - def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout @@ -1130,6 +1075,40 @@ def _reload_query_results(self, retry=DEFAULT_RETRY, timeout=None): timeout=transport_timeout, ) + def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): + """Check if the query has finished running and raise if it's not. + + If the query has finished, also reload the job itself. + """ + # If an explicit timeout is not given, fall back to the transport timeout + # stored in _blocking_poll() in the process of polling for job completion. + transport_timeout = timeout if timeout is not None else self._transport_timeout + + try: + self._reload_query_results(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + # Reloading also updates error details on self, thus no need for an + # explicit self.set_exception() call if reloading succeeds. + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError: + # Use the query results reload exception, as it generally contains + # much more useful error information. + self.set_exception(exc) + finally: + return + + # Only reload the job once we know the query is complete. + # This will ensure that fields such as the destination table are + # correctly populated. + if not self._query_results.complete: + raise polling_future._OperationNotComplete() + else: + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + self.set_exception(exc) + def result( self, page_size=None, diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 655a121e65f2..4665933eae42 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -309,16 +309,7 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) - def test_done_job_complete(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( - {"jobComplete": True, "jobReference": resource["jobReference"]} - ) - self.assertTrue(job.done()) - - def test_done_w_timeout(self): + def test__done_or_raise_w_timeout(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=False) job = self._get_target_class().from_api_repr(resource, client) @@ -326,7 +317,7 @@ def test_done_w_timeout(self): with mock.patch.object( client, "_get_query_results" ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=42) + job._done_or_raise(timeout=42) fake_get_results.assert_called_once() call_args = fake_get_results.call_args @@ -335,7 +326,7 @@ def test_done_w_timeout(self): call_args = fake_reload.call_args self.assertEqual(call_args.kwargs.get("timeout"), 42) - def test_done_w_timeout_and_longer_internal_api_timeout(self): + def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=False) job = self._get_target_class().from_api_repr(resource, client) @@ -344,7 +335,7 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self): with mock.patch.object( client, "_get_query_results" ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=5.5) + job._done_or_raise(timeout=5.5) # The expected timeout used is simply the given timeout, as the latter # is shorter than the job's internal done timeout. @@ -357,7 +348,7 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self): call_args = fake_reload.call_args self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) - def test_done_w_query_results_error_reload_ok_job_finished(self): + def test__done_or_raise_w_query_results_error_reload_ok(self): client = _make_client(project=self.PROJECT) bad_request_error = exceptions.BadRequest("Error in query") client._get_query_results = mock.Mock(side_effect=bad_request_error) @@ -373,32 +364,11 @@ def fake_reload(self, *args, **kwargs): fake_reload_method = types.MethodType(fake_reload, job) with mock.patch.object(job, "reload", new=fake_reload_method): - is_done = job.done() + job._done_or_raise() - assert is_done assert isinstance(job._exception, exceptions.BadRequest) - def test_done_w_query_results_error_reload_ok_job_still_running(self): - client = _make_client(project=self.PROJECT) - retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) - client._get_query_results = mock.Mock(side_effect=retry_error) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._exception = None - - def fake_reload(self, *args, **kwargs): - self._properties["status"]["state"] = "RUNNING" - - fake_reload_method = types.MethodType(fake_reload, job) - - with mock.patch.object(job, "reload", new=fake_reload_method): - is_done = job.done() - - assert not is_done - assert job._exception is None - - def test_done_w_query_results_error_reload_error(self): + def test__done_or_raise_w_query_results_error_reload_error(self): client = _make_client(project=self.PROJECT) bad_request_error = exceptions.BadRequest("Error in query") client._get_query_results = mock.Mock(side_effect=bad_request_error) @@ -409,12 +379,11 @@ def test_done_w_query_results_error_reload_error(self): job.reload = mock.Mock(side_effect=reload_error) job._exception = None - is_done = job.done() + job._done_or_raise() - assert is_done assert job._exception is bad_request_error - def test_done_w_job_query_results_ok_reload_error(self): + def test__done_or_raise_w_job_query_results_ok_reload_error(self): client = _make_client(project=self.PROJECT) query_results = google.cloud.bigquery.query._QueryResults( properties={ @@ -430,9 +399,8 @@ def test_done_w_job_query_results_ok_reload_error(self): job.reload = mock.Mock(side_effect=retry_error) job._exception = None - is_done = job.done() + job._done_or_raise() - assert is_done assert job._exception is retry_error def test_query_plan(self): @@ -1905,8 +1873,6 @@ def test_reload_w_timeout(self): ) def test_iter(self): - import types - begun_resource = self._make_resource() query_resource = { "jobComplete": True, From 5156077aa6c571431c084c291eb16e78e6832b4d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 10 Mar 2021 21:39:07 +0100 Subject: [PATCH 1085/2016] chore(deps): update dependency google-cloud-bigquery to v2.11.0 (#546) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 8ff7fa850ec2..34896627e68f 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.10.0 +google-cloud-bigquery==2.11.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 8ccbec38e8bf..b55d2b3a4a25 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.10.0 +google-cloud-bigquery==2.11.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.3 grpcio==1.36.1 From fce0fa24a163e7c6b4e3a6b2de2c45fe1d843012 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 15 Mar 2021 09:52:04 -0500 Subject: [PATCH 1086/2016] refactor: split pandas system tests to new module (#548) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Follow-up to https://github.com/googleapis/python-bigquery/pull/448 Towards #366 --- .../tests/system/conftest.py | 39 + .../tests/system/helpers.py | 94 ++ .../tests/system/test_client.py | 953 +----------------- .../tests/system/test_pandas.py | 801 +++++++++++++++ 4 files changed, 969 insertions(+), 918 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/system/conftest.py create mode 100644 packages/google-cloud-bigquery/tests/system/helpers.py create mode 100644 packages/google-cloud-bigquery/tests/system/test_pandas.py diff --git a/packages/google-cloud-bigquery/tests/system/conftest.py b/packages/google-cloud-bigquery/tests/system/conftest.py new file mode 100644 index 000000000000..4b5fcb54319b --- /dev/null +++ b/packages/google-cloud-bigquery/tests/system/conftest.py @@ -0,0 +1,39 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from . import helpers + + +@pytest.fixture(scope="session") +def bigquery_client(): + from google.cloud import bigquery + + return bigquery.Client() + + +@pytest.fixture(scope="session") +def bqstorage_client(bigquery_client): + from google.cloud import bigquery_storage + + return bigquery_storage.BigQueryReadClient(credentials=bigquery_client._credentials) + + +@pytest.fixture +def dataset_id(bigquery_client): + dataset_id = f"bqsystem_{helpers.temp_suffix()}" + bigquery_client.create_dataset(dataset_id) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) diff --git a/packages/google-cloud-bigquery/tests/system/helpers.py b/packages/google-cloud-bigquery/tests/system/helpers.py new file mode 100644 index 000000000000..76e609345a10 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/system/helpers.py @@ -0,0 +1,94 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal +import uuid + +import google.api_core.exceptions +import test_utils.retry + +from google.cloud._helpers import UTC + + +_naive = datetime.datetime(2016, 12, 5, 12, 41, 9) +_naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000) +_stamp = "%s %s" % (_naive.date().isoformat(), _naive.time().isoformat()) +_stamp_microseconds = _stamp + ".250000" +_zoned = _naive.replace(tzinfo=UTC) +_zoned_microseconds = _naive_microseconds.replace(tzinfo=UTC) +_numeric = decimal.Decimal("123456789.123456789") + + +# Examples of most data types to test with query() and DB-API. +STANDARD_SQL_EXAMPLES = [ + ("SELECT 1", 1), + ("SELECT 1.3", 1.3), + ("SELECT TRUE", True), + ('SELECT "ABC"', "ABC"), + ('SELECT CAST("foo" AS BYTES)', b"foo"), + ('SELECT TIMESTAMP "%s"' % (_stamp,), _zoned), + ('SELECT TIMESTAMP "%s"' % (_stamp_microseconds,), _zoned_microseconds,), + ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp,), _naive), + ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp_microseconds,), _naive_microseconds,), + ('SELECT DATE(TIMESTAMP "%s")' % (_stamp,), _naive.date()), + ('SELECT TIME(TIMESTAMP "%s")' % (_stamp,), _naive.time()), + ('SELECT NUMERIC "%s"' % (_numeric,), _numeric), + ("SELECT (1, 2)", {"_field_1": 1, "_field_2": 2}), + ( + "SELECT ((1, 2), (3, 4), 5)", + { + "_field_1": {"_field_1": 1, "_field_2": 2}, + "_field_2": {"_field_1": 3, "_field_2": 4}, + "_field_3": 5, + }, + ), + ("SELECT [1, 2, 3]", [1, 2, 3]), + ( + "SELECT ([1, 2], 3, [4, 5])", + {"_field_1": [1, 2], "_field_2": 3, "_field_3": [4, 5]}, + ), + ( + "SELECT [(1, 2, 3), (4, 5, 6)]", + [ + {"_field_1": 1, "_field_2": 2, "_field_3": 3}, + {"_field_1": 4, "_field_2": 5, "_field_3": 6}, + ], + ), + ( + "SELECT [([1, 2, 3], 4), ([5, 6], 7)]", + [{"_field_1": [1, 2, 3], "_field_2": 4}, {"_field_1": [5, 6], "_field_2": 7}], + ), + ("SELECT ARRAY(SELECT STRUCT([1, 2]))", [{"_field_1": [1, 2]}]), + ("SELECT ST_GeogPoint(1, 2)", "POINT(1 2)"), +] + + +def temp_suffix(): + now = datetime.datetime.now() + return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" + + +def _rate_limit_exceeded(forbidden): + """Predicate: pass only exceptions with 'rateLimitExceeded' as reason.""" + return any(error["reason"] == "rateLimitExceeded" for error in forbidden._errors) + + +# We need to wait to stay within the rate limits. +# The alternative outcome is a 403 Forbidden response from upstream, which +# they return instead of the more appropriate 429. +# See https://cloud.google.com/bigquery/quota-policy +retry_403 = test_utils.retry.RetryErrors( + google.api_core.exceptions.Forbidden, error_predicate=_rate_limit_exceeded, +) diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index ed48b0bfea06..133f609a62f3 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -13,7 +13,6 @@ # limitations under the License. import base64 -import collections import concurrent.futures import csv import datetime @@ -29,9 +28,11 @@ import psutil import pytest -import pytz import pkg_resources +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +from . import helpers + try: from google.cloud import bigquery_storage except ImportError: # pragma: NO COVER @@ -42,10 +43,6 @@ except ImportError: # pragma: NO COVER fastavro = None -try: - import pandas -except ImportError: # pragma: NO COVER - pandas = None try: import pyarrow import pyarrow.types @@ -56,7 +53,6 @@ from google.api_core.exceptions import BadRequest from google.api_core.exceptions import ClientError from google.api_core.exceptions import Conflict -from google.api_core.exceptions import Forbidden from google.api_core.exceptions import GoogleAPICallError from google.api_core.exceptions import NotFound from google.api_core.exceptions import InternalServerError @@ -65,7 +61,6 @@ from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud import bigquery_v2 -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table @@ -121,14 +116,8 @@ (TooManyRequests, InternalServerError, ServiceUnavailable) ) -PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0") PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0") -if pandas: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version -else: - PANDAS_INSTALLED_VERSION = None - if pyarrow: PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version else: @@ -154,18 +143,6 @@ def _load_json_schema(filename="schema.json"): return _parse_schema_resource(json.load(schema_file)) -def _rate_limit_exceeded(forbidden): - """Predicate: pass only exceptions with 'rateLimitExceeded' as reason.""" - return any(error["reason"] == "rateLimitExceeded" for error in forbidden._errors) - - -# We need to wait to stay within the rate limits. -# The alternative outcome is a 403 Forbidden response from upstream, which -# they return instead of the more appropriate 429. -# See https://cloud.google.com/bigquery/quota-policy -retry_403 = RetryErrors(Forbidden, error_predicate=_rate_limit_exceeded) - - class Config(object): """Run-time configuration to be modified at set-up. @@ -262,7 +239,7 @@ def test_get_dataset(self): dataset_arg = Dataset(dataset_ref) dataset_arg.friendly_name = "Friendly" dataset_arg.description = "Description" - dataset = retry_403(client.create_dataset)(dataset_arg) + dataset = helpers.retry_403(client.create_dataset)(dataset_arg) self.to_delete.append(dataset) dataset_ref = bigquery.DatasetReference(project, dataset_id) @@ -345,7 +322,7 @@ def test_create_table(self): table_arg = Table(dataset.table(table_id), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -380,7 +357,7 @@ def test_create_table_with_policy(self): table_arg = Table(dataset.table(table_id), schema=schema) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -416,7 +393,7 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): table_arg.time_partitioning = TimePartitioning(field="transaction_time") table_arg.clustering_fields = ["user_email", "store_code"] - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -430,7 +407,7 @@ def test_delete_dataset_with_string(self): dataset_id = _make_dataset_id("delete_table_true_with_string") project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) - retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) + helpers.retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) self.assertTrue(_dataset_exists(dataset_ref)) Config.CLIENT.delete_dataset(dataset_id) self.assertFalse(_dataset_exists(dataset_ref)) @@ -439,11 +416,11 @@ def test_delete_dataset_delete_contents_true(self): dataset_id = _make_dataset_id("delete_table_true_with_content") project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) + dataset = helpers.retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) table_id = "test_table" table_arg = Table(dataset.table(table_id), schema=SCHEMA) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) Config.CLIENT.delete_dataset(dataset, delete_contents=True) self.assertFalse(_table_exists(table)) @@ -455,7 +432,7 @@ def test_delete_dataset_delete_contents_false(self): table_id = "test_table" table_arg = Table(dataset.table(table_id), schema=SCHEMA) - retry_403(Config.CLIENT.create_table)(table_arg) + helpers.retry_403(Config.CLIENT.create_table)(table_arg) with self.assertRaises(exceptions.BadRequest): Config.CLIENT.delete_dataset(dataset) @@ -504,7 +481,7 @@ def test_list_tables(self): ] for table_name in tables_to_create: table = Table(dataset.table(table_name), schema=SCHEMA) - created_table = retry_403(Config.CLIENT.create_table)(table) + created_table = helpers.retry_403(Config.CLIENT.create_table)(table) self.to_delete.insert(0, created_table) # Retrieve the tables. @@ -534,7 +511,7 @@ def test_update_table(self): TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) self.assertIsNone(table.friendly_name) @@ -574,7 +551,7 @@ def test_update_table_schema(self): TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) voter = bigquery.SchemaField("voter", "BOOLEAN", mode="NULLABLE") @@ -674,7 +651,7 @@ def test_insert_rows_then_dump_table(self): ] table_arg = Table(dataset.table(TABLE_ID), schema=schema) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -732,413 +709,6 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength) ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_automatic_schema(self): - """Test that a DataFrame with dtypes that map well to BigQuery types - can be uploaded without specifying a schema. - - https://github.com/googleapis/google-cloud-python/issues/9044 - """ - df_data = collections.OrderedDict( - [ - ("bool_col", pandas.Series([True, False, True], dtype="bool")), - ( - "ts_col", - pandas.Series( - [ - datetime.datetime(2010, 1, 2, 3, 44, 50), - datetime.datetime(2011, 2, 3, 14, 50, 59), - datetime.datetime(2012, 3, 14, 15, 16), - ], - dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), - ), - ( - "dt_col", - pandas.Series( - [ - datetime.datetime(2010, 1, 2, 3, 44, 50), - datetime.datetime(2011, 2, 3, 14, 50, 59), - datetime.datetime(2012, 3, 14, 15, 16), - ], - dtype="datetime64[ns]", - ), - ), - ("float32_col", pandas.Series([1.0, 2.0, 3.0], dtype="float32")), - ("float64_col", pandas.Series([4.0, 5.0, 6.0], dtype="float64")), - ("int8_col", pandas.Series([-12, -11, -10], dtype="int8")), - ("int16_col", pandas.Series([-9, -8, -7], dtype="int16")), - ("int32_col", pandas.Series([-6, -5, -4], dtype="int32")), - ("int64_col", pandas.Series([-3, -2, -1], dtype="int64")), - ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")), - ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")), - ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")), - ] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_automatic_schema".format( - Config.CLIENT.project, dataset_id - ) - - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual( - tuple(table.schema), - ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - # BigQuery does not support uploading DATETIME values from - # Parquet files. See: - # https://github.com/googleapis/google-cloud-python/issues/9996 - bigquery.SchemaField("dt_col", "TIMESTAMP"), - bigquery.SchemaField("float32_col", "FLOAT"), - bigquery.SchemaField("float64_col", "FLOAT"), - bigquery.SchemaField("int8_col", "INTEGER"), - bigquery.SchemaField("int16_col", "INTEGER"), - bigquery.SchemaField("int32_col", "INTEGER"), - bigquery.SchemaField("int64_col", "INTEGER"), - bigquery.SchemaField("uint8_col", "INTEGER"), - bigquery.SchemaField("uint16_col", "INTEGER"), - bigquery.SchemaField("uint32_col", "INTEGER"), - ), - ) - self.assertEqual(table.num_rows, 3) - - @unittest.skipIf( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION, - "Only `pandas version >=1.0.0` is supported", - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_nullable_int64_datatype(self): - """Test that a DataFrame containing column with None-type values and int64 datatype - can be uploaded if a BigQuery schema is specified. - - https://github.com/googleapis/python-bigquery/issues/22 - """ - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( - Config.CLIENT.project, dataset_id - ) - table_schema = (bigquery.SchemaField("x", "INTEGER", mode="NULLABLE"),) - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - df_data = collections.OrderedDict( - [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),)) - self.assertEqual(table.num_rows, 4) - - @unittest.skipIf( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION, - "Only `pandas version >=1.0.0` is supported", - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): - """Test that a DataFrame containing column with None-type values and int64 datatype - can be uploaded without specifying a schema. - - https://github.com/googleapis/python-bigquery/issues/22 - """ - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( - Config.CLIENT.project, dataset_id - ) - df_data = collections.OrderedDict( - [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),)) - self.assertEqual(table.num_rows, 4) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_nulls(self): - """Test that a DataFrame with null columns can be uploaded if a - BigQuery schema is specified. - - See: https://github.com/googleapis/google-cloud-python/issues/7370 - """ - # Schema with all scalar types. - scalars_schema = ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("bytes_col", "BYTES"), - bigquery.SchemaField("date_col", "DATE"), - bigquery.SchemaField("dt_col", "DATETIME"), - bigquery.SchemaField("float_col", "FLOAT"), - bigquery.SchemaField("geo_col", "GEOGRAPHY"), - bigquery.SchemaField("int_col", "INTEGER"), - bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("str_col", "STRING"), - bigquery.SchemaField("time_col", "TIME"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) - - table_schema = scalars_schema + ( - # TODO: Array columns can't be read due to NULLABLE versus REPEATED - # mode mismatch. See: - # https://issuetracker.google.com/133415569#comment3 - # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), - # TODO: Support writing StructArrays to Parquet. See: - # https://jira.apache.org/jira/browse/ARROW-2587 - # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), - ) - num_rows = 100 - nulls = [None] * num_rows - df_data = [ - ("bool_col", nulls), - ("bytes_col", nulls), - ("date_col", nulls), - ("dt_col", nulls), - ("float_col", nulls), - ("geo_col", nulls), - ("int_col", nulls), - ("num_col", nulls), - ("str_col", nulls), - ("time_col", nulls), - ("ts_col", nulls), - ] - if _BIGNUMERIC_SUPPORT: - df_data.append(("bignum_col", nulls)) - df_data = collections.OrderedDict(df_data) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_nulls".format( - Config.CLIENT.project, dataset_id - ) - - # Create the table before loading so that schema mismatch errors are - # identified. - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - job_config = bigquery.LoadJobConfig(schema=table_schema) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, num_rows) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_required(self): - """Test that a DataFrame with required columns can be uploaded if a - BigQuery schema is specified. - - See: https://github.com/googleapis/google-cloud-python/issues/8093 - """ - table_schema = ( - bigquery.SchemaField("name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ) - - records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}] - dataframe = pandas.DataFrame(records, columns=["name", "age"]) - job_config = bigquery.LoadJobConfig(schema=table_schema) - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_required".format( - Config.CLIENT.project, dataset_id - ) - - # Create the table before loading so that schema mismatch errors are - # identified. - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - job_config = bigquery.LoadJobConfig(schema=table_schema) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 2) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_explicit_schema(self): - # Schema with all scalar types. - # TODO: Uploading DATETIME columns currently fails, thus that field type - # is temporarily removed from the test. - # See: - # https://github.com/googleapis/python-bigquery/issues/61 - # https://issuetracker.google.com/issues/151765076 - scalars_schema = ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("bytes_col", "BYTES"), - bigquery.SchemaField("date_col", "DATE"), - # bigquery.SchemaField("dt_col", "DATETIME"), - bigquery.SchemaField("float_col", "FLOAT"), - bigquery.SchemaField("geo_col", "GEOGRAPHY"), - bigquery.SchemaField("int_col", "INTEGER"), - bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("str_col", "STRING"), - bigquery.SchemaField("time_col", "TIME"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) - - table_schema = scalars_schema + ( - # TODO: Array columns can't be read due to NULLABLE versus REPEATED - # mode mismatch. See: - # https://issuetracker.google.com/133415569#comment3 - # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), - # TODO: Support writing StructArrays to Parquet. See: - # https://jira.apache.org/jira/browse/ARROW-2587 - # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), - ) - - df_data = [ - ("bool_col", [True, None, False]), - ("bytes_col", [b"abc", None, b"def"]), - ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), - ("float_col", [float("-inf"), float("nan"), float("inf")]), - ( - "geo_col", - [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - ), - ("int_col", [-9223372036854775808, None, 9223372036854775807]), - ( - "num_col", - [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - ), - ("str_col", [u"abc", None, u"def"]), - ( - "time_col", - [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], - ), - ( - "ts_col", - [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), - ], - ), - ] - if _BIGNUMERIC_SUPPORT: - df_data.append( - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ) - ) - df_data = collections.OrderedDict(df_data) - dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema".format( - Config.CLIENT.project, dataset_id - ) - - job_config = bigquery.LoadJobConfig(schema=table_schema) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 3) - - @unittest.skipIf( - pyarrow is None or PYARROW_INSTALLED_VERSION < PYARROW_MINIMUM_VERSION, - "Only `pyarrow version >=0.17.0` is supported", - ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_struct_datatype(self): - """Test that a DataFrame with struct datatype can be uploaded if a - BigQuery schema is specified. - - https://github.com/googleapis/python-bigquery/issues/21 - """ - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_struct_datatype".format( - Config.CLIENT.project, dataset_id - ) - table_schema = [ - bigquery.SchemaField( - "bar", - "RECORD", - fields=[ - bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ], - mode="REQUIRED", - ), - ] - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - df_data = [{"id": 1, "age": 21}, {"id": 2, "age": 22}, {"id": 2, "age": 23}] - dataframe = pandas.DataFrame(data={"bar": df_data}, columns=["bar"]) - - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual(table.schema, table_schema) - self.assertEqual(table.num_rows, 3) - def test_load_table_from_json_basic_use(self): table_schema = ( bigquery.SchemaField("name", "STRING", mode="REQUIRED"), @@ -1160,7 +730,7 @@ def test_load_table_from_json_basic_use(self): # Create the table before loading so that schema mismatch errors are # identified. - table = retry_403(Config.CLIENT.create_table)( + table = helpers.retry_403(Config.CLIENT.create_table)( Table(table_id, schema=table_schema) ) self.to_delete.insert(0, table) @@ -1175,149 +745,6 @@ def test_load_table_from_json_basic_use(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 2) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): - from google.cloud.bigquery.job import SourceFormat - - table_schema = ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("bytes_col", "BYTES"), - bigquery.SchemaField("date_col", "DATE"), - bigquery.SchemaField("dt_col", "DATETIME"), - bigquery.SchemaField("float_col", "FLOAT"), - bigquery.SchemaField("geo_col", "GEOGRAPHY"), - bigquery.SchemaField("int_col", "INTEGER"), - bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("bignum_col", "BIGNUMERIC"), - bigquery.SchemaField("str_col", "STRING"), - bigquery.SchemaField("time_col", "TIME"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - ) - df_data = collections.OrderedDict( - [ - ("bool_col", [True, None, False]), - ("bytes_col", ["abc", None, "def"]), - ( - "date_col", - [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], - ), - ( - "dt_col", - [ - datetime.datetime(1, 1, 1, 0, 0, 0), - None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - ], - ), - ("float_col", [float("-inf"), float("nan"), float("inf")]), - ( - "geo_col", - [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - ), - ("int_col", [-9223372036854775808, None, 9223372036854775807]), - ( - "num_col", - [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - ), - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ), - ("str_col", [u"abc", None, u"def"]), - ( - "time_col", - [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], - ), - ( - "ts_col", - [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), - ], - ), - ] - ) - dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( - Config.CLIENT.project, dataset_id - ) - - job_config = bigquery.LoadJobConfig( - schema=table_schema, source_format=SourceFormat.CSV - ) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 3) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats(self): - from google.cloud.bigquery.job import SourceFormat - - table_schema = (bigquery.SchemaField("float_col", "FLOAT"),) - df_data = collections.OrderedDict( - [ - ( - "float_col", - [ - 0.14285714285714285, - 0.51428571485748, - 0.87128748, - 1.807960649, - 2.0679610649, - 2.4406779661016949, - 3.7148514257, - 3.8571428571428572, - 1.51251252e40, - ], - ), - ] - ) - dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( - Config.CLIENT.project, dataset_id - ) - - job_config = bigquery.LoadJobConfig( - schema=table_schema, source_format=SourceFormat.CSV - ) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - rows = self._fetch_single_page(table) - floats = [r.values()[0] for r in rows] - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 9) - self.assertEqual(floats, df_data["float_col"]) - def test_load_table_from_json_schema_autodetect(self): json_rows = [ {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False}, @@ -1339,7 +766,7 @@ def test_load_table_from_json_schema_autodetect(self): bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"), ) # create the table before loading so that the column order is predictable - table = retry_403(Config.CLIENT.create_table)( + table = helpers.retry_403(Config.CLIENT.create_table)( Table(table_id, schema=table_schema) ) self.to_delete.insert(0, table) @@ -1374,7 +801,7 @@ def test_load_avro_from_uri_then_dump_table(self): dataset = self.temp_dataset(_make_dataset_id("bq_load_test")) table_arg = dataset.table(table_name) - table = retry_403(Config.CLIENT.create_table)(Table(table_arg)) + table = helpers.retry_403(Config.CLIENT.create_table)(Table(table_arg)) self.to_delete.insert(0, table) config = bigquery.LoadJobConfig() @@ -1405,7 +832,7 @@ def test_load_table_from_uri_then_dump_table(self): dataset = self.temp_dataset(_make_dataset_id("load_gcs_then_dump")) table_arg = Table(dataset.table(TABLE_ID), schema=SCHEMA) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) config = bigquery.LoadJobConfig() @@ -1623,7 +1050,7 @@ def test_get_set_iam_policy(self): table_ref = Table(dataset.table(table_id)) self.assertFalse(_table_exists(table_ref)) - table = retry_403(Config.CLIENT.create_table)(table_ref) + table = helpers.retry_403(Config.CLIENT.create_table)(table_ref) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -1648,7 +1075,7 @@ def test_test_iam_permissions(self): table_ref = Table(dataset.table(table_id)) self.assertFalse(_table_exists(table_ref)) - table = retry_403(Config.CLIENT.create_table)(table_ref) + table = helpers.retry_403(Config.CLIENT.create_table)(table_ref) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -1672,7 +1099,7 @@ def test_job_cancel(self): dataset = self.temp_dataset(DATASET_ID) table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) job = Config.CLIENT.query(QUERY, job_id_prefix=JOB_ID_PREFIX) @@ -1743,75 +1170,12 @@ def test_query_w_legacy_sql_types(self): self.assertEqual(len(rows[0]), 1) self.assertEqual(rows[0][0], example["expected"]) - def _generate_standard_sql_types_examples(self): - naive = datetime.datetime(2016, 12, 5, 12, 41, 9) - naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000) - stamp = "%s %s" % (naive.date().isoformat(), naive.time().isoformat()) - stamp_microseconds = stamp + ".250000" - zoned = naive.replace(tzinfo=UTC) - zoned_microseconds = naive_microseconds.replace(tzinfo=UTC) - numeric = decimal.Decimal("123456789.123456789") - return [ - {"sql": "SELECT 1", "expected": 1}, - {"sql": "SELECT 1.3", "expected": 1.3}, - {"sql": "SELECT TRUE", "expected": True}, - {"sql": 'SELECT "ABC"', "expected": "ABC"}, - {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"}, - {"sql": 'SELECT TIMESTAMP "%s"' % (stamp,), "expected": zoned}, - { - "sql": 'SELECT TIMESTAMP "%s"' % (stamp_microseconds,), - "expected": zoned_microseconds, - }, - {"sql": 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp,), "expected": naive}, - { - "sql": 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp_microseconds,), - "expected": naive_microseconds, - }, - {"sql": 'SELECT DATE(TIMESTAMP "%s")' % (stamp,), "expected": naive.date()}, - {"sql": 'SELECT TIME(TIMESTAMP "%s")' % (stamp,), "expected": naive.time()}, - {"sql": 'SELECT NUMERIC "%s"' % (numeric,), "expected": numeric}, - {"sql": "SELECT (1, 2)", "expected": {"_field_1": 1, "_field_2": 2}}, - { - "sql": "SELECT ((1, 2), (3, 4), 5)", - "expected": { - "_field_1": {"_field_1": 1, "_field_2": 2}, - "_field_2": {"_field_1": 3, "_field_2": 4}, - "_field_3": 5, - }, - }, - {"sql": "SELECT [1, 2, 3]", "expected": [1, 2, 3]}, - { - "sql": "SELECT ([1, 2], 3, [4, 5])", - "expected": {"_field_1": [1, 2], "_field_2": 3, "_field_3": [4, 5]}, - }, - { - "sql": "SELECT [(1, 2, 3), (4, 5, 6)]", - "expected": [ - {"_field_1": 1, "_field_2": 2, "_field_3": 3}, - {"_field_1": 4, "_field_2": 5, "_field_3": 6}, - ], - }, - { - "sql": "SELECT [([1, 2, 3], 4), ([5, 6], 7)]", - "expected": [ - {u"_field_1": [1, 2, 3], u"_field_2": 4}, - {u"_field_1": [5, 6], u"_field_2": 7}, - ], - }, - { - "sql": "SELECT ARRAY(SELECT STRUCT([1, 2]))", - "expected": [{u"_field_1": [1, 2]}], - }, - {"sql": "SELECT ST_GeogPoint(1, 2)", "expected": "POINT(1 2)"}, - ] - def test_query_w_standard_sql_types(self): - examples = self._generate_standard_sql_types_examples() - for example in examples: - rows = list(Config.CLIENT.query(example["sql"])) + for sql, expected in helpers.STANDARD_SQL_EXAMPLES: + rows = list(Config.CLIENT.query(sql)) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) - self.assertEqual(rows[0][0], example["expected"]) + self.assertEqual(rows[0][0], expected) def test_query_w_failed_query(self): from google.api_core.exceptions import BadRequest @@ -1950,13 +1314,12 @@ def test_query_statistics(self): self.assertGreater(len(plan), stages_with_inputs) def test_dbapi_w_standard_sql_types(self): - examples = self._generate_standard_sql_types_examples() - for example in examples: - Config.CURSOR.execute(example["sql"]) + for sql, expected in helpers.STANDARD_SQL_EXAMPLES: + Config.CURSOR.execute(sql) self.assertEqual(Config.CURSOR.rowcount, 1) row = Config.CURSOR.fetchone() self.assertEqual(len(row), 1) - self.assertEqual(row[0], example["expected"]) + self.assertEqual(row[0], expected) row = Config.CURSOR.fetchone() self.assertIsNone(row) @@ -2107,7 +1470,7 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): greeting = bigquery.SchemaField("greeting", "STRING", mode="NULLABLE") table_ref = dataset.table(table_id) table_arg = Table(table_ref, schema=[greeting]) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: @@ -2480,152 +1843,6 @@ def test_query_iter(self): row_tuples = [r.values() for r in query_job] self.assertEqual(row_tuples, [(1,)]) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_query_results_to_dataframe(self): - QUERY = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` - LIMIT 10 - """ - - df = Config.CLIENT.query(QUERY).result().to_dataframe() - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 10) # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] - self.assertEqual(list(df), column_names) # verify the column names - exp_datatypes = { - "id": int, - "author": str, - "time_ts": pandas.Timestamp, - "dead": bool, - } - for index, row in df.iterrows(): - for col in column_names: - # all the schema fields are nullable, so None is acceptable - if not row[col] is None: - self.assertIsInstance(row[col], exp_datatypes[col]) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_query_results_to_dataframe_w_bqstorage(self): - query = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` - LIMIT 10 - """ - - bqstorage_client = bigquery_storage.BigQueryReadClient( - credentials=Config.CLIENT._credentials - ) - - df = Config.CLIENT.query(query).result().to_dataframe(bqstorage_client) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 10) # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] - self.assertEqual(list(df), column_names) - exp_datatypes = { - "id": int, - "author": str, - "time_ts": pandas.Timestamp, - "dead": bool, - } - for index, row in df.iterrows(): - for col in column_names: - # all the schema fields are nullable, so None is acceptable - if not row[col] is None: - self.assertIsInstance(row[col], exp_datatypes[col]) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_insert_rows_from_dataframe(self): - SF = bigquery.SchemaField - schema = [ - SF("float_col", "FLOAT", mode="REQUIRED"), - SF("int_col", "INTEGER", mode="REQUIRED"), - SF("bool_col", "BOOLEAN", mode="REQUIRED"), - SF("string_col", "STRING", mode="NULLABLE"), - ] - - dataframe = pandas.DataFrame( - [ - { - "float_col": 1.11, - "bool_col": True, - "string_col": "my string", - "int_col": 10, - }, - { - "float_col": 2.22, - "bool_col": False, - "string_col": "another string", - "int_col": 20, - }, - { - "float_col": 3.33, - "bool_col": False, - "string_col": "another string", - "int_col": 30, - }, - { - "float_col": 4.44, - "bool_col": True, - "string_col": "another string", - "int_col": 40, - }, - { - "float_col": 5.55, - "bool_col": False, - "string_col": "another string", - "int_col": 50, - }, - { - "float_col": 6.66, - "bool_col": True, - # Include a NaN value, because pandas often uses NaN as a - # NULL value indicator. - "string_col": float("NaN"), - "int_col": 60, - }, - ] - ) - - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("issue_7553")) - table_arg = Table(dataset.table(table_id), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) - self.to_delete.insert(0, table) - - chunk_errors = Config.CLIENT.insert_rows_from_dataframe( - table, dataframe, chunk_size=3 - ) - for errors in chunk_errors: - assert not errors - - # Use query to fetch rows instead of listing directly from the table so - # that we get values from the streaming buffer. - rows = list( - Config.CLIENT.query( - "SELECT * FROM `{}.{}.{}`".format( - table.project, table.dataset_id, table.table_id - ) - ) - ) - - sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) - row_tuples = [r.values() for r in sorted_rows] - expected = [ - tuple(None if col != col else col for col in data_row) - for data_row in dataframe.itertuples(index=False) - ] - - assert len(row_tuples) == len(expected) - - for row, expected_row in zip(row_tuples, expected): - self.assertCountEqual(row, expected_row) # column order does not matter - def test_insert_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField @@ -2656,7 +1873,7 @@ def test_insert_rows_nested_nested(self): table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("issue_2951")) table_arg = Table(dataset.table(table_id), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) Config.CLIENT.insert_rows(table, to_insert) @@ -2696,7 +1913,7 @@ def test_insert_rows_nested_nested_dictionary(self): table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("issue_2951")) table_arg = Table(dataset.table(table_id), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) Config.CLIENT.insert_rows(table, to_insert) @@ -2740,8 +1957,8 @@ def test_create_routine(self): str(routine.reference) ) - routine = retry_403(Config.CLIENT.create_routine)(routine) - query_job = retry_403(Config.CLIENT.query)(query_string) + routine = helpers.retry_403(Config.CLIENT.create_routine)(routine) + query_job = helpers.retry_403(Config.CLIENT.query)(query_string) rows = list(query_job.result()) assert len(rows) == 1 @@ -2752,7 +1969,7 @@ def test_create_table_rows_fetch_nested_schema(self): dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema")) schema = _load_json_schema() table_arg = Table(dataset.table(table_name), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_name) @@ -2872,85 +2089,6 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_nested_table_to_dataframe(self): - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.job import WriteDisposition - - SF = bigquery.SchemaField - schema = [ - SF("string_col", "STRING", mode="NULLABLE"), - SF( - "record_col", - "RECORD", - mode="NULLABLE", - fields=[ - SF("nested_string", "STRING", mode="NULLABLE"), - SF("nested_repeated", "INTEGER", mode="REPEATED"), - SF( - "nested_record", - "RECORD", - mode="NULLABLE", - fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")], - ), - ], - ), - SF("bigfloat_col", "FLOAT", mode="NULLABLE"), - SF("smallfloat_col", "FLOAT", mode="NULLABLE"), - ] - record = { - "nested_string": "another string value", - "nested_repeated": [0, 1, 2], - "nested_record": {"nested_nested_string": "some deep insight"}, - } - to_insert = [ - { - "string_col": "Some value", - "record_col": record, - "bigfloat_col": 3.14, - "smallfloat_col": 2.72, - } - ] - rows = [json.dumps(row) for row in to_insert] - body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("nested_df")) - table = dataset.table(table_id) - self.to_delete.insert(0, table) - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON - job_config.schema = schema - # Load a table using a local JSON file from memory. - Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - - df = Config.CLIENT.list_rows(table, selected_fields=schema).to_dataframe( - dtypes={"smallfloat_col": "float16"} - ) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 1) # verify the number of rows - exp_columns = ["string_col", "record_col", "bigfloat_col", "smallfloat_col"] - self.assertEqual(list(df), exp_columns) # verify the column names - row = df.iloc[0] - # verify the row content - self.assertEqual(row["string_col"], "Some value") - expected_keys = tuple(sorted(record.keys())) - row_keys = tuple(sorted(row["record_col"].keys())) - self.assertEqual(row_keys, expected_keys) - # Can't compare numpy arrays, which pyarrow encodes the embedded - # repeated column to, so convert to list. - self.assertEqual(list(row["record_col"]["nested_repeated"]), [0, 1, 2]) - # verify that nested data can be accessed with indices/keys - self.assertEqual(row["record_col"]["nested_repeated"][0], 0) - self.assertEqual( - row["record_col"]["nested_record"]["nested_nested_string"], - "some deep insight", - ) - # verify dtypes - self.assertEqual(df.dtypes["bigfloat_col"].name, "float64") - self.assertEqual(df.dtypes["smallfloat_col"].name, "float16") - def test_list_rows_empty_table(self): from google.cloud.bigquery.table import RowIterator @@ -2999,34 +2137,13 @@ def test_list_rows_page_size(self): page = next(pages) self.assertEqual(page.num_items, num_last_page) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_list_rows_max_results_w_bqstorage(self): - table_ref = DatasetReference("bigquery-public-data", "utility_us").table( - "country_code_iso" - ) - bqstorage_client = bigquery_storage.BigQueryReadClient( - credentials=Config.CLIENT._credentials - ) - - row_iterator = Config.CLIENT.list_rows( - table_ref, - selected_fields=[bigquery.SchemaField("country_name", "STRING")], - max_results=100, - ) - dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - - self.assertEqual(len(dataframe.index), 100) - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) dataset = Dataset(dataset_ref) if location: dataset.location = location - dataset = retry_403(Config.CLIENT.create_dataset)(dataset) + dataset = helpers.retry_403(Config.CLIENT.create_dataset)(dataset) self.to_delete.append(dataset) return dataset diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py new file mode 100644 index 000000000000..1164e36da8b0 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -0,0 +1,801 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""System tests for pandas connector.""" + +import collections +import datetime +import decimal +import json +import io +import operator + +import pkg_resources +import pytest +import pytz + +from google.cloud import bigquery +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +from . import helpers + + +bigquery_storage = pytest.importorskip( + "google.cloud.bigquery_storage", minversion="2.0.0" +) +pandas = pytest.importorskip("pandas", minversion="0.23.0") +pyarrow = pytest.importorskip("pyarrow", minversion="1.0.0") + + +PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") + + +def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_id): + """Test that a DataFrame with dtypes that map well to BigQuery types + can be uploaded without specifying a schema. + + https://github.com/googleapis/google-cloud-python/issues/9044 + """ + df_data = collections.OrderedDict( + [ + ("bool_col", pandas.Series([True, False, True], dtype="bool")), + ( + "ts_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ).dt.tz_localize(pytz.utc), + ), + ( + "dt_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ), + ), + ("float32_col", pandas.Series([1.0, 2.0, 3.0], dtype="float32")), + ("float64_col", pandas.Series([4.0, 5.0, 6.0], dtype="float64")), + ("int8_col", pandas.Series([-12, -11, -10], dtype="int8")), + ("int16_col", pandas.Series([-9, -8, -7], dtype="int16")), + ("int32_col", pandas.Series([-6, -5, -4], dtype="int32")), + ("int64_col", pandas.Series([-3, -2, -1], dtype="int64")), + ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")), + ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")), + ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")), + ] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_automatic_schema".format( + bigquery_client.project, dataset_id + ) + + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + # BigQuery does not support uploading DATETIME values from + # Parquet files. See: + # https://github.com/googleapis/google-cloud-python/issues/9996 + bigquery.SchemaField("dt_col", "TIMESTAMP"), + bigquery.SchemaField("float32_col", "FLOAT"), + bigquery.SchemaField("float64_col", "FLOAT"), + bigquery.SchemaField("int8_col", "INTEGER"), + bigquery.SchemaField("int16_col", "INTEGER"), + bigquery.SchemaField("int32_col", "INTEGER"), + bigquery.SchemaField("int64_col", "INTEGER"), + bigquery.SchemaField("uint8_col", "INTEGER"), + bigquery.SchemaField("uint16_col", "INTEGER"), + bigquery.SchemaField("uint32_col", "INTEGER"), + ) + assert table.num_rows == 3 + + +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION, + reason="Only `pandas version >=1.0.0` is supported", +) +def test_load_table_from_dataframe_w_nullable_int64_datatype( + bigquery_client, dataset_id +): + """Test that a DataFrame containing column with None-type values and int64 datatype + can be uploaded if a BigQuery schema is specified. + + https://github.com/googleapis/python-bigquery/issues/22 + """ + table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( + bigquery_client.project, dataset_id + ) + table_schema = (bigquery.SchemaField("x", "INTEGER", mode="NULLABLE"),) + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + df_data = collections.OrderedDict( + [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == (bigquery.SchemaField("x", "INTEGER"),) + assert table.num_rows == 4 + + +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION, + reason="Only `pandas version >=1.0.0` is supported", +) +def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( + bigquery_client, dataset_id +): + """Test that a DataFrame containing column with None-type values and int64 datatype + can be uploaded without specifying a schema. + + https://github.com/googleapis/python-bigquery/issues/22 + """ + + table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( + bigquery_client.project, dataset_id + ) + df_data = collections.OrderedDict( + [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == (bigquery.SchemaField("x", "INTEGER"),) + assert table.num_rows == 4 + + +def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): + """Test that a DataFrame with null columns can be uploaded if a + BigQuery schema is specified. + + See: https://github.com/googleapis/google-cloud-python/issues/7370 + """ + # Schema with all scalar types. + scalars_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + + table_schema = scalars_schema + ( + # TODO: Array columns can't be read due to NULLABLE versus REPEATED + # mode mismatch. See: + # https://issuetracker.google.com/133415569#comment3 + # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), + # TODO: Support writing StructArrays to Parquet. See: + # https://jira.apache.org/jira/browse/ARROW-2587 + # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), + ) + num_rows = 100 + nulls = [None] * num_rows + df_data = [ + ("bool_col", nulls), + ("bytes_col", nulls), + ("date_col", nulls), + ("dt_col", nulls), + ("float_col", nulls), + ("geo_col", nulls), + ("int_col", nulls), + ("num_col", nulls), + ("str_col", nulls), + ("time_col", nulls), + ("ts_col", nulls), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append(("bignum_col", nulls)) + df_data = collections.OrderedDict(df_data) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_nulls".format( + bigquery_client.project, dataset_id + ) + + # Create the table before loading so that schema mismatch errors are + # identified. + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table) + assert tuple(table.schema) == table_schema + assert table.num_rows == num_rows + + +def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): + """Test that a DataFrame with required columns can be uploaded if a + BigQuery schema is specified. + + See: https://github.com/googleapis/google-cloud-python/issues/8093 + """ + table_schema = ( + bigquery.SchemaField("name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ) + + records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}] + dataframe = pandas.DataFrame(records, columns=["name", "age"]) + job_config = bigquery.LoadJobConfig(schema=table_schema) + table_id = "{}.{}.load_table_from_dataframe_w_required".format( + bigquery_client.project, dataset_id + ) + + # Create the table before loading so that schema mismatch errors are + # identified. + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table) + assert tuple(table.schema) == table_schema + assert table.num_rows == 2 + + +def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id): + # Schema with all scalar types. + # TODO: Uploading DATETIME columns currently fails, thus that field type + # is temporarily removed from the test. + # See: + # https://github.com/googleapis/python-bigquery/issues/61 + # https://issuetracker.google.com/issues/151765076 + scalars_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + # bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + + table_schema = scalars_schema + ( + # TODO: Array columns can't be read due to NULLABLE versus REPEATED + # mode mismatch. See: + # https://issuetracker.google.com/133415569#comment3 + # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), + # TODO: Support writing StructArrays to Parquet. See: + # https://jira.apache.org/jira/browse/ARROW-2587 + # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), + ) + + df_data = [ + ("bool_col", [True, None, False]), + ("bytes_col", [b"abc", None, b"def"]), + ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), + # ( + # "dt_col", + # [ + # datetime.datetime(1, 1, 1, 0, 0, 0), + # None, + # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + # ], + # ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + ["POINT(30 10)", None, "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ("str_col", ["abc", None, "def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), + ], + ), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append( + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ) + ) + df_data = collections.OrderedDict(df_data) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema".format( + bigquery_client.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == table_schema + assert table.num_rows == 3 + + +def test_load_table_from_dataframe_w_struct_datatype(bigquery_client, dataset_id): + """Test that a DataFrame with struct datatype can be uploaded if a + BigQuery schema is specified. + + https://github.com/googleapis/python-bigquery/issues/21 + """ + table_id = "{}.{}.load_table_from_dataframe_w_struct_datatype".format( + bigquery_client.project, dataset_id + ) + table_schema = [ + bigquery.SchemaField( + "bar", + "RECORD", + fields=[ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + mode="REQUIRED", + ), + ] + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + df_data = [{"id": 1, "age": 21}, {"id": 2, "age": 22}, {"id": 2, "age": 23}] + dataframe = pandas.DataFrame(data={"bar": df_data}, columns=["bar"]) + + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert table.schema == table_schema + assert table.num_rows == 3 + + +def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( + bigquery_client, dataset_id +): + from google.cloud.bigquery.job import SourceFormat + + table_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + df_data = collections.OrderedDict( + [ + ("bool_col", [True, None, False]), + ("bytes_col", ["abc", None, "def"]), + ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)],), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), + ("str_col", ["abc", None, "def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + ), + ] + ) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( + bigquery_client.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig( + schema=table_schema, source_format=SourceFormat.CSV + ) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == table_schema + assert table.num_rows == 3 + + +def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( + bigquery_client, dataset_id +): + from google.cloud.bigquery.job import SourceFormat + + table_schema = (bigquery.SchemaField("float_col", "FLOAT"),) + df_data = collections.OrderedDict( + [ + ( + "float_col", + [ + 0.14285714285714285, + 0.51428571485748, + 0.87128748, + 1.807960649, + 2.0679610649, + 2.4406779661016949, + 3.7148514257, + 3.8571428571428572, + 1.51251252e40, + ], + ), + ] + ) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( + bigquery_client.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig( + schema=table_schema, source_format=SourceFormat.CSV + ) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table_id) + rows = bigquery_client.list_rows(table_id) + floats = [r.values()[0] for r in rows] + assert tuple(table.schema) == table_schema + assert table.num_rows == 9 + assert floats == df_data["float_col"] + + +def test_query_results_to_dataframe(bigquery_client): + QUERY = """ + SELECT id, author, time_ts, dead + FROM `bigquery-public-data.hacker_news.comments` + LIMIT 10 + """ + + df = bigquery_client.query(QUERY).result().to_dataframe() + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 10 # verify the number of rows + column_names = ["id", "author", "time_ts", "dead"] + assert list(df) == column_names # verify the column names + exp_datatypes = { + "id": int, + "author": str, + "time_ts": pandas.Timestamp, + "dead": bool, + } + for _, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + assert isinstance(row[col], exp_datatypes[col]) + + +def test_query_results_to_dataframe_w_bqstorage(bigquery_client): + query = """ + SELECT id, author, time_ts, dead + FROM `bigquery-public-data.hacker_news.comments` + LIMIT 10 + """ + + bqstorage_client = bigquery_storage.BigQueryReadClient( + credentials=bigquery_client._credentials + ) + + df = bigquery_client.query(query).result().to_dataframe(bqstorage_client) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 10 # verify the number of rows + column_names = ["id", "author", "time_ts", "dead"] + assert list(df) == column_names + exp_datatypes = { + "id": int, + "author": str, + "time_ts": pandas.Timestamp, + "dead": bool, + } + for index, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + assert isinstance(row[col], exp_datatypes[col]) + + +def test_insert_rows_from_dataframe(bigquery_client, dataset_id): + SF = bigquery.SchemaField + schema = [ + SF("float_col", "FLOAT", mode="REQUIRED"), + SF("int_col", "INTEGER", mode="REQUIRED"), + SF("bool_col", "BOOLEAN", mode="REQUIRED"), + SF("string_col", "STRING", mode="NULLABLE"), + ] + + dataframe = pandas.DataFrame( + [ + { + "float_col": 1.11, + "bool_col": True, + "string_col": "my string", + "int_col": 10, + }, + { + "float_col": 2.22, + "bool_col": False, + "string_col": "another string", + "int_col": 20, + }, + { + "float_col": 3.33, + "bool_col": False, + "string_col": "another string", + "int_col": 30, + }, + { + "float_col": 4.44, + "bool_col": True, + "string_col": "another string", + "int_col": 40, + }, + { + "float_col": 5.55, + "bool_col": False, + "string_col": "another string", + "int_col": 50, + }, + { + "float_col": 6.66, + "bool_col": True, + # Include a NaN value, because pandas often uses NaN as a + # NULL value indicator. + "string_col": float("NaN"), + "int_col": 60, + }, + ] + ) + + table_id = f"{bigquery_client.project}.{dataset_id}.test_insert_rows_from_dataframe" + table_arg = bigquery.Table(table_id, schema=schema) + table = helpers.retry_403(bigquery_client.create_table)(table_arg) + + chunk_errors = bigquery_client.insert_rows_from_dataframe( + table, dataframe, chunk_size=3 + ) + for errors in chunk_errors: + assert not errors + + # Use query to fetch rows instead of listing directly from the table so + # that we get values from the streaming buffer. + rows = list( + bigquery_client.query( + "SELECT * FROM `{}.{}.{}`".format( + table.project, table.dataset_id, table.table_id + ) + ) + ) + + sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) + row_tuples = [r.values() for r in sorted_rows] + expected = [ + # Pandas often represents NULL values as NaN. Convert to None for + # easier comparison. + tuple(None if col != col else col for col in data_row) + for data_row in dataframe.itertuples(index=False) + ] + + assert len(row_tuples) == len(expected) + + for row, expected_row in zip(row_tuples, expected): + assert ( + # Use Counter to verify the same number of values in each, because + # column order does not matter. + collections.Counter(row) + == collections.Counter(expected_row) + ) + + +def test_nested_table_to_dataframe(bigquery_client, dataset_id): + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition + + SF = bigquery.SchemaField + schema = [ + SF("string_col", "STRING", mode="NULLABLE"), + SF( + "record_col", + "RECORD", + mode="NULLABLE", + fields=[ + SF("nested_string", "STRING", mode="NULLABLE"), + SF("nested_repeated", "INTEGER", mode="REPEATED"), + SF( + "nested_record", + "RECORD", + mode="NULLABLE", + fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")], + ), + ], + ), + SF("bigfloat_col", "FLOAT", mode="NULLABLE"), + SF("smallfloat_col", "FLOAT", mode="NULLABLE"), + ] + record = { + "nested_string": "another string value", + "nested_repeated": [0, 1, 2], + "nested_record": {"nested_nested_string": "some deep insight"}, + } + to_insert = [ + { + "string_col": "Some value", + "record_col": record, + "bigfloat_col": 3.14, + "smallfloat_col": 2.72, + } + ] + rows = [json.dumps(row) for row in to_insert] + body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) + table_id = f"{bigquery_client.project}.{dataset_id}.test_nested_table_to_dataframe" + job_config = bigquery.LoadJobConfig() + job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE + job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON + job_config.schema = schema + # Load a table using a local JSON file from memory. + bigquery_client.load_table_from_file(body, table_id, job_config=job_config).result() + + df = bigquery_client.list_rows(table_id, selected_fields=schema).to_dataframe( + dtypes={"smallfloat_col": "float16"} + ) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 1 # verify the number of rows + exp_columns = ["string_col", "record_col", "bigfloat_col", "smallfloat_col"] + assert list(df) == exp_columns # verify the column names + row = df.iloc[0] + # verify the row content + assert row["string_col"] == "Some value" + expected_keys = tuple(sorted(record.keys())) + row_keys = tuple(sorted(row["record_col"].keys())) + assert row_keys == expected_keys + # Can't compare numpy arrays, which pyarrow encodes the embedded + # repeated column to, so convert to list. + assert list(row["record_col"]["nested_repeated"]) == [0, 1, 2] + # verify that nested data can be accessed with indices/keys + assert row["record_col"]["nested_repeated"][0] == 0 + assert ( + row["record_col"]["nested_record"]["nested_nested_string"] + == "some deep insight" + ) + # verify dtypes + assert df.dtypes["bigfloat_col"].name == "float64" + assert df.dtypes["smallfloat_col"].name == "float16" + + +def test_list_rows_max_results_w_bqstorage(bigquery_client): + table_ref = bigquery.DatasetReference("bigquery-public-data", "utility_us").table( + "country_code_iso" + ) + bqstorage_client = bigquery_storage.BigQueryReadClient( + credentials=bigquery_client._credentials + ) + + row_iterator = bigquery_client.list_rows( + table_ref, + selected_fields=[bigquery.SchemaField("country_name", "STRING")], + max_results=100, + ) + with pytest.warns( + UserWarning, match="Cannot use bqstorage_client if max_results is set" + ): + dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + + assert len(dataframe.index) == 100 From 6a908a1d8cf41c31ea06d467795ad7e5e216a6ee Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 16 Mar 2021 08:13:39 -0700 Subject: [PATCH 1087/2016] chore: add pre-commit-config to renovate ignore paths (#552) Disable renovate PRs on the .pre-commit-config.yaml which is templated from synthtool. https://docs.renovatebot.com/configuration-options/#ignorepaths Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Mon Mar 15 09:05:39 2021 -0600 Source-Repo: googleapis/synthtool Source-Sha: 2c54c473779ea731128cea61a3a6c975a08a5378 Source-Link: https://github.com/googleapis/synthtool/commit/2c54c473779ea731128cea61a3a6c975a08a5378 --- packages/google-cloud-bigquery/renovate.json | 3 ++- packages/google-cloud-bigquery/synth.metadata | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/renovate.json b/packages/google-cloud-bigquery/renovate.json index 4fa949311b20..f08bc22c9a55 100644 --- a/packages/google-cloud-bigquery/renovate.json +++ b/packages/google-cloud-bigquery/renovate.json @@ -1,5 +1,6 @@ { "extends": [ "config:base", ":preserveSemverRanges" - ] + ], + "ignorePaths": [".pre-commit-config.yaml"] } diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index cab985521217..9f81d304598a 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "f0259eb7ed4ff254ee238e87651992ff93481dae" + "sha": "efdf1c653770f7c03c17e31e3c2f279bb685637b" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" + "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" + "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" } } ], From 7beceec6376c341638080c3d0cbdce7d1ae9728f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 16 Mar 2021 16:24:21 +0100 Subject: [PATCH 1088/2016] fix: remove DB-API dependency on pyarrow with decimal query parameters (#551) * fix: DB API pyarrow dependency with decimal values DB API should gracefully handle the case when the optional pyarrow dependency is not installed. * Blacken DB API helpers tests * Refine the logic for recognizing NUMERIC Decimals --- .../google/cloud/bigquery/dbapi/_helpers.py | 23 ++++--- .../tests/unit/test_dbapi__helpers.py | 66 +++++++++++++------ 2 files changed, 60 insertions(+), 29 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 6b36d6e433bf..69694c98c0cd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -19,16 +19,15 @@ import functools import numbers -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - from google.cloud import bigquery from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import exceptions +_NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28") +_NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28") + + def scalar_to_query_parameter(value, name=None): """Convert a scalar value into a query parameter. @@ -189,12 +188,20 @@ def bigquery_scalar_type(value): elif isinstance(value, numbers.Real): return "FLOAT64" elif isinstance(value, decimal.Decimal): - # We check for NUMERIC before BIGNUMERIC in order to support pyarrow < 3.0. - scalar_object = pyarrow.scalar(value) - if isinstance(scalar_object, pyarrow.Decimal128Scalar): + vtuple = value.as_tuple() + # NUMERIC values have precision of 38 (number of digits) and scale of 9 (number + # of fractional digits), and their max absolute value must be strictly smaller + # than 1.0E+29. + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types + if ( + len(vtuple.digits) <= 38 # max precision: 38 + and vtuple.exponent >= -9 # max scale: 9 + and _NUMERIC_SERVER_MIN <= value <= _NUMERIC_SERVER_MAX + ): return "NUMERIC" else: return "BIGNUMERIC" + elif isinstance(value, str): return "STRING" elif isinstance(value, bytes): diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index c28c014d48f9..9a505c1ec3b3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -25,7 +25,6 @@ import google.cloud._helpers from google.cloud.bigquery import table -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -39,9 +38,8 @@ def test_scalar_to_query_parameter(self): (123, "INT64"), (-123456789, "INT64"), (1.25, "FLOAT64"), - (decimal.Decimal("1.25"), "NUMERIC"), (b"I am some bytes", "BYTES"), - (u"I am a string", "STRING"), + ("I am a string", "STRING"), (datetime.date(2017, 4, 1), "DATE"), (datetime.time(12, 34, 56), "TIME"), (datetime.datetime(2012, 3, 4, 5, 6, 7), "DATETIME"), @@ -51,14 +49,17 @@ def test_scalar_to_query_parameter(self): ), "TIMESTAMP", ), + (decimal.Decimal("1.25"), "NUMERIC"), + (decimal.Decimal("9.9999999999999999999999999999999999999E+28"), "NUMERIC"), + (decimal.Decimal("1.0E+29"), "BIGNUMERIC"), # more than max NUMERIC value + (decimal.Decimal("1.123456789"), "NUMERIC"), + (decimal.Decimal("1.1234567891"), "BIGNUMERIC"), # scale > 9 + (decimal.Decimal("12345678901234567890123456789.012345678"), "NUMERIC"), + ( + decimal.Decimal("12345678901234567890123456789012345678"), + "BIGNUMERIC", # larger than max NUMERIC value, despite precision <=38 + ), ] - if _BIGNUMERIC_SUPPORT: - expected_types.append( - ( - decimal.Decimal("1.1234567890123456789012345678901234567890"), - "BIGNUMERIC", - ) - ) for value, expected_type in expected_types: msg = "value: {} expected_type: {}".format(value, expected_type) @@ -71,6 +72,33 @@ def test_scalar_to_query_parameter(self): self.assertEqual(named_parameter.type_, expected_type, msg=msg) self.assertEqual(named_parameter.value, value, msg=msg) + def test_decimal_to_query_parameter(self): # TODO: merge with previous test + + expected_types = [ + (decimal.Decimal("9.9999999999999999999999999999999999999E+28"), "NUMERIC"), + (decimal.Decimal("1.0E+29"), "BIGNUMERIC"), # more than max value + (decimal.Decimal("1.123456789"), "NUMERIC"), + (decimal.Decimal("1.1234567891"), "BIGNUMERIC"), # scale > 9 + (decimal.Decimal("12345678901234567890123456789.012345678"), "NUMERIC"), + ( + decimal.Decimal("12345678901234567890123456789012345678"), + "BIGNUMERIC", # larger than max size, even if precision <=38 + ), + ] + + for value, expected_type in expected_types: + msg = f"value: {value} expected_type: {expected_type}" + + parameter = _helpers.scalar_to_query_parameter(value) + self.assertIsNone(parameter.name, msg=msg) + self.assertEqual(parameter.type_, expected_type, msg=msg) + self.assertEqual(parameter.value, value, msg=msg) + + named_parameter = _helpers.scalar_to_query_parameter(value, name="myvar") + self.assertEqual(named_parameter.name, "myvar", msg=msg) + self.assertEqual(named_parameter.type_, expected_type, msg=msg) + self.assertEqual(named_parameter.value, value, msg=msg) + def test_scalar_to_query_parameter_w_unexpected_type(self): with self.assertRaises(exceptions.ProgrammingError): _helpers.scalar_to_query_parameter(value={"a": "dictionary"}) @@ -89,8 +117,9 @@ def test_array_to_query_parameter_valid_argument(self): ([123, -456, 0], "INT64"), ([1.25, 2.50], "FLOAT64"), ([decimal.Decimal("1.25")], "NUMERIC"), + ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC"), ([b"foo", b"bar"], "BYTES"), - ([u"foo", u"bar"], "STRING"), + (["foo", "bar"], "STRING"), ([datetime.date(2017, 4, 1), datetime.date(2018, 4, 1)], "DATE"), ([datetime.time(12, 34, 56), datetime.time(10, 20, 30)], "TIME"), ( @@ -113,11 +142,6 @@ def test_array_to_query_parameter_valid_argument(self): ), ] - if _BIGNUMERIC_SUPPORT: - expected_types.append( - ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC") - ) - for values, expected_type in expected_types: msg = "value: {} expected_type: {}".format(values, expected_type) parameter = _helpers.array_to_query_parameter(values) @@ -134,7 +158,7 @@ def test_array_to_query_parameter_empty_argument(self): _helpers.array_to_query_parameter([]) def test_array_to_query_parameter_unsupported_sequence(self): - unsupported_iterables = [{10, 20, 30}, u"foo", b"bar", bytearray([65, 75, 85])] + unsupported_iterables = [{10, 20, 30}, "foo", b"bar", bytearray([65, 75, 85])] for iterable in unsupported_iterables: with self.assertRaises(exceptions.ProgrammingError): _helpers.array_to_query_parameter(iterable) @@ -144,7 +168,7 @@ def test_array_to_query_parameter_sequence_w_invalid_elements(self): _helpers.array_to_query_parameter([object(), 2, 7]) def test_to_query_parameters_w_dict(self): - parameters = {"somebool": True, "somestring": u"a-string-value"} + parameters = {"somebool": True, "somestring": "a-string-value"} query_parameters = _helpers.to_query_parameters(parameters) query_parameter_tuples = [] for param in query_parameters: @@ -154,7 +178,7 @@ def test_to_query_parameters_w_dict(self): sorted( [ ("somebool", "BOOL", True), - ("somestring", "STRING", u"a-string-value"), + ("somestring", "STRING", "a-string-value"), ] ), ) @@ -177,14 +201,14 @@ def test_to_query_parameters_w_dict_dict_param(self): _helpers.to_query_parameters(parameters) def test_to_query_parameters_w_list(self): - parameters = [True, u"a-string-value"] + parameters = [True, "a-string-value"] query_parameters = _helpers.to_query_parameters(parameters) query_parameter_tuples = [] for param in query_parameters: query_parameter_tuples.append((param.name, param.type_, param.value)) self.assertSequenceEqual( sorted(query_parameter_tuples), - sorted([(None, "BOOL", True), (None, "STRING", u"a-string-value")]), + sorted([(None, "BOOL", True), (None, "STRING", "a-string-value")]), ) def test_to_query_parameters_w_list_array_param(self): From 10a16cb5b0edaadf37f7a298aafd9dc330bd5632 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 16 Mar 2021 15:36:02 +0000 Subject: [PATCH 1089/2016] chore: release 2.12.0 (#547) :robot: I have created a release \*beep\* \*boop\* --- ## [2.12.0](https://www.github.com/googleapis/python-bigquery/compare/v2.11.0...v2.12.0) (2021-03-16) ### Features * make QueryJob.done() method more performant ([#544](https://www.github.com/googleapis/python-bigquery/issues/544)) ([a3ab9ef](https://www.github.com/googleapis/python-bigquery/commit/a3ab9efdd0758829845cfcb6ca0ac1f03ab44f64)) ### Bug Fixes * remove DB-API dependency on pyarrow with decimal query parameters ([#551](https://www.github.com/googleapis/python-bigquery/issues/551)) ([1b946ba](https://www.github.com/googleapis/python-bigquery/commit/1b946ba23ee7df86114c6acb338ec34e6c92af6d)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 512d38108a57..e5ed7bc9dc19 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.12.0](https://www.github.com/googleapis/python-bigquery/compare/v2.11.0...v2.12.0) (2021-03-16) + + +### Features + +* make QueryJob.done() method more performant ([#544](https://www.github.com/googleapis/python-bigquery/issues/544)) ([a3ab9ef](https://www.github.com/googleapis/python-bigquery/commit/a3ab9efdd0758829845cfcb6ca0ac1f03ab44f64)) + + +### Bug Fixes + +* remove DB-API dependency on pyarrow with decimal query parameters ([#551](https://www.github.com/googleapis/python-bigquery/issues/551)) ([1b946ba](https://www.github.com/googleapis/python-bigquery/commit/1b946ba23ee7df86114c6acb338ec34e6c92af6d)) + ## [2.11.0](https://www.github.com/googleapis/python-bigquery/compare/v2.10.0...v2.11.0) (2021-03-09) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index e6e357434ce4..67e043bde2a0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.11.0" +__version__ = "2.12.0" From 95289f5c61127ec53044261bc230a0ea85b48359 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 16 Mar 2021 18:26:04 +0100 Subject: [PATCH 1090/2016] test: remove duplicate test for Decimal scalars (#553) --- .../tests/unit/test_dbapi__helpers.py | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 9a505c1ec3b3..4b2724de0bd1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -72,33 +72,6 @@ def test_scalar_to_query_parameter(self): self.assertEqual(named_parameter.type_, expected_type, msg=msg) self.assertEqual(named_parameter.value, value, msg=msg) - def test_decimal_to_query_parameter(self): # TODO: merge with previous test - - expected_types = [ - (decimal.Decimal("9.9999999999999999999999999999999999999E+28"), "NUMERIC"), - (decimal.Decimal("1.0E+29"), "BIGNUMERIC"), # more than max value - (decimal.Decimal("1.123456789"), "NUMERIC"), - (decimal.Decimal("1.1234567891"), "BIGNUMERIC"), # scale > 9 - (decimal.Decimal("12345678901234567890123456789.012345678"), "NUMERIC"), - ( - decimal.Decimal("12345678901234567890123456789012345678"), - "BIGNUMERIC", # larger than max size, even if precision <=38 - ), - ] - - for value, expected_type in expected_types: - msg = f"value: {value} expected_type: {expected_type}" - - parameter = _helpers.scalar_to_query_parameter(value) - self.assertIsNone(parameter.name, msg=msg) - self.assertEqual(parameter.type_, expected_type, msg=msg) - self.assertEqual(parameter.value, value, msg=msg) - - named_parameter = _helpers.scalar_to_query_parameter(value, name="myvar") - self.assertEqual(named_parameter.name, "myvar", msg=msg) - self.assertEqual(named_parameter.type_, expected_type, msg=msg) - self.assertEqual(named_parameter.value, value, msg=msg) - def test_scalar_to_query_parameter_w_unexpected_type(self): with self.assertRaises(exceptions.ProgrammingError): _helpers.scalar_to_query_parameter(value={"a": "dictionary"}) From 1223734b823dc406da9df72b5d4ba7f0b5335925 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 16 Mar 2021 18:49:35 +0100 Subject: [PATCH 1091/2016] chore(deps): update dependency google-cloud-bigquery to v2.12.0 (#554) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 34896627e68f..ef9264454d63 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.11.0 +google-cloud-bigquery==2.12.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index b55d2b3a4a25..db1c4b66a631 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.11.0 +google-cloud-bigquery==2.12.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.3 grpcio==1.36.1 From 9a6cb44d8111f9ced74529dc8c13684d1fcdf48f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 19 Mar 2021 12:54:53 -0500 Subject: [PATCH 1092/2016] fix: avoid policy tags 403 error in `load_table_from_dataframe` (#557) * WIP: fix: don't set policy tags in load job from dataframe * copy fields parameter for struct support * update tests to allow missing description property * fix load from dataframe test on python 3.6 Also, check that sent schema matches DataFrame order, not table order --- .../google/cloud/bigquery/client.py | 13 +- .../google/cloud/bigquery/schema.py | 43 ++++--- .../tests/unit/job/test_load_config.py | 12 +- .../tests/unit/test_client.py | 113 ++++++++++++------ .../tests/unit/test_external_config.py | 9 +- .../tests/unit/test_schema.py | 109 +++++++---------- 6 files changed, 150 insertions(+), 149 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index bdbcb767cb70..305d60d3be01 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2291,9 +2291,18 @@ def load_table_from_dataframe( name for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe) ) - # schema fields not present in the dataframe are not needed job_config.schema = [ - field for field in table.schema if field.name in columns_and_indexes + # Field description and policy tags are not needed to + # serialize a data frame. + SchemaField( + field.name, + field.field_type, + mode=field.mode, + fields=field.fields, + ) + # schema fields not present in the dataframe are not needed + for field in table.schema + if field.name in columns_and_indexes ] job_config.schema = _pandas_helpers.dataframe_to_bq_schema( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 9be27f3e8f8f..680dcc13850c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -19,6 +19,7 @@ from google.cloud.bigquery_v2 import types +_DEFAULT_VALUE = object() _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: @@ -73,14 +74,18 @@ def __init__( name, field_type, mode="NULLABLE", - description=None, + description=_DEFAULT_VALUE, fields=(), policy_tags=None, ): - self._name = name - self._field_type = field_type - self._mode = mode - self._description = description + self._properties = { + "name": name, + "type": field_type, + } + if mode is not None: + self._properties["mode"] = mode.upper() + if description is not _DEFAULT_VALUE: + self._properties["description"] = description self._fields = tuple(fields) self._policy_tags = policy_tags @@ -98,7 +103,7 @@ def from_api_repr(cls, api_repr): """ # Handle optional properties with default values mode = api_repr.get("mode", "NULLABLE") - description = api_repr.get("description") + description = api_repr.get("description", _DEFAULT_VALUE) fields = api_repr.get("fields", ()) return cls( @@ -113,7 +118,7 @@ def from_api_repr(cls, api_repr): @property def name(self): """str: The name of the field.""" - return self._name + return self._properties["name"] @property def field_type(self): @@ -122,7 +127,7 @@ def field_type(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ - return self._field_type + return self._properties["type"] @property def mode(self): @@ -131,17 +136,17 @@ def mode(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode """ - return self._mode + return self._properties.get("mode") @property def is_nullable(self): """bool: whether 'mode' is 'nullable'.""" - return self._mode == "NULLABLE" + return self.mode == "NULLABLE" @property def description(self): """Optional[str]: description for the field.""" - return self._description + return self._properties.get("description") @property def fields(self): @@ -164,13 +169,7 @@ def to_api_repr(self): Returns: Dict: A dictionary representing the SchemaField in a serialized form. """ - # Put together the basic representation. See http://bit.ly/2hOAT5u. - answer = { - "mode": self.mode.upper(), - "name": self.name, - "type": self.field_type.upper(), - "description": self.description, - } + answer = self._properties.copy() # If this is a RECORD type, then sub-fields are also included, # add this to the serialized representation. @@ -193,10 +192,10 @@ def _key(self): Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ return ( - self._name, - self._field_type.upper(), - self._mode.upper(), - self._description, + self.name, + self.field_type.upper(), + self.mode.upper(), + self.description, self._fields, self._policy_tags, ) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index c18f51bff5c8..63f15ec5a4eb 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -434,13 +434,11 @@ def test_schema_setter_fields(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "description": None, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, } self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} @@ -449,24 +447,18 @@ def test_schema_setter_fields(self): def test_schema_setter_valid_mappings_list(self): config = self._get_target_class()() - schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - config.schema = schema - full_name_repr = { "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "description": None, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, } + schema = [full_name_repr, age_repr] + config.schema = schema self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 6c3263ea5ba5..26ef340de19b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1596,18 +1596,8 @@ def test_create_table_w_schema_and_query(self): { "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query}, @@ -1641,18 +1631,8 @@ def test_create_table_w_schema_and_query(self): }, "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query, "useLegacySql": False}, @@ -2602,7 +2582,7 @@ def test_update_table(self): "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, + "description": "New field description", }, ] }, @@ -2613,8 +2593,10 @@ def test_update_table(self): } ) schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField("full_name", "STRING", mode="REQUIRED", description=None), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="New field description" + ), ] creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -2647,7 +2629,7 @@ def test_update_table(self): "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, + "description": "New field description", }, ] }, @@ -2773,13 +2755,24 @@ def test_update_table_w_query(self): "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, + "description": "this is a column", }, + {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + # Explicitly unset the description. + description=None, + ), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="this is a column" + ), + # Omit the description to not make updates to it. + SchemaField("country", "STRING"), ] resource = self._make_table_resource() resource.update( @@ -7658,18 +7651,47 @@ def test_load_table_from_file_w_invalid_job_config(self): def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job - from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.schema import PolicyTagList, SchemaField client = self._make_client() - records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] - dataframe = pandas.DataFrame(records) + records = [ + {"id": 1, "age": 100, "accounts": [2, 3]}, + {"id": 2, "age": 60, "accounts": [5]}, + {"id": 3, "age": 40, "accounts": []}, + ] + # Mixup column order so that we can verify sent schema matches the + # serialized order, not the table column order. + column_order = ["age", "accounts", "id"] + dataframe = pandas.DataFrame(records, columns=column_order) + table_fields = { + "id": SchemaField( + "id", + "INTEGER", + mode="REQUIRED", + description="integer column", + policy_tags=PolicyTagList(names=("foo", "bar")), + ), + "age": SchemaField( + "age", + "INTEGER", + mode="NULLABLE", + description="age column", + policy_tags=PolicyTagList(names=("baz",)), + ), + "accounts": SchemaField( + "accounts", "INTEGER", mode="REPEATED", description="array column", + ), + } + get_table_schema = [ + table_fields["id"], + table_fields["age"], + table_fields["accounts"], + ] get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, - return_value=mock.Mock( - schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] - ), + return_value=mock.Mock(schema=get_table_schema), ) load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True @@ -7695,8 +7717,21 @@ def test_load_table_from_dataframe(self): sent_file = load_table_from_file.mock_calls[0][1][1] assert sent_file.closed - sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET + sent_config = load_table_from_file.mock_calls[0][2]["job_config"].to_api_repr()[ + "load" + ] + assert sent_config["sourceFormat"] == job.SourceFormat.PARQUET + for field_index, field in enumerate(sent_config["schema"]["fields"]): + assert field["name"] == column_order[field_index] + table_field = table_fields[field["name"]] + assert field["name"] == table_field.name + assert field["type"] == table_field.field_type + assert field["mode"] == table_field.mode + assert len(field.get("fields", [])) == len(table_field.fields) + # Omit unnecessary fields when they come from getting the table + # (not passed in via job_config) + assert "description" not in field + assert "policyTags" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 4b6ef51189ce..4ca2e9012ec6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -77,14 +77,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - } - ] + "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } got_resource = ec.to_api_repr() exp_resource = { diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 71bf6b5aeadb..87baaf3790f8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -35,19 +35,19 @@ def _make_one(self, *args, **kw): def test_constructor_defaults(self): field = self._make_one("test", "STRING") - self.assertEqual(field._name, "test") - self.assertEqual(field._field_type, "STRING") - self.assertEqual(field._mode, "NULLABLE") - self.assertIsNone(field._description) - self.assertEqual(field._fields, ()) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "STRING") + self.assertEqual(field.mode, "NULLABLE") + self.assertIsNone(field.description) + self.assertEqual(field.fields, ()) def test_constructor_explicit(self): field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") - self.assertEqual(field._name, "test") - self.assertEqual(field._field_type, "STRING") - self.assertEqual(field._mode, "REQUIRED") - self.assertEqual(field._description, "Testing") - self.assertEqual(field._fields, ()) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "STRING") + self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.description, "Testing") + self.assertEqual(field.fields, ()) def test_constructor_subfields(self): sub_field1 = self._make_one("area_code", "STRING") @@ -55,13 +55,13 @@ def test_constructor_subfields(self): field = self._make_one( "phone_number", "RECORD", fields=[sub_field1, sub_field2] ) - self.assertEqual(field._name, "phone_number") - self.assertEqual(field._field_type, "RECORD") - self.assertEqual(field._mode, "NULLABLE") - self.assertIsNone(field._description) - self.assertEqual(len(field._fields), 2) - self.assertIs(field._fields[0], sub_field1) - self.assertIs(field._fields[1], sub_field2) + self.assertEqual(field.name, "phone_number") + self.assertEqual(field.field_type, "RECORD") + self.assertEqual(field.mode, "NULLABLE") + self.assertIsNone(field.description) + self.assertEqual(len(field.fields), 2) + self.assertEqual(field.fields[0], sub_field1) + self.assertEqual(field.fields[1], sub_field2) def test_constructor_with_policy_tags(self): from google.cloud.bigquery.schema import PolicyTagList @@ -70,12 +70,12 @@ def test_constructor_with_policy_tags(self): field = self._make_one( "test", "STRING", mode="REQUIRED", description="Testing", policy_tags=policy ) - self.assertEqual(field._name, "test") - self.assertEqual(field._field_type, "STRING") - self.assertEqual(field._mode, "REQUIRED") - self.assertEqual(field._description, "Testing") - self.assertEqual(field._fields, ()) - self.assertEqual(field._policy_tags, policy) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "STRING") + self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.description, "Testing") + self.assertEqual(field.fields, ()) + self.assertEqual(field.policy_tags, policy) def test_to_api_repr(self): from google.cloud.bigquery.schema import PolicyTagList @@ -92,7 +92,6 @@ def test_to_api_repr(self): "mode": "NULLABLE", "name": "foo", "type": "INTEGER", - "description": None, "policyTags": {"names": ["foo", "bar"]}, }, ) @@ -104,18 +103,10 @@ def test_to_api_repr_with_subfield(self): self.assertEqual( field.to_api_repr(), { - "fields": [ - { - "mode": "NULLABLE", - "name": "bar", - "type": "INTEGER", - "description": None, - } - ], + "fields": [{"mode": "NULLABLE", "name": "bar", "type": "INTEGER"}], "mode": "REQUIRED", "name": "foo", "type": record_type, - "description": None, }, ) @@ -168,17 +159,17 @@ def test_from_api_repr_defaults(self): def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") - self.assertIs(schema_field.name, name) + self.assertEqual(schema_field.name, name) def test_field_type_property(self): field_type = "BOOLEAN" schema_field = self._make_one("whether", field_type) - self.assertIs(schema_field.field_type, field_type) + self.assertEqual(schema_field.field_type, field_type) def test_mode_property(self): mode = "REPEATED" schema_field = self._make_one("again", "FLOAT", mode=mode) - self.assertIs(schema_field.mode, mode) + self.assertEqual(schema_field.mode, mode) def test_is_nullable(self): mode = "NULLABLE" @@ -193,14 +184,14 @@ def test_is_not_nullable(self): def test_description_property(self): description = "It holds some data." schema_field = self._make_one("do", "TIMESTAMP", description=description) - self.assertIs(schema_field.description, description) + self.assertEqual(schema_field.description, description) def test_fields_property(self): sub_field1 = self._make_one("one", "STRING") sub_field2 = self._make_one("fish", "INTEGER") fields = (sub_field1, sub_field2) schema_field = self._make_one("boat", "RECORD", fields=fields) - self.assertIs(schema_field.fields, fields) + self.assertEqual(schema_field.fields, fields) def test_to_standard_sql_simple_type(self): sql_type = self._get_standard_sql_data_type_class() @@ -532,17 +523,10 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( - resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": None}, + resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"} ) def test_w_description(self): @@ -552,7 +536,13 @@ def test_w_description(self): full_name = SchemaField( "full_name", "STRING", mode="REQUIRED", description=DESCRIPTION ) - age = SchemaField("age", "INTEGER", mode="REQUIRED") + age = SchemaField( + "age", + "INTEGER", + mode="REQUIRED", + # Explicitly unset description. + description=None, + ) resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( @@ -581,13 +571,7 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( resource[1], @@ -595,20 +579,9 @@ def test_w_subfields(self): "name": "phone", "type": "RECORD", "mode": "REPEATED", - "description": None, "fields": [ - { - "name": "type", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "number", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, ], }, ) From 1fb37d7eed3ffa0ad726b93f70a751a36925c6e4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 22 Mar 2021 03:53:09 -0500 Subject: [PATCH 1093/2016] fix: avoid overly strict dependency on pyarrow 3.x (#564) Exclude "bignumeric_type" from the "all" extra --- packages/google-cloud-bigquery/setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 31b6a3ff7bd4..99d3804edae1 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -48,7 +48,7 @@ "grpcio >= 1.32.0, < 2.0dev", "pyarrow >= 1.0.0, < 4.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ @@ -61,6 +61,11 @@ all_extras = [] for extra in extras: + # Exclude this extra from all to avoid overly strict dependencies on core + # libraries such as pyarrow. + # https://github.com/googleapis/python-bigquery/issues/563 + if extra in {"bignumeric_type"}: + continue all_extras.extend(extras[extra]) extras["all"] = all_extras From 2cf6c85ba46efd14b86f76de28bc3f885aac56ad Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 22 Mar 2021 03:54:39 -0500 Subject: [PATCH 1094/2016] feat: add `ExternalConfig.connection_id` property to connect to external sources (#560) * feat: add `ExternalConfig.connection_id` property to connect to external sources * add tests * fix unit tests --- .../google/cloud/bigquery/external_config.py | 17 +++++++++++++++++ .../tests/unit/test_external_config.py | 8 ++++++++ 2 files changed, 25 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 112dfdba4e5a..59e4960f9851 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -760,6 +760,23 @@ def schema(self): prop = self._properties.get("schema", {}) return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] + @property + def connection_id(self): + """Optional[str]: [Experimental] ID of a BigQuery Connection API + resource. + + .. WARNING:: + + This feature is experimental. Pre-GA features may have limited + support, and changes to pre-GA features may not be compatible with + other pre-GA versions. + """ + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value): + self._properties["connectionId"] = value + @schema.setter def schema(self, value): prop = value diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 4ca2e9012ec6..648a8717ee7f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -74,6 +74,7 @@ def test_to_api_repr_base(self): ec.autodetect = True ec.ignore_unknown_values = False ec.compression = "compression" + ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { @@ -87,10 +88,17 @@ def test_to_api_repr_base(self): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "connectionId": "path/to/connection", "schema": exp_schema, } self.assertEqual(got_resource, exp_resource) + def test_connection_id(self): + ec = external_config.ExternalConfig("") + self.assertIsNone(ec.connection_id) + ec.connection_id = "path/to/connection" + self.assertEqual(ec.connection_id, "path/to/connection") + def test_schema_None(self): ec = external_config.ExternalConfig("") ec.schema = None From 7016577198bea112e1fb94a145e103564b3bc651 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 22 Mar 2021 07:41:26 -0700 Subject: [PATCH 1095/2016] chore(python): add kokoro configs for periodic builds against head (#565) This change should be non-destructive. Note for library repo maintainers: After applying this change, you can easily add (or change) periodic builds against head by adding config files in google3. See python-pubsub repo for example. Source-Author: Takashi Matsuo Source-Date: Fri Mar 19 11:17:59 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 79c8dd7ee768292f933012d3a69a5b4676404cda Source-Link: https://github.com/googleapis/synthtool/commit/79c8dd7ee768292f933012d3a69a5b4676404cda --- .../samples/python3.6/periodic-head.cfg | 11 ++ .../samples/python3.7/periodic-head.cfg | 11 ++ .../samples/python3.8/periodic-head.cfg | 11 ++ .../.kokoro/test-samples-against-head.sh | 28 +++++ .../.kokoro/test-samples-impl.sh | 102 ++++++++++++++++++ .../.kokoro/test-samples.sh | 96 +++-------------- packages/google-cloud-bigquery/synth.metadata | 11 +- 7 files changed, 187 insertions(+), 83 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg create mode 100755 packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh create mode 100755 packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg new file mode 100644 index 000000000000..f9cfcd33e058 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg new file mode 100644 index 000000000000..f9cfcd33e058 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg new file mode 100644 index 000000000000..f9cfcd33e058 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh new file mode 100755 index 000000000000..689948a23005 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A customized test runner for samples. +# +# For periodic builds, you can specify this file for testing against head. + +# `-e` enables the script to automatically fail when a command fails +# `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero +set -eo pipefail +# Enables `**` to include files nested inside sub-folders +shopt -s globstar + +cd github/python-bigquery + +exec .kokoro/test-samples-impl.sh diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh new file mode 100755 index 000000000000..cf5de74c17a5 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# `-e` enables the script to automatically fail when a command fails +# `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero +set -eo pipefail +# Enables `**` to include files nested inside sub-folders +shopt -s globstar + +# Exit early if samples directory doesn't exist +if [ ! -d "./samples" ]; then + echo "No tests run. `./samples` not found" + exit 0 +fi + +# Disable buffering, so that the logs stream through. +export PYTHONUNBUFFERED=1 + +# Debug: show build environment +env | grep KOKORO + +# Install nox +python3.6 -m pip install --upgrade --quiet nox + +# Use secrets acessor service account to get secrets +if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then + gcloud auth activate-service-account \ + --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \ + --project="cloud-devrel-kokoro-resources" +fi + +# This script will create 3 files: +# - testing/test-env.sh +# - testing/service-account.json +# - testing/client-secrets.json +./scripts/decrypt-secrets.sh + +source ./testing/test-env.sh +export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json + +# For cloud-run session, we activate the service account for gcloud sdk. +gcloud auth activate-service-account \ + --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" + +export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json + +echo -e "\n******************** TESTING PROJECTS ********************" + +# Switch to 'fail at end' to allow all tests to complete before exiting. +set +e +# Use RTN to return a non-zero value if the test fails. +RTN=0 +ROOT=$(pwd) +# Find all requirements.txt in the samples directory (may break on whitespace). +for file in samples/**/requirements.txt; do + cd "$ROOT" + # Navigate to the project folder. + file=$(dirname "$file") + cd "$file" + + echo "------------------------------------------------------------" + echo "- testing $file" + echo "------------------------------------------------------------" + + # Use nox to execute the tests for the project. + python3.6 -m nox -s "$RUN_TESTS_SESSION" + EXIT=$? + + # If this is a periodic build, send the test log to the FlakyBot. + # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. + if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then + chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot + $KOKORO_GFILE_DIR/linux_amd64/flakybot + fi + + if [[ $EXIT -ne 0 ]]; then + RTN=1 + echo -e "\n Testing failed: Nox returned a non-zero exit code. \n" + else + echo -e "\n Testing completed.\n" + fi + +done +cd "$ROOT" + +# Workaround for Kokoro permissions issue: delete secrets +rm testing/{test-env.sh,client-secrets.json,service-account.json} + +exit "$RTN" diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples.sh b/packages/google-cloud-bigquery/.kokoro/test-samples.sh index 3ce8994cb96f..62ef534cd59e 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples.sh @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# The default test runner for samples. +# +# For periodic builds, we rewinds the repo to the latest release, and +# run test-samples-impl.sh. # `-e` enables the script to automatically fail when a command fails # `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero @@ -24,87 +28,19 @@ cd github/python-bigquery # Run periodic samples tests at latest release if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then + # preserving the test runner implementation. + cp .kokoro/test-samples-impl.sh "${TMPDIR}/test-samples-impl.sh" + echo "--- IMPORTANT IMPORTANT IMPORTANT ---" + echo "Now we rewind the repo back to the latest release..." LATEST_RELEASE=$(git describe --abbrev=0 --tags) git checkout $LATEST_RELEASE -fi - -# Exit early if samples directory doesn't exist -if [ ! -d "./samples" ]; then - echo "No tests run. `./samples` not found" - exit 0 -fi - -# Disable buffering, so that the logs stream through. -export PYTHONUNBUFFERED=1 - -# Debug: show build environment -env | grep KOKORO - -# Install nox -python3.6 -m pip install --upgrade --quiet nox - -# Use secrets acessor service account to get secrets -if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then - gcloud auth activate-service-account \ - --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \ - --project="cloud-devrel-kokoro-resources" -fi - -# This script will create 3 files: -# - testing/test-env.sh -# - testing/service-account.json -# - testing/client-secrets.json -./scripts/decrypt-secrets.sh - -source ./testing/test-env.sh -export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json - -# For cloud-run session, we activate the service account for gcloud sdk. -gcloud auth activate-service-account \ - --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" - -export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json - -echo -e "\n******************** TESTING PROJECTS ********************" - -# Switch to 'fail at end' to allow all tests to complete before exiting. -set +e -# Use RTN to return a non-zero value if the test fails. -RTN=0 -ROOT=$(pwd) -# Find all requirements.txt in the samples directory (may break on whitespace). -for file in samples/**/requirements.txt; do - cd "$ROOT" - # Navigate to the project folder. - file=$(dirname "$file") - cd "$file" - - echo "------------------------------------------------------------" - echo "- testing $file" - echo "------------------------------------------------------------" - - # Use nox to execute the tests for the project. - python3.6 -m nox -s "$RUN_TESTS_SESSION" - EXIT=$? - - # If this is a periodic build, send the test log to the FlakyBot. - # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. - if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then - chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot - $KOKORO_GFILE_DIR/linux_amd64/flakybot + echo "The current head is: " + echo $(git rev-parse --verify HEAD) + echo "--- IMPORTANT IMPORTANT IMPORTANT ---" + # move back the test runner implementation if there's no file. + if [ ! -f .kokoro/test-samples-impl.sh ]; then + cp "${TMPDIR}/test-samples-impl.sh" .kokoro/test-samples-impl.sh fi +fi - if [[ $EXIT -ne 0 ]]; then - RTN=1 - echo -e "\n Testing failed: Nox returned a non-zero exit code. \n" - else - echo -e "\n Testing completed.\n" - fi - -done -cd "$ROOT" - -# Workaround for Kokoro permissions issue: delete secrets -rm testing/{test-env.sh,client-secrets.json,service-account.json} - -exit "$RTN" +exec .kokoro/test-samples-impl.sh diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 9f81d304598a..2425b03fbeac 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "efdf1c653770f7c03c17e31e3c2f279bb685637b" + "sha": "84e646e6b7087a1626e56ad51eeb130f4ddfa2fb" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" + "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" + "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" } } ], @@ -74,16 +74,21 @@ ".kokoro/samples/lint/presubmit.cfg", ".kokoro/samples/python3.6/common.cfg", ".kokoro/samples/python3.6/continuous.cfg", + ".kokoro/samples/python3.6/periodic-head.cfg", ".kokoro/samples/python3.6/periodic.cfg", ".kokoro/samples/python3.6/presubmit.cfg", ".kokoro/samples/python3.7/common.cfg", ".kokoro/samples/python3.7/continuous.cfg", + ".kokoro/samples/python3.7/periodic-head.cfg", ".kokoro/samples/python3.7/periodic.cfg", ".kokoro/samples/python3.7/presubmit.cfg", ".kokoro/samples/python3.8/common.cfg", ".kokoro/samples/python3.8/continuous.cfg", + ".kokoro/samples/python3.8/periodic-head.cfg", ".kokoro/samples/python3.8/periodic.cfg", ".kokoro/samples/python3.8/presubmit.cfg", + ".kokoro/test-samples-against-head.sh", + ".kokoro/test-samples-impl.sh", ".kokoro/test-samples.sh", ".kokoro/trampoline.sh", ".kokoro/trampoline_v2.sh", From 8b76cdf34834ee8def264e59b5fb5bb2864ad0e3 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 22 Mar 2021 09:49:45 -0500 Subject: [PATCH 1096/2016] chore: release 2.13.0 (#568) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index e5ed7bc9dc19..9cdcdf5fb211 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.13.0](https://www.github.com/googleapis/python-bigquery/compare/v2.12.0...v2.13.0) (2021-03-22) + + +### Features + +* add `ExternalConfig.connection_id` property to connect to external sources ([#560](https://www.github.com/googleapis/python-bigquery/issues/560)) ([d93986e](https://www.github.com/googleapis/python-bigquery/commit/d93986e0259952257f2571f60719b52099c29c0c)) + + +### Bug Fixes + +* avoid overly strict dependency on pyarrow 3.x ([#564](https://www.github.com/googleapis/python-bigquery/issues/564)) ([97ee6ec](https://www.github.com/googleapis/python-bigquery/commit/97ee6ec6cd4bc9f833cd506dc6d244d103654cfd)) +* avoid policy tags 403 error in `load_table_from_dataframe` ([#557](https://www.github.com/googleapis/python-bigquery/issues/557)) ([84e646e](https://www.github.com/googleapis/python-bigquery/commit/84e646e6b7087a1626e56ad51eeb130f4ddfa2fb)) + ## [2.12.0](https://www.github.com/googleapis/python-bigquery/compare/v2.11.0...v2.12.0) (2021-03-16) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 67e043bde2a0..b6000e20f865 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.12.0" +__version__ = "2.13.0" From cd72d0f9d88d1140fea30ac50ba2a54aa03f684f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 23 Mar 2021 15:06:24 -0500 Subject: [PATCH 1097/2016] fix: add ConnectionError to default retry (#571) --- .../google-cloud-bigquery/google/cloud/bigquery/retry.py | 8 ++++---- packages/google-cloud-bigquery/setup.py | 1 + .../google-cloud-bigquery/testing/constraints-3.6.txt | 1 + packages/google-cloud-bigquery/tests/unit/test_retry.py | 9 +++++++++ 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 4bc4b757f45d..20a8e7b13e1e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -14,6 +14,7 @@ from google.api_core import exceptions from google.api_core import retry +import requests.exceptions _RETRYABLE_REASONS = frozenset( @@ -21,9 +22,11 @@ ) _UNSTRUCTURED_RETRYABLE_TYPES = ( + ConnectionError, exceptions.TooManyRequests, exceptions.InternalServerError, exceptions.BadGateway, + requests.exceptions.ConnectionError, ) @@ -33,10 +36,7 @@ def _should_retry(exc): We retry if and only if the 'reason' is 'backendError' or 'rateLimitExceeded'. """ - if not hasattr(exc, "errors"): - return False - - if len(exc.errors) == 0: + if not hasattr(exc, "errors") or len(exc.errors) == 0: # Check for unstructured error returns, e.g. from GFE return isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 99d3804edae1..12a9bde31d5d 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -35,6 +35,7 @@ "google-resumable-media >= 0.6.0, < 2.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", + "requests >= 2.18.0, < 3.0.0dev", ] extras = { "bqstorage": [ diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index c4a5c51bee72..322373eba20c 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -17,5 +17,6 @@ pandas==0.23.0 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==1.0.0 +requests==2.18.0 six==1.13.0 tqdm==4.7.4 diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index d9f867cb30f7..318a54d34a8a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -15,6 +15,7 @@ import unittest import mock +import requests.exceptions class Test_should_retry(unittest.TestCase): @@ -42,6 +43,14 @@ def test_w_rateLimitExceeded(self): exc = mock.Mock(errors=[{"reason": "rateLimitExceeded"}], spec=["errors"]) self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_connectionerror(self): + exc = ConnectionError() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_connectionerror(self): + exc = requests.exceptions.ConnectionError() + self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_too_many_requests(self): from google.api_core.exceptions import TooManyRequests From 16bc91691e189a87cc61e46c04265ad36f4d7fb9 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 23 Mar 2021 21:06:52 +0100 Subject: [PATCH 1098/2016] chore(deps): update dependency google-cloud-bigquery to v2.13.0 (#570) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index ef9264454d63..c5f60911e9b0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.12.0 +google-cloud-bigquery==2.13.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index db1c4b66a631..abbe6fde4019 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.12.0 +google-cloud-bigquery==2.13.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.3 grpcio==1.36.1 From bccd055151d157652bee2765ade6f32ec4d605c4 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 23 Mar 2021 20:20:07 +0000 Subject: [PATCH 1099/2016] chore: release 2.13.1 (#572) :robot: I have created a release \*beep\* \*boop\* --- ### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) ### Bug Fixes * add ConnectionError to default retry ([#571](https://www.github.com/googleapis/python-bigquery/issues/571)) ([a3edb8b](https://www.github.com/googleapis/python-bigquery/commit/a3edb8b921e029e2c03d33302d408ad5d4e9d4ad)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 9cdcdf5fb211..5dc2c883838a 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) + + +### Bug Fixes + +* add ConnectionError to default retry ([#571](https://www.github.com/googleapis/python-bigquery/issues/571)) ([a3edb8b](https://www.github.com/googleapis/python-bigquery/commit/a3edb8b921e029e2c03d33302d408ad5d4e9d4ad)) + ## [2.13.0](https://www.github.com/googleapis/python-bigquery/compare/v2.12.0...v2.13.0) (2021-03-22) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index b6000e20f865..2330d0c2cdfb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.13.0" +__version__ = "2.13.1" From 1eab861d2bcb3a9589fd4d890c8fd3c769b3746a Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 31 Mar 2021 08:00:39 -0700 Subject: [PATCH 1100/2016] chore(deps): update precommit hook pycqa/flake8 to v3.9.0 (#574) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [pycqa/flake8](https://gitlab.com/pycqa/flake8) | repository | minor | `3.8.4` -> `3.9.0` | --- ### Release Notes
pycqa/flake8 ### [`v3.9.0`](https://gitlab.com/pycqa/flake8/compare/3.8.4...3.9.0) [Compare Source](https://gitlab.com/pycqa/flake8/compare/3.8.4...3.9.0)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/synthtool). Source-Author: WhiteSource Renovate Source-Date: Tue Mar 23 17:38:03 2021 +0100 Source-Repo: googleapis/synthtool Source-Sha: f5c5904fb0c6aa3b3730eadf4e5a4485afc65726 Source-Link: https://github.com/googleapis/synthtool/commit/f5c5904fb0c6aa3b3730eadf4e5a4485afc65726 --- packages/google-cloud-bigquery/.pre-commit-config.yaml | 2 +- packages/google-cloud-bigquery/synth.metadata | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml index a9024b15d725..32302e4883a1 100644 --- a/packages/google-cloud-bigquery/.pre-commit-config.yaml +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -12,6 +12,6 @@ repos: hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 + rev: 3.9.0 hooks: - id: flake8 diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 2425b03fbeac..3b34bf519cb9 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "84e646e6b7087a1626e56ad51eeb130f4ddfa2fb" + "sha": "e175d3a26f68e1bc5148bf055089dbfc1b83c76a" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" + "sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" + "sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" } } ], From dba63c0f2b3c3e597da388ee3deb4adf994bb78d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 31 Mar 2021 17:01:09 +0200 Subject: [PATCH 1101/2016] chore(deps): update dependency google-auth-oauthlib to v0.4.4 (#578) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index abbe6fde4019..9f6073c8fd8e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.13.0 google-cloud-bigquery-storage==2.3.0 -google-auth-oauthlib==0.4.3 +google-auth-oauthlib==0.4.4 grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From e7f95861bc2384350cf1c7aa1c80e998aa9578ee Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 31 Mar 2021 17:38:03 +0200 Subject: [PATCH 1102/2016] chore(deps): update dependency matplotlib to v3.4.1 (#576) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [matplotlib](https://matplotlib.org) ([source](https://togithub.com/matplotlib/matplotlib)) | `==3.3.4` -> `==3.4.1` | [![age](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/compatibility-slim/3.3.4)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/confidence-slim/3.3.4)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
matplotlib/matplotlib ### [`v3.4.1`](https://togithub.com/matplotlib/matplotlib/releases/v3.4.1) [Compare Source](https://togithub.com/matplotlib/matplotlib/compare/v3.4.0...v3.4.1) This is the first bugfix release of the 3.4.x series. This release contains several critical bug-fixes: - fix errorbar when specifying fillstyle - fix Inkscape cleanup at exit on Windows for tests - fix legends of colour-mapped scatter plots - fix positioning of annotation fancy arrows - fix size and color rendering for 3D scatter plots - fix suptitle manual positioning when using constrained layout - respect antialiasing settings in cairo backends as well ### [`v3.4.0`](https://togithub.com/matplotlib/matplotlib/releases/v3.4.0) [Compare Source](https://togithub.com/matplotlib/matplotlib/compare/v3.3.4...v3.4.0) Highlights of this release include: - Figure and Axes creation / management - New subfigure functionality - Single-line string notation for `subplot_mosaic` - Changes to behavior of Axes creation methods (`gca`, `add_axes`, `add_subplot`) - `add_subplot`/`add_axes` gained an _axes_class_ parameter - Subplot and subplot2grid can now work with constrained layout - Plotting methods - `axline` supports transform parameter - New automatic labeling for bar charts - A list of hatches can be specified to `bar` and `barh` - Setting `BarContainer` orientation - Contour plots now default to using `ScalarFormatter` - `Axes.errorbar` cycles non-color properties correctly - `errorbar` _errorevery_ parameter matches _markevery_ - `hexbin` supports data reference for _C_ parameter - Support callable for formatting of Sankey labels - `Axes.spines` access shortcuts - New `stairs` method and `StepPatch` artist - Added _orientation_ parameter for stem plots - Angles on Bracket arrow styles - `TickedStroke` patheffect - Colors and colormaps - Collection color specification and mapping - Transparency (alpha) can be set as an array in collections - pcolormesh has improved transparency handling by enabling snapping - IPython representations for Colormap objects - `Colormap.set_extremes` and `Colormap.with_extremes` - Get under/over/bad colors of Colormap objects - New `cm.unregister_cmap` function - New `CenteredNorm` for symmetrical data around a center - New `FuncNorm` for arbitrary normalizations - GridSpec-based colorbars can now be positioned above or to the left of the main axes - Titles, ticks, and labels - supxlabel and supylabel - Shared-axes `subplots` tick label visibility is now correct for top or left labels - An iterable object with labels can be passed to `Axes.plot` - Fonts and Text - Text transform can rotate text direction - `matplotlib.mathtext` now supports _overset_ and _underset_ LaTeX symbols - _math_fontfamily_ parameter to change `Text` font family - `TextArea`/`AnchoredText` support _horizontalalignment_ - PDF supports URLs on Text artists - rcParams improvements - New rcParams for dates: set converter and whether to use interval_multiples - Date formatters now respect _usetex_ rcParam - Setting _image.cmap_ to a Colormap - Tick and tick label colors can be set independently using rcParams - 3D Axes improvements - Errorbar method in 3D Axes - Stem plots in 3D Axes - 3D Collection properties are now modifiable - Panning in 3D Axes - Interactive tool improvements - New `RangeSlider` widget - Sliders can now snap to arbitrary values - Pausing and Resuming Animations - Sphinx extensions - `plot_directive` _caption_ option - Backend-specific improvements - Consecutive rasterized draws now merged - Support raw/rgba frame format in `FFMpegFileWriter` - nbAgg/WebAgg support middle-click and double-click - nbAgg support binary communication - Indexed color for PNG images in PDF files when possible - Improved font subsettings in PDF/PS - Kerning added to strings in PDFs - Fully-fractional HiDPI in QtAgg - wxAgg supports fullscreen toggle
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 9f6073c8fd8e..6024d76554aa 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -4,7 +4,8 @@ google-auth-oauthlib==0.4.4 grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.4 +matplotlib==3.3.4; python_version < '3.7' +matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' pyarrow==3.0.0 From 123b8bb205857766320c77b807cacf4b33c99993 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 5 Apr 2021 15:47:58 +0200 Subject: [PATCH 1103/2016] chore: loosen opentelemetry dependencies (#587) --- packages/google-cloud-bigquery/setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 12a9bde31d5d..607ffb63fbaf 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -53,9 +53,9 @@ "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ - "opentelemetry-api==0.11b0", - "opentelemetry-sdk==0.11b0", - "opentelemetry-instrumentation==0.11b0", + "opentelemetry-api >= 0.11b0", + "opentelemetry-sdk >= 0.11b0", + "opentelemetry-instrumentation >= 0.11b0", ], } From 1a326be98f9b23150af544c7cf258ac2a1b54608 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 6 Apr 2021 04:26:17 -0600 Subject: [PATCH 1104/2016] chore: use gcp-sphinx-docfx-yaml (#584) Porting change in https://github.com/googleapis/synthtool/pull/1011 --- packages/google-cloud-bigquery/noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index df36d237e955..a738d8c00de9 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -275,7 +275,7 @@ def docfx(session): """Build the docfx yaml files for this library.""" session.install("-e", ".") - session.install("sphinx", "alabaster", "recommonmark", "sphinx-docfx-yaml") + session.install("sphinx", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml") shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( From 9b181811d9380add1d7fee62a290a0416daacb48 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 7 Apr 2021 09:01:43 -0700 Subject: [PATCH 1105/2016] chore: Add license headers for python config files (#592) Source-Author: Anthonios Partheniou Source-Date: Tue Apr 6 11:32:03 2021 -0400 Source-Repo: googleapis/synthtool Source-Sha: 5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc Source-Link: https://github.com/googleapis/synthtool/commit/5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc --- .../google-cloud-bigquery/.pre-commit-config.yaml | 14 ++++++++++++++ packages/google-cloud-bigquery/docs/conf.py | 13 +++++++++++++ packages/google-cloud-bigquery/synth.metadata | 6 +++--- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml index 32302e4883a1..8912e9b5d7d7 100644 --- a/packages/google-cloud-bigquery/.pre-commit-config.yaml +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 37e0c46af9d4..fdea01aadf03 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -1,4 +1,17 @@ # -*- coding: utf-8 -*- +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # # google-cloud-bigquery documentation build configuration file # diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 3b34bf519cb9..114359b88156 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "e175d3a26f68e1bc5148bf055089dbfc1b83c76a" + "sha": "c1195147a6e9220f26558a301427dd447646da3a" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" + "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" + "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" } } ], From e4fd454b6ae811db66c32fadafdf73d678f60732 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 7 Apr 2021 20:14:18 +0200 Subject: [PATCH 1106/2016] chore(deps): update dependency grpcio to v1.37.0 (#596) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 6024d76554aa..734cdf445016 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.13.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.4 -grpcio==1.36.1 +grpcio==1.37.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From 51cf5d16a997e50ad18a34dd8240a11489222ffc Mon Sep 17 00:00:00 2001 From: Kevin Deggelman Date: Thu, 8 Apr 2021 07:16:06 -0700 Subject: [PATCH 1107/2016] docs: update the description of the return value of `_QueryResults.rows()` (#594) Updated the description of the return value of `rows` to be more accurate. --- packages/google-cloud-bigquery/google/cloud/bigquery/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 42547cd73d4e..495c4effbe8c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -815,7 +815,7 @@ def total_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_rows Returns: - Optional[int}: Count generated on the server (None until set by the server). + Optional[int]: Count generated on the server (None until set by the server). """ total_rows = self._properties.get("totalRows") if total_rows is not None: @@ -858,7 +858,7 @@ def rows(self): Returns: Optional[List[google.cloud.bigquery.table.Row]]: - Fields describing the schema (None until set by the server). + Rows containing the results of the query. """ return _rows_from_json(self._properties.get("rows", ()), self.schema) From 7bfb5af068224834966248ca3a7188ffc6675465 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 12 Apr 2021 07:20:24 -0600 Subject: [PATCH 1108/2016] feat: accept DatasetListItem where DatasetReference is accepted (#597) * split out and pytestify list_tables tests. Also, exercise dataset polymorphism in some of the tests. * list_tables now accepts DatasetListItem objects * Get coverage to 100% But why do we run coverage on test code? * lint * Update exception text for DatasetListItem * Bypass opentelemetry tracing in unit tests. * Got rid of opentelemetry tracing checks. They aren't needed. * abstracted dataset-argument handling And applied it to `list_tables` and `list_models`. * Converted list_model tests to pytest and included check for dataset polymorphism * removed unneeded blanl lines. * Made list_routines accept DatasetListItem and conveted list_routines tests to pytest. * create_dataset accepts DatasetListItem Also converted create_dataset tests to pytest. (And fixed some long lines.) * Converted list_routine tests to pytest * include string dataset representation in dataset polymorphism. * removed some unused imports * Updated delete_dataset tests - Polymorphoc on dataset - pytest * black * lint * We don't actually need to avoid opentelemetry And a 3.6 test dependened on it. * fixed docstrings to include DatasetListItem in dataset polymorphic APIs. --- .../google/cloud/bigquery/client.py | 61 +- .../tests/unit/conftest.py | 23 + .../tests/unit/helpers.py | 49 + .../tests/unit/test_client.py | 3266 ++++++----------- .../tests/unit/test_create_dataset.py | 349 ++ .../tests/unit/test_delete_dataset.py | 64 + .../tests/unit/test_list_models.py | 72 + .../tests/unit/test_list_routines.py | 75 + .../tests/unit/test_list_tables.py | 145 + 9 files changed, 2005 insertions(+), 2099 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/conftest.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_create_dataset.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_list_models.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_list_routines.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_list_tables.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 305d60d3be01..10127e10d2bf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -449,6 +449,22 @@ def _create_bqstorage_client(self): return bigquery_storage.BigQueryReadClient(credentials=self._credentials) + def _dataset_from_arg(self, dataset): + if isinstance(dataset, str): + dataset = DatasetReference.from_string( + dataset, default_project=self.project + ) + + if not isinstance(dataset, (Dataset, DatasetReference)): + if isinstance(dataset, DatasetListItem): + dataset = dataset.reference + else: + raise TypeError( + "dataset must be a Dataset, DatasetReference, DatasetListItem," + " or string" + ) + return dataset + def create_dataset( self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None ): @@ -461,6 +477,7 @@ def create_dataset( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A :class:`~google.cloud.bigquery.dataset.Dataset` to create. @@ -491,10 +508,7 @@ def create_dataset( >>> dataset = client.create_dataset(dataset) """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) + dataset = self._dataset_from_arg(dataset) if isinstance(dataset, DatasetReference): dataset = Dataset(dataset) @@ -1133,6 +1147,7 @@ def list_models( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose models to list from the @@ -1160,13 +1175,7 @@ def list_models( :class:`~google.cloud.bigquery.model.Model` contained within the requested dataset. """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") + dataset = self._dataset_from_arg(dataset) path = "%s/models" % dataset.path span_attributes = {"path": path} @@ -1210,6 +1219,7 @@ def list_routines( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose routines to list from the @@ -1237,14 +1247,7 @@ def list_routines( :class:`~google.cloud.bigquery.routine.Routine`s contained within the requested dataset, limited by ``max_results``. """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") - + dataset = self._dataset_from_arg(dataset) path = "{}/routines".format(dataset.path) span_attributes = {"path": path} @@ -1288,6 +1291,7 @@ def list_tables( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose tables to list from the @@ -1315,14 +1319,7 @@ def list_tables( :class:`~google.cloud.bigquery.table.TableListItem` contained within the requested dataset. """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") - + dataset = self._dataset_from_arg(dataset) path = "%s/tables" % dataset.path span_attributes = {"path": path} @@ -1365,6 +1362,7 @@ def delete_dataset( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset to delete. If a string is passed @@ -1384,14 +1382,7 @@ def delete_dataset( Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the dataset. """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset or a DatasetReference") - + dataset = self._dataset_from_arg(dataset) params = {} path = dataset.path if delete_contents: diff --git a/packages/google-cloud-bigquery/tests/unit/conftest.py b/packages/google-cloud-bigquery/tests/unit/conftest.py new file mode 100644 index 000000000000..07fc9b4ad56c --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/conftest.py @@ -0,0 +1,23 @@ +import pytest + +from .helpers import make_client + + +@pytest.fixture +def client(): + yield make_client() + + +@pytest.fixture +def PROJECT(): + yield "PROJECT" + + +@pytest.fixture +def DS_ID(): + yield "DATASET_ID" + + +@pytest.fixture +def LOCATION(): + yield "us-central" diff --git a/packages/google-cloud-bigquery/tests/unit/helpers.py b/packages/google-cloud-bigquery/tests/unit/helpers.py index b51b0bbb7e03..67aeaca35587 100644 --- a/packages/google-cloud-bigquery/tests/unit/helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/helpers.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import google.cloud.bigquery.client +import google.cloud.bigquery.dataset +import mock +import pytest + def make_connection(*responses): import google.cloud.bigquery._http @@ -31,3 +36,47 @@ def _to_pyarrow(value): import pyarrow return pyarrow.array([value])[0] + + +def make_client(project="PROJECT", **kw): + credentials = mock.Mock(spec=google.auth.credentials.Credentials) + return google.cloud.bigquery.client.Client(project, credentials, **kw) + + +def make_dataset_reference_string(project, ds_id): + return f"{project}.{ds_id}" + + +def make_dataset(project, ds_id): + return google.cloud.bigquery.dataset.Dataset( + google.cloud.bigquery.dataset.DatasetReference(project, ds_id) + ) + + +def make_dataset_list_item(project, ds_id): + return google.cloud.bigquery.dataset.DatasetListItem( + dict(datasetReference=dict(projectId=project, datasetId=ds_id)) + ) + + +def identity(x): + return x + + +def get_reference(x): + return x.reference + + +dataset_like = [ + (google.cloud.bigquery.dataset.DatasetReference, identity), + (make_dataset, identity), + (make_dataset_list_item, get_reference), + ( + make_dataset_reference_string, + google.cloud.bigquery.dataset.DatasetReference.from_string, + ), +] + +dataset_polymorphic = pytest.mark.parametrize( + "make_dataset,get_reference", dataset_like +) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 26ef340de19b..96e51678f8eb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -856,2505 +856,1643 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): ] assert matching_warnings, "Missing dependency warning not raised." - def test_create_dataset_minimal(self): - from google.cloud.bigquery.dataset import Dataset + def test_create_routine_w_minimal_resource(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineReference - PATH = "projects/%s/datasets" % self.PROJECT - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - } creds = _make_credentials() + path = "/projects/test-routine-project/datasets/test_routines/routines" + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } + } client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) + conn = client._connection = make_connection(resource) + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before, timeout=7.5) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + actual_routine = client.create_routine(routine, timeout=7.5) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % PATH, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - }, - timeout=7.5, + method="POST", path=path, data=resource, timeout=7.5, + ) + self.assertEqual( + actual_routine.reference, RoutineReference.from_string(full_routine_id) ) - def test_create_dataset_w_attrs(self): - from google.cloud.bigquery.dataset import Dataset, AccessEntry + def test_create_routine_w_conflict(self): + from google.cloud.bigquery.routine import Routine - PATH = "projects/%s/datasets" % self.PROJECT - DESCRIPTION = "DESC" - FRIENDLY_NAME = "FN" - LOCATION = "US" - USER_EMAIL = "phred@example.com" - LABELS = {"color": "red"} - VIEW = { - "projectId": "my-proj", - "datasetId": "starry-skies", - "tableId": "northern-hemisphere", + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists") + ) + path = "/projects/test-routine-project/datasets/test_routines/routines" + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.create_routine(routine) + + final_attributes.assert_called_once_with({"path": path}, client, None) + + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } } - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "defaultTableExpirationMs": "3600", - "labels": LABELS, - "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + conn.api_request.assert_called_once_with( + method="POST", path=path, data=resource, timeout=None, + ) + + @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") + def test_span_status_is_set(self): + from google.cloud.bigquery.routine import Routine + + tracer_provider = TracerProvider() + memory_exporter = InMemorySpanExporter() + span_processor = SimpleExportSpanProcessor(memory_exporter) + tracer_provider.add_span_processor(span_processor) + trace.set_tracer_provider(tracer_provider) + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists") + ) + path = "/projects/test-routine-project/datasets/test_routines/routines" + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + client.create_routine(routine) + + span_list = memory_exporter.get_finished_spans() + self.assertTrue(span_list[0].status is not None) + + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } } + conn.api_request.assert_called_once_with( + method="POST", path=path, data=resource, timeout=None, + ) + + def test_create_routine_w_conflict_exists_ok(self): + from google.cloud.bigquery.routine import Routine + creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(RESOURCE) - entries = [ - AccessEntry("OWNER", "userByEmail", USER_EMAIL), - AccessEntry(None, "view", VIEW), - ] + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } + } + path = "/projects/test-routine-project/datasets/test_routines/routines" - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before.access_entries = entries - before.description = DESCRIPTION - before.friendly_name = FRIENDLY_NAME - before.default_table_expiration_ms = 3600 - before.location = LOCATION - before.labels = LABELS + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists"), resource + ) + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) + actual_routine = client.create_routine(routine, exists_ok=True) - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + final_attributes.assert_called_with( + {"path": "%s/minimal_routine" % path}, client, None + ) + + self.assertEqual(actual_routine.project, "test-routine-project") + self.assertEqual(actual_routine.dataset_id, "test_routines") + self.assertEqual(actual_routine.routine_id, "minimal_routine") + conn.api_request.assert_has_calls( + [ + mock.call(method="POST", path=path, data=resource, timeout=None,), + mock.call( + method="GET", + path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + timeout=None, + ), + ] + ) + + def test_create_table_w_day_partition(self): + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TimePartitioning + + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.time_partitioning = TimePartitioning() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table(table, timeout=7.5) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - self.assertEqual(after.description, DESCRIPTION) - self.assertEqual(after.friendly_name, FRIENDLY_NAME) - self.assertEqual(after.location, LOCATION) - self.assertEqual(after.default_table_expiration_ms, 3600) - self.assertEqual(after.labels, LABELS) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path="/%s" % PATH, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "defaultTableExpirationMs": "3600", - "access": [ - {"role": "OWNER", "userByEmail": USER_EMAIL}, - {"view": VIEW}, - ], - "labels": LABELS, + "timePartitioning": {"type": "DAY"}, + "labels": {}, }, - timeout=None, + timeout=7.5, ) + self.assertEqual(table.time_partitioning.type_, "DAY") + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_custom_property(self): + def test_create_table_w_custom_property(self): # The library should handle sending properties to the API that are not # yet part of the library - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.table import Table - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "newAlphaProperty": "unreleased property", - } + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + resource["newAlphaProperty"] = "unreleased property" conn = client._connection = make_connection(resource) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before._properties["newAlphaProperty"] = "unreleased property" + table = Table(self.TABLE_REF) + table._properties["newAlphaProperty"] = "unreleased property" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(table) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after._properties["newAlphaProperty"], "unreleased property") + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, "newAlphaProperty": "unreleased property", "labels": {}, }, timeout=None, ) + self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_client_location_wo_dataset_location(self): - from google.cloud.bigquery.dataset import Dataset + def test_create_table_w_encryption_configuration(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + from google.cloud.bigquery.table import Table - PATH = "projects/%s/datasets" % self.PROJECT - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME ) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + got = client.create_table(table) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - self.assertEqual(after.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path="/%s" % PATH, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, "labels": {}, - "location": self.LOCATION, + "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_client_location_w_dataset_location(self): - from google.cloud.bigquery.dataset import Dataset + def test_create_table_w_day_partition_and_expire(self): + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TimePartitioning - PATH = "projects/%s/datasets" % self.PROJECT - OTHER_LOCATION = "EU" - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": OTHER_LOCATION, - } + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before.location = OTHER_LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.time_partitioning = TimePartitioning(expiration_ms=100) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + got = client.create_table(table) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - self.assertEqual(after.location, OTHER_LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path="/%s" % PATH, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, + "timePartitioning": {"type": "DAY", "expirationMs": "100"}, "labels": {}, - "location": OTHER_LOCATION, }, timeout=None, ) + self.assertEqual(table.time_partitioning.type_, "DAY") + self.assertEqual(table.time_partitioning.expiration_ms, 100) + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_reference(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def test_create_table_w_schema_and_query(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + query = "SELECT * from %s:%s" % (self.DS_ID, self.TABLE_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + resource.update( + { + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + }, + "view": {"query": query}, + } ) + schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF, schema=schema) + table.view_query = query + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset(DatasetReference(self.PROJECT, self.DS_ID)) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(table) - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] }, + "view": {"query": query, "useLegacySql": False}, "labels": {}, - "location": self.LOCATION, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual(got.schema, schema) + self.assertEqual(got.view_query, query) - def test_create_dataset_w_fully_qualified_string(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def test_create_table_w_external(self): + from google.cloud.bigquery.external_config import ExternalConfig + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.table import Table + + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + resource.update( + { + "externalDataConfiguration": { + "sourceFormat": SourceFormat.CSV, + "autodetect": True, + } + } ) conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + ec = ExternalConfig("CSV") + ec.autodetect = True + table.external_data_configuration = ec + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset("{}.{}".format(self.PROJECT, self.DS_ID)) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(table) - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "externalDataConfiguration": { + "sourceFormat": SourceFormat.CSV, + "autodetect": True, }, "labels": {}, - "location": self.LOCATION, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual( + got.external_data_configuration.source_format, SourceFormat.CSV + ) + self.assertEqual(got.external_data_configuration.autodetect, True) - def test_create_dataset_w_string(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def test_create_table_w_reference(self): + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() conn = client._connection = make_connection(resource) + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset(self.DS_ID) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(self.TABLE_REF) - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, "labels": {}, - "location": self.LOCATION, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_alreadyexists_w_exists_ok_false(self): - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("dataset already exists") - ) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - client.create_dataset(self.DS_ID) - - def test_create_dataset_alreadyexists_w_exists_ok_true(self): - post_path = "/projects/{}/datasets".format(self.PROJECT) - get_path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "{}:{}".format(self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def test_create_table_w_fully_qualified_string(self): + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("dataset already exists"), resource - ) + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset(self.DS_ID, exists_ok=True) - - final_attributes.assert_called_with({"path": get_path}, client, None) - - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + got = client.create_table( + "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.TABLE_ID) + ) - conn.api_request.assert_has_calls( - [ - mock.call( - method="POST", - path=post_path, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - "location": self.LOCATION, - }, - timeout=None, - ), - mock.call(method="GET", path=get_path, timeout=None), - ] + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, ) - def test_create_routine_w_minimal_resource(self): - from google.cloud.bigquery.routine import Routine - from google.cloud.bigquery.routine import RoutineReference + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, + timeout=None, + ) + self.assertEqual(got.table_id, self.TABLE_ID) + def test_create_table_w_string(self): + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - path = "/projects/test-routine-project/datasets/test_routines/routines" - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() conn = client._connection = make_connection(resource) - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - actual_routine = client.create_routine(routine, timeout=7.5) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=7.5, - ) - self.assertEqual( - actual_routine.reference, RoutineReference.from_string(full_routine_id) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, ) - def test_create_routine_w_conflict(self): - from google.cloud.bigquery.routine import Routine + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, + timeout=None, + ) + self.assertEqual(got.table_id, self.TABLE_ID) + def test_create_table_alreadyexists_w_exists_ok_false(self): + post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists") + google.api_core.exceptions.AlreadyExists("table already exists") ) - path = "/projects/test-routine-project/datasets/test_routines/routines" - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) with pytest.raises(google.api_core.exceptions.AlreadyExists): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - client.create_routine(routine) - - final_attributes.assert_called_once_with({"path": path}, client, None) - - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } - conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, - ) - - @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") - def test_span_status_is_set(self): - from google.cloud.bigquery.routine import Routine - - tracer_provider = TracerProvider() - memory_exporter = InMemorySpanExporter() - span_processor = SimpleExportSpanProcessor(memory_exporter) - tracer_provider.add_span_processor(span_processor) - trace.set_tracer_provider(tracer_provider) + client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists") + final_attributes.assert_called_with( + {"path": post_path, "dataset_id": self.TABLE_REF.dataset_id}, client, None, ) - path = "/projects/test-routine-project/datasets/test_routines/routines" - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - client.create_routine(routine) - - span_list = memory_exporter.get_finished_spans() - self.assertTrue(span_list[0].status is not None) - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", + path=post_path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, + timeout=None, ) - def test_create_routine_w_conflict_exists_ok(self): - from google.cloud.bigquery.routine import Routine - + def test_create_table_alreadyexists_w_exists_ok_true(self): + post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) + get_path = "/projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID + ) + resource = self._make_table_resource() creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } - path = "/projects/test-routine-project/datasets/test_routines/routines" - + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists"), resource + google.api_core.exceptions.AlreadyExists("table already exists"), resource ) - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - actual_routine = client.create_routine(routine, exists_ok=True) + got = client.create_table( + "{}.{}".format(self.DS_ID, self.TABLE_ID), exists_ok=True + ) - final_attributes.assert_called_with( - {"path": "%s/minimal_routine" % path}, client, None - ) + final_attributes.assert_called_with({"path": get_path}, client, None) + + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual(got.table_id, self.TABLE_ID) - self.assertEqual(actual_routine.project, "test-routine-project") - self.assertEqual(actual_routine.dataset_id, "test_routines") - self.assertEqual(actual_routine.routine_id, "minimal_routine") conn.api_request.assert_has_calls( [ - mock.call(method="POST", path=path, data=resource, timeout=None,), mock.call( - method="GET", - path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + method="POST", + path=post_path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, timeout=None, ), + mock.call(method="GET", path=get_path, timeout=None), ] ) - def test_create_table_w_day_partition(self): - from google.cloud.bigquery.table import Table - from google.cloud.bigquery.table import TimePartitioning + def test_close(self): + creds = _make_credentials() + http = mock.Mock() + http._auth_request.session = mock.Mock() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + client.close() + + http.close.assert_called_once() + http._auth_request.session.close.assert_called_once() + + def test_get_model(self): + path = "projects/%s/datasets/%s/models/%s" % ( + self.PROJECT, + self.DS_ID, + self.MODEL_ID, + ) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + } + } conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.time_partitioning = TimePartitioning() + + model_ref = DatasetReference(self.PROJECT, self.DS_ID).model(self.MODEL_ID) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - got = client.create_table(table, timeout=7.5) + got = client.get_model(model_ref, timeout=7.5) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "timePartitioning": {"type": "DAY"}, - "labels": {}, - }, - timeout=7.5, + method="GET", path="/%s" % path, timeout=7.5 ) - self.assertEqual(table.time_partitioning.type_, "DAY") - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_custom_property(self): - # The library should handle sending properties to the API that are not - # yet part of the library - from google.cloud.bigquery.table import Table + self.assertEqual(got.model_id, self.MODEL_ID) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + def test_get_model_w_string(self): + path = "projects/%s/datasets/%s/models/%s" % ( + self.PROJECT, + self.DS_ID, + self.MODEL_ID, + ) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - resource["newAlphaProperty"] = "unreleased property" + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + } + } conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table._properties["newAlphaProperty"] = "unreleased property" + + model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - got = client.create_table(table) + got = client.get_model(model_id) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "newAlphaProperty": "unreleased property", - "labels": {}, - }, - timeout=None, + method="GET", path="/%s" % path, timeout=None ) - self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") - self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.model_id, self.MODEL_ID) - def test_create_table_w_encryption_configuration(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - from google.cloud.bigquery.table import Table + def test_get_routine(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineReference - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routines = [ + full_routine_id, + Routine(full_routine_id), + RoutineReference.from_string(full_routine_id), + ] + for routine in routines: + creds = _make_credentials() + resource = { + "etag": "im-an-etag", + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", }, - "labels": {}, - "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_day_partition_and_expire(self): - from google.cloud.bigquery.table import Table - from google.cloud.bigquery.table import TimePartitioning + "routineType": "SCALAR_FUNCTION", + } + path = "/projects/test-routine-project/datasets/test_routines/routines/minimal_routine" - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.time_partitioning = TimePartitioning(expiration_ms=100) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table) + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection(resource) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + actual_routine = client.get_routine(routine, timeout=7.5) - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "timePartitioning": {"type": "DAY", "expirationMs": "100"}, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(table.time_partitioning.type_, "DAY") - self.assertEqual(table.time_partitioning.expiration_ms, 100) - self.assertEqual(got.table_id, self.TABLE_ID) + final_attributes.assert_called_once_with({"path": path}, client, None) - def test_create_table_w_schema_and_query(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table + conn.api_request.assert_called_once_with( + method="GET", path=path, timeout=7.5, + ) + self.assertEqual( + actual_routine.reference, + RoutineReference.from_string(full_routine_id), + msg="routine={}".format(repr(routine)), + ) + self.assertEqual( + actual_routine.etag, + "im-an-etag", + msg="routine={}".format(repr(routine)), + ) + self.assertEqual( + actual_routine.type_, + "SCALAR_FUNCTION", + msg="routine={}".format(repr(routine)), + ) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - query = "SELECT * from %s:%s" % (self.DS_ID, self.TABLE_ID) + def test_get_table(self): + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) resource = self._make_table_resource() - resource.update( - { - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - }, - "view": {"query": query}, - } - ) - schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF, schema=schema) - table.view_query = query - with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - got = client.create_table(table) + table = client.get_table(self.TABLE_REF, timeout=7.5) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - }, - "view": {"query": query, "useLegacySql": False}, - "labels": {}, - }, - timeout=None, + method="GET", path="/%s" % path, timeout=7.5 ) - self.assertEqual(got.table_id, self.TABLE_ID) - self.assertEqual(got.project, self.PROJECT) - self.assertEqual(got.dataset_id, self.DS_ID) - self.assertEqual(got.schema, schema) - self.assertEqual(got.view_query, query) - - def test_create_table_w_external(self): - from google.cloud.bigquery.external_config import ExternalConfig - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.table import Table + self.assertEqual(table.table_id, self.TABLE_ID) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + def test_get_table_sets_user_agent(self): creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - resource.update( - { - "externalDataConfiguration": { - "sourceFormat": SourceFormat.CSV, - "autodetect": True, - } - } + http = mock.create_autospec(requests.Session) + mock_response = http.request( + url=mock.ANY, method=mock.ANY, headers=mock.ANY, data=mock.ANY ) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - ec = ExternalConfig("CSV") - ec.autodetect = True - table.external_data_configuration = ec - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + http.reset_mock() + http.is_mtls = False + mock_response.status_code = 200 + mock_response.json.return_value = self._make_table_resource() + user_agent_override = client_info.ClientInfo(user_agent="my-application/1.2.3") + client = self._make_one( + project=self.PROJECT, + credentials=creds, + client_info=user_agent_override, + _http=http, ) - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "externalDataConfiguration": { - "sourceFormat": SourceFormat.CSV, - "autodetect": True, - }, - "labels": {}, + client.get_table(self.TABLE_REF) + + expected_user_agent = user_agent_override.to_user_agent() + http.request.assert_called_once_with( + url=mock.ANY, + method="GET", + headers={ + "X-Goog-API-Client": expected_user_agent, + "Accept-Encoding": "gzip", + "User-Agent": expected_user_agent, }, + data=mock.ANY, timeout=None, ) - self.assertEqual(got.table_id, self.TABLE_ID) - self.assertEqual(got.project, self.PROJECT) - self.assertEqual(got.dataset_id, self.DS_ID) - self.assertEqual( - got.external_data_configuration.source_format, SourceFormat.CSV - ) - self.assertEqual(got.external_data_configuration.autodetect, True) - - def test_create_table_w_reference(self): - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) + self.assertIn("my-application/1.2.3", expected_user_agent) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(self.TABLE_REF) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, - client, - None, - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_fully_qualified_string(self): - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table( - "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.TABLE_ID) - ) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, - client, - None, - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_string(self): - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, - client, - None, - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_alreadyexists_w_exists_ok_false(self): - post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("table already exists") - ) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - - final_attributes.assert_called_with( - {"path": post_path, "dataset_id": self.TABLE_REF.dataset_id}, client, None, - ) - - conn.api_request.assert_called_once_with( - method="POST", - path=post_path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - - def test_create_table_alreadyexists_w_exists_ok_true(self): - post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) - get_path = "/projects/{}/datasets/{}/tables/{}".format( - self.PROJECT, self.DS_ID, self.TABLE_ID - ) - resource = self._make_table_resource() - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("table already exists"), resource - ) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table( - "{}.{}".format(self.DS_ID, self.TABLE_ID), exists_ok=True - ) - - final_attributes.assert_called_with({"path": get_path}, client, None) - - self.assertEqual(got.project, self.PROJECT) - self.assertEqual(got.dataset_id, self.DS_ID) - self.assertEqual(got.table_id, self.TABLE_ID) - - conn.api_request.assert_has_calls( - [ - mock.call( - method="POST", - path=post_path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ), - mock.call(method="GET", path=get_path, timeout=None), - ] - ) - - def test_close(self): - creds = _make_credentials() - http = mock.Mock() - http._auth_request.session = mock.Mock() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - client.close() - - http.close.assert_called_once() - http._auth_request.session.close.assert_called_once() - - def test_get_model(self): - path = "projects/%s/datasets/%s/models/%s" % ( - self.PROJECT, - self.DS_ID, - self.MODEL_ID, - ) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - resource = { - "modelReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": self.MODEL_ID, - } - } - conn = client._connection = make_connection(resource) - - model_ref = DatasetReference(self.PROJECT, self.DS_ID).model(self.MODEL_ID) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.get_model(model_ref, timeout=7.5) - - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=7.5 - ) - self.assertEqual(got.model_id, self.MODEL_ID) - - def test_get_model_w_string(self): - path = "projects/%s/datasets/%s/models/%s" % ( - self.PROJECT, - self.DS_ID, - self.MODEL_ID, - ) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - resource = { - "modelReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": self.MODEL_ID, - } - } - conn = client._connection = make_connection(resource) - - model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.get_model(model_id) - - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=None - ) - self.assertEqual(got.model_id, self.MODEL_ID) - - def test_get_routine(self): - from google.cloud.bigquery.routine import Routine - from google.cloud.bigquery.routine import RoutineReference - - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routines = [ - full_routine_id, - Routine(full_routine_id), - RoutineReference.from_string(full_routine_id), - ] - for routine in routines: - creds = _make_credentials() - resource = { - "etag": "im-an-etag", - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - }, - "routineType": "SCALAR_FUNCTION", - } - path = "/projects/test-routine-project/datasets/test_routines/routines/minimal_routine" - - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - actual_routine = client.get_routine(routine, timeout=7.5) - - final_attributes.assert_called_once_with({"path": path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path=path, timeout=7.5, - ) - self.assertEqual( - actual_routine.reference, - RoutineReference.from_string(full_routine_id), - msg="routine={}".format(repr(routine)), - ) - self.assertEqual( - actual_routine.etag, - "im-an-etag", - msg="routine={}".format(repr(routine)), - ) - self.assertEqual( - actual_routine.type_, - "SCALAR_FUNCTION", - msg="routine={}".format(repr(routine)), - ) - - def test_get_table(self): - path = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table = client.get_table(self.TABLE_REF, timeout=7.5) - - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=7.5 - ) - self.assertEqual(table.table_id, self.TABLE_ID) - - def test_get_table_sets_user_agent(self): - creds = _make_credentials() - http = mock.create_autospec(requests.Session) - mock_response = http.request( - url=mock.ANY, method=mock.ANY, headers=mock.ANY, data=mock.ANY - ) - http.reset_mock() - http.is_mtls = False - mock_response.status_code = 200 - mock_response.json.return_value = self._make_table_resource() - user_agent_override = client_info.ClientInfo(user_agent="my-application/1.2.3") - client = self._make_one( - project=self.PROJECT, - credentials=creds, - client_info=user_agent_override, - _http=http, - ) - - client.get_table(self.TABLE_REF) - - expected_user_agent = user_agent_override.to_user_agent() - http.request.assert_called_once_with( - url=mock.ANY, - method="GET", - headers={ - "X-Goog-API-Client": expected_user_agent, - "Accept-Encoding": "gzip", - "User-Agent": expected_user_agent, - }, - data=mock.ANY, - timeout=None, - ) - self.assertIn("my-application/1.2.3", expected_user_agent) - - def test_get_iam_policy(self): - from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE - from google.api_core.iam import Policy - - PATH = "/projects/{}/datasets/{}/tables/{}:getIamPolicy".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, - ) - BODY = {"options": {"requestedPolicyVersion": 1}} - ETAG = "CARDI" - VERSION = 1 - OWNER1 = "user:phred@example.com" - OWNER2 = "group:cloud-logs@google.com" - EDITOR1 = "domain:google.com" - EDITOR2 = "user:phred@example.com" - VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" - VIEWER2 = "user:phred@example.com" - RETURNED = { - "resourceId": PATH, - "etag": ETAG, - "version": VERSION, - "bindings": [ - {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, - {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, - {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, - ], - } - EXPECTED = { - binding["role"]: set(binding["members"]) for binding in RETURNED["bindings"] - } - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - policy = client.get_iam_policy(self.TABLE_REF, timeout=7.5) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - - self.assertIsInstance(policy, Policy) - self.assertEqual(policy.etag, RETURNED["etag"]) - self.assertEqual(policy.version, RETURNED["version"]) - self.assertEqual(dict(policy), EXPECTED) - - def test_get_iam_policy_w_invalid_table(self): - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - table_resource_string = "projects/{}/datasets/{}/tables/{}".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, - ) - - with self.assertRaises(TypeError): - client.get_iam_policy(table_resource_string) - - def test_get_iam_policy_w_invalid_version(self): - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - with self.assertRaises(ValueError): - client.get_iam_policy(self.TABLE_REF, requested_policy_version=2) - - def test_set_iam_policy(self): - from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE - from google.api_core.iam import Policy - - PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - ETAG = "foo" - VERSION = 1 - OWNER1 = "user:phred@example.com" - OWNER2 = "group:cloud-logs@google.com" - EDITOR1 = "domain:google.com" - EDITOR2 = "user:phred@example.com" - VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" - VIEWER2 = "user:phred@example.com" - BINDINGS = [ - {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, - {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, - {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, - ] - MASK = "bindings,etag" - RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS} - - policy = Policy() - for binding in BINDINGS: - policy[binding["role"]] = binding["members"] - - BODY = {"policy": policy.to_api_repr(), "updateMask": MASK} - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - returned_policy = client.set_iam_policy( - self.TABLE_REF, policy, updateMask=MASK, timeout=7.5 - ) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - self.assertEqual(returned_policy.etag, ETAG) - self.assertEqual(returned_policy.version, VERSION) - self.assertEqual(dict(returned_policy), dict(policy)) - - def test_set_iam_policy_no_mask(self): - from google.api_core.iam import Policy - - PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - RETURNED = {"etag": "foo", "version": 1, "bindings": []} - - policy = Policy() - BODY = {"policy": policy.to_api_repr()} - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.set_iam_policy(self.TABLE_REF, policy, timeout=7.5) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - - def test_set_iam_policy_invalid_policy(self): - from google.api_core.iam import Policy - - policy = Policy() - invalid_policy_repr = policy.to_api_repr() - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - with self.assertRaises(TypeError): - client.set_iam_policy(self.TABLE_REF, invalid_policy_repr) - - def test_set_iam_policy_w_invalid_table(self): - from google.api_core.iam import Policy - - policy = Policy() - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - - with self.assertRaises(TypeError): - client.set_iam_policy(table_resource_string, policy) - - def test_test_iam_permissions(self): - PATH = "/projects/%s/datasets/%s/tables/%s:testIamPermissions" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - - PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] - BODY = {"permissions": PERMISSIONS} - RETURNED = {"permissions": PERMISSIONS} - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.test_iam_permissions(self.TABLE_REF, PERMISSIONS, timeout=7.5) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - - def test_test_iam_permissions_w_invalid_table(self): - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - - PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] - - with self.assertRaises(TypeError): - client.test_iam_permissions(table_resource_string, PERMISSIONS) - - def test_update_dataset_w_invalid_field(self): - from google.cloud.bigquery.dataset import Dataset - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(ValueError): - client.update_dataset( - Dataset("{}.{}".format(self.PROJECT, self.DS_ID)), ["foo"] - ) - - def test_update_dataset(self): - from google.cloud.bigquery.dataset import Dataset, AccessEntry - - PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - DESCRIPTION = "DESCRIPTION" - FRIENDLY_NAME = "TITLE" - LOCATION = "loc" - LABELS = {"priority": "high"} - ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] - EXP = 17 - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "defaultTableExpirationMs": EXP, - "labels": LABELS, - "access": ACCESS, - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(RESOURCE, RESOURCE) - ds = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) - ds.description = DESCRIPTION - ds.friendly_name = FRIENDLY_NAME - ds.location = LOCATION - ds.default_table_expiration_ms = EXP - ds.labels = LABELS - ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] - fields = [ - "description", - "friendly_name", - "location", - "labels", - "access_entries", - ] - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - ds2 = client.update_dataset(ds, fields=fields, timeout=7.5,) - - final_attributes.assert_called_once_with( - {"path": "/%s" % PATH, "fields": fields}, client, None - ) - - conn.api_request.assert_called_once_with( - method="PATCH", - data={ - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "labels": LABELS, - "access": ACCESS, - }, - path="/" + PATH, - headers=None, - timeout=7.5, - ) - self.assertEqual(ds2.description, ds.description) - self.assertEqual(ds2.friendly_name, ds.friendly_name) - self.assertEqual(ds2.location, ds.location) - self.assertEqual(ds2.labels, ds.labels) - self.assertEqual(ds2.access_entries, ds.access_entries) - - # ETag becomes If-Match header. - ds._properties["etag"] = "etag" - client.update_dataset(ds, []) - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "etag") - - def test_update_dataset_w_custom_property(self): - # The library should handle sending properties to the API that are not - # yet part of the library - from google.cloud.bigquery.dataset import Dataset - - path = "/projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "newAlphaProperty": "unreleased property", - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - dataset = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) - dataset._properties["newAlphaProperty"] = "unreleased property" - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - dataset = client.update_dataset(dataset, ["newAlphaProperty"]) - - final_attributes.assert_called_once_with( - {"path": path, "fields": ["newAlphaProperty"]}, client, None - ) - - conn.api_request.assert_called_once_with( - method="PATCH", - data={"newAlphaProperty": "unreleased property"}, - path=path, - headers=None, - timeout=None, - ) - - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset._properties["newAlphaProperty"], "unreleased property") - - def test_update_model(self): - from google.cloud.bigquery.model import Model - - path = "projects/%s/datasets/%s/models/%s" % ( - self.PROJECT, - self.DS_ID, - self.MODEL_ID, - ) - description = "description" - title = "title" - expires = datetime.datetime( - 2012, 12, 21, 16, 0, 0, tzinfo=google.cloud._helpers.UTC - ) - resource = { - "modelReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": self.MODEL_ID, - }, - "description": description, - "etag": "etag", - "expirationTime": str(google.cloud._helpers._millis(expires)), - "friendlyName": title, - "labels": {"x": "y"}, - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource, resource) - model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) - model = Model(model_id) - model.description = description - model.friendly_name = title - model.expires = expires - model.labels = {"x": "y"} - fields = ["description", "friendly_name", "labels", "expires"] - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - updated_model = client.update_model(model, fields, timeout=7.5) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": fields}, client, None - ) - - sent = { - "description": description, - "expirationTime": str(google.cloud._helpers._millis(expires)), - "friendlyName": title, - "labels": {"x": "y"}, - } - conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 - ) - self.assertEqual(updated_model.model_id, model.model_id) - self.assertEqual(updated_model.description, model.description) - self.assertEqual(updated_model.friendly_name, model.friendly_name) - self.assertEqual(updated_model.labels, model.labels) - self.assertEqual(updated_model.expires, model.expires) - - # ETag becomes If-Match header. - model._proto.etag = "etag" - client.update_model(model, []) - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "etag") - - def test_update_routine(self): - from google.cloud.bigquery.routine import Routine - from google.cloud.bigquery.routine import RoutineArgument - - full_routine_id = "routines-project.test_routines.updated_routine" - resource = { - "routineReference": { - "projectId": "routines-project", - "datasetId": "test_routines", - "routineId": "updated_routine", - }, - "routineType": "SCALAR_FUNCTION", - "language": "SQL", - "definitionBody": "x * 3", - "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], - "returnType": None, - "someNewField": "someValue", - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource, resource) - routine = Routine(full_routine_id) - routine.arguments = [ - RoutineArgument( - name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 - ), - ) - ] - routine.body = "x * 3" - routine.language = "SQL" - routine.type_ = "SCALAR_FUNCTION" - routine._properties["someNewField"] = "someValue" - fields = [ - "arguments", - "language", - "body", - "type_", - "return_type", - "someNewField", - ] - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - actual_routine = client.update_routine(routine, fields, timeout=7.5,) - - final_attributes.assert_called_once_with( - {"path": routine.path, "fields": fields}, client, None - ) - - # TODO: routineReference isn't needed when the Routines API supports - # partial updates. - sent = resource - conn.api_request.assert_called_once_with( - method="PUT", - data=sent, - path="/projects/routines-project/datasets/test_routines/routines/updated_routine", - headers=None, - timeout=7.5, - ) - self.assertEqual(actual_routine.arguments, routine.arguments) - self.assertEqual(actual_routine.body, routine.body) - self.assertEqual(actual_routine.language, routine.language) - self.assertEqual(actual_routine.type_, routine.type_) - - # ETag becomes If-Match header. - routine._properties["etag"] = "im-an-etag" - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.update_routine(routine, []) - - final_attributes.assert_called_once_with( - {"path": routine.path, "fields": []}, client, None - ) - - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "im-an-etag") - - def test_update_table(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table - - path = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - description = "description" - title = "title" - resource = self._make_table_resource() - resource.update( - { - "schema": { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": "New field description", - }, - ] - }, - "etag": "etag", - "description": description, - "friendlyName": title, - "labels": {"x": "y"}, - } - ) - schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED", description=None), - SchemaField( - "age", "INTEGER", mode="REQUIRED", description="New field description" - ), - ] - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource, resource) - table = Table(self.TABLE_REF, schema=schema) - table.description = description - table.friendly_name = title - table.labels = {"x": "y"} - fields = ["schema", "description", "friendly_name", "labels"] - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - updated_table = client.update_table(table, fields, timeout=7.5) - span_path = "/%s" % path + def test_get_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy - final_attributes.assert_called_once_with( - {"path": span_path, "fields": fields}, client, None + PATH = "/projects/{}/datasets/{}/tables/{}:getIamPolicy".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, ) - - sent = { - "schema": { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": "New field description", - }, - ] - }, - "description": description, - "friendlyName": title, - "labels": {"x": "y"}, + BODY = {"options": {"requestedPolicyVersion": 1}} + ETAG = "CARDI" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + RETURNED = { + "resourceId": PATH, + "etag": ETAG, + "version": VERSION, + "bindings": [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ], + } + EXPECTED = { + binding["role"]: set(binding["members"]) for binding in RETURNED["bindings"] } - conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 - ) - self.assertEqual(updated_table.description, table.description) - self.assertEqual(updated_table.friendly_name, table.friendly_name) - self.assertEqual(updated_table.schema, table.schema) - self.assertEqual(updated_table.labels, table.labels) - # ETag becomes If-Match header. - table._properties["etag"] = "etag" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - client.update_table(table, []) + policy = client.get_iam_policy(self.TABLE_REF, timeout=7.5) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": []}, client, None + final_attributes.assert_called_once_with({"path": PATH}, client, None) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 ) - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "etag") + self.assertIsInstance(policy, Policy) + self.assertEqual(policy.etag, RETURNED["etag"]) + self.assertEqual(policy.version, RETURNED["version"]) + self.assertEqual(dict(policy), EXPECTED) - def test_update_table_w_custom_property(self): - from google.cloud.bigquery.table import Table + def test_get_iam_policy_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - path = "projects/%s/datasets/%s/tables/%s" % ( + table_resource_string = "projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, + ) + + with self.assertRaises(TypeError): + client.get_iam_policy(table_resource_string) + + def test_get_iam_policy_w_invalid_version(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with self.assertRaises(ValueError): + client.get_iam_policy(self.TABLE_REF, requested_policy_version=2) + + def test_set_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( self.PROJECT, self.DS_ID, self.TABLE_ID, ) - resource = self._make_table_resource() - resource["newAlphaProperty"] = "unreleased property" + ETAG = "foo" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + BINDINGS = [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ] + MASK = "bindings,etag" + RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS} + + policy = Policy() + for binding in BINDINGS: + policy[binding["role"]] = binding["members"] + + BODY = {"policy": policy.to_api_repr(), "updateMask": MASK} + creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table._properties["newAlphaProperty"] = "unreleased property" + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["newAlphaProperty"]) + returned_policy = client.set_iam_policy( + self.TABLE_REF, policy, updateMask=MASK, timeout=7.5 + ) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["newAlphaProperty"]}, client, None, + final_attributes.assert_called_once_with({"path": PATH}, client, None) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + self.assertEqual(returned_policy.etag, ETAG) + self.assertEqual(returned_policy.version, VERSION) + self.assertEqual(dict(returned_policy), dict(policy)) + + def test_set_iam_policy_no_mask(self): + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) + RETURNED = {"etag": "foo", "version": 1, "bindings": []} + + policy = Policy() + BODY = {"policy": policy.to_api_repr()} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.set_iam_policy(self.TABLE_REF, policy, timeout=7.5) + + final_attributes.assert_called_once_with({"path": PATH}, client, None) conn.api_request.assert_called_once_with( - method="PATCH", - path="/%s" % path, - data={"newAlphaProperty": "unreleased property"}, - headers=None, - timeout=None, + method="POST", path=PATH, data=BODY, timeout=7.5 ) - self.assertEqual( - updated_table._properties["newAlphaProperty"], "unreleased property" + + def test_set_iam_policy_invalid_policy(self): + from google.api_core.iam import Policy + + policy = Policy() + invalid_policy_repr = policy.to_api_repr() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with self.assertRaises(TypeError): + client.set_iam_policy(self.TABLE_REF, invalid_policy_repr) + + def test_set_iam_policy_w_invalid_table(self): + from google.api_core.iam import Policy + + policy = Policy() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) - def test_update_table_only_use_legacy_sql(self): - from google.cloud.bigquery.table import Table + with self.assertRaises(TypeError): + client.set_iam_policy(table_resource_string, policy) - path = "projects/%s/datasets/%s/tables/%s" % ( + def test_test_iam_permissions(self): + PATH = "/projects/%s/datasets/%s/tables/%s:testIamPermissions" % ( self.PROJECT, self.DS_ID, self.TABLE_ID, ) - resource = self._make_table_resource() - resource["view"] = {"useLegacySql": True} + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + BODY = {"permissions": PERMISSIONS} + RETURNED = {"permissions": PERMISSIONS} + creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.view_use_legacy_sql = True + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["view_use_legacy_sql"]) + client.test_iam_permissions(self.TABLE_REF, PERMISSIONS, timeout=7.5) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["view_use_legacy_sql"]}, client, None, - ) + final_attributes.assert_called_once_with({"path": PATH}, client, None) conn.api_request.assert_called_once_with( - method="PATCH", - path="/%s" % path, - data={"view": {"useLegacySql": True}}, - headers=None, - timeout=None, + method="POST", path=PATH, data=BODY, timeout=7.5 ) - self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) - def test_update_table_w_query(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table + def test_test_iam_permissions_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - path = "projects/%s/datasets/%s/tables/%s" % ( + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( self.PROJECT, self.DS_ID, self.TABLE_ID, ) - query = "select fullname, age from person_ages" - location = "EU" - exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) - schema_resource = { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": "this is a column", - }, - {"name": "country", "type": "STRING", "mode": "NULLABLE"}, - ] + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + + with self.assertRaises(TypeError): + client.test_iam_permissions(table_resource_string, PERMISSIONS) + + def test_update_dataset_w_invalid_field(self): + from google.cloud.bigquery.dataset import Dataset + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + with self.assertRaises(ValueError): + client.update_dataset( + Dataset("{}.{}".format(self.PROJECT, self.DS_ID)), ["foo"] + ) + + def test_update_dataset(self): + from google.cloud.bigquery.dataset import Dataset, AccessEntry + + PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + DESCRIPTION = "DESCRIPTION" + FRIENDLY_NAME = "TITLE" + LOCATION = "loc" + LABELS = {"priority": "high"} + ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] + EXP = 17 + RESOURCE = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": EXP, + "labels": LABELS, + "access": ACCESS, } - schema = [ - SchemaField( - "full_name", - "STRING", - mode="REQUIRED", - # Explicitly unset the description. - description=None, - ), - SchemaField( - "age", "INTEGER", mode="REQUIRED", description="this is a column" - ), - # Omit the description to not make updates to it. - SchemaField("country", "STRING"), - ] - resource = self._make_table_resource() - resource.update( - { - "schema": schema_resource, - "view": {"query": query, "useLegacySql": True}, - "location": location, - "expirationTime": _millis(exp_time), - } - ) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF, schema=schema) - table.expires = exp_time - table.view_query = query - table.view_use_legacy_sql = True - updated_properties = ["schema", "view_query", "expires", "view_use_legacy_sql"] + conn = client._connection = make_connection(RESOURCE, RESOURCE) + ds = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) + ds.description = DESCRIPTION + ds.friendly_name = FRIENDLY_NAME + ds.location = LOCATION + ds.default_table_expiration_ms = EXP + ds.labels = LABELS + ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] + fields = [ + "description", + "friendly_name", + "location", + "labels", + "access_entries", + ] + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, updated_properties) + ds2 = client.update_dataset(ds, fields=fields, timeout=7.5,) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": updated_properties}, client, None, + {"path": "/%s" % PATH, "fields": fields}, client, None ) - self.assertEqual(updated_table.schema, table.schema) - self.assertEqual(updated_table.view_query, table.view_query) - self.assertEqual(updated_table.expires, table.expires) - self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) - self.assertEqual(updated_table.location, location) - conn.api_request.assert_called_once_with( method="PATCH", - path="/%s" % path, data={ - "view": {"query": query, "useLegacySql": True}, - "expirationTime": str(_millis(exp_time)), - "schema": schema_resource, + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "labels": LABELS, + "access": ACCESS, }, + path="/" + PATH, headers=None, - timeout=None, + timeout=7.5, ) + self.assertEqual(ds2.description, ds.description) + self.assertEqual(ds2.friendly_name, ds.friendly_name) + self.assertEqual(ds2.location, ds.location) + self.assertEqual(ds2.labels, ds.labels) + self.assertEqual(ds2.access_entries, ds.access_entries) - def test_update_table_w_schema_None(self): - # Simulate deleting schema: not sure if back-end will actually - # allow this operation, but the spec says it is optional. - path = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - resource1 = self._make_table_resource() - resource1.update( - { - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - } - } - ) - resource2 = self._make_table_resource() - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource1, resource2) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table = client.get_table( - # Test with string for table ID - "{}.{}.{}".format( - self.TABLE_REF.project, - self.TABLE_REF.dataset_id, - self.TABLE_REF.table_id, - ) - ) + # ETag becomes If-Match header. + ds._properties["etag"] = "etag" + client.update_dataset(ds, []) + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "etag") - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) + def test_update_dataset_w_custom_property(self): + # The library should handle sending properties to the API that are not + # yet part of the library + from google.cloud.bigquery.dataset import Dataset - table.schema = None + path = "/projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + resource = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "newAlphaProperty": "unreleased property", + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection(resource) + dataset = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) + dataset._properties["newAlphaProperty"] = "unreleased property" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["schema"]) + dataset = client.update_dataset(dataset, ["newAlphaProperty"]) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["schema"]}, client, None + {"path": path, "fields": ["newAlphaProperty"]}, client, None ) - self.assertEqual(len(conn.api_request.call_args_list), 2) - req = conn.api_request.call_args_list[1] - self.assertEqual(req[1]["method"], "PATCH") - sent = {"schema": None} - self.assertEqual(req[1]["data"], sent) - self.assertEqual(req[1]["path"], "/%s" % path) - self.assertEqual(len(updated_table.schema), 0) + conn.api_request.assert_called_once_with( + method="PATCH", + data={"newAlphaProperty": "unreleased property"}, + path=path, + headers=None, + timeout=None, + ) - def test_update_table_delete_property(self): - from google.cloud.bigquery.table import Table + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) + self.assertEqual(dataset._properties["newAlphaProperty"], "unreleased property") - description = "description" - title = "title" - path = "projects/%s/datasets/%s/tables/%s" % ( + def test_update_model(self): + from google.cloud.bigquery.model import Model + + path = "projects/%s/datasets/%s/models/%s" % ( self.PROJECT, self.DS_ID, - self.TABLE_ID, + self.MODEL_ID, ) - resource1 = self._make_table_resource() - resource1.update({"description": description, "friendlyName": title}) - resource2 = self._make_table_resource() - resource2["description"] = None + description = "description" + title = "title" + expires = datetime.datetime( + 2012, 12, 21, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + }, + "description": description, + "etag": "etag", + "expirationTime": str(google.cloud._helpers._millis(expires)), + "friendlyName": title, + "labels": {"x": "y"}, + } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource1, resource2) - table = Table(self.TABLE_REF) - table.description = description - table.friendly_name = title - + conn = client._connection = make_connection(resource, resource) + model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) + model = Model(model_id) + model.description = description + model.friendly_name = title + model.expires = expires + model.labels = {"x": "y"} + fields = ["description", "friendly_name", "labels", "expires"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - table2 = client.update_table(table, ["description", "friendly_name"]) + updated_model = client.update_model(model, fields, timeout=7.5) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["description", "friendly_name"]}, - client, - None, + {"path": "/%s" % path, "fields": fields}, client, None ) - self.assertEqual(table2.description, table.description) - table2.description = None - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table3 = client.update_table(table2, ["description"]) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["description"]}, client, None + sent = { + "description": description, + "expirationTime": str(google.cloud._helpers._millis(expires)), + "friendlyName": title, + "labels": {"x": "y"}, + } + conn.api_request.assert_called_once_with( + method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 ) + self.assertEqual(updated_model.model_id, model.model_id) + self.assertEqual(updated_model.description, model.description) + self.assertEqual(updated_model.friendly_name, model.friendly_name) + self.assertEqual(updated_model.labels, model.labels) + self.assertEqual(updated_model.expires, model.expires) - self.assertEqual(len(conn.api_request.call_args_list), 2) - req = conn.api_request.call_args_list[1] - self.assertEqual(req[1]["method"], "PATCH") - self.assertEqual(req[1]["path"], "/%s" % path) - sent = {"description": None} - self.assertEqual(req[1]["data"], sent) - self.assertIsNone(table3.description) + # ETag becomes If-Match header. + model._proto.etag = "etag" + client.update_model(model, []) + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "etag") - def test_list_tables_empty_w_timeout(self): - path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) + def test_update_routine(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineArgument + + full_routine_id = "routines-project.test_routines.updated_routine" + resource = { + "routineReference": { + "projectId": "routines-project", + "datasetId": "test_routines", + "routineId": "updated_routine", + }, + "routineType": "SCALAR_FUNCTION", + "language": "SQL", + "definitionBody": "x * 3", + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "returnType": None, + "someNewField": "someValue", + } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) + conn = client._connection = make_connection(resource, resource) + routine = Routine(full_routine_id) + routine.arguments = [ + RoutineArgument( + name="x", + data_type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + ), + ) + ] + routine.body = "x * 3" + routine.language = "SQL" + routine.type_ = "SCALAR_FUNCTION" + routine._properties["someNewField"] = "someValue" + fields = [ + "arguments", + "language", + "body", + "type_", + "return_type", + "someNewField", + ] - dataset = DatasetReference(self.PROJECT, self.DS_ID) - iterator = client.list_tables(dataset, timeout=7.5) - self.assertIs(iterator.dataset, dataset) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) + actual_routine = client.update_routine(routine, fields, timeout=7.5,) - final_attributes.assert_called_once_with({"path": path}, client, None) - tables = list(page) - token = iterator.next_page_token + final_attributes.assert_called_once_with( + {"path": routine.path, "fields": fields}, client, None + ) - self.assertEqual(tables, []) - self.assertIsNone(token) + # TODO: routineReference isn't needed when the Routines API supports + # partial updates. + sent = resource conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=7.5 + method="PUT", + data=sent, + path="/projects/routines-project/datasets/test_routines/routines/updated_routine", + headers=None, + timeout=7.5, ) + self.assertEqual(actual_routine.arguments, routine.arguments) + self.assertEqual(actual_routine.body, routine.body) + self.assertEqual(actual_routine.language, routine.language) + self.assertEqual(actual_routine.type_, routine.type_) - def test_list_models_empty_w_timeout(self): - path = "/projects/{}/datasets/{}/models".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) - - dataset_id = "{}.{}".format(self.PROJECT, self.DS_ID) - iterator = client.list_models(dataset_id, timeout=7.5) + # ETag becomes If-Match header. + routine._properties["etag"] = "im-an-etag" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": path}, client, None) - models = list(page) - token = iterator.next_page_token + client.update_routine(routine, []) - self.assertEqual(models, []) - self.assertIsNone(token) - conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=7.5 + final_attributes.assert_called_once_with( + {"path": routine.path, "fields": []}, client, None ) - def test_list_models_defaults(self): - from google.cloud.bigquery.model import Model + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "im-an-etag") - MODEL_1 = "model_one" - MODEL_2 = "model_two" - PATH = "projects/%s/datasets/%s/models" % (self.PROJECT, self.DS_ID) - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "models": [ - { - "modelReference": { - "modelId": MODEL_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - } - }, - { - "modelReference": { - "modelId": MODEL_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - } - }, - ], - } + def test_update_table(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + description = "description" + title = "title" + resource = self._make_table_resource() + resource.update( + { + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "New field description", + }, + ] + }, + "etag": "etag", + "description": description, + "friendlyName": title, + "labels": {"x": "y"}, + } + ) + schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED", description=None), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="New field description" + ), + ] creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - - iterator = client.list_models(dataset) - self.assertIs(iterator.dataset, dataset) + conn = client._connection = make_connection(resource, resource) + table = Table(self.TABLE_REF, schema=schema) + table.description = description + table.friendly_name = title + table.labels = {"x": "y"} + fields = ["schema", "description", "friendly_name", "labels"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - models = list(page) - token = iterator.next_page_token + updated_table = client.update_table(table, fields, timeout=7.5) + span_path = "/%s" % path - self.assertEqual(len(models), len(DATA["models"])) - for found, expected in zip(models, DATA["models"]): - self.assertIsInstance(found, Model) - self.assertEqual(found.model_id, expected["modelReference"]["modelId"]) - self.assertEqual(token, TOKEN) + final_attributes.assert_called_once_with( + {"path": span_path, "fields": fields}, client, None + ) + sent = { + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "New field description", + }, + ] + }, + "description": description, + "friendlyName": title, + "labels": {"x": "y"}, + } conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 ) + self.assertEqual(updated_table.description, table.description) + self.assertEqual(updated_table.friendly_name, table.friendly_name) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.labels, table.labels) - def test_list_models_wrong_type(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_models(DatasetReference(self.PROJECT, self.DS_ID).model("foo")) - - def test_list_routines_empty_w_timeout(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) - - iterator = client.list_routines("test-routines.test_routines", timeout=7.5) + # ETag becomes If-Match header. + table._properties["etag"] = "etag" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) + client.update_table(table, []) final_attributes.assert_called_once_with( - {"path": "/projects/test-routines/datasets/test_routines/routines"}, - client, - None, - ) - routines = list(page) - token = iterator.next_page_token - - self.assertEqual(routines, []) - self.assertIsNone(token) - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/test-routines/datasets/test_routines/routines", - query_params={}, - timeout=7.5, + {"path": "/%s" % path, "fields": []}, client, None ) - def test_list_routines_defaults(self): - from google.cloud.bigquery.routine import Routine + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "etag") - project_id = "test-routines" - dataset_id = "test_routines" - path = "/projects/test-routines/datasets/test_routines/routines" - routine_1 = "routine_one" - routine_2 = "routine_two" - token = "TOKEN" - resource = { - "nextPageToken": token, - "routines": [ - { - "routineReference": { - "routineId": routine_1, - "datasetId": dataset_id, - "projectId": project_id, - } - }, - { - "routineReference": { - "routineId": routine_2, - "datasetId": dataset_id, - "projectId": project_id, - } - }, - ], - } + def test_update_table_w_custom_property(self): + from google.cloud.bigquery.table import Table + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource = self._make_table_resource() + resource["newAlphaProperty"] = "unreleased property" creds = _make_credentials() - client = self._make_one(project=project_id, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(resource) - dataset = DatasetReference(client.project, dataset_id) + table = Table(self.TABLE_REF) + table._properties["newAlphaProperty"] = "unreleased property" - iterator = client.list_routines(dataset) - self.assertIs(iterator.dataset, dataset) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": path}, client, None) - routines = list(page) - actual_token = iterator.next_page_token + updated_table = client.update_table(table, ["newAlphaProperty"]) - self.assertEqual(len(routines), len(resource["routines"])) - for found, expected in zip(routines, resource["routines"]): - self.assertIsInstance(found, Routine) - self.assertEqual( - found.routine_id, expected["routineReference"]["routineId"] - ) - self.assertEqual(actual_token, token) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["newAlphaProperty"]}, client, None, + ) conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=None + method="PATCH", + path="/%s" % path, + data={"newAlphaProperty": "unreleased property"}, + headers=None, + timeout=None, + ) + self.assertEqual( + updated_table._properties["newAlphaProperty"], "unreleased property" ) - def test_list_routines_wrong_type(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_routines( - DatasetReference(self.PROJECT, self.DS_ID).table("foo") - ) - - def test_list_tables_defaults(self): - from google.cloud.bigquery.table import TableListItem - - TABLE_1 = "table_one" - TABLE_2 = "table_two" - PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "tables": [ - { - "kind": "bigquery#table", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1), - "tableReference": { - "tableId": TABLE_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", - }, - { - "kind": "bigquery#table", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2), - "tableReference": { - "tableId": TABLE_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", - }, - ], - } + def test_update_table_only_use_legacy_sql(self): + from google.cloud.bigquery.table import Table + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource = self._make_table_resource() + resource["view"] = {"useLegacySql": True} creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - - iterator = client.list_tables(dataset) - self.assertIs(iterator.dataset, dataset) + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.view_use_legacy_sql = True with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - tables = list(page) - token = iterator.next_page_token + updated_table = client.update_table(table, ["view_use_legacy_sql"]) - self.assertEqual(len(tables), len(DATA["tables"])) - for found, expected in zip(tables, DATA["tables"]): - self.assertIsInstance(found, TableListItem) - self.assertEqual(found.full_table_id, expected["id"]) - self.assertEqual(found.table_type, expected["type"]) - self.assertEqual(token, TOKEN) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["view_use_legacy_sql"]}, client, None, + ) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="PATCH", + path="/%s" % path, + data={"view": {"useLegacySql": True}}, + headers=None, + timeout=None, ) + self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) - def test_list_tables_explicit(self): - from google.cloud.bigquery.table import TableListItem + def test_update_table_w_query(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table - TABLE_1 = "table_one" - TABLE_2 = "table_two" - PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - TOKEN = "TOKEN" - DATA = { - "tables": [ + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + query = "select fullname, age from person_ages" + location = "EU" + exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + schema_resource = { + "fields": [ { - "kind": "bigquery#dataset", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1), - "tableReference": { - "tableId": TABLE_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, }, { - "kind": "bigquery#dataset", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2), - "tableReference": { - "tableId": TABLE_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "this is a column", }, + {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } - + schema = [ + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + # Explicitly unset the description. + description=None, + ), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="this is a column" + ), + # Omit the description to not make updates to it. + SchemaField("country", "STRING"), + ] + resource = self._make_table_resource() + resource.update( + { + "schema": schema_resource, + "view": {"query": query, "useLegacySql": True}, + "location": location, + "expirationTime": _millis(exp_time), + } + ) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - - iterator = client.list_tables( - # Test with string for dataset ID. - self.DS_ID, - max_results=3, - page_token=TOKEN, - ) - self.assertEqual(iterator.dataset, dataset) + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF, schema=schema) + table.expires = exp_time + table.view_query = query + table.view_use_legacy_sql = True + updated_properties = ["schema", "view_query", "expires", "view_use_legacy_sql"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) + updated_table = client.update_table(table, updated_properties) - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - tables = list(page) - token = iterator.next_page_token + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": updated_properties}, client, None, + ) - self.assertEqual(len(tables), len(DATA["tables"])) - for found, expected in zip(tables, DATA["tables"]): - self.assertIsInstance(found, TableListItem) - self.assertEqual(found.full_table_id, expected["id"]) - self.assertEqual(found.table_type, expected["type"]) - self.assertIsNone(token) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.view_query, table.view_query) + self.assertEqual(updated_table.expires, table.expires) + self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + self.assertEqual(updated_table.location, location) conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"maxResults": 3, "pageToken": TOKEN}, + method="PATCH", + path="/%s" % path, + data={ + "view": {"query": query, "useLegacySql": True}, + "expirationTime": str(_millis(exp_time)), + "schema": schema_resource, + }, + headers=None, timeout=None, ) - def test_list_tables_wrong_type(self): + def test_update_table_w_schema_None(self): + # Simulate deleting schema: not sure if back-end will actually + # allow this operation, but the spec says it is optional. + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource1 = self._make_table_resource() + resource1.update( + { + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + } + } + ) + resource2 = self._make_table_resource() creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_tables(DatasetReference(self.PROJECT, self.DS_ID).table("foo")) - - def test_delete_dataset(self): - from google.cloud.bigquery.dataset import Dataset - from google.cloud.bigquery.dataset import DatasetReference + conn = client._connection = make_connection(resource1, resource2) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + table = client.get_table( + # Test with string for table ID + "{}.{}.{}".format( + self.TABLE_REF.project, + self.TABLE_REF.dataset_id, + self.TABLE_REF.table_id, + ) + ) - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - datasets = (ds_ref, Dataset(ds_ref), "{}.{}".format(self.PROJECT, self.DS_ID)) - PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(*([{}] * len(datasets))) - for arg in datasets: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(arg, timeout=7.5) + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - final_attributes.assert_called_once_with( - {"path": "/%s" % PATH}, client, None - ) + table.schema = None - conn.api_request.assert_called_with( - method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5 - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + updated_table = client.update_table(table, ["schema"]) - def test_delete_dataset_delete_contents(self): - from google.cloud.bigquery.dataset import Dataset + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["schema"]}, client, None + ) - PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}, {}) - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - for arg in (ds_ref, Dataset(ds_ref)): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(arg, delete_contents=True) + self.assertEqual(len(conn.api_request.call_args_list), 2) + req = conn.api_request.call_args_list[1] + self.assertEqual(req[1]["method"], "PATCH") + sent = {"schema": None} + self.assertEqual(req[1]["data"], sent) + self.assertEqual(req[1]["path"], "/%s" % path) + self.assertEqual(len(updated_table.schema), 0) - final_attributes.assert_called_once_with( - {"path": "/%s" % PATH, "deleteContents": True}, client, None - ) - conn.api_request.assert_called_with( - method="DELETE", - path="/%s" % PATH, - query_params={"deleteContents": "true"}, - timeout=None, - ) + def test_update_table_delete_property(self): + from google.cloud.bigquery.table import Table - def test_delete_dataset_wrong_type(self): + description = "description" + title = "title" + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource1 = self._make_table_resource() + resource1.update({"description": description, "friendlyName": title}) + resource2 = self._make_table_resource() + resource2["description"] = None creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.delete_dataset( - DatasetReference(self.PROJECT, self.DS_ID).table("foo") - ) - - def test_delete_dataset_w_not_found_ok_false(self): - path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection( - google.api_core.exceptions.NotFound("dataset not found") - ) - - with self.assertRaises(google.api_core.exceptions.NotFound): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(self.DS_ID) + conn = client._connection = make_connection(resource1, resource2) + table = Table(self.TABLE_REF) + table.description = description + table.friendly_name = title - final_attributes.assert_called_once_with({"path": path}, client, None) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + table2 = client.update_table(table, ["description", "friendly_name"]) - conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["description", "friendly_name"]}, + client, + None, ) - def test_delete_dataset_w_not_found_ok_true(self): - path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection( - google.api_core.exceptions.NotFound("dataset not found") - ) + self.assertEqual(table2.description, table.description) + table2.description = None with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - client.delete_dataset(self.DS_ID, not_found_ok=True) - - final_attributes.assert_called_once_with({"path": path}, client, None) + table3 = client.update_table(table2, ["description"]) - conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["description"]}, client, None ) + self.assertEqual(len(conn.api_request.call_args_list), 2) + req = conn.api_request.call_args_list[1] + self.assertEqual(req[1]["method"], "PATCH") + self.assertEqual(req[1]["path"], "/%s" % path) + sent = {"description": None} + self.assertEqual(req[1]["data"], sent) + self.assertIsNone(table3.description) + def test_delete_model(self): from google.cloud.bigquery.model import Model diff --git a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py new file mode 100644 index 000000000000..3eb8f107236f --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py @@ -0,0 +1,349 @@ +from google.cloud.bigquery.dataset import Dataset, DatasetReference +from .helpers import make_connection, dataset_polymorphic, make_client +import google.cloud.bigquery.dataset +import mock +import pytest + + +@dataset_polymorphic +def test_create_dataset_minimal(make_dataset, get_reference, client, PROJECT, DS_ID): + PATH = "projects/%s/datasets" % PROJECT + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + } + conn = client._connection = make_connection(RESOURCE) + + dataset = make_dataset(PROJECT, DS_ID) + after = client.create_dataset(dataset, timeout=7.5) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + }, + timeout=7.5, + ) + + +def test_create_dataset_w_attrs(client, PROJECT, DS_ID): + from google.cloud.bigquery.dataset import AccessEntry + + PATH = "projects/%s/datasets" % PROJECT + DESCRIPTION = "DESC" + FRIENDLY_NAME = "FN" + LOCATION = "US" + USER_EMAIL = "phred@example.com" + LABELS = {"color": "red"} + VIEW = { + "projectId": "my-proj", + "datasetId": "starry-skies", + "tableId": "northern-hemisphere", + } + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": "3600", + "labels": LABELS, + "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + } + conn = client._connection = make_connection(RESOURCE) + entries = [ + AccessEntry("OWNER", "userByEmail", USER_EMAIL), + AccessEntry(None, "view", VIEW), + ] + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.access_entries = entries + before.description = DESCRIPTION + before.friendly_name = FRIENDLY_NAME + before.default_table_expiration_ms = 3600 + before.location = LOCATION + before.labels = LABELS + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.description == DESCRIPTION + assert after.friendly_name == FRIENDLY_NAME + assert after.location == LOCATION + assert after.default_table_expiration_ms == 3600 + assert after.labels == LABELS + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": "3600", + "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + "labels": LABELS, + }, + timeout=None, + ) + + +def test_create_dataset_w_custom_property(client, PROJECT, DS_ID): + # The library should handle sending properties to the API that are not + # yet part of the library + + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "newAlphaProperty": "unreleased property", + } + conn = client._connection = make_connection(resource) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before._properties["newAlphaProperty"] = "unreleased property" + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after._properties["newAlphaProperty"] == "unreleased property" + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "newAlphaProperty": "unreleased property", + "labels": {}, + }, + timeout=None, + ) + + +def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LOCATION): + PATH = "projects/%s/datasets" % PROJECT + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(RESOURCE) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOCATION): + PATH = "projects/%s/datasets" % PROJECT + OTHER_LOCATION = "EU" + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": OTHER_LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(RESOURCE) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.location = OTHER_LOCATION + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.location == OTHER_LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": OTHER_LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + dataset = client.create_dataset(DatasetReference(PROJECT, DS_ID)) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + dataset = client.create_dataset("{}.{}".format(PROJECT, DS_ID)) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset(DS_ID) + + final_attributes.assert_called_once_with({"path": path}, client, None) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_alreadyexists_w_exists_ok_false(PROJECT, DS_ID, LOCATION): + client = make_client(location=LOCATION) + client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("dataset already exists") + ) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + client.create_dataset(DS_ID) + + +def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION): + post_path = "/projects/{}/datasets".format(PROJECT) + get_path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "{}:{}".format(PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("dataset already exists"), resource + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset(DS_ID, exists_ok=True) + + final_attributes.assert_called_with({"path": get_path}, client, None) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_has_calls( + [ + mock.call( + method="POST", + path=post_path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ), + mock.call(method="GET", path=get_path, timeout=None), + ] + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py new file mode 100644 index 000000000000..c57b517e0e82 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py @@ -0,0 +1,64 @@ +from .helpers import make_connection, make_client, dataset_polymorphic +import google.api_core.exceptions +import pytest + + +@dataset_polymorphic +def test_delete_dataset(make_dataset, get_reference, client, PROJECT, DS_ID): + dataset = make_dataset(PROJECT, DS_ID) + PATH = "projects/%s/datasets/%s" % (PROJECT, DS_ID) + conn = client._connection = make_connection({}) + client.delete_dataset(dataset, timeout=7.5) + conn.api_request.assert_called_with( + method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_delete_dataset_delete_contents( + make_dataset, get_reference, client, PROJECT, DS_ID +): + PATH = "projects/%s/datasets/%s" % (PROJECT, DS_ID) + conn = client._connection = make_connection({}) + dataset = make_dataset(PROJECT, DS_ID) + client.delete_dataset(dataset, delete_contents=True) + conn.api_request.assert_called_with( + method="DELETE", + path="/%s" % PATH, + query_params={"deleteContents": "true"}, + timeout=None, + ) + + +def test_delete_dataset_wrong_type(client): + with pytest.raises(TypeError): + client.delete_dataset(42) + + +def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID): + path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + http = object() + client = make_client(_http=http) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("dataset not found") + ) + + with pytest.raises(google.api_core.exceptions.NotFound): + client.delete_dataset(DS_ID) + + conn.api_request.assert_called_with( + method="DELETE", path=path, query_params={}, timeout=None + ) + + +def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID): + path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + http = object() + client = make_client(_http=http) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("dataset not found") + ) + client.delete_dataset(DS_ID, not_found_ok=True) + conn.api_request.assert_called_with( + method="DELETE", path=path, query_params={}, timeout=None + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_models.py b/packages/google-cloud-bigquery/tests/unit/test_list_models.py new file mode 100644 index 000000000000..534a4b54c2eb --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_list_models.py @@ -0,0 +1,72 @@ +from .helpers import make_connection, dataset_polymorphic +import pytest + + +def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): + path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset_id = "{}.{}".format(PROJECT, DS_ID) + iterator = client.list_models(dataset_id, timeout=7.5) + page = next(iterator.pages) + models = list(page) + token = iterator.next_page_token + + assert models == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): + from google.cloud.bigquery.model import Model + + MODEL_1 = "model_one" + MODEL_2 = "model_two" + PATH = "projects/%s/datasets/%s/models" % (PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "models": [ + { + "modelReference": { + "modelId": MODEL_1, + "datasetId": DS_ID, + "projectId": PROJECT, + } + }, + { + "modelReference": { + "modelId": MODEL_2, + "datasetId": DS_ID, + "projectId": PROJECT, + } + }, + ], + } + + conn = client._connection = make_connection(DATA) + dataset = make_dataset(PROJECT, DS_ID) + + iterator = client.list_models(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + models = list(page) + token = iterator.next_page_token + + assert len(models) == len(DATA["models"]) + for found, expected in zip(models, DATA["models"]): + assert isinstance(found, Model) + assert found.model_id == expected["modelReference"]["modelId"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params={}, timeout=None + ) + + +def test_list_models_wrong_type(client): + with pytest.raises(TypeError): + client.list_models(42) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_routines.py b/packages/google-cloud-bigquery/tests/unit/test_list_routines.py new file mode 100644 index 000000000000..82719fce6ec7 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_list_routines.py @@ -0,0 +1,75 @@ +from .helpers import make_connection, dataset_polymorphic +import pytest + + +def test_list_routines_empty_w_timeout(client): + conn = client._connection = make_connection({}) + + iterator = client.list_routines("test-routines.test_routines", timeout=7.5) + page = next(iterator.pages) + routines = list(page) + token = iterator.next_page_token + + assert routines == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/test-routines/datasets/test_routines/routines", + query_params={}, + timeout=7.5, + ) + + +@dataset_polymorphic +def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): + from google.cloud.bigquery.routine import Routine + + project_id = PROJECT + dataset_id = "test_routines" + path = f"/projects/{PROJECT}/datasets/test_routines/routines" + routine_1 = "routine_one" + routine_2 = "routine_two" + token = "TOKEN" + resource = { + "nextPageToken": token, + "routines": [ + { + "routineReference": { + "routineId": routine_1, + "datasetId": dataset_id, + "projectId": project_id, + } + }, + { + "routineReference": { + "routineId": routine_2, + "datasetId": dataset_id, + "projectId": project_id, + } + }, + ], + } + + conn = client._connection = make_connection(resource) + dataset = make_dataset(client.project, dataset_id) + + iterator = client.list_routines(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + routines = list(page) + actual_token = iterator.next_page_token + + assert len(routines) == len(resource["routines"]) + for found, expected in zip(routines, resource["routines"]): + assert isinstance(found, Routine) + assert found.routine_id == expected["routineReference"]["routineId"] + assert actual_token == token + + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=None + ) + + +def test_list_routines_wrong_type(client): + with pytest.raises(TypeError): + client.list_routines(42) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_tables.py b/packages/google-cloud-bigquery/tests/unit/test_list_tables.py new file mode 100644 index 000000000000..fdd3aa85797e --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_list_tables.py @@ -0,0 +1,145 @@ +from .helpers import make_connection, dataset_polymorphic +import google.cloud.bigquery.dataset +import pytest + + +@dataset_polymorphic +def test_list_tables_empty_w_timeout( + make_dataset, get_reference, client, PROJECT, DS_ID +): + path = "/projects/{}/datasets/{}/tables".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset = make_dataset(PROJECT, DS_ID) + iterator = client.list_tables(dataset, timeout=7.5) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert tables == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): + from google.cloud.bigquery.table import TableListItem + + TABLE_1 = "table_one" + TABLE_2 = "table_two" + PATH = "projects/%s/datasets/%s/tables" % (PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "tables": [ + { + "kind": "bigquery#table", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_1), + "tableReference": { + "tableId": TABLE_1, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + { + "kind": "bigquery#table", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_2), + "tableReference": { + "tableId": TABLE_2, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + ], + } + + conn = client._connection = make_connection(DATA) + dataset = make_dataset(PROJECT, DS_ID) + + iterator = client.list_tables(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert len(tables) == len(DATA["tables"]) + for found, expected in zip(tables, DATA["tables"]): + assert isinstance(found, TableListItem) + assert found.full_table_id == expected["id"] + assert found.table_type == expected["type"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params={}, timeout=None + ) + + +def test_list_tables_explicit(client, PROJECT, DS_ID): + from google.cloud.bigquery.table import TableListItem + + TABLE_1 = "table_one" + TABLE_2 = "table_two" + PATH = "projects/%s/datasets/%s/tables" % (PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "tables": [ + { + "kind": "bigquery#dataset", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_1), + "tableReference": { + "tableId": TABLE_1, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + { + "kind": "bigquery#dataset", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_2), + "tableReference": { + "tableId": TABLE_2, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + ] + } + + conn = client._connection = make_connection(DATA) + dataset = google.cloud.bigquery.dataset.DatasetReference(PROJECT, DS_ID) + + iterator = client.list_tables( + # Test with string for dataset ID. + DS_ID, + max_results=3, + page_token=TOKEN, + ) + assert iterator.dataset == dataset + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert len(tables) == len(DATA["tables"]) + for found, expected in zip(tables, DATA["tables"]): + assert isinstance(found, TableListItem) + assert found.full_table_id == expected["id"] + assert found.table_type == expected["type"] + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={"maxResults": 3, "pageToken": TOKEN}, + timeout=None, + ) + + +def test_list_tables_wrong_type(client): + with pytest.raises(TypeError): + client.list_tables(42) From 2d533cf6661a91365082afd93a9d9818ec2c2b1d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 12 Apr 2021 19:00:25 +0200 Subject: [PATCH 1109/2016] feat: use pyarrow stream compression, if available (#593) * feat: use pyarrow stream compression, if available * Remove unnecessary pyarrow version check Arrow stream compression requires pyarrow>=1.0.0, but that's already guaranteed by a version pin in setup.py if bqstorage extra is installed. * Remvoe unused pyarrow version parsing in tests * Only use arrow compression in tests if available --- .../google/cloud/bigquery/_pandas_helpers.py | 13 ++++ .../google/cloud/bigquery/dbapi/cursor.py | 14 ++++ .../tests/system/test_client.py | 8 -- .../tests/unit/job/test_query_pandas.py | 78 +++++++++++++++++-- .../tests/unit/test_dbapi_cursor.py | 47 +++++++++++ 5 files changed, 146 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 7ad416e084ed..412f32754276 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -33,6 +33,14 @@ except ImportError: # pragma: NO COVER pyarrow = None +try: + from google.cloud.bigquery_storage import ArrowSerializationOptions +except ImportError: + _ARROW_COMPRESSION_SUPPORT = False +else: + # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. + _ARROW_COMPRESSION_SUPPORT = True + from google.cloud.bigquery import schema @@ -631,6 +639,11 @@ def _download_table_bqstorage( for field in selected_fields: requested_session.read_options.selected_fields.append(field.name) + if _ARROW_COMPRESSION_SUPPORT: + requested_session.read_options.arrow_serialization_options.buffer_compression = ( + ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + ) + session = bqstorage_client.create_read_session( parent="projects/{}".format(project_id), read_session=requested_session, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index e90bcc2c0d6a..ee09158d8842 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -19,6 +19,14 @@ import copy import logging +try: + from google.cloud.bigquery_storage import ArrowSerializationOptions +except ImportError: + _ARROW_COMPRESSION_SUPPORT = False +else: + # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. + _ARROW_COMPRESSION_SUPPORT = True + from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions @@ -255,6 +263,12 @@ def _bqstorage_fetch(self, bqstorage_client): table=table_reference.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW, ) + + if _ARROW_COMPRESSION_SUPPORT: + requested_session.read_options.arrow_serialization_options.buffer_compression = ( + ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + ) + read_session = bqstorage_client.create_read_session( parent="projects/{}".format(table_reference.project), read_session=requested_session, diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 133f609a62f3..02444101213d 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -28,7 +28,6 @@ import psutil import pytest -import pkg_resources from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers @@ -116,13 +115,6 @@ (TooManyRequests, InternalServerError, ServiceUnavailable) ) -PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0") - -if pyarrow: - PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version -else: - PYARROW_INSTALLED_VERSION = None - MTLS_TESTING = os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true" diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index d1600ad4376d..0f96232036d0 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -41,6 +41,22 @@ from .helpers import _make_job_resource +@pytest.fixture +def table_read_options_kwarg(): + # Create a BigQuery Storage table read options object with pyarrow compression + # enabled if a recent-enough version of google-cloud-bigquery-storage dependency is + # installed to support the compression. + if not hasattr(bigquery_storage, "ArrowSerializationOptions"): + return {} + + read_options = bigquery_storage.ReadSession.TableReadOptions( + arrow_serialization_options=bigquery_storage.ArrowSerializationOptions( + buffer_compression=bigquery_storage.ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + ) + ) + return {"read_options": read_options} + + @pytest.mark.parametrize( "query,expected", ( @@ -82,7 +98,7 @@ def test__contains_order_by(query, expected): "SelecT name, age froM table OrdeR \n\t BY other_column;", ), ) -def test_to_dataframe_bqstorage_preserve_order(query): +def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class job_resource = _make_job_resource( @@ -123,8 +139,10 @@ def test_to_dataframe_bqstorage_preserve_order(query): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **job_resource["configuration"]["query"]["destinationTable"] ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + expected_session = bigquery_storage.ReadSession( + table=destination_table, + data_format=bigquery_storage.DataFormat.ARROW, + **table_read_options_kwarg, ) bqstorage_client.create_read_session.assert_called_once_with( parent="projects/test-project", @@ -431,7 +449,7 @@ def test_to_dataframe_ddl_query(): @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) -def test_to_dataframe_bqstorage(): +def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class resource = _make_job_resource(job_type="query", ended=True) @@ -468,8 +486,10 @@ def test_to_dataframe_bqstorage(): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **resource["configuration"]["query"]["destinationTable"] ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + expected_session = bigquery_storage.ReadSession( + table=destination_table, + data_format=bigquery_storage.DataFormat.ARROW, + **table_read_options_kwarg, ) bqstorage_client.create_read_session.assert_called_once_with( parent=f"projects/{client.project}", @@ -478,6 +498,52 @@ def test_to_dataframe_bqstorage(): ) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_to_dataframe_bqstorage_no_pyarrow_compression(): + from google.cloud.bigquery.job import QueryJob as target_class + + resource = _make_job_resource(job_type="query", ended=True) + query_resource = { + "jobComplete": True, + "jobReference": resource["jobReference"], + "totalRows": "4", + "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, + } + connection = _make_connection(query_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(resource, client) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [{"name": "name", "type": ["null", "string"]}], + } + ) + bqstorage_client.create_read_session.return_value = session + + with mock.patch( + "google.cloud.bigquery._pandas_helpers._ARROW_COMPRESSION_SUPPORT", new=False + ): + job.to_dataframe(bqstorage_client=bqstorage_client) + + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage.ReadSession( + table=destination_table, data_format=bigquery_storage.DataFormat.ARROW, + ) + bqstorage_client.create_read_session.assert_called_once_with( + parent=f"projects/{client.project}", + read_session=expected_session, + max_stream_count=0, + ) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index cbd6f69098de..0f44e3895f9e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -123,6 +123,7 @@ def _mock_job( schema=schema, num_dml_affected_rows=num_dml_affected_rows, ) + mock_job.destination.project = "P" mock_job.destination.to_bqstorage.return_value = ( "projects/P/datasets/DS/tables/T" ) @@ -380,6 +381,52 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): # the default client was not used mock_client.list_rows.assert_not_called() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_fetchall_w_bqstorage_client_no_arrow_compression(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery import table + + # Use unordered data to also test any non-determenistic key order in dicts. + row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] + bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}] + + mock_client = self._mock_client(rows=row_data) + mock_bqstorage_client = self._mock_bqstorage_client( + stream_count=1, rows=bqstorage_streamed_rows, + ) + + connection = dbapi.connect( + client=mock_client, bqstorage_client=mock_bqstorage_client, + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar FROM some_table") + + with mock.patch( + "google.cloud.bigquery.dbapi.cursor._ARROW_COMPRESSION_SUPPORT", new=False + ): + rows = cursor.fetchall() + + mock_client.list_rows.assert_not_called() # The default client was not used. + + # Check the BQ Storage session config. + expected_session = bigquery_storage.ReadSession( + table="projects/P/datasets/DS/tables/T", + data_format=bigquery_storage.DataFormat.ARROW, + ) + mock_bqstorage_client.create_read_session.assert_called_once_with( + parent="projects/P", read_session=expected_session, max_stream_count=1 + ) + + # Check the data returned. + field_value = op.itemgetter(1) + sorted_row_data = [sorted(row.items(), key=field_value) for row in rows] + expected_row_data = [[("foo", 1.1), ("bar", 1.2)]] + + self.assertEqual(sorted_row_data, expected_row_data) + def test_execute_custom_job_id(self): from google.cloud.bigquery.dbapi import connect From fda18a0872e2914f408d3900c2834e16a298cf4d Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 13 Apr 2021 08:06:04 -0700 Subject: [PATCH 1110/2016] chore: add constraints file check for python samples (#601) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/b7a528df-1b0b-42e0-a583-e53b45ee05fc/targets - [ ] To automatically regenerate this PR, check this box. (May take up to 24 hours.) Source-Link: https://github.com/googleapis/synthtool/commit/0a071b3460344886297a304253bf924aa68ddb7e --- .../.github/header-checker-lint.yml | 2 +- packages/google-cloud-bigquery/renovate.json | 5 ++++- .../google-cloud-bigquery/samples/geography/noxfile.py | 10 ++++++++-- .../google-cloud-bigquery/samples/snippets/noxfile.py | 10 ++++++++-- packages/google-cloud-bigquery/synth.metadata | 6 +++--- 5 files changed, 24 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/header-checker-lint.yml b/packages/google-cloud-bigquery/.github/header-checker-lint.yml index fc281c05bd55..6fe78aa7987a 100644 --- a/packages/google-cloud-bigquery/.github/header-checker-lint.yml +++ b/packages/google-cloud-bigquery/.github/header-checker-lint.yml @@ -1,6 +1,6 @@ {"allowedCopyrightHolders": ["Google LLC"], "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"], - "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt"], + "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt", "**/__init__.py", "samples/**/constraints.txt", "samples/**/constraints-test.txt"], "sourceFileExtensions": [ "ts", "js", diff --git a/packages/google-cloud-bigquery/renovate.json b/packages/google-cloud-bigquery/renovate.json index f08bc22c9a55..c04895563e69 100644 --- a/packages/google-cloud-bigquery/renovate.json +++ b/packages/google-cloud-bigquery/renovate.json @@ -2,5 +2,8 @@ "extends": [ "config:base", ":preserveSemverRanges" ], - "ignorePaths": [".pre-commit-config.yaml"] + "ignorePaths": [".pre-commit-config.yaml"], + "pip_requirements": { + "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] + } } diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index f2320ea0001c..be1a3f251496 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -174,10 +174,16 @@ def _session_tests( ) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): - session.install("-r", "requirements.txt") + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") if os.path.exists("requirements-test.txt"): - session.install("-r", "requirements-test.txt") + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") if INSTALL_LIBRARY_FROM_SOURCE: session.install("-e", _get_repo_root()) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index f2320ea0001c..be1a3f251496 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -174,10 +174,16 @@ def _session_tests( ) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): - session.install("-r", "requirements.txt") + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") if os.path.exists("requirements-test.txt"): - session.install("-r", "requirements-test.txt") + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") if INSTALL_LIBRARY_FROM_SOURCE: session.install("-e", _get_repo_root()) diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 114359b88156..7221c0f0fbce 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "c1195147a6e9220f26558a301427dd447646da3a" + "sha": "8f4c0b84dac3840532d7865247b8ad94b625b897" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" + "sha": "0a071b3460344886297a304253bf924aa68ddb7e" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" + "sha": "0a071b3460344886297a304253bf924aa68ddb7e" } } ], From 037004f75f388acec041e6890cb401f780bcd439 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 13 Apr 2021 09:20:17 -0600 Subject: [PATCH 1111/2016] fix: missing license headers in new test files (#604) --- .../google-cloud-bigquery/tests/unit/conftest.py | 14 ++++++++++++++ .../tests/unit/test_create_dataset.py | 14 ++++++++++++++ .../tests/unit/test_delete_dataset.py | 14 ++++++++++++++ .../tests/unit/test_list_models.py | 14 ++++++++++++++ .../tests/unit/test_list_routines.py | 14 ++++++++++++++ .../tests/unit/test_list_tables.py | 14 ++++++++++++++ 6 files changed, 84 insertions(+) diff --git a/packages/google-cloud-bigquery/tests/unit/conftest.py b/packages/google-cloud-bigquery/tests/unit/conftest.py index 07fc9b4ad56c..7a67ea6b5c77 100644 --- a/packages/google-cloud-bigquery/tests/unit/conftest.py +++ b/packages/google-cloud-bigquery/tests/unit/conftest.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import pytest from .helpers import make_client diff --git a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py index 3eb8f107236f..d07aaed4f004 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from google.cloud.bigquery.dataset import Dataset, DatasetReference from .helpers import make_connection, dataset_polymorphic, make_client import google.cloud.bigquery.dataset diff --git a/packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py index c57b517e0e82..3a65e031c2bf 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .helpers import make_connection, make_client, dataset_polymorphic import google.api_core.exceptions import pytest diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_models.py b/packages/google-cloud-bigquery/tests/unit/test_list_models.py index 534a4b54c2eb..56aa66126753 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_models.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_models.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .helpers import make_connection, dataset_polymorphic import pytest diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_routines.py b/packages/google-cloud-bigquery/tests/unit/test_list_routines.py index 82719fce6ec7..714ede0d420f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_routines.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_routines.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .helpers import make_connection, dataset_polymorphic import pytest diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_tables.py b/packages/google-cloud-bigquery/tests/unit/test_list_tables.py index fdd3aa85797e..9acee95807ba 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_tables.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_tables.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .helpers import make_connection, dataset_polymorphic import google.cloud.bigquery.dataset import pytest From 4a4bf01b3a20cb0352c1d20dd065af8787c4a57b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 13 Apr 2021 18:50:05 +0200 Subject: [PATCH 1112/2016] refactor: simplify OrderedDict arguments in lexer (#598) Python 3.6+ guarantees that kwargs order is preserved, thus we don't need to assure the order by passing them as a list of tuples. --- .../bigquery/magics/line_arg_parser/lexer.py | 119 ++++++------------ 1 file changed, 37 insertions(+), 82 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py index 0cb63292c413..5a6ee1a83b28 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -49,90 +49,45 @@ # the value of an option other than "--params", we do not really care about its # structure, and thus do not want to use any of the "Python tokens" for pattern matching. # -# Since token definition order is important, an OrderedDict is needed with tightly -# controlled member definitions (i.e. passed as a sequence, and *not* via kwargs). +# Token definition order is important, thus an OrderedDict is used. In addition, PEP 468 +# guarantees us that the order of kwargs is preserved in Python 3.6+. token_types = OrderedDict( - [ - ( - "state_parse_pos_args", - OrderedDict( - [ - ( - "GOTO_PARSE_NON_PARAMS_OPTIONS", - r"(?P(?=--))", # double dash - starting the options list - ), - ( - "DEST_VAR", - r"(?P[^\d\W]\w*)", # essentially a Python ID - ), - ] - ), - ), - ( - "state_parse_non_params_options", - OrderedDict( - [ - ( - "GOTO_PARSE_PARAMS_OPTION", - r"(?P(?=--params(?:\s|=|--|$)))", # the --params option - ), - ("OPTION_SPEC", r"(?P--\w+)"), - ("OPTION_EQ", r"(?P=)"), - ("OPT_VAL", r"(?P\S+?(?=\s|--|$))"), - ] - ), - ), - ( - "state_parse_params_option", - OrderedDict( - [ - ( - "PY_STRING", - r"(?P(?:{})|(?:{}))".format( - r"'(?:[^'\\]|\.)*'", - r'"(?:[^"\\]|\.)*"', # single and double quoted strings - ), - ), - ("PARAMS_OPT_SPEC", r"(?P--params(?=\s|=|--|$))"), - ("PARAMS_OPT_EQ", r"(?P=)"), - ( - "GOTO_PARSE_NON_PARAMS_OPTIONS", - r"(?P(?=--\w+))", # found another option spec - ), - ("PY_BOOL", r"(?PTrue|False)"), - ("DOLLAR_PY_ID", r"(?P\$[^\d\W]\w*)"), - ( - "PY_NUMBER", - r"(?P-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)", - ), - ("SQUOTE", r"(?P')"), - ("DQUOTE", r'(?P")'), - ("COLON", r"(?P:)"), - ("COMMA", r"(?P,)"), - ("LCURL", r"(?P\{)"), - ("RCURL", r"(?P})"), - ("LSQUARE", r"(?P\[)"), - ("RSQUARE", r"(?P])"), - ("LPAREN", r"(?P\()"), - ("RPAREN", r"(?P\))"), - ] - ), - ), - ( - "common", - OrderedDict( - [ - ("WS", r"(?P\s+)"), - ("EOL", r"(?P$)"), - ( - # anything not a whitespace or matched by something else - "UNKNOWN", - r"(?P\S+)", - ), - ] - ), + state_parse_pos_args=OrderedDict( + GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P(?=--))", # double dash - starting the options list + DEST_VAR=r"(?P[^\d\W]\w*)", # essentially a Python ID + ), + state_parse_non_params_options=OrderedDict( + GOTO_PARSE_PARAMS_OPTION=r"(?P(?=--params(?:\s|=|--|$)))", # the --params option + OPTION_SPEC=r"(?P--\w+)", + OPTION_EQ=r"(?P=)", + OPT_VAL=r"(?P\S+?(?=\s|--|$))", + ), + state_parse_params_option=OrderedDict( + PY_STRING=r"(?P(?:{})|(?:{}))".format( # single and double quoted strings + r"'(?:[^'\\]|\.)*'", r'"(?:[^"\\]|\.)*"' ), - ] + PARAMS_OPT_SPEC=r"(?P--params(?=\s|=|--|$))", + PARAMS_OPT_EQ=r"(?P=)", + GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P(?=--\w+))", # found another option spec + PY_BOOL=r"(?PTrue|False)", + DOLLAR_PY_ID=r"(?P\$[^\d\W]\w*)", + PY_NUMBER=r"(?P-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)", + SQUOTE=r"(?P')", + DQUOTE=r'(?P")', + COLON=r"(?P:)", + COMMA=r"(?P,)", + LCURL=r"(?P\{)", + RCURL=r"(?P})", + LSQUARE=r"(?P\[)", + RSQUARE=r"(?P])", + LPAREN=r"(?P\()", + RPAREN=r"(?P\))", + ), + common=OrderedDict( + WS=r"(?P\s+)", + EOL=r"(?P$)", + UNKNOWN=r"(?P\S+)", # anything not a whitespace or matched by something else + ), ) From 7e9e1a3be0d016bd2dfd6a2949d09b9a22c95123 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 13 Apr 2021 19:17:39 +0200 Subject: [PATCH 1113/2016] chore(deps): update dependency mock to v4.0.3 (#605) --- .../samples/geography/requirements-test.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 676ff949e8ae..a5da1a77d747 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ pytest==5.4.3 -mock==4.0.2 +mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 676ff949e8ae..a5da1a77d747 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ pytest==5.4.3 -mock==4.0.2 +mock==4.0.3 From fdabb9e472c688b04e44a5f867ad4fe8aaf5e08c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 13 Apr 2021 12:49:39 -0500 Subject: [PATCH 1114/2016] docs: add sample to run DML query (#591) * docs: add sample to run DML query * cleanup leftover datasets before test run * fix import order --- .../samples/snippets/conftest.py | 40 +++++++++ .../samples/snippets/test_update_with_dml.py | 36 ++++++++ .../samples/snippets/update_with_dml.py | 82 +++++++++++++++++++ .../samples/snippets/user_sessions_data.json | 10 +++ 4 files changed, 168 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/update_with_dml.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/user_sessions_data.json diff --git a/packages/google-cloud-bigquery/samples/snippets/conftest.py b/packages/google-cloud-bigquery/samples/snippets/conftest.py index d22a3331812d..31c6ba104687 100644 --- a/packages/google-cloud-bigquery/samples/snippets/conftest.py +++ b/packages/google-cloud-bigquery/samples/snippets/conftest.py @@ -12,10 +12,35 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime +import random + from google.cloud import bigquery import pytest +RESOURCE_PREFIX = "python_bigquery_samples_snippets" + + +def resource_prefix() -> str: + timestamp = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S") + random_string = hex(random.randrange(1000000))[2:] + return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}" + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_datasets(bigquery_client: bigquery.Client): + yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) + for dataset in bigquery_client.list_datasets(): + if ( + dataset.dataset_id.startswith(RESOURCE_PREFIX) + and dataset.created < yesterday + ): + bigquery_client.delete_dataset( + dataset, delete_contents=True, not_found_ok=True + ) + + @pytest.fixture(scope="session") def bigquery_client(): bigquery_client = bigquery.Client() @@ -25,3 +50,18 @@ def bigquery_client(): @pytest.fixture(scope="session") def project_id(bigquery_client): return bigquery_client.project + + +@pytest.fixture(scope="session") +def dataset_id(bigquery_client: bigquery.Client, project_id: str): + dataset_id = resource_prefix() + full_dataset_id = f"{project_id}.{dataset_id}" + dataset = bigquery.Dataset(full_dataset_id) + bigquery_client.create_dataset(dataset) + yield dataset_id + bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) + + +@pytest.fixture +def bigquery_client_patch(monkeypatch, bigquery_client): + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py b/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py new file mode 100644 index 000000000000..3cca7a649c59 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py @@ -0,0 +1,36 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + +from conftest import resource_prefix +import update_with_dml + + +@pytest.fixture +def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + table_id = f"{resource_prefix()}_update_with_dml" + yield table_id + full_table_id = f"{project_id}.{dataset_id}.{table_id}" + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + +def test_update_with_dml(bigquery_client_patch, dataset_id, table_id): + override_values = { + "dataset_id": dataset_id, + "table_id": table_id, + } + num_rows = update_with_dml.run_sample(override_values=override_values) + assert num_rows > 0 diff --git a/packages/google-cloud-bigquery/samples/snippets/update_with_dml.py b/packages/google-cloud-bigquery/samples/snippets/update_with_dml.py new file mode 100644 index 000000000000..7fd09dd80c13 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/update_with_dml.py @@ -0,0 +1,82 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START bigquery_update_with_dml] +import pathlib + +from google.cloud import bigquery +from google.cloud.bigquery import enums + + +def load_from_newline_delimited_json( + client: bigquery.Client, + filepath: pathlib.Path, + project_id: str, + dataset_id: str, + table_id: str, +): + full_table_id = f"{project_id}.{dataset_id}.{table_id}" + job_config = bigquery.LoadJobConfig() + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + job_config.schema = [ + bigquery.SchemaField("id", enums.SqlTypeNames.STRING), + bigquery.SchemaField("user_id", enums.SqlTypeNames.INTEGER), + bigquery.SchemaField("login_time", enums.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("logout_time", enums.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("ip_address", enums.SqlTypeNames.STRING), + ] + + with open(filepath, "rb") as json_file: + load_job = client.load_table_from_file( + json_file, full_table_id, job_config=job_config + ) + + # Wait for load job to finish. + load_job.result() + + +def update_with_dml( + client: bigquery.Client, project_id: str, dataset_id: str, table_id: str +): + query_text = f""" + UPDATE `{project_id}.{dataset_id}.{table_id}` + SET ip_address = REGEXP_REPLACE(ip_address, r"(\\.[0-9]+)$", ".0") + WHERE TRUE + """ + query_job = client.query(query_text) + + # Wait for query job to finish. + query_job.result() + + print(f"DML query modified {query_job.num_dml_affected_rows} rows.") + return query_job.num_dml_affected_rows + + +def run_sample(override_values={}): + client = bigquery.Client() + filepath = pathlib.Path(__file__).parent / "user_sessions_data.json" + project_id = client.project + dataset_id = "sample_db" + table_id = "UserSessions" + # [END bigquery_update_with_dml] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + dataset_id = override_values.get("dataset_id", dataset_id) + table_id = override_values.get("table_id", table_id) + # [START bigquery_update_with_dml] + load_from_newline_delimited_json(client, filepath, project_id, dataset_id, table_id) + return update_with_dml(client, project_id, dataset_id, table_id) + + +# [END bigquery_update_with_dml] diff --git a/packages/google-cloud-bigquery/samples/snippets/user_sessions_data.json b/packages/google-cloud-bigquery/samples/snippets/user_sessions_data.json new file mode 100644 index 000000000000..7ea3715adf5f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/user_sessions_data.json @@ -0,0 +1,10 @@ +{"id":"2ad525d6-c832-4c3d-b7fe-59d104885519","user_id":"38","login_time":"1.47766087E9","logout_time":"1.477661109E9","ip_address":"192.0.2.12"} +{"id":"53d65e20-6ea9-4650-98d9-a2111fbd1122","user_id":"88","login_time":"1.47707544E9","logout_time":"1.477075519E9","ip_address":"192.0.2.88"} +{"id":"5e6c3021-d5e7-4ccd-84b2-adfa9176d13d","user_id":"39","login_time":"1.474022869E9","logout_time":"1.474022961E9","ip_address":"203.0.113.52"} +{"id":"6196eefa-1498-4567-8ef0-498845b888d9","user_id":"52","login_time":"1.478604612E9","logout_time":"1.478604691E9","ip_address":"203.0.113.169"} +{"id":"70656dc5-7e0f-49cf-9e00-f06ed93c1f5b","user_id":"46","login_time":"1.474089924E9","logout_time":"1.474090227E9","ip_address":"192.0.2.10"} +{"id":"aafa5eef-ad49-49a7-9a0f-fbc7fd639bd3","user_id":"40","login_time":"1.478031161E9","logout_time":"1.478031388E9","ip_address":"203.0.113.18"} +{"id":"d2792fc2-24dd-4260-9456-3fbe6cdfdd90","user_id":"5","login_time":"1.481259081E9","logout_time":"1.481259247E9","ip_address":"192.0.2.140"} +{"id":"d835dc49-32f9-4790-b4eb-dddee62e0dcc","user_id":"62","login_time":"1.478892977E9","logout_time":"1.478893219E9","ip_address":"203.0.113.83"} +{"id":"f4a0d3c7-351f-471c-8e11-e093e7a6ce75","user_id":"89","login_time":"1.459031555E9","logout_time":"1.459031831E9","ip_address":"203.0.113.233"} +{"id":"f6e9f526-5b22-4679-9c3e-56a636e815bb","user_id":"97","login_time":"1.482426034E9","logout_time":"1.482426415E9","ip_address":"203.0.113.167"} From 5a435ac47b17de5dc8c43c7ac4a275879b5dddbb Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 14 Apr 2021 16:01:42 +0200 Subject: [PATCH 1115/2016] chore(deps): update dependency pytest to v6 (#606) --- .../samples/geography/requirements-test.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index a5da1a77d747..299d90b65200 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==5.4.3 +pytest==6.2.3 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index a5da1a77d747..299d90b65200 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==5.4.3 +pytest==6.2.3 mock==4.0.3 From 91d7114a82adce30bf407033d969907965005ba3 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 14 Apr 2021 16:03:06 +0200 Subject: [PATCH 1116/2016] chore(deps): update dependency google-cloud-bigquery to v2.13.1 (#573) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index c5f60911e9b0..6939c07e06e7 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.13.0 +google-cloud-bigquery==2.13.1 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 734cdf445016..74a18981e2e9 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.13.0 +google-cloud-bigquery==2.13.1 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 From fb8a6cee5d38bab133afd111b884c9bbea56f2e3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 14 Apr 2021 23:08:17 +0200 Subject: [PATCH 1117/2016] feat: add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` (#575) * feat: add max_queue_size option for BQ Storage API The new parameter allows configuring the maximum size of the internal queue used to hold result pages when query data is streamed over the BigQuery Storage API. * Slightly simplify bits of page streaming logic * Only retain max_queue_size where most relevant * Adjust tests, add support for infinite queue size * Remove deleted param's description --- .../google/cloud/bigquery/_pandas_helpers.py | 28 +++++--- .../google/cloud/bigquery/table.py | 32 ++++++--- .../tests/unit/test__pandas_helpers.py | 66 +++++++++++++++++++ 3 files changed, 110 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 412f32754276..7553726fa396 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -53,6 +53,8 @@ _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. +_MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads + _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", @@ -616,6 +618,7 @@ def _download_table_bqstorage( preserve_order=False, selected_fields=None, page_to_item=None, + max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, ): """Use (faster, but billable) BQ Storage API to construct DataFrame.""" @@ -667,7 +670,17 @@ def _download_table_bqstorage( download_state = _DownloadState() # Create a queue to collect frames as they are created in each thread. - worker_queue = queue.Queue() + # + # The queue needs to be bounded by default, because if the user code processes the + # fetched result pages too slowly, while at the same time new pages are rapidly being + # fetched from the server, the queue can grow to the point where the process runs + # out of memory. + if max_queue_size is _MAX_QUEUE_SIZE_DEFAULT: + max_queue_size = total_streams + elif max_queue_size is None: + max_queue_size = 0 # unbounded + + worker_queue = queue.Queue(maxsize=max_queue_size) with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool: try: @@ -708,15 +721,12 @@ def _download_table_bqstorage( continue # Return any remaining values after the workers finished. - while not worker_queue.empty(): # pragma: NO COVER + while True: # pragma: NO COVER try: - # Include a timeout because even though the queue is - # non-empty, it doesn't guarantee that a subsequent call to - # get() will not block. - frame = worker_queue.get(timeout=_PROGRESS_INTERVAL) + frame = worker_queue.get_nowait() yield frame except queue.Empty: # pragma: NO COVER - continue + break finally: # No need for a lock because reading/replacing a variable is # defined to be an atomic operation in the Python language @@ -729,7 +739,7 @@ def _download_table_bqstorage( def download_arrow_bqstorage( - project_id, table, bqstorage_client, preserve_order=False, selected_fields=None + project_id, table, bqstorage_client, preserve_order=False, selected_fields=None, ): return _download_table_bqstorage( project_id, @@ -749,6 +759,7 @@ def download_dataframe_bqstorage( dtypes, preserve_order=False, selected_fields=None, + max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, ): page_to_item = functools.partial(_bqstorage_page_to_dataframe, column_names, dtypes) return _download_table_bqstorage( @@ -758,6 +769,7 @@ def download_dataframe_bqstorage( preserve_order=preserve_order, selected_fields=selected_fields, page_to_item=page_to_item, + max_queue_size=max_queue_size, ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a2366b806eff..bd5bca30fe8a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1490,13 +1490,12 @@ def _to_page_iterable( if not self._validate_bqstorage(bqstorage_client, False): bqstorage_client = None - if bqstorage_client is not None: - for item in bqstorage_download(): - yield item - return - - for item in tabledata_list_download(): - yield item + result_pages = ( + bqstorage_download() + if bqstorage_client is not None + else tabledata_list_download() + ) + yield from result_pages def _to_arrow_iterable(self, bqstorage_client=None): """Create an iterable of arrow RecordBatches, to process the table as a stream.""" @@ -1622,7 +1621,12 @@ def to_arrow( arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) - def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): + def to_dataframe_iterable( + self, + bqstorage_client=None, + dtypes=None, + max_queue_size=_pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, + ): """Create an iterable of pandas DataFrames, to process the table as a stream. Args: @@ -1642,6 +1646,17 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): ``dtype`` is used when constructing the series for the column specified. Otherwise, the default pandas behavior is used. + max_queue_size (Optional[int]): + The maximum number of result pages to hold in the internal queue when + streaming query results over the BigQuery Storage API. Ignored if + Storage API is not used. + + By default, the max queue size is set to the number of BQ Storage streams + created by the server. If ``max_queue_size`` is :data:`None`, the queue + size is infinite. + + ..versionadded:: 2.14.0 + Returns: pandas.DataFrame: A generator of :class:`~pandas.DataFrame`. @@ -1665,6 +1680,7 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): dtypes, preserve_order=self._preserve_order, selected_fields=self._selected_fields, + max_queue_size=max_queue_size, ) tabledata_list_download = functools.partial( _pandas_helpers.download_dataframe_row_iterator, diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index abd72582015e..43692f4af27c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -17,6 +17,7 @@ import decimal import functools import operator +import queue import warnings import mock @@ -41,6 +42,11 @@ from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None + skip_if_no_bignumeric = pytest.mark.skipif( not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", @@ -1265,6 +1271,66 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg +@pytest.mark.parametrize( + "stream_count,maxsize_kwarg,expected_call_count,expected_maxsize", + [ + (3, {"max_queue_size": 2}, 3, 2), # custom queue size + (4, {}, 4, 4), # default queue size + (7, {"max_queue_size": None}, 7, 0), # infinite queue size + ], +) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__download_table_bqstorage( + module_under_test, + stream_count, + maxsize_kwarg, + expected_call_count, + expected_maxsize, +): + from google.cloud.bigquery import dataset + from google.cloud.bigquery import table + + queue_used = None # A reference to the queue used by code under test. + + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + fake_session = mock.Mock(streams=["stream/s{i}" for i in range(stream_count)]) + bqstorage_client.create_read_session.return_value = fake_session + + table_ref = table.TableReference( + dataset.DatasetReference("project-x", "dataset-y"), "table-z", + ) + + def fake_download_stream( + download_state, bqstorage_client, session, stream, worker_queue, page_to_item + ): + nonlocal queue_used + queue_used = worker_queue + try: + worker_queue.put_nowait("result_page") + except queue.Full: # pragma: NO COVER + pass + + download_stream = mock.Mock(side_effect=fake_download_stream) + + with mock.patch.object( + module_under_test, "_download_table_bqstorage_stream", new=download_stream + ): + result_gen = module_under_test._download_table_bqstorage( + "some-project", table_ref, bqstorage_client, **maxsize_kwarg + ) + list(result_gen) + + # Timing-safe, as the method under test should block until the pool shutdown is + # complete, at which point all download stream workers have already been submitted + # to the thread pool. + assert download_stream.call_count == stream_count # once for each stream + assert queue_used.maxsize == expected_maxsize + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( From 934f5f1a52d49cc8211018167ec133fefc373b7e Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 15 Apr 2021 07:55:15 -0700 Subject: [PATCH 1118/2016] chore: generate PyPI token in secrets manager, fix spacing in docs (via synth) (#612) * docs(python): add empty lines between methods Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Wed Apr 14 14:41:09 2021 -0600 Source-Repo: googleapis/synthtool Source-Sha: 721339ab60a6eb63b889978b3d9b295dcb3be370 Source-Link: https://github.com/googleapis/synthtool/commit/721339ab60a6eb63b889978b3d9b295dcb3be370 * build: use PyPI API token in secret manager Migrate python libraries onto the PyPI API token stored in secret manager. A PyPI API token is limited in scope to uploading new releases. https://pypi.org/help/#apitoken Verified that this works with [build](https://fusion2.corp.google.com/invocations/14bae126-83fa-4328-8da9-d390ed99315c/targets/cloud-devrel%2Fclient-libraries%2Fpython%2Fgoogleapis%2Fpython-vision%2Frelease%2Frelease;config=default/log) on https://github.com/googleapis/python-vision/pull/136 Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Wed Apr 14 17:46:06 2021 -0600 Source-Repo: googleapis/synthtool Source-Sha: 043cc620d6a6111816d9e09f2a97208565fde958 Source-Link: https://github.com/googleapis/synthtool/commit/043cc620d6a6111816d9e09f2a97208565fde958 --- packages/google-cloud-bigquery/.kokoro/release.sh | 4 ++-- .../.kokoro/release/common.cfg | 14 ++------------ .../google-cloud-bigquery/docs/_static/custom.css | 13 ++++++++++++- packages/google-cloud-bigquery/synth.metadata | 6 +++--- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/.kokoro/release.sh b/packages/google-cloud-bigquery/.kokoro/release.sh index 0e58f0640fdf..3abba6e06681 100755 --- a/packages/google-cloud-bigquery/.kokoro/release.sh +++ b/packages/google-cloud-bigquery/.kokoro/release.sh @@ -26,7 +26,7 @@ python3 -m pip install --upgrade twine wheel setuptools export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google_cloud_pypi_password") +TWINE_PASSWORD=$(cat "${KOKORO_GFILE_DIR}/secret_manager/google-cloud-pypi-token") cd github/python-bigquery python3 setup.py sdist bdist_wheel -twine upload --username gcloudpypi --password "${TWINE_PASSWORD}" dist/* +twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/packages/google-cloud-bigquery/.kokoro/release/common.cfg b/packages/google-cloud-bigquery/.kokoro/release/common.cfg index 18b417709bde..922d7fe504bd 100644 --- a/packages/google-cloud-bigquery/.kokoro/release/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/release/common.cfg @@ -23,18 +23,8 @@ env_vars: { value: "github/python-bigquery/.kokoro/release.sh" } -# Fetch PyPI password -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "google_cloud_pypi_password" - } - } -} - # Tokens needed to report release status back to GitHub env_vars: { key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" -} \ No newline at end of file + value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem,google-cloud-pypi-token" +} diff --git a/packages/google-cloud-bigquery/docs/_static/custom.css b/packages/google-cloud-bigquery/docs/_static/custom.css index bcd37bbd3c4a..b0a295464b23 100644 --- a/packages/google-cloud-bigquery/docs/_static/custom.css +++ b/packages/google-cloud-bigquery/docs/_static/custom.css @@ -1,9 +1,20 @@ div#python2-eol { border-color: red; border-width: medium; -} +} /* Ensure minimum width for 'Parameters' / 'Returns' column */ dl.field-list > dt { min-width: 100px } + +/* Insert space between methods for readability */ +dl.method { + padding-top: 10px; + padding-bottom: 10px +} + +/* Insert empty space between classes */ +dl.class { + padding-bottom: 50px +} diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata index 7221c0f0fbce..b031618b0e0b 100644 --- a/packages/google-cloud-bigquery/synth.metadata +++ b/packages/google-cloud-bigquery/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "8f4c0b84dac3840532d7865247b8ad94b625b897" + "sha": "f95f415d3441b3928f6cc705cb8a75603d790fd6" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "0a071b3460344886297a304253bf924aa68ddb7e" + "sha": "043cc620d6a6111816d9e09f2a97208565fde958" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "0a071b3460344886297a304253bf924aa68ddb7e" + "sha": "043cc620d6a6111816d9e09f2a97208565fde958" } } ], From 1762f29f5466c867c533c7f254dc6669ab9ccf8b Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Thu, 15 Apr 2021 14:56:38 -0400 Subject: [PATCH 1119/2016] chore: prevent normalization of semver versioning (#611) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 607ffb63fbaf..46a1284261da 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -100,7 +100,7 @@ setuptools.setup( name=name, - version=version, + version=setuptools.sic(version), description=description, long_description=readme, author="Google LLC", From 053a4989d359fb1c2bdf8d2db0b1d4ec8373c934 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 15 Apr 2021 18:40:04 -0500 Subject: [PATCH 1120/2016] feat: accept job object as argument to `get_job` and `cancel_job` (#617) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows one to more easily cancel or get updated metadata for an existing job from the client class. Ensures that project ID and location are correctly populated. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #616 🦕 --- .../google/cloud/bigquery/client.py | 63 +++++++++++++++++-- .../tests/system/test_client.py | 11 ++-- .../tests/unit/test_client.py | 43 ++++++++----- 3 files changed, 93 insertions(+), 24 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 10127e10d2bf..8211e23a3a67 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1734,12 +1734,20 @@ def get_job( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get Args: - job_id (str): Unique job identifier. + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): Location where the job was run. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -1757,6 +1765,10 @@ def get_job( """ extra_params = {"projection": "full"} + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + if project is None: project = self.project @@ -1791,12 +1803,20 @@ def cancel_job( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel Args: - job_id (str): Unique job identifier. + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): Location where the job was run. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -1814,6 +1834,10 @@ def cancel_job( """ extra_params = {"projection": "full"} + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + if project is None: project = self.project @@ -3518,6 +3542,37 @@ def _item_to_table(iterator, resource): return TableListItem(resource) +def _extract_job_reference(job, project=None, location=None): + """Extract fully-qualified job reference from a job-like object. + + Args: + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. + project (Optional[str]): + Project where the job was run. Ignored if ``job_id`` is a job + object. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. + + Returns: + Tuple[str, str, str]: ``(project, location, job_id)`` + """ + if hasattr(job, "job_id"): + project = job.project + job_id = job.job_id + location = job.location + else: + job_id = job + + return (project, location, job_id) + + def _make_job_id(job_id, prefix=None): """Construct an ID for a new job. diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 02444101213d..f31d994cae8d 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -189,7 +189,9 @@ def test_get_service_account_email(self): def _create_bucket(self, bucket_name, location=None): storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) - retry_storage_errors(bucket.create)(location=location) + retry_storage_errors(storage_client.create_bucket)( + bucket_name, location=location + ) self.to_delete.append(bucket) return bucket @@ -872,7 +874,7 @@ def test_load_table_from_file_w_explicit_location(self): job_id = load_job.job_id # Can get the job from the EU. - load_job = client.get_job(job_id, location="EU") + load_job = client.get_job(load_job) self.assertEqual(job_id, load_job.job_id) self.assertEqual("EU", load_job.location) self.assertTrue(load_job.exists()) @@ -889,7 +891,7 @@ def test_load_table_from_file_w_explicit_location(self): # Can cancel the job from the EU. self.assertTrue(load_job.cancel()) - load_job = client.cancel_job(job_id, location="EU") + load_job = client.cancel_job(load_job) self.assertEqual(job_id, load_job.job_id) self.assertEqual("EU", load_job.location) @@ -1204,8 +1206,7 @@ def test_query_w_timeout(self): # Even though the query takes >1 second, the call to getQueryResults # should succeed. self.assertFalse(query_job.done(timeout=1)) - - Config.CLIENT.cancel_job(query_job.job_id, location=query_job.location) + self.assertIsNotNone(Config.CLIENT.cancel_job(query_job)) def test_query_w_page_size(self): page_size = 45 diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 96e51678f8eb..c5e742c9e750 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2933,31 +2933,30 @@ def test_get_job_miss_w_explict_project(self): conn = client._connection = make_connection() with self.assertRaises(NotFound): - client.get_job(JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) + client.get_job(JOB_ID, project=OTHER_PROJECT) conn.api_request.assert_called_once_with( method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", - query_params={"projection": "full", "location": self.LOCATION}, + query_params={"projection": "full"}, timeout=None, ) def test_get_job_miss_w_client_location(self): from google.cloud.exceptions import NotFound - OTHER_PROJECT = "OTHER_PROJECT" JOB_ID = "NONESUCH" creds = _make_credentials() - client = self._make_one(self.PROJECT, creds, location=self.LOCATION) + client = self._make_one("client-proj", creds, location="client-loc") conn = client._connection = make_connection() with self.assertRaises(NotFound): - client.get_job(JOB_ID, project=OTHER_PROJECT) + client.get_job(JOB_ID) conn.api_request.assert_called_once_with( method="GET", - path="/projects/OTHER_PROJECT/jobs/NONESUCH", - query_params={"projection": "full", "location": self.LOCATION}, + path="/projects/client-proj/jobs/NONESUCH", + query_params={"projection": "full", "location": "client-loc"}, timeout=None, ) @@ -2971,7 +2970,11 @@ def test_get_job_hit_w_timeout(self): QUERY = "SELECT * from test_dataset:test_table" ASYNC_QUERY_DATA = { "id": "{}:{}".format(self.PROJECT, JOB_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "jobReference": { + "projectId": "resource-proj", + "jobId": "query_job", + "location": "us-east1", + }, "state": "DONE", "configuration": { "query": { @@ -2989,18 +2992,21 @@ def test_get_job_hit_w_timeout(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection(ASYNC_QUERY_DATA) + job_from_resource = QueryJob.from_api_repr(ASYNC_QUERY_DATA, client) - job = client.get_job(JOB_ID, timeout=7.5) + job = client.get_job(job_from_resource, timeout=7.5) self.assertIsInstance(job, QueryJob) self.assertEqual(job.job_id, JOB_ID) + self.assertEqual(job.project, "resource-proj") + self.assertEqual(job.location, "us-east1") self.assertEqual(job.create_disposition, CreateDisposition.CREATE_IF_NEEDED) self.assertEqual(job.write_disposition, WriteDisposition.WRITE_TRUNCATE) conn.api_request.assert_called_once_with( method="GET", - path="/projects/PROJECT/jobs/query_job", - query_params={"projection": "full"}, + path="/projects/resource-proj/jobs/query_job", + query_params={"projection": "full", "location": "us-east1"}, timeout=7.5, ) @@ -3049,7 +3055,11 @@ def test_cancel_job_hit(self): QUERY = "SELECT * from test_dataset:test_table" QUERY_JOB_RESOURCE = { "id": "{}:{}".format(self.PROJECT, JOB_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "jobReference": { + "projectId": "job-based-proj", + "jobId": "query_job", + "location": "asia-northeast1", + }, "state": "RUNNING", "configuration": {"query": {"query": QUERY}}, } @@ -3057,17 +3067,20 @@ def test_cancel_job_hit(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection(RESOURCE) + job_from_resource = QueryJob.from_api_repr(QUERY_JOB_RESOURCE, client) - job = client.cancel_job(JOB_ID) + job = client.cancel_job(job_from_resource) self.assertIsInstance(job, QueryJob) self.assertEqual(job.job_id, JOB_ID) + self.assertEqual(job.project, "job-based-proj") + self.assertEqual(job.location, "asia-northeast1") self.assertEqual(job.query, QUERY) conn.api_request.assert_called_once_with( method="POST", - path="/projects/PROJECT/jobs/query_job/cancel", - query_params={"projection": "full"}, + path="/projects/job-based-proj/jobs/query_job/cancel", + query_params={"projection": "full", "location": "asia-northeast1"}, timeout=None, ) From 1c2f436cd81351be1bdbd930f2c0e2b08352b5f5 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Apr 2021 17:19:36 +0200 Subject: [PATCH 1121/2016] feat: DB API cursors are now iterable (#618) * feat: make DB API Cursors iterable * Raise error if obtaining iterator of closed Cursor --- .../google/cloud/bigquery/dbapi/_helpers.py | 2 +- .../google/cloud/bigquery/dbapi/cursor.py | 4 ++++ .../tests/unit/test_dbapi_cursor.py | 24 +++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 69694c98c0cd..beb3c5e712cb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -276,7 +276,7 @@ def decorate_public_methods(klass): """Apply ``_raise_on_closed()`` decorator to public instance methods. """ for name in dir(klass): - if name.startswith("_"): + if name.startswith("_") and name != "__iter__": continue member = getattr(klass, name) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index ee09158d8842..7e5449718478 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -365,6 +365,10 @@ def setinputsizes(self, sizes): def setoutputsize(self, size, column=None): """No-op, but for consistency raise an error if cursor is closed.""" + def __iter__(self): + self._try_fetch() + return iter(self._query_data) + def _format_operation_list(operation, parameters): """Formats parameters in operation in the way BigQuery expects. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 0f44e3895f9e..8ca4e9b6cc8f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -178,6 +178,7 @@ def test_raises_error_if_closed(self): "fetchone", "setinputsizes", "setoutputsize", + "__iter__", ) for method in method_names: @@ -611,6 +612,29 @@ def test_executemany_w_dml(self): self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) + def test_is_iterable(self): + from google.cloud.bigquery import dbapi + + connection = dbapi.connect( + self._mock_client(rows=[("hello", "there", 7), ("good", "bye", -3)]) + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar, baz FROM hello_world WHERE baz < 42;") + + rows_iter = iter(cursor) + + row = next(rows_iter) + self.assertEqual(row, ("hello", "there", 7)) + row = next(rows_iter) + self.assertEqual(row, ("good", "bye", -3)) + self.assertRaises(StopIteration, next, rows_iter) + + self.assertEqual( + list(cursor), + [], + "Iterating again over the same results should produce no rows.", + ) + def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor From 83174ed8dfbb53830c1efc19fae1146bc30a17e5 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Apr 2021 17:52:07 +0200 Subject: [PATCH 1122/2016] fix: consistent percents handling in DB API query (#619) Fixes #608. Percents in the query string are now always de-escaped, regardless of whether any query parameters are passed or not. In addition, misformatting placeholders that don't match parameter values now consistently raise `ProgrammingError`. **PR checklist:** - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- .../google/cloud/bigquery/dbapi/cursor.py | 6 +-- .../tests/unit/test_dbapi_cursor.py | 53 +++++++++++++++++++ 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 7e5449718478..ca78d3907ed8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -393,7 +393,7 @@ def _format_operation_list(operation, parameters): try: return operation % tuple(formatted_params) - except TypeError as exc: + except (TypeError, ValueError) as exc: raise exceptions.ProgrammingError(exc) @@ -423,7 +423,7 @@ def _format_operation_dict(operation, parameters): try: return operation % formatted_params - except KeyError as exc: + except (KeyError, ValueError, TypeError) as exc: raise exceptions.ProgrammingError(exc) @@ -445,7 +445,7 @@ def _format_operation(operation, parameters=None): ``parameters`` argument. """ if parameters is None or len(parameters) == 0: - return operation + return operation.replace("%%", "%") # Still do percent de-escaping. if isinstance(parameters, collections_abc.Mapping): return _format_operation_dict(operation, parameters) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 8ca4e9b6cc8f..039ef3b4c41e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -657,6 +657,14 @@ def test__format_operation_w_wrong_dict(self): {"somevalue-not-here": "hi", "othervalue": "world"}, ) + def test__format_operation_w_redundant_dict_key(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation( + "SELECT %(somevalue)s;", {"somevalue": "foo", "value-not-used": "bar"} + ) + self.assertEqual(formatted_operation, "SELECT @`somevalue`;") + def test__format_operation_w_sequence(self): from google.cloud.bigquery.dbapi import cursor @@ -676,8 +684,53 @@ def test__format_operation_w_too_short_sequence(self): ("hello",), ) + def test__format_operation_w_too_long_sequence(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %s, %s;", + ("hello", "world", "everyone"), + ) + def test__format_operation_w_empty_dict(self): from google.cloud.bigquery.dbapi import cursor formatted_operation = cursor._format_operation("SELECT '%f'", {}) self.assertEqual(formatted_operation, "SELECT '%f'") + + def test__format_operation_wo_params_single_percent(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation("SELECT '%'", {}) + self.assertEqual(formatted_operation, "SELECT '%'") + + def test__format_operation_wo_params_double_percents(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation("SELECT '%%'", {}) + self.assertEqual(formatted_operation, "SELECT '%'") + + def test__format_operation_unescaped_percent_w_dict_param(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %(foo)s, '100 %';", + {"foo": "bar"}, + ) + + def test__format_operation_unescaped_percent_w_list_param(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %s, %s, '100 %';", + ["foo", "bar"], + ) From fbefa4bf5a1bf84d79752ad364e943acf8acfa50 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Apr 2021 22:49:42 +0200 Subject: [PATCH 1123/2016] chore: add unit test nox session w/o extras (#623) --- packages/google-cloud-bigquery/noxfile.py | 12 ++++++++++-- .../tests/unit/test__pandas_helpers.py | 1 + .../google-cloud-bigquery/tests/unit/test_client.py | 7 ++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index a738d8c00de9..bde3b990e23b 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -31,6 +31,7 @@ # 'docfx' is excluded since it only needs to run in 'docs-presubmit' nox.options.sessions = [ + "unit_noextras", "unit", "system", "snippets", @@ -42,7 +43,7 @@ ] -def default(session): +def default(session, install_extras=True): """Default unit test session. This is intended to be run **without** an interpreter set, so @@ -65,7 +66,8 @@ def default(session): constraints_path, ) - session.install("-e", ".[all]", "-c", constraints_path) + install_target = ".[all]" if install_extras else "." + session.install("-e", install_target, "-c", constraints_path) session.install("ipython", "-c", constraints_path) @@ -90,6 +92,12 @@ def unit(session): default(session) +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def unit_noextras(session): + """Run the unit test suite.""" + default(session, install_extras=False) + + @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 43692f4af27c..39a3d845bc6b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -1464,6 +1464,7 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test) result = next(results_gen) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c5e742c9e750..860f25f35426 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -65,7 +65,12 @@ from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") -PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + # Set to less than MIN version. + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") def _make_credentials(): From d2574e6d92c8142c4d0cb4dcb9f4deef6bdb229a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Apr 2021 22:54:32 +0200 Subject: [PATCH 1124/2016] feat: retry google.auth TransportError by default (#624) --- .../google-cloud-bigquery/google/cloud/bigquery/retry.py | 2 ++ packages/google-cloud-bigquery/tests/unit/test_retry.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 20a8e7b13e1e..5e9075fe1b86 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -14,6 +14,7 @@ from google.api_core import exceptions from google.api_core import retry +from google.auth import exceptions as auth_exceptions import requests.exceptions @@ -27,6 +28,7 @@ exceptions.InternalServerError, exceptions.BadGateway, requests.exceptions.ConnectionError, + auth_exceptions.TransportError, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index 318a54d34a8a..0bef1e5e185a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -51,6 +51,12 @@ def test_w_unstructured_requests_connectionerror(self): exc = requests.exceptions.ConnectionError() self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): + from google.auth.exceptions import TransportError + + exc = TransportError("testing") + self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_too_many_requests(self): from google.api_core.exceptions import TooManyRequests From 1bb524ee544deb6b14d63efa384fa4a60d9cd6b0 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Apr 2021 23:49:26 +0200 Subject: [PATCH 1125/2016] feat: add type hints for public methods (#613) * feat: add type hint for public methods * feat: add bigquery-storage in requirement file * feat: add pandas in requirement file * feat: add return type hint * feat: remove pandas import as a string * Use the latest pytype version (2021.4.9) * Silence false import and module attribute errors * Fix misc. pytype warnings and false postiives * Make changes to generated files persistent * Make final cleanup of client.py * Change import ignores to more specific errors * Silence false positive type warning in job config * Silence noisy _helper type warnings * Silence false positives for resumable media code * Add pytype to nox.options.sessions * Hide for-type-check-only imports behind a flag * Remove obsolete skipIf decorator from two tests inspect.signature() was added in Python 3.3, and the library only needs to suppport Python3.6+. * Install dependencies in pytype session This avoids numerous unnecessary import and module attribute errors, rendering lots of pytype directive comments obsolete. * Be more specific about to_dataframe()'s return type * Add missing return type for _get_query_results() * Be more specific about pandas/pyarrow return types * Exclude typing-only imports from coverage checks Co-authored-by: HemangChothani Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/.gitignore | 1 + .../google/cloud/bigquery/_http.py | 3 +- .../google/cloud/bigquery/_pandas_helpers.py | 2 + .../google/cloud/bigquery/client.py | 549 +++++++++++------- .../google/cloud/bigquery/dataset.py | 16 +- .../google/cloud/bigquery/external_config.py | 28 +- .../google/cloud/bigquery/job/base.py | 43 +- .../google/cloud/bigquery/job/extract.py | 2 +- .../google/cloud/bigquery/job/load.py | 2 +- .../google/cloud/bigquery/job/query.py | 55 +- .../bigquery/magics/line_arg_parser/lexer.py | 2 +- .../google/cloud/bigquery/model.py | 10 +- .../google/cloud/bigquery/query.py | 20 +- .../google/cloud/bigquery/routine/routine.py | 22 +- .../google/cloud/bigquery/schema.py | 13 +- .../google/cloud/bigquery/table.py | 75 ++- packages/google-cloud-bigquery/noxfile.py | 11 + .../samples/geography/requirements.txt | 1 + packages/google-cloud-bigquery/setup.cfg | 14 + packages/google-cloud-bigquery/synth.py | 29 + .../unit/test_signature_compatibility.py | 8 - 21 files changed, 575 insertions(+), 331 deletions(-) diff --git a/packages/google-cloud-bigquery/.gitignore b/packages/google-cloud-bigquery/.gitignore index b4243ced74e4..99c3a1444ed2 100644 --- a/packages/google-cloud-bigquery/.gitignore +++ b/packages/google-cloud-bigquery/.gitignore @@ -29,6 +29,7 @@ pip-log.txt .nox .cache .pytest_cache +.pytype # Mac diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index ede26cc7024e..81e7922e6e34 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -17,8 +17,7 @@ import os import pkg_resources -from google.cloud import _http - +from google.cloud import _http # pytype: disable=import-error from google.cloud.bigquery import __version__ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 7553726fa396..e93a99eba010 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -373,6 +373,7 @@ def augment_schema(dataframe, current_bq_schema): Returns: Optional[Sequence[google.cloud.bigquery.schema.SchemaField]] """ + # pytype: disable=attribute-error augmented_schema = [] unknown_type_fields = [] @@ -406,6 +407,7 @@ def augment_schema(dataframe, current_bq_schema): return None return augmented_schema + # pytype: enable=attribute-error def dataframe_to_arrow(dataframe, bq_schema): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 8211e23a3a67..5aa8608a5316 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -19,6 +19,7 @@ from collections import abc as collections_abc import copy +import datetime import functools import gzip import io @@ -27,6 +28,7 @@ import math import os import tempfile +from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union import uuid import warnings @@ -35,17 +37,18 @@ except ImportError: # pragma: NO COVER pyarrow = None -from google import resumable_media +from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload import google.api_core.client_options -import google.api_core.exceptions +import google.api_core.exceptions as core_exceptions from google.api_core.iam import Policy from google.api_core import page_iterator +from google.api_core import retry as retries import google.cloud._helpers -from google.cloud import exceptions -from google.cloud.client import ClientWithProject +from google.cloud import exceptions # pytype: disable=import-error +from google.cloud.client import ClientWithProject # pytype: disable=import-error from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop @@ -59,6 +62,13 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job +from google.cloud.bigquery.job import ( + LoadJobConfig, + QueryJob, + QueryJobConfig, + CopyJobConfig, + ExtractJobConfig, +) from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref @@ -216,8 +226,11 @@ def close(self): self._http.close() def get_service_account_email( - self, project=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + project: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> str: """Get the email address of the project's BigQuery service account Note: @@ -259,8 +272,12 @@ def get_service_account_email( return api_response["email"] def list_projects( - self, max_results=None, page_token=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """List projects for the project associated with this client. See @@ -313,14 +330,14 @@ def api_request(*args, **kwargs): def list_datasets( self, - project=None, - include_all=False, - filter=None, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + project: str = None, + include_all: bool = False, + filter: str = None, + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """List datasets for the project associated with this client. See @@ -390,7 +407,7 @@ def api_request(*args, **kwargs): extra_params=extra_params, ) - def dataset(self, dataset_id, project=None): + def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: """Deprecated: Construct a reference to a dataset. .. deprecated:: 1.24.0 @@ -466,8 +483,12 @@ def _dataset_from_arg(self, dataset): return dataset def create_dataset( - self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None - ): + self, + dataset: Union[str, Dataset, DatasetReference], + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """API call: create the dataset via a POST request. See @@ -531,14 +552,18 @@ def create_dataset( timeout=timeout, ) return Dataset.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_dataset(dataset.reference, retry=retry) def create_routine( - self, routine, exists_ok=False, retry=DEFAULT_RETRY, timeout=None - ): + self, + routine: Routine, + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Create a routine via a POST request. See @@ -582,12 +607,18 @@ def create_routine( timeout=timeout, ) return Routine.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_routine(routine.reference, retry=retry) - def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None): + def create_table( + self, + table: Union[str, Table, TableReference], + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """API call: create a table via a PUT request See @@ -636,7 +667,7 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None timeout=timeout, ) return Table.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_table(table.reference, retry=retry) @@ -654,7 +685,12 @@ def _call_api( return call() return call() - def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): + def get_dataset( + self, + dataset_ref: Union[DatasetReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` Args: @@ -693,8 +729,12 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): return Dataset.from_api_repr(api_response) def get_iam_policy( - self, table, requested_policy_version=1, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + requested_policy_version: int = 1, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -718,8 +758,13 @@ def get_iam_policy( return Policy.from_api_repr(response) def set_iam_policy( - self, table, policy, updateMask=None, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + policy: Policy, + updateMask: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -747,8 +792,12 @@ def set_iam_policy( return Policy.from_api_repr(response) def test_iam_permissions( - self, table, permissions, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + permissions: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -768,7 +817,12 @@ def test_iam_permissions( return response - def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): + def get_model( + self, + model_ref: Union[ModelReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. Args: @@ -806,7 +860,12 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): ) return Model.from_api_repr(api_response) - def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): + def get_routine( + self, + routine_ref: Union[Routine, RoutineReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. Args: @@ -845,7 +904,12 @@ def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): ) return Routine.from_api_repr(api_response) - def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): + def get_table( + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """Fetch the table referenced by ``table``. Args: @@ -881,7 +945,13 @@ def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): ) return Table.from_api_repr(api_response) - def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): + def update_dataset( + self, + dataset: Dataset, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """Change some fields of a dataset. Use ``fields`` to specify which fields to update. At least one field @@ -945,7 +1015,13 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): ) return Dataset.from_api_repr(api_response) - def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): + def update_model( + self, + model: Model, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Model: """[Beta] Change some fields of a model. Use ``fields`` to specify which fields to update. At least one field @@ -1003,7 +1079,13 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): ) return Model.from_api_repr(api_response) - def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): + def update_routine( + self, + routine: Routine, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Change some fields of a routine. Use ``fields`` to specify which fields to update. At least one field @@ -1071,7 +1153,13 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): ) return Routine.from_api_repr(api_response) - def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): + def update_table( + self, + table: Table, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """Change some fields of a table. Use ``fields`` to specify which fields to update. At least one field @@ -1132,12 +1220,12 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): def list_models( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """[Beta] List models in the dataset. See @@ -1204,12 +1292,12 @@ def api_request(*args, **kwargs): def list_routines( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. See @@ -1276,12 +1364,12 @@ def api_request(*args, **kwargs): def list_tables( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """List tables in the dataset. See @@ -1347,12 +1435,12 @@ def api_request(*args, **kwargs): def delete_dataset( self, - dataset, - delete_contents=False, - retry=DEFAULT_RETRY, - timeout=None, - not_found_ok=False, - ): + dataset: Union[Dataset, DatasetReference, str], + delete_contents: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """Delete a dataset. See @@ -1401,13 +1489,17 @@ def delete_dataset( query_params=params, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def delete_model( - self, model, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + model: Union[Model, ModelReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """[Beta] Delete a model See @@ -1449,13 +1541,17 @@ def delete_model( path=path, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def delete_routine( - self, routine, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + routine: Union[Routine, RoutineReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """[Beta] Delete a routine. See @@ -1499,13 +1595,17 @@ def delete_routine( path=path, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def delete_table( - self, table, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """Delete a table See @@ -1545,13 +1645,19 @@ def delete_table( path=path, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def _get_query_results( - self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None, - ): + self, + job_id: str, + retry: retries.Retry, + project: str = None, + timeout_ms: int = None, + location: str = None, + timeout: float = None, + ) -> _QueryResults: """Get the query results object for a query job. Args: @@ -1609,7 +1715,7 @@ def _get_query_results( ) return _QueryResults.from_api_repr(resource) - def job_from_resource(self, resource): + def job_from_resource(self, resource: dict) -> job.UnknownJob: """Detect correct job type from resource and instantiate. Args: @@ -1635,7 +1741,12 @@ def job_from_resource(self, resource): return job.QueryJob.from_api_repr(resource, self) return job.UnknownJob.from_api_repr(resource, self) - def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): + def create_job( + self, + job_config: dict, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: job_config (dict): configuration job representation returned from the API. @@ -1726,8 +1837,13 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): raise TypeError("Invalid job configuration received.") def get_job( - self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + job_id: str, + project: str = None, + location: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. See @@ -1795,8 +1911,13 @@ def get_job( return self.job_from_resource(resource) def cancel_job( - self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + job_id: str, + project: str = None, + location: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. See @@ -1865,17 +1986,17 @@ def cancel_job( def list_jobs( self, - project=None, - parent_job=None, - max_results=None, - page_token=None, - all_users=None, - state_filter=None, - retry=DEFAULT_RETRY, - timeout=None, - min_creation_time=None, - max_creation_time=None, - ): + project: str = None, + parent_job: Optional[Union[QueryJob, str]] = None, + max_results: int = None, + page_token: str = None, + all_users: bool = None, + state_filter: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + min_creation_time: datetime.datetime = None, + max_creation_time: datetime.datetime = None, + ) -> page_iterator.Iterator: """List jobs for the project associated with this client. See @@ -1926,7 +2047,7 @@ def list_jobs( Iterable of job instances. """ if isinstance(parent_job, job._AsyncJob): - parent_job = parent_job.job_id + parent_job = parent_job.job_id # pytype: disable=attribute-error extra_params = { "allUsers": all_users, @@ -1975,16 +2096,16 @@ def api_request(*args, **kwargs): def load_table_from_uri( self, - source_uris, - destination, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + source_uris: Union[str, Sequence[str]], + destination: Union[Table, TableReference, str], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. See @@ -2057,18 +2178,18 @@ def load_table_from_uri( def load_table_from_file( self, - file_obj, - destination, - rewind=False, - size=None, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - timeout=None, - ): + file_obj: BinaryIO, + destination: Union[Table, TableReference, str], + rewind: bool = False, + size: int = None, + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of this table from a file-like object. Similar to :meth:`load_table_from_uri`, this method creates, starts and @@ -2162,16 +2283,16 @@ def load_table_from_file( def load_table_from_dataframe( self, dataframe, - destination, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - parquet_compression="snappy", - timeout=None, - ): + destination: Union[Table, TableReference, str], + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + parquet_compression: str = "snappy", + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. Similar to :meth:`load_table_from_uri`, this method creates, starts and @@ -2299,7 +2420,7 @@ def load_table_from_dataframe( ): try: table = self.get_table(destination) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: table = None else: columns_and_indexes = frozenset( @@ -2388,16 +2509,16 @@ def load_table_from_dataframe( def load_table_from_json( self, - json_rows, - destination, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - timeout=None, - ): + json_rows: Iterable[Dict[str, Any]], + destination: Union[Table, TableReference, str], + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. Args: @@ -2669,16 +2790,18 @@ def _do_multipart_upload( def copy_table( self, - sources, - destination, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + sources: Union[ + Table, TableReference, str, Sequence[Union[Table, TableReference, str]] + ], + destination: Union[Table, TableReference, str], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: CopyJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.CopyJob: """Copy one or more tables to another table. See @@ -2772,17 +2895,17 @@ def copy_table( def extract_table( self, - source, - destination_uris, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - source_type="Table", - ): + source: Union[Table, TableReference, Model, ModelReference, str], + destination_uris: Union[str, Sequence[str]], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: ExtractJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + source_type: str = "Table", + ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. See @@ -2871,15 +2994,15 @@ def extract_table( def query( self, - query, - job_config=None, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + query: str, + job_config: QueryJobConfig = None, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.QueryJob: """Run a SQL query. See @@ -2956,7 +3079,13 @@ def query( return query_job - def insert_rows(self, table, rows, selected_fields=None, **kwargs): + def insert_rows( + self, + table: Union[Table, TableReference, str], + rows: Union[Iterable[Tuple], Iterable[Dict]], + selected_fields: Sequence[SchemaField] = None, + **kwargs: dict, + ) -> Sequence[dict]: """Insert rows into a table via the streaming API. See @@ -2979,7 +3108,7 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): The fields to return. Required if ``table`` is a :class:`~google.cloud.bigquery.table.TableReference`. - kwargs (Dict): + kwargs (dict): Keyword arguments to :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`. @@ -3019,8 +3148,13 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): return self.insert_rows_json(table, json_rows, **kwargs) def insert_rows_from_dataframe( - self, table, dataframe, selected_fields=None, chunk_size=500, **kwargs - ): + self, + table: Union[Table, TableReference, str], + dataframe, + selected_fields: Sequence[SchemaField] = None, + chunk_size: int = 500, + **kwargs: Dict, + ) -> Sequence[Sequence[dict]]: """Insert rows into a table from a dataframe via the streaming API. Args: @@ -3068,15 +3202,15 @@ def insert_rows_from_dataframe( def insert_rows_json( self, - table, - json_rows, - row_ids=None, - skip_invalid_rows=None, - ignore_unknown_values=None, - template_suffix=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + table: Union[Table, TableReference, str], + json_rows: Sequence[Dict], + row_ids: Sequence[str] = None, + skip_invalid_rows: bool = None, + ignore_unknown_values: bool = None, + template_suffix: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. See @@ -3172,7 +3306,12 @@ def insert_rows_json( return errors - def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): + def list_partitions( + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Sequence[str]: """List the partitions in a table. Args: @@ -3214,15 +3353,15 @@ def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): def list_rows( self, - table, - selected_fields=None, - max_results=None, - page_token=None, - start_index=None, - page_size=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + table: Union[Table, TableListItem, TableReference, str], + selected_fields: Sequence[SchemaField] = None, + max_results: int = None, + page_token: str = None, + start_index: int = None, + page_size: int = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> RowIterator: """List the rows of the table. See @@ -3323,18 +3462,18 @@ def list_rows( def _list_rows_from_query_results( self, - job_id, - location, - project, - schema, - total_rows=None, - destination=None, - max_results=None, - start_index=None, - page_size=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + job_id: str, + location: str, + project: str, + schema: SchemaField, + total_rows: int = None, + destination: Union[Table, TableReference, TableListItem, str] = None, + max_results: int = None, + start_index: int = None, + page_size: int = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> RowIterator: """List the rows of a completed query. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults @@ -3419,7 +3558,7 @@ def _schema_to_json_file_object(self, schema_list, file_obj): """ json.dump(schema_list, file_obj, indent=2, sort_keys=True) - def schema_from_json(self, file_or_path): + def schema_from_json(self, file_or_path: Union[str, BinaryIO]): """Takes a file object or file path that contains json that describes a table schema. @@ -3432,7 +3571,9 @@ def schema_from_json(self, file_or_path): with open(file_or_path) as file_obj: return self._schema_from_json_file_object(file_obj) - def schema_to_json(self, schema_list, destination): + def schema_to_json( + self, schema_list: Sequence[SchemaField], destination: Union[str, BinaryIO] + ): """Takes a list of schema field objects. Serializes the list of schema field objects as json to a file. @@ -3606,7 +3747,7 @@ def _check_mode(stream): mode = getattr(stream, "mode", None) if isinstance(stream, gzip.GzipFile): - if mode != gzip.READ: + if mode != gzip.READ: # pytype: disable=module-attr raise ValueError( "Cannot upload gzip files opened in write mode: use " "gzip.GzipFile(filename, mode='rb')" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 2d3a4755f260..21e56f305d73 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -220,7 +220,7 @@ def to_api_repr(self): return resource @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "AccessEntry": """Factory: construct an access entry given its API representation Args: @@ -288,7 +288,7 @@ def path(self): routine = _get_routine_reference @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "DatasetReference": """Factory: construct a dataset reference given its API representation Args: @@ -304,7 +304,9 @@ def from_api_repr(cls, resource): return cls(project, dataset_id) @classmethod - def from_string(cls, dataset_id, default_project=None): + def from_string( + cls, dataset_id: str, default_project: str = None + ) -> "DatasetReference": """Construct a dataset reference from dataset ID string. Args: @@ -350,7 +352,7 @@ def from_string(cls, dataset_id, default_project=None): return cls(output_project_id, output_dataset_id) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this dataset reference Returns: @@ -640,7 +642,7 @@ def default_encryption_configuration(self, value): self._properties["defaultEncryptionConfiguration"] = api_repr @classmethod - def from_string(cls, full_dataset_id): + def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. Args: @@ -664,7 +666,7 @@ def from_string(cls, full_dataset_id): return cls(DatasetReference.from_string(full_dataset_id)) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Dataset": """Factory: construct a dataset given its API representation Args: @@ -689,7 +691,7 @@ def from_api_repr(cls, resource): dataset._properties = copy.deepcopy(resource) return dataset - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this dataset Returns: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 59e4960f9851..ef4d569fa8e5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -149,7 +149,7 @@ def type_(self): def type_(self, value): self._properties["type"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -159,7 +159,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableColumn": """Factory: construct a :class:`~.external_config.BigtableColumn` instance given its API representation. @@ -251,7 +251,7 @@ def columns(self): def columns(self, value): self._properties["columns"] = [col.to_api_repr() for col in value] - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -261,7 +261,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily": """Factory: construct a :class:`~.external_config.BigtableColumnFamily` instance given its API representation. @@ -333,7 +333,7 @@ def column_families(self): def column_families(self, value): self._properties["columnFamilies"] = [cf.to_api_repr() for cf in value] - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -343,7 +343,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableOptions": """Factory: construct a :class:`~.external_config.BigtableOptions` instance given its API representation. @@ -450,7 +450,7 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -459,7 +459,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "CSVOptions": """Factory: construct a :class:`~.external_config.CSVOptions` instance given its API representation. @@ -513,7 +513,7 @@ def range(self): def range(self, value): self._properties["range"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -522,7 +522,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": """Factory: construct a :class:`~.external_config.GoogleSheetsOptions` instance given its API representation. @@ -601,7 +601,7 @@ def require_partition_filter(self): def require_partition_filter(self, value): self._properties["requirePartitionFilter"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -610,7 +610,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "HivePartitioningOptions": """Factory: construct a :class:`~.external_config.HivePartitioningOptions` instance given its API representation. @@ -784,7 +784,7 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -799,7 +799,7 @@ def to_api_repr(self): return config @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ExternalConfig": """Factory: construct an :class:`~.external_config.ExternalConfig` instance given its API representation. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index f24e972c8f21..20ad81c0b9e1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -18,6 +18,7 @@ import copy import http import threading +import typing from google.api_core import exceptions import google.api_core.future.polling @@ -25,6 +26,9 @@ from google.cloud.bigquery import _helpers from google.cloud.bigquery.retry import DEFAULT_RETRY +if typing.TYPE_CHECKING: # pragma: NO COVER + from google.api_core import retry as retries + _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" @@ -466,7 +470,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): ) self._set_properties(api_response) - def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def exists( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> bool: """API call: test for the existence of the job via a GET request See @@ -509,7 +515,9 @@ def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): else: return True - def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def reload( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ): """API call: refresh job properties via a GET request. See @@ -544,7 +552,9 @@ def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): ) self._set_properties(api_response) - def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def cancel( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> bool: """API call: cancel job via a POST request See @@ -610,7 +620,12 @@ def _set_future_result(self): else: self.set_result(self) - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + def done( + self, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: float = None, + reload: bool = True, + ) -> bool: """Checks if the job is complete. Args: @@ -633,7 +648,9 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE - def result(self, retry=DEFAULT_RETRY, timeout=None): + def result( + self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> "_AsyncJob": """Start the job and wait for it to complete and get the result. Args: @@ -788,7 +805,7 @@ def _del_sub_prop(self, key): """ _helpers._del_sub_prop(self._properties, [self._job_type, key]) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of the job config. Returns: @@ -818,7 +835,10 @@ def _fill_from_default(self, default_job_config): + repr(default_job_config._job_type) ) - new_job_config = self.__class__() + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. + new_job_config = self.__class__() # pytype: disable=missing-parameter default_job_properties = copy.deepcopy(default_job_config._properties) for key in self._properties: @@ -831,7 +851,7 @@ def _fill_from_default(self, default_job_config): return new_job_config @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "_JobConfig": """Factory: construct a job configuration given its API representation Args: @@ -842,7 +862,10 @@ def from_api_repr(cls, resource): Returns: google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. """ - job_config = cls() + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. + job_config = cls() # pytype: disable=missing-parameter job_config._properties = resource return job_config @@ -929,7 +952,7 @@ class UnknownJob(_AsyncJob): """A job whose type cannot be determined.""" @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "UnknownJob": """Construct an UnknownJob from the JSON representation. Args: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py index a6e262a32cfe..3373bcdefccb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py @@ -241,7 +241,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "ExtractJob": """Factory: construct a job given its API representation .. note: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index e784af0a6bb8..b8174af3ea08 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -733,7 +733,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "LoadJob": """Factory: construct a job given its API representation .. note: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 491983f8ebfb..f52f9c621f3d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -17,6 +17,8 @@ import concurrent.futures import copy import re +import typing +from typing import Any, Dict, Union from google.api_core import exceptions from google.api_core.future import polling as polling_future @@ -46,6 +48,15 @@ from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference +if typing.TYPE_CHECKING: # pragma: NO COVER + # Assumption: type checks are only used by library developers and CI environments + # that have all optional dependencies installed, thus no conditional imports. + import pandas + import pyarrow + from google.api_core import retry as retries + from google.cloud import bigquery_storage + from google.cloud.bigquery.table import RowIterator + _CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) _TIMEOUT_BUFFER_SECS = 0.1 @@ -491,7 +502,7 @@ def schema_update_options(self): def schema_update_options(self, values): self._set_sub_prop("schemaUpdateOptions", values) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of the query job config. Returns: @@ -718,7 +729,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "QueryJob": """Factory: construct a job given its API representation Args: @@ -1036,7 +1047,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): exc.query_job = self raise - def _reload_query_results(self, retry=DEFAULT_RETRY, timeout=None): + def _reload_query_results( + self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ): """Refresh the cached query results. Args: @@ -1111,12 +1124,12 @@ def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): def result( self, - page_size=None, - max_results=None, - retry=DEFAULT_RETRY, - timeout=None, - start_index=None, - ): + page_size: int = None, + max_results: int = None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: float = None, + start_index: int = None, + ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. Args: @@ -1196,10 +1209,10 @@ def result( # changes to table.RowIterator.to_arrow() def to_arrow( self, - progress_bar_type=None, - bqstorage_client=None, - create_bqstorage_client=True, - ): + progress_bar_type: str = None, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + create_bqstorage_client: bool = True, + ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1265,12 +1278,12 @@ def to_arrow( # changes to table.RowIterator.to_dataframe() def to_dataframe( self, - bqstorage_client=None, - dtypes=None, - progress_bar_type=None, - create_bqstorage_client=True, - date_as_object=True, - ): + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob Args: @@ -1350,7 +1363,7 @@ def __init__(self, kind, substeps): self.substeps = list(substeps) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "QueryPlanEntryStep": """Factory: construct instance from the JSON repr. Args: @@ -1380,7 +1393,7 @@ def __init__(self): self._properties = {} @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "QueryPlanEntry": """Factory: construct instance from the JSON repr. Args: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py index 5a6ee1a83b28..cd809c389213 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -98,7 +98,7 @@ def _generate_next_value_(name, start, count, last_values): return name -TokenType = AutoStrEnum( +TokenType = AutoStrEnum( # pytype: disable=wrong-arg-types "TokenType", [ (name, enum.auto()) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index 55846bd1a696..2d3f6660f17c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -279,7 +279,7 @@ def encryption_configuration(self, value): self._properties["encryptionConfiguration"] = api_repr @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Model": """Factory: construct a model resource given its API representation Args: @@ -322,7 +322,7 @@ def _build_resource(self, filter_fields): def __repr__(self): return "Model(reference={})".format(repr(self.reference)) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this model. Returns: @@ -389,7 +389,9 @@ def from_api_repr(cls, resource): return ref @classmethod - def from_string(cls, model_id, default_project=None): + def from_string( + cls, model_id: str, default_project: str = None + ) -> "ModelReference": """Construct a model reference from model ID string. Args: @@ -417,7 +419,7 @@ def from_string(cls, model_id, default_project=None): {"projectId": proj, "datasetId": dset, "modelId": model} ) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this model reference. Returns: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 495c4effbe8c..3751eb12403b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -286,7 +286,7 @@ class _AbstractQueryParameter(object): """ @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -297,7 +297,7 @@ def from_api_repr(cls, resource): """ raise NotImplementedError - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -329,7 +329,7 @@ def __init__(self, name, type_, value): self.value = value @classmethod - def positional(cls, type_, value): + def positional(cls, type_: str, value) -> "ScalarQueryParameter": """Factory for positional paramater. Args: @@ -347,7 +347,7 @@ def positional(cls, type_, value): return cls(None, type_, value) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -369,7 +369,7 @@ def from_api_repr(cls, resource): return cls(name, type_, converted) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -441,7 +441,7 @@ def __init__(self, name, array_type, values): self.array_type = array_type @classmethod - def positional(cls, array_type, values): + def positional(cls, array_type: str, values: list) -> "ArrayQueryParameter": """Factory for positional parameters. Args: @@ -490,7 +490,7 @@ def _from_api_repr_scalar(cls, resource): return cls(name, array_type, converted) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ArrayQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -504,7 +504,7 @@ def from_api_repr(cls, resource): return cls._from_api_repr_struct(resource) return cls._from_api_repr_scalar(resource) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -623,7 +623,7 @@ def positional(cls, *sub_params): return cls(None, *sub_params) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "StructQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -663,7 +663,7 @@ def from_api_repr(cls, resource): instance.struct_values[key] = converted return instance - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py index 103799e8fd4a..bbc0a7693739 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py @@ -266,7 +266,7 @@ def determinism_level(self, value): self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. Args: @@ -281,7 +281,7 @@ def from_api_repr(cls, resource): ref._properties = resource return ref - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine. Returns: @@ -387,7 +387,7 @@ def data_type(self, value): self._properties[self._PROPERTY_TO_API_FIELD["data_type"]] = resource @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "RoutineArgument": """Factory: construct a routine argument given its API representation. Args: @@ -401,7 +401,7 @@ def from_api_repr(cls, resource): ref._properties = resource return ref - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine argument. Returns: @@ -438,17 +438,17 @@ def __init__(self): @property def project(self): """str: ID of the project containing the routine.""" - return self._properties["projectId"] + return self._properties["projectId"] # pytype: disable=key-error @property def dataset_id(self): """str: ID of dataset containing the routine.""" - return self._properties["datasetId"] + return self._properties["datasetId"] # pytype: disable=key-error @property def routine_id(self): """str: The routine ID.""" - return self._properties["routineId"] + return self._properties["routineId"] # pytype: disable=key-error @property def path(self): @@ -460,7 +460,7 @@ def path(self): ) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "RoutineReference": """Factory: construct a routine reference given its API representation. Args: @@ -476,7 +476,9 @@ def from_api_repr(cls, resource): return ref @classmethod - def from_string(cls, routine_id, default_project=None): + def from_string( + cls, routine_id: str, default_project: str = None + ) -> "RoutineReference": """Factory: construct a routine reference from routine ID string. Args: @@ -504,7 +506,7 @@ def from_string(cls, routine_id, default_project=None): {"projectId": proj, "datasetId": dset, "routineId": routine} ) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine reference. Returns: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 680dcc13850c..cb221d6de982 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -90,7 +90,7 @@ def __init__( self._policy_tags = policy_tags @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. Args: @@ -163,7 +163,7 @@ def policy_tags(self): """ return self._policy_tags - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. Returns: @@ -194,13 +194,14 @@ def _key(self): return ( self.name, self.field_type.upper(), - self.mode.upper(), + # Mode is always str, if not given it defaults to a str value + self.mode.upper(), # pytype: disable=attribute-error self.description, self._fields, self._policy_tags, ) - def to_standard_sql(self): + def to_standard_sql(self) -> types.StandardSqlField: """Return the field as the standard SQL field representation object. Returns: @@ -375,7 +376,7 @@ def __repr__(self): return "PolicyTagList{}".format(self._key()) @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "PolicyTagList": """Return a :class:`PolicyTagList` object deserialized from a dict. This method creates a new ``PolicyTagList`` instance that points to @@ -398,7 +399,7 @@ def from_api_repr(cls, api_repr): names = api_repr.get("names", ()) return cls(names=names) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this object. This method returns the properties dict of the ``PolicyTagList`` diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index bd5bca30fe8a..5ab649a2534a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -22,6 +22,8 @@ import logging import operator import pytz +import typing +from typing import Any, Dict, Iterable, Tuple import warnings try: @@ -47,6 +49,13 @@ from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +if typing.TYPE_CHECKING: # pragma: NO COVER + # Unconditionally import optional dependencies again to tell pytype that + # they are not None, avoiding false "no attribute" errors. + import pandas + import pyarrow + from google.cloud import bigquery_storage + _LOGGER = logging.getLogger(__name__) @@ -143,7 +152,9 @@ def path(self): ) @classmethod - def from_string(cls, table_id, default_project=None): + def from_string( + cls, table_id: str, default_project: str = None + ) -> "TableReference": """Construct a table reference from table ID string. Args: @@ -182,7 +193,7 @@ def from_string(cls, table_id, default_project=None): ) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "TableReference": """Factory: construct a table reference given its API representation Args: @@ -200,7 +211,7 @@ def from_api_repr(cls, resource): table_id = resource["tableId"] return cls(DatasetReference(project, dataset_id), table_id) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this table reference. Returns: @@ -212,7 +223,7 @@ def to_api_repr(self): "tableId": self._table_id, } - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. Install the ``google-cloud-bigquery-storage`` package to use this @@ -847,7 +858,7 @@ def external_data_configuration(self, value): self._properties["externalDataConfiguration"] = api_repr @classmethod - def from_string(cls, full_table_id): + def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. Args: @@ -871,7 +882,7 @@ def from_string(cls, full_table_id): return cls(TableReference.from_string(full_table_id)) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Table": """Factory: construct a table given its API representation Args: @@ -907,7 +918,7 @@ def from_api_repr(cls, resource): return table - def to_api_repr(self): + def to_api_repr(self) -> dict: """Constructs the API resource of this table Returns: @@ -915,7 +926,7 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. Returns: @@ -1104,7 +1115,7 @@ def clustering_fields(self): return list(prop.get("fields", ())) @classmethod - def from_string(cls, full_table_id): + def from_string(cls, full_table_id: str) -> "TableListItem": """Construct a table from fully-qualified table ID. Args: @@ -1129,7 +1140,7 @@ def from_string(cls, full_table_id): {"tableReference": TableReference.from_string(full_table_id).to_api_repr()} ) - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. Returns: @@ -1137,7 +1148,7 @@ def to_bqstorage(self): """ return self.reference.to_bqstorage() - def to_api_repr(self): + def to_api_repr(self) -> dict: """Constructs the API resource of this table Returns: @@ -1231,7 +1242,7 @@ def values(self): """ return copy.deepcopy(self._xxx_values) - def keys(self): + def keys(self) -> Iterable[str]: """Return the keys for using a row as a dict. Returns: @@ -1244,7 +1255,7 @@ def keys(self): """ return self._xxx_field_to_index.keys() - def items(self): + def items(self) -> Iterable[Tuple[str, Any]]: """Return items as ``(key, value)`` pairs. Returns: @@ -1259,7 +1270,7 @@ def items(self): for key, index in self._xxx_field_to_index.items(): yield (key, copy.deepcopy(self._xxx_values[index])) - def get(self, key, default=None): + def get(self, key: str, default: Any = None) -> Any: """Return a value for key, with a default value if it does not exist. Args: @@ -1520,10 +1531,10 @@ def _to_arrow_iterable(self, bqstorage_client=None): # changes to job.QueryJob.to_arrow() def to_arrow( self, - progress_bar_type=None, - bqstorage_client=None, - create_bqstorage_client=True, - ): + progress_bar_type: str = None, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + create_bqstorage_client: bool = True, + ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1623,10 +1634,10 @@ def to_arrow( def to_dataframe_iterable( self, - bqstorage_client=None, - dtypes=None, - max_queue_size=_pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, - ): + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, + ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. Args: @@ -1698,12 +1709,12 @@ def to_dataframe_iterable( # changes to job.QueryJob.to_dataframe() def to_dataframe( self, - bqstorage_client=None, - dtypes=None, - progress_bar_type=None, - create_bqstorage_client=True, - date_as_object=True, - ): + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. Args: @@ -1831,7 +1842,7 @@ def to_arrow( progress_bar_type=None, bqstorage_client=None, create_bqstorage_client=True, - ): + ) -> "pyarrow.Table": """[Beta] Create an empty class:`pyarrow.Table`. Args: @@ -1853,7 +1864,7 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, date_as_object=True, - ): + ) -> "pandas.DataFrame": """Create an empty dataframe. Args: @@ -2164,7 +2175,7 @@ def require_partition_filter(self, value): self._properties["requirePartitionFilter"] = value @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "TimePartitioning": """Return a :class:`TimePartitioning` object deserialized from a dict. This method creates a new ``TimePartitioning`` instance that points to @@ -2192,7 +2203,7 @@ def from_api_repr(cls, api_repr): instance._properties = api_repr return instance - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this object. This method returns the properties dict of the ``TimePartitioning`` diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index bde3b990e23b..7ba081660354 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -21,6 +21,7 @@ import nox +PYTYPE_VERSION = "pytype==2021.4.9" BLACK_VERSION = "black==19.10b0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") @@ -39,6 +40,7 @@ "lint", "lint_setup_py", "blacken", + "pytype", "docs", ] @@ -98,6 +100,15 @@ def unit_noextras(session): default(session, install_extras=False) +@nox.session(python=DEFAULT_PYTHON_VERSION) +def pytype(session): + """Run type checks.""" + session.install("-e", ".[all]") + session.install("ipython") + session.install(PYTYPE_VERSION) + session.run("pytype") + + @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 6939c07e06e7..96819343cbf8 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,3 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.13.1 +google-cloud-bigquery-storage==2.1.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/setup.cfg b/packages/google-cloud-bigquery/setup.cfg index c3a2b39f6528..8eefc4435900 100644 --- a/packages/google-cloud-bigquery/setup.cfg +++ b/packages/google-cloud-bigquery/setup.cfg @@ -17,3 +17,17 @@ # Generated by synthtool. DO NOT EDIT! [bdist_wheel] universal = 1 + +[pytype] +python_version = 3.8 +inputs = + google/cloud/ +exclude = + tests/ + google/cloud/bigquery_v2/ +output = .pytype/ +disable = + # There's some issue with finding some pyi files, thus disabling. + # The issue https://github.com/google/pytype/issues/150 is closed, but the + # error still occurs for some reason. + pyi-error diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/synth.py index 3c64406001ca..d99f368cc4f2 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/synth.py @@ -13,6 +13,7 @@ # limitations under the License. """This script is used to synthesize generated parts of this library.""" +import textwrap import synthtool as s from synthtool import gcp @@ -120,4 +121,32 @@ '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', ) +# ---------------------------------------------------------------------------- +# pytype-related changes +# ---------------------------------------------------------------------------- + +# Add .pytype to .gitignore +s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype") + +# Add pytype config to setup.cfg +s.replace( + "setup.cfg", + r"universal = 1", + textwrap.dedent(""" \g<0> + + [pytype] + python_version = 3.8 + inputs = + google/cloud/ + exclude = + tests/ + google/cloud/bigquery_v2/ + output = .pytype/ + disable = + # There's some issue with finding some pyi files, thus disabling. + # The issue https://github.com/google/pytype/issues/150 is closed, but the + # error still occurs for some reason. + pyi-error""") +) + s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py b/packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py index 6002ae3e87c9..e5016b0e59bc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py +++ b/packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py @@ -31,20 +31,12 @@ def row_iterator_class(): return RowIterator -@pytest.mark.skipif( - not hasattr(inspect, "signature"), - reason="inspect.signature() is not availalbe in older Python versions", -) def test_to_arrow_method_signatures_match(query_job_class, row_iterator_class): sig = inspect.signature(query_job_class.to_arrow) sig2 = inspect.signature(row_iterator_class.to_arrow) assert sig == sig2 -@pytest.mark.skipif( - not hasattr(inspect, "signature"), - reason="inspect.signature() is not availalbe in older Python versions", -) def test_to_dataframe_method_signatures_match(query_job_class, row_iterator_class): sig = inspect.signature(query_job_class.to_dataframe) sig2 = inspect.signature(row_iterator_class.to_dataframe) From af75e1aefd32ec3fa9ecdf59bbf62e2c553843ab Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 26 Apr 2021 02:34:02 -0500 Subject: [PATCH 1126/2016] feat: add `Client.delete_job_metadata` method to remove job metadata (#610) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note: this only removes job metadata. Use `Client.cancel_job` to stop a running job. Also, this feature is in preview and has not rolled out to all regions yet Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Towards internal issue 176186229 🦕 --- .../google/cloud/bigquery/client.py | 71 +++++++++++++++++++ .../tests/system/test_client.py | 22 +++++- .../tests/unit/test_client.py | 60 ++++++++++++++++ 3 files changed, 151 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5aa8608a5316..8d0acb86787a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1545,6 +1545,77 @@ def delete_model( if not not_found_ok: raise + def delete_job_metadata( + self, + job_id, + project=None, + location=None, + retry=DEFAULT_RETRY, + timeout=None, + not_found_ok=False, + ): + """[Beta] Delete job metadata from job history. + + Note: This does not stop a running job. Use + :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. + + Args: + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. + + Keyword Arguments: + project (Optional[str]): + ID of the project which owns the job (defaults to the client's project). + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + not_found_ok (Optional[bool]): + Defaults to ``False``. If ``True``, ignore "not found" errors + when deleting the job. + """ + extra_params = {} + + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + + if project is None: + project = self.project + + if location is None: + location = self.location + + # Location is always required for jobs.delete() + extra_params["location"] = location + + path = f"/projects/{project}/jobs/{job_id}/delete" + + span_attributes = {"path": path, "job_id": job_id, "location": location} + + try: + self._call_api( + retry, + span_name="BigQuery.deleteJob", + span_attributes=span_attributes, + method="DELETE", + path=path, + query_params=extra_params, + timeout=timeout, + ) + except google.api_core.exceptions.NotFound: + if not not_found_ok: + raise + def delete_routine( self, routine: Union[Routine, RoutineReference, str], diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index f31d994cae8d..e71788a43e0c 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -25,6 +25,7 @@ import time import unittest import uuid +from typing import Optional import psutil import pytest @@ -62,6 +63,7 @@ from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums @@ -123,7 +125,7 @@ def _has_rows(result): def _make_dataset_id(prefix): - return "%s%s" % (prefix, unique_resource_id()) + return f"python_bigquery_tests_system_{prefix}{unique_resource_id()}" def _load_json_schema(filename="schema.json"): @@ -142,7 +144,7 @@ class Config(object): global state. """ - CLIENT = None + CLIENT: Optional[bigquery.Client] = None CURSOR = None DATASET = None @@ -430,6 +432,22 @@ def test_delete_dataset_delete_contents_false(self): with self.assertRaises(exceptions.BadRequest): Config.CLIENT.delete_dataset(dataset) + def test_delete_job_metadata(self): + dataset_id = _make_dataset_id("us_east1") + self.temp_dataset(dataset_id, location="us-east1") + full_table_id = f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_metadata" + table = Table(full_table_id, schema=[SchemaField("col", "STRING")]) + Config.CLIENT.create_table(table) + query_job: bigquery.QueryJob = Config.CLIENT.query( + f"SELECT COUNT(*) FROM `{full_table_id}`", location="us-east1", + ) + query_job.result() + self.assertIsNotNone(Config.CLIENT.get_job(query_job)) + + Config.CLIENT.delete_job_metadata(query_job) + with self.assertRaises(NotFound): + Config.CLIENT.get_job(query_job) + def test_get_table_w_public_dataset(self): public = "bigquery-public-data" dataset_id = "samples" diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 860f25f35426..8f535145b613 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2498,6 +2498,66 @@ def test_update_table_delete_property(self): self.assertEqual(req[1]["data"], sent) self.assertIsNone(table3.description) + def test_delete_job_metadata_not_found(self): + creds = _make_credentials() + client = self._make_one("client-proj", creds, location="client-loc") + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("job not found"), + google.api_core.exceptions.NotFound("job not found"), + ) + + with self.assertRaises(google.api_core.exceptions.NotFound): + client.delete_job_metadata("my-job") + + conn.api_request.reset_mock() + client.delete_job_metadata("my-job", not_found_ok=True) + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/client-proj/jobs/my-job/delete", + query_params={"location": "client-loc"}, + timeout=None, + ) + + def test_delete_job_metadata_with_id(self): + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection({}) + + client.delete_job_metadata("my-job", project="param-proj", location="param-loc") + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/param-proj/jobs/my-job/delete", + query_params={"location": "param-loc"}, + timeout=None, + ) + + def test_delete_job_metadata_with_resource(self): + from google.cloud.bigquery.job import QueryJob + + query_resource = { + "jobReference": { + "projectId": "job-based-proj", + "jobId": "query_job", + "location": "us-east1", + }, + "configuration": {"query": {}}, + } + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection(query_resource) + job_from_resource = QueryJob.from_api_repr(query_resource, client) + + client.delete_job_metadata(job_from_resource) + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/job-based-proj/jobs/query_job/delete", + query_params={"location": "us-east1"}, + timeout=None, + ) + def test_delete_model(self): from google.cloud.bigquery.model import Model From 1a5739b2c39fa066768e49f0d35ea01f4ba7a657 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Mon, 26 Apr 2021 10:31:29 -0400 Subject: [PATCH 1127/2016] chore(revert): revert preventing normalization (#625) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 46a1284261da..607ffb63fbaf 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -100,7 +100,7 @@ setuptools.setup( name=name, - version=setuptools.sic(version), + version=version, description=description, long_description=readme, author="Google LLC", From 43d3f77e1616d0bdc05de9736f32664603f11723 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 26 Apr 2021 16:35:16 +0200 Subject: [PATCH 1128/2016] fix: unsetting clustering fileds on Table is now possible (#622) * fix: unsetting clustering fields from Table * Remove unused stuff from table.py * Use _PROPERTY_TO_API_FIELD in Table properties * Clarify why a property is set to explicit None --- .../google/cloud/bigquery/table.py | 204 +++++++++++------- .../tests/system/test_client.py | 25 +++ .../tests/unit/test_table.py | 8 +- 3 files changed, 160 insertions(+), 77 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 5ab649a2534a..b91c91a392b2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -19,7 +19,6 @@ import copy import datetime import functools -import logging import operator import pytz import typing @@ -57,12 +56,6 @@ from google.cloud import bigquery_storage -_LOGGER = logging.getLogger(__name__) - -_NO_BQSTORAGE_ERROR = ( - "The google-cloud-bigquery-storage library is not installed, " - "please install google-cloud-bigquery-storage to use bqstorage features." -) _NO_PANDAS_ERROR = ( "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." @@ -302,16 +295,36 @@ class Table(object): """ _PROPERTY_TO_API_FIELD = { + "clustering_fields": "clustering", + "created": "creationTime", + "dataset_id": ["tableReference", "datasetId"], + "description": "description", "encryption_configuration": "encryptionConfiguration", + "etag": "etag", "expires": "expirationTime", "external_data_configuration": "externalDataConfiguration", "friendly_name": "friendlyName", + "full_table_id": "id", + "labels": "labels", + "location": "location", + "modified": "lastModifiedTime", "mview_enable_refresh": "materializedView", + "mview_last_refresh_time": ["materializedView", "lastRefreshTime"], "mview_query": "materializedView", "mview_refresh_interval": "materializedView", + "num_bytes": "numBytes", + "num_rows": "numRows", "partition_expiration": "timePartitioning", "partitioning_type": "timePartitioning", + "project": ["tableReference", "projectId"], + "range_partitioning": "rangePartitioning", + "time_partitioning": "timePartitioning", + "schema": "schema", + "streaming_buffer": "streamingBuffer", + "self_link": "selfLink", + "table_id": ["tableReference", "tableId"], "time_partitioning": "timePartitioning", + "type": "type", "view_use_legacy_sql": "view", "view_query": "view", "require_partition_filter": "requirePartitionFilter", @@ -327,17 +340,23 @@ def __init__(self, table_ref, schema=None): @property def project(self): """str: Project bound to the table.""" - return self._properties["tableReference"]["projectId"] + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["project"] + ) @property def dataset_id(self): """str: ID of dataset containing the table.""" - return self._properties["tableReference"]["datasetId"] + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] + ) @property def table_id(self): """str: ID of the table.""" - return self._properties["tableReference"]["tableId"] + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["table_id"] + ) reference = property(_reference_getter) @@ -356,11 +375,15 @@ def require_partition_filter(self): partition filter that can be used for partition elimination to be specified. """ - return self._properties.get("requirePartitionFilter") + return self._properties.get( + self._PROPERTY_TO_API_FIELD["require_partition_filter"] + ) @require_partition_filter.setter def require_partition_filter(self, value): - self._properties["requirePartitionFilter"] = value + self._properties[ + self._PROPERTY_TO_API_FIELD["require_partition_filter"] + ] = value @property def schema(self): @@ -376,7 +399,7 @@ def schema(self): is not a :class:`~google.cloud.bigquery.schema.SchemaField` instance or a compatible mapping representation of the field. """ - prop = self._properties.get("schema") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"]) if not prop: return [] else: @@ -384,11 +407,13 @@ def schema(self): @schema.setter def schema(self, value): + api_field = self._PROPERTY_TO_API_FIELD["schema"] + if value is None: - self._properties["schema"] = None + self._properties[api_field] = None else: value = _to_schema_fields(value) - self._properties["schema"] = {"fields": _build_schema_resource(value)} + self._properties[api_field] = {"fields": _build_schema_resource(value)} @property def labels(self): @@ -401,13 +426,13 @@ def labels(self): Raises: ValueError: If ``value`` type is invalid. """ - return self._properties.setdefault("labels", {}) + return self._properties.setdefault(self._PROPERTY_TO_API_FIELD["labels"], {}) @labels.setter def labels(self, value): if not isinstance(value, dict): raise ValueError("Pass a dict") - self._properties["labels"] = value + self._properties[self._PROPERTY_TO_API_FIELD["labels"]] = value @property def encryption_configuration(self): @@ -421,7 +446,9 @@ def encryption_configuration(self): `_ in the BigQuery documentation. """ - prop = self._properties.get("encryptionConfiguration") + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["encryption_configuration"] + ) if prop is not None: prop = EncryptionConfiguration.from_api_repr(prop) return prop @@ -431,14 +458,16 @@ def encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties["encryptionConfiguration"] = api_repr + self._properties[ + self._PROPERTY_TO_API_FIELD["encryption_configuration"] + ] = api_repr @property def created(self): """Union[datetime.datetime, None]: Datetime at which the table was created (:data:`None` until set from the server). """ - creation_time = self._properties.get("creationTime") + creation_time = self._properties.get(self._PROPERTY_TO_API_FIELD["created"]) if creation_time is not None: # creation_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( @@ -450,14 +479,14 @@ def etag(self): """Union[str, None]: ETag for the table resource (:data:`None` until set from the server). """ - return self._properties.get("etag") + return self._properties.get(self._PROPERTY_TO_API_FIELD["etag"]) @property def modified(self): """Union[datetime.datetime, None]: Datetime at which the table was last modified (:data:`None` until set from the server). """ - modified_time = self._properties.get("lastModifiedTime") + modified_time = self._properties.get(self._PROPERTY_TO_API_FIELD["modified"]) if modified_time is not None: # modified_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( @@ -469,21 +498,25 @@ def num_bytes(self): """Union[int, None]: The size of the table in bytes (:data:`None` until set from the server). """ - return _helpers._int_or_none(self._properties.get("numBytes")) + return _helpers._int_or_none( + self._properties.get(self._PROPERTY_TO_API_FIELD["num_bytes"]) + ) @property def num_rows(self): """Union[int, None]: The number of rows in the table (:data:`None` until set from the server). """ - return _helpers._int_or_none(self._properties.get("numRows")) + return _helpers._int_or_none( + self._properties.get(self._PROPERTY_TO_API_FIELD["num_rows"]) + ) @property def self_link(self): """Union[str, None]: URL for the table resource (:data:`None` until set from the server). """ - return self._properties.get("selfLink") + return self._properties.get(self._PROPERTY_TO_API_FIELD["self_link"]) @property def full_table_id(self): @@ -492,7 +525,7 @@ def full_table_id(self): In the format ``project-id:dataset_id.table_id``. """ - return self._properties.get("id") + return self._properties.get(self._PROPERTY_TO_API_FIELD["full_table_id"]) @property def table_type(self): @@ -502,7 +535,7 @@ def table_type(self): Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'`` or ``'EXTERNAL'``. """ - return self._properties.get("type") + return self._properties.get(self._PROPERTY_TO_API_FIELD["type"]) @property def range_partitioning(self): @@ -523,7 +556,9 @@ def range_partitioning(self): :class:`~google.cloud.bigquery.table.RangePartitioning` or :data:`None`. """ - resource = self._properties.get("rangePartitioning") + resource = self._properties.get( + self._PROPERTY_TO_API_FIELD["range_partitioning"] + ) if resource is not None: return RangePartitioning(_properties=resource) @@ -536,7 +571,7 @@ def range_partitioning(self, value): raise ValueError( "Expected value to be RangePartitioning or None, got {}.".format(value) ) - self._properties["rangePartitioning"] = resource + self._properties[self._PROPERTY_TO_API_FIELD["range_partitioning"]] = resource @property def time_partitioning(self): @@ -553,7 +588,7 @@ def time_partitioning(self): :class:`~google.cloud.bigquery.table.TimePartitioning` or :data:`None`. """ - prop = self._properties.get("timePartitioning") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["time_partitioning"]) if prop is not None: return TimePartitioning.from_api_repr(prop) @@ -566,7 +601,7 @@ def time_partitioning(self, value): raise ValueError( "value must be google.cloud.bigquery.table.TimePartitioning " "or None" ) - self._properties["timePartitioning"] = api_repr + self._properties[self._PROPERTY_TO_API_FIELD["time_partitioning"]] = api_repr @property def partitioning_type(self): @@ -591,9 +626,10 @@ def partitioning_type(self, value): PendingDeprecationWarning, stacklevel=2, ) + api_field = self._PROPERTY_TO_API_FIELD["partitioning_type"] if self.time_partitioning is None: - self._properties["timePartitioning"] = {} - self._properties["timePartitioning"]["type"] = value + self._properties[api_field] = {} + self._properties[api_field]["type"] = value @property def partition_expiration(self): @@ -620,9 +656,11 @@ def partition_expiration(self, value): PendingDeprecationWarning, stacklevel=2, ) + api_field = self._PROPERTY_TO_API_FIELD["partition_expiration"] + if self.time_partitioning is None: - self._properties["timePartitioning"] = {"type": TimePartitioningType.DAY} - self._properties["timePartitioning"]["expirationMs"] = str(value) + self._properties[api_field] = {"type": TimePartitioningType.DAY} + self._properties[api_field]["expirationMs"] = str(value) @property def clustering_fields(self): @@ -637,7 +675,7 @@ def clustering_fields(self): BigQuery supports clustering for both partitioned and non-partitioned tables. """ - prop = self._properties.get("clustering") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["clustering_fields"]) if prop is not None: return list(prop.get("fields", ())) @@ -647,12 +685,15 @@ def clustering_fields(self, value): (Defaults to :data:`None`). """ + api_field = self._PROPERTY_TO_API_FIELD["clustering_fields"] + if value is not None: - prop = self._properties.setdefault("clustering", {}) + prop = self._properties.setdefault(api_field, {}) prop["fields"] = value else: - if "clustering" in self._properties: - del self._properties["clustering"] + # In order to allow unsetting clustering fields completely, we explicitly + # set this property to None (as oposed to merely removing the key). + self._properties[api_field] = None @property def description(self): @@ -662,13 +703,13 @@ def description(self): Raises: ValueError: For invalid value types. """ - return self._properties.get("description") + return self._properties.get(self._PROPERTY_TO_API_FIELD["description"]) @description.setter def description(self, value): if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") - self._properties["description"] = value + self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value @property def expires(self): @@ -678,7 +719,7 @@ def expires(self): Raises: ValueError: For invalid value types. """ - expiration_time = self._properties.get("expirationTime") + expiration_time = self._properties.get(self._PROPERTY_TO_API_FIELD["expires"]) if expiration_time is not None: # expiration_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( @@ -690,7 +731,9 @@ def expires(self, value): if not isinstance(value, datetime.datetime) and value is not None: raise ValueError("Pass a datetime, or None") value_ms = google.cloud._helpers._millis_from_datetime(value) - self._properties["expirationTime"] = _helpers._str_or_none(value_ms) + self._properties[ + self._PROPERTY_TO_API_FIELD["expires"] + ] = _helpers._str_or_none(value_ms) @property def friendly_name(self): @@ -699,13 +742,13 @@ def friendly_name(self): Raises: ValueError: For invalid value types. """ - return self._properties.get("friendlyName") + return self._properties.get(self._PROPERTY_TO_API_FIELD["friendly_name"]) @friendly_name.setter def friendly_name(self, value): if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") - self._properties["friendlyName"] = value + self._properties[self._PROPERTY_TO_API_FIELD["friendly_name"]] = value @property def location(self): @@ -713,7 +756,7 @@ def location(self): Defaults to :data:`None`. """ - return self._properties.get("location") + return self._properties.get(self._PROPERTY_TO_API_FIELD["location"]) @property def view_query(self): @@ -726,14 +769,17 @@ def view_query(self): Raises: ValueError: For invalid value types. """ - return _helpers._get_sub_prop(self._properties, ["view", "query"]) + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + return _helpers._get_sub_prop(self._properties, [api_field, "query"]) @view_query.setter def view_query(self, value): if not isinstance(value, str): raise ValueError("Pass a string") - _helpers._set_sub_prop(self._properties, ["view", "query"], value) - view = self._properties["view"] + + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + _helpers._set_sub_prop(self._properties, [api_field, "query"], value) + view = self._properties[api_field] # The service defaults useLegacySql to True, but this # client uses Standard SQL by default. if view.get("useLegacySql") is None: @@ -742,7 +788,7 @@ def view_query(self, value): @view_query.deleter def view_query(self): """Delete SQL query defining the table as a view.""" - self._properties.pop("view", None) + self._properties.pop(self._PROPERTY_TO_API_FIELD["view_query"], None) view_use_legacy_sql = property(_view_use_legacy_sql_getter) @@ -750,27 +796,29 @@ def view_query(self): def view_use_legacy_sql(self, value): if not isinstance(value, bool): raise ValueError("Pass a boolean") - if self._properties.get("view") is None: - self._properties["view"] = {} - self._properties["view"]["useLegacySql"] = value + + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + if self._properties.get(api_field) is None: + self._properties[api_field] = {} + self._properties[api_field]["useLegacySql"] = value @property def mview_query(self): """Optional[str]: SQL query defining the table as a materialized view (defaults to :data:`None`). """ - return _helpers._get_sub_prop(self._properties, ["materializedView", "query"]) + api_field = self._PROPERTY_TO_API_FIELD["mview_query"] + return _helpers._get_sub_prop(self._properties, [api_field, "query"]) @mview_query.setter def mview_query(self, value): - _helpers._set_sub_prop( - self._properties, ["materializedView", "query"], str(value) - ) + api_field = self._PROPERTY_TO_API_FIELD["mview_query"] + _helpers._set_sub_prop(self._properties, [api_field, "query"], str(value)) @mview_query.deleter def mview_query(self): """Delete SQL query defining the table as a materialized view.""" - self._properties.pop("materializedView", None) + self._properties.pop(self._PROPERTY_TO_API_FIELD["mview_query"], None) @property def mview_last_refresh_time(self): @@ -778,7 +826,7 @@ def mview_last_refresh_time(self): refreshed (:data:`None` until set from the server). """ refresh_time = _helpers._get_sub_prop( - self._properties, ["materializedView", "lastRefreshTime"] + self._properties, self._PROPERTY_TO_API_FIELD["mview_last_refresh_time"] ) if refresh_time is not None: # refresh_time will be in milliseconds. @@ -791,14 +839,14 @@ def mview_enable_refresh(self): """Optional[bool]: Enable automatic refresh of the materialized view when the base table is updated. The default value is :data:`True`. """ - return _helpers._get_sub_prop( - self._properties, ["materializedView", "enableRefresh"] - ) + api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"] + return _helpers._get_sub_prop(self._properties, [api_field, "enableRefresh"]) @mview_enable_refresh.setter def mview_enable_refresh(self, value): + api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"] return _helpers._set_sub_prop( - self._properties, ["materializedView", "enableRefresh"], value + self._properties, [api_field, "enableRefresh"], value ) @property @@ -807,8 +855,9 @@ def mview_refresh_interval(self): materialized view will be refreshed. The default value is 1800000 milliseconds (30 minutes). """ + api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"] refresh_interval = _helpers._get_sub_prop( - self._properties, ["materializedView", "refreshIntervalMs"] + self._properties, [api_field, "refreshIntervalMs"] ) if refresh_interval is not None: return datetime.timedelta(milliseconds=int(refresh_interval)) @@ -820,10 +869,9 @@ def mview_refresh_interval(self, value): else: refresh_interval_ms = str(value // datetime.timedelta(milliseconds=1)) + api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"] _helpers._set_sub_prop( - self._properties, - ["materializedView", "refreshIntervalMs"], - refresh_interval_ms, + self._properties, [api_field, "refreshIntervalMs"], refresh_interval_ms, ) @property @@ -831,7 +879,7 @@ def streaming_buffer(self): """google.cloud.bigquery.StreamingBuffer: Information about a table's streaming buffer. """ - sb = self._properties.get("streamingBuffer") + sb = self._properties.get(self._PROPERTY_TO_API_FIELD["streaming_buffer"]) if sb is not None: return StreamingBuffer(sb) @@ -843,7 +891,9 @@ def external_data_configuration(self): Raises: ValueError: For invalid value types. """ - prop = self._properties.get("externalDataConfiguration") + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_data_configuration"] + ) if prop is not None: prop = ExternalConfig.from_api_repr(prop) return prop @@ -855,7 +905,9 @@ def external_data_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties["externalDataConfiguration"] = api_repr + self._properties[ + self._PROPERTY_TO_API_FIELD["external_data_configuration"] + ] = api_repr @classmethod def from_string(cls, full_table_id: str) -> "Table": @@ -908,9 +960,15 @@ def from_api_repr(cls, resource: dict) -> "Table": "Resource lacks required identity information:" '["tableReference"]["tableId"]' ) - project_id = resource["tableReference"]["projectId"] - table_id = resource["tableReference"]["tableId"] - dataset_id = resource["tableReference"]["datasetId"] + project_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["project"] + ) + table_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["table_id"] + ) + dataset_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["dataset_id"] + ) dataset_ref = dataset.DatasetReference(project_id, dataset_id) table = cls(dataset_ref.table(table_id)) diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index e71788a43e0c..7c8ef50fa168 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -90,6 +90,12 @@ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] +CLUSTERING_SCHEMA = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("body_height_cm", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("date_of_birth", "DATE", mode="REQUIRED"), +] TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA = [ bigquery.SchemaField("transaction_time", "TIMESTAMP", mode="REQUIRED"), bigquery.SchemaField("transaction_id", "INTEGER", mode="REQUIRED"), @@ -579,6 +585,25 @@ def test_update_table_schema(self): self.assertEqual(found.field_type, expected.field_type) self.assertEqual(found.mode, expected.mode) + def test_update_table_clustering_configuration(self): + dataset = self.temp_dataset(_make_dataset_id("update_table")) + + TABLE_NAME = "test_table" + table_arg = Table(dataset.table(TABLE_NAME), schema=CLUSTERING_SCHEMA) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + self.assertTrue(_table_exists(table)) + + table.clustering_fields = ["full_name", "date_of_birth"] + table2 = Config.CLIENT.update_table(table, ["clustering_fields"]) + self.assertEqual(table2.clustering_fields, ["full_name", "date_of_birth"]) + + table2.clustering_fields = None + table3 = Config.CLIENT.update_table(table2, ["clustering_fields"]) + self.assertIsNone(table3.clustering_fields, None) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 3373528e03f8..ce4a1576123b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1210,8 +1210,8 @@ def test_clustering_fields_setter_w_none(self): table._properties["clustering"] = {"fields": fields} table.clustering_fields = None - self.assertEqual(table.clustering_fields, None) - self.assertFalse("clustering" in table._properties) + self.assertIsNone(table.clustering_fields) + self.assertTrue("clustering" in table._properties) # None stored explicitly def test_clustering_fields_setter_w_none_noop(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -1219,8 +1219,8 @@ def test_clustering_fields_setter_w_none_noop(self): table = self._make_one(table_ref) table.clustering_fields = None - self.assertEqual(table.clustering_fields, None) - self.assertFalse("clustering" in table._properties) + self.assertIsNone(table.clustering_fields) + self.assertTrue("clustering" in table._properties) # None stored explicitly def test_encryption_configuration_setter(self): # Previously, the EncryptionConfiguration class was in the table module, not the From 74972cd031b1a139136a0629b57b4a41c92810f4 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 26 Apr 2021 15:56:03 +0000 Subject: [PATCH 1129/2016] chore: release 2.14.0 (#602) :robot: I have created a release \*beep\* \*boop\* --- ## [2.14.0](https://www.github.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) (2021-04-26) ### Features * accept DatasetListItem where DatasetReference is accepted ([#597](https://www.github.com/googleapis/python-bigquery/issues/597)) ([c8b5581](https://www.github.com/googleapis/python-bigquery/commit/c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2)) * accept job object as argument to `get_job` and `cancel_job` ([#617](https://www.github.com/googleapis/python-bigquery/issues/617)) ([f75dcdf](https://www.github.com/googleapis/python-bigquery/commit/f75dcdf3943b87daba60011c9a3b42e34ff81910)) * add `Client.delete_job_metadata` method to remove job metadata ([#610](https://www.github.com/googleapis/python-bigquery/issues/610)) ([0abb566](https://www.github.com/googleapis/python-bigquery/commit/0abb56669c097c59fbffce007c702e7a55f2d9c1)) * add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` ([#575](https://www.github.com/googleapis/python-bigquery/issues/575)) ([f95f415](https://www.github.com/googleapis/python-bigquery/commit/f95f415d3441b3928f6cc705cb8a75603d790fd6)) * add type hints for public methods ([#613](https://www.github.com/googleapis/python-bigquery/issues/613)) ([f8d4aaa](https://www.github.com/googleapis/python-bigquery/commit/f8d4aaa335a0eef915e73596fc9b43b11d11be9f)) * DB API cursors are now iterable ([#618](https://www.github.com/googleapis/python-bigquery/issues/618)) ([e0b373d](https://www.github.com/googleapis/python-bigquery/commit/e0b373d0e721a70656ed8faceb7f5c70f642d144)) * retry google.auth TransportError by default ([#624](https://www.github.com/googleapis/python-bigquery/issues/624)) ([34ecc3f](https://www.github.com/googleapis/python-bigquery/commit/34ecc3f1ca0ff073330c0c605673d89b43af7ed9)) * use pyarrow stream compression, if available ([#593](https://www.github.com/googleapis/python-bigquery/issues/593)) ([dde9dc5](https://www.github.com/googleapis/python-bigquery/commit/dde9dc5114c2311fb76fafc5b222fff561e8abf1)) ### Bug Fixes * consistent percents handling in DB API query ([#619](https://www.github.com/googleapis/python-bigquery/issues/619)) ([6502a60](https://www.github.com/googleapis/python-bigquery/commit/6502a602337ae562652a20b20270949f2c9d5073)) * missing license headers in new test files ([#604](https://www.github.com/googleapis/python-bigquery/issues/604)) ([df48cc5](https://www.github.com/googleapis/python-bigquery/commit/df48cc5a0be99ad39d5835652d1b7422209afc5d)) * unsetting clustering fileds on Table is now possible ([#622](https://www.github.com/googleapis/python-bigquery/issues/622)) ([33a871f](https://www.github.com/googleapis/python-bigquery/commit/33a871f06329f9bf5a6a92fab9ead65bf2bee75d)) ### Documentation * add sample to run DML query ([#591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155)) * update the description of the return value of `_QueryResults.rows()` ([#594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 27 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 5dc2c883838a..9aee40510984 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,33 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.14.0](https://www.github.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) (2021-04-26) + + +### Features + +* accept DatasetListItem where DatasetReference is accepted ([#597](https://www.github.com/googleapis/python-bigquery/issues/597)) ([c8b5581](https://www.github.com/googleapis/python-bigquery/commit/c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2)) +* accept job object as argument to `get_job` and `cancel_job` ([#617](https://www.github.com/googleapis/python-bigquery/issues/617)) ([f75dcdf](https://www.github.com/googleapis/python-bigquery/commit/f75dcdf3943b87daba60011c9a3b42e34ff81910)) +* add `Client.delete_job_metadata` method to remove job metadata ([#610](https://www.github.com/googleapis/python-bigquery/issues/610)) ([0abb566](https://www.github.com/googleapis/python-bigquery/commit/0abb56669c097c59fbffce007c702e7a55f2d9c1)) +* add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` ([#575](https://www.github.com/googleapis/python-bigquery/issues/575)) ([f95f415](https://www.github.com/googleapis/python-bigquery/commit/f95f415d3441b3928f6cc705cb8a75603d790fd6)) +* add type hints for public methods ([#613](https://www.github.com/googleapis/python-bigquery/issues/613)) ([f8d4aaa](https://www.github.com/googleapis/python-bigquery/commit/f8d4aaa335a0eef915e73596fc9b43b11d11be9f)) +* DB API cursors are now iterable ([#618](https://www.github.com/googleapis/python-bigquery/issues/618)) ([e0b373d](https://www.github.com/googleapis/python-bigquery/commit/e0b373d0e721a70656ed8faceb7f5c70f642d144)) +* retry google.auth TransportError by default ([#624](https://www.github.com/googleapis/python-bigquery/issues/624)) ([34ecc3f](https://www.github.com/googleapis/python-bigquery/commit/34ecc3f1ca0ff073330c0c605673d89b43af7ed9)) +* use pyarrow stream compression, if available ([#593](https://www.github.com/googleapis/python-bigquery/issues/593)) ([dde9dc5](https://www.github.com/googleapis/python-bigquery/commit/dde9dc5114c2311fb76fafc5b222fff561e8abf1)) + + +### Bug Fixes + +* consistent percents handling in DB API query ([#619](https://www.github.com/googleapis/python-bigquery/issues/619)) ([6502a60](https://www.github.com/googleapis/python-bigquery/commit/6502a602337ae562652a20b20270949f2c9d5073)) +* missing license headers in new test files ([#604](https://www.github.com/googleapis/python-bigquery/issues/604)) ([df48cc5](https://www.github.com/googleapis/python-bigquery/commit/df48cc5a0be99ad39d5835652d1b7422209afc5d)) +* unsetting clustering fields on Table is now possible ([#622](https://www.github.com/googleapis/python-bigquery/issues/622)) ([33a871f](https://www.github.com/googleapis/python-bigquery/commit/33a871f06329f9bf5a6a92fab9ead65bf2bee75d)) + + +### Documentation + +* add sample to run DML query ([#591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155)) +* update the description of the return value of `_QueryResults.rows()` ([#594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897)) + ### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 2330d0c2cdfb..ba8b4e8af8c9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.13.1" +__version__ = "2.14.0" From bef6befc7df60226e84d8da7b7cd619953f8d1e8 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 27 Apr 2021 17:08:05 +0200 Subject: [PATCH 1130/2016] chore(deps): update dependency google-cloud-bigquery to v2.14.0 (#627) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.13.1` -> `==2.14.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/compatibility-slim/2.13.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/confidence-slim/2.13.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.14.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2140-httpswwwgithubcomgoogleapispython-bigquerycomparev2131v2140-2021-04-26) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) ##### Features - accept DatasetListItem where DatasetReference is accepted ([#​597](https://www.github.com/googleapis/python-bigquery/issues/597)) ([c8b5581](https://www.github.com/googleapis/python-bigquery/commit/c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2)) - accept job object as argument to `get_job` and `cancel_job` ([#​617](https://www.github.com/googleapis/python-bigquery/issues/617)) ([f75dcdf](https://www.github.com/googleapis/python-bigquery/commit/f75dcdf3943b87daba60011c9a3b42e34ff81910)) - add `Client.delete_job_metadata` method to remove job metadata ([#​610](https://www.github.com/googleapis/python-bigquery/issues/610)) ([0abb566](https://www.github.com/googleapis/python-bigquery/commit/0abb56669c097c59fbffce007c702e7a55f2d9c1)) - add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` ([#​575](https://www.github.com/googleapis/python-bigquery/issues/575)) ([f95f415](https://www.github.com/googleapis/python-bigquery/commit/f95f415d3441b3928f6cc705cb8a75603d790fd6)) - add type hints for public methods ([#​613](https://www.github.com/googleapis/python-bigquery/issues/613)) ([f8d4aaa](https://www.github.com/googleapis/python-bigquery/commit/f8d4aaa335a0eef915e73596fc9b43b11d11be9f)) - DB API cursors are now iterable ([#​618](https://www.github.com/googleapis/python-bigquery/issues/618)) ([e0b373d](https://www.github.com/googleapis/python-bigquery/commit/e0b373d0e721a70656ed8faceb7f5c70f642d144)) - retry google.auth TransportError by default ([#​624](https://www.github.com/googleapis/python-bigquery/issues/624)) ([34ecc3f](https://www.github.com/googleapis/python-bigquery/commit/34ecc3f1ca0ff073330c0c605673d89b43af7ed9)) - use pyarrow stream compression, if available ([#​593](https://www.github.com/googleapis/python-bigquery/issues/593)) ([dde9dc5](https://www.github.com/googleapis/python-bigquery/commit/dde9dc5114c2311fb76fafc5b222fff561e8abf1)) ##### Bug Fixes - consistent percents handling in DB API query ([#​619](https://www.github.com/googleapis/python-bigquery/issues/619)) ([6502a60](https://www.github.com/googleapis/python-bigquery/commit/6502a602337ae562652a20b20270949f2c9d5073)) - missing license headers in new test files ([#​604](https://www.github.com/googleapis/python-bigquery/issues/604)) ([df48cc5](https://www.github.com/googleapis/python-bigquery/commit/df48cc5a0be99ad39d5835652d1b7422209afc5d)) - unsetting clustering fields on Table is now possible ([#​622](https://www.github.com/googleapis/python-bigquery/issues/622)) ([33a871f](https://www.github.com/googleapis/python-bigquery/commit/33a871f06329f9bf5a6a92fab9ead65bf2bee75d)) ##### Documentation - add sample to run DML query ([#​591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155)) - update the description of the return value of `_QueryResults.rows()` ([#​594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897)) ##### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) ##### Bug Fixes - add ConnectionError to default retry ([#​571](https://www.github.com/googleapis/python-bigquery/issues/571)) ([a3edb8b](https://www.github.com/googleapis/python-bigquery/commit/a3edb8b921e029e2c03d33302d408ad5d4e9d4ad))
--- ### Configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../samples/geography/requirements.txt | 2 +- .../samples/snippets/conftest.py | 12 ++++++++++-- .../samples/snippets/requirements.txt | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 96819343cbf8..7e017e283086 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.13.1 +google-cloud-bigquery==2.14.0 google-cloud-bigquery-storage==2.1.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/conftest.py b/packages/google-cloud-bigquery/samples/snippets/conftest.py index 31c6ba104687..0d0299ee560e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/conftest.py +++ b/packages/google-cloud-bigquery/samples/snippets/conftest.py @@ -20,21 +20,29 @@ RESOURCE_PREFIX = "python_bigquery_samples_snippets" +RESOURCE_DATE_FORMAT = "%Y%m%d_%H%M%S" +RESOURCE_DATE_LENGTH = 4 + 2 + 2 + 1 + 2 + 2 + 2 def resource_prefix() -> str: - timestamp = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S") + timestamp = datetime.datetime.utcnow().strftime(RESOURCE_DATE_FORMAT) random_string = hex(random.randrange(1000000))[2:] return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}" +def resource_name_to_date(resource_name: str): + start_date = len(RESOURCE_PREFIX) + 1 + date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH] + return datetime.strptime(date_string, RESOURCE_DATE_FORMAT) + + @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) for dataset in bigquery_client.list_datasets(): if ( dataset.dataset_id.startswith(RESOURCE_PREFIX) - and dataset.created < yesterday + and resource_name_to_date(dataset.dataset_id) < yesterday ): bigquery_client.delete_dataset( dataset, delete_contents=True, not_found_ok=True diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 74a18981e2e9..d7e60f77dd5d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.13.1 +google-cloud-bigquery==2.14.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 From 90f22e9cf01e0a1ba7920c888a450e02226530db Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 27 Apr 2021 14:15:52 -0600 Subject: [PATCH 1131/2016] fix: The DB API Binary function accepts bytes data (#630) * fix: The DB API Binary function accepts bytes data * Binary should accept bytes-like objects. * check for an integer before converting to bytes. Because we don't want to accidentally create a giant bytes. * blackened. * Fixed exception string. * parameterized binary tests and rearranged imports. * typo * Blackened --- .../google/cloud/bigquery/dbapi/types.py | 20 +++++++++--- .../tests/unit/test_dbapi_types.py | 32 ++++++++++++++++--- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py index 20eca9b00c65..717593ae1336 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/types.py @@ -30,16 +30,28 @@ TimestampFromTicks = datetime.datetime.fromtimestamp -def Binary(string): +def Binary(data): """Contruct a DB-API binary value. Args: - string (str): A string to encode as a binary value. + data (bytes-like): An object containing binary data and that + can be converted to bytes with the `bytes` builtin. Returns: - bytes: The UTF-8 encoded bytes representing the string. + bytes: The binary data as a bytes object. """ - return string.encode("utf-8") + if isinstance(data, int): + # This is not the conversion we're looking for, because it + # will simply create a bytes object of the given size. + raise TypeError("cannot convert `int` object to binary") + + try: + return bytes(data) + except TypeError: + if isinstance(data, str): + return data.encode("utf-8") + else: + raise def TimeFromTicks(ticks, tz=None): diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py index e05660ffed14..cf282c68b989 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py @@ -15,6 +15,8 @@ import datetime import unittest +import pytest + import google.cloud._helpers from google.cloud.bigquery.dbapi import types @@ -26,10 +28,6 @@ def test_binary_type(self): self.assertEqual("STRUCT", types.BINARY) self.assertNotEqual("STRING", types.BINARY) - def test_binary_constructor(self): - self.assertEqual(types.Binary(u"hello"), b"hello") - self.assertEqual(types.Binary(u"\u1f60"), u"\u1f60".encode("utf-8")) - def test_timefromticks(self): somedatetime = datetime.datetime( 2017, 2, 18, 12, 47, 26, tzinfo=google.cloud._helpers.UTC @@ -40,3 +38,29 @@ def test_timefromticks(self): types.TimeFromTicks(ticks, google.cloud._helpers.UTC), datetime.time(12, 47, 26, tzinfo=google.cloud._helpers.UTC), ) + + +class CustomBinary: + def __bytes__(self): + return b"Google" + + +@pytest.mark.parametrize( + "raw,expected", + [ + (u"hello", b"hello"), + (u"\u1f60", u"\u1f60".encode("utf-8")), + (b"hello", b"hello"), + (bytearray(b"hello"), b"hello"), + (memoryview(b"hello"), b"hello"), + (CustomBinary(), b"Google"), + ], +) +def test_binary_constructor(raw, expected): + assert types.Binary(raw) == expected + + +@pytest.mark.parametrize("bad", (42, 42.0, None)) +def test_invalid_binary_constructor(bad): + with pytest.raises(TypeError): + types.Binary(bad) From 5f709bf07745316709c536d2be758b9dd319fc0c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Apr 2021 16:28:10 +0200 Subject: [PATCH 1132/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.4.0 (#595) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 7e017e283086..f46b141fd0a5 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.14.0 -google-cloud-bigquery-storage==2.1.0 +google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index d7e60f77dd5d..f7b5cebe9ca6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.14.0 -google-cloud-bigquery-storage==2.3.0 +google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 ipython==7.16.1; python_version < '3.7' From 176947906ac70e6f3e65c4578e821c059981bf25 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Apr 2021 15:44:25 -0500 Subject: [PATCH 1133/2016] chore: add yoshi to CODEOWNERS (#634) --- packages/google-cloud-bigquery/.github/CODEOWNERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/CODEOWNERS b/packages/google-cloud-bigquery/.github/CODEOWNERS index 10f4ee7c0f7a..ae570eb01219 100644 --- a/packages/google-cloud-bigquery/.github/CODEOWNERS +++ b/packages/google-cloud-bigquery/.github/CODEOWNERS @@ -5,8 +5,7 @@ # https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax # The @googleapis/api-bigquery is the default owner for changes in this repo -* @googleapis/api-bigquery +* @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes /samples/ @googleapis/python-samples-owners - From 4ccdac4c672d6356a37f61fba088eb8b5628908a Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 29 Apr 2021 07:19:56 -0600 Subject: [PATCH 1134/2016] feat: Extended DB API parameter syntax to optionally provide parameter types (#626) * Added explicit type documentation. * Extended query-parameter system for specifying parameter types.assed. * Serialize non-floats (e.g. Decimals) using in FLOAT64 parameters. Co-authored-by: Tim Swast * De-reference aliases in SqlParameterScalarTypes when checking types Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/docs/dbapi.rst | 37 ++++++ .../google/cloud/bigquery/_helpers.py | 2 +- .../google/cloud/bigquery/dbapi/_helpers.py | 107 ++++++++++++------ .../google/cloud/bigquery/dbapi/cursor.py | 93 +++++++++++++-- .../google/cloud/bigquery/magics/magics.py | 2 +- .../tests/unit/test__helpers.py | 15 +++ .../tests/unit/test_dbapi__helpers.py | 94 +++++++++++++-- .../tests/unit/test_dbapi_cursor.py | 106 +++++++++++++++-- 8 files changed, 396 insertions(+), 60 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/dbapi.rst b/packages/google-cloud-bigquery/docs/dbapi.rst index ca0256d3c8de..41ec85833d49 100644 --- a/packages/google-cloud-bigquery/docs/dbapi.rst +++ b/packages/google-cloud-bigquery/docs/dbapi.rst @@ -4,3 +4,40 @@ DB-API Reference .. automodule:: google.cloud.bigquery.dbapi :members: :show-inheritance: + + +DB-API Query-Parameter Syntax +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The BigQuery DB-API uses the `qmark` `parameter style +`_ for +unnamed/positional parameters and the `pyformat` parameter style for +named parameters. + +An example of a query using unnamed parameters:: + + insert into people (name, income) values (?, ?) + +and using named parameters:: + + insert into people (name, income) values (%(name)s, %(income)s) + +Providing explicit type information +----------------------------------- + +BigQuery requires type information for parameters. The The BigQuery +DB-API can usually determine parameter types for parameters based on +provided values. Sometimes, however, types can't be determined (for +example when `None` is passed) or are determined incorrectly (for +example when passing a floating-point value to a numeric column). + +The BigQuery DB-API provides an extended parameter syntax. For named +parameters, a BigQuery type is provided after the name separated by a +colon, as in:: + + insert into people (name, income) values (%(name:string)s, %(income:numeric)s) + +For unnamed parameters, use the named syntax with a type, but now +name, as in:: + + insert into people (name, income) values (%(:string)s, %(:numeric)s) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index daa14b92a460..ad8e3f00323a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -275,7 +275,7 @@ def _int_to_json(value): def _float_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" - return value + return value if value is None else float(value) def _decimal_to_json(value): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index beb3c5e712cb..3b0d8134ccbd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -20,7 +20,7 @@ import numbers from google.cloud import bigquery -from google.cloud.bigquery import table +from google.cloud.bigquery import table, enums from google.cloud.bigquery.dbapi import exceptions @@ -28,7 +28,28 @@ _NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28") -def scalar_to_query_parameter(value, name=None): +def _parameter_type(name, value, query_parameter_type=None, value_doc=""): + if query_parameter_type: + try: + parameter_type = getattr( + enums.SqlParameterScalarTypes, query_parameter_type.upper() + )._type + except AttributeError: + raise exceptions.ProgrammingError( + f"The given parameter type, {query_parameter_type}," + f" for {name} is not a valid BigQuery scalar type." + ) + else: + parameter_type = bigquery_scalar_type(value) + if parameter_type is None: + raise exceptions.ProgrammingError( + f"Encountered parameter {name} with " + f"{value_doc} value {value} of unexpected type." + ) + return parameter_type + + +def scalar_to_query_parameter(value, name=None, query_parameter_type=None): """Convert a scalar value into a query parameter. Args: @@ -37,6 +58,7 @@ def scalar_to_query_parameter(value, name=None): name (str): (Optional) Name of the query parameter. + query_parameter_type (Optional[str]): Given type for the parameter. Returns: google.cloud.bigquery.ScalarQueryParameter: @@ -47,24 +69,19 @@ def scalar_to_query_parameter(value, name=None): google.cloud.bigquery.dbapi.exceptions.ProgrammingError: if the type cannot be determined. """ - parameter_type = bigquery_scalar_type(value) - - if parameter_type is None: - raise exceptions.ProgrammingError( - "encountered parameter {} with value {} of unexpected type".format( - name, value - ) - ) - return bigquery.ScalarQueryParameter(name, parameter_type, value) + return bigquery.ScalarQueryParameter( + name, _parameter_type(name, value, query_parameter_type), value + ) -def array_to_query_parameter(value, name=None): +def array_to_query_parameter(value, name=None, query_parameter_type=None): """Convert an array-like value into a query parameter. Args: value (Sequence[Any]): The elements of the array (should not be a string-like Sequence). name (Optional[str]): Name of the query parameter. + query_parameter_type (Optional[str]): Given type for the parameter. Returns: A query parameter corresponding with the type and value of the plain @@ -80,29 +97,30 @@ def array_to_query_parameter(value, name=None): "not string-like.".format(name) ) - if not value: + if query_parameter_type or value: + array_type = _parameter_type( + name, + value[0] if value else None, + query_parameter_type, + value_doc="array element ", + ) + else: raise exceptions.ProgrammingError( "Encountered an empty array-like value of parameter {}, cannot " "determine array elements type.".format(name) ) - # Assume that all elements are of the same type, and let the backend handle - # any type incompatibilities among the array elements - array_type = bigquery_scalar_type(value[0]) - if array_type is None: - raise exceptions.ProgrammingError( - "Encountered unexpected first array element of parameter {}, " - "cannot determine array elements type.".format(name) - ) - return bigquery.ArrayQueryParameter(name, array_type, value) -def to_query_parameters_list(parameters): +def to_query_parameters_list(parameters, parameter_types): """Converts a sequence of parameter values into query parameters. Args: parameters (Sequence[Any]): Sequence of query parameter values. + parameter_types: + A list of parameter types, one for each parameter. + Unknown types are provided as None. Returns: List[google.cloud.bigquery.query._AbstractQueryParameter]: @@ -110,23 +128,27 @@ def to_query_parameters_list(parameters): """ result = [] - for value in parameters: + for value, type_ in zip(parameters, parameter_types): if isinstance(value, collections_abc.Mapping): raise NotImplementedError("STRUCT-like parameter values are not supported.") elif array_like(value): - param = array_to_query_parameter(value) + param = array_to_query_parameter(value, None, type_) else: - param = scalar_to_query_parameter(value) + param = scalar_to_query_parameter(value, None, type_) + result.append(param) return result -def to_query_parameters_dict(parameters): +def to_query_parameters_dict(parameters, query_parameter_types): """Converts a dictionary of parameter values into query parameters. Args: parameters (Mapping[str, Any]): Dictionary of query parameter values. + parameter_types: + A dictionary of parameter types. It needn't have a key for each + parameter. Returns: List[google.cloud.bigquery.query._AbstractQueryParameter]: @@ -140,21 +162,38 @@ def to_query_parameters_dict(parameters): "STRUCT-like parameter values are not supported " "(parameter {}).".format(name) ) - elif array_like(value): - param = array_to_query_parameter(value, name=name) else: - param = scalar_to_query_parameter(value, name=name) + query_parameter_type = query_parameter_types.get(name) + if array_like(value): + param = array_to_query_parameter( + value, name=name, query_parameter_type=query_parameter_type + ) + else: + param = scalar_to_query_parameter( + value, name=name, query_parameter_type=query_parameter_type, + ) + result.append(param) return result -def to_query_parameters(parameters): +def to_query_parameters(parameters, parameter_types): """Converts DB-API parameter values into query parameters. Args: parameters (Union[Mapping[str, Any], Sequence[Any]]): A dictionary or sequence of query parameter values. + parameter_types (Union[Mapping[str, str], Sequence[str]]): + A dictionary or list of parameter types. + + If parameters is a mapping, then this must be a dictionary + of parameter types. It needn't have a key for each + parameter. + + If parameters is a sequence, then this must be a list of + parameter types, one for each paramater. Unknown types + are provided as None. Returns: List[google.cloud.bigquery.query._AbstractQueryParameter]: @@ -164,9 +203,9 @@ def to_query_parameters(parameters): return [] if isinstance(parameters, collections_abc.Mapping): - return to_query_parameters_dict(parameters) - - return to_query_parameters_list(parameters) + return to_query_parameters_dict(parameters, parameter_types) + else: + return to_query_parameters_list(parameters, parameter_types) def bigquery_scalar_type(value): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index ca78d3907ed8..f74781df9728 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -18,6 +18,7 @@ from collections import abc as collections_abc import copy import logging +import re try: from google.cloud.bigquery_storage import ArrowSerializationOptions @@ -161,6 +162,14 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None): job_config (google.cloud.bigquery.job.QueryJobConfig): (Optional) Extra configuration options for the query job. """ + formatted_operation, parameter_types = _format_operation(operation, parameters) + self._execute( + formatted_operation, parameters, job_id, job_config, parameter_types + ) + + def _execute( + self, formatted_operation, parameters, job_id, job_config, parameter_types + ): self._query_data = None self._query_job = None client = self.connection._client @@ -169,8 +178,7 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None): # query parameters was not one of the standard options. Convert both # the query and the parameters to the format expected by the client # libraries. - formatted_operation = _format_operation(operation, parameters=parameters) - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, parameter_types) if client._default_query_job_config: if job_config: @@ -209,8 +217,19 @@ def executemany(self, operation, seq_of_parameters): seq_of_parameters (Union[Sequence[Mapping[str, Any], Sequence[Any]]]): Sequence of many sets of parameter values. """ - for parameters in seq_of_parameters: - self.execute(operation, parameters) + if seq_of_parameters: + # There's no reason to format the line more than once, as + # the operation only barely depends on the parameters. So + # we just use the first set of parameters. If there are + # different numbers or types of parameters, we'll error + # anyway. + formatted_operation, parameter_types = _format_operation( + operation, seq_of_parameters[0] + ) + for parameters in seq_of_parameters: + self._execute( + formatted_operation, parameters, None, None, parameter_types + ) def _try_fetch(self, size=None): """Try to start fetching data, if not yet started. @@ -427,7 +446,7 @@ def _format_operation_dict(operation, parameters): raise exceptions.ProgrammingError(exc) -def _format_operation(operation, parameters=None): +def _format_operation(operation, parameters): """Formats parameters in operation in way BigQuery expects. Args: @@ -445,9 +464,67 @@ def _format_operation(operation, parameters=None): ``parameters`` argument. """ if parameters is None or len(parameters) == 0: - return operation.replace("%%", "%") # Still do percent de-escaping. + return operation.replace("%%", "%"), None # Still do percent de-escaping. + + operation, parameter_types = _extract_types(operation) + if parameter_types is None: + raise exceptions.ProgrammingError( + f"Parameters were provided, but {repr(operation)} has no placeholders." + ) if isinstance(parameters, collections_abc.Mapping): - return _format_operation_dict(operation, parameters) + return _format_operation_dict(operation, parameters), parameter_types + + return _format_operation_list(operation, parameters), parameter_types + + +def _extract_types( + operation, extra_type_sub=re.compile(r"(%*)%(?:\(([^:)]*)(?::(\w+))?\))?s").sub +): + """Remove type information from parameter placeholders. + + For every parameter of the form %(name:type)s, replace with %(name)s and add the + item name->type to dict that's returned. + + Returns operation without type information and a dictionary of names and types. + """ + parameter_types = None + + def repl(m): + nonlocal parameter_types + prefix, name, type_ = m.groups() + if len(prefix) % 2: + # The prefix has an odd number of %s, the last of which + # escapes the % we're looking for, so we don't want to + # change anything. + return m.group(0) + + try: + if name: + if not parameter_types: + parameter_types = {} + if type_: + if name in parameter_types: + if type_ != parameter_types[name]: + raise exceptions.ProgrammingError( + f"Conflicting types for {name}: " + f"{parameter_types[name]} and {type_}." + ) + else: + parameter_types[name] = type_ + else: + if not isinstance(parameter_types, dict): + raise TypeError() + + return f"{prefix}%({name})s" + else: + if parameter_types is None: + parameter_types = [] + parameter_types.append(type_) + return f"{prefix}%s" + except (AttributeError, TypeError): + raise exceptions.ProgrammingError( + f"{repr(operation)} mixes named and unamed parameters." + ) - return _format_operation_list(operation, parameters) + return extra_type_sub(repl, operation), parameter_types diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 6ae7cae12345..474d9a74ae3d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -615,7 +615,7 @@ def _cell_magic(line, query): ) raise NameError(msg) - params = _helpers.to_query_parameters(ast.literal_eval(params_option_value)) + params = _helpers.to_query_parameters(ast.literal_eval(params_option_value), {}) project = args.project or context.project diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 0fdf1142f110..2437f3568277 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -1159,3 +1159,18 @@ def fake_isinstance(instance, target_class): "google.cloud.bigquery.schema.isinstance", side_effect=fake_isinstance ) return patcher + + +def test_decimal_as_float_api_repr(): + """Make sure decimals get converted to float.""" + import google.cloud.bigquery.query + from decimal import Decimal + + param = google.cloud.bigquery.query.ScalarQueryParameter( + "x", "FLOAT64", Decimal(42) + ) + assert param.to_api_repr() == { + "parameterType": {"type": "FLOAT64"}, + "parameterValue": {"value": 42.0}, + "name": "x", + } diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 4b2724de0bd1..250ba46d981b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -18,13 +18,15 @@ import operator as op import unittest +import pytest + try: import pyarrow except ImportError: # pragma: NO COVER pyarrow = None import google.cloud._helpers -from google.cloud.bigquery import table +from google.cloud.bigquery import table, enums from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -142,7 +144,7 @@ def test_array_to_query_parameter_sequence_w_invalid_elements(self): def test_to_query_parameters_w_dict(self): parameters = {"somebool": True, "somestring": "a-string-value"} - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, {}) query_parameter_tuples = [] for param in query_parameters: query_parameter_tuples.append((param.name, param.type_, param.value)) @@ -158,7 +160,7 @@ def test_to_query_parameters_w_dict(self): def test_to_query_parameters_w_dict_array_param(self): parameters = {"somelist": [10, 20]} - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, {}) self.assertEqual(len(query_parameters), 1) param = query_parameters[0] @@ -171,11 +173,11 @@ def test_to_query_parameters_w_dict_dict_param(self): parameters = {"my_param": {"foo": "bar"}} with self.assertRaises(NotImplementedError): - _helpers.to_query_parameters(parameters) + _helpers.to_query_parameters(parameters, {}) def test_to_query_parameters_w_list(self): parameters = [True, "a-string-value"] - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, [None, None]) query_parameter_tuples = [] for param in query_parameters: query_parameter_tuples.append((param.name, param.type_, param.value)) @@ -186,7 +188,7 @@ def test_to_query_parameters_w_list(self): def test_to_query_parameters_w_list_array_param(self): parameters = [[10, 20]] - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, [None]) self.assertEqual(len(query_parameters), 1) param = query_parameters[0] @@ -199,10 +201,10 @@ def test_to_query_parameters_w_list_dict_param(self): parameters = [{"foo": "bar"}] with self.assertRaises(NotImplementedError): - _helpers.to_query_parameters(parameters) + _helpers.to_query_parameters(parameters, [None]) def test_to_query_parameters_none_argument(self): - query_parameters = _helpers.to_query_parameters(None) + query_parameters = _helpers.to_query_parameters(None, None) self.assertEqual(query_parameters, []) @@ -338,3 +340,79 @@ def test_custom_on_closed_error_type(self): with self.assertRaisesRegex(RuntimeError, "I'm closed!"): instance.instance_method() + + +VALID_BQ_TYPES = [ + (name, getattr(enums.SqlParameterScalarTypes, name)._type) + for name in dir(enums.SqlParameterScalarTypes) + if not name.startswith("_") +] + + +@pytest.mark.parametrize("alias, type_", VALID_BQ_TYPES) +def test_scalar_to_query_parameter_honors_given_type(alias, type_): + from google.cloud import bigquery + + assert _helpers.scalar_to_query_parameter(1.23, None, alias) == ( + bigquery.ScalarQueryParameter(None, type_, 1.23) + ) + assert _helpers.scalar_to_query_parameter(None, "foo", alias) == ( + bigquery.ScalarQueryParameter("foo", type_, None) + ) + + +def test_scalar_to_query_parameter_honors_given_type_errors_on_invalid(): + with pytest.raises( + google.cloud.bigquery.dbapi.exceptions.ProgrammingError, + match="The given parameter type, INT, for foo is not a valid BigQuery scalar type.", + ): + _helpers.scalar_to_query_parameter(None, "foo", "INT") + + +@pytest.mark.parametrize("alias, type_", VALID_BQ_TYPES) +def test_array_to_query_parameter_honors_given_type(alias, type_): + from google.cloud import bigquery + + assert _helpers.array_to_query_parameter([1.23], None, alias) == ( + bigquery.ArrayQueryParameter(None, type_, [1.23]) + ) + assert _helpers.array_to_query_parameter((), "foo", alias) == ( + bigquery.ArrayQueryParameter("foo", type_, ()) + ) + + +def test_array_to_query_parameter_honors_given_type_errors_on_invalid(): + with pytest.raises( + google.cloud.bigquery.dbapi.exceptions.ProgrammingError, + match="The given parameter type, INT, for foo is not a valid BigQuery scalar type.", + ): + _helpers.array_to_query_parameter((), "foo", "INT") + + +def test_to_query_parameters_dict_w_types(): + from google.cloud import bigquery + + assert sorted( + _helpers.to_query_parameters( + dict(i=1, x=1.2, y=None, z=[]), dict(x="numeric", y="string", z="float64") + ), + key=lambda p: p.name, + ) == [ + bigquery.ScalarQueryParameter("i", "INT64", 1), + bigquery.ScalarQueryParameter("x", "NUMERIC", 1.2), + bigquery.ScalarQueryParameter("y", "STRING", None), + bigquery.ArrayQueryParameter("z", "FLOAT64", []), + ] + + +def test_to_query_parameters_list_w_types(): + from google.cloud import bigquery + + assert _helpers.to_query_parameters( + [1, 1.2, None, []], [None, "numeric", "string", "float64"] + ) == [ + bigquery.ScalarQueryParameter(None, "INT64", 1), + bigquery.ScalarQueryParameter(None, "NUMERIC", 1.2), + bigquery.ScalarQueryParameter(None, "STRING", None), + bigquery.ArrayQueryParameter(None, "FLOAT64", []), + ] diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 039ef3b4c41e..5afe269efaca 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import mock import operator as op import unittest -import mock +import pytest + try: import pyarrow @@ -612,6 +614,15 @@ def test_executemany_w_dml(self): self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) + def test_executemany_empty(self): + from google.cloud.bigquery.dbapi import connect + + connection = connect(self._mock_client(rows=[], num_dml_affected_rows=12)) + cursor = connection.cursor() + cursor.executemany((), ()) + self.assertIsNone(cursor.description) + self.assertEqual(cursor.rowcount, -1) + def test_is_iterable(self): from google.cloud.bigquery import dbapi @@ -638,13 +649,15 @@ def test_is_iterable(self): def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation( - "SELECT %(somevalue)s, %(a `weird` one)s;", + parameter_types = {} + formatted_operation, parameter_types = cursor._format_operation( + "SELECT %(somevalue)s, %(a `weird` one:STRING)s;", {"somevalue": "hi", "a `weird` one": "world"}, ) self.assertEqual( formatted_operation, "SELECT @`somevalue`, @`a \\`weird\\` one`;" ) + self.assertEqual(parameter_types, {"a `weird` one": "STRING"}) def test__format_operation_w_wrong_dict(self): from google.cloud.bigquery import dbapi @@ -660,7 +673,7 @@ def test__format_operation_w_wrong_dict(self): def test__format_operation_w_redundant_dict_key(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation( + formatted_operation, _ = cursor._format_operation( "SELECT %(somevalue)s;", {"somevalue": "foo", "value-not-used": "bar"} ) self.assertEqual(formatted_operation, "SELECT @`somevalue`;") @@ -668,7 +681,7 @@ def test__format_operation_w_redundant_dict_key(self): def test__format_operation_w_sequence(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation( + formatted_operation, _ = cursor._format_operation( "SELECT %s, %s;", ("hello", "world") ) self.assertEqual(formatted_operation, "SELECT ?, ?;") @@ -698,19 +711,19 @@ def test__format_operation_w_too_long_sequence(self): def test__format_operation_w_empty_dict(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation("SELECT '%f'", {}) + formatted_operation, _ = cursor._format_operation("SELECT '%f'", {}) self.assertEqual(formatted_operation, "SELECT '%f'") def test__format_operation_wo_params_single_percent(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation("SELECT '%'", {}) + formatted_operation, _ = cursor._format_operation("SELECT '%'", {}) self.assertEqual(formatted_operation, "SELECT '%'") def test__format_operation_wo_params_double_percents(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation("SELECT '%%'", {}) + formatted_operation, _ = cursor._format_operation("SELECT '%%'", {}) self.assertEqual(formatted_operation, "SELECT '%'") def test__format_operation_unescaped_percent_w_dict_param(self): @@ -734,3 +747,80 @@ def test__format_operation_unescaped_percent_w_list_param(self): "SELECT %s, %s, '100 %';", ["foo", "bar"], ) + + def test__format_operation_no_placeholders(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT 42", + ["foo", "bar"], + ) + + +@pytest.mark.parametrize( + "inp,expect", + [ + ("", ("", None)), + ("values(%(foo)s, %(bar)s)", ("values(%(foo)s, %(bar)s)", {})), + ( + "values('%%(oof:INT64)s', %(foo)s, %(bar)s)", + ("values('%%(oof:INT64)s', %(foo)s, %(bar)s)", {}), + ), + ( + "values(%(foo:INT64)s, %(bar)s)", + ("values(%(foo)s, %(bar)s)", dict(foo="INT64")), + ), + ( + "values('%%(oof:INT64)s, %(foo:INT64)s, %(foo)s)", + ("values('%%(oof:INT64)s, %(foo)s, %(foo)s)", dict(foo="INT64")), + ), + ( + "values(%(foo:INT64)s, %(foo:INT64)s)", + ("values(%(foo)s, %(foo)s)", dict(foo="INT64")), + ), + ( + "values(%(foo:INT64)s, %(bar:NUMERIC)s) 100 %", + ("values(%(foo)s, %(bar)s) 100 %", dict(foo="INT64", bar="NUMERIC")), + ), + (" %s %()s %(:int64)s ", (" %s %s %s ", [None, None, "int64"])), + (" %%s %s %()s %(:int64)s ", (" %%s %s %s %s ", [None, None, "int64"])), + ( + "values(%%%(foo:INT64)s, %(bar)s)", + ("values(%%%(foo)s, %(bar)s)", dict(foo="INT64")), + ), + ( + "values(%%%%(foo:INT64)s, %(bar)s)", + ("values(%%%%(foo:INT64)s, %(bar)s)", dict()), + ), + ( + "values(%%%%%(foo:INT64)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")), + ), + ], +) +def test__extract_types(inp, expect): + from google.cloud.bigquery.dbapi.cursor import _extract_types as et + + assert et(inp) == expect + + +@pytest.mark.parametrize( + "match,inp", + [ + ( + "Conflicting types for foo: numeric and int64.", + " %(foo:numeric)s %(foo:int64)s ", + ), + (r"' %s %\(foo\)s ' mixes named and unamed parameters.", " %s %(foo)s "), + (r"' %\(foo\)s %s ' mixes named and unamed parameters.", " %(foo)s %s "), + ], +) +def test__extract_types_fail(match, inp): + from google.cloud.bigquery.dbapi.cursor import _extract_types as et + from google.cloud.bigquery.dbapi import exceptions + + with pytest.raises(exceptions.ProgrammingError, match=match): + et(inp) From 5f0473628808f7806bf9b8cc86cec73bc15da49b Mon Sep 17 00:00:00 2001 From: "google-cloud-policy-bot[bot]" <80869356+google-cloud-policy-bot[bot]@users.noreply.github.com> Date: Thu, 29 Apr 2021 10:35:16 -0600 Subject: [PATCH 1135/2016] chore: add SECURITY.md (#636) Co-authored-by: google-cloud-policy-bot[bot] <80869356+google-cloud-policy-bot[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/SECURITY.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 packages/google-cloud-bigquery/SECURITY.md diff --git a/packages/google-cloud-bigquery/SECURITY.md b/packages/google-cloud-bigquery/SECURITY.md new file mode 100644 index 000000000000..8b58ae9c01ae --- /dev/null +++ b/packages/google-cloud-bigquery/SECURITY.md @@ -0,0 +1,7 @@ +# Security Policy + +To report a security issue, please use [g.co/vulnz](https://g.co/vulnz). + +The Google Security Team will respond within 5 working days of your report on g.co/vulnz. + +We use g.co/vulnz for our intake, and do coordination and disclosure here using GitHub Security Advisory to privately discuss and fix the issue. From 812b5186381e7109203a987a04259b8d059c91da Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 29 Apr 2021 10:35:37 -0600 Subject: [PATCH 1136/2016] fix: add DECIMAL and BIGDECIMAL as aliases for NUMERIC and BIGNUMERIC (#638) * Added decimal types to SqlTypeNames and SqlParameterScalarTypes * Go ahead and alias on the client To convey to the observant that these are aliases, even though they could be used (more or less) directly. * Make sure that DECIMAL data are converted when making API calls. This is mainly as a backstop -- DECIMAL requests should be converted to NUMERIC. * blacken --- .../google-cloud-bigquery/google/cloud/bigquery/_helpers.py | 5 +++++ .../google-cloud-bigquery/google/cloud/bigquery/enums.py | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index ad8e3f00323a..4fe29291dc79 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -363,6 +363,11 @@ def _time_to_json(value): "DATETIME": _datetime_to_json, "DATE": _date_to_json, "TIME": _time_to_json, + # Make sure DECIMAL and BIGDECIMAL are handled, even though + # requests for them should be converted to NUMERIC. Better safe + # than sorry. + "DECIMAL": _decimal_to_json, + "BIGDECIMAL": _decimal_to_json, } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index b378f091b28c..787c2449d963 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -203,8 +203,8 @@ class SqlTypeNames(str, enum.Enum): INT64 = "INTEGER" FLOAT = "FLOAT" FLOAT64 = "FLOAT" - NUMERIC = "NUMERIC" - BIGNUMERIC = "BIGNUMERIC" + DECIMAL = NUMERIC = "NUMERIC" + BIGDECIMAL = BIGNUMERIC = "BIGNUMERIC" BOOLEAN = "BOOLEAN" BOOL = "BOOLEAN" GEOGRAPHY = "GEOGRAPHY" # NOTE: not available in legacy types @@ -227,6 +227,8 @@ class SqlParameterScalarTypes: FLOAT64 = ScalarQueryParameterType("FLOAT64") NUMERIC = ScalarQueryParameterType("NUMERIC") BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") + DECIMAL = ScalarQueryParameterType("NUMERIC") + BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") BOOLEAN = ScalarQueryParameterType("BOOL") BOOL = ScalarQueryParameterType("BOOL") GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") From bc6ddefedcc795b0de1bafb1115fdff6a2f54481 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 29 Apr 2021 17:20:06 +0000 Subject: [PATCH 1137/2016] chore: release 2.15.0 (#637) :robot: I have created a release \*beep\* \*boop\* --- ## [2.15.0](https://www.github.com/googleapis/python-bigquery/compare/v2.14.0...v2.15.0) (2021-04-29) ### Features * Extended DB API parameter syntax to optionally provide parameter types ([#626](https://www.github.com/googleapis/python-bigquery/issues/626)) ([8bcf397](https://www.github.com/googleapis/python-bigquery/commit/8bcf397fbe2527e06317741875a059b109cfcd9c)) ### Bug Fixes * add DECIMAL and BIGDECIMAL as aliases for NUMERIC and BIGNUMERIC ([#638](https://www.github.com/googleapis/python-bigquery/issues/638)) ([aa59023](https://www.github.com/googleapis/python-bigquery/commit/aa59023317b1c63720fb717b3544f755652da58d)) * The DB API Binary function accepts bytes data ([#630](https://www.github.com/googleapis/python-bigquery/issues/630)) ([4396e70](https://www.github.com/googleapis/python-bigquery/commit/4396e70771af6889d3242c37c5ff2e80241023a2)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 9aee40510984..6a222a710685 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.15.0](https://www.github.com/googleapis/python-bigquery/compare/v2.14.0...v2.15.0) (2021-04-29) + + +### Features + +* Extended DB API parameter syntax to optionally provide parameter types ([#626](https://www.github.com/googleapis/python-bigquery/issues/626)) ([8bcf397](https://www.github.com/googleapis/python-bigquery/commit/8bcf397fbe2527e06317741875a059b109cfcd9c)) + + +### Bug Fixes + +* add DECIMAL and BIGDECIMAL as aliases for NUMERIC and BIGNUMERIC ([#638](https://www.github.com/googleapis/python-bigquery/issues/638)) ([aa59023](https://www.github.com/googleapis/python-bigquery/commit/aa59023317b1c63720fb717b3544f755652da58d)) +* The DB API Binary function accepts bytes data ([#630](https://www.github.com/googleapis/python-bigquery/issues/630)) ([4396e70](https://www.github.com/googleapis/python-bigquery/commit/4396e70771af6889d3242c37c5ff2e80241023a2)) + ## [2.14.0](https://www.github.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) (2021-04-26) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index ba8b4e8af8c9..a8381fff6ee7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.14.0" +__version__ = "2.15.0" From b75f4309abd08c40511cfdcf00ce43eb1fbeec25 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 30 Apr 2021 16:20:25 +0200 Subject: [PATCH 1138/2016] chore(deps): update dependency google-cloud-bigquery to v2.15.0 (#639) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index f46b141fd0a5..324ece4ef441 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.14.0 +google-cloud-bigquery==2.15.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index f7b5cebe9ca6..077896cb37c6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.14.0 +google-cloud-bigquery==2.15.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 From 52a97eeb899d300de04475b2615192ee343e27b8 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 30 Apr 2021 16:45:10 +0200 Subject: [PATCH 1139/2016] chore(deps): update dependency pyarrow to v4 (#641) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 077896cb37c6..7e04b06b5a81 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -8,5 +8,5 @@ matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==3.0.0 +pyarrow==4.0.0 pytz==2021.1 From aa418974bfd770bd7e3aec40b70b6b8c4ab0b501 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 30 Apr 2021 16:45:32 +0200 Subject: [PATCH 1140/2016] chore(deps): update dependency grpcio to v1.37.1 (#640) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 7e04b06b5a81..04883477ad7b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.15.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 -grpcio==1.37.0 +grpcio==1.37.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From 0f3df2bb82e25514bfe7a55793e2128b7603d884 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 4 May 2021 19:32:02 +0200 Subject: [PATCH 1141/2016] chore(deps): update dependency pytest to v6.2.4 (#647) --- .../samples/geography/requirements-test.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 299d90b65200..b0cf76724270 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==6.2.3 +pytest==6.2.4 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 299d90b65200..b0cf76724270 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==6.2.3 +pytest==6.2.4 mock==4.0.3 From 45a03d6cc4d33a05adac92d68a3c78048fe05fad Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 5 May 2021 14:41:46 +0200 Subject: [PATCH 1142/2016] feat: add with_name() to ScalarQueryParameterType (#644) * feat: add with_name() to ScalarQueryParameterType * Clarify unsetting a name, add extra test --- .../google/cloud/bigquery/query.py | 15 ++++++++++++++ .../tests/unit/test_query.py | 20 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 3751eb12403b..d1e9a45a5b1b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -16,6 +16,7 @@ from collections import OrderedDict import copy +from typing import Union from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json @@ -119,6 +120,20 @@ def to_api_repr(self): # attributes in the API representation when needed. Here we omit them. return {"type": self._type} + def with_name(self, new_name: Union[str, None]): + """Return a copy of the instance with ``name`` set to ``new_name``. + + Args: + name (Union[str, None]): + The new name of the query parameter type. If ``None``, the existing + name is cleared. + + Returns: + google.cloud.bigquery.query.ScalarQueryParameterType: + A new instance with updated name. + """ + return type(self)(self._type, name=new_name, description=self.description) + def __repr__(self): name = f", name={self.name!r}" if self.name is not None else "" description = ( diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index c8be2911f2fb..90fc30b2074f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -98,6 +98,26 @@ def test_repr_all_optional_attrs(self): "ScalarQueryParameterType('BYTES', name='foo', description='this is foo')", ) + def test_with_name_returns_copy_w_changed_name(self): + param_type = self._make_one("BOOLEAN", name=None, description="Some checkbox.") + modified_type = param_type.with_name("allow_emails") + + self.assertIsNot(modified_type, param_type) # Result is a copy. + self.assertEqual(modified_type.name, "allow_emails") + + # The rest of the The rest of the fields should have been preserved. + self.assertEqual(modified_type._type, param_type._type) + self.assertEqual(modified_type.description, param_type.description) + + def test_with_name_clearing_the_value(self): + param_type = self._make_one( + "BOOLEAN", name="allow_emails", description="Some checkbox." + ) + modified_type = param_type.with_name(None) + + self.assertIsNone(modified_type.name) + self.assertEqual(param_type.name, "allow_emails") # original unchanged + class Test_ArrayQueryParameterType(unittest.TestCase): @staticmethod From 39bfcdc3d85bb82d8da9abb680d5beecef7be99d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 5 May 2021 15:49:10 +0200 Subject: [PATCH 1143/2016] deps: expand supported pyarrow versions to v4 (#643) * deps: expand supported pyarrow versions to v4 * Expand *all* pyarrow pins. * Constrain pyarrow to v4.0.0+ in Python 3.9 tests --- packages/google-cloud-bigquery/setup.py | 6 +++--- packages/google-cloud-bigquery/testing/constraints-3.9.txt | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 607ffb63fbaf..6a6202ef9aa1 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -47,10 +47,10 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.32.0, < 2.0dev", - "pyarrow >= 1.0.0, < 4.0dev", + "pyarrow >= 1.0.0, < 5.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev"], - "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], + "bignumeric_type": ["pyarrow >= 3.0.0, < 5.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.9.txt b/packages/google-cloud-bigquery/testing/constraints-3.9.txt index e69de29bb2d1..39dc6250ef66 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.9.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.9.txt @@ -0,0 +1,7 @@ +# This constraints file is used to make sure that the latest dependency versions +# we claim to support in setup.py are indeed installed in test sessions in the most +# recent Python version supported (3.9 at the time of writing - 2021-05-05). +# +# NOTE: Not comprehensive yet, will eventually be maintained semi-automatically by +# the renovate bot. +pyarrow>=4.0.0 From cb338e13ba11bc40749cdebd59af845b0227c40b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 5 May 2021 17:17:19 +0200 Subject: [PATCH 1144/2016] chore: use file paths for --cov args in noxfile (#648) --- packages/google-cloud-bigquery/noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 7ba081660354..654bbd09399b 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -77,8 +77,8 @@ def default(session, install_extras=True): session.run( "py.test", "--quiet", - "--cov=google.cloud.bigquery", - "--cov=tests.unit", + "--cov=google/cloud/bigquery", + "--cov=tests/unit", "--cov-append", "--cov-config=.coveragerc", "--cov-report=", From 08ee023c2b99970f807e8cf1a9f616f7c3f9fd69 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 6 May 2021 11:36:10 +0200 Subject: [PATCH 1145/2016] chore: release 2.16.0 (#649) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 6a222a710685..15d594c1b13a 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.16.0](https://www.github.com/googleapis/python-bigquery/compare/v2.15.0...v2.16.0) (2021-05-05) + + +### Features + +* add with_name() to ScalarQueryParameterType ([#644](https://www.github.com/googleapis/python-bigquery/issues/644)) ([6cc6876](https://www.github.com/googleapis/python-bigquery/commit/6cc6876eb0e5bf49fdc047256a945dcf1b289576)) + + +### Dependencies + +* expand supported pyarrow versions to v4 ([#643](https://www.github.com/googleapis/python-bigquery/issues/643)) ([9e1d386](https://www.github.com/googleapis/python-bigquery/commit/9e1d3869c2024fe7a8af57ff59838d904ca5db03)) + ## [2.15.0](https://www.github.com/googleapis/python-bigquery/compare/v2.14.0...v2.15.0) (2021-04-29) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index a8381fff6ee7..a93d72c2b64e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.15.0" +__version__ = "2.16.0" From 52849ae89bf36b33d79ea6902fe766afb0ffd80d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 7 May 2021 16:55:55 +0200 Subject: [PATCH 1146/2016] chore: avoid pytype error caused by attrs==21.1.0 (#656) --- packages/google-cloud-bigquery/noxfile.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 654bbd09399b..dc77be3b73c3 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -103,6 +103,10 @@ def unit_noextras(session): @nox.session(python=DEFAULT_PYTHON_VERSION) def pytype(session): """Run type checks.""" + # An indirect dependecy attrs==21.1.0 breaks the check, and installing a less + # recent version avoids the error until a possibly better fix is found. + # https://github.com/googleapis/python-bigquery/issues/655 + session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install("ipython") session.install(PYTYPE_VERSION) From 344653a03dbcb4b446648e692b9d30da6a710f88 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 7 May 2021 16:14:02 -0600 Subject: [PATCH 1147/2016] chore: use 3.8 for blacken session (#653) The Autosynth build now has 3.8: https://github.com/googleapis/synthtool/commit/fd33d7df9ecfc79cc6dbe552b497a4fb36f2e635#diff-f80f936e0eac73417c05535c764a44906afd70a37096ea3c58934a9f6f1e7fcd Should fix unexpected style in #651 --- packages/google-cloud-bigquery/noxfile.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index dc77be3b73c3..a52025635812 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -257,15 +257,12 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -@nox.session(python="3.6") +@nox.session(python=DEFAULT_PYTHON_VERSION) def blacken(session): """Run black. Format code to uniform standard. - - This currently uses Python 3.6 due to the automated Kokoro run of synthtool. - That run uses an image that doesn't have 3.6 installed. Before updating this - check the state of the `gcp_ubuntu_config` we use for that Kokoro run. """ + session.install(BLACK_VERSION) session.run("black", *BLACK_PATHS) From 2dc63e8ed60246400af4ff7a47750489a2500a45 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Sun, 9 May 2021 04:05:30 -0400 Subject: [PATCH 1148/2016] chore: add library type to .repo-metadata.json (#658) --- packages/google-cloud-bigquery/.repo-metadata.json | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/.repo-metadata.json b/packages/google-cloud-bigquery/.repo-metadata.json index f50dbbeb2b51..f132056d5a39 100644 --- a/packages/google-cloud-bigquery/.repo-metadata.json +++ b/packages/google-cloud-bigquery/.repo-metadata.json @@ -6,6 +6,7 @@ "issue_tracker": "https://issuetracker.google.com/savedsearches/559654", "release_level": "ga", "language": "python", + "library_type": "GAPIC_COMBO", "repo": "googleapis/python-bigquery", "distribution_name": "google-cloud-bigquery", "api_id": "bigquery.googleapis.com", From bbf9d5cfb1ee9f51d9f0b60f918a41ffb5b5df86 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 12 May 2021 04:22:20 -0600 Subject: [PATCH 1149/2016] fix: executemany rowcount only reflected the last execution (#660) --- .../google/cloud/bigquery/dbapi/cursor.py | 4 ++++ .../google-cloud-bigquery/tests/unit/test_dbapi_cursor.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index f74781df9728..c8fc49378e2b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -218,6 +218,7 @@ def executemany(self, operation, seq_of_parameters): Sequence of many sets of parameter values. """ if seq_of_parameters: + rowcount = 0 # There's no reason to format the line more than once, as # the operation only barely depends on the parameters. So # we just use the first set of parameters. If there are @@ -230,6 +231,9 @@ def executemany(self, operation, seq_of_parameters): self._execute( formatted_operation, parameters, None, None, parameter_types ) + rowcount += self.rowcount + + self.rowcount = rowcount def _try_fetch(self, size=None): """Try to start fetching data, if not yet started. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 5afe269efaca..55e45325443b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -612,7 +612,7 @@ def test_executemany_w_dml(self): (("test",), ("anothertest",)), ) self.assertIsNone(cursor.description) - self.assertEqual(cursor.rowcount, 12) + self.assertEqual(cursor.rowcount, 24) # 24 because 2 * 12 because cumulatve. def test_executemany_empty(self): from google.cloud.bigquery.dbapi import connect From 0442ded7cea40f23f7c51ed0b9c4f7b51a7b0592 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 13 May 2021 09:17:21 +0200 Subject: [PATCH 1150/2016] chore: release 2.16.1 (#662) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 15d594c1b13a..ef184dffb3f9 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12) + + +### Bug Fixes + +* executemany rowcount only reflected the last execution ([#660](https://www.github.com/googleapis/python-bigquery/issues/660)) ([aeadc8c](https://www.github.com/googleapis/python-bigquery/commit/aeadc8c2d614bb9f0883ec901fca48930f3aaf19)) + ## [2.16.0](https://www.github.com/googleapis/python-bigquery/compare/v2.15.0...v2.16.0) (2021-05-05) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index a93d72c2b64e..61e0c0a8360c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.16.0" +__version__ = "2.16.1" From 6a1951b3ac3173bdf731e53ee98c85bf94299992 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 13 May 2021 12:30:51 +0200 Subject: [PATCH 1151/2016] chore(deps): update dependency google-cloud-bigquery to v2.16.1 (#652) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 324ece4ef441..e494fbaaeb1e 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.15.0 +google-cloud-bigquery==2.16.1 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 04883477ad7b..2dfee39b5590 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.15.0 +google-cloud-bigquery==2.16.1 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.37.1 From d1f8a31aaf8add2e756487d43efbcd51bb8a84d8 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 13 May 2021 10:59:52 -0400 Subject: [PATCH 1152/2016] chore: migrate to owl bot (#663) * chore: migrate to owl bot * chore: copy files from googleapis-gen f2de93abafa306b2ebadf1d10d947db8bcf2bf15 * chore: run the post processor --- .../.github/.OwlBot.lock.yaml | 4 + .../.github/.OwlBot.yaml | 26 ++++ .../.pre-commit-config.yaml | 2 +- .../google-cloud-bigquery/CONTRIBUTING.rst | 16 +-- .../{synth.py => owlbot.py} | 110 +++++++------- packages/google-cloud-bigquery/synth.metadata | 134 ------------------ 6 files changed, 85 insertions(+), 207 deletions(-) create mode 100644 packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml create mode 100644 packages/google-cloud-bigquery/.github/.OwlBot.yaml rename packages/google-cloud-bigquery/{synth.py => owlbot.py} (60%) delete mode 100644 packages/google-cloud-bigquery/synth.metadata diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml new file mode 100644 index 000000000000..d49860b32e70 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -0,0 +1,4 @@ +docker: + digest: sha256:457583330eec64daa02aeb7a72a04d33e7be2428f646671ce4045dcbc0191b1e + image: gcr.io/repo-automation-bots/owlbot-python:latest + diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.yaml new file mode 100644 index 000000000000..2b6451c193d1 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/.OwlBot.yaml @@ -0,0 +1,26 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +docker: + image: gcr.io/repo-automation-bots/owlbot-python:latest + +deep-remove-regex: + - /owl-bot-staging + +deep-copy-regex: + - source: /google/cloud/bigquery/(v.*)/.*-py/(.*) + dest: /owl-bot-staging/$1/$2 + +begin-after-commit-hash: f2de93abafa306b2ebadf1d10d947db8bcf2bf15 + diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml index 8912e9b5d7d7..1bbd787833ec 100644 --- a/packages/google-cloud-bigquery/.pre-commit-config.yaml +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -26,6 +26,6 @@ repos: hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.0 + rev: 3.9.1 hooks: - id: flake8 diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index a0e330e44c1e..20ba9e62efee 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -160,21 +160,7 @@ Running System Tests auth settings and change some configuration in your project to run all the tests. -- System tests will be run against an actual project and - so you'll need to provide some environment variables to facilitate - authentication to your project: - - - ``GOOGLE_APPLICATION_CREDENTIALS``: The path to a JSON key file; - Such a file can be downloaded directly from the developer's console by clicking - "Generate new JSON key". See private key - `docs `__ - for more details. - -- Once you have downloaded your json keys, set the environment variable - ``GOOGLE_APPLICATION_CREDENTIALS`` to the absolute path of the json file:: - - $ export GOOGLE_APPLICATION_CREDENTIALS="/Users//path/to/app_credentials.json" - +- System tests will be run against an actual project. You should use local credentials from gcloud when possible. See `Best practices for application authentication `__. Some tests require a service account. For those tests see `Authenticating as a service account `__. ************* Test Coverage diff --git a/packages/google-cloud-bigquery/synth.py b/packages/google-cloud-bigquery/owlbot.py similarity index 60% rename from packages/google-cloud-bigquery/synth.py rename to packages/google-cloud-bigquery/owlbot.py index d99f368cc4f2..f45c24fbb51d 100644 --- a/packages/google-cloud-bigquery/synth.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -19,36 +19,61 @@ from synthtool import gcp from synthtool.languages import python -gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() -version = "v2" -library = gapic.py_library( - service="bigquery", - version=version, - bazel_target=f"//google/cloud/bigquery/{version}:bigquery-{version}-py", - include_protos=True, -) - -s.move( - library, - excludes=[ - "*.tar.gz", - "docs/index.rst", - "docs/bigquery_v2/*_service.rst", - "docs/bigquery_v2/services.rst", - "README.rst", - "noxfile.py", - "setup.py", - "scripts/fixup_bigquery_v2_keywords.py", - library / f"google/cloud/bigquery/__init__.py", - library / f"google/cloud/bigquery/py.typed", - # There are no public API endpoints for the generated ModelServiceClient, - # thus there's no point in generating it and its tests. - library / f"google/cloud/bigquery_{version}/services/**", - library / f"tests/unit/gapic/bigquery_{version}/**", - ], -) +default_version = "v2" + +for library in s.get_staging_dirs(default_version): + # Do not expose ModelServiceClient, as there is no public API endpoint for the + # models service. + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"from \.services\.model_service import ModelServiceClient", + "", + ) + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"""["']ModelServiceClient["'],""", + "", + ) + + # Adjust Model docstring so that Sphinx does not think that "predicted_" is + # a reference to something, issuing a false warning. + s.replace( + library / f"google/cloud/bigquery_{library.name}/types/model.py", + r'will have a "predicted_"', + "will have a `predicted_`", + ) + + # Avoid breaking change due to change in field renames. + # https://github.com/googleapis/python-bigquery/issues/319 + s.replace( + library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", + r"type_ ", + "type " + ) + + s.move( + library, + excludes=[ + "*.tar.gz", + "docs/index.rst", + f"docs/bigquery_{library.name}/*_service.rst", + f"docs/bigquery_{library.name}/services.rst", + "README.rst", + "noxfile.py", + "setup.py", + f"scripts/fixup_bigquery_{library.name}_keywords.py", + f"google/cloud/bigquery/__init__.py", + f"google/cloud/bigquery/py.typed", + # There are no public API endpoints for the generated ModelServiceClient, + # thus there's no point in generating it and its tests. + f"google/cloud/bigquery_{library.name}/services/**", + f"tests/unit/gapic/bigquery_{library.name}/**", + ], + ) + +s.remove_staging_dirs() # ---------------------------------------------------------------------------- # Add templated files @@ -79,41 +104,12 @@ python.py_samples() -# Do not expose ModelServiceClient, as there is no public API endpoint for the -# models service. -s.replace( - "google/cloud/bigquery_v2/__init__.py", - r"from \.services\.model_service import ModelServiceClient", - "", -) -s.replace( - "google/cloud/bigquery_v2/__init__.py", - r"""["']ModelServiceClient["'],""", - "", -) - -# Adjust Model docstring so that Sphinx does not think that "predicted_" is -# a reference to something, issuing a false warning. -s.replace( - "google/cloud/bigquery_v2/types/model.py", - r'will have a "predicted_"', - "will have a `predicted_`", -) - s.replace( "docs/conf.py", r'\{"members": True\}', '{"members": True, "inherited-members": True}' ) -# Avoid breaking change due to change in field renames. -# https://github.com/googleapis/python-bigquery/issues/319 -s.replace( - "google/cloud/bigquery_v2/types/standard_sql.py", - r"type_ ", - "type " -) - # Tell Sphinx to ingore autogenerated docs files. s.replace( "docs/conf.py", diff --git a/packages/google-cloud-bigquery/synth.metadata b/packages/google-cloud-bigquery/synth.metadata deleted file mode 100644 index b031618b0e0b..000000000000 --- a/packages/google-cloud-bigquery/synth.metadata +++ /dev/null @@ -1,134 +0,0 @@ -{ - "sources": [ - { - "git": { - "name": ".", - "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "f95f415d3441b3928f6cc705cb8a75603d790fd6" - } - }, - { - "git": { - "name": "googleapis", - "remote": "https://github.com/googleapis/googleapis.git", - "sha": "28a591963253d52ce3a25a918cafbdd9928de8cf", - "internalRef": "361662015" - } - }, - { - "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "043cc620d6a6111816d9e09f2a97208565fde958" - } - }, - { - "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "043cc620d6a6111816d9e09f2a97208565fde958" - } - } - ], - "destinations": [ - { - "client": { - "source": "googleapis", - "apiName": "bigquery", - "apiVersion": "v2", - "language": "python", - "generator": "bazel" - } - } - ], - "generatedFiles": [ - ".coveragerc", - ".flake8", - ".github/CONTRIBUTING.md", - ".github/ISSUE_TEMPLATE/bug_report.md", - ".github/ISSUE_TEMPLATE/feature_request.md", - ".github/ISSUE_TEMPLATE/support_request.md", - ".github/PULL_REQUEST_TEMPLATE.md", - ".github/header-checker-lint.yml", - ".github/release-please.yml", - ".github/snippet-bot.yml", - ".gitignore", - ".kokoro/build.sh", - ".kokoro/continuous/common.cfg", - ".kokoro/continuous/continuous.cfg", - ".kokoro/docker/docs/Dockerfile", - ".kokoro/docker/docs/fetch_gpg_keys.sh", - ".kokoro/docs/common.cfg", - ".kokoro/docs/docs-presubmit.cfg", - ".kokoro/docs/docs.cfg", - ".kokoro/populate-secrets.sh", - ".kokoro/presubmit/common.cfg", - ".kokoro/presubmit/system-3.8.cfg", - ".kokoro/publish-docs.sh", - ".kokoro/release.sh", - ".kokoro/release/common.cfg", - ".kokoro/release/release.cfg", - ".kokoro/samples/lint/common.cfg", - ".kokoro/samples/lint/continuous.cfg", - ".kokoro/samples/lint/periodic.cfg", - ".kokoro/samples/lint/presubmit.cfg", - ".kokoro/samples/python3.6/common.cfg", - ".kokoro/samples/python3.6/continuous.cfg", - ".kokoro/samples/python3.6/periodic-head.cfg", - ".kokoro/samples/python3.6/periodic.cfg", - ".kokoro/samples/python3.6/presubmit.cfg", - ".kokoro/samples/python3.7/common.cfg", - ".kokoro/samples/python3.7/continuous.cfg", - ".kokoro/samples/python3.7/periodic-head.cfg", - ".kokoro/samples/python3.7/periodic.cfg", - ".kokoro/samples/python3.7/presubmit.cfg", - ".kokoro/samples/python3.8/common.cfg", - ".kokoro/samples/python3.8/continuous.cfg", - ".kokoro/samples/python3.8/periodic-head.cfg", - ".kokoro/samples/python3.8/periodic.cfg", - ".kokoro/samples/python3.8/presubmit.cfg", - ".kokoro/test-samples-against-head.sh", - ".kokoro/test-samples-impl.sh", - ".kokoro/test-samples.sh", - ".kokoro/trampoline.sh", - ".kokoro/trampoline_v2.sh", - ".pre-commit-config.yaml", - ".trampolinerc", - "CODE_OF_CONDUCT.md", - "CONTRIBUTING.rst", - "LICENSE", - "MANIFEST.in", - "docs/_static/custom.css", - "docs/_templates/layout.html", - "docs/bigquery_v2/types.rst", - "docs/conf.py", - "google/cloud/bigquery_v2/__init__.py", - "google/cloud/bigquery_v2/proto/encryption_config.proto", - "google/cloud/bigquery_v2/proto/model.proto", - "google/cloud/bigquery_v2/proto/model_reference.proto", - "google/cloud/bigquery_v2/proto/standard_sql.proto", - "google/cloud/bigquery_v2/proto/table_reference.proto", - "google/cloud/bigquery_v2/py.typed", - "google/cloud/bigquery_v2/types/__init__.py", - "google/cloud/bigquery_v2/types/encryption_config.py", - "google/cloud/bigquery_v2/types/model.py", - "google/cloud/bigquery_v2/types/model_reference.py", - "google/cloud/bigquery_v2/types/standard_sql.py", - "google/cloud/bigquery_v2/types/table_reference.py", - "mypy.ini", - "renovate.json", - "samples/AUTHORING_GUIDE.md", - "samples/CONTRIBUTING.md", - "samples/geography/noxfile.py", - "samples/snippets/noxfile.py", - "scripts/decrypt-secrets.sh", - "scripts/readme-gen/readme_gen.py", - "scripts/readme-gen/templates/README.tmpl.rst", - "scripts/readme-gen/templates/auth.tmpl.rst", - "scripts/readme-gen/templates/auth_api_key.tmpl.rst", - "scripts/readme-gen/templates/install_deps.tmpl.rst", - "scripts/readme-gen/templates/install_portaudio.tmpl.rst", - "setup.cfg", - "testing/.gitignore" - ] -} \ No newline at end of file From 5b3bf17de8aefe457f604b664a3efde9d9c26890 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sun, 16 May 2021 11:14:01 +0000 Subject: [PATCH 1153/2016] chore: new owl bot post processor docker image (#665) gcr.io/repo-automation-bots/owlbot-python:latest@sha256:4c981a6b6f2b8914a448d7b3a01688365be03e3ed26dfee399a6aa77fb112eaa --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 5 ++--- packages/google-cloud-bigquery/.pre-commit-config.yaml | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index d49860b32e70..864c17653f80 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,4 +1,3 @@ docker: - digest: sha256:457583330eec64daa02aeb7a72a04d33e7be2428f646671ce4045dcbc0191b1e - image: gcr.io/repo-automation-bots/owlbot-python:latest - + image: gcr.io/repo-automation-bots/owlbot-python:latest + digest: sha256:4c981a6b6f2b8914a448d7b3a01688365be03e3ed26dfee399a6aa77fb112eaa diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml index 1bbd787833ec..4f00c7cffcfd 100644 --- a/packages/google-cloud-bigquery/.pre-commit-config.yaml +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -26,6 +26,6 @@ repos: hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.1 + rev: 3.9.2 hooks: - id: flake8 From f8d4d20ab281e1182eec1140860395122e777bda Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 20 May 2021 09:56:53 +0200 Subject: [PATCH 1154/2016] feat: detect obsolete BQ Storage extra at runtime (#666) * feat: detect obsolete BQ Storage extra at runtime * Cover the changes with unit tests * Skip BQ Storage version tests if extra missing * Rename and improve _create_bqstorage_client() The method is renamed to _ensure_bqstorage_client() and now performs a check if BQ Storage dependency is recent enough. * Remove BQ Storage check from dbapi.Cursor The check is now performed in dbapi.Connection, which is sufficient. * Remove BQ Storage check in _pandas_helpers The methods in higher layers already do the same check before a BQ Storage client instance is passed to _pandas_helpers._download_table_bqstorage() helper. * Simplify BQ Storage client factory in magics Lean more heavily on client._ensure_bqstorage_client() to de-duplicate logic. * Cover missing code lines with tests --- .../google/cloud/bigquery/__init__.py | 3 + .../google/cloud/bigquery/_helpers.py | 30 ++++++++ .../google/cloud/bigquery/client.py | 57 ++++++++++++-- .../google/cloud/bigquery/dbapi/connection.py | 6 +- .../google/cloud/bigquery/exceptions.py | 21 +++++ .../google/cloud/bigquery/magics/magics.py | 11 ++- .../google/cloud/bigquery/table.py | 14 +++- .../tests/unit/test__helpers.py | 38 ++++++++++ .../tests/unit/test_client.py | 76 +++++++++++++++++-- .../tests/unit/test_dbapi_connection.py | 20 ++++- .../tests/unit/test_dbapi_cursor.py | 12 ++- .../tests/unit/test_magics.py | 44 ++++++++++- .../tests/unit/test_table.py | 61 ++++++++++++--- 13 files changed, 357 insertions(+), 36 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index f609468f5844..ec08b2c843ec 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -39,6 +39,7 @@ from google.cloud.bigquery import enums from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -152,6 +153,8 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", + # Custom exceptions + "LegacyBigQueryStorageError", ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 4fe29291dc79..7602483c2edf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -25,6 +25,10 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes +import pkg_resources + +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -36,6 +40,32 @@ re.VERBOSE, ) +_MIN_BQ_STORAGE_VERSION = pkg_resources.parse_version("2.0.0") + + +def _verify_bq_storage_version(): + """Verify that a recent enough version of BigQuery Storage extra is installed. + + The function assumes that google-cloud-bigquery-storage extra is installed, and + should thus be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite the constraints + in setup.py, the the calling code can use this helper to verify the version + compatibility at runtime. + """ + from google.cloud import bigquery_storage + + installed_version = pkg_resources.parse_version( + getattr(bigquery_storage, "__version__", "legacy") + ) + + if installed_version < _MIN_BQ_STORAGE_VERSION: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, please upgrade " + f"it to version >= 2.0.0 (version found: {installed_version})." + ) + raise LegacyBigQueryStorageError(msg) + def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 8d0acb86787a..7ef3795a889a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -50,16 +50,25 @@ from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # pytype: disable=import-error +try: + from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( + DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, + ) +except ImportError: + DEFAULT_BQSTORAGE_CLIENT_INFO = None + from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery._helpers import _verify_bq_storage_version from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -445,15 +454,38 @@ def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: ) return DatasetReference(project, dataset_id) - def _create_bqstorage_client(self): + def _ensure_bqstorage_client( + self, + bqstorage_client: Optional[ + "google.cloud.bigquery_storage.BigQueryReadClient" + ] = None, + client_options: Optional[google.api_core.client_options.ClientOptions] = None, + client_info: Optional[ + "google.api_core.gapic_v1.client_info.ClientInfo" + ] = DEFAULT_BQSTORAGE_CLIENT_INFO, + ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]: """Create a BigQuery Storage API client using this client's credentials. - If a client cannot be created due to missing dependencies, raise a - warning and return ``None``. + If a client cannot be created due to a missing or outdated dependency + `google-cloud-bigquery-storage`, raise a warning and return ``None``. + + If the `bqstorage_client` argument is not ``None``, still perform the version + check and return the argument back to the caller if the check passes. If it + fails, raise a warning and return ``None``. + + Args: + bqstorage_client: + An existing BigQuery Storage client instance to check for version + compatibility. If ``None``, a new instance is created and returned. + client_options: + Custom options used with a new BigQuery Storage client instance if one + is created. + client_info: + The client info used with a new BigQuery Storage client instance if one + is created. Returns: - Optional[google.cloud.bigquery_storage.BigQueryReadClient]: - A BigQuery Storage API client. + A BigQuery Storage API client. """ try: from google.cloud import bigquery_storage @@ -464,7 +496,20 @@ def _create_bqstorage_client(self): ) return None - return bigquery_storage.BigQueryReadClient(credentials=self._credentials) + try: + _verify_bq_storage_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return None + + if bqstorage_client is None: + bqstorage_client = bigquery_storage.BigQueryReadClient( + credentials=self._credentials, + client_options=client_options, + client_info=client_info, + ) + + return bqstorage_client def _dataset_from_arg(self, dataset): if isinstance(dataset, str): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py index 459fc82aa4a0..66dee7dfb8c3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py @@ -47,12 +47,14 @@ def __init__(self, client=None, bqstorage_client=None): else: self._owns_client = False + # A warning is already raised by the BQ Storage client factory factory if + # instantiation fails, or if the given BQ Storage client instance is outdated. if bqstorage_client is None: - # A warning is already raised by the factory if instantiation fails. - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = client._ensure_bqstorage_client() self._owns_bqstorage_client = bqstorage_client is not None else: self._owns_bqstorage_client = False + bqstorage_client = client._ensure_bqstorage_client(bqstorage_client) self._client = client self._bqstorage_client = bqstorage_client diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py new file mode 100644 index 000000000000..6e5c27eb1d5c --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class BigQueryError(Exception): + """Base class for all custom exceptions defined by the BigQuery client.""" + + +class LegacyBigQueryStorageError(BigQueryError): + """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 474d9a74ae3d..2b8c2928e5c7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -644,7 +644,7 @@ def _cell_magic(line, query): bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint bqstorage_client = _make_bqstorage_client( - use_bqstorage_api, context.credentials, bqstorage_client_options, + client, use_bqstorage_api, bqstorage_client_options, ) close_transports = functools.partial(_close_transports, client, bqstorage_client) @@ -762,12 +762,12 @@ def _split_args_line(line): return params_option_value, rest_of_args -def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): +def _make_bqstorage_client(client, use_bqstorage_api, client_options): if not use_bqstorage_api: return None try: - from google.cloud import bigquery_storage + from google.cloud import bigquery_storage # noqa: F401 except ImportError as err: customized_error = ImportError( "The default BigQuery Storage API client cannot be used, install " @@ -785,10 +785,9 @@ def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): ) raise customized_error from err - return bigquery_storage.BigQueryReadClient( - credentials=credentials, - client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), + return client._ensure_bqstorage_client( client_options=client_options, + client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index b91c91a392b2..b12209252bcd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -41,6 +41,7 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -1519,6 +1520,17 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): ) return False + try: + from google.cloud import bigquery_storage # noqa: F401 + except ImportError: + return False + + try: + _helpers._verify_bq_storage_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return False + return True def _get_next_page_response(self): @@ -1655,7 +1667,7 @@ def to_arrow( owns_bqstorage_client = False if not bqstorage_client and create_bqstorage_client: - bqstorage_client = self.client._create_bqstorage_client() + bqstorage_client = self.client._ensure_bqstorage_client() owns_bqstorage_client = bqstorage_client is not None try: diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 2437f3568277..0ac76d424283 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -19,6 +19,44 @@ import mock +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None + + +@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") +class Test_verify_bq_storage_version(unittest.TestCase): + def _call_fut(self): + from google.cloud.bigquery._helpers import _verify_bq_storage_version + + return _verify_bq_storage_version() + + def test_raises_no_error_w_recent_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): + try: + self._call_fut() + except LegacyBigQueryStorageError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_raises_error_w_legacy_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + with self.assertRaises(LegacyBigQueryStorageError): + self._call_fut() + + def test_raises_error_w_unknown_bqstorage_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: + del fake_module.__version__ + error_pattern = r"version found: legacy" + with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): + self._call_fut() + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 8f535145b613..1346a1ef6688 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -822,7 +822,7 @@ def test_get_dataset(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test_create_bqstorage_client(self): + def test_ensure_bqstorage_client_creating_new_instance(self): mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() mock_client.return_value = mock_client_instance @@ -832,12 +832,19 @@ def test_create_bqstorage_client(self): with mock.patch( "google.cloud.bigquery_storage.BigQueryReadClient", mock_client ): - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = client._ensure_bqstorage_client( + client_options=mock.sentinel.client_options, + client_info=mock.sentinel.client_info, + ) self.assertIs(bqstorage_client, mock_client_instance) - mock_client.assert_called_once_with(credentials=creds) + mock_client.assert_called_once_with( + credentials=creds, + client_options=mock.sentinel.client_options, + client_info=mock.sentinel.client_info, + ) - def test_create_bqstorage_client_missing_dependency(self): + def test_ensure_bqstorage_client_missing_dependency(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -850,7 +857,7 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) with no_bqstorage, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = client._ensure_bqstorage_client() self.assertIsNone(bqstorage_client) matching_warnings = [ @@ -861,6 +868,65 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): ] assert matching_warnings, "Missing dependency warning not raised." + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_obsolete_dependency(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + patcher = mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client() + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_existing_client_check_passes(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + mock_storage_client = mock.sentinel.mock_storage_client + + bqstorage_client = client._ensure_bqstorage_client( + bqstorage_client=mock_storage_client + ) + + self.assertIs(bqstorage_client, mock_storage_client) + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_existing_client_check_fails(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + mock_storage_client = mock.sentinel.mock_storage_client + + patcher = mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 74da318bfade..0576cad3813e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -51,7 +51,7 @@ def test_ctor_wo_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection mock_client = self._mock_client() - mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None connection = self._make_one(client=mock_client) self.assertIsInstance(connection, Connection) @@ -66,9 +66,15 @@ def test_ctor_w_bqstorage_client(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client + connection = self._make_one( client=mock_client, bqstorage_client=mock_bqstorage_client, ) + + mock_client._ensure_bqstorage_client.assert_called_once_with( + mock_bqstorage_client + ) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -92,9 +98,11 @@ def test_connect_w_client(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() - mock_client._create_bqstorage_client.return_value = mock_bqstorage_client + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = connect(client=mock_client) + + mock_client._ensure_bqstorage_client.assert_called_once_with() self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -108,9 +116,15 @@ def test_connect_w_both_clients(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client + connection = connect( client=mock_client, bqstorage_client=mock_bqstorage_client, ) + + mock_client._ensure_bqstorage_client.assert_called_once_with( + mock_bqstorage_client + ) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -140,7 +154,7 @@ def test_close_closes_all_created_bigquery_clients(self): return_value=client, ) bqstorage_client_patcher = mock.patch.object( - client, "_create_bqstorage_client", return_value=bqstorage_client, + client, "_ensure_bqstorage_client", return_value=bqstorage_client, ) with client_patcher, bqstorage_client_patcher: diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 55e45325443b..a2d6693d00b7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -72,7 +72,7 @@ def _mock_client( mock_client._default_query_job_config = default_query_job_config # Assure that the REST client gets used, not the BQ Storage client. - mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None return mock_client @@ -311,6 +311,7 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=bqstorage_streamed_rows, ) + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( client=mock_client, bqstorage_client=mock_bqstorage_client, @@ -341,6 +342,7 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): mock_client = self._mock_client(rows=[]) mock_bqstorage_client = self._mock_bqstorage_client(stream_count=0) + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( client=mock_client, bqstorage_client=mock_bqstorage_client, @@ -365,7 +367,11 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): row_data = [table.Row([1.1, 1.2], {"foo": 0, "bar": 1})] + def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): + return bqstorage_client + mock_client = self._mock_client(rows=row_data) + mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=row_data, ) @@ -396,7 +402,11 @@ def test_fetchall_w_bqstorage_client_no_arrow_compression(self): row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}] + def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): + return bqstorage_client + mock_client = self._mock_client(rows=row_data) + mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=bqstorage_streamed_rows, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index ff41fe7206c1..5e9bf28a949c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -317,7 +317,10 @@ def test__make_bqstorage_client_false(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(False, credentials_mock, {}) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + got = magics._make_bqstorage_client(test_client, False, {}) assert got is None @@ -328,7 +331,10 @@ def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(True, credentials_mock, {}) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + got = magics._make_bqstorage_client(test_client, True, {}) assert isinstance(got, bigquery_storage.BigQueryReadClient) @@ -336,15 +342,46 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) with pytest.raises(ImportError) as exc_context, missing_bq_storage: - magics._make_bqstorage_client(True, credentials_mock, {}) + magics._make_bqstorage_client(test_client, True, {}) error_msg = str(exc_context.value) assert "google-cloud-bigquery-storage" in error_msg assert "pyarrow" in error_msg +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__make_bqstorage_client_true_obsolete_dependency(): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + + patcher = mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + got = magics._make_bqstorage_client(test_client, True, {}) + + assert got is None + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @@ -887,6 +924,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): table_id = "bigquery-public-data.samples.shakespeare" with default_patch, client_patch as client_mock, bqstorage_client_patch: + client_mock()._ensure_bqstorage_client.return_value = bqstorage_instance_mock client_mock().list_rows.return_value = row_iterator_mock ip.run_cell_magic("bigquery", "--max_results=5", table_id) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index ce4a1576123b..0f2ab00c1e23 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -24,6 +24,7 @@ import pytz import google.api_core.exceptions +from test_utils.imports import maybe_fail_import try: from google.cloud import bigquery_storage @@ -1768,6 +1769,48 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) + def test__validate_bqstorage_returns_false_if_missing_dependency(self): + iterator = self._make_one(first_page_response=None) # not cached + + def fail_bqstorage_import(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) + + with no_bqstorage: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + iterator = self._make_one(first_page_response=None) # not cached + + patcher = mock.patch( + "google.cloud.bigquery.table._helpers._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2003,7 +2046,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") - mock_client._create_bqstorage_client.assert_not_called() + mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( @@ -2099,7 +2142,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) - mock_client._create_bqstorage_client.return_value = bqstorage_client + mock_client._ensure_bqstorage_client.return_value = bqstorage_client session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( @@ -2114,11 +2157,11 @@ def test_to_arrow_w_bqstorage_creates_client(self): table=mut.TableReference.from_string("proj.dset.tbl"), ) row_iterator.to_arrow(create_bqstorage_client=True) - mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): + def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2133,14 +2176,14 @@ def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() - mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None row_iterator = self._make_one(mock_client, api_request, path, schema) tbl = row_iterator.to_arrow(create_bqstorage_client=True) # The client attempted to create a BQ Storage client, and even though # that was not possible, results were still returned without errors. - mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) @@ -2824,7 +2867,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") - mock_client._create_bqstorage_client.assert_not_called() + mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( @@ -2839,7 +2882,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) - mock_client._create_bqstorage_client.return_value = bqstorage_client + mock_client._ensure_bqstorage_client.return_value = bqstorage_client session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( @@ -2854,7 +2897,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self): table=mut.TableReference.from_string("proj.dset.tbl"), ) row_iterator.to_dataframe(create_bqstorage_client=True) - mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") From 3482953596ed6626dabf4c6f7866437d2eec65a4 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 20 May 2021 22:24:04 +0000 Subject: [PATCH 1155/2016] chore: upgrade gapic-generator-python to 0.46.3 (#664) PiperOrigin-RevId: 373649163 Source-Link: https://github.com/googleapis/googleapis/commit/7e1b14e6c7a9ab96d2db7e4a131981f162446d34 Source-Link: https://github.com/googleapis/googleapis-gen/commit/0a3c7d272d697796db75857bac73905c68e498c3 --- .../google/cloud/bigquery_v2/__init__.py | 2 - .../cloud/bigquery_v2/gapic_metadata.json | 63 +++ .../cloud/bigquery_v2/types/__init__.py | 2 - .../bigquery_v2/types/encryption_config.py | 10 +- .../google/cloud/bigquery_v2/types/model.py | 444 ++++++------------ .../bigquery_v2/types/model_reference.py | 11 +- .../cloud/bigquery_v2/types/standard_sql.py | 9 +- .../bigquery_v2/types/table_reference.py | 11 +- packages/google-cloud-bigquery/owlbot.py | 17 +- .../google-cloud-bigquery/tests/__init__.py | 15 + .../tests/unit/__init__.py | 4 +- .../tests/unit/gapic/__init__.py | 15 + 12 files changed, 271 insertions(+), 332 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic_metadata.json create mode 100644 packages/google-cloud-bigquery/tests/unit/gapic/__init__.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py index ebcc26befdc1..476bd5747e9f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,7 +28,6 @@ from .types.standard_sql import StandardSqlStructType from .types.table_reference import TableReference - __all__ = ( "DeleteModelRequest", "EncryptionConfiguration", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic_metadata.json b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic_metadata.json new file mode 100644 index 000000000000..3251a2630bd9 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic_metadata.json @@ -0,0 +1,63 @@ + { + "comment": "This file maps proto services/RPCs to the corresponding library clients/methods", + "language": "python", + "libraryPackage": "google.cloud.bigquery_v2", + "protoPackage": "google.cloud.bigquery.v2", + "schema": "1.0", + "services": { + "ModelService": { + "clients": { + "grpc": { + "libraryClient": "ModelServiceClient", + "rpcs": { + "DeleteModel": { + "methods": [ + "delete_model" + ] + }, + "GetModel": { + "methods": [ + "get_model" + ] + }, + "ListModels": { + "methods": [ + "list_models" + ] + }, + "PatchModel": { + "methods": [ + "patch_model" + ] + } + } + }, + "grpc-async": { + "libraryClient": "ModelServiceAsyncClient", + "rpcs": { + "DeleteModel": { + "methods": [ + "delete_model" + ] + }, + "GetModel": { + "methods": [ + "get_model" + ] + }, + "ListModels": { + "methods": [ + "list_models" + ] + }, + "PatchModel": { + "methods": [ + "patch_model" + ] + } + } + } + } + } + } +} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py index b76e65c65b25..9c850dca13a7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - from .encryption_config import EncryptionConfiguration from .model import ( DeleteModelRequest, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py index 2d801bde35f3..4b9139733964 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,11 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore - -from google.protobuf import wrappers_pb2 as wrappers # type: ignore +from google.protobuf import wrappers_pb2 # type: ignore __protobuf__ = proto.module( @@ -28,7 +25,6 @@ class EncryptionConfiguration(proto.Message): r""" - Attributes: kms_key_name (google.protobuf.wrappers_pb2.StringValue): Optional. Describes the Cloud KMS encryption @@ -38,7 +34,9 @@ class EncryptionConfiguration(proto.Message): this encryption key. """ - kms_key_name = proto.Field(proto.MESSAGE, number=1, message=wrappers.StringValue,) + kms_key_name = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.StringValue, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index 8ae158b6409c..17e101d25ff3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,16 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore - from google.cloud.bigquery_v2.types import encryption_config from google.cloud.bigquery_v2.types import model_reference as gcb_model_reference from google.cloud.bigquery_v2.types import standard_sql from google.cloud.bigquery_v2.types import table_reference -from google.protobuf import timestamp_pb2 as timestamp # type: ignore -from google.protobuf import wrappers_pb2 as wrappers # type: ignore +from google.protobuf import timestamp_pb2 # type: ignore +from google.protobuf import wrappers_pb2 # type: ignore __protobuf__ = proto.module( @@ -41,7 +38,6 @@ class Model(proto.Message): r""" - Attributes: etag (str): Output only. A hash of this resource. @@ -251,7 +247,7 @@ class FeedbackType(proto.Enum): EXPLICIT = 2 class SeasonalPeriod(proto.Message): - r"""""" + r""" """ class SeasonalPeriodType(proto.Enum): r"""""" @@ -264,7 +260,7 @@ class SeasonalPeriodType(proto.Enum): YEARLY = 6 class KmeansEnums(proto.Message): - r"""""" + r""" """ class KmeansInitializationMethod(proto.Enum): r"""Indicates the method used to initialize the centroids for @@ -293,22 +289,20 @@ class RegressionMetrics(proto.Message): """ mean_absolute_error = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - mean_squared_error = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - mean_squared_log_error = proto.Field( - proto.MESSAGE, number=3, message=wrappers.DoubleValue, + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, ) - median_absolute_error = proto.Field( - proto.MESSAGE, number=4, message=wrappers.DoubleValue, + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + ) + r_squared = proto.Field( + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, ) - - r_squared = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,) class AggregateClassificationMetrics(proto.Message): r"""Aggregate metrics for classification/classifier models. For @@ -350,19 +344,25 @@ class AggregateClassificationMetrics(proto.Message): is a macro-averaged metric. """ - precision = proto.Field(proto.MESSAGE, number=1, message=wrappers.DoubleValue,) - - recall = proto.Field(proto.MESSAGE, number=2, message=wrappers.DoubleValue,) - - accuracy = proto.Field(proto.MESSAGE, number=3, message=wrappers.DoubleValue,) - - threshold = proto.Field(proto.MESSAGE, number=4, message=wrappers.DoubleValue,) - - f1_score = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,) - - log_loss = proto.Field(proto.MESSAGE, number=6, message=wrappers.DoubleValue,) - - roc_auc = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,) + precision = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + ) + recall = proto.Field(proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,) + accuracy = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + ) + threshold = proto.Field( + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + ) + f1_score = proto.Field( + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + ) + log_loss = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + ) + roc_auc = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, + ) class BinaryClassificationMetrics(proto.Message): r"""Evaluation metrics for binary classification/classifier @@ -382,7 +382,6 @@ class BinaryClassificationMetrics(proto.Message): class BinaryConfusionMatrix(proto.Message): r"""Confusion matrix for binary classification models. - Attributes: positive_class_threshold (google.protobuf.wrappers_pb2.DoubleValue): Threshold value used when computing each of @@ -410,52 +409,43 @@ class BinaryConfusionMatrix(proto.Message): """ positive_class_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - true_positives = proto.Field( - proto.MESSAGE, number=2, message=wrappers.Int64Value, + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, ) - false_positives = proto.Field( - proto.MESSAGE, number=3, message=wrappers.Int64Value, + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, ) - true_negatives = proto.Field( - proto.MESSAGE, number=4, message=wrappers.Int64Value, + proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, ) - false_negatives = proto.Field( - proto.MESSAGE, number=5, message=wrappers.Int64Value, + proto.MESSAGE, number=5, message=wrappers_pb2.Int64Value, ) - precision = proto.Field( - proto.MESSAGE, number=6, message=wrappers.DoubleValue, + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + ) + recall = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, ) - - recall = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,) - f1_score = proto.Field( - proto.MESSAGE, number=8, message=wrappers.DoubleValue, + proto.MESSAGE, number=8, message=wrappers_pb2.DoubleValue, ) - accuracy = proto.Field( - proto.MESSAGE, number=9, message=wrappers.DoubleValue, + proto.MESSAGE, number=9, message=wrappers_pb2.DoubleValue, ) aggregate_classification_metrics = proto.Field( proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", ) - binary_confusion_matrix_list = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.BinaryClassificationMetrics.BinaryConfusionMatrix", ) - - positive_label = proto.Field(proto.STRING, number=3) - - negative_label = proto.Field(proto.STRING, number=4) + positive_label = proto.Field(proto.STRING, number=3,) + negative_label = proto.Field(proto.STRING, number=4,) class MultiClassClassificationMetrics(proto.Message): r"""Evaluation metrics for multi-class classification/classifier @@ -470,7 +460,6 @@ class MultiClassClassificationMetrics(proto.Message): class ConfusionMatrix(proto.Message): r"""Confusion matrix for multi-class classification models. - Attributes: confidence_threshold (google.protobuf.wrappers_pb2.DoubleValue): Confidence threshold used when computing the @@ -481,7 +470,6 @@ class ConfusionMatrix(proto.Message): class Entry(proto.Message): r"""A single entry in the confusion matrix. - Attributes: predicted_label (str): The predicted label. For confidence_threshold > 0, we will @@ -492,15 +480,13 @@ class Entry(proto.Message): label. """ - predicted_label = proto.Field(proto.STRING, number=1) - + predicted_label = proto.Field(proto.STRING, number=1,) item_count = proto.Field( - proto.MESSAGE, number=2, message=wrappers.Int64Value, + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, ) class Row(proto.Message): r"""A single row in the confusion matrix. - Attributes: actual_label (str): The original label of this row. @@ -508,8 +494,7 @@ class Row(proto.Message): Info describing predicted label distribution. """ - actual_label = proto.Field(proto.STRING, number=1) - + actual_label = proto.Field(proto.STRING, number=1,) entries = proto.RepeatedField( proto.MESSAGE, number=2, @@ -517,9 +502,8 @@ class Row(proto.Message): ) confidence_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - rows = proto.RepeatedField( proto.MESSAGE, number=2, @@ -529,7 +513,6 @@ class Row(proto.Message): aggregate_classification_metrics = proto.Field( proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", ) - confusion_matrix_list = proto.RepeatedField( proto.MESSAGE, number=2, @@ -538,7 +521,6 @@ class Row(proto.Message): class ClusteringMetrics(proto.Message): r"""Evaluation metrics for clustering models. - Attributes: davies_bouldin_index (google.protobuf.wrappers_pb2.DoubleValue): Davies-Bouldin index. @@ -551,7 +533,6 @@ class ClusteringMetrics(proto.Message): class Cluster(proto.Message): r"""Message containing the information about one cluster. - Attributes: centroid_id (int): Centroid id. @@ -565,7 +546,6 @@ class Cluster(proto.Message): class FeatureValue(proto.Message): r"""Representative value of a single feature within the cluster. - Attributes: feature_column (str): The feature column name. @@ -578,7 +558,6 @@ class FeatureValue(proto.Message): class CategoricalValue(proto.Message): r"""Representative value of a categorical feature. - Attributes: category_counts (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): Counts of all categories for the categorical feature. If @@ -590,7 +569,6 @@ class CategoricalValue(proto.Message): class CategoryCount(proto.Message): r"""Represents the count of a single category within the cluster. - Attributes: category (str): The name of category. @@ -599,10 +577,9 @@ class CategoryCount(proto.Message): category within the cluster. """ - category = proto.Field(proto.STRING, number=1) - + category = proto.Field(proto.STRING, number=1,) count = proto.Field( - proto.MESSAGE, number=2, message=wrappers.Int64Value, + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, ) category_counts = proto.RepeatedField( @@ -611,15 +588,13 @@ class CategoryCount(proto.Message): message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", ) - feature_column = proto.Field(proto.STRING, number=1) - + feature_column = proto.Field(proto.STRING, number=1,) numerical_value = proto.Field( proto.MESSAGE, number=2, oneof="value", - message=wrappers.DoubleValue, + message=wrappers_pb2.DoubleValue, ) - categorical_value = proto.Field( proto.MESSAGE, number=3, @@ -627,24 +602,22 @@ class CategoryCount(proto.Message): message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", ) - centroid_id = proto.Field(proto.INT64, number=1) - + centroid_id = proto.Field(proto.INT64, number=1,) feature_values = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.ClusteringMetrics.Cluster.FeatureValue", ) - - count = proto.Field(proto.MESSAGE, number=3, message=wrappers.Int64Value,) + count = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + ) davies_bouldin_index = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - mean_squared_distance = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - clusters = proto.RepeatedField( proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster", ) @@ -677,24 +650,20 @@ class RankingMetrics(proto.Message): """ mean_average_precision = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - mean_squared_error = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - normalized_discounted_cumulative_gain = proto.Field( - proto.MESSAGE, number=3, message=wrappers.DoubleValue, + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, ) - average_rank = proto.Field( - proto.MESSAGE, number=4, message=wrappers.DoubleValue, + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, ) class ArimaForecastingMetrics(proto.Message): r"""Model evaluation metrics for ARIMA forecasting models. - Attributes: non_seasonal_order (Sequence[google.cloud.bigquery_v2.types.Model.ArimaOrder]): Non-seasonal order. @@ -737,15 +706,11 @@ class ArimaSingleModelForecastingMetrics(proto.Message): non_seasonal_order = proto.Field( proto.MESSAGE, number=1, message="Model.ArimaOrder", ) - arima_fitting_metrics = proto.Field( proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", ) - - has_drift = proto.Field(proto.BOOL, number=3) - - time_series_id = proto.Field(proto.STRING, number=4) - + has_drift = proto.Field(proto.BOOL, number=3,) + time_series_id = proto.Field(proto.STRING, number=4,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) @@ -753,19 +718,14 @@ class ArimaSingleModelForecastingMetrics(proto.Message): non_seasonal_order = proto.RepeatedField( proto.MESSAGE, number=1, message="Model.ArimaOrder", ) - arima_fitting_metrics = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", ) - seasonal_periods = proto.RepeatedField( proto.ENUM, number=3, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) - - has_drift = proto.RepeatedField(proto.BOOL, number=4) - - time_series_id = proto.RepeatedField(proto.STRING, number=5) - + has_drift = proto.RepeatedField(proto.BOOL, number=4,) + time_series_id = proto.RepeatedField(proto.STRING, number=5,) arima_single_model_forecasting_metrics = proto.RepeatedField( proto.MESSAGE, number=6, @@ -800,29 +760,24 @@ class EvaluationMetrics(proto.Message): regression_metrics = proto.Field( proto.MESSAGE, number=1, oneof="metrics", message="Model.RegressionMetrics", ) - binary_classification_metrics = proto.Field( proto.MESSAGE, number=2, oneof="metrics", message="Model.BinaryClassificationMetrics", ) - multi_class_classification_metrics = proto.Field( proto.MESSAGE, number=3, oneof="metrics", message="Model.MultiClassClassificationMetrics", ) - clustering_metrics = proto.Field( proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics", ) - ranking_metrics = proto.Field( proto.MESSAGE, number=5, oneof="metrics", message="Model.RankingMetrics", ) - arima_forecasting_metrics = proto.Field( proto.MESSAGE, number=6, @@ -846,7 +801,6 @@ class DataSplitResult(proto.Message): training_table = proto.Field( proto.MESSAGE, number=1, message=table_reference.TableReference, ) - evaluation_table = proto.Field( proto.MESSAGE, number=2, message=table_reference.TableReference, ) @@ -864,15 +818,12 @@ class ArimaOrder(proto.Message): Order of the moving-average part. """ - p = proto.Field(proto.INT64, number=1) - - d = proto.Field(proto.INT64, number=2) - - q = proto.Field(proto.INT64, number=3) + p = proto.Field(proto.INT64, number=1,) + d = proto.Field(proto.INT64, number=2,) + q = proto.Field(proto.INT64, number=3,) class ArimaFittingMetrics(proto.Message): r"""ARIMA model fitting metrics. - Attributes: log_likelihood (float): Log-likelihood. @@ -882,11 +833,9 @@ class ArimaFittingMetrics(proto.Message): Variance. """ - log_likelihood = proto.Field(proto.DOUBLE, number=1) - - aic = proto.Field(proto.DOUBLE, number=2) - - variance = proto.Field(proto.DOUBLE, number=3) + log_likelihood = proto.Field(proto.DOUBLE, number=1,) + aic = proto.Field(proto.DOUBLE, number=2,) + variance = proto.Field(proto.DOUBLE, number=3,) class GlobalExplanation(proto.Message): r"""Global explanations containing the top most important @@ -906,7 +855,6 @@ class GlobalExplanation(proto.Message): class Explanation(proto.Message): r"""Explanation for a single feature. - Attributes: feature_name (str): Full name of the feature. For non-numerical features, will @@ -917,21 +865,18 @@ class Explanation(proto.Message): Attribution of feature. """ - feature_name = proto.Field(proto.STRING, number=1) - + feature_name = proto.Field(proto.STRING, number=1,) attribution = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) explanations = proto.RepeatedField( proto.MESSAGE, number=1, message="Model.GlobalExplanation.Explanation", ) - - class_label = proto.Field(proto.STRING, number=2) + class_label = proto.Field(proto.STRING, number=2,) class TrainingRun(proto.Message): r"""Information about a single training query run for the model. - Attributes: training_options (google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions): Options that were used for this training run, @@ -957,7 +902,6 @@ class TrainingRun(proto.Message): class TrainingOptions(proto.Message): r""" - Attributes: max_iterations (int): The maximum number of iterations in training. @@ -1100,131 +1044,88 @@ class TrainingOptions(proto.Message): The max value of non-seasonal p and q. """ - max_iterations = proto.Field(proto.INT64, number=1) - + max_iterations = proto.Field(proto.INT64, number=1,) loss_type = proto.Field(proto.ENUM, number=2, enum="Model.LossType",) - - learn_rate = proto.Field(proto.DOUBLE, number=3) - + learn_rate = proto.Field(proto.DOUBLE, number=3,) l1_regularization = proto.Field( - proto.MESSAGE, number=4, message=wrappers.DoubleValue, + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, ) - l2_regularization = proto.Field( - proto.MESSAGE, number=5, message=wrappers.DoubleValue, + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, ) - min_relative_progress = proto.Field( - proto.MESSAGE, number=6, message=wrappers.DoubleValue, + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, ) - warm_start = proto.Field( - proto.MESSAGE, number=7, message=wrappers.BoolValue, + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, ) - early_stop = proto.Field( - proto.MESSAGE, number=8, message=wrappers.BoolValue, + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, ) - - input_label_columns = proto.RepeatedField(proto.STRING, number=9) - + input_label_columns = proto.RepeatedField(proto.STRING, number=9,) data_split_method = proto.Field( proto.ENUM, number=10, enum="Model.DataSplitMethod", ) - - data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11) - - data_split_column = proto.Field(proto.STRING, number=12) - + data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11,) + data_split_column = proto.Field(proto.STRING, number=12,) learn_rate_strategy = proto.Field( proto.ENUM, number=13, enum="Model.LearnRateStrategy", ) - - initial_learn_rate = proto.Field(proto.DOUBLE, number=16) - - label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17) - - user_column = proto.Field(proto.STRING, number=18) - - item_column = proto.Field(proto.STRING, number=19) - + initial_learn_rate = proto.Field(proto.DOUBLE, number=16,) + label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17,) + user_column = proto.Field(proto.STRING, number=18,) + item_column = proto.Field(proto.STRING, number=19,) distance_type = proto.Field( proto.ENUM, number=20, enum="Model.DistanceType", ) - - num_clusters = proto.Field(proto.INT64, number=21) - - model_uri = proto.Field(proto.STRING, number=22) - + num_clusters = proto.Field(proto.INT64, number=21,) + model_uri = proto.Field(proto.STRING, number=22,) optimization_strategy = proto.Field( proto.ENUM, number=23, enum="Model.OptimizationStrategy", ) - - hidden_units = proto.RepeatedField(proto.INT64, number=24) - - batch_size = proto.Field(proto.INT64, number=25) - + hidden_units = proto.RepeatedField(proto.INT64, number=24,) + batch_size = proto.Field(proto.INT64, number=25,) dropout = proto.Field( - proto.MESSAGE, number=26, message=wrappers.DoubleValue, + proto.MESSAGE, number=26, message=wrappers_pb2.DoubleValue, ) - - max_tree_depth = proto.Field(proto.INT64, number=27) - - subsample = proto.Field(proto.DOUBLE, number=28) - + max_tree_depth = proto.Field(proto.INT64, number=27,) + subsample = proto.Field(proto.DOUBLE, number=28,) min_split_loss = proto.Field( - proto.MESSAGE, number=29, message=wrappers.DoubleValue, + proto.MESSAGE, number=29, message=wrappers_pb2.DoubleValue, ) - - num_factors = proto.Field(proto.INT64, number=30) - + num_factors = proto.Field(proto.INT64, number=30,) feedback_type = proto.Field( proto.ENUM, number=31, enum="Model.FeedbackType", ) - wals_alpha = proto.Field( - proto.MESSAGE, number=32, message=wrappers.DoubleValue, + proto.MESSAGE, number=32, message=wrappers_pb2.DoubleValue, ) - kmeans_initialization_method = proto.Field( proto.ENUM, number=33, enum="Model.KmeansEnums.KmeansInitializationMethod", ) - - kmeans_initialization_column = proto.Field(proto.STRING, number=34) - - time_series_timestamp_column = proto.Field(proto.STRING, number=35) - - time_series_data_column = proto.Field(proto.STRING, number=36) - - auto_arima = proto.Field(proto.BOOL, number=37) - + kmeans_initialization_column = proto.Field(proto.STRING, number=34,) + time_series_timestamp_column = proto.Field(proto.STRING, number=35,) + time_series_data_column = proto.Field(proto.STRING, number=36,) + auto_arima = proto.Field(proto.BOOL, number=37,) non_seasonal_order = proto.Field( proto.MESSAGE, number=38, message="Model.ArimaOrder", ) - data_frequency = proto.Field( proto.ENUM, number=39, enum="Model.DataFrequency", ) - - include_drift = proto.Field(proto.BOOL, number=41) - + include_drift = proto.Field(proto.BOOL, number=41,) holiday_region = proto.Field( proto.ENUM, number=42, enum="Model.HolidayRegion", ) - - time_series_id_column = proto.Field(proto.STRING, number=43) - - horizon = proto.Field(proto.INT64, number=44) - - preserve_input_structs = proto.Field(proto.BOOL, number=45) - - auto_arima_max_order = proto.Field(proto.INT64, number=46) + time_series_id_column = proto.Field(proto.STRING, number=43,) + horizon = proto.Field(proto.INT64, number=44,) + preserve_input_structs = proto.Field(proto.BOOL, number=45,) + auto_arima_max_order = proto.Field(proto.INT64, number=46,) class IterationResult(proto.Message): r"""Information about a single iteration of the training run. - Attributes: index (google.protobuf.wrappers_pb2.Int32Value): Index of the iteration, 0 based. @@ -1248,7 +1149,6 @@ class IterationResult(proto.Message): class ClusterInfo(proto.Message): r"""Information about a single cluster for clustering model. - Attributes: centroid_id (int): Centroid id. @@ -1260,14 +1160,12 @@ class ClusterInfo(proto.Message): assigned to the cluster. """ - centroid_id = proto.Field(proto.INT64, number=1) - + centroid_id = proto.Field(proto.INT64, number=1,) cluster_radius = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - cluster_size = proto.Field( - proto.MESSAGE, number=3, message=wrappers.Int64Value, + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, ) class ArimaResult(proto.Message): @@ -1287,7 +1185,6 @@ class ArimaResult(proto.Message): class ArimaCoefficients(proto.Message): r"""Arima coefficients. - Attributes: auto_regressive_coefficients (Sequence[float]): Auto-regressive coefficients, an array of @@ -1301,18 +1198,15 @@ class ArimaCoefficients(proto.Message): """ auto_regressive_coefficients = proto.RepeatedField( - proto.DOUBLE, number=1 + proto.DOUBLE, number=1, ) - moving_average_coefficients = proto.RepeatedField( - proto.DOUBLE, number=2 + proto.DOUBLE, number=2, ) - - intercept_coefficient = proto.Field(proto.DOUBLE, number=3) + intercept_coefficient = proto.Field(proto.DOUBLE, number=3,) class ArimaModelInfo(proto.Message): r"""Arima model information. - Attributes: non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): Non-seasonal order. @@ -1333,21 +1227,16 @@ class ArimaModelInfo(proto.Message): non_seasonal_order = proto.Field( proto.MESSAGE, number=1, message="Model.ArimaOrder", ) - arima_coefficients = proto.Field( proto.MESSAGE, number=2, message="Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients", ) - arima_fitting_metrics = proto.Field( proto.MESSAGE, number=3, message="Model.ArimaFittingMetrics", ) - - has_drift = proto.Field(proto.BOOL, number=4) - - time_series_id = proto.Field(proto.STRING, number=5) - + has_drift = proto.Field(proto.BOOL, number=4,) + time_series_id = proto.Field(proto.STRING, number=5,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=6, @@ -1359,35 +1248,30 @@ class ArimaModelInfo(proto.Message): number=1, message="Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo", ) - seasonal_periods = proto.RepeatedField( proto.ENUM, number=2, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) - index = proto.Field(proto.MESSAGE, number=1, message=wrappers.Int32Value,) - + index = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.Int32Value, + ) duration_ms = proto.Field( - proto.MESSAGE, number=4, message=wrappers.Int64Value, + proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, ) - training_loss = proto.Field( - proto.MESSAGE, number=5, message=wrappers.DoubleValue, + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, ) - eval_loss = proto.Field( - proto.MESSAGE, number=6, message=wrappers.DoubleValue, + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, ) - - learn_rate = proto.Field(proto.DOUBLE, number=7) - + learn_rate = proto.Field(proto.DOUBLE, number=7,) cluster_infos = proto.RepeatedField( proto.MESSAGE, number=8, message="Model.TrainingRun.IterationResult.ClusterInfo", ) - arima_result = proto.Field( proto.MESSAGE, number=9, @@ -1397,57 +1281,41 @@ class ArimaModelInfo(proto.Message): training_options = proto.Field( proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions", ) - - start_time = proto.Field(proto.MESSAGE, number=8, message=timestamp.Timestamp,) - + start_time = proto.Field( + proto.MESSAGE, number=8, message=timestamp_pb2.Timestamp, + ) results = proto.RepeatedField( proto.MESSAGE, number=6, message="Model.TrainingRun.IterationResult", ) - evaluation_metrics = proto.Field( proto.MESSAGE, number=7, message="Model.EvaluationMetrics", ) - data_split_result = proto.Field( proto.MESSAGE, number=9, message="Model.DataSplitResult", ) - global_explanations = proto.RepeatedField( proto.MESSAGE, number=10, message="Model.GlobalExplanation", ) - etag = proto.Field(proto.STRING, number=1) - + etag = proto.Field(proto.STRING, number=1,) model_reference = proto.Field( proto.MESSAGE, number=2, message=gcb_model_reference.ModelReference, ) - - creation_time = proto.Field(proto.INT64, number=5) - - last_modified_time = proto.Field(proto.INT64, number=6) - - description = proto.Field(proto.STRING, number=12) - - friendly_name = proto.Field(proto.STRING, number=14) - - labels = proto.MapField(proto.STRING, proto.STRING, number=15) - - expiration_time = proto.Field(proto.INT64, number=16) - - location = proto.Field(proto.STRING, number=13) - + creation_time = proto.Field(proto.INT64, number=5,) + last_modified_time = proto.Field(proto.INT64, number=6,) + description = proto.Field(proto.STRING, number=12,) + friendly_name = proto.Field(proto.STRING, number=14,) + labels = proto.MapField(proto.STRING, proto.STRING, number=15,) + expiration_time = proto.Field(proto.INT64, number=16,) + location = proto.Field(proto.STRING, number=13,) encryption_configuration = proto.Field( proto.MESSAGE, number=17, message=encryption_config.EncryptionConfiguration, ) - model_type = proto.Field(proto.ENUM, number=7, enum=ModelType,) - training_runs = proto.RepeatedField(proto.MESSAGE, number=9, message=TrainingRun,) - feature_columns = proto.RepeatedField( proto.MESSAGE, number=10, message=standard_sql.StandardSqlField, ) - label_columns = proto.RepeatedField( proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, ) @@ -1455,7 +1323,6 @@ class ArimaModelInfo(proto.Message): class GetModelRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the requested model. @@ -1465,16 +1332,13 @@ class GetModelRequest(proto.Message): Required. Model ID of the requested model. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) class PatchModelRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the model to patch. @@ -1489,18 +1353,14 @@ class PatchModelRequest(proto.Message): set to default value. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) - + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) model = proto.Field(proto.MESSAGE, number=4, message="Model",) class DeleteModelRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the model to delete. @@ -1510,16 +1370,13 @@ class DeleteModelRequest(proto.Message): Required. Model ID of the model to delete. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) class ListModelsRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the models to list. @@ -1534,18 +1391,16 @@ class ListModelsRequest(proto.Message): request the next page of results """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - max_results = proto.Field(proto.MESSAGE, number=3, message=wrappers.UInt32Value,) - - page_token = proto.Field(proto.STRING, number=4) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + max_results = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.UInt32Value, + ) + page_token = proto.Field(proto.STRING, number=4,) class ListModelsResponse(proto.Message): r""" - Attributes: models (Sequence[google.cloud.bigquery_v2.types.Model]): Models in the requested dataset. Only the following fields @@ -1560,8 +1415,7 @@ def raw_page(self): return self models = proto.RepeatedField(proto.MESSAGE, number=1, message="Model",) - - next_page_token = proto.Field(proto.STRING, number=2) + next_page_token = proto.Field(proto.STRING, number=2,) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py index e3891d6c17ca..a9ebad61361b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore @@ -25,7 +23,6 @@ class ModelReference(proto.Message): r"""Id path of a model. - Attributes: project_id (str): Required. The ID of the project containing @@ -39,11 +36,9 @@ class ModelReference(proto.Message): maximum length is 1,024 characters. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py index 3bc6afedce92..b2191a41760d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore @@ -63,11 +61,9 @@ class TypeKind(proto.Enum): STRUCT = 17 type_kind = proto.Field(proto.ENUM, number=1, enum=TypeKind,) - array_element_type = proto.Field( proto.MESSAGE, number=2, oneof="sub_type", message="StandardSqlDataType", ) - struct_type = proto.Field( proto.MESSAGE, number=3, oneof="sub_type", message="StandardSqlStructType", ) @@ -75,7 +71,6 @@ class TypeKind(proto.Enum): class StandardSqlField(proto.Message): r"""A field or a column. - Attributes: name (str): Optional. The name of this field. Can be @@ -88,14 +83,12 @@ class StandardSqlField(proto.Message): this "type" field). """ - name = proto.Field(proto.STRING, number=1) - + name = proto.Field(proto.STRING, number=1,) type = proto.Field(proto.MESSAGE, number=2, message="StandardSqlDataType",) class StandardSqlStructType(proto.Message): r""" - Attributes: fields (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py index d213e8bb6d82..a0a8ee4c95fe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore @@ -25,7 +23,6 @@ class TableReference(proto.Message): r""" - Attributes: project_id (str): Required. The ID of the project containing @@ -41,11 +38,9 @@ class TableReference(proto.Message): as ``sample_table$20190123``. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - table_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + table_id = proto.Field(proto.STRING, number=3,) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index f45c24fbb51d..476c5ee5dce0 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -24,19 +24,32 @@ default_version = "v2" for library in s.get_staging_dirs(default_version): - # Do not expose ModelServiceClient, as there is no public API endpoint for the - # models service. + # Do not expose ModelServiceClient and ModelServiceAsyncClient, as there + # is no public API endpoint for the models service. s.replace( library / f"google/cloud/bigquery_{library.name}/__init__.py", r"from \.services\.model_service import ModelServiceClient", "", ) + + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"from \.services\.model_service import ModelServiceAsyncClient", + "", + ) + s.replace( library / f"google/cloud/bigquery_{library.name}/__init__.py", r"""["']ModelServiceClient["'],""", "", ) + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"""["']ModelServiceAsyncClient["'],""", + "", + ) + # Adjust Model docstring so that Sphinx does not think that "predicted_" is # a reference to something, issuing a false warning. s.replace( diff --git a/packages/google-cloud-bigquery/tests/__init__.py b/packages/google-cloud-bigquery/tests/__init__.py index e69de29bb2d1..4de65971c238 100644 --- a/packages/google-cloud-bigquery/tests/__init__.py +++ b/packages/google-cloud-bigquery/tests/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/packages/google-cloud-bigquery/tests/unit/__init__.py b/packages/google-cloud-bigquery/tests/unit/__init__.py index df379f1e9d88..4de65971c238 100644 --- a/packages/google-cloud-bigquery/tests/unit/__init__.py +++ b/packages/google-cloud-bigquery/tests/unit/__init__.py @@ -1,4 +1,5 @@ -# Copyright 2016 Google LLC +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,3 +12,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# diff --git a/packages/google-cloud-bigquery/tests/unit/gapic/__init__.py b/packages/google-cloud-bigquery/tests/unit/gapic/__init__.py new file mode 100644 index 000000000000..4de65971c238 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/gapic/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# From 193543debd788d6a3d8df7ec211346438addd025 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 21 May 2021 00:26:03 +0200 Subject: [PATCH 1156/2016] chore(deps): update dependency grpcio to v1.38.0 (#674) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [grpcio](https://grpc.io) | `==1.37.1` -> `==1.38.0` | [![age](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/compatibility-slim/1.37.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/confidence-slim/1.37.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻️ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 2dfee39b5590..fcb481f48216 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.16.1 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 -grpcio==1.37.1 +grpcio==1.38.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From 7fea7b7d72f7be7355504fd2f0a5b25fed746440 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 21 May 2021 11:03:35 +0200 Subject: [PATCH 1157/2016] fix(tests): invalid path to strptime() (#672) --- packages/google-cloud-bigquery/samples/snippets/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/conftest.py b/packages/google-cloud-bigquery/samples/snippets/conftest.py index 0d0299ee560e..cb11eb68f636 100644 --- a/packages/google-cloud-bigquery/samples/snippets/conftest.py +++ b/packages/google-cloud-bigquery/samples/snippets/conftest.py @@ -33,7 +33,7 @@ def resource_prefix() -> str: def resource_name_to_date(resource_name: str): start_date = len(RESOURCE_PREFIX) + 1 date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH] - return datetime.strptime(date_string, RESOURCE_DATE_FORMAT) + return datetime.datetime.strptime(date_string, RESOURCE_DATE_FORMAT) @pytest.fixture(scope="session", autouse=True) From 966a206b7fa1a53fe93422374743b71d69e1a689 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 21 May 2021 10:50:55 -0600 Subject: [PATCH 1158/2016] feat: Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types (#673) * parse parameterized schema info * Fixed SchemaField repr/key * Fix code duplication between _parse_schema_resource and from_api_repr Move new parameterized-type code from _parse_schema_resource to from_api_repr and implement _parse_schema_resource in terms of from_api_repr. * empty schemas are lists now, just like non-empty schemas. * changed new parameterized-type tests to use from_api_repr Because that's more direct and it uncovered duplicate code. * paramaterized the from_api_repr tests and added to_api_repr tests * Test BYTES and _key (repr) too. * Added a round-trip parameterized types schema tests * handle BYTES in _key/repr * blacken * Move _get_int close to use * Updated documentation. * Oops, forgot BIGNUMERIC * Improve argument doc and better argument name to __get_int * doom tables before creating them. * Use max_length in the Python for the REST api maxLength --- .../google/cloud/bigquery/schema.py | 71 +++++++--- .../tests/system/test_client.py | 29 +++++ .../tests/unit/test_query.py | 4 +- .../tests/unit/test_schema.py | 123 ++++++++++++++++++ 4 files changed, 209 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index cb221d6de982..919d78b232ad 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -67,6 +67,15 @@ class SchemaField(object): policy_tags (Optional[PolicyTagList]): The policy tag list for the field. + precision (Optional[int]): + Precison (number of digits) of fields with NUMERIC or BIGNUMERIC type. + + scale (Optional[int]): + Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. + + max_length (Optional[int]): + Maximim length of fields with STRING or BYTES type. + """ def __init__( @@ -77,6 +86,9 @@ def __init__( description=_DEFAULT_VALUE, fields=(), policy_tags=None, + precision=_DEFAULT_VALUE, + scale=_DEFAULT_VALUE, + max_length=_DEFAULT_VALUE, ): self._properties = { "name": name, @@ -86,9 +98,22 @@ def __init__( self._properties["mode"] = mode.upper() if description is not _DEFAULT_VALUE: self._properties["description"] = description + if precision is not _DEFAULT_VALUE: + self._properties["precision"] = precision + if scale is not _DEFAULT_VALUE: + self._properties["scale"] = scale + if max_length is not _DEFAULT_VALUE: + self._properties["maxLength"] = max_length self._fields = tuple(fields) self._policy_tags = policy_tags + @staticmethod + def __get_int(api_repr, name): + v = api_repr.get(name, _DEFAULT_VALUE) + if v is not _DEFAULT_VALUE: + v = int(v) + return v + @classmethod def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. @@ -113,6 +138,9 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": description=description, name=api_repr["name"], policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")), + precision=cls.__get_int(api_repr, "precision"), + scale=cls.__get_int(api_repr, "scale"), + max_length=cls.__get_int(api_repr, "maxLength"), ) @property @@ -148,6 +176,21 @@ def description(self): """Optional[str]: description for the field.""" return self._properties.get("description") + @property + def precision(self): + """Optional[int]: Precision (number of digits) for the NUMERIC field.""" + return self._properties.get("precision") + + @property + def scale(self): + """Optional[int]: Scale (digits after decimal) for the NUMERIC field.""" + return self._properties.get("scale") + + @property + def max_length(self): + """Optional[int]: Maximum length for the STRING or BYTES field.""" + return self._properties.get("maxLength") + @property def fields(self): """Optional[tuple]: Subfields contained in this field. @@ -191,9 +234,19 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ + field_type = self.field_type.upper() + if field_type == "STRING" or field_type == "BYTES": + if self.max_length is not None: + field_type = f"{field_type}({self.max_length})" + elif field_type.endswith("NUMERIC"): + if self.precision is not None: + if self.scale is not None: + field_type = f"{field_type}({self.precision}, {self.scale})" + else: + field_type = f"{field_type}({self.precision})" return ( self.name, - self.field_type.upper(), + field_type, # Mode is always str, if not given it defaults to a str value self.mode.upper(), # pytype: disable=attribute-error self.description, @@ -269,21 +322,7 @@ def _parse_schema_resource(info): Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]: A list of parsed fields, or ``None`` if no "fields" key found. """ - if "fields" not in info: - return () - - schema = [] - for r_field in info["fields"]: - name = r_field["name"] - field_type = r_field["type"] - mode = r_field.get("mode", "NULLABLE") - description = r_field.get("description") - sub_fields = _parse_schema_resource(r_field) - policy_tags = PolicyTagList.from_api_repr(r_field.get("policyTags")) - schema.append( - SchemaField(name, field_type, mode, description, sub_fields, policy_tags) - ) - return schema + return [SchemaField.from_api_repr(f) for f in info.get("fields", ())] def _build_schema_resource(fields): diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 7c8ef50fa168..b4b0c053d431 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2173,6 +2173,35 @@ def test_list_rows_page_size(self): page = next(pages) self.assertEqual(page.num_items, num_last_page) + def test_parameterized_types_round_trip(self): + client = Config.CLIENT + table_id = f"{Config.DATASET}.test_parameterized_types_round_trip" + fields = ( + ("n", "NUMERIC"), + ("n9", "NUMERIC(9)"), + ("n92", "NUMERIC(9, 2)"), + ("bn", "BIGNUMERIC"), + ("bn9", "BIGNUMERIC(38)"), + ("bn92", "BIGNUMERIC(38, 22)"), + ("s", "STRING"), + ("s9", "STRING(9)"), + ("b", "BYTES"), + ("b9", "BYTES(9)"), + ) + self.to_delete.insert(0, Table(f"{client.project}.{table_id}")) + client.query( + "create table {} ({})".format( + table_id, ", ".join(" ".join(f) for f in fields) + ) + ).result() + table = client.get_table(table_id) + table_id2 = table_id + "2" + self.to_delete.insert(0, Table(f"{client.project}.{table_id2}")) + client.create_table(Table(f"{client.project}.{table_id2}", table.schema)) + table2 = client.get_table(table_id2) + + self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields) + def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 90fc30b2074f..9483fe8dd743 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -1302,7 +1302,7 @@ def _verifySchema(self, query, resource): self.assertEqual(found.description, expected.get("description")) self.assertEqual(found.fields, expected.get("fields", ())) else: - self.assertEqual(query.schema, ()) + self.assertEqual(query.schema, []) def test_ctor_defaults(self): query = self._make_one(self._make_resource()) @@ -1312,7 +1312,7 @@ def test_ctor_defaults(self): self.assertIsNone(query.page_token) self.assertEqual(query.project, self.PROJECT) self.assertEqual(query.rows, []) - self.assertEqual(query.schema, ()) + self.assertEqual(query.schema, []) self.assertIsNone(query.total_rows) self.assertIsNone(query.total_bytes_processed) diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 87baaf3790f8..29c3bace5d63 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -15,6 +15,7 @@ import unittest import mock +import pytest class TestSchemaField(unittest.TestCase): @@ -715,3 +716,125 @@ def test___hash__not_equals(self): set_one = {policy1} set_two = {policy2} self.assertNotEqual(set_one, set_two) + + +@pytest.mark.parametrize( + "api,expect,key2", + [ + ( + dict(name="n", type="NUMERIC"), + ("n", "NUMERIC", None, None, None), + ("n", "NUMERIC"), + ), + ( + dict(name="n", type="NUMERIC", precision=9), + ("n", "NUMERIC", 9, None, None), + ("n", "NUMERIC(9)"), + ), + ( + dict(name="n", type="NUMERIC", precision=9, scale=2), + ("n", "NUMERIC", 9, 2, None), + ("n", "NUMERIC(9, 2)"), + ), + ( + dict(name="n", type="BIGNUMERIC"), + ("n", "BIGNUMERIC", None, None, None), + ("n", "BIGNUMERIC"), + ), + ( + dict(name="n", type="BIGNUMERIC", precision=40), + ("n", "BIGNUMERIC", 40, None, None), + ("n", "BIGNUMERIC(40)"), + ), + ( + dict(name="n", type="BIGNUMERIC", precision=40, scale=2), + ("n", "BIGNUMERIC", 40, 2, None), + ("n", "BIGNUMERIC(40, 2)"), + ), + ( + dict(name="n", type="STRING"), + ("n", "STRING", None, None, None), + ("n", "STRING"), + ), + ( + dict(name="n", type="STRING", maxLength=9), + ("n", "STRING", None, None, 9), + ("n", "STRING(9)"), + ), + ( + dict(name="n", type="BYTES"), + ("n", "BYTES", None, None, None), + ("n", "BYTES"), + ), + ( + dict(name="n", type="BYTES", maxLength=9), + ("n", "BYTES", None, None, 9), + ("n", "BYTES(9)"), + ), + ], +) +def test_from_api_repr_parameterized(api, expect, key2): + from google.cloud.bigquery.schema import SchemaField + + field = SchemaField.from_api_repr(api) + + assert ( + field.name, + field.field_type, + field.precision, + field.scale, + field.max_length, + ) == expect + + assert field._key()[:2] == key2 + + +@pytest.mark.parametrize( + "field,api", + [ + ( + dict(name="n", field_type="NUMERIC"), + dict(name="n", type="NUMERIC", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="NUMERIC", precision=9), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9), + ), + ( + dict(name="n", field_type="NUMERIC", precision=9, scale=2), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2), + ), + ( + dict(name="n", field_type="BIGNUMERIC"), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="BIGNUMERIC", precision=40), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40), + ), + ( + dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2), + ), + ( + dict(name="n", field_type="STRING"), + dict(name="n", type="STRING", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="STRING", max_length=9), + dict(name="n", type="STRING", mode="NULLABLE", maxLength=9), + ), + ( + dict(name="n", field_type="BYTES"), + dict(name="n", type="BYTES", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="BYTES", max_length=9), + dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9), + ), + ], +) +def test_to_api_repr_parameterized(field, api): + from google.cloud.bigquery.schema import SchemaField + + assert SchemaField(**field).to_api_repr() == api From be1f5031ace2a4efea132e05a5c302a054ef7779 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Fri, 21 May 2021 11:37:20 -0600 Subject: [PATCH 1159/2016] chore: release 2.17.0 (#670) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index ef184dffb3f9..2a0227118793 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.17.0](https://www.github.com/googleapis/python-bigquery/compare/v2.16.1...v2.17.0) (2021-05-21) + + +### Features + +* detect obsolete BQ Storage extra at runtime ([#666](https://www.github.com/googleapis/python-bigquery/issues/666)) ([bd7dbda](https://www.github.com/googleapis/python-bigquery/commit/bd7dbdae5c972b16bafc53c67911eeaa3255a880)) +* Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types ([#673](https://www.github.com/googleapis/python-bigquery/issues/673)) ([45421e7](https://www.github.com/googleapis/python-bigquery/commit/45421e73bfcddb244822e6a5cd43be6bd1ca2256)) + + +### Bug Fixes + +* **tests:** invalid path to strptime() ([#672](https://www.github.com/googleapis/python-bigquery/issues/672)) ([591cdd8](https://www.github.com/googleapis/python-bigquery/commit/591cdd851bb1321b048a05a378a0ef48d3ade462)) + ### [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 61e0c0a8360c..422b383cc30d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.16.1" +__version__ = "2.17.0" From 0f93572646b68a0d3c26d2b5c55bff3bf915fa59 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 22 May 2021 08:42:02 +0200 Subject: [PATCH 1160/2016] chore(deps): update dependency google-cloud-bigquery to v2.17.0 (#675) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.16.1` -> `==2.17.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/compatibility-slim/2.16.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/confidence-slim/2.16.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.17.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2170-httpswwwgithubcomgoogleapispython-bigquerycomparev2161v2170-2021-05-21) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.16.1...v2.17.0) ##### Features - detect obsolete BQ Storage extra at runtime ([#​666](https://www.github.com/googleapis/python-bigquery/issues/666)) ([bd7dbda](https://www.github.com/googleapis/python-bigquery/commit/bd7dbdae5c972b16bafc53c67911eeaa3255a880)) - Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types ([#​673](https://www.github.com/googleapis/python-bigquery/issues/673)) ([45421e7](https://www.github.com/googleapis/python-bigquery/commit/45421e73bfcddb244822e6a5cd43be6bd1ca2256)) ##### Bug Fixes - **tests:** invalid path to strptime() ([#​672](https://www.github.com/googleapis/python-bigquery/issues/672)) ([591cdd8](https://www.github.com/googleapis/python-bigquery/commit/591cdd851bb1321b048a05a378a0ef48d3ade462)) ##### [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12) ##### Bug Fixes - executemany rowcount only reflected the last execution ([#​660](https://www.github.com/googleapis/python-bigquery/issues/660)) ([aeadc8c](https://www.github.com/googleapis/python-bigquery/commit/aeadc8c2d614bb9f0883ec901fca48930f3aaf19))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻️ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e494fbaaeb1e..9fbe85970222 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.16.1 +google-cloud-bigquery==2.17.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index fcb481f48216..2ed5b0df8cbd 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.16.1 +google-cloud-bigquery==2.17.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From 169e2aa254adae180c988e8fb89189de6f5231f8 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 22 May 2021 09:18:05 +0000 Subject: [PATCH 1161/2016] chore: new owl bot post processor docker image (#677) gcr.io/repo-automation-bots/owlbot-python:latest@sha256:3c3a445b3ddc99ccd5d31edc4b4519729635d20693900db32c4f587ed51f7479 --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- .../google-cloud-bigquery/samples/geography/noxfile.py | 9 ++++++++- .../google-cloud-bigquery/samples/snippets/noxfile.py | 9 ++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 864c17653f80..46e3f021cc72 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:4c981a6b6f2b8914a448d7b3a01688365be03e3ed26dfee399a6aa77fb112eaa + digest: sha256:3c3a445b3ddc99ccd5d31edc4b4519729635d20693900db32c4f587ed51f7479 diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index be1a3f251496..160fe728648f 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -48,6 +48,10 @@ # to use your own Cloud project. "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. "envs": {}, @@ -172,6 +176,9 @@ def blacken(session: nox.sessions.Session) -> None: def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): if os.path.exists("constraints.txt"): @@ -198,7 +205,7 @@ def _session_tests( # on travis where slow and flaky tests are excluded. # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html success_codes=[0, 5], - env=get_pytest_env_vars() + env=get_pytest_env_vars(), ) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index be1a3f251496..160fe728648f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -48,6 +48,10 @@ # to use your own Cloud project. "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. "envs": {}, @@ -172,6 +176,9 @@ def blacken(session: nox.sessions.Session) -> None: def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): if os.path.exists("constraints.txt"): @@ -198,7 +205,7 @@ def _session_tests( # on travis where slow and flaky tests are excluded. # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html success_codes=[0, 5], - env=get_pytest_env_vars() + env=get_pytest_env_vars(), ) From 899655d467d707770339b4033e2fa7c799bedfab Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 24 May 2021 02:16:36 -0400 Subject: [PATCH 1162/2016] chore: delete unused protos (#676) --- .../bigquery_v2/proto/encryption_config.proto | 32 - .../proto/encryption_config_pb2.py | 104 - .../bigquery_v2/proto/location_metadata.proto | 34 - .../cloud/bigquery_v2/proto/model.proto | 1208 ----- .../cloud/bigquery_v2/proto/model_pb2.py | 4298 ----------------- .../bigquery_v2/proto/model_reference.proto | 38 - .../bigquery_v2/proto/model_reference_pb2.py | 142 - .../bigquery_v2/proto/standard_sql.proto | 112 - .../bigquery_v2/proto/standard_sql_pb2.py | 442 -- .../bigquery_v2/proto/table_reference.proto | 39 - 10 files changed, 6449 deletions(-) delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata.proto delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/table_reference.proto diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto deleted file mode 100644 index 1c0512a17ae6..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config.proto +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/protobuf/wrappers.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "EncryptionConfigProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -message EncryptionConfiguration { - // Optional. Describes the Cloud KMS encryption key that will be used to - // protect destination BigQuery table. The BigQuery Service Account associated - // with your project requires access to this encryption key. - google.protobuf.StringValue kms_key_name = 1 [(google.api.field_behavior) = OPTIONAL]; -} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py deleted file mode 100644 index 5ae21ea6f49f..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/encryption_config_pb2.py +++ /dev/null @@ -1,104 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/encryption_config.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/encryption_config.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\025EncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n6google/cloud/bigquery_v2/proto/encryption_config.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"R\n\x17\x45ncryptionConfiguration\x12\x37\n\x0ckms_key_name\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.StringValueB\x03\xe0\x41\x01\x42w\n\x1c\x63om.google.cloud.bigquery.v2B\x15\x45ncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - - -_ENCRYPTIONCONFIGURATION = _descriptor.Descriptor( - name="EncryptionConfiguration", - full_name="google.cloud.bigquery.v2.EncryptionConfiguration", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="kms_key_name", - full_name="google.cloud.bigquery.v2.EncryptionConfiguration.kms_key_name", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=179, - serialized_end=261, -) - -_ENCRYPTIONCONFIGURATION.fields_by_name[ - "kms_key_name" -].message_type = google_dot_protobuf_dot_wrappers__pb2._STRINGVALUE -DESCRIPTOR.message_types_by_name["EncryptionConfiguration"] = _ENCRYPTIONCONFIGURATION -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -EncryptionConfiguration = _reflection.GeneratedProtocolMessageType( - "EncryptionConfiguration", - (_message.Message,), - { - "DESCRIPTOR": _ENCRYPTIONCONFIGURATION, - "__module__": "google.cloud.bigquery_v2.proto.encryption_config_pb2", - "__doc__": """Encryption configuration. - - Attributes: - kms_key_name: - Optional. Describes the Cloud KMS encryption key that will be - used to protect destination BigQuery table. The BigQuery - Service Account associated with your project requires access - to this encryption key. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.EncryptionConfiguration) - }, -) -_sym_db.RegisterMessage(EncryptionConfiguration) - - -DESCRIPTOR._options = None -_ENCRYPTIONCONFIGURATION.fields_by_name["kms_key_name"]._options = None -# @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata.proto deleted file mode 100644 index 95a3133c5755..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/location_metadata.proto +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2019 Google LLC. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "LocationMetadataProto"; -option java_package = "com.google.cloud.bigquery.v2"; - - -// BigQuery-specific metadata about a location. This will be set on -// google.cloud.location.Location.metadata in Cloud Location API -// responses. -message LocationMetadata { - // The legacy BigQuery location ID, e.g. “EU” for the “europe” location. - // This is for any API consumers that need the legacy “US” and “EU” locations. - string legacy_location_id = 1; -} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto deleted file mode 100644 index 2d400dddd44e..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model.proto +++ /dev/null @@ -1,1208 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/client.proto"; -import "google/api/field_behavior.proto"; -import "google/cloud/bigquery/v2/encryption_config.proto"; -import "google/cloud/bigquery/v2/model_reference.proto"; -import "google/cloud/bigquery/v2/standard_sql.proto"; -import "google/cloud/bigquery/v2/table_reference.proto"; -import "google/protobuf/empty.proto"; -import "google/protobuf/timestamp.proto"; -import "google/protobuf/wrappers.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "ModelProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -service ModelService { - option (google.api.default_host) = "bigquery.googleapis.com"; - option (google.api.oauth_scopes) = - "https://www.googleapis.com/auth/bigquery," - "https://www.googleapis.com/auth/bigquery.readonly," - "https://www.googleapis.com/auth/cloud-platform," - "https://www.googleapis.com/auth/cloud-platform.read-only"; - - // Gets the specified model resource by model ID. - rpc GetModel(GetModelRequest) returns (Model) { - option (google.api.method_signature) = "project_id,dataset_id,model_id"; - } - - // Lists all models in the specified dataset. Requires the READER dataset - // role. - rpc ListModels(ListModelsRequest) returns (ListModelsResponse) { - option (google.api.method_signature) = "project_id,dataset_id,max_results"; - } - - // Patch specific fields in the specified model. - rpc PatchModel(PatchModelRequest) returns (Model) { - option (google.api.method_signature) = "project_id,dataset_id,model_id,model"; - } - - // Deletes the model specified by modelId from the dataset. - rpc DeleteModel(DeleteModelRequest) returns (google.protobuf.Empty) { - option (google.api.method_signature) = "project_id,dataset_id,model_id"; - } -} - -message Model { - message SeasonalPeriod { - enum SeasonalPeriodType { - SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0; - - // No seasonality - NO_SEASONALITY = 1; - - // Daily period, 24 hours. - DAILY = 2; - - // Weekly period, 7 days. - WEEKLY = 3; - - // Monthly period, 30 days or irregular. - MONTHLY = 4; - - // Quarterly period, 90 days or irregular. - QUARTERLY = 5; - - // Yearly period, 365 days or irregular. - YEARLY = 6; - } - - - } - - message KmeansEnums { - // Indicates the method used to initialize the centroids for KMeans - // clustering algorithm. - enum KmeansInitializationMethod { - KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0; - - // Initializes the centroids randomly. - RANDOM = 1; - - // Initializes the centroids using data specified in - // kmeans_initialization_column. - CUSTOM = 2; - - // Initializes with kmeans++. - KMEANS_PLUS_PLUS = 3; - } - - - } - - // Evaluation metrics for regression and explicit feedback type matrix - // factorization models. - message RegressionMetrics { - // Mean absolute error. - google.protobuf.DoubleValue mean_absolute_error = 1; - - // Mean squared error. - google.protobuf.DoubleValue mean_squared_error = 2; - - // Mean squared log error. - google.protobuf.DoubleValue mean_squared_log_error = 3; - - // Median absolute error. - google.protobuf.DoubleValue median_absolute_error = 4; - - // R^2 score. - google.protobuf.DoubleValue r_squared = 5; - } - - // Aggregate metrics for classification/classifier models. For multi-class - // models, the metrics are either macro-averaged or micro-averaged. When - // macro-averaged, the metrics are calculated for each label and then an - // unweighted average is taken of those values. When micro-averaged, the - // metric is calculated globally by counting the total number of correctly - // predicted rows. - message AggregateClassificationMetrics { - // Precision is the fraction of actual positive predictions that had - // positive actual labels. For multiclass this is a macro-averaged - // metric treating each class as a binary classifier. - google.protobuf.DoubleValue precision = 1; - - // Recall is the fraction of actual positive labels that were given a - // positive prediction. For multiclass this is a macro-averaged metric. - google.protobuf.DoubleValue recall = 2; - - // Accuracy is the fraction of predictions given the correct label. For - // multiclass this is a micro-averaged metric. - google.protobuf.DoubleValue accuracy = 3; - - // Threshold at which the metrics are computed. For binary - // classification models this is the positive class threshold. - // For multi-class classfication models this is the confidence - // threshold. - google.protobuf.DoubleValue threshold = 4; - - // The F1 score is an average of recall and precision. For multiclass - // this is a macro-averaged metric. - google.protobuf.DoubleValue f1_score = 5; - - // Logarithmic Loss. For multiclass this is a macro-averaged metric. - google.protobuf.DoubleValue log_loss = 6; - - // Area Under a ROC Curve. For multiclass this is a macro-averaged - // metric. - google.protobuf.DoubleValue roc_auc = 7; - } - - // Evaluation metrics for binary classification/classifier models. - message BinaryClassificationMetrics { - // Confusion matrix for binary classification models. - message BinaryConfusionMatrix { - // Threshold value used when computing each of the following metric. - google.protobuf.DoubleValue positive_class_threshold = 1; - - // Number of true samples predicted as true. - google.protobuf.Int64Value true_positives = 2; - - // Number of false samples predicted as true. - google.protobuf.Int64Value false_positives = 3; - - // Number of true samples predicted as false. - google.protobuf.Int64Value true_negatives = 4; - - // Number of false samples predicted as false. - google.protobuf.Int64Value false_negatives = 5; - - // The fraction of actual positive predictions that had positive actual - // labels. - google.protobuf.DoubleValue precision = 6; - - // The fraction of actual positive labels that were given a positive - // prediction. - google.protobuf.DoubleValue recall = 7; - - // The equally weighted average of recall and precision. - google.protobuf.DoubleValue f1_score = 8; - - // The fraction of predictions given the correct label. - google.protobuf.DoubleValue accuracy = 9; - } - - // Aggregate classification metrics. - AggregateClassificationMetrics aggregate_classification_metrics = 1; - - // Binary confusion matrix at multiple thresholds. - repeated BinaryConfusionMatrix binary_confusion_matrix_list = 2; - - // Label representing the positive class. - string positive_label = 3; - - // Label representing the negative class. - string negative_label = 4; - } - - // Evaluation metrics for multi-class classification/classifier models. - message MultiClassClassificationMetrics { - // Confusion matrix for multi-class classification models. - message ConfusionMatrix { - // A single entry in the confusion matrix. - message Entry { - // The predicted label. For confidence_threshold > 0, we will - // also add an entry indicating the number of items under the - // confidence threshold. - string predicted_label = 1; - - // Number of items being predicted as this label. - google.protobuf.Int64Value item_count = 2; - } - - // A single row in the confusion matrix. - message Row { - // The original label of this row. - string actual_label = 1; - - // Info describing predicted label distribution. - repeated Entry entries = 2; - } - - // Confidence threshold used when computing the entries of the - // confusion matrix. - google.protobuf.DoubleValue confidence_threshold = 1; - - // One row per actual label. - repeated Row rows = 2; - } - - // Aggregate classification metrics. - AggregateClassificationMetrics aggregate_classification_metrics = 1; - - // Confusion matrix at different thresholds. - repeated ConfusionMatrix confusion_matrix_list = 2; - } - - // Evaluation metrics for clustering models. - message ClusteringMetrics { - // Message containing the information about one cluster. - message Cluster { - // Representative value of a single feature within the cluster. - message FeatureValue { - // Representative value of a categorical feature. - message CategoricalValue { - // Represents the count of a single category within the cluster. - message CategoryCount { - // The name of category. - string category = 1; - - // The count of training samples matching the category within the - // cluster. - google.protobuf.Int64Value count = 2; - } - - // Counts of all categories for the categorical feature. If there are - // more than ten categories, we return top ten (by count) and return - // one more CategoryCount with category "_OTHER_" and count as - // aggregate counts of remaining categories. - repeated CategoryCount category_counts = 1; - } - - // The feature column name. - string feature_column = 1; - - oneof value { - // The numerical feature value. This is the centroid value for this - // feature. - google.protobuf.DoubleValue numerical_value = 2; - - // The categorical feature value. - CategoricalValue categorical_value = 3; - } - } - - // Centroid id. - int64 centroid_id = 1; - - // Values of highly variant features for this cluster. - repeated FeatureValue feature_values = 2; - - // Count of training data rows that were assigned to this cluster. - google.protobuf.Int64Value count = 3; - } - - // Davies-Bouldin index. - google.protobuf.DoubleValue davies_bouldin_index = 1; - - // Mean of squared distances between each sample to its cluster centroid. - google.protobuf.DoubleValue mean_squared_distance = 2; - - // [Beta] Information for all clusters. - repeated Cluster clusters = 3; - } - - // Evaluation metrics used by weighted-ALS models specified by - // feedback_type=implicit. - message RankingMetrics { - // Calculates a precision per user for all the items by ranking them and - // then averages all the precisions across all the users. - google.protobuf.DoubleValue mean_average_precision = 1; - - // Similar to the mean squared error computed in regression and explicit - // recommendation models except instead of computing the rating directly, - // the output from evaluate is computed against a preference which is 1 or 0 - // depending on if the rating exists or not. - google.protobuf.DoubleValue mean_squared_error = 2; - - // A metric to determine the goodness of a ranking calculated from the - // predicted confidence by comparing it to an ideal rank measured by the - // original ratings. - google.protobuf.DoubleValue normalized_discounted_cumulative_gain = 3; - - // Determines the goodness of a ranking by computing the percentile rank - // from the predicted confidence and dividing it by the original rank. - google.protobuf.DoubleValue average_rank = 4; - } - - // Model evaluation metrics for ARIMA forecasting models. - message ArimaForecastingMetrics { - // Model evaluation metrics for a single ARIMA forecasting model. - message ArimaSingleModelForecastingMetrics { - // Non-seasonal order. - ArimaOrder non_seasonal_order = 1; - - // Arima fitting metrics. - ArimaFittingMetrics arima_fitting_metrics = 2; - - // Is arima model fitted with drift or not. It is always false when d - // is not 1. - bool has_drift = 3; - - // The id to indicate different time series. - string time_series_id = 4; - - // Seasonal periods. Repeated because multiple periods are supported - // for one time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 5; - } - - // Non-seasonal order. - repeated ArimaOrder non_seasonal_order = 1; - - // Arima model fitting metrics. - repeated ArimaFittingMetrics arima_fitting_metrics = 2; - - // Seasonal periods. Repeated because multiple periods are supported for one - // time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 3; - - // Whether Arima model fitted with drift or not. It is always false when d - // is not 1. - repeated bool has_drift = 4; - - // Id to differentiate different time series for the large-scale case. - repeated string time_series_id = 5; - - // Repeated as there can be many metric sets (one for each model) in - // auto-arima and the large-scale case. - repeated ArimaSingleModelForecastingMetrics arima_single_model_forecasting_metrics = 6; - } - - // Evaluation metrics of a model. These are either computed on all training - // data or just the eval data based on whether eval data was used during - // training. These are not present for imported models. - message EvaluationMetrics { - oneof metrics { - // Populated for regression models and explicit feedback type matrix - // factorization models. - RegressionMetrics regression_metrics = 1; - - // Populated for binary classification/classifier models. - BinaryClassificationMetrics binary_classification_metrics = 2; - - // Populated for multi-class classification/classifier models. - MultiClassClassificationMetrics multi_class_classification_metrics = 3; - - // Populated for clustering models. - ClusteringMetrics clustering_metrics = 4; - - // Populated for implicit feedback type matrix factorization models. - RankingMetrics ranking_metrics = 5; - - // Populated for ARIMA models. - ArimaForecastingMetrics arima_forecasting_metrics = 6; - } - } - - // Data split result. This contains references to the training and evaluation - // data tables that were used to train the model. - message DataSplitResult { - // Table reference of the training data after split. - TableReference training_table = 1; - - // Table reference of the evaluation data after split. - TableReference evaluation_table = 2; - } - - // Arima order, can be used for both non-seasonal and seasonal parts. - message ArimaOrder { - // Order of the autoregressive part. - int64 p = 1; - - // Order of the differencing part. - int64 d = 2; - - // Order of the moving-average part. - int64 q = 3; - } - - // ARIMA model fitting metrics. - message ArimaFittingMetrics { - // Log-likelihood. - double log_likelihood = 1; - - // AIC. - double aic = 2; - - // Variance. - double variance = 3; - } - - // Global explanations containing the top most important features - // after training. - message GlobalExplanation { - // Explanation for a single feature. - message Explanation { - // Full name of the feature. For non-numerical features, will be - // formatted like .. Overall size of - // feature name will always be truncated to first 120 characters. - string feature_name = 1; - - // Attribution of feature. - google.protobuf.DoubleValue attribution = 2; - } - - // A list of the top global explanations. Sorted by absolute value of - // attribution in descending order. - repeated Explanation explanations = 1; - - // Class label for this set of global explanations. Will be empty/null for - // binary logistic and linear regression models. Sorted alphabetically in - // descending order. - string class_label = 2; - } - - // Information about a single training query run for the model. - message TrainingRun { - message TrainingOptions { - // The maximum number of iterations in training. Used only for iterative - // training algorithms. - int64 max_iterations = 1; - - // Type of loss function used during training run. - LossType loss_type = 2; - - // Learning rate in training. Used only for iterative training algorithms. - double learn_rate = 3; - - // L1 regularization coefficient. - google.protobuf.DoubleValue l1_regularization = 4; - - // L2 regularization coefficient. - google.protobuf.DoubleValue l2_regularization = 5; - - // When early_stop is true, stops training when accuracy improvement is - // less than 'min_relative_progress'. Used only for iterative training - // algorithms. - google.protobuf.DoubleValue min_relative_progress = 6; - - // Whether to train a model from the last checkpoint. - google.protobuf.BoolValue warm_start = 7; - - // Whether to stop early when the loss doesn't improve significantly - // any more (compared to min_relative_progress). Used only for iterative - // training algorithms. - google.protobuf.BoolValue early_stop = 8; - - // Name of input label columns in training data. - repeated string input_label_columns = 9; - - // The data split type for training and evaluation, e.g. RANDOM. - DataSplitMethod data_split_method = 10; - - // The fraction of evaluation data over the whole input data. The rest - // of data will be used as training data. The format should be double. - // Accurate to two decimal places. - // Default value is 0.2. - double data_split_eval_fraction = 11; - - // The column to split data with. This column won't be used as a - // feature. - // 1. When data_split_method is CUSTOM, the corresponding column should - // be boolean. The rows with true value tag are eval data, and the false - // are training data. - // 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION - // rows (from smallest to largest) in the corresponding column are used - // as training data, and the rest are eval data. It respects the order - // in Orderable data types: - // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties - string data_split_column = 12; - - // The strategy to determine learn rate for the current iteration. - LearnRateStrategy learn_rate_strategy = 13; - - // Specifies the initial learning rate for the line search learn rate - // strategy. - double initial_learn_rate = 16; - - // Weights associated with each label class, for rebalancing the - // training data. Only applicable for classification models. - map label_class_weights = 17; - - // User column specified for matrix factorization models. - string user_column = 18; - - // Item column specified for matrix factorization models. - string item_column = 19; - - // Distance type for clustering models. - DistanceType distance_type = 20; - - // Number of clusters for clustering models. - int64 num_clusters = 21; - - // [Beta] Google Cloud Storage URI from which the model was imported. Only - // applicable for imported models. - string model_uri = 22; - - // Optimization strategy for training linear regression models. - OptimizationStrategy optimization_strategy = 23; - - // Hidden units for dnn models. - repeated int64 hidden_units = 24; - - // Batch size for dnn models. - int64 batch_size = 25; - - // Dropout probability for dnn models. - google.protobuf.DoubleValue dropout = 26; - - // Maximum depth of a tree for boosted tree models. - int64 max_tree_depth = 27; - - // Subsample fraction of the training data to grow tree to prevent - // overfitting for boosted tree models. - double subsample = 28; - - // Minimum split loss for boosted tree models. - google.protobuf.DoubleValue min_split_loss = 29; - - // Num factors specified for matrix factorization models. - int64 num_factors = 30; - - // Feedback type that specifies which algorithm to run for matrix - // factorization. - FeedbackType feedback_type = 31; - - // Hyperparameter for matrix factoration when implicit feedback type is - // specified. - google.protobuf.DoubleValue wals_alpha = 32; - - // The method used to initialize the centroids for kmeans algorithm. - KmeansEnums.KmeansInitializationMethod kmeans_initialization_method = 33; - - // The column used to provide the initial centroids for kmeans algorithm - // when kmeans_initialization_method is CUSTOM. - string kmeans_initialization_column = 34; - - // Column to be designated as time series timestamp for ARIMA model. - string time_series_timestamp_column = 35; - - // Column to be designated as time series data for ARIMA model. - string time_series_data_column = 36; - - // Whether to enable auto ARIMA or not. - bool auto_arima = 37; - - // A specification of the non-seasonal part of the ARIMA model: the three - // components (p, d, q) are the AR order, the degree of differencing, and - // the MA order. - ArimaOrder non_seasonal_order = 38; - - // The data frequency of a time series. - DataFrequency data_frequency = 39; - - // Include drift when fitting an ARIMA model. - bool include_drift = 41; - - // The geographical region based on which the holidays are considered in - // time series modeling. If a valid value is specified, then holiday - // effects modeling is enabled. - HolidayRegion holiday_region = 42; - - // The id column that will be used to indicate different time series to - // forecast in parallel. - string time_series_id_column = 43; - - // The number of periods ahead that need to be forecasted. - int64 horizon = 44; - - // Whether to preserve the input structs in output feature names. - // Suppose there is a struct A with field b. - // When false (default), the output feature name is A_b. - // When true, the output feature name is A.b. - bool preserve_input_structs = 45; - - // The max value of non-seasonal p and q. - int64 auto_arima_max_order = 46; - } - - // Information about a single iteration of the training run. - message IterationResult { - // Information about a single cluster for clustering model. - message ClusterInfo { - // Centroid id. - int64 centroid_id = 1; - - // Cluster radius, the average distance from centroid - // to each point assigned to the cluster. - google.protobuf.DoubleValue cluster_radius = 2; - - // Cluster size, the total number of points assigned to the cluster. - google.protobuf.Int64Value cluster_size = 3; - } - - // (Auto-)arima fitting result. Wrap everything in ArimaResult for easier - // refactoring if we want to use model-specific iteration results. - message ArimaResult { - // Arima coefficients. - message ArimaCoefficients { - // Auto-regressive coefficients, an array of double. - repeated double auto_regressive_coefficients = 1; - - // Moving-average coefficients, an array of double. - repeated double moving_average_coefficients = 2; - - // Intercept coefficient, just a double not an array. - double intercept_coefficient = 3; - } - - // Arima model information. - message ArimaModelInfo { - // Non-seasonal order. - ArimaOrder non_seasonal_order = 1; - - // Arima coefficients. - ArimaCoefficients arima_coefficients = 2; - - // Arima fitting metrics. - ArimaFittingMetrics arima_fitting_metrics = 3; - - // Whether Arima model fitted with drift or not. It is always false - // when d is not 1. - bool has_drift = 4; - - // The id to indicate different time series. - string time_series_id = 5; - - // Seasonal periods. Repeated because multiple periods are supported - // for one time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 6; - } - - // This message is repeated because there are multiple arima models - // fitted in auto-arima. For non-auto-arima model, its size is one. - repeated ArimaModelInfo arima_model_info = 1; - - // Seasonal periods. Repeated because multiple periods are supported for - // one time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 2; - } - - // Index of the iteration, 0 based. - google.protobuf.Int32Value index = 1; - - // Time taken to run the iteration in milliseconds. - google.protobuf.Int64Value duration_ms = 4; - - // Loss computed on the training data at the end of iteration. - google.protobuf.DoubleValue training_loss = 5; - - // Loss computed on the eval data at the end of iteration. - google.protobuf.DoubleValue eval_loss = 6; - - // Learn rate used for this iteration. - double learn_rate = 7; - - // Information about top clusters for clustering models. - repeated ClusterInfo cluster_infos = 8; - - ArimaResult arima_result = 9; - } - - // Options that were used for this training run, includes - // user specified and default options that were used. - TrainingOptions training_options = 1; - - // The start time of this training run. - google.protobuf.Timestamp start_time = 8; - - // Output of each iteration run, results.size() <= max_iterations. - repeated IterationResult results = 6; - - // The evaluation metrics over training/eval data that were computed at the - // end of training. - EvaluationMetrics evaluation_metrics = 7; - - // Data split result of the training run. Only set when the input data is - // actually split. - DataSplitResult data_split_result = 9; - - // Global explanations for important features of the model. For multi-class - // models, there is one entry for each label class. For other models, there - // is only one entry in the list. - repeated GlobalExplanation global_explanations = 10; - } - - // Indicates the type of the Model. - enum ModelType { - MODEL_TYPE_UNSPECIFIED = 0; - - // Linear regression model. - LINEAR_REGRESSION = 1; - - // Logistic regression based classification model. - LOGISTIC_REGRESSION = 2; - - // K-means clustering model. - KMEANS = 3; - - // Matrix factorization model. - MATRIX_FACTORIZATION = 4; - - // [Beta] DNN classifier model. - DNN_CLASSIFIER = 5; - - // [Beta] An imported TensorFlow model. - TENSORFLOW = 6; - - // [Beta] DNN regressor model. - DNN_REGRESSOR = 7; - - // [Beta] Boosted tree regressor model. - BOOSTED_TREE_REGRESSOR = 9; - - // [Beta] Boosted tree classifier model. - BOOSTED_TREE_CLASSIFIER = 10; - - // [Beta] ARIMA model. - ARIMA = 11; - - // [Beta] AutoML Tables regression model. - AUTOML_REGRESSOR = 12; - - // [Beta] AutoML Tables classification model. - AUTOML_CLASSIFIER = 13; - } - - // Loss metric to evaluate model training performance. - enum LossType { - LOSS_TYPE_UNSPECIFIED = 0; - - // Mean squared loss, used for linear regression. - MEAN_SQUARED_LOSS = 1; - - // Mean log loss, used for logistic regression. - MEAN_LOG_LOSS = 2; - } - - // Distance metric used to compute the distance between two points. - enum DistanceType { - DISTANCE_TYPE_UNSPECIFIED = 0; - - // Eculidean distance. - EUCLIDEAN = 1; - - // Cosine distance. - COSINE = 2; - } - - // Indicates the method to split input data into multiple tables. - enum DataSplitMethod { - DATA_SPLIT_METHOD_UNSPECIFIED = 0; - - // Splits data randomly. - RANDOM = 1; - - // Splits data with the user provided tags. - CUSTOM = 2; - - // Splits data sequentially. - SEQUENTIAL = 3; - - // Data split will be skipped. - NO_SPLIT = 4; - - // Splits data automatically: Uses NO_SPLIT if the data size is small. - // Otherwise uses RANDOM. - AUTO_SPLIT = 5; - } - - // Type of supported data frequency for time series forecasting models. - enum DataFrequency { - DATA_FREQUENCY_UNSPECIFIED = 0; - - // Automatically inferred from timestamps. - AUTO_FREQUENCY = 1; - - // Yearly data. - YEARLY = 2; - - // Quarterly data. - QUARTERLY = 3; - - // Monthly data. - MONTHLY = 4; - - // Weekly data. - WEEKLY = 5; - - // Daily data. - DAILY = 6; - - // Hourly data. - HOURLY = 7; - } - - // Type of supported holiday regions for time series forecasting models. - enum HolidayRegion { - // Holiday region unspecified. - HOLIDAY_REGION_UNSPECIFIED = 0; - - // Global. - GLOBAL = 1; - - // North America. - NA = 2; - - // Japan and Asia Pacific: Korea, Greater China, India, Australia, and New - // Zealand. - JAPAC = 3; - - // Europe, the Middle East and Africa. - EMEA = 4; - - // Latin America and the Caribbean. - LAC = 5; - - // United Arab Emirates - AE = 6; - - // Argentina - AR = 7; - - // Austria - AT = 8; - - // Australia - AU = 9; - - // Belgium - BE = 10; - - // Brazil - BR = 11; - - // Canada - CA = 12; - - // Switzerland - CH = 13; - - // Chile - CL = 14; - - // China - CN = 15; - - // Colombia - CO = 16; - - // Czechoslovakia - CS = 17; - - // Czech Republic - CZ = 18; - - // Germany - DE = 19; - - // Denmark - DK = 20; - - // Algeria - DZ = 21; - - // Ecuador - EC = 22; - - // Estonia - EE = 23; - - // Egypt - EG = 24; - - // Spain - ES = 25; - - // Finland - FI = 26; - - // France - FR = 27; - - // Great Britain (United Kingdom) - GB = 28; - - // Greece - GR = 29; - - // Hong Kong - HK = 30; - - // Hungary - HU = 31; - - // Indonesia - ID = 32; - - // Ireland - IE = 33; - - // Israel - IL = 34; - - // India - IN = 35; - - // Iran - IR = 36; - - // Italy - IT = 37; - - // Japan - JP = 38; - - // Korea (South) - KR = 39; - - // Latvia - LV = 40; - - // Morocco - MA = 41; - - // Mexico - MX = 42; - - // Malaysia - MY = 43; - - // Nigeria - NG = 44; - - // Netherlands - NL = 45; - - // Norway - NO = 46; - - // New Zealand - NZ = 47; - - // Peru - PE = 48; - - // Philippines - PH = 49; - - // Pakistan - PK = 50; - - // Poland - PL = 51; - - // Portugal - PT = 52; - - // Romania - RO = 53; - - // Serbia - RS = 54; - - // Russian Federation - RU = 55; - - // Saudi Arabia - SA = 56; - - // Sweden - SE = 57; - - // Singapore - SG = 58; - - // Slovenia - SI = 59; - - // Slovakia - SK = 60; - - // Thailand - TH = 61; - - // Turkey - TR = 62; - - // Taiwan - TW = 63; - - // Ukraine - UA = 64; - - // United States - US = 65; - - // Venezuela - VE = 66; - - // Viet Nam - VN = 67; - - // South Africa - ZA = 68; - } - - // Indicates the learning rate optimization strategy to use. - enum LearnRateStrategy { - LEARN_RATE_STRATEGY_UNSPECIFIED = 0; - - // Use line search to determine learning rate. - LINE_SEARCH = 1; - - // Use a constant learning rate. - CONSTANT = 2; - } - - // Indicates the optimization strategy used for training. - enum OptimizationStrategy { - OPTIMIZATION_STRATEGY_UNSPECIFIED = 0; - - // Uses an iterative batch gradient descent algorithm. - BATCH_GRADIENT_DESCENT = 1; - - // Uses a normal equation to solve linear regression problem. - NORMAL_EQUATION = 2; - } - - // Indicates the training algorithm to use for matrix factorization models. - enum FeedbackType { - FEEDBACK_TYPE_UNSPECIFIED = 0; - - // Use weighted-als for implicit feedback problems. - IMPLICIT = 1; - - // Use nonweighted-als for explicit feedback problems. - EXPLICIT = 2; - } - - // Output only. A hash of this resource. - string etag = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Required. Unique identifier for this model. - ModelReference model_reference = 2 [(google.api.field_behavior) = REQUIRED]; - - // Output only. The time when this model was created, in millisecs since the epoch. - int64 creation_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. The time when this model was last modified, in millisecs since the epoch. - int64 last_modified_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Optional. A user-friendly description of this model. - string description = 12 [(google.api.field_behavior) = OPTIONAL]; - - // Optional. A descriptive name for this model. - string friendly_name = 14 [(google.api.field_behavior) = OPTIONAL]; - - // The labels associated with this model. You can use these to organize - // and group your models. Label keys and values can be no longer - // than 63 characters, can only contain lowercase letters, numeric - // characters, underscores and dashes. International characters are allowed. - // Label values are optional. Label keys must start with a letter and each - // label in the list must have a different key. - map labels = 15; - - // Optional. The time when this model expires, in milliseconds since the epoch. - // If not present, the model will persist indefinitely. Expired models - // will be deleted and their storage reclaimed. The defaultTableExpirationMs - // property of the encapsulating dataset can be used to set a default - // expirationTime on newly created models. - int64 expiration_time = 16 [(google.api.field_behavior) = OPTIONAL]; - - // Output only. The geographic location where the model resides. This value - // is inherited from the dataset. - string location = 13 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Custom encryption configuration (e.g., Cloud KMS keys). This shows the - // encryption configuration of the model data while stored in BigQuery - // storage. This field can be used with PatchModel to update encryption key - // for an already encrypted model. - EncryptionConfiguration encryption_configuration = 17; - - // Output only. Type of the model resource. - ModelType model_type = 7 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. Information for all training runs in increasing order of start_time. - repeated TrainingRun training_runs = 9 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. Input feature columns that were used to train this model. - repeated StandardSqlField feature_columns = 10 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. Label columns that were used to train this model. - // The output of the model will have a "predicted_" prefix to these columns. - repeated StandardSqlField label_columns = 11 [(google.api.field_behavior) = OUTPUT_ONLY]; -} - -message GetModelRequest { - // Required. Project ID of the requested model. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the requested model. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. Model ID of the requested model. - string model_id = 3 [(google.api.field_behavior) = REQUIRED]; -} - -message PatchModelRequest { - // Required. Project ID of the model to patch. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the model to patch. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. Model ID of the model to patch. - string model_id = 3 [(google.api.field_behavior) = REQUIRED]; - - // Required. Patched model. - // Follows RFC5789 patch semantics. Missing fields are not updated. - // To clear a field, explicitly set to default value. - Model model = 4 [(google.api.field_behavior) = REQUIRED]; -} - -message DeleteModelRequest { - // Required. Project ID of the model to delete. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the model to delete. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. Model ID of the model to delete. - string model_id = 3 [(google.api.field_behavior) = REQUIRED]; -} - -message ListModelsRequest { - // Required. Project ID of the models to list. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the models to list. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // The maximum number of results to return in a single response page. - // Leverage the page tokens to iterate through the entire collection. - google.protobuf.UInt32Value max_results = 3; - - // Page token, returned by a previous call to request the next page of - // results - string page_token = 4; -} - -message ListModelsResponse { - // Models in the requested dataset. Only the following fields are populated: - // model_reference, model_type, creation_time, last_modified_time and - // labels. - repeated Model models = 1; - - // A token to request the next page of results. - string next_page_token = 2; -} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py deleted file mode 100644 index 7b66be8f7131..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_pb2.py +++ /dev/null @@ -1,4298 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/model.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import client_pb2 as google_dot_api_dot_client__pb2 -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.cloud.bigquery_v2.proto import ( - encryption_config_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2, -) -from google.cloud.bigquery_v2.proto import ( - model_reference_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2, -) -from google.cloud.bigquery_v2.proto import ( - standard_sql_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2, -) -from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 -from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 -from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/model.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x36google/cloud/bigquery_v2/proto/encryption_config.proto\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"\x9b\x35\n\x05Model\x12\x11\n\x04\x65tag\x18\x01 \x01(\tB\x03\xe0\x41\x03\x12\x46\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReferenceB\x03\xe0\x41\x02\x12\x1a\n\rcreation_time\x18\x05 \x01(\x03\x42\x03\xe0\x41\x03\x12\x1f\n\x12last_modified_time\x18\x06 \x01(\x03\x42\x03\xe0\x41\x03\x12\x18\n\x0b\x64\x65scription\x18\x0c \x01(\tB\x03\xe0\x41\x01\x12\x1a\n\rfriendly_name\x18\x0e \x01(\tB\x03\xe0\x41\x01\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x1c\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x42\x03\xe0\x41\x01\x12\x15\n\x08location\x18\r \x01(\tB\x03\xe0\x41\x03\x12S\n\x18\x65ncryption_configuration\x18\x11 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.EncryptionConfiguration\x12\x42\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelTypeB\x03\xe0\x41\x03\x12G\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRunB\x03\xe0\x41\x03\x12H\n\x0f\x66\x65\x61ture_columns\x18\n \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x12\x46\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x1aq\n\x0bKmeansEnums"b\n\x1aKmeansInitializationMethod\x12,\n(KMEANS_INITIALIZATION_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x9f\x06\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x12\x16\n\x0epositive_label\x18\x03 \x01(\t\x12\x16\n\x0enegative_label\x18\x04 \x01(\t\x1a\xec\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\t \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\xcb\x06\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12K\n\x08\x63lusters\x18\x03 \x03(\x0b\x32\x39.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster\x1a\xef\x04\n\x07\x43luster\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12^\n\x0e\x66\x65\x61ture_values\x18\x02 \x03(\x0b\x32\x46.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue\x12*\n\x05\x63ount\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\xc2\x03\n\x0c\x46\x65\x61tureValue\x12\x16\n\x0e\x66\x65\x61ture_column\x18\x01 \x01(\t\x12\x37\n\x0fnumerical_value\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValueH\x00\x12t\n\x11\x63\x61tegorical_value\x18\x03 \x01(\x0b\x32W.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValueH\x00\x1a\xe1\x01\n\x10\x43\x61tegoricalValue\x12~\n\x0f\x63\x61tegory_counts\x18\x01 \x03(\x0b\x32\x65.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount\x1aM\n\rCategoryCount\x12\x10\n\x08\x63\x61tegory\x18\x01 \x01(\t\x12*\n\x05\x63ount\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64ValueB\x07\n\x05value\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\xab\x0f\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\x9d\t\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 \x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x12\x11\n\tmodel_uri\x18\x16 \x01(\t\x12S\n\x15optimization_strategy\x18\x17 \x01(\x0e\x32\x34.google.cloud.bigquery.v2.Model.OptimizationStrategy\x12l\n\x1ckmeans_initialization_method\x18! \x01(\x0e\x32\x46.google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod\x12$\n\x1ckmeans_initialization_column\x18" \x01(\t\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"s\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03\x12\x0e\n\nTENSORFLOW\x10\x06"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"n\n\x14OptimizationStrategy\x12%\n!OPTIMIZATION_STRATEGY_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x42\x41TCH_GRADIENT_DESCENT\x10\x01\x12\x13\n\x0fNORMAL_EQUATION\x10\x02"Z\n\x0fGetModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x91\x01\n\x11PatchModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x12\x33\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.ModelB\x03\xe0\x41\x02"]\n\x12\x44\x65leteModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x8c\x01\n\x11ListModelsRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 \x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\xfa\x05\n\x0cModelService\x12y\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"!\xda\x41\x1eproject_id,dataset_id,model_id\x12\x8d\x01\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"$\xda\x41!project_id,dataset_id,max_results\x12\x83\x01\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\'\xda\x41$project_id,dataset_id,model_id,model\x12v\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"!\xda\x41\x1eproject_id,dataset_id,model_id\x1a\xe0\x01\xca\x41\x17\x62igquery.googleapis.com\xd2\x41\xc2\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-onlyBl\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_client__pb2.DESCRIPTOR, - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2.DESCRIPTOR, - google_dot_protobuf_dot_empty__pb2.DESCRIPTOR, - google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, - google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - - -_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD = _descriptor.EnumDescriptor( - name="KmeansInitializationMethod", - full_name="google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="KMEANS_INITIALIZATION_METHOD_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="RANDOM", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="CUSTOM", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=1132, - serialized_end=1230, -) -_sym_db.RegisterEnumDescriptor(_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD) - -_MODEL_MODELTYPE = _descriptor.EnumDescriptor( - name="ModelType", - full_name="google.cloud.bigquery.v2.Model.ModelType", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="MODEL_TYPE_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="LINEAR_REGRESSION", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="LOGISTIC_REGRESSION", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="KMEANS", - index=3, - number=3, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="TENSORFLOW", - index=4, - number=6, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6632, - serialized_end=6747, -) -_sym_db.RegisterEnumDescriptor(_MODEL_MODELTYPE) - -_MODEL_LOSSTYPE = _descriptor.EnumDescriptor( - name="LossType", - full_name="google.cloud.bigquery.v2.Model.LossType", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="LOSS_TYPE_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="MEAN_SQUARED_LOSS", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="MEAN_LOG_LOSS", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6749, - serialized_end=6828, -) -_sym_db.RegisterEnumDescriptor(_MODEL_LOSSTYPE) - -_MODEL_DISTANCETYPE = _descriptor.EnumDescriptor( - name="DistanceType", - full_name="google.cloud.bigquery.v2.Model.DistanceType", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="DISTANCE_TYPE_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="EUCLIDEAN", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="COSINE", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6830, - serialized_end=6902, -) -_sym_db.RegisterEnumDescriptor(_MODEL_DISTANCETYPE) - -_MODEL_DATASPLITMETHOD = _descriptor.EnumDescriptor( - name="DataSplitMethod", - full_name="google.cloud.bigquery.v2.Model.DataSplitMethod", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="DATA_SPLIT_METHOD_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="RANDOM", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="CUSTOM", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="SEQUENTIAL", - index=3, - number=3, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="NO_SPLIT", - index=4, - number=4, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="AUTO_SPLIT", - index=5, - number=5, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6904, - serialized_end=7026, -) -_sym_db.RegisterEnumDescriptor(_MODEL_DATASPLITMETHOD) - -_MODEL_LEARNRATESTRATEGY = _descriptor.EnumDescriptor( - name="LearnRateStrategy", - full_name="google.cloud.bigquery.v2.Model.LearnRateStrategy", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="LEARN_RATE_STRATEGY_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="LINE_SEARCH", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="CONSTANT", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=7028, - serialized_end=7115, -) -_sym_db.RegisterEnumDescriptor(_MODEL_LEARNRATESTRATEGY) - -_MODEL_OPTIMIZATIONSTRATEGY = _descriptor.EnumDescriptor( - name="OptimizationStrategy", - full_name="google.cloud.bigquery.v2.Model.OptimizationStrategy", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="OPTIMIZATION_STRATEGY_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="BATCH_GRADIENT_DESCENT", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="NORMAL_EQUATION", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=7117, - serialized_end=7227, -) -_sym_db.RegisterEnumDescriptor(_MODEL_OPTIMIZATIONSTRATEGY) - - -_MODEL_KMEANSENUMS = _descriptor.Descriptor( - name="KmeansEnums", - full_name="google.cloud.bigquery.v2.Model.KmeansEnums", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - extensions=[], - nested_types=[], - enum_types=[_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD,], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1117, - serialized_end=1230, -) - -_MODEL_REGRESSIONMETRICS = _descriptor.Descriptor( - name="RegressionMetrics", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="mean_absolute_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_absolute_error", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="mean_squared_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_squared_error", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="mean_squared_log_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_squared_log_error", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="median_absolute_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.median_absolute_error", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="r_squared", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.r_squared", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1233, - serialized_end=1541, -) - -_MODEL_AGGREGATECLASSIFICATIONMETRICS = _descriptor.Descriptor( - name="AggregateClassificationMetrics", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="precision", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.precision", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="recall", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.recall", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="accuracy", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.accuracy", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="threshold", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.threshold", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="f1_score", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.f1_score", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="log_loss", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.log_loss", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="roc_auc", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.roc_auc", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1544, - serialized_end=1911, -) - -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX = _descriptor.Descriptor( - name="BinaryConfusionMatrix", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="positive_class_threshold", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.positive_class_threshold", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="true_positives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.true_positives", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="false_positives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.false_positives", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="true_negatives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.true_negatives", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="false_negatives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.false_negatives", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="precision", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.precision", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="recall", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.recall", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="f1_score", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.f1_score", - index=7, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="accuracy", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.accuracy", - index=8, - number=9, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2221, - serialized_end=2713, -) - -_MODEL_BINARYCLASSIFICATIONMETRICS = _descriptor.Descriptor( - name="BinaryClassificationMetrics", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="aggregate_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.aggregate_classification_metrics", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="binary_confusion_matrix_list", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.binary_confusion_matrix_list", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="positive_label", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.positive_label", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="negative_label", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.negative_label", - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1914, - serialized_end=2713, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY = _descriptor.Descriptor( - name="Entry", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="predicted_label", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry.predicted_label", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="item_count", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry.item_count", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3148, - serialized_end=3229, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW = _descriptor.Descriptor( - name="Row", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="actual_label", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row.actual_label", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="entries", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row.entries", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3232, - serialized_end=3363, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX = _descriptor.Descriptor( - name="ConfusionMatrix", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="confidence_threshold", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.confidence_threshold", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="rows", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.rows", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY, - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2970, - serialized_end=3363, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS = _descriptor.Descriptor( - name="MultiClassClassificationMetrics", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="aggregate_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.aggregate_classification_metrics", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="confusion_matrix_list", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.confusion_matrix_list", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2716, - serialized_end=3363, -) - -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT = _descriptor.Descriptor( - name="CategoryCount", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="category", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount.category", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="count", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount.count", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4123, - serialized_end=4200, -) - -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE = _descriptor.Descriptor( - name="CategoricalValue", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="category_counts", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.category_counts", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3975, - serialized_end=4200, -) - -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE = _descriptor.Descriptor( - name="FeatureValue", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="feature_column", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.feature_column", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="numerical_value", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.numerical_value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="categorical_value", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.categorical_value", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="value", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.value", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=3759, - serialized_end=4209, -) - -_MODEL_CLUSTERINGMETRICS_CLUSTER = _descriptor.Descriptor( - name="Cluster", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="centroid_id", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.centroid_id", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="feature_values", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.feature_values", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="count", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.count", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3586, - serialized_end=4209, -) - -_MODEL_CLUSTERINGMETRICS = _descriptor.Descriptor( - name="ClusteringMetrics", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="davies_bouldin_index", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.davies_bouldin_index", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="mean_squared_distance", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.mean_squared_distance", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="clusters", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.clusters", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3366, - serialized_end=4209, -) - -_MODEL_EVALUATIONMETRICS = _descriptor.Descriptor( - name="EvaluationMetrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="regression_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.regression_metrics", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="binary_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.binary_classification_metrics", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="multi_class_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.multi_class_classification_metrics", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="clustering_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.clustering_metrics", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.metrics", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=4212, - serialized_end=4617, -) - -_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY = _descriptor.Descriptor( - name="LabelClassWeightsEntry", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry.value", - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=b"8\001", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6053, - serialized_end=6109, -) - -_MODEL_TRAININGRUN_TRAININGOPTIONS = _descriptor.Descriptor( - name="TrainingOptions", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="max_iterations", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.max_iterations", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="loss_type", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.loss_type", - index=1, - number=2, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="learn_rate", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.learn_rate", - index=2, - number=3, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="l1_regularization", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.l1_regularization", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="l2_regularization", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.l2_regularization", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="min_relative_progress", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.min_relative_progress", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="warm_start", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.warm_start", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="early_stop", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.early_stop", - index=7, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="input_label_columns", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.input_label_columns", - index=8, - number=9, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="data_split_method", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_method", - index=9, - number=10, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="data_split_eval_fraction", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_eval_fraction", - index=10, - number=11, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="data_split_column", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_column", - index=11, - number=12, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="learn_rate_strategy", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.learn_rate_strategy", - index=12, - number=13, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="initial_learn_rate", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.initial_learn_rate", - index=13, - number=16, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="label_class_weights", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.label_class_weights", - index=14, - number=17, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="distance_type", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.distance_type", - index=15, - number=20, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="num_clusters", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.num_clusters", - index=16, - number=21, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_uri", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.model_uri", - index=17, - number=22, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="optimization_strategy", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.optimization_strategy", - index=18, - number=23, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="kmeans_initialization_method", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.kmeans_initialization_method", - index=19, - number=33, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="kmeans_initialization_column", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.kmeans_initialization_column", - index=20, - number=34, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4928, - serialized_end=6109, -) - -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO = _descriptor.Descriptor( - name="ClusterInfo", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="centroid_id", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.centroid_id", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="cluster_radius", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.cluster_radius", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="cluster_size", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.cluster_size", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6444, - serialized_end=6583, -) - -_MODEL_TRAININGRUN_ITERATIONRESULT = _descriptor.Descriptor( - name="IterationResult", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="index", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.index", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="duration_ms", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.duration_ms", - index=1, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="training_loss", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.training_loss", - index=2, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="eval_loss", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.eval_loss", - index=3, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="learn_rate", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.learn_rate", - index=4, - number=7, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="cluster_infos", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.cluster_infos", - index=5, - number=8, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6112, - serialized_end=6583, -) - -_MODEL_TRAININGRUN = _descriptor.Descriptor( - name="TrainingRun", - full_name="google.cloud.bigquery.v2.Model.TrainingRun", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="training_options", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.training_options", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="start_time", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.start_time", - index=1, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="results", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.results", - index=2, - number=6, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="evaluation_metrics", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.evaluation_metrics", - index=3, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_TRAININGRUN_TRAININGOPTIONS, - _MODEL_TRAININGRUN_ITERATIONRESULT, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4620, - serialized_end=6583, -) - -_MODEL_LABELSENTRY = _descriptor.Descriptor( - name="LabelsEntry", - full_name="google.cloud.bigquery.v2.Model.LabelsEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="google.cloud.bigquery.v2.Model.LabelsEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="google.cloud.bigquery.v2.Model.LabelsEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=b"8\001", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6585, - serialized_end=6630, -) - -_MODEL = _descriptor.Descriptor( - name="Model", - full_name="google.cloud.bigquery.v2.Model", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="etag", - full_name="google.cloud.bigquery.v2.Model.etag", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_reference", - full_name="google.cloud.bigquery.v2.Model.model_reference", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="creation_time", - full_name="google.cloud.bigquery.v2.Model.creation_time", - index=2, - number=5, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="last_modified_time", - full_name="google.cloud.bigquery.v2.Model.last_modified_time", - index=3, - number=6, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="description", - full_name="google.cloud.bigquery.v2.Model.description", - index=4, - number=12, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="friendly_name", - full_name="google.cloud.bigquery.v2.Model.friendly_name", - index=5, - number=14, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="labels", - full_name="google.cloud.bigquery.v2.Model.labels", - index=6, - number=15, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="expiration_time", - full_name="google.cloud.bigquery.v2.Model.expiration_time", - index=7, - number=16, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="location", - full_name="google.cloud.bigquery.v2.Model.location", - index=8, - number=13, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="encryption_configuration", - full_name="google.cloud.bigquery.v2.Model.encryption_configuration", - index=9, - number=17, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_type", - full_name="google.cloud.bigquery.v2.Model.model_type", - index=10, - number=7, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="training_runs", - full_name="google.cloud.bigquery.v2.Model.training_runs", - index=11, - number=9, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="feature_columns", - full_name="google.cloud.bigquery.v2.Model.feature_columns", - index=12, - number=10, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="label_columns", - full_name="google.cloud.bigquery.v2.Model.label_columns", - index=13, - number=11, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_KMEANSENUMS, - _MODEL_REGRESSIONMETRICS, - _MODEL_AGGREGATECLASSIFICATIONMETRICS, - _MODEL_BINARYCLASSIFICATIONMETRICS, - _MODEL_MULTICLASSCLASSIFICATIONMETRICS, - _MODEL_CLUSTERINGMETRICS, - _MODEL_EVALUATIONMETRICS, - _MODEL_TRAININGRUN, - _MODEL_LABELSENTRY, - ], - enum_types=[ - _MODEL_MODELTYPE, - _MODEL_LOSSTYPE, - _MODEL_DISTANCETYPE, - _MODEL_DATASPLITMETHOD, - _MODEL_LEARNRATESTRATEGY, - _MODEL_OPTIMIZATIONSTRATEGY, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=416, - serialized_end=7227, -) - - -_GETMODELREQUEST = _descriptor.Descriptor( - name="GetModelRequest", - full_name="google.cloud.bigquery.v2.GetModelRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.GetModelRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.v2.GetModelRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.GetModelRequest.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7229, - serialized_end=7319, -) - - -_PATCHMODELREQUEST = _descriptor.Descriptor( - name="PatchModelRequest", - full_name="google.cloud.bigquery.v2.PatchModelRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.PatchModelRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.v2.PatchModelRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.PatchModelRequest.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model", - full_name="google.cloud.bigquery.v2.PatchModelRequest.model", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7322, - serialized_end=7467, -) - - -_DELETEMODELREQUEST = _descriptor.Descriptor( - name="DeleteModelRequest", - full_name="google.cloud.bigquery.v2.DeleteModelRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.DeleteModelRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.v2.DeleteModelRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.DeleteModelRequest.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7469, - serialized_end=7562, -) - - -_LISTMODELSREQUEST = _descriptor.Descriptor( - name="ListModelsRequest", - full_name="google.cloud.bigquery.v2.ListModelsRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.ListModelsRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.v2.ListModelsRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="max_results", - full_name="google.cloud.bigquery.v2.ListModelsRequest.max_results", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="page_token", - full_name="google.cloud.bigquery.v2.ListModelsRequest.page_token", - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7565, - serialized_end=7705, -) - - -_LISTMODELSRESPONSE = _descriptor.Descriptor( - name="ListModelsResponse", - full_name="google.cloud.bigquery.v2.ListModelsResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="models", - full_name="google.cloud.bigquery.v2.ListModelsResponse.models", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="next_page_token", - full_name="google.cloud.bigquery.v2.ListModelsResponse.next_page_token", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7707, - serialized_end=7801, -) - -_MODEL_KMEANSENUMS.containing_type = _MODEL -_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD.containing_type = _MODEL_KMEANSENUMS -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "mean_absolute_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "mean_squared_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "mean_squared_log_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "median_absolute_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "r_squared" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.containing_type = _MODEL -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "precision" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "recall" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "accuracy" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "threshold" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "f1_score" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "log_loss" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "roc_auc" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.containing_type = _MODEL -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "positive_class_threshold" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "true_positives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "false_positives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "true_negatives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "false_negatives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "precision" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "recall" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "f1_score" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "accuracy" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.containing_type = ( - _MODEL_BINARYCLASSIFICATIONMETRICS -) -_MODEL_BINARYCLASSIFICATIONMETRICS.fields_by_name[ - "aggregate_classification_metrics" -].message_type = _MODEL_AGGREGATECLASSIFICATIONMETRICS -_MODEL_BINARYCLASSIFICATIONMETRICS.fields_by_name[ - "binary_confusion_matrix_list" -].message_type = _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX -_MODEL_BINARYCLASSIFICATIONMETRICS.containing_type = _MODEL -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY.fields_by_name[ - "item_count" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY.containing_type = ( - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX -) -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW.fields_by_name[ - "entries" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW.containing_type = ( - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX -) -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.fields_by_name[ - "confidence_threshold" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.fields_by_name[ - "rows" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.containing_type = ( - _MODEL_MULTICLASSCLASSIFICATIONMETRICS -) -_MODEL_MULTICLASSCLASSIFICATIONMETRICS.fields_by_name[ - "aggregate_classification_metrics" -].message_type = _MODEL_AGGREGATECLASSIFICATIONMETRICS -_MODEL_MULTICLASSCLASSIFICATIONMETRICS.fields_by_name[ - "confusion_matrix_list" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX -_MODEL_MULTICLASSCLASSIFICATIONMETRICS.containing_type = _MODEL -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT.fields_by_name[ - "count" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT.containing_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE.fields_by_name[ - "category_counts" -].message_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE.containing_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "numerical_value" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "categorical_value" -].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.containing_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name["value"].fields.append( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name["numerical_value"] -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "numerical_value" -].containing_oneof = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name[ - "value" -] -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name["value"].fields.append( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name["categorical_value"] -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "categorical_value" -].containing_oneof = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name[ - "value" -] -_MODEL_CLUSTERINGMETRICS_CLUSTER.fields_by_name[ - "feature_values" -].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER.fields_by_name[ - "count" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER.containing_type = _MODEL_CLUSTERINGMETRICS -_MODEL_CLUSTERINGMETRICS.fields_by_name[ - "davies_bouldin_index" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_CLUSTERINGMETRICS.fields_by_name[ - "mean_squared_distance" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_CLUSTERINGMETRICS.fields_by_name[ - "clusters" -].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER -_MODEL_CLUSTERINGMETRICS.containing_type = _MODEL -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "regression_metrics" -].message_type = _MODEL_REGRESSIONMETRICS -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "binary_classification_metrics" -].message_type = _MODEL_BINARYCLASSIFICATIONMETRICS -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "multi_class_classification_metrics" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "clustering_metrics" -].message_type = _MODEL_CLUSTERINGMETRICS -_MODEL_EVALUATIONMETRICS.containing_type = _MODEL -_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["regression_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "regression_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] -_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["binary_classification_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "binary_classification_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] -_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["multi_class_classification_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "multi_class_classification_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] -_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["clustering_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "clustering_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] -_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY.containing_type = ( - _MODEL_TRAININGRUN_TRAININGOPTIONS -) -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "loss_type" -].enum_type = _MODEL_LOSSTYPE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "l1_regularization" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "l2_regularization" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "min_relative_progress" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "warm_start" -].message_type = google_dot_protobuf_dot_wrappers__pb2._BOOLVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "early_stop" -].message_type = google_dot_protobuf_dot_wrappers__pb2._BOOLVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "data_split_method" -].enum_type = _MODEL_DATASPLITMETHOD -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "learn_rate_strategy" -].enum_type = _MODEL_LEARNRATESTRATEGY -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "label_class_weights" -].message_type = _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "distance_type" -].enum_type = _MODEL_DISTANCETYPE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "optimization_strategy" -].enum_type = _MODEL_OPTIMIZATIONSTRATEGY -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "kmeans_initialization_method" -].enum_type = _MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD -_MODEL_TRAININGRUN_TRAININGOPTIONS.containing_type = _MODEL_TRAININGRUN -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[ - "cluster_radius" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[ - "cluster_size" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.containing_type = ( - _MODEL_TRAININGRUN_ITERATIONRESULT -) -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "index" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT32VALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "duration_ms" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "training_loss" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "eval_loss" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "cluster_infos" -].message_type = _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO -_MODEL_TRAININGRUN_ITERATIONRESULT.containing_type = _MODEL_TRAININGRUN -_MODEL_TRAININGRUN.fields_by_name[ - "training_options" -].message_type = _MODEL_TRAININGRUN_TRAININGOPTIONS -_MODEL_TRAININGRUN.fields_by_name[ - "start_time" -].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP -_MODEL_TRAININGRUN.fields_by_name[ - "results" -].message_type = _MODEL_TRAININGRUN_ITERATIONRESULT -_MODEL_TRAININGRUN.fields_by_name[ - "evaluation_metrics" -].message_type = _MODEL_EVALUATIONMETRICS -_MODEL_TRAININGRUN.containing_type = _MODEL -_MODEL_LABELSENTRY.containing_type = _MODEL -_MODEL.fields_by_name[ - "model_reference" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2._MODELREFERENCE -) -_MODEL.fields_by_name["labels"].message_type = _MODEL_LABELSENTRY -_MODEL.fields_by_name[ - "encryption_configuration" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2._ENCRYPTIONCONFIGURATION -) -_MODEL.fields_by_name["model_type"].enum_type = _MODEL_MODELTYPE -_MODEL.fields_by_name["training_runs"].message_type = _MODEL_TRAININGRUN -_MODEL.fields_by_name[ - "feature_columns" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2._STANDARDSQLFIELD -) -_MODEL.fields_by_name[ - "label_columns" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2._STANDARDSQLFIELD -) -_MODEL_MODELTYPE.containing_type = _MODEL -_MODEL_LOSSTYPE.containing_type = _MODEL -_MODEL_DISTANCETYPE.containing_type = _MODEL -_MODEL_DATASPLITMETHOD.containing_type = _MODEL -_MODEL_LEARNRATESTRATEGY.containing_type = _MODEL -_MODEL_OPTIMIZATIONSTRATEGY.containing_type = _MODEL -_PATCHMODELREQUEST.fields_by_name["model"].message_type = _MODEL -_LISTMODELSREQUEST.fields_by_name[ - "max_results" -].message_type = google_dot_protobuf_dot_wrappers__pb2._UINT32VALUE -_LISTMODELSRESPONSE.fields_by_name["models"].message_type = _MODEL -DESCRIPTOR.message_types_by_name["Model"] = _MODEL -DESCRIPTOR.message_types_by_name["GetModelRequest"] = _GETMODELREQUEST -DESCRIPTOR.message_types_by_name["PatchModelRequest"] = _PATCHMODELREQUEST -DESCRIPTOR.message_types_by_name["DeleteModelRequest"] = _DELETEMODELREQUEST -DESCRIPTOR.message_types_by_name["ListModelsRequest"] = _LISTMODELSREQUEST -DESCRIPTOR.message_types_by_name["ListModelsResponse"] = _LISTMODELSRESPONSE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Model = _reflection.GeneratedProtocolMessageType( - "Model", - (_message.Message,), - { - "KmeansEnums": _reflection.GeneratedProtocolMessageType( - "KmeansEnums", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_KMEANSENUMS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.KmeansEnums) - }, - ), - "RegressionMetrics": _reflection.GeneratedProtocolMessageType( - "RegressionMetrics", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_REGRESSIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for regression and explicit feedback type matrix - factorization models. - - Attributes: - mean_absolute_error: - Mean absolute error. - mean_squared_error: - Mean squared error. - mean_squared_log_error: - Mean squared log error. - median_absolute_error: - Median absolute error. - r_squared: - R^2 score. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.RegressionMetrics) - }, - ), - "AggregateClassificationMetrics": _reflection.GeneratedProtocolMessageType( - "AggregateClassificationMetrics", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_AGGREGATECLASSIFICATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Aggregate metrics for classification/classifier models. For multi- - class models, the metrics are either macro-averaged or micro-averaged. - When macro-averaged, the metrics are calculated for each label and - then an unweighted average is taken of those values. When micro- - averaged, the metric is calculated globally by counting the total - number of correctly predicted rows. - - Attributes: - precision: - Precision is the fraction of actual positive predictions that - had positive actual labels. For multiclass this is a macro- - averaged metric treating each class as a binary classifier. - recall: - Recall is the fraction of actual positive labels that were - given a positive prediction. For multiclass this is a macro- - averaged metric. - accuracy: - Accuracy is the fraction of predictions given the correct - label. For multiclass this is a micro-averaged metric. - threshold: - Threshold at which the metrics are computed. For binary - classification models this is the positive class threshold. - For multi-class classfication models this is the confidence - threshold. - f1_score: - The F1 score is an average of recall and precision. For - multiclass this is a macro-averaged metric. - log_loss: - Logarithmic Loss. For multiclass this is a macro-averaged - metric. - roc_auc: - Area Under a ROC Curve. For multiclass this is a macro- - averaged metric. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.AggregateClassificationMetrics) - }, - ), - "BinaryClassificationMetrics": _reflection.GeneratedProtocolMessageType( - "BinaryClassificationMetrics", - (_message.Message,), - { - "BinaryConfusionMatrix": _reflection.GeneratedProtocolMessageType( - "BinaryConfusionMatrix", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Confusion matrix for binary classification models. - - Attributes: - positive_class_threshold: - Threshold value used when computing each of the following - metric. - true_positives: - Number of true samples predicted as true. - false_positives: - Number of false samples predicted as true. - true_negatives: - Number of true samples predicted as false. - false_negatives: - Number of false samples predicted as false. - precision: - The fraction of actual positive predictions that had positive - actual labels. - recall: - The fraction of actual positive labels that were given a - positive prediction. - f1_score: - The equally weighted average of recall and precision. - accuracy: - The fraction of predictions given the correct label. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix) - }, - ), - "DESCRIPTOR": _MODEL_BINARYCLASSIFICATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for binary classification/classifier models. - - Attributes: - aggregate_classification_metrics: - Aggregate classification metrics. - binary_confusion_matrix_list: - Binary confusion matrix at multiple thresholds. - positive_label: - Label representing the positive class. - negative_label: - Label representing the negative class. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics) - }, - ), - "MultiClassClassificationMetrics": _reflection.GeneratedProtocolMessageType( - "MultiClassClassificationMetrics", - (_message.Message,), - { - "ConfusionMatrix": _reflection.GeneratedProtocolMessageType( - "ConfusionMatrix", - (_message.Message,), - { - "Entry": _reflection.GeneratedProtocolMessageType( - "Entry", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """A single entry in the confusion matrix. - - Attributes: - predicted_label: - The predicted label. For confidence_threshold > 0, we will - also add an entry indicating the number of items under the - confidence threshold. - item_count: - Number of items being predicted as this label. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry) - }, - ), - "Row": _reflection.GeneratedProtocolMessageType( - "Row", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """A single row in the confusion matrix. - - Attributes: - actual_label: - The original label of this row. - entries: - Info describing predicted label distribution. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row) - }, - ), - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Confusion matrix for multi-class classification models. - - Attributes: - confidence_threshold: - Confidence threshold used when computing the entries of the - confusion matrix. - rows: - One row per actual label. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix) - }, - ), - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for multi-class classification/classifier models. - - Attributes: - aggregate_classification_metrics: - Aggregate classification metrics. - confusion_matrix_list: - Confusion matrix at different thresholds. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics) - }, - ), - "ClusteringMetrics": _reflection.GeneratedProtocolMessageType( - "ClusteringMetrics", - (_message.Message,), - { - "Cluster": _reflection.GeneratedProtocolMessageType( - "Cluster", - (_message.Message,), - { - "FeatureValue": _reflection.GeneratedProtocolMessageType( - "FeatureValue", - (_message.Message,), - { - "CategoricalValue": _reflection.GeneratedProtocolMessageType( - "CategoricalValue", - (_message.Message,), - { - "CategoryCount": _reflection.GeneratedProtocolMessageType( - "CategoryCount", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Represents the count of a single category within the cluster. - - Attributes: - category: - The name of category. - count: - The count of training samples matching the category within the - cluster. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Representative value of a categorical feature. - - Attributes: - category_counts: - Counts of all categories for the categorical feature. If there - are more than ten categories, we return top ten (by count) and - return one more CategoryCount with category ``*OTHER*`` and - count as aggregate counts of remaining categories. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Representative value of a single feature within the cluster. - - Attributes: - feature_column: - The feature column name. - numerical_value: - The numerical feature value. This is the centroid value for - this feature. - categorical_value: - The categorical feature value. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Message containing the information about one cluster. - - Attributes: - centroid_id: - Centroid id. - feature_values: - Values of highly variant features for this cluster. - count: - Count of training data rows that were assigned to this - cluster. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for clustering models. - - Attributes: - davies_bouldin_index: - Davies-Bouldin index. - mean_squared_distance: - Mean of squared distances between each sample to its cluster - centroid. - clusters: - [Beta] Information for all clusters. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics) - }, - ), - "EvaluationMetrics": _reflection.GeneratedProtocolMessageType( - "EvaluationMetrics", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_EVALUATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics of a model. These are either computed on all - training data or just the eval data based on whether eval data was - used during training. These are not present for imported models. - - Attributes: - regression_metrics: - Populated for regression models and explicit feedback type - matrix factorization models. - binary_classification_metrics: - Populated for binary classification/classifier models. - multi_class_classification_metrics: - Populated for multi-class classification/classifier models. - clustering_metrics: - Populated for clustering models. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.EvaluationMetrics) - }, - ), - "TrainingRun": _reflection.GeneratedProtocolMessageType( - "TrainingRun", - (_message.Message,), - { - "TrainingOptions": _reflection.GeneratedProtocolMessageType( - "TrainingOptions", - (_message.Message,), - { - "LabelClassWeightsEntry": _reflection.GeneratedProtocolMessageType( - "LabelClassWeightsEntry", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry) - }, - ), - "DESCRIPTOR": _MODEL_TRAININGRUN_TRAININGOPTIONS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - max_iterations: - The maximum number of iterations in training. Used only for - iterative training algorithms. - loss_type: - Type of loss function used during training run. - learn_rate: - Learning rate in training. Used only for iterative training - algorithms. - l1_regularization: - L1 regularization coefficient. - l2_regularization: - L2 regularization coefficient. - min_relative_progress: - When early_stop is true, stops training when accuracy - improvement is less than ‘min_relative_progress’. Used only - for iterative training algorithms. - warm_start: - Whether to train a model from the last checkpoint. - early_stop: - Whether to stop early when the loss doesn’t improve - significantly any more (compared to min_relative_progress). - Used only for iterative training algorithms. - input_label_columns: - Name of input label columns in training data. - data_split_method: - The data split type for training and evaluation, e.g. RANDOM. - data_split_eval_fraction: - The fraction of evaluation data over the whole input data. The - rest of data will be used as training data. The format should - be double. Accurate to two decimal places. Default value is - 0.2. - data_split_column: - The column to split data with. This column won’t be used as a - feature. 1. When data_split_method is CUSTOM, the - corresponding column should be boolean. The rows with true - value tag are eval data, and the false are training data. 2. - When data_split_method is SEQ, the first - DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) in - the corresponding column are used as training data, and the - rest are eval data. It respects the order in Orderable data - types: - https://cloud.google.com/bigquery/docs/reference/standard- - sql/data-types#data-type-properties - learn_rate_strategy: - The strategy to determine learn rate for the current - iteration. - initial_learn_rate: - Specifies the initial learning rate for the line search learn - rate strategy. - label_class_weights: - Weights associated with each label class, for rebalancing the - training data. Only applicable for classification models. - distance_type: - Distance type for clustering models. - num_clusters: - Number of clusters for clustering models. - model_uri: - [Beta] Google Cloud Storage URI from which the model was - imported. Only applicable for imported models. - optimization_strategy: - Optimization strategy for training linear regression models. - kmeans_initialization_method: - The method used to initialize the centroids for kmeans - algorithm. - kmeans_initialization_column: - The column used to provide the initial centroids for kmeans - algorithm when kmeans_initialization_method is CUSTOM. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions) - }, - ), - "IterationResult": _reflection.GeneratedProtocolMessageType( - "IterationResult", - (_message.Message,), - { - "ClusterInfo": _reflection.GeneratedProtocolMessageType( - "ClusterInfo", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Information about a single cluster for clustering model. - - Attributes: - centroid_id: - Centroid id. - cluster_radius: - Cluster radius, the average distance from centroid to each - point assigned to the cluster. - cluster_size: - Cluster size, the total number of points assigned to the - cluster. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo) - }, - ), - "DESCRIPTOR": _MODEL_TRAININGRUN_ITERATIONRESULT, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Information about a single iteration of the training run. - - Attributes: - index: - Index of the iteration, 0 based. - duration_ms: - Time taken to run the iteration in milliseconds. - training_loss: - Loss computed on the training data at the end of iteration. - eval_loss: - Loss computed on the eval data at the end of iteration. - learn_rate: - Learn rate used for this iteration. - cluster_infos: - Information about top clusters for clustering models. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult) - }, - ), - "DESCRIPTOR": _MODEL_TRAININGRUN, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Information about a single training query run for the model. - - Attributes: - training_options: - Options that were used for this training run, includes user - specified and default options that were used. - start_time: - The start time of this training run. - results: - Output of each iteration run, results.size() <= - max_iterations. - evaluation_metrics: - The evaluation metrics over training/eval data that were - computed at the end of training. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun) - }, - ), - "LabelsEntry": _reflection.GeneratedProtocolMessageType( - "LabelsEntry", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_LABELSENTRY, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.LabelsEntry) - }, - ), - "DESCRIPTOR": _MODEL, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - etag: - Output only. A hash of this resource. - model_reference: - Required. Unique identifier for this model. - creation_time: - Output only. The time when this model was created, in - millisecs since the epoch. - last_modified_time: - Output only. The time when this model was last modified, in - millisecs since the epoch. - description: - Optional. A user-friendly description of this model. - friendly_name: - Optional. A descriptive name for this model. - labels: - The labels associated with this model. You can use these to - organize and group your models. Label keys and values can be - no longer than 63 characters, can only contain lowercase - letters, numeric characters, underscores and dashes. - International characters are allowed. Label values are - optional. Label keys must start with a letter and each label - in the list must have a different key. - expiration_time: - Optional. The time when this model expires, in milliseconds - since the epoch. If not present, the model will persist - indefinitely. Expired models will be deleted and their storage - reclaimed. The defaultTableExpirationMs property of the - encapsulating dataset can be used to set a default - expirationTime on newly created models. - location: - Output only. The geographic location where the model resides. - This value is inherited from the dataset. - encryption_configuration: - Custom encryption configuration (e.g., Cloud KMS keys). This - shows the encryption configuration of the model data while - stored in BigQuery storage. - model_type: - Output only. Type of the model resource. - training_runs: - Output only. Information for all training runs in increasing - order of start_time. - feature_columns: - Output only. Input feature columns that were used to train - this model. - label_columns: - Output only. Label columns that were used to train this model. - The output of the model will have a ``predicted\_`` prefix to - these columns. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model) - }, -) -_sym_db.RegisterMessage(Model) -_sym_db.RegisterMessage(Model.KmeansEnums) -_sym_db.RegisterMessage(Model.RegressionMetrics) -_sym_db.RegisterMessage(Model.AggregateClassificationMetrics) -_sym_db.RegisterMessage(Model.BinaryClassificationMetrics) -_sym_db.RegisterMessage(Model.BinaryClassificationMetrics.BinaryConfusionMatrix) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Row) -_sym_db.RegisterMessage(Model.ClusteringMetrics) -_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster) -_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster.FeatureValue) -_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue) -_sym_db.RegisterMessage( - Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount -) -_sym_db.RegisterMessage(Model.EvaluationMetrics) -_sym_db.RegisterMessage(Model.TrainingRun) -_sym_db.RegisterMessage(Model.TrainingRun.TrainingOptions) -_sym_db.RegisterMessage(Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry) -_sym_db.RegisterMessage(Model.TrainingRun.IterationResult) -_sym_db.RegisterMessage(Model.TrainingRun.IterationResult.ClusterInfo) -_sym_db.RegisterMessage(Model.LabelsEntry) - -GetModelRequest = _reflection.GeneratedProtocolMessageType( - "GetModelRequest", - (_message.Message,), - { - "DESCRIPTOR": _GETMODELREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the requested model. - dataset_id: - Required. Dataset ID of the requested model. - model_id: - Required. Model ID of the requested model. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.GetModelRequest) - }, -) -_sym_db.RegisterMessage(GetModelRequest) - -PatchModelRequest = _reflection.GeneratedProtocolMessageType( - "PatchModelRequest", - (_message.Message,), - { - "DESCRIPTOR": _PATCHMODELREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the model to patch. - dataset_id: - Required. Dataset ID of the model to patch. - model_id: - Required. Model ID of the model to patch. - model: - Required. Patched model. Follows RFC5789 patch semantics. - Missing fields are not updated. To clear a field, explicitly - set to default value. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.PatchModelRequest) - }, -) -_sym_db.RegisterMessage(PatchModelRequest) - -DeleteModelRequest = _reflection.GeneratedProtocolMessageType( - "DeleteModelRequest", - (_message.Message,), - { - "DESCRIPTOR": _DELETEMODELREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the model to delete. - dataset_id: - Required. Dataset ID of the model to delete. - model_id: - Required. Model ID of the model to delete. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.DeleteModelRequest) - }, -) -_sym_db.RegisterMessage(DeleteModelRequest) - -ListModelsRequest = _reflection.GeneratedProtocolMessageType( - "ListModelsRequest", - (_message.Message,), - { - "DESCRIPTOR": _LISTMODELSREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the models to list. - dataset_id: - Required. Dataset ID of the models to list. - max_results: - The maximum number of results to return in a single response - page. Leverage the page tokens to iterate through the entire - collection. - page_token: - Page token, returned by a previous call to request the next - page of results - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsRequest) - }, -) -_sym_db.RegisterMessage(ListModelsRequest) - -ListModelsResponse = _reflection.GeneratedProtocolMessageType( - "ListModelsResponse", - (_message.Message,), - { - "DESCRIPTOR": _LISTMODELSRESPONSE, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - models: - Models in the requested dataset. Only the following fields are - populated: model_reference, model_type, creation_time, - last_modified_time and labels. - next_page_token: - A token to request the next page of results. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsResponse) - }, -) -_sym_db.RegisterMessage(ListModelsResponse) - - -DESCRIPTOR._options = None -_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY._options = None -_MODEL_LABELSENTRY._options = None -_MODEL.fields_by_name["etag"]._options = None -_MODEL.fields_by_name["model_reference"]._options = None -_MODEL.fields_by_name["creation_time"]._options = None -_MODEL.fields_by_name["last_modified_time"]._options = None -_MODEL.fields_by_name["description"]._options = None -_MODEL.fields_by_name["friendly_name"]._options = None -_MODEL.fields_by_name["expiration_time"]._options = None -_MODEL.fields_by_name["location"]._options = None -_MODEL.fields_by_name["model_type"]._options = None -_MODEL.fields_by_name["training_runs"]._options = None -_MODEL.fields_by_name["feature_columns"]._options = None -_MODEL.fields_by_name["label_columns"]._options = None -_GETMODELREQUEST.fields_by_name["project_id"]._options = None -_GETMODELREQUEST.fields_by_name["dataset_id"]._options = None -_GETMODELREQUEST.fields_by_name["model_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["project_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["dataset_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["model_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["model"]._options = None -_DELETEMODELREQUEST.fields_by_name["project_id"]._options = None -_DELETEMODELREQUEST.fields_by_name["dataset_id"]._options = None -_DELETEMODELREQUEST.fields_by_name["model_id"]._options = None -_LISTMODELSREQUEST.fields_by_name["project_id"]._options = None -_LISTMODELSREQUEST.fields_by_name["dataset_id"]._options = None - -_MODELSERVICE = _descriptor.ServiceDescriptor( - name="ModelService", - full_name="google.cloud.bigquery.v2.ModelService", - file=DESCRIPTOR, - index=0, - serialized_options=b"\312A\027bigquery.googleapis.com\322A\302\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-only", - create_key=_descriptor._internal_create_key, - serialized_start=7804, - serialized_end=8566, - methods=[ - _descriptor.MethodDescriptor( - name="GetModel", - full_name="google.cloud.bigquery.v2.ModelService.GetModel", - index=0, - containing_service=None, - input_type=_GETMODELREQUEST, - output_type=_MODEL, - serialized_options=b"\332A\036project_id,dataset_id,model_id", - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="ListModels", - full_name="google.cloud.bigquery.v2.ModelService.ListModels", - index=1, - containing_service=None, - input_type=_LISTMODELSREQUEST, - output_type=_LISTMODELSRESPONSE, - serialized_options=b"\332A!project_id,dataset_id,max_results", - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="PatchModel", - full_name="google.cloud.bigquery.v2.ModelService.PatchModel", - index=2, - containing_service=None, - input_type=_PATCHMODELREQUEST, - output_type=_MODEL, - serialized_options=b"\332A$project_id,dataset_id,model_id,model", - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="DeleteModel", - full_name="google.cloud.bigquery.v2.ModelService.DeleteModel", - index=3, - containing_service=None, - input_type=_DELETEMODELREQUEST, - output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, - serialized_options=b"\332A\036project_id,dataset_id,model_id", - create_key=_descriptor._internal_create_key, - ), - ], -) -_sym_db.RegisterServiceDescriptor(_MODELSERVICE) - -DESCRIPTOR.services_by_name["ModelService"] = _MODELSERVICE - -# @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto deleted file mode 100644 index c3d1a49a8bdd..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference.proto +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "ModelReferenceProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -// Id path of a model. -message ModelReference { - // Required. The ID of the project containing this model. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the dataset containing this model. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the model. The ID must contain only - // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum - // length is 1,024 characters. - string model_id = 3 [(google.api.field_behavior) = REQUIRED]; -} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py deleted file mode 100644 index 2411c48632c8..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/model_reference_pb2.py +++ /dev/null @@ -1,142 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/model_reference.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/model_reference.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\023ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n4google/cloud/bigquery_v2/proto/model_reference.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"Y\n\x0eModelReference\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x42u\n\x1c\x63om.google.cloud.bigquery.v2B\x13ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - - -_MODELREFERENCE = _descriptor.Descriptor( - name="ModelReference", - full_name="google.cloud.bigquery.v2.ModelReference", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.ModelReference.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.v2.ModelReference.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.ModelReference.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=145, - serialized_end=234, -) - -DESCRIPTOR.message_types_by_name["ModelReference"] = _MODELREFERENCE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ModelReference = _reflection.GeneratedProtocolMessageType( - "ModelReference", - (_message.Message,), - { - "DESCRIPTOR": _MODELREFERENCE, - "__module__": "google.cloud.bigquery_v2.proto.model_reference_pb2", - "__doc__": """Id path of a model. - - Attributes: - project_id: - Required. The ID of the project containing this model. - dataset_id: - Required. The ID of the dataset containing this model. - model_id: - Required. The ID of the model. The ID must contain only - letters (a-z, A-Z), numbers (0-9), or underscores (_). The - maximum length is 1,024 characters. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ModelReference) - }, -) -_sym_db.RegisterMessage(ModelReference) - - -DESCRIPTOR._options = None -_MODELREFERENCE.fields_by_name["project_id"]._options = None -_MODELREFERENCE.fields_by_name["dataset_id"]._options = None -_MODELREFERENCE.fields_by_name["model_id"]._options = None -# @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto deleted file mode 100644 index 1514eccbb81a..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql.proto +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "StandardSqlProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -// The type of a variable, e.g., a function argument. -// Examples: -// INT64: {type_kind="INT64"} -// ARRAY: {type_kind="ARRAY", array_element_type="STRING"} -// STRUCT>: -// {type_kind="STRUCT", -// struct_type={fields=[ -// {name="x", type={type_kind="STRING"}}, -// {name="y", type={type_kind="ARRAY", array_element_type="DATE"}} -// ]}} -message StandardSqlDataType { - enum TypeKind { - // Invalid type. - TYPE_KIND_UNSPECIFIED = 0; - - // Encoded as a string in decimal format. - INT64 = 2; - - // Encoded as a boolean "false" or "true". - BOOL = 5; - - // Encoded as a number, or string "NaN", "Infinity" or "-Infinity". - FLOAT64 = 7; - - // Encoded as a string value. - STRING = 8; - - // Encoded as a base64 string per RFC 4648, section 4. - BYTES = 9; - - // Encoded as an RFC 3339 timestamp with mandatory "Z" time zone string: - // 1985-04-12T23:20:50.52Z - TIMESTAMP = 19; - - // Encoded as RFC 3339 full-date format string: 1985-04-12 - DATE = 10; - - // Encoded as RFC 3339 partial-time format string: 23:20:50.52 - TIME = 20; - - // Encoded as RFC 3339 full-date "T" partial-time: 1985-04-12T23:20:50.52 - DATETIME = 21; - - // Encoded as WKT - GEOGRAPHY = 22; - - // Encoded as a decimal string. - NUMERIC = 23; - - // Encoded as a decimal string. - BIGNUMERIC = 24; - - // Encoded as a list with types matching Type.array_type. - ARRAY = 16; - - // Encoded as a list with fields of type Type.struct_type[i]. List is used - // because a JSON object cannot have duplicate field names. - STRUCT = 17; - } - - // Required. The top level type of this field. - // Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY"). - TypeKind type_kind = 1 [(google.api.field_behavior) = REQUIRED]; - - oneof sub_type { - // The type of the array's elements, if type_kind = "ARRAY". - StandardSqlDataType array_element_type = 2; - - // The fields of this struct, in order, if type_kind = "STRUCT". - StandardSqlStructType struct_type = 3; - } -} - -// A field or a column. -message StandardSqlField { - // Optional. The name of this field. Can be absent for struct fields. - string name = 1 [(google.api.field_behavior) = OPTIONAL]; - - // Optional. The type of this parameter. Absent if not explicitly - // specified (e.g., CREATE FUNCTION statement can omit the return type; - // in this case the output parameter does not have this "type" field). - StandardSqlDataType type = 2 [(google.api.field_behavior) = OPTIONAL]; -} - -message StandardSqlStructType { - repeated StandardSqlField fields = 1; -} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py deleted file mode 100644 index bfe77f934338..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ /dev/null @@ -1,442 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/standard_sql.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/standard_sql.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\020StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n1google/cloud/bigquery_v2/proto/standard_sql.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"\xcb\x03\n\x13StandardSqlDataType\x12N\n\ttype_kind\x18\x01 \x01(\x0e\x32\x36.google.cloud.bigquery.v2.StandardSqlDataType.TypeKindB\x03\xe0\x41\x02\x12K\n\x12\x61rray_element_type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeH\x00\x12\x46\n\x0bstruct_type\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.v2.StandardSqlStructTypeH\x00"\xc2\x01\n\x08TypeKind\x12\x19\n\x15TYPE_KIND_UNSPECIFIED\x10\x00\x12\t\n\x05INT64\x10\x02\x12\x08\n\x04\x42OOL\x10\x05\x12\x0b\n\x07\x46LOAT64\x10\x07\x12\n\n\x06STRING\x10\x08\x12\t\n\x05\x42YTES\x10\t\x12\r\n\tTIMESTAMP\x10\x13\x12\x08\n\x04\x44\x41TE\x10\n\x12\x08\n\x04TIME\x10\x14\x12\x0c\n\x08\x44\x41TETIME\x10\x15\x12\r\n\tGEOGRAPHY\x10\x16\x12\x0b\n\x07NUMERIC\x10\x17\x12\t\n\x05\x41RRAY\x10\x10\x12\n\n\x06STRUCT\x10\x11\x42\n\n\x08sub_type"g\n\x10StandardSqlField\x12\x11\n\x04name\x18\x01 \x01(\tB\x03\xe0\x41\x01\x12@\n\x04type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeB\x03\xe0\x41\x01"S\n\x15StandardSqlStructType\x12:\n\x06\x66ields\x18\x01 \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldBr\n\x1c\x63om.google.cloud.bigquery.v2B\x10StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - - -_STANDARDSQLDATATYPE_TYPEKIND = _descriptor.EnumDescriptor( - name="TypeKind", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.TypeKind", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="TYPE_KIND_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="INT64", - index=1, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="BOOL", - index=2, - number=5, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="FLOAT64", - index=3, - number=7, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="STRING", - index=4, - number=8, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="BYTES", - index=5, - number=9, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="TIMESTAMP", - index=6, - number=19, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="DATE", - index=7, - number=10, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="TIME", - index=8, - number=20, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="DATETIME", - index=9, - number=21, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="GEOGRAPHY", - index=10, - number=22, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="NUMERIC", - index=11, - number=23, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="ARRAY", - index=12, - number=16, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="STRUCT", - index=13, - number=17, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=396, - serialized_end=590, -) -_sym_db.RegisterEnumDescriptor(_STANDARDSQLDATATYPE_TYPEKIND) - - -_STANDARDSQLDATATYPE = _descriptor.Descriptor( - name="StandardSqlDataType", - full_name="google.cloud.bigquery.v2.StandardSqlDataType", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="type_kind", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.type_kind", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="array_element_type", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.array_element_type", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="struct_type", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.struct_type", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[_STANDARDSQLDATATYPE_TYPEKIND,], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="sub_type", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.sub_type", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=143, - serialized_end=602, -) - - -_STANDARDSQLFIELD = _descriptor.Descriptor( - name="StandardSqlField", - full_name="google.cloud.bigquery.v2.StandardSqlField", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="google.cloud.bigquery.v2.StandardSqlField.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="type", - full_name="google.cloud.bigquery.v2.StandardSqlField.type", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=604, - serialized_end=707, -) - - -_STANDARDSQLSTRUCTTYPE = _descriptor.Descriptor( - name="StandardSqlStructType", - full_name="google.cloud.bigquery.v2.StandardSqlStructType", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="fields", - full_name="google.cloud.bigquery.v2.StandardSqlStructType.fields", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=709, - serialized_end=792, -) - -_STANDARDSQLDATATYPE.fields_by_name[ - "type_kind" -].enum_type = _STANDARDSQLDATATYPE_TYPEKIND -_STANDARDSQLDATATYPE.fields_by_name[ - "array_element_type" -].message_type = _STANDARDSQLDATATYPE -_STANDARDSQLDATATYPE.fields_by_name["struct_type"].message_type = _STANDARDSQLSTRUCTTYPE -_STANDARDSQLDATATYPE_TYPEKIND.containing_type = _STANDARDSQLDATATYPE -_STANDARDSQLDATATYPE.oneofs_by_name["sub_type"].fields.append( - _STANDARDSQLDATATYPE.fields_by_name["array_element_type"] -) -_STANDARDSQLDATATYPE.fields_by_name[ - "array_element_type" -].containing_oneof = _STANDARDSQLDATATYPE.oneofs_by_name["sub_type"] -_STANDARDSQLDATATYPE.oneofs_by_name["sub_type"].fields.append( - _STANDARDSQLDATATYPE.fields_by_name["struct_type"] -) -_STANDARDSQLDATATYPE.fields_by_name[ - "struct_type" -].containing_oneof = _STANDARDSQLDATATYPE.oneofs_by_name["sub_type"] -_STANDARDSQLFIELD.fields_by_name["type"].message_type = _STANDARDSQLDATATYPE -_STANDARDSQLSTRUCTTYPE.fields_by_name["fields"].message_type = _STANDARDSQLFIELD -DESCRIPTOR.message_types_by_name["StandardSqlDataType"] = _STANDARDSQLDATATYPE -DESCRIPTOR.message_types_by_name["StandardSqlField"] = _STANDARDSQLFIELD -DESCRIPTOR.message_types_by_name["StandardSqlStructType"] = _STANDARDSQLSTRUCTTYPE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -StandardSqlDataType = _reflection.GeneratedProtocolMessageType( - "StandardSqlDataType", - (_message.Message,), - { - "DESCRIPTOR": _STANDARDSQLDATATYPE, - "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2", - "__doc__": """The type of a variable, e.g., a function argument. Examples: INT64: - {type_kind=``INT64``} ARRAY: {type_kind=``ARRAY``, - array_element_type=``STRING``} STRUCT: - {type_kind=``STRUCT``, struct_type={fields=[ {name=``x``, - type={type_kind=``STRING``}}, {name=``y``, type={type_kind=``ARRAY``, - array_element_type=``DATE``}} ]}} - - Attributes: - type_kind: - Required. The top level type of this field. Can be any - standard SQL data type (e.g., ``INT64``, ``DATE``, ``ARRAY``). - array_element_type: - The type of the array’s elements, if type_kind = ``ARRAY``. - struct_type: - The fields of this struct, in order, if type_kind = ``STRUCT``. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlDataType) - }, -) -_sym_db.RegisterMessage(StandardSqlDataType) - -StandardSqlField = _reflection.GeneratedProtocolMessageType( - "StandardSqlField", - (_message.Message,), - { - "DESCRIPTOR": _STANDARDSQLFIELD, - "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2", - "__doc__": """A field or a column. - - Attributes: - name: - Optional. The name of this field. Can be absent for struct - fields. - type: - Optional. The type of this parameter. Absent if not explicitly - specified (e.g., CREATE FUNCTION statement can omit the return - type; in this case the output parameter does not have this - ``type`` field). - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlField) - }, -) -_sym_db.RegisterMessage(StandardSqlField) - -StandardSqlStructType = _reflection.GeneratedProtocolMessageType( - "StandardSqlStructType", - (_message.Message,), - { - "DESCRIPTOR": _STANDARDSQLSTRUCTTYPE, - "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlStructType) - }, -) -_sym_db.RegisterMessage(StandardSqlStructType) - - -DESCRIPTOR._options = None -_STANDARDSQLDATATYPE.fields_by_name["type_kind"]._options = None -_STANDARDSQLFIELD.fields_by_name["name"]._options = None -_STANDARDSQLFIELD.fields_by_name["type"]._options = None -# @@protoc_insertion_point(module_scope) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/table_reference.proto b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/table_reference.proto deleted file mode 100644 index ba02f80c4dec..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/proto/table_reference.proto +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "TableReferenceProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -message TableReference { - // Required. The ID of the project containing this table. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the dataset containing this table. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the table. The ID must contain only - // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum - // length is 1,024 characters. Certain operations allow - // suffixing of the table ID with a partition decorator, such as - // `sample_table$20190123`. - string table_id = 3 [(google.api.field_behavior) = REQUIRED]; -} From 398f779d2537095faf08d6fd0f52adeeb566b249 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 28 May 2021 16:56:04 +0000 Subject: [PATCH 1163/2016] chore: new owl bot post processor docker image (#680) Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:c66ba3c8d7bc8566f47df841f98cd0097b28fff0b1864c86f5817f4c8c3e8600 --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/docs/conf.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 46e3f021cc72..da616c91a3b6 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:3c3a445b3ddc99ccd5d31edc4b4519729635d20693900db32c4f587ed51f7479 + digest: sha256:c66ba3c8d7bc8566f47df841f98cd0097b28fff0b1864c86f5817f4c8c3e8600 diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index fdea01aadf03..1275fe3f1107 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -364,6 +364,7 @@ "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), + "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), } From 4f7019d2c56211e727b6dcbb4fc981e8930b36cd Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 2 Jun 2021 09:17:20 +0200 Subject: [PATCH 1164/2016] feat: add support for Parquet options (#679) * feat: add support for Parquet options For load jobs and external tables config. * Simplify ParquetOptions.to_api_repr() Co-authored by Tres Seaver. * Expose ParquetOptions in top level namespace * Parquet options should be reflected in options --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/external_config.py | 28 ++++- .../google/cloud/bigquery/format_options.py | 80 ++++++++++++++ .../google/cloud/bigquery/job/load.py | 21 ++++ .../tests/unit/job/test_load_config.py | 35 ++++++ .../tests/unit/test_external_config.py | 100 ++++++++++++++++++ .../tests/unit/test_format_options.py | 41 +++++++ 7 files changed, 306 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_format_options.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index ec08b2c843ec..f031cd81d056 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -47,6 +47,7 @@ from google.cloud.bigquery.external_config import CSVOptions from google.cloud.bigquery.external_config import GoogleSheetsOptions from google.cloud.bigquery.external_config import ExternalSourceFormat +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import Compression from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import CopyJobConfig @@ -136,6 +137,7 @@ "BigtableColumn", "CSVOptions", "GoogleSheetsOptions", + "ParquetOptions", "DEFAULT_RETRY", # Enum Constants "enums", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index ef4d569fa8e5..0c49d2d764b3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -27,6 +27,7 @@ from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -53,6 +54,12 @@ class ExternalSourceFormat(object): DATASTORE_BACKUP = "DATASTORE_BACKUP" """Specifies datastore backup format""" + ORC = "ORC" + """Specifies ORC format.""" + + PARQUET = "PARQUET" + """Specifies Parquet format.""" + BIGTABLE = "BIGTABLE" """Specifies Bigtable format.""" @@ -540,7 +547,7 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": return config -_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions) +_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions) class HivePartitioningOptions(object): @@ -784,6 +791,25 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def parquet_options(self): + """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional + properties to set if ``sourceFormat`` is set to PARQUET. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options + """ + if self.source_format != ExternalSourceFormat.PARQUET: + return None + return self._options + + @parquet_options.setter + def parquet_options(self, value): + if self.source_format != ExternalSourceFormat.PARQUET: + msg = f"Cannot set Parquet options, source format is {self.source_format}" + raise TypeError(msg) + self._options = value + def to_api_repr(self) -> dict: """Build an API representation of this object. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py b/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py new file mode 100644 index 000000000000..2c9a2ce20e78 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py @@ -0,0 +1,80 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +from typing import Dict + + +class ParquetOptions: + """Additional options if the PARQUET source format is used.""" + + _SOURCE_FORMAT = "PARQUET" + _RESOURCE_NAME = "parquetOptions" + + def __init__(self): + self._properties = {} + + @property + def enum_as_string(self) -> bool: + """Indicates whether to infer Parquet ENUM logical type as STRING instead of + BYTES by default. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ParquetOptions.FIELDS.enum_as_string + """ + return self._properties.get("enumAsString") + + @enum_as_string.setter + def enum_as_string(self, value: bool) -> None: + self._properties["enumAsString"] = value + + @property + def enable_list_inference(self) -> bool: + """Indicates whether to use schema inference specifically for Parquet LIST + logical type. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ParquetOptions.FIELDS.enable_list_inference + """ + return self._properties.get("enableListInference") + + @enable_list_inference.setter + def enable_list_inference(self, value: bool) -> None: + self._properties["enableListInference"] = value + + @classmethod + def from_api_repr(cls, resource: Dict[str, bool]) -> "ParquetOptions": + """Factory: construct an instance from a resource dict. + + Args: + resource (Dict[str, bool]): + Definition of a :class:`~.format_options.ParquetOptions` instance in + the same representation as is returned from the API. + + Returns: + :class:`~.format_options.ParquetOptions`: + Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, bool]: + A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index b8174af3ea08..41d38dd7435d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -16,6 +16,7 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import HivePartitioningOptions +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery import _helpers from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import _to_schema_fields @@ -439,6 +440,26 @@ def write_disposition(self): def write_disposition(self, value): self._set_sub_prop("writeDisposition", value) + @property + def parquet_options(self): + """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional + properties to set if ``sourceFormat`` is set to PARQUET. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.parquet_options + """ + prop = self._get_sub_prop("parquetOptions") + if prop is not None: + prop = ParquetOptions.from_api_repr(prop) + return prop + + @parquet_options.setter + def parquet_options(self, value): + if value is not None: + self._set_sub_prop("parquetOptions", value.to_api_repr()) + else: + self._del_sub_prop("parquetOptions") + class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index 63f15ec5a4eb..b0729e428c0d 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -700,3 +700,38 @@ def test_write_disposition_setter(self): self.assertEqual( config._properties["load"]["writeDisposition"], write_disposition ) + + def test_parquet_options_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.parquet_options) + + def test_parquet_options_hit(self): + config = self._get_target_class()() + config._properties["load"]["parquetOptions"] = dict( + enumAsString=True, enableListInference=False + ) + self.assertTrue(config.parquet_options.enum_as_string) + self.assertFalse(config.parquet_options.enable_list_inference) + + def test_parquet_options_setter(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + dict(enumAsString=False, enableListInference=True) + ) + config = self._get_target_class()() + + config.parquet_options = parquet_options + self.assertEqual( + config._properties["load"]["parquetOptions"], + {"enumAsString": False, "enableListInference": True}, + ) + + def test_parquet_options_setter_clearing(self): + config = self._get_target_class()() + config._properties["load"]["parquetOptions"] = dict( + enumAsString=False, enableListInference=True + ) + + config.parquet_options = None + self.assertNotIn("parquetOptions", config._properties["load"]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 648a8717ee7f..7178367ea406 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -425,6 +425,106 @@ def test_to_api_repr_bigtable(self): self.assertEqual(got_resource, exp_resource) + def test_parquet_options_getter(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + {"enumAsString": True, "enableListInference": False} + ) + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.PARQUET + ) + + self.assertIsNone(ec.parquet_options.enum_as_string) + self.assertIsNone(ec.parquet_options.enable_list_inference) + + ec._options = parquet_options + + self.assertTrue(ec.parquet_options.enum_as_string) + self.assertFalse(ec.parquet_options.enable_list_inference) + + self.assertIs(ec.parquet_options, ec.options) + + def test_parquet_options_getter_non_parquet_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNone(ec.parquet_options) + + def test_parquet_options_setter(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + {"enumAsString": False, "enableListInference": True} + ) + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.PARQUET + ) + + ec.parquet_options = parquet_options + + # Setting Parquet options should be reflected in the generic options attribute. + self.assertFalse(ec.options.enum_as_string) + self.assertTrue(ec.options.enable_list_inference) + + def test_parquet_options_setter_non_parquet_format(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + {"enumAsString": False, "enableListInference": True} + ) + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + + with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"): + ec.parquet_options = parquet_options + + def test_from_api_repr_parquet(self): + from google.cloud.bigquery.format_options import ParquetOptions + + resource = _copy_and_update( + self.BASE_RESOURCE, + { + "sourceFormat": "PARQUET", + "parquetOptions": {"enumAsString": True, "enableListInference": False}, + }, + ) + + ec = external_config.ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, external_config.ExternalSourceFormat.PARQUET) + self.assertIsInstance(ec.options, ParquetOptions) + self.assertTrue(ec.parquet_options.enum_as_string) + self.assertFalse(ec.parquet_options.enable_list_inference) + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, resource) + + del resource["parquetOptions"]["enableListInference"] + ec = external_config.ExternalConfig.from_api_repr(resource) + self.assertIsNone(ec.options.enable_list_inference) + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + def test_to_api_repr_parquet(self): + from google.cloud.bigquery.format_options import ParquetOptions + + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.PARQUET + ) + options = ParquetOptions.from_api_repr( + dict(enumAsString=False, enableListInference=True) + ) + ec._options = options + + exp_resource = { + "sourceFormat": external_config.ExternalSourceFormat.PARQUET, + "parquetOptions": {"enumAsString": False, "enableListInference": True}, + } + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, exp_resource) + def _copy_and_update(d, u): d = copy.deepcopy(d) diff --git a/packages/google-cloud-bigquery/tests/unit/test_format_options.py b/packages/google-cloud-bigquery/tests/unit/test_format_options.py new file mode 100644 index 000000000000..ab5f9e05cbe4 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_format_options.py @@ -0,0 +1,41 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class TestParquetOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.format_options import ParquetOptions + + return ParquetOptions + + def test_ctor(self): + config = self._get_target_class()() + assert config.enum_as_string is None + assert config.enable_list_inference is None + + def test_from_api_repr(self): + config = self._get_target_class().from_api_repr( + {"enumAsString": False, "enableListInference": True} + ) + assert not config.enum_as_string + assert config.enable_list_inference + + def test_to_api_repr(self): + config = self._get_target_class()() + config.enum_as_string = True + config.enable_list_inference = False + + result = config.to_api_repr() + assert result == {"enumAsString": True, "enableListInference": False} From 87b8a04ecf39df36e099318db0b95736a2da466f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 2 Jun 2021 09:48:10 +0200 Subject: [PATCH 1165/2016] chore: release 2.18.0 (#682) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 2a0227118793..b8abc5abb3ff 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.18.0](https://www.github.com/googleapis/python-bigquery/compare/v2.17.0...v2.18.0) (2021-06-02) + + +### Features + +* add support for Parquet options ([#679](https://www.github.com/googleapis/python-bigquery/issues/679)) ([d792ce0](https://www.github.com/googleapis/python-bigquery/commit/d792ce09388a6ee3706777915dd2818d4c854f79)) + ## [2.17.0](https://www.github.com/googleapis/python-bigquery/compare/v2.16.1...v2.17.0) (2021-05-21) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 422b383cc30d..a613e5ea2f7e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.17.0" +__version__ = "2.18.0" From 1c9cb7f2debfba2cf0a7a8e05c48524b74339ab3 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 3 Jun 2021 00:02:00 +0200 Subject: [PATCH 1166/2016] chore(deps): update dependency pyarrow to v4.0.1 (#681) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 2ed5b0df8cbd..31a4ca5b8e11 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -8,5 +8,5 @@ matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==4.0.0 +pyarrow==4.0.1 pytz==2021.1 From ff64b665dd8ee6e28ef0257d604c8a5988ad5a75 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 3 Jun 2021 10:29:17 +0200 Subject: [PATCH 1167/2016] chore(deps): update dependency google-cloud-bigquery to v2.18.0 (#683) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 9fbe85970222..4577dff02f42 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.17.0 +google-cloud-bigquery==2.18.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 31a4ca5b8e11..94fe39299774 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.17.0 +google-cloud-bigquery==2.18.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From 6ca78486de455eb0504f358f8fdb7d17e1f48db2 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 6 Jun 2021 10:18:13 -0400 Subject: [PATCH 1168/2016] feat: list_tables, list_projects, list_datasets, list_models, list_routines, and list_jobs now accept a page_size parameter to control page size (#686) --- .../google/cloud/bigquery/client.py | 39 +- packages/google-cloud-bigquery/setup.py | 2 +- .../testing/constraints-3.6.txt | 2 +- .../tests/unit/test_client.py | 494 ------------------ .../tests/unit/test_list_datasets.py | 124 +++++ .../tests/unit/test_list_jobs.py | 291 +++++++++++ .../tests/unit/test_list_models.py | 11 +- .../tests/unit/test_list_projects.py | 119 +++++ .../tests/unit/test_list_routines.py | 11 +- .../tests/unit/test_list_tables.py | 19 + 10 files changed, 605 insertions(+), 507 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/test_list_datasets.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_list_jobs.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_list_projects.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 7ef3795a889a..2b7a5273ecdc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -286,6 +286,7 @@ def list_projects( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -294,8 +295,8 @@ def list_projects( Args: max_results (Optional[int]): - Maximum number of projects to return, If not passed, - defaults to a value set by the API. + Maximum number of projects to return. + Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the projects. If not passed, @@ -310,6 +311,10 @@ def list_projects( The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of projects to return in each page. + Defaults to a value set by the API. + Returns: google.api_core.page_iterator.Iterator: Iterator of :class:`~google.cloud.bigquery.client.Project` @@ -335,6 +340,7 @@ def api_request(*args, **kwargs): items_key="projects", page_token=page_token, max_results=max_results, + page_size=page_size, ) def list_datasets( @@ -346,6 +352,7 @@ def list_datasets( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -375,6 +382,8 @@ def list_datasets( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of datasets to return per page. Returns: google.api_core.page_iterator.Iterator: @@ -414,6 +423,7 @@ def api_request(*args, **kwargs): page_token=page_token, max_results=max_results, extra_params=extra_params, + page_size=page_size, ) def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: @@ -1270,6 +1280,7 @@ def list_models( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1288,7 +1299,7 @@ def list_models( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of models to return. If not passed, defaults to a + Maximum number of models to return. Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the models. If not passed, @@ -1301,6 +1312,9 @@ def list_models( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of models to return per page. + Defaults to a value set by the API. Returns: google.api_core.page_iterator.Iterator: @@ -1331,6 +1345,7 @@ def api_request(*args, **kwargs): items_key="models", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -1342,6 +1357,7 @@ def list_routines( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1360,7 +1376,7 @@ def list_routines( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of routines to return. If not passed, defaults + Maximum number of routines to return. Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the routines. If not passed, @@ -1373,6 +1389,9 @@ def list_routines( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of routines to return per page. + Defaults to a value set by the API. Returns: google.api_core.page_iterator.Iterator: @@ -1403,6 +1422,7 @@ def api_request(*args, **kwargs): items_key="routines", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -1414,6 +1434,7 @@ def list_tables( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1432,7 +1453,7 @@ def list_tables( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of tables to return. If not passed, defaults + Maximum number of tables to return. Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the tables. If not passed, @@ -1445,6 +1466,9 @@ def list_tables( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of tables to return per page. + Defaults to a value set by the API. Returns: google.api_core.page_iterator.Iterator: @@ -1474,6 +1498,7 @@ def api_request(*args, **kwargs): items_key="tables", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -2112,6 +2137,7 @@ def list_jobs( timeout: float = None, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, + page_size: int = None, ) -> page_iterator.Iterator: """List jobs for the project associated with this client. @@ -2157,6 +2183,8 @@ def list_jobs( Max value for job creation time. If set, only jobs created before or at this timestamp are returned. If the datetime has no time zone assumes UTC time. + page_size (Optional[int]): + Maximum number of jobs to return per page. Returns: google.api_core.page_iterator.Iterator: @@ -2208,6 +2236,7 @@ def api_request(*args, **kwargs): page_token=page_token, max_results=max_results, extra_params=extra_params, + page_size=page_size, ) def load_table_from_uri( diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 6a6202ef9aa1..963eb73ec285 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -29,7 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-api-core[grpc] >= 1.23.0, < 2.0.0dev", + "google-api-core[grpc] >= 1.29.0, < 2.0.0dev", "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index 322373eba20c..71c9ff49ab1b 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -5,7 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 -google-api-core==1.23.0 +google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 1346a1ef6688..7a28ef248e1b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -471,221 +471,6 @@ def test_get_service_account_email_w_custom_retry(self): ], ) - def test_list_projects_defaults(self): - from google.cloud.bigquery.client import Project - - PROJECT_1 = "PROJECT_ONE" - PROJECT_2 = "PROJECT_TWO" - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "projects": [ - { - "kind": "bigquery#project", - "id": PROJECT_1, - "numericId": 1, - "projectReference": {"projectId": PROJECT_1}, - "friendlyName": "One", - }, - { - "kind": "bigquery#project", - "id": PROJECT_2, - "numericId": 2, - "projectReference": {"projectId": PROJECT_2}, - "friendlyName": "Two", - }, - ], - } - creds = _make_credentials() - client = self._make_one(PROJECT_1, creds) - conn = client._connection = make_connection(DATA) - iterator = client.list_projects() - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - projects = list(page) - token = iterator.next_page_token - - self.assertEqual(len(projects), len(DATA["projects"])) - for found, expected in zip(projects, DATA["projects"]): - self.assertIsInstance(found, Project) - self.assertEqual(found.project_id, expected["id"]) - self.assertEqual(found.numeric_id, expected["numericId"]) - self.assertEqual(found.friendly_name, expected["friendlyName"]) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params={}, timeout=None - ) - - def test_list_projects_w_timeout(self): - PROJECT_1 = "PROJECT_ONE" - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "projects": [], - } - creds = _make_credentials() - client = self._make_one(PROJECT_1, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_projects(timeout=7.5) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params={}, timeout=7.5 - ) - - def test_list_projects_explicit_response_missing_projects_key(self): - TOKEN = "TOKEN" - DATA = {} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_projects(max_results=3, page_token=TOKEN) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - projects = list(page) - token = iterator.next_page_token - - self.assertEqual(len(projects), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects", - query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, - ) - - def test_list_datasets_defaults(self): - from google.cloud.bigquery.dataset import DatasetListItem - - DATASET_1 = "dataset_one" - DATASET_2 = "dataset_two" - PATH = "projects/%s/datasets" % self.PROJECT - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "datasets": [ - { - "kind": "bigquery#dataset", - "id": "%s:%s" % (self.PROJECT, DATASET_1), - "datasetReference": { - "datasetId": DATASET_1, - "projectId": self.PROJECT, - }, - "friendlyName": None, - }, - { - "kind": "bigquery#dataset", - "id": "%s:%s" % (self.PROJECT, DATASET_2), - "datasetReference": { - "datasetId": DATASET_2, - "projectId": self.PROJECT, - }, - "friendlyName": "Two", - }, - ], - } - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_datasets() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - datasets = list(page) - token = iterator.next_page_token - - self.assertEqual(len(datasets), len(DATA["datasets"])) - for found, expected in zip(datasets, DATA["datasets"]): - self.assertIsInstance(found, DatasetListItem) - self.assertEqual(found.full_dataset_id, expected["id"]) - self.assertEqual(found.friendly_name, expected["friendlyName"]) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None - ) - - def test_list_datasets_w_project_and_timeout(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - list(client.list_datasets(project="other-project", timeout=7.5)) - - final_attributes.assert_called_once_with( - {"path": "/projects/other-project/datasets"}, client, None - ) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/other-project/datasets", - query_params={}, - timeout=7.5, - ) - - def test_list_datasets_explicit_response_missing_datasets_key(self): - PATH = "projects/%s/datasets" % self.PROJECT - TOKEN = "TOKEN" - FILTER = "FILTER" - DATA = {} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_datasets( - include_all=True, filter=FILTER, max_results=3, page_token=TOKEN - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - datasets = list(page) - token = iterator.next_page_token - - self.assertEqual(len(datasets), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={ - "all": True, - "filter": FILTER, - "maxResults": 3, - "pageToken": TOKEN, - }, - timeout=None, - ) - def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference @@ -3239,285 +3024,6 @@ def test_cancel_job_w_timeout(self): timeout=7.5, ) - def test_list_jobs_defaults(self): - from google.cloud.bigquery.job import CopyJob - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import ExtractJob - from google.cloud.bigquery.job import LoadJob - from google.cloud.bigquery.job import QueryJob - from google.cloud.bigquery.job import WriteDisposition - - SOURCE_TABLE = "source_table" - DESTINATION_TABLE = "destination_table" - QUERY_DESTINATION_TABLE = "query_destination_table" - SOURCE_URI = "gs://test_bucket/src_object*" - DESTINATION_URI = "gs://test_bucket/dst_object*" - JOB_TYPES = { - "load_job": LoadJob, - "copy_job": CopyJob, - "extract_job": ExtractJob, - "query_job": QueryJob, - } - PATH = "projects/%s/jobs" % self.PROJECT - TOKEN = "TOKEN" - QUERY = "SELECT * from test_dataset:test_table" - ASYNC_QUERY_DATA = { - "id": "%s:%s" % (self.PROJECT, "query_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, - "state": "DONE", - "configuration": { - "query": { - "query": QUERY, - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": QUERY_DESTINATION_TABLE, - }, - "createDisposition": CreateDisposition.CREATE_IF_NEEDED, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - } - }, - } - EXTRACT_DATA = { - "id": "%s:%s" % (self.PROJECT, "extract_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "extract_job"}, - "state": "DONE", - "configuration": { - "extract": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - }, - "destinationUris": [DESTINATION_URI], - } - }, - } - COPY_DATA = { - "id": "%s:%s" % (self.PROJECT, "copy_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "copy_job"}, - "state": "DONE", - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": DESTINATION_TABLE, - }, - } - }, - } - LOAD_DATA = { - "id": "%s:%s" % (self.PROJECT, "load_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"}, - "state": "DONE", - "configuration": { - "load": { - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - }, - "sourceUris": [SOURCE_URI], - } - }, - } - DATA = { - "nextPageToken": TOKEN, - "jobs": [ASYNC_QUERY_DATA, EXTRACT_DATA, COPY_DATA, LOAD_DATA], - } - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = iterator.next_page_token - - self.assertEqual(len(jobs), len(DATA["jobs"])) - for found, expected in zip(jobs, DATA["jobs"]): - name = expected["jobReference"]["jobId"] - self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.job_id, name) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_load_job_wo_sourceUris(self): - from google.cloud.bigquery.job import LoadJob - - SOURCE_TABLE = "source_table" - JOB_TYPES = {"load_job": LoadJob} - PATH = "projects/%s/jobs" % self.PROJECT - TOKEN = "TOKEN" - LOAD_DATA = { - "id": "%s:%s" % (self.PROJECT, "load_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"}, - "state": "DONE", - "configuration": { - "load": { - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - } - } - }, - } - DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = iterator.next_page_token - - self.assertEqual(len(jobs), len(DATA["jobs"])) - for found, expected in zip(jobs, DATA["jobs"]): - name = expected["jobReference"]["jobId"] - self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.job_id, name) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_explicit_missing(self): - PATH = "projects/%s/jobs" % self.PROJECT - DATA = {} - TOKEN = "TOKEN" - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs( - max_results=1000, page_token=TOKEN, all_users=True, state_filter="done" - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = iterator.next_page_token - - self.assertEqual(len(jobs), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={ - "projection": "full", - "maxResults": 1000, - "pageToken": TOKEN, - "allUsers": True, - "stateFilter": "done", - }, - timeout=None, - ) - - def test_list_jobs_w_project(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - list(client.list_jobs(project="other-project")) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/other-project/jobs", - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_w_timeout(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - list(client.list_jobs(timeout=7.5)) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/{}/jobs".format(self.PROJECT), - query_params={"projection": "full"}, - timeout=7.5, - ) - - def test_list_jobs_w_time_filter(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - # One millisecond after the unix epoch. - start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) - # One millisecond after the the 2038 31-bit signed int rollover - end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) - end_time_millis = (((2 ** 31) - 1) * 1000) + 1 - - list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time)) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/%s/jobs" % self.PROJECT, - query_params={ - "projection": "full", - "minCreationTime": "1", - "maxCreationTime": str(end_time_millis), - }, - timeout=None, - ) - - def test_list_jobs_w_parent_job_filter(self): - from google.cloud.bigquery import job - - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}, {}) - - parent_job_args = ["parent-job-123", job._AsyncJob("parent-job-123", client)] - - for parent_job in parent_job_args: - list(client.list_jobs(parent_job=parent_job)) - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/%s/jobs" % self.PROJECT, - query_params={"projection": "full", "parentJobId": "parent-job-123"}, - timeout=None, - ) - conn.api_request.reset_mock() - def test_load_table_from_uri(self): from google.cloud.bigquery.job import LoadJob, LoadJobConfig diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_datasets.py b/packages/google-cloud-bigquery/tests/unit/test_list_datasets.py new file mode 100644 index 000000000000..7793a7ba69a7 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_list_datasets.py @@ -0,0 +1,124 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_datasets_defaults(client, PROJECT, extra, query): + from google.cloud.bigquery.dataset import DatasetListItem + + DATASET_1 = "dataset_one" + DATASET_2 = "dataset_two" + PATH = "projects/%s/datasets" % PROJECT + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "datasets": [ + { + "kind": "bigquery#dataset", + "id": "%s:%s" % (PROJECT, DATASET_1), + "datasetReference": {"datasetId": DATASET_1, "projectId": PROJECT}, + "friendlyName": None, + }, + { + "kind": "bigquery#dataset", + "id": "%s:%s" % (PROJECT, DATASET_2), + "datasetReference": {"datasetId": DATASET_2, "projectId": PROJECT}, + "friendlyName": "Two", + }, + ], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_datasets(**extra) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + datasets = list(page) + token = iterator.next_page_token + + assert len(datasets) == len(DATA["datasets"]) + for found, expected in zip(datasets, DATA["datasets"]): + assert isinstance(found, DatasetListItem) + assert found.full_dataset_id == expected["id"] + assert found.friendly_name == expected["friendlyName"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params=query, timeout=None + ) + + +def test_list_datasets_w_project_and_timeout(client, PROJECT): + conn = client._connection = make_connection({}) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + list(client.list_datasets(project="other-project", timeout=7.5)) + + final_attributes.assert_called_once_with( + {"path": "/projects/other-project/datasets"}, client, None + ) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/other-project/datasets", + query_params={}, + timeout=7.5, + ) + + +def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT): + PATH = "projects/%s/datasets" % PROJECT + TOKEN = "TOKEN" + FILTER = "FILTER" + DATA = {} + conn = client._connection = make_connection(DATA) + + iterator = client.list_datasets( + include_all=True, filter=FILTER, max_results=3, page_token=TOKEN + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + datasets = list(page) + token = iterator.next_page_token + + assert len(datasets) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={ + "all": True, + "filter": FILTER, + "maxResults": 3, + "pageToken": TOKEN, + }, + timeout=None, + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py b/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py new file mode 100644 index 000000000000..f348be72490b --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py @@ -0,0 +1,291 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query): + from google.cloud.bigquery.job import CopyJob + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import ExtractJob + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import WriteDisposition + + SOURCE_TABLE = "source_table" + DESTINATION_TABLE = "destination_table" + QUERY_DESTINATION_TABLE = "query_destination_table" + SOURCE_URI = "gs://test_bucket/src_object*" + DESTINATION_URI = "gs://test_bucket/dst_object*" + JOB_TYPES = { + "load_job": LoadJob, + "copy_job": CopyJob, + "extract_job": ExtractJob, + "query_job": QueryJob, + } + PATH = "projects/%s/jobs" % PROJECT + TOKEN = "TOKEN" + QUERY = "SELECT * from test_dataset:test_table" + ASYNC_QUERY_DATA = { + "id": "%s:%s" % (PROJECT, "query_job"), + "jobReference": {"projectId": PROJECT, "jobId": "query_job"}, + "state": "DONE", + "configuration": { + "query": { + "query": QUERY, + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": QUERY_DESTINATION_TABLE, + }, + "createDisposition": CreateDisposition.CREATE_IF_NEEDED, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + } + }, + } + EXTRACT_DATA = { + "id": "%s:%s" % (PROJECT, "extract_job"), + "jobReference": {"projectId": PROJECT, "jobId": "extract_job"}, + "state": "DONE", + "configuration": { + "extract": { + "sourceTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + }, + "destinationUris": [DESTINATION_URI], + } + }, + } + COPY_DATA = { + "id": "%s:%s" % (PROJECT, "copy_job"), + "jobReference": {"projectId": PROJECT, "jobId": "copy_job"}, + "state": "DONE", + "configuration": { + "copy": { + "sourceTables": [ + {"projectId": PROJECT, "datasetId": DS_ID, "tableId": SOURCE_TABLE} + ], + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": DESTINATION_TABLE, + }, + } + }, + } + LOAD_DATA = { + "id": "%s:%s" % (PROJECT, "load_job"), + "jobReference": {"projectId": PROJECT, "jobId": "load_job"}, + "state": "DONE", + "configuration": { + "load": { + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + }, + "sourceUris": [SOURCE_URI], + } + }, + } + DATA = { + "nextPageToken": TOKEN, + "jobs": [ASYNC_QUERY_DATA, EXTRACT_DATA, COPY_DATA, LOAD_DATA], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs(**extra) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == len(DATA["jobs"]) + for found, expected in zip(jobs, DATA["jobs"]): + name = expected["jobReference"]["jobId"] + assert isinstance(found, JOB_TYPES[name]) + assert found.job_id == name + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params=dict({"projection": "full"}, **query), + timeout=None, + ) + + +def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID): + from google.cloud.bigquery.job import LoadJob + + SOURCE_TABLE = "source_table" + JOB_TYPES = {"load_job": LoadJob} + PATH = "projects/%s/jobs" % PROJECT + TOKEN = "TOKEN" + LOAD_DATA = { + "id": "%s:%s" % (PROJECT, "load_job"), + "jobReference": {"projectId": PROJECT, "jobId": "load_job"}, + "state": "DONE", + "configuration": { + "load": { + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + } + } + }, + } + DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]} + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == len(DATA["jobs"]) + for found, expected in zip(jobs, DATA["jobs"]): + name = expected["jobReference"]["jobId"] + assert isinstance(found, JOB_TYPES[name]) + assert found.job_id == name + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={"projection": "full"}, + timeout=None, + ) + + +def test_list_jobs_explicit_missing(client, PROJECT): + PATH = "projects/%s/jobs" % PROJECT + DATA = {} + TOKEN = "TOKEN" + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs( + max_results=1000, page_token=TOKEN, all_users=True, state_filter="done" + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={ + "projection": "full", + "maxResults": 1000, + "pageToken": TOKEN, + "allUsers": True, + "stateFilter": "done", + }, + timeout=None, + ) + + +def test_list_jobs_w_project(client, PROJECT): + conn = client._connection = make_connection({}) + + list(client.list_jobs(project="other-project")) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/other-project/jobs", + query_params={"projection": "full"}, + timeout=None, + ) + + +def test_list_jobs_w_timeout(client, PROJECT): + conn = client._connection = make_connection({}) + + list(client.list_jobs(timeout=7.5)) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/{}/jobs".format(PROJECT), + query_params={"projection": "full"}, + timeout=7.5, + ) + + +def test_list_jobs_w_time_filter(client, PROJECT): + conn = client._connection = make_connection({}) + + # One millisecond after the unix epoch. + start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) + # One millisecond after the the 2038 31-bit signed int rollover + end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) + end_time_millis = (((2 ** 31) - 1) * 1000) + 1 + + list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time)) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/%s/jobs" % PROJECT, + query_params={ + "projection": "full", + "minCreationTime": "1", + "maxCreationTime": str(end_time_millis), + }, + timeout=None, + ) + + +def test_list_jobs_w_parent_job_filter(client, PROJECT): + from google.cloud.bigquery import job + + conn = client._connection = make_connection({}, {}) + + parent_job_args = ["parent-job-123", job._AsyncJob("parent-job-123", client)] + + for parent_job in parent_job_args: + list(client.list_jobs(parent_job=parent_job)) + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/%s/jobs" % PROJECT, + query_params={"projection": "full", "parentJobId": "parent-job-123"}, + timeout=None, + ) + conn.api_request.reset_mock() diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_models.py b/packages/google-cloud-bigquery/tests/unit/test_list_models.py index 56aa66126753..4ede9a7ddfe2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_models.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_models.py @@ -33,8 +33,13 @@ def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): ) +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) @dataset_polymorphic -def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): +def test_list_models_defaults( + make_dataset, get_reference, client, PROJECT, DS_ID, extra, query, +): from google.cloud.bigquery.model import Model MODEL_1 = "model_one" @@ -64,7 +69,7 @@ def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_I conn = client._connection = make_connection(DATA) dataset = make_dataset(PROJECT, DS_ID) - iterator = client.list_models(dataset) + iterator = client.list_models(dataset, **extra) assert iterator.dataset == get_reference(dataset) page = next(iterator.pages) models = list(page) @@ -77,7 +82,7 @@ def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_I assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=None ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_projects.py b/packages/google-cloud-bigquery/tests/unit/test_list_projects.py new file mode 100644 index 000000000000..a88540dd524e --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_list_projects.py @@ -0,0 +1,119 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_projects_defaults(client, PROJECT, extra, query): + from google.cloud.bigquery.client import Project + + PROJECT_2 = "PROJECT_TWO" + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "projects": [ + { + "kind": "bigquery#project", + "id": PROJECT, + "numericId": 1, + "projectReference": {"projectId": PROJECT}, + "friendlyName": "One", + }, + { + "kind": "bigquery#project", + "id": PROJECT_2, + "numericId": 2, + "projectReference": {"projectId": PROJECT_2}, + "friendlyName": "Two", + }, + ], + } + conn = client._connection = make_connection(DATA) + iterator = client.list_projects(**extra) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + projects = list(page) + token = iterator.next_page_token + + assert len(projects) == len(DATA["projects"]) + for found, expected in zip(projects, DATA["projects"]): + assert isinstance(found, Project) + assert found.project_id == expected["id"] + assert found.numeric_id == expected["numericId"] + assert found.friendly_name == expected["friendlyName"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/projects", query_params=query, timeout=None + ) + + +def test_list_projects_w_timeout(client): + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "projects": [], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_projects(timeout=7.5) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + + conn.api_request.assert_called_once_with( + method="GET", path="/projects", query_params={}, timeout=7.5 + ) + + +def test_list_projects_explicit_response_missing_projects_key(client): + TOKEN = "TOKEN" + DATA = {} + conn = client._connection = make_connection(DATA) + + iterator = client.list_projects(max_results=3, page_token=TOKEN) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + projects = list(page) + token = iterator.next_page_token + + assert len(projects) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects", + query_params={"maxResults": 3, "pageToken": TOKEN}, + timeout=None, + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_routines.py b/packages/google-cloud-bigquery/tests/unit/test_list_routines.py index 714ede0d420f..0699665424b0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_routines.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_routines.py @@ -34,8 +34,13 @@ def test_list_routines_empty_w_timeout(client): ) +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) @dataset_polymorphic -def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): +def test_list_routines_defaults( + make_dataset, get_reference, client, PROJECT, extra, query +): from google.cloud.bigquery.routine import Routine project_id = PROJECT @@ -67,7 +72,7 @@ def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): conn = client._connection = make_connection(resource) dataset = make_dataset(client.project, dataset_id) - iterator = client.list_routines(dataset) + iterator = client.list_routines(dataset, **extra) assert iterator.dataset == get_reference(dataset) page = next(iterator.pages) routines = list(page) @@ -80,7 +85,7 @@ def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): assert actual_token == token conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=None + method="GET", path=path, query_params=query, timeout=None ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_tables.py b/packages/google-cloud-bigquery/tests/unit/test_list_tables.py index 9acee95807ba..45d15bed30e0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_tables.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_tables.py @@ -157,3 +157,22 @@ def test_list_tables_explicit(client, PROJECT, DS_ID): def test_list_tables_wrong_type(client): with pytest.raises(TypeError): client.list_tables(42) + + +@dataset_polymorphic +def test_list_tables_page_size(make_dataset, get_reference, client, PROJECT, DS_ID): + path = "/projects/{}/datasets/{}/tables".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset = make_dataset(PROJECT, DS_ID) + iterator = client.list_tables(dataset, timeout=7.5, page_size=42) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert tables == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params=dict(maxResults=42), timeout=7.5 + ) From 3188aefb2ae3d42d698f9870dedbcde3d987f55f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Sun, 6 Jun 2021 08:57:21 -0600 Subject: [PATCH 1169/2016] chore: release 2.19.0 (#688) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index b8abc5abb3ff..a7d62cd36220 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.19.0](https://www.github.com/googleapis/python-bigquery/compare/v2.18.0...v2.19.0) (2021-06-06) + + +### Features + +* list_tables, list_projects, list_datasets, list_models, list_routines, and list_jobs now accept a page_size parameter to control page size ([#686](https://www.github.com/googleapis/python-bigquery/issues/686)) ([1f1c4b7](https://www.github.com/googleapis/python-bigquery/commit/1f1c4b7ba4390fc4c5c8186bc22b83b45304ca06)) + ## [2.18.0](https://www.github.com/googleapis/python-bigquery/compare/v2.17.0...v2.18.0) (2021-06-02) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index a613e5ea2f7e..2605c08a3adf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.18.0" +__version__ = "2.19.0" From 77d7ca88d6980af1cdeed51751f999325e4aa48c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 7 Jun 2021 14:55:58 +0200 Subject: [PATCH 1170/2016] feat: support script options in query job config (#690) --- .../google/cloud/bigquery/__init__.py | 4 + .../google/cloud/bigquery/enums.py | 13 +++ .../google/cloud/bigquery/job/__init__.py | 2 + .../google/cloud/bigquery/job/query.py | 96 ++++++++++++++++++- .../tests/unit/job/test_query_config.py | 56 +++++++++++ 5 files changed, 170 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index f031cd81d056..94f87304a975 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -37,6 +37,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums +from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -62,6 +63,7 @@ from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import QueryPriority from google.cloud.bigquery.job import SchemaUpdateOption +from google.cloud.bigquery.job import ScriptOptions from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import UnknownJob from google.cloud.bigquery.job import WriteDisposition @@ -138,6 +140,7 @@ "CSVOptions", "GoogleSheetsOptions", "ParquetOptions", + "ScriptOptions", "DEFAULT_RETRY", # Enum Constants "enums", @@ -147,6 +150,7 @@ "DeterminismLevel", "ExternalSourceFormat", "Encoding", + "KeyResultStatementKind", "QueryPriority", "SchemaUpdateOption", "SourceFormat", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 787c2449d963..edf991b6fdd3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -142,6 +142,19 @@ class SourceFormat(object): """Specifies Orc format.""" +class KeyResultStatementKind: + """Determines which statement in the script represents the "key result". + + The "key result" is used to populate the schema and query results of the script job. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#keyresultstatementkind + """ + + KEY_RESULT_STATEMENT_KIND_UNSPECIFIED = "KEY_RESULT_STATEMENT_KIND_UNSPECIFIED" + LAST = "LAST" + FIRST_SELECT = "FIRST_SELECT" + + _SQL_SCALAR_TYPES = frozenset( ( "INT64", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py index 4945841d960f..cdab92e05953 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py @@ -34,6 +34,7 @@ from google.cloud.bigquery.job.query import QueryJobConfig from google.cloud.bigquery.job.query import QueryPlanEntry from google.cloud.bigquery.job.query import QueryPlanEntryStep +from google.cloud.bigquery.job.query import ScriptOptions from google.cloud.bigquery.job.query import TimelineEntry from google.cloud.bigquery.enums import Compression from google.cloud.bigquery.enums import CreateDisposition @@ -67,6 +68,7 @@ "QueryJobConfig", "QueryPlanEntry", "QueryPlanEntryStep", + "ScriptOptions", "TimelineEntry", "Compression", "CreateDisposition", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index f52f9c621f3d..455ef46327d1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -18,7 +18,7 @@ import copy import re import typing -from typing import Any, Dict, Union +from typing import Any, Dict, Optional, Union from google.api_core import exceptions from google.api_core.future import polling as polling_future @@ -28,6 +28,7 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import _query_param_from_api_repr @@ -113,6 +114,82 @@ def _to_api_repr_table_defs(value): return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} +class ScriptOptions: + """Options controlling the execution of scripts. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ScriptOptions + """ + + def __init__( + self, + statement_timeout_ms: Optional[int] = None, + statement_byte_budget: Optional[int] = None, + key_result_statement: Optional[KeyResultStatementKind] = None, + ): + self._properties = {} + self.statement_timeout_ms = statement_timeout_ms + self.statement_byte_budget = statement_byte_budget + self.key_result_statement = key_result_statement + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "ScriptOptions": + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: Any]): + ScriptOptions representation returned from API. + + Returns: + google.cloud.bigquery.ScriptOptions: + ScriptOptions sample parsed from ``resource``. + """ + entry = cls() + entry._properties = copy.deepcopy(resource) + return entry + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation.""" + return copy.deepcopy(self._properties) + + @property + def statement_timeout_ms(self) -> Union[int, None]: + """Timeout period for each statement in a script.""" + return _helpers._int_or_none(self._properties.get("statementTimeoutMs")) + + @statement_timeout_ms.setter + def statement_timeout_ms(self, value: Union[int, None]): + if value is not None: + value = str(value) + self._properties["statementTimeoutMs"] = value + + @property + def statement_byte_budget(self) -> Union[int, None]: + """Limit on the number of bytes billed per statement. + + Exceeding this budget results in an error. + """ + return _helpers._int_or_none(self._properties.get("statementByteBudget")) + + @statement_byte_budget.setter + def statement_byte_budget(self, value: Union[int, None]): + if value is not None: + value = str(value) + self._properties["statementByteBudget"] = value + + @property + def key_result_statement(self) -> Union[KeyResultStatementKind, None]: + """Determines which statement in the script represents the "key result". + + This is used to populate the schema and query results of the script job. + Default is ``KeyResultStatementKind.LAST``. + """ + return self._properties.get("keyResultStatement") + + @key_result_statement.setter + def key_result_statement(self, value: Union[KeyResultStatementKind, None]): + self._properties["keyResultStatement"] = value + + class QueryJobConfig(_JobConfig): """Configuration options for query jobs. @@ -502,6 +579,23 @@ def schema_update_options(self): def schema_update_options(self, values): self._set_sub_prop("schemaUpdateOptions", values) + @property + def script_options(self) -> ScriptOptions: + """Connection properties which can modify the query behavior. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#scriptoptions + """ + prop = self._get_sub_prop("scriptOptions") + if prop is not None: + prop = ScriptOptions.from_api_repr(prop) + return prop + + @script_options.setter + def script_options(self, value: Union[ScriptOptions, None]): + if value is not None: + value = value.to_api_repr() + self._set_sub_prop("scriptOptions", value) + def to_api_repr(self) -> dict: """Build an API representation of the query job config. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py index db03d6a3b246..109cf7e44d8b 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py @@ -253,3 +253,59 @@ def test_from_api_repr_with_encryption(self): self.assertEqual( config.destination_encryption_configuration.kms_key_name, self.KMS_KEY_NAME ) + + def test_to_api_repr_with_script_options_none(self): + config = self._make_one() + config.script_options = None + + resource = config.to_api_repr() + + self.assertEqual(resource, {"query": {"scriptOptions": None}}) + self.assertIsNone(config.script_options) + + def test_to_api_repr_with_script_options(self): + from google.cloud.bigquery import KeyResultStatementKind + from google.cloud.bigquery import ScriptOptions + + config = self._make_one() + config.script_options = ScriptOptions( + statement_timeout_ms=60, + statement_byte_budget=999, + key_result_statement=KeyResultStatementKind.FIRST_SELECT, + ) + + resource = config.to_api_repr() + + expected_script_options_repr = { + "statementTimeoutMs": "60", + "statementByteBudget": "999", + "keyResultStatement": KeyResultStatementKind.FIRST_SELECT, + } + self.assertEqual( + resource, {"query": {"scriptOptions": expected_script_options_repr}} + ) + + def test_from_api_repr_with_script_options(self): + from google.cloud.bigquery import KeyResultStatementKind + from google.cloud.bigquery import ScriptOptions + + resource = { + "query": { + "scriptOptions": { + "statementTimeoutMs": "42", + "statementByteBudget": "123", + "keyResultStatement": KeyResultStatementKind.LAST, + }, + }, + } + klass = self._get_target_class() + + config = klass.from_api_repr(resource) + + script_options = config.script_options + self.assertIsInstance(script_options, ScriptOptions) + self.assertEqual(script_options.statement_timeout_ms, 42) + self.assertEqual(script_options.statement_byte_budget, 123) + self.assertEqual( + script_options.key_result_statement, KeyResultStatementKind.LAST + ) From 72e706f814509df651a2be1286ed28e670bbed9b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 7 Jun 2021 14:56:55 +0200 Subject: [PATCH 1171/2016] chore(deps): update dependency google-cloud-bigquery to v2.19.0 (#691) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 4577dff02f42..b4dae32e9c24 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.18.0 +google-cloud-bigquery==2.19.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 94fe39299774..0188bde52bee 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.18.0 +google-cloud-bigquery==2.19.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From bc4865d1f1d93a53777225ee5b91649c3f57ce9a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 7 Jun 2021 15:30:36 +0200 Subject: [PATCH 1172/2016] chore: release 2.20.0 (#693) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index a7d62cd36220..b08cd98c7cbc 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.20.0](https://www.github.com/googleapis/python-bigquery/compare/v2.19.0...v2.20.0) (2021-06-07) + + +### Features + +* support script options in query job config ([#690](https://www.github.com/googleapis/python-bigquery/issues/690)) ([1259e16](https://www.github.com/googleapis/python-bigquery/commit/1259e16394784315368e8be959c1ac097782b62e)) + ## [2.19.0](https://www.github.com/googleapis/python-bigquery/compare/v2.18.0...v2.19.0) (2021-06-06) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 2605c08a3adf..9fea4fece7f8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.19.0" +__version__ = "2.20.0" From 5aa264eda354230f6ed448a0f128599d02b8e498 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 8 Jun 2021 13:38:51 +0200 Subject: [PATCH 1173/2016] chore(deps): update dependency google-cloud-bigquery to v2.20.0 (#694) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index b4dae32e9c24..80fa8e454acb 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.19.0 +google-cloud-bigquery==2.20.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 0188bde52bee..391c85ae331a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.19.0 +google-cloud-bigquery==2.20.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From fd1f5d4a0ec342f0aa981d092e828b0cb3426671 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 16 Jun 2021 07:17:37 -0400 Subject: [PATCH 1174/2016] chore: pin sphinx to version 4.0.1 (#701) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: pin sphinx to version 4.0.1 Fixes #700 * 🦉 Updates from OwlBot Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/noxfile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index a52025635812..662abbd7848a 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -271,7 +271,7 @@ def blacken(session): def docs(session): """Build the docs.""" - session.install("ipython", "recommonmark", "sphinx", "sphinx_rtd_theme") + session.install("ipython", "recommonmark", "sphinx==4.0.1", "sphinx_rtd_theme") session.install("google-cloud-storage") session.install("-e", ".[all]") @@ -295,7 +295,9 @@ def docfx(session): """Build the docfx yaml files for this library.""" session.install("-e", ".") - session.install("sphinx", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml") + session.install( + "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( From ee58fb814b0b1b129591de4d48f0f34f8a7854e2 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 17 Jun 2021 06:02:02 +0000 Subject: [PATCH 1175/2016] chore: remove u'' prefixes from strings in docs/conf.py (#702) Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:58c7342b0bccf85028100adaa3d856cb4a871c22ca9c01960d996e66c40548ce --- .../google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/docs/conf.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index da616c91a3b6..ea06d395ea2b 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:c66ba3c8d7bc8566f47df841f98cd0097b28fff0b1864c86f5817f4c8c3e8600 + digest: sha256:58c7342b0bccf85028100adaa3d856cb4a871c22ca9c01960d996e66c40548ce diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 1275fe3f1107..cb347160de42 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -80,9 +80,9 @@ master_doc = "index" # General information about the project. -project = u"google-cloud-bigquery" -copyright = u"2019, Google" -author = u"Google APIs" +project = "google-cloud-bigquery" +copyright = "2019, Google" +author = "Google APIs" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -282,7 +282,7 @@ ( master_doc, "google-cloud-bigquery.tex", - u"google-cloud-bigquery Documentation", + "google-cloud-bigquery Documentation", author, "manual", ) @@ -317,7 +317,7 @@ ( master_doc, "google-cloud-bigquery", - u"google-cloud-bigquery Documentation", + "google-cloud-bigquery Documentation", [author], 1, ) @@ -336,7 +336,7 @@ ( master_doc, "google-cloud-bigquery", - u"google-cloud-bigquery Documentation", + "google-cloud-bigquery Documentation", author, "google-cloud-bigquery", "google-cloud-bigquery Library", From d5f4e2cc70004dbf8e3714fc2ddc5d6d0bd28027 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 17 Jun 2021 22:59:26 +0200 Subject: [PATCH 1176/2016] test: add column ACLs test with real policy tag (#678) * test: add column ACLs test with real policy tag * Use v1 version of the datacatalog client * Install datacatalog in pre-releease tests * Adjust test to actually make it work * Make sure taxonomy is properly cleaned up --- packages/google-cloud-bigquery/noxfile.py | 4 ++ .../tests/system/test_client.py | 68 +++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 662abbd7848a..0dfe7bf93350 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -142,6 +142,9 @@ def system(session): else: session.install("google-cloud-storage", "-c", constraints_path) + # Data Catalog needed for the column ACL test with a real Policy Tag. + session.install("google-cloud-datacatalog", "-c", constraints_path) + session.install("-e", ".[all]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) @@ -211,6 +214,7 @@ def prerelease_deps(session): session.install("--pre", "grpcio", "pandas") session.install( "freezegun", + "google-cloud-datacatalog", "google-cloud-storage", "google-cloud-testutils", "IPython", diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index b4b0c053d431..f91004eac033 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -68,6 +68,8 @@ from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums from google.cloud import storage +from google.cloud.datacatalog_v1 import types as datacatalog_types +from google.cloud.datacatalog_v1 import PolicyTagManagerClient from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState @@ -167,6 +169,8 @@ def setUp(self): self.to_delete = [dataset] def tearDown(self): + policy_tag_client = PolicyTagManagerClient() + def _still_in_use(bad_request): return any( error["reason"] == "resourceInUse" for error in bad_request._errors @@ -183,6 +187,8 @@ def _still_in_use(bad_request): retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True) elif isinstance(doomed, (Table, bigquery.TableReference)): retry_in_use(Config.CLIENT.delete_table)(doomed) + elif isinstance(doomed, datacatalog_types.Taxonomy): + policy_tag_client.delete_taxonomy(name=doomed.name) else: doomed.delete() @@ -381,6 +387,68 @@ def test_create_table_with_policy(self): table2 = Config.CLIENT.update_table(table, ["schema"]) self.assertEqual(policy_2, table2.schema[1].policy_tags) + def test_create_table_with_real_custom_policy(self): + from google.cloud.bigquery.schema import PolicyTagList + + policy_tag_client = PolicyTagManagerClient() + taxonomy_parent = f"projects/{Config.CLIENT.project}/locations/us" + + new_taxonomy = datacatalog_types.Taxonomy( + display_name="Custom test taxonomy", + description="This taxonomy is ony used for a test.", + activated_policy_types=[ + datacatalog_types.Taxonomy.PolicyType.FINE_GRAINED_ACCESS_CONTROL + ], + ) + + taxonomy = policy_tag_client.create_taxonomy( + parent=taxonomy_parent, taxonomy=new_taxonomy + ) + self.to_delete.insert(0, taxonomy) + + parent_policy_tag = policy_tag_client.create_policy_tag( + parent=taxonomy.name, + policy_tag=datacatalog_types.PolicyTag( + display_name="Parent policy tag", parent_policy_tag=None + ), + ) + child_policy_tag = policy_tag_client.create_policy_tag( + parent=taxonomy.name, + policy_tag=datacatalog_types.PolicyTag( + display_name="Child policy tag", + parent_policy_tag=parent_policy_tag.name, + ), + ) + + dataset = self.temp_dataset( + _make_dataset_id("create_table_with_real_custom_policy") + ) + table_id = "test_table" + policy_1 = PolicyTagList(names=[parent_policy_tag.name]) + policy_2 = PolicyTagList(names=[child_policy_tag.name]) + + schema = [ + bigquery.SchemaField( + "first_name", "STRING", mode="REQUIRED", policy_tags=policy_1 + ), + bigquery.SchemaField( + "age", "INTEGER", mode="REQUIRED", policy_tags=policy_2 + ), + ] + table_arg = Table(dataset.table(table_id), schema=schema) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertCountEqual( + list(table.schema[0].policy_tags.names), [parent_policy_tag.name] + ) + self.assertCountEqual( + list(table.schema[1].policy_tags.names), [child_policy_tag.name] + ) + def test_create_table_w_time_partitioning_w_clustering_fields(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType From 1ad8ac036f938f765f373bfe162f10bc0a5b1109 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 18 Jun 2021 22:35:35 +0200 Subject: [PATCH 1177/2016] chore: resolve deprecation warnings from sys tests (#705) --- .../tests/system/test_client.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index f91004eac033..c4caadbe99f3 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -1123,7 +1123,7 @@ def test_extract_table(self): job.result(timeout=100) self.to_delete.insert(0, destination) - got_bytes = retry_storage_errors(destination.download_as_string)() + got_bytes = retry_storage_errors(destination.download_as_bytes)() got = got_bytes.decode("utf-8") self.assertIn("Bharney Rhubble", got) @@ -2178,15 +2178,11 @@ def test_nested_table_to_arrow(self): self.assertEqual(tbl.num_rows, 1) self.assertEqual(tbl.num_columns, 3) # Columns may not appear in the requested order. - self.assertTrue( - pyarrow.types.is_float64(tbl.schema.field_by_name("float_col").type) - ) - self.assertTrue( - pyarrow.types.is_string(tbl.schema.field_by_name("string_col").type) - ) - record_col = tbl.schema.field_by_name("record_col").type + self.assertTrue(pyarrow.types.is_float64(tbl.schema.field("float_col").type)) + self.assertTrue(pyarrow.types.is_string(tbl.schema.field("string_col").type)) + record_col = tbl.schema.field("record_col").type self.assertTrue(pyarrow.types.is_struct(record_col)) - self.assertEqual(record_col.num_children, 2) + self.assertEqual(record_col.num_fields, 2) self.assertEqual(record_col[0].name, "nested_string") self.assertTrue(pyarrow.types.is_string(record_col[0].type)) self.assertEqual(record_col[1].name, "nested_repeated") From db528baff4162af14eca562facdfc820b203e898 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 19 Jun 2021 02:26:03 +0000 Subject: [PATCH 1178/2016] docs: omit mention of Python 2.7 in `CONTRIBUTING.rst` (#706) Source-Link: https://github.com/googleapis/synthtool/commit/b91f129527853d5b756146a0b5044481fb4e09a8 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:b6169fc6a5207b11800a7c002d0c5c2bc6d82697185ca12e666f44031468cfcd --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/CONTRIBUTING.rst | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index ea06d395ea2b..cc49c6a3dfac 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:58c7342b0bccf85028100adaa3d856cb4a871c22ca9c01960d996e66c40548ce + digest: sha256:b6169fc6a5207b11800a7c002d0c5c2bc6d82697185ca12e666f44031468cfcd diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index 20ba9e62efee..a9b389e83771 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -69,7 +69,6 @@ We use `nox `__ to instrument our tests. - To test your changes, run unit tests with ``nox``:: - $ nox -s unit-2.7 $ nox -s unit-3.8 $ ... @@ -144,7 +143,6 @@ Running System Tests # Run all system tests $ nox -s system-3.8 - $ nox -s system-2.7 # Run a single system test $ nox -s system-3.8 -- -k @@ -152,9 +150,8 @@ Running System Tests .. note:: - System tests are only configured to run under Python 2.7 and - Python 3.8. For expediency, we do not run them in older versions - of Python 3. + System tests are only configured to run under Python 3.8. + For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local auth settings and change some configuration in your project to From 83007c6cc3763ddc1e8db5e518763a2b512cbe97 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sun, 20 Jun 2021 01:04:03 +0000 Subject: [PATCH 1179/2016] chore: update precommit hook pre-commit/pre-commit-hooks to v4 (#1083) (#709) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [pre-commit/pre-commit-hooks](https://togithub.com/pre-commit/pre-commit-hooks) | repository | major | `v3.4.0` -> `v4.0.1` | --- ### Release Notes
pre-commit/pre-commit-hooks ### [`v4.0.1`](https://togithub.com/pre-commit/pre-commit-hooks/releases/v4.0.1) [Compare Source](https://togithub.com/pre-commit/pre-commit-hooks/compare/v4.0.0...v4.0.1) ##### Fixes - `check-shebang-scripts-are-executable` fix entry point. - [#​602](https://togithub.com/pre-commit/pre-commit-hooks/issues/602) issue by [@​Person-93](https://togithub.com/Person-93). - [#​603](https://togithub.com/pre-commit/pre-commit-hooks/issues/603) PR by [@​scop](https://togithub.com/scop). ### [`v4.0.0`](https://togithub.com/pre-commit/pre-commit-hooks/releases/v4.0.0) [Compare Source](https://togithub.com/pre-commit/pre-commit-hooks/compare/v3.4.0...v4.0.0) ##### Features - `check-json`: report duplicate keys. - [#​558](https://togithub.com/pre-commit/pre-commit-hooks/issues/558) PR by [@​AdityaKhursale](https://togithub.com/AdityaKhursale). - [#​554](https://togithub.com/pre-commit/pre-commit-hooks/issues/554) issue by [@​adamchainz](https://togithub.com/adamchainz). - `no-commit-to-branch`: add `main` to default blocked branches. - [#​565](https://togithub.com/pre-commit/pre-commit-hooks/issues/565) PR by [@​ndevenish](https://togithub.com/ndevenish). - `check-case-conflict`: check conflicts in directory names as well. - [#​575](https://togithub.com/pre-commit/pre-commit-hooks/issues/575) PR by [@​slsyy](https://togithub.com/slsyy). - [#​70](https://togithub.com/pre-commit/pre-commit-hooks/issues/70) issue by [@​andyjack](https://togithub.com/andyjack). - `check-vcs-permalinks`: forbid other branch names. - [#​582](https://togithub.com/pre-commit/pre-commit-hooks/issues/582) PR by [@​jack1142](https://togithub.com/jack1142). - [#​581](https://togithub.com/pre-commit/pre-commit-hooks/issues/581) issue by [@​jack1142](https://togithub.com/jack1142). - `check-shebang-scripts-are-executable`: new hook which ensures shebang'd scripts are executable. - [#​545](https://togithub.com/pre-commit/pre-commit-hooks/issues/545) PR by [@​scop](https://togithub.com/scop). ##### Fixes - `check-executables-have-shebangs`: Short circuit shebang lookup on windows. - [#​544](https://togithub.com/pre-commit/pre-commit-hooks/issues/544) PR by [@​scop](https://togithub.com/scop). - `requirements-txt-fixer`: Fix comments which have indentation - [#​549](https://togithub.com/pre-commit/pre-commit-hooks/issues/549) PR by [@​greshilov](https://togithub.com/greshilov). - [#​548](https://togithub.com/pre-commit/pre-commit-hooks/issues/548) issue by [@​greshilov](https://togithub.com/greshilov). - `pretty-format-json`: write to stdout using UTF-8 encoding. - [#​571](https://togithub.com/pre-commit/pre-commit-hooks/issues/571) PR by [@​jack1142](https://togithub.com/jack1142). - [#​570](https://togithub.com/pre-commit/pre-commit-hooks/issues/570) issue by [@​jack1142](https://togithub.com/jack1142). - Use more inclusive language. - [#​599](https://togithub.com/pre-commit/pre-commit-hooks/issues/599) PR by [@​asottile](https://togithub.com/asottile). ##### Breaking changes - Remove deprecated hooks: `flake8`, `pyflakes`, `autopep8-wrapper`. - [#​597](https://togithub.com/pre-commit/pre-commit-hooks/issues/597) PR by [@​asottile](https://togithub.com/asottile).
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻️ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/synthtool). Source-Link: https://github.com/googleapis/synthtool/commit/333fd90856f1454380514bc59fc0936cdaf1c202 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:b8c131c558606d3cea6e18f8e87befbd448c1482319b0db3c5d5388fa6ea72e3 --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/.pre-commit-config.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index cc49c6a3dfac..9602d540595e 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:b6169fc6a5207b11800a7c002d0c5c2bc6d82697185ca12e666f44031468cfcd + digest: sha256:b8c131c558606d3cea6e18f8e87befbd448c1482319b0db3c5d5388fa6ea72e3 diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml index 4f00c7cffcfd..62eb5a77d9a3 100644 --- a/packages/google-cloud-bigquery/.pre-commit-config.yaml +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -16,7 +16,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.0.1 hooks: - id: trailing-whitespace - id: end-of-file-fixer From 7b808ddfd6de1a8462f47af7f33d1783a174b156 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 21 Jun 2021 16:18:27 +0200 Subject: [PATCH 1180/2016] feat: enable unsetting policy tags on schema fields (#703) * feat: enable unsetting policy tags on schema fields * Adjust API representation for STRUCT schema fields * De-dup logic for converting None policy tags --- .../google/cloud/bigquery/schema.py | 45 ++++- .../tests/system/test_client.py | 50 ++++++ .../tests/unit/job/test_load_config.py | 4 + .../tests/unit/test_client.py | 49 +++++- .../tests/unit/test_external_config.py | 9 +- .../tests/unit/test_schema.py | 154 ++++++++++++++++-- 6 files changed, 279 insertions(+), 32 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 919d78b232ad..157db7ce60e4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -15,6 +15,7 @@ """Schemas for BigQuery tables / queries.""" import collections +from typing import Optional from google.cloud.bigquery_v2 import types @@ -105,7 +106,26 @@ def __init__( if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length self._fields = tuple(fields) - self._policy_tags = policy_tags + + self._policy_tags = self._determine_policy_tags(field_type, policy_tags) + + @staticmethod + def _determine_policy_tags( + field_type: str, given_policy_tags: Optional["PolicyTagList"] + ) -> Optional["PolicyTagList"]: + """Return the given policy tags, or their suitable representation if `None`. + + Args: + field_type: The type of the schema field. + given_policy_tags: The policy tags to maybe ajdust. + """ + if given_policy_tags is not None: + return given_policy_tags + + if field_type is not None and field_type.upper() in _STRUCT_TYPES: + return None + + return PolicyTagList() @staticmethod def __get_int(api_repr, name): @@ -126,18 +146,24 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": Returns: google.cloud.biquery.schema.SchemaField: The ``SchemaField`` object. """ + field_type = api_repr["type"].upper() + # Handle optional properties with default values mode = api_repr.get("mode", "NULLABLE") description = api_repr.get("description", _DEFAULT_VALUE) fields = api_repr.get("fields", ()) + policy_tags = cls._determine_policy_tags( + field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags")) + ) + return cls( - field_type=api_repr["type"].upper(), + field_type=field_type, fields=[cls.from_api_repr(f) for f in fields], mode=mode.upper(), description=description, name=api_repr["name"], - policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")), + policy_tags=policy_tags, precision=cls.__get_int(api_repr, "precision"), scale=cls.__get_int(api_repr, "scale"), max_length=cls.__get_int(api_repr, "maxLength"), @@ -218,9 +244,9 @@ def to_api_repr(self) -> dict: # add this to the serialized representation. if self.field_type.upper() in _STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] - - # If this contains a policy tag definition, include that as well: - if self.policy_tags is not None: + else: + # Explicitly include policy tag definition (we must not do it for RECORD + # fields, because those are not leaf fields). answer["policyTags"] = self.policy_tags.to_api_repr() # Done; return the serialized dictionary. @@ -244,6 +270,11 @@ def _key(self): field_type = f"{field_type}({self.precision}, {self.scale})" else: field_type = f"{field_type}({self.precision})" + + policy_tags = ( + () if self._policy_tags is None else tuple(sorted(self._policy_tags.names)) + ) + return ( self.name, field_type, @@ -251,7 +282,7 @@ def _key(self): self.mode.upper(), # pytype: disable=attribute-error self.description, self._fields, - self._policy_tags, + policy_tags, ) def to_standard_sql(self) -> types.StandardSqlField: diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index c4caadbe99f3..ce3021399038 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -653,6 +653,56 @@ def test_update_table_schema(self): self.assertEqual(found.field_type, expected.field_type) self.assertEqual(found.mode, expected.mode) + def test_unset_table_schema_attributes(self): + from google.cloud.bigquery.schema import PolicyTagList + + dataset = self.temp_dataset(_make_dataset_id("unset_policy_tags")) + table_id = "test_table" + policy_tags = PolicyTagList( + names=[ + "projects/{}/locations/us/taxonomies/1/policyTags/2".format( + Config.CLIENT.project + ), + ] + ) + + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField( + "secret_int", + "INTEGER", + mode="REQUIRED", + description="This field is numeric", + policy_tags=policy_tags, + ), + ] + table_arg = Table(dataset.table(table_id), schema=schema) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertEqual(policy_tags, table.schema[1].policy_tags) + + # Amend the schema to replace the policy tags + new_schema = table.schema[:] + old_field = table.schema[1] + new_schema[1] = bigquery.SchemaField( + name=old_field.name, + field_type=old_field.field_type, + mode=old_field.mode, + description=None, + fields=old_field.fields, + policy_tags=None, + ) + + table.schema = new_schema + updated_table = Config.CLIENT.update_table(table, ["schema"]) + + self.assertFalse(updated_table.schema[1].description) # Empty string or None. + self.assertEqual(updated_table.schema[1].policy_tags.names, ()) + def test_update_table_clustering_configuration(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index b0729e428c0d..eafe7e0462df 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -434,11 +434,13 @@ def test_schema_setter_fields(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", + "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", + "policyTags": {"names": []}, } self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} @@ -451,11 +453,13 @@ def test_schema_setter_valid_mappings_list(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", + "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", + "policyTags": {"names": []}, } schema = [full_name_repr, age_repr] config.schema = schema diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 7a28ef248e1b..f6811e207c56 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1019,8 +1019,18 @@ def test_create_table_w_schema_and_query(self): { "schema": { "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ] }, "view": {"query": query}, @@ -1054,8 +1064,18 @@ def test_create_table_w_schema_and_query(self): }, "schema": { "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ] }, "view": {"query": query, "useLegacySql": False}, @@ -2000,12 +2020,14 @@ def test_update_table(self): "type": "STRING", "mode": "REQUIRED", "description": None, + "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", + "policyTags": {"names": []}, }, ] }, @@ -2047,12 +2069,14 @@ def test_update_table(self): "type": "STRING", "mode": "REQUIRED", "description": None, + "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", + "policyTags": {"names": []}, }, ] }, @@ -2173,14 +2197,21 @@ def test_update_table_w_query(self): "type": "STRING", "mode": "REQUIRED", "description": None, + "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "this is a column", + "policyTags": {"names": []}, + }, + { + "name": "country", + "type": "STRING", + "mode": "NULLABLE", + "policyTags": {"names": []}, }, - {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } schema = [ @@ -6516,10 +6547,10 @@ def test_load_table_from_dataframe(self): assert field["type"] == table_field.field_type assert field["mode"] == table_field.mode assert len(field.get("fields", [])) == len(table_field.fields) + assert field["policyTags"]["names"] == [] # Omit unnecessary fields when they come from getting the table # (not passed in via job_config) assert "description" not in field - assert "policyTags" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -7718,18 +7749,21 @@ def test_schema_to_json_with_file_path(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", + "policyTags": {"names": []}, "type": "FLOAT", }, ] @@ -7762,18 +7796,21 @@ def test_schema_to_json_with_file_object(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", + "policyTags": {"names": []}, "type": "FLOAT", }, ] diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 7178367ea406..393df931e3b5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -78,7 +78,14 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + } + ] } got_resource = ec.to_api_repr() exp_resource = { diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 29c3bace5d63..d0b5ca54c52b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from google.cloud.bigquery.schema import PolicyTagList import unittest import mock @@ -41,6 +42,7 @@ def test_constructor_defaults(self): self.assertEqual(field.mode, "NULLABLE") self.assertIsNone(field.description) self.assertEqual(field.fields, ()) + self.assertEqual(field.policy_tags, PolicyTagList()) def test_constructor_explicit(self): field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") @@ -104,7 +106,14 @@ def test_to_api_repr_with_subfield(self): self.assertEqual( field.to_api_repr(), { - "fields": [{"mode": "NULLABLE", "name": "bar", "type": "INTEGER"}], + "fields": [ + { + "mode": "NULLABLE", + "name": "bar", + "type": "INTEGER", + "policyTags": {"names": []}, + } + ], "mode": "REQUIRED", "name": "foo", "type": record_type, @@ -404,6 +413,23 @@ def test___eq___hit_w_fields(self): other = self._make_one("test", "RECORD", fields=[sub1, sub2]) self.assertEqual(field, other) + def test___eq___hit_w_policy_tags(self): + field = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["foo", "bar"]), + ) + other = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["bar", "foo"]), + ) + self.assertEqual(field, other) # Policy tags order does not matter. + def test___ne___wrong_type(self): field = self._make_one("toast", "INTEGER") other = object() @@ -426,6 +452,23 @@ def test___ne___different_values(self): ) self.assertNotEqual(field1, field2) + def test___ne___different_policy_tags(self): + field = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["foo", "bar"]), + ) + other = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["foo", "baz"]), + ) + self.assertNotEqual(field, other) + def test___hash__set_equality(self): sub1 = self._make_one("sub1", "STRING") sub2 = self._make_one("sub2", "STRING") @@ -446,7 +489,7 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one("field1", "STRING") - expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)" + expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), ())" self.assertEqual(repr(field1), expected) @@ -524,10 +567,22 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + resource[0], + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ) self.assertEqual( - resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"} + resource[1], + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ) def test_w_description(self): @@ -553,11 +608,18 @@ def test_w_description(self): "type": "STRING", "mode": "REQUIRED", "description": DESCRIPTION, + "policyTags": {"names": []}, }, ) self.assertEqual( resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": None}, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + "policyTags": {"names": []}, + }, ) def test_w_subfields(self): @@ -572,7 +634,13 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + resource[0], + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ) self.assertEqual( resource[1], @@ -581,8 +649,18 @@ def test_w_subfields(self): "type": "RECORD", "mode": "REPEATED", "fields": [ - {"name": "type", "type": "STRING", "mode": "REQUIRED"}, - {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + { + "name": "type", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, + { + "name": "number", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ], }, ) @@ -794,43 +872,83 @@ def test_from_api_repr_parameterized(api, expect, key2): [ ( dict(name="n", field_type="NUMERIC"), - dict(name="n", type="NUMERIC", mode="NULLABLE"), + dict(name="n", type="NUMERIC", mode="NULLABLE", policyTags={"names": []}), ), ( dict(name="n", field_type="NUMERIC", precision=9), - dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9), + dict( + name="n", + type="NUMERIC", + mode="NULLABLE", + precision=9, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="NUMERIC", precision=9, scale=2), - dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2), + dict( + name="n", + type="NUMERIC", + mode="NULLABLE", + precision=9, + scale=2, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="BIGNUMERIC"), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), + dict( + name="n", type="BIGNUMERIC", mode="NULLABLE", policyTags={"names": []} + ), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40), + dict( + name="n", + type="BIGNUMERIC", + mode="NULLABLE", + precision=40, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2), + dict( + name="n", + type="BIGNUMERIC", + mode="NULLABLE", + precision=40, + scale=2, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="STRING"), - dict(name="n", type="STRING", mode="NULLABLE"), + dict(name="n", type="STRING", mode="NULLABLE", policyTags={"names": []}), ), ( dict(name="n", field_type="STRING", max_length=9), - dict(name="n", type="STRING", mode="NULLABLE", maxLength=9), + dict( + name="n", + type="STRING", + mode="NULLABLE", + maxLength=9, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="BYTES"), - dict(name="n", type="BYTES", mode="NULLABLE"), + dict(name="n", type="BYTES", mode="NULLABLE", policyTags={"names": []}), ), ( dict(name="n", field_type="BYTES", max_length=9), - dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9), + dict( + name="n", + type="BYTES", + mode="NULLABLE", + maxLength=9, + policyTags={"names": []}, + ), ), ], ) From f69e8a16e5889e5466076c4a2848c9a3d3df23a7 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 22 Jun 2021 18:30:12 +0000 Subject: [PATCH 1181/2016] chore: add kokoro 3.9 config templates (#712) Source-Link: https://github.com/googleapis/synthtool/commit/b0eb8a8b30b46a3c98d23c23107acb748c6601a1 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:df50e8d462f86d6bcb42f27ecad55bb12c404f1c65de9c6fe4c4d25120080bd6 --- .../.github/.OwlBot.lock.yaml | 2 +- .../.kokoro/samples/python3.9/common.cfg | 40 +++++++++++++++++++ .../.kokoro/samples/python3.9/continuous.cfg | 6 +++ .../samples/python3.9/periodic-head.cfg | 11 +++++ .../.kokoro/samples/python3.9/periodic.cfg | 6 +++ .../.kokoro/samples/python3.9/presubmit.cfg | 6 +++ 6 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.9/continuous.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.9/presubmit.cfg diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 9602d540595e..0954585f2833 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:b8c131c558606d3cea6e18f8e87befbd448c1482319b0db3c5d5388fa6ea72e3 + digest: sha256:df50e8d462f86d6bcb42f27ecad55bb12c404f1c65de9c6fe4c4d25120080bd6 diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg new file mode 100644 index 000000000000..f179577a5400 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.9" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py39" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/continuous.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg new file mode 100644 index 000000000000..f9cfcd33e058 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg new file mode 100644 index 000000000000..50fec9649732 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/presubmit.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file From 60944dce844210092c77c6e325b3870f1e50c69a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 23 Jun 2021 20:24:30 +0000 Subject: [PATCH 1182/2016] feat: add always_use_jwt_access (#714) ... chore: update gapic-generator-ruby to the latest commit chore: release gapic-generator-typescript 1.5.0 Committer: @miraleung PiperOrigin-RevId: 380641501 Source-Link: https://github.com/googleapis/googleapis/commit/076f7e9f0b258bdb54338895d7251b202e8f0de3 Source-Link: https://github.com/googleapis/googleapis-gen/commit/27e4c88b4048e5f56508d4e1aa417d60a3380892 --- packages/google-cloud-bigquery/.coveragerc | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index 23861a8eb51f..33ea00ba9672 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -2,7 +2,6 @@ branch = True [report] -fail_under = 100 show_missing = True omit = google/cloud/bigquery/__init__.py From 67099b1d57e5389ae78d4088ea7043517c737583 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Thu, 24 Jun 2021 08:08:14 -0400 Subject: [PATCH 1183/2016] chore: pin sphinx plugin version to working one (#715) --- packages/google-cloud-bigquery/noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 0dfe7bf93350..2bc2afde1d08 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -300,7 +300,7 @@ def docfx(session): session.install("-e", ".") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml==0.2.0" ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) From d27247d01964d9b9ec3ff3846ba580184ccf0af7 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 24 Jun 2021 20:07:55 +0200 Subject: [PATCH 1184/2016] chore(deps): update dependency grpcio to v1.38.1 (#713) Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 391c85ae331a..669b3ac85dc4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.20.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 -grpcio==1.38.0 +grpcio==1.38.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From 1bf70ab5ed763ed30553d45fa320ce8d0aacb74c Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Thu, 24 Jun 2021 16:10:31 -0400 Subject: [PATCH 1185/2016] Revert "chore: pin sphinx plugin version to working one (#715)" (#719) This reverts commit 5e9494eb51ca5d31b7277f9f5d6d2d58ea2dd018. --- packages/google-cloud-bigquery/noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 2bc2afde1d08..0dfe7bf93350 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -300,7 +300,7 @@ def docfx(session): session.install("-e", ".") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml==0.2.0" + "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) From dc619f4ac0ff6229de0776496f354f6046a0fb00 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 25 Jun 2021 08:56:40 +0200 Subject: [PATCH 1186/2016] feat: add max_results parameter to some of the QueryJob methods (#698) * feat: add max_results to a few QueryJob methods It is now possible to cap the number of result rows returned when invoking `to_dataframe()` or `to_arrow()` method on a `QueryJob` instance. * Work around a pytype complaint * Make _EmptyRowIterator a subclass of RowIterator Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> --- .../google/cloud/bigquery/_tqdm_helpers.py | 37 ++++++- .../google/cloud/bigquery/job/query.py | 22 +++- .../google/cloud/bigquery/table.py | 53 ++++++++- .../tests/unit/job/test_query_pandas.py | 101 +++++++++++++++++- .../unit/test_signature_compatibility.py | 31 ++++-- .../tests/unit/test_table.py | 19 ++++ 6 files changed, 240 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py index 2fcf2a981802..99e720e2b044 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py @@ -16,6 +16,8 @@ import concurrent.futures import time +import typing +from typing import Optional import warnings try: @@ -23,6 +25,10 @@ except ImportError: # pragma: NO COVER tqdm = None +if typing.TYPE_CHECKING: # pragma: NO COVER + from google.cloud.bigquery import QueryJob + from google.cloud.bigquery.table import RowIterator + _NO_TQDM_ERROR = ( "A progress bar was requested, but there was an error loading the tqdm " "library. Please install tqdm to use the progress bar functionality." @@ -32,7 +38,7 @@ def get_progress_bar(progress_bar_type, description, total, unit): - """Construct a tqdm progress bar object, if tqdm is .""" + """Construct a tqdm progress bar object, if tqdm is installed.""" if tqdm is None: if progress_bar_type is not None: warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) @@ -53,16 +59,34 @@ def get_progress_bar(progress_bar_type, description, total, unit): return None -def wait_for_query(query_job, progress_bar_type=None): - """Return query result and display a progress bar while the query running, if tqdm is installed.""" +def wait_for_query( + query_job: "QueryJob", + progress_bar_type: Optional[str] = None, + max_results: Optional[int] = None, +) -> "RowIterator": + """Return query result and display a progress bar while the query running, if tqdm is installed. + + Args: + query_job: + The job representing the execution of the query on the server. + progress_bar_type: + The type of progress bar to use to show query progress. + max_results: + The maximum number of rows the row iterator should return. + + Returns: + A row iterator over the query results. + """ default_total = 1 current_stage = None start_time = time.time() + progress_bar = get_progress_bar( progress_bar_type, "Query is running", default_total, "query" ) if progress_bar is None: - return query_job.result() + return query_job.result(max_results=max_results) + i = 0 while True: if query_job.query_plan: @@ -75,7 +99,9 @@ def wait_for_query(query_job, progress_bar_type=None): ), ) try: - query_result = query_job.result(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + query_result = query_job.result( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=max_results + ) progress_bar.update(default_total) progress_bar.set_description( "Query complete after {:0.2f}s".format(time.time() - start_time), @@ -89,5 +115,6 @@ def wait_for_query(query_job, progress_bar_type=None): progress_bar.update(i + 1) i += 1 continue + progress_bar.close() return query_result diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 455ef46327d1..6ff9f2647025 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1300,12 +1300,14 @@ def result( return rows # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_arrow() + # changes to table.RowIterator.to_arrow(), except for the max_results parameter + # that should only exist here in the QueryJob method. def to_arrow( self, progress_bar_type: str = None, bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, create_bqstorage_client: bool = True, + max_results: Optional[int] = None, ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1349,6 +1351,11 @@ def to_arrow( ..versionadded:: 1.24.0 + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. + + ..versionadded:: 2.21.0 + Returns: pyarrow.Table A :class:`pyarrow.Table` populated with row data and column @@ -1361,7 +1368,7 @@ def to_arrow( ..versionadded:: 1.17.0 """ - query_result = wait_for_query(self, progress_bar_type) + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_arrow( progress_bar_type=progress_bar_type, bqstorage_client=bqstorage_client, @@ -1369,7 +1376,8 @@ def to_arrow( ) # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_dataframe() + # changes to table.RowIterator.to_dataframe(), except for the max_results parameter + # that should only exist here in the QueryJob method. def to_dataframe( self, bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, @@ -1377,6 +1385,7 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, date_as_object: bool = True, + max_results: Optional[int] = None, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1423,6 +1432,11 @@ def to_dataframe( ..versionadded:: 1.26.0 + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. + + ..versionadded:: 2.21.0 + Returns: A :class:`~pandas.DataFrame` populated with row data and column headers from the query results. The column headers are derived @@ -1431,7 +1445,7 @@ def to_dataframe( Raises: ValueError: If the `pandas` library cannot be imported. """ - query_result = wait_for_query(self, progress_bar_type) + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_dataframe( bqstorage_client=bqstorage_client, dtypes=dtypes, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index b12209252bcd..a1c13c85d009 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -22,7 +22,7 @@ import operator import pytz import typing -from typing import Any, Dict, Iterable, Tuple +from typing import Any, Dict, Iterable, Iterator, Optional, Tuple import warnings try: @@ -1415,7 +1415,9 @@ class RowIterator(HTTPIterator): """A class for iterating through HTTP/JSON API row list responses. Args: - client (google.cloud.bigquery.Client): The API client. + client (Optional[google.cloud.bigquery.Client]): + The API client instance. This should always be non-`None`, except for + subclasses that do not use it, namely the ``_EmptyRowIterator``. api_request (Callable[google.cloud._http.JSONConnection.api_request]): The function to use to make API requests. path (str): The method path to query for the list of items. @@ -1480,7 +1482,7 @@ def __init__( self._field_to_index = _helpers._field_to_index_mapping(schema) self._page_size = page_size self._preserve_order = False - self._project = client.project + self._project = client.project if client is not None else None self._schema = schema self._selected_fields = selected_fields self._table = table @@ -1895,7 +1897,7 @@ def to_dataframe( return df -class _EmptyRowIterator(object): +class _EmptyRowIterator(RowIterator): """An empty row iterator. This class prevents API requests when there are no rows to fetch or rows @@ -1907,6 +1909,18 @@ class _EmptyRowIterator(object): pages = () total_rows = 0 + def __init__( + self, client=None, api_request=None, path=None, schema=(), *args, **kwargs + ): + super().__init__( + client=client, + api_request=api_request, + path=path, + schema=schema, + *args, + **kwargs, + ) + def to_arrow( self, progress_bar_type=None, @@ -1951,6 +1965,37 @@ def to_dataframe( raise ValueError(_NO_PANDAS_ERROR) return pandas.DataFrame() + def to_dataframe_iterable( + self, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, + dtypes: Optional[Dict[str, Any]] = None, + max_queue_size: Optional[int] = None, + ) -> Iterator["pandas.DataFrame"]: + """Create an iterable of pandas DataFrames, to process the table as a stream. + + ..versionadded:: 2.21.0 + + Args: + bqstorage_client: + Ignored. Added for compatibility with RowIterator. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + Ignored. Added for compatibility with RowIterator. + + max_queue_size: + Ignored. Added for compatibility with RowIterator. + + Returns: + An iterator yielding a single empty :class:`~pandas.DataFrame`. + + Raises: + ValueError: + If the :mod:`pandas` library cannot be imported. + """ + if pandas is None: + raise ValueError(_NO_PANDAS_ERROR) + return iter((pandas.DataFrame(),)) + def __iter__(self): return iter(()) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 0f96232036d0..c537802f4eee 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -238,6 +238,41 @@ def test_to_arrow(): ] +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_to_arrow_max_results_no_progress_bar(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + + connection = _make_connection({}) + client = _make_client(connection=connection) + begun_resource = _make_job_resource(job_type="query") + job = target_class.from_api_repr(begun_resource, client) + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", return_value=row_iterator, + ) + with result_patch as result_patch_tqdm: + tbl = job.to_arrow(create_bqstorage_client=False, max_results=123) + + result_patch_tqdm.assert_called_once_with(max_results=123) + + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + + @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_w_query_plan(): @@ -290,7 +325,9 @@ def test_to_arrow_w_tqdm_w_query_plan(): assert result_patch_tqdm.call_count == 3 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @@ -341,7 +378,9 @@ def test_to_arrow_w_tqdm_w_pending_status(): assert result_patch_tqdm.call_count == 2 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @@ -716,7 +755,9 @@ def test_to_dataframe_w_tqdm_pending(): assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df) == ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -774,4 +815,56 @@ def test_to_dataframe_w_tqdm(): assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df), ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_dataframe_w_tqdm_max_results(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL + + begun_resource = _make_job_resource(job_type="query") + schema = [ + SchemaField("name", "STRING", mode="NULLABLE"), + SchemaField("age", "INTEGER", mode="NULLABLE"), + ] + rows = [{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}] + + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + job._properties["statistics"] = { + "query": { + "queryPlan": [ + {"name": "S00: Input", "id": "0", "status": "COMPLETE"}, + {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, + ] + }, + } + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[concurrent.futures.TimeoutError, row_iterator], + ) + + with result_patch as result_patch_tqdm, reload_patch: + job.to_dataframe( + progress_bar_type="tqdm", create_bqstorage_client=False, max_results=3 + ) + + assert result_patch_tqdm.call_count == 2 + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py b/packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py index e5016b0e59bc..07b823e2c438 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py +++ b/packages/google-cloud-bigquery/tests/unit/test_signature_compatibility.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import OrderedDict import inspect import pytest @@ -32,12 +33,30 @@ def row_iterator_class(): def test_to_arrow_method_signatures_match(query_job_class, row_iterator_class): - sig = inspect.signature(query_job_class.to_arrow) - sig2 = inspect.signature(row_iterator_class.to_arrow) - assert sig == sig2 + query_job_sig = inspect.signature(query_job_class.to_arrow) + iterator_sig = inspect.signature(row_iterator_class.to_arrow) + + assert "max_results" in query_job_sig.parameters + + # Compare the signatures while ignoring the max_results parameter, which is + # specific to the method on QueryJob. + params = OrderedDict(query_job_sig.parameters) + del params["max_results"] + query_job_sig = query_job_sig.replace(parameters=params.values()) + + assert query_job_sig == iterator_sig def test_to_dataframe_method_signatures_match(query_job_class, row_iterator_class): - sig = inspect.signature(query_job_class.to_dataframe) - sig2 = inspect.signature(row_iterator_class.to_dataframe) - assert sig == sig2 + query_job_sig = inspect.signature(query_job_class.to_dataframe) + iterator_sig = inspect.signature(row_iterator_class.to_dataframe) + + assert "max_results" in query_job_sig.parameters + + # Compare the signatures while ignoring the max_results parameter, which is + # specific to the method on QueryJob. + params = OrderedDict(query_job_sig.parameters) + del params["max_results"] + query_job_sig = query_job_sig.replace(parameters=params.values()) + + assert query_job_sig == iterator_sig diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 0f2ab00c1e23..f4038835cd30 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1571,6 +1571,25 @@ def test_to_dataframe(self): self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows + @mock.patch("google.cloud.bigquery.table.pandas", new=None) + def test_to_dataframe_iterable_error_if_pandas_is_none(self): + row_iterator = self._make_one() + with self.assertRaises(ValueError): + row_iterator.to_dataframe_iterable() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_iterable(self): + row_iterator = self._make_one() + df_iter = row_iterator.to_dataframe_iterable() + + result = list(df_iter) + + self.assertEqual(len(result), 1) + df = result[0] + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 0) # Verify the number of rows. + self.assertEqual(len(df.columns), 0) + class TestRowIterator(unittest.TestCase): def _class_under_test(self): From 9b8d0a7568743fd21d9a65b775354dace7973779 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 25 Jun 2021 16:59:16 -0400 Subject: [PATCH 1187/2016] Revert "feat: add always_use_jwt_access (#714)" (#722) This reverts commit 92fbd4ade37e0be49dc278080ef73c83eafeea18. --- packages/google-cloud-bigquery/.coveragerc | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index 33ea00ba9672..23861a8eb51f 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -2,6 +2,7 @@ branch = True [report] +fail_under = 100 show_missing = True omit = google/cloud/bigquery/__init__.py From 2926e6940302d684113ed3f19a4f53cad7ab1c45 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 25 Jun 2021 22:52:15 +0000 Subject: [PATCH 1188/2016] chore(python): simplify nox steps in CONTRIBUTING.rst (#721) Source-Link: https://github.com/googleapis/synthtool/commit/26558bae8976a985d73c2d98c31d8612273f907d Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:99d90d097e4a4710cc8658ee0b5b963f4426d0e424819787c3ac1405c9a26719 --- .../.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/CONTRIBUTING.rst | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 0954585f2833..e2b39f946040 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:df50e8d462f86d6bcb42f27ecad55bb12c404f1c65de9c6fe4c4d25120080bd6 + digest: sha256:99d90d097e4a4710cc8658ee0b5b963f4426d0e424819787c3ac1405c9a26719 diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index a9b389e83771..102355b3a1dc 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -68,14 +68,12 @@ Using ``nox`` We use `nox `__ to instrument our tests. - To test your changes, run unit tests with ``nox``:: + $ nox -s unit - $ nox -s unit-3.8 - $ ... +- To run a single unit test:: -- Args to pytest can be passed through the nox command separated by a `--`. For - example, to run a single test:: + $ nox -s unit-3.9 -- -k - $ nox -s unit-3.8 -- -k .. note:: @@ -142,7 +140,7 @@ Running System Tests - To run system tests, you can execute:: # Run all system tests - $ nox -s system-3.8 + $ nox -s system # Run a single system test $ nox -s system-3.8 -- -k @@ -215,8 +213,8 @@ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py -We also explicitly decided to support Python 3 beginning with version -3.6. Reasons for this include: +We also explicitly decided to support Python 3 beginning with version 3.6. +Reasons for this include: - Encouraging use of newest versions of Python 3 - Taking the lead of `prominent`_ open-source `projects`_ From fe85e9bb531143f4fed9484b411689dbd5395756 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 28 Jun 2021 18:26:24 +0200 Subject: [PATCH 1189/2016] chore: require grpcio >= 1.38.1 (#725) --- packages/google-cloud-bigquery/setup.py | 3 ++- packages/google-cloud-bigquery/testing/constraints-3.6.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 963eb73ec285..fcb1dd966bd8 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -29,6 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ + "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 "google-api-core[grpc] >= 1.29.0, < 2.0.0dev", "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", @@ -46,7 +47,7 @@ # See: https://github.com/googleapis/python-bigquery/issues/83 The # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.32.0, < 2.0dev", + "grpcio >= 1.38.1, < 2.0dev", "pyarrow >= 1.0.0, < 5.0dev", ], "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index 71c9ff49ab1b..af6e82efd9a3 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -9,7 +9,7 @@ google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 -grpcio==1.32.0 +grpcio==1.38.1 opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 From 024795468e8f6f387c5be7c7acfaa536ce478949 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 1 Jul 2021 09:24:21 +0200 Subject: [PATCH 1190/2016] fix: inserting non-finite floats with insert_rows() (#728) --- .../google/cloud/bigquery/_helpers.py | 8 +++++++- .../tests/unit/test__helpers.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 7602483c2edf..77054542acd2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -17,6 +17,7 @@ import base64 import datetime import decimal +import math import re from google.cloud._helpers import UTC @@ -305,7 +306,12 @@ def _int_to_json(value): def _float_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" - return value if value is None else float(value) + if value is None: + return None + elif math.isnan(value) or math.isinf(value): + return str(value) + else: + return float(value) def _decimal_to_json(value): diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 0ac76d424283..c62947d3745f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -656,9 +656,24 @@ def _call_fut(self, value): return _float_to_json(value) + def test_w_none(self): + self.assertEqual(self._call_fut(None), None) + def test_w_float(self): self.assertEqual(self._call_fut(1.23), 1.23) + def test_w_nan(self): + result = self._call_fut(float("nan")) + self.assertEqual(result.lower(), "nan") + + def test_w_infinity(self): + result = self._call_fut(float("inf")) + self.assertEqual(result.lower(), "inf") + + def test_w_negative_infinity(self): + result = self._call_fut(float("-inf")) + self.assertEqual(result.lower(), "-inf") + class Test_decimal_to_json(unittest.TestCase): def _call_fut(self, value): From cc54f9b1af276bb5ee999664c52a76a8edf4761f Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 1 Jul 2021 12:22:26 +0200 Subject: [PATCH 1191/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.5.0 (#731) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.4.0` -> `==2.5.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/compatibility-slim/2.4.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/confidence-slim/2.4.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.5.0`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​250-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev240v250-2021-06-29) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.4.0...v2.5.0) ##### ⚠ BREAKING CHANGES - remove default deadline for AppendRows API ([#​205](https://togithub.com/googleapis/python-bigquery-storage/issues/205)) ##### Features - Add ZSTD compression as an option for Arrow ([#​197](https://www.github.com/googleapis/python-bigquery-storage/issues/197)) ([f941446](https://www.github.com/googleapis/python-bigquery-storage/commit/f9414469fac37bf05db28230a1a6c1e3f7342e8d)) - new JSON type through BigQuery Write ([#​178](https://www.github.com/googleapis/python-bigquery-storage/issues/178)) ([a6d6afa](https://www.github.com/googleapis/python-bigquery-storage/commit/a6d6afa8654907701aab2724f940be8f63edd0ea)) ##### Bug Fixes - **deps:** add packaging requirement ([#​200](https://www.github.com/googleapis/python-bigquery-storage/issues/200)) ([f2203fe](https://www.github.com/googleapis/python-bigquery-storage/commit/f2203fefe36dd043a258adb85e970fef14cf6ebc)) - remove default deadline for AppendRows API ([#​205](https://www.github.com/googleapis/python-bigquery-storage/issues/205)) ([cd4e637](https://www.github.com/googleapis/python-bigquery-storage/commit/cd4e637c4c74f21be50c3b0ebdfeebb1dfb88cbb)) ##### Documentation - omit mention of Python 2.7 in 'CONTRIBUTING.rst' ([#​1127](https://www.github.com/googleapis/python-bigquery-storage/issues/1127)) ([#​212](https://www.github.com/googleapis/python-bigquery-storage/issues/212)) ([8bcc4cd](https://www.github.com/googleapis/python-bigquery-storage/commit/8bcc4cd298eb0f5da03ecf66670982ab41e35c88)) ##### Miscellaneous Chores - release 2.5.0 ([#​220](https://www.github.com/googleapis/python-bigquery-storage/issues/220)) ([946c8a9](https://www.github.com/googleapis/python-bigquery-storage/commit/946c8a91c2d74c6bf37b333a4d0483f4483dcbce))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 80fa8e454acb..83ab92ee5cf9 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.4.0 +google-cloud-bigquery-storage==2.5.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 669b3ac85dc4..6b966fb074af 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.4.0 +google-cloud-bigquery-storage==2.5.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 ipython==7.16.1; python_version < '3.7' From 7747d4c8e0d827e1f9710c9fbfc2c239b7ead095 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 1 Jul 2021 10:49:50 -0400 Subject: [PATCH 1192/2016] feat: Support passing struct data to the DB API (#718) --- packages/google-cloud-bigquery/docs/dbapi.rst | 11 +- .../google/cloud/bigquery/dbapi/_helpers.py | 252 +++++++++++++--- .../google/cloud/bigquery/dbapi/cursor.py | 28 +- .../tests/system/conftest.py | 7 +- .../tests/system/test_pandas.py | 11 +- .../tests/system/test_structs.py | 31 ++ .../tests/unit/test_dbapi__helpers.py | 282 +++++++++++++++++- .../tests/unit/test_dbapi_cursor.py | 26 ++ 8 files changed, 597 insertions(+), 51 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/system/test_structs.py diff --git a/packages/google-cloud-bigquery/docs/dbapi.rst b/packages/google-cloud-bigquery/docs/dbapi.rst index 41ec85833d49..81f000bc7c85 100644 --- a/packages/google-cloud-bigquery/docs/dbapi.rst +++ b/packages/google-cloud-bigquery/docs/dbapi.rst @@ -25,7 +25,7 @@ and using named parameters:: Providing explicit type information ----------------------------------- -BigQuery requires type information for parameters. The The BigQuery +BigQuery requires type information for parameters. The BigQuery DB-API can usually determine parameter types for parameters based on provided values. Sometimes, however, types can't be determined (for example when `None` is passed) or are determined incorrectly (for @@ -37,7 +37,14 @@ colon, as in:: insert into people (name, income) values (%(name:string)s, %(income:numeric)s) -For unnamed parameters, use the named syntax with a type, but now +For unnamed parameters, use the named syntax with a type, but no name, as in:: insert into people (name, income) values (%(:string)s, %(:numeric)s) + +Providing type information is the *only* way to pass `struct` data:: + + cursor.execute( + "insert into points (point) values (%(:struct)s)", + [{"x": 10, "y": 20}], + ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 3b0d8134ccbd..9c134b47ce67 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -18,18 +18,34 @@ import decimal import functools import numbers +import re +import typing from google.cloud import bigquery -from google.cloud.bigquery import table, enums +from google.cloud.bigquery import table, enums, query from google.cloud.bigquery.dbapi import exceptions _NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28") _NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28") +type_parameters_re = re.compile( + r""" + \( + \s*[0-9]+\s* + (, + \s*[0-9]+\s* + )* + \) + """, + re.VERBOSE, +) + def _parameter_type(name, value, query_parameter_type=None, value_doc=""): if query_parameter_type: + # Strip type parameters + query_parameter_type = type_parameters_re.sub("", query_parameter_type) try: parameter_type = getattr( enums.SqlParameterScalarTypes, query_parameter_type.upper() @@ -113,6 +129,197 @@ def array_to_query_parameter(value, name=None, query_parameter_type=None): return bigquery.ArrayQueryParameter(name, array_type, value) +def _parse_struct_fields( + fields, + base, + parse_struct_field=re.compile( + r""" + (?:(\w+)\s+) # field name + ([A-Z0-9<> ,()]+) # Field type + $""", + re.VERBOSE | re.IGNORECASE, + ).match, +): + # Split a string of struct fields. They're defined by commas, but + # we have to avoid splitting on commas internal to fields. For + # example: + # name string, children array> + # + # only has 2 top-level fields. + fields = fields.split(",") + fields = list(reversed(fields)) # in the off chance that there are very many + while fields: + field = fields.pop() + while fields and field.count("<") != field.count(">"): + field += "," + fields.pop() + + m = parse_struct_field(field.strip()) + if not m: + raise exceptions.ProgrammingError( + f"Invalid struct field, {field}, in {base}" + ) + yield m.group(1, 2) + + +SCALAR, ARRAY, STRUCT = "sar" + + +def _parse_type( + type_, + name, + base, + complex_query_parameter_parse=re.compile( + r""" + \s* + (ARRAY|STRUCT|RECORD) # Type + \s* + <([A-Z0-9<> ,()]+)> # Subtype(s) + \s*$ + """, + re.IGNORECASE | re.VERBOSE, + ).match, +): + if "<" not in type_: + # Scalar + + # Strip type parameters + type_ = type_parameters_re.sub("", type_).strip() + try: + type_ = getattr(enums.SqlParameterScalarTypes, type_.upper()) + except AttributeError: + raise exceptions.ProgrammingError( + f"The given parameter type, {type_}," + f"{' for ' + name if name else ''}" + f" is not a valid BigQuery scalar type, in {base}." + ) + if name: + type_ = type_.with_name(name) + return SCALAR, type_ + + m = complex_query_parameter_parse(type_) + if not m: + raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}") + tname, sub = m.group(1, 2) + if tname.upper() == "ARRAY": + sub_type = complex_query_parameter_type(None, sub, base) + if isinstance(sub_type, query.ArrayQueryParameterType): + raise exceptions.ProgrammingError(f"Array can't contain an array in {base}") + sub_type._complex__src = sub + return ARRAY, sub_type + else: + return STRUCT, _parse_struct_fields(sub, base) + + +def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str): + """Construct a parameter type (`StructQueryParameterType`) for a complex type + + or a non-complex type that's part of a complex type. + + Examples: + + array> + + struct>> + + This is used for computing array types. + """ + + type_type, sub_type = _parse_type(type_, name, base) + if type_type == SCALAR: + type_ = sub_type + elif type_type == ARRAY: + type_ = query.ArrayQueryParameterType(sub_type, name=name) + elif type_type == STRUCT: + fields = [ + complex_query_parameter_type(field_name, field_type, base) + for field_name, field_type in sub_type + ] + type_ = query.StructQueryParameterType(*fields, name=name) + else: # pragma: NO COVER + raise AssertionError("Bad type_type", type_type) # Can't happen :) + + return type_ + + +def complex_query_parameter( + name: typing.Optional[str], value, type_: str, base: typing.Optional[str] = None +): + """ + Construct a query parameter for a complex type (array or struct record) + + or for a subtype, which may not be complex + + Examples: + + array> + + struct>> + + """ + base = base or type_ + + type_type, sub_type = _parse_type(type_, name, base) + + if type_type == SCALAR: + param = query.ScalarQueryParameter(name, sub_type._type, value) + elif type_type == ARRAY: + if not array_like(value): + raise exceptions.ProgrammingError( + f"Array type with non-array-like value" + f" with type {type(value).__name__}" + ) + param = query.ArrayQueryParameter( + name, + sub_type, + value + if isinstance(sub_type, query.ScalarQueryParameterType) + else [ + complex_query_parameter(None, v, sub_type._complex__src, base) + for v in value + ], + ) + elif type_type == STRUCT: + if not isinstance(value, collections_abc.Mapping): + raise exceptions.ProgrammingError(f"Non-mapping value for type {type_}") + value_keys = set(value) + fields = [] + for field_name, field_type in sub_type: + if field_name not in value: + raise exceptions.ProgrammingError( + f"No field value for {field_name} in {type_}" + ) + value_keys.remove(field_name) + fields.append( + complex_query_parameter(field_name, value[field_name], field_type, base) + ) + if value_keys: + raise exceptions.ProgrammingError(f"Extra data keys for {type_}") + + param = query.StructQueryParameter(name, *fields) + else: # pragma: NO COVER + raise AssertionError("Bad type_type", type_type) # Can't happen :) + + return param + + +def _dispatch_parameter(type_, value, name=None): + if type_ is not None and "<" in type_: + param = complex_query_parameter(name, value, type_) + elif isinstance(value, collections_abc.Mapping): + raise NotImplementedError( + f"STRUCT-like parameter values are not supported" + f"{' (parameter ' + name + ')' if name else ''}," + f" unless an explicit type is give in the parameter placeholder" + f" (e.g. '%({name if name else ''}:struct<...>)s')." + ) + elif array_like(value): + param = array_to_query_parameter(value, name, type_) + else: + param = scalar_to_query_parameter(value, name, type_) + + return param + + def to_query_parameters_list(parameters, parameter_types): """Converts a sequence of parameter values into query parameters. @@ -126,19 +333,10 @@ def to_query_parameters_list(parameters, parameter_types): List[google.cloud.bigquery.query._AbstractQueryParameter]: A list of query parameters. """ - result = [] - - for value, type_ in zip(parameters, parameter_types): - if isinstance(value, collections_abc.Mapping): - raise NotImplementedError("STRUCT-like parameter values are not supported.") - elif array_like(value): - param = array_to_query_parameter(value, None, type_) - else: - param = scalar_to_query_parameter(value, None, type_) - - result.append(param) - - return result + return [ + _dispatch_parameter(type_, value) + for value, type_ in zip(parameters, parameter_types) + ] def to_query_parameters_dict(parameters, query_parameter_types): @@ -154,28 +352,10 @@ def to_query_parameters_dict(parameters, query_parameter_types): List[google.cloud.bigquery.query._AbstractQueryParameter]: A list of named query parameters. """ - result = [] - - for name, value in parameters.items(): - if isinstance(value, collections_abc.Mapping): - raise NotImplementedError( - "STRUCT-like parameter values are not supported " - "(parameter {}).".format(name) - ) - else: - query_parameter_type = query_parameter_types.get(name) - if array_like(value): - param = array_to_query_parameter( - value, name=name, query_parameter_type=query_parameter_type - ) - else: - param = scalar_to_query_parameter( - value, name=name, query_parameter_type=query_parameter_type, - ) - - result.append(param) - - return result + return [ + _dispatch_parameter(query_parameter_types.get(name), value, name) + for name, value in parameters.items() + ] def to_query_parameters(parameters, parameter_types): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index c8fc49378e2b..587598d5f208 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -483,7 +483,33 @@ def _format_operation(operation, parameters): def _extract_types( - operation, extra_type_sub=re.compile(r"(%*)%(?:\(([^:)]*)(?::(\w+))?\))?s").sub + operation, + extra_type_sub=re.compile( + r""" + (%*) # Extra %s. We'll deal with these in the replacement code + + % # Beginning of replacement, %s, %(...)s + + (?:\( # Begin of optional name and/or type + ([^:)]*) # name + (?:: # ':' introduces type + ( # start of type group + [a-zA-Z0-9<>, ]+ # First part, no parens + + (?: # start sets of parens + non-paren text + \([0-9 ,]+\) # comma-separated groups of digits in parens + # (e.g. string(10)) + (?=[, >)]) # Must be followed by ,>) or space + [a-zA-Z0-9<>, ]* # Optional non-paren chars + )* # Can be zero or more of parens and following text + ) # end of type group + )? # close type clause ":type" + \))? # End of optional name and/or type + + s # End of replacement + """, + re.VERBOSE, + ).sub, ): """Remove type information from parameter placeholders. diff --git a/packages/google-cloud-bigquery/tests/system/conftest.py b/packages/google-cloud-bigquery/tests/system/conftest.py index 4b5fcb54319b..4eef60e924df 100644 --- a/packages/google-cloud-bigquery/tests/system/conftest.py +++ b/packages/google-cloud-bigquery/tests/system/conftest.py @@ -31,9 +31,14 @@ def bqstorage_client(bigquery_client): return bigquery_storage.BigQueryReadClient(credentials=bigquery_client._credentials) -@pytest.fixture +@pytest.fixture(scope="session") def dataset_id(bigquery_client): dataset_id = f"bqsystem_{helpers.temp_suffix()}" bigquery_client.create_dataset(dataset_id) yield dataset_id bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture +def table_id(dataset_id): + return f"{dataset_id}.table_{helpers.temp_suffix()}" diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 1164e36da8b0..ddf5eaf43c2c 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -149,7 +149,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype( reason="Only `pandas version >=1.0.0` is supported", ) def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( - bigquery_client, dataset_id + bigquery_client, dataset_id, table_id ): """Test that a DataFrame containing column with None-type values and int64 datatype can be uploaded without specifying a schema. @@ -157,9 +157,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( https://github.com/googleapis/python-bigquery/issues/22 """ - table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( - bigquery_client.project, dataset_id - ) df_data = collections.OrderedDict( [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] ) @@ -511,7 +508,7 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( - bigquery_client, dataset_id + bigquery_client, dataset_id, table_id ): from google.cloud.bigquery.job import SourceFormat @@ -536,10 +533,6 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( ) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( - bigquery_client.project, dataset_id - ) - job_config = bigquery.LoadJobConfig( schema=table_schema, source_format=SourceFormat.CSV ) diff --git a/packages/google-cloud-bigquery/tests/system/test_structs.py b/packages/google-cloud-bigquery/tests/system/test_structs.py new file mode 100644 index 000000000000..20740f61405d --- /dev/null +++ b/packages/google-cloud-bigquery/tests/system/test_structs.py @@ -0,0 +1,31 @@ +import datetime + +import pytest + +from google.cloud.bigquery.dbapi import connect + +person_type = "struct>>" +person_type_sized = ( + "struct>>" +) + + +@pytest.mark.parametrize("person_type_decl", [person_type, person_type_sized]) +def test_structs(bigquery_client, dataset_id, person_type_decl, table_id): + conn = connect(bigquery_client) + cursor = conn.cursor() + cursor.execute(f"create table {table_id} (person {person_type_decl})") + data = dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", bdate=datetime.date(2021, 1, 2)), + ], + ) + cursor.execute( + f"insert into {table_id} (person) values (%(v:{person_type})s)", dict(v=data), + ) + + cursor.execute(f"select * from {table_id}") + [[result]] = list(cursor) + assert result == data diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 250ba46d981b..b3320335404b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -16,6 +16,7 @@ import decimal import math import operator as op +import re import unittest import pytest @@ -394,11 +395,13 @@ def test_to_query_parameters_dict_w_types(): assert sorted( _helpers.to_query_parameters( - dict(i=1, x=1.2, y=None, z=[]), dict(x="numeric", y="string", z="float64") + dict(i=1, x=1.2, y=None, q="hi", z=[]), + dict(x="numeric", y="string", q="string(9)", z="float64"), ), key=lambda p: p.name, ) == [ bigquery.ScalarQueryParameter("i", "INT64", 1), + bigquery.ScalarQueryParameter("q", "STRING", "hi"), bigquery.ScalarQueryParameter("x", "NUMERIC", 1.2), bigquery.ScalarQueryParameter("y", "STRING", None), bigquery.ArrayQueryParameter("z", "FLOAT64", []), @@ -409,10 +412,285 @@ def test_to_query_parameters_list_w_types(): from google.cloud import bigquery assert _helpers.to_query_parameters( - [1, 1.2, None, []], [None, "numeric", "string", "float64"] + [1, 1.2, None, "hi", []], [None, "numeric", "string", "string(9)", "float64"] ) == [ bigquery.ScalarQueryParameter(None, "INT64", 1), bigquery.ScalarQueryParameter(None, "NUMERIC", 1.2), bigquery.ScalarQueryParameter(None, "STRING", None), + bigquery.ScalarQueryParameter(None, "STRING", "hi"), bigquery.ArrayQueryParameter(None, "FLOAT64", []), ] + + +@pytest.mark.parametrize( + "value,type_,expect", + [ + ( + [], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": []}, + }, + ), + ( + [1, 2], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "2"}]}, + }, + ), + ( + dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", bdate=datetime.date(2021, 1, 2)), + ], + ), + "struct>>", + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + { + "name": "children", + "type": { + "arrayType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "type": "ARRAY", + }, + }, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "children": { + "arrayValues": [ + { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + { + "structValues": { + "bdate": {"value": "2021-01-02"}, + "name": {"value": "ch2"}, + } + }, + ] + }, + "name": {"value": "par"}, + } + }, + }, + ), + ( + dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", bdate=datetime.date(2021, 1, 2)), + ], + ), + "struct>>", + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + { + "name": "children", + "type": { + "arrayType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "type": "ARRAY", + }, + }, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "children": { + "arrayValues": [ + { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + { + "structValues": { + "bdate": {"value": "2021-01-02"}, + "name": {"value": "ch2"}, + } + }, + ] + }, + "name": {"value": "par"}, + } + }, + }, + ), + ( + ["1", "hi"], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "STRING"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "hi"}]}, + }, + ), + ], +) +def test_complex_query_parameter_type(type_, value, expect): + from google.cloud.bigquery.dbapi._helpers import complex_query_parameter + + param = complex_query_parameter("test", value, type_).to_api_repr() + assert param.pop("name") == "test" + assert param == expect + + +def _expected_error_match(expect): + return "^" + re.escape(expect) + "$" + + +@pytest.mark.parametrize( + "value,type_,expect", + [ + ( + [], + "ARRAY", + "The given parameter type, INT," + " is not a valid BigQuery scalar type, in ARRAY.", + ), + ([], "x", "Invalid parameter type, x"), + ({}, "struct", "Invalid struct field, int, in struct"), + ( + {"x": 1}, + "struct", + "The given parameter type, int," + " for x is not a valid BigQuery scalar type, in struct.", + ), + ([], "x<", "Invalid parameter type, x<"), + (0, "ARRAY", "Array type with non-array-like value with type int"), + ( + [], + "ARRAY>", + "Array can't contain an array in ARRAY>", + ), + ([], "struct", "Non-mapping value for type struct"), + ({}, "struct", "No field value for x in struct"), + ({"x": 1, "y": 1}, "struct", "Extra data keys for struct"), + ([], "array>", "Invalid struct field, xxx, in array>"), + ([], "array<<>>", "Invalid parameter type, <>"), + ], +) +def test_complex_query_parameter_type_errors(type_, value, expect): + from google.cloud.bigquery.dbapi._helpers import complex_query_parameter + from google.cloud.bigquery.dbapi import exceptions + + with pytest.raises( + exceptions.ProgrammingError, match=_expected_error_match(expect), + ): + complex_query_parameter("test", value, type_) + + +@pytest.mark.parametrize( + "parameters,parameter_types,expect", + [ + ( + [[], dict(name="ch1", bdate=datetime.date(2021, 1, 1))], + ["ARRAY", "struct"], + [ + { + "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, + "parameterValue": {"arrayValues": []}, + }, + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + }, + ], + ), + ( + dict(ids=[], child=dict(name="ch1", bdate=datetime.date(2021, 1, 1))), + dict(ids="ARRAY", child="struct"), + [ + { + "name": "ids", + "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, + "parameterValue": {"arrayValues": []}, + }, + { + "name": "child", + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + }, + ], + ), + ], +) +def test_to_query_parameters_complex_types(parameters, parameter_types, expect): + from google.cloud.bigquery.dbapi._helpers import to_query_parameters + + result = [p.to_api_repr() for p in to_query_parameters(parameters, parameter_types)] + assert result == expect + + +def test_to_query_parameters_struct_error(): + from google.cloud.bigquery.dbapi._helpers import to_query_parameters + + with pytest.raises( + NotImplementedError, + match=_expected_error_match( + "STRUCT-like parameter values are not supported, " + "unless an explicit type is give in the parameter placeholder " + "(e.g. '%(:struct<...>)s')." + ), + ): + to_query_parameters([dict(x=1)], [None]) + + with pytest.raises( + NotImplementedError, + match=_expected_error_match( + "STRUCT-like parameter values are not supported (parameter foo), " + "unless an explicit type is give in the parameter placeholder " + "(e.g. '%(foo:struct<...>)s')." + ), + ): + to_query_parameters(dict(foo=dict(x=1)), {}) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index a2d6693d00b7..026810aaf5c7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -809,6 +809,32 @@ def test__format_operation_no_placeholders(self): "values(%%%%%(foo:INT64)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")), ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), + ( + "values(%(foo:struct)s, %(bar)s)", + ( + "values(%(foo)s, %(bar)s)", + dict(foo="struct"), + ), + ), + ( + "values(%(foo:struct)s, %(bar)s)", + ( + "values(%(foo)s, %(bar)s)", + dict(foo="struct"), + ), + ), + ( + "values(%(foo:string(10))s, %(bar)s)", + ("values(%(foo)s, %(bar)s)", dict(foo="string(10)")), + ), ], ) def test__extract_types(inp, expect): From cad9ceb2c65a29a3aee1dc0e3ec2eb11c206fd80 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 1 Jul 2021 19:17:01 +0200 Subject: [PATCH 1193/2016] feat: make it easier to disable best-effort deduplication with streaming inserts (#734) * feat: make it easier to disable row insert IDs * Also accept any iterables for row_ids --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/client.py | 47 +++++- .../google/cloud/bigquery/enums.py | 7 + .../tests/unit/test_client.py | 153 +++++++++++++++++- 4 files changed, 195 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 94f87304a975..dfe3a6320595 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -37,6 +37,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums +from google.cloud.bigquery.enums import AutoRowIDs from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes @@ -144,6 +145,7 @@ "DEFAULT_RETRY", # Enum Constants "enums", + "AutoRowIDs", "Compression", "CreateDisposition", "DestinationFormat", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2b7a5273ecdc..2a02c7629d9b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -68,6 +68,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.enums import AutoRowIDs from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job @@ -3349,7 +3350,7 @@ def insert_rows_json( self, table: Union[Table, TableReference, str], json_rows: Sequence[Dict], - row_ids: Sequence[str] = None, + row_ids: Union[Iterable[str], AutoRowIDs, None] = AutoRowIDs.GENERATE_UUID, skip_invalid_rows: bool = None, ignore_unknown_values: bool = None, template_suffix: str = None, @@ -3371,11 +3372,20 @@ def insert_rows_json( json_rows (Sequence[Dict]): Row data to be inserted. Keys must match the table schema fields and values must be JSON-compatible representations. - row_ids (Optional[Sequence[Optional[str]]]): + row_ids (Union[Iterable[str], AutoRowIDs, None]): Unique IDs, one per row being inserted. An ID can also be ``None``, indicating that an explicit insert ID should **not** be used for that row. If the argument is omitted altogether, unique IDs are created automatically. + + .. versionchanged:: 2.21.0 + Can also be an iterable, not just a sequence, or an + :class:`AutoRowIDs` enum member. + + .. deprecated:: 2.21.0 + Passing ``None`` to explicitly request autogenerating insert IDs is + deprecated, use :attr:`AutoRowIDs.GENERATE_UUID` instead. + skip_invalid_rows (Optional[bool]): Insert all valid rows of a request, even if invalid rows exist. The default value is ``False``, which causes the entire request @@ -3415,12 +3425,37 @@ def insert_rows_json( rows_info = [] data = {"rows": rows_info} - for index, row in enumerate(json_rows): + if row_ids is None: + warnings.warn( + "Passing None for row_ids is deprecated. To explicitly request " + "autogenerated insert IDs, use AutoRowIDs.GENERATE_UUID instead", + category=DeprecationWarning, + ) + row_ids = AutoRowIDs.GENERATE_UUID + + if not isinstance(row_ids, AutoRowIDs): + try: + row_ids_iter = iter(row_ids) + except TypeError: + msg = "row_ids is neither an iterable nor an AutoRowIDs enum member" + raise TypeError(msg) + + for i, row in enumerate(json_rows): info = {"json": row} - if row_ids is not None: - info["insertId"] = row_ids[index] - else: + + if row_ids is AutoRowIDs.GENERATE_UUID: info["insertId"] = str(uuid.uuid4()) + elif row_ids is AutoRowIDs.DISABLED: + info["insertId"] = None + else: + try: + insert_id = next(row_ids_iter) + except StopIteration: + msg = f"row_ids did not generate enough IDs, error at index {i}" + raise ValueError(msg) + else: + info["insertId"] = insert_id + rows_info.append(info) if skip_invalid_rows is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index edf991b6fdd3..dbbd026358c1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -21,6 +21,13 @@ from google.cloud.bigquery.query import ScalarQueryParameterType +class AutoRowIDs(enum.Enum): + """How to handle automatic insert IDs when inserting rows as a stream.""" + + DISABLED = enum.auto() + GENERATE_UUID = enum.auto() + + class Compression(object): """The compression type to use for exported files. The default value is :attr:`NONE`. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f6811e207c56..dffe7bdbabc8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5434,7 +5434,7 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None ) - def test_insert_rows_json(self): + def test_insert_rows_json_default_behavior(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -5481,8 +5481,10 @@ def test_insert_rows_json(self): method="POST", path="/%s" % PATH, data=SENT, timeout=7.5, ) - def test_insert_rows_json_with_string_id(self): - rows = [{"col1": "val1"}] + def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self): + from google.cloud.bigquery import AutoRowIDs + + rows = [{"col1": "val1"}, {"col2": "val2"}] creds = _make_credentials() http = object() client = self._make_one( @@ -5490,20 +5492,116 @@ def test_insert_rows_json_with_string_id(self): ) conn = client._connection = make_connection({}) - with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))): - errors = client.insert_rows_json("proj.dset.tbl", rows) + uuid_patcher = mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))) + with uuid_patcher: + errors = client.insert_rows_json( + "proj.dset.tbl", rows, row_ids=AutoRowIDs.GENERATE_UUID + ) self.assertEqual(len(errors), 0) - expected = { - "rows": [{"json": row, "insertId": str(i)} for i, row in enumerate(rows)] + + # Check row data sent to the backend. + expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": "0"}, + {"json": {"col2": "val2"}, "insertId": "1"}, + ] } conn.api_request.assert_called_once_with( method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", - data=expected, + data=expected_row_data, + timeout=None, + ) + + def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): + from google.cloud.bigquery import AutoRowIDs + + rows = [{"col1": "val1"}, {"col2": "val2"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + conn = client._connection = make_connection({}) + + errors = client.insert_rows_json( + "proj.dset.tbl", rows, row_ids=AutoRowIDs.DISABLED, + ) + + self.assertEqual(len(errors), 0) + + expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": None}, + {"json": {"col2": "val2"}, "insertId": None}, + ] + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/proj/datasets/dset/tables/tbl/insertAll", + data=expected_row_data, + timeout=None, + ) + + def test_insert_rows_json_with_iterator_row_ids(self): + rows = [{"col1": "val1"}, {"col2": "val2"}, {"col3": "val3"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + conn = client._connection = make_connection({}) + + row_ids_iter = map(str, itertools.count(42)) + errors = client.insert_rows_json("proj.dset.tbl", rows, row_ids=row_ids_iter) + + self.assertEqual(len(errors), 0) + expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": "42"}, + {"json": {"col2": "val2"}, "insertId": "43"}, + {"json": {"col3": "val3"}, "insertId": "44"}, + ] + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/proj/datasets/dset/tables/tbl/insertAll", + data=expected_row_data, timeout=None, ) + def test_insert_rows_json_with_non_iterable_row_ids(self): + rows = [{"col1": "val1"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + client._connection = make_connection({}) + + with self.assertRaises(TypeError) as exc: + client.insert_rows_json("proj.dset.tbl", rows, row_ids=object()) + + err_msg = str(exc.exception) + self.assertIn("row_ids", err_msg) + self.assertIn("iterable", err_msg) + + def test_insert_rows_json_with_too_few_row_ids(self): + rows = [{"col1": "val1"}, {"col2": "val2"}, {"col3": "val3"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + client._connection = make_connection({}) + + insert_ids = ["10", "20"] + + error_msg_pattern = "row_ids did not generate enough IDs.*index 2" + with self.assertRaisesRegex(ValueError, error_msg_pattern): + client.insert_rows_json("proj.dset.tbl", rows, row_ids=insert_ids) + def test_insert_rows_json_w_explicit_none_insert_ids(self): rows = [{"col1": "val1"}, {"col2": "val2"}] creds = _make_credentials() @@ -5526,6 +5624,45 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): timeout=None, ) + def test_insert_rows_json_w_none_insert_ids_sequence(self): + rows = [{"col1": "val1"}, {"col2": "val2"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + conn = client._connection = make_connection({}) + + uuid_patcher = mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))) + with warnings.catch_warnings(record=True) as warned, uuid_patcher: + errors = client.insert_rows_json("proj.dset.tbl", rows, row_ids=None) + + self.assertEqual(len(errors), 0) + + # Passing row_ids=None should have resulted in a deprecation warning. + matches = [ + warning + for warning in warned + if issubclass(warning.category, DeprecationWarning) + and "row_ids" in str(warning) + and "AutoRowIDs.GENERATE_UUID" in str(warning) + ] + assert matches, "The expected deprecation warning was not raised." + + # Check row data sent to the backend. + expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": "0"}, + {"json": {"col2": "val2"}, "insertId": "1"}, + ] + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/proj/datasets/dset/tables/tbl/insertAll", + data=expected_row_data, + timeout=None, + ) + def test_insert_rows_w_wrong_arg(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.schema import SchemaField From 104b91c721e3faaa2a02bf9fd12e2d85b18d0bb1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 9 Jul 2021 14:21:09 -0500 Subject: [PATCH 1194/2016] docs: add docs for all enums in module (#745) --- packages/google-cloud-bigquery/docs/conf.py | 1 + packages/google-cloud-bigquery/docs/enums.rst | 6 ++++++ packages/google-cloud-bigquery/docs/reference.rst | 7 ++++--- 3 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 packages/google-cloud-bigquery/docs/enums.rst diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index cb347160de42..09f7ea414883 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/packages/google-cloud-bigquery/docs/enums.rst b/packages/google-cloud-bigquery/docs/enums.rst new file mode 100644 index 000000000000..57608968a3fa --- /dev/null +++ b/packages/google-cloud-bigquery/docs/enums.rst @@ -0,0 +1,6 @@ +BigQuery Enums +============== + +.. automodule:: google.cloud.bigquery.enums + :members: + :undoc-members: diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 52d916f96ac0..694379cd276e 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -173,10 +173,11 @@ Magics Enums ===== -.. autosummary:: - :toctree: generated +.. toctree:: + :maxdepth: 2 + + enums - enums.StandardSqlDataTypes Encryption Configuration ======================== From 3930a36ec1754fd47104947b6c9279ca36cac6a9 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 10 Jul 2021 11:05:31 +0200 Subject: [PATCH 1195/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.6.0 (#743) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 83ab92ee5cf9..30a59c15ae7b 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.5.0 +google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 6b966fb074af..ce02ac7ed844 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.5.0 +google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 ipython==7.16.1; python_version < '3.7' From 58c581e092ad7352e24648275be0fd03b5f954f1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 10 Jul 2021 11:07:00 +0200 Subject: [PATCH 1196/2016] feat: add support for decimal target types (#735) * feat: add support for decimal target types * Add decimal target types support to ExternalConfig * Remove ambiguous parts of DecimalTargetType docs. --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/enums.py | 18 ++++++ .../google/cloud/bigquery/external_config.py | 23 +++++++ .../google/cloud/bigquery/job/load.py | 23 +++++++ .../tests/data/numeric_38_12.parquet | Bin 0 -> 307 bytes .../tests/system/test_client.py | 54 ++++++++++++++++ .../tests/unit/job/test_load_config.py | 39 ++++++++++++ .../tests/unit/test_external_config.py | 58 ++++++++++++++++++ 8 files changed, 217 insertions(+) create mode 100644 packages/google-cloud-bigquery/tests/data/numeric_38_12.parquet diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index dfe3a6320595..b972241761de 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -38,6 +38,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs +from google.cloud.bigquery.enums import DecimalTargetType from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes @@ -148,6 +149,7 @@ "AutoRowIDs", "Compression", "CreateDisposition", + "DecimalTargetType", "DestinationFormat", "DeterminismLevel", "ExternalSourceFormat", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index dbbd026358c1..ef35dffe0639 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -49,6 +49,24 @@ class Compression(object): """Specifies no compression.""" +class DecimalTargetType: + """The data types that could be used as a target type when converting decimal values. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#DecimalTargetType + + .. versionadded:: 2.21.0 + """ + + NUMERIC = "NUMERIC" + """Decimal values could be converted to NUMERIC type.""" + + BIGNUMERIC = "BIGNUMERIC" + """Decimal values could be converted to BIGNUMERIC type.""" + + STRING = "STRING" + """Decimal values could be converted to STRING type.""" + + class CreateDisposition(object): """Specifies whether the job is allowed to create new tables. The default value is :attr:`CREATE_IF_NEEDED`. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 0c49d2d764b3..f1692ba50289 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -22,6 +22,7 @@ import base64 import copy +from typing import FrozenSet, Iterable, Optional from google.cloud.bigquery._helpers import _to_bytes from google.cloud.bigquery._helpers import _bytes_to_json @@ -693,6 +694,28 @@ def compression(self): def compression(self, value): self._properties["compression"] = value + @property + def decimal_target_types(self) -> Optional[FrozenSet[str]]: + """Possible SQL data types to which the source decimal values are converted. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types + + .. versionadded:: 2.21.0 + """ + prop = self._properties.get("decimalTargetTypes") + if prop is not None: + prop = frozenset(prop) + return prop + + @decimal_target_types.setter + def decimal_target_types(self, value: Optional[Iterable[str]]): + if value is not None: + self._properties["decimalTargetTypes"] = list(value) + else: + if "decimalTargetTypes" in self._properties: + del self._properties["decimalTargetTypes"] + @property def hive_partitioning(self): """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index 41d38dd7435d..bdee5cb6bd3d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -14,6 +14,8 @@ """Classes for load jobs.""" +from typing import FrozenSet, Iterable, Optional + from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery.format_options import ParquetOptions @@ -121,6 +123,27 @@ def create_disposition(self): def create_disposition(self, value): self._set_sub_prop("createDisposition", value) + @property + def decimal_target_types(self) -> Optional[FrozenSet[str]]: + """Possible SQL data types to which the source decimal values are converted. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.decimal_target_types + + .. versionadded:: 2.21.0 + """ + prop = self._get_sub_prop("decimalTargetTypes") + if prop is not None: + prop = frozenset(prop) + return prop + + @decimal_target_types.setter + def decimal_target_types(self, value: Optional[Iterable[str]]): + if value is not None: + self._set_sub_prop("decimalTargetTypes", list(value)) + else: + self._del_sub_prop("decimalTargetTypes") + @property def destination_encryption_configuration(self): """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom diff --git a/packages/google-cloud-bigquery/tests/data/numeric_38_12.parquet b/packages/google-cloud-bigquery/tests/data/numeric_38_12.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ef4db91ea9a90b2e230b8057fb05446e3f25dfe4 GIT binary patch literal 307 zcmWG=3^EjD5cLuD(Gg_MA>{7D-4+Ihd3~*3z?EmNe|HVLj zCLm^JMpq11$pBF*$|R}d!JUy=mKtB2oRONFD9Xej$|tHI$|S)WpPZj#pvomGA?m{* z#v&;rDWk?ABg!Xf%D}-U36&NF%82b_1R^yK8=~C?(!s)@;aQMaR9Ko?qMKW!P?lO$ zoSC1eV5n!IXQ-f&RGOKSqF|DklxUKYm};JsWRPN#nv#^9W^QR@oS158Y;2ikWRaF) ToMxVuoTkZe0_d>-U~mBdp9NGq literal 0 HcmV?d00001 diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index ce3021399038..460296b2f21a 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -864,6 +864,60 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength) ) + def test_load_table_from_local_parquet_file_decimal_types(self): + from google.cloud.bigquery.enums import DecimalTargetType + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition + + TABLE_NAME = "test_table_parquet" + + expected_rows = [ + (decimal.Decimal("123.999999999999"),), + (decimal.Decimal("99999999999999999999999999.999999999999"),), + ] + + dataset = self.temp_dataset(_make_dataset_id("load_local_parquet_then_dump")) + table_ref = dataset.table(TABLE_NAME) + table = Table(table_ref) + self.to_delete.insert(0, table) + + job_config = bigquery.LoadJobConfig() + job_config.source_format = SourceFormat.PARQUET + job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE + job_config.decimal_target_types = [ + DecimalTargetType.NUMERIC, + DecimalTargetType.BIGNUMERIC, + DecimalTargetType.STRING, + ] + + with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file: + job = Config.CLIENT.load_table_from_file( + parquet_file, table_ref, job_config=job_config + ) + + job.result(timeout=JOB_TIMEOUT) # Retry until done. + + self.assertEqual(job.output_rows, len(expected_rows)) + + table = Config.CLIENT.get_table(table) + rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] + self.assertEqual(sorted(row_tuples), sorted(expected_rows)) + + # Forcing the NUMERIC type, however, should result in an error. + job_config.decimal_target_types = [DecimalTargetType.NUMERIC] + + with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file: + job = Config.CLIENT.load_table_from_file( + parquet_file, table_ref, job_config=job_config + ) + + with self.assertRaises(BadRequest) as exc_info: + job.result(timeout=JOB_TIMEOUT) + + exc_msg = str(exc_info.exception) + self.assertIn("out of valid NUMERIC range", exc_msg) + def test_load_table_from_json_basic_use(self): table_schema = ( bigquery.SchemaField("name", "STRING", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index eafe7e0462df..190bd16dcf4a 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -122,6 +122,45 @@ def test_create_disposition_setter(self): config.create_disposition = disposition self.assertEqual(config._properties["load"]["createDisposition"], disposition) + def test_decimal_target_types_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.decimal_target_types) + + def test_decimal_target_types_hit(self): + from google.cloud.bigquery.enums import DecimalTargetType + + config = self._get_target_class()() + decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING] + config._properties["load"]["decimalTargetTypes"] = decimal_target_types + + expected = frozenset(decimal_target_types) + self.assertEqual(config.decimal_target_types, expected) + + def test_decimal_target_types_setter(self): + from google.cloud.bigquery.enums import DecimalTargetType + + decimal_target_types = (DecimalTargetType.NUMERIC, DecimalTargetType.BIGNUMERIC) + config = self._get_target_class()() + config.decimal_target_types = decimal_target_types + self.assertEqual( + config._properties["load"]["decimalTargetTypes"], + list(decimal_target_types), + ) + + def test_decimal_target_types_setter_w_none(self): + from google.cloud.bigquery.enums import DecimalTargetType + + config = self._get_target_class()() + decimal_target_types = [DecimalTargetType.BIGNUMERIC] + config._properties["load"]["decimalTargetTypes"] = decimal_target_types + + config.decimal_target_types = None + + self.assertIsNone(config.decimal_target_types) + self.assertNotIn("decimalTargetTypes", config._properties["load"]) + + config.decimal_target_types = None # No error if unsetting an unset property. + def test_destination_encryption_configuration_missing(self): config = self._get_target_class()() self.assertIsNone(config.destination_encryption_configuration) diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 393df931e3b5..1f49dba5df98 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -532,6 +532,64 @@ def test_to_api_repr_parquet(self): self.assertEqual(got_resource, exp_resource) + def test_from_api_repr_decimal_target_types(self): + from google.cloud.bigquery.enums import DecimalTargetType + + resource = _copy_and_update( + self.BASE_RESOURCE, + { + "sourceFormat": "FORMAT_FOO", + "decimalTargetTypes": [DecimalTargetType.NUMERIC], + }, + ) + + ec = external_config.ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, "FORMAT_FOO") + self.assertEqual( + ec.decimal_target_types, frozenset([DecimalTargetType.NUMERIC]) + ) + + # converting back to API representation should yield the same result + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + del resource["decimalTargetTypes"] + ec = external_config.ExternalConfig.from_api_repr(resource) + self.assertIsNone(ec.decimal_target_types) + + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + def test_to_api_repr_decimal_target_types(self): + from google.cloud.bigquery.enums import DecimalTargetType + + ec = external_config.ExternalConfig("FORMAT_FOO") + ec.decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING] + + got_resource = ec.to_api_repr() + + expected_resource = { + "sourceFormat": "FORMAT_FOO", + "decimalTargetTypes": [DecimalTargetType.NUMERIC, DecimalTargetType.STRING], + } + self.assertEqual(got_resource, expected_resource) + + def test_to_api_repr_decimal_target_types_unset(self): + from google.cloud.bigquery.enums import DecimalTargetType + + ec = external_config.ExternalConfig("FORMAT_FOO") + ec._properties["decimalTargetTypes"] = [DecimalTargetType.NUMERIC] + ec.decimal_target_types = None + + got_resource = ec.to_api_repr() + + expected_resource = {"sourceFormat": "FORMAT_FOO"} + self.assertEqual(got_resource, expected_resource) + + ec.decimal_target_types = None # No error if unsetting when already unset. + def _copy_and_update(d, u): d = copy.deepcopy(d) From 3784dd13432b183c0087644a4cdac479d9b1701e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 10 Jul 2021 11:29:00 +0200 Subject: [PATCH 1197/2016] feat: add support for table snapshots (#740) * feat: add support for table snapshots * Add system test for table snapshots * Make test taxonomy resource name unique * Store timezone aware snapshot time on snapshots * Make copy config tests more detailed * Use unique resource ID differently for display name * Add new classes to docs --- .../google-cloud-bigquery/docs/reference.rst | 2 + .../google/cloud/bigquery/__init__.py | 4 + .../google/cloud/bigquery/job/__init__.py | 2 + .../google/cloud/bigquery/job/copy_.py | 38 ++++++++++ .../google/cloud/bigquery/table.py | 37 ++++++++++ .../tests/system/test_client.py | 71 +++++++++++++++++- .../tests/unit/job/test_copy.py | 34 ++++++++- .../tests/unit/test_table.py | 74 +++++++++++++++++++ 8 files changed, 260 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 694379cd276e..cb2faa5ec388 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -59,6 +59,7 @@ Job-Related Types job.CreateDisposition job.DestinationFormat job.Encoding + job.OperationType job.QueryPlanEntry job.QueryPlanEntryStep job.QueryPriority @@ -90,6 +91,7 @@ Table table.RangePartitioning table.Row table.RowIterator + table.SnapshotDefinition table.Table table.TableListItem table.TableReference diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index b972241761de..65dde5d94552 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -61,6 +61,7 @@ from google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import LoadJobConfig +from google.cloud.bigquery.job import OperationType from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import QueryPriority @@ -87,6 +88,7 @@ from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import Row +from google.cloud.bigquery.table import SnapshotDefinition from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioningType @@ -115,6 +117,7 @@ "PartitionRange", "RangePartitioning", "Row", + "SnapshotDefinition", "TimePartitioning", "TimePartitioningType", # Jobs @@ -155,6 +158,7 @@ "ExternalSourceFormat", "Encoding", "KeyResultStatementKind", + "OperationType", "QueryPriority", "SchemaUpdateOption", "SourceFormat", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py index cdab92e05953..6bdfa09be449 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py @@ -25,6 +25,7 @@ from google.cloud.bigquery.job.base import UnknownJob from google.cloud.bigquery.job.copy_ import CopyJob from google.cloud.bigquery.job.copy_ import CopyJobConfig +from google.cloud.bigquery.job.copy_ import OperationType from google.cloud.bigquery.job.extract import ExtractJob from google.cloud.bigquery.job.extract import ExtractJobConfig from google.cloud.bigquery.job.load import LoadJob @@ -59,6 +60,7 @@ "UnknownJob", "CopyJob", "CopyJobConfig", + "OperationType", "ExtractJob", "ExtractJobConfig", "LoadJob", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py index 95f4b613b5cb..c6ee989441fb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py @@ -14,6 +14,8 @@ """Classes for copy jobs.""" +from typing import Optional + from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery import _helpers from google.cloud.bigquery.table import TableReference @@ -23,6 +25,25 @@ from google.cloud.bigquery.job.base import _JobReference +class OperationType: + """Different operation types supported in table copy job. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#operationtype + """ + + OPERATION_TYPE_UNSPECIFIED = "OPERATION_TYPE_UNSPECIFIED" + """Unspecified operation type.""" + + COPY = "COPY" + """The source and destination table have the same table type.""" + + SNAPSHOT = "SNAPSHOT" + """The source table type is TABLE and the destination table type is SNAPSHOT.""" + + RESTORE = "RESTORE" + """The source table type is SNAPSHOT and the destination table type is TABLE.""" + + class CopyJobConfig(_JobConfig): """Configuration options for copy jobs. @@ -85,6 +106,23 @@ def destination_encryption_configuration(self, value): api_repr = value.to_api_repr() self._set_sub_prop("destinationEncryptionConfiguration", api_repr) + @property + def operation_type(self) -> str: + """The operation to perform with this copy job. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.operation_type + """ + return self._get_sub_prop( + "operationType", OperationType.OPERATION_TYPE_UNSPECIFIED + ) + + @operation_type.setter + def operation_type(self, value: Optional[str]): + if value is None: + value = OperationType.OPERATION_TYPE_UNSPECIFIED + self._set_sub_prop("operationType", value) + class CopyJob(_AsyncJob): """Asynchronous job: copy data into a table from other tables. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a1c13c85d009..765110ae6c29 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -321,6 +321,7 @@ class Table(object): "range_partitioning": "rangePartitioning", "time_partitioning": "timePartitioning", "schema": "schema", + "snapshot_definition": "snapshotDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", "table_id": ["tableReference", "tableId"], @@ -910,6 +911,19 @@ def external_data_configuration(self, value): self._PROPERTY_TO_API_FIELD["external_data_configuration"] ] = api_repr + @property + def snapshot_definition(self) -> Optional["SnapshotDefinition"]: + """Information about the snapshot. This value is set via snapshot creation. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.snapshot_definition + """ + snapshot_info = self._properties.get( + self._PROPERTY_TO_API_FIELD["snapshot_definition"] + ) + if snapshot_info is not None: + snapshot_info = SnapshotDefinition(snapshot_info) + return snapshot_info + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. @@ -1274,6 +1288,29 @@ def __init__(self, resource): ) +class SnapshotDefinition: + """Information about base table and snapshot time of the snapshot. + + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#snapshotdefinition + + Args: + resource: Snapshot definition representation returned from the API. + """ + + def __init__(self, resource: Dict[str, Any]): + self.base_table_reference = None + if "baseTableReference" in resource: + self.base_table_reference = TableReference.from_api_repr( + resource["baseTableReference"] + ) + + self.snapshot_time = None + if "snapshotTime" in resource: + self.snapshot_time = google.cloud._helpers._rfc3339_to_datetime( + resource["snapshotTime"] + ) + + class Row(object): """A BigQuery row. diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 460296b2f21a..7234333a2ee6 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -394,7 +394,7 @@ def test_create_table_with_real_custom_policy(self): taxonomy_parent = f"projects/{Config.CLIENT.project}/locations/us" new_taxonomy = datacatalog_types.Taxonomy( - display_name="Custom test taxonomy", + display_name="Custom test taxonomy" + unique_resource_id(), description="This taxonomy is ony used for a test.", activated_policy_types=[ datacatalog_types.Taxonomy.PolicyType.FINE_GRAINED_ACCESS_CONTROL @@ -2370,6 +2370,75 @@ def test_parameterized_types_round_trip(self): self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields) + def test_table_snapshots(self): + from google.cloud.bigquery import CopyJobConfig + from google.cloud.bigquery import OperationType + + client = Config.CLIENT + + source_table_path = f"{client.project}.{Config.DATASET}.test_table" + snapshot_table_path = f"{source_table_path}_snapshot" + + # Create the table before loading so that the column order is predictable. + schema = [ + bigquery.SchemaField("foo", "INTEGER"), + bigquery.SchemaField("bar", "STRING"), + ] + source_table = helpers.retry_403(Config.CLIENT.create_table)( + Table(source_table_path, schema=schema) + ) + self.to_delete.insert(0, source_table) + + # Populate the table with initial data. + rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}] + load_job = Config.CLIENT.load_table_from_json(rows, source_table) + load_job.result() + + # Now create a snapshot before modifying the original table data. + copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.SNAPSHOT + + copy_job = client.copy_table( + sources=source_table_path, + destination=snapshot_table_path, + job_config=copy_config, + ) + copy_job.result() + + snapshot_table = client.get_table(snapshot_table_path) + self.to_delete.insert(0, snapshot_table) + + # Modify data in original table. + sql = f'INSERT INTO `{source_table_path}`(foo, bar) VALUES (3, "three")' + query_job = client.query(sql) + query_job.result() + + # List rows from the source table and compare them to rows from the snapshot. + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two"), (3, "three")] + + rows_iter = client.list_rows(snapshot_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + + # Now restore the table from the snapshot and it should again contain the old + # set of rows. + copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.RESTORE + copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + + copy_job = client.copy_table( + sources=snapshot_table_path, + destination=source_table_path, + job_config=copy_config, + ) + copy_job.result() + + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py index fb0c87391f78..992efcf6bf8c 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py @@ -28,18 +28,34 @@ def _get_target_class(): return CopyJobConfig + def test_ctor_defaults(self): + from google.cloud.bigquery.job import OperationType + + config = self._make_one() + + assert config.create_disposition is None + assert config.write_disposition is None + assert config.destination_encryption_configuration is None + assert config.operation_type == OperationType.OPERATION_TYPE_UNSPECIFIED + def test_ctor_w_properties(self): from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import OperationType from google.cloud.bigquery.job import WriteDisposition create_disposition = CreateDisposition.CREATE_NEVER write_disposition = WriteDisposition.WRITE_TRUNCATE + snapshot_operation = OperationType.SNAPSHOT + config = self._get_target_class()( - create_disposition=create_disposition, write_disposition=write_disposition + create_disposition=create_disposition, + write_disposition=write_disposition, + operation_type=snapshot_operation, ) self.assertEqual(config.create_disposition, create_disposition) self.assertEqual(config.write_disposition, write_disposition) + self.assertEqual(config.operation_type, snapshot_operation) def test_to_api_repr_with_encryption(self): from google.cloud.bigquery.encryption_configuration import ( @@ -70,6 +86,22 @@ def test_to_api_repr_with_encryption_none(self): resource, {"copy": {"destinationEncryptionConfiguration": None}} ) + def test_operation_type_setting_none(self): + from google.cloud.bigquery.job import OperationType + + config = self._make_one(operation_type=OperationType.SNAPSHOT) + + # Setting it to None is the same as setting it to OPERATION_TYPE_UNSPECIFIED. + config.operation_type = None + assert config.operation_type == OperationType.OPERATION_TYPE_UNSPECIFIED + + def test_operation_type_setting_non_none(self): + from google.cloud.bigquery.job import OperationType + + config = self._make_one(operation_type=None) + config.operation_type = OperationType.RESTORE + assert config.operation_type == OperationType.RESTORE + class TestCopyJob(_Base): JOB_TYPE = "copy" diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index f4038835cd30..b30f16fe0e64 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -684,6 +684,40 @@ def test_props_set_by_server(self): self.assertEqual(table.full_table_id, TABLE_FULL_ID) self.assertEqual(table.table_type, "TABLE") + def test_snapshot_definition_not_set(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + assert table.snapshot_definition is None + + def test_snapshot_definition_set(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import SnapshotDefinition + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table._properties["snapshotDefinition"] = { + "baseTableReference": { + "projectId": "project_x", + "datasetId": "dataset_y", + "tableId": "table_z", + }, + "snapshotTime": "2010-09-28T10:20:30.123Z", + } + + snapshot = table.snapshot_definition + + assert isinstance(snapshot, SnapshotDefinition) + assert snapshot.base_table_reference.path == ( + "/projects/project_x/datasets/dataset_y/tables/table_z" + ) + assert snapshot.snapshot_time == datetime.datetime( + 2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC + ) + def test_description_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1509,6 +1543,46 @@ def test_to_api_repr(self): self.assertEqual(table.to_api_repr(), resource) +class TestSnapshotDefinition: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import SnapshotDefinition + + return SnapshotDefinition + + @classmethod + def _make_one(cls, *args, **kwargs): + klass = cls._get_target_class() + return klass(*args, **kwargs) + + def test_ctor_empty_resource(self): + instance = self._make_one(resource={}) + assert instance.base_table_reference is None + assert instance.snapshot_time is None + + def test_ctor_full_resource(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import TableReference + + resource = { + "baseTableReference": { + "projectId": "my-project", + "datasetId": "your-dataset", + "tableId": "our-table", + }, + "snapshotTime": "2005-06-07T19:35:02.123Z", + } + instance = self._make_one(resource) + + expected_table_ref = TableReference.from_string( + "my-project.your-dataset.our-table" + ) + assert instance.base_table_reference == expected_table_ref + + expected_time = datetime.datetime(2005, 6, 7, 19, 35, 2, 123000, tzinfo=UTC) + assert instance.snapshot_time == expected_time + + class TestRow(unittest.TestCase): def test_row(self): from google.cloud.bigquery.table import Row From 64b8c8facd4e6acbf7749552be8def9cad1ebc16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Br=C3=A6dstrup?= <3591721+LinuxChristian@users.noreply.github.com> Date: Mon, 12 Jul 2021 21:21:24 +0200 Subject: [PATCH 1198/2016] fix: use pandas function to check for NaN (#750) * fix: use pandas function to check for NaN Starting with pandas 1.0, an experimental pandas.NA value (singleton) is available to represent scalar missing values as opposed to numpy.nan. Comparing the variable with itself results in a pandas.NA value that doesn't support type-casting to boolean. Using the build-in pandas.isna function handles all pandas supported NaN values. * tests: Skip tests if pandas below required version * tests: compare expected and actual directly as lists * Fix pytest.mark.skipif spelling Co-authored-by: Peter Lamut --- .../google/cloud/bigquery/_pandas_helpers.py | 2 +- .../tests/unit/test__pandas_helpers.py | 40 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index e93a99eba010..285c0e83c797 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -780,7 +780,7 @@ def dataframe_to_json_generator(dataframe): output = {} for column, value in zip(dataframe.columns, row): # Omit NaN values. - if value != value: + if pandas.isna(value): continue output[column] = value yield output diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 39a3d845bc6b..aa87e28f55a3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -19,6 +19,7 @@ import operator import queue import warnings +import pkg_resources import mock @@ -47,6 +48,14 @@ except ImportError: # pragma: NO COVER bigquery_storage = None +PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") + +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + # Set to less than MIN version. + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + skip_if_no_bignumeric = pytest.mark.skipif( not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", @@ -734,6 +743,37 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( assert columns_and_indexes == expected +@pytest.mark.skipif( + pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, + reason="Requires `pandas version >= 1.0.0` which introduces pandas.NA", +) +def test_dataframe_to_json_generator(module_under_test): + utcnow = datetime.datetime.utcnow() + df_data = collections.OrderedDict( + [ + ("a_series", [pandas.NA, 2, 3, 4]), + ("b_series", [0.1, float("NaN"), 0.3, 0.4]), + ("c_series", ["a", "b", pandas.NA, "d"]), + ("d_series", [utcnow, utcnow, utcnow, pandas.NaT]), + ("e_series", [True, False, True, None]), + ] + ) + dataframe = pandas.DataFrame( + df_data, index=pandas.Index([4, 5, 6, 7], name="a_index") + ) + + dataframe = dataframe.astype({"a_series": pandas.Int64Dtype()}) + + rows = module_under_test.dataframe_to_json_generator(dataframe) + expected = [ + {"b_series": 0.1, "c_series": "a", "d_series": utcnow, "e_series": True}, + {"a_series": 2, "c_series": "b", "d_series": utcnow, "e_series": False}, + {"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True}, + {"a_series": 4, "b_series": 0.4, "c_series": "d"}, + ] + assert list(rows) == expected + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_list_columns_and_indexes_with_named_index(module_under_test): df_data = collections.OrderedDict( From 020c5ee4df5ea29769c6c26d0dea8168dfaba035 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 13 Jul 2021 19:06:15 +0200 Subject: [PATCH 1199/2016] chore: release 2.21.0 (#753) Supersedes #711. ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-13) ### Features * Add max_results parameter to some of the `QueryJob` methods. ([#698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) * Add support for decimal target types. ([#735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) * Add support for table snapshots. ([#740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) * Enable unsetting policy tags on schema fields. ([#703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) * Make it easier to disable best-effort deduplication with streaming inserts. ([#734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) * Support passing struct data to the DB API. ([#718](https://www.github.com/googleapis/python-bigquery/issues/718)) ([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) ### Bug Fixes * Inserting non-finite floats with `insert_rows()`. ([#728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) * Use `pandas` function to check for `NaN`. ([#750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) ### Documentation * Add docs for all enums in module. ([#745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) * Omit mention of Python 2.7 in `CONTRIBUTING.rst`. ([#706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9)) --- packages/google-cloud-bigquery/CHANGELOG.md | 24 +++++++++++++++++++ packages/google-cloud-bigquery/docs/conf.py | 1 - .../google/cloud/bigquery/version.py | 2 +- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index b08cd98c7cbc..7344542b403a 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,30 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-12) + + +### Features + +* Add max_results parameter to some of the `QueryJob` methods. ([#698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) +* Add support for decimal target types. ([#735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) +* Add support for table snapshots. ([#740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) +* Enable unsetting policy tags on schema fields. ([#703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) +* Make it easier to disable best-effort deduplication with streaming inserts. ([#734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) +* Support passing struct data to the DB API. ([#718](https://www.github.com/googleapis/python-bigquery/issues/718)) ([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) + + +### Bug Fixes + +* Inserting non-finite floats with `insert_rows()`. ([#728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) +* Use `pandas` function to check for `NaN`. ([#750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) + + +### Documentation + +* Add docs for all enums in module. ([#745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) +* Omit mention of Python 2.7 in `CONTRIBUTING.rst`. ([#706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9)) + ## [2.20.0](https://www.github.com/googleapis/python-bigquery/compare/v2.19.0...v2.20.0) (2021-06-07) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 09f7ea414883..cb347160de42 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -110,7 +110,6 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", - "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 9fea4fece7f8..563b0e160e7a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.20.0" +__version__ = "2.21.0" From 9af7e74a013e97e873a52c046b6282b5efe7d6a6 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 13 Jul 2021 19:42:22 +0200 Subject: [PATCH 1200/2016] chore(deps): update dependency google-cloud-bigquery to v2.21.0 (#755) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.20.0` -> `==2.21.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/compatibility-slim/2.20.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/confidence-slim/2.20.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.21.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2210-httpswwwgithubcomgoogleapispython-bigquerycomparev2200v2210-2021-07-12) ##### Features - Add max_results parameter to some of the `QueryJob` methods. ([#​698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) - Add support for decimal target types. ([#​735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) - Add support for table snapshots. ([#​740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) - Enable unsetting policy tags on schema fields. ([#​703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) - Make it easier to disable best-effort deduplication with streaming inserts. ([#​734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) - Support passing struct data to the DB API. ([#​718](https://www.github.com/googleapis/python-bigquery/issues/718)) ([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) ##### Bug Fixes - Inserting non-finite floats with `insert_rows()`. ([#​728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) - Use `pandas` function to check for `NaN`. ([#​750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) ##### Documentation - Add docs for all enums in module. ([#​745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) - Omit mention of Python 2.7 in `CONTRIBUTING.rst`. ([#​706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 30a59c15ae7b..c7aa209adc8d 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.20.0 +google-cloud-bigquery==2.21.0 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index ce02ac7ed844..b62c84c3365a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.20.0 +google-cloud-bigquery==2.21.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 From da972944fdc0770d6bf06edd58cfa7da3a7df7d2 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 15:24:57 +0000 Subject: [PATCH 1201/2016] build(python): exit with success status if no samples found (#759) Source-Link: https://github.com/googleapis/synthtool/commit/53ea3896a52f87c758e79b5a19fa338c83925a98 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:e1793a23ae0ee9aafb2e3a53b564a351f74790dbe3c2d75f8fc3b8c43e5c036c --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index e2b39f946040..a5d3697f2167 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:99d90d097e4a4710cc8658ee0b5b963f4426d0e424819787c3ac1405c9a26719 + digest: sha256:e1793a23ae0ee9aafb2e3a53b564a351f74790dbe3c2d75f8fc3b8c43e5c036c diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh index cf5de74c17a5..311a8d54b9f1 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh @@ -20,9 +20,9 @@ set -eo pipefail # Enables `**` to include files nested inside sub-folders shopt -s globstar -# Exit early if samples directory doesn't exist -if [ ! -d "./samples" ]; then - echo "No tests run. `./samples` not found" +# Exit early if samples don't exist +if ! find samples -name 'requirements.txt' | grep -q .; then + echo "No tests run. './samples/**/requirements.txt' not found" exit 0 fi From 49191522557e6204418fc3e7971ba32f96f2974c Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 16:16:13 +0000 Subject: [PATCH 1202/2016] chore: release 2.21.0 (#760) :robot: I have created a release \*beep\* \*boop\* --- ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-14) ### Features * add always_use_jwt_access ([#714](https://www.github.com/googleapis/python-bigquery/issues/714)) ([92fbd4a](https://www.github.com/googleapis/python-bigquery/commit/92fbd4ade37e0be49dc278080ef73c83eafeea18)) * add max_results parameter to some of the QueryJob methods ([#698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) * add support for decimal target types ([#735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) * add support for table snapshots ([#740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) * enable unsetting policy tags on schema fields ([#703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) * make it easier to disable best-effort deduplication with streaming inserts ([#734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) * Support passing struct data to the DB API ([#718](https://www.github.com/googleapis/python-bigquery/issues/718)) ([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) ### Bug Fixes * inserting non-finite floats with insert_rows() ([#728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) * use pandas function to check for NaN ([#750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) ### Documentation * add docs for all enums in module ([#745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) * omit mention of Python 2.7 in `CONTRIBUTING.rst` ([#706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 7344542b403a..5fba4c5176ff 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -4,6 +4,7 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history + ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-12) From 9cd7567411bdf5d6fcc07509473ba32761e3d82c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 12:49:33 -0400 Subject: [PATCH 1203/2016] build(python): remove python 3.7 from kokoro Dockerfile (#762) Source-Link: https://github.com/googleapis/synthtool/commit/e44dc0c742b1230887a73552357e0c18dcc30b92 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:5ff7446edeaede81c3ed58b23a4e76a5403fba1350ce28478045657303b6479d Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 2 +- .../.kokoro/docker/docs/Dockerfile | 35 ++----------------- 2 files changed, 3 insertions(+), 34 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index a5d3697f2167..cb06536dab0b 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:e1793a23ae0ee9aafb2e3a53b564a351f74790dbe3c2d75f8fc3b8c43e5c036c + digest: sha256:5ff7446edeaede81c3ed58b23a4e76a5403fba1350ce28478045657303b6479d diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile index 412b0b56a921..4e1b1fb8b5a5 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile @@ -40,6 +40,7 @@ RUN apt-get update \ libssl-dev \ libsqlite3-dev \ portaudio19-dev \ + python3-distutils \ redis-server \ software-properties-common \ ssh \ @@ -59,40 +60,8 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb - -COPY fetch_gpg_keys.sh /tmp -# Install the desired versions of Python. -RUN set -ex \ - && export GNUPGHOME="$(mktemp -d)" \ - && echo "disable-ipv6" >> "${GNUPGHOME}/dirmngr.conf" \ - && /tmp/fetch_gpg_keys.sh \ - && for PYTHON_VERSION in 3.7.8 3.8.5; do \ - wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \ - && wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \ - && gpg --batch --verify python-${PYTHON_VERSION}.tar.xz.asc python-${PYTHON_VERSION}.tar.xz \ - && rm -r python-${PYTHON_VERSION}.tar.xz.asc \ - && mkdir -p /usr/src/python-${PYTHON_VERSION} \ - && tar -xJC /usr/src/python-${PYTHON_VERSION} --strip-components=1 -f python-${PYTHON_VERSION}.tar.xz \ - && rm python-${PYTHON_VERSION}.tar.xz \ - && cd /usr/src/python-${PYTHON_VERSION} \ - && ./configure \ - --enable-shared \ - # This works only on Python 2.7 and throws a warning on every other - # version, but seems otherwise harmless. - --enable-unicode=ucs4 \ - --with-system-ffi \ - --without-ensurepip \ - && make -j$(nproc) \ - && make install \ - && ldconfig \ - ; done \ - && rm -rf "${GNUPGHOME}" \ - && rm -rf /usr/src/python* \ - && rm -rf ~/.cache/ - RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3.7 /tmp/get-pip.py \ && python3.8 /tmp/get-pip.py \ && rm /tmp/get-pip.py -CMD ["python3.7"] +CMD ["python3.8"] From 823f2ea2b6fc3a7f9c3dc69c5590789eb0aeea67 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 14 Jul 2021 21:20:27 +0200 Subject: [PATCH 1204/2016] fix: avoid possible job already exists error (#751) * fix: avoid possible job already exists error If job create request fails, a query job might still have started successfully. This commit handles this edge case and returns such query job one can be found. * Catch only Conflict errors on query job create --- .../google/cloud/bigquery/client.py | 26 ++++++- .../tests/unit/test_client.py | 75 +++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2a02c7629d9b..de259abcea6f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3190,6 +3190,7 @@ def query( If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig` class. """ + job_id_given = job_id is not None job_id = _make_job_id(job_id, job_id_prefix) if project is None: @@ -3221,9 +3222,30 @@ def query( job_ref = job._JobReference(job_id, project=project, location=location) query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) - query_job._begin(retry=retry, timeout=timeout) - return query_job + try: + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. + if job_id_given: + raise create_exc + + try: + query_job = self.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job + else: + return query_job def insert_rows( self, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index dffe7bdbabc8..2be8daab65b4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4617,6 +4617,81 @@ def test_query_w_query_parameters(self): }, ) + def test_query_job_rpc_fail_w_random_error(self): + from google.api_core.exceptions import Unknown + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Unknown("Not sure what went wrong.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + with job_begin_patcher: + with pytest.raises(Unknown, match="Not sure what went wrong."): + client.query("SELECT 1;", job_id="123") + + def test_query_job_rpc_fail_w_conflict_job_id_given(self): + from google.api_core.exceptions import Conflict + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + with job_begin_patcher: + with pytest.raises(Conflict, match="Job already exists."): + client.query("SELECT 1;", job_id="123") + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): + from google.api_core.exceptions import Conflict + from google.api_core.exceptions import DataLoss + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", side_effect=DataLoss("we lost yor job, sorry") + ) + + with job_begin_patcher, get_job_patcher: + # If get job request fails, the original exception should be raised. + with pytest.raises(Conflict, match="Job already exists."): + client.query("SELECT 1;", job_id=None) + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): + from google.api_core.exceptions import Conflict + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", return_value=mock.sentinel.query_job + ) + + with job_begin_patcher, get_job_patcher: + result = client.query("SELECT 1;", job_id=None) + + assert result is mock.sentinel.query_job + def test_insert_rows_w_timeout(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table From 479259851ec0df368cf06d5ad983d342e80c6e64 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 14 Jul 2021 17:29:00 -0400 Subject: [PATCH 1205/2016] chore: expand range to allow 2.x versions (#768) api-core, cloud-core, and resumable-media wil all be releasing Python3-only 2.x versions shortly. Closes #767. --- packages/google-cloud-bigquery/setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index fcb1dd966bd8..71958ccf99c5 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -30,10 +30,10 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 - "google-api-core[grpc] >= 1.29.0, < 2.0.0dev", + "google-api-core[grpc] >= 1.29.0, < 3.0.0dev", "proto-plus >= 1.10.0", - "google-cloud-core >= 1.4.1, < 2.0dev", - "google-resumable-media >= 0.6.0, < 2.0dev", + "google-cloud-core >= 1.4.1, < 3.0dev", + "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", "requests >= 2.18.0, < 3.0.0dev", From ce9e9b644bb1e0c8c7b68cc6f51d9974d58d1272 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 15 Jul 2021 04:19:25 -0500 Subject: [PATCH 1206/2016] feat: add `LoadJobConfig.projection_fields` to select DATASTORE_BACKUP fields (#736) * feat: add LoadJobConfig.projection_fields to select DATASTORE_BACKUP fields * add type annotations * annotate setter too Co-authored-by: Peter Lamut --- .../google/cloud/bigquery/job/load.py | 24 +++++++++++++++++-- .../tests/unit/job/test_load_config.py | 11 +++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index bdee5cb6bd3d..f1b0454120b4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -14,7 +14,7 @@ """Classes for load jobs.""" -from typing import FrozenSet, Iterable, Optional +from typing import FrozenSet, List, Iterable, Optional from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import HivePartitioningOptions @@ -25,7 +25,6 @@ from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioning - from google.cloud.bigquery.job.base import _AsyncJob from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference @@ -300,6 +299,27 @@ def null_marker(self): def null_marker(self, value): self._set_sub_prop("nullMarker", value) + @property + def projection_fields(self) -> Optional[List[str]]: + """Optional[List[str]]: If + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format` is set to + "DATASTORE_BACKUP", indicates which entity properties to load into + BigQuery from a Cloud Datastore backup. + + Property names are case sensitive and must be top-level properties. If + no properties are specified, BigQuery loads all properties. If any + named property isn't found in the Cloud Datastore backup, an invalid + error is returned in the job result. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.projection_fields + """ + return self._get_sub_prop("projectionFields") + + @projection_fields.setter + def projection_fields(self, value: Optional[List[str]]): + self._set_sub_prop("projectionFields", value) + @property def quote_character(self): """Optional[str]: Character used to quote data sections (CSV only). diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index 190bd16dcf4a..cbe087dacc72 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -424,6 +424,17 @@ def test_null_marker_setter(self): config.null_marker = null_marker self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + def test_projection_fields_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.projection_fields) + + def test_projection_fields_hit(self): + config = self._get_target_class()() + fields = ["email", "postal_code"] + config.projection_fields = fields + self.assertEqual(config._properties["load"]["projectionFields"], fields) + self.assertEqual(config.projection_fields, fields) + def test_quote_character_missing(self): config = self._get_target_class()() self.assertIsNone(config.quote_character) From 41b43eff0f9fc2c6c2af4362f1793be891237c31 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 15 Jul 2021 19:37:17 +0200 Subject: [PATCH 1207/2016] feat: add support for more detailed DML stats (#758) * feat: add support for more detailed DML stats * Move is None check of DmlStats one level higher --- .../google-cloud-bigquery/docs/reference.rst | 1 + .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/job/__init__.py | 2 + .../google/cloud/bigquery/job/query.py | 37 +++++++++++ .../tests/system/test_client.py | 56 ++++++++++++++++ .../tests/unit/job/test_query.py | 64 +++++++++++++++++++ .../tests/unit/job/test_query_stats.py | 37 +++++++++++ 7 files changed, 199 insertions(+) diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index cb2faa5ec388..8c38d0c44d72 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -58,6 +58,7 @@ Job-Related Types job.Compression job.CreateDisposition job.DestinationFormat + job.DmlStats job.Encoding job.OperationType job.QueryPlanEntry diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 65dde5d94552..ced8cefaef78 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -56,6 +56,7 @@ from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import DestinationFormat +from google.cloud.bigquery.job import DmlStats from google.cloud.bigquery.job import Encoding from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import ExtractJobConfig @@ -142,6 +143,7 @@ "BigtableOptions", "BigtableColumnFamily", "BigtableColumn", + "DmlStats", "CSVOptions", "GoogleSheetsOptions", "ParquetOptions", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py index 6bdfa09be449..4c16d0e20219 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py @@ -31,6 +31,7 @@ from google.cloud.bigquery.job.load import LoadJob from google.cloud.bigquery.job.load import LoadJobConfig from google.cloud.bigquery.job.query import _contains_order_by +from google.cloud.bigquery.job.query import DmlStats from google.cloud.bigquery.job.query import QueryJob from google.cloud.bigquery.job.query import QueryJobConfig from google.cloud.bigquery.job.query import QueryPlanEntry @@ -66,6 +67,7 @@ "LoadJob", "LoadJobConfig", "_contains_order_by", + "DmlStats", "QueryJob", "QueryJobConfig", "QueryPlanEntry", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 6ff9f2647025..d588e9b5a61f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -114,6 +114,35 @@ def _to_api_repr_table_defs(value): return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} +class DmlStats(typing.NamedTuple): + """Detailed statistics for DML statements. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/DmlStats + """ + + inserted_row_count: int = 0 + """Number of inserted rows. Populated by DML INSERT and MERGE statements.""" + + deleted_row_count: int = 0 + """Number of deleted rows. populated by DML DELETE, MERGE and TRUNCATE statements. + """ + + updated_row_count: int = 0 + """Number of updated rows. Populated by DML UPDATE and MERGE statements.""" + + @classmethod + def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats": + # NOTE: The field order here must match the order of fields set at the + # class level. + api_fields = ("insertedRowCount", "deletedRowCount", "updatedRowCount") + + args = ( + int(stats.get(api_field, default_val)) + for api_field, default_val in zip(api_fields, cls.__new__.__defaults__) + ) + return cls(*args) + + class ScriptOptions: """Options controlling the execution of scripts. @@ -1079,6 +1108,14 @@ def estimated_bytes_processed(self): result = int(result) return result + @property + def dml_stats(self) -> Optional[DmlStats]: + stats = self._job_statistics().get("dmlStats") + if stats is None: + return None + else: + return DmlStats.from_api_repr(stats) + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 7234333a2ee6..cbca73619b19 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -1521,6 +1521,62 @@ def test_query_statistics(self): self.assertGreater(stages_with_inputs, 0) self.assertGreater(len(plan), stages_with_inputs) + def test_dml_statistics(self): + table_schema = ( + bigquery.SchemaField("foo", "STRING"), + bigquery.SchemaField("bar", "INTEGER"), + ) + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.test_dml_statistics".format(Config.CLIENT.project, dataset_id) + + # Create the table before loading so that the column order is deterministic. + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # Insert a few rows and check the stats. + sql = f""" + INSERT INTO `{table_id}` + VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4); + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 4 + assert query_job.dml_stats.updated_row_count == 0 + assert query_job.dml_stats.deleted_row_count == 0 + + # Update some of the rows. + sql = f""" + UPDATE `{table_id}` + SET bar = bar + 1 + WHERE bar > 2; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 0 + assert query_job.dml_stats.updated_row_count == 2 + assert query_job.dml_stats.deleted_row_count == 0 + + # Now delete a few rows and check the stats. + sql = f""" + DELETE FROM `{table_id}` + WHERE foo != "two"; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 0 + assert query_job.dml_stats.updated_row_count == 0 + assert query_job.dml_stats.deleted_row_count == 3 + def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: Config.CURSOR.execute(sql) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 4665933eae42..482f7f3afac1 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -110,6 +110,24 @@ def _verify_table_definitions(self, job, config): self.assertIsNotNone(expected_ec) self.assertEqual(found_ec.to_api_repr(), expected_ec) + def _verify_dml_stats_resource_properties(self, job, resource): + query_stats = resource.get("statistics", {}).get("query", {}) + + if "dmlStats" in query_stats: + resource_dml_stats = query_stats["dmlStats"] + job_dml_stats = job.dml_stats + assert str(job_dml_stats.inserted_row_count) == resource_dml_stats.get( + "insertedRowCount", "0" + ) + assert str(job_dml_stats.updated_row_count) == resource_dml_stats.get( + "updatedRowCount", "0" + ) + assert str(job_dml_stats.deleted_row_count) == resource_dml_stats.get( + "deletedRowCount", "0" + ) + else: + assert job.dml_stats is None + def _verify_configuration_properties(self, job, configuration): if "dryRun" in configuration: self.assertEqual(job.dry_run, configuration["dryRun"]) @@ -118,6 +136,7 @@ def _verify_configuration_properties(self, job, configuration): def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) + self._verify_dml_stats_resource_properties(job, resource) configuration = resource.get("configuration", {}) self._verify_configuration_properties(job, configuration) @@ -130,16 +149,19 @@ def _verifyResourceProperties(self, job, resource): self._verify_table_definitions(job, query_config) self.assertEqual(job.query, query_config["query"]) + if "createDisposition" in query_config: self.assertEqual(job.create_disposition, query_config["createDisposition"]) else: self.assertIsNone(job.create_disposition) + if "defaultDataset" in query_config: ds_ref = job.default_dataset ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id} self.assertEqual(ds_ref, query_config["defaultDataset"]) else: self.assertIsNone(job.default_dataset) + if "destinationTable" in query_config: table = job.destination tb_ref = { @@ -150,14 +172,17 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(tb_ref, query_config["destinationTable"]) else: self.assertIsNone(job.destination) + if "priority" in query_config: self.assertEqual(job.priority, query_config["priority"]) else: self.assertIsNone(job.priority) + if "writeDisposition" in query_config: self.assertEqual(job.write_disposition, query_config["writeDisposition"]) else: self.assertIsNone(job.write_disposition) + if "destinationEncryptionConfiguration" in query_config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( @@ -166,6 +191,7 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "schemaUpdateOptions" in query_config: self.assertEqual( job.schema_update_options, query_config["schemaUpdateOptions"] @@ -190,6 +216,7 @@ def test_ctor_defaults(self): self.assertIsNone(job.create_disposition) self.assertIsNone(job.default_dataset) self.assertIsNone(job.destination) + self.assertIsNone(job.dml_stats) self.assertIsNone(job.flatten_results) self.assertIsNone(job.priority) self.assertIsNone(job.use_query_cache) @@ -278,6 +305,26 @@ def test_from_api_repr_with_encryption(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_with_dml_stats(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, + "statistics": { + "query": { + "dmlStats": {"insertedRowCount": "15", "updatedRowCount": "2"}, + }, + }, + } + klass = self._get_target_class() + + job = klass.from_api_repr(RESOURCE, client=client) + + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SchemaUpdateOption @@ -815,6 +862,23 @@ def test_estimated_bytes_processed(self): query_stats["estimatedBytesProcessed"] = str(est_bytes) self.assertEqual(job.estimated_bytes_processed, est_bytes) + def test_dml_stats(self): + from google.cloud.bigquery.job.query import DmlStats + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + assert job.dml_stats is None + + statistics = job._properties["statistics"] = {} + assert job.dml_stats is None + + query_stats = statistics["query"] = {} + assert job.dml_stats is None + + query_stats["dmlStats"] = {"insertedRowCount": "35"} + assert isinstance(job.dml_stats, DmlStats) + assert job.dml_stats.inserted_row_count == 35 + def test_result(self): from google.cloud.bigquery.table import RowIterator diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py index 09a0efc45866..e70eb097c545 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py @@ -15,6 +15,43 @@ from .helpers import _Base +class TestDmlStats: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import DmlStats + + return DmlStats + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + dml_stats = self._make_one() + assert dml_stats.inserted_row_count == 0 + assert dml_stats.deleted_row_count == 0 + assert dml_stats.updated_row_count == 0 + + def test_from_api_repr_partial_stats(self): + klass = self._get_target_class() + result = klass.from_api_repr({"deletedRowCount": "12"}) + + assert isinstance(result, klass) + assert result.inserted_row_count == 0 + assert result.deleted_row_count == 12 + assert result.updated_row_count == 0 + + def test_from_api_repr_full_stats(self): + klass = self._get_target_class() + result = klass.from_api_repr( + {"updatedRowCount": "4", "insertedRowCount": "7", "deletedRowCount": "25"} + ) + + assert isinstance(result, klass) + assert result.inserted_row_count == 7 + assert result.deleted_row_count == 25 + assert result.updated_row_count == 4 + + class TestQueryPlanEntryStep(_Base): KIND = "KIND" SUBSTEPS = ("SUB1", "SUB2") From 895e98b13daf3ad65cc85258ae5e74a92d8a3d3e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 16 Jul 2021 03:18:14 -0500 Subject: [PATCH 1208/2016] refactor: omit `read_session` with latest google-cloud-bigquery-storage (#748) * refactor: omit `read_session` with latest google-cloud-bigquery-storage `read_session` is unnecessary as of `google-cloud-bigquery-storage>=2.6.0`. This will allow us to more loudly deprecate the use of `rows(read_session)`. Rather than require 2.6.0, version switches will allow us to keep our requirements range wider. Will want to give this version some time to bake before making it required. * optimize _verify_bq_storage_version * fix failing tests due to optimization * fix unit tests * create BQStorageVersions class for version comparisons * add type annotations Also, use packaging directly, since that's all pkg_resources does https://github.com/pypa/setuptools/blob/a4dbe3457d89cf67ee3aa571fdb149e6eb544e88/pkg_resources/__init__.py\#L112 * allow legacy versions * fix coverage * fix coverage * add tests for version helpers --- .../google/cloud/bigquery/_helpers.py | 74 ++++++++++++++----- .../google/cloud/bigquery/_pandas_helpers.py | 10 ++- .../google/cloud/bigquery/client.py | 4 +- .../google/cloud/bigquery/table.py | 2 +- .../tests/unit/test__helpers.py | 39 +++++++++- .../tests/unit/test__pandas_helpers.py | 69 +++++++++++++++++ .../tests/unit/test_client.py | 4 +- .../tests/unit/test_magics.py | 2 +- .../tests/unit/test_table.py | 2 +- 9 files changed, 174 insertions(+), 32 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 77054542acd2..bf0f80e22192 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -26,7 +26,7 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes -import pkg_resources +import packaging.version from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -41,31 +41,65 @@ re.VERBOSE, ) -_MIN_BQ_STORAGE_VERSION = pkg_resources.parse_version("2.0.0") +_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") +_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") -def _verify_bq_storage_version(): - """Verify that a recent enough version of BigQuery Storage extra is installed. +class BQStorageVersions: + """Version comparisons for google-cloud-bigqueyr-storage package.""" - The function assumes that google-cloud-bigquery-storage extra is installed, and - should thus be used in places where this assumption holds. + def __init__(self): + self._installed_version = None - Because `pip` can install an outdated version of this extra despite the constraints - in setup.py, the the calling code can use this helper to verify the version - compatibility at runtime. - """ - from google.cloud import bigquery_storage + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of google-cloud-bigquery-storage.""" + if self._installed_version is None: + from google.cloud import bigquery_storage - installed_version = pkg_resources.parse_version( - getattr(bigquery_storage, "__version__", "legacy") - ) + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(bigquery_storage, "__version__", "0.0.0") + ) - if installed_version < _MIN_BQ_STORAGE_VERSION: - msg = ( - "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= 2.0.0 (version found: {installed_version})." - ) - raise LegacyBigQueryStorageError(msg) + return self._installed_version + + @property + def is_read_session_optional(self) -> bool: + """True if read_session is optional to rows(). + + See: https://github.com/googleapis/python-bigquery-storage/pull/228 + """ + return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION + + def verify_version(self): + """Verify that a recent enough version of BigQuery Storage extra is + installed. + + The function assumes that google-cloud-bigquery-storage extra is + installed, and should thus be used in places where this assumption + holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. + + Raises: + LegacyBigQueryStorageError: + If the google-cloud-bigquery-storage package is outdated. + """ + if self.installed_version < _MIN_BQ_STORAGE_VERSION: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, please upgrade " + f"it to version >= 2.0.0 (version found: {self.installed_version})." + ) + raise LegacyBigQueryStorageError(msg) + + +BQ_STORAGE_VERSIONS = BQStorageVersions() def _not_null(value, field): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 285c0e83c797..2ff96da4d44b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -41,6 +41,7 @@ # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. _ARROW_COMPRESSION_SUPPORT = True +from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema @@ -590,7 +591,14 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page): def _download_table_bqstorage_stream( download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): - rowstream = bqstorage_client.read_rows(stream.name).rows(session) + reader = bqstorage_client.read_rows(stream.name) + + # Avoid deprecation warnings for passing in unnecessary read session. + # https://github.com/googleapis/python-bigquery-storage/issues/229 + if _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: + rowstream = reader.rows() + else: + rowstream = reader.rows(session) for page in rowstream.pages: if download_state.done: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index de259abcea6f..8572ba911008 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -61,7 +61,7 @@ from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none -from google.cloud.bigquery._helpers import _verify_bq_storage_version +from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers @@ -508,7 +508,7 @@ def _ensure_bqstorage_client( return None try: - _verify_bq_storage_version() + BQ_STORAGE_VERSIONS.verify_version() except LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return None diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 765110ae6c29..2d9c15f50f03 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1565,7 +1565,7 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False try: - _helpers._verify_bq_storage_version() + _helpers.BQ_STORAGE_VERSIONS.verify_version() except LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return False diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index c62947d3745f..af026ccbe58f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -26,11 +26,17 @@ @unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") -class Test_verify_bq_storage_version(unittest.TestCase): +class TestBQStorageVersions(unittest.TestCase): + def _object_under_test(self): + from google.cloud.bigquery import _helpers + + return _helpers.BQStorageVersions() + def _call_fut(self): - from google.cloud.bigquery._helpers import _verify_bq_storage_version + from google.cloud.bigquery import _helpers - return _verify_bq_storage_version() + _helpers.BQ_STORAGE_VERSIONS._installed_version = None + return _helpers.BQ_STORAGE_VERSIONS.verify_version() def test_raises_no_error_w_recent_bqstorage(self): from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -53,10 +59,35 @@ def test_raises_error_w_unknown_bqstorage_version(self): with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: del fake_module.__version__ - error_pattern = r"version found: legacy" + error_pattern = r"version found: 0.0.0" with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): self._call_fut() + def test_installed_version_returns_cached(self): + versions = self._object_under_test() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + def test_installed_version_returns_parsed_version(self): + versions = self._object_under_test() + + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 2 + assert version.micro == 3 + + def test_is_read_session_optional_true(self): + versions = self._object_under_test() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"): + assert versions.is_read_session_optional + + def test_is_read_session_optional_false(self): + versions = self._object_under_test() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): + assert not versions.is_read_session_optional + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index aa87e28f55a3..0ba671cd987e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -40,11 +40,14 @@ import pytz from google import api_core +from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT try: from google.cloud import bigquery_storage + + _helpers.BQ_STORAGE_VERSIONS.verify_version() except ImportError: # pragma: NO COVER bigquery_storage = None @@ -1311,6 +1314,72 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__download_table_bqstorage_stream_includes_read_session( + monkeypatch, module_under_test +): + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0") + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + bqstorage_client.read_rows.return_value = reader + session = google.cloud.bigquery_storage_v1.types.ReadSession() + + module_under_test._download_table_bqstorage_stream( + module_under_test._DownloadState(), + bqstorage_client, + session, + google.cloud.bigquery_storage_v1.types.ReadStream(name="test"), + queue.Queue(), + mock.Mock(), + ) + + reader.rows.assert_called_once_with(session) + + +@pytest.mark.skipif( + bigquery_storage is None + or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", +) +def test__download_table_bqstorage_stream_omits_read_session( + monkeypatch, module_under_test +): + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr(bigquery_storage, "__version__", "2.6.0") + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + bqstorage_client.read_rows.return_value = reader + session = google.cloud.bigquery_storage_v1.types.ReadSession() + + module_under_test._download_table_bqstorage_stream( + module_under_test._DownloadState(), + bqstorage_client, + session, + google.cloud.bigquery_storage_v1.types.ReadStream(name="test"), + queue.Queue(), + mock.Mock(), + ) + + reader.rows.assert_called_once_with() + + @pytest.mark.parametrize( "stream_count,maxsize_kwarg,expected_call_count,expected_maxsize", [ diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 2be8daab65b4..6b62eb85b23e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -663,7 +663,7 @@ def test_ensure_bqstorage_client_obsolete_dependency(self): client = self._make_one(project=self.PROJECT, credentials=creds) patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: @@ -700,7 +700,7 @@ def test_ensure_bqstorage_client_existing_client_check_fails(self): mock_storage_client = mock.sentinel.mock_storage_client patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 5e9bf28a949c..d030482cc60d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -368,7 +368,7 @@ def test__make_bqstorage_client_true_obsolete_dependency(): ) patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index b30f16fe0e64..37650cd27400 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1889,7 +1889,7 @@ def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( - "google.cloud.bigquery.table._helpers._verify_bq_storage_version", + "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: From b6d4c2393df25d6aeffeb23be09aeb765e3fc602 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 16 Jul 2021 12:02:15 -0500 Subject: [PATCH 1209/2016] docs: add loading data from Firestore backup sample (#737) Follow-up to https://github.com/googleapis/python-bigquery/pull/736 To be included here: https://cloud.google.com/bigquery/docs/loading-data-cloud-firestore Also * Use `google-cloud-testutils` for cleanup as described in https://github.com/googleapis/python-test-utils/pull/39 --- .../samples/snippets/conftest.py | 39 +++++-------- .../snippets/load_table_uri_firestore.py | 55 +++++++++++++++++++ .../snippets/load_table_uri_firestore_test.py | 21 +++++++ .../samples/snippets/requirements-test.txt | 1 + .../samples/snippets/test_update_with_dml.py | 4 +- 5 files changed, 94 insertions(+), 26 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/conftest.py b/packages/google-cloud-bigquery/samples/snippets/conftest.py index cb11eb68f636..000e5f85cc16 100644 --- a/packages/google-cloud-bigquery/samples/snippets/conftest.py +++ b/packages/google-cloud-bigquery/samples/snippets/conftest.py @@ -12,38 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime -import random - from google.cloud import bigquery import pytest +import test_utils.prefixer -RESOURCE_PREFIX = "python_bigquery_samples_snippets" -RESOURCE_DATE_FORMAT = "%Y%m%d_%H%M%S" -RESOURCE_DATE_LENGTH = 4 + 2 + 2 + 1 + 2 + 2 + 2 - - -def resource_prefix() -> str: - timestamp = datetime.datetime.utcnow().strftime(RESOURCE_DATE_FORMAT) - random_string = hex(random.randrange(1000000))[2:] - return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}" - - -def resource_name_to_date(resource_name: str): - start_date = len(RESOURCE_PREFIX) + 1 - date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH] - return datetime.datetime.strptime(date_string, RESOURCE_DATE_FORMAT) +prefixer = test_utils.prefixer.Prefixer("python-bigquery", "samples/snippets") @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): - yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) for dataset in bigquery_client.list_datasets(): - if ( - dataset.dataset_id.startswith(RESOURCE_PREFIX) - and resource_name_to_date(dataset.dataset_id) < yesterday - ): + if prefixer.should_cleanup(dataset.dataset_id): bigquery_client.delete_dataset( dataset, delete_contents=True, not_found_ok=True ) @@ -62,7 +42,7 @@ def project_id(bigquery_client): @pytest.fixture(scope="session") def dataset_id(bigquery_client: bigquery.Client, project_id: str): - dataset_id = resource_prefix() + dataset_id = prefixer.create_prefix() full_dataset_id = f"{project_id}.{dataset_id}" dataset = bigquery.Dataset(full_dataset_id) bigquery_client.create_dataset(dataset) @@ -70,6 +50,17 @@ def dataset_id(bigquery_client: bigquery.Client, project_id: str): bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) +@pytest.fixture +def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + """Create a new table ID each time, so random_table_id can be used as + target for load jobs. + """ + random_table_id = prefixer.create_prefix() + full_table_id = f"{project_id}.{dataset_id}.{random_table_id}" + yield full_table_id + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + @pytest.fixture def bigquery_client_patch(monkeypatch, bigquery_client): monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore.py b/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore.py new file mode 100644 index 000000000000..bf9d0134931e --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore.py @@ -0,0 +1,55 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_firestore(table_id): + orig_table_id = table_id + # [START bigquery_load_table_gcs_firestore] + # TODO(developer): Set table_id to the ID of the table to create. + table_id = "your-project.your_dataset.your_table_name" + + # TODO(developer): Set uri to the path of the kind export metadata + uri = ( + "gs://cloud-samples-data/bigquery/us-states" + "/2021-07-02T16:04:48_70344/all_namespaces/kind_us-states" + "/all_namespaces_kind_us-states.export_metadata" + ) + + # TODO(developer): Set projection_fields to a list of document properties + # to import. Leave unset or set to `None` for all fields. + projection_fields = ["name", "post_abbr"] + + # [END bigquery_load_table_gcs_firestore] + table_id = orig_table_id + + # [START bigquery_load_table_gcs_firestore] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.DATASTORE_BACKUP, + projection_fields=projection_fields, + ) + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_firestore] diff --git a/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py b/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py new file mode 100644 index 000000000000..ffa02cdf9024 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import load_table_uri_firestore + + +def test_load_table_uri_firestore(capsys, random_table_id): + load_table_uri_firestore.load_table_uri_firestore(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index b0cf76724270..9e9d4e40f406 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,2 +1,3 @@ +google-cloud-testutils==0.3.0 pytest==6.2.4 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py b/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py index 3cca7a649c59..912fd76e24ee 100644 --- a/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py +++ b/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py @@ -15,13 +15,13 @@ from google.cloud import bigquery import pytest -from conftest import resource_prefix +from conftest import prefixer import update_with_dml @pytest.fixture def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): - table_id = f"{resource_prefix()}_update_with_dml" + table_id = f"{prefixer.create_prefix()}_update_with_dml" yield table_id full_table_id = f"{project_id}.{dataset_id}.{table_id}" bigquery_client.delete_table(full_table_id, not_found_ok=True) From 92eb4f27da390432238080a5fcf60d0cd3f61ecd Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 19 Jul 2021 10:00:41 +0200 Subject: [PATCH 1210/2016] feat: add standard sql table type, update scalar type enums (#777) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add standard sql table type, update scalar type enums Committer: @shollyman PiperOrigin-RevId: 385164907 Source-Link: https://github.com/googleapis/googleapis/commit/9ae82b82bdb634058af4b2bafe53c37b8566f68d Source-Link: https://github.com/googleapis/googleapis-gen/commit/bc1724b0b544bdcd9b5b2f4e3d8676f75adacfdf * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md * fix: exclude copying microgenerated '.coveragrc' * fix: add 'INTERVAL'/'JSON' to _SQL_SCALAR_TYPES Co-authored-by: Owl Bot Co-authored-by: Tres Seaver --- .../google/cloud/bigquery/enums.py | 2 ++ .../google/cloud/bigquery_v2/__init__.py | 2 ++ .../cloud/bigquery_v2/types/__init__.py | 2 ++ .../cloud/bigquery_v2/types/standard_sql.py | 19 ++++++++++++++++++- packages/google-cloud-bigquery/owlbot.py | 1 + 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index ef35dffe0639..0da01d665e51 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -191,9 +191,11 @@ class KeyResultStatementKind: "DATE", "TIME", "DATETIME", + "INTERVAL", "GEOGRAPHY", "NUMERIC", "BIGNUMERIC", + "JSON", ) ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py index 476bd5747e9f..f9957efa980f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py @@ -26,6 +26,7 @@ from .types.standard_sql import StandardSqlDataType from .types.standard_sql import StandardSqlField from .types.standard_sql import StandardSqlStructType +from .types.standard_sql import StandardSqlTableType from .types.table_reference import TableReference __all__ = ( @@ -40,5 +41,6 @@ "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "StandardSqlTableType", "TableReference", ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py index 9c850dca13a7..83bbb3a54261 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py @@ -27,6 +27,7 @@ StandardSqlDataType, StandardSqlField, StandardSqlStructType, + StandardSqlTableType, ) from .table_reference import TableReference @@ -42,5 +43,6 @@ "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "StandardSqlTableType", "TableReference", ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py index b2191a41760d..7a845fc4885e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py @@ -18,7 +18,12 @@ __protobuf__ = proto.module( package="google.cloud.bigquery.v2", - manifest={"StandardSqlDataType", "StandardSqlField", "StandardSqlStructType",}, + manifest={ + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", + "StandardSqlTableType", + }, ) @@ -54,9 +59,11 @@ class TypeKind(proto.Enum): DATE = 10 TIME = 20 DATETIME = 21 + INTERVAL = 26 GEOGRAPHY = 22 NUMERIC = 23 BIGNUMERIC = 24 + JSON = 25 ARRAY = 16 STRUCT = 17 @@ -97,4 +104,14 @@ class StandardSqlStructType(proto.Message): fields = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) +class StandardSqlTableType(proto.Message): + r"""A table type + Attributes: + columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): + The columns in this table type + """ + + columns = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) + + __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 476c5ee5dce0..09845480a8af 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -70,6 +70,7 @@ library, excludes=[ "*.tar.gz", + ".coveragerc", "docs/index.rst", f"docs/bigquery_{library.name}/*_service.rst", f"docs/bigquery_{library.name}/services.rst", From 2e6268db4af2f884934499e7fe3fe4e7823735db Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 19 Jul 2021 22:39:44 +0200 Subject: [PATCH 1211/2016] feat: add support for user defined Table View Functions (#724) * Add auxiliary classes for TVF routines * Add return_table_type property to Routine * Add system test for TVF routines * Use the generated StandardSqlTableType class * Update docs with new changes * Add missing space in misc. Sphinx directives --- .../google-cloud-bigquery/docs/reference.rst | 1 + .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/job/query.py | 14 +- .../google/cloud/bigquery/routine/__init__.py | 2 + .../google/cloud/bigquery/routine/routine.py | 45 +++++++ .../google/cloud/bigquery/table.py | 14 +- .../tests/system/test_client.py | 79 +++++++++++ .../tests/unit/routine/test_routine.py | 127 ++++++++++++++++++ 8 files changed, 270 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 8c38d0c44d72..8a5bff9a47c4 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -118,6 +118,7 @@ Routine routine.Routine routine.RoutineArgument routine.RoutineReference + routine.RoutineType Schema ====== diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index ced8cefaef78..222aadcc9208 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -85,6 +85,7 @@ from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference +from google.cloud.bigquery.routine import RoutineType from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning @@ -162,6 +163,7 @@ "KeyResultStatementKind", "OperationType", "QueryPriority", + "RoutineType", "SchemaUpdateOption", "SourceFormat", "SqlTypeNames", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index d588e9b5a61f..2cb7ee28e88f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1386,12 +1386,12 @@ def to_arrow( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. - ..versionadded:: 2.21.0 + .. versionadded:: 2.21.0 Returns: pyarrow.Table @@ -1403,7 +1403,7 @@ def to_arrow( ValueError: If the :mod:`pyarrow` library cannot be imported. - ..versionadded:: 1.17.0 + .. versionadded:: 1.17.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_arrow( @@ -1452,7 +1452,7 @@ def to_dataframe( :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` for details. - ..versionadded:: 1.11.0 + .. versionadded:: 1.11.0 create_bqstorage_client (Optional[bool]): If ``True`` (default), create a BigQuery Storage API client using the default API settings. The BigQuery Storage API @@ -1461,18 +1461,18 @@ def to_dataframe( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 date_as_object (Optional[bool]): If ``True`` (default), cast dates to objects. If ``False``, convert to datetime64[ns] dtype. - ..versionadded:: 1.26.0 + .. versionadded:: 1.26.0 max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. - ..versionadded:: 2.21.0 + .. versionadded:: 2.21.0 Returns: A :class:`~pandas.DataFrame` populated with row data and column diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py index d1c79b05eee9..7353073c8c1f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py @@ -19,6 +19,7 @@ from google.cloud.bigquery.routine.routine import Routine from google.cloud.bigquery.routine.routine import RoutineArgument from google.cloud.bigquery.routine.routine import RoutineReference +from google.cloud.bigquery.routine.routine import RoutineType __all__ = ( @@ -26,4 +27,5 @@ "Routine", "RoutineArgument", "RoutineReference", + "RoutineType", ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py index bbc0a7693739..a776212c3000 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py @@ -21,6 +21,21 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers import google.cloud.bigquery_v2.types +from google.cloud.bigquery_v2.types import StandardSqlTableType + + +class RoutineType: + """The fine-grained type of the routine. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#routinetype + + .. versionadded:: 2.22.0 + """ + + ROUTINE_TYPE_UNSPECIFIED = "ROUTINE_TYPE_UNSPECIFIED" + SCALAR_FUNCTION = "SCALAR_FUNCTION" + PROCEDURE = "PROCEDURE" + TABLE_VALUED_FUNCTION = "TABLE_VALUED_FUNCTION" class Routine(object): @@ -48,6 +63,7 @@ class Routine(object): "modified": "lastModifiedTime", "reference": "routineReference", "return_type": "returnType", + "return_table_type": "returnTableType", "type_": "routineType", "description": "description", "determinism_level": "determinismLevel", @@ -204,6 +220,35 @@ def return_type(self, value): resource = None self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource + @property + def return_table_type(self) -> StandardSqlTableType: + """The return type of a Table Valued Function (TVF) routine. + + .. versionadded:: 2.22.0 + """ + resource = self._properties.get( + self._PROPERTY_TO_API_FIELD["return_table_type"] + ) + if not resource: + return resource + + output = google.cloud.bigquery_v2.types.StandardSqlTableType() + raw_protobuf = json_format.ParseDict( + resource, output._pb, ignore_unknown_fields=True + ) + return type(output).wrap(raw_protobuf) + + @return_table_type.setter + def return_table_type(self, value): + if not value: + resource = None + else: + resource = { + "columns": [json_format.MessageToDict(col._pb) for col in value.columns] + } + + self._properties[self._PROPERTY_TO_API_FIELD["return_table_type"]] = resource + @property def imported_libraries(self): """List[str]: The path of the imported JavaScript libraries. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 2d9c15f50f03..18d969a3f078 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1684,7 +1684,7 @@ def to_arrow( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 Returns: pyarrow.Table @@ -1695,7 +1695,7 @@ def to_arrow( Raises: ValueError: If the :mod:`pyarrow` library cannot be imported. - ..versionadded:: 1.17.0 + .. versionadded:: 1.17.0 """ if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) @@ -1775,7 +1775,7 @@ def to_dataframe_iterable( created by the server. If ``max_queue_size`` is :data:`None`, the queue size is infinite. - ..versionadded:: 2.14.0 + .. versionadded:: 2.14.0 Returns: pandas.DataFrame: @@ -1861,7 +1861,7 @@ def to_dataframe( Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. - ..versionadded:: 1.11.0 + .. versionadded:: 1.11.0 create_bqstorage_client (Optional[bool]): If ``True`` (default), create a BigQuery Storage API client using the default API settings. The BigQuery Storage API @@ -1870,13 +1870,13 @@ def to_dataframe( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 date_as_object (Optional[bool]): If ``True`` (default), cast dates to objects. If ``False``, convert to datetime64[ns] dtype. - ..versionadded:: 1.26.0 + .. versionadded:: 1.26.0 Returns: pandas.DataFrame: @@ -2010,7 +2010,7 @@ def to_dataframe_iterable( ) -> Iterator["pandas.DataFrame"]: """Create an iterable of pandas DataFrames, to process the table as a stream. - ..versionadded:: 2.21.0 + .. versionadded:: 2.21.0 Args: bqstorage_client: diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index cbca73619b19..ceb62b8cd1e9 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2228,6 +2228,85 @@ def test_create_routine(self): assert len(rows) == 1 assert rows[0].max_value == 100.0 + def test_create_tvf_routine(self): + from google.cloud.bigquery import Routine, RoutineArgument, RoutineType + + StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + INT64 = StandardSqlDataType.TypeKind.INT64 + STRING = StandardSqlDataType.TypeKind.STRING + + client = Config.CLIENT + + dataset = self.temp_dataset(_make_dataset_id("create_tvf_routine")) + routine_ref = dataset.routine("test_tvf_routine") + + routine_body = """ + SELECT int_col, str_col + FROM ( + UNNEST([1, 2, 3]) int_col + JOIN + (SELECT str_col FROM UNNEST(["one", "two", "three"]) str_col) + ON TRUE + ) + WHERE int_col > threshold + """ + + return_table_type = StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", type=StandardSqlDataType(type_kind=INT64), + ), + StandardSqlField( + name="str_col", type=StandardSqlDataType(type_kind=STRING), + ), + ] + ) + + routine_args = [ + RoutineArgument( + name="threshold", data_type=StandardSqlDataType(type_kind=INT64), + ) + ] + + routine_def = Routine( + routine_ref, + type_=RoutineType.TABLE_VALUED_FUNCTION, + arguments=routine_args, + return_table_type=return_table_type, + body=routine_body, + ) + + # Create TVF routine. + client.delete_routine(routine_ref, not_found_ok=True) + routine = client.create_routine(routine_def) + + assert routine.body == routine_body + assert routine.return_table_type == return_table_type + assert routine.arguments == routine_args + + # Execute the routine to see if it's working as expected. + query_job = client.query( + f""" + SELECT int_col, str_col + FROM `{routine.reference}`(1) + ORDER BY int_col, str_col ASC + """ + ) + + result_rows = [tuple(row) for row in query_job.result()] + expected = [ + (2, "one"), + (2, "three"), + (2, "two"), + (3, "one"), + (3, "three"), + (3, "two"), + ] + assert result_rows == expected + def test_create_table_rows_fetch_nested_schema(self): table_name = "test_table" dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema")) diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py index 0a59e7c5fed7..fdaf13324189 100644 --- a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py @@ -156,12 +156,86 @@ def test_from_api_repr(target_class): assert actual_routine.return_type == bigquery_v2.types.StandardSqlDataType( type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) + assert actual_routine.return_table_type is None assert actual_routine.type_ == "SCALAR_FUNCTION" assert actual_routine._properties["someNewField"] == "someValue" assert actual_routine.description == "A routine description." assert actual_routine.determinism_level == "DETERMINISTIC" +def test_from_api_repr_tvf_function(target_class): + from google.cloud.bigquery.routine import RoutineArgument + from google.cloud.bigquery.routine import RoutineReference + from google.cloud.bigquery.routine import RoutineType + + StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + creation_time = datetime.datetime( + 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + modified_time = datetime.datetime( + 2011, 10, 1, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + resource = { + "routineReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + }, + "etag": "abcdefg", + "creationTime": str(google.cloud._helpers._millis(creation_time)), + "lastModifiedTime": str(google.cloud._helpers._millis(modified_time)), + "definitionBody": "SELECT x FROM UNNEST([1,2,3]) x WHERE x > a", + "arguments": [{"name": "a", "dataType": {"typeKind": "INT64"}}], + "language": "SQL", + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + }, + "routineType": "TABLE_VALUED_FUNCTION", + "someNewField": "someValue", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC, + } + actual_routine = target_class.from_api_repr(resource) + + assert actual_routine.project == "my-project" + assert actual_routine.dataset_id == "my_dataset" + assert actual_routine.routine_id == "my_routine" + assert ( + actual_routine.path + == "/projects/my-project/datasets/my_dataset/routines/my_routine" + ) + assert actual_routine.reference == RoutineReference.from_string( + "my-project.my_dataset.my_routine" + ) + assert actual_routine.etag == "abcdefg" + assert actual_routine.created == creation_time + assert actual_routine.modified == modified_time + assert actual_routine.arguments == [ + RoutineArgument( + name="a", + data_type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ) + ] + assert actual_routine.body == "SELECT x FROM UNNEST([1,2,3]) x WHERE x > a" + assert actual_routine.language == "SQL" + assert actual_routine.return_type is None + assert actual_routine.return_table_type == StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ) + ] + ) + assert actual_routine.type_ == RoutineType.TABLE_VALUED_FUNCTION + assert actual_routine._properties["someNewField"] == "someValue" + assert actual_routine.description == "A routine description." + assert actual_routine.determinism_level == "DETERMINISTIC" + + def test_from_api_repr_w_minimal_resource(target_class): from google.cloud.bigquery.routine import RoutineReference @@ -261,6 +335,24 @@ def test_from_api_repr_w_unknown_fields(target_class): ["return_type"], {"returnType": {"typeKind": "INT64"}}, ), + ( + { + "definitionBody": "SELECT x FROM UNNEST([1,2,3]) x WHERE x > 1", + "language": "SQL", + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + }, + "routineType": "TABLE_VALUED_FUNCTION", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, + }, + ["return_table_type"], + { + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + } + }, + ), ( { "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], @@ -361,6 +453,41 @@ def test_set_return_type_w_none(object_under_test): assert object_under_test._properties["returnType"] is None +def test_set_return_table_type_w_none(object_under_test): + object_under_test.return_table_type = None + assert object_under_test.return_table_type is None + assert object_under_test._properties["returnTableType"] is None + + +def test_set_return_table_type_w_not_none(object_under_test): + StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + table_type = StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ), + StandardSqlField( + name="str_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.STRING), + ), + ] + ) + + object_under_test.return_table_type = table_type + + assert object_under_test.return_table_type == table_type + assert object_under_test._properties["returnTableType"] == { + "columns": [ + {"name": "int_col", "type": {"typeKind": "INT64"}}, + {"name": "str_col", "type": {"typeKind": "STRING"}}, + ] + } + + def test_set_description_w_none(object_under_test): object_under_test.description = None assert object_under_test.description is None From ff79006029c4ff8ae4cb683c78a6584e6869b001 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 19 Jul 2021 20:58:25 +0000 Subject: [PATCH 1212/2016] chore: release 2.22.0 (#771) :robot: I have created a release \*beep\* \*boop\* --- ## [2.22.0](https://www.github.com/googleapis/python-bigquery/compare/v2.21.0...v2.22.0) (2021-07-19) ### Features * add `LoadJobConfig.projection_fields` to select DATASTORE_BACKUP fields ([#736](https://www.github.com/googleapis/python-bigquery/issues/736)) ([c45a738](https://www.github.com/googleapis/python-bigquery/commit/c45a7380871af3dfbd3c45524cb606c60e1a01d1)) * add standard sql table type, update scalar type enums ([#777](https://www.github.com/googleapis/python-bigquery/issues/777)) ([b8b5433](https://www.github.com/googleapis/python-bigquery/commit/b8b5433898ec881f8da1303614780a660d94733a)) * add support for more detailed DML stats ([#758](https://www.github.com/googleapis/python-bigquery/issues/758)) ([36fe86f](https://www.github.com/googleapis/python-bigquery/commit/36fe86f41c1a8f46167284f752a6d6bbf886a04b)) * add support for user defined Table View Functions ([#724](https://www.github.com/googleapis/python-bigquery/issues/724)) ([8c7b839](https://www.github.com/googleapis/python-bigquery/commit/8c7b839a6ac1491c1c3b6b0e8755f4b70ed72ee3)) ### Bug Fixes * avoid possible job already exists error ([#751](https://www.github.com/googleapis/python-bigquery/issues/751)) ([45b9308](https://www.github.com/googleapis/python-bigquery/commit/45b93089f5398740413104285cc8acfd5ebc9c08)) ### Dependencies * allow 2.x versions of `google-api-core`, `google-cloud-core`, `google-resumable-media` ([#770](https://www.github.com/googleapis/python-bigquery/issues/770)) ([87a09fa](https://www.github.com/googleapis/python-bigquery/commit/87a09fa3f2a9ab35728a1ac925f9d5f2e6616c65)) ### Documentation * add loading data from Firestore backup sample ([#737](https://www.github.com/googleapis/python-bigquery/issues/737)) ([22fd848](https://www.github.com/googleapis/python-bigquery/commit/22fd848cae4af1148040e1faa31dd15a4d674687)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 25 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 5fba4c5176ff..2439d64b0b54 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,31 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.22.0](https://www.github.com/googleapis/python-bigquery/compare/v2.21.0...v2.22.0) (2021-07-19) + + +### Features + +* add `LoadJobConfig.projection_fields` to select DATASTORE_BACKUP fields ([#736](https://www.github.com/googleapis/python-bigquery/issues/736)) ([c45a738](https://www.github.com/googleapis/python-bigquery/commit/c45a7380871af3dfbd3c45524cb606c60e1a01d1)) +* add standard sql table type, update scalar type enums ([#777](https://www.github.com/googleapis/python-bigquery/issues/777)) ([b8b5433](https://www.github.com/googleapis/python-bigquery/commit/b8b5433898ec881f8da1303614780a660d94733a)) +* add support for more detailed DML stats ([#758](https://www.github.com/googleapis/python-bigquery/issues/758)) ([36fe86f](https://www.github.com/googleapis/python-bigquery/commit/36fe86f41c1a8f46167284f752a6d6bbf886a04b)) +* add support for user defined Table View Functions ([#724](https://www.github.com/googleapis/python-bigquery/issues/724)) ([8c7b839](https://www.github.com/googleapis/python-bigquery/commit/8c7b839a6ac1491c1c3b6b0e8755f4b70ed72ee3)) + + +### Bug Fixes + +* avoid possible job already exists error ([#751](https://www.github.com/googleapis/python-bigquery/issues/751)) ([45b9308](https://www.github.com/googleapis/python-bigquery/commit/45b93089f5398740413104285cc8acfd5ebc9c08)) + + +### Dependencies + +* allow 2.x versions of `google-api-core`, `google-cloud-core`, `google-resumable-media` ([#770](https://www.github.com/googleapis/python-bigquery/issues/770)) ([87a09fa](https://www.github.com/googleapis/python-bigquery/commit/87a09fa3f2a9ab35728a1ac925f9d5f2e6616c65)) + + +### Documentation + +* add loading data from Firestore backup sample ([#737](https://www.github.com/googleapis/python-bigquery/issues/737)) ([22fd848](https://www.github.com/googleapis/python-bigquery/commit/22fd848cae4af1148040e1faa31dd15a4d674687)) + ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-12) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 563b0e160e7a..2db0ca5183ed 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.21.0" +__version__ = "2.22.0" From 5be82297f99d2741c9767ff388d891c7e7be1017 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 20 Jul 2021 02:07:00 -0600 Subject: [PATCH 1213/2016] chore: add note to preserve >1, <3 version range for google-api-core, google-cloud-core (#784) * fix(deps): pin 'google-{api,cloud}-core', 'google-auth' to allow 2.x versions * Update setup.py --- packages/google-cloud-bigquery/setup.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 71958ccf99c5..0ca19b5761bd 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -30,9 +30,15 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 - "google-api-core[grpc] >= 1.29.0, < 3.0.0dev", + # NOTE: Maintainers, please do not require google-api-core>=2.x.x + # Until this issue is closed + # https://github.com/googleapis/google-cloud-python/issues/10566 + "google-api-core[grpc] >= 1.29.0, <3.0.0dev", "proto-plus >= 1.10.0", - "google-cloud-core >= 1.4.1, < 3.0dev", + # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x + # Until this issue is closed + # https://github.com/googleapis/google-cloud-python/issues/10566 + "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", From 4e28ea9cbbbe51129f5913a1d98128a4f9d6b2ec Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 20 Jul 2021 10:07:30 +0200 Subject: [PATCH 1214/2016] chore(deps): update dependency google-cloud-bigquery to v2.22.0 (#783) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index c7aa209adc8d..d70ac3fa4d58 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.21.0 +google-cloud-bigquery==2.22.0 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index b62c84c3365a..7b4721eacd1c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.21.0 +google-cloud-bigquery==2.22.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 From 0c0bd32be27675e4233386ee88b5d5838e527f25 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 21 Jul 2021 18:59:49 +0200 Subject: [PATCH 1215/2016] fix: issue a warning if buggy pyarrow is detected (#787) Some pyarrow versions can cause issue when loading data from dataframe. This commit detects if such pyarrow version is installed and warns the user. --- .../google/cloud/bigquery/client.py | 15 ++++++++ .../tests/unit/test_client.py | 37 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 8572ba911008..273cf5f77681 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -27,6 +27,7 @@ import json import math import os +import packaging.version import tempfile from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union import uuid @@ -34,6 +35,8 @@ try: import pyarrow + + _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) except ImportError: # pragma: NO COVER pyarrow = None @@ -118,6 +121,9 @@ # https://github.com/googleapis/python-bigquery/issues/438 _MIN_GET_QUERY_RESULTS_TIMEOUT = 120 +# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 +_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -2609,6 +2615,15 @@ def load_table_from_dataframe( try: if job_config.source_format == job.SourceFormat.PARQUET: + if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: + msg = ( + "Loading dataframe data in PARQUET format with pyarrow " + f"{_PYARROW_VERSION} can result in data corruption. It is " + "therefore *strongly* advised to use a different pyarrow " + "version or a different source format. " + "See: https://github.com/googleapis/python-bigquery/issues/781" + ) + warnings.warn(msg, category=RuntimeWarning) if job_config.schema: if parquet_compression == "snappy": # adjust the default value diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 6b62eb85b23e..c1aba9b6727f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -27,6 +27,7 @@ import warnings import mock +import packaging import requests import pytest import pytz @@ -7510,6 +7511,42 @@ def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): parquet_compression="gzip", ) + def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): + pytest.importorskip("pandas", reason="Requires `pandas`") + pytest.importorskip("pyarrow", reason="Requires `pyarrow`") + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + pyarrow_version_patch = mock.patch( + "google.cloud.bigquery.client._PYARROW_VERSION", + packaging.version.parse("2.0.0"), # A known bad version of pyarrow. + ) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch, get_table_patch, pyarrow_version_patch: + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION, + ) + + expected_warnings = [ + warning for warning in warned if "pyarrow" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + assert issubclass(expected_warnings[0].category, RuntimeWarning) + msg = str(expected_warnings[0].message) + assert "pyarrow 2.0.0" in msg + assert "data corruption" in msg + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): From 34843ba5b891c1c51d1e6603708435abcc692e4b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 21 Jul 2021 19:37:06 +0200 Subject: [PATCH 1216/2016] feat: add Samples section to CONTRIBUTING.rst (#785) Source-Link: https://github.com/googleapis/synthtool/commit/52e4e46eff2a0b70e3ff5506a02929d089d077d4 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:6186535cbdbf6b9fe61f00294929221d060634dae4a0795c1cefdbc995b2d605 Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 2 +- .../google-cloud-bigquery/CONTRIBUTING.rst | 24 +++++++++++++++++++ .../samples/geography/noxfile.py | 5 ++-- .../samples/snippets/noxfile.py | 5 ++-- 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index cb06536dab0b..d57f74204625 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:5ff7446edeaede81c3ed58b23a4e76a5403fba1350ce28478045657303b6479d + digest: sha256:6186535cbdbf6b9fe61f00294929221d060634dae4a0795c1cefdbc995b2d605 diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index 102355b3a1dc..2faf5aed3771 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -177,6 +177,30 @@ Build the docs via: $ nox -s docs +************************* +Samples and code snippets +************************* + +Code samples and snippets live in the `samples/` catalogue. Feel free to +provide more examples, but make sure to write tests for those examples. +Each folder containing example code requires its own `noxfile.py` script +which automates testing. If you decide to create a new folder, you can +base it on the `samples/snippets` folder (providing `noxfile.py` and +the requirements files). + +The tests will run against a real Google Cloud Project, so you should +configure them just like the System Tests. + +- To run sample tests, you can execute:: + + # Run all tests in a folder + $ cd samples/snippets + $ nox -s py-3.8 + + # Run a single sample test + $ cd samples/snippets + $ nox -s py-3.8 -- -k + ******************************************** Note About ``README`` as it pertains to PyPI ******************************************** diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 160fe728648f..9fc7f17820d4 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -28,8 +28,9 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -# Copy `noxfile_config.py` to your directory and modify it instead. +BLACK_VERSION = "black==19.10b0" +# Copy `noxfile_config.py` to your directory and modify it instead. # `TEST_CONFIG` dict is a configuration hook that allows users to # modify the test configurations. The values here should be in sync @@ -159,7 +160,7 @@ def lint(session: nox.sessions.Session) -> None: @nox.session def blacken(session: nox.sessions.Session) -> None: - session.install("black") + session.install(BLACK_VERSION) python_files = [path for path in os.listdir(".") if path.endswith(".py")] session.run("black", *python_files) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 160fe728648f..9fc7f17820d4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -28,8 +28,9 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -# Copy `noxfile_config.py` to your directory and modify it instead. +BLACK_VERSION = "black==19.10b0" +# Copy `noxfile_config.py` to your directory and modify it instead. # `TEST_CONFIG` dict is a configuration hook that allows users to # modify the test configurations. The values here should be in sync @@ -159,7 +160,7 @@ def lint(session: nox.sessions.Session) -> None: @nox.session def blacken(session: nox.sessions.Session) -> None: - session.install("black") + session.install(BLACK_VERSION) python_files = [path for path in os.listdir(".") if path.endswith(".py")] session.run("black", *python_files) From 2fe0f4f755086d0b6f88265a69537ac3406f6faa Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 22 Jul 2021 10:21:02 -0500 Subject: [PATCH 1217/2016] docs: add sample to delete job metadata (#798) Planned to be included in https://cloud.google.com/bigquery/docs/managing-jobs --- .../samples/snippets/conftest.py | 25 +++++++++++ .../samples/snippets/delete_job.py | 44 +++++++++++++++++++ .../samples/snippets/delete_job_test.py | 33 ++++++++++++++ .../tests/system/test_client.py | 17 ------- 4 files changed, 102 insertions(+), 17 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/delete_job.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/delete_job_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/conftest.py b/packages/google-cloud-bigquery/samples/snippets/conftest.py index 000e5f85cc16..74984f902574 100644 --- a/packages/google-cloud-bigquery/samples/snippets/conftest.py +++ b/packages/google-cloud-bigquery/samples/snippets/conftest.py @@ -50,6 +50,31 @@ def dataset_id(bigquery_client: bigquery.Client, project_id: str): bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) +@pytest.fixture(scope="session") +def dataset_id_us_east1(bigquery_client: bigquery.Client, project_id: str): + dataset_id = prefixer.create_prefix() + full_dataset_id = f"{project_id}.{dataset_id}" + dataset = bigquery.Dataset(full_dataset_id) + dataset.location = "us-east1" + bigquery_client.create_dataset(dataset) + yield dataset_id + bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) + + +@pytest.fixture(scope="session") +def table_id_us_east1( + bigquery_client: bigquery.Client, project_id: str, dataset_id_us_east1: str +): + table_id = prefixer.create_prefix() + full_table_id = f"{project_id}.{dataset_id_us_east1}.{table_id}" + table = bigquery.Table( + full_table_id, schema=[bigquery.SchemaField("string_col", "STRING")] + ) + bigquery_client.create_table(table) + yield full_table_id + bigquery_client.delete_table(table, not_found_ok=True) + + @pytest.fixture def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): """Create a new table ID each time, so random_table_id can be used as diff --git a/packages/google-cloud-bigquery/samples/snippets/delete_job.py b/packages/google-cloud-bigquery/samples/snippets/delete_job.py new file mode 100644 index 000000000000..abed0c90dcad --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/delete_job.py @@ -0,0 +1,44 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def delete_job_metadata(job_id: str, location: str): + orig_job_id = job_id + orig_location = location + # [START bigquery_delete_job] + from google.cloud import bigquery + from google.api_core import exceptions + + # TODO(developer): Set the job ID to the ID of the job whose metadata you + # wish to delete. + job_id = "abcd-efgh-ijkl-mnop" + + # TODO(developer): Set the location to the region or multi-region + # containing the job. + location = "us-east1" + + # [END bigquery_delete_job] + job_id = orig_job_id + location = orig_location + + # [START bigquery_delete_job] + client = bigquery.Client() + + client.delete_job_metadata(job_id, location=location) + + try: + client.get_job(job_id, location=location) + except exceptions.NotFound: + print(f"Job metadata for job {location}:{job_id} was deleted.") + # [END bigquery_delete_job] diff --git a/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py b/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py new file mode 100644 index 000000000000..c9baa817d63d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py @@ -0,0 +1,33 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + +import delete_job + + +def test_delete_job_metadata( + capsys, bigquery_client: bigquery.Client, table_id_us_east1: str +): + query_job: bigquery.QueryJob = bigquery_client.query( + f"SELECT COUNT(*) FROM `{table_id_us_east1}`", location="us-east1", + ) + query_job.result() + assert query_job.job_id is not None + + delete_job.delete_job_metadata(query_job.job_id, "us-east1") + + out, _ = capsys.readouterr() + assert "deleted" in out + assert f"us-east1:{query_job.job_id}" in out diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index ceb62b8cd1e9..2536af9fc4fd 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -63,7 +63,6 @@ from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums @@ -506,22 +505,6 @@ def test_delete_dataset_delete_contents_false(self): with self.assertRaises(exceptions.BadRequest): Config.CLIENT.delete_dataset(dataset) - def test_delete_job_metadata(self): - dataset_id = _make_dataset_id("us_east1") - self.temp_dataset(dataset_id, location="us-east1") - full_table_id = f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_metadata" - table = Table(full_table_id, schema=[SchemaField("col", "STRING")]) - Config.CLIENT.create_table(table) - query_job: bigquery.QueryJob = Config.CLIENT.query( - f"SELECT COUNT(*) FROM `{full_table_id}`", location="us-east1", - ) - query_job.result() - self.assertIsNotNone(Config.CLIENT.get_job(query_job)) - - Config.CLIENT.delete_job_metadata(query_job) - with self.assertRaises(NotFound): - Config.CLIENT.get_job(query_job) - def test_get_table_w_public_dataset(self): public = "bigquery-public-data" dataset_id = "samples" From cee1efe6ca25b3216ffdee4552fc2676e6b57539 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 22 Jul 2021 17:43:54 +0200 Subject: [PATCH 1218/2016] chore(deps): update dependency grpcio to v1.39.0 (#796) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 7b4721eacd1c..3d9dce7182df 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.22.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 -grpcio==1.38.1 +grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From f99a2ef28a3a385c364eb1df8f7c55254e357277 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 22 Jul 2021 14:36:30 -0400 Subject: [PATCH 1219/2016] fix: use a larger chunk size when loading data (#799) * The chunk size used for data uploads was too small (1MB). Now it's 100MB. * fix: The chunk size used for data uploads was too small --- .../google/cloud/bigquery/client.py | 2 +- .../tests/unit/test_client.py | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 273cf5f77681..742ecac2e742 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -98,7 +98,7 @@ from google.cloud.bigquery.table import RowIterator -_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB +_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 _BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType=" diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c1aba9b6727f..5356855113dc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -8076,3 +8076,23 @@ def test_schema_to_json_with_file_object(self): client.schema_to_json(schema_list, fake_file) assert file_content == json.loads(fake_file.getvalue()) + + +def test_upload_chunksize(client): + with mock.patch("google.cloud.bigquery.client.ResumableUpload") as RU: + upload = RU.return_value + + upload.finished = False + + def transmit_next_chunk(transport): + upload.finished = True + result = mock.MagicMock() + result.json.return_value = {} + return result + + upload.transmit_next_chunk = transmit_next_chunk + f = io.BytesIO() + client.load_table_from_file(f, "foo.bar") + + chunk_size = RU.call_args_list[0][0][1] + assert chunk_size == 100 * (1 << 20) From a26c7ef2de1aa79c7fd61700ce69b8a4adbd54a5 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 22 Jul 2021 14:26:01 -0500 Subject: [PATCH 1220/2016] chore: release 2.22.1 (#797) * chore: release 2.22.1 * remove misc Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/CHANGELOG.md | 14 ++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 2439d64b0b54..7dbc5d4da09a 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22) + + +### Bug Fixes + +* issue a warning if buggy pyarrow is detected ([#787](https://www.github.com/googleapis/python-bigquery/issues/787)) ([e403721](https://www.github.com/googleapis/python-bigquery/commit/e403721af1373eb1f1a1c7be5b2182e3819ed1f9)) +* use a larger chunk size when loading data ([#799](https://www.github.com/googleapis/python-bigquery/issues/799)) ([b804373](https://www.github.com/googleapis/python-bigquery/commit/b804373277c1c1baa3370ebfb4783503b7ff360f)) + + +### Documentation + +* add Samples section to CONTRIBUTING.rst ([#785](https://www.github.com/googleapis/python-bigquery/issues/785)) ([e587029](https://www.github.com/googleapis/python-bigquery/commit/e58702967d572e83b4c774278818302594a511b7)) +* add sample to delete job metadata ([#798](https://www.github.com/googleapis/python-bigquery/issues/798)) ([be9b242](https://www.github.com/googleapis/python-bigquery/commit/be9b242f2180f5b795dfb3a168a97af1682999fd)) + ## [2.22.0](https://www.github.com/googleapis/python-bigquery/compare/v2.21.0...v2.22.0) (2021-07-19) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 2db0ca5183ed..dbc524478e00 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.22.0" +__version__ = "2.22.1" From b1c6daf65925b8ce638f81f25c19b702f605d411 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 23 Jul 2021 00:48:22 +0200 Subject: [PATCH 1221/2016] chore(deps): update dependency google-cloud-bigquery to v2.22.1 (#800) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index d70ac3fa4d58..3a83eda6413e 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.22.0 +google-cloud-bigquery==2.22.1 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 3d9dce7182df..ffa689a9e8df 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.22.0 +google-cloud-bigquery==2.22.1 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.39.0 From bd7d8261e7387f45cfdc0be113618ff82d056354 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Jul 2021 16:42:21 +0200 Subject: [PATCH 1222/2016] fix: retry ChunkedEncodingError by default (#802) --- packages/google-cloud-bigquery/google/cloud/bigquery/retry.py | 1 + packages/google-cloud-bigquery/tests/unit/test_retry.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 5e9075fe1b86..2df4de08b856 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -27,6 +27,7 @@ exceptions.TooManyRequests, exceptions.InternalServerError, exceptions.BadGateway, + requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, auth_exceptions.TransportError, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index 0bef1e5e185a..6fb7f93fde51 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -51,6 +51,10 @@ def test_w_unstructured_requests_connectionerror(self): exc = requests.exceptions.ConnectionError() self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_requests_chunked_encoding_error(self): + exc = requests.exceptions.ChunkedEncodingError() + self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): from google.auth.exceptions import TransportError From dbd3d05b53e809a412518984cf0eaad815a2edfb Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 23 Jul 2021 15:32:40 +0000 Subject: [PATCH 1223/2016] chore: fix kokoro config for samples (#804) Source-Link: https://github.com/googleapis/synthtool/commit/dd05f9d12f134871c9e45282349c9856fbebecdd Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- .../.kokoro/samples/python3.6/periodic-head.cfg | 2 +- .../.kokoro/samples/python3.7/periodic-head.cfg | 2 +- .../.kokoro/samples/python3.8/periodic-head.cfg | 2 +- .../.kokoro/samples/python3.9/periodic-head.cfg | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index d57f74204625..9ee60f7e4850 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:6186535cbdbf6b9fe61f00294929221d060634dae4a0795c1cefdbc995b2d605 + digest: sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg index f9cfcd33e058..5aa01bab5bf3 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" } diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg index f9cfcd33e058..5aa01bab5bf3 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" } diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg index f9cfcd33e058..5aa01bab5bf3 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" } diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg index f9cfcd33e058..5aa01bab5bf3 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" } From 80b5dc17cb91ab1efb382458d4083a71f201224e Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 25 Jul 2021 09:51:55 -0400 Subject: [PATCH 1224/2016] test: Stop creating extra datasets (#791) --- .../tests/system/conftest.py | 20 +- .../tests/system/test_client.py | 222 +++++++++--------- 2 files changed, 122 insertions(+), 120 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system/conftest.py b/packages/google-cloud-bigquery/tests/system/conftest.py index 4eef60e924df..7b389013f784 100644 --- a/packages/google-cloud-bigquery/tests/system/conftest.py +++ b/packages/google-cloud-bigquery/tests/system/conftest.py @@ -14,13 +14,25 @@ import pytest +from google.cloud import bigquery +import test_utils.prefixer + from . import helpers +prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system") + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_datasets(bigquery_client: bigquery.Client): + for dataset in bigquery_client.list_datasets(): + if prefixer.should_cleanup(dataset.dataset_id): + bigquery_client.delete_dataset( + dataset, delete_contents=True, not_found_ok=True + ) + @pytest.fixture(scope="session") def bigquery_client(): - from google.cloud import bigquery - return bigquery.Client() @@ -33,10 +45,10 @@ def bqstorage_client(bigquery_client): @pytest.fixture(scope="session") def dataset_id(bigquery_client): - dataset_id = f"bqsystem_{helpers.temp_suffix()}" + dataset_id = prefixer.create_prefix() bigquery_client.create_dataset(dataset_id) yield dataset_id - bigquery_client.delete_dataset(dataset_id, delete_contents=True) + bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) @pytest.fixture diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 2536af9fc4fd..baa2b6ad8159 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -153,7 +153,6 @@ class Config(object): CLIENT: Optional[bigquery.Client] = None CURSOR = None - DATASET = None def setUpModule(): @@ -163,9 +162,7 @@ def setUpModule(): class TestBigQuery(unittest.TestCase): def setUp(self): - Config.DATASET = _make_dataset_id("bq_system_tests") - dataset = Config.CLIENT.create_dataset(Config.DATASET) - self.to_delete = [dataset] + self.to_delete = [] def tearDown(self): policy_tag_client = PolicyTagManagerClient() @@ -1605,20 +1602,6 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) - def test_dbapi_create_view(self): - - query = """ - CREATE VIEW {}.dbapi_create_view - AS SELECT name, SUM(number) AS total - FROM `bigquery-public-data.usa_names.usa_1910_2013` - GROUP BY name; - """.format( - Config.DATASET - ) - - Config.CURSOR.execute(query) - self.assertEqual(Config.CURSOR.rowcount, 0, "expected 0 rows") - @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @@ -2459,104 +2442,6 @@ def test_list_rows_page_size(self): page = next(pages) self.assertEqual(page.num_items, num_last_page) - def test_parameterized_types_round_trip(self): - client = Config.CLIENT - table_id = f"{Config.DATASET}.test_parameterized_types_round_trip" - fields = ( - ("n", "NUMERIC"), - ("n9", "NUMERIC(9)"), - ("n92", "NUMERIC(9, 2)"), - ("bn", "BIGNUMERIC"), - ("bn9", "BIGNUMERIC(38)"), - ("bn92", "BIGNUMERIC(38, 22)"), - ("s", "STRING"), - ("s9", "STRING(9)"), - ("b", "BYTES"), - ("b9", "BYTES(9)"), - ) - self.to_delete.insert(0, Table(f"{client.project}.{table_id}")) - client.query( - "create table {} ({})".format( - table_id, ", ".join(" ".join(f) for f in fields) - ) - ).result() - table = client.get_table(table_id) - table_id2 = table_id + "2" - self.to_delete.insert(0, Table(f"{client.project}.{table_id2}")) - client.create_table(Table(f"{client.project}.{table_id2}", table.schema)) - table2 = client.get_table(table_id2) - - self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields) - - def test_table_snapshots(self): - from google.cloud.bigquery import CopyJobConfig - from google.cloud.bigquery import OperationType - - client = Config.CLIENT - - source_table_path = f"{client.project}.{Config.DATASET}.test_table" - snapshot_table_path = f"{source_table_path}_snapshot" - - # Create the table before loading so that the column order is predictable. - schema = [ - bigquery.SchemaField("foo", "INTEGER"), - bigquery.SchemaField("bar", "STRING"), - ] - source_table = helpers.retry_403(Config.CLIENT.create_table)( - Table(source_table_path, schema=schema) - ) - self.to_delete.insert(0, source_table) - - # Populate the table with initial data. - rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}] - load_job = Config.CLIENT.load_table_from_json(rows, source_table) - load_job.result() - - # Now create a snapshot before modifying the original table data. - copy_config = CopyJobConfig() - copy_config.operation_type = OperationType.SNAPSHOT - - copy_job = client.copy_table( - sources=source_table_path, - destination=snapshot_table_path, - job_config=copy_config, - ) - copy_job.result() - - snapshot_table = client.get_table(snapshot_table_path) - self.to_delete.insert(0, snapshot_table) - - # Modify data in original table. - sql = f'INSERT INTO `{source_table_path}`(foo, bar) VALUES (3, "three")' - query_job = client.query(sql) - query_job.result() - - # List rows from the source table and compare them to rows from the snapshot. - rows_iter = client.list_rows(source_table_path) - rows = sorted(row.values() for row in rows_iter) - assert rows == [(1, "one"), (2, "two"), (3, "three")] - - rows_iter = client.list_rows(snapshot_table_path) - rows = sorted(row.values() for row in rows_iter) - assert rows == [(1, "one"), (2, "two")] - - # Now restore the table from the snapshot and it should again contain the old - # set of rows. - copy_config = CopyJobConfig() - copy_config.operation_type = OperationType.RESTORE - copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE - - copy_job = client.copy_table( - sources=snapshot_table_path, - destination=source_table_path, - job_config=copy_config, - ) - copy_job.result() - - rows_iter = client.list_rows(source_table_path) - rows = sorted(row.values() for row in rows_iter) - assert rows == [(1, "one"), (2, "two")] - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) @@ -2587,3 +2472,108 @@ def _table_exists(t): return True except NotFound: return False + + +def test_dbapi_create_view(dataset_id): + + query = f""" + CREATE VIEW {dataset_id}.dbapi_create_view + AS SELECT name, SUM(number) AS total + FROM `bigquery-public-data.usa_names.usa_1910_2013` + GROUP BY name; + """ + + Config.CURSOR.execute(query) + assert Config.CURSOR.rowcount == 0, "expected 0 rows" + + +def test_parameterized_types_round_trip(dataset_id): + client = Config.CLIENT + table_id = f"{dataset_id}.test_parameterized_types_round_trip" + fields = ( + ("n", "NUMERIC"), + ("n9", "NUMERIC(9)"), + ("n92", "NUMERIC(9, 2)"), + ("bn", "BIGNUMERIC"), + ("bn9", "BIGNUMERIC(38)"), + ("bn92", "BIGNUMERIC(38, 22)"), + ("s", "STRING"), + ("s9", "STRING(9)"), + ("b", "BYTES"), + ("b9", "BYTES(9)"), + ) + client.query( + "create table {} ({})".format(table_id, ", ".join(" ".join(f) for f in fields)) + ).result() + table = client.get_table(table_id) + table_id2 = table_id + "2" + client.create_table(Table(f"{client.project}.{table_id2}", table.schema)) + table2 = client.get_table(table_id2) + + assert tuple(s._key()[:2] for s in table2.schema) == fields + + +def test_table_snapshots(dataset_id): + from google.cloud.bigquery import CopyJobConfig + from google.cloud.bigquery import OperationType + + client = Config.CLIENT + + source_table_path = f"{client.project}.{dataset_id}.test_table" + snapshot_table_path = f"{source_table_path}_snapshot" + + # Create the table before loading so that the column order is predictable. + schema = [ + bigquery.SchemaField("foo", "INTEGER"), + bigquery.SchemaField("bar", "STRING"), + ] + source_table = helpers.retry_403(Config.CLIENT.create_table)( + Table(source_table_path, schema=schema) + ) + + # Populate the table with initial data. + rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}] + load_job = Config.CLIENT.load_table_from_json(rows, source_table) + load_job.result() + + # Now create a snapshot before modifying the original table data. + copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.SNAPSHOT + + copy_job = client.copy_table( + sources=source_table_path, + destination=snapshot_table_path, + job_config=copy_config, + ) + copy_job.result() + + # Modify data in original table. + sql = f'INSERT INTO `{source_table_path}`(foo, bar) VALUES (3, "three")' + query_job = client.query(sql) + query_job.result() + + # List rows from the source table and compare them to rows from the snapshot. + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two"), (3, "three")] + + rows_iter = client.list_rows(snapshot_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + + # Now restore the table from the snapshot and it should again contain the old + # set of rows. + copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.RESTORE + copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + + copy_job = client.copy_table( + sources=snapshot_table_path, + destination=source_table_path, + job_config=copy_config, + ) + copy_job.result() + + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] From 10926596fa845479100f3dd64c74d91055938818 Mon Sep 17 00:00:00 2001 From: mgorsk1 Date: Mon, 26 Jul 2021 19:44:38 +0200 Subject: [PATCH 1225/2016] docs: correct docs for `LoadJobConfig.destination_table_description` (#810) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #811. --- .../google-cloud-bigquery/google/cloud/bigquery/job/load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index f1b0454120b4..aee055c1c05b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -170,7 +170,7 @@ def destination_encryption_configuration(self, value): @property def destination_table_description(self): - """Optional[str]: Name given to destination table. + """Optional[str]: Description of the destination table. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description From 84c1cb3cb194280a625459e1c959e268902feaa4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 27 Jul 2021 11:21:21 -0500 Subject: [PATCH 1226/2016] tests: add system tests for `to_arrow` with extreme values (#813) * tests: add system tests for `to_arrow` with extreme values * fix bad merge * revert pandas tests * revert pandas tests * fix link to decimal types Co-authored-by: Peter Lamut * use north and south pole as extreme geography points * add another row of extreme values * base64 encode bytes columns Co-authored-by: Peter Lamut --- .../google/cloud/bigquery/_pandas_helpers.py | 2 + .../tests/data/scalars.jsonl | 2 + .../tests/data/scalars_extreme.jsonl | 5 ++ .../tests/data/scalars_schema.json | 62 +++++++++++++ .../tests/system/conftest.py | 48 +++++++++- .../tests/system/test_arrow.py | 88 +++++++++++++++++++ 6 files changed, 205 insertions(+), 2 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/data/scalars.jsonl create mode 100644 packages/google-cloud-bigquery/tests/data/scalars_extreme.jsonl create mode 100644 packages/google-cloud-bigquery/tests/data/scalars_schema.json create mode 100644 packages/google-cloud-bigquery/tests/system/test_arrow.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 2ff96da4d44b..b381fa5f7aa1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -93,6 +93,8 @@ def pyarrow_numeric(): def pyarrow_bignumeric(): + # 77th digit is partial. + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types return pyarrow.decimal256(76, 38) diff --git a/packages/google-cloud-bigquery/tests/data/scalars.jsonl b/packages/google-cloud-bigquery/tests/data/scalars.jsonl new file mode 100644 index 000000000000..4419a6e9ab0d --- /dev/null +++ b/packages/google-cloud-bigquery/tests/data/scalars.jsonl @@ -0,0 +1,2 @@ +{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/packages/google-cloud-bigquery/tests/data/scalars_extreme.jsonl b/packages/google-cloud-bigquery/tests/data/scalars_extreme.jsonl new file mode 100644 index 000000000000..ceccd8dbcbc6 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/data/scalars_extreme.jsonl @@ -0,0 +1,5 @@ +{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/packages/google-cloud-bigquery/tests/data/scalars_schema.json b/packages/google-cloud-bigquery/tests/data/scalars_schema.json new file mode 100644 index 000000000000..00bd150fd835 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/data/scalars_schema.json @@ -0,0 +1,62 @@ +[ + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" + }, + { + "mode": "NULLABLE", + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "float64_col", + "type": "FLOAT" + }, + { + "mode": "NULLABLE", + "name": "datetime_col", + "type": "DATETIME" + }, + { + "mode": "NULLABLE", + "name": "bignumeric_col", + "type": "BIGNUMERIC" + }, + { + "mode": "NULLABLE", + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "mode": "NULLABLE", + "name": "geography_col", + "type": "GEOGRAPHY" + }, + { + "mode": "NULLABLE", + "name": "date_col", + "type": "DATE" + }, + { + "mode": "NULLABLE", + "name": "string_col", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "bool_col", + "type": "BOOLEAN" + }, + { + "mode": "NULLABLE", + "name": "bytes_col", + "type": "BYTES" + }, + { + "mode": "NULLABLE", + "name": "int64_col", + "type": "INTEGER" + } +] diff --git a/packages/google-cloud-bigquery/tests/system/conftest.py b/packages/google-cloud-bigquery/tests/system/conftest.py index 7b389013f784..cc2c2a4dcbaf 100644 --- a/packages/google-cloud-bigquery/tests/system/conftest.py +++ b/packages/google-cloud-bigquery/tests/system/conftest.py @@ -12,15 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest +import pathlib -from google.cloud import bigquery +import pytest import test_utils.prefixer +from google.cloud import bigquery +from google.cloud.bigquery import enums from . import helpers + prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system") +DATA_DIR = pathlib.Path(__file__).parent.parent / "data" + @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): @@ -36,6 +41,11 @@ def bigquery_client(): return bigquery.Client() +@pytest.fixture(scope="session") +def project_id(bigquery_client: bigquery.Client): + return bigquery_client.project + + @pytest.fixture(scope="session") def bqstorage_client(bigquery_client): from google.cloud import bigquery_storage @@ -54,3 +64,37 @@ def dataset_id(bigquery_client): @pytest.fixture def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" + + +@pytest.fixture(scope="session") +def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars" + with open(DATA_DIR / "scalars.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) + + +@pytest.fixture(scope="session") +def scalars_extreme_table( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars_extreme" + with open(DATA_DIR / "scalars_extreme.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) diff --git a/packages/google-cloud-bigquery/tests/system/test_arrow.py b/packages/google-cloud-bigquery/tests/system/test_arrow.py new file mode 100644 index 000000000000..f97488e39b0c --- /dev/null +++ b/packages/google-cloud-bigquery/tests/system/test_arrow.py @@ -0,0 +1,88 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""System tests for Arrow connector.""" + +import pytest + +pyarrow = pytest.importorskip( + "pyarrow", minversion="3.0.0" +) # Needs decimal256 for BIGNUMERIC columns. + + +@pytest.mark.parametrize( + ("max_results", "scalars_table_name"), + ( + (None, "scalars_table"), # Use BQ Storage API. + (10, "scalars_table"), # Use REST API. + (None, "scalars_extreme_table"), # Use BQ Storage API. + (10, "scalars_extreme_table"), # Use REST API. + ), +) +def test_list_rows_nullable_scalars_dtypes( + bigquery_client, + scalars_table, + scalars_extreme_table, + max_results, + scalars_table_name, +): + table_id = scalars_table + if scalars_table_name == "scalars_extreme_table": + table_id = scalars_extreme_table + arrow_table = bigquery_client.list_rows( + table_id, max_results=max_results, + ).to_arrow() + + schema = arrow_table.schema + bignumeric_type = schema.field("bignumeric_col").type + # 77th digit is partial. + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types + assert bignumeric_type.precision in {76, 77} + assert bignumeric_type.scale == 38 + + bool_type = schema.field("bool_col").type + assert bool_type.equals(pyarrow.bool_()) + + bytes_type = schema.field("bytes_col").type + assert bytes_type.equals(pyarrow.binary()) + + date_type = schema.field("date_col").type + assert date_type.equals(pyarrow.date32()) + + datetime_type = schema.field("datetime_col").type + assert datetime_type.unit == "us" + assert datetime_type.tz is None + + float64_type = schema.field("float64_col").type + assert float64_type.equals(pyarrow.float64()) + + geography_type = schema.field("geography_col").type + assert geography_type.equals(pyarrow.string()) + + int64_type = schema.field("int64_col").type + assert int64_type.equals(pyarrow.int64()) + + numeric_type = schema.field("numeric_col").type + assert numeric_type.precision == 38 + assert numeric_type.scale == 9 + + string_type = schema.field("string_col").type + assert string_type.equals(pyarrow.string()) + + time_type = schema.field("time_col").type + assert time_type.equals(pyarrow.time64("us")) + + timestamp_type = schema.field("timestamp_col").type + assert timestamp_type.unit == "us" + assert timestamp_type.tz is not None From f06d9fc2dd4611223f15f96c4815d5dd2cafd3b1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 27 Jul 2021 13:04:10 -0500 Subject: [PATCH 1227/2016] chore: protect v3.x.x branch (#816) * chore: protect v3.x.x branch In preparation for breaking changes. * force pattern to be a string * simplify branch name --- packages/google-cloud-bigquery/.github/sync-repo-settings.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml index b18fb9c29d68..2697f214cd93 100644 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -3,7 +3,7 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. # Defaults to `master` -- pattern: master +- pattern: '{master,v3}' requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From e1d2b642b0389dace424083fc5921db3c0056f23 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 27 Jul 2021 20:08:37 +0200 Subject: [PATCH 1228/2016] fix: no longer raise a warning in `to_dataframe` if `max_results` set (#815) That warning should only be used when BQ Storage client is explicitly passed in to RowIterator methods when max_results value is also set. --- .../google/cloud/bigquery/table.py | 30 +++- .../tests/unit/test_table.py | 160 +++++++++++++++++- 2 files changed, 179 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 18d969a3f078..daade1ac678f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1552,11 +1552,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False if self.max_results is not None: - warnings.warn( - "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the REST endpoint.", - stacklevel=2, - ) return False try: @@ -1604,6 +1599,25 @@ def total_rows(self): """int: The total number of rows in the table.""" return self._total_rows + def _maybe_warn_max_results( + self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"], + ): + """Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set. + + This helper method should be used directly in the relevant top-level public + methods, so that the warning is issued for the correct line in user code. + + Args: + bqstorage_client: + The BigQuery Storage client intended to use for downloading result rows. + """ + if bqstorage_client is not None and self.max_results is not None: + warnings.warn( + "Cannot use bqstorage_client if max_results is set, " + "reverting to fetching data with the REST endpoint.", + stacklevel=3, + ) + def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): @@ -1700,6 +1714,8 @@ def to_arrow( if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) + self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1790,6 +1806,8 @@ def to_dataframe_iterable( if dtypes is None: dtypes = {} + self._maybe_warn_max_results(bqstorage_client) + column_names = [field.name for field in self._schema] bqstorage_download = functools.partial( _pandas_helpers.download_dataframe_bqstorage, @@ -1896,6 +1914,8 @@ def to_dataframe( if dtypes is None: dtypes = {} + self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 37650cd27400..4b1fd833b141 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -15,6 +15,7 @@ import datetime import logging import time +import types import unittest import warnings @@ -1862,6 +1863,15 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) + def test__validate_bqstorage_returns_false_if_max_results_set(self): + iterator = self._make_one( + max_results=10, first_page_response=None # not cached + ) + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + self.assertFalse(result) + def test__validate_bqstorage_returns_false_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached @@ -2105,7 +2115,7 @@ def test_to_arrow_w_empty_table(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test_to_arrow_max_results_w_create_bqstorage_warning(self): + def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2119,6 +2129,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() + mock_bqstorage_client = mock.sentinel.bq_storage_client row_iterator = self._make_one( client=mock_client, @@ -2129,7 +2140,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): ) with warnings.catch_warnings(record=True) as warned: - row_iterator.to_arrow(create_bqstorage_client=True) + row_iterator.to_arrow(bqstorage_client=mock_bqstorage_client) matches = [ warning @@ -2139,6 +2150,49 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + self.assertIn( + __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel" + ) + mock_client._ensure_bqstorage_client.assert_not_called() + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_arrow(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -2372,7 +2426,6 @@ def test_to_arrow_w_pyarrow_none(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable(self): from google.cloud.bigquery.schema import SchemaField - import types schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2415,7 +2468,6 @@ def test_to_dataframe_iterable(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable_with_dtypes(self): from google.cloud.bigquery.schema import SchemaField - import types schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2527,6 +2579,61 @@ def test_to_dataframe_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + + iterator_schema = [ + schema.SchemaField("name", "STRING", mode="REQUIRED"), + schema.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + path = "/foo" + api_request = mock.Mock( + side_effect=[ + { + "rows": [{"f": [{"v": "Bengt"}, {"v": "32"}]}], + "pageToken": "NEXTPAGE", + }, + {"rows": [{"f": [{"v": "Sven"}, {"v": "33"}]}]}, + ] + ) + row_iterator = mut.RowIterator( + _mock_client(), + api_request, + path, + iterator_schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=iterator_schema, + max_results=25, + ) + + with warnings.catch_warnings(record=True) as warned: + dfs = row_iterator.to_dataframe_iterable(bqstorage_client=bqstorage_client) + + # Was a warning emitted? + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + assert len(matches) == 1, "User warning was not emitted." + assert __file__ in str(matches[0]), "Warning emitted with incorrect stacklevel" + + # Basic check of what we got as a result. + dataframes = list(dfs) + assert len(dataframes) == 2 + assert isinstance(dataframes[0], pandas.DataFrame) + assert isinstance(dataframes[1], pandas.DataFrame) + @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_iterable_error_if_pandas_is_none(self): from google.cloud.bigquery.schema import SchemaField @@ -2926,7 +3033,7 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self): self.assertEqual(len(matches), 1, msg="User warning was not emitted.") @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_max_results_w_create_bqstorage_warning(self): + def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2940,6 +3047,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() + mock_bqstorage_client = mock.sentinel.bq_storage_client row_iterator = self._make_one( client=mock_client, @@ -2950,7 +3058,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): ) with warnings.catch_warnings(record=True) as warned: - row_iterator.to_dataframe(create_bqstorage_client=True) + row_iterator.to_dataframe(bqstorage_client=mock_bqstorage_client) matches = [ warning @@ -2960,6 +3068,46 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + self.assertIn( + __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel" + ) + mock_client._ensure_bqstorage_client.assert_not_called() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_dataframe(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") From 290f55597d432faa06f4607d2611e4e00e356bdc Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 27 Jul 2021 18:14:09 +0000 Subject: [PATCH 1229/2016] feat: Update proto definitions for bigquery/v2 to support new proto fields for BQML. (#817) PiperOrigin-RevId: 387137741 Source-Link: https://github.com/googleapis/googleapis/commit/8962c92e97495d0795b427d4aa4326b0d06e33eb Source-Link: https://github.com/googleapis/googleapis-gen/commit/102f1b4277cc5a049663535d9eeb77831b67de25 --- .../google/cloud/bigquery_v2/types/model.py | 104 ++++++++++++++++-- .../bigquery_v2/types/table_reference.py | 12 ++ 2 files changed, 107 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index 17e101d25ff3..70641840121b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -96,6 +96,8 @@ class Model(proto.Message): Output only. Label columns that were used to train this model. The output of the model will have a `predicted_` prefix to these columns. + best_trial_id (int): + The best trial_id across all training runs. """ class ModelType(proto.Enum): @@ -113,6 +115,7 @@ class ModelType(proto.Enum): ARIMA = 11 AUTOML_REGRESSOR = 12 AUTOML_CLASSIFIER = 13 + ARIMA_PLUS = 19 class LossType(proto.Enum): r"""Loss metric to evaluate model training performance.""" @@ -151,6 +154,7 @@ class DataFrequency(proto.Enum): WEEKLY = 5 DAILY = 6 HOURLY = 7 + PER_MINUTE = 8 class HolidayRegion(proto.Enum): r"""Type of supported holiday regions for time series forecasting @@ -285,7 +289,7 @@ class RegressionMetrics(proto.Message): median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): Median absolute error. r_squared (google.protobuf.wrappers_pb2.DoubleValue): - R^2 score. + R^2 score. This corresponds to r2_score in ML.EVALUATE. """ mean_absolute_error = proto.Field( @@ -528,7 +532,7 @@ class ClusteringMetrics(proto.Message): Mean of squared distances between each sample to its cluster centroid. clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]): - [Beta] Information for all clusters. + Information for all clusters. """ class Cluster(proto.Message): @@ -697,10 +701,29 @@ class ArimaSingleModelForecastingMetrics(proto.Message): Is arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): - The id to indicate different time series. + The time_series_id value for this time series. It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. + has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. """ non_seasonal_order = proto.Field( @@ -711,9 +734,19 @@ class ArimaSingleModelForecastingMetrics(proto.Message): ) has_drift = proto.Field(proto.BOOL, number=3,) time_series_id = proto.Field(proto.STRING, number=4,) + time_series_ids = proto.RepeatedField(proto.STRING, number=9,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) non_seasonal_order = proto.RepeatedField( proto.MESSAGE, number=1, message="Model.ArimaOrder", @@ -901,7 +934,7 @@ class TrainingRun(proto.Message): """ class TrainingOptions(proto.Message): - r""" + r"""Options used in model training. Attributes: max_iterations (int): The maximum number of iterations in training. @@ -972,8 +1005,9 @@ class TrainingOptions(proto.Message): num_clusters (int): Number of clusters for clustering models. model_uri (str): - [Beta] Google Cloud Storage URI from which the model was - imported. Only applicable for imported models. + Google Cloud Storage URI from which the model + was imported. Only applicable for imported + models. optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy): Optimization strategy for training linear regression models. @@ -1030,8 +1064,11 @@ class TrainingOptions(proto.Message): If a valid value is specified, then holiday effects modeling is enabled. time_series_id_column (str): - The id column that will be used to indicate - different time series to forecast in parallel. + The time series id column that was used + during ARIMA model training. + time_series_id_columns (Sequence[str]): + The time series id columns that were used + during ARIMA model training. horizon (int): The number of periods ahead that need to be forecasted. @@ -1042,6 +1079,15 @@ class TrainingOptions(proto.Message): output feature name is A.b. auto_arima_max_order (int): The max value of non-seasonal p and q. + decompose_time_series (google.protobuf.wrappers_pb2.BoolValue): + If true, perform decompose time series and + save the results. + clean_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, clean spikes and dips in the input + time series. + adjust_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, detect step changes and make data + adjustment in the input time series. """ max_iterations = proto.Field(proto.INT64, number=1,) @@ -1120,9 +1166,19 @@ class TrainingOptions(proto.Message): proto.ENUM, number=42, enum="Model.HolidayRegion", ) time_series_id_column = proto.Field(proto.STRING, number=43,) + time_series_id_columns = proto.RepeatedField(proto.STRING, number=51,) horizon = proto.Field(proto.INT64, number=44,) preserve_input_structs = proto.Field(proto.BOOL, number=45,) auto_arima_max_order = proto.Field(proto.INT64, number=46,) + decompose_time_series = proto.Field( + proto.MESSAGE, number=50, message=wrappers_pb2.BoolValue, + ) + clean_spikes_and_dips = proto.Field( + proto.MESSAGE, number=52, message=wrappers_pb2.BoolValue, + ) + adjust_step_changes = proto.Field( + proto.MESSAGE, number=53, message=wrappers_pb2.BoolValue, + ) class IterationResult(proto.Message): r"""Information about a single iteration of the training run. @@ -1218,10 +1274,29 @@ class ArimaModelInfo(proto.Message): Whether Arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): - The id to indicate different time series. + The time_series_id value for this time series. It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. + has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. """ non_seasonal_order = proto.Field( @@ -1237,11 +1312,21 @@ class ArimaModelInfo(proto.Message): ) has_drift = proto.Field(proto.BOOL, number=4,) time_series_id = proto.Field(proto.STRING, number=5,) + time_series_ids = proto.RepeatedField(proto.STRING, number=10,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=6, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=9, message=wrappers_pb2.BoolValue, + ) arima_model_info = proto.RepeatedField( proto.MESSAGE, @@ -1319,6 +1404,7 @@ class ArimaModelInfo(proto.Message): label_columns = proto.RepeatedField( proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, ) + best_trial_id = proto.Field(proto.INT64, number=19,) class GetModelRequest(proto.Message): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py index a0a8ee4c95fe..d56e5b09fcce 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py @@ -36,11 +36,23 @@ class TableReference(proto.Message): maximum length is 1,024 characters. Certain operations allow suffixing of the table ID with a partition decorator, such as ``sample_table$20190123``. + project_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + dataset_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + table_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. """ project_id = proto.Field(proto.STRING, number=1,) dataset_id = proto.Field(proto.STRING, number=2,) table_id = proto.Field(proto.STRING, number=3,) + project_id_alternative = proto.RepeatedField(proto.STRING, number=4,) + dataset_id_alternative = proto.RepeatedField(proto.STRING, number=5,) + table_id_alternative = proto.RepeatedField(proto.STRING, number=6,) __all__ = tuple(sorted(__protobuf__.manifest)) From 7e180a01b7881648f6aad2e25af8be119ac7805a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 27 Jul 2021 13:50:04 -0500 Subject: [PATCH 1230/2016] chore: release 2.23.0 (#819) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 18 ++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 7dbc5d4da09a..966a8744aa6f 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27) + + +### Features + +* Update proto definitions for bigquery/v2 to support new proto fields for BQML. ([#817](https://www.github.com/googleapis/python-bigquery/issues/817)) ([fe7a902](https://www.github.com/googleapis/python-bigquery/commit/fe7a902e8b3e723ace335c9b499aea6d180a025b)) + + +### Bug Fixes + +* no longer raise a warning in `to_dataframe` if `max_results` set ([#815](https://www.github.com/googleapis/python-bigquery/issues/815)) ([3c1be14](https://www.github.com/googleapis/python-bigquery/commit/3c1be149e76b1d1d8879fdcf0924ddb1c1839e94)) +* retry ChunkedEncodingError by default ([#802](https://www.github.com/googleapis/python-bigquery/issues/802)) ([419d36d](https://www.github.com/googleapis/python-bigquery/commit/419d36d6b1887041e5795dbc8fc808890e91ab11)) + + +### Documentation + +* correct docs for `LoadJobConfig.destination_table_description` ([#810](https://www.github.com/googleapis/python-bigquery/issues/810)) ([da87fd9](https://www.github.com/googleapis/python-bigquery/commit/da87fd921cc8067b187d7985c978aac8eb58d107)) + ### [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index dbc524478e00..416bf20ed647 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.22.1" +__version__ = "2.23.0" From c655936f9c238ead721d0c6b37ca09d1155f9cec Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Jul 2021 16:30:44 +0200 Subject: [PATCH 1231/2016] chore(deps): update dependency google-cloud-bigquery to v2.23.0 (#820) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 3a83eda6413e..0f9c3a2e368b 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.22.1 +google-cloud-bigquery==2.23.0 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index ffa689a9e8df..81ef4df2f2be 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.22.1 +google-cloud-bigquery==2.23.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.39.0 From 35cd25d4defc6505935cab2c6bbc1ab60bd0da81 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 28 Jul 2021 17:18:18 +0200 Subject: [PATCH 1232/2016] fix: `insert_rows()` accepts float column values as strings again (#824) --- .../google/cloud/bigquery/_helpers.py | 12 ++++++---- .../tests/unit/test__helpers.py | 24 +++++++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index bf0f80e22192..0a1f71444ae0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -19,6 +19,7 @@ import decimal import math import re +from typing import Union from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -338,14 +339,15 @@ def _int_to_json(value): return value -def _float_to_json(value): +def _float_to_json(value) -> Union[None, str, float]: """Coerce 'value' to an JSON-compatible representation.""" if value is None: return None - elif math.isnan(value) or math.isinf(value): - return str(value) - else: - return float(value) + + if isinstance(value, str): + value = float(value) + + return str(value) if (math.isnan(value) or math.isinf(value)) else float(value) def _decimal_to_json(value): diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index af026ccbe58f..f8d00e67d8d6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -690,21 +690,45 @@ def _call_fut(self, value): def test_w_none(self): self.assertEqual(self._call_fut(None), None) + def test_w_non_numeric(self): + with self.assertRaises(TypeError): + self._call_fut(object()) + + def test_w_integer(self): + result = self._call_fut(123) + self.assertIsInstance(result, float) + self.assertEqual(result, 123.0) + def test_w_float(self): self.assertEqual(self._call_fut(1.23), 1.23) + def test_w_float_as_string(self): + self.assertEqual(self._call_fut("1.23"), 1.23) + def test_w_nan(self): result = self._call_fut(float("nan")) self.assertEqual(result.lower(), "nan") + def test_w_nan_as_string(self): + result = self._call_fut("NaN") + self.assertEqual(result.lower(), "nan") + def test_w_infinity(self): result = self._call_fut(float("inf")) self.assertEqual(result.lower(), "inf") + def test_w_infinity_as_string(self): + result = self._call_fut("inf") + self.assertEqual(result.lower(), "inf") + def test_w_negative_infinity(self): result = self._call_fut(float("-inf")) self.assertEqual(result.lower(), "-inf") + def test_w_negative_infinity_as_string(self): + result = self._call_fut("-inf") + self.assertEqual(result.lower(), "-inf") + class Test_decimal_to_json(unittest.TestCase): def _call_fut(self, value): From 449d3a1a20e95a459cb85264031215f7204dee37 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 28 Jul 2021 10:45:37 -0500 Subject: [PATCH 1233/2016] chore: release 2.23.1 (#825) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 966a8744aa6f..be4eab7694d9 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28) + + +### Bug Fixes + +* `insert_rows()` accepts float column values as strings again ([#824](https://www.github.com/googleapis/python-bigquery/issues/824)) ([d9378af](https://www.github.com/googleapis/python-bigquery/commit/d9378af13add879118a1d004529b811f72c325d6)) + ## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 416bf20ed647..0195d572ced3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.0" +__version__ = "2.23.1" From bac8f9854c53376ef8b66e0898f09fed9bec3b02 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Jul 2021 13:03:13 -0500 Subject: [PATCH 1234/2016] chore: add second protection rule for v3 branch (#828) --- .../.github/sync-repo-settings.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml index 2697f214cd93..cc69b2551eb4 100644 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -3,7 +3,16 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. # Defaults to `master` -- pattern: '{master,v3}' +- pattern: master + requiredStatusCheckContexts: + - 'Kokoro' + - 'Kokoro snippets-3.8' + - 'cla/google' + - 'Samples - Lint' + - 'Samples - Python 3.6' + - 'Samples - Python 3.7' + - 'Samples - Python 3.8' +- pattern: v3 requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From ff92859e0ef7e33f85a23e8a4994f54d4003eacb Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Jul 2021 21:43:43 +0200 Subject: [PATCH 1235/2016] chore(deps): update dependency google-cloud-bigquery to v2.23.1 (#827) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 0f9c3a2e368b..6f6e670ab5a2 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.0 +google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 81ef4df2f2be..dd36b5fe4edb 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.0 +google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.39.0 From 036fbd3c1894e31644977d4871d656c49abca274 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 29 Jul 2021 03:59:03 -0500 Subject: [PATCH 1236/2016] test: retry getting rows after streaming them in `test_insert_rows_from_dataframe` (#832) --- .../tests/system/test_pandas.py | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index ddf5eaf43c2c..821b375e1969 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -21,6 +21,7 @@ import io import operator +import google.api_core.retry import pkg_resources import pytest import pytz @@ -41,6 +42,10 @@ PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") +class MissingDataError(Exception): + pass + + def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_id): """Test that a DataFrame with dtypes that map well to BigQuery types can be uploaded without specifying a schema. @@ -666,19 +671,6 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): ) for errors in chunk_errors: assert not errors - - # Use query to fetch rows instead of listing directly from the table so - # that we get values from the streaming buffer. - rows = list( - bigquery_client.query( - "SELECT * FROM `{}.{}.{}`".format( - table.project, table.dataset_id, table.table_id - ) - ) - ) - - sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) - row_tuples = [r.values() for r in sorted_rows] expected = [ # Pandas often represents NULL values as NaN. Convert to None for # easier comparison. @@ -686,7 +678,27 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): for data_row in dataframe.itertuples(index=False) ] - assert len(row_tuples) == len(expected) + # Use query to fetch rows instead of listing directly from the table so + # that we get values from the streaming buffer "within a few seconds". + # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + @google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type(MissingDataError) + ) + def get_rows(): + rows = list( + bigquery_client.query( + "SELECT * FROM `{}.{}.{}`".format( + table.project, table.dataset_id, table.table_id + ) + ) + ) + if len(rows) != len(expected): + raise MissingDataError() + return rows + + rows = get_rows() + sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) + row_tuples = [r.values() for r in sorted_rows] for row, expected_row in zip(row_tuples, expected): assert ( From 0502c0d4ac2ed93d5e5f6190fa5063eaa244a23b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 13:00:02 +0200 Subject: [PATCH 1237/2016] chore(deps): update dependency pyarrow to v5 (#834) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index dd36b5fe4edb..73badd1f32e5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -8,5 +8,5 @@ matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==4.0.1 +pyarrow==5.0.0 pytz==2021.1 From f0792fc428a2013339bd8d8a1ccba5cda813ae72 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 13:00:38 +0200 Subject: [PATCH 1238/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.6.2 (#795) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 6f6e670ab5a2..eca0275a534c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.23.1 -google-cloud-bigquery-storage==2.6.0 +google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 73badd1f32e5..8f4ea0406367 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.23.1 -google-cloud-bigquery-storage==2.6.0 +google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.4 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' From ba6545049001e2a2430d0341f6cea1d43d139fd7 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 29 Jul 2021 16:42:35 +0200 Subject: [PATCH 1239/2016] deps: expand pyarrow pins to support 5.x releases (#833) --- packages/google-cloud-bigquery/setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 0ca19b5761bd..e9deaf117a91 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -54,10 +54,10 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 1.0.0, < 5.0dev", + "pyarrow >= 1.0.0, < 6.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], - "bignumeric_type": ["pyarrow >= 3.0.0, < 5.0dev"], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"], + "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", From 75179dfc7d33b40bdc65825e751e705f2ea96043 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 29 Jul 2021 10:04:05 -0500 Subject: [PATCH 1240/2016] chore: release 2.23.2 (#835) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index be4eab7694d9..0c08e791009a 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) + + +### Dependencies + +* expand pyarrow pins to support 5.x releases ([#833](https://www.github.com/googleapis/python-bigquery/issues/833)) ([80e3a61](https://www.github.com/googleapis/python-bigquery/commit/80e3a61c60419fb19b70b664c6415cd01ba82f5b)) + ### [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 0195d572ced3..0460e7bb9a4a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.1" +__version__ = "2.23.2" From a8630e06d0ad7d3f2d5277c215fa1a2b970a7425 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 19:57:59 +0200 Subject: [PATCH 1241/2016] chore(deps): update dependency google-auth-oauthlib to v0.4.5 (#839) --- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 8f4ea0406367..d7a99a8bdef3 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.2 -google-auth-oauthlib==0.4.4 +google-auth-oauthlib==0.4.5 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From 668839efd11f5102ea606b3ba1e8797e04d8f32c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 21:36:10 +0200 Subject: [PATCH 1242/2016] chore(deps): update dependency google-cloud-bigquery to v2.23.2 (#838) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index eca0275a534c..5aa967b24634 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.1 +google-cloud-bigquery==2.23.2 google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index d7a99a8bdef3..4f2eaf90bbaa 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.1 +google-cloud-bigquery==2.23.2 google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 455e4da71d8eb132e1c32a3fa85fbdfbd25863a9 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 3 Aug 2021 03:14:34 +0200 Subject: [PATCH 1243/2016] chore(deps): update dependency google-cloud-testutils to v1 (#845) --- .../samples/snippets/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 9e9d4e40f406..b8dee50d040a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==0.3.0 +google-cloud-testutils==1.0.0 pytest==6.2.4 mock==4.0.3 From f73ed02c9a9d0f8313a36fff7596e4d03518688e Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Mon, 2 Aug 2021 19:20:21 -0600 Subject: [PATCH 1244/2016] chore: require CODEOWNER review and up to date branches (#846) These two lines bring the rules on this repo in line with the defaults: https://github.com/googleapis/repo-automation-bots/blob/63c858e539e1f4d9bb8ea66e12f9c0a0de5fef55/packages/sync-repo-settings/src/required-checks.json#L40-L50 --- .../google-cloud-bigquery/.github/sync-repo-settings.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml index cc69b2551eb4..8634a304314a 100644 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -4,6 +4,8 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. # Defaults to `master` - pattern: master + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' @@ -13,6 +15,8 @@ branchProtectionRules: - 'Samples - Python 3.7' - 'Samples - Python 3.8' - pattern: v3 + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From 848a41a5fdbe1cfc9ccccd5ae1d67b5b47e2e14d Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Thu, 5 Aug 2021 08:59:15 -0600 Subject: [PATCH 1245/2016] chore: add api-bigquery as a samples owner (#852) --- packages/google-cloud-bigquery/.github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/CODEOWNERS b/packages/google-cloud-bigquery/.github/CODEOWNERS index ae570eb01219..76112476b4a6 100644 --- a/packages/google-cloud-bigquery/.github/CODEOWNERS +++ b/packages/google-cloud-bigquery/.github/CODEOWNERS @@ -8,4 +8,4 @@ * @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/python-samples-owners +/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners From fc0b56c95d80b63b1b4e9c33bffcc20ceb697772 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 6 Aug 2021 12:14:24 -0500 Subject: [PATCH 1246/2016] fix: increase default retry deadline to 10 minutes (#859) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The backend API has a timeout of 4 minutes, so the default of 2 minutes was not allowing for any retries to happen in some cases. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #853 🦕 --- packages/google-cloud-bigquery/google/cloud/bigquery/retry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 2df4de08b856..bab28aacb786 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -47,7 +47,7 @@ def _should_retry(exc): return reason in _RETRYABLE_REASONS -DEFAULT_RETRY = retry.Retry(predicate=_should_retry) +DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=600.0) """The default retry object. Any method with a ``retry`` parameter will be retried automatically, From dbe83084ce77c87008ec10efa01169cebfeff2d2 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 6 Aug 2021 22:34:42 +0200 Subject: [PATCH 1247/2016] process: add yoshi-python to samples CODEOWNERS (#858) Closes #857. --- packages/google-cloud-bigquery/.github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/CODEOWNERS b/packages/google-cloud-bigquery/.github/CODEOWNERS index 76112476b4a6..6763f258cdb5 100644 --- a/packages/google-cloud-bigquery/.github/CODEOWNERS +++ b/packages/google-cloud-bigquery/.github/CODEOWNERS @@ -8,4 +8,4 @@ * @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners +/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners @googleapis/yoshi-python From 6da27cd819e538c7ebd3904db325d99e1c29227f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 9 Aug 2021 12:24:20 -0500 Subject: [PATCH 1248/2016] chore: release 2.23.3 (#860) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 0c08e791009a..856f1ecd1c0a 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06) + + +### Bug Fixes + +* increase default retry deadline to 10 minutes ([#859](https://www.github.com/googleapis/python-bigquery/issues/859)) ([30770fd](https://www.github.com/googleapis/python-bigquery/commit/30770fd0575fbd5aaa70c14196a4cc54627aecd2)) + ### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 0460e7bb9a4a..df992a051baf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.2" +__version__ = "2.23.3" From b8a920941e7dfcf01a3714291598842a39998825 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 9 Aug 2021 20:10:11 +0200 Subject: [PATCH 1249/2016] chore(deps): update dependency google-cloud-bigquery to v2.23.3 (#866) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.23.2` -> `==2.23.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/compatibility-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/confidence-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery ### [`v2.23.3`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2233-httpswwwgithubcomgoogleapispython-bigquerycomparev2232v2233-2021-08-06) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3)
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[ ] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 5aa967b24634..d55d0f254d67 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.2 +google-cloud-bigquery==2.23.3 google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 4f2eaf90bbaa..69f537de4f81 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.2 +google-cloud-bigquery==2.23.3 google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From f467b697099e5b91d3a46e082e54c3941dd68ff9 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 10 Aug 2021 19:21:41 +0200 Subject: [PATCH 1250/2016] feat: add support for transaction statistics (#849) * feat: add support for transaction statistics * Hoist transaction_info into base job class * Add versionadded directive to new property and class * Include new class in docs reference --- .../google-cloud-bigquery/docs/reference.rst | 1 + .../google/cloud/bigquery/__init__.py | 2 ++ .../google/cloud/bigquery/job/__init__.py | 2 ++ .../google/cloud/bigquery/job/base.py | 29 ++++++++++++++++ .../tests/system/test_client.py | 34 +++++++++++++++++++ .../tests/unit/job/helpers.py | 1 + .../tests/unit/job/test_base.py | 14 ++++++++ .../tests/unit/job/test_query.py | 29 ++++++++++++++++ 8 files changed, 112 insertions(+) diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 8a5bff9a47c4..5ac59637084d 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -68,6 +68,7 @@ Job-Related Types job.SourceFormat job.WriteDisposition job.SchemaUpdateOption + job.TransactionInfo Dataset diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 222aadcc9208..a7a0da3dd284 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -70,6 +70,7 @@ from google.cloud.bigquery.job import ScriptOptions from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import UnknownJob +from google.cloud.bigquery.job import TransactionInfo from google.cloud.bigquery.job import WriteDisposition from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference @@ -149,6 +150,7 @@ "GoogleSheetsOptions", "ParquetOptions", "ScriptOptions", + "TransactionInfo", "DEFAULT_RETRY", # Enum Constants "enums", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py index 4c16d0e20219..f51311b0bb01 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py @@ -22,6 +22,7 @@ from google.cloud.bigquery.job.base import ReservationUsage from google.cloud.bigquery.job.base import ScriptStatistics from google.cloud.bigquery.job.base import ScriptStackFrame +from google.cloud.bigquery.job.base import TransactionInfo from google.cloud.bigquery.job.base import UnknownJob from google.cloud.bigquery.job.copy_ import CopyJob from google.cloud.bigquery.job.copy_ import CopyJobConfig @@ -81,5 +82,6 @@ "QueryPriority", "SchemaUpdateOption", "SourceFormat", + "TransactionInfo", "WriteDisposition", ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 20ad81c0b9e1..e5fc592a6006 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -19,6 +19,7 @@ import http import threading import typing +from typing import Dict, Optional from google.api_core import exceptions import google.api_core.future.polling @@ -88,6 +89,22 @@ def _error_result_to_exception(error_result): ) +class TransactionInfo(typing.NamedTuple): + """[Alpha] Information of a multi-statement transaction. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#TransactionInfo + + .. versionadded:: 2.24.0 + """ + + transaction_id: str + """Output only. ID of the transaction.""" + + @classmethod + def from_api_repr(cls, transaction_info: Dict[str, str]) -> "TransactionInfo": + return cls(transaction_info["transactionId"]) + + class _JobReference(object): """A reference to a job. @@ -336,6 +353,18 @@ def reservation_usage(self): for usage in usage_stats_raw ] + @property + def transaction_info(self) -> Optional[TransactionInfo]: + """Information of the multi-statement transaction if this job is part of one. + + .. versionadded:: 2.24.0 + """ + info = self._properties.get("statistics", {}).get("transactionInfo") + if info is None: + return None + else: + return TransactionInfo.from_api_repr(info) + @property def error_result(self): """Error information about the job as a whole. diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index baa2b6ad8159..f540611a63c1 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -1557,6 +1557,40 @@ def test_dml_statistics(self): assert query_job.dml_stats.updated_row_count == 0 assert query_job.dml_stats.deleted_row_count == 3 + def test_transaction_info(self): + table_schema = ( + bigquery.SchemaField("foo", "STRING"), + bigquery.SchemaField("bar", "INTEGER"), + ) + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = f"{Config.CLIENT.project}.{dataset_id}.test_dml_statistics" + + # Create the table before loading so that the column order is deterministic. + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # Insert a few rows and check the stats. + sql = f""" + BEGIN TRANSACTION; + INSERT INTO `{table_id}` + VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4); + + UPDATE `{table_id}` + SET bar = bar + 1 + WHERE bar > 2; + COMMIT TRANSACTION; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + # Transaction ID set by the server should be accessible + assert query_job.transaction_info is not None + assert query_job.transaction_info.transaction_id != "" + def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: Config.CURSOR.execute(sql) diff --git a/packages/google-cloud-bigquery/tests/unit/job/helpers.py b/packages/google-cloud-bigquery/tests/unit/job/helpers.py index ea071c5acd59..c792214e7579 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/job/helpers.py @@ -162,6 +162,7 @@ def _verifyInitialReadonlyProperties(self, job): self.assertIsNone(job.created) self.assertIsNone(job.started) self.assertIsNone(job.ended) + self.assertIsNone(job.transaction_info) # derived from resource['status'] self.assertIsNone(job.error_result) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 405ad6ee58e7..0ac1d05b5363 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -227,6 +227,20 @@ def test_script_statistics(self): self.assertEqual(stack_frame.end_column, 14) self.assertEqual(stack_frame.text, "QUERY TEXT") + def test_transaction_info(self): + from google.cloud.bigquery.job.base import TransactionInfo + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + assert job.transaction_info is None + + statistics = job._properties["statistics"] = {} + assert job.transaction_info is None + + statistics["transactionInfo"] = {"transactionId": "123-abc-xyz"} + assert isinstance(job.transaction_info, TransactionInfo) + assert job.transaction_info.transaction_id == "123-abc-xyz" + def test_num_child_jobs(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 482f7f3afac1..d4137052006d 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -128,6 +128,18 @@ def _verify_dml_stats_resource_properties(self, job, resource): else: assert job.dml_stats is None + def _verify_transaction_info_resource_properties(self, job, resource): + resource_stats = resource.get("statistics", {}) + + if "transactionInfo" in resource_stats: + resource_transaction_info = resource_stats["transactionInfo"] + job_transaction_info = job.transaction_info + assert job_transaction_info.transaction_id == resource_transaction_info.get( + "transactionId" + ) + else: + assert job.transaction_info is None + def _verify_configuration_properties(self, job, configuration): if "dryRun" in configuration: self.assertEqual(job.dry_run, configuration["dryRun"]) @@ -137,6 +149,7 @@ def _verify_configuration_properties(self, job, configuration): def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) self._verify_dml_stats_resource_properties(job, resource) + self._verify_transaction_info_resource_properties(job, resource) configuration = resource.get("configuration", {}) self._verify_configuration_properties(job, configuration) @@ -325,6 +338,22 @@ def test_from_api_repr_with_dml_stats(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_with_transaction_info(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, + "statistics": {"transactionInfo": {"transactionId": "1a2b-3c4d"}}, + } + klass = self._get_target_class() + + job = klass.from_api_repr(RESOURCE, client=client) + + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SchemaUpdateOption From 0ddf81056fc356367cd2d9fee5437bcc4f18ba8a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 10 Aug 2021 20:02:10 +0200 Subject: [PATCH 1251/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.6.3 (#863) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.6.2` -> `==2.6.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/compatibility-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/confidence-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery-storage ### [`v2.6.3`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​263-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev262v263-2021-08-06) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.6.2...v2.6.3)
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[x] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index d55d0f254d67..d3e599101bb4 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.23.3 -google-cloud-bigquery-storage==2.6.2 +google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 69f537de4f81..1545ed96e2be 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.23.3 -google-cloud-bigquery-storage==2.6.2 +google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' From d1ea44aee0efdd485e86ed91118809e86d186449 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 11 Aug 2021 12:29:39 +0200 Subject: [PATCH 1252/2016] chore: fix INSTALL_LIBRARY_FROM_SOURCE in noxfile.py (#869) Source-Link: https://github.com/googleapis/synthtool/commit/6252f2cd074c38f37b44abe5e96d128733eb1b61 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/samples/geography/noxfile.py | 5 ++++- packages/google-cloud-bigquery/samples/snippets/noxfile.py | 5 ++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 9ee60f7e4850..649877dc494c 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b + digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 9fc7f17820d4..7dbea091476d 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -94,7 +94,10 @@ def get_pytest_env_vars() -> Dict[str, str]: TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) # # Style Checks # diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 9fc7f17820d4..7dbea091476d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -94,7 +94,10 @@ def get_pytest_env_vars() -> Dict[str, str]: TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) # # Style Checks # From 5b5fd9bb632cdf820bcaada4c2d9e0829be299cc Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 11 Aug 2021 16:28:43 +0200 Subject: [PATCH 1253/2016] feat: make the same `Table*` instances equal to each other (#867) * feat: make the same Table instances equal to each other * Table equality should ignore metadata differences * Compare instances through tableReference property * Make Table instances hashable * Make Table* classes interchangeable If these classes reference the same table, they are now considered equal. --- .../google/cloud/bigquery/table.py | 42 +++- .../tests/unit/test_table.py | 225 ++++++++++++++++-- 2 files changed, 244 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index daade1ac678f..d23885ebf472 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -255,9 +255,16 @@ def _key(self): return (self._project, self._dataset_id, self._table_id) def __eq__(self, other): - if not isinstance(other, TableReference): + if isinstance(other, (Table, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + elif isinstance(other, TableReference): + return self._key() == other._key() + else: return NotImplemented - return self._key() == other._key() def __ne__(self, other): return not self == other @@ -1011,6 +1018,24 @@ def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" return _helpers._build_resource_from_properties(self, filter_fields) + def __eq__(self, other): + if isinstance(other, Table): + return ( + self._properties["tableReference"] + == other._properties["tableReference"] + ) + elif isinstance(other, (TableReference, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + def __repr__(self): return "Table({})".format(repr(self.reference)) @@ -1229,6 +1254,19 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) + def __eq__(self, other): + if isinstance(other, (Table, TableReference, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 4b1fd833b141..a5badc66c620 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -115,8 +115,6 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table_ref = self._make_one(dataset_ref, "table_1") @@ -124,8 +122,6 @@ def test_ctor_defaults(self): self.assertEqual(table_ref.table_id, "table_1") def test_to_api_repr(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table_ref = self._make_one(dataset_ref, "table_1") @@ -137,7 +133,6 @@ def test_to_api_repr(self): ) def test_from_api_repr(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import TableReference dataset_ref = DatasetReference("project_1", "dataset_1") @@ -204,8 +199,6 @@ def test_from_string_ignores_default_project(self): self.assertEqual(got.table_id, "string_table") def test___eq___wrong_type(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset_ref, "table_1") other = object() @@ -213,8 +206,6 @@ def test___eq___wrong_type(self): self.assertEqual(table, mock.ANY) def test___eq___project_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") other_dataset = DatasetReference("project_2", "dataset_1") table = self._make_one(dataset, "table_1") @@ -222,8 +213,6 @@ def test___eq___project_mismatch(self): self.assertNotEqual(table, other) def test___eq___dataset_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") other_dataset = DatasetReference("project_1", "dataset_2") table = self._make_one(dataset, "table_1") @@ -231,24 +220,18 @@ def test___eq___dataset_mismatch(self): self.assertNotEqual(table, other) def test___eq___table_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset, "table_1") other = self._make_one(dataset, "table_2") self.assertNotEqual(table, other) def test___eq___equality(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset, "table_1") other = self._make_one(dataset, "table_1") self.assertEqual(table, other) def test___hash__set_equality(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table1 = self._make_one(dataset, "table1") table2 = self._make_one(dataset, "table2") @@ -257,8 +240,6 @@ def test___hash__set_equality(self): self.assertEqual(set_one, set_two) def test___hash__not_equals(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table1 = self._make_one(dataset, "table1") table2 = self._make_one(dataset, "table2") @@ -294,8 +275,6 @@ def _get_target_class(): return Table def _make_one(self, *args, **kw): - from google.cloud.bigquery.dataset import DatasetReference - if len(args) == 0: dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -581,6 +560,68 @@ def test_num_rows_getter(self): with self.assertRaises(ValueError): getattr(table, "num_rows") + def test__eq__wrong_type(self): + table = self._make_one("project_foo.dataset_bar.table_baz") + + class TableWannabe: + pass + + not_a_table = TableWannabe() + not_a_table._properties = table._properties + + assert table != not_a_table # Can't fake it. + + def test__eq__same_table_basic(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + assert table_1 == table_2 + + def test__eq__same_table_multiple_properties(self): + from google.cloud.bigquery import SchemaField + + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.require_partition_filter = True + table_1.labels = {"first": "one", "second": "two"} + + table_1.schema = [ + SchemaField("name", "STRING", "REQUIRED"), + SchemaField("age", "INTEGER", "NULLABLE"), + ] + + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + table_2.require_partition_filter = True + table_2.labels = {"first": "one", "second": "two"} + table_2.schema = [ + SchemaField("name", "STRING", "REQUIRED"), + SchemaField("age", "INTEGER", "NULLABLE"), + ] + + assert table_1 == table_2 + + def test__eq__same_table_property_different(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.description = "This is table baz" + + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + table_2.description = "This is also table baz" + + assert table_1 == table_2 # Still equal, only table reference is important. + + def test__eq__different_table(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_2 = self._make_one("project_foo.dataset_bar.table_baz_2") + + assert table_1 != table_2 + + def test_hashable(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.description = "This is a table" + + table_1b = self._make_one("project_foo.dataset_bar.table_baz") + table_1b.description = "Metadata is irrelevant for hashes" + + assert hash(table_1) == hash(table_1b) + def test_schema_setter_non_sequence(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1543,6 +1584,148 @@ def test_to_api_repr(self): table = self._make_one(resource) self.assertEqual(table.to_api_repr(), resource) + def test__eq__wrong_type(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table = self._make_one(resource) + + class FakeTableListItem: + project = "project_foo" + dataset_id = "dataset_bar" + table_id = "table_baz" + + not_a_table = FakeTableListItem() + + assert table != not_a_table # Can't fake it. + + def test__eq__same_table(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_1 = self._make_one(resource) + table_2 = self._make_one(resource) + + assert table_1 == table_2 + + def test__eq__same_table_property_different(self): + table_ref_resource = { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + + resource_1 = {"tableReference": table_ref_resource, "friendlyName": "Table One"} + table_1 = self._make_one(resource_1) + + resource_2 = {"tableReference": table_ref_resource, "friendlyName": "Table Two"} + table_2 = self._make_one(resource_2) + + assert table_1 == table_2 # Still equal, only table reference is important. + + def test__eq__different_table(self): + resource_1 = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_1 = self._make_one(resource_1) + + resource_2 = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_quux", + } + } + table_2 = self._make_one(resource_2) + + assert table_1 != table_2 + + def test_hashable(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_item = self._make_one(resource) + table_item_2 = self._make_one(resource) + + assert hash(table_item) == hash(table_item_2) + + +class TestTableClassesInterchangeability: + @staticmethod + def _make_table(*args, **kwargs): + from google.cloud.bigquery.table import Table + + return Table(*args, **kwargs) + + @staticmethod + def _make_table_ref(*args, **kwargs): + from google.cloud.bigquery.table import TableReference + + return TableReference(*args, **kwargs) + + @staticmethod + def _make_table_list_item(*args, **kwargs): + from google.cloud.bigquery.table import TableListItem + + return TableListItem(*args, **kwargs) + + def test_table_eq_table_ref(self): + + table = self._make_table("project_foo.dataset_bar.table_baz") + dataset_ref = DatasetReference("project_foo", "dataset_bar") + table_ref = self._make_table_ref(dataset_ref, "table_baz") + + assert table == table_ref + assert table_ref == table + + def test_table_eq_table_list_item(self): + table = self._make_table("project_foo.dataset_bar.table_baz") + table_list_item = self._make_table_list_item( + { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + ) + + assert table == table_list_item + assert table_list_item == table + + def test_table_ref_eq_table_list_item(self): + + dataset_ref = DatasetReference("project_foo", "dataset_bar") + table_ref = self._make_table_ref(dataset_ref, "table_baz") + table_list_item = self._make_table_list_item( + { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + ) + + assert table_ref == table_list_item + assert table_list_item == table_ref + class TestSnapshotDefinition: @staticmethod From c18874ab5a22b44374a8a5c9fbfaf49b73152236 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Aug 2021 10:12:23 -0500 Subject: [PATCH 1254/2016] feat: support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor (#850) Follow-up to https://github.com/googleapis/python-bigquery/pull/840/files#r679880582 Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) --- packages/google-cloud-bigquery/docs/conf.py | 1 + .../google-cloud-bigquery/docs/reference.rst | 1 + .../google/cloud/bigquery/enums.py | 24 +++++------ .../google/cloud/bigquery/query.py | 42 +++++++++++++------ .../tests/unit/test_query.py | 13 ++++++ 5 files changed, 57 insertions(+), 24 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index cb347160de42..09f7ea414883 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 5ac59637084d..d8738e67bfaf 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -138,6 +138,7 @@ Query query.ArrayQueryParameter query.ScalarQueryParameter + query.ScalarQueryParameterType query.StructQueryParameter query.UDFResource diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 0da01d665e51..d67cebd4c8d1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -259,23 +259,23 @@ class SqlTypeNames(str, enum.Enum): class SqlParameterScalarTypes: """Supported scalar SQL query parameter types as type objects.""" - STRING = ScalarQueryParameterType("STRING") + BOOL = ScalarQueryParameterType("BOOL") + BOOLEAN = ScalarQueryParameterType("BOOL") + BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") BYTES = ScalarQueryParameterType("BYTES") - INTEGER = ScalarQueryParameterType("INT64") - INT64 = ScalarQueryParameterType("INT64") + DATE = ScalarQueryParameterType("DATE") + DATETIME = ScalarQueryParameterType("DATETIME") + DECIMAL = ScalarQueryParameterType("NUMERIC") FLOAT = ScalarQueryParameterType("FLOAT64") FLOAT64 = ScalarQueryParameterType("FLOAT64") - NUMERIC = ScalarQueryParameterType("NUMERIC") - BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") - DECIMAL = ScalarQueryParameterType("NUMERIC") - BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") - BOOLEAN = ScalarQueryParameterType("BOOL") - BOOL = ScalarQueryParameterType("BOOL") GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") - TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") - DATE = ScalarQueryParameterType("DATE") + INT64 = ScalarQueryParameterType("INT64") + INTEGER = ScalarQueryParameterType("INT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + STRING = ScalarQueryParameterType("STRING") TIME = ScalarQueryParameterType("TIME") - DATETIME = ScalarQueryParameterType("DATETIME") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") class WriteDisposition(object): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index d1e9a45a5b1b..1f449f189aa1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -16,7 +16,9 @@ from collections import OrderedDict import copy -from typing import Union +import datetime +import decimal +from typing import Optional, Union from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json @@ -24,6 +26,11 @@ from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM +_SCALAR_VALUE_TYPE = Optional[ + Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date] +] + + class UDFResource(object): """Describe a single user-defined function (UDF) resource. @@ -325,35 +332,46 @@ class ScalarQueryParameter(_AbstractQueryParameter): """Named / positional query parameters for scalar values. Args: - name (Optional[str]): + name: Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - type_ (str): - Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or - 'DATE'. + type_: + Name of parameter type. See + :class:`google.cloud.bigquery.enums.SqlTypeNames` and + :class:`google.cloud.bigquery.enums.SqlParameterScalarTypes` for + supported types. - value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + value: The scalar parameter value. """ - def __init__(self, name, type_, value): + def __init__( + self, + name: Optional[str], + type_: Optional[Union[str, ScalarQueryParameterType]], + value: _SCALAR_VALUE_TYPE, + ): self.name = name - self.type_ = type_ + if isinstance(type_, ScalarQueryParameterType): + self.type_ = type_._type + else: + self.type_ = type_ self.value = value @classmethod - def positional(cls, type_: str, value) -> "ScalarQueryParameter": + def positional( + cls, type_: Union[str, ScalarQueryParameterType], value: _SCALAR_VALUE_TYPE + ) -> "ScalarQueryParameter": """Factory for positional paramater. Args: - type_ (str): + type_: Name of parameter type. One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + value: The scalar parameter value. Returns: diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 9483fe8dd743..69a6772e5e92 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -13,6 +13,7 @@ # limitations under the License. import datetime +import decimal import unittest import mock @@ -430,6 +431,18 @@ def test_positional(self): self.assertEqual(param.type_, "INT64") self.assertEqual(param.value, 123) + def test_ctor_w_scalar_query_parameter_type(self): + from google.cloud.bigquery import enums + + param = self._make_one( + name="foo", + type_=enums.SqlParameterScalarTypes.BIGNUMERIC, + value=decimal.Decimal("123.456"), + ) + self.assertEqual(param.name, "foo") + self.assertEqual(param.type_, "BIGNUMERIC") + self.assertEqual(param.value, decimal.Decimal("123.456")) + def test_from_api_repr_w_name(self): RESOURCE = { "name": "foo", From ab1602a92a1dc85ca5e8a895529c2261eb103c1d Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 11 Aug 2021 14:24:28 -0400 Subject: [PATCH 1255/2016] feat: retry failed query jobs in `result()` (#837) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #539 🦕 Previously, we only retried failed API requests. Now, we retry failed jobs (according to the predicate of the `Retry` object passed to `job.result()`). --- .../google/cloud/bigquery/client.py | 110 ++++++-- .../google/cloud/bigquery/job/query.py | 84 +++++- .../google/cloud/bigquery/retry.py | 20 ++ .../tests/system/test_job_retry.py | 72 +++++ .../tests/unit/test_job_retry.py | 247 ++++++++++++++++++ .../tests/unit/test_retry.py | 24 ++ 6 files changed, 518 insertions(+), 39 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/system/test_job_retry.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_job_retry.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 742ecac2e742..8142c59cd7f7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -86,7 +86,7 @@ from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults -from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -3163,6 +3163,7 @@ def query( project: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3192,21 +3193,52 @@ def query( Project ID of the project of where to run the job. Defaults to the client's project. retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. + How to retry the RPC. This only applies to making RPC + calls. It isn't used to retry failed jobs. This has + a reasonable default that should only be overridden + with care. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. + + Not all jobs can be retried. If ``job_id`` is + provided, then the job returned by the query will not + be retryable, and an exception will be raised if a + non-``None`` (and non-default) value for ``job_retry`` + is also provided. + + Note that errors aren't detected until ``result()`` is + called on the job returned. The ``job_retry`` + specified here becomes the default ``job_retry`` for + ``result()``, where it can also be specified. Returns: google.cloud.bigquery.job.QueryJob: A new query job instance. Raises: TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.QueryJobConfig` + class, or if both ``job_id`` and non-``None`` non-default + ``job_retry`` are provided. """ job_id_given = job_id is not None - job_id = _make_job_id(job_id, job_id_prefix) + if ( + job_id_given + and job_retry is not None + and job_retry is not DEFAULT_JOB_RETRY + ): + raise TypeError( + "`job_retry` was provided, but the returned job is" + " not retryable, because a custom `job_id` was" + " provided." + ) + + job_id_save = job_id if project is None: project = self.project @@ -3214,8 +3246,6 @@ def query( if location is None: location = self.location - job_config = copy.deepcopy(job_config) - if self._default_query_job_config: if job_config: _verify_job_config_type( @@ -3225,6 +3255,8 @@ def query( # that is in the default, # should be filled in with the default # the incoming therefore has precedence + # + # Note that _fill_from_default doesn't mutate the receiver job_config = job_config._fill_from_default( self._default_query_job_config ) @@ -3233,34 +3265,54 @@ def query( self._default_query_job_config, google.cloud.bigquery.job.QueryJobConfig, ) - job_config = copy.deepcopy(self._default_query_job_config) + job_config = self._default_query_job_config - job_ref = job._JobReference(job_id, project=project, location=location) - query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) + # Note that we haven't modified the original job_config (or + # _default_query_job_config) up to this point. + job_config_save = job_config - try: - query_job._begin(retry=retry, timeout=timeout) - except core_exceptions.Conflict as create_exc: - # The thought is if someone is providing their own job IDs and they get - # their job ID generation wrong, this could end up returning results for - # the wrong query. We thus only try to recover if job ID was not given. - if job_id_given: - raise create_exc + def do_query(): + # Make a copy now, so that original doesn't get changed by the process + # below and to facilitate retry + job_config = copy.deepcopy(job_config_save) + + job_id = _make_job_id(job_id_save, job_id_prefix) + job_ref = job._JobReference(job_id, project=project, location=location) + query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) try: - query_job = self.get_job( - job_id, - project=project, - location=location, - retry=retry, - timeout=timeout, - ) - except core_exceptions.GoogleAPIError: # (includes RetryError) - raise create_exc + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. + if job_id_given: + raise create_exc + + try: + query_job = self.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job else: return query_job - else: - return query_job + + future = do_query() + # The future might be in a failed state now, but if it's + # unrecoverable, we'll find out when we ask for it's result, at which + # point, we may retry. + if not job_id_given: + future._retry_do_query = do_query # in case we have to retry later + future._job_retry = job_retry + + return future def insert_rows( self, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 2cb7ee28e88f..3ab47b0f9b72 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -36,7 +36,7 @@ from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource -from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning @@ -1260,6 +1260,7 @@ def result( retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None, start_index: int = None, + job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. @@ -1270,9 +1271,13 @@ def result( max_results (Optional[int]): The maximum total number of rows from this request. retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. If the job state is - ``DONE``, retrying is aborted early even if the results are not - available, as this will not change anymore. + How to retry the call that retrieves rows. This only + applies to making RPC calls. It isn't used to retry + failed jobs. This has a reasonable default that + should only be overridden with care. If the job state + is ``DONE``, retrying is aborted early even if the + results are not available, as this will not change + anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1280,6 +1285,16 @@ def result( applies to each individual request. start_index (Optional[int]): The zero-based index of the starting row to read. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. + + Not all jobs can be retried. If ``job_id`` was + provided to the query that created this job, then the + job returned by the query will not be retryable, and + an exception will be raised if non-``None`` + non-default ``job_retry`` is also provided. Returns: google.cloud.bigquery.table.RowIterator: @@ -1295,17 +1310,66 @@ def result( Raises: google.cloud.exceptions.GoogleAPICallError: - If the job failed. + If the job failed and retries aren't successful. concurrent.futures.TimeoutError: If the job did not complete in the given timeout. + TypeError: + If Non-``None`` and non-default ``job_retry`` is + provided and the job is not retryable. """ try: - super(QueryJob, self).result(retry=retry, timeout=timeout) + retry_do_query = getattr(self, "_retry_do_query", None) + if retry_do_query is not None: + if job_retry is DEFAULT_JOB_RETRY: + job_retry = self._job_retry + else: + if job_retry is not None and job_retry is not DEFAULT_JOB_RETRY: + raise TypeError( + "`job_retry` was provided, but this job is" + " not retryable, because a custom `job_id` was" + " provided to the query that created this job." + ) + + first = True + + def do_get_result(): + nonlocal first + + if first: + first = False + else: + # Note that we won't get here if retry_do_query is + # None, because we won't use a retry. + + # The orinal job is failed. Create a new one. + job = retry_do_query() + + # If it's already failed, we might as well stop: + if job.done() and job.exception() is not None: + raise job.exception() + + # Become the new job: + self.__dict__.clear() + self.__dict__.update(job.__dict__) + + # This shouldn't be necessary, because once we have a good + # job, it should stay good,and we shouldn't have to retry. + # But let's be paranoid. :) + self._retry_do_query = retry_do_query + self._job_retry = job_retry + + super(QueryJob, self).result(retry=retry, timeout=timeout) + + # Since the job could already be "done" (e.g. got a finished job + # via client.get_job), the superclass call to done() might not + # set the self._query_results cache. + self._reload_query_results(retry=retry, timeout=timeout) + + if retry_do_query is not None and job_retry is not None: + do_get_result = job_retry(do_get_result) + + do_get_result() - # Since the job could already be "done" (e.g. got a finished job - # via client.get_job), the superclass call to done() might not - # set the self._query_results cache. - self._reload_query_results(retry=retry, timeout=timeout) except exceptions.GoogleAPICallError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index bab28aacb786..e9286055c56d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -32,6 +32,8 @@ auth_exceptions.TransportError, ) +_DEFAULT_JOB_DEADLINE = 60.0 * 10.0 # seconds + def _should_retry(exc): """Predicate for determining when to retry. @@ -56,3 +58,21 @@ def _should_retry(exc): on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ + +job_retry_reasons = "rateLimitExceeded", "backendError" + + +def _job_should_retry(exc): + if not hasattr(exc, "errors") or len(exc.errors) == 0: + return False + + reason = exc.errors[0]["reason"] + return reason in job_retry_reasons + + +DEFAULT_JOB_RETRY = retry.Retry( + predicate=_job_should_retry, deadline=_DEFAULT_JOB_DEADLINE +) +""" +The default job retry object. +""" diff --git a/packages/google-cloud-bigquery/tests/system/test_job_retry.py b/packages/google-cloud-bigquery/tests/system/test_job_retry.py new file mode 100644 index 000000000000..520545493290 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/system/test_job_retry.py @@ -0,0 +1,72 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import threading +import time + +import google.api_core.exceptions +import google.cloud.bigquery +import pytest + + +def thread(func): + thread = threading.Thread(target=func, daemon=True) + thread.start() + return thread + + +@pytest.mark.parametrize("job_retry_on_query", [True, False]) +def test_query_retry_539(bigquery_client, dataset_id, job_retry_on_query): + """ + Test job_retry + + See: https://github.com/googleapis/python-bigquery/issues/539 + """ + from google.api_core import exceptions + from google.api_core.retry import if_exception_type, Retry + + table_name = f"{dataset_id}.t539" + + # Without a custom retry, we fail: + with pytest.raises(google.api_core.exceptions.NotFound): + bigquery_client.query(f"select count(*) from {table_name}").result() + + retry_notfound = Retry(predicate=if_exception_type(exceptions.NotFound)) + + job_retry = dict(job_retry=retry_notfound) if job_retry_on_query else {} + job = bigquery_client.query(f"select count(*) from {table_name}", **job_retry) + job_id = job.job_id + + # We can already know that the job failed, but we're not supposed + # to find out until we call result, which is where retry happend + assert job.done() + assert job.exception() is not None + + @thread + def create_table(): + time.sleep(1) # Give the first retry attempt time to fail. + with contextlib.closing(google.cloud.bigquery.Client()) as client: + client.query(f"create table {table_name} (id int64)").result() + + job_retry = {} if job_retry_on_query else dict(job_retry=retry_notfound) + [[count]] = list(job.result(**job_retry)) + assert count == 0 + + # The job was retried, and thus got a new job id + assert job.job_id != job_id + + # Make sure we don't leave a thread behind: + create_table.join() + bigquery_client.query(f"drop table {table_name}").result() diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py new file mode 100644 index 000000000000..b2095d2f212b --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -0,0 +1,247 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import re + +import mock +import pytest + +import google.api_core.exceptions +import google.api_core.retry + +from .helpers import make_connection + + +# With job_retry_on_query, we're testing 4 scenarios: +# - No `job_retry` passed, retry on default rateLimitExceeded. +# - Pass NotFound retry to `query`. +# - Pass NotFound retry to `result`. +# - Pass BadRequest retry to query, with the value passed to `result` overriding. +@pytest.mark.parametrize("job_retry_on_query", [None, "Query", "Result", "Both"]) +@mock.patch("time.sleep") +def test_retry_failed_jobs(sleep, client, job_retry_on_query): + """ + Test retry of job failures, as opposed to API-invocation failures. + """ + + retry_notfound = google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.NotFound + ) + ) + retry_badrequest = google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.BadRequest + ) + ) + + if job_retry_on_query is None: + reason = "rateLimitExceeded" + else: + reason = "notFound" + + err = dict(reason=reason) + responses = [ + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn = client._connection = make_connection() + conn.api_request.side_effect = api_request + + if job_retry_on_query == "Query": + job_retry = dict(job_retry=retry_notfound) + elif job_retry_on_query == "Both": + # This will be overridden in `result` + job_retry = dict(job_retry=retry_badrequest) + else: + job_retry = {} + job = client.query("select 1", **job_retry) + + orig_job_id = job.job_id + job_retry = ( + dict(job_retry=retry_notfound) + if job_retry_on_query in ("Result", "Both") + else {} + ) + result = job.result(**job_retry) + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + + # The job adjusts it's job id based on the id of the last attempt. + assert job.job_id != orig_job_id + assert job.job_id == conn.mock_calls[3][2]["data"]["jobReference"]["jobId"] + + # We had to sleep three times + assert len(sleep.mock_calls) == 3 + + # Sleeps are random, however they're more than 0 + assert min(c[1][0] for c in sleep.mock_calls) > 0 + + # They're at most 2 * (multiplier**(number of sleeps - 1)) * initial + # The default multiplier is 2 + assert max(c[1][0] for c in sleep.mock_calls) <= 8 + + # We can ask for the result again: + responses = [ + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + orig_job_id = job.job_id + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + + # We wouldn't (and didn't) fail, because we're dealing with a successful job. + # So the job id hasn't changed. + assert job.job_id == orig_job_id + + +# With job_retry_on_query, we're testing 4 scenarios: +# - Pass None retry to `query`. +# - Pass None retry to `result`. +@pytest.mark.parametrize("job_retry_on_query", ["Query", "Result"]) +@mock.patch("time.sleep") +def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query): + """ + Test retry of job failures, as opposed to API-invocation failures. + """ + err = dict(reason="rateLimitExceeded") + responses = [dict(status=dict(state="DONE", errors=[err], errorResult=err))] * 3 + + def api_request(method, path, query_params=None, data=None, **kw): + response = responses.pop(0) + response["jobReference"] = data["jobReference"] + return response + + conn = client._connection = make_connection() + conn.api_request.side_effect = api_request + + if job_retry_on_query == "Query": + job_retry = dict(job_retry=None) + else: + job_retry = {} + job = client.query("select 1", **job_retry) + + orig_job_id = job.job_id + job_retry = dict(job_retry=None) if job_retry_on_query == "Result" else {} + with pytest.raises(google.api_core.exceptions.Forbidden): + job.result(**job_retry) + + assert job.job_id == orig_job_id + assert len(sleep.mock_calls) == 0 + + +@mock.patch("google.api_core.retry.datetime_helpers") +@mock.patch("time.sleep") +def test_retry_failed_jobs_after_retry_failed(sleep, datetime_helpers, client): + """ + If at first you don't succeed, maybe you will later. :) + """ + conn = client._connection = make_connection() + + datetime_helpers.utcnow.return_value = datetime.datetime(2021, 7, 29, 10, 43, 2) + + err = dict(reason="rateLimitExceeded") + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + if calls: + datetime_helpers.utcnow.return_value += datetime.timedelta( + seconds=calls[-1][1][0] + ) + response = dict(status=dict(state="DONE", errors=[err], errorResult=err)) + response["jobReference"] = data["jobReference"] + return response + + conn.api_request.side_effect = api_request + + job = client.query("select 1") + orig_job_id = job.job_id + + with pytest.raises(google.api_core.exceptions.RetryError): + job.result() + + # We never got a successful job, so the job id never changed: + assert job.job_id == orig_job_id + + # We failed because we couldn't succeed after 120 seconds. + # But we can try again: + err2 = dict(reason="backendError") # We also retry on this + responses = [ + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + datetime_helpers.utcnow.return_value += datetime.timedelta( + seconds=calls[-1][1][0] + ) + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn.api_request.side_effect = api_request + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + assert job.job_id != orig_job_id + + +def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): + with pytest.raises( + TypeError, + match=re.escape( + "`job_retry` was provided, but the returned job is" + " not retryable, because a custom `job_id` was" + " provided." + ), + ): + client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) + + +def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): + client._connection = make_connection({}) + job = client.query("select 42", job_id=42) + with pytest.raises( + TypeError, + match=re.escape( + "`job_retry` was provided, but this job is" + " not retryable, because a custom `job_id` was" + " provided to the query that created this job." + ), + ): + job.result(job_retry=google.api_core.retry.Retry()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index 6fb7f93fde51..c7c25e0363de 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -86,3 +86,27 @@ def test_w_unstructured_bad_gateway(self): exc = BadGateway("testing") self.assertTrue(self._call_fut(exc)) + + +def test_DEFAULT_JOB_RETRY_predicate(): + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + from google.api_core.exceptions import ClientError + + assert not DEFAULT_JOB_RETRY._predicate(TypeError()) + assert not DEFAULT_JOB_RETRY._predicate(ClientError("fail")) + assert not DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="idk")]) + ) + + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="rateLimitExceeded")]) + ) + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="backendError")]) + ) + + +def test_DEFAULT_JOB_RETRY_deadline(): + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + + assert DEFAULT_JOB_RETRY._deadline == 600 From bf615e83045a36f2740a4b5543635b6e083236cf Mon Sep 17 00:00:00 2001 From: Grimmer Date: Thu, 12 Aug 2021 03:23:48 +0800 Subject: [PATCH 1256/2016] fix: make unicode characters working well in load_table_from_json (#865) Co-authored-by: Tim Swast Co-authored-by: Tres Seaver --- .../google/cloud/bigquery/client.py | 2 +- .../tests/unit/test_client.py | 36 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 8142c59cd7f7..cbac82548cbe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2762,7 +2762,7 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) - data_str = "\n".join(json.dumps(item) for item in json_rows) + data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows) encoded_str = data_str.encode() data_file = io.BytesIO(encoded_str) return self.load_table_from_file( diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 5356855113dc..671dd8da1d7b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -7775,6 +7775,42 @@ def test_load_table_from_json_w_invalid_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + def test_load_table_from_json_unicode_emoji_data_case(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + + emoji = "\U0001F3E6" + json_row = {"emoji": emoji} + json_rows = [json_row] + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json(json_rows, self.TABLE_REF) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + timeout=None, + ) + + sent_data_file = load_table_from_file.mock_calls[0][1][1] + + # make sure json_row's unicode characters are only encoded one time + expected_bytes = b'{"emoji": "' + emoji.encode("utf8") + b'"}' + assert sent_data_file.getvalue() == expected_bytes + # Low-level tests @classmethod From b479f511f1bb7fdf9df307f7e869909cc1d622ea Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 11 Aug 2021 19:46:11 +0000 Subject: [PATCH 1257/2016] chore: release 2.24.0 (#868) :robot: I have created a release \*beep\* \*boop\* --- ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) ### Features * add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) ([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e)) * make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6)) * retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc)) * support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc)) ### Bug Fixes * make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 15 +++++++++++++++ packages/google-cloud-bigquery/docs/conf.py | 1 - .../google/cloud/bigquery/version.py | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 856f1ecd1c0a..83b409015fa4 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) + + +### Features + +* add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) ([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e)) +* make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6)) +* retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc)) +* support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc)) + + +### Bug Fixes + +* make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) + ### [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 09f7ea414883..cb347160de42 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -110,7 +110,6 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", - "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index df992a051baf..84f6b4643038 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.3" +__version__ = "2.24.0" From 01025da542c5aa126b23147786b14eaf108e857a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 12 Aug 2021 12:15:45 +0200 Subject: [PATCH 1258/2016] chore(deps): update dependency google-cloud-bigquery to v2.24.0 (#873) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index d3e599101bb4..dfee339d426a 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.3 +google-cloud-bigquery==2.24.0 google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 1545ed96e2be..264899dff14d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.3 +google-cloud-bigquery==2.24.0 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From fe5d2518b88f304cce2dae0e892f69009784da55 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 13 Aug 2021 02:00:19 -0500 Subject: [PATCH 1259/2016] test: refactor `list_rows` tests and add test for scalars (#829) * test: refactor `list_rows` tests and add test for scalars * fix JSON formatting * add TODO for INTERVAL Arrow support * format tests --- .../tests/data/scalars.jsonl | 4 +- .../tests/data/scalars_extreme.jsonl | 10 +- .../tests/data/scalars_schema.json | 54 +++++---- .../tests/system/test_arrow.py | 36 +++++- .../tests/system/test_client.py | 48 -------- .../tests/system/test_list_rows.py | 112 ++++++++++++++++++ 6 files changed, 181 insertions(+), 83 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/system/test_list_rows.py diff --git a/packages/google-cloud-bigquery/tests/data/scalars.jsonl b/packages/google-cloud-bigquery/tests/data/scalars.jsonl index 4419a6e9ab0d..e06139e5c848 100644 --- a/packages/google-cloud-bigquery/tests/data/scalars.jsonl +++ b/packages/google-cloud-bigquery/tests/data/scalars.jsonl @@ -1,2 +1,2 @@ -{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} +{"bool_col": true, "bytes_col": "SGVsbG8sIFdvcmxkIQ==", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "interval_col": "P7Y11M9DT4H15M37.123456S", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "rowindex": 0, "string_col": "Hello, World!", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 1, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/packages/google-cloud-bigquery/tests/data/scalars_extreme.jsonl b/packages/google-cloud-bigquery/tests/data/scalars_extreme.jsonl index ceccd8dbcbc6..d0a33fdba066 100644 --- a/packages/google-cloud-bigquery/tests/data/scalars_extreme.jsonl +++ b/packages/google-cloud-bigquery/tests/data/scalars_extreme.jsonl @@ -1,5 +1,5 @@ -{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} -{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} -{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} -{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} +{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "interval_col": "P-10000Y0M-3660000DT-87840000H0M0S", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "rowindex": 0, "string_col": "Hello, World", "time_col": "23:59:59.999999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "interval_col": "P10000Y0M3660000DT87840000H0M0S", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "rowindex": 1, "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "interval_col": "P0Y0M0DT0H0M0.000001S", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "rowindex": 2, "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "interval_col": "P0Y0M0DT0H0M0S", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "rowindex": 3, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 4, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/packages/google-cloud-bigquery/tests/data/scalars_schema.json b/packages/google-cloud-bigquery/tests/data/scalars_schema.json index 00bd150fd835..676d37d56141 100644 --- a/packages/google-cloud-bigquery/tests/data/scalars_schema.json +++ b/packages/google-cloud-bigquery/tests/data/scalars_schema.json @@ -1,33 +1,33 @@ [ { "mode": "NULLABLE", - "name": "timestamp_col", - "type": "TIMESTAMP" + "name": "bool_col", + "type": "BOOLEAN" }, { "mode": "NULLABLE", - "name": "time_col", - "type": "TIME" + "name": "bignumeric_col", + "type": "BIGNUMERIC" }, { "mode": "NULLABLE", - "name": "float64_col", - "type": "FLOAT" + "name": "bytes_col", + "type": "BYTES" }, { "mode": "NULLABLE", - "name": "datetime_col", - "type": "DATETIME" + "name": "date_col", + "type": "DATE" }, { "mode": "NULLABLE", - "name": "bignumeric_col", - "type": "BIGNUMERIC" + "name": "datetime_col", + "type": "DATETIME" }, { "mode": "NULLABLE", - "name": "numeric_col", - "type": "NUMERIC" + "name": "float64_col", + "type": "FLOAT" }, { "mode": "NULLABLE", @@ -36,27 +36,37 @@ }, { "mode": "NULLABLE", - "name": "date_col", - "type": "DATE" + "name": "int64_col", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "string_col", - "type": "STRING" + "name": "interval_col", + "type": "INTERVAL" }, { "mode": "NULLABLE", - "name": "bool_col", - "type": "BOOLEAN" + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "mode": "REQUIRED", + "name": "rowindex", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "bytes_col", - "type": "BYTES" + "name": "string_col", + "type": "STRING" }, { "mode": "NULLABLE", - "name": "int64_col", - "type": "INTEGER" + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" } ] diff --git a/packages/google-cloud-bigquery/tests/system/test_arrow.py b/packages/google-cloud-bigquery/tests/system/test_arrow.py index f97488e39b0c..12f7af9cb1d6 100644 --- a/packages/google-cloud-bigquery/tests/system/test_arrow.py +++ b/packages/google-cloud-bigquery/tests/system/test_arrow.py @@ -14,8 +14,14 @@ """System tests for Arrow connector.""" +from typing import Optional + import pytest +from google.cloud import bigquery +from google.cloud.bigquery import enums + + pyarrow = pytest.importorskip( "pyarrow", minversion="3.0.0" ) # Needs decimal256 for BIGNUMERIC columns. @@ -31,17 +37,35 @@ ), ) def test_list_rows_nullable_scalars_dtypes( - bigquery_client, - scalars_table, - scalars_extreme_table, - max_results, - scalars_table_name, + bigquery_client: bigquery.Client, + scalars_table: str, + scalars_extreme_table: str, + max_results: Optional[int], + scalars_table_name: str, ): table_id = scalars_table if scalars_table_name == "scalars_extreme_table": table_id = scalars_extreme_table + + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. + schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + arrow_table = bigquery_client.list_rows( - table_id, max_results=max_results, + table_id, max_results=max_results, selected_fields=schema, ).to_arrow() schema = arrow_table.schema diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index f540611a63c1..06ef40126b23 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2428,54 +2428,6 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - def test_list_rows_empty_table(self): - from google.cloud.bigquery.table import RowIterator - - dataset_id = _make_dataset_id("empty_table") - dataset = self.temp_dataset(dataset_id) - table_ref = dataset.table("empty_table") - table = Config.CLIENT.create_table(bigquery.Table(table_ref)) - - # It's a bit silly to list rows for an empty table, but this does - # happen as the result of a DDL query from an IPython magic command. - rows = Config.CLIENT.list_rows(table) - self.assertIsInstance(rows, RowIterator) - self.assertEqual(tuple(rows), ()) - - def test_list_rows_page_size(self): - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.job import WriteDisposition - - num_items = 7 - page_size = 3 - num_pages, num_last_page = divmod(num_items, page_size) - - SF = bigquery.SchemaField - schema = [SF("string_col", "STRING", mode="NULLABLE")] - to_insert = [{"string_col": "item%d" % i} for i in range(num_items)] - rows = [json.dumps(row) for row in to_insert] - body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) - - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("nested_df")) - table = dataset.table(table_id) - self.to_delete.insert(0, table) - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON - job_config.schema = schema - # Load a table using a local JSON file from memory. - Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - - df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size) - pages = df.pages - - for i in range(num_pages): - page = next(pages) - self.assertEqual(page.num_items, page_size) - page = next(pages) - self.assertEqual(page.num_items, num_last_page) - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/packages/google-cloud-bigquery/tests/system/test_list_rows.py b/packages/google-cloud-bigquery/tests/system/test_list_rows.py new file mode 100644 index 000000000000..70388059ee5a --- /dev/null +++ b/packages/google-cloud-bigquery/tests/system/test_list_rows.py @@ -0,0 +1,112 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal + +from google.cloud import bigquery +from google.cloud.bigquery import enums + + +def test_list_rows_empty_table(bigquery_client: bigquery.Client, table_id: str): + from google.cloud.bigquery.table import RowIterator + + table = bigquery_client.create_table(table_id) + + # It's a bit silly to list rows for an empty table, but this does + # happen as the result of a DDL query from an IPython magic command. + rows = bigquery_client.list_rows(table) + assert isinstance(rows, RowIterator) + assert tuple(rows) == () + + +def test_list_rows_page_size(bigquery_client: bigquery.Client, table_id: str): + num_items = 7 + page_size = 3 + num_pages, num_last_page = divmod(num_items, page_size) + + to_insert = [{"string_col": "item%d" % i, "rowindex": i} for i in range(num_items)] + bigquery_client.load_table_from_json(to_insert, table_id).result() + + df = bigquery_client.list_rows( + table_id, + selected_fields=[bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING)], + page_size=page_size, + ) + pages = df.pages + + for i in range(num_pages): + page = next(pages) + assert page.num_items == page_size + page = next(pages) + assert page.num_items == num_last_page + + +def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str): + rows = sorted( + bigquery_client.list_rows(scalars_table), key=lambda row: row["rowindex"] + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"Hello, World!" + assert row["date_col"] == datetime.date(2021, 7, 21) + assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45) + assert row["geography_col"] == "POINT(-122.0838511 37.3860517)" + assert row["int64_col"] == 123456789 + assert row["numeric_col"] == decimal.Decimal("1.23456789") + assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819") + assert row["float64_col"] == 1.25 + assert row["string_col"] == "Hello, World!" + assert row["time_col"] == datetime.time(11, 41, 43, 76160) + assert row["timestamp_col"] == datetime.datetime( + 2021, 7, 21, 17, 43, 43, 945289, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[1] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 1 + else: + assert value is None + + +def test_list_rows_scalars_extreme( + bigquery_client: bigquery.Client, scalars_extreme_table: str +): + rows = sorted( + bigquery_client.list_rows(scalars_extreme_table), + key=lambda row: row["rowindex"], + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"\r\n" + assert row["date_col"] == datetime.date(9999, 12, 31) + assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + assert row["geography_col"] == "POINT(-135 90)" + assert row["int64_col"] == 9223372036854775807 + assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28") + assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37") + assert row["float64_col"] == float("Inf") + assert row["string_col"] == "Hello, World" + assert row["time_col"] == datetime.time(23, 59, 59, 999999) + assert row["timestamp_col"] == datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[4] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 4 + else: + assert value is None From 4e4b89fb875c3563a143e296096f02fee9c43717 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 13 Aug 2021 11:17:31 -0400 Subject: [PATCH 1260/2016] chore: drop mention of Python 2.7 from templates (#877) Source-Link: https://github.com/googleapis/synthtool/commit/facee4cc1ea096cd8bcc008bb85929daa7c414c0 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/docs/conf.py | 1 + packages/google-cloud-bigquery/samples/geography/noxfile.py | 6 +++--- packages/google-cloud-bigquery/samples/snippets/noxfile.py | 6 +++--- .../scripts/readme-gen/templates/install_deps.tmpl.rst | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 649877dc494c..a9fcd07cc43b 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc + digest: sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index cb347160de42..09f7ea414883 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 7dbea091476d..b008613f03ff 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -39,7 +39,7 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": [], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": False, @@ -86,8 +86,8 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. -# All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 7dbea091476d..b008613f03ff 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -39,7 +39,7 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": [], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": False, @@ -86,8 +86,8 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. -# All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst b/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst index a0406dba8c84..275d649890d7 100644 --- a/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst +++ b/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -12,7 +12,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. +#. Create a virtualenv. Samples are compatible with Python 3.6+. .. code-block:: bash From 44ee9bf93969e701642639701b00f11e3cb9f408 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 13 Aug 2021 12:54:09 -0600 Subject: [PATCH 1261/2016] fix: remove pytz dependency and require pyarrow>=3.0.0 (#875) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: remove pytz dependency * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix(deps): require pyarrow>=3.0.0 * remove version check for pyarrow * require pyarrow 3.0 in pandas extra * remove _BIGNUMERIC_SUPPORT references from tests Co-authored-by: Owl Bot Co-authored-by: Dina Graves Portman Co-authored-by: Tim Swast --- .../google-cloud-bigquery/docs/snippets.py | 5 +- .../google/cloud/bigquery/_pandas_helpers.py | 17 ++--- .../google/cloud/bigquery/table.py | 3 +- .../client_query_w_timestamp_params.py | 3 +- packages/google-cloud-bigquery/setup.py | 4 +- .../testing/constraints-3.6.txt | 2 +- .../tests/system/test_client.py | 14 ++-- .../tests/system/test_pandas.py | 42 +++++------ .../tests/unit/job/test_base.py | 4 +- .../tests/unit/test__pandas_helpers.py | 69 +++++++------------ .../tests/unit/test_client.py | 21 ++++-- .../tests/unit/test_table.py | 17 ++--- 12 files changed, 78 insertions(+), 123 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 3f9b9a88c7e2..c62001fc0317 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -363,7 +363,6 @@ def test_update_table_expiration(client, to_delete): # [START bigquery_update_table_expiration] import datetime - import pytz # from google.cloud import bigquery # client = bigquery.Client() @@ -375,7 +374,9 @@ def test_update_table_expiration(client, to_delete): assert table.expires is None # set table to expire 5 days from now - expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5) + expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=5 + ) table.expires = expiration table = client.update_table(table, ["expires"]) # API request diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index b381fa5f7aa1..f49980645d47 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -20,8 +20,6 @@ import queue import warnings -from packaging import version - try: import pandas except ImportError: # pragma: NO COVER @@ -110,6 +108,7 @@ def pyarrow_timestamp(): # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py # When modifying it be sure to update it there as well. BQ_TO_ARROW_SCALARS = { + "BIGNUMERIC": pyarrow_bignumeric, "BOOL": pyarrow.bool_, "BOOLEAN": pyarrow.bool_, "BYTES": pyarrow.binary, @@ -146,23 +145,15 @@ def pyarrow_timestamp(): pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - } - - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric # The exact decimal's scale and precision are not important, as only # the type ID matters, and it's the same for all decimal256 instances. - ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" - _BIGNUMERIC_SUPPORT = True - else: - _BIGNUMERIC_SUPPORT = False + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", + } else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER def bq_to_arrow_struct_data_type(field): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index d23885ebf472..62f8880018c5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -20,7 +20,6 @@ import datetime import functools import operator -import pytz import typing from typing import Any, Dict, Iterable, Iterator, Optional, Tuple import warnings @@ -1969,7 +1968,7 @@ def to_dataframe( # Pandas, we set the timestamp_as_object parameter to True, if necessary. types_to_check = { pyarrow.timestamp("us"), - pyarrow.timestamp("us", tz=pytz.UTC), + pyarrow.timestamp("us", tz=datetime.timezone.utc), } for column in record_batch: diff --git a/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py index ca8eec0b5e89..41a27770e771 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py @@ -18,7 +18,6 @@ def client_query_w_timestamp_params(): # [START bigquery_query_params_timestamps] import datetime - import pytz from google.cloud import bigquery # Construct a BigQuery client object. @@ -30,7 +29,7 @@ def client_query_w_timestamp_params(): bigquery.ScalarQueryParameter( "ts_value", "TIMESTAMP", - datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC), + datetime.datetime(2016, 12, 7, 8, 0, tzinfo=datetime.timezone.utc), ) ] ) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index e9deaf117a91..a1b3b61a01f4 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -54,9 +54,9 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 1.0.0, < 6.0dev", + "pyarrow >= 3.0.0, < 6.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"], + "pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index af6e82efd9a3..ce012f0d7b69 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -16,7 +16,7 @@ opentelemetry-sdk==0.11b0 pandas==0.23.0 proto-plus==1.10.0 protobuf==3.12.0 -pyarrow==1.0.0 +pyarrow==3.0.0 requests==2.18.0 six==1.13.0 tqdm==4.7.4 diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 06ef40126b23..4250111b4fed 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -30,7 +30,6 @@ import psutil import pytest -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers try: @@ -1972,15 +1971,12 @@ def test_query_w_query_params(self): "expected": {"friends": [phred_name, bharney_name]}, "query_parameters": [with_friends_param], }, + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + }, ] - if _BIGNUMERIC_SUPPORT: - examples.append( - { - "sql": "SELECT @bignum_param", - "expected": bignum, - "query_parameters": [bignum_param], - } - ) for example in examples: jconfig = QueryJobConfig() diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 821b375e1969..371dcea71359 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -24,10 +24,8 @@ import google.api_core.retry import pkg_resources import pytest -import pytz from google.cloud import bigquery -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers @@ -64,7 +62,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ( "dt_col", @@ -189,12 +187,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -216,12 +213,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): ("geo_col", nulls), ("int_col", nulls), ("num_col", nulls), + ("bignum_col", nulls), ("str_col", nulls), ("time_col", nulls), ("ts_col", nulls), ] - if _BIGNUMERIC_SUPPORT: - df_data.append(("bignum_col", nulls)) df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -297,12 +293,11 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -340,6 +335,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", ["abc", None, "def"]), ( "time_col", @@ -348,23 +351,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ( "ts_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), ], ), ] - if _BIGNUMERIC_SUPPORT: - df_data.append( - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ) - ) df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) @@ -484,10 +478,10 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( ( "ts_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc ), ], ), diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 0ac1d05b5363..c3f7854e349e 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -295,11 +295,11 @@ def test_user_email(self): @staticmethod def _datetime_and_millis(): import datetime - import pytz from google.cloud._helpers import _millis now = datetime.datetime.utcnow().replace( - microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision + microsecond=123000, + tzinfo=datetime.timezone.utc, # stats timestamps have ms precision ) return now, _millis(now) diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 0ba671cd987e..b9cb56572cfe 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -37,12 +37,10 @@ # used in test parameterization. pyarrow = mock.Mock() import pytest -import pytz from google import api_core from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT try: from google.cloud import bigquery_storage @@ -60,11 +58,6 @@ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") -skip_if_no_bignumeric = pytest.mark.skipif( - not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", -) - - @pytest.fixture def module_under_test(): from google.cloud.bigquery import _pandas_helpers @@ -153,9 +146,7 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), - pytest.param( - "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, - ), + ("BIGNUMERIC", "NULLABLE", is_bignumeric), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -234,11 +225,10 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), - pytest.param( + ( "BIGNUMERIC", "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), - marks=skip_if_no_bignumeric, ), ( "BOOLEAN", @@ -312,6 +302,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -321,9 +312,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -335,6 +323,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -343,8 +332,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected = pyarrow.struct(expected) assert pyarrow.types.is_struct(actual) @@ -363,6 +350,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -372,9 +360,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -386,6 +371,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -394,8 +380,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected_value_type = pyarrow.struct(expected) assert pyarrow.types.is_list(actual) @@ -441,7 +425,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), - pytest.param( + ( "BIGNUMERIC", [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), @@ -449,17 +433,18 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), decimal.Decimal("3.141592653589793238462643383279"), ], - marks=skip_if_no_bignumeric, ), ("BOOLEAN", [True, None, False, None]), ("BOOL", [False, None, True, None]), ( "TIMESTAMP", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), ], ), ( @@ -938,6 +923,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), + schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"), schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), schema.SchemaField("field10", "BOOL", mode="REQUIRED"), schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), @@ -946,8 +932,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) - if _BIGNUMERIC_SUPPORT: - bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) data = { "field01": ["hello", "world"], @@ -957,11 +941,15 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): "field05": [1.25, 9.75], "field06": [-1.75, -3.5], "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field08": [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], "field09": [True, False], "field10": [False, True], "field11": [ - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=datetime.timezone.utc), ], "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], @@ -971,11 +959,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): ], "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], } - if _BIGNUMERIC_SUPPORT: - data["field08"] = [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ] dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) @@ -1210,11 +1193,8 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), ) - if _BIGNUMERIC_SUPPORT: - current_schema += ( - schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), - ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, current_schema) @@ -1236,13 +1216,10 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"), schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), ) - if _BIGNUMERIC_SUPPORT: - expected_schema += ( - schema.SchemaField( - "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" - ), - ) by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 671dd8da1d7b..ca0dca975496 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -30,7 +30,6 @@ import packaging import requests import pytest -import pytz import pkg_resources try: @@ -5018,16 +5017,24 @@ def test_insert_rows_w_repeated_fields(self): ( 12, [ - datetime.datetime(2018, 12, 1, 12, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2018, 12, 1, 13, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 2018, 12, 1, 12, 0, 0, tzinfo=datetime.timezone.utc + ), + datetime.datetime( + 2018, 12, 1, 13, 0, 0, tzinfo=datetime.timezone.utc + ), ], [1.25, 2.5], ), { "score": 13, "times": [ - datetime.datetime(2018, 12, 2, 12, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2018, 12, 2, 13, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 2018, 12, 2, 12, 0, 0, tzinfo=datetime.timezone.utc + ), + datetime.datetime( + 2018, 12, 2, 13, 0, 0, tzinfo=datetime.timezone.utc + ), ], "distances": [-1.25, -2.5], }, @@ -6974,7 +6981,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ] ) @@ -7306,7 +7313,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ("string_col", ["abc", None, "def"]), ("bytes_col", [b"abc", b"def", None]), diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a5badc66c620..50d5733457a2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -20,9 +20,7 @@ import warnings import mock -import pkg_resources import pytest -import pytz import google.api_core.exceptions from test_utils.imports import maybe_fail_import @@ -44,11 +42,8 @@ try: import pyarrow import pyarrow.types - - PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) except ImportError: # pragma: NO COVER pyarrow = None - PYARROW_VERSION = pkg_resources.parse_version("0.0.1") try: from tqdm import tqdm @@ -58,9 +53,6 @@ from google.cloud.bigquery.dataset import DatasetReference -PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") - - def _mock_client(): from google.cloud.bigquery import client @@ -914,7 +906,9 @@ def test_mview_last_refresh_time(self): } self.assertEqual( table.mview_last_refresh_time, - datetime.datetime(2020, 11, 30, 15, 57, 22, 496000, tzinfo=pytz.utc), + datetime.datetime( + 2020, 11, 30, 15, 57, 22, 496000, tzinfo=datetime.timezone.utc + ), ) def test_mview_enable_refresh(self): @@ -2878,10 +2872,7 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): df = row_iterator.to_dataframe(create_bqstorage_client=False) - tzinfo = None - if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION: - tzinfo = datetime.timezone.utc - + tzinfo = datetime.timezone.utc self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows self.assertEqual(list(df.columns), ["some_timestamp"]) From 485d5917b9cdeaa88dffe1587e3637cda25b1405 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 19 Aug 2021 10:01:53 -0400 Subject: [PATCH 1262/2016] chore: release 2.24.1 (#879) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 83b409015fa4..5a3e74fd09cb 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) + + +### Bug Fixes + +* remove pytz dependency and require pyarrow>=3.0.0 ([#875](https://www.github.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e)) + ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 84f6b4643038..96f84438af2d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.24.0" +__version__ = "2.24.1" From 7d78e48b6a3f22c1118a3e6df03d2d731fa3e9cf Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 19 Aug 2021 17:29:00 +0200 Subject: [PATCH 1263/2016] chore(deps): update dependency google-cloud-bigquery to v2.24.1 (#887) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index dfee339d426a..ac804c81c5cd 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.24.0 +google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 264899dff14d..484e10516dc4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.24.0 +google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From c9726c96fbf958bd441daf396a9e3b6d639350cf Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 24 Aug 2021 10:29:04 -0400 Subject: [PATCH 1264/2016] feat: Support using GeoPandas for GEOGRAPHY columns (#848) --- packages/google-cloud-bigquery/docs/conf.py | 2 + .../docs/usage/pandas.rst | 15 ++ .../google/cloud/bigquery/_pandas_helpers.py | 71 ++++- .../google/cloud/bigquery/job/query.py | 119 ++++++++- .../google/cloud/bigquery/table.py | 196 +++++++++++++- packages/google-cloud-bigquery/owlbot.py | 4 + .../samples/geography/requirements.txt | 44 ++++ .../samples/geography/to_geodataframe.py | 32 +++ .../samples/geography/to_geodataframe_test.py | 25 ++ packages/google-cloud-bigquery/setup.py | 1 + .../testing/constraints-3.6.txt | 4 +- .../tests/system/test_client.py | 3 - .../tests/system/test_pandas.py | 143 +++++++++++ .../tests/unit/job/test_query_pandas.py | 130 +++++++++- .../tests/unit/test__pandas_helpers.py | 100 ++++++++ .../tests/unit/test_table.py | 242 ++++++++++++++++++ 16 files changed, 1102 insertions(+), 29 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/geography/to_geodataframe.py create mode 100644 packages/google-cloud-bigquery/samples/geography/to_geodataframe_test.py diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 09f7ea414883..59a2d8fb3429 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -366,6 +366,8 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "geopandas": ("https://geopandas.org/", None), } diff --git a/packages/google-cloud-bigquery/docs/usage/pandas.rst b/packages/google-cloud-bigquery/docs/usage/pandas.rst index 9db98dfbbccb..92eee67cf1b8 100644 --- a/packages/google-cloud-bigquery/docs/usage/pandas.rst +++ b/packages/google-cloud-bigquery/docs/usage/pandas.rst @@ -37,6 +37,21 @@ To retrieve table rows as a :class:`pandas.DataFrame`: :start-after: [START bigquery_list_rows_dataframe] :end-before: [END bigquery_list_rows_dataframe] + +Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame +------------------------------------------------------------ + +`GeoPandas `_ adds geospatial analytics +capabilities to Pandas. To retrieve query results containing +GEOGRAPHY data as a :class:`geopandas.GeoDataFrame`: + +.. literalinclude:: ../samples/geography/to_geodataframe.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_results_geodataframe] + :end-before: [END bigquery_query_results_geodataframe] + + Load a Pandas DataFrame to a BigQuery Table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index f49980645d47..ab58b17292d7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -24,6 +24,36 @@ import pandas except ImportError: # pragma: NO COVER pandas = None +else: + import numpy + +try: + # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` + from shapely.geometry.base import BaseGeometry as _BaseGeometry +except ImportError: # pragma: NO COVER + # No shapely, use NoneType for _BaseGeometry as a placeholder. + _BaseGeometry = type(None) +else: + if pandas is not None: # pragma: NO COVER + + def _to_wkb(): + # Create a closure that: + # - Adds a not-null check. This allows the returned function to + # be used directly with apply, unlike `shapely.wkb.dumps`. + # - Avoid extra work done by `shapely.wkb.dumps` that we don't need. + # - Caches the WKBWriter (and write method lookup :) ) + # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace. + from shapely.geos import WKBWriter, lgeos + + write = WKBWriter(lgeos).write + notnull = pandas.notnull + + def _to_wkb(v): + return write(v) if notnull(v) else v + + return _to_wkb + + _to_wkb = _to_wkb() try: import pyarrow @@ -69,6 +99,7 @@ "uint8": "INTEGER", "uint16": "INTEGER", "uint32": "INTEGER", + "geometry": "GEOGRAPHY", } @@ -193,14 +224,16 @@ def bq_to_arrow_data_type(field): return data_type_constructor() -def bq_to_arrow_field(bq_field): +def bq_to_arrow_field(bq_field, array_type=None): """Return the Arrow field, corresponding to a given BigQuery column. Returns: None: if the Arrow type cannot be determined. """ arrow_type = bq_to_arrow_data_type(bq_field) - if arrow_type: + if arrow_type is not None: + if array_type is not None: + arrow_type = array_type # For GEOGRAPHY, at least initially is_nullable = bq_field.mode.upper() == "NULLABLE" return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) @@ -225,7 +258,24 @@ def bq_to_arrow_schema(bq_schema): def bq_to_arrow_array(series, bq_field): - arrow_type = bq_to_arrow_data_type(bq_field) + if bq_field.field_type.upper() == "GEOGRAPHY": + arrow_type = None + first = _first_valid(series) + if first is not None: + if series.dtype.name == "geometry" or isinstance(first, _BaseGeometry): + arrow_type = pyarrow.binary() + # Convert shapey geometry to WKB binary format: + series = series.apply(_to_wkb) + elif isinstance(first, bytes): + arrow_type = pyarrow.binary() + elif series.dtype.name == "geometry": + # We have a GeoSeries containing all nulls, convert it to a pandas series + series = pandas.Series(numpy.array(series)) + + if arrow_type is None: + arrow_type = bq_to_arrow_data_type(bq_field) + else: + arrow_type = bq_to_arrow_data_type(bq_field) field_type_upper = bq_field.field_type.upper() if bq_field.field_type else "" @@ -279,6 +329,12 @@ def list_columns_and_indexes(dataframe): return columns_and_indexes +def _first_valid(series): + first_valid_index = series.first_valid_index() + if first_valid_index is not None: + return series.at[first_valid_index] + + def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. @@ -319,6 +375,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # Otherwise, try to automatically determine the type based on the # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) + if bq_type is None: + sample_data = _first_valid(dataframe[column]) + if ( + isinstance(sample_data, _BaseGeometry) + and sample_data is not None # Paranoia + ): + bq_type = "GEOGRAPHY" bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) @@ -450,11 +513,11 @@ def dataframe_to_arrow(dataframe, bq_schema): arrow_names = [] arrow_fields = [] for bq_field in bq_schema: - arrow_fields.append(bq_to_arrow_field(bq_field)) arrow_names.append(bq_field.name) arrow_arrays.append( bq_to_arrow_array(get_column_or_index(dataframe, bq_field.name), bq_field) ) + arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type)) if all((field is not None for field in arrow_fields)): return pyarrow.Table.from_arrays( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 3ab47b0f9b72..0cb4798be07b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -53,6 +53,7 @@ # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. import pandas + import geopandas import pyarrow from google.api_core import retry as retries from google.cloud import bigquery_storage @@ -1487,6 +1488,7 @@ def to_dataframe( create_bqstorage_client: bool = True, date_as_object: bool = True, max_results: Optional[int] = None, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1538,13 +1540,27 @@ def to_dataframe( .. versionadded:: 2.21.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. + pandas.DataFrame: + A :class:`~pandas.DataFrame` populated with row data + and column headers from the query results. The column + headers are derived from the destination table's + schema. Raises: - ValueError: If the `pandas` library cannot be imported. + ValueError: + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_dataframe( @@ -1553,6 +1569,101 @@ def to_dataframe( progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, date_as_object=date_as_object, + geography_as_object=geography_as_object, + ) + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_dataframe(), except for the max_results parameter + # that should only exist here in the QueryJob method. + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + max_results: Optional[int] = None, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Return a GeoPandas GeoDataFrame from a QueryJob + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. + + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. + + .. versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + .. versionadded:: 1.24.0 + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + .. versionadded:: 1.26.0 + + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. + + .. versionadded:: 2.21.0 + + geography_column (Optional[str]): + If there are more than one GEOGRAPHY column, + identifies which one to use to construct a GeoPandas + GeoDataFrame. This option can be ommitted if there's + only one GEOGRAPHY column. + + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + .. versionadded:: 2.24.0 + """ + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) + return query_result.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, ) def __iter__(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 62f8880018c5..609c0b57edfa 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -29,6 +29,20 @@ except ImportError: # pragma: NO COVER pandas = None +try: + import geopandas +except ImportError: + geopandas = None +else: + _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" + +try: + import shapely.geos +except ImportError: + shapely = None +else: + _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read + try: import pyarrow except ImportError: # pragma: NO COVER @@ -52,6 +66,7 @@ # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas + import geopandas import pyarrow from google.cloud import bigquery_storage @@ -60,6 +75,14 @@ "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) +_NO_GEOPANDAS_ERROR = ( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." +) +_NO_SHAPELY_ERROR = ( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." +) _NO_PYARROW_ERROR = ( "The pyarrow library is not installed, please install " "pyarrow to use the to_arrow() function." @@ -1878,6 +1901,7 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, date_as_object: bool = True, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1933,6 +1957,13 @@ def to_dataframe( .. versionadded:: 1.26.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -1941,13 +1972,18 @@ def to_dataframe( Raises: ValueError: - If the :mod:`pandas` library cannot be imported, or the - :mod:`google.cloud.bigquery_storage_v1` module is - required but cannot be imported. + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ if pandas is None: raise ValueError(_NO_PANDAS_ERROR) + if geography_as_object and shapely is None: + raise ValueError(_NO_SHAPELY_ERROR) + if dtypes is None: dtypes = {} @@ -1988,8 +2024,136 @@ def to_dataframe( for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) + if geography_as_object: + for field in self.schema: + if field.field_type.upper() == "GEOGRAPHY": + df[field.name] = df[field.name].dropna().apply(_read_wkt) + return df + # If changing the signature of this method, make sure to apply the same + # changes to job.QueryJob.to_geodataframe() + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Create a GeoPandas GeoDataFrame by loading all pages of a query. + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + geography_column (Optional[str]): + If there are more than one GEOGRAPHY column, + identifies which one to use to construct a geopandas + GeoDataFrame. This option can be ommitted if there's + only one GEOGRAPHY column. + + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + .. versionadded:: 2.24.0 + """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + + geography_columns = set( + field.name + for field in self.schema + if field.field_type.upper() == "GEOGRAPHY" + ) + if not geography_columns: + raise TypeError( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ) + + if geography_column: + if geography_column not in geography_columns: + raise ValueError( + f"The given geography column, {geography_column}, doesn't name" + f" a GEOGRAPHY column in the result." + ) + elif len(geography_columns) == 1: + [geography_column] = geography_columns + else: + raise ValueError( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame" + ) + + df = self.to_dataframe( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + return geopandas.GeoDataFrame( + df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column + ) + class _EmptyRowIterator(RowIterator): """An empty row iterator. @@ -2042,6 +2206,7 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, date_as_object=True, + geography_as_object=False, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2059,6 +2224,31 @@ def to_dataframe( raise ValueError(_NO_PANDAS_ERROR) return pandas.DataFrame() + def to_geodataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=True, + date_as_object=True, + geography_column: Optional[str] = None, + ) -> "pandas.DataFrame": + """Create an empty dataframe. + + Args: + bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. + dtypes (Any): Ignored. Added for compatibility with RowIterator. + progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. + create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + date_as_object (bool): Ignored. Added for compatibility with RowIterator. + + Returns: + pandas.DataFrame: An empty :class:`~pandas.DataFrame`. + """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM) + def to_dataframe_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 09845480a8af..ea9904cdbf84 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -97,6 +97,10 @@ samples=True, microgenerator=True, split_system_tests=True, + intersphinx_dependencies={ + "pandas": 'http://pandas.pydata.org/pandas-docs/dev', + "geopandas": "https://geopandas.org/", + } ) # BigQuery has a custom multiprocessing note diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index ac804c81c5cd..7a76b4033ff6 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,48 @@ +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +cffi==1.14.6 +charset-normalizer==2.0.4 +click==8.0.1 +click-plugins==1.1.1 +cligj==0.7.2 +dataclasses==0.6; python_version < '3.7' +Fiona==1.8.20 geojson==2.5.0 +geopandas==0.9.0 +google-api-core==1.31.2 +google-auth==1.35.0 google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 +google-cloud-core==1.7.2 +google-crc32c==1.1.2 +google-resumable-media==1.3.3 +googleapis-common-protos==1.53.0 +grpcio==1.39.0 +idna==3.2 +importlib-metadata==4.6.4 +libcst==0.3.20 +munch==2.5.0 +mypy-extensions==0.4.3 +numpy==1.19.5 +packaging==21.0 +pandas==1.1.5 +proto-plus==1.19.0 +protobuf==3.17.3 +pyarrow==5.0.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pyparsing==2.4.7 +pyproj==3.0.1 +python-dateutil==2.8.2 +pytz==2021.1 +PyYAML==5.4.1 +requests==2.26.0 +rsa==4.7.2 Shapely==1.7.1 +six==1.16.0 +typing-extensions==3.10.0.0 +typing-inspect==0.7.1 +urllib3==1.26.6 +zipp==3.5.0 diff --git a/packages/google-cloud-bigquery/samples/geography/to_geodataframe.py b/packages/google-cloud-bigquery/samples/geography/to_geodataframe.py new file mode 100644 index 000000000000..fa8073fefba4 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/to_geodataframe.py @@ -0,0 +1,32 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + +client = bigquery.Client() + + +def get_austin_service_requests_as_geography(): + # [START bigquery_query_results_geodataframe] + + sql = """ + SELECT created_date, complaint_description, + ST_GEOGPOINT(longitude, latitude) as location + FROM bigquery-public-data.austin_311.311_service_requests + LIMIT 10 + """ + + df = client.query(sql).to_geodataframe() + # [END bigquery_query_results_geodataframe] + return df diff --git a/packages/google-cloud-bigquery/samples/geography/to_geodataframe_test.py b/packages/google-cloud-bigquery/samples/geography/to_geodataframe_test.py new file mode 100644 index 000000000000..7a2ba6937c3d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/to_geodataframe_test.py @@ -0,0 +1,25 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from .to_geodataframe import get_austin_service_requests_as_geography + + +def test_get_austin_service_requests_as_geography(): + geopandas = pytest.importorskip("geopandas") + df = get_austin_service_requests_as_geography() + assert isinstance(df, geopandas.GeoDataFrame) + assert len(list(df)) == 3 # verify the number of columns + assert len(df) == 10 # verify the number of rows diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index a1b3b61a01f4..e7515493d195 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -56,6 +56,7 @@ "grpcio >= 1.38.1, < 2.0dev", "pyarrow >= 3.0.0, < 6.0dev", ], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index ce012f0d7b69..be1a992fadae 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 +geopandas==0.9.0 google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 @@ -13,10 +14,11 @@ grpcio==1.38.1 opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 -pandas==0.23.0 +pandas==0.24.2 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 requests==2.18.0 +shapely==1.6.0 six==1.13.0 tqdm==4.7.4 diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 4250111b4fed..9da45ee6e226 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2360,9 +2360,6 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) - def _fetch_dataframe(self, query): - return Config.CLIENT.query(query).result().to_dataframe() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 371dcea71359..836f93210019 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -798,3 +798,146 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) assert len(dataframe.index) == 100 + + +def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + bigquery_client.query( + f"create table {dataset_id}.lake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.lake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('point(0 1)')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.lake order by name" + ).to_dataframe(geography_as_object=True) + assert list(df["name"]) == ["bar", "baz", "foo"] + assert df["geog"][0] == wkt.loads("point(0 1)") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") + + +def test_to_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + from shapely import wkt + + bigquery_client.query( + f"create table {dataset_id}.geolake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.geolake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('polygon((0 0, 1 0, 1 1, 0 0))')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.geolake order by name" + ).to_geodataframe() + assert df["geog"][0] == wkt.loads("polygon((0 0, 1 0, 1 1, 0 0))") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") + assert isinstance(df, geopandas.GeoDataFrame) + assert isinstance(df["geog"], geopandas.GeoSeries) + assert df.area[0] == 0.5 + assert pandas.isna(df.area[1]) + assert df.area[2] == 0.0 + assert df.crs.srs == "EPSG:4326" + assert df.crs.name == "WGS 84" + assert df.geog.crs.srs == "EPSG:4326" + assert df.geog.crs.name == "WGS 84" + + +def test_load_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + import pandas + from shapely import wkt + from google.cloud.bigquery.schema import SchemaField + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + + table_id = f"{dataset_id}.lake_from_gp" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo1", "GEOGRAPHY", "NULLABLE"), + SchemaField("geo2", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", None, "POINT(1 1)"], + ["foo", None, None], + ] + + +def test_load_dataframe_w_shapely(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkt.loads("Point(1 1)")]) + ) + + table_id = f"{dataset_id}.lake_from_shapes" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] + + bigquery_client.load_table_from_dataframe(df, table_id).result() + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["bar", "POINT(1 1)"], + ["foo", None], + ["foo", None], + ] + + +def test_load_dataframe_w_wkb(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from shapely import wkb + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkb.dumps(wkt.loads("Point(1 1)"))]) + ) + + table_id = f"{dataset_id}.lake_from_wkb" + # We create the table first, to inform the interpretation of the wkb data + bigquery_client.query( + f"create table {table_id} (name string, geo GEOGRAPHY)" + ).result() + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index c537802f4eee..b5af90c0bc5e 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -23,6 +23,14 @@ import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import shapely +except (ImportError, AttributeError): # pragma: NO COVER + shapely = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None try: import pyarrow except (ImportError, AttributeError): # pragma: NO COVER @@ -425,38 +433,41 @@ def test_to_arrow_w_tqdm_wo_query_plan(): result_patch_tqdm.assert_called() -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_to_dataframe(): +def _make_job(schema=(), rows=()): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") query_resource = { "jobComplete": True, "jobReference": begun_resource["jobReference"], - "totalRows": "4", + "totalRows": str(len(rows)), "schema": { "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + dict(name=field[0], type=field[1], mode=field[2]) for field in schema ] }, } - tabledata_resource = { - "rows": [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - } + tabledata_resource = {"rows": [{"f": [{"v": v} for v in row]} for row in rows]} done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} connection = _make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) - job = target_class.from_api_repr(begun_resource, client) + return target_class.from_api_repr(begun_resource, client) + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("age", "INTEGER", "NULLABLE")), + ( + ("Phred Phlyntstone", "32"), + ("Bharney Rhubble", "33"), + ("Wylma Phlyntstone", "29"), + ("Bhettye Rhubble", "27"), + ), + ) df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) @@ -868,3 +879,94 @@ def test_to_dataframe_w_tqdm_max_results(): result_patch_tqdm.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(shapely is None, reason="Requires `shapely`") +def test_to_dataframe_geography_as_object(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) + df = job.to_dataframe(create_bqstorage_client=False, geography_as_object=True) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", "geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + "Point", + "Point", + "float", + ] # float because nan + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_to_geodataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) + df = job.to_geodataframe(create_bqstorage_client=False) + + assert isinstance(df, geopandas.GeoDataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", "geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + "Point", + "Point", + "NoneType", + ] # float because nan + assert isinstance(df.geog, geopandas.GeoSeries) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@mock.patch("google.cloud.bigquery.job.query.wait_for_query") +def test_query_job_to_geodataframe_delegation(wait_for_query): + """ + QueryJob.to_geodataframe just delegates to RowIterator.to_geodataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for RowIterator. + """ + import numpy + + job = _make_job() + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" + create_bqstorage_client = False + date_as_object = False + max_results = 42 + geography_column = "g" + + df = job.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + max_results=max_results, + geography_column=geography_column, + ) + + wait_for_query.assert_called_once_with( + job, progress_bar_type, max_results=max_results + ) + row_iterator = wait_for_query.return_value + row_iterator.to_geodataframe.assert_called_once_with( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + assert df is row_iterator.to_geodataframe.return_value diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index b9cb56572cfe..a9b0ae21f402 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -36,6 +36,11 @@ # Mock out pyarrow when missing, because methods from pyarrow.types are # used in test parameterization. pyarrow = mock.Mock() +try: + import geopandas +except ImportError: # pragma: NO COVER + geopandas = None + import pytest from google import api_core @@ -584,6 +589,60 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_dtype(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = geopandas.GeoSeries([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = geopandas.GeoSeries([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = pandas.Series([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkb.dumps(wkt.loads("point(0 0)"))]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == list(series) + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( @@ -1158,6 +1217,28 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): assert "struct_field" in str(expected_warnings[0]) +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_dataframe_to_bq_schema_geography(module_under_test): + from shapely import wkt + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( + schema.SchemaField("name", "STRING"), + schema.SchemaField("geo1", "GEOGRAPHY"), + schema.SchemaField("geo2", "GEOGRAPHY"), + ) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): @@ -1554,3 +1635,22 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test) def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) + + +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_field_type_override(module_under_test): + # When loading pandas data, we may need to override the type + # decision based on data contents, because GEOGRAPHY data can be + # stored as either text or binary. + + assert ( + module_under_test.bq_to_arrow_field(schema.SchemaField("g", "GEOGRAPHY")).type + == pyarrow.string() + ) + + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", "GEOGRAPHY"), pyarrow.binary(), + ).type + == pyarrow.binary() + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 50d5733457a2..1ce930ee4b22 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -14,6 +14,7 @@ import datetime import logging +import re import time import types import unittest @@ -39,6 +40,11 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None + try: import pyarrow import pyarrow.types @@ -1842,6 +1848,27 @@ def test_to_dataframe_iterable(self): self.assertEqual(len(df), 0) # Verify the number of rows. self.assertEqual(len(df.columns), 0) + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_if_geopandas_is_none(self): + row_iterator = self._make_one() + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one() + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 0) # verify the number of rows + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + class TestRowIterator(unittest.TestCase): def _class_under_test(self): @@ -1879,6 +1906,16 @@ def _make_one( client, api_request, path, schema, table=table, **kwargs ) + def _make_one_from_data(self, schema=(), rows=()): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField(*a) for a in schema] + rows = [{"f": [{"v": v} for v in row]} for row in rows] + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + return self._make_one(_mock_client(), api_request, path, schema) + def test_constructor(self): from google.cloud.bigquery.table import _item_to_row from google.cloud.bigquery.table import _rows_page_start @@ -3170,6 +3207,18 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @mock.patch("google.cloud.bigquery.table.shapely", new=None) + def test_to_dataframe_error_if_shapely_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." + ), + ): + self._make_one_from_data().to_dataframe(geography_as_object=True) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -3927,6 +3976,199 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_dataframe_geography_as_object(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_dataframe( + create_bqstorage_client=False, geography_as_object=True, + ) + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "object") + self.assertIsInstance(df.geog, pandas.Series) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "float", "Polygon"] + ) + + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_error_if_geopandas_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." + ), + ): + self._make_one_from_data().to_geodataframe() + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + self.assertEqual(df.geog.crs.srs, "EPSG:4326") + self.assertEqual(df.geog.crs.name, "WGS 84") + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_ambiguous_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame" + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_bad_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "The given geography column, xxx, doesn't name" + " a GEOGRAPHY column in the result." + ), + ): + row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column="xxx" + ) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_no_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "STRING")), () + ) + with self.assertRaisesRegex( + TypeError, + re.escape( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_w_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)", "Point(1 1)"), + ("bar", None, "Point(2 2)"), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))", "Point(3 3)"), + ), + ) + df = row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column="geog" + ) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog", "geog2"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertEqual(df.geog2.dtype.name, "object") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"] + ) + + # Geog2 isn't a GeoSeries, but it contains geomentries: + self.assertIsInstance(df.geog2, pandas.Series) + self.assertEqual( + [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"] + ) + # and can easily be converted to a GeoSeries + self.assertEqual( + list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"] + ) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") + def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): + """ + RowIterator.to_geodataframe just delegates to RowIterator.to_dataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for to_dataframe. + """ + import numpy + from shapely import wkt + + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("g", "GEOGRAPHY")) + ) + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" + create_bqstorage_client = False + date_as_object = False + geography_column = "g" + + to_dataframe.return_value = pandas.DataFrame( + dict(name=["foo"], g=[wkt.loads("point(0 0)")],) + ) + + df = row_iterator.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + + to_dataframe.assert_called_once_with( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + self.assertEqual(list(df), ["name", "g"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.g.dtype.name, "geometry") + self.assertIsInstance(df.g, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0"]) + self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"]) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From d6d29142e11bcd16c66c066b933b9271b41b1e95 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 24 Aug 2021 09:33:04 -0600 Subject: [PATCH 1265/2016] test: Add test of datetime and time pandas load (#895) --- .../tests/system/test_pandas.py | 64 +++++++++++++++---- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 836f93210019..93ce23481f58 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -279,8 +279,6 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id): # Schema with all scalar types. - # TODO: Uploading DATETIME columns currently fails, thus that field type - # is temporarily removed from the test. # See: # https://github.com/googleapis/python-bigquery/issues/61 # https://issuetracker.google.com/issues/151765076 @@ -288,7 +286,7 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("bytes_col", "BYTES"), bigquery.SchemaField("date_col", "DATE"), - # bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("dt_col", "DATETIME"), bigquery.SchemaField("float_col", "FLOAT"), bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), @@ -313,14 +311,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ("bool_col", [True, None, False]), ("bytes_col", [b"abc", None, b"def"]), ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ), ("float_col", [float("-inf"), float("nan"), float("inf")]), ( "geo_col", @@ -800,6 +798,50 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): assert len(dataframe.index) == 100 +def test_upload_time_and_datetime_56(bigquery_client, dataset_id): + df = pandas.DataFrame( + dict( + dt=[ + datetime.datetime(2020, 1, 8, 8, 0, 0), + datetime.datetime( + 2020, + 1, + 8, + 8, + 0, + 0, + tzinfo=datetime.timezone(datetime.timedelta(hours=-7)), + ), + ], + t=[datetime.time(0, 0, 10, 100001), None], + ) + ) + table = f"{dataset_id}.test_upload_time_and_datetime" + bigquery_client.load_table_from_dataframe(df, table).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [ + datetime.datetime(2020, 1, 8, 8, 0, tzinfo=datetime.timezone.utc), + datetime.time(0, 0, 10, 100001), + ], + [datetime.datetime(2020, 1, 8, 15, 0, tzinfo=datetime.timezone.utc), None], + ] + + from google.cloud.bigquery import job, schema + + table = f"{dataset_id}.test_upload_time_and_datetime_dt" + config = job.LoadJobConfig( + schema=[schema.SchemaField("dt", "DATETIME"), schema.SchemaField("t", "TIME")] + ) + + bigquery_client.load_table_from_dataframe(df, table, job_config=config).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [datetime.datetime(2020, 1, 8, 8, 0), datetime.time(0, 0, 10, 100001)], + [datetime.datetime(2020, 1, 8, 15, 0), None], + ] + + def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): wkt = pytest.importorskip("shapely.wkt") bigquery_client.query( From 9c07710455677e1ea709051830a7c27ba51f5c14 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 24 Aug 2021 15:36:00 -0600 Subject: [PATCH 1266/2016] chore: release 2.25.0 (#898) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Jim Fulton --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 5a3e74fd09cb..7a5727ee74c9 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24) + + +### Features + +* Support using GeoPandas for GEOGRAPHY columns ([#848](https://www.github.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13)) + ### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 96f84438af2d..f882cac3a292 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.24.1" +__version__ = "2.25.0" From 49b684dbc64f91696ca9632ba6b269f6c34a7d97 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 15:44:47 +0200 Subject: [PATCH 1267/2016] chore(deps): update dependency numpy to v1.21.2 (#899) * chore(deps): update dependency numpy to v1.21.2 * Update samples/geography/requirements.txt Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- .../google-cloud-bigquery/samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 7a76b4033ff6..82a45e3e849c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -24,7 +24,8 @@ importlib-metadata==4.6.4 libcst==0.3.20 munch==2.5.0 mypy-extensions==0.4.3 -numpy==1.19.5 +numpy==1.19.5; python_version < "3.7" +numpy==1.21.2; python_version > "3.6" packaging==21.0 pandas==1.1.5 proto-plus==1.19.0 From 5137b6c2737da8e4835a1a51cf1ec3f9f14747f3 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 16:28:49 +0200 Subject: [PATCH 1268/2016] chore(deps): update dependency google-cloud-core to v2 (#904) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 82a45e3e849c..853306d71658 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -14,7 +14,7 @@ google-api-core==1.31.2 google-auth==1.35.0 google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 -google-cloud-core==1.7.2 +google-cloud-core==2.0.0 google-crc32c==1.1.2 google-resumable-media==1.3.3 googleapis-common-protos==1.53.0 From 7ca8d3b4008ccd853fbaeec41b45af86b5087392 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 25 Aug 2021 16:52:09 +0200 Subject: [PATCH 1269/2016] fix: use REST API in cell magic when requested (#892) Fixes #876. The `--use_rest_api` option did not work as expected and this commit fixes it. **PR checklist:** - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- .../google/cloud/bigquery/magics/magics.py | 12 ++++++++--- .../tests/unit/test_magics.py | 21 ++++++++++++++----- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 2b8c2928e5c7..d368bbeaa7c0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -671,7 +671,9 @@ def _cell_magic(line, query): _handle_error(ex, args.destination_var) return - result = rows.to_dataframe(bqstorage_client=bqstorage_client) + result = rows.to_dataframe( + bqstorage_client=bqstorage_client, create_bqstorage_client=False, + ) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) return @@ -728,11 +730,15 @@ def _cell_magic(line, query): if max_results: result = query_job.result(max_results=max_results).to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) else: result = query_job.to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=bqstorage_client, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) if args.destination_var: diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index d030482cc60d..88c92a0709e6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -660,7 +660,9 @@ def warning_match(warning): assert client_info.user_agent == "ipython-" + IPython.__version__ query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock, progress_bar_type="tqdm" + bqstorage_client=bqstorage_instance_mock, + create_bqstorage_client=mock.ANY, + progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -703,7 +705,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, progress_bar_type="tqdm" + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -757,7 +761,12 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): client_query_mock.return_value = query_job_mock ip.run_cell_magic("bigquery", "--max_results=5", sql) - query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.return_value.to_dataframe.assert_called_once_with( + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=mock.ANY, + ) @pytest.mark.usefixtures("ipython_interactive") @@ -929,7 +938,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip.run_cell_magic("bigquery", "--max_results=5", table_id) row_iterator_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock + bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY, ) @@ -1246,7 +1255,9 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, progress_bar_type=magics.context.progress_bar_type + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=magics.context.progress_bar_type, ) assert isinstance(return_value, pandas.DataFrame) From 4745e0c46da55b2f6fdec2b27a43d304c12e7a54 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 25 Aug 2021 10:12:25 -0500 Subject: [PATCH 1270/2016] fix: populate default `timeout` and retry after client-side timeout (#896) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This addresses internal issue 195337762 where sometimes query job creation can take longer than expected and retrying the API call can be faster than waiting for the first query job request to fail. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #889 Towards https://github.com/googleapis/python-bigquery/issues/779 🦕 --- .../google/cloud/bigquery/client.py | 123 ++++----- .../google/cloud/bigquery/retry.py | 8 + packages/google-cloud-bigquery/noxfile.py | 4 - .../tests/unit/test_client.py | 242 ++++++++++-------- .../tests/unit/test_create_dataset.py | 19 +- .../tests/unit/test_delete_dataset.py | 7 +- .../tests/unit/test_list_datasets.py | 11 +- .../tests/unit/test_list_jobs.py | 19 +- .../tests/unit/test_list_models.py | 12 +- .../tests/unit/test_list_projects.py | 11 +- .../tests/unit/test_list_routines.py | 12 +- .../tests/unit/test_list_tables.py | 16 +- .../tests/unit/test_magics.py | 5 +- .../tests/unit/test_retry.py | 12 + 14 files changed, 282 insertions(+), 219 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index cbac82548cbe..023346ffa292 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -76,17 +76,24 @@ from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( + CopyJob, + CopyJobConfig, + ExtractJob, + ExtractJobConfig, + LoadJob, LoadJobConfig, QueryJob, QueryJobConfig, - CopyJobConfig, - ExtractJobConfig, ) from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults -from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY +from google.cloud.bigquery.retry import ( + DEFAULT_JOB_RETRY, + DEFAULT_RETRY, + DEFAULT_TIMEOUT, +) from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -245,7 +252,7 @@ def get_service_account_email( self, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> str: """Get the email address of the project's BigQuery service account @@ -292,7 +299,7 @@ def list_projects( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -358,7 +365,7 @@ def list_datasets( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -549,7 +556,7 @@ def create_dataset( dataset: Union[str, Dataset, DatasetReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """API call: create the dataset via a POST request. @@ -624,7 +631,7 @@ def create_routine( routine: Routine, exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Create a routine via a POST request. @@ -679,7 +686,7 @@ def create_table( table: Union[str, Table, TableReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """API call: create a table via a PUT request @@ -751,7 +758,7 @@ def get_dataset( self, dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -795,7 +802,7 @@ def get_iam_policy( table: Union[Table, TableReference], requested_policy_version: int = 1, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -825,7 +832,7 @@ def set_iam_policy( policy: Policy, updateMask: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -858,7 +865,7 @@ def test_iam_permissions( table: Union[Table, TableReference], permissions: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -883,7 +890,7 @@ def get_model( self, model_ref: Union[ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. @@ -926,7 +933,7 @@ def get_routine( self, routine_ref: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. @@ -970,7 +977,7 @@ def get_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Fetch the table referenced by ``table``. @@ -1012,7 +1019,7 @@ def update_dataset( dataset: Dataset, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Change some fields of a dataset. @@ -1082,7 +1089,7 @@ def update_model( model: Model, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Change some fields of a model. @@ -1146,7 +1153,7 @@ def update_routine( routine: Routine, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Change some fields of a routine. @@ -1220,7 +1227,7 @@ def update_table( table: Table, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Change some fields of a table. @@ -1286,7 +1293,7 @@ def list_models( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1363,7 +1370,7 @@ def list_routines( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1440,7 +1447,7 @@ def list_tables( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1515,7 +1522,7 @@ def delete_dataset( dataset: Union[Dataset, DatasetReference, str], delete_contents: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a dataset. @@ -1574,7 +1581,7 @@ def delete_model( self, model: Union[Model, ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a model @@ -1624,12 +1631,12 @@ def delete_model( def delete_job_metadata( self, - job_id, - project=None, - location=None, - retry=DEFAULT_RETRY, - timeout=None, - not_found_ok=False, + job_id: Union[str, LoadJob, CopyJob, ExtractJob, QueryJob], + project: Optional[str] = None, + location: Optional[str] = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = DEFAULT_TIMEOUT, + not_found_ok: bool = False, ): """[Beta] Delete job metadata from job history. @@ -1637,26 +1644,20 @@ def delete_job_metadata( :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. Args: - job_id (Union[ \ - str, \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]): Job identifier. + job_id: Job or job identifier. Keyword Arguments: - project (Optional[str]): + project: ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): + location: Location where the job was run. Ignored if ``job_id`` is a job object. - retry (Optional[google.api_core.retry.Retry]): + retry: How to retry the RPC. - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok (Optional[bool]): + not_found_ok: Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the job. """ @@ -1697,7 +1698,7 @@ def delete_routine( self, routine: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a routine. @@ -1751,7 +1752,7 @@ def delete_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a table @@ -1804,7 +1805,7 @@ def _get_query_results( project: str = None, timeout_ms: int = None, location: str = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -1893,7 +1894,7 @@ def create_job( self, job_config: dict, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: @@ -1990,7 +1991,7 @@ def get_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. @@ -2064,7 +2065,7 @@ def cancel_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. @@ -2141,7 +2142,7 @@ def list_jobs( all_users: bool = None, state_filter: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, page_size: int = None, @@ -2256,7 +2257,7 @@ def load_table_from_uri( project: str = None, job_config: LoadJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. @@ -2340,7 +2341,7 @@ def load_table_from_file( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2443,7 +2444,7 @@ def load_table_from_dataframe( project: str = None, job_config: LoadJobConfig = None, parquet_compression: str = "snappy", - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. @@ -2678,7 +2679,7 @@ def load_table_from_json( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -2961,7 +2962,7 @@ def copy_table( project: str = None, job_config: CopyJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.CopyJob: """Copy one or more tables to another table. @@ -3064,7 +3065,7 @@ def extract_table( project: str = None, job_config: ExtractJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, source_type: str = "Table", ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. @@ -3162,7 +3163,7 @@ def query( location: str = None, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3444,7 +3445,7 @@ def insert_rows_json( ignore_unknown_values: bool = None, template_suffix: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. @@ -3579,7 +3580,7 @@ def list_partitions( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[str]: """List the partitions in a table. @@ -3629,7 +3630,7 @@ def list_rows( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of the table. @@ -3741,7 +3742,7 @@ def _list_rows_from_query_results( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of a completed query. See diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index e9286055c56d..8305823225d4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -29,6 +29,7 @@ exceptions.BadGateway, requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, + requests.exceptions.Timeout, auth_exceptions.TransportError, ) @@ -59,6 +60,13 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ +DEFAULT_TIMEOUT = 5.0 * 60.0 +"""The default API timeout. + +This is the time to wait per request. To adjust the total wait time, set a +deadline on the retry object. +""" + job_retry_reasons = "rateLimitExceeded", "backendError" diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 0dfe7bf93350..9077924e98ce 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -160,10 +160,6 @@ def snippets(session): if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false": session.skip("RUN_SNIPPETS_TESTS is set to false, skipping") - # Sanity check: Only run snippets tests if the environment variable is set. - if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): - session.skip("Credentials must be set via environment variable.") - constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ca0dca975496..e9204f1de47b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -56,6 +56,7 @@ import google.cloud._helpers from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT try: from google.cloud import bigquery_storage @@ -367,7 +368,7 @@ def test__get_query_results_miss_w_client_location(self): method="GET", path="/projects/PROJECT/queries/nothere", query_params={"maxResults": 0, "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test__get_query_results_hit(self): @@ -428,7 +429,9 @@ def test_get_service_account_email_w_alternate_project(self): service_account_email = client.get_service_account_email(project=project) final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_once_with(method="GET", path=path, timeout=None) + conn.api_request.assert_called_once_with( + method="GET", path=path, timeout=DEFAULT_TIMEOUT + ) self.assertEqual(service_account_email, email) def test_get_service_account_email_w_custom_retry(self): @@ -771,7 +774,7 @@ def test_create_routine_w_conflict(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") @@ -807,7 +810,7 @@ def test_span_status_is_set(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) def test_create_routine_w_conflict_exists_ok(self): @@ -843,11 +846,13 @@ def test_create_routine_w_conflict_exists_ok(self): self.assertEqual(actual_routine.routine_id, "minimal_routine") conn.api_request.assert_has_calls( [ - mock.call(method="POST", path=path, data=resource, timeout=None,), + mock.call( + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + ), mock.call( method="GET", path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -923,7 +928,7 @@ def test_create_table_w_custom_property(self): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") self.assertEqual(got.table_id, self.TABLE_ID) @@ -964,7 +969,7 @@ def test_create_table_w_encryption_configuration(self): "labels": {}, "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1000,7 +1005,7 @@ def test_create_table_w_day_partition_and_expire(self): "timePartitioning": {"type": "DAY", "expirationMs": "100"}, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(table.time_partitioning.type_, "DAY") self.assertEqual(table.time_partitioning.expiration_ms, 100) @@ -1081,7 +1086,7 @@ def test_create_table_w_schema_and_query(self): "view": {"query": query, "useLegacySql": False}, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1136,7 +1141,7 @@ def test_create_table_w_external(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1175,7 +1180,7 @@ def test_create_table_w_reference(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1209,7 +1214,7 @@ def test_create_table_w_fully_qualified_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1241,7 +1246,7 @@ def test_create_table_w_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1276,7 +1281,7 @@ def test_create_table_alreadyexists_w_exists_ok_false(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_table_alreadyexists_w_exists_ok_true(self): @@ -1319,9 +1324,9 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) @@ -1394,7 +1399,7 @@ def test_get_model_w_string(self): final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=None + method="GET", path="/%s" % path, timeout=DEFAULT_TIMEOUT ) self.assertEqual(got.model_id, self.MODEL_ID) @@ -1503,7 +1508,7 @@ def test_get_table_sets_user_agent(self): "User-Agent": expected_user_agent, }, data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIn("my-application/1.2.3", expected_user_agent) @@ -1846,7 +1851,7 @@ def test_update_dataset_w_custom_property(self): data={"newAlphaProperty": "unreleased property"}, path=path, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -2136,7 +2141,7 @@ def test_update_table_w_custom_property(self): path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual( updated_table._properties["newAlphaProperty"], "unreleased property" @@ -2171,7 +2176,7 @@ def test_update_table_only_use_legacy_sql(self): path="/%s" % path, data={"view": {"useLegacySql": True}}, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -2269,7 +2274,7 @@ def test_update_table_w_query(self): "schema": schema_resource, }, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_update_table_w_schema_None(self): @@ -2398,7 +2403,7 @@ def test_delete_job_metadata_not_found(self): method="DELETE", path="/projects/client-proj/jobs/my-job/delete", query_params={"location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_id(self): @@ -2412,7 +2417,7 @@ def test_delete_job_metadata_with_id(self): method="DELETE", path="/projects/param-proj/jobs/my-job/delete", query_params={"location": "param-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_resource(self): @@ -2437,7 +2442,7 @@ def test_delete_job_metadata_with_resource(self): method="DELETE", path="/projects/job-based-proj/jobs/query_job/delete", query_params={"location": "us-east1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_model(self): @@ -2492,7 +2497,9 @@ def test_delete_model_w_not_found_ok_false(self): with self.assertRaises(google.api_core.exceptions.NotFound): client.delete_model("{}.{}".format(self.DS_ID, self.MODEL_ID)) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_model_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/models/{}".format( @@ -2513,7 +2520,9 @@ def test_delete_model_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_routine(self): from google.cloud.bigquery.routine import Routine @@ -2567,7 +2576,7 @@ def test_delete_routine_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_routine_w_not_found_ok_true(self): @@ -2589,7 +2598,7 @@ def test_delete_routine_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_table(self): @@ -2653,7 +2662,9 @@ def test_delete_table_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_table_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/tables/{}".format( @@ -2675,7 +2686,9 @@ def test_delete_table_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def _create_job_helper(self, job_config): from google.cloud.bigquery import _helpers @@ -2697,7 +2710,7 @@ def _create_job_helper(self, job_config): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_job_load_config(self): @@ -2846,7 +2859,7 @@ def test_create_job_query_config_w_rateLimitExceeded_error(self): method="POST", path="/projects/PROJECT/jobs", data=data_without_destination, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ) @@ -2886,7 +2899,7 @@ def test_get_job_miss_w_explict_project(self): method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_miss_w_client_location(self): @@ -2904,7 +2917,7 @@ def test_get_job_miss_w_client_location(self): method="GET", path="/projects/client-proj/jobs/NONESUCH", query_params={"projection": "full", "location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_hit_w_timeout(self): @@ -2973,7 +2986,7 @@ def test_cancel_job_miss_w_explict_project(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_miss_w_client_location(self): @@ -2992,7 +3005,7 @@ def test_cancel_job_miss_w_client_location(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_hit(self): @@ -3028,7 +3041,7 @@ def test_cancel_job_hit(self): method="POST", path="/projects/job-based-proj/jobs/query_job/cancel", query_params={"projection": "full", "location": "asia-northeast1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_w_timeout(self): @@ -3154,7 +3167,7 @@ def test_load_table_from_uri_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_load_table_from_uri_w_client_location(self): @@ -3198,7 +3211,7 @@ def test_load_table_from_uri_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_load_table_from_uri_w_invalid_job_config(self): @@ -3486,7 +3499,7 @@ def test_copy_table_w_multiple_sources(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=expected_resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) @@ -3548,7 +3561,7 @@ def test_copy_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_client_location(self): @@ -3598,7 +3611,7 @@ def test_copy_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_source_strings(self): @@ -3691,7 +3704,7 @@ def test_copy_table_w_valid_job_config(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job._configuration, CopyJobConfig) @@ -3797,7 +3810,7 @@ def test_extract_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_w_client_location(self): @@ -3841,7 +3854,7 @@ def test_extract_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_generated_job_id(self): @@ -3884,7 +3897,7 @@ def test_extract_table_generated_job_id(self): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") self.assertIsInstance(req["data"]["jobReference"]["jobId"], str) - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -3929,7 +3942,7 @@ def test_extract_table_w_destination_uris(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -4099,7 +4112,7 @@ def test_query_defaults(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4152,7 +4165,7 @@ def test_query_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_explicit_job_config(self): @@ -4208,7 +4221,10 @@ def test_query_w_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4252,7 +4268,10 @@ def test_query_preserving_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4304,7 +4323,10 @@ def test_query_preserving_explicit_default_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original default config object should not have been modified @@ -4389,7 +4411,10 @@ def test_query_w_explicit_job_config_override(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_client_default_config_no_incoming(self): @@ -4430,7 +4455,10 @@ def test_query_w_client_default_config_no_incoming(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_invalid_default_job_config(self): @@ -4475,7 +4503,7 @@ def test_query_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_detect_location(self): @@ -4546,7 +4574,7 @@ def test_query_w_udf_resources(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4602,7 +4630,7 @@ def test_query_w_query_parameters(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertEqual(sent["jobReference"]["jobId"], JOB) sent_config = sent["configuration"]["query"] @@ -4794,7 +4822,7 @@ def _row_data(row): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/%s" % PATH) self.assertEqual(req["data"], SENT) - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) def test_insert_rows_w_list_of_dictionaries(self): import datetime @@ -4862,7 +4890,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_list_of_Rows(self): @@ -4907,7 +4935,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_skip_invalid_and_ignore_unknown(self): @@ -4984,7 +5012,7 @@ def _row_data(row): errors[0]["errors"][0], RESPONSE["insertErrors"][0]["errors"][0] ) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_repeated_fields(self): @@ -5085,7 +5113,7 @@ def test_insert_rows_w_repeated_fields(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None, + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_record_schema(self): @@ -5151,7 +5179,7 @@ def test_insert_rows_w_record_schema(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_explicit_none_insert_ids(self): @@ -5185,7 +5213,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/{}".format(PATH), data=SENT, timeout=None, + method="POST", path="/{}".format(PATH), data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_errors(self): @@ -5269,7 +5297,7 @@ def test_insert_rows_w_numeric(self): project, ds_id, table_id ), data=sent, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -5461,7 +5489,10 @@ def test_insert_rows_from_dataframe_many_columns(self): ] } expected_call = mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) actual_calls = conn.api_request.call_args_list @@ -5514,7 +5545,10 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): actual_calls = conn.api_request.call_args_list assert len(actual_calls) == 1 assert actual_calls[0] == mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_default_behavior(self): @@ -5594,7 +5628,7 @@ def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): @@ -5624,7 +5658,7 @@ def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_iterator_row_ids(self): @@ -5651,7 +5685,7 @@ def test_insert_rows_json_with_iterator_row_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_non_iterable_row_ids(self): @@ -5704,7 +5738,7 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_none_insert_ids_sequence(self): @@ -5743,7 +5777,7 @@ def test_insert_rows_json_w_none_insert_ids_sequence(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_wrong_arg(self): @@ -5938,7 +5972,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), mock.call( method="GET", @@ -5948,7 +5982,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -6099,7 +6133,7 @@ def test_list_rows_repeated_fields(self): "selectedFields": "color,struct", "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_w_record_schema(self): @@ -6169,7 +6203,7 @@ def test_list_rows_w_record_schema(self): method="GET", path="/%s" % PATH, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_with_missing_schema(self): @@ -6224,7 +6258,7 @@ def test_list_rows_with_missing_schema(self): row_iter = client.list_rows(table) conn.api_request.assert_called_once_with( - method="GET", path=table_path, timeout=None + method="GET", path=table_path, timeout=DEFAULT_TIMEOUT ) conn.api_request.reset_mock() self.assertEqual(row_iter.total_rows, 2, msg=repr(table)) @@ -6234,7 +6268,7 @@ def test_list_rows_with_missing_schema(self): method="GET", path=tabledata_path, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(row_iter.total_rows, 3, msg=repr(table)) self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table)) @@ -6407,7 +6441,7 @@ def test_load_table_from_file_resumable(self): file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6440,7 +6474,7 @@ def test_load_table_from_file_w_explicit_project(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6474,7 +6508,7 @@ def test_load_table_from_file_w_client_location(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6536,7 +6570,7 @@ def test_load_table_from_file_resumable_metadata(self): file_obj, expected_config, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6565,7 +6599,7 @@ def test_load_table_from_file_multipart(self): self.EXPECTED_CONFIGURATION, file_obj_size, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.PROJECT, ) @@ -6590,7 +6624,7 @@ def test_load_table_from_file_with_retries(self): file_obj, self.EXPECTED_CONFIGURATION, num_retries, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6627,7 +6661,7 @@ def test_load_table_from_file_with_readable_gzip(self): gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6750,7 +6784,7 @@ def test_load_table_from_dataframe(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6808,7 +6842,7 @@ def test_load_table_from_dataframe_w_client_location(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6862,7 +6896,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -6918,7 +6952,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7012,7 +7046,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7073,7 +7107,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7120,7 +7154,7 @@ def test_load_table_from_dataframe_unknown_table(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf( @@ -7162,7 +7196,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7210,7 +7244,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7272,7 +7306,7 @@ def test_load_table_from_dataframe_struct_fields(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7347,7 +7381,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7442,7 +7476,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) assert warned # there should be at least one warning @@ -7592,7 +7626,7 @@ def test_load_table_from_dataframe_w_nulls(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7658,7 +7692,7 @@ def test_load_table_from_dataframe_with_csv_source_format(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -7696,7 +7730,7 @@ def test_load_table_from_json_basic_use(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7749,7 +7783,7 @@ def test_load_table_from_json_non_default_args(self): location="EU", project="project-x", job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7809,7 +7843,7 @@ def test_load_table_from_json_unicode_emoji_data_case(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_data_file = load_table_from_file.mock_calls[0][1][1] diff --git a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py index d07aaed4f004..67b21225d6e9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py @@ -15,6 +15,7 @@ from google.cloud.bigquery.dataset import Dataset, DatasetReference from .helpers import make_connection, dataset_polymorphic, make_client import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import mock import pytest @@ -111,7 +112,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], "labels": LABELS, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -143,7 +144,7 @@ def test_create_dataset_w_custom_property(client, PROJECT, DS_ID): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -176,7 +177,7 @@ def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LO "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -211,7 +212,7 @@ def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOC "labels": {}, "location": OTHER_LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -241,7 +242,7 @@ def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -271,7 +272,7 @@ def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -306,7 +307,7 @@ def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -356,8 +357,8 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py index 3a65e031c2bf..b48beb1473d5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_delete_dataset.py @@ -14,6 +14,7 @@ from .helpers import make_connection, make_client, dataset_polymorphic import google.api_core.exceptions +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import pytest @@ -40,7 +41,7 @@ def test_delete_dataset_delete_contents( method="DELETE", path="/%s" % PATH, query_params={"deleteContents": "true"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -61,7 +62,7 @@ def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID): client.delete_dataset(DS_ID) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -74,5 +75,5 @@ def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID): ) client.delete_dataset(DS_ID, not_found_ok=True) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_datasets.py b/packages/google-cloud-bigquery/tests/unit/test_list_datasets.py index 7793a7ba69a7..6f0b55c5e5b0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_datasets.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_datasets.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -65,7 +66,7 @@ def test_list_datasets_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -120,5 +121,5 @@ def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT): "maxResults": 3, "pageToken": TOKEN, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py b/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py index f348be72490b..1fb40d4462e5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,6 +17,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -136,7 +137,7 @@ def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query): method="GET", path="/%s" % PATH, query_params=dict({"projection": "full"}, **query), - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -185,7 +186,7 @@ def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -220,7 +221,7 @@ def test_list_jobs_explicit_missing(client, PROJECT): "allUsers": True, "stateFilter": "done", }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -233,7 +234,7 @@ def test_list_jobs_w_project(client, PROJECT): method="GET", path="/projects/other-project/jobs", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -269,7 +270,7 @@ def test_list_jobs_w_time_filter(client, PROJECT): "minCreationTime": "1", "maxCreationTime": str(end_time_millis), }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -286,6 +287,6 @@ def test_list_jobs_w_parent_job_filter(client, PROJECT): method="GET", path="/projects/%s/jobs" % PROJECT, query_params={"projection": "full", "parentJobId": "parent-job-123"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) conn.api_request.reset_mock() diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_models.py b/packages/google-cloud-bigquery/tests/unit/test_list_models.py index 4ede9a7ddfe2..b1485233868c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_models.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_models.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID) @@ -82,7 +84,7 @@ def test_list_models_defaults( assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_projects.py b/packages/google-cloud-bigquery/tests/unit/test_list_projects.py index a88540dd524e..190612b44588 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_projects.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_projects.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -66,7 +67,7 @@ def test_list_projects_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params=query, timeout=None + method="GET", path="/projects", query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -115,5 +116,5 @@ def test_list_projects_explicit_response_missing_projects_key(client): method="GET", path="/projects", query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_routines.py b/packages/google-cloud-bigquery/tests/unit/test_list_routines.py index 0699665424b0..80e62d6bdf01 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_routines.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_routines.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_routines_empty_w_timeout(client): conn = client._connection = make_connection({}) @@ -85,7 +87,7 @@ def test_list_routines_defaults( assert actual_token == token conn.api_request.assert_called_once_with( - method="GET", path=path, query_params=query, timeout=None + method="GET", path=path, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_tables.py b/packages/google-cloud-bigquery/tests/unit/test_list_tables.py index 45d15bed30e0..8360f6605630 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_tables.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_tables.py @@ -1,21 +1,23 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import make_connection, dataset_polymorphic -import google.cloud.bigquery.dataset import pytest +import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + @dataset_polymorphic def test_list_tables_empty_w_timeout( @@ -89,7 +91,7 @@ def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_I assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", path="/%s" % PATH, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -150,7 +152,7 @@ def test_list_tables_explicit(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 88c92a0709e6..36cbf499391e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -32,6 +32,7 @@ from google.cloud.bigquery import job from google.cloud.bigquery import table from google.cloud.bigquery.magics import magics +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from tests.unit.helpers import make_connection from test_utils.imports import maybe_fail_import @@ -185,7 +186,7 @@ def test_context_with_default_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", @@ -249,7 +250,7 @@ def test_context_with_custom_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index c7c25e0363de..e0a992f783d5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -55,6 +55,18 @@ def test_w_unstructured_requests_chunked_encoding_error(self): exc = requests.exceptions.ChunkedEncodingError() self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_requests_connecttimeout(self): + exc = requests.exceptions.ConnectTimeout() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_readtimeout(self): + exc = requests.exceptions.ReadTimeout() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_timeout(self): + exc = requests.exceptions.Timeout() + self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): from google.auth.exceptions import TransportError From 12ced68fa1ec15f5c01c37f16f90b671edc2dc42 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 17:34:24 +0200 Subject: [PATCH 1271/2016] chore(deps): update dependency google-cloud-bigquery to v2.25.0 (#907) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.24.1` -> `==2.25.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/compatibility-slim/2.24.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/confidence-slim/2.24.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.25.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2250-httpswwwgithubcomgoogleapispython-bigquerycomparev2241v2250-2021-08-24) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) ##### Features - Support using GeoPandas for GEOGRAPHY columns ([#​848](https://www.togithub.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13)) ##### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) ##### Bug Fixes - remove pytz dependency and require pyarrow>=3.0.0 ([#​875](https://www.togithub.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 853306d71658..d810e124157c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==2.5.0 geopandas==0.9.0 google-api-core==1.31.2 google-auth==1.35.0 -google-cloud-bigquery==2.24.1 +google-cloud-bigquery==2.25.0 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 google-crc32c==1.1.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 484e10516dc4..07760b666c9d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.24.1 +google-cloud-bigquery==2.25.0 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From eb3456db4d27776c0918a2723c5462a2605f4871 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 17:56:26 +0200 Subject: [PATCH 1272/2016] chore(deps): update dependency pandas to v1.3.2 (#900) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [pandas](https://pandas.pydata.org) ([source](https://togithub.com/pandas-dev/pandas)) | `==1.1.5` -> `==1.3.2` | [![age](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/compatibility-slim/1.1.5)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/confidence-slim/1.1.5)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
pandas-dev/pandas ### [`v1.3.2`](https://togithub.com/pandas-dev/pandas/releases/v1.3.2) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.3.1...v1.3.2) This is a patch release in the 1.3.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.2/whatsnew/v1.3.2.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.3.1`](https://togithub.com/pandas-dev/pandas/releases/v1.3.1) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.3.0...v1.3.1) This is the first patch release in the 1.3.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.1/whatsnew/v1.3.1.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.3.0`](https://togithub.com/pandas-dev/pandas/releases/v1.3.0) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.5...v1.3.0) This release includes some new features, bug fixes, and performance improvements. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.0/whatsnew/v1.3.0.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install -c conda-forge pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.5`](https://togithub.com/pandas-dev/pandas/releases/v1.2.5) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.4...v1.2.5) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.5/whatsnew/v1.2.5.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.4`](https://togithub.com/pandas-dev/pandas/releases/v1.2.4) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.3...v1.2.4) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.4/whatsnew/v1.2.4.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.3`](https://togithub.com/pandas-dev/pandas/releases/v1.2.3) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.2...v1.2.3) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.3/whatsnew/v1.2.3.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.2`](https://togithub.com/pandas-dev/pandas/releases/v1.2.2) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.1...v1.2.2) This is a patch release in the 1.2.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.2/whatsnew/v1.2.2.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.1`](https://togithub.com/pandas-dev/pandas/releases/v1.2.1) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.0...v1.2.1) This is the first patch release in the 1.2.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.1/whatsnew/v1.2.1.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.0`](https://togithub.com/pandas-dev/pandas/releases/v1.2.0) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.1.5...v1.2.0) This release includes some new features, bug fixes, and performance improvements. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.0/whatsnew/v1.2.0.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install -c conda-forge pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues).
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/geography/requirements.txt | 3 ++- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index d810e124157c..b5fe247cb1a8 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -27,7 +27,8 @@ mypy-extensions==0.4.3 numpy==1.19.5; python_version < "3.7" numpy==1.21.2; python_version > "3.6" packaging==21.0 -pandas==1.1.5 +pandas==1.1.5; python_version < '3.7' +pandas==1.3.2; python_version >= '3.7' proto-plus==1.19.0 protobuf==3.17.3 pyarrow==5.0.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 07760b666c9d..d75c747fb4f9 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -7,6 +7,6 @@ ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.2.0; python_version >= '3.7' +pandas==1.3.2; python_version >= '3.7' pyarrow==5.0.0 pytz==2021.1 From 3f4c8009c8b1d4295150ec6675a188fd0979ec27 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 25 Aug 2021 11:38:24 -0500 Subject: [PATCH 1273/2016] chore: group all renovate PRs together (#911) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This excludes `renovate.json` from templated updates. If this works well, we can update the core templates (perhaps with a configuration option to `py_library`). Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- packages/google-cloud-bigquery/owlbot.py | 28 ++++++++++++-------- packages/google-cloud-bigquery/renovate.json | 2 +- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index ea9904cdbf84..8664b658a6dd 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -63,7 +63,7 @@ s.replace( library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", r"type_ ", - "type " + "type ", ) s.move( @@ -78,8 +78,8 @@ "noxfile.py", "setup.py", f"scripts/fixup_bigquery_{library.name}_keywords.py", - f"google/cloud/bigquery/__init__.py", - f"google/cloud/bigquery/py.typed", + "google/cloud/bigquery/__init__.py", + "google/cloud/bigquery/py.typed", # There are no public API endpoints for the generated ModelServiceClient, # thus there's no point in generating it and its tests. f"google/cloud/bigquery_{library.name}/services/**", @@ -98,9 +98,9 @@ microgenerator=True, split_system_tests=True, intersphinx_dependencies={ - "pandas": 'http://pandas.pydata.org/pandas-docs/dev', + "pandas": "http://pandas.pydata.org/pandas-docs/dev", "geopandas": "https://geopandas.org/", - } + }, ) # BigQuery has a custom multiprocessing note @@ -113,7 +113,11 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", - ] + # Group all renovate PRs together. If this works well, remove this and + # update the shared templates (possibly with configuration option to + # py_library.) + "renovate.json", + ], ) # ---------------------------------------------------------------------------- @@ -125,14 +129,14 @@ s.replace( "docs/conf.py", r'\{"members": True\}', - '{"members": True, "inherited-members": True}' + '{"members": True, "inherited-members": True}', ) # Tell Sphinx to ingore autogenerated docs files. s.replace( "docs/conf.py", r'"samples/snippets/README\.rst",', - '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', + '\\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', ) # ---------------------------------------------------------------------------- @@ -140,13 +144,14 @@ # ---------------------------------------------------------------------------- # Add .pytype to .gitignore -s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype") +s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") # Add pytype config to setup.cfg s.replace( "setup.cfg", r"universal = 1", - textwrap.dedent(""" \g<0> + textwrap.dedent( + """ \\g<0> [pytype] python_version = 3.8 @@ -160,7 +165,8 @@ # There's some issue with finding some pyi files, thus disabling. # The issue https://github.com/google/pytype/issues/150 is closed, but the # error still occurs for some reason. - pyi-error""") + pyi-error""" + ), ) s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/packages/google-cloud-bigquery/renovate.json b/packages/google-cloud-bigquery/renovate.json index c04895563e69..713c60bb40f0 100644 --- a/packages/google-cloud-bigquery/renovate.json +++ b/packages/google-cloud-bigquery/renovate.json @@ -1,6 +1,6 @@ { "extends": [ - "config:base", ":preserveSemverRanges" + "config:base", "group:all", ":preserveSemverRanges" ], "ignorePaths": [".pre-commit-config.yaml"], "pip_requirements": { From 4b0d55aa44d4a51c944964d1430ab102fc74ce2b Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 25 Aug 2021 17:34:14 +0000 Subject: [PATCH 1274/2016] chore: release 2.25.1 (#912) :robot: I have created a release \*beep\* \*boop\* --- ### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) ### Bug Fixes * populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d)) * use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 8 ++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 7a5727ee74c9..8a21df6fe3ff 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) + + +### Bug Fixes + +* populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d)) +* use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627)) + ## [2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index f882cac3a292..21cbec9fe018 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.25.0" +__version__ = "2.25.1" From 2f07586d7139735fb10842a6c6e7fec62187b645 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 26 Aug 2021 12:03:02 +0200 Subject: [PATCH 1275/2016] docs: update docstring for bigquery_create_routine sample (#883) (#917) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed language issues. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 Co-authored-by: pallabiwrites <87546424+pallabiwrites@users.noreply.github.com> --- packages/google-cloud-bigquery/samples/create_routine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/create_routine.py b/packages/google-cloud-bigquery/samples/create_routine.py index 012c7927a1aa..1cb4a80b4ea7 100644 --- a/packages/google-cloud-bigquery/samples/create_routine.py +++ b/packages/google-cloud-bigquery/samples/create_routine.py @@ -22,7 +22,7 @@ def create_routine(routine_id): # Construct a BigQuery client object. client = bigquery.Client() - # TODO(developer): Choose a fully-qualified ID for the routine. + # TODO(developer): Choose a fully qualified ID for the routine. # routine_id = "my-project.my_dataset.my_routine" routine = bigquery.Routine( From 145ec2e34da410b85708d74c5a0b040931f903bc Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 26 Aug 2021 12:39:23 +0200 Subject: [PATCH 1276/2016] chore: migrate default branch to main (#910) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: migrate default branch to main * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Add owlbot replacements to persist changes * Manually apply new replacements from owlbot.py * Move temp replacement rules after s.move() Co-authored-by: Owl Bot --- .../.github/sync-repo-settings.yaml | 8 ++-- .../google-cloud-bigquery/.kokoro/build.sh | 2 +- .../.kokoro/test-samples-impl.sh | 2 +- .../google-cloud-bigquery/CONTRIBUTING.rst | 12 ++--- packages/google-cloud-bigquery/docs/conf.py | 10 ++-- packages/google-cloud-bigquery/owlbot.py | 48 +++++++++++++++++++ 6 files changed, 65 insertions(+), 17 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml index 8634a304314a..6572e59822de 100644 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -1,9 +1,9 @@ -# https://github.com/googleapis/repo-automation-bots/tree/master/packages/sync-repo-settings -# Rules for master branch protection +# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings +# Rules for main branch protection branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. -# Defaults to `master` -- pattern: master +# Defaults to `main` +- pattern: main requiresCodeOwnerReviews: true requiresStrictStatusChecks: true requiredStatusCheckContexts: diff --git a/packages/google-cloud-bigquery/.kokoro/build.sh b/packages/google-cloud-bigquery/.kokoro/build.sh index 302cc1e1ac40..4d6a1d0f6a55 100755 --- a/packages/google-cloud-bigquery/.kokoro/build.sh +++ b/packages/google-cloud-bigquery/.kokoro/build.sh @@ -41,7 +41,7 @@ python3 -m pip install --upgrade --quiet nox python3 -m nox --version # If this is a continuous build, send the test log to the FlakyBot. -# See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. +# See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then cleanup() { chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh index 311a8d54b9f1..8a324c9c7bc6 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh @@ -80,7 +80,7 @@ for file in samples/**/requirements.txt; do EXIT=$? # If this is a periodic build, send the test log to the FlakyBot. - # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. + # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot $KOKORO_GFILE_DIR/linux_amd64/flakybot diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index 2faf5aed3771..5b87973dd1a8 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -50,9 +50,9 @@ You'll have to create a development environment using a Git checkout: # Configure remotes such that you can pull changes from the googleapis/python-bigquery # repository into your local repository. $ git remote add upstream git@github.com:googleapis/python-bigquery.git - # fetch and merge changes from upstream into master + # fetch and merge changes from upstream into main $ git fetch upstream - $ git merge upstream/master + $ git merge upstream/main Now your local repo is set up such that you will push changes to your GitHub repo, from which you can submit a pull request. @@ -110,12 +110,12 @@ Coding Style variables:: export GOOGLE_CLOUD_TESTING_REMOTE="upstream" - export GOOGLE_CLOUD_TESTING_BRANCH="master" + export GOOGLE_CLOUD_TESTING_BRANCH="main" By doing this, you are specifying the location of the most up-to-date version of ``python-bigquery``. The the suggested remote name ``upstream`` should point to the official ``googleapis`` checkout and the - the branch should be the main branch on that remote (``master``). + the branch should be the main branch on that remote (``main``). - This repository contains configuration for the `pre-commit `__ tool, which automates checking @@ -209,7 +209,7 @@ The `description on PyPI`_ for the project comes directly from the ``README``. Due to the reStructuredText (``rst``) parser used by PyPI, relative links which will work on GitHub (e.g. ``CONTRIBUTING.rst`` instead of -``https://github.com/googleapis/python-bigquery/blob/master/CONTRIBUTING.rst``) +``https://github.com/googleapis/python-bigquery/blob/main/CONTRIBUTING.rst``) may cause problems creating links or rendering the description. .. _description on PyPI: https://pypi.org/project/google-cloud-bigquery @@ -234,7 +234,7 @@ We support: Supported versions can be found in our ``noxfile.py`` `config`_. -.. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py +.. _config: https://github.com/googleapis/python-bigquery/blob/main/noxfile.py We also explicitly decided to support Python 3 beginning with version 3.6. diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 59a2d8fb3429..07e5d8c307d8 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -76,8 +76,8 @@ # The encoding of source files. # source_encoding = 'utf-8-sig' -# The master toctree document. -master_doc = "index" +# The root toctree document. +root_doc = "index" # General information about the project. project = "google-cloud-bigquery" @@ -281,7 +281,7 @@ # author, documentclass [howto, manual, or own class]). latex_documents = [ ( - master_doc, + root_doc, "google-cloud-bigquery.tex", "google-cloud-bigquery Documentation", author, @@ -316,7 +316,7 @@ # (source start file, name, description, authors, manual section). man_pages = [ ( - master_doc, + root_doc, "google-cloud-bigquery", "google-cloud-bigquery Documentation", [author], @@ -335,7 +335,7 @@ # dir menu entry, description, category) texinfo_documents = [ ( - master_doc, + root_doc, "google-cloud-bigquery", "google-cloud-bigquery Documentation", author, diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 8664b658a6dd..09aa8ca6f33f 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -169,4 +169,52 @@ ), ) + +# Remove the replacements below once +# https://github.com/googleapis/synthtool/pull/1188 is merged + +# Update googleapis/repo-automation-bots repo to main in .kokoro/*.sh files +s.replace( + ".kokoro/*.sh", + "repo-automation-bots/tree/master", + "repo-automation-bots/tree/main", +) + +# Customize CONTRIBUTING.rst to replace master with main +s.replace( + "CONTRIBUTING.rst", + "fetch and merge changes from upstream into master", + "fetch and merge changes from upstream into main", +) + +s.replace( + "CONTRIBUTING.rst", "git merge upstream/master", "git merge upstream/main", +) + +s.replace( + "CONTRIBUTING.rst", + """export GOOGLE_CLOUD_TESTING_BRANCH=\"master\"""", + """export GOOGLE_CLOUD_TESTING_BRANCH=\"main\"""", +) + +s.replace( + "CONTRIBUTING.rst", r"remote \(``master``\)", "remote (``main``)", +) + +s.replace( + "CONTRIBUTING.rst", "blob/master/CONTRIBUTING.rst", "blob/main/CONTRIBUTING.rst", +) + +s.replace( + "CONTRIBUTING.rst", "blob/master/noxfile.py", "blob/main/noxfile.py", +) + +s.replace( + "docs/conf.py", "master_doc", "root_doc", +) + +s.replace( + "docs/conf.py", "# The master toctree document.", "# The root toctree document.", +) + s.shell.run(["nox", "-s", "blacken"], hide_output=False) From 5fe6e18a287d7fdf81d3fb0eddab6a42c2ef466b Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 27 Aug 2021 16:20:50 -0600 Subject: [PATCH 1277/2016] chore: invalid docstrings broke docfx (#924) --- .../google-cloud-bigquery/google/cloud/bigquery/job/copy_.py | 2 +- .../google-cloud-bigquery/google/cloud/bigquery/job/extract.py | 2 +- .../google-cloud-bigquery/google/cloud/bigquery/job/load.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py index c6ee989441fb..f0dd3d668964 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py @@ -240,7 +240,7 @@ def to_api_repr(self): def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation - .. note: + .. note:: This method assumes that the project found in the resource matches the client's project. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py index 3373bcdefccb..52aa036c900a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py @@ -244,7 +244,7 @@ def to_api_repr(self): def from_api_repr(cls, resource: dict, client) -> "ExtractJob": """Factory: construct a job given its API representation - .. note: + .. note:: This method assumes that the project found in the resource matches the client's project. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index aee055c1c05b..b12c3e621fad 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -800,7 +800,7 @@ def to_api_repr(self): def from_api_repr(cls, resource: dict, client) -> "LoadJob": """Factory: construct a job given its API representation - .. note: + .. note:: This method assumes that the project found in the resource matches the client's project. From fb5d65e13e05f1f522772c0d37bb34bc51caef20 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 30 Aug 2021 21:37:44 +0200 Subject: [PATCH 1278/2016] chore(deps): update all dependencies (#914) * chore(deps): update all dependencies * Python version modifiers for pyproj Co-authored-by: Tim Swast --- .../samples/geography/requirements.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index b5fe247cb1a8..aaf1bbfd15f7 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -10,13 +10,13 @@ dataclasses==0.6; python_version < '3.7' Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0 -google-api-core==1.31.2 -google-auth==1.35.0 +google-api-core==2.0.0 +google-auth==2.0.1 google-cloud-bigquery==2.25.0 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 google-crc32c==1.1.2 -google-resumable-media==1.3.3 +google-resumable-media==2.0.0 googleapis-common-protos==1.53.0 grpcio==1.39.0 idna==3.2 @@ -36,7 +36,8 @@ pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.20 pyparsing==2.4.7 -pyproj==3.0.1 +pyproj==3.0.1; python_version < "3.7" +pyproj==3.1.0; python_version > "3.6" python-dateutil==2.8.2 pytz==2021.1 PyYAML==5.4.1 From affb4fcbbe83cda1a7e64dff217d8dab53fa0b6d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 31 Aug 2021 10:13:38 +0200 Subject: [PATCH 1279/2016] fix: error inserting DataFrame with REPEATED field (#925) Co-authored-by: Tim Swast --- .../google/cloud/bigquery/_pandas_helpers.py | 8 ++- .../tests/unit/test__pandas_helpers.py | 63 ++++++++++++++----- 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index ab58b17292d7..29139ae09f56 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -844,7 +844,13 @@ def dataframe_to_json_generator(dataframe): output = {} for column, value in zip(dataframe.columns, row): # Omit NaN values. - if pandas.isna(value): + is_nan = pandas.isna(value) + + # isna() can also return an array-like of bools, but the latter's boolean + # value is ambiguous, hence an extra check. An array-like value is *not* + # considered a NaN, however. + if isinstance(is_nan, bool) and is_nan: continue output[column] = value + yield output diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index a9b0ae21f402..f0975ef65989 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -821,6 +821,41 @@ def test_dataframe_to_json_generator(module_under_test): assert list(rows) == expected +def test_dataframe_to_json_generator_repeated_field(module_under_test): + pytest.importorskip( + "pandas", + minversion=str(PANDAS_MINIUM_VERSION), + reason=( + f"Requires `pandas version >= {PANDAS_MINIUM_VERSION}` " + "which introduces pandas.NA" + ), + ) + + df_data = [ + collections.OrderedDict( + [("repeated_col", [pandas.NA, 2, None, 4]), ("not_repeated_col", "first")] + ), + collections.OrderedDict( + [ + ("repeated_col", ["a", "b", mock.sentinel.foo, "d"]), + ("not_repeated_col", "second"), + ] + ), + ] + dataframe = pandas.DataFrame(df_data) + + rows = module_under_test.dataframe_to_json_generator(dataframe) + + expected = [ + {"repeated_col": [pandas.NA, 2, None, 4], "not_repeated_col": "first"}, + { + "repeated_col": ["a", "b", mock.sentinel.foo, "d"], + "not_repeated_col": "second", + }, + ] + assert list(rows) == expected + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_list_columns_and_indexes_with_named_index(module_under_test): df_data = collections.OrderedDict( @@ -882,7 +917,7 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test): def test_dataframe_to_bq_schema_dict_sequence(module_under_test): df_data = collections.OrderedDict( [ - ("str_column", [u"hello", u"world"]), + ("str_column", ["hello", "world"]), ("int_column", [42, 8]), ("bool_column", [True, False]), ] @@ -1070,7 +1105,7 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): ] dataframe = pandas.DataFrame( - {"field01": [u"hello", u"world"], "field02": [True, False]} + {"field01": ["hello", "world"], "field02": [True, False]} ) arrow_table = module_under_test.dataframe_to_arrow(dataframe, dict_schema) @@ -1139,8 +1174,8 @@ def test_dataframe_to_parquet_compression_method(module_under_test): def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ - {"id": 10, "status": u"FOO", "execution_date": datetime.date(2019, 5, 10)}, - {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)}, + {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)}, + {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)}, ] ) @@ -1167,8 +1202,8 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ - {"id": 10, "status": u"FOO", "created_at": datetime.date(2019, 5, 10)}, - {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)}, + {"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)}, + {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)}, ] ) @@ -1197,8 +1232,8 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ - {"struct_field": {"one": 2}, "status": u"FOO"}, - {"struct_field": {"two": u"222"}, "status": u"BAR"}, + {"struct_field": {"one": 2}, "status": "FOO"}, + {"struct_field": {"two": "222"}, "status": "BAR"}, ] ) @@ -1252,7 +1287,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): "timestamp_field": datetime.datetime(2005, 5, 31, 14, 25, 55), "date_field": datetime.date(2005, 5, 31), "bytes_field": b"some bytes", - "string_field": u"some characters", + "string_field": "some characters", "numeric_field": decimal.Decimal("123.456"), "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), } @@ -1312,13 +1347,13 @@ def test_augment_schema_type_detection_fails(module_under_test): dataframe = pandas.DataFrame( data=[ { - "status": u"FOO", + "status": "FOO", "struct_field": {"one": 1}, - "struct_field_2": {"foo": u"123"}, + "struct_field_2": {"foo": "123"}, }, { - "status": u"BAR", - "struct_field": {"two": u"111"}, + "status": "BAR", + "struct_field": {"two": "111"}, "struct_field_2": {"bar": 27}, }, ] @@ -1351,7 +1386,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): ] dataframe = pandas.DataFrame( - {"field01": [u"hello", u"world"], "field02": [True, False]} + {"field01": ["hello", "world"], "field02": [True, False]} ) write_table_patch = mock.patch.object( From d27d47bd2d43ecd6ad9d3038e0097be46873a5b4 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 31 Aug 2021 16:46:24 +0200 Subject: [PATCH 1280/2016] chore(deps): update all dependencies (#926) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.25.0` -> `==2.25.1` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/compatibility-slim/2.25.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/confidence-slim/2.25.0)](https://docs.renovatebot.com/merge-confidence/) | | [google-cloud-testutils](https://togithub.com/googleapis/python-test-utils) | `==1.0.0` -> `==1.1.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/compatibility-slim/1.0.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/confidence-slim/1.0.0)](https://docs.renovatebot.com/merge-confidence/) | | [google-crc32c](https://togithub.com/googleapis/python-crc32c) | `==1.1.2` -> `==1.1.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/compatibility-slim/1.1.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/confidence-slim/1.1.2)](https://docs.renovatebot.com/merge-confidence/) | | [importlib-metadata](https://togithub.com/python/importlib_metadata) | `==4.6.4` -> `==4.8.1` | [![age](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/compatibility-slim/4.6.4)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/confidence-slim/4.6.4)](https://docs.renovatebot.com/merge-confidence/) | | [pytest](https://docs.pytest.org/en/latest/) ([source](https://togithub.com/pytest-dev/pytest), [changelog](https://docs.pytest.org/en/stable/changelog.html)) | `==6.2.4` -> `==6.2.5` | [![age](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/compatibility-slim/6.2.4)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/confidence-slim/6.2.4)](https://docs.renovatebot.com/merge-confidence/) | | [typing-extensions](https://togithub.com/python/typing) | `==3.10.0.0` -> `==3.10.0.2` | [![age](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/compatibility-slim/3.10.0.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/confidence-slim/3.10.0.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.25.1`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2251-httpswwwgithubcomgoogleapispython-bigquerycomparev2250v2251-2021-08-25) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1)
googleapis/python-test-utils ### [`v1.1.0`](https://togithub.com/googleapis/python-test-utils/compare/v1.0.0...v1.1.0) [Compare Source](https://togithub.com/googleapis/python-test-utils/compare/v1.0.0...v1.1.0)
googleapis/python-crc32c ### [`v1.1.3`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#​113-httpswwwgithubcomgoogleapispython-crc32ccomparev112v113-2021-08-30) [Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.2...v1.1.3)
python/importlib_metadata ### [`v4.8.1`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v481) [Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.8.0...v4.8.1) \====== - [#​348](https://togithub.com/python/importlib_metadata/issues/348): Restored support for `EntryPoint` access by item, deprecating support in the process. Users are advised to use direct member access instead of item-based access:: - ep\[0] -> ep.name - ep\[1] -> ep.value - ep\[2] -> ep.group - ep\[:] -> ep.name, ep.value, ep.group ### [`v4.8.0`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v480) [Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.7.1...v4.8.0) \====== - [#​337](https://togithub.com/python/importlib_metadata/issues/337): Rewrote `EntryPoint` as a simple class, still immutable and still with the attributes, but without any expectation for `namedtuple` functionality such as `_asdict`. ### [`v4.7.1`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v471) [Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.7.0...v4.7.1) \====== - [#​344](https://togithub.com/python/importlib_metadata/issues/344): Fixed regression in `packages_distributions` when neither top-level.txt nor a files manifest is present. ### [`v4.7.0`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v470) [Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.6.4...v4.7.0) \====== - [#​330](https://togithub.com/python/importlib_metadata/issues/330): In `packages_distributions`, now infer top-level names from `.files()` when a `top-level.txt` (Setuptools-specific metadata) is not present.
pytest-dev/pytest ### [`v6.2.5`](https://togithub.com/pytest-dev/pytest/compare/6.2.4...6.2.5) [Compare Source](https://togithub.com/pytest-dev/pytest/compare/6.2.4...6.2.5)
python/typing ### [`v3.10.0.2`](https://togithub.com/python/typing/compare/3.10.0.1...3.10.0.2) [Compare Source](https://togithub.com/python/typing/compare/3.10.0.1...3.10.0.2) ### [`v3.10.0.1`](https://togithub.com/python/typing/compare/3.10.0.0...3.10.0.1) [Compare Source](https://togithub.com/python/typing/compare/3.10.0.0...3.10.0.1)
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://togithub.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 8 ++++---- .../samples/snippets/requirements-test.txt | 4 ++-- .../samples/snippets/requirements.txt | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index b0cf76724270..5d836a5c5184 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==6.2.4 +pytest==6.2.5 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index aaf1bbfd15f7..c325ee5e45dd 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,15 +12,15 @@ geojson==2.5.0 geopandas==0.9.0 google-api-core==2.0.0 google-auth==2.0.1 -google-cloud-bigquery==2.25.0 +google-cloud-bigquery==2.25.1 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 -google-crc32c==1.1.2 +google-crc32c==1.1.3 google-resumable-media==2.0.0 googleapis-common-protos==1.53.0 grpcio==1.39.0 idna==3.2 -importlib-metadata==4.6.4 +importlib-metadata==4.8.1 libcst==0.3.20 munch==2.5.0 mypy-extensions==0.4.3 @@ -45,7 +45,7 @@ requests==2.26.0 rsa==4.7.2 Shapely==1.7.1 six==1.16.0 -typing-extensions==3.10.0.0 +typing-extensions==3.10.0.2 typing-inspect==0.7.1 urllib3==1.26.6 zipp==3.5.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index b8dee50d040a..caa48813a911 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.0.0 -pytest==6.2.4 +google-cloud-testutils==1.1.0 +pytest==6.2.5 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index d75c747fb4f9..3b30ceebf805 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.25.0 +google-cloud-bigquery==2.25.1 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From db6d4f8ce5270d7ef3f3e413fb63145173e115ca Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 31 Aug 2021 16:56:40 -0600 Subject: [PATCH 1281/2016] fix: underscores weren't allowed in struct field names when passing parameters to the DB API (#930) --- .../google/cloud/bigquery/dbapi/_helpers.py | 2 +- .../google/cloud/bigquery/dbapi/cursor.py | 2 +- .../tests/unit/test_dbapi__helpers.py | 8 ++++---- .../google-cloud-bigquery/tests/unit/test_dbapi_cursor.py | 4 ++++ 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 9c134b47ce67..72e711bcf428 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -173,7 +173,7 @@ def _parse_type( \s* (ARRAY|STRUCT|RECORD) # Type \s* - <([A-Z0-9<> ,()]+)> # Subtype(s) + <([A-Z0-9_<> ,()]+)> # Subtype(s) \s*$ """, re.IGNORECASE | re.VERBOSE, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 587598d5f208..b1239ff57a19 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -494,7 +494,7 @@ def _extract_types( ([^:)]*) # name (?:: # ':' introduces type ( # start of type group - [a-zA-Z0-9<>, ]+ # First part, no parens + [a-zA-Z0-9_<>, ]+ # First part, no parens (?: # start sets of parens + non-paren text \([0-9 ,]+\) # comma-separated groups of digits in parens diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index b3320335404b..5965a4817740 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -612,8 +612,8 @@ def test_complex_query_parameter_type_errors(type_, value, expect): "parameters,parameter_types,expect", [ ( - [[], dict(name="ch1", bdate=datetime.date(2021, 1, 1))], - ["ARRAY", "struct"], + [[], dict(name="ch1", b_date=datetime.date(2021, 1, 1))], + ["ARRAY", "struct"], [ { "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, @@ -623,13 +623,13 @@ def test_complex_query_parameter_type_errors(type_, value, expect): "parameterType": { "structTypes": [ {"name": "name", "type": {"type": "STRING"}}, - {"name": "bdate", "type": {"type": "DATE"}}, + {"name": "b_date", "type": {"type": "DATE"}}, ], "type": "STRUCT", }, "parameterValue": { "structValues": { - "bdate": {"value": "2021-01-01"}, + "b_date": {"value": "2021-01-01"}, "name": {"value": "ch1"}, } }, diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 026810aaf5c7..cb55da889c62 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -809,6 +809,10 @@ def test__format_operation_no_placeholders(self): "values(%%%%%(foo:INT64)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")), ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), ( "values(%%%%%(foo:struct)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), From 505d059955bfd6f7bfccff969623ee7836827d9f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 1 Sep 2021 06:56:59 -0600 Subject: [PATCH 1282/2016] chore: release 2.25.2 (#916) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 8a21df6fe3ff..b4c8e5fb7584 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.25.2](https://www.github.com/googleapis/python-bigquery/compare/v2.25.1...v2.25.2) (2021-08-31) + + +### Bug Fixes + +* error inserting DataFrame with REPEATED field ([#925](https://www.github.com/googleapis/python-bigquery/issues/925)) ([656d2fa](https://www.github.com/googleapis/python-bigquery/commit/656d2fa6f870573a21235c83463752a2d084caba)) +* underscores weren't allowed in struct field names when passing parameters to the DB API ([#930](https://www.github.com/googleapis/python-bigquery/issues/930)) ([fcb0bc6](https://www.github.com/googleapis/python-bigquery/commit/fcb0bc68c972c2c98bb8542f54e9228308177ecb)) + + +### Documentation + +* update docstring for bigquery_create_routine sample ([#883](https://www.github.com/googleapis/python-bigquery/issues/883)) ([#917](https://www.github.com/googleapis/python-bigquery/issues/917)) ([e2d12b7](https://www.github.com/googleapis/python-bigquery/commit/e2d12b795ef2dc51b0ee36f1b3000edb1e64ce05)) + ### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 21cbec9fe018..e8672849f11d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.25.1" +__version__ = "2.25.2" From 52fdbbaa2c18770f1d29253ff5c0e9d47dd0703a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 1 Sep 2021 16:23:46 +0200 Subject: [PATCH 1283/2016] chore(deps): update all dependencies (#928) --- .../samples/geography/requirements.txt | 8 ++++---- .../samples/snippets/requirements.txt | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index c325ee5e45dd..cfccab8b06d0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -10,13 +10,13 @@ dataclasses==0.6; python_version < '3.7' Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0 -google-api-core==2.0.0 -google-auth==2.0.1 -google-cloud-bigquery==2.25.1 +google-api-core==2.0.1 +google-auth==2.0.2 +google-cloud-bigquery==2.25.2 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 google-crc32c==1.1.3 -google-resumable-media==2.0.0 +google-resumable-media==2.0.1 googleapis-common-protos==1.53.0 grpcio==1.39.0 idna==3.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 3b30ceebf805..57f55d1869b5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ -google-cloud-bigquery==2.25.1 +google-cloud-bigquery==2.25.2 google-cloud-bigquery-storage==2.6.3 -google-auth-oauthlib==0.4.5 +google-auth-oauthlib==0.4.6 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From 935d30c67c1b02191a17a03d64ef6949b3d2bfe0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 1 Sep 2021 14:58:09 -0500 Subject: [PATCH 1284/2016] fix: guard imports against unsupported pyarrow versions (#934) * fix: guard imports against unsupported pyarrow versions * add unit tests * fix pytype * second try at fixing pytype --- .../google/cloud/bigquery/_helpers.py | 72 ++++++++++++++++++- .../google/cloud/bigquery/_pandas_helpers.py | 19 ++--- .../google/cloud/bigquery/exceptions.py | 4 ++ packages/google-cloud-bigquery/noxfile.py | 9 ++- .../testing/constraints-3.6.txt | 2 +- .../tests/unit/job/test_query_pandas.py | 8 +-- .../tests/unit/test__helpers.py | 68 ++++++++++++++++++ .../tests/unit/test__pandas_helpers.py | 29 +++++--- .../tests/unit/test_table.py | 12 ++-- 9 files changed, 184 insertions(+), 39 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 0a1f71444ae0..9df0f3d0ad6d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -19,7 +19,7 @@ import decimal import math import re -from typing import Union +from typing import Any, Union from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -29,7 +29,10 @@ from google.cloud._helpers import _to_bytes import packaging.version -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery.exceptions import ( + LegacyBigQueryStorageError, + LegacyPyarrowError, +) _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" @@ -42,6 +45,7 @@ re.VERBOSE, ) +_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") @@ -95,12 +99,74 @@ def verify_version(self): if self.installed_version < _MIN_BQ_STORAGE_VERSION: msg = ( "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= 2.0.0 (version found: {self.installed_version})." + f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})." ) raise LegacyBigQueryStorageError(msg) +class PyarrowVersions: + """Version comparisons for pyarrow package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of pyarrow.""" + if self._installed_version is None: + import pyarrow + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(pyarrow, "__version__", "0.0.0") + ) + + return self._installed_version + + def try_import(self, raise_if_error: bool = False) -> Any: + """Verify that a recent enough version of pyarrow extra is + installed. + + The function assumes that pyarrow extra is installed, and should thus + be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. + + Returns: + The ``pyarrow`` module or ``None``. + + Raises: + LegacyPyarrowError: + If the pyarrow package is outdated and ``raise_if_error`` is ``True``. + """ + try: + import pyarrow + except ImportError as exc: # pragma: NO COVER + if raise_if_error: + raise LegacyPyarrowError( + f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}." + ) from exc + return None + + if self.installed_version < _MIN_PYARROW_VERSION: + if raise_if_error: + msg = ( + "Dependency pyarrow is outdated, please upgrade " + f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})." + ) + raise LegacyPyarrowError(msg) + return None + + return pyarrow + + BQ_STORAGE_VERSIONS = BQStorageVersions() +PYARROW_VERSIONS = PyarrowVersions() def _not_null(value, field): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 29139ae09f56..0a22043a3d69 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -55,12 +55,6 @@ def _to_wkb(v): _to_wkb = _to_wkb() -try: - import pyarrow - import pyarrow.parquet -except ImportError: # pragma: NO COVER - pyarrow = None - try: from google.cloud.bigquery_storage import ArrowSerializationOptions except ImportError: @@ -73,12 +67,10 @@ def _to_wkb(v): from google.cloud.bigquery import schema -_LOGGER = logging.getLogger(__name__) +pyarrow = _helpers.PYARROW_VERSIONS.try_import() -_NO_BQSTORAGE_ERROR = ( - "The google-cloud-bigquery-storage library is not installed, " - "please install google-cloud-bigquery-storage to use bqstorage features." -) + +_LOGGER = logging.getLogger(__name__) _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. @@ -548,8 +540,9 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN serializing method. Defaults to "SNAPPY". https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table """ - if pyarrow is None: - raise ValueError("pyarrow is required for BigQuery schema conversion.") + pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) + + import pyarrow.parquet bq_schema = schema._to_schema_fields(bq_schema) arrow_table = dataframe_to_arrow(dataframe, bq_schema) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py index 6e5c27eb1d5c..fb1188eee780 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py @@ -19,3 +19,7 @@ class BigQueryError(Exception): class LegacyBigQueryStorageError(BigQueryError): """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" + + +class LegacyPyarrowError(BigQueryError): + """Raised when too old a version of pyarrow package is detected at runtime.""" diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 9077924e98ce..d53b33121111 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -94,9 +94,16 @@ def unit(session): default(session) -@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +@nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]]) def unit_noextras(session): """Run the unit test suite.""" + + # Install optional dependencies that are out-of-date. + # https://github.com/googleapis/python-bigquery/issues/933 + # There is no pyarrow 1.0.0 package for Python 3.9. + if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: + session.install("pyarrow==1.0.0") + default(session, install_extras=False) diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index be1a992fadae..23d2724f7bff 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -19,6 +19,6 @@ proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 requests==2.18.0 -shapely==1.6.0 +Shapely==1.6.0 six==1.13.0 tqdm==4.7.4 diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index b5af90c0bc5e..580b41c789e2 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -31,10 +31,6 @@ import geopandas except (ImportError, AttributeError): # pragma: NO COVER geopandas = None -try: - import pyarrow -except (ImportError, AttributeError): # pragma: NO COVER - pyarrow = None try: from google.cloud import bigquery_storage except (ImportError, AttributeError): # pragma: NO COVER @@ -44,11 +40,15 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None +from google.cloud.bigquery import _helpers from .helpers import _make_client from .helpers import _make_connection from .helpers import _make_job_resource +pyarrow = _helpers.PYARROW_VERSIONS.try_import() + + @pytest.fixture def table_read_options_kwarg(): # Create a BigQuery Storage table read options object with pyarrow compression diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index f8d00e67d8d6..945b95d1b1a2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -24,9 +24,20 @@ except ImportError: # pragma: NO COVER bigquery_storage = None +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + @unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") class TestBQStorageVersions(unittest.TestCase): + def tearDown(self): + from google.cloud.bigquery import _helpers + + # Reset any cached versions since it may not match reality. + _helpers.BQ_STORAGE_VERSIONS._installed_version = None + def _object_under_test(self): from google.cloud.bigquery import _helpers @@ -89,6 +100,63 @@ def test_is_read_session_optional_false(self): assert not versions.is_read_session_optional +@unittest.skipIf(pyarrow is None, "Requires `pyarrow`") +class TestPyarrowVersions(unittest.TestCase): + def tearDown(self): + from google.cloud.bigquery import _helpers + + # Reset any cached versions since it may not match reality. + _helpers.PYARROW_VERSIONS._installed_version = None + + def _object_under_test(self): + from google.cloud.bigquery import _helpers + + return _helpers.PyarrowVersions() + + def _call_try_import(self, **kwargs): + from google.cloud.bigquery import _helpers + + _helpers.PYARROW_VERSIONS._installed_version = None + return _helpers.PYARROW_VERSIONS.try_import(**kwargs) + + def test_try_import_raises_no_error_w_recent_pyarrow(self): + from google.cloud.bigquery.exceptions import LegacyPyarrowError + + with mock.patch("pyarrow.__version__", new="5.0.0"): + try: + pyarrow = self._call_try_import(raise_if_error=True) + self.assertIsNotNone(pyarrow) + except LegacyPyarrowError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_try_import_returns_none_w_legacy_pyarrow(self): + with mock.patch("pyarrow.__version__", new="2.0.0"): + pyarrow = self._call_try_import() + self.assertIsNone(pyarrow) + + def test_try_import_raises_error_w_legacy_pyarrow(self): + from google.cloud.bigquery.exceptions import LegacyPyarrowError + + with mock.patch("pyarrow.__version__", new="2.0.0"): + with self.assertRaises(LegacyPyarrowError): + self._call_try_import(raise_if_error=True) + + def test_installed_version_returns_cached(self): + versions = self._object_under_test() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + def test_installed_version_returns_parsed_version(self): + versions = self._object_under_test() + + with mock.patch("pyarrow.__version__", new="1.2.3"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 2 + assert version.micro == 3 + + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _not_null diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index f0975ef65989..80b226a3a64d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -29,13 +29,6 @@ import pandas.testing except ImportError: # pragma: NO COVER pandas = None -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - # Mock out pyarrow when missing, because methods from pyarrow.types are - # used in test parameterization. - pyarrow = mock.Mock() try: import geopandas except ImportError: # pragma: NO COVER @@ -44,9 +37,19 @@ import pytest from google import api_core +from google.cloud.bigquery import exceptions from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema + +pyarrow = _helpers.PYARROW_VERSIONS.try_import() +if pyarrow: + import pyarrow.types +else: # pragma: NO COVER + # Mock out pyarrow when missing, because methods from pyarrow.types are + # used in test parameterization. + pyarrow = mock.Mock() + try: from google.cloud import bigquery_storage @@ -1120,15 +1123,19 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): - monkeypatch.setattr(module_under_test, "pyarrow", None) - with pytest.raises(ValueError) as exc_context: + mock_pyarrow_import = mock.Mock() + mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError( + "pyarrow not installed" + ) + monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import) + + with pytest.raises(exceptions.LegacyPyarrowError): module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) - assert "pyarrow is required" in str(exc_context.value) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): +def test_dataframe_to_parquet_w_extra_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( pandas.DataFrame(), (schema.SchemaField("not_in_df", "STRING"),), None diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 1ce930ee4b22..c64620a48c67 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -45,18 +45,18 @@ except (ImportError, AttributeError): # pragma: NO COVER geopandas = None -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - pyarrow = None - try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER tqdm = None from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery import _helpers + + +pyarrow = _helpers.PYARROW_VERSIONS.try_import() +if pyarrow: + import pyarrow.types def _mock_client(): From 469608839ea034692f7276e860a7a3f64b1a79ca Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 1 Sep 2021 14:22:16 -0600 Subject: [PATCH 1285/2016] feat: set the X-Server-Timeout header when timeout is set (#927) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #919 🦕 --- .../google/cloud/bigquery/client.py | 27 ++++++++++++++++++- .../tests/unit/conftest.py | 19 +++++++++++++ .../tests/unit/test_client.py | 27 +++++++++++++------ 3 files changed, 64 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 023346ffa292..47ff83c5df01 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -131,6 +131,8 @@ # https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 _PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) +TIMEOUT_HEADER = "X-Server-Timeout" + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -742,16 +744,26 @@ def create_table( return self.get_table(table.reference, retry=retry) def _call_api( - self, retry, span_name=None, span_attributes=None, job_ref=None, **kwargs + self, + retry, + span_name=None, + span_attributes=None, + job_ref=None, + headers: Optional[Dict[str, str]] = None, + **kwargs, ): + kwargs = _add_server_timeout_header(headers, kwargs) call = functools.partial(self._connection.api_request, **kwargs) + if retry: call = retry(call) + if span_name is not None: with create_span( name=span_name, attributes=span_attributes, client=self, job_ref=job_ref ): return call() + return call() def get_dataset( @@ -4045,3 +4057,16 @@ def _get_upload_headers(user_agent): "User-Agent": user_agent, "content-type": "application/json", } + + +def _add_server_timeout_header(headers: Optional[Dict[str, str]], kwargs): + timeout = kwargs.get("timeout") + if timeout is not None: + if headers is None: + headers = {} + headers[TIMEOUT_HEADER] = str(timeout) + + if headers: + kwargs["headers"] = headers + + return kwargs diff --git a/packages/google-cloud-bigquery/tests/unit/conftest.py b/packages/google-cloud-bigquery/tests/unit/conftest.py index 7a67ea6b5c77..feba65aa5f7f 100644 --- a/packages/google-cloud-bigquery/tests/unit/conftest.py +++ b/packages/google-cloud-bigquery/tests/unit/conftest.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import mock import pytest from .helpers import make_client @@ -35,3 +36,21 @@ def DS_ID(): @pytest.fixture def LOCATION(): yield "us-central" + + +def noop_add_server_timeout_header(headers, kwargs): + if headers: + kwargs["headers"] = headers + return kwargs + + +@pytest.fixture(autouse=True) +def disable_add_server_timeout_header(request): + if "enable_add_server_timeout_header" in request.keywords: + yield + else: + with mock.patch( + "google.cloud.bigquery.client._add_server_timeout_header", + noop_add_server_timeout_header, + ): + yield diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index e9204f1de47b..d2a75413f4fe 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1806,7 +1806,6 @@ def test_update_dataset(self): "access": ACCESS, }, path="/" + PATH, - headers=None, timeout=7.5, ) self.assertEqual(ds2.description, ds.description) @@ -1850,7 +1849,6 @@ def test_update_dataset_w_custom_property(self): method="PATCH", data={"newAlphaProperty": "unreleased property"}, path=path, - headers=None, timeout=DEFAULT_TIMEOUT, ) @@ -1909,7 +1907,7 @@ def test_update_model(self): "labels": {"x": "y"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 + method="PATCH", data=sent, path="/" + path, timeout=7.5 ) self.assertEqual(updated_model.model_id, model.model_id) self.assertEqual(updated_model.description, model.description) @@ -1982,7 +1980,6 @@ def test_update_routine(self): method="PUT", data=sent, path="/projects/routines-project/datasets/test_routines/routines/updated_routine", - headers=None, timeout=7.5, ) self.assertEqual(actual_routine.arguments, routine.arguments) @@ -2090,7 +2087,7 @@ def test_update_table(self): "labels": {"x": "y"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 + method="PATCH", data=sent, path="/" + path, timeout=7.5 ) self.assertEqual(updated_table.description, table.description) self.assertEqual(updated_table.friendly_name, table.friendly_name) @@ -2140,7 +2137,6 @@ def test_update_table_w_custom_property(self): method="PATCH", path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, - headers=None, timeout=DEFAULT_TIMEOUT, ) self.assertEqual( @@ -2175,7 +2171,6 @@ def test_update_table_only_use_legacy_sql(self): method="PATCH", path="/%s" % path, data={"view": {"useLegacySql": True}}, - headers=None, timeout=DEFAULT_TIMEOUT, ) self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -2273,7 +2268,6 @@ def test_update_table_w_query(self): "expirationTime": str(_millis(exp_time)), "schema": schema_resource, }, - headers=None, timeout=DEFAULT_TIMEOUT, ) @@ -8173,3 +8167,20 @@ def transmit_next_chunk(transport): chunk_size = RU.call_args_list[0][0][1] assert chunk_size == 100 * (1 << 20) + + +@pytest.mark.enable_add_server_timeout_header +@pytest.mark.parametrize("headers", [None, {}]) +def test__call_api_add_server_timeout_w_timeout(client, headers): + client._connection = make_connection({}) + client._call_api(None, method="GET", path="/", headers=headers, timeout=42) + client._connection.api_request.assert_called_with( + method="GET", path="/", timeout=42, headers={"X-Server-Timeout": "42"} + ) + + +@pytest.mark.enable_add_server_timeout_header +def test__call_api_no_add_server_timeout_wo_timeout(client): + client._connection = make_connection({}) + client._call_api(None, method="GET", path="/") + client._connection.api_request.assert_called_with(method="GET", path="/") From d2fdd9dece14d39cb83f5781087270232f245579 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 1 Sep 2021 20:44:16 +0000 Subject: [PATCH 1286/2016] chore: release 2.26.0 (#937) :robot: I have created a release \*beep\* \*boop\* --- ## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01) ### Features * set the X-Server-Timeout header when timeout is set ([#927](https://www.github.com/googleapis/python-bigquery/issues/927)) ([ba02f24](https://www.github.com/googleapis/python-bigquery/commit/ba02f248ba9c449c34859579a4011f4bfd2f4a93)) ### Bug Fixes * guard imports against unsupported pyarrow versions ([#934](https://www.github.com/googleapis/python-bigquery/issues/934)) ([b289076](https://www.github.com/googleapis/python-bigquery/commit/b28907693bbe889becc1b9c8963f0a7e1ee6c35a)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index b4c8e5fb7584..5a3cb6bee822 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01) + + +### Features + +* set the X-Server-Timeout header when timeout is set ([#927](https://www.github.com/googleapis/python-bigquery/issues/927)) ([ba02f24](https://www.github.com/googleapis/python-bigquery/commit/ba02f248ba9c449c34859579a4011f4bfd2f4a93)) + + +### Bug Fixes + +* guard imports against unsupported pyarrow versions ([#934](https://www.github.com/googleapis/python-bigquery/issues/934)) ([b289076](https://www.github.com/googleapis/python-bigquery/commit/b28907693bbe889becc1b9c8963f0a7e1ee6c35a)) + ### [2.25.2](https://www.github.com/googleapis/python-bigquery/compare/v2.25.1...v2.25.2) (2021-08-31) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index e8672849f11d..1f7d79ab980b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.25.2" +__version__ = "2.26.0" From c724cf42e2e9a6312bdfc0d48e4acec075c62286 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 2 Sep 2021 02:29:35 +0200 Subject: [PATCH 1287/2016] chore(deps): update dependency google-cloud-bigquery to v2.26.0 (#938) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index cfccab8b06d0..aa4b05325cb8 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==2.5.0 geopandas==0.9.0 google-api-core==2.0.1 google-auth==2.0.2 -google-cloud-bigquery==2.25.2 +google-cloud-bigquery==2.26.0 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 google-crc32c==1.1.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 57f55d1869b5..cf123794e4e4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.25.2 +google-cloud-bigquery==2.26.0 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.6 grpcio==1.39.0 From 609dcb8409b92767ceffc8c45bddfe1e13932934 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 2 Sep 2021 11:10:38 -0600 Subject: [PATCH 1288/2016] chore: update system tests and samples to use and @google.com email address (#942) * chore: update system tests and samples to use and @google.com email address * Add group prefix * fixed access entry some more --- .../google-cloud-bigquery/samples/update_dataset_access.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/update_dataset_access.py b/packages/google-cloud-bigquery/samples/update_dataset_access.py index 6e844cc90799..a5c2670e7e1a 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_access.py @@ -28,8 +28,8 @@ def update_dataset_access(dataset_id): entry = bigquery.AccessEntry( role="READER", - entity_type="userByEmail", - entity_id="sample.bigquery.dev@gmail.com", + entity_type="groupByEmail", + entity_id="cloud-developer-relations@google.com", ) entries = list(dataset.access_entries) From 447948a61a323a613d82e2586d7b90f845993186 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 3 Sep 2021 06:14:46 -0400 Subject: [PATCH 1289/2016] chore(python): rename default branch to main (#935) Source-Link: https://github.com/googleapis/synthtool/commit/5c0fa62eea9c33ebe61e582424b659eb264e1ba4 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d Co-authored-by: Owl Bot Co-authored-by: Tim Swast Co-authored-by: Peter Lamut Co-authored-by: Anthonios Partheniou --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/CONTRIBUTING.rst | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index a9fcd07cc43b..c07f148f0b0b 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 + digest: sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index 5b87973dd1a8..8aecf9dd2aa7 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -113,9 +113,9 @@ Coding Style export GOOGLE_CLOUD_TESTING_BRANCH="main" By doing this, you are specifying the location of the most up-to-date - version of ``python-bigquery``. The the suggested remote name ``upstream`` - should point to the official ``googleapis`` checkout and the - the branch should be the main branch on that remote (``main``). + version of ``python-bigquery``. The + remote name ``upstream`` should point to the official ``googleapis`` + checkout and the branch should be the default branch on that remote (``main``). - This repository contains configuration for the `pre-commit `__ tool, which automates checking From 794a379b2feece511996ea355a7f23482cef9e3b Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 3 Sep 2021 10:19:42 -0600 Subject: [PATCH 1290/2016] chore: Reduce duplicated code betweem tests/unit and tests/unit/job (#940) * chore: Reduce duplicated code betweem tests/unit and tests/unit/job * reuse parent make_client --- .../tests/unit/job/helpers.py | 22 +------- .../tests/unit/job/test_base.py | 13 +++-- .../tests/unit/job/test_copy.py | 21 +++---- .../tests/unit/job/test_extract.py | 21 +++---- .../tests/unit/job/test_load.py | 41 +++++++------- .../tests/unit/job/test_query.py | 56 ++++++++++--------- .../tests/unit/job/test_query_pandas.py | 36 ++++++------ 7 files changed, 101 insertions(+), 109 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/job/helpers.py b/packages/google-cloud-bigquery/tests/unit/job/helpers.py index c792214e7579..3642c7229647 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/job/helpers.py @@ -14,36 +14,20 @@ import unittest -import mock from google.api_core import exceptions - -def _make_credentials(): - import google.auth.credentials - - return mock.Mock(spec=google.auth.credentials.Credentials) +from ..helpers import make_connection, make_client as __make_client def _make_client(project="test-project", connection=None): - from google.cloud.bigquery.client import Client - + client = __make_client(project) if connection is None: - connection = _make_connection() + connection = make_connection() - client = Client(project=project, credentials=_make_credentials(), _http=object()) client._connection = connection return client -def _make_connection(*responses): - import google.cloud.bigquery._http - from google.cloud.exceptions import NotFound - - mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) - mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] - return mock_conn - - def _make_retriable_exception(): return exceptions.TooManyRequests( "retriable exception", errors=[{"reason": "rateLimitExceeded"}] diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index c3f7854e349e..aa8e9c045191 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -21,8 +21,9 @@ import mock import pytest +from ..helpers import make_connection + from .helpers import _make_client -from .helpers import _make_connection from .helpers import _make_retriable_exception from .helpers import _make_job_resource @@ -740,7 +741,7 @@ def test_cancel_defaults(self): response = {"job": resource} job = self._set_properties_job() job._properties["jobReference"]["location"] = self.LOCATION - connection = job._client._connection = _make_connection(response) + connection = job._client._connection = make_connection(response) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -769,7 +770,7 @@ def test_cancel_explicit(self): response = {"job": resource} job = self._set_properties_job() client = _make_client(project=other_project) - connection = client._connection = _make_connection(response) + connection = client._connection = make_connection(response) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -930,7 +931,7 @@ def test_result_default_wo_state(self): started=True, ended=True, ) - conn = _make_connection( + conn = make_connection( _make_retriable_exception(), begun_job_resource, _make_retriable_exception(), @@ -968,7 +969,7 @@ def test_result_w_retry_wo_state(self): started=True, ended=True, ) - conn = _make_connection( + conn = make_connection( exceptions.NotFound("not normally retriable"), begun_job_resource, exceptions.NotFound("not normally retriable"), @@ -1008,7 +1009,7 @@ def test_result_w_retry_wo_state(self): ) def test_result_explicit_w_state(self): - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) # Use _set_properties() instead of directly modifying _properties so diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py index 992efcf6bf8c..d94e5bc884e1 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py @@ -14,9 +14,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestCopyJobConfig(_Base): @@ -333,7 +334,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -396,9 +397,9 @@ def test_begin_w_alternate_client(self): "writeDisposition": WriteDisposition.WRITE_TRUNCATE, } RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -427,7 +428,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) @@ -446,9 +447,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -468,7 +469,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -488,9 +489,9 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_extract.py b/packages/google-cloud-bigquery/tests/unit/job/test_extract.py index 4c9411d0d154..8bada51af30c 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_extract.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_extract.py @@ -14,9 +14,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestExtractJobConfig(_Base): @@ -265,7 +266,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -318,9 +319,9 @@ def test_begin_w_alternate_client(self): "printHeader": False, } RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -353,7 +354,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one( self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client @@ -371,9 +372,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one( self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 @@ -395,7 +396,7 @@ def test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -416,9 +417,9 @@ def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index 70e7860a7904..cf2096b8be4e 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -16,9 +16,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestLoadJob(_Base): @@ -238,7 +239,7 @@ def test_result_invokes_begin(self): begun_resource = self._make_resource() done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, done_resource) + connection = make_connection(begun_resource, done_resource) client = _make_client(self.PROJECT) client._connection = connection @@ -421,7 +422,7 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_already_running(self): - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job._properties["status"] = {"state": "RUNNING"} @@ -436,7 +437,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) path = "/projects/{}/jobs".format(self.PROJECT) @@ -478,7 +479,7 @@ def test_begin_w_autodetect(self): del resource["etag"] del resource["selfLink"] del resource["user_email"] - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) config = LoadJobConfig() config.autodetect = True @@ -559,9 +560,9 @@ def test_begin_w_alternate_client(self): "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) full_name = SchemaField("full_name", "STRING", mode="REQUIRED") age = SchemaField("age", "INTEGER", mode="REQUIRED") @@ -611,7 +612,7 @@ def test_begin_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -634,7 +635,7 @@ def test_begin_w_job_reference(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -654,9 +655,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -679,7 +680,7 @@ def test_exists_miss_w_job_reference(self): from google.cloud.bigquery import job job_ref = job._JobReference("my-job-id", "other-project", "US") - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -701,7 +702,7 @@ def test_exists_miss_w_job_reference(self): def test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -719,9 +720,9 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -744,7 +745,7 @@ def test_reload_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -769,7 +770,7 @@ def test_cancel_w_bound_client(self): PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) RESPONSE = {"job": RESOURCE} - conn = _make_connection(RESPONSE) + conn = make_connection(RESPONSE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -788,9 +789,9 @@ def test_cancel_w_alternate_client(self): PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) RESPONSE = {"job": RESOURCE} - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESPONSE) + conn2 = make_connection(RESPONSE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -813,7 +814,7 @@ def test_cancel_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection({"job": resource}) + conn = make_connection({"job": resource}) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index d4137052006d..4c598d797bcd 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -26,9 +26,11 @@ from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery.query + +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestQueryJob(_Base): @@ -943,7 +945,7 @@ def test_result(self): "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - conn = _make_connection( + conn = make_connection( query_resource, query_resource_done, job_resource_done, query_page_resource ) client = _make_client(self.PROJECT, connection=conn) @@ -1005,7 +1007,7 @@ def test_result_with_done_job_calls_get_query_results(self): "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - conn = _make_connection(query_resource_done, results_page_resource) + conn = make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1052,7 +1054,7 @@ def test_result_with_max_results(self): {"f": [{"v": "ghi"}]}, ], } - connection = _make_connection(query_resource, query_page_resource) + connection = make_connection(query_resource, query_page_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1096,7 +1098,7 @@ def test_result_w_retry(self): "tableId": "dest_table", } - connection = _make_connection( + connection = make_connection( exceptions.NotFound("not normally retriable"), query_resource, exceptions.NotFound("not normally retriable"), @@ -1144,7 +1146,7 @@ def test_result_w_empty_schema(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": []}, } - connection = _make_connection(query_resource, query_resource) + connection = make_connection(query_resource, query_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1165,7 +1167,7 @@ def test_result_invokes_begins(self): query_resource["jobComplete"] = True done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, incomplete_resource, query_resource, @@ -1196,7 +1198,7 @@ def test_result_w_timeout(self): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) @@ -1245,7 +1247,7 @@ def test_result_w_page_size(self): ], } query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} - conn = _make_connection( + conn = make_connection( query_results_resource, query_page_resource, query_page_resource_2 ) client = _make_client(self.PROJECT, connection=conn) @@ -1303,7 +1305,7 @@ def test_result_with_start_index(self): {"f": [{"v": "jkl"}]}, ], } - connection = _make_connection(query_resource, tabledata_resource) + connection = make_connection(query_resource, tabledata_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1428,7 +1430,7 @@ def test__begin_w_timeout(self): PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) with mock.patch( @@ -1462,7 +1464,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() @@ -1530,9 +1532,9 @@ def test_begin_w_alternate_client(self): } RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION RESOURCE["configuration"]["dryRun"] = True - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(TABLE) @@ -1588,7 +1590,7 @@ def test_begin_w_udf(self): {"resourceUri": RESOURCE_URI}, {"inlineCode": INLINE_UDF_CODE}, ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) udf_resources = [ UDFResource("resourceUri", RESOURCE_URI), @@ -1647,7 +1649,7 @@ def test_begin_w_named_query_parameter(self): "parameterValue": {"value": "123"}, } ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters @@ -1695,7 +1697,7 @@ def test_begin_w_positional_query_parameter(self): config["queryParameters"] = [ {"parameterType": {"type": "INT64"}, "parameterValue": {"value": "123"}} ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters @@ -1774,7 +1776,7 @@ def test_begin_w_table_defs(self): csv_table: CSV_CONFIG_RESOURCE, } want_resource = copy.deepcopy(RESOURCE) - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.table_definitions = {bt_table: bt_config, csv_table: csv_config} @@ -1818,7 +1820,7 @@ def test_dry_run_query(self): del RESOURCE["selfLink"] del RESOURCE["user_email"] RESOURCE["configuration"]["dryRun"] = True - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.dry_run = True @@ -1846,7 +1848,7 @@ def test_dry_run_query(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) with mock.patch( @@ -1862,9 +1864,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) with mock.patch( @@ -1887,7 +1889,7 @@ def test_reload_w_bound_client(self): DS_ID = "DATASET" DEST_TABLE = "dest_table" RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) @@ -1919,9 +1921,9 @@ def test_reload_w_alternate_client(self): "datasetId": DS_ID, "tableId": DEST_TABLE, } - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) with mock.patch( @@ -1945,7 +1947,7 @@ def test_reload_w_timeout(self): DS_ID = "DATASET" DEST_TABLE = "dest_table" RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) @@ -1975,7 +1977,7 @@ def test_iter(self): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 580b41c789e2..1b44f65d3404 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -41,8 +41,10 @@ tqdm = None from google.cloud.bigquery import _helpers + +from ..helpers import make_connection + from .helpers import _make_client -from .helpers import _make_connection from .helpers import _make_job_resource @@ -125,7 +127,7 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): }, "totalRows": "4", } - connection = _make_connection(get_query_results_resource, job_resource) + connection = make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(job_resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -207,7 +209,7 @@ def test_to_arrow(): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) @@ -252,7 +254,7 @@ def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) begun_resource = _make_job_resource(job_type="query") job = target_class.from_api_repr(begun_resource, client) @@ -299,7 +301,7 @@ def test_to_arrow_w_tqdm_w_query_plan(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -356,7 +358,7 @@ def test_to_arrow_w_tqdm_w_pending_status(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -408,7 +410,7 @@ def test_to_arrow_w_tqdm_wo_query_plan(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -450,7 +452,7 @@ def _make_job(schema=(), rows=()): tabledata_resource = {"rows": [{"f": [{"v": v} for v in row]} for row in rows]} done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) @@ -486,7 +488,7 @@ def test_to_dataframe_ddl_query(): "jobReference": resource["jobReference"], "schema": {"fields": []}, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) @@ -514,7 +516,7 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): ] }, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -562,7 +564,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): "totalRows": "4", "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -632,7 +634,7 @@ def test_to_dataframe_column_dtypes(): query_resource["rows"] = rows done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, query_resource ) client = _make_client(connection=connection) @@ -673,7 +675,7 @@ def test_to_dataframe_column_date_dtypes(): query_resource["rows"] = rows done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, query_resource ) client = _make_client(connection=connection) @@ -702,7 +704,7 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, query_resource, query_resource, ) client = _make_client(connection=connection) @@ -735,7 +737,7 @@ def test_to_dataframe_w_tqdm_pending(): {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -791,7 +793,7 @@ def test_to_dataframe_w_tqdm(): {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -846,7 +848,7 @@ def test_to_dataframe_w_tqdm_max_results(): ] rows = [{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) From 4ed8f146fea385449eec4f744fb68ded3c77aa6b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 3 Sep 2021 19:51:10 +0200 Subject: [PATCH 1291/2016] test: fix routine DDL sample test exits too early (#932) Co-authored-by: Tres Seaver Co-authored-by: Tim Swast --- .../google-cloud-bigquery/samples/tests/test_routine_samples.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py index 59ec1fae94ae..c1b0bb5a7cd0 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py @@ -32,7 +32,6 @@ def test_create_routine_ddl(capsys, random_routine_id, client): out, err = capsys.readouterr() assert "Created routine {}".format(random_routine_id) in out - return routine assert routine.type_ == "SCALAR_FUNCTION" assert routine.language == "SQL" expected_arguments = [ From 76447b87028f8a2c64a0e2ae138bae0f8e84bb5b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 3 Sep 2021 20:12:26 +0200 Subject: [PATCH 1292/2016] chore(deps): update all dependencies (#939) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.6.3` -> `==2.7.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/compatibility-slim/2.6.3)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/confidence-slim/2.6.3)](https://docs.renovatebot.com/merge-confidence/) | | [google-crc32c](https://togithub.com/googleapis/python-crc32c) | `==1.1.3` -> `==1.1.4` | [![age](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/compatibility-slim/1.1.3)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/confidence-slim/1.1.3)](https://docs.renovatebot.com/merge-confidence/) | | [google-resumable-media](https://togithub.com/googleapis/google-resumable-media-python) | `==2.0.1` -> `==2.0.2` | [![age](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/compatibility-slim/2.0.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/confidence-slim/2.0.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.7.0`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​270-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev263v270-2021-09-02) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.6.3...v2.7.0) ##### Features - **v1beta2:** Align ReadRows timeout with other versions of the API ([#​293](https://www.togithub.com/googleapis/python-bigquery-storage/issues/293)) ([43e36a1](https://www.github.com/googleapis/python-bigquery-storage/commit/43e36a13ece8d876763d88bad0252a1b2421c52a)) ##### Documentation - **v1beta2:** Align session length with public documentation ([43e36a1](https://www.github.com/googleapis/python-bigquery-storage/commit/43e36a13ece8d876763d88bad0252a1b2421c52a)) ##### [2.6.3](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.2...v2.6.3) (2021-08-06) ##### Bug Fixes - resume read stream on `Unknown` transport-layer exception ([#​263](https://www.togithub.com/googleapis/python-bigquery-storage/issues/263)) ([127caa0](https://www.github.com/googleapis/python-bigquery-storage/commit/127caa06144b9cec04b23914b561be6a264bcb36)) ##### [2.6.2](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.1...v2.6.2) (2021-07-28) ##### Bug Fixes - enable self signed jwt for grpc ([#​249](https://www.togithub.com/googleapis/python-bigquery-storage/issues/249)) ([a7e8d91](https://www.github.com/googleapis/python-bigquery-storage/commit/a7e8d913fc3de67a3f38ecbd35af2f9d1a33aa8d)) ##### Documentation - remove duplicate code samples ([#​246](https://www.togithub.com/googleapis/python-bigquery-storage/issues/246)) ([303f273](https://www.github.com/googleapis/python-bigquery-storage/commit/303f2732ced38e491df92e965dd37bac24a61d2f)) - add Samples section to CONTRIBUTING.rst ([#​241](https://www.togithub.com/googleapis/python-bigquery-storage/issues/241)) ([5d02358](https://www.github.com/googleapis/python-bigquery-storage/commit/5d02358fbd397cafcc1169d829859fe2dd568645)) ##### [2.6.1](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.0...v2.6.1) (2021-07-20) ##### Bug Fixes - **deps:** pin 'google-{api,cloud}-core', 'google-auth' to allow 2.x versions ([#​240](https://www.togithub.com/googleapis/python-bigquery-storage/issues/240)) ([8f848e1](https://www.github.com/googleapis/python-bigquery-storage/commit/8f848e18379085160492cdd2d12dc8de50a46c8e)) ##### Documentation - pandas DataFrame samples are more standalone ([#​224](https://www.togithub.com/googleapis/python-bigquery-storage/issues/224)) ([4026997](https://www.github.com/googleapis/python-bigquery-storage/commit/4026997d7a286b63ed2b969c0bd49de59635326d))
googleapis/python-crc32c ### [`v1.1.4`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#​114-httpswwwgithubcomgoogleapispython-crc32ccomparev114v114-2021-09-02) [Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.3...v1.1.4)
googleapis/google-resumable-media-python ### [`v2.0.2`](https://togithub.com/googleapis/google-resumable-media-python/blob/master/CHANGELOG.md#​202-httpswwwgithubcomgoogleapisgoogle-resumable-media-pythoncomparev201v202-2021-09-02) [Compare Source](https://togithub.com/googleapis/google-resumable-media-python/compare/v2.0.1...v2.0.2)
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://togithub.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index aa4b05325cb8..8fb578018e5a 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,10 +13,10 @@ geopandas==0.9.0 google-api-core==2.0.1 google-auth==2.0.2 google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.6.3 +google-cloud-bigquery-storage==2.7.0 google-cloud-core==2.0.0 -google-crc32c==1.1.3 -google-resumable-media==2.0.1 +google-crc32c==1.1.2 +google-resumable-media==2.0.2 googleapis-common-protos==1.53.0 grpcio==1.39.0 idna==3.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index cf123794e4e4..e096af157869 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.6.3 +google-cloud-bigquery-storage==2.7.0 google-auth-oauthlib==0.4.6 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' From d57cbd5b0e726ba9a58d0e7263b989259fe3715e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 7 Sep 2021 16:38:33 +0200 Subject: [PATCH 1293/2016] tests: add more dependencies to pre-release tests (#948) --- packages/google-cloud-bigquery/noxfile.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index d53b33121111..4b12e8138815 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -214,7 +214,15 @@ def prerelease_deps(session): session.install( "--extra-index-url", "https://pypi.fury.io/arrow-nightlies/", "--pre", "pyarrow" ) - session.install("--pre", "grpcio", "pandas") + session.install( + "--pre", + "google-api-core", + "google-cloud-bigquery-storage", + "google-cloud-core", + "google-resumable-media", + "grpcio", + "pandas", + ) session.install( "freezegun", "google-cloud-datacatalog", From 18a60bfdb753a60f98a9bb6391cb231fee7e576e Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 7 Sep 2021 09:16:10 -0600 Subject: [PATCH 1294/2016] fix: Arrow extension-type metadata was not set when calling the REST API or when there are no rows (#946) --- .../google/cloud/bigquery/_pandas_helpers.py | 14 ++++- .../google/cloud/bigquery/table.py | 8 ++- .../tests/system/conftest.py | 17 ++++++ .../tests/system/test_arrow.py | 59 +++++++++++++++++++ .../tests/unit/test__pandas_helpers.py | 23 ++++++++ 5 files changed, 118 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 0a22043a3d69..869c0215d5b4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -173,6 +173,13 @@ def pyarrow_timestamp(): pyarrow.decimal128(38, scale=9).id: "NUMERIC", pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", } + BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { + "GEOGRAPHY": { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + }, + "DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"}, + } else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER @@ -227,7 +234,12 @@ def bq_to_arrow_field(bq_field, array_type=None): if array_type is not None: arrow_type = array_type # For GEOGRAPHY, at least initially is_nullable = bq_field.mode.upper() == "NULLABLE" - return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) + metadata = BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA.get( + bq_field.field_type.upper() if bq_field.field_type else "" + ) + return pyarrow.field( + bq_field.name, arrow_type, nullable=is_nullable, metadata=metadata + ) warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name)) return None diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 609c0b57edfa..c4a45dc8371d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1810,10 +1810,14 @@ def to_arrow( if owns_bqstorage_client: bqstorage_client._transport.grpc_channel.close() - if record_batches: + if record_batches and bqstorage_client is not None: return pyarrow.Table.from_batches(record_batches) else: - # No records, use schema based on BigQuery schema. + # No records (not record_batches), use schema based on BigQuery schema + # **or** + # we used the REST API (bqstorage_client is None), + # which doesn't add arrow extension metadata, so we let + # `bq_to_arrow_schema` do it. arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) diff --git a/packages/google-cloud-bigquery/tests/system/conftest.py b/packages/google-cloud-bigquery/tests/system/conftest.py index cc2c2a4dcbaf..7eec76a32e21 100644 --- a/packages/google-cloud-bigquery/tests/system/conftest.py +++ b/packages/google-cloud-bigquery/tests/system/conftest.py @@ -13,6 +13,7 @@ # limitations under the License. import pathlib +import re import pytest import test_utils.prefixer @@ -61,6 +62,17 @@ def dataset_id(bigquery_client): bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) +@pytest.fixture() +def dataset_client(bigquery_client, dataset_id): + import google.cloud.bigquery.job + + return bigquery.Client( + default_query_job_config=google.cloud.bigquery.job.QueryJobConfig( + default_dataset=f"{bigquery_client.project}.{dataset_id}", + ) + ) + + @pytest.fixture def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" @@ -98,3 +110,8 @@ def scalars_extreme_table( job.result() yield full_table_id bigquery_client.delete_table(full_table_id) + + +@pytest.fixture +def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub): + return replace_non_anum("_", request.node.name) diff --git a/packages/google-cloud-bigquery/tests/system/test_arrow.py b/packages/google-cloud-bigquery/tests/system/test_arrow.py index 12f7af9cb1d6..96f9dea25565 100644 --- a/packages/google-cloud-bigquery/tests/system/test_arrow.py +++ b/packages/google-cloud-bigquery/tests/system/test_arrow.py @@ -110,3 +110,62 @@ def test_list_rows_nullable_scalars_dtypes( timestamp_type = schema.field("timestamp_col").type assert timestamp_type.unit == "us" assert timestamp_type.tz is not None + + +@pytest.mark.parametrize("do_insert", [True, False]) +def test_arrow_extension_types_same_for_storage_and_REST_APIs_894( + dataset_client, test_table_name, do_insert +): + types = dict( + astring=("STRING", "'x'"), + astring9=("STRING(9)", "'x'"), + abytes=("BYTES", "b'x'"), + abytes9=("BYTES(9)", "b'x'"), + anumeric=("NUMERIC", "42"), + anumeric9=("NUMERIC(9)", "42"), + anumeric92=("NUMERIC(9,2)", "42"), + abignumeric=("BIGNUMERIC", "42e30"), + abignumeric49=("BIGNUMERIC(37)", "42e30"), + abignumeric492=("BIGNUMERIC(37,2)", "42e30"), + abool=("BOOL", "true"), + adate=("DATE", "'2021-09-06'"), + adatetime=("DATETIME", "'2021-09-06T09:57:26'"), + ageography=("GEOGRAPHY", "ST_GEOGFROMTEXT('point(0 0)')"), + # Can't get arrow data for interval :( + # ainterval=('INTERVAL', "make_interval(1, 2, 3, 4, 5, 6)"), + aint64=("INT64", "42"), + afloat64=("FLOAT64", "42.0"), + astruct=("STRUCT", "struct(42)"), + atime=("TIME", "'1:2:3'"), + atimestamp=("TIMESTAMP", "'2021-09-06T09:57:26'"), + ) + columns = ", ".join(f"{k} {t[0]}" for k, t in types.items()) + dataset_client.query(f"create table {test_table_name} ({columns})").result() + if do_insert: + names = list(types) + values = ", ".join(types[name][1] for name in names) + names = ", ".join(names) + dataset_client.query( + f"insert into {test_table_name} ({names}) values ({values})" + ).result() + at = dataset_client.query(f"select * from {test_table_name}").result().to_arrow() + storage_api_metadata = { + at.field(i).name: at.field(i).metadata for i in range(at.num_columns) + } + at = ( + dataset_client.query(f"select * from {test_table_name}") + .result() + .to_arrow(create_bqstorage_client=False) + ) + rest_api_metadata = { + at.field(i).name: at.field(i).metadata for i in range(at.num_columns) + } + + assert rest_api_metadata == storage_api_metadata + assert rest_api_metadata["adatetime"] == { + b"ARROW:extension:name": b"google:sqlType:datetime" + } + assert rest_api_metadata["ageography"] == { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + } diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 80b226a3a64d..ef8c80c81342 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -1696,3 +1696,26 @@ def test_bq_to_arrow_field_type_override(module_under_test): ).type == pyarrow.binary() ) + + +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +@pytest.mark.parametrize( + "field_type, metadata", + [ + ("datetime", {b"ARROW:extension:name": b"google:sqlType:datetime"}), + ( + "geography", + { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + }, + ), + ], +) +def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata): + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", field_type) + ).metadata + == metadata + ) From 727805cc711fa508b4764d8cfd8053642f1e4d56 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 7 Sep 2021 10:12:26 -0600 Subject: [PATCH 1295/2016] chore: reference main branch of google-cloud-python (#953) Adjust google-cloud-python links to reference main branch. --- packages/google-cloud-bigquery/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 8454cf9c0475..d0ad059a23fb 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -12,7 +12,7 @@ processing power of Google's infrastructure. - `Product Documentation`_ .. |GA| image:: https://img.shields.io/badge/support-GA-gold.svg - :target: https://github.com/googleapis/google-cloud-python/blob/master/README.rst#general-availability + :target: https://github.com/googleapis/google-cloud-python/blob/main/README.rst#general-availability .. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg From 358d7d6ff2f8b272dc3a5a8b6ca479474762f8f4 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 11 Sep 2021 00:04:13 +0200 Subject: [PATCH 1296/2016] chore(deps): update all dependencies (#943) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-crc32c](https://togithub.com/googleapis/python-crc32c) | `==1.1.2` -> `==1.1.5` | [![age](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.5/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.5/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.5/compatibility-slim/1.1.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.5/confidence-slim/1.1.2)](https://docs.renovatebot.com/merge-confidence/) | | [grpcio](https://grpc.io) | `==1.39.0` -> `==1.40.0` | [![age](https://badges.renovateapi.com/packages/pypi/grpcio/1.40.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/grpcio/1.40.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/grpcio/1.40.0/compatibility-slim/1.39.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/grpcio/1.40.0/confidence-slim/1.39.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-crc32c ### [`v1.1.5`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#​115-httpswwwgithubcomgoogleapispython-crc32ccomparev114v115-2021-09-07) [Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.4...v1.1.5) ### [`v1.1.4`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#​114-httpswwwgithubcomgoogleapispython-crc32ccomparev114v114-2021-09-02) [Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.3...v1.1.4) ### [`v1.1.3`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#​113-httpswwwgithubcomgoogleapispython-crc32ccomparev112v113-2021-08-30) [Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.2...v1.1.3)
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://togithub.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 8fb578018e5a..a4b4ed692302 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -18,7 +18,7 @@ google-cloud-core==2.0.0 google-crc32c==1.1.2 google-resumable-media==2.0.2 googleapis-common-protos==1.53.0 -grpcio==1.39.0 +grpcio==1.40.0 idna==3.2 importlib-metadata==4.8.1 libcst==0.3.20 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index e096af157869..f575e41b1ac3 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.26.0 google-cloud-bigquery-storage==2.7.0 google-auth-oauthlib==0.4.6 -grpcio==1.39.0 +grpcio==1.40.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From cbfaf436c63009416ed69c89b3e5e1a487016e80 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 13 Sep 2021 16:28:53 +0200 Subject: [PATCH 1297/2016] feat: include key metadata in Job representation (#964) --- .../google/cloud/bigquery/job/base.py | 8 ++++++++ .../google-cloud-bigquery/tests/unit/job/test_base.py | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index e5fc592a6006..72db5a63c6b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -722,6 +722,14 @@ def cancelled(self): and self.error_result.get("reason") == _STOPPED_REASON ) + def __repr__(self): + result = ( + f"{self.__class__.__name__}<" + f"project={self.project}, location={self.location}, id={self.job_id}" + ">" + ) + return result + class _JobConfig(object): """Abstract base class for job configuration objects. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index aa8e9c045191..e320c72cbfa7 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -1043,6 +1043,12 @@ def test_cancelled_w_error_result_w_stopped(self): self.assertTrue(job.cancelled()) + def test_repr(self): + client = _make_client(project="project-foo") + job = self._make_one("job-99", client) + job._properties.setdefault("jobReference", {})["location"] = "ABC" + assert repr(job) == "_AsyncJob" + class Test_JobConfig(unittest.TestCase): JOB_TYPE = "testing" From 0108cad1cf453ccdf5bd9d52905826acf4abcef9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 14 Sep 2021 09:32:46 -0500 Subject: [PATCH 1298/2016] test: ensure prerelease versions of pandas and arrow are tested nightly (#961) * test: ensure prerelease versions of pandas and arrow are tested nightly * use regex to find package names rather than filter out comment lines --- .../continuous/prerelease-deps-3.8.cfg | 7 +++ packages/google-cloud-bigquery/noxfile.py | 44 +++++++++++++++++-- 2 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg new file mode 100644 index 000000000000..3595fb43f5c0 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 4b12e8138815..d41573407aed 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -16,6 +16,7 @@ import pathlib import os +import re import shutil import nox @@ -212,16 +213,30 @@ def prerelease_deps(session): # PyArrow prerelease packages are published to an alternative PyPI host. # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages session.install( - "--extra-index-url", "https://pypi.fury.io/arrow-nightlies/", "--pre", "pyarrow" + "--extra-index-url", + "https://pypi.fury.io/arrow-nightlies/", + "--prefer-binary", + "--pre", + "--upgrade", + "pyarrow", ) session.install( + "--extra-index-url", + "https://pypi.anaconda.org/scipy-wheels-nightly/simple", + "--prefer-binary", "--pre", + "--upgrade", + "pandas", + ) + + session.install( + "--pre", + "--upgrade", "google-api-core", "google-cloud-bigquery-storage", "google-cloud-core", "google-resumable-media", "grpcio", - "pandas", ) session.install( "freezegun", @@ -234,7 +249,30 @@ def prerelease_deps(session): "pytest", "pytest-cov", ) - session.install("-e", ".[all]") + + # Because we test minimum dependency versions on the minimum Python + # version, the first version we test with in the unit tests sessions has a + # constraints file containing all dependencies and extras. + with open( + CURRENT_DIRECTORY + / "testing" + / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", + encoding="utf-8", + ) as constraints_file: + constraints_text = constraints_file.read() + + # Ignore leading whitespace and comment lines. + deps = [ + match.group(1) + for match in re.finditer( + r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE + ) + ] + + # We use --no-deps to ensure that pre-release versions aren't overwritten + # by the version ranges in setup.py. + session.install(*deps) + session.install("--no-deps", "-e", ".[all]") # Print out prerelease package versions. session.run("python", "-c", "import grpc; print(grpc.__version__)") From 5aa618da457c0abb3a032482b75cded9c728d5d2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 14 Sep 2021 09:58:28 -0500 Subject: [PATCH 1299/2016] docs: simplify destination table sample with f-strings (#966) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes internal issue 199420466 🦕 --- .../samples/snippets/natality_tutorial.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py index a8d90501ac79..ed08b279a93c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py @@ -38,12 +38,12 @@ def run_natality_tutorial(override_values={}): # Prepare a reference to a new dataset for storing the query results. dataset_id = "natality_regression" - dataset_id_full = "{}.{}".format(client.project, dataset_id) + dataset_id_full = f"{client.project}.{dataset_id}" # [END bigquery_query_natality_tutorial] # To facilitate testing, we replace values with alternatives # provided by the testing harness. dataset_id = override_values.get("dataset_id", dataset_id) - dataset_id_full = "{}.{}".format(client.project, dataset_id) + dataset_id_full = f"{client.project}.{dataset_id}" # [START bigquery_query_natality_tutorial] dataset = bigquery.Dataset(dataset_id_full) @@ -51,15 +51,13 @@ def run_natality_tutorial(override_values={}): # Create the new BigQuery dataset. dataset = client.create_dataset(dataset) - # In the new BigQuery dataset, create a reference to a new table for - # storing the query results. - table_ref = dataset.table("regression_input") - # Configure the query job. job_config = bigquery.QueryJobConfig() - # Set the destination table to the table reference created above. - job_config.destination = table_ref + # Set the destination table to where you want to store query results. + # As of google-cloud-bigquery 1.11.0, a fully qualified table ID can be + # used in place of a TableReference. + job_config.destination = f"{dataset_id_full}.regression_input" # Set up a query in Standard SQL, which is the default for the BigQuery # Python client library. From 8ce89aaa28bca4cc066e3d8c6704b196a829a94a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 17 Sep 2021 09:50:57 +0200 Subject: [PATCH 1300/2016] cleanup: get rid of unit test warnings caused by our code (#973) * Explicitly register custom pytest marker * Avoid/silence user warnings in geopandas tests --- .../google/cloud/bigquery/table.py | 5 ++- .../tests/unit/conftest.py | 5 +++ .../tests/unit/test_table.py | 43 ++++++++++++++----- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index c4a45dc8371d..3378b3378a8c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -2251,7 +2251,10 @@ def to_geodataframe( """ if geopandas is None: raise ValueError(_NO_GEOPANDAS_ERROR) - return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM) + + # Since an empty GeoDataFrame has no geometry column, we do not CRS on it, + # because that's deprecated. + return geopandas.GeoDataFrame() def to_dataframe_iterable( self, diff --git a/packages/google-cloud-bigquery/tests/unit/conftest.py b/packages/google-cloud-bigquery/tests/unit/conftest.py index feba65aa5f7f..c2ae78eaa07c 100644 --- a/packages/google-cloud-bigquery/tests/unit/conftest.py +++ b/packages/google-cloud-bigquery/tests/unit/conftest.py @@ -54,3 +54,8 @@ def disable_add_server_timeout_header(request): noop_add_server_timeout_header, ): yield + + +def pytest_configure(config): + # Explicitly register custom test markers to avoid warnings. + config.addinivalue_line("markers", "enable_add_server_timeout_header") diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index c64620a48c67..c94bf3a7faf0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1866,8 +1866,7 @@ def test_to_geodataframe(self): df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 0) # verify the number of rows - self.assertEqual(df.crs.srs, "EPSG:4326") - self.assertEqual(df.crs.name, "WGS 84") + self.assertIsNone(df.crs) class TestRowIterator(unittest.TestCase): @@ -4027,8 +4026,14 @@ def test_to_geodataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.geog.dtype.name, "geometry") self.assertIsInstance(df.geog, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) - self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(df.crs.srs, "EPSG:4326") self.assertEqual(df.crs.name, "WGS 84") self.assertEqual(df.geog.crs.srs, "EPSG:4326") @@ -4099,8 +4104,14 @@ def test_to_geodataframe_w_geography_column(self): self.assertEqual(df.geog.dtype.name, "geometry") self.assertEqual(df.geog2.dtype.name, "object") self.assertIsInstance(df.geog, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) - self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual( [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"] ) @@ -4110,10 +4121,14 @@ def test_to_geodataframe_w_geography_column(self): self.assertEqual( [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"] ) + # and can easily be converted to a GeoSeries - self.assertEqual( - list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"] - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual( + list(map(str, geopandas.GeoSeries(df.geog2).area)), + ["0.0", "0.0", "0.0"], + ) @unittest.skipIf(geopandas is None, "Requires `geopandas`") @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") @@ -4165,8 +4180,14 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.g.dtype.name, "geometry") self.assertIsInstance(df.g, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0"]) - self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0"]) + self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"]) From 0d98b2660adc353bb0c74d6bb62fc4e26432923f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 17 Sep 2021 10:39:33 +0200 Subject: [PATCH 1301/2016] refactor: extract common logic to TableBase class (#956) * refactor: extract common logic to TableBase class * Adress pytype's missing attribute false warning * Mark TableBase class as private * Simplify TableReference.to_api_repr() logic * Avoid get/set subproperty helper gotcha * Test _TableBase class directly --- .../google/cloud/bigquery/_helpers.py | 16 +- .../google/cloud/bigquery/table.py | 236 +++++------- .../tests/unit/test__helpers.py | 13 +- .../tests/unit/test_table.py | 342 ++++++++++-------- 4 files changed, 293 insertions(+), 314 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 9df0f3d0ad6d..28a76206ee97 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -672,8 +672,9 @@ def _get_sub_prop(container, keys, default=None): container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): - A sequence of keys to attempt to get the value for. Each item in - the sequence represents a deeper nesting. The first key is for + A sequence of keys to attempt to get the value for. If ``keys`` is a + string, it is treated as sequence containing a single string key. Each item + in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. default (Optional[object]): @@ -700,6 +701,9 @@ def _get_sub_prop(container, keys, default=None): Returns: object: The value if present or the default. """ + if isinstance(keys, str): + keys = [keys] + sub_val = container for key in keys: if key not in sub_val: @@ -715,8 +719,9 @@ def _set_sub_prop(container, keys, value): container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): - A sequence of keys to attempt to set the value for. Each item in - the sequence represents a deeper nesting. The first key is for + A sequence of keys to attempt to set the value for. If ``keys`` is a + string, it is treated as sequence containing a single string key. Each item + in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. value (object): Value to set within the container. @@ -743,6 +748,9 @@ def _set_sub_prop(container, keys, value): >>> container {'key': {'subkey': 'new'}} """ + if isinstance(keys, str): + keys = [keys] + sub_val = container for key in keys[:-1]: if key not in sub_val: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 3378b3378a8c..608218fdc33a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -69,6 +69,7 @@ import geopandas import pyarrow from google.cloud import bigquery_storage + from google.cloud.bigquery.dataset import DatasetReference _NO_PANDAS_ERROR = ( @@ -126,45 +127,93 @@ def _view_use_legacy_sql_getter(table): return True -class TableReference(object): - """TableReferences are pointers to tables. +class _TableBase: + """Base class for Table-related classes with common functionality.""" - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference - - Args: - dataset_ref (google.cloud.bigquery.dataset.DatasetReference): - A pointer to the dataset - table_id (str): The ID of the table - """ + _PROPERTY_TO_API_FIELD = { + "dataset_id": ["tableReference", "datasetId"], + "project": ["tableReference", "projectId"], + "table_id": ["tableReference", "tableId"], + } - def __init__(self, dataset_ref, table_id): - self._project = dataset_ref.project - self._dataset_id = dataset_ref.dataset_id - self._table_id = table_id + def __init__(self): + self._properties = {} @property - def project(self): - """str: Project bound to the table""" - return self._project + def project(self) -> str: + """Project bound to the table.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["project"] + ) @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return self._dataset_id + def dataset_id(self) -> str: + """ID of dataset containing the table.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] + ) @property - def table_id(self): - """str: The table ID.""" - return self._table_id + def table_id(self) -> str: + """The table ID.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["table_id"] + ) @property - def path(self): - """str: URL path for the table's APIs.""" - return "/projects/%s/datasets/%s/tables/%s" % ( - self._project, - self._dataset_id, - self._table_id, + def path(self) -> str: + """URL path for the table's APIs.""" + return ( + f"/projects/{self.project}/datasets/{self.dataset_id}" + f"/tables/{self.table_id}" + ) + + def __eq__(self, other): + if isinstance(other, _TableBase): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + + +class TableReference(_TableBase): + """TableReferences are pointers to tables. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference + + Args: + dataset_ref: A pointer to the dataset + table_id: The ID of the table + """ + + _PROPERTY_TO_API_FIELD = { + "dataset_id": "datasetId", + "project": "projectId", + "table_id": "tableId", + } + + def __init__(self, dataset_ref: "DatasetReference", table_id: str): + self._properties = {} + + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["project"], + dataset_ref.project, + ) + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["dataset_id"], + dataset_ref.dataset_id, + ) + _helpers._set_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["table_id"], table_id, ) @classmethod @@ -233,11 +282,7 @@ def to_api_repr(self) -> dict: Returns: Dict[str, object]: Table reference represented as an API resource """ - return { - "projectId": self._project, - "datasetId": self._dataset_id, - "tableId": self._table_id, - } + return copy.deepcopy(self._properties) def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. @@ -257,54 +302,25 @@ def to_bqstorage(self) -> str: str: A reference to this table in the BigQuery Storage API. """ - table_id, _, _ = self._table_id.partition("@") + table_id, _, _ = self.table_id.partition("@") table_id, _, _ = table_id.partition("$") - table_ref = "projects/{}/datasets/{}/tables/{}".format( - self._project, self._dataset_id, table_id, + table_ref = ( + f"projects/{self.project}/datasets/{self.dataset_id}/tables/{table_id}" ) - return table_ref - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - Tuple[str]: The contents of this :class:`DatasetReference`. - """ - return (self._project, self._dataset_id, self._table_id) - - def __eq__(self, other): - if isinstance(other, (Table, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - elif isinstance(other, TableReference): - return self._key() == other._key() - else: - return NotImplemented - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash(self._key()) - def __str__(self): return f"{self.project}.{self.dataset_id}.{self.table_id}" def __repr__(self): from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference(self._project, self._dataset_id) - return "TableReference({}, '{}')".format(repr(dataset_ref), self._table_id) + dataset_ref = DatasetReference(self.project, self.dataset_id) + return f"TableReference({dataset_ref!r}, '{self.table_id}')" -class Table(object): +class Table(_TableBase): """Tables represent a set of rows whose values correspond to a schema. See @@ -325,9 +341,9 @@ class Table(object): """ _PROPERTY_TO_API_FIELD = { + **_TableBase._PROPERTY_TO_API_FIELD, "clustering_fields": "clustering", "created": "creationTime", - "dataset_id": ["tableReference", "datasetId"], "description": "description", "encryption_configuration": "encryptionConfiguration", "etag": "etag", @@ -346,14 +362,12 @@ class Table(object): "num_rows": "numRows", "partition_expiration": "timePartitioning", "partitioning_type": "timePartitioning", - "project": ["tableReference", "projectId"], "range_partitioning": "rangePartitioning", "time_partitioning": "timePartitioning", "schema": "schema", "snapshot_definition": "snapshotDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", - "table_id": ["tableReference", "tableId"], "time_partitioning": "timePartitioning", "type": "type", "view_use_legacy_sql": "view", @@ -368,38 +382,8 @@ def __init__(self, table_ref, schema=None): if schema is not None: self.schema = schema - @property - def project(self): - """str: Project bound to the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["project"] - ) - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] - ) - - @property - def table_id(self): - """str: ID of the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["table_id"] - ) - reference = property(_reference_getter) - @property - def path(self): - """str: URL path for the table's APIs.""" - return "/projects/%s/datasets/%s/tables/%s" % ( - self.project, - self.dataset_id, - self.table_id, - ) - @property def require_partition_filter(self): """bool: If set to true, queries over the partitioned table require a @@ -1040,29 +1024,11 @@ def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" return _helpers._build_resource_from_properties(self, filter_fields) - def __eq__(self, other): - if isinstance(other, Table): - return ( - self._properties["tableReference"] - == other._properties["tableReference"] - ) - elif isinstance(other, (TableReference, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - else: - return NotImplemented - - def __hash__(self): - return hash((self.project, self.dataset_id, self.table_id)) - def __repr__(self): return "Table({})".format(repr(self.reference)) -class TableListItem(object): +class TableListItem(_TableBase): """A read-only table resource from a list operation. For performance reasons, the BigQuery API only includes some of the table @@ -1126,21 +1092,6 @@ def expires(self): 1000.0 * float(expiration_time) ) - @property - def project(self): - """str: Project bound to the table.""" - return self._properties["tableReference"]["projectId"] - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return self._properties["tableReference"]["datasetId"] - - @property - def table_id(self): - """str: ID of the table.""" - return self._properties["tableReference"]["tableId"] - reference = property(_reference_getter) @property @@ -1276,19 +1227,6 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) - def __eq__(self, other): - if isinstance(other, (Table, TableReference, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - else: - return NotImplemented - - def __hash__(self): - return hash((self.project, self.dataset_id, self.table_id)) - def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 945b95d1b1a2..94e63fd639b4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -1214,9 +1214,13 @@ def test_w_empty_container_default_default(self): def test_w_missing_key_explicit_default(self): self.assertEqual(self._call_fut({"key2": 2}, ["key1"], default=1), 1) - def test_w_matching_single_key(self): + def test_w_matching_single_key_in_sequence(self): self.assertEqual(self._call_fut({"key1": 1}, ["key1"]), 1) + def test_w_matching_single_string_key(self): + data = {"k": {"e": {"y": "foo"}}, "key": "bar"} + self.assertEqual(self._call_fut(data, "key"), "bar") + def test_w_matching_first_key_missing_second_key(self): self.assertIsNone(self._call_fut({"key1": {"key3": 3}}, ["key1", "key2"])) @@ -1230,11 +1234,16 @@ def _call_fut(self, container, keys, value): return _set_sub_prop(container, keys, value) - def test_w_empty_container_single_key(self): + def test_w_empty_container_single_key_in_sequence(self): container = {} self._call_fut(container, ["key1"], "value") self.assertEqual(container, {"key1": "value"}) + def test_w_empty_container_single_string_key(self): + container = {} + self._call_fut(container, "key", "value") + self.assertEqual(container, {"key": "value"}) + def test_w_empty_container_nested_keys(self): container = {} self._call_fut(container, ["key1", "key2", "key3"], "value") diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index c94bf3a7faf0..3c68e3c5e75b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -102,6 +102,189 @@ def test_ctor_with_key(self): self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) +class TestTableBase: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import _TableBase + + return _TableBase + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + instance = self._make_one() + assert instance._properties == {} + + def test_project(self): + instance = self._make_one() + instance._properties = {"tableReference": {"projectId": "p_1"}} + assert instance.project == "p_1" + + def test_dataset_id(self): + instance = self._make_one() + instance._properties = {"tableReference": {"datasetId": "ds_1"}} + assert instance.dataset_id == "ds_1" + + def test_table_id(self): + instance = self._make_one() + instance._properties = {"tableReference": {"tableId": "tbl_1"}} + assert instance.table_id == "tbl_1" + + def test_path(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + assert instance.path == "/projects/p_1/datasets/ds_1/tables/tbl_1" + + def test___eq___wrong_type(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + + class TableWannabe: + pass + + wannabe_other = TableWannabe() + wannabe_other._properties = instance._properties + wannabe_other.project = "p_1" + wannabe_other.dataset_id = "ds_1" + wannabe_other.table_id = "tbl_1" + + assert instance != wannabe_other # Can't fake it. + assert instance == mock.ANY # ...but delegation to other object works. + + def test___eq___project_mismatch(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "projectId": "p_2", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + assert instance != other + + def test___eq___dataset_mismatch(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_2", + "tableId": "tbl_1", + } + } + assert instance != other + + def test___eq___table_mismatch(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_2", + } + } + assert instance != other + + def test___eq___equality(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + assert instance == other + + def test___hash__set_equality(self): + instance_1 = self._make_one() + instance_1._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + + instance_2 = self._make_one() + instance_2._properties = { + "tableReference": { + "projectId": "p_2", + "datasetId": "ds_2", + "tableId": "tbl_2", + } + } + + set_one = {instance_1, instance_2} + set_two = {instance_1, instance_2} + assert set_one == set_two + + def test___hash__sets_not_equal(self): + instance_1 = self._make_one() + instance_1._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + + instance_2 = self._make_one() + instance_2._properties = { + "tableReference": { + "projectId": "p_2", + "datasetId": "ds_2", + "tableId": "tbl_2", + } + } + + set_one = {instance_1} + set_two = {instance_2} + assert set_one != set_two + + class TestTableReference(unittest.TestCase): @staticmethod def _get_target_class(): @@ -196,55 +379,6 @@ def test_from_string_ignores_default_project(self): self.assertEqual(got.dataset_id, "string_dataset") self.assertEqual(got.table_id, "string_table") - def test___eq___wrong_type(self): - dataset_ref = DatasetReference("project_1", "dataset_1") - table = self._make_one(dataset_ref, "table_1") - other = object() - self.assertNotEqual(table, other) - self.assertEqual(table, mock.ANY) - - def test___eq___project_mismatch(self): - dataset = DatasetReference("project_1", "dataset_1") - other_dataset = DatasetReference("project_2", "dataset_1") - table = self._make_one(dataset, "table_1") - other = self._make_one(other_dataset, "table_1") - self.assertNotEqual(table, other) - - def test___eq___dataset_mismatch(self): - dataset = DatasetReference("project_1", "dataset_1") - other_dataset = DatasetReference("project_1", "dataset_2") - table = self._make_one(dataset, "table_1") - other = self._make_one(other_dataset, "table_1") - self.assertNotEqual(table, other) - - def test___eq___table_mismatch(self): - dataset = DatasetReference("project_1", "dataset_1") - table = self._make_one(dataset, "table_1") - other = self._make_one(dataset, "table_2") - self.assertNotEqual(table, other) - - def test___eq___equality(self): - dataset = DatasetReference("project_1", "dataset_1") - table = self._make_one(dataset, "table_1") - other = self._make_one(dataset, "table_1") - self.assertEqual(table, other) - - def test___hash__set_equality(self): - dataset = DatasetReference("project_1", "dataset_1") - table1 = self._make_one(dataset, "table1") - table2 = self._make_one(dataset, "table2") - set_one = {table1, table2} - set_two = {table1, table2} - self.assertEqual(set_one, set_two) - - def test___hash__not_equals(self): - dataset = DatasetReference("project_1", "dataset_1") - table1 = self._make_one(dataset, "table1") - table2 = self._make_one(dataset, "table2") - set_one = {table1} - set_two = {table2} - self.assertNotEqual(set_one, set_two) - def test___repr__(self): dataset = DatasetReference("project1", "dataset1") table1 = self._make_one(dataset, "table1") @@ -558,44 +692,6 @@ def test_num_rows_getter(self): with self.assertRaises(ValueError): getattr(table, "num_rows") - def test__eq__wrong_type(self): - table = self._make_one("project_foo.dataset_bar.table_baz") - - class TableWannabe: - pass - - not_a_table = TableWannabe() - not_a_table._properties = table._properties - - assert table != not_a_table # Can't fake it. - - def test__eq__same_table_basic(self): - table_1 = self._make_one("project_foo.dataset_bar.table_baz") - table_2 = self._make_one("project_foo.dataset_bar.table_baz") - assert table_1 == table_2 - - def test__eq__same_table_multiple_properties(self): - from google.cloud.bigquery import SchemaField - - table_1 = self._make_one("project_foo.dataset_bar.table_baz") - table_1.require_partition_filter = True - table_1.labels = {"first": "one", "second": "two"} - - table_1.schema = [ - SchemaField("name", "STRING", "REQUIRED"), - SchemaField("age", "INTEGER", "NULLABLE"), - ] - - table_2 = self._make_one("project_foo.dataset_bar.table_baz") - table_2.require_partition_filter = True - table_2.labels = {"first": "one", "second": "two"} - table_2.schema = [ - SchemaField("name", "STRING", "REQUIRED"), - SchemaField("age", "INTEGER", "NULLABLE"), - ] - - assert table_1 == table_2 - def test__eq__same_table_property_different(self): table_1 = self._make_one("project_foo.dataset_bar.table_baz") table_1.description = "This is table baz" @@ -605,12 +701,6 @@ def test__eq__same_table_property_different(self): assert table_1 == table_2 # Still equal, only table reference is important. - def test__eq__different_table(self): - table_1 = self._make_one("project_foo.dataset_bar.table_baz") - table_2 = self._make_one("project_foo.dataset_bar.table_baz_2") - - assert table_1 != table_2 - def test_hashable(self): table_1 = self._make_one("project_foo.dataset_bar.table_baz") table_1.description = "This is a table" @@ -1584,38 +1674,6 @@ def test_to_api_repr(self): table = self._make_one(resource) self.assertEqual(table.to_api_repr(), resource) - def test__eq__wrong_type(self): - resource = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table = self._make_one(resource) - - class FakeTableListItem: - project = "project_foo" - dataset_id = "dataset_bar" - table_id = "table_baz" - - not_a_table = FakeTableListItem() - - assert table != not_a_table # Can't fake it. - - def test__eq__same_table(self): - resource = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_1 = self._make_one(resource) - table_2 = self._make_one(resource) - - assert table_1 == table_2 - def test__eq__same_table_property_different(self): table_ref_resource = { "projectId": "project_foo", @@ -1631,40 +1689,6 @@ def test__eq__same_table_property_different(self): assert table_1 == table_2 # Still equal, only table reference is important. - def test__eq__different_table(self): - resource_1 = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_1 = self._make_one(resource_1) - - resource_2 = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_quux", - } - } - table_2 = self._make_one(resource_2) - - assert table_1 != table_2 - - def test_hashable(self): - resource = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_item = self._make_one(resource) - table_item_2 = self._make_one(resource) - - assert hash(table_item) == hash(table_item_2) - class TestTableClassesInterchangeability: @staticmethod From 64fb090e5efc77a3cd54778f082d38328a61d1ca Mon Sep 17 00:00:00 2001 From: Jon Dufresne Date: Mon, 20 Sep 2021 08:25:42 -0700 Subject: [PATCH 1302/2016] feat: Add py.typed for PEP 561 compliance (#976) Type annotations were added in commit f8d4aaa335a0eef915e73596fc9b43b11d11be9f. For these annotations to be useful by library users, the package should install a py.typed file. This tells mypy and other tools to consume and use these types. For more details, see: https://mypy.readthedocs.io/en/stable/installed_packages.html#creating-pep-561-compatible-packages --- packages/google-cloud-bigquery/google/cloud/bigquery/py.typed | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/py.typed diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/py.typed b/packages/google-cloud-bigquery/google/cloud/bigquery/py.typed new file mode 100644 index 000000000000..e73777993c34 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-cloud-bigquery package uses inline types. From ce64de90aa691a630e9a8ab9a3655aa69018fb7e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Sep 2021 02:18:28 -0500 Subject: [PATCH 1303/2016] fix: remove default timeout (#974) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Internal folks, see: go/microgenerator-retries > "Methods will **not** hedge by default." (emphasis mine) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #970 🦕 --- packages/google-cloud-bigquery/google/cloud/bigquery/retry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 8305823225d4..8a86973cde7e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -60,7 +60,7 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ -DEFAULT_TIMEOUT = 5.0 * 60.0 +DEFAULT_TIMEOUT = None """The default API timeout. This is the time to wait per request. To adjust the total wait time, set a From 16ae01832c66cb03f840ac0d8159a9b80225281a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Sep 2021 11:43:52 -0500 Subject: [PATCH 1304/2016] test: look for transaction info on child job, not parent job (#978) * test: look for transaction info on child job, not parent job * clarify transaction_info docstring * use sphinx method directive --- .../google/cloud/bigquery/job/base.py | 5 +++++ .../tests/system/test_client.py | 12 +++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 72db5a63c6b6..69818109201c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -357,6 +357,11 @@ def reservation_usage(self): def transaction_info(self) -> Optional[TransactionInfo]: """Information of the multi-statement transaction if this job is part of one. + Since a scripting query job can execute multiple transactions, this + property is only expected on child jobs. Use the + :meth:`google.cloud.bigquery.client.Client.list_jobs` method with the + ``parent_job`` parameter to iterate over child jobs. + .. versionadded:: 2.24.0 """ info = self._properties.get("statistics", {}).get("transactionInfo") diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 9da45ee6e226..6c8da4d23538 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -1586,9 +1586,15 @@ def test_transaction_info(self): query_job = Config.CLIENT.query(sql) query_job.result() - # Transaction ID set by the server should be accessible - assert query_job.transaction_info is not None - assert query_job.transaction_info.transaction_id != "" + child_jobs = Config.CLIENT.list_jobs(parent_job=query_job) + begin_transaction_job = next(iter(child_jobs)) + + # Transaction ID set by the server should be accessible on the child + # job responsible for `BEGIN TRANSACTION`. It is not expected to be + # present on the parent job itself. + # https://github.com/googleapis/python-bigquery/issues/975 + assert begin_transaction_job.transaction_info is not None + assert begin_transaction_job.transaction_info.transaction_id != "" def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: From 34897906fde1fdfb9c34ee7fa8cdbe39a69ce0b9 Mon Sep 17 00:00:00 2001 From: Jeffrey Rennie Date: Tue, 21 Sep 2021 12:36:15 -0700 Subject: [PATCH 1305/2016] chore: relocate owl bot post processor (#979) chore: relocate owl bot post processor --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- packages/google-cloud-bigquery/.github/.OwlBot.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index c07f148f0b0b..2567653c000d 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: - image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d + image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest + digest: sha256:87eee22d276554e4e52863ec9b1cb6a7245815dfae20439712bf644348215a5a diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.yaml index 2b6451c193d1..e540511573a2 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.yaml @@ -13,7 +13,7 @@ # limitations under the License. docker: - image: gcr.io/repo-automation-bots/owlbot-python:latest + image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest deep-remove-regex: - /owl-bot-staging From 7db42d07308a82fc9a79fcb5574a5b14ef53dae4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 24 Sep 2021 02:06:11 -0500 Subject: [PATCH 1306/2016] fix: disambiguate missing policy tags from explicitly unset policy tags (#983) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #981 Fixes #982 Towards https://github.com/googleapis/python-bigquery-pandas/issues/387 🦕 --- .../google-cloud-bigquery/docs/reference.rst | 1 + .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/schema.py | 100 +++++----- .../tests/system/test_client.py | 5 +- .../tests/unit/job/test_load_config.py | 4 - .../tests/unit/test_client.py | 64 +++---- .../tests/unit/test_external_config.py | 9 +- .../tests/unit/test_schema.py | 173 +++++++----------- 8 files changed, 139 insertions(+), 219 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index d8738e67bfaf..f4f11abc92dc 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -128,6 +128,7 @@ Schema :toctree: generated schema.SchemaField + schema.PolicyTagList Query diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index a7a0da3dd284..931e0f3e6f7a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -88,6 +88,7 @@ from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import Row @@ -140,6 +141,7 @@ "RoutineReference", # Shared helpers "SchemaField", + "PolicyTagList", "UDFResource", "ExternalConfig", "BigtableOptions", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 157db7ce60e4..5bad522732c3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -15,12 +15,12 @@ """Schemas for BigQuery tables / queries.""" import collections -from typing import Optional +import enum +from typing import Iterable, Union from google.cloud.bigquery_v2 import types -_DEFAULT_VALUE = object() _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: @@ -49,47 +49,62 @@ """String names of the legacy SQL types to integer codes of Standard SQL types.""" +class _DefaultSentinel(enum.Enum): + """Object used as 'sentinel' indicating default value should be used. + + Uses enum so that pytype/mypy knows that this is the only possible value. + https://stackoverflow.com/a/60605919/101923 + + Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8. + https://docs.python.org/3/library/typing.html#typing.Literal + """ + + DEFAULT_VALUE = object() + + +_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE + + class SchemaField(object): """Describe a single field within a table schema. Args: - name (str): The name of the field. + name: The name of the field. - field_type (str): The type of the field. See + field_type: + The type of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type - mode (Optional[str]): The mode of the field. See + mode: + Defaults to ``'NULLABLE'``. The mode of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode - description (Optional[str]): Description for the field. + description: Description for the field. - fields (Optional[Tuple[google.cloud.bigquery.schema.SchemaField]]): - Subfields (requires ``field_type`` of 'RECORD'). + fields: Subfields (requires ``field_type`` of 'RECORD'). - policy_tags (Optional[PolicyTagList]): The policy tag list for the field. + policy_tags: The policy tag list for the field. - precision (Optional[int]): + precision: Precison (number of digits) of fields with NUMERIC or BIGNUMERIC type. - scale (Optional[int]): + scale: Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. - max_length (Optional[int]): - Maximim length of fields with STRING or BYTES type. - + max_length: Maximum length of fields with STRING or BYTES type. """ def __init__( self, - name, - field_type, - mode="NULLABLE", - description=_DEFAULT_VALUE, - fields=(), - policy_tags=None, - precision=_DEFAULT_VALUE, - scale=_DEFAULT_VALUE, - max_length=_DEFAULT_VALUE, + name: str, + field_type: str, + mode: str = "NULLABLE", + description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE, + fields: Iterable["SchemaField"] = (), + policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE, + precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, ): self._properties = { "name": name, @@ -105,28 +120,12 @@ def __init__( self._properties["scale"] = scale if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length + if policy_tags is not _DEFAULT_VALUE: + self._properties["policyTags"] = ( + policy_tags.to_api_repr() if policy_tags is not None else None + ) self._fields = tuple(fields) - self._policy_tags = self._determine_policy_tags(field_type, policy_tags) - - @staticmethod - def _determine_policy_tags( - field_type: str, given_policy_tags: Optional["PolicyTagList"] - ) -> Optional["PolicyTagList"]: - """Return the given policy tags, or their suitable representation if `None`. - - Args: - field_type: The type of the schema field. - given_policy_tags: The policy tags to maybe ajdust. - """ - if given_policy_tags is not None: - return given_policy_tags - - if field_type is not None and field_type.upper() in _STRUCT_TYPES: - return None - - return PolicyTagList() - @staticmethod def __get_int(api_repr, name): v = api_repr.get(name, _DEFAULT_VALUE) @@ -152,10 +151,10 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": mode = api_repr.get("mode", "NULLABLE") description = api_repr.get("description", _DEFAULT_VALUE) fields = api_repr.get("fields", ()) + policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) - policy_tags = cls._determine_policy_tags( - field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags")) - ) + if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: + policy_tags = PolicyTagList.from_api_repr(policy_tags) return cls( field_type=field_type, @@ -230,7 +229,8 @@ def policy_tags(self): """Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list definition for this field. """ - return self._policy_tags + resource = self._properties.get("policyTags") + return PolicyTagList.from_api_repr(resource) if resource is not None else None def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. @@ -244,10 +244,6 @@ def to_api_repr(self) -> dict: # add this to the serialized representation. if self.field_type.upper() in _STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] - else: - # Explicitly include policy tag definition (we must not do it for RECORD - # fields, because those are not leaf fields). - answer["policyTags"] = self.policy_tags.to_api_repr() # Done; return the serialized dictionary. return answer @@ -272,7 +268,7 @@ def _key(self): field_type = f"{field_type}({self.precision})" policy_tags = ( - () if self._policy_tags is None else tuple(sorted(self._policy_tags.names)) + () if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) ) return ( diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 6c8da4d23538..f6f95c184c31 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -673,14 +673,15 @@ def test_unset_table_schema_attributes(self): mode=old_field.mode, description=None, fields=old_field.fields, - policy_tags=None, + policy_tags=PolicyTagList(), ) table.schema = new_schema updated_table = Config.CLIENT.update_table(table, ["schema"]) self.assertFalse(updated_table.schema[1].description) # Empty string or None. - self.assertEqual(updated_table.schema[1].policy_tags.names, ()) + # policyTags key expected to be missing from response. + self.assertIsNone(updated_table.schema[1].policy_tags) def test_update_table_clustering_configuration(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index cbe087dacc72..5a0c5a83f306 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -484,13 +484,11 @@ def test_schema_setter_fields(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "policyTags": {"names": []}, } self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} @@ -503,13 +501,11 @@ def test_schema_setter_valid_mappings_list(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "policyTags": {"names": []}, } schema = [full_name_repr, age_repr] config.schema = schema diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d2a75413f4fe..eb70470b54f6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1024,18 +1024,8 @@ def test_create_table_w_schema_and_query(self): { "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query}, @@ -1069,18 +1059,8 @@ def test_create_table_w_schema_and_query(self): }, "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query, "useLegacySql": False}, @@ -2003,6 +1983,7 @@ def test_update_routine(self): def test_update_table(self): from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.table import Table path = "projects/%s/datasets/%s/tables/%s" % ( @@ -2029,7 +2010,6 @@ def test_update_table(self): "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", - "policyTags": {"names": []}, }, ] }, @@ -2040,7 +2020,15 @@ def test_update_table(self): } ) schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED", description=None), + # Explicly setting policyTags to no names should be included in the sent resource. + # https://github.com/googleapis/python-bigquery/issues/981 + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + description=None, + policy_tags=PolicyTagList(names=()), + ), SchemaField( "age", "INTEGER", mode="REQUIRED", description="New field description" ), @@ -2078,7 +2066,6 @@ def test_update_table(self): "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", - "policyTags": {"names": []}, }, ] }, @@ -2197,21 +2184,14 @@ def test_update_table_w_query(self): "type": "STRING", "mode": "REQUIRED", "description": None, - "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "this is a column", - "policyTags": {"names": []}, - }, - { - "name": "country", - "type": "STRING", - "mode": "NULLABLE", - "policyTags": {"names": []}, }, + {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } schema = [ @@ -6795,7 +6775,13 @@ def test_load_table_from_dataframe(self): assert field["type"] == table_field.field_type assert field["mode"] == table_field.mode assert len(field.get("fields", [])) == len(table_field.fields) - assert field["policyTags"]["names"] == [] + # Avoid accidentally updating policy tags when not explicitly included. + # https://github.com/googleapis/python-bigquery/issues/981 + # Also, avoid 403 if someone has permission to write to table but + # not update policy tags by omitting policy tags we might have + # received from a get table request. + # https://github.com/googleapis/python-bigquery/pull/557 + assert "policyTags" not in field # Omit unnecessary fields when they come from getting the table # (not passed in via job_config) assert "description" not in field @@ -8069,21 +8055,18 @@ def test_schema_to_json_with_file_path(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", - "policyTags": {"names": []}, "type": "FLOAT", }, ] @@ -8116,21 +8099,18 @@ def test_schema_to_json_with_file_object(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", - "policyTags": {"names": []}, "type": "FLOAT", }, ] diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 1f49dba5df98..3dc9dd179b65 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -78,14 +78,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - } - ] + "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } got_resource = ec.to_api_repr() exp_resource = { diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index d0b5ca54c52b..2180e1f6e276 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -42,15 +42,40 @@ def test_constructor_defaults(self): self.assertEqual(field.mode, "NULLABLE") self.assertIsNone(field.description) self.assertEqual(field.fields, ()) - self.assertEqual(field.policy_tags, PolicyTagList()) + self.assertIsNone(field.policy_tags) def test_constructor_explicit(self): - field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") + field = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList( + names=( + "projects/a/locations/b/taxonomies/c/policyTags/e", + "projects/f/locations/g/taxonomies/h/policyTags/i", + ) + ), + ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") self.assertEqual(field.mode, "REQUIRED") self.assertEqual(field.description, "Testing") self.assertEqual(field.fields, ()) + self.assertEqual( + field.policy_tags, + PolicyTagList( + names=( + "projects/a/locations/b/taxonomies/c/policyTags/e", + "projects/f/locations/g/taxonomies/h/policyTags/i", + ) + ), + ) + + def test_constructor_explicit_none(self): + field = self._make_one("test", "STRING", description=None, policy_tags=None) + self.assertIsNone(field.description) + self.assertIsNone(field.policy_tags) def test_constructor_subfields(self): sub_field1 = self._make_one("area_code", "STRING") @@ -66,20 +91,6 @@ def test_constructor_subfields(self): self.assertEqual(field.fields[0], sub_field1) self.assertEqual(field.fields[1], sub_field2) - def test_constructor_with_policy_tags(self): - from google.cloud.bigquery.schema import PolicyTagList - - policy = PolicyTagList(names=("foo", "bar")) - field = self._make_one( - "test", "STRING", mode="REQUIRED", description="Testing", policy_tags=policy - ) - self.assertEqual(field.name, "test") - self.assertEqual(field.field_type, "STRING") - self.assertEqual(field.mode, "REQUIRED") - self.assertEqual(field.description, "Testing") - self.assertEqual(field.fields, ()) - self.assertEqual(field.policy_tags, policy) - def test_to_api_repr(self): from google.cloud.bigquery.schema import PolicyTagList @@ -88,17 +99,28 @@ def test_to_api_repr(self): policy.to_api_repr(), {"names": ["foo", "bar"]}, ) - field = self._make_one("foo", "INTEGER", "NULLABLE", policy_tags=policy) + field = self._make_one( + "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy + ) self.assertEqual( field.to_api_repr(), { "mode": "NULLABLE", "name": "foo", "type": "INTEGER", + "description": "hello world", "policyTags": {"names": ["foo", "bar"]}, }, ) + def test_to_api_repr_omits_unset_properties(self): + # Prevent accidentally modifying fields that aren't explicitly set. + # https://github.com/googleapis/python-bigquery/issues/981 + field = self._make_one("foo", "INTEGER") + resource = field.to_api_repr() + self.assertNotIn("description", resource) + self.assertNotIn("policyTags", resource) + def test_to_api_repr_with_subfield(self): for record_type in ("RECORD", "STRUCT"): subfield = self._make_one("bar", "INTEGER", "NULLABLE") @@ -106,14 +128,7 @@ def test_to_api_repr_with_subfield(self): self.assertEqual( field.to_api_repr(), { - "fields": [ - { - "mode": "NULLABLE", - "name": "bar", - "type": "INTEGER", - "policyTags": {"names": []}, - } - ], + "fields": [{"mode": "NULLABLE", "name": "bar", "type": "INTEGER"}], "mode": "REQUIRED", "name": "foo", "type": record_type, @@ -163,9 +178,15 @@ def test_from_api_repr_defaults(self): self.assertEqual(field.name, "foo") self.assertEqual(field.field_type, "RECORD") self.assertEqual(field.mode, "NULLABLE") - self.assertEqual(field.description, None) self.assertEqual(len(field.fields), 0) + # Keys not present in API representation shouldn't be included in + # _properties. + self.assertIsNone(field.description) + self.assertIsNone(field.policy_tags) + self.assertNotIn("description", field._properties) + self.assertNotIn("policyTags", field._properties) + def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") @@ -567,22 +588,10 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( - resource[1], - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ) def test_w_description(self): @@ -608,7 +617,6 @@ def test_w_description(self): "type": "STRING", "mode": "REQUIRED", "description": DESCRIPTION, - "policyTags": {"names": []}, }, ) self.assertEqual( @@ -618,7 +626,6 @@ def test_w_description(self): "type": "INTEGER", "mode": "REQUIRED", "description": None, - "policyTags": {"names": []}, }, ) @@ -634,13 +641,7 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( resource[1], @@ -649,18 +650,8 @@ def test_w_subfields(self): "type": "RECORD", "mode": "REPEATED", "fields": [ - { - "name": "type", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "number", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, ], }, ) @@ -872,83 +863,43 @@ def test_from_api_repr_parameterized(api, expect, key2): [ ( dict(name="n", field_type="NUMERIC"), - dict(name="n", type="NUMERIC", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="NUMERIC", mode="NULLABLE"), ), ( dict(name="n", field_type="NUMERIC", precision=9), - dict( - name="n", - type="NUMERIC", - mode="NULLABLE", - precision=9, - policyTags={"names": []}, - ), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9,), ), ( dict(name="n", field_type="NUMERIC", precision=9, scale=2), - dict( - name="n", - type="NUMERIC", - mode="NULLABLE", - precision=9, - scale=2, - policyTags={"names": []}, - ), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2,), ), ( dict(name="n", field_type="BIGNUMERIC"), - dict( - name="n", type="BIGNUMERIC", mode="NULLABLE", policyTags={"names": []} - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40), - dict( - name="n", - type="BIGNUMERIC", - mode="NULLABLE", - precision=40, - policyTags={"names": []}, - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40,), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), - dict( - name="n", - type="BIGNUMERIC", - mode="NULLABLE", - precision=40, - scale=2, - policyTags={"names": []}, - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2,), ), ( dict(name="n", field_type="STRING"), - dict(name="n", type="STRING", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="STRING", mode="NULLABLE"), ), ( dict(name="n", field_type="STRING", max_length=9), - dict( - name="n", - type="STRING", - mode="NULLABLE", - maxLength=9, - policyTags={"names": []}, - ), + dict(name="n", type="STRING", mode="NULLABLE", maxLength=9,), ), ( dict(name="n", field_type="BYTES"), - dict(name="n", type="BYTES", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="BYTES", mode="NULLABLE"), ), ( dict(name="n", field_type="BYTES", max_length=9), - dict( - name="n", - type="BYTES", - mode="NULLABLE", - maxLength=9, - policyTags={"names": []}, - ), + dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9,), ), ], ) From e52d8c8aaf616b9da00983da68bab8be9237794d Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 27 Sep 2021 10:43:17 +0200 Subject: [PATCH 1307/2016] chore: release 2.27.0 (#965) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 20 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 5a3cb6bee822..80a195b1ea86 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,26 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.27.0](https://www.github.com/googleapis/python-bigquery/compare/v2.26.0...v2.27.0) (2021-09-24) + + +### Features + +* Add py.typed for PEP 561 compliance ([#976](https://www.github.com/googleapis/python-bigquery/issues/976)) ([96e6bee](https://www.github.com/googleapis/python-bigquery/commit/96e6beef3c63b663b7e5879b1458f4dd1a47a5b5)) +* include key metadata in Job representation ([#964](https://www.github.com/googleapis/python-bigquery/issues/964)) ([acca1cb](https://www.github.com/googleapis/python-bigquery/commit/acca1cb7baaa3b00508246c994ade40314d421c3)) + + +### Bug Fixes + +* Arrow extension-type metadata was not set when calling the REST API or when there are no rows ([#946](https://www.github.com/googleapis/python-bigquery/issues/946)) ([864383b](https://www.github.com/googleapis/python-bigquery/commit/864383bc01636b3774f7da194587b8b7edd0383d)) +* disambiguate missing policy tags from explicitly unset policy tags ([#983](https://www.github.com/googleapis/python-bigquery/issues/983)) ([f83c00a](https://www.github.com/googleapis/python-bigquery/commit/f83c00acead70fc0ce9959eefb133a672d816277)) +* remove default timeout ([#974](https://www.github.com/googleapis/python-bigquery/issues/974)) ([1cef0d4](https://www.github.com/googleapis/python-bigquery/commit/1cef0d4664bf448168b26487a71795144b7f4d6b)) + + +### Documentation + +* simplify destination table sample with f-strings ([#966](https://www.github.com/googleapis/python-bigquery/issues/966)) ([ab6e76f](https://www.github.com/googleapis/python-bigquery/commit/ab6e76f9489262fd9c1876a1c4f93d7e139aa999)) + ## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 1f7d79ab980b..4f038c462c4c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.26.0" +__version__ = "2.27.0" From c03f4534bfc1835489d6e598fa48dff95e62fd2c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 27 Sep 2021 14:01:04 -0500 Subject: [PATCH 1308/2016] fix: remove py.typed since package fails mypy check (#988) This reverts commit 96e6beef3c63b663b7e5879b1458f4dd1a47a5b5. --- packages/google-cloud-bigquery/google/cloud/bigquery/py.typed | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/py.typed diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/py.typed b/packages/google-cloud-bigquery/google/cloud/bigquery/py.typed deleted file mode 100644 index e73777993c34..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/py.typed +++ /dev/null @@ -1,2 +0,0 @@ -# Marker file for PEP 561. -# The google-cloud-bigquery package uses inline types. From 217cd5e6dba96c8de381a72127a2b4033bc398fa Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 27 Sep 2021 19:22:42 +0000 Subject: [PATCH 1309/2016] chore: release 2.27.1 (#989) :robot: I have created a release \*beep\* \*boop\* --- ### [2.27.1](https://www.github.com/googleapis/python-bigquery/compare/v2.27.0...v2.27.1) (2021-09-27) ### Bug Fixes * remove py.typed since package fails mypy check ([#988](https://www.github.com/googleapis/python-bigquery/issues/988)) ([39030f2](https://www.github.com/googleapis/python-bigquery/commit/39030f26ce081cfacd456b84694c68e3f04ed48d)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 80a195b1ea86..d531ec477741 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.27.1](https://www.github.com/googleapis/python-bigquery/compare/v2.27.0...v2.27.1) (2021-09-27) + + +### Bug Fixes + +* remove py.typed since package fails mypy check ([#988](https://www.github.com/googleapis/python-bigquery/issues/988)) ([39030f2](https://www.github.com/googleapis/python-bigquery/commit/39030f26ce081cfacd456b84694c68e3f04ed48d)) + ## [2.27.0](https://www.github.com/googleapis/python-bigquery/compare/v2.26.0...v2.27.0) (2021-09-24) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 4f038c462c4c..3e5c77edee64 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.27.0" +__version__ = "2.27.1" From 1508459d94d99a7823ca7561b12e0c7f3564ae83 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 28 Sep 2021 04:44:59 -0500 Subject: [PATCH 1310/2016] docs: link to stable pandas docs (#990) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: link to stable pandas docs * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * trailing slash * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/docs/conf.py | 2 +- packages/google-cloud-bigquery/owlbot.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 07e5d8c307d8..32995163606e 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -366,7 +366,7 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), - "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), "geopandas": ("https://geopandas.org/", None), } diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 09aa8ca6f33f..c39433d3ca81 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -98,7 +98,7 @@ microgenerator=True, split_system_tests=True, intersphinx_dependencies={ - "pandas": "http://pandas.pydata.org/pandas-docs/dev", + "pandas": "http://pandas.pydata.org/pandas-docs/stable/", "geopandas": "https://geopandas.org/", }, ) From 678bfd108091e9689453c29cdbf3f874ea0615b9 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 28 Sep 2021 06:10:27 -0400 Subject: [PATCH 1311/2016] chore: exclude 'CODEOWNERS' from templated files (#986) See: https://github.com/googleapis/synthtool/pull/1201 --- packages/google-cloud-bigquery/owlbot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index c39433d3ca81..ace99e687a21 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -110,6 +110,7 @@ "noxfile.py", "docs/multiprocessing.rst", ".coveragerc", + ".github/CODEOWNERS", # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", From 64de5093d95f126c3e21eade14e18ba8aba7c971 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 28 Sep 2021 17:57:00 +0200 Subject: [PATCH 1312/2016] chore(deps): update all dependencies (#963) --- .../samples/geography/requirements.txt | 20 +++++++++---------- .../samples/snippets/requirements.txt | 6 +++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index a4b4ed692302..f808c5ec2949 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -2,7 +2,7 @@ attrs==21.2.0 cachetools==4.2.2 certifi==2021.5.30 cffi==1.14.6 -charset-normalizer==2.0.4 +charset-normalizer==2.0.6 click==8.0.1 click-plugins==1.1.1 cligj==0.7.2 @@ -11,17 +11,17 @@ Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0 google-api-core==2.0.1 -google-auth==2.0.2 -google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.7.0 +google-auth==2.2.0 +google-cloud-bigquery==2.27.1 +google-cloud-bigquery-storage==2.9.0 google-cloud-core==2.0.0 -google-crc32c==1.1.2 -google-resumable-media==2.0.2 +google-crc32c==1.2.0 +google-resumable-media==2.0.3 googleapis-common-protos==1.53.0 -grpcio==1.40.0 +grpcio==1.41.0 idna==3.2 importlib-metadata==4.8.1 -libcst==0.3.20 +libcst==0.3.21 munch==2.5.0 mypy-extensions==0.4.3 numpy==1.19.5; python_version < "3.7" @@ -30,7 +30,7 @@ packaging==21.0 pandas==1.1.5; python_version < '3.7' pandas==1.3.2; python_version >= '3.7' proto-plus==1.19.0 -protobuf==3.17.3 +protobuf==3.18.0 pyarrow==5.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 @@ -47,5 +47,5 @@ Shapely==1.7.1 six==1.16.0 typing-extensions==3.10.0.2 typing-inspect==0.7.1 -urllib3==1.26.6 +urllib3==1.26.7 zipp==3.5.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index f575e41b1ac3..f9b9d023cce9 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ -google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.7.0 +google-cloud-bigquery==2.27.1 +google-cloud-bigquery-storage==2.9.0 google-auth-oauthlib==0.4.6 -grpcio==1.40.0 +grpcio==1.41.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From bc4ce5b6486a55587b1b5f0e91cde8733a95db94 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 29 Sep 2021 16:59:37 +0200 Subject: [PATCH 1313/2016] chore(deps): update all dependencies (#992) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index f808c5ec2949..035b652a8bbc 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0 google-api-core==2.0.1 -google-auth==2.2.0 +google-auth==2.2.1 google-cloud-bigquery==2.27.1 google-cloud-bigquery-storage==2.9.0 google-cloud-core==2.0.0 @@ -48,4 +48,4 @@ six==1.16.0 typing-extensions==3.10.0.2 typing-inspect==0.7.1 urllib3==1.26.7 -zipp==3.5.0 +zipp==3.5.1 From 0b9c76785f36fd7f378131493f81207b487b5de4 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 30 Sep 2021 15:44:15 +0000 Subject: [PATCH 1314/2016] chore: fail samples nox session if python version is missing (#997) --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/samples/geography/noxfile.py | 4 ++++ packages/google-cloud-bigquery/samples/snippets/noxfile.py | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 2567653c000d..ae6c57fad807 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:87eee22d276554e4e52863ec9b1cb6a7245815dfae20439712bf644348215a5a + digest: sha256:82b12321da4446a73cb11bcb6812fbec8c105abda3946d46e6394e5fbfb64c0f diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index b008613f03ff..1fd8956fbf01 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -98,6 +98,10 @@ def get_pytest_env_vars() -> Dict[str, str]: "True", "true", ) + +# Error if a python version is missing +nox.options.error_on_missing_interpreters = True + # # Style Checks # diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index b008613f03ff..1fd8956fbf01 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -98,6 +98,10 @@ def get_pytest_env_vars() -> Dict[str, str]: "True", "true", ) + +# Error if a python version is missing +nox.options.error_on_missing_interpreters = True + # # Style Checks # From b053688524e09328ba4d9f635a02953a8ca39dfc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 30 Sep 2021 11:53:30 -0500 Subject: [PATCH 1315/2016] feat: add `AvroOptions` to configure AVRO external data (#994) * feat: add `AvroOptions` to configure AVRO external data Also: * Unify `ExternalConfig` class to use `_properties` for everything. This does result in more code, but it should make maintenance easier as it aligns with our other mutable resource classes. * Adds `bigtable_options`, `csv_options`, and `google_sheets_options` properties. This aligns with `parquet_options`. * remove unnecessary check for options in to_api_repr * add missing tests for to_api_repr * remove redundant type identifiers --- .../docs/format_options.rst | 6 + .../google-cloud-bigquery/docs/reference.rst | 5 + .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/external_config.py | 149 ++++++-- .../google/cloud/bigquery/format_options.py | 54 ++- .../tests/unit/test_external_config.py | 327 ++++++++++++++++-- .../tests/unit/test_format_options.py | 23 ++ 7 files changed, 518 insertions(+), 48 deletions(-) create mode 100644 packages/google-cloud-bigquery/docs/format_options.rst diff --git a/packages/google-cloud-bigquery/docs/format_options.rst b/packages/google-cloud-bigquery/docs/format_options.rst new file mode 100644 index 000000000000..b3948209ec8c --- /dev/null +++ b/packages/google-cloud-bigquery/docs/format_options.rst @@ -0,0 +1,6 @@ +BigQuery Format Options +======================= + +.. automodule:: google.cloud.bigquery.format_options + :members: + :undoc-members: diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index f4f11abc92dc..d2d2eed311a1 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -167,6 +167,11 @@ External Configuration external_config.CSVOptions external_config.GoogleSheetsOptions +.. toctree:: + :maxdepth: 2 + + format_options + Magics ====== diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 931e0f3e6f7a..d2b1dd26d914 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -50,6 +50,7 @@ from google.cloud.bigquery.external_config import CSVOptions from google.cloud.bigquery.external_config import GoogleSheetsOptions from google.cloud.bigquery.external_config import ExternalSourceFormat +from google.cloud.bigquery.format_options import AvroOptions from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import Compression from google.cloud.bigquery.job import CopyJob @@ -144,6 +145,7 @@ "PolicyTagList", "UDFResource", "ExternalConfig", + "AvroOptions", "BigtableOptions", "BigtableColumnFamily", "BigtableColumn", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index f1692ba50289..5f284c639849 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -22,13 +22,13 @@ import base64 import copy -from typing import FrozenSet, Iterable, Optional +from typing import FrozenSet, Iterable, Optional, Union from google.cloud.bigquery._helpers import _to_bytes from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none -from google.cloud.bigquery.format_options import ParquetOptions +from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -548,7 +548,13 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": return config -_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions) +_OPTION_CLASSES = ( + AvroOptions, + BigtableOptions, + CSVOptions, + GoogleSheetsOptions, + ParquetOptions, +) class HivePartitioningOptions(object): @@ -646,11 +652,6 @@ class ExternalConfig(object): def __init__(self, source_format): self._properties = {"sourceFormat": source_format} - self._options = None - for optcls in _OPTION_CLASSES: - if source_format == optcls._SOURCE_FORMAT: - self._options = optcls() - break @property def source_format(self): @@ -663,9 +664,17 @@ def source_format(self): return self._properties["sourceFormat"] @property - def options(self): - """Optional[Dict[str, Any]]: Source-specific options.""" - return self._options + def options(self) -> Optional[Union[_OPTION_CLASSES]]: + """Source-specific options.""" + for optcls in _OPTION_CLASSES: + if self.source_format == optcls._SOURCE_FORMAT: + options = optcls() + self._properties.setdefault(optcls._RESOURCE_NAME, {}) + options._properties = self._properties[optcls._RESOURCE_NAME] + return options + + # No matching source format found. + return None @property def autodetect(self): @@ -815,23 +824,120 @@ def schema(self, value): self._properties["schema"] = prop @property - def parquet_options(self): - """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional - properties to set if ``sourceFormat`` is set to PARQUET. + def avro_options(self) -> Optional[AvroOptions]: + """Additional properties to set if ``sourceFormat`` is set to AVRO. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.avro_options + """ + if self.source_format == ExternalSourceFormat.AVRO: + self._properties.setdefault(AvroOptions._RESOURCE_NAME, {}) + resource = self._properties.get(AvroOptions._RESOURCE_NAME) + if resource is None: + return None + options = AvroOptions() + options._properties = resource + return options + + @avro_options.setter + def avro_options(self, value): + if self.source_format != ExternalSourceFormat.AVRO: + msg = f"Cannot set Avro options, source format is {self.source_format}" + raise TypeError(msg) + self._properties[AvroOptions._RESOURCE_NAME] = value._properties + + @property + def bigtable_options(self) -> Optional[BigtableOptions]: + """Additional properties to set if ``sourceFormat`` is set to BIGTABLE. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.bigtable_options + """ + if self.source_format == ExternalSourceFormat.BIGTABLE: + self._properties.setdefault(BigtableOptions._RESOURCE_NAME, {}) + resource = self._properties.get(BigtableOptions._RESOURCE_NAME) + if resource is None: + return None + options = BigtableOptions() + options._properties = resource + return options + + @bigtable_options.setter + def bigtable_options(self, value): + if self.source_format != ExternalSourceFormat.BIGTABLE: + msg = f"Cannot set Bigtable options, source format is {self.source_format}" + raise TypeError(msg) + self._properties[BigtableOptions._RESOURCE_NAME] = value._properties + + @property + def csv_options(self) -> Optional[CSVOptions]: + """Additional properties to set if ``sourceFormat`` is set to CSV. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.csv_options + """ + if self.source_format == ExternalSourceFormat.CSV: + self._properties.setdefault(CSVOptions._RESOURCE_NAME, {}) + resource = self._properties.get(CSVOptions._RESOURCE_NAME) + if resource is None: + return None + options = CSVOptions() + options._properties = resource + return options + + @csv_options.setter + def csv_options(self, value): + if self.source_format != ExternalSourceFormat.CSV: + msg = f"Cannot set CSV options, source format is {self.source_format}" + raise TypeError(msg) + self._properties[CSVOptions._RESOURCE_NAME] = value._properties + + @property + def google_sheets_options(self) -> Optional[GoogleSheetsOptions]: + """Additional properties to set if ``sourceFormat`` is set to + GOOGLE_SHEETS. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.google_sheets_options + """ + if self.source_format == ExternalSourceFormat.GOOGLE_SHEETS: + self._properties.setdefault(GoogleSheetsOptions._RESOURCE_NAME, {}) + resource = self._properties.get(GoogleSheetsOptions._RESOURCE_NAME) + if resource is None: + return None + options = GoogleSheetsOptions() + options._properties = resource + return options + + @google_sheets_options.setter + def google_sheets_options(self, value): + if self.source_format != ExternalSourceFormat.GOOGLE_SHEETS: + msg = f"Cannot set Google Sheets options, source format is {self.source_format}" + raise TypeError(msg) + self._properties[GoogleSheetsOptions._RESOURCE_NAME] = value._properties + + @property + def parquet_options(self) -> Optional[ParquetOptions]: + """Additional properties to set if ``sourceFormat`` is set to PARQUET. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options """ - if self.source_format != ExternalSourceFormat.PARQUET: + if self.source_format == ExternalSourceFormat.PARQUET: + self._properties.setdefault(ParquetOptions._RESOURCE_NAME, {}) + resource = self._properties.get(ParquetOptions._RESOURCE_NAME) + if resource is None: return None - return self._options + options = ParquetOptions() + options._properties = resource + return options @parquet_options.setter def parquet_options(self, value): if self.source_format != ExternalSourceFormat.PARQUET: msg = f"Cannot set Parquet options, source format is {self.source_format}" raise TypeError(msg) - self._options = value + self._properties[ParquetOptions._RESOURCE_NAME] = value._properties def to_api_repr(self) -> dict: """Build an API representation of this object. @@ -841,10 +947,6 @@ def to_api_repr(self) -> dict: A dictionary in the format used by the BigQuery API. """ config = copy.deepcopy(self._properties) - if self.options is not None: - r = self.options.to_api_repr() - if r != {}: - config[self.options._RESOURCE_NAME] = r return config @classmethod @@ -862,10 +964,5 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig": ExternalConfig: Configuration parsed from ``resource``. """ config = cls(resource["sourceFormat"]) - for optcls in _OPTION_CLASSES: - opts = resource.get(optcls._RESOURCE_NAME) - if opts is not None: - config._options = optcls.from_api_repr(opts) - break config._properties = copy.deepcopy(resource) return config diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py b/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py index 2c9a2ce20e78..1208565a9565 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py @@ -13,7 +13,59 @@ # limitations under the License. import copy -from typing import Dict +from typing import Dict, Optional + + +class AvroOptions: + """Options if source format is set to AVRO.""" + + _SOURCE_FORMAT = "AVRO" + _RESOURCE_NAME = "avroOptions" + + def __init__(self): + self._properties = {} + + @property + def use_avro_logical_types(self) -> Optional[bool]: + """[Optional] If sourceFormat is set to 'AVRO', indicates whether to + interpret logical types as the corresponding BigQuery data type (for + example, TIMESTAMP), instead of using the raw type (for example, + INTEGER). + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#AvroOptions.FIELDS.use_avro_logical_types + """ + return self._properties.get("useAvroLogicalTypes") + + @use_avro_logical_types.setter + def use_avro_logical_types(self, value): + self._properties["useAvroLogicalTypes"] = value + + @classmethod + def from_api_repr(cls, resource: Dict[str, bool]) -> "AvroOptions": + """Factory: construct an instance from a resource dict. + + Args: + resource (Dict[str, bool]): + Definition of a :class:`~.format_options.AvroOptions` instance in + the same representation as is returned from the API. + + Returns: + :class:`~.format_options.AvroOptions`: + Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, bool]: + A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) class ParquetOptions: diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 3dc9dd179b65..3ef61d738345 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -163,7 +163,7 @@ def test_to_api_repr_sheets(self): options = external_config.GoogleSheetsOptions() options.skip_leading_rows = 123 options.range = "Sheet1!A5:B10" - ec._options = options + ec.google_sheets_options = options exp_resource = { "sourceFormat": "GOOGLE_SHEETS", @@ -277,7 +277,7 @@ def test_to_api_repr_csv(self): options.quote_character = "quote" options.skip_leading_rows = 123 options.allow_jagged_rows = False - ec._options = options + ec.csv_options = options exp_resource = { "sourceFormat": "CSV", @@ -368,7 +368,7 @@ def test_to_api_repr_bigtable(self): options = external_config.BigtableOptions() options.ignore_unspecified_column_families = True options.read_rowkey_as_string = False - ec._options = options + ec.bigtable_options = options fam1 = external_config.BigtableColumnFamily() fam1.family_id = "familyId" @@ -425,10 +425,166 @@ def test_to_api_repr_bigtable(self): self.assertEqual(got_resource, exp_resource) - def test_parquet_options_getter(self): + def test_avro_options_getter_and_setter(self): + from google.cloud.bigquery.external_config import AvroOptions + + options = AvroOptions.from_api_repr({"useAvroLogicalTypes": True}) + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO) + + self.assertIsNone(ec.avro_options.use_avro_logical_types) + + ec.avro_options = options + + self.assertTrue(ec.avro_options.use_avro_logical_types) + self.assertIs( + ec.options._properties, ec._properties[AvroOptions._RESOURCE_NAME] + ) + self.assertIs( + ec.avro_options._properties, ec._properties[AvroOptions._RESOURCE_NAME] + ) + + def test_avro_options_getter_empty(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO) + self.assertIsNotNone(ec.avro_options) + + def test_avro_options_getter_wrong_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNone(ec.avro_options) + + def test_avro_options_setter_wrong_format(self): + from google.cloud.bigquery.format_options import AvroOptions + + options = AvroOptions() + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + + with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"): + ec.avro_options = options + + def test_bigtable_options_getter_and_setter(self): + from google.cloud.bigquery.external_config import BigtableOptions + + options = BigtableOptions.from_api_repr( + {"ignoreUnspecifiedColumnFamilies": True, "readRowkeyAsString": False} + ) + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.BIGTABLE + ) + + self.assertIsNone(ec.bigtable_options.ignore_unspecified_column_families) + self.assertIsNone(ec.bigtable_options.read_rowkey_as_string) + + ec.bigtable_options = options + + self.assertTrue(ec.bigtable_options.ignore_unspecified_column_families) + self.assertFalse(ec.bigtable_options.read_rowkey_as_string) + self.assertIs( + ec.options._properties, ec._properties[BigtableOptions._RESOURCE_NAME] + ) + self.assertIs( + ec.bigtable_options._properties, + ec._properties[BigtableOptions._RESOURCE_NAME], + ) + + def test_bigtable_options_getter_empty(self): + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.BIGTABLE + ) + self.assertIsNotNone(ec.bigtable_options) + + def test_bigtable_options_getter_wrong_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNone(ec.bigtable_options) + + def test_bigtable_options_setter_wrong_format(self): + from google.cloud.bigquery.external_config import BigtableOptions + + options = BigtableOptions() + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + + with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"): + ec.bigtable_options = options + + def test_csv_options_getter_and_setter(self): + from google.cloud.bigquery.external_config import CSVOptions + + options = CSVOptions.from_api_repr( + {"allowJaggedRows": True, "allowQuotedNewlines": False} + ) + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + + self.assertIsNone(ec.csv_options.allow_jagged_rows) + self.assertIsNone(ec.csv_options.allow_quoted_newlines) + + ec.csv_options = options + + self.assertTrue(ec.csv_options.allow_jagged_rows) + self.assertFalse(ec.csv_options.allow_quoted_newlines) + self.assertIs(ec.options._properties, ec._properties[CSVOptions._RESOURCE_NAME]) + self.assertIs( + ec.csv_options._properties, ec._properties[CSVOptions._RESOURCE_NAME] + ) + + def test_csv_options_getter_empty(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNotNone(ec.csv_options) + + def test_csv_options_getter_wrong_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO) + self.assertIsNone(ec.csv_options) + + def test_csv_options_setter_wrong_format(self): + from google.cloud.bigquery.external_config import CSVOptions + + options = CSVOptions() + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO) + + with self.assertRaisesRegex(TypeError, "Cannot set.*source format is AVRO"): + ec.csv_options = options + + def test_google_sheets_options_getter_and_setter(self): + from google.cloud.bigquery.external_config import GoogleSheetsOptions + + options = GoogleSheetsOptions.from_api_repr({"skipLeadingRows": "123"}) + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.GOOGLE_SHEETS + ) + + self.assertIsNone(ec.google_sheets_options.skip_leading_rows) + + ec.google_sheets_options = options + + self.assertEqual(ec.google_sheets_options.skip_leading_rows, 123) + self.assertIs( + ec.options._properties, ec._properties[GoogleSheetsOptions._RESOURCE_NAME] + ) + self.assertIs( + ec.google_sheets_options._properties, + ec._properties[GoogleSheetsOptions._RESOURCE_NAME], + ) + + def test_google_sheets_options_getter_empty(self): + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.GOOGLE_SHEETS + ) + self.assertIsNotNone(ec.google_sheets_options) + + def test_google_sheets_options_getter_wrong_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNone(ec.google_sheets_options) + + def test_google_sheets_options_setter_wrong_format(self): + from google.cloud.bigquery.external_config import GoogleSheetsOptions + + options = GoogleSheetsOptions() + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + + with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"): + ec.google_sheets_options = options + + def test_parquet_options_getter_and_setter(self): from google.cloud.bigquery.format_options import ParquetOptions - parquet_options = ParquetOptions.from_api_repr( + options = ParquetOptions.from_api_repr( {"enumAsString": True, "enableListInference": False} ) ec = external_config.ExternalConfig( @@ -438,32 +594,50 @@ def test_parquet_options_getter(self): self.assertIsNone(ec.parquet_options.enum_as_string) self.assertIsNone(ec.parquet_options.enable_list_inference) - ec._options = parquet_options + ec.parquet_options = options self.assertTrue(ec.parquet_options.enum_as_string) self.assertFalse(ec.parquet_options.enable_list_inference) + self.assertIs( + ec.options._properties, ec._properties[ParquetOptions._RESOURCE_NAME] + ) + self.assertIs( + ec.parquet_options._properties, + ec._properties[ParquetOptions._RESOURCE_NAME], + ) - self.assertIs(ec.parquet_options, ec.options) - - def test_parquet_options_getter_non_parquet_format(self): - ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) - self.assertIsNone(ec.parquet_options) + def test_parquet_options_set_properties(self): + """Check that setting sub-properties works without having to create a + new ParquetOptions instance. - def test_parquet_options_setter(self): - from google.cloud.bigquery.format_options import ParquetOptions + This is required for compatibility with previous + ExternalConfig._options implementation. + """ - parquet_options = ParquetOptions.from_api_repr( - {"enumAsString": False, "enableListInference": True} - ) ec = external_config.ExternalConfig( external_config.ExternalSourceFormat.PARQUET ) - ec.parquet_options = parquet_options + self.assertIsNone(ec.parquet_options.enum_as_string) + self.assertIsNone(ec.parquet_options.enable_list_inference) + + ec.parquet_options.enum_as_string = True + ec.parquet_options.enable_list_inference = False + + self.assertTrue(ec.options.enum_as_string) + self.assertFalse(ec.options.enable_list_inference) + self.assertTrue(ec.parquet_options.enum_as_string) + self.assertFalse(ec.parquet_options.enable_list_inference) + + def test_parquet_options_getter_empty(self): + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.PARQUET + ) + self.assertIsNotNone(ec.parquet_options) - # Setting Parquet options should be reflected in the generic options attribute. - self.assertFalse(ec.options.enum_as_string) - self.assertTrue(ec.options.enable_list_inference) + def test_parquet_options_getter_non_parquet_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNone(ec.parquet_options) def test_parquet_options_setter_non_parquet_format(self): from google.cloud.bigquery.format_options import ParquetOptions @@ -514,7 +688,7 @@ def test_to_api_repr_parquet(self): options = ParquetOptions.from_api_repr( dict(enumAsString=False, enableListInference=True) ) - ec._options = options + ec.parquet_options = options exp_resource = { "sourceFormat": external_config.ExternalSourceFormat.PARQUET, @@ -584,6 +758,117 @@ def test_to_api_repr_decimal_target_types_unset(self): ec.decimal_target_types = None # No error if unsetting when already unset. +class BigtableOptions(unittest.TestCase): + def test_to_api_repr(self): + options = external_config.BigtableOptions() + family1 = external_config.BigtableColumnFamily() + column1 = external_config.BigtableColumn() + column1.qualifier_string = "col1" + column1.field_name = "bqcol1" + column1.type_ = "FLOAT" + column1.encoding = "TEXT" + column1.only_read_latest = True + column2 = external_config.BigtableColumn() + column2.qualifier_encoded = b"col2" + column2.field_name = "bqcol2" + column2.type_ = "STRING" + column2.only_read_latest = False + family1.family_id = "family1" + family1.type_ = "INTEGER" + family1.encoding = "BINARY" + family1.columns = [column1, column2] + family1.only_read_latest = False + family2 = external_config.BigtableColumnFamily() + column3 = external_config.BigtableColumn() + column3.qualifier_string = "col3" + family2.family_id = "family2" + family2.type_ = "BYTES" + family2.encoding = "TEXT" + family2.columns = [column3] + family2.only_read_latest = True + options.column_families = [family1, family2] + options.ignore_unspecified_column_families = False + options.read_rowkey_as_string = True + + resource = options.to_api_repr() + + expected_column_families = [ + { + "familyId": "family1", + "type": "INTEGER", + "encoding": "BINARY", + "columns": [ + { + "qualifierString": "col1", + "fieldName": "bqcol1", + "type": "FLOAT", + "encoding": "TEXT", + "onlyReadLatest": True, + }, + { + "qualifierEncoded": "Y29sMg==", + "fieldName": "bqcol2", + "type": "STRING", + "onlyReadLatest": False, + }, + ], + "onlyReadLatest": False, + }, + { + "familyId": "family2", + "type": "BYTES", + "encoding": "TEXT", + "columns": [{"qualifierString": "col3"}], + "onlyReadLatest": True, + }, + ] + self.maxDiff = None + self.assertEqual( + resource, + { + "columnFamilies": expected_column_families, + "ignoreUnspecifiedColumnFamilies": False, + "readRowkeyAsString": True, + }, + ) + + +class CSVOptions(unittest.TestCase): + def test_to_api_repr(self): + options = external_config.CSVOptions() + options.field_delimiter = "\t" + options.skip_leading_rows = 42 + options.quote_character = '"' + options.allow_quoted_newlines = True + options.allow_jagged_rows = False + options.encoding = "UTF-8" + + resource = options.to_api_repr() + + self.assertEqual( + resource, + { + "fieldDelimiter": "\t", + "skipLeadingRows": "42", + "quote": '"', + "allowQuotedNewlines": True, + "allowJaggedRows": False, + "encoding": "UTF-8", + }, + ) + + +class TestGoogleSheetsOptions(unittest.TestCase): + def test_to_api_repr(self): + options = external_config.GoogleSheetsOptions() + options.range = "sheet1!A1:B20" + options.skip_leading_rows = 107 + + resource = options.to_api_repr() + + self.assertEqual(resource, {"range": "sheet1!A1:B20", "skipLeadingRows": "107"}) + + def _copy_and_update(d, u): d = copy.deepcopy(d) d.update(u) diff --git a/packages/google-cloud-bigquery/tests/unit/test_format_options.py b/packages/google-cloud-bigquery/tests/unit/test_format_options.py index ab5f9e05cbe4..c8fecbfa6692 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_format_options.py +++ b/packages/google-cloud-bigquery/tests/unit/test_format_options.py @@ -13,6 +13,29 @@ # limitations under the License. +class TestAvroOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.format_options import AvroOptions + + return AvroOptions + + def test_ctor(self): + config = self._get_target_class()() + assert config.use_avro_logical_types is None + + def test_from_api_repr(self): + config = self._get_target_class().from_api_repr({"useAvroLogicalTypes": True}) + assert config.use_avro_logical_types + + def test_to_api_repr(self): + config = self._get_target_class()() + config.use_avro_logical_types = False + + result = config.to_api_repr() + assert result == {"useAvroLogicalTypes": False} + + class TestParquetOptions: @staticmethod def _get_target_class(): From 4d1ed638c4d61b5dc4917392262daeb58041c3e2 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 30 Sep 2021 13:09:14 -0500 Subject: [PATCH 1316/2016] chore: release 2.28.0 (#998) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index d531ec477741..0bb149f01d39 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.28.0](https://www.github.com/googleapis/python-bigquery/compare/v2.27.1...v2.28.0) (2021-09-30) + + +### Features + +* add `AvroOptions` to configure AVRO external data ([#994](https://www.github.com/googleapis/python-bigquery/issues/994)) ([1a9431d](https://www.github.com/googleapis/python-bigquery/commit/1a9431d9e02eeb99e4712b61c623f9cca80134a6)) + + +### Documentation + +* link to stable pandas docs ([#990](https://www.github.com/googleapis/python-bigquery/issues/990)) ([ea50e80](https://www.github.com/googleapis/python-bigquery/commit/ea50e8031fc035b3772a338bc00982de263cefad)) + ### [2.27.1](https://www.github.com/googleapis/python-bigquery/compare/v2.27.0...v2.27.1) (2021-09-27) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 3e5c77edee64..10e7fb4f5ece 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.27.1" +__version__ = "2.28.0" From 171fdd59d2672d1749356fada6f1f9f6e5c539cb Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 5 Oct 2021 13:21:15 -0600 Subject: [PATCH 1317/2016] build: use trampoline_v2 for python samples and allow custom dockerfile (#1002) Source-Link: https://github.com/googleapis/synthtool/commit/a7ed11ec0863c422ba2e73aafa75eab22c32b33d Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:6e7328583be8edd3ba8f35311c76a1ecbc823010279ccb6ab46b7a76e25eafcc Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 2 +- .../.kokoro/samples/lint/common.cfg | 2 +- .../.kokoro/samples/python3.6/common.cfg | 2 +- .../.kokoro/samples/python3.6/periodic.cfg | 2 +- .../.kokoro/samples/python3.7/common.cfg | 2 +- .../.kokoro/samples/python3.7/periodic.cfg | 2 +- .../.kokoro/samples/python3.8/common.cfg | 2 +- .../.kokoro/samples/python3.8/periodic.cfg | 2 +- .../.kokoro/samples/python3.9/common.cfg | 2 +- .../.kokoro/samples/python3.9/periodic.cfg | 2 +- .../.kokoro/test-samples-against-head.sh | 2 -- .../.kokoro/test-samples.sh | 2 -- packages/google-cloud-bigquery/.trampolinerc | 17 ++++++++++++++--- 13 files changed, 24 insertions(+), 17 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index ae6c57fad807..ee94722ab57b 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:82b12321da4446a73cb11bcb6812fbec8c105abda3946d46e6394e5fbfb64c0f + digest: sha256:6e7328583be8edd3ba8f35311c76a1ecbc823010279ccb6ab46b7a76e25eafcc diff --git a/packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg index 3e41df313979..153746cccae7 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg @@ -31,4 +31,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg index f3b930960b41..20f6b9691d91 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg @@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg index 50fec9649732..71cd1e597e38 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg @@ -3,4 +3,4 @@ env_vars: { key: "INSTALL_LIBRARY_FROM_SOURCE" value: "False" -} \ No newline at end of file +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg index fc06545655d4..d30dc6018ebc 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg @@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg index 50fec9649732..71cd1e597e38 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg @@ -3,4 +3,4 @@ env_vars: { key: "INSTALL_LIBRARY_FROM_SOURCE" value: "False" -} \ No newline at end of file +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg index 2b0bf59b3ef5..46759c6d61bf 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg @@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg index 50fec9649732..71cd1e597e38 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg @@ -3,4 +3,4 @@ env_vars: { key: "INSTALL_LIBRARY_FROM_SOURCE" value: "False" -} \ No newline at end of file +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg index f179577a5400..58d56ce743c9 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg @@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg index 50fec9649732..71cd1e597e38 100644 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg @@ -3,4 +3,4 @@ env_vars: { key: "INSTALL_LIBRARY_FROM_SOURCE" value: "False" -} \ No newline at end of file +} diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh index 689948a23005..ba3a707b040c 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh @@ -23,6 +23,4 @@ set -eo pipefail # Enables `**` to include files nested inside sub-folders shopt -s globstar -cd github/python-bigquery - exec .kokoro/test-samples-impl.sh diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples.sh b/packages/google-cloud-bigquery/.kokoro/test-samples.sh index 62ef534cd59e..11c042d342d7 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples.sh @@ -24,8 +24,6 @@ set -eo pipefail # Enables `**` to include files nested inside sub-folders shopt -s globstar -cd github/python-bigquery - # Run periodic samples tests at latest release if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then # preserving the test runner implementation. diff --git a/packages/google-cloud-bigquery/.trampolinerc b/packages/google-cloud-bigquery/.trampolinerc index 383b6ec89fbc..0eee72ab62aa 100644 --- a/packages/google-cloud-bigquery/.trampolinerc +++ b/packages/google-cloud-bigquery/.trampolinerc @@ -16,15 +16,26 @@ # Add required env vars here. required_envvars+=( - "STAGING_BUCKET" - "V2_STAGING_BUCKET" ) # Add env vars which are passed down into the container here. pass_down_envvars+=( + "NOX_SESSION" + ############### + # Docs builds + ############### "STAGING_BUCKET" "V2_STAGING_BUCKET" - "NOX_SESSION" + ################## + # Samples builds + ################## + "INSTALL_LIBRARY_FROM_SOURCE" + "RUN_TESTS_SESSION" + "BUILD_SPECIFIC_GCLOUD_PROJECT" + # Target directories. + "RUN_TESTS_DIRS" + # The nox session to run. + "RUN_TESTS_SESSION" ) # Prevent unintentional override on the default image. From 0d278830aed6571544b5e736756c800e608e477b Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 6 Oct 2021 10:56:15 -0400 Subject: [PATCH 1318/2016] chore: add default_version and codeowner_team to .repo-metadata.json (#1001) Set codeowner_team to googleapis/api-bigquery as codeowner. Set default_version to v2. This change is needed for the following synthtool PRs. googleapis/synthtool#1201 googleapis/synthtool#1114 --- .../google-cloud-bigquery/.repo-metadata.json | 28 ++++++----- packages/google-cloud-bigquery/owlbot.py | 49 +------------------ 2 files changed, 16 insertions(+), 61 deletions(-) diff --git a/packages/google-cloud-bigquery/.repo-metadata.json b/packages/google-cloud-bigquery/.repo-metadata.json index f132056d5a39..124b40eb9b5f 100644 --- a/packages/google-cloud-bigquery/.repo-metadata.json +++ b/packages/google-cloud-bigquery/.repo-metadata.json @@ -1,14 +1,16 @@ { - "name": "bigquery", - "name_pretty": "Google Cloud BigQuery", - "product_documentation": "https://cloud.google.com/bigquery", - "client_documentation": "https://googleapis.dev/python/bigquery/latest", - "issue_tracker": "https://issuetracker.google.com/savedsearches/559654", - "release_level": "ga", - "language": "python", - "library_type": "GAPIC_COMBO", - "repo": "googleapis/python-bigquery", - "distribution_name": "google-cloud-bigquery", - "api_id": "bigquery.googleapis.com", - "requires_billing": false -} \ No newline at end of file + "name": "bigquery", + "name_pretty": "Google Cloud BigQuery", + "product_documentation": "https://cloud.google.com/bigquery", + "client_documentation": "https://googleapis.dev/python/bigquery/latest", + "issue_tracker": "https://issuetracker.google.com/savedsearches/559654", + "release_level": "ga", + "language": "python", + "library_type": "GAPIC_COMBO", + "repo": "googleapis/python-bigquery", + "distribution_name": "google-cloud-bigquery", + "api_id": "bigquery.googleapis.com", + "requires_billing": false, + "default_version": "v2", + "codeowner_team": "@googleapis/api-bigquery" +} diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index ace99e687a21..0f6f8fe99ae9 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -109,6 +109,7 @@ excludes=[ "noxfile.py", "docs/multiprocessing.rst", + "docs/index.rst", ".coveragerc", ".github/CODEOWNERS", # Include custom SNIPPETS_TESTS job for performance. @@ -170,52 +171,4 @@ ), ) - -# Remove the replacements below once -# https://github.com/googleapis/synthtool/pull/1188 is merged - -# Update googleapis/repo-automation-bots repo to main in .kokoro/*.sh files -s.replace( - ".kokoro/*.sh", - "repo-automation-bots/tree/master", - "repo-automation-bots/tree/main", -) - -# Customize CONTRIBUTING.rst to replace master with main -s.replace( - "CONTRIBUTING.rst", - "fetch and merge changes from upstream into master", - "fetch and merge changes from upstream into main", -) - -s.replace( - "CONTRIBUTING.rst", "git merge upstream/master", "git merge upstream/main", -) - -s.replace( - "CONTRIBUTING.rst", - """export GOOGLE_CLOUD_TESTING_BRANCH=\"master\"""", - """export GOOGLE_CLOUD_TESTING_BRANCH=\"main\"""", -) - -s.replace( - "CONTRIBUTING.rst", r"remote \(``master``\)", "remote (``main``)", -) - -s.replace( - "CONTRIBUTING.rst", "blob/master/CONTRIBUTING.rst", "blob/main/CONTRIBUTING.rst", -) - -s.replace( - "CONTRIBUTING.rst", "blob/master/noxfile.py", "blob/main/noxfile.py", -) - -s.replace( - "docs/conf.py", "master_doc", "root_doc", -) - -s.replace( - "docs/conf.py", "# The master toctree document.", "# The root toctree document.", -) - s.shell.run(["nox", "-s", "blacken"], hide_output=False) From d6e81443d5daed7308f0497f49de59c0a0380a09 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 7 Oct 2021 14:00:25 +0200 Subject: [PATCH 1319/2016] chore(deps): update all dependencies (#993) Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 035b652a8bbc..ecd428ab95e5 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==21.2.0 -cachetools==4.2.2 +cachetools==4.2.4 certifi==2021.5.30 cffi==1.14.6 charset-normalizer==2.0.6 @@ -29,7 +29,7 @@ numpy==1.21.2; python_version > "3.6" packaging==21.0 pandas==1.1.5; python_version < '3.7' pandas==1.3.2; python_version >= '3.7' -proto-plus==1.19.0 +proto-plus==1.19.2 protobuf==3.18.0 pyarrow==5.0.0 pyasn1==0.4.8 @@ -48,4 +48,4 @@ six==1.16.0 typing-extensions==3.10.0.2 typing-inspect==0.7.1 urllib3==1.26.7 -zipp==3.5.1 +zipp==3.6.0 From 219866d1395aae5b1412b307269cbbb18c115844 Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Thu, 7 Oct 2021 16:28:12 +0100 Subject: [PATCH 1320/2016] fix: support ARRAY data type when loading from DataFrame with Parquet (#980) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #19 🦕 --- .../google/cloud/bigquery/_helpers.py | 11 + .../google/cloud/bigquery/_pandas_helpers.py | 50 ++- .../google/cloud/bigquery/client.py | 72 +++-- .../tests/system/test_pandas.py | 95 +++++- .../tests/unit/test_client.py | 300 +++++++++++++++++- 5 files changed, 483 insertions(+), 45 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 28a76206ee97..d7189d322f20 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -107,6 +107,9 @@ def verify_version(self): class PyarrowVersions: """Version comparisons for pyarrow package.""" + # https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 + _PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) + def __init__(self): self._installed_version = None @@ -126,6 +129,14 @@ def installed_version(self) -> packaging.version.Version: return self._installed_version + @property + def is_bad_version(self) -> bool: + return self.installed_version in self._PYARROW_BAD_VERSIONS + + @property + def use_compliant_nested_type(self) -> bool: + return self.installed_version.major >= 4 + def try_import(self, raise_if_error: bool = False) -> Any: """Verify that a recent enough version of pyarrow extra is installed. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 869c0215d5b4..0cb85146949e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -79,8 +79,8 @@ def _to_wkb(v): _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", - # BigQuery does not support uploading DATETIME values from Parquet files. - # See: https://github.com/googleapis/google-cloud-python/issues/9996 + # TODO: Update to DATETIME in V3 + # https://github.com/googleapis/python-bigquery/issues/985 "datetime64[ns]": "TIMESTAMP", "float32": "FLOAT", "float64": "FLOAT", @@ -396,7 +396,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # column, but it was not found. if bq_schema_unused: raise ValueError( - u"bq_schema contains fields not present in dataframe: {}".format( + "bq_schema contains fields not present in dataframe: {}".format( bq_schema_unused ) ) @@ -405,7 +405,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # pyarrow, if available. if unknown_type_fields: if not pyarrow: - msg = u"Could not determine the type of columns: {}".format( + msg = "Could not determine the type of columns: {}".format( ", ".join(field.name for field in unknown_type_fields) ) warnings.warn(msg) @@ -444,7 +444,14 @@ def augment_schema(dataframe, current_bq_schema): continue arrow_table = pyarrow.array(dataframe[field.name]) - detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.type.id) + + if pyarrow.types.is_list(arrow_table.type): + # `pyarrow.ListType` + detected_mode = "REPEATED" + detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.values.type.id) + else: + detected_mode = field.mode + detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.type.id) if detected_type is None: unknown_type_fields.append(field) @@ -453,7 +460,7 @@ def augment_schema(dataframe, current_bq_schema): new_field = schema.SchemaField( name=field.name, field_type=detected_type, - mode=field.mode, + mode=detected_mode, description=field.description, fields=field.fields, ) @@ -461,7 +468,7 @@ def augment_schema(dataframe, current_bq_schema): if unknown_type_fields: warnings.warn( - u"Pyarrow could not determine the type of columns: {}.".format( + "Pyarrow could not determine the type of columns: {}.".format( ", ".join(field.name for field in unknown_type_fields) ) ) @@ -500,7 +507,7 @@ def dataframe_to_arrow(dataframe, bq_schema): extra_fields = bq_field_names - column_and_index_names if extra_fields: raise ValueError( - u"bq_schema contains fields not present in dataframe: {}".format( + "bq_schema contains fields not present in dataframe: {}".format( extra_fields ) ) @@ -510,7 +517,7 @@ def dataframe_to_arrow(dataframe, bq_schema): missing_fields = column_names - bq_field_names if missing_fields: raise ValueError( - u"bq_schema is missing fields from dataframe: {}".format(missing_fields) + "bq_schema is missing fields from dataframe: {}".format(missing_fields) ) arrow_arrays = [] @@ -530,7 +537,13 @@ def dataframe_to_arrow(dataframe, bq_schema): return pyarrow.Table.from_arrays(arrow_arrays, names=arrow_names) -def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SNAPPY"): +def dataframe_to_parquet( + dataframe, + bq_schema, + filepath, + parquet_compression="SNAPPY", + parquet_use_compliant_nested_type=True, +): """Write dataframe as a Parquet file, according to the desired BQ schema. This function requires the :mod:`pyarrow` package. Arrow is used as an @@ -551,14 +564,29 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN The compression codec to use by the the ``pyarrow.parquet.write_table`` serializing method. Defaults to "SNAPPY". https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table + parquet_use_compliant_nested_type (bool): + Whether the ``pyarrow.parquet.write_table`` serializing method should write + compliant Parquet nested type (lists). Defaults to ``True``. + https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types + https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table + + This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``. """ pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) import pyarrow.parquet + kwargs = ( + {"use_compliant_nested_type": parquet_use_compliant_nested_type} + if _helpers.PYARROW_VERSIONS.use_compliant_nested_type + else {} + ) + bq_schema = schema._to_schema_fields(bq_schema) arrow_table = dataframe_to_arrow(dataframe, bq_schema) - pyarrow.parquet.write_table(arrow_table, filepath, compression=parquet_compression) + pyarrow.parquet.write_table( + arrow_table, filepath, compression=parquet_compression, **kwargs, + ) def _row_iterator_page_to_arrow(page, column_names, arrow_types): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 47ff83c5df01..a8a1c1e16c01 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -27,19 +27,11 @@ import json import math import os -import packaging.version import tempfile from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union import uuid import warnings -try: - import pyarrow - - _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) -except ImportError: # pragma: NO COVER - pyarrow = None - from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload @@ -103,6 +95,10 @@ from google.cloud.bigquery.table import TableListItem from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import RowIterator +from google.cloud.bigquery.format_options import ParquetOptions +from google.cloud.bigquery import _helpers + +pyarrow = _helpers.PYARROW_VERSIONS.try_import() _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB @@ -128,8 +124,6 @@ # https://github.com/googleapis/python-bigquery/issues/438 _MIN_GET_QUERY_RESULTS_TIMEOUT = 120 -# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 -_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) TIMEOUT_HEADER = "X-Server-Timeout" @@ -2469,10 +2463,10 @@ def load_table_from_dataframe( They are supported when using the PARQUET source format, but due to the way they are encoded in the ``parquet`` file, a mismatch with the existing table schema can occur, so - 100% compatibility cannot be guaranteed for REPEATED fields when + REPEATED fields are not properly supported when using ``pyarrow<4.0.0`` using the parquet format. - https://github.com/googleapis/python-bigquery/issues/17 + https://github.com/googleapis/python-bigquery/issues/19 Args: dataframe (pandas.DataFrame): @@ -2519,18 +2513,18 @@ def load_table_from_dataframe( :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are supported. parquet_compression (Optional[str]): - [Beta] The compression method to use if intermittently - serializing ``dataframe`` to a parquet file. - - The argument is directly passed as the ``compression`` - argument to the underlying ``pyarrow.parquet.write_table()`` - method (the default value "snappy" gets converted to uppercase). - https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table - - If the job config schema is missing, the argument is directly - passed as the ``compression`` argument to the underlying - ``DataFrame.to_parquet()`` method. - https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet + [Beta] The compression method to use if intermittently + serializing ``dataframe`` to a parquet file. + + The argument is directly passed as the ``compression`` + argument to the underlying ``pyarrow.parquet.write_table()`` + method (the default value "snappy" gets converted to uppercase). + https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table + + If the job config schema is missing, the argument is directly + passed as the ``compression`` argument to the underlying + ``DataFrame.to_parquet()`` method. + https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -2562,6 +2556,16 @@ def load_table_from_dataframe( if job_config.source_format is None: # default value job_config.source_format = job.SourceFormat.PARQUET + + if ( + job_config.source_format == job.SourceFormat.PARQUET + and job_config.parquet_options is None + ): + parquet_options = ParquetOptions() + # default value + parquet_options.enable_list_inference = True + job_config.parquet_options = parquet_options + if job_config.source_format not in supported_formats: raise ValueError( "Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported".format( @@ -2628,12 +2632,12 @@ def load_table_from_dataframe( try: if job_config.source_format == job.SourceFormat.PARQUET: - if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: + if _helpers.PYARROW_VERSIONS.is_bad_version: msg = ( "Loading dataframe data in PARQUET format with pyarrow " - f"{_PYARROW_VERSION} can result in data corruption. It is " - "therefore *strongly* advised to use a different pyarrow " - "version or a different source format. " + f"{_helpers.PYARROW_VERSIONS.installed_version} can result in data " + "corruption. It is therefore *strongly* advised to use a " + "different pyarrow version or a different source format. " "See: https://github.com/googleapis/python-bigquery/issues/781" ) warnings.warn(msg, category=RuntimeWarning) @@ -2647,9 +2651,19 @@ def load_table_from_dataframe( job_config.schema, tmppath, parquet_compression=parquet_compression, + parquet_use_compliant_nested_type=True, ) else: - dataframe.to_parquet(tmppath, compression=parquet_compression) + dataframe.to_parquet( + tmppath, + engine="pyarrow", + compression=parquet_compression, + **( + {"use_compliant_nested_type": True} + if _helpers.PYARROW_VERSIONS.use_compliant_nested_type + else {} + ), + ) else: diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 93ce23481f58..1f43a369a2c2 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -24,6 +24,7 @@ import google.api_core.retry import pkg_resources import pytest +import numpy from google.cloud import bigquery from . import helpers @@ -84,6 +85,81 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")), ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")), ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")), + ("array_bool_col", pandas.Series([[True], [False], [True]])), + ( + "array_ts_col", + pandas.Series( + [ + [ + datetime.datetime( + 2010, 1, 2, 3, 44, 50, tzinfo=datetime.timezone.utc + ), + ], + [ + datetime.datetime( + 2011, 2, 3, 14, 50, 59, tzinfo=datetime.timezone.utc + ), + ], + [ + datetime.datetime( + 2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc + ), + ], + ], + ), + ), + ( + "array_dt_col", + pandas.Series( + [ + [datetime.datetime(2010, 1, 2, 3, 44, 50)], + [datetime.datetime(2011, 2, 3, 14, 50, 59)], + [datetime.datetime(2012, 3, 14, 15, 16)], + ], + ), + ), + ( + "array_float32_col", + pandas.Series( + [numpy.array([_], dtype="float32") for _ in [1.0, 2.0, 3.0]] + ), + ), + ( + "array_float64_col", + pandas.Series( + [numpy.array([_], dtype="float64") for _ in [4.0, 5.0, 6.0]] + ), + ), + ( + "array_int8_col", + pandas.Series( + [numpy.array([_], dtype="int8") for _ in [-12, -11, -10]] + ), + ), + ( + "array_int16_col", + pandas.Series([numpy.array([_], dtype="int16") for _ in [-9, -8, -7]]), + ), + ( + "array_int32_col", + pandas.Series([numpy.array([_], dtype="int32") for _ in [-6, -5, -4]]), + ), + ( + "array_int64_col", + pandas.Series([numpy.array([_], dtype="int64") for _ in [-3, -2, -1]]), + ), + ( + "array_uint8_col", + pandas.Series([numpy.array([_], dtype="uint8") for _ in [0, 1, 2]]), + ), + ( + "array_uint16_col", + pandas.Series([numpy.array([_], dtype="uint16") for _ in [3, 4, 5]]), + ), + ( + "array_uint32_col", + pandas.Series([numpy.array([_], dtype="uint32") for _ in [6, 7, 8]]), + ), ] ) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -99,9 +175,8 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i assert tuple(table.schema) == ( bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("ts_col", "TIMESTAMP"), - # BigQuery does not support uploading DATETIME values from - # Parquet files. See: - # https://github.com/googleapis/google-cloud-python/issues/9996 + # TODO: Update to DATETIME in V3 + # https://github.com/googleapis/python-bigquery/issues/985 bigquery.SchemaField("dt_col", "TIMESTAMP"), bigquery.SchemaField("float32_col", "FLOAT"), bigquery.SchemaField("float64_col", "FLOAT"), @@ -112,6 +187,20 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i bigquery.SchemaField("uint8_col", "INTEGER"), bigquery.SchemaField("uint16_col", "INTEGER"), bigquery.SchemaField("uint32_col", "INTEGER"), + bigquery.SchemaField("array_bool_col", "BOOLEAN", mode="REPEATED"), + bigquery.SchemaField("array_ts_col", "TIMESTAMP", mode="REPEATED"), + # TODO: Update to DATETIME in V3 + # https://github.com/googleapis/python-bigquery/issues/985 + bigquery.SchemaField("array_dt_col", "TIMESTAMP", mode="REPEATED"), + bigquery.SchemaField("array_float32_col", "FLOAT", mode="REPEATED"), + bigquery.SchemaField("array_float64_col", "FLOAT", mode="REPEATED"), + bigquery.SchemaField("array_int8_col", "INTEGER", mode="REPEATED"), + bigquery.SchemaField("array_int16_col", "INTEGER", mode="REPEATED"), + bigquery.SchemaField("array_int32_col", "INTEGER", mode="REPEATED"), + bigquery.SchemaField("array_int64_col", "INTEGER", mode="REPEATED"), + bigquery.SchemaField("array_uint8_col", "INTEGER", mode="REPEATED"), + bigquery.SchemaField("array_uint16_col", "INTEGER", mode="REPEATED"), + bigquery.SchemaField("array_uint32_col", "INTEGER", mode="REPEATED"), ) assert table.num_rows == 3 diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index eb70470b54f6..48dacf7e2e7f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -57,6 +57,7 @@ from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from google.cloud.bigquery import ParquetOptions try: from google.cloud import bigquery_storage @@ -6942,6 +6943,179 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_parquet_options_none(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, + source_format=job.SourceFormat.PARQUET, + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch as get_table: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + # no need to fetch and inspect table schema for WRITE_TRUNCATE jobs + assert not get_table.called + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.parquet_options.enable_list_inference is True + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_list_inference_none(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + parquet_options = ParquetOptions() + + job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, + source_format=job.SourceFormat.PARQUET, + ) + job_config.parquet_options = parquet_options + + original_config_copy = copy.deepcopy(job_config) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch as get_table: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + # no need to fetch and inspect table schema for WRITE_TRUNCATE jobs + assert not get_table.called + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.parquet_options.enable_list_inference is None + + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_list_inference_false(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + parquet_options = ParquetOptions() + parquet_options.enable_list_inference = False + + job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, + source_format=job.SourceFormat.PARQUET, + ) + job_config.parquet_options = parquet_options + + original_config_copy = copy.deepcopy(job_config) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch as get_table: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + # no need to fetch and inspect table schema for WRITE_TRUNCATE jobs + assert not get_table.called + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.parquet_options.enable_list_inference is False + + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self): @@ -7293,6 +7467,124 @@ def test_load_table_from_dataframe_struct_fields(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.schema == schema + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_array_fields(self): + """Test that a DataFrame with array columns can be uploaded correctly. + + See: https://github.com/googleapis/python-bigquery/issues/19 + """ + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + + records = [(3.14, [1, 2])] + dataframe = pandas.DataFrame( + data=records, columns=["float_column", "array_column"] + ) + + schema = [ + SchemaField("float_column", "FLOAT"), + SchemaField("array_column", "INTEGER", mode="REPEATED",), + ] + job_config = job.LoadJobConfig(schema=schema) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + job_config=job_config, + location=self.LOCATION, + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema == schema + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_array_fields_w_auto_schema(self): + """Test that a DataFrame with array columns can be uploaded correctly. + + See: https://github.com/googleapis/python-bigquery/issues/19 + """ + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + + records = [(3.14, [1, 2])] + dataframe = pandas.DataFrame( + data=records, columns=["float_column", "array_column"] + ) + + expected_schema = [ + SchemaField("float_column", "FLOAT"), + SchemaField("array_column", "INT64", mode="REPEATED",), + ] + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION, + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema == expected_schema + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema(self): @@ -7540,9 +7832,13 @@ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) + _helpers_mock = mock.MagicMock() + _helpers_mock.PYARROW_VERSIONS = mock.MagicMock() + _helpers_mock.PYARROW_VERSIONS.installed_version = packaging.version.parse( + "2.0.0" + ) # A known bad version of pyarrow. pyarrow_version_patch = mock.patch( - "google.cloud.bigquery.client._PYARROW_VERSION", - packaging.version.parse("2.0.0"), # A known bad version of pyarrow. + "google.cloud.bigquery.client._helpers", _helpers_mock ) get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", From 37cc50df37c2518d664d11140b9845df7e5ef014 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 7 Oct 2021 12:15:50 -0500 Subject: [PATCH 1321/2016] chore: release 2.28.1 (#1007) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 0bb149f01d39..d15f2285182a 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.28.1](https://www.github.com/googleapis/python-bigquery/compare/v2.28.0...v2.28.1) (2021-10-07) + + +### Bug Fixes + +* support ARRAY data type when loading from DataFrame with Parquet ([#980](https://www.github.com/googleapis/python-bigquery/issues/980)) ([1e59083](https://www.github.com/googleapis/python-bigquery/commit/1e5908302d36e15442013af6f46b1c20af28255e)) + ## [2.28.0](https://www.github.com/googleapis/python-bigquery/compare/v2.27.1...v2.28.0) (2021-09-30) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 10e7fb4f5ece..967959b05d09 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.28.0" +__version__ = "2.28.1" From 7c8e6e00b5354e56731f5c9a2e28fa263c77472f Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 7 Oct 2021 18:02:30 +0000 Subject: [PATCH 1322/2016] chore(python): fix formatting issue in noxfile.py.j2 (#1008) --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/CONTRIBUTING.rst | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index ee94722ab57b..76d0baa0a49d 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:6e7328583be8edd3ba8f35311c76a1ecbc823010279ccb6ab46b7a76e25eafcc + digest: sha256:4370ced27a324687ede5da07132dcdc5381993502a5e8a3e31e16dc631d026f0 diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index 8aecf9dd2aa7..f183b63b4969 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.6, 3.7, 3.8 and 3.9 on both UNIX and Windows. + 3.6, 3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.9 -- -k + $ nox -s unit-3.10 -- -k .. note:: @@ -225,11 +225,13 @@ We support: - `Python 3.7`_ - `Python 3.8`_ - `Python 3.9`_ +- `Python 3.10`_ .. _Python 3.6: https://docs.python.org/3.6/ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ +.. _Python 3.10: https://docs.python.org/3.10/ Supported versions can be found in our ``noxfile.py`` `config`_. From 50bf79d46e2d6e188f867cc8b9216f8d80249e0a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 8 Oct 2021 18:34:14 +0000 Subject: [PATCH 1323/2016] chore(python): Add kokoro configs for python 3.10 samples testing (#1010) --- .../.github/.OwlBot.lock.yaml | 2 +- .../.kokoro/samples/python3.10/common.cfg | 40 +++++++++++++++++++ .../.kokoro/samples/python3.10/continuous.cfg | 6 +++ .../samples/python3.10/periodic-head.cfg | 11 +++++ .../.kokoro/samples/python3.10/periodic.cfg | 6 +++ .../.kokoro/samples/python3.10/presubmit.cfg | 6 +++ .../samples/geography/noxfile.py | 2 +- .../samples/snippets/noxfile.py | 2 +- 8 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.10/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.10/continuous.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic-head.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.10/presubmit.cfg diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 76d0baa0a49d..7d98291cc35f 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:4370ced27a324687ede5da07132dcdc5381993502a5e8a3e31e16dc631d026f0 + digest: sha256:58f73ba196b5414782605236dd0712a73541b44ff2ff4d3a36ec41092dd6fa5b diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/common.cfg new file mode 100644 index 000000000000..da4003d76d91 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.10" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-310" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/continuous.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic-head.cfg new file mode 100644 index 000000000000..5aa01bab5bf3 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic.cfg new file mode 100644 index 000000000000..71cd1e597e38 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/presubmit.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 1fd8956fbf01..93a9122cc457 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -87,7 +87,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 1fd8956fbf01..93a9122cc457 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -87,7 +87,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] From 34a5773fa986a18e6dc03c310b6c35b9b4b6eb30 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 8 Oct 2021 15:23:10 -0500 Subject: [PATCH 1324/2016] docs: fix formatting of generated client docstrings (#1009) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chore: fix docstring for first attribute of protos committer: @busunkim96 PiperOrigin-RevId: 401271153 Source-Link: https://github.com/googleapis/googleapis/commit/787f8c9a731f44e74a90b9847d48659ca9462d10 Source-Link: https://github.com/googleapis/googleapis-gen/commit/81decffe9fc72396a8153e756d1d67a6eecfd620 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiODFkZWNmZmU5ZmM3MjM5NmE4MTUzZTc1NmQxZDY3YTZlZWNmZDYyMCJ9 * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- .../bigquery_v2/types/encryption_config.py | 1 + .../google/cloud/bigquery_v2/types/model.py | 30 +++++++++++++++++-- .../bigquery_v2/types/model_reference.py | 1 + .../cloud/bigquery_v2/types/standard_sql.py | 3 ++ .../bigquery_v2/types/table_reference.py | 1 + 5 files changed, 34 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py index 4b9139733964..a95954a303aa 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py @@ -25,6 +25,7 @@ class EncryptionConfiguration(proto.Message): r""" + Attributes: kms_key_name (google.protobuf.wrappers_pb2.StringValue): Optional. Describes the Cloud KMS encryption diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index 70641840121b..6e3ca0095b23 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -38,6 +38,7 @@ class Model(proto.Message): r""" + Attributes: etag (str): Output only. A hash of this resource. @@ -251,7 +252,8 @@ class FeedbackType(proto.Enum): EXPLICIT = 2 class SeasonalPeriod(proto.Message): - r""" """ + r""" + """ class SeasonalPeriodType(proto.Enum): r"""""" @@ -264,7 +266,8 @@ class SeasonalPeriodType(proto.Enum): YEARLY = 6 class KmeansEnums(proto.Message): - r""" """ + r""" + """ class KmeansInitializationMethod(proto.Enum): r"""Indicates the method used to initialize the centroids for @@ -386,6 +389,7 @@ class BinaryClassificationMetrics(proto.Message): class BinaryConfusionMatrix(proto.Message): r"""Confusion matrix for binary classification models. + Attributes: positive_class_threshold (google.protobuf.wrappers_pb2.DoubleValue): Threshold value used when computing each of @@ -464,6 +468,7 @@ class MultiClassClassificationMetrics(proto.Message): class ConfusionMatrix(proto.Message): r"""Confusion matrix for multi-class classification models. + Attributes: confidence_threshold (google.protobuf.wrappers_pb2.DoubleValue): Confidence threshold used when computing the @@ -474,6 +479,7 @@ class ConfusionMatrix(proto.Message): class Entry(proto.Message): r"""A single entry in the confusion matrix. + Attributes: predicted_label (str): The predicted label. For confidence_threshold > 0, we will @@ -491,6 +497,7 @@ class Entry(proto.Message): class Row(proto.Message): r"""A single row in the confusion matrix. + Attributes: actual_label (str): The original label of this row. @@ -525,6 +532,7 @@ class Row(proto.Message): class ClusteringMetrics(proto.Message): r"""Evaluation metrics for clustering models. + Attributes: davies_bouldin_index (google.protobuf.wrappers_pb2.DoubleValue): Davies-Bouldin index. @@ -537,6 +545,7 @@ class ClusteringMetrics(proto.Message): class Cluster(proto.Message): r"""Message containing the information about one cluster. + Attributes: centroid_id (int): Centroid id. @@ -550,6 +559,7 @@ class Cluster(proto.Message): class FeatureValue(proto.Message): r"""Representative value of a single feature within the cluster. + Attributes: feature_column (str): The feature column name. @@ -562,6 +572,7 @@ class FeatureValue(proto.Message): class CategoricalValue(proto.Message): r"""Representative value of a categorical feature. + Attributes: category_counts (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): Counts of all categories for the categorical feature. If @@ -573,6 +584,7 @@ class CategoricalValue(proto.Message): class CategoryCount(proto.Message): r"""Represents the count of a single category within the cluster. + Attributes: category (str): The name of category. @@ -668,6 +680,7 @@ class RankingMetrics(proto.Message): class ArimaForecastingMetrics(proto.Message): r"""Model evaluation metrics for ARIMA forecasting models. + Attributes: non_seasonal_order (Sequence[google.cloud.bigquery_v2.types.Model.ArimaOrder]): Non-seasonal order. @@ -857,6 +870,7 @@ class ArimaOrder(proto.Message): class ArimaFittingMetrics(proto.Message): r"""ARIMA model fitting metrics. + Attributes: log_likelihood (float): Log-likelihood. @@ -888,6 +902,7 @@ class GlobalExplanation(proto.Message): class Explanation(proto.Message): r"""Explanation for a single feature. + Attributes: feature_name (str): Full name of the feature. For non-numerical features, will @@ -910,6 +925,7 @@ class Explanation(proto.Message): class TrainingRun(proto.Message): r"""Information about a single training query run for the model. + Attributes: training_options (google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions): Options that were used for this training run, @@ -935,6 +951,7 @@ class TrainingRun(proto.Message): class TrainingOptions(proto.Message): r"""Options used in model training. + Attributes: max_iterations (int): The maximum number of iterations in training. @@ -1182,6 +1199,7 @@ class TrainingOptions(proto.Message): class IterationResult(proto.Message): r"""Information about a single iteration of the training run. + Attributes: index (google.protobuf.wrappers_pb2.Int32Value): Index of the iteration, 0 based. @@ -1205,6 +1223,7 @@ class IterationResult(proto.Message): class ClusterInfo(proto.Message): r"""Information about a single cluster for clustering model. + Attributes: centroid_id (int): Centroid id. @@ -1241,6 +1260,7 @@ class ArimaResult(proto.Message): class ArimaCoefficients(proto.Message): r"""Arima coefficients. + Attributes: auto_regressive_coefficients (Sequence[float]): Auto-regressive coefficients, an array of @@ -1263,6 +1283,7 @@ class ArimaCoefficients(proto.Message): class ArimaModelInfo(proto.Message): r"""Arima model information. + Attributes: non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): Non-seasonal order. @@ -1409,6 +1430,7 @@ class ArimaModelInfo(proto.Message): class GetModelRequest(proto.Message): r""" + Attributes: project_id (str): Required. Project ID of the requested model. @@ -1425,6 +1447,7 @@ class GetModelRequest(proto.Message): class PatchModelRequest(proto.Message): r""" + Attributes: project_id (str): Required. Project ID of the model to patch. @@ -1447,6 +1470,7 @@ class PatchModelRequest(proto.Message): class DeleteModelRequest(proto.Message): r""" + Attributes: project_id (str): Required. Project ID of the model to delete. @@ -1463,6 +1487,7 @@ class DeleteModelRequest(proto.Message): class ListModelsRequest(proto.Message): r""" + Attributes: project_id (str): Required. Project ID of the models to list. @@ -1487,6 +1512,7 @@ class ListModelsRequest(proto.Message): class ListModelsResponse(proto.Message): r""" + Attributes: models (Sequence[google.cloud.bigquery_v2.types.Model]): Models in the requested dataset. Only the following fields diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py index a9ebad61361b..544377f61454 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py @@ -23,6 +23,7 @@ class ModelReference(proto.Message): r"""Id path of a model. + Attributes: project_id (str): Required. The ID of the project containing diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py index 7a845fc4885e..69a221c3c733 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py @@ -78,6 +78,7 @@ class TypeKind(proto.Enum): class StandardSqlField(proto.Message): r"""A field or a column. + Attributes: name (str): Optional. The name of this field. Can be @@ -96,6 +97,7 @@ class StandardSqlField(proto.Message): class StandardSqlStructType(proto.Message): r""" + Attributes: fields (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): @@ -106,6 +108,7 @@ class StandardSqlStructType(proto.Message): class StandardSqlTableType(proto.Message): r"""A table type + Attributes: columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): The columns in this table type diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py index d56e5b09fcce..da206b4d7e95 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py @@ -23,6 +23,7 @@ class TableReference(proto.Message): r""" + Attributes: project_id (str): Required. The ID of the project containing From c3350b4be95aa0e25f54fc0827808ca2246b04c0 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Mon, 11 Oct 2021 15:10:30 -0700 Subject: [PATCH 1325/2016] chore: add DatasetListItem type (#1017) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _Thank_ you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #995 🦕 --- .../google/cloud/bigquery/client.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a8a1c1e16c01..9cb6af8f0485 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -549,7 +549,7 @@ def _dataset_from_arg(self, dataset): def create_dataset( self, - dataset: Union[str, Dataset, DatasetReference], + dataset: Union[str, Dataset, DatasetReference, DatasetListItem], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, timeout: float = DEFAULT_TIMEOUT, @@ -679,7 +679,7 @@ def create_routine( def create_table( self, - table: Union[str, Table, TableReference], + table: Union[str, Table, TableReference, TableListItem], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, timeout: float = DEFAULT_TIMEOUT, @@ -693,6 +693,7 @@ def create_table( table (Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ str, \ ]): A :class:`~google.cloud.bigquery.table.Table` to create. @@ -1295,7 +1296,7 @@ def update_table( def list_models( self, - dataset: Union[Dataset, DatasetReference, str], + dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, @@ -1372,7 +1373,7 @@ def api_request(*args, **kwargs): def list_routines( self, - dataset: Union[Dataset, DatasetReference, str], + dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, @@ -1449,7 +1450,7 @@ def api_request(*args, **kwargs): def list_tables( self, - dataset: Union[Dataset, DatasetReference, str], + dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, @@ -1525,7 +1526,7 @@ def api_request(*args, **kwargs): def delete_dataset( self, - dataset: Union[Dataset, DatasetReference, str], + dataset: Union[Dataset, DatasetReference, DatasetListItem, str], delete_contents: bool = False, retry: retries.Retry = DEFAULT_RETRY, timeout: float = DEFAULT_TIMEOUT, From 6ab78c9f6bc644f2866df2b65efb3d20a55638f4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 12 Oct 2021 15:16:33 -0500 Subject: [PATCH 1326/2016] chore: disable required linear history on v3 branch (#1018) This should prevent conflicts when we sync v3 with main. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) --- .../google-cloud-bigquery/.github/sync-repo-settings.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml index 6572e59822de..01affbae53a8 100644 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -1,9 +1,12 @@ # https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings +# Allow merge commits to sync main and v3 with fewer conflicts. +mergeCommitAllowed: true # Rules for main branch protection branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. # Defaults to `main` - pattern: main + requiresLinearHistory: true requiresCodeOwnerReviews: true requiresStrictStatusChecks: true requiredStatusCheckContexts: @@ -15,6 +18,7 @@ branchProtectionRules: - 'Samples - Python 3.7' - 'Samples - Python 3.8' - pattern: v3 + requiresLinearHistory: false requiresCodeOwnerReviews: true requiresStrictStatusChecks: true requiredStatusCheckContexts: From 4ae4d66f68cd77cfe19871c398ed9635335edd21 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Wed, 13 Oct 2021 13:17:26 -0700 Subject: [PATCH 1327/2016] feat: allow queryJob.result() to be called on a dryRun (#1015) * feat: allow queryJob.result() to be called on a dryRun * update to return EmptyRowIterator --- .../google/cloud/bigquery/job/query.py | 2 ++ .../tests/unit/job/test_query.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 0cb4798be07b..d9c796cf7326 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1318,6 +1318,8 @@ def result( If Non-``None`` and non-default ``job_retry`` is provided and the job is not retryable. """ + if self.dry_run: + return _EmptyRowIterator() try: retry_do_query = getattr(self, "_retry_do_query", None) if retry_do_query is not None: diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 4c598d797bcd..8c0b944b0087 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -26,6 +26,7 @@ from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery.query +from google.cloud.bigquery.table import _EmptyRowIterator from ..helpers import make_connection @@ -989,6 +990,19 @@ def test_result(self): [query_results_call, query_results_call, reload_call, query_page_call] ) + def test_result_dry_run(self): + job_resource = self._make_resource(started=True, location="EU") + job_resource["configuration"]["dryRun"] = True + conn = make_connection() + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + result = job.result() + + calls = conn.api_request.mock_calls + self.assertIsInstance(result, _EmptyRowIterator) + self.assertEqual(calls, []) + def test_result_with_done_job_calls_get_query_results(self): query_resource_done = { "jobComplete": True, From 0a7b3b9c8b2febf59b7014f8cbf3b8fbb8038eaf Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 14 Oct 2021 10:32:11 -0500 Subject: [PATCH 1328/2016] feat: add `QueryJob.schema` property for dry run queries (#1014) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Issue discovered while investigating what properties are needed in #967 --- .../google/cloud/bigquery/job/base.py | 4 +- .../google/cloud/bigquery/job/query.py | 24 ++++++-- .../tests/system/test_query.py | 29 ++++++++++ .../tests/unit/job/test_query.py | 56 ++++++++++++------- 4 files changed, 89 insertions(+), 24 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/system/test_query.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 69818109201c..23c5aa8db3ca 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -1005,7 +1005,9 @@ def from_api_repr(cls, resource: dict, client) -> "UnknownJob": Returns: UnknownJob: Job corresponding to the resource. """ - job_ref_properties = resource.get("jobReference", {"projectId": client.project}) + job_ref_properties = resource.get( + "jobReference", {"projectId": client.project, "jobId": None} + ) job_ref = _JobReference._from_api_repr(job_ref_properties) job = cls(job_ref, client) # Populate the job reference with the project, even if it has been diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index d9c796cf7326..ab24af202e89 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -18,7 +18,7 @@ import copy import re import typing -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, List, Optional, Union from google.api_core import exceptions from google.api_core.future import polling as polling_future @@ -38,6 +38,7 @@ from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import RoutineReference +from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import _table_arg_to_table_ref @@ -57,6 +58,7 @@ import pyarrow from google.api_core import retry as retries from google.cloud import bigquery_storage + from google.cloud.bigquery.client import Client from google.cloud.bigquery.table import RowIterator @@ -853,7 +855,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource: dict, client) -> "QueryJob": + def from_api_repr(cls, resource: dict, client: "Client") -> "QueryJob": """Factory: construct a job given its API representation Args: @@ -866,8 +868,10 @@ def from_api_repr(cls, resource: dict, client) -> "QueryJob": Returns: google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``. """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job_ref_properties = resource.setdefault( + "jobReference", {"projectId": client.project, "jobId": None} + ) + job_ref = _JobReference._from_api_repr(job_ref_properties) job = cls(job_ref, None, client=client) job._set_properties(resource) return job @@ -887,6 +891,18 @@ def query_plan(self): plan_entries = self._job_statistics().get("queryPlan", ()) return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] + @property + def schema(self) -> Optional[List[SchemaField]]: + """The schema of the results. + + Present only for successful dry run of non-legacy SQL queries. + """ + resource = self._job_statistics().get("schema") + if resource is None: + return None + fields = resource.get("fields", []) + return [SchemaField.from_api_repr(field) for field in fields] + @property def timeline(self): """List(TimelineEntry): Return the query execution timeline diff --git a/packages/google-cloud-bigquery/tests/system/test_query.py b/packages/google-cloud-bigquery/tests/system/test_query.py new file mode 100644 index 000000000000..24758595be81 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/system/test_query.py @@ -0,0 +1,29 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + + +def test_dry_run(bigquery_client: bigquery.Client, scalars_table: str): + query_config = bigquery.QueryJobConfig() + query_config.dry_run = True + + query_string = f"SELECT * FROM {scalars_table}" + query_job = bigquery_client.query(query_string, job_config=query_config,) + + # Note: `query_job.result()` is not necessary on a dry run query. All + # necessary information is returned in the initial response. + assert query_job.dry_run is True + assert query_job.total_bytes_processed > 0 + assert len(query_job.schema) > 0 diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 8c0b944b0087..17baacf5b5a4 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -269,25 +269,6 @@ def test_ctor_w_query_parameters(self): job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) self.assertEqual(job.query_parameters, query_parameters) - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - def test_from_api_repr_bare(self): self._setUpConstants() client = _make_client(project=self.PROJECT) @@ -1405,6 +1386,43 @@ def test_result_transport_timeout_error(self): with call_api_patch, self.assertRaises(concurrent.futures.TimeoutError): job.result(timeout=1) + def test_no_schema(self): + client = _make_client(project=self.PROJECT) + resource = {} + klass = self._get_target_class() + job = klass.from_api_repr(resource, client=client) + assert job.schema is None + + def test_schema(self): + client = _make_client(project=self.PROJECT) + resource = { + "statistics": { + "query": { + "schema": { + "fields": [ + {"mode": "NULLABLE", "name": "bool_col", "type": "BOOLEAN"}, + { + "mode": "NULLABLE", + "name": "string_col", + "type": "STRING", + }, + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP", + }, + ] + }, + }, + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(resource, client=client) + assert len(job.schema) == 3 + assert job.schema[0].field_type == "BOOLEAN" + assert job.schema[1].field_type == "STRING" + assert job.schema[2].field_type == "TIMESTAMP" + def test__begin_error(self): from google.cloud import exceptions From c74d33d33e07fba7c0c67d2a7d94dca9a098c89a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 15 Oct 2021 15:06:13 -0500 Subject: [PATCH 1329/2016] docs: document ScriptStatistics and other missing resource classes (#1023) While implementing Sessions, I noticed several missing classes in the reference docs. Using "automodule" since experience has shown we often forget to add new classes to `docs/reference.rst`. --- .../google-cloud-bigquery/docs/job_base.rst | 5 +++++ packages/google-cloud-bigquery/docs/query.rst | 5 +++++ .../google-cloud-bigquery/docs/reference.rst | 17 ++++++++--------- .../google/cloud/bigquery/job/base.py | 12 ++++++------ 4 files changed, 24 insertions(+), 15 deletions(-) create mode 100644 packages/google-cloud-bigquery/docs/job_base.rst create mode 100644 packages/google-cloud-bigquery/docs/query.rst diff --git a/packages/google-cloud-bigquery/docs/job_base.rst b/packages/google-cloud-bigquery/docs/job_base.rst new file mode 100644 index 000000000000..f5ef06b88e47 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/job_base.rst @@ -0,0 +1,5 @@ +Common Job Resource Classes +=========================== + +.. automodule:: google.cloud.bigquery.job.base + :members: diff --git a/packages/google-cloud-bigquery/docs/query.rst b/packages/google-cloud-bigquery/docs/query.rst new file mode 100644 index 000000000000..d3cb8fe83537 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/query.rst @@ -0,0 +1,5 @@ +Query Resource Classes +====================== + +.. automodule:: google.cloud.bigquery.query + :members: diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index d2d2eed311a1..00f64746f09f 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -47,7 +47,6 @@ Job Classes job.CopyJob job.LoadJob job.ExtractJob - job.UnknownJob Job-Related Types ----------------- @@ -68,7 +67,11 @@ Job-Related Types job.SourceFormat job.WriteDisposition job.SchemaUpdateOption - job.TransactionInfo + +.. toctree:: + :maxdepth: 2 + + job_base Dataset @@ -134,14 +137,10 @@ Schema Query ===== -.. autosummary:: - :toctree: generated +.. toctree:: + :maxdepth: 2 - query.ArrayQueryParameter - query.ScalarQueryParameter - query.ScalarQueryParameterType - query.StructQueryParameter - query.UDFResource + query Retries diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 23c5aa8db3ca..9e381ded6f5e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -19,7 +19,7 @@ import http import threading import typing -from typing import Dict, Optional +from typing import Dict, Optional, Sequence from google.api_core import exceptions import google.api_core.future.polling @@ -193,7 +193,8 @@ def parent_job_id(self): return _helpers._get_sub_prop(self._properties, ["statistics", "parentJobId"]) @property - def script_statistics(self): + def script_statistics(self) -> Optional["ScriptStatistics"]: + """Statistics for a child job of a script.""" resource = _helpers._get_sub_prop( self._properties, ["statistics", "scriptStatistics"] ) @@ -968,9 +969,8 @@ def __init__(self, resource): self._properties = resource @property - def stack_frames(self): - """List[ScriptStackFrame]: Stack trace where the current evaluation - happened. + def stack_frames(self) -> Sequence[ScriptStackFrame]: + """Stack trace where the current evaluation happened. Shows line/column/procedure name of each frame on the stack at the point where the current evaluation happened. @@ -982,7 +982,7 @@ def stack_frames(self): ] @property - def evaluation_kind(self): + def evaluation_kind(self) -> Optional[str]: """str: Indicates the type of child job. Possible values include ``STATEMENT`` and ``EXPRESSION``. From 820060727b482ee7f45a3659ed35a334bf7fcfbc Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 25 Oct 2021 18:04:07 -0400 Subject: [PATCH 1330/2016] chore(python): push cloud library docs to staging bucket for Cloud RAD (#1030) Source-Link: https://github.com/googleapis/synthtool/commit/7fd61f8efae782a7cfcecc599faf52f9737fe584 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:4ee57a76a176ede9087c14330c625a71553cf9c72828b2c0ca12f5338171ba60 Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/.kokoro/docs/common.cfg | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 7d98291cc35f..108063d4dee4 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:58f73ba196b5414782605236dd0712a73541b44ff2ff4d3a36ec41092dd6fa5b + digest: sha256:4ee57a76a176ede9087c14330c625a71553cf9c72828b2c0ca12f5338171ba60 diff --git a/packages/google-cloud-bigquery/.kokoro/docs/common.cfg b/packages/google-cloud-bigquery/.kokoro/docs/common.cfg index 0c99ae611dfe..41b86fc29f15 100644 --- a/packages/google-cloud-bigquery/.kokoro/docs/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/docs/common.cfg @@ -30,6 +30,7 @@ env_vars: { env_vars: { key: "V2_STAGING_BUCKET" + # Push google cloud library docs to the Cloud RAD bucket `docs-staging-v2` value: "docs-staging-v2" } From 60f4cc95751c7d85faa157e82117bf0582d863ca Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 26 Oct 2021 11:04:13 -0500 Subject: [PATCH 1331/2016] feat: add support for INTERVAL data type to `list_rows` (#840) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test: refactor `list_rows` tests and add test for scalars * WIP: INTERVAL support * feat: add support for INTERVAL data type to `list_rows` * fix relativedelta construction for non-microseconds * WIP: support INTERVAL query params * remove dead code * INTERVAL not supported in query parameters * revert query parameter changes * add validation error for interval * add unit tests for extreme intervals * add dateutil to intersphinx * use dictionary for intersphinx * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * add test case for trailing . * explicit none * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * truncate nanoseconds * use \d group for digits * use \d for consistency Co-authored-by: Owl Bot Co-authored-by: Peter Lamut --- packages/google-cloud-bigquery/docs/conf.py | 3 +- .../google/cloud/bigquery/_helpers.py | 47 +++++- .../google/cloud/bigquery/enums.py | 1 + packages/google-cloud-bigquery/owlbot.py | 7 +- packages/google-cloud-bigquery/renovate.json | 5 +- packages/google-cloud-bigquery/setup.py | 1 + .../testing/constraints-3.6.txt | 1 + .../tests/system/test_client.py | 5 - .../tests/system/test_list_rows.py | 8 + .../tests/unit/helpers/test_from_json.py | 157 ++++++++++++++++++ 10 files changed, 222 insertions(+), 13 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/helpers/test_from_json.py diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 32995163606e..0784da0b2a78 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -366,8 +366,9 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), - "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), + "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None), "geopandas": ("https://geopandas.org/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/dev", None), } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index d7189d322f20..e95d38545598 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -19,8 +19,9 @@ import decimal import math import re -from typing import Any, Union +from typing import Any, Optional, Union +from dateutil import relativedelta from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds @@ -45,6 +46,14 @@ re.VERBOSE, ) +# BigQuery sends INTERVAL data in "canonical format" +# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type +_INTERVAL_PATTERN = re.compile( + r"(?P-?)(?P\d+)-(?P\d+) " + r"(?P-?\d+) " + r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" +) + _MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") @@ -191,6 +200,41 @@ def _int_from_json(value, field): return int(value) +def _interval_from_json( + value: Optional[str], field +) -> Optional[relativedelta.relativedelta]: + """Coerce 'value' to an interval, if set or not nullable.""" + if not _not_null(value, field): + return None + if value is None: + raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") + + parsed = _INTERVAL_PATTERN.match(value) + if parsed is None: + raise ValueError(f"got interval: '{value}' with unexpected format") + + calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1 + years = calendar_sign * int(parsed.group("years")) + months = calendar_sign * int(parsed.group("months")) + days = int(parsed.group("days")) + time_sign = -1 if parsed.group("time_sign") == "-" else 1 + hours = time_sign * int(parsed.group("hours")) + minutes = time_sign * int(parsed.group("minutes")) + seconds = time_sign * int(parsed.group("seconds")) + fraction = parsed.group("fraction") + microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0 + + return relativedelta.relativedelta( + years=years, + months=months, + days=days, + hours=hours, + minutes=minutes, + seconds=seconds, + microseconds=microseconds, + ) + + def _float_from_json(value, field): """Coerce 'value' to a float, if set or not nullable.""" if _not_null(value, field): @@ -327,6 +371,7 @@ def _record_from_json(value, field): _CELLDATA_FROM_JSON = { "INTEGER": _int_from_json, "INT64": _int_from_json, + "INTERVAL": _interval_from_json, "FLOAT": _float_from_json, "FLOAT64": _float_from_json, "NUMERIC": _decimal_from_json, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index d67cebd4c8d1..0eaaffd2ef49 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -254,6 +254,7 @@ class SqlTypeNames(str, enum.Enum): DATE = "DATE" TIME = "TIME" DATETIME = "DATETIME" + INTERVAL = "INTERVAL" # NOTE: not available in legacy types class SqlParameterScalarTypes: diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 0f6f8fe99ae9..f2f8bea5413e 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -98,8 +98,9 @@ microgenerator=True, split_system_tests=True, intersphinx_dependencies={ - "pandas": "http://pandas.pydata.org/pandas-docs/stable/", + "dateutil": "https://dateutil.readthedocs.io/en/latest/", "geopandas": "https://geopandas.org/", + "pandas": "https://pandas.pydata.org/pandas-docs/dev", }, ) @@ -115,10 +116,6 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", - # Group all renovate PRs together. If this works well, remove this and - # update the shared templates (possibly with configuration option to - # py_library.) - "renovate.json", ], ) diff --git a/packages/google-cloud-bigquery/renovate.json b/packages/google-cloud-bigquery/renovate.json index 713c60bb40f0..c21036d385e5 100644 --- a/packages/google-cloud-bigquery/renovate.json +++ b/packages/google-cloud-bigquery/renovate.json @@ -1,6 +1,9 @@ { "extends": [ - "config:base", "group:all", ":preserveSemverRanges" + "config:base", + "group:all", + ":preserveSemverRanges", + ":disableDependencyDashboard" ], "ignorePaths": [".pre-commit-config.yaml"], "pip_requirements": { diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index e7515493d195..eb8066abcd4c 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -42,6 +42,7 @@ "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", + "python-dateutil >= 2.7.2, <3.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index 23d2724f7bff..59913d588332 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -18,6 +18,7 @@ pandas==0.24.2 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 +python-dateutil==2.7.2 requests==2.18.0 Shapely==1.6.0 six==1.13.0 diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index f6f95c184c31..91bcff15549a 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -37,11 +37,6 @@ except ImportError: # pragma: NO COVER bigquery_storage = None -try: - import fastavro # to parse BQ storage client results -except ImportError: # pragma: NO COVER - fastavro = None - try: import pyarrow import pyarrow.types diff --git a/packages/google-cloud-bigquery/tests/system/test_list_rows.py b/packages/google-cloud-bigquery/tests/system/test_list_rows.py index 70388059ee5a..4c08958c37ac 100644 --- a/packages/google-cloud-bigquery/tests/system/test_list_rows.py +++ b/packages/google-cloud-bigquery/tests/system/test_list_rows.py @@ -15,6 +15,8 @@ import datetime import decimal +from dateutil import relativedelta + from google.cloud import bigquery from google.cloud.bigquery import enums @@ -64,6 +66,9 @@ def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str) assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45) assert row["geography_col"] == "POINT(-122.0838511 37.3860517)" assert row["int64_col"] == 123456789 + assert row["interval_col"] == relativedelta.relativedelta( + years=7, months=11, days=9, hours=4, minutes=15, seconds=37, microseconds=123456 + ) assert row["numeric_col"] == decimal.Decimal("1.23456789") assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819") assert row["float64_col"] == 1.25 @@ -95,6 +100,9 @@ def test_list_rows_scalars_extreme( assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) assert row["geography_col"] == "POINT(-135 90)" assert row["int64_col"] == 9223372036854775807 + assert row["interval_col"] == relativedelta.relativedelta( + years=-10000, days=-3660000, hours=-87840000 + ) assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28") assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37") assert row["float64_col"] == float("Inf") diff --git a/packages/google-cloud-bigquery/tests/unit/helpers/test_from_json.py b/packages/google-cloud-bigquery/tests/unit/helpers/test_from_json.py new file mode 100644 index 000000000000..65b054f446bb --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/helpers/test_from_json.py @@ -0,0 +1,157 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dateutil.relativedelta import relativedelta +import pytest + +from google.cloud.bigquery.schema import SchemaField + + +def create_field(mode="NULLABLE", type_="IGNORED"): + return SchemaField("test_field", type_, mode=mode) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +def test_interval_from_json_w_none_nullable(mut): + got = mut._interval_from_json(None, create_field()) + assert got is None + + +def test_interval_from_json_w_none_required(mut): + with pytest.raises(TypeError): + mut._interval_from_json(None, create_field(mode="REQUIRED")) + + +def test_interval_from_json_w_invalid_format(mut): + with pytest.raises(ValueError, match="NOT_AN_INTERVAL"): + mut._interval_from_json("NOT_AN_INTERVAL", create_field()) + + +@pytest.mark.parametrize( + ("value", "expected"), + ( + ("0-0 0 0:0:0", relativedelta()), + # SELECT INTERVAL X YEAR + ("-10000-0 0 0:0:0", relativedelta(years=-10000)), + ("-1-0 0 0:0:0", relativedelta(years=-1)), + ("1-0 0 0:0:0", relativedelta(years=1)), + ("10000-0 0 0:0:0", relativedelta(years=10000)), + # SELECT INTERVAL X MONTH + ("-0-11 0 0:0:0", relativedelta(months=-11)), + ("-0-1 0 0:0:0", relativedelta(months=-1)), + ("0-1 0 0:0:0", relativedelta(months=1)), + ("0-11 0 0:0:0", relativedelta(months=11)), + # SELECT INTERVAL X DAY + ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)), + ("0-0 -1 0:0:0", relativedelta(days=-1)), + ("0-0 1 0:0:0", relativedelta(days=1)), + ("0-0 3660000 0:0:0", relativedelta(days=3660000)), + # SELECT INTERVAL X HOUR + ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)), + ("0-0 0 -1:0:0", relativedelta(hours=-1)), + ("0-0 0 1:0:0", relativedelta(hours=1)), + ("0-0 0 87840000:0:0", relativedelta(hours=87840000)), + # SELECT INTERVAL X MINUTE + ("0-0 0 -0:59:0", relativedelta(minutes=-59)), + ("0-0 0 -0:1:0", relativedelta(minutes=-1)), + ("0-0 0 0:1:0", relativedelta(minutes=1)), + ("0-0 0 0:59:0", relativedelta(minutes=59)), + # SELECT INTERVAL X SECOND + ("0-0 0 -0:0:59", relativedelta(seconds=-59)), + ("0-0 0 -0:0:1", relativedelta(seconds=-1)), + ("0-0 0 0:0:1", relativedelta(seconds=1)), + ("0-0 0 0:0:59", relativedelta(seconds=59)), + # SELECT (INTERVAL -1 SECOND) / 1000000 + ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)), + ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)), + ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)), + ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)), + ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)), + # Test with multiple digits in each section. + ( + "32-11 45 67:16:23.987654", + relativedelta( + years=32, + months=11, + days=45, + hours=67, + minutes=16, + seconds=23, + microseconds=987654, + ), + ), + ( + "-32-11 -45 -67:16:23.987654", + relativedelta( + years=-32, + months=-11, + days=-45, + hours=-67, + minutes=-16, + seconds=-23, + microseconds=-987654, + ), + ), + # Test with mixed +/- sections. + ( + "9999-9 -999999 9999999:59:59.999999", + relativedelta( + years=9999, + months=9, + days=-999999, + hours=9999999, + minutes=59, + seconds=59, + microseconds=999999, + ), + ), + # Test with fraction that is not microseconds. + ("0-0 0 0:0:42.", relativedelta(seconds=42)), + ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)), + ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)), + ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)), + ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)), + # Fractional seconds can cause rounding problems if cast to float. See: + # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 + ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)), + ( + "0-0 0 01:01:01.010101", + relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101), + ), + ( + "0-0 0 09:09:09.090909", + relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909), + ), + ( + "0-0 0 11:11:11.111111", + relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111), + ), + ( + "0-0 0 19:16:23.987654", + relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654), + ), + # Nanoseconds are not expected, but should not cause error. + ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)), + ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)), + ), +) +def test_w_string_values(mut, value, expected): + got = mut._interval_from_json(value, create_field()) + assert got == expected From bff0653173532d7b3b1aaa8eaaf7ad75190a03a6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 27 Oct 2021 02:12:20 -0500 Subject: [PATCH 1332/2016] feat: add session and connection properties to QueryJobConfig (#1024) * feat: add session and connection properties to QueryJobConfig * add unit tests * adjust types and add versionadded * add missing url * link to ConnectionProperty docs * add resource classes to root module --- .../google/cloud/bigquery/__init__.py | 4 + .../google/cloud/bigquery/job/base.py | 31 ++++++++ .../google/cloud/bigquery/job/query.py | 74 +++++++++++++++++-- .../google/cloud/bigquery/query.py | 61 ++++++++++++++- .../tests/system/test_query.py | 26 +++++++ .../tests/unit/job/test_base.py | 9 +++ .../tests/unit/job/test_query.py | 2 + .../tests/unit/job/test_query_config.py | 21 ++++++ 8 files changed, 220 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index d2b1dd26d914..b3c492125cd3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -52,6 +52,7 @@ from google.cloud.bigquery.external_config import ExternalSourceFormat from google.cloud.bigquery.format_options import AvroOptions from google.cloud.bigquery.format_options import ParquetOptions +from google.cloud.bigquery.job.base import SessionInfo from google.cloud.bigquery.job import Compression from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import CopyJobConfig @@ -77,6 +78,7 @@ from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ArrayQueryParameterType +from google.cloud.bigquery.query import ConnectionProperty from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter @@ -104,6 +106,7 @@ "__version__", "Client", # Queries + "ConnectionProperty", "QueryJob", "QueryJobConfig", "ArrayQueryParameter", @@ -132,6 +135,7 @@ "ExtractJobConfig", "LoadJob", "LoadJobConfig", + "SessionInfo", "UnknownJob", # Models "Model", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 9e381ded6f5e..88d6bec149c1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -202,6 +202,19 @@ def script_statistics(self) -> Optional["ScriptStatistics"]: return None return ScriptStatistics(resource) + @property + def session_info(self) -> Optional["SessionInfo"]: + """[Preview] Information of the session if this job is part of one. + + .. versionadded:: 2.29.0 + """ + resource = _helpers._get_sub_prop( + self._properties, ["statistics", "sessionInfo"] + ) + if resource is None: + return None + return SessionInfo(resource) + @property def num_child_jobs(self): """The number of child jobs executed. @@ -990,6 +1003,24 @@ def evaluation_kind(self) -> Optional[str]: return self._properties.get("evaluationKind") +class SessionInfo: + """[Preview] Information of the session if this job is part of one. + + .. versionadded:: 2.29.0 + + Args: + resource (Map[str, Any]): JSON representation of object. + """ + + def __init__(self, resource): + self._properties = resource + + @property + def session_id(self) -> Optional[str]: + """The ID of the session.""" + return self._properties.get("sessionId") + + class UnknownJob(_AsyncJob): """A job whose type cannot be determined.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index ab24af202e89..942c85fc3d4c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -18,7 +18,7 @@ import copy import re import typing -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, Iterable, List, Optional, Union from google.api_core import exceptions from google.api_core.future import polling as polling_future @@ -31,11 +31,14 @@ from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery import _helpers -from google.cloud.bigquery.query import _query_param_from_api_repr -from google.cloud.bigquery.query import ArrayQueryParameter -from google.cloud.bigquery.query import ScalarQueryParameter -from google.cloud.bigquery.query import StructQueryParameter -from google.cloud.bigquery.query import UDFResource +from google.cloud.bigquery.query import ( + _query_param_from_api_repr, + ArrayQueryParameter, + ConnectionProperty, + ScalarQueryParameter, + StructQueryParameter, + UDFResource, +) from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -269,6 +272,24 @@ def allow_large_results(self): def allow_large_results(self, value): self._set_sub_prop("allowLargeResults", value) + @property + def connection_properties(self) -> List[ConnectionProperty]: + """Connection properties. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.connection_properties + + .. versionadded:: 2.29.0 + """ + resource = self._get_sub_prop("connectionProperties", []) + return [ConnectionProperty.from_api_repr(prop) for prop in resource] + + @connection_properties.setter + def connection_properties(self, value: Iterable[ConnectionProperty]): + self._set_sub_prop( + "connectionProperties", [prop.to_api_repr() for prop in value], + ) + @property def create_disposition(self): """google.cloud.bigquery.job.CreateDisposition: Specifies behavior @@ -283,6 +304,27 @@ def create_disposition(self): def create_disposition(self, value): self._set_sub_prop("createDisposition", value) + @property + def create_session(self) -> Optional[bool]: + """[Preview] If :data:`True`, creates a new session, where + :attr:`~google.cloud.bigquery.job.QueryJob.session_info` will contain a + random server generated session id. + + If :data:`False`, runs query with an existing ``session_id`` passed in + :attr:`~google.cloud.bigquery.job.QueryJobConfig.connection_properties`, + otherwise runs query in non-session mode. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.create_session + + .. versionadded:: 2.29.0 + """ + return self._get_sub_prop("createSession") + + @create_session.setter + def create_session(self, value: Optional[bool]): + self._set_sub_prop("createSession", value) + @property def default_dataset(self): """google.cloud.bigquery.dataset.DatasetReference: the default dataset @@ -613,7 +655,7 @@ def schema_update_options(self, values): @property def script_options(self) -> ScriptOptions: - """Connection properties which can modify the query behavior. + """Options controlling the execution of scripts. https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#scriptoptions """ @@ -694,6 +736,15 @@ def allow_large_results(self): """ return self._configuration.allow_large_results + @property + def connection_properties(self) -> List[ConnectionProperty]: + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.connection_properties`. + + .. versionadded:: 2.29.0 + """ + return self._configuration.connection_properties + @property def create_disposition(self): """See @@ -701,6 +752,15 @@ def create_disposition(self): """ return self._configuration.create_disposition + @property + def create_session(self) -> Optional[bool]: + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.create_session`. + + .. versionadded:: 2.29.0 + """ + return self._configuration.create_session + @property def default_dataset(self): """See diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 1f449f189aa1..708f5f47bace 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -18,7 +18,7 @@ import copy import datetime import decimal -from typing import Optional, Union +from typing import Any, Optional, Dict, Union from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json @@ -31,6 +31,65 @@ ] +class ConnectionProperty: + """A connection-level property to customize query behavior. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/ConnectionProperty + + Args: + key: + The key of the property to set, for example, ``'time_zone'`` or + ``'session_id'``. + value: The value of the property to set. + """ + + def __init__(self, key: str = "", value: str = ""): + self._properties = { + "key": key, + "value": value, + } + + @property + def key(self) -> str: + """Name of the property. + + For example: + + * ``time_zone`` + * ``session_id`` + """ + return self._properties["key"] + + @property + def value(self) -> str: + """Value of the property.""" + return self._properties["value"] + + @classmethod + def from_api_repr(cls, resource) -> "ConnectionProperty": + """Construct :class:`~google.cloud.bigquery.query.ConnectionProperty` + from JSON resource. + + Args: + resource: JSON representation. + + Returns: + A connection property. + """ + value = cls() + value._properties = resource + return value + + def to_api_repr(self) -> Dict[str, Any]: + """Construct JSON API representation for the connection property. + + Returns: + JSON mapping + """ + return self._properties + + class UDFResource(object): """Describe a single user-defined function (UDF) resource. diff --git a/packages/google-cloud-bigquery/tests/system/test_query.py b/packages/google-cloud-bigquery/tests/system/test_query.py index 24758595be81..649120a7ef43 100644 --- a/packages/google-cloud-bigquery/tests/system/test_query.py +++ b/packages/google-cloud-bigquery/tests/system/test_query.py @@ -27,3 +27,29 @@ def test_dry_run(bigquery_client: bigquery.Client, scalars_table: str): assert query_job.dry_run is True assert query_job.total_bytes_processed > 0 assert len(query_job.schema) > 0 + + +def test_session(bigquery_client: bigquery.Client): + initial_config = bigquery.QueryJobConfig() + initial_config.create_session = True + initial_query = """ + CREATE TEMPORARY TABLE numbers(id INT64) + AS + SELECT * FROM UNNEST([1, 2, 3, 4, 5]) AS id; + """ + initial_job = bigquery_client.query(initial_query, job_config=initial_config) + initial_job.result() + session_id = initial_job.session_info.session_id + assert session_id is not None + + second_config = bigquery.QueryJobConfig() + second_config.connection_properties = [ + bigquery.ConnectionProperty("session_id", session_id), + ] + second_job = bigquery_client.query( + "SELECT COUNT(*) FROM numbers;", job_config=second_config + ) + rows = list(second_job.result()) + + assert len(rows) == 1 + assert rows[0][0] == 5 diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index e320c72cbfa7..250be83bb488 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -228,6 +228,15 @@ def test_script_statistics(self): self.assertEqual(stack_frame.end_column, 14) self.assertEqual(stack_frame.text, "QUERY TEXT") + def test_session_info(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIsNone(job.session_info) + job._properties["statistics"] = {"sessionInfo": {"sessionId": "abcdefg"}} + self.assertIsNotNone(job.session_info) + self.assertEqual(job.session_info.session_id, "abcdefg") + def test_transaction_info(self): from google.cloud.bigquery.job.base import TransactionInfo diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 17baacf5b5a4..4da035b78cb6 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -281,6 +281,8 @@ def test_from_api_repr_bare(self): job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(len(job.connection_properties), 0) + self.assertIsNone(job.create_session) def test_from_api_repr_with_encryption(self): self._setUpConstants() diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py index 109cf7e44d8b..7818236f4846 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py @@ -152,6 +152,27 @@ def test_clustering_fields(self): config.clustering_fields = None self.assertIsNone(config.clustering_fields) + def test_connection_properties(self): + from google.cloud.bigquery.job.query import ConnectionProperty + + config = self._get_target_class()() + self.assertEqual(len(config.connection_properties), 0) + + session_id = ConnectionProperty("session_id", "abcd") + time_zone = ConnectionProperty("time_zone", "America/Chicago") + config.connection_properties = [session_id, time_zone] + self.assertEqual(len(config.connection_properties), 2) + self.assertEqual(config.connection_properties[0].key, "session_id") + self.assertEqual(config.connection_properties[0].value, "abcd") + self.assertEqual(config.connection_properties[1].key, "time_zone") + self.assertEqual(config.connection_properties[1].value, "America/Chicago") + + def test_create_session(self): + config = self._get_target_class()() + self.assertIsNone(config.create_session) + config.create_session = True + self.assertTrue(config.create_session) + def test_from_api_repr_empty(self): klass = self._get_target_class() config = klass.from_api_repr({}) From b9d6d59505fa67bc909b58dadcfb38818b871b8f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 27 Oct 2021 11:40:35 -0500 Subject: [PATCH 1333/2016] deps: allow pyarrow 6.x (#1031) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * deps: allow pyarrow 6.x * dry pyarrow deps * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/setup.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index eb8066abcd4c..95dad190a42e 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -28,6 +28,7 @@ # 'Development Status :: 4 - Beta' # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" +pyarrow_dep = ["pyarrow >= 3.0.0, < 7.0dev"] dependencies = [ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 # NOTE: Maintainers, please do not require google-api-core>=2.x.x @@ -55,11 +56,11 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 3.0.0, < 6.0dev", - ], + ] + + pyarrow_dep, "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], - "pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"], - "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], + "pandas": ["pandas>=0.23.0"] + pyarrow_dep, + "bignumeric_type": pyarrow_dep, "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", From 61c06383b98e87f29cb2a504d742ae03681c0a94 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 27 Oct 2021 12:04:03 -0500 Subject: [PATCH 1334/2016] chore: release 2.29.0 (#1022) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 21 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index d15f2285182a..0d45d501d8c7 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,27 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.29.0](https://www.github.com/googleapis/python-bigquery/compare/v2.28.1...v2.29.0) (2021-10-27) + + +### Features + +* add `QueryJob.schema` property for dry run queries ([#1014](https://www.github.com/googleapis/python-bigquery/issues/1014)) ([2937fa1](https://www.github.com/googleapis/python-bigquery/commit/2937fa1386898766c561579fd39d42958182d260)) +* add session and connection properties to QueryJobConfig ([#1024](https://www.github.com/googleapis/python-bigquery/issues/1024)) ([e4c94f4](https://www.github.com/googleapis/python-bigquery/commit/e4c94f446c27eb474f30b033c1b62d11bd0acd98)) +* add support for INTERVAL data type to `list_rows` ([#840](https://www.github.com/googleapis/python-bigquery/issues/840)) ([e37380a](https://www.github.com/googleapis/python-bigquery/commit/e37380a959cbd5bb9cbbf6807f0a8ea147e0a713)) +* allow queryJob.result() to be called on a dryRun ([#1015](https://www.github.com/googleapis/python-bigquery/issues/1015)) ([685f06a](https://www.github.com/googleapis/python-bigquery/commit/685f06a5e7b5df17a53e9eb340ff04ecd1e51d1d)) + + +### Documentation + +* document ScriptStatistics and other missing resource classes ([#1023](https://www.github.com/googleapis/python-bigquery/issues/1023)) ([6679109](https://www.github.com/googleapis/python-bigquery/commit/66791093c61f262ea063d2a7950fc643915ee693)) +* fix formatting of generated client docstrings ([#1009](https://www.github.com/googleapis/python-bigquery/issues/1009)) ([f7b0ee4](https://www.github.com/googleapis/python-bigquery/commit/f7b0ee45a664295ccc9f209eeeac122af8de3c80)) + + +### Dependencies + +* allow pyarrow 6.x ([#1031](https://www.github.com/googleapis/python-bigquery/issues/1031)) ([1c2de74](https://www.github.com/googleapis/python-bigquery/commit/1c2de74a55046a343bcf9474f67100a82fb05401)) + ### [2.28.1](https://www.github.com/googleapis/python-bigquery/compare/v2.28.0...v2.28.1) (2021-10-07) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 967959b05d09..c8ba30be05f2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.28.1" +__version__ = "2.29.0" From 19f0764aebb1aed426f798aefae3d5c216ba2c28 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Wed, 27 Oct 2021 14:09:08 -0700 Subject: [PATCH 1335/2016] docs(samples): add create external table with hive partitioning (#1033) * docs(samples): add create table hive partitioning sample * refactor --- .../create_table_external_hive_partitioned.py | 73 +++++++++++++++++++ ...te_table_external_hive_partitioned_test.py | 31 ++++++++ 2 files changed, 104 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py new file mode 100644 index 000000000000..2ff8a2220c55 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py @@ -0,0 +1,73 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_external_hive_partitioned(table_id: str): + original_table_id = table_id + # [START bigquery_create_table_external_hivepartitioned] + # Demonstrates creating an external table with hive partitioning. + + # TODO(developer): Set table_id to the ID of the table to create. + table_id = "your-project.your_dataset.your_table_name" + + # TODO(developer): Set source uri. + # Example file: + # gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/dt=2020-11-15/file1.parquet + uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*" + + # TODO(developer): Set source uri prefix. + source_uri_prefix = ( + "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/" + ) + + # [END bigquery_create_table_external_hivepartitioned] + table_id = original_table_id + # [START bigquery_create_table_external_hivepartitioned] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # Configure the external data source. + external_config = bigquery.ExternalConfig("PARQUET") + external_config.source_uris = [uri] + external_config.autodetect = True + + # Configure partitioning options. + hive_partitioning_opts = bigquery.external_config.HivePartitioningOptions() + + # The layout of the files in here is compatible with the layout requirements for hive partitioning, + # so we can add an optional Hive partitioning configuration to leverage the object paths for deriving + # partitioning column information. + + # For more information on how partitions are extracted, see: + # https://cloud.google.com/bigquery/docs/hive-partitioned-queries-gcs + + # We have a "/dt=YYYY-MM-DD/" path component in our example files as documented above. + # Autolayout will expose this as a column named "dt" of type DATE. + hive_partitioning_opts.mode = "AUTO" + hive_partitioning_opts.require_partition_filter = True + hive_partitioning_opts.source_uri_prefix = source_uri_prefix + + external_config.hive_partitioning = hive_partitioning_opts + + table = bigquery.Table(table_id) + table.external_data_configuration = external_config + + table = client.create_table(table) # Make an API request. + print( + "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id) + ) + # [END bigquery_create_table_external_hivepartitioned] + return table diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py new file mode 100644 index 000000000000..c3cdddb5565e --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py @@ -0,0 +1,31 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import create_table_external_hive_partitioned + + +def test_create_table_external_hive_partitioned(capsys, random_table_id): + table = create_table_external_hive_partitioned.create_table_external_hive_partitioned( + random_table_id + ) + + out, _ = capsys.readouterr() + hive_partioning = table.external_data_configuration.hive_partitioning + assert "Created table {}".format(random_table_id) in out + assert ( + hive_partioning.source_uri_prefix + == "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/" + ) + assert hive_partioning.require_partition_filter is True + assert hive_partioning.mode == "AUTO" From eeec273a63f097f1e3eb7c05622682516ff46c8c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 29 Oct 2021 13:30:34 -0500 Subject: [PATCH 1336/2016] docs: add code samples for Jupyter/IPython magics (#1013) Follow-up to https://github.com/GoogleCloudPlatform/python-docs-samples/pull/6889, which removed a BigQuery magics sample for using query parameters. Note: jupyter_tutorial_test.py is a copy of what is in the `samples/snippets` folder. Once the docs have been updated to point to this new version, we can remove that copy and remove the Jupyter/IPython depedencencies from `samples/snippets/requirements.txt`. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) --- .../google-cloud-bigquery/docs/magics.rst | 29 ++ .../google/cloud/bigquery/magics/magics.py | 66 ----- packages/google-cloud-bigquery/noxfile.py | 3 +- .../samples/magics/__init__.py | 13 + .../samples/magics/_helpers.py | 21 ++ .../samples/magics/conftest.py | 36 +++ .../samples/magics/noxfile.py | 266 ++++++++++++++++++ .../samples/magics/query.py | 37 +++ .../samples/magics/query_params_scalars.py | 38 +++ .../magics/query_params_scalars_test.py | 23 ++ .../samples/magics/query_test.py | 23 ++ .../samples/magics/requirements-test.txt | 3 + .../samples/magics/requirements.txt | 12 + 13 files changed, 503 insertions(+), 67 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/magics/__init__.py create mode 100644 packages/google-cloud-bigquery/samples/magics/_helpers.py create mode 100644 packages/google-cloud-bigquery/samples/magics/conftest.py create mode 100644 packages/google-cloud-bigquery/samples/magics/noxfile.py create mode 100644 packages/google-cloud-bigquery/samples/magics/query.py create mode 100644 packages/google-cloud-bigquery/samples/magics/query_params_scalars.py create mode 100644 packages/google-cloud-bigquery/samples/magics/query_params_scalars_test.py create mode 100644 packages/google-cloud-bigquery/samples/magics/query_test.py create mode 100644 packages/google-cloud-bigquery/samples/magics/requirements-test.txt create mode 100644 packages/google-cloud-bigquery/samples/magics/requirements.txt diff --git a/packages/google-cloud-bigquery/docs/magics.rst b/packages/google-cloud-bigquery/docs/magics.rst index bcaad8fa3683..aa14c6bfa482 100644 --- a/packages/google-cloud-bigquery/docs/magics.rst +++ b/packages/google-cloud-bigquery/docs/magics.rst @@ -1,5 +1,34 @@ IPython Magics for BigQuery =========================== +To use these magics, you must first register them. Run the ``%load_ext`` magic +in a Jupyter notebook cell. + +.. code:: + + %load_ext google.cloud.bigquery + +This makes the ``%%bigquery`` magic available. + +Code Samples +------------ + +Running a query: + +.. literalinclude:: ./samples/magics/query.py + :dedent: 4 + :start-after: [START bigquery_jupyter_query] + :end-before: [END bigquery_jupyter_query] + +Running a parameterized query: + +.. literalinclude:: ./samples/magics/query_params_scalars.py + :dedent: 4 + :start-after: [START bigquery_jupyter_query_params_scalars] + :end-before: [END bigquery_jupyter_query_params_scalars] + +API Reference +------------- + .. automodule:: google.cloud.bigquery.magics.magics :members: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index d368bbeaa7c0..ec0430518867 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -14,15 +14,6 @@ """IPython Magics -To use these magics, you must first register them. Run the ``%load_ext`` magic -in a Jupyter notebook cell. - -.. code:: - - %load_ext google.cloud.bigquery - -This makes the ``%%bigquery`` magic available. - .. function:: %%bigquery IPython cell magic to run a query and display the result as a DataFrame @@ -85,63 +76,6 @@ .. note:: All queries run using this magic will run using the context :attr:`~google.cloud.bigquery.magics.Context.credentials`. - - Examples: - The following examples can be run in an IPython notebook after loading - the bigquery IPython extension (see ``In[1]``) and setting up - Application Default Credentials. - - .. code-block:: none - - In [1]: %load_ext google.cloud.bigquery - - In [2]: %%bigquery - ...: SELECT name, SUM(number) as count - ...: FROM `bigquery-public-data.usa_names.usa_1910_current` - ...: GROUP BY name - ...: ORDER BY count DESC - ...: LIMIT 3 - - Out[2]: name count - ...: ------------------- - ...: 0 James 4987296 - ...: 1 John 4866302 - ...: 2 Robert 4738204 - - In [3]: %%bigquery df --project my-alternate-project --verbose - ...: SELECT name, SUM(number) as count - ...: FROM `bigquery-public-data.usa_names.usa_1910_current` - ...: WHERE gender = 'F' - ...: GROUP BY name - ...: ORDER BY count DESC - ...: LIMIT 3 - Executing query with job ID: bf633912-af2c-4780-b568-5d868058632b - Query executing: 2.61s - Query complete after 2.92s - - In [4]: df - - Out[4]: name count - ...: ---------------------- - ...: 0 Mary 3736239 - ...: 1 Patricia 1568495 - ...: 2 Elizabeth 1519946 - - In [5]: %%bigquery --params {"num": 17} - ...: SELECT @num AS num - - Out[5]: num - ...: ------- - ...: 0 17 - - In [6]: params = {"num": 17} - - In [7]: %%bigquery --params $params - ...: SELECT @num AS num - - Out[7]: num - ...: ------- - ...: 0 17 """ from __future__ import print_function diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index d41573407aed..64eacaff5f0a 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -186,8 +186,9 @@ def snippets(session): session.run( "py.test", "samples", - "--ignore=samples/snippets", + "--ignore=samples/magics", "--ignore=samples/geography", + "--ignore=samples/snippets", *session.posargs, ) diff --git a/packages/google-cloud-bigquery/samples/magics/__init__.py b/packages/google-cloud-bigquery/samples/magics/__init__.py new file mode 100644 index 000000000000..4fbd93bb2ca4 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/packages/google-cloud-bigquery/samples/magics/_helpers.py b/packages/google-cloud-bigquery/samples/magics/_helpers.py new file mode 100644 index 000000000000..18a513b99ab3 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/_helpers.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def strip_region_tags(sample_text): + """Remove blank lines and region tags from sample text""" + magic_lines = [ + line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line + ] + return "\n".join(magic_lines) diff --git a/packages/google-cloud-bigquery/samples/magics/conftest.py b/packages/google-cloud-bigquery/samples/magics/conftest.py new file mode 100644 index 000000000000..bf86022355ff --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/conftest.py @@ -0,0 +1,36 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") +tools = pytest.importorskip("IPython.testing.tools") + + +@pytest.fixture(scope="session") +def ipython(): + config = tools.default_config() + config.TerminalInteractiveShell.simple_prompt = True + shell = interactiveshell.TerminalInteractiveShell.instance(config=config) + return shell + + +@pytest.fixture(autouse=True) +def ipython_interactive(ipython): + """Activate IPython's builtin hooks + + for the duration of the test scope. + """ + with ipython.builtin_trap: + yield ipython diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py new file mode 100644 index 000000000000..b008613f03ff --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -0,0 +1,266 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +from pathlib import Path +import sys +from typing import Callable, Dict, List, Optional + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +BLACK_VERSION = "black==19.10b0" + +# Copy `noxfile_config.py` to your directory and modify it instead. + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} + + +try: + # Ensure we can import noxfile_config in the project's directory. + sys.path.append(".") + from noxfile_config import TEST_CONFIG_OVERRIDE +except ImportError as e: + print("No user noxfile_config found: detail: {}".format(e)) + TEST_CONFIG_OVERRIDE = {} + +# Update the TEST_CONFIG with the user supplied values. +TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) + + +def get_pytest_env_vars() -> Dict[str, str]: + """Returns a dict for pytest invocation.""" + ret = {} + + # Override the GCLOUD_PROJECT and the alias. + env_key = TEST_CONFIG["gcloud_project_env"] + # This should error out if not set. + ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] + + # Apply user supplied envs. + ret.update(TEST_CONFIG["envs"]) + return ret + + +# DO NOT EDIT - automatically generated. +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] + +# Any default versions that should be ignored. +IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] + +TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) + +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) +# +# Style Checks +# + + +def _determine_local_import_names(start_dir: str) -> List[str]: + """Determines all import names that should be considered "local". + + This is used when running the linter to insure that import order is + properly checked. + """ + file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] + return [ + basename + for basename, extension in file_ext_pairs + if extension == ".py" + or os.path.isdir(os.path.join(start_dir, basename)) + and basename not in ("__pycache__") + ] + + +# Linting with flake8. +# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--import-order-style=google", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session: nox.sessions.Session) -> None: + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8", "flake8-import-order") + else: + session.install("flake8", "flake8-import-order", "flake8-annotations") + + local_names = _determine_local_import_names(".") + args = FLAKE8_COMMON_ARGS + [ + "--application-import-names", + ",".join(local_names), + ".", + ] + session.run("flake8", *args) + + +# +# Black +# + + +@nox.session +def blacken(session: nox.sessions.Session) -> None: + session.install(BLACK_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session: nox.sessions.Session) -> None: + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) + + +# +# Readmegen +# + + +def _get_repo_root() -> Optional[str]: + """ Returns the root folder of the project. """ + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session: nox.sessions.Session, path: str) -> None: + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/packages/google-cloud-bigquery/samples/magics/query.py b/packages/google-cloud-bigquery/samples/magics/query.py new file mode 100644 index 000000000000..c2739eacebfc --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/query.py @@ -0,0 +1,37 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import IPython + +from . import _helpers + + +def query(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + + sample = """ + # [START bigquery_jupyter_query] + %%bigquery + SELECT name, SUM(number) as count + FROM `bigquery-public-data.usa_names.usa_1910_current` + GROUP BY name + ORDER BY count DESC + LIMIT 3 + # [END bigquery_jupyter_query] + """ + result = ip.run_cell(_helpers.strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + df = ip.user_ns["_"] # Retrieves last returned object in notebook session + return df diff --git a/packages/google-cloud-bigquery/samples/magics/query_params_scalars.py b/packages/google-cloud-bigquery/samples/magics/query_params_scalars.py new file mode 100644 index 000000000000..a26f25aea708 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/query_params_scalars.py @@ -0,0 +1,38 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import IPython + +from . import _helpers + + +def query_with_parameters(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + + sample = """ + # [START bigquery_jupyter_query_params_scalars] + %%bigquery --params {"corpus_name": "hamlet", "limit": 10} + SELECT word, SUM(word_count) as count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = @corpus_name + GROUP BY word + ORDER BY count DESC + LIMIT @limit + # [END bigquery_jupyter_query_params_scalars] + """ + result = ip.run_cell(_helpers.strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + df = ip.user_ns["_"] # Retrieves last returned object in notebook session + return df diff --git a/packages/google-cloud-bigquery/samples/magics/query_params_scalars_test.py b/packages/google-cloud-bigquery/samples/magics/query_params_scalars_test.py new file mode 100644 index 000000000000..9b4159667501 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/query_params_scalars_test.py @@ -0,0 +1,23 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pandas + +from . import query_params_scalars + + +def test_query_with_parameters(): + df = query_params_scalars.query_with_parameters() + assert isinstance(df, pandas.DataFrame) + assert len(df) == 10 diff --git a/packages/google-cloud-bigquery/samples/magics/query_test.py b/packages/google-cloud-bigquery/samples/magics/query_test.py new file mode 100644 index 000000000000..d20797908827 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/query_test.py @@ -0,0 +1,23 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pandas + +from . import query + + +def test_query(): + df = query.query() + assert isinstance(df, pandas.DataFrame) + assert len(df) == 3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt new file mode 100644 index 000000000000..caa48813a911 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -0,0 +1,3 @@ +google-cloud-testutils==1.1.0 +pytest==6.2.5 +mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt new file mode 100644 index 000000000000..f9b9d023cce9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -0,0 +1,12 @@ +google-cloud-bigquery==2.27.1 +google-cloud-bigquery-storage==2.9.0 +google-auth-oauthlib==0.4.6 +grpcio==1.41.0 +ipython==7.16.1; python_version < '3.7' +ipython==7.17.0; python_version >= '3.7' +matplotlib==3.3.4; python_version < '3.7' +matplotlib==3.4.1; python_version >= '3.7' +pandas==1.1.5; python_version < '3.7' +pandas==1.3.2; python_version >= '3.7' +pyarrow==5.0.0 +pytz==2021.1 From 44d7be700c75be32a56fab9c30e818823142c39f Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 1 Nov 2021 09:17:36 -0500 Subject: [PATCH 1337/2016] chore: use gapic-generator-python 0.53.4 (#1037) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: use gapic-generator-python 0.53.4 docs: list oneofs in docstring committer: busunkim96@ PiperOrigin-RevId: 406468269 Source-Link: https://github.com/googleapis/googleapis/commit/83d81b0c8fc22291a13398d6d77f02dc97a5b6f4 Source-Link: https://github.com/googleapis/googleapis-gen/commit/2ff001fbacb9e77e71d734de5f955c05fdae8526 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiMmZmMDAxZmJhY2I5ZTc3ZTcxZDczNGRlNWY5NTVjMDVmZGFlODUyNiJ9 * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot --- .../google/cloud/bigquery_v2/types/model.py | 22 +++++++++++++++++++ .../cloud/bigquery_v2/types/standard_sql.py | 9 ++++++++ .../samples/magics/noxfile.py | 6 ++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index 6e3ca0095b23..a56b214910f3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -560,14 +560,23 @@ class Cluster(proto.Message): class FeatureValue(proto.Message): r"""Representative value of a single feature within the cluster. + This message has `oneof`_ fields (mutually exclusive fields). + For each oneof, at most one member field can be set at the same time. + Setting any member of the oneof automatically clears all other + members. + + .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields + Attributes: feature_column (str): The feature column name. numerical_value (google.protobuf.wrappers_pb2.DoubleValue): The numerical feature value. This is the centroid value for this feature. + This field is a member of `oneof`_ ``value``. categorical_value (google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): The categorical feature value. + This field is a member of `oneof`_ ``value``. """ class CategoricalValue(proto.Message): @@ -784,23 +793,36 @@ class EvaluationMetrics(proto.Message): data was used during training. These are not present for imported models. + This message has `oneof`_ fields (mutually exclusive fields). + For each oneof, at most one member field can be set at the same time. + Setting any member of the oneof automatically clears all other + members. + + .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields + Attributes: regression_metrics (google.cloud.bigquery_v2.types.Model.RegressionMetrics): Populated for regression models and explicit feedback type matrix factorization models. + This field is a member of `oneof`_ ``metrics``. binary_classification_metrics (google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics): Populated for binary classification/classifier models. + This field is a member of `oneof`_ ``metrics``. multi_class_classification_metrics (google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics): Populated for multi-class classification/classifier models. + This field is a member of `oneof`_ ``metrics``. clustering_metrics (google.cloud.bigquery_v2.types.Model.ClusteringMetrics): Populated for clustering models. + This field is a member of `oneof`_ ``metrics``. ranking_metrics (google.cloud.bigquery_v2.types.Model.RankingMetrics): Populated for implicit feedback type matrix factorization models. + This field is a member of `oneof`_ ``metrics``. arima_forecasting_metrics (google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics): Populated for ARIMA models. + This field is a member of `oneof`_ ``metrics``. """ regression_metrics = proto.Field( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py index 69a221c3c733..d6c13363447d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py @@ -35,6 +35,13 @@ class StandardSqlDataType(proto.Message): type={type_kind="STRING"}}, {name="y", type={type_kind="ARRAY", array_element_type="DATE"}} ]}} + This message has `oneof`_ fields (mutually exclusive fields). + For each oneof, at most one member field can be set at the same time. + Setting any member of the oneof automatically clears all other + members. + + .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields + Attributes: type_kind (google.cloud.bigquery_v2.types.StandardSqlDataType.TypeKind): Required. The top level type of this field. @@ -42,9 +49,11 @@ class StandardSqlDataType(proto.Message): "INT64", "DATE", "ARRAY"). array_element_type (google.cloud.bigquery_v2.types.StandardSqlDataType): The type of the array's elements, if type_kind = "ARRAY". + This field is a member of `oneof`_ ``sub_type``. struct_type (google.cloud.bigquery_v2.types.StandardSqlStructType): The fields of this struct, in order, if type_kind = "STRUCT". + This field is a member of `oneof`_ ``sub_type``. """ class TypeKind(proto.Enum): diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index b008613f03ff..93a9122cc457 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -87,7 +87,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] @@ -98,6 +98,10 @@ def get_pytest_env_vars() -> Dict[str, str]: "True", "true", ) + +# Error if a python version is missing +nox.options.error_on_missing_interpreters = True + # # Style Checks # From bcde48c1f5eb17d2a8132ec6dec27503715101d6 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Mon, 1 Nov 2021 08:48:12 -0700 Subject: [PATCH 1338/2016] feat: accept TableListItem where TableReference is accepted (#1016) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) 🦕 --- .../google/cloud/bigquery/client.py | 50 ++++++++++++------- .../tests/unit/test_client.py | 6 +-- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 9cb6af8f0485..4bdd43e8ff49 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -806,13 +806,12 @@ def get_dataset( def get_iam_policy( self, - table: Union[Table, TableReference], + table: Union[Table, TableReference, TableListItem, str], requested_policy_version: int = 1, retry: retries.Retry = DEFAULT_RETRY, timeout: float = DEFAULT_TIMEOUT, ) -> Policy: - if not isinstance(table, (Table, TableReference)): - raise TypeError("table must be a Table or TableReference") + table = _table_arg_to_table_ref(table, default_project=self.project) if requested_policy_version != 1: raise ValueError("only IAM policy version 1 is supported") @@ -835,14 +834,13 @@ def get_iam_policy( def set_iam_policy( self, - table: Union[Table, TableReference], + table: Union[Table, TableReference, TableListItem, str], policy: Policy, updateMask: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = DEFAULT_TIMEOUT, ) -> Policy: - if not isinstance(table, (Table, TableReference)): - raise TypeError("table must be a Table or TableReference") + table = _table_arg_to_table_ref(table, default_project=self.project) if not isinstance(policy, (Policy)): raise TypeError("policy must be a Policy") @@ -869,13 +867,12 @@ def set_iam_policy( def test_iam_permissions( self, - table: Union[Table, TableReference], + table: Union[Table, TableReference, TableListItem, str], permissions: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, timeout: float = DEFAULT_TIMEOUT, ) -> Dict[str, Any]: - if not isinstance(table, (Table, TableReference)): - raise TypeError("table must be a Table or TableReference") + table = _table_arg_to_table_ref(table, default_project=self.project) body = {"permissions": permissions} @@ -982,7 +979,7 @@ def get_routine( def get_table( self, - table: Union[Table, TableReference, str], + table: Union[Table, TableReference, TableListItem, str], retry: retries.Retry = DEFAULT_RETRY, timeout: float = DEFAULT_TIMEOUT, ) -> Table: @@ -992,6 +989,7 @@ def get_table( table (Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ str, \ ]): A reference to the table to fetch from the BigQuery API. @@ -1757,7 +1755,7 @@ def delete_routine( def delete_table( self, - table: Union[Table, TableReference, str], + table: Union[Table, TableReference, TableListItem, str], retry: retries.Retry = DEFAULT_RETRY, timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, @@ -1771,6 +1769,7 @@ def delete_table( table (Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ str, \ ]): A reference to the table to delete. If a string is passed in, @@ -2257,7 +2256,7 @@ def api_request(*args, **kwargs): def load_table_from_uri( self, source_uris: Union[str, Sequence[str]], - destination: Union[Table, TableReference, str], + destination: Union[Table, TableReference, TableListItem, str], job_id: str = None, job_id_prefix: str = None, location: str = None, @@ -2278,6 +2277,7 @@ def load_table_from_uri( destination (Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ str, \ ]): Table into which data is to be loaded. If a string is passed @@ -2339,7 +2339,7 @@ def load_table_from_uri( def load_table_from_file( self, file_obj: BinaryIO, - destination: Union[Table, TableReference, str], + destination: Union[Table, TableReference, TableListItem, str], rewind: bool = False, size: int = None, num_retries: int = _DEFAULT_NUM_RETRIES, @@ -2360,6 +2360,7 @@ def load_table_from_file( destination (Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ str, \ ]): Table into which data is to be loaded. If a string is passed @@ -2699,7 +2700,7 @@ def load_table_from_dataframe( def load_table_from_json( self, json_rows: Iterable[Dict[str, Any]], - destination: Union[Table, TableReference, str], + destination: Union[Table, TableReference, TableListItem, str], num_retries: int = _DEFAULT_NUM_RETRIES, job_id: str = None, job_id_prefix: str = None, @@ -2733,6 +2734,7 @@ def load_table_from_json( destination (Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ str, \ ]): Table into which data is to be loaded. If a string is passed @@ -2980,9 +2982,13 @@ def _do_multipart_upload( def copy_table( self, sources: Union[ - Table, TableReference, str, Sequence[Union[Table, TableReference, str]] + Table, + TableReference, + TableListItem, + str, + Sequence[Union[Table, TableReference, TableListItem, str]], ], - destination: Union[Table, TableReference, str], + destination: Union[Table, TableReference, TableListItem, str], job_id: str = None, job_id_prefix: str = None, location: str = None, @@ -3000,11 +3006,13 @@ def copy_table( sources (Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ str, \ Sequence[ \ Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ str, \ ] \ ], \ @@ -3013,6 +3021,7 @@ def copy_table( destination (Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ str, \ ]): Table into which data is to be copied. @@ -3084,7 +3093,7 @@ def copy_table( def extract_table( self, - source: Union[Table, TableReference, Model, ModelReference, str], + source: Union[Table, TableReference, TableListItem, Model, ModelReference, str], destination_uris: Union[str, Sequence[str]], job_id: str = None, job_id_prefix: str = None, @@ -3104,6 +3113,7 @@ def extract_table( source (Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ google.cloud.bigquery.model.Model, \ google.cloud.bigquery.model.ModelReference, \ src, \ @@ -3465,7 +3475,7 @@ def insert_rows_from_dataframe( def insert_rows_json( self, - table: Union[Table, TableReference, str], + table: Union[Table, TableReference, TableListItem, str], json_rows: Sequence[Dict], row_ids: Union[Iterable[str], AutoRowIDs, None] = AutoRowIDs.GENERATE_UUID, skip_invalid_rows: bool = None, @@ -3483,6 +3493,7 @@ def insert_rows_json( table (Union[ \ google.cloud.bigquery.table.Table \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ str \ ]): The destination table for the row data, or a reference to it. @@ -3605,7 +3616,7 @@ def insert_rows_json( def list_partitions( self, - table: Union[Table, TableReference, str], + table: Union[Table, TableReference, TableListItem, str], retry: retries.Retry = DEFAULT_RETRY, timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[str]: @@ -3615,6 +3626,7 @@ def list_partitions( table (Union[ \ google.cloud.bigquery.table.Table, \ google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ str, \ ]): The table or reference from which to get partition info diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 48dacf7e2e7f..11b336728e78 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1554,7 +1554,7 @@ def test_get_iam_policy_w_invalid_table(self): self.PROJECT, self.DS_ID, self.TABLE_ID, ) - with self.assertRaises(TypeError): + with self.assertRaises(ValueError): client.get_iam_policy(table_resource_string) def test_get_iam_policy_w_invalid_version(self): @@ -1675,7 +1675,7 @@ def test_set_iam_policy_w_invalid_table(self): self.TABLE_ID, ) - with self.assertRaises(TypeError): + with self.assertRaises(ValueError): client.set_iam_policy(table_resource_string, policy) def test_test_iam_permissions(self): @@ -1717,7 +1717,7 @@ def test_test_iam_permissions_w_invalid_table(self): PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] - with self.assertRaises(TypeError): + with self.assertRaises(ValueError): client.test_iam_permissions(table_resource_string, PERMISSIONS) def test_update_dataset_w_invalid_field(self): From f7e88ded964b70c86d8ff5709a1aad7a1983f903 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 3 Nov 2021 14:09:53 -0500 Subject: [PATCH 1339/2016] feat: support Python 3.10 (#1043) * feat: support Python 3.10 * fix pandas deps to match reality * run system tests with 3.10 * avoid geopandas on Python 3.10 * install google-cloud-bigquery from source * update kokoro configs * remove Python 2.7 config --- .../.kokoro/presubmit/prerelease-deps-3.8.cfg | 4 ++-- .../.kokoro/presubmit/snippets-3.10.cfg | 7 +++++++ .../{snippets-2.7.cfg => system-3.10.cfg} | 2 +- packages/google-cloud-bigquery/noxfile.py | 17 +++++++++++++---- .../samples/geography/noxfile_config.py | 7 ++++++- .../samples/geography/requirements.txt | 8 +++----- .../samples/magics/requirements.txt | 9 ++++----- .../samples/snippets/requirements.txt | 7 +++---- packages/google-cloud-bigquery/setup.py | 5 +++-- 9 files changed, 42 insertions(+), 24 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.10.cfg rename packages/google-cloud-bigquery/.kokoro/presubmit/{snippets-2.7.cfg => system-3.10.cfg} (82%) diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg index f06806baf188..fabe3e347f7b 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps" -} \ No newline at end of file + value: "prerelease_deps-3.8" +} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.10.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.10.cfg new file mode 100644 index 000000000000..dde182fb9286 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.10.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "snippets-3.10" +} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-2.7.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.10.cfg similarity index 82% rename from packages/google-cloud-bigquery/.kokoro/presubmit/snippets-2.7.cfg rename to packages/google-cloud-bigquery/.kokoro/presubmit/system-3.10.cfg index 3bd6134d2bb4..30956a3ab936 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-2.7.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.10.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "snippets-2.7" + value: "system-3.10" } diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 64eacaff5f0a..6f04940c9fed 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -27,8 +27,8 @@ BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") DEFAULT_PYTHON_VERSION = "3.8" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.8"] -UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.10"] +UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() # 'docfx' is excluded since it only needs to run in 'docs-presubmit' @@ -69,7 +69,12 @@ def default(session, install_extras=True): constraints_path, ) - install_target = ".[all]" if install_extras else "." + if install_extras and session.python == "3.10": + install_target = ".[bqstorage,pandas,tqdm,opentelemetry]" + elif install_extras: + install_target = ".[all]" + else: + install_target = "." session.install("-e", install_target, "-c", constraints_path) session.install("ipython", "-c", constraints_path) @@ -153,7 +158,11 @@ def system(session): # Data Catalog needed for the column ACL test with a real Policy Tag. session.install("google-cloud-datacatalog", "-c", constraints_path) - session.install("-e", ".[all]", "-c", constraints_path) + if session.python == "3.10": + extras = "[bqstorage,pandas,tqdm,opentelemetry]" + else: + extras = "[all]" + session.install("-e", f".{extras}", "-c", constraints_path) session.install("ipython", "-c", constraints_path) # Run py.test against the system tests. diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile_config.py b/packages/google-cloud-bigquery/samples/geography/noxfile_config.py index 7d2e02346071..315bd5be8cd9 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile_config.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile_config.py @@ -22,7 +22,12 @@ TEST_CONFIG_OVERRIDE = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": [ + "2.7", + # TODO: Enable 3.10 once there is a geopandas/fiona release. + # https://github.com/Toblerity/Fiona/issues/1043 + "3.10", + ], # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index ecd428ab95e5..e2de866731ff 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -24,14 +24,12 @@ importlib-metadata==4.8.1 libcst==0.3.21 munch==2.5.0 mypy-extensions==0.4.3 -numpy==1.19.5; python_version < "3.7" -numpy==1.21.2; python_version > "3.6" packaging==21.0 pandas==1.1.5; python_version < '3.7' -pandas==1.3.2; python_version >= '3.7' +pandas==1.3.4; python_version >= '3.7' proto-plus==1.19.2 protobuf==3.18.0 -pyarrow==5.0.0 +pyarrow==6.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.20 @@ -43,7 +41,7 @@ pytz==2021.1 PyYAML==5.4.1 requests==2.26.0 rsa==4.7.2 -Shapely==1.7.1 +Shapely==1.8.0 six==1.16.0 typing-extensions==3.10.0.2 typing-inspect==0.7.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index f9b9d023cce9..5cc7ec33f8b4 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,12 +1,11 @@ -google-cloud-bigquery==2.27.1 google-cloud-bigquery-storage==2.9.0 google-auth-oauthlib==0.4.6 grpcio==1.41.0 ipython==7.16.1; python_version < '3.7' -ipython==7.17.0; python_version >= '3.7' +ipython==7.29.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' -matplotlib==3.4.1; python_version >= '3.7' +matplotlib==3.5.0rc1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.3.2; python_version >= '3.7' -pyarrow==5.0.0 +pandas==1.3.4; python_version >= '3.7' +pyarrow==6.0.0 pytz==2021.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index f9b9d023cce9..f795523929ee 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,12 +1,11 @@ -google-cloud-bigquery==2.27.1 google-cloud-bigquery-storage==2.9.0 google-auth-oauthlib==0.4.6 grpcio==1.41.0 ipython==7.16.1; python_version < '3.7' -ipython==7.17.0; python_version >= '3.7' +ipython==7.29.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.3.2; python_version >= '3.7' -pyarrow==5.0.0 +pandas==1.3.4; python_version >= '3.7' +pyarrow==6.0.0 pytz==2021.1 diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 95dad190a42e..db69c45b14ee 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -59,7 +59,7 @@ ] + pyarrow_dep, "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], - "pandas": ["pandas>=0.23.0"] + pyarrow_dep, + "pandas": ["pandas>=0.24.2"] + pyarrow_dep, "bignumeric_type": pyarrow_dep, "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ @@ -127,6 +127,7 @@ "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Operating System :: OS Independent", "Topic :: Internet", ], @@ -135,7 +136,7 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=3.6, <3.10", + python_requires=">=3.6, <3.11", include_package_data=True, zip_safe=False, ) From 79455d2395a218ce0ec22e2db15501c1ab37c31e Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 3 Nov 2021 19:34:15 +0000 Subject: [PATCH 1340/2016] chore: release 2.30.0 (#1039) :robot: I have created a release \*beep\* \*boop\* --- ## [2.30.0](https://www.github.com/googleapis/python-bigquery/compare/v2.29.0...v2.30.0) (2021-11-03) ### Features * accept TableListItem where TableReference is accepted ([#1016](https://www.github.com/googleapis/python-bigquery/issues/1016)) ([fe16adc](https://www.github.com/googleapis/python-bigquery/commit/fe16adc86a170d0992c32091b349b036f8b43884)) * support Python 3.10 ([#1043](https://www.github.com/googleapis/python-bigquery/issues/1043)) ([5bbb832](https://www.github.com/googleapis/python-bigquery/commit/5bbb832a83ebb66db4b5ee740cdfc53f4df8430b)) ### Documentation * add code samples for Jupyter/IPython magics ([#1013](https://www.github.com/googleapis/python-bigquery/issues/1013)) ([61141ee](https://www.github.com/googleapis/python-bigquery/commit/61141ee0634024ad261d1595c95cd14a896fb87e)) * **samples:** add create external table with hive partitioning ([#1033](https://www.github.com/googleapis/python-bigquery/issues/1033)) ([d64f5b6](https://www.github.com/googleapis/python-bigquery/commit/d64f5b682854a2293244426316890df4ab1e079e)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 14 ++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 0d45d501d8c7..b3bbee86a93e 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.30.0](https://www.github.com/googleapis/python-bigquery/compare/v2.29.0...v2.30.0) (2021-11-03) + + +### Features + +* accept TableListItem where TableReference is accepted ([#1016](https://www.github.com/googleapis/python-bigquery/issues/1016)) ([fe16adc](https://www.github.com/googleapis/python-bigquery/commit/fe16adc86a170d0992c32091b349b036f8b43884)) +* support Python 3.10 ([#1043](https://www.github.com/googleapis/python-bigquery/issues/1043)) ([5bbb832](https://www.github.com/googleapis/python-bigquery/commit/5bbb832a83ebb66db4b5ee740cdfc53f4df8430b)) + + +### Documentation + +* add code samples for Jupyter/IPython magics ([#1013](https://www.github.com/googleapis/python-bigquery/issues/1013)) ([61141ee](https://www.github.com/googleapis/python-bigquery/commit/61141ee0634024ad261d1595c95cd14a896fb87e)) +* **samples:** add create external table with hive partitioning ([#1033](https://www.github.com/googleapis/python-bigquery/issues/1033)) ([d64f5b6](https://www.github.com/googleapis/python-bigquery/commit/d64f5b682854a2293244426316890df4ab1e079e)) + ## [2.29.0](https://www.github.com/googleapis/python-bigquery/compare/v2.28.1...v2.29.0) (2021-10-27) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index c8ba30be05f2..e89661993908 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.29.0" +__version__ = "2.30.0" From b815c5b2f20a848e3032bb3bbe26b573fbe90bd8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 4 Nov 2021 07:02:13 -0500 Subject: [PATCH 1341/2016] docs: show gcloud command to authorize against sheets (#1045) Fixes https://github.com/googleapis/python-bigquery/issues/805 --- .../samples/query_external_sheets_permanent_table.py | 6 ++++++ .../samples/query_external_sheets_temporary_table.py | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py index 915e9acc303a..31143d1b0816 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py @@ -21,6 +21,12 @@ def query_external_sheets_permanent_table(dataset_id): # Create credentials with Drive & BigQuery API scopes. # Both APIs must be enabled for your project before running this code. + # + # If you are using credentials from gcloud, you must authorize the + # application first with the following command: + # + # gcloud auth application-default login \ + # --scopes=https://www.googleapis.com/auth/drive,https://www.googleapis.com/auth/cloud-platform credentials, project = google.auth.default( scopes=[ "https://www.googleapis.com/auth/drive", diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py index 1b70e9531f96..a9d58e388db8 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py @@ -22,10 +22,16 @@ def query_external_sheets_temporary_table(): # Create credentials with Drive & BigQuery API scopes. # Both APIs must be enabled for your project before running this code. + # + # If you are using credentials from gcloud, you must authorize the + # application first with the following command: + # + # gcloud auth application-default login \ + # --scopes=https://www.googleapis.com/auth/drive,https://www.googleapis.com/auth/cloud-platform credentials, project = google.auth.default( scopes=[ "https://www.googleapis.com/auth/drive", - "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/cloud-platform", ] ) From 4ed42b61b8298c5aee8dd3dd068c4e6c17a88e3b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 4 Nov 2021 08:16:20 -0500 Subject: [PATCH 1342/2016] test: exclude geopandas from snippets-3.10 session (#1044) --- packages/google-cloud-bigquery/noxfile.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 6f04940c9fed..1879a5cd81f6 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -186,7 +186,11 @@ def snippets(session): session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) - session.install("-e", ".[all]", "-c", constraints_path) + if session.python == "3.10": + extras = "[bqstorage,pandas,tqdm,opentelemetry]" + else: + extras = "[all]" + session.install("-e", f".{extras}", "-c", constraints_path) # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session From 0651039337749d2446c317cdfd67218397376b97 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 4 Nov 2021 18:55:15 +0100 Subject: [PATCH 1343/2016] fix: error if eval()-ing repr(SchemaField) (#1046) * fix: error if eval()-ing repr(SchemaField) * Make repr(PolicyTagList) evaluable * Fix SchemaField repr with policy tags The repr() should be evaluable. Co-authored-by: Tres Seaver --- .../google/cloud/bigquery/schema.py | 16 +++--- .../tests/unit/test_schema.py | 51 ++++++++++++++++++- 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 5bad522732c3..22594223489b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -268,7 +268,7 @@ def _key(self): field_type = f"{field_type}({self.precision})" policy_tags = ( - () if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) + None if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) ) return ( @@ -336,7 +336,11 @@ def __hash__(self): return hash(self._key()) def __repr__(self): - return "SchemaField{}".format(self._key()) + key = self._key() + policy_tags = key[-1] + policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags) + adjusted_key = key[:-1] + (policy_tags_inst,) + return f"{self.__class__.__name__}{adjusted_key}" def _parse_schema_resource(info): @@ -407,7 +411,7 @@ class PolicyTagList(object): `projects/*/locations/*/taxonomies/*/policyTags/*`. """ - def __init__(self, names=()): + def __init__(self, names: Iterable[str] = ()): self._properties = {} self._properties["names"] = tuple(names) @@ -425,7 +429,7 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.schema.PolicyTagList`. """ - return tuple(sorted(self._properties.items())) + return tuple(sorted(self._properties.get("names", ()))) def __eq__(self, other): if not isinstance(other, PolicyTagList): @@ -439,7 +443,7 @@ def __hash__(self): return hash(self._key()) def __repr__(self): - return "PolicyTagList{}".format(self._key()) + return f"{self.__class__.__name__}(names={self._key()})" @classmethod def from_api_repr(cls, api_repr: dict) -> "PolicyTagList": @@ -478,5 +482,5 @@ def to_api_repr(self) -> dict: A dictionary representing the PolicyTagList object in serialized form. """ - answer = {"names": [name for name in self.names]} + answer = {"names": list(self.names)} return answer diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 2180e1f6e276..03ff837c0565 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -510,9 +510,30 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one("field1", "STRING") - expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), ())" + expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)" self.assertEqual(repr(field1), expected) + def test___repr__evaluable_no_policy_tags(self): + field = self._make_one("field1", "STRING", "REQUIRED", "Description") + field_repr = repr(field) + SchemaField = self._get_target_class() # needed for eval # noqa + + evaled_field = eval(field_repr) + + assert field == evaled_field + + def test___repr__evaluable_with_policy_tags(self): + policy_tags = PolicyTagList(names=["foo", "bar"]) + field = self._make_one( + "field1", "STRING", "REQUIRED", "Description", policy_tags=policy_tags, + ) + field_repr = repr(field) + SchemaField = self._get_target_class() # needed for eval # noqa + + evaled_field = eval(field_repr) + + assert field == evaled_field + # TODO: dedup with the same class in test_table.py. class _SchemaBase(object): @@ -786,6 +807,34 @@ def test___hash__not_equals(self): set_two = {policy2} self.assertNotEqual(set_one, set_two) + def test___repr__no_tags(self): + policy = self._make_one() + assert repr(policy) == "PolicyTagList(names=())" + + def test___repr__with_tags(self): + policy1 = self._make_one(["foo", "bar", "baz"]) + policy2 = self._make_one(["baz", "bar", "foo"]) + expected_repr = "PolicyTagList(names=('bar', 'baz', 'foo'))" # alphabetical + + assert repr(policy1) == expected_repr + assert repr(policy2) == expected_repr + + def test___repr__evaluable_no_tags(self): + policy = self._make_one(names=[]) + policy_repr = repr(policy) + + evaled_policy = eval(policy_repr) + + assert policy == evaled_policy + + def test___repr__evaluable_with_tags(self): + policy = self._make_one(names=["foo", "bar"]) + policy_repr = repr(policy) + + evaled_policy = eval(policy_repr) + + assert policy == evaled_policy + @pytest.mark.parametrize( "api,expect,key2", From 4276fc9bc8c4ea6ddc4721fc550b0c9a3a63ddee Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 4 Nov 2021 13:20:40 -0500 Subject: [PATCH 1344/2016] docs: use stable URL for pandas intersphinx links (#1048) --- packages/google-cloud-bigquery/docs/conf.py | 2 +- packages/google-cloud-bigquery/owlbot.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 0784da0b2a78..bb16445ca5ae 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -368,7 +368,7 @@ "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None), "geopandas": ("https://geopandas.org/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/dev", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), } diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index f2f8bea5413e..e6f36905b94f 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -100,7 +100,7 @@ intersphinx_dependencies={ "dateutil": "https://dateutil.readthedocs.io/en/latest/", "geopandas": "https://geopandas.org/", - "pandas": "https://pandas.pydata.org/pandas-docs/dev", + "pandas": "https://pandas.pydata.org/pandas-docs/stable/", }, ) From e8f39fc6c4454290d862ad6393b83f74a46bc46b Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 4 Nov 2021 14:54:48 -0500 Subject: [PATCH 1345/2016] chore: release 2.30.1 (#1047) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index b3bbee86a93e..4e10ad82670c 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.30.1](https://www.github.com/googleapis/python-bigquery/compare/v2.30.0...v2.30.1) (2021-11-04) + + +### Bug Fixes + +* error if eval()-ing repr(SchemaField) ([#1046](https://www.github.com/googleapis/python-bigquery/issues/1046)) ([13ac860](https://www.github.com/googleapis/python-bigquery/commit/13ac860de689ea13b35932c67042bc35e388cb30)) + + +### Documentation + +* show gcloud command to authorize against sheets ([#1045](https://www.github.com/googleapis/python-bigquery/issues/1045)) ([20c9024](https://www.github.com/googleapis/python-bigquery/commit/20c9024b5760f7ae41301f4da54568496922cbe2)) +* use stable URL for pandas intersphinx links ([#1048](https://www.github.com/googleapis/python-bigquery/issues/1048)) ([73312f8](https://www.github.com/googleapis/python-bigquery/commit/73312f8f0f22ff9175a4f5f7db9bb438a496c164)) + ## [2.30.0](https://www.github.com/googleapis/python-bigquery/compare/v2.29.0...v2.30.0) (2021-11-03) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index e89661993908..877ea53d83f3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.30.0" +__version__ = "2.30.1" From f054d1b6072c24d19abb4d7fcc676ac203289c2d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 8 Nov 2021 17:21:24 +0100 Subject: [PATCH 1346/2016] deps: support OpenTelemetry >= 1.1.0 (#1050) --- .../cloud/bigquery/opentelemetry_tracing.py | 11 ++++--- packages/google-cloud-bigquery/setup.py | 6 ++-- .../testing/constraints-3.6.txt | 6 ++-- .../tests/unit/test_client.py | 31 +++++++++++++------ .../tests/unit/test_opentelemetry_tracing.py | 21 ++++++++----- 5 files changed, 47 insertions(+), 28 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py index 57f258ac4db8..b1a1027d2a64 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) try: from opentelemetry import trace - from opentelemetry.instrumentation.utils import http_status_to_canonical_code + from opentelemetry.instrumentation.utils import http_status_to_status_code from opentelemetry.trace.status import Status HAS_OPENTELEMETRY = True @@ -65,9 +65,10 @@ def create_span(name, attributes=None, client=None, job_ref=None): if not _warned_telemetry: logger.debug( "This service is instrumented using OpenTelemetry. " - "OpenTelemetry could not be imported; please " - "add opentelemetry-api and opentelemetry-instrumentation " - "packages in order to get BigQuery Tracing data." + "OpenTelemetry or one of its components could not be imported; " + "please add compatible versions of opentelemetry-api and " + "opentelemetry-instrumentation packages in order to get BigQuery " + "Tracing data." ) _warned_telemetry = True @@ -81,7 +82,7 @@ def create_span(name, attributes=None, client=None, job_ref=None): yield span except GoogleAPICallError as error: if error.code is not None: - span.set_status(Status(http_status_to_canonical_code(error.code))) + span.set_status(Status(http_status_to_status_code(error.code))) raise diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index db69c45b14ee..5c0b80f7c3a2 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -63,9 +63,9 @@ "bignumeric_type": pyarrow_dep, "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ - "opentelemetry-api >= 0.11b0", - "opentelemetry-sdk >= 0.11b0", - "opentelemetry-instrumentation >= 0.11b0", + "opentelemetry-api >= 1.1.0", + "opentelemetry-sdk >= 1.1.0", + "opentelemetry-instrumentation >= 0.20b0", ], } diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index 59913d588332..f967077bc621 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -11,9 +11,9 @@ google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 grpcio==1.38.1 -opentelemetry-api==0.11b0 -opentelemetry-instrumentation==0.11b0 -opentelemetry-sdk==0.11b0 +opentelemetry-api==1.1.0 +opentelemetry-instrumentation==0.20b0 +opentelemetry-sdk==1.1.0 pandas==0.24.2 proto-plus==1.10.0 protobuf==3.12.0 diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 11b336728e78..97aa2eedbf22 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -36,16 +36,24 @@ import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None + try: import opentelemetry - from opentelemetry import trace - from opentelemetry.sdk.trace import TracerProvider - from opentelemetry.sdk.trace.export import SimpleExportSpanProcessor - from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( - InMemorySpanExporter, - ) -except (ImportError, AttributeError): # pragma: NO COVER +except ImportError: opentelemetry = None + +if opentelemetry is not None: + try: + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import SimpleSpanProcessor + from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, + ) + except (ImportError, AttributeError) as exc: # pragma: NO COVER + msg = "Error importing from opentelemetry, is the installed version compatible?" + raise ImportError(msg) from exc + try: import pyarrow except (ImportError, AttributeError): # pragma: NO COVER @@ -784,9 +792,12 @@ def test_span_status_is_set(self): tracer_provider = TracerProvider() memory_exporter = InMemorySpanExporter() - span_processor = SimpleExportSpanProcessor(memory_exporter) + span_processor = SimpleSpanProcessor(memory_exporter) tracer_provider.add_span_processor(span_processor) - trace.set_tracer_provider(tracer_provider) + + # OpenTelemetry API >= 0.12b0 does not allow overriding the tracer once + # initialized, thus directly override the internal global var. + tracer_patcher = mock.patch.object(trace, "_TRACER_PROVIDER", tracer_provider) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -797,7 +808,7 @@ def test_span_status_is_set(self): full_routine_id = "test-routine-project.test_routines.minimal_routine" routine = Routine(full_routine_id) - with pytest.raises(google.api_core.exceptions.AlreadyExists): + with pytest.raises(google.api_core.exceptions.AlreadyExists), tracer_patcher: client.create_routine(routine) span_list = memory_exporter.get_finished_spans() diff --git a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py index 726e3cf6f2d6..cc1ca79039e7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py @@ -20,14 +20,21 @@ try: import opentelemetry - from opentelemetry import trace - from opentelemetry.sdk.trace import TracerProvider - from opentelemetry.sdk.trace.export import SimpleExportSpanProcessor - from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( - InMemorySpanExporter, - ) except ImportError: # pragma: NO COVER opentelemetry = None + +if opentelemetry is not None: + try: + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import SimpleSpanProcessor + from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, + ) + except (ImportError, AttributeError) as exc: # pragma: NO COVER + msg = "Error importing from opentelemetry, is the installed version compatible?" + raise ImportError(msg) from exc + import pytest from google.cloud.bigquery import opentelemetry_tracing @@ -42,7 +49,7 @@ def setup(): importlib.reload(opentelemetry_tracing) tracer_provider = TracerProvider() memory_exporter = InMemorySpanExporter() - span_processor = SimpleExportSpanProcessor(memory_exporter) + span_processor = SimpleSpanProcessor(memory_exporter) tracer_provider.add_span_processor(span_processor) trace.set_tracer_provider(tracer_provider) yield memory_exporter From 59d99fb0bd973d9cc29f53b87d78ee9ec96357c4 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 8 Nov 2021 22:47:37 +0100 Subject: [PATCH 1347/2016] process: make mypy happy with type annotations (#1036) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * process: add mypy types check to nox sessions * Ignore type errors for not annotated modules Several dependencies lack type annotations, or they don't advertise themselves as type-annotated. We do not want `mypy` to complain about these. * Fix mypy complaints (batch 1) * Fix mypy complaints (batch 2) * Fix mypy complaints (batch 3) * Fix mypy false positive errors * Simplify external config options instantiation * Do not ignore api-core in type checks More recent releases of google-api-core have typing enabled. * Remove unneeded __hash__ = None lines * Use an alias for timeout type in client.py * Fix PathLike subscription error in pre-Python 3.9 * Fix a typo in docstring Co-authored-by: Tim Swast * Add mypy to the list of nox sessions to run * Fix opentelemetry type error The Opentelemetry APi has changed from the minimum version the BigQuery client currently uses, we thus need to bound the maximum Opentelemetry version. In addition, that maximum version does not yet support type checks, thus it is ignored. * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Exclude type-checking code from coverage * Fix patching opentelemetry tracer pvoider * Adjust get_job() return type, ignore opentelemetry Co-authored-by: Tim Swast Co-authored-by: Owl Bot --- .../google/cloud/__init__.py | 2 +- .../google/cloud/bigquery/_helpers.py | 4 +- .../google/cloud/bigquery/_http.py | 2 +- .../google/cloud/bigquery/_pandas_helpers.py | 8 +- .../google/cloud/bigquery/_tqdm_helpers.py | 2 +- .../google/cloud/bigquery/client.py | 211 +++++++++--------- .../google/cloud/bigquery/dataset.py | 2 +- .../google/cloud/bigquery/dbapi/_helpers.py | 16 +- .../google/cloud/bigquery/dbapi/cursor.py | 2 +- .../google/cloud/bigquery/external_config.py | 30 ++- .../google/cloud/bigquery/job/base.py | 4 +- .../google/cloud/bigquery/job/query.py | 31 ++- .../bigquery/magics/line_arg_parser/lexer.py | 2 +- .../google/cloud/bigquery/magics/magics.py | 8 +- .../google/cloud/bigquery/model.py | 4 +- .../cloud/bigquery/opentelemetry_tracing.py | 2 +- .../google/cloud/bigquery/query.py | 8 +- .../google/cloud/bigquery/retry.py | 2 +- .../google/cloud/bigquery/routine/routine.py | 2 +- .../google/cloud/bigquery/schema.py | 4 +- .../google/cloud/bigquery/table.py | 28 +-- packages/google-cloud-bigquery/noxfile.py | 19 +- .../tests/unit/test_opentelemetry_tracing.py | 9 +- 23 files changed, 220 insertions(+), 182 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/__init__.py b/packages/google-cloud-bigquery/google/cloud/__init__.py index 8fcc60e2b9c6..8e60d8439d0d 100644 --- a/packages/google-cloud-bigquery/google/cloud/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/__init__.py @@ -21,4 +21,4 @@ except ImportError: import pkgutil - __path__ = pkgutil.extend_path(__path__, __name__) + __path__ = pkgutil.extend_path(__path__, __name__) # type: ignore diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index e95d38545598..e2ca7fa075a6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -22,7 +22,7 @@ from typing import Any, Optional, Union from dateutil import relativedelta -from google.cloud._helpers import UTC +from google.cloud._helpers import UTC # type: ignore from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _RFC3339_MICROS @@ -126,7 +126,7 @@ def __init__(self): def installed_version(self) -> packaging.version.Version: """Return the parsed version of pyarrow.""" if self._installed_version is None: - import pyarrow + import pyarrow # type: ignore self._installed_version = packaging.version.parse( # Use 0.0.0, since it is earlier than any released version. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index 81e7922e6e34..f7207f32e150 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -17,7 +17,7 @@ import os import pkg_resources -from google.cloud import _http # pytype: disable=import-error +from google.cloud import _http # type: ignore # pytype: disable=import-error from google.cloud.bigquery import __version__ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 0cb85146949e..de6356c2a58a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -21,7 +21,7 @@ import warnings try: - import pandas + import pandas # type: ignore except ImportError: # pragma: NO COVER pandas = None else: @@ -29,7 +29,7 @@ try: # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` - from shapely.geometry.base import BaseGeometry as _BaseGeometry + from shapely.geometry.base import BaseGeometry as _BaseGeometry # type: ignore except ImportError: # pragma: NO COVER # No shapely, use NoneType for _BaseGeometry as a placeholder. _BaseGeometry = type(None) @@ -43,7 +43,7 @@ def _to_wkb(): # - Avoid extra work done by `shapely.wkb.dumps` that we don't need. # - Caches the WKBWriter (and write method lookup :) ) # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace. - from shapely.geos import WKBWriter, lgeos + from shapely.geos import WKBWriter, lgeos # type: ignore write = WKBWriter(lgeos).write notnull = pandas.notnull @@ -574,7 +574,7 @@ def dataframe_to_parquet( """ pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) - import pyarrow.parquet + import pyarrow.parquet # type: ignore kwargs = ( {"use_compliant_nested_type": parquet_use_compliant_nested_type} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py index 99e720e2b044..632f70f87215 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py @@ -21,7 +21,7 @@ import warnings try: - import tqdm + import tqdm # type: ignore except ImportError: # pragma: NO COVER tqdm = None diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4bdd43e8ff49..3e641e1952cc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -28,12 +28,23 @@ import math import os import tempfile -from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union +import typing +from typing import ( + Any, + BinaryIO, + Dict, + Iterable, + List, + Optional, + Sequence, + Tuple, + Union, +) import uuid import warnings from google import resumable_media # type: ignore -from google.resumable_media.requests import MultipartUpload +from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload import google.api_core.client_options @@ -41,16 +52,16 @@ from google.api_core.iam import Policy from google.api_core import page_iterator from google.api_core import retry as retries -import google.cloud._helpers +import google.cloud._helpers # type: ignore from google.cloud import exceptions # pytype: disable=import-error -from google.cloud.client import ClientWithProject # pytype: disable=import-error +from google.cloud.client import ClientWithProject # type: ignore # pytype: disable=import-error try: from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, ) except ImportError: - DEFAULT_BQSTORAGE_CLIENT_INFO = None + DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop @@ -100,6 +111,11 @@ pyarrow = _helpers.PYARROW_VERSIONS.try_import() +TimeoutType = Union[float, None] + +if typing.TYPE_CHECKING: # pragma: NO COVER + # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. + PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 @@ -248,7 +264,7 @@ def get_service_account_email( self, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> str: """Get the email address of the project's BigQuery service account @@ -295,7 +311,7 @@ def list_projects( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -361,7 +377,7 @@ def list_datasets( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -400,7 +416,7 @@ def list_datasets( Iterator of :class:`~google.cloud.bigquery.dataset.DatasetListItem`. associated with the project. """ - extra_params = {} + extra_params: Dict[str, Any] = {} if project is None: project = self.project if include_all: @@ -526,12 +542,12 @@ def _ensure_bqstorage_client( bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=self._credentials, client_options=client_options, - client_info=client_info, + client_info=client_info, # type: ignore # (None is also accepted) ) return bqstorage_client - def _dataset_from_arg(self, dataset): + def _dataset_from_arg(self, dataset) -> Union[Dataset, DatasetReference]: if isinstance(dataset, str): dataset = DatasetReference.from_string( dataset, default_project=self.project @@ -552,7 +568,7 @@ def create_dataset( dataset: Union[str, Dataset, DatasetReference, DatasetListItem], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Dataset: """API call: create the dataset via a POST request. @@ -627,7 +643,7 @@ def create_routine( routine: Routine, exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Create a routine via a POST request. @@ -682,7 +698,7 @@ def create_table( table: Union[str, Table, TableReference, TableListItem], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Table: """API call: create a table via a PUT request @@ -765,7 +781,7 @@ def get_dataset( self, dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -809,7 +825,7 @@ def get_iam_policy( table: Union[Table, TableReference, TableListItem, str], requested_policy_version: int = 1, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Policy: table = _table_arg_to_table_ref(table, default_project=self.project) @@ -838,7 +854,7 @@ def set_iam_policy( policy: Policy, updateMask: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Policy: table = _table_arg_to_table_ref(table, default_project=self.project) @@ -870,7 +886,7 @@ def test_iam_permissions( table: Union[Table, TableReference, TableListItem, str], permissions: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Dict[str, Any]: table = _table_arg_to_table_ref(table, default_project=self.project) @@ -894,7 +910,7 @@ def get_model( self, model_ref: Union[ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. @@ -937,7 +953,7 @@ def get_routine( self, routine_ref: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. @@ -981,7 +997,7 @@ def get_table( self, table: Union[Table, TableReference, TableListItem, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Table: """Fetch the table referenced by ``table``. @@ -1024,7 +1040,7 @@ def update_dataset( dataset: Dataset, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Dataset: """Change some fields of a dataset. @@ -1071,7 +1087,7 @@ def update_dataset( """ partial = dataset._build_resource(fields) if dataset.etag is not None: - headers = {"If-Match": dataset.etag} + headers: Optional[Dict[str, str]] = {"If-Match": dataset.etag} else: headers = None path = dataset.path @@ -1094,7 +1110,7 @@ def update_model( model: Model, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Change some fields of a model. @@ -1135,7 +1151,7 @@ def update_model( """ partial = model._build_resource(fields) if model.etag: - headers = {"If-Match": model.etag} + headers: Optional[Dict[str, str]] = {"If-Match": model.etag} else: headers = None path = model.path @@ -1158,7 +1174,7 @@ def update_routine( routine: Routine, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Change some fields of a routine. @@ -1205,7 +1221,7 @@ def update_routine( """ partial = routine._build_resource(fields) if routine.etag: - headers = {"If-Match": routine.etag} + headers: Optional[Dict[str, str]] = {"If-Match": routine.etag} else: headers = None @@ -1232,7 +1248,7 @@ def update_table( table: Table, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Table: """Change some fields of a table. @@ -1273,7 +1289,7 @@ def update_table( """ partial = table._build_resource(fields) if table.etag is not None: - headers = {"If-Match": table.etag} + headers: Optional[Dict[str, str]] = {"If-Match": table.etag} else: headers = None @@ -1298,7 +1314,7 @@ def list_models( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1366,7 +1382,7 @@ def api_request(*args, **kwargs): max_results=max_results, page_size=page_size, ) - result.dataset = dataset + result.dataset = dataset # type: ignore return result def list_routines( @@ -1375,7 +1391,7 @@ def list_routines( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1443,7 +1459,7 @@ def api_request(*args, **kwargs): max_results=max_results, page_size=page_size, ) - result.dataset = dataset + result.dataset = dataset # type: ignore return result def list_tables( @@ -1452,7 +1468,7 @@ def list_tables( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1519,7 +1535,7 @@ def api_request(*args, **kwargs): max_results=max_results, page_size=page_size, ) - result.dataset = dataset + result.dataset = dataset # type: ignore return result def delete_dataset( @@ -1527,7 +1543,7 @@ def delete_dataset( dataset: Union[Dataset, DatasetReference, DatasetListItem, str], delete_contents: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a dataset. @@ -1586,7 +1602,7 @@ def delete_model( self, model: Union[Model, ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a model @@ -1640,7 +1656,7 @@ def delete_job_metadata( project: Optional[str] = None, location: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, not_found_ok: bool = False, ): """[Beta] Delete job metadata from job history. @@ -1703,7 +1719,7 @@ def delete_routine( self, routine: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a routine. @@ -1757,7 +1773,7 @@ def delete_table( self, table: Union[Table, TableReference, TableListItem, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a table @@ -1811,7 +1827,7 @@ def _get_query_results( project: str = None, timeout_ms: int = None, location: str = None, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -1836,7 +1852,7 @@ def _get_query_results( A new ``_QueryResults`` instance. """ - extra_params = {"maxResults": 0} + extra_params: Dict[str, Any] = {"maxResults": 0} if timeout is not None: timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) @@ -1870,20 +1886,18 @@ def _get_query_results( ) return _QueryResults.from_api_repr(resource) - def job_from_resource(self, resource: dict) -> job.UnknownJob: + def job_from_resource( + self, resource: dict + ) -> Union[ + job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob, + ]: """Detect correct job type from resource and instantiate. Args: resource (Dict): one job resource from API response Returns: - Union[ \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]: - The job instance, constructed via the resource. + The job instance, constructed via the resource. """ config = resource.get("configuration", {}) if "load" in config: @@ -1900,7 +1914,7 @@ def create_job( self, job_config: dict, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: @@ -1933,7 +1947,7 @@ def create_job( return self.load_table_from_uri( source_uris, destination, - job_config=load_job_config, + job_config=typing.cast(LoadJobConfig, load_job_config), retry=retry, timeout=timeout, ) @@ -1953,7 +1967,7 @@ def create_job( return self.copy_table( sources, destination, - job_config=copy_job_config, + job_config=typing.cast(CopyJobConfig, copy_job_config), retry=retry, timeout=timeout, ) @@ -1973,7 +1987,7 @@ def create_job( return self.extract_table( source, destination_uris, - job_config=extract_job_config, + job_config=typing.cast(ExtractJobConfig, extract_job_config), retry=retry, timeout=timeout, source_type=source_type, @@ -1986,32 +2000,30 @@ def create_job( ) query = _get_sub_prop(copy_config, ["query", "query"]) return self.query( - query, job_config=query_job_config, retry=retry, timeout=timeout + query, + job_config=typing.cast(QueryJobConfig, query_job_config), + retry=retry, + timeout=timeout, ) else: raise TypeError("Invalid job configuration received.") def get_job( self, - job_id: str, + job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, - ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: + timeout: TimeoutType = DEFAULT_TIMEOUT, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: """Fetch a job for the project associated with this client. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get Args: - job_id (Union[ \ - str, \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]): Job identifier. + job_id: + Job identifier. Keyword Arguments: project (Optional[str]): @@ -2026,13 +2038,7 @@ def get_job( before using ``retry``. Returns: - Union[ \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]: - Job instance, based on the resource returned by the API. + Job instance, based on the resource returned by the API. """ extra_params = {"projection": "full"} @@ -2071,7 +2077,7 @@ def cancel_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. @@ -2137,7 +2143,11 @@ def cancel_job( timeout=timeout, ) - return self.job_from_resource(resource["job"]) + job_instance = self.job_from_resource(resource["job"]) # never an UnknownJob + + return typing.cast( + Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], job_instance, + ) def list_jobs( self, @@ -2148,7 +2158,7 @@ def list_jobs( all_users: bool = None, state_filter: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, page_size: int = None, @@ -2263,9 +2273,9 @@ def load_table_from_uri( project: str = None, job_config: LoadJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: - """Starts a job for loading data into a table from CloudStorage. + """Starts a job for loading data into a table from Cloud Storage. See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload @@ -2348,7 +2358,7 @@ def load_table_from_file( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2439,7 +2449,7 @@ def load_table_from_file( except resumable_media.InvalidResponse as exc: raise exceptions.from_http_response(exc.response) - return self.job_from_resource(response.json()) + return typing.cast(LoadJob, self.job_from_resource(response.json())) def load_table_from_dataframe( self, @@ -2452,7 +2462,7 @@ def load_table_from_dataframe( project: str = None, job_config: LoadJobConfig = None, parquet_compression: str = "snappy", - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. @@ -2592,7 +2602,7 @@ def load_table_from_dataframe( try: table = self.get_table(destination) except core_exceptions.NotFound: - table = None + pass else: columns_and_indexes = frozenset( name @@ -2707,7 +2717,7 @@ def load_table_from_json( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -2995,7 +3005,7 @@ def copy_table( project: str = None, job_config: CopyJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.CopyJob: """Copy one or more tables to another table. @@ -3101,7 +3111,7 @@ def extract_table( project: str = None, job_config: ExtractJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, source_type: str = "Table", ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. @@ -3200,7 +3210,7 @@ def query( location: str = None, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3357,7 +3367,7 @@ def insert_rows( table: Union[Table, TableReference, str], rows: Union[Iterable[Tuple], Iterable[Dict]], selected_fields: Sequence[SchemaField] = None, - **kwargs: dict, + **kwargs, ) -> Sequence[dict]: """Insert rows into a table via the streaming API. @@ -3482,7 +3492,7 @@ def insert_rows_json( ignore_unknown_values: bool = None, template_suffix: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. @@ -3550,8 +3560,8 @@ def insert_rows_json( # insert_rows_json doesn't need the table schema. It's not doing any # type conversions. table = _table_arg_to_table_ref(table, default_project=self.project) - rows_info = [] - data = {"rows": rows_info} + rows_info: List[Any] = [] + data: Dict[str, Any] = {"rows": rows_info} if row_ids is None: warnings.warn( @@ -3569,7 +3579,7 @@ def insert_rows_json( raise TypeError(msg) for i, row in enumerate(json_rows): - info = {"json": row} + info: Dict[str, Any] = {"json": row} if row_ids is AutoRowIDs.GENERATE_UUID: info["insertId"] = str(uuid.uuid4()) @@ -3618,7 +3628,7 @@ def list_partitions( self, table: Union[Table, TableReference, TableListItem, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Sequence[str]: """List the partitions in a table. @@ -3669,7 +3679,7 @@ def list_rows( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of the table. @@ -3745,7 +3755,7 @@ def list_rows( table = self.get_table(table.reference, retry=retry, timeout=timeout) schema = table.schema - params = {} + params: Dict[str, Any] = {} if selected_fields is not None: params["selectedFields"] = ",".join(field.name for field in selected_fields) if start_index is not None: @@ -3781,7 +3791,7 @@ def _list_rows_from_query_results( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of a completed query. See @@ -3826,7 +3836,7 @@ def _list_rows_from_query_results( Iterator of row data :class:`~google.cloud.bigquery.table.Row`-s. """ - params = { + params: Dict[str, Any] = { "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": location, } @@ -3867,7 +3877,7 @@ def _schema_to_json_file_object(self, schema_list, file_obj): """ json.dump(schema_list, file_obj, indent=2, sort_keys=True) - def schema_from_json(self, file_or_path: Union[str, BinaryIO]): + def schema_from_json(self, file_or_path: "PathType"): """Takes a file object or file path that contains json that describes a table schema. @@ -3881,7 +3891,7 @@ def schema_from_json(self, file_or_path: Union[str, BinaryIO]): return self._schema_from_json_file_object(file_obj) def schema_to_json( - self, schema_list: Sequence[SchemaField], destination: Union[str, BinaryIO] + self, schema_list: Sequence[SchemaField], destination: "PathType" ): """Takes a list of schema field objects. @@ -4023,13 +4033,12 @@ def _extract_job_reference(job, project=None, location=None): return (project, location, job_id) -def _make_job_id(job_id, prefix=None): +def _make_job_id(job_id: Optional[str], prefix: Optional[str] = None) -> str: """Construct an ID for a new job. Args: - job_id (Optional[str]): the user-provided job ID. - - prefix (Optional[str]): the user-provided prefix for a job ID. + job_id: the user-provided job ID. + prefix: the user-provided prefix for a job ID. Returns: str: A job ID diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 21e56f305d73..ff015d6059c8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -18,7 +18,7 @@ import copy -import google.cloud._helpers +import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery.model import ModelReference diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 72e711bcf428..e5c7ef7ec7ea 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -161,7 +161,7 @@ def _parse_struct_fields( yield m.group(1, 2) -SCALAR, ARRAY, STRUCT = "sar" +SCALAR, ARRAY, STRUCT = ("s", "a", "r") def _parse_type( @@ -226,19 +226,19 @@ def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: s type_type, sub_type = _parse_type(type_, name, base) if type_type == SCALAR: - type_ = sub_type + result_type = sub_type elif type_type == ARRAY: - type_ = query.ArrayQueryParameterType(sub_type, name=name) + result_type = query.ArrayQueryParameterType(sub_type, name=name) elif type_type == STRUCT: fields = [ complex_query_parameter_type(field_name, field_type, base) for field_name, field_type in sub_type ] - type_ = query.StructQueryParameterType(*fields, name=name) + result_type = query.StructQueryParameterType(*fields, name=name) else: # pragma: NO COVER raise AssertionError("Bad type_type", type_type) # Can't happen :) - return type_ + return result_type def complex_query_parameter( @@ -256,6 +256,12 @@ def complex_query_parameter( struct>> """ + param: typing.Union[ + query.ScalarQueryParameter, + query.ArrayQueryParameter, + query.StructQueryParameter, + ] + base = base or type_ type_type, sub_type = _parse_type(type_, name, base) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index b1239ff57a19..03f3b72ca017 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -31,7 +31,7 @@ from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions -import google.cloud.exceptions +import google.cloud.exceptions # type: ignore _LOGGER = logging.getLogger(__name__) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 5f284c639849..e6f6a97c3388 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -556,6 +556,10 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": ParquetOptions, ) +OptionsType = Union[ + AvroOptions, BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions, +] + class HivePartitioningOptions(object): """[Beta] Options that configure hive partitioning. @@ -664,13 +668,15 @@ def source_format(self): return self._properties["sourceFormat"] @property - def options(self) -> Optional[Union[_OPTION_CLASSES]]: + def options(self) -> Optional[OptionsType]: """Source-specific options.""" for optcls in _OPTION_CLASSES: - if self.source_format == optcls._SOURCE_FORMAT: - options = optcls() - self._properties.setdefault(optcls._RESOURCE_NAME, {}) - options._properties = self._properties[optcls._RESOURCE_NAME] + # The code below is too much magic for mypy to handle. + if self.source_format == optcls._SOURCE_FORMAT: # type: ignore + options: OptionsType = optcls() # type: ignore + options._properties = self._properties.setdefault( + optcls._RESOURCE_NAME, {} # type: ignore + ) return options # No matching source format found. @@ -799,6 +805,13 @@ def schema(self): prop = self._properties.get("schema", {}) return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] + @schema.setter + def schema(self, value): + prop = value + if value is not None: + prop = {"fields": [field.to_api_repr() for field in value]} + self._properties["schema"] = prop + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API @@ -816,13 +829,6 @@ def connection_id(self): def connection_id(self, value): self._properties["connectionId"] = value - @schema.setter - def schema(self, value): - prop = value - if value is not None: - prop = {"fields": [field.to_api_repr() for field in value]} - self._properties["schema"] = prop - @property def avro_options(self) -> Optional[AvroOptions]: """Additional properties to set if ``sourceFormat`` is set to AVRO. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 88d6bec149c1..97acab5d2456 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -696,7 +696,7 @@ def done( self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE - def result( + def result( # type: ignore # (signature complaint) self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None ) -> "_AsyncJob": """Start the job and wait for it to complete and get the result. @@ -921,7 +921,7 @@ def from_api_repr(cls, resource: dict) -> "_JobConfig": # cls is one of the job config subclasses that provides the job_type argument to # this base class on instantiation, thus missing-parameter warning is a false # positive here. - job_config = cls() # pytype: disable=missing-parameter + job_config = cls() # type: ignore # pytype: disable=missing-parameter job_config._properties = resource return job_config diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 942c85fc3d4c..36e3882385b8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -56,9 +56,9 @@ if typing.TYPE_CHECKING: # pragma: NO COVER # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. - import pandas - import geopandas - import pyarrow + import pandas # type: ignore + import geopandas # type: ignore + import pyarrow # type: ignore from google.api_core import retry as retries from google.cloud import bigquery_storage from google.cloud.bigquery.client import Client @@ -144,7 +144,7 @@ def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats": args = ( int(stats.get(api_field, default_val)) - for api_field, default_val in zip(api_fields, cls.__new__.__defaults__) + for api_field, default_val in zip(api_fields, cls.__new__.__defaults__) # type: ignore ) return cls(*args) @@ -161,7 +161,7 @@ def __init__( statement_byte_budget: Optional[int] = None, key_result_statement: Optional[KeyResultStatementKind] = None, ): - self._properties = {} + self._properties: Dict[str, Any] = {} self.statement_timeout_ms = statement_timeout_ms self.statement_byte_budget = statement_byte_budget self.key_result_statement = key_result_statement @@ -193,9 +193,8 @@ def statement_timeout_ms(self) -> Union[int, None]: @statement_timeout_ms.setter def statement_timeout_ms(self, value: Union[int, None]): - if value is not None: - value = str(value) - self._properties["statementTimeoutMs"] = value + new_value = None if value is None else str(value) + self._properties["statementTimeoutMs"] = new_value @property def statement_byte_budget(self) -> Union[int, None]: @@ -207,9 +206,8 @@ def statement_byte_budget(self) -> Union[int, None]: @statement_byte_budget.setter def statement_byte_budget(self, value: Union[int, None]): - if value is not None: - value = str(value) - self._properties["statementByteBudget"] = value + new_value = None if value is None else str(value) + self._properties["statementByteBudget"] = new_value @property def key_result_statement(self) -> Union[KeyResultStatementKind, None]: @@ -666,9 +664,8 @@ def script_options(self) -> ScriptOptions: @script_options.setter def script_options(self, value: Union[ScriptOptions, None]): - if value is not None: - value = value.to_api_repr() - self._set_sub_prop("scriptOptions", value) + new_value = None if value is None else value.to_api_repr() + self._set_sub_prop("scriptOptions", new_value) def to_api_repr(self) -> dict: """Build an API representation of the query job config. @@ -1330,7 +1327,7 @@ def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): except exceptions.GoogleAPIError as exc: self.set_exception(exc) - def result( + def result( # type: ignore # (complaints about the overloaded signature) self, page_size: int = None, max_results: int = None, @@ -1400,7 +1397,7 @@ def result( retry_do_query = getattr(self, "_retry_do_query", None) if retry_do_query is not None: if job_retry is DEFAULT_JOB_RETRY: - job_retry = self._job_retry + job_retry = self._job_retry # type: ignore else: if job_retry is not None and job_retry is not DEFAULT_JOB_RETRY: raise TypeError( @@ -1451,7 +1448,7 @@ def do_get_result(): except exceptions.GoogleAPICallError as exc: exc.message += self._format_for_exception(self.query, self.job_id) - exc.query_job = self + exc.query_job = self # type: ignore raise except requests.exceptions.Timeout as exc: raise concurrent.futures.TimeoutError from exc diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py index cd809c389213..71b287d01e83 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -98,7 +98,7 @@ def _generate_next_value_(name, start, count, last_values): return name -TokenType = AutoStrEnum( # pytype: disable=wrong-arg-types +TokenType = AutoStrEnum( # type: ignore # pytype: disable=wrong-arg-types "TokenType", [ (name, enum.auto()) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index ec0430518867..1d8d8ed3085e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -90,16 +90,16 @@ from concurrent import futures try: - import IPython - from IPython import display - from IPython.core import magic_arguments + import IPython # type: ignore + from IPython import display # type: ignore + from IPython.core import magic_arguments # type: ignore except ImportError: # pragma: NO COVER raise ImportError("This module can only be loaded in IPython.") from google.api_core import client_info from google.api_core import client_options from google.api_core.exceptions import NotFound -import google.auth +import google.auth # type: ignore from google.cloud import bigquery import google.cloud.bigquery.dataset from google.cloud.bigquery.dbapi import _helpers diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index 2d3f6660f17c..cdb411e089be 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -20,8 +20,8 @@ from google.protobuf import json_format -import google.cloud._helpers -from google.api_core import datetime_helpers +import google.cloud._helpers # type: ignore +from google.api_core import datetime_helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery_v2 import types from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py index b1a1027d2a64..748f2136ddb7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -14,7 +14,7 @@ import logging from contextlib import contextmanager -from google.api_core.exceptions import GoogleAPICallError +from google.api_core.exceptions import GoogleAPICallError # type: ignore logger = logging.getLogger(__name__) try: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 708f5f47bace..637be62befd6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -367,14 +367,14 @@ class _AbstractQueryParameter(object): """ @classmethod - def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": + def from_api_repr(cls, resource: dict) -> "_AbstractQueryParameter": """Factory: construct parameter from JSON resource. Args: resource (Dict): JSON mapping of parameter Returns: - google.cloud.bigquery.query.ScalarQueryParameter + A new instance of _AbstractQueryParameter subclass. """ raise NotImplementedError @@ -471,7 +471,7 @@ def to_api_repr(self) -> dict: converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) if converter is not None: value = converter(value) - resource = { + resource: Dict[str, Any] = { "parameterType": {"type": self.type_}, "parameterValue": {"value": value}, } @@ -734,7 +734,7 @@ def from_api_repr(cls, resource: dict) -> "StructQueryParameter": struct_values = resource["parameterValue"]["structValues"] for key, value in struct_values.items(): type_ = types[key] - converted = None + converted: Optional[Union[ArrayQueryParameter, StructQueryParameter]] = None if type_ == "STRUCT": struct_resource = { "name": key, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 8a86973cde7e..254b26608c65 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -14,7 +14,7 @@ from google.api_core import exceptions from google.api_core import retry -from google.auth import exceptions as auth_exceptions +from google.auth import exceptions as auth_exceptions # type: ignore import requests.exceptions diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py index a776212c3000..a66434300983 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py @@ -18,7 +18,7 @@ from google.protobuf import json_format -import google.cloud._helpers +import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers import google.cloud.bigquery_v2.types from google.cloud.bigquery_v2.types import StandardSqlTableType diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 22594223489b..2af61b672297 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -16,7 +16,7 @@ import collections import enum -from typing import Iterable, Union +from typing import Any, Dict, Iterable, Union from google.cloud.bigquery_v2 import types @@ -106,7 +106,7 @@ def __init__( scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, ): - self._properties = { + self._properties: Dict[str, Any] = { "name": name, "type": field_type, } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 608218fdc33a..60c8593c7e79 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -21,37 +21,37 @@ import functools import operator import typing -from typing import Any, Dict, Iterable, Iterator, Optional, Tuple +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union import warnings try: - import pandas + import pandas # type: ignore except ImportError: # pragma: NO COVER pandas = None try: - import geopandas + import geopandas # type: ignore except ImportError: geopandas = None else: _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" try: - import shapely.geos + import shapely.geos # type: ignore except ImportError: shapely = None else: _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read try: - import pyarrow + import pyarrow # type: ignore except ImportError: # pragma: NO COVER pyarrow = None import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator -import google.cloud._helpers +import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -130,7 +130,7 @@ def _view_use_legacy_sql_getter(table): class _TableBase: """Base class for Table-related classes with common functionality.""" - _PROPERTY_TO_API_FIELD = { + _PROPERTY_TO_API_FIELD: Dict[str, Union[str, List[str]]] = { "dataset_id": ["tableReference", "datasetId"], "project": ["tableReference", "projectId"], "table_id": ["tableReference", "tableId"], @@ -807,7 +807,7 @@ def view_query(self): view_use_legacy_sql = property(_view_use_legacy_sql_getter) - @view_use_legacy_sql.setter + @view_use_legacy_sql.setter # type: ignore # (redefinition from above) def view_use_legacy_sql(self, value): if not isinstance(value, bool): raise ValueError("Pass a boolean") @@ -1746,7 +1746,7 @@ def to_arrow( progress_bar.close() finally: if owns_bqstorage_client: - bqstorage_client._transport.grpc_channel.close() + bqstorage_client._transport.grpc_channel.close() # type: ignore if record_batches and bqstorage_client is not None: return pyarrow.Table.from_batches(record_batches) @@ -1763,7 +1763,7 @@ def to_dataframe_iterable( self, bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, dtypes: Dict[str, Any] = None, - max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, + max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2307,8 +2307,6 @@ def __repr__(self): key_vals = ["{}={}".format(key, val) for key, val in self._key()] return "PartitionRange({})".format(", ".join(key_vals)) - __hash__ = None - class RangePartitioning(object): """Range-based partitioning configuration for a table. @@ -2387,8 +2385,6 @@ def __repr__(self): key_vals = ["{}={}".format(key, repr(val)) for key, val in self._key()] return "RangePartitioning({})".format(", ".join(key_vals)) - __hash__ = None - class TimePartitioningType(object): """Specifies the type of time partitioning to perform.""" @@ -2657,7 +2653,7 @@ def _rows_page_start(iterator, page, response): # pylint: enable=unused-argument -def _table_arg_to_table_ref(value, default_project=None): +def _table_arg_to_table_ref(value, default_project=None) -> TableReference: """Helper to convert a string or Table to TableReference. This function keeps TableReference and other kinds of objects unchanged. @@ -2669,7 +2665,7 @@ def _table_arg_to_table_ref(value, default_project=None): return value -def _table_arg_to_table(value, default_project=None): +def _table_arg_to_table(value, default_project=None) -> Table: """Helper to convert a string or TableReference to a Table. This function keeps Table and other kinds of objects unchanged. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 1879a5cd81f6..5059118611c6 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -22,6 +22,7 @@ import nox +MYPY_VERSION = "mypy==0.910" PYTYPE_VERSION = "pytype==2021.4.9" BLACK_VERSION = "black==19.10b0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") @@ -41,6 +42,7 @@ "lint", "lint_setup_py", "blacken", + "mypy", "pytype", "docs", ] @@ -113,9 +115,24 @@ def unit_noextras(session): default(session, install_extras=False) +@nox.session(python=DEFAULT_PYTHON_VERSION) +def mypy(session): + """Run type checks with mypy.""" + session.install("-e", ".[all]") + session.install("ipython") + session.install(MYPY_VERSION) + + # Just install the dependencies' type info directly, since "mypy --install-types" + # might require an additional pass. + session.install( + "types-protobuf", "types-python-dateutil", "types-requests", "types-setuptools", + ) + session.run("mypy", "google/cloud") + + @nox.session(python=DEFAULT_PYTHON_VERSION) def pytype(session): - """Run type checks.""" + """Run type checks with pytype.""" # An indirect dependecy attrs==21.1.0 breaks the check, and installing a less # recent version avoids the error until a possibly better fix is found. # https://github.com/googleapis/python-bigquery/issues/655 diff --git a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py index cc1ca79039e7..3021a3dbff73 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py @@ -51,9 +51,16 @@ def setup(): memory_exporter = InMemorySpanExporter() span_processor = SimpleSpanProcessor(memory_exporter) tracer_provider.add_span_processor(span_processor) - trace.set_tracer_provider(tracer_provider) + + # OpenTelemetry API >= 0.12b0 does not allow overriding the tracer once + # initialized, thus directly override (and then restore) the internal global var. + orig_trace_provider = trace._TRACER_PROVIDER + trace._TRACER_PROVIDER = tracer_provider + yield memory_exporter + trace._TRACER_PROVIDER = orig_trace_provider + @pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") def test_opentelemetry_not_installed(setup, monkeypatch): From 968a2ef9c59aeab0e35e166182062d139981dc9e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 10 Nov 2021 11:50:58 -0600 Subject: [PATCH 1348/2016] chore: use gapic-generator-python 0.56.2 (#1056) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: update Java and Python dependencies PiperOrigin-RevId: 408420890 Source-Link: https://github.com/googleapis/googleapis/commit/2921f9fb3bfbd16f6b2da0104373e2b47a80a65e Source-Link: https://github.com/googleapis/googleapis-gen/commit/6598ca8cbbf5226733a099c4506518a5af6ff74c Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNjU5OGNhOGNiYmY1MjI2NzMzYTA5OWM0NTA2NTE4YTVhZjZmZjc0YyJ9 * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot --- .../google/cloud/bigquery_v2/types/model.py | 8 ++++++++ .../google/cloud/bigquery_v2/types/standard_sql.py | 2 ++ 2 files changed, 10 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index a56b214910f3..440bc0805ec3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -573,9 +573,11 @@ class FeatureValue(proto.Message): numerical_value (google.protobuf.wrappers_pb2.DoubleValue): The numerical feature value. This is the centroid value for this feature. + This field is a member of `oneof`_ ``value``. categorical_value (google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): The categorical feature value. + This field is a member of `oneof`_ ``value``. """ @@ -804,24 +806,30 @@ class EvaluationMetrics(proto.Message): regression_metrics (google.cloud.bigquery_v2.types.Model.RegressionMetrics): Populated for regression models and explicit feedback type matrix factorization models. + This field is a member of `oneof`_ ``metrics``. binary_classification_metrics (google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics): Populated for binary classification/classifier models. + This field is a member of `oneof`_ ``metrics``. multi_class_classification_metrics (google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics): Populated for multi-class classification/classifier models. + This field is a member of `oneof`_ ``metrics``. clustering_metrics (google.cloud.bigquery_v2.types.Model.ClusteringMetrics): Populated for clustering models. + This field is a member of `oneof`_ ``metrics``. ranking_metrics (google.cloud.bigquery_v2.types.Model.RankingMetrics): Populated for implicit feedback type matrix factorization models. + This field is a member of `oneof`_ ``metrics``. arima_forecasting_metrics (google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics): Populated for ARIMA models. + This field is a member of `oneof`_ ``metrics``. """ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py index d6c13363447d..e10619482193 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py @@ -49,10 +49,12 @@ class StandardSqlDataType(proto.Message): "INT64", "DATE", "ARRAY"). array_element_type (google.cloud.bigquery_v2.types.StandardSqlDataType): The type of the array's elements, if type_kind = "ARRAY". + This field is a member of `oneof`_ ``sub_type``. struct_type (google.cloud.bigquery_v2.types.StandardSqlStructType): The fields of this struct, in order, if type_kind = "STRUCT". + This field is a member of `oneof`_ ``sub_type``. """ From abf964031e1551c98d2bd29ef0fc6d6c47ac968c Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Tue, 16 Nov 2021 03:23:22 -0500 Subject: [PATCH 1349/2016] chore: update doc links from googleapis.dev to cloud.google.com (#1062) --- packages/google-cloud-bigquery/.repo-metadata.json | 2 +- packages/google-cloud-bigquery/README.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/.repo-metadata.json b/packages/google-cloud-bigquery/.repo-metadata.json index 124b40eb9b5f..48cc057198bb 100644 --- a/packages/google-cloud-bigquery/.repo-metadata.json +++ b/packages/google-cloud-bigquery/.repo-metadata.json @@ -2,7 +2,7 @@ "name": "bigquery", "name_pretty": "Google Cloud BigQuery", "product_documentation": "https://cloud.google.com/bigquery", - "client_documentation": "https://googleapis.dev/python/bigquery/latest", + "client_documentation": "https://cloud.google.com/python/docs/reference/bigquery/latest", "issue_tracker": "https://issuetracker.google.com/savedsearches/559654", "release_level": "ga", "language": "python", diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index d0ad059a23fb..7b890e87e455 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -18,7 +18,7 @@ processing power of Google's infrastructure. .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery -.. _Client Library Documentation: https://googleapis.dev/python/bigquery/latest +.. _Client Library Documentation: https://cloud.google.com/python/docs/reference/bigquery/latest .. _Product Documentation: https://cloud.google.com/bigquery/docs/reference/v2/ Quick Start From 0f57ca92fda78970d9864080879f1b5286b27946 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 Nov 2021 11:20:07 -0600 Subject: [PATCH 1350/2016] chore: revert update doc links from googleapis.dev to cloud.google.com (#1063) This reverts commit 942930e4e8972c8a8161e94ac01633c5754e60f6. --- packages/google-cloud-bigquery/.repo-metadata.json | 2 +- packages/google-cloud-bigquery/README.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/.repo-metadata.json b/packages/google-cloud-bigquery/.repo-metadata.json index 48cc057198bb..124b40eb9b5f 100644 --- a/packages/google-cloud-bigquery/.repo-metadata.json +++ b/packages/google-cloud-bigquery/.repo-metadata.json @@ -2,7 +2,7 @@ "name": "bigquery", "name_pretty": "Google Cloud BigQuery", "product_documentation": "https://cloud.google.com/bigquery", - "client_documentation": "https://cloud.google.com/python/docs/reference/bigquery/latest", + "client_documentation": "https://googleapis.dev/python/bigquery/latest", "issue_tracker": "https://issuetracker.google.com/savedsearches/559654", "release_level": "ga", "language": "python", diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 7b890e87e455..d0ad059a23fb 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -18,7 +18,7 @@ processing power of Google's infrastructure. .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery -.. _Client Library Documentation: https://cloud.google.com/python/docs/reference/bigquery/latest +.. _Client Library Documentation: https://googleapis.dev/python/bigquery/latest .. _Product Documentation: https://cloud.google.com/bigquery/docs/reference/v2/ Quick Start From 42c082c6502a9f623f0b016223f0700a9b087675 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 16 Nov 2021 22:57:51 +0100 Subject: [PATCH 1351/2016] feat: allow cell magic body to be a $variable (#1053) * feat: allow cell magic body to be a $variable * Fix missing indefinitive article in error msg * Adjust test assertion to error message change * Refactor logic for extracting query variable * Explicitly warn about missing query variable name * Thest the query "variable" is not identifier case --- .../google/cloud/bigquery/magics/magics.py | 23 +++ .../tests/unit/test_magics.py | 139 +++++++++++++++++- 2 files changed, 161 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 1d8d8ed3085e..5af0a3b5131b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -596,6 +596,29 @@ def _cell_magic(line, query): _handle_error(error, args.destination_var) return + # Check if query is given as a reference to a variable. + if query.startswith("$"): + query_var_name = query[1:] + + if not query_var_name: + missing_msg = 'Missing query variable name, empty "$" is not allowed.' + raise NameError(missing_msg) + + if query_var_name.isidentifier(): + ip = IPython.get_ipython() + query = ip.user_ns.get(query_var_name, ip) # ip serves as a sentinel + + if query is ip: + raise NameError( + f"Unknown query, variable {query_var_name} does not exist." + ) + else: + if not isinstance(query, (str, bytes)): + raise TypeError( + f"Query variable {query_var_name} must be a string " + "or a bytes-like value." + ) + # Any query that does not contain whitespace (aside from leading and trailing whitespace) # is assumed to be a table id if not re.search(r"\s", query): diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 36cbf499391e..e18d04d6418b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -584,7 +584,7 @@ def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_clears_display_in_verbose_mode(): +def test_bigquery_magic_clears_display_in_non_verbose_mode(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( @@ -1710,6 +1710,143 @@ def test_bigquery_magic_with_improperly_formatted_params(): ip.run_cell_magic("bigquery", "--params {17}", sql) +@pytest.mark.parametrize( + "raw_sql", ("SELECT answer AS 42", " \t SELECT answer AS 42 \t ") +) +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_valid_query_in_existing_variable(ipython_ns_cleanup, raw_sql): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + ipython_ns_cleanup.append((ip, "custom_query")) + ipython_ns_cleanup.append((ip, "query_results_df")) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + mock_result = pandas.DataFrame([42], columns=["answer"]) + query_job_mock.to_dataframe.return_value = mock_result + + ip.user_ns["custom_query"] = raw_sql + cell_body = "$custom_query" # Referring to an existing variable name (custom_query) + assert "query_results_df" not in ip.user_ns + + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + ip.run_cell_magic("bigquery", "query_results_df", cell_body) + + run_query_mock.assert_called_once_with(mock.ANY, raw_sql, mock.ANY) + + assert "query_results_df" in ip.user_ns # verify that the variable exists + df = ip.user_ns["query_results_df"] + assert len(df) == len(mock_result) # verify row count + assert list(df) == list(mock_result) # verify column names + assert list(df["answer"]) == [42] + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_nonexisting_query_variable(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + + ip.user_ns.pop("custom_query", None) # Make sure the variable does NOT exist. + cell_body = "$custom_query" # Referring to a non-existing variable name. + + with pytest.raises( + NameError, match=r".*custom_query does not exist.*" + ), run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "", cell_body) + + run_query_mock.assert_not_called() + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_empty_query_variable_name(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + cell_body = "$" # Not referring to any variable (name omitted). + + with pytest.raises( + NameError, match=r"(?i).*missing query variable name.*" + ), run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "", cell_body) + + run_query_mock.assert_not_called() + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + + ipython_ns_cleanup.append((ip, "custom_query")) + + ip.user_ns["custom_query"] = object() + cell_body = "$custom_query" # Referring to a non-string variable. + + with pytest.raises( + TypeError, match=r".*must be a string or a bytes-like.*" + ), run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "", cell_body) + + run_query_mock.assert_not_called() + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_query_variable_not_identifier(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + cell_body = "$123foo" # 123foo is not valid Python identifier + + with io.capture_output() as captured_io: + ip.run_cell_magic("bigquery", "", cell_body) + + # If "$" prefixes a string that is not a Python identifier, we do not treat such + # cell_body as a variable reference and just treat is as any other cell body input. + # If at the same time the cell body does not contain any whitespace, it is + # considered a table name, thus we expect an error that the table ID is not valid. + output = captured_io.stderr + assert "ERROR:" in output + assert "must be a fully-qualified ID" in output + + @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_with_invalid_multiple_option_values(): From 198c1dfe076610512e44a6e9196eb61bdfbba6b3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 18 Nov 2021 08:22:57 +0100 Subject: [PATCH 1352/2016] cleanup: silence non-relevant system test warnings (#1068) --- .../tests/system/test_pandas.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 1f43a369a2c2..1541dd3b9179 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -20,6 +20,7 @@ import json import io import operator +import warnings import google.api_core.retry import pkg_resources @@ -976,9 +977,17 @@ def test_to_geodataframe(bigquery_client, dataset_id): assert df["geog"][2] == wkt.loads("point(0 0)") assert isinstance(df, geopandas.GeoDataFrame) assert isinstance(df["geog"], geopandas.GeoSeries) - assert df.area[0] == 0.5 - assert pandas.isna(df.area[1]) - assert df.area[2] == 0.0 + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. + # We do not mind if the computed area is incorrect with respect to the + # GeoDataFrame data, as long as it matches the expected "incorrect" value. + warnings.filterwarnings("ignore", category=UserWarning) + assert df.area[0] == 0.5 + assert pandas.isna(df.area[1]) + assert df.area[2] == 0.0 + assert df.crs.srs == "EPSG:4326" assert df.crs.name == "WGS 84" assert df.geog.crs.srs == "EPSG:4326" From 9d4da77d5137a63864be6bcfacb7142a7213afda Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Fri, 19 Nov 2021 22:40:42 +0000 Subject: [PATCH 1353/2016] feat: promote `RowIterator.to_arrow_iterable` to public method (#1073) * feat: promote `to_arrow_iterable` to public method * use correct version number * Update google/cloud/bigquery/table.py Co-authored-by: Tim Swast --- .../google/cloud/bigquery/_pandas_helpers.py | 8 +- .../google/cloud/bigquery/table.py | 75 +++++- .../tests/unit/test_table.py | 218 ++++++++++++++++++ 3 files changed, 297 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index de6356c2a58a..263a1a9cff8f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -838,7 +838,12 @@ def _download_table_bqstorage( def download_arrow_bqstorage( - project_id, table, bqstorage_client, preserve_order=False, selected_fields=None, + project_id, + table, + bqstorage_client, + preserve_order=False, + selected_fields=None, + max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, ): return _download_table_bqstorage( project_id, @@ -847,6 +852,7 @@ def download_arrow_bqstorage( preserve_order=preserve_order, selected_fields=selected_fields, page_to_item=_bqstorage_page_to_arrow, + max_queue_size=max_queue_size, ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 60c8593c7e79..a0696f83f6bb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1629,8 +1629,49 @@ def _to_page_iterable( ) yield from result_pages - def _to_arrow_iterable(self, bqstorage_client=None): - """Create an iterable of arrow RecordBatches, to process the table as a stream.""" + def to_arrow_iterable( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore + ) -> Iterator["pyarrow.RecordBatch"]: + """[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream. + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. + + max_queue_size (Optional[int]): + The maximum number of result pages to hold in the internal queue when + streaming query results over the BigQuery Storage API. Ignored if + Storage API is not used. + + By default, the max queue size is set to the number of BQ Storage streams + created by the server. If ``max_queue_size`` is :data:`None`, the queue + size is infinite. + + Returns: + pyarrow.RecordBatch: + A generator of :class:`~pyarrow.RecordBatch`. + + Raises: + ValueError: + If the :mod:`pyarrow` library cannot be imported. + + .. versionadded:: 2.31.0 + """ + if pyarrow is None: + raise ValueError(_NO_PYARROW_ERROR) + + self._maybe_warn_max_results(bqstorage_client) + bqstorage_download = functools.partial( _pandas_helpers.download_arrow_bqstorage, self._project, @@ -1638,6 +1679,7 @@ def _to_arrow_iterable(self, bqstorage_client=None): bqstorage_client, preserve_order=self._preserve_order, selected_fields=self._selected_fields, + max_queue_size=max_queue_size, ) tabledata_list_download = functools.partial( _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema @@ -1729,7 +1771,7 @@ def to_arrow( ) record_batches = [] - for record_batch in self._to_arrow_iterable( + for record_batch in self.to_arrow_iterable( bqstorage_client=bqstorage_client ): record_batches.append(record_batch) @@ -2225,6 +2267,33 @@ def to_dataframe_iterable( raise ValueError(_NO_PANDAS_ERROR) return iter((pandas.DataFrame(),)) + def to_arrow_iterable( + self, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, + max_queue_size: Optional[int] = None, + ) -> Iterator["pyarrow.RecordBatch"]: + """Create an iterable of pandas DataFrames, to process the table as a stream. + + .. versionadded:: 2.31.0 + + Args: + bqstorage_client: + Ignored. Added for compatibility with RowIterator. + + max_queue_size: + Ignored. Added for compatibility with RowIterator. + + Returns: + An iterator yielding a single empty :class:`~pyarrow.RecordBatch`. + + Raises: + ValueError: + If the :mod:`pyarrow` library cannot be imported. + """ + if pyarrow is None: + raise ValueError(_NO_PYARROW_ERROR) + return iter((pyarrow.record_batch([]),)) + def __iter__(self): return iter(()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 3c68e3c5e75b..4f45eac3db4f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1840,6 +1840,25 @@ def test_to_arrow(self): self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 0) + @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) + def test_to_arrow_iterable_error_if_pyarrow_is_none(self): + row_iterator = self._make_one() + with self.assertRaises(ValueError): + row_iterator.to_arrow_iterable() + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_arrow_iterable(self): + row_iterator = self._make_one() + arrow_iter = row_iterator.to_arrow_iterable() + + result = list(arrow_iter) + + self.assertEqual(len(result), 1) + record_batch = result[0] + self.assertIsInstance(record_batch, pyarrow.RecordBatch) + self.assertEqual(record_batch.num_rows, 0) + self.assertEqual(record_batch.num_columns, 0) + @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_error_if_pandas_is_none(self): row_iterator = self._make_one() @@ -2151,6 +2170,205 @@ def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): ] assert matching_warnings, "Obsolete dependency warning not raised." + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_arrow_iterable(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField( + "child", + "RECORD", + mode="REPEATED", + fields=[ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + ), + ] + rows = [ + { + "f": [ + {"v": "Bharney Rhubble"}, + {"v": "33"}, + { + "v": [ + {"v": {"f": [{"v": "Whamm-Whamm Rhubble"}, {"v": "3"}]}}, + {"v": {"f": [{"v": "Hoppy"}, {"v": "1"}]}}, + ] + }, + ] + }, + { + "f": [ + {"v": "Wylma Phlyntstone"}, + {"v": "29"}, + { + "v": [ + {"v": {"f": [{"v": "Bepples Phlyntstone"}, {"v": "0"}]}}, + {"v": {"f": [{"v": "Dino"}, {"v": "4"}]}}, + ] + }, + ] + }, + ] + path = "/foo" + api_request = mock.Mock( + side_effect=[ + {"rows": [rows[0]], "pageToken": "NEXTPAGE"}, + {"rows": [rows[1]]}, + ] + ) + row_iterator = self._make_one( + _mock_client(), api_request, path, schema, page_size=1, max_results=5 + ) + + record_batches = row_iterator.to_arrow_iterable() + self.assertIsInstance(record_batches, types.GeneratorType) + record_batches = list(record_batches) + self.assertEqual(len(record_batches), 2) + + # Check the schema. + for record_batch in record_batches: + self.assertIsInstance(record_batch, pyarrow.RecordBatch) + self.assertEqual(record_batch.schema[0].name, "name") + self.assertTrue(pyarrow.types.is_string(record_batch.schema[0].type)) + self.assertEqual(record_batch.schema[1].name, "age") + self.assertTrue(pyarrow.types.is_int64(record_batch.schema[1].type)) + child_field = record_batch.schema[2] + self.assertEqual(child_field.name, "child") + self.assertTrue(pyarrow.types.is_list(child_field.type)) + self.assertTrue(pyarrow.types.is_struct(child_field.type.value_type)) + self.assertEqual(child_field.type.value_type[0].name, "name") + self.assertEqual(child_field.type.value_type[1].name, "age") + + # Check the data. + record_batch_1 = record_batches[0].to_pydict() + names = record_batch_1["name"] + ages = record_batch_1["age"] + children = record_batch_1["child"] + self.assertEqual(names, ["Bharney Rhubble"]) + self.assertEqual(ages, [33]) + self.assertEqual( + children, + [ + [ + {"name": "Whamm-Whamm Rhubble", "age": 3}, + {"name": "Hoppy", "age": 1}, + ], + ], + ) + + record_batch_2 = record_batches[1].to_pydict() + names = record_batch_2["name"] + ages = record_batch_2["age"] + children = record_batch_2["child"] + self.assertEqual(names, ["Wylma Phlyntstone"]) + self.assertEqual(ages, [29]) + self.assertEqual( + children, + [[{"name": "Bepples Phlyntstone", "age": 0}, {"name": "Dino", "age": 4}]], + ) + + @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) + def test_to_arrow_iterable_error_if_pyarrow_is_none(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + with pytest.raises(ValueError, match="pyarrow"): + row_iterator.to_arrow_iterable() + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_arrow_iterable_w_bqstorage(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud.bigquery_storage_v1 import reader + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( + big_query_read_grpc_transport.BigQueryReadGrpcTransport + ) + streams = [ + # Use two streams we want to check frames are read from each stream. + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, + ] + session = bigquery_storage.types.ReadSession(streams=streams) + arrow_schema = pyarrow.schema( + [ + pyarrow.field("colA", pyarrow.int64()), + # Not alphabetical to test column order. + pyarrow.field("colC", pyarrow.float64()), + pyarrow.field("colB", pyarrow.string()), + ] + ) + session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() + bqstorage_client.create_read_session.return_value = session + + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) + bqstorage_client.read_rows.return_value = mock_rowstream + + mock_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_rowstream.rows.return_value = mock_rows + page_items = [ + pyarrow.array([1, -1]), + pyarrow.array([2.0, 4.0]), + pyarrow.array(["abc", "def"]), + ] + + expected_record_batch = pyarrow.RecordBatch.from_arrays( + page_items, schema=arrow_schema + ) + expected_num_record_batches = 3 + + mock_page = mock.create_autospec(reader.ReadRowsPage) + mock_page.to_arrow.return_value = expected_record_batch + mock_pages = (mock_page,) * expected_num_record_batches + type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) + + schema = [ + schema.SchemaField("colA", "INTEGER"), + schema.SchemaField("colC", "FLOAT"), + schema.SchemaField("colB", "STRING"), + ] + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=schema, + ) + + record_batches = list( + row_iterator.to_arrow_iterable(bqstorage_client=bqstorage_client) + ) + total_record_batches = len(streams) * len(mock_pages) + self.assertEqual(len(record_batches), total_record_batches) + + for record_batch in record_batches: + # Are the record batches return as expected? + self.assertEqual(record_batch, expected_record_batch) + + # Don't close the client if it was passed in. + bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField From 70f9b87b6b1ef767141d291bfad9e89c78c34b1b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 24 Nov 2021 16:12:10 +0100 Subject: [PATCH 1354/2016] fix: apply timeout to all resumable upload requests (#1070) * fix: apply timeout to all resumable upload requests * Fix stub in test case * Improve timeout type and other type annotations * Annnotate return type of _do_resumable_upload() --- .../google/cloud/bigquery/client.py | 186 +++++++++++------- .../tests/unit/test_client.py | 18 +- 2 files changed, 128 insertions(+), 76 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 3e641e1952cc..a5f3d5419d7e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -31,9 +31,10 @@ import typing from typing import ( Any, - BinaryIO, Dict, + IO, Iterable, + Mapping, List, Optional, Sequence, @@ -112,10 +113,15 @@ pyarrow = _helpers.PYARROW_VERSIONS.try_import() TimeoutType = Union[float, None] +ResumableTimeoutType = Union[ + None, float, Tuple[float, float] +] # for resumable media methods if typing.TYPE_CHECKING: # pragma: NO COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] + import pandas # type: ignore + import requests # required by api-core _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 @@ -2348,7 +2354,7 @@ def load_table_from_uri( def load_table_from_file( self, - file_obj: BinaryIO, + file_obj: IO[bytes], destination: Union[Table, TableReference, TableListItem, str], rewind: bool = False, size: int = None, @@ -2358,7 +2364,7 @@ def load_table_from_file( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: TimeoutType = DEFAULT_TIMEOUT, + timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2366,42 +2372,42 @@ def load_table_from_file( returns a :class:`~google.cloud.bigquery.job.LoadJob`. Args: - file_obj (file): A file handle opened in binary mode for reading. - destination (Union[ \ - google.cloud.bigquery.table.Table, \ - google.cloud.bigquery.table.TableReference, \ - google.cloud.bigquery.table.TableListItem, \ - str, \ - ]): + file_obj: + A file handle opened in binary mode for reading. + destination: Table into which data is to be loaded. If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: - rewind (Optional[bool]): + rewind: If True, seek to the beginning of the file handle before reading the file. - size (Optional[int]): + size: The number of bytes to read from the file handle. If size is ``None`` or large, resumable upload will be used. Otherwise, multipart upload will be used. - num_retries (Optional[int]): Number of upload retries. Defaults to 6. - job_id (Optional[str]): Name of the job. - job_id_prefix (Optional[str]): + num_retries: Number of upload retries. Defaults to 6. + job_id: Name of the job. + job_id_prefix: The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location (Optional[str]): + location: Location where to run the job. Must match the location of the destination table. - project (Optional[str]): + project: Project ID of the project of where to run the job. Defaults to the client's project. - job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): + job_config: Extra configuration options for the job. - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request + may be repeated several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -2453,7 +2459,7 @@ def load_table_from_file( def load_table_from_dataframe( self, - dataframe, + dataframe: "pandas.DataFrame", destination: Union[Table, TableReference, str], num_retries: int = _DEFAULT_NUM_RETRIES, job_id: str = None, @@ -2462,7 +2468,7 @@ def load_table_from_dataframe( project: str = None, job_config: LoadJobConfig = None, parquet_compression: str = "snappy", - timeout: TimeoutType = DEFAULT_TIMEOUT, + timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. @@ -2481,9 +2487,9 @@ def load_table_from_dataframe( https://github.com/googleapis/python-bigquery/issues/19 Args: - dataframe (pandas.DataFrame): + dataframe: A :class:`~pandas.DataFrame` containing the data to load. - destination (google.cloud.bigquery.table.TableReference): + destination: The destination table to use for loading the data. If it is an existing table, the schema of the :class:`~pandas.DataFrame` must match the schema of the destination table. If the table @@ -2495,19 +2501,19 @@ def load_table_from_dataframe( :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: - num_retries (Optional[int]): Number of upload retries. - job_id (Optional[str]): Name of the job. - job_id_prefix (Optional[str]): + num_retries: Number of upload retries. + job_id: Name of the job. + job_id_prefix: The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location (Optional[str]): + location: Location where to run the job. Must match the location of the destination table. - project (Optional[str]): + project: Project ID of the project of where to run the job. Defaults to the client's project. - job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): + job_config: Extra configuration options for the job. To override the default pandas data type conversions, supply @@ -2524,7 +2530,7 @@ def load_table_from_dataframe( :attr:`~google.cloud.bigquery.job.SourceFormat.CSV` and :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are supported. - parquet_compression (Optional[str]): + parquet_compression: [Beta] The compression method to use if intermittently serializing ``dataframe`` to a parquet file. @@ -2537,9 +2543,13 @@ def load_table_from_dataframe( passed as the ``compression`` argument to the underlying ``DataFrame.to_parquet()`` method. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request may + be repeated several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -2717,7 +2727,7 @@ def load_table_from_json( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: TimeoutType = DEFAULT_TIMEOUT, + timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -2741,36 +2751,35 @@ def load_table_from_json( client = bigquery.Client() client.load_table_from_file(data_as_file, ...) - destination (Union[ \ - google.cloud.bigquery.table.Table, \ - google.cloud.bigquery.table.TableReference, \ - google.cloud.bigquery.table.TableListItem, \ - str, \ - ]): + destination: Table into which data is to be loaded. If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: - num_retries (Optional[int]): Number of upload retries. - job_id (Optional[str]): Name of the job. - job_id_prefix (Optional[str]): + num_retries: Number of upload retries. + job_id: Name of the job. + job_id_prefix: The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location (Optional[str]): + location: Location where to run the job. Must match the location of the destination table. - project (Optional[str]): + project: Project ID of the project of where to run the job. Defaults to the client's project. - job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): + job_config: Extra configuration options for the job. The ``source_format`` setting is always set to :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`. - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request may + be repeated several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -2819,60 +2828,77 @@ def load_table_from_json( ) def _do_resumable_upload( - self, stream, metadata, num_retries, timeout, project=None - ): + self, + stream: IO[bytes], + metadata: Mapping[str, str], + num_retries: int, + timeout: Optional[ResumableTimeoutType], + project: Optional[str] = None, + ) -> "requests.Response": """Perform a resumable upload. Args: - stream (IO[bytes]): A bytes IO object open for reading. + stream: A bytes IO object open for reading. - metadata (Dict): The metadata associated with the upload. + metadata: The metadata associated with the upload. - num_retries (int): + num_retries: Number of upload retries. (Deprecated: This argument will be removed in a future release.) - timeout (float): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request may + be repeated several times using the same timeout each time. - project (Optional[str]): + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + project: Project ID of the project of where to run the upload. Defaults to the client's project. Returns: - requests.Response: - The "200 OK" response object returned after the final chunk - is uploaded. + The "200 OK" response object returned after the final chunk + is uploaded. """ upload, transport = self._initiate_resumable_upload( stream, metadata, num_retries, timeout, project=project ) while not upload.finished: - response = upload.transmit_next_chunk(transport) + response = upload.transmit_next_chunk(transport, timeout=timeout) return response def _initiate_resumable_upload( - self, stream, metadata, num_retries, timeout, project=None + self, + stream: IO[bytes], + metadata: Mapping[str, str], + num_retries: int, + timeout: Optional[ResumableTimeoutType], + project: Optional[str] = None, ): """Initiate a resumable upload. Args: - stream (IO[bytes]): A bytes IO object open for reading. + stream: A bytes IO object open for reading. - metadata (Dict): The metadata associated with the upload. + metadata: The metadata associated with the upload. - num_retries (int): + num_retries: Number of upload retries. (Deprecated: This argument will be removed in a future release.) - timeout (float): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request may + be repeated several times using the same timeout each time. - project (Optional[str]): + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + project: Project ID of the project of where to run the upload. Defaults to the client's project. @@ -2921,29 +2947,39 @@ def _initiate_resumable_upload( return upload, transport def _do_multipart_upload( - self, stream, metadata, size, num_retries, timeout, project=None + self, + stream: IO[bytes], + metadata: Mapping[str, str], + size: int, + num_retries: int, + timeout: Optional[ResumableTimeoutType], + project: Optional[str] = None, ): """Perform a multipart upload. Args: - stream (IO[bytes]): A bytes IO object open for reading. + stream: A bytes IO object open for reading. - metadata (Dict): The metadata associated with the upload. + metadata: The metadata associated with the upload. - size (int): + size: The number of bytes to be uploaded (which will be read from ``stream``). If not provided, the upload will be concluded once ``stream`` is exhausted (or :data:`None`). - num_retries (int): + num_retries: Number of upload retries. (Deprecated: This argument will be removed in a future release.) - timeout (float): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request may + be repeated several times using the same timeout each time. - project (Optional[str]): + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + project: Project ID of the project of where to run the upload. Defaults to the client's project. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 97aa2eedbf22..9c93765e8b03 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -8235,6 +8235,22 @@ def test__do_resumable_upload_custom_project(self): assert initiation_url is not None assert "projects/custom-project" in initiation_url + def test__do_resumable_upload_custom_timeout(self): + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = self._make_transport( + self._make_resumable_upload_responses(file_obj_len) + ) + client = self._make_client(transport) + + client._do_resumable_upload( + file_obj, self.EXPECTED_CONFIGURATION, num_retries=0, timeout=3.14 + ) + + # The timeout should be applied to all underlying calls. + for call_args in transport.request.call_args_list: + assert call_args.kwargs.get("timeout") == 3.14 + def test__do_multipart_upload(self): transport = self._make_transport([self._make_response(http.client.OK)]) client = self._make_client(transport) @@ -8442,7 +8458,7 @@ def test_upload_chunksize(client): upload.finished = False - def transmit_next_chunk(transport): + def transmit_next_chunk(transport, *args, **kwargs): upload.finished = True result = mock.MagicMock() result.json.return_value = {} From e73f1b438044172dc1e500a54649178dbafdf601 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 2 Dec 2021 14:14:11 -0600 Subject: [PATCH 1355/2016] chore: release 2.31.0 (#1066) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 18 ++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 4e10ad82670c..5ba219d20953 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.31.0](https://www.github.com/googleapis/python-bigquery/compare/v2.30.1...v2.31.0) (2021-11-24) + + +### Features + +* allow cell magic body to be a $variable ([#1053](https://www.github.com/googleapis/python-bigquery/issues/1053)) ([3a681e0](https://www.github.com/googleapis/python-bigquery/commit/3a681e046819df18118aa0b2b5733416d004c9b3)) +* promote `RowIterator.to_arrow_iterable` to public method ([#1073](https://www.github.com/googleapis/python-bigquery/issues/1073)) ([21cd710](https://www.github.com/googleapis/python-bigquery/commit/21cd71022d60c32104f8f90ee2ca445fbb43f7f3)) + + +### Bug Fixes + +* apply timeout to all resumable upload requests ([#1070](https://www.github.com/googleapis/python-bigquery/issues/1070)) ([3314dfb](https://www.github.com/googleapis/python-bigquery/commit/3314dfbed62488503dc41b11e403a672fcf71048)) + + +### Dependencies + +* support OpenTelemetry >= 1.1.0 ([#1050](https://www.github.com/googleapis/python-bigquery/issues/1050)) ([4616cd5](https://www.github.com/googleapis/python-bigquery/commit/4616cd58d3c6da641fb881ce99a87dcdedc20ba2)) + ### [2.30.1](https://www.github.com/googleapis/python-bigquery/compare/v2.30.0...v2.30.1) (2021-11-04) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 877ea53d83f3..6329658af5f2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.30.1" +__version__ = "2.31.0" From 34477be9d36364f612ab0bd688dba33780a507c6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 2 Dec 2021 14:42:14 -0600 Subject: [PATCH 1356/2016] test: check extreme DATE/DATETIME values can be loaded from pandas DataFrame (#1078) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Towards #1076 🦕 (edit: moved to https://github.com/googleapis/python-db-dtypes-pandas/issues/45 ) --- .../tests/system/test_pandas.py | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 1541dd3b9179..f3534cd19051 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -268,7 +268,7 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): See: https://github.com/googleapis/google-cloud-python/issues/7370 """ # Schema with all scalar types. - scalars_schema = ( + table_schema = ( bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("bytes_col", "BYTES"), bigquery.SchemaField("date_col", "DATE"), @@ -283,15 +283,6 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - table_schema = scalars_schema + ( - # TODO: Array columns can't be read due to NULLABLE versus REPEATED - # mode mismatch. See: - # https://issuetracker.google.com/133415569#comment3 - # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), - # TODO: Support writing StructArrays to Parquet. See: - # https://jira.apache.org/jira/browse/ARROW-2587 - # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), - ) num_rows = 100 nulls = [None] * num_rows df_data = [ @@ -372,7 +363,8 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id # See: # https://github.com/googleapis/python-bigquery/issues/61 # https://issuetracker.google.com/issues/151765076 - scalars_schema = ( + table_schema = ( + bigquery.SchemaField("row_num", "INTEGER"), bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("bytes_col", "BYTES"), bigquery.SchemaField("date_col", "DATE"), @@ -387,17 +379,8 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - table_schema = scalars_schema + ( - # TODO: Array columns can't be read due to NULLABLE versus REPEATED - # mode mismatch. See: - # https://issuetracker.google.com/133415569#comment3 - # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), - # TODO: Support writing StructArrays to Parquet. See: - # https://jira.apache.org/jira/browse/ARROW-2587 - # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), - ) - df_data = [ + ("row_num", [1, 2, 3]), ("bool_col", [True, None, False]), ("bytes_col", [b"abc", None, b"def"]), ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), @@ -464,6 +447,22 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id assert tuple(table.schema) == table_schema assert table.num_rows == 3 + result = bigquery_client.list_rows(table).to_dataframe() + result.sort_values("row_num", inplace=True) + + # Check that extreme DATE/DATETIME values are loaded correctly. + # https://github.com/googleapis/python-bigquery/issues/1076 + assert result["date_col"][0] == datetime.date(1, 1, 1) + assert result["date_col"][2] == datetime.date(9999, 12, 31) + assert result["dt_col"][0] == datetime.datetime(1, 1, 1, 0, 0, 0) + assert result["dt_col"][2] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + assert result["ts_col"][0] == datetime.datetime( + 1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc + ) + assert result["ts_col"][2] == datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ) + def test_load_table_from_dataframe_w_struct_datatype(bigquery_client, dataset_id): """Test that a DataFrame with struct datatype can be uploaded if a From 8b245d4074248fbc8405684bcc393d42237e09f7 Mon Sep 17 00:00:00 2001 From: Lo Ferris <50979514+loferris@users.noreply.github.com> Date: Mon, 6 Dec 2021 09:50:44 -0800 Subject: [PATCH 1357/2016] docs: add sample for revoking dataset access (#778) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * revoke dataset access setup * basic template for sample * sample + test * revoke dataset access sample * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md * docs: add sample for revoking dataset access - update year and string formatting * docs: add sample for revoking dataset access - move to snippets and change parameter pattern for readibility * moving update_dataset to /snippets and adjusting imports on both revoke_access and update_access * Update samples/snippets/revoke_dataset_access.py removed nested START/END tags Co-authored-by: Tim Swast * Update samples/snippets/revoke_dataset_access.py update readability in API request Co-authored-by: Tim Swast * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md * updated test * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md * change after running test * resolving linting failure, rewriting test * removed relative import errors * remove relative mport from update_dataset_access * adding fixture to conftest.py * updated sample * updating sample to match new update_access sample * fixing region tags * consolidated tests into one file for both methods * updating test to full_dataset format * updated revoke sample * updating test * refactored sample * Update samples/snippets/conftest.py * Update samples/snippets/revoke_dataset_access.py Co-authored-by: Tim Swast * Update samples/snippets/update_dataset_access.py Co-authored-by: Tim Swast * Update samples/snippets/revoke_dataset_access.py Co-authored-by: Tim Swast * Update samples/snippets/revoke_dataset_access.py Co-authored-by: Tim Swast * refactoring entry * added comment for entry access * Update samples/snippets/README.rst Co-authored-by: Tim Swast * Update samples/snippets/dataset_access_test.py Co-authored-by: Tim Swast * Update samples/snippets/dataset_access_test.py Co-authored-by: Tim Swast * added develper TODO in sample * add comments to samples Co-authored-by: Owl Bot Co-authored-by: Tim Swast Co-authored-by: Peter Lamut Co-authored-by: Anthonios Partheniou Co-authored-by: meredithslota --- .../samples/snippets/README.rst | 27 ++----- .../samples/snippets/conftest.py | 5 ++ .../samples/snippets/dataset_access_test.py | 48 +++++++++++++ .../samples/snippets/revoke_dataset_access.py | 52 ++++++++++++++ .../samples/snippets/update_dataset_access.py | 70 +++++++++++++++++++ 5 files changed, 180 insertions(+), 22 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/revoke_dataset_access.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index 7c3e19e68b16..05af1e812038 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -1,4 +1,3 @@ - .. This file is automatically generated. Do not edit this file directly. Google BigQuery Python Samples @@ -16,11 +15,14 @@ This directory contains samples for Google BigQuery. `Google BigQuery`_ is Googl .. _Google BigQuery: https://cloud.google.com/bigquery/docs +To run the sample, you need to have the `BigQuery Admin` role. + + + Setup ------------------------------------------------------------------------------- - Authentication ++++++++++++++ @@ -31,9 +33,6 @@ credentials for applications. .. _Authentication Getting Started Guide: https://cloud.google.com/docs/authentication/getting-started - - - Install Dependencies ++++++++++++++++++++ @@ -64,15 +63,9 @@ Install Dependencies .. _pip: https://pip.pypa.io/ .. _virtualenv: https://virtualenv.pypa.io/ - - - - - Samples ------------------------------------------------------------------------------- - Quickstart +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -89,8 +82,6 @@ To run this sample: $ python quickstart.py - - Simple Application +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -107,8 +98,6 @@ To run this sample: $ python simple_app.py - - User Credentials +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -124,7 +113,6 @@ To run this sample: $ python user_credentials.py - usage: user_credentials.py [-h] [--launch-browser] project Command-line application to run a query using user credentials. @@ -143,10 +131,6 @@ To run this sample: - - - - The client library ------------------------------------------------------------------------------- @@ -162,5 +146,4 @@ to `browse the source`_ and `report issues`_. https://github.com/GoogleCloudPlatform/google-cloud-python/issues - -.. _Google Cloud SDK: https://cloud.google.com/sdk/ +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/snippets/conftest.py b/packages/google-cloud-bigquery/samples/snippets/conftest.py index 74984f902574..e8aa084873e9 100644 --- a/packages/google-cloud-bigquery/samples/snippets/conftest.py +++ b/packages/google-cloud-bigquery/samples/snippets/conftest.py @@ -50,6 +50,11 @@ def dataset_id(bigquery_client: bigquery.Client, project_id: str): bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) +@pytest.fixture(scope="session") +def entity_id(bigquery_client: bigquery.Client, dataset_id: str): + return "cloud-developer-relations@google.com" + + @pytest.fixture(scope="session") def dataset_id_us_east1(bigquery_client: bigquery.Client, project_id: str): dataset_id = prefixer.create_prefix() diff --git a/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py b/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py new file mode 100644 index 000000000000..21776c149252 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py @@ -0,0 +1,48 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import revoke_dataset_access +import update_dataset_access + + +def test_dataset_access_permissions(capsys, dataset_id, entity_id, bigquery_client): + original_dataset = bigquery_client.get_dataset(dataset_id) + update_dataset_access.update_dataset_access(dataset_id, entity_id) + full_dataset_id = "{}.{}".format( + original_dataset.project, original_dataset.dataset_id + ) + + out, err = capsys.readouterr() + assert ( + "Updated dataset '{}' with modified user permissions.".format(full_dataset_id) + in out + ) + + updated_dataset = bigquery_client.get_dataset(dataset_id) + updated_dataset_entries = list(updated_dataset.access_entries) + updated_dataset_entity_ids = {entry.entity_id for entry in updated_dataset_entries} + assert entity_id in updated_dataset_entity_ids + revoke_dataset_access.revoke_dataset_access(dataset_id, entity_id) + revoked_dataset = bigquery_client.get_dataset(dataset_id) + revoked_dataset_entries = list(revoked_dataset.access_entries) + + full_dataset_id = f"{updated_dataset.project}.{updated_dataset.dataset_id}" + out, err = capsys.readouterr() + assert ( + f"Revoked dataset access for '{entity_id}' to ' dataset '{full_dataset_id}.'" + in out + ) + assert len(revoked_dataset_entries) == len(updated_dataset_entries) - 1 + revoked_dataset_entity_ids = {entry.entity_id for entry in revoked_dataset_entries} + assert entity_id not in revoked_dataset_entity_ids diff --git a/packages/google-cloud-bigquery/samples/snippets/revoke_dataset_access.py b/packages/google-cloud-bigquery/samples/snippets/revoke_dataset_access.py new file mode 100644 index 000000000000..ce78f5750316 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/revoke_dataset_access.py @@ -0,0 +1,52 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def revoke_dataset_access(dataset_id: str, entity_id: str): + original_dataset_id = dataset_id + original_entity_id = entity_id + + # [START bigquery_revoke_dataset_access] + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + dataset_id = "your-project.your_dataset" + + # TODO(developer): Set entity_id to the ID of the email or group from whom you are revoking access. + entity_id = "user-or-group-to-remove@example.com" + # [END bigquery_revoke_dataset_access] + dataset_id = original_dataset_id + entity_id = original_entity_id + # [START bigquery_revoke_dataset_access] + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + dataset = client.get_dataset(dataset_id) # Make an API request. + + entries = list(dataset.access_entries) + dataset.access_entries = [ + entry for entry in entries if entry.entity_id != entity_id + ] + + dataset = client.update_dataset( + dataset, + # Update just the `access_entries` property of the dataset. + ["access_entries"], + ) # Make an API request. + + full_dataset_id = f"{dataset.project}.{dataset.dataset_id}" + print(f"Revoked dataset access for '{entity_id}' to ' dataset '{full_dataset_id}.'") + # [END bigquery_revoke_dataset_access] diff --git a/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py b/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py new file mode 100644 index 000000000000..fb3bfa14ffad --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py @@ -0,0 +1,70 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_dataset_access(dataset_id: str, entity_id: str): + original_dataset_id = dataset_id + original_entity_id = entity_id + + # [START bigquery_update_dataset_access] + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + dataset_id = "your-project.your_dataset" + + # TODO(developer): Set entity_id to the ID of the email or group from whom + # you are adding access. Alternatively, to the JSON REST API representation + # of the entity, such as a view's table reference. + entity_id = "user-or-group-to-add@example.com" + + # TODO(developer): Set entity_type to the type of entity you are granting access to. + # Common types include: + # + # * "userByEmail" -- A single user or service account. For example "fred@example.com" + # * "groupByEmail" -- A group of users. For example "example@googlegroups.com" + # * "view" -- An authorized view. For example + # {"projectId": "p", "datasetId": "d", "tableId": "v"} + # + # For a complete reference, see the REST API reference documentation: + # https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#Dataset.FIELDS.access + entity_type = "groupByEmail" + + # TODO(developer): Set role to a one of the "Basic roles for datasets" + # described here: + # https://cloud.google.com/bigquery/docs/access-control-basic-roles#dataset-basic-roles + role = "READER" + # [END bigquery_update_dataset_access] + dataset_id = original_dataset_id + entity_id = original_entity_id + # [START bigquery_update_dataset_access] + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + dataset = client.get_dataset(dataset_id) # Make an API request. + + entries = list(dataset.access_entries) + entries.append( + bigquery.AccessEntry(role=role, entity_type=entity_type, entity_id=entity_id,) + ) + dataset.access_entries = entries + + dataset = client.update_dataset(dataset, ["access_entries"]) # Make an API request. + + full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id) + print( + "Updated dataset '{}' with modified user permissions.".format(full_dataset_id) + ) + # [END bigquery_update_dataset_access] From 3a8adae61afee5a66ff39b7143fe1f6a0581c0be Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 9 Dec 2021 15:46:17 +0000 Subject: [PATCH 1358/2016] chore: update python-docs-samples link to main branch (#1082) --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/samples/AUTHORING_GUIDE.md | 2 +- packages/google-cloud-bigquery/samples/CONTRIBUTING.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 108063d4dee4..0b3c8cd98f89 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:4ee57a76a176ede9087c14330c625a71553cf9c72828b2c0ca12f5338171ba60 + digest: sha256:2f90537dd7df70f6b663cd654b1fa5dee483cf6a4edcfd46072b2775be8a23ec diff --git a/packages/google-cloud-bigquery/samples/AUTHORING_GUIDE.md b/packages/google-cloud-bigquery/samples/AUTHORING_GUIDE.md index 55c97b32f4c1..8249522ffc2d 100644 --- a/packages/google-cloud-bigquery/samples/AUTHORING_GUIDE.md +++ b/packages/google-cloud-bigquery/samples/AUTHORING_GUIDE.md @@ -1 +1 @@ -See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/AUTHORING_GUIDE.md \ No newline at end of file +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/CONTRIBUTING.md b/packages/google-cloud-bigquery/samples/CONTRIBUTING.md index 34c882b6f1a3..f5fe2e6baf13 100644 --- a/packages/google-cloud-bigquery/samples/CONTRIBUTING.md +++ b/packages/google-cloud-bigquery/samples/CONTRIBUTING.md @@ -1 +1 @@ -See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/CONTRIBUTING.md \ No newline at end of file +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/CONTRIBUTING.md \ No newline at end of file From f1c4a0eda0ce9c0db9a4985b77d2e7bc1407f39e Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Tue, 14 Dec 2021 08:11:21 -0800 Subject: [PATCH 1359/2016] feat: support authorized dataset entity (#1075) * feat: support authorized dataset entity * cleanup * add test and cache the resource from from_api_repr in a _properties value * lint * update samples to use enums * update to_api_repr and add tests * refactor --- .../google/cloud/bigquery/dataset.py | 73 +++++++++---------- .../google/cloud/bigquery/enums.py | 13 ++++ .../snippets/authorized_view_tutorial.py | 5 +- .../samples/snippets/update_dataset_access.py | 4 +- .../tests/unit/test_dataset.py | 40 +++++++++- 5 files changed, 92 insertions(+), 43 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index ff015d6059c8..499072de2a76 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -77,10 +77,10 @@ def _get_routine_reference(self, routine_id): class AccessEntry(object): """Represents grant of an access role to an entity. - An entry must have exactly one of the allowed :attr:`ENTITY_TYPES`. If - anything but ``view`` or ``routine`` are set, a ``role`` is also required. - ``role`` is omitted for ``view`` and ``routine``, because they are always - read-only. + An entry must have exactly one of the allowed + :class:`google.cloud.bigquery.enums.EntityTypes`. If anything but ``view``, ``routine``, + or ``dataset`` are set, a ``role`` is also required. ``role`` is omitted for ``view``, + ``routine``, ``dataset``, because they are always read-only. See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets. @@ -88,17 +88,18 @@ class AccessEntry(object): role (str): Role granted to the entity. The following string values are supported: `'READER'`, `'WRITER'`, `'OWNER'`. It may also be - :data:`None` if the ``entity_type`` is ``view`` or ``routine``. + :data:`None` if the ``entity_type`` is ``view``, ``routine``, or ``dataset``. entity_type (str): - Type of entity being granted the role. One of :attr:`ENTITY_TYPES`. + Type of entity being granted the role. See + :class:`google.cloud.bigquery.enums.EntityTypes` for supported types. entity_id (Union[str, Dict[str, str]]): - If the ``entity_type`` is not 'view' or 'routine', the ``entity_id`` - is the ``str`` ID of the entity being granted the role. If the - ``entity_type`` is 'view' or 'routine', the ``entity_id`` is a ``dict`` - representing the view or routine from a different dataset to grant - access to in the following format for views:: + If the ``entity_type`` is not 'view', 'routine', or 'dataset', the + ``entity_id`` is the ``str`` ID of the entity being granted the role. If + the ``entity_type`` is 'view' or 'routine', the ``entity_id`` is a ``dict`` + representing the view or routine from a different dataset to grant access + to in the following format for views:: { 'projectId': string, @@ -114,11 +115,22 @@ class AccessEntry(object): 'routineId': string } + If the ``entity_type`` is 'dataset', the ``entity_id`` is a ``dict`` that includes + a 'dataset' field with a ``dict`` representing the dataset and a 'target_types' + field with a ``str`` value of the dataset's resource type:: + + { + 'dataset': { + 'projectId': string, + 'datasetId': string, + }, + 'target_types: 'VIEWS' + } + Raises: ValueError: - If the ``entity_type`` is not among :attr:`ENTITY_TYPES`, or if a - ``view`` or a ``routine`` has ``role`` set, or a non ``view`` and - non ``routine`` **does not** have a ``role`` set. + If a ``view``, ``routine``, or ``dataset`` has ``role`` set, or a non ``view``, + non ``routine``, and non ``dataset`` **does not** have a ``role`` set. Examples: >>> entry = AccessEntry('OWNER', 'userByEmail', 'user@example.com') @@ -131,27 +143,9 @@ class AccessEntry(object): >>> entry = AccessEntry(None, 'view', view) """ - ENTITY_TYPES = frozenset( - [ - "userByEmail", - "groupByEmail", - "domain", - "specialGroup", - "view", - "iamMember", - "routine", - ] - ) - """Allowed entity types.""" - - def __init__(self, role, entity_type, entity_id): - if entity_type not in self.ENTITY_TYPES: - message = "Entity type %r not among: %s" % ( - entity_type, - ", ".join(self.ENTITY_TYPES), - ) - raise ValueError(message) - if entity_type in ("view", "routine"): + def __init__(self, role=None, entity_type=None, entity_id=None): + self._properties = {} + if entity_type in ("view", "routine", "dataset"): if role is not None: raise ValueError( "Role must be None for a %r. Received " @@ -162,7 +156,6 @@ def __init__(self, role, entity_type, entity_id): raise ValueError( "Role must be set for entity " "type %r" % (entity_type,) ) - self._role = role self._entity_type = entity_type self._entity_id = entity_id @@ -214,7 +207,8 @@ def to_api_repr(self): Returns: Dict[str, object]: Access entry represented as an API resource """ - resource = {self._entity_type: self._entity_id} + resource = copy.deepcopy(self._properties) + resource[self._entity_type] = self._entity_id if self._role is not None: resource["role"] = self._role return resource @@ -241,7 +235,10 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry": entity_type, entity_id = entry.popitem() if len(entry) != 0: raise ValueError("Entry has unexpected keys remaining.", entry) - return cls(role, entity_type, entity_id) + + config = cls(role, entity_type, entity_id) + config._properties = copy.deepcopy(resource) + return config class DatasetReference(object): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 0eaaffd2ef49..7fc0a5fd6b1b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -232,6 +232,19 @@ def _make_sql_scalars_enum(): StandardSqlDataTypes = _make_sql_scalars_enum() +class EntityTypes(str, enum.Enum): + """Enum of allowed entity type names in AccessEntry""" + + USER_BY_EMAIL = "userByEmail" + GROUP_BY_EMAIL = "groupByEmail" + DOMAIN = "domain" + DATASET = "dataset" + SPECIAL_GROUP = "specialGroup" + VIEW = "view" + IAM_MEMBER = "iamMember" + ROUTINE = "routine" + + # See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types # and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types class SqlTypeNames(str, enum.Enum): diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py index b6a20c6ec27d..66810c03694c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py @@ -24,6 +24,7 @@ def run_authorized_view_tutorial(override_values={}): # Create a source dataset # [START bigquery_avt_create_source_dataset] from google.cloud import bigquery + from google.cloud.bigquery.enums import EntityTypes client = bigquery.Client() source_dataset_id = "github_source_data" @@ -106,7 +107,7 @@ def run_authorized_view_tutorial(override_values={}): # analyst_group_email = 'data_analysts@example.com' access_entries = shared_dataset.access_entries access_entries.append( - bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) + bigquery.AccessEntry("READER", EntityTypes.GROUP_BY_EMAIL, analyst_group_email) ) shared_dataset.access_entries = access_entries shared_dataset = client.update_dataset( @@ -118,7 +119,7 @@ def run_authorized_view_tutorial(override_values={}): # [START bigquery_avt_source_dataset_access] access_entries = source_dataset.access_entries access_entries.append( - bigquery.AccessEntry(None, "view", view.reference.to_api_repr()) + bigquery.AccessEntry(None, EntityTypes.VIEW, view.reference.to_api_repr()) ) source_dataset.access_entries = access_entries source_dataset = client.update_dataset( diff --git a/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py b/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py index fb3bfa14ffad..1448213a62c6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py @@ -27,6 +27,8 @@ def update_dataset_access(dataset_id: str, entity_id: str): # of the entity, such as a view's table reference. entity_id = "user-or-group-to-add@example.com" + from google.cloud.bigquery.enums import EntityTypes + # TODO(developer): Set entity_type to the type of entity you are granting access to. # Common types include: # @@ -37,7 +39,7 @@ def update_dataset_access(dataset_id: str, entity_id: str): # # For a complete reference, see the REST API reference documentation: # https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#Dataset.FIELDS.access - entity_type = "groupByEmail" + entity_type = EntityTypes.GROUP_BY_EMAIL # TODO(developer): Set role to a one of the "Basic roles for datasets" # described here: diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index b3a53a08dcb9..c554782bfa96 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -141,6 +141,28 @@ def test_to_api_repr_routine(self): exp_resource = {"routine": routine} self.assertEqual(resource, exp_resource) + def test_to_api_repr_dataset(self): + dataset = { + "dataset": {"projectId": "my-project", "datasetId": "my_dataset"}, + "target_types": "VIEWS", + } + entry = self._make_one(None, "dataset", dataset) + resource = entry.to_api_repr() + exp_resource = {"dataset": dataset} + self.assertEqual(resource, exp_resource) + + def test_to_api_w_incorrect_role(self): + dataset = { + "dataset": { + "projectId": "my-project", + "datasetId": "my_dataset", + "tableId": "my_table", + }, + "target_type": "VIEW", + } + with self.assertRaises(ValueError): + self._make_one("READER", "dataset", dataset) + def test_from_api_repr(self): resource = {"role": "OWNER", "userByEmail": "salmon@example.com"} entry = self._get_target_class().from_api_repr(resource) @@ -150,8 +172,22 @@ def test_from_api_repr(self): def test_from_api_repr_w_unknown_entity_type(self): resource = {"role": "READER", "unknown": "UNKNOWN"} - with self.assertRaises(ValueError): - self._get_target_class().from_api_repr(resource) + entry = self._get_target_class().from_api_repr(resource) + self.assertEqual(entry.role, "READER") + self.assertEqual(entry.entity_type, "unknown") + self.assertEqual(entry.entity_id, "UNKNOWN") + exp_resource = entry.to_api_repr() + self.assertEqual(resource, exp_resource) + + def test_to_api_repr_w_extra_properties(self): + resource = { + "role": "READER", + "userByEmail": "salmon@example.com", + } + entry = self._get_target_class().from_api_repr(resource) + entry._properties["specialGroup"] = resource["specialGroup"] = "projectReaders" + exp_resource = entry.to_api_repr() + self.assertEqual(resource, exp_resource) def test_from_api_repr_entries_w_extra_keys(self): resource = { From 50900c6aa7784e940fecea7f5ccaf46ca09d8ae9 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 14 Dec 2021 20:06:20 +0100 Subject: [PATCH 1360/2016] chore(deps): update all dependencies (#1005) * chore(deps): update all dependencies * exclude new geopandas from python 3.6 Co-authored-by: Tim Swast --- .../samples/geography/requirements.txt | 55 ++++++++++--------- .../samples/magics/requirements-test.txt | 2 +- .../samples/magics/requirements.txt | 8 +-- .../samples/snippets/requirements-test.txt | 2 +- .../samples/snippets/requirements.txt | 8 +-- 5 files changed, 38 insertions(+), 37 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e2de866731ff..a10f01e72e5c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,49 +1,50 @@ attrs==21.2.0 cachetools==4.2.4 -certifi==2021.5.30 -cffi==1.14.6 -charset-normalizer==2.0.6 -click==8.0.1 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.9 +click==8.0.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.6; python_version < '3.7' Fiona==1.8.20 geojson==2.5.0 -geopandas==0.9.0 -google-api-core==2.0.1 -google-auth==2.2.1 -google-cloud-bigquery==2.27.1 -google-cloud-bigquery-storage==2.9.0 -google-cloud-core==2.0.0 -google-crc32c==1.2.0 -google-resumable-media==2.0.3 -googleapis-common-protos==1.53.0 -grpcio==1.41.0 -idna==3.2 -importlib-metadata==4.8.1 -libcst==0.3.21 +geopandas==0.9.0; python_version < '3.7' +geopandas==0.10.2; python_version >= '3.7' +google-api-core==2.3.0 +google-auth==2.3.3 +google-cloud-bigquery==2.31.0 +google-cloud-bigquery-storage==2.10.1 +google-cloud-core==2.2.1 +google-crc32c==1.3.0 +google-resumable-media==2.1.0 +googleapis-common-protos==1.54.0 +grpcio==1.42.0 +idna==3.3 +importlib-metadata==4.8.2 +libcst==0.3.23 munch==2.5.0 mypy-extensions==0.4.3 -packaging==21.0 +packaging==21.3 pandas==1.1.5; python_version < '3.7' pandas==1.3.4; python_version >= '3.7' -proto-plus==1.19.2 -protobuf==3.18.0 -pyarrow==6.0.0 +proto-plus==1.19.8 +protobuf==3.19.1 +pyarrow==6.0.1 pyasn1==0.4.8 pyasn1-modules==0.2.8 -pycparser==2.20 -pyparsing==2.4.7 +pycparser==2.21 +pyparsing==3.0.6 pyproj==3.0.1; python_version < "3.7" pyproj==3.1.0; python_version > "3.6" python-dateutil==2.8.2 -pytz==2021.1 -PyYAML==5.4.1 +pytz==2021.3 +PyYAML==6.0 requests==2.26.0 -rsa==4.7.2 +rsa==4.8 Shapely==1.8.0 six==1.16.0 -typing-extensions==3.10.0.2 +typing-extensions==4.0.1 typing-inspect==0.7.1 urllib3==1.26.7 zipp==3.6.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index caa48813a911..5b73f1fd5d67 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.1.0 +google-cloud-testutils==1.3.1 pytest==6.2.5 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 5cc7ec33f8b4..7fc6561b0537 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,11 +1,11 @@ -google-cloud-bigquery-storage==2.9.0 +google-cloud-bigquery-storage==2.10.1 google-auth-oauthlib==0.4.6 -grpcio==1.41.0 +grpcio==1.42.0 ipython==7.16.1; python_version < '3.7' ipython==7.29.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.5.0rc1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.3.4; python_version >= '3.7' -pyarrow==6.0.0 -pytz==2021.1 +pyarrow==6.0.1 +pytz==2021.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index caa48813a911..5b73f1fd5d67 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.1.0 +google-cloud-testutils==1.3.1 pytest==6.2.5 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index f795523929ee..0ee32e1fd119 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,11 +1,11 @@ -google-cloud-bigquery-storage==2.9.0 +google-cloud-bigquery-storage==2.10.1 google-auth-oauthlib==0.4.6 -grpcio==1.41.0 +grpcio==1.42.0 ipython==7.16.1; python_version < '3.7' ipython==7.29.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.3.4; python_version >= '3.7' -pyarrow==6.0.0 -pytz==2021.1 +pyarrow==6.0.1 +pytz==2021.3 From a1ab7d97683ea4bca2fc09a6e6aa73484637a617 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 16 Dec 2021 10:51:21 -0600 Subject: [PATCH 1361/2016] test: skip failing copy table tests (#1090) --- .../google-cloud-bigquery/samples/tests/test_copy_table.py | 3 +++ .../samples/tests/test_copy_table_cmek.py | 3 +++ packages/google-cloud-bigquery/tests/system/test_client.py | 2 ++ 3 files changed, 8 insertions(+) diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py index 0b95c5443777..726410e86bb4 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py @@ -12,10 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest + from .. import copy_table def test_copy_table(capsys, table_with_data_id, random_table_id, client): + pytest.skip("b/210907595: copy fails for shakespeare table") copy_table.copy_table(table_with_data_id, random_table_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py index ac04675c989d..63163d563564 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py @@ -12,10 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest + from .. import copy_table_cmek def test_copy_table_cmek(capsys, random_table_id, table_with_data_id, kms_key_name): + pytest.skip("b/210907595: copy fails for shakespeare table") copy_table_cmek.copy_table_cmek(random_table_id, table_with_data_id, kms_key_name) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 91bcff15549a..43b3b6ae7758 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -1207,6 +1207,8 @@ def test_extract_table(self): self.assertIn("Bharney Rhubble", got) def test_copy_table(self): + pytest.skip("b/210907595: copy fails for shakespeare table") + # If we create a new table to copy from, the test won't work # because the new rows will be stored in the streaming buffer, # and copy jobs don't read the streaming buffer. From 8a5d18bc56aa82e7c6ac1a85d558684aa563c770 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 16 Dec 2021 20:22:35 +0100 Subject: [PATCH 1362/2016] chore(deps): update all dependencies (#1089) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-api-core](https://togithub.com/googleapis/python-api-core) | `==2.3.0` -> `==2.3.2` | [![age](https://badges.renovateapi.com/packages/pypi/google-api-core/2.3.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-api-core/2.3.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-api-core/2.3.2/compatibility-slim/2.3.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-api-core/2.3.2/confidence-slim/2.3.0)](https://docs.renovatebot.com/merge-confidence/) | | [importlib-metadata](https://togithub.com/python/importlib_metadata) | `==4.8.2` -> `==4.8.3` | [![age](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.3/compatibility-slim/4.8.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.3/confidence-slim/4.8.2)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-api-core ### [`v2.3.2`](https://togithub.com/googleapis/python-api-core/releases/v2.3.2) [Compare Source](https://togithub.com/googleapis/python-api-core/compare/v2.3.1...v2.3.2) ##### Bug Fixes - fix: exclude function target from retry deadline exceeded exception message ([#​318](https://www.togithub.com/googleapis/python-api-core/issues/318)) ([34ebdcc](https://togithub.com/googleapis/python-api-core/commit/34ebdcc251d4f3d7d496e8e0b78847645a06650b)) ### [`v2.3.1`](https://togithub.com/googleapis/python-api-core/compare/v2.3.0...v2.3.1) [Compare Source](https://togithub.com/googleapis/python-api-core/compare/v2.3.0...v2.3.1)
python/importlib_metadata ### [`v4.8.3`](https://togithub.com/python/importlib_metadata/blob/HEAD/CHANGES.rst#v483) [Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.8.2...v4.8.3) \====== - [#​357](https://togithub.com/python/importlib_metadata/issues/357): Fixed requirement generation from egg-info when a URL requirement is given.
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://togithub.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, click this checkbox. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index a10f01e72e5c..4f7956f4b415 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0; python_version < '3.7' geopandas==0.10.2; python_version >= '3.7' -google-api-core==2.3.0 +google-api-core==2.3.2 google-auth==2.3.3 google-cloud-bigquery==2.31.0 google-cloud-bigquery-storage==2.10.1 @@ -21,7 +21,7 @@ google-resumable-media==2.1.0 googleapis-common-protos==1.54.0 grpcio==1.42.0 idna==3.3 -importlib-metadata==4.8.2 +importlib-metadata==4.8.3 libcst==0.3.23 munch==2.5.0 mypy-extensions==0.4.3 From 832ea0c8fb020db9ff8dfafebaa5a664e612aca5 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Tue, 28 Dec 2021 13:33:13 -0500 Subject: [PATCH 1363/2016] chore: update .repo-metadata.json (#1093) --- packages/google-cloud-bigquery/.repo-metadata.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.repo-metadata.json b/packages/google-cloud-bigquery/.repo-metadata.json index 124b40eb9b5f..670aba79392b 100644 --- a/packages/google-cloud-bigquery/.repo-metadata.json +++ b/packages/google-cloud-bigquery/.repo-metadata.json @@ -2,9 +2,9 @@ "name": "bigquery", "name_pretty": "Google Cloud BigQuery", "product_documentation": "https://cloud.google.com/bigquery", - "client_documentation": "https://googleapis.dev/python/bigquery/latest", + "client_documentation": "https://cloud.google.com/python/docs/reference/bigquery/latest", "issue_tracker": "https://issuetracker.google.com/savedsearches/559654", - "release_level": "ga", + "release_level": "stable", "language": "python", "library_type": "GAPIC_COMBO", "repo": "googleapis/python-bigquery", @@ -12,5 +12,6 @@ "api_id": "bigquery.googleapis.com", "requires_billing": false, "default_version": "v2", - "codeowner_team": "@googleapis/api-bigquery" + "codeowner_team": "@googleapis/api-bigquery", + "api_shortname": "bigquery" } From 21f0550e1b993be71310b96965b504a8537a8fee Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 4 Jan 2022 15:57:07 -0600 Subject: [PATCH 1364/2016] test: avoid flake if connections were already open at start (#1096) Addresses flakes such as https://source.cloud.google.com/results/invocations/bb7ce2a8-d410-45bd-af12-7c04bb8cbb25/targets/cloud-devrel%2Fclient-libraries%2Fpython%2Fgoogleapis%2Fpython-bigquery%2Fpresubmit%2Fsystem-3.10/log --- packages/google-cloud-bigquery/tests/system/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 43b3b6ae7758..077bb28861fe 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -1725,7 +1725,7 @@ def test_dbapi_connection_does_not_leak_sockets(self): connection.close() conn_count_end = len(current_process.connections()) - self.assertEqual(conn_count_end, conn_count_start) + self.assertLessEqual(conn_count_end, conn_count_start) def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile From 420aa1fdf7c512c41fde0fc9adf48db5e7568650 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 5 Jan 2022 20:44:10 +0100 Subject: [PATCH 1365/2016] chore(deps): update all dependencies (#1091) * chore(deps): update all dependencies * don't directly depend on cachetools * remove importlib metadata as direct dependency * remove zipp which comes from transitive dependency Co-authored-by: Tim Swast --- .../samples/geography/requirements.txt | 9 +++------ .../samples/magics/requirements.txt | 2 +- .../samples/snippets/requirements.txt | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 4f7956f4b415..543076cdcfac 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,4 @@ -attrs==21.2.0 -cachetools==4.2.4 +attrs==21.4.0 certifi==2021.10.8 cffi==1.15.0 charset-normalizer==2.0.9 @@ -19,9 +18,8 @@ google-cloud-core==2.2.1 google-crc32c==1.3.0 google-resumable-media==2.1.0 googleapis-common-protos==1.54.0 -grpcio==1.42.0 +grpcio==1.43.0 idna==3.3 -importlib-metadata==4.8.3 libcst==0.3.23 munch==2.5.0 mypy-extensions==0.4.3 @@ -40,11 +38,10 @@ pyproj==3.1.0; python_version > "3.6" python-dateutil==2.8.2 pytz==2021.3 PyYAML==6.0 -requests==2.26.0 +requests==2.27.0 rsa==4.8 Shapely==1.8.0 six==1.16.0 typing-extensions==4.0.1 typing-inspect==0.7.1 urllib3==1.26.7 -zipp==3.6.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 7fc6561b0537..f4337e8fbb1b 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery-storage==2.10.1 google-auth-oauthlib==0.4.6 -grpcio==1.42.0 +grpcio==1.43.0 ipython==7.16.1; python_version < '3.7' ipython==7.29.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 0ee32e1fd119..bef33372099f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery-storage==2.10.1 google-auth-oauthlib==0.4.6 -grpcio==1.42.0 +grpcio==1.43.0 ipython==7.16.1; python_version < '3.7' ipython==7.29.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From 7cb8c04705b7de956954f8bf895a1b0942b90c8e Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 6 Jan 2022 20:21:39 +0100 Subject: [PATCH 1366/2016] chore(deps): update all dependencies (#1098) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 543076cdcfac..ca7e38f844a5 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==21.4.0 certifi==2021.10.8 cffi==1.15.0 -charset-normalizer==2.0.9 +charset-normalizer==2.0.10 click==8.0.3 click-plugins==1.1.1 cligj==0.7.2 @@ -38,7 +38,7 @@ pyproj==3.1.0; python_version > "3.6" python-dateutil==2.8.2 pytz==2021.3 PyYAML==6.0 -requests==2.27.0 +requests==2.27.1 rsa==4.8 Shapely==1.8.0 six==1.16.0 From acda57312e5c8c0748ed928dd3cfa49fa37c2306 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 10 Jan 2022 11:20:10 +0200 Subject: [PATCH 1367/2016] README: Update supported Python versions (#1100) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes # 🦕 To match the Trove classifiers on PyPI: ![image](https://user-images.githubusercontent.com/1324225/148646991-cc536f1f-4389-4cd9-9995-276c9aa7245a.png) https://pypi.org/project/google-cloud-bigquery/ --- packages/google-cloud-bigquery/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index d0ad059a23fb..bafa0669352e 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -52,7 +52,7 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.6, < 3.10 +Python >= 3.6, < 3.11 Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ From a3a222447b6e633c11a626a1123819ac586311ac Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 11 Jan 2022 10:19:40 -0500 Subject: [PATCH 1368/2016] chore(samples): Add check for tests in directory (#1103) Source-Link: https://github.com/googleapis/synthtool/commit/52aef91f8d25223d9dbdb4aebd94ba8eea2101f3 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:36a95b8f494e4674dc9eee9af98961293b51b86b3649942aac800ae6c1f796d4 Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 2 +- .../samples/geography/noxfile.py | 70 +++++++++++-------- .../samples/magics/noxfile.py | 70 +++++++++++-------- .../samples/snippets/noxfile.py | 70 +++++++++++-------- 4 files changed, 118 insertions(+), 94 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 0b3c8cd98f89..6b8a73b31465 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2f90537dd7df70f6b663cd654b1fa5dee483cf6a4edcfd46072b2775be8a23ec + digest: sha256:36a95b8f494e4674dc9eee9af98961293b51b86b3649942aac800ae6c1f796d4 diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 93a9122cc457..3bbef5d54f44 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -14,6 +14,7 @@ from __future__ import print_function +import glob import os from pathlib import Path import sys @@ -184,37 +185,44 @@ def blacken(session: nox.sessions.Session) -> None: def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: - if TEST_CONFIG["pip_version_override"]: - pip_version = TEST_CONFIG["pip_version_override"] - session.install(f"pip=={pip_version}") - """Runs py.test for a particular project.""" - if os.path.exists("requirements.txt"): - if os.path.exists("constraints.txt"): - session.install("-r", "requirements.txt", "-c", "constraints.txt") - else: - session.install("-r", "requirements.txt") - - if os.path.exists("requirements-test.txt"): - if os.path.exists("constraints-test.txt"): - session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") - else: - session.install("-r", "requirements-test.txt") - - if INSTALL_LIBRARY_FROM_SOURCE: - session.install("-e", _get_repo_root()) - - if post_install: - post_install(session) - - session.run( - "pytest", - *(PYTEST_COMMON_ARGS + session.posargs), - # Pytest will return 5 when no tests are collected. This can happen - # on travis where slow and flaky tests are excluded. - # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html - success_codes=[0, 5], - env=get_pytest_env_vars(), - ) + # check for presence of tests + test_list = glob.glob("*_test.py") + glob.glob("test_*.py") + if len(test_list) == 0: + print("No tests found, skipping directory.") + else: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install( + "-r", "requirements-test.txt", "-c", "constraints-test.txt" + ) + else: + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) @nox.session(python=ALL_VERSIONS) diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index 93a9122cc457..3bbef5d54f44 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -14,6 +14,7 @@ from __future__ import print_function +import glob import os from pathlib import Path import sys @@ -184,37 +185,44 @@ def blacken(session: nox.sessions.Session) -> None: def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: - if TEST_CONFIG["pip_version_override"]: - pip_version = TEST_CONFIG["pip_version_override"] - session.install(f"pip=={pip_version}") - """Runs py.test for a particular project.""" - if os.path.exists("requirements.txt"): - if os.path.exists("constraints.txt"): - session.install("-r", "requirements.txt", "-c", "constraints.txt") - else: - session.install("-r", "requirements.txt") - - if os.path.exists("requirements-test.txt"): - if os.path.exists("constraints-test.txt"): - session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") - else: - session.install("-r", "requirements-test.txt") - - if INSTALL_LIBRARY_FROM_SOURCE: - session.install("-e", _get_repo_root()) - - if post_install: - post_install(session) - - session.run( - "pytest", - *(PYTEST_COMMON_ARGS + session.posargs), - # Pytest will return 5 when no tests are collected. This can happen - # on travis where slow and flaky tests are excluded. - # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html - success_codes=[0, 5], - env=get_pytest_env_vars(), - ) + # check for presence of tests + test_list = glob.glob("*_test.py") + glob.glob("test_*.py") + if len(test_list) == 0: + print("No tests found, skipping directory.") + else: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install( + "-r", "requirements-test.txt", "-c", "constraints-test.txt" + ) + else: + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) @nox.session(python=ALL_VERSIONS) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 93a9122cc457..3bbef5d54f44 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -14,6 +14,7 @@ from __future__ import print_function +import glob import os from pathlib import Path import sys @@ -184,37 +185,44 @@ def blacken(session: nox.sessions.Session) -> None: def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: - if TEST_CONFIG["pip_version_override"]: - pip_version = TEST_CONFIG["pip_version_override"] - session.install(f"pip=={pip_version}") - """Runs py.test for a particular project.""" - if os.path.exists("requirements.txt"): - if os.path.exists("constraints.txt"): - session.install("-r", "requirements.txt", "-c", "constraints.txt") - else: - session.install("-r", "requirements.txt") - - if os.path.exists("requirements-test.txt"): - if os.path.exists("constraints-test.txt"): - session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") - else: - session.install("-r", "requirements-test.txt") - - if INSTALL_LIBRARY_FROM_SOURCE: - session.install("-e", _get_repo_root()) - - if post_install: - post_install(session) - - session.run( - "pytest", - *(PYTEST_COMMON_ARGS + session.posargs), - # Pytest will return 5 when no tests are collected. This can happen - # on travis where slow and flaky tests are excluded. - # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html - success_codes=[0, 5], - env=get_pytest_env_vars(), - ) + # check for presence of tests + test_list = glob.glob("*_test.py") + glob.glob("test_*.py") + if len(test_list) == 0: + print("No tests found, skipping directory.") + else: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install( + "-r", "requirements-test.txt", "-c", "constraints-test.txt" + ) + else: + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) @nox.session(python=ALL_VERSIONS) From f3fa7952f3f013ab95a8f6f8d1bf3c930330caab Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 12 Jan 2022 15:25:16 -0600 Subject: [PATCH 1369/2016] fix: remove query text from exception message, use `exception.debug_message` instead (#1105) Since query text can potentially contain sensitive information, remove it from the default exception message. This information is useful for debugging, so provide a `debug_message` attribute, which is not included in the exception representation (and thus the logs). Fixes internal issue 211616590 --- .../google/cloud/bigquery/job/query.py | 19 ++++++++++++------ .../tests/unit/job/test_query.py | 20 +++++++++++++++---- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 36e3882385b8..2dd9459842ff 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -66,6 +66,7 @@ _CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) +_EXCEPTION_FOOTER_TEMPLATE = "{message}\n\nLocation: {location}\nJob ID: {job_id}\n" _TIMEOUT_BUFFER_SECS = 0.1 @@ -1196,17 +1197,17 @@ def _blocking_poll(self, timeout=None, **kwargs): super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs) @staticmethod - def _format_for_exception(query, job_id): + def _format_for_exception(message: str, query: str): """Format a query for the output in exception message. Args: + message (str): The original exception message. query (str): The SQL query to format. - job_id (str): The ID of the job that ran the query. Returns: str: A formatted query text. """ - template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}" + template = "{message}\n\n{header}\n\n{ruler}\n{body}\n{ruler}" lines = query.splitlines() max_line_len = max(len(line) for line in lines) @@ -1223,7 +1224,7 @@ def _format_for_exception(query, job_id): "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1) ) - return template.format(job_id=job_id, header=header, ruler=ruler, body=body) + return template.format(message=message, header=header, ruler=ruler, body=body) def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): """API call: begin the job via a POST request @@ -1248,7 +1249,10 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): try: super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout) except exceptions.GoogleAPICallError as exc: - exc.message += self._format_for_exception(self.query, self.job_id) + exc.message = _EXCEPTION_FOOTER_TEMPLATE.format( + message=exc.message, location=self.location, job_id=self.job_id + ) + exc.debug_message = self._format_for_exception(exc.message, self.query) exc.query_job = self raise @@ -1447,7 +1451,10 @@ def do_get_result(): do_get_result() except exceptions.GoogleAPICallError as exc: - exc.message += self._format_for_exception(self.query, self.job_id) + exc.message = _EXCEPTION_FOOTER_TEMPLATE.format( + message=exc.message, location=self.location, job_id=self.job_id + ) + exc.debug_message = self._format_for_exception(exc.message, self.query) # type: ignore exc.query_job = self # type: ignore raise except requests.exceptions.Timeout as exc: diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 4da035b78cb6..5fb76b9e91cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -1360,13 +1360,19 @@ def test_result_error(self): exc_job_instance = getattr(exc_info.exception, "query_job", None) self.assertIs(exc_job_instance, job) + # Query text could contain sensitive information, so it must not be + # included in logs / exception representation. full_text = str(exc_info.exception) assert job.job_id in full_text - assert "Query Job SQL Follows" in full_text + assert "Query Job SQL Follows" not in full_text + # It is useful to have query text available, so it is provided in a + # debug_message property. + debug_message = exc_info.exception.debug_message + assert "Query Job SQL Follows" in debug_message for i, line in enumerate(query.splitlines(), start=1): expected_line = "{}:{}".format(i, line) - assert expected_line in full_text + assert expected_line in debug_message def test_result_transport_timeout_error(self): query = textwrap.dedent( @@ -1452,13 +1458,19 @@ def test__begin_error(self): exc_job_instance = getattr(exc_info.exception, "query_job", None) self.assertIs(exc_job_instance, job) + # Query text could contain sensitive information, so it must not be + # included in logs / exception representation. full_text = str(exc_info.exception) assert job.job_id in full_text - assert "Query Job SQL Follows" in full_text + assert "Query Job SQL Follows" not in full_text + # It is useful to have query text available, so it is provided in a + # debug_message property. + debug_message = exc_info.exception.debug_message + assert "Query Job SQL Follows" in debug_message for i, line in enumerate(query.splitlines(), start=1): expected_line = "{}:{}".format(i, line) - assert expected_line in full_text + assert expected_line in debug_message def test__begin_w_timeout(self): PATH = "/projects/%s/jobs" % (self.PROJECT,) From 40d93a2fc03e17e28761f9428633a7b7d10aff3b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 12 Jan 2022 23:08:21 +0000 Subject: [PATCH 1370/2016] build: switch to release-please for tagging (#1107) --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/.github/release-please.yml | 1 + packages/google-cloud-bigquery/.github/release-trigger.yml | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/.github/release-trigger.yml diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 6b8a73b31465..ff5126c188d0 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:36a95b8f494e4674dc9eee9af98961293b51b86b3649942aac800ae6c1f796d4 + digest: sha256:dfa9b663b32de8b5b327e32c1da665a80de48876558dd58091d8160c60ad7355 diff --git a/packages/google-cloud-bigquery/.github/release-please.yml b/packages/google-cloud-bigquery/.github/release-please.yml index 4507ad0598a5..466597e5b196 100644 --- a/packages/google-cloud-bigquery/.github/release-please.yml +++ b/packages/google-cloud-bigquery/.github/release-please.yml @@ -1 +1,2 @@ releaseType: python +handleGHRelease: true diff --git a/packages/google-cloud-bigquery/.github/release-trigger.yml b/packages/google-cloud-bigquery/.github/release-trigger.yml new file mode 100644 index 000000000000..d4ca94189e16 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/release-trigger.yml @@ -0,0 +1 @@ +enabled: true From a19a2f130c0cc40a5b8979d1e16df3521b9bcca8 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 13 Jan 2022 16:52:17 +0000 Subject: [PATCH 1371/2016] chore(main): release 2.32.0 (#1104) :robot: I have created a release *beep* *boop* --- ## [2.32.0](https://github.com/googleapis/python-bigquery/compare/v2.31.0...v2.32.0) (2022-01-12) ### Features * support authorized dataset entity ([#1075](https://github.com/googleapis/python-bigquery/issues/1075)) ([c098cd0](https://github.com/googleapis/python-bigquery/commit/c098cd01c755633bfaba7193dd5c044a489a5b61)) ### Bug Fixes * remove query text from exception message, use `exception.debug_message` instead ([#1105](https://github.com/googleapis/python-bigquery/issues/1105)) ([e23114c](https://github.com/googleapis/python-bigquery/commit/e23114ce362e09ac72f733a640e53a561cc9ce69)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 5ba219d20953..6e69fa62143d 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.32.0](https://github.com/googleapis/python-bigquery/compare/v2.31.0...v2.32.0) (2022-01-12) + + +### Features + +* support authorized dataset entity ([#1075](https://github.com/googleapis/python-bigquery/issues/1075)) ([c098cd0](https://github.com/googleapis/python-bigquery/commit/c098cd01c755633bfaba7193dd5c044a489a5b61)) + + +### Bug Fixes + +* remove query text from exception message, use `exception.debug_message` instead ([#1105](https://github.com/googleapis/python-bigquery/issues/1105)) ([e23114c](https://github.com/googleapis/python-bigquery/commit/e23114ce362e09ac72f733a640e53a561cc9ce69)) + ## [2.31.0](https://www.github.com/googleapis/python-bigquery/compare/v2.30.1...v2.31.0) (2021-11-24) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 6329658af5f2..b8c5af9a24bc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.31.0" +__version__ = "2.32.0" From c252adff89523547d865df0e2f27028427c0aee5 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 14 Jan 2022 21:43:58 +0100 Subject: [PATCH 1372/2016] chore(deps): update all dependencies (#1101) * chore(deps): update all dependencies * test: limit deps versions by python * remove pyproj, as it is only a transitive dependency Co-authored-by: Tim Swast Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 14 ++++++-------- .../samples/magics/requirements.txt | 7 ++++--- .../samples/snippets/requirements.txt | 7 ++++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index ca7e38f844a5..b07ba50bf7a4 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -5,12 +5,12 @@ charset-normalizer==2.0.10 click==8.0.3 click-plugins==1.1.1 cligj==0.7.2 -dataclasses==0.6; python_version < '3.7' +dataclasses==0.8; python_version < '3.7' Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0; python_version < '3.7' geopandas==0.10.2; python_version >= '3.7' -google-api-core==2.3.2 +google-api-core==2.4.0 google-auth==2.3.3 google-cloud-bigquery==2.31.0 google-cloud-bigquery-storage==2.10.1 @@ -20,21 +20,19 @@ google-resumable-media==2.1.0 googleapis-common-protos==1.54.0 grpcio==1.43.0 idna==3.3 -libcst==0.3.23 +libcst==0.4.0 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas==1.1.5; python_version < '3.7' -pandas==1.3.4; python_version >= '3.7' +pandas==1.3.5; python_version >= '3.7' proto-plus==1.19.8 -protobuf==3.19.1 +protobuf==3.19.3 pyarrow==6.0.1 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.6 -pyproj==3.0.1; python_version < "3.7" -pyproj==3.1.0; python_version > "3.6" python-dateutil==2.8.2 pytz==2021.3 PyYAML==6.0 @@ -44,4 +42,4 @@ Shapely==1.8.0 six==1.16.0 typing-extensions==4.0.1 typing-inspect==0.7.1 -urllib3==1.26.7 +urllib3==1.26.8 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index f4337e8fbb1b..b47dcbc4fd2a 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -2,10 +2,11 @@ google-cloud-bigquery-storage==2.10.1 google-auth-oauthlib==0.4.6 grpcio==1.43.0 ipython==7.16.1; python_version < '3.7' -ipython==7.29.0; python_version >= '3.7' +ipython==7.29.0; python_version == '3.7' +ipython==8.0.0; python_version >= '3.8' matplotlib==3.3.4; python_version < '3.7' -matplotlib==3.5.0rc1; python_version >= '3.7' +matplotlib==3.5.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.3.4; python_version >= '3.7' +pandas==1.3.5; python_version >= '3.7' pyarrow==6.0.1 pytz==2021.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index bef33372099f..b47dcbc4fd2a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -2,10 +2,11 @@ google-cloud-bigquery-storage==2.10.1 google-auth-oauthlib==0.4.6 grpcio==1.43.0 ipython==7.16.1; python_version < '3.7' -ipython==7.29.0; python_version >= '3.7' +ipython==7.29.0; python_version == '3.7' +ipython==8.0.0; python_version >= '3.8' matplotlib==3.3.4; python_version < '3.7' -matplotlib==3.4.1; python_version >= '3.7' +matplotlib==3.5.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.3.4; python_version >= '3.7' +pandas==1.3.5; python_version >= '3.7' pyarrow==6.0.1 pytz==2021.3 From 473d0aa683e320727e5b727cfc3969f6f798315c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 14 Jan 2022 16:26:41 -0500 Subject: [PATCH 1373/2016] chore(python): update release.sh to use keystore (#1111) Source-Link: https://github.com/googleapis/synthtool/commit/69fda12e2994f0b595a397e8bb6e3e9f380524eb Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:ae600f36b6bc972b368367b6f83a1d91ec2c82a4a116b383d67d547c56fe6de3 Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- .../google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/.kokoro/release.sh | 2 +- .../google-cloud-bigquery/.kokoro/release/common.cfg | 12 +++++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index ff5126c188d0..eecb84c21b27 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:dfa9b663b32de8b5b327e32c1da665a80de48876558dd58091d8160c60ad7355 + digest: sha256:ae600f36b6bc972b368367b6f83a1d91ec2c82a4a116b383d67d547c56fe6de3 diff --git a/packages/google-cloud-bigquery/.kokoro/release.sh b/packages/google-cloud-bigquery/.kokoro/release.sh index 3abba6e06681..b030caeefa5a 100755 --- a/packages/google-cloud-bigquery/.kokoro/release.sh +++ b/packages/google-cloud-bigquery/.kokoro/release.sh @@ -26,7 +26,7 @@ python3 -m pip install --upgrade twine wheel setuptools export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_GFILE_DIR}/secret_manager/google-cloud-pypi-token") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-1") cd github/python-bigquery python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/packages/google-cloud-bigquery/.kokoro/release/common.cfg b/packages/google-cloud-bigquery/.kokoro/release/common.cfg index 922d7fe504bd..6ae81b74308d 100644 --- a/packages/google-cloud-bigquery/.kokoro/release/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/release/common.cfg @@ -23,8 +23,18 @@ env_vars: { value: "github/python-bigquery/.kokoro/release.sh" } +# Fetch PyPI password +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "google-cloud-pypi-token-keystore-1" + } + } +} + # Tokens needed to report release status back to GitHub env_vars: { key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem,google-cloud-pypi-token" + value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" } From ce217575d43bbc3cc07e59c8e7768e9b75320819 Mon Sep 17 00:00:00 2001 From: Matthew Suozzo Date: Tue, 18 Jan 2022 11:21:23 -0500 Subject: [PATCH 1374/2016] test: make GC check resilient to broken weakrefs (#1099) * fix: Make gc check resilient to broken weakrefs In some python deployments (I believe macOS is one), weakref objects aren't guaranteed to exist across gc collections. The proposed check pattern is also used in tensorflow [0] which ran into similar issues a few years ago. [0]: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/framework/errors_test.py#L33 * fix: add pragma for coverage * Fix whitespace --- .../tests/unit/test_dbapi_connection.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 0576cad3813e..11a268c68297 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -219,8 +219,14 @@ def test_does_not_keep_cursor_instances_alive(self): # Connections should not hold strong references to the Cursor instances # they created, unnecessarily keeping them alive. gc.collect() - cursors = [obj for obj in gc.get_objects() if isinstance(obj, Cursor)] - self.assertEqual(len(cursors), 2) + cursor_count = 0 + for obj in gc.get_objects(): + try: + if isinstance(obj, Cursor): + cursor_count += 1 + except ReferenceError: # pragma: NO COVER + pass + self.assertEqual(cursor_count, 2) def test_commit(self): connection = self._make_one(client=self._mock_client()) From a960de092b6c47897a8c588f93e06203e08208ac Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 18 Jan 2022 20:24:46 -0500 Subject: [PATCH 1375/2016] chore(python): Noxfile recognizes that tests can live in a folder (#1115) Source-Link: https://github.com/googleapis/synthtool/commit/4760d8dce1351d93658cb11d02a1b7ceb23ae5d7 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:f0e4b51deef56bed74d3e2359c583fc104a8d6367da3984fc5c66938db738828 Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/samples/geography/noxfile.py | 1 + packages/google-cloud-bigquery/samples/magics/noxfile.py | 1 + packages/google-cloud-bigquery/samples/snippets/noxfile.py | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index eecb84c21b27..52d79c11f3ad 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:ae600f36b6bc972b368367b6f83a1d91ec2c82a4a116b383d67d547c56fe6de3 + digest: sha256:f0e4b51deef56bed74d3e2359c583fc104a8d6367da3984fc5c66938db738828 diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 3bbef5d54f44..20cdfc620138 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -187,6 +187,7 @@ def _session_tests( ) -> None: # check for presence of tests test_list = glob.glob("*_test.py") + glob.glob("test_*.py") + test_list.extend(glob.glob("tests")) if len(test_list) == 0: print("No tests found, skipping directory.") else: diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index 3bbef5d54f44..20cdfc620138 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -187,6 +187,7 @@ def _session_tests( ) -> None: # check for presence of tests test_list = glob.glob("*_test.py") + glob.glob("test_*.py") + test_list.extend(glob.glob("tests")) if len(test_list) == 0: print("No tests found, skipping directory.") else: diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 3bbef5d54f44..20cdfc620138 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -187,6 +187,7 @@ def _session_tests( ) -> None: # check for presence of tests test_list = glob.glob("*_test.py") + glob.glob("test_*.py") + test_list.extend(glob.glob("tests")) if len(test_list) == 0: print("No tests found, skipping directory.") else: From 487ed558d0d8ec571b2f0af0a1dcee6b5fcfb1aa Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 21 Jan 2022 20:12:50 -0500 Subject: [PATCH 1376/2016] chore(python): exclude templated GH action workflows (#1116) * ci(python): run lint / unit tests / docs as GH actions Source-Link: https://github.com/googleapis/synthtool/commit/57be0cdb0b94e1669cee0ca38d790de1dfdbcd44 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:ed1f9983d5a935a89fe8085e8bb97d94e41015252c5b6c9771257cf8624367e6 * exclude templated GH action workflows Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .../.github/.OwlBot.lock.yaml | 15 ++++++++++++++- packages/google-cloud-bigquery/owlbot.py | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 52d79c11f3ad..8cb43804d999 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,3 +1,16 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:f0e4b51deef56bed74d3e2359c583fc104a8d6367da3984fc5c66938db738828 + digest: sha256:ed1f9983d5a935a89fe8085e8bb97d94e41015252c5b6c9771257cf8624367e6 diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index e6f36905b94f..095759d48888 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -116,6 +116,7 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", + ".github/workflows", # exclude gh actions as credentials are needed for tests ], ) From cd781778df8d18eb54c3def2579fb648e019b8bc Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Mon, 24 Jan 2022 13:08:06 -0700 Subject: [PATCH 1377/2016] chore: make samples 3.6 check optional (#1120) --- packages/google-cloud-bigquery/.github/sync-repo-settings.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml index 01affbae53a8..73cc3bcef668 100644 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -14,7 +14,6 @@ branchProtectionRules: - 'Kokoro snippets-3.8' - 'cla/google' - 'Samples - Lint' - - 'Samples - Python 3.6' - 'Samples - Python 3.7' - 'Samples - Python 3.8' - pattern: v3 @@ -26,6 +25,5 @@ branchProtectionRules: - 'Kokoro snippets-3.8' - 'cla/google' - 'Samples - Lint' - - 'Samples - Python 3.6' - 'Samples - Python 3.7' - 'Samples - Python 3.8' From 70b5bfdc8a03c195a80ed61ef767fcf5d7c2ef6a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 3 Feb 2022 20:18:01 -0500 Subject: [PATCH 1378/2016] chore: use gapic-generator-python 0.62.1 (#1127) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit committer: parthea PiperOrigin-RevId: 425964861 Source-Link: https://github.com/googleapis/googleapis/commit/84b1a5a4f6fb2d04905be58e586b8a7a4310a8cf Source-Link: https://github.com/googleapis/googleapis-gen/commit/4fb761bbd8506ac156f49bac5f18306aa8eb3aa8 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNGZiNzYxYmJkODUwNmFjMTU2ZjQ5YmFjNWYxODMwNmFhOGViM2FhOCJ9 * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot --- .../google/cloud/bigquery_v2/types/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index 440bc0805ec3..84188e40c6f2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -341,8 +341,8 @@ class AggregateClassificationMetrics(proto.Message): threshold. f1_score (google.protobuf.wrappers_pb2.DoubleValue): The F1 score is an average of recall and - precision. For multiclass this is a macro- - averaged metric. + precision. For multiclass this is a + macro-averaged metric. log_loss (google.protobuf.wrappers_pb2.DoubleValue): Logarithmic Loss. For multiclass this is a macro-averaged metric. From d93ed6f4f448e789d483c6f8fac700f948f8650e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 14 Feb 2022 16:02:30 -0600 Subject: [PATCH 1379/2016] docs: show common job properties in `get_job` and `cancel_job` samples (#1137) * docs: show common job properties in `get_job` and `cancel_job` samples * flake8 --- .../google-cloud-bigquery/docs/snippets.py | 1 + .../samples/snippets/manage_job_cancel.py | 26 +++++++++++++ .../samples/snippets/manage_job_get.py | 33 ++++++++++++++++ .../samples/snippets/manage_job_test.py | 39 +++++++++++++++++++ 4 files changed, 99 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/manage_job_get.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/manage_job_test.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index c62001fc0317..499285eebbc0 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -757,6 +757,7 @@ def test_client_query_total_rows(client, capsys): def test_manage_job(client): + # TODO(b/199162556): delete after migrating docs sql = """ SELECT corpus FROM `bigquery-public-data.samples.shakespeare` diff --git a/packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py b/packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py new file mode 100644 index 000000000000..3e0fc5218fdd --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py @@ -0,0 +1,26 @@ +# Copyright 2016-2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START bigquery_cancel_job] +from google.cloud import bigquery + + +def cancel_job( + client: bigquery.Client, location: str = "us", job_id: str = "abcd-efgh-ijkl-mnop", +): + job = client.cancel_job(job_id, location=location) + print(f"{job.location}:{job.job_id} cancelled") + + +# [END bigquery_cancel_job] diff --git a/packages/google-cloud-bigquery/samples/snippets/manage_job_get.py b/packages/google-cloud-bigquery/samples/snippets/manage_job_get.py new file mode 100644 index 000000000000..256d79e5b4f8 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/manage_job_get.py @@ -0,0 +1,33 @@ +# Copyright 2016-2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START bigquery_get_job] +from google.cloud import bigquery + + +def get_job( + client: bigquery.Client, location: str = "us", job_id: str = "abcd-efgh-ijkl-mnop", +): + job = client.get_job(job_id, location=location) + + # All job classes have "location" and "job_id" string properties. + # Use these properties for job operations such as "cancel_job" and + # "delete_job". + print(f"{job.location}:{job.job_id}") + print(f"Type: {job.job_type}") + print(f"State: {job.state}") + print(f"Created: {job.created.isoformat()}") + + +# [END bigquery_get_job] diff --git a/packages/google-cloud-bigquery/samples/snippets/manage_job_test.py b/packages/google-cloud-bigquery/samples/snippets/manage_job_test.py new file mode 100644 index 000000000000..745b7bbbeb83 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/manage_job_test.py @@ -0,0 +1,39 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + +import manage_job_cancel +import manage_job_get + + +def test_manage_job(capsys: pytest.CaptureFixture): + client = bigquery.Client() + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + location = "us" + job = client.query(sql, location=location) + + manage_job_cancel.cancel_job(client, location=location, job_id=job.job_id) + out, _ = capsys.readouterr() + assert f"{job.location}:{job.job_id} cancelled" in out + + manage_job_get.get_job(client, location=location, job_id=job.job_id) + out, _ = capsys.readouterr() + assert f"{job.location}:{job.job_id}" in out + assert "Type: query" in out From b8bf83d070ad4e7805355795dd7b281032c66f43 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 14 Feb 2022 17:19:09 -0600 Subject: [PATCH 1380/2016] =?UTF-8?q?docs:=20reference=20BigQuery=20REST?= =?UTF-8?q?=20API=20defaults=20in=20`LoadJobConfig`=20descrip=E2=80=A6=20(?= =?UTF-8?q?#1132)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …tion Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes internal issue 213442215 🦕 --- .../google/cloud/bigquery/job/load.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index b12c3e621fad..2d68f7f71570 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -33,9 +33,21 @@ class LoadJobConfig(_JobConfig): """Configuration options for load jobs. - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. + Set properties on the constructed configuration by using the property name + as the name of a keyword argument. Values which are unset or :data:`None` + use the BigQuery REST API default values. See the `BigQuery REST API + reference documentation + `_ + for a list of default values. + + Required options differ based on the + :attr:`~google.cloud.bigquery.job.LoadJobConfig.source_format` value. + For example, the BigQuery API's default value for + :attr:`~google.cloud.bigquery.job.LoadJobConfig.source_format` is ``"CSV"``. + When loading a CSV file, either + :attr:`~google.cloud.bigquery.job.LoadJobConfig.schema` must be set or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.autodetect` must be set to + :data:`True`. """ def __init__(self, **kwargs): From f5a518514008422b8e9b3c80203346c347776b38 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 16 Feb 2022 13:51:32 -0600 Subject: [PATCH 1381/2016] chore: remove unused code snippets (#1139) --- .../google-cloud-bigquery/docs/snippets.py | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 499285eebbc0..f67823249e01 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -756,46 +756,6 @@ def test_client_query_total_rows(client, capsys): assert "Got 100 rows." in out -def test_manage_job(client): - # TODO(b/199162556): delete after migrating docs - sql = """ - SELECT corpus - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY corpus; - """ - location = "us" - job = client.query(sql, location=location) - job_id = job.job_id - - # [START bigquery_cancel_job] - # TODO(developer): Uncomment the lines below and replace with your values. - # from google.cloud import bigquery - # client = bigquery.Client() - # job_id = 'bq-job-123x456-123y123z123c' # replace with your job ID - # location = 'us' # replace with your location - - job = client.cancel_job(job_id, location=location) - # [END bigquery_cancel_job] - - # [START bigquery_get_job] - # TODO(developer): Uncomment the lines below and replace with your values. - # from google.cloud import bigquery - # client = bigquery.Client() - # job_id = 'bq-job-123x456-123y123z123c' # replace with your job ID - # location = 'us' # replace with your location - - job = client.get_job(job_id, location=location) # API request - - # Print selected job properties - print("Details for job {} running in {}:".format(job_id, location)) - print( - "\tType: {}\n\tState: {}\n\tCreated: {}".format( - job.job_type, job.state, job.created - ) - ) - # [END bigquery_get_job] - - def test_query_external_gcs_permanent_table(client, to_delete): dataset_id = "query_external_gcs_{}".format(_millis()) project = client.project From d673ae963f958283f4c101ab38990f016fb886b8 Mon Sep 17 00:00:00 2001 From: Erroration2022 <99040129+Erroration2022@users.noreply.github.com> Date: Wed, 16 Feb 2022 12:20:47 -0800 Subject: [PATCH 1382/2016] fix: return 403 when VPC-SC violation happens (#1131) This is fixing the issue where VPC-SC violation is not returning 403. Error message map does not recognize VPCSC policy violation error and will default to return an internal server error. Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 97acab5d2456..86701e295777 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -45,6 +45,7 @@ "invalidQuery": http.client.BAD_REQUEST, "notFound": http.client.NOT_FOUND, "notImplemented": http.client.NOT_IMPLEMENTED, + "policyViolation": http.client.FORBIDDEN, "quotaExceeded": http.client.FORBIDDEN, "rateLimitExceeded": http.client.FORBIDDEN, "resourceInUse": http.client.BAD_REQUEST, From 4bbd5916c8a6925930315e8c73d84c72d27e21a2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 16 Feb 2022 17:07:47 -0600 Subject: [PATCH 1383/2016] feat: add `--no_query_cache` option to `%%bigquery` magics to disable query cache (#1141) --- .../google/cloud/bigquery/magics/magics.py | 12 ++++ .../tests/unit/test_magics.py | 58 +++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 5af0a3b5131b..7b4d584fb70b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -35,6 +35,8 @@ A dataset and table to store the query results. If table does not exists, it will be created. If table already exists, its data will be overwritten. Variable should be in a format .. + * ``--no_query_cache`` (Optional[line argument]): + Do not use cached query results. * ``--project `` (Optional[line argument]): Project to use for running the query. Defaults to the context :attr:`~google.cloud.bigquery.magics.Context.project`. @@ -442,6 +444,12 @@ def _create_dataset_if_necessary(client, dataset_id): "this option's value in the context bqstorage_client_options." ), ) +@magic_arguments.argument( + "--no_query_cache", + action="store_true", + default=False, + help=("Do not use cached query results."), +) @magic_arguments.argument( "--use_bqstorage_api", action="store_true", @@ -642,6 +650,10 @@ def _cell_magic(line, query): job_config.use_legacy_sql = args.use_legacy_sql job_config.dry_run = args.dry_run + # Don't override context job config unless --no_query_cache is explicitly set. + if args.no_query_cache: + job_config.use_query_cache = False + if args.destination_table: split = args.destination_table.split(".") if len(split) != 2: diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index e18d04d6418b..2801768f8618 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -1217,6 +1217,64 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): assert sent_config["maximumBytesBilled"] == "10203" +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_with_no_query_cache(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + conn = make_connection() + monkeypatch.setattr(magics.context, "_connection", conn) + monkeypatch.setattr(magics.context, "project", "project-from-context") + + # --no_query_cache option should override context. + monkeypatch.setattr( + magics.context.default_query_job_config, "use_query_cache", True + ) + + ip.run_cell_magic("bigquery", "--no_query_cache", QUERY_STRING) + + conn.api_request.assert_called_with( + method="POST", + path="/projects/project-from-context/jobs", + data=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + jobs_insert_call = [ + call + for call in conn.api_request.call_args_list + if call[1]["path"] == "/projects/project-from-context/jobs" + ][0] + assert not jobs_insert_call[1]["data"]["configuration"]["query"]["useQueryCache"] + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_context_with_no_query_cache_from_context(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + conn = make_connection() + monkeypatch.setattr(magics.context, "_connection", conn) + monkeypatch.setattr(magics.context, "project", "project-from-context") + monkeypatch.setattr( + magics.context.default_query_job_config, "use_query_cache", False + ) + + ip.run_cell_magic("bigquery", "", QUERY_STRING) + + conn.api_request.assert_called_with( + method="POST", + path="/projects/project-from-context/jobs", + data=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + jobs_insert_call = [ + call + for call in conn.api_request.call_args_list + if call[1]["path"] == "/projects/project-from-context/jobs" + ][0] + assert not jobs_insert_call[1]["data"]["configuration"]["query"]["useQueryCache"] + + @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): From 35dba42ee7a368e3151a66f8ca0fb93e150da01e Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 17 Feb 2022 08:08:58 -0600 Subject: [PATCH 1384/2016] chore(main): release 2.33.0 (#1138) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 18 ++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 6e69fa62143d..e83cb97880b1 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.33.0](https://github.com/googleapis/python-bigquery/compare/v2.32.0...v2.33.0) (2022-02-16) + + +### Features + +* add `--no_query_cache` option to `%%bigquery` magics to disable query cache ([#1141](https://github.com/googleapis/python-bigquery/issues/1141)) ([7dd30af](https://github.com/googleapis/python-bigquery/commit/7dd30af41b8a595b96176c964ba14aa41645ef0d)) + + +### Bug Fixes + +* return 403 when VPC-SC violation happens ([#1131](https://github.com/googleapis/python-bigquery/issues/1131)) ([f5daa9b](https://github.com/googleapis/python-bigquery/commit/f5daa9b41377a58cb3220bb2ab7c72adc6462196)) + + +### Documentation + +* reference BigQuery REST API defaults in `LoadJobConfig` descrip… ([#1132](https://github.com/googleapis/python-bigquery/issues/1132)) ([18d9580](https://github.com/googleapis/python-bigquery/commit/18d958062721d6be81e7bd7a5bd66f277344a864)) +* show common job properties in `get_job` and `cancel_job` samples ([#1137](https://github.com/googleapis/python-bigquery/issues/1137)) ([8edc10d](https://github.com/googleapis/python-bigquery/commit/8edc10d019bd96defebc4f92a47774901e9b956f)) + ## [2.32.0](https://github.com/googleapis/python-bigquery/compare/v2.31.0...v2.32.0) (2022-01-12) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index b8c5af9a24bc..17811b65f86e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.32.0" +__version__ = "2.33.0" From b1b06e999a97d68e8540dfcd74545fcd9737b64d Mon Sep 17 00:00:00 2001 From: Anmol Date: Fri, 18 Feb 2022 13:57:50 -0500 Subject: [PATCH 1385/2016] feat: support BI Engine statistics in query job (#1144) * chore: Add support for accessing BI Engine statistics The REST API returns BiEngineStatistics for a query which denotes if the query was accelerated by BI Engine or not. This commit adds the necessary function to access this information for executed queries. * fix: Removed enums and replaced with string constants * fix: Fixed logic for creating BIEngineStats and added test case * Attempt at mypy fix Co-authored-by: Tim Swast --- .../google/cloud/bigquery/job/query.py | 47 ++++++++++++++++ .../tests/unit/job/test_query.py | 17 ++++++ .../tests/unit/job/test_query_stats.py | 56 +++++++++++++++++++ 3 files changed, 120 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 2dd9459842ff..2fd7afb7615b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -121,6 +121,44 @@ def _to_api_repr_table_defs(value): return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} +class BiEngineReason(typing.NamedTuple): + """Reason for BI Engine acceleration failure + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginereason + """ + + code: str = "CODE_UNSPECIFIED" + + reason: str = "" + + @classmethod + def from_api_repr(cls, reason: Dict[str, str]) -> "BiEngineReason": + return cls(reason.get("code", "CODE_UNSPECIFIED"), reason.get("message", "")) + + +class BiEngineStats(typing.NamedTuple): + """Statistics for a BI Engine query + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginestatistics + """ + + mode: str = "ACCELERATION_MODE_UNSPECIFIED" + """ Specifies which mode of BI Engine acceleration was performed (if any) + """ + + reasons: List[BiEngineReason] = [] + """ Contains explanatory messages in case of DISABLED / PARTIAL acceleration + """ + + @classmethod + def from_api_repr(cls, stats: Dict[str, Any]) -> "BiEngineStats": + mode = stats.get("biEngineMode", "ACCELERATION_MODE_UNSPECIFIED") + reasons = [ + BiEngineReason.from_api_repr(r) for r in stats.get("biEngineReasons", []) + ] + return cls(mode, reasons) + + class DmlStats(typing.NamedTuple): """Detailed statistics for DML statements. @@ -1191,6 +1229,15 @@ def dml_stats(self) -> Optional[DmlStats]: else: return DmlStats.from_api_repr(stats) + @property + def bi_engine_stats(self) -> Optional[BiEngineStats]: + stats = self._job_statistics().get("biEngineStatistics") + + if stats is None: + return None + else: + return BiEngineStats.from_api_repr(stats) + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 5fb76b9e91cb..33a52cfec54e 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -877,6 +877,23 @@ def test_estimated_bytes_processed(self): query_stats["estimatedBytesProcessed"] = str(est_bytes) self.assertEqual(job.estimated_bytes_processed, est_bytes) + def test_bi_engine_stats(self): + from google.cloud.bigquery.job.query import BiEngineStats + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + assert job.bi_engine_stats is None + + statistics = job._properties["statistics"] = {} + assert job.bi_engine_stats is None + + query_stats = statistics["query"] = {} + assert job.bi_engine_stats is None + + query_stats["biEngineStatistics"] = {"biEngineMode": "FULL"} + assert isinstance(job.bi_engine_stats, BiEngineStats) + assert job.bi_engine_stats.mode == "FULL" + def test_dml_stats(self): from google.cloud.bigquery.job.query import DmlStats diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py index e70eb097c545..13e022ced386 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py @@ -15,6 +15,62 @@ from .helpers import _Base +class TestBiEngineStats: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job.query import BiEngineStats + + return BiEngineStats + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + bi_engine_stats = self._make_one() + assert bi_engine_stats.mode == "ACCELERATION_MODE_UNSPECIFIED" + assert bi_engine_stats.reasons == [] + + def test_from_api_repr_unspecified(self): + klass = self._get_target_class() + result = klass.from_api_repr({"biEngineMode": "ACCELERATION_MODE_UNSPECIFIED"}) + + assert isinstance(result, klass) + assert result.mode == "ACCELERATION_MODE_UNSPECIFIED" + assert result.reasons == [] + + def test_from_api_repr_full(self): + klass = self._get_target_class() + result = klass.from_api_repr({"biEngineMode": "FULL"}) + + assert isinstance(result, klass) + assert result.mode == "FULL" + assert result.reasons == [] + + def test_from_api_repr_disabled(self): + klass = self._get_target_class() + result = klass.from_api_repr( + { + "biEngineMode": "DISABLED", + "biEngineReasons": [ + { + "code": "OTHER_REASON", + "message": "Unable to support input table xyz due to an internal error.", + } + ], + } + ) + + assert isinstance(result, klass) + assert result.mode == "DISABLED" + + reason = result.reasons[0] + assert reason.code == "OTHER_REASON" + assert ( + reason.reason + == "Unable to support input table xyz due to an internal error." + ) + + class TestDmlStats: @staticmethod def _get_target_class(): From a28101308d07bb332fbde30f074571643c2ab2cc Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Fri, 18 Feb 2022 11:47:20 -0800 Subject: [PATCH 1386/2016] chore(main): release 2.34.0 (#1145) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index e83cb97880b1..2db3cf3b2c9d 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.34.0](https://github.com/googleapis/python-bigquery/compare/v2.33.0...v2.34.0) (2022-02-18) + + +### Features + +* support BI Engine statistics in query job ([#1144](https://github.com/googleapis/python-bigquery/issues/1144)) ([7482549](https://github.com/googleapis/python-bigquery/commit/7482549cb42ed5302634ab4fb7b4efcd97b35c68)) + ## [2.33.0](https://github.com/googleapis/python-bigquery/compare/v2.32.0...v2.33.0) (2022-02-16) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 17811b65f86e..328c4cdc296f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.33.0" +__version__ = "2.34.0" From aed4a1b5e9d19b27e04bc900e4fb95e786f96a7d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 1 Mar 2022 16:03:34 -0600 Subject: [PATCH 1387/2016] deps: add "extra" for IPython, exclude bad IPython release (#1151) * deps: add "extra" for IPython, exclude bad IPython release * skip magics tests when IPython not installed * sort importorskips * add 3.10 prerelease session * add continuous session --- .../continuous/prerelease-deps-3.10.cfg | 7 +++++ .../continuous/prerelease-deps-3.8.cfg | 2 +- .../presubmit/prerelease-deps-3.10.cfg | 7 +++++ packages/google-cloud-bigquery/noxfile.py | 7 +---- packages/google-cloud-bigquery/setup.py | 1 + .../testing/constraints-3.6.txt | 1 + .../tests/unit/line_arg_parser/test_lexer.py | 2 ++ .../tests/unit/line_arg_parser/test_parser.py | 2 ++ .../unit/line_arg_parser/test_visitors.py | 2 ++ .../tests/unit/test_magics.py | 27 +++++++++---------- 10 files changed, 37 insertions(+), 21 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.10.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.10.cfg diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.10.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.10.cfg new file mode 100644 index 000000000000..339980bdd7e5 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.10.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps-3.10" +} diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg index 3595fb43f5c0..fabe3e347f7b 100644 --- a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg +++ b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps" + value: "prerelease_deps-3.8" } diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.10.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.10.cfg new file mode 100644 index 000000000000..339980bdd7e5 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.10.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps-3.10" +} diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 5059118611c6..6304747d178c 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -79,8 +79,6 @@ def default(session, install_extras=True): install_target = "." session.install("-e", install_target, "-c", constraints_path) - session.install("ipython", "-c", constraints_path) - # Run py.test against the unit tests. session.run( "py.test", @@ -119,7 +117,6 @@ def unit_noextras(session): def mypy(session): """Run type checks with mypy.""" session.install("-e", ".[all]") - session.install("ipython") session.install(MYPY_VERSION) # Just install the dependencies' type info directly, since "mypy --install-types" @@ -138,7 +135,6 @@ def pytype(session): # https://github.com/googleapis/python-bigquery/issues/655 session.install("attrs==20.3.0") session.install("-e", ".[all]") - session.install("ipython") session.install(PYTYPE_VERSION) session.run("pytype") @@ -180,7 +176,6 @@ def system(session): else: extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) - session.install("ipython", "-c", constraints_path) # Run py.test against the system tests. session.run("py.test", "--quiet", os.path.join("tests", "system"), *session.posargs) @@ -355,7 +350,7 @@ def blacken(session): def docs(session): """Build the docs.""" - session.install("ipython", "recommonmark", "sphinx==4.0.1", "sphinx_rtd_theme") + session.install("recommonmark", "sphinx==4.0.1", "sphinx_rtd_theme") session.install("google-cloud-storage") session.install("-e", ".[all]") diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 5c0b80f7c3a2..f6194f50e399 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -61,6 +61,7 @@ "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "pandas": ["pandas>=0.24.2"] + pyarrow_dep, "bignumeric_type": pyarrow_dep, + "ipython": ["ipython>=7.0.1,!=8.1.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 1.1.0", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index f967077bc621..7f52e441d2ae 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -11,6 +11,7 @@ google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 grpcio==1.38.1 +ipython==7.0.1 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 diff --git a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_lexer.py b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_lexer.py index 22fa96f228eb..3624ed0f36e6 100644 --- a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_lexer.py +++ b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_lexer.py @@ -14,6 +14,8 @@ import pytest +IPython = pytest.importorskip("IPython") + @pytest.fixture(scope="session") def lexer_class(): diff --git a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py index 3edff88e9675..3f9e9ff419f8 100644 --- a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py +++ b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py @@ -14,6 +14,8 @@ import pytest +IPython = pytest.importorskip("IPython") + @pytest.fixture(scope="session") def parser_class(): diff --git a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_visitors.py b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_visitors.py index 51d4f837a91c..288ef5f7179e 100644 --- a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_visitors.py +++ b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_visitors.py @@ -14,6 +14,8 @@ import pytest +IPython = pytest.importorskip("IPython") + @pytest.fixture def base_visitor(): diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 2801768f8618..a4214f32f40b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -17,31 +17,30 @@ from concurrent import futures import warnings -import mock -import pytest - -try: - import pandas -except ImportError: # pragma: NO COVER - pandas = None - from google.api_core import exceptions import google.auth.credentials +import mock +import pytest +from tests.unit.helpers import make_connection +from test_utils.imports import maybe_fail_import from google.cloud import bigquery from google.cloud.bigquery import job from google.cloud.bigquery import table -from google.cloud.bigquery.magics import magics from google.cloud.bigquery.retry import DEFAULT_TIMEOUT -from tests.unit.helpers import make_connection -from test_utils.imports import maybe_fail_import +try: + from google.cloud.bigquery.magics import magics +except ImportError: + magics = None + +bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") IPython = pytest.importorskip("IPython") -io = pytest.importorskip("IPython.utils.io") -tools = pytest.importorskip("IPython.testing.tools") interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") -bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") +tools = pytest.importorskip("IPython.testing.tools") +io = pytest.importorskip("IPython.utils.io") +pandas = pytest.importorskip("pandas") @pytest.fixture(scope="session") From e9a8b62e999109c3605fe6b2fc0464fdffcfcfe2 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 2 Mar 2022 00:28:21 +0100 Subject: [PATCH 1388/2016] chore(deps): update dependency ipython to v8.0.1 [security] (#1126) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [ipython](https://ipython.org) ([source](https://togithub.com/ipython/ipython)) | `==8.0.0` -> `==8.0.1` | [![age](https://badges.renovateapi.com/packages/pypi/ipython/8.0.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/ipython/8.0.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/ipython/8.0.1/compatibility-slim/8.0.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/ipython/8.0.1/confidence-slim/8.0.0)](https://docs.renovatebot.com/merge-confidence/) | ### GitHub Vulnerability Alerts #### [CVE-2022-21699](https://togithub.com/ipython/ipython/security/advisories/GHSA-pq7m-3gw7-gq5x) We’d like to disclose an arbitrary code execution vulnerability in IPython that stems from IPython executing untrusted files in CWD. This vulnerability allows one user to run code as another. Proof of concept User1: ``` mkdir -m 777 /tmp/profile_default mkdir -m 777 /tmp/profile_default/startup echo 'print("stealing your private secrets")' > /tmp/profile_default/startup/foo.py ``` User2: ``` cd /tmp ipython ``` User2 will see: ``` Python 3.9.7 (default, Oct 25 2021, 01:04:21) Type 'copyright', 'credits' or 'license' for more information IPython 7.29.0 -- An enhanced Interactive Python. Type '?' for help. stealing your private secrets ``` ## Patched release and documentation See https://ipython.readthedocs.io/en/stable/whatsnew/version8.html#ipython-8-0-1-cve-2022-21699, Version 8.0.1, 7.31.1 for current Python version are recommended. Version 7.16.3 has also been published for Python 3.6 users, Version 5.11 (source only, 5.x branch on github) for older Python versions. --- ### Release Notes
ipython/ipython ### [`v8.0.1`](https://togithub.com/ipython/ipython/compare/8.0.0...8.0.1) [Compare Source](https://togithub.com/ipython/ipython/compare/8.0.0...8.0.1)
--- ### Configuration 📅 **Schedule**: "" (UTC). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, click this checkbox. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index b47dcbc4fd2a..72e9027d2b1e 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -3,7 +3,7 @@ google-auth-oauthlib==0.4.6 grpcio==1.43.0 ipython==7.16.1; python_version < '3.7' ipython==7.29.0; python_version == '3.7' -ipython==8.0.0; python_version >= '3.8' +ipython==8.0.1; python_version >= '3.8' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.5.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index b47dcbc4fd2a..72e9027d2b1e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -3,7 +3,7 @@ google-auth-oauthlib==0.4.6 grpcio==1.43.0 ipython==7.16.1; python_version < '3.7' ipython==7.29.0; python_version == '3.7' -ipython==8.0.0; python_version >= '3.8' +ipython==8.0.1; python_version >= '3.8' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.5.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' From 6e645f5d17c09b4d6a35183e1553c800dd007105 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 2 Mar 2022 16:34:18 +0100 Subject: [PATCH 1389/2016] chore(deps): update dependency ipython [security] (#1154) --- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 72e9027d2b1e..bb6cf2cddc6d 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,8 +1,8 @@ google-cloud-bigquery-storage==2.10.1 google-auth-oauthlib==0.4.6 grpcio==1.43.0 -ipython==7.16.1; python_version < '3.7' -ipython==7.29.0; python_version == '3.7' +ipython==7.16.3; python_version < '3.7' +ipython==7.31.1; python_version == '3.7' ipython==8.0.1; python_version >= '3.8' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.5.1; python_version >= '3.7' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 72e9027d2b1e..bb6cf2cddc6d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,8 +1,8 @@ google-cloud-bigquery-storage==2.10.1 google-auth-oauthlib==0.4.6 grpcio==1.43.0 -ipython==7.16.1; python_version < '3.7' -ipython==7.29.0; python_version == '3.7' +ipython==7.16.3; python_version < '3.7' +ipython==7.31.1; python_version == '3.7' ipython==8.0.1; python_version >= '3.8' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.5.1; python_version >= '3.7' From 00a128f5368873e1269de4b0f073b98baa996df0 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 2 Mar 2022 10:16:35 -0600 Subject: [PATCH 1390/2016] chore: update copyright year to 2022 (#1150) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: update copyright year to 2022 PiperOrigin-RevId: 431037888 Source-Link: https://github.com/googleapis/googleapis/commit/b3397f5febbf21dfc69b875ddabaf76bee765058 Source-Link: https://github.com/googleapis/googleapis-gen/commit/510b54e1cdefd53173984df16645081308fe897e Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNTEwYjU0ZTFjZGVmZDUzMTczOTg0ZGYxNjY0NTA4MTMwOGZlODk3ZSJ9 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- .../google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py | 2 +- .../google/cloud/bigquery_v2/types/__init__.py | 2 +- .../google/cloud/bigquery_v2/types/encryption_config.py | 2 +- .../google/cloud/bigquery_v2/types/model.py | 2 +- .../google/cloud/bigquery_v2/types/model_reference.py | 2 +- .../google/cloud/bigquery_v2/types/standard_sql.py | 2 +- .../google/cloud/bigquery_v2/types/table_reference.py | 2 +- packages/google-cloud-bigquery/tests/__init__.py | 2 +- packages/google-cloud-bigquery/tests/unit/__init__.py | 2 +- packages/google-cloud-bigquery/tests/unit/gapic/__init__.py | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py index f9957efa980f..bb11be3b3254 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py index 83bbb3a54261..c038bcd74899 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py index a95954a303aa..d300a417c7ca 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index 84188e40c6f2..f2c03171514d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py index 544377f61454..a4aab59f7227 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py index e10619482193..579c3b138110 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py index da206b4d7e95..aaab83e85ecb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/__init__.py b/packages/google-cloud-bigquery/tests/__init__.py index 4de65971c238..e8e1c3845db5 100644 --- a/packages/google-cloud-bigquery/tests/__init__.py +++ b/packages/google-cloud-bigquery/tests/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/__init__.py b/packages/google-cloud-bigquery/tests/unit/__init__.py index 4de65971c238..e8e1c3845db5 100644 --- a/packages/google-cloud-bigquery/tests/unit/__init__.py +++ b/packages/google-cloud-bigquery/tests/unit/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/tests/unit/gapic/__init__.py b/packages/google-cloud-bigquery/tests/unit/gapic/__init__.py index 4de65971c238..e8e1c3845db5 100644 --- a/packages/google-cloud-bigquery/tests/unit/gapic/__init__.py +++ b/packages/google-cloud-bigquery/tests/unit/gapic/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From fd127019ab49583ff676c9e71aa92f7aece609d5 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Wed, 2 Mar 2022 10:01:40 -0700 Subject: [PATCH 1391/2016] samples: remove browser option from user credentials sample (#1147) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * samples: remove run_console from user credentials sample * Update samples/snippets/user_credentials.py Co-authored-by: Tim Swast * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Tim Swast Co-authored-by: Owl Bot --- .../samples/snippets/user_credentials.py | 22 ++++++++----------- .../samples/snippets/user_credentials_test.py | 2 +- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py index 6089d9fd9da1..e8dccf143cb0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py @@ -23,26 +23,22 @@ import argparse -def main(project, launch_browser=True): +def main(project): # [START bigquery_auth_user_flow] from google_auth_oauthlib import flow - # TODO: Uncomment the line below to set the `launch_browser` variable. - # launch_browser = True - # - # The `launch_browser` boolean variable indicates if a local server is used - # as the callback URL in the auth flow. A value of `True` is recommended, - # but a local server does not work if accessing the application remotely, - # such as over SSH or from a remote Jupyter notebook. - + # A local server is used as the callback URL in the auth flow. appflow = flow.InstalledAppFlow.from_client_secrets_file( "client_secrets.json", scopes=["https://www.googleapis.com/auth/bigquery"] ) - if launch_browser: - appflow.run_local_server() - else: - appflow.run_console() + # This launches a local server to be used as the callback URL in the desktop + # app auth flow. If you are accessing the application remotely, such as over + # SSH or a remote Jupyter notebook, this flow will not work. Use the + # `gcloud auth application-default login --no-browser` command or workload + # identity federation to get authentication tokens, instead. + # + appflow.run_local_server() credentials = appflow.credentials # [END bigquery_auth_user_flow] diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py index 829502d25fc7..66c1bddb7f2d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py @@ -35,7 +35,7 @@ def mock_flow(): def test_auth_query_console(mock_flow, capsys): - main(PROJECT, launch_browser=False) + main(PROJECT) out, _ = capsys.readouterr() # Fun fact: William P. Wood was the 1st director of the US Secret Service. assert "William" in out From 834eee1acfda2b75c4008ae5d67b7d794fa4b741 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 2 Mar 2022 22:19:58 +0100 Subject: [PATCH 1392/2016] deps: allow pyarrow 7.0 (#1112) * chore(deps): update all dependencies * Apply suggestions from code review * Apply suggestions from code review Co-authored-by: Tim Swast --- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 39 ++++++++++--------- .../samples/magics/requirements-test.txt | 2 +- .../samples/magics/requirements.txt | 16 ++++---- .../samples/snippets/requirements-test.txt | 2 +- .../samples/snippets/requirements.txt | 16 ++++---- packages/google-cloud-bigquery/setup.py | 2 +- 7 files changed, 42 insertions(+), 37 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 5d836a5c5184..4bd417eba1fa 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==6.2.5 +pytest==7.0.1 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index b07ba50bf7a4..710f4de0afb0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,45 +1,46 @@ attrs==21.4.0 certifi==2021.10.8 cffi==1.15.0 -charset-normalizer==2.0.10 -click==8.0.3 +charset-normalizer==2.0.12 +click==8.0.4 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -Fiona==1.8.20 +Fiona==1.8.21 geojson==2.5.0 geopandas==0.9.0; python_version < '3.7' geopandas==0.10.2; python_version >= '3.7' -google-api-core==2.4.0 -google-auth==2.3.3 -google-cloud-bigquery==2.31.0 -google-cloud-bigquery-storage==2.10.1 -google-cloud-core==2.2.1 +google-api-core==2.5.0 +google-auth==2.6.0 +google-cloud-bigquery==2.34.0 +google-cloud-bigquery-storage==2.12.0 +google-cloud-core==2.2.2 google-crc32c==1.3.0 -google-resumable-media==2.1.0 -googleapis-common-protos==1.54.0 -grpcio==1.43.0 +google-resumable-media==2.3.0 +googleapis-common-protos==1.55.0 +grpcio==1.44.0 idna==3.3 -libcst==0.4.0 +libcst==0.4.1 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas==1.1.5; python_version < '3.7' -pandas==1.3.5; python_version >= '3.7' -proto-plus==1.19.8 -protobuf==3.19.3 -pyarrow==6.0.1 +pandas==1.3.5; python_version == '3.7' +pandas==1.4.1; python_version >= '3.8' +proto-plus==1.20.3 +protobuf==3.19.4 +pyarrow==7.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 -pyparsing==3.0.6 +pyparsing==3.0.7 python-dateutil==2.8.2 pytz==2021.3 PyYAML==6.0 requests==2.27.1 rsa==4.8 -Shapely==1.8.0 +Shapely==1.8.1.post1 six==1.16.0 -typing-extensions==4.0.1 +typing-extensions==4.1.1 typing-inspect==0.7.1 urllib3==1.26.8 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 5b73f1fd5d67..bafc3de2ae2c 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.1 -pytest==6.2.5 +pytest==7.0.1 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index bb6cf2cddc6d..0ea9c5710fae 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,12 +1,14 @@ -google-cloud-bigquery-storage==2.10.1 -google-auth-oauthlib==0.4.6 -grpcio==1.43.0 +google-cloud-bigquery-storage==2.12.0 +google-auth-oauthlib==0.5.0 +grpcio==1.44.0 ipython==7.16.3; python_version < '3.7' ipython==7.31.1; python_version == '3.7' -ipython==8.0.1; python_version >= '3.8' -matplotlib==3.3.4; python_version < '3.7' +ipython==8.0.1; python_version == '3.8' +ipython==8.1.0; python_version >= '3.9' +matplotlib==3.5.1; python_version < '3.7' matplotlib==3.5.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.3.5; python_version >= '3.7' -pyarrow==6.0.1 +pandas==1.3.5; python_version == '3.7' +pandas==1.4.1; python_version >= '3.8' +pyarrow==7.0.0 pytz==2021.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 5b73f1fd5d67..bafc3de2ae2c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.1 -pytest==6.2.5 +pytest==7.0.1 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index bb6cf2cddc6d..0ea9c5710fae 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,12 +1,14 @@ -google-cloud-bigquery-storage==2.10.1 -google-auth-oauthlib==0.4.6 -grpcio==1.43.0 +google-cloud-bigquery-storage==2.12.0 +google-auth-oauthlib==0.5.0 +grpcio==1.44.0 ipython==7.16.3; python_version < '3.7' ipython==7.31.1; python_version == '3.7' -ipython==8.0.1; python_version >= '3.8' -matplotlib==3.3.4; python_version < '3.7' +ipython==8.0.1; python_version == '3.8' +ipython==8.1.0; python_version >= '3.9' +matplotlib==3.5.1; python_version < '3.7' matplotlib==3.5.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.3.5; python_version >= '3.7' -pyarrow==6.0.1 +pandas==1.3.5; python_version == '3.7' +pandas==1.4.1; python_version >= '3.8' +pyarrow==7.0.0 pytz==2021.3 diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index f6194f50e399..6a28d21c3d8f 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -28,7 +28,7 @@ # 'Development Status :: 4 - Beta' # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" -pyarrow_dep = ["pyarrow >= 3.0.0, < 7.0dev"] +pyarrow_dep = ["pyarrow >=3.0.0, <8.0dev"] dependencies = [ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 # NOTE: Maintainers, please do not require google-api-core>=2.x.x From 3ecff3b0b40160023f711789c4e08b55d3d1a589 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 2 Mar 2022 21:46:22 +0000 Subject: [PATCH 1393/2016] chore(main): release 2.34.1 (#1153) :robot: I have created a release *beep* *boop* --- ### [2.34.1](https://github.com/googleapis/python-bigquery/compare/v2.34.0...v2.34.1) (2022-03-02) ### Dependencies * add "extra" for IPython, exclude bad IPython release ([#1151](https://github.com/googleapis/python-bigquery/issues/1151)) ([0fbe12d](https://github.com/googleapis/python-bigquery/commit/0fbe12d1ababa45aa774b026a93c0af9e8f343d9)) * allow pyarrow 7.0 ([#1112](https://github.com/googleapis/python-bigquery/issues/1112)) ([57f8ea9](https://github.com/googleapis/python-bigquery/commit/57f8ea95e152dc238e7a4941f96c54be53f7eaf3)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 8 ++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 2db3cf3b2c9d..48a1509462b1 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.34.1](https://github.com/googleapis/python-bigquery/compare/v2.34.0...v2.34.1) (2022-03-02) + + +### Dependencies + +* add "extra" for IPython, exclude bad IPython release ([#1151](https://github.com/googleapis/python-bigquery/issues/1151)) ([0fbe12d](https://github.com/googleapis/python-bigquery/commit/0fbe12d1ababa45aa774b026a93c0af9e8f343d9)) +* allow pyarrow 7.0 ([#1112](https://github.com/googleapis/python-bigquery/issues/1112)) ([57f8ea9](https://github.com/googleapis/python-bigquery/commit/57f8ea95e152dc238e7a4941f96c54be53f7eaf3)) + ## [2.34.0](https://github.com/googleapis/python-bigquery/compare/v2.33.0...v2.34.0) (2022-02-18) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 328c4cdc296f..e1d00b4bf3b7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.34.0" +__version__ = "2.34.1" From 8c631868d5406cb2dddb4489049697ec1495bc5d Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 3 Mar 2022 18:06:21 -0500 Subject: [PATCH 1394/2016] fix(deps): require google-api-core>=1.31.5, >=2.3.2 (#1157) fix(deps): require proto-plus>=1.15.0 --- packages/google-cloud-bigquery/setup.py | 4 ++-- packages/google-cloud-bigquery/testing/constraints-3.6.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 6a28d21c3d8f..63cdf747c37e 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -34,8 +34,8 @@ # NOTE: Maintainers, please do not require google-api-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 - "google-api-core[grpc] >= 1.29.0, <3.0.0dev", - "proto-plus >= 1.10.0", + "google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", + "proto-plus >= 1.15.0", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index 7f52e441d2ae..0258515eba8a 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -6,7 +6,7 @@ # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 geopandas==0.9.0 -google-api-core==1.29.0 +google-api-core==1.31.5 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 @@ -16,7 +16,7 @@ opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 pandas==0.24.2 -proto-plus==1.10.0 +proto-plus==1.15.0 protobuf==3.12.0 pyarrow==3.0.0 python-dateutil==2.7.2 From aaba8eeb9dcaf69921e0e59dacb9cc5438741b1c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 4 Mar 2022 12:57:22 -0500 Subject: [PATCH 1395/2016] chore: Adding support for pytest-xdist and pytest-parallel (#1160) Source-Link: https://github.com/googleapis/synthtool/commit/82f5cb283efffe96e1b6cd634738e0e7de2cd90a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:5d8da01438ece4021d135433f2cf3227aa39ef0eaccc941d62aa35e6902832ae Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 2 +- .../samples/geography/noxfile.py | 78 +++++++++++-------- .../samples/magics/noxfile.py | 78 +++++++++++-------- .../samples/snippets/noxfile.py | 78 +++++++++++-------- 4 files changed, 133 insertions(+), 103 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 8cb43804d999..7e08e05a380c 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:ed1f9983d5a935a89fe8085e8bb97d94e41015252c5b6c9771257cf8624367e6 + digest: sha256:5d8da01438ece4021d135433f2cf3227aa39ef0eaccc941d62aa35e6902832ae diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 20cdfc620138..85f5836dba3a 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -188,42 +188,52 @@ def _session_tests( # check for presence of tests test_list = glob.glob("*_test.py") + glob.glob("test_*.py") test_list.extend(glob.glob("tests")) + if len(test_list) == 0: print("No tests found, skipping directory.") - else: - if TEST_CONFIG["pip_version_override"]: - pip_version = TEST_CONFIG["pip_version_override"] - session.install(f"pip=={pip_version}") - """Runs py.test for a particular project.""" - if os.path.exists("requirements.txt"): - if os.path.exists("constraints.txt"): - session.install("-r", "requirements.txt", "-c", "constraints.txt") - else: - session.install("-r", "requirements.txt") - - if os.path.exists("requirements-test.txt"): - if os.path.exists("constraints-test.txt"): - session.install( - "-r", "requirements-test.txt", "-c", "constraints-test.txt" - ) - else: - session.install("-r", "requirements-test.txt") - - if INSTALL_LIBRARY_FROM_SOURCE: - session.install("-e", _get_repo_root()) - - if post_install: - post_install(session) - - session.run( - "pytest", - *(PYTEST_COMMON_ARGS + session.posargs), - # Pytest will return 5 when no tests are collected. This can happen - # on travis where slow and flaky tests are excluded. - # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html - success_codes=[0, 5], - env=get_pytest_env_vars(), - ) + return + + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + concurrent_args = [] + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + with open("requirements.txt") as rfile: + packages = rfile.read() + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") + with open("requirements-test.txt") as rtfile: + packages += rtfile.read() + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + if "pytest-parallel" in packages: + concurrent_args.extend(["--workers", "auto", "--tests-per-worker", "auto"]) + elif "pytest-xdist" in packages: + concurrent_args.extend(["-n", "auto"]) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) @nox.session(python=ALL_VERSIONS) diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index 20cdfc620138..85f5836dba3a 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -188,42 +188,52 @@ def _session_tests( # check for presence of tests test_list = glob.glob("*_test.py") + glob.glob("test_*.py") test_list.extend(glob.glob("tests")) + if len(test_list) == 0: print("No tests found, skipping directory.") - else: - if TEST_CONFIG["pip_version_override"]: - pip_version = TEST_CONFIG["pip_version_override"] - session.install(f"pip=={pip_version}") - """Runs py.test for a particular project.""" - if os.path.exists("requirements.txt"): - if os.path.exists("constraints.txt"): - session.install("-r", "requirements.txt", "-c", "constraints.txt") - else: - session.install("-r", "requirements.txt") - - if os.path.exists("requirements-test.txt"): - if os.path.exists("constraints-test.txt"): - session.install( - "-r", "requirements-test.txt", "-c", "constraints-test.txt" - ) - else: - session.install("-r", "requirements-test.txt") - - if INSTALL_LIBRARY_FROM_SOURCE: - session.install("-e", _get_repo_root()) - - if post_install: - post_install(session) - - session.run( - "pytest", - *(PYTEST_COMMON_ARGS + session.posargs), - # Pytest will return 5 when no tests are collected. This can happen - # on travis where slow and flaky tests are excluded. - # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html - success_codes=[0, 5], - env=get_pytest_env_vars(), - ) + return + + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + concurrent_args = [] + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + with open("requirements.txt") as rfile: + packages = rfile.read() + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") + with open("requirements-test.txt") as rtfile: + packages += rtfile.read() + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + if "pytest-parallel" in packages: + concurrent_args.extend(["--workers", "auto", "--tests-per-worker", "auto"]) + elif "pytest-xdist" in packages: + concurrent_args.extend(["-n", "auto"]) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) @nox.session(python=ALL_VERSIONS) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 20cdfc620138..85f5836dba3a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -188,42 +188,52 @@ def _session_tests( # check for presence of tests test_list = glob.glob("*_test.py") + glob.glob("test_*.py") test_list.extend(glob.glob("tests")) + if len(test_list) == 0: print("No tests found, skipping directory.") - else: - if TEST_CONFIG["pip_version_override"]: - pip_version = TEST_CONFIG["pip_version_override"] - session.install(f"pip=={pip_version}") - """Runs py.test for a particular project.""" - if os.path.exists("requirements.txt"): - if os.path.exists("constraints.txt"): - session.install("-r", "requirements.txt", "-c", "constraints.txt") - else: - session.install("-r", "requirements.txt") - - if os.path.exists("requirements-test.txt"): - if os.path.exists("constraints-test.txt"): - session.install( - "-r", "requirements-test.txt", "-c", "constraints-test.txt" - ) - else: - session.install("-r", "requirements-test.txt") - - if INSTALL_LIBRARY_FROM_SOURCE: - session.install("-e", _get_repo_root()) - - if post_install: - post_install(session) - - session.run( - "pytest", - *(PYTEST_COMMON_ARGS + session.posargs), - # Pytest will return 5 when no tests are collected. This can happen - # on travis where slow and flaky tests are excluded. - # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html - success_codes=[0, 5], - env=get_pytest_env_vars(), - ) + return + + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + concurrent_args = [] + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + with open("requirements.txt") as rfile: + packages = rfile.read() + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") + with open("requirements-test.txt") as rtfile: + packages += rtfile.read() + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + if "pytest-parallel" in packages: + concurrent_args.extend(["--workers", "auto", "--tests-per-worker", "auto"]) + elif "pytest-xdist" in packages: + concurrent_args.extend(["-n", "auto"]) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) @nox.session(python=ALL_VERSIONS) From af1ba0d08acf6c1c5b01569fb19e49d0737d8dcf Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 5 Mar 2022 17:57:25 +0100 Subject: [PATCH 1396/2016] chore(deps): update all dependencies (#1156) * chore(deps): update all dependencies * revert ipython changes for python_version < 3.9 * revert changes * revert ipython changes for python < 3.9 * revert changes * revert changes Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 710f4de0afb0..4c566cfb3b6d 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -10,13 +10,13 @@ Fiona==1.8.21 geojson==2.5.0 geopandas==0.9.0; python_version < '3.7' geopandas==0.10.2; python_version >= '3.7' -google-api-core==2.5.0 +google-api-core==2.6.0 google-auth==2.6.0 -google-cloud-bigquery==2.34.0 +google-cloud-bigquery==2.34.1 google-cloud-bigquery-storage==2.12.0 google-cloud-core==2.2.2 google-crc32c==1.3.0 -google-resumable-media==2.3.0 +google-resumable-media==2.3.1 googleapis-common-protos==1.55.0 grpcio==1.44.0 idna==3.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 0ea9c5710fae..ba873ed3a458 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -4,7 +4,7 @@ grpcio==1.44.0 ipython==7.16.3; python_version < '3.7' ipython==7.31.1; python_version == '3.7' ipython==8.0.1; python_version == '3.8' -ipython==8.1.0; python_version >= '3.9' +ipython==8.1.1; python_version >= '3.9' matplotlib==3.5.1; python_version < '3.7' matplotlib==3.5.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 0ea9c5710fae..ba873ed3a458 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -4,7 +4,7 @@ grpcio==1.44.0 ipython==7.16.3; python_version < '3.7' ipython==7.31.1; python_version == '3.7' ipython==8.0.1; python_version == '3.8' -ipython==8.1.0; python_version >= '3.9' +ipython==8.1.1; python_version >= '3.9' matplotlib==3.5.1; python_version < '3.7' matplotlib==3.5.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' From cfdb21cd6407162bda126380382134f76350596e Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 7 Mar 2022 08:46:58 -0600 Subject: [PATCH 1397/2016] chore(main): release 2.34.2 (#1158) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 8 ++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 48a1509462b1..8fff7ddb099d 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.34.2](https://github.com/googleapis/python-bigquery/compare/v2.34.1...v2.34.2) (2022-03-05) + + +### Bug Fixes + +* **deps:** require google-api-core>=1.31.5, >=2.3.2 ([#1157](https://github.com/googleapis/python-bigquery/issues/1157)) ([0c15790](https://github.com/googleapis/python-bigquery/commit/0c15790720ff573a501cfe760dd74ee166e1a353)) +* **deps:** require proto-plus>=1.15.0 ([0c15790](https://github.com/googleapis/python-bigquery/commit/0c15790720ff573a501cfe760dd74ee166e1a353)) + ### [2.34.1](https://github.com/googleapis/python-bigquery/compare/v2.34.0...v2.34.1) (2022-03-02) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index e1d00b4bf3b7..66368e2cd175 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.34.1" +__version__ = "2.34.2" From 3098c6ff3edcb83f1635bac0421dd62b4a7f1b93 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 8 Mar 2022 02:39:22 +0100 Subject: [PATCH 1398/2016] chore(deps): update all dependencies (#1162) * chore(deps): update all dependencies * remove py36 pin * remove py36 pin * remove py36 pin Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 5 +---- .../google-cloud-bigquery/samples/magics/requirements.txt | 5 +---- .../google-cloud-bigquery/samples/snippets/requirements.txt | 5 +---- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 4c566cfb3b6d..a31643f6fe42 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -5,11 +5,9 @@ charset-normalizer==2.0.12 click==8.0.4 click-plugins==1.1.1 cligj==0.7.2 -dataclasses==0.8; python_version < '3.7' Fiona==1.8.21 geojson==2.5.0 -geopandas==0.9.0; python_version < '3.7' -geopandas==0.10.2; python_version >= '3.7' +geopandas==0.10.2 google-api-core==2.6.0 google-auth==2.6.0 google-cloud-bigquery==2.34.1 @@ -24,7 +22,6 @@ libcst==0.4.1 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 -pandas==1.1.5; python_version < '3.7' pandas==1.3.5; python_version == '3.7' pandas==1.4.1; python_version >= '3.8' proto-plus==1.20.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index ba873ed3a458..f047c46b6719 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,13 +1,10 @@ google-cloud-bigquery-storage==2.12.0 google-auth-oauthlib==0.5.0 grpcio==1.44.0 -ipython==7.16.3; python_version < '3.7' ipython==7.31.1; python_version == '3.7' ipython==8.0.1; python_version == '3.8' ipython==8.1.1; python_version >= '3.9' -matplotlib==3.5.1; python_version < '3.7' -matplotlib==3.5.1; python_version >= '3.7' -pandas==1.1.5; python_version < '3.7' +matplotlib==3.5.1 pandas==1.3.5; python_version == '3.7' pandas==1.4.1; python_version >= '3.8' pyarrow==7.0.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index ba873ed3a458..f047c46b6719 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,13 +1,10 @@ google-cloud-bigquery-storage==2.12.0 google-auth-oauthlib==0.5.0 grpcio==1.44.0 -ipython==7.16.3; python_version < '3.7' ipython==7.31.1; python_version == '3.7' ipython==8.0.1; python_version == '3.8' ipython==8.1.1; python_version >= '3.9' -matplotlib==3.5.1; python_version < '3.7' -matplotlib==3.5.1; python_version >= '3.7' -pandas==1.1.5; python_version < '3.7' +matplotlib==3.5.1 pandas==1.3.5; python_version == '3.7' pandas==1.4.1; python_version >= '3.8' pyarrow==7.0.0 From bf04693ff7f962f801d6b7054d88f8d2efa5712b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 8 Mar 2022 18:29:28 +0100 Subject: [PATCH 1399/2016] chore(deps): update all dependencies (#1163) * chore(deps): update all dependencies * revert * revert * fix typo * revert Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index a31643f6fe42..41f3849ce997 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -8,11 +8,11 @@ cligj==0.7.2 Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 -google-api-core==2.6.0 +google-api-core==2.6.1 google-auth==2.6.0 -google-cloud-bigquery==2.34.1 +google-cloud-bigquery==2.34.2 google-cloud-bigquery-storage==2.12.0 -google-cloud-core==2.2.2 +google-cloud-core==2.2.3 google-crc32c==1.3.0 google-resumable-media==2.3.1 googleapis-common-protos==1.55.0 From 0cf5dc053f8547e201a2fe063a693d327ee77b5d Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Fri, 18 Mar 2022 11:03:42 -0700 Subject: [PATCH 1400/2016] fix: update content-type header (#1171) * update content-type header * fix: update content-type header --- packages/google-cloud-bigquery/google/cloud/bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a5f3d5419d7e..1612730941df 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -4127,7 +4127,7 @@ def _get_upload_headers(user_agent): "Accept": "application/json", "Accept-Encoding": "gzip, deflate", "User-Agent": user_agent, - "content-type": "application/json", + "content-type": "application/json; charset=UTF-8", } From f3cf67238a6dca7946d39b7be37b637e5bfb8c5c Mon Sep 17 00:00:00 2001 From: Walt Askew Date: Tue, 22 Mar 2022 09:54:41 -0700 Subject: [PATCH 1401/2016] test: Move tests/unit/helpers to tests/unit/_helpers (#1169) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove ambiguity between importing tests/unit/helpers and tests/unit/helpers.py which can lead to import errors. Renamed directory _helpers matches the google.cloud.bigquery._helpers module being tested. Fixes #1168 🦕 --- packages/google-cloud-bigquery/tests/unit/_helpers/__init__.py | 0 .../tests/unit/{helpers => _helpers}/test_from_json.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/_helpers/__init__.py rename packages/google-cloud-bigquery/tests/unit/{helpers => _helpers}/test_from_json.py (100%) diff --git a/packages/google-cloud-bigquery/tests/unit/_helpers/__init__.py b/packages/google-cloud-bigquery/tests/unit/_helpers/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/tests/unit/helpers/test_from_json.py b/packages/google-cloud-bigquery/tests/unit/_helpers/test_from_json.py similarity index 100% rename from packages/google-cloud-bigquery/tests/unit/helpers/test_from_json.py rename to packages/google-cloud-bigquery/tests/unit/_helpers/test_from_json.py From 3367f1f331e73b923e2fbe75edd50327750a41ce Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Tue, 29 Mar 2022 07:27:41 -0700 Subject: [PATCH 1402/2016] fix: upgrade black (#1177) * fix: upgrade black * upgrade black --- packages/google-cloud-bigquery/docs/conf.py | 5 +- .../google/cloud/bigquery/_pandas_helpers.py | 5 +- .../google/cloud/bigquery/_tqdm_helpers.py | 4 +- .../google/cloud/bigquery/client.py | 11 +- .../google/cloud/bigquery/dataset.py | 2 +- .../google/cloud/bigquery/dbapi/_helpers.py | 6 +- .../google/cloud/bigquery/external_config.py | 9 +- .../google/cloud/bigquery/job/query.py | 3 +- .../google/cloud/bigquery/magics/magics.py | 7 +- .../cloud/bigquery/opentelemetry_tracing.py | 44 +- .../google/cloud/bigquery/query.py | 3 +- .../google/cloud/bigquery/schema.py | 3 +- .../google/cloud/bigquery/table.py | 11 +- .../bigquery_v2/types/encryption_config.py | 9 +- .../google/cloud/bigquery_v2/types/model.py | 783 ++++++++++++++---- .../bigquery_v2/types/model_reference.py | 20 +- .../cloud/bigquery_v2/types/standard_sql.py | 39 +- .../bigquery_v2/types/table_reference.py | 35 +- packages/google-cloud-bigquery/noxfile.py | 7 +- .../samples/geography/noxfile.py | 4 +- .../samples/load_table_dataframe.py | 10 +- .../samples/load_table_file.py | 4 +- .../samples/load_table_uri_parquet.py | 4 +- .../samples/magics/noxfile.py | 4 +- .../snippets/authenticate_service_account.py | 8 +- ...te_table_external_hive_partitioned_test.py | 6 +- .../samples/snippets/delete_job_test.py | 3 +- .../samples/snippets/manage_job_cancel.py | 4 +- .../samples/snippets/manage_job_get.py | 4 +- .../samples/snippets/noxfile.py | 4 +- .../samples/snippets/update_dataset_access.py | 6 +- .../samples/table_insert_rows.py | 4 +- ...le_insert_rows_explicit_none_insert_ids.py | 4 +- .../samples/tests/test_client_query.py | 4 +- .../samples/tests/test_client_query_batch.py | 4 +- .../tests/test_client_query_dry_run.py | 4 +- .../tests/test_client_query_legacy_sql.py | 4 +- .../tests/test_client_query_w_array_params.py | 4 +- .../tests/test_client_query_w_named_params.py | 4 +- .../test_client_query_w_positional_params.py | 4 +- .../test_client_query_w_struct_params.py | 4 +- .../test_client_query_w_timestamp_params.py | 4 +- .../tests/test_load_table_dataframe.py | 10 +- ...test_query_external_gcs_temporary_table.py | 4 +- .../samples/tests/test_query_no_cache.py | 4 +- .../samples/tests/test_query_pagination.py | 4 +- .../samples/tests/test_query_script.py | 4 +- .../samples/tests/test_query_to_arrow.py | 4 +- .../tests/system/helpers.py | 13 +- .../tests/system/test_arrow.py | 4 +- .../tests/system/test_client.py | 9 +- .../tests/system/test_pandas.py | 5 +- .../tests/system/test_query.py | 5 +- .../tests/system/test_structs.py | 3 +- .../tests/unit/job/test_base.py | 5 +- .../tests/unit/job/test_query_pandas.py | 30 +- .../tests/unit/model/test_model.py | 8 +- .../tests/unit/test__helpers.py | 6 +- .../tests/unit/test__pandas_helpers.py | 6 +- .../tests/unit/test_client.py | 177 +++- .../tests/unit/test_dbapi__helpers.py | 3 +- .../tests/unit/test_dbapi_connection.py | 10 +- .../tests/unit/test_dbapi_cursor.py | 21 +- .../tests/unit/test_dbapi_types.py | 4 +- .../tests/unit/test_list_jobs.py | 2 +- .../tests/unit/test_list_models.py | 8 +- .../tests/unit/test_magics.py | 15 +- .../tests/unit/test_query.py | 3 +- .../tests/unit/test_schema.py | 68 +- .../tests/unit/test_table.py | 50 +- 70 files changed, 1186 insertions(+), 410 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index bb16445ca5ae..296eac02ae27 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -362,7 +362,10 @@ intersphinx_mapping = { "python": ("https://python.readthedocs.org/en/latest/", None), "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), - "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), + "google.api_core": ( + "https://googleapis.dev/python/google-api-core/latest/", + None, + ), "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 263a1a9cff8f..da7c999bd1de 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -585,7 +585,10 @@ def dataframe_to_parquet( bq_schema = schema._to_schema_fields(bq_schema) arrow_table = dataframe_to_arrow(dataframe, bq_schema) pyarrow.parquet.write_table( - arrow_table, filepath, compression=parquet_compression, **kwargs, + arrow_table, + filepath, + compression=parquet_compression, + **kwargs, ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py index 632f70f87215..f2355ab3bb64 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py @@ -95,7 +95,9 @@ def wait_for_query( progress_bar.total = len(query_job.query_plan) progress_bar.set_description( "Query executing stage {} and status {} : {:0.2f}s".format( - current_stage.name, current_stage.status, time.time() - start_time, + current_stage.name, + current_stage.status, + time.time() - start_time, ), ) try: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1612730941df..a99e8fcb4bbb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1894,9 +1894,7 @@ def _get_query_results( def job_from_resource( self, resource: dict - ) -> Union[ - job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob, - ]: + ) -> Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]: """Detect correct job type from resource and instantiate. Args: @@ -1978,8 +1976,8 @@ def create_job( timeout=timeout, ) elif "extract" in job_config: - extract_job_config = google.cloud.bigquery.job.ExtractJobConfig.from_api_repr( - job_config + extract_job_config = ( + google.cloud.bigquery.job.ExtractJobConfig.from_api_repr(job_config) ) source = _get_sub_prop(job_config, ["extract", "sourceTable"]) if source: @@ -2152,7 +2150,8 @@ def cancel_job( job_instance = self.job_from_resource(resource["job"]) # never an UnknownJob return typing.cast( - Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], job_instance, + Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], + job_instance, ) def list_jobs( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 499072de2a76..cf317024f881 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -191,7 +191,7 @@ def __repr__(self): ) def _key(self): - """ A tuple key that uniquely describes this field. + """A tuple key that uniquely describes this field. Used to compute this instance's hashcode and evaluate equality. Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index e5c7ef7ec7ea..30f40ea07394 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -486,8 +486,7 @@ def raise_on_closed( """Make public instance methods raise an error if the instance is closed.""" def _raise_on_closed(method): - """Make a non-static method raise an error if its containing instance is closed. - """ + """Make a non-static method raise an error if its containing instance is closed.""" def with_closed_check(self, *args, **kwargs): if getattr(self, closed_attr_name): @@ -498,8 +497,7 @@ def with_closed_check(self, *args, **kwargs): return with_closed_check def decorate_public_methods(klass): - """Apply ``_raise_on_closed()`` decorator to public instance methods. - """ + """Apply ``_raise_on_closed()`` decorator to public instance methods.""" for name in dir(klass): if name.startswith("_") and name != "__iter__": continue diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index e6f6a97c3388..8470498090a7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -289,8 +289,7 @@ def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily": class BigtableOptions(object): - """Options that describe how to treat Bigtable tables as BigQuery tables. - """ + """Options that describe how to treat Bigtable tables as BigQuery tables.""" _SOURCE_FORMAT = "BIGTABLE" _RESOURCE_NAME = "bigtableOptions" @@ -557,7 +556,11 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": ) OptionsType = Union[ - AvroOptions, BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions, + AvroOptions, + BigtableOptions, + CSVOptions, + GoogleSheetsOptions, + ParquetOptions, ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 2fd7afb7615b..54f950a6620b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -324,7 +324,8 @@ def connection_properties(self) -> List[ConnectionProperty]: @connection_properties.setter def connection_properties(self, value: Iterable[ConnectionProperty]): self._set_sub_prop( - "connectionProperties", [prop.to_api_repr() for prop in value], + "connectionProperties", + [prop.to_api_repr() for prop in value], ) @property diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 7b4d584fb70b..a5941158e898 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -586,7 +586,9 @@ def _cell_magic(line, query): bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint bqstorage_client = _make_bqstorage_client( - client, use_bqstorage_api, bqstorage_client_options, + client, + use_bqstorage_api, + bqstorage_client_options, ) close_transports = functools.partial(_close_transports, client, bqstorage_client) @@ -637,7 +639,8 @@ def _cell_magic(line, query): return result = rows.to_dataframe( - bqstorage_client=bqstorage_client, create_bqstorage_client=False, + bqstorage_client=bqstorage_client, + create_bqstorage_client=False, ) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py index 748f2136ddb7..adecea1213ac 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -37,28 +37,28 @@ @contextmanager def create_span(name, attributes=None, client=None, job_ref=None): """Creates a ContextManager for a Span to be exported to the configured exporter. - If no configuration exists yields None. - - Args: - name (str): Name that will be set for the span being created - attributes (Optional[dict]): - Additional attributes that pertain to - the specific API call (i.e. not a default attribute) - client (Optional[google.cloud.bigquery.client.Client]): - Pass in a Client object to extract any attributes that may be - relevant to it and add them to the created spans. - job_ref (Optional[google.cloud.bigquery.job._AsyncJob]) - Pass in a _AsyncJob object to extract any attributes that may be - relevant to it and add them to the created spans. - - Yields: - opentelemetry.trace.Span: Yields the newly created Span. - - Raises: - google.api_core.exceptions.GoogleAPICallError: - Raised if a span could not be yielded or issue with call to - OpenTelemetry. - """ + If no configuration exists yields None. + + Args: + name (str): Name that will be set for the span being created + attributes (Optional[dict]): + Additional attributes that pertain to + the specific API call (i.e. not a default attribute) + client (Optional[google.cloud.bigquery.client.Client]): + Pass in a Client object to extract any attributes that may be + relevant to it and add them to the created spans. + job_ref (Optional[google.cloud.bigquery.job._AsyncJob]) + Pass in a _AsyncJob object to extract any attributes that may be + relevant to it and add them to the created spans. + + Yields: + opentelemetry.trace.Span: Yields the newly created Span. + + Raises: + google.api_core.exceptions.GoogleAPICallError: + Raised if a span could not be yielded or issue with call to + OpenTelemetry. + """ global _warned_telemetry final_attributes = _get_final_span_attributes(attributes, client, job_ref) if not HAS_OPENTELEMETRY: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 637be62befd6..0b90b6954786 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -363,8 +363,7 @@ def __repr__(self): class _AbstractQueryParameter(object): - """Base class for named / positional query parameters. - """ + """Base class for named / positional query parameters.""" @classmethod def from_api_repr(cls, resource: dict) -> "_AbstractQueryParameter": diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 2af61b672297..84272228f2f6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -417,8 +417,7 @@ def __init__(self, names: Iterable[str] = ()): @property def names(self): - """Tuple[str]: Policy tags associated with this definition. - """ + """Tuple[str]: Policy tags associated with this definition.""" return self._properties.get("names", ()) def _key(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a0696f83f6bb..f39945fe48ae 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -213,7 +213,9 @@ def __init__(self, dataset_ref: "DatasetReference", table_id: str): dataset_ref.dataset_id, ) _helpers._set_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["table_id"], table_id, + self._properties, + self._PROPERTY_TO_API_FIELD["table_id"], + table_id, ) @classmethod @@ -886,7 +888,9 @@ def mview_refresh_interval(self, value): api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"] _helpers._set_sub_prop( - self._properties, [api_field, "refreshIntervalMs"], refresh_interval_ms, + self._properties, + [api_field, "refreshIntervalMs"], + refresh_interval_ms, ) @property @@ -1598,7 +1602,8 @@ def total_rows(self): return self._total_rows def _maybe_warn_max_results( - self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"], + self, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"], ): """Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py index d300a417c7ca..9f57acb7c1f5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/encryption_config.py @@ -19,7 +19,10 @@ __protobuf__ = proto.module( - package="google.cloud.bigquery.v2", manifest={"EncryptionConfiguration",}, + package="google.cloud.bigquery.v2", + manifest={ + "EncryptionConfiguration", + }, ) @@ -36,7 +39,9 @@ class EncryptionConfiguration(proto.Message): """ kms_key_name = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.StringValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.StringValue, ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index f2c03171514d..7786d8ea4b5a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -252,8 +252,7 @@ class FeedbackType(proto.Enum): EXPLICIT = 2 class SeasonalPeriod(proto.Message): - r""" - """ + r""" """ class SeasonalPeriodType(proto.Enum): r"""""" @@ -266,8 +265,7 @@ class SeasonalPeriodType(proto.Enum): YEARLY = 6 class KmeansEnums(proto.Message): - r""" - """ + r""" """ class KmeansInitializationMethod(proto.Enum): r"""Indicates the method used to initialize the centroids for @@ -296,19 +294,29 @@ class RegressionMetrics(proto.Message): """ mean_absolute_error = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, ) mean_squared_error = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) mean_squared_log_error = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=3, + message=wrappers_pb2.DoubleValue, ) median_absolute_error = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=4, + message=wrappers_pb2.DoubleValue, ) r_squared = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=5, + message=wrappers_pb2.DoubleValue, ) class AggregateClassificationMetrics(proto.Message): @@ -352,23 +360,39 @@ class AggregateClassificationMetrics(proto.Message): """ precision = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, + ) + recall = proto.Field( + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) - recall = proto.Field(proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,) accuracy = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=3, + message=wrappers_pb2.DoubleValue, ) threshold = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=4, + message=wrappers_pb2.DoubleValue, ) f1_score = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=5, + message=wrappers_pb2.DoubleValue, ) log_loss = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=6, + message=wrappers_pb2.DoubleValue, ) roc_auc = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=7, + message=wrappers_pb2.DoubleValue, ) class BinaryClassificationMetrics(proto.Message): @@ -417,43 +441,69 @@ class BinaryConfusionMatrix(proto.Message): """ positive_class_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, ) true_positives = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=2, + message=wrappers_pb2.Int64Value, ) false_positives = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=3, + message=wrappers_pb2.Int64Value, ) true_negatives = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=4, + message=wrappers_pb2.Int64Value, ) false_negatives = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=5, + message=wrappers_pb2.Int64Value, ) precision = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=6, + message=wrappers_pb2.DoubleValue, ) recall = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=7, + message=wrappers_pb2.DoubleValue, ) f1_score = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=8, + message=wrappers_pb2.DoubleValue, ) accuracy = proto.Field( - proto.MESSAGE, number=9, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=9, + message=wrappers_pb2.DoubleValue, ) aggregate_classification_metrics = proto.Field( - proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", + proto.MESSAGE, + number=1, + message="Model.AggregateClassificationMetrics", ) binary_confusion_matrix_list = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.BinaryClassificationMetrics.BinaryConfusionMatrix", ) - positive_label = proto.Field(proto.STRING, number=3,) - negative_label = proto.Field(proto.STRING, number=4,) + positive_label = proto.Field( + proto.STRING, + number=3, + ) + negative_label = proto.Field( + proto.STRING, + number=4, + ) class MultiClassClassificationMetrics(proto.Message): r"""Evaluation metrics for multi-class classification/classifier @@ -490,9 +540,14 @@ class Entry(proto.Message): label. """ - predicted_label = proto.Field(proto.STRING, number=1,) + predicted_label = proto.Field( + proto.STRING, + number=1, + ) item_count = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=2, + message=wrappers_pb2.Int64Value, ) class Row(proto.Message): @@ -505,7 +560,10 @@ class Row(proto.Message): Info describing predicted label distribution. """ - actual_label = proto.Field(proto.STRING, number=1,) + actual_label = proto.Field( + proto.STRING, + number=1, + ) entries = proto.RepeatedField( proto.MESSAGE, number=2, @@ -513,7 +571,9 @@ class Row(proto.Message): ) confidence_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, ) rows = proto.RepeatedField( proto.MESSAGE, @@ -522,7 +582,9 @@ class Row(proto.Message): ) aggregate_classification_metrics = proto.Field( - proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", + proto.MESSAGE, + number=1, + message="Model.AggregateClassificationMetrics", ) confusion_matrix_list = proto.RepeatedField( proto.MESSAGE, @@ -604,9 +666,14 @@ class CategoryCount(proto.Message): category within the cluster. """ - category = proto.Field(proto.STRING, number=1,) + category = proto.Field( + proto.STRING, + number=1, + ) count = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=2, + message=wrappers_pb2.Int64Value, ) category_counts = proto.RepeatedField( @@ -615,7 +682,10 @@ class CategoryCount(proto.Message): message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", ) - feature_column = proto.Field(proto.STRING, number=1,) + feature_column = proto.Field( + proto.STRING, + number=1, + ) numerical_value = proto.Field( proto.MESSAGE, number=2, @@ -629,24 +699,35 @@ class CategoryCount(proto.Message): message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", ) - centroid_id = proto.Field(proto.INT64, number=1,) + centroid_id = proto.Field( + proto.INT64, + number=1, + ) feature_values = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.ClusteringMetrics.Cluster.FeatureValue", ) count = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=3, + message=wrappers_pb2.Int64Value, ) davies_bouldin_index = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, ) mean_squared_distance = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) clusters = proto.RepeatedField( - proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster", + proto.MESSAGE, + number=3, + message="Model.ClusteringMetrics.Cluster", ) class RankingMetrics(proto.Message): @@ -677,16 +758,24 @@ class RankingMetrics(proto.Message): """ mean_average_precision = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, ) mean_squared_error = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) normalized_discounted_cumulative_gain = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=3, + message=wrappers_pb2.DoubleValue, ) average_rank = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=4, + message=wrappers_pb2.DoubleValue, ) class ArimaForecastingMetrics(proto.Message): @@ -751,38 +840,71 @@ class ArimaSingleModelForecastingMetrics(proto.Message): """ non_seasonal_order = proto.Field( - proto.MESSAGE, number=1, message="Model.ArimaOrder", + proto.MESSAGE, + number=1, + message="Model.ArimaOrder", ) arima_fitting_metrics = proto.Field( - proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", + proto.MESSAGE, + number=2, + message="Model.ArimaFittingMetrics", + ) + has_drift = proto.Field( + proto.BOOL, + number=3, + ) + time_series_id = proto.Field( + proto.STRING, + number=4, + ) + time_series_ids = proto.RepeatedField( + proto.STRING, + number=9, ) - has_drift = proto.Field(proto.BOOL, number=3,) - time_series_id = proto.Field(proto.STRING, number=4,) - time_series_ids = proto.RepeatedField(proto.STRING, number=9,) seasonal_periods = proto.RepeatedField( - proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", + proto.ENUM, + number=5, + enum="Model.SeasonalPeriod.SeasonalPeriodType", ) has_holiday_effect = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=6, + message=wrappers_pb2.BoolValue, ) has_spikes_and_dips = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=7, + message=wrappers_pb2.BoolValue, ) has_step_changes = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=8, + message=wrappers_pb2.BoolValue, ) non_seasonal_order = proto.RepeatedField( - proto.MESSAGE, number=1, message="Model.ArimaOrder", + proto.MESSAGE, + number=1, + message="Model.ArimaOrder", ) arima_fitting_metrics = proto.RepeatedField( - proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", + proto.MESSAGE, + number=2, + message="Model.ArimaFittingMetrics", ) seasonal_periods = proto.RepeatedField( - proto.ENUM, number=3, enum="Model.SeasonalPeriod.SeasonalPeriodType", + proto.ENUM, + number=3, + enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + has_drift = proto.RepeatedField( + proto.BOOL, + number=4, + ) + time_series_id = proto.RepeatedField( + proto.STRING, + number=5, ) - has_drift = proto.RepeatedField(proto.BOOL, number=4,) - time_series_id = proto.RepeatedField(proto.STRING, number=5,) arima_single_model_forecasting_metrics = proto.RepeatedField( proto.MESSAGE, number=6, @@ -834,7 +956,10 @@ class EvaluationMetrics(proto.Message): """ regression_metrics = proto.Field( - proto.MESSAGE, number=1, oneof="metrics", message="Model.RegressionMetrics", + proto.MESSAGE, + number=1, + oneof="metrics", + message="Model.RegressionMetrics", ) binary_classification_metrics = proto.Field( proto.MESSAGE, @@ -849,10 +974,16 @@ class EvaluationMetrics(proto.Message): message="Model.MultiClassClassificationMetrics", ) clustering_metrics = proto.Field( - proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics", + proto.MESSAGE, + number=4, + oneof="metrics", + message="Model.ClusteringMetrics", ) ranking_metrics = proto.Field( - proto.MESSAGE, number=5, oneof="metrics", message="Model.RankingMetrics", + proto.MESSAGE, + number=5, + oneof="metrics", + message="Model.RankingMetrics", ) arima_forecasting_metrics = proto.Field( proto.MESSAGE, @@ -875,10 +1006,14 @@ class DataSplitResult(proto.Message): """ training_table = proto.Field( - proto.MESSAGE, number=1, message=table_reference.TableReference, + proto.MESSAGE, + number=1, + message=table_reference.TableReference, ) evaluation_table = proto.Field( - proto.MESSAGE, number=2, message=table_reference.TableReference, + proto.MESSAGE, + number=2, + message=table_reference.TableReference, ) class ArimaOrder(proto.Message): @@ -894,9 +1029,18 @@ class ArimaOrder(proto.Message): Order of the moving-average part. """ - p = proto.Field(proto.INT64, number=1,) - d = proto.Field(proto.INT64, number=2,) - q = proto.Field(proto.INT64, number=3,) + p = proto.Field( + proto.INT64, + number=1, + ) + d = proto.Field( + proto.INT64, + number=2, + ) + q = proto.Field( + proto.INT64, + number=3, + ) class ArimaFittingMetrics(proto.Message): r"""ARIMA model fitting metrics. @@ -910,9 +1054,18 @@ class ArimaFittingMetrics(proto.Message): Variance. """ - log_likelihood = proto.Field(proto.DOUBLE, number=1,) - aic = proto.Field(proto.DOUBLE, number=2,) - variance = proto.Field(proto.DOUBLE, number=3,) + log_likelihood = proto.Field( + proto.DOUBLE, + number=1, + ) + aic = proto.Field( + proto.DOUBLE, + number=2, + ) + variance = proto.Field( + proto.DOUBLE, + number=3, + ) class GlobalExplanation(proto.Message): r"""Global explanations containing the top most important @@ -943,15 +1096,25 @@ class Explanation(proto.Message): Attribution of feature. """ - feature_name = proto.Field(proto.STRING, number=1,) + feature_name = proto.Field( + proto.STRING, + number=1, + ) attribution = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) explanations = proto.RepeatedField( - proto.MESSAGE, number=1, message="Model.GlobalExplanation.Explanation", + proto.MESSAGE, + number=1, + message="Model.GlobalExplanation.Explanation", + ) + class_label = proto.Field( + proto.STRING, + number=2, ) - class_label = proto.Field(proto.STRING, number=2,) class TrainingRun(proto.Message): r"""Information about a single training query run for the model. @@ -1137,94 +1300,215 @@ class TrainingOptions(proto.Message): adjustment in the input time series. """ - max_iterations = proto.Field(proto.INT64, number=1,) - loss_type = proto.Field(proto.ENUM, number=2, enum="Model.LossType",) - learn_rate = proto.Field(proto.DOUBLE, number=3,) + max_iterations = proto.Field( + proto.INT64, + number=1, + ) + loss_type = proto.Field( + proto.ENUM, + number=2, + enum="Model.LossType", + ) + learn_rate = proto.Field( + proto.DOUBLE, + number=3, + ) l1_regularization = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=4, + message=wrappers_pb2.DoubleValue, ) l2_regularization = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=5, + message=wrappers_pb2.DoubleValue, ) min_relative_progress = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=6, + message=wrappers_pb2.DoubleValue, ) warm_start = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=7, + message=wrappers_pb2.BoolValue, ) early_stop = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=8, + message=wrappers_pb2.BoolValue, + ) + input_label_columns = proto.RepeatedField( + proto.STRING, + number=9, ) - input_label_columns = proto.RepeatedField(proto.STRING, number=9,) data_split_method = proto.Field( - proto.ENUM, number=10, enum="Model.DataSplitMethod", + proto.ENUM, + number=10, + enum="Model.DataSplitMethod", + ) + data_split_eval_fraction = proto.Field( + proto.DOUBLE, + number=11, + ) + data_split_column = proto.Field( + proto.STRING, + number=12, ) - data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11,) - data_split_column = proto.Field(proto.STRING, number=12,) learn_rate_strategy = proto.Field( - proto.ENUM, number=13, enum="Model.LearnRateStrategy", + proto.ENUM, + number=13, + enum="Model.LearnRateStrategy", + ) + initial_learn_rate = proto.Field( + proto.DOUBLE, + number=16, + ) + label_class_weights = proto.MapField( + proto.STRING, + proto.DOUBLE, + number=17, + ) + user_column = proto.Field( + proto.STRING, + number=18, + ) + item_column = proto.Field( + proto.STRING, + number=19, ) - initial_learn_rate = proto.Field(proto.DOUBLE, number=16,) - label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17,) - user_column = proto.Field(proto.STRING, number=18,) - item_column = proto.Field(proto.STRING, number=19,) distance_type = proto.Field( - proto.ENUM, number=20, enum="Model.DistanceType", + proto.ENUM, + number=20, + enum="Model.DistanceType", + ) + num_clusters = proto.Field( + proto.INT64, + number=21, + ) + model_uri = proto.Field( + proto.STRING, + number=22, ) - num_clusters = proto.Field(proto.INT64, number=21,) - model_uri = proto.Field(proto.STRING, number=22,) optimization_strategy = proto.Field( - proto.ENUM, number=23, enum="Model.OptimizationStrategy", + proto.ENUM, + number=23, + enum="Model.OptimizationStrategy", + ) + hidden_units = proto.RepeatedField( + proto.INT64, + number=24, + ) + batch_size = proto.Field( + proto.INT64, + number=25, ) - hidden_units = proto.RepeatedField(proto.INT64, number=24,) - batch_size = proto.Field(proto.INT64, number=25,) dropout = proto.Field( - proto.MESSAGE, number=26, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=26, + message=wrappers_pb2.DoubleValue, + ) + max_tree_depth = proto.Field( + proto.INT64, + number=27, + ) + subsample = proto.Field( + proto.DOUBLE, + number=28, ) - max_tree_depth = proto.Field(proto.INT64, number=27,) - subsample = proto.Field(proto.DOUBLE, number=28,) min_split_loss = proto.Field( - proto.MESSAGE, number=29, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=29, + message=wrappers_pb2.DoubleValue, + ) + num_factors = proto.Field( + proto.INT64, + number=30, ) - num_factors = proto.Field(proto.INT64, number=30,) feedback_type = proto.Field( - proto.ENUM, number=31, enum="Model.FeedbackType", + proto.ENUM, + number=31, + enum="Model.FeedbackType", ) wals_alpha = proto.Field( - proto.MESSAGE, number=32, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=32, + message=wrappers_pb2.DoubleValue, ) kmeans_initialization_method = proto.Field( proto.ENUM, number=33, enum="Model.KmeansEnums.KmeansInitializationMethod", ) - kmeans_initialization_column = proto.Field(proto.STRING, number=34,) - time_series_timestamp_column = proto.Field(proto.STRING, number=35,) - time_series_data_column = proto.Field(proto.STRING, number=36,) - auto_arima = proto.Field(proto.BOOL, number=37,) + kmeans_initialization_column = proto.Field( + proto.STRING, + number=34, + ) + time_series_timestamp_column = proto.Field( + proto.STRING, + number=35, + ) + time_series_data_column = proto.Field( + proto.STRING, + number=36, + ) + auto_arima = proto.Field( + proto.BOOL, + number=37, + ) non_seasonal_order = proto.Field( - proto.MESSAGE, number=38, message="Model.ArimaOrder", + proto.MESSAGE, + number=38, + message="Model.ArimaOrder", ) data_frequency = proto.Field( - proto.ENUM, number=39, enum="Model.DataFrequency", + proto.ENUM, + number=39, + enum="Model.DataFrequency", + ) + include_drift = proto.Field( + proto.BOOL, + number=41, ) - include_drift = proto.Field(proto.BOOL, number=41,) holiday_region = proto.Field( - proto.ENUM, number=42, enum="Model.HolidayRegion", + proto.ENUM, + number=42, + enum="Model.HolidayRegion", + ) + time_series_id_column = proto.Field( + proto.STRING, + number=43, + ) + time_series_id_columns = proto.RepeatedField( + proto.STRING, + number=51, + ) + horizon = proto.Field( + proto.INT64, + number=44, + ) + preserve_input_structs = proto.Field( + proto.BOOL, + number=45, + ) + auto_arima_max_order = proto.Field( + proto.INT64, + number=46, ) - time_series_id_column = proto.Field(proto.STRING, number=43,) - time_series_id_columns = proto.RepeatedField(proto.STRING, number=51,) - horizon = proto.Field(proto.INT64, number=44,) - preserve_input_structs = proto.Field(proto.BOOL, number=45,) - auto_arima_max_order = proto.Field(proto.INT64, number=46,) decompose_time_series = proto.Field( - proto.MESSAGE, number=50, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=50, + message=wrappers_pb2.BoolValue, ) clean_spikes_and_dips = proto.Field( - proto.MESSAGE, number=52, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=52, + message=wrappers_pb2.BoolValue, ) adjust_step_changes = proto.Field( - proto.MESSAGE, number=53, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=53, + message=wrappers_pb2.BoolValue, ) class IterationResult(proto.Message): @@ -1265,12 +1549,19 @@ class ClusterInfo(proto.Message): assigned to the cluster. """ - centroid_id = proto.Field(proto.INT64, number=1,) + centroid_id = proto.Field( + proto.INT64, + number=1, + ) cluster_radius = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) cluster_size = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=3, + message=wrappers_pb2.Int64Value, ) class ArimaResult(proto.Message): @@ -1304,12 +1595,17 @@ class ArimaCoefficients(proto.Message): """ auto_regressive_coefficients = proto.RepeatedField( - proto.DOUBLE, number=1, + proto.DOUBLE, + number=1, ) moving_average_coefficients = proto.RepeatedField( - proto.DOUBLE, number=2, + proto.DOUBLE, + number=2, + ) + intercept_coefficient = proto.Field( + proto.DOUBLE, + number=3, ) - intercept_coefficient = proto.Field(proto.DOUBLE, number=3,) class ArimaModelInfo(proto.Message): r"""Arima model information. @@ -1351,7 +1647,9 @@ class ArimaModelInfo(proto.Message): """ non_seasonal_order = proto.Field( - proto.MESSAGE, number=1, message="Model.ArimaOrder", + proto.MESSAGE, + number=1, + message="Model.ArimaOrder", ) arima_coefficients = proto.Field( proto.MESSAGE, @@ -1359,24 +1657,41 @@ class ArimaModelInfo(proto.Message): message="Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients", ) arima_fitting_metrics = proto.Field( - proto.MESSAGE, number=3, message="Model.ArimaFittingMetrics", + proto.MESSAGE, + number=3, + message="Model.ArimaFittingMetrics", + ) + has_drift = proto.Field( + proto.BOOL, + number=4, + ) + time_series_id = proto.Field( + proto.STRING, + number=5, + ) + time_series_ids = proto.RepeatedField( + proto.STRING, + number=10, ) - has_drift = proto.Field(proto.BOOL, number=4,) - time_series_id = proto.Field(proto.STRING, number=5,) - time_series_ids = proto.RepeatedField(proto.STRING, number=10,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=6, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) has_holiday_effect = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=7, + message=wrappers_pb2.BoolValue, ) has_spikes_and_dips = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=8, + message=wrappers_pb2.BoolValue, ) has_step_changes = proto.Field( - proto.MESSAGE, number=9, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=9, + message=wrappers_pb2.BoolValue, ) arima_model_info = proto.RepeatedField( @@ -1391,18 +1706,29 @@ class ArimaModelInfo(proto.Message): ) index = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.Int32Value, + proto.MESSAGE, + number=1, + message=wrappers_pb2.Int32Value, ) duration_ms = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=4, + message=wrappers_pb2.Int64Value, ) training_loss = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=5, + message=wrappers_pb2.DoubleValue, ) eval_loss = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=6, + message=wrappers_pb2.DoubleValue, + ) + learn_rate = proto.Field( + proto.DOUBLE, + number=7, ) - learn_rate = proto.Field(proto.DOUBLE, number=7,) cluster_infos = proto.RepeatedField( proto.MESSAGE, number=8, @@ -1415,47 +1741,103 @@ class ArimaModelInfo(proto.Message): ) training_options = proto.Field( - proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions", + proto.MESSAGE, + number=1, + message="Model.TrainingRun.TrainingOptions", ) start_time = proto.Field( - proto.MESSAGE, number=8, message=timestamp_pb2.Timestamp, + proto.MESSAGE, + number=8, + message=timestamp_pb2.Timestamp, ) results = proto.RepeatedField( - proto.MESSAGE, number=6, message="Model.TrainingRun.IterationResult", + proto.MESSAGE, + number=6, + message="Model.TrainingRun.IterationResult", ) evaluation_metrics = proto.Field( - proto.MESSAGE, number=7, message="Model.EvaluationMetrics", + proto.MESSAGE, + number=7, + message="Model.EvaluationMetrics", ) data_split_result = proto.Field( - proto.MESSAGE, number=9, message="Model.DataSplitResult", + proto.MESSAGE, + number=9, + message="Model.DataSplitResult", ) global_explanations = proto.RepeatedField( - proto.MESSAGE, number=10, message="Model.GlobalExplanation", + proto.MESSAGE, + number=10, + message="Model.GlobalExplanation", ) - etag = proto.Field(proto.STRING, number=1,) + etag = proto.Field( + proto.STRING, + number=1, + ) model_reference = proto.Field( - proto.MESSAGE, number=2, message=gcb_model_reference.ModelReference, + proto.MESSAGE, + number=2, + message=gcb_model_reference.ModelReference, + ) + creation_time = proto.Field( + proto.INT64, + number=5, + ) + last_modified_time = proto.Field( + proto.INT64, + number=6, + ) + description = proto.Field( + proto.STRING, + number=12, + ) + friendly_name = proto.Field( + proto.STRING, + number=14, + ) + labels = proto.MapField( + proto.STRING, + proto.STRING, + number=15, + ) + expiration_time = proto.Field( + proto.INT64, + number=16, + ) + location = proto.Field( + proto.STRING, + number=13, ) - creation_time = proto.Field(proto.INT64, number=5,) - last_modified_time = proto.Field(proto.INT64, number=6,) - description = proto.Field(proto.STRING, number=12,) - friendly_name = proto.Field(proto.STRING, number=14,) - labels = proto.MapField(proto.STRING, proto.STRING, number=15,) - expiration_time = proto.Field(proto.INT64, number=16,) - location = proto.Field(proto.STRING, number=13,) encryption_configuration = proto.Field( - proto.MESSAGE, number=17, message=encryption_config.EncryptionConfiguration, + proto.MESSAGE, + number=17, + message=encryption_config.EncryptionConfiguration, + ) + model_type = proto.Field( + proto.ENUM, + number=7, + enum=ModelType, + ) + training_runs = proto.RepeatedField( + proto.MESSAGE, + number=9, + message=TrainingRun, ) - model_type = proto.Field(proto.ENUM, number=7, enum=ModelType,) - training_runs = proto.RepeatedField(proto.MESSAGE, number=9, message=TrainingRun,) feature_columns = proto.RepeatedField( - proto.MESSAGE, number=10, message=standard_sql.StandardSqlField, + proto.MESSAGE, + number=10, + message=standard_sql.StandardSqlField, ) label_columns = proto.RepeatedField( - proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, + proto.MESSAGE, + number=11, + message=standard_sql.StandardSqlField, + ) + best_trial_id = proto.Field( + proto.INT64, + number=19, ) - best_trial_id = proto.Field(proto.INT64, number=19,) class GetModelRequest(proto.Message): @@ -1470,9 +1852,18 @@ class GetModelRequest(proto.Message): Required. Model ID of the requested model. """ - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) + model_id = proto.Field( + proto.STRING, + number=3, + ) class PatchModelRequest(proto.Message): @@ -1492,10 +1883,23 @@ class PatchModelRequest(proto.Message): set to default value. """ - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) - model = proto.Field(proto.MESSAGE, number=4, message="Model",) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) + model_id = proto.Field( + proto.STRING, + number=3, + ) + model = proto.Field( + proto.MESSAGE, + number=4, + message="Model", + ) class DeleteModelRequest(proto.Message): @@ -1510,9 +1914,18 @@ class DeleteModelRequest(proto.Message): Required. Model ID of the model to delete. """ - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) + model_id = proto.Field( + proto.STRING, + number=3, + ) class ListModelsRequest(proto.Message): @@ -1532,12 +1945,23 @@ class ListModelsRequest(proto.Message): request the next page of results """ - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) max_results = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.UInt32Value, + proto.MESSAGE, + number=3, + message=wrappers_pb2.UInt32Value, + ) + page_token = proto.Field( + proto.STRING, + number=4, ) - page_token = proto.Field(proto.STRING, number=4,) class ListModelsResponse(proto.Message): @@ -1556,8 +1980,15 @@ class ListModelsResponse(proto.Message): def raw_page(self): return self - models = proto.RepeatedField(proto.MESSAGE, number=1, message="Model",) - next_page_token = proto.Field(proto.STRING, number=2,) + models = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="Model", + ) + next_page_token = proto.Field( + proto.STRING, + number=2, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py index a4aab59f7227..cde139ebe5cd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model_reference.py @@ -17,7 +17,10 @@ __protobuf__ = proto.module( - package="google.cloud.bigquery.v2", manifest={"ModelReference",}, + package="google.cloud.bigquery.v2", + manifest={ + "ModelReference", + }, ) @@ -37,9 +40,18 @@ class ModelReference(proto.Message): maximum length is 1,024 characters. """ - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) + model_id = proto.Field( + proto.STRING, + number=3, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py index 579c3b138110..3be5304fc153 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/standard_sql.py @@ -78,12 +78,22 @@ class TypeKind(proto.Enum): ARRAY = 16 STRUCT = 17 - type_kind = proto.Field(proto.ENUM, number=1, enum=TypeKind,) + type_kind = proto.Field( + proto.ENUM, + number=1, + enum=TypeKind, + ) array_element_type = proto.Field( - proto.MESSAGE, number=2, oneof="sub_type", message="StandardSqlDataType", + proto.MESSAGE, + number=2, + oneof="sub_type", + message="StandardSqlDataType", ) struct_type = proto.Field( - proto.MESSAGE, number=3, oneof="sub_type", message="StandardSqlStructType", + proto.MESSAGE, + number=3, + oneof="sub_type", + message="StandardSqlStructType", ) @@ -102,8 +112,15 @@ class StandardSqlField(proto.Message): this "type" field). """ - name = proto.Field(proto.STRING, number=1,) - type = proto.Field(proto.MESSAGE, number=2, message="StandardSqlDataType",) + name = proto.Field( + proto.STRING, + number=1, + ) + type = proto.Field( + proto.MESSAGE, + number=2, + message="StandardSqlDataType", + ) class StandardSqlStructType(proto.Message): @@ -114,7 +131,11 @@ class StandardSqlStructType(proto.Message): """ - fields = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) + fields = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="StandardSqlField", + ) class StandardSqlTableType(proto.Message): @@ -125,7 +146,11 @@ class StandardSqlTableType(proto.Message): The columns in this table type """ - columns = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) + columns = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="StandardSqlField", + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py index aaab83e85ecb..c02eb206f364 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/table_reference.py @@ -17,7 +17,10 @@ __protobuf__ = proto.module( - package="google.cloud.bigquery.v2", manifest={"TableReference",}, + package="google.cloud.bigquery.v2", + manifest={ + "TableReference", + }, ) @@ -48,12 +51,30 @@ class TableReference(proto.Message): to translate the received data to the project_id field. """ - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - table_id = proto.Field(proto.STRING, number=3,) - project_id_alternative = proto.RepeatedField(proto.STRING, number=4,) - dataset_id_alternative = proto.RepeatedField(proto.STRING, number=5,) - table_id_alternative = proto.RepeatedField(proto.STRING, number=6,) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) + table_id = proto.Field( + proto.STRING, + number=3, + ) + project_id_alternative = proto.RepeatedField( + proto.STRING, + number=4, + ) + dataset_id_alternative = proto.RepeatedField( + proto.STRING, + number=5, + ) + table_id_alternative = proto.RepeatedField( + proto.STRING, + number=6, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 6304747d178c..8d1cb056c188 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -24,7 +24,7 @@ MYPY_VERSION = "mypy==0.910" PYTYPE_VERSION = "pytype==2021.4.9" -BLACK_VERSION = "black==19.10b0" +BLACK_VERSION = "black==22.3.0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") DEFAULT_PYTHON_VERSION = "3.8" @@ -122,7 +122,10 @@ def mypy(session): # Just install the dependencies' type info directly, since "mypy --install-types" # might require an additional pass. session.install( - "types-protobuf", "types-python-dateutil", "types-requests", "types-setuptools", + "types-protobuf", + "types-python-dateutil", + "types-requests", + "types-setuptools", ) session.run("mypy", "google/cloud") diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 85f5836dba3a..25f87a215d4c 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==19.10b0" +BLACK_VERSION = "black==22.3.0" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -253,7 +253,7 @@ def py(session: nox.sessions.Session) -> None: def _get_repo_root() -> Optional[str]: - """ Returns the root folder of the project. """ + """Returns the root folder of the project.""" # Get root of this repository. Assume we don't have directories nested deeper than 10 items. p = Path(os.getcwd()) for i in range(10): diff --git a/packages/google-cloud-bigquery/samples/load_table_dataframe.py b/packages/google-cloud-bigquery/samples/load_table_dataframe.py index 91dd6e9f09fc..b75224d11f6b 100644 --- a/packages/google-cloud-bigquery/samples/load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/load_table_dataframe.py @@ -30,7 +30,7 @@ def load_table_dataframe(table_id): records = [ { - "title": u"The Meaning of Life", + "title": "The Meaning of Life", "release_year": 1983, "length_minutes": 112.5, "release_date": pytz.timezone("Europe/Paris") @@ -40,7 +40,7 @@ def load_table_dataframe(table_id): "dvd_release": datetime.datetime(2002, 1, 22, 7, 0, 0), }, { - "title": u"Monty Python and the Holy Grail", + "title": "Monty Python and the Holy Grail", "release_year": 1975, "length_minutes": 91.5, "release_date": pytz.timezone("Europe/London") @@ -49,7 +49,7 @@ def load_table_dataframe(table_id): "dvd_release": datetime.datetime(2002, 7, 16, 9, 0, 0), }, { - "title": u"Life of Brian", + "title": "Life of Brian", "release_year": 1979, "length_minutes": 94.25, "release_date": pytz.timezone("America/New_York") @@ -58,7 +58,7 @@ def load_table_dataframe(table_id): "dvd_release": datetime.datetime(2008, 1, 14, 8, 0, 0), }, { - "title": u"And Now for Something Completely Different", + "title": "And Now for Something Completely Different", "release_year": 1971, "length_minutes": 88.0, "release_date": pytz.timezone("Europe/London") @@ -81,7 +81,7 @@ def load_table_dataframe(table_id): # Optionally, set a named index, which can also be written to the # BigQuery table. index=pandas.Index( - [u"Q24980", u"Q25043", u"Q24953", u"Q16403"], name="wikidata_id" + ["Q24980", "Q25043", "Q24953", "Q16403"], name="wikidata_id" ), ) job_config = bigquery.LoadJobConfig( diff --git a/packages/google-cloud-bigquery/samples/load_table_file.py b/packages/google-cloud-bigquery/samples/load_table_file.py index b7e45dac3a30..41f0bf98473b 100644 --- a/packages/google-cloud-bigquery/samples/load_table_file.py +++ b/packages/google-cloud-bigquery/samples/load_table_file.py @@ -25,7 +25,9 @@ def load_table_file(file_path, table_id): # table_id = "your-project.your_dataset.your_table_name" job_config = bigquery.LoadJobConfig( - source_format=bigquery.SourceFormat.CSV, skip_leading_rows=1, autodetect=True, + source_format=bigquery.SourceFormat.CSV, + skip_leading_rows=1, + autodetect=True, ) with open(file_path, "rb") as source_file: diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_parquet.py b/packages/google-cloud-bigquery/samples/load_table_uri_parquet.py index 3dce5e8efda8..9df2ab1e7f02 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_parquet.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_parquet.py @@ -23,7 +23,9 @@ def load_table_uri_parquet(table_id): # TODO(developer): Set table_id to the ID of the table to create. # table_id = "your-project.your_dataset.your_table_name" - job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.PARQUET,) + job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.PARQUET, + ) uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" load_job = client.load_table_from_uri( diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index 85f5836dba3a..25f87a215d4c 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==19.10b0" +BLACK_VERSION = "black==22.3.0" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -253,7 +253,7 @@ def py(session: nox.sessions.Session) -> None: def _get_repo_root() -> Optional[str]: - """ Returns the root folder of the project. """ + """Returns the root folder of the project.""" # Get root of this repository. Assume we don't have directories nested deeper than 10 items. p = Path(os.getcwd()) for i in range(10): diff --git a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py index c07848beea47..fa3c53cda066 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py +++ b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py @@ -27,7 +27,8 @@ def main(): # key_path = "path/to/service_account.json" credentials = service_account.Credentials.from_service_account_file( - key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"], + key_path, + scopes=["https://www.googleapis.com/auth/cloud-platform"], ) # Alternatively, use service_account.Credentials.from_service_account_info() @@ -35,7 +36,10 @@ def main(): # TODO(developer): Set key_json to the content of the service account key file. # credentials = service_account.Credentials.from_service_account_info(key_json) - client = bigquery.Client(credentials=credentials, project=credentials.project_id,) + client = bigquery.Client( + credentials=credentials, + project=credentials.project_id, + ) # [END bigquery_client_json_credentials] return client diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py index c3cdddb5565e..fccc2d408080 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py @@ -16,8 +16,10 @@ def test_create_table_external_hive_partitioned(capsys, random_table_id): - table = create_table_external_hive_partitioned.create_table_external_hive_partitioned( - random_table_id + table = ( + create_table_external_hive_partitioned.create_table_external_hive_partitioned( + random_table_id + ) ) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py b/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py index c9baa817d63d..fb407ab4b06a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py @@ -21,7 +21,8 @@ def test_delete_job_metadata( capsys, bigquery_client: bigquery.Client, table_id_us_east1: str ): query_job: bigquery.QueryJob = bigquery_client.query( - f"SELECT COUNT(*) FROM `{table_id_us_east1}`", location="us-east1", + f"SELECT COUNT(*) FROM `{table_id_us_east1}`", + location="us-east1", ) query_job.result() assert query_job.job_id is not None diff --git a/packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py b/packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py index 3e0fc5218fdd..c08a32addfbd 100644 --- a/packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py +++ b/packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py @@ -17,7 +17,9 @@ def cancel_job( - client: bigquery.Client, location: str = "us", job_id: str = "abcd-efgh-ijkl-mnop", + client: bigquery.Client, + location: str = "us", + job_id: str = "abcd-efgh-ijkl-mnop", ): job = client.cancel_job(job_id, location=location) print(f"{job.location}:{job.job_id} cancelled") diff --git a/packages/google-cloud-bigquery/samples/snippets/manage_job_get.py b/packages/google-cloud-bigquery/samples/snippets/manage_job_get.py index 256d79e5b4f8..cb54fd7bb09f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/manage_job_get.py +++ b/packages/google-cloud-bigquery/samples/snippets/manage_job_get.py @@ -17,7 +17,9 @@ def get_job( - client: bigquery.Client, location: str = "us", job_id: str = "abcd-efgh-ijkl-mnop", + client: bigquery.Client, + location: str = "us", + job_id: str = "abcd-efgh-ijkl-mnop", ): job = client.get_job(job_id, location=location) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 85f5836dba3a..25f87a215d4c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==19.10b0" +BLACK_VERSION = "black==22.3.0" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -253,7 +253,7 @@ def py(session: nox.sessions.Session) -> None: def _get_repo_root() -> Optional[str]: - """ Returns the root folder of the project. """ + """Returns the root folder of the project.""" # Get root of this repository. Assume we don't have directories nested deeper than 10 items. p = Path(os.getcwd()) for i in range(10): diff --git a/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py b/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py index 1448213a62c6..a606a2d56a83 100644 --- a/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py @@ -59,7 +59,11 @@ def update_dataset_access(dataset_id: str, entity_id: str): entries = list(dataset.access_entries) entries.append( - bigquery.AccessEntry(role=role, entity_type=entity_type, entity_id=entity_id,) + bigquery.AccessEntry( + role=role, + entity_type=entity_type, + entity_id=entity_id, + ) ) dataset.access_entries = entries diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows.py b/packages/google-cloud-bigquery/samples/table_insert_rows.py index 24d73987175b..80048b411d86 100644 --- a/packages/google-cloud-bigquery/samples/table_insert_rows.py +++ b/packages/google-cloud-bigquery/samples/table_insert_rows.py @@ -25,8 +25,8 @@ def table_insert_rows(table_id): # table_id = "your-project.your_dataset.your_table" rows_to_insert = [ - {u"full_name": u"Phred Phlyntstone", u"age": 32}, - {u"full_name": u"Wylma Phlyntstone", u"age": 29}, + {"full_name": "Phred Phlyntstone", "age": 32}, + {"full_name": "Wylma Phlyntstone", "age": 29}, ] errors = client.insert_rows_json(table_id, rows_to_insert) # Make an API request. diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py index d91792b82233..202064bda598 100644 --- a/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py +++ b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py @@ -25,8 +25,8 @@ def table_insert_rows_explicit_none_insert_ids(table_id): # table_id = "your-project.your_dataset.your_table" rows_to_insert = [ - {u"full_name": u"Phred Phlyntstone", u"age": 32}, - {u"full_name": u"Wylma Phlyntstone", u"age": 29}, + {"full_name": "Phred Phlyntstone", "age": 32}, + {"full_name": "Wylma Phlyntstone", "age": 29}, ] errors = client.insert_rows_json( diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query.py b/packages/google-cloud-bigquery/samples/tests/test_client_query.py index 810c46a17943..673ed2b668e1 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query.py @@ -15,7 +15,9 @@ from .. import client_query -def test_client_query(capsys,): +def test_client_query( + capsys, +): client_query.client_query() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py index c5e19985dda9..3335950ade5a 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py @@ -15,7 +15,9 @@ from .. import client_query_batch -def test_client_query_batch(capsys,): +def test_client_query_batch( + capsys, +): job = client_query_batch.client_query_batch() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py index 5cbf2e3fae6e..2141435f253c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py @@ -15,7 +15,9 @@ from .. import client_query_dry_run -def test_client_query_dry_run(capsys,): +def test_client_query_dry_run( + capsys, +): query_job = client_query_dry_run.client_query_dry_run() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py index ab240fad1a1e..9d3f8ab99a55 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py @@ -17,7 +17,9 @@ from .. import client_query_legacy_sql -def test_client_query_legacy_sql(capsys,): +def test_client_query_legacy_sql( + capsys, +): client_query_legacy_sql.client_query_legacy_sql() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py index 07e0294e93d1..6608ff0a4547 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py @@ -15,7 +15,9 @@ from .. import client_query_w_array_params -def test_client_query_w_array_params(capsys,): +def test_client_query_w_array_params( + capsys, +): client_query_w_array_params.client_query_w_array_params() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py index 2970dfdc47bd..f53f72fdfa2a 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py @@ -15,7 +15,9 @@ from .. import client_query_w_named_params -def test_client_query_w_named_params(capsys,): +def test_client_query_w_named_params( + capsys, +): client_query_w_named_params.client_query_w_named_params() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py index e41ffa825584..c91b10f211e8 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py @@ -15,7 +15,9 @@ from .. import client_query_w_positional_params -def test_client_query_w_positional_params(capsys,): +def test_client_query_w_positional_params( + capsys, +): client_query_w_positional_params.client_query_w_positional_params() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py index 03083a3a72c7..dfb86fb654cd 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py @@ -15,7 +15,9 @@ from .. import client_query_w_struct_params -def test_client_query_w_struct_params(capsys,): +def test_client_query_w_struct_params( + capsys, +): client_query_w_struct_params.client_query_w_struct_params() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py index 9dddcb9a0e5d..51dfa129613b 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py @@ -15,7 +15,9 @@ from .. import client_query_w_timestamp_params -def test_client_query_w_timestamp_params(capsys,): +def test_client_query_w_timestamp_params( + capsys, +): client_query_w_timestamp_params.client_query_w_timestamp_params() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py index 2286660469ff..6528edc98599 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py @@ -50,10 +50,10 @@ def test_load_table_dataframe(capsys, client, random_table_id): df = client.list_rows(table).to_dataframe() df.sort_values("release_year", inplace=True) assert df["title"].tolist() == [ - u"And Now for Something Completely Different", - u"Monty Python and the Holy Grail", - u"Life of Brian", - u"The Meaning of Life", + "And Now for Something Completely Different", + "Monty Python and the Holy Grail", + "Life of Brian", + "The Meaning of Life", ] assert df["release_year"].tolist() == [1971, 1975, 1979, 1983] assert df["length_minutes"].tolist() == [88.0, 91.5, 94.25, 112.5] @@ -69,4 +69,4 @@ def test_load_table_dataframe(capsys, client, random_table_id): pandas.Timestamp("2008-01-14T08:00:00+00:00"), pandas.Timestamp("2002-01-22T07:00:00+00:00"), ] - assert df["wikidata_id"].tolist() == [u"Q16403", u"Q25043", u"Q24953", u"Q24980"] + assert df["wikidata_id"].tolist() == ["Q16403", "Q25043", "Q24953", "Q24980"] diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py index 022b327be21b..e6a825233266 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py @@ -15,7 +15,9 @@ from .. import query_external_gcs_temporary_table -def test_query_external_gcs_temporary_table(capsys,): +def test_query_external_gcs_temporary_table( + capsys, +): query_external_gcs_temporary_table.query_external_gcs_temporary_table() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py b/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py index df17d0d0b04f..f72bee3f73c4 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py @@ -17,7 +17,9 @@ from .. import query_no_cache -def test_query_no_cache(capsys,): +def test_query_no_cache( + capsys, +): query_no_cache.query_no_cache() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py b/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py index 7ab049c8ce7c..eb1ca4b2c19b 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py @@ -15,7 +15,9 @@ from .. import query_pagination -def test_query_pagination(capsys,): +def test_query_pagination( + capsys, +): query_pagination.query_pagination() out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_script.py b/packages/google-cloud-bigquery/samples/tests/test_query_script.py index 037664d369ee..2c75478736e6 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_script.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_script.py @@ -15,7 +15,9 @@ from .. import query_script -def test_query_script(capsys,): +def test_query_script( + capsys, +): query_script.query_script() out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py index f14ce55613ac..9511def58b74 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py @@ -19,7 +19,9 @@ pyarrow = pytest.importorskip("pyarrow") -def test_query_to_arrow(capsys,): +def test_query_to_arrow( + capsys, +): arrow_table = query_to_arrow.query_to_arrow() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/tests/system/helpers.py b/packages/google-cloud-bigquery/tests/system/helpers.py index 76e609345a10..721f55040349 100644 --- a/packages/google-cloud-bigquery/tests/system/helpers.py +++ b/packages/google-cloud-bigquery/tests/system/helpers.py @@ -39,9 +39,15 @@ ('SELECT "ABC"', "ABC"), ('SELECT CAST("foo" AS BYTES)', b"foo"), ('SELECT TIMESTAMP "%s"' % (_stamp,), _zoned), - ('SELECT TIMESTAMP "%s"' % (_stamp_microseconds,), _zoned_microseconds,), + ( + 'SELECT TIMESTAMP "%s"' % (_stamp_microseconds,), + _zoned_microseconds, + ), ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp,), _naive), - ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp_microseconds,), _naive_microseconds,), + ( + 'SELECT DATETIME(TIMESTAMP "%s")' % (_stamp_microseconds,), + _naive_microseconds, + ), ('SELECT DATE(TIMESTAMP "%s")' % (_stamp,), _naive.date()), ('SELECT TIME(TIMESTAMP "%s")' % (_stamp,), _naive.time()), ('SELECT NUMERIC "%s"' % (_numeric,), _numeric), @@ -90,5 +96,6 @@ def _rate_limit_exceeded(forbidden): # they return instead of the more appropriate 429. # See https://cloud.google.com/bigquery/quota-policy retry_403 = test_utils.retry.RetryErrors( - google.api_core.exceptions.Forbidden, error_predicate=_rate_limit_exceeded, + google.api_core.exceptions.Forbidden, + error_predicate=_rate_limit_exceeded, ) diff --git a/packages/google-cloud-bigquery/tests/system/test_arrow.py b/packages/google-cloud-bigquery/tests/system/test_arrow.py index 96f9dea25565..cc090ba26d65 100644 --- a/packages/google-cloud-bigquery/tests/system/test_arrow.py +++ b/packages/google-cloud-bigquery/tests/system/test_arrow.py @@ -65,7 +65,9 @@ def test_list_rows_nullable_scalars_dtypes( ] arrow_table = bigquery_client.list_rows( - table_id, max_results=max_results, selected_fields=schema, + table_id, + max_results=max_results, + selected_fields=schema, ).to_arrow() schema = arrow_table.schema diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 077bb28861fe..1e328e2e1223 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2257,17 +2257,20 @@ def test_create_tvf_routine(self): return_table_type = StandardSqlTableType( columns=[ StandardSqlField( - name="int_col", type=StandardSqlDataType(type_kind=INT64), + name="int_col", + type=StandardSqlDataType(type_kind=INT64), ), StandardSqlField( - name="str_col", type=StandardSqlDataType(type_kind=STRING), + name="str_col", + type=StandardSqlDataType(type_kind=STRING), ), ] ) routine_args = [ RoutineArgument( - name="threshold", data_type=StandardSqlDataType(type_kind=INT64), + name="threshold", + data_type=StandardSqlDataType(type_kind=INT64), ) ] diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index f3534cd19051..ab0fb03f49e6 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -522,7 +522,10 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( [ ("bool_col", [True, None, False]), ("bytes_col", ["abc", None, "def"]), - ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)],), + ( + "date_col", + [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], + ), ( "dt_col", [ diff --git a/packages/google-cloud-bigquery/tests/system/test_query.py b/packages/google-cloud-bigquery/tests/system/test_query.py index 649120a7ef43..c402f66bab3d 100644 --- a/packages/google-cloud-bigquery/tests/system/test_query.py +++ b/packages/google-cloud-bigquery/tests/system/test_query.py @@ -20,7 +20,10 @@ def test_dry_run(bigquery_client: bigquery.Client, scalars_table: str): query_config.dry_run = True query_string = f"SELECT * FROM {scalars_table}" - query_job = bigquery_client.query(query_string, job_config=query_config,) + query_job = bigquery_client.query( + query_string, + job_config=query_config, + ) # Note: `query_job.result()` is not necessary on a dry run query. All # necessary information is returned in the initial response. diff --git a/packages/google-cloud-bigquery/tests/system/test_structs.py b/packages/google-cloud-bigquery/tests/system/test_structs.py index 20740f61405d..1812b6fdee27 100644 --- a/packages/google-cloud-bigquery/tests/system/test_structs.py +++ b/packages/google-cloud-bigquery/tests/system/test_structs.py @@ -23,7 +23,8 @@ def test_structs(bigquery_client, dataset_id, person_type_decl, table_id): ], ) cursor.execute( - f"insert into {table_id} (person) values (%(v:{person_type})s)", dict(v=data), + f"insert into {table_id} (person) values (%(v:{person_type})s)", + dict(v=data), ) cursor.execute(f"select * from {table_id}") diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 250be83bb488..f0525c22a33e 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -991,7 +991,10 @@ def test_result_w_retry_wo_state(self): custom_predicate = mock.Mock() custom_predicate.return_value = True custom_retry = google.api_core.retry.Retry( - predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.1, + predicate=custom_predicate, + initial=0.001, + maximum=0.001, + deadline=0.1, ) self.assertIs(job.result(retry=custom_retry), job) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 1b44f65d3404..775c5a3026d8 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -146,8 +146,10 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): job.to_dataframe(bqstorage_client=bqstorage_client) - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **job_resource["configuration"]["query"]["destinationTable"] + destination_table = ( + "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **job_resource["configuration"]["query"]["destinationTable"] + ) ) expected_session = bigquery_storage.ReadSession( table=destination_table, @@ -272,7 +274,8 @@ def test_to_arrow_max_results_no_progress_bar(): row_iterator = table.RowIterator(client, api_request, path, schema) result_patch = mock.patch( - "google.cloud.bigquery.job.QueryJob.result", return_value=row_iterator, + "google.cloud.bigquery.job.QueryJob.result", + return_value=row_iterator, ) with result_patch as result_patch_tqdm: tbl = job.to_arrow(create_bqstorage_client=False, max_results=123) @@ -535,8 +538,10 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): job.to_dataframe(bqstorage_client=bqstorage_client) - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **resource["configuration"]["query"]["destinationTable"] + destination_table = ( + "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) ) expected_session = bigquery_storage.ReadSession( table=destination_table, @@ -583,11 +588,14 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ): job.to_dataframe(bqstorage_client=bqstorage_client) - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **resource["configuration"]["query"]["destinationTable"] + destination_table = ( + "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) ) expected_session = bigquery_storage.ReadSession( - table=destination_table, data_format=bigquery_storage.DataFormat.ARROW, + table=destination_table, + data_format=bigquery_storage.DataFormat.ARROW, ) bqstorage_client.create_read_session.assert_called_once_with( parent=f"projects/{client.project}", @@ -705,7 +713,11 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} connection = make_connection( - begun_resource, query_resource, done_resource, query_resource, query_resource, + begun_resource, + query_resource, + done_resource, + query_resource, + query_resource, ) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model.py b/packages/google-cloud-bigquery/tests/unit/model/test_model.py index 8f0bf58d5750..4790b858ba12 100644 --- a/packages/google-cloud-bigquery/tests/unit/model/test_model.py +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model.py @@ -79,7 +79,7 @@ def test_from_api_repr(target_class): "description": "A friendly description.", "friendlyName": "A friendly name.", "modelType": "LOGISTIC_REGRESSION", - "labels": {"greeting": u"こんにちは"}, + "labels": {"greeting": "こんにちは"}, "trainingRuns": [ { "trainingOptions": {"initialLearnRate": 1.0}, @@ -115,10 +115,10 @@ def test_from_api_repr(target_class): assert got.created == creation_time assert got.modified == modified_time assert got.expires == expiration_time - assert got.description == u"A friendly description." - assert got.friendly_name == u"A friendly name." + assert got.description == "A friendly description." + assert got.friendly_name == "A friendly name." assert got.model_type == types.Model.ModelType.LOGISTIC_REGRESSION - assert got.labels == {"greeting": u"こんにちは"} + assert got.labels == {"greeting": "こんにちは"} assert got.encryption_configuration.kms_key_name == KMS_KEY_NAME assert got.training_runs[0].training_options.initial_learn_rate == 1.0 assert ( diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 94e63fd639b4..0dd1c273674d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -1150,7 +1150,8 @@ def test_w_dict_unknown_fields(self): # Unknown fields should be included (if not None), but converted as strings. self.assertEqual( - converted, {"whoami": "2020-07-20", "one": "111", "two": "222"}, + converted, + {"whoami": "2020-07-20", "one": "111", "two": "222"}, ) @@ -1327,8 +1328,7 @@ def __init__(self, mode, name="unknown", field_type="UNKNOWN", fields=()): def _field_isinstance_patcher(): - """A patcher thank makes _Field instances seem like SchemaField instances. - """ + """A patcher thank makes _Field instances seem like SchemaField instances.""" from google.cloud.bigquery.schema import SchemaField def fake_isinstance(instance, target_class): diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index ef8c80c81342..c849461fd010 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -1510,7 +1510,8 @@ def test__download_table_bqstorage( bqstorage_client.create_read_session.return_value = fake_session table_ref = table.TableReference( - dataset.DatasetReference("project-x", "dataset-y"), "table-z", + dataset.DatasetReference("project-x", "dataset-y"), + "table-z", ) def fake_download_stream( @@ -1692,7 +1693,8 @@ def test_bq_to_arrow_field_type_override(module_under_test): assert ( module_under_test.bq_to_arrow_field( - schema.SchemaField("g", "GEOGRAPHY"), pyarrow.binary(), + schema.SchemaField("g", "GEOGRAPHY"), + pyarrow.binary(), ).type == pyarrow.binary() ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 9c93765e8b03..92ecb72deee6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -246,7 +246,9 @@ def test__call_api_applying_custom_retry_on_timeout(self): client = self._make_one(project=self.PROJECT, credentials=creds) api_request_patcher = mock.patch.object( - client._connection, "api_request", side_effect=[TimeoutError, "result"], + client._connection, + "api_request", + side_effect=[TimeoutError, "result"], ) retry = DEFAULT_RETRY.with_deadline(1).with_predicate( lambda exc: isinstance(exc, TimeoutError) @@ -269,7 +271,9 @@ def test__call_api_span_creator_not_called(self): client = self._make_one(project=self.PROJECT, credentials=creds) api_request_patcher = mock.patch.object( - client._connection, "api_request", side_effect=[TimeoutError, "result"], + client._connection, + "api_request", + side_effect=[TimeoutError, "result"], ) retry = DEFAULT_RETRY.with_deadline(1).with_predicate( lambda exc: isinstance(exc, TimeoutError) @@ -291,7 +295,9 @@ def test__call_api_span_creator_called(self): client = self._make_one(project=self.PROJECT, credentials=creds) api_request_patcher = mock.patch.object( - client._connection, "api_request", side_effect=[TimeoutError, "result"], + client._connection, + "api_request", + side_effect=[TimeoutError, "result"], ) retry = DEFAULT_RETRY.with_deadline(1).with_predicate( lambda exc: isinstance(exc, TimeoutError) @@ -456,7 +462,9 @@ def test_get_service_account_email_w_custom_retry(self): "email": "bq-123@bigquery-encryption.iam.gserviceaccount.com", } api_request_patcher = mock.patch.object( - client._connection, "api_request", side_effect=[ValueError, resource], + client._connection, + "api_request", + side_effect=[ValueError, resource], ) retry = DEFAULT_RETRY.with_deadline(1).with_predicate( @@ -749,7 +757,10 @@ def test_create_routine_w_minimal_resource(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=7.5, + method="POST", + path=path, + data=resource, + timeout=7.5, ) self.assertEqual( actual_routine.reference, RoutineReference.from_string(full_routine_id) @@ -783,7 +794,10 @@ def test_create_routine_w_conflict(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + method="POST", + path=path, + data=resource, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") @@ -822,7 +836,10 @@ def test_span_status_is_set(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + method="POST", + path=path, + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_create_routine_w_conflict_exists_ok(self): @@ -859,7 +876,10 @@ def test_create_routine_w_conflict_exists_ok(self): conn.api_request.assert_has_calls( [ mock.call( - method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + method="POST", + path=path, + data=resource, + timeout=DEFAULT_TIMEOUT, ), mock.call( method="GET", @@ -1259,7 +1279,9 @@ def test_create_table_alreadyexists_w_exists_ok_false(self): client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) final_attributes.assert_called_with( - {"path": post_path, "dataset_id": self.TABLE_REF.dataset_id}, client, None, + {"path": post_path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, ) conn.api_request.assert_called_once_with( @@ -1429,7 +1451,9 @@ def test_get_routine(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path=path, timeout=7.5, + method="GET", + path=path, + timeout=7.5, ) self.assertEqual( actual_routine.reference, @@ -1511,7 +1535,9 @@ def test_get_iam_policy(self): from google.api_core.iam import Policy PATH = "/projects/{}/datasets/{}/tables/{}:getIamPolicy".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) BODY = {"options": {"requestedPolicyVersion": 1}} ETAG = "CARDI" @@ -1562,7 +1588,9 @@ def test_get_iam_policy_w_invalid_table(self): client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) table_resource_string = "projects/{}/datasets/{}/tables/{}".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) with self.assertRaises(ValueError): @@ -1782,7 +1810,11 @@ def test_update_dataset(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - ds2 = client.update_dataset(ds, fields=fields, timeout=7.5,) + ds2 = client.update_dataset( + ds, + fields=fields, + timeout=7.5, + ) final_attributes.assert_called_once_with( {"path": "/%s" % PATH, "fields": fields}, client, None @@ -1959,7 +1991,11 @@ def test_update_routine(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - actual_routine = client.update_routine(routine, fields, timeout=7.5,) + actual_routine = client.update_routine( + routine, + fields, + timeout=7.5, + ) final_attributes.assert_called_once_with( {"path": routine.path, "fields": fields}, client, None @@ -2129,7 +2165,9 @@ def test_update_table_w_custom_property(self): updated_table = client.update_table(table, ["newAlphaProperty"]) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["newAlphaProperty"]}, client, None, + {"path": "/%s" % path, "fields": ["newAlphaProperty"]}, + client, + None, ) conn.api_request.assert_called_once_with( @@ -2163,7 +2201,9 @@ def test_update_table_only_use_legacy_sql(self): updated_table = client.update_table(table, ["view_use_legacy_sql"]) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["view_use_legacy_sql"]}, client, None, + {"path": "/%s" % path, "fields": ["view_use_legacy_sql"]}, + client, + None, ) conn.api_request.assert_called_once_with( @@ -2243,7 +2283,9 @@ def test_update_table_w_query(self): updated_table = client.update_table(table, updated_properties) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": updated_properties}, client, None, + {"path": "/%s" % path, "fields": updated_properties}, + client, + None, ) self.assertEqual(updated_table.schema, table.schema) @@ -2535,7 +2577,9 @@ def test_delete_routine(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=7.5, + method="DELETE", + path=path, + timeout=7.5, ) def test_delete_routine_w_wrong_type(self): @@ -2562,7 +2606,9 @@ def test_delete_routine_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, + method="DELETE", + path=path, + timeout=DEFAULT_TIMEOUT, ) def test_delete_routine_w_not_found_ok_true(self): @@ -2584,7 +2630,9 @@ def test_delete_routine_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, + method="DELETE", + path=path, + timeout=DEFAULT_TIMEOUT, ) def test_delete_table(self): @@ -3727,7 +3775,10 @@ def test_extract_table(self): # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, + method="POST", + path="/projects/PROJECT/jobs", + data=RESOURCE, + timeout=7.5, ) # Check the job resource. @@ -3969,7 +4020,10 @@ def test_extract_table_for_source_type_model(self): # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, + method="POST", + path="/projects/PROJECT/jobs", + data=RESOURCE, + timeout=7.5, ) # Check the job resource. @@ -4012,7 +4066,10 @@ def test_extract_table_for_source_type_model_w_string_model_id(self): # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, + method="POST", + path="/projects/PROJECT/jobs", + data=RESOURCE, + timeout=7.5, ) def test_extract_table_for_source_type_model_w_model_object(self): @@ -4051,7 +4108,10 @@ def test_extract_table_for_source_type_model_w_model_object(self): # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, + method="POST", + path="/projects/PROJECT/jobs", + data=RESOURCE, + timeout=7.5, ) def test_extract_table_for_invalid_source_type_model(self): @@ -4240,7 +4300,11 @@ def test_query_preserving_explicit_job_config(self): from google.cloud.bigquery import QueryJobConfig - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http,) + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + ) conn = client._connection = make_connection(resource) job_config = QueryJobConfig() @@ -5099,7 +5163,10 @@ def test_insert_rows_w_repeated_fields(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT, + method="POST", + path="/%s" % PATH, + data=SENT, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_record_schema(self): @@ -5173,7 +5240,9 @@ def test_insert_rows_w_explicit_none_insert_ids(self): from google.cloud.bigquery.table import Table PATH = "projects/{}/datasets/{}/tables/{}/insertAll".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) creds = _make_credentials() http = object() @@ -5199,7 +5268,10 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/{}".format(PATH), data=SENT, timeout=DEFAULT_TIMEOUT, + method="POST", + path="/{}".format(PATH), + data=SENT, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_errors(self): @@ -5581,7 +5653,10 @@ def test_insert_rows_json_default_behavior(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=7.5, + method="POST", + path="/%s" % PATH, + data=SENT, + timeout=7.5, ) def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self): @@ -5629,7 +5704,9 @@ def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): conn = client._connection = make_connection({}) errors = client.insert_rows_json( - "proj.dset.tbl", rows, row_ids=AutoRowIDs.DISABLED, + "proj.dset.tbl", + rows, + row_ids=AutoRowIDs.DISABLED, ) self.assertEqual(len(errors), 0) @@ -5715,7 +5792,9 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): conn = client._connection = make_connection({}) errors = client.insert_rows_json( - "proj.dset.tbl", rows, row_ids=[None] * len(rows), + "proj.dset.tbl", + rows, + row_ids=[None] * len(rows), ) self.assertEqual(len(errors), 0) @@ -6420,7 +6499,10 @@ def test_load_table_from_file_resumable(self): ) with do_upload_patch as do_upload: client.load_table_from_file( - file_obj, self.TABLE_REF, job_id="job_id", job_config=job_config, + file_obj, + self.TABLE_REF, + job_id="job_id", + job_config=job_config, ) do_upload.assert_called_once_with( @@ -6738,7 +6820,10 @@ def test_load_table_from_dataframe(self): policy_tags=PolicyTagList(names=("baz",)), ), "accounts": SchemaField( - "accounts", "INTEGER", mode="REPEATED", description="array column", + "accounts", + "INTEGER", + mode="REPEATED", + description="array column", ), } get_table_schema = [ @@ -7498,7 +7583,11 @@ def test_load_table_from_dataframe_array_fields(self): schema = [ SchemaField("float_column", "FLOAT"), - SchemaField("array_column", "INTEGER", mode="REPEATED",), + SchemaField( + "array_column", + "INTEGER", + mode="REPEATED", + ), ] job_config = job.LoadJobConfig(schema=schema) @@ -7559,7 +7648,11 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): expected_schema = [ SchemaField("float_column", "FLOAT"), - SchemaField("array_column", "INT64", mode="REPEATED",), + SchemaField( + "array_column", + "INT64", + mode="REPEATED", + ), ] load_patch = mock.patch( @@ -7574,7 +7667,9 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): with load_patch as load_table_from_file, get_table_patch: client.load_table_from_dataframe( - dataframe, self.TABLE_REF, location=self.LOCATION, + dataframe, + self.TABLE_REF, + location=self.LOCATION, ) load_table_from_file.assert_called_once_with( @@ -7863,7 +7958,9 @@ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): with load_patch, get_table_patch, pyarrow_version_patch: with warnings.catch_warnings(record=True) as warned: client.load_table_from_dataframe( - dataframe, self.TABLE_REF, location=self.LOCATION, + dataframe, + self.TABLE_REF, + location=self.LOCATION, ) expected_warnings = [ @@ -8206,7 +8303,11 @@ def test__do_resumable_upload_custom_project(self): client = self._make_client(transport) result = client._do_resumable_upload( - file_obj, self.EXPECTED_CONFIGURATION, None, None, project="custom-project", + file_obj, + self.EXPECTED_CONFIGURATION, + None, + None, + project="custom-project", ) content = result.content.decode("utf-8") diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 5965a4817740..3c1673f4f582 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -603,7 +603,8 @@ def test_complex_query_parameter_type_errors(type_, value, expect): from google.cloud.bigquery.dbapi import exceptions with pytest.raises( - exceptions.ProgrammingError, match=_expected_error_match(expect), + exceptions.ProgrammingError, + match=_expected_error_match(expect), ): complex_query_parameter("test", value, type_) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 11a268c68297..d9d09821262e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -69,7 +69,8 @@ def test_ctor_w_bqstorage_client(self): mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = self._make_one( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) mock_client._ensure_bqstorage_client.assert_called_once_with( @@ -119,7 +120,8 @@ def test_connect_w_both_clients(self): mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) mock_client._ensure_bqstorage_client.assert_called_once_with( @@ -154,7 +156,9 @@ def test_close_closes_all_created_bigquery_clients(self): return_value=client, ) bqstorage_client_patcher = mock.patch.object( - client, "_ensure_bqstorage_client", return_value=bqstorage_client, + client, + "_ensure_bqstorage_client", + return_value=bqstorage_client, ) with client_patcher, bqstorage_client_patcher: diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index cb55da889c62..8ad62f75f824 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -309,12 +309,14 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): mock_client = self._mock_client(rows=row_data) mock_bqstorage_client = self._mock_bqstorage_client( - stream_count=1, rows=bqstorage_streamed_rows, + stream_count=1, + rows=bqstorage_streamed_rows, ) mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) cursor = connection.cursor() cursor.execute("SELECT foo, bar FROM some_table") @@ -345,7 +347,8 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) cursor = connection.cursor() cursor.execute("SELECT foo, bar FROM some_table") @@ -373,13 +376,15 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): mock_client = self._mock_client(rows=row_data) mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( - stream_count=1, rows=row_data, + stream_count=1, + rows=row_data, ) no_access_error = exceptions.Forbidden("invalid credentials") mock_bqstorage_client.create_read_session.side_effect = no_access_error connection = dbapi.connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) cursor = connection.cursor() cursor.execute("SELECT foo, bar FROM some_table") @@ -408,11 +413,13 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): mock_client = self._mock_client(rows=row_data) mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( - stream_count=1, rows=bqstorage_streamed_rows, + stream_count=1, + rows=bqstorage_streamed_rows, ) connection = dbapi.connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) cursor = connection.cursor() cursor.execute("SELECT foo, bar FROM some_table") diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py index cf282c68b989..7319aa0161c3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_types.py @@ -48,8 +48,8 @@ def __bytes__(self): @pytest.mark.parametrize( "raw,expected", [ - (u"hello", b"hello"), - (u"\u1f60", u"\u1f60".encode("utf-8")), + ("hello", b"hello"), + ("\u1f60", "\u1f60".encode("utf-8")), (b"hello", b"hello"), (bytearray(b"hello"), b"hello"), (memoryview(b"hello"), b"hello"), diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py b/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py index 1fb40d4462e5..1db6b5668b1a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py @@ -258,7 +258,7 @@ def test_list_jobs_w_time_filter(client, PROJECT): start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) # One millisecond after the the 2038 31-bit signed int rollover end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) - end_time_millis = (((2 ** 31) - 1) * 1000) + 1 + end_time_millis = (((2**31) - 1) * 1000) + 1 list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time)) diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_models.py b/packages/google-cloud-bigquery/tests/unit/test_list_models.py index b1485233868c..04932d3572b3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_models.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_models.py @@ -40,7 +40,13 @@ def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): ) @dataset_polymorphic def test_list_models_defaults( - make_dataset, get_reference, client, PROJECT, DS_ID, extra, query, + make_dataset, + get_reference, + client, + PROJECT, + DS_ID, + extra, + query, ): from google.cloud.bigquery.model import Model diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index a4214f32f40b..72ae4af21ea5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -571,7 +571,8 @@ def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): ) clear_patch = mock.patch( - "google.cloud.bigquery.magics.magics.display.clear_output", autospec=True, + "google.cloud.bigquery.magics.magics.display.clear_output", + autospec=True, ) run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True @@ -591,7 +592,8 @@ def test_bigquery_magic_clears_display_in_non_verbose_mode(): ) clear_patch = mock.patch( - "google.cloud.bigquery.magics.magics.display.clear_output", autospec=True, + "google.cloud.bigquery.magics.magics.display.clear_output", + autospec=True, ) run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True @@ -785,7 +787,8 @@ def test_bigquery_magic_w_max_results_query_job_results_fails(): "google.cloud.bigquery.client.Client.query", autospec=True ) close_transports_patch = mock.patch( - "google.cloud.bigquery.magics.magics._close_transports", autospec=True, + "google.cloud.bigquery.magics.magics._close_transports", + autospec=True, ) sql = "SELECT 17 AS num" @@ -938,7 +941,8 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip.run_cell_magic("bigquery", "--max_results=5", table_id) row_iterator_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY, + bqstorage_client=bqstorage_instance_mock, + create_bqstorage_client=mock.ANY, ) @@ -2023,7 +2027,8 @@ def test_bigquery_magic_create_dataset_fails(): side_effect=OSError, ) close_transports_patch = mock.patch( - "google.cloud.bigquery.magics.magics._close_transports", autospec=True, + "google.cloud.bigquery.magics.magics._close_transports", + autospec=True, ) with pytest.raises( diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 69a6772e5e92..a966b88b1770 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -825,7 +825,8 @@ def test_to_api_repr_array_type_as_type_instance(self): } klass = self._get_target_class() param = klass.positional( - array_type=ScalarQueryParameterType("BOOLEAN"), values=[True, False], + array_type=ScalarQueryParameterType("BOOLEAN"), + values=[True, False], ) self.assertEqual(param.to_api_repr(), EXPECTED) diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 03ff837c0565..a0b1b5d1142d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -96,7 +96,8 @@ def test_to_api_repr(self): policy = PolicyTagList(names=("foo", "bar")) self.assertEqual( - policy.to_api_repr(), {"names": ["foo", "bar"]}, + policy.to_api_repr(), + {"names": ["foo", "bar"]}, ) field = self._make_one( @@ -525,7 +526,11 @@ def test___repr__evaluable_no_policy_tags(self): def test___repr__evaluable_with_policy_tags(self): policy_tags = PolicyTagList(names=["foo", "bar"]) field = self._make_one( - "field1", "STRING", "REQUIRED", "Description", policy_tags=policy_tags, + "field1", + "STRING", + "REQUIRED", + "Description", + policy_tags=policy_tags, ) field_repr = repr(field) SchemaField = self._get_target_class() # needed for eval # noqa @@ -609,10 +614,12 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + resource[0], + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( - resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + resource[1], + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ) def test_w_description(self): @@ -662,7 +669,8 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + resource[0], + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( resource[1], @@ -775,11 +783,13 @@ def test_from_api_repr(self): def test_to_api_repr(self): taglist = self._make_one(names=["foo", "bar"]) self.assertEqual( - taglist.to_api_repr(), {"names": ["foo", "bar"]}, + taglist.to_api_repr(), + {"names": ["foo", "bar"]}, ) taglist2 = self._make_one(names=("foo", "bar")) self.assertEqual( - taglist2.to_api_repr(), {"names": ["foo", "bar"]}, + taglist2.to_api_repr(), + {"names": ["foo", "bar"]}, ) def test___eq___wrong_type(self): @@ -916,11 +926,22 @@ def test_from_api_repr_parameterized(api, expect, key2): ), ( dict(name="n", field_type="NUMERIC", precision=9), - dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9,), + dict( + name="n", + type="NUMERIC", + mode="NULLABLE", + precision=9, + ), ), ( dict(name="n", field_type="NUMERIC", precision=9, scale=2), - dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2,), + dict( + name="n", + type="NUMERIC", + mode="NULLABLE", + precision=9, + scale=2, + ), ), ( dict(name="n", field_type="BIGNUMERIC"), @@ -928,11 +949,22 @@ def test_from_api_repr_parameterized(api, expect, key2): ), ( dict(name="n", field_type="BIGNUMERIC", precision=40), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40,), + dict( + name="n", + type="BIGNUMERIC", + mode="NULLABLE", + precision=40, + ), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2,), + dict( + name="n", + type="BIGNUMERIC", + mode="NULLABLE", + precision=40, + scale=2, + ), ), ( dict(name="n", field_type="STRING"), @@ -940,7 +972,12 @@ def test_from_api_repr_parameterized(api, expect, key2): ), ( dict(name="n", field_type="STRING", max_length=9), - dict(name="n", type="STRING", mode="NULLABLE", maxLength=9,), + dict( + name="n", + type="STRING", + mode="NULLABLE", + maxLength=9, + ), ), ( dict(name="n", field_type="BYTES"), @@ -948,7 +985,12 @@ def test_from_api_repr_parameterized(api, expect, key2): ), ( dict(name="n", field_type="BYTES", max_length=9), - dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9,), + dict( + name="n", + type="BYTES", + mode="NULLABLE", + maxLength=9, + ), ), ], ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 4f45eac3db4f..23c7a84615be 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2859,7 +2859,8 @@ def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_moc for progress_bar_type, progress_bar_mock in progress_bars: row_iterator = self._make_one(_mock_client(), api_request, path, schema) tbl = row_iterator.to_arrow( - progress_bar_type=progress_bar_type, create_bqstorage_client=False, + progress_bar_type=progress_bar_type, + create_bqstorage_client=False, ) progress_bar_mock.assert_called() @@ -3218,7 +3219,8 @@ def test_to_dataframe_progress_bar( for progress_bar_type, progress_bar_mock in progress_bars: row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe( - progress_bar_type=progress_bar_type, create_bqstorage_client=False, + progress_bar_type=progress_bar_type, + create_bqstorage_client=False, ) progress_bar_mock.assert_called() @@ -3275,7 +3277,8 @@ def test_to_dataframe_no_tqdm(self): with warnings.catch_warnings(record=True) as warned: df = row_iterator.to_dataframe( - progress_bar_type="tqdm", create_bqstorage_client=False, + progress_bar_type="tqdm", + create_bqstorage_client=False, ) user_warnings = [ @@ -3313,7 +3316,8 @@ def test_to_dataframe_tqdm_error(self): with warnings.catch_warnings(record=True) as warned: df = row_iterator.to_dataframe( - progress_bar_type=progress_bar_type, create_bqstorage_client=False, + progress_bar_type=progress_bar_type, + create_bqstorage_client=False, ) self.assertEqual(len(df), 4) # all should be well @@ -3355,9 +3359,9 @@ def test_to_dataframe_w_various_types_nullable(self): ] row_data = [ [None, None, None, None, None, None], - ["1433836800000000", "420", "1.1", u"Cash", "true", "1999-12-01"], - ["1387811700000000", "2580", "17.7", u"Cash", "false", "1953-06-14"], - ["1385565300000000", "2280", "4.4", u"Credit", "true", "1981-11-04"], + ["1433836800000000", "420", "1.1", "Cash", "true", "1999-12-01"], + ["1387811700000000", "2580", "17.7", "Cash", "false", "1953-06-14"], + ["1385565300000000", "2280", "4.4", "Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3395,17 +3399,17 @@ def test_to_dataframe_column_dtypes(self): SchemaField("date", "DATE"), ] row_data = [ - ["1433836800000000", "420", "1.1", "1.77", u"Cash", "true", "1999-12-01"], + ["1433836800000000", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"], [ "1387811700000000", "2580", "17.7", "28.5", - u"Cash", + "Cash", "false", "1953-06-14", ], - ["1385565300000000", "2280", "4.4", "7.1", u"Credit", "true", "1981-11-04"], + ["1385565300000000", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3413,7 +3417,8 @@ def test_to_dataframe_column_dtypes(self): row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe( - dtypes={"km": "float16"}, create_bqstorage_client=False, + dtypes={"km": "float16"}, + create_bqstorage_client=False, ) self.assertIsInstance(df, pandas.DataFrame) @@ -4037,7 +4042,8 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): ) df = row_iterator.to_dataframe( - bqstorage_client=None, create_bqstorage_client=False, + bqstorage_client=None, + create_bqstorage_client=False, ) self.assertIsInstance(df, pandas.DataFrame) @@ -4054,8 +4060,10 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): from google.cloud.bigquery import table as mut bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) - bqstorage_client.create_read_session.side_effect = google.api_core.exceptions.Forbidden( - "TEST BigQuery Storage API not enabled. TEST" + bqstorage_client.create_read_session.side_effect = ( + google.api_core.exceptions.Forbidden( + "TEST BigQuery Storage API not enabled. TEST" + ) ) path = "/foo" api_request = mock.Mock(return_value={"rows": []}) @@ -4185,7 +4193,8 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): bqstorage_client=bqstorage_client, dtypes={ "col_category": pandas.core.dtypes.dtypes.CategoricalDtype( - categories=["low", "medium", "high"], ordered=False, + categories=["low", "medium", "high"], + ordered=False, ), }, ) @@ -4203,7 +4212,8 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): expected_dtypes = [ pandas.core.dtypes.dtypes.np.dtype("O"), # the default for string data pandas.core.dtypes.dtypes.CategoricalDtype( - categories=["low", "medium", "high"], ordered=False, + categories=["low", "medium", "high"], + ordered=False, ), ] self.assertEqual(list(got.dtypes), expected_dtypes) @@ -4228,7 +4238,8 @@ def test_to_dataframe_geography_as_object(self): ), ) df = row_iterator.to_dataframe( - create_bqstorage_client=False, geography_as_object=True, + create_bqstorage_client=False, + geography_as_object=True, ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 3) # verify the number of rows @@ -4395,7 +4406,10 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): geography_column = "g" to_dataframe.return_value = pandas.DataFrame( - dict(name=["foo"], g=[wkt.loads("point(0 0)")],) + dict( + name=["foo"], + g=[wkt.loads("point(0 0)")], + ) ) df = row_iterator.to_geodataframe( From f14bc5d7b38e795907db01a6806eec840eb23d48 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 29 Mar 2022 10:51:58 -0400 Subject: [PATCH 1403/2016] chore(python): use black==22.3.0 (#1176) Source-Link: https://github.com/googleapis/synthtool/commit/6fab84af09f2cf89a031fd8671d1def6b2931b11 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:7cffbc10910c3ab1b852c05114a08d374c195a81cdec1d4a67a1d129331d0bfe Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 7e08e05a380c..87dd00611576 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5d8da01438ece4021d135433f2cf3227aa39ef0eaccc941d62aa35e6902832ae + digest: sha256:7cffbc10910c3ab1b852c05114a08d374c195a81cdec1d4a67a1d129331d0bfe From 1a86b2c431372ea44580f4a6053f698fa0101356 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 29 Mar 2022 11:04:59 -0400 Subject: [PATCH 1404/2016] chore(main): release 2.34.3 (#1172) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(main): release 2.34.3 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 8fff7ddb099d..f3970bfd22be 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.34.3](https://github.com/googleapis/python-bigquery/compare/v2.34.2...v2.34.3) (2022-03-29) + + +### Bug Fixes + +* update content-type header ([#1171](https://github.com/googleapis/python-bigquery/issues/1171)) ([921b440](https://github.com/googleapis/python-bigquery/commit/921b440fdd151e88ee5b3e0d9fb90177877dc11a)) + ### [2.34.2](https://github.com/googleapis/python-bigquery/compare/v2.34.1...v2.34.2) (2022-03-05) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 66368e2cd175..385cb3c75b45 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.34.2" +__version__ = "2.34.3" From b9c6ec6e7ce1dea8060c3adcd942ac475a576f78 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 29 Mar 2022 12:54:58 -0500 Subject: [PATCH 1405/2016] fix!: remove out-of-date BigQuery ML protocol buffers (#1178) deps!: BigQuery Storage and pyarrow are required dependencies (#776) fix!: use nullable `Int64` and `boolean` dtypes in `to_dataframe` (#786) feat!: destination tables are no-longer removed by `create_job` (#891) feat!: In `to_dataframe`, use `dbdate` and `dbtime` dtypes from db-dtypes package for BigQuery DATE and TIME columns (#972) fix!: automatically convert out-of-bounds dates in `to_dataframe`, remove `date_as_object` argument (#972) feat!: mark the package as type-checked (#1058) feat!: default to DATETIME type when loading timezone-naive datetimes from Pandas (#1061) feat: add `api_method` parameter to `Client.query` to select `INSERT` or `QUERY` API (#967) fix: improve type annotations for mypy validation (#1081) feat: use `StandardSqlField` class for `Model.feature_columns` and `Model.label_columns` (#1117) docs: Add migration guide from version 2.x to 3.x (#1027) Release-As: 3.0.0 --- packages/google-cloud-bigquery/.coveragerc | 1 + packages/google-cloud-bigquery/README.rst | 5 +- packages/google-cloud-bigquery/UPGRADING.md | 186 +++++- .../docs/bigquery/legacy_proto_types.rst | 14 + .../types.rst => bigquery/standard_sql.rst} | 2 +- packages/google-cloud-bigquery/docs/conf.py | 2 +- packages/google-cloud-bigquery/docs/index.rst | 3 +- .../google-cloud-bigquery/docs/reference.rst | 19 +- .../google-cloud-bigquery/docs/snippets.py | 4 - .../docs/usage/pandas.rst | 38 +- .../google/cloud/bigquery/__init__.py | 20 +- .../google/cloud/bigquery/_helpers.py | 77 +-- .../google/cloud/bigquery/_http.py | 4 +- .../google/cloud/bigquery/_job_helpers.py | 259 ++++++++ .../google/cloud/bigquery/_pandas_helpers.py | 218 ++++--- .../google/cloud/bigquery/client.py | 176 ++---- .../google/cloud/bigquery/dataset.py | 9 +- .../google/cloud/bigquery/dbapi/_helpers.py | 6 +- .../bigquery/encryption_configuration.py | 2 +- .../google/cloud/bigquery/enums.py | 138 ++-- .../google/cloud/bigquery/exceptions.py | 25 - .../google/cloud/bigquery/external_config.py | 8 +- .../google/cloud/bigquery/job/copy_.py | 2 +- .../google/cloud/bigquery/job/load.py | 2 +- .../google/cloud/bigquery/job/query.py | 31 +- .../google/cloud/bigquery/magics/magics.py | 11 - .../google/cloud/bigquery/model.py | 339 +++++----- .../cloud/{bigquery_v2 => bigquery}/py.typed | 0 .../google/cloud/bigquery/query.py | 35 +- .../google/cloud/bigquery/routine/routine.py | 51 +- .../google/cloud/bigquery/schema.py | 108 ++-- .../google/cloud/bigquery/standard_sql.py | 355 +++++++++++ .../google/cloud/bigquery/table.py | 154 ++--- .../google/cloud/bigquery_v2/__init__.py | 10 + .../cloud/bigquery_v2/gapic_metadata.json | 63 -- packages/google-cloud-bigquery/noxfile.py | 23 + packages/google-cloud-bigquery/owlbot.py | 78 +-- .../samples/add_empty_column.py | 2 +- .../samples/browse_table_data.py | 14 +- .../samples/client_list_jobs.py | 2 +- .../samples/client_load_partitioned_table.py | 2 +- .../samples/client_query.py | 2 +- .../samples/client_query_add_column.py | 2 +- .../samples/client_query_batch.py | 16 +- .../samples/client_query_destination_table.py | 2 +- ...lient_query_destination_table_clustered.py | 2 +- .../client_query_destination_table_cmek.py | 2 +- .../client_query_destination_table_legacy.py | 2 +- .../samples/client_query_dry_run.py | 7 +- .../samples/client_query_legacy_sql.py | 2 +- .../samples/client_query_relax_column.py | 2 +- .../samples/client_query_w_array_params.py | 2 +- .../samples/client_query_w_named_params.py | 2 +- .../client_query_w_positional_params.py | 2 +- .../samples/client_query_w_struct_params.py | 2 +- .../client_query_w_timestamp_params.py | 2 +- .../samples/copy_table.py | 2 +- .../samples/copy_table_cmek.py | 2 +- .../samples/copy_table_multiple_source.py | 4 +- .../samples/create_dataset.py | 2 +- .../samples/create_job.py | 7 +- .../samples/create_routine.py | 12 +- .../samples/create_routine_ddl.py | 2 +- .../samples/create_table.py | 2 +- .../samples/create_table_clustered.py | 7 +- .../samples/create_table_range_partitioned.py | 7 +- .../samples/dataset_exists.py | 2 +- .../samples/delete_dataset.py | 2 +- .../samples/delete_dataset_labels.py | 7 +- .../samples/delete_model.py | 2 +- .../samples/delete_routine.py | 2 +- .../samples/delete_table.py | 2 +- .../samples/download_public_data.py | 2 +- .../samples/download_public_data_sandbox.py | 2 +- .../samples/geography/conftest.py | 13 +- .../samples/geography/insert_geojson.py | 10 +- .../samples/geography/insert_geojson_test.py | 2 +- .../samples/geography/insert_wkt.py | 10 +- .../samples/geography/insert_wkt_test.py | 2 +- .../samples/geography/mypy.ini | 8 + .../samples/geography/requirements.txt | 2 + .../samples/geography/to_geodataframe.py | 10 +- .../samples/geography/to_geodataframe_test.py | 2 +- .../samples/get_dataset.py | 2 +- .../samples/get_dataset_labels.py | 2 +- .../samples/get_model.py | 2 +- .../samples/get_routine.py | 7 +- .../samples/get_table.py | 2 +- .../samples/label_dataset.py | 2 +- .../samples/list_datasets.py | 2 +- .../samples/list_datasets_by_label.py | 2 +- .../samples/list_models.py | 2 +- .../samples/list_routines.py | 2 +- .../samples/list_tables.py | 2 +- .../samples/load_table_clustered.py | 7 +- .../samples/load_table_dataframe.py | 7 +- .../samples/load_table_file.py | 7 +- .../samples/load_table_uri_autodetect_csv.py | 2 +- .../samples/load_table_uri_autodetect_json.py | 2 +- .../samples/load_table_uri_avro.py | 2 +- .../samples/load_table_uri_cmek.py | 2 +- .../samples/load_table_uri_csv.py | 2 +- .../samples/load_table_uri_json.py | 2 +- .../samples/load_table_uri_orc.py | 2 +- .../samples/load_table_uri_parquet.py | 2 +- .../samples/load_table_uri_truncate_avro.py | 2 +- .../samples/load_table_uri_truncate_csv.py | 2 +- .../samples/load_table_uri_truncate_json.py | 2 +- .../samples/load_table_uri_truncate_orc.py | 2 +- .../load_table_uri_truncate_parquet.py | 2 +- .../samples/magics/_helpers.py | 2 +- .../samples/magics/conftest.py | 12 +- .../samples/magics/mypy.ini | 8 + .../samples/magics/query.py | 7 +- .../samples/magics/query_params_scalars.py | 7 +- .../magics/query_params_scalars_test.py | 2 +- .../samples/magics/query_test.py | 2 +- .../samples/magics/requirements.txt | 2 + .../google-cloud-bigquery/samples/mypy.ini | 12 + .../query_external_gcs_temporary_table.py | 6 +- .../query_external_sheets_permanent_table.py | 8 +- .../query_external_sheets_temporary_table.py | 8 +- .../samples/query_no_cache.py | 2 +- .../samples/query_pagination.py | 2 +- .../samples/query_script.py | 2 +- .../samples/query_to_arrow.py | 7 +- .../snippets/authenticate_service_account.py | 6 +- .../authenticate_service_account_test.py | 10 +- .../snippets/authorized_view_tutorial.py | 9 +- .../snippets/authorized_view_tutorial_test.py | 11 +- .../samples/snippets/conftest.py | 27 +- .../create_table_external_hive_partitioned.py | 7 +- ...te_table_external_hive_partitioned_test.py | 9 +- .../samples/snippets/dataset_access_test.py | 13 +- .../samples/snippets/delete_job.py | 2 +- .../samples/snippets/delete_job_test.py | 11 +- .../samples/snippets/jupyter_tutorial_test.py | 17 +- .../snippets/load_table_uri_firestore.py | 2 +- .../snippets/load_table_uri_firestore_test.py | 9 +- .../samples/snippets/manage_job_cancel.py | 2 +- .../samples/snippets/manage_job_get.py | 2 +- .../samples/snippets/manage_job_test.py | 2 +- .../samples/snippets/materialized_view.py | 25 +- .../snippets/materialized_view_test.py | 24 +- .../samples/snippets/mypy.ini | 8 + .../samples/snippets/natality_tutorial.py | 7 +- .../snippets/natality_tutorial_test.py | 11 +- .../samples/snippets/quickstart.py | 8 +- .../samples/snippets/quickstart_test.py | 13 +- .../samples/snippets/requirements.txt | 2 + .../samples/snippets/revoke_dataset_access.py | 2 +- .../samples/snippets/simple_app.py | 2 +- .../samples/snippets/simple_app_test.py | 7 +- .../samples/snippets/test_update_with_dml.py | 10 +- .../samples/snippets/update_dataset_access.py | 2 +- .../samples/snippets/update_with_dml.py | 12 +- .../samples/snippets/user_credentials.py | 11 +- .../samples/snippets/user_credentials_test.py | 9 +- .../samples/snippets/view.py | 43 +- .../samples/snippets/view_test.py | 31 +- .../samples/table_exists.py | 2 +- .../samples/table_insert_rows.py | 2 +- ...le_insert_rows_explicit_none_insert_ids.py | 2 +- .../samples/tests/conftest.py | 30 +- .../samples/tests/test_add_empty_column.py | 7 +- .../samples/tests/test_browse_table_data.py | 9 +- .../samples/tests/test_client_list_jobs.py | 10 +- .../test_client_load_partitioned_table.py | 9 +- .../samples/tests/test_client_query.py | 9 +- .../tests/test_client_query_add_column.py | 9 +- .../samples/tests/test_client_query_batch.py | 9 +- .../test_client_query_destination_table.py | 9 +- ...lient_query_destination_table_clustered.py | 9 +- ...est_client_query_destination_table_cmek.py | 9 +- ...t_client_query_destination_table_legacy.py | 9 +- .../tests/test_client_query_dry_run.py | 9 +- .../tests/test_client_query_legacy_sql.py | 8 +- .../tests/test_client_query_relax_column.py | 11 +- .../tests/test_client_query_w_array_params.py | 9 +- .../tests/test_client_query_w_named_params.py | 9 +- .../test_client_query_w_positional_params.py | 9 +- .../test_client_query_w_struct_params.py | 9 +- .../test_client_query_w_timestamp_params.py | 9 +- .../samples/tests/test_copy_table.py | 12 +- .../samples/tests/test_copy_table_cmek.py | 7 +- .../tests/test_copy_table_multiple_source.py | 12 +- .../samples/tests/test_create_dataset.py | 9 +- .../samples/tests/test_create_job.py | 10 +- .../samples/tests/test_create_table.py | 9 +- .../tests/test_create_table_clustered.py | 9 +- .../test_create_table_range_partitioned.py | 9 +- .../samples/tests/test_dataset_exists.py | 11 +- .../tests/test_dataset_label_samples.py | 9 +- .../samples/tests/test_delete_dataset.py | 7 +- .../samples/tests/test_delete_table.py | 7 +- .../tests/test_download_public_data.py | 4 +- .../test_download_public_data_sandbox.py | 4 +- .../samples/tests/test_get_dataset.py | 7 +- .../samples/tests/test_get_table.py | 9 +- .../samples/tests/test_list_datasets.py | 10 +- .../tests/test_list_datasets_by_label.py | 10 +- .../samples/tests/test_list_tables.py | 9 +- .../tests/test_load_table_clustered.py | 12 +- .../tests/test_load_table_dataframe.py | 21 +- .../samples/tests/test_load_table_file.py | 7 +- .../test_load_table_uri_autodetect_csv.py | 9 +- .../test_load_table_uri_autodetect_json.py | 9 +- .../samples/tests/test_load_table_uri_avro.py | 9 +- .../samples/tests/test_load_table_uri_cmek.py | 9 +- .../samples/tests/test_load_table_uri_csv.py | 9 +- .../samples/tests/test_load_table_uri_json.py | 9 +- .../samples/tests/test_load_table_uri_orc.py | 9 +- .../tests/test_load_table_uri_parquet.py | 9 +- .../test_load_table_uri_truncate_avro.py | 9 +- .../tests/test_load_table_uri_truncate_csv.py | 9 +- .../test_load_table_uri_truncate_json.py | 9 +- .../tests/test_load_table_uri_truncate_orc.py | 9 +- .../test_load_table_uri_truncate_parquet.py | 9 +- .../samples/tests/test_model_samples.py | 9 +- ...test_query_external_gcs_temporary_table.py | 9 +- ...t_query_external_sheets_permanent_table.py | 9 +- ...t_query_external_sheets_temporary_table.py | 9 +- .../samples/tests/test_query_no_cache.py | 8 +- .../samples/tests/test_query_pagination.py | 9 +- .../samples/tests/test_query_script.py | 9 +- .../samples/tests/test_query_to_arrow.py | 4 +- .../samples/tests/test_routine_samples.py | 50 +- .../samples/tests/test_table_exists.py | 9 +- .../samples/tests/test_table_insert_rows.py | 11 +- ...le_insert_rows_explicit_none_insert_ids.py | 9 +- .../samples/tests/test_undelete_table.py | 11 +- .../tests/test_update_dataset_access.py | 9 +- ...te_dataset_default_partition_expiration.py | 9 +- ...update_dataset_default_table_expiration.py | 9 +- .../tests/test_update_dataset_description.py | 9 +- ...t_update_table_require_partition_filter.py | 11 +- .../samples/undelete_table.py | 4 +- .../samples/update_dataset_access.py | 2 +- ...te_dataset_default_partition_expiration.py | 2 +- ...update_dataset_default_table_expiration.py | 2 +- .../samples/update_dataset_description.py | 2 +- .../samples/update_model.py | 2 +- .../samples/update_routine.py | 7 +- .../update_table_require_partition_filter.py | 2 +- packages/google-cloud-bigquery/setup.cfg | 2 +- packages/google-cloud-bigquery/setup.py | 29 +- .../testing/constraints-3.6.txt | 3 +- .../testing/constraints-3.7.txt | 1 + .../testing/constraints-3.8.txt | 1 + .../tests/system/conftest.py | 71 ++- .../tests/system/test_arrow.py | 6 +- .../tests/system/test_client.py | 417 +----------- .../tests/system/test_pandas.py | 248 +++++++- .../tests/system/test_query.py | 453 ++++++++++++- .../tests/unit/enums/__init__.py | 13 - .../enums/test_standard_sql_data_types.py | 76 --- .../tests/unit/job/test_query_pandas.py | 145 +++-- .../tests/unit/model/test_model.py | 97 ++- .../tests/unit/routine/test_routine.py | 41 +- .../unit/routine/test_routine_argument.py | 14 +- .../tests/unit/test__helpers.py | 71 --- .../tests/unit/test__job_helpers.py | 337 ++++++++++ .../tests/unit/test__pandas_helpers.py | 222 ++++--- .../tests/unit/test_client.py | 595 +++++++++--------- .../tests/unit/test_dbapi__helpers.py | 12 +- .../tests/unit/test_dbapi_connection.py | 22 +- .../tests/unit/test_dbapi_cursor.py | 26 +- .../__init__.py => test_legacy_types.py} | 14 +- .../tests/unit/test_magics.py | 79 +-- .../tests/unit/test_query.py | 4 +- .../tests/unit/test_schema.py | 109 ++-- .../tests/unit/test_standard_sql_types.py | 594 +++++++++++++++++ .../tests/unit/test_table.py | 226 ++----- .../tests/unit/test_table_pandas.py | 194 ++++++ 274 files changed, 5282 insertions(+), 2797 deletions(-) create mode 100644 packages/google-cloud-bigquery/docs/bigquery/legacy_proto_types.rst rename packages/google-cloud-bigquery/docs/{bigquery_v2/types.rst => bigquery/standard_sql.rst} (72%) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py rename packages/google-cloud-bigquery/google/cloud/{bigquery_v2 => bigquery}/py.typed (100%) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/standard_sql.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic_metadata.json create mode 100644 packages/google-cloud-bigquery/samples/geography/mypy.ini create mode 100644 packages/google-cloud-bigquery/samples/magics/mypy.ini create mode 100644 packages/google-cloud-bigquery/samples/mypy.ini create mode 100644 packages/google-cloud-bigquery/samples/snippets/mypy.ini delete mode 100644 packages/google-cloud-bigquery/tests/unit/enums/__init__.py delete mode 100644 packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test__job_helpers.py rename packages/google-cloud-bigquery/tests/unit/{gapic/__init__.py => test_legacy_types.py} (60%) create mode 100644 packages/google-cloud-bigquery/tests/unit/test_standard_sql_types.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_table_pandas.py diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index 23861a8eb51f..1ed1a9704f83 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -6,6 +6,7 @@ fail_under = 100 show_missing = True omit = google/cloud/bigquery/__init__.py + google/cloud/bigquery_v2/* # Legacy proto-based types. exclude_lines = # Re-enable the standard pragma pragma: NO COVER diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index bafa0669352e..e8578916a272 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -1,7 +1,7 @@ Python Client for Google BigQuery ================================= -|GA| |pypi| |versions| +|GA| |pypi| |versions| Querying massive datasets can be time consuming and expensive without the right hardware and infrastructure. Google `BigQuery`_ solves this problem by @@ -140,6 +140,3 @@ In this example all tracing data will be published to the Google .. _OpenTelemetry documentation: https://opentelemetry-python.readthedocs.io .. _Cloud Trace: https://cloud.google.com/trace - - - diff --git a/packages/google-cloud-bigquery/UPGRADING.md b/packages/google-cloud-bigquery/UPGRADING.md index a4ba0efd2cab..95f87f7ee9d8 100644 --- a/packages/google-cloud-bigquery/UPGRADING.md +++ b/packages/google-cloud-bigquery/UPGRADING.md @@ -11,6 +11,190 @@ See the License for the specific language governing permissions and limitations under the License. --> +# 3.0.0 Migration Guide + +## New Required Dependencies + +Some of the previously optional dependencies are now *required* in `3.x` versions of the +library, namely +[google-cloud-bigquery-storage](https://pypi.org/project/google-cloud-bigquery-storage/) +(minimum version `2.0.0`) and [pyarrow](https://pypi.org/project/pyarrow/) (minimum +version `3.0.0`). + +The behavior of some of the package "extras" has thus also changed: + * The `pandas` extra now requires the [db-types](https://pypi.org/project/db-dtypes/) + package. + * The `bqstorage` extra has been preserved for comaptibility reasons, but it is now a + no-op and should be omitted when installing the BigQuery client library. + + **Before:** + ``` + $ pip install google-cloud-bigquery[bqstorage] + ``` + + **After:** + ``` + $ pip install google-cloud-bigquery + ``` + + * The `bignumeric_type` extra has been removed, as `BIGNUMERIC` type is now + automatically supported. That extra should thus not be used. + + **Before:** + ``` + $ pip install google-cloud-bigquery[bignumeric_type] + ``` + + **After:** + ``` + $ pip install google-cloud-bigquery + ``` + + +## Type Annotations + +The library is now type-annotated and declares itself as such. If you use a static +type checker such as `mypy`, you might start getting errors in places where +`google-cloud-bigquery` package is used. + +It is recommended to update your code and/or type annotations to fix these errors, but +if this is not feasible in the short term, you can temporarily ignore type annotations +in `google-cloud-bigquery`, for example by using a special `# type: ignore` comment: + +```py +from google.cloud import bigquery # type: ignore +``` + +But again, this is only recommended as a possible short-term workaround if immediately +fixing the type check errors in your project is not feasible. + +## Re-organized Types + +The auto-generated parts of the library has been removed, and proto-based types formerly +found in `google.cloud.bigquery_v2` have been replaced by the new implementation (but +see the [section](#legacy-types) below). + +For example, the standard SQL data types should new be imported from a new location: + +**Before:** +```py +from google.cloud.bigquery_v2 import StandardSqlDataType +from google.cloud.bigquery_v2.types import StandardSqlField +from google.cloud.bigquery_v2.types.standard_sql import StandardSqlStructType +``` + +**After:** +```py +from google.cloud.bigquery import StandardSqlDataType +from google.cloud.bigquery.standard_sql import StandardSqlField +from google.cloud.bigquery.standard_sql import StandardSqlStructType +``` + +The `TypeKind` enum defining all possible SQL types for schema fields has been renamed +and is not nested anymore under `StandardSqlDataType`: + + +**Before:** +```py +from google.cloud.bigquery_v2 import StandardSqlDataType + +if field_type == StandardSqlDataType.TypeKind.STRING: + ... +``` + +**After:** +```py + +from google.cloud.bigquery import StandardSqlTypeNames + +if field_type == StandardSqlTypeNames.STRING: + ... +``` + + +## Issuing queries with `Client.create_job` preserves destination table + +The `Client.create_job` method no longer removes the destination table from a +query job's configuration. Destination table for the query can thus be +explicitly defined by the user. + + +## Changes to data types when reading a pandas DataFrame + +The default dtypes returned by the `to_dataframe` method have changed. + +* Now, the BigQuery `BOOLEAN` data type maps to the pandas `boolean` dtype. + Previously, this mapped to the pandas `bool` dtype when the column did not + contain `NULL` values and the pandas `object` dtype when `NULL` values are + present. +* Now, the BigQuery `INT64` data type maps to the pandas `Int64` dtype. + Previously, this mapped to the pandas `int64` dtype when the column did not + contain `NULL` values and the pandas `float64` dtype when `NULL` values are + present. +* Now, the BigQuery `DATE` data type maps to the pandas `dbdate` dtype, which + is provided by the + [db-dtypes](https://googleapis.dev/python/db-dtypes/latest/index.html) + package. If any date value is outside of the range of + [pandas.Timestamp.min](https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.min.html) + (1677-09-22) and + [pandas.Timestamp.max](https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.max.html) + (2262-04-11), the data type maps to the pandas `object` dtype. The + `date_as_object` parameter has been removed. +* Now, the BigQuery `TIME` data type maps to the pandas `dbtime` dtype, which + is provided by the + [db-dtypes](https://googleapis.dev/python/db-dtypes/latest/index.html) + package. + + +## Changes to data types loading a pandas DataFrame + +In the absence of schema information, pandas columns with naive +`datetime64[ns]` values, i.e. without timezone information, are recognized and +loaded using the `DATETIME` type. On the other hand, for columns with +timezone-aware `datetime64[ns, UTC]` values, the `TIMESTAMP` type is continued +to be used. + +## Changes to `Model`, `Client.get_model`, `Client.update_model`, and `Client.list_models` + +The types of several `Model` properties have been changed. + +- `Model.feature_columns` now returns a sequence of `google.cloud.bigquery.standard_sql.StandardSqlField`. +- `Model.label_columns` now returns a sequence of `google.cloud.bigquery.standard_sql.StandardSqlField`. +- `Model.model_type` now returns a string. +- `Model.training_runs` now returns a sequence of dictionaries, as recieved from the [BigQuery REST API](https://cloud.google.com/bigquery/docs/reference/rest/v2/models#Model.FIELDS.training_runs). + + +## Legacy Protocol Buffers Types + +For compatibility reasons, the legacy proto-based types still exists as static code +and can be imported: + +```py +from google.cloud.bigquery_v2 import Model # a sublcass of proto.Message +``` + +Mind, however, that importing them will issue a warning, because aside from +being importable, these types **are not maintained anymore**. They may differ +both from the types in `google.cloud.bigquery`, and from the types supported on +the backend. + +### Maintaining compatibility with `google-cloud-bigquery` version 2.0 + +If you maintain a library or system that needs to support both +`google-cloud-bigquery` version 2.x and 3.x, it is recommended that you detect +when version 2.x is in use and convert properties that use the legacy protocol +buffer types, such as `Model.training_runs`, into the types used in 3.x. + +Call the [`to_dict` +method](https://proto-plus-python.readthedocs.io/en/latest/reference/message.html#proto.message.Message.to_dict) +on the protocol buffers objects to get a JSON-compatible dictionary. + +```py +from google.cloud.bigquery_v2 import Model + +training_run: Model.TrainingRun = ... +training_run_dict = training_run.to_dict() +``` # 2.0.0 Migration Guide @@ -56,4 +240,4 @@ distance_type = enums.Model.DistanceType.COSINE from google.cloud.bigquery_v2 import types distance_type = types.Model.DistanceType.COSINE -``` \ No newline at end of file +``` diff --git a/packages/google-cloud-bigquery/docs/bigquery/legacy_proto_types.rst b/packages/google-cloud-bigquery/docs/bigquery/legacy_proto_types.rst new file mode 100644 index 000000000000..bc1e9371597b --- /dev/null +++ b/packages/google-cloud-bigquery/docs/bigquery/legacy_proto_types.rst @@ -0,0 +1,14 @@ +Legacy proto-based Types for Google Cloud Bigquery v2 API +========================================================= + +.. warning:: + These types are provided for backward compatibility only, and are not maintained + anymore. They might also differ from the types uspported on the backend. It is + therefore strongly advised to migrate to the types found in :doc:`standard_sql`. + + Also see the :doc:`3.0.0 Migration Guide<../UPGRADING>` for more information. + +.. automodule:: google.cloud.bigquery_v2.types + :members: + :undoc-members: + :show-inheritance: diff --git a/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst b/packages/google-cloud-bigquery/docs/bigquery/standard_sql.rst similarity index 72% rename from packages/google-cloud-bigquery/docs/bigquery_v2/types.rst rename to packages/google-cloud-bigquery/docs/bigquery/standard_sql.rst index c36a83e0b415..bd52bb78fcb0 100644 --- a/packages/google-cloud-bigquery/docs/bigquery_v2/types.rst +++ b/packages/google-cloud-bigquery/docs/bigquery/standard_sql.rst @@ -1,7 +1,7 @@ Types for Google Cloud Bigquery v2 API ====================================== -.. automodule:: google.cloud.bigquery_v2.types +.. automodule:: google.cloud.bigquery.standard_sql :members: :undoc-members: :show-inheritance: diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 296eac02ae27..5c83fd79ee73 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -109,12 +109,12 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [ + "google/cloud/bigquery_v2/**", # Legacy proto-based types. "_build", "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", - "bigquery_v2/services.rst", # generated by the code generator ] # The reST default role (used for this markup: `text`) to use for all diff --git a/packages/google-cloud-bigquery/docs/index.rst b/packages/google-cloud-bigquery/docs/index.rst index 3f8ba23046aa..4ab0a298dc84 100644 --- a/packages/google-cloud-bigquery/docs/index.rst +++ b/packages/google-cloud-bigquery/docs/index.rst @@ -30,7 +30,8 @@ API Reference Migration Guide --------------- -See the guide below for instructions on migrating to the 2.x release of this library. +See the guides below for instructions on migrating from older to newer *major* releases +of this library (from ``1.x`` to ``2.x``, or from ``2.x`` to ``3.x``). .. toctree:: :maxdepth: 2 diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 00f64746f09f..4f655b09e36e 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -202,9 +202,24 @@ Encryption Configuration Additional Types ================ -Protocol buffer classes for working with the Models API. +Helper SQL type classes. .. toctree:: :maxdepth: 2 - bigquery_v2/types + bigquery/standard_sql + + +Legacy proto-based Types (deprecated) +===================================== + +The legacy type classes based on protocol buffers. + +.. deprecated:: 3.0.0 + These types are provided for backward compatibility only, and are not maintained + anymore. + +.. toctree:: + :maxdepth: 2 + + bigquery/legacy_proto_types diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index f67823249e01..238fd52c3a4e 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -30,10 +30,6 @@ import pandas except (ImportError, AttributeError): pandas = None -try: - import pyarrow -except (ImportError, AttributeError): - pyarrow = None from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable diff --git a/packages/google-cloud-bigquery/docs/usage/pandas.rst b/packages/google-cloud-bigquery/docs/usage/pandas.rst index 92eee67cf1b8..550a6779256c 100644 --- a/packages/google-cloud-bigquery/docs/usage/pandas.rst +++ b/packages/google-cloud-bigquery/docs/usage/pandas.rst @@ -14,12 +14,12 @@ First, ensure that the :mod:`pandas` library is installed by running: pip install --upgrade pandas -Alternatively, you can install the BigQuery python client library with +Alternatively, you can install the BigQuery Python client library with :mod:`pandas` by running: .. code-block:: bash - pip install --upgrade google-cloud-bigquery[pandas] + pip install --upgrade 'google-cloud-bigquery[pandas]' To retrieve query results as a :class:`pandas.DataFrame`: @@ -37,6 +37,38 @@ To retrieve table rows as a :class:`pandas.DataFrame`: :start-after: [START bigquery_list_rows_dataframe] :end-before: [END bigquery_list_rows_dataframe] +The following data types are used when creating a pandas DataFrame. + +.. list-table:: Pandas Data Type Mapping + :header-rows: 1 + + * - BigQuery + - pandas + - Notes + * - BOOL + - boolean + - + * - DATETIME + - datetime64[ns], object + - The object dtype is used when there are values not representable in a + pandas nanosecond-precision timestamp. + * - DATE + - dbdate, object + - The object dtype is used when there are values not representable in a + pandas nanosecond-precision timestamp. + + Requires the ``db-dtypes`` package. See the `db-dtypes usage guide + `_ + * - FLOAT64 + - float64 + - + * - INT64 + - Int64 + - + * - TIME + - dbtime + - Requires the ``db-dtypes`` package. See the `db-dtypes usage guide + `_ Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame ------------------------------------------------------------ @@ -60,7 +92,7 @@ As of version 1.3.0, you can use the to load data from a :class:`pandas.DataFrame` to a :class:`~google.cloud.bigquery.table.Table`. To use this function, in addition to :mod:`pandas`, you will need to install the :mod:`pyarrow` library. You can -install the BigQuery python client library with :mod:`pandas` and +install the BigQuery Python client library with :mod:`pandas` and :mod:`pyarrow` by running: .. code-block:: bash diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index b3c492125cd3..1ac04d50c356 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -41,8 +41,7 @@ from google.cloud.bigquery.enums import DecimalTargetType from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames -from google.cloud.bigquery.enums import StandardSqlDataTypes -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery.enums import StandardSqlTypeNames from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -81,6 +80,7 @@ from google.cloud.bigquery.query import ConnectionProperty from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import ScalarQueryParameterType +from google.cloud.bigquery.query import SqlParameterScalarTypes from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import StructQueryParameterType from google.cloud.bigquery.query import UDFResource @@ -90,8 +90,12 @@ from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType -from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import PolicyTagList +from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.standard_sql import StandardSqlDataType +from google.cloud.bigquery.standard_sql import StandardSqlField +from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery.standard_sql import StandardSqlTableType from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import Row @@ -114,6 +118,7 @@ "StructQueryParameter", "ArrayQueryParameterType", "ScalarQueryParameterType", + "SqlParameterScalarTypes", "StructQueryParameterType", # Datasets "Dataset", @@ -160,6 +165,11 @@ "ScriptOptions", "TransactionInfo", "DEFAULT_RETRY", + # Standard SQL types + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", + "StandardSqlTableType", # Enum Constants "enums", "AutoRowIDs", @@ -177,12 +187,10 @@ "SchemaUpdateOption", "SourceFormat", "SqlTypeNames", - "StandardSqlDataTypes", + "StandardSqlTypeNames", "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", - # Custom exceptions - "LegacyBigQueryStorageError", ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index e2ca7fa075a6..6faa32606ebd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -19,7 +19,7 @@ import decimal import math import re -from typing import Any, Optional, Union +from typing import Optional, Union from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -30,11 +30,6 @@ from google.cloud._helpers import _to_bytes import packaging.version -from google.cloud.bigquery.exceptions import ( - LegacyBigQueryStorageError, - LegacyPyarrowError, -) - _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -54,8 +49,6 @@ r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) -_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") -_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") @@ -89,36 +82,10 @@ def is_read_session_optional(self) -> bool: """ return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION - def verify_version(self): - """Verify that a recent enough version of BigQuery Storage extra is - installed. - - The function assumes that google-cloud-bigquery-storage extra is - installed, and should thus be used in places where this assumption - holds. - - Because `pip` can install an outdated version of this extra despite the - constraints in `setup.py`, the calling code can use this helper to - verify the version compatibility at runtime. - - Raises: - LegacyBigQueryStorageError: - If the google-cloud-bigquery-storage package is outdated. - """ - if self.installed_version < _MIN_BQ_STORAGE_VERSION: - msg = ( - "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})." - ) - raise LegacyBigQueryStorageError(msg) - class PyarrowVersions: """Version comparisons for pyarrow package.""" - # https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 - _PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) - def __init__(self): self._installed_version = None @@ -138,52 +105,10 @@ def installed_version(self) -> packaging.version.Version: return self._installed_version - @property - def is_bad_version(self) -> bool: - return self.installed_version in self._PYARROW_BAD_VERSIONS - @property def use_compliant_nested_type(self) -> bool: return self.installed_version.major >= 4 - def try_import(self, raise_if_error: bool = False) -> Any: - """Verify that a recent enough version of pyarrow extra is - installed. - - The function assumes that pyarrow extra is installed, and should thus - be used in places where this assumption holds. - - Because `pip` can install an outdated version of this extra despite the - constraints in `setup.py`, the calling code can use this helper to - verify the version compatibility at runtime. - - Returns: - The ``pyarrow`` module or ``None``. - - Raises: - LegacyPyarrowError: - If the pyarrow package is outdated and ``raise_if_error`` is ``True``. - """ - try: - import pyarrow - except ImportError as exc: # pragma: NO COVER - if raise_if_error: - raise LegacyPyarrowError( - f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}." - ) from exc - return None - - if self.installed_version < _MIN_PYARROW_VERSION: - if raise_if_error: - msg = ( - "Dependency pyarrow is outdated, please upgrade " - f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})." - ) - raise LegacyPyarrowError(msg) - return None - - return pyarrow - BQ_STORAGE_VERSIONS = BQStorageVersions() PYARROW_VERSIONS = PyarrowVersions() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index f7207f32e150..789ef9243f5c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -52,8 +52,8 @@ def __init__(self, client, client_info=None, api_endpoint=None): self._client_info.gapic_version = __version__ self._client_info.client_library_version = __version__ - API_VERSION = "v2" + API_VERSION = "v2" # type: ignore """The version of the API, used in building the API call's URL.""" - API_URL_TEMPLATE = "{api_base_url}/bigquery/{api_version}{path}" + API_URL_TEMPLATE = "{api_base_url}/bigquery/{api_version}{path}" # type: ignore """A template for the URL of a particular API call.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py new file mode 100644 index 000000000000..33fc722611f0 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -0,0 +1,259 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Helpers for interacting with the job REST APIs from the client.""" + +import copy +import uuid +from typing import Any, Dict, TYPE_CHECKING, Optional + +import google.api_core.exceptions as core_exceptions +from google.api_core import retry as retries + +from google.cloud.bigquery import job + +# Avoid circular imports +if TYPE_CHECKING: # pragma: NO COVER + from google.cloud.bigquery.client import Client + + +# The purpose of _TIMEOUT_BUFFER_MILLIS is to allow the server-side timeout to +# happen before the client-side timeout. This is not strictly neccessary, as the +# client retries client-side timeouts, but the hope by making the server-side +# timeout slightly shorter is that it can save the server from some unncessary +# processing time. +# +# 250 milliseconds is chosen arbitrarily, though should be about the right +# order of magnitude for network latency and switching delays. It is about the +# amount of time for light to circumnavigate the world twice. +_TIMEOUT_BUFFER_MILLIS = 250 + + +def make_job_id(job_id: Optional[str] = None, prefix: Optional[str] = None) -> str: + """Construct an ID for a new job. + + Args: + job_id: the user-provided job ID. + prefix: the user-provided prefix for a job ID. + + Returns: + str: A job ID + """ + if job_id is not None: + return job_id + elif prefix is not None: + return str(prefix) + str(uuid.uuid4()) + else: + return str(uuid.uuid4()) + + +def query_jobs_insert( + client: "Client", + query: str, + job_config: Optional[job.QueryJobConfig], + job_id: Optional[str], + job_id_prefix: Optional[str], + location: str, + project: str, + retry: retries.Retry, + timeout: Optional[float], + job_retry: retries.Retry, +) -> job.QueryJob: + """Initiate a query using jobs.insert. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + """ + job_id_given = job_id is not None + job_id_save = job_id + job_config_save = job_config + + def do_query(): + # Make a copy now, so that original doesn't get changed by the process + # below and to facilitate retry + job_config = copy.deepcopy(job_config_save) + + job_id = make_job_id(job_id_save, job_id_prefix) + job_ref = job._JobReference(job_id, project=project, location=location) + query_job = job.QueryJob(job_ref, query, client=client, job_config=job_config) + + try: + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. + if job_id_given: + raise create_exc + + try: + query_job = client.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job + else: + return query_job + + future = do_query() + # The future might be in a failed state now, but if it's + # unrecoverable, we'll find out when we ask for it's result, at which + # point, we may retry. + if not job_id_given: + future._retry_do_query = do_query # in case we have to retry later + future._job_retry = job_retry + + return future + + +def _to_query_request(job_config: Optional[job.QueryJobConfig]) -> Dict[str, Any]: + """Transform from Job resource to QueryRequest resource. + + Most of the keys in job.configuration.query are in common with + QueryRequest. If any configuration property is set that is not available in + jobs.query, it will result in a server-side error. + """ + request_body = {} + job_config_resource = job_config.to_api_repr() if job_config else {} + query_config_resource = job_config_resource.get("query", {}) + + request_body.update(query_config_resource) + + # These keys are top level in job resource and query resource. + if "labels" in job_config_resource: + request_body["labels"] = job_config_resource["labels"] + if "dryRun" in job_config_resource: + request_body["dryRun"] = job_config_resource["dryRun"] + + # Default to standard SQL. + request_body.setdefault("useLegacySql", False) + + # Since jobs.query can return results, ensure we use the lossless timestamp + # format. See: https://github.com/googleapis/python-bigquery/issues/395 + request_body.setdefault("formatOptions", {}) + request_body["formatOptions"]["useInt64Timestamp"] = True # type: ignore + + return request_body + + +def _to_query_job( + client: "Client", + query: str, + request_config: Optional[job.QueryJobConfig], + query_response: Dict[str, Any], +) -> job.QueryJob: + job_ref_resource = query_response["jobReference"] + job_ref = job._JobReference._from_api_repr(job_ref_resource) + query_job = job.QueryJob(job_ref, query, client=client) + query_job._properties.setdefault("configuration", {}) + + # Not all relevant properties are in the jobs.query response. Populate some + # expected properties based on the job configuration. + if request_config is not None: + query_job._properties["configuration"].update(request_config.to_api_repr()) + + query_job._properties["configuration"].setdefault("query", {}) + query_job._properties["configuration"]["query"]["query"] = query + query_job._properties["configuration"]["query"].setdefault("useLegacySql", False) + + query_job._properties.setdefault("statistics", {}) + query_job._properties["statistics"].setdefault("query", {}) + query_job._properties["statistics"]["query"]["cacheHit"] = query_response.get( + "cacheHit" + ) + query_job._properties["statistics"]["query"]["schema"] = query_response.get( + "schema" + ) + query_job._properties["statistics"]["query"][ + "totalBytesProcessed" + ] = query_response.get("totalBytesProcessed") + + # Set errors if any were encountered. + query_job._properties.setdefault("status", {}) + if "errors" in query_response: + # Set errors but not errorResult. If there was an error that failed + # the job, jobs.query behaves like jobs.getQueryResults and returns a + # non-success HTTP status code. + errors = query_response["errors"] + query_job._properties["status"]["errors"] = errors + + # Transform job state so that QueryJob doesn't try to restart the query. + job_complete = query_response.get("jobComplete") + if job_complete: + query_job._properties["status"]["state"] = "DONE" + # TODO: https://github.com/googleapis/python-bigquery/issues/589 + # Set the first page of results if job is "complete" and there is + # only 1 page of results. Otherwise, use the existing logic that + # refreshes the job stats. + # + # This also requires updates to `to_dataframe` and the DB API connector + # so that they don't try to read from a destination table if all the + # results are present. + else: + query_job._properties["status"]["state"] = "PENDING" + + return query_job + + +def query_jobs_query( + client: "Client", + query: str, + job_config: Optional[job.QueryJobConfig], + location: str, + project: str, + retry: retries.Retry, + timeout: Optional[float], + job_retry: retries.Retry, +) -> job.QueryJob: + """Initiate a query using jobs.query. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + """ + path = f"/projects/{project}/queries" + request_body = _to_query_request(job_config) + + if timeout is not None: + # Subtract a buffer for context switching, network latency, etc. + request_body["timeoutMs"] = max(0, int(1000 * timeout) - _TIMEOUT_BUFFER_MILLIS) + request_body["location"] = location + request_body["query"] = query + + def do_query(): + request_body["requestId"] = make_job_id() + span_attributes = {"path": path} + api_response = client._call_api( + retry, + span_name="BigQuery.query", + span_attributes=span_attributes, + method="POST", + path=path, + data=request_body, + timeout=timeout, + ) + return _to_query_job(client, query, job_config, api_response) + + future = do_query() + + # The future might be in a failed state now, but if it's + # unrecoverable, we'll find out when we ask for it's result, at which + # point, we may retry. + future._retry_do_query = do_query # in case we have to retry later + future._job_retry = job_retry + + return future diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index da7c999bd1de..17de6830a5f0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -15,7 +15,9 @@ """Shared helper functions for connecting BigQuery and pandas.""" import concurrent.futures +from datetime import datetime import functools +from itertools import islice import logging import queue import warnings @@ -24,9 +26,18 @@ import pandas # type: ignore except ImportError: # pragma: NO COVER pandas = None + date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype else: import numpy + from db_dtypes import DateDtype, TimeDtype # type: ignore + + date_dtype_name = DateDtype.name + time_dtype_name = TimeDtype.name + +import pyarrow # type: ignore +import pyarrow.parquet # type: ignore + try: # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` from shapely.geometry.base import BaseGeometry as _BaseGeometry # type: ignore @@ -67,9 +78,6 @@ def _to_wkb(v): from google.cloud.bigquery import schema -pyarrow = _helpers.PYARROW_VERSIONS.try_import() - - _LOGGER = logging.getLogger(__name__) _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. @@ -79,9 +87,7 @@ def _to_wkb(v): _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", - # TODO: Update to DATETIME in V3 - # https://github.com/googleapis/python-bigquery/issues/985 - "datetime64[ns]": "TIMESTAMP", + "datetime64[ns]": "DATETIME", "float32": "FLOAT", "float64": "FLOAT", "int8": "INTEGER", @@ -92,6 +98,8 @@ def _to_wkb(v): "uint16": "INTEGER", "uint32": "INTEGER", "geometry": "GEOGRAPHY", + date_dtype_name: "DATE", + time_dtype_name: "TIME", } @@ -127,63 +135,59 @@ def pyarrow_timestamp(): return pyarrow.timestamp("us", tz="UTC") -if pyarrow: - # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py - # When modifying it be sure to update it there as well. - BQ_TO_ARROW_SCALARS = { - "BIGNUMERIC": pyarrow_bignumeric, - "BOOL": pyarrow.bool_, - "BOOLEAN": pyarrow.bool_, - "BYTES": pyarrow.binary, - "DATE": pyarrow.date32, - "DATETIME": pyarrow_datetime, - "FLOAT": pyarrow.float64, - "FLOAT64": pyarrow.float64, - "GEOGRAPHY": pyarrow.string, - "INT64": pyarrow.int64, - "INTEGER": pyarrow.int64, - "NUMERIC": pyarrow_numeric, - "STRING": pyarrow.string, - "TIME": pyarrow_time, - "TIMESTAMP": pyarrow_timestamp, - } - ARROW_SCALAR_IDS_TO_BQ = { - # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes - pyarrow.bool_().id: "BOOL", - pyarrow.int8().id: "INT64", - pyarrow.int16().id: "INT64", - pyarrow.int32().id: "INT64", - pyarrow.int64().id: "INT64", - pyarrow.uint8().id: "INT64", - pyarrow.uint16().id: "INT64", - pyarrow.uint32().id: "INT64", - pyarrow.uint64().id: "INT64", - pyarrow.float16().id: "FLOAT64", - pyarrow.float32().id: "FLOAT64", - pyarrow.float64().id: "FLOAT64", - pyarrow.time32("ms").id: "TIME", - pyarrow.time64("ns").id: "TIME", - pyarrow.timestamp("ns").id: "TIMESTAMP", - pyarrow.date32().id: "DATE", - pyarrow.date64().id: "DATETIME", # because millisecond resolution - pyarrow.binary().id: "BYTES", - pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", - } - BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { - "GEOGRAPHY": { - b"ARROW:extension:name": b"google:sqlType:geography", - b"ARROW:extension:metadata": b'{"encoding": "WKT"}', - }, - "DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"}, - } - -else: # pragma: NO COVER - BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER - ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER +# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py +# When modifying it be sure to update it there as well. +BQ_TO_ARROW_SCALARS = { + "BIGNUMERIC": pyarrow_bignumeric, + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, +} +ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", +} +BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { + "GEOGRAPHY": { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + }, + "DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"}, +} def bq_to_arrow_struct_data_type(field): @@ -261,6 +265,42 @@ def bq_to_arrow_schema(bq_schema): return pyarrow.schema(arrow_fields) +def default_types_mapper(date_as_object: bool = False): + """Create a mapping from pyarrow types to pandas types. + + This overrides the pandas defaults to use null-safe extension types where + available. + + See: https://arrow.apache.org/docs/python/api/datatypes.html for a list of + data types. See: + tests/unit/test__pandas_helpers.py::test_bq_to_arrow_data_type for + BigQuery to Arrow type mapping. + + Note to google-cloud-bigquery developers: If you update the default dtypes, + also update the docs at docs/usage/pandas.rst. + """ + + def types_mapper(arrow_data_type): + if pyarrow.types.is_boolean(arrow_data_type): + return pandas.BooleanDtype() + + elif ( + # If date_as_object is True, we know some DATE columns are + # out-of-bounds of what is supported by pandas. + not date_as_object + and pyarrow.types.is_date(arrow_data_type) + ): + return DateDtype() + + elif pyarrow.types.is_integer(arrow_data_type): + return pandas.Int64Dtype() + + elif pyarrow.types.is_time(arrow_data_type): + return TimeDtype() + + return types_mapper + + def bq_to_arrow_array(series, bq_field): if bq_field.field_type.upper() == "GEOGRAPHY": arrow_type = None @@ -339,6 +379,36 @@ def _first_valid(series): return series.at[first_valid_index] +def _first_array_valid(series): + """Return the first "meaningful" element from the array series. + + Here, "meaningful" means the first non-None element in one of the arrays that can + be used for type detextion. + """ + first_valid_index = series.first_valid_index() + if first_valid_index is None: + return None + + valid_array = series.at[first_valid_index] + valid_item = next((item for item in valid_array if not pandas.isna(item)), None) + + if valid_item is not None: + return valid_item + + # Valid item is None because all items in the "valid" array are invalid. Try + # to find a true valid array manually. + for array in islice(series, first_valid_index + 1, None): + try: + array_iter = iter(array) + except TypeError: + continue # Not an array, apparently, e.g. None, thus skip. + valid_item = next((item for item in array_iter if not pandas.isna(item)), None) + if valid_item is not None: + break + + return valid_item + + def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. @@ -404,13 +474,6 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # If schema detection was not successful for all columns, also try with # pyarrow, if available. if unknown_type_fields: - if not pyarrow: - msg = "Could not determine the type of columns: {}".format( - ", ".join(field.name for field in unknown_type_fields) - ) - warnings.warn(msg) - return None # We cannot detect the schema in full. - # The augment_schema() helper itself will also issue unknown type # warnings if detection still fails for any of the fields. bq_schema_out = augment_schema(dataframe, bq_schema_out) @@ -449,6 +512,19 @@ def augment_schema(dataframe, current_bq_schema): # `pyarrow.ListType` detected_mode = "REPEATED" detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.values.type.id) + + # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds + # it to such datetimes, causing them to be recognized as TIMESTAMP type. + # We thus additionally check the actual data to see if we need to overrule + # that and choose DATETIME instead. + # Note that this should only be needed for datetime values inside a list, + # since scalar datetime values have a proper Pandas dtype that allows + # distinguishing between timezone-naive and timezone-aware values before + # even requiring the additional schema augment logic in this method. + if detected_type == "TIMESTAMP": + valid_item = _first_array_valid(dataframe[field.name]) + if isinstance(valid_item, datetime) and valid_item.tzinfo is None: + detected_type = "DATETIME" else: detected_mode = field.mode detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.type.id) @@ -572,8 +648,6 @@ def dataframe_to_parquet( This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``. """ - pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) - import pyarrow.parquet # type: ignore kwargs = ( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a99e8fcb4bbb..b388f1d4c2d4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -57,26 +57,23 @@ from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # type: ignore # pytype: disable=import-error -try: - from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( - DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, - ) -except ImportError: - DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore +from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( + DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, +) -from google.cloud.bigquery._helpers import _del_sub_prop +from google.cloud.bigquery import _job_helpers +from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none -from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -110,8 +107,6 @@ from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery import _helpers -pyarrow = _helpers.PYARROW_VERSIONS.try_import() - TimeoutType = Union[float, None] ResumableTimeoutType = Union[ None, float, Tuple[float, float] @@ -146,7 +141,6 @@ # https://github.com/googleapis/python-bigquery/issues/438 _MIN_GET_QUERY_RESULTS_TIMEOUT = 120 - TIMEOUT_HEADER = "X-Server-Timeout" @@ -212,7 +206,7 @@ class Client(ClientWithProject): to acquire default credentials. """ - SCOPE = ( + SCOPE = ( # type: ignore "https://www.googleapis.com/auth/bigquery", "https://www.googleapis.com/auth/cloud-platform", ) @@ -227,7 +221,7 @@ def __init__( default_query_job_config=None, client_info=None, client_options=None, - ): + ) -> None: super(Client, self).__init__( project=project, credentials=credentials, @@ -508,17 +502,10 @@ def _ensure_bqstorage_client( ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]: """Create a BigQuery Storage API client using this client's credentials. - If a client cannot be created due to a missing or outdated dependency - `google-cloud-bigquery-storage`, raise a warning and return ``None``. - - If the `bqstorage_client` argument is not ``None``, still perform the version - check and return the argument back to the caller if the check passes. If it - fails, raise a warning and return ``None``. - Args: bqstorage_client: - An existing BigQuery Storage client instance to check for version - compatibility. If ``None``, a new instance is created and returned. + An existing BigQuery Storage client instance. If ``None``, a new + instance is created and returned. client_options: Custom options used with a new BigQuery Storage client instance if one is created. @@ -529,20 +516,7 @@ def _ensure_bqstorage_client( Returns: A BigQuery Storage API client. """ - try: - from google.cloud import bigquery_storage - except ImportError: - warnings.warn( - "Cannot create BigQuery Storage client, the dependency " - "google-cloud-bigquery-storage is not installed." - ) - return None - - try: - BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: - warnings.warn(str(exc)) - return None + from google.cloud import bigquery_storage if bqstorage_client is None: bqstorage_client = bigquery_storage.BigQueryReadClient( @@ -1997,12 +1971,10 @@ def create_job( source_type=source_type, ) elif "query" in job_config: - copy_config = copy.deepcopy(job_config) - _del_sub_prop(copy_config, ["query", "destinationTable"]) query_job_config = google.cloud.bigquery.job.QueryJobConfig.from_api_repr( - copy_config + job_config ) - query = _get_sub_prop(copy_config, ["query", "query"]) + query = _get_sub_prop(job_config, ["query", "query"]) return self.query( query, job_config=typing.cast(QueryJobConfig, query_job_config), @@ -2520,7 +2492,7 @@ def load_table_from_dataframe( :attr:`~google.cloud.bigquery.job.LoadJobConfig.schema` with column names matching those of the dataframe. The BigQuery schema is used to determine the correct data type conversion. - Indexes are not loaded. Requires the :mod:`pyarrow` library. + Indexes are not loaded. By default, this method uses the parquet source format. To override this, supply a value for @@ -2554,9 +2526,6 @@ def load_table_from_dataframe( google.cloud.bigquery.job.LoadJob: A new load job. Raises: - ValueError: - If a usable parquet engine cannot be found. This method - requires :mod:`pyarrow` to be installed. TypeError: If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` class. @@ -2594,10 +2563,6 @@ def load_table_from_dataframe( ) ) - if pyarrow is None and job_config.source_format == job.SourceFormat.PARQUET: - # pyarrow is now the only supported parquet engine. - raise ValueError("This method requires pyarrow to be installed") - if location is None: location = self.location @@ -2653,16 +2618,6 @@ def load_table_from_dataframe( try: if job_config.source_format == job.SourceFormat.PARQUET: - if _helpers.PYARROW_VERSIONS.is_bad_version: - msg = ( - "Loading dataframe data in PARQUET format with pyarrow " - f"{_helpers.PYARROW_VERSIONS.installed_version} can result in data " - "corruption. It is therefore *strongly* advised to use a " - "different pyarrow version or a different source format. " - "See: https://github.com/googleapis/python-bigquery/issues/781" - ) - warnings.warn(msg, category=RuntimeWarning) - if job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() @@ -3247,6 +3202,7 @@ def query( retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, + api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT, ) -> job.QueryJob: """Run a SQL query. @@ -3298,6 +3254,11 @@ def query( called on the job returned. The ``job_retry`` specified here becomes the default ``job_retry`` for ``result()``, where it can also be specified. + api_method (Union[str, enums.QueryApiMethod]): + Method with which to start the query job. + + See :class:`google.cloud.bigquery.enums.QueryApiMethod` for + details on the difference between the query start methods. Returns: google.cloud.bigquery.job.QueryJob: A new query job instance. @@ -3321,7 +3282,10 @@ def query( " provided." ) - job_id_save = job_id + if job_id_given and api_method == enums.QueryApiMethod.QUERY: + raise TypeError( + "`job_id` was provided, but the 'QUERY' `api_method` was requested." + ) if project is None: project = self.project @@ -3352,50 +3316,32 @@ def query( # Note that we haven't modified the original job_config (or # _default_query_job_config) up to this point. - job_config_save = job_config - - def do_query(): - # Make a copy now, so that original doesn't get changed by the process - # below and to facilitate retry - job_config = copy.deepcopy(job_config_save) - - job_id = _make_job_id(job_id_save, job_id_prefix) - job_ref = job._JobReference(job_id, project=project, location=location) - query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) - - try: - query_job._begin(retry=retry, timeout=timeout) - except core_exceptions.Conflict as create_exc: - # The thought is if someone is providing their own job IDs and they get - # their job ID generation wrong, this could end up returning results for - # the wrong query. We thus only try to recover if job ID was not given. - if job_id_given: - raise create_exc - - try: - query_job = self.get_job( - job_id, - project=project, - location=location, - retry=retry, - timeout=timeout, - ) - except core_exceptions.GoogleAPIError: # (includes RetryError) - raise create_exc - else: - return query_job - else: - return query_job - - future = do_query() - # The future might be in a failed state now, but if it's - # unrecoverable, we'll find out when we ask for it's result, at which - # point, we may retry. - if not job_id_given: - future._retry_do_query = do_query # in case we have to retry later - future._job_retry = job_retry - - return future + if api_method == enums.QueryApiMethod.QUERY: + return _job_helpers.query_jobs_query( + self, + query, + job_config, + location, + project, + retry, + timeout, + job_retry, + ) + elif api_method == enums.QueryApiMethod.INSERT: + return _job_helpers.query_jobs_insert( + self, + query, + job_config, + job_id, + job_id_prefix, + location, + project, + retry, + timeout, + job_retry, + ) + else: + raise ValueError(f"Got unexpected value for api_method: {repr(api_method)}") def insert_rows( self, @@ -3522,7 +3468,9 @@ def insert_rows_json( self, table: Union[Table, TableReference, TableListItem, str], json_rows: Sequence[Dict], - row_ids: Union[Iterable[str], AutoRowIDs, None] = AutoRowIDs.GENERATE_UUID, + row_ids: Union[ + Iterable[Optional[str]], AutoRowIDs, None + ] = AutoRowIDs.GENERATE_UUID, skip_invalid_rows: bool = None, ignore_unknown_values: bool = None, template_suffix: str = None, @@ -4068,24 +4016,6 @@ def _extract_job_reference(job, project=None, location=None): return (project, location, job_id) -def _make_job_id(job_id: Optional[str], prefix: Optional[str] = None) -> str: - """Construct an ID for a new job. - - Args: - job_id: the user-provided job ID. - prefix: the user-provided prefix for a job ID. - - Returns: - str: A job ID - """ - if job_id is not None: - return job_id - elif prefix is not None: - return str(prefix) + str(uuid.uuid4()) - else: - return str(uuid.uuid4()) - - def _check_mode(stream): """Check that a stream was opened in read-binary mode. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index cf317024f881..0fafd578384a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -17,6 +17,7 @@ from __future__ import absolute_import import copy +from typing import Dict, Any import google.cloud._helpers # type: ignore @@ -27,7 +28,7 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration -def _get_table_reference(self, table_id): +def _get_table_reference(self, table_id: str) -> TableReference: """Constructs a TableReference. Args: @@ -143,8 +144,8 @@ class AccessEntry(object): >>> entry = AccessEntry(None, 'view', view) """ - def __init__(self, role=None, entity_type=None, entity_id=None): - self._properties = {} + def __init__(self, role=None, entity_type=None, entity_id=None) -> None: + self._properties: Dict[str, Any] = {} if entity_type in ("view", "routine", "dataset"): if role is not None: raise ValueError( @@ -404,7 +405,7 @@ class Dataset(object): "default_encryption_configuration": "defaultEncryptionConfiguration", } - def __init__(self, dataset_ref): + def __init__(self, dataset_ref) -> None: if isinstance(dataset_ref, str): dataset_ref = DatasetReference.from_string(dataset_ref) self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 30f40ea07394..117fa8ae70b4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -22,7 +22,7 @@ import typing from google.cloud import bigquery -from google.cloud.bigquery import table, enums, query +from google.cloud.bigquery import table, query from google.cloud.bigquery.dbapi import exceptions @@ -48,7 +48,7 @@ def _parameter_type(name, value, query_parameter_type=None, value_doc=""): query_parameter_type = type_parameters_re.sub("", query_parameter_type) try: parameter_type = getattr( - enums.SqlParameterScalarTypes, query_parameter_type.upper() + query.SqlParameterScalarTypes, query_parameter_type.upper() )._type except AttributeError: raise exceptions.ProgrammingError( @@ -185,7 +185,7 @@ def _parse_type( # Strip type parameters type_ = type_parameters_re.sub("", type_).strip() try: - type_ = getattr(enums.SqlParameterScalarTypes, type_.upper()) + type_ = getattr(query.SqlParameterScalarTypes, type_.upper()) except AttributeError: raise exceptions.ProgrammingError( f"The given parameter type, {type_}," diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/encryption_configuration.py b/packages/google-cloud-bigquery/google/cloud/bigquery/encryption_configuration.py index ba04ae2c45a7..d0b6f36774c6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/encryption_configuration.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/encryption_configuration.py @@ -24,7 +24,7 @@ class EncryptionConfiguration(object): kms_key_name (str): resource ID of Cloud KMS key used for encryption """ - def __init__(self, kms_key_name=None): + def __init__(self, kms_key_name=None) -> None: self._properties = {} if kms_key_name is not None: self._properties["kmsKeyName"] = kms_key_name diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 7fc0a5fd6b1b..45d43a2a7c62 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -12,13 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re - import enum -import itertools - -from google.cloud.bigquery_v2 import types as gapic_types -from google.cloud.bigquery.query import ScalarQueryParameterType class AutoRowIDs(enum.Enum): @@ -128,6 +122,45 @@ class QueryPriority(object): """Specifies batch priority.""" +class QueryApiMethod(str, enum.Enum): + """API method used to start the query. The default value is + :attr:`INSERT`. + """ + + INSERT = "INSERT" + """Submit a query job by using the `jobs.insert REST API method + `_. + + This supports all job configuration options. + """ + + QUERY = "QUERY" + """Submit a query job by using the `jobs.query REST API method + `_. + + Differences from ``INSERT``: + + * Many parameters and job configuration options, including job ID and + destination table, cannot be used + with this API method. See the `jobs.query REST API documentation + `_ for + the complete list of supported configuration options. + + * API blocks up to a specified timeout, waiting for the query to + finish. + + * The full job resource (including job statistics) may not be available. + Call :meth:`~google.cloud.bigquery.job.QueryJob.reload` or + :meth:`~google.cloud.bigquery.client.Client.get_job` to get full job + statistics and configuration. + + * :meth:`~google.cloud.bigquery.Client.query` can raise API exceptions if + the query fails, whereas the same errors don't appear until calling + :meth:`~google.cloud.bigquery.job.QueryJob.result` when the ``INSERT`` + API method is used. + """ + + class SchemaUpdateOption(object): """Specifies an update to the destination table schema as a side effect of a load job. @@ -180,56 +213,27 @@ class KeyResultStatementKind: FIRST_SELECT = "FIRST_SELECT" -_SQL_SCALAR_TYPES = frozenset( - ( - "INT64", - "BOOL", - "FLOAT64", - "STRING", - "BYTES", - "TIMESTAMP", - "DATE", - "TIME", - "DATETIME", - "INTERVAL", - "GEOGRAPHY", - "NUMERIC", - "BIGNUMERIC", - "JSON", - ) -) - -_SQL_NONSCALAR_TYPES = frozenset(("TYPE_KIND_UNSPECIFIED", "ARRAY", "STRUCT")) - - -def _make_sql_scalars_enum(): - """Create an enum based on a gapic enum containing only SQL scalar types.""" - - new_enum = enum.Enum( - "StandardSqlDataTypes", - ( - (member.name, member.value) - for member in gapic_types.StandardSqlDataType.TypeKind - if member.name in _SQL_SCALAR_TYPES - ), - ) - - # make sure the docstring for the new enum is also correct - orig_doc = gapic_types.StandardSqlDataType.TypeKind.__doc__ - skip_pattern = re.compile( - "|".join(_SQL_NONSCALAR_TYPES) - + "|because a JSON object" # the second description line of STRUCT member - ) - - new_doc = "\n".join( - itertools.filterfalse(skip_pattern.search, orig_doc.splitlines()) - ) - new_enum.__doc__ = "An Enum of scalar SQL types.\n" + new_doc - - return new_enum - - -StandardSqlDataTypes = _make_sql_scalars_enum() +class StandardSqlTypeNames(str, enum.Enum): + def _generate_next_value_(name, start, count, last_values): + return name + + TYPE_KIND_UNSPECIFIED = enum.auto() + INT64 = enum.auto() + BOOL = enum.auto() + FLOAT64 = enum.auto() + STRING = enum.auto() + BYTES = enum.auto() + TIMESTAMP = enum.auto() + DATE = enum.auto() + TIME = enum.auto() + DATETIME = enum.auto() + INTERVAL = enum.auto() + GEOGRAPHY = enum.auto() + NUMERIC = enum.auto() + BIGNUMERIC = enum.auto() + JSON = enum.auto() + ARRAY = enum.auto() + STRUCT = enum.auto() class EntityTypes(str, enum.Enum): @@ -270,28 +274,6 @@ class SqlTypeNames(str, enum.Enum): INTERVAL = "INTERVAL" # NOTE: not available in legacy types -class SqlParameterScalarTypes: - """Supported scalar SQL query parameter types as type objects.""" - - BOOL = ScalarQueryParameterType("BOOL") - BOOLEAN = ScalarQueryParameterType("BOOL") - BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") - BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") - BYTES = ScalarQueryParameterType("BYTES") - DATE = ScalarQueryParameterType("DATE") - DATETIME = ScalarQueryParameterType("DATETIME") - DECIMAL = ScalarQueryParameterType("NUMERIC") - FLOAT = ScalarQueryParameterType("FLOAT64") - FLOAT64 = ScalarQueryParameterType("FLOAT64") - GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") - INT64 = ScalarQueryParameterType("INT64") - INTEGER = ScalarQueryParameterType("INT64") - NUMERIC = ScalarQueryParameterType("NUMERIC") - STRING = ScalarQueryParameterType("STRING") - TIME = ScalarQueryParameterType("TIME") - TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") - - class WriteDisposition(object): """Specifies the action that occurs if destination table already exists. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py deleted file mode 100644 index fb1188eee780..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class BigQueryError(Exception): - """Base class for all custom exceptions defined by the BigQuery client.""" - - -class LegacyBigQueryStorageError(BigQueryError): - """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" - - -class LegacyPyarrowError(BigQueryError): - """Raised when too old a version of pyarrow package is detected at runtime.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 8470498090a7..640b2d16b689 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -22,7 +22,7 @@ import base64 import copy -from typing import FrozenSet, Iterable, Optional, Union +from typing import Any, Dict, FrozenSet, Iterable, Optional, Union from google.cloud.bigquery._helpers import _to_bytes from google.cloud.bigquery._helpers import _bytes_to_json @@ -575,8 +575,8 @@ class HivePartitioningOptions(object): https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions """ - def __init__(self): - self._properties = {} + def __init__(self) -> None: + self._properties: Dict[str, Any] = {} @property def mode(self): @@ -657,7 +657,7 @@ class ExternalConfig(object): See :attr:`source_format`. """ - def __init__(self, source_format): + def __init__(self, source_format) -> None: self._properties = {"sourceFormat": source_format} @property diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py index f0dd3d668964..29558c01f605 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py @@ -52,7 +52,7 @@ class CopyJobConfig(_JobConfig): the property name as the name of a keyword argument. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: super(CopyJobConfig, self).__init__("copy", **kwargs) @property diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index 2d68f7f71570..e4b44395e7f5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -50,7 +50,7 @@ class LoadJobConfig(_JobConfig): :data:`True`. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: super(LoadJobConfig, self).__init__("load", **kwargs) @property diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 54f950a6620b..c2d304e30c7b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -270,7 +270,7 @@ class QueryJobConfig(_JobConfig): the property name as the name of a keyword argument. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: super(QueryJobConfig, self).__init__("query", **kwargs) @property @@ -1107,7 +1107,7 @@ def ddl_target_table(self): return prop @property - def num_dml_affected_rows(self): + def num_dml_affected_rows(self) -> Optional[int]: """Return the number of DML rows affected by the job. See: @@ -1537,7 +1537,7 @@ def do_get_result(): def to_arrow( self, progress_bar_type: str = None, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, ) -> "pyarrow.Table": @@ -1568,8 +1568,7 @@ def to_arrow( BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. Reading from a specific partition or snapshot is not currently supported by this method. @@ -1594,10 +1593,6 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. - Raises: - ValueError: - If the :mod:`pyarrow` library cannot be imported. - .. versionadded:: 1.17.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) @@ -1612,11 +1607,10 @@ def to_arrow( # that should only exist here in the QueryJob method. def to_dataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, - date_as_object: bool = True, max_results: Optional[int] = None, geography_as_object: bool = False, ) -> "pandas.DataFrame": @@ -1659,12 +1653,6 @@ def to_dataframe( .. versionadded:: 1.24.0 - date_as_object (Optional[bool]): - If ``True`` (default), cast dates to objects. If ``False``, convert - to datetime64[ns] dtype. - - .. versionadded:: 1.26.0 - max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. @@ -1698,7 +1686,6 @@ def to_dataframe( dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, geography_as_object=geography_as_object, ) @@ -1711,7 +1698,6 @@ def to_geodataframe( dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, - date_as_object: bool = True, max_results: Optional[int] = None, geography_column: Optional[str] = None, ) -> "geopandas.GeoDataFrame": @@ -1754,12 +1740,6 @@ def to_geodataframe( .. versionadded:: 1.24.0 - date_as_object (Optional[bool]): - If ``True`` (default), cast dates to objects. If ``False``, convert - to datetime64[ns] dtype. - - .. versionadded:: 1.26.0 - max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. @@ -1792,7 +1772,6 @@ def to_geodataframe( dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, geography_column=geography_column, ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index a5941158e898..14819aa59f20 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -744,17 +744,6 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): if not use_bqstorage_api: return None - try: - from google.cloud import bigquery_storage # noqa: F401 - except ImportError as err: - customized_error = ImportError( - "The default BigQuery Storage API client cannot be used, install " - "the missing google-cloud-bigquery-storage and pyarrow packages " - "to use it. Alternatively, use the classic REST API by specifying " - "the --use_rest_api magic option." - ) - raise customized_error from err - try: from google.api_core.gapic_v1 import client_info as gapic_client_info except ImportError as err: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index cdb411e089be..4d2bc346c92d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -17,24 +17,24 @@ """Define resources for the BigQuery ML Models API.""" import copy - -from google.protobuf import json_format +import datetime +import typing +from typing import Any, Dict, Optional, Sequence, Union import google.cloud._helpers # type: ignore -from google.api_core import datetime_helpers # type: ignore from google.cloud.bigquery import _helpers -from google.cloud.bigquery_v2 import types +from google.cloud.bigquery import standard_sql from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration -class Model(object): +class Model: """Model represents a machine learning model resource. See https://cloud.google.com/bigquery/docs/reference/rest/v2/models Args: - model_ref (Union[google.cloud.bigquery.model.ModelReference, str]): + model_ref: A pointer to a model. If ``model_ref`` is a string, it must included a project ID, dataset ID, and model ID, each separated by ``.``. @@ -51,11 +51,7 @@ class Model(object): "encryption_configuration": "encryptionConfiguration", } - def __init__(self, model_ref): - # Use _proto on read-only properties to use it's built-in type - # conversion. - self._proto = types.Model()._pb - + def __init__(self, model_ref: Union["ModelReference", str, None]): # Use _properties on read-write properties to match the REST API # semantics. The BigQuery API makes a distinction between an unset # value, a null value, and a default value (0 or ""), but the protocol @@ -66,198 +62,221 @@ def __init__(self, model_ref): model_ref = ModelReference.from_string(model_ref) if model_ref: - self._proto.model_reference.CopyFrom(model_ref._proto) + self._properties["modelReference"] = model_ref.to_api_repr() @property - def reference(self): - """A :class:`~google.cloud.bigquery.model.ModelReference` pointing to - this model. + def reference(self) -> Optional["ModelReference"]: + """A model reference pointing to this model. Read-only. - - Returns: - google.cloud.bigquery.model.ModelReference: pointer to this model. """ - ref = ModelReference() - ref._proto = self._proto.model_reference - return ref + resource = self._properties.get("modelReference") + if resource is None: + return None + else: + return ModelReference.from_api_repr(resource) @property - def project(self): - """str: Project bound to the model""" - return self.reference.project + def project(self) -> Optional[str]: + """Project bound to the model.""" + ref = self.reference + return ref.project if ref is not None else None @property - def dataset_id(self): - """str: ID of dataset containing the model.""" - return self.reference.dataset_id + def dataset_id(self) -> Optional[str]: + """ID of dataset containing the model.""" + ref = self.reference + return ref.dataset_id if ref is not None else None @property - def model_id(self): - """str: The model ID.""" - return self.reference.model_id + def model_id(self) -> Optional[str]: + """The model ID.""" + ref = self.reference + return ref.model_id if ref is not None else None @property - def path(self): - """str: URL path for the model's APIs.""" - return self.reference.path + def path(self) -> Optional[str]: + """URL path for the model's APIs.""" + ref = self.reference + return ref.path if ref is not None else None @property - def location(self): - """str: The geographic location where the model resides. This value - is inherited from the dataset. + def location(self) -> Optional[str]: + """The geographic location where the model resides. + + This value is inherited from the dataset. Read-only. """ - return self._proto.location + return typing.cast(Optional[str], self._properties.get("location")) @property - def etag(self): - """str: ETag for the model resource (:data:`None` until - set from the server). + def etag(self) -> Optional[str]: + """ETag for the model resource (:data:`None` until set from the server). Read-only. """ - return self._proto.etag + return typing.cast(Optional[str], self._properties.get("etag")) @property - def created(self): - """Union[datetime.datetime, None]: Datetime at which the model was - created (:data:`None` until set from the server). + def created(self) -> Optional[datetime.datetime]: + """Datetime at which the model was created (:data:`None` until set from the server). Read-only. """ - value = self._proto.creation_time - if value is not None and value != 0: + value = typing.cast(Optional[float], self._properties.get("creationTime")) + if value is None: + return None + else: # value will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( 1000.0 * float(value) ) @property - def modified(self): - """Union[datetime.datetime, None]: Datetime at which the model was last - modified (:data:`None` until set from the server). + def modified(self) -> Optional[datetime.datetime]: + """Datetime at which the model was last modified (:data:`None` until set from the server). Read-only. """ - value = self._proto.last_modified_time - if value is not None and value != 0: + value = typing.cast(Optional[float], self._properties.get("lastModifiedTime")) + if value is None: + return None + else: # value will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( 1000.0 * float(value) ) @property - def model_type(self): - """google.cloud.bigquery_v2.types.Model.ModelType: Type of the - model resource. + def model_type(self) -> str: + """Type of the model resource. Read-only. - - The value is one of elements of the - :class:`~google.cloud.bigquery_v2.types.Model.ModelType` - enumeration. """ - return self._proto.model_type + return typing.cast( + str, self._properties.get("modelType", "MODEL_TYPE_UNSPECIFIED") + ) @property - def training_runs(self): - """Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun]: Information - for all training runs in increasing order of start time. + def training_runs(self) -> Sequence[Dict[str, Any]]: + """Information for all training runs in increasing order of start time. - Read-only. + Dictionaries are in REST API format. See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun - An iterable of :class:`~google.cloud.bigquery_v2.types.Model.TrainingRun`. + Read-only. """ - return self._proto.training_runs + return typing.cast( + Sequence[Dict[str, Any]], self._properties.get("trainingRuns", []) + ) @property - def feature_columns(self): - """Sequence[google.cloud.bigquery_v2.types.StandardSqlField]: Input - feature columns that were used to train this model. + def feature_columns(self) -> Sequence[standard_sql.StandardSqlField]: + """Input feature columns that were used to train this model. Read-only. - - An iterable of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`. """ - return self._proto.feature_columns + resource: Sequence[Dict[str, Any]] = typing.cast( + Sequence[Dict[str, Any]], self._properties.get("featureColumns", []) + ) + return [ + standard_sql.StandardSqlField.from_api_repr(column) for column in resource + ] @property - def label_columns(self): - """Sequence[google.cloud.bigquery_v2.types.StandardSqlField]: Label - columns that were used to train this model. The output of the model - will have a ``predicted_`` prefix to these columns. + def label_columns(self) -> Sequence[standard_sql.StandardSqlField]: + """Label columns that were used to train this model. - Read-only. + The output of the model will have a ``predicted_`` prefix to these columns. - An iterable of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`. + Read-only. """ - return self._proto.label_columns + resource: Sequence[Dict[str, Any]] = typing.cast( + Sequence[Dict[str, Any]], self._properties.get("labelColumns", []) + ) + return [ + standard_sql.StandardSqlField.from_api_repr(column) for column in resource + ] @property - def expires(self): - """Union[datetime.datetime, None]: The datetime when this model - expires. If not present, the model will persist indefinitely. Expired - models will be deleted and their storage reclaimed. + def best_trial_id(self) -> Optional[int]: + """The best trial_id across all training runs. + + .. deprecated:: + This property is deprecated! + + Read-only. """ - value = self._properties.get("expirationTime") + value = typing.cast(Optional[int], self._properties.get("bestTrialId")) if value is not None: + value = int(value) + return value + + @property + def expires(self) -> Optional[datetime.datetime]: + """The datetime when this model expires. + + If not present, the model will persist indefinitely. Expired models will be + deleted and their storage reclaimed. + """ + value = typing.cast(Optional[float], self._properties.get("expirationTime")) + if value is None: + return None + else: # value will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( 1000.0 * float(value) ) @expires.setter - def expires(self, value): - if value is not None: - value = str(google.cloud._helpers._millis_from_datetime(value)) - self._properties["expirationTime"] = value + def expires(self, value: Optional[datetime.datetime]): + if value is None: + value_to_store: Optional[str] = None + else: + value_to_store = str(google.cloud._helpers._millis_from_datetime(value)) + # TODO: Consider using typing.TypedDict when only Python 3.8+ is supported. + self._properties["expirationTime"] = value_to_store # type: ignore @property - def description(self): - """Optional[str]: Description of the model (defaults to - :data:`None`). - """ - return self._properties.get("description") + def description(self) -> Optional[str]: + """Description of the model (defaults to :data:`None`).""" + return typing.cast(Optional[str], self._properties.get("description")) @description.setter - def description(self, value): - self._properties["description"] = value + def description(self, value: Optional[str]): + # TODO: Consider using typing.TypedDict when only Python 3.8+ is supported. + self._properties["description"] = value # type: ignore @property - def friendly_name(self): - """Optional[str]: Title of the table (defaults to :data:`None`). - - Raises: - ValueError: For invalid value types. - """ - return self._properties.get("friendlyName") + def friendly_name(self) -> Optional[str]: + """Title of the table (defaults to :data:`None`).""" + return typing.cast(Optional[str], self._properties.get("friendlyName")) @friendly_name.setter - def friendly_name(self, value): - self._properties["friendlyName"] = value + def friendly_name(self, value: Optional[str]): + # TODO: Consider using typing.TypedDict when only Python 3.8+ is supported. + self._properties["friendlyName"] = value # type: ignore @property - def labels(self): - """Optional[Dict[str, str]]: Labels for the table. + def labels(self) -> Dict[str, str]: + """Labels for the table. - This method always returns a dict. To change a model's labels, - modify the dict, then call ``Client.update_model``. To delete a - label, set its value to :data:`None` before updating. + This method always returns a dict. To change a model's labels, modify the dict, + then call ``Client.update_model``. To delete a label, set its value to + :data:`None` before updating. """ return self._properties.setdefault("labels", {}) @labels.setter - def labels(self, value): + def labels(self, value: Optional[Dict[str, str]]): if value is None: value = {} self._properties["labels"] = value @property - def encryption_configuration(self): - """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom - encryption configuration for the model. + def encryption_configuration(self) -> Optional[EncryptionConfiguration]: + """Custom encryption configuration for the model. Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` if using default encryption. @@ -269,50 +288,27 @@ def encryption_configuration(self): prop = self._properties.get("encryptionConfiguration") if prop: prop = EncryptionConfiguration.from_api_repr(prop) - return prop + return typing.cast(Optional[EncryptionConfiguration], prop) @encryption_configuration.setter - def encryption_configuration(self, value): - api_repr = value - if value: - api_repr = value.to_api_repr() + def encryption_configuration(self, value: Optional[EncryptionConfiguration]): + api_repr = value.to_api_repr() if value else value self._properties["encryptionConfiguration"] = api_repr @classmethod - def from_api_repr(cls, resource: dict) -> "Model": + def from_api_repr(cls, resource: Dict[str, Any]) -> "Model": """Factory: construct a model resource given its API representation Args: - resource (Dict[str, object]): + resource: Model resource representation from the API Returns: - google.cloud.bigquery.model.Model: Model parsed from ``resource``. + Model parsed from ``resource``. """ this = cls(None) - # Keep a reference to the resource as a workaround to find unknown - # field values. - this._properties = resource - - # Convert from millis-from-epoch to timestamp well-known type. - # TODO: Remove this hack once CL 238585470 hits prod. resource = copy.deepcopy(resource) - for training_run in resource.get("trainingRuns", ()): - start_time = training_run.get("startTime") - if not start_time or "-" in start_time: # Already right format? - continue - start_time = datetime_helpers.from_microseconds(1e3 * float(start_time)) - training_run["startTime"] = datetime_helpers.to_rfc3339(start_time) - - try: - this._proto = json_format.ParseDict( - resource, types.Model()._pb, ignore_unknown_fields=True - ) - except json_format.ParseError: - resource["modelType"] = "MODEL_TYPE_UNSPECIFIED" - this._proto = json_format.ParseDict( - resource, types.Model()._pb, ignore_unknown_fields=True - ) + this._properties = resource return this def _build_resource(self, filter_fields): @@ -320,18 +316,18 @@ def _build_resource(self, filter_fields): return _helpers._build_resource_from_properties(self, filter_fields) def __repr__(self): - return "Model(reference={})".format(repr(self.reference)) + return f"Model(reference={self.reference!r})" - def to_api_repr(self) -> dict: + def to_api_repr(self) -> Dict[str, Any]: """Construct the API resource representation of this model. Returns: - Dict[str, object]: Model reference represented as an API resource + Model reference represented as an API resource """ - return json_format.MessageToDict(self._proto) + return copy.deepcopy(self._properties) -class ModelReference(object): +class ModelReference: """ModelReferences are pointers to models. See @@ -339,73 +335,60 @@ class ModelReference(object): """ def __init__(self): - self._proto = types.ModelReference()._pb self._properties = {} @property def project(self): """str: Project bound to the model""" - return self._proto.project_id + return self._properties.get("projectId") @property def dataset_id(self): """str: ID of dataset containing the model.""" - return self._proto.dataset_id + return self._properties.get("datasetId") @property def model_id(self): """str: The model ID.""" - return self._proto.model_id + return self._properties.get("modelId") @property - def path(self): - """str: URL path for the model's APIs.""" - return "/projects/%s/datasets/%s/models/%s" % ( - self._proto.project_id, - self._proto.dataset_id, - self._proto.model_id, - ) + def path(self) -> str: + """URL path for the model's APIs.""" + return f"/projects/{self.project}/datasets/{self.dataset_id}/models/{self.model_id}" @classmethod - def from_api_repr(cls, resource): - """Factory: construct a model reference given its API representation + def from_api_repr(cls, resource: Dict[str, Any]) -> "ModelReference": + """Factory: construct a model reference given its API representation. Args: - resource (Dict[str, object]): + resource: Model reference representation returned from the API Returns: - google.cloud.bigquery.model.ModelReference: - Model reference parsed from ``resource``. + Model reference parsed from ``resource``. """ ref = cls() - # Keep a reference to the resource as a workaround to find unknown - # field values. ref._properties = resource - ref._proto = json_format.ParseDict( - resource, types.ModelReference()._pb, ignore_unknown_fields=True - ) - return ref @classmethod def from_string( - cls, model_id: str, default_project: str = None + cls, model_id: str, default_project: Optional[str] = None ) -> "ModelReference": """Construct a model reference from model ID string. Args: - model_id (str): + model_id: A model ID in standard SQL format. If ``default_project`` is not specified, this must included a project ID, dataset ID, and model ID, each separated by ``.``. - default_project (Optional[str]): + default_project: The project ID to use when ``model_id`` does not include a project ID. Returns: - google.cloud.bigquery.model.ModelReference: - Model reference parsed from ``model_id``. + Model reference parsed from ``model_id``. Raises: ValueError: @@ -419,13 +402,13 @@ def from_string( {"projectId": proj, "datasetId": dset, "modelId": model} ) - def to_api_repr(self) -> dict: + def to_api_repr(self) -> Dict[str, Any]: """Construct the API resource representation of this model reference. Returns: - Dict[str, object]: Model reference represented as an API resource + Model reference represented as an API resource. """ - return json_format.MessageToDict(self._proto) + return copy.deepcopy(self._properties) def _key(self): """Unique key for this model. @@ -437,7 +420,7 @@ def _key(self): def __eq__(self, other): if not isinstance(other, ModelReference): return NotImplemented - return self._proto == other._proto + return self._properties == other._properties def __ne__(self, other): return not self == other diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/py.typed b/packages/google-cloud-bigquery/google/cloud/bigquery/py.typed similarity index 100% rename from packages/google-cloud-bigquery/google/cloud/bigquery_v2/py.typed rename to packages/google-cloud-bigquery/google/cloud/bigquery/py.typed diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 0b90b6954786..0469cb2711bc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -397,7 +397,7 @@ class ScalarQueryParameter(_AbstractQueryParameter): type_: Name of parameter type. See :class:`google.cloud.bigquery.enums.SqlTypeNames` and - :class:`google.cloud.bigquery.enums.SqlParameterScalarTypes` for + :class:`google.cloud.bigquery.query.SqlParameterScalarTypes` for supported types. value: @@ -519,7 +519,7 @@ class ArrayQueryParameter(_AbstractQueryParameter): values (List[appropriate type]): The parameter array values. """ - def __init__(self, name, array_type, values): + def __init__(self, name, array_type, values) -> None: self.name = name self.values = values @@ -682,10 +682,13 @@ class StructQueryParameter(_AbstractQueryParameter): ]]): The sub-parameters for the struct """ - def __init__(self, name, *sub_params): + def __init__(self, name, *sub_params) -> None: self.name = name - types = self.struct_types = OrderedDict() - values = self.struct_values = {} + self.struct_types: Dict[str, Any] = OrderedDict() + self.struct_values: Dict[str, Any] = {} + + types = self.struct_types + values = self.struct_values for sub in sub_params: if isinstance(sub, self.__class__): types[sub.name] = "STRUCT" @@ -808,6 +811,28 @@ def __repr__(self): return "StructQueryParameter{}".format(self._key()) +class SqlParameterScalarTypes: + """Supported scalar SQL query parameter types as type objects.""" + + BOOL = ScalarQueryParameterType("BOOL") + BOOLEAN = ScalarQueryParameterType("BOOL") + BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") + BYTES = ScalarQueryParameterType("BYTES") + DATE = ScalarQueryParameterType("DATE") + DATETIME = ScalarQueryParameterType("DATETIME") + DECIMAL = ScalarQueryParameterType("NUMERIC") + FLOAT = ScalarQueryParameterType("FLOAT64") + FLOAT64 = ScalarQueryParameterType("FLOAT64") + GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") + INT64 = ScalarQueryParameterType("INT64") + INTEGER = ScalarQueryParameterType("INT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + STRING = ScalarQueryParameterType("STRING") + TIME = ScalarQueryParameterType("TIME") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") + + class _QueryResults(object): """Results of a query. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py index a66434300983..3c0919003292 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py @@ -16,12 +16,12 @@ """Define resources for the BigQuery Routines API.""" -from google.protobuf import json_format +from typing import Any, Dict, Optional import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers -import google.cloud.bigquery_v2.types -from google.cloud.bigquery_v2.types import StandardSqlTableType +from google.cloud.bigquery.standard_sql import StandardSqlDataType +from google.cloud.bigquery.standard_sql import StandardSqlTableType class RoutineType: @@ -69,7 +69,7 @@ class Routine(object): "determinism_level": "determinismLevel", } - def __init__(self, routine_ref, **kwargs): + def __init__(self, routine_ref, **kwargs) -> None: if isinstance(routine_ref, str): routine_ref = RoutineReference.from_string(routine_ref) @@ -190,7 +190,7 @@ def arguments(self, value): @property def return_type(self): - """google.cloud.bigquery_v2.types.StandardSqlDataType: Return type of + """google.cloud.bigquery.StandardSqlDataType: Return type of the routine. If absent, the return type is inferred from @@ -206,22 +206,15 @@ def return_type(self): if not resource: return resource - output = google.cloud.bigquery_v2.types.StandardSqlDataType() - raw_protobuf = json_format.ParseDict( - resource, output._pb, ignore_unknown_fields=True - ) - return type(output).wrap(raw_protobuf) + return StandardSqlDataType.from_api_repr(resource) @return_type.setter - def return_type(self, value): - if value: - resource = json_format.MessageToDict(value._pb) - else: - resource = None + def return_type(self, value: StandardSqlDataType): + resource = None if not value else value.to_api_repr() self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource @property - def return_table_type(self) -> StandardSqlTableType: + def return_table_type(self) -> Optional[StandardSqlTableType]: """The return type of a Table Valued Function (TVF) routine. .. versionadded:: 2.22.0 @@ -232,20 +225,14 @@ def return_table_type(self) -> StandardSqlTableType: if not resource: return resource - output = google.cloud.bigquery_v2.types.StandardSqlTableType() - raw_protobuf = json_format.ParseDict( - resource, output._pb, ignore_unknown_fields=True - ) - return type(output).wrap(raw_protobuf) + return StandardSqlTableType.from_api_repr(resource) @return_table_type.setter - def return_table_type(self, value): + def return_table_type(self, value: Optional[StandardSqlTableType]): if not value: resource = None else: - resource = { - "columns": [json_format.MessageToDict(col._pb) for col in value.columns] - } + resource = value.to_api_repr() self._properties[self._PROPERTY_TO_API_FIELD["return_table_type"]] = resource @@ -365,8 +352,8 @@ class RoutineArgument(object): "mode": "mode", } - def __init__(self, **kwargs): - self._properties = {} + def __init__(self, **kwargs) -> None: + self._properties: Dict[str, Any] = {} for property_name in kwargs: setattr(self, property_name, kwargs[property_name]) @@ -407,7 +394,7 @@ def mode(self, value): @property def data_type(self): - """Optional[google.cloud.bigquery_v2.types.StandardSqlDataType]: Type + """Optional[google.cloud.bigquery.StandardSqlDataType]: Type of a variable, e.g., a function argument. See: @@ -417,16 +404,12 @@ def data_type(self): if not resource: return resource - output = google.cloud.bigquery_v2.types.StandardSqlDataType() - raw_protobuf = json_format.ParseDict( - resource, output._pb, ignore_unknown_fields=True - ) - return type(output).wrap(raw_protobuf) + return StandardSqlDataType.from_api_repr(resource) @data_type.setter def data_type(self, value): if value: - resource = json_format.MessageToDict(value._pb) + resource = value.to_api_repr() else: resource = None self._properties[self._PROPERTY_TO_API_FIELD["data_type"]] = resource diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 84272228f2f6..5580a2ae9223 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -18,7 +18,8 @@ import enum from typing import Any, Dict, Iterable, Union -from google.cloud.bigquery_v2 import types +from google.cloud.bigquery import standard_sql +from google.cloud.bigquery.enums import StandardSqlTypeNames _STRUCT_TYPES = ("RECORD", "STRUCT") @@ -27,26 +28,26 @@ # https://cloud.google.com/bigquery/data-types#legacy_sql_data_types # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types LEGACY_TO_STANDARD_TYPES = { - "STRING": types.StandardSqlDataType.TypeKind.STRING, - "BYTES": types.StandardSqlDataType.TypeKind.BYTES, - "INTEGER": types.StandardSqlDataType.TypeKind.INT64, - "INT64": types.StandardSqlDataType.TypeKind.INT64, - "FLOAT": types.StandardSqlDataType.TypeKind.FLOAT64, - "FLOAT64": types.StandardSqlDataType.TypeKind.FLOAT64, - "NUMERIC": types.StandardSqlDataType.TypeKind.NUMERIC, - "BIGNUMERIC": types.StandardSqlDataType.TypeKind.BIGNUMERIC, - "BOOLEAN": types.StandardSqlDataType.TypeKind.BOOL, - "BOOL": types.StandardSqlDataType.TypeKind.BOOL, - "GEOGRAPHY": types.StandardSqlDataType.TypeKind.GEOGRAPHY, - "RECORD": types.StandardSqlDataType.TypeKind.STRUCT, - "STRUCT": types.StandardSqlDataType.TypeKind.STRUCT, - "TIMESTAMP": types.StandardSqlDataType.TypeKind.TIMESTAMP, - "DATE": types.StandardSqlDataType.TypeKind.DATE, - "TIME": types.StandardSqlDataType.TypeKind.TIME, - "DATETIME": types.StandardSqlDataType.TypeKind.DATETIME, + "STRING": StandardSqlTypeNames.STRING, + "BYTES": StandardSqlTypeNames.BYTES, + "INTEGER": StandardSqlTypeNames.INT64, + "INT64": StandardSqlTypeNames.INT64, + "FLOAT": StandardSqlTypeNames.FLOAT64, + "FLOAT64": StandardSqlTypeNames.FLOAT64, + "NUMERIC": StandardSqlTypeNames.NUMERIC, + "BIGNUMERIC": StandardSqlTypeNames.BIGNUMERIC, + "BOOLEAN": StandardSqlTypeNames.BOOL, + "BOOL": StandardSqlTypeNames.BOOL, + "GEOGRAPHY": StandardSqlTypeNames.GEOGRAPHY, + "RECORD": StandardSqlTypeNames.STRUCT, + "STRUCT": StandardSqlTypeNames.STRUCT, + "TIMESTAMP": StandardSqlTypeNames.TIMESTAMP, + "DATE": StandardSqlTypeNames.DATE, + "TIME": StandardSqlTypeNames.TIME, + "DATETIME": StandardSqlTypeNames.DATETIME, # no direct conversion from ARRAY, the latter is represented by mode="REPEATED" } -"""String names of the legacy SQL types to integer codes of Standard SQL types.""" +"""String names of the legacy SQL types to integer codes of Standard SQL standard_sql.""" class _DefaultSentinel(enum.Enum): @@ -256,16 +257,20 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ - field_type = self.field_type.upper() - if field_type == "STRING" or field_type == "BYTES": - if self.max_length is not None: - field_type = f"{field_type}({self.max_length})" - elif field_type.endswith("NUMERIC"): - if self.precision is not None: - if self.scale is not None: - field_type = f"{field_type}({self.precision}, {self.scale})" - else: - field_type = f"{field_type}({self.precision})" + field_type = self.field_type.upper() if self.field_type is not None else None + + # Type can temporarily be set to None if the code needs a SchemaField instance, + # but has npt determined the exact type of the field yet. + if field_type is not None: + if field_type == "STRING" or field_type == "BYTES": + if self.max_length is not None: + field_type = f"{field_type}({self.max_length})" + elif field_type.endswith("NUMERIC"): + if self.precision is not None: + if self.scale is not None: + field_type = f"{field_type}({self.precision}, {self.scale})" + else: + field_type = f"{field_type}({self.precision})" policy_tags = ( None if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) @@ -281,48 +286,41 @@ def _key(self): policy_tags, ) - def to_standard_sql(self) -> types.StandardSqlField: - """Return the field as the standard SQL field representation object. - - Returns: - An instance of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`. - """ - sql_type = types.StandardSqlDataType() + def to_standard_sql(self) -> standard_sql.StandardSqlField: + """Return the field as the standard SQL field representation object.""" + sql_type = standard_sql.StandardSqlDataType() if self.mode == "REPEATED": - sql_type.type_kind = types.StandardSqlDataType.TypeKind.ARRAY + sql_type.type_kind = StandardSqlTypeNames.ARRAY else: sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get( self.field_type, - types.StandardSqlDataType.TypeKind.TYPE_KIND_UNSPECIFIED, + StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, ) - if sql_type.type_kind == types.StandardSqlDataType.TypeKind.ARRAY: # noqa: E721 + if sql_type.type_kind == StandardSqlTypeNames.ARRAY: # noqa: E721 array_element_type = LEGACY_TO_STANDARD_TYPES.get( self.field_type, - types.StandardSqlDataType.TypeKind.TYPE_KIND_UNSPECIFIED, + StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, + ) + sql_type.array_element_type = standard_sql.StandardSqlDataType( + type_kind=array_element_type ) - sql_type.array_element_type.type_kind = array_element_type # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type - if ( - array_element_type - == types.StandardSqlDataType.TypeKind.STRUCT # noqa: E721 - ): - sql_type.array_element_type.struct_type.fields.extend( - field.to_standard_sql() for field in self.fields + if array_element_type == StandardSqlTypeNames.STRUCT: # noqa: E721 + sql_type.array_element_type.struct_type = ( + standard_sql.StandardSqlStructType( + fields=(field.to_standard_sql() for field in self.fields) + ) ) - - elif ( - sql_type.type_kind - == types.StandardSqlDataType.TypeKind.STRUCT # noqa: E721 - ): - sql_type.struct_type.fields.extend( - field.to_standard_sql() for field in self.fields + elif sql_type.type_kind == StandardSqlTypeNames.STRUCT: # noqa: E721 + sql_type.struct_type = standard_sql.StandardSqlStructType( + fields=(field.to_standard_sql() for field in self.fields) ) - return types.StandardSqlField(name=self.name, type=sql_type) + return standard_sql.StandardSqlField(name=self.name, type=sql_type) def __eq__(self, other): if not isinstance(other, SchemaField): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery/standard_sql.py new file mode 100644 index 000000000000..e0f22b2de0a9 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/standard_sql.py @@ -0,0 +1,355 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import typing +from typing import Any, Dict, Iterable, List, Optional + +from google.cloud.bigquery.enums import StandardSqlTypeNames + + +class StandardSqlDataType: + """The type of a variable, e.g., a function argument. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlDataType + + Examples: + + .. code-block:: text + + INT64: {type_kind="INT64"} + ARRAY: {type_kind="ARRAY", array_element_type="STRING"} + STRUCT: { + type_kind="STRUCT", + struct_type={ + fields=[ + {name="x", type={type_kind="STRING"}}, + { + name="y", + type={type_kind="ARRAY", array_element_type="DATE"} + } + ] + } + } + + Args: + type_kind: + The top level type of this field. Can be any standard SQL data type, + e.g. INT64, DATE, ARRAY. + array_element_type: + The type of the array's elements, if type_kind is ARRAY. + struct_type: + The fields of this struct, in order, if type_kind is STRUCT. + """ + + def __init__( + self, + type_kind: Optional[ + StandardSqlTypeNames + ] = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, + array_element_type: Optional["StandardSqlDataType"] = None, + struct_type: Optional["StandardSqlStructType"] = None, + ): + self._properties: Dict[str, Any] = {} + + self.type_kind = type_kind + self.array_element_type = array_element_type + self.struct_type = struct_type + + @property + def type_kind(self) -> Optional[StandardSqlTypeNames]: + """The top level type of this field. + + Can be any standard SQL data type, e.g. INT64, DATE, ARRAY. + """ + kind = self._properties["typeKind"] + return StandardSqlTypeNames[kind] # pytype: disable=missing-parameter + + @type_kind.setter + def type_kind(self, value: Optional[StandardSqlTypeNames]): + if not value: + kind = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED.value + else: + kind = value.value + self._properties["typeKind"] = kind + + @property + def array_element_type(self) -> Optional["StandardSqlDataType"]: + """The type of the array's elements, if type_kind is ARRAY.""" + element_type = self._properties.get("arrayElementType") + + if element_type is None: + return None + + result = StandardSqlDataType() + result._properties = element_type # We do not use a copy on purpose. + return result + + @array_element_type.setter + def array_element_type(self, value: Optional["StandardSqlDataType"]): + element_type = None if value is None else value.to_api_repr() + + if element_type is None: + self._properties.pop("arrayElementType", None) + else: + self._properties["arrayElementType"] = element_type + + @property + def struct_type(self) -> Optional["StandardSqlStructType"]: + """The fields of this struct, in order, if type_kind is STRUCT.""" + struct_info = self._properties.get("structType") + + if struct_info is None: + return None + + result = StandardSqlStructType() + result._properties = struct_info # We do not use a copy on purpose. + return result + + @struct_type.setter + def struct_type(self, value: Optional["StandardSqlStructType"]): + struct_type = None if value is None else value.to_api_repr() + + if struct_type is None: + self._properties.pop("structType", None) + else: + self._properties["structType"] = struct_type + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this SQL data type.""" + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]): + """Construct an SQL data type instance given its API representation.""" + type_kind = resource.get("typeKind") + if type_kind not in StandardSqlTypeNames.__members__: + type_kind = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED + else: + # Convert string to an enum member. + type_kind = StandardSqlTypeNames[ # pytype: disable=missing-parameter + typing.cast(str, type_kind) + ] + + array_element_type = None + if type_kind == StandardSqlTypeNames.ARRAY: + element_type = resource.get("arrayElementType") + if element_type: + array_element_type = cls.from_api_repr(element_type) + + struct_type = None + if type_kind == StandardSqlTypeNames.STRUCT: + struct_info = resource.get("structType") + if struct_info: + struct_type = StandardSqlStructType.from_api_repr(struct_info) + + return cls(type_kind, array_element_type, struct_type) + + def __eq__(self, other): + if not isinstance(other, StandardSqlDataType): + return NotImplemented + else: + return ( + self.type_kind == other.type_kind + and self.array_element_type == other.array_element_type + and self.struct_type == other.struct_type + ) + + def __str__(self): + result = f"{self.__class__.__name__}(type_kind={self.type_kind!r}, ...)" + return result + + +class StandardSqlField: + """A field or a column. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlField + + Args: + name: + The name of this field. Can be absent for struct fields. + type: + The type of this parameter. Absent if not explicitly specified. + + For example, CREATE FUNCTION statement can omit the return type; in this + case the output parameter does not have this "type" field). + """ + + def __init__( + self, name: Optional[str] = None, type: Optional[StandardSqlDataType] = None + ): + type_repr = None if type is None else type.to_api_repr() + self._properties = {"name": name, "type": type_repr} + + @property + def name(self) -> Optional[str]: + """The name of this field. Can be absent for struct fields.""" + return typing.cast(Optional[str], self._properties["name"]) + + @name.setter + def name(self, value: Optional[str]): + self._properties["name"] = value + + @property + def type(self) -> Optional[StandardSqlDataType]: + """The type of this parameter. Absent if not explicitly specified. + + For example, CREATE FUNCTION statement can omit the return type; in this + case the output parameter does not have this "type" field). + """ + type_info = self._properties["type"] + + if type_info is None: + return None + + result = StandardSqlDataType() + # We do not use a properties copy on purpose. + result._properties = typing.cast(Dict[str, Any], type_info) + + return result + + @type.setter + def type(self, value: Optional[StandardSqlDataType]): + value_repr = None if value is None else value.to_api_repr() + self._properties["type"] = value_repr + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this SQL field.""" + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]): + """Construct an SQL field instance given its API representation.""" + result = cls( + name=resource.get("name"), + type=StandardSqlDataType.from_api_repr(resource.get("type", {})), + ) + return result + + def __eq__(self, other): + if not isinstance(other, StandardSqlField): + return NotImplemented + else: + return self.name == other.name and self.type == other.type + + +class StandardSqlStructType: + """Type of a struct field. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlDataType#StandardSqlStructType + + Args: + fields: The fields in this struct. + """ + + def __init__(self, fields: Optional[Iterable[StandardSqlField]] = None): + if fields is None: + fields = [] + self._properties = {"fields": [field.to_api_repr() for field in fields]} + + @property + def fields(self) -> List[StandardSqlField]: + """The fields in this struct.""" + result = [] + + for field_resource in self._properties.get("fields", []): + field = StandardSqlField() + field._properties = field_resource # We do not use a copy on purpose. + result.append(field) + + return result + + @fields.setter + def fields(self, value: Iterable[StandardSqlField]): + self._properties["fields"] = [field.to_api_repr() for field in value] + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this SQL struct type.""" + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "StandardSqlStructType": + """Construct an SQL struct type instance given its API representation.""" + fields = ( + StandardSqlField.from_api_repr(field_resource) + for field_resource in resource.get("fields", []) + ) + return cls(fields=fields) + + def __eq__(self, other): + if not isinstance(other, StandardSqlStructType): + return NotImplemented + else: + return self.fields == other.fields + + +class StandardSqlTableType: + """A table type. + + See: + https://cloud.google.com/workflows/docs/reference/googleapis/bigquery/v2/Overview#StandardSqlTableType + + Args: + columns: The columns in this table type. + """ + + def __init__(self, columns: Iterable[StandardSqlField]): + self._properties = {"columns": [col.to_api_repr() for col in columns]} + + @property + def columns(self) -> List[StandardSqlField]: + """The columns in this table type.""" + result = [] + + for column_resource in self._properties.get("columns", []): + column = StandardSqlField() + column._properties = column_resource # We do not use a copy on purpose. + result.append(column) + + return result + + @columns.setter + def columns(self, value: Iterable[StandardSqlField]): + self._properties["columns"] = [col.to_api_repr() for col in value] + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this SQL table type.""" + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "StandardSqlTableType": + """Construct an SQL table type instance given its API representation.""" + columns = [] + + for column_resource in resource.get("columns", []): + type_ = column_resource.get("type") + if type_ is None: + type_ = {} + + column = StandardSqlField( + name=column_resource.get("name"), + type=StandardSqlDataType.from_api_repr(type_), + ) + columns.append(column) + + return cls(columns=columns) + + def __eq__(self, other): + if not isinstance(other, StandardSqlTableType): + return NotImplemented + else: + return self.columns == other.columns diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f39945fe48ae..ed4f214ce927 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -28,6 +28,10 @@ import pandas # type: ignore except ImportError: # pragma: NO COVER pandas = None +else: + import db_dtypes # type: ignore # noqa + +import pyarrow # type: ignore try: import geopandas # type: ignore @@ -43,18 +47,12 @@ else: _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read -try: - import pyarrow # type: ignore -except ImportError: # pragma: NO COVER - pyarrow = None - import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -67,7 +65,6 @@ # they are not None, avoiding false "no attribute" errors. import pandas import geopandas - import pyarrow from google.cloud import bigquery_storage from google.cloud.bigquery.dataset import DatasetReference @@ -84,10 +81,6 @@ "The shapely library is not installed, please install " "shapely to use the geography_as_object option." ) -_NO_PYARROW_ERROR = ( - "The pyarrow library is not installed, please install " - "pyarrow to use the to_arrow() function." -) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' @@ -276,6 +269,7 @@ def from_api_repr(cls, resource: dict) -> "TableReference": project = resource["projectId"] dataset_id = resource["datasetId"] table_id = resource["tableId"] + return cls(DatasetReference(project, dataset_id), table_id) def to_api_repr(self) -> dict: @@ -377,7 +371,7 @@ class Table(_TableBase): "require_partition_filter": "requirePartitionFilter", } - def __init__(self, table_ref, schema=None): + def __init__(self, table_ref, schema=None) -> None: table_ref = _table_arg_to_table_ref(table_ref) self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}} # Let the @property do validation. @@ -1328,7 +1322,7 @@ class Row(object): # Choose unusual field names to try to avoid conflict with schema fields. __slots__ = ("_xxx_values", "_xxx_field_to_index") - def __init__(self, values, field_to_index): + def __init__(self, values, field_to_index) -> None: self._xxx_values = values self._xxx_field_to_index = field_to_index @@ -1556,17 +1550,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): if self.max_results is not None: return False - try: - from google.cloud import bigquery_storage # noqa: F401 - except ImportError: - return False - - try: - _helpers.BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: - warnings.warn(str(exc)) - return False - return True def _get_next_page_response(self): @@ -1666,15 +1649,8 @@ def to_arrow_iterable( pyarrow.RecordBatch: A generator of :class:`~pyarrow.RecordBatch`. - Raises: - ValueError: - If the :mod:`pyarrow` library cannot be imported. - .. versionadded:: 2.31.0 """ - if pyarrow is None: - raise ValueError(_NO_PYARROW_ERROR) - self._maybe_warn_max_results(bqstorage_client) bqstorage_download = functools.partial( @@ -1700,7 +1676,7 @@ def to_arrow_iterable( def to_arrow( self, progress_bar_type: str = None, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a @@ -1729,8 +1705,7 @@ def to_arrow( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1751,14 +1726,8 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. - Raises: - ValueError: If the :mod:`pyarrow` library cannot be imported. - .. versionadded:: 1.17.0 """ - if pyarrow is None: - raise ValueError(_NO_PYARROW_ERROR) - self._maybe_warn_max_results(bqstorage_client) if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): @@ -1808,7 +1777,7 @@ def to_arrow( def to_dataframe_iterable( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore ) -> "pandas.DataFrame": @@ -1819,8 +1788,7 @@ def to_dataframe_iterable( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1885,11 +1853,10 @@ def to_dataframe_iterable( # changes to job.QueryJob.to_dataframe() def to_dataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, - date_as_object: bool = True, geography_as_object: bool = False, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1899,8 +1866,7 @@ def to_dataframe( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1940,12 +1906,6 @@ def to_dataframe( .. versionadded:: 1.24.0 - date_as_object (Optional[bool]): - If ``True`` (default), cast dates to objects. If ``False``, convert - to datetime64[ns] dtype. - - .. versionadded:: 1.26.0 - geography_as_object (Optional[bool]): If ``True``, convert GEOGRAPHY data to :mod:`shapely` geometry objects. If ``False`` (default), don't cast @@ -1988,30 +1948,43 @@ def to_dataframe( create_bqstorage_client=create_bqstorage_client, ) - # When converting timestamp values to nanosecond precision, the result + # When converting date or timestamp values to nanosecond precision, the result # can be out of pyarrow bounds. To avoid the error when converting to - # Pandas, we set the timestamp_as_object parameter to True, if necessary. - types_to_check = { - pyarrow.timestamp("us"), - pyarrow.timestamp("us", tz=datetime.timezone.utc), - } - - for column in record_batch: - if column.type in types_to_check: - try: - column.cast("timestamp[ns]") - except pyarrow.lib.ArrowInvalid: - timestamp_as_object = True - break - else: - timestamp_as_object = False + # Pandas, we set the date_as_object or timestamp_as_object parameter to True, + # if necessary. + date_as_object = not all( + self.__can_cast_timestamp_ns(col) + for col in record_batch + # Type can be date32 or date64 (plus units). + # See: https://arrow.apache.org/docs/python/api/datatypes.html + if str(col.type).startswith("date") + ) - extra_kwargs = {"timestamp_as_object": timestamp_as_object} + timestamp_as_object = not all( + self.__can_cast_timestamp_ns(col) + for col in record_batch + # Type can be timestamp (plus units and time zone). + # See: https://arrow.apache.org/docs/python/api/datatypes.html + if str(col.type).startswith("timestamp") + ) - df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) + if len(record_batch) > 0: + df = record_batch.to_pandas( + date_as_object=date_as_object, + timestamp_as_object=timestamp_as_object, + integer_object_nulls=True, + types_mapper=_pandas_helpers.default_types_mapper( + date_as_object=date_as_object + ), + ) + else: + # Avoid "ValueError: need at least one array to concatenate" on + # older versions of pandas when converting empty RecordBatch to + # DataFrame. See: https://github.com/pandas-dev/pandas/issues/41241 + df = pandas.DataFrame([], columns=record_batch.schema.names) for column in dtypes: - df[column] = pandas.Series(df[column], dtype=dtypes[column]) + df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False) if geography_as_object: for field in self.schema: @@ -2020,6 +1993,15 @@ def to_dataframe( return df + @staticmethod + def __can_cast_timestamp_ns(column): + try: + column.cast("timestamp[ns]") + except pyarrow.lib.ArrowInvalid: + return False + else: + return True + # If changing the signature of this method, make sure to apply the same # changes to job.QueryJob.to_geodataframe() def to_geodataframe( @@ -2028,7 +2010,6 @@ def to_geodataframe( dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, - date_as_object: bool = True, geography_column: Optional[str] = None, ) -> "geopandas.GeoDataFrame": """Create a GeoPandas GeoDataFrame by loading all pages of a query. @@ -2076,10 +2057,6 @@ def to_geodataframe( This argument does nothing if ``bqstorage_client`` is supplied. - date_as_object (Optional[bool]): - If ``True`` (default), cast dates to objects. If ``False``, convert - to datetime64[ns] dtype. - geography_column (Optional[str]): If there are more than one GEOGRAPHY column, identifies which one to use to construct a geopandas @@ -2135,7 +2112,6 @@ def to_geodataframe( dtypes, progress_bar_type, create_bqstorage_client, - date_as_object, geography_as_object=True, ) @@ -2184,8 +2160,6 @@ def to_arrow( Returns: pyarrow.Table: An empty :class:`pyarrow.Table`. """ - if pyarrow is None: - raise ValueError(_NO_PYARROW_ERROR) return pyarrow.Table.from_arrays(()) def to_dataframe( @@ -2194,7 +2168,6 @@ def to_dataframe( dtypes=None, progress_bar_type=None, create_bqstorage_client=True, - date_as_object=True, geography_as_object=False, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2204,7 +2177,6 @@ def to_dataframe( dtypes (Any): Ignored. Added for compatibility with RowIterator. progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. - date_as_object (bool): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. @@ -2219,7 +2191,6 @@ def to_geodataframe( dtypes=None, progress_bar_type=None, create_bqstorage_client=True, - date_as_object=True, geography_column: Optional[str] = None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2229,7 +2200,6 @@ def to_geodataframe( dtypes (Any): Ignored. Added for compatibility with RowIterator. progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. - date_as_object (bool): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. @@ -2290,13 +2260,7 @@ def to_arrow_iterable( Returns: An iterator yielding a single empty :class:`~pyarrow.RecordBatch`. - - Raises: - ValueError: - If the :mod:`pyarrow` library cannot be imported. """ - if pyarrow is None: - raise ValueError(_NO_PYARROW_ERROR) return iter((pyarrow.record_batch([]),)) def __iter__(self): @@ -2327,7 +2291,7 @@ class PartitionRange(object): Private. Used to construct object from API resource. """ - def __init__(self, start=None, end=None, interval=None, _properties=None): + def __init__(self, start=None, end=None, interval=None, _properties=None) -> None: if _properties is None: _properties = {} self._properties = _properties @@ -2402,10 +2366,10 @@ class RangePartitioning(object): Private. Used to construct object from API resource. """ - def __init__(self, range_=None, field=None, _properties=None): + def __init__(self, range_=None, field=None, _properties=None) -> None: if _properties is None: _properties = {} - self._properties = _properties + self._properties: Dict[str, Any] = _properties if range_ is not None: self.range_ = range_ @@ -2511,8 +2475,8 @@ class TimePartitioning(object): def __init__( self, type_=None, field=None, expiration_ms=None, require_partition_filter=None - ): - self._properties = {} + ) -> None: + self._properties: Dict[str, Any] = {} if type_ is None: self.type_ = TimePartitioningType.DAY else: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py index bb11be3b3254..55486a39a2de 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/__init__.py @@ -14,6 +14,7 @@ # limitations under the License. # +import warnings from .types.encryption_config import EncryptionConfiguration from .types.model import DeleteModelRequest @@ -29,6 +30,15 @@ from .types.standard_sql import StandardSqlTableType from .types.table_reference import TableReference + +_LEGACY_MSG = ( + "Legacy proto-based types from bigquery_v2 are not maintained anymore, " + "use types defined in google.cloud.bigquery instead." +) + +warnings.warn(_LEGACY_MSG, category=DeprecationWarning) + + __all__ = ( "DeleteModelRequest", "EncryptionConfiguration", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic_metadata.json b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic_metadata.json deleted file mode 100644 index 3251a2630bd9..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/gapic_metadata.json +++ /dev/null @@ -1,63 +0,0 @@ - { - "comment": "This file maps proto services/RPCs to the corresponding library clients/methods", - "language": "python", - "libraryPackage": "google.cloud.bigquery_v2", - "protoPackage": "google.cloud.bigquery.v2", - "schema": "1.0", - "services": { - "ModelService": { - "clients": { - "grpc": { - "libraryClient": "ModelServiceClient", - "rpcs": { - "DeleteModel": { - "methods": [ - "delete_model" - ] - }, - "GetModel": { - "methods": [ - "get_model" - ] - }, - "ListModels": { - "methods": [ - "list_models" - ] - }, - "PatchModel": { - "methods": [ - "patch_model" - ] - } - } - }, - "grpc-async": { - "libraryClient": "ModelServiceAsyncClient", - "rpcs": { - "DeleteModel": { - "methods": [ - "delete_model" - ] - }, - "GetModel": { - "methods": [ - "get_model" - ] - }, - "ListModels": { - "methods": [ - "list_models" - ] - }, - "PatchModel": { - "methods": [ - "patch_model" - ] - } - } - } - } - } - } -} diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 8d1cb056c188..f088e10c228c 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -43,6 +43,7 @@ "lint_setup_py", "blacken", "mypy", + "mypy_samples", "pytype", "docs", ] @@ -184,6 +185,28 @@ def system(session): session.run("py.test", "--quiet", os.path.join("tests", "system"), *session.posargs) +@nox.session(python=DEFAULT_PYTHON_VERSION) +def mypy_samples(session): + """Run type checks with mypy.""" + session.install("-e", ".[all]") + + session.install("ipython", "pytest") + session.install(MYPY_VERSION) + + # Just install the dependencies' type info directly, since "mypy --install-types" + # might require an additional pass. + session.install("types-mock", "types-pytz") + session.install("typing-extensions") # for TypedDict in pre-3.8 Python versions + + session.run( + "mypy", + "--config-file", + str(CURRENT_DIRECTORY / "samples" / "mypy.ini"), + "--no-incremental", # Required by warn-unused-configs from mypy.ini to work + "samples/", + ) + + @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def snippets(session): """Run the snippets test suite.""" diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 095759d48888..a445b2be9fc5 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -21,74 +21,6 @@ common = gcp.CommonTemplates() -default_version = "v2" - -for library in s.get_staging_dirs(default_version): - # Do not expose ModelServiceClient and ModelServiceAsyncClient, as there - # is no public API endpoint for the models service. - s.replace( - library / f"google/cloud/bigquery_{library.name}/__init__.py", - r"from \.services\.model_service import ModelServiceClient", - "", - ) - - s.replace( - library / f"google/cloud/bigquery_{library.name}/__init__.py", - r"from \.services\.model_service import ModelServiceAsyncClient", - "", - ) - - s.replace( - library / f"google/cloud/bigquery_{library.name}/__init__.py", - r"""["']ModelServiceClient["'],""", - "", - ) - - s.replace( - library / f"google/cloud/bigquery_{library.name}/__init__.py", - r"""["']ModelServiceAsyncClient["'],""", - "", - ) - - # Adjust Model docstring so that Sphinx does not think that "predicted_" is - # a reference to something, issuing a false warning. - s.replace( - library / f"google/cloud/bigquery_{library.name}/types/model.py", - r'will have a "predicted_"', - "will have a `predicted_`", - ) - - # Avoid breaking change due to change in field renames. - # https://github.com/googleapis/python-bigquery/issues/319 - s.replace( - library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", - r"type_ ", - "type ", - ) - - s.move( - library, - excludes=[ - "*.tar.gz", - ".coveragerc", - "docs/index.rst", - f"docs/bigquery_{library.name}/*_service.rst", - f"docs/bigquery_{library.name}/services.rst", - "README.rst", - "noxfile.py", - "setup.py", - f"scripts/fixup_bigquery_{library.name}_keywords.py", - "google/cloud/bigquery/__init__.py", - "google/cloud/bigquery/py.typed", - # There are no public API endpoints for the generated ModelServiceClient, - # thus there's no point in generating it and its tests. - f"google/cloud/bigquery_{library.name}/services/**", - f"tests/unit/gapic/bigquery_{library.name}/**", - ], - ) - -s.remove_staging_dirs() - # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- @@ -116,7 +48,7 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", - ".github/workflows", # exclude gh actions as credentials are needed for tests + ".github/workflows", # exclude gh actions as credentials are needed for tests ], ) @@ -131,12 +63,10 @@ r'\{"members": True\}', '{"members": True, "inherited-members": True}', ) - -# Tell Sphinx to ingore autogenerated docs files. s.replace( "docs/conf.py", - r'"samples/snippets/README\.rst",', - '\\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', + r"exclude_patterns = \[", + '\\g<0>\n "google/cloud/bigquery_v2/**", # Legacy proto-based types.', ) # ---------------------------------------------------------------------------- @@ -159,7 +89,7 @@ google/cloud/ exclude = tests/ - google/cloud/bigquery_v2/ + google/cloud/bigquery_v2/ # Legacy proto-based types. output = .pytype/ disable = # There's some issue with finding some pyi files, thus disabling. diff --git a/packages/google-cloud-bigquery/samples/add_empty_column.py b/packages/google-cloud-bigquery/samples/add_empty_column.py index cd7cf5018e1f..6d449d6e20d4 100644 --- a/packages/google-cloud-bigquery/samples/add_empty_column.py +++ b/packages/google-cloud-bigquery/samples/add_empty_column.py @@ -13,7 +13,7 @@ # limitations under the License. -def add_empty_column(table_id): +def add_empty_column(table_id: str) -> None: # [START bigquery_add_empty_column] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/browse_table_data.py b/packages/google-cloud-bigquery/samples/browse_table_data.py index 29a1c2ff61e0..6a56253bf37f 100644 --- a/packages/google-cloud-bigquery/samples/browse_table_data.py +++ b/packages/google-cloud-bigquery/samples/browse_table_data.py @@ -13,7 +13,7 @@ # limitations under the License. -def browse_table_data(table_id): +def browse_table_data(table_id: str) -> None: # [START bigquery_browse_table] @@ -41,15 +41,17 @@ def browse_table_data(table_id): table = client.get_table(table_id) # Make an API request. fields = table.schema[:2] # First two columns. rows_iter = client.list_rows(table_id, selected_fields=fields, max_results=10) - rows = list(rows_iter) print("Selected {} columns from table {}.".format(len(rows_iter.schema), table_id)) + + rows = list(rows_iter) print("Downloaded {} rows from table {}".format(len(rows), table_id)) # Print row data in tabular format. - rows = client.list_rows(table, max_results=10) - format_string = "{!s:<16} " * len(rows.schema) - field_names = [field.name for field in rows.schema] + rows_iter = client.list_rows(table, max_results=10) + format_string = "{!s:<16} " * len(rows_iter.schema) + field_names = [field.name for field in rows_iter.schema] print(format_string.format(*field_names)) # Prints column headers. - for row in rows: + + for row in rows_iter: print(format_string.format(*row)) # Prints row data. # [END bigquery_browse_table] diff --git a/packages/google-cloud-bigquery/samples/client_list_jobs.py b/packages/google-cloud-bigquery/samples/client_list_jobs.py index b2344e23c7f7..7f1e39cb8e53 100644 --- a/packages/google-cloud-bigquery/samples/client_list_jobs.py +++ b/packages/google-cloud-bigquery/samples/client_list_jobs.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_list_jobs(): +def client_list_jobs() -> None: # [START bigquery_list_jobs] diff --git a/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py b/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py index e4e8a296c9a3..9956f3f0056c 100644 --- a/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py +++ b/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_load_partitioned_table(table_id): +def client_load_partitioned_table(table_id: str) -> None: # [START bigquery_load_table_partitioned] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query.py b/packages/google-cloud-bigquery/samples/client_query.py index 7fedc3f90b1e..091d3f98bfbc 100644 --- a/packages/google-cloud-bigquery/samples/client_query.py +++ b/packages/google-cloud-bigquery/samples/client_query.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query(): +def client_query() -> None: # [START bigquery_query] diff --git a/packages/google-cloud-bigquery/samples/client_query_add_column.py b/packages/google-cloud-bigquery/samples/client_query_add_column.py index ff7d5aa68add..2da200bc51d3 100644 --- a/packages/google-cloud-bigquery/samples/client_query_add_column.py +++ b/packages/google-cloud-bigquery/samples/client_query_add_column.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_add_column(table_id): +def client_query_add_column(table_id: str) -> None: # [START bigquery_add_column_query_append] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_batch.py b/packages/google-cloud-bigquery/samples/client_query_batch.py index e1680f4a18d2..df164d1be11c 100644 --- a/packages/google-cloud-bigquery/samples/client_query_batch.py +++ b/packages/google-cloud-bigquery/samples/client_query_batch.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + +if typing.TYPE_CHECKING: + from google.cloud import bigquery -def client_query_batch(): + +def client_query_batch() -> "bigquery.QueryJob": # [START bigquery_query_batch] from google.cloud import bigquery @@ -37,9 +42,12 @@ def client_query_batch(): # Check on the progress by getting the job's updated state. Once the state # is `DONE`, the results are ready. - query_job = client.get_job( - query_job.job_id, location=query_job.location - ) # Make an API request. + query_job = typing.cast( + "bigquery.QueryJob", + client.get_job( + query_job.job_id, location=query_job.location + ), # Make an API request. + ) print("Job {} is currently in state {}".format(query_job.job_id, query_job.state)) # [END bigquery_query_batch] diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table.py b/packages/google-cloud-bigquery/samples/client_query_destination_table.py index 303ce5a0cc36..b200f1cc6c3a 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_destination_table(table_id): +def client_query_destination_table(table_id: str) -> None: # [START bigquery_query_destination_table] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py index 5a109ed104a0..c4ab305f5357 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_destination_table_clustered(table_id): +def client_query_destination_table_clustered(table_id: str) -> None: # [START bigquery_query_clustered_table] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py index 24d4f22228c7..0fd44d189c5b 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_destination_table_cmek(table_id, kms_key_name): +def client_query_destination_table_cmek(table_id: str, kms_key_name: str) -> None: # [START bigquery_query_destination_table_cmek] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py index c8fdd606f9f4..ee45d9a01c58 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_destination_table_legacy(table_id): +def client_query_destination_table_legacy(table_id: str) -> None: # [START bigquery_query_legacy_large_results] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_dry_run.py b/packages/google-cloud-bigquery/samples/client_query_dry_run.py index 1f7bd0c9c4e7..418b43cb5862 100644 --- a/packages/google-cloud-bigquery/samples/client_query_dry_run.py +++ b/packages/google-cloud-bigquery/samples/client_query_dry_run.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def client_query_dry_run(): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def client_query_dry_run() -> "bigquery.QueryJob": # [START bigquery_query_dry_run] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py b/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py index 3f94657795fb..c054e1f28e4a 100644 --- a/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py +++ b/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_legacy_sql(): +def client_query_legacy_sql() -> None: # [START bigquery_query_legacy] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_relax_column.py b/packages/google-cloud-bigquery/samples/client_query_relax_column.py index 5e2ec8056a00..c96a1e7aaf4f 100644 --- a/packages/google-cloud-bigquery/samples/client_query_relax_column.py +++ b/packages/google-cloud-bigquery/samples/client_query_relax_column.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_relax_column(table_id): +def client_query_relax_column(table_id: str) -> None: # [START bigquery_relax_column_query_append] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_w_array_params.py b/packages/google-cloud-bigquery/samples/client_query_w_array_params.py index 4077be2c7d3e..66971318216c 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_array_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_array_params.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_w_array_params(): +def client_query_w_array_params() -> None: # [START bigquery_query_params_arrays] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_w_named_params.py b/packages/google-cloud-bigquery/samples/client_query_w_named_params.py index a0de8f63aa99..f42be1dc8e1f 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_named_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_named_params.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_w_named_params(): +def client_query_w_named_params() -> None: # [START bigquery_query_params_named] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py b/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py index ee316044bda3..b088b305ea22 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_w_positional_params(): +def client_query_w_positional_params() -> None: # [START bigquery_query_params_positional] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py b/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py index 041a3a0e3839..6c5b78113b1a 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_w_struct_params(): +def client_query_w_struct_params() -> None: # [START bigquery_query_params_structs] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py index 41a27770e771..07d64cc94214 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_w_timestamp_params(): +def client_query_w_timestamp_params() -> None: # [START bigquery_query_params_timestamps] import datetime diff --git a/packages/google-cloud-bigquery/samples/copy_table.py b/packages/google-cloud-bigquery/samples/copy_table.py index 91c58e109cb9..8c6153fef238 100644 --- a/packages/google-cloud-bigquery/samples/copy_table.py +++ b/packages/google-cloud-bigquery/samples/copy_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def copy_table(source_table_id, destination_table_id): +def copy_table(source_table_id: str, destination_table_id: str) -> None: # [START bigquery_copy_table] diff --git a/packages/google-cloud-bigquery/samples/copy_table_cmek.py b/packages/google-cloud-bigquery/samples/copy_table_cmek.py index 52ccb5f7b1df..f2e8a90f93bd 100644 --- a/packages/google-cloud-bigquery/samples/copy_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/copy_table_cmek.py @@ -13,7 +13,7 @@ # limitations under the License. -def copy_table_cmek(dest_table_id, orig_table_id, kms_key_name): +def copy_table_cmek(dest_table_id: str, orig_table_id: str, kms_key_name: str) -> None: # [START bigquery_copy_table_cmek] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py b/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py index d86e380d0682..1163b166470a 100644 --- a/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py +++ b/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Sequence -def copy_table_multiple_source(dest_table_id, table_ids): + +def copy_table_multiple_source(dest_table_id: str, table_ids: Sequence[str]) -> None: # [START bigquery_copy_table_multiple_source] diff --git a/packages/google-cloud-bigquery/samples/create_dataset.py b/packages/google-cloud-bigquery/samples/create_dataset.py index 6af3c67eb623..dea91798d99c 100644 --- a/packages/google-cloud-bigquery/samples/create_dataset.py +++ b/packages/google-cloud-bigquery/samples/create_dataset.py @@ -13,7 +13,7 @@ # limitations under the License. -def create_dataset(dataset_id): +def create_dataset(dataset_id: str) -> None: # [START bigquery_create_dataset] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_job.py b/packages/google-cloud-bigquery/samples/create_job.py index feed04ca00e8..39922f7aefca 100644 --- a/packages/google-cloud-bigquery/samples/create_job.py +++ b/packages/google-cloud-bigquery/samples/create_job.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def create_job(): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_job() -> "bigquery.QueryJob": # [START bigquery_create_job] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_routine.py b/packages/google-cloud-bigquery/samples/create_routine.py index 1cb4a80b4ea7..96dc242107f3 100644 --- a/packages/google-cloud-bigquery/samples/create_routine.py +++ b/packages/google-cloud-bigquery/samples/create_routine.py @@ -12,12 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def create_routine(routine_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_routine(routine_id: str) -> "bigquery.Routine": # [START bigquery_create_routine] from google.cloud import bigquery - from google.cloud import bigquery_v2 # Construct a BigQuery client object. client = bigquery.Client() @@ -33,8 +37,8 @@ def create_routine(routine_id): arguments=[ bigquery.RoutineArgument( name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ) ], diff --git a/packages/google-cloud-bigquery/samples/create_routine_ddl.py b/packages/google-cloud-bigquery/samples/create_routine_ddl.py index c191bd385041..56c7cfe24e98 100644 --- a/packages/google-cloud-bigquery/samples/create_routine_ddl.py +++ b/packages/google-cloud-bigquery/samples/create_routine_ddl.py @@ -13,7 +13,7 @@ # limitations under the License. -def create_routine_ddl(routine_id): +def create_routine_ddl(routine_id: str) -> None: # [START bigquery_create_routine_ddl] diff --git a/packages/google-cloud-bigquery/samples/create_table.py b/packages/google-cloud-bigquery/samples/create_table.py index d62e86681afc..eaac54696a9a 100644 --- a/packages/google-cloud-bigquery/samples/create_table.py +++ b/packages/google-cloud-bigquery/samples/create_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def create_table(table_id): +def create_table(table_id: str) -> None: # [START bigquery_create_table] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_table_clustered.py b/packages/google-cloud-bigquery/samples/create_table_clustered.py index 2b45b747e8e4..1686c519a402 100644 --- a/packages/google-cloud-bigquery/samples/create_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/create_table_clustered.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def create_table_clustered(table_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_table_clustered(table_id: str) -> "bigquery.Table": # [START bigquery_create_table_clustered] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py b/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py index 260041aa5d5d..4dc45ed5853a 100644 --- a/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py +++ b/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def create_table_range_partitioned(table_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_table_range_partitioned(table_id: str) -> "bigquery.Table": # [START bigquery_create_table_range_partitioned] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/dataset_exists.py b/packages/google-cloud-bigquery/samples/dataset_exists.py index b4db9353b37f..221899a650ba 100644 --- a/packages/google-cloud-bigquery/samples/dataset_exists.py +++ b/packages/google-cloud-bigquery/samples/dataset_exists.py @@ -13,7 +13,7 @@ # limitations under the License. -def dataset_exists(dataset_id): +def dataset_exists(dataset_id: str) -> None: # [START bigquery_dataset_exists] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/delete_dataset.py b/packages/google-cloud-bigquery/samples/delete_dataset.py index e25740baaff0..b340ed57a9cd 100644 --- a/packages/google-cloud-bigquery/samples/delete_dataset.py +++ b/packages/google-cloud-bigquery/samples/delete_dataset.py @@ -13,7 +13,7 @@ # limitations under the License. -def delete_dataset(dataset_id): +def delete_dataset(dataset_id: str) -> None: # [START bigquery_delete_dataset] diff --git a/packages/google-cloud-bigquery/samples/delete_dataset_labels.py b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py index a52de2967e70..ec5df09c11be 100644 --- a/packages/google-cloud-bigquery/samples/delete_dataset_labels.py +++ b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def delete_dataset_labels(dataset_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def delete_dataset_labels(dataset_id: str) -> "bigquery.Dataset": # [START bigquery_delete_label_dataset] diff --git a/packages/google-cloud-bigquery/samples/delete_model.py b/packages/google-cloud-bigquery/samples/delete_model.py index 0190315c6bed..2703ba3f5b13 100644 --- a/packages/google-cloud-bigquery/samples/delete_model.py +++ b/packages/google-cloud-bigquery/samples/delete_model.py @@ -13,7 +13,7 @@ # limitations under the License. -def delete_model(model_id): +def delete_model(model_id: str) -> None: """Sample ID: go/samples-tracker/1534""" # [START bigquery_delete_model] diff --git a/packages/google-cloud-bigquery/samples/delete_routine.py b/packages/google-cloud-bigquery/samples/delete_routine.py index 679cbee4bc94..7362a5fea18d 100644 --- a/packages/google-cloud-bigquery/samples/delete_routine.py +++ b/packages/google-cloud-bigquery/samples/delete_routine.py @@ -13,7 +13,7 @@ # limitations under the License. -def delete_routine(routine_id): +def delete_routine(routine_id: str) -> None: # [START bigquery_delete_routine] diff --git a/packages/google-cloud-bigquery/samples/delete_table.py b/packages/google-cloud-bigquery/samples/delete_table.py index 3d0a6f0babc2..9e7ee170a384 100644 --- a/packages/google-cloud-bigquery/samples/delete_table.py +++ b/packages/google-cloud-bigquery/samples/delete_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def delete_table(table_id): +def delete_table(table_id: str) -> None: # [START bigquery_delete_table] diff --git a/packages/google-cloud-bigquery/samples/download_public_data.py b/packages/google-cloud-bigquery/samples/download_public_data.py index d10ed161a5da..a488bbbb503f 100644 --- a/packages/google-cloud-bigquery/samples/download_public_data.py +++ b/packages/google-cloud-bigquery/samples/download_public_data.py @@ -13,7 +13,7 @@ # limitations under the License. -def download_public_data(): +def download_public_data() -> None: # [START bigquery_pandas_public_data] diff --git a/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py b/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py index afb50b15c3a9..ce5200b4e152 100644 --- a/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py +++ b/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py @@ -13,7 +13,7 @@ # limitations under the License. -def download_public_data_sandbox(): +def download_public_data_sandbox() -> None: # [START bigquery_pandas_public_data_sandbox] diff --git a/packages/google-cloud-bigquery/samples/geography/conftest.py b/packages/google-cloud-bigquery/samples/geography/conftest.py index 265900f5a264..14823d10a7f6 100644 --- a/packages/google-cloud-bigquery/samples/geography/conftest.py +++ b/packages/google-cloud-bigquery/samples/geography/conftest.py @@ -13,30 +13,31 @@ # limitations under the License. import datetime +from typing import Iterator import uuid from google.cloud import bigquery import pytest -def temp_suffix(): +def temp_suffix() -> str: now = datetime.datetime.now() return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" @pytest.fixture(scope="session") -def bigquery_client(): +def bigquery_client() -> bigquery.Client: bigquery_client = bigquery.Client() return bigquery_client @pytest.fixture(scope="session") -def project_id(bigquery_client): +def project_id(bigquery_client: bigquery.Client) -> str: return bigquery_client.project @pytest.fixture -def dataset_id(bigquery_client): +def dataset_id(bigquery_client: bigquery.Client) -> Iterator[str]: dataset_id = f"geography_{temp_suffix()}" bigquery_client.create_dataset(dataset_id) yield dataset_id @@ -44,7 +45,9 @@ def dataset_id(bigquery_client): @pytest.fixture -def table_id(bigquery_client, project_id, dataset_id): +def table_id( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +) -> Iterator[str]: table_id = f"{project_id}.{dataset_id}.geography_{temp_suffix()}" table = bigquery.Table(table_id) table.schema = [ diff --git a/packages/google-cloud-bigquery/samples/geography/insert_geojson.py b/packages/google-cloud-bigquery/samples/geography/insert_geojson.py index 23f249c15beb..2db407b55641 100644 --- a/packages/google-cloud-bigquery/samples/geography/insert_geojson.py +++ b/packages/google-cloud-bigquery/samples/geography/insert_geojson.py @@ -12,8 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Mapping, Optional, Sequence + + +def insert_geojson( + override_values: Optional[Mapping[str, str]] = None +) -> Sequence[Dict[str, object]]: + + if override_values is None: + override_values = {} -def insert_geojson(override_values={}): # [START bigquery_insert_geojson] import geojson from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/geography/insert_geojson_test.py b/packages/google-cloud-bigquery/samples/geography/insert_geojson_test.py index 5ef15ee13864..507201872cc1 100644 --- a/packages/google-cloud-bigquery/samples/geography/insert_geojson_test.py +++ b/packages/google-cloud-bigquery/samples/geography/insert_geojson_test.py @@ -15,6 +15,6 @@ from . import insert_geojson -def test_insert_geojson(table_id): +def test_insert_geojson(table_id: str) -> None: errors = insert_geojson.insert_geojson(override_values={"table_id": table_id}) assert not errors diff --git a/packages/google-cloud-bigquery/samples/geography/insert_wkt.py b/packages/google-cloud-bigquery/samples/geography/insert_wkt.py index d7d3accdef2a..25c7ee727f0c 100644 --- a/packages/google-cloud-bigquery/samples/geography/insert_wkt.py +++ b/packages/google-cloud-bigquery/samples/geography/insert_wkt.py @@ -12,8 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Mapping, Optional, Sequence + + +def insert_wkt( + override_values: Optional[Mapping[str, str]] = None +) -> Sequence[Dict[str, object]]: + + if override_values is None: + override_values = {} -def insert_wkt(override_values={}): # [START bigquery_insert_geography_wkt] from google.cloud import bigquery import shapely.geometry diff --git a/packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py b/packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py index 8bcb62cec1a0..a7c3d4ed3fa3 100644 --- a/packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py +++ b/packages/google-cloud-bigquery/samples/geography/insert_wkt_test.py @@ -15,6 +15,6 @@ from . import insert_wkt -def test_insert_wkt(table_id): +def test_insert_wkt(table_id: str) -> None: errors = insert_wkt.insert_wkt(override_values={"table_id": table_id}) assert not errors diff --git a/packages/google-cloud-bigquery/samples/geography/mypy.ini b/packages/google-cloud-bigquery/samples/geography/mypy.ini new file mode 100644 index 000000000000..41898432f6b9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/geography/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +; We require type annotations in all samples. +strict = True +exclude = noxfile\.py +warn_unused_configs = True + +[mypy-geojson,pandas,shapely.*] +ignore_missing_imports = True diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 41f3849ce997..fed8be7f9534 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -5,6 +5,8 @@ charset-normalizer==2.0.12 click==8.0.4 click-plugins==1.1.1 cligj==0.7.2 +dataclasses==0.8; python_version < '3.7' +db-dtypes==0.4.0 Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 diff --git a/packages/google-cloud-bigquery/samples/geography/to_geodataframe.py b/packages/google-cloud-bigquery/samples/geography/to_geodataframe.py index fa8073fefba4..e36331f270ef 100644 --- a/packages/google-cloud-bigquery/samples/geography/to_geodataframe.py +++ b/packages/google-cloud-bigquery/samples/geography/to_geodataframe.py @@ -12,12 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery -client = bigquery.Client() +if typing.TYPE_CHECKING: + import pandas + + +client: bigquery.Client = bigquery.Client() -def get_austin_service_requests_as_geography(): +def get_austin_service_requests_as_geography() -> "pandas.DataFrame": # [START bigquery_query_results_geodataframe] sql = """ diff --git a/packages/google-cloud-bigquery/samples/geography/to_geodataframe_test.py b/packages/google-cloud-bigquery/samples/geography/to_geodataframe_test.py index 7a2ba6937c3d..7499d7001d5f 100644 --- a/packages/google-cloud-bigquery/samples/geography/to_geodataframe_test.py +++ b/packages/google-cloud-bigquery/samples/geography/to_geodataframe_test.py @@ -17,7 +17,7 @@ from .to_geodataframe import get_austin_service_requests_as_geography -def test_get_austin_service_requests_as_geography(): +def test_get_austin_service_requests_as_geography() -> None: geopandas = pytest.importorskip("geopandas") df = get_austin_service_requests_as_geography() assert isinstance(df, geopandas.GeoDataFrame) diff --git a/packages/google-cloud-bigquery/samples/get_dataset.py b/packages/google-cloud-bigquery/samples/get_dataset.py index 54ba05781dd6..5654cbdceee2 100644 --- a/packages/google-cloud-bigquery/samples/get_dataset.py +++ b/packages/google-cloud-bigquery/samples/get_dataset.py @@ -13,7 +13,7 @@ # limitations under the License. -def get_dataset(dataset_id): +def get_dataset(dataset_id: str) -> None: # [START bigquery_get_dataset] diff --git a/packages/google-cloud-bigquery/samples/get_dataset_labels.py b/packages/google-cloud-bigquery/samples/get_dataset_labels.py index 18a9ca985f51..d97ee3c01e52 100644 --- a/packages/google-cloud-bigquery/samples/get_dataset_labels.py +++ b/packages/google-cloud-bigquery/samples/get_dataset_labels.py @@ -13,7 +13,7 @@ # limitations under the License. -def get_dataset_labels(dataset_id): +def get_dataset_labels(dataset_id: str) -> None: # [START bigquery_get_dataset_labels] diff --git a/packages/google-cloud-bigquery/samples/get_model.py b/packages/google-cloud-bigquery/samples/get_model.py index 1570ef816895..dab4146ab36e 100644 --- a/packages/google-cloud-bigquery/samples/get_model.py +++ b/packages/google-cloud-bigquery/samples/get_model.py @@ -13,7 +13,7 @@ # limitations under the License. -def get_model(model_id): +def get_model(model_id: str) -> None: """Sample ID: go/samples-tracker/1510""" # [START bigquery_get_model] diff --git a/packages/google-cloud-bigquery/samples/get_routine.py b/packages/google-cloud-bigquery/samples/get_routine.py index 72715ee1bcd7..031d9a127b5e 100644 --- a/packages/google-cloud-bigquery/samples/get_routine.py +++ b/packages/google-cloud-bigquery/samples/get_routine.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def get_routine(routine_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def get_routine(routine_id: str) -> "bigquery.Routine": # [START bigquery_get_routine] diff --git a/packages/google-cloud-bigquery/samples/get_table.py b/packages/google-cloud-bigquery/samples/get_table.py index 0d1d809ba791..6195aaf9acee 100644 --- a/packages/google-cloud-bigquery/samples/get_table.py +++ b/packages/google-cloud-bigquery/samples/get_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def get_table(table_id): +def get_table(table_id: str) -> None: # [START bigquery_get_table] diff --git a/packages/google-cloud-bigquery/samples/label_dataset.py b/packages/google-cloud-bigquery/samples/label_dataset.py index bd4cd6721a57..a59743e5d073 100644 --- a/packages/google-cloud-bigquery/samples/label_dataset.py +++ b/packages/google-cloud-bigquery/samples/label_dataset.py @@ -13,7 +13,7 @@ # limitations under the License. -def label_dataset(dataset_id): +def label_dataset(dataset_id: str) -> None: # [START bigquery_label_dataset] diff --git a/packages/google-cloud-bigquery/samples/list_datasets.py b/packages/google-cloud-bigquery/samples/list_datasets.py index 6a1b93d00bb6..c1b6639a97b4 100644 --- a/packages/google-cloud-bigquery/samples/list_datasets.py +++ b/packages/google-cloud-bigquery/samples/list_datasets.py @@ -13,7 +13,7 @@ # limitations under the License. -def list_datasets(): +def list_datasets() -> None: # [START bigquery_list_datasets] diff --git a/packages/google-cloud-bigquery/samples/list_datasets_by_label.py b/packages/google-cloud-bigquery/samples/list_datasets_by_label.py index 1b310049b4b3..d1f264872f9a 100644 --- a/packages/google-cloud-bigquery/samples/list_datasets_by_label.py +++ b/packages/google-cloud-bigquery/samples/list_datasets_by_label.py @@ -13,7 +13,7 @@ # limitations under the License. -def list_datasets_by_label(): +def list_datasets_by_label() -> None: # [START bigquery_list_datasets_by_label] diff --git a/packages/google-cloud-bigquery/samples/list_models.py b/packages/google-cloud-bigquery/samples/list_models.py index 7251c001a770..df8ae0e1b4bf 100644 --- a/packages/google-cloud-bigquery/samples/list_models.py +++ b/packages/google-cloud-bigquery/samples/list_models.py @@ -13,7 +13,7 @@ # limitations under the License. -def list_models(dataset_id): +def list_models(dataset_id: str) -> None: """Sample ID: go/samples-tracker/1512""" # [START bigquery_list_models] diff --git a/packages/google-cloud-bigquery/samples/list_routines.py b/packages/google-cloud-bigquery/samples/list_routines.py index 718d40d680aa..bee7c23be3b8 100644 --- a/packages/google-cloud-bigquery/samples/list_routines.py +++ b/packages/google-cloud-bigquery/samples/list_routines.py @@ -13,7 +13,7 @@ # limitations under the License. -def list_routines(dataset_id): +def list_routines(dataset_id: str) -> None: # [START bigquery_list_routines] diff --git a/packages/google-cloud-bigquery/samples/list_tables.py b/packages/google-cloud-bigquery/samples/list_tables.py index 9ab527a4915f..df846961d222 100644 --- a/packages/google-cloud-bigquery/samples/list_tables.py +++ b/packages/google-cloud-bigquery/samples/list_tables.py @@ -13,7 +13,7 @@ # limitations under the License. -def list_tables(dataset_id): +def list_tables(dataset_id: str) -> None: # [START bigquery_list_tables] diff --git a/packages/google-cloud-bigquery/samples/load_table_clustered.py b/packages/google-cloud-bigquery/samples/load_table_clustered.py index 20d412cb3878..87b6c76ce5b2 100644 --- a/packages/google-cloud-bigquery/samples/load_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/load_table_clustered.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def load_table_clustered(table_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def load_table_clustered(table_id: str) -> "bigquery.Table": # [START bigquery_load_table_clustered] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_dataframe.py b/packages/google-cloud-bigquery/samples/load_table_dataframe.py index b75224d11f6b..db4c131f2ef0 100644 --- a/packages/google-cloud-bigquery/samples/load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/load_table_dataframe.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def load_table_dataframe(table_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def load_table_dataframe(table_id: str) -> "bigquery.Table": # [START bigquery_load_table_dataframe] import datetime diff --git a/packages/google-cloud-bigquery/samples/load_table_file.py b/packages/google-cloud-bigquery/samples/load_table_file.py index 41f0bf98473b..00226eb3c943 100644 --- a/packages/google-cloud-bigquery/samples/load_table_file.py +++ b/packages/google-cloud-bigquery/samples/load_table_file.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def load_table_file(file_path, table_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def load_table_file(file_path: str, table_id: str) -> "bigquery.Table": # [START bigquery_load_from_file] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py index 09a5d708d437..c412c63f1209 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_autodetect_csv(table_id): +def load_table_uri_autodetect_csv(table_id: str) -> None: # [START bigquery_load_table_gcs_csv_autodetect] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py index 61b7aab1287d..9d0bc3f22a03 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_autodetect_json(table_id): +def load_table_uri_autodetect_json(table_id: str) -> None: # [START bigquery_load_table_gcs_json_autodetect] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_avro.py b/packages/google-cloud-bigquery/samples/load_table_uri_avro.py index 5c25eed226b6..e9f7c39ede7f 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_avro.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_avro.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_avro(table_id): +def load_table_uri_avro(table_id: str) -> None: # [START bigquery_load_table_gcs_avro] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_cmek.py b/packages/google-cloud-bigquery/samples/load_table_uri_cmek.py index 8bd84993c293..4dfc0d3b4bb5 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_cmek.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_cmek.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_cmek(table_id, kms_key_name): +def load_table_uri_cmek(table_id: str, kms_key_name: str) -> None: # [START bigquery_load_table_gcs_json_cmek] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_csv.py b/packages/google-cloud-bigquery/samples/load_table_uri_csv.py index 0736a560cc75..9cb8c6f20c27 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_csv.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_csv.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_csv(table_id): +def load_table_uri_csv(table_id: str) -> None: # [START bigquery_load_table_gcs_csv] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_json.py b/packages/google-cloud-bigquery/samples/load_table_uri_json.py index 3c21972c80a0..409a83e8ed5f 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_json.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_json.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_json(table_id): +def load_table_uri_json(table_id: str) -> None: # [START bigquery_load_table_gcs_json] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_orc.py b/packages/google-cloud-bigquery/samples/load_table_uri_orc.py index 3ab6ff45aa0a..7babd26301ea 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_orc.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_orc.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_orc(table_id): +def load_table_uri_orc(table_id: str) -> None: # [START bigquery_load_table_gcs_orc] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_parquet.py b/packages/google-cloud-bigquery/samples/load_table_uri_parquet.py index 9df2ab1e7f02..e0ec59078954 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_parquet.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_parquet.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_parquet(table_id): +def load_table_uri_parquet(table_id: str) -> None: # [START bigquery_load_table_gcs_parquet] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py index 1aa0aa49c2e4..51c6636fa9fc 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_truncate_avro(table_id): +def load_table_uri_truncate_avro(table_id: str) -> None: # [START bigquery_load_table_gcs_avro_truncate] import io diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py index 198cdc281c12..ee8b3404372b 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_truncate_csv(table_id): +def load_table_uri_truncate_csv(table_id: str) -> None: # [START bigquery_load_table_gcs_csv_truncate] import io diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py index d67d93e7b0c4..e85e0808e573 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_truncate_json(table_id): +def load_table_uri_truncate_json(table_id: str) -> None: # [START bigquery_load_table_gcs_json_truncate] import io diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py index 90543b791a37..c730099d1469 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_truncate_orc(table_id): +def load_table_uri_truncate_orc(table_id: str) -> None: # [START bigquery_load_table_gcs_orc_truncate] import io diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py index e036fc180a99..3a0a55c8a9d2 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_truncate_parquet(table_id): +def load_table_uri_truncate_parquet(table_id: str) -> None: # [START bigquery_load_table_gcs_parquet_truncate] import io diff --git a/packages/google-cloud-bigquery/samples/magics/_helpers.py b/packages/google-cloud-bigquery/samples/magics/_helpers.py index 18a513b99ab3..c7248ee3d67e 100644 --- a/packages/google-cloud-bigquery/samples/magics/_helpers.py +++ b/packages/google-cloud-bigquery/samples/magics/_helpers.py @@ -13,7 +13,7 @@ # limitations under the License. -def strip_region_tags(sample_text): +def strip_region_tags(sample_text: str) -> str: """Remove blank lines and region tags from sample text""" magic_lines = [ line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line diff --git a/packages/google-cloud-bigquery/samples/magics/conftest.py b/packages/google-cloud-bigquery/samples/magics/conftest.py index bf86022355ff..55ea30f90d70 100644 --- a/packages/google-cloud-bigquery/samples/magics/conftest.py +++ b/packages/google-cloud-bigquery/samples/magics/conftest.py @@ -12,14 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing +from typing import Iterator + import pytest +if typing.TYPE_CHECKING: + from IPython.core.interactiveshell import TerminalInteractiveShell + interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") tools = pytest.importorskip("IPython.testing.tools") @pytest.fixture(scope="session") -def ipython(): +def ipython() -> "TerminalInteractiveShell": config = tools.default_config() config.TerminalInteractiveShell.simple_prompt = True shell = interactiveshell.TerminalInteractiveShell.instance(config=config) @@ -27,7 +33,9 @@ def ipython(): @pytest.fixture(autouse=True) -def ipython_interactive(ipython): +def ipython_interactive( + ipython: "TerminalInteractiveShell", +) -> Iterator["TerminalInteractiveShell"]: """Activate IPython's builtin hooks for the duration of the test scope. diff --git a/packages/google-cloud-bigquery/samples/magics/mypy.ini b/packages/google-cloud-bigquery/samples/magics/mypy.ini new file mode 100644 index 000000000000..af328dc5eba9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +; We require type annotations in all samples. +strict = True +exclude = noxfile\.py +warn_unused_configs = True + +[mypy-IPython.*,nox,noxfile_config,pandas] +ignore_missing_imports = True diff --git a/packages/google-cloud-bigquery/samples/magics/query.py b/packages/google-cloud-bigquery/samples/magics/query.py index c2739eacebfc..4d3b4418bb5a 100644 --- a/packages/google-cloud-bigquery/samples/magics/query.py +++ b/packages/google-cloud-bigquery/samples/magics/query.py @@ -12,12 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import IPython from . import _helpers +if typing.TYPE_CHECKING: + import pandas + -def query(): +def query() -> "pandas.DataFrame": ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") diff --git a/packages/google-cloud-bigquery/samples/magics/query_params_scalars.py b/packages/google-cloud-bigquery/samples/magics/query_params_scalars.py index a26f25aea708..e833ef93b2f6 100644 --- a/packages/google-cloud-bigquery/samples/magics/query_params_scalars.py +++ b/packages/google-cloud-bigquery/samples/magics/query_params_scalars.py @@ -12,12 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import IPython from . import _helpers +if typing.TYPE_CHECKING: + import pandas + -def query_with_parameters(): +def query_with_parameters() -> "pandas.DataFrame": ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") diff --git a/packages/google-cloud-bigquery/samples/magics/query_params_scalars_test.py b/packages/google-cloud-bigquery/samples/magics/query_params_scalars_test.py index 9b4159667501..4f481cbe9249 100644 --- a/packages/google-cloud-bigquery/samples/magics/query_params_scalars_test.py +++ b/packages/google-cloud-bigquery/samples/magics/query_params_scalars_test.py @@ -17,7 +17,7 @@ from . import query_params_scalars -def test_query_with_parameters(): +def test_query_with_parameters() -> None: df = query_params_scalars.query_with_parameters() assert isinstance(df, pandas.DataFrame) assert len(df) == 10 diff --git a/packages/google-cloud-bigquery/samples/magics/query_test.py b/packages/google-cloud-bigquery/samples/magics/query_test.py index d20797908827..1aaa9c1bb3bd 100644 --- a/packages/google-cloud-bigquery/samples/magics/query_test.py +++ b/packages/google-cloud-bigquery/samples/magics/query_test.py @@ -17,7 +17,7 @@ from . import query -def test_query(): +def test_query() -> None: df = query.query() assert isinstance(df, pandas.DataFrame) assert len(df) == 3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index f047c46b6719..5c54ecd839b4 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,3 +1,4 @@ +db-dtypes==0.4.0 google-cloud-bigquery-storage==2.12.0 google-auth-oauthlib==0.5.0 grpcio==1.44.0 @@ -9,3 +10,4 @@ pandas==1.3.5; python_version == '3.7' pandas==1.4.1; python_version >= '3.8' pyarrow==7.0.0 pytz==2021.3 +typing-extensions==3.10.0.2 diff --git a/packages/google-cloud-bigquery/samples/mypy.ini b/packages/google-cloud-bigquery/samples/mypy.ini new file mode 100644 index 000000000000..29757e47df7d --- /dev/null +++ b/packages/google-cloud-bigquery/samples/mypy.ini @@ -0,0 +1,12 @@ +[mypy] +# Should match DEFAULT_PYTHON_VERSION from root noxfile.py +python_version = 3.8 +exclude = noxfile\.py +strict = True +warn_unused_configs = True + +[mypy-google.auth,google.oauth2,geojson,google_auth_oauthlib,IPython.*] +ignore_missing_imports = True + +[mypy-pandas,pyarrow,shapely.*,test_utils.*] +ignore_missing_imports = True diff --git a/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py index 3c3caf695870..9bcb86aab6ff 100644 --- a/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def query_external_gcs_temporary_table(): +def query_external_gcs_temporary_table() -> None: # [START bigquery_query_external_gcs_temp] from google.cloud import bigquery @@ -30,7 +30,9 @@ def query_external_gcs_temporary_table(): bigquery.SchemaField("name", "STRING"), bigquery.SchemaField("post_abbr", "STRING"), ] - external_config.options.skip_leading_rows = 1 + assert external_config.csv_options is not None + external_config.csv_options.skip_leading_rows = 1 + table_id = "us_states" job_config = bigquery.QueryJobConfig(table_definitions={table_id: external_config}) diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py index 31143d1b0816..a5855e66a34b 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def query_external_sheets_permanent_table(dataset_id): +def query_external_sheets_permanent_table(dataset_id: str) -> None: # [START bigquery_query_external_sheets_perm] from google.cloud import bigquery @@ -56,8 +56,10 @@ def query_external_sheets_permanent_table(dataset_id): "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing" ) external_config.source_uris = [sheet_url] - external_config.options.skip_leading_rows = 1 # Optionally skip header row. - external_config.options.range = ( + options = external_config.google_sheets_options + assert options is not None + options.skip_leading_rows = 1 # Optionally skip header row. + options.range = ( "us-states!A20:B49" # Optionally set range of the sheet to query from. ) table.external_data_configuration = external_config diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py index a9d58e388db8..944d3b826ed7 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def query_external_sheets_temporary_table(): +def query_external_sheets_temporary_table() -> None: # [START bigquery_query_external_sheets_temp] # [START bigquery_auth_drive_scope] @@ -53,8 +53,10 @@ def query_external_sheets_temporary_table(): bigquery.SchemaField("name", "STRING"), bigquery.SchemaField("post_abbr", "STRING"), ] - external_config.options.skip_leading_rows = 1 # Optionally skip header row. - external_config.options.range = ( + options = external_config.google_sheets_options + assert options is not None + options.skip_leading_rows = 1 # Optionally skip header row. + options.range = ( "us-states!A20:B49" # Optionally set range of the sheet to query from. ) table_id = "us_states" diff --git a/packages/google-cloud-bigquery/samples/query_no_cache.py b/packages/google-cloud-bigquery/samples/query_no_cache.py index e380f0b15de0..f39c01dbc297 100644 --- a/packages/google-cloud-bigquery/samples/query_no_cache.py +++ b/packages/google-cloud-bigquery/samples/query_no_cache.py @@ -13,7 +13,7 @@ # limitations under the License. -def query_no_cache(): +def query_no_cache() -> None: # [START bigquery_query_no_cache] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/query_pagination.py b/packages/google-cloud-bigquery/samples/query_pagination.py index 57a4212cf664..2e16540506e2 100644 --- a/packages/google-cloud-bigquery/samples/query_pagination.py +++ b/packages/google-cloud-bigquery/samples/query_pagination.py @@ -13,7 +13,7 @@ # limitations under the License. -def query_pagination(): +def query_pagination() -> None: # [START bigquery_query_pagination] diff --git a/packages/google-cloud-bigquery/samples/query_script.py b/packages/google-cloud-bigquery/samples/query_script.py index 9390d352dd40..89ff5518776d 100644 --- a/packages/google-cloud-bigquery/samples/query_script.py +++ b/packages/google-cloud-bigquery/samples/query_script.py @@ -13,7 +13,7 @@ # limitations under the License. -def query_script(): +def query_script() -> None: # [START bigquery_query_script] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/query_to_arrow.py b/packages/google-cloud-bigquery/samples/query_to_arrow.py index 4a57992d13c2..157a93638c1d 100644 --- a/packages/google-cloud-bigquery/samples/query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/query_to_arrow.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def query_to_arrow(): +if typing.TYPE_CHECKING: + import pyarrow + + +def query_to_arrow() -> "pyarrow.Table": # [START bigquery_query_to_arrow] diff --git a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py index fa3c53cda066..8a8c9557dc9b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py +++ b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account.py @@ -13,9 +13,13 @@ # limitations under the License. import os +import typing + +if typing.TYPE_CHECKING: + from google.cloud import bigquery -def main(): +def main() -> "bigquery.Client": key_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") # [START bigquery_client_json_credentials] diff --git a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py index 131c69d2c658..4b5711f80d2f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py @@ -12,19 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing +from typing import Any + import google.auth import authenticate_service_account +if typing.TYPE_CHECKING: + import pytest + -def mock_credentials(*args, **kwargs): +def mock_credentials(*args: Any, **kwargs: Any) -> google.auth.credentials.Credentials: credentials, _ = google.auth.default( ["https://www.googleapis.com/auth/cloud-platform"] ) return credentials -def test_main(monkeypatch): +def test_main(monkeypatch: "pytest.MonkeyPatch") -> None: monkeypatch.setattr( "google.oauth2.service_account.Credentials.from_service_account_file", mock_credentials, diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py index 66810c03694c..bfb61bc382d8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py @@ -14,12 +14,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Optional -def run_authorized_view_tutorial(override_values={}): + +def run_authorized_view_tutorial( + override_values: Optional[Dict[str, str]] = None +) -> None: # Note to user: This is a group email for testing purposes. Replace with # your own group email address when running this code. analyst_group_email = "example-analyst-group@google.com" + if override_values is None: + override_values = {} + # [START bigquery_authorized_view_tutorial] # Create a source dataset # [START bigquery_avt_create_source_dataset] diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py index eb247c5ebc7b..cae8704864eb 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Iterator, List import uuid from google.cloud import bigquery @@ -21,19 +22,21 @@ @pytest.fixture(scope="module") -def client(): +def client() -> bigquery.Client: return bigquery.Client() @pytest.fixture -def datasets_to_delete(client): - doomed = [] +def datasets_to_delete(client: bigquery.Client) -> Iterator[List[str]]: + doomed: List[str] = [] yield doomed for item in doomed: client.delete_dataset(item, delete_contents=True, not_found_ok=True) -def test_authorized_view_tutorial(client, datasets_to_delete): +def test_authorized_view_tutorial( + client: bigquery.Client, datasets_to_delete: List[str] +) -> None: override_values = { "source_dataset_id": "github_source_data_{}".format( str(uuid.uuid4()).replace("-", "_") diff --git a/packages/google-cloud-bigquery/samples/snippets/conftest.py b/packages/google-cloud-bigquery/samples/snippets/conftest.py index e8aa084873e9..37b52256bd95 100644 --- a/packages/google-cloud-bigquery/samples/snippets/conftest.py +++ b/packages/google-cloud-bigquery/samples/snippets/conftest.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Iterator + from google.cloud import bigquery import pytest import test_utils.prefixer @@ -21,7 +23,7 @@ @pytest.fixture(scope="session", autouse=True) -def cleanup_datasets(bigquery_client: bigquery.Client): +def cleanup_datasets(bigquery_client: bigquery.Client) -> None: for dataset in bigquery_client.list_datasets(): if prefixer.should_cleanup(dataset.dataset_id): bigquery_client.delete_dataset( @@ -30,18 +32,18 @@ def cleanup_datasets(bigquery_client: bigquery.Client): @pytest.fixture(scope="session") -def bigquery_client(): +def bigquery_client() -> bigquery.Client: bigquery_client = bigquery.Client() return bigquery_client @pytest.fixture(scope="session") -def project_id(bigquery_client): +def project_id(bigquery_client: bigquery.Client) -> str: return bigquery_client.project @pytest.fixture(scope="session") -def dataset_id(bigquery_client: bigquery.Client, project_id: str): +def dataset_id(bigquery_client: bigquery.Client, project_id: str) -> Iterator[str]: dataset_id = prefixer.create_prefix() full_dataset_id = f"{project_id}.{dataset_id}" dataset = bigquery.Dataset(full_dataset_id) @@ -51,12 +53,15 @@ def dataset_id(bigquery_client: bigquery.Client, project_id: str): @pytest.fixture(scope="session") -def entity_id(bigquery_client: bigquery.Client, dataset_id: str): +def entity_id(bigquery_client: bigquery.Client, dataset_id: str) -> str: return "cloud-developer-relations@google.com" @pytest.fixture(scope="session") -def dataset_id_us_east1(bigquery_client: bigquery.Client, project_id: str): +def dataset_id_us_east1( + bigquery_client: bigquery.Client, + project_id: str, +) -> Iterator[str]: dataset_id = prefixer.create_prefix() full_dataset_id = f"{project_id}.{dataset_id}" dataset = bigquery.Dataset(full_dataset_id) @@ -69,7 +74,7 @@ def dataset_id_us_east1(bigquery_client: bigquery.Client, project_id: str): @pytest.fixture(scope="session") def table_id_us_east1( bigquery_client: bigquery.Client, project_id: str, dataset_id_us_east1: str -): +) -> Iterator[str]: table_id = prefixer.create_prefix() full_table_id = f"{project_id}.{dataset_id_us_east1}.{table_id}" table = bigquery.Table( @@ -81,7 +86,9 @@ def table_id_us_east1( @pytest.fixture -def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): +def random_table_id( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +) -> Iterator[str]: """Create a new table ID each time, so random_table_id can be used as target for load jobs. """ @@ -92,5 +99,7 @@ def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_i @pytest.fixture -def bigquery_client_patch(monkeypatch, bigquery_client): +def bigquery_client_patch( + monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client +) -> None: monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py index 2ff8a2220c55..1170c57da7ae 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def create_table_external_hive_partitioned(table_id: str): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_table_external_hive_partitioned(table_id: str) -> "bigquery.Table": original_table_id = table_id # [START bigquery_create_table_external_hivepartitioned] # Demonstrates creating an external table with hive partitioning. diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py index fccc2d408080..37deb8b12517 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import create_table_external_hive_partitioned +if typing.TYPE_CHECKING: + import pytest + -def test_create_table_external_hive_partitioned(capsys, random_table_id): +def test_create_table_external_hive_partitioned( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: table = ( create_table_external_hive_partitioned.create_table_external_hive_partitioned( random_table_id diff --git a/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py b/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py index 21776c149252..4d1a70eb1ede 100644 --- a/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py @@ -12,11 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import revoke_dataset_access import update_dataset_access +if typing.TYPE_CHECKING: + import pytest + from google.cloud import bigquery + -def test_dataset_access_permissions(capsys, dataset_id, entity_id, bigquery_client): +def test_dataset_access_permissions( + capsys: "pytest.CaptureFixture[str]", + dataset_id: str, + entity_id: str, + bigquery_client: "bigquery.Client", +) -> None: original_dataset = bigquery_client.get_dataset(dataset_id) update_dataset_access.update_dataset_access(dataset_id, entity_id) full_dataset_id = "{}.{}".format( diff --git a/packages/google-cloud-bigquery/samples/snippets/delete_job.py b/packages/google-cloud-bigquery/samples/snippets/delete_job.py index abed0c90dcad..7c8640baf5fa 100644 --- a/packages/google-cloud-bigquery/samples/snippets/delete_job.py +++ b/packages/google-cloud-bigquery/samples/snippets/delete_job.py @@ -13,7 +13,7 @@ # limitations under the License. -def delete_job_metadata(job_id: str, location: str): +def delete_job_metadata(job_id: str, location: str) -> None: orig_job_id = job_id orig_location = location # [START bigquery_delete_job] diff --git a/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py b/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py index fb407ab4b06a..ac9d52dcf48b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py @@ -12,14 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery import delete_job +if typing.TYPE_CHECKING: + import pytest + def test_delete_job_metadata( - capsys, bigquery_client: bigquery.Client, table_id_us_east1: str -): + capsys: "pytest.CaptureFixture[str]", + bigquery_client: bigquery.Client, + table_id_us_east1: str, +) -> None: query_job: bigquery.QueryJob = bigquery_client.query( f"SELECT COUNT(*) FROM `{table_id_us_east1}`", location="us-east1", diff --git a/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py index 7fe1cde854f8..9d42a4eda7b6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py @@ -11,8 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +import typing +from typing import Iterator + import pytest +if typing.TYPE_CHECKING: + from IPython.terminal.interactiveshell import TerminalInteractiveShell + IPython = pytest.importorskip("IPython") interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") tools = pytest.importorskip("IPython.testing.tools") @@ -23,7 +30,7 @@ @pytest.fixture(scope="session") -def ipython(): +def ipython() -> "TerminalInteractiveShell": config = tools.default_config() config.TerminalInteractiveShell.simple_prompt = True shell = interactiveshell.TerminalInteractiveShell.instance(config=config) @@ -31,7 +38,9 @@ def ipython(): @pytest.fixture() -def ipython_interactive(request, ipython): +def ipython_interactive( + request: pytest.FixtureRequest, ipython: "TerminalInteractiveShell" +) -> Iterator["TerminalInteractiveShell"]: """Activate IPython's builtin hooks for the duration of the test scope. @@ -40,7 +49,7 @@ def ipython_interactive(request, ipython): yield ipython -def _strip_region_tags(sample_text): +def _strip_region_tags(sample_text: str) -> str: """Remove blank lines and region tags from sample text""" magic_lines = [ line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line @@ -48,7 +57,7 @@ def _strip_region_tags(sample_text): return "\n".join(magic_lines) -def test_jupyter_tutorial(ipython): +def test_jupyter_tutorial(ipython: "TerminalInteractiveShell") -> None: matplotlib.use("agg") ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") diff --git a/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore.py b/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore.py index bf9d0134931e..6c33fd0ff576 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_firestore(table_id): +def load_table_uri_firestore(table_id: str) -> None: orig_table_id = table_id # [START bigquery_load_table_gcs_firestore] # TODO(developer): Set table_id to the ID of the table to create. diff --git a/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py b/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py index ffa02cdf9024..552fa2e355d5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import load_table_uri_firestore +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_firestore(capsys, random_table_id): +def test_load_table_uri_firestore( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_firestore.load_table_uri_firestore(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py b/packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py index c08a32addfbd..9cbdef4501e9 100644 --- a/packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py +++ b/packages/google-cloud-bigquery/samples/snippets/manage_job_cancel.py @@ -20,7 +20,7 @@ def cancel_job( client: bigquery.Client, location: str = "us", job_id: str = "abcd-efgh-ijkl-mnop", -): +) -> None: job = client.cancel_job(job_id, location=location) print(f"{job.location}:{job.job_id} cancelled") diff --git a/packages/google-cloud-bigquery/samples/snippets/manage_job_get.py b/packages/google-cloud-bigquery/samples/snippets/manage_job_get.py index cb54fd7bb09f..ca7ffc0c9bf1 100644 --- a/packages/google-cloud-bigquery/samples/snippets/manage_job_get.py +++ b/packages/google-cloud-bigquery/samples/snippets/manage_job_get.py @@ -20,7 +20,7 @@ def get_job( client: bigquery.Client, location: str = "us", job_id: str = "abcd-efgh-ijkl-mnop", -): +) -> None: job = client.get_job(job_id, location=location) # All job classes have "location" and "job_id" string properties. diff --git a/packages/google-cloud-bigquery/samples/snippets/manage_job_test.py b/packages/google-cloud-bigquery/samples/snippets/manage_job_test.py index 745b7bbbeb83..630be365bac5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/manage_job_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/manage_job_test.py @@ -19,7 +19,7 @@ import manage_job_get -def test_manage_job(capsys: pytest.CaptureFixture): +def test_manage_job(capsys: pytest.CaptureFixture[str]) -> None: client = bigquery.Client() sql = """ SELECT corpus diff --git a/packages/google-cloud-bigquery/samples/snippets/materialized_view.py b/packages/google-cloud-bigquery/samples/snippets/materialized_view.py index 429bd98b4fa1..adb3688a4010 100644 --- a/packages/google-cloud-bigquery/samples/snippets/materialized_view.py +++ b/packages/google-cloud-bigquery/samples/snippets/materialized_view.py @@ -12,8 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing +from typing import Dict, Optional + +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_materialized_view( + override_values: Optional[Dict[str, str]] = None +) -> "bigquery.Table": + if override_values is None: + override_values = {} -def create_materialized_view(override_values={}): # [START bigquery_create_materialized_view] from google.cloud import bigquery @@ -41,7 +52,12 @@ def create_materialized_view(override_values={}): return view -def update_materialized_view(override_values={}): +def update_materialized_view( + override_values: Optional[Dict[str, str]] = None +) -> "bigquery.Table": + if override_values is None: + override_values = {} + # [START bigquery_update_materialized_view] import datetime from google.cloud import bigquery @@ -69,7 +85,10 @@ def update_materialized_view(override_values={}): return view -def delete_materialized_view(override_values={}): +def delete_materialized_view(override_values: Optional[Dict[str, str]] = None) -> None: + if override_values is None: + override_values = {} + # [START bigquery_delete_materialized_view] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py b/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py index 75c6b2106f39..70869346ff03 100644 --- a/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py @@ -13,6 +13,7 @@ # limitations under the License. import datetime +from typing import Iterator import uuid from google.api_core import exceptions @@ -22,18 +23,20 @@ import materialized_view -def temp_suffix(): +def temp_suffix() -> str: now = datetime.datetime.now() return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" @pytest.fixture(autouse=True) -def bigquery_client_patch(monkeypatch, bigquery_client): +def bigquery_client_patch( + monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client +) -> None: monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) @pytest.fixture(scope="module") -def dataset_id(bigquery_client): +def dataset_id(bigquery_client: bigquery.Client) -> Iterator[str]: dataset_id = f"mvdataset_{temp_suffix()}" bigquery_client.create_dataset(dataset_id) yield dataset_id @@ -41,7 +44,9 @@ def dataset_id(bigquery_client): @pytest.fixture(scope="module") -def base_table_id(bigquery_client, project_id, dataset_id): +def base_table_id( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +) -> Iterator[str]: base_table_id = f"{project_id}.{dataset_id}.base_{temp_suffix()}" # Schema from materialized views guide: # https://cloud.google.com/bigquery/docs/materialized-views#create @@ -56,13 +61,20 @@ def base_table_id(bigquery_client, project_id, dataset_id): @pytest.fixture(scope="module") -def view_id(bigquery_client, project_id, dataset_id): +def view_id( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +) -> Iterator[str]: view_id = f"{project_id}.{dataset_id}.mview_{temp_suffix()}" yield view_id bigquery_client.delete_table(view_id, not_found_ok=True) -def test_materialized_view(capsys, bigquery_client, base_table_id, view_id): +def test_materialized_view( + capsys: pytest.CaptureFixture[str], + bigquery_client: bigquery.Client, + base_table_id: str, + view_id: str, +) -> None: override_values = { "base_table_id": base_table_id, "view_id": view_id, diff --git a/packages/google-cloud-bigquery/samples/snippets/mypy.ini b/packages/google-cloud-bigquery/samples/snippets/mypy.ini new file mode 100644 index 000000000000..3cc4b8965a68 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +; We require type annotations in all samples. +strict = True +exclude = noxfile\.py +warn_unused_configs = True + +[mypy-google.auth,google.oauth2,google_auth_oauthlib,IPython.*,test_utils.*] +ignore_missing_imports = True diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py index ed08b279a93c..b330a3c211ab 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py @@ -14,8 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Optional + + +def run_natality_tutorial(override_values: Optional[Dict[str, str]] = None) -> None: + if override_values is None: + override_values = {} -def run_natality_tutorial(override_values={}): # [START bigquery_query_natality_tutorial] """Create a Google BigQuery linear regression input table. diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py index d9c89bef25d3..f5673852809d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Iterator, List import uuid from google.cloud import bigquery @@ -21,19 +22,21 @@ @pytest.fixture(scope="module") -def client(): +def client() -> bigquery.Client: return bigquery.Client() @pytest.fixture -def datasets_to_delete(client): - doomed = [] +def datasets_to_delete(client: bigquery.Client) -> Iterator[List[str]]: + doomed: List[str] = [] yield doomed for item in doomed: client.delete_dataset(item, delete_contents=True) -def test_natality_tutorial(client, datasets_to_delete): +def test_natality_tutorial( + client: bigquery.Client, datasets_to_delete: List[str] +) -> None: override_values = { "dataset_id": "natality_regression_{}".format( str(uuid.uuid4()).replace("-", "_") diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart.py b/packages/google-cloud-bigquery/samples/snippets/quickstart.py index 1b0ef5b3ad85..f9628da7d4ae 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart.py @@ -14,8 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Optional + + +def run_quickstart(override_values: Optional[Dict[str, str]] = None) -> None: + + if override_values is None: + override_values = {} -def run_quickstart(override_values={}): # [START bigquery_quickstart] # Imports the Google Cloud client library from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py index a5e3a13e3c81..b0bad5ee5356 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Iterator, List import uuid from google.cloud import bigquery @@ -26,19 +27,23 @@ @pytest.fixture(scope="module") -def client(): +def client() -> bigquery.Client: return bigquery.Client() @pytest.fixture -def datasets_to_delete(client): - doomed = [] +def datasets_to_delete(client: bigquery.Client) -> Iterator[List[str]]: + doomed: List[str] = [] yield doomed for item in doomed: client.delete_dataset(item, delete_contents=True) -def test_quickstart(capsys, client, datasets_to_delete): +def test_quickstart( + capsys: "pytest.CaptureFixture[str]", + client: bigquery.Client, + datasets_to_delete: List[str], +) -> None: override_values = { "dataset_id": "my_new_dataset_{}".format(str(uuid.uuid4()).replace("-", "_")), diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index f047c46b6719..5c54ecd839b4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,3 +1,4 @@ +db-dtypes==0.4.0 google-cloud-bigquery-storage==2.12.0 google-auth-oauthlib==0.5.0 grpcio==1.44.0 @@ -9,3 +10,4 @@ pandas==1.3.5; python_version == '3.7' pandas==1.4.1; python_version >= '3.8' pyarrow==7.0.0 pytz==2021.3 +typing-extensions==3.10.0.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/revoke_dataset_access.py b/packages/google-cloud-bigquery/samples/snippets/revoke_dataset_access.py index ce78f5750316..c8cb731ac497 100644 --- a/packages/google-cloud-bigquery/samples/snippets/revoke_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/snippets/revoke_dataset_access.py @@ -13,7 +13,7 @@ # limitations under the License. -def revoke_dataset_access(dataset_id: str, entity_id: str): +def revoke_dataset_access(dataset_id: str, entity_id: str) -> None: original_dataset_id = dataset_id original_entity_id = entity_id diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app.py b/packages/google-cloud-bigquery/samples/snippets/simple_app.py index c21ae86f4a94..3d856d4bb5a0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app.py @@ -22,7 +22,7 @@ # [END bigquery_simple_app_deps] -def query_stackoverflow(): +def query_stackoverflow() -> None: # [START bigquery_simple_app_client] client = bigquery.Client() # [END bigquery_simple_app_client] diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py b/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py index 5c608e1fdc53..de4e1ce34409 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import simple_app +if typing.TYPE_CHECKING: + import pytest + -def test_query_stackoverflow(capsys): +def test_query_stackoverflow(capsys: "pytest.CaptureFixture[str]") -> None: simple_app.query_stackoverflow() out, _ = capsys.readouterr() assert "views" in out diff --git a/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py b/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py index 912fd76e24ee..ef5ec196ac84 100644 --- a/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py +++ b/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Iterator + from google.cloud import bigquery import pytest @@ -20,14 +22,18 @@ @pytest.fixture -def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): +def table_id( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +) -> Iterator[str]: table_id = f"{prefixer.create_prefix()}_update_with_dml" yield table_id full_table_id = f"{project_id}.{dataset_id}.{table_id}" bigquery_client.delete_table(full_table_id, not_found_ok=True) -def test_update_with_dml(bigquery_client_patch, dataset_id, table_id): +def test_update_with_dml( + bigquery_client_patch: None, dataset_id: str, table_id: str +) -> None: override_values = { "dataset_id": dataset_id, "table_id": table_id, diff --git a/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py b/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py index a606a2d56a83..7b3293ea5570 100644 --- a/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/snippets/update_dataset_access.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_dataset_access(dataset_id: str, entity_id: str): +def update_dataset_access(dataset_id: str, entity_id: str) -> None: original_dataset_id = dataset_id original_entity_id = entity_id diff --git a/packages/google-cloud-bigquery/samples/snippets/update_with_dml.py b/packages/google-cloud-bigquery/samples/snippets/update_with_dml.py index 7fd09dd80c13..2d0294eadec4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/update_with_dml.py +++ b/packages/google-cloud-bigquery/samples/snippets/update_with_dml.py @@ -14,6 +14,7 @@ # [START bigquery_update_with_dml] import pathlib +from typing import Dict, Optional from google.cloud import bigquery from google.cloud.bigquery import enums @@ -25,7 +26,7 @@ def load_from_newline_delimited_json( project_id: str, dataset_id: str, table_id: str, -): +) -> None: full_table_id = f"{project_id}.{dataset_id}.{table_id}" job_config = bigquery.LoadJobConfig() job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON @@ -48,7 +49,7 @@ def load_from_newline_delimited_json( def update_with_dml( client: bigquery.Client, project_id: str, dataset_id: str, table_id: str -): +) -> int: query_text = f""" UPDATE `{project_id}.{dataset_id}.{table_id}` SET ip_address = REGEXP_REPLACE(ip_address, r"(\\.[0-9]+)$", ".0") @@ -59,11 +60,16 @@ def update_with_dml( # Wait for query job to finish. query_job.result() + assert query_job.num_dml_affected_rows is not None + print(f"DML query modified {query_job.num_dml_affected_rows} rows.") return query_job.num_dml_affected_rows -def run_sample(override_values={}): +def run_sample(override_values: Optional[Dict[str, str]] = None) -> int: + if override_values is None: + override_values = {} + client = bigquery.Client() filepath = pathlib.Path(__file__).parent / "user_sessions_data.json" project_id = client.project diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py index e8dccf143cb0..487a56c5ff9c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py @@ -23,7 +23,7 @@ import argparse -def main(project): +def main(project: str) -> None: # [START bigquery_auth_user_flow] from google_auth_oauthlib import flow @@ -73,13 +73,6 @@ def main(project): parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) - parser.add_argument( - "--launch-browser", - help="Use a local server flow to authenticate. ", - action="store_true", - ) parser.add_argument("project", help="Project to use for BigQuery billing.") - args = parser.parse_args() - - main(args.project, launch_browser=args.launch_browser) + main(args.project) diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py index 66c1bddb7f2d..e2794e83b681 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py @@ -13,6 +13,7 @@ # limitations under the License. import os +from typing import Iterator, Union import google.auth import mock @@ -23,9 +24,11 @@ PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] +MockType = Union[mock.mock.MagicMock, mock.mock.AsyncMock] + @pytest.fixture -def mock_flow(): +def mock_flow() -> Iterator[MockType]: flow_patch = mock.patch("google_auth_oauthlib.flow.InstalledAppFlow", autospec=True) with flow_patch as flow_mock: @@ -34,7 +37,9 @@ def mock_flow(): yield flow_mock -def test_auth_query_console(mock_flow, capsys): +def test_auth_query_console( + mock_flow: MockType, capsys: pytest.CaptureFixture[str] +) -> None: main(PROJECT) out, _ = capsys.readouterr() # Fun fact: William P. Wood was the 1st director of the US Secret Service. diff --git a/packages/google-cloud-bigquery/samples/snippets/view.py b/packages/google-cloud-bigquery/samples/snippets/view.py index ad3f117177cb..5e976f68a2b8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/view.py +++ b/packages/google-cloud-bigquery/samples/snippets/view.py @@ -12,8 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing +from typing import Dict, Optional, Tuple + +try: + from typing import TypedDict +except ImportError: + from typing_extensions import TypedDict + +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +class OverridesDict(TypedDict, total=False): + analyst_group_email: str + view_dataset_id: str + view_id: str + view_reference: Dict[str, str] + source_dataset_id: str + source_id: str + + +def create_view(override_values: Optional[Dict[str, str]] = None) -> "bigquery.Table": + if override_values is None: + override_values = {} -def create_view(override_values={}): # [START bigquery_create_view] from google.cloud import bigquery @@ -43,7 +66,10 @@ def create_view(override_values={}): return view -def get_view(override_values={}): +def get_view(override_values: Optional[Dict[str, str]] = None) -> "bigquery.Table": + if override_values is None: + override_values = {} + # [START bigquery_get_view] from google.cloud import bigquery @@ -65,7 +91,10 @@ def get_view(override_values={}): return view -def update_view(override_values={}): +def update_view(override_values: Optional[Dict[str, str]] = None) -> "bigquery.Table": + if override_values is None: + override_values = {} + # [START bigquery_update_view_query] from google.cloud import bigquery @@ -95,7 +124,13 @@ def update_view(override_values={}): return view -def grant_access(override_values={}): +def grant_access( + override_values: Optional[OverridesDict] = None, +) -> Tuple["bigquery.Dataset", "bigquery.Dataset"]: + + if override_values is None: + override_values = {} + # [START bigquery_grant_view_access] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/snippets/view_test.py b/packages/google-cloud-bigquery/samples/snippets/view_test.py index 77105b61ae6f..4d0d43b77507 100644 --- a/packages/google-cloud-bigquery/samples/snippets/view_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/view_test.py @@ -13,6 +13,7 @@ # limitations under the License. import datetime +from typing import Iterator import uuid from google.cloud import bigquery @@ -21,18 +22,20 @@ import view -def temp_suffix(): +def temp_suffix() -> str: now = datetime.datetime.now() return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" @pytest.fixture(autouse=True) -def bigquery_client_patch(monkeypatch, bigquery_client): +def bigquery_client_patch( + monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client +) -> None: monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) @pytest.fixture(scope="module") -def view_dataset_id(bigquery_client, project_id): +def view_dataset_id(bigquery_client: bigquery.Client, project_id: str) -> Iterator[str]: dataset_id = f"{project_id}.view_{temp_suffix()}" bigquery_client.create_dataset(dataset_id) yield dataset_id @@ -40,14 +43,16 @@ def view_dataset_id(bigquery_client, project_id): @pytest.fixture(scope="module") -def view_id(bigquery_client, view_dataset_id): +def view_id(bigquery_client: bigquery.Client, view_dataset_id: str) -> Iterator[str]: view_id = f"{view_dataset_id}.my_view" yield view_id bigquery_client.delete_table(view_id, not_found_ok=True) @pytest.fixture(scope="module") -def source_dataset_id(bigquery_client, project_id): +def source_dataset_id( + bigquery_client: bigquery.Client, project_id: str +) -> Iterator[str]: dataset_id = f"{project_id}.view_{temp_suffix()}" bigquery_client.create_dataset(dataset_id) yield dataset_id @@ -55,7 +60,9 @@ def source_dataset_id(bigquery_client, project_id): @pytest.fixture(scope="module") -def source_table_id(bigquery_client, source_dataset_id): +def source_table_id( + bigquery_client: bigquery.Client, source_dataset_id: str +) -> Iterator[str]: source_table_id = f"{source_dataset_id}.us_states" job_config = bigquery.LoadJobConfig( schema=[ @@ -74,7 +81,13 @@ def source_table_id(bigquery_client, source_dataset_id): bigquery_client.delete_table(source_table_id, not_found_ok=True) -def test_view(capsys, view_id, view_dataset_id, source_table_id, source_dataset_id): +def test_view( + capsys: pytest.CaptureFixture[str], + view_id: str, + view_dataset_id: str, + source_table_id: str, + source_dataset_id: str, +) -> None: override_values = { "view_id": view_id, "source_id": source_table_id, @@ -99,7 +112,7 @@ def test_view(capsys, view_id, view_dataset_id, source_table_id, source_dataset_ assert view_id in out project_id, dataset_id, table_id = view_id.split(".") - override_values = { + overrides: view.OverridesDict = { "analyst_group_email": "cloud-dpes-bigquery@google.com", "view_dataset_id": view_dataset_id, "source_dataset_id": source_dataset_id, @@ -109,7 +122,7 @@ def test_view(capsys, view_id, view_dataset_id, source_table_id, source_dataset_ "tableId": table_id, }, } - view_dataset, source_dataset = view.grant_access(override_values) + view_dataset, source_dataset = view.grant_access(overrides) assert len(view_dataset.access_entries) != 0 assert len(source_dataset.access_entries) != 0 out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/table_exists.py b/packages/google-cloud-bigquery/samples/table_exists.py index 152d95534add..6edba9239012 100644 --- a/packages/google-cloud-bigquery/samples/table_exists.py +++ b/packages/google-cloud-bigquery/samples/table_exists.py @@ -13,7 +13,7 @@ # limitations under the License. -def table_exists(table_id): +def table_exists(table_id: str) -> None: # [START bigquery_table_exists] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows.py b/packages/google-cloud-bigquery/samples/table_insert_rows.py index 80048b411d86..8aa723fe040c 100644 --- a/packages/google-cloud-bigquery/samples/table_insert_rows.py +++ b/packages/google-cloud-bigquery/samples/table_insert_rows.py @@ -13,7 +13,7 @@ # limitations under the License. -def table_insert_rows(table_id): +def table_insert_rows(table_id: str) -> None: # [START bigquery_table_insert_rows] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py index 202064bda598..b2bd06372264 100644 --- a/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py +++ b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py @@ -13,7 +13,7 @@ # limitations under the License. -def table_insert_rows_explicit_none_insert_ids(table_id): +def table_insert_rows_explicit_none_insert_ids(table_id: str) -> None: # [START bigquery_table_insert_rows_explicit_none_insert_ids] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index 0fdacaaec6e0..b7a2ad5874dc 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -13,6 +13,7 @@ # limitations under the License. import datetime +from typing import Iterator import uuid import google.auth @@ -20,11 +21,10 @@ import pytest from google.cloud import bigquery -from google.cloud import bigquery_v2 @pytest.fixture(scope="session", autouse=True) -def client(): +def client() -> bigquery.Client: credentials, project = google.auth.default( scopes=[ "https://www.googleapis.com/auth/drive", @@ -34,12 +34,12 @@ def client(): real_client = bigquery.Client(credentials=credentials, project=project) mock_client = mock.create_autospec(bigquery.Client) mock_client.return_value = real_client - bigquery.Client = mock_client + bigquery.Client = mock_client # type: ignore return real_client @pytest.fixture -def random_table_id(dataset_id): +def random_table_id(dataset_id: str) -> str: now = datetime.datetime.now() random_table_id = "example_table_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -48,7 +48,7 @@ def random_table_id(dataset_id): @pytest.fixture -def random_dataset_id(client): +def random_dataset_id(client: bigquery.Client) -> Iterator[str]: now = datetime.datetime.now() random_dataset_id = "example_dataset_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -58,7 +58,7 @@ def random_dataset_id(client): @pytest.fixture -def random_routine_id(dataset_id): +def random_routine_id(dataset_id: str) -> str: now = datetime.datetime.now() random_routine_id = "example_routine_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -67,7 +67,7 @@ def random_routine_id(dataset_id): @pytest.fixture -def dataset_id(client): +def dataset_id(client: bigquery.Client) -> Iterator[str]: now = datetime.datetime.now() dataset_id = "python_dataset_sample_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -78,7 +78,7 @@ def dataset_id(client): @pytest.fixture -def table_id(client, dataset_id): +def table_id(client: bigquery.Client, dataset_id: str) -> Iterator[str]: now = datetime.datetime.now() table_id = "python_table_sample_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -91,7 +91,7 @@ def table_id(client, dataset_id): @pytest.fixture -def table_with_schema_id(client, dataset_id): +def table_with_schema_id(client: bigquery.Client, dataset_id: str) -> Iterator[str]: now = datetime.datetime.now() table_id = "python_table_with_schema_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -107,12 +107,12 @@ def table_with_schema_id(client, dataset_id): @pytest.fixture -def table_with_data_id(): +def table_with_data_id() -> str: return "bigquery-public-data.samples.shakespeare" @pytest.fixture -def routine_id(client, dataset_id): +def routine_id(client: bigquery.Client, dataset_id: str) -> Iterator[str]: now = datetime.datetime.now() routine_id = "python_routine_sample_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -125,8 +125,8 @@ def routine_id(client, dataset_id): routine.arguments = [ bigquery.RoutineArgument( name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ) ] @@ -137,7 +137,7 @@ def routine_id(client, dataset_id): @pytest.fixture -def model_id(client, dataset_id): +def model_id(client: bigquery.Client, dataset_id: str) -> str: model_id = "{}.{}".format(dataset_id, uuid.uuid4().hex) # The only way to create a model resource is via SQL. @@ -163,5 +163,5 @@ def model_id(client, dataset_id): @pytest.fixture -def kms_key_name(): +def kms_key_name() -> str: return "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/test" diff --git a/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py b/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py index d89fcb6b7022..5c7184766eca 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py +++ b/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import add_empty_column +if typing.TYPE_CHECKING: + import pytest + -def test_add_empty_column(capsys, table_id): +def test_add_empty_column(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None: add_empty_column.add_empty_column(table_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py index a5f647bdbda2..368e5cad6c8f 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py +++ b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import browse_table_data +if typing.TYPE_CHECKING: + import pytest + -def test_browse_table_data(capsys, table_with_data_id): +def test_browse_table_data( + capsys: "pytest.CaptureFixture[str]", table_with_data_id: str +) -> None: browse_table_data.browse_table_data(table_with_data_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py b/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py index 896950a8253f..a2845b7ad478 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py @@ -12,11 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_list_jobs from .. import create_job +if typing.TYPE_CHECKING: + from google.cloud import bigquery + import pytest + -def test_client_list_jobs(capsys, client): +def test_client_list_jobs( + capsys: "pytest.CaptureFixture[str]", client: "bigquery.Client" +) -> None: job = create_job.create_job() client.cancel_job(job.job_id) diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py b/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py index f1d72a8587c6..24f86c700de2 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_load_partitioned_table +if typing.TYPE_CHECKING: + import pytest + -def test_client_load_partitioned_table(capsys, random_table_id): +def test_client_load_partitioned_table( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: client_load_partitioned_table.client_load_partitioned_table(random_table_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query.py b/packages/google-cloud-bigquery/samples/tests/test_client_query.py index 673ed2b668e1..a8e3c343e202 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query +if typing.TYPE_CHECKING: + import pytest + -def test_client_query( - capsys, -): +def test_client_query(capsys: "pytest.CaptureFixture[str]") -> None: client_query.client_query() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py index 254533f78778..1eb5a1ed65eb 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import client_query_add_column +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_add_column(capsys, random_table_id, client): +def test_client_query_add_column( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client +) -> None: schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py index 3335950ade5a..548fe3ac31bc 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_batch +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_batch( - capsys, -): +def test_client_query_batch(capsys: "pytest.CaptureFixture[str]") -> None: job = client_query_batch.client_query_batch() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py index 6bcdd498a215..067bc16ec380 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_destination_table +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_destination_table(capsys, table_id): +def test_client_query_destination_table( + capsys: "pytest.CaptureFixture[str]", table_id: str +) -> None: client_query_destination_table.client_query_destination_table(table_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py index b4bdd588c57e..02b1315319c5 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_destination_table_clustered +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_destination_table_clustered(capsys, random_table_id): +def test_client_query_destination_table_clustered( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: client_query_destination_table_clustered.client_query_destination_table_clustered( random_table_id diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py index 4f9e3bc9a944..f2fe3bc39699 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_destination_table_cmek +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_destination_table_cmek(capsys, random_table_id, kms_key_name): +def test_client_query_destination_table_cmek( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str +) -> None: client_query_destination_table_cmek.client_query_destination_table_cmek( random_table_id, kms_key_name diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py index 46077497b1c7..0071ee4a4681 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_destination_table_legacy +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_destination_table_legacy(capsys, random_table_id): +def test_client_query_destination_table_legacy( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: client_query_destination_table_legacy.client_query_destination_table_legacy( random_table_id diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py index 2141435f253c..cffb152efb3d 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_dry_run +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_dry_run( - capsys, -): +def test_client_query_dry_run(capsys: "pytest.CaptureFixture[str]") -> None: query_job = client_query_dry_run.client_query_dry_run() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py index 9d3f8ab99a55..b12b5a9344b4 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py @@ -13,13 +13,15 @@ # limitations under the License. import re +import typing from .. import client_query_legacy_sql +if typing.TYPE_CHECKING: + import pytest -def test_client_query_legacy_sql( - capsys, -): + +def test_client_query_legacy_sql(capsys: "pytest.CaptureFixture[str]") -> None: client_query_legacy_sql.client_query_legacy_sql() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py index 0c5b7aa6f982..93fa0f3cf39c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py @@ -12,12 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import client_query_relax_column +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_relax_column(capsys, random_table_id, client): +def test_client_query_relax_column( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, + client: bigquery.Client, +) -> None: schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py index 6608ff0a4547..fcd3f69722b4 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_w_array_params +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_w_array_params( - capsys, -): +def test_client_query_w_array_params(capsys: "pytest.CaptureFixture[str]") -> None: client_query_w_array_params.client_query_w_array_params() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py index f53f72fdfa2a..85ef1dc4adda 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_w_named_params +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_w_named_params( - capsys, -): +def test_client_query_w_named_params(capsys: "pytest.CaptureFixture[str]") -> None: client_query_w_named_params.client_query_w_named_params() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py index c91b10f211e8..8ade676ab94f 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_w_positional_params +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_w_positional_params( - capsys, -): +def test_client_query_w_positional_params(capsys: "pytest.CaptureFixture[str]") -> None: client_query_w_positional_params.client_query_w_positional_params() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py index dfb86fb654cd..3198dbad51c4 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_w_struct_params +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_w_struct_params( - capsys, -): +def test_client_query_w_struct_params(capsys: "pytest.CaptureFixture[str]") -> None: client_query_w_struct_params.client_query_w_struct_params() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py index 51dfa129613b..a3bbccdd4360 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_w_timestamp_params +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_w_timestamp_params( - capsys, -): +def test_client_query_w_timestamp_params(capsys: "pytest.CaptureFixture[str]") -> None: client_query_w_timestamp_params.client_query_w_timestamp_params() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py index 726410e86bb4..d5a6c121e3fc 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py @@ -12,12 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import pytest from .. import copy_table +if typing.TYPE_CHECKING: + from google.cloud import bigquery + -def test_copy_table(capsys, table_with_data_id, random_table_id, client): +def test_copy_table( + capsys: "pytest.CaptureFixture[str]", + table_with_data_id: str, + random_table_id: str, + client: "bigquery.Client", +) -> None: pytest.skip("b/210907595: copy fails for shakespeare table") copy_table.copy_table(table_with_data_id, random_table_id) diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py index 63163d563564..1bdec2f35023 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py @@ -17,7 +17,12 @@ from .. import copy_table_cmek -def test_copy_table_cmek(capsys, random_table_id, table_with_data_id, kms_key_name): +def test_copy_table_cmek( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, + table_with_data_id: str, + kms_key_name: str, +) -> None: pytest.skip("b/210907595: copy fails for shakespeare table") copy_table_cmek.copy_table_cmek(random_table_id, table_with_data_id, kms_key_name) diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py index 5bc4668b08fb..e8b27d2a98cc 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py @@ -13,12 +13,22 @@ # limitations under the License. import io +import typing + from google.cloud import bigquery from .. import copy_table_multiple_source +if typing.TYPE_CHECKING: + import pytest + -def test_copy_table_multiple_source(capsys, random_table_id, random_dataset_id, client): +def test_copy_table_multiple_source( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, + random_dataset_id: str, + client: bigquery.Client, +) -> None: dataset = bigquery.Dataset(random_dataset_id) dataset.location = "US" diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py index a000038030e1..e7a897f8f932 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import create_dataset +if typing.TYPE_CHECKING: + import pytest + -def test_create_dataset(capsys, random_dataset_id): +def test_create_dataset( + capsys: "pytest.CaptureFixture[str]", random_dataset_id: str +) -> None: create_dataset.create_dataset(random_dataset_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_job.py b/packages/google-cloud-bigquery/samples/tests/test_create_job.py index eab4b3e485f9..9e6621e91b59 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_job.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_job.py @@ -12,10 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import create_job +if typing.TYPE_CHECKING: + import pytest + from google.cloud import bigquery + -def test_create_job(capsys, client): +def test_create_job( + capsys: "pytest.CaptureFixture[str]", client: "bigquery.Client" +) -> None: query_job = create_job.create_job() client.cancel_job(query_job.job_id, location=query_job.location) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table.py b/packages/google-cloud-bigquery/samples/tests/test_create_table.py index 48e52889acce..98a0fa936ed1 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import create_table +if typing.TYPE_CHECKING: + import pytest + -def test_create_table(capsys, random_table_id): +def test_create_table( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: create_table.create_table(random_table_id) out, err = capsys.readouterr() assert "Created table {}".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table_clustered.py b/packages/google-cloud-bigquery/samples/tests/test_create_table_clustered.py index 8eab5d48b5e9..a3e483441354 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table_clustered.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import create_table_clustered +if typing.TYPE_CHECKING: + import pytest + -def test_create_table_clustered(capsys, random_table_id): +def test_create_table_clustered( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: table = create_table_clustered.create_table_clustered(random_table_id) out, _ = capsys.readouterr() assert "Created clustered table {}".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py b/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py index 9745966bf02b..1c06b66fe18a 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table_range_partitioned.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import create_table_range_partitioned +if typing.TYPE_CHECKING: + import pytest + -def test_create_table_range_partitioned(capsys, random_table_id): +def test_create_table_range_partitioned( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: table = create_table_range_partitioned.create_table_range_partitioned( random_table_id ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py b/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py index 6bc38b4d27ce..bfef4368f8ea 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py +++ b/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py @@ -12,12 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import dataset_exists +if typing.TYPE_CHECKING: + import pytest + -def test_dataset_exists(capsys, random_dataset_id, client): +def test_dataset_exists( + capsys: "pytest.CaptureFixture[str]", + random_dataset_id: str, + client: bigquery.Client, +) -> None: dataset_exists.dataset_exists(random_dataset_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py index 0dbb2a76bdd9..75a024856635 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import delete_dataset_labels from .. import get_dataset_labels from .. import label_dataset +if typing.TYPE_CHECKING: + import pytest + -def test_dataset_label_samples(capsys, dataset_id): +def test_dataset_label_samples( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: label_dataset.label_dataset(dataset_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py index 1f9b3c823fb9..9347bf185de5 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import delete_dataset +if typing.TYPE_CHECKING: + import pytest + -def test_delete_dataset(capsys, dataset_id): +def test_delete_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None: delete_dataset.delete_dataset(dataset_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_table.py b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py index 7065743b0485..aca2df62faf3 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_delete_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import delete_table +if typing.TYPE_CHECKING: + import pytest + -def test_delete_table(capsys, table_id): +def test_delete_table(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None: delete_table.delete_table(table_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py b/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py index 2412c147f18a..02c2c6f9cbb5 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py +++ b/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py @@ -21,7 +21,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data(caplog, capsys): +def test_download_public_data( + caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] +) -> None: # Enable debug-level logging to verify the BigQuery Storage API is used. caplog.set_level(logging.DEBUG) diff --git a/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py b/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py index 08e1aab73fad..e86f604add79 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py +++ b/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py @@ -21,7 +21,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data_sandbox(caplog, capsys): +def test_download_public_data_sandbox( + caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] +) -> None: # Enable debug-level logging to verify the BigQuery Storage API is used. caplog.set_level(logging.DEBUG) diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py index 3afdb00d39bd..97b30541b93d 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import get_dataset +if typing.TYPE_CHECKING: + import pytest + -def test_get_dataset(capsys, dataset_id): +def test_get_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None: get_dataset.get_dataset(dataset_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_table.py b/packages/google-cloud-bigquery/samples/tests/test_get_table.py index 8bbd0681b584..e6383010fec5 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_table.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import get_table +if typing.TYPE_CHECKING: + import pytest + -def test_get_table(capsys, random_table_id, client): +def test_get_table( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client +) -> None: schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py b/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py index 1610d0e4a3ba..f51fe18f1ba3 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_datasets.py @@ -12,10 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import list_datasets +if typing.TYPE_CHECKING: + import pytest + from google.cloud import bigquery + -def test_list_datasets(capsys, dataset_id, client): +def test_list_datasets( + capsys: "pytest.CaptureFixture[str]", dataset_id: str, client: "bigquery.Client" +) -> None: list_datasets.list_datasets() out, err = capsys.readouterr() assert "Datasets in project {}:".format(client.project) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py b/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py index 5b375f4f4ee5..ee6b9a999065 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_datasets_by_label.py @@ -12,10 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import list_datasets_by_label +if typing.TYPE_CHECKING: + import pytest + from google.cloud import bigquery + -def test_list_datasets_by_label(capsys, dataset_id, client): +def test_list_datasets_by_label( + capsys: "pytest.CaptureFixture[str]", dataset_id: str, client: "bigquery.Client" +) -> None: dataset = client.get_dataset(dataset_id) dataset.labels = {"color": "green"} dataset = client.update_dataset(dataset, ["labels"]) diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_tables.py b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py index f9426aa53d21..7c726accc306 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_tables.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import list_tables +if typing.TYPE_CHECKING: + import pytest + -def test_list_tables(capsys, dataset_id, table_id): +def test_list_tables( + capsys: "pytest.CaptureFixture[str]", dataset_id: str, table_id: str +) -> None: list_tables.list_tables(dataset_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py index bafdc2051f5d..bbf3c671f966 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py @@ -12,10 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_clustered +if typing.TYPE_CHECKING: + import pytest + from google.cloud import bigquery + -def test_load_table_clustered(capsys, random_table_id, client): +def test_load_table_clustered( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, + client: "bigquery.Client", +) -> None: table = load_table_clustered.load_table_clustered(random_table_id) diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py index 6528edc98599..9a975493c54d 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py @@ -12,16 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import pytest from .. import load_table_dataframe +if typing.TYPE_CHECKING: + from google.cloud import bigquery + pandas = pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") -def test_load_table_dataframe(capsys, client, random_table_id): +def test_load_table_dataframe( + capsys: pytest.CaptureFixture[str], + client: "bigquery.Client", + random_table_id: str, +) -> None: table = load_table_dataframe.load_table_dataframe(random_table_id) out, _ = capsys.readouterr() @@ -44,7 +53,7 @@ def test_load_table_dataframe(capsys, client, random_table_id): "INTEGER", "FLOAT", "TIMESTAMP", - "TIMESTAMP", + "DATETIME", ] df = client.list_rows(table).to_dataframe() @@ -64,9 +73,9 @@ def test_load_table_dataframe(capsys, client, random_table_id): pandas.Timestamp("1983-05-09T11:00:00+00:00"), ] assert df["dvd_release"].tolist() == [ - pandas.Timestamp("2003-10-22T10:00:00+00:00"), - pandas.Timestamp("2002-07-16T09:00:00+00:00"), - pandas.Timestamp("2008-01-14T08:00:00+00:00"), - pandas.Timestamp("2002-01-22T07:00:00+00:00"), + pandas.Timestamp("2003-10-22T10:00:00"), + pandas.Timestamp("2002-07-16T09:00:00"), + pandas.Timestamp("2008-01-14T08:00:00"), + pandas.Timestamp("2002-01-22T07:00:00"), ] assert df["wikidata_id"].tolist() == ["Q16403", "Q25043", "Q24953", "Q24980"] diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_file.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_file.py index a7ebe768201a..95b06c7f6fd7 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_file.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_file.py @@ -13,14 +13,19 @@ # limitations under the License. import os +import typing from google.cloud import bigquery from .. import load_table_file +if typing.TYPE_CHECKING: + import pytest -def test_load_table_file(capsys, random_table_id, client): +def test_load_table_file( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client +) -> None: samples_test_dir = os.path.abspath(os.path.dirname(__file__)) file_path = os.path.join( samples_test_dir, "..", "..", "tests", "data", "people.csv" diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py index a407197834f0..c9b410850fb4 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_autodetect_csv +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_autodetect_csv(capsys, random_table_id): +def test_load_table_uri_autodetect_csv( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_autodetect_csv.load_table_uri_autodetect_csv(random_table_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py index df14d26ed2c4..2c68a13db5a5 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_autodetect_json +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_autodetect_csv(capsys, random_table_id): +def test_load_table_uri_autodetect_csv( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_autodetect_json.load_table_uri_autodetect_json(random_table_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_avro.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_avro.py index 0be29d6b35ae..d0be44aca76a 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_avro.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_avro.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_avro +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_avro(capsys, random_table_id): +def test_load_table_uri_avro( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_avro.load_table_uri_avro(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py index c15dad9a754f..1eb8738431b3 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_cmek +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_cmek(capsys, random_table_id, kms_key_name): +def test_load_table_uri_cmek( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str +) -> None: load_table_uri_cmek.load_table_uri_cmek(random_table_id, kms_key_name) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py index fbcc69358466..a57224c84a1b 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_csv +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_csv(capsys, random_table_id): +def test_load_table_uri_csv( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_csv.load_table_uri_csv(random_table_id) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py index e054cb07ac3f..3ad0ce29bf6c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_json +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_json(capsys, random_table_id): +def test_load_table_uri_json( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_json.load_table_uri_json(random_table_id) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py index 96dc72022b0a..f31e8cabb66c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_orc +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_orc(capsys, random_table_id): +def test_load_table_uri_orc( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_orc.load_table_uri_orc(random_table_id) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py index 81ba3fcef604..5404e8584e56 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_parquet +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_json(capsys, random_table_id): +def test_load_table_uri_json( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_parquet.load_table_uri_parquet(random_table_id) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_avro.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_avro.py index ba680cabd49f..19b62fe7e063 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_avro.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_avro.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_truncate_avro +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_truncate_avro(capsys, random_table_id): +def test_load_table_uri_truncate_avro( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_truncate_avro.load_table_uri_truncate_avro(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_csv.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_csv.py index 5c1da7dcec62..9bc467cd04fa 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_csv.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_csv.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_truncate_csv +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_truncate_csv(capsys, random_table_id): +def test_load_table_uri_truncate_csv( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_truncate_csv.load_table_uri_truncate_csv(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_json.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_json.py index 180ca7f40b2e..cdf96454be57 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_json.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_json.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_truncate_json +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_truncate_json(capsys, random_table_id): +def test_load_table_uri_truncate_json( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_truncate_json.load_table_uri_truncate_json(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_orc.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_orc.py index 322bf31276c3..041923da911c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_orc.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_orc.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_truncate_orc +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_truncate_orc(capsys, random_table_id): +def test_load_table_uri_truncate_orc( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_truncate_orc.load_table_uri_truncate_orc(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_parquet.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_parquet.py index ca901defa815..2139f316ff83 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_parquet.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_truncate_parquet.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_truncate_parquet +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_truncate_parquet(capsys, random_table_id): +def test_load_table_uri_truncate_parquet( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_truncate_parquet.load_table_uri_truncate_parquet(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_model_samples.py b/packages/google-cloud-bigquery/samples/tests/test_model_samples.py index ebefad846642..ed82dd678c2f 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_model_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_model_samples.py @@ -12,13 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import delete_model from .. import get_model from .. import list_models from .. import update_model +if typing.TYPE_CHECKING: + import pytest + -def test_model_samples(capsys, dataset_id, model_id): +def test_model_samples( + capsys: "pytest.CaptureFixture[str]", dataset_id: str, model_id: str +) -> None: """Since creating a model is a long operation, test all model samples in the same test, following a typical end-to-end flow. """ diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py index e6a825233266..9590f3d7a45e 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py @@ -12,12 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import query_external_gcs_temporary_table +if typing.TYPE_CHECKING: + import pytest + def test_query_external_gcs_temporary_table( - capsys, -): + capsys: "pytest.CaptureFixture[str]", +) -> None: query_external_gcs_temporary_table.query_external_gcs_temporary_table() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py index a00930cad881..851839054010 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import query_external_sheets_permanent_table +if typing.TYPE_CHECKING: + import pytest + -def test_query_external_sheets_permanent_table(capsys, dataset_id): +def test_query_external_sheets_permanent_table( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: query_external_sheets_permanent_table.query_external_sheets_permanent_table( dataset_id diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py index 8274787cb644..58e0cb3948c9 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import query_external_sheets_temporary_table +if typing.TYPE_CHECKING: + import pytest + -def test_query_external_sheets_temporary_table(capsys): +def test_query_external_sheets_temporary_table( + capsys: "pytest.CaptureFixture[str]", +) -> None: query_external_sheets_temporary_table.query_external_sheets_temporary_table() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py b/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py index f72bee3f73c4..f3fb039c932d 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py @@ -13,13 +13,15 @@ # limitations under the License. import re +import typing from .. import query_no_cache +if typing.TYPE_CHECKING: + import pytest -def test_query_no_cache( - capsys, -): + +def test_query_no_cache(capsys: "pytest.CaptureFixture[str]") -> None: query_no_cache.query_no_cache() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py b/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py index eb1ca4b2c19b..daf711e49113 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import query_pagination +if typing.TYPE_CHECKING: + import pytest + -def test_query_pagination( - capsys, -): +def test_query_pagination(capsys: "pytest.CaptureFixture[str]") -> None: query_pagination.query_pagination() out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_script.py b/packages/google-cloud-bigquery/samples/tests/test_query_script.py index 2c75478736e6..98dd1253bf40 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_script.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_script.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import query_script +if typing.TYPE_CHECKING: + import pytest + -def test_query_script( - capsys, -): +def test_query_script(capsys: "pytest.CaptureFixture[str]") -> None: query_script.query_script() out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py index 9511def58b74..d9b1aeb73eb5 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py @@ -19,9 +19,7 @@ pyarrow = pytest.importorskip("pyarrow") -def test_query_to_arrow( - capsys, -): +def test_query_to_arrow(capsys: "pytest.CaptureFixture[str]") -> None: arrow_table = query_to_arrow.query_to_arrow() out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py index c1b0bb5a7cd0..57bca074a7cd 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_routine_samples.py @@ -12,11 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery -from google.cloud import bigquery_v2 + +if typing.TYPE_CHECKING: + import pytest -def test_create_routine(capsys, random_routine_id): +def test_create_routine( + capsys: "pytest.CaptureFixture[str]", random_routine_id: str +) -> None: from .. import create_routine create_routine.create_routine(random_routine_id) @@ -24,7 +30,11 @@ def test_create_routine(capsys, random_routine_id): assert "Created routine {}".format(random_routine_id) in out -def test_create_routine_ddl(capsys, random_routine_id, client): +def test_create_routine_ddl( + capsys: "pytest.CaptureFixture[str]", + random_routine_id: str, + client: bigquery.Client, +) -> None: from .. import create_routine_ddl create_routine_ddl.create_routine_ddl(random_routine_id) @@ -37,22 +47,22 @@ def test_create_routine_ddl(capsys, random_routine_id, client): expected_arguments = [ bigquery.RoutineArgument( name="arr", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.ARRAY, - array_element_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.STRUCT, - struct_type=bigquery_v2.types.StandardSqlStructType( + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.ARRAY, + array_element_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.STRUCT, + struct_type=bigquery.StandardSqlStructType( fields=[ - bigquery_v2.types.StandardSqlField( + bigquery.StandardSqlField( name="name", - type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.STRING + type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.STRING ), ), - bigquery_v2.types.StandardSqlField( + bigquery.StandardSqlField( name="val", - type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ), ] @@ -64,7 +74,9 @@ def test_create_routine_ddl(capsys, random_routine_id, client): assert routine.arguments == expected_arguments -def test_list_routines(capsys, dataset_id, routine_id): +def test_list_routines( + capsys: "pytest.CaptureFixture[str]", dataset_id: str, routine_id: str +) -> None: from .. import list_routines list_routines.list_routines(dataset_id) @@ -73,7 +85,7 @@ def test_list_routines(capsys, dataset_id, routine_id): assert routine_id in out -def test_get_routine(capsys, routine_id): +def test_get_routine(capsys: "pytest.CaptureFixture[str]", routine_id: str) -> None: from .. import get_routine get_routine.get_routine(routine_id) @@ -82,10 +94,10 @@ def test_get_routine(capsys, routine_id): assert "Type: 'SCALAR_FUNCTION'" in out assert "Language: 'SQL'" in out assert "Name: 'x'" in out - assert "Type: 'type_kind: INT64\n'" in out + assert "type_kind=" in out -def test_delete_routine(capsys, routine_id): +def test_delete_routine(capsys: "pytest.CaptureFixture[str]", routine_id: str) -> None: from .. import delete_routine delete_routine.delete_routine(routine_id) @@ -93,7 +105,7 @@ def test_delete_routine(capsys, routine_id): assert "Deleted routine {}.".format(routine_id) in out -def test_update_routine(routine_id): +def test_update_routine(routine_id: str) -> None: from .. import update_routine routine = update_routine.update_routine(routine_id) diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_exists.py b/packages/google-cloud-bigquery/samples/tests/test_table_exists.py index d1f579a64528..7317ba747bc4 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_exists.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_exists.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import table_exists +if typing.TYPE_CHECKING: + import pytest + -def test_table_exists(capsys, random_table_id, client): +def test_table_exists( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client +) -> None: table_exists.table_exists(random_table_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py index 72b51df9c485..59024fa959d1 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py @@ -12,12 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import table_insert_rows +if typing.TYPE_CHECKING: + import pytest + -def test_table_insert_rows(capsys, random_table_id, client): +def test_table_insert_rows( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, + client: bigquery.Client, +) -> None: schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py index c6199894a72c..00456ce84e41 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import table_insert_rows_explicit_none_insert_ids as mut +if typing.TYPE_CHECKING: + import pytest + -def test_table_insert_rows_explicit_none_insert_ids(capsys, random_table_id, client): +def test_table_insert_rows_explicit_none_insert_ids( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client +) -> None: schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/samples/tests/test_undelete_table.py b/packages/google-cloud-bigquery/samples/tests/test_undelete_table.py index a070abdbd36b..08841ad72922 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_undelete_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_undelete_table.py @@ -12,10 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import undelete_table +if typing.TYPE_CHECKING: + import pytest + -def test_undelete_table(capsys, table_with_schema_id, random_table_id): +def test_undelete_table( + capsys: "pytest.CaptureFixture[str]", + table_with_schema_id: str, + random_table_id: str, +) -> None: undelete_table.undelete_table(table_with_schema_id, random_table_id) out, _ = capsys.readouterr() assert ( diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py index 4c0aa835baf0..186a3b5757b5 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import update_dataset_access +if typing.TYPE_CHECKING: + import pytest + -def test_update_dataset_access(capsys, dataset_id): +def test_update_dataset_access( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: update_dataset_access.update_dataset_access(dataset_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py index a5a8e6b5202c..b7787dde32a0 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import update_dataset_default_partition_expiration +if typing.TYPE_CHECKING: + import pytest + -def test_update_dataset_default_partition_expiration(capsys, dataset_id): +def test_update_dataset_default_partition_expiration( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: ninety_days_ms = 90 * 24 * 60 * 60 * 1000 # in milliseconds diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py index b0f7013228e6..f780827f27b9 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import update_dataset_default_table_expiration +if typing.TYPE_CHECKING: + import pytest + -def test_update_dataset_default_table_expiration(capsys, dataset_id): +def test_update_dataset_default_table_expiration( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: one_day_ms = 24 * 60 * 60 * 1000 # in milliseconds diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py index e4ff586c7bc2..5d1209e22d61 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import update_dataset_description +if typing.TYPE_CHECKING: + import pytest + -def test_update_dataset_description(capsys, dataset_id): +def test_update_dataset_description( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: update_dataset_description.update_dataset_description(dataset_id) out, err = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py b/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py index 7e9ca6f2b44f..68e1c1e2bd79 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py @@ -12,12 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import update_table_require_partition_filter +if typing.TYPE_CHECKING: + import pytest + -def test_update_table_require_partition_filter(capsys, random_table_id, client): +def test_update_table_require_partition_filter( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, + client: bigquery.Client, +) -> None: # Make a partitioned table. schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")] diff --git a/packages/google-cloud-bigquery/samples/undelete_table.py b/packages/google-cloud-bigquery/samples/undelete_table.py index 18b15801ffee..c230a9230f8f 100644 --- a/packages/google-cloud-bigquery/samples/undelete_table.py +++ b/packages/google-cloud-bigquery/samples/undelete_table.py @@ -15,7 +15,7 @@ from google.api_core import datetime_helpers -def undelete_table(table_id, recovered_table_id): +def undelete_table(table_id: str, recovered_table_id: str) -> None: # [START bigquery_undelete_table] import time @@ -39,7 +39,7 @@ def undelete_table(table_id, recovered_table_id): # Due to very short lifecycle of the table, ensure we're not picking a time # prior to the table creation due to time drift between backend and client. table = client.get_table(table_id) - created_epoch = datetime_helpers.to_milliseconds(table.created) + created_epoch: int = datetime_helpers.to_milliseconds(table.created) # type: ignore if created_epoch > snapshot_epoch: snapshot_epoch = created_epoch # [END_EXCLUDE] diff --git a/packages/google-cloud-bigquery/samples/update_dataset_access.py b/packages/google-cloud-bigquery/samples/update_dataset_access.py index a5c2670e7e1a..fda784da5d13 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_access.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_dataset_access(dataset_id): +def update_dataset_access(dataset_id: str) -> None: # [START bigquery_update_dataset_access] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py b/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py index 18cfb92db9b4..37456f3a0bfe 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_dataset_default_partition_expiration(dataset_id): +def update_dataset_default_partition_expiration(dataset_id: str) -> None: # [START bigquery_update_dataset_partition_expiration] diff --git a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py index b7e5cea9b20d..cf6f50d9fccd 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_dataset_default_table_expiration(dataset_id): +def update_dataset_default_table_expiration(dataset_id: str) -> None: # [START bigquery_update_dataset_expiration] diff --git a/packages/google-cloud-bigquery/samples/update_dataset_description.py b/packages/google-cloud-bigquery/samples/update_dataset_description.py index 0732b1c618e8..98c5fed432c2 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_description.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_description.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_dataset_description(dataset_id): +def update_dataset_description(dataset_id: str) -> None: # [START bigquery_update_dataset_description] diff --git a/packages/google-cloud-bigquery/samples/update_model.py b/packages/google-cloud-bigquery/samples/update_model.py index db262d8cc43c..e11b6d5afded 100644 --- a/packages/google-cloud-bigquery/samples/update_model.py +++ b/packages/google-cloud-bigquery/samples/update_model.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_model(model_id): +def update_model(model_id: str) -> None: """Sample ID: go/samples-tracker/1533""" # [START bigquery_update_model_description] diff --git a/packages/google-cloud-bigquery/samples/update_routine.py b/packages/google-cloud-bigquery/samples/update_routine.py index 61c6855b5041..1a975a253de6 100644 --- a/packages/google-cloud-bigquery/samples/update_routine.py +++ b/packages/google-cloud-bigquery/samples/update_routine.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def update_routine(routine_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def update_routine(routine_id: str) -> "bigquery.Routine": # [START bigquery_update_routine] diff --git a/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py b/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py index cf1d532774b2..8221238a73e6 100644 --- a/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py +++ b/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_table_require_partition_filter(table_id): +def update_table_require_partition_filter(table_id: str) -> None: # [START bigquery_update_table_require_partition_filter] diff --git a/packages/google-cloud-bigquery/setup.cfg b/packages/google-cloud-bigquery/setup.cfg index 8eefc4435900..25892161f048 100644 --- a/packages/google-cloud-bigquery/setup.cfg +++ b/packages/google-cloud-bigquery/setup.cfg @@ -24,7 +24,7 @@ inputs = google/cloud/ exclude = tests/ - google/cloud/bigquery_v2/ + google/cloud/bigquery_v2/ # Legacy proto-based types. output = .pytype/ disable = # There's some issue with finding some pyi files, thus disabling. diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 63cdf747c37e..62fb3bbb3336 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -28,13 +28,13 @@ # 'Development Status :: 4 - Beta' # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" -pyarrow_dep = ["pyarrow >=3.0.0, <8.0dev"] dependencies = [ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 # NOTE: Maintainers, please do not require google-api-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", + "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", "proto-plus >= 1.15.0", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed @@ -42,25 +42,17 @@ "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", - "protobuf >= 3.12.0", - "python-dateutil >= 2.7.2, <3.0dev", + "proto-plus >= 1.10.0", # For the legacy proto-based types. + "protobuf >= 3.12.0", # For the legacy proto-based types. + "pyarrow >= 3.0.0, < 8.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { - "bqstorage": [ - "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", - # Due to an issue in pip's dependency resolver, the `grpc` extra is not - # installed, even though `google-cloud-bigquery-storage` specifies it - # as `google-api-core[grpc]`. We thus need to explicitly specify it here. - # See: https://github.com/googleapis/python-bigquery/issues/83 The - # grpc.Channel.close() method isn't added until 1.32.0. - # https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.38.1, < 2.0dev", - ] - + pyarrow_dep, + # Keep the no-op bqstorage extra for backward compatibility. + # See: https://github.com/googleapis/python-bigquery/issues/757 + "bqstorage": [], + "pandas": ["pandas>=1.0.0", "db-dtypes>=0.3.0,<2.0.0dev"], "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], - "pandas": ["pandas>=0.24.2"] + pyarrow_dep, - "bignumeric_type": pyarrow_dep, "ipython": ["ipython>=7.0.1,!=8.1.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ @@ -73,11 +65,6 @@ all_extras = [] for extra in extras: - # Exclude this extra from all to avoid overly strict dependencies on core - # libraries such as pyarrow. - # https://github.com/googleapis/python-bigquery/issues/563 - if extra in {"bignumeric_type"}: - continue all_extras.extend(extras[extra]) extras["all"] = all_extras diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt index 0258515eba8a..47b842a6d6ba 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.6.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 +db-dtypes==0.3.0 geopandas==0.9.0 google-api-core==1.31.5 google-cloud-bigquery-storage==2.0.0 @@ -15,7 +16,7 @@ ipython==7.0.1 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 -pandas==0.24.2 +pandas==1.0.0 proto-plus==1.15.0 protobuf==3.12.0 pyarrow==3.0.0 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index e69de29bb2d1..684864f2bcde 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -0,0 +1 @@ +pandas==1.1.0 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.8.txt b/packages/google-cloud-bigquery/testing/constraints-3.8.txt index e69de29bb2d1..3fd8886e64d1 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.8.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.8.txt @@ -0,0 +1 @@ +pandas==1.2.0 diff --git a/packages/google-cloud-bigquery/tests/system/conftest.py b/packages/google-cloud-bigquery/tests/system/conftest.py index 7eec76a32e21..784a1dd5c097 100644 --- a/packages/google-cloud-bigquery/tests/system/conftest.py +++ b/packages/google-cloud-bigquery/tests/system/conftest.py @@ -13,7 +13,9 @@ # limitations under the License. import pathlib +import random import re +from typing import Tuple import pytest import test_utils.prefixer @@ -26,6 +28,7 @@ prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system") DATA_DIR = pathlib.Path(__file__).parent.parent / "data" +TOKYO_LOCATION = "asia-northeast1" @pytest.fixture(scope="session", autouse=True) @@ -62,6 +65,16 @@ def dataset_id(bigquery_client): bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) +@pytest.fixture(scope="session") +def dataset_id_tokyo(bigquery_client: bigquery.Client, project_id: str): + dataset_id = prefixer.create_prefix() + "_tokyo" + dataset = bigquery.Dataset(f"{project_id}.{dataset_id}") + dataset.location = TOKYO_LOCATION + bigquery_client.create_dataset(dataset) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) + + @pytest.fixture() def dataset_client(bigquery_client, dataset_id): import google.cloud.bigquery.job @@ -78,38 +91,64 @@ def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" -@pytest.fixture(scope="session") -def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): +def load_scalars_table( + bigquery_client: bigquery.Client, + project_id: str, + dataset_id: str, + data_path: str = "scalars.jsonl", +) -> str: schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + table_id = data_path.replace(".", "_") + hex(random.randrange(1000000)) job_config = bigquery.LoadJobConfig() job_config.schema = schema job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON - full_table_id = f"{project_id}.{dataset_id}.scalars" - with open(DATA_DIR / "scalars.jsonl", "rb") as data_file: + full_table_id = f"{project_id}.{dataset_id}.{table_id}" + with open(DATA_DIR / data_path, "rb") as data_file: job = bigquery_client.load_table_from_file( data_file, full_table_id, job_config=job_config ) job.result() + return full_table_id + + +@pytest.fixture(scope="session") +def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + full_table_id = load_scalars_table(bigquery_client, project_id, dataset_id) yield full_table_id - bigquery_client.delete_table(full_table_id) + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + +@pytest.fixture(scope="session") +def scalars_table_tokyo( + bigquery_client: bigquery.Client, project_id: str, dataset_id_tokyo: str +): + full_table_id = load_scalars_table(bigquery_client, project_id, dataset_id_tokyo) + yield full_table_id + bigquery_client.delete_table(full_table_id, not_found_ok=True) @pytest.fixture(scope="session") def scalars_extreme_table( bigquery_client: bigquery.Client, project_id: str, dataset_id: str ): - schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") - job_config = bigquery.LoadJobConfig() - job_config.schema = schema - job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON - full_table_id = f"{project_id}.{dataset_id}.scalars_extreme" - with open(DATA_DIR / "scalars_extreme.jsonl", "rb") as data_file: - job = bigquery_client.load_table_from_file( - data_file, full_table_id, job_config=job_config - ) - job.result() + full_table_id = load_scalars_table( + bigquery_client, project_id, dataset_id, data_path="scalars_extreme.jsonl" + ) yield full_table_id - bigquery_client.delete_table(full_table_id) + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + +@pytest.fixture(scope="session", params=["US", TOKYO_LOCATION]) +def scalars_table_multi_location( + request, scalars_table: str, scalars_table_tokyo: str +) -> Tuple[str, str]: + if request.param == "US": + full_table_id = scalars_table + elif request.param == TOKYO_LOCATION: + full_table_id = scalars_table_tokyo + else: + raise ValueError(f"got unexpected location: {request.param}") + return request.param, full_table_id @pytest.fixture diff --git a/packages/google-cloud-bigquery/tests/system/test_arrow.py b/packages/google-cloud-bigquery/tests/system/test_arrow.py index cc090ba26d65..8b88b6844d74 100644 --- a/packages/google-cloud-bigquery/tests/system/test_arrow.py +++ b/packages/google-cloud-bigquery/tests/system/test_arrow.py @@ -16,17 +16,13 @@ from typing import Optional +import pyarrow import pytest from google.cloud import bigquery from google.cloud.bigquery import enums -pyarrow = pytest.importorskip( - "pyarrow", minversion="3.0.0" -) # Needs decimal256 for BIGNUMERIC columns. - - @pytest.mark.parametrize( ("max_results", "scalars_table_name"), ( diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 1e328e2e1223..773ef3c90b89 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -13,7 +13,6 @@ # limitations under the License. import base64 -import concurrent.futures import csv import datetime import decimal @@ -27,22 +26,6 @@ import uuid from typing import Optional -import psutil -import pytest - -from . import helpers - -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - pyarrow = None - from google.api_core.exceptions import PreconditionFailed from google.api_core.exceptions import BadRequest from google.api_core.exceptions import ClientError @@ -54,21 +37,26 @@ from google.api_core.exceptions import TooManyRequests from google.api_core.iam import Policy from google.cloud import bigquery -from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums +from google.cloud import bigquery_storage from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient - +import psutil +import pytest +import pyarrow +import pyarrow.types from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState from test_utils.retry import RetryResult from test_utils.system import unique_resource_id +from . import helpers + JOB_TIMEOUT = 120 # 2 minutes DATA_PATH = pathlib.Path(__file__).parent.parent / "data" @@ -703,64 +691,6 @@ def _fetch_single_page(table, selected_fields=None): page = next(iterator.pages) return list(page) - def _create_table_many_columns(self, rowcount): - # Generate a table of maximum width via CREATE TABLE AS SELECT. - # first column is named 'rowval', and has a value from 1..rowcount - # Subsequent column is named col_ and contains the value N*rowval, - # where N is between 1 and 9999 inclusive. - dsname = _make_dataset_id("wide_schema") - dataset = self.temp_dataset(dsname) - table_id = "many_columns" - table_ref = dataset.table(table_id) - self.to_delete.insert(0, table_ref) - colprojections = ",".join( - ["r * {} as col_{}".format(n, n) for n in range(1, 10000)] - ) - sql = """ - CREATE TABLE {}.{} - AS - SELECT - r as rowval, - {} - FROM - UNNEST(GENERATE_ARRAY(1,{},1)) as r - """.format( - dsname, table_id, colprojections, rowcount - ) - query_job = Config.CLIENT.query(sql) - query_job.result() - self.assertEqual(query_job.statement_type, "CREATE_TABLE_AS_SELECT") - self.assertEqual(query_job.ddl_operation_performed, "CREATE") - self.assertEqual(query_job.ddl_target_table, table_ref) - - return table_ref - - def test_query_many_columns(self): - # Test working with the widest schema BigQuery supports, 10k columns. - row_count = 2 - table_ref = self._create_table_many_columns(row_count) - rows = list( - Config.CLIENT.query( - "SELECT * FROM `{}.{}`".format(table_ref.dataset_id, table_ref.table_id) - ) - ) - - self.assertEqual(len(rows), row_count) - - # check field representations adhere to expected values. - correctwidth = 0 - badvals = 0 - for r in rows: - vals = r._xxx_values - rowval = vals[0] - if len(vals) == 10000: - correctwidth = correctwidth + 1 - for n in range(1, 10000): - if vals[n] != rowval * (n): - badvals = badvals + 1 - self.assertEqual(correctwidth, row_count) - self.assertEqual(badvals, 0) - def test_insert_rows_then_dump_table(self): NOW_SECONDS = 1448911495.484366 NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(tzinfo=UTC) @@ -1381,25 +1311,6 @@ def test_query_w_wrong_config(self): with self.assertRaises(Exception): Config.CLIENT.query(good_query, job_config=bad_config).result() - def test_query_w_timeout(self): - job_config = bigquery.QueryJobConfig() - job_config.use_query_cache = False - - query_job = Config.CLIENT.query( - "SELECT * FROM `bigquery-public-data.github_repos.commits`;", - job_id_prefix="test_query_w_timeout_", - location="US", - job_config=job_config, - ) - - with self.assertRaises(concurrent.futures.TimeoutError): - query_job.result(timeout=1) - - # Even though the query takes >1 second, the call to getQueryResults - # should succeed. - self.assertFalse(query_job.done(timeout=1)) - self.assertIsNotNone(Config.CLIENT.cancel_job(query_job)) - def test_query_w_page_size(self): page_size = 45 query_job = Config.CLIENT.query( @@ -1421,83 +1332,6 @@ def test_query_w_start_index(self): self.assertEqual(result1.extra_params["startIndex"], start_index) self.assertEqual(len(list(result1)), total_rows - start_index) - def test_query_statistics(self): - """ - A system test to exercise some of the extended query statistics. - - Note: We construct a query that should need at least three stages by - specifying a JOIN query. Exact plan and stats are effectively - non-deterministic, so we're largely interested in confirming values - are present. - """ - - job_config = bigquery.QueryJobConfig() - job_config.use_query_cache = False - - query_job = Config.CLIENT.query( - """ - SELECT - COUNT(1) - FROM - ( - SELECT - year, - wban_number - FROM `bigquery-public-data.samples.gsod` - LIMIT 1000 - ) lside - INNER JOIN - ( - SELECT - year, - state - FROM `bigquery-public-data.samples.natality` - LIMIT 1000 - ) rside - ON - lside.year = rside.year - """, - location="US", - job_config=job_config, - ) - - # run the job to completion - query_job.result() - - # Assert top-level stats - self.assertFalse(query_job.cache_hit) - self.assertIsNotNone(query_job.destination) - self.assertTrue(query_job.done) - self.assertFalse(query_job.dry_run) - self.assertIsNone(query_job.num_dml_affected_rows) - self.assertEqual(query_job.priority, "INTERACTIVE") - self.assertGreater(query_job.total_bytes_billed, 1) - self.assertGreater(query_job.total_bytes_processed, 1) - self.assertEqual(query_job.statement_type, "SELECT") - self.assertGreater(query_job.slot_millis, 1) - - # Make assertions on the shape of the query plan. - plan = query_job.query_plan - self.assertGreaterEqual(len(plan), 3) - first_stage = plan[0] - self.assertIsNotNone(first_stage.start) - self.assertIsNotNone(first_stage.end) - self.assertIsNotNone(first_stage.entry_id) - self.assertIsNotNone(first_stage.name) - self.assertGreater(first_stage.parallel_inputs, 0) - self.assertGreater(first_stage.completed_parallel_inputs, 0) - self.assertGreater(first_stage.shuffle_output_bytes, 0) - self.assertEqual(first_stage.status, "COMPLETE") - - # Query plan is a digraph. Ensure it has inter-stage links, - # but not every stage has inputs. - stages_with_inputs = 0 - for entry in plan: - if len(entry.input_stages) > 0: - stages_with_inputs = stages_with_inputs + 1 - self.assertGreater(stages_with_inputs, 0) - self.assertGreater(len(plan), stages_with_inputs) - def test_dml_statistics(self): table_schema = ( bigquery.SchemaField("foo", "STRING"), @@ -1639,10 +1473,6 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials @@ -1701,9 +1531,6 @@ def test_dbapi_dry_run_query(self): self.assertEqual(list(rows), []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_dbapi_connection_does_not_leak_sockets(self): current_process = psutil.Process() conn_count_start = len(current_process.connections()) @@ -1794,207 +1621,6 @@ def test_dbapi_w_dml(self): ) self.assertEqual(Config.CURSOR.rowcount, 1) - def test_query_w_query_params(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import ArrayQueryParameter - from google.cloud.bigquery.query import ScalarQueryParameter - from google.cloud.bigquery.query import ScalarQueryParameterType - from google.cloud.bigquery.query import StructQueryParameter - from google.cloud.bigquery.query import StructQueryParameterType - - question = "What is the answer to life, the universe, and everything?" - question_param = ScalarQueryParameter( - name="question", type_="STRING", value=question - ) - answer = 42 - answer_param = ScalarQueryParameter(name="answer", type_="INT64", value=answer) - pi = 3.1415926 - pi_param = ScalarQueryParameter(name="pi", type_="FLOAT64", value=pi) - pi_numeric = decimal.Decimal("3.141592654") - pi_numeric_param = ScalarQueryParameter( - name="pi_numeric_param", type_="NUMERIC", value=pi_numeric - ) - bignum = decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)) - bignum_param = ScalarQueryParameter( - name="bignum_param", type_="BIGNUMERIC", value=bignum - ) - truthy = True - truthy_param = ScalarQueryParameter(name="truthy", type_="BOOL", value=truthy) - beef = b"DEADBEEF" - beef_param = ScalarQueryParameter(name="beef", type_="BYTES", value=beef) - naive = datetime.datetime(2016, 12, 5, 12, 41, 9) - naive_param = ScalarQueryParameter(name="naive", type_="DATETIME", value=naive) - naive_date_param = ScalarQueryParameter( - name="naive_date", type_="DATE", value=naive.date() - ) - naive_time_param = ScalarQueryParameter( - name="naive_time", type_="TIME", value=naive.time() - ) - zoned = naive.replace(tzinfo=UTC) - zoned_param = ScalarQueryParameter(name="zoned", type_="TIMESTAMP", value=zoned) - array_param = ArrayQueryParameter( - name="array_param", array_type="INT64", values=[1, 2] - ) - struct_param = StructQueryParameter("hitchhiker", question_param, answer_param) - phred_name = "Phred Phlyntstone" - phred_name_param = ScalarQueryParameter( - name="name", type_="STRING", value=phred_name - ) - phred_age = 32 - phred_age_param = ScalarQueryParameter( - name="age", type_="INT64", value=phred_age - ) - phred_param = StructQueryParameter(None, phred_name_param, phred_age_param) - bharney_name = "Bharney Rhubbyl" - bharney_name_param = ScalarQueryParameter( - name="name", type_="STRING", value=bharney_name - ) - bharney_age = 31 - bharney_age_param = ScalarQueryParameter( - name="age", type_="INT64", value=bharney_age - ) - bharney_param = StructQueryParameter( - None, bharney_name_param, bharney_age_param - ) - characters_param = ArrayQueryParameter( - name=None, array_type="RECORD", values=[phred_param, bharney_param] - ) - empty_struct_array_param = ArrayQueryParameter( - name="empty_array_param", - values=[], - array_type=StructQueryParameterType( - ScalarQueryParameterType(name="foo", type_="INT64"), - ScalarQueryParameterType(name="bar", type_="STRING"), - ), - ) - hero_param = StructQueryParameter("hero", phred_name_param, phred_age_param) - sidekick_param = StructQueryParameter( - "sidekick", bharney_name_param, bharney_age_param - ) - roles_param = StructQueryParameter("roles", hero_param, sidekick_param) - friends_param = ArrayQueryParameter( - name="friends", array_type="STRING", values=[phred_name, bharney_name] - ) - with_friends_param = StructQueryParameter(None, friends_param) - top_left_param = StructQueryParameter( - "top_left", - ScalarQueryParameter("x", "INT64", 12), - ScalarQueryParameter("y", "INT64", 102), - ) - bottom_right_param = StructQueryParameter( - "bottom_right", - ScalarQueryParameter("x", "INT64", 22), - ScalarQueryParameter("y", "INT64", 92), - ) - rectangle_param = StructQueryParameter( - "rectangle", top_left_param, bottom_right_param - ) - examples = [ - { - "sql": "SELECT @question", - "expected": question, - "query_parameters": [question_param], - }, - { - "sql": "SELECT @answer", - "expected": answer, - "query_parameters": [answer_param], - }, - {"sql": "SELECT @pi", "expected": pi, "query_parameters": [pi_param]}, - { - "sql": "SELECT @pi_numeric_param", - "expected": pi_numeric, - "query_parameters": [pi_numeric_param], - }, - { - "sql": "SELECT @truthy", - "expected": truthy, - "query_parameters": [truthy_param], - }, - {"sql": "SELECT @beef", "expected": beef, "query_parameters": [beef_param]}, - { - "sql": "SELECT @naive", - "expected": naive, - "query_parameters": [naive_param], - }, - { - "sql": "SELECT @naive_date", - "expected": naive.date(), - "query_parameters": [naive_date_param], - }, - { - "sql": "SELECT @naive_time", - "expected": naive.time(), - "query_parameters": [naive_time_param], - }, - { - "sql": "SELECT @zoned", - "expected": zoned, - "query_parameters": [zoned_param], - }, - { - "sql": "SELECT @array_param", - "expected": [1, 2], - "query_parameters": [array_param], - }, - { - "sql": "SELECT (@hitchhiker.question, @hitchhiker.answer)", - "expected": ({"_field_1": question, "_field_2": answer}), - "query_parameters": [struct_param], - }, - { - "sql": "SELECT " - "((@rectangle.bottom_right.x - @rectangle.top_left.x) " - "* (@rectangle.top_left.y - @rectangle.bottom_right.y))", - "expected": 100, - "query_parameters": [rectangle_param], - }, - { - "sql": "SELECT ?", - "expected": [ - {"name": phred_name, "age": phred_age}, - {"name": bharney_name, "age": bharney_age}, - ], - "query_parameters": [characters_param], - }, - { - "sql": "SELECT @empty_array_param", - "expected": [], - "query_parameters": [empty_struct_array_param], - }, - { - "sql": "SELECT @roles", - "expected": { - "hero": {"name": phred_name, "age": phred_age}, - "sidekick": {"name": bharney_name, "age": bharney_age}, - }, - "query_parameters": [roles_param], - }, - { - "sql": "SELECT ?", - "expected": {"friends": [phred_name, bharney_name]}, - "query_parameters": [with_friends_param], - }, - { - "sql": "SELECT @bignum_param", - "expected": bignum, - "query_parameters": [bignum_param], - }, - ] - - for example in examples: - jconfig = QueryJobConfig() - jconfig.query_parameters = example["query_parameters"] - query_job = Config.CLIENT.query( - example["sql"], - job_config=jconfig, - job_id_prefix="test_query_w_query_params", - ) - rows = list(query_job.result()) - self.assertEqual(len(rows), 1) - self.assertEqual(len(rows[0]), 1) - self.assertEqual(rows[0][0], example["expected"]) - def test_dbapi_w_query_parameters(self): examples = [ { @@ -2194,8 +1820,8 @@ def test_insert_rows_nested_nested_dictionary(self): def test_create_routine(self): routine_name = "test_routine" dataset = self.temp_dataset(_make_dataset_id("create_routine")) - float64_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.FLOAT64 + float64_type = bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 ) routine = bigquery.Routine( dataset.routine(routine_name), @@ -2209,8 +1835,8 @@ def test_create_routine(self): routine.arguments = [ bigquery.RoutineArgument( name="arr", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.ARRAY, + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.ARRAY, array_element_type=float64_type, ), ) @@ -2229,14 +1855,19 @@ def test_create_routine(self): assert rows[0].max_value == 100.0 def test_create_tvf_routine(self): - from google.cloud.bigquery import Routine, RoutineArgument, RoutineType + from google.cloud.bigquery import ( + Routine, + RoutineArgument, + RoutineType, + StandardSqlTypeNames, + ) - StandardSqlDataType = bigquery_v2.types.StandardSqlDataType - StandardSqlField = bigquery_v2.types.StandardSqlField - StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + StandardSqlDataType = bigquery.StandardSqlDataType + StandardSqlField = bigquery.StandardSqlField + StandardSqlTableType = bigquery.StandardSqlTableType - INT64 = StandardSqlDataType.TypeKind.INT64 - STRING = StandardSqlDataType.TypeKind.STRING + INT64 = StandardSqlTypeNames.INT64 + STRING = StandardSqlTypeNames.STRING client = Config.CLIENT @@ -2367,10 +1998,6 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index ab0fb03f49e6..34e4243c4343 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -25,17 +25,16 @@ import google.api_core.retry import pkg_resources import pytest -import numpy from google.cloud import bigquery +from google.cloud import bigquery_storage +from google.cloud.bigquery import enums + from . import helpers -bigquery_storage = pytest.importorskip( - "google.cloud.bigquery_storage", minversion="2.0.0" -) pandas = pytest.importorskip("pandas", minversion="0.23.0") -pyarrow = pytest.importorskip("pyarrow", minversion="1.0.0") +numpy = pytest.importorskip("numpy") PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version @@ -67,7 +66,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i ).dt.tz_localize(datetime.timezone.utc), ), ( - "dt_col", + "dt_col_no_tz", pandas.Series( [ datetime.datetime(2010, 1, 2, 3, 44, 50), @@ -86,6 +85,28 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")), ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")), ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")), + ( + "date_col", + pandas.Series( + [ + datetime.date(2010, 1, 2), + datetime.date(2011, 2, 3), + datetime.date(2012, 3, 14), + ], + dtype="dbdate", + ), + ), + ( + "time_col", + pandas.Series( + [ + datetime.time(3, 44, 50), + datetime.time(14, 50, 59), + datetime.time(15, 16), + ], + dtype="dbtime", + ), + ), ("array_bool_col", pandas.Series([[True], [False], [True]])), ( "array_ts_col", @@ -110,7 +131,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i ), ), ( - "array_dt_col", + "array_dt_col_no_tz", pandas.Series( [ [datetime.datetime(2010, 1, 2, 3, 44, 50)], @@ -176,9 +197,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i assert tuple(table.schema) == ( bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("ts_col", "TIMESTAMP"), - # TODO: Update to DATETIME in V3 - # https://github.com/googleapis/python-bigquery/issues/985 - bigquery.SchemaField("dt_col", "TIMESTAMP"), + bigquery.SchemaField("dt_col_no_tz", "DATETIME"), bigquery.SchemaField("float32_col", "FLOAT"), bigquery.SchemaField("float64_col", "FLOAT"), bigquery.SchemaField("int8_col", "INTEGER"), @@ -188,11 +207,11 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i bigquery.SchemaField("uint8_col", "INTEGER"), bigquery.SchemaField("uint16_col", "INTEGER"), bigquery.SchemaField("uint32_col", "INTEGER"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("array_bool_col", "BOOLEAN", mode="REPEATED"), bigquery.SchemaField("array_ts_col", "TIMESTAMP", mode="REPEATED"), - # TODO: Update to DATETIME in V3 - # https://github.com/googleapis/python-bigquery/issues/985 - bigquery.SchemaField("array_dt_col", "TIMESTAMP", mode="REPEATED"), + bigquery.SchemaField("array_dt_col_no_tz", "DATETIME", mode="REPEATED"), bigquery.SchemaField("array_float32_col", "FLOAT", mode="REPEATED"), bigquery.SchemaField("array_float64_col", "FLOAT", mode="REPEATED"), bigquery.SchemaField("array_int8_col", "INTEGER", mode="REPEATED"), @@ -203,7 +222,84 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i bigquery.SchemaField("array_uint16_col", "INTEGER", mode="REPEATED"), bigquery.SchemaField("array_uint32_col", "INTEGER", mode="REPEATED"), ) - assert table.num_rows == 3 + + assert numpy.array( + sorted(map(list, bigquery_client.list_rows(table)), key=lambda r: r[5]), + dtype="object", + ).transpose().tolist() == [ + # bool_col + [True, False, True], + # ts_col + [ + datetime.datetime(2010, 1, 2, 3, 44, 50, tzinfo=datetime.timezone.utc), + datetime.datetime(2011, 2, 3, 14, 50, 59, tzinfo=datetime.timezone.utc), + datetime.datetime(2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc), + ], + # dt_col_no_tz + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + # float32_col + [1.0, 2.0, 3.0], + # float64_col + [4.0, 5.0, 6.0], + # int8_col + [-12, -11, -10], + # int16_col + [-9, -8, -7], + # int32_col + [-6, -5, -4], + # int64_col + [-3, -2, -1], + # uint8_col + [0, 1, 2], + # uint16_col + [3, 4, 5], + # uint32_col + [6, 7, 8], + # date_col + [ + datetime.date(2010, 1, 2), + datetime.date(2011, 2, 3), + datetime.date(2012, 3, 14), + ], + # time_col + [datetime.time(3, 44, 50), datetime.time(14, 50, 59), datetime.time(15, 16)], + # array_bool_col + [[True], [False], [True]], + # array_ts_col + [ + [datetime.datetime(2010, 1, 2, 3, 44, 50, tzinfo=datetime.timezone.utc)], + [datetime.datetime(2011, 2, 3, 14, 50, 59, tzinfo=datetime.timezone.utc)], + [datetime.datetime(2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc)], + ], + # array_dt_col + [ + [datetime.datetime(2010, 1, 2, 3, 44, 50)], + [datetime.datetime(2011, 2, 3, 14, 50, 59)], + [datetime.datetime(2012, 3, 14, 15, 16)], + ], + # array_float32_col + [[1.0], [2.0], [3.0]], + # array_float64_col + [[4.0], [5.0], [6.0]], + # array_int8_col + [[-12], [-11], [-10]], + # array_int16_col + [[-9], [-8], [-7]], + # array_int32_col + [[-6], [-5], [-4]], + # array_int64_col + [[-3], [-2], [-1]], + # array_uint8_col + [[0], [1], [2]], + # array_uint16_col + [[3], [4], [5]], + # array_uint32_col + [[6], [7], [8]], + ] @pytest.mark.skipif( @@ -660,7 +756,7 @@ def test_query_results_to_dataframe(bigquery_client): for _, row in df.iterrows(): for col in column_names: # all the schema fields are nullable, so None is acceptable - if not row[col] is None: + if not pandas.isna(row[col]): assert isinstance(row[col], exp_datatypes[col]) @@ -690,7 +786,7 @@ def test_query_results_to_dataframe_w_bqstorage(bigquery_client): for index, row in df.iterrows(): for col in column_names: # all the schema fields are nullable, so None is acceptable - if not row[col] is None: + if not pandas.isna(row[col]): assert isinstance(row[col], exp_datatypes[col]) @@ -701,6 +797,8 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): SF("int_col", "INTEGER", mode="REQUIRED"), SF("bool_col", "BOOLEAN", mode="REQUIRED"), SF("string_col", "STRING", mode="NULLABLE"), + SF("date_col", "DATE", mode="NULLABLE"), + SF("time_col", "TIME", mode="NULLABLE"), ] dataframe = pandas.DataFrame( @@ -710,30 +808,40 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): "bool_col": True, "string_col": "my string", "int_col": 10, + "date_col": datetime.date(2021, 1, 1), + "time_col": datetime.time(21, 1, 1), }, { "float_col": 2.22, "bool_col": False, "string_col": "another string", "int_col": 20, + "date_col": datetime.date(2021, 1, 2), + "time_col": datetime.time(21, 1, 2), }, { "float_col": 3.33, "bool_col": False, "string_col": "another string", "int_col": 30, + "date_col": datetime.date(2021, 1, 3), + "time_col": datetime.time(21, 1, 3), }, { "float_col": 4.44, "bool_col": True, "string_col": "another string", "int_col": 40, + "date_col": datetime.date(2021, 1, 4), + "time_col": datetime.time(21, 1, 4), }, { "float_col": 5.55, "bool_col": False, "string_col": "another string", "int_col": 50, + "date_col": datetime.date(2021, 1, 5), + "time_col": datetime.time(21, 1, 5), }, { "float_col": 6.66, @@ -742,9 +850,13 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): # NULL value indicator. "string_col": float("NaN"), "int_col": 60, + "date_col": datetime.date(2021, 1, 6), + "time_col": datetime.time(21, 1, 6), }, ] ) + dataframe["date_col"] = dataframe["date_col"].astype("dbdate") + dataframe["time_col"] = dataframe["time_col"].astype("dbtime") table_id = f"{bigquery_client.project}.{dataset_id}.test_insert_rows_from_dataframe" table_arg = bigquery.Table(table_id, schema=schema) @@ -890,6 +1002,110 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): assert len(dataframe.index) == 100 +@pytest.mark.parametrize( + ("max_results",), + ( + (None,), + (10,), + ), # Use BQ Storage API. # Use REST API. +) +def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_results): + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. + schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + + df = bigquery_client.list_rows( + scalars_table, + max_results=max_results, + selected_fields=schema, + ).to_dataframe() + + assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["datetime_col"].name == "datetime64[ns]" + assert df.dtypes["float64_col"].name == "float64" + assert df.dtypes["int64_col"].name == "Int64" + assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" + assert df.dtypes["date_col"].name == "dbdate" + assert df.dtypes["time_col"].name == "dbtime" + + # decimal.Decimal is used to avoid loss of precision. + assert df.dtypes["bignumeric_col"].name == "object" + assert df.dtypes["numeric_col"].name == "object" + + # pandas uses Python string and bytes objects. + assert df.dtypes["bytes_col"].name == "object" + assert df.dtypes["string_col"].name == "object" + + +@pytest.mark.parametrize( + ("max_results",), + ( + (None,), + (10,), + ), # Use BQ Storage API. # Use REST API. +) +def test_list_rows_nullable_scalars_extreme_dtypes( + bigquery_client, scalars_extreme_table, max_results +): + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. + schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + + df = bigquery_client.list_rows( + scalars_extreme_table, + max_results=max_results, + selected_fields=schema, + ).to_dataframe() + + # Extreme values are out-of-bounds for pandas datetime64 values, which use + # nanosecond precision. Values before 1677-09-21 and after 2262-04-11 must + # be represented with object. + # https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations + assert df.dtypes["date_col"].name == "object" + assert df.dtypes["datetime_col"].name == "object" + assert df.dtypes["timestamp_col"].name == "object" + + # These pandas dtypes can handle the same ranges as BigQuery. + assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["float64_col"].name == "float64" + assert df.dtypes["int64_col"].name == "Int64" + assert df.dtypes["time_col"].name == "dbtime" + + # decimal.Decimal is used to avoid loss of precision. + assert df.dtypes["numeric_col"].name == "object" + assert df.dtypes["bignumeric_col"].name == "object" + + # pandas uses Python string and bytes objects. + assert df.dtypes["bytes_col"].name == "object" + assert df.dtypes["string_col"].name == "object" + + def test_upload_time_and_datetime_56(bigquery_client, dataset_id): df = pandas.DataFrame( dict( diff --git a/packages/google-cloud-bigquery/tests/system/test_query.py b/packages/google-cloud-bigquery/tests/system/test_query.py index c402f66bab3d..723f927d7d92 100644 --- a/packages/google-cloud-bigquery/tests/system/test_query.py +++ b/packages/google-cloud-bigquery/tests/system/test_query.py @@ -12,17 +12,437 @@ # See the License for the specific language governing permissions and # limitations under the License. +import concurrent.futures +import datetime +import decimal +from typing import Tuple + +from google.api_core import exceptions +import pytest + from google.cloud import bigquery +from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameterType +from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import StructQueryParameterType + + +@pytest.fixture(params=["INSERT", "QUERY"]) +def query_api_method(request): + return request.param + + +@pytest.fixture(scope="session") +def table_with_9999_columns_10_rows(bigquery_client, project_id, dataset_id): + """Generate a table of maximum width via CREATE TABLE AS SELECT. + + The first column is named 'rowval', and has a value from 1..rowcount + Subsequent columns are named col_ and contain the value N*rowval, where + N is between 1 and 9999 inclusive. + """ + table_id = "many_columns" + row_count = 10 + col_projections = ",".join(f"r * {n} as col_{n}" for n in range(1, 10000)) + sql = f""" + CREATE TABLE `{project_id}.{dataset_id}.{table_id}` + AS + SELECT + r as rowval, + {col_projections} + FROM + UNNEST(GENERATE_ARRAY(1,{row_count},1)) as r + """ + query_job = bigquery_client.query(sql) + query_job.result() + + return f"{project_id}.{dataset_id}.{table_id}" + + +def test_query_many_columns( + bigquery_client, table_with_9999_columns_10_rows, query_api_method +): + # Test working with the widest schema BigQuery supports, 10k columns. + query_job = bigquery_client.query( + f"SELECT * FROM `{table_with_9999_columns_10_rows}`", + api_method=query_api_method, + ) + rows = list(query_job) + assert len(rows) == 10 + + # check field representations adhere to expected values. + for row in rows: + rowval = row["rowval"] + for column in range(1, 10000): + assert row[f"col_{column}"] == rowval * column + + +def test_query_w_timeout(bigquery_client, query_api_method): + job_config = bigquery.QueryJobConfig() + job_config.use_query_cache = False + + query_job = bigquery_client.query( + "SELECT * FROM `bigquery-public-data.github_repos.commits`;", + location="US", + job_config=job_config, + api_method=query_api_method, + ) + + with pytest.raises(concurrent.futures.TimeoutError): + query_job.result(timeout=1) + + # Even though the query takes >1 second, the call to getQueryResults + # should succeed. + assert not query_job.done(timeout=1) + assert bigquery_client.cancel_job(query_job) is not None + + +def test_query_statistics(bigquery_client, query_api_method): + """ + A system test to exercise some of the extended query statistics. + Note: We construct a query that should need at least three stages by + specifying a JOIN query. Exact plan and stats are effectively + non-deterministic, so we're largely interested in confirming values + are present. + """ + + job_config = bigquery.QueryJobConfig() + job_config.use_query_cache = False + + query_job = bigquery_client.query( + """ + SELECT + COUNT(1) + FROM + ( + SELECT + year, + wban_number + FROM `bigquery-public-data.samples.gsod` + LIMIT 1000 + ) lside + INNER JOIN + ( + SELECT + year, + state + FROM `bigquery-public-data.samples.natality` + LIMIT 1000 + ) rside + ON + lside.year = rside.year + """, + location="US", + job_config=job_config, + api_method=query_api_method, + ) + + # run the job to completion + query_job.result() + + # Must reload job to get stats if jobs.query was used. + if query_api_method == "QUERY": + query_job.reload() + + # Assert top-level stats + assert not query_job.cache_hit + assert query_job.destination is not None + assert query_job.done + assert not query_job.dry_run + assert query_job.num_dml_affected_rows is None + assert query_job.priority == "INTERACTIVE" + assert query_job.total_bytes_billed > 1 + assert query_job.total_bytes_processed > 1 + assert query_job.statement_type == "SELECT" + assert query_job.slot_millis > 1 + + # Make assertions on the shape of the query plan. + plan = query_job.query_plan + assert len(plan) >= 3 + first_stage = plan[0] + assert first_stage.start is not None + assert first_stage.end is not None + assert first_stage.entry_id is not None + assert first_stage.name is not None + assert first_stage.parallel_inputs > 0 + assert first_stage.completed_parallel_inputs > 0 + assert first_stage.shuffle_output_bytes > 0 + assert first_stage.status == "COMPLETE" + + # Query plan is a digraph. Ensure it has inter-stage links, + # but not every stage has inputs. + stages_with_inputs = 0 + for entry in plan: + if len(entry.input_stages) > 0: + stages_with_inputs = stages_with_inputs + 1 + assert stages_with_inputs > 0 + assert len(plan) > stages_with_inputs + + +@pytest.mark.parametrize( + ("sql", "expected", "query_parameters"), + ( + ( + "SELECT @question", + "What is the answer to life, the universe, and everything?", + [ + ScalarQueryParameter( + name="question", + type_="STRING", + value="What is the answer to life, the universe, and everything?", + ) + ], + ), + ( + "SELECT @answer", + 42, + [ScalarQueryParameter(name="answer", type_="INT64", value=42)], + ), + ( + "SELECT @pi", + 3.1415926, + [ScalarQueryParameter(name="pi", type_="FLOAT64", value=3.1415926)], + ), + ( + "SELECT @pi_numeric_param", + decimal.Decimal("3.141592654"), + [ + ScalarQueryParameter( + name="pi_numeric_param", + type_="NUMERIC", + value=decimal.Decimal("3.141592654"), + ) + ], + ), + ( + "SELECT @bignum_param", + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + [ + ScalarQueryParameter( + name="bignum_param", + type_="BIGNUMERIC", + value=decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + ) + ], + ), + ( + "SELECT @truthy", + True, + [ScalarQueryParameter(name="truthy", type_="BOOL", value=True)], + ), + ( + "SELECT @beef", + b"DEADBEEF", + [ScalarQueryParameter(name="beef", type_="BYTES", value=b"DEADBEEF")], + ), + ( + "SELECT @naive", + datetime.datetime(2016, 12, 5, 12, 41, 9), + [ + ScalarQueryParameter( + name="naive", + type_="DATETIME", + value=datetime.datetime(2016, 12, 5, 12, 41, 9), + ) + ], + ), + ( + "SELECT @naive_date", + datetime.date(2016, 12, 5), + [ + ScalarQueryParameter( + name="naive_date", type_="DATE", value=datetime.date(2016, 12, 5) + ) + ], + ), + ( + "SELECT @naive_time", + datetime.time(12, 41, 9, 62500), + [ + ScalarQueryParameter( + name="naive_time", + type_="TIME", + value=datetime.time(12, 41, 9, 62500), + ) + ], + ), + ( + "SELECT @zoned", + datetime.datetime(2016, 12, 5, 12, 41, 9, tzinfo=datetime.timezone.utc), + [ + ScalarQueryParameter( + name="zoned", + type_="TIMESTAMP", + value=datetime.datetime( + 2016, 12, 5, 12, 41, 9, tzinfo=datetime.timezone.utc + ), + ) + ], + ), + ( + "SELECT @array_param", + [1, 2], + [ + ArrayQueryParameter( + name="array_param", array_type="INT64", values=[1, 2] + ) + ], + ), + ( + "SELECT (@hitchhiker.question, @hitchhiker.answer)", + ({"_field_1": "What is the answer?", "_field_2": 42}), + [ + StructQueryParameter( + "hitchhiker", + ScalarQueryParameter( + name="question", + type_="STRING", + value="What is the answer?", + ), + ScalarQueryParameter( + name="answer", + type_="INT64", + value=42, + ), + ), + ], + ), + ( + "SELECT " + "((@rectangle.bottom_right.x - @rectangle.top_left.x) " + "* (@rectangle.top_left.y - @rectangle.bottom_right.y))", + 100, + [ + StructQueryParameter( + "rectangle", + StructQueryParameter( + "top_left", + ScalarQueryParameter("x", "INT64", 12), + ScalarQueryParameter("y", "INT64", 102), + ), + StructQueryParameter( + "bottom_right", + ScalarQueryParameter("x", "INT64", 22), + ScalarQueryParameter("y", "INT64", 92), + ), + ) + ], + ), + ( + "SELECT ?", + [ + {"name": "Phred Phlyntstone", "age": 32}, + {"name": "Bharney Rhubbyl", "age": 31}, + ], + [ + ArrayQueryParameter( + name=None, + array_type="RECORD", + values=[ + StructQueryParameter( + None, + ScalarQueryParameter( + name="name", type_="STRING", value="Phred Phlyntstone" + ), + ScalarQueryParameter(name="age", type_="INT64", value=32), + ), + StructQueryParameter( + None, + ScalarQueryParameter( + name="name", type_="STRING", value="Bharney Rhubbyl" + ), + ScalarQueryParameter(name="age", type_="INT64", value=31), + ), + ], + ) + ], + ), + ( + "SELECT @empty_array_param", + [], + [ + ArrayQueryParameter( + name="empty_array_param", + values=[], + array_type=StructQueryParameterType( + ScalarQueryParameterType(name="foo", type_="INT64"), + ScalarQueryParameterType(name="bar", type_="STRING"), + ), + ) + ], + ), + ( + "SELECT @roles", + { + "hero": {"name": "Phred Phlyntstone", "age": 32}, + "sidekick": {"name": "Bharney Rhubbyl", "age": 31}, + }, + [ + StructQueryParameter( + "roles", + StructQueryParameter( + "hero", + ScalarQueryParameter( + name="name", type_="STRING", value="Phred Phlyntstone" + ), + ScalarQueryParameter(name="age", type_="INT64", value=32), + ), + StructQueryParameter( + "sidekick", + ScalarQueryParameter( + name="name", type_="STRING", value="Bharney Rhubbyl" + ), + ScalarQueryParameter(name="age", type_="INT64", value=31), + ), + ), + ], + ), + ( + "SELECT ?", + {"friends": ["Jack", "Jill"]}, + [ + StructQueryParameter( + None, + ArrayQueryParameter( + name="friends", array_type="STRING", values=["Jack", "Jill"] + ), + ) + ], + ), + ), +) +def test_query_parameters( + bigquery_client, query_api_method, sql, expected, query_parameters +): + jconfig = bigquery.QueryJobConfig() + jconfig.query_parameters = query_parameters + query_job = bigquery_client.query( + sql, + job_config=jconfig, + api_method=query_api_method, + ) + rows = list(query_job.result()) + assert len(rows) == 1 + assert len(rows[0]) == 1 + assert rows[0][0] == expected -def test_dry_run(bigquery_client: bigquery.Client, scalars_table: str): + +def test_dry_run( + bigquery_client: bigquery.Client, + query_api_method: str, + scalars_table_multi_location: Tuple[str, str], +): + location, full_table_id = scalars_table_multi_location query_config = bigquery.QueryJobConfig() query_config.dry_run = True - query_string = f"SELECT * FROM {scalars_table}" + query_string = f"SELECT * FROM {full_table_id}" query_job = bigquery_client.query( query_string, + location=location, job_config=query_config, + api_method=query_api_method, ) # Note: `query_job.result()` is not necessary on a dry run query. All @@ -32,7 +452,30 @@ def test_dry_run(bigquery_client: bigquery.Client, scalars_table: str): assert len(query_job.schema) > 0 -def test_session(bigquery_client: bigquery.Client): +def test_query_error_w_api_method_query(bigquery_client: bigquery.Client): + """No job is returned from jobs.query if the query fails.""" + + with pytest.raises(exceptions.NotFound, match="not_a_real_dataset"): + bigquery_client.query( + "SELECT * FROM not_a_real_dataset.doesnt_exist", api_method="QUERY" + ) + + +def test_query_error_w_api_method_default(bigquery_client: bigquery.Client): + """Test that an exception is not thrown until fetching the results. + + For backwards compatibility, jobs.insert is the default API method. With + jobs.insert, a failed query job is "sucessfully" created. An exception is + thrown when fetching the results. + """ + + query_job = bigquery_client.query("SELECT * FROM not_a_real_dataset.doesnt_exist") + + with pytest.raises(exceptions.NotFound, match="not_a_real_dataset"): + query_job.result() + + +def test_session(bigquery_client: bigquery.Client, query_api_method: str): initial_config = bigquery.QueryJobConfig() initial_config.create_session = True initial_query = """ @@ -40,7 +483,9 @@ def test_session(bigquery_client: bigquery.Client): AS SELECT * FROM UNNEST([1, 2, 3, 4, 5]) AS id; """ - initial_job = bigquery_client.query(initial_query, job_config=initial_config) + initial_job = bigquery_client.query( + initial_query, job_config=initial_config, api_method=query_api_method + ) initial_job.result() session_id = initial_job.session_info.session_id assert session_id is not None diff --git a/packages/google-cloud-bigquery/tests/unit/enums/__init__.py b/packages/google-cloud-bigquery/tests/unit/enums/__init__.py deleted file mode 100644 index c5cce043083c..000000000000 --- a/packages/google-cloud-bigquery/tests/unit/enums/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2019, Google LLC All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py b/packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py deleted file mode 100644 index 7f62c46fd34d..000000000000 --- a/packages/google-cloud-bigquery/tests/unit/enums/test_standard_sql_data_types.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - - -@pytest.fixture -def module_under_test(): - from google.cloud.bigquery import enums - - return enums - - -@pytest.fixture -def enum_under_test(): - from google.cloud.bigquery.enums import StandardSqlDataTypes - - return StandardSqlDataTypes - - -@pytest.fixture -def gapic_enum(): - """The referential autogenerated enum the enum under test is based on.""" - from google.cloud.bigquery_v2.types import StandardSqlDataType - - return StandardSqlDataType.TypeKind - - -def test_all_gapic_enum_members_are_known(module_under_test, gapic_enum): - gapic_names = set(type_.name for type_ in gapic_enum) - anticipated_names = ( - module_under_test._SQL_SCALAR_TYPES | module_under_test._SQL_NONSCALAR_TYPES - ) - assert not (gapic_names - anticipated_names) # no unhandled names - - -def test_standard_sql_types_enum_members(enum_under_test, gapic_enum): - # check the presence of a few typical SQL types - for name in ("INT64", "FLOAT64", "DATE", "BOOL", "GEOGRAPHY"): - assert name in enum_under_test.__members__ - - # the enum members must match those in the original gapic enum - for member in enum_under_test: - assert member.name in gapic_enum.__members__ - assert member.value == gapic_enum[member.name].value - - # check a few members that should *not* be copied over from the gapic enum - for name in ("STRUCT", "ARRAY"): - assert name in gapic_enum.__members__ - assert name not in enum_under_test.__members__ - - -@pytest.mark.skip(reason="Code generator issue, the docstring is not generated.") -def test_standard_sql_types_enum_docstring( - enum_under_test, gapic_enum -): # pragma: NO COVER - assert "STRUCT (int):" not in enum_under_test.__doc__ - assert "BOOL (int):" in enum_under_test.__doc__ - assert "TIME (int):" in enum_under_test.__doc__ - - # All lines in the docstring should actually come from the original docstring, - # except for the header. - assert "An Enum of scalar SQL types." in enum_under_test.__doc__ - doc_lines = enum_under_test.__doc__.splitlines() - assert set(doc_lines[1:]) <= set(gapic_enum.__doc__.splitlines()) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 775c5a3026d8..84aab3aca597 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -17,8 +17,13 @@ import json import mock +import pyarrow import pytest +from google.cloud import bigquery_storage +import google.cloud.bigquery_storage_v1.reader +import google.cloud.bigquery_storage_v1.services.big_query_read.client + try: import pandas except (ImportError, AttributeError): # pragma: NO COVER @@ -31,24 +36,16 @@ import geopandas except (ImportError, AttributeError): # pragma: NO COVER geopandas = None -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER tqdm = None -from google.cloud.bigquery import _helpers - from ..helpers import make_connection - from .helpers import _make_client from .helpers import _make_job_resource - -pyarrow = _helpers.PYARROW_VERSIONS.try_import() +pandas = pytest.importorskip("pandas") @pytest.fixture @@ -92,10 +89,6 @@ def test__contains_order_by(query, expected): assert not mut._contains_order_by(query) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.parametrize( "query", ( @@ -116,7 +109,7 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): ) job_resource["configuration"]["query"]["query"] = query job_resource["status"] = {"state": "DONE"} - get_query_results_resource = { + query_resource = { "jobComplete": True, "jobReference": {"projectId": "test-project", "jobId": "test-job"}, "schema": { @@ -127,25 +120,48 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): }, "totalRows": "4", } - connection = make_connection(get_query_results_resource, job_resource) + stream_id = "projects/1/locations/2/sessions/3/streams/4" + name_array = pyarrow.array( + ["John", "Paul", "George", "Ringo"], type=pyarrow.string() + ) + age_array = pyarrow.array([17, 24, 21, 15], type=pyarrow.int64()) + arrow_schema = pyarrow.schema( + [ + pyarrow.field("name", pyarrow.string(), True), + pyarrow.field("age", pyarrow.int64(), True), + ] + ) + record_batch = pyarrow.RecordBatch.from_arrays( + [name_array, age_array], schema=arrow_schema + ) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(job_resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() - session.avro_schema.schema = json.dumps( - { - "type": "record", - "name": "__root__", - "fields": [ - {"name": "name", "type": ["null", "string"]}, - {"name": "age", "type": ["null", "long"]}, - ], - } + session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() + session.streams = [bigquery_storage.types.ReadStream(name=stream_id)] + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + row_iterable = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsIterable, instance=True + ) + page = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsPage, instance=True + ) + page.to_arrow.return_value = record_batch + type(row_iterable).pages = mock.PropertyMock(return_value=[page]) + reader.rows.return_value = row_iterable + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True ) bqstorage_client.create_read_session.return_value = session + bqstorage_client.read_rows.return_value = reader - job.to_dataframe(bqstorage_client=bqstorage_client) + dataframe = job.to_dataframe(bqstorage_client=bqstorage_client) + assert len(dataframe) == 4 destination_table = ( "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **job_resource["configuration"]["query"]["destinationTable"] @@ -163,7 +179,6 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow(): from google.cloud.bigquery.job import QueryJob as target_class @@ -250,7 +265,6 @@ def test_to_arrow(): ] -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class @@ -286,7 +300,6 @@ def test_to_arrow_max_results_no_progress_bar(): assert tbl.num_rows == 2 -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_w_query_plan(): from google.cloud.bigquery import table @@ -343,7 +356,6 @@ def test_to_arrow_w_tqdm_w_query_plan(): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_w_pending_status(): from google.cloud.bigquery import table @@ -396,7 +408,6 @@ def test_to_arrow_w_tqdm_w_pending_status(): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_wo_query_plan(): from google.cloud.bigquery import table @@ -480,7 +491,6 @@ def test_to_dataframe(): assert list(df) == ["name", "age"] # verify the column names -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_ddl_query(): from google.cloud.bigquery.job import QueryJob as target_class @@ -500,10 +510,6 @@ def test_to_dataframe_ddl_query(): assert len(df) == 0 -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class @@ -519,25 +525,47 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): ] }, } + stream_id = "projects/1/locations/2/sessions/3/streams/4" + name_array = pyarrow.array( + ["John", "Paul", "George", "Ringo"], type=pyarrow.string() + ) + age_array = pyarrow.array([17, 24, 21, 15], type=pyarrow.int64()) + arrow_schema = pyarrow.schema( + [ + pyarrow.field("name", pyarrow.string(), True), + pyarrow.field("age", pyarrow.int64(), True), + ] + ) + record_batch = pyarrow.RecordBatch.from_arrays( + [name_array, age_array], schema=arrow_schema + ) connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() - session.avro_schema.schema = json.dumps( - { - "type": "record", - "name": "__root__", - "fields": [ - {"name": "name", "type": ["null", "string"]}, - {"name": "age", "type": ["null", "long"]}, - ], - } + session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() + session.streams = [bigquery_storage.types.ReadStream(name=stream_id)] + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + row_iterable = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsIterable, instance=True + ) + page = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsPage, instance=True + ) + page.to_arrow.return_value = record_batch + type(row_iterable).pages = mock.PropertyMock(return_value=[page]) + reader.rows.return_value = row_iterable + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True ) bqstorage_client.create_read_session.return_value = session + bqstorage_client.read_rows.return_value = reader - job.to_dataframe(bqstorage_client=bqstorage_client) + dataframe = job.to_dataframe(bqstorage_client=bqstorage_client) + assert len(dataframe) == 4 destination_table = ( "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **resource["configuration"]["query"]["destinationTable"] @@ -553,12 +581,9 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): read_session=expected_session, max_stream_count=0, # Use default number of streams for best performance. ) + bqstorage_client.read_rows.assert_called_once_with(stream_id) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_to_dataframe_bqstorage_no_pyarrow_compression(): from google.cloud.bigquery.job import QueryJob as target_class @@ -604,7 +629,6 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class @@ -656,16 +680,14 @@ def test_to_dataframe_column_dtypes(): assert list(df) == exp_columns # verify the column names assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" - assert df.seconds.dtype.name == "int64" + assert df.seconds.dtype.name == "Int64" assert df.miles.dtype.name == "float64" assert df.km.dtype.name == "float16" assert df.payment_type.dtype.name == "object" - assert df.complete.dtype.name == "bool" - assert df.date.dtype.name == "object" + assert df.complete.dtype.name == "boolean" + assert df.date.dtype.name == "dbdate" -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_column_date_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class @@ -688,16 +710,15 @@ def test_to_dataframe_column_date_dtypes(): ) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) + df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) assert len(df) == 1 # verify the number of rows exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] assert list(df) == exp_columns # verify the column names - assert df.date.dtype.name == "datetime64[ns]" + assert df.date.dtype.name == "dbdate" -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("tqdm.tqdm") def test_to_dataframe_with_progress_bar(tqdm_mock): @@ -729,7 +750,6 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): tqdm_mock.assert_called() -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_dataframe_w_tqdm_pending(): from google.cloud.bigquery import table @@ -785,7 +805,6 @@ def test_to_dataframe_w_tqdm_pending(): ) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_dataframe_w_tqdm(): from google.cloud.bigquery import table @@ -845,7 +864,6 @@ def test_to_dataframe_w_tqdm(): ) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_dataframe_w_tqdm_max_results(): from google.cloud.bigquery import table @@ -957,7 +975,6 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): dtypes = dict(xxx=numpy.dtype("int64")) progress_bar_type = "normal" create_bqstorage_client = False - date_as_object = False max_results = 42 geography_column = "g" @@ -966,7 +983,6 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, max_results=max_results, geography_column=geography_column, ) @@ -980,7 +996,6 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, geography_column=geography_column, ) assert df is row_iterator.to_geodataframe.return_value diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model.py b/packages/google-cloud-bigquery/tests/unit/model/test_model.py index 4790b858ba12..1ae988414908 100644 --- a/packages/google-cloud-bigquery/tests/unit/model/test_model.py +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model.py @@ -19,7 +19,6 @@ import pytest import google.cloud._helpers -from google.cloud.bigquery_v2 import types KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" @@ -95,11 +94,12 @@ def test_from_api_repr(target_class): }, { "trainingOptions": {"initialLearnRate": 0.25}, - # Allow milliseconds since epoch format. - # TODO: Remove this hack once CL 238585470 hits prod. - "startTime": str(google.cloud._helpers._millis(expiration_time)), + "startTime": str( + google.cloud._helpers._datetime_to_rfc3339(expiration_time) + ), }, ], + "bestTrialId": "123", "featureColumns": [], "encryptionConfiguration": {"kmsKeyName": KMS_KEY_NAME}, } @@ -117,28 +117,23 @@ def test_from_api_repr(target_class): assert got.expires == expiration_time assert got.description == "A friendly description." assert got.friendly_name == "A friendly name." - assert got.model_type == types.Model.ModelType.LOGISTIC_REGRESSION + assert got.model_type == "LOGISTIC_REGRESSION" assert got.labels == {"greeting": "こんにちは"} assert got.encryption_configuration.kms_key_name == KMS_KEY_NAME - assert got.training_runs[0].training_options.initial_learn_rate == 1.0 + assert got.best_trial_id == 123 + assert got.training_runs[0]["trainingOptions"]["initialLearnRate"] == 1.0 assert ( - got.training_runs[0] - .start_time.ToDatetime() - .replace(tzinfo=google.cloud._helpers.UTC) + google.cloud._helpers._rfc3339_to_datetime(got.training_runs[0]["startTime"]) == creation_time ) - assert got.training_runs[1].training_options.initial_learn_rate == 0.5 + assert got.training_runs[1]["trainingOptions"]["initialLearnRate"] == 0.5 assert ( - got.training_runs[1] - .start_time.ToDatetime() - .replace(tzinfo=google.cloud._helpers.UTC) + google.cloud._helpers._rfc3339_to_datetime(got.training_runs[1]["startTime"]) == modified_time ) - assert got.training_runs[2].training_options.initial_learn_rate == 0.25 + assert got.training_runs[2]["trainingOptions"]["initialLearnRate"] == 0.25 assert ( - got.training_runs[2] - .start_time.ToDatetime() - .replace(tzinfo=google.cloud._helpers.UTC) + google.cloud._helpers._rfc3339_to_datetime(got.training_runs[2]["startTime"]) == expiration_time ) @@ -155,19 +150,20 @@ def test_from_api_repr_w_minimal_resource(target_class): } got = target_class.from_api_repr(resource) assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") - assert got.location == "" - assert got.etag == "" + assert got.location is None + assert got.etag is None assert got.created is None assert got.modified is None assert got.expires is None assert got.description is None assert got.friendly_name is None - assert got.model_type == types.Model.ModelType.MODEL_TYPE_UNSPECIFIED + assert got.model_type == "MODEL_TYPE_UNSPECIFIED" assert got.labels == {} assert got.encryption_configuration is None assert len(got.training_runs) == 0 assert len(got.feature_columns) == 0 assert len(got.label_columns) == 0 + assert got.best_trial_id is None def test_from_api_repr_w_unknown_fields(target_class): @@ -183,7 +179,7 @@ def test_from_api_repr_w_unknown_fields(target_class): } got = target_class.from_api_repr(resource) assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") - assert got._properties is resource + assert got._properties == resource def test_from_api_repr_w_unknown_type(target_class): @@ -195,12 +191,19 @@ def test_from_api_repr_w_unknown_type(target_class): "datasetId": "my_dataset", "modelId": "my_model", }, - "modelType": "BE_A_GOOD_ROLE_MODEL", + "modelType": "BE_A_GOOD_ROLE_MODEL", # This model type does not exist. } got = target_class.from_api_repr(resource) assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") - assert got.model_type == 0 - assert got._properties is resource + assert got.model_type == "BE_A_GOOD_ROLE_MODEL" # No checks for invalid types. + assert got._properties == resource + + +def test_from_api_repr_w_missing_reference(target_class): + resource = {} + got = target_class.from_api_repr(resource) + assert got.reference is None + assert got._properties == resource @pytest.mark.parametrize( @@ -270,6 +273,46 @@ def test_build_resource(object_under_test, resource, filter_fields, expected): assert got == expected +def test_feature_columns(object_under_test): + from google.cloud.bigquery import standard_sql + + object_under_test._properties["featureColumns"] = [ + {"name": "col_1", "type": {"typeKind": "STRING"}}, + {"name": "col_2", "type": {"typeKind": "FLOAT64"}}, + ] + expected = [ + standard_sql.StandardSqlField( + "col_1", + standard_sql.StandardSqlDataType(standard_sql.StandardSqlTypeNames.STRING), + ), + standard_sql.StandardSqlField( + "col_2", + standard_sql.StandardSqlDataType(standard_sql.StandardSqlTypeNames.FLOAT64), + ), + ] + assert object_under_test.feature_columns == expected + + +def test_label_columns(object_under_test): + from google.cloud.bigquery import standard_sql + + object_under_test._properties["labelColumns"] = [ + {"name": "col_1", "type": {"typeKind": "STRING"}}, + {"name": "col_2", "type": {"typeKind": "FLOAT64"}}, + ] + expected = [ + standard_sql.StandardSqlField( + "col_1", + standard_sql.StandardSqlDataType(standard_sql.StandardSqlTypeNames.STRING), + ), + standard_sql.StandardSqlField( + "col_2", + standard_sql.StandardSqlDataType(standard_sql.StandardSqlTypeNames.FLOAT64), + ), + ] + assert object_under_test.label_columns == expected + + def test_set_description(object_under_test): assert not object_under_test.description object_under_test.description = "A model description." @@ -338,8 +381,6 @@ def test_repr(target_class): def test_to_api_repr(target_class): - from google.protobuf import json_format - model = target_class("my-proj.my_dset.my_model") resource = { "etag": "abcdefg", @@ -374,8 +415,6 @@ def test_to_api_repr(target_class): "kmsKeyName": "projects/1/locations/us/keyRings/1/cryptoKeys/1" }, } - model._proto = json_format.ParseDict( - resource, types.Model()._pb, ignore_unknown_fields=True - ) + model._properties = resource got = model.to_api_repr() assert got == resource diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py index fdaf13324189..80a3def7320a 100644 --- a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py @@ -19,7 +19,6 @@ import google.cloud._helpers from google.cloud import bigquery -from google.cloud import bigquery_v2 @pytest.fixture @@ -62,15 +61,15 @@ def test_ctor_w_properties(target_class): arguments = [ RoutineArgument( name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type=bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ) ] body = "x * 3" language = "SQL" - return_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + return_type = bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ) type_ = "SCALAR_FUNCTION" description = "A routine description." @@ -146,15 +145,15 @@ def test_from_api_repr(target_class): assert actual_routine.arguments == [ RoutineArgument( name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type=bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ) ] assert actual_routine.body == "42" assert actual_routine.language == "SQL" - assert actual_routine.return_type == bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + assert actual_routine.return_type == bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ) assert actual_routine.return_table_type is None assert actual_routine.type_ == "SCALAR_FUNCTION" @@ -168,9 +167,9 @@ def test_from_api_repr_tvf_function(target_class): from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType - StandardSqlDataType = bigquery_v2.types.StandardSqlDataType - StandardSqlField = bigquery_v2.types.StandardSqlField - StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + StandardSqlDataType = bigquery.standard_sql.StandardSqlDataType + StandardSqlField = bigquery.standard_sql.StandardSqlField + StandardSqlTableType = bigquery.standard_sql.StandardSqlTableType creation_time = datetime.datetime( 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC @@ -216,7 +215,9 @@ def test_from_api_repr_tvf_function(target_class): assert actual_routine.arguments == [ RoutineArgument( name="a", - data_type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + data_type=StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), ) ] assert actual_routine.body == "SELECT x FROM UNNEST([1,2,3]) x WHERE x > a" @@ -226,7 +227,7 @@ def test_from_api_repr_tvf_function(target_class): columns=[ StandardSqlField( name="int_col", - type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + type=StandardSqlDataType(type_kind=bigquery.StandardSqlTypeNames.INT64), ) ] ) @@ -460,19 +461,21 @@ def test_set_return_table_type_w_none(object_under_test): def test_set_return_table_type_w_not_none(object_under_test): - StandardSqlDataType = bigquery_v2.types.StandardSqlDataType - StandardSqlField = bigquery_v2.types.StandardSqlField - StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + StandardSqlDataType = bigquery.standard_sql.StandardSqlDataType + StandardSqlField = bigquery.standard_sql.StandardSqlField + StandardSqlTableType = bigquery.standard_sql.StandardSqlTableType table_type = StandardSqlTableType( columns=[ StandardSqlField( name="int_col", - type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + type=StandardSqlDataType(type_kind=bigquery.StandardSqlTypeNames.INT64), ), StandardSqlField( name="str_col", - type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.STRING), + type=StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.STRING + ), ), ] ) diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py index e3bda95391fa..b7f168a301da 100644 --- a/packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine_argument.py @@ -16,7 +16,7 @@ import pytest -from google.cloud import bigquery_v2 +from google.cloud import bigquery @pytest.fixture @@ -27,8 +27,8 @@ def target_class(): def test_ctor(target_class): - data_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type = bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ) actual_arg = target_class( name="field_name", kind="FIXED_TYPE", mode="IN", data_type=data_type @@ -50,8 +50,8 @@ def test_from_api_repr(target_class): assert actual_arg.name == "field_name" assert actual_arg.kind == "FIXED_TYPE" assert actual_arg.mode == "IN" - assert actual_arg.data_type == bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + assert actual_arg.data_type == bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ) @@ -71,8 +71,8 @@ def test_from_api_repr_w_unknown_fields(target_class): def test_eq(target_class): - data_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type = bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ) arg = target_class( name="field_name", kind="FIXED_TYPE", mode="IN", data_type=data_type diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 0dd1c273674d..885e773d3ba3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -19,18 +19,7 @@ import mock -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - - -@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") class TestBQStorageVersions(unittest.TestCase): def tearDown(self): from google.cloud.bigquery import _helpers @@ -43,37 +32,6 @@ def _object_under_test(self): return _helpers.BQStorageVersions() - def _call_fut(self): - from google.cloud.bigquery import _helpers - - _helpers.BQ_STORAGE_VERSIONS._installed_version = None - return _helpers.BQ_STORAGE_VERSIONS.verify_version() - - def test_raises_no_error_w_recent_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): - try: - self._call_fut() - except LegacyBigQueryStorageError: # pragma: NO COVER - self.fail("Legacy error raised with a non-legacy dependency version.") - - def test_raises_error_w_legacy_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): - with self.assertRaises(LegacyBigQueryStorageError): - self._call_fut() - - def test_raises_error_w_unknown_bqstorage_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: - del fake_module.__version__ - error_pattern = r"version found: 0.0.0" - with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): - self._call_fut() - def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() @@ -100,7 +58,6 @@ def test_is_read_session_optional_false(self): assert not versions.is_read_session_optional -@unittest.skipIf(pyarrow is None, "Requires `pyarrow`") class TestPyarrowVersions(unittest.TestCase): def tearDown(self): from google.cloud.bigquery import _helpers @@ -113,34 +70,6 @@ def _object_under_test(self): return _helpers.PyarrowVersions() - def _call_try_import(self, **kwargs): - from google.cloud.bigquery import _helpers - - _helpers.PYARROW_VERSIONS._installed_version = None - return _helpers.PYARROW_VERSIONS.try_import(**kwargs) - - def test_try_import_raises_no_error_w_recent_pyarrow(self): - from google.cloud.bigquery.exceptions import LegacyPyarrowError - - with mock.patch("pyarrow.__version__", new="5.0.0"): - try: - pyarrow = self._call_try_import(raise_if_error=True) - self.assertIsNotNone(pyarrow) - except LegacyPyarrowError: # pragma: NO COVER - self.fail("Legacy error raised with a non-legacy dependency version.") - - def test_try_import_returns_none_w_legacy_pyarrow(self): - with mock.patch("pyarrow.__version__", new="2.0.0"): - pyarrow = self._call_try_import() - self.assertIsNone(pyarrow) - - def test_try_import_raises_error_w_legacy_pyarrow(self): - from google.cloud.bigquery.exceptions import LegacyPyarrowError - - with mock.patch("pyarrow.__version__", new="2.0.0"): - with self.assertRaises(LegacyPyarrowError): - self._call_try_import(raise_if_error=True) - def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py new file mode 100644 index 000000000000..012352f4eced --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py @@ -0,0 +1,337 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Dict, Optional +from unittest import mock + +from google.api_core import retry as retries +import pytest + +from google.cloud.bigquery.client import Client +from google.cloud.bigquery import _job_helpers +from google.cloud.bigquery.job.query import QueryJob, QueryJobConfig +from google.cloud.bigquery.query import ConnectionProperty, ScalarQueryParameter + + +def make_query_request(additional_properties: Optional[Dict[str, Any]] = None): + request = {"useLegacySql": False, "formatOptions": {"useInt64Timestamp": True}} + if additional_properties is not None: + request.update(additional_properties) + return request + + +def make_query_response( + completed: bool = False, + job_id: str = "abcd-efg-hijk-lmnop", + location="US", + project_id="test-project", + errors=None, +) -> Dict[str, Any]: + response = { + "jobReference": { + "projectId": project_id, + "jobId": job_id, + "location": location, + }, + "jobComplete": completed, + } + if errors is not None: + response["errors"] = errors + return response + + +@pytest.mark.parametrize( + ("job_config", "expected"), + ( + (None, make_query_request()), + (QueryJobConfig(), make_query_request()), + ( + QueryJobConfig(default_dataset="my-project.my_dataset"), + make_query_request( + { + "defaultDataset": { + "projectId": "my-project", + "datasetId": "my_dataset", + } + } + ), + ), + (QueryJobConfig(dry_run=True), make_query_request({"dryRun": True})), + ( + QueryJobConfig(use_query_cache=False), + make_query_request({"useQueryCache": False}), + ), + ( + QueryJobConfig(use_legacy_sql=True), + make_query_request({"useLegacySql": True}), + ), + ( + QueryJobConfig( + query_parameters=[ + ScalarQueryParameter("named_param1", "STRING", "param-value"), + ScalarQueryParameter("named_param2", "INT64", 123), + ] + ), + make_query_request( + { + "parameterMode": "NAMED", + "queryParameters": [ + { + "name": "named_param1", + "parameterType": {"type": "STRING"}, + "parameterValue": {"value": "param-value"}, + }, + { + "name": "named_param2", + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, + }, + ], + } + ), + ), + ( + QueryJobConfig( + query_parameters=[ + ScalarQueryParameter(None, "STRING", "param-value"), + ScalarQueryParameter(None, "INT64", 123), + ] + ), + make_query_request( + { + "parameterMode": "POSITIONAL", + "queryParameters": [ + { + "parameterType": {"type": "STRING"}, + "parameterValue": {"value": "param-value"}, + }, + { + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, + }, + ], + } + ), + ), + ( + QueryJobConfig( + connection_properties=[ + ConnectionProperty(key="time_zone", value="America/Chicago"), + ConnectionProperty(key="session_id", value="abcd-efgh-ijkl-mnop"), + ] + ), + make_query_request( + { + "connectionProperties": [ + {"key": "time_zone", "value": "America/Chicago"}, + {"key": "session_id", "value": "abcd-efgh-ijkl-mnop"}, + ] + } + ), + ), + ( + QueryJobConfig(labels={"abc": "def"}), + make_query_request({"labels": {"abc": "def"}}), + ), + ( + QueryJobConfig(maximum_bytes_billed=987654), + make_query_request({"maximumBytesBilled": "987654"}), + ), + ), +) +def test__to_query_request(job_config, expected): + result = _job_helpers._to_query_request(job_config) + assert result == expected + + +def test__to_query_job_defaults(): + mock_client = mock.create_autospec(Client) + response = make_query_response( + job_id="test-job", project_id="some-project", location="asia-northeast1" + ) + job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response) + assert job.query == "query-str" + assert job._client is mock_client + assert job.job_id == "test-job" + assert job.project == "some-project" + assert job.location == "asia-northeast1" + assert job.error_result is None + assert job.errors is None + + +def test__to_query_job_dry_run(): + mock_client = mock.create_autospec(Client) + response = make_query_response( + job_id="test-job", project_id="some-project", location="asia-northeast1" + ) + job_config: QueryJobConfig = QueryJobConfig() + job_config.dry_run = True + job: QueryJob = _job_helpers._to_query_job( + mock_client, "query-str", job_config, response + ) + assert job.dry_run is True + + +@pytest.mark.parametrize( + ("completed", "expected_state"), + ( + (True, "DONE"), + (False, "PENDING"), + ), +) +def test__to_query_job_sets_state(completed, expected_state): + mock_client = mock.create_autospec(Client) + response = make_query_response(completed=completed) + job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response) + assert job.state == expected_state + + +def test__to_query_job_sets_errors(): + mock_client = mock.create_autospec(Client) + response = make_query_response( + errors=[ + # https://cloud.google.com/bigquery/docs/reference/rest/v2/ErrorProto + {"reason": "backendError", "message": "something went wrong"}, + {"message": "something else went wrong"}, + ] + ) + job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response) + assert len(job.errors) == 2 + # If we got back a response instead of an HTTP error status code, most + # likely the job didn't completely fail. + assert job.error_result is None + + +def test_query_jobs_query_defaults(): + mock_client = mock.create_autospec(Client) + mock_retry = mock.create_autospec(retries.Retry) + mock_job_retry = mock.create_autospec(retries.Retry) + mock_client._call_api.return_value = { + "jobReference": { + "projectId": "test-project", + "jobId": "abc", + "location": "asia-northeast1", + } + } + _job_helpers.query_jobs_query( + mock_client, + "SELECT * FROM test", + None, + "asia-northeast1", + "test-project", + mock_retry, + None, + mock_job_retry, + ) + + assert mock_client._call_api.call_count == 1 + call_args, call_kwargs = mock_client._call_api.call_args + assert call_args[0] is mock_retry + # See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + assert call_kwargs["path"] == "/projects/test-project/queries" + assert call_kwargs["method"] == "POST" + # See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#QueryRequest + request = call_kwargs["data"] + assert request["requestId"] is not None + assert request["query"] == "SELECT * FROM test" + assert request["location"] == "asia-northeast1" + assert request["formatOptions"]["useInt64Timestamp"] is True + assert "timeoutMs" not in request + + +def test_query_jobs_query_sets_format_options(): + """Since jobs.query can return results, ensure we use the lossless + timestamp format. + + See: https://github.com/googleapis/python-bigquery/issues/395 + """ + mock_client = mock.create_autospec(Client) + mock_retry = mock.create_autospec(retries.Retry) + mock_job_retry = mock.create_autospec(retries.Retry) + mock_client._call_api.return_value = { + "jobReference": {"projectId": "test-project", "jobId": "abc", "location": "US"} + } + _job_helpers.query_jobs_query( + mock_client, + "SELECT * FROM test", + None, + "US", + "test-project", + mock_retry, + None, + mock_job_retry, + ) + + assert mock_client._call_api.call_count == 1 + _, call_kwargs = mock_client._call_api.call_args + # See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#QueryRequest + request = call_kwargs["data"] + assert request["formatOptions"]["useInt64Timestamp"] is True + + +@pytest.mark.parametrize( + ("timeout", "expected_timeout"), + ( + (-1, 0), + (0, 0), + (1, 1000 - _job_helpers._TIMEOUT_BUFFER_MILLIS), + ), +) +def test_query_jobs_query_sets_timeout(timeout, expected_timeout): + mock_client = mock.create_autospec(Client) + mock_retry = mock.create_autospec(retries.Retry) + mock_job_retry = mock.create_autospec(retries.Retry) + mock_client._call_api.return_value = { + "jobReference": {"projectId": "test-project", "jobId": "abc", "location": "US"} + } + _job_helpers.query_jobs_query( + mock_client, + "SELECT * FROM test", + None, + "US", + "test-project", + mock_retry, + timeout, + mock_job_retry, + ) + + assert mock_client._call_api.call_count == 1 + _, call_kwargs = mock_client._call_api.call_args + # See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#QueryRequest + request = call_kwargs["data"] + assert request["timeoutMs"] == expected_timeout + + +def test_make_job_id_wo_suffix(): + job_id = _job_helpers.make_job_id("job_id") + assert job_id == "job_id" + + +def test_make_job_id_w_suffix(): + with mock.patch("uuid.uuid4", side_effect=["212345"]): + job_id = _job_helpers.make_job_id(None, prefix="job_id") + + assert job_id == "job_id212345" + + +def test_make_job_id_random(): + with mock.patch("uuid.uuid4", side_effect=["212345"]): + job_id = _job_helpers.make_job_id(None) + + assert job_id == "212345" + + +def test_make_job_id_w_job_id_overrides_prefix(): + job_id = _job_helpers.make_job_id("job_id", prefix="unused_prefix") + assert job_id == "job_id" diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index c849461fd010..5b2fadaf120c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -29,6 +29,10 @@ import pandas.testing except ImportError: # pragma: NO COVER pandas = None + +import pyarrow +import pyarrow.types + try: import geopandas except ImportError: # pragma: NO COVER @@ -37,26 +41,11 @@ import pytest from google import api_core -from google.cloud.bigquery import exceptions +from google.cloud import bigquery_storage from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema -pyarrow = _helpers.PYARROW_VERSIONS.try_import() -if pyarrow: - import pyarrow.types -else: # pragma: NO COVER - # Mock out pyarrow when missing, because methods from pyarrow.types are - # used in test parameterization. - pyarrow = mock.Mock() - -try: - from google.cloud import bigquery_storage - - _helpers.BQ_STORAGE_VERSIONS.verify_version() -except ImportError: # pragma: NO COVER - bigquery_storage = None - PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") if pandas is not None: @@ -121,7 +110,6 @@ def all_(*functions): return functools.partial(do_all, functions) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_is_datetime(): assert is_datetime(pyarrow.timestamp("us", tz=None)) assert not is_datetime(pyarrow.timestamp("ms", tz=None)) @@ -292,7 +280,6 @@ def test_all_(): ("UNKNOWN_TYPE", "REPEATED", is_none), ], ) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type): field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode) actual = module_under_test.bq_to_arrow_data_type(field) @@ -300,7 +287,6 @@ def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_t @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -348,7 +334,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -396,7 +381,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): assert actual.value_type.equals(expected_value_type) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -495,7 +479,6 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): series = pandas.Series(rows, dtype="object") bq_field = schema.SchemaField("field_name", bq_type) @@ -530,7 +513,6 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): rows = [pandas.Timestamp(row) for row in rows] series = pandas.Series(rows) @@ -541,7 +523,6 @@ def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): rows = [[1, 2, 3], [], [4, 5, 6]] series = pandas.Series(rows, dtype="object") @@ -553,7 +534,6 @@ def test_bq_to_arrow_array_w_arrays(module_under_test): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): rows = [ {"int_col": 123, "string_col": "abc"}, @@ -575,7 +555,6 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_special_floats(module_under_test): bq_field = schema.SchemaField("field_name", "FLOAT64") rows = [float("-inf"), float("nan"), float("inf"), None] @@ -593,7 +572,6 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): @pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_geography_dtype(module_under_test): from shapely import wkb, wkt @@ -613,7 +591,6 @@ def test_bq_to_arrow_array_w_geography_dtype(module_under_test): @pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test): from shapely import wkb, wkt @@ -633,7 +610,6 @@ def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test): @pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): from shapely import wkb, wkt @@ -646,7 +622,6 @@ def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): assert array.to_pylist() == list(series) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -943,7 +918,6 @@ def test_dataframe_to_bq_schema_dict_sequence(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_multiindex(module_under_test): bq_schema = ( schema.SchemaField("str_index", "STRING"), @@ -1010,7 +984,6 @@ def test_dataframe_to_arrow_with_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_required_fields(module_under_test): bq_schema = ( schema.SchemaField("field01", "STRING", mode="REQUIRED"), @@ -1067,7 +1040,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_unknown_type(module_under_test): bq_schema = ( schema.SchemaField("field00", "UNKNOWN_TYPE"), @@ -1100,7 +1072,6 @@ def test_dataframe_to_arrow_with_unknown_type(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, @@ -1122,19 +1093,6 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): - mock_pyarrow_import = mock.Mock() - mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError( - "pyarrow not installed" - ) - monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import) - - with pytest.raises(exceptions.LegacyPyarrowError): - module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) - - -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_extra_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1146,8 +1104,7 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): +def test_dataframe_to_parquet_w_missing_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( pandas.DataFrame({"not_in_bq": [1, 2, 3]}), (), None @@ -1158,7 +1115,6 @@ def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_compression_method(module_under_test): bq_schema = (schema.SchemaField("field00", "STRING"),) dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) @@ -1178,34 +1134,6 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): - dataframe = pandas.DataFrame( - data=[ - {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)}, - {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)}, - ] - ) - - no_pyarrow_patch = mock.patch(module_under_test.__name__ + ".pyarrow", None) - - with no_pyarrow_patch, warnings.catch_warnings(record=True) as warned: - detected_schema = module_under_test.dataframe_to_bq_schema( - dataframe, bq_schema=[] - ) - - assert detected_schema is None - - # a warning should also be issued - expected_warnings = [ - warning for warning in warned if "could not determine" in str(warning).lower() - ] - assert len(expected_warnings) == 1 - msg = str(expected_warnings[0]) - assert "execution_date" in msg and "created_at" in msg - - -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1235,7 +1163,6 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1282,7 +1209,46 @@ def test_dataframe_to_bq_schema_geography(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test__first_array_valid_no_valid_items(module_under_test): + series = pandas.Series([None, pandas.NA, float("NaN")]) + result = module_under_test._first_array_valid(series) + assert result is None + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test__first_array_valid_valid_item_exists(module_under_test): + series = pandas.Series([None, [0], [1], None]) + result = module_under_test._first_array_valid(series) + assert result == 0 + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test__first_array_valid_all_nan_items_in_first_valid_candidate(module_under_test): + import numpy + + series = pandas.Series( + [ + None, + [None, float("NaN"), pandas.NA, pandas.NaT, numpy.nan], + None, + [None, None], + [None, float("NaN"), pandas.NA, pandas.NaT, numpy.nan, 42, None], + [1, 2, 3], + None, + ] + ) + result = module_under_test._first_array_valid(series) + assert result == 42 + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test__first_array_valid_no_arrays_with_valid_items(module_under_test): + series = pandas.Series([[None, None], [None, None]]) + result = module_under_test._first_array_valid(series) + assert result is None + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_augment_schema_type_detection_succeeds(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1349,7 +1315,59 @@ def test_augment_schema_type_detection_succeeds(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_augment_schema_repeated_fields(module_under_test): + dataframe = pandas.DataFrame( + data=[ + # Include some values useless for type detection to make sure the logic + # indeed finds the value that is suitable. + {"string_array": None, "timestamp_array": None, "datetime_array": None}, + { + "string_array": [None], + "timestamp_array": [None], + "datetime_array": [None], + }, + {"string_array": None, "timestamp_array": None, "datetime_array": None}, + { + "string_array": [None, "foo"], + "timestamp_array": [ + None, + datetime.datetime( + 2005, 5, 31, 14, 25, 55, tzinfo=datetime.timezone.utc + ), + ], + "datetime_array": [None, datetime.datetime(2005, 5, 31, 14, 25, 55)], + }, + {"string_array": None, "timestamp_array": None, "datetime_array": None}, + ] + ) + + current_schema = ( + schema.SchemaField("string_array", field_type=None, mode="NULLABLE"), + schema.SchemaField("timestamp_array", field_type=None, mode="NULLABLE"), + schema.SchemaField("datetime_array", field_type=None, mode="NULLABLE"), + ) + + with warnings.catch_warnings(record=True) as warned: + augmented_schema = module_under_test.augment_schema(dataframe, current_schema) + + # there should be no relevant warnings + unwanted_warnings = [ + warning for warning in warned if "Pyarrow could not" in str(warning) + ] + assert not unwanted_warnings + + # the augmented schema must match the expected + expected_schema = ( + schema.SchemaField("string_array", field_type="STRING", mode="REPEATED"), + schema.SchemaField("timestamp_array", field_type="TIMESTAMP", mode="REPEATED"), + schema.SchemaField("datetime_array", field_type="DATETIME", mode="REPEATED"), + ) + + by_name = operator.attrgetter("name") + assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_augment_schema_type_detection_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1385,8 +1403,33 @@ def test_augment_schema_type_detection_fails(module_under_test): assert "struct_field" in warning_msg and "struct_field_2" in warning_msg -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_augment_schema_type_detection_fails_array_data(module_under_test): + dataframe = pandas.DataFrame( + data=[{"all_none_array": [None, float("NaN")], "empty_array": []}] + ) + current_schema = [ + schema.SchemaField("all_none_array", field_type=None, mode="NULLABLE"), + schema.SchemaField("empty_array", field_type=None, mode="NULLABLE"), + ] + + with warnings.catch_warnings(record=True) as warned: + augmented_schema = module_under_test.augment_schema(dataframe, current_schema) + + assert augmented_schema is None + + expected_warnings = [ + warning for warning in warned if "could not determine" in str(warning) + ] + assert len(expected_warnings) == 1 + warning_msg = str(expected_warnings[0]) + assert "pyarrow" in warning_msg.lower() + assert "all_none_array" in warning_msg and "empty_array" in warning_msg + + def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): + pandas = pytest.importorskip("pandas") + dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, {"name": "field02", "type": "BOOL", "mode": "NULLABLE"}, @@ -1414,9 +1457,6 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__download_table_bqstorage_stream_includes_read_session( monkeypatch, module_under_test ): @@ -1447,8 +1487,7 @@ def test__download_table_bqstorage_stream_includes_read_session( @pytest.mark.skipif( - bigquery_storage is None - or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", ) def test__download_table_bqstorage_stream_omits_read_session( @@ -1488,9 +1527,6 @@ def test__download_table_bqstorage_stream_omits_read_session( (7, {"max_queue_size": None}, 7, 0), # infinite queue size ], ) -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__download_table_bqstorage( module_under_test, stream_count, @@ -1541,7 +1577,6 @@ def fake_download_stream( assert queue_used.maxsize == expected_maxsize -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1577,7 +1612,6 @@ def test_download_arrow_row_iterator_unknown_field_type(module_under_test): assert col.to_pylist() == [2.2, 22.22, 222.222] -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_known_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1612,7 +1646,6 @@ def test_download_arrow_row_iterator_known_field_type(module_under_test): assert col.to_pylist() == ["2.2", "22.22", "222.222"] -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1640,7 +1673,6 @@ def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1680,7 +1712,6 @@ def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test assert isinstance(dataframe, pandas.DataFrame) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_field_type_override(module_under_test): # When loading pandas data, we may need to override the type # decision based on data contents, because GEOGRAPHY data can be @@ -1700,7 +1731,6 @@ def test_bq_to_arrow_field_type_override(module_under_test): ) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") @pytest.mark.parametrize( "field_type, metadata", [ diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 92ecb72deee6..30bab8fa9f65 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -27,7 +27,6 @@ import warnings import mock -import packaging import requests import pytest import pkg_resources @@ -54,24 +53,15 @@ msg = "Error importing from opentelemetry, is the installed version compatible?" raise ImportError(msg) from exc -try: - import pyarrow -except (ImportError, AttributeError): # pragma: NO COVER - pyarrow = None - import google.api_core.exceptions from google.api_core import client_info import google.cloud._helpers -from google.cloud import bigquery_v2 +from google.cloud import bigquery +from google.cloud import bigquery_storage from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from google.cloud.bigquery import ParquetOptions -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None -from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -624,9 +614,6 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_creating_new_instance(self): mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() @@ -649,55 +636,6 @@ def test_ensure_bqstorage_client_creating_new_instance(self): client_info=mock.sentinel.client_info, ) - def test_ensure_bqstorage_client_missing_dependency(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - - def fail_bqstorage_import(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) - - with no_bqstorage, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client() - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning - for warning in warned - if "not installed" in str(warning) - and "google-cloud-bigquery-storage" in str(warning) - ] - assert matching_warnings, "Missing dependency warning not raised." - - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_ensure_bqstorage_client_obsolete_dependency(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client() - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." - - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_existing_client_check_passes(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -709,29 +647,6 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): self.assertIs(bqstorage_client, mock_storage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_ensure_bqstorage_client_existing_client_check_fails(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - mock_storage_client = mock.sentinel.mock_storage_client - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." - def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference @@ -1940,7 +1855,7 @@ def test_update_model(self): self.assertEqual(updated_model.expires, model.expires) # ETag becomes If-Match header. - model._proto.etag = "etag" + model._properties["etag"] = "etag" client.update_model(model, []) req = conn.api_request.call_args self.assertEqual(req[1]["headers"]["If-Match"], "etag") @@ -1970,8 +1885,8 @@ def test_update_routine(self): routine.arguments = [ RoutineArgument( name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type=bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ) ] @@ -2725,8 +2640,6 @@ def test_delete_table_w_not_found_ok_true(self): ) def _create_job_helper(self, job_config): - from google.cloud.bigquery import _helpers - creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) @@ -2737,8 +2650,6 @@ def _create_job_helper(self, job_config): } conn = client._connection = make_connection(RESOURCE) client.create_job(job_config=job_config) - if "query" in job_config: - _helpers._del_sub_prop(job_config, ["query", "destinationTable"]) conn.api_request.assert_called_once_with( method="POST", @@ -2863,7 +2774,7 @@ def test_create_job_query_config_w_rateLimitExceeded_error(self): } data_without_destination = { "jobReference": {"projectId": self.PROJECT, "jobId": mock.ANY}, - "configuration": {"query": {"query": query, "useLegacySql": False}}, + "configuration": configuration, } creds = _make_credentials() @@ -4165,6 +4076,160 @@ def test_query_defaults(self): self.assertEqual(sent_config["query"], QUERY) self.assertFalse(sent_config["useLegacySql"]) + def test_query_w_api_method_query(self): + query = "select count(*) from persons" + response = { + "jobReference": { + "projectId": self.PROJECT, + "location": "EU", + "jobId": "abcd", + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(response) + + job = client.query(query, location="EU", api_method="QUERY") + + self.assertEqual(job.query, query) + self.assertEqual(job.job_id, "abcd") + self.assertEqual(job.location, "EU") + + # Check that query actually starts the job. + expected_resource = { + "query": query, + "useLegacySql": False, + "location": "EU", + "formatOptions": {"useInt64Timestamp": True}, + "requestId": mock.ANY, + } + conn.api_request.assert_called_once_with( + method="POST", + path=f"/projects/{self.PROJECT}/queries", + data=expected_resource, + timeout=None, + ) + + def test_query_w_api_method_query_legacy_sql(self): + from google.cloud.bigquery import QueryJobConfig + + query = "select count(*) from persons" + response = { + "jobReference": { + "projectId": self.PROJECT, + "location": "EU", + "jobId": "abcd", + }, + } + job_config = QueryJobConfig() + job_config.use_legacy_sql = True + job_config.maximum_bytes_billed = 100 + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(response) + + job = client.query( + query, location="EU", job_config=job_config, api_method="QUERY" + ) + + self.assertEqual(job.query, query) + self.assertEqual(job.job_id, "abcd") + self.assertEqual(job.location, "EU") + + # Check that query actually starts the job. + expected_resource = { + "query": query, + "useLegacySql": True, + "location": "EU", + "formatOptions": {"useInt64Timestamp": True}, + "requestId": mock.ANY, + "maximumBytesBilled": "100", + } + conn.api_request.assert_called_once_with( + method="POST", + path=f"/projects/{self.PROJECT}/queries", + data=expected_resource, + timeout=None, + ) + + def test_query_w_api_method_query_parameters(self): + from google.cloud.bigquery import QueryJobConfig, ScalarQueryParameter + + query = "select count(*) from persons" + response = { + "jobReference": { + "projectId": self.PROJECT, + "location": "EU", + "jobId": "abcd", + }, + } + job_config = QueryJobConfig() + job_config.dry_run = True + job_config.query_parameters = [ScalarQueryParameter("param1", "INTEGER", 123)] + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(response) + + job = client.query( + query, location="EU", job_config=job_config, api_method="QUERY" + ) + + self.assertEqual(job.query, query) + self.assertEqual(job.job_id, "abcd") + self.assertEqual(job.location, "EU") + + # Check that query actually starts the job. + expected_resource = { + "query": query, + "dryRun": True, + "useLegacySql": False, + "location": "EU", + "formatOptions": {"useInt64Timestamp": True}, + "requestId": mock.ANY, + "parameterMode": "NAMED", + "queryParameters": [ + { + "name": "param1", + "parameterType": {"type": "INTEGER"}, + "parameterValue": {"value": "123"}, + }, + ], + } + conn.api_request.assert_called_once_with( + method="POST", + path=f"/projects/{self.PROJECT}/queries", + data=expected_resource, + timeout=None, + ) + + def test_query_w_api_method_query_and_job_id_fails(self): + query = "select count(*) from persons" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + client._connection = make_connection({}) + + with self.assertRaises(TypeError) as exc: + client.query(query, job_id="abcd", api_method="QUERY") + self.assertIn( + "`job_id` was provided, but the 'QUERY' `api_method` was requested", + exc.exception.args[0], + ) + + def test_query_w_api_method_unknown(self): + query = "select count(*) from persons" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + client._connection = make_connection({}) + + with self.assertRaises(ValueError) as exc: + client.query(query, api_method="UNKNOWN") + self.assertIn("Got unexpected value for api_method: ", exc.exception.args[0]) + def test_query_w_explicit_timeout(self): query = "select count(*) from persons" resource = { @@ -5367,14 +5432,39 @@ def test_insert_rows_from_dataframe(self): self.PROJECT, self.DS_ID, self.TABLE_REF.table_id ) - dataframe = pandas.DataFrame( - [ - {"name": "Little One", "age": 10, "adult": False}, - {"name": "Young Gun", "age": 20, "adult": True}, - {"name": "Dad", "age": 30, "adult": True}, - {"name": "Stranger", "age": 40, "adult": True}, - ] - ) + data = [ + { + "name": "Little One", + "age": 10, + "adult": False, + "bdate": datetime.date(2011, 1, 2), + "btime": datetime.time(19, 1, 10), + }, + { + "name": "Young Gun", + "age": 20, + "adult": True, + "bdate": datetime.date(2001, 1, 2), + "btime": datetime.time(19, 1, 20), + }, + { + "name": "Dad", + "age": 30, + "adult": True, + "bdate": datetime.date(1991, 1, 2), + "btime": datetime.time(19, 1, 30), + }, + { + "name": "Stranger", + "age": 40, + "adult": True, + "bdate": datetime.date(1981, 1, 2), + "btime": datetime.time(19, 1, 40), + }, + ] + dataframe = pandas.DataFrame(data) + dataframe["bdate"] = dataframe["bdate"].astype("dbdate") + dataframe["btime"] = dataframe["btime"].astype("dbtime") # create client creds = _make_credentials() @@ -5387,6 +5477,8 @@ def test_insert_rows_from_dataframe(self): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), SchemaField("adult", "BOOLEAN", mode="REQUIRED"), + SchemaField("bdata", "DATE", mode="REQUIRED"), + SchemaField("btime", "TIME", mode="REQUIRED"), ] table = Table(self.TABLE_REF, schema=schema) @@ -5399,32 +5491,14 @@ def test_insert_rows_from_dataframe(self): for chunk_errors in error_info: assert chunk_errors == [] - EXPECTED_SENT_DATA = [ - { - "rows": [ - { - "insertId": "0", - "json": {"name": "Little One", "age": "10", "adult": "false"}, - }, - { - "insertId": "1", - "json": {"name": "Young Gun", "age": "20", "adult": "true"}, - }, - { - "insertId": "2", - "json": {"name": "Dad", "age": "30", "adult": "true"}, - }, - ] - }, - { - "rows": [ - { - "insertId": "3", - "json": {"name": "Stranger", "age": "40", "adult": "true"}, - } - ] - }, - ] + for row in data: + row["age"] = str(row["age"]) + row["adult"] = str(row["adult"]).lower() + row["bdate"] = row["bdate"].isoformat() + row["btime"] = row["btime"].isoformat() + + rows = [dict(insertId=str(i), json=row) for i, row in enumerate(data)] + EXPECTED_SENT_DATA = [dict(rows=rows[:3]), dict(rows=rows[3:])] actual_calls = conn.api_request.call_args_list @@ -6372,35 +6446,6 @@ def test_context_manager_exit_closes_client(self): fake_close.assert_called_once() -class Test_make_job_id(unittest.TestCase): - def _call_fut(self, job_id, prefix=None): - from google.cloud.bigquery.client import _make_job_id - - return _make_job_id(job_id, prefix=prefix) - - def test__make_job_id_wo_suffix(self): - job_id = self._call_fut("job_id") - - self.assertEqual(job_id, "job_id") - - def test__make_job_id_w_suffix(self): - with mock.patch("uuid.uuid4", side_effect=["212345"]): - job_id = self._call_fut(None, prefix="job_id") - - self.assertEqual(job_id, "job_id212345") - - def test__make_random_job_id(self): - with mock.patch("uuid.uuid4", side_effect=["212345"]): - job_id = self._call_fut(None) - - self.assertEqual(job_id, "212345") - - def test__make_job_id_w_job_id_overrides_prefix(self): - job_id = self._call_fut("job_id", prefix="unused_prefix") - - self.assertEqual(job_id, "job_id") - - class TestClientUpload(object): # NOTE: This is a "partner" to `TestClient` meant to test some of the # "load_table_from_file" portions of `Client`. It also uses @@ -6788,7 +6833,6 @@ def test_load_table_from_file_w_invalid_job_config(self): assert "Expected an instance of LoadJobConfig" in err_msg @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6884,7 +6928,6 @@ def test_load_table_from_dataframe(self): assert "description" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_client_location(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6929,7 +6972,6 @@ def test_load_table_from_dataframe_w_client_location(self): assert sent_config.source_format == job.SourceFormat.PARQUET @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6984,7 +7026,6 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7040,7 +7081,6 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_parquet_options_none(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7092,7 +7132,6 @@ def test_load_table_from_dataframe_w_parquet_options_none(self): assert sent_config.parquet_options.enable_list_inference is True @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_none(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7152,7 +7191,6 @@ def test_load_table_from_dataframe_w_list_inference_none(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_false(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7213,7 +7251,6 @@ def test_load_table_from_dataframe_w_list_inference_false(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self): from google.cloud.bigquery import job @@ -7233,7 +7270,6 @@ def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(sel assert "Got unexpected source_format:" in str(exc.value) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7267,6 +7303,28 @@ def test_load_table_from_dataframe_w_automatic_schema(self): dtype="datetime64[ns]", ).dt.tz_localize(datetime.timezone.utc), ), + ( + "date_col", + pandas.Series( + [ + datetime.date(2010, 1, 2), + datetime.date(2011, 2, 3), + datetime.date(2012, 3, 14), + ], + dtype="dbdate", + ), + ), + ( + "time_col", + pandas.Series( + [ + datetime.time(3, 44, 50), + datetime.time(14, 50, 59), + datetime.time(15, 16), + ], + dtype="dbtime", + ), + ), ] ) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -7305,12 +7363,72 @@ def test_load_table_from_dataframe_w_automatic_schema(self): SchemaField("int_col", "INTEGER"), SchemaField("float_col", "FLOAT"), SchemaField("bool_col", "BOOLEAN"), - SchemaField("dt_col", "TIMESTAMP"), + SchemaField("dt_col", "DATETIME"), SchemaField("ts_col", "TIMESTAMP"), + SchemaField("date_col", "DATE"), + SchemaField("time_col", "TIME"), + ) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + + client = self._make_client() + + df_data = [ + [[{"name": "n1.1", "value": 1.1}, {"name": "n1.2", "value": 1.2}]], + [[{"name": "n2.1", "value": 2.1}, {"name": "n2.2", "value": 2.2}]], + ] + dataframe = pandas.DataFrame(df_data, columns=["col_record_list"]) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + + with load_patch as load_table_from_file, get_table_patch: + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + # There should be a warning that schema detection failed. + expected_warnings = [ + warning + for warning in warned + if "schema could not be detected" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + assert issubclass( + expected_warnings[0].category, + (DeprecationWarning, PendingDeprecationWarning), + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, ) + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema is None + @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_index_and_auto_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7372,7 +7490,6 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): assert sent_schema == expected_sent_schema @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_unknown_table(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -7411,7 +7528,6 @@ def test_load_table_from_dataframe_unknown_table(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7459,7 +7575,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7504,7 +7619,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7564,7 +7678,6 @@ def test_load_table_from_dataframe_struct_fields(self): assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields(self): """Test that a DataFrame with array columns can be uploaded correctly. @@ -7629,7 +7742,6 @@ def test_load_table_from_dataframe_array_fields(self): assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields_w_auto_schema(self): """Test that a DataFrame with array columns can be uploaded correctly. @@ -7692,7 +7804,6 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): assert sent_config.schema == expected_schema @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7769,14 +7880,13 @@ def test_load_table_from_dataframe_w_partial_schema(self): SchemaField("int_as_float_col", "INTEGER"), SchemaField("float_col", "FLOAT"), SchemaField("bool_col", "BOOLEAN"), - SchemaField("dt_col", "TIMESTAMP"), + SchemaField("dt_col", "DATETIME"), SchemaField("ts_col", "TIMESTAMP"), SchemaField("string_col", "STRING"), SchemaField("bytes_col", "BYTES"), ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema_extra_types(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7813,63 +7923,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "unknown_col" in message @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_partial_schema_missing_types(self): - from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES - from google.cloud.bigquery import job - from google.cloud.bigquery.schema import SchemaField - - client = self._make_client() - df_data = collections.OrderedDict( - [ - ("string_col", ["abc", "def", "ghi"]), - ("unknown_col", [b"jkl", None, b"mno"]), - ] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch( - "google.cloud.bigquery._pandas_helpers.pyarrow", None - ) - - schema = (SchemaField("string_col", "STRING"),) - job_config = job.LoadJobConfig(schema=schema) - with pyarrow_patch, load_patch as load_table_from_file, warnings.catch_warnings( - record=True - ) as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION - ) - - load_table_from_file.assert_called_once_with( - client, - mock.ANY, - self.TABLE_REF, - num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, - size=mock.ANY, - job_id=mock.ANY, - job_id_prefix=None, - location=self.LOCATION, - project=None, - job_config=mock.ANY, - timeout=DEFAULT_TIMEOUT, - ) - - assert warned # there should be at least one warning - unknown_col_warnings = [ - warning for warning in warned if "unknown_col" in str(warning) - ] - assert unknown_col_warnings - assert unknown_col_warnings[0].category == UserWarning - - sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema is None - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7902,78 +7955,6 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): assert call_args.kwargs.get("parquet_compression") == "LZ4" @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): - client = self._make_client() - records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] - dataframe = pandas.DataFrame(records) - - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) - to_parquet_patch = mock.patch.object( - dataframe, "to_parquet", wraps=dataframe.to_parquet - ) - - with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch: - with pytest.raises(ValueError): - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - location=self.LOCATION, - parquet_compression="gzip", - ) - - def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): - pytest.importorskip("pandas", reason="Requires `pandas`") - pytest.importorskip("pyarrow", reason="Requires `pyarrow`") - - client = self._make_client() - records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] - dataframe = pandas.DataFrame(records) - - _helpers_mock = mock.MagicMock() - _helpers_mock.PYARROW_VERSIONS = mock.MagicMock() - _helpers_mock.PYARROW_VERSIONS.installed_version = packaging.version.parse( - "2.0.0" - ) # A known bad version of pyarrow. - pyarrow_version_patch = mock.patch( - "google.cloud.bigquery.client._helpers", _helpers_mock - ) - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - - with load_patch, get_table_patch, pyarrow_version_patch: - with warnings.catch_warnings(record=True) as warned: - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - location=self.LOCATION, - ) - - expected_warnings = [ - warning for warning in warned if "pyarrow" in str(warning).lower() - ] - assert len(expected_warnings) == 1 - assert issubclass(expected_warnings[0].category, RuntimeWarning) - msg = str(expected_warnings[0].message) - assert "pyarrow 2.0.0" in msg - assert "data corruption" in msg - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): """Test that a DataFrame with null columns can be uploaded if a BigQuery schema is specified. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 3c1673f4f582..7cc1f11c3a5c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -21,13 +21,8 @@ import pytest -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - import google.cloud._helpers -from google.cloud.bigquery import table, enums +from google.cloud.bigquery import query, table from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -215,7 +210,6 @@ def test_empty_iterable(self): result = _helpers.to_bq_table_rows(rows_iterable) self.assertEqual(list(result), []) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_non_empty_iterable(self): rows_iterable = [ dict( @@ -344,8 +338,8 @@ def test_custom_on_closed_error_type(self): VALID_BQ_TYPES = [ - (name, getattr(enums.SqlParameterScalarTypes, name)._type) - for name in dir(enums.SqlParameterScalarTypes) + (name, getattr(query.SqlParameterScalarTypes, name)._type) + for name in dir(query.SqlParameterScalarTypes) if not name.startswith("_") ] diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index d9d09821262e..e96ab55d792c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -17,10 +17,7 @@ import mock -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None +from google.cloud import bigquery_storage class TestConnection(unittest.TestCase): @@ -40,8 +37,6 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): - # Assumption: bigquery_storage exists. It's the test's responisbility to - # not use this helper or skip itself if bqstroage is not installed. mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client._transport = mock.Mock(spec=["channel"]) mock_client._transport.grpc_channel = mock.Mock(spec=["close"]) @@ -58,9 +53,6 @@ def test_ctor_wo_bqstorage_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, None) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ctor_w_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection @@ -90,9 +82,6 @@ def test_connect_wo_client(self, mock_client): self.assertIsNotNone(connection._client) self.assertIsNotNone(connection._bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -108,9 +97,6 @@ def test_connect_w_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_both_clients(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -144,9 +130,6 @@ def test_raises_error_if_closed(self): ): getattr(connection, method)() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_closes_all_created_bigquery_clients(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() @@ -169,9 +152,6 @@ def test_close_closes_all_created_bigquery_clients(self): self.assertTrue(client.close.called) self.assertTrue(bqstorage_client._transport.grpc_channel.close.called) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 8ad62f75f824..d672c0f6cc02 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -18,18 +18,8 @@ import pytest - -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - from google.api_core import exceptions - -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None +from google.cloud import bigquery_storage from tests.unit.helpers import _to_pyarrow @@ -279,10 +269,6 @@ def test_fetchall_w_row(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -336,9 +322,6 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): self.assertEqual(sorted_row_data, expected_row_data) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi @@ -361,9 +344,6 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): # check the data returned self.assertEqual(rows, []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -395,10 +375,6 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): # the default client was not used mock_client.list_rows.assert_not_called() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_no_arrow_compression(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table diff --git a/packages/google-cloud-bigquery/tests/unit/gapic/__init__.py b/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py similarity index 60% rename from packages/google-cloud-bigquery/tests/unit/gapic/__init__.py rename to packages/google-cloud-bigquery/tests/unit/test_legacy_types.py index e8e1c3845db5..3f51cc5114db 100644 --- a/packages/google-cloud-bigquery/tests/unit/gapic/__init__.py +++ b/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py @@ -12,4 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# + +import warnings + + +def test_importing_legacy_types_emits_warning(): + with warnings.catch_warnings(record=True) as warned: + from google.cloud.bigquery_v2 import types # noqa: F401 + + assert len(warned) == 1 + assert warned[0].category is DeprecationWarning + warning_msg = str(warned[0]) + assert "bigquery_v2" in warning_msg + assert "not maintained" in warning_msg diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 72ae4af21ea5..ea8fe568fb74 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -76,19 +76,6 @@ def ipython_ns_cleanup(): del ip.user_ns[name] -@pytest.fixture(scope="session") -def missing_bq_storage(): - """Provide a patcher that can make the bigquery storage import to fail.""" - - def fail_if(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - return maybe_fail_import(predicate=fail_if) - - @pytest.fixture(scope="session") def missing_grpcio_lib(): """Provide a patcher that can make the gapic library import to fail.""" @@ -324,9 +311,6 @@ def test__make_bqstorage_client_false(): assert got is None -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -338,53 +322,6 @@ def test__make_bqstorage_client_true(): assert isinstance(got, bigquery_storage.BigQueryReadClient) -def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): - credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - test_client = bigquery.Client( - project="test_project", credentials=credentials_mock, location="test_location" - ) - - with pytest.raises(ImportError) as exc_context, missing_bq_storage: - magics._make_bqstorage_client(test_client, True, {}) - - error_msg = str(exc_context.value) - assert "google-cloud-bigquery-storage" in error_msg - assert "pyarrow" in error_msg - - -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) -def test__make_bqstorage_client_true_obsolete_dependency(): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - test_client = bigquery.Client( - project="test_project", credentials=credentials_mock, location="test_location" - ) - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - got = magics._make_bqstorage_client(test_client, True, {}) - - assert got is None - - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." - - -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): credentials_mock = mock.create_autospec( @@ -440,9 +377,6 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -605,10 +539,9 @@ def test_bigquery_magic_clears_display_in_non_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): + pandas = pytest.importorskip("pandas") + ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( @@ -671,10 +604,9 @@ def warning_match(warning): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_with_rest_client_requested(monkeypatch): + pandas = pytest.importorskip("pandas") + ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( @@ -899,9 +831,6 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip = IPython.get_ipython() diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index a966b88b1770..4b687152ff34 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -432,11 +432,11 @@ def test_positional(self): self.assertEqual(param.value, 123) def test_ctor_w_scalar_query_parameter_type(self): - from google.cloud.bigquery import enums + from google.cloud.bigquery import query param = self._make_one( name="foo", - type_=enums.SqlParameterScalarTypes.BIGNUMERIC, + type_=query.SqlParameterScalarTypes.BIGNUMERIC, value=decimal.Decimal("123.456"), ) self.assertEqual(param.name, "foo") diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index a0b1b5d1142d..6a547cb13e88 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from google.cloud import bigquery +from google.cloud.bigquery.standard_sql import StandardSqlStructType from google.cloud.bigquery.schema import PolicyTagList import unittest @@ -28,9 +30,9 @@ def _get_target_class(): @staticmethod def _get_standard_sql_data_type_class(): - from google.cloud.bigquery_v2 import types + from google.cloud.bigquery import standard_sql - return types.StandardSqlDataType + return standard_sql.StandardSqlDataType def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) @@ -226,18 +228,17 @@ def test_fields_property(self): self.assertEqual(schema_field.fields, fields) def test_to_standard_sql_simple_type(self): - sql_type = self._get_standard_sql_data_type_class() examples = ( # a few legacy types - ("INTEGER", sql_type.TypeKind.INT64), - ("FLOAT", sql_type.TypeKind.FLOAT64), - ("BOOLEAN", sql_type.TypeKind.BOOL), - ("DATETIME", sql_type.TypeKind.DATETIME), + ("INTEGER", bigquery.StandardSqlTypeNames.INT64), + ("FLOAT", bigquery.StandardSqlTypeNames.FLOAT64), + ("BOOLEAN", bigquery.StandardSqlTypeNames.BOOL), + ("DATETIME", bigquery.StandardSqlTypeNames.DATETIME), # a few standard types - ("INT64", sql_type.TypeKind.INT64), - ("FLOAT64", sql_type.TypeKind.FLOAT64), - ("BOOL", sql_type.TypeKind.BOOL), - ("GEOGRAPHY", sql_type.TypeKind.GEOGRAPHY), + ("INT64", bigquery.StandardSqlTypeNames.INT64), + ("FLOAT64", bigquery.StandardSqlTypeNames.FLOAT64), + ("BOOL", bigquery.StandardSqlTypeNames.BOOL), + ("GEOGRAPHY", bigquery.StandardSqlTypeNames.GEOGRAPHY), ) for legacy_type, standard_type in examples: field = self._make_one("some_field", legacy_type) @@ -246,7 +247,7 @@ def test_to_standard_sql_simple_type(self): self.assertEqual(standard_field.type.type_kind, standard_type) def test_to_standard_sql_struct_type(self): - from google.cloud.bigquery_v2 import types + from google.cloud.bigquery import standard_sql # Expected result object: # @@ -280,30 +281,39 @@ def test_to_standard_sql_struct_type(self): sql_type = self._get_standard_sql_data_type_class() # level 2 fields - sub_sub_field_date = types.StandardSqlField( - name="date_field", type=sql_type(type_kind=sql_type.TypeKind.DATE) + sub_sub_field_date = standard_sql.StandardSqlField( + name="date_field", + type=sql_type(type_kind=bigquery.StandardSqlTypeNames.DATE), ) - sub_sub_field_time = types.StandardSqlField( - name="time_field", type=sql_type(type_kind=sql_type.TypeKind.TIME) + sub_sub_field_time = standard_sql.StandardSqlField( + name="time_field", + type=sql_type(type_kind=bigquery.StandardSqlTypeNames.TIME), ) # level 1 fields - sub_field_struct = types.StandardSqlField( - name="last_used", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) - ) - sub_field_struct.type.struct_type.fields.extend( - [sub_sub_field_date, sub_sub_field_time] + sub_field_struct = standard_sql.StandardSqlField( + name="last_used", + type=sql_type( + type_kind=bigquery.StandardSqlTypeNames.STRUCT, + struct_type=standard_sql.StandardSqlStructType( + fields=[sub_sub_field_date, sub_sub_field_time] + ), + ), ) - sub_field_bytes = types.StandardSqlField( - name="image_content", type=sql_type(type_kind=sql_type.TypeKind.BYTES) + sub_field_bytes = standard_sql.StandardSqlField( + name="image_content", + type=sql_type(type_kind=bigquery.StandardSqlTypeNames.BYTES), ) # level 0 (top level) - expected_result = types.StandardSqlField( - name="image_usage", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) - ) - expected_result.type.struct_type.fields.extend( - [sub_field_bytes, sub_field_struct] + expected_result = standard_sql.StandardSqlField( + name="image_usage", + type=sql_type( + type_kind=bigquery.StandardSqlTypeNames.STRUCT, + struct_type=standard_sql.StandardSqlStructType( + fields=[sub_field_bytes, sub_field_struct] + ), + ), ) # construct legacy SchemaField object @@ -322,14 +332,16 @@ def test_to_standard_sql_struct_type(self): self.assertEqual(standard_field, expected_result) def test_to_standard_sql_array_type_simple(self): - from google.cloud.bigquery_v2 import types + from google.cloud.bigquery import standard_sql sql_type = self._get_standard_sql_data_type_class() # construct expected result object - expected_sql_type = sql_type(type_kind=sql_type.TypeKind.ARRAY) - expected_sql_type.array_element_type.type_kind = sql_type.TypeKind.INT64 - expected_result = types.StandardSqlField( + expected_sql_type = sql_type( + type_kind=bigquery.StandardSqlTypeNames.ARRAY, + array_element_type=sql_type(type_kind=bigquery.StandardSqlTypeNames.INT64), + ) + expected_result = standard_sql.StandardSqlField( name="valid_numbers", type=expected_sql_type ) @@ -340,27 +352,31 @@ def test_to_standard_sql_array_type_simple(self): self.assertEqual(standard_field, expected_result) def test_to_standard_sql_array_type_struct(self): - from google.cloud.bigquery_v2 import types + from google.cloud.bigquery import standard_sql sql_type = self._get_standard_sql_data_type_class() # define person STRUCT - name_field = types.StandardSqlField( - name="name", type=sql_type(type_kind=sql_type.TypeKind.STRING) + name_field = standard_sql.StandardSqlField( + name="name", type=sql_type(type_kind=bigquery.StandardSqlTypeNames.STRING) ) - age_field = types.StandardSqlField( - name="age", type=sql_type(type_kind=sql_type.TypeKind.INT64) + age_field = standard_sql.StandardSqlField( + name="age", type=sql_type(type_kind=bigquery.StandardSqlTypeNames.INT64) ) - person_struct = types.StandardSqlField( - name="person_info", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) + person_struct = standard_sql.StandardSqlField( + name="person_info", + type=sql_type( + type_kind=bigquery.StandardSqlTypeNames.STRUCT, + struct_type=StandardSqlStructType(fields=[name_field, age_field]), + ), ) - person_struct.type.struct_type.fields.extend([name_field, age_field]) # define expected result - an ARRAY of person structs expected_sql_type = sql_type( - type_kind=sql_type.TypeKind.ARRAY, array_element_type=person_struct.type + type_kind=bigquery.StandardSqlTypeNames.ARRAY, + array_element_type=person_struct.type, ) - expected_result = types.StandardSqlField( + expected_result = standard_sql.StandardSqlField( name="known_people", type=expected_sql_type ) @@ -375,14 +391,14 @@ def test_to_standard_sql_array_type_struct(self): self.assertEqual(standard_field, expected_result) def test_to_standard_sql_unknown_type(self): - sql_type = self._get_standard_sql_data_type_class() field = self._make_one("weird_field", "TROOLEAN") standard_field = field.to_standard_sql() self.assertEqual(standard_field.name, "weird_field") self.assertEqual( - standard_field.type.type_kind, sql_type.TypeKind.TYPE_KIND_UNSPECIFIED + standard_field.type.type_kind, + bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, ) def test___eq___wrong_type(self): @@ -514,6 +530,11 @@ def test___repr__(self): expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)" self.assertEqual(repr(field1), expected) + def test___repr__type_not_set(self): + field1 = self._make_one("field1", field_type=None) + expected = "SchemaField('field1', None, 'NULLABLE', None, (), None)" + self.assertEqual(repr(field1), expected) + def test___repr__evaluable_no_policy_tags(self): field = self._make_one("field1", "STRING", "REQUIRED", "Description") field_repr = repr(field) diff --git a/packages/google-cloud-bigquery/tests/unit/test_standard_sql_types.py b/packages/google-cloud-bigquery/tests/unit/test_standard_sql_types.py new file mode 100644 index 000000000000..0ba0e0cfd721 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_standard_sql_types.py @@ -0,0 +1,594 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import pytest + +from google.cloud import bigquery as bq + + +class TestStandardSqlDataType: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.standard_sql import StandardSqlDataType + + return StandardSqlDataType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_default_type_kind(self): + instance = self._make_one() + assert instance.type_kind == bq.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED + + def test_to_api_repr_no_type_set(self): + instance = self._make_one() + instance.type_kind = None + + result = instance.to_api_repr() + + assert result == {"typeKind": "TYPE_KIND_UNSPECIFIED"} + + def test_to_api_repr_scalar_type(self): + instance = self._make_one(bq.StandardSqlTypeNames.FLOAT64) + + result = instance.to_api_repr() + + assert result == {"typeKind": "FLOAT64"} + + def test_to_api_repr_array_type_element_type_missing(self): + instance = self._make_one( + bq.StandardSqlTypeNames.ARRAY, array_element_type=None + ) + + result = instance.to_api_repr() + + expected = {"typeKind": "ARRAY"} + assert result == expected + + def test_to_api_repr_array_type_w_element_type(self): + array_element_type = self._make_one(type_kind=bq.StandardSqlTypeNames.BOOL) + instance = self._make_one( + bq.StandardSqlTypeNames.ARRAY, array_element_type=array_element_type + ) + + result = instance.to_api_repr() + + expected = {"typeKind": "ARRAY", "arrayElementType": {"typeKind": "BOOL"}} + assert result == expected + + def test_to_api_repr_struct_type_field_types_missing(self): + instance = self._make_one(bq.StandardSqlTypeNames.STRUCT, struct_type=None) + + result = instance.to_api_repr() + + assert result == {"typeKind": "STRUCT"} + + def test_to_api_repr_struct_type_w_field_types(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + from google.cloud.bigquery.standard_sql import StandardSqlStructType + + StandardSqlDataType = self._get_target_class() + TypeNames = bq.StandardSqlTypeNames + + person_type = StandardSqlStructType( + fields=[ + StandardSqlField("name", StandardSqlDataType(TypeNames.STRING)), + StandardSqlField("age", StandardSqlDataType(TypeNames.INT64)), + ] + ) + employee_type = StandardSqlStructType( + fields=[ + StandardSqlField("job_title", StandardSqlDataType(TypeNames.STRING)), + StandardSqlField("salary", StandardSqlDataType(TypeNames.FLOAT64)), + StandardSqlField( + "employee_info", + StandardSqlDataType( + type_kind=TypeNames.STRUCT, + struct_type=person_type, + ), + ), + ] + ) + + instance = self._make_one(TypeNames.STRUCT, struct_type=employee_type) + result = instance.to_api_repr() + + expected = { + "typeKind": "STRUCT", + "structType": { + "fields": [ + {"name": "job_title", "type": {"typeKind": "STRING"}}, + {"name": "salary", "type": {"typeKind": "FLOAT64"}}, + { + "name": "employee_info", + "type": { + "typeKind": "STRUCT", + "structType": { + "fields": [ + {"name": "name", "type": {"typeKind": "STRING"}}, + {"name": "age", "type": {"typeKind": "INT64"}}, + ], + }, + }, + }, + ], + }, + } + assert result == expected + + def test_from_api_repr_empty_resource(self): + klass = self._get_target_class() + result = klass.from_api_repr(resource={}) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, + array_element_type=None, + struct_type=None, + ) + assert result == expected + + def test_from_api_repr_scalar_type(self): + klass = self._get_target_class() + resource = {"typeKind": "DATE"} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.DATE, + array_element_type=None, + struct_type=None, + ) + assert result == expected + + def test_from_api_repr_array_type_full(self): + klass = self._get_target_class() + resource = {"typeKind": "ARRAY", "arrayElementType": {"typeKind": "BYTES"}} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.ARRAY, + array_element_type=klass(type_kind=bq.StandardSqlTypeNames.BYTES), + struct_type=None, + ) + assert result == expected + + def test_from_api_repr_array_type_missing_element_type(self): + klass = self._get_target_class() + resource = {"typeKind": "ARRAY"} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.ARRAY, + array_element_type=None, + struct_type=None, + ) + assert result == expected + + def test_from_api_repr_struct_type_nested(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + from google.cloud.bigquery.standard_sql import StandardSqlStructType + + klass = self._get_target_class() + TypeNames = bq.StandardSqlTypeNames + + resource = { + "typeKind": "STRUCT", + "structType": { + "fields": [ + {"name": "job_title", "type": {"typeKind": "STRING"}}, + {"name": "salary", "type": {"typeKind": "FLOAT64"}}, + { + "name": "employee_info", + "type": { + "typeKind": "STRUCT", + "structType": { + "fields": [ + {"name": "name", "type": {"typeKind": "STRING"}}, + {"name": "age", "type": {"typeKind": "INT64"}}, + ], + }, + }, + }, + ], + }, + } + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=TypeNames.STRUCT, + struct_type=StandardSqlStructType( + fields=[ + StandardSqlField("job_title", klass(TypeNames.STRING)), + StandardSqlField("salary", klass(TypeNames.FLOAT64)), + StandardSqlField( + "employee_info", + klass( + type_kind=TypeNames.STRUCT, + struct_type=StandardSqlStructType( + fields=[ + StandardSqlField("name", klass(TypeNames.STRING)), + StandardSqlField("age", klass(TypeNames.INT64)), + ] + ), + ), + ), + ] + ), + ) + assert result == expected + + def test_from_api_repr_struct_type_missing_struct_info(self): + klass = self._get_target_class() + resource = {"typeKind": "STRUCT"} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.STRUCT, + array_element_type=None, + struct_type=None, + ) + assert result == expected + + def test_from_api_repr_struct_type_incomplete_field_info(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + from google.cloud.bigquery.standard_sql import StandardSqlStructType + + klass = self._get_target_class() + TypeNames = bq.StandardSqlTypeNames + + resource = { + "typeKind": "STRUCT", + "structType": { + "fields": [ + {"type": {"typeKind": "STRING"}}, # missing name + {"name": "salary"}, # missing type + ], + }, + } + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=TypeNames.STRUCT, + struct_type=StandardSqlStructType( + fields=[ + StandardSqlField(None, klass(TypeNames.STRING)), + StandardSqlField("salary", klass(TypeNames.TYPE_KIND_UNSPECIFIED)), + ] + ), + ) + assert result == expected + + def test__eq__another_type(self): + instance = self._make_one() + + class SqlTypeWannabe: + pass + + not_a_type = SqlTypeWannabe() + not_a_type._properties = instance._properties + + assert instance != not_a_type # Can't fake it. + + def test__eq__delegates_comparison_to_another_type(self): + instance = self._make_one() + assert instance == mock.ANY + + def test__eq__similar_instance(self): + kwargs = { + "type_kind": bq.StandardSqlTypeNames.GEOGRAPHY, + "array_element_type": bq.StandardSqlDataType( + type_kind=bq.StandardSqlTypeNames.INT64 + ), + "struct_type": bq.StandardSqlStructType(fields=[]), + } + instance = self._make_one(**kwargs) + instance2 = self._make_one(**kwargs) + assert instance == instance2 + + @pytest.mark.parametrize( + ("attr_name", "value", "value2"), + ( + ( + "type_kind", + bq.StandardSqlTypeNames.INT64, + bq.StandardSqlTypeNames.FLOAT64, + ), + ( + "array_element_type", + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.STRING), + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL), + ), + ( + "struct_type", + bq.StandardSqlStructType(fields=[bq.StandardSqlField(name="foo")]), + bq.StandardSqlStructType(fields=[bq.StandardSqlField(name="bar")]), + ), + ), + ) + def test__eq__attribute_differs(self, attr_name, value, value2): + instance = self._make_one(**{attr_name: value}) + instance2 = self._make_one(**{attr_name: value2}) + assert instance != instance2 + + def test_str(self): + instance = self._make_one(type_kind=bq.StandardSqlTypeNames.BOOL) + bool_type_repr = repr(bq.StandardSqlTypeNames.BOOL) + assert str(instance) == f"StandardSqlDataType(type_kind={bool_type_repr}, ...)" + + +class TestStandardSqlField: + # This class only contains minimum tests to cover what other tests don't + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.standard_sql import StandardSqlField + + return StandardSqlField + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_name(self): + instance = self._make_one(name="foo") + assert instance.name == "foo" + instance.name = "bar" + assert instance.name == "bar" + + def test_type_missing(self): + instance = self._make_one(type=None) + assert instance.type is None + + def test_type_set_none(self): + instance = self._make_one( + type=bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL) + ) + instance.type = None + assert instance.type is None + + def test_type_set_not_none(self): + instance = self._make_one(type=bq.StandardSqlDataType(type_kind=None)) + instance.type = bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.INT64) + assert instance.type == bq.StandardSqlDataType( + type_kind=bq.StandardSqlTypeNames.INT64 + ) + + def test__eq__another_type(self): + instance = self._make_one( + name="foo", + type=bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL), + ) + + class FieldWannabe: + pass + + not_a_field = FieldWannabe() + not_a_field._properties = instance._properties + + assert instance != not_a_field # Can't fake it. + + def test__eq__delegates_comparison_to_another_type(self): + instance = self._make_one( + name="foo", + type=bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL), + ) + assert instance == mock.ANY + + def test__eq__similar_instance(self): + kwargs = { + "name": "foo", + "type": bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.INT64), + } + instance = self._make_one(**kwargs) + instance2 = self._make_one(**kwargs) + assert instance == instance2 + + @pytest.mark.parametrize( + ("attr_name", "value", "value2"), + ( + ( + "name", + "foo", + "bar", + ), + ( + "type", + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.INTERVAL), + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.TIME), + ), + ), + ) + def test__eq__attribute_differs(self, attr_name, value, value2): + instance = self._make_one(**{attr_name: value}) + instance2 = self._make_one(**{attr_name: value2}) + assert instance != instance2 + + +class TestStandardSqlStructType: + # This class only contains minimum tests to cover what other tests don't + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.standard_sql import StandardSqlStructType + + return StandardSqlStructType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_fields(self): + instance = self._make_one(fields=[]) + assert instance.fields == [] + + new_fields = [bq.StandardSqlField(name="foo"), bq.StandardSqlField(name="bar")] + instance.fields = new_fields + assert instance.fields == new_fields + + def test__eq__another_type(self): + instance = self._make_one(fields=[bq.StandardSqlField(name="foo")]) + + class StructTypeWannabe: + pass + + not_a_type = StructTypeWannabe() + not_a_type._properties = instance._properties + + assert instance != not_a_type # Can't fake it. + + def test__eq__delegates_comparison_to_another_type(self): + instance = self._make_one(fields=[bq.StandardSqlField(name="foo")]) + assert instance == mock.ANY + + def test__eq__similar_instance(self): + kwargs = { + "fields": [bq.StandardSqlField(name="foo"), bq.StandardSqlField(name="bar")] + } + instance = self._make_one(**kwargs) + instance2 = self._make_one(**kwargs) + assert instance == instance2 + + def test__eq__attribute_differs(self): + instance = self._make_one(fields=[bq.StandardSqlField(name="foo")]) + instance2 = self._make_one( + fields=[bq.StandardSqlField(name="foo"), bq.StandardSqlField(name="bar")] + ) + assert instance != instance2 + + +class TestStandardSqlTableType: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.standard_sql import StandardSqlTableType + + return StandardSqlTableType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_columns_shallow_copy(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + + columns = [ + StandardSqlField("foo"), + StandardSqlField("bar"), + StandardSqlField("baz"), + ] + + instance = self._make_one(columns=columns) + + assert len(instance.columns) == 3 + columns.pop() + assert len(instance.columns) == 3 # Still the same. + + def test_columns_setter(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + + columns = [StandardSqlField("foo")] + instance = self._make_one(columns=columns) + assert instance.columns == columns + + new_columns = [StandardSqlField(name="bar")] + instance.columns = new_columns + assert instance.columns == new_columns + + def test_to_api_repr_no_columns(self): + instance = self._make_one(columns=[]) + result = instance.to_api_repr() + assert result == {"columns": []} + + def test_to_api_repr_with_columns(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + + columns = [StandardSqlField("foo"), StandardSqlField("bar")] + instance = self._make_one(columns=columns) + + result = instance.to_api_repr() + + expected = { + "columns": [{"name": "foo", "type": None}, {"name": "bar", "type": None}] + } + assert result == expected + + def test_from_api_repr_missing_columns(self): + resource = {} + result = self._get_target_class().from_api_repr(resource) + assert result.columns == [] + + def test_from_api_repr_with_incomplete_columns(self): + from google.cloud.bigquery.standard_sql import StandardSqlDataType + from google.cloud.bigquery.standard_sql import StandardSqlField + + resource = { + "columns": [ + {"type": {"typeKind": "BOOL"}}, # missing name + {"name": "bar"}, # missing type + ] + } + + result = self._get_target_class().from_api_repr(resource) + + assert len(result.columns) == 2 + + expected = StandardSqlField( + name=None, + type=StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL), + ) + assert result.columns[0] == expected + + expected = StandardSqlField( + name="bar", + type=StandardSqlDataType( + type_kind=bq.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED + ), + ) + assert result.columns[1] == expected + + def test__eq__another_type(self): + instance = self._make_one(columns=[bq.StandardSqlField(name="foo")]) + + class TableTypeWannabe: + pass + + not_a_type = TableTypeWannabe() + not_a_type._properties = instance._properties + + assert instance != not_a_type # Can't fake it. + + def test__eq__delegates_comparison_to_another_type(self): + instance = self._make_one(columns=[bq.StandardSqlField(name="foo")]) + assert instance == mock.ANY + + def test__eq__similar_instance(self): + kwargs = { + "columns": [ + bq.StandardSqlField(name="foo"), + bq.StandardSqlField(name="bar"), + ] + } + instance = self._make_one(**kwargs) + instance2 = self._make_one(**kwargs) + assert instance == instance2 + + def test__eq__attribute_differs(self): + instance = self._make_one(columns=[bq.StandardSqlField(name="foo")]) + instance2 = self._make_one( + columns=[bq.StandardSqlField(name="foo"), bq.StandardSqlField(name="bar")] + ) + assert instance != instance2 diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 23c7a84615be..5241230a410b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -21,19 +21,16 @@ import warnings import mock +import pyarrow +import pyarrow.types import pytest import google.api_core.exceptions -from test_utils.imports import maybe_fail_import -try: - from google.cloud import bigquery_storage - from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( - grpc as big_query_read_grpc_transport, - ) -except ImportError: # pragma: NO COVER - bigquery_storage = None - big_query_read_grpc_transport = None +from google.cloud import bigquery_storage +from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, +) try: import pandas @@ -51,12 +48,6 @@ tqdm = None from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery import _helpers - - -pyarrow = _helpers.PYARROW_VERSIONS.try_import() -if pyarrow: - import pyarrow.types def _mock_client(): @@ -1827,26 +1818,12 @@ def test_total_rows_eq_zero(self): row_iterator = self._make_one() self.assertEqual(row_iterator.total_rows, 0) - @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) - def test_to_arrow_error_if_pyarrow_is_none(self): - row_iterator = self._make_one() - with self.assertRaises(ValueError): - row_iterator.to_arrow() - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): row_iterator = self._make_one() tbl = row_iterator.to_arrow() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 0) - @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) - def test_to_arrow_iterable_error_if_pyarrow_is_none(self): - row_iterator = self._make_one() - with self.assertRaises(ValueError): - row_iterator.to_arrow_iterable() - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): row_iterator = self._make_one() arrow_iter = row_iterator.to_arrow_iterable() @@ -2128,49 +2105,6 @@ def test__validate_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) - def test__validate_bqstorage_returns_false_if_missing_dependency(self): - iterator = self._make_one(first_page_response=None) # not cached - - def fail_bqstorage_import(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) - - with no_bqstorage: - result = iterator._validate_bqstorage( - bqstorage_client=None, create_bqstorage_client=True - ) - - self.assertFalse(result) - - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - iterator = self._make_one(first_page_response=None) # not cached - - patcher = mock.patch( - "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - result = iterator._validate_bqstorage( - bqstorage_client=None, create_bqstorage_client=True - ) - - self.assertFalse(result) - - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): from google.cloud.bigquery.schema import SchemaField @@ -2271,29 +2205,6 @@ def test_to_arrow_iterable(self): [[{"name": "Bepples Phlyntstone", "age": 0}, {"name": "Dino", "age": 4}]], ) - @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) - def test_to_arrow_iterable_error_if_pyarrow_is_none(self): - from google.cloud.bigquery.schema import SchemaField - - schema = [ - SchemaField("name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - rows = [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - ] - path = "/foo" - api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = self._make_one(_mock_client(), api_request, path, schema) - - with pytest.raises(ValueError, match="pyarrow"): - row_iterator.to_arrow_iterable() - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2369,7 +2280,6 @@ def test_to_arrow_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2451,7 +2361,6 @@ def test_to_arrow(self): ], ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_nulls(self): from google.cloud.bigquery.schema import SchemaField @@ -2484,7 +2393,6 @@ def test_to_arrow_w_nulls(self): self.assertEqual(names, ["Donkey", "Diddy", "Dixie", None]) self.assertEqual(ages, [32, 29, None, 111]) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_unknown_type(self): from google.cloud.bigquery.schema import SchemaField @@ -2527,7 +2435,6 @@ def test_to_arrow_w_unknown_type(self): warning = warned[0] self.assertTrue("sport" in str(warning)) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): from google.cloud.bigquery.schema import SchemaField @@ -2566,10 +2473,6 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -2610,10 +2513,6 @@ def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): ) mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -2650,10 +2549,6 @@ def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2731,10 +2626,6 @@ def test_to_arrow_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2762,7 +2653,6 @@ def test_to_arrow_w_bqstorage_creates_client(self): mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): from google.cloud.bigquery.schema import SchemaField @@ -2789,10 +2679,6 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2829,7 +2715,6 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[1].name, "colC") self.assertEqual(actual_table.schema[2].name, "colB") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") @mock.patch("tqdm.tqdm_notebook") @@ -2964,10 +2849,6 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["age"][0], 33) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3036,10 +2917,6 @@ def test_to_dataframe_iterable_w_bqstorage(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3133,10 +3010,9 @@ def test_to_dataframe(self): self.assertEqual(len(df), 4) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names self.assertEqual(df.name.dtype.name, "object") - self.assertEqual(df.age.dtype.name, "int64") + self.assertEqual(df.age.dtype.name, "Int64") @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -3164,7 +3040,6 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -3380,7 +3255,7 @@ def test_to_dataframe_w_various_types_nullable(self): self.assertTrue(row.isnull().all()) else: self.assertIsInstance(row.start_timestamp, pandas.Timestamp) - self.assertIsInstance(row.seconds, float) + self.assertIsInstance(row.seconds, int) self.assertIsInstance(row.payment_type, str) self.assertIsInstance(row.complete, bool) self.assertIsInstance(row.date, datetime.date) @@ -3427,12 +3302,42 @@ def test_to_dataframe_column_dtypes(self): self.assertEqual(list(df), exp_columns) # verify the column names self.assertEqual(df.start_timestamp.dtype.name, "datetime64[ns, UTC]") - self.assertEqual(df.seconds.dtype.name, "int64") + self.assertEqual(df.seconds.dtype.name, "Int64") self.assertEqual(df.miles.dtype.name, "float64") self.assertEqual(df.km.dtype.name, "float16") self.assertEqual(df.payment_type.dtype.name, "object") - self.assertEqual(df.complete.dtype.name, "bool") - self.assertEqual(df.date.dtype.name, "object") + self.assertEqual(df.complete.dtype.name, "boolean") + self.assertEqual(df.date.dtype.name, "dbdate") + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_datetime_objects(self): + # When converting date or timestamp values to nanosecond + # precision, the result can be out of pyarrow bounds. To avoid + # the error when converting to Pandas, we use object type if + # necessary. + + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("ts", "TIMESTAMP"), + SchemaField("date", "DATE"), + ] + row_data = [ + ["-20000000000000000", "1111-01-01"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe(create_bqstorage_client=False) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + self.assertEqual(df["ts"].dtype.name, "object") + self.assertEqual(df["date"].dtype.name, "object") + self.assertEqual(df["ts"][0].date(), datetime.date(1336, 3, 23)) + self.assertEqual(df["date"][0], datetime.date(1111, 1, 1)) @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_error_if_pandas_is_none(self): @@ -3580,9 +3485,6 @@ def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3611,9 +3513,6 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3639,11 +3538,7 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): from google.cloud.bigquery.table import Table @@ -3665,10 +3560,6 @@ def test_to_dataframe_w_bqstorage_logs_session(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3720,10 +3611,6 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertTrue(got.empty) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3800,10 +3687,6 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3854,11 +3737,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertTrue(got.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @mock.patch("tqdm.tqdm") def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): from google.cloud.bigquery import schema @@ -3933,10 +3812,6 @@ def blocking_to_arrow(*args, **kwargs): tqdm_mock().close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4053,9 +3928,6 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): self.assertTrue(df.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): from google.cloud.bigquery import table as mut @@ -4074,9 +3946,6 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): with pytest.raises(google.api_core.exceptions.Forbidden): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_partition(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4094,9 +3963,6 @@ def test_to_dataframe_w_bqstorage_partition(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_snapshot(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4115,10 +3981,6 @@ def test_to_dataframe_w_bqstorage_snapshot(self): row_iterator.to_dataframe(bqstorage_client) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4402,7 +4264,6 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): dtypes = dict(xxx=numpy.dtype("int64")) progress_bar_type = "normal" create_bqstorage_client = False - date_as_object = False geography_column = "g" to_dataframe.return_value = pandas.DataFrame( @@ -4417,7 +4278,6 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, geography_column=geography_column, ) @@ -4426,7 +4286,6 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): dtypes, progress_bar_type, create_bqstorage_client, - date_as_object, geography_as_object=True, ) @@ -4824,9 +4683,6 @@ def test_set_expiration_w_none(self): assert time_partitioning._properties["expirationMs"] is None -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.parametrize( "table_path", ( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py new file mode 100644 index 000000000000..943baa326cc8 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py @@ -0,0 +1,194 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal +from unittest import mock + +import pyarrow +import pytest + +from google.cloud import bigquery + +pandas = pytest.importorskip("pandas") + + +TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data" + + +@pytest.fixture +def class_under_test(): + from google.cloud.bigquery.table import RowIterator + + return RowIterator + + +def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): + # See tests/system/test_arrow.py for the actual types we get from the API. + arrow_schema = pyarrow.schema( + [ + pyarrow.field("bignumeric_col", pyarrow.decimal256(76, scale=38)), + pyarrow.field("bool_col", pyarrow.bool_()), + pyarrow.field("bytes_col", pyarrow.binary()), + pyarrow.field("date_col", pyarrow.date32()), + pyarrow.field("datetime_col", pyarrow.timestamp("us", tz=None)), + pyarrow.field("float64_col", pyarrow.float64()), + pyarrow.field("int64_col", pyarrow.int64()), + pyarrow.field("numeric_col", pyarrow.decimal128(38, scale=9)), + pyarrow.field("string_col", pyarrow.string()), + pyarrow.field("time_col", pyarrow.time64("us")), + pyarrow.field( + "timestamp_col", pyarrow.timestamp("us", tz=datetime.timezone.utc) + ), + ] + ) + arrow_table = pyarrow.Table.from_pydict( + { + "bignumeric_col": [decimal.Decimal("123.456789101112131415")], + "bool_col": [True], + "bytes_col": [b"Hello,\x00World!"], + "date_col": [datetime.date(2021, 8, 9)], + "datetime_col": [datetime.datetime(2021, 8, 9, 13, 30, 44, 123456)], + "float64_col": [1.25], + "int64_col": [-7], + "numeric_col": [decimal.Decimal("-123.456789")], + "string_col": ["abcdefg"], + "time_col": [datetime.time(14, 21, 17, 123456)], + "timestamp_col": [ + datetime.datetime( + 2021, 8, 9, 13, 30, 44, 123456, tzinfo=datetime.timezone.utc + ) + ], + }, + schema=arrow_schema, + ) + + nullable_schema = [ + bigquery.SchemaField("bignumeric_col", "BIGNUMERIC"), + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("datetime_col", "DATETIME"), + bigquery.SchemaField("float64_col", "FLOAT"), + bigquery.SchemaField("int64_col", "INT64"), + bigquery.SchemaField("numeric_col", "NUMERIC"), + bigquery.SchemaField("string_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("timestamp_col", "TIMESTAMP"), + ] + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + mock_to_arrow = mock.Mock() + mock_to_arrow.return_value = arrow_table + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema) + monkeypatch.setattr(rows, "to_arrow", mock_to_arrow) + df = rows.to_dataframe() + + # Check for expected dtypes. + # Keep these in sync with tests/system/test_pandas.py + assert df.dtypes["bignumeric_col"].name == "object" + assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["bytes_col"].name == "object" + assert df.dtypes["date_col"].name == "dbdate" + assert df.dtypes["datetime_col"].name == "datetime64[ns]" + assert df.dtypes["float64_col"].name == "float64" + assert df.dtypes["int64_col"].name == "Int64" + assert df.dtypes["numeric_col"].name == "object" + assert df.dtypes["string_col"].name == "object" + assert df.dtypes["time_col"].name == "dbtime" + assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" + + # Check for expected values. + assert df["bignumeric_col"][0] == decimal.Decimal("123.456789101112131415") + assert df["bool_col"][0] # True + assert df["bytes_col"][0] == b"Hello,\x00World!" + + # object is used by default, but we can use "datetime64[ns]" automatically + # when data is within the supported range. + # https://github.com/googleapis/python-bigquery/issues/861 + assert df["date_col"][0] == datetime.date(2021, 8, 9) + + assert df["datetime_col"][0] == pandas.to_datetime("2021-08-09 13:30:44.123456") + assert df["float64_col"][0] == 1.25 + assert df["int64_col"][0] == -7 + assert df["numeric_col"][0] == decimal.Decimal("-123.456789") + assert df["string_col"][0] == "abcdefg" + + # Pandas timedelta64 might be a better choice for pandas time columns. Then + # they can more easily be combined with date columns to form datetimes. + # https://github.com/googleapis/python-bigquery/issues/862 + assert df["time_col"][0] == datetime.time(14, 21, 17, 123456) + + assert df["timestamp_col"][0] == pandas.to_datetime("2021-08-09 13:30:44.123456Z") + + +def test_to_dataframe_nullable_scalars_with_custom_dtypes( + monkeypatch, class_under_test +): + """Passing in explicit dtypes is merged with default behavior.""" + arrow_schema = pyarrow.schema( + [ + pyarrow.field("int64_col", pyarrow.int64()), + pyarrow.field("other_int_col", pyarrow.int64()), + ] + ) + arrow_table = pyarrow.Table.from_pydict( + {"int64_col": [1000], "other_int_col": [-7]}, + schema=arrow_schema, + ) + + nullable_schema = [ + bigquery.SchemaField("int64_col", "INT64"), + bigquery.SchemaField("other_int_col", "INT64"), + ] + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + mock_to_arrow = mock.Mock() + mock_to_arrow.return_value = arrow_table + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema) + monkeypatch.setattr(rows, "to_arrow", mock_to_arrow) + df = rows.to_dataframe(dtypes={"other_int_col": "int8"}) + + assert df.dtypes["int64_col"].name == "Int64" + assert df["int64_col"][0] == 1000 + + assert df.dtypes["other_int_col"].name == "int8" + assert df["other_int_col"][0] == -7 + + +def test_to_dataframe_arrays(monkeypatch, class_under_test): + arrow_schema = pyarrow.schema( + [pyarrow.field("int64_repeated", pyarrow.list_(pyarrow.int64()))] + ) + arrow_table = pyarrow.Table.from_pydict( + {"int64_repeated": [[-1, 0, 2]]}, + schema=arrow_schema, + ) + + nullable_schema = [ + bigquery.SchemaField("int64_repeated", "INT64", mode="REPEATED"), + ] + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + mock_to_arrow = mock.Mock() + mock_to_arrow.return_value = arrow_table + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema) + monkeypatch.setattr(rows, "to_arrow", mock_to_arrow) + df = rows.to_dataframe() + + assert df.dtypes["int64_repeated"].name == "object" + assert tuple(df["int64_repeated"][0]) == (-1, 0, 2) From 91f71faca6bf236f7dd92f02ce3e7292b5e33362 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 29 Mar 2022 13:30:50 -0500 Subject: [PATCH 1406/2016] chore(main): release 3.0.0 (#1179) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 41 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index f3970bfd22be..c5c8447da8bc 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,47 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.0.0](https://github.com/googleapis/python-bigquery/compare/v2.34.3...v3.0.0) (2022-03-29) + + +### ⚠ BREAKING CHANGES + +* BigQuery Storage and pyarrow are required dependencies (#776) +* use nullable `Int64` and `boolean` dtypes in `to_dataframe` (#786) +* destination tables are no-longer removed by `create_job` (#891) +* In `to_dataframe`, use `dbdate` and `dbtime` dtypes from db-dtypes package for BigQuery DATE and TIME columns (#972) +* automatically convert out-of-bounds dates in `to_dataframe`, remove `date_as_object` argument (#972) +* mark the package as type-checked (#1058) +* default to DATETIME type when loading timezone-naive datetimes from Pandas (#1061) +* remove out-of-date BigQuery ML protocol buffers (#1178) + +### Features + +* add `api_method` parameter to `Client.query` to select `INSERT` or `QUERY` API ([#967](https://github.com/googleapis/python-bigquery/issues/967)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) +* default to DATETIME type when loading timezone-naive datetimes from Pandas ([#1061](https://github.com/googleapis/python-bigquery/issues/1061)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) +* destination tables are no-longer removed by `create_job` ([#891](https://github.com/googleapis/python-bigquery/issues/891)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) +* In `to_dataframe`, use `dbdate` and `dbtime` dtypes from db-dtypes package for BigQuery DATE and TIME columns ([#972](https://github.com/googleapis/python-bigquery/issues/972)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) +* mark the package as type-checked ([#1058](https://github.com/googleapis/python-bigquery/issues/1058)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) +* use `StandardSqlField` class for `Model.feature_columns` and `Model.label_columns` ([#1117](https://github.com/googleapis/python-bigquery/issues/1117)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) + + +### Bug Fixes + +* automatically convert out-of-bounds dates in `to_dataframe`, remove `date_as_object` argument ([#972](https://github.com/googleapis/python-bigquery/issues/972)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) +* improve type annotations for mypy validation ([#1081](https://github.com/googleapis/python-bigquery/issues/1081)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) +* remove out-of-date BigQuery ML protocol buffers ([#1178](https://github.com/googleapis/python-bigquery/issues/1178)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) +* use nullable `Int64` and `boolean` dtypes in `to_dataframe` ([#786](https://github.com/googleapis/python-bigquery/issues/786)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) + + +### Documentation + +* Add migration guide from version 2.x to 3.x ([#1027](https://github.com/googleapis/python-bigquery/issues/1027)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) + + +### Dependencies + +* BigQuery Storage and pyarrow are required dependencies ([#776](https://github.com/googleapis/python-bigquery/issues/776)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) + ### [2.34.3](https://github.com/googleapis/python-bigquery/compare/v2.34.2...v2.34.3) (2022-03-29) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 385cb3c75b45..d6f7def8c3f2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.34.3" +__version__ = "3.0.0" From 5239dd9341d6723fb8d5a13521890115e0d69a2a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 30 Mar 2022 04:54:59 -0500 Subject: [PATCH 1407/2016] chore: add branch protection rules for v2 (#1181) --- .../google-cloud-bigquery/.github/sync-repo-settings.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml index 73cc3bcef668..220c031b2864 100644 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -1,6 +1,5 @@ # https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings -# Allow merge commits to sync main and v3 with fewer conflicts. -mergeCommitAllowed: true +mergeCommitAllowed: false # Rules for main branch protection branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. @@ -16,8 +15,8 @@ branchProtectionRules: - 'Samples - Lint' - 'Samples - Python 3.7' - 'Samples - Python 3.8' -- pattern: v3 - requiresLinearHistory: false +- pattern: v2 + requiresLinearHistory: true requiresCodeOwnerReviews: true requiresStrictStatusChecks: true requiredStatusCheckContexts: From 8b517b5747e211087da615c63f1de45730f0dc96 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 30 Mar 2022 15:03:58 +0200 Subject: [PATCH 1408/2016] chore(deps): update all dependencies (#1164) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * revert * revert * revert * revert * revert * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Anthonios Partheniou Co-authored-by: Owl Bot --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index fed8be7f9534..54d0985cb870 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -10,13 +10,13 @@ db-dtypes==0.4.0 Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 -google-api-core==2.6.1 +google-api-core==2.7.0 google-auth==2.6.0 google-cloud-bigquery==2.34.2 google-cloud-bigquery-storage==2.12.0 google-cloud-core==2.2.3 google-crc32c==1.3.0 -google-resumable-media==2.3.1 +google-resumable-media==2.3.2 googleapis-common-protos==1.55.0 grpcio==1.44.0 idna==3.3 From 53bef611041deffce1cdda2f728a85c24d4719a7 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 30 Mar 2022 16:07:32 +0200 Subject: [PATCH 1409/2016] chore(deps): update all dependencies (#1183) * chore(deps): update all dependencies * revert environment specific pins * fix typo * revert environment specific pins * revert environment specific pins Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 16 ++++++++-------- .../samples/magics/requirements-test.txt | 2 +- .../samples/magics/requirements.txt | 12 ++++++------ .../samples/snippets/requirements-test.txt | 2 +- .../samples/snippets/requirements.txt | 12 ++++++------ 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 4bd417eba1fa..5e29de931dd5 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.0.1 +pytest==7.1.1 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 54d0985cb870..3ad2b77224fc 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -2,22 +2,22 @@ attrs==21.4.0 certifi==2021.10.8 cffi==1.15.0 charset-normalizer==2.0.12 -click==8.0.4 +click==8.1.0 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==0.4.0 +db-dtypes==1.0.0 Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 -google-api-core==2.7.0 -google-auth==2.6.0 +google-api-core==2.7.1 +google-auth==2.6.2 google-cloud-bigquery==2.34.2 -google-cloud-bigquery-storage==2.12.0 +google-cloud-bigquery-storage==2.13.0 google-cloud-core==2.2.3 google-crc32c==1.3.0 google-resumable-media==2.3.2 -googleapis-common-protos==1.55.0 +googleapis-common-protos==1.56.0 grpcio==1.44.0 idna==3.3 libcst==0.4.1 @@ -34,7 +34,7 @@ pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.7 python-dateutil==2.8.2 -pytz==2021.3 +pytz==2022.1 PyYAML==6.0 requests==2.27.1 rsa==4.8 @@ -42,4 +42,4 @@ Shapely==1.8.1.post1 six==1.16.0 typing-extensions==4.1.1 typing-inspect==0.7.1 -urllib3==1.26.8 +urllib3==1.26.9 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index bafc3de2ae2c..c5864d4f7318 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.1 -pytest==7.0.1 +pytest==7.1.1 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 5c54ecd839b4..1516125ffc25 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,13 +1,13 @@ -db-dtypes==0.4.0 -google-cloud-bigquery-storage==2.12.0 -google-auth-oauthlib==0.5.0 +db-dtypes==1.0.0 +google-cloud-bigquery-storage==2.13.0 +google-auth-oauthlib==0.5.1 grpcio==1.44.0 ipython==7.31.1; python_version == '3.7' ipython==8.0.1; python_version == '3.8' -ipython==8.1.1; python_version >= '3.9' +ipython==8.2.0; python_version >= '3.9' matplotlib==3.5.1 pandas==1.3.5; python_version == '3.7' pandas==1.4.1; python_version >= '3.8' pyarrow==7.0.0 -pytz==2021.3 -typing-extensions==3.10.0.2 +pytz==2022.1 +typing-extensions==4.1.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index bafc3de2ae2c..c5864d4f7318 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.1 -pytest==7.0.1 +pytest==7.1.1 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 5c54ecd839b4..1516125ffc25 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,13 +1,13 @@ -db-dtypes==0.4.0 -google-cloud-bigquery-storage==2.12.0 -google-auth-oauthlib==0.5.0 +db-dtypes==1.0.0 +google-cloud-bigquery-storage==2.13.0 +google-auth-oauthlib==0.5.1 grpcio==1.44.0 ipython==7.31.1; python_version == '3.7' ipython==8.0.1; python_version == '3.8' -ipython==8.1.1; python_version >= '3.9' +ipython==8.2.0; python_version >= '3.9' matplotlib==3.5.1 pandas==1.3.5; python_version == '3.7' pandas==1.4.1; python_version >= '3.8' pyarrow==7.0.0 -pytz==2021.3 -typing-extensions==3.10.0.2 +pytz==2022.1 +typing-extensions==4.1.1 From 8f03aa3f7fe08627b27f9dd94fcfe14d77ee044d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 30 Mar 2022 17:03:44 +0200 Subject: [PATCH 1410/2016] chore: use === in requirements.txt for environment specific pins (#1184) * chore(deps): update all dependencies * chore(deps): use === in requirements.txt for environment specific pins Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/snippets/requirements.txt | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 3ad2b77224fc..0347ea61df7e 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -24,7 +24,7 @@ libcst==0.4.1 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 -pandas==1.3.5; python_version == '3.7' +pandas==1.3.5; python_version === '3.7' pandas==1.4.1; python_version >= '3.8' proto-plus==1.20.3 protobuf==3.19.4 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 1516125ffc25..da15b025bff5 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -2,11 +2,11 @@ db-dtypes==1.0.0 google-cloud-bigquery-storage==2.13.0 google-auth-oauthlib==0.5.1 grpcio==1.44.0 -ipython==7.31.1; python_version == '3.7' -ipython==8.0.1; python_version == '3.8' +ipython==7.31.1; python_version === '3.7' +ipython==8.0.1; python_version === '3.8' ipython==8.2.0; python_version >= '3.9' matplotlib==3.5.1 -pandas==1.3.5; python_version == '3.7' +pandas==1.3.5; python_version === '3.7' pandas==1.4.1; python_version >= '3.8' pyarrow==7.0.0 pytz==2022.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 1516125ffc25..da15b025bff5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -2,11 +2,11 @@ db-dtypes==1.0.0 google-cloud-bigquery-storage==2.13.0 google-auth-oauthlib==0.5.1 grpcio==1.44.0 -ipython==7.31.1; python_version == '3.7' -ipython==8.0.1; python_version == '3.8' +ipython==7.31.1; python_version === '3.7' +ipython==8.0.1; python_version === '3.8' ipython==8.2.0; python_version >= '3.9' matplotlib==3.5.1 -pandas==1.3.5; python_version == '3.7' +pandas==1.3.5; python_version === '3.7' pandas==1.4.1; python_version >= '3.8' pyarrow==7.0.0 pytz==2022.1 From 9a70b63e5ea8d3655afec856f71533a1a34464a0 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 30 Mar 2022 11:41:44 -0400 Subject: [PATCH 1411/2016] fix(deps): restore dependency on python-dateutil (#1187) * fix(deps): restore dependency on python-dateutil * fix(deps): remove redundante proto-plus Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 62fb3bbb3336..86eb2d41d399 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -42,8 +42,8 @@ "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", - "proto-plus >= 1.10.0", # For the legacy proto-based types. "protobuf >= 3.12.0", # For the legacy proto-based types. + "python-dateutil >= 2.7.2, <3.0dev", "pyarrow >= 3.0.0, < 8.0dev", "requests >= 2.18.0, < 3.0.0dev", ] From c4bd526fc175bd21fc4396583885cbe4a6afb84b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 30 Mar 2022 18:35:47 +0200 Subject: [PATCH 1412/2016] chore(deps): update all dependencies (#1185) * chore(deps): update all dependencies * chore: revert version bump for environment specific pins Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/magics/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/snippets/requirements.txt | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 0347ea61df7e..1b1b008e2f9c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==2.5.0 geopandas==0.10.2 google-api-core==2.7.1 google-auth==2.6.2 -google-cloud-bigquery==2.34.2 +google-cloud-bigquery==3.0.0 google-cloud-bigquery-storage==2.13.0 google-cloud-core==2.2.3 google-crc32c==1.3.0 @@ -24,7 +24,7 @@ libcst==0.4.1 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 -pandas==1.3.5; python_version === '3.7' +pandas===1.3.5; python_version == '3.7' pandas==1.4.1; python_version >= '3.8' proto-plus==1.20.3 protobuf==3.19.4 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index da15b025bff5..94ce22b0009f 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -2,11 +2,11 @@ db-dtypes==1.0.0 google-cloud-bigquery-storage==2.13.0 google-auth-oauthlib==0.5.1 grpcio==1.44.0 -ipython==7.31.1; python_version === '3.7' -ipython==8.0.1; python_version === '3.8' +ipython===7.31.1; python_version == '3.7' +ipython===8.0.1; python_version == '3.8' ipython==8.2.0; python_version >= '3.9' matplotlib==3.5.1 -pandas==1.3.5; python_version === '3.7' +pandas===1.3.5; python_version == '3.7' pandas==1.4.1; python_version >= '3.8' pyarrow==7.0.0 pytz==2022.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index da15b025bff5..94ce22b0009f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -2,11 +2,11 @@ db-dtypes==1.0.0 google-cloud-bigquery-storage==2.13.0 google-auth-oauthlib==0.5.1 grpcio==1.44.0 -ipython==7.31.1; python_version === '3.7' -ipython==8.0.1; python_version === '3.8' +ipython===7.31.1; python_version == '3.7' +ipython===8.0.1; python_version == '3.8' ipython==8.2.0; python_version >= '3.9' matplotlib==3.5.1 -pandas==1.3.5; python_version === '3.7' +pandas===1.3.5; python_version == '3.7' pandas==1.4.1; python_version >= '3.8' pyarrow==7.0.0 pytz==2022.1 From 3ffc20bdd2f90238f929f9b16f4a03822122dcdb Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 30 Mar 2022 18:24:13 +0000 Subject: [PATCH 1413/2016] chore(python): add E231 to .flake8 ignore list (#1190) Source-Link: https://github.com/googleapis/synthtool/commit/7ff4aad2ec5af0380e8bd6da1fa06eaadf24ec81 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:462782b0b492346b2d9099aaff52206dd30bc8e031ea97082e6facecc2373244 --- packages/google-cloud-bigquery/.flake8 | 2 +- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/.flake8 b/packages/google-cloud-bigquery/.flake8 index 29227d4cf419..2e438749863d 100644 --- a/packages/google-cloud-bigquery/.flake8 +++ b/packages/google-cloud-bigquery/.flake8 @@ -16,7 +16,7 @@ # Generated by synthtool. DO NOT EDIT! [flake8] -ignore = E203, E266, E501, W503 +ignore = E203, E231, E266, E501, W503 exclude = # Exclude generated code. **/proto/** diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 87dd00611576..9e0a9356b6eb 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:7cffbc10910c3ab1b852c05114a08d374c195a81cdec1d4a67a1d129331d0bfe + digest: sha256:462782b0b492346b2d9099aaff52206dd30bc8e031ea97082e6facecc2373244 From 6ab021b77f976274cb6034fe718b81dd3c6dc40b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 30 Mar 2022 14:20:16 -0500 Subject: [PATCH 1414/2016] fix(deps): raise exception when pandas is installed but db-dtypes is not (#1191) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `db-dtypes` is already present in the `pandas` "extras", but this PR ensures that if pandas is present and db-dtypes is not, a more understandable error message is raised. ``` google/cloud/bigquery/_pandas_helpers.py:991: ValueError ____________________________________ test_list_rows_nullable_scalars_extreme_dtypes[10] _____________________________________ # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Shared helper functions for connecting BigQuery and pandas.""" import concurrent.futures from datetime import datetime import functools from itertools import islice import logging import queue import warnings try: import pandas # type: ignore pandas_import_exception = None except ImportError as exc: # pragma: NO COVER pandas = None pandas_import_exception = exc else: import numpy try: > import db_dtypes # type: ignore E ModuleNotFoundError: No module named 'db_dtypes' google/cloud/bigquery/_pandas_helpers.py:36: ModuleNotFoundError The above exception was the direct cause of the following exception: bigquery_client = scalars_extreme_table = 'swast-scratch.python_bigquery_tests_system_20220330160830_ffff89.scalars_extreme_jsonl0x3ffeb' max_results = 10 @pytest.mark.parametrize( ("max_results",), ( (None,), (10,), ), # Use BQ Storage API. # Use REST API. ) def test_list_rows_nullable_scalars_extreme_dtypes( bigquery_client, scalars_extreme_table, max_results ): # TODO(GH#836): Avoid INTERVAL columns until they are supported by the # BigQuery Storage API and pyarrow. schema = [ bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), ] df = bigquery_client.list_rows( scalars_extreme_table, max_results=max_results, selected_fields=schema, > ).to_dataframe() tests/system/test_pandas.py:1084: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ google/cloud/bigquery/table.py:1925: in to_dataframe _pandas_helpers.verify_pandas_imports() _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ def verify_pandas_imports(): if pandas is None: raise ValueError(_NO_PANDAS_ERROR) from pandas_import_exception if db_dtypes is None: > raise ValueError(_NO_DB_TYPES_ERROR) from db_dtypes_import_exception E ValueError: Please install the 'db-dtypes' package to use this function. google/cloud/bigquery/_pandas_helpers.py:991: ValueError ``` Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #1188 🦕 --- .../google/cloud/bigquery/_pandas_helpers.py | 33 +++++++++++++++---- .../google/cloud/bigquery/table.py | 20 ++++------- .../tests/unit/test__pandas_helpers.py | 13 ++++++++ .../tests/unit/test_table.py | 8 ++--- 4 files changed, 49 insertions(+), 25 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 17de6830a5f0..cc0ee75ffa6d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -24,16 +24,25 @@ try: import pandas # type: ignore -except ImportError: # pragma: NO COVER + + pandas_import_exception = None +except ImportError as exc: # pragma: NO COVER pandas = None - date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype + pandas_import_exception = exc else: import numpy - from db_dtypes import DateDtype, TimeDtype # type: ignore +try: + import db_dtypes # type: ignore + + date_dtype_name = db_dtypes.DateDtype.name + time_dtype_name = db_dtypes.TimeDtype.name + db_dtypes_import_exception = None +except ImportError as exc: # pragma: NO COVER + db_dtypes = None + db_dtypes_import_exception = exc + date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype - date_dtype_name = DateDtype.name - time_dtype_name = TimeDtype.name import pyarrow # type: ignore import pyarrow.parquet # type: ignore @@ -84,6 +93,9 @@ def _to_wkb(v): _MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads +_NO_PANDAS_ERROR = "Please install the 'pandas' package to use this function." +_NO_DB_TYPES_ERROR = "Please install the 'db-dtypes' package to use this function." + _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", @@ -290,13 +302,13 @@ def types_mapper(arrow_data_type): not date_as_object and pyarrow.types.is_date(arrow_data_type) ): - return DateDtype() + return db_dtypes.DateDtype() elif pyarrow.types.is_integer(arrow_data_type): return pandas.Int64Dtype() elif pyarrow.types.is_time(arrow_data_type): - return TimeDtype() + return db_dtypes.TimeDtype() return types_mapper @@ -970,3 +982,10 @@ def dataframe_to_json_generator(dataframe): output[column] = value yield output + + +def verify_pandas_imports(): + if pandas is None: + raise ValueError(_NO_PANDAS_ERROR) from pandas_import_exception + if db_dtypes is None: + raise ValueError(_NO_DB_TYPES_ERROR) from db_dtypes_import_exception diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index ed4f214ce927..5a4de6a0159f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -28,8 +28,6 @@ import pandas # type: ignore except ImportError: # pragma: NO COVER pandas = None -else: - import db_dtypes # type: ignore # noqa import pyarrow # type: ignore @@ -69,10 +67,6 @@ from google.cloud.bigquery.dataset import DatasetReference -_NO_PANDAS_ERROR = ( - "The pandas library is not installed, please install " - "pandas to use the to_dataframe() function." -) _NO_GEOPANDAS_ERROR = ( "The geopandas library is not installed, please install " "geopandas to use the to_geodataframe() function." @@ -1818,8 +1812,8 @@ def to_dataframe_iterable( ValueError: If the :mod:`pandas` library cannot be imported. """ - if pandas is None: - raise ValueError(_NO_PANDAS_ERROR) + _pandas_helpers.verify_pandas_imports() + if dtypes is None: dtypes = {} @@ -1928,8 +1922,8 @@ def to_dataframe( :mod:`shapely` library cannot be imported. """ - if pandas is None: - raise ValueError(_NO_PANDAS_ERROR) + _pandas_helpers.verify_pandas_imports() + if geography_as_object and shapely is None: raise ValueError(_NO_SHAPELY_ERROR) @@ -2181,8 +2175,7 @@ def to_dataframe( Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. """ - if pandas is None: - raise ValueError(_NO_PANDAS_ERROR) + _pandas_helpers.verify_pandas_imports() return pandas.DataFrame() def to_geodataframe( @@ -2238,8 +2231,7 @@ def to_dataframe_iterable( ValueError: If the :mod:`pandas` library cannot be imported. """ - if pandas is None: - raise ValueError(_NO_PANDAS_ERROR) + _pandas_helpers.verify_pandas_imports() return iter((pandas.DataFrame(),)) def to_arrow_iterable( diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 5b2fadaf120c..1a3f918eb14b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -1751,3 +1751,16 @@ def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata): ).metadata == metadata ) + + +def test_verify_pandas_imports_no_pandas(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas", None) + with pytest.raises(ValueError, match="Please install the 'pandas' package"): + module_under_test.verify_pandas_imports() + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_verify_pandas_imports_no_db_dtypes(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "db_dtypes", None) + with pytest.raises(ValueError, match="Please install the 'db-dtypes' package"): + module_under_test.verify_pandas_imports() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 5241230a410b..66bc1d3db794 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1836,7 +1836,7 @@ def test_to_arrow_iterable(self): self.assertEqual(record_batch.num_rows, 0) self.assertEqual(record_batch.num_columns, 0) - @mock.patch("google.cloud.bigquery.table.pandas", new=None) + @mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None) def test_to_dataframe_error_if_pandas_is_none(self): row_iterator = self._make_one() with self.assertRaises(ValueError): @@ -1849,7 +1849,7 @@ def test_to_dataframe(self): self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows - @mock.patch("google.cloud.bigquery.table.pandas", new=None) + @mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None) def test_to_dataframe_iterable_error_if_pandas_is_none(self): row_iterator = self._make_one() with self.assertRaises(ValueError): @@ -2967,7 +2967,7 @@ def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): assert isinstance(dataframes[0], pandas.DataFrame) assert isinstance(dataframes[1], pandas.DataFrame) - @mock.patch("google.cloud.bigquery.table.pandas", new=None) + @mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None) def test_to_dataframe_iterable_error_if_pandas_is_none(self): from google.cloud.bigquery.schema import SchemaField @@ -3339,7 +3339,7 @@ def test_to_dataframe_datetime_objects(self): self.assertEqual(df["ts"][0].date(), datetime.date(1336, 3, 23)) self.assertEqual(df["date"][0], datetime.date(1111, 1, 1)) - @mock.patch("google.cloud.bigquery.table.pandas", new=None) + @mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None) def test_to_dataframe_error_if_pandas_is_none(self): from google.cloud.bigquery.schema import SchemaField From ba8af86514d6db25af7a78dd4b038b22a8629858 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 30 Mar 2022 19:48:13 +0000 Subject: [PATCH 1415/2016] chore(main): release 3.0.1 (#1189) :robot: I have created a release *beep* *boop* --- ### [3.0.1](https://github.com/googleapis/python-bigquery/compare/v3.0.0...v3.0.1) (2022-03-30) ### Bug Fixes * **deps:** raise exception when pandas is installed but db-dtypes is not ([#1191](https://github.com/googleapis/python-bigquery/issues/1191)) ([4333910](https://github.com/googleapis/python-bigquery/commit/433391097bae57dd12a93db18fc2bab573d8f128)) * **deps:** restore dependency on python-dateutil ([#1187](https://github.com/googleapis/python-bigquery/issues/1187)) ([212d7ec](https://github.com/googleapis/python-bigquery/commit/212d7ec1f0740d04c26fb3ceffc9a4dd9eed6756)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- packages/google-cloud-bigquery/CHANGELOG.md | 8 ++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index c5c8447da8bc..ca99c969f6bb 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [3.0.1](https://github.com/googleapis/python-bigquery/compare/v3.0.0...v3.0.1) (2022-03-30) + + +### Bug Fixes + +* **deps:** raise exception when pandas is installed but db-dtypes is not ([#1191](https://github.com/googleapis/python-bigquery/issues/1191)) ([4333910](https://github.com/googleapis/python-bigquery/commit/433391097bae57dd12a93db18fc2bab573d8f128)) +* **deps:** restore dependency on python-dateutil ([#1187](https://github.com/googleapis/python-bigquery/issues/1187)) ([212d7ec](https://github.com/googleapis/python-bigquery/commit/212d7ec1f0740d04c26fb3ceffc9a4dd9eed6756)) + ## [3.0.0](https://github.com/googleapis/python-bigquery/compare/v2.34.3...v3.0.0) (2022-03-29) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index d6f7def8c3f2..ad3213664053 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.0.0" +__version__ = "3.0.1" From cfda323df49e76ddf78f02480505a5cffd65e957 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 31 Mar 2022 01:34:19 +0200 Subject: [PATCH 1416/2016] chore(deps): update all dependencies (#1192) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 1b1b008e2f9c..154f3448c4c8 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -2,7 +2,7 @@ attrs==21.4.0 certifi==2021.10.8 cffi==1.15.0 charset-normalizer==2.0.12 -click==8.1.0 +click==8.1.1 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' @@ -12,7 +12,7 @@ geojson==2.5.0 geopandas==0.10.2 google-api-core==2.7.1 google-auth==2.6.2 -google-cloud-bigquery==3.0.0 +google-cloud-bigquery==3.0.1 google-cloud-bigquery-storage==2.13.0 google-cloud-core==2.2.3 google-crc32c==1.3.0 From 374e01b0487eccfbe10d5b414007076ec894b6b5 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 1 Apr 2022 00:10:10 +0000 Subject: [PATCH 1417/2016] chore(python): update .pre-commit-config.yaml to use black==22.3.0 (#1194) Source-Link: https://github.com/googleapis/synthtool/commit/7804ade3daae0d66649bee8df6c55484c6580b8d Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:eede5672562a32821444a8e803fb984a6f61f2237ea3de229d2de24453f4ae7d --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 3 ++- packages/google-cloud-bigquery/.pre-commit-config.yaml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 9e0a9356b6eb..22cc254afa2c 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:462782b0b492346b2d9099aaff52206dd30bc8e031ea97082e6facecc2373244 + digest: sha256:eede5672562a32821444a8e803fb984a6f61f2237ea3de229d2de24453f4ae7d +# created: 2022-03-30T23:44:26.560599165Z diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml index 62eb5a77d9a3..46d237160f6d 100644 --- a/packages/google-cloud-bigquery/.pre-commit-config.yaml +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 22.3.0 hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 From ef5b630745950dde06c56a46177cb64144628918 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 1 Apr 2022 04:16:29 +0200 Subject: [PATCH 1418/2016] chore(deps): update dependency click to v8.1.2 (#1197) Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 154f3448c4c8..0d0d1fb19ef8 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -2,7 +2,7 @@ attrs==21.4.0 certifi==2021.10.8 cffi==1.15.0 charset-normalizer==2.0.12 -click==8.1.1 +click==8.1.2 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' From 6857d2b50bb5c376652fcb25dc25c1a4f135922c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 1 Apr 2022 06:53:30 -0400 Subject: [PATCH 1419/2016] chore(python): Enable size-label bot (#1198) Source-Link: https://github.com/googleapis/synthtool/commit/06e82790dd719a165ad32b8a06f8f6ec3e3cae0f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:b3500c053313dc34e07b1632ba9e4e589f4f77036a7cf39e1fe8906811ae0fce Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- packages/google-cloud-bigquery/.github/auto-label.yaml | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 packages/google-cloud-bigquery/.github/auto-label.yaml diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 22cc254afa2c..58a0b153bf0e 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:eede5672562a32821444a8e803fb984a6f61f2237ea3de229d2de24453f4ae7d -# created: 2022-03-30T23:44:26.560599165Z + digest: sha256:b3500c053313dc34e07b1632ba9e4e589f4f77036a7cf39e1fe8906811ae0fce +# created: 2022-04-01T01:42:03.609279246Z diff --git a/packages/google-cloud-bigquery/.github/auto-label.yaml b/packages/google-cloud-bigquery/.github/auto-label.yaml new file mode 100644 index 000000000000..09c8d735b456 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/auto-label.yaml @@ -0,0 +1,2 @@ +requestsize: + enabled: true From 4c929f55ffa4f67c236b9c32fafafc9bc9f23d7c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sun, 3 Apr 2022 21:46:21 +0200 Subject: [PATCH 1420/2016] chore(deps): update all dependencies (#1200) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 0d0d1fb19ef8..fb5c518b4496 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -25,9 +25,9 @@ munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' -pandas==1.4.1; python_version >= '3.8' +pandas==1.4.2; python_version >= '3.8' proto-plus==1.20.3 -protobuf==3.19.4 +protobuf==3.20.0 pyarrow==7.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 94ce22b0009f..7406e71e8e12 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -7,7 +7,7 @@ ipython===8.0.1; python_version == '3.8' ipython==8.2.0; python_version >= '3.9' matplotlib==3.5.1 pandas===1.3.5; python_version == '3.7' -pandas==1.4.1; python_version >= '3.8' +pandas==1.4.2; python_version >= '3.8' pyarrow==7.0.0 pytz==2022.1 typing-extensions==4.1.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 94ce22b0009f..7406e71e8e12 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -7,7 +7,7 @@ ipython===8.0.1; python_version == '3.8' ipython==8.2.0; python_version >= '3.9' matplotlib==3.5.1 pandas===1.3.5; python_version == '3.7' -pandas==1.4.1; python_version >= '3.8' +pandas==1.4.2; python_version >= '3.8' pyarrow==7.0.0 pytz==2022.1 typing-extensions==4.1.1 From 589f7e9492e369d2dbb9232473244f009e7afbf1 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Sun, 3 Apr 2022 18:24:10 -0600 Subject: [PATCH 1421/2016] chore: allow releases from older version branches (#1201) Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/.github/release-please.yml | 12 ++++++++++++ packages/google-cloud-bigquery/owlbot.py | 1 + 2 files changed, 13 insertions(+) diff --git a/packages/google-cloud-bigquery/.github/release-please.yml b/packages/google-cloud-bigquery/.github/release-please.yml index 466597e5b196..5161ab347cdf 100644 --- a/packages/google-cloud-bigquery/.github/release-please.yml +++ b/packages/google-cloud-bigquery/.github/release-please.yml @@ -1,2 +1,14 @@ releaseType: python handleGHRelease: true +# NOTE: this section is generated by synthtool.languages.python +# See https://github.com/googleapis/synthtool/blob/master/synthtool/languages/python.py +branches: +- branch: v2 + handleGHRelease: true + releaseType: python +- branch: v1 + handleGHRelease: true + releaseType: python +- branch: v0 + handleGHRelease: true + releaseType: python diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index a445b2be9fc5..04628050189a 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -52,6 +52,7 @@ ], ) +python.configure_previous_major_version_branches() # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- From 1f26cceb93a7556792c3ab56be6e1797f226819a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 5 Apr 2022 22:26:14 +0200 Subject: [PATCH 1422/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.13.1 (#1203) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index fb5c518b4496..65140d3871e3 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas==0.10.2 google-api-core==2.7.1 google-auth==2.6.2 google-cloud-bigquery==3.0.1 -google-cloud-bigquery-storage==2.13.0 +google-cloud-bigquery-storage==2.13.1 google-cloud-core==2.2.3 google-crc32c==1.3.0 google-resumable-media==2.3.2 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 7406e71e8e12..99792b0d97d2 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.0 -google-cloud-bigquery-storage==2.13.0 +google-cloud-bigquery-storage==2.13.1 google-auth-oauthlib==0.5.1 grpcio==1.44.0 ipython===7.31.1; python_version == '3.7' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 7406e71e8e12..99792b0d97d2 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.0 -google-cloud-bigquery-storage==2.13.0 +google-cloud-bigquery-storage==2.13.1 google-auth-oauthlib==0.5.1 grpcio==1.44.0 ipython===7.31.1; python_version == '3.7' From 008fbbfd36b6b8661203cd472fc31b15b208de17 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 6 Apr 2022 11:06:10 +0000 Subject: [PATCH 1423/2016] chore(python): add license header to auto-label.yaml (#1204) Source-Link: https://github.com/googleapis/synthtool/commit/eb78c980b52c7c6746d2edb77d9cf7aaa99a2aab Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8a5d3f6a2e43ed8293f34e06a2f56931d1e88a2694c3bb11b15df4eb256ad163 --- .../google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- .../google-cloud-bigquery/.github/auto-label.yaml | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 58a0b153bf0e..bc893c979e20 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:b3500c053313dc34e07b1632ba9e4e589f4f77036a7cf39e1fe8906811ae0fce -# created: 2022-04-01T01:42:03.609279246Z + digest: sha256:8a5d3f6a2e43ed8293f34e06a2f56931d1e88a2694c3bb11b15df4eb256ad163 +# created: 2022-04-06T10:30:21.687684602Z diff --git a/packages/google-cloud-bigquery/.github/auto-label.yaml b/packages/google-cloud-bigquery/.github/auto-label.yaml index 09c8d735b456..41bff0b5375a 100644 --- a/packages/google-cloud-bigquery/.github/auto-label.yaml +++ b/packages/google-cloud-bigquery/.github/auto-label.yaml @@ -1,2 +1,15 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. requestsize: enabled: true From ca03a8dba9b860d997d4b789b709a7fba85dfce3 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 8 Apr 2022 01:13:16 +0200 Subject: [PATCH 1424/2016] chore(deps): update dependency google-auth to v2.6.3 (#1206) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 65140d3871e3..03c7899e1bb0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 google-api-core==2.7.1 -google-auth==2.6.2 +google-auth==2.6.3 google-cloud-bigquery==3.0.1 google-cloud-bigquery-storage==2.13.1 google-cloud-core==2.2.3 From ceb5b34cfb279e17229d67c8a934a5b67a77b011 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Fri, 8 Apr 2022 12:40:34 -0700 Subject: [PATCH 1425/2016] feat: refactor AccessEntry to use _properties pattern (#1125) * add view, dataset, routine properties * add properties and unit tests * lint * add properties and tests * remove unused imports * update test * add tests * add tests * add tests * add more tests * update return type * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Swast * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Swast * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Swast * refactor to use get() and remove self._entity_id * delete unnecessary file * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Swast * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Swast * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Swast * add types, remove unnecessary checks * fix types * types * add type casting * refactor AccessEntry repr * update to return DatasetReference * update to use RoutineRef and TableRef * add table test * update to use api_repr * lint * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Swast * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Swast * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Swast * update tests * remove repeat type import Co-authored-by: Tim Swast --- .../google/cloud/bigquery/dataset.py | 461 +++++++++++------- .../tests/unit/test_create_dataset.py | 5 +- .../tests/unit/test_dataset.py | 356 ++++++++++++-- 3 files changed, 612 insertions(+), 210 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 0fafd578384a..c302040675ff 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -17,16 +17,19 @@ from __future__ import absolute_import import copy -from typing import Dict, Any + +import typing import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery.model import ModelReference -from google.cloud.bigquery.routine import RoutineReference -from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.routine import Routine, RoutineReference +from google.cloud.bigquery.table import Table, TableReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from typing import Optional, List, Dict, Any, Union + def _get_table_reference(self, table_id: str) -> TableReference: """Constructs a TableReference. @@ -75,173 +78,6 @@ def _get_routine_reference(self, routine_id): ) -class AccessEntry(object): - """Represents grant of an access role to an entity. - - An entry must have exactly one of the allowed - :class:`google.cloud.bigquery.enums.EntityTypes`. If anything but ``view``, ``routine``, - or ``dataset`` are set, a ``role`` is also required. ``role`` is omitted for ``view``, - ``routine``, ``dataset``, because they are always read-only. - - See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets. - - Args: - role (str): - Role granted to the entity. The following string values are - supported: `'READER'`, `'WRITER'`, `'OWNER'`. It may also be - :data:`None` if the ``entity_type`` is ``view``, ``routine``, or ``dataset``. - - entity_type (str): - Type of entity being granted the role. See - :class:`google.cloud.bigquery.enums.EntityTypes` for supported types. - - entity_id (Union[str, Dict[str, str]]): - If the ``entity_type`` is not 'view', 'routine', or 'dataset', the - ``entity_id`` is the ``str`` ID of the entity being granted the role. If - the ``entity_type`` is 'view' or 'routine', the ``entity_id`` is a ``dict`` - representing the view or routine from a different dataset to grant access - to in the following format for views:: - - { - 'projectId': string, - 'datasetId': string, - 'tableId': string - } - - For routines:: - - { - 'projectId': string, - 'datasetId': string, - 'routineId': string - } - - If the ``entity_type`` is 'dataset', the ``entity_id`` is a ``dict`` that includes - a 'dataset' field with a ``dict`` representing the dataset and a 'target_types' - field with a ``str`` value of the dataset's resource type:: - - { - 'dataset': { - 'projectId': string, - 'datasetId': string, - }, - 'target_types: 'VIEWS' - } - - Raises: - ValueError: - If a ``view``, ``routine``, or ``dataset`` has ``role`` set, or a non ``view``, - non ``routine``, and non ``dataset`` **does not** have a ``role`` set. - - Examples: - >>> entry = AccessEntry('OWNER', 'userByEmail', 'user@example.com') - - >>> view = { - ... 'projectId': 'my-project', - ... 'datasetId': 'my_dataset', - ... 'tableId': 'my_table' - ... } - >>> entry = AccessEntry(None, 'view', view) - """ - - def __init__(self, role=None, entity_type=None, entity_id=None) -> None: - self._properties: Dict[str, Any] = {} - if entity_type in ("view", "routine", "dataset"): - if role is not None: - raise ValueError( - "Role must be None for a %r. Received " - "role: %r" % (entity_type, role) - ) - else: - if role is None: - raise ValueError( - "Role must be set for entity " "type %r" % (entity_type,) - ) - self._role = role - self._entity_type = entity_type - self._entity_id = entity_id - - @property - def role(self): - """str: The role of the entry.""" - return self._role - - @property - def entity_type(self): - """str: The entity_type of the entry.""" - return self._entity_type - - @property - def entity_id(self): - """str: The entity_id of the entry.""" - return self._entity_id - - def __eq__(self, other): - if not isinstance(other, AccessEntry): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __repr__(self): - return "" % ( - self._role, - self._entity_type, - self._entity_id, - ) - - def _key(self): - """A tuple key that uniquely describes this field. - Used to compute this instance's hashcode and evaluate equality. - Returns: - Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`. - """ - return (self._role, self._entity_type, self._entity_id) - - def __hash__(self): - return hash(self._key()) - - def to_api_repr(self): - """Construct the API resource representation of this access entry - - Returns: - Dict[str, object]: Access entry represented as an API resource - """ - resource = copy.deepcopy(self._properties) - resource[self._entity_type] = self._entity_id - if self._role is not None: - resource["role"] = self._role - return resource - - @classmethod - def from_api_repr(cls, resource: dict) -> "AccessEntry": - """Factory: construct an access entry given its API representation - - Args: - resource (Dict[str, object]): - Access entry resource representation returned from the API - - Returns: - google.cloud.bigquery.dataset.AccessEntry: - Access entry parsed from ``resource``. - - Raises: - ValueError: - If the resource has more keys than ``role`` and one additional - key. - """ - entry = resource.copy() - role = entry.pop("role", None) - entity_type, entity_id = entry.popitem() - if len(entry) != 0: - raise ValueError("Entry has unexpected keys remaining.", entry) - - config = cls(role, entity_type, entity_id) - config._properties = copy.deepcopy(resource) - return config - - class DatasetReference(object): """DatasetReferences are pointers to datasets. @@ -383,6 +219,291 @@ def __repr__(self): return "DatasetReference{}".format(self._key()) +class AccessEntry(object): + """Represents grant of an access role to an entity. + + An entry must have exactly one of the allowed + :class:`google.cloud.bigquery.enums.EntityTypes`. If anything but ``view``, ``routine``, + or ``dataset`` are set, a ``role`` is also required. ``role`` is omitted for ``view``, + ``routine``, ``dataset``, because they are always read-only. + + See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets. + + Args: + role: + Role granted to the entity. The following string values are + supported: `'READER'`, `'WRITER'`, `'OWNER'`. It may also be + :data:`None` if the ``entity_type`` is ``view``, ``routine``, or ``dataset``. + + entity_type: + Type of entity being granted the role. See + :class:`google.cloud.bigquery.enums.EntityTypes` for supported types. + + entity_id: + If the ``entity_type`` is not 'view', 'routine', or 'dataset', the + ``entity_id`` is the ``str`` ID of the entity being granted the role. If + the ``entity_type`` is 'view' or 'routine', the ``entity_id`` is a ``dict`` + representing the view or routine from a different dataset to grant access + to in the following format for views:: + + { + 'projectId': string, + 'datasetId': string, + 'tableId': string + } + + For routines:: + + { + 'projectId': string, + 'datasetId': string, + 'routineId': string + } + + If the ``entity_type`` is 'dataset', the ``entity_id`` is a ``dict`` that includes + a 'dataset' field with a ``dict`` representing the dataset and a 'target_types' + field with a ``str`` value of the dataset's resource type:: + + { + 'dataset': { + 'projectId': string, + 'datasetId': string, + }, + 'target_types: 'VIEWS' + } + + Raises: + ValueError: + If a ``view``, ``routine``, or ``dataset`` has ``role`` set, or a non ``view``, + non ``routine``, and non ``dataset`` **does not** have a ``role`` set. + + Examples: + >>> entry = AccessEntry('OWNER', 'userByEmail', 'user@example.com') + + >>> view = { + ... 'projectId': 'my-project', + ... 'datasetId': 'my_dataset', + ... 'tableId': 'my_table' + ... } + >>> entry = AccessEntry(None, 'view', view) + """ + + def __init__( + self, + role: Optional[str] = None, + entity_type: Optional[str] = None, + entity_id: Optional[Union[Dict[str, Any], str]] = None, + ): + self._properties = {} + if entity_type is not None: + self._properties[entity_type] = entity_id + self._properties["role"] = role + self._entity_type = entity_type + + @property + def role(self) -> Optional[str]: + """The role of the entry.""" + return typing.cast(Optional[str], self._properties.get("role")) + + @role.setter + def role(self, value): + self._properties["role"] = value + + @property + def dataset(self) -> Optional[DatasetReference]: + """API resource representation of a dataset reference.""" + value = _helpers._get_sub_prop(self._properties, ["dataset", "dataset"]) + return DatasetReference.from_api_repr(value) if value else None + + @dataset.setter + def dataset(self, value): + if self.role is not None: + raise ValueError( + "Role must be None for a dataset. Current " "role: %r" % (self.role) + ) + + if isinstance(value, str): + value = DatasetReference.from_string(value).to_api_repr() + + if isinstance(value, (Dataset, DatasetListItem)): + value = value.reference.to_api_repr() + + _helpers._set_sub_prop(self._properties, ["dataset", "dataset"], value) + _helpers._set_sub_prop( + self._properties, + ["dataset", "targetTypes"], + self._properties.get("targetTypes"), + ) + + @property + def dataset_target_types(self) -> Optional[List[str]]: + """Which resources that the dataset in this entry applies to.""" + return typing.cast( + Optional[List[str]], + _helpers._get_sub_prop(self._properties, ["dataset", "targetTypes"]), + ) + + @dataset_target_types.setter + def dataset_target_types(self, value): + self._properties.setdefault("dataset", {}) + _helpers._set_sub_prop(self._properties, ["dataset", "targetTypes"], value) + + @property + def routine(self) -> Optional[RoutineReference]: + """API resource representation of a routine reference.""" + value = typing.cast(Optional[Dict], self._properties.get("routine")) + return RoutineReference.from_api_repr(value) if value else None + + @routine.setter + def routine(self, value): + if self.role is not None: + raise ValueError( + "Role must be None for a routine. Current " "role: %r" % (self.role) + ) + + if isinstance(value, str): + value = RoutineReference.from_string(value).to_api_repr() + + if isinstance(value, RoutineReference): + value = value.to_api_repr() + + if isinstance(value, Routine): + value = value.reference.to_api_repr() + + self._properties["routine"] = value + + @property + def view(self) -> Optional[TableReference]: + """API resource representation of a view reference.""" + value = typing.cast(Optional[Dict], self._properties.get("view")) + return TableReference.from_api_repr(value) if value else None + + @view.setter + def view(self, value): + if self.role is not None: + raise ValueError( + "Role must be None for a view. Current " "role: %r" % (self.role) + ) + + if isinstance(value, str): + value = TableReference.from_string(value).to_api_repr() + + if isinstance(value, TableReference): + value = value.to_api_repr() + + if isinstance(value, Table): + value = value.reference.to_api_repr() + + self._properties["view"] = value + + @property + def group_by_email(self) -> Optional[str]: + """An email address of a Google Group to grant access to.""" + return typing.cast(Optional[str], self._properties.get("groupByEmail")) + + @group_by_email.setter + def group_by_email(self, value): + self._properties["groupByEmail"] = value + + @property + def user_by_email(self) -> Optional[str]: + """An email address of a user to grant access to.""" + return typing.cast(Optional[str], self._properties.get("userByEmail")) + + @user_by_email.setter + def user_by_email(self, value): + self._properties["userByEmail"] = value + + @property + def domain(self) -> Optional[str]: + """A domain to grant access to.""" + return typing.cast(Optional[str], self._properties.get("domain")) + + @domain.setter + def domain(self, value): + self._properties["domain"] = value + + @property + def special_group(self) -> Optional[str]: + """A special group to grant access to.""" + return typing.cast(Optional[str], self._properties.get("specialGroup")) + + @special_group.setter + def special_group(self, value): + self._properties["specialGroup"] = value + + @property + def entity_type(self) -> Optional[str]: + """The entity_type of the entry.""" + return self._entity_type + + @property + def entity_id(self) -> Optional[Union[Dict[str, Any], str]]: + """The entity_id of the entry.""" + return self._properties.get(self._entity_type) if self._entity_type else None + + def __eq__(self, other): + if not isinstance(other, AccessEntry): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + + return f"" + + def _key(self): + """A tuple key that uniquely describes this field. + Used to compute this instance's hashcode and evaluate equality. + Returns: + Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`. + """ + properties = self._properties.copy() + prop_tup = tuple(sorted(properties.items())) + return (self.role, self._entity_type, self.entity_id, prop_tup) + + def __hash__(self): + return hash(self._key()) + + def to_api_repr(self): + """Construct the API resource representation of this access entry + + Returns: + Dict[str, object]: Access entry represented as an API resource + """ + resource = copy.deepcopy(self._properties) + return resource + + @classmethod + def from_api_repr(cls, resource: dict) -> "AccessEntry": + """Factory: construct an access entry given its API representation + + Args: + resource (Dict[str, object]): + Access entry resource representation returned from the API + + Returns: + google.cloud.bigquery.dataset.AccessEntry: + Access entry parsed from ``resource``. + + Raises: + ValueError: + If the resource has more keys than ``role`` and one additional + key. + """ + entry = resource.copy() + role = entry.pop("role", None) + entity_type, entity_id = entry.popitem() + if len(entry) != 0: + raise ValueError("Entry has unexpected keys remaining.", entry) + + config = cls(role, entity_type, entity_id) + config._properties = copy.deepcopy(resource) + return config + + class Dataset(object): """Datasets are containers for tables. diff --git a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py index 67b21225d6e9..81af52261d2c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py @@ -109,7 +109,10 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "friendlyName": FRIENDLY_NAME, "location": LOCATION, "defaultTableExpirationMs": "3600", - "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + "access": [ + {"role": "OWNER", "userByEmail": USER_EMAIL}, + {"view": VIEW, "role": None}, + ], "labels": LABELS, }, timeout=DEFAULT_TIMEOUT, diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index c554782bfa96..856674dafa2c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -15,14 +15,20 @@ import unittest import mock +from google.cloud.bigquery.routine.routine import Routine, RoutineReference import pytest +from google.cloud.bigquery.dataset import ( + AccessEntry, + Dataset, + DatasetReference, + Table, + TableReference, +) class TestAccessEntry(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery.dataset import AccessEntry - return AccessEntry def _make_one(self, *args, **kw): @@ -34,16 +40,6 @@ def test_ctor_defaults(self): self.assertEqual(entry.entity_type, "userByEmail") self.assertEqual(entry.entity_id, "phred@example.com") - def test_ctor_bad_entity_type(self): - with self.assertRaises(ValueError): - self._make_one(None, "unknown", None) - - def test_ctor_view_with_role(self): - role = "READER" - entity_type = "view" - with self.assertRaises(ValueError): - self._make_one(role, entity_type, None) - def test_ctor_view_success(self): role = None entity_type = "view" @@ -53,12 +49,6 @@ def test_ctor_view_success(self): self.assertEqual(entry.entity_type, entity_type) self.assertEqual(entry.entity_id, entity_id) - def test_ctor_routine_with_role(self): - role = "READER" - entity_type = "routine" - with self.assertRaises(ValueError): - self._make_one(role, entity_type, None) - def test_ctor_routine_success(self): role = None entity_type = "routine" @@ -68,12 +58,6 @@ def test_ctor_routine_success(self): self.assertEqual(entry.entity_type, entity_type) self.assertEqual(entry.entity_id, entity_id) - def test_ctor_nonview_without_role(self): - role = None - entity_type = "userByEmail" - with self.assertRaises(ValueError): - self._make_one(role, entity_type, None) - def test___eq___role_mismatch(self): entry = self._make_one("OWNER", "userByEmail", "phred@example.com") other = self._make_one("WRITER", "userByEmail", "phred@example.com") @@ -127,7 +111,7 @@ def test_to_api_repr_view(self): } entry = self._make_one(None, "view", view) resource = entry.to_api_repr() - exp_resource = {"view": view} + exp_resource = {"view": view, "role": None} self.assertEqual(resource, exp_resource) def test_to_api_repr_routine(self): @@ -136,9 +120,10 @@ def test_to_api_repr_routine(self): "datasetId": "my_dataset", "routineId": "my_routine", } + entry = self._make_one(None, "routine", routine) resource = entry.to_api_repr() - exp_resource = {"routine": routine} + exp_resource = {"routine": routine, "role": None} self.assertEqual(resource, exp_resource) def test_to_api_repr_dataset(self): @@ -148,21 +133,9 @@ def test_to_api_repr_dataset(self): } entry = self._make_one(None, "dataset", dataset) resource = entry.to_api_repr() - exp_resource = {"dataset": dataset} + exp_resource = {"dataset": dataset, "role": None} self.assertEqual(resource, exp_resource) - def test_to_api_w_incorrect_role(self): - dataset = { - "dataset": { - "projectId": "my-project", - "datasetId": "my_dataset", - "tableId": "my_table", - }, - "target_type": "VIEW", - } - with self.assertRaises(ValueError): - self._make_one("READER", "dataset", dataset) - def test_from_api_repr(self): resource = {"role": "OWNER", "userByEmail": "salmon@example.com"} entry = self._get_target_class().from_api_repr(resource) @@ -198,6 +171,311 @@ def test_from_api_repr_entries_w_extra_keys(self): with self.assertRaises(ValueError): self._get_target_class().from_api_repr(resource) + def test_view_getter_setter(self): + view = { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "my_table", + } + view_ref = TableReference.from_api_repr(view) + entry = self._make_one(None) + entry.view = view + resource = entry.to_api_repr() + exp_resource = {"view": view, "role": None} + self.assertEqual(entry.view, view_ref) + self.assertEqual(resource, exp_resource) + + def test_view_getter_setter_none(self): + entry = self._make_one(None) + self.assertEqual(entry.view, None) + + def test_view_getter_setter_string(self): + project = "my_project" + dataset = "my_dataset" + table = "my_table" + view = { + "projectId": project, + "datasetId": dataset, + "tableId": table, + } + entry = self._make_one(None) + entry.view = f"{project}.{dataset}.{table}" + resource = entry.to_api_repr() + exp_resource = {"view": view, "role": None} + self.assertEqual(resource, exp_resource) + + def test_view_getter_setter_table(self): + project = "my_project" + dataset = "my_dataset" + table = "my_table" + view = { + "projectId": project, + "datasetId": dataset, + "tableId": table, + } + view_ref = Table.from_string(f"{project}.{dataset}.{table}") + entry = self._make_one(None) + entry.view = view_ref + resource = entry.to_api_repr() + exp_resource = {"view": view, "role": None} + self.assertEqual(resource, exp_resource) + + def test_view_getter_setter_table_ref(self): + project = "my_project" + dataset = "my_dataset" + table = "my_table" + view = { + "projectId": project, + "datasetId": dataset, + "tableId": table, + } + view_ref = TableReference.from_string(f"{project}.{dataset}.{table}") + entry = self._make_one(None) + entry.view = view_ref + resource = entry.to_api_repr() + exp_resource = {"view": view, "role": None} + self.assertEqual(resource, exp_resource) + + def test_view_getter_setter_incorrect_role(self): + view = { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "my_table", + } + view_ref = TableReference.from_api_repr(view) + entry = self._make_one("READER") + with self.assertRaises(ValueError): + entry.view = view_ref + + def test_dataset_getter_setter(self): + dataset = {"projectId": "my-project", "datasetId": "my_dataset"} + entry = self._make_one(None) + entry.dataset = dataset + resource = entry.to_api_repr() + exp_resource = { + "dataset": {"dataset": dataset, "targetTypes": None}, + "role": None, + } + dataset_ref = DatasetReference.from_api_repr(dataset) + prop = entry.dataset + self.assertEqual(resource, exp_resource) + self.assertEqual(prop, dataset_ref) + + def test_dataset_getter_setter_none(self): + entry = self._make_one(None) + self.assertEqual(entry.dataset, None) + + def test_dataset_getter_setter_string(self): + project = "my-project" + dataset_id = "my_dataset" + dataset = { + "projectId": project, + "datasetId": dataset_id, + } + entry = self._make_one(None) + string_ref = f"{project}.{dataset_id}" + entry.dataset = string_ref + resource = entry.to_api_repr() + exp_resource = { + "dataset": {"dataset": dataset, "targetTypes": None}, + "role": None, + } + self.assertEqual(resource, exp_resource) + + def test_dataset_getter_setter_dataset_ref(self): + project = "my-project" + dataset_id = "my_dataset" + dataset_ref = DatasetReference(project, dataset_id) + entry = self._make_one(None) + entry.dataset = dataset_ref + resource = entry.to_api_repr() + exp_resource = { + "dataset": {"dataset": dataset_ref, "targetTypes": None}, + "role": None, + } + self.assertEqual(resource, exp_resource) + + def test_dataset_getter_setter_dataset(self): + project = "my-project" + dataset_id = "my_dataset" + dataset_repr = { + "projectId": project, + "datasetId": dataset_id, + } + dataset = Dataset(f"{project}.{dataset_id}") + entry = self._make_one(None) + entry.dataset = dataset + resource = entry.to_api_repr() + exp_resource = { + "role": None, + "dataset": {"dataset": dataset_repr, "targetTypes": None}, + } + self.assertEqual(resource, exp_resource) + + def test_dataset_getter_setter_incorrect_role(self): + dataset = {"dataset": {"projectId": "my-project", "datasetId": "my_dataset"}} + entry = self._make_one("READER") + with self.assertRaises(ValueError): + entry.dataset = dataset + + def test_routine_getter_setter(self): + routine = { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + entry = self._make_one(None) + entry.routine = routine + resource = entry.to_api_repr() + exp_resource = {"routine": routine, "role": None} + self.assertEqual(resource, exp_resource) + + def test_routine_getter_setter_none(self): + entry = self._make_one(None) + self.assertEqual(entry.routine, None) + + def test_routine_getter_setter_string(self): + project = "my-project" + dataset_id = "my_dataset" + routine_id = "my_routine" + routine = { + "projectId": project, + "datasetId": dataset_id, + "routineId": routine_id, + } + entry = self._make_one(None) + entry.routine = f"{project}.{dataset_id}.{routine_id}" + resource = entry.to_api_repr() + exp_resource = { + "routine": routine, + "role": None, + } + self.assertEqual(resource, exp_resource) + + def test_routine_getter_setter_routine_ref(self): + routine = { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + entry = self._make_one(None) + entry.routine = RoutineReference.from_api_repr(routine) + resource = entry.to_api_repr() + exp_resource = { + "routine": routine, + "role": None, + } + self.assertEqual(resource, exp_resource) + + def test_routine_getter_setter_routine(self): + routine = { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + routine_ref = RoutineReference.from_api_repr(routine) + entry = self._make_one(None) + entry.routine = Routine(routine_ref) + resource = entry.to_api_repr() + exp_resource = { + "routine": routine, + "role": None, + } + self.assertEqual(entry.routine, routine_ref) + self.assertEqual(resource, exp_resource) + + def test_routine_getter_setter_incorrect_role(self): + routine = { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + entry = self._make_one("READER") + with self.assertRaises(ValueError): + entry.routine = routine + + def test_group_by_email_getter_setter(self): + email = "cloud-developer-relations@google.com" + entry = self._make_one(None) + entry.group_by_email = email + resource = entry.to_api_repr() + exp_resource = {"groupByEmail": email, "role": None} + self.assertEqual(entry.group_by_email, email) + self.assertEqual(resource, exp_resource) + + def test_group_by_email_getter_setter_none(self): + entry = self._make_one(None) + self.assertEqual(entry.group_by_email, None) + + def test_user_by_email_getter_setter(self): + email = "cloud-developer-relations@google.com" + entry = self._make_one(None) + entry.user_by_email = email + resource = entry.to_api_repr() + exp_resource = {"userByEmail": email, "role": None} + self.assertEqual(entry.user_by_email, email) + self.assertEqual(resource, exp_resource) + + def test_user_by_email_getter_setter_none(self): + entry = self._make_one(None) + self.assertEqual(entry.user_by_email, None) + + def test_domain_setter(self): + domain = "my_domain" + entry = self._make_one(None) + entry.domain = domain + resource = entry.to_api_repr() + exp_resource = {"domain": domain, "role": None} + self.assertEqual(entry.domain, domain) + self.assertEqual(resource, exp_resource) + + def test_domain_getter_setter_none(self): + entry = self._make_one(None) + self.assertEqual(entry.domain, None) + + def test_special_group_getter_setter(self): + special_group = "my_special_group" + entry = self._make_one(None) + entry.special_group = special_group + resource = entry.to_api_repr() + exp_resource = {"specialGroup": special_group, "role": None} + self.assertEqual(entry.special_group, special_group) + self.assertEqual(resource, exp_resource) + + def test_special_group_getter_setter_none(self): + entry = self._make_one(None) + self.assertEqual(entry.special_group, None) + + def test_role_getter_setter(self): + role = "READER" + entry = self._make_one(None) + entry.role = role + resource = entry.to_api_repr() + exp_resource = {"role": role} + self.assertEqual(resource, exp_resource) + + def test_role_getter_setter_none(self): + entry = self._make_one(None) + self.assertEqual(entry.role, None) + + def test_dataset_target_types_getter_setter(self): + target_types = ["VIEWS"] + entry = self._make_one(None) + entry.dataset_target_types = target_types + self.assertEqual(entry.dataset_target_types, target_types) + + def test_dataset_target_types_getter_setter_none(self): + entry = self._make_one(None) + self.assertEqual(entry.dataset_target_types, None) + + def test_dataset_target_types_getter_setter_w_dataset(self): + dataset = {"projectId": "my-project", "datasetId": "my_dataset"} + target_types = ["VIEWS"] + entry = self._make_one(None) + entry.dataset = dataset + entry.dataset_target_types = target_types + self.assertEqual(entry.dataset_target_types, target_types) + class TestDatasetReference(unittest.TestCase): @staticmethod From 2fd9bf6b8b9d6185a1ecf89cd143e5a7a351515b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 11 Apr 2022 02:00:42 +0200 Subject: [PATCH 1426/2016] chore(deps): update dependency pyparsing to v3.0.8 (#1210) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 03c7899e1bb0..6d31583db09e 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -32,7 +32,7 @@ pyarrow==7.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 -pyparsing==3.0.7 +pyparsing==3.0.8 python-dateutil==2.8.2 pytz==2022.1 PyYAML==6.0 From 4d0cbb4a02b49e1252ad8eb4232b412077c9f93f Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 11 Apr 2022 19:40:56 +0200 Subject: [PATCH 1427/2016] chore(deps): update dependency google-cloud-core to v2.3.0 (#1211) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 6d31583db09e..727c0669f327 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -14,7 +14,7 @@ google-api-core==2.7.1 google-auth==2.6.3 google-cloud-bigquery==3.0.1 google-cloud-bigquery-storage==2.13.1 -google-cloud-core==2.2.3 +google-cloud-core==2.3.0 google-crc32c==1.3.0 google-resumable-media==2.3.2 googleapis-common-protos==1.56.0 From d237b2b6cd9ab937d3a003090542f0866143d93d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 13 Apr 2022 00:51:09 +0200 Subject: [PATCH 1428/2016] chore(deps): update dependency google-auth to v2.6.4 (#1214) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 727c0669f327..b62b1deb028b 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 google-api-core==2.7.1 -google-auth==2.6.3 +google-auth==2.6.4 google-cloud-bigquery==3.0.1 google-cloud-bigquery-storage==2.13.1 google-cloud-core==2.3.0 From 2562b77c04a64524f1eb85ed0242b903e9b09023 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 14 Apr 2022 01:57:03 +0200 Subject: [PATCH 1429/2016] chore(deps): update dependency google-api-core to v2.7.2 (#1215) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index b62b1deb028b..8bef04f83448 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -10,7 +10,7 @@ db-dtypes==1.0.0 Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 -google-api-core==2.7.1 +google-api-core==2.7.2 google-auth==2.6.4 google-cloud-bigquery==3.0.1 google-cloud-bigquery-storage==2.13.1 From 84c9443b8b24bc135bc0fbda08740e6def64ed88 Mon Sep 17 00:00:00 2001 From: john li Date: Thu, 14 Apr 2022 13:07:17 -0400 Subject: [PATCH 1430/2016] fix: export bigquery.HivePartitioningOptions (#1217) --- .../google-cloud-bigquery/google/cloud/bigquery/__init__.py | 2 ++ .../samples/snippets/create_table_external_hive_partitioned.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 1ac04d50c356..81b1285e3fe5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -49,6 +49,7 @@ from google.cloud.bigquery.external_config import CSVOptions from google.cloud.bigquery.external_config import GoogleSheetsOptions from google.cloud.bigquery.external_config import ExternalSourceFormat +from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery.format_options import AvroOptions from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job.base import SessionInfo @@ -161,6 +162,7 @@ "DmlStats", "CSVOptions", "GoogleSheetsOptions", + "HivePartitioningOptions", "ParquetOptions", "ScriptOptions", "TransactionInfo", diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py index 1170c57da7ae..aecf8ca4c130 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned.py @@ -50,7 +50,7 @@ def create_table_external_hive_partitioned(table_id: str) -> "bigquery.Table": external_config.autodetect = True # Configure partitioning options. - hive_partitioning_opts = bigquery.external_config.HivePartitioningOptions() + hive_partitioning_opts = bigquery.HivePartitioningOptions() # The layout of the files in here is compatible with the layout requirements for hive partitioning, # so we can add an optional Hive partitioning configuration to leverage the object paths for deriving From 385214901996c762f445d7d2b579ace549674cae Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 15 Apr 2022 02:44:43 +0200 Subject: [PATCH 1431/2016] chore(deps): update dependency google-auth to v2.6.5 (#1219) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 8bef04f83448..eac73597fa12 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 google-api-core==2.7.2 -google-auth==2.6.4 +google-auth==2.6.5 google-cloud-bigquery==3.0.1 google-cloud-bigquery-storage==2.13.1 google-cloud-core==2.3.0 From b8cbb72cb069672fdb4fd89c405c404365c8f16e Mon Sep 17 00:00:00 2001 From: abecerrilsalas <47731219+abecerrilsalas@users.noreply.github.com> Date: Fri, 15 Apr 2022 21:19:58 -0700 Subject: [PATCH 1432/2016] feat: add str method to table (#1199) * feat: add str method to table * feat: add str method to table * added tests * linted Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com> --- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 3 +++ packages/google-cloud-bigquery/tests/unit/test_table.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 5a4de6a0159f..621ef2fa8976 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1019,6 +1019,9 @@ def _build_resource(self, filter_fields): def __repr__(self): return "Table({})".format(repr(self.reference)) + def __str__(self): + return f"{self.project}.{self.dataset_id}.{self.table_id}" + class TableListItem(_TableBase): """A read-only table resource from a list operation. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 66bc1d3db794..ba35b22974c6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -27,6 +27,8 @@ import google.api_core.exceptions +from google.cloud.bigquery.table import TableReference + from google.cloud import bigquery_storage from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( grpc as big_query_read_grpc_transport, @@ -1410,6 +1412,11 @@ def test___repr__(self): ) self.assertEqual(repr(table1), expected) + def test___str__(self): + dataset = DatasetReference("project1", "dataset1") + table1 = self._make_one(TableReference(dataset, "table1")) + self.assertEqual(str(table1), "project1.dataset1.table1") + class Test_row_from_mapping(unittest.TestCase, _SchemaBase): From ebf1f37e5584822b0f5fc95b802aff6e6df2a2ec Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 18 Apr 2022 23:19:20 +0200 Subject: [PATCH 1433/2016] chore(deps): update dependency typing-extensions to v4.2.0 (#1221) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index eac73597fa12..24de2b62d202 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -40,6 +40,6 @@ requests==2.27.1 rsa==4.8 Shapely==1.8.1.post1 six==1.16.0 -typing-extensions==4.1.1 +typing-extensions==4.2.0 typing-inspect==0.7.1 urllib3==1.26.9 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 99792b0d97d2..6ce9559790e9 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -10,4 +10,4 @@ pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' pyarrow==7.0.0 pytz==2022.1 -typing-extensions==4.1.1 +typing-extensions==4.2.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 99792b0d97d2..6ce9559790e9 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -10,4 +10,4 @@ pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' pyarrow==7.0.0 pytz==2022.1 -typing-extensions==4.1.1 +typing-extensions==4.2.0 From 3dbffd430293e6187a6716140b79f7c637d7b1f5 Mon Sep 17 00:00:00 2001 From: Brett Naul Date: Wed, 20 Apr 2022 16:34:32 -0400 Subject: [PATCH 1434/2016] fix: Skip geography_as_object conversion for REPEATED fields (#1220) Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 621ef2fa8976..7b8c6441fa88 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1985,7 +1985,7 @@ def to_dataframe( if geography_as_object: for field in self.schema: - if field.field_type.upper() == "GEOGRAPHY": + if field.field_type.upper() == "GEOGRAPHY" and field.mode != "REPEATED": df[field.name] = df[field.name].dropna().apply(_read_wkt) return df From 48760e04a31b8bc36c2598d774d6a84c9041f5d4 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 20 Apr 2022 21:30:00 -0400 Subject: [PATCH 1435/2016] chore(python): add nox session to sort python imports (#1224) Source-Link: https://github.com/googleapis/synthtool/commit/1b71c10e20de7ed3f97f692f99a0e3399b67049f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:00c9d764fd1cd56265f12a5ef4b99a0c9e87cf261018099141e2ca5158890416 Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 4 ++-- .../samples/geography/noxfile.py | 22 +++++++++++++++++++ .../samples/magics/noxfile.py | 22 +++++++++++++++++++ .../samples/snippets/noxfile.py | 22 +++++++++++++++++++ 4 files changed, 68 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index bc893c979e20..7c454abf76f3 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8a5d3f6a2e43ed8293f34e06a2f56931d1e88a2694c3bb11b15df4eb256ad163 -# created: 2022-04-06T10:30:21.687684602Z + digest: sha256:00c9d764fd1cd56265f12a5ef4b99a0c9e87cf261018099141e2ca5158890416 +# created: 2022-04-20T23:42:53.970438194Z diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 25f87a215d4c..a40410b56369 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -30,6 +30,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING BLACK_VERSION = "black==22.3.0" +ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -168,12 +169,33 @@ def lint(session: nox.sessions.Session) -> None: @nox.session def blacken(session: nox.sessions.Session) -> None: + """Run black. Format code to uniform standard.""" session.install(BLACK_VERSION) python_files = [path for path in os.listdir(".") if path.endswith(".py")] session.run("black", *python_files) +# +# format = isort + black +# + + +@nox.session +def format(session: nox.sessions.Session) -> None: + """ + Run isort to sort imports. Then run black + to format code to uniform standard. + """ + session.install(BLACK_VERSION, ISORT_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + # Use the --fss option to sort imports using strict alphabetical order. + # See https://pycqa.github.io/isort/docs/configuration/options.html#force-sort-within-sections + session.run("isort", "--fss", *python_files) + session.run("black", *python_files) + + # # Sample Tests # diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index 25f87a215d4c..a40410b56369 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -30,6 +30,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING BLACK_VERSION = "black==22.3.0" +ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -168,12 +169,33 @@ def lint(session: nox.sessions.Session) -> None: @nox.session def blacken(session: nox.sessions.Session) -> None: + """Run black. Format code to uniform standard.""" session.install(BLACK_VERSION) python_files = [path for path in os.listdir(".") if path.endswith(".py")] session.run("black", *python_files) +# +# format = isort + black +# + + +@nox.session +def format(session: nox.sessions.Session) -> None: + """ + Run isort to sort imports. Then run black + to format code to uniform standard. + """ + session.install(BLACK_VERSION, ISORT_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + # Use the --fss option to sort imports using strict alphabetical order. + # See https://pycqa.github.io/isort/docs/configuration/options.html#force-sort-within-sections + session.run("isort", "--fss", *python_files) + session.run("black", *python_files) + + # # Sample Tests # diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 25f87a215d4c..a40410b56369 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -30,6 +30,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING BLACK_VERSION = "black==22.3.0" +ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -168,12 +169,33 @@ def lint(session: nox.sessions.Session) -> None: @nox.session def blacken(session: nox.sessions.Session) -> None: + """Run black. Format code to uniform standard.""" session.install(BLACK_VERSION) python_files = [path for path in os.listdir(".") if path.endswith(".py")] session.run("black", *python_files) +# +# format = isort + black +# + + +@nox.session +def format(session: nox.sessions.Session) -> None: + """ + Run isort to sort imports. Then run black + to format code to uniform standard. + """ + session.install(BLACK_VERSION, ISORT_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + # Use the --fss option to sort imports using strict alphabetical order. + # See https://pycqa.github.io/isort/docs/configuration/options.html#force-sort-within-sections + session.run("isort", "--fss", *python_files) + session.run("black", *python_files) + + # # Sample Tests # From a86e9d2401e73f399f3326d04d7a1ff564d65458 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 21 Apr 2022 16:20:13 +0000 Subject: [PATCH 1436/2016] chore(python): use ubuntu 22.04 in docs image (#1226) Source-Link: https://github.com/googleapis/synthtool/commit/f15cc72fb401b4861cedebb10af74afe428fb1f8 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:bc5eed3804aec2f05fad42aacf973821d9500c174015341f721a984a0825b6fd --- .../.github/.OwlBot.lock.yaml | 4 ++-- .../.kokoro/docker/docs/Dockerfile | 20 +++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 7c454abf76f3..64f82d6bf4bc 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:00c9d764fd1cd56265f12a5ef4b99a0c9e87cf261018099141e2ca5158890416 -# created: 2022-04-20T23:42:53.970438194Z + digest: sha256:bc5eed3804aec2f05fad42aacf973821d9500c174015341f721a984a0825b6fd +# created: 2022-04-21T15:43:16.246106921Z diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile index 4e1b1fb8b5a5..238b87b9d1c9 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ubuntu:20.04 +from ubuntu:22.04 ENV DEBIAN_FRONTEND noninteractive @@ -60,8 +60,24 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb +###################### Install python 3.8.11 + +# Download python 3.8.11 +RUN wget https://www.python.org/ftp/python/3.8.11/Python-3.8.11.tgz + +# Extract files +RUN tar -xvf Python-3.8.11.tgz + +# Install python 3.8.11 +RUN ./Python-3.8.11/configure --enable-optimizations +RUN make altinstall + +###################### Install pip RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3.8 /tmp/get-pip.py \ + && python3 /tmp/get-pip.py \ && rm /tmp/get-pip.py +# Test pip +RUN python3 -m pip + CMD ["python3.8"] From 529379f0243204d59ed95d482a5e3e40e9286bb9 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 22 Apr 2022 10:27:58 +0200 Subject: [PATCH 1437/2016] chore(deps): update all dependencies (#1227) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 24de2b62d202..bbba15747379 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 google-api-core==2.7.2 -google-auth==2.6.5 +google-auth==2.6.6 google-cloud-bigquery==3.0.1 google-cloud-bigquery-storage==2.13.1 google-cloud-core==2.3.0 @@ -27,7 +27,7 @@ packaging==21.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' proto-plus==1.20.3 -protobuf==3.20.0 +protobuf==3.20.1 pyarrow==7.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 From 0df68b0a6f8471b8d69fa57077d4ec41612aa69a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 25 Apr 2022 17:20:18 +0200 Subject: [PATCH 1438/2016] chore(deps): update dependency pytest to v7.1.2 (#1229) --- .../samples/geography/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements-test.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 5e29de931dd5..fb466e5093ef 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.1.1 +pytest==7.1.2 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index c5864d4f7318..d771b647d1b1 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.1 -pytest==7.1.1 +pytest==7.1.2 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index c5864d4f7318..d771b647d1b1 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.1 -pytest==7.1.1 +pytest==7.1.2 mock==4.0.3 From ecf9a0ba7599793ff064d8d0b47025124e1a4352 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 27 Apr 2022 12:14:09 -0400 Subject: [PATCH 1439/2016] chore: use gapic-generator-python 0.65.2 (#1232) * chore: use gapic-generator-python 0.65.2 PiperOrigin-RevId: 444333013 Source-Link: https://github.com/googleapis/googleapis/commit/f91b6cf82e929280f6562f6110957c654bd9e2e6 Source-Link: https://github.com/googleapis/googleapis-gen/commit/16eb36095c294e712c74a1bf23550817b42174e5 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiMTZlYjM2MDk1YzI5NGU3MTJjNzRhMWJmMjM1NTA4MTdiNDIxNzRlNSJ9 * update owlbot.py to pull in changes from gapic generator Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .../cloud/bigquery_v2/types/__init__.py | 12 +++++++++--- .../google/cloud/bigquery_v2/types/model.py | 4 ++-- packages/google-cloud-bigquery/owlbot.py | 19 +++++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py index c038bcd74899..c36b30969b08 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/__init__.py @@ -13,7 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from .encryption_config import EncryptionConfiguration +from .encryption_config import ( + EncryptionConfiguration, +) from .model import ( DeleteModelRequest, GetModelRequest, @@ -22,14 +24,18 @@ Model, PatchModelRequest, ) -from .model_reference import ModelReference +from .model_reference import ( + ModelReference, +) from .standard_sql import ( StandardSqlDataType, StandardSqlField, StandardSqlStructType, StandardSqlTableType, ) -from .table_reference import TableReference +from .table_reference import ( + TableReference, +) __all__ = ( "EncryptionConfiguration", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py index 7786d8ea4b5a..f32e15eb12b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery_v2/types/model.py @@ -55,7 +55,7 @@ class Model(proto.Message): model. friendly_name (str): Optional. A descriptive name for this model. - labels (Sequence[google.cloud.bigquery_v2.types.Model.LabelsEntry]): + labels (Mapping[str, str]): The labels associated with this model. You can use these to organize and group your models. Label keys and values can be no longer than 63 @@ -1200,7 +1200,7 @@ class TrainingOptions(proto.Message): initial_learn_rate (float): Specifies the initial learning rate for the line search learn rate strategy. - label_class_weights (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]): + label_class_weights (Mapping[str, float]): Weights associated with each label class, for rebalancing the training data. Only applicable for classification models. diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 04628050189a..ca96f4e08145 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -19,6 +19,25 @@ from synthtool import gcp from synthtool.languages import python +default_version = "v2" + +for library in s.get_staging_dirs(default_version): + # Avoid breaking change due to change in field renames. + # https://github.com/googleapis/python-bigquery/issues/319 + s.replace( + library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", + r"type_ ", + "type ", + ) + # Patch docs issue + s.replace( + library / f"google/cloud/bigquery_{library.name}/types/model.py", + r"""\"predicted_\"""", + """`predicted_`""", + ) + s.move(library / f"google/cloud/bigquery_{library.name}/types") +s.remove_staging_dirs() + common = gcp.CommonTemplates() # ---------------------------------------------------------------------------- From 93cfc6a18cda6ddc3fb7110a3ff699a584151c10 Mon Sep 17 00:00:00 2001 From: abecerrilsalas <47731219+abecerrilsalas@users.noreply.github.com> Date: Thu, 28 Apr 2022 11:35:40 -0700 Subject: [PATCH 1440/2016] feat: support using BIGQUERY_EMULATOR_HOST environment variable (#1222) * feat:adding emulator * apb_feature_bq_emulator_test * apb_feature_bq_emulator_variable_name_change * abecerrilsalas_feature_bq_emulator_typo_changes * feat:fixing var names and start of test * abecerrilsalas_feature_additional_tests * feat: testing update * feat: fixed failed lint test --- .../google/cloud/bigquery/_helpers.py | 13 +++++++++- .../google/cloud/bigquery/client.py | 5 +++- .../tests/unit/test__helpers.py | 26 +++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 6faa32606ebd..b59bc86d3496 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -19,6 +19,7 @@ import decimal import math import re +import os from typing import Optional, Union from dateutil import relativedelta @@ -28,8 +29,8 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes -import packaging.version +import packaging.version _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -51,6 +52,16 @@ _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") +BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" +"""Environment variable defining host for emulator.""" + +_DEFAULT_HOST = "https://bigquery.googleapis.com" +"""Default host for JSON API.""" + + +def _get_bigquery_host(): + return os.environ.get(BIGQUERY_EMULATOR_HOST, _DEFAULT_HOST) + class BQStorageVersions: """Version comparisons for google-cloud-bigqueyr-storage package.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index b388f1d4c2d4..fb772ea113e5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -56,7 +56,6 @@ import google.cloud._helpers # type: ignore from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # type: ignore # pytype: disable=import-error - from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, ) @@ -67,6 +66,8 @@ from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._helpers import _verify_job_config_type +from google.cloud.bigquery._helpers import _get_bigquery_host +from google.cloud.bigquery._helpers import _DEFAULT_HOST from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset @@ -230,6 +231,8 @@ def __init__( ) kw_args = {"client_info": client_info} + bq_host = _get_bigquery_host() + kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None if client_options: if type(client_options) == dict: client_options = google.api_core.client_options.from_dict( diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 885e773d3ba3..2e714c707f84 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -1288,3 +1288,29 @@ def test_decimal_as_float_api_repr(): "parameterValue": {"value": 42.0}, "name": "x", } + + +class Test__get_bigquery_host(unittest.TestCase): + @staticmethod + def _call_fut(): + from google.cloud.bigquery._helpers import _get_bigquery_host + + return _get_bigquery_host() + + def test_wo_env_var(self): + from google.cloud.bigquery._helpers import _DEFAULT_HOST + + with mock.patch("os.environ", {}): + host = self._call_fut() + + self.assertEqual(host, _DEFAULT_HOST) + + def test_w_env_var(self): + from google.cloud.bigquery._helpers import BIGQUERY_EMULATOR_HOST + + HOST = "https://api.example.com" + + with mock.patch("os.environ", {BIGQUERY_EMULATOR_HOST: HOST}): + host = self._call_fut() + + self.assertEqual(host, HOST) From 2f0a4eef3f988df5397ce211abbf18398a51eaad Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 28 Apr 2022 21:21:57 +0200 Subject: [PATCH 1441/2016] chore(deps): update dependency click to v8.1.3 (#1233) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index bbba15747379..8ba3c08d94a0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -2,7 +2,7 @@ attrs==21.4.0 certifi==2021.10.8 cffi==1.15.0 charset-normalizer==2.0.12 -click==8.1.2 +click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' From 7dc49b58224fa17f532ec07ecfdbde4f96feae38 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 29 Apr 2022 17:31:06 +0200 Subject: [PATCH 1442/2016] chore(deps): update dependency ipython to v8.3.0 (#1234) --- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 6ce9559790e9..2907b7f21107 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -4,7 +4,7 @@ google-auth-oauthlib==0.5.1 grpcio==1.44.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.2.0; python_version >= '3.9' +ipython==8.3.0; python_version >= '3.9' matplotlib==3.5.1 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 6ce9559790e9..2907b7f21107 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -4,7 +4,7 @@ google-auth-oauthlib==0.5.1 grpcio==1.44.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.2.0; python_version >= '3.9' +ipython==8.3.0; python_version >= '3.9' matplotlib==3.5.1 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' From eeca8329e85d3c57d7ba658dea24e1bb04bdadba Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 2 May 2022 23:16:20 +0200 Subject: [PATCH 1443/2016] chore(deps): update dependency google-api-core to v2.7.3 (#1236) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 8ba3c08d94a0..94268f7ca47f 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -10,7 +10,7 @@ db-dtypes==1.0.0 Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 -google-api-core==2.7.2 +google-api-core==2.7.3 google-auth==2.6.6 google-cloud-bigquery==3.0.1 google-cloud-bigquery-storage==2.13.1 From 52d18a553de4b469bc861d30ed6ba465194db976 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 3 May 2022 16:16:46 +0200 Subject: [PATCH 1444/2016] chore(deps): update dependency matplotlib to v3.5.2 (#1237) --- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 2907b7f21107..c1ff0715f35e 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -5,7 +5,7 @@ grpcio==1.44.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.3.0; python_version >= '3.9' -matplotlib==3.5.1 +matplotlib==3.5.2 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' pyarrow==7.0.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 2907b7f21107..c1ff0715f35e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -5,7 +5,7 @@ grpcio==1.44.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.3.0; python_version >= '3.9' -matplotlib==3.5.1 +matplotlib==3.5.2 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' pyarrow==7.0.0 From 88972b44eda018e53886c864a13646e958e55f70 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 5 May 2022 02:43:19 +0200 Subject: [PATCH 1445/2016] chore(deps): update all dependencies (#1238) --- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 94268f7ca47f..dc2d9e72bcb0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -18,9 +18,9 @@ google-cloud-core==2.3.0 google-crc32c==1.3.0 google-resumable-media==2.3.2 googleapis-common-protos==1.56.0 -grpcio==1.44.0 +grpcio==1.46.0 idna==3.3 -libcst==0.4.1 +libcst==0.4.2 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 @@ -38,7 +38,7 @@ pytz==2022.1 PyYAML==6.0 requests==2.27.1 rsa==4.8 -Shapely==1.8.1.post1 +Shapely==1.8.2 six==1.16.0 typing-extensions==4.2.0 typing-inspect==0.7.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index c1ff0715f35e..decfc476421c 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.0.0 google-cloud-bigquery-storage==2.13.1 google-auth-oauthlib==0.5.1 -grpcio==1.44.0 +grpcio==1.46.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.3.0; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index c1ff0715f35e..decfc476421c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.0.0 google-cloud-bigquery-storage==2.13.1 google-auth-oauthlib==0.5.1 -grpcio==1.44.0 +grpcio==1.46.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.3.0; python_version >= '3.9' From 071d1320314eb30293deaba9f912f052d7918a0a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 12:13:38 -0400 Subject: [PATCH 1446/2016] chore: [autoapprove] update readme_gen.py to include autoescape True (#1240) Source-Link: https://github.com/googleapis/synthtool/commit/6b4d5a6407d740beb4158b302194a62a4108a8a6 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:f792ee1320e03eda2d13a5281a2989f7ed8a9e50b73ef6da97fac7e1e850b149 Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- .../google-cloud-bigquery/scripts/readme-gen/readme_gen.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 64f82d6bf4bc..b631901e99f4 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:bc5eed3804aec2f05fad42aacf973821d9500c174015341f721a984a0825b6fd -# created: 2022-04-21T15:43:16.246106921Z + digest: sha256:f792ee1320e03eda2d13a5281a2989f7ed8a9e50b73ef6da97fac7e1e850b149 +# created: 2022-05-05T15:17:27.599381182Z diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py b/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py index d309d6e97518..91b59676bfc7 100644 --- a/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py +++ b/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py @@ -28,7 +28,10 @@ jinja_env = jinja2.Environment( trim_blocks=True, loader=jinja2.FileSystemLoader( - os.path.abspath(os.path.join(os.path.dirname(__file__), 'templates')))) + os.path.abspath(os.path.join(os.path.dirname(__file__), "templates")) + ), + autoescape=True, +) README_TMPL = jinja_env.get_template('README.tmpl.rst') From db20600cce40c61b6c999d46a11faad1f5b3027b Mon Sep 17 00:00:00 2001 From: meredithslota Date: Thu, 5 May 2022 12:00:49 -0700 Subject: [PATCH 1447/2016] docs: updated variable typo in comment in code sample (#1239) Fixes b/210752711 --- packages/google-cloud-bigquery/samples/undelete_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/undelete_table.py b/packages/google-cloud-bigquery/samples/undelete_table.py index c230a9230f8f..5ae345247e4c 100644 --- a/packages/google-cloud-bigquery/samples/undelete_table.py +++ b/packages/google-cloud-bigquery/samples/undelete_table.py @@ -28,7 +28,7 @@ def undelete_table(table_id: str, recovered_table_id: str) -> None: # table_id = "your-project.your_dataset.your_table" # TODO(developer): Choose a new table ID for the recovered table data. - # recovery_table_id = "your-project.your_dataset.your_table_recovered" + # recovered_table_id = "your-project.your_dataset.your_table_recovered" # TODO(developer): Choose an appropriate snapshot point as epoch # milliseconds. For this example, we choose the current time as we're about From 20d6bb3a9727563ae7b08ed04934ec4bb95e5bdd Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 23:08:25 +0000 Subject: [PATCH 1448/2016] chore(python): auto approve template changes (#1242) Source-Link: https://github.com/googleapis/synthtool/commit/453a5d9c9a55d1969240a37d36cec626d20a9024 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:81ed5ecdfc7cac5b699ba4537376f3563f6f04122c4ec9e735d3b3dc1d43dd32 --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- packages/google-cloud-bigquery/.github/auto-approve.yml | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 packages/google-cloud-bigquery/.github/auto-approve.yml diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index b631901e99f4..757c9dca75ad 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:f792ee1320e03eda2d13a5281a2989f7ed8a9e50b73ef6da97fac7e1e850b149 -# created: 2022-05-05T15:17:27.599381182Z + digest: sha256:81ed5ecdfc7cac5b699ba4537376f3563f6f04122c4ec9e735d3b3dc1d43dd32 +# created: 2022-05-05T22:08:23.383410683Z diff --git a/packages/google-cloud-bigquery/.github/auto-approve.yml b/packages/google-cloud-bigquery/.github/auto-approve.yml new file mode 100644 index 000000000000..311ebbb853a9 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/auto-approve.yml @@ -0,0 +1,3 @@ +# https://github.com/googleapis/repo-automation-bots/tree/main/packages/auto-approve +processes: + - "OwlBotTemplateChanges" From a13871b7b492100aca9cde82a0b474ca3d4d5ef6 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 9 May 2022 11:49:40 -0400 Subject: [PATCH 1449/2016] fix(deps): allow pyarrow v8 (#1245) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 86eb2d41d399..52ffac019dd8 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -44,7 +44,7 @@ "packaging >= 14.3", "protobuf >= 3.12.0", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", - "pyarrow >= 3.0.0, < 8.0dev", + "pyarrow >= 3.0.0, < 9.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { From 887d0d97b0aed07335d75b5834e7c470558548f7 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 9 May 2022 13:04:52 -0400 Subject: [PATCH 1450/2016] chore(main): release 3.1.0 (#1207) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 21 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index ca99c969f6bb..0771a8f49188 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,27 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.1.0](https://github.com/googleapis/python-bigquery/compare/v3.0.1...v3.1.0) (2022-05-09) + + +### Features + +* add str method to table ([#1199](https://github.com/googleapis/python-bigquery/issues/1199)) ([8da4fa9](https://github.com/googleapis/python-bigquery/commit/8da4fa9e77bcfd2b68818b5d65b38ccc59899a01)) +* refactor AccessEntry to use _properties pattern ([#1125](https://github.com/googleapis/python-bigquery/issues/1125)) ([acd5612](https://github.com/googleapis/python-bigquery/commit/acd5612d2fc469633936dbc463ce4d70951e7fdd)) +* support using BIGQUERY_EMULATOR_HOST environment variable ([#1222](https://github.com/googleapis/python-bigquery/issues/1222)) ([39294b4](https://github.com/googleapis/python-bigquery/commit/39294b4950896b084573bedb4c5adc2b8d371eac)) + + +### Bug Fixes + +* **deps:** allow pyarrow v8 ([#1245](https://github.com/googleapis/python-bigquery/issues/1245)) ([d258690](https://github.com/googleapis/python-bigquery/commit/d258690dbf01108e1426f0e28d792c418a88bce0)) +* export bigquery.HivePartitioningOptions ([#1217](https://github.com/googleapis/python-bigquery/issues/1217)) ([8eb757b](https://github.com/googleapis/python-bigquery/commit/8eb757bcded7a3ef3b2264f47ec080c0a8fca579)) +* Skip geography_as_object conversion for REPEATED fields ([#1220](https://github.com/googleapis/python-bigquery/issues/1220)) ([4d3d6ec](https://github.com/googleapis/python-bigquery/commit/4d3d6ec9e667a781f8cb4a3aee0376c6179d5ce1)) + + +### Documentation + +* updated variable typo in comment in code sample ([#1239](https://github.com/googleapis/python-bigquery/issues/1239)) ([e420112](https://github.com/googleapis/python-bigquery/commit/e4201128bdb7f49cb732e12609448bbdbc122736)) + ### [3.0.1](https://github.com/googleapis/python-bigquery/compare/v3.0.0...v3.0.1) (2022-03-30) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index ad3213664053..6ce498ba5542 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.0.1" +__version__ = "3.1.0" From 5b4c3fe52a6d54260d086340443d41ae7912733e Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 9 May 2022 22:56:08 +0200 Subject: [PATCH 1451/2016] chore: update all dependencies (#1244) * chore(deps): update dependency pyarrow to v8 * fix(deps): allow pyarrow v8 * chore(deps): update dependency db-dtypes to 1.0.1 * chore(deps): update dependency google-cloud-bigquery to v3.1.0 Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index dc2d9e72bcb0..681409085ed1 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -6,13 +6,13 @@ click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.0.0 +db-dtypes==1.0.1 Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 google-api-core==2.7.3 google-auth==2.6.6 -google-cloud-bigquery==3.0.1 +google-cloud-bigquery==3.1.0 google-cloud-bigquery-storage==2.13.1 google-cloud-core==2.3.0 google-crc32c==1.3.0 @@ -28,7 +28,7 @@ pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' proto-plus==1.20.3 protobuf==3.20.1 -pyarrow==7.0.0 +pyarrow==8.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index decfc476421c..d8187f60585e 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -db-dtypes==1.0.0 +db-dtypes==1.0.1 google-cloud-bigquery-storage==2.13.1 google-auth-oauthlib==0.5.1 grpcio==1.46.0 @@ -8,6 +8,6 @@ ipython==8.3.0; python_version >= '3.9' matplotlib==3.5.2 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' -pyarrow==7.0.0 +pyarrow==8.0.0 pytz==2022.1 typing-extensions==4.2.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index decfc476421c..d8187f60585e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -db-dtypes==1.0.0 +db-dtypes==1.0.1 google-cloud-bigquery-storage==2.13.1 google-auth-oauthlib==0.5.1 grpcio==1.46.0 @@ -8,6 +8,6 @@ ipython==8.3.0; python_version >= '3.9' matplotlib==3.5.2 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' -pyarrow==7.0.0 +pyarrow==8.0.0 pytz==2022.1 typing-extensions==4.2.0 From 3b19eb23060f642e3b7d7f01a8e17bfaf7056acf Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 11 May 2022 15:43:47 +0200 Subject: [PATCH 1452/2016] chore(deps): update dependency pyparsing to v3.0.9 (#1246) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 681409085ed1..3154763bfe07 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -32,7 +32,7 @@ pyarrow==8.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 -pyparsing==3.0.8 +pyparsing==3.0.9 python-dateutil==2.8.2 pytz==2022.1 PyYAML==6.0 From 03dcb9a0a2e18b0a610c6c6ccb8b9720963980ad Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 11 May 2022 18:27:09 +0200 Subject: [PATCH 1453/2016] chore(deps): update dependency libcst to v0.4.3 (#1247) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 3154763bfe07..e880ace873b6 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -20,7 +20,7 @@ google-resumable-media==2.3.2 googleapis-common-protos==1.56.0 grpcio==1.46.0 idna==3.3 -libcst==0.4.2 +libcst==0.4.3 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 From b2025b39f61fb07880cdd96b39f3b519b82d5af9 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 12 May 2022 20:25:02 +0200 Subject: [PATCH 1454/2016] chore(deps): update dependency grpcio to v1.46.1 (#1249) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e880ace873b6..90dccc33dc61 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -18,7 +18,7 @@ google-cloud-core==2.3.0 google-crc32c==1.3.0 google-resumable-media==2.3.2 googleapis-common-protos==1.56.0 -grpcio==1.46.0 +grpcio==1.46.1 idna==3.3 libcst==0.4.3 munch==2.5.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index d8187f60585e..cf682fd77a8d 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.0.1 google-cloud-bigquery-storage==2.13.1 google-auth-oauthlib==0.5.1 -grpcio==1.46.0 +grpcio==1.46.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.3.0; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index d8187f60585e..cf682fd77a8d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.0.1 google-cloud-bigquery-storage==2.13.1 google-auth-oauthlib==0.5.1 -grpcio==1.46.0 +grpcio==1.46.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.3.0; python_version >= '3.9' From 646f16200ec87bff689db1dbc89df3b570b03071 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 13 May 2022 15:05:55 +0200 Subject: [PATCH 1455/2016] chore(deps): update dependency googleapis-common-protos to v1.56.1 (#1251) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 90dccc33dc61..89ca511d2289 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -17,7 +17,7 @@ google-cloud-bigquery-storage==2.13.1 google-cloud-core==2.3.0 google-crc32c==1.3.0 google-resumable-media==2.3.2 -googleapis-common-protos==1.56.0 +googleapis-common-protos==1.56.1 grpcio==1.46.1 idna==3.3 libcst==0.4.3 From 9716b8cd54c44063ac7ece9822caf74e92f13e3f Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 19 May 2022 14:00:31 +0200 Subject: [PATCH 1456/2016] chore(deps): update all dependencies (#1253) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 89ca511d2289..29e17f6939e7 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==21.4.0 -certifi==2021.10.8 +certifi==2022.5.18 cffi==1.15.0 charset-normalizer==2.0.12 click==8.1.3 @@ -10,7 +10,7 @@ db-dtypes==1.0.1 Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 -google-api-core==2.7.3 +google-api-core==2.8.0 google-auth==2.6.6 google-cloud-bigquery==3.1.0 google-cloud-bigquery-storage==2.13.1 From 40efd8e5bba36056ef2a91ce58024d0104b8348c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 19 May 2022 16:50:57 +0200 Subject: [PATCH 1457/2016] chore(deps): update all dependencies (#1255) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 29e17f6939e7..b4865e9379c9 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -16,7 +16,7 @@ google-cloud-bigquery==3.1.0 google-cloud-bigquery-storage==2.13.1 google-cloud-core==2.3.0 google-crc32c==1.3.0 -google-resumable-media==2.3.2 +google-resumable-media==2.3.3 googleapis-common-protos==1.56.1 grpcio==1.46.1 idna==3.3 @@ -26,7 +26,7 @@ mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' -proto-plus==1.20.3 +proto-plus==1.20.4 protobuf==3.20.1 pyarrow==8.0.0 pyasn1==0.4.8 From 273c26741b51c90f5f7187175841509959ba8b03 Mon Sep 17 00:00:00 2001 From: Alma Becerril Salas <47731219+abecerrilsalas@users.noreply.github.com> Date: Fri, 20 May 2022 12:22:56 -0700 Subject: [PATCH 1458/2016] feat: add support for table clones (#1235) * feat: add support for table clones * feat: clone test * feat: debugging * feat: more debugging * feat: more debugging * feat: even more debugging * feat: debugging test * feat: even more test debugging * feat: check * feat: modify test * feat: deleting print statement * feat: testing * feat: test update * feat: change table name * feat: debugging table name * feat: cleaning up test * feat: degubbing test * feat: add properties check to test * feat: test change * feat: added more properties * Update samples/snippets/requirements.txt Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com> --- .../google-cloud-bigquery/docs/reference.rst | 1 + .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/job/copy_.py | 3 + .../google/cloud/bigquery/table.py | 37 ++++++++++ .../samples/magics/requirements.txt | 2 +- .../tests/system/test_client.py | 54 ++++++++++++++ .../tests/unit/test_table.py | 74 +++++++++++++++++++ 7 files changed, 172 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 4f655b09e36e..b886f11612f6 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -97,6 +97,7 @@ Table table.Row table.RowIterator table.SnapshotDefinition + table.CloneDefinition table.Table table.TableListItem table.TableReference diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 81b1285e3fe5..5a4520476396 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -101,6 +101,7 @@ from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import Row from google.cloud.bigquery.table import SnapshotDefinition +from google.cloud.bigquery.table import CloneDefinition from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioningType @@ -132,6 +133,7 @@ "RangePartitioning", "Row", "SnapshotDefinition", + "CloneDefinition", "TimePartitioning", "TimePartitioningType", # Jobs diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py index 29558c01f605..eb7f609a50d1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py @@ -40,6 +40,9 @@ class OperationType: SNAPSHOT = "SNAPSHOT" """The source table type is TABLE and the destination table type is SNAPSHOT.""" + CLONE = "CLONE" + """The source table type is TABLE and the destination table type is CLONE.""" + RESTORE = "RESTORE" """The source table type is SNAPSHOT and the destination table type is TABLE.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 7b8c6441fa88..72eb1baf6430 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -356,6 +356,7 @@ class Table(_TableBase): "time_partitioning": "timePartitioning", "schema": "schema", "snapshot_definition": "snapshotDefinition", + "clone_definition": "cloneDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", "time_partitioning": "timePartitioning", @@ -929,6 +930,19 @@ def snapshot_definition(self) -> Optional["SnapshotDefinition"]: snapshot_info = SnapshotDefinition(snapshot_info) return snapshot_info + @property + def clone_definition(self) -> Optional["CloneDefinition"]: + """Information about the clone. This value is set via clone creation. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.clone_definition + """ + clone_info = self._properties.get( + self._PROPERTY_TO_API_FIELD["clone_definition"] + ) + if clone_info is not None: + clone_info = CloneDefinition(clone_info) + return clone_info + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. @@ -1304,6 +1318,29 @@ def __init__(self, resource: Dict[str, Any]): ) +class CloneDefinition: + """Information about base table and clone time of the clone. + + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clonedefinition + + Args: + resource: Clone definition representation returned from the API. + """ + + def __init__(self, resource: Dict[str, Any]): + self.base_table_reference = None + if "baseTableReference" in resource: + self.base_table_reference = TableReference.from_api_repr( + resource["baseTableReference"] + ) + + self.clone_time = None + if "cloneTime" in resource: + self.clone_time = google.cloud._helpers._rfc3339_to_datetime( + resource["cloneTime"] + ) + + class Row(object): """A BigQuery row. diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index cf682fd77a8d..f26b4dc9ba49 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -5,7 +5,7 @@ grpcio==1.46.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.3.0; python_version >= '3.9' -matplotlib==3.5.2 +matplotlib==3.5.1 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' pyarrow==8.0.0 diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 773ef3c90b89..49eb70a8b7b4 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2190,3 +2190,57 @@ def test_table_snapshots(dataset_id): rows_iter = client.list_rows(source_table_path) rows = sorted(row.values() for row in rows_iter) assert rows == [(1, "one"), (2, "two")] + + +def test_table_clones(dataset_id): + from google.cloud.bigquery import CopyJobConfig + from google.cloud.bigquery import OperationType + + client = Config.CLIENT + + table_path_source = f"{client.project}.{dataset_id}.test_table_clone" + clone_table_path = f"{table_path_source}_clone" + + # Create the table before loading so that the column order is predictable. + schema = [ + bigquery.SchemaField("foo", "INTEGER"), + bigquery.SchemaField("bar", "STRING"), + ] + source_table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_path_source, schema=schema) + ) + + # Populate the table with initial data. + rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}] + load_job = Config.CLIENT.load_table_from_json(rows, source_table) + load_job.result() + + # Now create a clone before modifying the original table data. + copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.CLONE + copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + + copy_job = client.copy_table( + sources=table_path_source, + destination=clone_table_path, + job_config=copy_config, + ) + copy_job.result() + + # List rows from the source table and compare them to rows from the clone. + rows_iter = client.list_rows(table_path_source) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + + rows_iter = client.list_rows(clone_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + + # Compare properties of the source and clone table. + source_table_props = client.get_table(table_path_source) + clone_table_props = client.get_table(clone_table_path) + + assert source_table_props.schema == clone_table_props.schema + assert source_table_props.num_bytes == clone_table_props.num_bytes + assert source_table_props.num_rows == clone_table_props.num_rows + assert source_table_props.description == clone_table_props.description diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index ba35b22974c6..b5f2e58c6406 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -841,6 +841,40 @@ def test_snapshot_definition_set(self): 2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC ) + def test_clone_definition_not_set(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + assert table.clone_definition is None + + def test_clone_definition_set(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import CloneDefinition + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table._properties["cloneDefinition"] = { + "baseTableReference": { + "projectId": "project_x", + "datasetId": "dataset_y", + "tableId": "table_z", + }, + "cloneTime": "2010-09-28T10:20:30.123Z", + } + + clone = table.clone_definition + + assert isinstance(clone, CloneDefinition) + assert clone.base_table_reference.path == ( + "/projects/project_x/datasets/dataset_y/tables/table_z" + ) + assert clone.clone_time == datetime.datetime( + 2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC + ) + def test_description_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1789,6 +1823,46 @@ def test_ctor_full_resource(self): assert instance.snapshot_time == expected_time +class TestCloneDefinition: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import CloneDefinition + + return CloneDefinition + + @classmethod + def _make_one(cls, *args, **kwargs): + klass = cls._get_target_class() + return klass(*args, **kwargs) + + def test_ctor_empty_resource(self): + instance = self._make_one(resource={}) + assert instance.base_table_reference is None + assert instance.clone_time is None + + def test_ctor_full_resource(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import TableReference + + resource = { + "baseTableReference": { + "projectId": "my-project", + "datasetId": "your-dataset", + "tableId": "our-table", + }, + "cloneTime": "2005-06-07T19:35:02.123Z", + } + instance = self._make_one(resource) + + expected_table_ref = TableReference.from_string( + "my-project.your-dataset.our-table" + ) + assert instance.base_table_reference == expected_table_ref + + expected_time = datetime.datetime(2005, 6, 7, 19, 35, 2, 123000, tzinfo=UTC) + assert instance.clone_time == expected_time + + class TestRow(unittest.TestCase): def test_row(self): from google.cloud.bigquery.table import Row From d35a0310c13df7ea8648aaf7a566784df524cae4 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sun, 22 May 2022 20:07:34 +0200 Subject: [PATCH 1459/2016] chore(deps): update all dependencies (#1258) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index b4865e9379c9..c602a40fce09 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==21.4.0 -certifi==2022.5.18 +certifi==2022.5.18.1 cffi==1.15.0 charset-normalizer==2.0.12 click==8.1.3 @@ -18,7 +18,7 @@ google-cloud-core==2.3.0 google-crc32c==1.3.0 google-resumable-media==2.3.3 googleapis-common-protos==1.56.1 -grpcio==1.46.1 +grpcio==1.46.3 idna==3.3 libcst==0.4.3 munch==2.5.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index f26b4dc9ba49..ad64565e2f51 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,11 +1,11 @@ db-dtypes==1.0.1 google-cloud-bigquery-storage==2.13.1 google-auth-oauthlib==0.5.1 -grpcio==1.46.1 +grpcio==1.46.3 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.3.0; python_version >= '3.9' -matplotlib==3.5.1 +matplotlib==3.5.2 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' pyarrow==8.0.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index cf682fd77a8d..ad64565e2f51 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.0.1 google-cloud-bigquery-storage==2.13.1 google-auth-oauthlib==0.5.1 -grpcio==1.46.1 +grpcio==1.46.3 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.3.0; python_version >= '3.9' From 0df9b7cb60c37a99b7d092daea484850fbc3c1a1 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 26 May 2022 16:44:15 -0400 Subject: [PATCH 1460/2016] fix(deps): require protobuf>= 3.12.0, <4.0.0dev (#1263) fix(deps): require packaging >= 14.3, <22.0.0dev fix(deps): proto-plus >= 1.15.0, <2.0.0dev --- packages/google-cloud-bigquery/setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 52ffac019dd8..a040e96e75d0 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -35,14 +35,14 @@ # https://github.com/googleapis/google-cloud-python/issues/10566 "google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", - "proto-plus >= 1.15.0", + "proto-plus >= 1.15.0, <2.0.0dev", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", - "packaging >= 14.3", - "protobuf >= 3.12.0", # For the legacy proto-based types. + "packaging >= 14.3, <22.0.0dev", + "protobuf >= 3.12.0, <4.0.0dev", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "pyarrow >= 3.0.0, < 9.0dev", "requests >= 2.18.0, < 3.0.0dev", From d7e809bd826f3e024a5d55561733576ca458fa52 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 30 May 2022 19:10:25 +0200 Subject: [PATCH 1461/2016] chore(deps): update all dependencies (#1264) * chore(deps): update all dependencies * revert protobuf * revert protobuf Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index c602a40fce09..cbd7c4e5dcc2 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -10,14 +10,14 @@ db-dtypes==1.0.1 Fiona==1.8.21 geojson==2.5.0 geopandas==0.10.2 -google-api-core==2.8.0 +google-api-core==2.8.1 google-auth==2.6.6 google-cloud-bigquery==3.1.0 google-cloud-bigquery-storage==2.13.1 google-cloud-core==2.3.0 google-crc32c==1.3.0 google-resumable-media==2.3.3 -googleapis-common-protos==1.56.1 +googleapis-common-protos==1.56.2 grpcio==1.46.3 idna==3.3 libcst==0.4.3 @@ -26,7 +26,7 @@ mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' -proto-plus==1.20.4 +proto-plus==1.20.5 protobuf==3.20.1 pyarrow==8.0.0 pyasn1==0.4.8 From 0e64c2124d73480189ab2f6f4a4b710edff8441c Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Wed, 1 Jun 2022 16:41:34 -0400 Subject: [PATCH 1462/2016] docs: fix changelog header to consistent size (#1268) --- packages/google-cloud-bigquery/CHANGELOG.md | 42 ++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 0771a8f49188..9a2c1b26e9ce 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -26,7 +26,7 @@ * updated variable typo in comment in code sample ([#1239](https://github.com/googleapis/python-bigquery/issues/1239)) ([e420112](https://github.com/googleapis/python-bigquery/commit/e4201128bdb7f49cb732e12609448bbdbc122736)) -### [3.0.1](https://github.com/googleapis/python-bigquery/compare/v3.0.0...v3.0.1) (2022-03-30) +## [3.0.1](https://github.com/googleapis/python-bigquery/compare/v3.0.0...v3.0.1) (2022-03-30) ### Bug Fixes @@ -75,14 +75,14 @@ * BigQuery Storage and pyarrow are required dependencies ([#776](https://github.com/googleapis/python-bigquery/issues/776)) ([76d88fb](https://github.com/googleapis/python-bigquery/commit/76d88fbb1316317a61fa1a63c101bc6f42f23af8)) -### [2.34.3](https://github.com/googleapis/python-bigquery/compare/v2.34.2...v2.34.3) (2022-03-29) +## [2.34.3](https://github.com/googleapis/python-bigquery/compare/v2.34.2...v2.34.3) (2022-03-29) ### Bug Fixes * update content-type header ([#1171](https://github.com/googleapis/python-bigquery/issues/1171)) ([921b440](https://github.com/googleapis/python-bigquery/commit/921b440fdd151e88ee5b3e0d9fb90177877dc11a)) -### [2.34.2](https://github.com/googleapis/python-bigquery/compare/v2.34.1...v2.34.2) (2022-03-05) +## [2.34.2](https://github.com/googleapis/python-bigquery/compare/v2.34.1...v2.34.2) (2022-03-05) ### Bug Fixes @@ -90,7 +90,7 @@ * **deps:** require google-api-core>=1.31.5, >=2.3.2 ([#1157](https://github.com/googleapis/python-bigquery/issues/1157)) ([0c15790](https://github.com/googleapis/python-bigquery/commit/0c15790720ff573a501cfe760dd74ee166e1a353)) * **deps:** require proto-plus>=1.15.0 ([0c15790](https://github.com/googleapis/python-bigquery/commit/0c15790720ff573a501cfe760dd74ee166e1a353)) -### [2.34.1](https://github.com/googleapis/python-bigquery/compare/v2.34.0...v2.34.1) (2022-03-02) +## [2.34.1](https://github.com/googleapis/python-bigquery/compare/v2.34.0...v2.34.1) (2022-03-02) ### Dependencies @@ -153,7 +153,7 @@ * support OpenTelemetry >= 1.1.0 ([#1050](https://www.github.com/googleapis/python-bigquery/issues/1050)) ([4616cd5](https://www.github.com/googleapis/python-bigquery/commit/4616cd58d3c6da641fb881ce99a87dcdedc20ba2)) -### [2.30.1](https://www.github.com/googleapis/python-bigquery/compare/v2.30.0...v2.30.1) (2021-11-04) +## [2.30.1](https://www.github.com/googleapis/python-bigquery/compare/v2.30.0...v2.30.1) (2021-11-04) ### Bug Fixes @@ -201,7 +201,7 @@ * allow pyarrow 6.x ([#1031](https://www.github.com/googleapis/python-bigquery/issues/1031)) ([1c2de74](https://www.github.com/googleapis/python-bigquery/commit/1c2de74a55046a343bcf9474f67100a82fb05401)) -### [2.28.1](https://www.github.com/googleapis/python-bigquery/compare/v2.28.0...v2.28.1) (2021-10-07) +## [2.28.1](https://www.github.com/googleapis/python-bigquery/compare/v2.28.0...v2.28.1) (2021-10-07) ### Bug Fixes @@ -220,7 +220,7 @@ * link to stable pandas docs ([#990](https://www.github.com/googleapis/python-bigquery/issues/990)) ([ea50e80](https://www.github.com/googleapis/python-bigquery/commit/ea50e8031fc035b3772a338bc00982de263cefad)) -### [2.27.1](https://www.github.com/googleapis/python-bigquery/compare/v2.27.0...v2.27.1) (2021-09-27) +## [2.27.1](https://www.github.com/googleapis/python-bigquery/compare/v2.27.0...v2.27.1) (2021-09-27) ### Bug Fixes @@ -259,7 +259,7 @@ * guard imports against unsupported pyarrow versions ([#934](https://www.github.com/googleapis/python-bigquery/issues/934)) ([b289076](https://www.github.com/googleapis/python-bigquery/commit/b28907693bbe889becc1b9c8963f0a7e1ee6c35a)) -### [2.25.2](https://www.github.com/googleapis/python-bigquery/compare/v2.25.1...v2.25.2) (2021-08-31) +## [2.25.2](https://www.github.com/googleapis/python-bigquery/compare/v2.25.1...v2.25.2) (2021-08-31) ### Bug Fixes @@ -272,7 +272,7 @@ * update docstring for bigquery_create_routine sample ([#883](https://www.github.com/googleapis/python-bigquery/issues/883)) ([#917](https://www.github.com/googleapis/python-bigquery/issues/917)) ([e2d12b7](https://www.github.com/googleapis/python-bigquery/commit/e2d12b795ef2dc51b0ee36f1b3000edb1e64ce05)) -### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) +## [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) ### Bug Fixes @@ -287,7 +287,7 @@ * Support using GeoPandas for GEOGRAPHY columns ([#848](https://www.github.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13)) -### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) +## [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) ### Bug Fixes @@ -309,21 +309,21 @@ * make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) -### [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06) +## [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06) ### Bug Fixes * increase default retry deadline to 10 minutes ([#859](https://www.github.com/googleapis/python-bigquery/issues/859)) ([30770fd](https://www.github.com/googleapis/python-bigquery/commit/30770fd0575fbd5aaa70c14196a4cc54627aecd2)) -### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) +## [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) ### Dependencies * expand pyarrow pins to support 5.x releases ([#833](https://www.github.com/googleapis/python-bigquery/issues/833)) ([80e3a61](https://www.github.com/googleapis/python-bigquery/commit/80e3a61c60419fb19b70b664c6415cd01ba82f5b)) -### [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28) +## [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28) ### Bug Fixes @@ -348,7 +348,7 @@ * correct docs for `LoadJobConfig.destination_table_description` ([#810](https://www.github.com/googleapis/python-bigquery/issues/810)) ([da87fd9](https://www.github.com/googleapis/python-bigquery/commit/da87fd921cc8067b187d7985c978aac8eb58d107)) -### [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22) +## [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22) ### Bug Fixes @@ -445,7 +445,7 @@ * **tests:** invalid path to strptime() ([#672](https://www.github.com/googleapis/python-bigquery/issues/672)) ([591cdd8](https://www.github.com/googleapis/python-bigquery/commit/591cdd851bb1321b048a05a378a0ef48d3ade462)) -### [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12) +## [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12) ### Bug Fixes @@ -504,7 +504,7 @@ * add sample to run DML query ([#591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155)) * update the description of the return value of `_QueryResults.rows()` ([#594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897)) -### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) +## [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) ### Bug Fixes @@ -602,7 +602,7 @@ * declare support for Python 3.9 ([#488](https://www.github.com/googleapis/python-bigquery/issues/488)) ([55daa7d](https://www.github.com/googleapis/python-bigquery/commit/55daa7da9857a8a2fb14a80a4efa3f466386a85f)) -### [2.6.2](https://www.github.com/googleapis/python-bigquery/compare/v2.6.1...v2.6.2) (2021-01-11) +## [2.6.2](https://www.github.com/googleapis/python-bigquery/compare/v2.6.1...v2.6.2) (2021-01-11) ### Bug Fixes @@ -617,7 +617,7 @@ * fix Shapely import in GEOGRAPHY sample ([#431](https://www.github.com/googleapis/python-bigquery/issues/431)) ([96a1c5b](https://www.github.com/googleapis/python-bigquery/commit/96a1c5b3c72855ba6ae8c88dfd0cdb02d2faf909)) * move and refresh view samples ([#420](https://www.github.com/googleapis/python-bigquery/issues/420)) ([079b6a1](https://www.github.com/googleapis/python-bigquery/commit/079b6a162f6929bf801366d92f8daeb3318426c4)) -### [2.6.1](https://www.github.com/googleapis/python-bigquery/compare/v2.6.0...v2.6.1) (2020-12-09) +## [2.6.1](https://www.github.com/googleapis/python-bigquery/compare/v2.6.0...v2.6.1) (2020-12-09) ### Bug Fixes @@ -831,14 +831,14 @@ * recommend insert_rows_json to avoid call to tables.get ([#258](https://www.github.com/googleapis/python-bigquery/issues/258)) ([ae647eb](https://www.github.com/googleapis/python-bigquery/commit/ae647ebd68deff6e30ca2cffb5b7422c6de4940b)) -### [1.27.2](https://www.github.com/googleapis/python-bigquery/compare/v1.27.1...v1.27.2) (2020-08-18) +## [1.27.2](https://www.github.com/googleapis/python-bigquery/compare/v1.27.1...v1.27.2) (2020-08-18) ### Bug Fixes * rationalize platform constraints for 'pyarrow' extra ([#235](https://www.github.com/googleapis/python-bigquery/issues/235)) ([c9a0567](https://www.github.com/googleapis/python-bigquery/commit/c9a0567f59491b769a9e2fd535430423e39d4fa8)) -### [1.27.1](https://www.github.com/googleapis/python-bigquery/compare/v1.27.0...v1.27.1) (2020-08-18) +## [1.27.1](https://www.github.com/googleapis/python-bigquery/compare/v1.27.0...v1.27.1) (2020-08-18) ### Bug Fixes @@ -860,7 +860,7 @@ * converting to dataframe with out of bounds timestamps ([#209](https://www.github.com/googleapis/python-bigquery/issues/209)) ([8209203](https://www.github.com/googleapis/python-bigquery/commit/8209203e967f0624ad306166c0af6f6f1027c550)), closes [#168](https://www.github.com/googleapis/python-bigquery/issues/168) * raise error if inserting rows with unknown fields ([#163](https://www.github.com/googleapis/python-bigquery/issues/163)) ([8fe7254](https://www.github.com/googleapis/python-bigquery/commit/8fe725429541eed34ddc01cffc8b1ee846c14162)) -### [1.26.1](https://www.github.com/googleapis/python-bigquery/compare/v1.26.0...v1.26.1) (2020-07-25) +## [1.26.1](https://www.github.com/googleapis/python-bigquery/compare/v1.26.0...v1.26.1) (2020-07-25) ### Documentation From 412824609acf41557055b9cd2edee71f60c813d5 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 6 Jun 2022 11:27:21 -0400 Subject: [PATCH 1463/2016] chore: test minimum dependencies in python 3.7 (#1269) * chore: test minimum dependencies in python 3.7 * update constraints --- .../testing/constraints-3.7.txt | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 684864f2bcde..e3c7a332c5d6 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -1 +1,27 @@ +# This constraints file is used to check that lower bounds +# are correct in setup.py +# List *all* library dependencies and extras in this file. +# Pin the version to the lower bound. +# +# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", +# Then this file should have foo==1.14.0 +db-dtypes==0.3.0 +geopandas==0.9.0 +google-api-core==1.31.5 +google-cloud-bigquery-storage==2.0.0 +google-cloud-core==1.4.1 +google-resumable-media==0.6.0 +grpcio==1.38.1 +ipython==7.0.1 +opentelemetry-api==1.1.0 +opentelemetry-instrumentation==0.20b0 +opentelemetry-sdk==1.1.0 pandas==1.1.0 +proto-plus==1.15.0 +protobuf==3.12.0 +pyarrow==3.0.0 +python-dateutil==2.7.3 +requests==2.18.0 +Shapely==1.6.4.post2 +six==1.13.0 +tqdm==4.7.4 From 817db8bcdf92960f17bcb4484fd8d4fd3fb616e1 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 6 Jun 2022 09:46:47 -0700 Subject: [PATCH 1464/2016] chore(main): release 3.2.0 (#1259) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 19 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 9a2c1b26e9ce..1f999aa108a5 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.2.0](https://github.com/googleapis/python-bigquery/compare/v3.1.0...v3.2.0) (2022-06-06) + + +### Features + +* add support for table clones ([#1235](https://github.com/googleapis/python-bigquery/issues/1235)) ([176fb2a](https://github.com/googleapis/python-bigquery/commit/176fb2afc9888c6b0cd74d590065b3002bdbf533)) + + +### Bug Fixes + +* **deps:** proto-plus >= 1.15.0, <2.0.0dev ([ba58d3a](https://github.com/googleapis/python-bigquery/commit/ba58d3af80ca796be09c813529d3aadb79e0413c)) +* **deps:** require packaging >= 14.3, <22.0.0dev ([ba58d3a](https://github.com/googleapis/python-bigquery/commit/ba58d3af80ca796be09c813529d3aadb79e0413c)) +* **deps:** require protobuf>= 3.12.0, <4.0.0dev ([#1263](https://github.com/googleapis/python-bigquery/issues/1263)) ([ba58d3a](https://github.com/googleapis/python-bigquery/commit/ba58d3af80ca796be09c813529d3aadb79e0413c)) + + +### Documentation + +* fix changelog header to consistent size ([#1268](https://github.com/googleapis/python-bigquery/issues/1268)) ([d03e2a2](https://github.com/googleapis/python-bigquery/commit/d03e2a29ecfa5d2ccd5599f5c0faac55286e52e7)) + ## [3.1.0](https://github.com/googleapis/python-bigquery/compare/v3.0.1...v3.1.0) (2022-05-09) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 6ce498ba5542..c24ca23d6861 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.1.0" +__version__ = "3.2.0" From bd60473434289ee59bb40b8b77fc3b86921139be Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Fri, 17 Jun 2022 11:20:43 -0700 Subject: [PATCH 1465/2016] feat: add destination_expiration_time property to copy job (#1277) * feat: add destination_expiration_time property to copy job * update test * refactor test * remove unused import * Update google/cloud/bigquery/job/copy_.py Co-authored-by: Anthonios Partheniou * Update google/cloud/bigquery/job/copy_.py Co-authored-by: Anthonios Partheniou Co-authored-by: Anthonios Partheniou --- .../google/cloud/bigquery/job/copy_.py | 14 ++++++++++++++ .../tests/system/test_client.py | 5 +++++ .../tests/unit/job/test_copy.py | 10 ++++++++++ 3 files changed, 29 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py index eb7f609a50d1..9d7548ec5f46 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py @@ -126,6 +126,20 @@ def operation_type(self, value: Optional[str]): value = OperationType.OPERATION_TYPE_UNSPECIFIED self._set_sub_prop("operationType", value) + @property + def destination_expiration_time(self) -> str: + """google.cloud.bigquery.job.DestinationExpirationTime: The time when the + destination table expires. Expired tables will be deleted and their storage reclaimed. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.destination_expiration_time + """ + return self._get_sub_prop("destinationExpirationTime") + + @destination_expiration_time.setter + def destination_expiration_time(self, value: str): + self._set_sub_prop("destinationExpirationTime", value) + class CopyJob(_AsyncJob): """Asynchronous job: copy data into a table from other tables. diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 49eb70a8b7b4..c99ee1c72dd1 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2153,6 +2153,11 @@ def test_table_snapshots(dataset_id): copy_config = CopyJobConfig() copy_config.operation_type = OperationType.SNAPSHOT + today = datetime.date.today() + destination_expiration_time = f"{today.year + 1}-01-01T00:00:00Z" + + copy_config.destination_expiration_time = destination_expiration_time + copy_job = client.copy_table( sources=source_table_path, destination=snapshot_table_path, diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py index d94e5bc884e1..a3b5c70e37e5 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py @@ -19,6 +19,8 @@ from .helpers import _Base from .helpers import _make_client +import datetime + class TestCopyJobConfig(_Base): JOB_TYPE = "copy" @@ -36,6 +38,7 @@ def test_ctor_defaults(self): assert config.create_disposition is None assert config.write_disposition is None + assert config.destination_expiration_time is None assert config.destination_encryption_configuration is None assert config.operation_type == OperationType.OPERATION_TYPE_UNSPECIFIED @@ -48,15 +51,22 @@ def test_ctor_w_properties(self): write_disposition = WriteDisposition.WRITE_TRUNCATE snapshot_operation = OperationType.SNAPSHOT + today = datetime.date.today() + destination_expiration_time = f"{today.year + 1}-01-01T00:00:00Z" + config = self._get_target_class()( create_disposition=create_disposition, write_disposition=write_disposition, operation_type=snapshot_operation, + destination_expiration_time=destination_expiration_time, ) self.assertEqual(config.create_disposition, create_disposition) self.assertEqual(config.write_disposition, write_disposition) self.assertEqual(config.operation_type, snapshot_operation) + self.assertEqual( + config.destination_expiration_time, destination_expiration_time + ) def test_to_api_repr_with_encryption(self): from google.cloud.bigquery.encryption_configuration import ( From e6a6bca246a98f8536a53d258f6de47baca3f4d8 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Fri, 17 Jun 2022 14:11:32 -0700 Subject: [PATCH 1466/2016] docs(samples): add table snapshot sample (#1274) * docs(samples): add table snapshot sample * docs(samples): fix region tag --- .../samples/snippets/conftest.py | 15 ++++++- .../samples/snippets/create_table_snapshot.py | 43 +++++++++++++++++++ .../snippets/create_table_snapshot_test.py | 33 ++++++++++++++ 3 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/create_table_snapshot.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/conftest.py b/packages/google-cloud-bigquery/samples/snippets/conftest.py index 37b52256bd95..f53509d44cdb 100644 --- a/packages/google-cloud-bigquery/samples/snippets/conftest.py +++ b/packages/google-cloud-bigquery/samples/snippets/conftest.py @@ -18,7 +18,6 @@ import pytest import test_utils.prefixer - prefixer = test_utils.prefixer.Prefixer("python-bigquery", "samples/snippets") @@ -52,6 +51,20 @@ def dataset_id(bigquery_client: bigquery.Client, project_id: str) -> Iterator[st bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) +@pytest.fixture +def table_id( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +) -> Iterator[str]: + table_id = prefixer.create_prefix() + full_table_id = f"{project_id}.{dataset_id}.{table_id}" + table = bigquery.Table( + full_table_id, schema=[bigquery.SchemaField("string_col", "STRING")] + ) + bigquery_client.create_table(table) + yield full_table_id + bigquery_client.delete_table(table, not_found_ok=True) + + @pytest.fixture(scope="session") def entity_id(bigquery_client: bigquery.Client, dataset_id: str) -> str: return "cloud-developer-relations@google.com" diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot.py b/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot.py new file mode 100644 index 000000000000..846495e5cfe9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot.py @@ -0,0 +1,43 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_snapshot(source_table_id: str, snapshot_table_id: str) -> None: + original_source_table_id = source_table_id + original_snapshot_table_id = snapshot_table_id + # [START bigquery_create_table_snapshot] + from google.cloud import bigquery + + # TODO(developer): Set table_id to the ID of the table to create. + source_table_id = "your-project.your_dataset.your_table_name" + snapshot_table_id = "your-project.your_dataset.snapshot_table_name" + # [END bigquery_create_table_snapshot] + source_table_id = original_source_table_id + snapshot_table_id = original_snapshot_table_id + # [START bigquery_create_table_snapshot] + + # Construct a BigQuery client object. + client = bigquery.Client() + copy_config = bigquery.CopyJobConfig() + copy_config.operation_type = bigquery.OperationType.SNAPSHOT + + copy_job = client.copy_table( + sources=source_table_id, + destination=snapshot_table_id, + job_config=copy_config, + ) + copy_job.result() + + print("Created table snapshot {}".format(snapshot_table_id)) + # [END bigquery_create_table_snapshot] diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py new file mode 100644 index 000000000000..f1d8d0f7b43b --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py @@ -0,0 +1,33 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import create_table_snapshot + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_table_snapshot( + capsys: "pytest.CaptureFixture[str]", + table_id: str, + random_table_id: str, +) -> None: + + create_table_snapshot.create_table_snapshot(table_id, random_table_id) + + out, _ = capsys.readouterr() + + assert "Created table snapshot {}".format(random_table_id) in out From 40891f9151a5b543b7465ee116465b525035e97e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 27 Jun 2022 14:39:26 -0500 Subject: [PATCH 1467/2016] doc: share design document for query retry logic (#1123) * doc: share design document for query retry logic * add design document to contents tree * clarify a few points * Update docs/design/query-retries.md Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> --- .../docs/design/index.rst | 11 ++ .../docs/design/query-retries.md | 115 ++++++++++++++++++ packages/google-cloud-bigquery/docs/index.rst | 1 + 3 files changed, 127 insertions(+) create mode 100644 packages/google-cloud-bigquery/docs/design/index.rst create mode 100644 packages/google-cloud-bigquery/docs/design/query-retries.md diff --git a/packages/google-cloud-bigquery/docs/design/index.rst b/packages/google-cloud-bigquery/docs/design/index.rst new file mode 100644 index 000000000000..5750c7a9890e --- /dev/null +++ b/packages/google-cloud-bigquery/docs/design/index.rst @@ -0,0 +1,11 @@ +Client Library Design +===================== + +Some features of this client library have complex requirements and/or +implementation. These documents describe the design decisions that contributued +to those features. + +.. toctree:: + :maxdepth: 2 + + query-retries diff --git a/packages/google-cloud-bigquery/docs/design/query-retries.md b/packages/google-cloud-bigquery/docs/design/query-retries.md new file mode 100644 index 000000000000..1bac82f5c282 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/design/query-retries.md @@ -0,0 +1,115 @@ +# Design of query retries in the BigQuery client libraries for Python + + +## Overview + +The BigQuery client libraries for Python must safely retry API requests related to initiating a query. By "safely", it is meant that the BigQuery backend never successfully executes the query twice. This avoids duplicated rows from INSERT DML queries, among other problems. + +To achieve this goal, the client library only retries an API request relating to queries if at least one of the following is true: (1) issuing this exact request is idempotent, meaning that it won't result in a duplicate query being issued, or (2) the query has already failed in such a way that it is safe to re-issue the query. + + +## Background + + +### API-level retries + +Retries for nearly all API requests were [added in 2017](https://github.com/googleapis/google-cloud-python/pull/4148) and are [configurable via a Retry object](https://googleapis.dev/python/google-api-core/latest/retry.html#google.api_core.retry.Retry) passed to the retry argument. Notably, this includes the "query" method on the Python client, corresponding to the [jobs.insert REST API method](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert). The Python client always populates the [jobReference.jobId](https://cloud.google.com/bigquery/docs/reference/rest/v2/JobReference#FIELDS.job_id) field of the request body. If the BigQuery REST API receives a jobs.insert request for a job with the same ID, the REST API fails because the job already exists. + + +### jobs.insert and jobs.query API requests + +By default, the Python client starts a query using the [jobs.insert REST API +method](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert). +Support for the [jobs.query REST API +method](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query) +was [added via the `api_method` +parameter](https://github.com/googleapis/python-bigquery/pull/967) and is +included in version 3.0 of the Python client library. + +The jobs.query REST API method differs from jobs.insert in that it does not accept a job ID. Instead, the [requestId parameter](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#QueryRequest.FIELDS.request_id) provides a window of idempotency for duplicate requests. + + +### Re-issuing a query + +The ability to re-issue a query automatically was a [long](https://github.com/googleapis/google-cloud-python/issues/5555) [requested](https://github.com/googleapis/python-bigquery/issues/14) [feature](https://github.com/googleapis/python-bigquery/issues/539). As work ramped up on the SQLAlchemy connector, it became clear that this feature was necessary to keep the test suite, which issues hundreds of queries, from being [too flakey](https://github.com/googleapis/python-bigquery-sqlalchemy/issues?q=is%3Aissue+is%3Aclosed+author%3Aapp%2Fflaky-bot+sort%3Acreated-asc). + +Retrying a query is not as simple as retrying a single API request. In many +cases the client library does not "know" about a query job failure until it +tries to fetch the query results. To solve this, the [client re-issues a +query](https://github.com/googleapis/python-bigquery/pull/837) as it was +originally issued only if the query job has failed for a retryable reason. + + +### getQueryResults error behavior + +The client library uses [the jobs.getQueryResults REST API method](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults) to wait for a query to finish. This REST API has a unique behavior in that it translates query job failures into HTTP error status codes. To disambiguate these error responses from one that may have occurred further up the REST API stack (such as from the Google load balancer), the client library inspects the error response body. + +When the error corresponds to a query job failure, BigQuery populates the +"errors" array field, with the first element in the list corresponding to the +error which directly caused the job failure. There are many [error response +messages](https://cloud.google.com/bigquery/docs/error-messages), but only some +of them indicate that re-issuing the query job may help. For example, if the +job fails due to invalid query syntax, re-issuing the query won't help. If a +query job fails due to "backendError" or "rateLimitExceeded", we know that the +job did not successfully execute for some other reason. + + +## Detailed design + +As mentioned in the "Overview" section, the Python client only retries a query request if at least one of the following is true: (1) issuing this exact request is idempotent, meaning that it won't result in a duplicate query being issued, or (2) the query has already failed in such a way that it is safe to re-issue the query. + +A developer can configure when to retry an API request (corresponding to #1 "issuing this exact request is idempotent") via the query method's `retry` parameter. A developer can configure when to re-issue a query job after a job failure (corresponding to #2 "the query has already failed") via the query method's `job_retry` parameter. + + +### Retrying API requests via the `retry` parameter + +The first set of retries are at the API layer. The client library sends an +identical request if the request is idempotent. + +#### Retrying the jobs.insert API via the retry parameter + +When the `api_method` parameter is set to `"INSERT"`, which is the default +value, the client library uses the jobs.insert REST API to start a query job. +Before it issues this request, it sets a job ID. This job ID remains constant +across API retries. + +If the job ID was randomly generated, and the jobs.insert request and all retries fail, the client library sends a request to the jobs.get API. This covers the case when a query request succeeded, but there was a transient issue that prevented the client from receiving a successful response. + + +#### Retrying the jobs.query API via the retry parameter + +When the `api_method` parameter is set to `"QUERY"` (available in version 3 of +the client library), the client library sends a request to the jobs.query REST +API. The client library automatically populates the `requestId` parameter in +the request body. The `requestId` remains constant across API retries, ensuring +that requests are idempotent. + +As there is no job ID available, the client library cannot call jobs.get if the query happened to succeed, but all retries resulted in an error response. In this case, the client library throws an exception. + + +#### Retrying the jobs.getQueryResults API via the retry parameter + +The jobs.getQueryResults REST API is read-only. Thus, it is always safe to +retry. As noted in the "Background" section, HTTP error response codes can +indicate that the job itself has failed, so this may retry more often than is +strictly needed +([Issue #1122](https://github.com/googleapis/python-bigquery/issues/1122) +has been opened to investigate this). + + +### Re-issuing queries via the `job_retry` parameter + +The first set of retries are at the "job" layer, called "re-issue" in this +document. The client library sends an identical query request (except for the +job or request identifier) if the query job has failed for a re-issuable reason. + + +#### Deciding when it is safe to re-issue a query + +The conditions when it is safe to re-issue a query are different from the conditions when it is safe to retry an individual API request. As such, the `job_retry` parameter is provided to configure this behavior. + +The `job_retry` parameter is only used if (1) a query job fails and (2) a job ID is not provided by the developer. This is because it must generate a new job ID (or request ID, depending on the method used to create the query job) to avoid getting the same failed job. + +The `job_retry` parameter logic only happens after the client makes a request to the `jobs.getQueryRequest` REST API, which fails. The client examines the exception to determine if this failure was caused by a failed job and that the failure reason (e.g. "backendError" or "rateLimitExceeded") indicates that re-issuing the query may help. + +If it is determined that the query job can be re-issued safely, the original logic to issue the query is executed. If the jobs.insert REST API was originally used, a new job ID is generated. Otherwise, if the jobs.query REST API was originally used, a new request ID is generated. All other parts of the request body remain identical to the original request body for the failed query job, and the process repeats until `job_retry` is exhausted. diff --git a/packages/google-cloud-bigquery/docs/index.rst b/packages/google-cloud-bigquery/docs/index.rst index 4ab0a298dc84..500c67a7fb4c 100644 --- a/packages/google-cloud-bigquery/docs/index.rst +++ b/packages/google-cloud-bigquery/docs/index.rst @@ -26,6 +26,7 @@ API Reference reference dbapi + design/index Migration Guide --------------- From 2adfbaab59fccee20518cb17391d3ea2ce19626a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 9 Jul 2022 13:46:56 -0400 Subject: [PATCH 1468/2016] fix: require python 3.7+ (#1284) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(python): drop python 3.6 Source-Link: https://github.com/googleapis/synthtool/commit/4f89b13af10d086458f9b379e56a614f9d6dab7b Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:e7bb19d47c13839fe8c147e50e02e8b6cf5da8edd1af8b82208cd6f66cc2829c * add api_description to .repo-metadata.json * require python 3.7+ in setup.py * remove python 3.6 sample configs * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * exclude templated README.rst * update python_requires * remove python 3.6 from noxfile.py Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/continuous/prerelease-deps.cfg | 7 ++++ .../.kokoro/presubmit/prerelease-deps.cfg | 7 ++++ .../.kokoro/samples/python3.6/common.cfg | 40 ------------------- .../.kokoro/samples/python3.6/continuous.cfg | 7 ---- .../samples/python3.6/periodic-head.cfg | 11 ----- .../.kokoro/samples/python3.6/periodic.cfg | 6 --- .../.kokoro/samples/python3.6/presubmit.cfg | 6 --- .../.kokoro/test-samples-impl.sh | 4 +- .../google-cloud-bigquery/.repo-metadata.json | 3 +- .../google-cloud-bigquery/CONTRIBUTING.rst | 6 +-- packages/google-cloud-bigquery/README.rst | 4 +- packages/google-cloud-bigquery/noxfile.py | 2 +- packages/google-cloud-bigquery/owlbot.py | 1 + .../samples/geography/noxfile.py | 2 +- .../samples/magics/noxfile.py | 2 +- .../samples/snippets/README.rst | 2 +- .../samples/snippets/noxfile.py | 2 +- .../templates/install_deps.tmpl.rst | 2 +- packages/google-cloud-bigquery/setup.py | 3 +- 20 files changed, 32 insertions(+), 89 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.6/continuous.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.6/presubmit.cfg diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 757c9dca75ad..1ce608523524 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:81ed5ecdfc7cac5b699ba4537376f3563f6f04122c4ec9e735d3b3dc1d43dd32 -# created: 2022-05-05T22:08:23.383410683Z + digest: sha256:e7bb19d47c13839fe8c147e50e02e8b6cf5da8edd1af8b82208cd6f66cc2829c +# created: 2022-07-05T18:31:20.838186805Z diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg new file mode 100644 index 000000000000..3595fb43f5c0 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg new file mode 100644 index 000000000000..3595fb43f5c0 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg deleted file mode 100644 index 20f6b9691d91..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.6" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py36" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/continuous.cfg deleted file mode 100644 index 7218af1499e5..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/continuous.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg deleted file mode 100644 index 5aa01bab5bf3..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg deleted file mode 100644 index 71cd1e597e38..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.6/presubmit.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.6/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh index 8a324c9c7bc6..2c6500cae0b9 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh @@ -33,7 +33,7 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Install nox -python3.6 -m pip install --upgrade --quiet nox +python3.9 -m pip install --upgrade --quiet nox # Use secrets acessor service account to get secrets if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then @@ -76,7 +76,7 @@ for file in samples/**/requirements.txt; do echo "------------------------------------------------------------" # Use nox to execute the tests for the project. - python3.6 -m nox -s "$RUN_TESTS_SESSION" + python3.9 -m nox -s "$RUN_TESTS_SESSION" EXIT=$? # If this is a periodic build, send the test log to the FlakyBot. diff --git a/packages/google-cloud-bigquery/.repo-metadata.json b/packages/google-cloud-bigquery/.repo-metadata.json index 670aba79392b..d1be7ec4d9d2 100644 --- a/packages/google-cloud-bigquery/.repo-metadata.json +++ b/packages/google-cloud-bigquery/.repo-metadata.json @@ -13,5 +13,6 @@ "requires_billing": false, "default_version": "v2", "codeowner_team": "@googleapis/api-bigquery", - "api_shortname": "bigquery" + "api_shortname": "bigquery", + "api_description": "is a fully managed, NoOps, low cost data analytics service.\nData can be streamed into BigQuery at millions of rows per second to enable real-time analysis.\nWith BigQuery you can easily deploy Petabyte-scale Databases." } diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index f183b63b4969..d06598b310bc 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.6, 3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows. + 3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -221,13 +221,11 @@ Supported Python Versions We support: -- `Python 3.6`_ - `Python 3.7`_ - `Python 3.8`_ - `Python 3.9`_ - `Python 3.10`_ -.. _Python 3.6: https://docs.python.org/3.6/ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ @@ -239,7 +237,7 @@ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery/blob/main/noxfile.py -We also explicitly decided to support Python 3 beginning with version 3.6. +We also explicitly decided to support Python 3 beginning with version 3.7. Reasons for this include: - Encouraging use of newest versions of Python 3 diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index e8578916a272..475d055a272f 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -52,11 +52,11 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.6, < 3.11 +Python >= 3.7, < 3.11 Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Python == 2.7, Python == 3.5. +Python == 2.7, Python == 3.5, Python == 3.6. The last version of this library compatible with Python 2.7 and 3.5 is `google-cloud-bigquery==1.28.0`. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index f088e10c228c..c6f7c76b1d89 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -29,7 +29,7 @@ DEFAULT_PYTHON_VERSION = "3.8" SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.10"] -UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"] +UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() # 'docfx' is excluded since it only needs to run in 'docs-presubmit' diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index ca96f4e08145..4d287ac46bc5 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -68,6 +68,7 @@ # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", ".github/workflows", # exclude gh actions as credentials are needed for tests + "README.rst", ], ) diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index a40410b56369..29b5bc852183 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index a40410b56369..29b5bc852183 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/snippets/README.rst b/packages/google-cloud-bigquery/samples/snippets/README.rst index 05af1e812038..b5865a6ce9c1 100644 --- a/packages/google-cloud-bigquery/samples/snippets/README.rst +++ b/packages/google-cloud-bigquery/samples/snippets/README.rst @@ -47,7 +47,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 3.6+. +#. Create a virtualenv. Samples are compatible with Python 3.7+. .. code-block:: bash diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index a40410b56369..29b5bc852183 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst b/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst index 275d649890d7..6f069c6c87a5 100644 --- a/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst +++ b/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -12,7 +12,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 3.6+. +#. Create a virtualenv. Samples are compatible with Python 3.7+. .. code-block:: bash diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index a040e96e75d0..a3d5c829ed34 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -111,7 +111,6 @@ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", @@ -124,7 +123,7 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=3.6, <3.11", + python_requires=">=3.7, <3.11", include_package_data=True, zip_safe=False, ) From 9dfa8d106e14cca4a4127418a4772eba98b315ad Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Tue, 19 Jul 2022 12:49:22 -0400 Subject: [PATCH 1469/2016] docs(samples): explicitly add bq to samples reqs, upgrade grpc to fix bug on m1 (#1290) * fix: explicitly add bq to samples reqs, upgrade grpc to fix bug on m1 * update grpc in setup.py * fix: rm 3.6 constraints, add grpcio to 3.7-3.10 constraints --- .../samples/snippets/requirements.txt | 3 ++- packages/google-cloud-bigquery/setup.py | 2 +- .../testing/constraints-3.10.txt | 1 + .../testing/constraints-3.6.txt | 27 ------------------- .../testing/constraints-3.7.txt | 2 +- .../testing/constraints-3.8.txt | 1 + .../testing/constraints-3.9.txt | 1 + 7 files changed, 7 insertions(+), 30 deletions(-) delete mode 100644 packages/google-cloud-bigquery/testing/constraints-3.6.txt diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index ad64565e2f51..a8322de0a349 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,8 @@ db-dtypes==1.0.1 +google-cloud-bigquery==3.2.0 google-cloud-bigquery-storage==2.13.1 google-auth-oauthlib==0.5.1 -grpcio==1.46.3 +grpcio==1.47.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.3.0; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index a3d5c829ed34..f811a47b1fe3 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -29,7 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 + "grpcio >= 1.47.0, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/1262 # NOTE: Maintainers, please do not require google-api-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.10.txt b/packages/google-cloud-bigquery/testing/constraints-3.10.txt index e69de29bb2d1..c5e37fc9b2dc 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.10.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.10.txt @@ -0,0 +1 @@ +grpcio==1.47.0 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.6.txt b/packages/google-cloud-bigquery/testing/constraints-3.6.txt deleted file mode 100644 index 47b842a6d6ba..000000000000 --- a/packages/google-cloud-bigquery/testing/constraints-3.6.txt +++ /dev/null @@ -1,27 +0,0 @@ -# This constraints file is used to check that lower bounds -# are correct in setup.py -# List *all* library dependencies and extras in this file. -# Pin the version to the lower bound. -# -# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", -# Then this file should have foo==1.14.0 -db-dtypes==0.3.0 -geopandas==0.9.0 -google-api-core==1.31.5 -google-cloud-bigquery-storage==2.0.0 -google-cloud-core==1.4.1 -google-resumable-media==0.6.0 -grpcio==1.38.1 -ipython==7.0.1 -opentelemetry-api==1.1.0 -opentelemetry-instrumentation==0.20b0 -opentelemetry-sdk==1.1.0 -pandas==1.0.0 -proto-plus==1.15.0 -protobuf==3.12.0 -pyarrow==3.0.0 -python-dateutil==2.7.2 -requests==2.18.0 -Shapely==1.6.0 -six==1.13.0 -tqdm==4.7.4 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index e3c7a332c5d6..c5803387e7c0 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -11,7 +11,7 @@ google-api-core==1.31.5 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 -grpcio==1.38.1 +grpcio==1.47.0 ipython==7.0.1 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.8.txt b/packages/google-cloud-bigquery/testing/constraints-3.8.txt index 3fd8886e64d1..e5e73c5c7e2d 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.8.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.8.txt @@ -1 +1,2 @@ +grpcio==1.47.0 pandas==1.2.0 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.9.txt b/packages/google-cloud-bigquery/testing/constraints-3.9.txt index 39dc6250ef66..d4c302867578 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.9.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.9.txt @@ -4,4 +4,5 @@ # # NOTE: Not comprehensive yet, will eventually be maintained semi-automatically by # the renovate bot. +grpcio==1.47.0 pyarrow>=4.0.0 From e909f6fc9765bba20a872144512b341281a70f35 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 25 Jul 2022 13:57:12 -0400 Subject: [PATCH 1470/2016] chore: updates minor grammatical error (#1299) * Updates minor grammatical error * chore: update grammar/spelling --- packages/google-cloud-bigquery/google/cloud/bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index fb772ea113e5..1200d78f9e0d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3230,7 +3230,7 @@ def query( will be ignored if a ``job_id`` is also given. location (Optional[str]): Location where to run the job. Must match the location of the - any table used in the query as well as the destination table. + table used in the query as well as the destination table. project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. From 1e7bfaa7468b1f0ee6b928d2f7f1842801a1a267 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 26 Jul 2022 13:06:19 -0700 Subject: [PATCH 1471/2016] chore(main): release 3.3.0 (#1279) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 18 ++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 1f999aa108a5..c7214ea167b7 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.3.0](https://github.com/googleapis/python-bigquery/compare/v3.2.0...v3.3.0) (2022-07-25) + + +### Features + +* add destination_expiration_time property to copy job ([#1277](https://github.com/googleapis/python-bigquery/issues/1277)) ([728b07c](https://github.com/googleapis/python-bigquery/commit/728b07c9177532bbbbfd1890f23e98950aea3f02)) + + +### Bug Fixes + +* require python 3.7+ ([#1284](https://github.com/googleapis/python-bigquery/issues/1284)) ([52d9f14](https://github.com/googleapis/python-bigquery/commit/52d9f14fb1d183f64a62fee1fddc0bf576a0a3e9)) + + +### Documentation + +* **samples:** add table snapshot sample ([#1274](https://github.com/googleapis/python-bigquery/issues/1274)) ([e760d1b](https://github.com/googleapis/python-bigquery/commit/e760d1bcb76561b4247adde2fd06ae0b686befb9)) +* **samples:** explicitly add bq to samples reqs, upgrade grpc to fix bug on m1 ([#1290](https://github.com/googleapis/python-bigquery/issues/1290)) ([9b7e3e4](https://github.com/googleapis/python-bigquery/commit/9b7e3e424cbd08af8b08c91e6397a3f1b7811064)) + ## [3.2.0](https://github.com/googleapis/python-bigquery/compare/v3.1.0...v3.2.0) (2022-06-06) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index c24ca23d6861..2279c3674ff0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.2.0" +__version__ = "3.3.0" From a56f406a93564858174059bd90409388f3f73a65 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 2 Aug 2022 17:06:11 +0200 Subject: [PATCH 1472/2016] chore(deps): update all dependencies (#1301) * chore(deps): update all dependencies * remove protobuf * revert * pin geopandas for python 3.7 Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 42 +++++++++---------- .../samples/magics/requirements-test.txt | 2 +- .../samples/magics/requirements.txt | 14 +++---- .../samples/snippets/requirements-test.txt | 2 +- .../samples/snippets/requirements.txt | 14 +++---- 5 files changed, 37 insertions(+), 37 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index cbd7c4e5dcc2..1ce52d17936b 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,33 +1,33 @@ -attrs==21.4.0 -certifi==2022.5.18.1 -cffi==1.15.0 -charset-normalizer==2.0.12 +attrs==22.1.0 +certifi==2022.6.15 +cffi==1.15.1 +charset-normalizer==2.1.0 click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.0.1 +db-dtypes==1.0.2 Fiona==1.8.21 geojson==2.5.0 -geopandas==0.10.2 -google-api-core==2.8.1 -google-auth==2.6.6 -google-cloud-bigquery==3.1.0 -google-cloud-bigquery-storage==2.13.1 -google-cloud-core==2.3.0 +geopandas===0.10.2; python_version == '3.7' +geopandas==0.11.1; python_version >= '3.8' +google-api-core==2.8.2 +google-auth==2.9.1 +google-cloud-bigquery==3.3.0 +google-cloud-bigquery-storage==2.14.1 +google-cloud-core==2.3.2 google-crc32c==1.3.0 google-resumable-media==2.3.3 -googleapis-common-protos==1.56.2 -grpcio==1.46.3 +googleapis-common-protos==1.56.4 +grpcio==1.47.0 idna==3.3 -libcst==0.4.3 +libcst==0.4.7 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' -pandas==1.4.2; python_version >= '3.8' -proto-plus==1.20.5 -protobuf==3.20.1 +pandas==1.4.3; python_version >= '3.8' +proto-plus==1.20.6 pyarrow==8.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 @@ -36,10 +36,10 @@ pyparsing==3.0.9 python-dateutil==2.8.2 pytz==2022.1 PyYAML==6.0 -requests==2.27.1 -rsa==4.8 +requests==2.28.1 +rsa==4.9 Shapely==1.8.2 six==1.16.0 -typing-extensions==4.2.0 +typing-extensions==4.3.0 typing-inspect==0.7.1 -urllib3==1.26.9 +urllib3==1.26.11 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index d771b647d1b1..856751fc17bc 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.3.1 +google-cloud-testutils==1.3.3 pytest==7.1.2 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index ad64565e2f51..6ebe55a61f40 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,13 +1,13 @@ -db-dtypes==1.0.1 -google-cloud-bigquery-storage==2.13.1 -google-auth-oauthlib==0.5.1 -grpcio==1.46.3 +db-dtypes==1.0.2 +google-cloud-bigquery-storage==2.14.1 +google-auth-oauthlib==0.5.2 +grpcio==1.47.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.3.0; python_version >= '3.9' +ipython==8.4.0; python_version >= '3.9' matplotlib==3.5.2 pandas===1.3.5; python_version == '3.7' -pandas==1.4.2; python_version >= '3.8' +pandas==1.4.3; python_version >= '3.8' pyarrow==8.0.0 pytz==2022.1 -typing-extensions==4.2.0 +typing-extensions==4.3.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index d771b647d1b1..856751fc17bc 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.3.1 +google-cloud-testutils==1.3.3 pytest==7.1.2 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index a8322de0a349..90c494e05e8d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,14 +1,14 @@ -db-dtypes==1.0.1 -google-cloud-bigquery==3.2.0 -google-cloud-bigquery-storage==2.13.1 -google-auth-oauthlib==0.5.1 +db-dtypes==1.0.2 +google-cloud-bigquery==3.3.0 +google-cloud-bigquery-storage==2.14.1 +google-auth-oauthlib==0.5.2 grpcio==1.47.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.3.0; python_version >= '3.9' +ipython==8.4.0; python_version >= '3.9' matplotlib==3.5.2 pandas===1.3.5; python_version == '3.7' -pandas==1.4.2; python_version >= '3.8' +pandas==1.4.3; python_version >= '3.8' pyarrow==8.0.0 pytz==2022.1 -typing-extensions==4.2.0 +typing-extensions==4.3.0 From bc73d6b4a244c389a6811be3dfd480876b9c6b32 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 9 Aug 2022 03:53:19 +0200 Subject: [PATCH 1473/2016] fix(deps): allow pyarrow < 10 (#1304) * chore(deps): update all dependencies * revert * fix(deps): allow pyarrow < 10 * chore: update dependency db-dtypes==1.0.3 * revert Co-authored-by: Anthonios Partheniou --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index f811a47b1fe3..4260cdf4ff11 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -44,7 +44,7 @@ "packaging >= 14.3, <22.0.0dev", "protobuf >= 3.12.0, <4.0.0dev", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", - "pyarrow >= 3.0.0, < 9.0dev", + "pyarrow >= 3.0.0, < 10.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { From bb6513f318d2150e06d2cf9fb22ef6350e5883a8 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 9 Aug 2022 06:07:08 -0400 Subject: [PATCH 1474/2016] chore(main): release 3.3.1 (#1306) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index c7214ea167b7..635a08e1f3cb 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.3.1](https://github.com/googleapis/python-bigquery/compare/v3.3.0...v3.3.1) (2022-08-09) + + +### Bug Fixes + +* **deps:** allow pyarrow < 10 ([#1304](https://github.com/googleapis/python-bigquery/issues/1304)) ([13616a9](https://github.com/googleapis/python-bigquery/commit/13616a910ba2e9b7bc3595847229b56e70c99f84)) + ## [3.3.0](https://github.com/googleapis/python-bigquery/compare/v3.2.0...v3.3.0) (2022-07-25) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 2279c3674ff0..a5a77c35d2d7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.3.0" +__version__ = "3.3.1" From ddbd327a4a13ad3458ad1ac742afc37fb4b960c7 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 9 Aug 2022 12:52:32 +0200 Subject: [PATCH 1475/2016] chore(deps): update all dependencies (#1305) * chore(deps): update all dependencies * chore: update dependency google-cloud-bigquery==3.3.1 Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 8 ++++---- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../samples/snippets/requirements.txt | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 1ce52d17936b..634e7d2e1845 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -6,14 +6,14 @@ click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.0.2 +db-dtypes==1.0.3 Fiona==1.8.21 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' google-api-core==2.8.2 -google-auth==2.9.1 -google-cloud-bigquery==3.3.0 +google-auth==2.10.0 +google-cloud-bigquery==3.3.1 google-cloud-bigquery-storage==2.14.1 google-cloud-core==2.3.2 google-crc32c==1.3.0 @@ -28,7 +28,7 @@ packaging==21.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' proto-plus==1.20.6 -pyarrow==8.0.0 +pyarrow==9.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 6ebe55a61f40..8de9f8f68ade 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -db-dtypes==1.0.2 +db-dtypes==1.0.3 google-cloud-bigquery-storage==2.14.1 google-auth-oauthlib==0.5.2 grpcio==1.47.0 @@ -8,6 +8,6 @@ ipython==8.4.0; python_version >= '3.9' matplotlib==3.5.2 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' -pyarrow==8.0.0 +pyarrow==9.0.0 pytz==2022.1 typing-extensions==4.3.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 90c494e05e8d..8664d2418dad 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ -db-dtypes==1.0.2 -google-cloud-bigquery==3.3.0 +db-dtypes==1.0.3 +google-cloud-bigquery==3.3.1 google-cloud-bigquery-storage==2.14.1 google-auth-oauthlib==0.5.2 grpcio==1.47.0 @@ -9,6 +9,6 @@ ipython==8.4.0; python_version >= '3.9' matplotlib==3.5.2 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' -pyarrow==8.0.0 +pyarrow==9.0.0 pytz==2022.1 typing-extensions==4.3.0 From a3c1662696c517c3d2a72d730c7eb8b29bb1e1fa Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 12 Aug 2022 13:14:37 +0200 Subject: [PATCH 1476/2016] chore(deps): update all dependencies (#1308) * chore(deps): update all dependencies * revert Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 634e7d2e1845..3573f2494ad2 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -27,7 +27,7 @@ mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' -proto-plus==1.20.6 +proto-plus==1.22.0 pyarrow==9.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 8de9f8f68ade..2bf0ceab48e4 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -5,7 +5,7 @@ grpcio==1.47.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.4.0; python_version >= '3.9' -matplotlib==3.5.2 +matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' pyarrow==9.0.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 8664d2418dad..e5969ceeb9c2 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -6,7 +6,7 @@ grpcio==1.47.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.4.0; python_version >= '3.9' -matplotlib==3.5.2 +matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' pyarrow==9.0.0 From aec12679759cbe534c1e7776f949be4f72f19c1c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 12 Aug 2022 18:13:12 +0200 Subject: [PATCH 1477/2016] chore(deps): update all dependencies (#1313) * chore(deps): update all dependencies * revert Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 3573f2494ad2..a4f332cef358 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -34,7 +34,7 @@ pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.1 +pytz==2022.2 PyYAML==6.0 requests==2.28.1 rsa==4.9 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 2bf0ceab48e4..e93e1e75520f 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -9,5 +9,5 @@ matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.1 +pytz==2022.2 typing-extensions==4.3.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index e5969ceeb9c2..d4f89f500a20 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -10,5 +10,5 @@ matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.1 +pytz==2022.2 typing-extensions==4.3.0 From fa706f2da180b1844a9a8940a66255934515d5d4 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 12 Aug 2022 13:50:10 -0400 Subject: [PATCH 1478/2016] fix(deps): require protobuf >=3.19, < 5.0.0 (#1311) fix(deps): require proto-plus >= 1.22.0 --- packages/google-cloud-bigquery/setup.py | 4 ++-- packages/google-cloud-bigquery/testing/constraints-3.7.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 4260cdf4ff11..d8f2bb22605a 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -35,14 +35,14 @@ # https://github.com/googleapis/google-cloud-python/issues/10566 "google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", - "proto-plus >= 1.15.0, <2.0.0dev", + "proto-plus >= 1.22.0, <2.0.0dev", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3, <22.0.0dev", - "protobuf >= 3.12.0, <4.0.0dev", # For the legacy proto-based types. + "protobuf >= 3.19.0, <5.0.0dev", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "pyarrow >= 3.0.0, < 10.0dev", "requests >= 2.18.0, < 3.0.0dev", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index c5803387e7c0..67313f6b80d3 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -17,8 +17,8 @@ opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 pandas==1.1.0 -proto-plus==1.15.0 -protobuf==3.12.0 +proto-plus==1.22.0 +protobuf==3.19.0 pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.18.0 From ddbc87716b05254d1afb066332e03f67a20e8a94 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 15 Aug 2022 15:54:51 +0200 Subject: [PATCH 1479/2016] chore(deps): update dependency pytz to v2022.2.1 (#1316) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index a4f332cef358..5c366fa4d3cc 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -34,7 +34,7 @@ pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.2 +pytz==2022.2.1 PyYAML==6.0 requests==2.28.1 rsa==4.9 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index e93e1e75520f..8bef948d0bda 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -9,5 +9,5 @@ matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.2 +pytz==2022.2.1 typing-extensions==4.3.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index d4f89f500a20..20a7f913ffe5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -10,5 +10,5 @@ matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.2 +pytz==2022.2.1 typing-extensions==4.3.0 From d0d555a24578f4f57ac6045a6daf940f3c050247 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 16 Aug 2022 17:02:05 +0200 Subject: [PATCH 1480/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.14.2 (#1317) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 5c366fa4d3cc..bf2da5f28963 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas==0.11.1; python_version >= '3.8' google-api-core==2.8.2 google-auth==2.10.0 google-cloud-bigquery==3.3.1 -google-cloud-bigquery-storage==2.14.1 +google-cloud-bigquery-storage==2.14.2 google-cloud-core==2.3.2 google-crc32c==1.3.0 google-resumable-media==2.3.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 8bef948d0bda..4c0a67a18a9f 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.3 -google-cloud-bigquery-storage==2.14.1 +google-cloud-bigquery-storage==2.14.2 google-auth-oauthlib==0.5.2 grpcio==1.47.0 ipython===7.31.1; python_version == '3.7' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 20a7f913ffe5..5f57cb8b4864 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.0.3 google-cloud-bigquery==3.3.1 -google-cloud-bigquery-storage==2.14.1 +google-cloud-bigquery-storage==2.14.2 google-auth-oauthlib==0.5.2 grpcio==1.47.0 ipython===7.31.1; python_version == '3.7' From 345d5dfcf4deaec6b8df5980655a945c1d2b1661 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 16 Aug 2022 14:13:59 -0400 Subject: [PATCH 1481/2016] chore(main): release 3.3.2 (#1314) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 8 ++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 635a08e1f3cb..6ba373179afa 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.3.2](https://github.com/googleapis/python-bigquery/compare/v3.3.1...v3.3.2) (2022-08-16) + + +### Bug Fixes + +* **deps:** require proto-plus >= 1.22.0 ([1de7a52](https://github.com/googleapis/python-bigquery/commit/1de7a52cb85d4876e4aa87346aff5725c8294c4e)) +* **deps:** require protobuf >=3.19, < 5.0.0 ([#1311](https://github.com/googleapis/python-bigquery/issues/1311)) ([1de7a52](https://github.com/googleapis/python-bigquery/commit/1de7a52cb85d4876e4aa87346aff5725c8294c4e)) + ## [3.3.1](https://github.com/googleapis/python-bigquery/compare/v3.3.0...v3.3.1) (2022-08-09) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index a5a77c35d2d7..eb307e8d6286 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.3.1" +__version__ = "3.3.2" From 862bc1c811ddcf0fbd2360ae2a2dc98ee4852d75 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 17 Aug 2022 16:26:19 +0200 Subject: [PATCH 1482/2016] chore(deps): update dependency shapely to v1.8.3 (#1320) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index bf2da5f28963..60a04126f173 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -38,7 +38,7 @@ pytz==2022.2.1 PyYAML==6.0 requests==2.28.1 rsa==4.9 -Shapely==1.8.2 +Shapely==1.8.3 six==1.16.0 typing-extensions==4.3.0 typing-inspect==0.7.1 From c0a1874bf3b98d609e854ea22e89df37bf5cd50d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Aug 2022 18:36:03 +0200 Subject: [PATCH 1483/2016] chore(deps): update all dependencies (#1322) --- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 60a04126f173..f03b2eebd8fe 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' google-api-core==2.8.2 google-auth==2.10.0 -google-cloud-bigquery==3.3.1 +google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.14.2 google-cloud-core==2.3.2 google-crc32c==1.3.0 @@ -38,8 +38,8 @@ pytz==2022.2.1 PyYAML==6.0 requests==2.28.1 rsa==4.9 -Shapely==1.8.3 +Shapely==1.8.4 six==1.16.0 typing-extensions==4.3.0 -typing-inspect==0.7.1 +typing-inspect==0.8.0 urllib3==1.26.11 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 5f57cb8b4864..bbef52e66b00 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.3 -google-cloud-bigquery==3.3.1 +google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.14.2 google-auth-oauthlib==0.5.2 grpcio==1.47.0 From 6bbfcda95c41af053c325d97a924ef10ee965ecc Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 23 Aug 2022 16:20:49 +0200 Subject: [PATCH 1484/2016] chore(deps): update all dependencies (#1323) --- .../samples/geography/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index f03b2eebd8fe..feca08cca2b1 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==22.1.0 certifi==2022.6.15 cffi==1.15.1 -charset-normalizer==2.1.0 +charset-normalizer==2.1.1 click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 @@ -12,7 +12,7 @@ geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' google-api-core==2.8.2 -google-auth==2.10.0 +google-auth==2.11.0 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.14.2 google-cloud-core==2.3.2 @@ -42,4 +42,4 @@ Shapely==1.8.4 six==1.16.0 typing-extensions==4.3.0 typing-inspect==0.8.0 -urllib3==1.26.11 +urllib3==1.26.12 From 3c009b65bc1a949006e2ecc5cb54efb90103098a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 19:16:09 -0400 Subject: [PATCH 1485/2016] chore: remove 'pip install' statements from python_library templates [autoapprove] (#1325) Source-Link: https://github.com/googleapis/synthtool/commit/69fabaee9eca28af7ecaa02c86895e606fbbebd6 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:562802bfac02e012a6ac34eda282f81d06e77326b82a32d7bbb1369ff552b387 Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/publish-docs.sh | 4 +- .../google-cloud-bigquery/.kokoro/release.sh | 5 +- .../.kokoro/requirements.in | 8 + .../.kokoro/requirements.txt | 464 ++++++++++++++++++ packages/google-cloud-bigquery/renovate.json | 2 +- 6 files changed, 477 insertions(+), 10 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/requirements.in create mode 100644 packages/google-cloud-bigquery/.kokoro/requirements.txt diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 1ce608523524..c6acdf3f90c4 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:e7bb19d47c13839fe8c147e50e02e8b6cf5da8edd1af8b82208cd6f66cc2829c -# created: 2022-07-05T18:31:20.838186805Z + digest: sha256:562802bfac02e012a6ac34eda282f81d06e77326b82a32d7bbb1369ff552b387 +# created: 2022-08-24T17:07:22.006876712Z diff --git a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh index 8acb14e802b0..1c4d62370042 100755 --- a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh +++ b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh @@ -21,14 +21,12 @@ export PYTHONUNBUFFERED=1 export PATH="${HOME}/.local/bin:${PATH}" # Install nox -python3 -m pip install --user --upgrade --quiet nox +python3 -m pip install --require-hashes -r .kokoro/requirements.txt python3 -m nox --version # build docs nox -s docs -python3 -m pip install --user gcp-docuploader - # create metadata python3 -m docuploader create-metadata \ --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ diff --git a/packages/google-cloud-bigquery/.kokoro/release.sh b/packages/google-cloud-bigquery/.kokoro/release.sh index b030caeefa5a..879f9ef84fa3 100755 --- a/packages/google-cloud-bigquery/.kokoro/release.sh +++ b/packages/google-cloud-bigquery/.kokoro/release.sh @@ -16,12 +16,9 @@ set -eo pipefail # Start the releasetool reporter -python3 -m pip install gcp-releasetool +python3 -m pip install --require-hashes -r .kokoro/requirements.txt python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source /tmp/publisher-script -# Ensure that we have the latest versions of Twine, Wheel, and Setuptools. -python3 -m pip install --upgrade twine wheel setuptools - # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.in b/packages/google-cloud-bigquery/.kokoro/requirements.in new file mode 100644 index 000000000000..7718391a34d7 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/requirements.in @@ -0,0 +1,8 @@ +gcp-docuploader +gcp-releasetool +importlib-metadata +typing-extensions +twine +wheel +setuptools +nox \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt new file mode 100644 index 000000000000..c4b824f247e3 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -0,0 +1,464 @@ +# +# This file is autogenerated by pip-compile with python 3.10 +# To update, run: +# +# pip-compile --allow-unsafe --generate-hashes requirements.in +# +argcomplete==2.0.0 \ + --hash=sha256:6372ad78c89d662035101418ae253668445b391755cfe94ea52f1b9d22425b20 \ + --hash=sha256:cffa11ea77999bb0dd27bb25ff6dc142a6796142f68d45b1a26b11f58724561e + # via nox +attrs==22.1.0 \ + --hash=sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6 \ + --hash=sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c + # via gcp-releasetool +bleach==5.0.1 \ + --hash=sha256:085f7f33c15bd408dd9b17a4ad77c577db66d76203e5984b1bd59baeee948b2a \ + --hash=sha256:0d03255c47eb9bd2f26aa9bb7f2107732e7e8fe195ca2f64709fcf3b0a4a085c + # via readme-renderer +cachetools==5.2.0 \ + --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \ + --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db + # via google-auth +certifi==2022.6.15 \ + --hash=sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d \ + --hash=sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412 + # via requests +cffi==1.15.1 \ + --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ + --hash=sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef \ + --hash=sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104 \ + --hash=sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426 \ + --hash=sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405 \ + --hash=sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375 \ + --hash=sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a \ + --hash=sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e \ + --hash=sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc \ + --hash=sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf \ + --hash=sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185 \ + --hash=sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497 \ + --hash=sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3 \ + --hash=sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35 \ + --hash=sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c \ + --hash=sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83 \ + --hash=sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21 \ + --hash=sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca \ + --hash=sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984 \ + --hash=sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac \ + --hash=sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd \ + --hash=sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee \ + --hash=sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a \ + --hash=sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2 \ + --hash=sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192 \ + --hash=sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7 \ + --hash=sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585 \ + --hash=sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f \ + --hash=sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e \ + --hash=sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27 \ + --hash=sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b \ + --hash=sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e \ + --hash=sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e \ + --hash=sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d \ + --hash=sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c \ + --hash=sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415 \ + --hash=sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82 \ + --hash=sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02 \ + --hash=sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314 \ + --hash=sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325 \ + --hash=sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c \ + --hash=sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3 \ + --hash=sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914 \ + --hash=sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045 \ + --hash=sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d \ + --hash=sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9 \ + --hash=sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5 \ + --hash=sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2 \ + --hash=sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c \ + --hash=sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3 \ + --hash=sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2 \ + --hash=sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8 \ + --hash=sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d \ + --hash=sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d \ + --hash=sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9 \ + --hash=sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162 \ + --hash=sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76 \ + --hash=sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4 \ + --hash=sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e \ + --hash=sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9 \ + --hash=sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6 \ + --hash=sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b \ + --hash=sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01 \ + --hash=sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0 + # via cryptography +charset-normalizer==2.1.1 \ + --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ + --hash=sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f + # via requests +click==8.0.4 \ + --hash=sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1 \ + --hash=sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb + # via + # gcp-docuploader + # gcp-releasetool +colorlog==6.6.0 \ + --hash=sha256:344f73204009e4c83c5b6beb00b3c45dc70fcdae3c80db919e0a4171d006fde8 \ + --hash=sha256:351c51e866c86c3217f08e4b067a7974a678be78f07f85fc2d55b8babde6d94e + # via + # gcp-docuploader + # nox +commonmark==0.9.1 \ + --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ + --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 + # via rich +cryptography==37.0.4 \ + --hash=sha256:190f82f3e87033821828f60787cfa42bff98404483577b591429ed99bed39d59 \ + --hash=sha256:2be53f9f5505673eeda5f2736bea736c40f051a739bfae2f92d18aed1eb54596 \ + --hash=sha256:30788e070800fec9bbcf9faa71ea6d8068f5136f60029759fd8c3efec3c9dcb3 \ + --hash=sha256:3d41b965b3380f10e4611dbae366f6dc3cefc7c9ac4e8842a806b9672ae9add5 \ + --hash=sha256:4c590ec31550a724ef893c50f9a97a0c14e9c851c85621c5650d699a7b88f7ab \ + --hash=sha256:549153378611c0cca1042f20fd9c5030d37a72f634c9326e225c9f666d472884 \ + --hash=sha256:63f9c17c0e2474ccbebc9302ce2f07b55b3b3fcb211ded18a42d5764f5c10a82 \ + --hash=sha256:6bc95ed67b6741b2607298f9ea4932ff157e570ef456ef7ff0ef4884a134cc4b \ + --hash=sha256:7099a8d55cd49b737ffc99c17de504f2257e3787e02abe6d1a6d136574873441 \ + --hash=sha256:75976c217f10d48a8b5a8de3d70c454c249e4b91851f6838a4e48b8f41eb71aa \ + --hash=sha256:7bc997818309f56c0038a33b8da5c0bfbb3f1f067f315f9abd6fc07ad359398d \ + --hash=sha256:80f49023dd13ba35f7c34072fa17f604d2f19bf0989f292cedf7ab5770b87a0b \ + --hash=sha256:91ce48d35f4e3d3f1d83e29ef4a9267246e6a3be51864a5b7d2247d5086fa99a \ + --hash=sha256:a958c52505c8adf0d3822703078580d2c0456dd1d27fabfb6f76fe63d2971cd6 \ + --hash=sha256:b62439d7cd1222f3da897e9a9fe53bbf5c104fff4d60893ad1355d4c14a24157 \ + --hash=sha256:b7f8dd0d4c1f21759695c05a5ec8536c12f31611541f8904083f3dc582604280 \ + --hash=sha256:d204833f3c8a33bbe11eda63a54b1aad7aa7456ed769a982f21ec599ba5fa282 \ + --hash=sha256:e007f052ed10cc316df59bc90fbb7ff7950d7e2919c9757fd42a2b8ecf8a5f67 \ + --hash=sha256:f2dcb0b3b63afb6df7fd94ec6fbddac81b5492513f7b0436210d390c14d46ee8 \ + --hash=sha256:f721d1885ecae9078c3f6bbe8a88bc0786b6e749bf32ccec1ef2b18929a05046 \ + --hash=sha256:f7a6de3e98771e183645181b3627e2563dcde3ce94a9e42a3f427d2255190327 \ + --hash=sha256:f8c0a6e9e1dd3eb0414ba320f85da6b0dcbd543126e30fcc546e7372a7fbf3b9 + # via + # gcp-releasetool + # secretstorage +distlib==0.3.5 \ + --hash=sha256:a7f75737c70be3b25e2bee06288cec4e4c221de18455b2dd037fe2a795cab2fe \ + --hash=sha256:b710088c59f06338ca514800ad795a132da19fda270e3ce4affc74abf955a26c + # via virtualenv +docutils==0.19 \ + --hash=sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6 \ + --hash=sha256:5e1de4d849fee02c63b040a4a3fd567f4ab104defd8a5511fbbc24a8a017efbc + # via readme-renderer +filelock==3.8.0 \ + --hash=sha256:55447caa666f2198c5b6b13a26d2084d26fa5b115c00d065664b2124680c4edc \ + --hash=sha256:617eb4e5eedc82fc5f47b6d61e4d11cb837c56cb4544e39081099fa17ad109d4 + # via virtualenv +gcp-docuploader==0.6.3 \ + --hash=sha256:ba8c9d76b3bbac54b0311c503a373b00edc2dc02d6d54ea9507045adb8e870f7 \ + --hash=sha256:c0f5aaa82ce1854a386197e4e359b120ad6d4e57ae2c812fce42219a3288026b + # via -r requirements.in +gcp-releasetool==1.8.6 \ + --hash=sha256:42e51ab8e2e789bc8e22a03c09352962cd3452951c801a2230d564816630304a \ + --hash=sha256:a3518b79d1b243c494eac392a01c7fd65187fd6d52602dcab9b529bc934d4da1 + # via -r requirements.in +google-api-core==2.8.2 \ + --hash=sha256:06f7244c640322b508b125903bb5701bebabce8832f85aba9335ec00b3d02edc \ + --hash=sha256:93c6a91ccac79079ac6bbf8b74ee75db970cc899278b97d53bc012f35908cf50 + # via + # google-cloud-core + # google-cloud-storage +google-auth==2.11.0 \ + --hash=sha256:be62acaae38d0049c21ca90f27a23847245c9f161ff54ede13af2cb6afecbac9 \ + --hash=sha256:ed65ecf9f681832298e29328e1ef0a3676e3732b2e56f41532d45f70a22de0fb + # via + # gcp-releasetool + # google-api-core + # google-cloud-core + # google-cloud-storage +google-cloud-core==2.3.2 \ + --hash=sha256:8417acf6466be2fa85123441696c4badda48db314c607cf1e5d543fa8bdc22fe \ + --hash=sha256:b9529ee7047fd8d4bf4a2182de619154240df17fbe60ead399078c1ae152af9a + # via google-cloud-storage +google-cloud-storage==2.5.0 \ + --hash=sha256:19a26c66c317ce542cea0830b7e787e8dac2588b6bfa4d3fd3b871ba16305ab0 \ + --hash=sha256:382f34b91de2212e3c2e7b40ec079d27ee2e3dbbae99b75b1bcd8c63063ce235 + # via gcp-docuploader +google-crc32c==1.3.0 \ + --hash=sha256:04e7c220798a72fd0f08242bc8d7a05986b2a08a0573396187fd32c1dcdd58b3 \ + --hash=sha256:05340b60bf05b574159e9bd940152a47d38af3fb43803ffe71f11d704b7696a6 \ + --hash=sha256:12674a4c3b56b706153a358eaa1018c4137a5a04635b92b4652440d3d7386206 \ + --hash=sha256:127f9cc3ac41b6a859bd9dc4321097b1a4f6aa7fdf71b4f9227b9e3ebffb4422 \ + --hash=sha256:13af315c3a0eec8bb8b8d80b8b128cb3fcd17d7e4edafc39647846345a3f003a \ + --hash=sha256:1926fd8de0acb9d15ee757175ce7242e235482a783cd4ec711cc999fc103c24e \ + --hash=sha256:226f2f9b8e128a6ca6a9af9b9e8384f7b53a801907425c9a292553a3a7218ce0 \ + --hash=sha256:276de6273eb074a35bc598f8efbc00c7869c5cf2e29c90748fccc8c898c244df \ + --hash=sha256:318f73f5484b5671f0c7f5f63741ab020a599504ed81d209b5c7129ee4667407 \ + --hash=sha256:3bbce1be3687bbfebe29abdb7631b83e6b25da3f4e1856a1611eb21854b689ea \ + --hash=sha256:42ae4781333e331a1743445931b08ebdad73e188fd554259e772556fc4937c48 \ + --hash=sha256:58be56ae0529c664cc04a9c76e68bb92b091e0194d6e3c50bea7e0f266f73713 \ + --hash=sha256:5da2c81575cc3ccf05d9830f9e8d3c70954819ca9a63828210498c0774fda1a3 \ + --hash=sha256:6311853aa2bba4064d0c28ca54e7b50c4d48e3de04f6770f6c60ebda1e975267 \ + --hash=sha256:650e2917660e696041ab3dcd7abac160b4121cd9a484c08406f24c5964099829 \ + --hash=sha256:6a4db36f9721fdf391646685ecffa404eb986cbe007a3289499020daf72e88a2 \ + --hash=sha256:779cbf1ce375b96111db98fca913c1f5ec11b1d870e529b1dc7354b2681a8c3a \ + --hash=sha256:7f6fe42536d9dcd3e2ffb9d3053f5d05221ae3bbcefbe472bdf2c71c793e3183 \ + --hash=sha256:891f712ce54e0d631370e1f4997b3f182f3368179198efc30d477c75d1f44942 \ + --hash=sha256:95c68a4b9b7828ba0428f8f7e3109c5d476ca44996ed9a5f8aac6269296e2d59 \ + --hash=sha256:96a8918a78d5d64e07c8ea4ed2bc44354e3f93f46a4866a40e8db934e4c0d74b \ + --hash=sha256:9c3cf890c3c0ecfe1510a452a165431b5831e24160c5fcf2071f0f85ca5a47cd \ + --hash=sha256:9f58099ad7affc0754ae42e6d87443299f15d739b0ce03c76f515153a5cda06c \ + --hash=sha256:a0b9e622c3b2b8d0ce32f77eba617ab0d6768b82836391e4f8f9e2074582bf02 \ + --hash=sha256:a7f9cbea4245ee36190f85fe1814e2d7b1e5f2186381b082f5d59f99b7f11328 \ + --hash=sha256:bab4aebd525218bab4ee615786c4581952eadc16b1ff031813a2fd51f0cc7b08 \ + --hash=sha256:c124b8c8779bf2d35d9b721e52d4adb41c9bfbde45e6a3f25f0820caa9aba73f \ + --hash=sha256:c9da0a39b53d2fab3e5467329ed50e951eb91386e9d0d5b12daf593973c3b168 \ + --hash=sha256:ca60076c388728d3b6ac3846842474f4250c91efbfe5afa872d3ffd69dd4b318 \ + --hash=sha256:cb6994fff247987c66a8a4e550ef374671c2b82e3c0d2115e689d21e511a652d \ + --hash=sha256:d1c1d6236feab51200272d79b3d3e0f12cf2cbb12b208c835b175a21efdb0a73 \ + --hash=sha256:dd7760a88a8d3d705ff562aa93f8445ead54f58fd482e4f9e2bafb7e177375d4 \ + --hash=sha256:dda4d8a3bb0b50f540f6ff4b6033f3a74e8bf0bd5320b70fab2c03e512a62812 \ + --hash=sha256:e0f1ff55dde0ebcfbef027edc21f71c205845585fffe30d4ec4979416613e9b3 \ + --hash=sha256:e7a539b9be7b9c00f11ef16b55486141bc2cdb0c54762f84e3c6fc091917436d \ + --hash=sha256:eb0b14523758e37802f27b7f8cd973f5f3d33be7613952c0df904b68c4842f0e \ + --hash=sha256:ed447680ff21c14aaceb6a9f99a5f639f583ccfe4ce1a5e1d48eb41c3d6b3217 \ + --hash=sha256:f52a4ad2568314ee713715b1e2d79ab55fab11e8b304fd1462ff5cccf4264b3e \ + --hash=sha256:fbd60c6aaa07c31d7754edbc2334aef50601b7f1ada67a96eb1eb57c7c72378f \ + --hash=sha256:fc28e0db232c62ca0c3600884933178f0825c99be4474cdd645e378a10588125 \ + --hash=sha256:fe31de3002e7b08eb20823b3735b97c86c5926dd0581c7710a680b418a8709d4 \ + --hash=sha256:fec221a051150eeddfdfcff162e6db92c65ecf46cb0f7bb1bf812a1520ec026b \ + --hash=sha256:ff71073ebf0e42258a42a0b34f2c09ec384977e7f6808999102eedd5b49920e3 + # via google-resumable-media +google-resumable-media==2.3.3 \ + --hash=sha256:27c52620bd364d1c8116eaac4ea2afcbfb81ae9139fb3199652fcac1724bfb6c \ + --hash=sha256:5b52774ea7a829a8cdaa8bd2d4c3d4bc660c91b30857ab2668d0eb830f4ea8c5 + # via google-cloud-storage +googleapis-common-protos==1.56.4 \ + --hash=sha256:8eb2cbc91b69feaf23e32452a7ae60e791e09967d81d4fcc7fc388182d1bd394 \ + --hash=sha256:c25873c47279387cfdcbdafa36149887901d36202cb645a0e4f29686bf6e4417 + # via google-api-core +idna==3.3 \ + --hash=sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff \ + --hash=sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d + # via requests +importlib-metadata==4.12.0 \ + --hash=sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670 \ + --hash=sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23 + # via + # -r requirements.in + # twine +jeepney==0.8.0 \ + --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ + --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755 + # via + # keyring + # secretstorage +jinja2==3.1.2 \ + --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \ + --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 + # via gcp-releasetool +keyring==23.8.2 \ + --hash=sha256:0d9973f8891850f1ade5f26aafd06bb16865fbbae3fc56b0defb6a14a2624003 \ + --hash=sha256:10d2a8639663fe2090705a00b8c47c687cacdf97598ea9c11456679fa974473a + # via + # gcp-releasetool + # twine +markupsafe==2.1.1 \ + --hash=sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003 \ + --hash=sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88 \ + --hash=sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5 \ + --hash=sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7 \ + --hash=sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a \ + --hash=sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603 \ + --hash=sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1 \ + --hash=sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135 \ + --hash=sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247 \ + --hash=sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6 \ + --hash=sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601 \ + --hash=sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77 \ + --hash=sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02 \ + --hash=sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e \ + --hash=sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63 \ + --hash=sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f \ + --hash=sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980 \ + --hash=sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b \ + --hash=sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812 \ + --hash=sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff \ + --hash=sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96 \ + --hash=sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1 \ + --hash=sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925 \ + --hash=sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a \ + --hash=sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6 \ + --hash=sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e \ + --hash=sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f \ + --hash=sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4 \ + --hash=sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f \ + --hash=sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3 \ + --hash=sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c \ + --hash=sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a \ + --hash=sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417 \ + --hash=sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a \ + --hash=sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a \ + --hash=sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37 \ + --hash=sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452 \ + --hash=sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933 \ + --hash=sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a \ + --hash=sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7 + # via jinja2 +nox==2022.8.7 \ + --hash=sha256:1b894940551dc5c389f9271d197ca5d655d40bdc6ccf93ed6880e4042760a34b \ + --hash=sha256:96cca88779e08282a699d672258ec01eb7c792d35bbbf538c723172bce23212c + # via -r requirements.in +packaging==21.3 \ + --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ + --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 + # via + # gcp-releasetool + # nox +pkginfo==1.8.3 \ + --hash=sha256:848865108ec99d4901b2f7e84058b6e7660aae8ae10164e015a6dcf5b242a594 \ + --hash=sha256:a84da4318dd86f870a9447a8c98340aa06216bfc6f2b7bdc4b8766984ae1867c + # via twine +platformdirs==2.5.2 \ + --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ + --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 + # via virtualenv +protobuf==3.20.1 \ + --hash=sha256:06059eb6953ff01e56a25cd02cca1a9649a75a7e65397b5b9b4e929ed71d10cf \ + --hash=sha256:097c5d8a9808302fb0da7e20edf0b8d4703274d140fd25c5edabddcde43e081f \ + --hash=sha256:284f86a6207c897542d7e956eb243a36bb8f9564c1742b253462386e96c6b78f \ + --hash=sha256:32ca378605b41fd180dfe4e14d3226386d8d1b002ab31c969c366549e66a2bb7 \ + --hash=sha256:3cc797c9d15d7689ed507b165cd05913acb992d78b379f6014e013f9ecb20996 \ + --hash=sha256:62f1b5c4cd6c5402b4e2d63804ba49a327e0c386c99b1675c8a0fefda23b2067 \ + --hash=sha256:69ccfdf3657ba59569c64295b7d51325f91af586f8d5793b734260dfe2e94e2c \ + --hash=sha256:6f50601512a3d23625d8a85b1638d914a0970f17920ff39cec63aaef80a93fb7 \ + --hash=sha256:7403941f6d0992d40161aa8bb23e12575637008a5a02283a930addc0508982f9 \ + --hash=sha256:755f3aee41354ae395e104d62119cb223339a8f3276a0cd009ffabfcdd46bb0c \ + --hash=sha256:77053d28427a29987ca9caf7b72ccafee011257561259faba8dd308fda9a8739 \ + --hash=sha256:7e371f10abe57cee5021797126c93479f59fccc9693dafd6bd5633ab67808a91 \ + --hash=sha256:9016d01c91e8e625141d24ec1b20fed584703e527d28512aa8c8707f105a683c \ + --hash=sha256:9be73ad47579abc26c12024239d3540e6b765182a91dbc88e23658ab71767153 \ + --hash=sha256:adc31566d027f45efe3f44eeb5b1f329da43891634d61c75a5944e9be6dd42c9 \ + --hash=sha256:adfc6cf69c7f8c50fd24c793964eef18f0ac321315439d94945820612849c388 \ + --hash=sha256:af0ebadc74e281a517141daad9d0f2c5d93ab78e9d455113719a45a49da9db4e \ + --hash=sha256:cb29edb9eab15742d791e1025dd7b6a8f6fcb53802ad2f6e3adcb102051063ab \ + --hash=sha256:cd68be2559e2a3b84f517fb029ee611546f7812b1fdd0aa2ecc9bc6ec0e4fdde \ + --hash=sha256:cdee09140e1cd184ba9324ec1df410e7147242b94b5f8b0c64fc89e38a8ba531 \ + --hash=sha256:db977c4ca738dd9ce508557d4fce0f5aebd105e158c725beec86feb1f6bc20d8 \ + --hash=sha256:dd5789b2948ca702c17027c84c2accb552fc30f4622a98ab5c51fcfe8c50d3e7 \ + --hash=sha256:e250a42f15bf9d5b09fe1b293bdba2801cd520a9f5ea2d7fb7536d4441811d20 \ + --hash=sha256:ff8d8fa42675249bb456f5db06c00de6c2f4c27a065955917b28c4f15978b9c3 + # via + # gcp-docuploader + # gcp-releasetool + # google-api-core +py==1.11.0 \ + --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ + --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 + # via nox +pyasn1==0.4.8 \ + --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \ + --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.2.8 \ + --hash=sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e \ + --hash=sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74 + # via google-auth +pycparser==2.21 \ + --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \ + --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206 + # via cffi +pygments==2.13.0 \ + --hash=sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1 \ + --hash=sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42 + # via + # readme-renderer + # rich +pyjwt==2.4.0 \ + --hash=sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf \ + --hash=sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba + # via gcp-releasetool +pyparsing==3.0.9 \ + --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ + --hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc + # via packaging +pyperclip==1.8.2 \ + --hash=sha256:105254a8b04934f0bc84e9c24eb360a591aaf6535c9def5f29d92af107a9bf57 + # via gcp-releasetool +python-dateutil==2.8.2 \ + --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ + --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 + # via gcp-releasetool +readme-renderer==37.0 \ + --hash=sha256:07b7ea234e03e58f77cc222e206e6abb8f4c0435becce5104794ee591f9301c5 \ + --hash=sha256:9fa416704703e509eeb900696751c908ddeb2011319d93700d8f18baff887a69 + # via twine +requests==2.28.1 \ + --hash=sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983 \ + --hash=sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349 + # via + # gcp-releasetool + # google-api-core + # google-cloud-storage + # requests-toolbelt + # twine +requests-toolbelt==0.9.1 \ + --hash=sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f \ + --hash=sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0 + # via twine +rfc3986==2.0.0 \ + --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ + --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c + # via twine +rich==12.5.1 \ + --hash=sha256:2eb4e6894cde1e017976d2975ac210ef515d7548bc595ba20e195fb9628acdeb \ + --hash=sha256:63a5c5ce3673d3d5fbbf23cd87e11ab84b6b451436f1b7f19ec54b6bc36ed7ca + # via twine +rsa==4.9 \ + --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ + --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 + # via google-auth +secretstorage==3.3.3 \ + --hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \ + --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99 + # via keyring +six==1.16.0 \ + --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ + --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 + # via + # bleach + # gcp-docuploader + # google-auth + # python-dateutil +twine==4.0.1 \ + --hash=sha256:42026c18e394eac3e06693ee52010baa5313e4811d5a11050e7d48436cf41b9e \ + --hash=sha256:96b1cf12f7ae611a4a40b6ae8e9570215daff0611828f5fe1f37a16255ab24a0 + # via -r requirements.in +typing-extensions==4.3.0 \ + --hash=sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02 \ + --hash=sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6 + # via -r requirements.in +urllib3==1.26.12 \ + --hash=sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e \ + --hash=sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997 + # via + # requests + # twine +virtualenv==20.16.3 \ + --hash=sha256:4193b7bc8a6cd23e4eb251ac64f29b4398ab2c233531e66e40b19a6b7b0d30c1 \ + --hash=sha256:d86ea0bb50e06252d79e6c241507cb904fcd66090c3271381372d6221a3970f9 + # via nox +webencodings==0.5.1 \ + --hash=sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 \ + --hash=sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923 + # via bleach +wheel==0.37.1 \ + --hash=sha256:4bdcd7d840138086126cd09254dc6195fb4fc6f01c050a1d7236f2630db1d22a \ + --hash=sha256:e9a504e793efbca1b8e0e9cb979a249cf4a0a7b5b8c9e8b65a5e39d49529c1c4 + # via -r requirements.in +zipp==3.8.1 \ + --hash=sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2 \ + --hash=sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009 + # via importlib-metadata + +# The following packages are considered to be unsafe in a requirements file: +setuptools==65.2.0 \ + --hash=sha256:7f4bc85450898a09f76ebf28b72fa25bc7111f6c7d665d514a60bba9c75ef2a9 \ + --hash=sha256:a3ca5857c89f82f5c9410e8508cb32f4872a3bafd4aa7ae122a24ca33bccc750 + # via -r requirements.in diff --git a/packages/google-cloud-bigquery/renovate.json b/packages/google-cloud-bigquery/renovate.json index c21036d385e5..566a70f3cc3c 100644 --- a/packages/google-cloud-bigquery/renovate.json +++ b/packages/google-cloud-bigquery/renovate.json @@ -5,7 +5,7 @@ ":preserveSemverRanges", ":disableDependencyDashboard" ], - "ignorePaths": [".pre-commit-config.yaml"], + "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] } From f33806506d7dad83d7f12b6c7082736b56659778 Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 29 Aug 2022 11:59:29 -0700 Subject: [PATCH 1486/2016] fix: validate opentelemetry span job attributes have values (#1327) fix: validate opentelemetry span job attributes have values There are several job properties that are optional. Existing opentelemetry instrumentation disallows span attribute keys without appropriate values, so this change validates field presence before propagating. --- .../cloud/bigquery/opentelemetry_tracing.py | 12 ++++++--- .../tests/unit/test_opentelemetry_tracing.py | 26 +++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py index adecea1213ac..2345fd1bb94e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -107,10 +107,7 @@ def _set_client_attributes(client): def _set_job_attributes(job_ref): job_attributes = { "db.name": job_ref.project, - "location": job_ref.location, - "num_child_jobs": job_ref.num_child_jobs, "job_id": job_ref.job_id, - "parent_job_id": job_ref.parent_job_id, "state": job_ref.state, } @@ -125,4 +122,13 @@ def _set_job_attributes(job_ref): if job_ref.ended is not None: job_attributes["timeEnded"] = job_ref.ended.isoformat() + if job_ref.location is not None: + job_attributes["location"] = job_ref.location + + if job_ref.parent_job_id is not None: + job_attributes["parent_job_id"] = job_ref.parent_job_id + + if job_ref.num_child_jobs is not None: + job_attributes["num_child_jobs"] = job_ref.num_child_jobs + return job_attributes diff --git a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py index 3021a3dbff73..4cc58713c83f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py @@ -164,6 +164,32 @@ def test_default_job_attributes(setup): assert span.attributes == expected_attributes +@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") +def test_optional_job_attributes(setup): + # This test ensures we don't propagate unset values into span attributes + import google.cloud._helpers + + time_created = datetime.datetime( + 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + + with mock.patch("google.cloud.bigquery.job._AsyncJob") as test_job_ref: + test_job_ref.job_id = "test_job_id" + test_job_ref.location = None + test_job_ref.project = "test_project_id" + test_job_ref.created = time_created + test_job_ref.state = "some_job_state" + test_job_ref.num_child_jobs = None + test_job_ref.parent_job_id = None + + with opentelemetry_tracing.create_span( + TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job_ref + ) as span: + assert span is not None + for val in span.attributes.values(): + assert val is not None + + @pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") def test_default_no_data_leakage(setup): import google.auth.credentials From c6fa82b8633fac21dd78142fcdb21cf7cca38ae4 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 30 Aug 2022 09:32:14 -0400 Subject: [PATCH 1487/2016] chore(python): exclude grpcio==1.49.0rc1 in tests [autoapprove] (#1328) * chore(python): exclude `grpcio==1.49.0rc1` in tests Source-Link: https://github.com/googleapis/synthtool/commit/c4dd5953003d13b239f872d329c3146586bb417e Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:ce3c1686bc81145c81dd269bd12c4025c6b275b22d14641358827334fddb1d72 * chore(python): exclude grpcio==1.49.0rc1 in tests Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- packages/google-cloud-bigquery/.kokoro/requirements.txt | 6 +++--- packages/google-cloud-bigquery/noxfile.py | 7 +++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index c6acdf3f90c4..23e106b65770 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:562802bfac02e012a6ac34eda282f81d06e77326b82a32d7bbb1369ff552b387 -# created: 2022-08-24T17:07:22.006876712Z + digest: sha256:ce3c1686bc81145c81dd269bd12c4025c6b275b22d14641358827334fddb1d72 +# created: 2022-08-29T17:28:30.441852797Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index c4b824f247e3..4b29ef247bed 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -136,9 +136,9 @@ cryptography==37.0.4 \ # via # gcp-releasetool # secretstorage -distlib==0.3.5 \ - --hash=sha256:a7f75737c70be3b25e2bee06288cec4e4c221de18455b2dd037fe2a795cab2fe \ - --hash=sha256:b710088c59f06338ca514800ad795a132da19fda270e3ce4affc74abf955a26c +distlib==0.3.6 \ + --hash=sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46 \ + --hash=sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e # via virtualenv docutils==0.19 \ --hash=sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6 \ diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index c6f7c76b1d89..d9883d0690b5 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -160,7 +160,9 @@ def system(session): session.skip("Credentials must be set via environment variable.") # Use pre-release gRPC for system tests. - session.install("--pre", "grpcio", "-c", constraints_path) + # Exclude version 1.49.0rc1 which has a known issue. + # See https://github.com/grpc/grpc/pull/30642 + session.install("--pre", "grpcio!=1.49.0rc1", "-c", constraints_path) # Install all test dependencies, then install local packages in place. session.install( @@ -288,7 +290,8 @@ def prerelease_deps(session): "google-cloud-bigquery-storage", "google-cloud-core", "google-resumable-media", - "grpcio", + # Exclude version 1.49.0rc1 which has a known issue. See https://github.com/grpc/grpc/pull/30642 + "grpcio!=1.49.0rc1", ) session.install( "freezegun", From 1b4263640ff48f556473f87471f4ce6a9866efa4 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 30 Aug 2022 18:26:38 +0200 Subject: [PATCH 1488/2016] chore(deps): update dependency proto-plus to v1.22.1 (#1330) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index feca08cca2b1..99fd900e135a 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -27,7 +27,7 @@ mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' -proto-plus==1.22.0 +proto-plus==1.22.1 pyarrow==9.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 From de9a95be2a8ac993952024b738fd9b67fc5008cd Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 31 Aug 2022 17:46:08 +0200 Subject: [PATCH 1489/2016] chore(deps): update dependency pandas to v1.4.4 (#1332) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 99fd900e135a..9a5f3024454c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -26,7 +26,7 @@ munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' -pandas==1.4.3; python_version >= '3.8' +pandas==1.4.4; python_version >= '3.8' proto-plus==1.22.1 pyarrow==9.0.0 pyasn1==0.4.8 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 4c0a67a18a9f..c85c78961c16 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -7,7 +7,7 @@ ipython===8.0.1; python_version == '3.8' ipython==8.4.0; python_version >= '3.9' matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' -pandas==1.4.3; python_version >= '3.8' +pandas==1.4.4; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.2.1 typing-extensions==4.3.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index bbef52e66b00..dbfb880fd68e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -8,7 +8,7 @@ ipython===8.0.1; python_version == '3.8' ipython==8.4.0; python_version >= '3.9' matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' -pandas==1.4.3; python_version >= '3.8' +pandas==1.4.4; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.2.1 typing-extensions==4.3.0 From 3b099cfad23194db8d43c75e6cbe026f28ce2a15 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 1 Sep 2022 20:33:20 +0200 Subject: [PATCH 1490/2016] chore(deps): update dependency google-crc32c to v1.5.0 (#1334) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 9a5f3024454c..1b479edf1d84 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -16,7 +16,7 @@ google-auth==2.11.0 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.14.2 google-cloud-core==2.3.2 -google-crc32c==1.3.0 +google-crc32c==1.5.0 google-resumable-media==2.3.3 googleapis-common-protos==1.56.4 grpcio==1.47.0 From 281c14de86ec48827e4ddc287186cea574ed7ca3 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 1 Sep 2022 20:30:17 +0000 Subject: [PATCH 1491/2016] ci(python): fix path to requirements.txt in release script (#1335) Source-Link: https://github.com/googleapis/synthtool/commit/fdba3ed145bdb2f4f3eff434d4284b1d03b80d34 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:1f0dbd02745fb7cf255563dab5968345989308544e52b7f460deadd5e78e63b0 --- .../.github/.OwlBot.lock.yaml | 3 +-- .../google-cloud-bigquery/.kokoro/release.sh | 2 +- .../.kokoro/requirements.txt | 24 +++++++++---------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 23e106b65770..0d9eb2af9352 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:ce3c1686bc81145c81dd269bd12c4025c6b275b22d14641358827334fddb1d72 -# created: 2022-08-29T17:28:30.441852797Z + digest: sha256:1f0dbd02745fb7cf255563dab5968345989308544e52b7f460deadd5e78e63b0 diff --git a/packages/google-cloud-bigquery/.kokoro/release.sh b/packages/google-cloud-bigquery/.kokoro/release.sh index 879f9ef84fa3..c6a7c94600e7 100755 --- a/packages/google-cloud-bigquery/.kokoro/release.sh +++ b/packages/google-cloud-bigquery/.kokoro/release.sh @@ -16,7 +16,7 @@ set -eo pipefail # Start the releasetool reporter -python3 -m pip install --require-hashes -r .kokoro/requirements.txt +python3 -m pip install --require-hashes -r github/python-bigquery/.kokoro/requirements.txt python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source /tmp/publisher-script # Disable buffering, so that the logs stream through. diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 4b29ef247bed..92b2f727e777 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -100,9 +100,9 @@ click==8.0.4 \ # via # gcp-docuploader # gcp-releasetool -colorlog==6.6.0 \ - --hash=sha256:344f73204009e4c83c5b6beb00b3c45dc70fcdae3c80db919e0a4171d006fde8 \ - --hash=sha256:351c51e866c86c3217f08e4b067a7974a678be78f07f85fc2d55b8babde6d94e +colorlog==6.7.0 \ + --hash=sha256:0d33ca236784a1ba3ff9c532d4964126d8a2c44f1f0cb1d2b0728196f512f662 \ + --hash=sha256:bd94bd21c1e13fac7bd3153f4bc3a7dc0eb0974b8bc2fdf1a989e474f6e582e5 # via # gcp-docuploader # nox @@ -152,9 +152,9 @@ gcp-docuploader==0.6.3 \ --hash=sha256:ba8c9d76b3bbac54b0311c503a373b00edc2dc02d6d54ea9507045adb8e870f7 \ --hash=sha256:c0f5aaa82ce1854a386197e4e359b120ad6d4e57ae2c812fce42219a3288026b # via -r requirements.in -gcp-releasetool==1.8.6 \ - --hash=sha256:42e51ab8e2e789bc8e22a03c09352962cd3452951c801a2230d564816630304a \ - --hash=sha256:a3518b79d1b243c494eac392a01c7fd65187fd6d52602dcab9b529bc934d4da1 +gcp-releasetool==1.8.7 \ + --hash=sha256:3d2a67c9db39322194afb3b427e9cb0476ce8f2a04033695f0aeb63979fc2b37 \ + --hash=sha256:5e4d28f66e90780d77f3ecf1e9155852b0c3b13cbccb08ab07e66b2357c8da8d # via -r requirements.in google-api-core==2.8.2 \ --hash=sha256:06f7244c640322b508b125903bb5701bebabce8832f85aba9335ec00b3d02edc \ @@ -251,9 +251,9 @@ jinja2==3.1.2 \ --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \ --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 # via gcp-releasetool -keyring==23.8.2 \ - --hash=sha256:0d9973f8891850f1ade5f26aafd06bb16865fbbae3fc56b0defb6a14a2624003 \ - --hash=sha256:10d2a8639663fe2090705a00b8c47c687cacdf97598ea9c11456679fa974473a +keyring==23.9.0 \ + --hash=sha256:4c32a31174faaee48f43a7e2c7e9c3216ec5e95acf22a2bebfb4a1d05056ee44 \ + --hash=sha256:98f060ec95ada2ab910c195a2d4317be6ef87936a766b239c46aa3c7aac4f0db # via # gcp-releasetool # twine @@ -440,9 +440,9 @@ urllib3==1.26.12 \ # via # requests # twine -virtualenv==20.16.3 \ - --hash=sha256:4193b7bc8a6cd23e4eb251ac64f29b4398ab2c233531e66e40b19a6b7b0d30c1 \ - --hash=sha256:d86ea0bb50e06252d79e6c241507cb904fcd66090c3271381372d6221a3970f9 +virtualenv==20.16.4 \ + --hash=sha256:014f766e4134d0008dcaa1f95bafa0fb0f575795d07cae50b1bee514185d6782 \ + --hash=sha256:035ed57acce4ac35c82c9d8802202b0e71adac011a511ff650cbcf9635006a22 # via nox webencodings==0.5.1 \ --hash=sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 \ From 75ff2e878753f1b3a63792998a7ae0e9b2d1359a Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 1 Sep 2022 20:35:40 -0400 Subject: [PATCH 1492/2016] fix: uses function (create_job) more appropriate to the described sample intent (#1309) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: uses function more appropriate to the described title * adds additional explanation for the end users * adds REST API URL for reference * corrects flake 8 linter errors * blackens file * adds type hints * avoids unreliable version of grpcio * updates imports to fix linting error * better method to avoid grpcio 1.49.0rc1 * Update samples/create_job.py Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> * adds further explanation on when/why to use create_jobs * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates references Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> Co-authored-by: Owl Bot --- .../samples/create_job.py | 52 +++++++++++++------ 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/create_job.py b/packages/google-cloud-bigquery/samples/create_job.py index 39922f7aefca..129a08a1bf70 100644 --- a/packages/google-cloud-bigquery/samples/create_job.py +++ b/packages/google-cloud-bigquery/samples/create_job.py @@ -13,12 +13,13 @@ # limitations under the License. import typing +from typing import Union if typing.TYPE_CHECKING: - from google.cloud import bigquery + from google.cloud.bigquery import LoadJob, CopyJob, ExtractJob, QueryJob -def create_job() -> "bigquery.QueryJob": +def create_job() -> "Union[LoadJob, CopyJob, ExtractJob, QueryJob]": # [START bigquery_create_job] from google.cloud import bigquery @@ -26,20 +27,41 @@ def create_job() -> "bigquery.QueryJob": # Construct a BigQuery client object. client = bigquery.Client() - query_job = client.query( - "SELECT country_name from `bigquery-public-data.utility_us.country_code_iso`", - # Explicitly force job execution to be routed to a specific processing - # location. - location="US", - # Specify a job configuration to set optional job resource properties. - job_config=bigquery.QueryJobConfig( - labels={"example-label": "example-value"}, maximum_bytes_billed=1000000 - ), - # The client libraries automatically generate a job ID. Override the - # generated ID with either the job_id_prefix or job_id parameters. - job_id_prefix="code_sample_", + query_job = client.create_job( + # Specify a job configuration, providing a query + # and/or optional job resource properties, as needed. + # The job instance can be a LoadJob, CopyJob, ExtractJob, QueryJob + # Here, we demonstrate a "query" job. + # References: + # https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html#google.cloud.bigquery.client.Client.create_job + # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job + # + # Example use cases for .create_job() include: + # * to retry failed jobs + # * to generate jobs with an experimental API property that hasn't + # been added to one of the manually written job configuration + # classes yet + # + # NOTE: unless it is necessary to create a job in this way, the + # preferred approach is to use one of the dedicated API calls: + # client.query() + # client.extract_table() + # client.copy_table() + # client.load_table_file(), client.load_table_from_dataframe(), etc + job_config={ + "query": { + "query": """ + SELECT country_name + FROM `bigquery-public-data.utility_us.country_code_iso` + LIMIT 5 + """, + }, + "labels": {"example-label": "example-value"}, + "maximum_bytes_billed": 10000000, + } ) # Make an API request. - print("Started job: {}".format(query_job.job_id)) + print(f"Started job: {query_job.job_id}") # [END bigquery_create_job] + return query_job From b31f0f7ea0b25386b53cdf2dce1480ea5ce69961 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 1 Sep 2022 18:27:08 -0700 Subject: [PATCH 1493/2016] chore(python): update .kokoro/requirements.txt (#1336) Source-Link: https://github.com/googleapis/synthtool/commit/703554a14c7479542335b62fa69279f93a9e38ec Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:94961fdc5c9ca6d13530a6a414a49d2f607203168215d074cdb0a1df9ec31c0b Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/.kokoro/requirements.txt | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 0d9eb2af9352..2fa0f7c4fe15 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:1f0dbd02745fb7cf255563dab5968345989308544e52b7f460deadd5e78e63b0 + digest: sha256:94961fdc5c9ca6d13530a6a414a49d2f607203168215d074cdb0a1df9ec31c0b diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 92b2f727e777..385f2d4d6106 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -241,6 +241,10 @@ importlib-metadata==4.12.0 \ # via # -r requirements.in # twine +jaraco-classes==3.2.2 \ + --hash=sha256:6745f113b0b588239ceb49532aa09c3ebb947433ce311ef2f8e3ad64ebb74594 \ + --hash=sha256:e6ef6fd3fcf4579a7a019d87d1e56a883f4e4c35cfe925f86731abc58804e647 + # via keyring jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755 @@ -299,6 +303,10 @@ markupsafe==2.1.1 \ --hash=sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a \ --hash=sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7 # via jinja2 +more-itertools==8.14.0 \ + --hash=sha256:1bc4f91ee5b1b31ac7ceacc17c09befe6a40a503907baf9c839c229b5095cfd2 \ + --hash=sha256:c09443cd3d5438b8dafccd867a6bc1cb0894389e90cb53d227456b0b0bccb750 + # via jaraco-classes nox==2022.8.7 \ --hash=sha256:1b894940551dc5c389f9271d197ca5d655d40bdc6ccf93ed6880e4042760a34b \ --hash=sha256:96cca88779e08282a699d672258ec01eb7c792d35bbbf538c723172bce23212c From a6056a0ca3f4a0c6b348b8a8145483c3cb937ede Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 2 Sep 2022 18:40:23 +0000 Subject: [PATCH 1494/2016] chore(python): exclude setup.py in renovate config (#1338) Source-Link: https://github.com/googleapis/synthtool/commit/56da63e80c384a871356d1ea6640802017f213b4 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:993a058718e84a82fda04c3177e58f0a43281a996c7c395e0a56ccc4d6d210d7 --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/renovate.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 2fa0f7c4fe15..b8dcb4a4af99 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:94961fdc5c9ca6d13530a6a414a49d2f607203168215d074cdb0a1df9ec31c0b + digest: sha256:993a058718e84a82fda04c3177e58f0a43281a996c7c395e0a56ccc4d6d210d7 diff --git a/packages/google-cloud-bigquery/renovate.json b/packages/google-cloud-bigquery/renovate.json index 566a70f3cc3c..39b2a0ec9296 100644 --- a/packages/google-cloud-bigquery/renovate.json +++ b/packages/google-cloud-bigquery/renovate.json @@ -5,7 +5,7 @@ ":preserveSemverRanges", ":disableDependencyDashboard" ], - "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt"], + "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] } From 119020b5b758df32f7832c095ee1a71e47f0f6d5 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 6 Sep 2022 16:15:19 +0200 Subject: [PATCH 1495/2016] chore(deps): update all dependencies (#1339) --- .../samples/geography/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../samples/magics/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../samples/snippets/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index fb466e5093ef..6f722c66e151 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.1.2 +pytest==7.1.3 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 1b479edf1d84..ff6754a35d35 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.21 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' -google-api-core==2.8.2 +google-api-core==2.10.0 google-auth==2.11.0 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.14.2 @@ -19,7 +19,7 @@ google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.3.3 googleapis-common-protos==1.56.4 -grpcio==1.47.0 +grpcio==1.48.1 idna==3.3 libcst==0.4.7 munch==2.5.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 856751fc17bc..7902c72ef501 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.1.2 +pytest==7.1.3 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index c85c78961c16..a0807f8a98bb 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,10 +1,10 @@ db-dtypes==1.0.3 google-cloud-bigquery-storage==2.14.2 google-auth-oauthlib==0.5.2 -grpcio==1.47.0 +grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.4.0; python_version >= '3.9' +ipython==8.5.0; python_version >= '3.9' matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.4; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 856751fc17bc..7902c72ef501 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.1.2 +pytest==7.1.3 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index dbfb880fd68e..e39d074ee0ce 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -2,10 +2,10 @@ db-dtypes==1.0.3 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.14.2 google-auth-oauthlib==0.5.2 -grpcio==1.47.0 +grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.4.0; python_version >= '3.9' +ipython==8.5.0; python_version >= '3.9' matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.4; python_version >= '3.8' From a5bd75f539f8287e55f9a07303c9e4817d506d46 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 6 Sep 2022 21:07:19 +0200 Subject: [PATCH 1496/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.15.0 (#1342) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index ff6754a35d35..a73ea6b03156 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas==0.11.1; python_version >= '3.8' google-api-core==2.10.0 google-auth==2.11.0 google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.14.2 +google-cloud-bigquery-storage==2.15.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.3.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index a0807f8a98bb..64e5e36e3b02 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.3 -google-cloud-bigquery-storage==2.14.2 +google-cloud-bigquery-storage==2.15.0 google-auth-oauthlib==0.5.2 grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index e39d074ee0ce..a127709997e9 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.0.3 google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.14.2 +google-cloud-bigquery-storage==2.15.0 google-auth-oauthlib==0.5.2 grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' From f4d0231ea73ea87d700c148fb0ee5d31797e3ac6 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 12 Sep 2022 16:45:17 +0200 Subject: [PATCH 1497/2016] chore(deps): update dependency certifi to v2022.6.15.1 (#1346) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index a73ea6b03156..5697f264c50c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==22.1.0 -certifi==2022.6.15 +certifi==2022.6.15.1 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 From f5595564618cfdbf633712bd14114d8698545d9d Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 13 Sep 2022 16:24:18 +0000 Subject: [PATCH 1498/2016] chore: detect samples tests in nested directories (#1349) Source-Link: https://github.com/googleapis/synthtool/commit/50db768f450a50d7c1fd62513c113c9bb96fd434 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:e09366bdf0fd9c8976592988390b24d53583dd9f002d476934da43725adbb978 --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/samples/geography/noxfile.py | 6 ++++-- packages/google-cloud-bigquery/samples/magics/noxfile.py | 6 ++++-- packages/google-cloud-bigquery/samples/snippets/noxfile.py | 6 ++++-- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index b8dcb4a4af99..aa547962eb0a 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:993a058718e84a82fda04c3177e58f0a43281a996c7c395e0a56ccc4d6d210d7 + digest: sha256:e09366bdf0fd9c8976592988390b24d53583dd9f002d476934da43725adbb978 diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 29b5bc852183..b053ca568f63 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -208,8 +208,10 @@ def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: # check for presence of tests - test_list = glob.glob("*_test.py") + glob.glob("test_*.py") - test_list.extend(glob.glob("tests")) + test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob( + "**/test_*.py", recursive=True + ) + test_list.extend(glob.glob("**/tests", recursive=True)) if len(test_list) == 0: print("No tests found, skipping directory.") diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index 29b5bc852183..b053ca568f63 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -208,8 +208,10 @@ def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: # check for presence of tests - test_list = glob.glob("*_test.py") + glob.glob("test_*.py") - test_list.extend(glob.glob("tests")) + test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob( + "**/test_*.py", recursive=True + ) + test_list.extend(glob.glob("**/tests", recursive=True)) if len(test_list) == 0: print("No tests found, skipping directory.") diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 29b5bc852183..b053ca568f63 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -208,8 +208,10 @@ def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: # check for presence of tests - test_list = glob.glob("*_test.py") + glob.glob("test_*.py") - test_list.extend(glob.glob("tests")) + test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob( + "**/test_*.py", recursive=True + ) + test_list.extend(glob.glob("**/tests", recursive=True)) if len(test_list) == 0: print("No tests found, skipping directory.") From 90e9836e15b17c422b7e2983b586d1dce679a8b5 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 14 Sep 2022 15:50:05 +0200 Subject: [PATCH 1499/2016] chore(deps): update all dependencies (#1351) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 5697f264c50c..d210da445249 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==22.1.0 -certifi==2022.6.15.1 +certifi==2022.6.15.2 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 @@ -20,7 +20,7 @@ google-crc32c==1.5.0 google-resumable-media==2.3.3 googleapis-common-protos==1.56.4 grpcio==1.48.1 -idna==3.3 +idna==3.4 libcst==0.4.7 munch==2.5.0 mypy-extensions==0.4.3 From e0a865c44fa15894b7ff40b2f82cbf29a2651917 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 14 Sep 2022 19:47:46 +0200 Subject: [PATCH 1500/2016] chore(deps): update dependency google-api-core to v2.10.1 (#1353) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index d210da445249..e17b74bc6b3c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.21 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' -google-api-core==2.10.0 +google-api-core==2.10.1 google-auth==2.11.0 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.15.0 From acd8ecea2c9ea3817c815fc8f24fad513bdc838f Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 15 Sep 2022 15:25:11 +0200 Subject: [PATCH 1501/2016] chore(deps): update all dependencies (#1354) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e17b74bc6b3c..ca8f79c3c704 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==22.1.0 -certifi==2022.6.15.2 +certifi==2022.9.14 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 64e5e36e3b02..8a86e149511b 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.0.3 google-cloud-bigquery-storage==2.15.0 -google-auth-oauthlib==0.5.2 +google-auth-oauthlib==0.5.3 grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index a127709997e9..3009f1899265 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.0.3 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.15.0 -google-auth-oauthlib==0.5.2 +google-auth-oauthlib==0.5.3 grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' From 9d7c4d89656561a9eb5aa0f385fee4e3cd50b53f Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 19 Sep 2022 17:18:35 -0400 Subject: [PATCH 1502/2016] Fix: Refactors code to account for a tdqm code deprecation (#1357) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * tests some options * refactors to use tqdm.* notation * refactors tqdm function calls to account for deprecation warning * refactors _tqdm_helpers to account for tqdm deprecation warnings * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removes an extraneous reference to ipywidgets * removes unneeded import * removes import and fixes linting error Co-authored-by: Owl Bot --- .../google/cloud/bigquery/_tqdm_helpers.py | 3 ++- .../tests/unit/test_table.py | 15 ++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py index f2355ab3bb64..ae289d8a6b5c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py @@ -22,6 +22,7 @@ try: import tqdm # type: ignore + except ImportError: # pragma: NO COVER tqdm = None @@ -48,7 +49,7 @@ def get_progress_bar(progress_bar_type, description, total, unit): if progress_bar_type == "tqdm": return tqdm.tqdm(desc=description, total=total, unit=unit) elif progress_bar_type == "tqdm_notebook": - return tqdm.tqdm_notebook(desc=description, total=total, unit=unit) + return tqdm.notebook.tqdm(desc=description, total=total, unit=unit) elif progress_bar_type == "tqdm_gui": return tqdm.tqdm_gui(desc=description, total=total, unit=unit) except (KeyError, TypeError): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index b5f2e58c6406..fca43f1eed88 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -45,7 +45,9 @@ geopandas = None try: - from tqdm import tqdm + import tqdm + from tqdm.std import TqdmDeprecationWarning + except (ImportError, AttributeError): # pragma: NO COVER tqdm = None @@ -2798,7 +2800,7 @@ def test_to_arrow_w_bqstorage_no_streams(self): @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") - @mock.patch("tqdm.tqdm_notebook") + @mock.patch("tqdm.notebook.tqdm") @mock.patch("tqdm.tqdm") def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock): from google.cloud.bigquery.schema import SchemaField @@ -3146,7 +3148,7 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") - @mock.patch("tqdm.tqdm_notebook") + @mock.patch("tqdm.notebook.tqdm") @mock.patch("tqdm.tqdm") def test_to_dataframe_progress_bar( self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock @@ -3249,7 +3251,7 @@ def test_to_dataframe_no_tqdm(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui", new=None) # will raise TypeError on call - @mock.patch("tqdm.tqdm_notebook", new=None) # will raise TypeError on call + @mock.patch("tqdm.notebook.tqdm", new=None) # will raise TypeError on call @mock.patch("tqdm.tqdm", new=None) # will raise TypeError on call def test_to_dataframe_tqdm_error(self): from google.cloud.bigquery.schema import SchemaField @@ -3281,7 +3283,10 @@ def test_to_dataframe_tqdm_error(self): # Warn that a progress bar was requested, but creating the tqdm # progress bar failed. for warning in warned: - self.assertIs(warning.category, UserWarning) + self.assertIn( + warning.category, + [UserWarning, DeprecationWarning, TqdmDeprecationWarning], + ) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): From 055a0a8280bd108217a4cba39f631a9b1f712b08 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 20 Sep 2022 13:22:01 +0200 Subject: [PATCH 1503/2016] chore(deps): update all dependencies (#1358) * chore(deps): update all dependencies * pin matplotlib version for python 3.7 * pin matplotlib version for python 3.7 Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements.txt | 5 +++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index ca8f79c3c704..5f0ee10ee744 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -19,7 +19,7 @@ google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.3.3 googleapis-common-protos==1.56.4 -grpcio==1.48.1 +grpcio==1.49.0 idna==3.4 libcst==0.4.7 munch==2.5.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 8a86e149511b..5ce01be2de7b 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,11 +1,12 @@ db-dtypes==1.0.3 google-cloud-bigquery-storage==2.15.0 google-auth-oauthlib==0.5.3 -grpcio==1.48.1 +grpcio==1.49.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' -matplotlib==3.5.3 +matplotlib==3.5.3; python_version == '3.7' +matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.4.4; python_version >= '3.8' pyarrow==9.0.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 3009f1899265..01bb943484d0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -2,11 +2,12 @@ db-dtypes==1.0.3 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.15.0 google-auth-oauthlib==0.5.3 -grpcio==1.48.1 +grpcio==1.49.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' -matplotlib==3.5.3 +matplotlib==3.5.3; python_version == '3.7' +matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.4.4; python_version >= '3.8' pyarrow==9.0.0 From d933a2635dbc594a03be4682739b86c7feac3613 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 20 Sep 2022 17:25:43 +0200 Subject: [PATCH 1504/2016] chore(deps): update all dependencies (#1360) * chore(deps): update all dependencies * revert * revert * pin matplotlib for py37 Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/snippets/requirements.txt | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 5f0ee10ee744..fafa43e9ac85 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,9 +12,9 @@ geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' google-api-core==2.10.1 -google-auth==2.11.0 +google-auth==2.11.1 google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.15.0 +google-cloud-bigquery-storage==2.16.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.3.3 @@ -26,7 +26,7 @@ munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' -pandas==1.4.4; python_version >= '3.8' +pandas==1.5.0; python_version >= '3.8' proto-plus==1.22.1 pyarrow==9.0.0 pyasn1==0.4.8 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 5ce01be2de7b..05bd5ef89e63 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,14 +1,14 @@ db-dtypes==1.0.3 -google-cloud-bigquery-storage==2.15.0 +google-cloud-bigquery-storage==2.16.0 google-auth-oauthlib==0.5.3 grpcio==1.49.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' -matplotlib==3.5.3; python_version == '3.7' +matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.4.4; python_version >= '3.8' +pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.2.1 typing-extensions==4.3.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 01bb943484d0..241fba27af9e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.0.3 google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.15.0 +google-cloud-bigquery-storage==2.16.0 google-auth-oauthlib==0.5.3 grpcio==1.49.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' -matplotlib==3.5.3; python_version == '3.7' +matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.4.4; python_version >= '3.8' +pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.2.1 typing-extensions==4.3.0 From 88774526a5bbb3173ebbc8a785de1f7020f94b12 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 21 Sep 2022 16:50:49 +0200 Subject: [PATCH 1505/2016] chore(deps): update dependency db-dtypes to v1.0.4 (#1361) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index fafa43e9ac85..6b14f90ab2c7 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -6,7 +6,7 @@ click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.0.3 +db-dtypes==1.0.4 Fiona==1.8.21 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 05bd5ef89e63..dd61784f1086 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -db-dtypes==1.0.3 +db-dtypes==1.0.4 google-cloud-bigquery-storage==2.16.0 google-auth-oauthlib==0.5.3 grpcio==1.49.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 241fba27af9e..1e91a5ec7df6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -db-dtypes==1.0.3 +db-dtypes==1.0.4 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.16.0 google-auth-oauthlib==0.5.3 From 89324f026f233cb039e1aed96acdf59c32b4280b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 28 Sep 2022 09:34:08 -0400 Subject: [PATCH 1506/2016] chore: updates blacken process to ensure all samples are processed (#1367) --- packages/google-cloud-bigquery/noxfile.py | 10 +++++++++- packages/google-cloud-bigquery/owlbot.py | 7 ++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index d9883d0690b5..0b0800d3591a 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -25,7 +25,15 @@ MYPY_VERSION = "mypy==0.910" PYTYPE_VERSION = "pytype==2021.4.9" BLACK_VERSION = "black==22.3.0" -BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") +BLACK_PATHS = ( + "docs", + "google", + "samples", + "samples/tests", + "tests", + "noxfile.py", + "setup.py", +) DEFAULT_PYTHON_VERSION = "3.8" SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.10"] diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 4d287ac46bc5..b887449ff798 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -13,12 +13,15 @@ # limitations under the License. """This script is used to synthesize generated parts of this library.""" +from pathlib import Path import textwrap import synthtool as s from synthtool import gcp from synthtool.languages import python +REPO_ROOT = Path(__file__).parent.absolute() + default_version = "v2" for library in s.get_staging_dirs(default_version): @@ -120,4 +123,6 @@ ), ) -s.shell.run(["nox", "-s", "blacken"], hide_output=False) +# s.shell.run(["nox", "-s", "blacken"], hide_output=False) +for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): + s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) \ No newline at end of file From b1f3b71f8ef9a6779aa58b8a8eab975019b734f6 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 29 Sep 2022 13:47:52 -0400 Subject: [PATCH 1507/2016] chore(main): release 3.3.3 (#1329) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(main): release 3.3.3 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update docs/conf.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * chore: also run blacken session in the root of the repo * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe Co-authored-by: Anthonios Partheniou --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ .../google/cloud/bigquery/version.py | 2 +- packages/google-cloud-bigquery/owlbot.py | 4 ++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 6ba373179afa..1340fd396721 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.3.3](https://github.com/googleapis/python-bigquery/compare/v3.3.2...v3.3.3) (2022-09-28) + + +### Bug Fixes + +* Refactors code to account for a tdqm code deprecation ([#1357](https://github.com/googleapis/python-bigquery/issues/1357)) ([1369a9d](https://github.com/googleapis/python-bigquery/commit/1369a9d937b85d6a2a6bf9a672c71620648b1e3e)) +* Validate opentelemetry span job attributes have values ([#1327](https://github.com/googleapis/python-bigquery/issues/1327)) ([8287af1](https://github.com/googleapis/python-bigquery/commit/8287af1299169546f847126f03ae04e48890139e)) + + +### Documentation + +* **samples:** uses function (create_job) more appropriate to the described sample intent ([5aeedaa](https://github.com/googleapis/python-bigquery/commit/5aeedaa2f4e6a0200d50521dfd90f39f9a24d0cc)) + ## [3.3.2](https://github.com/googleapis/python-bigquery/compare/v3.3.1...v3.3.2) (2022-08-16) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index eb307e8d6286..c99682bc8032 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.3.2" +__version__ = "3.3.3" diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index b887449ff798..c2de31042267 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -123,6 +123,6 @@ ), ) -# s.shell.run(["nox", "-s", "blacken"], hide_output=False) +s.shell.run(["nox", "-s", "blacken"], hide_output=False) for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): - s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) \ No newline at end of file + s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) From bdf362fd36064412b6c2dc491c2dcff30cf3bb69 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 29 Sep 2022 16:32:38 -0400 Subject: [PATCH 1508/2016] fix(deps): require protobuf >= 3.20.2 (#1369) * chore: exclude requirements.txt file from renovate-bot Source-Link: https://github.com/googleapis/synthtool/commit/f58d3135a2fab20e225d98741dbc06d57459b816 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:7a40313731a7cb1454eef6b33d3446ebb121836738dc3ab3d2d3ded5268c35b6 * update constraints files * fix(deps): require protobuf 3.20.2 Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou Co-authored-by: Chalmer Lowe --- .../.github/.OwlBot.lock.yaml | 2 +- .../.kokoro/requirements.txt | 49 +++++++++---------- packages/google-cloud-bigquery/setup.py | 2 +- .../testing/constraints-3.7.txt | 2 +- 4 files changed, 27 insertions(+), 28 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index aa547962eb0a..3815c983cb16 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:e09366bdf0fd9c8976592988390b24d53583dd9f002d476934da43725adbb978 + digest: sha256:7a40313731a7cb1454eef6b33d3446ebb121836738dc3ab3d2d3ded5268c35b6 diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 385f2d4d6106..d15994bac93c 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -325,31 +325,30 @@ platformdirs==2.5.2 \ --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 # via virtualenv -protobuf==3.20.1 \ - --hash=sha256:06059eb6953ff01e56a25cd02cca1a9649a75a7e65397b5b9b4e929ed71d10cf \ - --hash=sha256:097c5d8a9808302fb0da7e20edf0b8d4703274d140fd25c5edabddcde43e081f \ - --hash=sha256:284f86a6207c897542d7e956eb243a36bb8f9564c1742b253462386e96c6b78f \ - --hash=sha256:32ca378605b41fd180dfe4e14d3226386d8d1b002ab31c969c366549e66a2bb7 \ - --hash=sha256:3cc797c9d15d7689ed507b165cd05913acb992d78b379f6014e013f9ecb20996 \ - --hash=sha256:62f1b5c4cd6c5402b4e2d63804ba49a327e0c386c99b1675c8a0fefda23b2067 \ - --hash=sha256:69ccfdf3657ba59569c64295b7d51325f91af586f8d5793b734260dfe2e94e2c \ - --hash=sha256:6f50601512a3d23625d8a85b1638d914a0970f17920ff39cec63aaef80a93fb7 \ - --hash=sha256:7403941f6d0992d40161aa8bb23e12575637008a5a02283a930addc0508982f9 \ - --hash=sha256:755f3aee41354ae395e104d62119cb223339a8f3276a0cd009ffabfcdd46bb0c \ - --hash=sha256:77053d28427a29987ca9caf7b72ccafee011257561259faba8dd308fda9a8739 \ - --hash=sha256:7e371f10abe57cee5021797126c93479f59fccc9693dafd6bd5633ab67808a91 \ - --hash=sha256:9016d01c91e8e625141d24ec1b20fed584703e527d28512aa8c8707f105a683c \ - --hash=sha256:9be73ad47579abc26c12024239d3540e6b765182a91dbc88e23658ab71767153 \ - --hash=sha256:adc31566d027f45efe3f44eeb5b1f329da43891634d61c75a5944e9be6dd42c9 \ - --hash=sha256:adfc6cf69c7f8c50fd24c793964eef18f0ac321315439d94945820612849c388 \ - --hash=sha256:af0ebadc74e281a517141daad9d0f2c5d93ab78e9d455113719a45a49da9db4e \ - --hash=sha256:cb29edb9eab15742d791e1025dd7b6a8f6fcb53802ad2f6e3adcb102051063ab \ - --hash=sha256:cd68be2559e2a3b84f517fb029ee611546f7812b1fdd0aa2ecc9bc6ec0e4fdde \ - --hash=sha256:cdee09140e1cd184ba9324ec1df410e7147242b94b5f8b0c64fc89e38a8ba531 \ - --hash=sha256:db977c4ca738dd9ce508557d4fce0f5aebd105e158c725beec86feb1f6bc20d8 \ - --hash=sha256:dd5789b2948ca702c17027c84c2accb552fc30f4622a98ab5c51fcfe8c50d3e7 \ - --hash=sha256:e250a42f15bf9d5b09fe1b293bdba2801cd520a9f5ea2d7fb7536d4441811d20 \ - --hash=sha256:ff8d8fa42675249bb456f5db06c00de6c2f4c27a065955917b28c4f15978b9c3 +protobuf==3.20.2 \ + --hash=sha256:03d76b7bd42ac4a6e109742a4edf81ffe26ffd87c5993126d894fe48a120396a \ + --hash=sha256:09e25909c4297d71d97612f04f41cea8fa8510096864f2835ad2f3b3df5a5559 \ + --hash=sha256:18e34a10ae10d458b027d7638a599c964b030c1739ebd035a1dfc0e22baa3bfe \ + --hash=sha256:291fb4307094bf5ccc29f424b42268640e00d5240bf0d9b86bf3079f7576474d \ + --hash=sha256:2c0b040d0b5d5d207936ca2d02f00f765906622c07d3fa19c23a16a8ca71873f \ + --hash=sha256:384164994727f274cc34b8abd41a9e7e0562801361ee77437099ff6dfedd024b \ + --hash=sha256:3cb608e5a0eb61b8e00fe641d9f0282cd0eedb603be372f91f163cbfbca0ded0 \ + --hash=sha256:5d9402bf27d11e37801d1743eada54372f986a372ec9679673bfcc5c60441151 \ + --hash=sha256:712dca319eee507a1e7df3591e639a2b112a2f4a62d40fe7832a16fd19151750 \ + --hash=sha256:7a5037af4e76c975b88c3becdf53922b5ffa3f2cddf657574a4920a3b33b80f3 \ + --hash=sha256:8228e56a865c27163d5d1d1771d94b98194aa6917bcfb6ce139cbfa8e3c27334 \ + --hash=sha256:84a1544252a933ef07bb0b5ef13afe7c36232a774affa673fc3636f7cee1db6c \ + --hash=sha256:84fe5953b18a383fd4495d375fe16e1e55e0a3afe7b4f7b4d01a3a0649fcda9d \ + --hash=sha256:9c673c8bfdf52f903081816b9e0e612186684f4eb4c17eeb729133022d6032e3 \ + --hash=sha256:9f876a69ca55aed879b43c295a328970306e8e80a263ec91cf6e9189243c613b \ + --hash=sha256:a9e5ae5a8e8985c67e8944c23035a0dff2c26b0f5070b2f55b217a1c33bbe8b1 \ + --hash=sha256:b4fdb29c5a7406e3f7ef176b2a7079baa68b5b854f364c21abe327bbeec01cdb \ + --hash=sha256:c184485e0dfba4dfd451c3bd348c2e685d6523543a0f91b9fd4ae90eb09e8422 \ + --hash=sha256:c9cdf251c582c16fd6a9f5e95836c90828d51b0069ad22f463761d27c6c19019 \ + --hash=sha256:e39cf61bb8582bda88cdfebc0db163b774e7e03364bbf9ce1ead13863e81e359 \ + --hash=sha256:e8fbc522303e09036c752a0afcc5c0603e917222d8bedc02813fd73b4b4ed804 \ + --hash=sha256:f34464ab1207114e73bba0794d1257c150a2b89b7a9faf504e00af7c9fd58978 \ + --hash=sha256:f52dabc96ca99ebd2169dadbe018824ebda08a795c7684a0b7d203a290f3adb0 # via # gcp-docuploader # gcp-releasetool diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index d8f2bb22605a..be02dc409391 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -42,7 +42,7 @@ "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3, <22.0.0dev", - "protobuf >= 3.19.0, <5.0.0dev", # For the legacy proto-based types. + "protobuf >= 3.20.2, <5.0.0dev", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "pyarrow >= 3.0.0, < 10.0dev", "requests >= 2.18.0, < 3.0.0dev", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 67313f6b80d3..3b07dc9fad4c 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -18,7 +18,7 @@ opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 pandas==1.1.0 proto-plus==1.22.0 -protobuf==3.19.0 +protobuf==3.20.2 pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.18.0 From fc88c1ad4e3336af52c2db8a9a81377e0c52cc45 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 3 Oct 2022 12:20:55 -0400 Subject: [PATCH 1509/2016] chore(main): release 3.3.4 (#1371) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 1340fd396721..d5efd7dd6a5f 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.3.4](https://github.com/googleapis/python-bigquery/compare/v3.3.3...v3.3.4) (2022-09-29) + + +### Bug Fixes + +* **deps:** Require protobuf >= 3.20.2 ([#1369](https://github.com/googleapis/python-bigquery/issues/1369)) ([f13383a](https://github.com/googleapis/python-bigquery/commit/f13383a22d7b1a0a714dc1b1210ad970146bd094)) + ## [3.3.3](https://github.com/googleapis/python-bigquery/compare/v3.3.2...v3.3.3) (2022-09-28) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index c99682bc8032..3e1a9869c3c3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.3.3" +__version__ = "3.3.4" From da5ee953b750b6a437a62075b3bf4e99f547a0dd Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Mon, 3 Oct 2022 20:56:22 -0400 Subject: [PATCH 1510/2016] docs: fix typos (#1372) --- packages/google-cloud-bigquery/google/cloud/bigquery/query.py | 2 +- packages/google-cloud-bigquery/google/cloud/bigquery/schema.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 0469cb2711bc..944ad884e345 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -795,7 +795,7 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - Tuple: The contents of this :class:`~google.cloud.biquery.ArrayQueryParameter`. + Tuple: The contents of this :class:`~google.cloud.bigquery.ArrayQueryParameter`. """ return (self.name, self.struct_types, self.struct_values) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 5580a2ae9223..1df78424d2d7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -144,7 +144,7 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": :meth:`to_api_repr`. Returns: - google.cloud.biquery.schema.SchemaField: The ``SchemaField`` object. + google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ field_type = api_repr["type"].upper() From 403541a758d759d5cee87028ca9113ead1d5d5c6 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 4 Oct 2022 16:05:14 +0200 Subject: [PATCH 1511/2016] chore(deps): update all dependencies (#1363) Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 14 +++++++------- .../samples/magics/requirements.txt | 6 +++--- .../samples/snippets/requirements.txt | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 6b14f90ab2c7..72dd950ea483 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==22.1.0 -certifi==2022.9.14 +certifi==2022.9.24 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 @@ -12,14 +12,14 @@ geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' google-api-core==2.10.1 -google-auth==2.11.1 -google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.16.0 +google-auth==2.12.0 +google-cloud-bigquery==3.3.3 +google-cloud-bigquery-storage==2.16.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 -google-resumable-media==2.3.3 +google-resumable-media==2.4.0 googleapis-common-protos==1.56.4 -grpcio==1.49.0 +grpcio==1.49.1 idna==3.4 libcst==0.4.7 munch==2.5.0 @@ -34,7 +34,7 @@ pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.2.1 +pytz==2022.4 PyYAML==6.0 requests==2.28.1 rsa==4.9 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index dd61784f1086..212de247cc95 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.0.4 -google-cloud-bigquery-storage==2.16.0 +google-cloud-bigquery-storage==2.16.1 google-auth-oauthlib==0.5.3 -grpcio==1.49.0 +grpcio==1.49.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' @@ -10,5 +10,5 @@ matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.2.1 +pytz==2022.4 typing-extensions==4.3.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 1e91a5ec7df6..d5a4696447fc 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,8 +1,8 @@ db-dtypes==1.0.4 -google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.16.0 +google-cloud-bigquery==3.3.3 +google-cloud-bigquery-storage==2.16.1 google-auth-oauthlib==0.5.3 -grpcio==1.49.0 +grpcio==1.49.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' @@ -11,5 +11,5 @@ matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.2.1 +pytz==2022.4 typing-extensions==4.3.0 From 9e78cb96208997ee7d4d0e8ac0ab2bdceab4e2d9 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 5 Oct 2022 14:17:58 -0400 Subject: [PATCH 1512/2016] Fix: refactor to adapt to changes to shapely dependency (#1376) * fix: refactored to account for changes in dependency * Removes comment and ensures linting success * refactor to use loads() function * fix: refactors to account for changes to shapely dependency * fix: refactors to account for changes to shapely dependency * blacken the code * add mypy ignore flag for shapely import --- .../google/cloud/bigquery/_pandas_helpers.py | 12 +++--------- .../google/cloud/bigquery/table.py | 4 ++-- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index cc0ee75ffa6d..0d05f53a3271 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -57,15 +57,9 @@ if pandas is not None: # pragma: NO COVER def _to_wkb(): - # Create a closure that: - # - Adds a not-null check. This allows the returned function to - # be used directly with apply, unlike `shapely.wkb.dumps`. - # - Avoid extra work done by `shapely.wkb.dumps` that we don't need. - # - Caches the WKBWriter (and write method lookup :) ) - # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace. - from shapely.geos import WKBWriter, lgeos # type: ignore - - write = WKBWriter(lgeos).write + from shapely import wkb # type: ignore + + write = wkb.dumps notnull = pandas.notnull def _to_wkb(v): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 72eb1baf6430..8e9e248c44e9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -39,11 +39,11 @@ _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" try: - import shapely.geos # type: ignore + import shapely # type: ignore except ImportError: shapely = None else: - _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read + _read_wkt = shapely.wkt.loads import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator From 48e52ab3d19ba70ff8c08d04f9c21a94420788df Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 7 Oct 2022 16:37:03 -0400 Subject: [PATCH 1513/2016] fix(deps): allow protobuf 3.19.5 (#1379) * fix(deps): allow protobuf 3.19.5 * explicitly exclude protobuf 4.21.0 --- packages/google-cloud-bigquery/setup.py | 2 +- packages/google-cloud-bigquery/testing/constraints-3.7.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index be02dc409391..695ffd7d3ab4 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -42,7 +42,7 @@ "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3, <22.0.0dev", - "protobuf >= 3.20.2, <5.0.0dev", # For the legacy proto-based types. + "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "pyarrow >= 3.0.0, < 10.0dev", "requests >= 2.18.0, < 3.0.0dev", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 3b07dc9fad4c..c9e40d823b77 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -18,7 +18,7 @@ opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 pandas==1.1.0 proto-plus==1.22.0 -protobuf==3.20.2 +protobuf==3.19.5 pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.18.0 From 1ae581f1d6bf112339737a91eb0f41812376e8e3 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 10 Oct 2022 18:22:07 +0200 Subject: [PATCH 1514/2016] chore(deps): update all dependencies (#1380) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 72dd950ea483..d6b2c3ed9eb6 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.21 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' -google-api-core==2.10.1 +google-api-core==2.10.2 google-auth==2.12.0 google-cloud-bigquery==3.3.3 google-cloud-bigquery-storage==2.16.1 @@ -40,6 +40,6 @@ requests==2.28.1 rsa==4.9 Shapely==1.8.4 six==1.16.0 -typing-extensions==4.3.0 +typing-extensions==4.4.0 typing-inspect==0.8.0 urllib3==1.26.12 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 212de247cc95..f141b54204ae 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -6,9 +6,9 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.0; python_version >= '3.8' +matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.4 -typing-extensions==4.3.0 +typing-extensions==4.4.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index d5a4696447fc..0affa1c19f2e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -7,9 +7,9 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.0; python_version >= '3.8' +matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.4 -typing-extensions==4.3.0 +typing-extensions==4.4.0 From 157d39261ba484793dd166107658a1c421111ae1 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Tue, 11 Oct 2022 13:24:40 -0500 Subject: [PATCH 1515/2016] feat: reconfigure tqdm progress bar in %%bigquery magic (#1355) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add bigquery job id to tqdm progress bar description Change-Id: I2add62e3cdd5f25f88ace2d08f212796918158b6 * write to sys.stdout instead of sys.stderr Change-Id: I6c4001608af1bd8c305c53c6089d64f99605bd8c * configure progress bar Change-Id: I5788448d580b53898e75fba68ff5d5a9d12e33d6 * tqdm.notebook Change-Id: I87e45085b7535083327a5fe2e51dba4b6411db00 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * reinclude ipywidgets Change-Id: Ibe0fc01db05fcfaacdbe0c074b841ead3a39afc9 * reinclude ipywidgets Change-Id: I56f8f98853e83ead0e0ca743c03407a521370233 * change test assertions to tqdm_notebook Change-Id: I2d55e529142ad0024ef4a98c2f15d10a73535380 * change test assertions in test_magics Change-Id: I7961ff1c5e9c54930d077e67ef9e01d79e351c5f * remove ipywidgets Change-Id: I183e277fc7be8797c85d6802f4f8c3947871d4cc * update assertions in test Change-Id: I3b4a1b9460227ca49bf344362efbcc2c895d804d * update method args in query.py and table.py Change-Id: I9a2bf2b54579668ff36ed992e599f4c7fabe918c * string formatting * fix typo * fix incorrect import structure for tqdm notebook * change default decorator back to tqdm * modify system test * add ipywidgets package for tqdm.notebook feature, set tqdm.notebook as default decorator for bq magic * change test assertion in test_query_pandas * revert test changes * reformat import statement * reformat import statement * remove timeouterror side effect * add tqdm mock patch * Revert "reformat import statement" This reverts commit 4114221527507fb270ef0533aa5b8f985f3b7779. * Revert "add tqdm mock patch" This reverts commit ef809a082ae3e4684298764096ac634b0c0281bc. * add timeout side effect * fix assertion * fix import * change mock patch to tqdm * move assertion * move assertions * add timeout side effect * adjust import statement, mock.patch tqdm * create fixture * revert import change * add import from helper * fix linting * remove unused imort * set ipywidgets version to 7.7.1 * set ipywidgets version to 7.7.1 * set ipywidgets version to 7.7.1 * bump sphinx version * bump sphinx version Co-authored-by: Owl Bot --- .../google/cloud/bigquery/_tqdm_helpers.py | 29 +++++--- .../google/cloud/bigquery/job/query.py | 4 +- .../google/cloud/bigquery/magics/magics.py | 21 +++--- .../google/cloud/bigquery/table.py | 12 ++-- packages/google-cloud-bigquery/noxfile.py | 10 +-- .../samples/magics/requirements.txt | 1 + .../samples/snippets/requirements.txt | 1 + packages/google-cloud-bigquery/setup.py | 1 + .../testing/constraints-3.7.txt | 1 + .../tests/system/test_magics.py | 3 +- .../tests/unit/job/test_query_pandas.py | 71 +++++++++---------- .../tests/unit/test_magics.py | 5 +- 12 files changed, 87 insertions(+), 72 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py index ae289d8a6b5c..456ca2530051 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py @@ -15,6 +15,7 @@ """Shared helper functions for tqdm progress bar.""" import concurrent.futures +import sys import time import typing from typing import Optional @@ -22,6 +23,7 @@ try: import tqdm # type: ignore + import tqdm.notebook as notebook # type: ignore except ImportError: # pragma: NO COVER tqdm = None @@ -47,9 +49,22 @@ def get_progress_bar(progress_bar_type, description, total, unit): try: if progress_bar_type == "tqdm": - return tqdm.tqdm(desc=description, total=total, unit=unit) + return tqdm.tqdm( + bar_format="{l_bar}{bar}|", + colour="green", + desc=description, + file=sys.stdout, + total=total, + unit=unit, + ) elif progress_bar_type == "tqdm_notebook": - return tqdm.notebook.tqdm(desc=description, total=total, unit=unit) + return notebook.tqdm( + bar_format="{l_bar}{bar}|", + desc=description, + file=sys.stdout, + total=total, + unit=unit, + ) elif progress_bar_type == "tqdm_gui": return tqdm.tqdm_gui(desc=description, total=total, unit=unit) except (KeyError, TypeError): @@ -80,7 +95,7 @@ def wait_for_query( """ default_total = 1 current_stage = None - start_time = time.time() + start_time = time.perf_counter() progress_bar = get_progress_bar( progress_bar_type, "Query is running", default_total, "query" @@ -95,11 +110,7 @@ def wait_for_query( current_stage = query_job.query_plan[i] progress_bar.total = len(query_job.query_plan) progress_bar.set_description( - "Query executing stage {} and status {} : {:0.2f}s".format( - current_stage.name, - current_stage.status, - time.time() - start_time, - ), + f"Query executing stage {current_stage.name} and status {current_stage.status} : {time.perf_counter() - start_time:.2f}s" ) try: query_result = query_job.result( @@ -107,7 +118,7 @@ def wait_for_query( ) progress_bar.update(default_total) progress_bar.set_description( - "Query complete after {:0.2f}s".format(time.time() - start_time), + f"Job ID {query_job.job_id} successfully executed", ) break except concurrent.futures.TimeoutError: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index c2d304e30c7b..b0286deae4c7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1556,9 +1556,9 @@ def to_arrow( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 14819aa59f20..613cc1b58fb3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -125,7 +125,7 @@ def __init__(self): self._default_query_job_config = bigquery.QueryJobConfig() self._bigquery_client_options = client_options.ClientOptions() self._bqstorage_client_options = client_options.ClientOptions() - self._progress_bar_type = "tqdm" + self._progress_bar_type = "tqdm_notebook" @property def credentials(self): @@ -269,7 +269,7 @@ def progress_bar_type(self): Manually setting the progress_bar_type: >>> from google.cloud.bigquery import magics - >>> magics.context.progress_bar_type = "tqdm" + >>> magics.context.progress_bar_type = "tqdm_notebook" """ return self._progress_bar_type @@ -286,7 +286,7 @@ def _handle_error(error, destination_var=None): Args: error (Exception): - An exception that ocurred during the query exectution. + An exception that ocurred during the query execution. destination_var (Optional[str]): The name of the IPython session variable to store the query job. """ @@ -329,22 +329,25 @@ def _run_query(client, query, job_config=None): Query complete after 2.07s 'bf633912-af2c-4780-b568-5d868058632b' """ - start_time = time.time() + start_time = time.perf_counter() query_job = client.query(query, job_config=job_config) if job_config and job_config.dry_run: return query_job - print("Executing query with job ID: {}".format(query_job.job_id)) + print(f"Executing query with job ID: {query_job.job_id}") while True: - print("\rQuery executing: {:0.2f}s".format(time.time() - start_time), end="") + print( + f"\rQuery executing: {time.perf_counter() - start_time:.2f}s".format(), + end="", + ) try: query_job.result(timeout=0.5) break except futures.TimeoutError: continue - print("\nQuery complete after {:0.2f}s".format(time.time() - start_time)) + print(f"\nJob ID {query_job.job_id} successfully executed") return query_job @@ -365,7 +368,7 @@ def _create_dataset_if_necessary(client, dataset_id): pass dataset = bigquery.Dataset(dataset_reference) dataset.location = client.location - print("Creating dataset: {}".format(dataset_id)) + print(f"Creating dataset: {dataset_id}") dataset = client.create_dataset(dataset) @@ -500,7 +503,7 @@ def _create_dataset_if_necessary(client, dataset_id): default=None, help=( "Sets progress bar type to display a progress bar while executing the query." - "Defaults to use tqdm. Install the ``tqdm`` package to use this feature." + "Defaults to use tqdm_notebook. Install the ``tqdm`` package to use this feature." ), ) def _cell_magic(line, query): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 8e9e248c44e9..2065c5fd2c0b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1728,9 +1728,9 @@ def to_arrow( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a @@ -1921,9 +1921,9 @@ def to_dataframe( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a @@ -2075,9 +2075,9 @@ def to_geodataframe( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 0b0800d3591a..a91e60a5f98b 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -81,7 +81,7 @@ def default(session, install_extras=True): ) if install_extras and session.python == "3.10": - install_target = ".[bqstorage,pandas,tqdm,opentelemetry]" + install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" elif install_extras: install_target = ".[all]" else: @@ -186,7 +186,7 @@ def system(session): session.install("google-cloud-datacatalog", "-c", constraints_path) if session.python == "3.10": - extras = "[bqstorage,pandas,tqdm,opentelemetry]" + extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) @@ -235,7 +235,7 @@ def snippets(session): session.install("grpcio", "-c", constraints_path) if session.python == "3.10": - extras = "[bqstorage,pandas,tqdm,opentelemetry]" + extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) @@ -387,7 +387,7 @@ def blacken(session): def docs(session): """Build the docs.""" - session.install("recommonmark", "sphinx==4.0.1", "sphinx_rtd_theme") + session.install("recommonmark", "sphinx==4.0.2", "sphinx_rtd_theme") session.install("google-cloud-storage") session.install("-e", ".[all]") @@ -412,7 +412,7 @@ def docfx(session): session.install("-e", ".") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + "sphinx==4.0.2", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index f141b54204ae..da7131711760 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -2,6 +2,7 @@ db-dtypes==1.0.4 google-cloud-bigquery-storage==2.16.1 google-auth-oauthlib==0.5.3 grpcio==1.49.1 +ipywidgets==7.7.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 0affa1c19f2e..4640dc42f4c6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -3,6 +3,7 @@ google-cloud-bigquery==3.3.3 google-cloud-bigquery-storage==2.16.1 google-auth-oauthlib==0.5.3 grpcio==1.49.1 +ipywidgets==7.7.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 695ffd7d3ab4..119ccb0af51b 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -52,6 +52,7 @@ # See: https://github.com/googleapis/python-bigquery/issues/757 "bqstorage": [], "pandas": ["pandas>=1.0.0", "db-dtypes>=0.3.0,<2.0.0dev"], + "ipywidgets": ["ipywidgets==7.7.1"], "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "ipython": ["ipython>=7.0.1,!=8.1.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index c9e40d823b77..ecce2c7cd13c 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -12,6 +12,7 @@ google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 grpcio==1.47.0 +ipywidgets==7.7.1 ipython==7.0.1 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 diff --git a/packages/google-cloud-bigquery/tests/system/test_magics.py b/packages/google-cloud-bigquery/tests/system/test_magics.py index 78c15cb503d2..3d761cd351ef 100644 --- a/packages/google-cloud-bigquery/tests/system/test_magics.py +++ b/packages/google-cloud-bigquery/tests/system/test_magics.py @@ -71,8 +71,7 @@ def test_bigquery_magic(ipython_interactive): # Removes blanks & terminal code (result of display clearing) updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines)) assert re.match("Executing query with job ID: .*", updates[0]) - assert all(re.match("Query executing: .*s", line) for line in updates[1:-1]) - assert re.match("Query complete after .*s", updates[-1]) + assert (re.match("Query executing: .*s", line) for line in updates[1:-1]) assert isinstance(result, pandas.DataFrame) assert len(result) == 10 # verify row count assert list(result) == ["url", "view_count"] # verify column names diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 84aab3aca597..a45401664e5f 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -37,7 +37,7 @@ except (ImportError, AttributeError): # pragma: NO COVER geopandas = None try: - from tqdm import tqdm + import tqdm except (ImportError, AttributeError): # pragma: NO COVER tqdm = None @@ -301,7 +301,8 @@ def test_to_arrow_max_results_no_progress_bar(): @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_arrow_w_tqdm_w_query_plan(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_arrow_w_tqdm_w_query_plan(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -344,20 +345,20 @@ def test_to_arrow_w_tqdm_w_query_plan(): row_iterator, ], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 3 + assert tqdm_mock.call_count == 3 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_arrow_w_tqdm_w_pending_status(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_arrow_w_tqdm_w_pending_status(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -396,20 +397,20 @@ def test_to_arrow_w_tqdm_w_pending_status(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 2 + assert tqdm_mock.call_count == 2 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_arrow_w_tqdm_wo_query_plan(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_arrow_w_tqdm_wo_query_plan(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -439,14 +440,13 @@ def test_to_arrow_w_tqdm_wo_query_plan(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 2 + assert tqdm_mock.call_count == 2 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called() + tqdm_mock.assert_called() def _make_job(schema=(), rows=()): @@ -720,7 +720,7 @@ def test_to_dataframe_column_date_dtypes(): @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -@mock.patch("tqdm.tqdm") +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_dataframe_with_progress_bar(tqdm_mock): from google.cloud.bigquery.job import QueryJob as target_class @@ -744,14 +744,15 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): job = target_class.from_api_repr(begun_resource, client) job.to_dataframe(progress_bar_type=None, create_bqstorage_client=False) - tqdm_mock.assert_not_called() + tqdm_mock.tqdm.assert_not_called() job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - tqdm_mock.assert_called() + tqdm_mock.tqdm.assert_called() @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_dataframe_w_tqdm_pending(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_dataframe_w_tqdm_pending(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -780,7 +781,7 @@ def test_to_dataframe_w_tqdm_pending(): job._properties["statistics"] = { "query": { "queryPlan": [ - {"name": "S00: Input", "id": "0", "status": "PRNDING"}, + {"name": "S00: Input", "id": "0", "status": "PENDING"}, {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, ] }, @@ -792,21 +793,21 @@ def test_to_dataframe_w_tqdm_pending(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 2 + assert tqdm_mock.call_count == 2 assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df) == ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_dataframe_w_tqdm(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_dataframe_w_tqdm(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -852,20 +853,21 @@ def test_to_dataframe_w_tqdm(): ], ) - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 3 + assert tqdm_mock.call_count == 3 assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df), ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_dataframe_w_tqdm_max_results(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_dataframe_w_tqdm_max_results(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -901,16 +903,13 @@ def test_to_dataframe_w_tqdm_max_results(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: job.to_dataframe( progress_bar_type="tqdm", create_bqstorage_client=False, max_results=3 ) - assert result_patch_tqdm.call_count == 2 - result_patch_tqdm.assert_called_with( - timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 - ) + assert tqdm_mock.call_count == 2 + tqdm_mock.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index ea8fe568fb74..fdfb16d1684b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -278,7 +278,6 @@ def test__run_query(): assert len(execution_updates) == 3 # one update per API response for line in execution_updates: assert re.match("Query executing: .*s", line) - assert re.match("Query complete after .*s", updates[-1]) def test__run_query_dry_run_without_errors_is_silent(): @@ -597,7 +596,7 @@ def warning_match(warning): query_job_mock.to_dataframe.assert_called_once_with( bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY, - progress_bar_type="tqdm", + progress_bar_type="tqdm_notebook", ) assert isinstance(return_value, pandas.DataFrame) @@ -641,7 +640,7 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): query_job_mock.to_dataframe.assert_called_once_with( bqstorage_client=None, create_bqstorage_client=False, - progress_bar_type="tqdm", + progress_bar_type="tqdm_notebook", ) assert isinstance(return_value, pandas.DataFrame) From 355c538697742926ef471cd78b0be170072f805a Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 19 Oct 2022 13:42:02 -0400 Subject: [PATCH 1516/2016] fix(deps): require requests>=2.21.0 (#1388) --- packages/google-cloud-bigquery/setup.py | 2 +- packages/google-cloud-bigquery/testing/constraints-3.7.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 119ccb0af51b..abed852a8bef 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -45,7 +45,7 @@ "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "pyarrow >= 3.0.0, < 10.0dev", - "requests >= 2.18.0, < 3.0.0dev", + "requests >= 2.21.0, < 3.0.0dev", ] extras = { # Keep the no-op bqstorage extra for backward compatibility. diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index ecce2c7cd13c..57928714f4e7 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -22,7 +22,7 @@ proto-plus==1.22.0 protobuf==3.19.5 pyarrow==3.0.0 python-dateutil==2.7.3 -requests==2.18.0 +requests==2.21.0 Shapely==1.6.4.post2 six==1.13.0 tqdm==4.7.4 From 1285ffc1ed2c09167fd4aa25e7f1a07f18e8618a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 19 Oct 2022 20:23:06 +0200 Subject: [PATCH 1517/2016] chore(deps): update all dependencies (#1382) Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 16 ++++++++-------- .../samples/magics/requirements.txt | 10 +++++----- .../samples/snippets/requirements.txt | 12 ++++++------ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index d6b2c3ed9eb6..ef50fafb6777 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -7,26 +7,26 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.0.4 -Fiona==1.8.21 +Fiona==1.8.22 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' google-api-core==2.10.2 -google-auth==2.12.0 -google-cloud-bigquery==3.3.3 -google-cloud-bigquery-storage==2.16.1 +google-auth==2.13.0 +google-cloud-bigquery==3.3.5 +google-cloud-bigquery-storage==2.16.2 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.0 googleapis-common-protos==1.56.4 -grpcio==1.49.1 +grpcio==1.50.0 idna==3.4 libcst==0.4.7 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' -pandas==1.5.0; python_version >= '3.8' +pandas==1.5.1; python_version >= '3.8' proto-plus==1.22.1 pyarrow==9.0.0 pyasn1==0.4.8 @@ -34,11 +34,11 @@ pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.4 +pytz==2022.5 PyYAML==6.0 requests==2.28.1 rsa==4.9 -Shapely==1.8.4 +Shapely==1.8.5.post1 six==1.16.0 typing-extensions==4.4.0 typing-inspect==0.8.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index da7131711760..bdd026ce5eee 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.0.4 -google-cloud-bigquery-storage==2.16.1 +google-cloud-bigquery-storage==2.16.2 google-auth-oauthlib==0.5.3 -grpcio==1.49.1 -ipywidgets==7.7.1 +grpcio==1.50.0 +ipywidgets==8.0.2 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.0; python_version >= '3.8' +pandas==1.5.1; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.4 +pytz==2022.5 typing-extensions==4.4.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 4640dc42f4c6..ebf892279de5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ db-dtypes==1.0.4 -google-cloud-bigquery==3.3.3 -google-cloud-bigquery-storage==2.16.1 +google-cloud-bigquery==3.3.5 +google-cloud-bigquery-storage==2.16.2 google-auth-oauthlib==0.5.3 -grpcio==1.49.1 -ipywidgets==7.7.1 +grpcio==1.50.0 +ipywidgets==8.0.2 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.0; python_version >= '3.8' +pandas==1.5.1; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.4 +pytz==2022.5 typing-extensions==4.4.0 From 311ca16d0fbfcd066890ae938b4af8edbedacbee Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 2 Nov 2022 10:36:09 -0400 Subject: [PATCH 1518/2016] fix: corrects test for non-existent attribute (#1395) * fix: corrects test for non-existent attribute * updates import statement to fix linting issue * updates a test to check for Python version * updates comments --- .../google-cloud-bigquery/google/cloud/bigquery/table.py | 3 ++- .../samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/tests/unit/test_table.py | 6 +++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 2065c5fd2c0b..4fd77dd21d1b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -40,10 +40,11 @@ try: import shapely # type: ignore + from shapely import wkt # type: ignore except ImportError: shapely = None else: - _read_wkt = shapely.wkt.loads + _read_wkt = wkt.loads import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index ef50fafb6777..798de6bb5209 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -10,7 +10,7 @@ db-dtypes==1.0.4 Fiona==1.8.22 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' -geopandas==0.11.1; python_version >= '3.8' +geopandas==0.12.1; python_version >= '3.8' google-api-core==2.10.2 google-auth==2.13.0 google-cloud-bigquery==3.3.5 diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index fca43f1eed88..f542c7523ff7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -15,6 +15,7 @@ import datetime import logging import re +from sys import version_info import time import types import unittest @@ -1969,7 +1970,10 @@ def test_to_geodataframe(self): df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 0) # verify the number of rows - self.assertIsNone(df.crs) + if version_info.major == 3 and version_info.minor > 7: + assert not hasattr(df, "crs") # used with Python > 3.7 + else: + self.assertIsNone(df.crs) # used with Python == 3.7 class TestRowIterator(unittest.TestCase): From add2008bb1f9c26106bd0bf603a34f1cfd072e54 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 2 Nov 2022 14:47:58 -0400 Subject: [PATCH 1519/2016] fix(deps): allow pyarrow < 11 (#1393) Co-authored-by: Chalmer Lowe --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index abed852a8bef..c8bf640c28a5 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -44,7 +44,7 @@ "packaging >= 14.3, <22.0.0dev", "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", - "pyarrow >= 3.0.0, < 10.0dev", + "pyarrow >= 3.0.0, < 11.0dev", "requests >= 2.21.0, < 3.0.0dev", ] extras = { From ae1587605e047c8e33a34a61156d45150699e3cb Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Fri, 4 Nov 2022 11:39:06 -0400 Subject: [PATCH 1520/2016] chore(main): release 3.3.6 (#1375) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 26 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index d5efd7dd6a5f..869d063e5b95 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,32 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.3.6](https://github.com/googleapis/python-bigquery/compare/v3.3.4...v3.3.6) (2022-11-02) + + +### Features + +* Reconfigure tqdm progress bar in %%bigquery magic ([#1355](https://github.com/googleapis/python-bigquery/issues/1355)) ([506f781](https://github.com/googleapis/python-bigquery/commit/506f781c2dd775193336ab9432f32148250ed81d)) + + +### Bug Fixes + +* Corrects test for non-existent attribute ([#1395](https://github.com/googleapis/python-bigquery/issues/1395)) ([a80f436](https://github.com/googleapis/python-bigquery/commit/a80f436f2e75a8fb680316f17a22eecb31a7101d)) +* **deps:** Allow protobuf 3.19.5 ([#1379](https://github.com/googleapis/python-bigquery/issues/1379)) ([3e4a074](https://github.com/googleapis/python-bigquery/commit/3e4a074a981eb2920c5f9a711c253565d4844858)) +* **deps:** Allow pyarrow < 11 ([#1393](https://github.com/googleapis/python-bigquery/issues/1393)) ([c898546](https://github.com/googleapis/python-bigquery/commit/c898546d3292f9ec1ba6120cd3f9e2805aa087bb)) +* **deps:** Require requests>=2.21.0 ([#1388](https://github.com/googleapis/python-bigquery/issues/1388)) ([e398336](https://github.com/googleapis/python-bigquery/commit/e39833673582e4a7a34103cfc45603932c9c33b3)) +* Refactor to adapt to changes to shapely dependency ([#1376](https://github.com/googleapis/python-bigquery/issues/1376)) ([2afd278](https://github.com/googleapis/python-bigquery/commit/2afd278febe1eb247adc6278ab59903962a5bb6c)) + + +### Documentation + +* Fix typos ([#1372](https://github.com/googleapis/python-bigquery/issues/1372)) ([21cc525](https://github.com/googleapis/python-bigquery/commit/21cc525a86a06acfe73e5c5a74ec5f0b61e410f2)) + + +### Miscellaneous Chores + +* release 3.3.6 ([4fce1d9](https://github.com/googleapis/python-bigquery/commit/4fce1d93b1763703b115a0480a2b97021786aff7)) + ## [3.3.4](https://github.com/googleapis/python-bigquery/compare/v3.3.3...v3.3.4) (2022-09-29) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 3e1a9869c3c3..43360a201125 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.3.4" +__version__ = "3.3.6" From fcc5cebfc305164f68d201f127093a45c4aaa352 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Mon, 14 Nov 2022 16:26:37 -0600 Subject: [PATCH 1521/2016] feat: add `reference_file_schema_uri` to LoadJobConfig, ExternalConfig (#1399) * feat: add 'reference_file_schema_uri' to LoadJobConfig and ExternalConfig --- .../google/cloud/bigquery/external_config.py | 14 ++ .../google/cloud/bigquery/job/load.py | 21 ++ .../testing/constraints-3.7.txt | 2 +- .../tests/system/test_client.py | 203 ++++++++++++++++++ .../tests/unit/job/test_base.py | 5 +- .../tests/unit/job/test_load.py | 12 ++ .../tests/unit/test_external_config.py | 6 + 7 files changed, 258 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 640b2d16b689..bd60e4ef1d12 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -756,6 +756,20 @@ def hive_partitioning(self, value): prop = value.to_api_repr() if value is not None else None self._properties["hivePartitioningOptions"] = prop + @property + def reference_file_schema_uri(self): + """Optional[str]: + When creating an external table, the user can provide a reference file with the + table schema. This is enabled for the following formats: + + AVRO, PARQUET, ORC + """ + return self._properties.get("referenceFileSchemaUri") + + @reference_file_schema_uri.setter + def reference_file_schema_uri(self, value): + self._properties["referenceFileSchemaUri"] = value + @property def ignore_unknown_values(self): """bool: If :data:`True`, extra values that are not represented in the diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index e4b44395e7f5..5c7f268419a2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -379,6 +379,20 @@ def range_partitioning(self, value): ) self._set_sub_prop("rangePartitioning", resource) + @property + def reference_file_schema_uri(self): + """Optional[str]: + When creating an external table, the user can provide a reference file with the + table schema. This is enabled for the following formats: + + AVRO, PARQUET, ORC + """ + return self._get_sub_prop("referenceFileSchemaUri") + + @reference_file_schema_uri.setter + def reference_file_schema_uri(self, value): + return self._set_sub_prop("referenceFileSchemaUri", value) + @property def schema(self): """Optional[Sequence[Union[ \ @@ -651,6 +665,13 @@ def quote_character(self): """ return self._configuration.quote_character + @property + def reference_file_schema_uri(self): + """See: + attr:`google.cloud.bigquery.job.LoadJobConfig.reference_file_schema_uri`. + """ + return self._configuration.reference_file_schema_uri + @property def skip_leading_rows(self): """See diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 57928714f4e7..2c5b169db02a 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -25,4 +25,4 @@ python-dateutil==2.7.3 requests==2.21.0 Shapely==1.6.4.post2 six==1.13.0 -tqdm==4.7.4 +tqdm==4.7.4 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index c99ee1c72dd1..152bb81443c5 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -97,6 +97,20 @@ ), ] +SOURCE_URIS_AVRO = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro", +] +SOURCE_URIS_PARQUET = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.parquet", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.parquet", +] +REFERENCE_FILE_SCHEMA_URI_AVRO = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro" +REFERENCE_FILE_SCHEMA_URI_PARQUET = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet" + + # The VPC-SC team maintains a mirror of the GCS bucket used for code # samples. The public bucket crosses the configured security boundary. # See: https://github.com/googleapis/google-cloud-python/issues/8550 @@ -1052,6 +1066,195 @@ def test_load_table_from_file_w_explicit_location(self): table_ref, "gs://{}/letters-us.csv".format(bucket_name), location="US" ).result() + def test_create_external_table_with_reference_file_schema_uri_avro(self): + client = Config.CLIENT + dataset_id = _make_dataset_id("external_reference_file_avro") + self.temp_dataset(dataset_id) + dataset_ref = bigquery.DatasetReference(client.project, dataset_id) + table_id = "test_ref_file_avro" + table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id) + + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + + # By default, the table should have the c-twitter schema because it is lexicographically last + # in the `SOURCE_URIs` list: + # a-twitter schema: (username, tweet, timestamp, likes) + # b-twitter schema: (username, tweet, timestamp) + # c-twitter schema: (username, tweet) + + # Because `referenceFileSchemaUri` is set as a-twitter, the table will have a-twitter schema + + # Create external data configuration + external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO) + external_config.source_uris = SOURCE_URIS_AVRO + external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO + + table = bigquery.Table(table_ref) + table.external_data_configuration = external_config + + table = client.create_table(table) + + # Get table created by the create_table API call + generated_table = client.get_table(table_ref) + + self.assertEqual(generated_table.schema, expected_schema) + self.assertEqual( + generated_table.external_data_configuration._properties[ + "referenceFileSchemaUri" + ], + REFERENCE_FILE_SCHEMA_URI_AVRO, + ) + + # Clean up test + self.to_delete.insert(0, generated_table) + + def test_load_table_from_uri_with_reference_file_schema_uri_avro(self): + dataset_id = _make_dataset_id("test_reference_file_avro") + self.temp_dataset(dataset_id) + client = Config.CLIENT + dataset_ref = bigquery.DatasetReference(client.project, dataset_id) + table_id = "test_ref_file_avro" + table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id) + + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + + # By default, the table should have the c-twitter schema because it is lexicographically last + # in the `SOURCE_URIS` list: + # a-twitter schema: (username, tweet, timestamp, likes) + # b-twitter schema: (username, tweet, timestamp) + # c-twitter schema: (username, tweet) + + # Because `referenceFileSchemaUri` is set as a-twitter, the table will have a-twitter schema + + # Create load job configuration + load_job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.AVRO + ) + load_job_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO + + load_job = client.load_table_from_uri( + source_uris=SOURCE_URIS_AVRO, + destination=table_ref, + job_config=load_job_config, + ) + # Wait for load job to complete + result = load_job.result() + + # Get table created by the load job + generated_table = client.get_table(table_ref) + self.assertEqual(generated_table.schema, expected_schema) + self.assertEqual( + result._properties["configuration"]["load"]["referenceFileSchemaUri"], + REFERENCE_FILE_SCHEMA_URI_AVRO, + ) + + # Clean up test + self.to_delete.insert(0, generated_table) + + def test_create_external_table_with_reference_file_schema_uri_parquet(self): + client = Config.CLIENT + dataset_id = _make_dataset_id("external_table_ref_file_parquet") + self.temp_dataset(dataset_id) + dataset_ref = bigquery.DatasetReference(client.project, dataset_id) + table_id = "test_ref_file_parquet" + table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id) + + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + + # By default, the table should have the c-twitter schema because it is lexicographically last + # in the `SOURCE_URIS` list: + # a-twitter schema: (username, tweet, timestamp, likes) + # b-twitter schema: (username, tweet, timestamp) + # c-twitter schema: (username, tweet) + + # Because `referenceFileSchemaUri` is set as a-twitter, the table will have a-twitter schema + + # Create external data configuration + external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.PARQUET) + external_config.source_uris = SOURCE_URIS_PARQUET + external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_PARQUET + + table = bigquery.Table(table_ref) + table.external_data_configuration = external_config + + table = client.create_table(table) + + # Get table created by the create_table API call + generated_table = client.get_table(table_ref) + self.assertEqual(generated_table.schema, expected_schema) + self.assertEqual( + generated_table.external_data_configuration._properties[ + "referenceFileSchemaUri" + ], + REFERENCE_FILE_SCHEMA_URI_PARQUET, + ) + + # Clean up test + self.to_delete.insert(0, generated_table) + + def test_load_table_from_uri_with_reference_file_schema_uri_parquet(self): + dataset_id = _make_dataset_id("test_reference_file_parquet") + self.temp_dataset(dataset_id) + client = Config.CLIENT + dataset_ref = bigquery.DatasetReference(client.project, dataset_id) + table_id = "test_ref_file_parquet" + table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id) + + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + + # By default, the table should have the c-twitter schema because it is lexicographically last + # in the `SOURCE_URIS` list: + # a-twitter schema: (username, tweet, timestamp, likes) + # b-twitter schema: (username, tweet, timestamp) + # c-twitter schema: (username, tweet) + + # Because `referenceFileSchemaUri` is set as a-twitter, the table will have a-twitter schema + + # Create load job configuration + load_job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.PARQUET + ) + load_job_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_PARQUET + + load_job = client.load_table_from_uri( + source_uris=SOURCE_URIS_PARQUET, + destination=table_ref, + job_config=load_job_config, + ) + # Wait for load job to complete + result = load_job.result() + + # Get table created by the load job + generated_table = client.get_table(table_ref) + self.assertEqual(generated_table.schema, expected_schema) + self.assertEqual( + result._properties["configuration"]["load"]["referenceFileSchemaUri"], + REFERENCE_FILE_SCHEMA_URI_PARQUET, + ) + + # Clean up test + self.to_delete.insert(0, generated_table) + def _write_csv_to_storage(self, bucket_name, blob_name, header_row, data_rows): from google.cloud._testing import _NamedTemporaryFile diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index f0525c22a33e..ed0dc731b05d 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -943,7 +943,6 @@ def test_result_default_wo_state(self): conn = make_connection( _make_retriable_exception(), begun_job_resource, - _make_retriable_exception(), done_job_resource, ) client = _make_client(project=self.PROJECT, connection=conn) @@ -963,9 +962,7 @@ def test_result_default_wo_state(self): query_params={"location": "US"}, timeout=None, ) - conn.api_request.assert_has_calls( - [begin_call, begin_call, reload_call, reload_call] - ) + conn.api_request.assert_has_calls([begin_call, begin_call, reload_call]) def test_result_w_retry_wo_state(self): begun_job_resource = _make_job_resource( diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index cf2096b8be4e..143e1da59556 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -37,6 +37,7 @@ def _setUpConstants(self): self.INPUT_BYTES = 12345 self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 + self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) @@ -47,6 +48,7 @@ def _make_resource(self, started=False, ended=False): "datasetId": self.DS_ID, "tableId": self.TABLE_ID, } + config["referenceFileSchemaUri"] = self.REFERENCE_FILE_SCHEMA_URI if ended: resource["status"] = {"state": "DONE"} @@ -136,6 +138,12 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(str(job.skip_leading_rows), config["skipLeadingRows"]) else: self.assertIsNone(job.skip_leading_rows) + if "referenceFileSchemaUri" in config: + self.assertEqual( + job.reference_file_schema_uri, config["referenceFileSchemaUri"] + ) + else: + self.assertIsNone(job.reference_file_schema_uri) if "destinationEncryptionConfiguration" in config: self.assertIsNotNone(job.destination_encryption_configuration) @@ -186,6 +194,7 @@ def test_ctor(self): self.assertIsNone(job.use_avro_logical_types) self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) + self.assertIsNone(job.reference_file_schema_uri) def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField @@ -461,6 +470,7 @@ def test_begin_w_bound_client(self): "datasetId": self.DS_ID, "tableId": self.TABLE_ID, }, + "referenceFileSchemaUri": self.REFERENCE_FILE_SCHEMA_URI, } }, }, @@ -503,6 +513,7 @@ def test_begin_w_autodetect(self): "datasetId": self.DS_ID, "tableId": self.TABLE_ID, }, + "referenceFileSchemaUri": self.REFERENCE_FILE_SCHEMA_URI, "autodetect": True, } }, @@ -585,6 +596,7 @@ def test_begin_w_alternate_client(self): config.use_avro_logical_types = True config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] + config.reference_file_schema_uri = "gs://path/to/reference" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 3ef61d738345..72fe2761a74d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -99,6 +99,12 @@ def test_connection_id(self): ec.connection_id = "path/to/connection" self.assertEqual(ec.connection_id, "path/to/connection") + def test_reference_file_schema_uri(self): + ec = external_config.ExternalConfig("") + self.assertIsNone(ec.reference_file_schema_uri) + ec.reference_file_schema_uri = "path/to/reference" + self.assertEqual(ec.reference_file_schema_uri, "path/to/reference") + def test_schema_None(self): ec = external_config.ExternalConfig("") ec.schema = None From 1cd24848ce23eee074479eefa7c434735bb4bef7 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Tue, 15 Nov 2022 14:57:17 -0600 Subject: [PATCH 1522/2016] feat: add default value expression (#1408) * feat: Adds default_value_expression to SchemaField --- .../google/cloud/bigquery/schema.py | 38 +++++++++++- .../google/cloud/bigquery/table.py | 2 +- .../tests/system/test_client.py | 62 +++++++++++++++++++ .../tests/unit/test_client.py | 40 ++++++++---- .../tests/unit/test_schema.py | 9 ++- 5 files changed, 135 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 1df78424d2d7..ebf34e4cdfc4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -93,6 +93,30 @@ class SchemaField(object): Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. max_length: Maximum length of fields with STRING or BYTES type. + + default_value_expression: str, Optional + Used to specify the default value of a field using a SQL expression. It can only be set for + top level fields (columns). + + You can use a struct or array expression to specify default value for the entire struct or + array. The valid SQL expressions are: + + - Literals for all data types, including STRUCT and ARRAY. + + - The following functions: + + `CURRENT_TIMESTAMP` + `CURRENT_TIME` + `CURRENT_DATE` + `CURRENT_DATETIME` + `GENERATE_UUID` + `RAND` + `SESSION_USER` + `ST_GEOPOINT` + + - Struct or array composed with the above allowed functions, for example: + + "[CURRENT_DATE(), DATE '2020-01-01'"] """ def __init__( @@ -100,6 +124,7 @@ def __init__( name: str, field_type: str, mode: str = "NULLABLE", + default_value_expression: str = None, description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE, fields: Iterable["SchemaField"] = (), policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE, @@ -115,6 +140,8 @@ def __init__( self._properties["mode"] = mode.upper() if description is not _DEFAULT_VALUE: self._properties["description"] = description + if default_value_expression is not None: + self._properties["defaultValueExpression"] = default_value_expression if precision is not _DEFAULT_VALUE: self._properties["precision"] = precision if scale is not _DEFAULT_VALUE: @@ -154,6 +181,8 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": fields = api_repr.get("fields", ()) policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) + default_value_expression = api_repr.get("defaultValueExpression", None) + if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: policy_tags = PolicyTagList.from_api_repr(policy_tags) @@ -161,6 +190,7 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": field_type=field_type, fields=[cls.from_api_repr(f) for f in fields], mode=mode.upper(), + default_value_expression=default_value_expression, description=description, name=api_repr["name"], policy_tags=policy_tags, @@ -197,6 +227,11 @@ def is_nullable(self): """bool: whether 'mode' is 'nullable'.""" return self.mode == "NULLABLE" + @property + def default_value_expression(self): + """Optional[str] default value of a field, using an SQL expression""" + return self._properties.get("defaultValueExpression") + @property def description(self): """Optional[str]: description for the field.""" @@ -260,7 +295,7 @@ def _key(self): field_type = self.field_type.upper() if self.field_type is not None else None # Type can temporarily be set to None if the code needs a SchemaField instance, - # but has npt determined the exact type of the field yet. + # but has not determined the exact type of the field yet. if field_type is not None: if field_type == "STRING" or field_type == "BYTES": if self.max_length is not None: @@ -281,6 +316,7 @@ def _key(self): field_type, # Mode is always str, if not given it defaults to a str value self.mode.upper(), # pytype: disable=attribute-error + self.default_value_expression, self.description, self._fields, policy_tags, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 4fd77dd21d1b..96888d62d490 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1421,7 +1421,7 @@ def get(self, key: str, default: Any = None) -> Any: >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') None - The default value can be overrided with the ``default`` parameter. + The default value can be overridden with the ``default`` parameter. >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '') '' diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 152bb81443c5..25edc18e1f60 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -441,6 +441,68 @@ def test_create_table_with_real_custom_policy(self): list(table.schema[1].policy_tags.names), [child_policy_tag.name] ) + def test_create_table_with_default_value_expression(self): + dataset = self.temp_dataset( + _make_dataset_id("create_table_with_default_value_expression") + ) + + table_id = "test_table" + timestamp_field_name = "timestamp_field_with_default_value_expression" + + string_default_val_expression = "'FOO'" + timestamp_default_val_expression = "CURRENT_TIMESTAMP" + + schema = [ + bigquery.SchemaField( + "username", + "STRING", + default_value_expression=string_default_val_expression, + ), + bigquery.SchemaField( + timestamp_field_name, + "TIMESTAMP", + default_value_expression=timestamp_default_val_expression, + ), + ] + table_arg = Table(dataset.table(table_id), schema=schema) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + + # Fetch the created table and its metadata to verify that the default + # value expression is assigned to fields + remote_table = Config.CLIENT.get_table(table) + remote_schema = remote_table.schema + self.assertEqual(remote_schema, schema) + + for field in remote_schema: + if field.name == string_default_val_expression: + self.assertEqual("'FOO'", field.default_value_expression) + if field.name == timestamp_default_val_expression: + self.assertEqual("CURRENT_TIMESTAMP", field.default_value_expression) + + # Insert rows into the created table to verify default values are populated + # when value is not provided + NOW_SECONDS = 1448911495.484366 + NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(tzinfo=UTC) + + # Rows to insert. Row #1 will have default `TIMESTAMP` defaultValueExpression CURRENT_TIME + # Row #2 will have default `STRING` defaultValueExpression "'FOO" + ROWS = [{"username": "john_doe"}, {timestamp_field_name: NOW}] + + errors = Config.CLIENT.insert_rows(table, ROWS) + self.assertEqual(len(errors), 0) + + # Get list of inserted rows + row_1, row_2 = [row for row in list(Config.CLIENT.list_rows(table))] + + # Assert that row values are populated with default value expression + self.assertIsInstance(row_1.get(timestamp_field_name), datetime.datetime) + self.assertEqual("FOO", row_2.get("username")) + def test_create_table_w_time_partitioning_w_clustering_fields(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 30bab8fa9f65..f4552cda23f4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -8395,9 +8395,19 @@ def test_schema_from_json_with_file_path(self): ]""" expected = [ - SchemaField("qtr", "STRING", "REQUIRED", "quarter"), - SchemaField("rep", "STRING", "NULLABLE", "sales representative"), - SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), + SchemaField( + "rep", + "STRING", + "NULLABLE", + description="sales representative", + ), + SchemaField( + "sales", + "FLOAT", + "NULLABLE", + description="total sales", + ), ] client = self._make_client() @@ -8441,9 +8451,11 @@ def test_schema_from_json_with_file_object(self): ]""" expected = [ - SchemaField("qtr", "STRING", "REQUIRED", "quarter"), - SchemaField("rep", "STRING", "NULLABLE", "sales representative"), - SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), + SchemaField( + "rep", "STRING", "NULLABLE", description="sales representative" + ), + SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"), ] client = self._make_client() @@ -8477,9 +8489,11 @@ def test_schema_to_json_with_file_path(self): ] schema_list = [ - SchemaField("qtr", "STRING", "REQUIRED", "quarter"), - SchemaField("rep", "STRING", "NULLABLE", "sales representative"), - SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), + SchemaField( + "rep", "STRING", "NULLABLE", description="sales representative" + ), + SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"), ] client = self._make_client() @@ -8521,9 +8535,11 @@ def test_schema_to_json_with_file_object(self): ] schema_list = [ - SchemaField("qtr", "STRING", "REQUIRED", "quarter"), - SchemaField("rep", "STRING", "NULLABLE", "sales representative"), - SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), + SchemaField( + "rep", "STRING", "NULLABLE", description="sales representative" + ), + SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"), ] fake_file = io.StringIO() diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 6a547cb13e88..c6593e1b4daf 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -45,8 +45,10 @@ def test_constructor_defaults(self): self.assertIsNone(field.description) self.assertEqual(field.fields, ()) self.assertIsNone(field.policy_tags) + self.assertIsNone(field.default_value_expression) def test_constructor_explicit(self): + FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field" field = self._make_one( "test", "STRING", @@ -58,10 +60,12 @@ def test_constructor_explicit(self): "projects/f/locations/g/taxonomies/h/policyTags/i", ) ), + default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION, ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.default_value_expression, FIELD_DEFAULT_VALUE_EXPRESSION) self.assertEqual(field.description, "Testing") self.assertEqual(field.fields, ()) self.assertEqual( @@ -182,6 +186,7 @@ def test_from_api_repr_defaults(self): self.assertEqual(field.field_type, "RECORD") self.assertEqual(field.mode, "NULLABLE") self.assertEqual(len(field.fields), 0) + self.assertEqual(field.default_value_expression, None) # Keys not present in API representation shouldn't be included in # _properties. @@ -527,12 +532,12 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one("field1", "STRING") - expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)" + expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)" self.assertEqual(repr(field1), expected) def test___repr__type_not_set(self): field1 = self._make_one("field1", field_type=None) - expected = "SchemaField('field1', None, 'NULLABLE', None, (), None)" + expected = "SchemaField('field1', None, 'NULLABLE', None, None, (), None)" self.assertEqual(repr(field1), expected) def test___repr__evaluable_no_policy_tags(self): From a214475e5df8ef0d97baecbef9dd617b5699db20 Mon Sep 17 00:00:00 2001 From: Walt Askew Date: Wed, 16 Nov 2022 05:44:06 -0800 Subject: [PATCH 1523/2016] feat: Add More Specific Type Annotations for Row Dictionaries (#1295) The keys must be strings as they represent column names. Update type annotations to reflect this. Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> --- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1200d78f9e0d..b72505a154f5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3349,10 +3349,10 @@ def query( def insert_rows( self, table: Union[Table, TableReference, str], - rows: Union[Iterable[Tuple], Iterable[Dict]], + rows: Union[Iterable[Tuple], Iterable[Mapping[str, Any]]], selected_fields: Sequence[SchemaField] = None, **kwargs, - ) -> Sequence[dict]: + ) -> Sequence[Dict[str, Any]]: """Insert rows into a table via the streaming API. See @@ -3470,7 +3470,7 @@ def insert_rows_from_dataframe( def insert_rows_json( self, table: Union[Table, TableReference, TableListItem, str], - json_rows: Sequence[Dict], + json_rows: Sequence[Mapping[str, Any]], row_ids: Union[ Iterable[Optional[str]], AutoRowIDs, None ] = AutoRowIDs.GENERATE_UUID, From 771bb0d9be0c6cd5be3e544c7de8820c63573935 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 17 Nov 2022 15:08:37 -0500 Subject: [PATCH 1524/2016] chore(setup.py): remove python upper bound (#1413) --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index c8bf640c28a5..5fc694c6fab7 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -124,7 +124,7 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=3.7, <3.11", + python_requires=">=3.7", include_package_data=True, zip_safe=False, ) From bf05a84b868bb7c57ec74e774afd5a167006ab7f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Fri, 18 Nov 2022 10:56:42 -0600 Subject: [PATCH 1525/2016] chore(main): release 3.4.0 (#1407) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 9 +++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 869d063e5b95..294e5b42fffe 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,15 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.4.0](https://github.com/googleapis/python-bigquery/compare/v3.3.6...v3.4.0) (2022-11-17) + + +### Features + +* Add `reference_file_schema_uri` to LoadJobConfig, ExternalConfig ([#1399](https://github.com/googleapis/python-bigquery/issues/1399)) ([931285f](https://github.com/googleapis/python-bigquery/commit/931285ff85842ab07a0ef2ff9db808181ea3c5e4)) +* Add default value expression ([#1408](https://github.com/googleapis/python-bigquery/issues/1408)) ([207aa50](https://github.com/googleapis/python-bigquery/commit/207aa506ab634bdb13256fa5bd8745ec9de23290)) +* Add More Specific Type Annotations for Row Dictionaries ([#1295](https://github.com/googleapis/python-bigquery/issues/1295)) ([eb49873](https://github.com/googleapis/python-bigquery/commit/eb49873176dee478617eb50472d44703abca53b5)) + ## [3.3.6](https://github.com/googleapis/python-bigquery/compare/v3.3.4...v3.3.6) (2022-11-02) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 43360a201125..6b822f0c1d8e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.3.6" +__version__ = "3.4.0" From ffc7e498c6d5a86572ca6aa21bfc610b2288d484 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Fri, 18 Nov 2022 12:43:30 -0600 Subject: [PATCH 1526/2016] docs: add info about streaming quota limits to `insert_rows*` methods (#1409) * docs: add information about streaming quota limits (413: Payload Too Large) Co-authored-by: Anthonios Partheniou --- .../google/cloud/bigquery/client.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index b72505a154f5..1f3647e71ad8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3358,6 +3358,14 @@ def insert_rows( See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll + BigQuery will reject insertAll payloads that exceed a defined limit (10MB). + Additionally, if a payload vastly exceeds this limit, the request is rejected + by the intermediate architecture, which returns a 413 (Payload Too Large) status code. + + + See + https://cloud.google.com/bigquery/quotas#streaming_inserts + Args: table (Union[ \ google.cloud.bigquery.table.Table, \ @@ -3424,6 +3432,13 @@ def insert_rows_from_dataframe( ) -> Sequence[Sequence[dict]]: """Insert rows into a table from a dataframe via the streaming API. + BigQuery will reject insertAll payloads that exceed a defined limit (10MB). + Additionally, if a payload vastly exceeds this limit, the request is rejected + by the intermediate architecture, which returns a 413 (Payload Too Large) status code. + + See + https://cloud.google.com/bigquery/quotas#streaming_inserts + Args: table (Union[ \ google.cloud.bigquery.table.Table, \ @@ -3485,6 +3500,13 @@ def insert_rows_json( See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll + BigQuery will reject insertAll payloads that exceed a defined limit (10MB). + Additionally, if a payload vastly exceeds this limit, the request is rejected + by the intermediate architecture, which returns a 413 (Payload Too Large) status code. + + See + https://cloud.google.com/bigquery/quotas#streaming_inserts + Args: table (Union[ \ google.cloud.bigquery.table.Table \ From f635408315d9e2dbf5eafe5dd4b5cbb92e72c270 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 30 Nov 2022 10:37:11 -0500 Subject: [PATCH 1527/2016] chore(python): drop flake8-import-order in samples noxfile (#1424) Source-Link: https://github.com/googleapis/synthtool/commit/6ed3a831cb9ff69ef8a504c353e098ec0192ad93 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:3abfa0f1886adaf0b83f07cb117b24a639ea1cb9cffe56d43280b977033563eb Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 2 +- .../.kokoro/docker/docs/Dockerfile | 12 +- .../.kokoro/requirements.in | 4 +- .../.kokoro/requirements.txt | 354 ++++++++++-------- .../samples/geography/noxfile.py | 26 +- .../samples/magics/noxfile.py | 26 +- .../samples/snippets/noxfile.py | 26 +- 7 files changed, 212 insertions(+), 238 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 3815c983cb16..bb21147e4c23 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:7a40313731a7cb1454eef6b33d3446ebb121836738dc3ab3d2d3ded5268c35b6 + digest: sha256:3abfa0f1886adaf0b83f07cb117b24a639ea1cb9cffe56d43280b977033563eb diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile index 238b87b9d1c9..f8137d0ae497 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile @@ -60,16 +60,16 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb -###################### Install python 3.8.11 +###################### Install python 3.9.13 -# Download python 3.8.11 -RUN wget https://www.python.org/ftp/python/3.8.11/Python-3.8.11.tgz +# Download python 3.9.13 +RUN wget https://www.python.org/ftp/python/3.9.13/Python-3.9.13.tgz # Extract files -RUN tar -xvf Python-3.8.11.tgz +RUN tar -xvf Python-3.9.13.tgz -# Install python 3.8.11 -RUN ./Python-3.8.11/configure --enable-optimizations +# Install python 3.9.13 +RUN ./Python-3.9.13/configure --enable-optimizations RUN make altinstall ###################### Install pip diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.in b/packages/google-cloud-bigquery/.kokoro/requirements.in index 7718391a34d7..cbd7e77f44db 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.in +++ b/packages/google-cloud-bigquery/.kokoro/requirements.in @@ -5,4 +5,6 @@ typing-extensions twine wheel setuptools -nox \ No newline at end of file +nox +charset-normalizer<3 +click<8.1.0 diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index d15994bac93c..9c1b9be34e6b 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -20,9 +20,9 @@ cachetools==5.2.0 \ --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \ --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db # via google-auth -certifi==2022.6.15 \ - --hash=sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d \ - --hash=sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412 +certifi==2022.9.24 \ + --hash=sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14 \ + --hash=sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382 # via requests cffi==1.15.1 \ --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ @@ -93,11 +93,14 @@ cffi==1.15.1 \ charset-normalizer==2.1.1 \ --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ --hash=sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f - # via requests + # via + # -r requirements.in + # requests click==8.0.4 \ --hash=sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1 \ --hash=sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb # via + # -r requirements.in # gcp-docuploader # gcp-releasetool colorlog==6.7.0 \ @@ -110,29 +113,33 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==37.0.4 \ - --hash=sha256:190f82f3e87033821828f60787cfa42bff98404483577b591429ed99bed39d59 \ - --hash=sha256:2be53f9f5505673eeda5f2736bea736c40f051a739bfae2f92d18aed1eb54596 \ - --hash=sha256:30788e070800fec9bbcf9faa71ea6d8068f5136f60029759fd8c3efec3c9dcb3 \ - --hash=sha256:3d41b965b3380f10e4611dbae366f6dc3cefc7c9ac4e8842a806b9672ae9add5 \ - --hash=sha256:4c590ec31550a724ef893c50f9a97a0c14e9c851c85621c5650d699a7b88f7ab \ - --hash=sha256:549153378611c0cca1042f20fd9c5030d37a72f634c9326e225c9f666d472884 \ - --hash=sha256:63f9c17c0e2474ccbebc9302ce2f07b55b3b3fcb211ded18a42d5764f5c10a82 \ - --hash=sha256:6bc95ed67b6741b2607298f9ea4932ff157e570ef456ef7ff0ef4884a134cc4b \ - --hash=sha256:7099a8d55cd49b737ffc99c17de504f2257e3787e02abe6d1a6d136574873441 \ - --hash=sha256:75976c217f10d48a8b5a8de3d70c454c249e4b91851f6838a4e48b8f41eb71aa \ - --hash=sha256:7bc997818309f56c0038a33b8da5c0bfbb3f1f067f315f9abd6fc07ad359398d \ - --hash=sha256:80f49023dd13ba35f7c34072fa17f604d2f19bf0989f292cedf7ab5770b87a0b \ - --hash=sha256:91ce48d35f4e3d3f1d83e29ef4a9267246e6a3be51864a5b7d2247d5086fa99a \ - --hash=sha256:a958c52505c8adf0d3822703078580d2c0456dd1d27fabfb6f76fe63d2971cd6 \ - --hash=sha256:b62439d7cd1222f3da897e9a9fe53bbf5c104fff4d60893ad1355d4c14a24157 \ - --hash=sha256:b7f8dd0d4c1f21759695c05a5ec8536c12f31611541f8904083f3dc582604280 \ - --hash=sha256:d204833f3c8a33bbe11eda63a54b1aad7aa7456ed769a982f21ec599ba5fa282 \ - --hash=sha256:e007f052ed10cc316df59bc90fbb7ff7950d7e2919c9757fd42a2b8ecf8a5f67 \ - --hash=sha256:f2dcb0b3b63afb6df7fd94ec6fbddac81b5492513f7b0436210d390c14d46ee8 \ - --hash=sha256:f721d1885ecae9078c3f6bbe8a88bc0786b6e749bf32ccec1ef2b18929a05046 \ - --hash=sha256:f7a6de3e98771e183645181b3627e2563dcde3ce94a9e42a3f427d2255190327 \ - --hash=sha256:f8c0a6e9e1dd3eb0414ba320f85da6b0dcbd543126e30fcc546e7372a7fbf3b9 +cryptography==38.0.3 \ + --hash=sha256:068147f32fa662c81aebab95c74679b401b12b57494872886eb5c1139250ec5d \ + --hash=sha256:06fc3cc7b6f6cca87bd56ec80a580c88f1da5306f505876a71c8cfa7050257dd \ + --hash=sha256:25c1d1f19729fb09d42e06b4bf9895212292cb27bb50229f5aa64d039ab29146 \ + --hash=sha256:402852a0aea73833d982cabb6d0c3bb582c15483d29fb7085ef2c42bfa7e38d7 \ + --hash=sha256:4e269dcd9b102c5a3d72be3c45d8ce20377b8076a43cbed6f660a1afe365e436 \ + --hash=sha256:5419a127426084933076132d317911e3c6eb77568a1ce23c3ac1e12d111e61e0 \ + --hash=sha256:554bec92ee7d1e9d10ded2f7e92a5d70c1f74ba9524947c0ba0c850c7b011828 \ + --hash=sha256:5e89468fbd2fcd733b5899333bc54d0d06c80e04cd23d8c6f3e0542358c6060b \ + --hash=sha256:65535bc550b70bd6271984d9863a37741352b4aad6fb1b3344a54e6950249b55 \ + --hash=sha256:6ab9516b85bebe7aa83f309bacc5f44a61eeb90d0b4ec125d2d003ce41932d36 \ + --hash=sha256:6addc3b6d593cd980989261dc1cce38263c76954d758c3c94de51f1e010c9a50 \ + --hash=sha256:728f2694fa743a996d7784a6194da430f197d5c58e2f4e278612b359f455e4a2 \ + --hash=sha256:785e4056b5a8b28f05a533fab69febf5004458e20dad7e2e13a3120d8ecec75a \ + --hash=sha256:78cf5eefac2b52c10398a42765bfa981ce2372cbc0457e6bf9658f41ec3c41d8 \ + --hash=sha256:7f836217000342d448e1c9a342e9163149e45d5b5eca76a30e84503a5a96cab0 \ + --hash=sha256:8d41a46251bf0634e21fac50ffd643216ccecfaf3701a063257fe0b2be1b6548 \ + --hash=sha256:984fe150f350a3c91e84de405fe49e688aa6092b3525f407a18b9646f6612320 \ + --hash=sha256:9b24bcff7853ed18a63cfb0c2b008936a9554af24af2fb146e16d8e1aed75748 \ + --hash=sha256:b1b35d9d3a65542ed2e9d90115dfd16bbc027b3f07ee3304fc83580f26e43249 \ + --hash=sha256:b1b52c9e5f8aa2b802d48bd693190341fae201ea51c7a167d69fc48b60e8a959 \ + --hash=sha256:bbf203f1a814007ce24bd4d51362991d5cb90ba0c177a9c08825f2cc304d871f \ + --hash=sha256:be243c7e2bfcf6cc4cb350c0d5cdf15ca6383bbcb2a8ef51d3c9411a9d4386f0 \ + --hash=sha256:bfbe6ee19615b07a98b1d2287d6a6073f734735b49ee45b11324d85efc4d5cbd \ + --hash=sha256:c46837ea467ed1efea562bbeb543994c2d1f6e800785bd5a2c98bc096f5cb220 \ + --hash=sha256:dfb4f4dd568de1b6af9f4cda334adf7d72cf5bc052516e1b2608b683375dd95c \ + --hash=sha256:ed7b00096790213e09eb11c97cc6e2b757f15f3d2f85833cd2d3ec3fe37c1722 # via # gcp-releasetool # secretstorage @@ -148,23 +155,23 @@ filelock==3.8.0 \ --hash=sha256:55447caa666f2198c5b6b13a26d2084d26fa5b115c00d065664b2124680c4edc \ --hash=sha256:617eb4e5eedc82fc5f47b6d61e4d11cb837c56cb4544e39081099fa17ad109d4 # via virtualenv -gcp-docuploader==0.6.3 \ - --hash=sha256:ba8c9d76b3bbac54b0311c503a373b00edc2dc02d6d54ea9507045adb8e870f7 \ - --hash=sha256:c0f5aaa82ce1854a386197e4e359b120ad6d4e57ae2c812fce42219a3288026b +gcp-docuploader==0.6.4 \ + --hash=sha256:01486419e24633af78fd0167db74a2763974765ee8078ca6eb6964d0ebd388af \ + --hash=sha256:70861190c123d907b3b067da896265ead2eeb9263969d6955c9e0bb091b5ccbf # via -r requirements.in -gcp-releasetool==1.8.7 \ - --hash=sha256:3d2a67c9db39322194afb3b427e9cb0476ce8f2a04033695f0aeb63979fc2b37 \ - --hash=sha256:5e4d28f66e90780d77f3ecf1e9155852b0c3b13cbccb08ab07e66b2357c8da8d +gcp-releasetool==1.10.0 \ + --hash=sha256:72a38ca91b59c24f7e699e9227c90cbe4dd71b789383cb0164b088abae294c83 \ + --hash=sha256:8c7c99320208383d4bb2b808c6880eb7a81424afe7cdba3c8d84b25f4f0e097d # via -r requirements.in -google-api-core==2.8.2 \ - --hash=sha256:06f7244c640322b508b125903bb5701bebabce8832f85aba9335ec00b3d02edc \ - --hash=sha256:93c6a91ccac79079ac6bbf8b74ee75db970cc899278b97d53bc012f35908cf50 +google-api-core==2.10.2 \ + --hash=sha256:10c06f7739fe57781f87523375e8e1a3a4674bf6392cd6131a3222182b971320 \ + --hash=sha256:34f24bd1d5f72a8c4519773d99ca6bf080a6c4e041b4e9f024fe230191dda62e # via # google-cloud-core # google-cloud-storage -google-auth==2.11.0 \ - --hash=sha256:be62acaae38d0049c21ca90f27a23847245c9f161ff54ede13af2cb6afecbac9 \ - --hash=sha256:ed65ecf9f681832298e29328e1ef0a3676e3732b2e56f41532d45f70a22de0fb +google-auth==2.14.1 \ + --hash=sha256:ccaa901f31ad5cbb562615eb8b664b3dd0bf5404a67618e642307f00613eda4d \ + --hash=sha256:f5d8701633bebc12e0deea4df8abd8aff31c28b355360597f7f2ee60f2e4d016 # via # gcp-releasetool # google-api-core @@ -174,76 +181,102 @@ google-cloud-core==2.3.2 \ --hash=sha256:8417acf6466be2fa85123441696c4badda48db314c607cf1e5d543fa8bdc22fe \ --hash=sha256:b9529ee7047fd8d4bf4a2182de619154240df17fbe60ead399078c1ae152af9a # via google-cloud-storage -google-cloud-storage==2.5.0 \ - --hash=sha256:19a26c66c317ce542cea0830b7e787e8dac2588b6bfa4d3fd3b871ba16305ab0 \ - --hash=sha256:382f34b91de2212e3c2e7b40ec079d27ee2e3dbbae99b75b1bcd8c63063ce235 +google-cloud-storage==2.6.0 \ + --hash=sha256:104ca28ae61243b637f2f01455cc8a05e8f15a2a18ced96cb587241cdd3820f5 \ + --hash=sha256:4ad0415ff61abdd8bb2ae81c1f8f7ec7d91a1011613f2db87c614c550f97bfe9 # via gcp-docuploader -google-crc32c==1.3.0 \ - --hash=sha256:04e7c220798a72fd0f08242bc8d7a05986b2a08a0573396187fd32c1dcdd58b3 \ - --hash=sha256:05340b60bf05b574159e9bd940152a47d38af3fb43803ffe71f11d704b7696a6 \ - --hash=sha256:12674a4c3b56b706153a358eaa1018c4137a5a04635b92b4652440d3d7386206 \ - --hash=sha256:127f9cc3ac41b6a859bd9dc4321097b1a4f6aa7fdf71b4f9227b9e3ebffb4422 \ - --hash=sha256:13af315c3a0eec8bb8b8d80b8b128cb3fcd17d7e4edafc39647846345a3f003a \ - --hash=sha256:1926fd8de0acb9d15ee757175ce7242e235482a783cd4ec711cc999fc103c24e \ - --hash=sha256:226f2f9b8e128a6ca6a9af9b9e8384f7b53a801907425c9a292553a3a7218ce0 \ - --hash=sha256:276de6273eb074a35bc598f8efbc00c7869c5cf2e29c90748fccc8c898c244df \ - --hash=sha256:318f73f5484b5671f0c7f5f63741ab020a599504ed81d209b5c7129ee4667407 \ - --hash=sha256:3bbce1be3687bbfebe29abdb7631b83e6b25da3f4e1856a1611eb21854b689ea \ - --hash=sha256:42ae4781333e331a1743445931b08ebdad73e188fd554259e772556fc4937c48 \ - --hash=sha256:58be56ae0529c664cc04a9c76e68bb92b091e0194d6e3c50bea7e0f266f73713 \ - --hash=sha256:5da2c81575cc3ccf05d9830f9e8d3c70954819ca9a63828210498c0774fda1a3 \ - --hash=sha256:6311853aa2bba4064d0c28ca54e7b50c4d48e3de04f6770f6c60ebda1e975267 \ - --hash=sha256:650e2917660e696041ab3dcd7abac160b4121cd9a484c08406f24c5964099829 \ - --hash=sha256:6a4db36f9721fdf391646685ecffa404eb986cbe007a3289499020daf72e88a2 \ - --hash=sha256:779cbf1ce375b96111db98fca913c1f5ec11b1d870e529b1dc7354b2681a8c3a \ - --hash=sha256:7f6fe42536d9dcd3e2ffb9d3053f5d05221ae3bbcefbe472bdf2c71c793e3183 \ - --hash=sha256:891f712ce54e0d631370e1f4997b3f182f3368179198efc30d477c75d1f44942 \ - --hash=sha256:95c68a4b9b7828ba0428f8f7e3109c5d476ca44996ed9a5f8aac6269296e2d59 \ - --hash=sha256:96a8918a78d5d64e07c8ea4ed2bc44354e3f93f46a4866a40e8db934e4c0d74b \ - --hash=sha256:9c3cf890c3c0ecfe1510a452a165431b5831e24160c5fcf2071f0f85ca5a47cd \ - --hash=sha256:9f58099ad7affc0754ae42e6d87443299f15d739b0ce03c76f515153a5cda06c \ - --hash=sha256:a0b9e622c3b2b8d0ce32f77eba617ab0d6768b82836391e4f8f9e2074582bf02 \ - --hash=sha256:a7f9cbea4245ee36190f85fe1814e2d7b1e5f2186381b082f5d59f99b7f11328 \ - --hash=sha256:bab4aebd525218bab4ee615786c4581952eadc16b1ff031813a2fd51f0cc7b08 \ - --hash=sha256:c124b8c8779bf2d35d9b721e52d4adb41c9bfbde45e6a3f25f0820caa9aba73f \ - --hash=sha256:c9da0a39b53d2fab3e5467329ed50e951eb91386e9d0d5b12daf593973c3b168 \ - --hash=sha256:ca60076c388728d3b6ac3846842474f4250c91efbfe5afa872d3ffd69dd4b318 \ - --hash=sha256:cb6994fff247987c66a8a4e550ef374671c2b82e3c0d2115e689d21e511a652d \ - --hash=sha256:d1c1d6236feab51200272d79b3d3e0f12cf2cbb12b208c835b175a21efdb0a73 \ - --hash=sha256:dd7760a88a8d3d705ff562aa93f8445ead54f58fd482e4f9e2bafb7e177375d4 \ - --hash=sha256:dda4d8a3bb0b50f540f6ff4b6033f3a74e8bf0bd5320b70fab2c03e512a62812 \ - --hash=sha256:e0f1ff55dde0ebcfbef027edc21f71c205845585fffe30d4ec4979416613e9b3 \ - --hash=sha256:e7a539b9be7b9c00f11ef16b55486141bc2cdb0c54762f84e3c6fc091917436d \ - --hash=sha256:eb0b14523758e37802f27b7f8cd973f5f3d33be7613952c0df904b68c4842f0e \ - --hash=sha256:ed447680ff21c14aaceb6a9f99a5f639f583ccfe4ce1a5e1d48eb41c3d6b3217 \ - --hash=sha256:f52a4ad2568314ee713715b1e2d79ab55fab11e8b304fd1462ff5cccf4264b3e \ - --hash=sha256:fbd60c6aaa07c31d7754edbc2334aef50601b7f1ada67a96eb1eb57c7c72378f \ - --hash=sha256:fc28e0db232c62ca0c3600884933178f0825c99be4474cdd645e378a10588125 \ - --hash=sha256:fe31de3002e7b08eb20823b3735b97c86c5926dd0581c7710a680b418a8709d4 \ - --hash=sha256:fec221a051150eeddfdfcff162e6db92c65ecf46cb0f7bb1bf812a1520ec026b \ - --hash=sha256:ff71073ebf0e42258a42a0b34f2c09ec384977e7f6808999102eedd5b49920e3 +google-crc32c==1.5.0 \ + --hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \ + --hash=sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876 \ + --hash=sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c \ + --hash=sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289 \ + --hash=sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298 \ + --hash=sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02 \ + --hash=sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f \ + --hash=sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2 \ + --hash=sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a \ + --hash=sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb \ + --hash=sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210 \ + --hash=sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5 \ + --hash=sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee \ + --hash=sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c \ + --hash=sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a \ + --hash=sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314 \ + --hash=sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd \ + --hash=sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65 \ + --hash=sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37 \ + --hash=sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4 \ + --hash=sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13 \ + --hash=sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894 \ + --hash=sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31 \ + --hash=sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e \ + --hash=sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709 \ + --hash=sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740 \ + --hash=sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc \ + --hash=sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d \ + --hash=sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c \ + --hash=sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c \ + --hash=sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d \ + --hash=sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906 \ + --hash=sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61 \ + --hash=sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57 \ + --hash=sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c \ + --hash=sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a \ + --hash=sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438 \ + --hash=sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946 \ + --hash=sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7 \ + --hash=sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96 \ + --hash=sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091 \ + --hash=sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae \ + --hash=sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d \ + --hash=sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88 \ + --hash=sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2 \ + --hash=sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd \ + --hash=sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541 \ + --hash=sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728 \ + --hash=sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178 \ + --hash=sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968 \ + --hash=sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346 \ + --hash=sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8 \ + --hash=sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93 \ + --hash=sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7 \ + --hash=sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273 \ + --hash=sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462 \ + --hash=sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94 \ + --hash=sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd \ + --hash=sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e \ + --hash=sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57 \ + --hash=sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b \ + --hash=sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9 \ + --hash=sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a \ + --hash=sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100 \ + --hash=sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325 \ + --hash=sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183 \ + --hash=sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556 \ + --hash=sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4 # via google-resumable-media -google-resumable-media==2.3.3 \ - --hash=sha256:27c52620bd364d1c8116eaac4ea2afcbfb81ae9139fb3199652fcac1724bfb6c \ - --hash=sha256:5b52774ea7a829a8cdaa8bd2d4c3d4bc660c91b30857ab2668d0eb830f4ea8c5 +google-resumable-media==2.4.0 \ + --hash=sha256:2aa004c16d295c8f6c33b2b4788ba59d366677c0a25ae7382436cb30f776deaa \ + --hash=sha256:8d5518502f92b9ecc84ac46779bd4f09694ecb3ba38a3e7ca737a86d15cbca1f # via google-cloud-storage -googleapis-common-protos==1.56.4 \ - --hash=sha256:8eb2cbc91b69feaf23e32452a7ae60e791e09967d81d4fcc7fc388182d1bd394 \ - --hash=sha256:c25873c47279387cfdcbdafa36149887901d36202cb645a0e4f29686bf6e4417 +googleapis-common-protos==1.57.0 \ + --hash=sha256:27a849d6205838fb6cc3c1c21cb9800707a661bb21c6ce7fb13e99eb1f8a0c46 \ + --hash=sha256:a9f4a1d7f6d9809657b7f1316a1aa527f6664891531bcfcc13b6696e685f443c # via google-api-core -idna==3.3 \ - --hash=sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff \ - --hash=sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d +idna==3.4 \ + --hash=sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4 \ + --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 # via requests -importlib-metadata==4.12.0 \ - --hash=sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670 \ - --hash=sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23 +importlib-metadata==5.0.0 \ + --hash=sha256:da31db32b304314d044d3c12c79bd59e307889b287ad12ff387b3500835fc2ab \ + --hash=sha256:ddb0e35065e8938f867ed4928d0ae5bf2a53b7773871bfe6bcc7e4fcdc7dea43 # via # -r requirements.in + # keyring # twine -jaraco-classes==3.2.2 \ - --hash=sha256:6745f113b0b588239ceb49532aa09c3ebb947433ce311ef2f8e3ad64ebb74594 \ - --hash=sha256:e6ef6fd3fcf4579a7a019d87d1e56a883f4e4c35cfe925f86731abc58804e647 +jaraco-classes==3.2.3 \ + --hash=sha256:2353de3288bc6b82120752201c6b1c1a14b058267fa424ed5ce5984e3b922158 \ + --hash=sha256:89559fa5c1d3c34eff6f631ad80bb21f378dbcbb35dd161fd2c6b93f5be2f98a # via keyring jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ @@ -255,9 +288,9 @@ jinja2==3.1.2 \ --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \ --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 # via gcp-releasetool -keyring==23.9.0 \ - --hash=sha256:4c32a31174faaee48f43a7e2c7e9c3216ec5e95acf22a2bebfb4a1d05056ee44 \ - --hash=sha256:98f060ec95ada2ab910c195a2d4317be6ef87936a766b239c46aa3c7aac4f0db +keyring==23.11.0 \ + --hash=sha256:3dd30011d555f1345dec2c262f0153f2f0ca6bca041fb1dc4588349bb4c0ac1e \ + --hash=sha256:ad192263e2cdd5f12875dedc2da13534359a7e760e77f8d04b50968a821c2361 # via # gcp-releasetool # twine @@ -303,9 +336,9 @@ markupsafe==2.1.1 \ --hash=sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a \ --hash=sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7 # via jinja2 -more-itertools==8.14.0 \ - --hash=sha256:1bc4f91ee5b1b31ac7ceacc17c09befe6a40a503907baf9c839c229b5095cfd2 \ - --hash=sha256:c09443cd3d5438b8dafccd867a6bc1cb0894389e90cb53d227456b0b0bccb750 +more-itertools==9.0.0 \ + --hash=sha256:250e83d7e81d0c87ca6bd942e6aeab8cc9daa6096d12c5308f3f92fa5e5c1f41 \ + --hash=sha256:5a6257e40878ef0520b1803990e3e22303a41b5714006c32a3fd8304b26ea1ab # via jaraco-classes nox==2022.8.7 \ --hash=sha256:1b894940551dc5c389f9271d197ca5d655d40bdc6ccf93ed6880e4042760a34b \ @@ -321,34 +354,33 @@ pkginfo==1.8.3 \ --hash=sha256:848865108ec99d4901b2f7e84058b6e7660aae8ae10164e015a6dcf5b242a594 \ --hash=sha256:a84da4318dd86f870a9447a8c98340aa06216bfc6f2b7bdc4b8766984ae1867c # via twine -platformdirs==2.5.2 \ - --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ - --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 +platformdirs==2.5.4 \ + --hash=sha256:1006647646d80f16130f052404c6b901e80ee4ed6bef6792e1f238a8969106f7 \ + --hash=sha256:af0276409f9a02373d540bf8480021a048711d572745aef4b7842dad245eba10 # via virtualenv -protobuf==3.20.2 \ - --hash=sha256:03d76b7bd42ac4a6e109742a4edf81ffe26ffd87c5993126d894fe48a120396a \ - --hash=sha256:09e25909c4297d71d97612f04f41cea8fa8510096864f2835ad2f3b3df5a5559 \ - --hash=sha256:18e34a10ae10d458b027d7638a599c964b030c1739ebd035a1dfc0e22baa3bfe \ - --hash=sha256:291fb4307094bf5ccc29f424b42268640e00d5240bf0d9b86bf3079f7576474d \ - --hash=sha256:2c0b040d0b5d5d207936ca2d02f00f765906622c07d3fa19c23a16a8ca71873f \ - --hash=sha256:384164994727f274cc34b8abd41a9e7e0562801361ee77437099ff6dfedd024b \ - --hash=sha256:3cb608e5a0eb61b8e00fe641d9f0282cd0eedb603be372f91f163cbfbca0ded0 \ - --hash=sha256:5d9402bf27d11e37801d1743eada54372f986a372ec9679673bfcc5c60441151 \ - --hash=sha256:712dca319eee507a1e7df3591e639a2b112a2f4a62d40fe7832a16fd19151750 \ - --hash=sha256:7a5037af4e76c975b88c3becdf53922b5ffa3f2cddf657574a4920a3b33b80f3 \ - --hash=sha256:8228e56a865c27163d5d1d1771d94b98194aa6917bcfb6ce139cbfa8e3c27334 \ - --hash=sha256:84a1544252a933ef07bb0b5ef13afe7c36232a774affa673fc3636f7cee1db6c \ - --hash=sha256:84fe5953b18a383fd4495d375fe16e1e55e0a3afe7b4f7b4d01a3a0649fcda9d \ - --hash=sha256:9c673c8bfdf52f903081816b9e0e612186684f4eb4c17eeb729133022d6032e3 \ - --hash=sha256:9f876a69ca55aed879b43c295a328970306e8e80a263ec91cf6e9189243c613b \ - --hash=sha256:a9e5ae5a8e8985c67e8944c23035a0dff2c26b0f5070b2f55b217a1c33bbe8b1 \ - --hash=sha256:b4fdb29c5a7406e3f7ef176b2a7079baa68b5b854f364c21abe327bbeec01cdb \ - --hash=sha256:c184485e0dfba4dfd451c3bd348c2e685d6523543a0f91b9fd4ae90eb09e8422 \ - --hash=sha256:c9cdf251c582c16fd6a9f5e95836c90828d51b0069ad22f463761d27c6c19019 \ - --hash=sha256:e39cf61bb8582bda88cdfebc0db163b774e7e03364bbf9ce1ead13863e81e359 \ - --hash=sha256:e8fbc522303e09036c752a0afcc5c0603e917222d8bedc02813fd73b4b4ed804 \ - --hash=sha256:f34464ab1207114e73bba0794d1257c150a2b89b7a9faf504e00af7c9fd58978 \ - --hash=sha256:f52dabc96ca99ebd2169dadbe018824ebda08a795c7684a0b7d203a290f3adb0 +protobuf==3.20.3 \ + --hash=sha256:03038ac1cfbc41aa21f6afcbcd357281d7521b4157926f30ebecc8d4ea59dcb7 \ + --hash=sha256:28545383d61f55b57cf4df63eebd9827754fd2dc25f80c5253f9184235db242c \ + --hash=sha256:2e3427429c9cffebf259491be0af70189607f365c2f41c7c3764af6f337105f2 \ + --hash=sha256:398a9e0c3eaceb34ec1aee71894ca3299605fa8e761544934378bbc6c97de23b \ + --hash=sha256:44246bab5dd4b7fbd3c0c80b6f16686808fab0e4aca819ade6e8d294a29c7050 \ + --hash=sha256:447d43819997825d4e71bf5769d869b968ce96848b6479397e29fc24c4a5dfe9 \ + --hash=sha256:67a3598f0a2dcbc58d02dd1928544e7d88f764b47d4a286202913f0b2801c2e7 \ + --hash=sha256:74480f79a023f90dc6e18febbf7b8bac7508420f2006fabd512013c0c238f454 \ + --hash=sha256:819559cafa1a373b7096a482b504ae8a857c89593cf3a25af743ac9ecbd23480 \ + --hash=sha256:899dc660cd599d7352d6f10d83c95df430a38b410c1b66b407a6b29265d66469 \ + --hash=sha256:8c0c984a1b8fef4086329ff8dd19ac77576b384079247c770f29cc8ce3afa06c \ + --hash=sha256:9aae4406ea63d825636cc11ffb34ad3379335803216ee3a856787bcf5ccc751e \ + --hash=sha256:a7ca6d488aa8ff7f329d4c545b2dbad8ac31464f1d8b1c87ad1346717731e4db \ + --hash=sha256:b6cc7ba72a8850621bfec987cb72623e703b7fe2b9127a161ce61e61558ad905 \ + --hash=sha256:bf01b5720be110540be4286e791db73f84a2b721072a3711efff6c324cdf074b \ + --hash=sha256:c02ce36ec760252242a33967d51c289fd0e1c0e6e5cc9397e2279177716add86 \ + --hash=sha256:d9e4432ff660d67d775c66ac42a67cf2453c27cb4d738fc22cb53b5d84c135d4 \ + --hash=sha256:daa564862dd0d39c00f8086f88700fdbe8bc717e993a21e90711acfed02f2402 \ + --hash=sha256:de78575669dddf6099a8a0f46a27e82a1783c557ccc38ee620ed8cc96d3be7d7 \ + --hash=sha256:e64857f395505ebf3d2569935506ae0dfc4a15cb80dc25261176c784662cdcc4 \ + --hash=sha256:f4bd856d702e5b0d96a00ec6b307b0f51c1982c2bf9c0052cf9019e9a544ba99 \ + --hash=sha256:f4c42102bc82a51108e449cbb32b19b180022941c727bac0cfd50170341f16ee # via # gcp-docuploader # gcp-releasetool @@ -377,9 +409,9 @@ pygments==2.13.0 \ # via # readme-renderer # rich -pyjwt==2.4.0 \ - --hash=sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf \ - --hash=sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba +pyjwt==2.6.0 \ + --hash=sha256:69285c7e31fc44f68a1feb309e948e0df53259d579295e6cfe2b1792329f05fd \ + --hash=sha256:d83c3d892a77bbb74d3e1a2cfa90afaadb60945205d1095d9221f04466f64c14 # via gcp-releasetool pyparsing==3.0.9 \ --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ @@ -392,9 +424,9 @@ python-dateutil==2.8.2 \ --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 # via gcp-releasetool -readme-renderer==37.0 \ - --hash=sha256:07b7ea234e03e58f77cc222e206e6abb8f4c0435becce5104794ee591f9301c5 \ - --hash=sha256:9fa416704703e509eeb900696751c908ddeb2011319d93700d8f18baff887a69 +readme-renderer==37.3 \ + --hash=sha256:cd653186dfc73055656f090f227f5cb22a046d7f71a841dfa305f55c9a513273 \ + --hash=sha256:f67a16caedfa71eef48a31b39708637a6f4664c4394801a7b0d6432d13907343 # via twine requests==2.28.1 \ --hash=sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983 \ @@ -405,17 +437,17 @@ requests==2.28.1 \ # google-cloud-storage # requests-toolbelt # twine -requests-toolbelt==0.9.1 \ - --hash=sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f \ - --hash=sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0 +requests-toolbelt==0.10.1 \ + --hash=sha256:18565aa58116d9951ac39baa288d3adb5b3ff975c4f25eee78555d89e8f247f7 \ + --hash=sha256:62e09f7ff5ccbda92772a29f394a49c3ad6cb181d568b1337626b2abb628a63d # via twine rfc3986==2.0.0 \ --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c # via twine -rich==12.5.1 \ - --hash=sha256:2eb4e6894cde1e017976d2975ac210ef515d7548bc595ba20e195fb9628acdeb \ - --hash=sha256:63a5c5ce3673d3d5fbbf23cd87e11ab84b6b451436f1b7f19ec54b6bc36ed7ca +rich==12.6.0 \ + --hash=sha256:a4eb26484f2c82589bd9a17c73d32a010b1e29d89f1604cd9bf3a2097b81bb5e \ + --hash=sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0 # via twine rsa==4.9 \ --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ @@ -437,9 +469,9 @@ twine==4.0.1 \ --hash=sha256:42026c18e394eac3e06693ee52010baa5313e4811d5a11050e7d48436cf41b9e \ --hash=sha256:96b1cf12f7ae611a4a40b6ae8e9570215daff0611828f5fe1f37a16255ab24a0 # via -r requirements.in -typing-extensions==4.3.0 \ - --hash=sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02 \ - --hash=sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6 +typing-extensions==4.4.0 \ + --hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ + --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e # via -r requirements.in urllib3==1.26.12 \ --hash=sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e \ @@ -447,25 +479,25 @@ urllib3==1.26.12 \ # via # requests # twine -virtualenv==20.16.4 \ - --hash=sha256:014f766e4134d0008dcaa1f95bafa0fb0f575795d07cae50b1bee514185d6782 \ - --hash=sha256:035ed57acce4ac35c82c9d8802202b0e71adac011a511ff650cbcf9635006a22 +virtualenv==20.16.7 \ + --hash=sha256:8691e3ff9387f743e00f6bb20f70121f5e4f596cae754531f2b3b3a1b1ac696e \ + --hash=sha256:efd66b00386fdb7dbe4822d172303f40cd05e50e01740b19ea42425cbe653e29 # via nox webencodings==0.5.1 \ --hash=sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 \ --hash=sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923 # via bleach -wheel==0.37.1 \ - --hash=sha256:4bdcd7d840138086126cd09254dc6195fb4fc6f01c050a1d7236f2630db1d22a \ - --hash=sha256:e9a504e793efbca1b8e0e9cb979a249cf4a0a7b5b8c9e8b65a5e39d49529c1c4 +wheel==0.38.4 \ + --hash=sha256:965f5259b566725405b05e7cf774052044b1ed30119b5d586b2703aafe8719ac \ + --hash=sha256:b60533f3f5d530e971d6737ca6d58681ee434818fab630c83a734bb10c083ce8 # via -r requirements.in -zipp==3.8.1 \ - --hash=sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2 \ - --hash=sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009 +zipp==3.10.0 \ + --hash=sha256:4fcb6f278987a6605757302a6e40e896257570d11c51628968ccb2a47e80c6c1 \ + --hash=sha256:7a7262fd930bd3e36c50b9a64897aec3fafff3dfdeec9623ae22b40e93f99bb8 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==65.2.0 \ - --hash=sha256:7f4bc85450898a09f76ebf28b72fa25bc7111f6c7d665d514a60bba9c75ef2a9 \ - --hash=sha256:a3ca5857c89f82f5c9410e8508cb32f4872a3bafd4aa7ae122a24ca33bccc750 +setuptools==65.5.1 \ + --hash=sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31 \ + --hash=sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f # via -r requirements.in diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index b053ca568f63..e8283c38d4a0 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -18,7 +18,7 @@ import os from pathlib import Path import sys -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, Optional import nox @@ -109,22 +109,6 @@ def get_pytest_env_vars() -> Dict[str, str]: # -def _determine_local_import_names(start_dir: str) -> List[str]: - """Determines all import names that should be considered "local". - - This is used when running the linter to insure that import order is - properly checked. - """ - file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] - return [ - basename - for basename, extension in file_ext_pairs - if extension == ".py" - or os.path.isdir(os.path.join(start_dir, basename)) - and basename not in ("__pycache__") - ] - - # Linting with flake8. # # We ignore the following rules: @@ -139,7 +123,6 @@ def _determine_local_import_names(start_dir: str) -> List[str]: "--show-source", "--builtin=gettext", "--max-complexity=20", - "--import-order-style=google", "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", "--max-line-length=88", @@ -149,14 +132,11 @@ def _determine_local_import_names(start_dir: str) -> List[str]: @nox.session def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: - session.install("flake8", "flake8-import-order") + session.install("flake8") else: - session.install("flake8", "flake8-import-order", "flake8-annotations") + session.install("flake8", "flake8-annotations") - local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ - "--application-import-names", - ",".join(local_names), ".", ] session.run("flake8", *args) diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index b053ca568f63..e8283c38d4a0 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -18,7 +18,7 @@ import os from pathlib import Path import sys -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, Optional import nox @@ -109,22 +109,6 @@ def get_pytest_env_vars() -> Dict[str, str]: # -def _determine_local_import_names(start_dir: str) -> List[str]: - """Determines all import names that should be considered "local". - - This is used when running the linter to insure that import order is - properly checked. - """ - file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] - return [ - basename - for basename, extension in file_ext_pairs - if extension == ".py" - or os.path.isdir(os.path.join(start_dir, basename)) - and basename not in ("__pycache__") - ] - - # Linting with flake8. # # We ignore the following rules: @@ -139,7 +123,6 @@ def _determine_local_import_names(start_dir: str) -> List[str]: "--show-source", "--builtin=gettext", "--max-complexity=20", - "--import-order-style=google", "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", "--max-line-length=88", @@ -149,14 +132,11 @@ def _determine_local_import_names(start_dir: str) -> List[str]: @nox.session def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: - session.install("flake8", "flake8-import-order") + session.install("flake8") else: - session.install("flake8", "flake8-import-order", "flake8-annotations") + session.install("flake8", "flake8-annotations") - local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ - "--application-import-names", - ",".join(local_names), ".", ] session.run("flake8", *args) diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index b053ca568f63..e8283c38d4a0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -18,7 +18,7 @@ import os from pathlib import Path import sys -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, Optional import nox @@ -109,22 +109,6 @@ def get_pytest_env_vars() -> Dict[str, str]: # -def _determine_local_import_names(start_dir: str) -> List[str]: - """Determines all import names that should be considered "local". - - This is used when running the linter to insure that import order is - properly checked. - """ - file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] - return [ - basename - for basename, extension in file_ext_pairs - if extension == ".py" - or os.path.isdir(os.path.join(start_dir, basename)) - and basename not in ("__pycache__") - ] - - # Linting with flake8. # # We ignore the following rules: @@ -139,7 +123,6 @@ def _determine_local_import_names(start_dir: str) -> List[str]: "--show-source", "--builtin=gettext", "--max-complexity=20", - "--import-order-style=google", "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", "--max-line-length=88", @@ -149,14 +132,11 @@ def _determine_local_import_names(start_dir: str) -> List[str]: @nox.session def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: - session.install("flake8", "flake8-import-order") + session.install("flake8") else: - session.install("flake8", "flake8-import-order", "flake8-annotations") + session.install("flake8", "flake8-annotations") - local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ - "--application-import-names", - ",".join(local_names), ".", ] session.run("flake8", *args) From 80d6a1c1ded1dd601066b0d1cd7182ed9de16e47 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 8 Dec 2022 14:34:37 -0500 Subject: [PATCH 1528/2016] build(deps): bump certifi from 2022.9.24 to 2022.12.7 in /synthtool/gcp/templates/python_library/.kokoro (#1432) Source-Link: https://github.com/googleapis/synthtool/commit/b4fe62efb5114b6738ad4b13d6f654f2bf4b7cc0 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:3bf87e47c2173d7eed42714589dc4da2c07c3268610f1e47f8e1a30decbfc7f1 Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/.kokoro/requirements.txt | 6 +++--- packages/google-cloud-bigquery/.pre-commit-config.yaml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index bb21147e4c23..fccaa8e84449 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3abfa0f1886adaf0b83f07cb117b24a639ea1cb9cffe56d43280b977033563eb + digest: sha256:3bf87e47c2173d7eed42714589dc4da2c07c3268610f1e47f8e1a30decbfc7f1 diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 9c1b9be34e6b..05dc4672edaa 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -20,9 +20,9 @@ cachetools==5.2.0 \ --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \ --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db # via google-auth -certifi==2022.9.24 \ - --hash=sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14 \ - --hash=sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382 +certifi==2022.12.7 \ + --hash=sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3 \ + --hash=sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18 # via requests cffi==1.15.1 \ --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml index 46d237160f6d..5405cc8ff1f3 100644 --- a/packages/google-cloud-bigquery/.pre-commit-config.yaml +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -25,7 +25,7 @@ repos: rev: 22.3.0 hooks: - id: black -- repo: https://gitlab.com/pycqa/flake8 +- repo: https://github.com/pycqa/flake8 rev: 3.9.2 hooks: - id: flake8 From 296e1e1b0652ddcf67e78af45aee15ccb7d3f208 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Thu, 8 Dec 2022 15:53:38 -0800 Subject: [PATCH 1529/2016] deps: update dependencies (#1282) * update dependencies * deps: pyarrow extras * clean up comments * add test pyarrow skips * replace storage checks * update tests * update tests * Update setup.py * update system tests * update verify_pandas_imports * add pyarrow guards * add datetime check * change pyarrow import * update * add pyarrow skips * fix types * lint * Update google/cloud/bigquery/client.py Co-authored-by: Tim Swast * update pyarrow version * update test * lint * update pyarrow req * update noxfile * remove bignum check * remove comments * add test importorskip * update test * update test * update dependency * change version * update imports Co-authored-by: Anthonios Partheniou Co-authored-by: Tim Swast --- .../google-cloud-bigquery/docs/snippets.py | 5 + .../google/cloud/bigquery/__init__.py | 5 + .../google/cloud/bigquery/_helpers.py | 74 +++++++- .../google/cloud/bigquery/_pandas_helpers.py | 127 +++++++------ .../google/cloud/bigquery/client.py | 55 +++++- .../google/cloud/bigquery/exceptions.py | 25 +++ .../google/cloud/bigquery/job/query.py | 6 +- .../google/cloud/bigquery/magics/magics.py | 11 ++ .../google/cloud/bigquery/table.py | 39 +++- packages/google-cloud-bigquery/setup.py | 25 ++- .../testing/constraints-3.7.txt | 4 +- .../tests/system/test_client.py | 24 ++- .../tests/system/test_pandas.py | 27 +-- .../tests/unit/job/test_query_pandas.py | 32 +++- .../tests/unit/test__helpers.py | 71 +++++++ .../tests/unit/test__pandas_helpers.py | 117 +++++++++++- .../tests/unit/test_client.py | 176 +++++++++++++++++- .../tests/unit/test_dbapi__helpers.py | 6 + .../tests/unit/test_dbapi_connection.py | 22 ++- .../tests/unit/test_dbapi_cursor.py | 25 ++- .../tests/unit/test_magics.py | 77 +++++++- .../tests/unit/test_table.py | 176 ++++++++++++++++-- .../tests/unit/test_table_pandas.py | 2 +- 23 files changed, 1013 insertions(+), 118 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 238fd52c3a4e..05e4fa378baf 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -31,6 +31,11 @@ except (ImportError, AttributeError): pandas = None +try: + import pyarrow +except (ImportError, AttributeError): + pyarrow = None + from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 5a4520476396..ebd5b3109096 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -42,6 +42,8 @@ from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlTypeNames +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery.exceptions import LegacyPyarrowError from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -195,6 +197,9 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", + # Custom exceptions + "LegacyBigQueryStorageError", + "LegacyPyarrowError", ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index b59bc86d3496..014a721a8b58 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -20,7 +20,7 @@ import math import re import os -from typing import Optional, Union +from typing import Any, Optional, Union from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -32,6 +32,11 @@ import packaging.version +from google.cloud.bigquery.exceptions import ( + LegacyBigQueryStorageError, + LegacyPyarrowError, +) + _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" _TIMEONLY_W_MICROS = "%H:%M:%S.%f" @@ -50,6 +55,10 @@ r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) +_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") + +_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") + _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" @@ -83,7 +92,7 @@ def installed_version(self) -> packaging.version.Version: getattr(bigquery_storage, "__version__", "0.0.0") ) - return self._installed_version + return self._installed_version # type: ignore @property def is_read_session_optional(self) -> bool: @@ -93,6 +102,29 @@ def is_read_session_optional(self) -> bool: """ return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION + def verify_version(self): + """Verify that a recent enough version of BigQuery Storage extra is + installed. + + The function assumes that google-cloud-bigquery-storage extra is + installed, and should thus be used in places where this assumption + holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. + + Raises: + LegacyBigQueryStorageError: + If the google-cloud-bigquery-storage package is outdated. + """ + if self.installed_version < _MIN_BQ_STORAGE_VERSION: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, please upgrade " + f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})." + ) + raise LegacyBigQueryStorageError(msg) + class PyarrowVersions: """Version comparisons for pyarrow package.""" @@ -120,6 +152,44 @@ def installed_version(self) -> packaging.version.Version: def use_compliant_nested_type(self) -> bool: return self.installed_version.major >= 4 + def try_import(self, raise_if_error: bool = False) -> Any: + """Verify that a recent enough version of pyarrow extra is + installed. + + The function assumes that pyarrow extra is installed, and should thus + be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. + + Returns: + The ``pyarrow`` module or ``None``. + + Raises: + LegacyPyarrowError: + If the pyarrow package is outdated and ``raise_if_error`` is ``True``. + """ + try: + import pyarrow + except ImportError as exc: # pragma: NO COVER + if raise_if_error: + raise LegacyPyarrowError( + f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}." + ) from exc + return None + + if self.installed_version < _MIN_PYARROW_VERSION: + if raise_if_error: + msg = ( + "Dependency pyarrow is outdated, please upgrade " + f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})." + ) + raise LegacyPyarrowError(msg) + return None + + return pyarrow + BQ_STORAGE_VERSIONS = BQStorageVersions() PYARROW_VERSIONS = PyarrowVersions() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 0d05f53a3271..3d7e7d793838 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -22,6 +22,11 @@ import queue import warnings +from packaging import version + +from google.cloud.bigquery import _helpers +from google.cloud.bigquery import schema + try: import pandas # type: ignore @@ -43,9 +48,7 @@ db_dtypes_import_exception = exc date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype - -import pyarrow # type: ignore -import pyarrow.parquet # type: ignore +pyarrow = _helpers.PYARROW_VERSIONS.try_import() try: # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` @@ -77,10 +80,6 @@ def _to_wkb(v): # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. _ARROW_COMPRESSION_SUPPORT = True -from google.cloud.bigquery import _helpers -from google.cloud.bigquery import schema - - _LOGGER = logging.getLogger(__name__) _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. @@ -141,52 +140,65 @@ def pyarrow_timestamp(): return pyarrow.timestamp("us", tz="UTC") -# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py -# When modifying it be sure to update it there as well. -BQ_TO_ARROW_SCALARS = { - "BIGNUMERIC": pyarrow_bignumeric, - "BOOL": pyarrow.bool_, - "BOOLEAN": pyarrow.bool_, - "BYTES": pyarrow.binary, - "DATE": pyarrow.date32, - "DATETIME": pyarrow_datetime, - "FLOAT": pyarrow.float64, - "FLOAT64": pyarrow.float64, - "GEOGRAPHY": pyarrow.string, - "INT64": pyarrow.int64, - "INTEGER": pyarrow.int64, - "NUMERIC": pyarrow_numeric, - "STRING": pyarrow.string, - "TIME": pyarrow_time, - "TIMESTAMP": pyarrow_timestamp, -} -ARROW_SCALAR_IDS_TO_BQ = { - # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes - pyarrow.bool_().id: "BOOL", - pyarrow.int8().id: "INT64", - pyarrow.int16().id: "INT64", - pyarrow.int32().id: "INT64", - pyarrow.int64().id: "INT64", - pyarrow.uint8().id: "INT64", - pyarrow.uint16().id: "INT64", - pyarrow.uint32().id: "INT64", - pyarrow.uint64().id: "INT64", - pyarrow.float16().id: "FLOAT64", - pyarrow.float32().id: "FLOAT64", - pyarrow.float64().id: "FLOAT64", - pyarrow.time32("ms").id: "TIME", - pyarrow.time64("ns").id: "TIME", - pyarrow.timestamp("ns").id: "TIMESTAMP", - pyarrow.date32().id: "DATE", - pyarrow.date64().id: "DATETIME", # because millisecond resolution - pyarrow.binary().id: "BYTES", - pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", -} +if pyarrow: + # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py + # When modifying it be sure to update it there as well. + BQ_TO_ARROW_SCALARS = { + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, + } + ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + } + + if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): + BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BIGNUMERIC_SUPPORT = True + else: + _BIGNUMERIC_SUPPORT = False # pragma: NO COVER + +else: # pragma: NO COVER + BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER + ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER + _BIGNUMERIC_SUPPORT = False # pragma: NO COVER + + BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { "GEOGRAPHY": { b"ARROW:extension:name": b"google:sqlType:geography", @@ -480,6 +492,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # If schema detection was not successful for all columns, also try with # pyarrow, if available. if unknown_type_fields: + if not pyarrow: + msg = "Could not determine the type of columns: {}".format( + ", ".join(field.name for field in unknown_type_fields) + ) + warnings.warn(msg) + return None # We cannot detect the schema in full. + # The augment_schema() helper itself will also issue unknown type # warnings if detection still fails for any of the fields. bq_schema_out = augment_schema(dataframe, bq_schema_out) @@ -654,6 +673,8 @@ def dataframe_to_parquet( This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``. """ + pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) + import pyarrow.parquet # type: ignore kwargs = ( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1f3647e71ad8..1885ab67ef80 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -27,6 +27,7 @@ import json import math import os +import packaging.version import tempfile import typing from typing import ( @@ -44,6 +45,13 @@ import uuid import warnings +try: + import pyarrow # type: ignore + + _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) +except ImportError: # pragma: NO COVER + pyarrow = None + from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload @@ -56,9 +64,14 @@ import google.cloud._helpers # type: ignore from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # type: ignore # pytype: disable=import-error -from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( - DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, -) + +try: + from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( + DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, + ) +except ImportError: + DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore + from google.cloud.bigquery import _job_helpers from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id @@ -67,6 +80,7 @@ from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._helpers import _get_bigquery_host +from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _DEFAULT_HOST from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers @@ -75,6 +89,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -144,6 +159,9 @@ TIMEOUT_HEADER = "X-Server-Timeout" +# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 +_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -519,8 +537,20 @@ def _ensure_bqstorage_client( Returns: A BigQuery Storage API client. """ - from google.cloud import bigquery_storage + try: + from google.cloud import bigquery_storage # type: ignore + except ImportError: + warnings.warn( + "Cannot create BigQuery Storage client, the dependency " + "google-cloud-bigquery-storage is not installed." + ) + return None + try: + BQ_STORAGE_VERSIONS.verify_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return None if bqstorage_client is None: bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=self._credentials, @@ -2529,6 +2559,9 @@ def load_table_from_dataframe( google.cloud.bigquery.job.LoadJob: A new load job. Raises: + ValueError: + If a usable parquet engine cannot be found. This method + requires :mod:`pyarrow` to be installed. TypeError: If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` class. @@ -2566,6 +2599,10 @@ def load_table_from_dataframe( ) ) + if pyarrow is None and job_config.source_format == job.SourceFormat.PARQUET: + # pyarrow is now the only supported parquet engine. + raise ValueError("This method requires pyarrow to be installed") + if location is None: location = self.location @@ -2621,6 +2658,16 @@ def load_table_from_dataframe( try: if job_config.source_format == job.SourceFormat.PARQUET: + if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: + msg = ( + "Loading dataframe data in PARQUET format with pyarrow " + f"{_PYARROW_VERSION} can result in data corruption. It is " + "therefore *strongly* advised to use a different pyarrow " + "version or a different source format. " + "See: https://github.com/googleapis/python-bigquery/issues/781" + ) + warnings.warn(msg, category=RuntimeWarning) + if job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py new file mode 100644 index 000000000000..2bab97fea93e --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py @@ -0,0 +1,25 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class BigQueryError(Exception): + """Base class for all custom exceptions defined by the BigQuery client.""" + + +class LegacyBigQueryStorageError(BigQueryError): + """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" + + +class LegacyPyarrowError(BigQueryError): + """Raised when too old a version of pyarrow package is detected at runtime.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index b0286deae4c7..e32e74129b2f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1593,6 +1593,10 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. + Raises: + ValueError: + If the :mod:`pyarrow` library cannot be imported. + .. versionadded:: 1.17.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) @@ -1694,7 +1698,7 @@ def to_dataframe( # that should only exist here in the QueryJob method. def to_geodataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 613cc1b58fb3..f92f7754142b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -747,6 +747,17 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): if not use_bqstorage_api: return None + try: + from google.cloud import bigquery_storage # type: ignore # noqa: F401 + except ImportError as err: + customized_error = ImportError( + "The default BigQuery Storage API client cannot be used, install " + "the missing google-cloud-bigquery-storage and pyarrow packages " + "to use it. Alternatively, use the classic REST API by specifying " + "the --use_rest_api magic option." + ) + raise customized_error from err + try: from google.api_core.gapic_v1 import client_info as gapic_client_info except ImportError as err: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 96888d62d490..a2110a9fbaf1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -29,7 +29,10 @@ except ImportError: # pragma: NO COVER pandas = None -import pyarrow # type: ignore +try: + import pyarrow # type: ignore +except ImportError: # pragma: NO COVER + pyarrow = None try: import geopandas # type: ignore @@ -52,6 +55,7 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -63,8 +67,9 @@ # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas - import geopandas - from google.cloud import bigquery_storage + import pyarrow + import geopandas # type: ignore + from google.cloud import bigquery_storage # type: ignore from google.cloud.bigquery.dataset import DatasetReference @@ -72,6 +77,10 @@ "The geopandas library is not installed, please install " "geopandas to use the to_geodataframe() function." ) +_NO_PYARROW_ERROR = ( + "The pyarrow library is not installed, please install " + "pyarrow to use the to_arrow() function." +) _NO_SHAPELY_ERROR = ( "The shapely library is not installed, please install " "shapely to use the geography_as_object option." @@ -1585,6 +1594,17 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): if self.max_results is not None: return False + try: + from google.cloud import bigquery_storage # noqa: F401 + except ImportError: + return False + + try: + _helpers.BQ_STORAGE_VERSIONS.verify_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return False + return True def _get_next_page_response(self): @@ -1654,7 +1674,7 @@ def _to_page_iterable( def to_arrow_iterable( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore ) -> Iterator["pyarrow.RecordBatch"]: """[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream. @@ -1761,8 +1781,15 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. + Raises: + ValueError: If the :mod:`pyarrow` library cannot be imported. + + .. versionadded:: 1.17.0 """ + if pyarrow is None: + raise ValueError(_NO_PYARROW_ERROR) + self._maybe_warn_max_results(bqstorage_client) if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): @@ -2041,7 +2068,7 @@ def __can_cast_timestamp_ns(column): # changes to job.QueryJob.to_geodataframe() def to_geodataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, @@ -2195,6 +2222,8 @@ def to_arrow( Returns: pyarrow.Table: An empty :class:`pyarrow.Table`. """ + if pyarrow is None: + raise ValueError(_NO_PYARROW_ERROR) return pyarrow.Table.from_arrays(()) def to_dataframe( diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 5fc694c6fab7..9e1bfbbce774 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -34,8 +34,7 @@ # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", - "proto-plus >= 1.22.0, <2.0.0dev", + "proto-plus >= 1.15.0, <2.0.0dev", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 @@ -44,16 +43,30 @@ "packaging >= 14.3, <22.0.0dev", "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", - "pyarrow >= 3.0.0, < 11.0dev", "requests >= 2.21.0, < 3.0.0dev", ] +pyarrow_dependency = "pyarrow >= 3.0.0" extras = { # Keep the no-op bqstorage extra for backward compatibility. # See: https://github.com/googleapis/python-bigquery/issues/757 - "bqstorage": [], - "pandas": ["pandas>=1.0.0", "db-dtypes>=0.3.0,<2.0.0dev"], + "bqstorage": [ + "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", + # Due to an issue in pip's dependency resolver, the `grpc` extra is not + # installed, even though `google-cloud-bigquery-storage` specifies it + # as `google-api-core[grpc]`. We thus need to explicitly specify it here. + # See: https://github.com/googleapis/python-bigquery/issues/83 The + # grpc.Channel.close() method isn't added until 1.32.0. + # https://github.com/grpc/grpc/pull/15254 + "grpcio >= 1.47.0, < 2.0dev", + pyarrow_dependency, + ], + "pandas": [ + "pandas>=1.1.0", + pyarrow_dependency, + "db-dtypes>=0.3.0,<2.0.0dev", + ], "ipywidgets": ["ipywidgets==7.7.1"], - "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <2.0dev"], "ipython": ["ipython>=7.0.1,!=8.1.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 2c5b169db02a..149d6c496982 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -23,6 +23,6 @@ protobuf==3.19.5 pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.21.0 -Shapely==1.6.4.post2 +Shapely==1.8.4 six==1.13.0 -tqdm==4.7.4 \ No newline at end of file +tqdm==4.7.4 diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 25edc18e1f60..575898209dcd 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -42,14 +42,11 @@ from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums -from google.cloud import bigquery_storage from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient import psutil import pytest -import pyarrow -import pyarrow.types from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState from test_utils.retry import RetryResult @@ -57,6 +54,16 @@ from . import helpers +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None + +try: + import pyarrow + import pyarrow.types +except ImportError: # pragma: NO COVER + pyarrow = None JOB_TIMEOUT = 120 # 2 minutes DATA_PATH = pathlib.Path(__file__).parent.parent / "data" @@ -1738,6 +1745,10 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials @@ -1796,6 +1807,9 @@ def test_dbapi_dry_run_query(self): self.assertEqual(list(rows), []) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_dbapi_connection_does_not_leak_sockets(self): current_process = psutil.Process() conn_count_start = len(current_process.connections()) @@ -2263,6 +2277,10 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 34e4243c4343..91305b4506bf 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -27,7 +27,7 @@ import pytest from google.cloud import bigquery -from google.cloud import bigquery_storage + from google.cloud.bigquery import enums from . import helpers @@ -36,6 +36,9 @@ pandas = pytest.importorskip("pandas", minversion="0.23.0") numpy = pytest.importorskip("numpy") +bigquery_storage = pytest.importorskip( + "google.cloud.bigquery_storage", minversion="2.0.0" +) PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") @@ -373,10 +376,10 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), ) num_rows = 100 @@ -390,10 +393,10 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): ("geo_col", nulls), ("int_col", nulls), ("num_col", nulls), - ("bignum_col", nulls), ("str_col", nulls), ("time_col", nulls), ("ts_col", nulls), + ("bignum_col", nulls), ] df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -469,10 +472,10 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), ) df_data = [ @@ -502,14 +505,6 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id decimal.Decimal("99999999999999999999999999999.999999999"), ], ), - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ), ("str_col", ["abc", None, "def"]), ( "time_col", @@ -525,6 +520,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ] df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index a45401664e5f..a2444efdde45 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -17,12 +17,15 @@ import json import mock -import pyarrow import pytest -from google.cloud import bigquery_storage -import google.cloud.bigquery_storage_v1.reader -import google.cloud.bigquery_storage_v1.services.big_query_read.client + +try: + from google.cloud import bigquery_storage + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.services.big_query_read.client +except (ImportError, AttributeError): # pragma: NO COVER + bigquery_storage = None try: import pandas @@ -47,6 +50,12 @@ pandas = pytest.importorskip("pandas") +try: + import pyarrow + import pyarrow.types +except ImportError: # pragma: NO COVER + pyarrow = None + @pytest.fixture def table_read_options_kwarg(): @@ -89,6 +98,9 @@ def test__contains_order_by(query, expected): assert not mut._contains_order_by(query) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.parametrize( "query", ( @@ -179,6 +191,7 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow(): from google.cloud.bigquery.job import QueryJob as target_class @@ -265,6 +278,7 @@ def test_to_arrow(): ] +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class @@ -300,6 +314,7 @@ def test_to_arrow_max_results_no_progress_bar(): assert tbl.num_rows == 2 +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_arrow_w_tqdm_w_query_plan(tqdm_mock): @@ -356,6 +371,7 @@ def test_to_arrow_w_tqdm_w_query_plan(tqdm_mock): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_arrow_w_tqdm_w_pending_status(tqdm_mock): @@ -408,6 +424,7 @@ def test_to_arrow_w_tqdm_w_pending_status(tqdm_mock): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_arrow_w_tqdm_wo_query_plan(tqdm_mock): @@ -510,6 +527,9 @@ def test_to_dataframe_ddl_query(): assert len(df) == 0 +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class @@ -584,6 +604,9 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): bqstorage_client.read_rows.assert_called_once_with(stream_id) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_to_dataframe_bqstorage_no_pyarrow_compression(): from google.cloud.bigquery.job import QueryJob as target_class @@ -629,6 +652,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 2e714c707f84..4fb86f66513e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -19,7 +19,18 @@ import mock +try: + from google.cloud import bigquery_storage # type: ignore +except ImportError: # pragma: NO COVER + bigquery_storage = None +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + + +@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") class TestBQStorageVersions(unittest.TestCase): def tearDown(self): from google.cloud.bigquery import _helpers @@ -32,6 +43,37 @@ def _object_under_test(self): return _helpers.BQStorageVersions() + def _call_fut(self): + from google.cloud.bigquery import _helpers + + _helpers.BQ_STORAGE_VERSIONS._installed_version = None + return _helpers.BQ_STORAGE_VERSIONS.verify_version() + + def test_raises_no_error_w_recent_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): + try: + self._call_fut() + except LegacyBigQueryStorageError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_raises_error_w_legacy_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + with self.assertRaises(LegacyBigQueryStorageError): + self._call_fut() + + def test_raises_error_w_unknown_bqstorage_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: + del fake_module.__version__ + error_pattern = r"version found: 0.0.0" + with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): + self._call_fut() + def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() @@ -58,6 +100,7 @@ def test_is_read_session_optional_false(self): assert not versions.is_read_session_optional +@unittest.skipIf(pyarrow is None, "Requires `pyarrow`") class TestPyarrowVersions(unittest.TestCase): def tearDown(self): from google.cloud.bigquery import _helpers @@ -70,6 +113,34 @@ def _object_under_test(self): return _helpers.PyarrowVersions() + def _call_try_import(self, **kwargs): + from google.cloud.bigquery import _helpers + + _helpers.PYARROW_VERSIONS._installed_version = None + return _helpers.PYARROW_VERSIONS.try_import(**kwargs) + + def test_try_import_raises_no_error_w_recent_pyarrow(self): + from google.cloud.bigquery.exceptions import LegacyPyarrowError + + with mock.patch("pyarrow.__version__", new="5.0.0"): + try: + pyarrow = self._call_try_import(raise_if_error=True) + self.assertIsNotNone(pyarrow) + except LegacyPyarrowError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_try_import_returns_none_w_legacy_pyarrow(self): + with mock.patch("pyarrow.__version__", new="2.0.0"): + pyarrow = self._call_try_import() + self.assertIsNone(pyarrow) + + def test_try_import_raises_error_w_legacy_pyarrow(self): + from google.cloud.bigquery.exceptions import LegacyPyarrowError + + with mock.patch("pyarrow.__version__", new="2.0.0"): + with self.assertRaises(LegacyPyarrowError): + self._call_try_import(raise_if_error=True) + def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 1a3f918eb14b..885cd318c67c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -30,9 +30,6 @@ except ImportError: # pragma: NO COVER pandas = None -import pyarrow -import pyarrow.types - try: import geopandas except ImportError: # pragma: NO COVER @@ -41,10 +38,28 @@ import pytest from google import api_core -from google.cloud import bigquery_storage + +from google.cloud.bigquery import exceptions from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT + +pyarrow = _helpers.PYARROW_VERSIONS.try_import() +if pyarrow: + import pyarrow.parquet + import pyarrow.types +else: # pragma: NO COVER + # Mock out pyarrow when missing, because methods from pyarrow.types are + # used in test parameterization. + pyarrow = mock.Mock() + +try: + from google.cloud import bigquery_storage + + _helpers.BQ_STORAGE_VERSIONS.verify_version() +except ImportError: # pragma: NO COVER + bigquery_storage = None PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -55,6 +70,12 @@ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") +skip_if_no_bignumeric = pytest.mark.skipif( + not _BIGNUMERIC_SUPPORT, + reason="BIGNUMERIC support requires pyarrow>=3.0.0", +) + + @pytest.fixture def module_under_test(): from google.cloud.bigquery import _pandas_helpers @@ -110,6 +131,7 @@ def all_(*functions): return functools.partial(do_all, functions) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_is_datetime(): assert is_datetime(pyarrow.timestamp("us", tz=None)) assert not is_datetime(pyarrow.timestamp("ms", tz=None)) @@ -142,7 +164,12 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), - ("BIGNUMERIC", "NULLABLE", is_bignumeric), + pytest.param( + "BIGNUMERIC", + "NULLABLE", + is_bignumeric, + marks=skip_if_no_bignumeric, + ), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -221,10 +248,11 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), - ( + pytest.param( "BIGNUMERIC", "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), + marks=skip_if_no_bignumeric, ), ( "BOOLEAN", @@ -280,6 +308,7 @@ def test_all_(): ("UNKNOWN_TYPE", "REPEATED", is_none), ], ) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type): field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode) actual = module_under_test.bq_to_arrow_data_type(field) @@ -287,6 +316,7 @@ def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_t @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -334,6 +364,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -381,6 +412,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): assert actual.value_type.equals(expected_value_type) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -417,7 +449,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), - ( + pytest.param( "BIGNUMERIC", [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), @@ -479,6 +511,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): series = pandas.Series(rows, dtype="object") bq_field = schema.SchemaField("field_name", bq_type) @@ -513,6 +546,7 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): rows = [pandas.Timestamp(row) for row in rows] series = pandas.Series(rows) @@ -523,6 +557,7 @@ def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): rows = [[1, 2, 3], [], [4, 5, 6]] series = pandas.Series(rows, dtype="object") @@ -534,6 +569,7 @@ def test_bq_to_arrow_array_w_arrays(module_under_test): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): rows = [ {"int_col": 123, "string_col": "abc"}, @@ -555,6 +591,7 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_special_floats(module_under_test): bq_field = schema.SchemaField("field_name", "FLOAT64") rows = [float("-inf"), float("nan"), float("inf"), None] @@ -622,6 +659,7 @@ def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): assert array.to_pylist() == list(series) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -647,6 +685,7 @@ def test_get_column_or_index_not_found(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_get_column_or_index_with_multiindex_not_found(module_under_test): dataframe = pandas.DataFrame( {"column_name": [1, 2, 3, 4, 5, 6]}, @@ -984,6 +1023,7 @@ def test_dataframe_to_arrow_with_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_required_fields(module_under_test): bq_schema = ( schema.SchemaField("field01", "STRING", mode="REQUIRED"), @@ -1040,6 +1080,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_unknown_type(module_under_test): bq_schema = ( schema.SchemaField("field00", "UNKNOWN_TYPE"), @@ -1072,6 +1113,7 @@ def test_dataframe_to_arrow_with_unknown_type(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, @@ -1093,6 +1135,19 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): + mock_pyarrow_import = mock.Mock() + mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError( + "pyarrow not installed" + ) + monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import) + + with pytest.raises(exceptions.LegacyPyarrowError): + module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_extra_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1104,6 +1159,7 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_missing_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1115,6 +1171,7 @@ def test_dataframe_to_parquet_w_missing_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_compression_method(module_under_test): bq_schema = (schema.SchemaField("field00", "STRING"),) dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) @@ -1134,6 +1191,34 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): + dataframe = pandas.DataFrame( + data=[ + {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)}, + {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)}, + ] + ) + + no_pyarrow_patch = mock.patch(module_under_test.__name__ + ".pyarrow", None) + + with no_pyarrow_patch, warnings.catch_warnings(record=True) as warned: + detected_schema = module_under_test.dataframe_to_bq_schema( + dataframe, bq_schema=[] + ) + + assert detected_schema is None + + # a warning should also be issued + expected_warnings = [ + warning for warning in warned if "could not determine" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + msg = str(expected_warnings[0]) + assert "execution_date" in msg and "created_at" in msg + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1163,6 +1248,7 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1249,6 +1335,7 @@ def test__first_array_valid_no_arrays_with_valid_items(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1315,6 +1402,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_repeated_fields(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1427,6 +1515,7 @@ def test_augment_schema_type_detection_fails_array_data(module_under_test): assert "all_none_array" in warning_msg and "empty_array" in warning_msg +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): pandas = pytest.importorskip("pandas") @@ -1457,6 +1546,9 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test__download_table_bqstorage_stream_includes_read_session( monkeypatch, module_under_test ): @@ -1487,7 +1579,8 @@ def test__download_table_bqstorage_stream_includes_read_session( @pytest.mark.skipif( - not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + bigquery_storage is None + or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", ) def test__download_table_bqstorage_stream_omits_read_session( @@ -1527,6 +1620,9 @@ def test__download_table_bqstorage_stream_omits_read_session( (7, {"max_queue_size": None}, 7, 0), # infinite queue size ], ) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test__download_table_bqstorage( module_under_test, stream_count, @@ -1577,6 +1673,7 @@ def fake_download_stream( assert queue_used.maxsize == expected_maxsize +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1612,6 +1709,7 @@ def test_download_arrow_row_iterator_unknown_field_type(module_under_test): assert col.to_pylist() == [2.2, 22.22, 222.222] +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_known_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1646,6 +1744,7 @@ def test_download_arrow_row_iterator_known_field_type(module_under_test): assert col.to_pylist() == ["2.2", "22.22", "222.222"] +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1712,6 +1811,7 @@ def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test assert isinstance(dataframe, pandas.DataFrame) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_field_type_override(module_under_test): # When loading pandas data, we may need to override the type # decision based on data contents, because GEOGRAPHY data can be @@ -1744,6 +1844,7 @@ def test_bq_to_arrow_field_type_override(module_under_test): ), ], ) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata): assert ( module_under_test.bq_to_arrow_field( diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f4552cda23f4..22f7286db3b6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -27,6 +27,7 @@ import warnings import mock +import packaging import requests import pytest import pkg_resources @@ -53,15 +54,25 @@ msg = "Error importing from opentelemetry, is the installed version compatible?" raise ImportError(msg) from exc +try: + import pyarrow +except (ImportError, AttributeError): # pragma: NO COVER + pyarrow = None + import google.api_core.exceptions from google.api_core import client_info import google.cloud._helpers from google.cloud import bigquery -from google.cloud import bigquery_storage + from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from google.cloud.bigquery import ParquetOptions +try: + from google.cloud import bigquery_storage +except (ImportError, AttributeError): # pragma: NO COVER + bigquery_storage = None +from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -614,6 +625,9 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_ensure_bqstorage_client_creating_new_instance(self): mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() @@ -636,6 +650,55 @@ def test_ensure_bqstorage_client_creating_new_instance(self): client_info=mock.sentinel.client_info, ) + def test_ensure_bqstorage_client_missing_dependency(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + def fail_bqstorage_import(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) + + with no_bqstorage, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client() + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning + for warning in warned + if "not installed" in str(warning) + and "google-cloud-bigquery-storage" in str(warning) + ] + assert matching_warnings, "Missing dependency warning not raised." + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_obsolete_dependency(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + patcher = mock.patch( + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client() + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_ensure_bqstorage_client_existing_client_check_passes(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -647,6 +710,29 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): self.assertIs(bqstorage_client, mock_storage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_existing_client_check_fails(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + mock_storage_client = mock.sentinel.mock_storage_client + + patcher = mock.patch( + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference @@ -6833,6 +6919,7 @@ def test_load_table_from_file_w_invalid_job_config(self): assert "Expected an instance of LoadJobConfig" in err_msg @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6928,6 +7015,7 @@ def test_load_table_from_dataframe(self): assert "description" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_client_location(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6972,6 +7060,7 @@ def test_load_table_from_dataframe_w_client_location(self): assert sent_config.source_format == job.SourceFormat.PARQUET @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7026,6 +7115,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7081,6 +7171,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_parquet_options_none(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7132,6 +7223,7 @@ def test_load_table_from_dataframe_w_parquet_options_none(self): assert sent_config.parquet_options.enable_list_inference is True @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_none(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7191,6 +7283,7 @@ def test_load_table_from_dataframe_w_list_inference_none(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_false(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7251,6 +7344,7 @@ def test_load_table_from_dataframe_w_list_inference_false(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self): from google.cloud.bigquery import job @@ -7270,6 +7364,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(sel assert "Got unexpected source_format:" in str(exc.value) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7370,6 +7465,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7429,6 +7525,7 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): assert sent_config.schema is None @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_index_and_auto_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7490,6 +7587,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): assert sent_schema == expected_sent_schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_unknown_table(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -7528,6 +7626,7 @@ def test_load_table_from_dataframe_unknown_table(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7575,6 +7674,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) + # @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7619,6 +7719,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7678,6 +7779,7 @@ def test_load_table_from_dataframe_struct_fields(self): assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields(self): """Test that a DataFrame with array columns can be uploaded correctly. @@ -7742,6 +7844,7 @@ def test_load_table_from_dataframe_array_fields(self): assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields_w_auto_schema(self): """Test that a DataFrame with array columns can be uploaded correctly. @@ -7804,6 +7907,7 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): assert sent_config.schema == expected_schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7887,6 +7991,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema_extra_types(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7923,6 +8028,7 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "unknown_col" in message @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7955,6 +8061,74 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): assert call_args.kwargs.get("parquet_compression") == "LZ4" @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) + to_parquet_patch = mock.patch.object( + dataframe, "to_parquet", wraps=dataframe.to_parquet + ) + + with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch: + with pytest.raises(ValueError): + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + location=self.LOCATION, + parquet_compression="gzip", + ) + + def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): + pytest.importorskip("pandas", reason="Requires `pandas`") + pytest.importorskip("pyarrow", reason="Requires `pyarrow`") + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + pyarrow_version_patch = mock.patch( + "google.cloud.bigquery.client._PYARROW_VERSION", + packaging.version.parse("2.0.0"), # A known bad version of pyarrow. + ) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch, get_table_patch, pyarrow_version_patch: + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + location=self.LOCATION, + ) + + expected_warnings = [ + warning for warning in warned if "pyarrow" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + assert issubclass(expected_warnings[0].category, RuntimeWarning) + msg = str(expected_warnings[0].message) + assert "pyarrow 2.0.0" in msg + assert "data corruption" in msg + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): """Test that a DataFrame with null columns can be uploaded if a BigQuery schema is specified. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 7cc1f11c3a5c..fae0c17e9202 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -21,6 +21,11 @@ import pytest +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + import google.cloud._helpers from google.cloud.bigquery import query, table from google.cloud.bigquery.dbapi import _helpers @@ -210,6 +215,7 @@ def test_empty_iterable(self): result = _helpers.to_bq_table_rows(rows_iterable) self.assertEqual(list(result), []) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_non_empty_iterable(self): rows_iterable = [ dict( diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index e96ab55d792c..67777f923868 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -17,7 +17,10 @@ import mock -from google.cloud import bigquery_storage +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None class TestConnection(unittest.TestCase): @@ -37,6 +40,8 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): + # Assumption: bigquery_storage exists. It's the test's responisbility to + # not use this helper or skip itself if bqstorage is not installed. mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client._transport = mock.Mock(spec=["channel"]) mock_client._transport.grpc_channel = mock.Mock(spec=["close"]) @@ -53,6 +58,9 @@ def test_ctor_wo_bqstorage_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, None) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_ctor_w_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection @@ -82,6 +90,9 @@ def test_connect_wo_client(self, mock_client): self.assertIsNotNone(connection._client) self.assertIsNotNone(connection._bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -97,6 +108,9 @@ def test_connect_w_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_connect_w_both_clients(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -130,6 +144,9 @@ def test_raises_error_if_closed(self): ): getattr(connection, method)() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_close_closes_all_created_bigquery_clients(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() @@ -152,6 +169,9 @@ def test_close_closes_all_created_bigquery_clients(self): self.assertTrue(client.close.called) self.assertTrue(bqstorage_client._transport.grpc_channel.close.called) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index d672c0f6cc02..b550bbce0efe 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -18,8 +18,17 @@ import pytest +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + from google.api_core import exceptions -from google.cloud import bigquery_storage + +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None from tests.unit.helpers import _to_pyarrow @@ -269,6 +278,10 @@ def test_fetchall_w_row(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -322,6 +335,9 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): self.assertEqual(sorted_row_data, expected_row_data) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi @@ -344,6 +360,9 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): # check the data returned self.assertEqual(rows, []) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -375,6 +394,10 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): # the default client was not used mock_client.list_rows.assert_not_called() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_no_arrow_compression(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index fdfb16d1684b..c0aa5d85e448 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -76,6 +76,19 @@ def ipython_ns_cleanup(): del ip.user_ns[name] +@pytest.fixture(scope="session") +def missing_bq_storage(): + """Provide a patcher that can make the bigquery storage import to fail.""" + + def fail_if(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + return maybe_fail_import(predicate=fail_if) + + @pytest.fixture(scope="session") def missing_grpcio_lib(): """Provide a patcher that can make the gapic library import to fail.""" @@ -310,6 +323,9 @@ def test__make_bqstorage_client_false(): assert got is None +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -321,6 +337,53 @@ def test__make_bqstorage_client_true(): assert isinstance(got, bigquery_storage.BigQueryReadClient) +def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + + with pytest.raises(ImportError) as exc_context, missing_bq_storage: + magics._make_bqstorage_client(test_client, True, {}) + + error_msg = str(exc_context.value) + assert "google-cloud-bigquery-storage" in error_msg + assert "pyarrow" in error_msg + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__make_bqstorage_client_true_obsolete_dependency(): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + + patcher = mock.patch( + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + got = magics._make_bqstorage_client(test_client, True, {}) + + assert got is None + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): credentials_mock = mock.create_autospec( @@ -376,6 +439,9 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -538,9 +604,10 @@ def test_bigquery_magic_clears_display_in_non_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): - pandas = pytest.importorskip("pandas") - ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( @@ -603,6 +670,9 @@ def warning_match(warning): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_bigquery_magic_with_rest_client_requested(monkeypatch): pandas = pytest.importorskip("pandas") @@ -830,6 +900,9 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip = IPython.get_ipython() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index f542c7523ff7..a79b988810b3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -22,18 +22,33 @@ import warnings import mock -import pyarrow -import pyarrow.types +import pkg_resources import pytest import google.api_core.exceptions +from test_utils.imports import maybe_fail_import from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.dataset import DatasetReference -from google.cloud import bigquery_storage -from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( - grpc as big_query_read_grpc_transport, -) +try: + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) +except ImportError: # pragma: NO COVER + bigquery_storage = None + big_query_read_grpc_transport = None + +from google.cloud.bigquery import _helpers + +pyarrow = _helpers.PYARROW_VERSIONS.try_import() +PYARROW_VERSION = pkg_resources.parse_version("0.0.1") + +if pyarrow: + import pyarrow.types + + PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) try: import pandas @@ -52,7 +67,7 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None -from google.cloud.bigquery.dataset import DatasetReference +PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") def _mock_client(): @@ -1902,12 +1917,20 @@ def test_total_rows_eq_zero(self): row_iterator = self._make_one() self.assertEqual(row_iterator.total_rows, 0) + @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) + def test_to_arrow_error_if_pyarrow_is_none(self): + row_iterator = self._make_one() + with self.assertRaises(ValueError): + row_iterator.to_arrow() + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): row_iterator = self._make_one() tbl = row_iterator.to_arrow() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 0) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): row_iterator = self._make_one() arrow_iter = row_iterator.to_arrow_iterable() @@ -2192,6 +2215,49 @@ def test__validate_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) + def test__validate_bqstorage_returns_false_if_missing_dependency(self): + iterator = self._make_one(first_page_response=None) # not cached + + def fail_bqstorage_import(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) + + with no_bqstorage: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + iterator = self._make_one(first_page_response=None) # not cached + + patcher = mock.patch( + "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): from google.cloud.bigquery.schema import SchemaField @@ -2292,6 +2358,10 @@ def test_to_arrow_iterable(self): [[{"name": "Bepples Phlyntstone", "age": 0}, {"name": "Dino", "age": 4}]], ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2367,6 +2437,7 @@ def test_to_arrow_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2448,6 +2519,7 @@ def test_to_arrow(self): ], ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_nulls(self): from google.cloud.bigquery.schema import SchemaField @@ -2480,6 +2552,7 @@ def test_to_arrow_w_nulls(self): self.assertEqual(names, ["Donkey", "Diddy", "Dixie", None]) self.assertEqual(ages, [32, 29, None, 111]) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_unknown_type(self): from google.cloud.bigquery.schema import SchemaField @@ -2522,6 +2595,7 @@ def test_to_arrow_w_unknown_type(self): warning = warned[0] self.assertTrue("sport" in str(warning)) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): from google.cloud.bigquery.schema import SchemaField @@ -2560,6 +2634,10 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -2600,6 +2678,10 @@ def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): ) mock_client._ensure_bqstorage_client.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -2636,6 +2718,10 @@ def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2713,6 +2799,10 @@ def test_to_arrow_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2740,6 +2830,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): from google.cloud.bigquery.schema import SchemaField @@ -2755,17 +2846,25 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() - mock_client._ensure_bqstorage_client.return_value = None row_iterator = self._make_one(mock_client, api_request, path, schema) - tbl = row_iterator.to_arrow(create_bqstorage_client=True) + def mock_verify_version(): + raise _helpers.LegacyBigQueryStorageError("no bqstorage") - # The client attempted to create a BQ Storage client, and even though - # that was not possible, results were still returned without errors. - mock_client._ensure_bqstorage_client.assert_called_once() + with mock.patch( + "google.cloud.bigquery._helpers.BQ_STORAGE_VERSIONS.verify_version", + mock_verify_version, + ): + tbl = row_iterator.to_arrow(create_bqstorage_client=True) + + mock_client._ensure_bqstorage_client.assert_not_called() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2802,6 +2901,7 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[1].name, "colC") self.assertEqual(actual_table.schema[2].name, "colB") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") @mock.patch("tqdm.notebook.tqdm") @@ -2936,6 +3036,10 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["age"][0], 33) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3100,6 +3204,7 @@ def test_to_dataframe(self): self.assertEqual(df.age.dtype.name, "Int64") @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -3127,6 +3232,7 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -3575,6 +3681,9 @@ def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3603,6 +3712,9 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3628,7 +3740,11 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): from google.cloud.bigquery.table import Table @@ -3650,6 +3766,10 @@ def test_to_dataframe_w_bqstorage_logs_session(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3701,6 +3821,10 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertTrue(got.empty) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3777,6 +3901,10 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3827,6 +3955,10 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertTrue(got.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm") def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): @@ -3902,6 +4034,10 @@ def blocking_to_arrow(*args, **kwargs): tqdm_mock().close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4018,6 +4154,9 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): self.assertTrue(df.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): from google.cloud.bigquery import table as mut @@ -4036,6 +4175,9 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): with pytest.raises(google.api_core.exceptions.Forbidden): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_partition(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4053,6 +4195,9 @@ def test_to_dataframe_w_bqstorage_partition(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_snapshot(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4071,6 +4216,10 @@ def test_to_dataframe_w_bqstorage_snapshot(self): row_iterator.to_dataframe(bqstorage_client) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4773,6 +4922,9 @@ def test_set_expiration_w_none(self): assert time_partitioning._properties["expirationMs"] is None +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.parametrize( "table_path", ( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py index 943baa326cc8..5778467a5a34 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py @@ -16,12 +16,12 @@ import decimal from unittest import mock -import pyarrow import pytest from google.cloud import bigquery pandas = pytest.importorskip("pandas") +pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data" From 1fa06a2f6602eed4442526c98c74b089f3f8545d Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 12 Dec 2022 09:23:02 -0600 Subject: [PATCH 1530/2016] chore(main): release 3.4.1 (#1416) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 294e5b42fffe..d6f0abc85a69 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.4.1](https://github.com/googleapis/python-bigquery/compare/v3.4.0...v3.4.1) (2022-12-09) + + +### Documentation + +* Add info about streaming quota limits to `insert_rows*` methods ([#1409](https://github.com/googleapis/python-bigquery/issues/1409)) ([0f08e9a](https://github.com/googleapis/python-bigquery/commit/0f08e9a8ff638e78006d71acd974de2dff89b5d9)) + + +### Dependencies + +* make pyarrow and BQ Storage optional dependencies ([e1aa921](https://github.com/googleapis/python-bigquery/commit/e1aa9218ad22f85c9a6cab8b61d013779376a582)) + ## [3.4.0](https://github.com/googleapis/python-bigquery/compare/v3.3.6...v3.4.0) (2022-11-17) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 6b822f0c1d8e..71133df01629 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.4.0" +__version__ = "3.4.1" From 0cf66d474c37a4029b20acd84d072bd9d30bebf9 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Tue, 13 Dec 2022 16:57:22 -0600 Subject: [PATCH 1531/2016] docs: created samples for load table and create table from schema file (#1436) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: created samples for load table and create table from schema file * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Apply suggestions from code review Co-authored-by: Tim Swast * Update samples/snippets/create_table_schema_from_json.py Co-authored-by: Tim Swast Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- .../snippets/create_table_schema_from_json.py | 42 +++++++++++++ .../create_table_schema_from_json_test.py | 32 ++++++++++ .../samples/snippets/dataset_access_test.py | 2 +- .../samples/snippets/delete_job.py | 2 +- .../snippets/load_table_schema_from_json.py | 60 +++++++++++++++++++ .../load_table_schema_from_json_test.py | 32 ++++++++++ .../samples/snippets/materialized_view.py | 1 + .../samples/snippets/quickstart_test.py | 1 - .../samples/snippets/schema.json | 20 +++++++ .../samples/snippets/schema_us_states.json | 12 ++++ .../samples/snippets/user_credentials_test.py | 1 - 11 files changed, 201 insertions(+), 4 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/schema.json create mode 100644 packages/google-cloud-bigquery/samples/snippets/schema_us_states.json diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json.py b/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json.py new file mode 100644 index 000000000000..b866e2ebe64b --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json.py @@ -0,0 +1,42 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pathlib + + +def create_table(table_id: str) -> None: + orig_table_id = table_id + current_directory = pathlib.Path(__file__).parent + orig_schema_path = str(current_directory / "schema.json") + # [START bigquery_schema_file_create] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table_name" + # TODO(dev): Change schema_path variable to the path of your schema file. + schema_path = "path/to/schema.json" + # [END bigquery_schema_file_create] + table_id = orig_table_id + schema_path = orig_schema_path + + # [START bigquery_schema_file_create] + # To load a schema file use the schema_from_json method. + schema = client.schema_from_json(schema_path) + + table = bigquery.Table(table_id, schema=schema) + table = client.create_table(table) # API request + print(f"Created table {table_id}.") + # [END bigquery_schema_file_create] diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py new file mode 100644 index 000000000000..e99b92672c48 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import create_table_schema_from_json + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_table( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + + create_table_schema_from_json.create_table(random_table_id) + + out, _ = capsys.readouterr() + assert "Created" in out + assert random_table_id in out diff --git a/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py b/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py index 4d1a70eb1ede..cc6a9af61dde 100644 --- a/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py @@ -18,8 +18,8 @@ import update_dataset_access if typing.TYPE_CHECKING: - import pytest from google.cloud import bigquery + import pytest def test_dataset_access_permissions( diff --git a/packages/google-cloud-bigquery/samples/snippets/delete_job.py b/packages/google-cloud-bigquery/samples/snippets/delete_job.py index 7c8640baf5fa..2aeb53849961 100644 --- a/packages/google-cloud-bigquery/samples/snippets/delete_job.py +++ b/packages/google-cloud-bigquery/samples/snippets/delete_job.py @@ -17,8 +17,8 @@ def delete_job_metadata(job_id: str, location: str) -> None: orig_job_id = job_id orig_location = location # [START bigquery_delete_job] - from google.cloud import bigquery from google.api_core import exceptions + from google.cloud import bigquery # TODO(developer): Set the job ID to the ID of the job whose metadata you # wish to delete. diff --git a/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json.py b/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json.py new file mode 100644 index 000000000000..3f1f854301d5 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json.py @@ -0,0 +1,60 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pathlib + + +def load_table(table_id: str) -> None: + orig_uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + orig_table_id = table_id + current_directory = pathlib.Path(__file__).parent + orig_schema_path = str(current_directory / "schema_us_states.json") + # [START bigquery_schema_file_load] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change uri variable to the path of your data file. + uri = "gs://your-bucket/path/to/your-file.csv" + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table" + # TODO(dev): Change schema_path variable to the path of your schema file. + schema_path = "path/to/schema.json" + # [END bigquery_schema_file_load] + uri = orig_uri + table_id = orig_table_id + schema_path = orig_schema_path + # [START bigquery_schema_file_load] + # To load a schema file use the schema_from_json method. + schema = client.schema_from_json(schema_path) + + job_config = bigquery.LoadJobConfig( + # To use the schema you loaded pass it into the + # LoadJobConfig constructor. + schema=schema, + skip_leading_rows=1, + ) + + # Pass the job_config object to the load_table_from_file, + # load_table_from_json, or load_table_from_uri method + # to use the schema on a new table. + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) # Make an API request. + print(f"Loaded {destination_table.num_rows} rows to {table_id}.") + # [END bigquery_schema_file_load] diff --git a/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py b/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py new file mode 100644 index 000000000000..267a6786c722 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import load_table_schema_from_json + +if typing.TYPE_CHECKING: + import pytest + + +def test_load_table( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + + load_table_schema_from_json.load_table(random_table_id) + + out, _ = capsys.readouterr() + assert "Loaded" in out + assert random_table_id in out diff --git a/packages/google-cloud-bigquery/samples/snippets/materialized_view.py b/packages/google-cloud-bigquery/samples/snippets/materialized_view.py index adb3688a4010..a47ee5b81a65 100644 --- a/packages/google-cloud-bigquery/samples/snippets/materialized_view.py +++ b/packages/google-cloud-bigquery/samples/snippets/materialized_view.py @@ -60,6 +60,7 @@ def update_materialized_view( # [START bigquery_update_materialized_view] import datetime + from google.cloud import bigquery bigquery_client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py index b0bad5ee5356..610c63c3ba1f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py @@ -20,7 +20,6 @@ import quickstart - # Must match the dataset listed in quickstart.py (there's no easy way to # extract this). DATASET_ID = "my_new_dataset" diff --git a/packages/google-cloud-bigquery/samples/snippets/schema.json b/packages/google-cloud-bigquery/samples/snippets/schema.json new file mode 100644 index 000000000000..bd2164dad359 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/schema.json @@ -0,0 +1,20 @@ +[ + { + "name": "qtr", + "type": "STRING", + "mode": "REQUIRED", + "description": "quarter" + }, + { + "name": "rep", + "type": "STRING", + "mode": "NULLABLE", + "description": "sales representative" + }, + { + "name": "sales", + "type": "FLOAT", + "mode": "NULLABLE", + "defaultValueExpression": "2.55" + } +] diff --git a/packages/google-cloud-bigquery/samples/snippets/schema_us_states.json b/packages/google-cloud-bigquery/samples/snippets/schema_us_states.json new file mode 100644 index 000000000000..7f2ccc277524 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/schema_us_states.json @@ -0,0 +1,12 @@ +[ + { + "name": "name", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "post_abbr", + "type": "STRING", + "mode": "NULLABLE" + } +] diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py index e2794e83b681..df8a6354de99 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py @@ -21,7 +21,6 @@ from user_credentials import main - PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] MockType = Union[mock.mock.MagicMock, mock.mock.AsyncMock] From 0db358e0c8588ce7b5d50e6f51c2c4d0a578f72a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 14 Dec 2022 10:49:41 -0600 Subject: [PATCH 1532/2016] chore: remove code generation for BQML proto files (#1294) * chore: remove code generation for BQML proto files * remove protos from owlbot config Co-authored-by: Anthonios Partheniou Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> --- packages/google-cloud-bigquery/.github/.OwlBot.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.yaml index e540511573a2..8b142686cf89 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.yaml @@ -18,9 +18,5 @@ docker: deep-remove-regex: - /owl-bot-staging -deep-copy-regex: - - source: /google/cloud/bigquery/(v.*)/.*-py/(.*) - dest: /owl-bot-staging/$1/$2 - begin-after-commit-hash: f2de93abafa306b2ebadf1d10d947db8bcf2bf15 From ed635acefb9f64ecb543d6ea244c7669fcf7e111 Mon Sep 17 00:00:00 2001 From: Jared Chapman Date: Mon, 19 Dec 2022 13:56:07 -0600 Subject: [PATCH 1533/2016] docs: create sample to write schema file from table (#1439) * docs: create sample to write schema file from table * Apply suggestions from code review Co-authored-by: Tim Swast --- .../samples/snippets/get_table_make_schema.py | 47 +++++++++++++++++++ .../snippets/get_table_make_schema_test.py | 36 ++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/get_table_make_schema.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/get_table_make_schema_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/get_table_make_schema.py b/packages/google-cloud-bigquery/samples/snippets/get_table_make_schema.py new file mode 100644 index 000000000000..f870b42dee01 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/get_table_make_schema.py @@ -0,0 +1,47 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_table_make_schema(table_id: str, schema_path: str) -> None: + orig_table_id = table_id + orig_schema_path = schema_path + # [START bigquery_schema_file_get] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change the table_id variable to the full name of the + # table you want to get schema from. + table_id = "your-project.your_dataset.your_table_name" + + # TODO(dev): Change schema_path variable to the path + # of your schema file. + schema_path = "path/to/schema.json" + # [END bigquery_schema_file_get] + table_id = orig_table_id + schema_path = orig_schema_path + # [START bigquery_schema_file_get] + table = client.get_table(table_id) # Make an API request. + + # Write a schema file to schema_path with the schema_to_json method. + client.schema_to_json(table.schema, schema_path) + + with open(schema_path, "r", encoding="utf-8") as schema_file: + schema_contents = schema_file.read() + + # View table properties + print(f"Got table '{table.project}.{table.dataset_id}.{table.table_id}'.") + print(f"Table schema: {schema_contents}") + + # [END bigquery_schema_file_get] diff --git a/packages/google-cloud-bigquery/samples/snippets/get_table_make_schema_test.py b/packages/google-cloud-bigquery/samples/snippets/get_table_make_schema_test.py new file mode 100644 index 000000000000..424f16e39ece --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/get_table_make_schema_test.py @@ -0,0 +1,36 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import get_table_make_schema + +if typing.TYPE_CHECKING: + import pathlib + + import pytest + + +def test_get_table_make_schema( + capsys: "pytest.CaptureFixture[str]", + table_id: str, + tmp_path: "pathlib.Path", +) -> None: + schema_path = str(tmp_path / "test_schema.json") + + get_table_make_schema.get_table_make_schema(table_id, schema_path) + + out, _ = capsys.readouterr() + assert "Got table" in out + assert table_id in out From 92c018755bd41931737655f62242dda83b615f7f Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Tue, 20 Dec 2022 14:28:30 -0600 Subject: [PATCH 1534/2016] docs: revise sample for nested schema (#1446) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: revise sample for nested schema * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * added TODO Co-authored-by: Owl Bot --- .../google-cloud-bigquery/docs/snippets.py | 2 + .../snippets/nested_repeated_schema.py | 54 +++++++++++++++++++ .../snippets/nested_repeated_schema_test.py | 32 +++++++++++ 3 files changed, 88 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 05e4fa378baf..b9860e4da259 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -118,6 +118,8 @@ def test_create_client_default_credentials(): assert client is not None +# TODO(Mattix23): After code sample from https://github.com/googleapis/python-bigquery/pull/1446 +# is updated from cloud.google.com delete this. def test_create_table_nested_repeated_schema(client, to_delete): dataset_id = "create_table_nested_repeated_{}".format(_millis()) project = client.project diff --git a/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema.py b/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema.py new file mode 100644 index 000000000000..5d55860ccec3 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema.py @@ -0,0 +1,54 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def nested_schema(table_id: str) -> None: + orig_table_id = table_id + # [START bigquery_nested_repeated_schema] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table_name" + + schema = [ + bigquery.SchemaField("id", "STRING", mode="NULLABLE"), + bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("dob", "DATE", mode="NULLABLE"), + bigquery.SchemaField( + "addresses", + "RECORD", + mode="REPEATED", + fields=[ + bigquery.SchemaField("status", "STRING", mode="NULLABLE"), + bigquery.SchemaField("address", "STRING", mode="NULLABLE"), + bigquery.SchemaField("city", "STRING", mode="NULLABLE"), + bigquery.SchemaField("state", "STRING", mode="NULLABLE"), + bigquery.SchemaField("zip", "STRING", mode="NULLABLE"), + bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"), + ], + ), + ] + # [END bigquery_nested_repeated_schema] + + table_id = orig_table_id + + # [START bigquery_nested_repeated_schema] + table = bigquery.Table(table_id, schema=schema) + table = client.create_table(table) # API request + + print(f"Created table {table.project}.{table.dataset_id}.{table.table_id}.") + # [END bigquery_nested_repeated_schema] diff --git a/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py b/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py new file mode 100644 index 000000000000..0386fc8fb8f5 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import nested_repeated_schema + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_table( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + + nested_repeated_schema.nested_schema(random_table_id) + + out, _ = capsys.readouterr() + assert "Created" in out + assert random_table_id in out From 64148b0c17a703fa3c078d1b80c8404ba9c9f435 Mon Sep 17 00:00:00 2001 From: Jared Chapman Date: Fri, 6 Jan 2023 12:09:26 -0600 Subject: [PATCH 1535/2016] chore: install all sample dependencies when type checking samples (#1455) --- packages/google-cloud-bigquery/google/__init__.py | 2 +- .../google/cloud/bigquery/opentelemetry_tracing.py | 6 +++--- packages/google-cloud-bigquery/noxfile.py | 14 ++++++++++++-- packages/google-cloud-bigquery/samples/mypy.ini | 1 - 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/__init__.py b/packages/google-cloud-bigquery/google/__init__.py index 8fcc60e2b9c6..8e60d8439d0d 100644 --- a/packages/google-cloud-bigquery/google/__init__.py +++ b/packages/google-cloud-bigquery/google/__init__.py @@ -21,4 +21,4 @@ except ImportError: import pkgutil - __path__ = pkgutil.extend_path(__path__, __name__) + __path__ = pkgutil.extend_path(__path__, __name__) # type: ignore diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py index 2345fd1bb94e..3d0a66ba893f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -18,9 +18,9 @@ logger = logging.getLogger(__name__) try: - from opentelemetry import trace - from opentelemetry.instrumentation.utils import http_status_to_status_code - from opentelemetry.trace.status import Status + from opentelemetry import trace # type: ignore + from opentelemetry.instrumentation.utils import http_status_to_status_code # type: ignore + from opentelemetry.trace.status import Status # type: ignore HAS_OPENTELEMETRY = True _warned_telemetry = True diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index a91e60a5f98b..139093acc582 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -200,12 +200,22 @@ def mypy_samples(session): """Run type checks with mypy.""" session.install("-e", ".[all]") - session.install("ipython", "pytest") + session.install("pytest") + for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"): + session.install("-r", requirements_path) session.install(MYPY_VERSION) # Just install the dependencies' type info directly, since "mypy --install-types" # might require an additional pass. - session.install("types-mock", "types-pytz") + session.install( + "types-mock", + "types-pytz", + "types-protobuf", + "types-python-dateutil", + "types-requests", + "types-setuptools", + ) + session.install("typing-extensions") # for TypedDict in pre-3.8 Python versions session.run( diff --git a/packages/google-cloud-bigquery/samples/mypy.ini b/packages/google-cloud-bigquery/samples/mypy.ini index 29757e47df7d..3f7eb6647589 100644 --- a/packages/google-cloud-bigquery/samples/mypy.ini +++ b/packages/google-cloud-bigquery/samples/mypy.ini @@ -2,7 +2,6 @@ # Should match DEFAULT_PYTHON_VERSION from root noxfile.py python_version = 3.8 exclude = noxfile\.py -strict = True warn_unused_configs = True [mypy-google.auth,google.oauth2,geojson,google_auth_oauthlib,IPython.*] From afdb0e34fc46f8acbf7bc11b34e99b3d9928aaea Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Fri, 6 Jan 2023 12:42:38 -0600 Subject: [PATCH 1536/2016] docs: revise label table code samples (#1451) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: revise label table code samples * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * added TODO to clean up snippets.py Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- .../google-cloud-bigquery/docs/snippets.py | 3 ++ .../samples/snippets/label_table.py | 37 +++++++++++++++++++ .../samples/snippets/label_table_test.py | 32 ++++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/label_table.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/label_table_test.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index b9860e4da259..a0c01870a77e 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -250,6 +250,9 @@ def test_manage_table_labels(client, to_delete): table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) table = client.create_table(table) + # TODO(Mattix23): After code sample from https://github.com/googleapis/python-bigquery/pull/1451 + # is updated from cloud.google.com delete this. + # [START bigquery_label_table] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/snippets/label_table.py b/packages/google-cloud-bigquery/samples/snippets/label_table.py new file mode 100644 index 000000000000..5fce08d62afe --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/label_table.py @@ -0,0 +1,37 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def label_table(table_id: str) -> None: + orig_table_id = table_id + # [START bigquery_label_table] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table_name" + + # [END bigquery_label_table] + table_id = orig_table_id + # [START bigquery_label_table] + table = client.get_table(table_id) # API request + + labels = {"color": "green"} + table.labels = labels + + table = client.update_table(table, ["labels"]) # API request + + print(f"Added {table.labels} to {table_id}.") + # [END bigquery_label_table] diff --git a/packages/google-cloud-bigquery/samples/snippets/label_table_test.py b/packages/google-cloud-bigquery/samples/snippets/label_table_test.py new file mode 100644 index 000000000000..a77fb4b75f38 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/label_table_test.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import label_table + +if typing.TYPE_CHECKING: + import pytest + + +def test_label_table( + capsys: "pytest.CaptureFixture[str]", + table_id: str, +) -> None: + + label_table.label_table(table_id) + + out, _ = capsys.readouterr() + assert "color" in out + assert table_id in out From 9d9f763e438a10f0c26fef6549cf93e4e4f9ce74 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Fri, 6 Jan 2023 14:25:01 -0600 Subject: [PATCH 1537/2016] docs: revise create table cmek sample (#1452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: revise create table cmek sample * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/docs/snippets.py | 2 +- .../samples/snippets/create_table_cmek.py | 46 +++++++++++++++++++ .../snippets/create_table_cmek_test.py | 37 +++++++++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/create_table_cmek.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index a0c01870a77e..3c4967a5946a 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -168,7 +168,7 @@ def test_create_table_cmek(client, to_delete): dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) - + # TODO(Mattix23): When sample is updated in cloud.google.com, delete this one. # [START bigquery_create_table_cmek] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_cmek.py b/packages/google-cloud-bigquery/samples/snippets/create_table_cmek.py new file mode 100644 index 000000000000..011c56d4e8e3 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_cmek.py @@ -0,0 +1,46 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_cmek(table_id: str, kms_key_name: str) -> None: + orig_table_id = table_id + orig_key_name = kms_key_name + # [START bigquery_create_table_cmek] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table_name" + + # Set the encryption key to use for the table. + # TODO: Replace this key with a key you have created in Cloud KMS. + kms_key_name = "projects/your-project/locations/us/keyRings/test/cryptoKeys/test" + + # [END bigquery_create_table_cmek] + + table_id = orig_table_id + kms_key_name = orig_key_name + + # [START bigquery_create_table_cmek] + table = bigquery.Table(table_id) + table.encryption_configuration = bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name + ) + table = client.create_table(table) # API request + + print(f"Created {table_id}.") + print(f"Key: {table.encryption_configuration.kms_key_name}.") + + # [END bigquery_create_table_cmek] diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py new file mode 100644 index 000000000000..429baf3fd228 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py @@ -0,0 +1,37 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import create_table_cmek + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_table( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + + kms_key_name = ( + "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/test" + ) + + create_table_cmek.create_table_cmek(random_table_id, kms_key_name) + + out, _ = capsys.readouterr() + assert "Created" in out + assert random_table_id in out + assert kms_key_name in out From c20a62ad064660767914e60d4a4d1acd45105f0a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 7 Jan 2023 06:59:36 -0800 Subject: [PATCH 1538/2016] chore(python): add support for python 3.11 [autoapprove] (#1454) * chore(python): add support for python 3.11 Source-Link: https://github.com/googleapis/synthtool/commit/7197a001ffb6d8ce7b0b9b11c280f0c536c1033a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:c43f1d918bcf817d337aa29ff833439494a158a0831508fda4ec75dc4c0d0320 * update unit test python versions * modify test to cater for python 3.11 * require grpcio >= 1.49.1 for python 3.11 Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/samples/python3.11/common.cfg | 40 +++++++++++++++++++ .../.kokoro/samples/python3.11/continuous.cfg | 6 +++ .../samples/python3.11/periodic-head.cfg | 11 +++++ .../.kokoro/samples/python3.11/periodic.cfg | 6 +++ .../.kokoro/samples/python3.11/presubmit.cfg | 6 +++ .../google-cloud-bigquery/CONTRIBUTING.rst | 6 ++- packages/google-cloud-bigquery/noxfile.py | 2 +- .../samples/geography/noxfile.py | 2 +- .../samples/magics/noxfile.py | 2 +- .../samples/snippets/noxfile.py | 2 +- packages/google-cloud-bigquery/setup.py | 2 + .../tests/unit/line_arg_parser/test_parser.py | 2 +- 13 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.11/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.11/continuous.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic-head.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.11/presubmit.cfg diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index fccaa8e84449..889f77dfa25d 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,4 +1,4 @@ -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3bf87e47c2173d7eed42714589dc4da2c07c3268610f1e47f8e1a30decbfc7f1 + digest: sha256:c43f1d918bcf817d337aa29ff833439494a158a0831508fda4ec75dc4c0d0320 diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/common.cfg new file mode 100644 index 000000000000..f5adc870378f --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.11" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-311" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/continuous.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic-head.cfg new file mode 100644 index 000000000000..5aa01bab5bf3 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic.cfg new file mode 100644 index 000000000000..71cd1e597e38 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/presubmit.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index d06598b310bc..5dc30a1f8547 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows. + 3.7, 3.8, 3.9, 3.10 and 3.11 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.10 -- -k + $ nox -s unit-3.11 -- -k .. note:: @@ -225,11 +225,13 @@ We support: - `Python 3.8`_ - `Python 3.9`_ - `Python 3.10`_ +- `Python 3.11`_ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ +.. _Python 3.11: https://docs.python.org/3.11/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 139093acc582..5f08c016b640 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -37,7 +37,7 @@ DEFAULT_PYTHON_VERSION = "3.8" SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.10"] -UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] +UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() # 'docfx' is excluded since it only needs to run in 'docs-presubmit' diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index e8283c38d4a0..1224cbe212e4 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index e8283c38d4a0..1224cbe212e4 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index e8283c38d4a0..1224cbe212e4 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 9e1bfbbce774..fd168719362c 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -30,6 +30,7 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ "grpcio >= 1.47.0, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/1262 + "grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'", # NOTE: Maintainers, please do not require google-api-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 @@ -58,6 +59,7 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.47.0, < 2.0dev", + "grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'", pyarrow_dependency, ], "pandas": [ diff --git a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py index 3f9e9ff419f8..b170d536aeee 100644 --- a/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py +++ b/packages/google-cloud-bigquery/tests/unit/line_arg_parser/test_parser.py @@ -44,7 +44,7 @@ def test_consume_unexpected_eol(parser_class): fake_lexer = [Token(TokenType.EOL, lexeme="", pos=0)] parser = parser_class(fake_lexer) - with pytest.raises(ParseError, match=r"Unexpected end of input.*expected COLON.*"): + with pytest.raises(ParseError, match=r"Unexpected end of input.*expected.*COLON.*"): parser.consume(TokenType.COLON) From 37b87b554a54572251c575cc742a3500d45ad38b Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jan 2023 17:59:30 +0000 Subject: [PATCH 1539/2016] chore(deps): update dependency certifi to v2022.12.7 [security] (#1434) Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 798de6bb5209..1e7905b31509 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==22.1.0 -certifi==2022.9.24 +certifi==2022.12.7 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 From 1f97e2a4d663869e5a74bb24018936450f69c9a8 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jan 2023 19:06:20 +0000 Subject: [PATCH 1540/2016] chore(deps): update all dependencies (#1390) * chore(deps): update all dependencies * update dependency db-dtypes * update dependency google-cloud-bigquery * revert Co-authored-by: Chalmer Lowe Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 10 +++++----- .../samples/magics/requirements-test.txt | 2 +- .../samples/magics/requirements.txt | 10 +++++----- .../samples/snippets/requirements-test.txt | 2 +- .../samples/snippets/requirements.txt | 12 ++++++------ 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 6f722c66e151..980c425b9393 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.1.3 +pytest==7.2.0 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 1e7905b31509..fc6976a0ed05 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -6,14 +6,14 @@ click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.0.4 +db-dtypes==1.0.5 Fiona==1.8.22 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.1; python_version >= '3.8' google-api-core==2.10.2 -google-auth==2.13.0 -google-cloud-bigquery==3.3.5 +google-auth==2.14.0 +google-cloud-bigquery==3.3.6 google-cloud-bigquery-storage==2.16.2 google-cloud-core==2.3.2 google-crc32c==1.5.0 @@ -28,13 +28,13 @@ packaging==21.3 pandas===1.3.5; python_version == '3.7' pandas==1.5.1; python_version >= '3.8' proto-plus==1.22.1 -pyarrow==9.0.0 +pyarrow==10.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.5 +pytz==2022.6 PyYAML==6.0 requests==2.28.1 rsa==4.9 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 7902c72ef501..e5173c3442e8 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.1.3 +pytest==7.2.0 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index bdd026ce5eee..7b5291365020 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,15 +1,15 @@ -db-dtypes==1.0.4 +db-dtypes==1.0.5 google-cloud-bigquery-storage==2.16.2 -google-auth-oauthlib==0.5.3 +google-auth-oauthlib==0.7.0 grpcio==1.50.0 ipywidgets==8.0.2 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.5.0; python_version >= '3.9' +ipython==8.6.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.1; python_version >= '3.8' -pyarrow==9.0.0 -pytz==2022.5 +pyarrow==10.0.0 +pytz==2022.6 typing-extensions==4.4.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 7902c72ef501..e5173c3442e8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.1.3 +pytest==7.2.0 mock==4.0.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index ebf892279de5..82e7452dfc53 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ -db-dtypes==1.0.4 -google-cloud-bigquery==3.3.5 +db-dtypes==1.0.5 +google-cloud-bigquery==3.3.6 google-cloud-bigquery-storage==2.16.2 -google-auth-oauthlib==0.5.3 +google-auth-oauthlib==0.7.0 grpcio==1.50.0 ipywidgets==8.0.2 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.5.0; python_version >= '3.9' +ipython==8.6.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.1; python_version >= '3.8' -pyarrow==9.0.0 -pytz==2022.5 +pyarrow==10.0.0 +pytz==2022.6 typing-extensions==4.4.0 From 3b09fe7c94c16be729431f73513fca7e1c51b9c7 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Tue, 10 Jan 2023 12:03:42 -0600 Subject: [PATCH 1541/2016] chore: delete duplicate code from snippets.py (#1458) * chore: delete duplicate code samples from snippets.py * fixed reference to code sample --- .../google-cloud-bigquery/docs/snippets.py | 93 +------------------ .../docs/usage/encryption.rst | 2 +- 2 files changed, 2 insertions(+), 93 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 3c4967a5946a..efc551310660 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -118,80 +118,6 @@ def test_create_client_default_credentials(): assert client is not None -# TODO(Mattix23): After code sample from https://github.com/googleapis/python-bigquery/pull/1446 -# is updated from cloud.google.com delete this. -def test_create_table_nested_repeated_schema(client, to_delete): - dataset_id = "create_table_nested_repeated_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_nested_repeated_schema] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, 'my_dataset') - - schema = [ - bigquery.SchemaField("id", "STRING", mode="NULLABLE"), - bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("dob", "DATE", mode="NULLABLE"), - bigquery.SchemaField( - "addresses", - "RECORD", - mode="REPEATED", - fields=[ - bigquery.SchemaField("status", "STRING", mode="NULLABLE"), - bigquery.SchemaField("address", "STRING", mode="NULLABLE"), - bigquery.SchemaField("city", "STRING", mode="NULLABLE"), - bigquery.SchemaField("state", "STRING", mode="NULLABLE"), - bigquery.SchemaField("zip", "STRING", mode="NULLABLE"), - bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"), - ], - ), - ] - table_ref = dataset_ref.table("my_table") - table = bigquery.Table(table_ref, schema=schema) - table = client.create_table(table) # API request - - print("Created table {}".format(table.full_table_id)) - # [END bigquery_nested_repeated_schema] - - -def test_create_table_cmek(client, to_delete): - dataset_id = "create_table_cmek_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - # TODO(Mattix23): When sample is updated in cloud.google.com, delete this one. - # [START bigquery_create_table_cmek] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - table_ref = dataset.table("my_table") - table = bigquery.Table(table_ref) - - # Set the encryption key to use for the table. - # TODO: Replace this key with a key you have created in Cloud KMS. - kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us", "test", "test" - ) - table.encryption_configuration = bigquery.EncryptionConfiguration( - kms_key_name=kms_key_name - ) - - table = client.create_table(table) # API request - - assert table.encryption_configuration.kms_key_name == kms_key_name - # [END bigquery_create_table_cmek] - - def test_create_partitioned_table(client, to_delete): dataset_id = "create_table_partitioned_{}".format(_millis()) project = client.project @@ -248,27 +174,10 @@ def test_manage_table_labels(client, to_delete): to_delete.append(dataset) table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table = client.create_table(table) - - # TODO(Mattix23): After code sample from https://github.com/googleapis/python-bigquery/pull/1451 - # is updated from cloud.google.com delete this. - # [START bigquery_label_table] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, dataset_id) - # table_ref = dataset_ref.table('my_table') - # table = client.get_table(table_ref) # API request - - assert table.labels == {} labels = {"color": "green"} table.labels = labels - - table = client.update_table(table, ["labels"]) # API request - - assert table.labels == labels - # [END bigquery_label_table] + table = client.create_table(table) # [START bigquery_get_table_labels] # from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/docs/usage/encryption.rst b/packages/google-cloud-bigquery/docs/usage/encryption.rst index 6652f05658c6..3e6d5aacc531 100644 --- a/packages/google-cloud-bigquery/docs/usage/encryption.rst +++ b/packages/google-cloud-bigquery/docs/usage/encryption.rst @@ -10,7 +10,7 @@ in the BigQuery documentation for more details. Create a new table, using a customer-managed encryption key from Cloud KMS to encrypt it. -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/snippets/create_table_cmek.py :language: python :dedent: 4 :start-after: [START bigquery_create_table_cmek] From 661b3b0c83e1e57d4ff16eac3a292d7f7d7b2702 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 11 Jan 2023 18:37:16 +0000 Subject: [PATCH 1542/2016] chore(deps): update all dependencies (#1456) * chore(deps): update all dependencies * revert packaging * revert charset-normalizer Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 30 +++++++++---------- .../samples/magics/requirements-test.txt | 2 +- .../samples/magics/requirements.txt | 18 +++++------ .../samples/snippets/requirements-test.txt | 2 +- .../samples/snippets/requirements.txt | 20 ++++++------- 6 files changed, 37 insertions(+), 37 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 980c425b9393..1e6b7c5eaa13 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ pytest==7.2.0 -mock==4.0.3 +mock==5.0.1 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index fc6976a0ed05..e3ba3102e48d 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ -attrs==22.1.0 +attrs==22.2.0 certifi==2022.12.7 cffi==1.15.1 charset-normalizer==2.1.1 @@ -10,36 +10,36 @@ db-dtypes==1.0.5 Fiona==1.8.22 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' -geopandas==0.12.1; python_version >= '3.8' -google-api-core==2.10.2 -google-auth==2.14.0 -google-cloud-bigquery==3.3.6 -google-cloud-bigquery-storage==2.16.2 +geopandas==0.12.2; python_version >= '3.8' +google-api-core==2.11.0 +google-auth==2.16.0 +google-cloud-bigquery==3.4.1 +google-cloud-bigquery-storage==2.17.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.0 -googleapis-common-protos==1.56.4 -grpcio==1.50.0 +googleapis-common-protos==1.58.0 +grpcio==1.51.1 idna==3.4 -libcst==0.4.7 +libcst==0.4.9 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' -pandas==1.5.1; python_version >= '3.8' -proto-plus==1.22.1 -pyarrow==10.0.0 +pandas==1.5.2; python_version >= '3.8' +proto-plus==1.22.2 +pyarrow==10.0.1 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.6 +pytz==2022.7 PyYAML==6.0 requests==2.28.1 rsa==4.9 -Shapely==1.8.5.post1 +Shapely==2.0.0 six==1.16.0 typing-extensions==4.4.0 typing-inspect==0.8.0 -urllib3==1.26.12 +urllib3==1.26.13 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index e5173c3442e8..56aa0f43257c 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 pytest==7.2.0 -mock==4.0.3 +mock==5.0.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 7b5291365020..133370fbed5f 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.0.5 -google-cloud-bigquery-storage==2.16.2 -google-auth-oauthlib==0.7.0 -grpcio==1.50.0 -ipywidgets==8.0.2 +google-cloud-bigquery-storage==2.17.0 +google-auth-oauthlib==0.8.0 +grpcio==1.51.1 +ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.6.0; python_version >= '3.9' +ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.1; python_version >= '3.8' +matplotlib==3.6.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.1; python_version >= '3.8' -pyarrow==10.0.0 -pytz==2022.6 +pandas==1.5.2; python_version >= '3.8' +pyarrow==10.0.1 +pytz==2022.7 typing-extensions==4.4.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index e5173c3442e8..56aa0f43257c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 pytest==7.2.0 -mock==4.0.3 +mock==5.0.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 82e7452dfc53..27bbb3631ebe 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ db-dtypes==1.0.5 -google-cloud-bigquery==3.3.6 -google-cloud-bigquery-storage==2.16.2 -google-auth-oauthlib==0.7.0 -grpcio==1.50.0 -ipywidgets==8.0.2 +google-cloud-bigquery==3.4.1 +google-cloud-bigquery-storage==2.17.0 +google-auth-oauthlib==0.8.0 +grpcio==1.51.1 +ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.6.0; python_version >= '3.9' +ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.1; python_version >= '3.8' +matplotlib==3.6.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.1; python_version >= '3.8' -pyarrow==10.0.0 -pytz==2022.6 +pandas==1.5.2; python_version >= '3.8' +pyarrow==10.0.1 +pytz==2022.7 typing-extensions==4.4.0 From 605ca016ceeadebae78083ed09f4e30df00cbeff Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 12 Jan 2023 00:18:41 +0000 Subject: [PATCH 1543/2016] chore(deps): update all dependencies (#1460) * chore(deps): update all dependencies * revert Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e3ba3102e48d..316758ee9d8c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.16.0 google-cloud-bigquery==3.4.1 -google-cloud-bigquery-storage==2.17.0 +google-cloud-bigquery-storage==2.18.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.0 @@ -42,4 +42,4 @@ Shapely==2.0.0 six==1.16.0 typing-extensions==4.4.0 typing-inspect==0.8.0 -urllib3==1.26.13 +urllib3==1.26.14 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 133370fbed5f..1ae7c4974e19 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.5 -google-cloud-bigquery-storage==2.17.0 +google-cloud-bigquery-storage==2.18.0 google-auth-oauthlib==0.8.0 grpcio==1.51.1 ipywidgets==8.0.4 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 27bbb3631ebe..23e31a200b1d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.0.5 google-cloud-bigquery==3.4.1 -google-cloud-bigquery-storage==2.17.0 +google-cloud-bigquery-storage==2.18.0 google-auth-oauthlib==0.8.0 grpcio==1.51.1 ipywidgets==8.0.4 From b38cbec2e3b2c3c6c6ee4b39e696f0b5b80f6130 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 12 Jan 2023 17:23:39 +0000 Subject: [PATCH 1544/2016] chore(deps): update all dependencies (#1461) * chore(deps): update all dependencies * revert packaging Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 316758ee9d8c..0f475e0fd4cc 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==22.2.0 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==2.1.1 +charset-normalizer==3.0.1 click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 @@ -36,7 +36,7 @@ pyparsing==3.0.9 python-dateutil==2.8.2 pytz==2022.7 PyYAML==6.0 -requests==2.28.1 +requests==2.28.2 rsa==4.9 Shapely==2.0.0 six==1.16.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 1ae7c4974e19..463829c8fd6c 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -7,7 +7,7 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.2; python_version >= '3.8' +matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.2; python_version >= '3.8' pyarrow==10.0.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 23e31a200b1d..1ecf3fc455aa 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -8,7 +8,7 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.2; python_version >= '3.8' +matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.2; python_version >= '3.8' pyarrow==10.0.1 From 1eb287953c0d9a75504973fea0faae86d69b0cc0 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 12 Jan 2023 16:06:04 -0500 Subject: [PATCH 1545/2016] fix: Add support for python 3.11 (#1463) * fix: Add support for python 3.11 * use python 3.11 for system tests --- ...{prerelease-deps-3.10.cfg => prerelease-deps-3.11.cfg} | 2 +- ...{prerelease-deps-3.10.cfg => prerelease-deps-3.11.cfg} | 2 +- .../presubmit/{snippets-3.10.cfg => snippets-3.11.cfg} | 2 +- .../presubmit/{system-3.10.cfg => system-3.11.cfg} | 2 +- packages/google-cloud-bigquery/noxfile.py | 8 ++++---- packages/google-cloud-bigquery/setup.py | 1 + 6 files changed, 9 insertions(+), 8 deletions(-) rename packages/google-cloud-bigquery/.kokoro/continuous/{prerelease-deps-3.10.cfg => prerelease-deps-3.11.cfg} (77%) rename packages/google-cloud-bigquery/.kokoro/presubmit/{prerelease-deps-3.10.cfg => prerelease-deps-3.11.cfg} (77%) rename packages/google-cloud-bigquery/.kokoro/presubmit/{snippets-3.10.cfg => snippets-3.11.cfg} (81%) rename packages/google-cloud-bigquery/.kokoro/presubmit/{system-3.10.cfg => system-3.11.cfg} (82%) diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.10.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.11.cfg similarity index 77% rename from packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.10.cfg rename to packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.11.cfg index 339980bdd7e5..1e19f1239870 100644 --- a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.10.cfg +++ b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.11.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps-3.10" + value: "prerelease_deps-3.11" } diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.10.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.11.cfg similarity index 77% rename from packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.10.cfg rename to packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.11.cfg index 339980bdd7e5..1e19f1239870 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.10.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.11.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps-3.10" + value: "prerelease_deps-3.11" } diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.10.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.11.cfg similarity index 81% rename from packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.10.cfg rename to packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.11.cfg index dde182fb9286..74af6dd075af 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.10.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.11.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "snippets-3.10" + value: "snippets-3.11" } diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.10.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.11.cfg similarity index 82% rename from packages/google-cloud-bigquery/.kokoro/presubmit/system-3.10.cfg rename to packages/google-cloud-bigquery/.kokoro/presubmit/system-3.11.cfg index 30956a3ab936..5ff31a60322b 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.10.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.11.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "system-3.10" + value: "system-3.11" } diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 5f08c016b640..381876462c94 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -36,7 +36,7 @@ ) DEFAULT_PYTHON_VERSION = "3.8" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.10"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.11"] UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() @@ -80,7 +80,7 @@ def default(session, install_extras=True): constraints_path, ) - if install_extras and session.python == "3.10": + if install_extras and session.python == "3.11": install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" elif install_extras: install_target = ".[all]" @@ -185,7 +185,7 @@ def system(session): # Data Catalog needed for the column ACL test with a real Policy Tag. session.install("google-cloud-datacatalog", "-c", constraints_path) - if session.python == "3.10": + if session.python == "3.11": extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" @@ -244,7 +244,7 @@ def snippets(session): session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) - if session.python == "3.10": + if session.python == "3.11": extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index fd168719362c..77a9bb53c644 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -131,6 +131,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Operating System :: OS Independent", "Topic :: Internet", ], From 03e405949c5cb08a23278de918d5c303831a73e4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 13 Jan 2023 11:41:25 -0600 Subject: [PATCH 1546/2016] deps: remove upper bound on packaging dependency (#1440) * deps: remove upper bound on packaging dependency Towards #1435 * install prerelease version of packaging * bump minimum packaging version Co-authored-by: Anthonios Partheniou --- packages/google-cloud-bigquery/noxfile.py | 5 +++++ packages/google-cloud-bigquery/setup.py | 2 +- packages/google-cloud-bigquery/testing/constraints-3.7.txt | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 381876462c94..f6283abf9130 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -300,6 +300,11 @@ def prerelease_deps(session): "--upgrade", "pandas", ) + session.install( + "--pre", + "--upgrade", + "git+https://github.com/pypa/packaging.git", + ) session.install( "--pre", diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 77a9bb53c644..81cb2e5dbe43 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -41,7 +41,7 @@ # https://github.com/googleapis/google-cloud-python/issues/10566 "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", - "packaging >= 14.3, <22.0.0dev", + "packaging >= 20.0.0", "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "requests >= 2.21.0, < 3.0.0dev", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 149d6c496982..3701c7343af6 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -17,6 +17,7 @@ ipython==7.0.1 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 +packaging==20.0.0 pandas==1.1.0 proto-plus==1.22.0 protobuf==3.19.5 From f349bdc46a4709f04be0b67a2179578b7eb44aa3 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Fri, 13 Jan 2023 14:07:38 -0600 Subject: [PATCH 1547/2016] =?UTF-8?q?docs:=20revise=20get=20table=20labels?= =?UTF-8?q?=20code=20sample,=20add=20TODO=20to=20clean=20up=20snipp?= =?UTF-8?q?=E2=80=A6=20(#1464)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: revise get table labels code sample, add TODO to clean up snippets.py * added a test with labels * Update samples/snippets/get_table_labels_test.py Co-authored-by: Tim Swast --- .../google-cloud-bigquery/docs/snippets.py | 2 + .../samples/snippets/get_table_labels.py | 39 +++++++++++++++ .../samples/snippets/get_table_labels_test.py | 50 +++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/get_table_labels.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index efc551310660..85856eb3e71e 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -179,6 +179,8 @@ def test_manage_table_labels(client, to_delete): table.labels = labels table = client.create_table(table) + # TODO(Mattix23): After code sample is updated from cloud.google.com delete this + # [START bigquery_get_table_labels] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/snippets/get_table_labels.py b/packages/google-cloud-bigquery/samples/snippets/get_table_labels.py new file mode 100644 index 000000000000..8cfbd4ee2d21 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/get_table_labels.py @@ -0,0 +1,39 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_table_labels(table_id: str) -> None: + orig_table_id = table_id + # [START bigquery_get_table_labels] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table_name" + + # [END bigquery_get_table_labels] + table_id = orig_table_id + + # [START bigquery_get_table_labels] + table = client.get_table(table_id) # API Request + + # View table labels + print(f"Table ID: {table_id}.") + if table.labels: + for label, value in table.labels.items(): + print(f"\t{label}: {value}") + else: + print("\tTable has no labels defined.") + # [END bigquery_get_table_labels] diff --git a/packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py b/packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py new file mode 100644 index 000000000000..f922e728c44b --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py @@ -0,0 +1,50 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from google.cloud import bigquery + +import get_table_labels + +if typing.TYPE_CHECKING: + import pytest + + +def test_get_table_labels( + capsys: "pytest.CaptureFixture[str]", + table_id: str, + bigquery_client: bigquery.Client, +) -> None: + table = bigquery_client.get_table(table_id) + table.labels = {"color": "green"} + bigquery_client.update_table(table, ["labels"]) + + get_table_labels.get_table_labels(table_id) + + out, _ = capsys.readouterr() + assert table_id in out + assert "color" in out + + +def test_get_table_labels_no_label( + capsys: "pytest.CaptureFixture[str]", + table_id: str, +) -> None: + + get_table_labels.get_table_labels(table_id) + + out, _ = capsys.readouterr() + assert "no labels defined" in out + assert table_id in out From 6fe318a35747e1aff037c318244d62ed45679aa1 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 18 Jan 2023 08:37:29 -0600 Subject: [PATCH 1548/2016] chore(main): release 3.4.2 (#1441) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 23 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index d6f0abc85a69..0a5b4d179839 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,29 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.4.2](https://github.com/googleapis/python-bigquery/compare/v3.4.1...v3.4.2) (2023-01-13) + + +### Bug Fixes + +* Add support for python 3.11 ([#1463](https://github.com/googleapis/python-bigquery/issues/1463)) ([730a1de](https://github.com/googleapis/python-bigquery/commit/730a1dec8be49df26a3d805ebd4ad185ba72170d)) +* Require grpcio >= 1.49.1 for python 3.11 ([72b25c5](https://github.com/googleapis/python-bigquery/commit/72b25c52bc4b9a92c4cb187b6230b280d4af905c)) + + +### Dependencies + +* Remove upper bound on packaging dependency ([#1440](https://github.com/googleapis/python-bigquery/issues/1440)) ([6088129](https://github.com/googleapis/python-bigquery/commit/60881296a35067e7aa025d92b2425572f10fd4ec)) + + +### Documentation + +* Create sample to write schema file from table ([#1439](https://github.com/googleapis/python-bigquery/issues/1439)) ([093cc68](https://github.com/googleapis/python-bigquery/commit/093cc6852ada29898c4a4d047fd216544ef15bba)) +* Created samples for load table and create table from schema file ([#1436](https://github.com/googleapis/python-bigquery/issues/1436)) ([8ad2e5b](https://github.com/googleapis/python-bigquery/commit/8ad2e5bc1c04bf16fffe4c8773e722b68117c916)) +* Revise create table cmek sample ([#1452](https://github.com/googleapis/python-bigquery/issues/1452)) ([57740e4](https://github.com/googleapis/python-bigquery/commit/57740e49af7418449aec73a6fdd307fcb588c655)) +* Revise get table labels code sample, add TODO to clean up snipp… ([#1464](https://github.com/googleapis/python-bigquery/issues/1464)) ([b5ccbfe](https://github.com/googleapis/python-bigquery/commit/b5ccbfe4eee91d7f481d9708084cd29d0c85e666)) +* Revise label table code samples ([#1451](https://github.com/googleapis/python-bigquery/issues/1451)) ([14ae1f2](https://github.com/googleapis/python-bigquery/commit/14ae1f20538ea00829a1325f91f5e8524234bd0c)) +* Revise sample for nested schema ([#1446](https://github.com/googleapis/python-bigquery/issues/1446)) ([a097631](https://github.com/googleapis/python-bigquery/commit/a0976318fc5ad1620a68250c3e059e2a51d4946d)) + ## [3.4.1](https://github.com/googleapis/python-bigquery/compare/v3.4.0...v3.4.1) (2022-12-09) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 71133df01629..d38bb4619c37 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.4.1" +__version__ = "3.4.2" From 85775f40248bea1544971ffc32168dcfcb1cdac9 Mon Sep 17 00:00:00 2001 From: shollyman Date: Wed, 18 Jan 2023 09:57:21 -0800 Subject: [PATCH 1549/2016] testing: remove test_create_table_with_policy (#1470) This is superseded by a version that tests using real tags in test_create_table_with_real_custom_policy Fixes: https://github.com/googleapis/python-bigquery/issues/1468 --- .../tests/system/test_client.py | 51 ------------------- 1 file changed, 51 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 575898209dcd..14a9b04d4040 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -335,57 +335,6 @@ def test_create_table(self): self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_id) - def test_create_table_with_policy(self): - from google.cloud.bigquery.schema import PolicyTagList - - dataset = self.temp_dataset(_make_dataset_id("create_table_with_policy")) - table_id = "test_table" - policy_1 = PolicyTagList( - names=[ - "projects/{}/locations/us/taxonomies/1/policyTags/2".format( - Config.CLIENT.project - ), - ] - ) - policy_2 = PolicyTagList( - names=[ - "projects/{}/locations/us/taxonomies/3/policyTags/4".format( - Config.CLIENT.project - ), - ] - ) - - schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField( - "secret_int", "INTEGER", mode="REQUIRED", policy_tags=policy_1 - ), - ] - table_arg = Table(dataset.table(table_id), schema=schema) - self.assertFalse(_table_exists(table_arg)) - - table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) - self.to_delete.insert(0, table) - - self.assertTrue(_table_exists(table)) - self.assertEqual(policy_1, table.schema[1].policy_tags) - - # Amend the schema to replace the policy tags - new_schema = table.schema[:] - old_field = table.schema[1] - new_schema[1] = bigquery.SchemaField( - name=old_field.name, - field_type=old_field.field_type, - mode=old_field.mode, - description=old_field.description, - fields=old_field.fields, - policy_tags=policy_2, - ) - - table.schema = new_schema - table2 = Config.CLIENT.update_table(table, ["schema"]) - self.assertEqual(policy_2, table2.schema[1].policy_tags) - def test_create_table_with_real_custom_policy(self): from google.cloud.bigquery.schema import PolicyTagList From 9d1e0214f8bc6e006119335241c1b8899f8f3a50 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 18 Jan 2023 19:20:16 +0000 Subject: [PATCH 1550/2016] chore(deps): update dependency packaging to v23 (#1462) * chore(deps): update dependency packaging to v23 * bump google-cloud-bigquery Co-authored-by: Anthonios Partheniou Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 0f475e0fd4cc..994d579aa6b1 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.16.0 -google-cloud-bigquery==3.4.1 +google-cloud-bigquery==3.4.2 google-cloud-bigquery-storage==2.18.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 @@ -24,7 +24,7 @@ idna==3.4 libcst==0.4.9 munch==2.5.0 mypy-extensions==0.4.3 -packaging==21.3 +packaging==23.0 pandas===1.3.5; python_version == '3.7' pandas==1.5.2; python_version >= '3.8' proto-plus==1.22.2 From 7b1faffd03430d292b1be0dfed80da87f644a13a Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Wed, 18 Jan 2023 13:53:49 -0600 Subject: [PATCH 1551/2016] =?UTF-8?q?docs:=20revise=20delete=20label=20tab?= =?UTF-8?q?le=20code=20sample,=20add=20TODO=20to=20clean=20up=20sni?= =?UTF-8?q?=E2=80=A6=20(#1466)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: revise delete label table code sample, add TODO to clean up snippets.py * changed name of test function to align with file name Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/docs/snippets.py | 2 + .../samples/snippets/delete_label_table.py | 43 +++++++++++++++++++ .../snippets/delete_label_table_test.py | 34 +++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/delete_label_table.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 85856eb3e71e..eca2b13538bf 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -203,6 +203,8 @@ def test_manage_table_labels(client, to_delete): # [END bigquery_get_table_labels] assert table.labels == labels + # TODO(Mattix23): After code sample is updated from cloud.google.com delete this + # [START bigquery_delete_label_table] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/snippets/delete_label_table.py b/packages/google-cloud-bigquery/samples/snippets/delete_label_table.py new file mode 100644 index 000000000000..0e9eaaf8f78c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/delete_label_table.py @@ -0,0 +1,43 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + + +def delete_label_table(table_id: str, label_key: str) -> bigquery.Table: + orig_table_id = table_id + orig_label_key = label_key + # [START bigquery_delete_label_table] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you wish to delete from. + table_id = "your-project.your_dataset.your_table_name" + # TODO(dev): Change label_key to the name of the label you want to remove. + label_key = "color" + # [END bigquery_delete_label_table] + table_id = orig_table_id + label_key = orig_label_key + # [START bigquery_delete_label_table] + table = client.get_table(table_id) # API request + + # To delete a label from a table, set its value to None + table.labels[label_key] = None + + table = client.update_table(table, ["labels"]) # API request + + print(f"Deleted label '{label_key}' from {table_id}.") + # [END bigquery_delete_label_table] + return table diff --git a/packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py b/packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py new file mode 100644 index 000000000000..54acae77f4ff --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import delete_label_table + +if typing.TYPE_CHECKING: + import pytest + + +def test_delete_label_table( + capsys: "pytest.CaptureFixture[str]", + table_id: str, +) -> None: + + table = delete_label_table.delete_label_table(table_id, "color") + + out, _ = capsys.readouterr() + assert "Deleted" in out + assert "color" in out + assert table_id in out + assert table.labels is None or "color" not in table.labels From 99dcc12d9291f037218e2e51f520bbfffbc7ec74 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Wed, 18 Jan 2023 16:29:06 -0600 Subject: [PATCH 1552/2016] docs: adds snippet for creating table with external data config (#1420) * docs: add samples for creating table with external data configuration and creating an external table definition Co-authored-by: Anthonios Partheniou --- .../docs/usage/tables.rst | 9 +++ ...reate_table_external_data_configuration.py | 66 +++++++++++++++++++ .../samples/tests/conftest.py | 18 ++++- ...reate_table_external_data_configuration.py | 32 +++++++++ 4 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/samples/create_table_external_data_configuration.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_create_table_external_data_configuration.py diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index d924fe214a71..105e93637815 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -58,6 +58,15 @@ Create an empty table with the :start-after: [START bigquery_create_table] :end-before: [END bigquery_create_table] +Create a table using an external data source with the +:func:`~google.cloud.bigquery.client.Client.create_table` method: + +.. literalinclude:: ../samples/create_table_external_data_configuration.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_table_external_data_configuration] + :end-before: [END bigquery_create_table_external_data_configuration] + Create a clustered table with the :func:`~google.cloud.bigquery.client.Client.create_table` method: diff --git a/packages/google-cloud-bigquery/samples/create_table_external_data_configuration.py b/packages/google-cloud-bigquery/samples/create_table_external_data_configuration.py new file mode 100644 index 000000000000..068f915555ab --- /dev/null +++ b/packages/google-cloud-bigquery/samples/create_table_external_data_configuration.py @@ -0,0 +1,66 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_external_data_configuration( + table_id: str, +) -> None: + """Create a table using an external data source""" + orig_table_id = table_id + # [START bigquery_create_table_external_data_configuration] + # [START bigquery_create_external_table_definition] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + table_id = "your-project.your_dataset.your_table_name" + # [END bigquery_create_table_external_data_configuration] + table_id = orig_table_id + # [START bigquery_create_table_external_data_configuration] + + # TODO(developer): Set the external source format of your table. + # Note that the set of allowed values for external data sources is + # different than the set used for loading data (see :class:`~google.cloud.bigquery.job.SourceFormat`). + external_source_format = "AVRO" + + # TODO(developer): Set the source_uris to point to your data in Google Cloud + source_uris = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro", + ] + + # Create ExternalConfig object with external source format + external_config = bigquery.ExternalConfig(external_source_format) + # Set source_uris that point to your data in Google Cloud + external_config.source_uris = source_uris + + # TODO(developer) You have the option to set a reference_file_schema_uri, which points to + # a reference file for the table schema + reference_file_schema_uri = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro" + + external_config.reference_file_schema_uri = reference_file_schema_uri + # [END bigquery_create_external_table_definition] + + table = bigquery.Table(table_id) + # Set the external data configuration of the table + table.external_data_configuration = external_config + table = client.create_table(table) # Make an API request. + + print( + f"Created table with external source format {table.external_data_configuration.source_format}" + ) + # [END bigquery_create_table_external_data_configuration] diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index b7a2ad5874dc..99bd2e367b23 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -13,7 +13,7 @@ # limitations under the License. import datetime -from typing import Iterator +from typing import Iterator, List import uuid import google.auth @@ -47,6 +47,22 @@ def random_table_id(dataset_id: str) -> str: return "{}.{}".format(dataset_id, random_table_id) +@pytest.fixture +def avro_source_uris() -> List[str]: + avro_source_uris = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro", + ] + return avro_source_uris + + +@pytest.fixture +def reference_file_schema_uri() -> str: + reference_file_schema_uri = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro" + return reference_file_schema_uri + + @pytest.fixture def random_dataset_id(client: bigquery.Client) -> Iterator[str]: now = datetime.datetime.now() diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table_external_data_configuration.py b/packages/google-cloud-bigquery/samples/tests/test_create_table_external_data_configuration.py new file mode 100644 index 000000000000..bf4cf17d4563 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_create_table_external_data_configuration.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from .. import create_table_external_data_configuration + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_table_external_data_configuration( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + + create_table_external_data_configuration.create_table_external_data_configuration( + random_table_id + ) + out, _ = capsys.readouterr() + assert "Created table with external source format AVRO" in out From 2aea938b8c56ed96885cb8e29782b12e84e11ead Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 23 Jan 2023 15:47:40 +0000 Subject: [PATCH 1553/2016] chore(deps): update all dependencies (#1471) Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../samples/magics/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 1e6b7c5eaa13..100e0639cd31 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.2.0 +pytest==7.2.1 mock==5.0.1 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 994d579aa6b1..c8c063ba8a8a 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -34,7 +34,7 @@ pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.7 +pytz==2022.7.1 PyYAML==6.0 requests==2.28.2 rsa==4.9 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 56aa0f43257c..e8f3982c7160 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.0 +pytest==7.2.1 mock==5.0.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 463829c8fd6c..9fd09f557e5b 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -11,5 +11,5 @@ matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.2; python_version >= '3.8' pyarrow==10.0.1 -pytz==2022.7 +pytz==2022.7.1 typing-extensions==4.4.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 56aa0f43257c..e8f3982c7160 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.0 +pytest==7.2.1 mock==5.0.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 1ecf3fc455aa..8a7b975636c8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.5 -google-cloud-bigquery==3.4.1 +google-cloud-bigquery==3.4.2 google-cloud-bigquery-storage==2.18.0 google-auth-oauthlib==0.8.0 grpcio==1.51.1 @@ -12,5 +12,5 @@ matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.2; python_version >= '3.8' pyarrow==10.0.1 -pytz==2022.7 +pytz==2022.7.1 typing-extensions==4.4.0 From 21889cc1c148a6919ba9b505d4d046166398d88b Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 23 Jan 2023 16:31:18 +0000 Subject: [PATCH 1554/2016] chore(deps): update all dependencies (#1475) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index c8c063ba8a8a..3a28bf2b5f67 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -17,7 +17,7 @@ google-cloud-bigquery==3.4.2 google-cloud-bigquery-storage==2.18.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 -google-resumable-media==2.4.0 +google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 grpcio==1.51.1 idna==3.4 @@ -26,7 +26,7 @@ munch==2.5.0 mypy-extensions==0.4.3 packaging==23.0 pandas===1.3.5; python_version == '3.7' -pandas==1.5.2; python_version >= '3.8' +pandas==1.5.3; python_version >= '3.8' proto-plus==1.22.2 pyarrow==10.0.1 pyasn1==0.4.8 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 9fd09f557e5b..2122f12db5f7 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -9,7 +9,7 @@ ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.2; python_version >= '3.8' +pandas==1.5.3; python_version >= '3.8' pyarrow==10.0.1 pytz==2022.7.1 typing-extensions==4.4.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 8a7b975636c8..7b0487f06f9f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -10,7 +10,7 @@ ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.2; python_version >= '3.8' +pandas==1.5.3; python_version >= '3.8' pyarrow==10.0.1 pytz==2022.7.1 typing-extensions==4.4.0 From 2b81e22783e584d8b938f207efea7e717cf973df Mon Sep 17 00:00:00 2001 From: adhiggs <38925938+adhiggs@users.noreply.github.com> Date: Mon, 23 Jan 2023 16:08:12 -0800 Subject: [PATCH 1555/2016] docs(samples): table variable fix (#1287) Updated table variable in "# Print row data in tabular format." to point at the correct table_id. Fixes https://togithub.com/googleapis/python-bigquery/issues/1286 --- packages/google-cloud-bigquery/samples/browse_table_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/browse_table_data.py b/packages/google-cloud-bigquery/samples/browse_table_data.py index 6a56253bf37f..74b903aa3ec7 100644 --- a/packages/google-cloud-bigquery/samples/browse_table_data.py +++ b/packages/google-cloud-bigquery/samples/browse_table_data.py @@ -47,7 +47,7 @@ def browse_table_data(table_id: str) -> None: print("Downloaded {} rows from table {}".format(len(rows), table_id)) # Print row data in tabular format. - rows_iter = client.list_rows(table, max_results=10) + rows_iter = client.list_rows(table_id, max_results=10) format_string = "{!s:<16} " * len(rows_iter.schema) field_names = [field.name for field in rows_iter.schema] print(format_string.format(*field_names)) # Prints column headers. From b610b7c8ea8fb00d35bbd139e13699237f39e328 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 25 Jan 2023 18:43:49 +0000 Subject: [PATCH 1556/2016] chore(deps): update dependency google-cloud-bigquery-storage to v2.18.1 (#1476) Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 3a28bf2b5f67..a6a33af5adca 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.16.0 google-cloud-bigquery==3.4.2 -google-cloud-bigquery-storage==2.18.0 +google-cloud-bigquery-storage==2.18.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 2122f12db5f7..ba90dab78691 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.5 -google-cloud-bigquery-storage==2.18.0 +google-cloud-bigquery-storage==2.18.1 google-auth-oauthlib==0.8.0 grpcio==1.51.1 ipywidgets==8.0.4 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 7b0487f06f9f..b2093488668b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.0.5 google-cloud-bigquery==3.4.2 -google-cloud-bigquery-storage==2.18.0 +google-cloud-bigquery-storage==2.18.1 google-auth-oauthlib==0.8.0 grpcio==1.51.1 ipywidgets==8.0.4 From 14469a3995499b8d846907732d5778d60bba554c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 26 Jan 2023 17:08:45 +0000 Subject: [PATCH 1557/2016] chore(deps): update dependency geojson to v3 (#1478) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index a6a33af5adca..1a332cdaf151 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -8,7 +8,7 @@ cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.0.5 Fiona==1.8.22 -geojson==2.5.0 +geojson==3.0.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 From f56a35d5cd412f039df3239ccc36930b84d51d12 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Sat, 28 Jan 2023 11:39:21 +0000 Subject: [PATCH 1558/2016] chore(deps): update all dependencies (#1479) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 1a332cdaf151..294cb095d326 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -28,7 +28,7 @@ packaging==23.0 pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' proto-plus==1.22.2 -pyarrow==10.0.1 +pyarrow==11.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index ba90dab78691..2446aa5e87a4 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -5,11 +5,11 @@ grpcio==1.51.1 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.8.0; python_version >= '3.9' +ipython==8.9.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' -pyarrow==10.0.1 +pyarrow==11.0.0 pytz==2022.7.1 typing-extensions==4.4.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index b2093488668b..a85653d42d4e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -6,11 +6,11 @@ grpcio==1.51.1 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.8.0; python_version >= '3.9' +ipython==8.9.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' -pyarrow==10.0.1 +pyarrow==11.0.0 pytz==2022.7.1 typing-extensions==4.4.0 From f4c3dd1bff29cba76d01d2fc1c9c58e12291519c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 30 Jan 2023 10:38:10 +0000 Subject: [PATCH 1559/2016] chore(deps): update dependency shapely to v2.0.1 (#1480) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 294cb095d326..d3fee4c2f8f3 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -38,7 +38,7 @@ pytz==2022.7.1 PyYAML==6.0 requests==2.28.2 rsa==4.9 -Shapely==2.0.0 +Shapely==2.0.1 six==1.16.0 typing-extensions==4.4.0 typing-inspect==0.8.0 From e8aa86dee820cc1aa11fe1bec39c5914d1e927ac Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 30 Jan 2023 18:34:14 +0000 Subject: [PATCH 1560/2016] chore(deps): update dependency fiona to v1.9.0 (#1482) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index d3fee4c2f8f3..284614d176ba 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -7,7 +7,7 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.0.5 -Fiona==1.8.22 +Fiona==1.9.0 geojson==3.0.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' From 6b88b93d2c094b9ab11e59302965c81269e4cbc3 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Mon, 30 Jan 2023 14:55:16 -0600 Subject: [PATCH 1561/2016] feat: add __str__ method to DatasetReference (#1477) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add __str__ method to DatasetReference * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot --- .../google-cloud-bigquery/google/cloud/bigquery/dataset.py | 3 +++ packages/google-cloud-bigquery/tests/unit/test_dataset.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index c302040675ff..0edd2935910a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -215,6 +215,9 @@ def __ne__(self, other): def __hash__(self): return hash(self._key()) + def __str__(self): + return f"{self.project}.{self._dataset_id}" + def __repr__(self): return "DatasetReference{}".format(self._key()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 856674dafa2c..5e26a0c03c3e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -622,6 +622,10 @@ def test___repr__(self): expected = "DatasetReference('project1', 'dataset1')" self.assertEqual(repr(dataset), expected) + def test___str__(self): + dataset = self._make_one("project1", "dataset1") + self.assertEqual(str(dataset), "project1.dataset1") + class TestDataset(unittest.TestCase): from google.cloud.bigquery.dataset import DatasetReference From 1e8bc900553ae2cf1c34514d5f84c895d861ea99 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Mon, 30 Jan 2023 15:25:23 -0600 Subject: [PATCH 1562/2016] chore: delete duplicate code from snippets.py (#1481) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: delete duplicate code from snippets.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../google-cloud-bigquery/docs/snippets.py | 60 ------------------- 1 file changed, 60 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index eca2b13538bf..e1d9ae839731 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -164,66 +164,6 @@ def test_create_partitioned_table(client, to_delete): "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" ) ) -def test_manage_table_labels(client, to_delete): - dataset_id = "label_table_dataset_{}".format(_millis()) - table_id = "label_table_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - - labels = {"color": "green"} - table.labels = labels - table = client.create_table(table) - - # TODO(Mattix23): After code sample is updated from cloud.google.com delete this - - # [START bigquery_get_table_labels] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - table_ref = dataset_ref.table(table_id) - table = client.get_table(table_ref) # API Request - - # View table labels - print("Table ID: {}".format(table_id)) - print("Labels:") - if table.labels: - for label, value in table.labels.items(): - print("\t{}: {}".format(label, value)) - else: - print("\tTable has no labels defined.") - # [END bigquery_get_table_labels] - assert table.labels == labels - - # TODO(Mattix23): After code sample is updated from cloud.google.com delete this - - # [START bigquery_delete_label_table] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, dataset_id) - # table_ref = dataset_ref.table('my_table') - # table = client.get_table(table_ref) # API request - - # This example table starts with one label - assert table.labels == {"color": "green"} - # To delete a label from a table, set its value to None - table.labels["color"] = None - - table = client.update_table(table, ["labels"]) # API request - - assert table.labels == {} - # [END bigquery_delete_label_table] - - @pytest.mark.skip( reason=( "update_table() is flaky " From 319ccf145773eba67ab80357b6a0dba40a2df284 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Tue, 31 Jan 2023 12:06:02 -0600 Subject: [PATCH 1563/2016] feat: add preserveAsciiControlCharacter to LoadJobConfig (#1484) --- .../google/cloud/bigquery/job/load.py | 13 +++++++++++++ .../tests/unit/job/test_load_config.py | 14 ++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index 5c7f268419a2..14a7fa30bf30 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -311,6 +311,19 @@ def null_marker(self): def null_marker(self, value): self._set_sub_prop("nullMarker", value) + @property + def preserve_ascii_control_characters(self): + """Optional[bool]: Preserves the embedded ASCII control characters when sourceFormat is set to CSV. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.preserve_ascii_control_characters + """ + return self._get_sub_prop("preserveAsciiControlCharacters") + + @preserve_ascii_control_characters.setter + def preserve_ascii_control_characters(self, value): + self._set_sub_prop("preserveAsciiControlCharacters", bool(value)) + @property def projection_fields(self) -> Optional[List[str]]: """Optional[List[str]]: If diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index 5a0c5a83f306..7f77fc085ca8 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -424,6 +424,20 @@ def test_null_marker_setter(self): config.null_marker = null_marker self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + def test_preserve_ascii_control_characters_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.preserve_ascii_control_characters) + + def test_preserve_ascii_control_characters_hit(self): + config = self._get_target_class()() + config._properties["load"]["preserveAsciiControlCharacters"] = True + self.assertTrue(config.preserve_ascii_control_characters) + + def test_preserve_ascii_control_characters_setter(self): + config = self._get_target_class()() + config.preserve_ascii_control_characters = True + self.assertTrue(config._properties["load"]["preserveAsciiControlCharacters"]) + def test_projection_fields_miss(self): config = self._get_target_class()() self.assertIsNone(config.projection_fields) From f3fcd73704ac92623566ee69fb6df5ee6f8643d6 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 1 Feb 2023 12:41:48 -0600 Subject: [PATCH 1564/2016] chore(main): release 3.5.0 (#1472) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 15 +++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 0a5b4d179839..454d362f93fa 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.5.0](https://github.com/googleapis/python-bigquery/compare/v3.4.2...v3.5.0) (2023-01-31) + + +### Features + +* Add __str__ method to DatasetReference ([#1477](https://github.com/googleapis/python-bigquery/issues/1477)) ([f32df1f](https://github.com/googleapis/python-bigquery/commit/f32df1fb74e4aea24cd8a4099040ad2f7436e54d)) +* Add preserveAsciiControlCharacter to LoadJobConfig ([#1484](https://github.com/googleapis/python-bigquery/issues/1484)) ([bd1da9a](https://github.com/googleapis/python-bigquery/commit/bd1da9aa0a40b02b7d5409a0b094d8380e255c91)) + + +### Documentation + +* Adds snippet for creating table with external data config ([#1420](https://github.com/googleapis/python-bigquery/issues/1420)) ([f0ace2a](https://github.com/googleapis/python-bigquery/commit/f0ace2ac2307ef359511a235f80f5ce9e46264c1)) +* Revise delete label table code sample, add TODO to clean up sni… ([#1466](https://github.com/googleapis/python-bigquery/issues/1466)) ([0dab7d2](https://github.com/googleapis/python-bigquery/commit/0dab7d25ace4b63d2984485e7b0c5bb38f20476f)) +* **samples:** Table variable fix ([#1287](https://github.com/googleapis/python-bigquery/issues/1287)) ([a71888a](https://github.com/googleapis/python-bigquery/commit/a71888a60d1e5e5815ab459fe24368ad5b0d032a)) + ## [3.4.2](https://github.com/googleapis/python-bigquery/compare/v3.4.1...v3.4.2) (2023-01-13) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index d38bb4619c37..13194aa56112 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.4.2" +__version__ = "3.5.0" From e9cea284b0d19f086bf10cc1623765d5f91d3e80 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 2 Feb 2023 11:32:44 +0000 Subject: [PATCH 1565/2016] chore(deps): update dependency google-cloud-bigquery to v3.5.0 (#1485) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 284614d176ba..2bc2822922d2 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.16.0 -google-cloud-bigquery==3.4.2 +google-cloud-bigquery==3.5.0 google-cloud-bigquery-storage==2.18.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index a85653d42d4e..86739eec4fc1 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.5 -google-cloud-bigquery==3.4.2 +google-cloud-bigquery==3.5.0 google-cloud-bigquery-storage==2.18.1 google-auth-oauthlib==0.8.0 grpcio==1.51.1 From 36b5bf1915454f47c5f5ebd744f7901b32975139 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 8 Feb 2023 10:05:10 -0600 Subject: [PATCH 1566/2016] fix: annotate optional integer parameters with optional type (#1487) * fix: annotate optional integer parameters with optional type * remove google-cloud-core reference causing type checker issues deps: update minimum google-cloud-core to 1.6.0 --- .../google/cloud/bigquery/_http.py | 12 ------ .../google/cloud/bigquery/client.py | 42 +++++++++---------- .../google/cloud/bigquery/job/query.py | 6 +-- packages/google-cloud-bigquery/setup.py | 2 +- .../testing/constraints-3.7.txt | 2 +- 5 files changed, 26 insertions(+), 38 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py index 789ef9243f5c..7921900f81da 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_http.py @@ -14,22 +14,10 @@ """Create / interact with Google BigQuery connections.""" -import os -import pkg_resources - from google.cloud import _http # type: ignore # pytype: disable=import-error from google.cloud.bigquery import __version__ -# TODO: Increase the minimum version of google-cloud-core to 1.6.0 -# and remove this logic. See: -# https://github.com/googleapis/python-bigquery/issues/509 -if os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true": # pragma: NO COVER - release = pkg_resources.get_distribution("google-cloud-core").parsed_version - if release < pkg_resources.parse_version("1.6.0"): - raise ImportError("google-cloud-core >= 1.6.0 is required to use mTLS feature") - - class Connection(_http.JSONConnection): """A connection to Google BigQuery via the JSON REST API. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1885ab67ef80..4f6e6610dad6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -329,11 +329,11 @@ def get_service_account_email( def list_projects( self, - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -395,11 +395,11 @@ def list_datasets( project: str = None, include_all: bool = False, filter: str = None, - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -1324,11 +1324,11 @@ def update_table( def list_models( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1401,11 +1401,11 @@ def api_request(*args, **kwargs): def list_routines( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1478,11 +1478,11 @@ def api_request(*args, **kwargs): def list_tables( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1838,7 +1838,7 @@ def _get_query_results( job_id: str, retry: retries.Retry, project: str = None, - timeout_ms: int = None, + timeout_ms: Optional[int] = None, location: str = None, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> _QueryResults: @@ -2163,7 +2163,7 @@ def list_jobs( self, project: str = None, parent_job: Optional[Union[QueryJob, str]] = None, - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, all_users: bool = None, state_filter: str = None, @@ -2171,7 +2171,7 @@ def list_jobs( timeout: TimeoutType = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """List jobs for the project associated with this client. @@ -2361,7 +2361,7 @@ def load_table_from_file( file_obj: IO[bytes], destination: Union[Table, TableReference, TableListItem, str], rewind: bool = False, - size: int = None, + size: Optional[int] = None, num_retries: int = _DEFAULT_NUM_RETRIES, job_id: str = None, job_id_prefix: str = None, @@ -3729,10 +3729,10 @@ def list_rows( self, table: Union[Table, TableListItem, TableReference, str], selected_fields: Sequence[SchemaField] = None, - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, - start_index: int = None, - page_size: int = None, + start_index: Optional[int] = None, + page_size: Optional[int] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> RowIterator: @@ -3840,11 +3840,11 @@ def _list_rows_from_query_results( location: str, project: str, schema: SchemaField, - total_rows: int = None, + total_rows: Optional[int] = None, destination: Union[Table, TableReference, TableListItem, str] = None, - max_results: int = None, - start_index: int = None, - page_size: int = None, + max_results: Optional[int] = None, + start_index: Optional[int] = None, + page_size: Optional[int] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> RowIterator: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index e32e74129b2f..c63fa0892f46 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1381,11 +1381,11 @@ def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): def result( # type: ignore # (complaints about the overloaded signature) self, - page_size: int = None, - max_results: int = None, + page_size: Optional[int] = None, + max_results: Optional[int] = None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None, - start_index: int = None, + start_index: Optional[int] = None, job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 81cb2e5dbe43..e1b018098d33 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -39,7 +39,7 @@ # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 - "google-cloud-core >= 1.4.1, <3.0.0dev", + "google-cloud-core >= 1.6.0, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 20.0.0", "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 3701c7343af6..746656b585ac 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -9,7 +9,7 @@ db-dtypes==0.3.0 geopandas==0.9.0 google-api-core==1.31.5 google-cloud-bigquery-storage==2.0.0 -google-cloud-core==1.4.1 +google-cloud-core==1.6.0 google-resumable-media==0.6.0 grpcio==1.47.0 ipywidgets==7.7.1 From 36fc64391d06a8e5f5fd7895a12dfafc93536e1f Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 8 Feb 2023 16:40:16 +0000 Subject: [PATCH 1567/2016] build(deps): bump cryptography from 38.0.3 to 39.0.1 in /synthtool/gcp/templates/python_library/.kokoro (#1489) Source-Link: https://togithub.com/googleapis/synthtool/commit/bb171351c3946d3c3c32e60f5f18cee8c464ec51 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:f62c53736eccb0c4934a3ea9316e0d57696bb49c1a7c86c726e9bb8a2f87dadf --- .../.github/.OwlBot.lock.yaml | 2 +- .../.kokoro/requirements.txt | 49 +++++++++---------- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 889f77dfa25d..894fb6bc9b47 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:c43f1d918bcf817d337aa29ff833439494a158a0831508fda4ec75dc4c0d0320 + digest: sha256:f62c53736eccb0c4934a3ea9316e0d57696bb49c1a7c86c726e9bb8a2f87dadf diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 05dc4672edaa..096e4800a9ac 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -113,33 +113,28 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==38.0.3 \ - --hash=sha256:068147f32fa662c81aebab95c74679b401b12b57494872886eb5c1139250ec5d \ - --hash=sha256:06fc3cc7b6f6cca87bd56ec80a580c88f1da5306f505876a71c8cfa7050257dd \ - --hash=sha256:25c1d1f19729fb09d42e06b4bf9895212292cb27bb50229f5aa64d039ab29146 \ - --hash=sha256:402852a0aea73833d982cabb6d0c3bb582c15483d29fb7085ef2c42bfa7e38d7 \ - --hash=sha256:4e269dcd9b102c5a3d72be3c45d8ce20377b8076a43cbed6f660a1afe365e436 \ - --hash=sha256:5419a127426084933076132d317911e3c6eb77568a1ce23c3ac1e12d111e61e0 \ - --hash=sha256:554bec92ee7d1e9d10ded2f7e92a5d70c1f74ba9524947c0ba0c850c7b011828 \ - --hash=sha256:5e89468fbd2fcd733b5899333bc54d0d06c80e04cd23d8c6f3e0542358c6060b \ - --hash=sha256:65535bc550b70bd6271984d9863a37741352b4aad6fb1b3344a54e6950249b55 \ - --hash=sha256:6ab9516b85bebe7aa83f309bacc5f44a61eeb90d0b4ec125d2d003ce41932d36 \ - --hash=sha256:6addc3b6d593cd980989261dc1cce38263c76954d758c3c94de51f1e010c9a50 \ - --hash=sha256:728f2694fa743a996d7784a6194da430f197d5c58e2f4e278612b359f455e4a2 \ - --hash=sha256:785e4056b5a8b28f05a533fab69febf5004458e20dad7e2e13a3120d8ecec75a \ - --hash=sha256:78cf5eefac2b52c10398a42765bfa981ce2372cbc0457e6bf9658f41ec3c41d8 \ - --hash=sha256:7f836217000342d448e1c9a342e9163149e45d5b5eca76a30e84503a5a96cab0 \ - --hash=sha256:8d41a46251bf0634e21fac50ffd643216ccecfaf3701a063257fe0b2be1b6548 \ - --hash=sha256:984fe150f350a3c91e84de405fe49e688aa6092b3525f407a18b9646f6612320 \ - --hash=sha256:9b24bcff7853ed18a63cfb0c2b008936a9554af24af2fb146e16d8e1aed75748 \ - --hash=sha256:b1b35d9d3a65542ed2e9d90115dfd16bbc027b3f07ee3304fc83580f26e43249 \ - --hash=sha256:b1b52c9e5f8aa2b802d48bd693190341fae201ea51c7a167d69fc48b60e8a959 \ - --hash=sha256:bbf203f1a814007ce24bd4d51362991d5cb90ba0c177a9c08825f2cc304d871f \ - --hash=sha256:be243c7e2bfcf6cc4cb350c0d5cdf15ca6383bbcb2a8ef51d3c9411a9d4386f0 \ - --hash=sha256:bfbe6ee19615b07a98b1d2287d6a6073f734735b49ee45b11324d85efc4d5cbd \ - --hash=sha256:c46837ea467ed1efea562bbeb543994c2d1f6e800785bd5a2c98bc096f5cb220 \ - --hash=sha256:dfb4f4dd568de1b6af9f4cda334adf7d72cf5bc052516e1b2608b683375dd95c \ - --hash=sha256:ed7b00096790213e09eb11c97cc6e2b757f15f3d2f85833cd2d3ec3fe37c1722 +cryptography==39.0.1 \ + --hash=sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4 \ + --hash=sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f \ + --hash=sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502 \ + --hash=sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41 \ + --hash=sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965 \ + --hash=sha256:6f8ba7f0328b79f08bdacc3e4e66fb4d7aab0c3584e0bd41328dce5262e26b2e \ + --hash=sha256:706843b48f9a3f9b9911979761c91541e3d90db1ca905fd63fee540a217698bc \ + --hash=sha256:807ce09d4434881ca3a7594733669bd834f5b2c6d5c7e36f8c00f691887042ad \ + --hash=sha256:83e17b26de248c33f3acffb922748151d71827d6021d98c70e6c1a25ddd78505 \ + --hash=sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388 \ + --hash=sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6 \ + --hash=sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2 \ + --hash=sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac \ + --hash=sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695 \ + --hash=sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6 \ + --hash=sha256:e422abdec8b5fa8462aa016786680720d78bdce7a30c652b7fadf83a4ba35336 \ + --hash=sha256:ef8b72fa70b348724ff1218267e7f7375b8de4e8194d1636ee60510aae104cd0 \ + --hash=sha256:f0c64d1bd842ca2633e74a1a28033d139368ad959872533b1bab8c80e8240a0c \ + --hash=sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106 \ + --hash=sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a \ + --hash=sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8 # via # gcp-releasetool # secretstorage From deae218cd1070bce32e34564ae126001a06d35b8 Mon Sep 17 00:00:00 2001 From: nayaknishant Date: Wed, 15 Feb 2023 12:16:57 -0800 Subject: [PATCH 1568/2016] feat: adding preserveAsciiControlCharacter to CSVOptions (#1491) * adding ASCII support for external config * adding tests for preserveAscii... * adding tests for preserveAscii... * changing 'False' to False * linting --- .../google/cloud/bigquery/external_config.py | 14 ++++++++++++++ .../tests/unit/test_external_config.py | 14 +++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index bd60e4ef1d12..a891bc2327ab 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -418,6 +418,20 @@ def encoding(self): def encoding(self, value): self._properties["encoding"] = value + @property + def preserve_ascii_control_characters(self): + """bool: Indicates if the embedded ASCII control characters + (the first 32 characters in the ASCII-table, from '\x00' to '\x1F') are preserved. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.preserve_ascii_control_characters + """ + return self._properties.get("preserveAsciiControlCharacters") + + @preserve_ascii_control_characters.setter + def preserve_ascii_control_characters(self, value): + self._properties["preserveAsciiControlCharacters"] = value + @property def field_delimiter(self): """str: The separator for fields in a CSV file. Defaults to comma (','). diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 72fe2761a74d..67fd13fa750e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -248,6 +248,7 @@ def test_from_api_repr_csv(self): "allowQuotedNewlines": True, "allowJaggedRows": False, "encoding": "encoding", + "preserveAsciiControlCharacters": False, }, }, ) @@ -263,6 +264,7 @@ def test_from_api_repr_csv(self): self.assertEqual(ec.options.allow_quoted_newlines, True) self.assertEqual(ec.options.allow_jagged_rows, False) self.assertEqual(ec.options.encoding, "encoding") + self.assertEqual(ec.options.preserve_ascii_control_characters, False) got_resource = ec.to_api_repr() @@ -283,6 +285,7 @@ def test_to_api_repr_csv(self): options.quote_character = "quote" options.skip_leading_rows = 123 options.allow_jagged_rows = False + options.preserve_ascii_control_characters = False ec.csv_options = options exp_resource = { @@ -294,6 +297,7 @@ def test_to_api_repr_csv(self): "allowQuotedNewlines": True, "allowJaggedRows": False, "encoding": "encoding", + "preserveAsciiControlCharacters": False, }, } @@ -514,17 +518,23 @@ def test_csv_options_getter_and_setter(self): from google.cloud.bigquery.external_config import CSVOptions options = CSVOptions.from_api_repr( - {"allowJaggedRows": True, "allowQuotedNewlines": False} + { + "allowJaggedRows": True, + "allowQuotedNewlines": False, + "preserveAsciiControlCharacters": False, + } ) ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) self.assertIsNone(ec.csv_options.allow_jagged_rows) self.assertIsNone(ec.csv_options.allow_quoted_newlines) + self.assertIsNone(ec.csv_options.preserve_ascii_control_characters) ec.csv_options = options self.assertTrue(ec.csv_options.allow_jagged_rows) self.assertFalse(ec.csv_options.allow_quoted_newlines) + self.assertFalse(ec.csv_options.preserve_ascii_control_characters) self.assertIs(ec.options._properties, ec._properties[CSVOptions._RESOURCE_NAME]) self.assertIs( ec.csv_options._properties, ec._properties[CSVOptions._RESOURCE_NAME] @@ -848,6 +858,7 @@ def test_to_api_repr(self): options.allow_quoted_newlines = True options.allow_jagged_rows = False options.encoding = "UTF-8" + options.preserve_ascii_control_characters = False resource = options.to_api_repr() @@ -860,6 +871,7 @@ def test_to_api_repr(self): "allowQuotedNewlines": True, "allowJaggedRows": False, "encoding": "UTF-8", + "preserveAsciiControlCharacters": False, }, ) From 7640080446d45660e3f4e9abf355b2dd06d9db9f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 16 Feb 2023 20:35:27 +0000 Subject: [PATCH 1569/2016] chore(deps): update all dependencies to v1 (#1486) Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 2bc2822922d2..f22625653b2b 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -23,7 +23,7 @@ grpcio==1.51.1 idna==3.4 libcst==0.4.9 munch==2.5.0 -mypy-extensions==0.4.3 +mypy-extensions==1.0.0 packaging==23.0 pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 2446aa5e87a4..725975116d27 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.0.5 google-cloud-bigquery-storage==2.18.1 -google-auth-oauthlib==0.8.0 +google-auth-oauthlib==1.0.0 grpcio==1.51.1 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 86739eec4fc1..50fd19e51388 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.0.5 google-cloud-bigquery==3.5.0 google-cloud-bigquery-storage==2.18.1 -google-auth-oauthlib==0.8.0 +google-auth-oauthlib==1.0.0 grpcio==1.51.1 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' From 01c077786e21c1deaa2b2bda06b5536bb9843342 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 22 Feb 2023 11:55:47 -0500 Subject: [PATCH 1570/2016] Fix: removes scope to avoid unnecessary duplication (#1503) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix: removes scope to avoid unnecessary duplication * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4f6e6610dad6..b032665284c1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -225,10 +225,7 @@ class Client(ClientWithProject): to acquire default credentials. """ - SCOPE = ( # type: ignore - "https://www.googleapis.com/auth/bigquery", - "https://www.googleapis.com/auth/cloud-platform", - ) + SCOPE = ("https://www.googleapis.com/auth/cloud-platform",) # type: ignore """The scopes required for authenticating as a BigQuery consumer.""" def __init__( From 4c7fa1827de33036c858fa7233e575ac7e3a64c5 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 22 Feb 2023 15:56:49 -0500 Subject: [PATCH 1571/2016] Fix: loosen ipywidget dependency (#1504) * fix: updates ipywidget dependency * fix: updates ipywidget version number --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index e1b018098d33..2119e0191dd1 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -67,7 +67,7 @@ pyarrow_dependency, "db-dtypes>=0.3.0,<2.0.0dev", ], - "ipywidgets": ["ipywidgets==7.7.1"], + "ipywidgets": ["ipywidgets>=7.7.0,<8.0.1"], "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <2.0dev"], "ipython": ["ipython>=7.0.1,!=8.1.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], From fbef3ac8465f72c897e0a14cf472ec7aec5d6ed6 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 22 Feb 2023 16:37:28 -0500 Subject: [PATCH 1572/2016] chore(main): release 3.6.0 (#1490) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 19 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 454d362f93fa..67c43200bbac 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.6.0](https://github.com/googleapis/python-bigquery/compare/v3.5.0...v3.6.0) (2023-02-22) + + +### Features + +* Adding preserveAsciiControlCharacter to CSVOptions ([#1491](https://github.com/googleapis/python-bigquery/issues/1491)) ([f832e7a](https://github.com/googleapis/python-bigquery/commit/f832e7a0b79f3567a0773ff11630e2f48bed60db)) + + +### Bug Fixes + +* Annotate optional integer parameters with optional type ([#1487](https://github.com/googleapis/python-bigquery/issues/1487)) ([a190aaa](https://github.com/googleapis/python-bigquery/commit/a190aaa09ae73e8b6a83b7b213247f95fde57615)) +* Loosen ipywidget dependency ([#1504](https://github.com/googleapis/python-bigquery/issues/1504)) ([20d3276](https://github.com/googleapis/python-bigquery/commit/20d3276cc29e9467eef9476d5fd572099d9a3f6f)) +* Removes scope to avoid unnecessary duplication ([#1503](https://github.com/googleapis/python-bigquery/issues/1503)) ([665d7ba](https://github.com/googleapis/python-bigquery/commit/665d7ba74a1b45de1ef51cc75b6860125afc5fe6)) + + +### Dependencies + +* Update minimum google-cloud-core to 1.6.0 ([a190aaa](https://github.com/googleapis/python-bigquery/commit/a190aaa09ae73e8b6a83b7b213247f95fde57615)) + ## [3.5.0](https://github.com/googleapis/python-bigquery/compare/v3.4.2...v3.5.0) (2023-01-31) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 13194aa56112..102b960952dc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.5.0" +__version__ = "3.6.0" From 8ec60fe2db287de8793ee3e5c9dffc001274bb4c Mon Sep 17 00:00:00 2001 From: Atsushi Yamamoto Date: Mon, 27 Feb 2023 06:38:00 -0800 Subject: [PATCH 1573/2016] docs: Remove < 3.11 reference from README (#1502) --- packages/google-cloud-bigquery/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 475d055a272f..46f35e716b14 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -52,7 +52,7 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.7, < 3.11 +Python >= 3.7 Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 9219e6a7d996e24ce06cf694176368b8fd0b3dc6 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 27 Feb 2023 12:16:30 -0500 Subject: [PATCH 1574/2016] chore(python): upgrade gcp-releasetool in .kokoro [autoapprove] (#1508) Source-Link: https://github.com/googleapis/synthtool/commit/5f2a6089f73abf06238fe4310f6a14d6f6d1eed3 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8555f0e37e6261408f792bfd6635102d2da5ad73f8f09bcb24f25e6afb5fac97 Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 2 +- packages/google-cloud-bigquery/.kokoro/requirements.in | 2 +- packages/google-cloud-bigquery/.kokoro/requirements.txt | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 894fb6bc9b47..5fc5daa31783 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:f62c53736eccb0c4934a3ea9316e0d57696bb49c1a7c86c726e9bb8a2f87dadf + digest: sha256:8555f0e37e6261408f792bfd6635102d2da5ad73f8f09bcb24f25e6afb5fac97 diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.in b/packages/google-cloud-bigquery/.kokoro/requirements.in index cbd7e77f44db..882178ce6001 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.in +++ b/packages/google-cloud-bigquery/.kokoro/requirements.in @@ -1,5 +1,5 @@ gcp-docuploader -gcp-releasetool +gcp-releasetool>=1.10.5 # required for compatibility with cryptography>=39.x importlib-metadata typing-extensions twine diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 096e4800a9ac..fa99c12908f0 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -154,9 +154,9 @@ gcp-docuploader==0.6.4 \ --hash=sha256:01486419e24633af78fd0167db74a2763974765ee8078ca6eb6964d0ebd388af \ --hash=sha256:70861190c123d907b3b067da896265ead2eeb9263969d6955c9e0bb091b5ccbf # via -r requirements.in -gcp-releasetool==1.10.0 \ - --hash=sha256:72a38ca91b59c24f7e699e9227c90cbe4dd71b789383cb0164b088abae294c83 \ - --hash=sha256:8c7c99320208383d4bb2b808c6880eb7a81424afe7cdba3c8d84b25f4f0e097d +gcp-releasetool==1.10.5 \ + --hash=sha256:174b7b102d704b254f2a26a3eda2c684fd3543320ec239baf771542a2e58e109 \ + --hash=sha256:e29d29927fe2ca493105a82958c6873bb2b90d503acac56be2c229e74de0eec9 # via -r requirements.in google-api-core==2.10.2 \ --hash=sha256:10c06f7739fe57781f87523375e8e1a3a4674bf6392cd6131a3222182b971320 \ From 37486a4aaad267b6184060abba6da3974f122e3c Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Mon, 27 Feb 2023 22:32:00 +0000 Subject: [PATCH 1575/2016] feat: add `connection_properties` and `create_session` to `LoadJobConfig` (#1509) * feat: added `connection_properties` and `create_session` in load job --- packages/google-cloud-bigquery/.gitignore | 1 + .../google/cloud/bigquery/job/load.py | 59 +++++++++++++++++++ .../tests/unit/job/test_load.py | 2 + .../tests/unit/job/test_load_config.py | 21 +++++++ 4 files changed, 83 insertions(+) diff --git a/packages/google-cloud-bigquery/.gitignore b/packages/google-cloud-bigquery/.gitignore index 99c3a1444ed2..168b201f66f4 100644 --- a/packages/google-cloud-bigquery/.gitignore +++ b/packages/google-cloud-bigquery/.gitignore @@ -51,6 +51,7 @@ docs.metadata # Virtual environment env/ +venv/ # Test logs coverage.xml diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index 14a7fa30bf30..7481cb378f62 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -28,6 +28,7 @@ from google.cloud.bigquery.job.base import _AsyncJob from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference +from google.cloud.bigquery.query import ConnectionProperty class LoadJobConfig(_JobConfig): @@ -120,6 +121,25 @@ def clustering_fields(self, value): else: self._del_sub_prop("clustering") + @property + def connection_properties(self) -> List[ConnectionProperty]: + """Connection properties. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.connection_properties + + .. versionadded:: 3.7.0 + """ + resource = self._get_sub_prop("connectionProperties", []) + return [ConnectionProperty.from_api_repr(prop) for prop in resource] + + @connection_properties.setter + def connection_properties(self, value: Iterable[ConnectionProperty]): + self._set_sub_prop( + "connectionProperties", + [prop.to_api_repr() for prop in value], + ) + @property def create_disposition(self): """Optional[google.cloud.bigquery.job.CreateDisposition]: Specifies behavior @@ -134,6 +154,27 @@ def create_disposition(self): def create_disposition(self, value): self._set_sub_prop("createDisposition", value) + @property + def create_session(self) -> Optional[bool]: + """[Preview] If :data:`True`, creates a new session, where + :attr:`~google.cloud.bigquery.job.LoadJob.session_info` will contain a + random server generated session id. + + If :data:`False`, runs load job with an existing ``session_id`` passed in + :attr:`~google.cloud.bigquery.job.LoadJobConfig.connection_properties`, + otherwise runs load job in non-session mode. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_session + + .. versionadded:: 3.7.0 + """ + return self._get_sub_prop("createSession") + + @create_session.setter + def create_session(self, value: Optional[bool]): + self._set_sub_prop("createSession", value) + @property def decimal_target_types(self) -> Optional[FrozenSet[str]]: """Possible SQL data types to which the source decimal values are converted. @@ -629,6 +670,15 @@ def autodetect(self): """ return self._configuration.autodetect + @property + def connection_properties(self) -> List[ConnectionProperty]: + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.connection_properties`. + + .. versionadded:: 3.7.0 + """ + return self._configuration.connection_properties + @property def create_disposition(self): """See @@ -636,6 +686,15 @@ def create_disposition(self): """ return self._configuration.create_disposition + @property + def create_session(self) -> Optional[bool]: + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.create_session`. + + .. versionadded:: 3.7.0 + """ + return self._configuration.create_session + @property def encoding(self): """See diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index 143e1da59556..cf3ce1661bc2 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -392,6 +392,8 @@ def test_from_api_repr_bare(self): job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(len(job.connection_properties), 0) + self.assertIsNone(job.create_session) def test_from_api_with_encryption(self): self._setUpConstants() diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index 7f77fc085ca8..4d25fa106b3b 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -122,6 +122,27 @@ def test_create_disposition_setter(self): config.create_disposition = disposition self.assertEqual(config._properties["load"]["createDisposition"], disposition) + def test_connection_properties(self): + from google.cloud.bigquery.query import ConnectionProperty + + config = self._get_target_class()() + self.assertEqual(len(config.connection_properties), 0) + + session_id = ConnectionProperty("session_id", "abcd") + time_zone = ConnectionProperty("time_zone", "America/Chicago") + config.connection_properties = [session_id, time_zone] + self.assertEqual(len(config.connection_properties), 2) + self.assertEqual(config.connection_properties[0].key, "session_id") + self.assertEqual(config.connection_properties[0].value, "abcd") + self.assertEqual(config.connection_properties[1].key, "time_zone") + self.assertEqual(config.connection_properties[1].value, "America/Chicago") + + def test_create_session(self): + config = self._get_target_class()() + self.assertIsNone(config.create_session) + config.create_session = True + self.assertTrue(config.create_session) + def test_decimal_target_types_miss(self): config = self._get_target_class()() self.assertIsNone(config.decimal_target_types) From 7bbb033603d4c9c0eb31c2380a7843abdac529ca Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 1 Mar 2023 13:49:09 +0000 Subject: [PATCH 1576/2016] chore(deps): update all dependencies (#1501) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Anthonios Partheniou Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.gitignore | 1 - .../samples/geography/requirements.txt | 12 ++++++------ .../samples/magics/requirements.txt | 8 ++++---- .../samples/snippets/requirements.txt | 10 +++++----- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/.gitignore b/packages/google-cloud-bigquery/.gitignore index 168b201f66f4..99c3a1444ed2 100644 --- a/packages/google-cloud-bigquery/.gitignore +++ b/packages/google-cloud-bigquery/.gitignore @@ -51,7 +51,6 @@ docs.metadata # Virtual environment env/ -venv/ # Test logs coverage.xml diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index f22625653b2b..a0f64923c1b0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -7,19 +7,19 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.0.5 -Fiona==1.9.0 -geojson==3.0.0 +Fiona==1.9.1 +geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.16.0 -google-cloud-bigquery==3.5.0 +google-auth==2.16.1 +google-cloud-bigquery==3.6.0 google-cloud-bigquery-storage==2.18.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 -grpcio==1.51.1 +grpcio==1.51.3 idna==3.4 libcst==0.4.9 munch==2.5.0 @@ -40,6 +40,6 @@ requests==2.28.2 rsa==4.9 Shapely==2.0.1 six==1.16.0 -typing-extensions==4.4.0 +typing-extensions==4.5.0 typing-inspect==0.8.0 urllib3==1.26.14 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 725975116d27..0513b2b5bd29 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.0.5 google-cloud-bigquery-storage==2.18.1 google-auth-oauthlib==1.0.0 -grpcio==1.51.1 +grpcio==1.51.3 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.9.0; python_version >= '3.9' +ipython==8.10.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.3; python_version >= '3.8' +matplotlib==3.7.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 pytz==2022.7.1 -typing-extensions==4.4.0 +typing-extensions==4.5.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 50fd19e51388..de669fd163e0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ db-dtypes==1.0.5 -google-cloud-bigquery==3.5.0 +google-cloud-bigquery==3.6.0 google-cloud-bigquery-storage==2.18.1 google-auth-oauthlib==1.0.0 -grpcio==1.51.1 +grpcio==1.51.3 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.9.0; python_version >= '3.9' +ipython==8.10.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.3; python_version >= '3.8' +matplotlib==3.7.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 pytz==2022.7.1 -typing-extensions==4.4.0 +typing-extensions==4.5.0 From 93c2b75e2100790183a7e57d573d7a20dfb327c1 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 1 Mar 2023 20:42:48 +0000 Subject: [PATCH 1577/2016] chore(deps): update all dependencies (#1513) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index a0f64923c1b0..e9fb6538c31b 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.16.1 google-cloud-bigquery==3.6.0 -google-cloud-bigquery-storage==2.18.1 +google-cloud-bigquery-storage==2.19.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 0513b2b5bd29..3d55ae95aedb 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,11 +1,11 @@ db-dtypes==1.0.5 -google-cloud-bigquery-storage==2.18.1 +google-cloud-bigquery-storage==2.19.0 google-auth-oauthlib==1.0.0 grpcio==1.51.3 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.10.0; python_version >= '3.9' +ipython==8.11.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index de669fd163e0..b4fc299e7148 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,12 +1,12 @@ db-dtypes==1.0.5 google-cloud-bigquery==3.6.0 -google-cloud-bigquery-storage==2.18.1 +google-cloud-bigquery-storage==2.19.0 google-auth-oauthlib==1.0.0 grpcio==1.51.3 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.10.0; python_version >= '3.9' +ipython==8.11.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' From c8a52df20e298bf6c04a84d68c3459ec551f8c36 Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Thu, 2 Mar 2023 07:48:11 -0800 Subject: [PATCH 1578/2016] feat: add default_query_job_config property and property setter to BQ client (#1511) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [feature request](https://togithub.com/googleapis/python-bigquery/issues/1512) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes - [feature request](https://togithub.com/googleapis/python-bigquery/issues/1512)🦕 - [internal bug](https://b.corp.google.com/issues/271044948) --- .../google/cloud/bigquery/client.py | 11 +++++++++++ .../google-cloud-bigquery/tests/unit/test_client.py | 13 +++++++++++++ 2 files changed, 24 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index b032665284c1..af8eaf5a773d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -266,6 +266,17 @@ def location(self): """Default location for jobs / datasets / tables.""" return self._location + @property + def default_query_job_config(self): + """Default ``QueryJobConfig``. + Will be merged into job configs passed into the ``query`` method. + """ + return self._default_query_job_config + + @default_query_job_config.setter + def default_query_job_config(self, value: QueryJobConfig): + self._default_query_job_config = copy.deepcopy(value) + def close(self): """Close the underlying transport objects, releasing system resources. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 22f7286db3b6..f38874843ab3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -413,6 +413,19 @@ def test__get_query_results_hit(self): self.assertEqual(query_results.total_rows, 10) self.assertTrue(query_results.complete) + def test_default_query_job_config(self): + from google.cloud.bigquery import QueryJobConfig + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + self.assertIsNone(client.default_query_job_config) + + job_config = QueryJobConfig() + job_config.dry_run = True + client.default_query_job_config = job_config + self.assertIsInstance(client.default_query_job_config, QueryJobConfig) + def test_get_service_account_email(self): path = "/projects/%s/serviceAccount" % (self.PROJECT,) creds = _make_credentials() From 182e21649024646c7b00eb81ef29d543f3e0df14 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Sat, 4 Mar 2023 11:31:55 +0000 Subject: [PATCH 1579/2016] chore(deps): update all dependencies (#1514) --- .../samples/geography/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements-test.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 100e0639cd31..e0ec46254764 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.2.1 +pytest==7.2.2 mock==5.0.1 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e9fb6538c31b..cfda81374bae 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.16.1 +google-auth==2.16.2 google-cloud-bigquery==3.6.0 google-cloud-bigquery-storage==2.19.0 google-cloud-core==2.3.2 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index e8f3982c7160..3ed7558d5aed 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.1 +pytest==7.2.2 mock==5.0.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 3d55ae95aedb..55b828f1b8c8 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -7,7 +7,7 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.11.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.0; python_version >= '3.8' +matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index e8f3982c7160..3ed7558d5aed 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.1 +pytest==7.2.2 mock==5.0.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index b4fc299e7148..6c6b17ea85d1 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -8,7 +8,7 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.11.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.0; python_version >= '3.8' +matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 From a133a9ec9f515062ff2ef106ec5d085177498891 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 6 Mar 2023 23:06:30 +0000 Subject: [PATCH 1580/2016] chore(deps): update dependency charset-normalizer to v3.1.0 (#1518) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index cfda81374bae..75964dbe15fb 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==22.2.0 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==3.0.1 +charset-normalizer==3.1.0 click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 From 487b58ce60c3c0e056a5c41d09d89c69e75ea6b6 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 13 Mar 2023 12:46:05 -0500 Subject: [PATCH 1581/2016] chore(main): release 3.7.0 (#1507) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 67c43200bbac..5eda8912d0f4 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.7.0](https://github.com/googleapis/python-bigquery/compare/v3.6.0...v3.7.0) (2023-03-06) + + +### Features + +* Add `connection_properties` and `create_session` to `LoadJobConfig` ([#1509](https://github.com/googleapis/python-bigquery/issues/1509)) ([cd0aaa1](https://github.com/googleapis/python-bigquery/commit/cd0aaa15960e9ca7a0aaf411c8e4990f95421816)) +* Add default_query_job_config property and property setter to BQ client ([#1511](https://github.com/googleapis/python-bigquery/issues/1511)) ([a23092c](https://github.com/googleapis/python-bigquery/commit/a23092cad834c6a016f455d46fefa13bb6cdbf0f)) + + +### Documentation + +* Remove < 3.11 reference from README ([#1502](https://github.com/googleapis/python-bigquery/issues/1502)) ([c7417f4](https://github.com/googleapis/python-bigquery/commit/c7417f43563e20a3e6f1a57f46925fb274b28b07)) + ## [3.6.0](https://github.com/googleapis/python-bigquery/compare/v3.5.0...v3.6.0) (2023-02-22) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 102b960952dc..dc87b3c5b7f3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.6.0" +__version__ = "3.7.0" From 11179a33740c686a98f27f5b63caff7ca9d8458e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 14 Mar 2023 14:13:23 -0500 Subject: [PATCH 1582/2016] feat: expose configuration property on CopyJob, ExtractJob, LoadJob, QueryJob (#1521) * feat: expose configuration property on CopyJob, ExtractJob, LoadJob, QueryJob Note for google-cloud-bigquery developers: This also refactors these classes so that `_set_properties` does not modify the `_properties` dictionary in-place. Doing so was also mutating the request object, making it difficult to debug what request was _actually_ sent. Before this change, many tests hallucinated that the request was always equal to the response. * E google.api_core.exceptions.BadRequest: 400 Clone operation with write disposition WRITE_TRUNCATE is not supported. Please try again with WRITE_EMPTY. --- .../google/cloud/bigquery/client.py | 9 +- .../google/cloud/bigquery/job/base.py | 375 +++++++++--------- .../google/cloud/bigquery/job/copy_.py | 20 +- .../google/cloud/bigquery/job/extract.py | 23 +- .../google/cloud/bigquery/job/load.py | 64 +-- .../google/cloud/bigquery/job/query.py | 65 +-- .../tests/system/test_client.py | 2 +- .../tests/unit/job/test_base.py | 38 +- .../tests/unit/job/test_load.py | 5 +- .../tests/unit/test_client.py | 21 +- 10 files changed, 333 insertions(+), 289 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index af8eaf5a773d..a53819cdec02 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1976,15 +1976,8 @@ def create_job( ) destination = _get_sub_prop(job_config, ["copy", "destinationTable"]) destination = TableReference.from_api_repr(destination) - sources = [] - source_configs = _get_sub_prop(job_config, ["copy", "sourceTables"]) - if source_configs is None: - source_configs = [_get_sub_prop(job_config, ["copy", "sourceTable"])] - for source_config in source_configs: - table_ref = TableReference.from_api_repr(source_config) - sources.append(table_ref) return self.copy_table( - sources, + [], # Source table(s) already in job_config resource. destination, job_config=typing.cast(CopyJobConfig, copy_job_config), retry=retry, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 86701e295777..55e80b2ebc6e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -19,7 +19,7 @@ import http import threading import typing -from typing import Dict, Optional, Sequence +from typing import ClassVar, Dict, Optional, Sequence from google.api_core import exceptions import google.api_core.future.polling @@ -150,6 +150,182 @@ def _from_api_repr(cls, resource): return job_ref +class _JobConfig(object): + """Abstract base class for job configuration objects. + + Args: + job_type (str): The key to use for the job configuration. + """ + + def __init__(self, job_type, **kwargs): + self._job_type = job_type + self._properties = {job_type: {}} + for prop, val in kwargs.items(): + setattr(self, prop, val) + + def __setattr__(self, name, value): + """Override to be able to raise error if an unknown property is being set""" + if not name.startswith("_") and not hasattr(type(self), name): + raise AttributeError( + "Property {} is unknown for {}.".format(name, type(self)) + ) + super(_JobConfig, self).__setattr__(name, value) + + @property + def labels(self): + """Dict[str, str]: Labels for the job. + + This method always returns a dict. Once a job has been created on the + server, its labels cannot be modified anymore. + + Raises: + ValueError: If ``value`` type is invalid. + """ + return self._properties.setdefault("labels", {}) + + @labels.setter + def labels(self, value): + if not isinstance(value, dict): + raise ValueError("Pass a dict") + self._properties["labels"] = value + + def _get_sub_prop(self, key, default=None): + """Get a value in the ``self._properties[self._job_type]`` dictionary. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access + those properties:: + + self._get_sub_prop('destinationTable') + + This is equivalent to using the ``_helpers._get_sub_prop`` function:: + + _helpers._get_sub_prop( + self._properties, ['query', 'destinationTable']) + + Args: + key (str): + Key for the value to get in the + ``self._properties[self._job_type]`` dictionary. + default (Optional[object]): + Default value to return if the key is not found. + Defaults to :data:`None`. + + Returns: + object: The value if present or the default. + """ + return _helpers._get_sub_prop( + self._properties, [self._job_type, key], default=default + ) + + def _set_sub_prop(self, key, value): + """Set a value in the ``self._properties[self._job_type]`` dictionary. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to set + those properties:: + + self._set_sub_prop('useLegacySql', False) + + This is equivalent to using the ``_helper._set_sub_prop`` function:: + + _helper._set_sub_prop( + self._properties, ['query', 'useLegacySql'], False) + + Args: + key (str): + Key to set in the ``self._properties[self._job_type]`` + dictionary. + value (object): Value to set. + """ + _helpers._set_sub_prop(self._properties, [self._job_type, key], value) + + def _del_sub_prop(self, key): + """Remove ``key`` from the ``self._properties[self._job_type]`` dict. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear + those properties:: + + self._del_sub_prop('useLegacySql') + + This is equivalent to using the ``_helper._del_sub_prop`` function:: + + _helper._del_sub_prop( + self._properties, ['query', 'useLegacySql']) + + Args: + key (str): + Key to remove in the ``self._properties[self._job_type]`` + dictionary. + """ + _helpers._del_sub_prop(self._properties, [self._job_type, key]) + + def to_api_repr(self) -> dict: + """Build an API representation of the job config. + + Returns: + Dict: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + def _fill_from_default(self, default_job_config): + """Merge this job config with a default job config. + + The keys in this object take precedence over the keys in the default + config. The merge is done at the top-level as well as for keys one + level below the job type. + + Args: + default_job_config (google.cloud.bigquery.job._JobConfig): + The default job config that will be used to fill in self. + + Returns: + google.cloud.bigquery.job._JobConfig: A new (merged) job config. + """ + if self._job_type != default_job_config._job_type: + raise TypeError( + "attempted to merge two incompatible job types: " + + repr(self._job_type) + + ", " + + repr(default_job_config._job_type) + ) + + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. + new_job_config = self.__class__() # pytype: disable=missing-parameter + + default_job_properties = copy.deepcopy(default_job_config._properties) + for key in self._properties: + if key != self._job_type: + default_job_properties[key] = self._properties[key] + + default_job_properties[self._job_type].update(self._properties[self._job_type]) + new_job_config._properties = default_job_properties + + return new_job_config + + @classmethod + def from_api_repr(cls, resource: dict) -> "_JobConfig": + """Factory: construct a job configuration given its API representation + + Args: + resource (Dict): + A job configuration in the same representation as is returned + from the API. + + Returns: + google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. + """ + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. + job_config = cls() # type: ignore # pytype: disable=missing-parameter + job_config._properties = resource + return job_config + + class _AsyncJob(google.api_core.future.polling.PollingFuture): """Base class for asynchronous jobs. @@ -161,6 +337,9 @@ class _AsyncJob(google.api_core.future.polling.PollingFuture): Client which holds credentials and project configuration. """ + _JOB_TYPE = "unknown" + _CONFIG_CLASS: ClassVar + def __init__(self, job_id, client): super(_AsyncJob, self).__init__() @@ -176,6 +355,13 @@ def __init__(self, job_id, client): self._result_set = False self._completion_lock = threading.Lock() + @property + def configuration(self) -> _JobConfig: + """Job-type specific configurtion.""" + configuration = self._CONFIG_CLASS() + configuration._properties = self._properties.setdefault("configuration", {}) + return configuration + @property def job_id(self): """str: ID of the job.""" @@ -426,8 +612,7 @@ def _set_properties(self, api_response): api_response (Dict): response returned from an API call. """ cleaned = api_response.copy() - - statistics = cleaned.get("statistics", {}) + statistics = cleaned.setdefault("statistics", {}) if "creationTime" in statistics: statistics["creationTime"] = float(statistics["creationTime"]) if "startTime" in statistics: @@ -435,13 +620,7 @@ def _set_properties(self, api_response): if "endTime" in statistics: statistics["endTime"] = float(statistics["endTime"]) - # Save configuration to keep reference same in self._configuration. - cleaned_config = cleaned.pop("configuration", {}) - configuration = self._properties.pop("configuration", {}) - self._properties.clear() - self._properties.update(cleaned) - self._properties["configuration"] = configuration - self._properties["configuration"].update(cleaned_config) + self._properties = cleaned # For Future interface self._set_future_result() @@ -751,182 +930,6 @@ def __repr__(self): return result -class _JobConfig(object): - """Abstract base class for job configuration objects. - - Args: - job_type (str): The key to use for the job configuration. - """ - - def __init__(self, job_type, **kwargs): - self._job_type = job_type - self._properties = {job_type: {}} - for prop, val in kwargs.items(): - setattr(self, prop, val) - - def __setattr__(self, name, value): - """Override to be able to raise error if an unknown property is being set""" - if not name.startswith("_") and not hasattr(type(self), name): - raise AttributeError( - "Property {} is unknown for {}.".format(name, type(self)) - ) - super(_JobConfig, self).__setattr__(name, value) - - @property - def labels(self): - """Dict[str, str]: Labels for the job. - - This method always returns a dict. Once a job has been created on the - server, its labels cannot be modified anymore. - - Raises: - ValueError: If ``value`` type is invalid. - """ - return self._properties.setdefault("labels", {}) - - @labels.setter - def labels(self, value): - if not isinstance(value, dict): - raise ValueError("Pass a dict") - self._properties["labels"] = value - - def _get_sub_prop(self, key, default=None): - """Get a value in the ``self._properties[self._job_type]`` dictionary. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access - those properties:: - - self._get_sub_prop('destinationTable') - - This is equivalent to using the ``_helpers._get_sub_prop`` function:: - - _helpers._get_sub_prop( - self._properties, ['query', 'destinationTable']) - - Args: - key (str): - Key for the value to get in the - ``self._properties[self._job_type]`` dictionary. - default (Optional[object]): - Default value to return if the key is not found. - Defaults to :data:`None`. - - Returns: - object: The value if present or the default. - """ - return _helpers._get_sub_prop( - self._properties, [self._job_type, key], default=default - ) - - def _set_sub_prop(self, key, value): - """Set a value in the ``self._properties[self._job_type]`` dictionary. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to set - those properties:: - - self._set_sub_prop('useLegacySql', False) - - This is equivalent to using the ``_helper._set_sub_prop`` function:: - - _helper._set_sub_prop( - self._properties, ['query', 'useLegacySql'], False) - - Args: - key (str): - Key to set in the ``self._properties[self._job_type]`` - dictionary. - value (object): Value to set. - """ - _helpers._set_sub_prop(self._properties, [self._job_type, key], value) - - def _del_sub_prop(self, key): - """Remove ``key`` from the ``self._properties[self._job_type]`` dict. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear - those properties:: - - self._del_sub_prop('useLegacySql') - - This is equivalent to using the ``_helper._del_sub_prop`` function:: - - _helper._del_sub_prop( - self._properties, ['query', 'useLegacySql']) - - Args: - key (str): - Key to remove in the ``self._properties[self._job_type]`` - dictionary. - """ - _helpers._del_sub_prop(self._properties, [self._job_type, key]) - - def to_api_repr(self) -> dict: - """Build an API representation of the job config. - - Returns: - Dict: A dictionary in the format used by the BigQuery API. - """ - return copy.deepcopy(self._properties) - - def _fill_from_default(self, default_job_config): - """Merge this job config with a default job config. - - The keys in this object take precedence over the keys in the default - config. The merge is done at the top-level as well as for keys one - level below the job type. - - Args: - default_job_config (google.cloud.bigquery.job._JobConfig): - The default job config that will be used to fill in self. - - Returns: - google.cloud.bigquery.job._JobConfig: A new (merged) job config. - """ - if self._job_type != default_job_config._job_type: - raise TypeError( - "attempted to merge two incompatible job types: " - + repr(self._job_type) - + ", " - + repr(default_job_config._job_type) - ) - - # cls is one of the job config subclasses that provides the job_type argument to - # this base class on instantiation, thus missing-parameter warning is a false - # positive here. - new_job_config = self.__class__() # pytype: disable=missing-parameter - - default_job_properties = copy.deepcopy(default_job_config._properties) - for key in self._properties: - if key != self._job_type: - default_job_properties[key] = self._properties[key] - - default_job_properties[self._job_type].update(self._properties[self._job_type]) - new_job_config._properties = default_job_properties - - return new_job_config - - @classmethod - def from_api_repr(cls, resource: dict) -> "_JobConfig": - """Factory: construct a job configuration given its API representation - - Args: - resource (Dict): - A job configuration in the same representation as is returned - from the API. - - Returns: - google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. - """ - # cls is one of the job config subclasses that provides the job_type argument to - # this base class on instantiation, thus missing-parameter warning is a false - # positive here. - job_config = cls() # type: ignore # pytype: disable=missing-parameter - job_config._properties = resource - return job_config - - class ScriptStackFrame(object): """Stack frame showing the line/column/procedure name where the current evaluation happened. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py index 9d7548ec5f46..5c52aeed6470 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/copy_.py @@ -14,6 +14,7 @@ """Classes for copy jobs.""" +import typing from typing import Optional from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -160,15 +161,13 @@ class CopyJob(_AsyncJob): """ _JOB_TYPE = "copy" + _CONFIG_CLASS = CopyJobConfig def __init__(self, job_id, sources, destination, client, job_config=None): super(CopyJob, self).__init__(job_id, client) - if not job_config: - job_config = CopyJobConfig() - - self._configuration = job_config - self._properties["configuration"] = job_config._properties + if job_config is not None: + self._properties["configuration"] = job_config._properties if destination: _helpers._set_sub_prop( @@ -185,6 +184,11 @@ def __init__(self, job_id, sources, destination, client, job_config=None): source_resources, ) + @property + def configuration(self) -> CopyJobConfig: + """The configuration for this copy job.""" + return typing.cast(CopyJobConfig, super().configuration) + @property def destination(self): """google.cloud.bigquery.table.TableReference: Table into which data @@ -223,14 +227,14 @@ def create_disposition(self): """See :attr:`google.cloud.bigquery.job.CopyJobConfig.create_disposition`. """ - return self._configuration.create_disposition + return self.configuration.create_disposition @property def write_disposition(self): """See :attr:`google.cloud.bigquery.job.CopyJobConfig.write_disposition`. """ - return self._configuration.write_disposition + return self.configuration.write_disposition @property def destination_encryption_configuration(self): @@ -243,7 +247,7 @@ def destination_encryption_configuration(self): See :attr:`google.cloud.bigquery.job.CopyJobConfig.destination_encryption_configuration`. """ - return self._configuration.destination_encryption_configuration + return self.configuration.destination_encryption_configuration def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py index 52aa036c900a..64ec39b7603c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/extract.py @@ -14,6 +14,8 @@ """Classes for extract (export) jobs.""" +import typing + from google.cloud.bigquery import _helpers from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.table import Table @@ -125,15 +127,13 @@ class ExtractJob(_AsyncJob): """ _JOB_TYPE = "extract" + _CONFIG_CLASS = ExtractJobConfig def __init__(self, job_id, source, destination_uris, client, job_config=None): super(ExtractJob, self).__init__(job_id, client) - if job_config is None: - job_config = ExtractJobConfig() - - self._properties["configuration"] = job_config._properties - self._configuration = job_config + if job_config is not None: + self._properties["configuration"] = job_config._properties if source: source_ref = {"projectId": source.project, "datasetId": source.dataset_id} @@ -156,6 +156,11 @@ def __init__(self, job_id, source, destination_uris, client, job_config=None): destination_uris, ) + @property + def configuration(self) -> ExtractJobConfig: + """The configuration for this extract job.""" + return typing.cast(ExtractJobConfig, super().configuration) + @property def source(self): """Union[ \ @@ -189,28 +194,28 @@ def compression(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`. """ - return self._configuration.compression + return self.configuration.compression @property def destination_format(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`. """ - return self._configuration.destination_format + return self.configuration.destination_format @property def field_delimiter(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. """ - return self._configuration.field_delimiter + return self.configuration.field_delimiter @property def print_header(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`. """ - return self._configuration.print_header + return self.configuration.print_header @property def destination_uri_file_counts(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index 7481cb378f62..6b6c8bfd9954 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -14,6 +14,7 @@ """Classes for load jobs.""" +import typing from typing import FrozenSet, List, Iterable, Optional from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -605,15 +606,13 @@ class LoadJob(_AsyncJob): """ _JOB_TYPE = "load" + _CONFIG_CLASS = LoadJobConfig def __init__(self, job_id, source_uris, destination, client, job_config=None): super(LoadJob, self).__init__(job_id, client) - if not job_config: - job_config = LoadJobConfig() - - self._configuration = job_config - self._properties["configuration"] = job_config._properties + if job_config is not None: + self._properties["configuration"] = job_config._properties if source_uris is not None: _helpers._set_sub_prop( @@ -627,6 +626,11 @@ def __init__(self, job_id, source_uris, destination, client, job_config=None): destination.to_api_repr(), ) + @property + def configuration(self) -> LoadJobConfig: + """The configuration for this load job.""" + return typing.cast(LoadJobConfig, super().configuration) + @property def destination(self): """google.cloud.bigquery.table.TableReference: table where loaded rows are written @@ -654,21 +658,21 @@ def allow_jagged_rows(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`. """ - return self._configuration.allow_jagged_rows + return self.configuration.allow_jagged_rows @property def allow_quoted_newlines(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`. """ - return self._configuration.allow_quoted_newlines + return self.configuration.allow_quoted_newlines @property def autodetect(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`. """ - return self._configuration.autodetect + return self.configuration.autodetect @property def connection_properties(self) -> List[ConnectionProperty]: @@ -677,14 +681,14 @@ def connection_properties(self) -> List[ConnectionProperty]: .. versionadded:: 3.7.0 """ - return self._configuration.connection_properties + return self.configuration.connection_properties @property def create_disposition(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`. """ - return self._configuration.create_disposition + return self.configuration.create_disposition @property def create_session(self) -> Optional[bool]: @@ -693,84 +697,84 @@ def create_session(self) -> Optional[bool]: .. versionadded:: 3.7.0 """ - return self._configuration.create_session + return self.configuration.create_session @property def encoding(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`. """ - return self._configuration.encoding + return self.configuration.encoding @property def field_delimiter(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`. """ - return self._configuration.field_delimiter + return self.configuration.field_delimiter @property def ignore_unknown_values(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`. """ - return self._configuration.ignore_unknown_values + return self.configuration.ignore_unknown_values @property def max_bad_records(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`. """ - return self._configuration.max_bad_records + return self.configuration.max_bad_records @property def null_marker(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`. """ - return self._configuration.null_marker + return self.configuration.null_marker @property def quote_character(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`. """ - return self._configuration.quote_character + return self.configuration.quote_character @property def reference_file_schema_uri(self): """See: attr:`google.cloud.bigquery.job.LoadJobConfig.reference_file_schema_uri`. """ - return self._configuration.reference_file_schema_uri + return self.configuration.reference_file_schema_uri @property def skip_leading_rows(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`. """ - return self._configuration.skip_leading_rows + return self.configuration.skip_leading_rows @property def source_format(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`. """ - return self._configuration.source_format + return self.configuration.source_format @property def write_disposition(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`. """ - return self._configuration.write_disposition + return self.configuration.write_disposition @property def schema(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.schema`. """ - return self._configuration.schema + return self.configuration.schema @property def destination_encryption_configuration(self): @@ -783,7 +787,7 @@ def destination_encryption_configuration(self): See :attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`. """ - return self._configuration.destination_encryption_configuration + return self.configuration.destination_encryption_configuration @property def destination_table_description(self): @@ -792,7 +796,7 @@ def destination_table_description(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description """ - return self._configuration.destination_table_description + return self.configuration.destination_table_description @property def destination_table_friendly_name(self): @@ -801,42 +805,42 @@ def destination_table_friendly_name(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name """ - return self._configuration.destination_table_friendly_name + return self.configuration.destination_table_friendly_name @property def range_partitioning(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. """ - return self._configuration.range_partitioning + return self.configuration.range_partitioning @property def time_partitioning(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`. """ - return self._configuration.time_partitioning + return self.configuration.time_partitioning @property def use_avro_logical_types(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`. """ - return self._configuration.use_avro_logical_types + return self.configuration.use_avro_logical_types @property def clustering_fields(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`. """ - return self._configuration.clustering_fields + return self.configuration.clustering_fields @property def schema_update_options(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`. """ - return self._configuration.schema_update_options + return self.configuration.schema_update_options @property def input_file_bytes(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index c63fa0892f46..e6d6d682d5c7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -745,17 +745,15 @@ class QueryJob(_AsyncJob): _JOB_TYPE = "query" _UDF_KEY = "userDefinedFunctionResources" + _CONFIG_CLASS = QueryJobConfig def __init__(self, job_id, query, client, job_config=None): super(QueryJob, self).__init__(job_id, client) - if job_config is None: - job_config = QueryJobConfig() - if job_config.use_legacy_sql is None: - job_config.use_legacy_sql = False - - self._properties["configuration"] = job_config._properties - self._configuration = job_config + if job_config is not None: + self._properties["configuration"] = job_config._properties + if self.configuration.use_legacy_sql is None: + self.configuration.use_legacy_sql = False if query: _helpers._set_sub_prop( @@ -771,7 +769,12 @@ def allow_large_results(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. """ - return self._configuration.allow_large_results + return self.configuration.allow_large_results + + @property + def configuration(self) -> QueryJobConfig: + """The configuration for this query job.""" + return typing.cast(QueryJobConfig, super().configuration) @property def connection_properties(self) -> List[ConnectionProperty]: @@ -780,14 +783,14 @@ def connection_properties(self) -> List[ConnectionProperty]: .. versionadded:: 2.29.0 """ - return self._configuration.connection_properties + return self.configuration.connection_properties @property def create_disposition(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.create_disposition`. """ - return self._configuration.create_disposition + return self.configuration.create_disposition @property def create_session(self) -> Optional[bool]: @@ -796,21 +799,21 @@ def create_session(self) -> Optional[bool]: .. versionadded:: 2.29.0 """ - return self._configuration.create_session + return self.configuration.create_session @property def default_dataset(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.default_dataset`. """ - return self._configuration.default_dataset + return self.configuration.default_dataset @property def destination(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.destination`. """ - return self._configuration.destination + return self.configuration.destination @property def destination_encryption_configuration(self): @@ -823,28 +826,28 @@ def destination_encryption_configuration(self): See :attr:`google.cloud.bigquery.job.QueryJobConfig.destination_encryption_configuration`. """ - return self._configuration.destination_encryption_configuration + return self.configuration.destination_encryption_configuration @property def dry_run(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.dry_run`. """ - return self._configuration.dry_run + return self.configuration.dry_run @property def flatten_results(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.flatten_results`. """ - return self._configuration.flatten_results + return self.configuration.flatten_results @property def priority(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.priority`. """ - return self._configuration.priority + return self.configuration.priority @property def query(self): @@ -862,90 +865,90 @@ def query_parameters(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.query_parameters`. """ - return self._configuration.query_parameters + return self.configuration.query_parameters @property def udf_resources(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.udf_resources`. """ - return self._configuration.udf_resources + return self.configuration.udf_resources @property def use_legacy_sql(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. """ - return self._configuration.use_legacy_sql + return self.configuration.use_legacy_sql @property def use_query_cache(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. """ - return self._configuration.use_query_cache + return self.configuration.use_query_cache @property def write_disposition(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.write_disposition`. """ - return self._configuration.write_disposition + return self.configuration.write_disposition @property def maximum_billing_tier(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. """ - return self._configuration.maximum_billing_tier + return self.configuration.maximum_billing_tier @property def maximum_bytes_billed(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. """ - return self._configuration.maximum_bytes_billed + return self.configuration.maximum_bytes_billed @property def range_partitioning(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`. """ - return self._configuration.range_partitioning + return self.configuration.range_partitioning @property def table_definitions(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.table_definitions`. """ - return self._configuration.table_definitions + return self.configuration.table_definitions @property def time_partitioning(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.time_partitioning`. """ - return self._configuration.time_partitioning + return self.configuration.time_partitioning @property def clustering_fields(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`. """ - return self._configuration.clustering_fields + return self.configuration.clustering_fields @property def schema_update_options(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.schema_update_options`. """ - return self._configuration.schema_update_options + return self.configuration.schema_update_options def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" # Use to_api_repr to allow for some configuration properties to be set # automatically. - configuration = self._configuration.to_api_repr() + configuration = self.configuration.to_api_repr() return { "jobReference": self._properties["jobReference"], "configuration": configuration, @@ -1257,7 +1260,7 @@ def _format_for_exception(message: str, query: str): """ template = "{message}\n\n{header}\n\n{ruler}\n{body}\n{ruler}" - lines = query.splitlines() + lines = query.splitlines() if query is not None else [""] max_line_len = max(len(line) for line in lines) header = "-----Query Job SQL Follows-----" diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 14a9b04d4040..a69bb92c5a4d 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2455,7 +2455,7 @@ def test_table_clones(dataset_id): # Now create a clone before modifying the original table data. copy_config = CopyJobConfig() copy_config.operation_type = OperationType.CLONE - copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + copy_config.write_disposition = bigquery.WriteDisposition.WRITE_EMPTY copy_job = client.copy_table( sources=table_path_source, diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index ed0dc731b05d..3ff96e8746fe 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -432,11 +432,19 @@ def _set_properties_job(self): def test__set_properties_no_stats(self): config = {"test": True} resource = {"configuration": config} + expected = resource.copy() + expected["statistics"] = {} job = self._set_properties_job() + original_resource = job._properties job._set_properties(resource) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) + + # Make sure we don't mutate the object used in the request, as that + # makes debugging more difficult and leads to false positives in unit + # tests. + self.assertIsNot(job._properties, original_resource) def test__set_properties_w_creation_time(self): now, millis = self._datetime_and_millis() @@ -546,6 +554,8 @@ def test__begin_defaults(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} job = self._set_properties_job() builder = job.to_api_repr = mock.Mock() builder.return_value = resource @@ -564,7 +574,7 @@ def test__begin_defaults(self): data=resource, timeout=None, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test__begin_explicit(self): from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -578,6 +588,8 @@ def test__begin_explicit(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} job = self._set_properties_job() builder = job.to_api_repr = mock.Mock() builder.return_value = resource @@ -598,7 +610,7 @@ def test__begin_explicit(self): data=resource, timeout=7.5, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test_exists_defaults_miss(self): from google.cloud.exceptions import NotFound @@ -685,6 +697,8 @@ def test_reload_defaults(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} job = self._set_properties_job() job._properties["jobReference"]["location"] = self.LOCATION call_api = job._client._call_api = mock.Mock() @@ -703,7 +717,7 @@ def test_reload_defaults(self): query_params={"location": self.LOCATION}, timeout=None, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test_reload_explicit(self): from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -717,6 +731,8 @@ def test_reload_explicit(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} job = self._set_properties_job() client = _make_client(project=other_project) call_api = client._call_api = mock.Mock() @@ -736,7 +752,7 @@ def test_reload_explicit(self): query_params={}, timeout=4.2, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test_cancel_defaults(self): resource = { @@ -747,6 +763,8 @@ def test_cancel_defaults(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} response = {"job": resource} job = self._set_properties_job() job._properties["jobReference"]["location"] = self.LOCATION @@ -764,7 +782,7 @@ def test_cancel_defaults(self): query_params={"location": self.LOCATION}, timeout=None, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test_cancel_explicit(self): other_project = "other-project-234" @@ -776,6 +794,8 @@ def test_cancel_explicit(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} response = {"job": resource} job = self._set_properties_job() client = _make_client(project=other_project) @@ -797,7 +817,7 @@ def test_cancel_explicit(self): query_params={}, timeout=7.5, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test_cancel_w_custom_retry(self): from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -811,6 +831,8 @@ def test_cancel_w_custom_retry(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} response = {"job": resource} job = self._set_properties_job() @@ -830,7 +852,7 @@ def test_cancel_w_custom_retry(self): final_attributes.assert_called() self.assertTrue(result) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) self.assertEqual( fake_api_request.call_args_list, [ diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index cf3ce1661bc2..c6bbaa2fb622 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -451,6 +451,7 @@ def test_begin_w_bound_client(self): conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job.configuration.reference_file_schema_uri = self.REFERENCE_FILE_SCHEMA_URI path = "/projects/{}/jobs".format(self.PROJECT) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" @@ -498,6 +499,7 @@ def test_begin_w_autodetect(self): job = self._make_one( self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config ) + job.configuration.reference_file_schema_uri = self.REFERENCE_FILE_SCHEMA_URI with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -554,19 +556,18 @@ def test_begin_w_alternate_client(self): "sourceFormat": "CSV", "useAvroLogicalTypes": True, "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + "referenceFileSchemaUri": "gs://path/to/reference", "schema": { "fields": [ { "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "description": None, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, }, ] }, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f38874843ab3..f52eb825a3b0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2743,17 +2743,21 @@ def _create_job_helper(self, job_config): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - RESOURCE = { + resource = { + "jobReference": {"projectId": self.PROJECT, "jobId": "random-id"}, + "configuration": job_config, + } + expected = { "jobReference": {"projectId": self.PROJECT, "jobId": mock.ANY}, "configuration": job_config, } - conn = client._connection = make_connection(RESOURCE) + conn = client._connection = make_connection(resource) client.create_job(job_config=job_config) conn.api_request.assert_called_once_with( method="POST", path="/projects/%s/jobs" % self.PROJECT, - data=RESOURCE, + data=expected, timeout=DEFAULT_TIMEOUT, ) @@ -3156,7 +3160,7 @@ def test_load_table_from_uri(self): self.assertEqual(job_config.to_api_repr(), original_config_copy.to_api_repr()) self.assertIsInstance(job, LoadJob) - self.assertIsInstance(job._configuration, LoadJobConfig) + self.assertIsInstance(job.configuration, LoadJobConfig) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.source_uris), [SOURCE_URI]) @@ -3662,7 +3666,7 @@ def test_copy_table_w_source_strings(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = make_connection({}) + conn = client._connection = make_connection({}) sources = [ "dataset_wo_proj.some_table", "other_project.other_dataset.other_table", @@ -3674,6 +3678,11 @@ def test_copy_table_w_source_strings(self): job = client.copy_table(sources, destination) + # Replace job with the request instead of response so we can verify those properties. + _, kwargs = conn.api_request.call_args + request = kwargs["data"] + job._properties = request + expected_sources = [ DatasetReference(client.project, "dataset_wo_proj").table("some_table"), DatasetReference("other_project", "other_dataset").table("other_table"), @@ -3750,7 +3759,7 @@ def test_copy_table_w_valid_job_config(self): data=RESOURCE, timeout=DEFAULT_TIMEOUT, ) - self.assertIsInstance(job._configuration, CopyJobConfig) + self.assertIsInstance(job.configuration, CopyJobConfig) # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() From bbf71d734e7c72b4754ff281e0f600091dd77b67 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 16 Mar 2023 08:28:02 -0400 Subject: [PATCH 1583/2016] chore(deps): Update nox in .kokoro/requirements.in [autoapprove] (#1527) Source-Link: https://github.com/googleapis/synthtool/commit/92006bb3cdc84677aa93c7f5235424ec2b157146 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 2 +- .../google-cloud-bigquery/.kokoro/requirements.in | 2 +- .../google-cloud-bigquery/.kokoro/requirements.txt | 14 +++++--------- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 5fc5daa31783..b8edda51cf46 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8555f0e37e6261408f792bfd6635102d2da5ad73f8f09bcb24f25e6afb5fac97 + digest: sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.in b/packages/google-cloud-bigquery/.kokoro/requirements.in index 882178ce6001..ec867d9fd65a 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.in +++ b/packages/google-cloud-bigquery/.kokoro/requirements.in @@ -5,6 +5,6 @@ typing-extensions twine wheel setuptools -nox +nox>=2022.11.21 # required to remove dependency on py charset-normalizer<3 click<8.1.0 diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index fa99c12908f0..66a2172a76a8 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -1,6 +1,6 @@ # -# This file is autogenerated by pip-compile with python 3.10 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: # # pip-compile --allow-unsafe --generate-hashes requirements.in # @@ -335,9 +335,9 @@ more-itertools==9.0.0 \ --hash=sha256:250e83d7e81d0c87ca6bd942e6aeab8cc9daa6096d12c5308f3f92fa5e5c1f41 \ --hash=sha256:5a6257e40878ef0520b1803990e3e22303a41b5714006c32a3fd8304b26ea1ab # via jaraco-classes -nox==2022.8.7 \ - --hash=sha256:1b894940551dc5c389f9271d197ca5d655d40bdc6ccf93ed6880e4042760a34b \ - --hash=sha256:96cca88779e08282a699d672258ec01eb7c792d35bbbf538c723172bce23212c +nox==2022.11.21 \ + --hash=sha256:0e41a990e290e274cb205a976c4c97ee3c5234441a8132c8c3fd9ea3c22149eb \ + --hash=sha256:e21c31de0711d1274ca585a2c5fde36b1aa962005ba8e9322bf5eeed16dcd684 # via -r requirements.in packaging==21.3 \ --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ @@ -380,10 +380,6 @@ protobuf==3.20.3 \ # gcp-docuploader # gcp-releasetool # google-api-core -py==1.11.0 \ - --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ - --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 - # via nox pyasn1==0.4.8 \ --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \ --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba From cbc5ccb9fc259981f305be4245c9021ac24ffd71 Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Fri, 17 Mar 2023 13:03:16 -0700 Subject: [PATCH 1584/2016] feat: add default LoadJobConfig to Client (#1526) --- .../google/cloud/bigquery/client.py | 121 +++-- .../google/cloud/bigquery/job/base.py | 6 +- .../tests/system/test_client.py | 8 +- .../tests/unit/job/test_base.py | 29 +- .../tests/unit/test_client.py | 513 ++++++++++++++++++ 5 files changed, 621 insertions(+), 56 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a53819cdec02..d8fbfb69eb1e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -210,6 +210,9 @@ class Client(ClientWithProject): default_query_job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): Default ``QueryJobConfig``. Will be merged into job configs passed into the ``query`` method. + default_load_job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): + Default ``LoadJobConfig``. + Will be merged into job configs passed into the ``load_table_*`` methods. client_info (Optional[google.api_core.client_info.ClientInfo]): The client info used to send a user-agent string along with API requests. If ``None``, then default info will be used. Generally, @@ -235,6 +238,7 @@ def __init__( _http=None, location=None, default_query_job_config=None, + default_load_job_config=None, client_info=None, client_options=None, ) -> None: @@ -260,6 +264,7 @@ def __init__( self._connection = Connection(self, **kw_args) self._location = location self._default_query_job_config = copy.deepcopy(default_query_job_config) + self._default_load_job_config = copy.deepcopy(default_load_job_config) @property def location(self): @@ -277,6 +282,17 @@ def default_query_job_config(self): def default_query_job_config(self, value: QueryJobConfig): self._default_query_job_config = copy.deepcopy(value) + @property + def default_load_job_config(self): + """Default ``LoadJobConfig``. + Will be merged into job configs passed into the ``load_table_*`` methods. + """ + return self._default_load_job_config + + @default_load_job_config.setter + def default_load_job_config(self, value: LoadJobConfig): + self._default_load_job_config = copy.deepcopy(value) + def close(self): """Close the underlying transport objects, releasing system resources. @@ -2330,8 +2346,8 @@ def load_table_from_uri( Raises: TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ job_id = _make_job_id(job_id, job_id_prefix) @@ -2348,11 +2364,14 @@ def load_table_from_uri( destination = _table_arg_to_table_ref(destination, default_project=self.project) - if job_config: - job_config = copy.deepcopy(job_config) - _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) + if job_config is not None: + _verify_job_config_type(job_config, LoadJobConfig) + else: + job_config = job.LoadJobConfig() - load_job = job.LoadJob(job_ref, source_uris, destination, self, job_config) + new_job_config = job_config._fill_from_default(self._default_load_job_config) + + load_job = job.LoadJob(job_ref, source_uris, destination, self, new_job_config) load_job._begin(retry=retry, timeout=timeout) return load_job @@ -2424,8 +2443,8 @@ def load_table_from_file( mode. TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ job_id = _make_job_id(job_id, job_id_prefix) @@ -2437,10 +2456,15 @@ def load_table_from_file( destination = _table_arg_to_table_ref(destination, default_project=self.project) job_ref = job._JobReference(job_id, project=project, location=location) - if job_config: - job_config = copy.deepcopy(job_config) - _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) - load_job = job.LoadJob(job_ref, None, destination, self, job_config) + + if job_config is not None: + _verify_job_config_type(job_config, LoadJobConfig) + else: + job_config = job.LoadJobConfig() + + new_job_config = job_config._fill_from_default(self._default_load_job_config) + + load_job = job.LoadJob(job_ref, None, destination, self, new_job_config) job_resource = load_job.to_api_repr() if rewind: @@ -2564,43 +2588,40 @@ def load_table_from_dataframe( If a usable parquet engine cannot be found. This method requires :mod:`pyarrow` to be installed. TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ job_id = _make_job_id(job_id, job_id_prefix) - if job_config: - _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) - # Make a copy so that the job config isn't modified in-place. - job_config_properties = copy.deepcopy(job_config._properties) - job_config = job.LoadJobConfig() - job_config._properties = job_config_properties - + if job_config is not None: + _verify_job_config_type(job_config, LoadJobConfig) else: job_config = job.LoadJobConfig() + new_job_config = job_config._fill_from_default(self._default_load_job_config) + supported_formats = {job.SourceFormat.CSV, job.SourceFormat.PARQUET} - if job_config.source_format is None: + if new_job_config.source_format is None: # default value - job_config.source_format = job.SourceFormat.PARQUET + new_job_config.source_format = job.SourceFormat.PARQUET if ( - job_config.source_format == job.SourceFormat.PARQUET - and job_config.parquet_options is None + new_job_config.source_format == job.SourceFormat.PARQUET + and new_job_config.parquet_options is None ): parquet_options = ParquetOptions() # default value parquet_options.enable_list_inference = True - job_config.parquet_options = parquet_options + new_job_config.parquet_options = parquet_options - if job_config.source_format not in supported_formats: + if new_job_config.source_format not in supported_formats: raise ValueError( "Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported".format( - job_config.source_format + new_job_config.source_format ) ) - if pyarrow is None and job_config.source_format == job.SourceFormat.PARQUET: + if pyarrow is None and new_job_config.source_format == job.SourceFormat.PARQUET: # pyarrow is now the only supported parquet engine. raise ValueError("This method requires pyarrow to be installed") @@ -2611,8 +2632,8 @@ def load_table_from_dataframe( # schema, and check if dataframe schema is compatible with it - except # for WRITE_TRUNCATE jobs, the existing schema does not matter then. if ( - not job_config.schema - and job_config.write_disposition != job.WriteDisposition.WRITE_TRUNCATE + not new_job_config.schema + and new_job_config.write_disposition != job.WriteDisposition.WRITE_TRUNCATE ): try: table = self.get_table(destination) @@ -2623,7 +2644,7 @@ def load_table_from_dataframe( name for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe) ) - job_config.schema = [ + new_job_config.schema = [ # Field description and policy tags are not needed to # serialize a data frame. SchemaField( @@ -2637,11 +2658,11 @@ def load_table_from_dataframe( if field.name in columns_and_indexes ] - job_config.schema = _pandas_helpers.dataframe_to_bq_schema( - dataframe, job_config.schema + new_job_config.schema = _pandas_helpers.dataframe_to_bq_schema( + dataframe, new_job_config.schema ) - if not job_config.schema: + if not new_job_config.schema: # the schema could not be fully detected warnings.warn( "Schema could not be detected for all columns. Loading from a " @@ -2652,13 +2673,13 @@ def load_table_from_dataframe( ) tmpfd, tmppath = tempfile.mkstemp( - suffix="_job_{}.{}".format(job_id[:8], job_config.source_format.lower()) + suffix="_job_{}.{}".format(job_id[:8], new_job_config.source_format.lower()) ) os.close(tmpfd) try: - if job_config.source_format == job.SourceFormat.PARQUET: + if new_job_config.source_format == job.SourceFormat.PARQUET: if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: msg = ( "Loading dataframe data in PARQUET format with pyarrow " @@ -2669,13 +2690,13 @@ def load_table_from_dataframe( ) warnings.warn(msg, category=RuntimeWarning) - if job_config.schema: + if new_job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() _pandas_helpers.dataframe_to_parquet( dataframe, - job_config.schema, + new_job_config.schema, tmppath, parquet_compression=parquet_compression, parquet_use_compliant_nested_type=True, @@ -2715,7 +2736,7 @@ def load_table_from_dataframe( job_id_prefix=job_id_prefix, location=location, project=project, - job_config=job_config, + job_config=new_job_config, timeout=timeout, ) @@ -2791,22 +2812,22 @@ def load_table_from_json( Raises: TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ job_id = _make_job_id(job_id, job_id_prefix) - if job_config: - _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) - # Make a copy so that the job config isn't modified in-place. - job_config = copy.deepcopy(job_config) + if job_config is not None: + _verify_job_config_type(job_config, LoadJobConfig) else: job_config = job.LoadJobConfig() - job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON + new_job_config = job_config._fill_from_default(self._default_load_job_config) + + new_job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON - if job_config.schema is None: - job_config.autodetect = True + if new_job_config.schema is None: + new_job_config.autodetect = True if project is None: project = self.project @@ -2828,7 +2849,7 @@ def load_table_from_json( job_id_prefix=job_id_prefix, location=location, project=project, - job_config=job_config, + job_config=new_job_config, timeout=timeout, ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 55e80b2ebc6e..4073e0137754 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -269,7 +269,7 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) - def _fill_from_default(self, default_job_config): + def _fill_from_default(self, default_job_config=None): """Merge this job config with a default job config. The keys in this object take precedence over the keys in the default @@ -283,6 +283,10 @@ def _fill_from_default(self, default_job_config): Returns: google.cloud.bigquery.job._JobConfig: A new (merged) job config. """ + if not default_job_config: + new_job_config = copy.deepcopy(self) + return new_job_config + if self._job_type != default_job_config._job_type: raise TypeError( "attempted to merge two incompatible job types: " diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index a69bb92c5a4d..1437328a88ce 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2319,7 +2319,7 @@ def _table_exists(t): return False -def test_dbapi_create_view(dataset_id): +def test_dbapi_create_view(dataset_id: str): query = f""" CREATE VIEW {dataset_id}.dbapi_create_view @@ -2332,7 +2332,7 @@ def test_dbapi_create_view(dataset_id): assert Config.CURSOR.rowcount == 0, "expected 0 rows" -def test_parameterized_types_round_trip(dataset_id): +def test_parameterized_types_round_trip(dataset_id: str): client = Config.CLIENT table_id = f"{dataset_id}.test_parameterized_types_round_trip" fields = ( @@ -2358,7 +2358,7 @@ def test_parameterized_types_round_trip(dataset_id): assert tuple(s._key()[:2] for s in table2.schema) == fields -def test_table_snapshots(dataset_id): +def test_table_snapshots(dataset_id: str): from google.cloud.bigquery import CopyJobConfig from google.cloud.bigquery import OperationType @@ -2429,7 +2429,7 @@ def test_table_snapshots(dataset_id): assert rows == [(1, "one"), (2, "two")] -def test_table_clones(dataset_id): +def test_table_clones(dataset_id: str): from google.cloud.bigquery import CopyJobConfig from google.cloud.bigquery import OperationType diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 3ff96e8746fe..a9760aa9b8ff 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -1104,7 +1104,7 @@ def test_ctor_with_unknown_property_raises_error(self): config = self._make_one() config.wrong_name = None - def test_fill_from_default(self): + def test_fill_query_job_config_from_default(self): from google.cloud.bigquery import QueryJobConfig job_config = QueryJobConfig() @@ -1120,6 +1120,22 @@ def test_fill_from_default(self): self.assertTrue(final_job_config.use_query_cache) self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + def test_fill_load_job_from_default(self): + from google.cloud.bigquery import LoadJobConfig + + job_config = LoadJobConfig() + job_config.create_session = True + job_config.encoding = "UTF-8" + + default_job_config = LoadJobConfig() + default_job_config.ignore_unknown_values = True + default_job_config.encoding = "ISO-8859-1" + + final_job_config = job_config._fill_from_default(default_job_config) + self.assertTrue(final_job_config.create_session) + self.assertTrue(final_job_config.ignore_unknown_values) + self.assertEqual(final_job_config.encoding, "UTF-8") + def test_fill_from_default_conflict(self): from google.cloud.bigquery import QueryJobConfig @@ -1132,6 +1148,17 @@ def test_fill_from_default_conflict(self): with self.assertRaises(TypeError): basic_job_config._fill_from_default(conflicting_job_config) + def test_fill_from_empty_default_conflict(self): + from google.cloud.bigquery import QueryJobConfig + + job_config = QueryJobConfig() + job_config.dry_run = True + job_config.maximum_bytes_billed = 1000 + + final_job_config = job_config._fill_from_default(default_job_config=None) + self.assertTrue(final_job_config.dry_run) + self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") def test__get_sub_prop_wo_default(self, _get_sub_prop): job_config = self._make_one() diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index f52eb825a3b0..c155e2bc65dc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -239,6 +239,31 @@ def test_ctor_w_query_job_config(self): self.assertIsInstance(client._default_query_job_config, QueryJobConfig) self.assertTrue(client._default_query_job_config.dry_run) + def test_ctor_w_load_job_config(self): + from google.cloud.bigquery._http import Connection + from google.cloud.bigquery import LoadJobConfig + + creds = _make_credentials() + http = object() + location = "us-central" + job_config = LoadJobConfig() + job_config.create_session = True + + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + location=location, + default_load_job_config=job_config, + ) + self.assertIsInstance(client._connection, Connection) + self.assertIs(client._connection.credentials, creds) + self.assertIs(client._connection.http, http) + self.assertEqual(client.location, location) + + self.assertIsInstance(client._default_load_job_config, LoadJobConfig) + self.assertTrue(client._default_load_job_config.create_session) + def test__call_api_applying_custom_retry_on_timeout(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -426,6 +451,19 @@ def test_default_query_job_config(self): client.default_query_job_config = job_config self.assertIsInstance(client.default_query_job_config, QueryJobConfig) + def test_default_load_job_config(self): + from google.cloud.bigquery import LoadJobConfig + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + self.assertIsNone(client.default_load_job_config) + + job_config = LoadJobConfig() + job_config.create_session = True + client.default_load_job_config = job_config + self.assertIsInstance(client.default_load_job_config, LoadJobConfig) + def test_get_service_account_email(self): path = "/projects/%s/serviceAccount" % (self.PROJECT,) creds = _make_credentials() @@ -3282,6 +3320,146 @@ def test_load_table_from_uri_w_invalid_job_config(self): self.assertIn("Expected an instance of LoadJobConfig", exc.exception.args[0]) + def test_load_table_from_uri_w_explicit_job_config(self): + from google.cloud.bigquery.job import LoadJobConfig + + JOB = "job_name" + DESTINATION = "destination_table" + SOURCE_URI = "http://example.com/source.csv" + RESOURCE = { + "jobReference": {"jobId": JOB, "projectId": self.PROJECT}, + "configuration": { + "load": { + "sourceUris": [SOURCE_URI], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": DESTINATION, + }, + "createSession": True, + "encoding": "UTF-8", + } + }, + } + + creds = _make_credentials() + http = object() + + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RESOURCE) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION) + + job_config = LoadJobConfig() + job_config.create_session = True + job_config.encoding = "UTF-8" + client.load_table_from_uri( + SOURCE_URI, destination, job_id=JOB, job_config=job_config + ) + + # Check that load_table_from_uri actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=DEFAULT_TIMEOUT, + ) + + def test_load_table_from_uri_w_explicit_job_config_override(self): + from google.cloud.bigquery.job import LoadJobConfig + + JOB = "job_name" + DESTINATION = "destination_table" + SOURCE_URI = "http://example.com/source.csv" + RESOURCE = { + "jobReference": {"jobId": JOB, "projectId": self.PROJECT}, + "configuration": { + "load": { + "sourceUris": [SOURCE_URI], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": DESTINATION, + }, + "createSession": False, + "encoding": "ISO-8859-1", + } + }, + } + + creds = _make_credentials() + http = object() + default_job_config = LoadJobConfig() + default_job_config.create_session = True + default_job_config.encoding = "ISO-8859-1" + + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_load_job_config=default_job_config, + ) + conn = client._connection = make_connection(RESOURCE) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION) + + job_config = LoadJobConfig() + job_config.create_session = False + client.load_table_from_uri( + SOURCE_URI, destination, job_id=JOB, job_config=job_config + ) + + # Check that load_table_from_uri actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=DEFAULT_TIMEOUT, + ) + + def test_load_table_from_uri_w_default_load_config(self): + from google.cloud.bigquery.job import LoadJobConfig + + JOB = "job_name" + DESTINATION = "destination_table" + SOURCE_URI = "http://example.com/source.csv" + RESOURCE = { + "jobReference": {"jobId": JOB, "projectId": self.PROJECT}, + "configuration": { + "load": { + "sourceUris": [SOURCE_URI], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": DESTINATION, + }, + "encoding": "ISO-8859-1", + } + }, + } + + creds = _make_credentials() + http = object() + default_job_config = LoadJobConfig() + default_job_config.encoding = "ISO-8859-1" + + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_load_job_config=default_job_config, + ) + conn = client._connection = make_connection(RESOURCE) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION) + + client.load_table_from_uri(SOURCE_URI, destination, job_id=JOB) + + # Check that load_table_from_uri actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=DEFAULT_TIMEOUT, + ) + @staticmethod def _mock_requests_response(status_code, headers, content=b""): return mock.Mock( @@ -6940,6 +7118,118 @@ def test_load_table_from_file_w_invalid_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + def test_load_table_from_file_w_explicit_job_config(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + + job_config = self._make_config() + job_config.create_session = True + job_config.encoding = "UTF-8" + do_upload_patch = self._make_do_upload_patch( + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, + self.TABLE_REF, + job_id="job_id", + project=self.PROJECT, + location=self.LOCATION, + job_config=job_config, + ) + + expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION) + expected_resource["jobReference"]["location"] = self.LOCATION + expected_resource["jobReference"]["projectId"] = self.PROJECT + expected_resource["configuration"]["load"]["createSession"] = True + expected_resource["configuration"]["load"]["encoding"] = "UTF-8" + do_upload.assert_called_once_with( + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES, + DEFAULT_TIMEOUT, + project=self.PROJECT, + ) + + def test_load_table_from_file_w_explicit_job_config_override(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery.job import LoadJobConfig + + client = self._make_client() + file_obj = self._make_file_obj() + + default_job_config = LoadJobConfig() + default_job_config.create_session = True + default_job_config.encoding = "ISO-8859-1" + client.default_load_job_config = default_job_config + + job_config = self._make_config() + job_config.create_session = False + do_upload_patch = self._make_do_upload_patch( + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, + self.TABLE_REF, + job_id="job_id", + project=self.PROJECT, + location=self.LOCATION, + job_config=job_config, + ) + + expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION) + expected_resource["jobReference"]["location"] = self.LOCATION + expected_resource["jobReference"]["projectId"] = self.PROJECT + expected_resource["configuration"]["load"]["createSession"] = False + expected_resource["configuration"]["load"]["encoding"] = "ISO-8859-1" + do_upload.assert_called_once_with( + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES, + DEFAULT_TIMEOUT, + project=self.PROJECT, + ) + + def test_load_table_from_file_w_default_load_config(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery.job import LoadJobConfig + + client = self._make_client() + file_obj = self._make_file_obj() + + default_job_config = LoadJobConfig() + default_job_config.encoding = "ISO-8859-1" + client.default_load_job_config = default_job_config + + job_config = self._make_config() + do_upload_patch = self._make_do_upload_patch( + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, + self.TABLE_REF, + job_id="job_id", + project=self.PROJECT, + location=self.LOCATION, + job_config=job_config, + ) + + expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION) + expected_resource["jobReference"]["location"] = self.LOCATION + expected_resource["jobReference"]["projectId"] = self.PROJECT + expected_resource["configuration"]["load"]["encoding"] = "ISO-8859-1" + do_upload.assert_called_once_with( + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES, + DEFAULT_TIMEOUT, + project=self.PROJECT, + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): @@ -7304,6 +7594,117 @@ def test_load_table_from_dataframe_w_list_inference_none(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_explicit_job_config_override(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + client.default_load_job_config = job.LoadJobConfig( + encoding="ISO-8859-1", + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, + source_format=job.SourceFormat.PARQUET, + ) + + job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_APPEND, + source_format=job.SourceFormat.PARQUET, + ) + original_config_copy = copy.deepcopy(job_config) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.write_disposition == job.WriteDisposition.WRITE_APPEND + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.encoding == "ISO-8859-1" + + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_default_load_config(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + client.default_load_job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, + source_format=job.SourceFormat.PARQUET, + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.write_disposition == job.WriteDisposition.WRITE_TRUNCATE + assert sent_config.source_format == job.SourceFormat.PARQUET + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_false(self): @@ -8377,6 +8778,118 @@ def test_load_table_from_json_w_invalid_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + def test_load_table_from_json_w_explicit_job_config_override(self): + from google.cloud.bigquery import job + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + schema = [ + SchemaField("name", "STRING"), + SchemaField("age", "INTEGER"), + SchemaField("adult", "BOOLEAN"), + ] + client.default_load_job_config = job.LoadJobConfig( + schema=schema, encoding="ISO-8859-1" + ) + + override_schema = schema + override_schema[0] = SchemaField("username", "STRING") + job_config = job.LoadJobConfig(schema=override_schema) + original_config_copy = copy.deepcopy(job_config) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json( + json_rows, + self.TABLE_REF, + job_config=job_config, + project="project-x", + location="EU", + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location="EU", + project="project-x", + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema == override_schema + assert sent_config.encoding == "ISO-8859-1" + assert not sent_config.autodetect + + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + + def test_load_table_from_json_w_default_job_config(self): + from google.cloud.bigquery import job + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + schema = [ + SchemaField("name", "STRING"), + SchemaField("age", "INTEGER"), + SchemaField("adult", "BOOLEAN"), + ] + client.default_load_job_config = job.LoadJobConfig(schema=schema) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json( + json_rows, + self.TABLE_REF, + job_config=None, + project="project-x", + location="EU", + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location="EU", + project="project-x", + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema == schema + def test_load_table_from_json_unicode_emoji_data_case(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES From 8b4a5746f2546fd2eda7511d84a5a17e22977a7b Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Thu, 23 Mar 2023 11:17:18 -0700 Subject: [PATCH 1585/2016] feat: add bool, int, float, string dtype to to_dataframe (#1529) --- .../google/cloud/bigquery/_pandas_helpers.py | 25 +++- .../google/cloud/bigquery/enums.py | 14 +++ .../google/cloud/bigquery/job/query.py | 55 ++++++++- .../google/cloud/bigquery/table.py | 99 ++++++++++++++- .../tests/unit/test_table.py | 113 ++++++++++++++++++ 5 files changed, 294 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 3d7e7d793838..dfd966c647f7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -21,6 +21,7 @@ import logging import queue import warnings +from typing import Any, Union from packaging import version @@ -283,7 +284,13 @@ def bq_to_arrow_schema(bq_schema): return pyarrow.schema(arrow_fields) -def default_types_mapper(date_as_object: bool = False): +def default_types_mapper( + date_as_object: bool = False, + bool_dtype: Union[Any, None] = None, + int_dtype: Union[Any, None] = None, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, +): """Create a mapping from pyarrow types to pandas types. This overrides the pandas defaults to use null-safe extension types where @@ -299,8 +306,17 @@ def default_types_mapper(date_as_object: bool = False): """ def types_mapper(arrow_data_type): - if pyarrow.types.is_boolean(arrow_data_type): - return pandas.BooleanDtype() + if bool_dtype is not None and pyarrow.types.is_boolean(arrow_data_type): + return bool_dtype + + elif int_dtype is not None and pyarrow.types.is_integer(arrow_data_type): + return int_dtype + + elif float_dtype is not None and pyarrow.types.is_floating(arrow_data_type): + return float_dtype + + elif string_dtype is not None and pyarrow.types.is_string(arrow_data_type): + return string_dtype elif ( # If date_as_object is True, we know some DATE columns are @@ -310,9 +326,6 @@ def types_mapper(arrow_data_type): ): return db_dtypes.DateDtype() - elif pyarrow.types.is_integer(arrow_data_type): - return pandas.Int64Dtype() - elif pyarrow.types.is_time(arrow_data_type): return db_dtypes.TimeDtype() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 45d43a2a7c62..e4e3d22fcdef 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -77,6 +77,20 @@ class CreateDisposition(object): returned in the job result.""" +class DefaultPandasDTypes(enum.Enum): + """Default Pandas DataFrem DTypes to convert BigQuery data. These + Sentinel values are used instead of None to maintain backward compatibility, + and allow Pandas package is not available. For more information: + https://stackoverflow.com/a/60605919/101923 + """ + + BOOL_DTYPE = object() + """Specifies default bool dtype""" + + INT_DTYPE = object() + """Specifies default integer dtype""" + + class DestinationFormat(object): """The exported file format. The default value is :attr:`CSV`. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index e6d6d682d5c7..e4807cc63b7e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -28,7 +28,7 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration -from google.cloud.bigquery.enums import KeyResultStatementKind +from google.cloud.bigquery.enums import KeyResultStatementKind, DefaultPandasDTypes from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import ( @@ -53,6 +53,11 @@ from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference +try: + import pandas # type: ignore +except ImportError: # pragma: NO COVER + pandas = None + if typing.TYPE_CHECKING: # pragma: NO COVER # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. @@ -1620,6 +1625,10 @@ def to_dataframe( create_bqstorage_client: bool = True, max_results: Optional[int] = None, geography_as_object: bool = False, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1672,6 +1681,46 @@ def to_dataframe( .. versionadded:: 2.24.0 + bool_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) + to convert BigQuery Boolean type, instead of relying on the default + ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type + + .. versionadded:: 3.7.1 + + int_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) + to convert BigQuery Integer types, instead of relying on the default + ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("int64")``. A list of BigQuery + Integer types can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + + .. versionadded:: 3.7.1 + + float_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) + to convert BigQuery Float type, instead of relying on the default + ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("float64")``. BigQuery Float + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + + .. versionadded:: 3.7.1 + + string_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to + convert BigQuery String type, instead of relying on the default + ``numpy.dtype("object")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("object")``. BigQuery String + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type + + .. versionadded:: 3.7.1 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data @@ -1694,6 +1743,10 @@ def to_dataframe( progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, geography_as_object=geography_as_object, + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, ) # If changing the signature of this method, make sure to apply the same diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a2110a9fbaf1..93b0da67f639 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -34,6 +34,11 @@ except ImportError: # pragma: NO COVER pyarrow = None +try: + import db_dtypes # type: ignore +except ImportError: # pragma: NO COVER + db_dtypes = None + try: import geopandas # type: ignore except ImportError: @@ -55,6 +60,7 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery.enums import DefaultPandasDTypes from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource @@ -88,6 +94,11 @@ _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' +_NO_SUPPORTED_DTYPE = ( + "The dtype cannot to be converted to a pandas ExtensionArray " + "because the necessary `__from_arrow__` attribute is missing." +) + def _reference_getter(table): """A :class:`~google.cloud.bigquery.table.TableReference` pointing to @@ -1920,6 +1931,10 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, geography_as_object: bool = False, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1958,6 +1973,7 @@ def to_dataframe( progress bar as a graphical dialog box. .. versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): If ``True`` (default), create a BigQuery Storage API client using the default API settings. The BigQuery Storage API @@ -1975,6 +1991,46 @@ def to_dataframe( .. versionadded:: 2.24.0 + bool_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) + to convert BigQuery Boolean type, instead of relying on the default + ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type + + .. versionadded:: 3.7.1 + + int_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) + to convert BigQuery Integer types, instead of relying on the default + ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("int64")``. A list of BigQuery + Integer types can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + + .. versionadded:: 3.7.1 + + float_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) + to convert BigQuery Float type, instead of relying on the default + ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("float64")``. BigQuery Float + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + + .. versionadded:: 3.7.1 + + string_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to + convert BigQuery String type, instead of relying on the default + ``numpy.dtype("object")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("object")``. BigQuery String + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type + + .. versionadded:: 3.7.1 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -1987,7 +2043,9 @@ def to_dataframe( the :mod:`google.cloud.bigquery_storage_v1` module is required but cannot be imported. Also if `geography_as_object` is `True`, but the - :mod:`shapely` library cannot be imported. + :mod:`shapely` library cannot be imported. Also if + `bool_dtype`, `int_dtype` or other dtype parameters + is not supported dtype. """ _pandas_helpers.verify_pandas_imports() @@ -1995,6 +2053,24 @@ def to_dataframe( if geography_as_object and shapely is None: raise ValueError(_NO_SHAPELY_ERROR) + if bool_dtype is DefaultPandasDTypes.BOOL_DTYPE: + bool_dtype = pandas.BooleanDtype() + + if int_dtype is DefaultPandasDTypes.INT_DTYPE: + int_dtype = pandas.Int64Dtype() + + if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): + raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE) + + if int_dtype is not None and not hasattr(int_dtype, "__from_arrow__"): + raise ValueError("int_dtype", _NO_SUPPORTED_DTYPE) + + if float_dtype is not None and not hasattr(float_dtype, "__from_arrow__"): + raise ValueError("float_dtype", _NO_SUPPORTED_DTYPE) + + if string_dtype is not None and not hasattr(string_dtype, "__from_arrow__"): + raise ValueError("string_dtype", _NO_SUPPORTED_DTYPE) + if dtypes is None: dtypes = {} @@ -2019,15 +2095,15 @@ def to_dataframe( for col in record_batch # Type can be date32 or date64 (plus units). # See: https://arrow.apache.org/docs/python/api/datatypes.html - if str(col.type).startswith("date") + if pyarrow.types.is_date(col.type) ) timestamp_as_object = not all( self.__can_cast_timestamp_ns(col) for col in record_batch - # Type can be timestamp (plus units and time zone). + # Type can be datetime and timestamp (plus units and time zone). # See: https://arrow.apache.org/docs/python/api/datatypes.html - if str(col.type).startswith("timestamp") + if pyarrow.types.is_timestamp(col.type) ) if len(record_batch) > 0: @@ -2036,7 +2112,11 @@ def to_dataframe( timestamp_as_object=timestamp_as_object, integer_object_nulls=True, types_mapper=_pandas_helpers.default_types_mapper( - date_as_object=date_as_object + date_as_object=date_as_object, + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, ), ) else: @@ -2233,6 +2313,10 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, geography_as_object=False, + bool_dtype=None, + int_dtype=None, + float_dtype=None, + string_dtype=None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2241,6 +2325,11 @@ def to_dataframe( dtypes (Any): Ignored. Added for compatibility with RowIterator. progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + geography_as_object (bool): Ignored. Added for compatibility with RowIterator. + bool_dtype (Any): Ignored. Added for compatibility with RowIterator. + int_dtype (Any): Ignored. Added for compatibility with RowIterator. + float_dtype (Any): Ignored. Added for compatibility with RowIterator. + string_dtype (Any): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a79b988810b3..22c7c048dac7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -55,6 +55,11 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import db_dtypes # type: ignore +except ImportError: # pragma: NO COVER + db_dtypes = None + try: import geopandas except (ImportError, AttributeError): # pragma: NO COVER @@ -3456,6 +3461,114 @@ def test_to_dataframe_w_various_types_nullable(self): self.assertIsInstance(row.complete, bool) self.assertIsInstance(row.date, datetime.date) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_w_dtypes_mapper(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING"), + SchemaField("complete", "BOOL"), + SchemaField("age", "INTEGER"), + SchemaField("seconds", "INT64"), + SchemaField("miles", "FLOAT64"), + ] + row_data = [ + ["Phred Phlyntstone", "true", "32", "23000", "1.77"], + ["Bharney Rhubble", "false", "33", "454000", "6.66"], + ["Wylma Phlyntstone", "true", "29", "341000", "2.0"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe( + create_bqstorage_client=False, + bool_dtype=pandas.BooleanDtype(), + int_dtype=pandas.Int32Dtype(), + float_dtype=pandas.StringDtype(), + string_dtype=pandas.StringDtype(), + ) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(df.complete.dtype.name, "boolean") + self.assertEqual(df.age.dtype.name, "Int32") + self.assertEqual(df.seconds.dtype.name, "Int32") + self.assertEqual(df.miles.dtype.name, "string") + self.assertEqual(df.name.dtype.name, "string") + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_w_none_dtypes_mapper(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING"), + SchemaField("complete", "BOOL"), + SchemaField("age", "INTEGER"), + SchemaField("seconds", "INT64"), + SchemaField("miles", "FLOAT64"), + ] + row_data = [ + ["Phred Phlyntstone", "true", "32", "23000", "1.77"], + ["Bharney Rhubble", "false", "33", "454000", "6.66"], + ["Wylma Phlyntstone", "true", "29", "341000", "2.0"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe( + create_bqstorage_client=False, + bool_dtype=None, + int_dtype=None, + float_dtype=None, + string_dtype=None, + ) + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(df.complete.dtype.name, "bool") + self.assertEqual(df.age.dtype.name, "int64") + self.assertEqual(df.seconds.dtype.name, "int64") + self.assertEqual(df.miles.dtype.name, "float64") + self.assertEqual(df.name.dtype.name, "object") + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_w_unsupported_dtypes_mapper(self): + import numpy + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING"), + ] + row_data = [ + ["Phred Phlyntstone"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + bool_dtype=numpy.dtype("bool"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + int_dtype=numpy.dtype("int64"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + float_dtype=numpy.dtype("float64"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + string_dtype=numpy.dtype("object"), + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_column_dtypes(self): from google.cloud.bigquery.schema import SchemaField From 06128932cc924a444458f42b064483e18f927858 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 23 Mar 2023 20:17:02 -0500 Subject: [PATCH 1586/2016] fix: loosen ipywidgets restrictions further to address ipython compatibility issues (#1531) * fix: loosen ipywidgets restrictions further to address ipython compatibility issues * include ipywidgets in prerelease deps * show all package versions * add ipykernel dependency * ipykernel in noxfile * oops --- packages/google-cloud-bigquery/noxfile.py | 6 +++++- packages/google-cloud-bigquery/setup.py | 10 ++++++++-- .../google-cloud-bigquery/testing/constraints-3.7.txt | 3 ++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index f6283abf9130..8464e498079b 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -303,6 +303,10 @@ def prerelease_deps(session): session.install( "--pre", "--upgrade", + "IPython", + "ipykernel", + "ipywidgets", + "tqdm", "git+https://github.com/pypa/packaging.git", ) @@ -321,7 +325,6 @@ def prerelease_deps(session): "google-cloud-datacatalog", "google-cloud-storage", "google-cloud-testutils", - "IPython", "mock", "psutil", "pytest", @@ -356,6 +359,7 @@ def prerelease_deps(session): session.run("python", "-c", "import grpc; print(grpc.__version__)") session.run("python", "-c", "import pandas; print(pandas.__version__)") session.run("python", "-c", "import pyarrow; print(pyarrow.__version__)") + session.run("python", "-m", "pip", "freeze") # Run all tests, except a few samples tests which require extra dependencies. session.run("py.test", "tests/unit") diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 2119e0191dd1..51cb6dc75389 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -67,9 +67,15 @@ pyarrow_dependency, "db-dtypes>=0.3.0,<2.0.0dev", ], - "ipywidgets": ["ipywidgets>=7.7.0,<8.0.1"], + "ipywidgets": [ + "ipywidgets>=7.7.0", + "ipykernel>=6.0.0", + ], "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <2.0dev"], - "ipython": ["ipython>=7.0.1,!=8.1.0"], + "ipython": [ + "ipython>=7.23.1,!=8.1.0", + "ipykernel>=6.0.0", + ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 1.1.0", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 746656b585ac..c94d80abf557 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -13,7 +13,8 @@ google-cloud-core==1.6.0 google-resumable-media==0.6.0 grpcio==1.47.0 ipywidgets==7.7.1 -ipython==7.0.1 +ipython==7.23.1 +ipykernel==6.0.0 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 From d417940f9cec17f1de92d5dc3023709a29486cc4 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 27 Mar 2023 09:03:04 -0500 Subject: [PATCH 1587/2016] chore(main): release 3.8.0 (#1525) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 14 ++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 5eda8912d0f4..4c3fc839a10c 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.8.0](https://github.com/googleapis/python-bigquery/compare/v3.7.0...v3.8.0) (2023-03-24) + + +### Features + +* Add bool, int, float, string dtype to to_dataframe ([#1529](https://github.com/googleapis/python-bigquery/issues/1529)) ([5e4465d](https://github.com/googleapis/python-bigquery/commit/5e4465d0975f54e8da885006686d9431ff9c5653)) +* Add default LoadJobConfig to Client ([#1526](https://github.com/googleapis/python-bigquery/issues/1526)) ([a2520ca](https://github.com/googleapis/python-bigquery/commit/a2520cabf7ec6bcb923c21e338188f1c10dc4d5d)) +* Expose configuration property on CopyJob, ExtractJob, LoadJob, QueryJob ([#1521](https://github.com/googleapis/python-bigquery/issues/1521)) ([8270a10](https://github.com/googleapis/python-bigquery/commit/8270a10df8f40750a7ac541a1781a71d7e79ce67)) + + +### Bug Fixes + +* Loosen ipywidgets restrictions further to address ipython compatibility issues ([#1531](https://github.com/googleapis/python-bigquery/issues/1531)) ([50e5026](https://github.com/googleapis/python-bigquery/commit/50e502674807b9771d7e26c0e784539bed8f9da6)) + ## [3.7.0](https://github.com/googleapis/python-bigquery/compare/v3.6.0...v3.7.0) (2023-03-06) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index dc87b3c5b7f3..8f4ba4810885 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.7.0" +__version__ = "3.8.0" From 77f28f4ef34b64d81009ca448be181fdc4d00be8 Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Tue, 28 Mar 2023 07:54:39 -0700 Subject: [PATCH 1588/2016] fix: keyerror when the load_table_from_dataframe accesses a unmapped dtype dataframe index (#1535) --- .../google/cloud/bigquery/_pandas_helpers.py | 4 +- .../tests/unit/test__pandas_helpers.py | 106 +++++++++++++----- 2 files changed, 82 insertions(+), 28 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index dfd966c647f7..601aa13df307 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -481,7 +481,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) if bq_type is None: - sample_data = _first_valid(dataframe[column]) + sample_data = _first_valid(dataframe.reset_index()[column]) if ( isinstance(sample_data, _BaseGeometry) and sample_data is not None # Paranoia @@ -544,7 +544,7 @@ def augment_schema(dataframe, current_bq_schema): augmented_schema.append(field) continue - arrow_table = pyarrow.array(dataframe[field.name]) + arrow_table = pyarrow.array(dataframe.reset_index()[field.name]) if pyarrow.types.is_list(arrow_table.type): # `pyarrow.ListType` diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 885cd318c67c..07bf03f66feb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -930,32 +930,6 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test): assert columns_and_indexes == expected -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_dict_sequence(module_under_test): - df_data = collections.OrderedDict( - [ - ("str_column", ["hello", "world"]), - ("int_column", [42, 8]), - ("bool_column", [True, False]), - ] - ) - dataframe = pandas.DataFrame(df_data) - - dict_schema = [ - {"name": "str_column", "type": "STRING", "mode": "NULLABLE"}, - {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, - ] - - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) - - expected_schema = ( - schema.SchemaField("str_column", "STRING", "NULLABLE"), - schema.SchemaField("int_column", "INTEGER", "NULLABLE"), - schema.SchemaField("bool_column", "BOOL", "REQUIRED"), - ) - assert returned_schema == expected_schema - - @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_arrow_with_multiindex(module_under_test): bq_schema = ( @@ -1190,6 +1164,86 @@ def test_dataframe_to_parquet_compression_method(module_under_test): assert call_args.kwargs.get("compression") == "ZSTD" +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_named_index(module_under_test): + df_data = collections.OrderedDict( + [ + ("str_column", ["hello", "world"]), + ("int_column", [42, 8]), + ("bool_column", [True, False]), + ] + ) + index = pandas.Index(["a", "b"], name="str_index") + dataframe = pandas.DataFrame(df_data, index=index) + + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + + expected_schema = ( + schema.SchemaField("str_index", "STRING", "NULLABLE"), + schema.SchemaField("str_column", "STRING", "NULLABLE"), + schema.SchemaField("int_column", "INTEGER", "NULLABLE"), + schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"), + ) + assert returned_schema == expected_schema + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_multiindex(module_under_test): + df_data = collections.OrderedDict( + [ + ("str_column", ["hello", "world"]), + ("int_column", [42, 8]), + ("bool_column", [True, False]), + ] + ) + index = pandas.MultiIndex.from_tuples( + [ + ("a", 0, datetime.datetime(1999, 12, 31, 23, 59, 59, 999999)), + ("a", 0, datetime.datetime(2000, 1, 1, 0, 0, 0)), + ], + names=["str_index", "int_index", "dt_index"], + ) + dataframe = pandas.DataFrame(df_data, index=index) + + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + + expected_schema = ( + schema.SchemaField("str_index", "STRING", "NULLABLE"), + schema.SchemaField("int_index", "INTEGER", "NULLABLE"), + schema.SchemaField("dt_index", "DATETIME", "NULLABLE"), + schema.SchemaField("str_column", "STRING", "NULLABLE"), + schema.SchemaField("int_column", "INTEGER", "NULLABLE"), + schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"), + ) + assert returned_schema == expected_schema + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): + df_data = collections.OrderedDict( + [ + ("str_column", ["hello", "world"]), + ("int_column", [42, 8]), + ("bool_column", [True, False]), + ] + ) + dataframe = pandas.DataFrame(df_data) + + dict_schema = [ + {"name": "str_column", "type": "STRING", "mode": "NULLABLE"}, + {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, + ] + + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) + + expected_schema = ( + schema.SchemaField("str_column", "STRING", "NULLABLE"), + schema.SchemaField("int_column", "INTEGER", "NULLABLE"), + schema.SchemaField("bool_column", "BOOL", "REQUIRED"), + ) + assert returned_schema == expected_schema + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): dataframe = pandas.DataFrame( From f31c839840c79ab503aff8928245c734bd3a6116 Mon Sep 17 00:00:00 2001 From: r1b Date: Tue, 28 Mar 2023 11:37:04 -0400 Subject: [PATCH 1589/2016] feat: expose query job on dbapi cursor (#1520) Co-authored-by: Tim Swast --- .../google/cloud/bigquery/dbapi/cursor.py | 10 ++++++++ .../tests/unit/test_dbapi_cursor.py | 23 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 03f3b72ca017..0dc8f56ab0e4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -79,6 +79,16 @@ def __init__(self, connection): self._query_job = None self._closed = False + @property + def query_job(self): + """google.cloud.bigquery.job.query.QueryJob: The query job created by + the last ``execute*()`` call. + + .. note:: + If the last ``execute*()`` call was ``executemany()``, this is the + last job created by ``executemany()``.""" + return self._query_job + def close(self): """Mark the cursor as closed, preventing its further use.""" self._closed = True diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index b550bbce0efe..fc6ea388208e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -662,6 +662,29 @@ def test_is_iterable(self): "Iterating again over the same results should produce no rows.", ) + def test_query_job_wo_execute(self): + from google.cloud.bigquery import dbapi + + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + self.assertIsNone(cursor.query_job) + + def test_query_job_w_execute(self): + from google.cloud.bigquery import dbapi, QueryJob + + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + cursor.execute("SELECT 1;") + self.assertIsInstance(cursor.query_job, QueryJob) + + def test_query_job_w_executemany(self): + from google.cloud.bigquery import dbapi, QueryJob + + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + cursor.executemany("SELECT %s;", (("1",), ("2",))) + self.assertIsInstance(cursor.query_job, QueryJob) + def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor From 7df2f87fdb47337e0cfad8c67ee307e4cb47a058 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 28 Mar 2023 09:14:52 -0700 Subject: [PATCH 1590/2016] chore(main): release 3.9.0 (#1537) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 4c3fc839a10c..5bbde01f468b 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.9.0](https://github.com/googleapis/python-bigquery/compare/v3.8.0...v3.9.0) (2023-03-28) + + +### Features + +* Expose query job on dbapi cursor ([#1520](https://github.com/googleapis/python-bigquery/issues/1520)) ([339eb0e](https://github.com/googleapis/python-bigquery/commit/339eb0e86040a7c30d140800f34810ffc6a7c76b)) + + +### Bug Fixes + +* Keyerror when the load_table_from_dataframe accesses a unmapped dtype dataframe index ([#1535](https://github.com/googleapis/python-bigquery/issues/1535)) ([a69348a](https://github.com/googleapis/python-bigquery/commit/a69348a558f48cfc61d03d3e8bb7f9aee48bea86)) + ## [3.8.0](https://github.com/googleapis/python-bigquery/compare/v3.7.0...v3.8.0) (2023-03-24) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 8f4ba4810885..0bc2753575f8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.8.0" +__version__ = "3.9.0" From b449868def47feb9e1460f7673ed398112a20ced Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 29 Mar 2023 21:05:16 -0500 Subject: [PATCH 1591/2016] chore: update tests to be compatible with pandas 2.0 (#1538) * chore: update tests to be compatible with pandas 2.0 * use StringDtype without storage argument * avoid Float64Dtype on older pandas --- .../google-cloud-bigquery/tests/unit/test_table.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 22c7c048dac7..9bdd7b5969e1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -3486,7 +3486,11 @@ def test_to_dataframe_w_dtypes_mapper(self): create_bqstorage_client=False, bool_dtype=pandas.BooleanDtype(), int_dtype=pandas.Int32Dtype(), - float_dtype=pandas.StringDtype(), + float_dtype=( + pandas.Float64Dtype() + if hasattr(pandas, "Float64Dtype") + else pandas.StringDtype() + ), string_dtype=pandas.StringDtype(), ) @@ -3494,7 +3498,10 @@ def test_to_dataframe_w_dtypes_mapper(self): self.assertEqual(df.complete.dtype.name, "boolean") self.assertEqual(df.age.dtype.name, "Int32") self.assertEqual(df.seconds.dtype.name, "Int32") - self.assertEqual(df.miles.dtype.name, "string") + self.assertEqual( + df.miles.dtype.name, + "Float64" if hasattr(pandas, "Float64Dtype") else "string", + ) self.assertEqual(df.name.dtype.name, "string") @unittest.skipIf(pandas is None, "Requires `pandas`") From f2c9f539c753e2a283bca074c22647bfbb5356d5 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 30 Mar 2023 16:43:02 +0100 Subject: [PATCH 1592/2016] chore(deps): update all dependencies (#1522) --- .../samples/geography/requirements.txt | 18 +++++++++--------- .../samples/magics/requirements.txt | 12 ++++++------ .../samples/snippets/requirements.txt | 14 +++++++------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 75964dbe15fb..6cb0b6384bf6 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -6,20 +6,20 @@ click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.0.5 -Fiona==1.9.1 +db-dtypes==1.1.0 +Fiona==1.9.2 geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.16.2 -google-cloud-bigquery==3.6.0 -google-cloud-bigquery-storage==2.19.0 +google-auth==2.17.0 +google-cloud-bigquery==3.9.0 +google-cloud-bigquery-storage==2.19.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.1 -googleapis-common-protos==1.58.0 -grpcio==1.51.3 +googleapis-common-protos==1.59.0 +grpcio==1.53.0 idna==3.4 libcst==0.4.9 munch==2.5.0 @@ -34,7 +34,7 @@ pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.7.1 +pytz==2023.3 PyYAML==6.0 requests==2.28.2 rsa==4.9 @@ -42,4 +42,4 @@ Shapely==2.0.1 six==1.16.0 typing-extensions==4.5.0 typing-inspect==0.8.0 -urllib3==1.26.14 +urllib3==1.26.15 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 55b828f1b8c8..35be2c5da372 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,15 +1,15 @@ -db-dtypes==1.0.5 -google-cloud-bigquery-storage==2.19.0 +db-dtypes==1.1.0 +google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.51.3 -ipywidgets==8.0.4 +grpcio==1.53.0 +ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.11.0; python_version >= '3.9' +ipython==8.12.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 -pytz==2022.7.1 +pytz==2023.3 typing-extensions==4.5.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 6c6b17ea85d1..c8a15abafbcb 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ -db-dtypes==1.0.5 -google-cloud-bigquery==3.6.0 -google-cloud-bigquery-storage==2.19.0 +db-dtypes==1.1.0 +google-cloud-bigquery==3.9.0 +google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.51.3 -ipywidgets==8.0.4 +grpcio==1.53.0 +ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.11.0; python_version >= '3.9' +ipython==8.12.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 -pytz==2022.7.1 +pytz==2023.3 typing-extensions==4.5.0 From 473e8b6bc2a6416d604e734fbe04a84ea55eca1d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 4 Apr 2023 10:20:10 -0400 Subject: [PATCH 1593/2016] chore: updates minimum version of bqstorage (#1542) * chore: updates minimum version of bqstorage * removes unneeded test * updates linting, removes unneeded comment --- packages/google-cloud-bigquery/setup.py | 2 +- packages/google-cloud-bigquery/testing/constraints-3.7.txt | 2 +- .../tests/unit/job/test_query_pandas.py | 6 ------ 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 51cb6dc75389..08106f6940ec 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -51,7 +51,7 @@ # Keep the no-op bqstorage extra for backward compatibility. # See: https://github.com/googleapis/python-bigquery/issues/757 "bqstorage": [ - "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", + "google-cloud-bigquery-storage >= 2.6.0, <3.0.0dev", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index c94d80abf557..2ea482e8bccd 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -8,7 +8,7 @@ db-dtypes==0.3.0 geopandas==0.9.0 google-api-core==1.31.5 -google-cloud-bigquery-storage==2.0.0 +google-cloud-bigquery-storage==2.6.0 google-cloud-core==1.6.0 google-resumable-media==0.6.0 grpcio==1.47.0 diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index a2444efdde45..01b60ceb3c01 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -59,12 +59,6 @@ @pytest.fixture def table_read_options_kwarg(): - # Create a BigQuery Storage table read options object with pyarrow compression - # enabled if a recent-enough version of google-cloud-bigquery-storage dependency is - # installed to support the compression. - if not hasattr(bigquery_storage, "ArrowSerializationOptions"): - return {} - read_options = bigquery_storage.ReadSession.TableReadOptions( arrow_serialization_options=bigquery_storage.ArrowSerializationOptions( buffer_compression=bigquery_storage.ArrowSerializationOptions.CompressionCodec.LZ4_FRAME From 10ef39801d994a7fe82c7e10df9bc076ee9c9775 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 6 Apr 2023 17:08:56 +0100 Subject: [PATCH 1594/2016] chore(deps): update all dependencies (#1540) --- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 6cb0b6384bf6..7c6d98192769 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -6,13 +6,13 @@ click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.1.0 +db-dtypes==1.1.1 Fiona==1.9.2 geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.17.0 +google-auth==2.17.2 google-cloud-bigquery==3.9.0 google-cloud-bigquery-storage==2.19.1 google-cloud-core==2.3.2 @@ -26,7 +26,7 @@ munch==2.5.0 mypy-extensions==1.0.0 packaging==23.0 pandas===1.3.5; python_version == '3.7' -pandas==1.5.3; python_version >= '3.8' +pandas==2.0.0; python_version >= '3.8' proto-plus==1.22.2 pyarrow==11.0.0 pyasn1==0.4.8 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 35be2c5da372..2d7e464a81fe 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -db-dtypes==1.1.0 +db-dtypes==1.1.1 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 grpcio==1.53.0 @@ -9,7 +9,7 @@ ipython==8.12.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.3; python_version >= '3.8' +pandas==2.0.0; python_version >= '3.8' pyarrow==11.0.0 pytz==2023.3 typing-extensions==4.5.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index c8a15abafbcb..8f14d0dc1f53 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -db-dtypes==1.1.0 +db-dtypes==1.1.1 google-cloud-bigquery==3.9.0 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 @@ -10,7 +10,7 @@ ipython==8.12.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.3; python_version >= '3.8' +pandas==2.0.0; python_version >= '3.8' pyarrow==11.0.0 pytz==2023.3 typing-extensions==4.5.0 From ce568bcfa619fb606ac03c78b420ce787ebc26f4 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 18 Apr 2023 11:09:42 -0400 Subject: [PATCH 1595/2016] =?UTF-8?q?bug:=20fixes=20discrepancy=20btwn=20p?= =?UTF-8?q?ython-api-core=20&=20bigquery=20re=20object=20defa=E2=80=A6=20(?= =?UTF-8?q?#1541)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * bug: fixes discrepancy btwn python-api-core & bigquery re object default timeout * Fix: loosen ipywidget dependency (#1504) * fix: updates ipywidget dependency * fix: updates ipywidget version number * chore(main): release 3.6.0 (#1490) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * docs: Remove < 3.11 reference from README (#1502) * chore(python): upgrade gcp-releasetool in .kokoro [autoapprove] (#1508) Source-Link: https://github.com/googleapis/synthtool/commit/5f2a6089f73abf06238fe4310f6a14d6f6d1eed3 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8555f0e37e6261408f792bfd6635102d2da5ad73f8f09bcb24f25e6afb5fac97 Co-authored-by: Owl Bot * feat: add `connection_properties` and `create_session` to `LoadJobConfig` (#1509) * feat: added `connection_properties` and `create_session` in load job * chore(deps): update all dependencies (#1501) * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Anthonios Partheniou Co-authored-by: Owl Bot * chore(deps): update all dependencies (#1513) * feat: add default_query_job_config property and property setter to BQ client (#1511) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [feature request](https://togithub.com/googleapis/python-bigquery/issues/1512) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes - [feature request](https://togithub.com/googleapis/python-bigquery/issues/1512)🦕 - [internal bug](https://b.corp.google.com/issues/271044948) * chore(deps): update all dependencies (#1514) * chore(deps): update dependency charset-normalizer to v3.1.0 (#1518) * chore(main): release 3.7.0 (#1507) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * feat: expose configuration property on CopyJob, ExtractJob, LoadJob, QueryJob (#1521) * feat: expose configuration property on CopyJob, ExtractJob, LoadJob, QueryJob Note for google-cloud-bigquery developers: This also refactors these classes so that `_set_properties` does not modify the `_properties` dictionary in-place. Doing so was also mutating the request object, making it difficult to debug what request was _actually_ sent. Before this change, many tests hallucinated that the request was always equal to the response. * E google.api_core.exceptions.BadRequest: 400 Clone operation with write disposition WRITE_TRUNCATE is not supported. Please try again with WRITE_EMPTY. * chore(deps): Update nox in .kokoro/requirements.in [autoapprove] (#1527) Source-Link: https://github.com/googleapis/synthtool/commit/92006bb3cdc84677aa93c7f5235424ec2b157146 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 Co-authored-by: Owl Bot * feat: add default LoadJobConfig to Client (#1526) * feat: add bool, int, float, string dtype to to_dataframe (#1529) * fix: loosen ipywidgets restrictions further to address ipython compatibility issues (#1531) * fix: loosen ipywidgets restrictions further to address ipython compatibility issues * include ipywidgets in prerelease deps * show all package versions * add ipykernel dependency * ipykernel in noxfile * oops * chore(main): release 3.8.0 (#1525) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * fix: keyerror when the load_table_from_dataframe accesses a unmapped dtype dataframe index (#1535) * feat: expose query job on dbapi cursor (#1520) Co-authored-by: Tim Swast * chore(main): release 3.9.0 (#1537) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * chore: update tests to be compatible with pandas 2.0 (#1538) * chore: update tests to be compatible with pandas 2.0 * use StringDtype without storage argument * avoid Float64Dtype on older pandas * chore(deps): update all dependencies (#1522) * chore: updates minimum version of bqstorage (#1542) * chore: updates minimum version of bqstorage * removes unneeded test * updates linting, removes unneeded comment * updates conditional checks, comments, adds test * Removes test, adds pragma no cover * Removes test * fix linting error --------- Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Atsushi Yamamoto Co-authored-by: gcf-owl-bot[bot] <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Owl Bot Co-authored-by: Shobhit Singh Co-authored-by: Mend Renovate Co-authored-by: Anthonios Partheniou Co-authored-by: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Co-authored-by: Tim Swast Co-authored-by: r1b --- .../google/cloud/bigquery/job/query.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index e4807cc63b7e..62668c6015a7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -764,7 +764,6 @@ def __init__(self, job_id, query, client, job_config=None): _helpers._set_sub_prop( self._properties, ["configuration", "query", "query"], query ) - self._query_results = None self._done_timeout = None self._transport_timeout = None @@ -1332,6 +1331,15 @@ def _reload_query_results( # the timeout from the futures API is respected. See: # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 timeout_ms = None + + # Python_API_core, as part of a major rewrite of the deadline, timeout, + # retry process sets the timeout value as a Python object(). + # Our system does not natively handle that and instead expects + # either none or a numeric value. If passed a Python object, convert to + # None. + if type(self._done_timeout) == object: # pragma: NO COVER + self._done_timeout = None + if self._done_timeout is not None: # Subtract a buffer for context switching, network latency, etc. api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS From a78dac86a8d91da854276e8ab7feda166941b759 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 18 Apr 2023 18:59:44 +0200 Subject: [PATCH 1596/2016] chore(deps): update all dependencies (#1549) Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements-test.txt | 4 ++-- .../samples/geography/requirements.txt | 10 +++++----- .../samples/magics/requirements-test.txt | 4 ++-- .../samples/magics/requirements.txt | 2 +- .../samples/snippets/requirements-test.txt | 4 ++-- .../samples/snippets/requirements.txt | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index e0ec46254764..3c3afdcb17c2 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.2.2 -mock==5.0.1 +pytest==7.3.1 +mock==5.0.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 7c6d98192769..49dd1c156960 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ -attrs==22.2.0 +attrs==23.1.0 certifi==2022.12.7 cffi==1.15.1 charset-normalizer==3.1.0 @@ -7,24 +7,24 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.1.1 -Fiona==1.9.2 +Fiona==1.9.3 geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.17.2 +google-auth==2.17.3 google-cloud-bigquery==3.9.0 google-cloud-bigquery-storage==2.19.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.1 googleapis-common-protos==1.59.0 -grpcio==1.53.0 +grpcio==1.54.0 idna==3.4 libcst==0.4.9 munch==2.5.0 mypy-extensions==1.0.0 -packaging==23.0 +packaging==23.1 pandas===1.3.5; python_version == '3.7' pandas==2.0.0; python_version >= '3.8' proto-plus==1.22.2 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 3ed7558d5aed..9fa68a930845 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.2 -mock==5.0.1 +pytest==7.3.1 +mock==5.0.2 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 2d7e464a81fe..956b03dda131 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.1.1 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.53.0 +grpcio==1.54.0 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 3ed7558d5aed..9fa68a930845 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.2 -mock==5.0.1 +pytest==7.3.1 +mock==5.0.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 8f14d0dc1f53..034d9d00dfca 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -2,7 +2,7 @@ db-dtypes==1.1.1 google-cloud-bigquery==3.9.0 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.53.0 +grpcio==1.54.0 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' From f203376b0d6e1f2d208dd9453917ae12954c7145 Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Tue, 18 Apr 2023 15:14:47 -0700 Subject: [PATCH 1597/2016] feat: add date, datetime, time, timestamp dtype to to_dataframe (#1547) --- .../google/cloud/bigquery/_pandas_helpers.py | 27 ++- .../google/cloud/bigquery/enums.py | 6 + .../google/cloud/bigquery/job/query.py | 64 +++++- .../google/cloud/bigquery/table.py | 128 +++++++++-- .../tests/system/test_pandas.py | 98 ++++++++ .../tests/unit/test_table.py | 209 +++++++++++++++++- 6 files changed, 494 insertions(+), 38 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 601aa13df307..a14dbec9bac0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -290,6 +290,10 @@ def default_types_mapper( int_dtype: Union[Any, None] = None, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + date_dtype: Union[Any, None] = None, + datetime_dtype: Union[Any, None] = None, + time_dtype: Union[Any, None] = None, + timestamp_dtype: Union[Any, None] = None, ): """Create a mapping from pyarrow types to pandas types. @@ -321,13 +325,28 @@ def types_mapper(arrow_data_type): elif ( # If date_as_object is True, we know some DATE columns are # out-of-bounds of what is supported by pandas. - not date_as_object + date_dtype is not None + and not date_as_object and pyarrow.types.is_date(arrow_data_type) ): - return db_dtypes.DateDtype() + return date_dtype - elif pyarrow.types.is_time(arrow_data_type): - return db_dtypes.TimeDtype() + elif ( + datetime_dtype is not None + and pyarrow.types.is_timestamp(arrow_data_type) + and arrow_data_type.tz is None + ): + return datetime_dtype + + elif ( + timestamp_dtype is not None + and pyarrow.types.is_timestamp(arrow_data_type) + and arrow_data_type.tz is not None + ): + return timestamp_dtype + + elif time_dtype is not None and pyarrow.types.is_time(arrow_data_type): + return time_dtype return types_mapper diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index e4e3d22fcdef..55385363028d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -90,6 +90,12 @@ class DefaultPandasDTypes(enum.Enum): INT_DTYPE = object() """Specifies default integer dtype""" + DATE_DTYPE = object() + """Specifies default date dtype""" + + TIME_DTYPE = object() + """Specifies default time dtype""" + class DestinationFormat(object): """The exported file format. The default value is :attr:`CSV`. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 62668c6015a7..315d8201c3c5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -58,6 +58,11 @@ except ImportError: # pragma: NO COVER pandas = None +try: + import db_dtypes # type: ignore +except ImportError: # pragma: NO COVER + db_dtypes = None + if typing.TYPE_CHECKING: # pragma: NO COVER # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. @@ -1637,6 +1642,10 @@ def to_dataframe( int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + date_dtype: Union[Any, None] = DefaultPandasDTypes.DATE_DTYPE, + datetime_dtype: Union[Any, None] = None, + time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, + timestamp_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1697,7 +1706,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 int_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) @@ -1707,7 +1716,7 @@ def to_dataframe( Integer types can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 float_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) @@ -1717,7 +1726,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 string_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to @@ -1727,7 +1736,50 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 + + date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date + type, instead of relying on the default ``db_dtypes.DateDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Date type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type + + .. versionadded:: 3.10.0 + + datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime + type, instead of relying on the default ``numpy.dtype("datetime64[ns]``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type + + .. versionadded:: 3.10.0 + + time_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time + type, instead of relying on the default ``db_dtypes.TimeDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("object")``. BigQuery Time type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type + + .. versionadded:: 3.10.0 + + timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp + type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type + + .. versionadded:: 3.10.0 Returns: pandas.DataFrame: @@ -1755,6 +1807,10 @@ def to_dataframe( int_dtype=int_dtype, float_dtype=float_dtype, string_dtype=string_dtype, + date_dtype=date_dtype, + datetime_dtype=datetime_dtype, + time_dtype=time_dtype, + timestamp_dtype=timestamp_dtype, ) # If changing the signature of this method, make sure to apply the same diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 93b0da67f639..a34e5dc255b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1935,6 +1935,10 @@ def to_dataframe( int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + date_dtype: Union[Any, None] = DefaultPandasDTypes.DATE_DTYPE, + datetime_dtype: Union[Any, None] = None, + time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, + timestamp_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1999,7 +2003,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 int_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) @@ -2009,7 +2013,7 @@ def to_dataframe( Integer types can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 float_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) @@ -2019,7 +2023,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 string_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to @@ -2029,7 +2033,50 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 + + date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date + type, instead of relying on the default ``db_dtypes.DateDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Date type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type + + .. versionadded:: 3.10.0 + + datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime + type, instead of relying on the default ``numpy.dtype("datetime64[ns]``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type + + .. versionadded:: 3.10.0 + + time_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time + type, instead of relying on the default ``db_dtypes.TimeDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("object")``. BigQuery Time type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type + + .. versionadded:: 3.10.0 + + timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp + type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type + + .. versionadded:: 3.10.0 Returns: pandas.DataFrame: @@ -2059,6 +2106,9 @@ def to_dataframe( if int_dtype is DefaultPandasDTypes.INT_DTYPE: int_dtype = pandas.Int64Dtype() + if time_dtype is DefaultPandasDTypes.TIME_DTYPE: + time_dtype = db_dtypes.TimeDtype() + if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE) @@ -2071,6 +2121,24 @@ def to_dataframe( if string_dtype is not None and not hasattr(string_dtype, "__from_arrow__"): raise ValueError("string_dtype", _NO_SUPPORTED_DTYPE) + if ( + date_dtype is not None + and date_dtype is not DefaultPandasDTypes.DATE_DTYPE + and not hasattr(date_dtype, "__from_arrow__") + ): + raise ValueError("date_dtype", _NO_SUPPORTED_DTYPE) + + if datetime_dtype is not None and not hasattr(datetime_dtype, "__from_arrow__"): + raise ValueError("datetime_dtype", _NO_SUPPORTED_DTYPE) + + if time_dtype is not None and not hasattr(time_dtype, "__from_arrow__"): + raise ValueError("time_dtype", _NO_SUPPORTED_DTYPE) + + if timestamp_dtype is not None and not hasattr( + timestamp_dtype, "__from_arrow__" + ): + raise ValueError("timestamp_dtype", _NO_SUPPORTED_DTYPE) + if dtypes is None: dtypes = {} @@ -2086,25 +2154,29 @@ def to_dataframe( create_bqstorage_client=create_bqstorage_client, ) - # When converting date or timestamp values to nanosecond precision, the result - # can be out of pyarrow bounds. To avoid the error when converting to - # Pandas, we set the date_as_object or timestamp_as_object parameter to True, - # if necessary. - date_as_object = not all( - self.__can_cast_timestamp_ns(col) - for col in record_batch - # Type can be date32 or date64 (plus units). - # See: https://arrow.apache.org/docs/python/api/datatypes.html - if pyarrow.types.is_date(col.type) - ) + # Default date dtype is `db_dtypes.DateDtype()` that could cause out of bounds error, + # when pyarrow converts date values to nanosecond precision. To avoid the error, we + # set the date_as_object parameter to True, if necessary. + date_as_object = False + if date_dtype is DefaultPandasDTypes.DATE_DTYPE: + date_dtype = db_dtypes.DateDtype() + date_as_object = not all( + self.__can_cast_timestamp_ns(col) + for col in record_batch + # Type can be date32 or date64 (plus units). + # See: https://arrow.apache.org/docs/python/api/datatypes.html + if pyarrow.types.is_date(col.type) + ) - timestamp_as_object = not all( - self.__can_cast_timestamp_ns(col) - for col in record_batch - # Type can be datetime and timestamp (plus units and time zone). - # See: https://arrow.apache.org/docs/python/api/datatypes.html - if pyarrow.types.is_timestamp(col.type) - ) + timestamp_as_object = False + if datetime_dtype is None and timestamp_dtype is None: + timestamp_as_object = not all( + self.__can_cast_timestamp_ns(col) + for col in record_batch + # Type can be datetime and timestamp (plus units and time zone). + # See: https://arrow.apache.org/docs/python/api/datatypes.html + if pyarrow.types.is_timestamp(col.type) + ) if len(record_batch) > 0: df = record_batch.to_pandas( @@ -2117,6 +2189,10 @@ def to_dataframe( int_dtype=int_dtype, float_dtype=float_dtype, string_dtype=string_dtype, + date_dtype=date_dtype, + datetime_dtype=datetime_dtype, + time_dtype=time_dtype, + timestamp_dtype=timestamp_dtype, ), ) else: @@ -2317,6 +2393,10 @@ def to_dataframe( int_dtype=None, float_dtype=None, string_dtype=None, + date_dtype=None, + datetime_dtype=None, + time_dtype=None, + timestamp_dtype=None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2330,6 +2410,10 @@ def to_dataframe( int_dtype (Any): Ignored. Added for compatibility with RowIterator. float_dtype (Any): Ignored. Added for compatibility with RowIterator. string_dtype (Any): Ignored. Added for compatibility with RowIterator. + date_dtype (Any): Ignored. Added for compatibility with RowIterator. + datetime_dtype (Any): Ignored. Added for compatibility with RowIterator. + time_dtype (Any): Ignored. Added for compatibility with RowIterator. + timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 91305b4506bf..ea8cc6d636fe 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -34,6 +34,7 @@ pandas = pytest.importorskip("pandas", minversion="0.23.0") +pyarrow = pytest.importorskip("pyarrow") numpy = pytest.importorskip("numpy") bigquery_storage = pytest.importorskip( @@ -1109,6 +1110,103 @@ def test_list_rows_nullable_scalars_extreme_dtypes( assert df.dtypes["string_col"].name == "object" +@pytest.mark.parametrize( + ("max_results",), + ( + (None,), + (10,), + ), # Use BQ Storage API. # Use REST API. +) +def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype( + bigquery_client, scalars_extreme_table, max_results +): + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. + schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + + df = bigquery_client.list_rows( + scalars_extreme_table, + max_results=max_results, + selected_fields=schema, + ).to_dataframe( + bool_dtype=pandas.BooleanDtype(), + int_dtype=pandas.Int64Dtype(), + float_dtype=( + pandas.Float64Dtype() + if hasattr(pandas, "Float64Dtype") + else pandas.StringDtype() + ), + string_dtype=pandas.StringDtype(), + date_dtype=( + pandas.ArrowDtype(pyarrow.date32()) + if hasattr(pandas, "ArrowDtype") + else None + ), + datetime_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + time_dtype=( + pandas.ArrowDtype(pyarrow.time64("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + timestamp_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC")) + if hasattr(pandas, "ArrowDtype") + else None + ), + ) + + # These pandas dtypes are handled by the custom dtypes. + assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["float64_col"].name == "Float64" + assert df.dtypes["int64_col"].name == "Int64" + assert df.dtypes["string_col"].name == "string" + + assert ( + df.dtypes["date_col"].name == "date32[day][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "datetime64[ns]" + ) + assert ( + df.dtypes["datetime_col"].name == "timestamp[us][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "object" + ) + assert ( + df.dtypes["timestamp_col"].name == "timestamp[us, tz=UTC][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "object" + ) + assert ( + df.dtypes["time_col"].name == "time64[us][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "object" + ) + + # decimal.Decimal is used to avoid loss of precision. + assert df.dtypes["numeric_col"].name == "object" + assert df.dtypes["bignumeric_col"].name == "object" + + # pandas uses Python bytes objects. + assert df.dtypes["bytes_col"].name == "object" + + def test_upload_time_and_datetime_56(bigquery_client, dataset_id): df = pandas.DataFrame( dict( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 9bdd7b5969e1..53db635fa1b6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -46,6 +46,7 @@ PYARROW_VERSION = pkg_resources.parse_version("0.0.1") if pyarrow: + import pyarrow import pyarrow.types PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) @@ -3471,11 +3472,45 @@ def test_to_dataframe_w_dtypes_mapper(self): SchemaField("age", "INTEGER"), SchemaField("seconds", "INT64"), SchemaField("miles", "FLOAT64"), + SchemaField("date", "DATE"), + SchemaField("datetime", "DATETIME"), + SchemaField("time", "TIME"), + SchemaField("timestamp", "TIMESTAMP"), ] row_data = [ - ["Phred Phlyntstone", "true", "32", "23000", "1.77"], - ["Bharney Rhubble", "false", "33", "454000", "6.66"], - ["Wylma Phlyntstone", "true", "29", "341000", "2.0"], + [ + "Phred Phlyntstone", + "true", + "32", + "23000", + "1.77", + "1999-12-01", + "1999-12-31T00:00:00.000000", + "00:00:00.000000", + "1433836800000000", + ], + [ + "Bharney Rhubble", + "false", + "33", + "454000", + "6.66", + "4567-06-14", + "4567-12-31T00:00:00.000000", + "12:00:00.232413", + "81953424000000000", + ], + [ + "Wylma Phlyntstone", + "true", + "29", + "341000", + "2.0", + "9999-12-31", + "9999-12-31T23:59:59.999999", + "23:59:59.999999", + "253402261199999999", + ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3492,18 +3527,136 @@ def test_to_dataframe_w_dtypes_mapper(self): else pandas.StringDtype() ), string_dtype=pandas.StringDtype(), + date_dtype=( + pandas.ArrowDtype(pyarrow.date32()) + if hasattr(pandas, "ArrowDtype") + else None + ), + datetime_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + time_dtype=( + pandas.ArrowDtype(pyarrow.time64("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + timestamp_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC")) + if hasattr(pandas, "ArrowDtype") + else None + ), ) self.assertIsInstance(df, pandas.DataFrame) + + self.assertEqual(list(df.complete), [True, False, True]) self.assertEqual(df.complete.dtype.name, "boolean") + + self.assertEqual(list(df.age), [32, 33, 29]) self.assertEqual(df.age.dtype.name, "Int32") + + self.assertEqual(list(df.seconds), [23000, 454000, 341000]) self.assertEqual(df.seconds.dtype.name, "Int32") + self.assertEqual( - df.miles.dtype.name, - "Float64" if hasattr(pandas, "Float64Dtype") else "string", + list(df.name), ["Phred Phlyntstone", "Bharney Rhubble", "Wylma Phlyntstone"] ) self.assertEqual(df.name.dtype.name, "string") + if hasattr(pandas, "Float64Dtype"): + self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) + self.assertEqual(df.miles.dtype.name, "Float64") + else: + self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"]) + self.assertEqual(df.miles.dtype.name, "string") + + if hasattr(pandas, "ArrowDtype"): + self.assertEqual( + list(df.date), + [ + datetime.date(1999, 12, 1), + datetime.date(4567, 6, 14), + datetime.date(9999, 12, 31), + ], + ) + self.assertEqual(df.date.dtype.name, "date32[day][pyarrow]") + + self.assertEqual( + list(df.datetime), + [ + datetime.datetime(1999, 12, 31, 0, 0), + datetime.datetime(4567, 12, 31, 0, 0), + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ) + self.assertEqual(df.datetime.dtype.name, "timestamp[us][pyarrow]") + + self.assertEqual( + list(df.time), + [ + datetime.time(0, 0), + datetime.time(12, 0, 0, 232413), + datetime.time(23, 59, 59, 999999), + ], + ) + self.assertEqual(df.time.dtype.name, "time64[us][pyarrow]") + + self.assertEqual( + list(df.timestamp), + [ + datetime.datetime(2015, 6, 9, 8, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(4567, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime( + 9999, 12, 31, 12, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), + ], + ) + self.assertEqual(df.timestamp.dtype.name, "timestamp[us, tz=UTC][pyarrow]") + else: + self.assertEqual( + list(df.date), + [ + pandas.Timestamp("1999-12-01 00:00:00"), + pandas.Timestamp("2229-03-27 01:41:45.161793536"), + pandas.Timestamp("1816-03-29 05:56:08.066277376"), + ], + ) + self.assertEqual(df.date.dtype.name, "datetime64[ns]") + + self.assertEqual( + list(df.datetime), + [ + datetime.datetime(1999, 12, 31, 0, 0), + datetime.datetime(4567, 12, 31, 0, 0), + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ) + self.assertEqual(df.datetime.dtype.name, "object") + + self.assertEqual( + list(df.time), + [ + datetime.time(0, 0), + datetime.time(12, 0, 0, 232413), + datetime.time(23, 59, 59, 999999), + ], + ) + self.assertEqual(df.time.dtype.name, "object") + + self.assertEqual( + list(df.timestamp), + [ + datetime.datetime(2015, 6, 9, 8, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(4567, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime( + 9999, 12, 31, 12, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), + ], + ) + self.assertEqual(df.timestamp.dtype.name, "object") + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_none_dtypes_mapper(self): from google.cloud.bigquery.schema import SchemaField @@ -3514,11 +3667,23 @@ def test_to_dataframe_w_none_dtypes_mapper(self): SchemaField("age", "INTEGER"), SchemaField("seconds", "INT64"), SchemaField("miles", "FLOAT64"), + SchemaField("date", "DATE"), + SchemaField("datetime", "DATETIME"), + SchemaField("time", "TIME"), + SchemaField("timestamp", "TIMESTAMP"), ] row_data = [ - ["Phred Phlyntstone", "true", "32", "23000", "1.77"], - ["Bharney Rhubble", "false", "33", "454000", "6.66"], - ["Wylma Phlyntstone", "true", "29", "341000", "2.0"], + [ + "Phred Phlyntstone", + "true", + "32", + "23000", + "1.77", + "1999-12-01", + "1999-12-31T00:00:00.000000", + "23:59:59.999999", + "1433836800000000", + ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3531,6 +3696,10 @@ def test_to_dataframe_w_none_dtypes_mapper(self): int_dtype=None, float_dtype=None, string_dtype=None, + date_dtype=None, + datetime_dtype=None, + time_dtype=None, + timestamp_dtype=None, ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(df.complete.dtype.name, "bool") @@ -3538,6 +3707,10 @@ def test_to_dataframe_w_none_dtypes_mapper(self): self.assertEqual(df.seconds.dtype.name, "int64") self.assertEqual(df.miles.dtype.name, "float64") self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.date.dtype.name, "datetime64[ns]") + self.assertEqual(df.datetime.dtype.name, "datetime64[ns]") + self.assertEqual(df.time.dtype.name, "object") + self.assertEqual(df.timestamp.dtype.name, "datetime64[ns, UTC]") @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_unsupported_dtypes_mapper(self): @@ -3575,6 +3748,26 @@ def test_to_dataframe_w_unsupported_dtypes_mapper(self): create_bqstorage_client=False, string_dtype=numpy.dtype("object"), ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + date_dtype=numpy.dtype("object"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + datetime_dtype=numpy.dtype("datetime64[us]"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + time_dtype=numpy.dtype("datetime64[us]"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + timestamp_dtype=numpy.dtype("datetime64[us]"), + ) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_column_dtypes(self): From 0500a87587595a7103e06fe72df485882f01a3f5 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 19 Apr 2023 09:35:21 -0400 Subject: [PATCH 1598/2016] chore(main): release 3.10.0 (#1555) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 5bbde01f468b..034f4f32481e 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.10.0](https://github.com/googleapis/python-bigquery/compare/v3.9.0...v3.10.0) (2023-04-18) + + +### Features + +* Add date, datetime, time, timestamp dtype to to_dataframe ([#1547](https://github.com/googleapis/python-bigquery/issues/1547)) ([64e913d](https://github.com/googleapis/python-bigquery/commit/64e913d73832f6363466cbea5ace2337c86fa58b)) + ## [3.9.0](https://github.com/googleapis/python-bigquery/compare/v3.8.0...v3.9.0) (2023-03-28) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 0bc2753575f8..b674396b200e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.9.0" +__version__ = "3.10.0" From d0ccbcf493b8d743ab79c3c2b93794c0d47345da Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 19 Apr 2023 18:08:58 +0200 Subject: [PATCH 1599/2016] chore(deps): update dependency google-cloud-bigquery to v3.10.0 (#1556) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 49dd1c156960..8afe5ef4ae04 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.17.3 -google-cloud-bigquery==3.9.0 +google-cloud-bigquery==3.10.0 google-cloud-bigquery-storage==2.19.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 034d9d00dfca..aa1015481a07 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.1.1 -google-cloud-bigquery==3.9.0 +google-cloud-bigquery==3.10.0 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 grpcio==1.54.0 From b694a6cda9a080664bf310ddd295d632c37fb82e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 20 Apr 2023 18:31:32 +0200 Subject: [PATCH 1600/2016] chore(deps): update all dependencies (#1557) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 8afe5ef4ae04..df992f2bfe07 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -29,8 +29,8 @@ pandas===1.3.5; python_version == '3.7' pandas==2.0.0; python_version >= '3.8' proto-plus==1.22.2 pyarrow==11.0.0 -pyasn1==0.4.8 -pyasn1-modules==0.2.8 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 From 8b99671f06c11de6b2b154964369bbfd55aaffd2 Mon Sep 17 00:00:00 2001 From: abdelmegahed <131036743+abdelmegahed@users.noreply.github.com> Date: Wed, 17 May 2023 15:57:28 -0400 Subject: [PATCH 1601/2016] fix: handle case when expirationMs is None (#1553) * hotfix: handle case when expirationMs is None * Add test for unsetting table exp * Update tests/unit/test_table.py * Update exp_resource for the unsetting_exp test --------- Co-authored-by: Tim Swast --- .../google/cloud/bigquery/table.py | 6 +++++- .../tests/unit/test_table.py | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a34e5dc255b1..bf4a90317c5b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -687,7 +687,11 @@ def partition_expiration(self, value): if self.time_partitioning is None: self._properties[api_field] = {"type": TimePartitioningType.DAY} - self._properties[api_field]["expirationMs"] = str(value) + + if value is None: + self._properties[api_field]["expirationMs"] = None + else: + self._properties[api_field]["expirationMs"] = str(value) @property def clustering_fields(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 53db635fa1b6..a221bc89e906 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1190,6 +1190,25 @@ def test_to_api_repr_w_custom_field(self): } self.assertEqual(resource, exp_resource) + def test_to_api_repr_w_unsetting_expiration(self): + from google.cloud.bigquery.table import TimePartitioningType + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.partition_expiration = None + resource = table.to_api_repr() + + exp_resource = { + "tableReference": table_ref.to_api_repr(), + "labels": {}, + "timePartitioning": { + "expirationMs": None, + "type": TimePartitioningType.DAY, + }, + } + self.assertEqual(resource, exp_resource) + def test__build_resource_w_custom_field(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) From b248474ac49a2c690e77363341a3116bb2a8fc59 Mon Sep 17 00:00:00 2001 From: Gal Zahavi <38544478+galz10@users.noreply.github.com> Date: Thu, 18 May 2023 10:52:48 -0700 Subject: [PATCH 1602/2016] fix: filter None values from OpenTelemetry attributes (#1567) * fix: filter None values from OpenTelemetry attributes * moved filtering out before return value --- .../google/cloud/bigquery/opentelemetry_tracing.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py index 3d0a66ba893f..0e1187c6b1fd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -97,6 +97,11 @@ def _get_final_span_attributes(attributes=None, client=None, job_ref=None): final_attributes.update(job_attributes) if attributes: final_attributes.update(attributes) + + filtered = {k: v for k, v in final_attributes.items() if v is not None} + final_attributes.clear() + final_attributes.update(filtered) + return final_attributes From 3d97b93afcf9d3996e86a63b09da2c9f6a5c2bf6 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 19 May 2023 15:03:04 -0700 Subject: [PATCH 1603/2016] feat: add remote function options to routines (#1558) * feat: add remote function options This PR adds support for defining routines as remote UDFs. * basic integration test * augment tests * rename prop * augment tests * more testing * cover shenanigans --------- Co-authored-by: Tim Swast --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/routine/__init__.py | 2 + .../google/cloud/bigquery/routine/routine.py | 153 ++++++++++++++++++ .../routine/test_remote_function_options.py | 128 +++++++++++++++ .../tests/unit/routine/test_routine.py | 57 +++++++ 5 files changed, 342 insertions(+) create mode 100644 packages/google-cloud-bigquery/tests/unit/routine/test_remote_function_options.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index ebd5b3109096..40e3a157870c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -93,6 +93,7 @@ from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType +from google.cloud.bigquery.routine import RemoteFunctionOptions from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.standard_sql import StandardSqlDataType @@ -154,6 +155,7 @@ "Routine", "RoutineArgument", "RoutineReference", + "RemoteFunctionOptions", # Shared helpers "SchemaField", "PolicyTagList", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py index 7353073c8c1f..e576b0d49c0f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py @@ -20,6 +20,7 @@ from google.cloud.bigquery.routine.routine import RoutineArgument from google.cloud.bigquery.routine.routine import RoutineReference from google.cloud.bigquery.routine.routine import RoutineType +from google.cloud.bigquery.routine.routine import RemoteFunctionOptions __all__ = ( @@ -28,4 +29,5 @@ "RoutineArgument", "RoutineReference", "RoutineType", + "RemoteFunctionOptions", ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py index 3c0919003292..36ed0372880f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py @@ -67,6 +67,7 @@ class Routine(object): "type_": "routineType", "description": "description", "determinism_level": "determinismLevel", + "remote_function_options": "remoteFunctionOptions", } def __init__(self, routine_ref, **kwargs) -> None: @@ -297,6 +298,37 @@ def determinism_level(self): def determinism_level(self, value): self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value + @property + def remote_function_options(self): + """Optional[google.cloud.bigquery.routine.RemoteFunctionOptions]: Configures remote function + options for a routine. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.routine.RemoteFunctionOptions` or + :data:`None`. + """ + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["remote_function_options"] + ) + if prop is not None: + return RemoteFunctionOptions.from_api_repr(prop) + + @remote_function_options.setter + def remote_function_options(self, value): + api_repr = value + if isinstance(value, RemoteFunctionOptions): + api_repr = value.to_api_repr() + elif value is not None: + raise ValueError( + "value must be google.cloud.bigquery.routine.RemoteFunctionOptions " + "or None" + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["remote_function_options"] + ] = api_repr + @classmethod def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. @@ -563,3 +595,124 @@ def __str__(self): This is a fully-qualified ID, including the project ID and dataset ID. """ return "{}.{}.{}".format(self.project, self.dataset_id, self.routine_id) + + +class RemoteFunctionOptions(object): + """Configuration options for controlling remote BigQuery functions.""" + + _PROPERTY_TO_API_FIELD = { + "endpoint": "endpoint", + "connection": "connection", + "max_batching_rows": "maxBatchingRows", + "user_defined_context": "userDefinedContext", + } + + def __init__( + self, + endpoint=None, + connection=None, + max_batching_rows=None, + user_defined_context=None, + _properties=None, + ) -> None: + if _properties is None: + _properties = {} + self._properties = _properties + + if endpoint is not None: + self.endpoint = endpoint + if connection is not None: + self.connection = connection + if max_batching_rows is not None: + self.max_batching_rows = max_batching_rows + if user_defined_context is not None: + self.user_defined_context = user_defined_context + + @property + def connection(self): + """string: Fully qualified name of the user-provided connection object which holds the authentication information to send requests to the remote service. + + Format is "projects/{projectId}/locations/{locationId}/connections/{connectionId}" + """ + return _helpers._str_or_none(self._properties.get("connection")) + + @connection.setter + def connection(self, value): + self._properties["connection"] = _helpers._str_or_none(value) + + @property + def endpoint(self): + """string: Endpoint of the user-provided remote service + + Example: "https://us-east1-my_gcf_project.cloudfunctions.net/remote_add" + """ + return _helpers._str_or_none(self._properties.get("endpoint")) + + @endpoint.setter + def endpoint(self, value): + self._properties["endpoint"] = _helpers._str_or_none(value) + + @property + def max_batching_rows(self): + """int64: Max number of rows in each batch sent to the remote service. + + If absent or if 0, BigQuery dynamically decides the number of rows in a batch. + """ + return _helpers._int_or_none(self._properties.get("maxBatchingRows")) + + @max_batching_rows.setter + def max_batching_rows(self, value): + self._properties["maxBatchingRows"] = _helpers._str_or_none(value) + + @property + def user_defined_context(self): + """Dict[str, str]: User-defined context as a set of key/value pairs, + which will be sent as function invocation context together with + batched arguments in the requests to the remote service. The total + number of bytes of keys and values must be less than 8KB. + """ + return self._properties.get("userDefinedContext") + + @user_defined_context.setter + def user_defined_context(self, value): + if not isinstance(value, dict): + raise ValueError("value must be dictionary") + self._properties["userDefinedContext"] = value + + @classmethod + def from_api_repr(cls, resource: dict) -> "RemoteFunctionOptions": + """Factory: construct remote function options given its API representation. + + Args: + resource (Dict[str, object]): Resource, as returned from the API. + + Returns: + google.cloud.bigquery.routine.RemoteFunctionOptions: + Python object, as parsed from ``resource``. + """ + ref = cls() + ref._properties = resource + return ref + + def to_api_repr(self) -> dict: + """Construct the API resource representation of this RemoteFunctionOptions. + + Returns: + Dict[str, object]: Remote function options represented as an API resource. + """ + return self._properties + + def __eq__(self, other): + if not isinstance(other, RemoteFunctionOptions): + return NotImplemented + return self._properties == other._properties + + def __ne__(self, other): + return not self == other + + def __repr__(self): + all_properties = [ + "{}={}".format(property_name, repr(getattr(self, property_name))) + for property_name in sorted(self._PROPERTY_TO_API_FIELD) + ] + return "RemoteFunctionOptions({})".format(", ".join(all_properties)) diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_remote_function_options.py b/packages/google-cloud-bigquery/tests/unit/routine/test_remote_function_options.py new file mode 100644 index 000000000000..b476dca1ebdb --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_remote_function_options.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +ENDPOINT = "https://some.endpoint" +CONNECTION = "connection_string" +MAX_BATCHING_ROWS = 50 +USER_DEFINED_CONTEXT = { + "foo": "bar", +} + + +@pytest.fixture +def target_class(): + from google.cloud.bigquery.routine import RemoteFunctionOptions + + return RemoteFunctionOptions + + +def test_ctor(target_class): + + options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + assert options.endpoint == ENDPOINT + assert options.connection == CONNECTION + assert options.max_batching_rows == MAX_BATCHING_ROWS + assert options.user_defined_context == USER_DEFINED_CONTEXT + + +def test_empty_ctor(target_class): + options = target_class() + assert options._properties == {} + options = target_class(_properties=None) + assert options._properties == {} + options = target_class(_properties={}) + assert options._properties == {} + + +def test_ctor_bad_context(target_class): + with pytest.raises(ValueError, match="value must be dictionary"): + target_class(user_defined_context=[1, 2, 3, 4]) + + +def test_from_api_repr(target_class): + resource = { + "endpoint": ENDPOINT, + "connection": CONNECTION, + "maxBatchingRows": MAX_BATCHING_ROWS, + "userDefinedContext": USER_DEFINED_CONTEXT, + "someRandomField": "someValue", + } + options = target_class.from_api_repr(resource) + assert options.endpoint == ENDPOINT + assert options.connection == CONNECTION + assert options.max_batching_rows == MAX_BATCHING_ROWS + assert options.user_defined_context == USER_DEFINED_CONTEXT + assert options._properties["someRandomField"] == "someValue" + + +def test_from_api_repr_w_minimal_resource(target_class): + resource = {} + options = target_class.from_api_repr(resource) + assert options.endpoint is None + assert options.connection is None + assert options.max_batching_rows is None + assert options.user_defined_context is None + + +def test_from_api_repr_w_unknown_fields(target_class): + resource = {"thisFieldIsNotInTheProto": "just ignore me"} + options = target_class.from_api_repr(resource) + assert options._properties is resource + + +def test_eq(target_class): + options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + other_options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + assert options == other_options + assert not (options != other_options) + + empty_options = target_class() + assert not (options == empty_options) + assert options != empty_options + + notanarg = object() + assert not (options == notanarg) + assert options != notanarg + + +def test_repr(target_class): + options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + actual_repr = repr(options) + assert actual_repr == ( + "RemoteFunctionOptions(connection='connection_string', endpoint='https://some.endpoint', max_batching_rows=50, user_defined_context={'foo': 'bar'})" + ) diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py index 80a3def7320a..87767200c80a 100644 --- a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py @@ -75,6 +75,13 @@ def test_ctor_w_properties(target_class): description = "A routine description." determinism_level = bigquery.DeterminismLevel.NOT_DETERMINISTIC + options = bigquery.RemoteFunctionOptions( + endpoint="https://some.endpoint", + connection="connection_string", + max_batching_rows=99, + user_defined_context={"foo": "bar"}, + ) + actual_routine = target_class( routine_id, arguments=arguments, @@ -84,6 +91,7 @@ def test_ctor_w_properties(target_class): type_=type_, description=description, determinism_level=determinism_level, + remote_function_options=options, ) ref = RoutineReference.from_string(routine_id) @@ -97,6 +105,18 @@ def test_ctor_w_properties(target_class): assert ( actual_routine.determinism_level == bigquery.DeterminismLevel.NOT_DETERMINISTIC ) + assert actual_routine.remote_function_options == options + + +def test_ctor_invalid_remote_function_options(target_class): + with pytest.raises( + ValueError, + match=".*must be google.cloud.bigquery.routine.RemoteFunctionOptions.*", + ): + target_class( + "my-proj.my_dset.my_routine", + remote_function_options=object(), + ) def test_from_api_repr(target_class): @@ -126,6 +146,14 @@ def test_from_api_repr(target_class): "someNewField": "someValue", "description": "A routine description.", "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC, + "remoteFunctionOptions": { + "endpoint": "https://some.endpoint", + "connection": "connection_string", + "maxBatchingRows": 50, + "userDefinedContext": { + "foo": "bar", + }, + }, } actual_routine = target_class.from_api_repr(resource) @@ -160,6 +188,10 @@ def test_from_api_repr(target_class): assert actual_routine._properties["someNewField"] == "someValue" assert actual_routine.description == "A routine description." assert actual_routine.determinism_level == "DETERMINISTIC" + assert actual_routine.remote_function_options.endpoint == "https://some.endpoint" + assert actual_routine.remote_function_options.connection == "connection_string" + assert actual_routine.remote_function_options.max_batching_rows == 50 + assert actual_routine.remote_function_options.user_defined_context == {"foo": "bar"} def test_from_api_repr_tvf_function(target_class): @@ -261,6 +293,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.type_ is None assert actual_routine.description is None assert actual_routine.determinism_level is None + assert actual_routine.remote_function_options is None def test_from_api_repr_w_unknown_fields(target_class): @@ -421,6 +454,24 @@ def test_from_api_repr_w_unknown_fields(target_class): ["someNewField"], {"someNewField": "someValue"}, ), + ( + { + "routineType": "SCALAR_FUNCTION", + "remoteFunctionOptions": { + "endpoint": "https://some_endpoint", + "connection": "connection_string", + "max_batching_rows": 101, + }, + }, + ["remote_function_options"], + { + "remoteFunctionOptions": { + "endpoint": "https://some_endpoint", + "connection": "connection_string", + "max_batching_rows": 101, + }, + }, + ), ], ) def test_build_resource(object_under_test, resource, filter_fields, expected): @@ -497,6 +548,12 @@ def test_set_description_w_none(object_under_test): assert object_under_test._properties["description"] is None +def test_set_remote_function_options_w_none(object_under_test): + object_under_test.remote_function_options = None + assert object_under_test.remote_function_options is None + assert object_under_test._properties["remoteFunctionOptions"] is None + + def test_repr(target_class): model = target_class("my-proj.my_dset.my_routine") actual_routine = repr(model) From 7f2d5b82f10098cced8e3f9263a0dd97b534fc2e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 25 May 2023 12:44:22 -0400 Subject: [PATCH 1604/2016] build(deps): bump requests from 2.28.1 to 2.31.0 in /synthtool/gcp/templates/python_library/.kokoro (#1574) Source-Link: https://github.com/googleapis/synthtool/commit/30bd01b4ab78bf1b2a425816e15b3e7e090993dd Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:9bc5fa3b62b091f60614c08a7fb4fd1d3e1678e326f34dd66ce1eefb5dc3267b Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 3 ++- packages/google-cloud-bigquery/.kokoro/requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index b8edda51cf46..32b3c486591a 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 + digest: sha256:9bc5fa3b62b091f60614c08a7fb4fd1d3e1678e326f34dd66ce1eefb5dc3267b +# created: 2023-05-25T14:56:16.294623272Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 66a2172a76a8..3b8d7ee81848 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -419,9 +419,9 @@ readme-renderer==37.3 \ --hash=sha256:cd653186dfc73055656f090f227f5cb22a046d7f71a841dfa305f55c9a513273 \ --hash=sha256:f67a16caedfa71eef48a31b39708637a6f4664c4394801a7b0d6432d13907343 # via twine -requests==2.28.1 \ - --hash=sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983 \ - --hash=sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349 +requests==2.31.0 \ + --hash=sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f \ + --hash=sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1 # via # gcp-releasetool # google-api-core From afc92d587263de742d1ca453aa28d0b987dc3a44 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 26 May 2023 00:12:04 +0200 Subject: [PATCH 1605/2016] chore(deps): update dependency requests to v2.31.0 [security] (#1573) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index df992f2bfe07..d5c384e07e63 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -36,7 +36,7 @@ pyparsing==3.0.9 python-dateutil==2.8.2 pytz==2023.3 PyYAML==6.0 -requests==2.28.2 +requests==2.31.0 rsa==4.9 Shapely==2.0.1 six==1.16.0 From a44cd6190a4318e86d08f9bc06a50be39a18a6be Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 1 Jun 2023 13:24:58 +0200 Subject: [PATCH 1606/2016] chore(deps): update all dependencies (#1560) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: meredithslota Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 24 +++++++++---------- .../samples/magics/requirements.txt | 10 ++++---- .../samples/snippets/requirements.txt | 10 ++++---- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index d5c384e07e63..b6695909cc7f 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==23.1.0 -certifi==2022.12.7 +certifi==2023.5.7 cffi==1.15.1 charset-normalizer==3.1.0 click==8.1.3 @@ -7,28 +7,28 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.1.1 -Fiona==1.9.3 +Fiona==1.9.4.post1 geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' -geopandas==0.12.2; python_version >= '3.8' +geopandas==0.13.0; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.17.3 +google-auth==2.18.1 google-cloud-bigquery==3.10.0 google-cloud-bigquery-storage==2.19.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 -google-resumable-media==2.4.1 +google-resumable-media==2.5.0 googleapis-common-protos==1.59.0 -grpcio==1.54.0 +grpcio==1.54.2 idna==3.4 -libcst==0.4.9 -munch==2.5.0 +libcst==1.0.0 +munch==3.0.0 mypy-extensions==1.0.0 packaging==23.1 pandas===1.3.5; python_version == '3.7' -pandas==2.0.0; python_version >= '3.8' +pandas==2.0.1; python_version >= '3.8' proto-plus==1.22.2 -pyarrow==11.0.0 +pyarrow==12.0.0 pyasn1==0.5.0 pyasn1-modules==0.3.0 pycparser==2.21 @@ -40,6 +40,6 @@ requests==2.31.0 rsa==4.9 Shapely==2.0.1 six==1.16.0 -typing-extensions==4.5.0 -typing-inspect==0.8.0 +typing-extensions==4.6.2 +typing-inspect==0.9.0 urllib3==1.26.15 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 956b03dda131..b50144baad20 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.1.1 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.54.0 +grpcio==1.54.2 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.12.0; python_version >= '3.9' +ipython==8.13.2; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.0; python_version >= '3.8' -pyarrow==11.0.0 +pandas==2.0.1; python_version >= '3.8' +pyarrow==12.0.0 pytz==2023.3 -typing-extensions==4.5.0 +typing-extensions==4.6.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index aa1015481a07..726bd2046743 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -2,15 +2,15 @@ db-dtypes==1.1.1 google-cloud-bigquery==3.10.0 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.54.0 +grpcio==1.54.2 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.12.0; python_version >= '3.9' +ipython==8.13.2; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.0; python_version >= '3.8' -pyarrow==11.0.0 +pandas==2.0.1; python_version >= '3.8' +pyarrow==12.0.0 pytz==2023.3 -typing-extensions==4.5.0 +typing-extensions==4.6.2 From 7eea5b4f163a9e8cd9d36f296a96dca36bf588fc Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 1 Jun 2023 13:58:41 +0200 Subject: [PATCH 1607/2016] chore(deps): update all dependencies (#1576) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index b6695909cc7f..82a1daadcc41 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,9 +12,9 @@ geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.13.0; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.18.1 +google-auth==2.19.0 google-cloud-bigquery==3.10.0 -google-cloud-bigquery-storage==2.19.1 +google-cloud-bigquery-storage==2.20.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.5.0 @@ -26,7 +26,7 @@ munch==3.0.0 mypy-extensions==1.0.0 packaging==23.1 pandas===1.3.5; python_version == '3.7' -pandas==2.0.1; python_version >= '3.8' +pandas==2.0.2; python_version >= '3.8' proto-plus==1.22.2 pyarrow==12.0.0 pyasn1==0.5.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index b50144baad20..b545916c3c22 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.1.1 -google-cloud-bigquery-storage==2.19.1 +google-cloud-bigquery-storage==2.20.0 google-auth-oauthlib==1.0.0 grpcio==1.54.2 ipywidgets==8.0.6 @@ -9,7 +9,7 @@ ipython==8.13.2; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.1; python_version >= '3.8' +pandas==2.0.2; python_version >= '3.8' pyarrow==12.0.0 pytz==2023.3 typing-extensions==4.6.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 726bd2046743..d2878d20284d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.1.1 google-cloud-bigquery==3.10.0 -google-cloud-bigquery-storage==2.19.1 +google-cloud-bigquery-storage==2.20.0 google-auth-oauthlib==1.0.0 grpcio==1.54.2 ipywidgets==8.0.6 @@ -10,7 +10,7 @@ ipython==8.13.2; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.1; python_version >= '3.8' +pandas==2.0.2; python_version >= '3.8' pyarrow==12.0.0 pytz==2023.3 typing-extensions==4.6.2 From 523fb3d83e2a44597b4131a5008dde94a8991a31 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 1 Jun 2023 11:10:15 -0500 Subject: [PATCH 1608/2016] fix: raise most recent exception when not able to fetch query job after starting the job (#1362) * fix: raise most recent exception when not able to fetch query job after starting the job Towards internal issue 247809965 * update unit test * revert most changes to the test and explain why we're looking for a different exception from the original 'conflict' --- .../google/cloud/bigquery/_job_helpers.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_client.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 33fc722611f0..57846b1909d1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -105,7 +105,7 @@ def do_query(): timeout=timeout, ) except core_exceptions.GoogleAPIError: # (includes RetryError) - raise create_exc + raise else: return query_job else: diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c155e2bc65dc..cf0aa4028d8f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5092,12 +5092,14 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): QueryJob, "_begin", side_effect=job_create_error ) get_job_patcher = mock.patch.object( - client, "get_job", side_effect=DataLoss("we lost yor job, sorry") + client, "get_job", side_effect=DataLoss("we lost your job, sorry") ) with job_begin_patcher, get_job_patcher: - # If get job request fails, the original exception should be raised. - with pytest.raises(Conflict, match="Job already exists."): + # If get job request fails but supposedly there does exist a job + # with this ID already, raise the exception explaining why we + # couldn't recover the job. + with pytest.raises(DataLoss, match="we lost your job, sorry"): client.query("SELECT 1;", job_id=None) def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): From 128accd6ac70b5f93f788dd8c4eb8cf351881d55 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 09:41:36 -0700 Subject: [PATCH 1609/2016] chore(main): release 3.11.0 (#1568) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 14 ++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 034f4f32481e..bc9cfd7b4d75 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.11.0](https://github.com/googleapis/python-bigquery/compare/v3.10.0...v3.11.0) (2023-06-01) + + +### Features + +* Add remote function options to routines ([#1558](https://github.com/googleapis/python-bigquery/issues/1558)) ([84ad11d](https://github.com/googleapis/python-bigquery/commit/84ad11d00d99d279e4e6e0fa4ca60e59575b1dad)) + + +### Bug Fixes + +* Filter None values from OpenTelemetry attributes ([#1567](https://github.com/googleapis/python-bigquery/issues/1567)) ([9ea2e21](https://github.com/googleapis/python-bigquery/commit/9ea2e21c35783782993d1ad2d3b910bbe9981ce2)) +* Handle case when expirationMs is None ([#1553](https://github.com/googleapis/python-bigquery/issues/1553)) ([fa6e13d](https://github.com/googleapis/python-bigquery/commit/fa6e13d5006caadb36899b4e2a24ca82b7f11b17)) +* Raise most recent exception when not able to fetch query job after starting the job ([#1362](https://github.com/googleapis/python-bigquery/issues/1362)) ([09cc1df](https://github.com/googleapis/python-bigquery/commit/09cc1df6babaf90ea0b0a6fd926f8013822a31ed)) + ## [3.10.0](https://github.com/googleapis/python-bigquery/compare/v3.9.0...v3.10.0) (2023-04-18) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index b674396b200e..0e93e961e552 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.10.0" +__version__ = "3.11.0" From 2e69840891a1ce210844f1405cbbb29016d99dd5 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 3 Jun 2023 19:22:05 -0400 Subject: [PATCH 1610/2016] build(deps): bump cryptography from 39.0.1 to 41.0.0 in /synthtool/gcp/templates/python_library/.kokoro (#1579) Source-Link: https://github.com/googleapis/synthtool/commit/d0f51a0c2a9a6bcca86911eabea9e484baadf64b Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:240b5bcc2bafd450912d2da2be15e62bc6de2cf839823ae4bf94d4f392b451dc Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/requirements.txt | 42 +++++++++---------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 32b3c486591a..02a4dedced74 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:9bc5fa3b62b091f60614c08a7fb4fd1d3e1678e326f34dd66ce1eefb5dc3267b -# created: 2023-05-25T14:56:16.294623272Z + digest: sha256:240b5bcc2bafd450912d2da2be15e62bc6de2cf839823ae4bf94d4f392b451dc +# created: 2023-06-03T21:25:37.968717478Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 3b8d7ee81848..c7929db6d152 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -113,28 +113,26 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==39.0.1 \ - --hash=sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4 \ - --hash=sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f \ - --hash=sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502 \ - --hash=sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41 \ - --hash=sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965 \ - --hash=sha256:6f8ba7f0328b79f08bdacc3e4e66fb4d7aab0c3584e0bd41328dce5262e26b2e \ - --hash=sha256:706843b48f9a3f9b9911979761c91541e3d90db1ca905fd63fee540a217698bc \ - --hash=sha256:807ce09d4434881ca3a7594733669bd834f5b2c6d5c7e36f8c00f691887042ad \ - --hash=sha256:83e17b26de248c33f3acffb922748151d71827d6021d98c70e6c1a25ddd78505 \ - --hash=sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388 \ - --hash=sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6 \ - --hash=sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2 \ - --hash=sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac \ - --hash=sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695 \ - --hash=sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6 \ - --hash=sha256:e422abdec8b5fa8462aa016786680720d78bdce7a30c652b7fadf83a4ba35336 \ - --hash=sha256:ef8b72fa70b348724ff1218267e7f7375b8de4e8194d1636ee60510aae104cd0 \ - --hash=sha256:f0c64d1bd842ca2633e74a1a28033d139368ad959872533b1bab8c80e8240a0c \ - --hash=sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106 \ - --hash=sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a \ - --hash=sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8 +cryptography==41.0.0 \ + --hash=sha256:0ddaee209d1cf1f180f1efa338a68c4621154de0afaef92b89486f5f96047c55 \ + --hash=sha256:14754bcdae909d66ff24b7b5f166d69340ccc6cb15731670435efd5719294895 \ + --hash=sha256:344c6de9f8bda3c425b3a41b319522ba3208551b70c2ae00099c205f0d9fd3be \ + --hash=sha256:34d405ea69a8b34566ba3dfb0521379b210ea5d560fafedf9f800a9a94a41928 \ + --hash=sha256:3680248309d340fda9611498a5319b0193a8dbdb73586a1acf8109d06f25b92d \ + --hash=sha256:3c5ef25d060c80d6d9f7f9892e1d41bb1c79b78ce74805b8cb4aa373cb7d5ec8 \ + --hash=sha256:4ab14d567f7bbe7f1cdff1c53d5324ed4d3fc8bd17c481b395db224fb405c237 \ + --hash=sha256:5c1f7293c31ebc72163a9a0df246f890d65f66b4a40d9ec80081969ba8c78cc9 \ + --hash=sha256:6b71f64beeea341c9b4f963b48ee3b62d62d57ba93eb120e1196b31dc1025e78 \ + --hash=sha256:7d92f0248d38faa411d17f4107fc0bce0c42cae0b0ba5415505df72d751bf62d \ + --hash=sha256:8362565b3835ceacf4dc8f3b56471a2289cf51ac80946f9087e66dc283a810e0 \ + --hash=sha256:84a165379cb9d411d58ed739e4af3396e544eac190805a54ba2e0322feb55c46 \ + --hash=sha256:88ff107f211ea696455ea8d911389f6d2b276aabf3231bf72c8853d22db755c5 \ + --hash=sha256:9f65e842cb02550fac96536edb1d17f24c0a338fd84eaf582be25926e993dde4 \ + --hash=sha256:a4fc68d1c5b951cfb72dfd54702afdbbf0fb7acdc9b7dc4301bbf2225a27714d \ + --hash=sha256:b7f2f5c525a642cecad24ee8670443ba27ac1fab81bba4cc24c7b6b41f2d0c75 \ + --hash=sha256:b846d59a8d5a9ba87e2c3d757ca019fa576793e8758174d3868aecb88d6fc8eb \ + --hash=sha256:bf8fc66012ca857d62f6a347007e166ed59c0bc150cefa49f28376ebe7d992a2 \ + --hash=sha256:f5d0bf9b252f30a31664b6f64432b4730bb7038339bd18b1fafe129cfc2be9be # via # gcp-releasetool # secretstorage From 98445f29d5df723cf647b07404b1e532a710afdc Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Fri, 9 Jun 2023 18:01:56 +0300 Subject: [PATCH 1611/2016] docs: add/reformat return types for cloud RAD docs (#1582) * docs: add/reformat return types for cloud RAD docs * fix typos --- .../google/cloud/bigquery/client.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index d8fbfb69eb1e..d81816d41a0f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -327,7 +327,8 @@ def get_service_account_email( before using ``retry``. Returns: - str: service account email address + str: + service account email address Example: @@ -1932,7 +1933,8 @@ def job_from_resource( resource (Dict): one job resource from API response Returns: - The job instance, constructed via the resource. + Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]: + The job instance, constructed via the resource. """ config = resource.get("configuration", {}) if "load" in config: @@ -2064,7 +2066,8 @@ def get_job( before using ``retry``. Returns: - Job instance, based on the resource returned by the API. + Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: + Job instance, based on the resource returned by the API. """ extra_params = {"projection": "full"} @@ -3954,12 +3957,13 @@ def _schema_to_json_file_object(self, schema_list, file_obj): """ json.dump(schema_list, file_obj, indent=2, sort_keys=True) - def schema_from_json(self, file_or_path: "PathType"): + def schema_from_json(self, file_or_path: "PathType") -> List[SchemaField]: """Takes a file object or file path that contains json that describes a table schema. Returns: - List of schema field objects. + List[SchemaField]: + List of :class:`~google.cloud.bigquery.schema.SchemaField` objects. """ if isinstance(file_or_path, io.IOBase): return self._schema_from_json_file_object(file_or_path) From 3834dd41901d0d4a36a4dcfa7af16db7205133f3 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 13 Jun 2023 11:11:38 -0400 Subject: [PATCH 1612/2016] chore(main): release 3.11.1 (#1583) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index bc9cfd7b4d75..9003d2bfcab7 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.11.1](https://github.com/googleapis/python-bigquery/compare/v3.11.0...v3.11.1) (2023-06-09) + + +### Documentation + +* Add/reformat return types for cloud RAD docs ([#1582](https://github.com/googleapis/python-bigquery/issues/1582)) ([6efdce1](https://github.com/googleapis/python-bigquery/commit/6efdce13cc3b25d37d22a856f2308daed569e637)) + ## [3.11.0](https://github.com/googleapis/python-bigquery/compare/v3.10.0...v3.11.0) (2023-06-01) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 0e93e961e552..90c53a0dd6bf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.11.0" +__version__ = "3.11.1" From 197f92597360c30d494f3879f03a12a3aea539e7 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 21 Jun 2023 10:56:12 -0400 Subject: [PATCH 1613/2016] fix: updates tests based on revised hacker_news tables (#1591) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes four broken tests that failed due to an unexpected change in the Google Public Dataset: Hacker News. The `comments` table was deleted and only the `full` table remained. This edit updates the name of the table in four tests and updates the names of columns in the table as well as updates the expected results for one of the tests. Fixes #1590 🦕 --- .../tests/system/test_client.py | 29 ++++++++++--------- .../tests/system/test_pandas.py | 20 ++++++------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 1437328a88ce..f4757e30fa1f 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -1706,8 +1706,8 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): cursor.execute( """ - SELECT id, `by`, time_ts - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp + FROM `bigquery-public-data.hacker_news.full` ORDER BY `id` ASC LIMIT 100000 """ @@ -1717,27 +1717,28 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): field_name = operator.itemgetter(0) fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] - # Since DB API is not thread safe, only a single result stream should be # requested by the BQ storage client, meaning that results should arrive # in the sorted order. + expected_data = [ [ - ("by", "sama"), - ("id", 15), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)), + ("by", "pg"), + ("id", 1), + ("timestamp", datetime.datetime(2006, 10, 9, 18, 21, 51, tzinfo=UTC)), ], [ - ("by", "pg"), - ("id", 17), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)), + ("by", "phyllis"), + ("id", 2), + ("timestamp", datetime.datetime(2006, 10, 9, 18, 30, 28, tzinfo=UTC)), ], [ - ("by", "pg"), - ("id", 22), - ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)), + ("by", "phyllis"), + ("id", 3), + ("timestamp", datetime.datetime(2006, 10, 9, 18, 40, 33, tzinfo=UTC)), ], ] + self.assertEqual(fetched_data, expected_data) def test_dbapi_dry_run_query(self): @@ -1769,8 +1770,8 @@ def test_dbapi_connection_does_not_leak_sockets(self): cursor.execute( """ - SELECT id, `by`, time_ts - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp + FROM `bigquery-public-data.hacker_news.full` ORDER BY `id` ASC LIMIT 100000 """ diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index ea8cc6d636fe..726b68f7cf51 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -740,8 +740,8 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( def test_query_results_to_dataframe(bigquery_client): QUERY = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp, dead + FROM `bigquery-public-data.hacker_news.full` LIMIT 10 """ @@ -749,12 +749,12 @@ def test_query_results_to_dataframe(bigquery_client): assert isinstance(df, pandas.DataFrame) assert len(df) == 10 # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] + column_names = ["id", "by", "timestamp", "dead"] assert list(df) == column_names # verify the column names exp_datatypes = { "id": int, - "author": str, - "time_ts": pandas.Timestamp, + "by": str, + "timestamp": pandas.Timestamp, "dead": bool, } for _, row in df.iterrows(): @@ -766,8 +766,8 @@ def test_query_results_to_dataframe(bigquery_client): def test_query_results_to_dataframe_w_bqstorage(bigquery_client): query = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp, dead + FROM `bigquery-public-data.hacker_news.full` LIMIT 10 """ @@ -779,12 +779,12 @@ def test_query_results_to_dataframe_w_bqstorage(bigquery_client): assert isinstance(df, pandas.DataFrame) assert len(df) == 10 # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] + column_names = ["id", "by", "timestamp", "dead"] assert list(df) == column_names exp_datatypes = { "id": int, - "author": str, - "time_ts": pandas.Timestamp, + "by": str, + "timestamp": pandas.Timestamp, "dead": bool, } for index, row in df.iterrows(): From 4b7a1f8172e2c378dd7024cb840574659a91c19a Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 21 Jun 2023 14:40:13 -0400 Subject: [PATCH 1614/2016] test: adjusts test input body based on changes to google-api_core (#1588) Adjusts the body of a specific test to adapt to some changes that come through from google-api-core. --- packages/google-cloud-bigquery/tests/unit/job/test_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index a9760aa9b8ff..a662e92d4e2b 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -18,6 +18,7 @@ from google.api_core import exceptions import google.api_core.retry +from google.api_core.future import polling import mock import pytest @@ -970,7 +971,7 @@ def test_result_default_wo_state(self): client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - self.assertIs(job.result(), job) + self.assertIs(job.result(retry=polling.DEFAULT_RETRY), job) begin_call = mock.call( method="POST", From 434e27a16faeef9d0c536696d4afe7bf5639abff Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 22 Jun 2023 08:19:35 -0400 Subject: [PATCH 1615/2016] chore(main): release 3.11.2 (#1592) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Chalmer Lowe --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 9003d2bfcab7..5cf542670324 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.11.2](https://github.com/googleapis/python-bigquery/compare/v3.11.1...v3.11.2) (2023-06-21) + + +### Bug Fixes + +* Updates tests based on revised hacker_news tables ([#1591](https://github.com/googleapis/python-bigquery/issues/1591)) ([d73cf49](https://github.com/googleapis/python-bigquery/commit/d73cf495b8dfa032a43dc1d58599d0691aaa0efb)) + ## [3.11.1](https://github.com/googleapis/python-bigquery/compare/v3.11.0...v3.11.1) (2023-06-09) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 90c53a0dd6bf..ced5a95a7e32 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.11.1" +__version__ = "3.11.2" From c63f6eb011be1f7dfd98d5d8b8bc72cf5fa87007 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 22 Jun 2023 10:30:13 -0500 Subject: [PATCH 1616/2016] fix: type annotations include Optional when None is accepted (#1554) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #1545 🦕 --- .../google/cloud/bigquery/_job_helpers.py | 4 +- .../google/cloud/bigquery/client.py | 108 +++++++++--------- .../google/cloud/bigquery/dataset.py | 2 +- .../google/cloud/bigquery/job/base.py | 21 +++- .../google/cloud/bigquery/job/query.py | 10 +- .../google/cloud/bigquery/routine/routine.py | 2 +- .../google/cloud/bigquery/schema.py | 4 +- .../google/cloud/bigquery/table.py | 8 +- 8 files changed, 86 insertions(+), 73 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 57846b1909d1..09daaa2a23c9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -64,7 +64,7 @@ def query_jobs_insert( job_config: Optional[job.QueryJobConfig], job_id: Optional[str], job_id_prefix: Optional[str], - location: str, + location: Optional[str], project: str, retry: retries.Retry, timeout: Optional[float], @@ -215,7 +215,7 @@ def query_jobs_query( client: "Client", query: str, job_config: Optional[job.QueryJobConfig], - location: str, + location: Optional[str], project: str, retry: retries.Retry, timeout: Optional[float], diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index d81816d41a0f..5a929fea4c3d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -307,7 +307,7 @@ def close(self): def get_service_account_email( self, - project: str = None, + project: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> str: @@ -355,7 +355,7 @@ def get_service_account_email( def list_projects( self, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -417,11 +417,11 @@ def api_request(*args, **kwargs): def list_datasets( self, - project: str = None, + project: Optional[str] = None, include_all: bool = False, - filter: str = None, + filter: Optional[str] = None, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -498,7 +498,9 @@ def api_request(*args, **kwargs): page_size=page_size, ) - def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: + def dataset( + self, dataset_id: str, project: Optional[str] = None + ) -> DatasetReference: """Deprecated: Construct a reference to a dataset. .. deprecated:: 1.24.0 @@ -890,7 +892,7 @@ def set_iam_policy( self, table: Union[Table, TableReference, TableListItem, str], policy: Policy, - updateMask: str = None, + updateMask: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Policy: @@ -1350,7 +1352,7 @@ def list_models( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -1427,7 +1429,7 @@ def list_routines( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -1504,7 +1506,7 @@ def list_tables( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -1862,9 +1864,9 @@ def _get_query_results( self, job_id: str, retry: retries.Retry, - project: str = None, + project: Optional[str] = None, timeout_ms: Optional[int] = None, - location: str = None, + location: Optional[str] = None, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -2039,8 +2041,8 @@ def create_job( def get_job( self, job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], - project: str = None, - location: str = None, + project: Optional[str] = None, + location: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: @@ -2103,8 +2105,8 @@ def get_job( def cancel_job( self, job_id: str, - project: str = None, - location: str = None, + project: Optional[str] = None, + location: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: @@ -2181,12 +2183,12 @@ def cancel_job( def list_jobs( self, - project: str = None, + project: Optional[str] = None, parent_job: Optional[Union[QueryJob, str]] = None, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, all_users: bool = None, - state_filter: str = None, + state_filter: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, @@ -2297,11 +2299,11 @@ def load_table_from_uri( self, source_uris: Union[str, Sequence[str]], destination: Union[Table, TableReference, TableListItem, str], - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: @@ -2386,11 +2388,11 @@ def load_table_from_file( rewind: bool = False, size: Optional[int] = None, num_retries: int = _DEFAULT_NUM_RETRIES, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2494,11 +2496,11 @@ def load_table_from_dataframe( dataframe: "pandas.DataFrame", destination: Union[Table, TableReference, str], num_retries: int = _DEFAULT_NUM_RETRIES, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, parquet_compression: str = "snappy", timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: @@ -2751,11 +2753,11 @@ def load_table_from_json( json_rows: Iterable[Dict[str, Any]], destination: Union[Table, TableReference, TableListItem, str], num_retries: int = _DEFAULT_NUM_RETRIES, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -3064,10 +3066,10 @@ def copy_table( Sequence[Union[Table, TableReference, TableListItem, str]], ], destination: Union[Table, TableReference, TableListItem, str], - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, job_config: CopyJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, @@ -3170,10 +3172,10 @@ def extract_table( self, source: Union[Table, TableReference, TableListItem, Model, ModelReference, str], destination_uris: Union[str, Sequence[str]], - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, job_config: ExtractJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, @@ -3270,10 +3272,10 @@ def query( self, query: str, job_config: QueryJobConfig = None, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, @@ -3563,7 +3565,7 @@ def insert_rows_json( ] = AutoRowIDs.GENERATE_UUID, skip_invalid_rows: bool = None, ignore_unknown_values: bool = None, - template_suffix: str = None, + template_suffix: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Sequence[dict]: @@ -3755,7 +3757,7 @@ def list_rows( table: Union[Table, TableListItem, TableReference, str], selected_fields: Sequence[SchemaField] = None, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, start_index: Optional[int] = None, page_size: Optional[int] = None, retry: retries.Retry = DEFAULT_RETRY, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 0edd2935910a..513c32d9c5a1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -139,7 +139,7 @@ def from_api_repr(cls, resource: dict) -> "DatasetReference": @classmethod def from_string( - cls, dataset_id: str, default_project: str = None + cls, dataset_id: str, default_project: Optional[str] = None ) -> "DatasetReference": """Construct a dataset reference from dataset ID string. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 4073e0137754..a6267be417a8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -703,7 +703,10 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): self._set_properties(api_response) def exists( - self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + client=None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ) -> bool: """API call: test for the existence of the job via a GET request @@ -748,7 +751,10 @@ def exists( return True def reload( - self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + client=None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ): """API call: refresh job properties via a GET request. @@ -785,7 +791,10 @@ def reload( self._set_properties(api_response) def cancel( - self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + client=None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ) -> bool: """API call: cancel job via a POST request @@ -855,7 +864,7 @@ def _set_future_result(self): def done( self, retry: "retries.Retry" = DEFAULT_RETRY, - timeout: float = None, + timeout: Optional[float] = None, reload: bool = True, ) -> bool: """Checks if the job is complete. @@ -881,7 +890,9 @@ def done( return self.state == _DONE_STATE def result( # type: ignore # (signature complaint) - self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ) -> "_AsyncJob": """Start the job and wait for it to complete and get the result. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 315d8201c3c5..7dddc8278641 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1317,7 +1317,7 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): raise def _reload_query_results( - self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None ): """Refresh the cached query results. @@ -1405,7 +1405,7 @@ def result( # type: ignore # (complaints about the overloaded signature) page_size: Optional[int] = None, max_results: Optional[int] = None, retry: "retries.Retry" = DEFAULT_RETRY, - timeout: float = None, + timeout: Optional[float] = None, start_index: Optional[int] = None, job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: @@ -1557,7 +1557,7 @@ def do_get_result(): # that should only exist here in the QueryJob method. def to_arrow( self, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, @@ -1634,7 +1634,7 @@ def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, geography_as_object: bool = False, @@ -1820,7 +1820,7 @@ def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, geography_column: Optional[str] = None, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py index 36ed0372880f..ef33d507ec72 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py @@ -537,7 +537,7 @@ def from_api_repr(cls, resource: dict) -> "RoutineReference": @classmethod def from_string( - cls, routine_id: str, default_project: str = None + cls, routine_id: str, default_project: Optional[str] = None ) -> "RoutineReference": """Factory: construct a routine reference from routine ID string. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index ebf34e4cdfc4..20a1bc92faca 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -16,7 +16,7 @@ import collections import enum -from typing import Any, Dict, Iterable, Union +from typing import Any, Dict, Iterable, Optional, Union from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -124,7 +124,7 @@ def __init__( name: str, field_type: str, mode: str = "NULLABLE", - default_value_expression: str = None, + default_value_expression: Optional[str] = None, description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE, fields: Iterable["SchemaField"] = (), policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index bf4a90317c5b..462447d51cc6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -228,7 +228,7 @@ def __init__(self, dataset_ref: "DatasetReference", table_id: str): @classmethod def from_string( - cls, table_id: str, default_project: str = None + cls, table_id: str, default_project: Optional[str] = None ) -> "TableReference": """Construct a table reference from table ID string. @@ -1745,7 +1745,7 @@ def to_arrow_iterable( # changes to job.QueryJob.to_arrow() def to_arrow( self, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, ) -> "pyarrow.Table": @@ -1932,7 +1932,7 @@ def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_as_object: bool = False, bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, @@ -2230,7 +2230,7 @@ def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_column: Optional[str] = None, ) -> "geopandas.GeoDataFrame": From f641993f30419bc391ff95ec1e8e4645c507f556 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Tue, 27 Jun 2023 10:37:23 -0400 Subject: [PATCH 1617/2016] chore: update noxfile for docfx job (#1594) * chore: update noxfile for docfx job * chore: Update noxfile.py --- packages/google-cloud-bigquery/noxfile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 8464e498079b..57e534890a67 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -425,13 +425,15 @@ def docs(session): ) -@nox.session(python=DEFAULT_PYTHON_VERSION) +@nox.session(python="3.9") def docfx(session): """Build the docfx yaml files for this library.""" session.install("-e", ".") session.install( - "sphinx==4.0.2", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + "gcp-sphinx-docfx-yaml", + "alabaster", + "recommonmark", ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) From 75c14bb0542e2805145e93fcfbdc1e22ef9b6f8e Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 27 Jun 2023 11:53:18 -0500 Subject: [PATCH 1618/2016] chore(main): release 3.11.3 (#1593) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 5cf542670324..a0af641cf72c 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.11.3](https://github.com/googleapis/python-bigquery/compare/v3.11.2...v3.11.3) (2023-06-27) + + +### Bug Fixes + +* Type annotations include Optional when None is accepted ([#1554](https://github.com/googleapis/python-bigquery/issues/1554)) ([6c1ab80](https://github.com/googleapis/python-bigquery/commit/6c1ab802b09124ba837d6d5358962e3fce2d4a2c)) + ## [3.11.2](https://github.com/googleapis/python-bigquery/compare/v3.11.1...v3.11.2) (2023-06-21) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index ced5a95a7e32..9e1402d15807 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.11.2" +__version__ = "3.11.3" From 891f18d02ee5ae86c816df5af5fee0304ee0a8f8 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Wed, 28 Jun 2023 08:25:15 -0400 Subject: [PATCH 1619/2016] chore: update docs nox session (#1597) --- packages/google-cloud-bigquery/noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 57e534890a67..93616485fe8b 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -402,7 +402,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python=DEFAULT_PYTHON_VERSION) +@nox.session(python="3.9") def docs(session): """Build the docs.""" From 67a301301f743c0d66823a2b2dfc6dbd9f9c0cce Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 29 Jun 2023 11:26:14 -0400 Subject: [PATCH 1620/2016] refactor: refactored _get_final_span_attributes() for clarity, simplicity (#1602) Refactors the _get_final_span_attributes() function for simplicity and clarity. * adds docstring * removes several lines of redundant/unnecessary code * renames temporary variable for clarity --- .../cloud/bigquery/opentelemetry_tracing.py | 39 +++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py index 0e1187c6b1fd..be02c1686921 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -87,21 +87,38 @@ def create_span(name, attributes=None, client=None, job_ref=None): def _get_final_span_attributes(attributes=None, client=None, job_ref=None): - final_attributes = {} - final_attributes.update(_default_attributes.copy()) + """Compiles attributes from: client, job_ref, user-provided attributes. + + Attributes from all of these sources are merged together. Note the + attributes are added sequentially based on perceived order of precendence: + i.e. attributes added last may overwrite attributes added earlier. + + Args: + attributes (Optional[dict]): + Additional attributes that pertain to + the specific API call (i.e. not a default attribute) + + client (Optional[google.cloud.bigquery.client.Client]): + Pass in a Client object to extract any attributes that may be + relevant to it and add them to the final_attributes + + job_ref (Optional[google.cloud.bigquery.job._AsyncJob]) + Pass in a _AsyncJob object to extract any attributes that may be + relevant to it and add them to the final_attributes. + + Returns: dict + """ + + collected_attributes = _default_attributes.copy() + if client: - client_attributes = _set_client_attributes(client) - final_attributes.update(client_attributes) + collected_attributes.update(_set_client_attributes(client)) if job_ref: - job_attributes = _set_job_attributes(job_ref) - final_attributes.update(job_attributes) + collected_attributes.update(_set_job_attributes(job_ref)) if attributes: - final_attributes.update(attributes) - - filtered = {k: v for k, v in final_attributes.items() if v is not None} - final_attributes.clear() - final_attributes.update(filtered) + collected_attributes.update(attributes) + final_attributes = {k: v for k, v in collected_attributes.items() if v is not None} return final_attributes From cada9d38d6a7b786508852ac05384bd6c743d110 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 29 Jun 2023 10:08:13 -0700 Subject: [PATCH 1621/2016] chore: store artifacts in placer (#1599) Source-Link: https://github.com/googleapis/synthtool/commit/cb960373d12d20f8dc38beee2bf884d49627165e Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2d816f26f728ac8b24248741e7d4c461c09764ef9f7be3684d557c9632e46dbd Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- .../google-cloud-bigquery/.kokoro/release/common.cfg | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 02a4dedced74..98994f474104 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:240b5bcc2bafd450912d2da2be15e62bc6de2cf839823ae4bf94d4f392b451dc -# created: 2023-06-03T21:25:37.968717478Z + digest: sha256:2d816f26f728ac8b24248741e7d4c461c09764ef9f7be3684d557c9632e46dbd +# created: 2023-06-28T17:03:33.371210701Z diff --git a/packages/google-cloud-bigquery/.kokoro/release/common.cfg b/packages/google-cloud-bigquery/.kokoro/release/common.cfg index 6ae81b74308d..cb8bbaa2e55d 100644 --- a/packages/google-cloud-bigquery/.kokoro/release/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/release/common.cfg @@ -38,3 +38,12 @@ env_vars: { key: "SECRET_MANAGER_KEYS" value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" } + +# Store the packages we uploaded to PyPI. That way, we have a record of exactly +# what we published, which we can use to generate SBOMs and attestations. +action { + define_artifacts { + regex: "github/python-bigquery/**/*.tar.gz" + strip_prefix: "github/python-bigquery" + } +} From e8e49e06f5907a2457fccf257f1a550c5ee42d55 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 6 Jul 2023 12:52:57 -0500 Subject: [PATCH 1622/2016] doc: in query retry design, note that location can be required (#1595) In response to internal issue 285136859. Co-authored-by: Anthonios Partheniou --- packages/google-cloud-bigquery/docs/design/query-retries.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/docs/design/query-retries.md b/packages/google-cloud-bigquery/docs/design/query-retries.md index 1bac82f5c282..08d75302bc65 100644 --- a/packages/google-cloud-bigquery/docs/design/query-retries.md +++ b/packages/google-cloud-bigquery/docs/design/query-retries.md @@ -73,7 +73,7 @@ value, the client library uses the jobs.insert REST API to start a query job. Before it issues this request, it sets a job ID. This job ID remains constant across API retries. -If the job ID was randomly generated, and the jobs.insert request and all retries fail, the client library sends a request to the jobs.get API. This covers the case when a query request succeeded, but there was a transient issue that prevented the client from receiving a successful response. +If the job ID was randomly generated, and the jobs.insert request and all retries fail, the client library sends a request to the jobs.get API. This covers the case when a query request succeeded, but there was a transient issue that prevented the client from receiving a successful response. Note: `jobs.get` requires the location of the query. It will fail with 404 if the location is not specified and the job is not in the US multi-region. #### Retrying the jobs.query API via the retry parameter From db796db8bfec171d06eba0a91d87daaaed0935cc Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 10 Jul 2023 17:31:55 +0200 Subject: [PATCH 1623/2016] chore(deps): update all dependencies (#1577) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 82a1daadcc41..c4bd8f2e2396 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,8 +12,8 @@ geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.13.0; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.19.0 -google-cloud-bigquery==3.10.0 +google-auth==2.19.1 +google-cloud-bigquery==3.11.0 google-cloud-bigquery-storage==2.20.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 @@ -40,6 +40,6 @@ requests==2.31.0 rsa==4.9 Shapely==2.0.1 six==1.16.0 -typing-extensions==4.6.2 +typing-extensions==4.6.3 typing-inspect==0.9.0 urllib3==1.26.15 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index b545916c3c22..29d616021f2e 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -5,11 +5,11 @@ grpcio==1.54.2 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.13.2; python_version >= '3.9' +ipython==8.14.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.2; python_version >= '3.8' pyarrow==12.0.0 pytz==2023.3 -typing-extensions==4.6.2 +typing-extensions==4.6.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index d2878d20284d..8b9326101d39 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ db-dtypes==1.1.1 -google-cloud-bigquery==3.10.0 +google-cloud-bigquery==3.11.0 google-cloud-bigquery-storage==2.20.0 google-auth-oauthlib==1.0.0 grpcio==1.54.2 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.13.2; python_version >= '3.9' +ipython==8.14.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.2; python_version >= '3.8' pyarrow==12.0.0 pytz==2023.3 -typing-extensions==4.6.2 +typing-extensions==4.6.3 From 07773e4591f0f7758d26db008a6264610b7febaf Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 10 Jul 2023 18:46:18 +0200 Subject: [PATCH 1624/2016] chore(deps): update all dependencies (#1606) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 34 +++++++++---------- .../samples/magics/requirements-test.txt | 2 +- .../samples/magics/requirements.txt | 14 ++++---- .../samples/snippets/requirements-test.txt | 2 +- .../samples/snippets/requirements.txt | 16 ++++----- 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 3c3afdcb17c2..b3772a888083 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.3.1 +pytest==7.4.0 mock==5.0.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index c4bd8f2e2396..b05446e99563 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,8 +1,8 @@ attrs==23.1.0 certifi==2023.5.7 cffi==1.15.1 -charset-normalizer==3.1.0 -click==8.1.3 +charset-normalizer==3.2.0 +click==8.1.4 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' @@ -10,29 +10,29 @@ db-dtypes==1.1.1 Fiona==1.9.4.post1 geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' -geopandas==0.13.0; python_version >= '3.8' -google-api-core==2.11.0 -google-auth==2.19.1 -google-cloud-bigquery==3.11.0 -google-cloud-bigquery-storage==2.20.0 -google-cloud-core==2.3.2 +geopandas==0.13.2; python_version >= '3.8' +google-api-core==2.11.1 +google-auth==2.21.0 +google-cloud-bigquery==3.11.3 +google-cloud-bigquery-storage==2.22.0 +google-cloud-core==2.3.3 google-crc32c==1.5.0 google-resumable-media==2.5.0 -googleapis-common-protos==1.59.0 -grpcio==1.54.2 +googleapis-common-protos==1.59.1 +grpcio==1.56.0 idna==3.4 -libcst==1.0.0 -munch==3.0.0 +libcst==1.0.1 +munch==4.0.0 mypy-extensions==1.0.0 packaging==23.1 pandas===1.3.5; python_version == '3.7' -pandas==2.0.2; python_version >= '3.8' -proto-plus==1.22.2 -pyarrow==12.0.0 +pandas==2.0.3; python_version >= '3.8' +proto-plus==1.22.3 +pyarrow==12.0.1 pyasn1==0.5.0 pyasn1-modules==0.3.0 pycparser==2.21 -pyparsing==3.0.9 +pyparsing==3.1.0 python-dateutil==2.8.2 pytz==2023.3 PyYAML==6.0 @@ -40,6 +40,6 @@ requests==2.31.0 rsa==4.9 Shapely==2.0.1 six==1.16.0 -typing-extensions==4.6.3 +typing-extensions==4.7.1 typing-inspect==0.9.0 urllib3==1.26.15 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 9fa68a930845..4077bd8dcc73 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.3.1 +pytest==7.4.0 mock==5.0.2 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 29d616021f2e..edf3dc4b629e 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.1.1 -google-cloud-bigquery-storage==2.20.0 +google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.54.2 -ipywidgets==8.0.6 +grpcio==1.56.0 +ipywidgets==8.0.7 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.1; python_version >= '3.8' +matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.2; python_version >= '3.8' -pyarrow==12.0.0 +pandas==2.0.3; python_version >= '3.8' +pyarrow==12.0.1 pytz==2023.3 -typing-extensions==4.6.3 +typing-extensions==4.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 9fa68a930845..4077bd8dcc73 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.3.1 +pytest==7.4.0 mock==5.0.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 8b9326101d39..c715a450fafe 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ db-dtypes==1.1.1 -google-cloud-bigquery==3.11.0 -google-cloud-bigquery-storage==2.20.0 +google-cloud-bigquery==3.11.3 +google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.54.2 -ipywidgets==8.0.6 +grpcio==1.56.0 +ipywidgets==8.0.7 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.1; python_version >= '3.8' +matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.2; python_version >= '3.8' -pyarrow==12.0.0 +pandas==2.0.3; python_version >= '3.8' +pyarrow==12.0.1 pytz==2023.3 -typing-extensions==4.6.3 +typing-extensions==4.7.1 From 910160458d974c7ff6e933dd940b644fe63da20b Mon Sep 17 00:00:00 2001 From: Alvaro Viebrantz Date: Thu, 13 Jul 2023 16:46:49 -0400 Subject: [PATCH 1625/2016] test: enable copy table tests (#1609) --- packages/google-cloud-bigquery/samples/tests/test_copy_table.py | 2 -- .../google-cloud-bigquery/samples/tests/test_copy_table_cmek.py | 2 -- packages/google-cloud-bigquery/tests/system/test_client.py | 2 -- 3 files changed, 6 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py index d5a6c121e3fc..3953e31625ab 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table.py @@ -28,8 +28,6 @@ def test_copy_table( random_table_id: str, client: "bigquery.Client", ) -> None: - pytest.skip("b/210907595: copy fails for shakespeare table") - copy_table.copy_table(table_with_data_id, random_table_id) out, err = capsys.readouterr() assert "A copy of the table created." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py index 1bdec2f35023..7cac15723aee 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table_cmek.py @@ -23,8 +23,6 @@ def test_copy_table_cmek( table_with_data_id: str, kms_key_name: str, ) -> None: - pytest.skip("b/210907595: copy fails for shakespeare table") - copy_table_cmek.copy_table_cmek(random_table_id, table_with_data_id, kms_key_name) out, err = capsys.readouterr() assert "A copy of the table created" in out diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index f4757e30fa1f..8fd532f4cebf 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -1358,8 +1358,6 @@ def test_extract_table(self): self.assertIn("Bharney Rhubble", got) def test_copy_table(self): - pytest.skip("b/210907595: copy fails for shakespeare table") - # If we create a new table to copy from, the test won't work # because the new rows will be stored in the streaming buffer, # and copy jobs don't read the streaming buffer. From 5f67fe0bd9afc5bea0473e3ee9258b76a6a512b8 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 17 Jul 2023 13:04:14 -0400 Subject: [PATCH 1626/2016] build(deps): [autoapprove] bump cryptography from 41.0.0 to 41.0.2 (#1611) Source-Link: https://github.com/googleapis/synthtool/commit/d6103f4a3540ba60f633a9e25c37ec5fe7e6286d Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:39f0f3f2be02ef036e297e376fe3b6256775576da8a6ccb1d5eeb80f4c8bf8fb Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.flake8 | 2 +- .../.github/.OwlBot.lock.yaml | 4 +- .../.github/auto-label.yaml | 2 +- .../google-cloud-bigquery/.kokoro/build.sh | 2 +- .../.kokoro/docker/docs/Dockerfile | 2 +- .../.kokoro/populate-secrets.sh | 2 +- .../.kokoro/publish-docs.sh | 2 +- .../google-cloud-bigquery/.kokoro/release.sh | 2 +- .../.kokoro/requirements.txt | 44 ++++++++++--------- .../.kokoro/test-samples-against-head.sh | 2 +- .../.kokoro/test-samples-impl.sh | 2 +- .../.kokoro/test-samples.sh | 2 +- .../.kokoro/trampoline.sh | 2 +- .../.kokoro/trampoline_v2.sh | 2 +- .../.pre-commit-config.yaml | 2 +- packages/google-cloud-bigquery/.trampolinerc | 4 +- packages/google-cloud-bigquery/MANIFEST.in | 2 +- packages/google-cloud-bigquery/docs/conf.py | 2 +- .../scripts/decrypt-secrets.sh | 2 +- .../scripts/readme-gen/readme_gen.py | 18 ++++---- packages/google-cloud-bigquery/setup.cfg | 2 +- 21 files changed, 53 insertions(+), 51 deletions(-) diff --git a/packages/google-cloud-bigquery/.flake8 b/packages/google-cloud-bigquery/.flake8 index 2e438749863d..87f6e408c47d 100644 --- a/packages/google-cloud-bigquery/.flake8 +++ b/packages/google-cloud-bigquery/.flake8 @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 98994f474104..ae4a522b9e5f 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2d816f26f728ac8b24248741e7d4c461c09764ef9f7be3684d557c9632e46dbd -# created: 2023-06-28T17:03:33.371210701Z + digest: sha256:39f0f3f2be02ef036e297e376fe3b6256775576da8a6ccb1d5eeb80f4c8bf8fb +# created: 2023-07-17T15:20:13.819193964Z diff --git a/packages/google-cloud-bigquery/.github/auto-label.yaml b/packages/google-cloud-bigquery/.github/auto-label.yaml index 41bff0b5375a..b2016d119b40 100644 --- a/packages/google-cloud-bigquery/.github/auto-label.yaml +++ b/packages/google-cloud-bigquery/.github/auto-label.yaml @@ -1,4 +1,4 @@ -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/build.sh b/packages/google-cloud-bigquery/.kokoro/build.sh index 4d6a1d0f6a55..0cb0d0dd0add 100755 --- a/packages/google-cloud-bigquery/.kokoro/build.sh +++ b/packages/google-cloud-bigquery/.kokoro/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2018 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile index f8137d0ae497..8e39a2cc438d 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh b/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh index f52514257ef0..6f3972140e80 100755 --- a/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh +++ b/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC. +# Copyright 2023 Google LLC. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh index 1c4d62370042..9eafe0be3bba 100755 --- a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh +++ b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/release.sh b/packages/google-cloud-bigquery/.kokoro/release.sh index c6a7c94600e7..078fc1c20f1b 100755 --- a/packages/google-cloud-bigquery/.kokoro/release.sh +++ b/packages/google-cloud-bigquery/.kokoro/release.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index c7929db6d152..67d70a110897 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -113,26 +113,30 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==41.0.0 \ - --hash=sha256:0ddaee209d1cf1f180f1efa338a68c4621154de0afaef92b89486f5f96047c55 \ - --hash=sha256:14754bcdae909d66ff24b7b5f166d69340ccc6cb15731670435efd5719294895 \ - --hash=sha256:344c6de9f8bda3c425b3a41b319522ba3208551b70c2ae00099c205f0d9fd3be \ - --hash=sha256:34d405ea69a8b34566ba3dfb0521379b210ea5d560fafedf9f800a9a94a41928 \ - --hash=sha256:3680248309d340fda9611498a5319b0193a8dbdb73586a1acf8109d06f25b92d \ - --hash=sha256:3c5ef25d060c80d6d9f7f9892e1d41bb1c79b78ce74805b8cb4aa373cb7d5ec8 \ - --hash=sha256:4ab14d567f7bbe7f1cdff1c53d5324ed4d3fc8bd17c481b395db224fb405c237 \ - --hash=sha256:5c1f7293c31ebc72163a9a0df246f890d65f66b4a40d9ec80081969ba8c78cc9 \ - --hash=sha256:6b71f64beeea341c9b4f963b48ee3b62d62d57ba93eb120e1196b31dc1025e78 \ - --hash=sha256:7d92f0248d38faa411d17f4107fc0bce0c42cae0b0ba5415505df72d751bf62d \ - --hash=sha256:8362565b3835ceacf4dc8f3b56471a2289cf51ac80946f9087e66dc283a810e0 \ - --hash=sha256:84a165379cb9d411d58ed739e4af3396e544eac190805a54ba2e0322feb55c46 \ - --hash=sha256:88ff107f211ea696455ea8d911389f6d2b276aabf3231bf72c8853d22db755c5 \ - --hash=sha256:9f65e842cb02550fac96536edb1d17f24c0a338fd84eaf582be25926e993dde4 \ - --hash=sha256:a4fc68d1c5b951cfb72dfd54702afdbbf0fb7acdc9b7dc4301bbf2225a27714d \ - --hash=sha256:b7f2f5c525a642cecad24ee8670443ba27ac1fab81bba4cc24c7b6b41f2d0c75 \ - --hash=sha256:b846d59a8d5a9ba87e2c3d757ca019fa576793e8758174d3868aecb88d6fc8eb \ - --hash=sha256:bf8fc66012ca857d62f6a347007e166ed59c0bc150cefa49f28376ebe7d992a2 \ - --hash=sha256:f5d0bf9b252f30a31664b6f64432b4730bb7038339bd18b1fafe129cfc2be9be +cryptography==41.0.2 \ + --hash=sha256:01f1d9e537f9a15b037d5d9ee442b8c22e3ae11ce65ea1f3316a41c78756b711 \ + --hash=sha256:079347de771f9282fbfe0e0236c716686950c19dee1b76240ab09ce1624d76d7 \ + --hash=sha256:182be4171f9332b6741ee818ec27daff9fb00349f706629f5cbf417bd50e66fd \ + --hash=sha256:192255f539d7a89f2102d07d7375b1e0a81f7478925b3bc2e0549ebf739dae0e \ + --hash=sha256:2a034bf7d9ca894720f2ec1d8b7b5832d7e363571828037f9e0c4f18c1b58a58 \ + --hash=sha256:342f3767e25876751e14f8459ad85e77e660537ca0a066e10e75df9c9e9099f0 \ + --hash=sha256:439c3cc4c0d42fa999b83ded80a9a1fb54d53c58d6e59234cfe97f241e6c781d \ + --hash=sha256:49c3222bb8f8e800aead2e376cbef687bc9e3cb9b58b29a261210456a7783d83 \ + --hash=sha256:674b669d5daa64206c38e507808aae49904c988fa0a71c935e7006a3e1e83831 \ + --hash=sha256:7a9a3bced53b7f09da251685224d6a260c3cb291768f54954e28f03ef14e3766 \ + --hash=sha256:7af244b012711a26196450d34f483357e42aeddb04128885d95a69bd8b14b69b \ + --hash=sha256:7d230bf856164de164ecb615ccc14c7fc6de6906ddd5b491f3af90d3514c925c \ + --hash=sha256:84609ade00a6ec59a89729e87a503c6e36af98ddcd566d5f3be52e29ba993182 \ + --hash=sha256:9a6673c1828db6270b76b22cc696f40cde9043eb90373da5c2f8f2158957f42f \ + --hash=sha256:9b6d717393dbae53d4e52684ef4f022444fc1cce3c48c38cb74fca29e1f08eaa \ + --hash=sha256:9c3fe6534d59d071ee82081ca3d71eed3210f76ebd0361798c74abc2bcf347d4 \ + --hash=sha256:a719399b99377b218dac6cf547b6ec54e6ef20207b6165126a280b0ce97e0d2a \ + --hash=sha256:b332cba64d99a70c1e0836902720887fb4529ea49ea7f5462cf6640e095e11d2 \ + --hash=sha256:d124682c7a23c9764e54ca9ab5b308b14b18eba02722b8659fb238546de83a76 \ + --hash=sha256:d73f419a56d74fef257955f51b18d046f3506270a5fd2ac5febbfa259d6c0fa5 \ + --hash=sha256:f0dc40e6f7aa37af01aba07277d3d64d5a03dc66d682097541ec4da03cc140ee \ + --hash=sha256:f14ad275364c8b4e525d018f6716537ae7b6d369c094805cae45300847e0894f \ + --hash=sha256:f772610fe364372de33d76edcd313636a25684edb94cee53fd790195f5989d14 # via # gcp-releasetool # secretstorage diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh index ba3a707b040c..63ac41dfae1d 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh index 2c6500cae0b9..5a0f5fab6a89 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples.sh b/packages/google-cloud-bigquery/.kokoro/test-samples.sh index 11c042d342d7..50b35a48c190 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/trampoline.sh b/packages/google-cloud-bigquery/.kokoro/trampoline.sh index f39236e943a8..d85b1f267693 100755 --- a/packages/google-cloud-bigquery/.kokoro/trampoline.sh +++ b/packages/google-cloud-bigquery/.kokoro/trampoline.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2017 Google Inc. +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh b/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh index 4af6cdc26dbc..59a7cf3a9373 100755 --- a/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh +++ b/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml index 5405cc8ff1f3..9e3898fd1c12 100644 --- a/packages/google-cloud-bigquery/.pre-commit-config.yaml +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.trampolinerc b/packages/google-cloud-bigquery/.trampolinerc index 0eee72ab62aa..a7dfeb42c6d0 100644 --- a/packages/google-cloud-bigquery/.trampolinerc +++ b/packages/google-cloud-bigquery/.trampolinerc @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Template for .trampolinerc - # Add required env vars here. required_envvars+=( ) diff --git a/packages/google-cloud-bigquery/MANIFEST.in b/packages/google-cloud-bigquery/MANIFEST.in index e783f4c6209b..e0a66705318e 100644 --- a/packages/google-cloud-bigquery/MANIFEST.in +++ b/packages/google-cloud-bigquery/MANIFEST.in @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 5c83fd79ee73..d0468e25af6e 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh b/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh index 21f6d2a26d90..0018b421ddf8 100755 --- a/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh +++ b/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2015 Google Inc. All rights reserved. +# Copyright 2023 Google LLC All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py b/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py index 91b59676bfc7..1acc119835b5 100644 --- a/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py +++ b/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2016 Google Inc +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,17 +33,17 @@ autoescape=True, ) -README_TMPL = jinja_env.get_template('README.tmpl.rst') +README_TMPL = jinja_env.get_template("README.tmpl.rst") def get_help(file): - return subprocess.check_output(['python', file, '--help']).decode() + return subprocess.check_output(["python", file, "--help"]).decode() def main(): parser = argparse.ArgumentParser() - parser.add_argument('source') - parser.add_argument('--destination', default='README.rst') + parser.add_argument("source") + parser.add_argument("--destination", default="README.rst") args = parser.parse_args() @@ -51,9 +51,9 @@ def main(): root = os.path.dirname(source) destination = os.path.join(root, args.destination) - jinja_env.globals['get_help'] = get_help + jinja_env.globals["get_help"] = get_help - with io.open(source, 'r') as f: + with io.open(source, "r") as f: config = yaml.load(f) # This allows get_help to execute in the right directory. @@ -61,9 +61,9 @@ def main(): output = README_TMPL.render(config) - with io.open(destination, 'w') as f: + with io.open(destination, "w") as f: f.write(output) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/packages/google-cloud-bigquery/setup.cfg b/packages/google-cloud-bigquery/setup.cfg index 25892161f048..37b63aa49e3a 100644 --- a/packages/google-cloud-bigquery/setup.cfg +++ b/packages/google-cloud-bigquery/setup.cfg @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 94a932943af0132dcf9f49d599c2cd562f45530a Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 19 Jul 2023 12:03:06 -0400 Subject: [PATCH 1627/2016] fix: updates typing in function definitions (#1613) --- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5a929fea4c3d..11cceea42712 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3070,7 +3070,7 @@ def copy_table( job_id_prefix: Optional[str] = None, location: Optional[str] = None, project: Optional[str] = None, - job_config: CopyJobConfig = None, + job_config: Optional[CopyJobConfig] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.CopyJob: @@ -3176,7 +3176,7 @@ def extract_table( job_id_prefix: Optional[str] = None, location: Optional[str] = None, project: Optional[str] = None, - job_config: ExtractJobConfig = None, + job_config: Optional[ExtractJobConfig] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, source_type: str = "Table", @@ -3271,7 +3271,7 @@ def extract_table( def query( self, query: str, - job_config: QueryJobConfig = None, + job_config: Optional[QueryJobConfig] = None, job_id: Optional[str] = None, job_id_prefix: Optional[str] = None, location: Optional[str] = None, From f648b2754a0efc9edf6e8dbbeac748268813ce39 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 19 Jul 2023 19:04:49 -0400 Subject: [PATCH 1628/2016] chore(main): release 3.11.4 (#1615) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index a0af641cf72c..cf64e2222778 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.11.4](https://github.com/googleapis/python-bigquery/compare/v3.11.3...v3.11.4) (2023-07-19) + + +### Bug Fixes + +* Updates typing in function definitions ([#1613](https://github.com/googleapis/python-bigquery/issues/1613)) ([db755ce](https://github.com/googleapis/python-bigquery/commit/db755ce5d2ae21e458f33f02cf63d2e5fbc45cf5)) + ## [3.11.3](https://github.com/googleapis/python-bigquery/compare/v3.11.2...v3.11.3) (2023-06-27) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 9e1402d15807..a97ccc0c8fc8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.11.3" +__version__ = "3.11.4" From 2f816f43c20f31cd5995028cf19942bd461a6947 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 21 Jul 2023 09:44:07 -0400 Subject: [PATCH 1629/2016] build(deps): [autoapprove] bump pygments from 2.13.0 to 2.15.0 (#1618) Source-Link: https://github.com/googleapis/synthtool/commit/eaef28efd179e6eeb9f4e9bf697530d074a6f3b9 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:f8ca7655fa8a449cadcabcbce4054f593dcbae7aeeab34aa3fcc8b5cf7a93c9e Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- packages/google-cloud-bigquery/.kokoro/requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index ae4a522b9e5f..17c21d96d654 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:39f0f3f2be02ef036e297e376fe3b6256775576da8a6ccb1d5eeb80f4c8bf8fb -# created: 2023-07-17T15:20:13.819193964Z + digest: sha256:f8ca7655fa8a449cadcabcbce4054f593dcbae7aeeab34aa3fcc8b5cf7a93c9e +# created: 2023-07-21T02:12:46.49799314Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 67d70a110897..b563eb284459 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -396,9 +396,9 @@ pycparser==2.21 \ --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \ --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206 # via cffi -pygments==2.13.0 \ - --hash=sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1 \ - --hash=sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42 +pygments==2.15.0 \ + --hash=sha256:77a3299119af881904cd5ecd1ac6a66214b6e9bed1f2db16993b54adede64094 \ + --hash=sha256:f7e36cffc4c517fbc252861b9a6e4644ca0e5abadf9a113c72d1358ad09b9500 # via # readme-renderer # rich From 35d42f623e4bf0fe1e4063e79d1418119081ecc2 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 25 Jul 2023 17:02:34 +0200 Subject: [PATCH 1630/2016] chore(deps): update all dependencies (#1607) * chore(deps): update all dependencies * revert * revert --------- Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 6 +++--- .../samples/magics/requirements-test.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index b3772a888083..6585a560a7e7 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ pytest==7.4.0 -mock==5.0.2 +mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index b05446e99563..c5fe182afd7b 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -2,7 +2,7 @@ attrs==23.1.0 certifi==2023.5.7 cffi==1.15.1 charset-normalizer==3.2.0 -click==8.1.4 +click==8.1.6 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' @@ -12,7 +12,7 @@ geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.13.2; python_version >= '3.8' google-api-core==2.11.1 -google-auth==2.21.0 +google-auth==2.22.0 google-cloud-bigquery==3.11.3 google-cloud-bigquery-storage==2.22.0 google-cloud-core==2.3.3 @@ -35,7 +35,7 @@ pycparser==2.21 pyparsing==3.1.0 python-dateutil==2.8.2 pytz==2023.3 -PyYAML==6.0 +PyYAML==6.0.1 requests==2.31.0 rsa==4.9 Shapely==2.0.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 4077bd8dcc73..514f09705ae8 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 pytest==7.4.0 -mock==5.0.2 +mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 4077bd8dcc73..514f09705ae8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 pytest==7.4.0 -mock==5.0.2 +mock==5.1.0 From 4cb9c3ac4d23dc310f3698bf8929b1509b2a5268 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Jul 2023 16:14:24 -0400 Subject: [PATCH 1631/2016] chore(deps): bump certifi in /samples/geography (#1627) Bumps [certifi](https://github.com/certifi/python-certifi) from 2023.5.7 to 2023.7.22. - [Commits](https://github.com/certifi/python-certifi/compare/2023.05.07...2023.07.22) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index c5fe182afd7b..34873a5be0a8 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==23.1.0 -certifi==2023.5.7 +certifi==2023.7.22 cffi==1.15.1 charset-normalizer==3.2.0 click==8.1.6 From 5f591625e16382ec2447e3951313927ca6898af5 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 27 Jul 2023 05:56:00 -0400 Subject: [PATCH 1632/2016] build(deps): [autoapprove] bump certifi from 2022.12.7 to 2023.7.22 (#1629) Source-Link: https://github.com/googleapis/synthtool/commit/395d53adeeacfca00b73abf197f65f3c17c8f1e9 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:6c1cbc75c74b8bdd71dada2fa1677e9d6d78a889e9a70ee75b93d1d0543f96e1 Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- packages/google-cloud-bigquery/.kokoro/requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 17c21d96d654..0ddd0e4d1873 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:f8ca7655fa8a449cadcabcbce4054f593dcbae7aeeab34aa3fcc8b5cf7a93c9e -# created: 2023-07-21T02:12:46.49799314Z + digest: sha256:6c1cbc75c74b8bdd71dada2fa1677e9d6d78a889e9a70ee75b93d1d0543f96e1 +# created: 2023-07-25T21:01:10.396410762Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index b563eb284459..76d9bba0f7d0 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -20,9 +20,9 @@ cachetools==5.2.0 \ --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \ --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db # via google-auth -certifi==2022.12.7 \ - --hash=sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3 \ - --hash=sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18 +certifi==2023.7.22 \ + --hash=sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082 \ + --hash=sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9 # via requests cffi==1.15.1 \ --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ From ecfb1f543b5de10dd3811e6447627c91bf2e291d Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 8 Aug 2023 11:09:40 -0400 Subject: [PATCH 1633/2016] build: [autoapprove] bump cryptography from 41.0.2 to 41.0.3 (#1632) * build: [autoapprove] bump cryptography from 41.0.2 to 41.0.3 Source-Link: https://github.com/googleapis/synthtool/commit/352b9d4c068ce7c05908172af128b294073bf53c Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:3e3800bb100af5d7f9e810d48212b37812c1856d20ffeafb99ebe66461b61fc7 * pin flake8 --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/requirements.txt | 48 +++++++++---------- .../.pre-commit-config.yaml | 2 +- packages/google-cloud-bigquery/noxfile.py | 4 +- 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 0ddd0e4d1873..a3da1b0d4cd3 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:6c1cbc75c74b8bdd71dada2fa1677e9d6d78a889e9a70ee75b93d1d0543f96e1 -# created: 2023-07-25T21:01:10.396410762Z + digest: sha256:3e3800bb100af5d7f9e810d48212b37812c1856d20ffeafb99ebe66461b61fc7 +# created: 2023-08-02T10:53:29.114535628Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 76d9bba0f7d0..029bd342de94 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -113,30 +113,30 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==41.0.2 \ - --hash=sha256:01f1d9e537f9a15b037d5d9ee442b8c22e3ae11ce65ea1f3316a41c78756b711 \ - --hash=sha256:079347de771f9282fbfe0e0236c716686950c19dee1b76240ab09ce1624d76d7 \ - --hash=sha256:182be4171f9332b6741ee818ec27daff9fb00349f706629f5cbf417bd50e66fd \ - --hash=sha256:192255f539d7a89f2102d07d7375b1e0a81f7478925b3bc2e0549ebf739dae0e \ - --hash=sha256:2a034bf7d9ca894720f2ec1d8b7b5832d7e363571828037f9e0c4f18c1b58a58 \ - --hash=sha256:342f3767e25876751e14f8459ad85e77e660537ca0a066e10e75df9c9e9099f0 \ - --hash=sha256:439c3cc4c0d42fa999b83ded80a9a1fb54d53c58d6e59234cfe97f241e6c781d \ - --hash=sha256:49c3222bb8f8e800aead2e376cbef687bc9e3cb9b58b29a261210456a7783d83 \ - --hash=sha256:674b669d5daa64206c38e507808aae49904c988fa0a71c935e7006a3e1e83831 \ - --hash=sha256:7a9a3bced53b7f09da251685224d6a260c3cb291768f54954e28f03ef14e3766 \ - --hash=sha256:7af244b012711a26196450d34f483357e42aeddb04128885d95a69bd8b14b69b \ - --hash=sha256:7d230bf856164de164ecb615ccc14c7fc6de6906ddd5b491f3af90d3514c925c \ - --hash=sha256:84609ade00a6ec59a89729e87a503c6e36af98ddcd566d5f3be52e29ba993182 \ - --hash=sha256:9a6673c1828db6270b76b22cc696f40cde9043eb90373da5c2f8f2158957f42f \ - --hash=sha256:9b6d717393dbae53d4e52684ef4f022444fc1cce3c48c38cb74fca29e1f08eaa \ - --hash=sha256:9c3fe6534d59d071ee82081ca3d71eed3210f76ebd0361798c74abc2bcf347d4 \ - --hash=sha256:a719399b99377b218dac6cf547b6ec54e6ef20207b6165126a280b0ce97e0d2a \ - --hash=sha256:b332cba64d99a70c1e0836902720887fb4529ea49ea7f5462cf6640e095e11d2 \ - --hash=sha256:d124682c7a23c9764e54ca9ab5b308b14b18eba02722b8659fb238546de83a76 \ - --hash=sha256:d73f419a56d74fef257955f51b18d046f3506270a5fd2ac5febbfa259d6c0fa5 \ - --hash=sha256:f0dc40e6f7aa37af01aba07277d3d64d5a03dc66d682097541ec4da03cc140ee \ - --hash=sha256:f14ad275364c8b4e525d018f6716537ae7b6d369c094805cae45300847e0894f \ - --hash=sha256:f772610fe364372de33d76edcd313636a25684edb94cee53fd790195f5989d14 +cryptography==41.0.3 \ + --hash=sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306 \ + --hash=sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84 \ + --hash=sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47 \ + --hash=sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d \ + --hash=sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116 \ + --hash=sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207 \ + --hash=sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81 \ + --hash=sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087 \ + --hash=sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd \ + --hash=sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507 \ + --hash=sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858 \ + --hash=sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae \ + --hash=sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34 \ + --hash=sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906 \ + --hash=sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd \ + --hash=sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922 \ + --hash=sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7 \ + --hash=sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4 \ + --hash=sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574 \ + --hash=sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1 \ + --hash=sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c \ + --hash=sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e \ + --hash=sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de # via # gcp-releasetool # secretstorage diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml index 9e3898fd1c12..19409cbd37a4 100644 --- a/packages/google-cloud-bigquery/.pre-commit-config.yaml +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -26,6 +26,6 @@ repos: hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 3.9.2 + rev: 6.1.0 hooks: - id: flake8 diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 93616485fe8b..3c9ba5eb578e 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -375,7 +375,9 @@ def lint(session): serious code quality issues. """ - session.install("flake8", BLACK_VERSION) + # Pin flake8 to 6.0.0 + # See https://github.com/googleapis/python-bigquery/issues/1635 + session.install("flake8==6.0.0", BLACK_VERSION) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") From 607ff99e4bfcb876cb7d5f997d113feb358c7464 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 9 Aug 2023 15:38:45 +0200 Subject: [PATCH 1634/2016] chore(deps): update all dependencies (#1626) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: Anthonios Partheniou --- .../samples/geography/requirements.txt | 8 ++++---- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../samples/snippets/requirements.txt | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 34873a5be0a8..db17aeddf7c1 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,13 +13,13 @@ geopandas===0.10.2; python_version == '3.7' geopandas==0.13.2; python_version >= '3.8' google-api-core==2.11.1 google-auth==2.22.0 -google-cloud-bigquery==3.11.3 +google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-cloud-core==2.3.3 google-crc32c==1.5.0 google-resumable-media==2.5.0 -googleapis-common-protos==1.59.1 -grpcio==1.56.0 +googleapis-common-protos==1.60.0 +grpcio==1.56.2 idna==3.4 libcst==1.0.1 munch==4.0.0 @@ -32,7 +32,7 @@ pyarrow==12.0.1 pyasn1==0.5.0 pyasn1-modules==0.3.0 pycparser==2.21 -pyparsing==3.1.0 +pyparsing==3.1.1 python-dateutil==2.8.2 pytz==2023.3 PyYAML==6.0.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index edf3dc4b629e..ae61f71ff477 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,8 +1,8 @@ db-dtypes==1.1.1 google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.56.0 -ipywidgets==8.0.7 +grpcio==1.56.2 +ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index c715a450fafe..0541486c0732 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,9 +1,9 @@ db-dtypes==1.1.1 -google-cloud-bigquery==3.11.3 +google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.56.0 -ipywidgets==8.0.7 +grpcio==1.56.2 +ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' From f16548d7450560ace885475ad5fe538a8578f63c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 11 Aug 2023 20:46:46 +0200 Subject: [PATCH 1635/2016] chore(deps): update all dependencies (#1636) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index db17aeddf7c1..714e032add21 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -19,7 +19,7 @@ google-cloud-core==2.3.3 google-crc32c==1.5.0 google-resumable-media==2.5.0 googleapis-common-protos==1.60.0 -grpcio==1.56.2 +grpcio==1.57.0 idna==3.4 libcst==1.0.1 munch==4.0.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index ae61f71ff477..c3300ae20e68 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.1.1 google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.56.2 +grpcio==1.57.0 ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 0541486c0732..da99249d2a4d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -2,7 +2,7 @@ db-dtypes==1.1.1 google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.56.2 +grpcio==1.57.0 ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' From 7cb6bcb918d1227533a8ad7284cd0c79f3249c07 Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 14 Aug 2023 09:15:45 -0700 Subject: [PATCH 1636/2016] feat: widen retry predicate to include ServiceUnavailable (#1641) Expands retry. It's possible in the normal lifecycle of an API frontend for the intermediate response to indicate the API service is not ready. related: internal issue 294103068 --- .../google-cloud-bigquery/google/cloud/bigquery/retry.py | 1 + packages/google-cloud-bigquery/tests/unit/test_retry.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 254b26608c65..d0830ed13c37 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -27,6 +27,7 @@ exceptions.TooManyRequests, exceptions.InternalServerError, exceptions.BadGateway, + exceptions.ServiceUnavailable, requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, requests.exceptions.Timeout, diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index e0a992f783d5..60d04de8948b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -79,6 +79,12 @@ def test_w_unstructured_too_many_requests(self): exc = TooManyRequests("testing") self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_service_unavailable(self): + from google.api_core.exceptions import ServiceUnavailable + + exc = ServiceUnavailable("testing") + self.assertTrue(self._call_fut(exc)) + def test_w_internalError(self): exc = mock.Mock(errors=[{"reason": "internalError"}], spec=["errors"]) self.assertTrue(self._call_fut(exc)) From 929521800db96274687d48dc707920de29c85f18 Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 21 Aug 2023 09:52:38 -0700 Subject: [PATCH 1637/2016] fix: relax timeout expectations (#1645) * fix: relax timeout expectations Changes to python-api-core can in certain cases cause timeout to be represented as a literal python base object type. This CL adjusts logic that selects from multiple timeout values to better handle this case, which previously assumed either a None or scalar value being present. Fixes: https://github.com/googleapis/python-bigquery/issues/1612 * augment testing * blacken and lint fixes * unused import --- .../google/cloud/bigquery/client.py | 10 +- .../tests/unit/test_client.py | 94 +++++++++++++++++++ 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 11cceea42712..2712b0c837ce 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1895,7 +1895,10 @@ def _get_query_results( extra_params: Dict[str, Any] = {"maxResults": 0} if timeout is not None: - timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) + if type(timeout) == object: + timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT + else: + timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) if project is None: project = self.project @@ -3924,7 +3927,10 @@ def _list_rows_from_query_results( } if timeout is not None: - timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) + if type(timeout) == object: + timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT + else: + timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) if start_index is not None: params["startIndex"] = start_index diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index cf0aa4028d8f..faa073dcea73 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -395,6 +395,31 @@ def test__get_query_results_miss_w_short_timeout(self): timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, ) + def test__get_query_results_miss_w_default_timeout(self): + import google.cloud.bigquery.client + from google.cloud.exceptions import NotFound + + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection() + path = "/projects/other-project/queries/nothere" + with self.assertRaises(NotFound): + client._get_query_results( + "nothere", + None, + project="other-project", + location=self.LOCATION, + timeout_ms=500, + timeout=object(), # the api core default timeout + ) + + conn.api_request.assert_called_once_with( + method="GET", + path=path, + query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + def test__get_query_results_miss_w_client_location(self): from google.cloud.exceptions import NotFound @@ -438,6 +463,75 @@ def test__get_query_results_hit(self): self.assertEqual(query_results.total_rows, 10) self.assertTrue(query_results.complete) + def test__list_rows_from_query_results_w_none_timeout(self): + from google.cloud.exceptions import NotFound + from google.cloud.bigquery.schema import SchemaField + + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection() + path = "/projects/project/queries/nothere" + iterator = client._list_rows_from_query_results( + "nothere", + location=None, + project="project", + schema=[ + SchemaField("f1", "STRING", mode="REQUIRED"), + SchemaField("f2", "INTEGER", mode="REQUIRED"), + ], + timeout=None, + ) + + # trigger the iterator to request data + with self.assertRaises(NotFound): + iterator._get_next_page_response() + + conn.api_request.assert_called_once_with( + method="GET", + path=path, + query_params={ + "fields": "jobReference,totalRows,pageToken,rows", + "location": None, + "formatOptions.useInt64Timestamp": True, + }, + timeout=None, + ) + + def test__list_rows_from_query_results_w_default_timeout(self): + import google.cloud.bigquery.client + from google.cloud.exceptions import NotFound + from google.cloud.bigquery.schema import SchemaField + + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection() + path = "/projects/project/queries/nothere" + iterator = client._list_rows_from_query_results( + "nothere", + location=None, + project="project", + schema=[ + SchemaField("f1", "STRING", mode="REQUIRED"), + SchemaField("f2", "INTEGER", mode="REQUIRED"), + ], + timeout=object(), + ) + + # trigger the iterator to request data + with self.assertRaises(NotFound): + iterator._get_next_page_response() + + conn.api_request.assert_called_once_with( + method="GET", + path=path, + query_params={ + "fields": "jobReference,totalRows,pageToken,rows", + "location": None, + "formatOptions.useInt64Timestamp": True, + }, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + def test_default_query_job_config(self): from google.cloud.bigquery import QueryJobConfig From 8cc401201d2d4f038b6f5a50bc5587cf8ced667b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 23 Aug 2023 15:04:13 -0400 Subject: [PATCH 1638/2016] bug: fixes numerous minor issues that cause test failures (#1651) Provides numerous tweaks to correct for failing tests in prerelease testing. --- .../google/cloud/bigquery/client.py | 2 +- .../google/cloud/bigquery/job/query.py | 4 ++-- .../google-cloud-bigquery/tests/system/test_pandas.py | 9 ++++++++- .../tests/unit/job/test_query.py | 2 +- .../tests/unit/job/test_query_pandas.py | 9 +++++++++ .../tests/unit/test__pandas_helpers.py | 3 +++ .../google-cloud-bigquery/tests/unit/test_table.py | 11 +++++++++++ .../tests/unit/test_table_pandas.py | 9 +++++++++ 8 files changed, 44 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 2712b0c837ce..f64a81741ed9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -253,7 +253,7 @@ def __init__( bq_host = _get_bigquery_host() kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None if client_options: - if type(client_options) == dict: + if isinstance(client_options, dict): client_options = google.api_core.client_options.from_dict( client_options ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 7dddc8278641..25d57b501cc6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1342,10 +1342,10 @@ def _reload_query_results( # Our system does not natively handle that and instead expects # either none or a numeric value. If passed a Python object, convert to # None. - if type(self._done_timeout) == object: # pragma: NO COVER + if isinstance(self._done_timeout, object): # pragma: NO COVER self._done_timeout = None - if self._done_timeout is not None: + if self._done_timeout is not None: # pragma: NO COVER # Subtract a buffer for context switching, network latency, etc. api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS api_timeout = max(min(api_timeout, 10), 0) diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 726b68f7cf51..a46f8e3dfe46 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -41,7 +41,11 @@ "google.cloud.bigquery_storage", minversion="2.0.0" ) -PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") @@ -1006,6 +1010,9 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): assert len(dataframe.index) == 100 +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" +) @pytest.mark.parametrize( ("max_results",), ( diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 33a52cfec54e..6263460167f2 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -1229,7 +1229,7 @@ def test_result_w_timeout(self): query_request[1]["path"], "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), ) - self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) + self.assertEqual(query_request[1]["timeout"], 120) self.assertEqual( query_request[1]["timeout"], google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 01b60ceb3c01..f4c7eb06e862 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -17,6 +17,7 @@ import json import mock +import pkg_resources import pytest @@ -48,6 +49,11 @@ from .helpers import _make_client from .helpers import _make_job_resource +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + pandas = pytest.importorskip("pandas") try: @@ -646,6 +652,9 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" +) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 07bf03f66feb..a4cc1fefb449 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -546,6 +546,9 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" +) @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): rows = [pandas.Timestamp(row) for row in rows] diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a221bc89e906..f31dc5528c8b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -75,6 +75,11 @@ PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + def _mock_client(): from google.cloud.bigquery import client @@ -3677,6 +3682,9 @@ def test_to_dataframe_w_dtypes_mapper(self): self.assertEqual(df.timestamp.dtype.name, "object") @unittest.skipIf(pandas is None, "Requires `pandas`") + @pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" + ) def test_to_dataframe_w_none_dtypes_mapper(self): from google.cloud.bigquery.schema import SchemaField @@ -3789,6 +3797,9 @@ def test_to_dataframe_w_unsupported_dtypes_mapper(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" + ) def test_to_dataframe_column_dtypes(self): from google.cloud.bigquery.schema import SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py index 5778467a5a34..dfe512eea49e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py @@ -15,6 +15,7 @@ import datetime import decimal from unittest import mock +import pkg_resources import pytest @@ -26,6 +27,11 @@ TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data" +if pandas is not None: # pragma: NO COVER + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: # pragma: NO COVER + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + @pytest.fixture def class_under_test(): @@ -34,6 +40,9 @@ def class_under_test(): return RowIterator +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" +) def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): # See tests/system/test_arrow.py for the actual types we get from the API. arrow_schema = pyarrow.schema( From 2eaac39c23a6d95330e54a14e654f18123ac6fb2 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Sat, 2 Sep 2023 05:11:13 -0400 Subject: [PATCH 1639/2016] feat: search statistics (#1616) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * experimental tweaks * feat: adds two search statistics classes and property * removes several personal debugging sentinels * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * adds tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * cleans up conflict * adds comment * adds some type hints, adds a test for SearchReasons * cleans up some comments * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update tests/unit/job/test_query_stats.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updated type checks to be isinstance checks per linter * update linting * Update tests/unit/job/test_query_stats.py * Update tests/unit/job/test_query_stats.py * experiments with some tests that are failing * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Fix linting * update package verification approach * update pandas installed version constant * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove unused package * set pragma no cover * adds controls to skip testing if pandas exceeds 2.0 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * adds pragma no cover to a simple check * add checks against pandas 2.0 on system test * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * experiments with some tests that are failing * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * resolves merge conflict * resolves merge conflict * resolve conflicts * resolve merge conflicts * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates due to faulty confict resolution * adds docstrings to two classes * corrects formatting * Update tests/unit/job/test_query_stats.py * Update tests/unit/job/test_query_stats.py * updates default values and corrects mypy errors * corrects linting * Update google/cloud/bigquery/job/query.py --------- Co-authored-by: Owl Bot --- .../google/cloud/bigquery/job/query.py | 63 ++++++++++++++++- .../tests/unit/job/test_query.py | 22 ++++++ .../tests/unit/job/test_query_stats.py | 69 +++++++++++++++++++ 3 files changed, 153 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 25d57b501cc6..429e33e7ed18 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -198,6 +198,59 @@ def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats": return cls(*args) +class IndexUnusedReason(typing.NamedTuple): + """Reason about why no search index was used in the search query (or sub-query). + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#indexunusedreason + """ + + code: Optional[str] = None + """Specifies the high-level reason for the scenario when no search index was used. + """ + + message: Optional[str] = None + """Free form human-readable reason for the scenario when no search index was used. + """ + + baseTable: Optional[TableReference] = None + """Specifies the base table involved in the reason that no search index was used. + """ + + indexName: Optional[str] = None + """Specifies the name of the unused search index, if available.""" + + @classmethod + def from_api_repr(cls, reason): + code = reason.get("code") + message = reason.get("message") + baseTable = reason.get("baseTable") + indexName = reason.get("indexName") + + return cls(code, message, baseTable, indexName) + + +class SearchStats(typing.NamedTuple): + """Statistics related to Search Queries. Populated as part of JobStatistics2. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#searchstatistics + """ + + mode: Optional[str] = None + """Indicates the type of search index usage in the entire search query.""" + + reason: List[IndexUnusedReason] = [] + """Reason about why no search index was used in the search query (or sub-query)""" + + @classmethod + def from_api_repr(cls, stats: Dict[str, Any]): + mode = stats.get("indexUsageMode", None) + reason = [ + IndexUnusedReason.from_api_repr(r) + for r in stats.get("indexUnusedReasons", []) + ] + return cls(mode, reason) + + class ScriptOptions: """Options controlling the execution of scripts. @@ -724,7 +777,6 @@ def to_api_repr(self) -> dict: Dict: A dictionary in the format used by the BigQuery API. """ resource = copy.deepcopy(self._properties) - # Query parameters have an addition property associated with them # to indicate if the query is using named or positional parameters. query_parameters = resource["query"].get("queryParameters") @@ -858,6 +910,15 @@ def priority(self): """ return self.configuration.priority + @property + def search_stats(self) -> Optional[SearchStats]: + """Returns a SearchStats object.""" + + stats = self._job_statistics().get("searchStatistics") + if stats is not None: + return SearchStats.from_api_repr(stats) + return None + @property def query(self): """str: The query text used in this query job. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 6263460167f2..7d3186d47318 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -911,6 +911,28 @@ def test_dml_stats(self): assert isinstance(job.dml_stats, DmlStats) assert job.dml_stats.inserted_row_count == 35 + def test_search_stats(self): + from google.cloud.bigquery.job.query import SearchStats + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + assert job.search_stats is None + + statistics = job._properties["statistics"] = {} + assert job.search_stats is None + + query_stats = statistics["query"] = {} + assert job.search_stats is None + + query_stats["searchStatistics"] = { + "indexUsageMode": "INDEX_USAGE_MODE_UNSPECIFIED", + "indexUnusedReasons": [], + } + # job.search_stats is a daisy-chain of calls and gets: + # job.search_stats << job._job_statistics << job._properties + assert isinstance(job.search_stats, SearchStats) + assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED" + def test_result(self): from google.cloud.bigquery.table import RowIterator diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py index 13e022ced386..bdd0fb627754 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py @@ -108,6 +108,75 @@ def test_from_api_repr_full_stats(self): assert result.updated_row_count == 4 +class TestSearchStatistics: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job.query import SearchStats + + return SearchStats + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + def test_ctor_defaults(self): + search_stats = self._make_one() + assert search_stats.mode is None + assert search_stats.reason == [] + + def test_from_api_repr_unspecified(self): + klass = self._get_target_class() + result = klass.from_api_repr( + {"indexUsageMode": "INDEX_USAGE_MODE_UNSPECIFIED", "indexUnusedReasons": []} + ) + + assert isinstance(result, klass) + assert result.mode == "INDEX_USAGE_MODE_UNSPECIFIED" + assert result.reason == [] + + +class TestIndexUnusedReason: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job.query import IndexUnusedReason + + return IndexUnusedReason + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + def test_ctor_defaults(self): + search_reason = self._make_one() + assert search_reason.code is None + assert search_reason.message is None + assert search_reason.baseTable is None + assert search_reason.indexName is None + + def test_from_api_repr_unspecified(self): + klass = self._get_target_class() + result = klass.from_api_repr( + { + "code": "INDEX_CONFIG_NOT_AVAILABLE", + "message": "There is no search index...", + "baseTable": { + "projectId": "bigquery-public-data", + "datasetId": "usa_names", + "tableId": "usa_1910_current", + }, + "indexName": None, + } + ) + + assert isinstance(result, klass) + assert result.code == "INDEX_CONFIG_NOT_AVAILABLE" + assert result.message == "There is no search index..." + assert result.baseTable == { + "projectId": "bigquery-public-data", + "datasetId": "usa_names", + "tableId": "usa_1910_current", + } + assert result.indexName is None + + class TestQueryPlanEntryStep(_Base): KIND = "KIND" SUBSTEPS = ("SUB1", "SUB2") From bb6f918d88ce1d0acde7b99a4d56fba4641b14b7 Mon Sep 17 00:00:00 2001 From: meredithslota Date: Fri, 15 Sep 2023 07:57:38 -0700 Subject: [PATCH 1640/2016] chore(docs): update region tag and move sample and test (#1648) * chore: added new region tags to create_table_external_data_configuration.py * chore: delete test as part of sample migration * chore: delete sample as part of sample migration * chore: move sample and test to /snippets/ * chore: update import statement in create_table_external_data_configuration_test.py * chore: fix import statement in create_table_external_data_configuration_test.py * chore: update sample location in tables.rst --- packages/google-cloud-bigquery/docs/usage/tables.rst | 4 ++-- .../create_table_external_data_configuration.py | 4 ++++ .../create_table_external_data_configuration_test.py} | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) rename packages/google-cloud-bigquery/samples/{ => snippets}/create_table_external_data_configuration.py (94%) rename packages/google-cloud-bigquery/samples/{tests/test_create_table_external_data_configuration.py => snippets/create_table_external_data_configuration_test.py} (94%) diff --git a/packages/google-cloud-bigquery/docs/usage/tables.rst b/packages/google-cloud-bigquery/docs/usage/tables.rst index 105e93637815..a4f42b15cc85 100644 --- a/packages/google-cloud-bigquery/docs/usage/tables.rst +++ b/packages/google-cloud-bigquery/docs/usage/tables.rst @@ -61,7 +61,7 @@ Create an empty table with the Create a table using an external data source with the :func:`~google.cloud.bigquery.client.Client.create_table` method: -.. literalinclude:: ../samples/create_table_external_data_configuration.py +.. literalinclude:: ../samples/snippets/create_table_external_data_configuration.py :language: python :dedent: 4 :start-after: [START bigquery_create_table_external_data_configuration] @@ -313,4 +313,4 @@ Replace the table data with a Parquet file from Cloud Storage: :language: python :dedent: 4 :start-after: [START bigquery_load_table_gcs_parquet_truncate] - :end-before: [END bigquery_load_table_gcs_parquet_truncate] \ No newline at end of file + :end-before: [END bigquery_load_table_gcs_parquet_truncate] diff --git a/packages/google-cloud-bigquery/samples/create_table_external_data_configuration.py b/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration.py similarity index 94% rename from packages/google-cloud-bigquery/samples/create_table_external_data_configuration.py rename to packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration.py index 068f915555ab..cbb15d40a27f 100644 --- a/packages/google-cloud-bigquery/samples/create_table_external_data_configuration.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration.py @@ -18,6 +18,7 @@ def create_table_external_data_configuration( ) -> None: """Create a table using an external data source""" orig_table_id = table_id + # [START bigquery_query_external_gcs_perm] # [START bigquery_create_table_external_data_configuration] # [START bigquery_create_external_table_definition] from google.cloud import bigquery @@ -28,7 +29,9 @@ def create_table_external_data_configuration( # TODO(developer): Set table_id to the ID of the table to create. table_id = "your-project.your_dataset.your_table_name" # [END bigquery_create_table_external_data_configuration] + # [END bigquery_query_external_gcs_perm] table_id = orig_table_id + # [START bigquery_query_external_gcs_perm] # [START bigquery_create_table_external_data_configuration] # TODO(developer): Set the external source format of your table. @@ -64,3 +67,4 @@ def create_table_external_data_configuration( f"Created table with external source format {table.external_data_configuration.source_format}" ) # [END bigquery_create_table_external_data_configuration] + # [END bigquery_query_external_gcs_perm] diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_table_external_data_configuration.py b/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration_test.py similarity index 94% rename from packages/google-cloud-bigquery/samples/tests/test_create_table_external_data_configuration.py rename to packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration_test.py index bf4cf17d4563..7bbcde32beac 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_table_external_data_configuration.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration_test.py @@ -14,7 +14,7 @@ import typing -from .. import create_table_external_data_configuration +import create_table_external_data_configuration if typing.TYPE_CHECKING: import pytest From 77d273dc2046e26ddb23a7eebe4c53d31a8ee525 Mon Sep 17 00:00:00 2001 From: sriram Date: Fri, 15 Sep 2023 22:21:56 +0530 Subject: [PATCH 1641/2016] feat: add `Dataset.storage_billing_model` setter, use `client.update_dataset(ds, fields=["storage_billing_model"])` to update (#1643) Adding Storage Billing Model property. See: https://cloud.google.com/bigquery/docs/updating-datasets#update_storage_billing_models --------- Co-authored-by: Tim Swast --- .../google/cloud/bigquery/dataset.py | 33 +++++++++++++++++++ .../tests/unit/test_dataset.py | 25 +++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 513c32d9c5a1..114f0de18e15 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -527,6 +527,7 @@ class Dataset(object): "default_table_expiration_ms": "defaultTableExpirationMs", "friendly_name": "friendlyName", "default_encryption_configuration": "defaultEncryptionConfiguration", + "storage_billing_model": "storageBillingModel", } def __init__(self, dataset_ref) -> None: @@ -763,6 +764,38 @@ def default_encryption_configuration(self, value): api_repr = value.to_api_repr() self._properties["defaultEncryptionConfiguration"] = api_repr + @property + def storage_billing_model(self): + """Union[str, None]: StorageBillingModel of the dataset as set by the user + (defaults to :data:`None`). + + Set the value to one of ``'LOGICAL'`` or ``'PHYSICAL'``. This change + takes 24 hours to take effect and you must wait 14 days before you can + change the storage billing model again. + + See `storage billing model + `_ + in REST API docs and `updating the storage billing model + `_ + guide. + + Raises: + ValueError: for invalid value types. + """ + return self._properties.get("storageBillingModel") + + @storage_billing_model.setter + def storage_billing_model(self, value): + if not isinstance(value, str) and value is not None: + raise ValueError( + "storage_billing_model must be a string (e.g. 'LOGICAL', 'PHYSICAL'), or None. " + f"Got {repr(value)}." + ) + if value: + self._properties["storageBillingModel"] = value + if value is None: + self._properties["storageBillingModel"] = "LOGICAL" + @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 5e26a0c03c3e..f2bdf8db517f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -667,6 +667,7 @@ def _make_resource(self): "location": "US", "selfLink": self.RESOURCE_URL, "defaultTableExpirationMs": 3600, + "storageBillingModel": "LOGICAL", "access": [ {"role": "OWNER", "userByEmail": USER_EMAIL}, {"role": "OWNER", "groupByEmail": GROUP_EMAIL}, @@ -736,7 +737,12 @@ def _verify_resource_properties(self, dataset, resource): ) else: self.assertIsNone(dataset.default_encryption_configuration) - + if "storageBillingModel" in resource: + self.assertEqual( + dataset.storage_billing_model, resource.get("storageBillingModel") + ) + else: + self.assertIsNone(dataset.storage_billing_model) if "access" in resource: self._verify_access_entry(dataset.access_entries, resource) else: @@ -941,6 +947,23 @@ def test_default_encryption_configuration_setter(self): dataset.default_encryption_configuration = None self.assertIsNone(dataset.default_encryption_configuration) + def test_storage_billing_model_setter(self): + dataset = self._make_one(self.DS_REF) + dataset.storage_billing_model = "PHYSICAL" + self.assertEqual(dataset.storage_billing_model, "PHYSICAL") + + def test_storage_billing_model_setter_with_none(self): + dataset = self._make_one(self.DS_REF) + dataset.storage_billing_model = None + self.assertEqual(dataset.storage_billing_model, "LOGICAL") + + def test_storage_billing_model_setter_with_invalid_type(self): + dataset = self._make_one(self.DS_REF) + with self.assertRaises(ValueError) as raises: + dataset.storage_billing_model = object() + + self.assertIn("storage_billing_model", str(raises.exception)) + def test_from_string(self): cls = self._get_target_class() got = cls.from_string("string-project.string_dataset") From 2946738ef40a833a1189716d37bf3f14d8bf2ab3 Mon Sep 17 00:00:00 2001 From: Jared Chapman Date: Fri, 15 Sep 2023 13:17:01 -0500 Subject: [PATCH 1642/2016] docs: Revise update_table_expiration sample (#1457) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: Revise update_table_expiration sample * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Tim Swast Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/docs/snippets.py | 2 + .../snippets/update_table_expiration.py | 45 +++++++++++++++++++ .../snippets/update_table_expiration_test.py | 44 ++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/update_table_expiration.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index e1d9ae839731..d458b832c6cc 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -220,6 +220,8 @@ def test_update_table_expiration(client, to_delete): table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) table = client.create_table(table) + # TODO(thejaredchapman): After code sample has been updated from cloud.google.com delete this. + # [START bigquery_update_table_expiration] import datetime diff --git a/packages/google-cloud-bigquery/samples/snippets/update_table_expiration.py b/packages/google-cloud-bigquery/samples/snippets/update_table_expiration.py new file mode 100644 index 000000000000..bf944800fd19 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/update_table_expiration.py @@ -0,0 +1,45 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + + +def update_table_expiration(table_id, expiration): + orig_table_id = table_id + orig_expiration = expiration + + # [START bigquery_update_table_expiration] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to update. + table_id = "your-project.your_dataset.your_table_name" + + # TODO(dev): Set table to expire for desired days days from now. + expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=5 + ) + # [END bigquery_update_table_expiration] + + table_id = orig_table_id + expiration = orig_expiration + + # [START bigquery_update_table_expiration] + table = client.get_table(table_id) # Make an API request. + table.expires = expiration + table = client.update_table(table, ["expires"]) # API request + + print(f"Updated {table_id}, expires {table.expires}.") + # [END bigquery_update_table_expiration] diff --git a/packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py b/packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py new file mode 100644 index 000000000000..721bf53aa905 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py @@ -0,0 +1,44 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import typing + +import update_table_expiration + +if typing.TYPE_CHECKING: + import pathlib + + import pytest + + +def test_update_table_expiration( + capsys: "pytest.CaptureFixture[str]", + table_id: str, + tmp_path: "pathlib.Path", +) -> None: + + # This was not needed for function, only for test + expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=5 + ) + + update_table_expiration.update_table_expiration(table_id, expiration) + + out, _ = capsys.readouterr() + assert "Updated" in out + assert table_id in out + assert str(expiration.day) in out + assert str(expiration.month) in out + assert str(expiration.year) in out From 816ae93a1a03cb956638ee23097943f4ee705ca8 Mon Sep 17 00:00:00 2001 From: meredithslota Date: Mon, 18 Sep 2023 11:50:41 -0700 Subject: [PATCH 1643/2016] chore: de-dupe region tag `bigquery_query_external_gcs_perm` (#1658) --- .../google-cloud-bigquery/docs/snippets.py | 42 ------------------- 1 file changed, 42 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index d458b832c6cc..3a46cd36ce3a 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -617,48 +617,6 @@ def test_client_query_total_rows(client, capsys): assert "Got 100 rows." in out -def test_query_external_gcs_permanent_table(client, to_delete): - dataset_id = "query_external_gcs_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_query_external_gcs_perm] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - # Configure the external data source - dataset_ref = bigquery.DatasetReference(project, dataset_id) - table_id = "us_states" - schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - table = bigquery.Table(dataset_ref.table(table_id), schema=schema) - external_config = bigquery.ExternalConfig("CSV") - external_config.source_uris = [ - "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - ] - external_config.options.skip_leading_rows = 1 # optionally skip header row - table.external_data_configuration = external_config - - # Create a permanent table linked to the GCS file - table = client.create_table(table) # API request - - # Example query to find states starting with 'W' - sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id) - - query_job = client.query(sql) # API request - - w_states = list(query_job) # Waits for query to finish - print("There are {} states with names starting with W.".format(len(w_states))) - # [END bigquery_query_external_gcs_perm] - assert len(w_states) == 4 - - def test_ddl_create_view(client, to_delete, capsys): """Create a view via a DDL query.""" project = client.project From 0aa987290759578c4ae704bc0cfece6a03d2b2f2 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 18 Sep 2023 16:15:23 -0400 Subject: [PATCH 1644/2016] fix: use isinstance() per E721, unpin flake8 (#1659) * fix: use isinstance() per E721, unpin flake8 * change type assertion --- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 4 ++-- packages/google-cloud-bigquery/noxfile.py | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f64a81741ed9..b4783fc56e23 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1895,7 +1895,7 @@ def _get_query_results( extra_params: Dict[str, Any] = {"maxResults": 0} if timeout is not None: - if type(timeout) == object: + if not isinstance(timeout, (int, float)): timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT else: timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) @@ -3927,7 +3927,7 @@ def _list_rows_from_query_results( } if timeout is not None: - if type(timeout) == object: + if not isinstance(timeout, (int, float)): timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT else: timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 3c9ba5eb578e..93616485fe8b 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -375,9 +375,7 @@ def lint(session): serious code quality issues. """ - # Pin flake8 to 6.0.0 - # See https://github.com/googleapis/python-bigquery/issues/1635 - session.install("flake8==6.0.0", BLACK_VERSION) + session.install("flake8", BLACK_VERSION) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") From 768353cab8808c7e19aeb17cc4a09e6df4d3f183 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Oct 2023 11:56:44 -0500 Subject: [PATCH 1645/2016] fix: allow `storage_billing_model` to be explicitly set to `None` to use project default value (#1665) * fix: allow `storage_billing_model` to be explicitly set to `None` to use project default value * add STORAGE_BILLING_MODEL_UNSPECIFIED to docstring --- .../google/cloud/bigquery/dataset.py | 17 ++++++++--------- .../tests/unit/test_dataset.py | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 114f0de18e15..a9c1cd884064 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -769,9 +769,10 @@ def storage_billing_model(self): """Union[str, None]: StorageBillingModel of the dataset as set by the user (defaults to :data:`None`). - Set the value to one of ``'LOGICAL'`` or ``'PHYSICAL'``. This change - takes 24 hours to take effect and you must wait 14 days before you can - change the storage billing model again. + Set the value to one of ``'LOGICAL'``, ``'PHYSICAL'``, or + ``'STORAGE_BILLING_MODEL_UNSPECIFIED'``. This change takes 24 hours to + take effect and you must wait 14 days before you can change the storage + billing model again. See `storage billing model `_ @@ -788,13 +789,11 @@ def storage_billing_model(self): def storage_billing_model(self, value): if not isinstance(value, str) and value is not None: raise ValueError( - "storage_billing_model must be a string (e.g. 'LOGICAL', 'PHYSICAL'), or None. " - f"Got {repr(value)}." + "storage_billing_model must be a string (e.g. 'LOGICAL'," + " 'PHYSICAL', 'STORAGE_BILLING_MODEL_UNSPECIFIED'), or None." + f" Got {repr(value)}." ) - if value: - self._properties["storageBillingModel"] = value - if value is None: - self._properties["storageBillingModel"] = "LOGICAL" + self._properties["storageBillingModel"] = value @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index f2bdf8db517f..3b1452805e5d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -955,7 +955,7 @@ def test_storage_billing_model_setter(self): def test_storage_billing_model_setter_with_none(self): dataset = self._make_one(self.DS_REF) dataset.storage_billing_model = None - self.assertEqual(dataset.storage_billing_model, "LOGICAL") + self.assertIsNone(dataset.storage_billing_model) def test_storage_billing_model_setter_with_invalid_type(self): dataset = self._make_one(self.DS_REF) From f4fa76aa9ea4a5fc80dbec7da15926dea1373b08 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 11:12:41 -0400 Subject: [PATCH 1646/2016] chore(main): release 3.12.0 (#1642) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 21 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index cf64e2222778..a93bde9ebfd2 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,27 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.12.0](https://github.com/googleapis/python-bigquery/compare/v3.11.4...v3.12.0) (2023-10-02) + + +### Features + +* Add `Dataset.storage_billing_model` setter, use `client.update_dataset(ds, fields=["storage_billing_model"])` to update ([#1643](https://github.com/googleapis/python-bigquery/issues/1643)) ([5deba50](https://github.com/googleapis/python-bigquery/commit/5deba50b8c2d91d08bd5f5fb68742268c494b4a9)) +* Search statistics ([#1616](https://github.com/googleapis/python-bigquery/issues/1616)) ([b930e46](https://github.com/googleapis/python-bigquery/commit/b930e4673b0d1cceb53f683e47578d87af9361f3)) +* Widen retry predicate to include ServiceUnavailable ([#1641](https://github.com/googleapis/python-bigquery/issues/1641)) ([3e021a4](https://github.com/googleapis/python-bigquery/commit/3e021a46d387a0e3cb69913a281062fc221bb926)) + + +### Bug Fixes + +* Allow `storage_billing_model` to be explicitly set to `None` to use project default value ([#1665](https://github.com/googleapis/python-bigquery/issues/1665)) ([514d3e1](https://github.com/googleapis/python-bigquery/commit/514d3e12e5131bd589dff08893fd89bf40338ba3)) +* Relax timeout expectations ([#1645](https://github.com/googleapis/python-bigquery/issues/1645)) ([1760e94](https://github.com/googleapis/python-bigquery/commit/1760e945d16163980027fecf21113cd77ddc35a1)) +* Use isinstance() per E721, unpin flake8 ([#1659](https://github.com/googleapis/python-bigquery/issues/1659)) ([54a7769](https://github.com/googleapis/python-bigquery/commit/54a77694afcd80be4ba469c6ebb7ca8be112b04e)) + + +### Documentation + +* Revise update_table_expiration sample ([#1457](https://github.com/googleapis/python-bigquery/issues/1457)) ([03194e0](https://github.com/googleapis/python-bigquery/commit/03194e0156ed9201cb36301967c5af117d7ef29c)) + ## [3.11.4](https://github.com/googleapis/python-bigquery/compare/v3.11.3...v3.11.4) (2023-07-19) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index a97ccc0c8fc8..ea71d198bdd0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.11.4" +__version__ = "3.12.0" From 336a59372279907a34e953ebda2b2f91beefa13c Mon Sep 17 00:00:00 2001 From: Jared Chapman Date: Thu, 5 Oct 2023 12:57:10 -0500 Subject: [PATCH 1647/2016] docs: revised `create_partitioned_table` sample (#1447) * docs: revised create_partitioned_table sample * update sample tests to use correct fixture --------- Co-authored-by: Tim Swast --- .../google-cloud-bigquery/docs/snippets.py | 2 + .../snippets/create_partitioned_table.py | 45 +++++++++++++++++++ .../snippets/create_partitioned_table_test.py | 34 ++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/create_partitioned_table.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/create_partitioned_table_test.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 3a46cd36ce3a..7f9b4f59e7a4 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -125,6 +125,8 @@ def test_create_partitioned_table(client, to_delete): dataset = client.create_dataset(dataset_ref) to_delete.append(dataset) + # TODO(tswast): remove this snippet once cloud.google.com is updated to use + # samples/snippets/create_partitioned_table.py # [START bigquery_create_table_partitioned] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/snippets/create_partitioned_table.py b/packages/google-cloud-bigquery/samples/snippets/create_partitioned_table.py new file mode 100644 index 000000000000..0277d7d0f294 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/create_partitioned_table.py @@ -0,0 +1,45 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_partitioned_table(table_id): + your_fully_qualified_table_id = table_id + + # [START bigquery_create_table_partitioned] + from google.cloud import bigquery + + client = bigquery.Client() + + # Use format "your-project.your_dataset.your_table_name" for table_id + table_id = your_fully_qualified_table_id + schema = [ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + bigquery.SchemaField("date", "DATE"), + ] + table = bigquery.Table(table_id, schema=schema) + table.time_partitioning = bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", # name of column to use for partitioning + expiration_ms=1000 * 60 * 60 * 24 * 90, + ) # 90 days + + table = client.create_table(table) + + print( + f"Created table {table.project}.{table.dataset_id}.{table.table_id}, " + f"partitioned on column {table.time_partitioning.field}." + ) + # [END bigquery_create_table_partitioned] + return table diff --git a/packages/google-cloud-bigquery/samples/snippets/create_partitioned_table_test.py b/packages/google-cloud-bigquery/samples/snippets/create_partitioned_table_test.py new file mode 100644 index 000000000000..0f684fcb05d7 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/create_partitioned_table_test.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import create_partitioned_table + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_partitioned_table( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + table = create_partitioned_table.create_partitioned_table(random_table_id) + + out, _ = capsys.readouterr() + assert "Created" in out + assert random_table_id in out + + assert table.time_partitioning.type_ == "DAY" + assert table.time_partitioning.field == "date" From dc16cb34e4e8844158d066f40ee8a9603fe0202e Mon Sep 17 00:00:00 2001 From: Jared Chapman Date: Thu, 5 Oct 2023 14:03:46 -0500 Subject: [PATCH 1648/2016] docs: revised relax column mode sample (#1467) * docs: Revised relax_column sample * add todo for snippets.py cleanup --------- Co-authored-by: Tim Swast --- .../google-cloud-bigquery/docs/snippets.py | 2 + .../samples/snippets/relax_column.py | 52 +++++++++++++++++++ .../samples/snippets/relax_column_test.py | 46 ++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/relax_column.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/relax_column_test.py diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 7f9b4f59e7a4..62b0b6fd61c2 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -265,6 +265,8 @@ def test_relax_column(client, to_delete): dataset = client.create_dataset(dataset) to_delete.append(dataset) + # TODO(tswast): remove code sample once references to it on + # cloud.google.com are updated to samples/snippets/relax_column.py # [START bigquery_relax_column] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/packages/google-cloud-bigquery/samples/snippets/relax_column.py b/packages/google-cloud-bigquery/samples/snippets/relax_column.py new file mode 100644 index 000000000000..bcd79cee829a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/relax_column.py @@ -0,0 +1,52 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + + +def relax_column(table_id: str) -> bigquery.Table: + orig_table_id = table_id + + # [START bigquery_relax_column] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to full name of the table you want to create. + table_id = "your-project.your_dataset.your_table" + + # [END bigquery_relax_column] + table_id = orig_table_id + + # [START bigquery_relax_column] + table = client.get_table(table_id) + new_schema = [] + for field in table.schema: + if field.mode != "REQUIRED": + new_schema.append(field) + else: + # SchemaField properties cannot be edited after initialization. + # To make changes, construct new SchemaField objects. + new_field = field.to_api_repr() + new_field["mode"] = "NULLABLE" + relaxed_field = bigquery.SchemaField.from_api_repr(new_field) + new_schema.append(relaxed_field) + + table.schema = new_schema + table = client.update_table(table, ["schema"]) + + print(f"Updated {table_id} schema: {table.schema}.") + + # [END bigquery_relax_column] + return table diff --git a/packages/google-cloud-bigquery/samples/snippets/relax_column_test.py b/packages/google-cloud-bigquery/samples/snippets/relax_column_test.py new file mode 100644 index 000000000000..b40b13fa122c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/relax_column_test.py @@ -0,0 +1,46 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from google.cloud import bigquery + +import relax_column + +if typing.TYPE_CHECKING: + import pytest + + +def test_relax_column( + capsys: "pytest.CaptureFixture[str]", + bigquery_client: bigquery.Client, + random_table_id: str, +) -> None: + table = bigquery.Table( + random_table_id, + schema=[ + bigquery.SchemaField("string_col", "STRING", mode="NULLABLE"), + bigquery.SchemaField("string_col2", "STRING", mode="REQUIRED"), + ], + ) + + bigquery_client.create_table(table) + table = relax_column.relax_column(random_table_id) + + out, _ = capsys.readouterr() + + assert all(field.mode == "NULLABLE" for field in table.schema) + assert "REQUIRED" not in out + assert "NULLABLE" in out + assert random_table_id in out From 8b68ed30be6475ffaaf335cc28c4118f4cbe76b3 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 6 Oct 2023 22:32:55 -0400 Subject: [PATCH 1649/2016] chore: [autoapprove] bump cryptography from 41.0.3 to 41.0.4 (#1666) Source-Link: https://github.com/googleapis/synthtool/commit/dede53ff326079b457cfb1aae5bbdc82cbb51dc3 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:fac304457974bb530cc5396abd4ab25d26a469cd3bc97cbfb18c8d4324c584eb Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .../.github/.OwlBot.lock.yaml | 4 +- packages/google-cloud-bigquery/.gitignore | 1 + .../.kokoro/requirements.txt | 49 ++++++++++--------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index a3da1b0d4cd3..a9bdb1b7ac0f 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3e3800bb100af5d7f9e810d48212b37812c1856d20ffeafb99ebe66461b61fc7 -# created: 2023-08-02T10:53:29.114535628Z + digest: sha256:fac304457974bb530cc5396abd4ab25d26a469cd3bc97cbfb18c8d4324c584eb +# created: 2023-10-02T21:31:03.517640371Z diff --git a/packages/google-cloud-bigquery/.gitignore b/packages/google-cloud-bigquery/.gitignore index 99c3a1444ed2..168b201f66f4 100644 --- a/packages/google-cloud-bigquery/.gitignore +++ b/packages/google-cloud-bigquery/.gitignore @@ -51,6 +51,7 @@ docs.metadata # Virtual environment env/ +venv/ # Test logs coverage.xml diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 029bd342de94..96d593c8c82a 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -113,30 +113,30 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==41.0.3 \ - --hash=sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306 \ - --hash=sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84 \ - --hash=sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47 \ - --hash=sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d \ - --hash=sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116 \ - --hash=sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207 \ - --hash=sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81 \ - --hash=sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087 \ - --hash=sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd \ - --hash=sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507 \ - --hash=sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858 \ - --hash=sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae \ - --hash=sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34 \ - --hash=sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906 \ - --hash=sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd \ - --hash=sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922 \ - --hash=sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7 \ - --hash=sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4 \ - --hash=sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574 \ - --hash=sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1 \ - --hash=sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c \ - --hash=sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e \ - --hash=sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de +cryptography==41.0.4 \ + --hash=sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67 \ + --hash=sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311 \ + --hash=sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8 \ + --hash=sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13 \ + --hash=sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143 \ + --hash=sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f \ + --hash=sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829 \ + --hash=sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd \ + --hash=sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397 \ + --hash=sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac \ + --hash=sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d \ + --hash=sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a \ + --hash=sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839 \ + --hash=sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e \ + --hash=sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6 \ + --hash=sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9 \ + --hash=sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860 \ + --hash=sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca \ + --hash=sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91 \ + --hash=sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d \ + --hash=sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714 \ + --hash=sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb \ + --hash=sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f # via # gcp-releasetool # secretstorage @@ -382,6 +382,7 @@ protobuf==3.20.3 \ # gcp-docuploader # gcp-releasetool # google-api-core + # googleapis-common-protos pyasn1==0.4.8 \ --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \ --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba From 03f94d3a33ef6ba6dff0bd70c07c65821dc9347e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 10 Oct 2023 10:07:23 -0400 Subject: [PATCH 1650/2016] chore: [autoapprove] Update `black` and `isort` to latest versions (#1678) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: [autoapprove] Update `black` and `isort` to latest versions Source-Link: https://github.com/googleapis/synthtool/commit/0c7b0333f44b2b7075447f43a121a12d15a7b76a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:08e34975760f002746b1d8c86fdc90660be45945ee6d9db914d1508acdf9a547 * update black in noxfile.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- .../google-cloud-bigquery/.kokoro/requirements.txt | 6 +++--- packages/google-cloud-bigquery/.pre-commit-config.yaml | 2 +- .../google/cloud/bigquery/client.py | 3 --- .../google/cloud/bigquery/dataset.py | 1 - .../google/cloud/bigquery/job/query.py | 1 - packages/google-cloud-bigquery/noxfile.py | 2 +- .../google-cloud-bigquery/samples/add_empty_column.py | 1 - .../google-cloud-bigquery/samples/browse_table_data.py | 1 - .../google-cloud-bigquery/samples/client_list_jobs.py | 1 - .../samples/client_load_partitioned_table.py | 1 - packages/google-cloud-bigquery/samples/client_query.py | 1 - .../samples/client_query_add_column.py | 1 - .../samples/client_query_batch.py | 1 - .../samples/client_query_destination_table.py | 1 - .../client_query_destination_table_clustered.py | 1 - .../samples/client_query_destination_table_cmek.py | 1 - .../samples/client_query_destination_table_legacy.py | 1 - .../samples/client_query_dry_run.py | 1 - .../samples/client_query_legacy_sql.py | 1 - .../samples/client_query_relax_column.py | 1 - .../samples/client_query_w_array_params.py | 1 - .../samples/client_query_w_named_params.py | 1 - .../samples/client_query_w_positional_params.py | 1 - .../samples/client_query_w_struct_params.py | 1 - .../samples/client_query_w_timestamp_params.py | 1 - packages/google-cloud-bigquery/samples/copy_table.py | 1 - .../google-cloud-bigquery/samples/copy_table_cmek.py | 1 - .../samples/copy_table_multiple_source.py | 1 - .../google-cloud-bigquery/samples/create_dataset.py | 1 - packages/google-cloud-bigquery/samples/create_job.py | 1 - .../google-cloud-bigquery/samples/create_routine.py | 1 - .../samples/create_routine_ddl.py | 1 - packages/google-cloud-bigquery/samples/create_table.py | 1 - .../samples/create_table_clustered.py | 1 - .../samples/create_table_range_partitioned.py | 1 - .../google-cloud-bigquery/samples/dataset_exists.py | 1 - .../google-cloud-bigquery/samples/delete_dataset.py | 1 - .../samples/delete_dataset_labels.py | 1 - .../google-cloud-bigquery/samples/delete_routine.py | 1 - packages/google-cloud-bigquery/samples/delete_table.py | 1 - .../samples/download_public_data.py | 1 - .../samples/download_public_data_sandbox.py | 1 - .../samples/geography/insert_geojson.py | 1 - .../samples/geography/insert_wkt.py | 1 - packages/google-cloud-bigquery/samples/get_dataset.py | 1 - .../samples/get_dataset_labels.py | 1 - packages/google-cloud-bigquery/samples/get_routine.py | 1 - packages/google-cloud-bigquery/samples/get_table.py | 1 - .../google-cloud-bigquery/samples/label_dataset.py | 1 - .../google-cloud-bigquery/samples/list_datasets.py | 1 - .../samples/list_datasets_by_label.py | 1 - .../google-cloud-bigquery/samples/list_routines.py | 1 - packages/google-cloud-bigquery/samples/list_tables.py | 1 - .../samples/load_table_clustered.py | 1 - .../samples/load_table_dataframe.py | 1 - .../google-cloud-bigquery/samples/load_table_file.py | 1 - .../samples/load_table_uri_autodetect_csv.py | 1 - .../samples/load_table_uri_autodetect_json.py | 1 - .../samples/load_table_uri_avro.py | 1 - .../samples/load_table_uri_cmek.py | 1 - .../samples/load_table_uri_csv.py | 1 - .../samples/load_table_uri_orc.py | 1 - .../samples/load_table_uri_truncate_avro.py | 1 - .../samples/load_table_uri_truncate_csv.py | 1 - .../samples/load_table_uri_truncate_json.py | 1 - .../samples/load_table_uri_truncate_orc.py | 1 - .../samples/load_table_uri_truncate_parquet.py | 1 - .../samples/query_external_gcs_temporary_table.py | 1 - .../samples/query_external_sheets_permanent_table.py | 1 - .../samples/query_external_sheets_temporary_table.py | 1 - .../google-cloud-bigquery/samples/query_no_cache.py | 1 - .../google-cloud-bigquery/samples/query_pagination.py | 1 - .../google-cloud-bigquery/samples/query_to_arrow.py | 1 - .../samples/snippets/create_table_cmek_test.py | 1 - .../create_table_external_data_configuration_test.py | 1 - .../snippets/create_table_schema_from_json_test.py | 1 - .../samples/snippets/create_table_snapshot_test.py | 1 - .../samples/snippets/delete_label_table_test.py | 1 - .../samples/snippets/get_table_labels_test.py | 1 - .../samples/snippets/label_table_test.py | 1 - .../snippets/load_table_schema_from_json_test.py | 1 - .../samples/snippets/nested_repeated_schema_test.py | 1 - .../samples/snippets/quickstart.py | 1 - .../samples/snippets/quickstart_test.py | 1 - .../samples/snippets/update_table_expiration_test.py | 1 - .../google-cloud-bigquery/samples/snippets/view.py | 1 - packages/google-cloud-bigquery/samples/table_exists.py | 1 - .../google-cloud-bigquery/samples/table_insert_rows.py | 1 - .../table_insert_rows_explicit_none_insert_ids.py | 1 - .../samples/tests/test_add_empty_column.py | 1 - .../samples/tests/test_browse_table_data.py | 1 - .../samples/tests/test_client_list_jobs.py | 1 - .../tests/test_client_load_partitioned_table.py | 1 - .../samples/tests/test_client_query.py | 1 - .../samples/tests/test_client_query_add_column.py | 1 - .../samples/tests/test_client_query_batch.py | 1 - .../tests/test_client_query_destination_table.py | 1 - .../test_client_query_destination_table_clustered.py | 1 - .../tests/test_client_query_destination_table_cmek.py | 1 - .../test_client_query_destination_table_legacy.py | 1 - .../samples/tests/test_client_query_dry_run.py | 1 - .../samples/tests/test_client_query_legacy_sql.py | 1 - .../samples/tests/test_client_query_relax_column.py | 1 - .../samples/tests/test_client_query_w_array_params.py | 1 - .../samples/tests/test_client_query_w_named_params.py | 1 - .../tests/test_client_query_w_positional_params.py | 1 - .../samples/tests/test_client_query_w_struct_params.py | 1 - .../tests/test_client_query_w_timestamp_params.py | 1 - .../samples/tests/test_copy_table_multiple_source.py | 1 - .../samples/tests/test_create_dataset.py | 1 - .../samples/tests/test_dataset_exists.py | 1 - .../samples/tests/test_dataset_label_samples.py | 1 - .../samples/tests/test_delete_dataset.py | 1 - .../samples/tests/test_delete_table.py | 1 - .../samples/tests/test_get_dataset.py | 1 - .../samples/tests/test_get_table.py | 1 - .../samples/tests/test_list_tables.py | 1 - .../samples/tests/test_load_table_clustered.py | 1 - .../samples/tests/test_load_table_dataframe.py | 1 - .../tests/test_load_table_uri_autodetect_csv.py | 1 - .../tests/test_load_table_uri_autodetect_json.py | 1 - .../samples/tests/test_load_table_uri_cmek.py | 1 - .../samples/tests/test_load_table_uri_csv.py | 1 - .../samples/tests/test_load_table_uri_json.py | 1 - .../samples/tests/test_load_table_uri_orc.py | 1 - .../samples/tests/test_load_table_uri_parquet.py | 1 - .../tests/test_query_external_gcs_temporary_table.py | 1 - .../test_query_external_sheets_permanent_table.py | 1 - .../test_query_external_sheets_temporary_table.py | 1 - .../samples/tests/test_query_no_cache.py | 1 - .../samples/tests/test_query_pagination.py | 1 - .../samples/tests/test_query_script.py | 1 - .../samples/tests/test_query_to_arrow.py | 1 - .../samples/tests/test_table_exists.py | 1 - .../samples/tests/test_table_insert_rows.py | 1 - .../test_table_insert_rows_explicit_none_insert_ids.py | 1 - .../samples/tests/test_update_dataset_access.py | 1 - ...test_update_dataset_default_partition_expiration.py | 1 - .../test_update_dataset_default_table_expiration.py | 1 - .../samples/tests/test_update_dataset_description.py | 1 - .../test_update_table_require_partition_filter.py | 1 - .../samples/update_dataset_access.py | 1 - .../update_dataset_default_partition_expiration.py | 1 - .../samples/update_dataset_default_table_expiration.py | 1 - .../samples/update_dataset_description.py | 1 - .../google-cloud-bigquery/samples/update_routine.py | 1 - .../samples/update_table_require_partition_filter.py | 1 - .../google-cloud-bigquery/tests/system/test_client.py | 1 - .../google-cloud-bigquery/tests/unit/job/test_query.py | 1 - .../tests/unit/routine/test_remote_function_options.py | 1 - .../google-cloud-bigquery/tests/unit/test_client.py | 2 -- .../google-cloud-bigquery/tests/unit/test_dataset.py | 2 -- .../tests/unit/test_dbapi__helpers.py | 1 - .../tests/unit/test_external_config.py | 1 - .../google-cloud-bigquery/tests/unit/test_magics.py | 10 ++++++---- .../google-cloud-bigquery/tests/unit/test_table.py | 5 ----- 157 files changed, 13 insertions(+), 171 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index a9bdb1b7ac0f..dd98abbdeebe 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:fac304457974bb530cc5396abd4ab25d26a469cd3bc97cbfb18c8d4324c584eb -# created: 2023-10-02T21:31:03.517640371Z + digest: sha256:08e34975760f002746b1d8c86fdc90660be45945ee6d9db914d1508acdf9a547 +# created: 2023-10-09T14:06:13.397766266Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 96d593c8c82a..0332d3267e15 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -467,9 +467,9 @@ typing-extensions==4.4.0 \ --hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e # via -r requirements.in -urllib3==1.26.12 \ - --hash=sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e \ - --hash=sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997 +urllib3==1.26.17 \ + --hash=sha256:24d6a242c28d29af46c3fae832c36db3bbebcc533dd1bb549172cd739c82df21 \ + --hash=sha256:94a757d178c9be92ef5539b8840d48dc9cf1b2709c9d6b588232a055c524458b # via # requests # twine diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml index 19409cbd37a4..6a8e16950664 100644 --- a/packages/google-cloud-bigquery/.pre-commit-config.yaml +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 23.7.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index b4783fc56e23..f7c7864a15c8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -476,7 +476,6 @@ def list_datasets( span_attributes = {"path": path} def api_request(*args, **kwargs): - return self._call_api( retry, span_name="BigQuery.listDatasets", @@ -2686,7 +2685,6 @@ def load_table_from_dataframe( os.close(tmpfd) try: - if new_job_config.source_format == job.SourceFormat.PARQUET: if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: msg = ( @@ -2722,7 +2720,6 @@ def load_table_from_dataframe( ) else: - dataframe.to_csv( tmppath, index=False, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index a9c1cd884064..b7fed61c7a52 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -454,7 +454,6 @@ def __ne__(self, other): return not self == other def __repr__(self): - return f"" def _key(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 429e33e7ed18..7de209b8d1f5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1231,7 +1231,6 @@ def referenced_tables(self): datasets_by_project_name = {} for table in self._job_statistics().get("referencedTables", ()): - t_project = table["projectId"] ds_id = table["datasetId"] diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 93616485fe8b..ba06f925d606 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -24,7 +24,7 @@ MYPY_VERSION = "mypy==0.910" PYTYPE_VERSION = "pytype==2021.4.9" -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ( "docs", "google", diff --git a/packages/google-cloud-bigquery/samples/add_empty_column.py b/packages/google-cloud-bigquery/samples/add_empty_column.py index 6d449d6e20d4..5541a6738ded 100644 --- a/packages/google-cloud-bigquery/samples/add_empty_column.py +++ b/packages/google-cloud-bigquery/samples/add_empty_column.py @@ -14,7 +14,6 @@ def add_empty_column(table_id: str) -> None: - # [START bigquery_add_empty_column] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/browse_table_data.py b/packages/google-cloud-bigquery/samples/browse_table_data.py index 74b903aa3ec7..2fba65aeb3c4 100644 --- a/packages/google-cloud-bigquery/samples/browse_table_data.py +++ b/packages/google-cloud-bigquery/samples/browse_table_data.py @@ -14,7 +14,6 @@ def browse_table_data(table_id: str) -> None: - # [START bigquery_browse_table] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_list_jobs.py b/packages/google-cloud-bigquery/samples/client_list_jobs.py index 7f1e39cb8e53..335d2ecec7b7 100644 --- a/packages/google-cloud-bigquery/samples/client_list_jobs.py +++ b/packages/google-cloud-bigquery/samples/client_list_jobs.py @@ -14,7 +14,6 @@ def client_list_jobs() -> None: - # [START bigquery_list_jobs] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py b/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py index 9956f3f0056c..cfdf248194b4 100644 --- a/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py +++ b/packages/google-cloud-bigquery/samples/client_load_partitioned_table.py @@ -14,7 +14,6 @@ def client_load_partitioned_table(table_id: str) -> None: - # [START bigquery_load_table_partitioned] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query.py b/packages/google-cloud-bigquery/samples/client_query.py index 091d3f98bfbc..4df051ee2c0e 100644 --- a/packages/google-cloud-bigquery/samples/client_query.py +++ b/packages/google-cloud-bigquery/samples/client_query.py @@ -14,7 +14,6 @@ def client_query() -> None: - # [START bigquery_query] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_add_column.py b/packages/google-cloud-bigquery/samples/client_query_add_column.py index 2da200bc51d3..ec14087fb45e 100644 --- a/packages/google-cloud-bigquery/samples/client_query_add_column.py +++ b/packages/google-cloud-bigquery/samples/client_query_add_column.py @@ -14,7 +14,6 @@ def client_query_add_column(table_id: str) -> None: - # [START bigquery_add_column_query_append] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_batch.py b/packages/google-cloud-bigquery/samples/client_query_batch.py index df164d1be11c..5c55e278e0fe 100644 --- a/packages/google-cloud-bigquery/samples/client_query_batch.py +++ b/packages/google-cloud-bigquery/samples/client_query_batch.py @@ -19,7 +19,6 @@ def client_query_batch() -> "bigquery.QueryJob": - # [START bigquery_query_batch] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table.py b/packages/google-cloud-bigquery/samples/client_query_destination_table.py index b200f1cc6c3a..486576fea7ca 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table.py @@ -14,7 +14,6 @@ def client_query_destination_table(table_id: str) -> None: - # [START bigquery_query_destination_table] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py index c4ab305f5357..de9fff2d03bd 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py @@ -14,7 +14,6 @@ def client_query_destination_table_clustered(table_id: str) -> None: - # [START bigquery_query_clustered_table] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py index 0fd44d189c5b..040c96e22616 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_cmek.py @@ -14,7 +14,6 @@ def client_query_destination_table_cmek(table_id: str, kms_key_name: str) -> None: - # [START bigquery_query_destination_table_cmek] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py index ee45d9a01c58..37f50fdb4558 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py @@ -14,7 +14,6 @@ def client_query_destination_table_legacy(table_id: str) -> None: - # [START bigquery_query_legacy_large_results] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_dry_run.py b/packages/google-cloud-bigquery/samples/client_query_dry_run.py index 418b43cb5862..bb4893c2a6a8 100644 --- a/packages/google-cloud-bigquery/samples/client_query_dry_run.py +++ b/packages/google-cloud-bigquery/samples/client_query_dry_run.py @@ -19,7 +19,6 @@ def client_query_dry_run() -> "bigquery.QueryJob": - # [START bigquery_query_dry_run] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py b/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py index c054e1f28e4a..44917e4e02d0 100644 --- a/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py +++ b/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py @@ -14,7 +14,6 @@ def client_query_legacy_sql() -> None: - # [START bigquery_query_legacy] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_relax_column.py b/packages/google-cloud-bigquery/samples/client_query_relax_column.py index c96a1e7aaf4f..22ecb33d11ac 100644 --- a/packages/google-cloud-bigquery/samples/client_query_relax_column.py +++ b/packages/google-cloud-bigquery/samples/client_query_relax_column.py @@ -14,7 +14,6 @@ def client_query_relax_column(table_id: str) -> None: - # [START bigquery_relax_column_query_append] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_w_array_params.py b/packages/google-cloud-bigquery/samples/client_query_w_array_params.py index 66971318216c..25592a94a30a 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_array_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_array_params.py @@ -14,7 +14,6 @@ def client_query_w_array_params() -> None: - # [START bigquery_query_params_arrays] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_w_named_params.py b/packages/google-cloud-bigquery/samples/client_query_w_named_params.py index f42be1dc8e1f..6dd72d44fa05 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_named_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_named_params.py @@ -14,7 +14,6 @@ def client_query_w_named_params() -> None: - # [START bigquery_query_params_named] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py b/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py index b088b305ea22..9cdde69ca64d 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py @@ -14,7 +14,6 @@ def client_query_w_positional_params() -> None: - # [START bigquery_query_params_positional] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py b/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py index 6c5b78113b1a..6b68e78edc9c 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py @@ -14,7 +14,6 @@ def client_query_w_struct_params() -> None: - # [START bigquery_query_params_structs] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py index 07d64cc94214..c1ade8782cfe 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py @@ -14,7 +14,6 @@ def client_query_w_timestamp_params() -> None: - # [START bigquery_query_params_timestamps] import datetime diff --git a/packages/google-cloud-bigquery/samples/copy_table.py b/packages/google-cloud-bigquery/samples/copy_table.py index 8c6153fef238..3ae6e9ebe94d 100644 --- a/packages/google-cloud-bigquery/samples/copy_table.py +++ b/packages/google-cloud-bigquery/samples/copy_table.py @@ -14,7 +14,6 @@ def copy_table(source_table_id: str, destination_table_id: str) -> None: - # [START bigquery_copy_table] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/copy_table_cmek.py b/packages/google-cloud-bigquery/samples/copy_table_cmek.py index f2e8a90f93bd..f03053fabb21 100644 --- a/packages/google-cloud-bigquery/samples/copy_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/copy_table_cmek.py @@ -14,7 +14,6 @@ def copy_table_cmek(dest_table_id: str, orig_table_id: str, kms_key_name: str) -> None: - # [START bigquery_copy_table_cmek] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py b/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py index 1163b166470a..509b8951bd61 100644 --- a/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py +++ b/packages/google-cloud-bigquery/samples/copy_table_multiple_source.py @@ -16,7 +16,6 @@ def copy_table_multiple_source(dest_table_id: str, table_ids: Sequence[str]) -> None: - # [START bigquery_copy_table_multiple_source] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_dataset.py b/packages/google-cloud-bigquery/samples/create_dataset.py index dea91798d99c..7f645933a1ba 100644 --- a/packages/google-cloud-bigquery/samples/create_dataset.py +++ b/packages/google-cloud-bigquery/samples/create_dataset.py @@ -14,7 +14,6 @@ def create_dataset(dataset_id: str) -> None: - # [START bigquery_create_dataset] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_job.py b/packages/google-cloud-bigquery/samples/create_job.py index 129a08a1bf70..f335e2f7af1c 100644 --- a/packages/google-cloud-bigquery/samples/create_job.py +++ b/packages/google-cloud-bigquery/samples/create_job.py @@ -20,7 +20,6 @@ def create_job() -> "Union[LoadJob, CopyJob, ExtractJob, QueryJob]": - # [START bigquery_create_job] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_routine.py b/packages/google-cloud-bigquery/samples/create_routine.py index 96dc242107f3..8be1b6a99f3c 100644 --- a/packages/google-cloud-bigquery/samples/create_routine.py +++ b/packages/google-cloud-bigquery/samples/create_routine.py @@ -19,7 +19,6 @@ def create_routine(routine_id: str) -> "bigquery.Routine": - # [START bigquery_create_routine] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_routine_ddl.py b/packages/google-cloud-bigquery/samples/create_routine_ddl.py index 56c7cfe24e98..231d5a142d13 100644 --- a/packages/google-cloud-bigquery/samples/create_routine_ddl.py +++ b/packages/google-cloud-bigquery/samples/create_routine_ddl.py @@ -14,7 +14,6 @@ def create_routine_ddl(routine_id: str) -> None: - # [START bigquery_create_routine_ddl] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_table.py b/packages/google-cloud-bigquery/samples/create_table.py index eaac54696a9a..7fda370ce6d0 100644 --- a/packages/google-cloud-bigquery/samples/create_table.py +++ b/packages/google-cloud-bigquery/samples/create_table.py @@ -14,7 +14,6 @@ def create_table(table_id: str) -> None: - # [START bigquery_create_table] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_table_clustered.py b/packages/google-cloud-bigquery/samples/create_table_clustered.py index 1686c519a402..a9ad43e59c1a 100644 --- a/packages/google-cloud-bigquery/samples/create_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/create_table_clustered.py @@ -19,7 +19,6 @@ def create_table_clustered(table_id: str) -> "bigquery.Table": - # [START bigquery_create_table_clustered] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py b/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py index 4dc45ed5853a..128ab87d998b 100644 --- a/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py +++ b/packages/google-cloud-bigquery/samples/create_table_range_partitioned.py @@ -19,7 +19,6 @@ def create_table_range_partitioned(table_id: str) -> "bigquery.Table": - # [START bigquery_create_table_range_partitioned] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/dataset_exists.py b/packages/google-cloud-bigquery/samples/dataset_exists.py index 221899a650ba..784f86524d59 100644 --- a/packages/google-cloud-bigquery/samples/dataset_exists.py +++ b/packages/google-cloud-bigquery/samples/dataset_exists.py @@ -14,7 +14,6 @@ def dataset_exists(dataset_id: str) -> None: - # [START bigquery_dataset_exists] from google.cloud import bigquery from google.cloud.exceptions import NotFound diff --git a/packages/google-cloud-bigquery/samples/delete_dataset.py b/packages/google-cloud-bigquery/samples/delete_dataset.py index b340ed57a9cd..9c7644db09d9 100644 --- a/packages/google-cloud-bigquery/samples/delete_dataset.py +++ b/packages/google-cloud-bigquery/samples/delete_dataset.py @@ -14,7 +14,6 @@ def delete_dataset(dataset_id: str) -> None: - # [START bigquery_delete_dataset] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/delete_dataset_labels.py b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py index ec5df09c11be..d5efdf4eaa36 100644 --- a/packages/google-cloud-bigquery/samples/delete_dataset_labels.py +++ b/packages/google-cloud-bigquery/samples/delete_dataset_labels.py @@ -19,7 +19,6 @@ def delete_dataset_labels(dataset_id: str) -> "bigquery.Dataset": - # [START bigquery_delete_label_dataset] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/delete_routine.py b/packages/google-cloud-bigquery/samples/delete_routine.py index 7362a5fea18d..604e7f730312 100644 --- a/packages/google-cloud-bigquery/samples/delete_routine.py +++ b/packages/google-cloud-bigquery/samples/delete_routine.py @@ -14,7 +14,6 @@ def delete_routine(routine_id: str) -> None: - # [START bigquery_delete_routine] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/delete_table.py b/packages/google-cloud-bigquery/samples/delete_table.py index 9e7ee170a384..a8ac4617acd6 100644 --- a/packages/google-cloud-bigquery/samples/delete_table.py +++ b/packages/google-cloud-bigquery/samples/delete_table.py @@ -14,7 +14,6 @@ def delete_table(table_id: str) -> None: - # [START bigquery_delete_table] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/download_public_data.py b/packages/google-cloud-bigquery/samples/download_public_data.py index a488bbbb503f..cb2ebd1fde3f 100644 --- a/packages/google-cloud-bigquery/samples/download_public_data.py +++ b/packages/google-cloud-bigquery/samples/download_public_data.py @@ -14,7 +14,6 @@ def download_public_data() -> None: - # [START bigquery_pandas_public_data] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py b/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py index ce5200b4e152..e165a31ce35a 100644 --- a/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py +++ b/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py @@ -14,7 +14,6 @@ def download_public_data_sandbox() -> None: - # [START bigquery_pandas_public_data_sandbox] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/geography/insert_geojson.py b/packages/google-cloud-bigquery/samples/geography/insert_geojson.py index 2db407b55641..9a6f6c413f90 100644 --- a/packages/google-cloud-bigquery/samples/geography/insert_geojson.py +++ b/packages/google-cloud-bigquery/samples/geography/insert_geojson.py @@ -18,7 +18,6 @@ def insert_geojson( override_values: Optional[Mapping[str, str]] = None ) -> Sequence[Dict[str, object]]: - if override_values is None: override_values = {} diff --git a/packages/google-cloud-bigquery/samples/geography/insert_wkt.py b/packages/google-cloud-bigquery/samples/geography/insert_wkt.py index 25c7ee727f0c..2923d2596ece 100644 --- a/packages/google-cloud-bigquery/samples/geography/insert_wkt.py +++ b/packages/google-cloud-bigquery/samples/geography/insert_wkt.py @@ -18,7 +18,6 @@ def insert_wkt( override_values: Optional[Mapping[str, str]] = None ) -> Sequence[Dict[str, object]]: - if override_values is None: override_values = {} diff --git a/packages/google-cloud-bigquery/samples/get_dataset.py b/packages/google-cloud-bigquery/samples/get_dataset.py index 5654cbdceee2..1e4ad2904f5a 100644 --- a/packages/google-cloud-bigquery/samples/get_dataset.py +++ b/packages/google-cloud-bigquery/samples/get_dataset.py @@ -14,7 +14,6 @@ def get_dataset(dataset_id: str) -> None: - # [START bigquery_get_dataset] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/get_dataset_labels.py b/packages/google-cloud-bigquery/samples/get_dataset_labels.py index d97ee3c01e52..8dc8b94300b0 100644 --- a/packages/google-cloud-bigquery/samples/get_dataset_labels.py +++ b/packages/google-cloud-bigquery/samples/get_dataset_labels.py @@ -14,7 +14,6 @@ def get_dataset_labels(dataset_id: str) -> None: - # [START bigquery_get_dataset_labels] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/get_routine.py b/packages/google-cloud-bigquery/samples/get_routine.py index 031d9a127b5e..96e85acc975f 100644 --- a/packages/google-cloud-bigquery/samples/get_routine.py +++ b/packages/google-cloud-bigquery/samples/get_routine.py @@ -19,7 +19,6 @@ def get_routine(routine_id: str) -> "bigquery.Routine": - # [START bigquery_get_routine] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/get_table.py b/packages/google-cloud-bigquery/samples/get_table.py index 6195aaf9acee..f71db7cee634 100644 --- a/packages/google-cloud-bigquery/samples/get_table.py +++ b/packages/google-cloud-bigquery/samples/get_table.py @@ -14,7 +14,6 @@ def get_table(table_id: str) -> None: - # [START bigquery_get_table] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/label_dataset.py b/packages/google-cloud-bigquery/samples/label_dataset.py index a59743e5d073..4fcc3dcd8ed1 100644 --- a/packages/google-cloud-bigquery/samples/label_dataset.py +++ b/packages/google-cloud-bigquery/samples/label_dataset.py @@ -14,7 +14,6 @@ def label_dataset(dataset_id: str) -> None: - # [START bigquery_label_dataset] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/list_datasets.py b/packages/google-cloud-bigquery/samples/list_datasets.py index c1b6639a97b4..d9401e9aecb0 100644 --- a/packages/google-cloud-bigquery/samples/list_datasets.py +++ b/packages/google-cloud-bigquery/samples/list_datasets.py @@ -14,7 +14,6 @@ def list_datasets() -> None: - # [START bigquery_list_datasets] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/list_datasets_by_label.py b/packages/google-cloud-bigquery/samples/list_datasets_by_label.py index d1f264872f9a..3a2bef632acd 100644 --- a/packages/google-cloud-bigquery/samples/list_datasets_by_label.py +++ b/packages/google-cloud-bigquery/samples/list_datasets_by_label.py @@ -14,7 +14,6 @@ def list_datasets_by_label() -> None: - # [START bigquery_list_datasets_by_label] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/list_routines.py b/packages/google-cloud-bigquery/samples/list_routines.py index bee7c23be3b8..95ddd962ed42 100644 --- a/packages/google-cloud-bigquery/samples/list_routines.py +++ b/packages/google-cloud-bigquery/samples/list_routines.py @@ -14,7 +14,6 @@ def list_routines(dataset_id: str) -> None: - # [START bigquery_list_routines] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/list_tables.py b/packages/google-cloud-bigquery/samples/list_tables.py index df846961d222..17c06370d2e3 100644 --- a/packages/google-cloud-bigquery/samples/list_tables.py +++ b/packages/google-cloud-bigquery/samples/list_tables.py @@ -14,7 +14,6 @@ def list_tables(dataset_id: str) -> None: - # [START bigquery_list_tables] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_clustered.py b/packages/google-cloud-bigquery/samples/load_table_clustered.py index 87b6c76ce5b2..7497468826df 100644 --- a/packages/google-cloud-bigquery/samples/load_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/load_table_clustered.py @@ -19,7 +19,6 @@ def load_table_clustered(table_id: str) -> "bigquery.Table": - # [START bigquery_load_table_clustered] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_dataframe.py b/packages/google-cloud-bigquery/samples/load_table_dataframe.py index db4c131f2ef0..2c668d18322d 100644 --- a/packages/google-cloud-bigquery/samples/load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/load_table_dataframe.py @@ -19,7 +19,6 @@ def load_table_dataframe(table_id: str) -> "bigquery.Table": - # [START bigquery_load_table_dataframe] import datetime diff --git a/packages/google-cloud-bigquery/samples/load_table_file.py b/packages/google-cloud-bigquery/samples/load_table_file.py index 00226eb3c943..838c3b1052c6 100644 --- a/packages/google-cloud-bigquery/samples/load_table_file.py +++ b/packages/google-cloud-bigquery/samples/load_table_file.py @@ -19,7 +19,6 @@ def load_table_file(file_path: str, table_id: str) -> "bigquery.Table": - # [START bigquery_load_from_file] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py index c412c63f1209..ca4590581ceb 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_csv.py @@ -14,7 +14,6 @@ def load_table_uri_autodetect_csv(table_id: str) -> None: - # [START bigquery_load_table_gcs_csv_autodetect] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py index 9d0bc3f22a03..00e8dc1b2aba 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_autodetect_json.py @@ -14,7 +14,6 @@ def load_table_uri_autodetect_json(table_id: str) -> None: - # [START bigquery_load_table_gcs_json_autodetect] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_avro.py b/packages/google-cloud-bigquery/samples/load_table_uri_avro.py index e9f7c39ede7f..a0e8c86a64e1 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_avro.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_avro.py @@ -14,7 +14,6 @@ def load_table_uri_avro(table_id: str) -> None: - # [START bigquery_load_table_gcs_avro] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_cmek.py b/packages/google-cloud-bigquery/samples/load_table_uri_cmek.py index 4dfc0d3b4bb5..d54422028e87 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_cmek.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_cmek.py @@ -14,7 +14,6 @@ def load_table_uri_cmek(table_id: str, kms_key_name: str) -> None: - # [START bigquery_load_table_gcs_json_cmek] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_csv.py b/packages/google-cloud-bigquery/samples/load_table_uri_csv.py index 9cb8c6f20c27..d660a2195d55 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_csv.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_csv.py @@ -14,7 +14,6 @@ def load_table_uri_csv(table_id: str) -> None: - # [START bigquery_load_table_gcs_csv] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_orc.py b/packages/google-cloud-bigquery/samples/load_table_uri_orc.py index 7babd26301ea..c09129216421 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_orc.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_orc.py @@ -14,7 +14,6 @@ def load_table_uri_orc(table_id: str) -> None: - # [START bigquery_load_table_gcs_orc] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py index 51c6636fa9fc..307a4e4deca0 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_avro.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_avro(table_id: str) -> None: - # [START bigquery_load_table_gcs_avro_truncate] import io diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py index ee8b3404372b..4bfd306cd645 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_csv.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_csv(table_id: str) -> None: - # [START bigquery_load_table_gcs_csv_truncate] import io diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py index e85e0808e573..a05a3eda0348 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_json.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_json(table_id: str) -> None: - # [START bigquery_load_table_gcs_json_truncate] import io diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py index c730099d1469..1c704b7457a1 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_orc.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_orc(table_id: str) -> None: - # [START bigquery_load_table_gcs_orc_truncate] import io diff --git a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py index 3a0a55c8a9d2..d74f79910df6 100644 --- a/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py +++ b/packages/google-cloud-bigquery/samples/load_table_uri_truncate_parquet.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_parquet(table_id: str) -> None: - # [START bigquery_load_table_gcs_parquet_truncate] import io diff --git a/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py index 9bcb86aab6ff..d622ab1a3025 100644 --- a/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py @@ -14,7 +14,6 @@ def query_external_gcs_temporary_table() -> None: - # [START bigquery_query_external_gcs_temp] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py index a5855e66a34b..f23f44259585 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py @@ -14,7 +14,6 @@ def query_external_sheets_permanent_table(dataset_id: str) -> None: - # [START bigquery_query_external_sheets_perm] from google.cloud import bigquery import google.auth diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py index 944d3b826ed7..876e4cc1ac60 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_temporary_table.py @@ -14,7 +14,6 @@ def query_external_sheets_temporary_table() -> None: - # [START bigquery_query_external_sheets_temp] # [START bigquery_auth_drive_scope] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/query_no_cache.py b/packages/google-cloud-bigquery/samples/query_no_cache.py index f39c01dbc297..7501b7fc09c0 100644 --- a/packages/google-cloud-bigquery/samples/query_no_cache.py +++ b/packages/google-cloud-bigquery/samples/query_no_cache.py @@ -14,7 +14,6 @@ def query_no_cache() -> None: - # [START bigquery_query_no_cache] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/query_pagination.py b/packages/google-cloud-bigquery/samples/query_pagination.py index 2e16540506e2..7ccaecff7a7c 100644 --- a/packages/google-cloud-bigquery/samples/query_pagination.py +++ b/packages/google-cloud-bigquery/samples/query_pagination.py @@ -14,7 +14,6 @@ def query_pagination() -> None: - # [START bigquery_query_pagination] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/query_to_arrow.py b/packages/google-cloud-bigquery/samples/query_to_arrow.py index 157a93638c1d..f1afc7c94bba 100644 --- a/packages/google-cloud-bigquery/samples/query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/query_to_arrow.py @@ -19,7 +19,6 @@ def query_to_arrow() -> "pyarrow.Table": - # [START bigquery_query_to_arrow] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py index 429baf3fd228..2b15fb350a95 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py @@ -24,7 +24,6 @@ def test_create_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - kms_key_name = ( "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/test" ) diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration_test.py index 7bbcde32beac..e97d7170dcda 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration_test.py @@ -24,7 +24,6 @@ def test_create_table_external_data_configuration( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - create_table_external_data_configuration.create_table_external_data_configuration( random_table_id ) diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py index e99b92672c48..39b00cea0989 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py @@ -24,7 +24,6 @@ def test_create_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - create_table_schema_from_json.create_table(random_table_id) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py index f1d8d0f7b43b..784dc3ddd691 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py @@ -25,7 +25,6 @@ def test_create_table_snapshot( table_id: str, random_table_id: str, ) -> None: - create_table_snapshot.create_table_snapshot(table_id, random_table_id) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py b/packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py index 54acae77f4ff..80fcbb695f2b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py @@ -24,7 +24,6 @@ def test_delete_label_table( capsys: "pytest.CaptureFixture[str]", table_id: str, ) -> None: - table = delete_label_table.delete_label_table(table_id, "color") out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py b/packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py index f922e728c44b..95a95b60ff84 100644 --- a/packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py @@ -42,7 +42,6 @@ def test_get_table_labels_no_label( capsys: "pytest.CaptureFixture[str]", table_id: str, ) -> None: - get_table_labels.get_table_labels(table_id) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/snippets/label_table_test.py b/packages/google-cloud-bigquery/samples/snippets/label_table_test.py index a77fb4b75f38..98f3b3cc7c32 100644 --- a/packages/google-cloud-bigquery/samples/snippets/label_table_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/label_table_test.py @@ -24,7 +24,6 @@ def test_label_table( capsys: "pytest.CaptureFixture[str]", table_id: str, ) -> None: - label_table.label_table(table_id) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py b/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py index 267a6786c722..c28875b0ef18 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py @@ -24,7 +24,6 @@ def test_load_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - load_table_schema_from_json.load_table(random_table_id) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py b/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py index 0386fc8fb8f5..8bb8bda6ae36 100644 --- a/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py @@ -24,7 +24,6 @@ def test_create_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - nested_repeated_schema.nested_schema(random_table_id) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart.py b/packages/google-cloud-bigquery/samples/snippets/quickstart.py index f9628da7d4ae..8f7f05c7357e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart.py @@ -18,7 +18,6 @@ def run_quickstart(override_values: Optional[Dict[str, str]] = None) -> None: - if override_values is None: override_values = {} diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py index 610c63c3ba1f..98a5fdd4e9ba 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py @@ -43,7 +43,6 @@ def test_quickstart( client: bigquery.Client, datasets_to_delete: List[str], ) -> None: - override_values = { "dataset_id": "my_new_dataset_{}".format(str(uuid.uuid4()).replace("-", "_")), } diff --git a/packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py b/packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py index 721bf53aa905..1566c7763493 100644 --- a/packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py @@ -28,7 +28,6 @@ def test_update_table_expiration( table_id: str, tmp_path: "pathlib.Path", ) -> None: - # This was not needed for function, only for test expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( days=5 diff --git a/packages/google-cloud-bigquery/samples/snippets/view.py b/packages/google-cloud-bigquery/samples/snippets/view.py index 5e976f68a2b8..94f406890900 100644 --- a/packages/google-cloud-bigquery/samples/snippets/view.py +++ b/packages/google-cloud-bigquery/samples/snippets/view.py @@ -127,7 +127,6 @@ def update_view(override_values: Optional[Dict[str, str]] = None) -> "bigquery.T def grant_access( override_values: Optional[OverridesDict] = None, ) -> Tuple["bigquery.Dataset", "bigquery.Dataset"]: - if override_values is None: override_values = {} diff --git a/packages/google-cloud-bigquery/samples/table_exists.py b/packages/google-cloud-bigquery/samples/table_exists.py index 6edba9239012..c19d419aedc1 100644 --- a/packages/google-cloud-bigquery/samples/table_exists.py +++ b/packages/google-cloud-bigquery/samples/table_exists.py @@ -14,7 +14,6 @@ def table_exists(table_id: str) -> None: - # [START bigquery_table_exists] from google.cloud import bigquery from google.cloud.exceptions import NotFound diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows.py b/packages/google-cloud-bigquery/samples/table_insert_rows.py index 8aa723fe040c..d680b4c1e3bb 100644 --- a/packages/google-cloud-bigquery/samples/table_insert_rows.py +++ b/packages/google-cloud-bigquery/samples/table_insert_rows.py @@ -14,7 +14,6 @@ def table_insert_rows(table_id: str) -> None: - # [START bigquery_table_insert_rows] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py index b2bd06372264..bbde034f7bea 100644 --- a/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py +++ b/packages/google-cloud-bigquery/samples/table_insert_rows_explicit_none_insert_ids.py @@ -14,7 +14,6 @@ def table_insert_rows_explicit_none_insert_ids(table_id: str) -> None: - # [START bigquery_table_insert_rows_explicit_none_insert_ids] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py b/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py index 5c7184766eca..95d5546217a8 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py +++ b/packages/google-cloud-bigquery/samples/tests/test_add_empty_column.py @@ -21,7 +21,6 @@ def test_add_empty_column(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None: - add_empty_column.add_empty_column(table_id) out, err = capsys.readouterr() assert "A new column has been added." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py index 368e5cad6c8f..670eb7ccfaf8 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py +++ b/packages/google-cloud-bigquery/samples/tests/test_browse_table_data.py @@ -23,7 +23,6 @@ def test_browse_table_data( capsys: "pytest.CaptureFixture[str]", table_with_data_id: str ) -> None: - browse_table_data.browse_table_data(table_with_data_id) out, err = capsys.readouterr() assert "Downloaded 164656 rows from table {}".format(table_with_data_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py b/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py index a2845b7ad478..6bb1bbd1985e 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_list_jobs.py @@ -25,7 +25,6 @@ def test_client_list_jobs( capsys: "pytest.CaptureFixture[str]", client: "bigquery.Client" ) -> None: - job = create_job.create_job() client.cancel_job(job.job_id) job.cancel() diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py b/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py index 24f86c700de2..2f6564afa858 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_load_partitioned_table.py @@ -23,7 +23,6 @@ def test_client_load_partitioned_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - client_load_partitioned_table.client_load_partitioned_table(random_table_id) out, err = capsys.readouterr() assert "Loaded 50 rows to table {}".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query.py b/packages/google-cloud-bigquery/samples/tests/test_client_query.py index a8e3c343e202..5d4fb9c948b3 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query.py @@ -21,7 +21,6 @@ def test_client_query(capsys: "pytest.CaptureFixture[str]") -> None: - client_query.client_query() out, err = capsys.readouterr() assert "The query data:" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py index 1eb5a1ed65eb..c80f195a5744 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_add_column.py @@ -25,7 +25,6 @@ def test_client_query_add_column( capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py index 548fe3ac31bc..b1e0e264755a 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_batch.py @@ -21,7 +21,6 @@ def test_client_query_batch(capsys: "pytest.CaptureFixture[str]") -> None: - job = client_query_batch.client_query_batch() out, err = capsys.readouterr() assert "Job {} is currently in state {}".format(job.job_id, job.state) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py index 067bc16ec380..1487f6e65b2b 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table.py @@ -23,7 +23,6 @@ def test_client_query_destination_table( capsys: "pytest.CaptureFixture[str]", table_id: str ) -> None: - client_query_destination_table.client_query_destination_table(table_id) out, err = capsys.readouterr() assert "Query results loaded to the table {}".format(table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py index 02b1315319c5..8a1e5bcd4dff 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_clustered.py @@ -23,7 +23,6 @@ def test_client_query_destination_table_clustered( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - client_query_destination_table_clustered.client_query_destination_table_clustered( random_table_id ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py index f2fe3bc39699..4cb76be8eae7 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_cmek.py @@ -23,7 +23,6 @@ def test_client_query_destination_table_cmek( capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str ) -> None: - client_query_destination_table_cmek.client_query_destination_table_cmek( random_table_id, kms_key_name ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py index 0071ee4a4681..78a199beac4e 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_destination_table_legacy.py @@ -23,7 +23,6 @@ def test_client_query_destination_table_legacy( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - client_query_destination_table_legacy.client_query_destination_table_legacy( random_table_id ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py index cffb152efb3d..cfc8100a1ae4 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_dry_run.py @@ -21,7 +21,6 @@ def test_client_query_dry_run(capsys: "pytest.CaptureFixture[str]") -> None: - query_job = client_query_dry_run.client_query_dry_run() out, err = capsys.readouterr() assert "This query will process" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py index b12b5a9344b4..98303cde9314 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_legacy_sql.py @@ -22,7 +22,6 @@ def test_client_query_legacy_sql(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_legacy_sql.client_query_legacy_sql() out, err = capsys.readouterr() assert re.search(r"(Row[\w(){}:', ]+)$", out) diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py index 93fa0f3cf39c..0df8463bed50 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_relax_column.py @@ -27,7 +27,6 @@ def test_client_query_relax_column( random_table_id: str, client: bigquery.Client, ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py index fcd3f69722b4..c302712fee28 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_array_params.py @@ -21,7 +21,6 @@ def test_client_query_w_array_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_array_params.client_query_w_array_params() out, err = capsys.readouterr() assert "James" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py index 85ef1dc4adda..e4d66be4181f 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_named_params.py @@ -21,7 +21,6 @@ def test_client_query_w_named_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_named_params.client_query_w_named_params() out, err = capsys.readouterr() assert "the" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py index 8ade676ab94f..61df76aaae07 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_positional_params.py @@ -21,7 +21,6 @@ def test_client_query_w_positional_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_positional_params.client_query_w_positional_params() out, err = capsys.readouterr() assert "the" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py index 3198dbad51c4..5eea993ced9b 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_struct_params.py @@ -21,7 +21,6 @@ def test_client_query_w_struct_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_struct_params.client_query_w_struct_params() out, err = capsys.readouterr() assert "1" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py index a3bbccdd4360..8147d4a96535 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_w_timestamp_params.py @@ -21,7 +21,6 @@ def test_client_query_w_timestamp_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_timestamp_params.client_query_w_timestamp_params() out, err = capsys.readouterr() assert "2016, 12, 7, 9, 0" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py index e8b27d2a98cc..5d7991c91af7 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py +++ b/packages/google-cloud-bigquery/samples/tests/test_copy_table_multiple_source.py @@ -29,7 +29,6 @@ def test_copy_table_multiple_source( random_dataset_id: str, client: bigquery.Client, ) -> None: - dataset = bigquery.Dataset(random_dataset_id) dataset.location = "US" dataset = client.create_dataset(dataset) diff --git a/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py index e7a897f8f932..ecf5ef129e73 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_create_dataset.py @@ -23,7 +23,6 @@ def test_create_dataset( capsys: "pytest.CaptureFixture[str]", random_dataset_id: str ) -> None: - create_dataset.create_dataset(random_dataset_id) out, err = capsys.readouterr() assert "Created dataset {}".format(random_dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py b/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py index bfef4368f8ea..744122e370cf 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py +++ b/packages/google-cloud-bigquery/samples/tests/test_dataset_exists.py @@ -27,7 +27,6 @@ def test_dataset_exists( random_dataset_id: str, client: bigquery.Client, ) -> None: - dataset_exists.dataset_exists(random_dataset_id) out, err = capsys.readouterr() assert "Dataset {} is not found".format(random_dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py index 75a024856635..ec9ff922847d 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py +++ b/packages/google-cloud-bigquery/samples/tests/test_dataset_label_samples.py @@ -25,7 +25,6 @@ def test_dataset_label_samples( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - label_dataset.label_dataset(dataset_id) out, err = capsys.readouterr() assert "Labels added to {}".format(dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py index 9347bf185de5..c2a77c475246 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_dataset.py @@ -21,7 +21,6 @@ def test_delete_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None: - delete_dataset.delete_dataset(dataset_id) out, err = capsys.readouterr() assert "Deleted dataset '{}'.".format(dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_delete_table.py b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py index aca2df62faf3..5ba5622e8618 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_delete_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_delete_table.py @@ -21,7 +21,6 @@ def test_delete_table(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None: - delete_table.delete_table(table_id) out, err = capsys.readouterr() assert "Deleted table '{}'.".format(table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py index 97b30541b93d..07c7a28b763d 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_dataset.py @@ -21,7 +21,6 @@ def test_get_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None: - get_dataset.get_dataset(dataset_id) out, err = capsys.readouterr() assert dataset_id in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_get_table.py b/packages/google-cloud-bigquery/samples/tests/test_get_table.py index e6383010fec5..edf09762d985 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_get_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_get_table.py @@ -25,7 +25,6 @@ def test_get_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/samples/tests/test_list_tables.py b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py index 7c726accc306..c8a66b656eb9 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_list_tables.py +++ b/packages/google-cloud-bigquery/samples/tests/test_list_tables.py @@ -23,7 +23,6 @@ def test_list_tables( capsys: "pytest.CaptureFixture[str]", dataset_id: str, table_id: str ) -> None: - list_tables.list_tables(dataset_id) out, err = capsys.readouterr() assert "Tables contained in '{}':".format(dataset_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py index bbf3c671f966..89059271a28c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_clustered.py @@ -26,7 +26,6 @@ def test_load_table_clustered( random_table_id: str, client: "bigquery.Client", ) -> None: - table = load_table_clustered.load_table_clustered(random_table_id) out, _ = capsys.readouterr() diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py index 9a975493c54d..4aa872fa42bf 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_dataframe.py @@ -31,7 +31,6 @@ def test_load_table_dataframe( client: "bigquery.Client", random_table_id: str, ) -> None: - table = load_table_dataframe.load_table_dataframe(random_table_id) out, _ = capsys.readouterr() expected_column_names = [ diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py index c9b410850fb4..46b5937139eb 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_csv.py @@ -23,7 +23,6 @@ def test_load_table_uri_autodetect_csv( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_autodetect_csv.load_table_uri_autodetect_csv(random_table_id) out, err = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py index 2c68a13db5a5..43bf4e1b397f 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_autodetect_json.py @@ -23,7 +23,6 @@ def test_load_table_uri_autodetect_csv( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_autodetect_json.load_table_uri_autodetect_json(random_table_id) out, err = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py index 1eb8738431b3..1ae8689f975d 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_cmek.py @@ -23,7 +23,6 @@ def test_load_table_uri_cmek( capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str ) -> None: - load_table_uri_cmek.load_table_uri_cmek(random_table_id, kms_key_name) out, _ = capsys.readouterr() assert "A table loaded with encryption configuration key" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py index a57224c84a1b..8b4c733e8f1d 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_csv.py @@ -23,7 +23,6 @@ def test_load_table_uri_csv( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_csv.load_table_uri_csv(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py index 3ad0ce29bf6c..751c3867acf2 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_json.py @@ -23,7 +23,6 @@ def test_load_table_uri_json( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_json.load_table_uri_json(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py index f31e8cabb66c..23d8288b7310 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_orc.py @@ -23,7 +23,6 @@ def test_load_table_uri_orc( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_orc.load_table_uri_orc(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py index 5404e8584e56..ee7682388cc7 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py +++ b/packages/google-cloud-bigquery/samples/tests/test_load_table_uri_parquet.py @@ -23,7 +23,6 @@ def test_load_table_uri_json( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_parquet.load_table_uri_parquet(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py index 9590f3d7a45e..75b3ce6d806b 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_gcs_temporary_table.py @@ -23,7 +23,6 @@ def test_query_external_gcs_temporary_table( capsys: "pytest.CaptureFixture[str]", ) -> None: - query_external_gcs_temporary_table.query_external_gcs_temporary_table() out, err = capsys.readouterr() assert "There are 4 states with names starting with W." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py index 851839054010..1a4c21330a5c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_permanent_table.py @@ -23,7 +23,6 @@ def test_query_external_sheets_permanent_table( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - query_external_sheets_permanent_table.query_external_sheets_permanent_table( dataset_id ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py index 58e0cb3948c9..2ada205663ce 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_external_sheets_temporary_table.py @@ -23,7 +23,6 @@ def test_query_external_sheets_temporary_table( capsys: "pytest.CaptureFixture[str]", ) -> None: - query_external_sheets_temporary_table.query_external_sheets_temporary_table() out, err = capsys.readouterr() assert "There are 2 states with names starting with W in the selected range." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py b/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py index f3fb039c932d..fffa5dac7cad 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_no_cache.py @@ -22,7 +22,6 @@ def test_query_no_cache(capsys: "pytest.CaptureFixture[str]") -> None: - query_no_cache.query_no_cache() out, err = capsys.readouterr() assert re.search(r"(Row[\w(){}:', ]+)$", out) diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py b/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py index daf711e49113..adc9463999e5 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_pagination.py @@ -21,7 +21,6 @@ def test_query_pagination(capsys: "pytest.CaptureFixture[str]") -> None: - query_pagination.query_pagination() out, _ = capsys.readouterr() assert "The query data:" in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_script.py b/packages/google-cloud-bigquery/samples/tests/test_query_script.py index 98dd1253bf40..50c9730246b8 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_script.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_script.py @@ -21,7 +21,6 @@ def test_query_script(capsys: "pytest.CaptureFixture[str]") -> None: - query_script.query_script() out, _ = capsys.readouterr() assert "Script created 2 child jobs." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py index d9b1aeb73eb5..9fc8571e9ecb 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/tests/test_query_to_arrow.py @@ -20,7 +20,6 @@ def test_query_to_arrow(capsys: "pytest.CaptureFixture[str]") -> None: - arrow_table = query_to_arrow.query_to_arrow() out, err = capsys.readouterr() assert "Downloaded 8 rows, 2 columns." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_exists.py b/packages/google-cloud-bigquery/samples/tests/test_table_exists.py index 7317ba747bc4..35cf61cc8951 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_exists.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_exists.py @@ -25,7 +25,6 @@ def test_table_exists( capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - table_exists.table_exists(random_table_id) out, err = capsys.readouterr() assert "Table {} is not found.".format(random_table_id) in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py index 59024fa959d1..13400d69cd54 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows.py @@ -27,7 +27,6 @@ def test_table_insert_rows( random_table_id: str, client: bigquery.Client, ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py index 00456ce84e41..c6bfbf392753 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py +++ b/packages/google-cloud-bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py @@ -25,7 +25,6 @@ def test_table_insert_rows_explicit_none_insert_ids( capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py index 186a3b5757b5..f17634fb0a21 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_access.py @@ -23,7 +23,6 @@ def test_update_dataset_access( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - update_dataset_access.update_dataset_access(dataset_id) out, err = capsys.readouterr() assert ( diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py index b7787dde32a0..4dd0d9296c8e 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_partition_expiration.py @@ -23,7 +23,6 @@ def test_update_dataset_default_partition_expiration( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - ninety_days_ms = 90 * 24 * 60 * 60 * 1000 # in milliseconds update_dataset_default_partition_expiration.update_dataset_default_partition_expiration( diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py index f780827f27b9..24df5446d712 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_default_table_expiration.py @@ -23,7 +23,6 @@ def test_update_dataset_default_table_expiration( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - one_day_ms = 24 * 60 * 60 * 1000 # in milliseconds update_dataset_default_table_expiration.update_dataset_default_table_expiration( diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py index 5d1209e22d61..6d76337dccc2 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_dataset_description.py @@ -23,7 +23,6 @@ def test_update_dataset_description( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - update_dataset_description.update_dataset_description(dataset_id) out, err = capsys.readouterr() assert "Updated description." in out diff --git a/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py b/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py index 68e1c1e2bd79..c86a227691b4 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py +++ b/packages/google-cloud-bigquery/samples/tests/test_update_table_require_partition_filter.py @@ -27,7 +27,6 @@ def test_update_table_require_partition_filter( random_table_id: str, client: bigquery.Client, ) -> None: - # Make a partitioned table. schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")] table = bigquery.Table(random_table_id, schema=schema) diff --git a/packages/google-cloud-bigquery/samples/update_dataset_access.py b/packages/google-cloud-bigquery/samples/update_dataset_access.py index fda784da5d13..2fb21aff27b9 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_access.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_access.py @@ -14,7 +14,6 @@ def update_dataset_access(dataset_id: str) -> None: - # [START bigquery_update_dataset_access] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py b/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py index 37456f3a0bfe..7a3ccaca39d2 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_default_partition_expiration.py @@ -14,7 +14,6 @@ def update_dataset_default_partition_expiration(dataset_id: str) -> None: - # [START bigquery_update_dataset_partition_expiration] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py index cf6f50d9fccd..ccd0d979ec4d 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_default_table_expiration.py @@ -14,7 +14,6 @@ def update_dataset_default_table_expiration(dataset_id: str) -> None: - # [START bigquery_update_dataset_expiration] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/update_dataset_description.py b/packages/google-cloud-bigquery/samples/update_dataset_description.py index 98c5fed432c2..b12baa99905c 100644 --- a/packages/google-cloud-bigquery/samples/update_dataset_description.py +++ b/packages/google-cloud-bigquery/samples/update_dataset_description.py @@ -14,7 +14,6 @@ def update_dataset_description(dataset_id: str) -> None: - # [START bigquery_update_dataset_description] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/update_routine.py b/packages/google-cloud-bigquery/samples/update_routine.py index 1a975a253de6..1a8908295360 100644 --- a/packages/google-cloud-bigquery/samples/update_routine.py +++ b/packages/google-cloud-bigquery/samples/update_routine.py @@ -19,7 +19,6 @@ def update_routine(routine_id: str) -> "bigquery.Routine": - # [START bigquery_update_routine] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py b/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py index 8221238a73e6..40b739b76ec5 100644 --- a/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py +++ b/packages/google-cloud-bigquery/samples/update_table_require_partition_filter.py @@ -14,7 +14,6 @@ def update_table_require_partition_filter(table_id: str) -> None: - # [START bigquery_update_table_require_partition_filter] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 8fd532f4cebf..d3b95ec49a1d 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2319,7 +2319,6 @@ def _table_exists(t): def test_dbapi_create_view(dataset_id: str): - query = f""" CREATE VIEW {dataset_id}.dbapi_create_view AS SELECT name, SUM(number) AS total diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 7d3186d47318..26f1f2a738a0 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -54,7 +54,6 @@ def _make_resource(self, started=False, ended=False, location="US"): return resource def _verifyBooleanResourceProperties(self, job, config): - if "allowLargeResults" in config: self.assertEqual(job.allow_large_results, config["allowLargeResults"]) else: diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_remote_function_options.py b/packages/google-cloud-bigquery/tests/unit/routine/test_remote_function_options.py index b476dca1ebdb..ffd57e8c1a38 100644 --- a/packages/google-cloud-bigquery/tests/unit/routine/test_remote_function_options.py +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_remote_function_options.py @@ -32,7 +32,6 @@ def target_class(): def test_ctor(target_class): - options = target_class( endpoint=ENDPOINT, connection=CONNECTION, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index faa073dcea73..faa065116f1b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -111,7 +111,6 @@ def _make_list_partitons_meta_info(project, dataset_id, table_id, num_rows=0): class TestClient(unittest.TestCase): - PROJECT = "PROJECT" DS_ID = "DATASET_ID" TABLE_ID = "TABLE_ID" @@ -170,7 +169,6 @@ def test_ctor_w_empty_client_options(self): ) def test_ctor_w_client_options_dict(self): - creds = _make_credentials() http = object() client_options = {"api_endpoint": "https://www.foo-googleapis.com"} diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 3b1452805e5d..7d7091092eb9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -693,7 +693,6 @@ def _verify_access_entry(self, access_entries, resource): self.assertEqual(a_entry.entity_id, r_entry["entity_id"]) def _verify_readonly_resource_properties(self, dataset, resource): - self.assertEqual(dataset.project, self.PROJECT) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.reference.project, self.PROJECT) @@ -717,7 +716,6 @@ def _verify_readonly_resource_properties(self, dataset, resource): self.assertIsNone(dataset.self_link) def _verify_resource_properties(self, dataset, resource): - self._verify_readonly_resource_properties(dataset, resource) if "defaultTableExpirationMs" in resource: diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index fae0c17e9202..542f923d26b2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -255,7 +255,6 @@ def test_non_empty_iterable(self): class TestRaiseOnClosedDecorator(unittest.TestCase): def _make_class(self): class Foo(object): - class_member = "class member" def __init__(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 67fd13fa750e..9fd16e69967c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -21,7 +21,6 @@ class TestExternalConfig(unittest.TestCase): - SOURCE_URIS = ["gs://foo", "gs://bar"] BASE_RESOURCE = { diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index c0aa5d85e448..70bfc4d0cb2e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -638,9 +638,9 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch, warnings.catch_warnings( - record=True - ) as warned: + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), warnings.catch_warnings(record=True) as warned: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "--use_bqstorage_api", sql) @@ -801,7 +801,9 @@ def test_bigquery_magic_w_max_results_query_job_results_fails(): with pytest.raises( OSError - ), client_query_patch as client_query_mock, default_patch, close_transports_patch as close_transports: + ), client_query_patch as client_query_mock, ( + default_patch + ), close_transports_patch as close_transports: client_query_mock.return_value = query_job_mock ip.run_cell_magic("bigquery", "--max_results=5", sql) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index f31dc5528c8b..a2c82c0a8277 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -416,7 +416,6 @@ def test___str__(self): class TestTable(unittest.TestCase, _SchemaBase): - PROJECT = "prahj-ekt" DS_ID = "dataset-name" TABLE_NAME = "table-name" @@ -524,7 +523,6 @@ def _verifyReadonlyResourceProperties(self, table, resource): ) def _verifyResourceProperties(self, table, resource): - self._verifyReadonlyResourceProperties(table, resource) if "expirationTime" in resource: @@ -1501,7 +1499,6 @@ def test___str__(self): class Test_row_from_mapping(unittest.TestCase, _SchemaBase): - PROJECT = "prahj-ekt" DS_ID = "dataset-name" TABLE_NAME = "table-name" @@ -1790,7 +1787,6 @@ def _make_table_list_item(*args, **kwargs): return TableListItem(*args, **kwargs) def test_table_eq_table_ref(self): - table = self._make_table("project_foo.dataset_bar.table_baz") dataset_ref = DatasetReference("project_foo", "dataset_bar") table_ref = self._make_table_ref(dataset_ref, "table_baz") @@ -1814,7 +1810,6 @@ def test_table_eq_table_list_item(self): assert table_list_item == table def test_table_ref_eq_table_list_item(self): - dataset_ref = DatasetReference("project_foo", "dataset_bar") table_ref = self._make_table_ref(dataset_ref, "table_baz") table_list_item = self._make_table_list_item( From 5eeeb017915849cb60d4d80eaf6a0a5faf899aca Mon Sep 17 00:00:00 2001 From: Salem <115185670+SalemJorden@users.noreply.github.com> Date: Thu, 12 Oct 2023 10:29:00 -0500 Subject: [PATCH 1651/2016] feat: add `Model.transform_columns` property (#1661) --------- Co-authored-by: Salem Boyland Co-authored-by: Tim Swast --- .../google/cloud/bigquery/model.py | 71 +++++++++++++++++++ packages/google-cloud-bigquery/mypy.ini | 2 +- .../tests/unit/model/test_model.py | 68 ++++++++++++++++++ 3 files changed, 140 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index 4d2bc346c92d..45a88ab221e0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -16,6 +16,8 @@ """Define resources for the BigQuery ML Models API.""" +from __future__ import annotations # type: ignore + import copy import datetime import typing @@ -184,6 +186,21 @@ def feature_columns(self) -> Sequence[standard_sql.StandardSqlField]: standard_sql.StandardSqlField.from_api_repr(column) for column in resource ] + @property + def transform_columns(self) -> Sequence[TransformColumn]: + """The input feature columns that were used to train this model. + The output transform columns used to train this model. + + See REST API: + https://cloud.google.com/bigquery/docs/reference/rest/v2/models#transformcolumn + + Read-only. + """ + resources: Sequence[Dict[str, Any]] = typing.cast( + Sequence[Dict[str, Any]], self._properties.get("transformColumns", []) + ) + return [TransformColumn(resource) for resource in resources] + @property def label_columns(self) -> Sequence[standard_sql.StandardSqlField]: """Label columns that were used to train this model. @@ -434,6 +451,60 @@ def __repr__(self): ) +class TransformColumn: + """TransformColumn represents a transform column feature. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/models#transformcolumn + + Args: + resource: + A dictionary representing a transform column feature. + """ + + def __init__(self, resource: Dict[str, Any]): + self._properties = resource + + @property + def name(self) -> Optional[str]: + """Name of the column.""" + return self._properties.get("name") + + @property + def type_(self) -> Optional[standard_sql.StandardSqlDataType]: + """Data type of the column after the transform. + + Returns: + Optional[google.cloud.bigquery.standard_sql.StandardSqlDataType]: + Data type of the column. + """ + type_json = self._properties.get("type") + if type_json is None: + return None + return standard_sql.StandardSqlDataType.from_api_repr(type_json) + + @property + def transform_sql(self) -> Optional[str]: + """The SQL expression used in the column transform.""" + return self._properties.get("transformSql") + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "TransformColumn": + """Constructs a transform column feature given its API representation + + Args: + resource: + Transform column feature representation from the API + + Returns: + Transform column feature parsed from ``resource``. + """ + this = cls({}) + resource = copy.deepcopy(resource) + this._properties = resource + return this + + def _model_arg_to_model_ref(value, default_project=None): """Helper to convert a string or Model to ModelReference. diff --git a/packages/google-cloud-bigquery/mypy.ini b/packages/google-cloud-bigquery/mypy.ini index 4505b485436b..beaa679a8d2b 100644 --- a/packages/google-cloud-bigquery/mypy.ini +++ b/packages/google-cloud-bigquery/mypy.ini @@ -1,3 +1,3 @@ [mypy] -python_version = 3.6 +python_version = 3.8 namespace_packages = True diff --git a/packages/google-cloud-bigquery/tests/unit/model/test_model.py b/packages/google-cloud-bigquery/tests/unit/model/test_model.py index 1ae988414908..279a954c78ea 100644 --- a/packages/google-cloud-bigquery/tests/unit/model/test_model.py +++ b/packages/google-cloud-bigquery/tests/unit/model/test_model.py @@ -18,7 +18,9 @@ import pytest + import google.cloud._helpers +import google.cloud.bigquery.model KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" @@ -136,6 +138,7 @@ def test_from_api_repr(target_class): google.cloud._helpers._rfc3339_to_datetime(got.training_runs[2]["startTime"]) == expiration_time ) + assert got.transform_columns == [] def test_from_api_repr_w_minimal_resource(target_class): @@ -293,6 +296,71 @@ def test_feature_columns(object_under_test): assert object_under_test.feature_columns == expected +def test_from_api_repr_w_transform_columns(target_class): + resource = { + "modelReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + }, + "transformColumns": [ + { + "name": "transform_name", + "type": {"typeKind": "INT64"}, + "transformSql": "transform_sql", + } + ], + } + got = target_class.from_api_repr(resource) + assert len(got.transform_columns) == 1 + transform_column = got.transform_columns[0] + assert isinstance(transform_column, google.cloud.bigquery.model.TransformColumn) + assert transform_column.name == "transform_name" + + +def test_transform_column_name(): + transform_columns = google.cloud.bigquery.model.TransformColumn( + {"name": "is_female"} + ) + assert transform_columns.name == "is_female" + + +def test_transform_column_transform_sql(): + transform_columns = google.cloud.bigquery.model.TransformColumn( + {"transformSql": "is_female"} + ) + assert transform_columns.transform_sql == "is_female" + + +def test_transform_column_type(): + transform_columns = google.cloud.bigquery.model.TransformColumn( + {"type": {"typeKind": "BOOL"}} + ) + assert transform_columns.type_.type_kind == "BOOL" + + +def test_transform_column_type_none(): + transform_columns = google.cloud.bigquery.model.TransformColumn({}) + assert transform_columns.type_ is None + + +def test_transform_column_from_api_repr_with_unknown_properties(): + transform_column = google.cloud.bigquery.model.TransformColumn.from_api_repr( + { + "name": "is_female", + "type": {"typeKind": "BOOL"}, + "transformSql": "is_female", + "test": "one", + } + ) + assert transform_column._properties == { + "name": "is_female", + "type": {"typeKind": "BOOL"}, + "transformSql": "is_female", + "test": "one", + } + + def test_label_columns(object_under_test): from google.cloud.bigquery import standard_sql From 5024955706e1bd49daccccc583d22cacbd0bf592 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 18 Oct 2023 12:56:17 -0400 Subject: [PATCH 1652/2016] chore: consolidate PyarrowVersions helpers (#1679) * chore: consolidate pyarrow helpers * complete refactor * consolidate pyarrow version checking usage * add unit tests * fix unit_noextras testing error * fix tests * address comments * fix tests * coverage * accept suggestion Co-authored-by: Tim Swast * address comments --------- Co-authored-by: Tim Swast --- .../google/cloud/bigquery/_helpers.py | 79 +---------- .../google/cloud/bigquery/_pandas_helpers.py | 103 ++------------- .../google/cloud/bigquery/_pyarrow_helpers.py | 123 ++++++++++++++++++ .../cloud/bigquery/_versions_helpers.py | 94 +++++++++++++ .../google/cloud/bigquery/client.py | 31 +---- .../google/cloud/bigquery/table.py | 4 +- .../tests/unit/test__helpers.py | 78 +---------- .../tests/unit/test__pandas_helpers.py | 28 ++-- .../tests/unit/test__pyarrow_helpers.py | 38 ++++++ .../tests/unit/test__versions_helpers.py | 62 +++++++++ .../tests/unit/test_client.py | 24 +--- .../tests/unit/test_magics.py | 5 +- .../tests/unit/test_table.py | 17 +-- 13 files changed, 379 insertions(+), 307 deletions(-) create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py create mode 100644 packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 014a721a8b58..488766853051 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -20,7 +20,7 @@ import math import re import os -from typing import Any, Optional, Union +from typing import Optional, Union from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -32,10 +32,7 @@ import packaging.version -from google.cloud.bigquery.exceptions import ( - LegacyBigQueryStorageError, - LegacyPyarrowError, -) +from google.cloud.bigquery import exceptions _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -57,8 +54,6 @@ _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") -_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") - _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" @@ -115,7 +110,7 @@ def verify_version(self): verify the version compatibility at runtime. Raises: - LegacyBigQueryStorageError: + exceptions.LegacyBigQueryStorageError: If the google-cloud-bigquery-storage package is outdated. """ if self.installed_version < _MIN_BQ_STORAGE_VERSION: @@ -123,76 +118,10 @@ def verify_version(self): "Dependency google-cloud-bigquery-storage is outdated, please upgrade " f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})." ) - raise LegacyBigQueryStorageError(msg) - - -class PyarrowVersions: - """Version comparisons for pyarrow package.""" - - def __init__(self): - self._installed_version = None - - @property - def installed_version(self) -> packaging.version.Version: - """Return the parsed version of pyarrow.""" - if self._installed_version is None: - import pyarrow # type: ignore - - self._installed_version = packaging.version.parse( - # Use 0.0.0, since it is earlier than any released version. - # Legacy versions also have the same property, but - # creating a LegacyVersion has been deprecated. - # https://github.com/pypa/packaging/issues/321 - getattr(pyarrow, "__version__", "0.0.0") - ) - - return self._installed_version - - @property - def use_compliant_nested_type(self) -> bool: - return self.installed_version.major >= 4 - - def try_import(self, raise_if_error: bool = False) -> Any: - """Verify that a recent enough version of pyarrow extra is - installed. - - The function assumes that pyarrow extra is installed, and should thus - be used in places where this assumption holds. - - Because `pip` can install an outdated version of this extra despite the - constraints in `setup.py`, the calling code can use this helper to - verify the version compatibility at runtime. - - Returns: - The ``pyarrow`` module or ``None``. - - Raises: - LegacyPyarrowError: - If the pyarrow package is outdated and ``raise_if_error`` is ``True``. - """ - try: - import pyarrow - except ImportError as exc: # pragma: NO COVER - if raise_if_error: - raise LegacyPyarrowError( - f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}." - ) from exc - return None - - if self.installed_version < _MIN_PYARROW_VERSION: - if raise_if_error: - msg = ( - "Dependency pyarrow is outdated, please upgrade " - f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})." - ) - raise LegacyPyarrowError(msg) - return None - - return pyarrow + raise exceptions.LegacyBigQueryStorageError(msg) BQ_STORAGE_VERSIONS = BQStorageVersions() -PYARROW_VERSIONS = PyarrowVersions() def _not_null(value, field): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index a14dbec9bac0..ea790d6c9393 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -23,9 +23,9 @@ import warnings from typing import Any, Union -from packaging import version - from google.cloud.bigquery import _helpers +from google.cloud.bigquery import _pyarrow_helpers +from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema try: @@ -49,7 +49,11 @@ db_dtypes_import_exception = exc date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype -pyarrow = _helpers.PYARROW_VERSIONS.try_import() +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() + +_BIGNUMERIC_SUPPORT = False +if pyarrow is not None: + _BIGNUMERIC_SUPPORT = True try: # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` @@ -119,87 +123,6 @@ def __init__(self): self.done = False -def pyarrow_datetime(): - return pyarrow.timestamp("us", tz=None) - - -def pyarrow_numeric(): - return pyarrow.decimal128(38, 9) - - -def pyarrow_bignumeric(): - # 77th digit is partial. - # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types - return pyarrow.decimal256(76, 38) - - -def pyarrow_time(): - return pyarrow.time64("us") - - -def pyarrow_timestamp(): - return pyarrow.timestamp("us", tz="UTC") - - -if pyarrow: - # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py - # When modifying it be sure to update it there as well. - BQ_TO_ARROW_SCALARS = { - "BOOL": pyarrow.bool_, - "BOOLEAN": pyarrow.bool_, - "BYTES": pyarrow.binary, - "DATE": pyarrow.date32, - "DATETIME": pyarrow_datetime, - "FLOAT": pyarrow.float64, - "FLOAT64": pyarrow.float64, - "GEOGRAPHY": pyarrow.string, - "INT64": pyarrow.int64, - "INTEGER": pyarrow.int64, - "NUMERIC": pyarrow_numeric, - "STRING": pyarrow.string, - "TIME": pyarrow_time, - "TIMESTAMP": pyarrow_timestamp, - } - ARROW_SCALAR_IDS_TO_BQ = { - # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes - pyarrow.bool_().id: "BOOL", - pyarrow.int8().id: "INT64", - pyarrow.int16().id: "INT64", - pyarrow.int32().id: "INT64", - pyarrow.int64().id: "INT64", - pyarrow.uint8().id: "INT64", - pyarrow.uint16().id: "INT64", - pyarrow.uint32().id: "INT64", - pyarrow.uint64().id: "INT64", - pyarrow.float16().id: "FLOAT64", - pyarrow.float32().id: "FLOAT64", - pyarrow.float64().id: "FLOAT64", - pyarrow.time32("ms").id: "TIME", - pyarrow.time64("ns").id: "TIME", - pyarrow.timestamp("ns").id: "TIMESTAMP", - pyarrow.date32().id: "DATE", - pyarrow.date64().id: "DATETIME", # because millisecond resolution - pyarrow.binary().id: "BYTES", - pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - } - - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" - _BIGNUMERIC_SUPPORT = True - else: - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER - -else: # pragma: NO COVER - BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER - ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER - - BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { "GEOGRAPHY": { b"ARROW:extension:name": b"google:sqlType:geography", @@ -240,7 +163,7 @@ def bq_to_arrow_data_type(field): if field_type_upper in schema._STRUCT_TYPES: return bq_to_arrow_struct_data_type(field) - data_type_constructor = BQ_TO_ARROW_SCALARS.get(field_type_upper) + data_type_constructor = _pyarrow_helpers.bq_to_arrow_scalars(field_type_upper) if data_type_constructor is None: return None return data_type_constructor() @@ -568,7 +491,9 @@ def augment_schema(dataframe, current_bq_schema): if pyarrow.types.is_list(arrow_table.type): # `pyarrow.ListType` detected_mode = "REPEATED" - detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.values.type.id) + detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq( + arrow_table.values.type.id + ) # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds # it to such datetimes, causing them to be recognized as TIMESTAMP type. @@ -584,7 +509,7 @@ def augment_schema(dataframe, current_bq_schema): detected_type = "DATETIME" else: detected_mode = field.mode - detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.type.id) + detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id) if detected_type is None: unknown_type_fields.append(field) @@ -705,13 +630,13 @@ def dataframe_to_parquet( This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``. """ - pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) + pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) import pyarrow.parquet # type: ignore kwargs = ( {"use_compliant_nested_type": parquet_use_compliant_nested_type} - if _helpers.PYARROW_VERSIONS.use_compliant_nested_type + if _versions_helpers.PYARROW_VERSIONS.use_compliant_nested_type else {} ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py new file mode 100644 index 000000000000..7266e5e02674 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py @@ -0,0 +1,123 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared helper functions for connecting BigQuery and pyarrow.""" + +from typing import Any + +from packaging import version + +try: + import pyarrow # type: ignore +except ImportError: # pragma: NO COVER + pyarrow = None + + +def pyarrow_datetime(): + return pyarrow.timestamp("us", tz=None) + + +def pyarrow_numeric(): + return pyarrow.decimal128(38, 9) + + +def pyarrow_bignumeric(): + # 77th digit is partial. + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types + return pyarrow.decimal256(76, 38) + + +def pyarrow_time(): + return pyarrow.time64("us") + + +def pyarrow_timestamp(): + return pyarrow.timestamp("us", tz="UTC") + + +_BQ_TO_ARROW_SCALARS = {} +_ARROW_SCALAR_IDS_TO_BQ = {} + +if pyarrow: + # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py + # When modifying it be sure to update it there as well. + # Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py + _BQ_TO_ARROW_SCALARS = { + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, + } + + _ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + } + + # Adds bignumeric support only if pyarrow version >= 3.0.0 + # Decimal256 support was added to arrow 3.0.0 + # https://arrow.apache.org/blog/2021/01/25/3.0.0-release/ + if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): + _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + + +def bq_to_arrow_scalars(bq_scalar: str): + """ + Returns: + The Arrow scalar type that the input BigQuery scalar type maps to. + If it cannot find the BigQuery scalar, return None. + """ + return _BQ_TO_ARROW_SCALARS.get(bq_scalar) + + +def arrow_scalar_ids_to_bq(arrow_scalar: Any): + """ + Returns: + The BigQuery scalar type that the input arrow scalar type maps to. + If it cannot find the arrow scalar, return None. + """ + return _ARROW_SCALAR_IDS_TO_BQ.get(arrow_scalar) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py new file mode 100644 index 000000000000..1f04c74e0a99 --- /dev/null +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py @@ -0,0 +1,94 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared helper functions for verifying versions of installed modules.""" + +from typing import Any + +import packaging.version + +from google.cloud.bigquery import exceptions + + +_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") + + +class PyarrowVersions: + """Version comparisons for pyarrow package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of pyarrow.""" + if self._installed_version is None: + import pyarrow # type: ignore + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(pyarrow, "__version__", "0.0.0") + ) + + return self._installed_version + + @property + def use_compliant_nested_type(self) -> bool: + return self.installed_version.major >= 4 + + def try_import(self, raise_if_error: bool = False) -> Any: + """Verify that a recent enough version of pyarrow extra is installed. + + The function assumes that pyarrow extra is installed, and should thus + be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite + the constraints in `setup.py`, the calling code can use this helper + to verify the version compatibility at runtime. + + Returns: + The ``pyarrow`` module or ``None``. + + Raises: + exceptions.LegacyPyarrowError: + If the pyarrow package is outdated and ``raise_if_error`` is + ``True``. + """ + try: + import pyarrow + except ImportError as exc: # pragma: NO COVER + if raise_if_error: + raise exceptions.LegacyPyarrowError( + "pyarrow package not found. Install pyarrow version >=" + f" {_MIN_PYARROW_VERSION}." + ) from exc + return None + + if self.installed_version < _MIN_PYARROW_VERSION: + if raise_if_error: + msg = ( + "Dependency pyarrow is outdated, please upgrade" + f" it to version >= {_MIN_PYARROW_VERSION}" + f" (version found: {self.installed_version})." + ) + raise exceptions.LegacyPyarrowError(msg) + return None + + return pyarrow + + +PYARROW_VERSIONS = PyarrowVersions() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index f7c7864a15c8..ed75215b6b3e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -27,7 +27,6 @@ import json import math import os -import packaging.version import tempfile import typing from typing import ( @@ -45,13 +44,6 @@ import uuid import warnings -try: - import pyarrow # type: ignore - - _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) -except ImportError: # pragma: NO COVER - pyarrow = None - from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload @@ -84,12 +76,13 @@ from google.cloud.bigquery._helpers import _DEFAULT_HOST from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery import exceptions as bq_exceptions from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -121,7 +114,8 @@ from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.format_options import ParquetOptions -from google.cloud.bigquery import _helpers + +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() TimeoutType = Union[float, None] ResumableTimeoutType = Union[ @@ -159,9 +153,6 @@ TIMEOUT_HEADER = "X-Server-Timeout" -# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 -_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) - class Project(object): """Wrapper for resource describing a BigQuery project. @@ -574,7 +565,7 @@ def _ensure_bqstorage_client( try: BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: + except bq_exceptions.LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return None if bqstorage_client is None: @@ -2686,16 +2677,6 @@ def load_table_from_dataframe( try: if new_job_config.source_format == job.SourceFormat.PARQUET: - if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: - msg = ( - "Loading dataframe data in PARQUET format with pyarrow " - f"{_PYARROW_VERSION} can result in data corruption. It is " - "therefore *strongly* advised to use a different pyarrow " - "version or a different source format. " - "See: https://github.com/googleapis/python-bigquery/issues/781" - ) - warnings.warn(msg, category=RuntimeWarning) - if new_job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() @@ -2714,7 +2695,7 @@ def load_table_from_dataframe( compression=parquet_compression, **( {"use_compliant_nested_type": True} - if _helpers.PYARROW_VERSIONS.use_compliant_nested_type + if _versions_helpers.PYARROW_VERSIONS.use_compliant_nested_type else {} ), ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 462447d51cc6..a967a179539d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -61,7 +61,7 @@ from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.enums import DefaultPandasDTypes -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery import exceptions from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -1616,7 +1616,7 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): try: _helpers.BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: + except exceptions.LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return False diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 4fb86f66513e..40223f0410e7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -19,16 +19,13 @@ import mock +from google.cloud.bigquery import exceptions + try: from google.cloud import bigquery_storage # type: ignore except ImportError: # pragma: NO COVER bigquery_storage = None -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - @unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") class TestBQStorageVersions(unittest.TestCase): @@ -50,28 +47,24 @@ def _call_fut(self): return _helpers.BQ_STORAGE_VERSIONS.verify_version() def test_raises_no_error_w_recent_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): try: self._call_fut() - except LegacyBigQueryStorageError: # pragma: NO COVER + except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER self.fail("Legacy error raised with a non-legacy dependency version.") def test_raises_error_w_legacy_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): - with self.assertRaises(LegacyBigQueryStorageError): + with self.assertRaises(exceptions.LegacyBigQueryStorageError): self._call_fut() def test_raises_error_w_unknown_bqstorage_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: del fake_module.__version__ error_pattern = r"version found: 0.0.0" - with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): + with self.assertRaisesRegex( + exceptions.LegacyBigQueryStorageError, error_pattern + ): self._call_fut() def test_installed_version_returns_cached(self): @@ -100,63 +93,6 @@ def test_is_read_session_optional_false(self): assert not versions.is_read_session_optional -@unittest.skipIf(pyarrow is None, "Requires `pyarrow`") -class TestPyarrowVersions(unittest.TestCase): - def tearDown(self): - from google.cloud.bigquery import _helpers - - # Reset any cached versions since it may not match reality. - _helpers.PYARROW_VERSIONS._installed_version = None - - def _object_under_test(self): - from google.cloud.bigquery import _helpers - - return _helpers.PyarrowVersions() - - def _call_try_import(self, **kwargs): - from google.cloud.bigquery import _helpers - - _helpers.PYARROW_VERSIONS._installed_version = None - return _helpers.PYARROW_VERSIONS.try_import(**kwargs) - - def test_try_import_raises_no_error_w_recent_pyarrow(self): - from google.cloud.bigquery.exceptions import LegacyPyarrowError - - with mock.patch("pyarrow.__version__", new="5.0.0"): - try: - pyarrow = self._call_try_import(raise_if_error=True) - self.assertIsNotNone(pyarrow) - except LegacyPyarrowError: # pragma: NO COVER - self.fail("Legacy error raised with a non-legacy dependency version.") - - def test_try_import_returns_none_w_legacy_pyarrow(self): - with mock.patch("pyarrow.__version__", new="2.0.0"): - pyarrow = self._call_try_import() - self.assertIsNone(pyarrow) - - def test_try_import_raises_error_w_legacy_pyarrow(self): - from google.cloud.bigquery.exceptions import LegacyPyarrowError - - with mock.patch("pyarrow.__version__", new="2.0.0"): - with self.assertRaises(LegacyPyarrowError): - self._call_try_import(raise_if_error=True) - - def test_installed_version_returns_cached(self): - versions = self._object_under_test() - versions._installed_version = object() - assert versions.installed_version is versions._installed_version - - def test_installed_version_returns_parsed_version(self): - versions = self._object_under_test() - - with mock.patch("pyarrow.__version__", new="1.2.3"): - version = versions.installed_version - - assert version.major == 1 - assert version.minor == 2 - assert version.micro == 3 - - class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _not_null diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index a4cc1fefb449..7724f308baf9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -41,10 +41,12 @@ from google.cloud.bigquery import exceptions from google.cloud.bigquery import _helpers +from google.cloud.bigquery import _pyarrow_helpers +from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT -pyarrow = _helpers.PYARROW_VERSIONS.try_import() +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() if pyarrow: import pyarrow.parquet @@ -346,14 +348,14 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field04", pyarrow.int64()), pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), + pyarrow.field("field07", _pyarrow_helpers.pyarrow_numeric()), + pyarrow.field("field08", _pyarrow_helpers.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), - pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field11", _pyarrow_helpers.pyarrow_timestamp()), pyarrow.field("field12", pyarrow.date32()), - pyarrow.field("field13", module_under_test.pyarrow_time()), - pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field13", _pyarrow_helpers.pyarrow_time()), + pyarrow.field("field14", _pyarrow_helpers.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) expected = pyarrow.struct(expected) @@ -394,14 +396,14 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field04", pyarrow.int64()), pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), + pyarrow.field("field07", _pyarrow_helpers.pyarrow_numeric()), + pyarrow.field("field08", _pyarrow_helpers.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), - pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field11", _pyarrow_helpers.pyarrow_timestamp()), pyarrow.field("field12", pyarrow.date32()), - pyarrow.field("field13", module_under_test.pyarrow_time()), - pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field13", _pyarrow_helpers.pyarrow_time()), + pyarrow.field("field14", _pyarrow_helpers.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) expected_value_type = pyarrow.struct(expected) @@ -1117,7 +1119,9 @@ def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError( "pyarrow not installed" ) - monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import) + monkeypatch.setattr( + _versions_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import + ) with pytest.raises(exceptions.LegacyPyarrowError): module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) diff --git a/packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py new file mode 100644 index 000000000000..f0a872c88414 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py @@ -0,0 +1,38 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") + + +@pytest.fixture +def module_under_test(): + from google.cloud.bigquery import _pyarrow_helpers + + return _pyarrow_helpers + + +def test_bq_to_arrow_scalars(module_under_test): + assert ( + module_under_test.bq_to_arrow_scalars("BIGNUMERIC") + == module_under_test.pyarrow_bignumeric + ) + assert module_under_test.bq_to_arrow_scalars("UNKNOWN_TYPE") is None + + +def test_arrow_scalar_ids_to_bq(module_under_test): + assert module_under_test.arrow_scalar_ids_to_bq(pyarrow.bool_().id) == "BOOL" + assert module_under_test.arrow_scalar_ids_to_bq("UNKNOWN_TYPE") is None diff --git a/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py new file mode 100644 index 000000000000..21386610bb12 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py @@ -0,0 +1,62 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import mock + +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import exceptions + +pyarrow = pytest.importorskip("pyarrow") + + +def test_try_import_raises_no_error_w_recent_pyarrow(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="5.0.0"): + try: + pyarrow = versions.try_import(raise_if_error=True) + assert pyarrow is not None + except exceptions.LegacyPyarrowError: # pragma: NO COVER + raise ("Legacy error raised with a non-legacy dependency version.") + + +def test_try_import_returns_none_w_legacy_pyarrow(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="2.0.0"): + pyarrow = versions.try_import() + assert pyarrow is None + + +def test_try_import_raises_error_w_legacy_pyarrow(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="2.0.0"): + with pytest.raises(exceptions.LegacyPyarrowError): + versions.try_import(raise_if_error=True) + + +def test_installed_version_returns_cached(): + versions = _versions_helpers.PyarrowVersions() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + +def test_installed_version_returns_parsed_version(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="1.2.3"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 2 + assert version.micro == 3 diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index faa065116f1b..3143f2123aea 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -27,8 +27,8 @@ import warnings import mock -import packaging import requests +import packaging import pytest import pkg_resources @@ -65,6 +65,7 @@ from google.cloud import bigquery from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery import exceptions from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from google.cloud.bigquery import ParquetOptions @@ -821,14 +822,12 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_ensure_bqstorage_client_obsolete_dependency(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) patcher = mock.patch( "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: bqstorage_client = client._ensure_bqstorage_client() @@ -857,15 +856,13 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_ensure_bqstorage_client_existing_client_check_fails(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) mock_storage_client = mock.sentinel.mock_storage_client patcher = mock.patch( "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) @@ -8615,7 +8612,7 @@ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): dataframe = pandas.DataFrame(records) pyarrow_version_patch = mock.patch( - "google.cloud.bigquery.client._PYARROW_VERSION", + "google.cloud.bigquery._versions_helpers.PYARROW_VERSIONS._installed_version", packaging.version.parse("2.0.0"), # A known bad version of pyarrow. ) get_table_patch = mock.patch( @@ -8628,22 +8625,13 @@ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): ) with load_patch, get_table_patch, pyarrow_version_patch: - with warnings.catch_warnings(record=True) as warned: + with pytest.raises(exceptions.LegacyPyarrowError): client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION, ) - expected_warnings = [ - warning for warning in warned if "pyarrow" in str(warning).lower() - ] - assert len(expected_warnings) == 1 - assert issubclass(expected_warnings[0].category, RuntimeWarning) - msg = str(expected_warnings[0].message) - assert "pyarrow 2.0.0" in msg - assert "data corruption" in msg - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 70bfc4d0cb2e..0cab943f7071 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -25,6 +25,7 @@ from test_utils.imports import maybe_fail_import from google.cloud import bigquery +from google.cloud.bigquery import exceptions as bq_exceptions from google.cloud.bigquery import job from google.cloud.bigquery import table from google.cloud.bigquery.retry import DEFAULT_TIMEOUT @@ -357,8 +358,6 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) def test__make_bqstorage_client_true_obsolete_dependency(): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -368,7 +367,7 @@ def test__make_bqstorage_client_true_obsolete_dependency(): patcher = mock.patch( "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + side_effect=bq_exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: got = magics._make_bqstorage_client(test_client, True, {}) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a2c82c0a8277..65eb659bf6e9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -28,6 +28,8 @@ import google.api_core.exceptions from test_utils.imports import maybe_fail_import +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import exceptions from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -40,17 +42,12 @@ bigquery_storage = None big_query_read_grpc_transport = None -from google.cloud.bigquery import _helpers -pyarrow = _helpers.PYARROW_VERSIONS.try_import() -PYARROW_VERSION = pkg_resources.parse_version("0.0.1") +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() if pyarrow: - import pyarrow import pyarrow.types - PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) - try: import pandas except (ImportError, AttributeError): # pragma: NO COVER @@ -73,8 +70,6 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None -PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") - if pandas is not None: PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version else: @@ -2262,13 +2257,11 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: result = iterator._validate_bqstorage( @@ -2874,7 +2867,7 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): row_iterator = self._make_one(mock_client, api_request, path, schema) def mock_verify_version(): - raise _helpers.LegacyBigQueryStorageError("no bqstorage") + raise exceptions.LegacyBigQueryStorageError("no bqstorage") with mock.patch( "google.cloud.bigquery._helpers.BQ_STORAGE_VERSIONS.verify_version", From 5a8c502b15cb333d3ad02d82e9573e04bbf7e081 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 19 Oct 2023 11:48:47 -0700 Subject: [PATCH 1653/2016] chore: rename rst files to avoid conflict with service names (#1689) Source-Link: https://github.com/googleapis/synthtool/commit/d52e638b37b091054c869bfa6f5a9fedaba9e0dd Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:4f9b3b106ad0beafc2c8a415e3f62c1a0cc23cabea115dbe841b848f581cfe99 Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- packages/google-cloud-bigquery/.kokoro/requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index dd98abbdeebe..7f291dbd5f9b 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:08e34975760f002746b1d8c86fdc90660be45945ee6d9db914d1508acdf9a547 -# created: 2023-10-09T14:06:13.397766266Z + digest: sha256:4f9b3b106ad0beafc2c8a415e3f62c1a0cc23cabea115dbe841b848f581cfe99 +# created: 2023-10-18T20:26:37.410353675Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 0332d3267e15..16170d0ca7b8 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -467,9 +467,9 @@ typing-extensions==4.4.0 \ --hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e # via -r requirements.in -urllib3==1.26.17 \ - --hash=sha256:24d6a242c28d29af46c3fae832c36db3bbebcc533dd1bb549172cd739c82df21 \ - --hash=sha256:94a757d178c9be92ef5539b8840d48dc9cf1b2709c9d6b588232a055c524458b +urllib3==1.26.18 \ + --hash=sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07 \ + --hash=sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0 # via # requests # twine From 655df668750dbde738784c60e11d011c290e788b Mon Sep 17 00:00:00 2001 From: Jonathan Ostrander Date: Thu, 19 Oct 2023 16:07:37 -0400 Subject: [PATCH 1654/2016] fix: AccessEntry API representation parsing (#1682) * fix: AccessEntry API representation parsing Overriding the `AccessEntry#_properties` with a deep copy of the API resource overwrites the `role` property set in `AccessEntry.__init__` which isn't present in the resource if the `role` is set to `None`. This causes `AccessEntry`s generated from API representations to no longer evaluate to equal with equivalent `AccessEntry` resources instantiated through `AccessEntry.__init__`. The added unit test fails without the change and passes with the change. * build: formatting --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/dataset.py | 4 +--- .../tests/unit/test_dataset.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index b7fed61c7a52..0f1a0f3ccbdd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -501,9 +501,7 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry": if len(entry) != 0: raise ValueError("Entry has unexpected keys remaining.", entry) - config = cls(role, entity_type, entity_id) - config._properties = copy.deepcopy(resource) - return config + return cls(role, entity_type, entity_id) class Dataset(object): diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 7d7091092eb9..0a709ab4361b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -152,6 +152,22 @@ def test_from_api_repr_w_unknown_entity_type(self): exp_resource = entry.to_api_repr() self.assertEqual(resource, exp_resource) + def test_from_api_repr_wo_role(self): + resource = { + "view": { + "projectId": "my-project", + "datasetId": "my_dataset", + "tableId": "my_table", + } + } + entry = self._get_target_class().from_api_repr(resource) + exp_entry = self._make_one( + role=None, + entity_type="view", + entity_id=resource["view"], + ) + self.assertEqual(entry, exp_entry) + def test_to_api_repr_w_extra_properties(self): resource = { "role": "READER", From 7752e73eea7eced9f4463014039745dced6fd1d3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 19 Oct 2023 16:56:00 -0500 Subject: [PATCH 1655/2016] docs: remove redundant `bigquery_update_table_expiration` code sample (#1673) New version of this sample added in https://github.com/googleapis/python-bigquery/pull/1457 and migrated to in the docs in internal change 570781706. Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/docs/snippets.py | 46 ------------------- 1 file changed, 46 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 62b0b6fd61c2..64f5361cd177 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -203,52 +203,6 @@ def test_update_table_description(client, to_delete): # [END bigquery_update_table_description] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_update_table_expiration(client, to_delete): - """Update a table's expiration time.""" - dataset_id = "update_table_expiration_dataset_{}".format(_millis()) - table_id = "update_table_expiration_table_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table = client.create_table(table) - - # TODO(thejaredchapman): After code sample has been updated from cloud.google.com delete this. - - # [START bigquery_update_table_expiration] - import datetime - - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, dataset_id) - # table_ref = dataset_ref.table('my_table') - # table = client.get_table(table_ref) # API request - - assert table.expires is None - - # set table to expire 5 days from now - expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( - days=5 - ) - table.expires = expiration - table = client.update_table(table, ["expires"]) # API request - - # expiration is stored in milliseconds - margin = datetime.timedelta(microseconds=1000) - assert expiration - margin <= table.expires <= expiration + margin - # [END bigquery_update_table_expiration] - - @pytest.mark.skip( reason=( "update_table() is flaky " From 93b49528db58e403be5b688801c6ab95c4879d74 Mon Sep 17 00:00:00 2001 From: nayaknishant Date: Mon, 23 Oct 2023 12:15:18 -0400 Subject: [PATCH 1656/2016] chore: increasing Shapely dependency upper bound (#1696) * adding ASCII support for external config * adding tests for preserveAscii... * adding tests for preserveAscii... * changing 'False' to False * linting * bumping up Shapely dependency upper bound --- packages/google-cloud-bigquery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 08106f6940ec..4e87b3b84e61 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -71,7 +71,7 @@ "ipywidgets>=7.7.0", "ipykernel>=6.0.0", ], - "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <2.0dev"], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <3.0.0dev"], "ipython": [ "ipython>=7.23.1,!=8.1.0", "ipykernel>=6.0.0", From dae6ab6bbf37cc548e87ceda14c3ec959c6de544 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 23 Oct 2023 13:35:07 -0400 Subject: [PATCH 1657/2016] chore: benchmark revamp (#1694) * chore: benchmark revamp * foramt and add more documentation --- .../google-cloud-bigquery/benchmark/README.md | 128 ++++++- .../benchmark/benchmark.py | 339 ++++++++++++++++-- .../benchmark/queries.json | 26 +- packages/google-cloud-bigquery/noxfile.py | 2 + 4 files changed, 450 insertions(+), 45 deletions(-) diff --git a/packages/google-cloud-bigquery/benchmark/README.md b/packages/google-cloud-bigquery/benchmark/README.md index 435926acb045..33065807ec34 100644 --- a/packages/google-cloud-bigquery/benchmark/README.md +++ b/packages/google-cloud-bigquery/benchmark/README.md @@ -1,8 +1,128 @@ # BigQuery Benchmark -This directory contains benchmarks for BigQuery client. +This directory contains benchmark scripts for BigQuery client. It is created primarily for project +maintainers to measure library performance. ## Usage -`python benchmark.py queries.json` +`python benchmark.py` -BigQuery service caches requests so the benchmark should be run -at least twice, disregarding the first result. + +### Flags +Run `python benchmark.py -h` for detailed information on available flags. + +`--reruns` can be used to override the default number of times a query is rerun. Must be a positive +integer. Default value is 3. + +`--projectid` can be used to run benchmarks in a different project. If unset, the GOOGLE_CLOUD_PROJECT + environment variable is used. + +`--queryfile` can be used to override the default file which contains queries to be instrumented. + +`--table` can be used to specify a table to which benchmarking results should be streamed. The format +for this string is in BigQuery standard SQL notation without escapes, e.g. `projectid.datasetid.tableid` + +`--create_table` can be used to have the benchmarking tool create the destination table prior to streaming. + +`--tag` allows arbitrary key:value pairs to be set. This flag can be specified multiple times. + +When `--create_table` flag is set, must also specify the name of the new table using `--table`. + +### Example invocations + +Setting all the flags +``` +python benchmark.py \ + --reruns 5 \ + --projectid test_project_id \ + --table logging_project_id.querybenchmarks.measurements \ + --create_table \ + --tag source:myhostname \ + --tag somekeywithnovalue \ + --tag experiment:special_environment_thing +``` + +Or, a more realistic invocation using shell substitions: +``` +python benchmark.py \ + --reruns 5 \ + --table $BENCHMARK_TABLE \ + --tag origin:$(hostname) \ + --tag branch:$(git branch --show-current) \ + --tag latestcommit:$(git log --pretty=format:'%H' -n 1) +``` + +## Stream Results To A BigQuery Table + +When streaming benchmarking results to a BigQuery table, the table schema is as follows: +``` +[ + { + "name": "groupname", + "type": "STRING" + }, + { + "name": "name", + "type": "STRING" + }, + { + "name": "tags", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + { + "name": "key", + "type": "STRING" + }, + { + "name": "value", + "type": "STRING" + } + ] + }, + { + "name": "SQL", + "type": "STRING" + }, + { + "name": "runs", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + { + "name": "errorstring", + "type": "STRING" + }, + { + "name": "start_time", + "type": "TIMESTAMP" + }, + { + "name": "query_end_time", + "type": "TIMESTAMP" + }, + { + "name": "first_row_returned_time", + "type": "TIMESTAMP" + }, + { + "name": "all_rows_returned_time", + "type": "TIMESTAMP" + }, + { + "name": "total_rows", + "type": "INTEGER" + } + ] + }, + { + "name": "event_time", + "type": "TIMESTAMP" + } +] +``` + +The table schema is the same as the [benchmark in go](https://github.com/googleapis/google-cloud-go/tree/main/bigquery/benchmarks), +so results from both languages can be streamed to the same table. + +## BigQuery Benchmarks In Other Languages +* Go: https://github.com/googleapis/google-cloud-go/tree/main/bigquery/benchmarks +* JAVA: https://github.com/googleapis/java-bigquery/tree/main/benchmark diff --git a/packages/google-cloud-bigquery/benchmark/benchmark.py b/packages/google-cloud-bigquery/benchmark/benchmark.py index 2917f169aba1..30e294baa9a9 100644 --- a/packages/google-cloud-bigquery/benchmark/benchmark.py +++ b/packages/google-cloud-bigquery/benchmark/benchmark.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,35 +12,312 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.cloud import bigquery +"""Scripts for benchmarking BigQuery queries performance.""" + +import argparse from datetime import datetime import json -import sys - -if len(sys.argv) < 2: - raise Exception('need query file, usage: python {0} '.format(sys.argv[0])) - -with open(sys.argv[1], 'r') as f: - queries = json.loads(f.read()) - -client = bigquery.Client() - -for query in queries: - start_time = datetime.now() - job = client.query(query) - rows = job.result() - - num_rows = 0 - num_cols = None - first_byte_time = None - - for row in rows: - if num_rows == 0: - num_cols = len(row) - first_byte_time = datetime.now() - start_time - elif num_cols != len(row): - raise Exception('found {0} columsn, expected {1}'.format(len(row), num_cols)) - num_rows += 1 - total_time = datetime.now() - start_time - print("query {0}: {1} rows, {2} cols, first byte {3} sec, total {4} sec" - .format(query, num_rows, num_cols, first_byte_time.total_seconds(), total_time.total_seconds())) +import os + +from google.api_core import exceptions + +from google.cloud import bigquery + +_run_schema = [ + bigquery.SchemaField("groupname", "STRING", mode="NULLABLE"), + bigquery.SchemaField("name", "STRING", mode="NULLABLE"), + bigquery.SchemaField( + "tags", + "RECORD", + mode="REPEATED", + fields=[ + bigquery.SchemaField("key", "STRING", mode="NULLABLE"), + bigquery.SchemaField("value", "STRING", mode="NULLABLE"), + ], + ), + bigquery.SchemaField("SQL", "STRING", mode="NULLABLE"), + bigquery.SchemaField( + "runs", + "RECORD", + mode="REPEATED", + fields=[ + bigquery.SchemaField("errorstring", "STRING", mode="NULLABLE"), + bigquery.SchemaField("start_time", "TIMESTAMP", mode="NULLABLE"), + bigquery.SchemaField("query_end_time", "TIMESTAMP", mode="NULLABLE"), + bigquery.SchemaField( + "first_row_returned_time", "TIMESTAMP", mode="NULLABLE" + ), + bigquery.SchemaField( + "all_rows_returned_time", "TIMESTAMP", mode="NULLABLE" + ), + bigquery.SchemaField("total_rows", "INTEGER", mode="NULLABLE"), + ], + ), + bigquery.SchemaField("event_time", "TIMESTAMP", mode="NULLABLE"), +] + + +def _check_pos_int(value): + """Verifies the value is a positive integer.""" + ivalue = int(value) + if ivalue <= 0: + raise argparse.ArgumentTypeError( + f"Argument rerun should be positive int. Actual value: {value}" + ) + return ivalue + + +def _parse_tag(tag): + """Parses input tag into key value pair as a dict.""" + tagstring = str(tag) + key, value = tagstring.split(":") + if not key or not value: + raise argparse.ArgumentTypeError( + "key and value in tag need to be non-empty. Actual value: " + + f"key={key}, value={value}" + ) + return {"key": key, "value": value} + + +def _parse_args() -> dict: + """Parses input flags.""" + parser = argparse.ArgumentParser(description="Benchmark for BigQuery.") + + parser.add_argument( + "--reruns", + action="store", + type=_check_pos_int, + default=3, + metavar="", + help="how many times each query is run. Must be a positive integer." + + "Default 3 times", + ) + + parser.add_argument( + "--projectid", + action="store", + type=str, + metavar="", + help="run benchmarks in a different project. If unset, the " + + "GOOGLE_CLOUD_PROJECT environment variable is used", + ) + + parser.add_argument( + "--queryfile", + action="store", + type=str, + metavar="", + default="queries.json", + help="override the default file which contains queries to be instrumented", + ) + + parser.add_argument( + "--table", + action="store", + type=str, + metavar="", + help="specify a table to which benchmarking results should be " + + "streamed. The format for this string is in BigQuery standard SQL " + + "notation without escapes, e.g. projectid.datasetid.tableid", + ) + + parser.add_argument( + "--create_table", + action="store_true", + help="let the benchmarking tool create the destination table prior to" + + " streaming; if set, also need to set --table to specify table name", + ) + + parser.add_argument( + "--tag", + action="append", + type=_parse_tag, + metavar="", + help="set arbitrary key:value pairs, can be set multiple times", + ) + + args = parser.parse_args() + args_dict = vars(args) + + # Verifies that project id is set. + if not args_dict.get("projectid"): + if projectid_env := os.environ["GOOGLE_CLOUD_PROJECT"]: + args_dict["projectid"] = projectid_env + else: + raise ValueError( + "Must provide --projectid or set " + "GOOGLE_CLOUD_PROJECT environment variable" + ) + + # Verifies that table name is specified when `create_table == True`. + if args_dict.get("create_table") and not args_dict.get("table"): + raise ValueError( + "When --create_table is present, must specify table name with --table" + ) + + return args_dict + + +def _prepare_table(client, create_table: bool, table_name: str) -> str: + """Ensures a table exists, and optionally creates it if directed.""" + + # Verifies that table destination is of valid format. + parts = table_name.split(".") + if len(parts) != 3: + raise ValueError(f"Expected table in p.d.t format, got: {table_name}") + + table = bigquery.Table(table_name, schema=_run_schema) + + # Create table if create_table == True. + if create_table: + table = client.create_table(table) + print(f"Created table {table.project}.{table.dataset_id}." f"{table.table_id}") + + # Verifies that table exists. + client.get_table(table_name) + return table_name + + +def _run_query(client, query: str, rerun: int) -> list: + """Runs individual query for `rerun` times, and returns run results.""" + runs = [] + + for _ in range(rerun): + print(".", end="", flush=True) + run = {} + num_rows = 0 + num_cols = 0 + start_time = datetime.now() + first_row_time = datetime.min + end_time = datetime.min + + job = client.query(query) + query_end_time = datetime.now() + + try: + rows = job.result() + for row in rows: + if num_rows == 0: + num_cols = len(row) + first_row_time = datetime.now() + elif num_cols != len(row): + raise RuntimeError(f"found {len(row)} columns, expected {num_cols}") + num_rows += 1 + end_time = datetime.now() + except exceptions.BadRequest as exc: + run["errorstring"] = repr(exc) + + run["start_time"] = start_time.isoformat() + run["query_end_time"] = query_end_time.isoformat() + run["first_row_returned_time"] = first_row_time.isoformat() + run["all_rows_returned_time"] = end_time.isoformat() + run["total_rows"] = num_rows + runs.append(run) + + print("") + return runs + + +def _get_delta(time_str_1: str, time_str_2: str) -> str: + """Calculates delta of two ISO format time string, and return as a string.""" + time_1 = datetime.fromisoformat(time_str_1) + time_2 = datetime.fromisoformat(time_str_2) + delta = time_1 - time_2 + return str(delta) + + +def _is_datetime_min(time_str: str) -> bool: + return datetime.fromisoformat(time_str) == datetime.min + + +def _summary(run: dict) -> str: + """Coverts run dict to run summary string.""" + no_val = "NODATA" + output = ["QUERYTIME "] + + if not _is_datetime_min(run.get("query_end_time")): + output.append(f"{_get_delta(run.get('query_end_time'), run.get('start_time'))}") + else: + output.append(no_val) + output.append(" FIRSTROW ") + + if not _is_datetime_min(run.get("first_row_returned_time")): + output.append( + f"{_get_delta(run.get('first_row_returned_time'), run.get('start_time'))}" + ) + else: + output.append(no_val) + output += " ALLROWS " + + if not _is_datetime_min(run.get("all_rows_returned_time")): + output.append( + f"{_get_delta(run.get('all_rows_returned_time'), run.get('start_time'))}" + ) + else: + output.append(no_val) + + if run.get("total_rows"): + output.append(f" ROWS {run.get('total_rows')}") + if run.get("errorstring"): + output.append(f" ERRORED {run.get('errorstring')}") + + return "".join(output) + + +def _print_results(profiles: list): + for i, prof in enumerate(profiles): + print(f"{i+1}: ({prof['groupname']}:{prof['name']})") + print(f"SQL: {prof['SQL']}") + print("MEASUREMENTS") + for j, run in enumerate(prof["runs"]): + print(f"\t\t({j}) {_summary(run)}") + + +def _run_benchmarks(args: dict) -> list: + client = bigquery.Client() + + # If we're going to stream results, let's make sure we can do that + # before running all the tests. + table_id = "" + if args.get("create_table") or args.get("table"): + table_id = _prepare_table(client, args.get("create_table"), args.get("table")) + + queries_file = args.get("queryfile") + with open(queries_file, "r") as f: + groups = json.loads(f.read()) + + measure_start = datetime.now() + profiles = [] + for group_name, group in groups.items(): + for name, query in group.items(): + print(f"Measuring {group_name} : {name}", end="", flush=True) + event_time = datetime.now() + runs = _run_query(client, query, args.get("reruns")) + + profile = {} + profile["groupname"] = group_name + profile["name"] = name + profile["tags"] = args.get("tag") or [] + profile["SQL"] = query + profile["runs"] = runs + profile["event_time"] = event_time.isoformat() + profiles.append(profile) + + measure_end = datetime.now() + print(f"Measurement time: {str(measure_end-measure_start)}") + + # Stream benchmarking results to table, if required. + if table_id: + print(f"Streaming test results to table {table_id}...") + errors = client.insert_rows_json(table_id, profiles) + if errors: + raise RuntimeError(f"Cannot upload queries profiles: {errors}") + print("Streaming complete.") + + return profiles + + +if __name__ == "__main__": + args = _parse_args() + profiles = _run_benchmarks(args) + _print_results(profiles) diff --git a/packages/google-cloud-bigquery/benchmark/queries.json b/packages/google-cloud-bigquery/benchmark/queries.json index 13fed38b52b3..464395619a63 100644 --- a/packages/google-cloud-bigquery/benchmark/queries.json +++ b/packages/google-cloud-bigquery/benchmark/queries.json @@ -1,10 +1,16 @@ -[ - "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 10000", - "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 100000", - "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 1000000", - "SELECT title FROM `bigquery-public-data.samples.wikipedia` ORDER BY title LIMIT 1000", - "SELECT title, id, timestamp, contributor_ip FROM `bigquery-public-data.samples.wikipedia` WHERE title like 'Blo%' ORDER BY id", - "SELECT * FROM `bigquery-public-data.baseball.games_post_wide` ORDER BY gameId", - "SELECT * FROM `bigquery-public-data.samples.github_nested` WHERE repository.has_downloads ORDER BY repository.created_at LIMIT 10000", - "SELECT repo_name, path FROM `bigquery-public-data.github_repos.files` WHERE path LIKE '%.java' ORDER BY id LIMIT 1000000" -] +{ + "simple-cacheable": { + "nycyellow-limit1k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 1000", + "nycyellow-limit10k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 10000", + "nycyellow-limit100k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 100000", + "wikisamples-ordered-limit1k":"SELECT title FROM `bigquery-public-data.samples.wikipedia` ORDER BY title LIMIT 1000" + }, + "simple-nondeterministic": { + "current-timestamp":"SELECT CURRENT_TIMESTAMP() as ts", + "session-user": "SELECT SESSION_USER() as ts", + "literals": "SELECT 1 as i, 3.14 as pi" + }, + "simple-invalid": { + "invalid-query": "invalid sql here" + } +} diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index ba06f925d606..4ddd4eaafd7b 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -26,6 +26,7 @@ PYTYPE_VERSION = "pytype==2021.4.9" BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ( + "benchmark", "docs", "google", "samples", @@ -381,6 +382,7 @@ def lint(session): session.run("flake8", "tests") session.run("flake8", os.path.join("docs", "samples")) session.run("flake8", os.path.join("docs", "snippets.py")) + session.run("flake8", "benchmark") session.run("black", "--check", *BLACK_PATHS) From 2aeb685419f52afb178438cb67bf826f6fdf02ad Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 26 Oct 2023 15:46:06 -0400 Subject: [PATCH 1658/2016] chore: refactor BigQueryStorageVersions (#1699) * chore: refactor BigQueryStorageVersions * address comments in #1680 * add unit test --- .../google/cloud/bigquery/_helpers.py | 65 ---------- .../google/cloud/bigquery/_pandas_helpers.py | 3 +- .../cloud/bigquery/_versions_helpers.py | 81 +++++++++++- .../google/cloud/bigquery/client.py | 42 +++--- .../google/cloud/bigquery/exceptions.py | 6 + .../google/cloud/bigquery/magics/magics.py | 27 +++- .../google/cloud/bigquery/table.py | 20 ++- .../tests/unit/test__helpers.py | 73 ----------- .../tests/unit/test__pandas_helpers.py | 18 ++- .../tests/unit/test__versions_helpers.py | 121 +++++++++++++++++- .../tests/unit/test_client.py | 4 +- .../tests/unit/test_magics.py | 16 ++- .../tests/unit/test_table.py | 6 +- 13 files changed, 286 insertions(+), 196 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 488766853051..684cbfc12003 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -30,10 +30,6 @@ from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes -import packaging.version - -from google.cloud.bigquery import exceptions - _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" _TIMEONLY_W_MICROS = "%H:%M:%S.%f" @@ -52,10 +48,6 @@ r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) -_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") - -_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") - BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" """Environment variable defining host for emulator.""" @@ -67,63 +59,6 @@ def _get_bigquery_host(): return os.environ.get(BIGQUERY_EMULATOR_HOST, _DEFAULT_HOST) -class BQStorageVersions: - """Version comparisons for google-cloud-bigqueyr-storage package.""" - - def __init__(self): - self._installed_version = None - - @property - def installed_version(self) -> packaging.version.Version: - """Return the parsed version of google-cloud-bigquery-storage.""" - if self._installed_version is None: - from google.cloud import bigquery_storage - - self._installed_version = packaging.version.parse( - # Use 0.0.0, since it is earlier than any released version. - # Legacy versions also have the same property, but - # creating a LegacyVersion has been deprecated. - # https://github.com/pypa/packaging/issues/321 - getattr(bigquery_storage, "__version__", "0.0.0") - ) - - return self._installed_version # type: ignore - - @property - def is_read_session_optional(self) -> bool: - """True if read_session is optional to rows(). - - See: https://github.com/googleapis/python-bigquery-storage/pull/228 - """ - return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION - - def verify_version(self): - """Verify that a recent enough version of BigQuery Storage extra is - installed. - - The function assumes that google-cloud-bigquery-storage extra is - installed, and should thus be used in places where this assumption - holds. - - Because `pip` can install an outdated version of this extra despite the - constraints in `setup.py`, the calling code can use this helper to - verify the version compatibility at runtime. - - Raises: - exceptions.LegacyBigQueryStorageError: - If the google-cloud-bigquery-storage package is outdated. - """ - if self.installed_version < _MIN_BQ_STORAGE_VERSION: - msg = ( - "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})." - ) - raise exceptions.LegacyBigQueryStorageError(msg) - - -BQ_STORAGE_VERSIONS = BQStorageVersions() - - def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" return value is not None or (field is not None and field.mode != "NULLABLE") diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index ea790d6c9393..53db9511c058 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -23,7 +23,6 @@ import warnings from typing import Any, Union -from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema @@ -745,7 +744,7 @@ def _download_table_bqstorage_stream( # Avoid deprecation warnings for passing in unnecessary read session. # https://github.com/googleapis/python-bigquery-storage/issues/229 - if _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: + if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: rowstream = reader.rows() else: rowstream = reader.rows(session) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py index 1f04c74e0a99..ce529b76e473 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py @@ -22,6 +22,8 @@ _MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") +_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") +_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") class PyarrowVersions: @@ -51,7 +53,7 @@ def use_compliant_nested_type(self) -> bool: return self.installed_version.major >= 4 def try_import(self, raise_if_error: bool = False) -> Any: - """Verify that a recent enough version of pyarrow extra is installed. + """Verifies that a recent enough version of pyarrow extra is installed. The function assumes that pyarrow extra is installed, and should thus be used in places where this assumption holds. @@ -92,3 +94,80 @@ def try_import(self, raise_if_error: bool = False) -> Any: PYARROW_VERSIONS = PyarrowVersions() + + +class BQStorageVersions: + """Version comparisons for google-cloud-bigqueyr-storage package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of google-cloud-bigquery-storage.""" + if self._installed_version is None: + from google.cloud import bigquery_storage + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(bigquery_storage, "__version__", "0.0.0") + ) + + return self._installed_version # type: ignore + + @property + def is_read_session_optional(self) -> bool: + """True if read_session is optional to rows(). + + See: https://github.com/googleapis/python-bigquery-storage/pull/228 + """ + return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION + + def try_import(self, raise_if_error: bool = False) -> Any: + """Tries to import the bigquery_storage module, and returns results + accordingly. It also verifies the module version is recent enough. + + If the import succeeds, returns the ``bigquery_storage`` module. + + If the import fails, + returns ``None`` when ``raise_if_error == False``, + raises Error when ``raise_if_error == True``. + + Returns: + The ``bigquery_storage`` module or ``None``. + + Raises: + exceptions.BigQueryStorageNotFoundError: + If google-cloud-bigquery-storage is not installed + exceptions.LegacyBigQueryStorageError: + If google-cloud-bigquery-storage package is outdated + """ + try: + from google.cloud import bigquery_storage # type: ignore + except ImportError: + if raise_if_error: + msg = ( + "Package google-cloud-bigquery-storage not found. " + "Install google-cloud-bigquery-storage version >= " + f"{_MIN_BQ_STORAGE_VERSION}." + ) + raise exceptions.BigQueryStorageNotFoundError(msg) + return None + + if self.installed_version < _MIN_BQ_STORAGE_VERSION: + if raise_if_error: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, " + f"please upgrade it to version >= {_MIN_BQ_STORAGE_VERSION} " + f"(version found: {self.installed_version})." + ) + raise exceptions.LegacyBigQueryStorageError(msg) + return None + + return bigquery_storage + + +BQ_STORAGE_VERSIONS = BQStorageVersions() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index ed75215b6b3e..e17d6b8daa9d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -65,26 +65,25 @@ DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore +from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _job_helpers -from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id +from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import enums +from google.cloud.bigquery import exceptions as bq_exceptions +from google.cloud.bigquery import job from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._helpers import _get_bigquery_host -from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _DEFAULT_HOST -from google.cloud.bigquery._http import Connection -from google.cloud.bigquery import _pandas_helpers -from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery import exceptions as bq_exceptions -from google.cloud.bigquery.opentelemetry_tracing import create_span -from google.cloud.bigquery import job +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, CopyJobConfig, @@ -98,6 +97,7 @@ from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref +from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery.query import _QueryResults from google.cloud.bigquery.retry import ( DEFAULT_JOB_RETRY, @@ -113,7 +113,6 @@ from google.cloud.bigquery.table import TableListItem from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import RowIterator -from google.cloud.bigquery.format_options import ParquetOptions pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() @@ -545,29 +544,32 @@ def _ensure_bqstorage_client( An existing BigQuery Storage client instance. If ``None``, a new instance is created and returned. client_options: - Custom options used with a new BigQuery Storage client instance if one - is created. + Custom options used with a new BigQuery Storage client instance + if one is created. client_info: - The client info used with a new BigQuery Storage client instance if one - is created. + The client info used with a new BigQuery Storage client + instance if one is created. Returns: A BigQuery Storage API client. """ + try: - from google.cloud import bigquery_storage # type: ignore - except ImportError: + bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import( + raise_if_error=True + ) + except bq_exceptions.BigQueryStorageNotFoundError: warnings.warn( "Cannot create BigQuery Storage client, the dependency " "google-cloud-bigquery-storage is not installed." ) return None - - try: - BQ_STORAGE_VERSIONS.verify_version() except bq_exceptions.LegacyBigQueryStorageError as exc: - warnings.warn(str(exc)) + warnings.warn( + "Dependency google-cloud-bigquery-storage is outdated: " + str(exc) + ) return None + if bqstorage_client is None: bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=self._credentials, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py index 2bab97fea93e..e94a6c832aa3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py @@ -23,3 +23,9 @@ class LegacyBigQueryStorageError(BigQueryError): class LegacyPyarrowError(BigQueryError): """Raised when too old a version of pyarrow package is detected at runtime.""" + + +class BigQueryStorageNotFoundError(BigQueryError): + """Raised when BigQuery Storage extra is not installed when trying to + import it. + """ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index f92f7754142b..2a3583c66af2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -104,6 +104,8 @@ import google.auth # type: ignore from google.cloud import bigquery import google.cloud.bigquery.dataset +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import exceptions from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.magics import line_arg_parser as lap @@ -744,12 +746,31 @@ def _split_args_line(line): def _make_bqstorage_client(client, use_bqstorage_api, client_options): + """Creates a BigQuery Storage client. + + Args: + client (:class:`~google.cloud.bigquery.client.Client`): BigQuery client. + use_bqstorage_api (bool): whether BigQuery Storage API is used or not. + client_options (:class:`google.api_core.client_options.ClientOptions`): + Custom options used with a new BigQuery Storage client instance + if one is created. + + Raises: + ImportError: if google-cloud-bigquery-storage is not installed, or + grpcio package is not installed. + + + Returns: + None: if ``use_bqstorage_api == False``, or google-cloud-bigquery-storage + is outdated. + BigQuery Storage Client: + """ if not use_bqstorage_api: return None try: - from google.cloud import bigquery_storage # type: ignore # noqa: F401 - except ImportError as err: + _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True) + except exceptions.BigQueryStorageNotFoundError as err: customized_error = ImportError( "The default BigQuery Storage API client cannot be used, install " "the missing google-cloud-bigquery-storage and pyarrow packages " @@ -757,6 +778,8 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): "the --use_rest_api magic option." ) raise customized_error from err + except exceptions.LegacyBigQueryStorageError: + pass try: from google.api_core.gapic_v1 import client_info as gapic_client_info diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a967a179539d..633043322c82 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -60,14 +60,15 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import exceptions as bq_exceptions +from google.cloud.bigquery._tqdm_helpers import get_progress_bar +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.enums import DefaultPandasDTypes -from google.cloud.bigquery import exceptions +from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields -from google.cloud.bigquery._tqdm_helpers import get_progress_bar -from google.cloud.bigquery.external_config import ExternalConfig -from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration if typing.TYPE_CHECKING: # pragma: NO COVER # Unconditionally import optional dependencies again to tell pytype that @@ -1593,7 +1594,7 @@ def _is_completely_cached(self): return self._first_page_response.get(self._next_token) is None def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): - """Returns if the BigQuery Storage API can be used. + """Returns True if the BigQuery Storage API can be used. Returns: bool @@ -1610,13 +1611,10 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False try: - from google.cloud import bigquery_storage # noqa: F401 - except ImportError: + _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True) + except bq_exceptions.BigQueryStorageNotFoundError: return False - - try: - _helpers.BQ_STORAGE_VERSIONS.verify_version() - except exceptions.LegacyBigQueryStorageError as exc: + except bq_exceptions.LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return False diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 40223f0410e7..e2e2da3c81df 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -19,79 +19,6 @@ import mock -from google.cloud.bigquery import exceptions - -try: - from google.cloud import bigquery_storage # type: ignore -except ImportError: # pragma: NO COVER - bigquery_storage = None - - -@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") -class TestBQStorageVersions(unittest.TestCase): - def tearDown(self): - from google.cloud.bigquery import _helpers - - # Reset any cached versions since it may not match reality. - _helpers.BQ_STORAGE_VERSIONS._installed_version = None - - def _object_under_test(self): - from google.cloud.bigquery import _helpers - - return _helpers.BQStorageVersions() - - def _call_fut(self): - from google.cloud.bigquery import _helpers - - _helpers.BQ_STORAGE_VERSIONS._installed_version = None - return _helpers.BQ_STORAGE_VERSIONS.verify_version() - - def test_raises_no_error_w_recent_bqstorage(self): - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): - try: - self._call_fut() - except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER - self.fail("Legacy error raised with a non-legacy dependency version.") - - def test_raises_error_w_legacy_bqstorage(self): - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): - with self.assertRaises(exceptions.LegacyBigQueryStorageError): - self._call_fut() - - def test_raises_error_w_unknown_bqstorage_version(self): - with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: - del fake_module.__version__ - error_pattern = r"version found: 0.0.0" - with self.assertRaisesRegex( - exceptions.LegacyBigQueryStorageError, error_pattern - ): - self._call_fut() - - def test_installed_version_returns_cached(self): - versions = self._object_under_test() - versions._installed_version = object() - assert versions.installed_version is versions._installed_version - - def test_installed_version_returns_parsed_version(self): - versions = self._object_under_test() - - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"): - version = versions.installed_version - - assert version.major == 1 - assert version.minor == 2 - assert version.micro == 3 - - def test_is_read_session_optional_true(self): - versions = self._object_under_test() - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"): - assert versions.is_read_session_optional - - def test_is_read_session_optional_false(self): - versions = self._object_under_test() - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): - assert not versions.is_read_session_optional - class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 7724f308baf9..212a6f1dde35 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -40,7 +40,6 @@ from google import api_core from google.cloud.bigquery import exceptions -from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema @@ -56,12 +55,7 @@ # used in test parameterization. pyarrow = mock.Mock() -try: - from google.cloud import bigquery_storage - - _helpers.BQ_STORAGE_VERSIONS.verify_version() -except ImportError: # pragma: NO COVER - bigquery_storage = None +bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import() PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -1616,7 +1610,9 @@ def test__download_table_bqstorage_stream_includes_read_session( import google.cloud.bigquery_storage_v1.reader import google.cloud.bigquery_storage_v1.types - monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr( + _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None + ) monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0") bqstorage_client = mock.create_autospec( bigquery_storage.BigQueryReadClient, instance=True @@ -1641,7 +1637,7 @@ def test__download_table_bqstorage_stream_includes_read_session( @pytest.mark.skipif( bigquery_storage is None - or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + or not _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", ) def test__download_table_bqstorage_stream_omits_read_session( @@ -1650,7 +1646,9 @@ def test__download_table_bqstorage_stream_omits_read_session( import google.cloud.bigquery_storage_v1.reader import google.cloud.bigquery_storage_v1.types - monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr( + _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None + ) monkeypatch.setattr(bigquery_storage, "__version__", "2.6.0") bqstorage_client = mock.create_autospec( bigquery_storage.BigQueryReadClient, instance=True diff --git a/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py index 21386610bb12..144f14b7c0c3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py @@ -16,12 +16,21 @@ import mock +try: + import pyarrow # type: ignore +except ImportError: # pragma: NO COVER + pyarrow = None + +try: + from google.cloud import bigquery_storage # type: ignore +except ImportError: # pragma: NO COVER + bigquery_storage = None + from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions -pyarrow = pytest.importorskip("pyarrow") - +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") def test_try_import_raises_no_error_w_recent_pyarrow(): versions = _versions_helpers.PyarrowVersions() with mock.patch("pyarrow.__version__", new="5.0.0"): @@ -32,6 +41,7 @@ def test_try_import_raises_no_error_w_recent_pyarrow(): raise ("Legacy error raised with a non-legacy dependency version.") +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") def test_try_import_returns_none_w_legacy_pyarrow(): versions = _versions_helpers.PyarrowVersions() with mock.patch("pyarrow.__version__", new="2.0.0"): @@ -39,6 +49,7 @@ def test_try_import_returns_none_w_legacy_pyarrow(): assert pyarrow is None +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") def test_try_import_raises_error_w_legacy_pyarrow(): versions = _versions_helpers.PyarrowVersions() with mock.patch("pyarrow.__version__", new="2.0.0"): @@ -46,13 +57,15 @@ def test_try_import_raises_error_w_legacy_pyarrow(): versions.try_import(raise_if_error=True) -def test_installed_version_returns_cached(): +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") +def test_installed_pyarrow_version_returns_cached(): versions = _versions_helpers.PyarrowVersions() versions._installed_version = object() assert versions.installed_version is versions._installed_version -def test_installed_version_returns_parsed_version(): +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") +def test_installed_pyarrow_version_returns_parsed_version(): versions = _versions_helpers.PyarrowVersions() with mock.patch("pyarrow.__version__", new="1.2.3"): version = versions.installed_version @@ -60,3 +73,103 @@ def test_installed_version_returns_parsed_version(): assert version.major == 1 assert version.minor == 2 assert version.micro == 3 + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_raises_no_error_w_recent_bqstorage(): + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): + try: + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions.try_import(raise_if_error=True) + except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER + raise ("Legacy error raised with a non-legacy dependency version.") + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_raises_error_w_legacy_bqstorage(): + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + with pytest.raises(exceptions.LegacyBigQueryStorageError): + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions.try_import(raise_if_error=True) + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_returns_none_with_legacy_bqstorage(): + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + try: + bqstorage_versions = _versions_helpers.BQStorageVersions() + bq_storage = bqstorage_versions.try_import() + except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER + raise ("Legacy error raised when raise_if_error == False.") + assert bq_storage is None + + +@pytest.mark.skipif( + bigquery_storage is not None, + reason="Tests behavior when `google-cloud-bigquery-storage` isn't installed", +) +def test_returns_none_with_bqstorage_uninstalled(): + try: + bqstorage_versions = _versions_helpers.BQStorageVersions() + bq_storage = bqstorage_versions.try_import() + except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER + raise ("NotFound error raised when raise_if_error == False.") + assert bq_storage is None + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_raises_error_w_unknown_bqstorage_version(): + with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: + del fake_module.__version__ + error_pattern = r"version found: 0.0.0" + with pytest.raises(exceptions.LegacyBigQueryStorageError, match=error_pattern): + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions.try_import(raise_if_error=True) + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_installed_bqstorage_version_returns_cached(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions._installed_version = object() + assert bqstorage_versions.installed_version is bqstorage_versions._installed_version + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_installed_bqstorage_version_returns_parsed_version(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"): + bqstorage_versions = bqstorage_versions.installed_version + + assert bqstorage_versions.major == 1 + assert bqstorage_versions.minor == 2 + assert bqstorage_versions.micro == 3 + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_bqstorage_is_read_session_optional_true(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"): + assert bqstorage_versions.is_read_session_optional + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_bqstorage_is_read_session_optional_false(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): + assert not bqstorage_versions.is_read_session_optional diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 3143f2123aea..d470bd9fd033 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -826,7 +826,7 @@ def test_ensure_bqstorage_client_obsolete_dependency(self): client = self._make_one(project=self.PROJECT, credentials=creds) patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + "google.cloud.bigquery.client._versions_helpers.BQ_STORAGE_VERSIONS.try_import", side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: @@ -861,7 +861,7 @@ def test_ensure_bqstorage_client_existing_client_check_fails(self): mock_storage_client = mock.sentinel.mock_storage_client patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + "google.cloud.bigquery.client._versions_helpers.BQ_STORAGE_VERSIONS.try_import", side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 0cab943f7071..b03894095139 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -339,6 +339,9 @@ def test__make_bqstorage_client_true(): def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): + """When package `google-cloud-bigquery-storage` is not installed, reports + ImportError. + """ credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -358,6 +361,9 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) def test__make_bqstorage_client_true_obsolete_dependency(): + """When package `google-cloud-bigquery-storage` is installed but has outdated + version, returns None, and raises a warning. + """ credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -366,8 +372,10 @@ def test__make_bqstorage_client_true_obsolete_dependency(): ) patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=bq_exceptions.LegacyBigQueryStorageError("BQ Storage too old"), + "google.cloud.bigquery._versions_helpers.BQ_STORAGE_VERSIONS.try_import", + side_effect=bq_exceptions.LegacyBigQueryStorageError( + "google-cloud-bigquery-storage is outdated" + ), ) with patcher, warnings.catch_warnings(record=True) as warned: got = magics._make_bqstorage_client(test_client, True, {}) @@ -375,7 +383,9 @@ def test__make_bqstorage_client_true_obsolete_dependency(): assert got is None matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) + warning + for warning in warned + if "google-cloud-bigquery-storage is outdated" in str(warning) ] assert matching_warnings, "Obsolete dependency warning not raised." diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 65eb659bf6e9..fa2f30cea539 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2260,7 +2260,7 @@ def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( - "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", + "google.cloud.bigquery.table._versions_helpers.BQ_STORAGE_VERSIONS.try_import", side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: @@ -2866,11 +2866,11 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): mock_client = _mock_client() row_iterator = self._make_one(mock_client, api_request, path, schema) - def mock_verify_version(): + def mock_verify_version(raise_if_error: bool = False): raise exceptions.LegacyBigQueryStorageError("no bqstorage") with mock.patch( - "google.cloud.bigquery._helpers.BQ_STORAGE_VERSIONS.verify_version", + "google.cloud.bigquery._versions_helpers.BQ_STORAGE_VERSIONS.try_import", mock_verify_version, ): tbl = row_iterator.to_arrow(create_bqstorage_client=True) From 04ad1526c41730aabddd8e885cb426c5cfa74325 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Fri, 27 Oct 2023 18:40:04 -0400 Subject: [PATCH 1659/2016] chore: upgrade mypy (#1705) --- .../google/cloud/bigquery/client.py | 18 +++++++++--------- .../google/cloud/bigquery/job/query.py | 4 ++-- .../google/cloud/bigquery/table.py | 6 +++--- packages/google-cloud-bigquery/noxfile.py | 4 ++-- .../authenticate_service_account_test.py | 2 +- .../snippets/authorized_view_tutorial_test.py | 2 +- .../snippets/create_partitioned_table_test.py | 2 +- .../samples/snippets/create_table_cmek_test.py | 2 +- ...e_table_external_data_configuration_test.py | 2 +- ...ate_table_external_hive_partitioned_test.py | 2 +- .../create_table_schema_from_json_test.py | 2 +- .../snippets/create_table_snapshot_test.py | 2 +- .../samples/snippets/dataset_access_test.py | 4 ++-- .../samples/snippets/delete_job_test.py | 2 +- .../snippets/delete_label_table_test.py | 2 +- .../samples/snippets/get_table_labels_test.py | 2 +- .../snippets/get_table_make_schema_test.py | 2 +- .../samples/snippets/label_table_test.py | 2 +- .../load_table_schema_from_json_test.py | 2 +- .../snippets/load_table_uri_firestore_test.py | 2 +- .../samples/snippets/manage_job_test.py | 4 ++-- .../samples/snippets/materialized_view_test.py | 2 +- .../samples/snippets/natality_tutorial_test.py | 2 +- .../snippets/nested_repeated_schema_test.py | 2 +- .../samples/snippets/quickstart_test.py | 2 +- .../samples/snippets/relax_column_test.py | 2 +- .../samples/snippets/simple_app_test.py | 2 +- .../samples/snippets/test_update_with_dml.py | 4 ++-- .../snippets/update_table_expiration_test.py | 2 +- .../samples/snippets/user_credentials_test.py | 2 +- .../samples/snippets/view_test.py | 2 +- 31 files changed, 46 insertions(+), 46 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e17d6b8daa9d..496015b219c6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2182,12 +2182,12 @@ def list_jobs( parent_job: Optional[Union[QueryJob, str]] = None, max_results: Optional[int] = None, page_token: Optional[str] = None, - all_users: bool = None, + all_users: Optional[bool] = None, state_filter: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - min_creation_time: datetime.datetime = None, - max_creation_time: datetime.datetime = None, + min_creation_time: Optional[datetime.datetime] = None, + max_creation_time: Optional[datetime.datetime] = None, page_size: Optional[int] = None, ) -> page_iterator.Iterator: """List jobs for the project associated with this client. @@ -3407,7 +3407,7 @@ def insert_rows( self, table: Union[Table, TableReference, str], rows: Union[Iterable[Tuple], Iterable[Mapping[str, Any]]], - selected_fields: Sequence[SchemaField] = None, + selected_fields: Optional[Sequence[SchemaField]] = None, **kwargs, ) -> Sequence[Dict[str, Any]]: """Insert rows into a table via the streaming API. @@ -3483,7 +3483,7 @@ def insert_rows_from_dataframe( self, table: Union[Table, TableReference, str], dataframe, - selected_fields: Sequence[SchemaField] = None, + selected_fields: Optional[Sequence[SchemaField]] = None, chunk_size: int = 500, **kwargs: Dict, ) -> Sequence[Sequence[dict]]: @@ -3546,8 +3546,8 @@ def insert_rows_json( row_ids: Union[ Iterable[Optional[str]], AutoRowIDs, None ] = AutoRowIDs.GENERATE_UUID, - skip_invalid_rows: bool = None, - ignore_unknown_values: bool = None, + skip_invalid_rows: Optional[bool] = None, + ignore_unknown_values: Optional[bool] = None, template_suffix: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, @@ -3738,7 +3738,7 @@ def list_partitions( def list_rows( self, table: Union[Table, TableListItem, TableReference, str], - selected_fields: Sequence[SchemaField] = None, + selected_fields: Optional[Sequence[SchemaField]] = None, max_results: Optional[int] = None, page_token: Optional[str] = None, start_index: Optional[int] = None, @@ -3851,7 +3851,7 @@ def _list_rows_from_query_results( project: str, schema: SchemaField, total_rows: Optional[int] = None, - destination: Union[Table, TableReference, TableListItem, str] = None, + destination: Optional[Union[Table, TableReference, TableListItem, str]] = None, max_results: Optional[int] = None, start_index: Optional[int] = None, page_size: Optional[int] = None, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 7de209b8d1f5..57186acbcdfb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1693,7 +1693,7 @@ def to_arrow( def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, @@ -1879,7 +1879,7 @@ def to_dataframe( def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 633043322c82..dcba10428f29 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1853,7 +1853,7 @@ def to_arrow( def to_dataframe_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -1929,7 +1929,7 @@ def to_dataframe_iterable( def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_as_object: bool = False, @@ -2227,7 +2227,7 @@ def __can_cast_timestamp_ns(column): def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_column: Optional[str] = None, diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 4ddd4eaafd7b..a2b7a6843d11 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -22,7 +22,7 @@ import nox -MYPY_VERSION = "mypy==0.910" +MYPY_VERSION = "mypy==1.6.1" PYTYPE_VERSION = "pytype==2021.4.9" BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ( @@ -137,7 +137,7 @@ def mypy(session): "types-requests", "types-setuptools", ) - session.run("mypy", "google/cloud") + session.run("mypy", "google/cloud", "--show-traceback") @nox.session(python=DEFAULT_PYTHON_VERSION) diff --git a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py index 4b5711f80d2f..fbdd2d0649ff 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/authenticate_service_account_test.py @@ -17,7 +17,7 @@ import google.auth -import authenticate_service_account +import authenticate_service_account # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py index cae8704864eb..e2220fb54163 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py @@ -18,7 +18,7 @@ from google.cloud import bigquery import pytest -import authorized_view_tutorial +import authorized_view_tutorial # type: ignore @pytest.fixture(scope="module") diff --git a/packages/google-cloud-bigquery/samples/snippets/create_partitioned_table_test.py b/packages/google-cloud-bigquery/samples/snippets/create_partitioned_table_test.py index 0f684fcb05d7..e4d7ec20e518 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_partitioned_table_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_partitioned_table_test.py @@ -14,7 +14,7 @@ import typing -import create_partitioned_table +import create_partitioned_table # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py index 2b15fb350a95..e8626b84cca5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_cmek_test.py @@ -14,7 +14,7 @@ import typing -import create_table_cmek +import create_table_cmek # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration_test.py index e97d7170dcda..bf81a75f9b7c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_external_data_configuration_test.py @@ -14,7 +14,7 @@ import typing -import create_table_external_data_configuration +import create_table_external_data_configuration # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py index 37deb8b12517..5b8cbe1c3579 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_external_hive_partitioned_test.py @@ -14,7 +14,7 @@ import typing -import create_table_external_hive_partitioned +import create_table_external_hive_partitioned # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py index 39b00cea0989..e725d3ccfd68 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_schema_from_json_test.py @@ -14,7 +14,7 @@ import typing -import create_table_schema_from_json +import create_table_schema_from_json # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py b/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py index 784dc3ddd691..17ef24d26c50 100644 --- a/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/create_table_snapshot_test.py @@ -14,7 +14,7 @@ import typing -import create_table_snapshot +import create_table_snapshot # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py b/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py index cc6a9af61dde..e3a53b084458 100644 --- a/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/dataset_access_test.py @@ -14,8 +14,8 @@ import typing -import revoke_dataset_access -import update_dataset_access +import revoke_dataset_access # type: ignore +import update_dataset_access # type: ignore if typing.TYPE_CHECKING: from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py b/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py index ac9d52dcf48b..88eeae1ed2af 100644 --- a/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/delete_job_test.py @@ -16,7 +16,7 @@ from google.cloud import bigquery -import delete_job +import delete_job # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py b/packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py index 80fcbb695f2b..01e538ae3016 100644 --- a/packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/delete_label_table_test.py @@ -14,7 +14,7 @@ import typing -import delete_label_table +import delete_label_table # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py b/packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py index 95a95b60ff84..e910d6a655b8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/get_table_labels_test.py @@ -16,7 +16,7 @@ from google.cloud import bigquery -import get_table_labels +import get_table_labels # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/get_table_make_schema_test.py b/packages/google-cloud-bigquery/samples/snippets/get_table_make_schema_test.py index 424f16e39ece..b1a1623bb24c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/get_table_make_schema_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/get_table_make_schema_test.py @@ -14,7 +14,7 @@ import typing -import get_table_make_schema +import get_table_make_schema # type: ignore if typing.TYPE_CHECKING: import pathlib diff --git a/packages/google-cloud-bigquery/samples/snippets/label_table_test.py b/packages/google-cloud-bigquery/samples/snippets/label_table_test.py index 98f3b3cc7c32..49f5406ab4af 100644 --- a/packages/google-cloud-bigquery/samples/snippets/label_table_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/label_table_test.py @@ -14,7 +14,7 @@ import typing -import label_table +import label_table # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py b/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py index c28875b0ef18..745793cd7569 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_table_schema_from_json_test.py @@ -14,7 +14,7 @@ import typing -import load_table_schema_from_json +import load_table_schema_from_json # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py b/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py index 552fa2e355d5..e19378a04a6c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/load_table_uri_firestore_test.py @@ -14,7 +14,7 @@ import typing -import load_table_uri_firestore +import load_table_uri_firestore # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/manage_job_test.py b/packages/google-cloud-bigquery/samples/snippets/manage_job_test.py index 630be365bac5..2ef4be2e02a0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/manage_job_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/manage_job_test.py @@ -15,8 +15,8 @@ from google.cloud import bigquery import pytest -import manage_job_cancel -import manage_job_get +import manage_job_cancel # type: ignore +import manage_job_get # type: ignore def test_manage_job(capsys: pytest.CaptureFixture[str]) -> None: diff --git a/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py b/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py index 70869346ff03..59e08131ee62 100644 --- a/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py @@ -20,7 +20,7 @@ from google.cloud import bigquery import pytest -import materialized_view +import materialized_view # type: ignore def temp_suffix() -> str: diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py index f5673852809d..7f24ca5cb78c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py @@ -18,7 +18,7 @@ from google.cloud import bigquery import pytest -import natality_tutorial +import natality_tutorial # type: ignore @pytest.fixture(scope="module") diff --git a/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py b/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py index 8bb8bda6ae36..67815dcf6ea6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/nested_repeated_schema_test.py @@ -14,7 +14,7 @@ import typing -import nested_repeated_schema +import nested_repeated_schema # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py index 98a5fdd4e9ba..88a24618df8d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py @@ -18,7 +18,7 @@ from google.cloud import bigquery import pytest -import quickstart +import quickstart # type: ignore # Must match the dataset listed in quickstart.py (there's no easy way to # extract this). diff --git a/packages/google-cloud-bigquery/samples/snippets/relax_column_test.py b/packages/google-cloud-bigquery/samples/snippets/relax_column_test.py index b40b13fa122c..ede1c3ab7f89 100644 --- a/packages/google-cloud-bigquery/samples/snippets/relax_column_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/relax_column_test.py @@ -16,7 +16,7 @@ from google.cloud import bigquery -import relax_column +import relax_column # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py b/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py index de4e1ce34409..4bf0bb49c127 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app_test.py @@ -14,7 +14,7 @@ import typing -import simple_app +import simple_app # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py b/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py index ef5ec196ac84..d03114a361fe 100644 --- a/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py +++ b/packages/google-cloud-bigquery/samples/snippets/test_update_with_dml.py @@ -17,8 +17,8 @@ from google.cloud import bigquery import pytest -from conftest import prefixer -import update_with_dml +from conftest import prefixer # type: ignore +import update_with_dml # type: ignore @pytest.fixture diff --git a/packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py b/packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py index 1566c7763493..ed68a8c2c80c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/update_table_expiration_test.py @@ -15,7 +15,7 @@ import datetime import typing -import update_table_expiration +import update_table_expiration # type: ignore if typing.TYPE_CHECKING: import pathlib diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py index df8a6354de99..8448187def4c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py @@ -19,7 +19,7 @@ import mock import pytest -from user_credentials import main +from user_credentials import main # type: ignore PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] diff --git a/packages/google-cloud-bigquery/samples/snippets/view_test.py b/packages/google-cloud-bigquery/samples/snippets/view_test.py index 4d0d43b77507..1e615db47a26 100644 --- a/packages/google-cloud-bigquery/samples/snippets/view_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/view_test.py @@ -19,7 +19,7 @@ from google.cloud import bigquery import pytest -import view +import view # type: ignore def temp_suffix() -> str: From 0c5b7e99d6b0e5d855dc25bde6effdd7c4852dfd Mon Sep 17 00:00:00 2001 From: meredithslota Date: Fri, 27 Oct 2023 16:11:15 -0700 Subject: [PATCH 1660/2016] chore(samples): Update snippets.py to dedupe region tags (#1701) Wait until cl/576311555 is submitted and snippetbot updates. --- .../google-cloud-bigquery/docs/snippets.py | 96 ------------------- 1 file changed, 96 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 64f5361cd177..72ac2a000f83 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -118,54 +118,6 @@ def test_create_client_default_credentials(): assert client is not None -def test_create_partitioned_table(client, to_delete): - dataset_id = "create_table_partitioned_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = client.create_dataset(dataset_ref) - to_delete.append(dataset) - - # TODO(tswast): remove this snippet once cloud.google.com is updated to use - # samples/snippets/create_partitioned_table.py - # [START bigquery_create_table_partitioned] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, 'my_dataset') - - table_ref = dataset_ref.table("my_partitioned_table") - schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - bigquery.SchemaField("date", "DATE"), - ] - table = bigquery.Table(table_ref, schema=schema) - table.time_partitioning = bigquery.TimePartitioning( - type_=bigquery.TimePartitioningType.DAY, - field="date", # name of column to use for partitioning - expiration_ms=7776000000, - ) # 90 days - - table = client.create_table(table) - - print( - "Created table {}, partitioned on column {}".format( - table.table_id, table.time_partitioning.field - ) - ) - # [END bigquery_create_table_partitioned] - - assert table.time_partitioning.type_ == "DAY" - assert table.time_partitioning.field == "date" - assert table.time_partitioning.expiration_ms == 7776000000 - - -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) @pytest.mark.skip( reason=( "update_table() is flaky " @@ -203,54 +155,6 @@ def test_update_table_description(client, to_delete): # [END bigquery_update_table_description] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_relax_column(client, to_delete): - """Updates a schema field from required to nullable.""" - dataset_id = "relax_column_dataset_{}".format(_millis()) - table_id = "relax_column_table_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - # TODO(tswast): remove code sample once references to it on - # cloud.google.com are updated to samples/snippets/relax_column.py - # [START bigquery_relax_column] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - original_schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - - dataset_ref = bigquery.DatasetReference(project, dataset_id) - table_ref = dataset_ref.table(table_id) - table = bigquery.Table(table_ref, schema=original_schema) - table = client.create_table(table) - assert all(field.mode == "REQUIRED" for field in table.schema) - - # SchemaField properties cannot be edited after initialization. - # To make changes, construct new SchemaField objects. - relaxed_schema = [ - bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), - ] - table.schema = relaxed_schema - table = client.update_table(table, ["schema"]) - - assert all(field.mode == "NULLABLE" for field in table.schema) - # [END bigquery_relax_column] - - @pytest.mark.skip( reason=( "update_table() is flaky " From d8d9c33b4068296945b2735cecab741a6ffc89e3 Mon Sep 17 00:00:00 2001 From: Gaurang Shah Date: Sat, 28 Oct 2023 02:29:08 -0400 Subject: [PATCH 1661/2016] feat: add support for dataset.default_rounding_mode (#1688) Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/dataset.py | 38 +++++++ .../tests/system/test_client.py | 16 ++- .../tests/unit/test_create_dataset.py | 103 +++++++++++++++++- 3 files changed, 153 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 0f1a0f3ccbdd..af94784a4e42 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -525,6 +525,7 @@ class Dataset(object): "friendly_name": "friendlyName", "default_encryption_configuration": "defaultEncryptionConfiguration", "storage_billing_model": "storageBillingModel", + "default_rounding_mode": "defaultRoundingMode", } def __init__(self, dataset_ref) -> None: @@ -532,6 +533,43 @@ def __init__(self, dataset_ref) -> None: dataset_ref = DatasetReference.from_string(dataset_ref) self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}} + @property + def default_rounding_mode(self): + """Union[str, None]: defaultRoundingMode of the dataset as set by the user + (defaults to :data:`None`). + + Set the value to one of ``'ROUND_HALF_AWAY_FROM_ZERO'``, ``'ROUND_HALF_EVEN'``, or + ``'ROUNDING_MODE_UNSPECIFIED'``. + + See `default rounding mode + `_ + in REST API docs and `updating the default rounding model + `_ + guide. + + Raises: + ValueError: for invalid value types. + """ + return self._properties.get("defaultRoundingMode") + + @default_rounding_mode.setter + def default_rounding_mode(self, value): + possible_values = [ + "ROUNDING_MODE_UNSPECIFIED", + "ROUND_HALF_AWAY_FROM_ZERO", + "ROUND_HALF_EVEN", + ] + if not isinstance(value, str) and value is not None: + raise ValueError("Pass a string, or None") + if value is None: + self._properties["defaultRoundingMode"] = "ROUNDING_MODE_UNSPECIFIED" + if value not in possible_values and value is not None: + raise ValueError( + f'rounding mode needs to be one of {",".join(possible_values)}' + ) + if value: + self._properties["defaultRoundingMode"] = value + @property def project(self): """str: Project ID of the project bound to the dataset.""" diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index d3b95ec49a1d..09606590ede5 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -265,6 +265,13 @@ def test_get_dataset(self): self.assertEqual(got.friendly_name, "Friendly") self.assertEqual(got.description, "Description") + def test_create_dataset_with_default_rounding_mode(self): + DATASET_ID = _make_dataset_id("create_dataset_rounding_mode") + dataset = self.temp_dataset(DATASET_ID, default_rounding_mode="ROUND_HALF_EVEN") + + self.assertTrue(_dataset_exists(dataset)) + self.assertEqual(dataset.default_rounding_mode, "ROUND_HALF_EVEN") + def test_update_dataset(self): dataset = self.temp_dataset(_make_dataset_id("update_dataset")) self.assertTrue(_dataset_exists(dataset)) @@ -2286,12 +2293,15 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - def temp_dataset(self, dataset_id, location=None): + def temp_dataset(self, dataset_id, *args, **kwargs): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) dataset = Dataset(dataset_ref) - if location: - dataset.location = location + if kwargs.get("location"): + dataset.location = kwargs.get("location") + if kwargs.get("default_rounding_mode"): + dataset.default_rounding_mode = kwargs.get("default_rounding_mode") + dataset = helpers.retry_403(Config.CLIENT.create_dataset)(dataset) self.to_delete.append(dataset) return dataset diff --git a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py index 81af52261d2c..3b2e644d9419 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py @@ -63,6 +63,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "datasetId": "starry-skies", "tableId": "northern-hemisphere", } + DEFAULT_ROUNDING_MODE = "ROUND_HALF_EVEN" RESOURCE = { "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, "etag": "etag", @@ -73,6 +74,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "defaultTableExpirationMs": "3600", "labels": LABELS, "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + "defaultRoundingMode": DEFAULT_ROUNDING_MODE, } conn = client._connection = make_connection(RESOURCE) entries = [ @@ -88,8 +90,8 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): before.default_table_expiration_ms = 3600 before.location = LOCATION before.labels = LABELS + before.default_rounding_mode = DEFAULT_ROUNDING_MODE after = client.create_dataset(before) - assert after.dataset_id == DS_ID assert after.project == PROJECT assert after.etag == RESOURCE["etag"] @@ -99,6 +101,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): assert after.location == LOCATION assert after.default_table_expiration_ms == 3600 assert after.labels == LABELS + assert after.default_rounding_mode == DEFAULT_ROUNDING_MODE conn.api_request.assert_called_once_with( method="POST", @@ -109,6 +112,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "friendlyName": FRIENDLY_NAME, "location": LOCATION, "defaultTableExpirationMs": "3600", + "defaultRoundingMode": DEFAULT_ROUNDING_MODE, "access": [ {"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW, "role": None}, @@ -365,3 +369,100 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) + + +def test_create_dataset_with_default_rounding_mode_if_value_is_none( + PROJECT, DS_ID, LOCATION +): + default_rounding_mode = None + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "{}:{}".format(PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.default_rounding_mode = default_rounding_mode + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.default_rounding_mode is None + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + "defaultRoundingMode": "ROUNDING_MODE_UNSPECIFIED", + }, + timeout=DEFAULT_TIMEOUT, + ) + + +def test_create_dataset_with_default_rounding_mode_if_value_is_not_string( + PROJECT, DS_ID, LOCATION +): + default_rounding_mode = 10 + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.default_rounding_mode = default_rounding_mode + assert str(e.value) == "Pass a string, or None" + + +def test_create_dataset_with_default_rounding_mode_if_value_is_not_in_possible_values( + PROJECT, DS_ID +): + default_rounding_mode = "ROUND_HALF_AWAY_FROM_ZEROS" + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.default_rounding_mode = default_rounding_mode + assert ( + str(e.value) + == "rounding mode needs to be one of ROUNDING_MODE_UNSPECIFIED,ROUND_HALF_AWAY_FROM_ZERO,ROUND_HALF_EVEN" + ) + + +def test_create_dataset_with_default_rounding_mode_if_value_is_in_possible_values( + PROJECT, DS_ID, LOCATION +): + default_rounding_mode = "ROUND_HALF_AWAY_FROM_ZERO" + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "{}:{}".format(PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.default_rounding_mode = default_rounding_mode + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.default_rounding_mode is None + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + "defaultRoundingMode": default_rounding_mode, + }, + timeout=DEFAULT_TIMEOUT, + ) From f368b73a1a3ed8fe186bbddd4537ac6cedd5ba23 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Oct 2023 11:49:31 -0700 Subject: [PATCH 1662/2016] chore(deps): bump urllib3 from 1.26.15 to 1.26.18 in /samples/geography (#1684) Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.15 to 1.26.18. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.15...1.26.18) --- updated-dependencies: - dependency-name: urllib3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 714e032add21..9bc6ee32cf13 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -42,4 +42,4 @@ Shapely==2.0.1 six==1.16.0 typing-extensions==4.7.1 typing-inspect==0.9.0 -urllib3==1.26.15 +urllib3==1.26.18 From 9450688a5c761265ad5c8987d078ba5b32c8568a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:47:00 -0400 Subject: [PATCH 1663/2016] chore(main): release 3.13.0 (#1676) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 20 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index a93bde9ebfd2..41206fd788c0 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,26 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.13.0](https://github.com/googleapis/python-bigquery/compare/v3.12.0...v3.13.0) (2023-10-30) + + +### Features + +* Add `Model.transform_columns` property ([#1661](https://github.com/googleapis/python-bigquery/issues/1661)) ([5ceed05](https://github.com/googleapis/python-bigquery/commit/5ceed056482f6d1f2fc45e7e6b84382de45c85ed)) +* Add support for dataset.default_rounding_mode ([#1688](https://github.com/googleapis/python-bigquery/issues/1688)) ([83bc768](https://github.com/googleapis/python-bigquery/commit/83bc768b90a852d258a4805603020a296e02d2f9)) + + +### Bug Fixes + +* AccessEntry API representation parsing ([#1682](https://github.com/googleapis/python-bigquery/issues/1682)) ([a40d7ae](https://github.com/googleapis/python-bigquery/commit/a40d7ae03149708fc34c962b43a6ac198780b6aa)) + + +### Documentation + +* Remove redundant `bigquery_update_table_expiration` code sample ([#1673](https://github.com/googleapis/python-bigquery/issues/1673)) ([2dded33](https://github.com/googleapis/python-bigquery/commit/2dded33626b3de6c4ab5e1229eb4c85786b2ff53)) +* Revised `create_partitioned_table` sample ([#1447](https://github.com/googleapis/python-bigquery/issues/1447)) ([40ba859](https://github.com/googleapis/python-bigquery/commit/40ba859059c3e463e17ea7781bc5a9aff8244c5d)) +* Revised relax column mode sample ([#1467](https://github.com/googleapis/python-bigquery/issues/1467)) ([b8c9276](https://github.com/googleapis/python-bigquery/commit/b8c9276be011d971b941b583fd3d4417d438067f)) + ## [3.12.0](https://github.com/googleapis/python-bigquery/compare/v3.11.4...v3.12.0) (2023-10-02) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index ea71d198bdd0..ee029aced741 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.12.0" +__version__ = "3.13.0" From 7b0739505ee3dc361fea828701a88952a090a98b Mon Sep 17 00:00:00 2001 From: Gaurang Shah Date: Tue, 31 Oct 2023 19:04:41 -0400 Subject: [PATCH 1664/2016] feat: add support dataset.max_time_travel_hours (#1683) * feat: add support dataset.max_time_travel_hours * Update tests/unit/test_create_dataset.py * Update tests/unit/test_create_dataset.py * Update google/cloud/bigquery/dataset.py * update test_create_dataset_with_max_time_travel_hours --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/dataset.py | 23 ++++++ .../tests/system/test_client.py | 7 ++ .../tests/unit/test_create_dataset.py | 79 +++++++++++++++++++ 3 files changed, 109 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index af94784a4e42..726a2a17af18 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -525,6 +525,7 @@ class Dataset(object): "friendly_name": "friendlyName", "default_encryption_configuration": "defaultEncryptionConfiguration", "storage_billing_model": "storageBillingModel", + "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", } @@ -533,6 +534,28 @@ def __init__(self, dataset_ref) -> None: dataset_ref = DatasetReference.from_string(dataset_ref) self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}} + @property + def max_time_travel_hours(self): + """ + Optional[int]: Defines the time travel window in hours. The value can + be from 48 to 168 hours (2 to 7 days), and in multiple of 24 hours + (48, 72, 96, 120, 144, 168). + The default value is 168 hours if this is not set. + """ + return self._properties.get("maxTimeTravelHours") + + @max_time_travel_hours.setter + def max_time_travel_hours(self, hours): + if not isinstance(hours, int): + raise ValueError(f"max_time_travel_hours must be an integer. Got {hours}") + if hours < 2 * 24 or hours > 7 * 24: + raise ValueError( + "Time Travel Window should be from 48 to 168 hours (2 to 7 days)" + ) + if hours % 24 != 0: + raise ValueError("Time Travel Window should be multiple of 24") + self._properties["maxTimeTravelHours"] = hours + @property def default_rounding_mode(self): """Union[str, None]: defaultRoundingMode of the dataset as set by the user diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 09606590ede5..9660d5fa7885 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -238,6 +238,11 @@ def test_create_dataset(self): self.assertEqual(dataset.dataset_id, DATASET_ID) self.assertEqual(dataset.project, Config.CLIENT.project) + def test_create_dataset_max_time_travel_hours(self): + DATASET_ID = _make_dataset_id("create_ci_dataset") + dataset = self.temp_dataset(DATASET_ID, max_time_travel_hours=24 * 2) + self.assertEqual(int(dataset.max_time_travel_hours), 24 * 2) + def test_get_dataset(self): dataset_id = _make_dataset_id("get_dataset") client = Config.CLIENT @@ -2299,6 +2304,8 @@ def temp_dataset(self, dataset_id, *args, **kwargs): dataset = Dataset(dataset_ref) if kwargs.get("location"): dataset.location = kwargs.get("location") + if kwargs.get("max_time_travel_hours"): + dataset.max_time_travel_hours = kwargs.get("max_time_travel_hours") if kwargs.get("default_rounding_mode"): dataset.default_rounding_mode = kwargs.get("default_rounding_mode") diff --git a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py index 3b2e644d9419..8374e6e0ad60 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py @@ -466,3 +466,82 @@ def test_create_dataset_with_default_rounding_mode_if_value_is_in_possible_value }, timeout=DEFAULT_TIMEOUT, ) + + +def test_create_dataset_with_max_time_travel_hours(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + max_time_travel_hours = 24 * 3 + + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "{}:{}".format(PROJECT, DS_ID), + "location": LOCATION, + "maxTimeTravelHours": max_time_travel_hours, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.max_time_travel_hours = max_time_travel_hours + after = client.create_dataset(before) + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.max_time_travel_hours == max_time_travel_hours + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + "maxTimeTravelHours": max_time_travel_hours, + }, + timeout=DEFAULT_TIMEOUT, + ) + + +def test_create_dataset_with_max_time_travel_hours_not_multiple_of_24( + PROJECT, DS_ID, LOCATION +): + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.max_time_travel_hours = 50 + assert str(e.value) == "Time Travel Window should be multiple of 24" + + +def test_create_dataset_with_max_time_travel_hours_is_less_than_2_days( + PROJECT, DS_ID, LOCATION +): + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.max_time_travel_hours = 24 + assert ( + str(e.value) + == "Time Travel Window should be from 48 to 168 hours (2 to 7 days)" + ) + + +def test_create_dataset_with_max_time_travel_hours_is_greater_than_7_days( + PROJECT, DS_ID, LOCATION +): + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.max_time_travel_hours = 192 + assert ( + str(e.value) + == "Time Travel Window should be from 48 to 168 hours (2 to 7 days)" + ) + + +def test_create_dataset_with_max_time_travel_hours_is_not_int(PROJECT, DS_ID, LOCATION): + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.max_time_travel_hours = "50" + assert str(e.value) == "max_time_travel_hours must be an integer. Got 50" From aca935f824a108bd6887f596227301b1fe483998 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Riano Date: Wed, 1 Nov 2023 20:15:32 +0100 Subject: [PATCH 1665/2016] feat: add support for Dataset.isCaseInsensitive (#1671) * feat: add support for Dataset.isCaseInsensitive This commit creates a property named is_case_insensitive (in dataset.py) that allows the usage of the isCaseSensitive field in the Dataset REST API. Fixes: https://github.com/googleapis/python-bigquery/issues/1670 * tests: add unit tests for dataset.is_case_insensitive * docs: improve comments for dataset.is_case_sensitive (code and tests) * docs: improve docstring of is_case_insensitive Co-authored-by: Lingqing Gan * Update tests/system/test_client.py --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/dataset.py | 20 ++++++ .../tests/system/test_client.py | 61 ++++++++++++++++++- .../tests/unit/test_dataset.py | 25 ++++++++ 3 files changed, 104 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 726a2a17af18..c313045ce3ad 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -524,6 +524,7 @@ class Dataset(object): "default_table_expiration_ms": "defaultTableExpirationMs", "friendly_name": "friendlyName", "default_encryption_configuration": "defaultEncryptionConfiguration", + "is_case_insensitive": "isCaseInsensitive", "storage_billing_model": "storageBillingModel", "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", @@ -822,6 +823,25 @@ def default_encryption_configuration(self, value): api_repr = value.to_api_repr() self._properties["defaultEncryptionConfiguration"] = api_repr + @property + def is_case_insensitive(self): + """Optional[bool]: True if the dataset and its table names are case-insensitive, otherwise False. + By default, this is False, which means the dataset and its table names are case-sensitive. + This field does not affect routine references. + + Raises: + ValueError: for invalid value types. + """ + return self._properties.get("isCaseInsensitive") or False + + @is_case_insensitive.setter + def is_case_insensitive(self, value): + if not isinstance(value, bool) and value is not None: + raise ValueError("Pass a boolean value, or None") + if value is None: + value = False + self._properties["isCaseInsensitive"] = value + @property def storage_billing_model(self): """Union[str, None]: StorageBillingModel of the dataset as set by the user diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 9660d5fa7885..c8ff551ce70b 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -237,6 +237,17 @@ def test_create_dataset(self): self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.dataset_id, DATASET_ID) self.assertEqual(dataset.project, Config.CLIENT.project) + self.assertIs(dataset.is_case_insensitive, False) + + def test_create_dataset_case_sensitive(self): + DATASET_ID = _make_dataset_id("create_cs_dataset") + dataset = self.temp_dataset(DATASET_ID, is_case_insensitive=False) + self.assertIs(dataset.is_case_insensitive, False) + + def test_create_dataset_case_insensitive(self): + DATASET_ID = _make_dataset_id("create_ci_dataset") + dataset = self.temp_dataset(DATASET_ID, is_case_insensitive=True) + self.assertIs(dataset.is_case_insensitive, True) def test_create_dataset_max_time_travel_hours(self): DATASET_ID = _make_dataset_id("create_ci_dataset") @@ -283,16 +294,19 @@ def test_update_dataset(self): self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) self.assertEqual(dataset.labels, {}) + self.assertIs(dataset.is_case_insensitive, False) dataset.friendly_name = "Friendly" dataset.description = "Description" dataset.labels = {"priority": "high", "color": "blue"} + dataset.is_case_insensitive = True ds2 = Config.CLIENT.update_dataset( - dataset, ("friendly_name", "description", "labels") + dataset, ("friendly_name", "description", "labels", "is_case_insensitive") ) self.assertEqual(ds2.friendly_name, "Friendly") self.assertEqual(ds2.description, "Description") self.assertEqual(ds2.labels, {"priority": "high", "color": "blue"}) + self.assertIs(ds2.is_case_insensitive, True) ds2.labels = { "color": "green", # change @@ -347,6 +361,48 @@ def test_create_table(self): self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_id) + def test_create_tables_in_case_insensitive_dataset(self): + ci_dataset = self.temp_dataset( + _make_dataset_id("create_table"), is_case_insensitive=True + ) + table_arg = Table(ci_dataset.table("test_table2"), schema=SCHEMA) + tablemc_arg = Table(ci_dataset.table("Test_taBLe2")) # same name, in Mixed Case + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table_arg)) + self.assertTrue(_table_exists(tablemc_arg)) + self.assertIs(ci_dataset.is_case_insensitive, True) + + def test_create_tables_in_case_sensitive_dataset(self): + ci_dataset = self.temp_dataset( + _make_dataset_id("create_table"), is_case_insensitive=False + ) + table_arg = Table(ci_dataset.table("test_table3"), schema=SCHEMA) + tablemc_arg = Table(ci_dataset.table("Test_taBLe3")) # same name, in Mixed Case + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table_arg)) + self.assertFalse(_table_exists(tablemc_arg)) + self.assertIs(ci_dataset.is_case_insensitive, False) + + def test_create_tables_in_default_sensitivity_dataset(self): + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_arg = Table(dataset.table("test_table4"), schema=SCHEMA) + tablemc_arg = Table( + dataset.table("Test_taBLe4") + ) # same name, in MC (Mixed Case) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table_arg)) + self.assertFalse(_table_exists(tablemc_arg)) + self.assertIs(dataset.is_case_insensitive, False) + def test_create_table_with_real_custom_policy(self): from google.cloud.bigquery.schema import PolicyTagList @@ -2308,7 +2364,8 @@ def temp_dataset(self, dataset_id, *args, **kwargs): dataset.max_time_travel_hours = kwargs.get("max_time_travel_hours") if kwargs.get("default_rounding_mode"): dataset.default_rounding_mode = kwargs.get("default_rounding_mode") - + if kwargs.get("is_case_insensitive"): + dataset.is_case_insensitive = kwargs.get("is_case_insensitive") dataset = helpers.retry_403(Config.CLIENT.create_dataset)(dataset) self.to_delete.append(dataset) return dataset diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 0a709ab4361b..423349a51aae 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -744,6 +744,9 @@ def _verify_resource_properties(self, dataset, resource): self.assertEqual(dataset.description, resource.get("description")) self.assertEqual(dataset.friendly_name, resource.get("friendlyName")) self.assertEqual(dataset.location, resource.get("location")) + self.assertEqual( + dataset.is_case_insensitive, resource.get("isCaseInsensitive") or False + ) if "defaultEncryptionConfiguration" in resource: self.assertEqual( dataset.default_encryption_configuration.kms_key_name, @@ -781,6 +784,7 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.description) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) + self.assertEqual(dataset.is_case_insensitive, False) def test_ctor_string(self): dataset = self._make_one("some-project.some_dset") @@ -818,6 +822,7 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.description) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) + self.assertEqual(dataset.is_case_insensitive, False) def test_access_entries_setter_non_list(self): dataset = self._make_one(self.DS_REF) @@ -910,6 +915,26 @@ def test_labels_getter_missing_value(self): dataset = self._make_one(self.DS_REF) self.assertEqual(dataset.labels, {}) + def test_is_case_insensitive_setter_bad_value(self): + dataset = self._make_one(self.DS_REF) + with self.assertRaises(ValueError): + dataset.is_case_insensitive = 0 + + def test_is_case_insensitive_setter_true(self): + dataset = self._make_one(self.DS_REF) + dataset.is_case_insensitive = True + self.assertEqual(dataset.is_case_insensitive, True) + + def test_is_case_insensitive_setter_none(self): + dataset = self._make_one(self.DS_REF) + dataset.is_case_insensitive = None + self.assertEqual(dataset.is_case_insensitive, False) + + def test_is_case_insensitive_setter_false(self): + dataset = self._make_one(self.DS_REF) + dataset.is_case_insensitive = False + self.assertEqual(dataset.is_case_insensitive, False) + def test_from_api_repr_missing_identity(self): self._setUpConstants() RESOURCE = {} From c2cb6387f5f39aff4930f4a7c1dacf29bd208cce Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 1 Nov 2023 19:49:09 -0400 Subject: [PATCH 1666/2016] feat: support data_governance_type (#1708) * feat: support data_governance_type * remove value validation, add sys test --- .../google/cloud/bigquery/routine/routine.py | 24 +++++++++- .../tests/system/test_client.py | 36 ++++++++++++++ .../tests/unit/routine/test_routine.py | 47 +++++++++++++++++++ 3 files changed, 105 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py index ef33d507ec72..83cb6362d950 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py @@ -68,6 +68,7 @@ class Routine(object): "description": "description", "determinism_level": "determinismLevel", "remote_function_options": "remoteFunctionOptions", + "data_governance_type": "dataGovernanceType", } def __init__(self, routine_ref, **kwargs) -> None: @@ -300,8 +301,8 @@ def determinism_level(self, value): @property def remote_function_options(self): - """Optional[google.cloud.bigquery.routine.RemoteFunctionOptions]: Configures remote function - options for a routine. + """Optional[google.cloud.bigquery.routine.RemoteFunctionOptions]: + Configures remote function options for a routine. Raises: ValueError: @@ -329,6 +330,25 @@ def remote_function_options(self, value): self._PROPERTY_TO_API_FIELD["remote_function_options"] ] = api_repr + @property + def data_governance_type(self): + """Optional[str]: If set to ``DATA_MASKING``, the function is validated + and made available as a masking function. + + Raises: + ValueError: + If the value is not :data:`string` or :data:`None`. + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["data_governance_type"]) + + @data_governance_type.setter + def data_governance_type(self, value): + if value is not None and not isinstance(value, str): + raise ValueError( + "invalid data_governance_type, must be a string or `None`." + ) + self._properties[self._PROPERTY_TO_API_FIELD["data_governance_type"]] = value + @classmethod def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index c8ff551ce70b..7cea8cfa414f 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -13,6 +13,7 @@ # limitations under the License. import base64 +import copy import csv import datetime import decimal @@ -2236,6 +2237,41 @@ def test_create_tvf_routine(self): ] assert result_rows == expected + def test_create_routine_w_data_governance(self): + routine_name = "routine_with_data_governance" + dataset = self.temp_dataset(_make_dataset_id("create_routine")) + + routine = bigquery.Routine( + dataset.routine(routine_name), + type_="SCALAR_FUNCTION", + language="SQL", + body="x", + arguments=[ + bigquery.RoutineArgument( + name="x", + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), + ) + ], + data_governance_type="DATA_MASKING", + return_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), + ) + routine_original = copy.deepcopy(routine) + + client = Config.CLIENT + routine_new = client.create_routine(routine) + + assert routine_new.reference == routine_original.reference + assert routine_new.type_ == routine_original.type_ + assert routine_new.language == routine_original.language + assert routine_new.body == routine_original.body + assert routine_new.arguments == routine_original.arguments + assert routine_new.return_type == routine_original.return_type + assert routine_new.data_governance_type == routine_original.data_governance_type + def test_create_table_rows_fetch_nested_schema(self): table_name = "test_table" dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema")) diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py index 87767200c80a..acd3bc40e2ff 100644 --- a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py @@ -154,6 +154,7 @@ def test_from_api_repr(target_class): "foo": "bar", }, }, + "dataGovernanceType": "DATA_MASKING", } actual_routine = target_class.from_api_repr(resource) @@ -192,6 +193,7 @@ def test_from_api_repr(target_class): assert actual_routine.remote_function_options.connection == "connection_string" assert actual_routine.remote_function_options.max_batching_rows == 50 assert actual_routine.remote_function_options.user_defined_context == {"foo": "bar"} + assert actual_routine.data_governance_type == "DATA_MASKING" def test_from_api_repr_tvf_function(target_class): @@ -294,6 +296,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.description is None assert actual_routine.determinism_level is None assert actual_routine.remote_function_options is None + assert actual_routine.data_governance_type is None def test_from_api_repr_w_unknown_fields(target_class): @@ -428,6 +431,20 @@ def test_from_api_repr_w_unknown_fields(target_class): "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED }, ), + ( + { + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "definitionBody": "x * 3", + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, + "dataGovernanceType": "DATA_MASKING", + }, + ["data_governance_type"], + {"dataGovernanceType": "DATA_MASKING"}, + ), ( {}, [ @@ -554,6 +571,36 @@ def test_set_remote_function_options_w_none(object_under_test): assert object_under_test._properties["remoteFunctionOptions"] is None +def test_set_data_governance_type_w_none(object_under_test): + object_under_test.data_governance_type = None + assert object_under_test.data_governance_type is None + assert object_under_test._properties["dataGovernanceType"] is None + + +def test_set_data_governance_type_valid(object_under_test): + object_under_test.data_governance_type = "DATA_MASKING" + assert object_under_test.data_governance_type == "DATA_MASKING" + assert object_under_test._properties["dataGovernanceType"] == "DATA_MASKING" + + +def test_set_data_governance_type_wrong_type(object_under_test): + with pytest.raises(ValueError) as exp: + object_under_test.data_governance_type = 1 + assert "invalid data_governance_type" in str(exp) + assert object_under_test.data_governance_type is None + assert object_under_test._properties.get("dataGovernanceType") is None + + +def test_set_data_governance_type_wrong_str(object_under_test): + """Client does not verify the content of data_governance_type string to be + compatible with future upgrades. If the value is not supported, BigQuery + itself will report an error. + """ + object_under_test.data_governance_type = "RANDOM_STRING" + assert object_under_test.data_governance_type == "RANDOM_STRING" + assert object_under_test._properties["dataGovernanceType"] == "RANDOM_STRING" + + def test_repr(target_class): model = target_class("my-proj.my_dset.my_routine") actual_routine = repr(model) From a1f577f97af26ee2818dd262c33b2e703d93b1dd Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:22:44 -0400 Subject: [PATCH 1667/2016] chore: update docfx minimum Python version (#1712) --- packages/google-cloud-bigquery/noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index a2b7a6843d11..703e36cbb5b1 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -427,7 +427,7 @@ def docs(session): ) -@nox.session(python="3.9") +@nox.session(python="3.10") def docfx(session): """Build the docfx yaml files for this library.""" From d6df89ea2d5584b9e27e4acb2e064e9dd7a39eab Mon Sep 17 00:00:00 2001 From: Kira Date: Tue, 7 Nov 2023 13:08:23 -0800 Subject: [PATCH 1668/2016] chore: refactor version checks for pandas library (#1711) * chore: refactor version checks for pandas library * readded removed importing of pandas * revert bad commit * merged from main, added type:ignore tag to get around mypy error * Added ignore statement for mypy error, removed checking max version of Pandas * updated docstring error * Added parameterize to test to test multiple supported versons --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/__init__.py | 1 + .../cloud/bigquery/_versions_helpers.py | 63 +++++++++++++++++++ .../google/cloud/bigquery/client.py | 6 +- .../google/cloud/bigquery/exceptions.py | 4 ++ .../tests/unit/test__versions_helpers.py | 51 +++++++++++++++ 5 files changed, 123 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 40e3a157870c..72576e6084d1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -202,6 +202,7 @@ # Custom exceptions "LegacyBigQueryStorageError", "LegacyPyarrowError", + "LegacyPandasError", ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py index ce529b76e473..4ff4b9700766 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py @@ -24,6 +24,7 @@ _MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") +_MIN_PANDAS_VERSION = packaging.version.Version("1.1.0") class PyarrowVersions: @@ -171,3 +172,65 @@ def try_import(self, raise_if_error: bool = False) -> Any: BQ_STORAGE_VERSIONS = BQStorageVersions() + + +class PandasVersions: + """Version comparisons for pandas package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of pandas""" + if self._installed_version is None: + import pandas # type: ignore + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(pandas, "__version__", "0.0.0") + ) + + return self._installed_version + + def try_import(self, raise_if_error: bool = False) -> Any: + """Verify that a recent enough version of pandas extra is installed. + The function assumes that pandas extra is installed, and should thus + be used in places where this assumption holds. + Because `pip` can install an outdated version of this extra despite + the constraints in `setup.py`, the calling code can use this helper + to verify the version compatibility at runtime. + Returns: + The ``pandas`` module or ``None``. + Raises: + exceptions.LegacyPandasError: + If the pandas package is outdated and ``raise_if_error`` is + ``True``. + """ + try: + import pandas + except ImportError as exc: # pragma: NO COVER + if raise_if_error: + raise exceptions.LegacyPandasError( + "pandas package not found. Install pandas version >=" + f" {_MIN_PANDAS_VERSION}" + ) from exc + return None + + if self.installed_version < _MIN_PANDAS_VERSION: + if raise_if_error: + msg = ( + "Dependency pandas is outdated, please upgrade" + f" it to version >= {_MIN_PANDAS_VERSION}" + f" (version found: {self.installed_version})." + ) + raise exceptions.LegacyPandasError(msg) + return None + + return pandas + + +PANDAS_VERSIONS = PandasVersions() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 496015b219c6..d4a759ba4805 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -115,6 +115,9 @@ from google.cloud.bigquery.table import RowIterator pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() +pandas = ( + _versions_helpers.PANDAS_VERSIONS.try_import() +) # mypy check fails because pandas import is outside module, there are type: ignore comments related to this TimeoutType = Union[float, None] ResumableTimeoutType = Union[ @@ -124,7 +127,6 @@ if typing.TYPE_CHECKING: # pragma: NO COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] - import pandas # type: ignore import requests # required by api-core _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB @@ -2488,7 +2490,7 @@ def load_table_from_file( def load_table_from_dataframe( self, - dataframe: "pandas.DataFrame", + dataframe: "pandas.DataFrame", # type: ignore destination: Union[Table, TableReference, str], num_retries: int = _DEFAULT_NUM_RETRIES, job_id: Optional[str] = None, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py index e94a6c832aa3..62e0d540c91c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/exceptions.py @@ -29,3 +29,7 @@ class BigQueryStorageNotFoundError(BigQueryError): """Raised when BigQuery Storage extra is not installed when trying to import it. """ + + +class LegacyPandasError(BigQueryError): + """Raised when too old a version of pandas package is detected at runtime.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py index 144f14b7c0c3..afe170e7a0bd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py @@ -26,6 +26,11 @@ except ImportError: # pragma: NO COVER bigquery_storage = None +try: + import pandas # type: ignore +except ImportError: # pragma: NO COVER + pandas = None + from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions @@ -173,3 +178,49 @@ def test_bqstorage_is_read_session_optional_false(): bqstorage_versions = _versions_helpers.BQStorageVersions() with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): assert not bqstorage_versions.is_read_session_optional + + +@pytest.mark.skipif(pandas is None, reason="pandas is not installed") +@pytest.mark.parametrize("version", ["1.5.0", "2.0.0", "2.1.0"]) +def test_try_import_raises_no_error_w_recent_pandas(version): + versions = _versions_helpers.PandasVersions() + with mock.patch("pandas.__version__", new=version): + try: + pandas = versions.try_import(raise_if_error=True) + assert pandas is not None + except exceptions.LegacyPandasError: # pragma: NO COVER + raise ("Legacy error raised with a non-legacy dependency version.") + + +@pytest.mark.skipif(pandas is None, reason="pandas is not installed") +def test_try_import_returns_none_w_legacy_pandas(): + versions = _versions_helpers.PandasVersions() + with mock.patch("pandas.__version__", new="1.0.0"): + pandas = versions.try_import() + assert pandas is None + + +@pytest.mark.skipif(pandas is None, reason="pandas is not installed") +def test_try_import_raises_error_w_legacy_pandas(): + versions = _versions_helpers.PandasVersions() + with mock.patch("pandas.__version__", new="1.0.0"): + with pytest.raises(exceptions.LegacyPandasError): + versions.try_import(raise_if_error=True) + + +@pytest.mark.skipif(pandas is None, reason="pandas is not installed") +def test_installed_pandas_version_returns_cached(): + versions = _versions_helpers.PandasVersions() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + +@pytest.mark.skipif(pandas is None, reason="pandas is not installed") +def test_installed_pandas_version_returns_parsed_version(): + versions = _versions_helpers.PandasVersions() + with mock.patch("pandas.__version__", new="1.1.0"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 1 + assert version.micro == 0 From e095f945e76614c491f83c40c3ac2b42c0e32009 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 8 Nov 2023 13:00:24 -0800 Subject: [PATCH 1669/2016] chore: bump urllib3 from 1.26.12 to 1.26.18 (#1714) Source-Link: https://github.com/googleapis/synthtool/commit/febacccc98d6d224aff9d0bd0373bb5a4cd5969c Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:caffe0a9277daeccc4d1de5c9b55ebba0901b57c2f713ec9c876b0d4ec064f61 Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/requirements.txt | 532 +++++++++--------- 2 files changed, 277 insertions(+), 259 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 7f291dbd5f9b..453b540c1e58 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:4f9b3b106ad0beafc2c8a415e3f62c1a0cc23cabea115dbe841b848f581cfe99 -# created: 2023-10-18T20:26:37.410353675Z + digest: sha256:caffe0a9277daeccc4d1de5c9b55ebba0901b57c2f713ec9c876b0d4ec064f61 +# created: 2023-11-08T19:46:45.022803742Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 16170d0ca7b8..8957e21104e2 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -4,91 +4,75 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==2.0.0 \ - --hash=sha256:6372ad78c89d662035101418ae253668445b391755cfe94ea52f1b9d22425b20 \ - --hash=sha256:cffa11ea77999bb0dd27bb25ff6dc142a6796142f68d45b1a26b11f58724561e +argcomplete==3.1.4 \ + --hash=sha256:72558ba729e4c468572609817226fb0a6e7e9a0a7d477b882be168c0b4a62b94 \ + --hash=sha256:fbe56f8cda08aa9a04b307d8482ea703e96a6a801611acb4be9bf3942017989f # via nox -attrs==22.1.0 \ - --hash=sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6 \ - --hash=sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c +attrs==23.1.0 \ + --hash=sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04 \ + --hash=sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015 # via gcp-releasetool -bleach==5.0.1 \ - --hash=sha256:085f7f33c15bd408dd9b17a4ad77c577db66d76203e5984b1bd59baeee948b2a \ - --hash=sha256:0d03255c47eb9bd2f26aa9bb7f2107732e7e8fe195ca2f64709fcf3b0a4a085c - # via readme-renderer -cachetools==5.2.0 \ - --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \ - --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db +cachetools==5.3.2 \ + --hash=sha256:086ee420196f7b2ab9ca2db2520aca326318b68fe5ba8bc4d49cca91add450f2 \ + --hash=sha256:861f35a13a451f94e301ce2bec7cac63e881232ccce7ed67fab9b5df4d3beaa1 # via google-auth certifi==2023.7.22 \ --hash=sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082 \ --hash=sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9 # via requests -cffi==1.15.1 \ - --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ - --hash=sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef \ - --hash=sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104 \ - --hash=sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426 \ - --hash=sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405 \ - --hash=sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375 \ - --hash=sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a \ - --hash=sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e \ - --hash=sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc \ - --hash=sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf \ - --hash=sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185 \ - --hash=sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497 \ - --hash=sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3 \ - --hash=sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35 \ - --hash=sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c \ - --hash=sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83 \ - --hash=sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21 \ - --hash=sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca \ - --hash=sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984 \ - --hash=sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac \ - --hash=sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd \ - --hash=sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee \ - --hash=sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a \ - --hash=sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2 \ - --hash=sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192 \ - --hash=sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7 \ - --hash=sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585 \ - --hash=sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f \ - --hash=sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e \ - --hash=sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27 \ - --hash=sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b \ - --hash=sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e \ - --hash=sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e \ - --hash=sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d \ - --hash=sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c \ - --hash=sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415 \ - --hash=sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82 \ - --hash=sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02 \ - --hash=sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314 \ - --hash=sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325 \ - --hash=sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c \ - --hash=sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3 \ - --hash=sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914 \ - --hash=sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045 \ - --hash=sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d \ - --hash=sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9 \ - --hash=sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5 \ - --hash=sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2 \ - --hash=sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c \ - --hash=sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3 \ - --hash=sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2 \ - --hash=sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8 \ - --hash=sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d \ - --hash=sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d \ - --hash=sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9 \ - --hash=sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162 \ - --hash=sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76 \ - --hash=sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4 \ - --hash=sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e \ - --hash=sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9 \ - --hash=sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6 \ - --hash=sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b \ - --hash=sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01 \ - --hash=sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0 +cffi==1.16.0 \ + --hash=sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc \ + --hash=sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a \ + --hash=sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417 \ + --hash=sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab \ + --hash=sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520 \ + --hash=sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36 \ + --hash=sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743 \ + --hash=sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8 \ + --hash=sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed \ + --hash=sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684 \ + --hash=sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56 \ + --hash=sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324 \ + --hash=sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d \ + --hash=sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235 \ + --hash=sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e \ + --hash=sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088 \ + --hash=sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000 \ + --hash=sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7 \ + --hash=sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e \ + --hash=sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673 \ + --hash=sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c \ + --hash=sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe \ + --hash=sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2 \ + --hash=sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098 \ + --hash=sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8 \ + --hash=sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a \ + --hash=sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0 \ + --hash=sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b \ + --hash=sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896 \ + --hash=sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e \ + --hash=sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9 \ + --hash=sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2 \ + --hash=sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b \ + --hash=sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6 \ + --hash=sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404 \ + --hash=sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f \ + --hash=sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0 \ + --hash=sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4 \ + --hash=sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc \ + --hash=sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936 \ + --hash=sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba \ + --hash=sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872 \ + --hash=sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb \ + --hash=sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614 \ + --hash=sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1 \ + --hash=sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d \ + --hash=sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969 \ + --hash=sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b \ + --hash=sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4 \ + --hash=sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627 \ + --hash=sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956 \ + --hash=sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357 # via cryptography charset-normalizer==2.1.1 \ --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ @@ -109,78 +93,74 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -commonmark==0.9.1 \ - --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ - --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 - # via rich -cryptography==41.0.4 \ - --hash=sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67 \ - --hash=sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311 \ - --hash=sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8 \ - --hash=sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13 \ - --hash=sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143 \ - --hash=sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f \ - --hash=sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829 \ - --hash=sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd \ - --hash=sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397 \ - --hash=sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac \ - --hash=sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d \ - --hash=sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a \ - --hash=sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839 \ - --hash=sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e \ - --hash=sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6 \ - --hash=sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9 \ - --hash=sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860 \ - --hash=sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca \ - --hash=sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91 \ - --hash=sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d \ - --hash=sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714 \ - --hash=sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb \ - --hash=sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f +cryptography==41.0.5 \ + --hash=sha256:0c327cac00f082013c7c9fb6c46b7cc9fa3c288ca702c74773968173bda421bf \ + --hash=sha256:0d2a6a598847c46e3e321a7aef8af1436f11c27f1254933746304ff014664d84 \ + --hash=sha256:227ec057cd32a41c6651701abc0328135e472ed450f47c2766f23267b792a88e \ + --hash=sha256:22892cc830d8b2c89ea60148227631bb96a7da0c1b722f2aac8824b1b7c0b6b8 \ + --hash=sha256:392cb88b597247177172e02da6b7a63deeff1937fa6fec3bbf902ebd75d97ec7 \ + --hash=sha256:3be3ca726e1572517d2bef99a818378bbcf7d7799d5372a46c79c29eb8d166c1 \ + --hash=sha256:573eb7128cbca75f9157dcde974781209463ce56b5804983e11a1c462f0f4e88 \ + --hash=sha256:580afc7b7216deeb87a098ef0674d6ee34ab55993140838b14c9b83312b37b86 \ + --hash=sha256:5a70187954ba7292c7876734183e810b728b4f3965fbe571421cb2434d279179 \ + --hash=sha256:73801ac9736741f220e20435f84ecec75ed70eda90f781a148f1bad546963d81 \ + --hash=sha256:7d208c21e47940369accfc9e85f0de7693d9a5d843c2509b3846b2db170dfd20 \ + --hash=sha256:8254962e6ba1f4d2090c44daf50a547cd5f0bf446dc658a8e5f8156cae0d8548 \ + --hash=sha256:88417bff20162f635f24f849ab182b092697922088b477a7abd6664ddd82291d \ + --hash=sha256:a48e74dad1fb349f3dc1d449ed88e0017d792997a7ad2ec9587ed17405667e6d \ + --hash=sha256:b948e09fe5fb18517d99994184854ebd50b57248736fd4c720ad540560174ec5 \ + --hash=sha256:c707f7afd813478e2019ae32a7c49cd932dd60ab2d2a93e796f68236b7e1fbf1 \ + --hash=sha256:d38e6031e113b7421db1de0c1b1f7739564a88f1684c6b89234fbf6c11b75147 \ + --hash=sha256:d3977f0e276f6f5bf245c403156673db103283266601405376f075c849a0b936 \ + --hash=sha256:da6a0ff8f1016ccc7477e6339e1d50ce5f59b88905585f77193ebd5068f1e797 \ + --hash=sha256:e270c04f4d9b5671ebcc792b3ba5d4488bf7c42c3c241a3748e2599776f29696 \ + --hash=sha256:e886098619d3815e0ad5790c973afeee2c0e6e04b4da90b88e6bd06e2a0b1b72 \ + --hash=sha256:ec3b055ff8f1dce8e6ef28f626e0972981475173d7973d63f271b29c8a2897da \ + --hash=sha256:fba1e91467c65fe64a82c689dc6cf58151158993b13eb7a7f3f4b7f395636723 # via # gcp-releasetool # secretstorage -distlib==0.3.6 \ - --hash=sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46 \ - --hash=sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e +distlib==0.3.7 \ + --hash=sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057 \ + --hash=sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8 # via virtualenv -docutils==0.19 \ - --hash=sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6 \ - --hash=sha256:5e1de4d849fee02c63b040a4a3fd567f4ab104defd8a5511fbbc24a8a017efbc +docutils==0.20.1 \ + --hash=sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6 \ + --hash=sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b # via readme-renderer -filelock==3.8.0 \ - --hash=sha256:55447caa666f2198c5b6b13a26d2084d26fa5b115c00d065664b2124680c4edc \ - --hash=sha256:617eb4e5eedc82fc5f47b6d61e4d11cb837c56cb4544e39081099fa17ad109d4 +filelock==3.13.1 \ + --hash=sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e \ + --hash=sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c # via virtualenv -gcp-docuploader==0.6.4 \ - --hash=sha256:01486419e24633af78fd0167db74a2763974765ee8078ca6eb6964d0ebd388af \ - --hash=sha256:70861190c123d907b3b067da896265ead2eeb9263969d6955c9e0bb091b5ccbf +gcp-docuploader==0.6.5 \ + --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ + --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea # via -r requirements.in -gcp-releasetool==1.10.5 \ - --hash=sha256:174b7b102d704b254f2a26a3eda2c684fd3543320ec239baf771542a2e58e109 \ - --hash=sha256:e29d29927fe2ca493105a82958c6873bb2b90d503acac56be2c229e74de0eec9 +gcp-releasetool==1.16.0 \ + --hash=sha256:27bf19d2e87aaa884096ff941aa3c592c482be3d6a2bfe6f06afafa6af2353e3 \ + --hash=sha256:a316b197a543fd036209d0caba7a8eb4d236d8e65381c80cbc6d7efaa7606d63 # via -r requirements.in -google-api-core==2.10.2 \ - --hash=sha256:10c06f7739fe57781f87523375e8e1a3a4674bf6392cd6131a3222182b971320 \ - --hash=sha256:34f24bd1d5f72a8c4519773d99ca6bf080a6c4e041b4e9f024fe230191dda62e +google-api-core==2.12.0 \ + --hash=sha256:c22e01b1e3c4dcd90998494879612c38d0a3411d1f7b679eb89e2abe3ce1f553 \ + --hash=sha256:ec6054f7d64ad13b41e43d96f735acbd763b0f3b695dabaa2d579673f6a6e160 # via # google-cloud-core # google-cloud-storage -google-auth==2.14.1 \ - --hash=sha256:ccaa901f31ad5cbb562615eb8b664b3dd0bf5404a67618e642307f00613eda4d \ - --hash=sha256:f5d8701633bebc12e0deea4df8abd8aff31c28b355360597f7f2ee60f2e4d016 +google-auth==2.23.4 \ + --hash=sha256:79905d6b1652187def79d491d6e23d0cbb3a21d3c7ba0dbaa9c8a01906b13ff3 \ + --hash=sha256:d4bbc92fe4b8bfd2f3e8d88e5ba7085935da208ee38a134fc280e7ce682a05f2 # via # gcp-releasetool # google-api-core # google-cloud-core # google-cloud-storage -google-cloud-core==2.3.2 \ - --hash=sha256:8417acf6466be2fa85123441696c4badda48db314c607cf1e5d543fa8bdc22fe \ - --hash=sha256:b9529ee7047fd8d4bf4a2182de619154240df17fbe60ead399078c1ae152af9a +google-cloud-core==2.3.3 \ + --hash=sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb \ + --hash=sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863 # via google-cloud-storage -google-cloud-storage==2.6.0 \ - --hash=sha256:104ca28ae61243b637f2f01455cc8a05e8f15a2a18ced96cb587241cdd3820f5 \ - --hash=sha256:4ad0415ff61abdd8bb2ae81c1f8f7ec7d91a1011613f2db87c614c550f97bfe9 +google-cloud-storage==2.13.0 \ + --hash=sha256:ab0bf2e1780a1b74cf17fccb13788070b729f50c252f0c94ada2aae0ca95437d \ + --hash=sha256:f62dc4c7b6cd4360d072e3deb28035fbdad491ac3d9b0b1815a12daea10f37c7 # via gcp-docuploader google-crc32c==1.5.0 \ --hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \ @@ -251,29 +231,31 @@ google-crc32c==1.5.0 \ --hash=sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183 \ --hash=sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556 \ --hash=sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4 - # via google-resumable-media -google-resumable-media==2.4.0 \ - --hash=sha256:2aa004c16d295c8f6c33b2b4788ba59d366677c0a25ae7382436cb30f776deaa \ - --hash=sha256:8d5518502f92b9ecc84ac46779bd4f09694ecb3ba38a3e7ca737a86d15cbca1f + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.6.0 \ + --hash=sha256:972852f6c65f933e15a4a210c2b96930763b47197cdf4aa5f5bea435efb626e7 \ + --hash=sha256:fc03d344381970f79eebb632a3c18bb1828593a2dc5572b5f90115ef7d11e81b # via google-cloud-storage -googleapis-common-protos==1.57.0 \ - --hash=sha256:27a849d6205838fb6cc3c1c21cb9800707a661bb21c6ce7fb13e99eb1f8a0c46 \ - --hash=sha256:a9f4a1d7f6d9809657b7f1316a1aa527f6664891531bcfcc13b6696e685f443c +googleapis-common-protos==1.61.0 \ + --hash=sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0 \ + --hash=sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b # via google-api-core idna==3.4 \ --hash=sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4 \ --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 # via requests -importlib-metadata==5.0.0 \ - --hash=sha256:da31db32b304314d044d3c12c79bd59e307889b287ad12ff387b3500835fc2ab \ - --hash=sha256:ddb0e35065e8938f867ed4928d0ae5bf2a53b7773871bfe6bcc7e4fcdc7dea43 +importlib-metadata==6.8.0 \ + --hash=sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb \ + --hash=sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743 # via # -r requirements.in # keyring # twine -jaraco-classes==3.2.3 \ - --hash=sha256:2353de3288bc6b82120752201c6b1c1a14b058267fa424ed5ce5984e3b922158 \ - --hash=sha256:89559fa5c1d3c34eff6f631ad80bb21f378dbcbb35dd161fd2c6b93f5be2f98a +jaraco-classes==3.3.0 \ + --hash=sha256:10afa92b6743f25c0cf5f37c6bb6e18e2c5bb84a16527ccfc0040ea377e7aaeb \ + --hash=sha256:c063dd08e89217cee02c8d5e5ec560f2c8ce6cdc2fcdc2e68f7b2e5547ed3621 # via keyring jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ @@ -285,75 +267,121 @@ jinja2==3.1.2 \ --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \ --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 # via gcp-releasetool -keyring==23.11.0 \ - --hash=sha256:3dd30011d555f1345dec2c262f0153f2f0ca6bca041fb1dc4588349bb4c0ac1e \ - --hash=sha256:ad192263e2cdd5f12875dedc2da13534359a7e760e77f8d04b50968a821c2361 +keyring==24.2.0 \ + --hash=sha256:4901caaf597bfd3bbd78c9a0c7c4c29fcd8310dab2cffefe749e916b6527acd6 \ + --hash=sha256:ca0746a19ec421219f4d713f848fa297a661a8a8c1504867e55bfb5e09091509 # via # gcp-releasetool # twine -markupsafe==2.1.1 \ - --hash=sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003 \ - --hash=sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88 \ - --hash=sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5 \ - --hash=sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7 \ - --hash=sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a \ - --hash=sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603 \ - --hash=sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1 \ - --hash=sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135 \ - --hash=sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247 \ - --hash=sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6 \ - --hash=sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601 \ - --hash=sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77 \ - --hash=sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02 \ - --hash=sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e \ - --hash=sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63 \ - --hash=sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f \ - --hash=sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980 \ - --hash=sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b \ - --hash=sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812 \ - --hash=sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff \ - --hash=sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96 \ - --hash=sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1 \ - --hash=sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925 \ - --hash=sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a \ - --hash=sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6 \ - --hash=sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e \ - --hash=sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f \ - --hash=sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4 \ - --hash=sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f \ - --hash=sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3 \ - --hash=sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c \ - --hash=sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a \ - --hash=sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417 \ - --hash=sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a \ - --hash=sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a \ - --hash=sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37 \ - --hash=sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452 \ - --hash=sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933 \ - --hash=sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a \ - --hash=sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7 +markdown-it-py==3.0.0 \ + --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ + --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb + # via rich +markupsafe==2.1.3 \ + --hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \ + --hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \ + --hash=sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431 \ + --hash=sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686 \ + --hash=sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c \ + --hash=sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559 \ + --hash=sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc \ + --hash=sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb \ + --hash=sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939 \ + --hash=sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c \ + --hash=sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0 \ + --hash=sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4 \ + --hash=sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9 \ + --hash=sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575 \ + --hash=sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba \ + --hash=sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d \ + --hash=sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd \ + --hash=sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3 \ + --hash=sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00 \ + --hash=sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155 \ + --hash=sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac \ + --hash=sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52 \ + --hash=sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f \ + --hash=sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8 \ + --hash=sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b \ + --hash=sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007 \ + --hash=sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24 \ + --hash=sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea \ + --hash=sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198 \ + --hash=sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0 \ + --hash=sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee \ + --hash=sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be \ + --hash=sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2 \ + --hash=sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1 \ + --hash=sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707 \ + --hash=sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6 \ + --hash=sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c \ + --hash=sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58 \ + --hash=sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823 \ + --hash=sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779 \ + --hash=sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636 \ + --hash=sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c \ + --hash=sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad \ + --hash=sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee \ + --hash=sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc \ + --hash=sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2 \ + --hash=sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48 \ + --hash=sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7 \ + --hash=sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e \ + --hash=sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b \ + --hash=sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa \ + --hash=sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5 \ + --hash=sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e \ + --hash=sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb \ + --hash=sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9 \ + --hash=sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57 \ + --hash=sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc \ + --hash=sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc \ + --hash=sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2 \ + --hash=sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11 # via jinja2 -more-itertools==9.0.0 \ - --hash=sha256:250e83d7e81d0c87ca6bd942e6aeab8cc9daa6096d12c5308f3f92fa5e5c1f41 \ - --hash=sha256:5a6257e40878ef0520b1803990e3e22303a41b5714006c32a3fd8304b26ea1ab +mdurl==0.1.2 \ + --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ + --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba + # via markdown-it-py +more-itertools==10.1.0 \ + --hash=sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a \ + --hash=sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6 # via jaraco-classes -nox==2022.11.21 \ - --hash=sha256:0e41a990e290e274cb205a976c4c97ee3c5234441a8132c8c3fd9ea3c22149eb \ - --hash=sha256:e21c31de0711d1274ca585a2c5fde36b1aa962005ba8e9322bf5eeed16dcd684 +nh3==0.2.14 \ + --hash=sha256:116c9515937f94f0057ef50ebcbcc10600860065953ba56f14473ff706371873 \ + --hash=sha256:18415df36db9b001f71a42a3a5395db79cf23d556996090d293764436e98e8ad \ + --hash=sha256:203cac86e313cf6486704d0ec620a992c8bc164c86d3a4fd3d761dd552d839b5 \ + --hash=sha256:2b0be5c792bd43d0abef8ca39dd8acb3c0611052ce466d0401d51ea0d9aa7525 \ + --hash=sha256:377aaf6a9e7c63962f367158d808c6a1344e2b4f83d071c43fbd631b75c4f0b2 \ + --hash=sha256:525846c56c2bcd376f5eaee76063ebf33cf1e620c1498b2a40107f60cfc6054e \ + --hash=sha256:5529a3bf99402c34056576d80ae5547123f1078da76aa99e8ed79e44fa67282d \ + --hash=sha256:7771d43222b639a4cd9e341f870cee336b9d886de1ad9bec8dddab22fe1de450 \ + --hash=sha256:88c753efbcdfc2644a5012938c6b9753f1c64a5723a67f0301ca43e7b85dcf0e \ + --hash=sha256:93a943cfd3e33bd03f77b97baa11990148687877b74193bf777956b67054dcc6 \ + --hash=sha256:9be2f68fb9a40d8440cbf34cbf40758aa7f6093160bfc7fb018cce8e424f0c3a \ + --hash=sha256:a0c509894fd4dccdff557068e5074999ae3b75f4c5a2d6fb5415e782e25679c4 \ + --hash=sha256:ac8056e937f264995a82bf0053ca898a1cb1c9efc7cd68fa07fe0060734df7e4 \ + --hash=sha256:aed56a86daa43966dd790ba86d4b810b219f75b4bb737461b6886ce2bde38fd6 \ + --hash=sha256:e8986f1dd3221d1e741fda0a12eaa4a273f1d80a35e31a1ffe579e7c621d069e \ + --hash=sha256:f99212a81c62b5f22f9e7c3e347aa00491114a5647e1f13bbebd79c3e5f08d75 + # via readme-renderer +nox==2023.4.22 \ + --hash=sha256:0b1adc619c58ab4fa57d6ab2e7823fe47a32e70202f287d78474adcc7bda1891 \ + --hash=sha256:46c0560b0dc609d7d967dc99e22cb463d3c4caf54a5fda735d6c11b5177e3a9f # via -r requirements.in -packaging==21.3 \ - --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ - --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 +packaging==23.2 \ + --hash=sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5 \ + --hash=sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7 # via # gcp-releasetool # nox -pkginfo==1.8.3 \ - --hash=sha256:848865108ec99d4901b2f7e84058b6e7660aae8ae10164e015a6dcf5b242a594 \ - --hash=sha256:a84da4318dd86f870a9447a8c98340aa06216bfc6f2b7bdc4b8766984ae1867c +pkginfo==1.9.6 \ + --hash=sha256:4b7a555a6d5a22169fcc9cf7bfd78d296b0361adad412a346c1226849af5e546 \ + --hash=sha256:8fd5896e8718a4372f0ea9cc9d96f6417c9b986e23a4d116dda26b62cc29d046 # via twine -platformdirs==2.5.4 \ - --hash=sha256:1006647646d80f16130f052404c6b901e80ee4ed6bef6792e1f238a8969106f7 \ - --hash=sha256:af0276409f9a02373d540bf8480021a048711d572745aef4b7842dad245eba10 +platformdirs==3.11.0 \ + --hash=sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3 \ + --hash=sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e # via virtualenv protobuf==3.20.3 \ --hash=sha256:03038ac1cfbc41aa21f6afcbcd357281d7521b4157926f30ebecc8d4ea59dcb7 \ @@ -383,34 +411,30 @@ protobuf==3.20.3 \ # gcp-releasetool # google-api-core # googleapis-common-protos -pyasn1==0.4.8 \ - --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \ - --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba +pyasn1==0.5.0 \ + --hash=sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57 \ + --hash=sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde # via # pyasn1-modules # rsa -pyasn1-modules==0.2.8 \ - --hash=sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e \ - --hash=sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74 +pyasn1-modules==0.3.0 \ + --hash=sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c \ + --hash=sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d # via google-auth pycparser==2.21 \ --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \ --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206 # via cffi -pygments==2.15.0 \ - --hash=sha256:77a3299119af881904cd5ecd1ac6a66214b6e9bed1f2db16993b54adede64094 \ - --hash=sha256:f7e36cffc4c517fbc252861b9a6e4644ca0e5abadf9a113c72d1358ad09b9500 +pygments==2.16.1 \ + --hash=sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692 \ + --hash=sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29 # via # readme-renderer # rich -pyjwt==2.6.0 \ - --hash=sha256:69285c7e31fc44f68a1feb309e948e0df53259d579295e6cfe2b1792329f05fd \ - --hash=sha256:d83c3d892a77bbb74d3e1a2cfa90afaadb60945205d1095d9221f04466f64c14 +pyjwt==2.8.0 \ + --hash=sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de \ + --hash=sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320 # via gcp-releasetool -pyparsing==3.0.9 \ - --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ - --hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc - # via packaging pyperclip==1.8.2 \ --hash=sha256:105254a8b04934f0bc84e9c24eb360a591aaf6535c9def5f29d92af107a9bf57 # via gcp-releasetool @@ -418,9 +442,9 @@ python-dateutil==2.8.2 \ --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 # via gcp-releasetool -readme-renderer==37.3 \ - --hash=sha256:cd653186dfc73055656f090f227f5cb22a046d7f71a841dfa305f55c9a513273 \ - --hash=sha256:f67a16caedfa71eef48a31b39708637a6f4664c4394801a7b0d6432d13907343 +readme-renderer==42.0 \ + --hash=sha256:13d039515c1f24de668e2c93f2e877b9dbe6c6c32328b90a40a49d8b2b85f36d \ + --hash=sha256:2d55489f83be4992fe4454939d1a051c33edbab778e82761d060c9fc6b308cd1 # via twine requests==2.31.0 \ --hash=sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f \ @@ -431,17 +455,17 @@ requests==2.31.0 \ # google-cloud-storage # requests-toolbelt # twine -requests-toolbelt==0.10.1 \ - --hash=sha256:18565aa58116d9951ac39baa288d3adb5b3ff975c4f25eee78555d89e8f247f7 \ - --hash=sha256:62e09f7ff5ccbda92772a29f394a49c3ad6cb181d568b1337626b2abb628a63d +requests-toolbelt==1.0.0 \ + --hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \ + --hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06 # via twine rfc3986==2.0.0 \ --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c # via twine -rich==12.6.0 \ - --hash=sha256:a4eb26484f2c82589bd9a17c73d32a010b1e29d89f1604cd9bf3a2097b81bb5e \ - --hash=sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0 +rich==13.6.0 \ + --hash=sha256:2b38e2fe9ca72c9a00170a1a2d20c63c790d0e10ef1fe35eba76e1e7b1d7d245 \ + --hash=sha256:5c14d22737e6d5084ef4771b62d5d4363165b403455a30a1c8ca39dc7b644bef # via twine rsa==4.9 \ --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ @@ -455,43 +479,37 @@ six==1.16.0 \ --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 # via - # bleach # gcp-docuploader - # google-auth # python-dateutil -twine==4.0.1 \ - --hash=sha256:42026c18e394eac3e06693ee52010baa5313e4811d5a11050e7d48436cf41b9e \ - --hash=sha256:96b1cf12f7ae611a4a40b6ae8e9570215daff0611828f5fe1f37a16255ab24a0 +twine==4.0.2 \ + --hash=sha256:929bc3c280033347a00f847236564d1c52a3e61b1ac2516c97c48f3ceab756d8 \ + --hash=sha256:9e102ef5fdd5a20661eb88fad46338806c3bd32cf1db729603fe3697b1bc83c8 # via -r requirements.in -typing-extensions==4.4.0 \ - --hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ - --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e +typing-extensions==4.8.0 \ + --hash=sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0 \ + --hash=sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef # via -r requirements.in -urllib3==1.26.18 \ - --hash=sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07 \ - --hash=sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0 +urllib3==2.0.7 \ + --hash=sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84 \ + --hash=sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e # via # requests # twine -virtualenv==20.16.7 \ - --hash=sha256:8691e3ff9387f743e00f6bb20f70121f5e4f596cae754531f2b3b3a1b1ac696e \ - --hash=sha256:efd66b00386fdb7dbe4822d172303f40cd05e50e01740b19ea42425cbe653e29 +virtualenv==20.24.6 \ + --hash=sha256:02ece4f56fbf939dbbc33c0715159951d6bf14aaf5457b092e4548e1382455af \ + --hash=sha256:520d056652454c5098a00c0f073611ccbea4c79089331f60bf9d7ba247bb7381 # via nox -webencodings==0.5.1 \ - --hash=sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 \ - --hash=sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923 - # via bleach -wheel==0.38.4 \ - --hash=sha256:965f5259b566725405b05e7cf774052044b1ed30119b5d586b2703aafe8719ac \ - --hash=sha256:b60533f3f5d530e971d6737ca6d58681ee434818fab630c83a734bb10c083ce8 +wheel==0.41.3 \ + --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \ + --hash=sha256:4d4987ce51a49370ea65c0bfd2234e8ce80a12780820d9dc462597a6e60d0841 # via -r requirements.in -zipp==3.10.0 \ - --hash=sha256:4fcb6f278987a6605757302a6e40e896257570d11c51628968ccb2a47e80c6c1 \ - --hash=sha256:7a7262fd930bd3e36c50b9a64897aec3fafff3dfdeec9623ae22b40e93f99bb8 +zipp==3.17.0 \ + --hash=sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31 \ + --hash=sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==65.5.1 \ - --hash=sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31 \ - --hash=sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f +setuptools==68.2.2 \ + --hash=sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87 \ + --hash=sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a # via -r requirements.in From 414b229007855d3e0765e21431ed2b8fd48c0edd Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 10 Nov 2023 02:18:22 +0100 Subject: [PATCH 1670/2016] chore(deps): update dependency pyarrow to v14 [security] (#1718) * chore(deps): update dependency pyarrow to v14 [security] * pin pyarrow to 12.0.1 for python 3.7 * pin pyarrow to 12.0.1 for python 3.7 * pin pyarrow to 12.0.1 for python 3.7 --------- Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/samples/geography/requirements.txt | 3 ++- packages/google-cloud-bigquery/samples/magics/requirements.txt | 3 ++- .../google-cloud-bigquery/samples/snippets/requirements.txt | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 9bc6ee32cf13..867f44b3522c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -28,7 +28,8 @@ packaging==23.1 pandas===1.3.5; python_version == '3.7' pandas==2.0.3; python_version >= '3.8' proto-plus==1.22.3 -pyarrow==12.0.1 +pyarrow==12.0.1; python_version == '3.7' +pyarrow==14.0.1; python_version >= '3.8' pyasn1==0.5.0 pyasn1-modules==0.3.0 pycparser==2.21 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index c3300ae20e68..8feb236fc988 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -10,6 +10,7 @@ matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1 +pyarrow==12.0.1; python_version == '3.7' +pyarrow==14.0.1; python_version >= '3.8' pytz==2023.3 typing-extensions==4.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index da99249d2a4d..1eeffe32ca4f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -11,6 +11,7 @@ matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1 +pyarrow==12.0.1; python_version == '3.7' +pyarrow==14.0.1; python_version >= '3.8' pytz==2023.3 typing-extensions==4.7.1 From e21422b1e7f35d3c60cde57d5df43001eaedf2ed Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 16 Nov 2023 13:29:19 -0500 Subject: [PATCH 1671/2016] feat: add `job_timeout_ms` to job configuration classes (#1675) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: adds new property and tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates docs to correct a sphinx failure * Updates formatting * Update tests/system/test_query.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update google/cloud/bigquery/job/base.py * updates one test and uses int_or_none * Update tests/system/test_query.py testing something. * Update tests/system/test_query.py * testing coverage feature * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * minor edits * tweaks to noxfile for testing purposes * add new test to base as experiment * adds a test, updates import statements * add another test * edit to tests * formatting fixes * update noxfile to correct debug code * removes unneeded comments. --------- Co-authored-by: Owl Bot --- .../google/cloud/bigquery/job/base.py | 32 +++++++++++++++++++ packages/google-cloud-bigquery/noxfile.py | 7 +++- .../tests/unit/job/test_base.py | 15 +++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index a6267be417a8..78df9142fe2b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -26,6 +26,7 @@ from google.cloud.bigquery import _helpers from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery._helpers import _int_or_none if typing.TYPE_CHECKING: # pragma: NO COVER from google.api_core import retry as retries @@ -171,6 +172,37 @@ def __setattr__(self, name, value): ) super(_JobConfig, self).__setattr__(name, value) + @property + def job_timeout_ms(self): + """Optional parameter. Job timeout in milliseconds. If this time limit is exceeded, BigQuery might attempt to stop the job. + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.job_timeout_ms + e.g. + + job_config = bigquery.QueryJobConfig( job_timeout_ms = 5000 ) + or + job_config.job_timeout_ms = 5000 + + Raises: + ValueError: If ``value`` type is invalid. + """ + + # None as this is an optional parameter. + if self._properties.get("jobTimeoutMs"): + return self._properties["jobTimeoutMs"] + return None + + @job_timeout_ms.setter + def job_timeout_ms(self, value): + try: + value = _int_or_none(value) + except ValueError as err: + raise ValueError("Pass an int for jobTimeoutMs, e.g. 5000").with_traceback( + err.__traceback__ + ) + + """ Docs indicate a string is expected by the API """ + self._properties["jobTimeoutMs"] = str(value) + @property def labels(self): """Dict[str, str]: Labels for the job. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 703e36cbb5b1..7cf5f60216b9 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -193,7 +193,12 @@ def system(session): session.install("-e", f".{extras}", "-c", constraints_path) # Run py.test against the system tests. - session.run("py.test", "--quiet", os.path.join("tests", "system"), *session.posargs) + session.run( + "py.test", + "--quiet", + os.path.join("tests", "system"), + *session.posargs, + ) @nox.session(python=DEFAULT_PYTHON_VERSION) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index a662e92d4e2b..5635d0e32dc7 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -1228,3 +1228,18 @@ def test_labels_setter(self): job_config = self._make_one() job_config.labels = labels self.assertEqual(job_config._properties["labels"], labels) + + def test_job_timeout_ms_raises_valueerror(self): + # Confirm that attempting to set a non-integer values will raise an Error. + with pytest.raises(ValueError): + job_config = self._make_one() + job_config.job_timeout_ms = "WillRaiseError" + + def test_job_timeout_ms(self): + # Confirm that default status is None. + job_config = self._make_one() + assert job_config.job_timeout_ms is None + + # Confirm that integers get converted to strings. + job_config.job_timeout_ms = 5000 + assert job_config.job_timeout_ms == "5000" # int is converted to string From ac008adc7fde9c0afde0397a7873bd80a4cd9038 Mon Sep 17 00:00:00 2001 From: Kira Date: Thu, 16 Nov 2023 16:16:49 -0800 Subject: [PATCH 1672/2016] chore: standardize samples directory (#1727) * Removed all dependencies from samples/snippets thats not google-cloud-bigquery * chore: standardizing extra-dependency samples * readded original dependencies to sample/snippets requirements --- packages/google-cloud-bigquery/noxfile.py | 2 + .../samples/desktopapp/__init__.py | 13 + .../samples/desktopapp/mypy.ini | 8 + .../samples/desktopapp/noxfile.py | 293 ++++++++++++++++++ .../samples/desktopapp/noxfile_config.py | 40 +++ .../samples/desktopapp/requirements-test.txt | 3 + .../samples/desktopapp/requirements.txt | 17 + .../samples/desktopapp/user_credentials.py | 78 +++++ .../desktopapp/user_credentials_test.py | 45 +++ .../samples/notebooks/__init__.py | 13 + .../notebooks/jupyter_tutorial_test.py | 175 +++++++++++ .../samples/notebooks/mypy.ini | 8 + .../samples/notebooks/noxfile.py | 293 ++++++++++++++++++ .../samples/notebooks/noxfile_config.py | 40 +++ .../samples/notebooks/requirements-test.txt | 3 + .../samples/notebooks/requirements.txt | 17 + .../samples/snippets/requirements.txt | 2 +- 17 files changed, 1049 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/samples/desktopapp/__init__.py create mode 100644 packages/google-cloud-bigquery/samples/desktopapp/mypy.ini create mode 100644 packages/google-cloud-bigquery/samples/desktopapp/noxfile.py create mode 100644 packages/google-cloud-bigquery/samples/desktopapp/noxfile_config.py create mode 100644 packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt create mode 100644 packages/google-cloud-bigquery/samples/desktopapp/requirements.txt create mode 100644 packages/google-cloud-bigquery/samples/desktopapp/user_credentials.py create mode 100644 packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py create mode 100644 packages/google-cloud-bigquery/samples/notebooks/__init__.py create mode 100644 packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py create mode 100644 packages/google-cloud-bigquery/samples/notebooks/mypy.ini create mode 100644 packages/google-cloud-bigquery/samples/notebooks/noxfile.py create mode 100644 packages/google-cloud-bigquery/samples/notebooks/noxfile_config.py create mode 100644 packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt create mode 100644 packages/google-cloud-bigquery/samples/notebooks/requirements.txt diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 7cf5f60216b9..9ccbdd30ceb2 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -263,8 +263,10 @@ def snippets(session): session.run( "py.test", "samples", + "--ignore=samples/desktopapp", "--ignore=samples/magics", "--ignore=samples/geography", + "--ignore=samples/notebooks", "--ignore=samples/snippets", *session.posargs, ) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/__init__.py b/packages/google-cloud-bigquery/samples/desktopapp/__init__.py new file mode 100644 index 000000000000..4fbd93bb2ca4 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/desktopapp/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/packages/google-cloud-bigquery/samples/desktopapp/mypy.ini b/packages/google-cloud-bigquery/samples/desktopapp/mypy.ini new file mode 100644 index 000000000000..d27b6b599d82 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/desktopapp/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +; We require type annotations in all samples. +strict = True +exclude = noxfile\.py +warn_unused_configs = True + +[mypy-google.auth,google.oauth2,geojson,google_auth_oauthlib,IPython.*] +ignore_missing_imports = True diff --git a/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py b/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py new file mode 100644 index 000000000000..1224cbe212e4 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py @@ -0,0 +1,293 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import glob +import os +from pathlib import Path +import sys +from typing import Callable, Dict, Optional + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +BLACK_VERSION = "black==22.3.0" +ISORT_VERSION = "isort==5.10.1" + +# Copy `noxfile_config.py` to your directory and modify it instead. + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} + + +try: + # Ensure we can import noxfile_config in the project's directory. + sys.path.append(".") + from noxfile_config import TEST_CONFIG_OVERRIDE +except ImportError as e: + print("No user noxfile_config found: detail: {}".format(e)) + TEST_CONFIG_OVERRIDE = {} + +# Update the TEST_CONFIG with the user supplied values. +TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) + + +def get_pytest_env_vars() -> Dict[str, str]: + """Returns a dict for pytest invocation.""" + ret = {} + + # Override the GCLOUD_PROJECT and the alias. + env_key = TEST_CONFIG["gcloud_project_env"] + # This should error out if not set. + ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] + + # Apply user supplied envs. + ret.update(TEST_CONFIG["envs"]) + return ret + + +# DO NOT EDIT - automatically generated. +# All versions used to test samples. +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] + +# Any default versions that should be ignored. +IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] + +TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) + +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) + +# Error if a python version is missing +nox.options.error_on_missing_interpreters = True + +# +# Style Checks +# + + +# Linting with flake8. +# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session: nox.sessions.Session) -> None: + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8") + else: + session.install("flake8", "flake8-annotations") + + args = FLAKE8_COMMON_ARGS + [ + ".", + ] + session.run("flake8", *args) + + +# +# Black +# + + +@nox.session +def blacken(session: nox.sessions.Session) -> None: + """Run black. Format code to uniform standard.""" + session.install(BLACK_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + +# +# format = isort + black +# + + +@nox.session +def format(session: nox.sessions.Session) -> None: + """ + Run isort to sort imports. Then run black + to format code to uniform standard. + """ + session.install(BLACK_VERSION, ISORT_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + # Use the --fss option to sort imports using strict alphabetical order. + # See https://pycqa.github.io/isort/docs/configuration/options.html#force-sort-within-sections + session.run("isort", "--fss", *python_files) + session.run("black", *python_files) + + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: + # check for presence of tests + test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob( + "**/test_*.py", recursive=True + ) + test_list.extend(glob.glob("**/tests", recursive=True)) + + if len(test_list) == 0: + print("No tests found, skipping directory.") + return + + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + concurrent_args = [] + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + with open("requirements.txt") as rfile: + packages = rfile.read() + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") + with open("requirements-test.txt") as rtfile: + packages += rtfile.read() + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + if "pytest-parallel" in packages: + concurrent_args.extend(["--workers", "auto", "--tests-per-worker", "auto"]) + elif "pytest-xdist" in packages: + concurrent_args.extend(["-n", "auto"]) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session: nox.sessions.Session) -> None: + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) + + +# +# Readmegen +# + + +def _get_repo_root() -> Optional[str]: + """Returns the root folder of the project.""" + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session: nox.sessions.Session, path: str) -> None: + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/noxfile_config.py b/packages/google-cloud-bigquery/samples/desktopapp/noxfile_config.py new file mode 100644 index 000000000000..315bd5be8cd9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/desktopapp/noxfile_config.py @@ -0,0 +1,40 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be inported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [ + "2.7", + # TODO: Enable 3.10 once there is a geopandas/fiona release. + # https://github.com/Toblerity/Fiona/issues/1043 + "3.10", + ], + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt new file mode 100644 index 000000000000..514f09705ae8 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -0,0 +1,3 @@ +google-cloud-testutils==1.3.3 +pytest==7.4.0 +mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt new file mode 100644 index 000000000000..1eeffe32ca4f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -0,0 +1,17 @@ +db-dtypes==1.1.1 +google-cloud-bigquery==3.11.4 +google-cloud-bigquery-storage==2.22.0 +google-auth-oauthlib==1.0.0 +grpcio==1.57.0 +ipywidgets==8.1.0 +ipython===7.31.1; python_version == '3.7' +ipython===8.0.1; python_version == '3.8' +ipython==8.14.0; python_version >= '3.9' +matplotlib===3.5.3; python_version == '3.7' +matplotlib==3.7.2; python_version >= '3.8' +pandas===1.3.5; python_version == '3.7' +pandas==2.0.3; python_version >= '3.8' +pyarrow==12.0.1; python_version == '3.7' +pyarrow==14.0.1; python_version >= '3.8' +pytz==2023.3 +typing-extensions==4.7.1 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/user_credentials.py b/packages/google-cloud-bigquery/samples/desktopapp/user_credentials.py new file mode 100644 index 000000000000..487a56c5ff9c --- /dev/null +++ b/packages/google-cloud-bigquery/samples/desktopapp/user_credentials.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python + +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Command-line application to run a query using user credentials. + +You must supply a client secrets file, which would normally be bundled with +your application. +""" + +import argparse + + +def main(project: str) -> None: + # [START bigquery_auth_user_flow] + from google_auth_oauthlib import flow + + # A local server is used as the callback URL in the auth flow. + appflow = flow.InstalledAppFlow.from_client_secrets_file( + "client_secrets.json", scopes=["https://www.googleapis.com/auth/bigquery"] + ) + + # This launches a local server to be used as the callback URL in the desktop + # app auth flow. If you are accessing the application remotely, such as over + # SSH or a remote Jupyter notebook, this flow will not work. Use the + # `gcloud auth application-default login --no-browser` command or workload + # identity federation to get authentication tokens, instead. + # + appflow.run_local_server() + + credentials = appflow.credentials + # [END bigquery_auth_user_flow] + + # [START bigquery_auth_user_query] + from google.cloud import bigquery + + # TODO: Uncomment the line below to set the `project` variable. + # project = 'user-project-id' + # + # The `project` variable defines the project to be billed for query + # processing. The user must have the bigquery.jobs.create permission on + # this project to run a query. See: + # https://cloud.google.com/bigquery/docs/access-control#permissions + + client = bigquery.Client(project=project, credentials=credentials) + + query_string = """SELECT name, SUM(number) as total + FROM `bigquery-public-data.usa_names.usa_1910_current` + WHERE name = 'William' + GROUP BY name; + """ + query_job = client.query(query_string) + + # Print the results. + for row in query_job.result(): # Wait for the job to complete. + print("{}: {}".format(row["name"], row["total"])) + # [END bigquery_auth_user_query] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("project", help="Project to use for BigQuery billing.") + args = parser.parse_args() + main(args.project) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py b/packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py new file mode 100644 index 000000000000..baa9e33f1357 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py @@ -0,0 +1,45 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Iterator, Union + +import google.auth +import mock +import pytest + +from .user_credentials import main # type: ignore + +PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] + +MockType = Union[mock.mock.MagicMock, mock.mock.AsyncMock] + + +@pytest.fixture +def mock_flow() -> Iterator[MockType]: + flow_patch = mock.patch("google_auth_oauthlib.flow.InstalledAppFlow", autospec=True) + + with flow_patch as flow_mock: + flow_mock.from_client_secrets_file.return_value = flow_mock + flow_mock.credentials = google.auth.default()[0] + yield flow_mock + + +def test_auth_query_console( + mock_flow: MockType, capsys: pytest.CaptureFixture[str] +) -> None: + main(PROJECT) + out, _ = capsys.readouterr() + # Fun fact: William P. Wood was the 1st director of the US Secret Service. + assert "William" in out diff --git a/packages/google-cloud-bigquery/samples/notebooks/__init__.py b/packages/google-cloud-bigquery/samples/notebooks/__init__.py new file mode 100644 index 000000000000..4fbd93bb2ca4 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/notebooks/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py b/packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py new file mode 100644 index 000000000000..9d42a4eda7b6 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py @@ -0,0 +1,175 @@ +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing +from typing import Iterator + +import pytest + +if typing.TYPE_CHECKING: + from IPython.terminal.interactiveshell import TerminalInteractiveShell + +IPython = pytest.importorskip("IPython") +interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") +tools = pytest.importorskip("IPython.testing.tools") +matplotlib = pytest.importorskip("matplotlib") + +# Ignore semicolon lint warning because semicolons are used in notebooks +# flake8: noqa E703 + + +@pytest.fixture(scope="session") +def ipython() -> "TerminalInteractiveShell": + config = tools.default_config() + config.TerminalInteractiveShell.simple_prompt = True + shell = interactiveshell.TerminalInteractiveShell.instance(config=config) + return shell + + +@pytest.fixture() +def ipython_interactive( + request: pytest.FixtureRequest, ipython: "TerminalInteractiveShell" +) -> Iterator["TerminalInteractiveShell"]: + """Activate IPython's builtin hooks + + for the duration of the test scope. + """ + with ipython.builtin_trap: + yield ipython + + +def _strip_region_tags(sample_text: str) -> str: + """Remove blank lines and region tags from sample text""" + magic_lines = [ + line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line + ] + return "\n".join(magic_lines) + + +def test_jupyter_tutorial(ipython: "TerminalInteractiveShell") -> None: + matplotlib.use("agg") + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + + sample = """ + # [START bigquery_jupyter_magic_gender_by_year] + %%bigquery + SELECT + source_year AS year, + COUNT(is_male) AS birth_count + FROM `bigquery-public-data.samples.natality` + GROUP BY year + ORDER BY year DESC + LIMIT 15 + # [END bigquery_jupyter_magic_gender_by_year] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + sample = """ + # [START bigquery_jupyter_magic_gender_by_year_var] + %%bigquery total_births + SELECT + source_year AS year, + COUNT(is_male) AS birth_count + FROM `bigquery-public-data.samples.natality` + GROUP BY year + ORDER BY year DESC + LIMIT 15 + # [END bigquery_jupyter_magic_gender_by_year_var] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + assert "total_births" in ip.user_ns # verify that variable exists + total_births = ip.user_ns["total_births"] + # [START bigquery_jupyter_plot_births_by_year] + total_births.plot(kind="bar", x="year", y="birth_count") + # [END bigquery_jupyter_plot_births_by_year] + + sample = """ + # [START bigquery_jupyter_magic_gender_by_weekday] + %%bigquery births_by_weekday + SELECT + wday, + SUM(CASE WHEN is_male THEN 1 ELSE 0 END) AS male_births, + SUM(CASE WHEN is_male THEN 0 ELSE 1 END) AS female_births + FROM `bigquery-public-data.samples.natality` + WHERE wday IS NOT NULL + GROUP BY wday + ORDER BY wday ASC + # [END bigquery_jupyter_magic_gender_by_weekday] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + assert "births_by_weekday" in ip.user_ns # verify that variable exists + births_by_weekday = ip.user_ns["births_by_weekday"] + # [START bigquery_jupyter_plot_births_by_weekday] + births_by_weekday.plot(x="wday") + # [END bigquery_jupyter_plot_births_by_weekday] + + # [START bigquery_jupyter_import_and_client] + from google.cloud import bigquery + + client = bigquery.Client() + # [END bigquery_jupyter_import_and_client] + + # [START bigquery_jupyter_query_plurality_by_year] + sql = """ + SELECT + plurality, + COUNT(1) AS count, + year + FROM + `bigquery-public-data.samples.natality` + WHERE + NOT IS_NAN(plurality) AND plurality > 1 + GROUP BY + plurality, year + ORDER BY + count DESC + """ + df = client.query(sql).to_dataframe() + df.head() + # [END bigquery_jupyter_query_plurality_by_year] + + # [START bigquery_jupyter_plot_plurality_by_year] + pivot_table = df.pivot(index="year", columns="plurality", values="count") + pivot_table.plot(kind="bar", stacked=True, figsize=(15, 7)) + # [END bigquery_jupyter_plot_plurality_by_year] + + # [START bigquery_jupyter_query_births_by_gestation] + sql = """ + SELECT + gestation_weeks, + COUNT(1) AS count + FROM + `bigquery-public-data.samples.natality` + WHERE + NOT IS_NAN(gestation_weeks) AND gestation_weeks <> 99 + GROUP BY + gestation_weeks + ORDER BY + gestation_weeks + """ + df = client.query(sql).to_dataframe() + # [END bigquery_jupyter_query_births_by_gestation] + + # [START bigquery_jupyter_plot_births_by_gestation] + ax = df.plot(kind="bar", x="gestation_weeks", y="count", figsize=(15, 7)) + ax.set_title("Count of Births by Gestation Weeks") + ax.set_xlabel("Gestation Weeks") + ax.set_ylabel("Count") + # [END bigquery_jupyter_plot_births_by_gestation] diff --git a/packages/google-cloud-bigquery/samples/notebooks/mypy.ini b/packages/google-cloud-bigquery/samples/notebooks/mypy.ini new file mode 100644 index 000000000000..dea60237bd35 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/notebooks/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +; We require type annotations in all samples. +strict = True +exclude = noxfile\.py +warn_unused_configs = True + +[mypy-IPython.*,nox,noxfile_config,pandas] +ignore_missing_imports = True \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/notebooks/noxfile.py b/packages/google-cloud-bigquery/samples/notebooks/noxfile.py new file mode 100644 index 000000000000..1224cbe212e4 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/notebooks/noxfile.py @@ -0,0 +1,293 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import glob +import os +from pathlib import Path +import sys +from typing import Callable, Dict, Optional + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +BLACK_VERSION = "black==22.3.0" +ISORT_VERSION = "isort==5.10.1" + +# Copy `noxfile_config.py` to your directory and modify it instead. + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} + + +try: + # Ensure we can import noxfile_config in the project's directory. + sys.path.append(".") + from noxfile_config import TEST_CONFIG_OVERRIDE +except ImportError as e: + print("No user noxfile_config found: detail: {}".format(e)) + TEST_CONFIG_OVERRIDE = {} + +# Update the TEST_CONFIG with the user supplied values. +TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) + + +def get_pytest_env_vars() -> Dict[str, str]: + """Returns a dict for pytest invocation.""" + ret = {} + + # Override the GCLOUD_PROJECT and the alias. + env_key = TEST_CONFIG["gcloud_project_env"] + # This should error out if not set. + ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] + + # Apply user supplied envs. + ret.update(TEST_CONFIG["envs"]) + return ret + + +# DO NOT EDIT - automatically generated. +# All versions used to test samples. +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] + +# Any default versions that should be ignored. +IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] + +TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) + +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) + +# Error if a python version is missing +nox.options.error_on_missing_interpreters = True + +# +# Style Checks +# + + +# Linting with flake8. +# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session: nox.sessions.Session) -> None: + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8") + else: + session.install("flake8", "flake8-annotations") + + args = FLAKE8_COMMON_ARGS + [ + ".", + ] + session.run("flake8", *args) + + +# +# Black +# + + +@nox.session +def blacken(session: nox.sessions.Session) -> None: + """Run black. Format code to uniform standard.""" + session.install(BLACK_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + +# +# format = isort + black +# + + +@nox.session +def format(session: nox.sessions.Session) -> None: + """ + Run isort to sort imports. Then run black + to format code to uniform standard. + """ + session.install(BLACK_VERSION, ISORT_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + # Use the --fss option to sort imports using strict alphabetical order. + # See https://pycqa.github.io/isort/docs/configuration/options.html#force-sort-within-sections + session.run("isort", "--fss", *python_files) + session.run("black", *python_files) + + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: + # check for presence of tests + test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob( + "**/test_*.py", recursive=True + ) + test_list.extend(glob.glob("**/tests", recursive=True)) + + if len(test_list) == 0: + print("No tests found, skipping directory.") + return + + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + concurrent_args = [] + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + with open("requirements.txt") as rfile: + packages = rfile.read() + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") + with open("requirements-test.txt") as rtfile: + packages += rtfile.read() + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + if "pytest-parallel" in packages: + concurrent_args.extend(["--workers", "auto", "--tests-per-worker", "auto"]) + elif "pytest-xdist" in packages: + concurrent_args.extend(["-n", "auto"]) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session: nox.sessions.Session) -> None: + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) + + +# +# Readmegen +# + + +def _get_repo_root() -> Optional[str]: + """Returns the root folder of the project.""" + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session: nox.sessions.Session, path: str) -> None: + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/packages/google-cloud-bigquery/samples/notebooks/noxfile_config.py b/packages/google-cloud-bigquery/samples/notebooks/noxfile_config.py new file mode 100644 index 000000000000..315bd5be8cd9 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/notebooks/noxfile_config.py @@ -0,0 +1,40 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be inported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [ + "2.7", + # TODO: Enable 3.10 once there is a geopandas/fiona release. + # https://github.com/Toblerity/Fiona/issues/1043 + "3.10", + ], + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt new file mode 100644 index 000000000000..514f09705ae8 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -0,0 +1,3 @@ +google-cloud-testutils==1.3.3 +pytest==7.4.0 +mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt new file mode 100644 index 000000000000..1eeffe32ca4f --- /dev/null +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -0,0 +1,17 @@ +db-dtypes==1.1.1 +google-cloud-bigquery==3.11.4 +google-cloud-bigquery-storage==2.22.0 +google-auth-oauthlib==1.0.0 +grpcio==1.57.0 +ipywidgets==8.1.0 +ipython===7.31.1; python_version == '3.7' +ipython===8.0.1; python_version == '3.8' +ipython==8.14.0; python_version >= '3.9' +matplotlib===3.5.3; python_version == '3.7' +matplotlib==3.7.2; python_version >= '3.8' +pandas===1.3.5; python_version == '3.7' +pandas==2.0.3; python_version >= '3.8' +pyarrow==12.0.1; python_version == '3.7' +pyarrow==14.0.1; python_version >= '3.8' +pytz==2023.3 +typing-extensions==4.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 1eeffe32ca4f..72a77ad113e8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -14,4 +14,4 @@ pandas==2.0.3; python_version >= '3.8' pyarrow==12.0.1; python_version == '3.7' pyarrow==14.0.1; python_version >= '3.8' pytz==2023.3 -typing-extensions==4.7.1 +typing-extensions==4.7.1 \ No newline at end of file From 876b21cbc2b757dadea9262d0a2d4b233c29245e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 17 Nov 2023 18:43:59 -0600 Subject: [PATCH 1673/2016] feat: add `job_id`, `location`, `project`, and `query_id` properties on `RowIterator` (#1733) * feat: add `job_id`, `location`, `project`, and `query_id` properties on `RowIterator` These can be used to recover the original job metadata when `RowIterator` is the result of a `QueryJob`. * rename bqstorage_project to billing project * Update google/cloud/bigquery/table.py Co-authored-by: Lingqing Gan --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/client.py | 10 ++++ .../google/cloud/bigquery/job/query.py | 24 ++++++++- .../google/cloud/bigquery/query.py | 8 +++ .../google/cloud/bigquery/table.py | 49 +++++++++++++++++-- .../tests/unit/job/test_query.py | 18 ++++++- .../tests/unit/job/test_query_pandas.py | 12 +++-- .../tests/unit/test_client.py | 11 ++++- .../tests/unit/test_query.py | 10 ++++ .../tests/unit/test_table.py | 32 ++++++++++++ 9 files changed, 163 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index d4a759ba4805..4e72ac92260e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3843,6 +3843,8 @@ def list_rows( # tables can be fetched without a column filter. selected_fields=selected_fields, total_rows=getattr(table, "num_rows", None), + project=table.project, + location=table.location, ) return row_iterator @@ -3859,6 +3861,7 @@ def _list_rows_from_query_results( page_size: Optional[int] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + query_id: Optional[str] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -3898,6 +3901,9 @@ def _list_rows_from_query_results( would otherwise be a successful response. If multiple requests are made under the hood, ``timeout`` applies to each individual request. + query_id (Optional[str]): + [Preview] ID of a completed query. This ID is auto-generated + and not guaranteed to be populated. Returns: google.cloud.bigquery.table.RowIterator: Iterator of row data @@ -3928,6 +3934,10 @@ def _list_rows_from_query_results( table=destination, extra_params=params, total_rows=total_rows, + project=project, + location=location, + job_id=job_id, + query_id=query_id, ) return row_iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 57186acbcdfb..a48a15f85168 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -930,6 +930,15 @@ def query(self): self._properties, ["configuration", "query", "query"] ) + @property + def query_id(self) -> Optional[str]: + """[Preview] ID of a completed query. + + This ID is auto-generated and not guaranteed to be populated. + """ + query_results = self._query_results + return query_results.query_id if query_results is not None else None + @property def query_parameters(self): """See @@ -1525,7 +1534,12 @@ def result( # type: ignore # (complaints about the overloaded signature) provided and the job is not retryable. """ if self.dry_run: - return _EmptyRowIterator() + return _EmptyRowIterator( + project=self.project, + location=self.location, + # Intentionally omit job_id and query_id since this doesn't + # actually correspond to a finished query job. + ) try: retry_do_query = getattr(self, "_retry_do_query", None) if retry_do_query is not None: @@ -1594,7 +1608,12 @@ def do_get_result(): # indicate success and avoid calling tabledata.list on a table which # can't be read (such as a view table). if self._query_results.total_rows is None: - return _EmptyRowIterator() + return _EmptyRowIterator( + location=self.location, + project=self.project, + job_id=self.job_id, + query_id=self.query_id, + ) rows = self._client._list_rows_from_query_results( self.job_id, @@ -1608,6 +1627,7 @@ def do_get_result(): start_index=start_index, retry=retry, timeout=timeout, + query_id=self.query_id, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 944ad884e345..ccc8840bed8c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -911,6 +911,14 @@ def job_id(self): """ return self._properties.get("jobReference", {}).get("jobId") + @property + def query_id(self) -> Optional[str]: + """[Preview] ID of a completed query. + + This ID is auto-generated and not guaranteed to be populated. + """ + return self._properties.get("queryId") + @property def page_token(self): """Token for fetching next bach of results. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index dcba10428f29..168448c9954a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1558,6 +1558,10 @@ def __init__( selected_fields=None, total_rows=None, first_page_response=None, + location: Optional[str] = None, + job_id: Optional[str] = None, + query_id: Optional[str] = None, + project: Optional[str] = None, ): super(RowIterator, self).__init__( client, @@ -1575,12 +1579,51 @@ def __init__( self._field_to_index = _helpers._field_to_index_mapping(schema) self._page_size = page_size self._preserve_order = False - self._project = client.project if client is not None else None self._schema = schema self._selected_fields = selected_fields self._table = table self._total_rows = total_rows self._first_page_response = first_page_response + self._location = location + self._job_id = job_id + self._query_id = query_id + self._project = project + + @property + def _billing_project(self) -> Optional[str]: + """GCP Project ID where BQ API will bill to (if applicable).""" + client = self.client + return client.project if client is not None else None + + @property + def job_id(self) -> Optional[str]: + """ID of the query job (if applicable). + + To get the job metadata, call + ``job = client.get_job(rows.job_id, location=rows.location)``. + """ + return self._job_id + + @property + def location(self) -> Optional[str]: + """Location where the query executed (if applicable). + + See: https://cloud.google.com/bigquery/docs/locations + """ + return self._location + + @property + def project(self) -> Optional[str]: + """GCP Project ID where these rows are read from.""" + return self._project + + @property + def query_id(self) -> Optional[str]: + """[Preview] ID of a completed query. + + This ID is auto-generated and not guaranteed to be populated. + """ + return self._query_id def _is_completely_cached(self): """Check if all results are completely cached. @@ -1723,7 +1766,7 @@ def to_arrow_iterable( bqstorage_download = functools.partial( _pandas_helpers.download_arrow_bqstorage, - self._project, + self._billing_project, self._table, bqstorage_client, preserve_order=self._preserve_order, @@ -1903,7 +1946,7 @@ def to_dataframe_iterable( column_names = [field.name for field in self._schema] bqstorage_download = functools.partial( _pandas_helpers.download_dataframe_bqstorage, - self._project, + self._billing_project, self._table, bqstorage_client, column_names, diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 26f1f2a738a0..39275063adbf 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -952,6 +952,7 @@ def test_result(self): }, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "2", + "queryId": "abc-def", } job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") @@ -980,6 +981,10 @@ def test_result(self): rows = list(result) self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") + self.assertEqual(result.job_id, self.JOB_ID) + self.assertEqual(result.location, "EU") + self.assertEqual(result.project, self.PROJECT) + self.assertEqual(result.query_id, "abc-def") # Test that the total_rows property has changed during iteration, based # on the response from tabledata.list. self.assertEqual(result.total_rows, 1) @@ -1023,6 +1028,12 @@ def test_result_dry_run(self): calls = conn.api_request.mock_calls self.assertIsInstance(result, _EmptyRowIterator) self.assertEqual(calls, []) + self.assertEqual(result.location, "EU") + self.assertEqual(result.project, self.PROJECT) + # Intentionally omit job_id and query_id since this doesn't + # actually correspond to a finished query job. + self.assertIsNone(result.job_id) + self.assertIsNone(result.query_id) def test_result_with_done_job_calls_get_query_results(self): query_resource_done = { @@ -1180,16 +1191,21 @@ def test_result_w_empty_schema(self): "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": []}, + "queryId": "xyz-abc", } connection = make_connection(query_resource, query_resource) client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) + resource = self._make_resource(ended=True, location="asia-northeast1") job = self._get_target_class().from_api_repr(resource, client) result = job.result() self.assertIsInstance(result, _EmptyRowIterator) self.assertEqual(list(result), []) + self.assertEqual(result.project, self.PROJECT) + self.assertEqual(result.job_id, self.JOB_ID) + self.assertEqual(result.location, "asia-northeast1") + self.assertEqual(result.query_id, "xyz-abc") def test_result_invokes_begins(self): begun_resource = self._make_resource() diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index f4c7eb06e862..0accae0a2028 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -560,7 +560,7 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): [name_array, age_array], schema=arrow_schema ) connection = make_connection(query_resource) - client = _make_client(connection=connection) + client = _make_client(connection=connection, project="bqstorage-billing-project") job = target_class.from_api_repr(resource, client) session = bigquery_storage.types.ReadSession() session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() @@ -597,7 +597,9 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): **table_read_options_kwarg, ) bqstorage_client.create_read_session.assert_called_once_with( - parent=f"projects/{client.project}", + # The billing project can differ from the data project. Make sure we + # are charging to the billing project, not the data project. + parent="projects/bqstorage-billing-project", read_session=expected_session, max_stream_count=0, # Use default number of streams for best performance. ) @@ -618,7 +620,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, } connection = make_connection(query_resource) - client = _make_client(connection=connection) + client = _make_client(connection=connection, project="bqstorage-billing-project") job = target_class.from_api_repr(resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() @@ -646,7 +648,9 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): data_format=bigquery_storage.DataFormat.ARROW, ) bqstorage_client.create_read_session.assert_called_once_with( - parent=f"projects/{client.project}", + # The billing project can differ from the data project. Make sure we + # are charging to the billing project, not the data project. + parent="projects/bqstorage-billing-project", read_session=expected_session, max_stream_count=0, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d470bd9fd033..af61ceb42a94 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -6401,11 +6401,16 @@ def test_list_rows(self): age = SchemaField("age", "INTEGER", mode="NULLABLE") joined = SchemaField("joined", "TIMESTAMP", mode="NULLABLE") table = Table(self.TABLE_REF, schema=[full_name, age, joined]) + table._properties["location"] = "us-central1" table._properties["numRows"] = 7 iterator = client.list_rows(table, timeout=7.5) - # Check that initial total_rows is populated from the table. + # Check that initial RowIterator is populated from the table metadata. + self.assertIsNone(iterator.job_id) + self.assertEqual(iterator.location, "us-central1") + self.assertEqual(iterator.project, table.project) + self.assertIsNone(iterator.query_id) self.assertEqual(iterator.total_rows, 7) page = next(iterator.pages) rows = list(page) @@ -6521,6 +6526,10 @@ def test_list_rows_empty_table(self): selected_fields=[], ) + self.assertIsNone(rows.job_id) + self.assertIsNone(rows.location) + self.assertEqual(rows.project, self.TABLE_REF.project) + self.assertIsNone(rows.query_id) # When a table reference / string and selected_fields is provided, # total_rows can't be populated until iteration starts. self.assertIsNone(rows.total_rows) diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 4b687152ff34..aae4890b3e82 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -1386,6 +1386,16 @@ def test_page_token_present(self): query = self._make_one(resource) self.assertEqual(query.page_token, "TOKEN") + def test_query_id_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.query_id) + + def test_query_id_present(self): + resource = self._make_resource() + resource["queryId"] = "test-query-id" + query = self._make_one(resource) + self.assertEqual(query.query_id, "test-query-id") + def test_total_rows_present_integer(self): resource = self._make_resource() resource["totalRows"] = 42 diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index fa2f30cea539..d9f259e7232d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2113,6 +2113,38 @@ def test_constructor_with_dict_schema(self): ] self.assertEqual(iterator.schema, expected_schema) + def test_job_id_missing(self): + rows = self._make_one() + self.assertIsNone(rows.job_id) + + def test_job_id_present(self): + rows = self._make_one(job_id="abc-123") + self.assertEqual(rows.job_id, "abc-123") + + def test_location_missing(self): + rows = self._make_one() + self.assertIsNone(rows.location) + + def test_location_present(self): + rows = self._make_one(location="asia-northeast1") + self.assertEqual(rows.location, "asia-northeast1") + + def test_project_missing(self): + rows = self._make_one() + self.assertIsNone(rows.project) + + def test_project_present(self): + rows = self._make_one(project="test-project") + self.assertEqual(rows.project, "test-project") + + def test_query_id_missing(self): + rows = self._make_one() + self.assertIsNone(rows.query_id) + + def test_query_id_present(self): + rows = self._make_one(query_id="xyz-987") + self.assertEqual(rows.query_id, "xyz-987") + def test_iterate(self): from google.cloud.bigquery.schema import SchemaField From 4c085d8c26f3fecdb3b866e1549b7ee9dcf4a053 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Nov 2023 09:13:47 -0600 Subject: [PATCH 1674/2016] perf: use the first page a results when `query(api_method="QUERY")` (#1723) * perf: use the first page a results when `query(api_method="QUERY")` * add tests * respect max_results with cached page * respect page_size, also avoid bqstorage if almost fully downloaded * skip true test if bqstorage not installed * coverage --- .../google/cloud/bigquery/_job_helpers.py | 12 +- .../google/cloud/bigquery/client.py | 9 ++ .../google/cloud/bigquery/job/query.py | 13 +- .../google/cloud/bigquery/query.py | 8 -- .../google/cloud/bigquery/table.py | 44 +++++- .../tests/unit/job/test_query.py | 109 ++++++++++++++ .../tests/unit/test_query.py | 8 +- .../tests/unit/test_table.py | 105 +++++++++++++- .../tests/unit/test_table_arrow.py | 134 ++++++++++++++++++ .../tests/unit/test_table_pandas.py | 59 ++++++++ 10 files changed, 468 insertions(+), 33 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/test_table_arrow.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 09daaa2a23c9..7992f28b69c0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -22,6 +22,7 @@ from google.api_core import retry as retries from google.cloud.bigquery import job +import google.cloud.bigquery.query # Avoid circular imports if TYPE_CHECKING: # pragma: NO COVER @@ -197,14 +198,9 @@ def _to_query_job( job_complete = query_response.get("jobComplete") if job_complete: query_job._properties["status"]["state"] = "DONE" - # TODO: https://github.com/googleapis/python-bigquery/issues/589 - # Set the first page of results if job is "complete" and there is - # only 1 page of results. Otherwise, use the existing logic that - # refreshes the job stats. - # - # This also requires updates to `to_dataframe` and the DB API connector - # so that they don't try to read from a destination table if all the - # results are present. + query_job._query_results = google.cloud.bigquery.query._QueryResults( + query_response + ) else: query_job._properties["status"]["state"] = "PENDING" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4e72ac92260e..488a9ad298c3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3862,6 +3862,7 @@ def _list_rows_from_query_results( retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, query_id: Optional[str] = None, + first_page_response: Optional[Dict[str, Any]] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -3904,6 +3905,8 @@ def _list_rows_from_query_results( query_id (Optional[str]): [Preview] ID of a completed query. This ID is auto-generated and not guaranteed to be populated. + first_page_response (Optional[dict]): + API response for the first page of results (if available). Returns: google.cloud.bigquery.table.RowIterator: Iterator of row data @@ -3923,6 +3926,11 @@ def _list_rows_from_query_results( if start_index is not None: params["startIndex"] = start_index + # We don't call jobs.query with a page size, so if the user explicitly + # requests a certain size, invalidate the cache. + if page_size is not None: + first_page_response = None + params["formatOptions.useInt64Timestamp"] = True row_iterator = RowIterator( client=self, @@ -3938,6 +3946,7 @@ def _list_rows_from_query_results( location=location, job_id=job_id, query_id=query_id, + first_page_response=first_page_response, ) return row_iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index a48a15f85168..79cd207a1e5b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1586,7 +1586,8 @@ def do_get_result(): # Since the job could already be "done" (e.g. got a finished job # via client.get_job), the superclass call to done() might not # set the self._query_results cache. - self._reload_query_results(retry=retry, timeout=timeout) + if self._query_results is None or not self._query_results.complete: + self._reload_query_results(retry=retry, timeout=timeout) if retry_do_query is not None and job_retry is not None: do_get_result = job_retry(do_get_result) @@ -1615,6 +1616,15 @@ def do_get_result(): query_id=self.query_id, ) + # We know that there's at least 1 row, so only treat the response from + # jobs.getQueryResults / jobs.query as the first page of the + # RowIterator response if there are any rows in it. This prevents us + # from stopping the iteration early because we're missing rows and + # there's no next page token. + first_page_response = self._query_results._properties + if "rows" not in first_page_response: + first_page_response = None + rows = self._client._list_rows_from_query_results( self.job_id, self.location, @@ -1628,6 +1638,7 @@ def do_get_result(): retry=retry, timeout=timeout, query_id=self.query_id, + first_page_response=first_page_response, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index ccc8840bed8c..54abe95a7457 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -1005,14 +1005,6 @@ def _set_properties(self, api_response): Args: api_response (Dict): Response returned from an API call """ - job_id_present = ( - "jobReference" in api_response - and "jobId" in api_response["jobReference"] - and "projectId" in api_response["jobReference"] - ) - if not job_id_present: - raise ValueError("QueryResult requires a job reference") - self._properties.clear() self._properties.update(copy.deepcopy(api_response)) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 168448c9954a..dca9f7962920 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -100,6 +100,10 @@ "because the necessary `__from_arrow__` attribute is missing." ) +# How many of the total rows need to be downloaded already for us to skip +# calling the BQ Storage API? +ALMOST_COMPLETELY_CACHED_RATIO = 0.333 + def _reference_getter(table): """A :class:`~google.cloud.bigquery.table.TableReference` pointing to @@ -1625,16 +1629,31 @@ def query_id(self) -> Optional[str]: """ return self._query_id - def _is_completely_cached(self): + def _is_almost_completely_cached(self): """Check if all results are completely cached. This is useful to know, because we can avoid alternative download mechanisms. """ - if self._first_page_response is None or self.next_page_token: + if self._first_page_response is None: return False - return self._first_page_response.get(self._next_token) is None + total_cached_rows = len(self._first_page_response.get(self._items_key, [])) + if self.max_results is not None and total_cached_rows >= self.max_results: + return True + + if ( + self.next_page_token is None + and self._first_page_response.get(self._next_token) is None + ): + return True + + if self._total_rows is not None: + almost_completely = self._total_rows * ALMOST_COMPLETELY_CACHED_RATIO + if total_cached_rows >= almost_completely: + return True + + return False def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): """Returns True if the BigQuery Storage API can be used. @@ -1647,7 +1666,14 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): if not using_bqstorage_api: return False - if self._is_completely_cached(): + if self._table is None: + return False + + # The developer is manually paging through results if this is set. + if self.next_page_token is not None: + return False + + if self._is_almost_completely_cached(): return False if self.max_results is not None: @@ -1671,7 +1697,15 @@ def _get_next_page_response(self): The parsed JSON response of the next page's contents. """ if self._first_page_response: - response = self._first_page_response + rows = self._first_page_response.get(self._items_key, [])[ + : self.max_results + ] + response = { + self._items_key: rows, + } + if self._next_token in self._first_page_response: + response[self._next_token] = self._first_page_response[self._next_token] + self._first_page_response = None return response diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 39275063adbf..776234b5ba31 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -25,6 +25,7 @@ import requests from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS +import google.cloud.bigquery._job_helpers import google.cloud.bigquery.query from google.cloud.bigquery.table import _EmptyRowIterator @@ -1081,6 +1082,114 @@ def test_result_with_done_job_calls_get_query_results(self): timeout=None, ) conn.api_request.assert_has_calls([query_results_call, query_results_page_call]) + assert conn.api_request.call_count == 2 + + def test_result_with_done_jobs_query_response_doesnt_call_get_query_results(self): + """With a done result from jobs.query, we don't need to call + jobs.getQueryResults to wait for the query to finish. + + jobs.get is still called because there is an assumption that after + QueryJob.result(), all job metadata is available locally. + """ + job_resource = self._make_resource(started=True, ended=True, location="EU") + conn = make_connection(job_resource) + client = _make_client(self.PROJECT, connection=conn) + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "rows": [{"f": [{"v": "abc"}]}], + "totalRows": "1", + } + job = google.cloud.bigquery._job_helpers._to_query_job( + client, + "SELECT 'abc' AS col1", + request_config=None, + query_response=query_resource_done, + ) + assert job.state == "DONE" + + result = job.result() + + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + job_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}" + conn.api_request.assert_called_once_with( + method="GET", + path=job_path, + query_params={}, + timeout=None, + ) + + def test_result_with_done_jobs_query_response_and_page_size_invalidates_cache(self): + """We don't call jobs.query with a page size, so if the user explicitly + requests a certain size, invalidate the cache. + """ + # Arrange + job_resource = self._make_resource( + started=True, ended=True, location="asia-northeast1" + ) + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "rows": [{"f": [{"v": "abc"}]}], + "pageToken": "initial-page-token-shouldnt-be-used", + "totalRows": "4", + } + query_page_resource = { + "totalRows": 4, + "pageToken": "some-page-token", + "rows": [ + {"f": [{"v": "row1"}]}, + {"f": [{"v": "row2"}]}, + {"f": [{"v": "row3"}]}, + ], + } + query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} + conn = make_connection(job_resource, query_page_resource, query_page_resource_2) + client = _make_client(self.PROJECT, connection=conn) + job = google.cloud.bigquery._job_helpers._to_query_job( + client, + "SELECT col1 FROM table", + request_config=None, + query_response=query_resource_done, + ) + assert job.state == "DONE" + + # Act + result = job.result(page_size=3) + + # Assert + actual_rows = list(result) + self.assertEqual(len(actual_rows), 4) + + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" + query_page_1_call = mock.call( + method="GET", + path=query_results_path, + query_params={ + "maxResults": 3, + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "asia-northeast1", + "formatOptions.useInt64Timestamp": True, + }, + timeout=None, + ) + query_page_2_call = mock.call( + method="GET", + path=query_results_path, + query_params={ + "pageToken": "some-page-token", + "maxResults": 3, + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "asia-northeast1", + "formatOptions.useInt64Timestamp": True, + }, + timeout=None, + ) + conn.api_request.assert_has_calls([query_page_1_call, query_page_2_call]) def test_result_with_max_results(self): from google.cloud.bigquery.table import RowIterator diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index aae4890b3e82..949c1993bb27 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -1362,13 +1362,13 @@ def test_errors_present(self): self.assertEqual(query.errors, ERRORS) def test_job_id_missing(self): - with self.assertRaises(ValueError): - self._make_one({}) + query = self._make_one({}) + self.assertIsNone(query.job_id) def test_job_id_broken_job_reference(self): resource = {"jobReference": {"bogus": "BOGUS"}} - with self.assertRaises(ValueError): - self._make_one(resource) + query = self._make_one(resource) + self.assertIsNone(query.job_id) def test_job_id_present(self): resource = self._make_resource() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index d9f259e7232d..05ad8de6eefb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2210,6 +2210,39 @@ def test_iterate_with_cached_first_page(self): method="GET", path=path, query_params={"pageToken": "next-page"} ) + def test_iterate_with_cached_first_page_max_results(self): + from google.cloud.bigquery.schema import SchemaField + + first_page = { + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + "pageToken": "next-page", + } + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + path = "/foo" + api_request = mock.Mock(return_value=first_page) + row_iterator = self._make_one( + _mock_client(), + api_request, + path, + schema, + max_results=3, + first_page_response=first_page, + ) + rows = list(row_iterator) + self.assertEqual(len(rows), 3) + self.assertEqual(rows[0].age, 27) + self.assertEqual(rows[1].age, 28) + self.assertEqual(rows[2].age, 32) + api_request.assert_not_called() + def test_page_size(self): from google.cloud.bigquery.schema import SchemaField @@ -2235,19 +2268,58 @@ def test_page_size(self): query_params={"maxResults": row_iterator._page_size}, ) - def test__is_completely_cached_returns_false_without_first_page(self): + def test__is_almost_completely_cached_returns_false_without_first_page(self): iterator = self._make_one(first_page_response=None) - self.assertFalse(iterator._is_completely_cached()) + self.assertFalse(iterator._is_almost_completely_cached()) - def test__is_completely_cached_returns_false_with_page_token(self): - first_page = {"pageToken": "next-page"} + def test__is_almost_completely_cached_returns_true_with_more_rows_than_max_results( + self, + ): + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + ] + first_page = {"pageToken": "next-page", "rows": rows} + iterator = self._make_one(max_results=4, first_page_response=first_page) + self.assertTrue(iterator._is_almost_completely_cached()) + + def test__is_almost_completely_cached_returns_false_with_too_many_rows_remaining( + self, + ): + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + first_page = {"pageToken": "next-page", "rows": rows} + iterator = self._make_one(first_page_response=first_page, total_rows=100) + self.assertFalse(iterator._is_almost_completely_cached()) + + def test__is_almost_completely_cached_returns_false_with_rows_remaining_and_no_total_rows( + self, + ): + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + first_page = {"pageToken": "next-page", "rows": rows} iterator = self._make_one(first_page_response=first_page) - self.assertFalse(iterator._is_completely_cached()) + self.assertFalse(iterator._is_almost_completely_cached()) + + def test__is_almost_completely_cached_returns_true_with_some_rows_remaining(self): + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + first_page = {"pageToken": "next-page", "rows": rows} + iterator = self._make_one(first_page_response=first_page, total_rows=6) + self.assertTrue(iterator._is_almost_completely_cached()) - def test__is_completely_cached_returns_true(self): + def test__is_almost_completely_cached_returns_true_with_no_rows_remaining(self): first_page = {"rows": []} iterator = self._make_one(first_page_response=first_page) - self.assertTrue(iterator._is_completely_cached()) + self.assertTrue(iterator._is_almost_completely_cached()) def test__validate_bqstorage_returns_false_when_completely_cached(self): first_page = {"rows": []} @@ -2258,6 +2330,25 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test__validate_bqstorage_returns_true_if_no_cached_results(self): + iterator = self._make_one(first_page_response=None) # not cached + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + self.assertTrue(result) + + def test__validate_bqstorage_returns_false_if_page_token_set(self): + iterator = self._make_one( + page_token="abc", first_page_response=None # not cached + ) + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + self.assertFalse(result) + def test__validate_bqstorage_returns_false_if_max_results_set(self): iterator = self._make_one( max_results=10, first_page_response=None # not cached diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_arrow.py b/packages/google-cloud-bigquery/tests/unit/test_table_arrow.py new file mode 100644 index 000000000000..6f1e6f76a39b --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_table_arrow.py @@ -0,0 +1,134 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from google.cloud import bigquery +import google.cloud.bigquery.table + + +pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") + + +def test_to_arrow_with_jobs_query_response(): + resource = { + "kind": "bigquery#queryResponse", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "jobReference": { + "projectId": "test-project", + "jobId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + "location": "US", + }, + "totalRows": "9", + "rows": [ + {"f": [{"v": "Tiarra"}, {"v": "6"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}]}, + {"f": [{"v": "Tina"}, {"v": "26"}]}, + {"f": [{"v": "Tierra"}, {"v": "10"}]}, + {"f": [{"v": "Tia"}, {"v": "17"}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}]}, + {"f": [{"v": "Tiffany"}, {"v": "229"}]}, + {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + ], + "totalBytesProcessed": "154775150", + "jobComplete": True, + "cacheHit": False, + "queryId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + } + + rows = google.cloud.bigquery.table.RowIterator( + client=None, + api_request=None, + path=None, + schema=[ + bigquery.SchemaField.from_api_repr(field) + for field in resource["schema"]["fields"] + ], + first_page_response=resource, + ) + records = rows.to_arrow() + + assert records.column_names == ["name", "number"] + assert records["name"].to_pylist() == [ + "Tiarra", + "Timothy", + "Tina", + "Tierra", + "Tia", + "Tiara", + "Tiana", + "Tiffany", + "Tiffani", + ] + assert records["number"].to_pylist() == [6, 325, 26, 10, 17, 22, 6, 229, 8] + + +def test_to_arrow_with_jobs_query_response_and_max_results(): + resource = { + "kind": "bigquery#queryResponse", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "jobReference": { + "projectId": "test-project", + "jobId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + "location": "US", + }, + "totalRows": "9", + "rows": [ + {"f": [{"v": "Tiarra"}, {"v": "6"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}]}, + {"f": [{"v": "Tina"}, {"v": "26"}]}, + {"f": [{"v": "Tierra"}, {"v": "10"}]}, + {"f": [{"v": "Tia"}, {"v": "17"}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}]}, + {"f": [{"v": "Tiffany"}, {"v": "229"}]}, + {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + ], + "totalBytesProcessed": "154775150", + "jobComplete": True, + "cacheHit": False, + "queryId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + } + + rows = google.cloud.bigquery.table.RowIterator( + client=None, + api_request=None, + path=None, + schema=[ + bigquery.SchemaField.from_api_repr(field) + for field in resource["schema"]["fields"] + ], + first_page_response=resource, + max_results=3, + ) + records = rows.to_arrow() + + assert records.column_names == ["name", "number"] + assert records["name"].to_pylist() == [ + "Tiarra", + "Timothy", + "Tina", + ] + assert records["number"].to_pylist() == [6, 325, 26] diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py index dfe512eea49e..6970d9d65280 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py @@ -201,3 +201,62 @@ def test_to_dataframe_arrays(monkeypatch, class_under_test): assert df.dtypes["int64_repeated"].name == "object" assert tuple(df["int64_repeated"][0]) == (-1, 0, 2) + + +def test_to_dataframe_with_jobs_query_response(class_under_test): + resource = { + "kind": "bigquery#queryResponse", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "jobReference": { + "projectId": "test-project", + "jobId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + "location": "US", + }, + "totalRows": "9", + "rows": [ + {"f": [{"v": "Tiarra"}, {"v": "6"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}]}, + {"f": [{"v": "Tina"}, {"v": "26"}]}, + {"f": [{"v": "Tierra"}, {"v": "10"}]}, + {"f": [{"v": "Tia"}, {"v": "17"}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}]}, + {"f": [{"v": "Tiffany"}, {"v": "229"}]}, + {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + ], + "totalBytesProcessed": "154775150", + "jobComplete": True, + "cacheHit": False, + "queryId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + } + + rows = class_under_test( + client=None, + api_request=None, + path=None, + schema=[ + bigquery.SchemaField.from_api_repr(field) + for field in resource["schema"]["fields"] + ], + first_page_response=resource, + ) + df = rows.to_dataframe() + + assert list(df.columns) == ["name", "number"] + assert list(df["name"]) == [ + "Tiarra", + "Timothy", + "Tina", + "Tierra", + "Tia", + "Tiara", + "Tiana", + "Tiffany", + "Tiffani", + ] + assert list(df["number"]) == [6, 325, 26, 10, 17, 22, 6, 229, 8] From 8ea864a492d43de08fdd2d23ed5338d44b8e31a9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Nov 2023 11:51:45 -0600 Subject: [PATCH 1675/2016] fix: ensure query job retry has longer deadline than API request deadline (#1734) In cases where we can't disambiguate API failure from job failure, this ensures we can still retry the job at least once. --- .../google-cloud-bigquery/google/cloud/bigquery/retry.py | 9 +++++++-- packages/google-cloud-bigquery/tests/unit/test_retry.py | 5 +++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index d0830ed13c37..b01c0662c53d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -34,7 +34,12 @@ auth_exceptions.TransportError, ) -_DEFAULT_JOB_DEADLINE = 60.0 * 10.0 # seconds +_DEFAULT_RETRY_DEADLINE = 10.0 * 60.0 # 10 minutes + +# Allow for a few retries after the API request times out. This relevant for +# rateLimitExceeded errors, which can be raised either by the Google load +# balancer or the BigQuery job server. +_DEFAULT_JOB_DEADLINE = 3.0 * _DEFAULT_RETRY_DEADLINE def _should_retry(exc): @@ -51,7 +56,7 @@ def _should_retry(exc): return reason in _RETRYABLE_REASONS -DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=600.0) +DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=_DEFAULT_RETRY_DEADLINE) """The default retry object. Any method with a ``retry`` parameter will be retried automatically, diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index 60d04de8948b..1109b7ff2d81 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -125,6 +125,7 @@ def test_DEFAULT_JOB_RETRY_predicate(): def test_DEFAULT_JOB_RETRY_deadline(): - from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY, DEFAULT_RETRY - assert DEFAULT_JOB_RETRY._deadline == 600 + # Make sure we can retry the job at least once. + assert DEFAULT_JOB_RETRY._deadline > DEFAULT_RETRY._deadline From e7e59ccf8c8819bc1d56b129e78669eab4a5e471 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Nov 2023 10:24:14 -0600 Subject: [PATCH 1676/2016] fix: `load_table_from_dataframe` now assumes there may be local null values (#1735) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if the remote schema is REQUIRED Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #1692 🦕 --- .../google/cloud/bigquery/_pandas_helpers.py | 10 +++- .../tests/system/test_pandas.py | 47 ++++++++++++++++--- .../tests/unit/test__pandas_helpers.py | 47 +++++++++++++------ 3 files changed, 81 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 53db9511c058..380df7b1dffd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -178,12 +178,18 @@ def bq_to_arrow_field(bq_field, array_type=None): if arrow_type is not None: if array_type is not None: arrow_type = array_type # For GEOGRAPHY, at least initially - is_nullable = bq_field.mode.upper() == "NULLABLE" metadata = BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA.get( bq_field.field_type.upper() if bq_field.field_type else "" ) return pyarrow.field( - bq_field.name, arrow_type, nullable=is_nullable, metadata=metadata + bq_field.name, + arrow_type, + # Even if the remote schema is REQUIRED, there's a chance there's + # local NULL values. Arrow will gladly interpret these NULL values + # as non-NULL and give you an arbitrary value. See: + # https://github.com/googleapis/python-bigquery/issues/1692 + nullable=True, + metadata=metadata, ) warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name)) diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index a46f8e3dfe46..9f7fc242e465 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -428,8 +428,7 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): - """Test that a DataFrame with required columns can be uploaded if a - BigQuery schema is specified. + """Test that a DataFrame can be uploaded to a table with required columns. See: https://github.com/googleapis/google-cloud-python/issues/8093 """ @@ -440,7 +439,6 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}] dataframe = pandas.DataFrame(records, columns=["name", "age"]) - job_config = bigquery.LoadJobConfig(schema=table_schema) table_id = "{}.{}.load_table_from_dataframe_w_required".format( bigquery_client.project, dataset_id ) @@ -451,15 +449,50 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): bigquery.Table(table_id, schema=table_schema) ) - job_config = bigquery.LoadJobConfig(schema=table_schema) - load_job = bigquery_client.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) load_job.result() table = bigquery_client.get_table(table) assert tuple(table.schema) == table_schema assert table.num_rows == 2 + for field in table.schema: + assert field.mode == "REQUIRED" + + +def test_load_table_from_dataframe_w_required_but_local_nulls_fails( + bigquery_client, dataset_id +): + """Test that a DataFrame with nulls can't be uploaded to a table with + required columns. + + See: https://github.com/googleapis/python-bigquery/issues/1692 + """ + table_schema = ( + bigquery.SchemaField("name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ) + + records = [ + {"name": "Chip", "age": 2}, + {"name": "Dale", "age": 3}, + {"name": None, "age": None}, + {"name": "Alvin", "age": 4}, + ] + dataframe = pandas.DataFrame(records, columns=["name", "age"]) + table_id = ( + "{}.{}.load_table_from_dataframe_w_required_but_local_nulls_fails".format( + bigquery_client.project, dataset_id + ) + ) + + # Create the table before loading so that schema mismatch errors are + # identified. + helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + with pytest.raises(google.api_core.exceptions.BadRequest, match="null"): + bigquery_client.load_table_from_dataframe(dataframe, table_id).result() def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id): diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 212a6f1dde35..1f1b4eeb3802 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -1017,30 +1017,41 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): ) data = { - "field01": ["hello", "world"], - "field02": [b"abd", b"efg"], - "field03": [1, 2], - "field04": [3, 4], - "field05": [1.25, 9.75], - "field06": [-1.75, -3.5], - "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field01": ["hello", None, "world"], + "field02": [b"abd", b"efg", b"hij"], + "field03": [1, 2, 3], + "field04": [4, None, 5], + "field05": [1.25, 0.0, 9.75], + "field06": [-1.75, None, -3.5], + "field07": [ + decimal.Decimal("1.2345"), + decimal.Decimal("6.7891"), + -decimal.Decimal("10.111213"), + ], "field08": [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), ], - "field09": [True, False], - "field10": [False, True], + "field09": [True, False, True], + "field10": [False, True, None], "field11": [ datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=datetime.timezone.utc), + datetime.datetime(2022, 7, 14, 23, 59, 59, tzinfo=datetime.timezone.utc), ], - "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], - "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], + "field12": [datetime.date(9999, 12, 31), None, datetime.date(1970, 1, 1)], + "field13": [datetime.time(23, 59, 59, 999999), None, datetime.time(12, 0, 0)], "field14": [ datetime.datetime(1970, 1, 1, 0, 0, 0), + None, datetime.datetime(2012, 12, 21, 9, 7, 42), ], - "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], + "field15": [ + None, + "POINT(30 10)", + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], } dataframe = pandas.DataFrame(data) @@ -1049,7 +1060,11 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): assert len(arrow_schema) == len(bq_schema) for arrow_field in arrow_schema: - assert not arrow_field.nullable + # Even if the remote schema is REQUIRED, there's a chance there's + # local NULL values. Arrow will gladly interpret these NULL values + # as non-NULL and give you an arbitrary value. See: + # https://github.com/googleapis/python-bigquery/issues/1692 + assert arrow_field.nullable @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1101,7 +1116,11 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): arrow_schema = arrow_table.schema expected_fields = [ - pyarrow.field("field01", "string", nullable=False), + # Even if the remote schema is REQUIRED, there's a chance there's + # local NULL values. Arrow will gladly interpret these NULL values + # as non-NULL and give you an arbitrary value. See: + # https://github.com/googleapis/python-bigquery/issues/1692 + pyarrow.field("field01", "string", nullable=True), pyarrow.field("field02", "bool", nullable=True), ] assert list(arrow_schema) == expected_fields From e5822406b05e7b07e1940349c91bc1dec2b4b9ad Mon Sep 17 00:00:00 2001 From: Kira Date: Wed, 22 Nov 2023 12:51:21 -0800 Subject: [PATCH 1677/2016] chore: standardize samples directory - delete unneeded dependencies (#1732) * chore: standardize samples directory = delete unneeded dependencies * Removed unused import for linter --- .../samples/desktopapp/conftest.py | 23 +++ .../samples/desktopapp/requirements.txt | 15 -- .../samples/magics/requirements.txt | 10 +- .../samples/notebooks/conftest.py | 23 +++ .../samples/notebooks/requirements.txt | 7 - .../samples/snippets/jupyter_tutorial_test.py | 175 ------------------ .../samples/snippets/requirements.txt | 18 +- .../samples/snippets/user_credentials.py | 78 -------- .../samples/snippets/user_credentials_test.py | 45 ----- 9 files changed, 48 insertions(+), 346 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/desktopapp/conftest.py create mode 100644 packages/google-cloud-bigquery/samples/notebooks/conftest.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/user_credentials.py delete mode 100644 packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py diff --git a/packages/google-cloud-bigquery/samples/desktopapp/conftest.py b/packages/google-cloud-bigquery/samples/desktopapp/conftest.py new file mode 100644 index 000000000000..fdc85a8522c1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/desktopapp/conftest.py @@ -0,0 +1,23 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + + +@pytest.fixture +def bigquery_client_patch( + monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client +) -> None: + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 1eeffe32ca4f..a5b3ad130227 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,17 +1,2 @@ -db-dtypes==1.1.1 google-cloud-bigquery==3.11.4 -google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.57.0 -ipywidgets==8.1.0 -ipython===7.31.1; python_version == '3.7' -ipython===8.0.1; python_version == '3.8' -ipython==8.14.0; python_version >= '3.9' -matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.2; python_version >= '3.8' -pandas===1.3.5; python_version == '3.7' -pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1; python_version == '3.7' -pyarrow==14.0.1; python_version >= '3.8' -pytz==2023.3 -typing-extensions==4.7.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 8feb236fc988..c8f6b2765393 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,16 +1,8 @@ db-dtypes==1.1.1 +google.cloud.bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 -google-auth-oauthlib==1.0.0 -grpcio==1.57.0 -ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' -matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1; python_version == '3.7' -pyarrow==14.0.1; python_version >= '3.8' -pytz==2023.3 -typing-extensions==4.7.1 diff --git a/packages/google-cloud-bigquery/samples/notebooks/conftest.py b/packages/google-cloud-bigquery/samples/notebooks/conftest.py new file mode 100644 index 000000000000..fdc85a8522c1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/notebooks/conftest.py @@ -0,0 +1,23 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + + +@pytest.fixture +def bigquery_client_patch( + monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client +) -> None: + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 1eeffe32ca4f..22c46297f260 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,9 +1,6 @@ db-dtypes==1.1.1 google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 -google-auth-oauthlib==1.0.0 -grpcio==1.57.0 -ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' @@ -11,7 +8,3 @@ matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1; python_version == '3.7' -pyarrow==14.0.1; python_version >= '3.8' -pytz==2023.3 -typing-extensions==4.7.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py deleted file mode 100644 index 9d42a4eda7b6..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/jupyter_tutorial_test.py +++ /dev/null @@ -1,175 +0,0 @@ -# Copyright 2018 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import typing -from typing import Iterator - -import pytest - -if typing.TYPE_CHECKING: - from IPython.terminal.interactiveshell import TerminalInteractiveShell - -IPython = pytest.importorskip("IPython") -interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") -tools = pytest.importorskip("IPython.testing.tools") -matplotlib = pytest.importorskip("matplotlib") - -# Ignore semicolon lint warning because semicolons are used in notebooks -# flake8: noqa E703 - - -@pytest.fixture(scope="session") -def ipython() -> "TerminalInteractiveShell": - config = tools.default_config() - config.TerminalInteractiveShell.simple_prompt = True - shell = interactiveshell.TerminalInteractiveShell.instance(config=config) - return shell - - -@pytest.fixture() -def ipython_interactive( - request: pytest.FixtureRequest, ipython: "TerminalInteractiveShell" -) -> Iterator["TerminalInteractiveShell"]: - """Activate IPython's builtin hooks - - for the duration of the test scope. - """ - with ipython.builtin_trap: - yield ipython - - -def _strip_region_tags(sample_text: str) -> str: - """Remove blank lines and region tags from sample text""" - magic_lines = [ - line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line - ] - return "\n".join(magic_lines) - - -def test_jupyter_tutorial(ipython: "TerminalInteractiveShell") -> None: - matplotlib.use("agg") - ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") - - sample = """ - # [START bigquery_jupyter_magic_gender_by_year] - %%bigquery - SELECT - source_year AS year, - COUNT(is_male) AS birth_count - FROM `bigquery-public-data.samples.natality` - GROUP BY year - ORDER BY year DESC - LIMIT 15 - # [END bigquery_jupyter_magic_gender_by_year] - """ - result = ip.run_cell(_strip_region_tags(sample)) - result.raise_error() # Throws an exception if the cell failed. - - sample = """ - # [START bigquery_jupyter_magic_gender_by_year_var] - %%bigquery total_births - SELECT - source_year AS year, - COUNT(is_male) AS birth_count - FROM `bigquery-public-data.samples.natality` - GROUP BY year - ORDER BY year DESC - LIMIT 15 - # [END bigquery_jupyter_magic_gender_by_year_var] - """ - result = ip.run_cell(_strip_region_tags(sample)) - result.raise_error() # Throws an exception if the cell failed. - - assert "total_births" in ip.user_ns # verify that variable exists - total_births = ip.user_ns["total_births"] - # [START bigquery_jupyter_plot_births_by_year] - total_births.plot(kind="bar", x="year", y="birth_count") - # [END bigquery_jupyter_plot_births_by_year] - - sample = """ - # [START bigquery_jupyter_magic_gender_by_weekday] - %%bigquery births_by_weekday - SELECT - wday, - SUM(CASE WHEN is_male THEN 1 ELSE 0 END) AS male_births, - SUM(CASE WHEN is_male THEN 0 ELSE 1 END) AS female_births - FROM `bigquery-public-data.samples.natality` - WHERE wday IS NOT NULL - GROUP BY wday - ORDER BY wday ASC - # [END bigquery_jupyter_magic_gender_by_weekday] - """ - result = ip.run_cell(_strip_region_tags(sample)) - result.raise_error() # Throws an exception if the cell failed. - - assert "births_by_weekday" in ip.user_ns # verify that variable exists - births_by_weekday = ip.user_ns["births_by_weekday"] - # [START bigquery_jupyter_plot_births_by_weekday] - births_by_weekday.plot(x="wday") - # [END bigquery_jupyter_plot_births_by_weekday] - - # [START bigquery_jupyter_import_and_client] - from google.cloud import bigquery - - client = bigquery.Client() - # [END bigquery_jupyter_import_and_client] - - # [START bigquery_jupyter_query_plurality_by_year] - sql = """ - SELECT - plurality, - COUNT(1) AS count, - year - FROM - `bigquery-public-data.samples.natality` - WHERE - NOT IS_NAN(plurality) AND plurality > 1 - GROUP BY - plurality, year - ORDER BY - count DESC - """ - df = client.query(sql).to_dataframe() - df.head() - # [END bigquery_jupyter_query_plurality_by_year] - - # [START bigquery_jupyter_plot_plurality_by_year] - pivot_table = df.pivot(index="year", columns="plurality", values="count") - pivot_table.plot(kind="bar", stacked=True, figsize=(15, 7)) - # [END bigquery_jupyter_plot_plurality_by_year] - - # [START bigquery_jupyter_query_births_by_gestation] - sql = """ - SELECT - gestation_weeks, - COUNT(1) AS count - FROM - `bigquery-public-data.samples.natality` - WHERE - NOT IS_NAN(gestation_weeks) AND gestation_weeks <> 99 - GROUP BY - gestation_weeks - ORDER BY - gestation_weeks - """ - df = client.query(sql).to_dataframe() - # [END bigquery_jupyter_query_births_by_gestation] - - # [START bigquery_jupyter_plot_births_by_gestation] - ax = df.plot(kind="bar", x="gestation_weeks", y="count", figsize=(15, 7)) - ax.set_title("Count of Births by Gestation Weeks") - ax.set_xlabel("Gestation Weeks") - ax.set_ylabel("Count") - # [END bigquery_jupyter_plot_births_by_gestation] diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 72a77ad113e8..f49c7494fd81 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,17 +1 @@ -db-dtypes==1.1.1 -google-cloud-bigquery==3.11.4 -google-cloud-bigquery-storage==2.22.0 -google-auth-oauthlib==1.0.0 -grpcio==1.57.0 -ipywidgets==8.1.0 -ipython===7.31.1; python_version == '3.7' -ipython===8.0.1; python_version == '3.8' -ipython==8.14.0; python_version >= '3.9' -matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.2; python_version >= '3.8' -pandas===1.3.5; python_version == '3.7' -pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1; python_version == '3.7' -pyarrow==14.0.1; python_version >= '3.8' -pytz==2023.3 -typing-extensions==4.7.1 \ No newline at end of file +google-cloud-bigquery==3.11.4 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials.py deleted file mode 100644 index 487a56c5ff9c..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Command-line application to run a query using user credentials. - -You must supply a client secrets file, which would normally be bundled with -your application. -""" - -import argparse - - -def main(project: str) -> None: - # [START bigquery_auth_user_flow] - from google_auth_oauthlib import flow - - # A local server is used as the callback URL in the auth flow. - appflow = flow.InstalledAppFlow.from_client_secrets_file( - "client_secrets.json", scopes=["https://www.googleapis.com/auth/bigquery"] - ) - - # This launches a local server to be used as the callback URL in the desktop - # app auth flow. If you are accessing the application remotely, such as over - # SSH or a remote Jupyter notebook, this flow will not work. Use the - # `gcloud auth application-default login --no-browser` command or workload - # identity federation to get authentication tokens, instead. - # - appflow.run_local_server() - - credentials = appflow.credentials - # [END bigquery_auth_user_flow] - - # [START bigquery_auth_user_query] - from google.cloud import bigquery - - # TODO: Uncomment the line below to set the `project` variable. - # project = 'user-project-id' - # - # The `project` variable defines the project to be billed for query - # processing. The user must have the bigquery.jobs.create permission on - # this project to run a query. See: - # https://cloud.google.com/bigquery/docs/access-control#permissions - - client = bigquery.Client(project=project, credentials=credentials) - - query_string = """SELECT name, SUM(number) as total - FROM `bigquery-public-data.usa_names.usa_1910_current` - WHERE name = 'William' - GROUP BY name; - """ - query_job = client.query(query_string) - - # Print the results. - for row in query_job.result(): # Wait for the job to complete. - print("{}: {}".format(row["name"], row["total"])) - # [END bigquery_auth_user_query] - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter - ) - parser.add_argument("project", help="Project to use for BigQuery billing.") - args = parser.parse_args() - main(args.project) diff --git a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py b/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py deleted file mode 100644 index 8448187def4c..000000000000 --- a/packages/google-cloud-bigquery/samples/snippets/user_credentials_test.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from typing import Iterator, Union - -import google.auth -import mock -import pytest - -from user_credentials import main # type: ignore - -PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] - -MockType = Union[mock.mock.MagicMock, mock.mock.AsyncMock] - - -@pytest.fixture -def mock_flow() -> Iterator[MockType]: - flow_patch = mock.patch("google_auth_oauthlib.flow.InstalledAppFlow", autospec=True) - - with flow_patch as flow_mock: - flow_mock.from_client_secrets_file.return_value = flow_mock - flow_mock.credentials = google.auth.default()[0] - yield flow_mock - - -def test_auth_query_console( - mock_flow: MockType, capsys: pytest.CaptureFixture[str] -) -> None: - main(PROJECT) - out, _ = capsys.readouterr() - # Fun fact: William P. Wood was the 1st director of the US Secret Service. - assert "William" in out From 13439b359bc549c69921733dbe09177b3cc35d33 Mon Sep 17 00:00:00 2001 From: Kira Date: Wed, 22 Nov 2023 13:39:50 -0800 Subject: [PATCH 1678/2016] fix: move grpc, proto-plus and protobuf packages to extras (#1721) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: move grpc, proto-plus and protobuff packages to extras * formatted with black * feat: add `job_timeout_ms` to job configuration classes (#1675) * fix: adds new property and tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates docs to correct a sphinx failure * Updates formatting * Update tests/system/test_query.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update google/cloud/bigquery/job/base.py * updates one test and uses int_or_none * Update tests/system/test_query.py testing something. * Update tests/system/test_query.py * testing coverage feature * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * minor edits * tweaks to noxfile for testing purposes * add new test to base as experiment * adds a test, updates import statements * add another test * edit to tests * formatting fixes * update noxfile to correct debug code * removes unneeded comments. --------- Co-authored-by: Owl Bot --------- Co-authored-by: Chalmer Lowe Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- packages/google-cloud-bigquery/setup.py | 10 +++++----- .../tests/unit/test_legacy_types.py | 8 ++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 4e87b3b84e61..ead602e12da3 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -29,20 +29,16 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "grpcio >= 1.47.0, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/1262 - "grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'", # NOTE: Maintainers, please do not require google-api-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 - "google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "proto-plus >= 1.15.0, <2.0.0dev", + "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-cloud-core >= 1.6.0, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 20.0.0", - "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "requests >= 2.21.0, < 3.0.0dev", ] @@ -82,6 +78,10 @@ "opentelemetry-sdk >= 1.1.0", "opentelemetry-instrumentation >= 0.20b0", ], + "bigquery_v2": [ + "proto-plus >= 1.15.0, <2.0.0dev", + "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. + ], } all_extras = [] diff --git a/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py b/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py index 3f51cc5114db..3431074fd83f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py +++ b/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py @@ -13,9 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest + import warnings +try: + import proto # type: ignore +except ImportError: # pragma: NO COVER + proto = None + +@pytest.mark.skipif(proto is None, reason="proto is not installed") def test_importing_legacy_types_emits_warning(): with warnings.catch_warnings(record=True) as warned: from google.cloud.bigquery_v2 import types # noqa: F401 From b1f6c9e197f9c714ea0224659ab1d35f34d23dff Mon Sep 17 00:00:00 2001 From: Kira Date: Tue, 28 Nov 2023 13:47:06 -0800 Subject: [PATCH 1679/2016] feat: removed pkg_resources from all test files and moved importlib into pandas extra (#1726) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Introduce compatibility with native namespace packages * Update copyright year * removed pkg_resources from all test files and moved importlib into pandas extra * feat: removed pkg_resources from all test files and moved importlib into pandas extra * Adding no cover tag to test code * reformatted with black * undo revert * perf: use the first page a results when `query(api_method="QUERY")` (#1723) * perf: use the first page a results when `query(api_method="QUERY")` * add tests * respect max_results with cached page * respect page_size, also avoid bqstorage if almost fully downloaded * skip true test if bqstorage not installed * coverage * fix: ensure query job retry has longer deadline than API request deadline (#1734) In cases where we can't disambiguate API failure from job failure, this ensures we can still retry the job at least once. * fix: `load_table_from_dataframe` now assumes there may be local null values (#1735) Even if the remote schema is REQUIRED Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #1692 🦕 * chore: standardize samples directory - delete unneeded dependencies (#1732) * chore: standardize samples directory = delete unneeded dependencies * Removed unused import for linter * fix: move grpc, proto-plus and protobuf packages to extras (#1721) * chore: move grpc, proto-plus and protobuff packages to extras * formatted with black * feat: add `job_timeout_ms` to job configuration classes (#1675) * fix: adds new property and tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates docs to correct a sphinx failure * Updates formatting * Update tests/system/test_query.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update google/cloud/bigquery/job/base.py * updates one test and uses int_or_none * Update tests/system/test_query.py testing something. * Update tests/system/test_query.py * testing coverage feature * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * minor edits * tweaks to noxfile for testing purposes * add new test to base as experiment * adds a test, updates import statements * add another test * edit to tests * formatting fixes * update noxfile to correct debug code * removes unneeded comments. --------- Co-authored-by: Owl Bot --------- Co-authored-by: Chalmer Lowe Co-authored-by: Owl Bot Co-authored-by: Tim Swast * remove unnecessary version checks * undo bad commit, remove unneeded version checks * Revert "undo bad commit, remove unneeded version checks" This reverts commit 5c82dcf85f5fd73ad37ab7805b85b88a65351801. * Revert "remove unnecessary version checks" This reverts commit 9331a7e034de1c651c5ebc454f38f602d612a03d. * revert bad changes, remove pkg_resources from file * after clarification, reimplement changes and ignore 3.12 tests * reformatted with black * removed minimum check * updated pandas installed version check --------- Co-authored-by: Anthonios Partheniou Co-authored-by: Tim Swast Co-authored-by: Chalmer Lowe Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.coveragerc | 5 --- .../google-cloud-bigquery/google/__init__.py | 24 ------------ .../google/cloud/__init__.py | 24 ------------ packages/google-cloud-bigquery/noxfile.py | 5 ++- packages/google-cloud-bigquery/setup.py | 10 +---- .../tests/system/test_pandas.py | 23 +++++------- .../tests/unit/job/test_query_pandas.py | 14 ++++--- .../tests/unit/test__pandas_helpers.py | 32 +++++----------- .../tests/unit/test_client.py | 23 +++++------- .../tests/unit/test_packaging.py | 37 +++++++++++++++++++ .../tests/unit/test_table.py | 18 ++++----- .../tests/unit/test_table_pandas.py | 14 ++++--- 12 files changed, 96 insertions(+), 133 deletions(-) delete mode 100644 packages/google-cloud-bigquery/google/__init__.py delete mode 100644 packages/google-cloud-bigquery/google/cloud/__init__.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_packaging.py diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index 1ed1a9704f83..04092257a47f 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -12,8 +12,3 @@ exclude_lines = pragma: NO COVER # Ignore debug-only repr def __repr__ - # Ignore pkg_resources exceptions. - # This is added at the module level as a safeguard for if someone - # generates the code and tries to run it without pip installing. This - # makes it virtually impossible to test properly. - except pkg_resources.DistributionNotFound diff --git a/packages/google-cloud-bigquery/google/__init__.py b/packages/google-cloud-bigquery/google/__init__.py deleted file mode 100644 index 8e60d8439d0d..000000000000 --- a/packages/google-cloud-bigquery/google/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -try: - import pkg_resources - - pkg_resources.declare_namespace(__name__) -except ImportError: - import pkgutil - - __path__ = pkgutil.extend_path(__path__, __name__) # type: ignore diff --git a/packages/google-cloud-bigquery/google/cloud/__init__.py b/packages/google-cloud-bigquery/google/cloud/__init__.py deleted file mode 100644 index 8e60d8439d0d..000000000000 --- a/packages/google-cloud-bigquery/google/cloud/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -try: - import pkg_resources - - pkg_resources.declare_namespace(__name__) -except ImportError: - import pkgutil - - __path__ = pkgutil.extend_path(__path__, __name__) # type: ignore diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 9ccbdd30ceb2..2469527287f8 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -137,7 +137,7 @@ def mypy(session): "types-requests", "types-setuptools", ) - session.run("mypy", "google/cloud", "--show-traceback") + session.run("mypy", "-p", "google", "--show-traceback") @nox.session(python=DEFAULT_PYTHON_VERSION) @@ -149,7 +149,8 @@ def pytype(session): session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) - session.run("pytype") + # See https://github.com/google/pytype/issues/464 + session.run("pytype", "-P", ".", "google/cloud/bigquery") @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index ead602e12da3..f21bb586db5b 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -62,6 +62,7 @@ "pandas>=1.1.0", pyarrow_dependency, "db-dtypes>=0.3.0,<2.0.0dev", + "importlib_metadata>=1.0.0; python_version<'3.8'", ], "ipywidgets": [ "ipywidgets>=7.7.0", @@ -108,16 +109,10 @@ # benchmarks, etc. packages = [ package - for package in setuptools.PEP420PackageFinder.find() + for package in setuptools.find_namespace_packages() if package.startswith("google") ] -# Determine which namespaces are needed. -namespaces = ["google"] -if "google.cloud" in packages: - namespaces.append("google.cloud") - - setuptools.setup( name=name, version=version, @@ -143,7 +138,6 @@ ], platforms="Posix; MacOS X; Windows", packages=packages, - namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, python_requires=">=3.7", diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 9f7fc242e465..e93f245c080c 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -23,9 +23,13 @@ import warnings import google.api_core.retry -import pkg_resources import pytest +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata + from google.cloud import bigquery from google.cloud.bigquery import enums @@ -42,11 +46,9 @@ ) if pandas is not None: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") - -PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" class MissingDataError(Exception): @@ -310,10 +312,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i ] -@pytest.mark.skipif( - PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION, - reason="Only `pandas version >=1.0.0` is supported", -) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_load_table_from_dataframe_w_nullable_int64_datatype( bigquery_client, dataset_id ): @@ -342,7 +341,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype( @pytest.mark.skipif( - PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION, + PANDAS_INSTALLED_VERSION[0:2].startswith("0."), reason="Only `pandas version >=1.0.0` is supported", ) def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( @@ -1043,9 +1042,7 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): assert len(dataframe.index) == 100 -@pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" -) +@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") @pytest.mark.parametrize( ("max_results",), ( diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 0accae0a2028..6189830ffabe 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -17,7 +17,6 @@ import json import mock -import pkg_resources import pytest @@ -45,14 +44,19 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata + from ..helpers import make_connection from .helpers import _make_client from .helpers import _make_job_resource if pandas is not None: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" pandas = pytest.importorskip("pandas") @@ -656,9 +660,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) -@pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" -) +@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 1f1b4eeb3802..ad40a6da6eb6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -19,7 +19,11 @@ import operator import queue import warnings -import pkg_resources + +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata import mock @@ -57,13 +61,10 @@ bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import() -PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") - if pandas is not None: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: - # Set to less than MIN version. - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" skip_if_no_bignumeric = pytest.mark.skipif( @@ -542,9 +543,7 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" -) +@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): rows = [pandas.Timestamp(row) for row in rows] @@ -806,10 +805,7 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( assert columns_and_indexes == expected -@pytest.mark.skipif( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, - reason="Requires `pandas version >= 1.0.0` which introduces pandas.NA", -) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_json_generator(module_under_test): utcnow = datetime.datetime.utcnow() df_data = collections.OrderedDict( @@ -837,16 +833,8 @@ def test_dataframe_to_json_generator(module_under_test): assert list(rows) == expected +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_json_generator_repeated_field(module_under_test): - pytest.importorskip( - "pandas", - minversion=str(PANDAS_MINIUM_VERSION), - reason=( - f"Requires `pandas version >= {PANDAS_MINIUM_VERSION}` " - "which introduces pandas.NA" - ), - ) - df_data = [ collections.OrderedDict( [("repeated_col", [pandas.NA, 2, None, 4]), ("not_repeated_col", "first")] diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index af61ceb42a94..ff4c40f48925 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -30,7 +30,11 @@ import requests import packaging import pytest -import pkg_resources + +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata try: import pandas @@ -76,13 +80,10 @@ from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection -PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") - if pandas is not None: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: - # Set to less than MIN version. - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" def _make_credentials(): @@ -8145,10 +8146,7 @@ def test_load_table_from_dataframe_unknown_table(self): timeout=DEFAULT_TIMEOUT, ) - @unittest.skipIf( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, - "Only `pandas version >=1.0.0` supported", - ) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -8193,10 +8191,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): SchemaField("x", "INT64", "NULLABLE", None), ) - @unittest.skipIf( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, - "Only `pandas version >=1.0.0` supported", - ) + @unittest.skipIf(pandas is None, "Requires `pandas`") # @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES diff --git a/packages/google-cloud-bigquery/tests/unit/test_packaging.py b/packages/google-cloud-bigquery/tests/unit/test_packaging.py new file mode 100644 index 000000000000..6f1b16c66750 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_packaging.py @@ -0,0 +1,37 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import subprocess +import sys + + +def test_namespace_package_compat(tmp_path): + # The ``google`` namespace package should not be masked + # by the presence of ``google-cloud-bigquery``. + google = tmp_path / "google" + google.mkdir() + google.joinpath("othermod.py").write_text("") + env = dict(os.environ, PYTHONPATH=str(tmp_path)) + cmd = [sys.executable, "-m", "google.othermod"] + subprocess.check_call(cmd, env=env) + + # The ``google.cloud`` namespace package should not be masked + # by the presence of ``google-cloud-bigquery``. + google_cloud = tmp_path / "google" / "cloud" + google_cloud.mkdir() + google_cloud.joinpath("othermod.py").write_text("") + env = dict(os.environ, PYTHONPATH=str(tmp_path)) + cmd = [sys.executable, "-m", "google.cloud.othermod"] + subprocess.check_call(cmd, env=env) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 05ad8de6eefb..85f335dd161b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -22,9 +22,13 @@ import warnings import mock -import pkg_resources import pytest +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata + import google.api_core.exceptions from test_utils.imports import maybe_fail_import @@ -71,9 +75,9 @@ tqdm = None if pandas is not None: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" def _mock_client(): @@ -3793,9 +3797,7 @@ def test_to_dataframe_w_dtypes_mapper(self): self.assertEqual(df.timestamp.dtype.name, "object") @unittest.skipIf(pandas is None, "Requires `pandas`") - @pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" - ) + @pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") def test_to_dataframe_w_none_dtypes_mapper(self): from google.cloud.bigquery.schema import SchemaField @@ -3908,9 +3910,7 @@ def test_to_dataframe_w_unsupported_dtypes_mapper(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" - ) + @pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") def test_to_dataframe_column_dtypes(self): from google.cloud.bigquery.schema import SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py index 6970d9d65280..b38568561be9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py @@ -15,7 +15,11 @@ import datetime import decimal from unittest import mock -import pkg_resources + +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata import pytest @@ -28,9 +32,9 @@ TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data" if pandas is not None: # pragma: NO COVER - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: # pragma: NO COVER - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" @pytest.fixture @@ -40,9 +44,7 @@ def class_under_test(): return RowIterator -@pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" -) +@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): # See tests/system/test_arrow.py for the actual types we get from the API. arrow_schema = pyarrow.schema( From 4d66a3e09c6699cf4db250c614a35ee0739551d5 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 1 Dec 2023 13:46:17 -0800 Subject: [PATCH 1680/2016] feat: Add support for Python 3.12 (#1736) * chore(python): Add Python 3.12 Source-Link: https://github.com/googleapis/synthtool/commit/af16e6d4672cc7b400f144de2fc3068b54ff47d2 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:bacc3af03bff793a03add584537b36b5644342931ad989e3ba1171d3bd5399f5 * add trove classifier for python 3.12 * update kokoro configs * Add python 3.12 to noxfile.py * update georaphy sample requirements * update geography samples requirements * add testing constraint file for 3.12 * remove repr test --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou Co-authored-by: Linchin --- .../.github/.OwlBot.lock.yaml | 4 +- ...deps-3.11.cfg => prerelease-deps-3.12.cfg} | 2 +- .../{snippets-3.11.cfg => snippets-3.12.cfg} | 2 +- .../{system-3.11.cfg => system-3.12.cfg} | 2 +- .../.kokoro/samples/python3.12/common.cfg | 40 +++++++++++++++++++ .../.kokoro/samples/python3.12/continuous.cfg | 6 +++ .../samples/python3.12/periodic-head.cfg | 11 +++++ .../.kokoro/samples/python3.12/periodic.cfg | 6 +++ .../.kokoro/samples/python3.12/presubmit.cfg | 6 +++ .../google-cloud-bigquery/CONTRIBUTING.rst | 6 ++- packages/google-cloud-bigquery/noxfile.py | 10 ++--- .../samples/desktopapp/noxfile.py | 2 +- .../samples/geography/noxfile.py | 2 +- .../samples/geography/requirements.txt | 14 ++++--- .../samples/magics/noxfile.py | 2 +- .../samples/notebooks/noxfile.py | 2 +- .../samples/snippets/noxfile.py | 2 +- packages/google-cloud-bigquery/setup.py | 1 + .../testing/constraints-3.12.txt | 0 .../tests/unit/test_query.py | 1 - 20 files changed, 97 insertions(+), 24 deletions(-) rename packages/google-cloud-bigquery/.kokoro/presubmit/{prerelease-deps-3.11.cfg => prerelease-deps-3.12.cfg} (77%) rename packages/google-cloud-bigquery/.kokoro/presubmit/{snippets-3.11.cfg => snippets-3.12.cfg} (81%) rename packages/google-cloud-bigquery/.kokoro/presubmit/{system-3.11.cfg => system-3.12.cfg} (82%) create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.12/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.12/continuous.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic-head.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.12/presubmit.cfg create mode 100644 packages/google-cloud-bigquery/testing/constraints-3.12.txt diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 453b540c1e58..eb4d9f794dc1 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:caffe0a9277daeccc4d1de5c9b55ebba0901b57c2f713ec9c876b0d4ec064f61 -# created: 2023-11-08T19:46:45.022803742Z + digest: sha256:bacc3af03bff793a03add584537b36b5644342931ad989e3ba1171d3bd5399f5 +# created: 2023-11-23T18:17:28.105124211Z diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.11.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.12.cfg similarity index 77% rename from packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.11.cfg rename to packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.12.cfg index 1e19f1239870..ece962a1703e 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.11.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.12.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps-3.11" + value: "prerelease_deps-3.12" } diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.11.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.12.cfg similarity index 81% rename from packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.11.cfg rename to packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.12.cfg index 74af6dd075af..1381e832349b 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.11.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.12.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "snippets-3.11" + value: "snippets-3.12" } diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.11.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.12.cfg similarity index 82% rename from packages/google-cloud-bigquery/.kokoro/presubmit/system-3.11.cfg rename to packages/google-cloud-bigquery/.kokoro/presubmit/system-3.12.cfg index 5ff31a60322b..789455bd6973 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.11.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.12.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "system-3.11" + value: "system-3.12" } diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/common.cfg new file mode 100644 index 000000000000..6eb699edd456 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.12" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-312" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/continuous.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic-head.cfg new file mode 100644 index 000000000000..5aa01bab5bf3 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic.cfg new file mode 100644 index 000000000000..71cd1e597e38 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/presubmit.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index 5dc30a1f8547..7be61e6b61a5 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9, 3.10 and 3.11 on both UNIX and Windows. + 3.7, 3.8, 3.9, 3.10, 3.11 and 3.12 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.11 -- -k + $ nox -s unit-3.12 -- -k .. note:: @@ -226,12 +226,14 @@ We support: - `Python 3.9`_ - `Python 3.10`_ - `Python 3.11`_ +- `Python 3.12`_ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ +.. _Python 3.12: https://docs.python.org/3.12/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 2469527287f8..ab7803040765 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -37,8 +37,8 @@ ) DEFAULT_PYTHON_VERSION = "3.8" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.11"] -UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.11", "3.12"] +UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() # 'docfx' is excluded since it only needs to run in 'docs-presubmit' @@ -81,7 +81,7 @@ def default(session, install_extras=True): constraints_path, ) - if install_extras and session.python == "3.11": + if install_extras and session.python in ["3.11", "3.12"]: install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" elif install_extras: install_target = ".[all]" @@ -187,7 +187,7 @@ def system(session): # Data Catalog needed for the column ACL test with a real Policy Tag. session.install("google-cloud-datacatalog", "-c", constraints_path) - if session.python == "3.11": + if session.python in ["3.11", "3.12"]: extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" @@ -251,7 +251,7 @@ def snippets(session): session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) - if session.python == "3.11": + if session.python in ["3.11", "3.12"]: extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" diff --git a/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py b/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py index 1224cbe212e4..3b7135946fd5 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py +++ b/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 1224cbe212e4..3b7135946fd5 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 867f44b3522c..d6cea7ec5799 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -7,10 +7,11 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.1.1 -Fiona==1.9.4.post1 -geojson==3.0.1 +Fiona==1.9.5 +geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' -geopandas==0.13.2; python_version >= '3.8' +geopandas==0.13.2; python_version == '3.8' +geopandas==0.14.1; python_version >= '3.9' google-api-core==2.11.1 google-auth==2.22.0 google-cloud-bigquery==3.11.4 @@ -19,9 +20,10 @@ google-cloud-core==2.3.3 google-crc32c==1.5.0 google-resumable-media==2.5.0 googleapis-common-protos==1.60.0 -grpcio==1.57.0 +grpcio==1.59.0 idna==3.4 -libcst==1.0.1 +libcst==1.0.1; python_version == '3.7' +libcst==1.1.0; python_version >= '3.8' munch==4.0.0 mypy-extensions==1.0.0 packaging==23.1 @@ -39,7 +41,7 @@ pytz==2023.3 PyYAML==6.0.1 requests==2.31.0 rsa==4.9 -Shapely==2.0.1 +Shapely==2.0.2 six==1.16.0 typing-extensions==4.7.1 typing-inspect==0.9.0 diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index 1224cbe212e4..3b7135946fd5 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/notebooks/noxfile.py b/packages/google-cloud-bigquery/samples/notebooks/noxfile.py index 1224cbe212e4..3b7135946fd5 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/noxfile.py +++ b/packages/google-cloud-bigquery/samples/notebooks/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 1224cbe212e4..3b7135946fd5 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index f21bb586db5b..9fbc91ecbf8c 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -133,6 +133,7 @@ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Operating System :: OS Independent", "Topic :: Internet", ], diff --git a/packages/google-cloud-bigquery/testing/constraints-3.12.txt b/packages/google-cloud-bigquery/testing/constraints-3.12.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 949c1993bb27..7c3438567f72 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -1281,7 +1281,6 @@ def test___repr__(self): field1 = self._make_one("test", _make_subparam("field1", "STRING", "hello")) got = repr(field1) self.assertIn("StructQueryParameter", got) - self.assertIn("'field1', 'STRING'", got) self.assertIn("'field1': 'hello'", got) From 5b0735449f6aededd32cc8b088cb766d9ce0a9bd Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 5 Dec 2023 09:54:15 -0800 Subject: [PATCH 1681/2016] chore: bump cryptography from 41.0.5 to 41.0.6 in /synthtool/gcp/templates/python_library/.kokoro (#1740) Source-Link: https://github.com/googleapis/synthtool/commit/9367caadcbb30b5b2719f30eb00c44cc913550ed Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2f155882785883336b4468d5218db737bb1d10c9cea7cb62219ad16fe248c03c Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/requirements.txt | 48 +++++++++---------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index eb4d9f794dc1..773c1dfd2146 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:bacc3af03bff793a03add584537b36b5644342931ad989e3ba1171d3bd5399f5 -# created: 2023-11-23T18:17:28.105124211Z + digest: sha256:2f155882785883336b4468d5218db737bb1d10c9cea7cb62219ad16fe248c03c +# created: 2023-11-29T14:54:29.548172703Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 8957e21104e2..e5c1ffca94b7 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -93,30 +93,30 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==41.0.5 \ - --hash=sha256:0c327cac00f082013c7c9fb6c46b7cc9fa3c288ca702c74773968173bda421bf \ - --hash=sha256:0d2a6a598847c46e3e321a7aef8af1436f11c27f1254933746304ff014664d84 \ - --hash=sha256:227ec057cd32a41c6651701abc0328135e472ed450f47c2766f23267b792a88e \ - --hash=sha256:22892cc830d8b2c89ea60148227631bb96a7da0c1b722f2aac8824b1b7c0b6b8 \ - --hash=sha256:392cb88b597247177172e02da6b7a63deeff1937fa6fec3bbf902ebd75d97ec7 \ - --hash=sha256:3be3ca726e1572517d2bef99a818378bbcf7d7799d5372a46c79c29eb8d166c1 \ - --hash=sha256:573eb7128cbca75f9157dcde974781209463ce56b5804983e11a1c462f0f4e88 \ - --hash=sha256:580afc7b7216deeb87a098ef0674d6ee34ab55993140838b14c9b83312b37b86 \ - --hash=sha256:5a70187954ba7292c7876734183e810b728b4f3965fbe571421cb2434d279179 \ - --hash=sha256:73801ac9736741f220e20435f84ecec75ed70eda90f781a148f1bad546963d81 \ - --hash=sha256:7d208c21e47940369accfc9e85f0de7693d9a5d843c2509b3846b2db170dfd20 \ - --hash=sha256:8254962e6ba1f4d2090c44daf50a547cd5f0bf446dc658a8e5f8156cae0d8548 \ - --hash=sha256:88417bff20162f635f24f849ab182b092697922088b477a7abd6664ddd82291d \ - --hash=sha256:a48e74dad1fb349f3dc1d449ed88e0017d792997a7ad2ec9587ed17405667e6d \ - --hash=sha256:b948e09fe5fb18517d99994184854ebd50b57248736fd4c720ad540560174ec5 \ - --hash=sha256:c707f7afd813478e2019ae32a7c49cd932dd60ab2d2a93e796f68236b7e1fbf1 \ - --hash=sha256:d38e6031e113b7421db1de0c1b1f7739564a88f1684c6b89234fbf6c11b75147 \ - --hash=sha256:d3977f0e276f6f5bf245c403156673db103283266601405376f075c849a0b936 \ - --hash=sha256:da6a0ff8f1016ccc7477e6339e1d50ce5f59b88905585f77193ebd5068f1e797 \ - --hash=sha256:e270c04f4d9b5671ebcc792b3ba5d4488bf7c42c3c241a3748e2599776f29696 \ - --hash=sha256:e886098619d3815e0ad5790c973afeee2c0e6e04b4da90b88e6bd06e2a0b1b72 \ - --hash=sha256:ec3b055ff8f1dce8e6ef28f626e0972981475173d7973d63f271b29c8a2897da \ - --hash=sha256:fba1e91467c65fe64a82c689dc6cf58151158993b13eb7a7f3f4b7f395636723 +cryptography==41.0.6 \ + --hash=sha256:068bc551698c234742c40049e46840843f3d98ad7ce265fd2bd4ec0d11306596 \ + --hash=sha256:0f27acb55a4e77b9be8d550d762b0513ef3fc658cd3eb15110ebbcbd626db12c \ + --hash=sha256:2132d5865eea673fe6712c2ed5fb4fa49dba10768bb4cc798345748380ee3660 \ + --hash=sha256:3288acccef021e3c3c10d58933f44e8602cf04dba96d9796d70d537bb2f4bbc4 \ + --hash=sha256:35f3f288e83c3f6f10752467c48919a7a94b7d88cc00b0668372a0d2ad4f8ead \ + --hash=sha256:398ae1fc711b5eb78e977daa3cbf47cec20f2c08c5da129b7a296055fbb22aed \ + --hash=sha256:422e3e31d63743855e43e5a6fcc8b4acab860f560f9321b0ee6269cc7ed70cc3 \ + --hash=sha256:48783b7e2bef51224020efb61b42704207dde583d7e371ef8fc2a5fb6c0aabc7 \ + --hash=sha256:4d03186af98b1c01a4eda396b137f29e4e3fb0173e30f885e27acec8823c1b09 \ + --hash=sha256:5daeb18e7886a358064a68dbcaf441c036cbdb7da52ae744e7b9207b04d3908c \ + --hash=sha256:60e746b11b937911dc70d164060d28d273e31853bb359e2b2033c9e93e6f3c43 \ + --hash=sha256:742ae5e9a2310e9dade7932f9576606836ed174da3c7d26bc3d3ab4bd49b9f65 \ + --hash=sha256:7e00fb556bda398b99b0da289ce7053639d33b572847181d6483ad89835115f6 \ + --hash=sha256:85abd057699b98fce40b41737afb234fef05c67e116f6f3650782c10862c43da \ + --hash=sha256:8efb2af8d4ba9dbc9c9dd8f04d19a7abb5b49eab1f3694e7b5a16a5fc2856f5c \ + --hash=sha256:ae236bb8760c1e55b7a39b6d4d32d2279bc6c7c8500b7d5a13b6fb9fc97be35b \ + --hash=sha256:afda76d84b053923c27ede5edc1ed7d53e3c9f475ebaf63c68e69f1403c405a8 \ + --hash=sha256:b27a7fd4229abef715e064269d98a7e2909ebf92eb6912a9603c7e14c181928c \ + --hash=sha256:b648fe2a45e426aaee684ddca2632f62ec4613ef362f4d681a9a6283d10e079d \ + --hash=sha256:c5a550dc7a3b50b116323e3d376241829fd326ac47bc195e04eb33a8170902a9 \ + --hash=sha256:da46e2b5df770070412c46f87bac0849b8d685c5f2679771de277a422c7d0b86 \ + --hash=sha256:f39812f70fc5c71a15aa3c97b2bbe213c3f2a460b79bd21c40d033bb34a9bf36 \ + --hash=sha256:ff369dd19e8fe0528b02e8df9f2aeb2479f89b1270d90f96a63500afe9af5cae # via # gcp-releasetool # secretstorage From a892309153b61fcbbf37204d6bc06c72207e5d91 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 8 Dec 2023 16:13:49 -0600 Subject: [PATCH 1682/2016] fix: keep `RowIterator.total_rows` populated after iteration (#1748) * fix: keep `RowIterator.total_rows` populated after iteration This was being reset in some cases when the rows were all available in the first page of results. * Update google/cloud/bigquery/table.py Co-authored-by: Anthonios Partheniou --------- Co-authored-by: Anthonios Partheniou --- .../google/cloud/bigquery/table.py | 4 ++-- .../google-cloud-bigquery/tests/unit/test_table.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index dca9f7962920..70e60171417c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -2997,9 +2997,9 @@ def _rows_page_start(iterator, page, response): page._columns = _row_iterator_page_columns(iterator._schema, response) total_rows = response.get("totalRows") + # Don't reset total_rows if it's not present in the next API response. if total_rows is not None: - total_rows = int(total_rows) - iterator._total_rows = total_rows + iterator._total_rows = int(total_rows) # pylint: enable=unused-argument diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 85f335dd161b..9b3d4fe842ef 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2201,9 +2201,18 @@ def test_iterate_with_cached_first_page(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one( - _mock_client(), api_request, path, schema, first_page_response=first_page + _mock_client(), + api_request, + path, + schema, + first_page_response=first_page, + total_rows=4, ) + self.assertEqual(row_iterator.total_rows, 4) rows = list(row_iterator) + # Total rows should be maintained, even though subsequent API calls + # don't include it. + self.assertEqual(row_iterator.total_rows, 4) self.assertEqual(len(rows), 4) self.assertEqual(rows[0].age, 27) self.assertEqual(rows[1].age, 28) From a17a71356ea471adc92937a1156c3021d8565c42 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 8 Dec 2023 16:46:43 -0600 Subject: [PATCH 1683/2016] feat: add `Client.query_and_wait` which directly returns a `RowIterator` of results (#1722) * perf: use the first page a results when `query(api_method="QUERY")` * add tests * respect max_results with cached page * respect page_size, also avoid bqstorage if almost fully downloaded * skip true test if bqstorage not installed * coverage * feat: add `Client.query_and_wait` which directly returns a `RowIterator` of results Set the `QUERY_PREVIEW_ENABLED=TRUE` environment variable to use this with the new JOB_CREATION_OPTIONAL mode (currently in preview). * implement basic query_and_wait and add code sample to test * avoid duplicated QueryJob construction * update unit tests * fix merge conflict in rowiterator * support max_results, add tests * retry tests * unit test coverage * dont retry twice * fix mypy_samples session * consolidate docstrings for query_and_wait * remove mention of job ID * fallback to jobs.insert for unsupported features * distinguish API timeout from wait timeout * add test for jobs.insert fallback * populate default job config * refactor default config * add coverage for job_config * cancel job if hasn't finished * mypy * allow unrealeased features in samples * fix for 3.12 * fix: keep `RowIterator.total_rows` populated after iteration This was being reset in some cases when the rows were all available in the first page of results. * Update google/cloud/bigquery/table.py Co-authored-by: Anthonios Partheniou * fix comments --------- Co-authored-by: Anthonios Partheniou --- .../google/cloud/bigquery/_job_helpers.py | 316 ++++++- .../google/cloud/bigquery/client.py | 152 ++- .../google/cloud/bigquery/job/base.py | 10 +- .../google/cloud/bigquery/job/query.py | 10 +- .../google/cloud/bigquery/query.py | 12 + packages/google-cloud-bigquery/noxfile.py | 8 +- .../samples/client_query.py | 3 + .../samples/snippets/client_query.py | 37 + .../samples/snippets/client_query_test.py | 38 + .../tests/unit/test__job_helpers.py | 872 +++++++++++++++++- .../tests/unit/test_client.py | 162 +++- .../tests/unit/test_query.py | 10 + 12 files changed, 1550 insertions(+), 80 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/client_query.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/client_query_test.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 7992f28b69c0..095de4faa7ae 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -12,9 +12,32 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Helpers for interacting with the job REST APIs from the client.""" +"""Helpers for interacting with the job REST APIs from the client. + +For queries, there are three cases to consider: + +1. jobs.insert: This always returns a job resource. +2. jobs.query, jobCreationMode=JOB_CREATION_REQUIRED: + This sometimes can return the results inline, but always includes a job ID. +3. jobs.query, jobCreationMode=JOB_CREATION_OPTIONAL: + This sometimes doesn't create a job at all, instead returning the results. + For better debugging, an auto-generated query ID is included in the + response. + +Client.query() calls either (1) or (2), depending on what the user provides +for the api_method parameter. query() always returns a QueryJob object, which +can retry the query when the query job fails for a retriable reason. + +Client.query_and_wait() calls (3). This returns a RowIterator that may wrap +local results from the response or may wrap a query job containing multiple +pages of results. Even though query_and_wait() waits for the job to complete, +we still need a separate job_retry object because there are different +predicates where it is safe to generate a new query ID. +""" import copy +import functools +import os import uuid from typing import Any, Dict, TYPE_CHECKING, Optional @@ -23,6 +46,7 @@ from google.cloud.bigquery import job import google.cloud.bigquery.query +from google.cloud.bigquery import table # Avoid circular imports if TYPE_CHECKING: # pragma: NO COVER @@ -59,6 +83,25 @@ def make_job_id(job_id: Optional[str] = None, prefix: Optional[str] = None) -> s return str(uuid.uuid4()) +def job_config_with_defaults( + job_config: Optional[job.QueryJobConfig], + default_job_config: Optional[job.QueryJobConfig], +) -> Optional[job.QueryJobConfig]: + """Create a copy of `job_config`, replacing unset values with those from + `default_job_config`. + """ + if job_config is None: + return default_job_config + + if default_job_config is None: + return job_config + + # Both job_config and default_job_config are not None, so make a copy of + # job_config merged with default_job_config. Anything already explicitly + # set on job_config should not be replaced. + return job_config._fill_from_default(default_job_config) + + def query_jobs_insert( client: "Client", query: str, @@ -67,9 +110,9 @@ def query_jobs_insert( job_id_prefix: Optional[str], location: Optional[str], project: str, - retry: retries.Retry, + retry: Optional[retries.Retry], timeout: Optional[float], - job_retry: retries.Retry, + job_retry: Optional[retries.Retry], ) -> job.QueryJob: """Initiate a query using jobs.insert. @@ -123,7 +166,13 @@ def do_query(): return future -def _to_query_request(job_config: Optional[job.QueryJobConfig]) -> Dict[str, Any]: +def _to_query_request( + job_config: Optional[job.QueryJobConfig] = None, + *, + query: str, + location: Optional[str] = None, + timeout: Optional[float] = None, +) -> Dict[str, Any]: """Transform from Job resource to QueryRequest resource. Most of the keys in job.configuration.query are in common with @@ -150,6 +199,15 @@ def _to_query_request(job_config: Optional[job.QueryJobConfig]) -> Dict[str, Any request_body.setdefault("formatOptions", {}) request_body["formatOptions"]["useInt64Timestamp"] = True # type: ignore + if timeout is not None: + # Subtract a buffer for context switching, network latency, etc. + request_body["timeoutMs"] = max(0, int(1000 * timeout) - _TIMEOUT_BUFFER_MILLIS) + + if location is not None: + request_body["location"] = location + + request_body["query"] = query + return request_body @@ -207,6 +265,10 @@ def _to_query_job( return query_job +def _to_query_path(project: str) -> str: + return f"/projects/{project}/queries" + + def query_jobs_query( client: "Client", query: str, @@ -217,18 +279,14 @@ def query_jobs_query( timeout: Optional[float], job_retry: retries.Retry, ) -> job.QueryJob: - """Initiate a query using jobs.query. + """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query """ - path = f"/projects/{project}/queries" - request_body = _to_query_request(job_config) - - if timeout is not None: - # Subtract a buffer for context switching, network latency, etc. - request_body["timeoutMs"] = max(0, int(1000 * timeout) - _TIMEOUT_BUFFER_MILLIS) - request_body["location"] = location - request_body["query"] = query + path = _to_query_path(project) + request_body = _to_query_request( + query=query, job_config=job_config, location=location, timeout=timeout + ) def do_query(): request_body["requestId"] = make_job_id() @@ -253,3 +311,235 @@ def do_query(): future._job_retry = job_retry return future + + +def query_and_wait( + client: "Client", + query: str, + *, + job_config: Optional[job.QueryJobConfig], + location: Optional[str], + project: str, + api_timeout: Optional[float] = None, + wait_timeout: Optional[float] = None, + retry: Optional[retries.Retry], + job_retry: Optional[retries.Retry], + page_size: Optional[int] = None, + max_results: Optional[int] = None, +) -> table.RowIterator: + """Run the query, wait for it to finish, and return the results. + + While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the + ``jobs.query`` REST API, use the default ``jobCreationMode`` unless + the environment variable ``QUERY_PREVIEW_ENABLED=true``. After + ``jobCreationMode`` is GA, this method will always use + ``jobCreationMode=JOB_CREATION_OPTIONAL``. See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + + Args: + client: + BigQuery client to make API calls. + query (str): + SQL query to be executed. Defaults to the standard SQL + dialect. Use the ``job_config`` parameter to change dialects. + job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): + Extra configuration options for the job. + To override any options that were previously set in + the ``default_query_job_config`` given to the + ``Client`` constructor, manually set those options to ``None``, + or whatever value is preferred. + location (Optional[str]): + Location where to run the job. Must match the location of the + table used in the query as well as the destination table. + project (Optional[str]): + Project ID of the project of where to run the job. Defaults + to the client's project. + api_timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + wait_timeout (Optional[float]): + The number of seconds to wait for the query to finish. If the + query doesn't finish before this timeout, the client attempts + to cancel the query. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. This only applies to making RPC + calls. It isn't used to retry failed jobs. This has + a reasonable default that should only be overridden + with care. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. Not all jobs can be retried. + page_size (Optional[int]): + The maximum number of rows in each page of results from this + request. Non-positive values are ignored. + max_results (Optional[int]): + The maximum total number of rows from this request. + + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. During each + page, the iterator will have the ``total_rows`` attribute + set, which counts the total number of rows **in the result + set** (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). + + If the query is a special query that produces no results, e.g. + a DDL query, an ``_EmptyRowIterator`` instance is returned. + + Raises: + TypeError: + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.QueryJobConfig` + class. + """ + # Some API parameters aren't supported by the jobs.query API. In these + # cases, fallback to a jobs.insert call. + if not _supported_by_jobs_query(job_config): + return _wait_or_cancel( + query_jobs_insert( + client=client, + query=query, + job_id=None, + job_id_prefix=None, + job_config=job_config, + location=location, + project=project, + retry=retry, + timeout=api_timeout, + job_retry=job_retry, + ), + api_timeout=api_timeout, + wait_timeout=wait_timeout, + retry=retry, + page_size=page_size, + max_results=max_results, + ) + + path = _to_query_path(project) + request_body = _to_query_request( + query=query, job_config=job_config, location=location, timeout=api_timeout + ) + + if page_size is not None and max_results is not None: + request_body["maxResults"] = min(page_size, max_results) + elif page_size is not None or max_results is not None: + request_body["maxResults"] = page_size or max_results + + if os.getenv("QUERY_PREVIEW_ENABLED", "").casefold() == "true": + request_body["jobCreationMode"] = "JOB_CREATION_OPTIONAL" + + def do_query(): + request_body["requestId"] = make_job_id() + span_attributes = {"path": path} + + # For easier testing, handle the retries ourselves. + if retry is not None: + response = retry(client._call_api)( + retry=None, # We're calling the retry decorator ourselves. + span_name="BigQuery.query", + span_attributes=span_attributes, + method="POST", + path=path, + data=request_body, + timeout=api_timeout, + ) + else: + response = client._call_api( + retry=None, + span_name="BigQuery.query", + span_attributes=span_attributes, + method="POST", + path=path, + data=request_body, + timeout=api_timeout, + ) + + # Even if we run with JOB_CREATION_OPTIONAL, if there are more pages + # to fetch, there will be a job ID for jobs.getQueryResults. + query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + response + ) + page_token = query_results.page_token + more_pages = page_token is not None + + if more_pages or not query_results.complete: + # TODO(swast): Avoid a call to jobs.get in some cases (few + # remaining pages) by waiting for the query to finish and calling + # client._list_rows_from_query_results directly. Need to update + # RowIterator to fetch destination table via the job ID if needed. + return _wait_or_cancel( + _to_query_job(client, query, job_config, response), + api_timeout=api_timeout, + wait_timeout=wait_timeout, + retry=retry, + page_size=page_size, + max_results=max_results, + ) + + return table.RowIterator( + client=client, + api_request=functools.partial(client._call_api, retry, timeout=api_timeout), + path=None, + schema=query_results.schema, + max_results=max_results, + page_size=page_size, + total_rows=query_results.total_rows, + first_page_response=response, + location=query_results.location, + job_id=query_results.job_id, + query_id=query_results.query_id, + project=query_results.project, + ) + + if job_retry is not None: + return job_retry(do_query)() + else: + return do_query() + + +def _supported_by_jobs_query(job_config: Optional[job.QueryJobConfig]) -> bool: + """True if jobs.query can be used. False if jobs.insert is needed.""" + if job_config is None: + return True + + return ( + # These features aren't supported by jobs.query. + job_config.clustering_fields is None + and job_config.destination is None + and job_config.destination_encryption_configuration is None + and job_config.range_partitioning is None + and job_config.table_definitions is None + and job_config.time_partitioning is None + ) + + +def _wait_or_cancel( + job: job.QueryJob, + api_timeout: Optional[float], + wait_timeout: Optional[float], + retry: Optional[retries.Retry], + page_size: Optional[int], + max_results: Optional[int], +) -> table.RowIterator: + """Wait for a job to complete and return the results. + + If we can't return the results within the ``wait_timeout``, try to cancel + the job. + """ + try: + return job.result( + page_size=page_size, + max_results=max_results, + retry=retry, + timeout=wait_timeout, + ) + except Exception: + # Attempt to cancel the job since we can't return the results. + try: + job.cancel(retry=retry, timeout=api_timeout) + except Exception: + # Don't eat the original exception if cancel fails. + pass + raise diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 488a9ad298c3..284ccddb5972 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -255,23 +255,31 @@ def __init__( self._connection = Connection(self, **kw_args) self._location = location - self._default_query_job_config = copy.deepcopy(default_query_job_config) self._default_load_job_config = copy.deepcopy(default_load_job_config) + # Use property setter so validation can run. + self.default_query_job_config = default_query_job_config + @property def location(self): """Default location for jobs / datasets / tables.""" return self._location @property - def default_query_job_config(self): - """Default ``QueryJobConfig``. - Will be merged into job configs passed into the ``query`` method. + def default_query_job_config(self) -> Optional[QueryJobConfig]: + """Default ``QueryJobConfig`` or ``None``. + + Will be merged into job configs passed into the ``query`` or + ``query_and_wait`` methods. """ return self._default_query_job_config @default_query_job_config.setter - def default_query_job_config(self, value: QueryJobConfig): + def default_query_job_config(self, value: Optional[QueryJobConfig]): + if value is not None: + _verify_job_config_type( + value, QueryJobConfig, param_name="default_query_job_config" + ) self._default_query_job_config = copy.deepcopy(value) @property @@ -3355,26 +3363,12 @@ def query( if location is None: location = self.location - if self._default_query_job_config: - if job_config: - _verify_job_config_type( - job_config, google.cloud.bigquery.job.QueryJobConfig - ) - # anything that's not defined on the incoming - # that is in the default, - # should be filled in with the default - # the incoming therefore has precedence - # - # Note that _fill_from_default doesn't mutate the receiver - job_config = job_config._fill_from_default( - self._default_query_job_config - ) - else: - _verify_job_config_type( - self._default_query_job_config, - google.cloud.bigquery.job.QueryJobConfig, - ) - job_config = self._default_query_job_config + if job_config is not None: + _verify_job_config_type(job_config, QueryJobConfig) + + job_config = _job_helpers.job_config_with_defaults( + job_config, self._default_query_job_config + ) # Note that we haven't modified the original job_config (or # _default_query_job_config) up to this point. @@ -3405,6 +3399,112 @@ def query( else: raise ValueError(f"Got unexpected value for api_method: {repr(api_method)}") + def query_and_wait( + self, + query, + *, + job_config: Optional[QueryJobConfig] = None, + location: Optional[str] = None, + project: Optional[str] = None, + api_timeout: TimeoutType = DEFAULT_TIMEOUT, + wait_timeout: TimeoutType = None, + retry: retries.Retry = DEFAULT_RETRY, + job_retry: retries.Retry = DEFAULT_JOB_RETRY, + page_size: Optional[int] = None, + max_results: Optional[int] = None, + ) -> RowIterator: + """Run the query, wait for it to finish, and return the results. + + While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the + ``jobs.query`` REST API, use the default ``jobCreationMode`` unless + the environment variable ``QUERY_PREVIEW_ENABLED=true``. After + ``jobCreationMode`` is GA, this method will always use + ``jobCreationMode=JOB_CREATION_OPTIONAL``. See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + + Args: + query (str): + SQL query to be executed. Defaults to the standard SQL + dialect. Use the ``job_config`` parameter to change dialects. + job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): + Extra configuration options for the job. + To override any options that were previously set in + the ``default_query_job_config`` given to the + ``Client`` constructor, manually set those options to ``None``, + or whatever value is preferred. + location (Optional[str]): + Location where to run the job. Must match the location of the + table used in the query as well as the destination table. + project (Optional[str]): + Project ID of the project of where to run the job. Defaults + to the client's project. + api_timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + wait_timeout (Optional[float]): + The number of seconds to wait for the query to finish. If the + query doesn't finish before this timeout, the client attempts + to cancel the query. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. This only applies to making RPC + calls. It isn't used to retry failed jobs. This has + a reasonable default that should only be overridden + with care. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. Not all jobs can be retried. + page_size (Optional[int]): + The maximum number of rows in each page of results from this + request. Non-positive values are ignored. + max_results (Optional[int]): + The maximum total number of rows from this request. + + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. During each + page, the iterator will have the ``total_rows`` attribute + set, which counts the total number of rows **in the result + set** (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). + + If the query is a special query that produces no results, e.g. + a DDL query, an ``_EmptyRowIterator`` instance is returned. + + Raises: + TypeError: + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.QueryJobConfig` + class. + """ + if project is None: + project = self.project + + if location is None: + location = self.location + + if job_config is not None: + _verify_job_config_type(job_config, QueryJobConfig) + + job_config = _job_helpers.job_config_with_defaults( + job_config, self._default_query_job_config + ) + + return _job_helpers.query_and_wait( + self, + query, + job_config=job_config, + location=location, + project=project, + api_timeout=api_timeout, + wait_timeout=wait_timeout, + retry=retry, + job_retry=job_retry, + page_size=page_size, + max_results=max_results, + ) + def insert_rows( self, table: Union[Table, TableReference, str], @@ -3853,7 +3953,7 @@ def _list_rows_from_query_results( job_id: str, location: str, project: str, - schema: SchemaField, + schema: Sequence[SchemaField], total_rows: Optional[int] = None, destination: Optional[Union[Table, TableReference, TableListItem, str]] = None, max_results: Optional[int] = None, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 78df9142fe2b..97e0ea3bd76b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -21,6 +21,7 @@ import typing from typing import ClassVar, Dict, Optional, Sequence +from google.api_core import retry as retries from google.api_core import exceptions import google.api_core.future.polling @@ -28,9 +29,6 @@ from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery._helpers import _int_or_none -if typing.TYPE_CHECKING: # pragma: NO COVER - from google.api_core import retry as retries - _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" @@ -825,7 +823,7 @@ def reload( def cancel( self, client=None, - retry: "retries.Retry" = DEFAULT_RETRY, + retry: Optional[retries.Retry] = DEFAULT_RETRY, timeout: Optional[float] = None, ) -> bool: """API call: cancel job via a POST request @@ -921,9 +919,9 @@ def done( self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE - def result( # type: ignore # (signature complaint) + def result( # type: ignore # (incompatible with supertype) self, - retry: "retries.Retry" = DEFAULT_RETRY, + retry: Optional[retries.Retry] = DEFAULT_RETRY, timeout: Optional[float] = None, ) -> "_AsyncJob": """Start the job and wait for it to complete and get the result. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 79cd207a1e5b..4a529f9497e9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -22,6 +22,7 @@ from google.api_core import exceptions from google.api_core.future import polling as polling_future +from google.api_core import retry as retries import requests from google.cloud.bigquery.dataset import Dataset @@ -69,7 +70,6 @@ import pandas # type: ignore import geopandas # type: ignore import pyarrow # type: ignore - from google.api_core import retry as retries from google.cloud import bigquery_storage from google.cloud.bigquery.client import Client from google.cloud.bigquery.table import RowIterator @@ -779,7 +779,7 @@ def to_api_repr(self) -> dict: resource = copy.deepcopy(self._properties) # Query parameters have an addition property associated with them # to indicate if the query is using named or positional parameters. - query_parameters = resource["query"].get("queryParameters") + query_parameters = resource.get("query", {}).get("queryParameters") if query_parameters: if query_parameters[0].get("name") is None: resource["query"]["parameterMode"] = "POSITIONAL" @@ -1469,14 +1469,14 @@ def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): except exceptions.GoogleAPIError as exc: self.set_exception(exc) - def result( # type: ignore # (complaints about the overloaded signature) + def result( # type: ignore # (incompatible with supertype) self, page_size: Optional[int] = None, max_results: Optional[int] = None, - retry: "retries.Retry" = DEFAULT_RETRY, + retry: Optional[retries.Retry] = DEFAULT_RETRY, timeout: Optional[float] = None, start_index: Optional[int] = None, - job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, + job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 54abe95a7457..43591c648ec7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -911,6 +911,18 @@ def job_id(self): """ return self._properties.get("jobReference", {}).get("jobId") + @property + def location(self): + """Location of the query job these results are from. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.job_reference + + Returns: + str: Job ID of the query job. + """ + return self._properties.get("jobReference", {}).get("location") + @property def query_id(self) -> Optional[str]: """[Preview] ID of a completed query. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index ab7803040765..41492c7f0776 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -205,13 +205,15 @@ def system(session): @nox.session(python=DEFAULT_PYTHON_VERSION) def mypy_samples(session): """Run type checks with mypy.""" - session.install("-e", ".[all]") - session.install("pytest") for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"): - session.install("-r", requirements_path) + session.install("-r", str(requirements_path)) session.install(MYPY_VERSION) + # requirements.txt might include this package. Install from source so that + # we can author samples with unreleased features. + session.install("-e", ".[all]") + # Just install the dependencies' type info directly, since "mypy --install-types" # might require an additional pass. session.install( diff --git a/packages/google-cloud-bigquery/samples/client_query.py b/packages/google-cloud-bigquery/samples/client_query.py index 4df051ee2c0e..80eac854e2ee 100644 --- a/packages/google-cloud-bigquery/samples/client_query.py +++ b/packages/google-cloud-bigquery/samples/client_query.py @@ -14,6 +14,9 @@ def client_query() -> None: + # TODO(swast): remove once docs in cloud.google.com have been updated to + # use samples/snippets/client_query.py + # [START bigquery_query] from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/samples/snippets/client_query.py b/packages/google-cloud-bigquery/samples/snippets/client_query.py new file mode 100644 index 000000000000..ccae2e8bded0 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/client_query.py @@ -0,0 +1,37 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query() -> None: + # [START bigquery_query] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + query = """ + SELECT name, SUM(number) as total_people + FROM `bigquery-public-data.usa_names.usa_1910_2013` + WHERE state = 'TX' + GROUP BY name, state + ORDER BY total_people DESC + LIMIT 20 + """ + rows = client.query_and_wait(query) # Make an API request. + + print("The query data:") + for row in rows: + # Row values can be accessed by field name or index. + print("name={}, count={}".format(row[0], row["total_people"])) + # [END bigquery_query] diff --git a/packages/google-cloud-bigquery/samples/snippets/client_query_test.py b/packages/google-cloud-bigquery/samples/snippets/client_query_test.py new file mode 100644 index 000000000000..1bc83a230979 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/client_query_test.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import client_query # type: ignore + +if typing.TYPE_CHECKING: + import pytest + + +def test_client_query(capsys: "pytest.CaptureFixture[str]") -> None: + client_query.client_query() + out, _ = capsys.readouterr() + assert "The query data:" in out + assert "name=James, count=272793" in out + + +def test_client_query_job_optional( + capsys: "pytest.CaptureFixture[str]", monkeypatch: "pytest.MonkeyPatch" +) -> None: + monkeypatch.setenv("QUERY_PREVIEW_ENABLED", "true") + + client_query.client_query() + out, _ = capsys.readouterr() + assert "The query data:" in out + assert "name=James, count=272793" in out diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py index 012352f4eced..f2fe32d94c1b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py @@ -12,15 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import functools from typing import Any, Dict, Optional from unittest import mock +import freezegun +import google.api_core.exceptions from google.api_core import retry as retries import pytest from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers -from google.cloud.bigquery.job.query import QueryJob, QueryJobConfig +from google.cloud.bigquery.job import query as job_query from google.cloud.bigquery.query import ConnectionProperty, ScalarQueryParameter @@ -55,9 +58,9 @@ def make_query_response( ("job_config", "expected"), ( (None, make_query_request()), - (QueryJobConfig(), make_query_request()), + (job_query.QueryJobConfig(), make_query_request()), ( - QueryJobConfig(default_dataset="my-project.my_dataset"), + job_query.QueryJobConfig(default_dataset="my-project.my_dataset"), make_query_request( { "defaultDataset": { @@ -67,17 +70,17 @@ def make_query_response( } ), ), - (QueryJobConfig(dry_run=True), make_query_request({"dryRun": True})), + (job_query.QueryJobConfig(dry_run=True), make_query_request({"dryRun": True})), ( - QueryJobConfig(use_query_cache=False), + job_query.QueryJobConfig(use_query_cache=False), make_query_request({"useQueryCache": False}), ), ( - QueryJobConfig(use_legacy_sql=True), + job_query.QueryJobConfig(use_legacy_sql=True), make_query_request({"useLegacySql": True}), ), ( - QueryJobConfig( + job_query.QueryJobConfig( query_parameters=[ ScalarQueryParameter("named_param1", "STRING", "param-value"), ScalarQueryParameter("named_param2", "INT64", 123), @@ -102,7 +105,7 @@ def make_query_response( ), ), ( - QueryJobConfig( + job_query.QueryJobConfig( query_parameters=[ ScalarQueryParameter(None, "STRING", "param-value"), ScalarQueryParameter(None, "INT64", 123), @@ -125,7 +128,7 @@ def make_query_response( ), ), ( - QueryJobConfig( + job_query.QueryJobConfig( connection_properties=[ ConnectionProperty(key="time_zone", value="America/Chicago"), ConnectionProperty(key="session_id", value="abcd-efgh-ijkl-mnop"), @@ -141,17 +144,18 @@ def make_query_response( ), ), ( - QueryJobConfig(labels={"abc": "def"}), + job_query.QueryJobConfig(labels={"abc": "def"}), make_query_request({"labels": {"abc": "def"}}), ), ( - QueryJobConfig(maximum_bytes_billed=987654), + job_query.QueryJobConfig(maximum_bytes_billed=987654), make_query_request({"maximumBytesBilled": "987654"}), ), ), ) def test__to_query_request(job_config, expected): - result = _job_helpers._to_query_request(job_config) + result = _job_helpers._to_query_request(job_config, query="SELECT 1") + expected["query"] = "SELECT 1" assert result == expected @@ -160,7 +164,9 @@ def test__to_query_job_defaults(): response = make_query_response( job_id="test-job", project_id="some-project", location="asia-northeast1" ) - job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response) + job: job_query.QueryJob = _job_helpers._to_query_job( + mock_client, "query-str", None, response + ) assert job.query == "query-str" assert job._client is mock_client assert job.job_id == "test-job" @@ -175,9 +181,9 @@ def test__to_query_job_dry_run(): response = make_query_response( job_id="test-job", project_id="some-project", location="asia-northeast1" ) - job_config: QueryJobConfig = QueryJobConfig() + job_config: job_query.QueryJobConfig = job_query.QueryJobConfig() job_config.dry_run = True - job: QueryJob = _job_helpers._to_query_job( + job: job_query.QueryJob = _job_helpers._to_query_job( mock_client, "query-str", job_config, response ) assert job.dry_run is True @@ -193,7 +199,9 @@ def test__to_query_job_dry_run(): def test__to_query_job_sets_state(completed, expected_state): mock_client = mock.create_autospec(Client) response = make_query_response(completed=completed) - job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response) + job: job_query.QueryJob = _job_helpers._to_query_job( + mock_client, "query-str", None, response + ) assert job.state == expected_state @@ -206,7 +214,9 @@ def test__to_query_job_sets_errors(): {"message": "something else went wrong"}, ] ) - job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response) + job: job_query.QueryJob = _job_helpers._to_query_job( + mock_client, "query-str", None, response + ) assert len(job.errors) == 2 # If we got back a response instead of an HTTP error status code, most # likely the job didn't completely fail. @@ -313,6 +323,717 @@ def test_query_jobs_query_sets_timeout(timeout, expected_timeout): assert request["timeoutMs"] == expected_timeout +def test_query_and_wait_uses_jobs_insert(): + """With unsupported features, call jobs.insert instead of jobs.query.""" + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "query": { + "query": "SELECT 1", + }, + # Make sure the job has "started" + "status": {"state": "DONE"}, + "jobComplete": True, + } + job_config = job_query.QueryJobConfig( + destination="dest-project.dest_dset.dest_table", + ) + _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=job_config, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + + # We should call jobs.insert since jobs.query doesn't support destination. + request_path = "/projects/request-project/jobs" + client._call_api.assert_any_call( + None, # retry, + span_name="BigQuery.job.begin", + span_attributes={"path": request_path}, + job_ref=mock.ANY, + method="POST", + path=request_path, + data={ + "jobReference": { + "jobId": mock.ANY, + "projectId": "request-project", + "location": "request-location", + }, + "configuration": { + "query": { + "destinationTable": { + "projectId": "dest-project", + "datasetId": "dest_dset", + "tableId": "dest_table", + }, + "useLegacySql": False, + "query": "SELECT 1", + } + }, + }, + timeout=None, + ) + + +def test_query_and_wait_retries_job(): + freezegun.freeze_time(auto_tick_seconds=100) + client = mock.create_autospec(Client) + client._call_api.__name__ = "_call_api" + client._call_api.__qualname__ = "Client._call_api" + client._call_api.__annotations__ = {} + client._call_api.__type_params__ = () + client._call_api.side_effect = ( + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + google.api_core.exceptions.BadGateway("retry me"), + { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + }, + ) + rows = _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), + multiplier=1.0, + ).with_deadline( + 200.0 + ), # Since auto_tick_seconds is 100, we should get at least 1 retry. + job_retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.InternalServerError), + multiplier=1.0, + ).with_deadline(600.0), + ) + assert len(list(rows)) == 4 + + # For this code path, where the query has finished immediately, we should + # only be calling the jobs.query API and no other request path. + request_path = "/projects/request-project/queries" + for call in client._call_api.call_args_list: + _, kwargs = call + assert kwargs["method"] == "POST" + assert kwargs["path"] == request_path + + +@freezegun.freeze_time(auto_tick_seconds=100) +def test_query_and_wait_retries_job_times_out(): + client = mock.create_autospec(Client) + client._call_api.__name__ = "_call_api" + client._call_api.__qualname__ = "Client._call_api" + client._call_api.__annotations__ = {} + client._call_api.__type_params__ = () + client._call_api.side_effect = ( + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + ) + + with pytest.raises(google.api_core.exceptions.RetryError) as exc_info: + _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), + multiplier=1.0, + ).with_deadline( + 200.0 + ), # Since auto_tick_seconds is 100, we should get at least 1 retry. + job_retry=retries.Retry( + lambda exc: isinstance( + exc, google.api_core.exceptions.InternalServerError + ), + multiplier=1.0, + ).with_deadline(400.0), + ) + + assert isinstance( + exc_info.value.cause, google.api_core.exceptions.InternalServerError + ) + + +def test_query_and_wait_sets_job_creation_mode(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv( + "QUERY_PREVIEW_ENABLED", + # The comparison should be case insensitive. + "TrUe", + ) + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + } + _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + + # We should only call jobs.query once, no additional row requests needed. + request_path = "/projects/request-project/queries" + client._call_api.assert_called_once_with( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": request_path}, + method="POST", + path=request_path, + data={ + "query": "SELECT 1", + "location": "request-location", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + "jobCreationMode": "JOB_CREATION_OPTIONAL", + }, + timeout=None, + ) + + +def test_query_and_wait_sets_location(): + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + } + rows = _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + assert rows.location == "response-location" + + # We should only call jobs.query once, no additional row requests needed. + request_path = "/projects/request-project/queries" + client._call_api.assert_called_once_with( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": request_path}, + method="POST", + path=request_path, + data={ + "query": "SELECT 1", + "location": "request-location", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + }, + timeout=None, + ) + + +@pytest.mark.parametrize( + ("max_results", "page_size", "expected"), + [ + (10, None, 10), + (None, 11, 11), + (12, 100, 12), + (100, 13, 13), + ], +) +def test_query_and_wait_sets_max_results(max_results, page_size, expected): + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + } + rows = _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + retry=None, + job_retry=None, + page_size=page_size, + max_results=max_results, + ) + assert rows.location == "response-location" + + # We should only call jobs.query once, no additional row requests needed. + request_path = "/projects/request-project/queries" + client._call_api.assert_called_once_with( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": request_path}, + method="POST", + path=request_path, + data={ + "query": "SELECT 1", + "location": "request-location", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + "maxResults": expected, + }, + timeout=None, + ) + + +def test_query_and_wait_caches_completed_query_results_one_page(): + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "US", + }, + "jobComplete": True, + "queryId": "xyz", + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + # Even though totalRows > len(rows), we should use the presense of a + # next page token to decide if there are any more pages. + "totalRows": 8, + } + rows = _job_helpers.query_and_wait( + client, + query="SELECT full_name, age FROM people;", + job_config=None, + location=None, + project="request-project", + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + rows_list = list(rows) + assert rows.project == "response-project" + assert rows.job_id == "abc" + assert rows.location == "US" + assert rows.query_id == "xyz" + assert rows.total_rows == 8 + assert len(rows_list) == 4 + + # We should only call jobs.query once, no additional row requests needed. + request_path = "/projects/request-project/queries" + client._call_api.assert_called_once_with( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": request_path}, + method="POST", + path=request_path, + data={ + "query": "SELECT full_name, age FROM people;", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + }, + timeout=None, + ) + + +def test_query_and_wait_caches_completed_query_results_one_page_no_rows(): + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "US", + }, + "jobComplete": True, + "queryId": "xyz", + } + rows = _job_helpers.query_and_wait( + client, + query="CREATE TABLE abc;", + project="request-project", + job_config=None, + location=None, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + assert rows.project == "response-project" + assert rows.job_id == "abc" + assert rows.location == "US" + assert rows.query_id == "xyz" + assert list(rows) == [] + + # We should only call jobs.query once, no additional row requests needed. + request_path = "/projects/request-project/queries" + client._call_api.assert_called_once_with( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": request_path}, + method="POST", + path=request_path, + data={ + "query": "CREATE TABLE abc;", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + }, + timeout=None, + ) + + +def test_query_and_wait_caches_completed_query_results_more_pages(): + client = mock.create_autospec(Client) + client._list_rows_from_query_results = functools.partial( + Client._list_rows_from_query_results, client + ) + client._call_api.side_effect = ( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "jobComplete": True, + "queryId": "xyz", + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + # Even though totalRows <= len(rows), we should use the presense of a + # next page token to decide if there are any more pages. + "totalRows": 2, + "pageToken": "page-2", + }, + # TODO(swast): This is a case where we can avoid a call to jobs.get, + # but currently do so because the RowIterator might need the + # destination table, since results aren't fully cached. + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + }, + { + "rows": [ + {"f": [{"v": "Pebbles Phlyntstone"}, {"v": "4"}]}, + {"f": [{"v": "Bamm-Bamm Rhubble"}, {"v": "5"}]}, + {"f": [{"v": "Joseph Rockhead"}, {"v": "32"}]}, + {"f": [{"v": "Perry Masonry"}, {"v": "33"}]}, + ], + "totalRows": 3, + "pageToken": "page-3", + }, + { + "rows": [ + {"f": [{"v": "Pearl Slaghoople"}, {"v": "53"}]}, + ], + "totalRows": 4, + }, + ) + rows = _job_helpers.query_and_wait( + client, + query="SELECT full_name, age FROM people;", + project="request-project", + job_config=None, + location=None, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + assert rows.total_rows == 2 # Match the API response. + rows_list = list(rows) + assert rows.total_rows == 4 # Match the final API response. + assert len(rows_list) == 9 + + # Start the query. + jobs_query_path = "/projects/request-project/queries" + client._call_api.assert_any_call( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": jobs_query_path}, + method="POST", + path=jobs_query_path, + data={ + "query": "SELECT full_name, age FROM people;", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + }, + timeout=None, + ) + + # TODO(swast): Fetching job metadata isn't necessary in this case. + jobs_get_path = "/projects/response-project/jobs/response-job-id" + client._call_api.assert_any_call( + None, # retry + span_name="BigQuery.job.reload", + span_attributes={"path": jobs_get_path}, + job_ref=mock.ANY, + method="GET", + path=jobs_get_path, + query_params={"location": "response-location"}, + timeout=None, + ) + + # Fetch the remaining two pages. + jobs_get_query_results_path = "/projects/response-project/queries/response-job-id" + client._call_api.assert_any_call( + None, # retry + timeout=None, + method="GET", + path=jobs_get_query_results_path, + query_params={ + "pageToken": "page-2", + "fields": "jobReference,totalRows,pageToken,rows", + "location": "response-location", + "formatOptions.useInt64Timestamp": True, + }, + ) + client._call_api.assert_any_call( + None, # retry + timeout=None, + method="GET", + path=jobs_get_query_results_path, + query_params={ + "pageToken": "page-3", + "fields": "jobReference,totalRows,pageToken,rows", + "location": "response-location", + "formatOptions.useInt64Timestamp": True, + }, + ) + + +def test_query_and_wait_incomplete_query(): + client = mock.create_autospec(Client) + client._get_query_results = functools.partial(Client._get_query_results, client) + client._list_rows_from_query_results = functools.partial( + Client._list_rows_from_query_results, client + ) + client._call_api.side_effect = ( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "jobComplete": False, + }, + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "jobComplete": True, + "totalRows": 2, + "queryId": "xyz", + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + }, + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + }, + { + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + # Even though totalRows <= len(rows), we should use the presense of a + # next page token to decide if there are any more pages. + "totalRows": 2, + "pageToken": "page-2", + }, + { + "rows": [ + {"f": [{"v": "Pearl Slaghoople"}, {"v": "53"}]}, + ], + }, + ) + rows = _job_helpers.query_and_wait( + client, + query="SELECT full_name, age FROM people;", + project="request-project", + job_config=None, + location=None, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + rows_list = list(rows) + assert rows.total_rows == 2 # Match the API response. + assert len(rows_list) == 5 + + # Start the query. + jobs_query_path = "/projects/request-project/queries" + client._call_api.assert_any_call( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": jobs_query_path}, + method="POST", + path=jobs_query_path, + data={ + "query": "SELECT full_name, age FROM people;", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + }, + timeout=None, + ) + + # Wait for the query to finish. + jobs_get_query_results_path = "/projects/response-project/queries/response-job-id" + client._call_api.assert_any_call( + None, # retry + span_name="BigQuery.getQueryResults", + span_attributes={"path": jobs_get_query_results_path}, + method="GET", + path=jobs_get_query_results_path, + query_params={ + # job_query.QueryJob uses getQueryResults to wait for the query to finish. + # It avoids fetching the results because: + # (1) For large rows this can take a long time, much longer than + # our progress bar update frequency. + # See: https://github.com/googleapis/python-bigquery/issues/403 + # (2) Caching the first page of results uses an unexpected increase in memory. + # See: https://github.com/googleapis/python-bigquery/issues/394 + "maxResults": 0, + "location": "response-location", + }, + timeout=None, + ) + + # Fetch the job metadata in case the RowIterator needs the destination table. + jobs_get_path = "/projects/response-project/jobs/response-job-id" + client._call_api.assert_any_call( + None, # retry + span_name="BigQuery.job.reload", + span_attributes={"path": jobs_get_path}, + job_ref=mock.ANY, + method="GET", + path=jobs_get_path, + query_params={"location": "response-location"}, + timeout=None, + ) + + # Fetch the remaining two pages. + client._call_api.assert_any_call( + None, # retry + timeout=None, + method="GET", + path=jobs_get_query_results_path, + query_params={ + "fields": "jobReference,totalRows,pageToken,rows", + "location": "response-location", + "formatOptions.useInt64Timestamp": True, + }, + ) + client._call_api.assert_any_call( + None, # retry + timeout=None, + method="GET", + path=jobs_get_query_results_path, + query_params={ + "pageToken": "page-2", + "fields": "jobReference,totalRows,pageToken,rows", + "location": "response-location", + "formatOptions.useInt64Timestamp": True, + }, + ) + + def test_make_job_id_wo_suffix(): job_id = _job_helpers.make_job_id("job_id") assert job_id == "job_id" @@ -335,3 +1056,120 @@ def test_make_job_id_random(): def test_make_job_id_w_job_id_overrides_prefix(): job_id = _job_helpers.make_job_id("job_id", prefix="unused_prefix") assert job_id == "job_id" + + +@pytest.mark.parametrize( + ("job_config", "expected"), + ( + pytest.param(None, True), + pytest.param(job_query.QueryJobConfig(), True, id="default"), + pytest.param( + job_query.QueryJobConfig(use_query_cache=False), True, id="use_query_cache" + ), + pytest.param( + job_query.QueryJobConfig(maximum_bytes_billed=10_000_000), + True, + id="maximum_bytes_billed", + ), + pytest.param( + job_query.QueryJobConfig(clustering_fields=["a", "b", "c"]), + False, + id="clustering_fields", + ), + pytest.param( + job_query.QueryJobConfig(destination="p.d.t"), False, id="destination" + ), + pytest.param( + job_query.QueryJobConfig( + destination_encryption_configuration=job_query.EncryptionConfiguration( + "key" + ) + ), + False, + id="destination_encryption_configuration", + ), + ), +) +def test_supported_by_jobs_query( + job_config: Optional[job_query.QueryJobConfig], expected: bool +): + assert _job_helpers._supported_by_jobs_query(job_config) == expected + + +def test_wait_or_cancel_no_exception(): + job = mock.create_autospec(job_query.QueryJob, instance=True) + expected_rows = object() + job.result.return_value = expected_rows + retry = retries.Retry() + + rows = _job_helpers._wait_or_cancel( + job, + api_timeout=123, + wait_timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + + job.result.assert_called_once_with( + timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + assert rows is expected_rows + + +def test_wait_or_cancel_exception_cancels_job(): + job = mock.create_autospec(job_query.QueryJob, instance=True) + job.result.side_effect = google.api_core.exceptions.BadGateway("test error") + retry = retries.Retry() + + with pytest.raises(google.api_core.exceptions.BadGateway): + _job_helpers._wait_or_cancel( + job, + api_timeout=123, + wait_timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + + job.result.assert_called_once_with( + timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + job.cancel.assert_called_once_with( + timeout=123, + retry=retry, + ) + + +def test_wait_or_cancel_exception_raises_original_exception(): + job = mock.create_autospec(job_query.QueryJob, instance=True) + job.result.side_effect = google.api_core.exceptions.BadGateway("test error") + job.cancel.side_effect = google.api_core.exceptions.NotFound("don't raise me") + retry = retries.Retry() + + with pytest.raises(google.api_core.exceptions.BadGateway): + _job_helpers._wait_or_cancel( + job, + api_timeout=123, + wait_timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + + job.result.assert_called_once_with( + timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + job.cancel.assert_called_once_with( + timeout=123, + retry=retry, + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ff4c40f48925..c8968adbb712 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -70,8 +70,9 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import exceptions -from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from google.cloud.bigquery import ParquetOptions +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +import google.cloud.bigquery.table try: from google.cloud import bigquery_storage @@ -4953,20 +4954,17 @@ def test_query_w_client_default_config_no_incoming(self): ) def test_query_w_invalid_default_job_config(self): - job_id = "some-job-id" - query = "select count(*) from persons" creds = _make_credentials() http = object() default_job_config = object() - client = self._make_one( - project=self.PROJECT, - credentials=creds, - _http=http, - default_query_job_config=default_job_config, - ) with self.assertRaises(TypeError) as exc: - client.query(query, job_id=job_id, location=self.LOCATION) + self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_query_job_config=default_job_config, + ) self.assertIn("Expected an instance of QueryJobConfig", exc.exception.args[0]) def test_query_w_client_location(self): @@ -5213,6 +5211,150 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): assert result is mock.sentinel.query_job + def test_query_and_wait_defaults(self): + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + "schema": { + "fields": [ + { + "name": "f0_", + "type": "INTEGER", + "mode": "NULLABLE", + }, + ], + }, + "totalRows": "1", + "rows": [{"f": [{"v": "5552452"}]}], + "queryId": "job_abcDEF_", + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(jobs_query_response) + + rows = client.query_and_wait(query) + + self.assertIsInstance(rows, google.cloud.bigquery.table.RowIterator) + self.assertEqual(rows.query_id, "job_abcDEF_") + self.assertEqual(rows.total_rows, 1) + # No job reference in the response should be OK for completed query. + self.assertIsNone(rows.job_id) + self.assertIsNone(rows.project) + self.assertIsNone(rows.location) + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], "/projects/PROJECT/queries") + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) + sent = req["data"] + self.assertEqual(sent["query"], query) + self.assertFalse(sent["useLegacySql"]) + + def test_query_and_wait_w_default_query_job_config(self): + from google.cloud.bigquery import job + + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_query_job_config=job.QueryJobConfig( + labels={ + "default-label": "default-value", + }, + ), + ) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait(query) + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries") + sent = req["data"] + self.assertEqual(sent["labels"], {"default-label": "default-value"}) + + def test_query_and_wait_w_job_config(self): + from google.cloud.bigquery import job + + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + ) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait( + query, + job_config=job.QueryJobConfig( + labels={ + "job_config-label": "job_config-value", + }, + ), + ) + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries") + sent = req["data"] + self.assertEqual(sent["labels"], {"job_config-label": "job_config-value"}) + + def test_query_and_wait_w_location(self): + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait(query, location="not-the-client-location") + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries") + sent = req["data"] + self.assertEqual(sent["location"], "not-the-client-location") + + def test_query_and_wait_w_project(self): + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait(query, project="not-the-client-project") + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], "/projects/not-the-client-project/queries") + def test_insert_rows_w_timeout(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 7c3438567f72..1704abac70ee 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -1375,6 +1375,16 @@ def test_job_id_present(self): query = self._make_one(resource) self.assertEqual(query.job_id, "custom-job") + def test_location_missing(self): + query = self._make_one({}) + self.assertIsNone(query.location) + + def test_location_present(self): + resource = self._make_resource() + resource["jobReference"]["location"] = "test-location" + query = self._make_one(resource) + self.assertEqual(query.location, "test-location") + def test_page_token_missing(self): query = self._make_one(self._make_resource()) self.assertIsNone(query.page_token) From b809d888a2289cbcb10eabb00d2736cb1518f8d5 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 11 Dec 2023 15:51:04 -0800 Subject: [PATCH 1684/2016] chore(main): release 3.14.0 (#1709) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 27 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 41206fd788c0..c1bd5b3899bd 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,33 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.14.0](https://github.com/googleapis/python-bigquery/compare/v3.13.0...v3.14.0) (2023-12-08) + + +### Features + +* Add `Client.query_and_wait` which directly returns a `RowIterator` of results ([#1722](https://github.com/googleapis/python-bigquery/issues/1722)) ([89a647e](https://github.com/googleapis/python-bigquery/commit/89a647e19fe5d7302c0a39bba77a155635c5c29d)) +* Add `job_id`, `location`, `project`, and `query_id` properties on `RowIterator` ([#1733](https://github.com/googleapis/python-bigquery/issues/1733)) ([494f275](https://github.com/googleapis/python-bigquery/commit/494f275ab2493dc7904f685c4d12e60bef51ab21)) +* Add `job_timeout_ms` to job configuration classes ([#1675](https://github.com/googleapis/python-bigquery/issues/1675)) ([84d64cd](https://github.com/googleapis/python-bigquery/commit/84d64cdd157afef4a7bf7807e557d59452133434)) +* Add support dataset.max_time_travel_hours ([#1683](https://github.com/googleapis/python-bigquery/issues/1683)) ([f22eff2](https://github.com/googleapis/python-bigquery/commit/f22eff25f116f1c4973ac2b8b03bc8a4ae1f3f42)) +* Add support for Dataset.isCaseInsensitive ([#1671](https://github.com/googleapis/python-bigquery/issues/1671)) ([386fa86](https://github.com/googleapis/python-bigquery/commit/386fa86c89b8cff69fc02213254a1c53c02fee42)) +* Add support for Python 3.12 ([#1736](https://github.com/googleapis/python-bigquery/issues/1736)) ([3c0976a](https://github.com/googleapis/python-bigquery/commit/3c0976aecb0f917477feef4e9ed865997c2bb106)) +* Removed pkg_resources from all test files and moved importlib into pandas extra ([#1726](https://github.com/googleapis/python-bigquery/issues/1726)) ([1f4ebb1](https://github.com/googleapis/python-bigquery/commit/1f4ebb1eca4f9380a31172fc8cb2fae125f8c5a2)) +* Support data_governance_type ([#1708](https://github.com/googleapis/python-bigquery/issues/1708)) ([eff365d](https://github.com/googleapis/python-bigquery/commit/eff365dc17755d0855338e2f273428ffe2056f67)) + + +### Bug Fixes + +* `load_table_from_dataframe` now assumes there may be local null values ([#1735](https://github.com/googleapis/python-bigquery/issues/1735)) ([f05dc69](https://github.com/googleapis/python-bigquery/commit/f05dc69a1f8c65ac32085bfcc6950c2c83f8a843)) +* Ensure query job retry has longer deadline than API request deadline ([#1734](https://github.com/googleapis/python-bigquery/issues/1734)) ([5573579](https://github.com/googleapis/python-bigquery/commit/55735791122f97b7f67cb962b489fd1f12210af5)) +* Keep `RowIterator.total_rows` populated after iteration ([#1748](https://github.com/googleapis/python-bigquery/issues/1748)) ([8482f47](https://github.com/googleapis/python-bigquery/commit/8482f4759ce3c4b00fa06a7f306a2ac4d4ee8eb7)) +* Move grpc, proto-plus and protobuf packages to extras ([#1721](https://github.com/googleapis/python-bigquery/issues/1721)) ([5ce4d13](https://github.com/googleapis/python-bigquery/commit/5ce4d136af97b91fbe1cc56bba1021e50a9c8476)) + + +### Performance Improvements + +* Use the first page a results when `query(api_method="QUERY")` ([#1723](https://github.com/googleapis/python-bigquery/issues/1723)) ([6290517](https://github.com/googleapis/python-bigquery/commit/6290517d6b153a31f20098f75aee580b7915aca9)) + ## [3.13.0](https://github.com/googleapis/python-bigquery/compare/v3.12.0...v3.13.0) (2023-10-30) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index ee029aced741..7d9a17e98d0d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.13.0" +__version__ = "3.14.0" From 1c5bd2441ed55c71d47f75e13658c1115b71b30b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20W=C3=B3jcik?= Date: Wed, 13 Dec 2023 23:15:35 +0100 Subject: [PATCH 1685/2016] fix: add missing handler for deserializing json value (#1587) * fix: add missing handler for deserializing json value * fix mypy --------- Co-authored-by: Anthonios Partheniou Co-authored-by: Tim Swast Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/_helpers.py | 8 ++++++++ .../google/cloud/bigquery/query.py | 6 +++--- .../tests/unit/test__helpers.py | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 684cbfc12003..13baea4ad3db 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -17,6 +17,7 @@ import base64 import datetime import decimal +import json import math import re import os @@ -412,6 +413,12 @@ def _time_to_json(value): return value +def _json_from_json(value, field): + """Coerce 'value' to a pythonic JSON representation, if set or not nullable.""" + if _not_null(value, field): + return json.loads(value) + + # Converters used for scalar values marshalled as row data. _SCALAR_VALUE_TO_JSON_ROW = { "INTEGER": _int_to_json, @@ -427,6 +434,7 @@ def _time_to_json(value): "DATETIME": _datetime_to_json, "DATE": _date_to_json, "TIME": _time_to_json, + "JSON": _json_from_json, # Make sure DECIMAL and BIGDECIMAL are handled, even though # requests for them should be converted to NUMERIC. Better safe # than sorry. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 43591c648ec7..a06ece503c02 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -469,7 +469,7 @@ def to_api_repr(self) -> dict: value = self.value converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) if converter is not None: - value = converter(value) + value = converter(value) # type: ignore resource: Dict[str, Any] = { "parameterType": {"type": self.type_}, "parameterValue": {"value": value}, @@ -626,7 +626,7 @@ def to_api_repr(self) -> dict: converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"]) if converter is not None: - values = [converter(value) for value in values] + values = [converter(value) for value in values] # type: ignore a_values = [{"value": value} for value in values] resource = { @@ -775,7 +775,7 @@ def to_api_repr(self) -> dict: s_types[name] = {"name": name, "type": {"type": type_}} converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) if converter is not None: - value = converter(value) + value = converter(value) # type: ignore values[name] = {"value": value} resource = { diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index e2e2da3c81df..3c425da5f8d3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -58,6 +58,24 @@ def test_w_float_value(self): self.assertEqual(coerced, 42) +class Test_json_from_json(unittest.TestCase): + def _call_fut(self, value, field): + from google.cloud.bigquery._helpers import _json_from_json + + return _json_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._call_fut(None, _Field("REQUIRED")) + + def test_w_string_value(self): + coerced = self._call_fut('{"foo": true}', object()) + self.assertEqual(coerced, {"foo": True}) + + class Test_float_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _float_from_json From 43d1b704eb0fc080d87bd53350eaed1447c9e0ba Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:09:50 -0600 Subject: [PATCH 1686/2016] chore(main): release 3.14.1 (#1750) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index c1bd5b3899bd..1c921fda8716 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.14.1](https://github.com/googleapis/python-bigquery/compare/v3.14.0...v3.14.1) (2023-12-13) + + +### Bug Fixes + +* Add missing handler for deserializing json value ([#1587](https://github.com/googleapis/python-bigquery/issues/1587)) ([09017a9](https://github.com/googleapis/python-bigquery/commit/09017a997010f78bb6e34238fab15247ed14ea7e)) + ## [3.14.0](https://github.com/googleapis/python-bigquery/compare/v3.13.0...v3.14.0) (2023-12-08) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 7d9a17e98d0d..6073384c9b1c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.14.0" +__version__ = "3.14.1" From f2cd65f7cd4c7834747f53e24650e216a708d39f Mon Sep 17 00:00:00 2001 From: ericapetersson Date: Thu, 14 Dec 2023 17:48:14 +0100 Subject: [PATCH 1687/2016] fix: Deserializing JSON subfields within structs fails (#1742) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … for deserializing json subfields from bigquery, this adds support for that. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #[1500](https://togithub.com/googleapis/python-bigquery/issues/1500) 🦕 --- .../google/cloud/bigquery/_helpers.py | 19 ++++++++++++------- .../tests/unit/test__helpers.py | 16 ++++++++++++++-- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 13baea4ad3db..93b46341e9dd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -239,6 +239,15 @@ def _record_from_json(value, field): return record +def _json_from_json(value, field): + """Coerce 'value' to a Pythonic JSON representation.""" + if _not_null(value, field): + return json.loads(value) + else: + return None + + +# Parse BigQuery API response JSON into a Python representation. _CELLDATA_FROM_JSON = { "INTEGER": _int_from_json, "INT64": _int_from_json, @@ -257,6 +266,7 @@ def _record_from_json(value, field): "DATE": _date_from_json, "TIME": _time_from_json, "RECORD": _record_from_json, + "JSON": _json_from_json, } _QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) @@ -413,13 +423,8 @@ def _time_to_json(value): return value -def _json_from_json(value, field): - """Coerce 'value' to a pythonic JSON representation, if set or not nullable.""" - if _not_null(value, field): - return json.loads(value) - - -# Converters used for scalar values marshalled as row data. +# Converters used for scalar values marshalled to the BigQuery API, such as in +# query parameters or the tabledata.insert API. _SCALAR_VALUE_TO_JSON_ROW = { "INTEGER": _int_to_json, "INT64": _int_to_json, diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 3c425da5f8d3..7bf55baeb584 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -15,6 +15,7 @@ import base64 import datetime import decimal +import json import unittest import mock @@ -71,9 +72,20 @@ def test_w_none_required(self): with self.assertRaises(TypeError): self._call_fut(None, _Field("REQUIRED")) + def test_w_json_field(self): + data_field = _Field("REQUIRED", "data", "JSON") + + value = json.dumps( + {"v": {"key": "value"}}, + ) + + expected_output = {"v": {"key": "value"}} + coerced_output = self._call_fut(value, data_field) + self.assertEqual(coerced_output, expected_output) + def test_w_string_value(self): - coerced = self._call_fut('{"foo": true}', object()) - self.assertEqual(coerced, {"foo": True}) + coerced = self._call_fut('"foo"', object()) + self.assertEqual(coerced, "foo") class Test_float_from_json(unittest.TestCase): From 9cdfaa04b00cfe19217f5b286de8d2c43f49fee9 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 14 Dec 2023 13:35:48 -0800 Subject: [PATCH 1688/2016] chore: use freezegun to mock time in retry tests (#1753) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: use freezegun to mock time in retry tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../tests/unit/test_job_retry.py | 105 +++++++++--------- 1 file changed, 50 insertions(+), 55 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py index b2095d2f212b..4fa96fcec5c1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -20,6 +20,7 @@ import google.api_core.exceptions import google.api_core.retry +import freezegun from .helpers import make_connection @@ -156,69 +157,63 @@ def api_request(method, path, query_params=None, data=None, **kw): assert len(sleep.mock_calls) == 0 -@mock.patch("google.api_core.retry.datetime_helpers") @mock.patch("time.sleep") -def test_retry_failed_jobs_after_retry_failed(sleep, datetime_helpers, client): +def test_retry_failed_jobs_after_retry_failed(sleep, client): """ If at first you don't succeed, maybe you will later. :) """ conn = client._connection = make_connection() - datetime_helpers.utcnow.return_value = datetime.datetime(2021, 7, 29, 10, 43, 2) + with freezegun.freeze_time("2024-01-01 00:00:00") as frozen_datetime: + err = dict(reason="rateLimitExceeded") - err = dict(reason="rateLimitExceeded") - - def api_request(method, path, query_params=None, data=None, **kw): - calls = sleep.mock_calls - if calls: - datetime_helpers.utcnow.return_value += datetime.timedelta( - seconds=calls[-1][1][0] - ) - response = dict(status=dict(state="DONE", errors=[err], errorResult=err)) - response["jobReference"] = data["jobReference"] - return response - - conn.api_request.side_effect = api_request - - job = client.query("select 1") - orig_job_id = job.job_id - - with pytest.raises(google.api_core.exceptions.RetryError): - job.result() - - # We never got a successful job, so the job id never changed: - assert job.job_id == orig_job_id - - # We failed because we couldn't succeed after 120 seconds. - # But we can try again: - err2 = dict(reason="backendError") # We also retry on this - responses = [ - dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), - dict(status=dict(state="DONE", errors=[err], errorResult=err)), - dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), - dict(status=dict(state="DONE")), - dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), - ] - - def api_request(method, path, query_params=None, data=None, **kw): - calls = sleep.mock_calls - datetime_helpers.utcnow.return_value += datetime.timedelta( - seconds=calls[-1][1][0] - ) - response = responses.pop(0) - if data: + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + if calls: + frozen_datetime.tick(delta=datetime.timedelta(seconds=calls[-1][1][0])) + response = dict(status=dict(state="DONE", errors=[err], errorResult=err)) response["jobReference"] = data["jobReference"] - else: - response["jobReference"] = dict( - jobId=path.split("/")[-1], projectId="PROJECT" - ) - return response - - conn.api_request.side_effect = api_request - result = job.result() - assert result.total_rows == 1 - assert not responses # We made all the calls we expected to. - assert job.job_id != orig_job_id + return response + + conn.api_request.side_effect = api_request + + job = client.query("select 1") + orig_job_id = job.job_id + + with pytest.raises(google.api_core.exceptions.RetryError): + job.result() + + # We never got a successful job, so the job id never changed: + assert job.job_id == orig_job_id + + # We failed because we couldn't succeed after 120 seconds. + # But we can try again: + err2 = dict(reason="backendError") # We also retry on this + responses = [ + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + frozen_datetime.tick(delta=datetime.timedelta(seconds=calls[-1][1][0])) + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn.api_request.side_effect = api_request + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + assert job.job_id != orig_job_id def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): From 3e2be7687030d07ba1a4ef034028ab8f2643c6d8 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 14 Dec 2023 20:27:23 -0500 Subject: [PATCH 1689/2016] ci: update required checks (#1749) * ci: update required checks * (test) remove retry from datetime_helpers path --------- Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/.github/sync-repo-settings.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml index 220c031b2864..6543d52850a9 100644 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -11,10 +11,17 @@ branchProtectionRules: requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' + - 'Kokoro snippets-3.12' + - 'Kokoro system-3.8' + - 'Kokoro system-3.12' - 'cla/google' - 'Samples - Lint' - 'Samples - Python 3.7' - 'Samples - Python 3.8' + - 'Samples - Python 3.9' + - 'Samples - Python 3.10' + - 'Samples - Python 3.11' + - 'Samples - Python 3.12' - pattern: v2 requiresLinearHistory: true requiresCodeOwnerReviews: true From a7e9c9cc626513599ebbae79b19d4a25272e668d Mon Sep 17 00:00:00 2001 From: Gaurang Shah Date: Mon, 18 Dec 2023 14:16:27 -0500 Subject: [PATCH 1690/2016] fix: load_table_from_dataframe for higher scale decimal (#1703) * fix: load_table_from_dataframe for higher scale decimal * Update test_client.py * fix test_load_table_from_dataframe_w_higher_scale_decimal128_datatype --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/_pandas_helpers.py | 4 +- .../tests/unit/test_client.py | 43 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 380df7b1dffd..bcc869f156e5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -23,6 +23,7 @@ import warnings from typing import Any, Union + from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema @@ -485,7 +486,6 @@ def augment_schema(dataframe, current_bq_schema): # pytype: disable=attribute-error augmented_schema = [] unknown_type_fields = [] - for field in current_bq_schema: if field.field_type is not None: augmented_schema.append(field) @@ -515,6 +515,8 @@ def augment_schema(dataframe, current_bq_schema): else: detected_mode = field.mode detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id) + if detected_type == "NUMERIC" and arrow_table.type.scale > 9: + detected_type = "BIGNUMERIC" if detected_type is None: unknown_type_fields.append(field) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c8968adbb712..ad22e0ddb216 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -8891,6 +8891,49 @@ def test_load_table_from_dataframe_with_csv_source_format(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.CSV + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + from decimal import Decimal + + client = self._make_client() + dataframe = pandas.DataFrame({"x": [Decimal("0.1234567891")]}) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert tuple(sent_config.schema) == ( + SchemaField("x", "BIGNUMERIC", "NULLABLE", None), + ) + def test_load_table_from_json_basic_use(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job From c583cf35e4de4d17563b1f0120a0d05044117f6f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 19 Dec 2023 08:13:00 -0600 Subject: [PATCH 1691/2016] feat: support JSON type in `insert_rows` and as a scalar query parameter (#1757) Co-authored-by: Kira --- .../google/cloud/bigquery/_helpers.py | 9 ++++++++- .../tests/system/test_client.py | 7 ++++++- .../tests/system/test_query.py | 12 ++++++++++++ .../tests/unit/test__helpers.py | 16 ++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 93b46341e9dd..4cf6dddac3a7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -374,6 +374,13 @@ def _bytes_to_json(value): return value +def _json_to_json(value): + """Coerce 'value' to a BigQuery REST API representation.""" + if value is None: + return None + return json.dumps(value) + + def _timestamp_to_json_parameter(value): """Coerce 'value' to an JSON-compatible representation. @@ -439,7 +446,7 @@ def _time_to_json(value): "DATETIME": _datetime_to_json, "DATE": _date_to_json, "TIME": _time_to_json, - "JSON": _json_from_json, + "JSON": _json_to_json, # Make sure DECIMAL and BIGDECIMAL are handled, even though # requests for them should be converted to NUMERIC. Better safe # than sorry. diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 7cea8cfa414f..92894455ab3f 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2049,13 +2049,18 @@ def test_insert_rows_nested_nested(self): ), ], ), + SF("json_col", "JSON"), ] record = { "nested_string": "another string value", "nested_repeated": [0, 1, 2], "nested_record": {"nested_nested_string": "some deep insight"}, } - to_insert = [("Some value", record)] + json_record = { + "json_array": [1, 2, 3], + "json_object": {"alpha": "abc", "num": 123}, + } + to_insert = [("Some value", record, json_record)] table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("issue_2951")) table_arg = Table(dataset.table(table_id), schema=schema) diff --git a/packages/google-cloud-bigquery/tests/system/test_query.py b/packages/google-cloud-bigquery/tests/system/test_query.py index 723f927d7d92..b8e0c00da971 100644 --- a/packages/google-cloud-bigquery/tests/system/test_query.py +++ b/packages/google-cloud-bigquery/tests/system/test_query.py @@ -256,6 +256,18 @@ def test_query_statistics(bigquery_client, query_api_method): ) ], ), + pytest.param( + "SELECT @json", + {"alpha": "abc", "num": [1, 2, 3]}, + [ + ScalarQueryParameter( + name="json", + type_="JSON", + value={"alpha": "abc", "num": [1, 2, 3]}, + ) + ], + id="scalar-json", + ), ( "SELECT @naive_time", datetime.time(12, 41, 9, 62500), diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 7bf55baeb584..87ab46669500 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -886,6 +886,16 @@ def test_w_known_field_type(self): converted = self._call_fut(field, original) self.assertEqual(converted, str(original)) + def test_w_scalar_none(self): + import google.cloud.bigquery._helpers as module_under_test + + scalar_types = module_under_test._SCALAR_VALUE_TO_JSON_ROW.keys() + for type_ in scalar_types: + field = _make_field(type_) + original = None + converted = self._call_fut(field, original) + self.assertIsNone(converted, msg=f"{type_} did not return None") + class Test_single_field_to_json(unittest.TestCase): def _call_fut(self, field, value): @@ -921,6 +931,12 @@ def test_w_scalar_ignores_mode(self): converted = self._call_fut(field, original) self.assertEqual(converted, original) + def test_w_scalar_json(self): + field = _make_field("JSON") + original = {"alpha": "abc", "num": [1, 2, 3]} + converted = self._call_fut(field, original) + self.assertEqual(converted, json.dumps(original)) + class Test_repeated_field_to_json(unittest.TestCase): def _call_fut(self, field, value): From 3995ed36a13b32084e3cfd3ac0ae8b48a2bad5e0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 19 Dec 2023 16:00:29 -0600 Subject: [PATCH 1692/2016] perf: DB-API uses more efficient `query_and_wait` when no job ID is provided (#1747) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #1745 🦕 --- .../google/cloud/bigquery/_job_helpers.py | 1 + .../google/cloud/bigquery/client.py | 6 + .../google/cloud/bigquery/dbapi/cursor.py | 122 ++++++++------- .../google/cloud/bigquery/job/query.py | 2 + .../google/cloud/bigquery/table.py | 32 +++- .../tests/unit/test_dbapi_cursor.py | 139 ++++++++++++------ .../tests/unit/test_table.py | 32 ++-- 7 files changed, 219 insertions(+), 115 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 095de4faa7ae..7356331b8ff1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -491,6 +491,7 @@ def do_query(): job_id=query_results.job_id, query_id=query_results.query_id, project=query_results.project, + num_dml_affected_rows=query_results.num_dml_affected_rows, ) if job_retry is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 284ccddb5972..182319646dc4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3963,6 +3963,7 @@ def _list_rows_from_query_results( timeout: TimeoutType = DEFAULT_TIMEOUT, query_id: Optional[str] = None, first_page_response: Optional[Dict[str, Any]] = None, + num_dml_affected_rows: Optional[int] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -4007,6 +4008,10 @@ def _list_rows_from_query_results( and not guaranteed to be populated. first_page_response (Optional[dict]): API response for the first page of results (if available). + num_dml_affected_rows (Optional[int]): + If this RowIterator is the result of a DML query, the number of + rows that were affected. + Returns: google.cloud.bigquery.table.RowIterator: Iterator of row data @@ -4047,6 +4052,7 @@ def _list_rows_from_query_results( job_id=job_id, query_id=query_id, first_page_response=first_page_response, + num_dml_affected_rows=num_dml_affected_rows, ) return row_iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py index 0dc8f56ab0e4..014a6825ea6b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -14,11 +14,12 @@ """Cursor for the Google BigQuery DB-API.""" +from __future__ import annotations + import collections from collections import abc as collections_abc -import copy -import logging import re +from typing import Optional try: from google.cloud.bigquery_storage import ArrowSerializationOptions @@ -34,8 +35,6 @@ import google.cloud.exceptions # type: ignore -_LOGGER = logging.getLogger(__name__) - # Per PEP 249: A 7-item sequence containing information describing one result # column. The first two items (name and type_code) are mandatory, the other # five are optional and are set to None if no meaningful values can be @@ -76,18 +75,31 @@ def __init__(self, connection): # most appropriate size. self.arraysize = None self._query_data = None - self._query_job = None + self._query_rows = None self._closed = False @property - def query_job(self): - """google.cloud.bigquery.job.query.QueryJob: The query job created by - the last ``execute*()`` call. + def query_job(self) -> Optional[job.QueryJob]: + """google.cloud.bigquery.job.query.QueryJob | None: The query job + created by the last ``execute*()`` call, if a query job was created. .. note:: If the last ``execute*()`` call was ``executemany()``, this is the last job created by ``executemany()``.""" - return self._query_job + rows = self._query_rows + + if rows is None: + return None + + job_id = rows.job_id + project = rows.project + location = rows.location + client = self.connection._client + + if job_id is None: + return None + + return client.get_job(job_id, location=location, project=project) def close(self): """Mark the cursor as closed, preventing its further use.""" @@ -117,8 +129,8 @@ def _set_description(self, schema): for field in schema ) - def _set_rowcount(self, query_results): - """Set the rowcount from query results. + def _set_rowcount(self, rows): + """Set the rowcount from a RowIterator. Normally, this sets rowcount to the number of rows returned by the query, but if it was a DML statement, it sets rowcount to the number @@ -129,10 +141,10 @@ def _set_rowcount(self, query_results): Results of a query. """ total_rows = 0 - num_dml_affected_rows = query_results.num_dml_affected_rows + num_dml_affected_rows = rows.num_dml_affected_rows - if query_results.total_rows is not None and query_results.total_rows > 0: - total_rows = query_results.total_rows + if rows.total_rows is not None and rows.total_rows > 0: + total_rows = rows.total_rows if num_dml_affected_rows is not None and num_dml_affected_rows > 0: total_rows = num_dml_affected_rows self.rowcount = total_rows @@ -165,9 +177,10 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None): parameters (Union[Mapping[str, Any], Sequence[Any]]): (Optional) dictionary or sequence of parameter values. - job_id (str): - (Optional) The job_id to use. If not set, a job ID - is generated at random. + job_id (str | None): + (Optional and discouraged) The job ID to use when creating + the query job. For best performance and reliability, manually + setting a job ID is discouraged. job_config (google.cloud.bigquery.job.QueryJobConfig): (Optional) Extra configuration options for the query job. @@ -181,7 +194,7 @@ def _execute( self, formatted_operation, parameters, job_id, job_config, parameter_types ): self._query_data = None - self._query_job = None + self._query_results = None client = self.connection._client # The DB-API uses the pyformat formatting, since the way BigQuery does @@ -190,33 +203,35 @@ def _execute( # libraries. query_parameters = _helpers.to_query_parameters(parameters, parameter_types) - if client._default_query_job_config: - if job_config: - config = job_config._fill_from_default(client._default_query_job_config) - else: - config = copy.deepcopy(client._default_query_job_config) - else: - config = job_config or job.QueryJobConfig(use_legacy_sql=False) - + config = job_config or job.QueryJobConfig() config.query_parameters = query_parameters - self._query_job = client.query( - formatted_operation, job_config=config, job_id=job_id - ) - if self._query_job.dry_run: - self._set_description(schema=None) - self.rowcount = 0 - return - - # Wait for the query to finish. + # Start the query and wait for the query to finish. try: - self._query_job.result() + if job_id is not None: + rows = client.query( + formatted_operation, + job_config=job_config, + job_id=job_id, + ).result( + page_size=self.arraysize, + ) + else: + rows = client.query_and_wait( + formatted_operation, + job_config=config, + page_size=self.arraysize, + ) except google.cloud.exceptions.GoogleCloudError as exc: raise exceptions.DatabaseError(exc) - query_results = self._query_job._query_results - self._set_rowcount(query_results) - self._set_description(query_results.schema) + self._query_rows = rows + self._set_description(rows.schema) + + if config.dry_run: + self.rowcount = 0 + else: + self._set_rowcount(rows) def executemany(self, operation, seq_of_parameters): """Prepare and execute a database operation multiple times. @@ -250,25 +265,26 @@ def _try_fetch(self, size=None): Mutates self to indicate that iteration has started. """ - if self._query_job is None: + if self._query_data is not None: + # Already started fetching the data. + return + + rows = self._query_rows + if rows is None: raise exceptions.InterfaceError( "No query results: execute() must be called before fetch." ) - if self._query_job.dry_run: - self._query_data = iter([]) + bqstorage_client = self.connection._bqstorage_client + if rows._should_use_bqstorage( + bqstorage_client, + create_bqstorage_client=False, + ): + rows_iterable = self._bqstorage_fetch(bqstorage_client) + self._query_data = _helpers.to_bq_table_rows(rows_iterable) return - if self._query_data is None: - bqstorage_client = self.connection._bqstorage_client - - if bqstorage_client is not None: - rows_iterable = self._bqstorage_fetch(bqstorage_client) - self._query_data = _helpers.to_bq_table_rows(rows_iterable) - return - - rows_iter = self._query_job.result(page_size=self.arraysize) - self._query_data = iter(rows_iter) + self._query_data = iter(rows) def _bqstorage_fetch(self, bqstorage_client): """Start fetching data with the BigQuery Storage API. @@ -290,7 +306,7 @@ def _bqstorage_fetch(self, bqstorage_client): # bigquery_storage can indeed be imported here without errors. from google.cloud import bigquery_storage - table_reference = self._query_job.destination + table_reference = self._query_rows._table requested_session = bigquery_storage.types.ReadSession( table=table_reference.to_bqstorage(), diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 4a529f9497e9..ac0c51973026 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1614,6 +1614,7 @@ def do_get_result(): project=self.project, job_id=self.job_id, query_id=self.query_id, + num_dml_affected_rows=self._query_results.num_dml_affected_rows, ) # We know that there's at least 1 row, so only treat the response from @@ -1639,6 +1640,7 @@ def do_get_result(): timeout=timeout, query_id=self.query_id, first_page_response=first_page_response, + num_dml_affected_rows=self._query_results.num_dml_affected_rows, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 70e60171417c..0ae7851a17a6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1566,6 +1566,7 @@ def __init__( job_id: Optional[str] = None, query_id: Optional[str] = None, project: Optional[str] = None, + num_dml_affected_rows: Optional[int] = None, ): super(RowIterator, self).__init__( client, @@ -1592,6 +1593,7 @@ def __init__( self._job_id = job_id self._query_id = query_id self._project = project + self._num_dml_affected_rows = num_dml_affected_rows @property def _billing_project(self) -> Optional[str]: @@ -1616,6 +1618,16 @@ def location(self) -> Optional[str]: """ return self._location + @property + def num_dml_affected_rows(self) -> Optional[int]: + """If this RowIterator is the result of a DML query, the number of + rows that were affected. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.num_dml_affected_rows + """ + return self._num_dml_affected_rows + @property def project(self) -> Optional[str]: """GCP Project ID where these rows are read from.""" @@ -1635,7 +1647,10 @@ def _is_almost_completely_cached(self): This is useful to know, because we can avoid alternative download mechanisms. """ - if self._first_page_response is None: + if ( + not hasattr(self, "_first_page_response") + or self._first_page_response is None + ): return False total_cached_rows = len(self._first_page_response.get(self._items_key, [])) @@ -1655,7 +1670,7 @@ def _is_almost_completely_cached(self): return False - def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): + def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): """Returns True if the BigQuery Storage API can be used. Returns: @@ -1669,8 +1684,9 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): if self._table is None: return False - # The developer is manually paging through results if this is set. - if self.next_page_token is not None: + # The developer has already started paging through results if + # next_page_token is set. + if hasattr(self, "next_page_token") and self.next_page_token is not None: return False if self._is_almost_completely_cached(): @@ -1726,7 +1742,7 @@ def schema(self): @property def total_rows(self): - """int: The total number of rows in the table.""" + """int: The total number of rows in the table or query results.""" return self._total_rows def _maybe_warn_max_results( @@ -1752,7 +1768,7 @@ def _maybe_warn_max_results( def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): - if not self._validate_bqstorage(bqstorage_client, False): + if not self._should_use_bqstorage(bqstorage_client, False): bqstorage_client = None result_pages = ( @@ -1882,7 +1898,7 @@ def to_arrow( self._maybe_warn_max_results(bqstorage_client) - if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): + if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -2223,7 +2239,7 @@ def to_dataframe( self._maybe_warn_max_results(bqstorage_client) - if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): + if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index fc6ea388208e..69d33fe17de1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import functools import mock import operator as op import unittest import pytest +import google.cloud.bigquery.table as bq_table + try: import pyarrow except ImportError: # pragma: NO COVER @@ -48,27 +51,45 @@ def _mock_client( rows=None, schema=None, num_dml_affected_rows=None, - default_query_job_config=None, dry_run_job=False, total_bytes_processed=0, + total_rows=None, + destination_table="test-project.test_dataset.test_table", ): from google.cloud.bigquery import client - if rows is None: + if total_rows is None: total_rows = 0 - else: - total_rows = len(rows) + if rows is not None: + total_rows = len(rows) + table = bq_table.TableReference.from_string(destination_table) mock_client = mock.create_autospec(client.Client) - mock_client.query.return_value = self._mock_job( + mock_job = self._mock_job( total_rows=total_rows, schema=schema, num_dml_affected_rows=num_dml_affected_rows, dry_run=dry_run_job, total_bytes_processed=total_bytes_processed, - rows=rows, + rows=self._mock_rows( + rows, + total_rows=total_rows, + schema=schema, + num_dml_affected_rows=num_dml_affected_rows, + table=table, + ), + ) + mock_client.get_job.return_value = mock_job + mock_client.query.return_value = mock_job + mock_client.query_and_wait.return_value = self._mock_rows( + rows, + total_rows=total_rows, + schema=schema, + num_dml_affected_rows=num_dml_affected_rows, + # Sometimes all the results will be available in the initial + # response, in which case may be no job and no destination table. + table=table if rows is not None and total_rows > len(rows) else None, ) - mock_client._default_query_job_config = default_query_job_config # Assure that the REST client gets used, not the BQ Storage client. mock_client._ensure_bqstorage_client.return_value = None @@ -106,9 +127,6 @@ def _mock_job( ): from google.cloud.bigquery import job - if rows is None: - rows = [] - mock_job = mock.create_autospec(job.QueryJob) mock_job.error_result = None mock_job.state = "DONE" @@ -136,6 +154,30 @@ def _mock_job( return mock_job + def _mock_rows( + self, rows, total_rows=0, schema=None, num_dml_affected_rows=None, table=None + ): + mock_rows = mock.create_autospec(bq_table.RowIterator, instance=True) + mock_rows.__iter__.return_value = rows + mock_rows._table = table + mock_rows._should_use_bqstorage = functools.partial( + bq_table.RowIterator._should_use_bqstorage, + mock_rows, + ) + mock_rows._is_almost_completely_cached = functools.partial( + bq_table.RowIterator._is_almost_completely_cached, + mock_rows, + ) + mock_rows.max_results = None + type(mock_rows).job_id = mock.PropertyMock(return_value="test-job-id") + type(mock_rows).location = mock.PropertyMock(return_value="test-location") + type(mock_rows).num_dml_affected_rows = mock.PropertyMock( + return_value=num_dml_affected_rows + ) + type(mock_rows).total_rows = mock.PropertyMock(return_value=total_rows) + type(mock_rows).schema = mock.PropertyMock(return_value=schema) + return mock_rows + def _mock_results(self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import query @@ -284,12 +326,15 @@ def test_fetchall_w_row(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): from google.cloud.bigquery import dbapi - from google.cloud.bigquery import table # use unordered data to also test any non-determenistic key order in dicts row_data = [ - table.Row([1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), - table.Row([2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), + bq_table.Row( + [1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0} + ), + bq_table.Row( + [2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0} + ), ] bqstorage_streamed_rows = [ { @@ -341,7 +386,12 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi - mock_client = self._mock_client(rows=[]) + mock_client = self._mock_client( + rows=[], + # Assume there are many more pages of data to look at so that the + # BQ Storage API is necessary. + total_rows=1000, + ) mock_bqstorage_client = self._mock_bqstorage_client(stream_count=0) mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client @@ -365,14 +415,18 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi - from google.cloud.bigquery import table - row_data = [table.Row([1.1, 1.2], {"foo": 0, "bar": 1})] + row_data = [bq_table.Row([1.1, 1.2], {"foo": 0, "bar": 1})] def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): return bqstorage_client - mock_client = self._mock_client(rows=row_data) + mock_client = self._mock_client( + rows=row_data, + # Assume there are many more pages of data to look at so that the + # BQ Storage API is necessary. + total_rows=1000, + ) mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, @@ -400,16 +454,21 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_no_arrow_compression(self): from google.cloud.bigquery import dbapi - from google.cloud.bigquery import table # Use unordered data to also test any non-determenistic key order in dicts. - row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] + row_data = [bq_table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}] def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): return bqstorage_client - mock_client = self._mock_client(rows=row_data) + mock_client = self._mock_client( + rows=row_data, + # Assume there are many more pages of data to look at so that the + # BQ Storage API is necessary. + total_rows=1000, + destination_table="P.DS.T", + ) mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, @@ -459,12 +518,8 @@ def test_execute_custom_job_id(self): def test_execute_w_default_config(self): from google.cloud.bigquery.dbapi import connect - from google.cloud.bigquery import job - default_config = job.QueryJobConfig(use_legacy_sql=False, flatten_results=True) - client = self._mock_client( - rows=[], num_dml_affected_rows=0, default_query_job_config=default_config - ) + client = self._mock_client(rows=[], num_dml_affected_rows=0) connection = connect(client) cursor = connection.cursor() @@ -472,10 +527,7 @@ def test_execute_w_default_config(self): _, kwargs = client.query.call_args used_config = kwargs["job_config"] - expected_config = job.QueryJobConfig( - use_legacy_sql=False, flatten_results=True, query_parameters=[] - ) - self.assertEqual(used_config._properties, expected_config._properties) + self.assertIsNone(used_config) def test_execute_custom_job_config_wo_default_config(self): from google.cloud.bigquery.dbapi import connect @@ -495,10 +547,7 @@ def test_execute_custom_job_config_w_default_config(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery import job - default_config = job.QueryJobConfig(use_legacy_sql=False, flatten_results=True) - client = self._mock_client( - rows=[], num_dml_affected_rows=0, default_query_job_config=default_config - ) + client = self._mock_client(rows=[], num_dml_affected_rows=0) connection = connect(client) cursor = connection.cursor() config = job.QueryJobConfig(use_legacy_sql=True) @@ -509,7 +558,6 @@ def test_execute_custom_job_config_w_default_config(self): used_config = kwargs["job_config"] expected_config = job.QueryJobConfig( use_legacy_sql=True, # the config passed to execute() prevails - flatten_results=True, # from the default query_parameters=[], ) self.assertEqual(used_config._properties, expected_config._properties) @@ -576,7 +624,7 @@ def test_execute_w_query_dry_run(self): connection = dbapi.connect( self._mock_client( - rows=[("hello", "world", 1), ("howdy", "y'all", 2)], + rows=[], schema=[ SchemaField("a", "STRING", mode="NULLABLE"), SchemaField("b", "STRING", mode="REQUIRED"), @@ -594,7 +642,7 @@ def test_execute_w_query_dry_run(self): ) self.assertEqual(cursor.rowcount, 0) - self.assertIsNone(cursor.description) + self.assertIsNotNone(cursor.description) rows = cursor.fetchall() self.assertEqual(list(rows), []) @@ -602,16 +650,11 @@ def test_execute_raises_if_result_raises(self): import google.cloud.exceptions from google.cloud.bigquery import client - from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import exceptions - job = mock.create_autospec(job.QueryJob) - job.dry_run = None - job.result.side_effect = google.cloud.exceptions.GoogleCloudError("") client = mock.create_autospec(client.Client) - client._default_query_job_config = None - client.query.return_value = job + client.query_and_wait.side_effect = google.cloud.exceptions.GoogleCloudError("") connection = connect(client) cursor = connection.cursor() @@ -677,6 +720,18 @@ def test_query_job_w_execute(self): cursor.execute("SELECT 1;") self.assertIsInstance(cursor.query_job, QueryJob) + def test_query_job_w_execute_no_job(self): + from google.cloud.bigquery import dbapi + + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + cursor.execute("SELECT 1;") + + # Simulate jobless execution. + type(cursor._query_rows).job_id = mock.PropertyMock(return_value=None) + + self.assertIsNone(cursor.query_job) + def test_query_job_w_executemany(self): from google.cloud.bigquery import dbapi, QueryJob diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 9b3d4fe842ef..4a85a0823fde 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2133,6 +2133,14 @@ def test_location_present(self): rows = self._make_one(location="asia-northeast1") self.assertEqual(rows.location, "asia-northeast1") + def test_num_dml_affected_rows_missing(self): + rows = self._make_one() + self.assertIsNone(rows.num_dml_affected_rows) + + def test_num_dml_affected_rows_present(self): + rows = self._make_one(num_dml_affected_rows=1234) + self.assertEqual(rows.num_dml_affected_rows, 1234) + def test_project_missing(self): rows = self._make_one() self.assertIsNone(rows.project) @@ -2334,11 +2342,11 @@ def test__is_almost_completely_cached_returns_true_with_no_rows_remaining(self): iterator = self._make_one(first_page_response=first_page) self.assertTrue(iterator._is_almost_completely_cached()) - def test__validate_bqstorage_returns_false_when_completely_cached(self): + def test__should_use_bqstorage_returns_false_when_completely_cached(self): first_page = {"rows": []} iterator = self._make_one(first_page_response=first_page) self.assertFalse( - iterator._validate_bqstorage( + iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) ) @@ -2346,32 +2354,32 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test__validate_bqstorage_returns_true_if_no_cached_results(self): + def test__should_use_bqstorage_returns_true_if_no_cached_results(self): iterator = self._make_one(first_page_response=None) # not cached - result = iterator._validate_bqstorage( + result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) self.assertTrue(result) - def test__validate_bqstorage_returns_false_if_page_token_set(self): + def test__should_use_bqstorage_returns_false_if_page_token_set(self): iterator = self._make_one( page_token="abc", first_page_response=None # not cached ) - result = iterator._validate_bqstorage( + result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) self.assertFalse(result) - def test__validate_bqstorage_returns_false_if_max_results_set(self): + def test__should_use_bqstorage_returns_false_if_max_results_set(self): iterator = self._make_one( max_results=10, first_page_response=None # not cached ) - result = iterator._validate_bqstorage( + result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) self.assertFalse(result) - def test__validate_bqstorage_returns_false_if_missing_dependency(self): + def test__should_use_bqstorage_returns_false_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached def fail_bqstorage_import(name, globals, locals, fromlist, level): @@ -2383,7 +2391,7 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) with no_bqstorage: - result = iterator._validate_bqstorage( + result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) @@ -2392,7 +2400,7 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): + def test__should_use_bqstorage_returns_false_w_warning_if_obsolete_version(self): iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( @@ -2400,7 +2408,7 @@ def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: - result = iterator._validate_bqstorage( + result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) From a4ba7484d22b161d44efc68b01705446c8d28bb5 Mon Sep 17 00:00:00 2001 From: Patrick Marx Date: Thu, 21 Dec 2023 07:05:41 -0800 Subject: [PATCH 1693/2016] Update README.rst (#1743) Move instructions off deprecated module. Co-authored-by: Chalmer Lowe --- packages/google-cloud-bigquery/README.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 46f35e716b14..f81adc4b90d9 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -117,7 +117,7 @@ the BigQuery client the following PyPI packages need to be installed: .. code-block:: console - pip install google-cloud-bigquery[opentelemetry] opentelemetry-exporter-google-cloud + pip install google-cloud-bigquery[opentelemetry] opentelemetry-exporter-gcp-trace After installation, OpenTelemetry can be used in the BigQuery client and in BigQuery jobs. First, however, an exporter must be @@ -128,12 +128,11 @@ example of this can be found here: from opentelemetry import trace from opentelemetry.sdk.trace import TracerProvider - from opentelemetry.sdk.trace.export import BatchExportSpanProcessor + from opentelemetry.sdk.trace.export import BatchSpanProcessor from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter + tracer_provider = TracerProvider() + tracer_provider = BatchSpanProcessor(CloudTraceSpanExporter()) trace.set_tracer_provider(TracerProvider()) - trace.get_tracer_provider().add_span_processor( - BatchExportSpanProcessor(CloudTraceSpanExporter()) - ) In this example all tracing data will be published to the Google `Cloud Trace`_ console. For more information on OpenTelemetry, please consult the `OpenTelemetry documentation`_. From 5af079d8e248ca42c534a3d66b056b38e77db984 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 5 Jan 2024 12:03:25 -0500 Subject: [PATCH 1694/2016] fix: Due to upstream change in dataset, updates expected results (#1761) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Due to upstream change in dataset, updates expected results * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../google-cloud-bigquery/tests/system/test_client.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 92894455ab3f..d7e56f7ffa25 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -1781,7 +1781,6 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): ) result_rows = [cursor.fetchone(), cursor.fetchone(), cursor.fetchone()] - field_name = operator.itemgetter(0) fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] # Since DB API is not thread safe, only a single result stream should be @@ -1789,11 +1788,6 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): # in the sorted order. expected_data = [ - [ - ("by", "pg"), - ("id", 1), - ("timestamp", datetime.datetime(2006, 10, 9, 18, 21, 51, tzinfo=UTC)), - ], [ ("by", "phyllis"), ("id", 2), @@ -1804,6 +1798,11 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): ("id", 3), ("timestamp", datetime.datetime(2006, 10, 9, 18, 40, 33, tzinfo=UTC)), ], + [ + ("by", "onebeerdave"), + ("id", 4), + ("timestamp", datetime.datetime(2006, 10, 9, 18, 47, 42, tzinfo=UTC)), + ], ] self.assertEqual(fetched_data, expected_data) From bf09d8cd4fad4e914cb2dea2a69932f5be0d531d Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 8 Jan 2024 11:00:43 -0800 Subject: [PATCH 1695/2016] feat: support RANGE in schema (#1746) * feat: support RANGE in schema * lint * fix python 3.7 error * remove unused test method * address comments * add system test * correct range json schema * json format * change system test to adjust to upstream table * fix systest * remove insert row with range * systest * add unit test * fix mypy error * error * address comments --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/schema.py | 73 +++++++++++++++- .../tests/data/schema.json | 8 ++ .../tests/unit/test_schema.py | 84 +++++++++++++++++++ 4 files changed, 166 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 72576e6084d1..1ea056eb8072 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -96,6 +96,7 @@ from google.cloud.bigquery.routine import RemoteFunctionOptions from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import FieldElementType from google.cloud.bigquery.standard_sql import StandardSqlDataType from google.cloud.bigquery.standard_sql import StandardSqlField from google.cloud.bigquery.standard_sql import StandardSqlStructType @@ -158,6 +159,7 @@ "RemoteFunctionOptions", # Shared helpers "SchemaField", + "FieldElementType", "PolicyTagList", "UDFResource", "ExternalConfig", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 20a1bc92faca..f5b03cbef400 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -16,7 +16,7 @@ import collections import enum -from typing import Any, Dict, Iterable, Optional, Union +from typing import Any, Dict, Iterable, Optional, Union, cast from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -66,6 +66,46 @@ class _DefaultSentinel(enum.Enum): _DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE +class FieldElementType(object): + """Represents the type of a field element. + + Args: + element_type (str): The type of a field element. + """ + + def __init__(self, element_type: str): + self._properties = {} + self._properties["type"] = element_type.upper() + + @property + def element_type(self): + return self._properties.get("type") + + @classmethod + def from_api_repr(cls, api_repr: Optional[dict]) -> Optional["FieldElementType"]: + """Factory: construct a FieldElementType given its API representation. + + Args: + api_repr (Dict[str, str]): field element type as returned from + the API. + + Returns: + google.cloud.bigquery.FieldElementType: + Python object, as parsed from ``api_repr``. + """ + if not api_repr: + return None + return cls(api_repr["type"].upper()) + + def to_api_repr(self) -> dict: + """Construct the API resource representation of this field element type. + + Returns: + Dict[str, str]: Field element type represented as an API resource. + """ + return self._properties + + class SchemaField(object): """Describe a single field within a table schema. @@ -117,6 +157,12 @@ class SchemaField(object): - Struct or array composed with the above allowed functions, for example: "[CURRENT_DATE(), DATE '2020-01-01'"] + + range_element_type: FieldElementType, str, Optional + The subtype of the RANGE, if the type of this field is RANGE. If + the type is RANGE, this field is required. Possible values for the + field element type of a RANGE include `DATE`, `DATETIME` and + `TIMESTAMP`. """ def __init__( @@ -131,6 +177,7 @@ def __init__( precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + range_element_type: Union[FieldElementType, str, None] = None, ): self._properties: Dict[str, Any] = { "name": name, @@ -152,6 +199,11 @@ def __init__( self._properties["policyTags"] = ( policy_tags.to_api_repr() if policy_tags is not None else None ) + if isinstance(range_element_type, str): + self._properties["rangeElementType"] = {"type": range_element_type} + if isinstance(range_element_type, FieldElementType): + self._properties["rangeElementType"] = range_element_type.to_api_repr() + self._fields = tuple(fields) @staticmethod @@ -186,6 +238,12 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: policy_tags = PolicyTagList.from_api_repr(policy_tags) + if api_repr.get("rangeElementType"): + range_element_type = cast(dict, api_repr.get("rangeElementType")) + element_type = range_element_type.get("type") + else: + element_type = None + return cls( field_type=field_type, fields=[cls.from_api_repr(f) for f in fields], @@ -197,6 +255,7 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": precision=cls.__get_int(api_repr, "precision"), scale=cls.__get_int(api_repr, "scale"), max_length=cls.__get_int(api_repr, "maxLength"), + range_element_type=element_type, ) @property @@ -252,6 +311,18 @@ def max_length(self): """Optional[int]: Maximum length for the STRING or BYTES field.""" return self._properties.get("maxLength") + @property + def range_element_type(self): + """Optional[FieldElementType]: The subtype of the RANGE, if the + type of this field is RANGE. + + Must be set when ``type`` is `"RANGE"`. Must be one of `"DATE"`, + `"DATETIME"` or `"TIMESTAMP"`. + """ + if self._properties.get("rangeElementType"): + ret = self._properties.get("rangeElementType") + return FieldElementType.from_api_repr(ret) + @property def fields(self): """Optional[tuple]: Subfields contained in this field. diff --git a/packages/google-cloud-bigquery/tests/data/schema.json b/packages/google-cloud-bigquery/tests/data/schema.json index 6a36e55e579a..29542e82da98 100644 --- a/packages/google-cloud-bigquery/tests/data/schema.json +++ b/packages/google-cloud-bigquery/tests/data/schema.json @@ -83,6 +83,14 @@ "mode" : "NULLABLE", "name" : "FavoriteNumber", "type" : "NUMERIC" + }, + { + "mode" : "NULLABLE", + "name" : "TimeRange", + "type" : "RANGE", + "rangeElementType": { + "type": "DATETIME" + } } ] } diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index c6593e1b4daf..26ec0dfefef4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -97,6 +97,36 @@ def test_constructor_subfields(self): self.assertEqual(field.fields[0], sub_field1) self.assertEqual(field.fields[1], sub_field2) + def test_constructor_range(self): + from google.cloud.bigquery.schema import FieldElementType + + field = self._make_one( + "test", + "RANGE", + mode="REQUIRED", + description="Testing", + range_element_type=FieldElementType("DATETIME"), + ) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "RANGE") + self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.description, "Testing") + self.assertEqual(field.range_element_type.element_type, "DATETIME") + + def test_constructor_range_str(self): + field = self._make_one( + "test", + "RANGE", + mode="REQUIRED", + description="Testing", + range_element_type="DATETIME", + ) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "RANGE") + self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.description, "Testing") + self.assertEqual(field.range_element_type.element_type, "DATETIME") + def test_to_api_repr(self): from google.cloud.bigquery.schema import PolicyTagList @@ -160,6 +190,7 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].name, "bar") self.assertEqual(field.fields[0].field_type, "INTEGER") self.assertEqual(field.fields[0].mode, "NULLABLE") + self.assertEqual(field.range_element_type, None) def test_from_api_repr_policy(self): field = self._get_target_class().from_api_repr( @@ -178,6 +209,23 @@ def test_from_api_repr_policy(self): self.assertEqual(field.fields[0].field_type, "INTEGER") self.assertEqual(field.fields[0].mode, "NULLABLE") + def test_from_api_repr_range(self): + field = self._get_target_class().from_api_repr( + { + "mode": "nullable", + "description": "test_range", + "name": "foo", + "type": "range", + "rangeElementType": {"type": "DATETIME"}, + } + ) + self.assertEqual(field.name, "foo") + self.assertEqual(field.field_type, "RANGE") + self.assertEqual(field.mode, "NULLABLE") + self.assertEqual(field.description, "test_range") + self.assertEqual(len(field.fields), 0) + self.assertEqual(field.range_element_type.element_type, "DATETIME") + def test_from_api_repr_defaults(self): field = self._get_target_class().from_api_repr( {"name": "foo", "type": "record"} @@ -192,8 +240,10 @@ def test_from_api_repr_defaults(self): # _properties. self.assertIsNone(field.description) self.assertIsNone(field.policy_tags) + self.assertIsNone(field.range_element_type) self.assertNotIn("description", field._properties) self.assertNotIn("policyTags", field._properties) + self.assertNotIn("rangeElementType", field._properties) def test_name_property(self): name = "lemon-ness" @@ -566,6 +616,40 @@ def test___repr__evaluable_with_policy_tags(self): assert field == evaled_field +class TestFieldElementType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.schema import FieldElementType + + return FieldElementType + + def _make_one(self, *args): + return self._get_target_class()(*args) + + def test_constructor(self): + element_type = self._make_one("DATETIME") + self.assertEqual(element_type.element_type, "DATETIME") + self.assertEqual(element_type._properties["type"], "DATETIME") + + def test_to_api_repr(self): + element_type = self._make_one("DATETIME") + self.assertEqual(element_type.to_api_repr(), {"type": "DATETIME"}) + + def test_from_api_repr(self): + api_repr = {"type": "DATETIME"} + expected_element_type = self._make_one("DATETIME") + self.assertEqual( + expected_element_type.element_type, + self._get_target_class().from_api_repr(api_repr).element_type, + ) + + def test_from_api_repr_empty(self): + self.assertEqual(None, self._get_target_class().from_api_repr({})) + + def test_from_api_repr_none(self): + self.assertEqual(None, self._get_target_class().from_api_repr(None)) + + # TODO: dedup with the same class in test_table.py. class _SchemaBase(object): def _verify_field(self, field, r_field): From 0854b2b796be96db3f53981c9ee38ceb1a3e9709 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 9 Jan 2024 10:54:49 -0500 Subject: [PATCH 1696/2016] fix: updates types-protobuf version for mypy-samples nox session (#1764) --- packages/google-cloud-bigquery/noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 41492c7f0776..66d68c04ea22 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -219,7 +219,7 @@ def mypy_samples(session): session.install( "types-mock", "types-pytz", - "types-protobuf", + "types-protobuf!=4.24.0.20240106", # This version causes an error: 'Module "google.oauth2" has no attribute "service_account"' "types-python-dateutil", "types-requests", "types-setuptools", From 2066350a0fd2ea3b488e3e61401aa4f2d12212ae Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 15:39:34 -0600 Subject: [PATCH 1697/2016] chore(main): release 3.15.0 (#1752) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 21 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 1c921fda8716..96ec9eceb517 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,27 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.15.0](https://github.com/googleapis/python-bigquery/compare/v3.14.1...v3.15.0) (2024-01-09) + + +### Features + +* Support JSON type in `insert_rows` and as a scalar query parameter ([#1757](https://github.com/googleapis/python-bigquery/issues/1757)) ([02a7d12](https://github.com/googleapis/python-bigquery/commit/02a7d129776b7da7da844ffa9c5cdf21811cd3af)) +* Support RANGE in schema ([#1746](https://github.com/googleapis/python-bigquery/issues/1746)) ([8585747](https://github.com/googleapis/python-bigquery/commit/8585747058e6db49a8078ae44d8e10735cdc27f9)) + + +### Bug Fixes + +* Deserializing JSON subfields within structs fails ([#1742](https://github.com/googleapis/python-bigquery/issues/1742)) ([0d93073](https://github.com/googleapis/python-bigquery/commit/0d930739c78b557db6cd48b38fe16eba93719c40)) +* Due to upstream change in dataset, updates expected results ([#1761](https://github.com/googleapis/python-bigquery/issues/1761)) ([132c14b](https://github.com/googleapis/python-bigquery/commit/132c14bbddfb61ea8bc408bef5e958e21b5b819c)) +* Load_table_from_dataframe for higher scale decimal ([#1703](https://github.com/googleapis/python-bigquery/issues/1703)) ([b9c8be0](https://github.com/googleapis/python-bigquery/commit/b9c8be0982c76187444300c414e0dda8b0ad105b)) +* Updates types-protobuf version for mypy-samples nox session ([#1764](https://github.com/googleapis/python-bigquery/issues/1764)) ([c0de695](https://github.com/googleapis/python-bigquery/commit/c0de6958e5761ad6ff532dd933b0f4387e18f1b9)) + + +### Performance Improvements + +* DB-API uses more efficient `query_and_wait` when no job ID is provided ([#1747](https://github.com/googleapis/python-bigquery/issues/1747)) ([d225a94](https://github.com/googleapis/python-bigquery/commit/d225a94e718a85877c495fbd32eca607b8919ac6)) + ## [3.14.1](https://github.com/googleapis/python-bigquery/compare/v3.14.0...v3.14.1) (2023-12-13) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 6073384c9b1c..df08277f0f43 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.14.1" +__version__ = "3.15.0" From ca0fc52cfe90a0e33723d4b094ad9c27e1f977d5 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 10 Jan 2024 10:34:00 -0800 Subject: [PATCH 1698/2016] fix: add detailed message in job error (#1762) * fix: more detailed job error message * lint * fix mypy error * remove import ignore * Update google/cloud/bigquery/job/base.py Co-authored-by: Chalmer Lowe * Update google/cloud/bigquery/job/base.py Co-authored-by: Chalmer Lowe * variable name and unit test --------- Co-authored-by: Chalmer Lowe Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/job/base.py | 25 ++++++++++++++++--- .../tests/unit/job/test_base.py | 21 ++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 97e0ea3bd76b..2641afea8df0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -55,7 +55,7 @@ } -def _error_result_to_exception(error_result): +def _error_result_to_exception(error_result, errors=None): """Maps BigQuery error reasons to an exception. The reasons and their matching HTTP status codes are documented on @@ -66,6 +66,7 @@ def _error_result_to_exception(error_result): Args: error_result (Mapping[str, str]): The error result from BigQuery. + errors (Union[Iterable[str], None]): The detailed error messages. Returns: google.cloud.exceptions.GoogleAPICallError: The mapped exception. @@ -74,8 +75,24 @@ def _error_result_to_exception(error_result): status_code = _ERROR_REASON_TO_EXCEPTION.get( reason, http.client.INTERNAL_SERVER_ERROR ) + # Manually create error message to preserve both error_result and errors. + # Can be removed once b/310544564 and b/318889899 are resolved. + concatenated_errors = "" + if errors: + concatenated_errors = "; " + for err in errors: + concatenated_errors += ", ".join( + [f"{key}: {value}" for key, value in err.items()] + ) + concatenated_errors += "; " + + # strips off the last unneeded semicolon and space + concatenated_errors = concatenated_errors[:-2] + + error_message = error_result.get("message", "") + concatenated_errors + return exceptions.from_http_status( - status_code, error_result.get("message", ""), errors=[error_result] + status_code, error_message, errors=[error_result] ) @@ -886,7 +903,9 @@ def _set_future_result(self): return if self.error_result is not None: - exception = _error_result_to_exception(self.error_result) + exception = _error_result_to_exception( + self.error_result, self.errors or () + ) self.set_exception(exception) else: self.set_result(self) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 5635d0e32dc7..a61fd319858e 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -47,6 +47,27 @@ def test_missing_reason(self): exception = self._call_fut(error_result) self.assertEqual(exception.code, http.client.INTERNAL_SERVER_ERROR) + def test_contatenate_errors(self): + # Added test for b/310544564 and b/318889899. + # Ensures that error messages from both error_result and errors are + # present in the exception raised. + + error_result = { + "reason": "invalid1", + "message": "error message 1", + } + errors = [ + {"reason": "invalid2", "message": "error message 2"}, + {"reason": "invalid3", "message": "error message 3"}, + ] + + exception = self._call_fut(error_result, errors) + self.assertEqual( + exception.message, + "error message 1; reason: invalid2, message: error message 2; " + "reason: invalid3, message: error message 3", + ) + class Test_JobReference(unittest.TestCase): JOB_ID = "job-id" From 9d4193e1963add6134c5be19c3bb129a0360a5f1 Mon Sep 17 00:00:00 2001 From: Dmytro Karacheban Date: Thu, 11 Jan 2024 18:37:26 +0200 Subject: [PATCH 1699/2016] feat: Add `table_constraints` field to Table model (#1755) * feat: add `table_constraints` field to Table model * Change `raise` to `return` in __eq__ methods * Fix __eq__ for ColumnReference * Add column_references to ForeignKey __eq__ * Add missing coverage * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py --------- Co-authored-by: Chalmer Lowe Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/table.py | 128 ++++++++ .../tests/unit/test_table.py | 280 ++++++++++++++++++ 2 files changed, 408 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 0ae7851a17a6..b3be4ff904b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -390,6 +390,7 @@ class Table(_TableBase): "view_use_legacy_sql": "view", "view_query": "view", "require_partition_filter": "requirePartitionFilter", + "table_constraints": "tableConstraints", } def __init__(self, table_ref, schema=None) -> None: @@ -973,6 +974,16 @@ def clone_definition(self) -> Optional["CloneDefinition"]: clone_info = CloneDefinition(clone_info) return clone_info + @property + def table_constraints(self) -> Optional["TableConstraints"]: + """Tables Primary Key and Foreign Key information.""" + table_constraints = self._properties.get( + self._PROPERTY_TO_API_FIELD["table_constraints"] + ) + if table_constraints is not None: + table_constraints = TableConstraints.from_api_repr(table_constraints) + return table_constraints + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. @@ -2958,6 +2969,123 @@ def __repr__(self): return "TimePartitioning({})".format(",".join(key_vals)) +class PrimaryKey: + """Represents the primary key constraint on a table's columns. + + Args: + columns: The columns that are composed of the primary key constraint. + """ + + def __init__(self, columns: List[str]): + self.columns = columns + + def __eq__(self, other): + if not isinstance(other, PrimaryKey): + raise TypeError("The value provided is not a BigQuery PrimaryKey.") + return self.columns == other.columns + + +class ColumnReference: + """The pair of the foreign key column and primary key column. + + Args: + referencing_column: The column that composes the foreign key. + referenced_column: The column in the primary key that are referenced by the referencingColumn. + """ + + def __init__(self, referencing_column: str, referenced_column: str): + self.referencing_column = referencing_column + self.referenced_column = referenced_column + + def __eq__(self, other): + if not isinstance(other, ColumnReference): + raise TypeError("The value provided is not a BigQuery ColumnReference.") + return ( + self.referencing_column == other.referencing_column + and self.referenced_column == other.referenced_column + ) + + +class ForeignKey: + """Represents a foreign key constraint on a table's columns. + + Args: + name: Set only if the foreign key constraint is named. + referenced_table: The table that holds the primary key and is referenced by this foreign key. + column_references: The columns that compose the foreign key. + """ + + def __init__( + self, + name: str, + referenced_table: TableReference, + column_references: List[ColumnReference], + ): + self.name = name + self.referenced_table = referenced_table + self.column_references = column_references + + def __eq__(self, other): + if not isinstance(other, ForeignKey): + raise TypeError("The value provided is not a BigQuery ForeignKey.") + return ( + self.name == other.name + and self.referenced_table == other.referenced_table + and self.column_references == other.column_references + ) + + @classmethod + def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignKey": + """Create an instance from API representation.""" + return cls( + name=api_repr["name"], + referenced_table=TableReference.from_api_repr(api_repr["referencedTable"]), + column_references=[ + ColumnReference( + column_reference_resource["referencingColumn"], + column_reference_resource["referencedColumn"], + ) + for column_reference_resource in api_repr["columnReferences"] + ], + ) + + +class TableConstraints: + """The TableConstraints defines the primary key and foreign key. + + Args: + primary_key: + Represents a primary key constraint on a table's columns. Present only if the table + has a primary key. The primary key is not enforced. + foreign_keys: + Present only if the table has a foreign key. The foreign key is not enforced. + + """ + + def __init__( + self, + primary_key: Optional[PrimaryKey], + foreign_keys: Optional[List[ForeignKey]], + ): + self.primary_key = primary_key + self.foreign_keys = foreign_keys + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints": + """Create an instance from API representation.""" + primary_key = None + if "primaryKey" in resource: + primary_key = PrimaryKey(resource["primaryKey"]["columns"]) + + foreign_keys = None + if "foreignKeys" in resource: + foreign_keys = [ + ForeignKey.from_api_repr(foreign_key_resource) + for foreign_key_resource in resource["foreignKeys"] + ] + return cls(primary_key, foreign_keys) + + def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 4a85a0823fde..e4d0c66ab28e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -603,6 +603,7 @@ def test_ctor(self): self.assertIsNone(table.encryption_configuration) self.assertIsNone(table.time_partitioning) self.assertIsNone(table.clustering_fields) + self.assertIsNone(table.table_constraints) def test_ctor_w_schema(self): from google.cloud.bigquery.schema import SchemaField @@ -901,6 +902,21 @@ def test_clone_definition_set(self): 2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC ) + def test_table_constraints_property_getter(self): + from google.cloud.bigquery.table import PrimaryKey, TableConstraints + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table._properties["tableConstraints"] = { + "primaryKey": {"columns": ["id"]}, + } + + table_constraints = table.table_constraints + + assert isinstance(table_constraints, TableConstraints) + assert table_constraints.primary_key == PrimaryKey(columns=["id"]) + def test_description_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -5393,6 +5409,270 @@ def test_set_expiration_w_none(self): assert time_partitioning._properties["expirationMs"] is None +class TestPrimaryKey(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import PrimaryKey + + return PrimaryKey + + @classmethod + def _make_one(cls, *args, **kwargs): + return cls._get_target_class()(*args, **kwargs) + + def test_constructor_explicit(self): + columns = ["id", "product_id"] + primary_key = self._make_one(columns) + + self.assertEqual(primary_key.columns, columns) + + def test__eq__columns_mismatch(self): + primary_key = self._make_one(columns=["id", "product_id"]) + other_primary_key = self._make_one(columns=["id"]) + + self.assertNotEqual(primary_key, other_primary_key) + + def test__eq__other_type(self): + primary_key = self._make_one(columns=["id", "product_id"]) + with self.assertRaises(TypeError): + primary_key == "This is not a Primary Key" + + +class TestColumnReference(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import ColumnReference + + return ColumnReference + + @classmethod + def _make_one(cls, *args, **kwargs): + return cls._get_target_class()(*args, **kwargs) + + def test_constructor_explicit(self): + referencing_column = "product_id" + referenced_column = "id" + column_reference = self._make_one(referencing_column, referenced_column) + + self.assertEqual(column_reference.referencing_column, referencing_column) + self.assertEqual(column_reference.referenced_column, referenced_column) + + def test__eq__referencing_column_mismatch(self): + column_reference = self._make_one( + referencing_column="product_id", + referenced_column="id", + ) + other_column_reference = self._make_one( + referencing_column="item_id", + referenced_column="id", + ) + + self.assertNotEqual(column_reference, other_column_reference) + + def test__eq__referenced_column_mismatch(self): + column_reference = self._make_one( + referencing_column="product_id", + referenced_column="id", + ) + other_column_reference = self._make_one( + referencing_column="product_id", + referenced_column="id_1", + ) + + self.assertNotEqual(column_reference, other_column_reference) + + def test__eq__other_type(self): + column_reference = self._make_one( + referencing_column="product_id", + referenced_column="id", + ) + with self.assertRaises(TypeError): + column_reference == "This is not a Column Reference" + + +class TestForeignKey(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import ForeignKey + + return ForeignKey + + @classmethod + def _make_one(cls, *args, **kwargs): + return cls._get_target_class()(*args, **kwargs) + + def test_constructor_explicit(self): + name = "my_fk" + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + column_references = [] + foreign_key = self._make_one(name, referenced_table, column_references) + + self.assertEqual(foreign_key.name, name) + self.assertEqual(foreign_key.referenced_table, referenced_table) + self.assertEqual(foreign_key.column_references, column_references) + + def test__eq__name_mismatch(self): + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + column_references = [] + foreign_key = self._make_one( + name="my_fk", + referenced_table=referenced_table, + column_references=column_references, + ) + other_foreign_key = self._make_one( + name="my_other_fk", + referenced_table=referenced_table, + column_references=column_references, + ) + + self.assertNotEqual(foreign_key, other_foreign_key) + + def test__eq__referenced_table_mismatch(self): + name = "my_fk" + column_references = [] + foreign_key = self._make_one( + name=name, + referenced_table=TableReference.from_string("my-project.mydataset.mytable"), + column_references=column_references, + ) + other_foreign_key = self._make_one( + name=name, + referenced_table=TableReference.from_string( + "my-project.mydataset.my-other-table" + ), + column_references=column_references, + ) + + self.assertNotEqual(foreign_key, other_foreign_key) + + def test__eq__column_references_mismatch(self): + from google.cloud.bigquery.table import ColumnReference + + name = "my_fk" + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + foreign_key = self._make_one( + name=name, + referenced_table=referenced_table, + column_references=[], + ) + other_foreign_key = self._make_one( + name=name, + referenced_table=referenced_table, + column_references=[ + ColumnReference( + referencing_column="product_id", referenced_column="id" + ), + ], + ) + + self.assertNotEqual(foreign_key, other_foreign_key) + + def test__eq__other_type(self): + foreign_key = self._make_one( + name="my_fk", + referenced_table=TableReference.from_string("my-project.mydataset.mytable"), + column_references=[], + ) + with self.assertRaises(TypeError): + foreign_key == "This is not a Foreign Key" + + +class TestTableConstraint(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import TableConstraints + + return TableConstraints + + @classmethod + def _make_one(cls, *args, **kwargs): + return cls._get_target_class()(*args, **kwargs) + + def test_constructor_defaults(self): + instance = self._make_one(primary_key=None, foreign_keys=None) + self.assertIsNone(instance.primary_key) + self.assertIsNone(instance.foreign_keys) + + def test_from_api_repr_full_resource(self): + from google.cloud.bigquery.table import ( + ColumnReference, + ForeignKey, + TableReference, + ) + + resource = { + "primaryKey": { + "columns": ["id", "product_id"], + }, + "foreignKeys": [ + { + "name": "my_fk_name", + "referencedTable": { + "projectId": "my-project", + "datasetId": "your-dataset", + "tableId": "products", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"}, + ], + } + ], + } + instance = self._get_target_class().from_api_repr(resource) + + self.assertIsNotNone(instance.primary_key) + self.assertEqual(instance.primary_key.columns, ["id", "product_id"]) + self.assertEqual( + instance.foreign_keys, + [ + ForeignKey( + name="my_fk_name", + referenced_table=TableReference.from_string( + "my-project.your-dataset.products" + ), + column_references=[ + ColumnReference( + referencing_column="product_id", referenced_column="id" + ), + ], + ), + ], + ) + + def test_from_api_repr_only_primary_key_resource(self): + resource = { + "primaryKey": { + "columns": ["id"], + }, + } + instance = self._get_target_class().from_api_repr(resource) + + self.assertIsNotNone(instance.primary_key) + self.assertEqual(instance.primary_key.columns, ["id"]) + self.assertIsNone(instance.foreign_keys) + + def test_from_api_repr_only_foreign_keys_resource(self): + resource = { + "foreignKeys": [ + { + "name": "my_fk_name", + "referencedTable": { + "projectId": "my-project", + "datasetId": "your-dataset", + "tableId": "products", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"}, + ], + } + ] + } + instance = self._get_target_class().from_api_repr(resource) + + self.assertIsNone(instance.primary_key) + self.assertIsNotNone(instance.foreign_keys) + + @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) From f6f8bc656548610b2282a16c6677f58a2180412a Mon Sep 17 00:00:00 2001 From: Kira Date: Fri, 12 Jan 2024 11:34:15 -0500 Subject: [PATCH 1700/2016] feat: Support jsonExtension in LoadJobConfig (#1751) * feat: support jsonExtension in LoadJobConfig * reformatted with black * Updated doc string and added test for the encoding of jsonExtension * modified setter test to make sure property is set correctly --- .../google/cloud/bigquery/job/load.py | 13 +++++++++++ .../tests/unit/job/test_load_config.py | 23 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index 6b6c8bfd9954..1764354562bb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -327,6 +327,19 @@ def ignore_unknown_values(self): def ignore_unknown_values(self, value): self._set_sub_prop("ignoreUnknownValues", value) + @property + def json_extension(self): + """Optional[str]: The extension to use for writing JSON data to BigQuery. Only supports GeoJSON currently. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.json_extension + + """ + return self._get_sub_prop("jsonExtension") + + @json_extension.setter + def json_extension(self, value): + self._set_sub_prop("jsonExtension", value) + @property def max_bad_records(self): """Optional[int]: Number of invalid rows to ignore. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index 4d25fa106b3b..e1fa2641fe0f 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -413,6 +413,29 @@ def test_ignore_unknown_values_setter(self): config.ignore_unknown_values = True self.assertTrue(config._properties["load"]["ignoreUnknownValues"]) + def test_json_extension_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.json_extension) + + def test_json_extension_hit(self): + config = self._get_target_class()() + config._properties["load"]["jsonExtension"] = "GEOJSON" + self.assertEqual(config.json_extension, "GEOJSON") + + def test_json_extension_setter(self): + config = self._get_target_class()() + self.assertFalse(config.json_extension) + config.json_extension = "GEOJSON" + self.assertTrue(config.json_extension) + self.assertEqual(config._properties["load"]["jsonExtension"], "GEOJSON") + + def test_to_api_repr_includes_json_extension(self): + config = self._get_target_class()() + config._properties["load"]["jsonExtension"] = "GEOJSON" + api_repr = config.to_api_repr() + self.assertIn("jsonExtension", api_repr["load"]) + self.assertEqual(api_repr["load"]["jsonExtension"], "GEOJSON") + def test_max_bad_records_missing(self): config = self._get_target_class()() self.assertIsNone(config.max_bad_records) From 46ae2aafd1afb3b2c08dca5423787ec0c524a6c5 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Fri, 12 Jan 2024 13:41:03 -0500 Subject: [PATCH 1701/2016] chore(main): release 3.16.0 (#1765) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 96ec9eceb517..25c4ca1e5b18 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.16.0](https://github.com/googleapis/python-bigquery/compare/v3.15.0...v3.16.0) (2024-01-12) + + +### Features + +* Add `table_constraints` field to Table model ([#1755](https://github.com/googleapis/python-bigquery/issues/1755)) ([a167f9a](https://github.com/googleapis/python-bigquery/commit/a167f9a95f0a8fbf0bdb4943d06f07c03768c132)) +* Support jsonExtension in LoadJobConfig ([#1751](https://github.com/googleapis/python-bigquery/issues/1751)) ([0fd7347](https://github.com/googleapis/python-bigquery/commit/0fd7347ddb4ae1993f02b3bc109f64297437b3e2)) + + +### Bug Fixes + +* Add detailed message in job error ([#1762](https://github.com/googleapis/python-bigquery/issues/1762)) ([08483fb](https://github.com/googleapis/python-bigquery/commit/08483fba675f3b87571787e1e4420134a8fc8177)) + ## [3.15.0](https://github.com/googleapis/python-bigquery/compare/v3.14.1...v3.15.0) (2024-01-09) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index df08277f0f43..a3de40375f11 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.15.0" +__version__ = "3.16.0" From a7895a33b26d9aa15018d4f7af2661a99746d076 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 16 Jan 2024 11:09:40 -0800 Subject: [PATCH 1702/2016] feat: support universe resolution (#1774) * feat: support universe resolution This PR wires up consumption of the universe_domain client option for resolving the endpoint for constructing the BQ client. Testing universes is not yet something we want to in this repo, so validation was done out of band. * formatting and testing * conditionals for stale core * formatting * unused import --- .../google/cloud/bigquery/_helpers.py | 3 +++ .../google/cloud/bigquery/client.py | 9 +++++++++ .../tests/unit/test_client.py | 17 +++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 4cf6dddac3a7..905d4aee1aff 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -55,6 +55,9 @@ _DEFAULT_HOST = "https://bigquery.googleapis.com" """Default host for JSON API.""" +_DEFAULT_UNIVERSE = "googleapis.com" +"""Default universe for the JSON API.""" + def _get_bigquery_host(): return os.environ.get(BIGQUERY_EMULATOR_HOST, _DEFAULT_HOST) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 182319646dc4..b2ea130c48de 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -78,6 +78,7 @@ from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._helpers import _get_bigquery_host from google.cloud.bigquery._helpers import _DEFAULT_HOST +from google.cloud.bigquery._helpers import _DEFAULT_UNIVERSE from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem @@ -252,6 +253,14 @@ def __init__( if client_options.api_endpoint: api_endpoint = client_options.api_endpoint kw_args["api_endpoint"] = api_endpoint + elif ( + hasattr(client_options, "universe_domain") + and client_options.universe_domain + and client_options.universe_domain is not _DEFAULT_UNIVERSE + ): + kw_args["api_endpoint"] = _DEFAULT_HOST.replace( + _DEFAULT_UNIVERSE, client_options.universe_domain + ) self._connection = Connection(self, **kw_args) self._location = location diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ad22e0ddb216..56bdbad5eee3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -201,6 +201,23 @@ def test_ctor_w_client_options_object(self): client._connection.API_BASE_URL, "https://www.foo-googleapis.com" ) + @pytest.mark.skipif( + packaging.version.parse(getattr(google.api_core, "__version__", "0.0.0")) + < packaging.version.Version("2.15.0"), + reason="universe_domain not supported with google-api-core < 2.15.0", + ) + def test_ctor_w_client_options_universe(self): + creds = _make_credentials() + http = object() + client_options = {"universe_domain": "foo.com"} + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + client_options=client_options, + ) + self.assertEqual(client._connection.API_BASE_URL, "https://bigquery.foo.com") + def test_ctor_w_location(self): from google.cloud.bigquery._http import Connection From 71af6b3f83a49be11e2c6aecfd492e5c2e49cd6c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 16 Jan 2024 22:26:15 +0000 Subject: [PATCH 1703/2016] build(python): fix `docs` and `docfx` builds (#1779) Source-Link: https://togithub.com/googleapis/synthtool/commit/fac8444edd5f5526e804c306b766a271772a3e2f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:5ea6d0ab82c956b50962f91d94e206d3921537ae5fe1549ec5326381d8905cfa --- .../.github/.OwlBot.lock.yaml | 6 +- .../.kokoro/requirements.txt | 6 +- .../google-cloud-bigquery/docs/reference.rst | 115 ++---------------- packages/google-cloud-bigquery/noxfile.py | 24 +++- 4 files changed, 39 insertions(+), 112 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 773c1dfd2146..d8a1bbca7179 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2f155882785883336b4468d5218db737bb1d10c9cea7cb62219ad16fe248c03c -# created: 2023-11-29T14:54:29.548172703Z + digest: sha256:5ea6d0ab82c956b50962f91d94e206d3921537ae5fe1549ec5326381d8905cfa +# created: 2024-01-15T16:32:08.142785673Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index e5c1ffca94b7..bb3d6ca38b14 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -263,9 +263,9 @@ jeepney==0.8.0 \ # via # keyring # secretstorage -jinja2==3.1.2 \ - --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \ - --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 +jinja2==3.1.3 \ + --hash=sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa \ + --hash=sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90 # via gcp-releasetool keyring==24.2.0 \ --hash=sha256:4901caaf597bfd3bbd78c9a0c7c4c29fcd8310dab2cffefe749e916b6527acd6 \ diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index b886f11612f6..6c00df0771c2 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -1,8 +1,6 @@ API Reference ~~~~~~~~~~~~~ -.. currentmodule:: google.cloud.bigquery - The main concepts with this API are: - :class:`~google.cloud.bigquery.client.Client` manages connections to the @@ -18,55 +16,12 @@ The main concepts with this API are: Client ====== -.. autosummary:: - :toctree: generated - - client.Client +.. automodule:: google.cloud.bigquery.client Job === -Job Configuration ------------------ - -.. autosummary:: - :toctree: generated - - job.QueryJobConfig - job.CopyJobConfig - job.LoadJobConfig - job.ExtractJobConfig - -Job Classes ------------ - -.. autosummary:: - :toctree: generated - - job.QueryJob - job.CopyJob - job.LoadJob - job.ExtractJob - -Job-Related Types ------------------ - -.. autosummary:: - :toctree: generated - - job.Compression - job.CreateDisposition - job.DestinationFormat - job.DmlStats - job.Encoding - job.OperationType - job.QueryPlanEntry - job.QueryPlanEntryStep - job.QueryPriority - job.ReservationUsage - job.SourceFormat - job.WriteDisposition - job.SchemaUpdateOption +.. automodule:: google.cloud.bigquery.job .. toctree:: :maxdepth: 2 @@ -77,63 +32,28 @@ Job-Related Types Dataset ======= -.. autosummary:: - :toctree: generated - - dataset.Dataset - dataset.DatasetListItem - dataset.DatasetReference - dataset.AccessEntry +.. automodule:: google.cloud.bigquery.dataset Table ===== -.. autosummary:: - :toctree: generated - - table.PartitionRange - table.RangePartitioning - table.Row - table.RowIterator - table.SnapshotDefinition - table.CloneDefinition - table.Table - table.TableListItem - table.TableReference - table.TimePartitioning - table.TimePartitioningType +.. automodule:: google.cloud.bigquery.table Model ===== -.. autosummary:: - :toctree: generated - - model.Model - model.ModelReference +.. automodule:: google.cloud.bigquery.model Routine ======= -.. autosummary:: - :toctree: generated - - routine.DeterminismLevel - routine.Routine - routine.RoutineArgument - routine.RoutineReference - routine.RoutineType +.. automodule:: google.cloud.bigquery.routine Schema ====== -.. autosummary:: - :toctree: generated - - schema.SchemaField - schema.PolicyTagList - +.. automodule:: google.cloud.bigquery.schema Query ===== @@ -147,25 +67,13 @@ Query Retries ======= -.. autosummary:: - :toctree: generated - - retry.DEFAULT_RETRY +.. automodule:: google.cloud.bigquery.retry External Configuration ====================== -.. autosummary:: - :toctree: generated - - external_config.ExternalSourceFormat - external_config.ExternalConfig - external_config.BigtableOptions - external_config.BigtableColumnFamily - external_config.BigtableColumn - external_config.CSVOptions - external_config.GoogleSheetsOptions +.. automodule:: google.cloud.bigquery.external_config .. toctree:: :maxdepth: 2 @@ -194,10 +102,7 @@ Enums Encryption Configuration ======================== -.. autosummary:: - :toctree: generated - - encryption_configuration.EncryptionConfiguration +.. automodule:: google.cloud.bigquery.encryption_configuration Additional Types diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 66d68c04ea22..ae022232eb86 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -418,7 +418,20 @@ def blacken(session): def docs(session): """Build the docs.""" - session.install("recommonmark", "sphinx==4.0.2", "sphinx_rtd_theme") + session.install( + # We need to pin to specific versions of the `sphinxcontrib-*` packages + # which still support sphinx 4.x. + # See https://github.com/googleapis/sphinx-docfx-yaml/issues/344 + # and https://github.com/googleapis/sphinx-docfx-yaml/issues/345. + "sphinxcontrib-applehelp==1.0.4", + "sphinxcontrib-devhelp==1.0.2", + "sphinxcontrib-htmlhelp==2.0.1", + "sphinxcontrib-qthelp==1.0.3", + "sphinxcontrib-serializinghtml==1.1.5", + "sphinx==4.5.0", + "alabaster", + "recommonmark", + ) session.install("google-cloud-storage") session.install("-e", ".[all]") @@ -443,6 +456,15 @@ def docfx(session): session.install("-e", ".") session.install( + # We need to pin to specific versions of the `sphinxcontrib-*` packages + # which still support sphinx 4.x. + # See https://github.com/googleapis/sphinx-docfx-yaml/issues/344 + # and https://github.com/googleapis/sphinx-docfx-yaml/issues/345. + "sphinxcontrib-applehelp==1.0.4", + "sphinxcontrib-devhelp==1.0.2", + "sphinxcontrib-htmlhelp==2.0.1", + "sphinxcontrib-qthelp==1.0.3", + "sphinxcontrib-serializinghtml==1.1.5", "gcp-sphinx-docfx-yaml", "alabaster", "recommonmark", From 11034c047c4ba368f670e38e839b74eeafca9f6c Mon Sep 17 00:00:00 2001 From: Stephanie A <129541811+DevStephanie@users.noreply.github.com> Date: Tue, 16 Jan 2024 17:04:16 -0600 Subject: [PATCH 1704/2016] docs: update `snippets.py` to use `query_and_wait` (#1773) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- packages/google-cloud-bigquery/docs/snippets.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/snippets.py b/packages/google-cloud-bigquery/docs/snippets.py index 72ac2a000f83..b4e78e36fcbe 100644 --- a/packages/google-cloud-bigquery/docs/snippets.py +++ b/packages/google-cloud-bigquery/docs/snippets.py @@ -465,13 +465,12 @@ def test_client_query_total_rows(client, capsys): 'WHERE state = "TX" ' "LIMIT 100" ) - query_job = client.query( + results = client.query_and_wait( query, # Location must match that of the dataset(s) referenced in the query. location="US", - ) # API request - starts the query + ) # API request - starts the query and waits for results. - results = query_job.result() # Wait for query to complete. print("Got {} rows.".format(results.total_rows)) # [END bigquery_query_total_rows] @@ -551,7 +550,7 @@ def test_query_results_as_dataframe(client): LIMIT 10 """ - df = client.query(sql).to_dataframe() + df = client.query_and_wait(sql).to_dataframe() # [END bigquery_query_results_dataframe] assert isinstance(df, pandas.DataFrame) assert len(list(df)) == 2 # verify the number of columns From 98228e3128ea2da4042dcad449e800494a5b1bd0 Mon Sep 17 00:00:00 2001 From: Kira Date: Wed, 17 Jan 2024 15:08:00 -0500 Subject: [PATCH 1705/2016] chore: cleanup resources at startup (#1741) * chore: cleanup resources at startup time * reformmated with black for linter * changd how to call prefixer to clean up datasets, not tables * Removed formatting for uuid * Removed unneeded import of uuid * remove comment from dataset_access_test.py --- .../samples/snippets/authorized_view_tutorial_test.py | 10 +++------- .../samples/snippets/materialized_view_test.py | 3 ++- .../samples/snippets/natality_tutorial_test.py | 8 ++------ .../samples/snippets/quickstart_test.py | 6 ++---- .../samples/snippets/view_test.py | 3 ++- 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py index e2220fb54163..04f6312d3316 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial_test.py @@ -13,12 +13,12 @@ # limitations under the License. from typing import Iterator, List -import uuid from google.cloud import bigquery import pytest import authorized_view_tutorial # type: ignore +from conftest import prefixer # type: ignore @pytest.fixture(scope="module") @@ -38,12 +38,8 @@ def test_authorized_view_tutorial( client: bigquery.Client, datasets_to_delete: List[str] ) -> None: override_values = { - "source_dataset_id": "github_source_data_{}".format( - str(uuid.uuid4()).replace("-", "_") - ), - "shared_dataset_id": "shared_views_{}".format( - str(uuid.uuid4()).replace("-", "_") - ), + "source_dataset_id": f"{prefixer.create_prefix()}_authorized_view_tutorial", + "shared_dataset_id": f"{prefixer.create_prefix()}_authorized_view_tutorial_shared_views", } source_dataset_ref = "{}.{}".format( client.project, override_values["source_dataset_id"] diff --git a/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py b/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py index 59e08131ee62..1b464af6f332 100644 --- a/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/materialized_view_test.py @@ -21,6 +21,7 @@ import pytest import materialized_view # type: ignore +from conftest import prefixer # type: ignore def temp_suffix() -> str: @@ -37,7 +38,7 @@ def bigquery_client_patch( @pytest.fixture(scope="module") def dataset_id(bigquery_client: bigquery.Client) -> Iterator[str]: - dataset_id = f"mvdataset_{temp_suffix()}" + dataset_id = f"{prefixer.create_prefix()}_materialized_view" bigquery_client.create_dataset(dataset_id) yield dataset_id bigquery_client.delete_dataset(dataset_id, delete_contents=True) diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py index 7f24ca5cb78c..603d142f25a6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial_test.py @@ -13,12 +13,12 @@ # limitations under the License. from typing import Iterator, List -import uuid from google.cloud import bigquery import pytest import natality_tutorial # type: ignore +from conftest import prefixer # type: ignore @pytest.fixture(scope="module") @@ -37,11 +37,7 @@ def datasets_to_delete(client: bigquery.Client) -> Iterator[List[str]]: def test_natality_tutorial( client: bigquery.Client, datasets_to_delete: List[str] ) -> None: - override_values = { - "dataset_id": "natality_regression_{}".format( - str(uuid.uuid4()).replace("-", "_") - ), - } + override_values = {"dataset_id": f"{prefixer.create_prefix()}_natality_tutorial"} datasets_to_delete.append(override_values["dataset_id"]) natality_tutorial.run_natality_tutorial(override_values) diff --git a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py index 88a24618df8d..74a02a83a077 100644 --- a/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/quickstart_test.py @@ -13,12 +13,12 @@ # limitations under the License. from typing import Iterator, List -import uuid from google.cloud import bigquery import pytest import quickstart # type: ignore +from conftest import prefixer # type: ignore # Must match the dataset listed in quickstart.py (there's no easy way to # extract this). @@ -43,9 +43,7 @@ def test_quickstart( client: bigquery.Client, datasets_to_delete: List[str], ) -> None: - override_values = { - "dataset_id": "my_new_dataset_{}".format(str(uuid.uuid4()).replace("-", "_")), - } + override_values = {"dataset_id": f"{prefixer.create_prefix()}_quickstart"} datasets_to_delete.append(override_values["dataset_id"]) quickstart.run_quickstart(override_values) diff --git a/packages/google-cloud-bigquery/samples/snippets/view_test.py b/packages/google-cloud-bigquery/samples/snippets/view_test.py index 1e615db47a26..dfa1cdeee69e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/view_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/view_test.py @@ -20,6 +20,7 @@ import pytest import view # type: ignore +from conftest import prefixer # type: ignore def temp_suffix() -> str: @@ -53,7 +54,7 @@ def view_id(bigquery_client: bigquery.Client, view_dataset_id: str) -> Iterator[ def source_dataset_id( bigquery_client: bigquery.Client, project_id: str ) -> Iterator[str]: - dataset_id = f"{project_id}.view_{temp_suffix()}" + dataset_id = f"{prefixer.create_prefix()}_view" bigquery_client.create_dataset(dataset_id) yield dataset_id bigquery_client.delete_dataset(dataset_id, delete_contents=True) From 17442cd9710956781e005b44424aeb7d957bad53 Mon Sep 17 00:00:00 2001 From: Kira Date: Wed, 17 Jan 2024 15:53:20 -0500 Subject: [PATCH 1706/2016] docs: update multiple samples to change query to query_and_wait (#1784) * docs: update multiple samples for query_and_wait API * black * update rest of samples to use query_and_wait * changed query_jobs to results --- .../samples/client_query_add_column.py | 5 ++--- .../samples/client_query_destination_table_clustered.py | 5 +++-- .../samples/client_query_legacy_sql.py | 8 +++++--- .../samples/client_query_relax_column.py | 5 ++--- .../samples/client_query_w_struct_params.py | 6 ++++-- .../samples/download_public_data_sandbox.py | 4 +++- .../samples/snippets/authorized_view_tutorial.py | 6 ++---- .../samples/snippets/natality_tutorial.py | 3 +-- .../google-cloud-bigquery/samples/snippets/simple_app.py | 6 ++---- packages/google-cloud-bigquery/samples/tests/conftest.py | 2 +- 10 files changed, 25 insertions(+), 25 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/client_query_add_column.py b/packages/google-cloud-bigquery/samples/client_query_add_column.py index ec14087fb45e..6aae5fce4c2f 100644 --- a/packages/google-cloud-bigquery/samples/client_query_add_column.py +++ b/packages/google-cloud-bigquery/samples/client_query_add_column.py @@ -36,14 +36,13 @@ def client_query_add_column(table_id: str) -> None: ) # Start the query, passing in the extra configuration. - query_job = client.query( + client.query_and_wait( # In this example, the existing table contains only the 'full_name' and # 'age' columns, while the results of this query will contain an # additional 'favorite_color' column. 'SELECT "Timmy" as full_name, 85 as age, "Blue" as favorite_color;', job_config=job_config, - ) # Make an API request. - query_job.result() # Wait for the job to complete. + ) # Make an API request and wait for job to complete. # Checks the updated length of the schema. table = client.get_table(table_id) # Make an API request. diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py index de9fff2d03bd..19330500adc1 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_clustered.py @@ -31,8 +31,9 @@ def client_query_destination_table_clustered(table_id: str) -> None: ) # Start the query, passing in the extra configuration. - query_job = client.query(sql, job_config=job_config) # Make an API request. - query_job.result() # Wait for the job to complete. + client.query_and_wait( + sql, job_config=job_config + ) # Make an API request and wait for job to complete. table = client.get_table(table_id) # Make an API request. if table.clustering_fields == cluster_fields: diff --git a/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py b/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py index 44917e4e02d0..1fb5b797a7c0 100644 --- a/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py +++ b/packages/google-cloud-bigquery/samples/client_query_legacy_sql.py @@ -29,10 +29,12 @@ def client_query_legacy_sql() -> None: # Set use_legacy_sql to True to use legacy SQL syntax. job_config = bigquery.QueryJobConfig(use_legacy_sql=True) - # Start the query, passing in the extra configuration. - query_job = client.query(query, job_config=job_config) # Make an API request. + # Start the query and waits for query job to complete, passing in the extra configuration. + results = client.query_and_wait( + query, job_config=job_config + ) # Make an API request. print("The query data:") - for row in query_job: + for row in results: print(row) # [END bigquery_query_legacy] diff --git a/packages/google-cloud-bigquery/samples/client_query_relax_column.py b/packages/google-cloud-bigquery/samples/client_query_relax_column.py index 22ecb33d11ac..26dce888fea4 100644 --- a/packages/google-cloud-bigquery/samples/client_query_relax_column.py +++ b/packages/google-cloud-bigquery/samples/client_query_relax_column.py @@ -39,13 +39,12 @@ def client_query_relax_column(table_id: str) -> None: ) # Start the query, passing in the extra configuration. - query_job = client.query( + client.query_and_wait( # In this example, the existing table contains 'full_name' and 'age' as # required columns, but the query results will omit the second column. 'SELECT "Beyonce" as full_name;', job_config=job_config, - ) # Make an API request. - query_job.result() # Wait for the job to complete. + ) # Make an API request and wait for job to complete # Checks the updated number of required fields. table = client.get_table(table_id) # Make an API request. diff --git a/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py b/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py index 6b68e78edc9c..cda2fcb43a26 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_struct_params.py @@ -30,8 +30,10 @@ def client_query_w_struct_params() -> None: ) ] ) - query_job = client.query(query, job_config=job_config) # Make an API request. + results = client.query_and_wait( + query, job_config=job_config + ) # Make an API request and waits for results. - for row in query_job: + for row in results: print(row.s) # [END bigquery_query_params_structs] diff --git a/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py b/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py index e165a31ce35a..909a7da05a47 100644 --- a/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py +++ b/packages/google-cloud-bigquery/samples/download_public_data_sandbox.py @@ -27,7 +27,9 @@ def download_public_data_sandbox() -> None: query_string = "SELECT * FROM `bigquery-public-data.usa_names.usa_1910_current`" # Use the BigQuery Storage API to speed-up downloads of large tables. - dataframe = client.query(query_string).to_dataframe(create_bqstorage_client=True) + dataframe = client.query_and_wait(query_string).to_dataframe( + create_bqstorage_client=True + ) print(dataframe.info()) # [END bigquery_pandas_public_data_sandbox] diff --git a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py index bfb61bc382d8..f52170bc60a8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/authorized_view_tutorial.py @@ -62,15 +62,13 @@ def run_authorized_view_tutorial( FROM `bigquery-public-data.github_repos.commits` LIMIT 1000 """ - query_job = client.query( + client.query_and_wait( sql, # Location must match that of the dataset(s) referenced in the query # and of the destination table. location="US", job_config=job_config, - ) # API request - starts the query - - query_job.result() # Waits for the query to finish + ) # API request - starts the query and waits for query to finish # [END bigquery_avt_create_source_table] # Create a separate dataset to store your view diff --git a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py index b330a3c211ab..df9fc15bed49 100644 --- a/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py +++ b/packages/google-cloud-bigquery/samples/snippets/natality_tutorial.py @@ -83,8 +83,7 @@ def run_natality_tutorial(override_values: Optional[Dict[str, str]] = None) -> N """ # Run the query. - query_job = client.query(query, job_config=job_config) - query_job.result() # Waits for the query to finish + client.query_and_wait(query, job_config=job_config) # Waits for the query to finish # [END bigquery_query_natality_tutorial] diff --git a/packages/google-cloud-bigquery/samples/snippets/simple_app.py b/packages/google-cloud-bigquery/samples/snippets/simple_app.py index 3d856d4bb5a0..8281e1877a70 100644 --- a/packages/google-cloud-bigquery/samples/snippets/simple_app.py +++ b/packages/google-cloud-bigquery/samples/snippets/simple_app.py @@ -27,7 +27,7 @@ def query_stackoverflow() -> None: client = bigquery.Client() # [END bigquery_simple_app_client] # [START bigquery_simple_app_query] - query_job = client.query( + results = client.query_and_wait( """ SELECT CONCAT( @@ -38,9 +38,7 @@ def query_stackoverflow() -> None: WHERE tags like '%google-bigquery%' ORDER BY view_count DESC LIMIT 10""" - ) - - results = query_job.result() # Waits for job to complete. + ) # Waits for job to complete. # [END bigquery_simple_app_query] # [START bigquery_simple_app_print] diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index 99bd2e367b23..2b5b89c43d07 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -174,7 +174,7 @@ def model_id(client: bigquery.Client, dataset_id: str) -> str: model_id ) - client.query(sql).result() + client.query_and_wait(sql) return model_id From d55f7883836bfbb11c197610648191321117d587 Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Thu, 18 Jan 2024 12:28:28 -0600 Subject: [PATCH 1707/2016] docs: update the query with no cache sample to use query_and_wait API (#1770) Co-authored-by: Salem Boyland Co-authored-by: Kira --- packages/google-cloud-bigquery/samples/query_no_cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/query_no_cache.py b/packages/google-cloud-bigquery/samples/query_no_cache.py index 7501b7fc09c0..b942e501077f 100644 --- a/packages/google-cloud-bigquery/samples/query_no_cache.py +++ b/packages/google-cloud-bigquery/samples/query_no_cache.py @@ -26,8 +26,8 @@ def query_no_cache() -> None: FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus; """ - query_job = client.query(sql, job_config=job_config) # Make an API request. + results = client.query_and_wait(sql, job_config=job_config) # Make an API request. - for row in query_job: + for row in results: print(row) # [END bigquery_query_no_cache] From 1c9fa3a3ef515487dc7abc8ee75bca8ace85488c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 19 Jan 2024 17:35:26 -0600 Subject: [PATCH 1708/2016] docs: remove unused query code sample (#1769) * docs: remove unused query code sample This sample was moved in https://github.com/googleapis/python-bigquery/pull/1722/files#diff-2e8df14049580f42d6c73a3209838b96f3c9b185d2d7f2688683ae60bb2e7c43. Docs updated in internal change 597332356. * remove sample test too * update reference to query() sample in usage guides --------- Co-authored-by: Kira --- .../docs/usage/queries.rst | 4 +- .../samples/client_query.py | 41 ------------------- .../samples/tests/test_client_query.py | 27 ------------ 3 files changed, 2 insertions(+), 70 deletions(-) delete mode 100644 packages/google-cloud-bigquery/samples/client_query.py delete mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query.py diff --git a/packages/google-cloud-bigquery/docs/usage/queries.rst b/packages/google-cloud-bigquery/docs/usage/queries.rst index fc57e54de9df..56be8497e4c2 100644 --- a/packages/google-cloud-bigquery/docs/usage/queries.rst +++ b/packages/google-cloud-bigquery/docs/usage/queries.rst @@ -5,9 +5,9 @@ Querying data ^^^^^^^^^^^^^ Run a query and wait for it to finish with the -:func:`~google.cloud.bigquery.client.Client.query` method: +:func:`~google.cloud.bigquery.client.Client.query_and_wait` method: -.. literalinclude:: ../samples/client_query.py +.. literalinclude:: ../samples/snippets/client_query.py :language: python :dedent: 4 :start-after: [START bigquery_query] diff --git a/packages/google-cloud-bigquery/samples/client_query.py b/packages/google-cloud-bigquery/samples/client_query.py deleted file mode 100644 index 80eac854e2ee..000000000000 --- a/packages/google-cloud-bigquery/samples/client_query.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -def client_query() -> None: - # TODO(swast): remove once docs in cloud.google.com have been updated to - # use samples/snippets/client_query.py - - # [START bigquery_query] - - from google.cloud import bigquery - - # Construct a BigQuery client object. - client = bigquery.Client() - - query = """ - SELECT name, SUM(number) as total_people - FROM `bigquery-public-data.usa_names.usa_1910_2013` - WHERE state = 'TX' - GROUP BY name, state - ORDER BY total_people DESC - LIMIT 20 - """ - query_job = client.query(query) # Make an API request. - - print("The query data:") - for row in query_job: - # Row values can be accessed by field name or index. - print("name={}, count={}".format(row[0], row["total_people"])) - # [END bigquery_query] diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query.py b/packages/google-cloud-bigquery/samples/tests/test_client_query.py deleted file mode 100644 index 5d4fb9c948b3..000000000000 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import typing - -from .. import client_query - -if typing.TYPE_CHECKING: - import pytest - - -def test_client_query(capsys: "pytest.CaptureFixture[str]") -> None: - client_query.client_query() - out, err = capsys.readouterr() - assert "The query data:" in out - assert "name=James, count=272793" in out From a47f3316ab2fb77b3631c970f09bbbca04bd987c Mon Sep 17 00:00:00 2001 From: Stephanie A <129541811+DevStephanie@users.noreply.github.com> Date: Mon, 22 Jan 2024 15:18:16 -0600 Subject: [PATCH 1709/2016] docs: Updates `query` to `query and wait` in samples/desktopapp/user_credentials.py (#1787) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Updates file * Updates files * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Updates * edits --------- Co-authored-by: Owl Bot --- .../samples/desktopapp/user_credentials.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/user_credentials.py b/packages/google-cloud-bigquery/samples/desktopapp/user_credentials.py index 487a56c5ff9c..68236d1260fe 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/user_credentials.py +++ b/packages/google-cloud-bigquery/samples/desktopapp/user_credentials.py @@ -61,10 +61,10 @@ def main(project: str) -> None: WHERE name = 'William' GROUP BY name; """ - query_job = client.query(query_string) + results = client.query_and_wait(query_string) # Print the results. - for row in query_job.result(): # Wait for the job to complete. + for row in results: # Wait for the job to complete. print("{}: {}".format(row["name"], row["total"])) # [END bigquery_auth_user_query] From 9428b78ea8b45ecae239bd01dd02c631ed3ae3bf Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 23 Jan 2024 14:08:58 -0600 Subject: [PATCH 1710/2016] fix: `query_and_wait` now retains unknown query configuration `_properties` (#1793) * fix: `query_and_wait` now retains unknown query configuration `_properties` fix: raise `ValueError` in `query_and_wait` with wrong `job_config` type --- .../google/cloud/bigquery/_job_helpers.py | 24 +++--- .../tests/unit/test__job_helpers.py | 75 ++++++++++++++++--- 2 files changed, 79 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 7356331b8ff1..6debc377b72b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -166,6 +166,14 @@ def do_query(): return future +def _validate_job_config(request_body: Dict[str, Any], invalid_key: str): + """Catch common mistakes, such as passing in a *JobConfig object of the + wrong type. + """ + if invalid_key in request_body: + raise ValueError(f"got unexpected key {repr(invalid_key)} in job_config") + + def _to_query_request( job_config: Optional[job.QueryJobConfig] = None, *, @@ -179,17 +187,15 @@ def _to_query_request( QueryRequest. If any configuration property is set that is not available in jobs.query, it will result in a server-side error. """ - request_body = {} - job_config_resource = job_config.to_api_repr() if job_config else {} - query_config_resource = job_config_resource.get("query", {}) + request_body = copy.copy(job_config.to_api_repr()) if job_config else {} - request_body.update(query_config_resource) + _validate_job_config(request_body, job.CopyJob._JOB_TYPE) + _validate_job_config(request_body, job.ExtractJob._JOB_TYPE) + _validate_job_config(request_body, job.LoadJob._JOB_TYPE) - # These keys are top level in job resource and query resource. - if "labels" in job_config_resource: - request_body["labels"] = job_config_resource["labels"] - if "dryRun" in job_config_resource: - request_body["dryRun"] = job_config_resource["dryRun"] + # Move query.* properties to top-level. + query_config_resource = request_body.pop("query", {}) + request_body.update(query_config_resource) # Default to standard SQL. request_body.setdefault("useLegacySql", False) diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py index f2fe32d94c1b..404a546ff100 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py @@ -23,6 +23,9 @@ from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers +from google.cloud.bigquery.job import copy_ as job_copy +from google.cloud.bigquery.job import extract as job_extract +from google.cloud.bigquery.job import load as job_load from google.cloud.bigquery.job import query as job_query from google.cloud.bigquery.query import ConnectionProperty, ScalarQueryParameter @@ -57,9 +60,34 @@ def make_query_response( @pytest.mark.parametrize( ("job_config", "expected"), ( - (None, make_query_request()), - (job_query.QueryJobConfig(), make_query_request()), - ( + pytest.param( + None, + make_query_request(), + id="job_config=None-default-request", + ), + pytest.param( + job_query.QueryJobConfig(), + make_query_request(), + id="job_config=QueryJobConfig()-default-request", + ), + pytest.param( + job_query.QueryJobConfig.from_api_repr( + { + "unknownTopLevelProperty": "some-test-value", + "query": { + "unknownQueryProperty": "some-other-value", + }, + }, + ), + make_query_request( + { + "unknownTopLevelProperty": "some-test-value", + "unknownQueryProperty": "some-other-value", + } + ), + id="job_config-with-unknown-properties-includes-all-properties-in-request", + ), + pytest.param( job_query.QueryJobConfig(default_dataset="my-project.my_dataset"), make_query_request( { @@ -69,17 +97,24 @@ def make_query_response( } } ), + id="job_config-with-default_dataset", ), - (job_query.QueryJobConfig(dry_run=True), make_query_request({"dryRun": True})), - ( + pytest.param( + job_query.QueryJobConfig(dry_run=True), + make_query_request({"dryRun": True}), + id="job_config-with-dry_run", + ), + pytest.param( job_query.QueryJobConfig(use_query_cache=False), make_query_request({"useQueryCache": False}), + id="job_config-with-use_query_cache", ), - ( + pytest.param( job_query.QueryJobConfig(use_legacy_sql=True), make_query_request({"useLegacySql": True}), + id="job_config-with-use_legacy_sql", ), - ( + pytest.param( job_query.QueryJobConfig( query_parameters=[ ScalarQueryParameter("named_param1", "STRING", "param-value"), @@ -103,8 +138,9 @@ def make_query_response( ], } ), + id="job_config-with-query_parameters-named", ), - ( + pytest.param( job_query.QueryJobConfig( query_parameters=[ ScalarQueryParameter(None, "STRING", "param-value"), @@ -126,8 +162,9 @@ def make_query_response( ], } ), + id="job_config-with-query_parameters-positional", ), - ( + pytest.param( job_query.QueryJobConfig( connection_properties=[ ConnectionProperty(key="time_zone", value="America/Chicago"), @@ -142,14 +179,17 @@ def make_query_response( ] } ), + id="job_config-with-connection_properties", ), - ( + pytest.param( job_query.QueryJobConfig(labels={"abc": "def"}), make_query_request({"labels": {"abc": "def"}}), + id="job_config-with-labels", ), - ( + pytest.param( job_query.QueryJobConfig(maximum_bytes_billed=987654), make_query_request({"maximumBytesBilled": "987654"}), + id="job_config-with-maximum_bytes_billed", ), ), ) @@ -159,6 +199,19 @@ def test__to_query_request(job_config, expected): assert result == expected +@pytest.mark.parametrize( + ("job_config", "invalid_key"), + ( + pytest.param(job_copy.CopyJobConfig(), "copy", id="copy"), + pytest.param(job_extract.ExtractJobConfig(), "extract", id="extract"), + pytest.param(job_load.LoadJobConfig(), "load", id="load"), + ), +) +def test__to_query_request_raises_for_invalid_config(job_config, invalid_key): + with pytest.raises(ValueError, match=f"{repr(invalid_key)} in job_config"): + _job_helpers._to_query_request(job_config, query="SELECT 1") + + def test__to_query_job_defaults(): mock_client = mock.create_autospec(Client) response = make_query_response( From 31ca8f6ee1cabb6102d0506f9bff8af69cf29a30 Mon Sep 17 00:00:00 2001 From: Gaurang Shah Date: Tue, 23 Jan 2024 19:03:36 -0500 Subject: [PATCH 1711/2016] feature: add query location for bigquery magic (#1771) Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/magics/magics.py | 11 +++++++++++ .../tests/unit/test_magics.py | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 2a3583c66af2..b7c685d9a87d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -508,6 +508,15 @@ def _create_dataset_if_necessary(client, dataset_id): "Defaults to use tqdm_notebook. Install the ``tqdm`` package to use this feature." ), ) +@magic_arguments.argument( + "--location", + type=str, + default=None, + help=( + "Set the location to execute query." + "Defaults to location set in query setting in console." + ), +) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -551,6 +560,7 @@ def _cell_magic(line, query): category=DeprecationWarning, ) use_bqstorage_api = not args.use_rest_api + location = args.location params = [] if params_option_value: @@ -579,6 +589,7 @@ def _cell_magic(line, query): default_query_job_config=context.default_query_job_config, client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), client_options=bigquery_client_options, + location=location, ) if context._connection: client._connection = context._connection diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index b03894095139..1511cba9c9d0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -2053,3 +2053,21 @@ def test_bigquery_magic_create_dataset_fails(): ) assert close_transports.called + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_location(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "--location=us-east1", "SELECT 17 AS num") + + client_options_used = run_query_mock.call_args_list[0][0][0] + assert client_options_used.location == "us-east1" From 4dfcae564f820228317f714e703777f3dcb94b66 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 13:39:09 -0600 Subject: [PATCH 1712/2016] chore(main): release 3.17.0 (#1780) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 22 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 25c4ca1e5b18..bb916158d786 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,28 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.17.0](https://github.com/googleapis/python-bigquery/compare/v3.16.0...v3.17.0) (2024-01-24) + + +### Features + +* Support universe resolution ([#1774](https://github.com/googleapis/python-bigquery/issues/1774)) ([0b5c1d5](https://github.com/googleapis/python-bigquery/commit/0b5c1d597cdec3a05a16fb935595f773c5840bd4)) + + +### Bug Fixes + +* `query_and_wait` now retains unknown query configuration `_properties` ([#1793](https://github.com/googleapis/python-bigquery/issues/1793)) ([4ba4342](https://github.com/googleapis/python-bigquery/commit/4ba434287a0a25f027e3b63a80f8881a9b16723e)) +* Raise `ValueError` in `query_and_wait` with wrong `job_config` type ([4ba4342](https://github.com/googleapis/python-bigquery/commit/4ba434287a0a25f027e3b63a80f8881a9b16723e)) + + +### Documentation + +* Remove unused query code sample ([#1769](https://github.com/googleapis/python-bigquery/issues/1769)) ([1f96439](https://github.com/googleapis/python-bigquery/commit/1f96439b3dbd27f11be5e2af84f290ec6094d0a4)) +* Update `snippets.py` to use `query_and_wait` ([#1773](https://github.com/googleapis/python-bigquery/issues/1773)) ([d90602d](https://github.com/googleapis/python-bigquery/commit/d90602de87e58b665cb974401a327a640805822f)) +* Update multiple samples to change query to query_and_wait ([#1784](https://github.com/googleapis/python-bigquery/issues/1784)) ([d1161dd](https://github.com/googleapis/python-bigquery/commit/d1161dddde41a7d35b30033ccbf6984a5de640bd)) +* Update the query with no cache sample to use query_and_wait API ([#1770](https://github.com/googleapis/python-bigquery/issues/1770)) ([955a4cd](https://github.com/googleapis/python-bigquery/commit/955a4cd99e21cbca1b2f9c1dc6aa3fd8070cd61f)) +* Updates `query` to `query and wait` in samples/desktopapp/user_credentials.py ([#1787](https://github.com/googleapis/python-bigquery/issues/1787)) ([89f1299](https://github.com/googleapis/python-bigquery/commit/89f1299b3164b51fb0f29bc600a34ded59c10682)) + ## [3.16.0](https://github.com/googleapis/python-bigquery/compare/v3.15.0...v3.16.0) (2024-01-12) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index a3de40375f11..9f62912a4e03 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.16.0" +__version__ = "3.17.0" From 92310019ef266299c868569d3581a9a978c14cc8 Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Wed, 24 Jan 2024 12:41:27 -0800 Subject: [PATCH 1713/2016] fix: add pyarrow.large_strign to the _ARROW_SCALAR_IDS_TO_BQ map (#1796) Co-authored-by: Tim Swast --- .../google/cloud/bigquery/_pyarrow_helpers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py index 7266e5e02674..946743eafbb3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py @@ -91,6 +91,7 @@ def pyarrow_timestamp(): pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + pyarrow.large_string().id: "STRING", # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id: "NUMERIC", } From 771883560726e8459bb0035a21bbe95b86754f42 Mon Sep 17 00:00:00 2001 From: Kira Date: Wed, 24 Jan 2024 13:59:45 -0800 Subject: [PATCH 1714/2016] fix: retry 'job exceeded rate limits' for DDL queries (#1794) * fix: retry 'job exceeded rate limits' for DDL queries * Fixed retry test logic to better align to library standards * added docstring for test * deleted extra coverage file * Update tests/unit/test_job_retry.py Co-authored-by: Tim Swast * requested changes to retry jobs test * slight change to assert statemet * added TODO statements and fixed default job retry * modify sleep time and path names --------- Co-authored-by: Tim Swast --- .../google/cloud/bigquery/retry.py | 2 +- .../tests/unit/test_job_retry.py | 80 +++++++++++++++++++ .../tests/unit/test_retry.py | 27 +++++++ 3 files changed, 108 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index b01c0662c53d..01b12797270e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -73,7 +73,7 @@ def _should_retry(exc): deadline on the retry object. """ -job_retry_reasons = "rateLimitExceeded", "backendError" +job_retry_reasons = "rateLimitExceeded", "backendError", "jobRateLimitExceeded" def _job_should_retry(exc): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py index 4fa96fcec5c1..0e984c8fcf38 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -22,6 +22,10 @@ import google.api_core.retry import freezegun +from google.cloud.bigquery.client import Client +from google.cloud.bigquery import _job_helpers +from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + from .helpers import make_connection @@ -240,3 +244,79 @@ def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): ), ): job.result(job_retry=google.api_core.retry.Retry()) + + +def test_query_and_wait_retries_job_for_DDL_queries(): + """ + Specific test for retrying DDL queries with "jobRateLimitExceeded" error: + https://github.com/googleapis/python-bigquery/issues/1790 + """ + freezegun.freeze_time(auto_tick_seconds=1) + client = mock.create_autospec(Client) + client._call_api.__name__ = "_call_api" + client._call_api.__qualname__ = "Client._call_api" + client._call_api.__annotations__ = {} + client._call_api.__type_params__ = () + client._call_api.side_effect = ( + { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": False, + }, + google.api_core.exceptions.InternalServerError( + "job_retry me", errors=[{"reason": "jobRateLimitExceeded"}] + ), + google.api_core.exceptions.BadRequest( + "retry me", errors=[{"reason": "jobRateLimitExceeded"}] + ), + { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + }, + ) + rows = _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=DEFAULT_JOB_RETRY, + job_retry=DEFAULT_JOB_RETRY, + ) + assert len(list(rows)) == 4 + + # Relevant docs for the REST API path: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + # and https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults + query_request_path = "/projects/request-project/queries" + + calls = client._call_api.call_args_list + _, kwargs = calls[0] + assert kwargs["method"] == "POST" + assert kwargs["path"] == query_request_path + + # TODO: Add assertion statements for response paths after PR#1797 is fixed + + _, kwargs = calls[3] + assert kwargs["method"] == "POST" + assert kwargs["path"] == query_request_path diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index 1109b7ff2d81..2fcb84e21ded 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -129,3 +129,30 @@ def test_DEFAULT_JOB_RETRY_deadline(): # Make sure we can retry the job at least once. assert DEFAULT_JOB_RETRY._deadline > DEFAULT_RETRY._deadline + + +def test_DEFAULT_JOB_RETRY_job_rate_limit_exceeded_retry_predicate(): + """Tests the retry predicate specifically for jobRateLimitExceeded.""" + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + from google.api_core.exceptions import ClientError + + # Non-ClientError exceptions should never trigger a retry + assert not DEFAULT_JOB_RETRY._predicate(TypeError()) + + # ClientError without specific reason shouldn't trigger a retry + assert not DEFAULT_JOB_RETRY._predicate(ClientError("fail")) + + # ClientError with generic reason "idk" shouldn't trigger a retry + assert not DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="idk")]) + ) + + # ClientError with reason "jobRateLimitExceeded" should trigger a retry + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="jobRateLimitExceeded")]) + ) + + # Other retryable reasons should still work as expected + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="backendError")]) + ) From a805e5be6d3b9fd92583851269811f8326d205ca Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 25 Jan 2024 10:35:02 -0600 Subject: [PATCH 1715/2016] chore(main): release 3.17.1 (#1798) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 8 ++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index bb916158d786..38212325327a 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.17.1](https://github.com/googleapis/python-bigquery/compare/v3.17.0...v3.17.1) (2024-01-24) + + +### Bug Fixes + +* Add pyarrow.large_strign to the _ARROW_SCALAR_IDS_TO_BQ map ([#1796](https://github.com/googleapis/python-bigquery/issues/1796)) ([b402a6d](https://github.com/googleapis/python-bigquery/commit/b402a6df92e656aee10dd2c11c48f6ed93c74fd7)) +* Retry 'job exceeded rate limits' for DDL queries ([#1794](https://github.com/googleapis/python-bigquery/issues/1794)) ([39f33b2](https://github.com/googleapis/python-bigquery/commit/39f33b210ecbe9c2fd390825d29393c2d80257f5)) + ## [3.17.0](https://github.com/googleapis/python-bigquery/compare/v3.16.0...v3.17.0) (2024-01-24) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 9f62912a4e03..9b72eddf74fe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.17.0" +__version__ = "3.17.1" From 3c7f884d80bb6c05ef6f4be0bdcf5d986cf65f9e Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Thu, 25 Jan 2024 12:19:24 -0600 Subject: [PATCH 1716/2016] docs: update `client_query_destination_table.py` sample to use `query_and_wait` (#1783) * docs: update client_query_destination_table.py to use query_and_wait API * docs: update client_query_destination_table.py to use query_and_wait API --------- Co-authored-by: Salem Boyland Co-authored-by: Kira Co-authored-by: Tim Swast --- .../samples/client_query_destination_table.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table.py b/packages/google-cloud-bigquery/samples/client_query_destination_table.py index 486576fea7ca..f6a6222291db 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table.py @@ -32,8 +32,9 @@ def client_query_destination_table(table_id: str) -> None: """ # Start the query, passing in the extra configuration. - query_job = client.query(sql, job_config=job_config) # Make an API request. - query_job.result() # Wait for the job to complete. + client.query_and_wait( + sql, job_config=job_config + ) # Make an API request and wait for the query to finish. print("Query results loaded to the table {}".format(table_id)) # [END bigquery_query_destination_table] From 8f818d93510076e9c553855d3c407157072f5d4e Mon Sep 17 00:00:00 2001 From: Stephanie A <129541811+DevStephanie@users.noreply.github.com> Date: Thu, 25 Jan 2024 14:34:17 -0600 Subject: [PATCH 1717/2016] docs: update to use `query_and_wait` in `client_query_w_positional_params.py` (#1786) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Updates file * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Correcting RowIterator * Variable corrections --------- Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- .../samples/client_query_w_positional_params.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py b/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py index 9cdde69ca64d..24f3e0dbe166 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_positional_params.py @@ -35,8 +35,10 @@ def client_query_w_positional_params() -> None: bigquery.ScalarQueryParameter(None, "INT64", 250), ] ) - query_job = client.query(query, job_config=job_config) # Make an API request. + results = client.query_and_wait( + query, job_config=job_config + ) # Make an API request. - for row in query_job: + for row in results: print("{}: \t{}".format(row.word, row.word_count)) # [END bigquery_query_params_positional] From fea4bf1a7b31a1ce32523bf122a643a8ab15dc1c Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Thu, 25 Jan 2024 15:39:41 -0600 Subject: [PATCH 1718/2016] Docs: update the query destination table legacy file to use query_and_wait API (#1775) * docs: update query destination table legacy file to use query_and_wait API * docs: update the query_destination_table_legacy.py to use the query_and_wait API --------- Co-authored-by: Salem Boyland Co-authored-by: Kira Co-authored-by: Chalmer Lowe --- .../samples/client_query_destination_table_legacy.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py b/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py index 37f50fdb4558..26c3945cac70 100644 --- a/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py +++ b/packages/google-cloud-bigquery/samples/client_query_destination_table_legacy.py @@ -36,8 +36,9 @@ def client_query_destination_table_legacy(table_id: str) -> None: """ # Start the query, passing in the extra configuration. - query_job = client.query(sql, job_config=job_config) # Make an API request. - query_job.result() # Wait for the job to complete. + client.query_and_wait( + sql, job_config=job_config + ) # Make an API request and wait for the query to finish. print("Query results loaded to the table {}".format(table_id)) # [END bigquery_query_legacy_large_results] From 1db595598dfba88be3f1fae8aaaf021c9a77fbbf Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Thu, 25 Jan 2024 16:12:59 -0600 Subject: [PATCH 1719/2016] docs: update sample for query_to_arrow to use query_and_wait API (#1776) Co-authored-by: Salem Boyland Co-authored-by: Kira Co-authored-by: Chalmer Lowe --- packages/google-cloud-bigquery/samples/query_to_arrow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/query_to_arrow.py b/packages/google-cloud-bigquery/samples/query_to_arrow.py index f1afc7c94bba..d359bb79a912 100644 --- a/packages/google-cloud-bigquery/samples/query_to_arrow.py +++ b/packages/google-cloud-bigquery/samples/query_to_arrow.py @@ -44,8 +44,8 @@ def query_to_arrow() -> "pyarrow.Table": FROM races r CROSS JOIN UNNEST(r.participants) as participant; """ - query_job = client.query(sql) - arrow_table = query_job.to_arrow() # Make an API request. + results = client.query_and_wait(sql) + arrow_table = results.to_arrow() # Make an API request. print( "Downloaded {} rows, {} columns.".format( From 708cc5c450e960efca6d0b5b3a3211dd72b39e52 Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Fri, 26 Jan 2024 01:18:11 -0600 Subject: [PATCH 1720/2016] docs: update query_external_sheets_permanent_table.py to use query_and_wait API (#1778) Co-authored-by: Salem Boyland Co-authored-by: Kira --- .../samples/query_external_sheets_permanent_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py index f23f44259585..0f8ddbae1ed9 100644 --- a/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_sheets_permanent_table.py @@ -69,10 +69,10 @@ def query_external_sheets_permanent_table(dataset_id: str) -> None: # Example query to find states starting with "W". sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id) - query_job = client.query(sql) # Make an API request. + results = client.query_and_wait(sql) # Make an API request. # Wait for the query to complete. - w_states = list(query_job) + w_states = list(results) print( "There are {} states with names starting with W in the selected range.".format( len(w_states) From d329d6b202ebbd1536aebb579a3a02c506bfb87b Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Fri, 26 Jan 2024 07:12:04 -0600 Subject: [PATCH 1721/2016] docs: update to use API (#1781) Co-authored-by: Salem Boyland Co-authored-by: Kira --- .../samples/query_external_gcs_temporary_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py b/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py index d622ab1a3025..e0bc8438f780 100644 --- a/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py +++ b/packages/google-cloud-bigquery/samples/query_external_gcs_temporary_table.py @@ -38,8 +38,8 @@ def query_external_gcs_temporary_table() -> None: # Example query to find states starting with 'W'. sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id) - query_job = client.query(sql, job_config=job_config) # Make an API request. + results = client.query_and_wait(sql, job_config=job_config) # Make an API request. - w_states = list(query_job) # Wait for the job to complete. + w_states = list(results) # Wait for the job to complete. print("There are {} states with names starting with W.".format(len(w_states))) # [END bigquery_query_external_gcs_temp] From e42d7ce44ae9767c75ddb9be40abfb9ffd9748f3 Mon Sep 17 00:00:00 2001 From: Stephanie A <129541811+DevStephanie@users.noreply.github.com> Date: Fri, 26 Jan 2024 12:01:43 -0600 Subject: [PATCH 1722/2016] docs: update to use `query_and_wait` in `samples/client_query_w_timestamp_params.py` (#1785) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Updates file * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Correcting RowIterator * Correcting variable names --------- Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- .../samples/client_query_w_timestamp_params.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py index c1ade8782cfe..1dbb1e9b6fe0 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_timestamp_params.py @@ -32,8 +32,10 @@ def client_query_w_timestamp_params() -> None: ) ] ) - query_job = client.query(query, job_config=job_config) # Make an API request. + results = client.query_and_wait( + query, job_config=job_config + ) # Make an API request. - for row in query_job: + for row in results: print(row) # [END bigquery_query_params_timestamps] From 5c8012cce9a2c98b3e13c3e0e90f227ad3adb246 Mon Sep 17 00:00:00 2001 From: Stephanie A <129541811+DevStephanie@users.noreply.github.com> Date: Mon, 29 Jan 2024 17:46:33 -0600 Subject: [PATCH 1723/2016] docs: update to_geodataframe to use query_and_wait functionality (#1800) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Updates file * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Correcting RowIterator * Correcting variable names * Correcting methods --------- Co-authored-by: Owl Bot Co-authored-by: Kira --- .../google-cloud-bigquery/samples/geography/to_geodataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/to_geodataframe.py b/packages/google-cloud-bigquery/samples/geography/to_geodataframe.py index e36331f270ef..630d8d0bf61c 100644 --- a/packages/google-cloud-bigquery/samples/geography/to_geodataframe.py +++ b/packages/google-cloud-bigquery/samples/geography/to_geodataframe.py @@ -33,6 +33,6 @@ def get_austin_service_requests_as_geography() -> "pandas.DataFrame": LIMIT 10 """ - df = client.query(sql).to_geodataframe() + df = client.query_and_wait(sql).to_geodataframe() # [END bigquery_query_results_geodataframe] return df From 0bb51b7686254c753df5330f515b3ec407978855 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Tue, 30 Jan 2024 10:02:42 -0800 Subject: [PATCH 1724/2016] fix: change load_table_from_json autodetect logic (#1804) --- .../google/cloud/bigquery/client.py | 18 +- .../tests/system/test_client.py | 39 ++++ .../tests/unit/test_client.py | 203 +++++++++++++++++- 3 files changed, 255 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index b2ea130c48de..4708e753bc6e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2833,8 +2833,22 @@ def load_table_from_json( new_job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON - if new_job_config.schema is None: - new_job_config.autodetect = True + # In specific conditions, we check if the table alread exists, and/or + # set the autodetect value for the user. For exact conditions, see table + # https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297 + if new_job_config.schema is None and new_job_config.autodetect is None: + if new_job_config.write_disposition in ( + job.WriteDisposition.WRITE_TRUNCATE, + job.WriteDisposition.WRITE_EMPTY, + ): + new_job_config.autodetect = True + else: + try: + self.get_table(destination) + except core_exceptions.NotFound: + new_job_config.autodetect = True + else: + new_job_config.autodetect = False if project is None: project = self.project diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index d7e56f7ffa25..74c152cf220e 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -994,6 +994,45 @@ def test_load_table_from_json_schema_autodetect(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 2) + # Autodetect makes best effort to infer the schema, but situations exist + # when the detected schema is wrong, and does not match existing schema. + # Thus the client sets autodetect = False when table exists and just uses + # the existing schema. This test case uses a special case where backend has + # no way to distinguish int from string. + def test_load_table_from_json_schema_autodetect_table_exists(self): + json_rows = [ + {"name": "123", "age": 18, "birthday": "2001-10-15", "is_awesome": False}, + {"name": "456", "age": 79, "birthday": "1940-03-10", "is_awesome": True}, + ] + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_json_basic_use".format( + Config.CLIENT.project, dataset_id + ) + + # Use schema with NULLABLE fields, because schema autodetection + # defaults to field mode NULLABLE. + table_schema = ( + bigquery.SchemaField("name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), + bigquery.SchemaField("birthday", "DATE", mode="NULLABLE"), + bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"), + ) + # create the table before loading so that the column order is predictable + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # do not pass an explicit job config to trigger automatic schema detection + load_job = Config.CLIENT.load_table_from_json(json_rows, table_id) + load_job.result() + + table = Config.CLIENT.get_table(table) + self.assertEqual(tuple(table.schema), table_schema) + self.assertEqual(table.num_rows, 2) + def test_load_avro_from_uri_then_dump_table(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SourceFormat diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 56bdbad5eee3..42581edc1aae 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -8951,6 +8951,8 @@ def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self): SchemaField("x", "BIGNUMERIC", "NULLABLE", None), ) + # With autodetect specified, we pass the value as is. For more info, see + # https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297 def test_load_table_from_json_basic_use(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -8962,12 +8964,28 @@ def test_load_table_from_json_basic_use(self): {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, ] + job_config = job.LoadJobConfig(autodetect=True) + load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - with load_patch as load_table_from_file: - client.load_table_from_json(json_rows, self.TABLE_REF) + # mock: remote table already exists + get_table_reference = { + "projectId": "project_id", + "datasetId": "test_dataset", + "tableId": "test_table", + } + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock(table_reference=get_table_reference), + ) + + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_json( + json_rows, self.TABLE_REF, job_config=job_config + ) load_table_from_file.assert_called_once_with( client, @@ -9066,6 +9084,174 @@ def test_load_table_from_json_w_invalid_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + # When all following are true: + # (1) no schema provided; + # (2) no autodetect value provided; + # (3) writeDisposition == WRITE_APPEND or None; + # (4) table already exists, + # client sets autodetect == False + # For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297 + def test_load_table_from_json_wo_schema_wo_autodetect_write_append_w_table(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.job import WriteDisposition + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + job_config = job.LoadJobConfig(write_disposition=WriteDisposition.WRITE_APPEND) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + # mock: remote table already exists + get_table_reference = { + "projectId": "project_id", + "datasetId": "test_dataset", + "tableId": "test_table", + } + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock(table_reference=get_table_reference), + ) + + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_json( + json_rows, self.TABLE_REF, job_config=job_config + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema is None + assert not sent_config.autodetect + + # When all following are true: + # (1) no schema provided; + # (2) no autodetect value provided; + # (3) writeDisposition == WRITE_APPEND or None; + # (4) table does NOT exist, + # client sets autodetect == True + # For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297 + def test_load_table_from_json_wo_schema_wo_autodetect_write_append_wo_table(self): + import google.api_core.exceptions as core_exceptions + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.job import WriteDisposition + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + job_config = job.LoadJobConfig(write_disposition=WriteDisposition.WRITE_APPEND) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + # mock: remote table doesn't exist + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=core_exceptions.NotFound(""), + ) + + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_json( + json_rows, self.TABLE_REF, job_config=job_config + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema is None + assert sent_config.autodetect + + # When all following are true: + # (1) no schema provided; + # (2) no autodetect value provided; + # (3) writeDisposition == WRITE_TRUNCATE or WRITE_EMPTY; + # client sets autodetect == True + # For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297 + def test_load_table_from_json_wo_schema_wo_autodetect_others(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.job import WriteDisposition + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + job_config = job.LoadJobConfig( + write_disposition=WriteDisposition.WRITE_TRUNCATE + ) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json( + json_rows, self.TABLE_REF, job_config=job_config + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema is None + assert sent_config.autodetect + def test_load_table_from_json_w_explicit_job_config_override(self): from google.cloud.bigquery import job from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -9190,8 +9376,19 @@ def test_load_table_from_json_unicode_emoji_data_case(self): load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) + # mock: remote table already exists + get_table_reference = { + "projectId": "project_id", + "datasetId": "test_dataset", + "tableId": "test_table", + } + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock(table_reference=get_table_reference), + ) - with load_patch as load_table_from_file: + with load_patch as load_table_from_file, get_table_patch: client.load_table_from_json(json_rows, self.TABLE_REF) load_table_from_file.assert_called_once_with( From bbf051d19d40db85ef546bd3968ec726c3375a10 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 6 Feb 2024 09:58:34 -0500 Subject: [PATCH 1725/2016] chore(main): release 3.17.2 (#1799) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 19 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 38212325327a..7bedb5cf6c7f 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.17.2](https://github.com/googleapis/python-bigquery/compare/v3.17.1...v3.17.2) (2024-01-30) + + +### Bug Fixes + +* Change load_table_from_json autodetect logic ([#1804](https://github.com/googleapis/python-bigquery/issues/1804)) ([6249032](https://github.com/googleapis/python-bigquery/commit/62490325f64e5d66303d9218992e28ac5f21cb3f)) + + +### Documentation + +* Update to use API ([#1781](https://github.com/googleapis/python-bigquery/issues/1781)) ([81563b0](https://github.com/googleapis/python-bigquery/commit/81563b06298fe3a64be6a89b583c3d64758ca12a)) +* Update `client_query_destination_table.py` sample to use `query_and_wait` ([#1783](https://github.com/googleapis/python-bigquery/issues/1783)) ([68ebbe1](https://github.com/googleapis/python-bigquery/commit/68ebbe12d455ce8e9b1784fb11787c2fb842ef22)) +* Update query_external_sheets_permanent_table.py to use query_and_wait API ([#1778](https://github.com/googleapis/python-bigquery/issues/1778)) ([a7be88a](https://github.com/googleapis/python-bigquery/commit/a7be88adf8a480ee61aa79789cb53df1b79bb091)) +* Update sample for query_to_arrow to use query_and_wait API ([#1776](https://github.com/googleapis/python-bigquery/issues/1776)) ([dbf10de](https://github.com/googleapis/python-bigquery/commit/dbf10dee51a7635e9b98658f205ded2de087a06f)) +* Update the query destination table legacy file to use query_and_wait API ([#1775](https://github.com/googleapis/python-bigquery/issues/1775)) ([ef89f9e](https://github.com/googleapis/python-bigquery/commit/ef89f9e58c22b3af5a7757b69daa030116012350)) +* Update to use `query_and_wait` in `client_query_w_positional_params.py` ([#1786](https://github.com/googleapis/python-bigquery/issues/1786)) ([410f71e](https://github.com/googleapis/python-bigquery/commit/410f71e6b6e755928e363ed89c1044e14b0db9cc)) +* Update to use `query_and_wait` in `samples/client_query_w_timestamp_params.py` ([#1785](https://github.com/googleapis/python-bigquery/issues/1785)) ([ba36948](https://github.com/googleapis/python-bigquery/commit/ba3694852c13c8a29fe0f9d923353e82acfd4278)) +* Update to_geodataframe to use query_and_wait functionality ([#1800](https://github.com/googleapis/python-bigquery/issues/1800)) ([1298594](https://github.com/googleapis/python-bigquery/commit/12985942942b8f205ecd261fcdf620df9a640460)) + ## [3.17.1](https://github.com/googleapis/python-bigquery/compare/v3.17.0...v3.17.1) (2024-01-24) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 9b72eddf74fe..771b77a38766 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.17.1" +__version__ = "3.17.2" From 7cdc9b3a7aca2cd66cb724d86bddaea57fdd0c5c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 7 Feb 2024 16:48:22 -0800 Subject: [PATCH 1726/2016] build(deps): bump cryptography from 41.0.6 to 42.0.0 in /synthtool/gcp/templates/python_library/.kokoro (#1811) Source-Link: https://github.com/googleapis/synthtool/commit/e13b22b1f660c80e4c3e735a9177d2f16c4b8bdc Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:97b671488ad548ef783a452a9e1276ac10f144d5ae56d98cc4bf77ba504082b4 Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/requirements.txt | 57 +++++++++++-------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index d8a1bbca7179..2aefd0e91175 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5ea6d0ab82c956b50962f91d94e206d3921537ae5fe1549ec5326381d8905cfa -# created: 2024-01-15T16:32:08.142785673Z + digest: sha256:97b671488ad548ef783a452a9e1276ac10f144d5ae56d98cc4bf77ba504082b4 +# created: 2024-02-06T03:20:16.660474034Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index bb3d6ca38b14..8c11c9f3e9b6 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -93,30 +93,39 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==41.0.6 \ - --hash=sha256:068bc551698c234742c40049e46840843f3d98ad7ce265fd2bd4ec0d11306596 \ - --hash=sha256:0f27acb55a4e77b9be8d550d762b0513ef3fc658cd3eb15110ebbcbd626db12c \ - --hash=sha256:2132d5865eea673fe6712c2ed5fb4fa49dba10768bb4cc798345748380ee3660 \ - --hash=sha256:3288acccef021e3c3c10d58933f44e8602cf04dba96d9796d70d537bb2f4bbc4 \ - --hash=sha256:35f3f288e83c3f6f10752467c48919a7a94b7d88cc00b0668372a0d2ad4f8ead \ - --hash=sha256:398ae1fc711b5eb78e977daa3cbf47cec20f2c08c5da129b7a296055fbb22aed \ - --hash=sha256:422e3e31d63743855e43e5a6fcc8b4acab860f560f9321b0ee6269cc7ed70cc3 \ - --hash=sha256:48783b7e2bef51224020efb61b42704207dde583d7e371ef8fc2a5fb6c0aabc7 \ - --hash=sha256:4d03186af98b1c01a4eda396b137f29e4e3fb0173e30f885e27acec8823c1b09 \ - --hash=sha256:5daeb18e7886a358064a68dbcaf441c036cbdb7da52ae744e7b9207b04d3908c \ - --hash=sha256:60e746b11b937911dc70d164060d28d273e31853bb359e2b2033c9e93e6f3c43 \ - --hash=sha256:742ae5e9a2310e9dade7932f9576606836ed174da3c7d26bc3d3ab4bd49b9f65 \ - --hash=sha256:7e00fb556bda398b99b0da289ce7053639d33b572847181d6483ad89835115f6 \ - --hash=sha256:85abd057699b98fce40b41737afb234fef05c67e116f6f3650782c10862c43da \ - --hash=sha256:8efb2af8d4ba9dbc9c9dd8f04d19a7abb5b49eab1f3694e7b5a16a5fc2856f5c \ - --hash=sha256:ae236bb8760c1e55b7a39b6d4d32d2279bc6c7c8500b7d5a13b6fb9fc97be35b \ - --hash=sha256:afda76d84b053923c27ede5edc1ed7d53e3c9f475ebaf63c68e69f1403c405a8 \ - --hash=sha256:b27a7fd4229abef715e064269d98a7e2909ebf92eb6912a9603c7e14c181928c \ - --hash=sha256:b648fe2a45e426aaee684ddca2632f62ec4613ef362f4d681a9a6283d10e079d \ - --hash=sha256:c5a550dc7a3b50b116323e3d376241829fd326ac47bc195e04eb33a8170902a9 \ - --hash=sha256:da46e2b5df770070412c46f87bac0849b8d685c5f2679771de277a422c7d0b86 \ - --hash=sha256:f39812f70fc5c71a15aa3c97b2bbe213c3f2a460b79bd21c40d033bb34a9bf36 \ - --hash=sha256:ff369dd19e8fe0528b02e8df9f2aeb2479f89b1270d90f96a63500afe9af5cae +cryptography==42.0.0 \ + --hash=sha256:0a68bfcf57a6887818307600c3c0ebc3f62fbb6ccad2240aa21887cda1f8df1b \ + --hash=sha256:146e971e92a6dd042214b537a726c9750496128453146ab0ee8971a0299dc9bd \ + --hash=sha256:14e4b909373bc5bf1095311fa0f7fcabf2d1a160ca13f1e9e467be1ac4cbdf94 \ + --hash=sha256:206aaf42e031b93f86ad60f9f5d9da1b09164f25488238ac1dc488334eb5e221 \ + --hash=sha256:3005166a39b70c8b94455fdbe78d87a444da31ff70de3331cdec2c568cf25b7e \ + --hash=sha256:324721d93b998cb7367f1e6897370644751e5580ff9b370c0a50dc60a2003513 \ + --hash=sha256:33588310b5c886dfb87dba5f013b8d27df7ffd31dc753775342a1e5ab139e59d \ + --hash=sha256:35cf6ed4c38f054478a9df14f03c1169bb14bd98f0b1705751079b25e1cb58bc \ + --hash=sha256:3ca482ea80626048975360c8e62be3ceb0f11803180b73163acd24bf014133a0 \ + --hash=sha256:56ce0c106d5c3fec1038c3cca3d55ac320a5be1b44bf15116732d0bc716979a2 \ + --hash=sha256:5a217bca51f3b91971400890905a9323ad805838ca3fa1e202a01844f485ee87 \ + --hash=sha256:678cfa0d1e72ef41d48993a7be75a76b0725d29b820ff3cfd606a5b2b33fda01 \ + --hash=sha256:69fd009a325cad6fbfd5b04c711a4da563c6c4854fc4c9544bff3088387c77c0 \ + --hash=sha256:6cf9b76d6e93c62114bd19485e5cb003115c134cf9ce91f8ac924c44f8c8c3f4 \ + --hash=sha256:74f18a4c8ca04134d2052a140322002fef535c99cdbc2a6afc18a8024d5c9d5b \ + --hash=sha256:85f759ed59ffd1d0baad296e72780aa62ff8a71f94dc1ab340386a1207d0ea81 \ + --hash=sha256:87086eae86a700307b544625e3ba11cc600c3c0ef8ab97b0fda0705d6db3d4e3 \ + --hash=sha256:8814722cffcfd1fbd91edd9f3451b88a8f26a5fd41b28c1c9193949d1c689dc4 \ + --hash=sha256:8fedec73d590fd30c4e3f0d0f4bc961aeca8390c72f3eaa1a0874d180e868ddf \ + --hash=sha256:9515ea7f596c8092fdc9902627e51b23a75daa2c7815ed5aa8cf4f07469212ec \ + --hash=sha256:988b738f56c665366b1e4bfd9045c3efae89ee366ca3839cd5af53eaa1401bce \ + --hash=sha256:a2a8d873667e4fd2f34aedab02ba500b824692c6542e017075a2efc38f60a4c0 \ + --hash=sha256:bd7cf7a8d9f34cc67220f1195884151426ce616fdc8285df9054bfa10135925f \ + --hash=sha256:bdce70e562c69bb089523e75ef1d9625b7417c6297a76ac27b1b8b1eb51b7d0f \ + --hash=sha256:be14b31eb3a293fc6e6aa2807c8a3224c71426f7c4e3639ccf1a2f3ffd6df8c3 \ + --hash=sha256:be41b0c7366e5549265adf2145135dca107718fa44b6e418dc7499cfff6b4689 \ + --hash=sha256:c310767268d88803b653fffe6d6f2f17bb9d49ffceb8d70aed50ad45ea49ab08 \ + --hash=sha256:c58115384bdcfe9c7f644c72f10f6f42bed7cf59f7b52fe1bf7ae0a622b3a139 \ + --hash=sha256:c640b0ef54138fde761ec99a6c7dc4ce05e80420262c20fa239e694ca371d434 \ + --hash=sha256:ca20550bb590db16223eb9ccc5852335b48b8f597e2f6f0878bbfd9e7314eb17 \ + --hash=sha256:d97aae66b7de41cdf5b12087b5509e4e9805ed6f562406dfcf60e8481a9a28f8 \ + --hash=sha256:e9326ca78111e4c645f7e49cbce4ed2f3f85e17b61a563328c85a5208cf34440 # via # gcp-releasetool # secretstorage From 206044ea41299d579568eade6460ce9c34357792 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 8 Feb 2024 13:12:25 -0600 Subject: [PATCH 1727/2016] chore: skip `test_to_dataframe_iterable_w_bqstorage_max_results_warning` if google-cloud-bigquery-storage is not installed (#1814) --- packages/google-cloud-bigquery/tests/unit/test_table.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index e4d0c66ab28e..00a7f06e68ed 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -3285,6 +3285,9 @@ def test_to_dataframe_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): from google.cloud.bigquery import schema From 56a0418d59bb3056148dd9b3260023b78ea938bd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 12 Feb 2024 13:46:25 -0600 Subject: [PATCH 1728/2016] feat: support nullable boolean and Int64 dtypes in `insert_rows_from_dataframe` (#1816) --- .../google/cloud/bigquery/_pandas_helpers.py | 19 ++++++ .../tests/system/test_pandas.py | 13 +++- .../tests/unit/test__pandas_helpers.py | 65 ++++++++++++++----- 3 files changed, 79 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index bcc869f156e5..e97dda7e5777 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -958,6 +958,25 @@ def dataframe_to_json_generator(dataframe): # considered a NaN, however. if isinstance(is_nan, bool) and is_nan: continue + + # Convert numpy types to corresponding Python types. + # https://stackoverflow.com/a/60441783/101923 + if isinstance(value, numpy.bool_): + value = bool(value) + elif isinstance( + value, + ( + numpy.int64, + numpy.int32, + numpy.int16, + numpy.int8, + numpy.uint64, + numpy.uint32, + numpy.uint16, + numpy.uint8, + ), + ): + value = int(value) output[column] = value yield output diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index e93f245c080c..85c7b79e6442 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -835,7 +835,9 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): schema = [ SF("float_col", "FLOAT", mode="REQUIRED"), SF("int_col", "INTEGER", mode="REQUIRED"), + SF("int64_col", "INTEGER", mode="NULLABLE"), SF("bool_col", "BOOLEAN", mode="REQUIRED"), + SF("boolean_col", "BOOLEAN", mode="NULLABLE"), SF("string_col", "STRING", mode="NULLABLE"), SF("date_col", "DATE", mode="NULLABLE"), SF("time_col", "TIME", mode="NULLABLE"), @@ -898,6 +900,15 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): dataframe["date_col"] = dataframe["date_col"].astype("dbdate") dataframe["time_col"] = dataframe["time_col"].astype("dbtime") + # Support nullable integer and boolean dtypes. + # https://github.com/googleapis/python-bigquery/issues/1815 + dataframe["int64_col"] = pandas.Series( + [-11, -22, pandas.NA, -44, -55, -66], dtype="Int64" + ) + dataframe["boolean_col"] = pandas.Series( + [True, False, True, pandas.NA, True, False], dtype="boolean" + ) + table_id = f"{bigquery_client.project}.{dataset_id}.test_insert_rows_from_dataframe" table_arg = bigquery.Table(table_id, schema=schema) table = helpers.retry_403(bigquery_client.create_table)(table_arg) @@ -910,7 +921,7 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): expected = [ # Pandas often represents NULL values as NaN. Convert to None for # easier comparison. - tuple(None if col != col else col for col in data_row) + tuple(None if pandas.isna(col) else col for col in data_row) for data_row in dataframe.itertuples(index=False) ] diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index ad40a6da6eb6..7c83d3ec5fd0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -808,29 +808,60 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_json_generator(module_under_test): utcnow = datetime.datetime.utcnow() - df_data = collections.OrderedDict( - [ - ("a_series", [pandas.NA, 2, 3, 4]), - ("b_series", [0.1, float("NaN"), 0.3, 0.4]), - ("c_series", ["a", "b", pandas.NA, "d"]), - ("d_series", [utcnow, utcnow, utcnow, pandas.NaT]), - ("e_series", [True, False, True, None]), - ] - ) dataframe = pandas.DataFrame( - df_data, index=pandas.Index([4, 5, 6, 7], name="a_index") + { + "a_series": [1, 2, 3, 4], + "b_series": [0.1, float("NaN"), 0.3, 0.4], + "c_series": ["a", "b", pandas.NA, "d"], + "d_series": [utcnow, utcnow, utcnow, pandas.NaT], + "e_series": [True, False, True, None], + # Support nullable dtypes. + # https://github.com/googleapis/python-bigquery/issues/1815 + "boolean_series": pandas.Series( + [True, False, pandas.NA, False], dtype="boolean" + ), + "int64_series": pandas.Series([-1, pandas.NA, -3, -4], dtype="Int64"), + } ) - dataframe = dataframe.astype({"a_series": pandas.Int64Dtype()}) + # Index is not included, even if it is not the default and has a name. + dataframe = dataframe.rename(index=lambda idx: idx + 4) + dataframe.index.name = "a_index" - rows = module_under_test.dataframe_to_json_generator(dataframe) + rows = list(module_under_test.dataframe_to_json_generator(dataframe)) expected = [ - {"b_series": 0.1, "c_series": "a", "d_series": utcnow, "e_series": True}, - {"a_series": 2, "c_series": "b", "d_series": utcnow, "e_series": False}, - {"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True}, - {"a_series": 4, "b_series": 0.4, "c_series": "d"}, + { + "a_series": 1, + "b_series": 0.1, + "c_series": "a", + "d_series": utcnow, + "e_series": True, + "boolean_series": True, + "int64_series": -1, + }, + { + "a_series": 2, + "c_series": "b", + "d_series": utcnow, + "e_series": False, + "boolean_series": False, + }, + { + "a_series": 3, + "b_series": 0.3, + "d_series": utcnow, + "e_series": True, + "int64_series": -3, + }, + { + "a_series": 4, + "b_series": 0.4, + "c_series": "d", + "boolean_series": False, + "int64_series": -4, + }, ] - assert list(rows) == expected + assert rows == expected @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") From 94f4e58ae51117279c8f889d99f5f2122c437e5d Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Mon, 12 Feb 2024 17:39:04 -0600 Subject: [PATCH 1729/2016] docs: update `client_query_w_named_params.py` to use `query_and_wait` API (#1782) * docs: update client_query_w_named_params.py to use query_and_wait API * Update client_query_w_named_params.py lint --------- Co-authored-by: Salem Boyland Co-authored-by: Kira --- .../samples/client_query_w_named_params.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/client_query_w_named_params.py b/packages/google-cloud-bigquery/samples/client_query_w_named_params.py index 6dd72d44fa05..b7e59a81ae05 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_named_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_named_params.py @@ -33,8 +33,10 @@ def client_query_w_named_params() -> None: bigquery.ScalarQueryParameter("min_word_count", "INT64", 250), ] ) - query_job = client.query(query, job_config=job_config) # Make an API request. + results = client.query_and_wait( + query, job_config=job_config + ) # Make an API request. - for row in query_job: + for row in results: print("{}: \t{}".format(row.word, row.word_count)) # [END bigquery_query_params_named] From c6b41e09054c3506ef5e8152218238b036c015d7 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 20 Feb 2024 12:50:00 -0800 Subject: [PATCH 1730/2016] build(deps): bump cryptography from 42.0.0 to 42.0.2 in .kokoro (#1821) Source-Link: https://github.com/googleapis/synthtool/commit/8d392a55db44b00b4a9b995318051e334eecdcf1 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:a0c4463fcfd9893fc172a3b3db2b6ac0c7b94ec6ad458c7dcea12d9693615ac3 Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/requirements.txt | 66 +++++++++---------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 2aefd0e91175..51213ca00ee3 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:97b671488ad548ef783a452a9e1276ac10f144d5ae56d98cc4bf77ba504082b4 -# created: 2024-02-06T03:20:16.660474034Z + digest: sha256:a0c4463fcfd9893fc172a3b3db2b6ac0c7b94ec6ad458c7dcea12d9693615ac3 +# created: 2024-02-17T12:21:23.177926195Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 8c11c9f3e9b6..f80bdcd62981 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -93,39 +93,39 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==42.0.0 \ - --hash=sha256:0a68bfcf57a6887818307600c3c0ebc3f62fbb6ccad2240aa21887cda1f8df1b \ - --hash=sha256:146e971e92a6dd042214b537a726c9750496128453146ab0ee8971a0299dc9bd \ - --hash=sha256:14e4b909373bc5bf1095311fa0f7fcabf2d1a160ca13f1e9e467be1ac4cbdf94 \ - --hash=sha256:206aaf42e031b93f86ad60f9f5d9da1b09164f25488238ac1dc488334eb5e221 \ - --hash=sha256:3005166a39b70c8b94455fdbe78d87a444da31ff70de3331cdec2c568cf25b7e \ - --hash=sha256:324721d93b998cb7367f1e6897370644751e5580ff9b370c0a50dc60a2003513 \ - --hash=sha256:33588310b5c886dfb87dba5f013b8d27df7ffd31dc753775342a1e5ab139e59d \ - --hash=sha256:35cf6ed4c38f054478a9df14f03c1169bb14bd98f0b1705751079b25e1cb58bc \ - --hash=sha256:3ca482ea80626048975360c8e62be3ceb0f11803180b73163acd24bf014133a0 \ - --hash=sha256:56ce0c106d5c3fec1038c3cca3d55ac320a5be1b44bf15116732d0bc716979a2 \ - --hash=sha256:5a217bca51f3b91971400890905a9323ad805838ca3fa1e202a01844f485ee87 \ - --hash=sha256:678cfa0d1e72ef41d48993a7be75a76b0725d29b820ff3cfd606a5b2b33fda01 \ - --hash=sha256:69fd009a325cad6fbfd5b04c711a4da563c6c4854fc4c9544bff3088387c77c0 \ - --hash=sha256:6cf9b76d6e93c62114bd19485e5cb003115c134cf9ce91f8ac924c44f8c8c3f4 \ - --hash=sha256:74f18a4c8ca04134d2052a140322002fef535c99cdbc2a6afc18a8024d5c9d5b \ - --hash=sha256:85f759ed59ffd1d0baad296e72780aa62ff8a71f94dc1ab340386a1207d0ea81 \ - --hash=sha256:87086eae86a700307b544625e3ba11cc600c3c0ef8ab97b0fda0705d6db3d4e3 \ - --hash=sha256:8814722cffcfd1fbd91edd9f3451b88a8f26a5fd41b28c1c9193949d1c689dc4 \ - --hash=sha256:8fedec73d590fd30c4e3f0d0f4bc961aeca8390c72f3eaa1a0874d180e868ddf \ - --hash=sha256:9515ea7f596c8092fdc9902627e51b23a75daa2c7815ed5aa8cf4f07469212ec \ - --hash=sha256:988b738f56c665366b1e4bfd9045c3efae89ee366ca3839cd5af53eaa1401bce \ - --hash=sha256:a2a8d873667e4fd2f34aedab02ba500b824692c6542e017075a2efc38f60a4c0 \ - --hash=sha256:bd7cf7a8d9f34cc67220f1195884151426ce616fdc8285df9054bfa10135925f \ - --hash=sha256:bdce70e562c69bb089523e75ef1d9625b7417c6297a76ac27b1b8b1eb51b7d0f \ - --hash=sha256:be14b31eb3a293fc6e6aa2807c8a3224c71426f7c4e3639ccf1a2f3ffd6df8c3 \ - --hash=sha256:be41b0c7366e5549265adf2145135dca107718fa44b6e418dc7499cfff6b4689 \ - --hash=sha256:c310767268d88803b653fffe6d6f2f17bb9d49ffceb8d70aed50ad45ea49ab08 \ - --hash=sha256:c58115384bdcfe9c7f644c72f10f6f42bed7cf59f7b52fe1bf7ae0a622b3a139 \ - --hash=sha256:c640b0ef54138fde761ec99a6c7dc4ce05e80420262c20fa239e694ca371d434 \ - --hash=sha256:ca20550bb590db16223eb9ccc5852335b48b8f597e2f6f0878bbfd9e7314eb17 \ - --hash=sha256:d97aae66b7de41cdf5b12087b5509e4e9805ed6f562406dfcf60e8481a9a28f8 \ - --hash=sha256:e9326ca78111e4c645f7e49cbce4ed2f3f85e17b61a563328c85a5208cf34440 +cryptography==42.0.2 \ + --hash=sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380 \ + --hash=sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589 \ + --hash=sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea \ + --hash=sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65 \ + --hash=sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a \ + --hash=sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3 \ + --hash=sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008 \ + --hash=sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1 \ + --hash=sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2 \ + --hash=sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635 \ + --hash=sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2 \ + --hash=sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90 \ + --hash=sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee \ + --hash=sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a \ + --hash=sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242 \ + --hash=sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12 \ + --hash=sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2 \ + --hash=sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d \ + --hash=sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be \ + --hash=sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee \ + --hash=sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6 \ + --hash=sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529 \ + --hash=sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929 \ + --hash=sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1 \ + --hash=sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6 \ + --hash=sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a \ + --hash=sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446 \ + --hash=sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9 \ + --hash=sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888 \ + --hash=sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4 \ + --hash=sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33 \ + --hash=sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f # via # gcp-releasetool # secretstorage From c05fd5c0fd4abdd9e6a66c37a029c61157944a7d Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 21 Feb 2024 08:47:45 -0500 Subject: [PATCH 1731/2016] chore: fix minor typos (#1820) --- .../google/cloud/bigquery/_job_helpers.py | 2 +- .../google/cloud/bigquery/magics/magics.py | 2 +- .../google/cloud/bigquery/opentelemetry_tracing.py | 2 +- packages/google-cloud-bigquery/tests/system/test_query.py | 2 +- .../google-cloud-bigquery/tests/unit/test__job_helpers.py | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 6debc377b72b..0692c9b655de 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -54,7 +54,7 @@ # The purpose of _TIMEOUT_BUFFER_MILLIS is to allow the server-side timeout to -# happen before the client-side timeout. This is not strictly neccessary, as the +# happen before the client-side timeout. This is not strictly necessary, as the # client retries client-side timeouts, but the hope by making the server-side # timeout slightly shorter is that it can save the server from some unncessary # processing time. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index b7c685d9a87d..8464c87929a6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -288,7 +288,7 @@ def _handle_error(error, destination_var=None): Args: error (Exception): - An exception that ocurred during the query execution. + An exception that occurred during the query execution. destination_var (Optional[str]): The name of the IPython session variable to store the query job. """ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py index be02c1686921..e2a05e4d0f82 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -90,7 +90,7 @@ def _get_final_span_attributes(attributes=None, client=None, job_ref=None): """Compiles attributes from: client, job_ref, user-provided attributes. Attributes from all of these sources are merged together. Note the - attributes are added sequentially based on perceived order of precendence: + attributes are added sequentially based on perceived order of precedence: i.e. attributes added last may overwrite attributes added earlier. Args: diff --git a/packages/google-cloud-bigquery/tests/system/test_query.py b/packages/google-cloud-bigquery/tests/system/test_query.py index b8e0c00da971..82be40693ffe 100644 --- a/packages/google-cloud-bigquery/tests/system/test_query.py +++ b/packages/google-cloud-bigquery/tests/system/test_query.py @@ -477,7 +477,7 @@ def test_query_error_w_api_method_default(bigquery_client: bigquery.Client): """Test that an exception is not thrown until fetching the results. For backwards compatibility, jobs.insert is the default API method. With - jobs.insert, a failed query job is "sucessfully" created. An exception is + jobs.insert, a failed query job is "successfully" created. An exception is thrown when fetching the results. """ diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py index 404a546ff100..c30964c576f8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py @@ -711,7 +711,7 @@ def test_query_and_wait_caches_completed_query_results_one_page(): {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, ], - # Even though totalRows > len(rows), we should use the presense of a + # Even though totalRows > len(rows), we should use the presence of a # next page token to decide if there are any more pages. "totalRows": 8, } @@ -828,7 +828,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, ], - # Even though totalRows <= len(rows), we should use the presense of a + # Even though totalRows <= len(rows), we should use the presence of a # next page token to decide if there are any more pages. "totalRows": 2, "pageToken": "page-2", @@ -981,7 +981,7 @@ def test_query_and_wait_incomplete_query(): {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, ], - # Even though totalRows <= len(rows), we should use the presense of a + # Even though totalRows <= len(rows), we should use the presence of a # next page token to decide if there are any more pages. "totalRows": 2, "pageToken": "page-2", From 2d78d6cb3b46ed6d4f4786d659b9601f27eb2e7f Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 27 Feb 2024 12:48:46 -0500 Subject: [PATCH 1732/2016] build(deps): bump cryptography from 42.0.2 to 42.0.4 in .kokoro (#1833) Source-Link: https://github.com/googleapis/synthtool/commit/d895aec3679ad22aa120481f746bf9f2f325f26f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:98f3afd11308259de6e828e37376d18867fd321aba07826e29e4f8d9cab56bad Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/requirements.txt | 66 +++++++++---------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 51213ca00ee3..e4e943e0259a 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:a0c4463fcfd9893fc172a3b3db2b6ac0c7b94ec6ad458c7dcea12d9693615ac3 -# created: 2024-02-17T12:21:23.177926195Z + digest: sha256:98f3afd11308259de6e828e37376d18867fd321aba07826e29e4f8d9cab56bad +# created: 2024-02-27T15:56:18.442440378Z diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index f80bdcd62981..bda8e38c4f31 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -93,39 +93,39 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==42.0.2 \ - --hash=sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380 \ - --hash=sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589 \ - --hash=sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea \ - --hash=sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65 \ - --hash=sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a \ - --hash=sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3 \ - --hash=sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008 \ - --hash=sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1 \ - --hash=sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2 \ - --hash=sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635 \ - --hash=sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2 \ - --hash=sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90 \ - --hash=sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee \ - --hash=sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a \ - --hash=sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242 \ - --hash=sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12 \ - --hash=sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2 \ - --hash=sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d \ - --hash=sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be \ - --hash=sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee \ - --hash=sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6 \ - --hash=sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529 \ - --hash=sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929 \ - --hash=sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1 \ - --hash=sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6 \ - --hash=sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a \ - --hash=sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446 \ - --hash=sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9 \ - --hash=sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888 \ - --hash=sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4 \ - --hash=sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33 \ - --hash=sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f +cryptography==42.0.4 \ + --hash=sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b \ + --hash=sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce \ + --hash=sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88 \ + --hash=sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7 \ + --hash=sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20 \ + --hash=sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9 \ + --hash=sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff \ + --hash=sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1 \ + --hash=sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764 \ + --hash=sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b \ + --hash=sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298 \ + --hash=sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1 \ + --hash=sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824 \ + --hash=sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257 \ + --hash=sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a \ + --hash=sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129 \ + --hash=sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb \ + --hash=sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929 \ + --hash=sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854 \ + --hash=sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52 \ + --hash=sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923 \ + --hash=sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885 \ + --hash=sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0 \ + --hash=sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd \ + --hash=sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2 \ + --hash=sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18 \ + --hash=sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b \ + --hash=sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992 \ + --hash=sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74 \ + --hash=sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660 \ + --hash=sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925 \ + --hash=sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449 # via # gcp-releasetool # secretstorage From 1cd001652429d577ca70a8775a4a7287ba31b52a Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Tue, 27 Feb 2024 12:18:10 -0800 Subject: [PATCH 1733/2016] feat: support slot_ms in QueryPlanEntry (#1831) --- .../google-cloud-bigquery/google/cloud/bigquery/job/query.py | 5 +++++ .../google-cloud-bigquery/tests/unit/job/test_query_stats.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index ac0c51973026..e45a46894ce4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -2263,6 +2263,11 @@ def steps(self): for step in self._properties.get("steps", []) ] + @property + def slot_ms(self): + """Optional[int]: Slot-milliseconds used by the stage.""" + return _helpers._int_or_none(self._properties.get("slotMs")) + class TimelineEntry(object): """TimelineEntry represents progress of a query job at a particular diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py index bdd0fb627754..61b278d43f88 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py @@ -261,6 +261,7 @@ class TestQueryPlanEntry(_Base): STATUS = "STATUS" SHUFFLE_OUTPUT_BYTES = 1024 SHUFFLE_OUTPUT_BYTES_SPILLED = 1 + SLOT_MS = 25 START_RFC3339_MICROS = "2018-04-01T00:00:00.000000Z" END_RFC3339_MICROS = "2018-04-01T00:00:04.000000Z" @@ -305,6 +306,7 @@ def test_from_api_repr_empty(self): self.assertIsNone(entry.shuffle_output_bytes) self.assertIsNone(entry.shuffle_output_bytes_spilled) self.assertEqual(entry.steps, []) + self.assertIsNone(entry.slot_ms) def test_from_api_repr_normal(self): from google.cloud.bigquery.job import QueryPlanEntryStep @@ -348,6 +350,7 @@ def test_from_api_repr_normal(self): "substeps": TestQueryPlanEntryStep.SUBSTEPS, } ], + "slotMs": self.SLOT_MS, } klass = self._get_target_class() @@ -366,6 +369,7 @@ def test_from_api_repr_normal(self): self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) self.assertEqual(entry.status, self.STATUS) self.assertEqual(entry.steps, steps) + self.assertEqual(entry.slot_ms, self.SLOT_MS) def test_start(self): from google.cloud._helpers import _RFC3339_MICROS From 062bd8747a31149df0a4c6edaf94ec3a3f81f0d9 Mon Sep 17 00:00:00 2001 From: Kira Date: Tue, 27 Feb 2024 14:03:44 -0800 Subject: [PATCH 1734/2016] fix: keyword rendering and docstring improvements (#1829) * fix: keyword rendering and docstring improvements * fix error * small lint fix --------- Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/client.py | 174 +++++++++--------- 1 file changed, 84 insertions(+), 90 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4708e753bc6e..a871dc00396e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1716,20 +1716,24 @@ def delete_job_metadata( :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. Args: - job_id: Job or job identifier. - - Keyword Arguments: - project: + job_id (Union[ \ + str, \ + LoadJob, \ + CopyJob, \ + ExtractJob, \ + QueryJob \ + ]): Job or job identifier. + project (Optional[str]): ID of the project which owns the job (defaults to the client's project). - location: + location (Optional[str]): Location where the job was run. Ignored if ``job_id`` is a job object. - retry: + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok: + not_found_ok (Optional[bool]): Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the job. """ @@ -1970,12 +1974,10 @@ def create_job( timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. + Args: job_config (dict): configuration job representation returned from the API. - - Keyword Arguments: - retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -2066,10 +2068,14 @@ def get_job( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get Args: - job_id: + job_id (Union[ \ + str, \ + job.LoadJob, \ + job.CopyJob, \ + job.ExtractJob, \ + job.QueryJob \ + ]): Job identifier. - - Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). location (Optional[str]): @@ -2137,8 +2143,6 @@ def cancel_job( google.cloud.bigquery.job.ExtractJob, \ google.cloud.bigquery.job.QueryJob \ ]): Job identifier. - - Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). location (Optional[str]): @@ -2340,8 +2344,6 @@ def load_table_from_uri( in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - - Keyword Arguments: job_id (Optional[str]): Name of the job. job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. @@ -2415,39 +2417,42 @@ def load_table_from_file( returns a :class:`~google.cloud.bigquery.job.LoadJob`. Args: - file_obj: + file_obj (IO[bytes]): A file handle opened in binary mode for reading. - destination: + destination (Union[Table, \ + TableReference, \ + TableListItem, \ + str \ + ]): Table into which data is to be loaded. If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - - Keyword Arguments: - rewind: + rewind (Optional[bool]): If True, seek to the beginning of the file handle before - reading the file. - size: + reading the file. Defaults to False. + size (Optional[int]): The number of bytes to read from the file handle. If size is ``None`` or large, resumable upload will be used. Otherwise, multipart upload will be used. - num_retries: Number of upload retries. Defaults to 6. - job_id: Name of the job. - job_id_prefix: + num_retries (Optional[int]): Number of upload retries. Defaults to 6. + job_id (Optional[str]): Name of the job. + job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location: + location (Optional[str]): Location where to run the job. Must match the location of the destination table. - project: + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config: + job_config (Optional[LoadJobConfig]): Extra configuration options for the job. - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. + Defaults to None. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. @@ -2535,9 +2540,13 @@ def load_table_from_dataframe( https://github.com/googleapis/python-bigquery/issues/19 Args: - dataframe: + dataframe (pandas.Dataframe): A :class:`~pandas.DataFrame` containing the data to load. - destination: + destination (Union[ \ + Table, \ + TableReference, \ + str \ + ]): The destination table to use for loading the data. If it is an existing table, the schema of the :class:`~pandas.DataFrame` must match the schema of the destination table. If the table @@ -2547,21 +2556,19 @@ def load_table_from_dataframe( If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - - Keyword Arguments: - num_retries: Number of upload retries. - job_id: Name of the job. - job_id_prefix: + num_retries (Optional[int]): Number of upload retries. Defaults to 6. + job_id (Optional[str]): Name of the job. + job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location: + location (Optional[str]): Location where to run the job. Must match the location of the destination table. - project: + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config: + job_config (Optional[LoadJobConfig]): Extra configuration options for the job. To override the default pandas data type conversions, supply @@ -2578,9 +2585,10 @@ def load_table_from_dataframe( :attr:`~google.cloud.bigquery.job.SourceFormat.CSV` and :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are supported. - parquet_compression: + parquet_compression (Optional[str]): [Beta] The compression method to use if intermittently serializing ``dataframe`` to a parquet file. + Defaults to "snappy". The argument is directly passed as the ``compression`` argument to the underlying ``pyarrow.parquet.write_table()`` @@ -2591,10 +2599,11 @@ def load_table_from_dataframe( passed as the ``compression`` argument to the underlying ``DataFrame.to_parquet()`` method. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet - timeout: + timeout (Optional[flaot]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. + Defaults to None. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. @@ -2784,32 +2793,36 @@ def load_table_from_json( client = bigquery.Client() client.load_table_from_file(data_as_file, ...) - destination: + destination (Union[ \ + Table, \ + TableReference, \ + TableListItem, \ + str \ + ]): Table into which data is to be loaded. If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - - Keyword Arguments: - num_retries: Number of upload retries. - job_id: Name of the job. - job_id_prefix: + num_retries (Optional[int]): Number of upload retries. Defaults to 6. + job_id (Optional[str]): Name of the job. + job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location: + location (Optional[str]): Location where to run the job. Must match the location of the destination table. - project: + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config: + job_config (Optional[LoadJobConfig]): Extra configuration options for the job. The ``source_format`` setting is always set to :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`. - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. + Defaults to None. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. @@ -2885,23 +2898,19 @@ def _do_resumable_upload( """Perform a resumable upload. Args: - stream: A bytes IO object open for reading. - - metadata: The metadata associated with the upload. - - num_retries: + stream (IO[bytes]): A bytes IO object open for reading. + metadata (Mapping[str, str]): The metadata associated with the upload. + num_retries (int): Number of upload retries. (Deprecated: This argument will be removed in a future release.) - - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. - - project: + project (Optional[str]): Project ID of the project of where to run the upload. Defaults to the client's project. @@ -2929,23 +2938,19 @@ def _initiate_resumable_upload( """Initiate a resumable upload. Args: - stream: A bytes IO object open for reading. - - metadata: The metadata associated with the upload. - - num_retries: + stream (IO[bytes]): A bytes IO object open for reading. + metadata (Mapping[str, str]): The metadata associated with the upload. + num_retries (int): Number of upload retries. (Deprecated: This argument will be removed in a future release.) - - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. - - project: + project (Optional[str]): Project ID of the project of where to run the upload. Defaults to the client's project. @@ -3005,28 +3010,23 @@ def _do_multipart_upload( """Perform a multipart upload. Args: - stream: A bytes IO object open for reading. - - metadata: The metadata associated with the upload. - - size: + stream (IO[bytes]): A bytes IO object open for reading. + metadata (Mapping[str, str]): The metadata associated with the upload. + size (int): The number of bytes to be uploaded (which will be read from ``stream``). If not provided, the upload will be concluded once ``stream`` is exhausted (or :data:`None`). - - num_retries: + num_retries (int): Number of upload retries. (Deprecated: This argument will be removed in a future release.) - - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. - - project: + project (Optional[str]): Project ID of the project of where to run the upload. Defaults to the client's project. @@ -3118,8 +3118,6 @@ def copy_table( str, \ ]): Table into which data is to be copied. - - Keyword Arguments: job_id (Optional[str]): The ID of the job. job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. @@ -3216,8 +3214,6 @@ def extract_table( URIs of Cloud Storage file(s) into which table data is to be extracted; in format ``gs:///``. - - Keyword Arguments: job_id (Optional[str]): The ID of the job. job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. @@ -3306,8 +3302,6 @@ def query( query (str): SQL query to be executed. Defaults to the standard SQL dialect. Use the ``job_config`` parameter to change dialects. - - Keyword Arguments: job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): Extra configuration options for the job. To override any options that were previously set in From 7095cc1520d29ea85b891e0d54ba9dbea9a695d6 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 28 Feb 2024 16:42:45 -0500 Subject: [PATCH 1735/2016] docs(samples): updates to urllib3 constraint for Python 3.7 (#1834) * fix: updates to urllib3 constraint for Python 3.7 * refine urllib3 constraint * revises geo reqs and rolls back contraints * tweaking the geo sample requirements * more tweaks to geo reqs * more tweaks, testing arbitrary equality, 2.x version * tweak google-auth --- .../google-cloud-bigquery/samples/geography/requirements.txt | 5 +++-- packages/google-cloud-bigquery/testing/constraints-3.7.txt | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index d6cea7ec5799..3329113457c0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas==0.13.2; python_version == '3.8' geopandas==0.14.1; python_version >= '3.9' google-api-core==2.11.1 -google-auth==2.22.0 +google-auth==2.25.2 google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-cloud-core==2.3.3 @@ -45,4 +45,5 @@ Shapely==2.0.2 six==1.16.0 typing-extensions==4.7.1 typing-inspect==0.9.0 -urllib3==1.26.18 +urllib3===1.26.18; python_version == '3.7' +urllib3==2.2.1; python_version >= '3.8' \ No newline at end of file diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 2ea482e8bccd..9f71bf11ab6e 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -27,4 +27,4 @@ python-dateutil==2.7.3 requests==2.21.0 Shapely==1.8.4 six==1.13.0 -tqdm==4.7.4 +tqdm==4.7.4 \ No newline at end of file From f5f077eae5e0487daca76a639b0d4998159e9322 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 29 Feb 2024 16:01:12 +0100 Subject: [PATCH 1736/2016] chore(deps): update all dependencies (#1640) * chore(deps): update all dependencies * revert geopandas * See https://cffi.readthedocs.io/en/stable/whatsnew.html#v1-16-0 * See https://pandas.pydata.org/docs/dev/whatsnew/v2.1.0.html#backwards-incompatible-api-changes * See https://matplotlib.org/stable/api/prev_api_changes/api_changes_3.8.0.html#increase-to-minimum-supported-versions-of-dependencies * Remove libcst which is not used * See https://github.com/python/typing_extensions/releases/tag/4.8.0 --------- Co-authored-by: Anthonios Partheniou Co-authored-by: Chalmer Lowe --- .../samples/desktopapp/requirements-test.txt | 4 +- .../samples/desktopapp/requirements.txt | 4 +- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 47 ++++++++++--------- .../samples/magics/requirements-test.txt | 4 +- .../samples/magics/requirements.txt | 11 +++-- .../samples/notebooks/requirements-test.txt | 4 +- .../samples/notebooks/requirements.txt | 14 +++--- .../samples/snippets/requirements-test.txt | 4 +- .../samples/snippets/requirements.txt | 2 +- 10 files changed, 50 insertions(+), 46 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 514f09705ae8..fc926cd7cb08 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.3.3 -pytest==7.4.0 +google-cloud-testutils==1.4.0 +pytest==7.4.3 mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index a5b3ad130227..8d82d4930e48 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.11.4 -google-auth-oauthlib==1.0.0 +google-cloud-bigquery==3.14.1 +google-auth-oauthlib==1.2.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 6585a560a7e7..7749d1f9499c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.4.0 +pytest==7.4.3 mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 3329113457c0..47e7cc56ee43 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,49 +1,50 @@ attrs==23.1.0 -certifi==2023.7.22 -cffi==1.15.1 -charset-normalizer==3.2.0 -click==8.1.6 +certifi==2023.11.17 +cffi===1.15.1; python_version == '3.7' +cffi==1.16.0; python_version >= '3.8' +charset-normalizer==3.3.2 +click==8.1.7 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.1.1 +db-dtypes==1.2.0 Fiona==1.9.5 geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' -geopandas==0.13.2; python_version == '3.8' +geopandas===0.13.2; python_version == '3.8' geopandas==0.14.1; python_version >= '3.9' -google-api-core==2.11.1 +google-api-core==2.15.0 google-auth==2.25.2 -google-cloud-bigquery==3.11.4 -google-cloud-bigquery-storage==2.22.0 -google-cloud-core==2.3.3 +google-cloud-bigquery==3.14.1 +google-cloud-bigquery-storage==2.24.0 +google-cloud-core==2.4.1 google-crc32c==1.5.0 -google-resumable-media==2.5.0 -googleapis-common-protos==1.60.0 -grpcio==1.59.0 -idna==3.4 -libcst==1.0.1; python_version == '3.7' -libcst==1.1.0; python_version >= '3.8' +google-resumable-media==2.7.0 +googleapis-common-protos==1.62.0 +grpcio==1.60.0 +idna==3.6 munch==4.0.0 mypy-extensions==1.0.0 -packaging==23.1 +packaging==23.2 pandas===1.3.5; python_version == '3.7' -pandas==2.0.3; python_version >= '3.8' -proto-plus==1.22.3 +pandas===2.0.3; python_version == '3.8' +pandas==2.1.0; python_version >= '3.9' +proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==14.0.1; python_version >= '3.8' -pyasn1==0.5.0 +pyasn1==0.5.1 pyasn1-modules==0.3.0 pycparser==2.21 pyparsing==3.1.1 python-dateutil==2.8.2 -pytz==2023.3 +pytz==2023.3.post1 PyYAML==6.0.1 requests==2.31.0 rsa==4.9 Shapely==2.0.2 six==1.16.0 -typing-extensions==4.7.1 +typing-extensions===4.7.1; python_version == '3.7' +typing-extensions==4.9.0; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' -urllib3==2.2.1; python_version >= '3.8' \ No newline at end of file +urllib3==2.2.1; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 514f09705ae8..fc926cd7cb08 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.3.3 -pytest==7.4.0 +google-cloud-testutils==1.4.0 +pytest==7.4.3 mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index c8f6b2765393..869d3b4d5580 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,8 +1,9 @@ -db-dtypes==1.1.1 -google.cloud.bigquery==3.11.4 -google-cloud-bigquery-storage==2.22.0 +db-dtypes==1.2.0 +google.cloud.bigquery==3.14.1 +google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.14.0; python_version >= '3.9' +ipython==8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' -pandas==2.0.3; python_version >= '3.8' +pandas===2.0.3; python_version == '3.8' +pandas==2.1.0; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 514f09705ae8..fc926cd7cb08 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.3.3 -pytest==7.4.0 +google-cloud-testutils==1.4.0 +pytest==7.4.3 mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 22c46297f260..e8839e1fed9e 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,10 +1,12 @@ -db-dtypes==1.1.1 -google-cloud-bigquery==3.11.4 -google-cloud-bigquery-storage==2.22.0 +db-dtypes==1.2.0 +google-cloud-bigquery==3.14.1 +google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.14.0; python_version >= '3.9' +ipython==8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.2; python_version >= '3.8' +matplotlib===3.7.4; python_version == '3.8' +matplotlib==3.8.2; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' -pandas==2.0.3; python_version >= '3.8' +pandas===2.0.3; python_version == '3.8' +pandas==2.1.0; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 514f09705ae8..fc926cd7cb08 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.3.3 -pytest==7.4.0 +google-cloud-testutils==1.4.0 +pytest==7.4.3 mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index f49c7494fd81..365d584c7bca 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-bigquery==3.11.4 \ No newline at end of file +google-cloud-bigquery==3.14.1 \ No newline at end of file From 5842ad3d792196d97d3523fadd51aefdff61cb64 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 4 Mar 2024 11:03:09 -0600 Subject: [PATCH 1737/2016] chore(main): release 3.18.0 (#1817) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 19 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 7bedb5cf6c7f..350787512595 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.18.0](https://github.com/googleapis/python-bigquery/compare/v3.17.2...v3.18.0) (2024-02-29) + + +### Features + +* Support nullable boolean and Int64 dtypes in `insert_rows_from_dataframe` ([#1816](https://github.com/googleapis/python-bigquery/issues/1816)) ([ab0cf4c](https://github.com/googleapis/python-bigquery/commit/ab0cf4cc03292f62b56a8813cfb7681daa87f872)) +* Support slot_ms in QueryPlanEntry ([#1831](https://github.com/googleapis/python-bigquery/issues/1831)) ([d62cabb](https://github.com/googleapis/python-bigquery/commit/d62cabbf115637ecbaf8cc378f39329a5ae74c26)) + + +### Bug Fixes + +* Keyword rendering and docstring improvements ([#1829](https://github.com/googleapis/python-bigquery/issues/1829)) ([4dfb920](https://github.com/googleapis/python-bigquery/commit/4dfb920b106784e98f343b3e3fc8e8ff70c50560)) + + +### Documentation + +* **samples:** Updates to urllib3 constraint for Python 3.7 ([#1834](https://github.com/googleapis/python-bigquery/issues/1834)) ([b099c32](https://github.com/googleapis/python-bigquery/commit/b099c32a83946a347560f6a71d08c3f263e56cb6)) +* Update `client_query_w_named_params.py` to use `query_and_wait` API ([#1782](https://github.com/googleapis/python-bigquery/issues/1782)) ([89dfcb6](https://github.com/googleapis/python-bigquery/commit/89dfcb6469d22e78003a70371a0938a6856e033c)) + ## [3.17.2](https://github.com/googleapis/python-bigquery/compare/v3.17.1...v3.17.2) (2024-01-30) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 771b77a38766..89024cc08a0c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.17.2" +__version__ = "3.18.0" From 0350eecc7db01b20e3606224d676ccd8875b7d17 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 4 Mar 2024 11:06:07 -0800 Subject: [PATCH 1738/2016] feat: support RANGE query parameters (#1827) * feat: RANGE query parameters and unit tests * unit test * unit test coverage * lint * lint * lint * system test * fix system test * ajust init items order * fix typos and improve docstrings --- .../benchmark/benchmark.py | 2 +- .../google/cloud/bigquery/__init__.py | 4 + .../google/cloud/bigquery/query.py | 297 ++++++++++ .../tests/system/test_query.py | 33 ++ .../tests/unit/test_query.py | 548 ++++++++++++++++++ 5 files changed, 883 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/benchmark/benchmark.py b/packages/google-cloud-bigquery/benchmark/benchmark.py index 30e294baa9a9..d7dc78678e1a 100644 --- a/packages/google-cloud-bigquery/benchmark/benchmark.py +++ b/packages/google-cloud-bigquery/benchmark/benchmark.py @@ -231,7 +231,7 @@ def _is_datetime_min(time_str: str) -> bool: def _summary(run: dict) -> str: - """Coverts run dict to run summary string.""" + """Converts run dict to run summary string.""" no_val = "NODATA" output = ["QUERYTIME "] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 1ea056eb8072..caf81d9aa467 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -83,6 +83,8 @@ from google.cloud.bigquery.query import ConnectionProperty from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import ScalarQueryParameterType +from google.cloud.bigquery.query import RangeQueryParameter +from google.cloud.bigquery.query import RangeQueryParameterType from google.cloud.bigquery.query import SqlParameterScalarTypes from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import StructQueryParameterType @@ -122,10 +124,12 @@ "ArrayQueryParameter", "ScalarQueryParameter", "StructQueryParameter", + "RangeQueryParameter", "ArrayQueryParameterType", "ScalarQueryParameterType", "SqlParameterScalarTypes", "StructQueryParameterType", + "RangeQueryParameterType", # Datasets "Dataset", "DatasetReference", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index a06ece503c02..9c9402b7413c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -30,6 +30,8 @@ Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date] ] +_RANGE_ELEMENT_TYPE_STR = {"TIMESTAMP", "DATETIME", "DATE"} + class ConnectionProperty: """A connection-level property to customize query behavior. @@ -362,6 +364,129 @@ def __repr__(self): return f"{self.__class__.__name__}({items}{name}{description})" +class RangeQueryParameterType(_AbstractQueryParameterType): + """Type representation for range query parameters. + + Args: + type_ (Union[ScalarQueryParameterType, str]): + Type of range element, must be one of 'TIMESTAMP', 'DATETIME', or + 'DATE'. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + @classmethod + def _parse_range_element_type(self, type_): + """Helper method that parses the input range element type, which may + be a string, or a ScalarQueryParameterType object. + + Returns: + google.cloud.bigquery.query.ScalarQueryParameterType: Instance + """ + if isinstance(type_, str): + if type_ not in _RANGE_ELEMENT_TYPE_STR: + raise ValueError( + "If given as a string, range element type must be one of " + "'TIMESTAMP', 'DATE', or 'DATETIME'." + ) + return ScalarQueryParameterType(type_) + elif isinstance(type_, ScalarQueryParameterType): + if type_._type not in _RANGE_ELEMENT_TYPE_STR: + raise ValueError( + "If given as a ScalarQueryParameter object, range element " + "type must be one of 'TIMESTAMP', 'DATE', or 'DATETIME' " + "type." + ) + return type_ + else: + raise ValueError( + "range_type must be a string or ScalarQueryParameter object, " + "of 'TIMESTAMP', 'DATE', or 'DATETIME' type." + ) + + def __init__(self, type_, *, name=None, description=None): + self.type_ = self._parse_range_element_type(type_) + self.name = name + self.description = description + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.RangeQueryParameterType: Instance + """ + type_ = resource["rangeElementType"]["type"] + name = resource.get("name") + description = resource.get("description") + + return cls(type_, name=name, description=description) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + # Name and description are only used if the type is a field inside a struct + # type, but it's StructQueryParameterType's responsibilty to use these two + # attributes in the API representation when needed. Here we omit them. + return { + "type": "RANGE", + "rangeElementType": self.type_.to_api_repr(), + } + + def with_name(self, new_name: Union[str, None]): + """Return a copy of the instance with ``name`` set to ``new_name``. + + Args: + name (Union[str, None]): + The new name of the range query parameter type. If ``None``, + the existing name is cleared. + + Returns: + google.cloud.bigquery.query.RangeQueryParameterType: + A new instance with updated name. + """ + return type(self)(self.type_, name=new_name, description=self.description) + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + return f"{self.__class__.__name__}({self.type_!r}{name}{description})" + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + Tuple: The contents of this + :class:`~google.cloud.bigquery.query.RangeQueryParameterType`. + """ + type_ = self.type_.to_api_repr() + return (self.name, type_, self.description) + + def __eq__(self, other): + if not isinstance(other, RangeQueryParameterType): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + class _AbstractQueryParameter(object): """Base class for named / positional query parameters.""" @@ -811,6 +936,178 @@ def __repr__(self): return "StructQueryParameter{}".format(self._key()) +class RangeQueryParameter(_AbstractQueryParameter): + """Named / positional query parameters for range values. + + Args: + range_element_type (Union[str, RangeQueryParameterType]): + The type of range elements. It must be one of 'TIMESTAMP', + 'DATE', or 'DATETIME'. + + start (Optional[Union[ScalarQueryParameter, str]]): + The start of the range value. Must be the same type as + range_element_type. If not provided, it's interpreted as UNBOUNDED. + + end (Optional[Union[ScalarQueryParameter, str]]): + The end of the range value. Must be the same type as + range_element_type. If not provided, it's interpreted as UNBOUNDED. + + name (Optional[str]): + Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). + """ + + @classmethod + def _parse_range_element_type(self, range_element_type): + if isinstance(range_element_type, str): + if range_element_type not in _RANGE_ELEMENT_TYPE_STR: + raise ValueError( + "If given as a string, range_element_type must be one of " + f"'TIMESTAMP', 'DATE', or 'DATETIME'. Got {range_element_type}." + ) + return RangeQueryParameterType(range_element_type) + elif isinstance(range_element_type, RangeQueryParameterType): + if range_element_type.type_._type not in _RANGE_ELEMENT_TYPE_STR: + raise ValueError( + "If given as a RangeQueryParameterType object, " + "range_element_type must be one of 'TIMESTAMP', 'DATE', " + "or 'DATETIME' type." + ) + return range_element_type + else: + raise ValueError( + "range_element_type must be a string or " + "RangeQueryParameterType object, of 'TIMESTAMP', 'DATE', " + "or 'DATETIME' type. Got " + f"{type(range_element_type)}:{range_element_type}" + ) + + @classmethod + def _serialize_range_element_value(self, value, type_): + if value is None or isinstance(value, str): + return value + else: + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) + if converter is not None: + return converter(value) # type: ignore + else: + raise ValueError( + f"Cannot convert range element value from type {type_}, " + "must be one of the strings 'TIMESTAMP', 'DATE' " + "'DATETIME' or a RangeQueryParameterType object." + ) + + def __init__( + self, + range_element_type, + start=None, + end=None, + name=None, + ): + self.name = name + self.range_element_type = self._parse_range_element_type(range_element_type) + print(self.range_element_type.type_._type) + self.start = start + self.end = end + + @classmethod + def positional( + cls, range_element_type, start=None, end=None + ) -> "RangeQueryParameter": + """Factory for positional parameters. + + Args: + range_element_type (Union[str, RangeQueryParameterType]): + The type of range elements. It must be one of `'TIMESTAMP'`, + `'DATE'`, or `'DATETIME'`. + + start (Optional[Union[ScalarQueryParameter, str]]): + The start of the range value. Must be the same type as + range_element_type. If not provided, it's interpreted as + UNBOUNDED. + + end (Optional[Union[ScalarQueryParameter, str]]): + The end of the range value. Must be the same type as + range_element_type. If not provided, it's interpreted as + UNBOUNDED. + + Returns: + google.cloud.bigquery.query.RangeQueryParameter: Instance without + name. + """ + return cls(range_element_type, start, end) + + @classmethod + def from_api_repr(cls, resource: dict) -> "RangeQueryParameter": + """Factory: construct parameter from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.RangeQueryParameter: Instance + """ + name = resource.get("name") + range_element_type = ( + resource.get("parameterType", {}).get("rangeElementType", {}).get("type") + ) + range_value = resource.get("parameterValue", {}).get("rangeValue", {}) + start = range_value.get("start", {}).get("value") + end = range_value.get("end", {}).get("value") + + return cls(range_element_type, start=start, end=end, name=name) + + def to_api_repr(self) -> dict: + """Construct JSON API representation for the parameter. + + Returns: + Dict: JSON mapping + """ + range_element_type = self.range_element_type.to_api_repr() + type_ = self.range_element_type.type_._type + start = self._serialize_range_element_value(self.start, type_) + end = self._serialize_range_element_value(self.end, type_) + resource = { + "parameterType": range_element_type, + "parameterValue": { + "rangeValue": { + "start": {"value": start}, + "end": {"value": end}, + }, + }, + } + + # distinguish between name not provided vs. name being empty string + if self.name is not None: + resource["name"] = self.name + + return resource + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + Tuple: The contents of this + :class:`~google.cloud.bigquery.query.RangeQueryParameter`. + """ + + range_element_type = self.range_element_type.to_api_repr() + return (self.name, range_element_type, self.start, self.end) + + def __eq__(self, other): + if not isinstance(other, RangeQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return "RangeQueryParameter{}".format(self._key()) + + class SqlParameterScalarTypes: """Supported scalar SQL query parameter types as type objects.""" diff --git a/packages/google-cloud-bigquery/tests/system/test_query.py b/packages/google-cloud-bigquery/tests/system/test_query.py index 82be40693ffe..0494272d9f9f 100644 --- a/packages/google-cloud-bigquery/tests/system/test_query.py +++ b/packages/google-cloud-bigquery/tests/system/test_query.py @@ -26,6 +26,7 @@ from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import StructQueryParameterType +from google.cloud.bigquery.query import RangeQueryParameter @pytest.fixture(params=["INSERT", "QUERY"]) @@ -422,6 +423,38 @@ def test_query_statistics(bigquery_client, query_api_method): ) ], ), + ( + "SELECT @range_date", + "[2016-12-05, UNBOUNDED)", + [ + RangeQueryParameter( + name="range_date", + range_element_type="DATE", + start=datetime.date(2016, 12, 5), + ) + ], + ), + ( + "SELECT @range_datetime", + "[2016-12-05T00:00:00, UNBOUNDED)", + [ + RangeQueryParameter( + name="range_datetime", + range_element_type="DATETIME", + start=datetime.datetime(2016, 12, 5), + ) + ], + ), + ( + "SELECT @range_unbounded", + "[UNBOUNDED, UNBOUNDED)", + [ + RangeQueryParameter( + name="range_unbounded", + range_element_type="DATETIME", + ) + ], + ), ), ) def test_query_parameters( diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 1704abac70ee..f511bf28d985 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -376,6 +376,100 @@ def test_repr_all_optional_attrs(self): self.assertEqual(repr(param_type), expected) +class Test_RangeQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import RangeQueryParameterType + + return RangeQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_str(self): + param_type = self._make_one("DATE", name="foo", description="bar") + self.assertEqual(param_type.type_._type, "DATE") + self.assertEqual(param_type.name, "foo") + self.assertEqual(param_type.description, "bar") + + def test_ctor_type(self): + from google.cloud.bigquery import ScalarQueryParameterType + + scalar_type = ScalarQueryParameterType("DATE") + param_type = self._make_one(scalar_type, name="foo", description="bar") + self.assertEqual(param_type.type_._type, "DATE") + self.assertEqual(param_type.name, "foo") + self.assertEqual(param_type.description, "bar") + + def test_ctor_unsupported_type_str(self): + with self.assertRaises(ValueError): + self._make_one("TIME") + + def test_ctor_unsupported_type_type(self): + from google.cloud.bigquery import ScalarQueryParameterType + + scalar_type = ScalarQueryParameterType("TIME") + with self.assertRaises(ValueError): + self._make_one(scalar_type) + + def test_ctor_wrong_type(self): + with self.assertRaises(ValueError): + self._make_one(None) + + def test_from_api_repr(self): + RESOURCE = { + "type": "RANGE", + "rangeElementType": {"type": "DATE"}, + } + + klass = self._get_target_class() + result = klass.from_api_repr(RESOURCE) + self.assertEqual(result.type_._type, "DATE") + self.assertIsNone(result.name) + self.assertIsNone(result.description) + + def test_to_api_repr(self): + EXPECTED = { + "type": "RANGE", + "rangeElementType": {"type": "DATE"}, + } + param_type = self._make_one("DATE", name="foo", description="bar") + result = param_type.to_api_repr() + self.assertEqual(result, EXPECTED) + + def test__repr__(self): + param_type = self._make_one("DATE", name="foo", description="bar") + param_repr = "RangeQueryParameterType(ScalarQueryParameterType('DATE'), name='foo', description='bar')" + self.assertEqual(repr(param_type), param_repr) + + def test__eq__(self): + param_type1 = self._make_one("DATE", name="foo", description="bar") + self.assertEqual(param_type1, param_type1) + self.assertNotEqual(param_type1, object()) + + alias = self._make_one("DATE", name="foo", description="bar") + self.assertIsNot(param_type1, alias) + self.assertEqual(param_type1, alias) + + wrong_type = self._make_one("DATETIME", name="foo", description="bar") + self.assertNotEqual(param_type1, wrong_type) + + wrong_name = self._make_one("DATETIME", name="foo2", description="bar") + self.assertNotEqual(param_type1, wrong_name) + + wrong_description = self._make_one("DATETIME", name="foo", description="bar2") + self.assertNotEqual(param_type1, wrong_description) + + def test_with_name(self): + param_type1 = self._make_one("DATE", name="foo", description="bar") + param_type2 = param_type1.with_name("foo2") + + self.assertIsNot(param_type1, param_type2) + self.assertEqual(param_type2.type_._type, "DATE") + self.assertEqual(param_type2.name, "foo2") + self.assertEqual(param_type2.description, "bar") + + class Test__AbstractQueryParameter(unittest.TestCase): @staticmethod def _get_target_class(): @@ -663,6 +757,460 @@ def test___repr__(self): self.assertEqual(repr(field1), expected) +class Test_RangeQueryParameter(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import RangeQueryParameter + + return RangeQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + param = self._make_one( + range_element_type="DATE", start="2016-08-11", name="foo" + ) + self.assertEqual(param.name, "foo") + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, "2016-08-11") + self.assertIs(param.end, None) + + def test_ctor_w_datetime_query_parameter_type_str(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATETIME") + start_datetime = datetime.datetime(year=2020, month=12, day=31, hour=12) + end_datetime = datetime.datetime(year=2021, month=12, day=31, hour=12) + param = self._make_one( + range_element_type="DATETIME", + start=start_datetime, + end=end_datetime, + name="foo", + ) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, start_datetime) + self.assertEqual(param.end, end_datetime) + self.assertEqual(param.name, "foo") + + def test_ctor_w_datetime_query_parameter_type_type(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATETIME") + param = self._make_one(range_element_type=range_element_type) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, None) + self.assertEqual(param.end, None) + self.assertEqual(param.name, None) + + def test_ctor_w_timestamp_query_parameter_typ_str(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="TIMESTAMP") + start_datetime = datetime.datetime(year=2020, month=12, day=31, hour=12) + end_datetime = datetime.datetime(year=2021, month=12, day=31, hour=12) + param = self._make_one( + range_element_type="TIMESTAMP", + start=start_datetime, + end=end_datetime, + name="foo", + ) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, start_datetime) + self.assertEqual(param.end, end_datetime) + self.assertEqual(param.name, "foo") + + def test_ctor_w_timestamp_query_parameter_type_type(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="TIMESTAMP") + param = self._make_one(range_element_type=range_element_type) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, None) + self.assertEqual(param.end, None) + self.assertEqual(param.name, None) + + def test_ctor_w_date_query_parameter_type_str(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + start_date = datetime.date(year=2020, month=12, day=31) + end_date = datetime.date(year=2021, month=12, day=31) + param = self._make_one( + range_element_type="DATE", + start=start_date, + end=end_date, + name="foo", + ) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, start_date) + self.assertEqual(param.end, end_date) + self.assertEqual(param.name, "foo") + + def test_ctor_w_date_query_parameter_type_type(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + param = self._make_one(range_element_type=range_element_type) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, None) + self.assertEqual(param.end, None) + self.assertEqual(param.name, None) + + def test_ctor_w_name_empty_str(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + param = self._make_one( + range_element_type="DATE", + name="", + ) + self.assertEqual(param.range_element_type, range_element_type) + self.assertIs(param.start, None) + self.assertIs(param.end, None) + self.assertEqual(param.name, "") + + def test_ctor_wo_value(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATETIME") + param = self._make_one(range_element_type="DATETIME", name="foo") + self.assertEqual(param.range_element_type, range_element_type) + self.assertIs(param.start, None) + self.assertIs(param.end, None) + self.assertEqual(param.name, "foo") + + def test_ctor_w_unsupported_query_parameter_type_str(self): + with self.assertRaises(ValueError): + self._make_one(range_element_type="TIME", name="foo") + + def test_ctor_w_unsupported_query_parameter_type_type(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + range_element_type.type_._type = "TIME" + with self.assertRaises(ValueError): + self._make_one(range_element_type=range_element_type, name="foo") + + def test_ctor_w_unsupported_query_parameter_type_input(self): + with self.assertRaises(ValueError): + self._make_one(range_element_type=None, name="foo") + + def test_positional(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + klass = self._get_target_class() + param = klass.positional( + range_element_type="DATE", start="2016-08-11", end="2016-08-12" + ) + self.assertIs(param.name, None) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, "2016-08-11") + self.assertEqual(param.end, "2016-08-12") + + def test_from_api_repr_w_name(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + RESOURCE = { + "name": "foo", + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": "2020-12-31"}} + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + range_element_type = RangeQueryParameterType(type_="DATE") + self.assertEqual(param.name, "foo") + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, None) + self.assertEqual(param.end, "2020-12-31") + + def test_from_api_repr_wo_name(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + RESOURCE = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": "2020-12-31"}} + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + range_element_type = RangeQueryParameterType(type_="DATE") + self.assertEqual(param.name, None) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, None) + self.assertEqual(param.end, "2020-12-31") + + def test_from_api_repr_wo_value(self): + # Back-end may not send back values for None params. See #9027 + from google.cloud.bigquery.query import RangeQueryParameterType + + RESOURCE = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + } + range_element_type = RangeQueryParameterType(type_="DATE") + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertIs(param.name, None) + self.assertEqual(param.range_element_type, range_element_type) + self.assertIs(param.start, None) + self.assertIs(param.end, None) + + def test_to_api_repr_w_name(self): + EXPECTED = { + "name": "foo", + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": "2016-08-11"}} + }, + } + param = self._make_one(range_element_type="DATE", end="2016-08-11", name="foo") + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": "2016-08-11"}} + }, + } + klass = self._get_target_class() + param = klass.positional(range_element_type="DATE", end="2016-08-11") + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_date_date(self): + today = datetime.date.today() + today_str = today.strftime("%Y-%m-%d") + EXPECTED = { + "name": "foo", + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": today_str}} + }, + } + param = self._make_one(range_element_type="DATE", end=today, name="foo") + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_str(self): + EXPECTED = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATETIME", + }, + }, + "parameterValue": { + "rangeValue": { + "start": {"value": None}, + "end": {"value": "2020-01-01T12:00:00.000000"}, + } + }, + } + klass = self._get_target_class() + end_datetime = datetime.datetime(year=2020, month=1, day=1, hour=12) + param = klass.positional(range_element_type="DATETIME", end=end_datetime) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_datetime(self): + from google.cloud.bigquery._helpers import _RFC3339_MICROS_NO_ZULU + + now = datetime.datetime.utcnow() + now_str = now.strftime(_RFC3339_MICROS_NO_ZULU) + EXPECTED = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATETIME", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": now_str}} + }, + } + klass = self._get_target_class() + param = klass.positional(range_element_type="DATETIME", end=now) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_str(self): + EXPECTED = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "TIMESTAMP", + }, + }, + "parameterValue": { + "rangeValue": { + "start": {"value": None}, + "end": {"value": "2020-01-01 12:00:00+00:00"}, + } + }, + } + klass = self._get_target_class() + end_timestamp = datetime.datetime(year=2020, month=1, day=1, hour=12) + param = klass.positional(range_element_type="TIMESTAMP", end=end_timestamp) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_timestamp(self): + from google.cloud._helpers import UTC # type: ignore + + now = datetime.datetime.utcnow() + now = now.astimezone(UTC) + now_str = str(now) + EXPECTED = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "TIMESTAMP", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": now_str}} + }, + } + klass = self._get_target_class() + param = klass.positional(range_element_type="TIMESTAMP", end=now) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_values(self): + EXPECTED = { + "name": "foo", + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": None}} + }, + } + param = self._make_one(range_element_type="DATE", name="foo") + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_unsupported_value_type(self): + with self.assertRaisesRegex( + ValueError, "Cannot convert range element value from type" + ): + range_param = self._make_one( + range_element_type="DATE", start=datetime.date.today() + ) + range_param.range_element_type.type_._type = "LONG" + range_param.to_api_repr() + + def test___eq__(self): + param = self._make_one( + range_element_type="DATE", start="2016-08-11", name="foo" + ) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one( + range_element_type="DATE", start="2016-08-11", name="bar" + ) + self.assertNotEqual(param, alias) + wrong_type = self._make_one( + range_element_type="DATETIME", + start="2020-12-31 12:00:00.000000", + name="foo", + ) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one( + range_element_type="DATE", start="2016-08-12", name="foo" + ) + self.assertNotEqual(param, wrong_val) + + def test___eq___wrong_type(self): + param = self._make_one( + range_element_type="DATE", start="2016-08-11", name="foo" + ) + other = object() + self.assertNotEqual(param, other) + self.assertEqual(param, mock.ANY) + + def test___eq___name_mismatch(self): + param = self._make_one( + range_element_type="DATE", start="2016-08-11", name="foo" + ) + other = self._make_one( + range_element_type="DATE", start="2016-08-11", name="bar" + ) + self.assertNotEqual(param, other) + + def test___eq___field_type_mismatch(self): + param = self._make_one(range_element_type="DATE") + other = self._make_one(range_element_type="DATETIME") + self.assertNotEqual(param, other) + + def test___eq___value_mismatch(self): + param = self._make_one(range_element_type="DATE", start="2016-08-11") + other = self._make_one(range_element_type="DATE", start="2016-08-12") + self.assertNotEqual(param, other) + + def test___eq___hit(self): + param = self._make_one(range_element_type="DATE", start="2016-08-12") + other = self._make_one(range_element_type="DATE", start="2016-08-12") + self.assertEqual(param, other) + + def test___ne___wrong_type(self): + param = self._make_one(range_element_type="DATE") + other = object() + self.assertNotEqual(param, other) + self.assertEqual(param, mock.ANY) + + def test___ne___same_value(self): + param1 = self._make_one(range_element_type="DATE") + param2 = self._make_one(range_element_type="DATE") + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = param1 != param2 + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + param1 = self._make_one(range_element_type="DATE", start="2016-08-12") + param2 = self._make_one(range_element_type="DATE") + self.assertNotEqual(param1, param2) + + def test___repr__(self): + param1 = self._make_one(range_element_type="DATE", start="2016-08-12") + expected = "RangeQueryParameter(None, {'type': 'RANGE', 'rangeElementType': {'type': 'DATE'}}, '2016-08-12', None)" + self.assertEqual(repr(param1), expected) + + def _make_subparam(name, type_, value): from google.cloud.bigquery.query import ScalarQueryParameter From 4caa60e6ed9904bd664a938b72f8c8ff90a05ca5 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 5 Mar 2024 09:47:31 -0800 Subject: [PATCH 1739/2016] fix: augment universe_domain handling (#1837) * fix: augment universe_domain handling This PR revisits the universe resolution for the BQ client, and handles new requirements like env-based specification and validation. * lint * skipif core too old * deps * add import * no-cover in test helper * lint * ignore google.auth typing * capitalization * change to raise in test code * reviewer feedback * var fix --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/_helpers.py | 56 +++++++++++++ .../google/cloud/bigquery/client.py | 21 +++-- .../tests/unit/helpers.py | 14 ++++ .../tests/unit/test__helpers.py | 80 ++++++++++++++++++- 4 files changed, 162 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 905d4aee1aff..ec4ac9970001 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -30,6 +30,8 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes +from google.auth import credentials as ga_credentials # type: ignore +from google.api_core import client_options as client_options_lib _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -55,9 +57,63 @@ _DEFAULT_HOST = "https://bigquery.googleapis.com" """Default host for JSON API.""" +_DEFAULT_HOST_TEMPLATE = "https://bigquery.{UNIVERSE_DOMAIN}" +""" Templatized endpoint format. """ + _DEFAULT_UNIVERSE = "googleapis.com" """Default universe for the JSON API.""" +_UNIVERSE_DOMAIN_ENV = "GOOGLE_CLOUD_UNIVERSE_DOMAIN" +"""Environment variable for setting universe domain.""" + + +def _get_client_universe( + client_options: Optional[Union[client_options_lib.ClientOptions, dict]] +) -> str: + """Retrieves the specified universe setting. + + Args: + client_options: specified client options. + Returns: + str: resolved universe setting. + + """ + if isinstance(client_options, dict): + client_options = client_options_lib.from_dict(client_options) + universe = _DEFAULT_UNIVERSE + if hasattr(client_options, "universe_domain"): + options_universe = getattr(client_options, "universe_domain") + if options_universe is not None and len(options_universe) > 0: + universe = options_universe + else: + env_universe = os.getenv(_UNIVERSE_DOMAIN_ENV) + if isinstance(env_universe, str) and len(env_universe) > 0: + universe = env_universe + return universe + + +def _validate_universe(client_universe: str, credentials: ga_credentials.Credentials): + """Validates that client provided universe and universe embedded in credentials match. + + Args: + client_universe (str): The universe domain configured via the client options. + credentials (ga_credentials.Credentials): The credentials being used in the client. + + Raises: + ValueError: when client_universe does not match the universe in credentials. + """ + if hasattr(credentials, "universe_domain"): + cred_universe = getattr(credentials, "universe_domain") + if isinstance(cred_universe, str): + if client_universe != cred_universe: + raise ValueError( + "The configured universe domain " + f"({client_universe}) does not match the universe domain " + f"found in the credentials ({cred_universe}). " + "If you haven't configured the universe domain explicitly, " + f"`{_DEFAULT_UNIVERSE}` is the default." + ) + def _get_bigquery_host(): return os.environ.get(BIGQUERY_EMULATOR_HOST, _DEFAULT_HOST) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index a871dc00396e..cb4daa897bcb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -78,7 +78,10 @@ from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._helpers import _get_bigquery_host from google.cloud.bigquery._helpers import _DEFAULT_HOST +from google.cloud.bigquery._helpers import _DEFAULT_HOST_TEMPLATE from google.cloud.bigquery._helpers import _DEFAULT_UNIVERSE +from google.cloud.bigquery._helpers import _validate_universe +from google.cloud.bigquery._helpers import _get_client_universe from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem @@ -245,6 +248,7 @@ def __init__( kw_args = {"client_info": client_info} bq_host = _get_bigquery_host() kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None + client_universe = None if client_options: if isinstance(client_options, dict): client_options = google.api_core.client_options.from_dict( @@ -253,14 +257,15 @@ def __init__( if client_options.api_endpoint: api_endpoint = client_options.api_endpoint kw_args["api_endpoint"] = api_endpoint - elif ( - hasattr(client_options, "universe_domain") - and client_options.universe_domain - and client_options.universe_domain is not _DEFAULT_UNIVERSE - ): - kw_args["api_endpoint"] = _DEFAULT_HOST.replace( - _DEFAULT_UNIVERSE, client_options.universe_domain - ) + else: + client_universe = _get_client_universe(client_options) + if client_universe != _DEFAULT_UNIVERSE: + kw_args["api_endpoint"] = _DEFAULT_HOST_TEMPLATE.replace( + "{UNIVERSE_DOMAIN}", client_universe + ) + # Ensure credentials and universe are not in conflict. + if hasattr(self, "_credentials") and client_universe is not None: + _validate_universe(client_universe, self._credentials) self._connection = Connection(self, **kw_args) self._location = location diff --git a/packages/google-cloud-bigquery/tests/unit/helpers.py b/packages/google-cloud-bigquery/tests/unit/helpers.py index 67aeaca35587..bc92c0df6618 100644 --- a/packages/google-cloud-bigquery/tests/unit/helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/helpers.py @@ -43,6 +43,20 @@ def make_client(project="PROJECT", **kw): return google.cloud.bigquery.client.Client(project, credentials, **kw) +def make_creds(creds_universe: None): + from google.auth import credentials + + class TestingCreds(credentials.Credentials): + def refresh(self, request): # pragma: NO COVER + raise NotImplementedError + + @property + def universe_domain(self): + return creds_universe + + return TestingCreds() + + def make_dataset_reference_string(project, ds_id): return f"{project}.{ds_id}" diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 87ab46669500..019d2e7bde71 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -17,8 +17,86 @@ import decimal import json import unittest - +import os import mock +import pytest +import packaging +import google.api_core + + +@pytest.mark.skipif( + packaging.version.parse(getattr(google.api_core, "__version__", "0.0.0")) + < packaging.version.Version("2.15.0"), + reason="universe_domain not supported with google-api-core < 2.15.0", +) +class Test_get_client_universe(unittest.TestCase): + def test_with_none(self): + from google.cloud.bigquery._helpers import _get_client_universe + + self.assertEqual("googleapis.com", _get_client_universe(None)) + + def test_with_dict(self): + from google.cloud.bigquery._helpers import _get_client_universe + + options = {"universe_domain": "foo.com"} + self.assertEqual("foo.com", _get_client_universe(options)) + + def test_with_dict_empty(self): + from google.cloud.bigquery._helpers import _get_client_universe + + options = {"universe_domain": ""} + self.assertEqual("googleapis.com", _get_client_universe(options)) + + def test_with_client_options(self): + from google.cloud.bigquery._helpers import _get_client_universe + from google.api_core import client_options + + options = client_options.from_dict({"universe_domain": "foo.com"}) + self.assertEqual("foo.com", _get_client_universe(options)) + + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"}) + def test_with_environ(self): + from google.cloud.bigquery._helpers import _get_client_universe + + self.assertEqual("foo.com", _get_client_universe(None)) + + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": ""}) + def test_with_environ_empty(self): + from google.cloud.bigquery._helpers import _get_client_universe + + self.assertEqual("googleapis.com", _get_client_universe(None)) + + +class Test_validate_universe(unittest.TestCase): + def test_with_none(self): + from google.cloud.bigquery._helpers import _validate_universe + + # should not raise + _validate_universe("googleapis.com", None) + + def test_with_no_universe_creds(self): + from google.cloud.bigquery._helpers import _validate_universe + from .helpers import make_creds + + creds = make_creds(None) + # should not raise + _validate_universe("googleapis.com", creds) + + def test_with_matched_universe_creds(self): + from google.cloud.bigquery._helpers import _validate_universe + from .helpers import make_creds + + creds = make_creds("googleapis.com") + # should not raise + _validate_universe("googleapis.com", creds) + + def test_with_mismatched_universe_creds(self): + from google.cloud.bigquery._helpers import _validate_universe + from .helpers import make_creds + + creds = make_creds("foo.com") + with self.assertRaises(ValueError): + _validate_universe("googleapis.com", creds) class Test_not_null(unittest.TestCase): From d15d983eaa8b2dbb5f4d3d4eaa8779e417b90c7c Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 6 Mar 2024 17:04:06 -0800 Subject: [PATCH 1740/2016] feat: support range sql (#1807) * feat: support range sql * add unit tests * add system test * lint and remove debug code * lint and remove debug code * remove added blank line * add comment for legacy type --- .../google/cloud/bigquery/enums.py | 2 + .../google/cloud/bigquery/standard_sql.py | 36 ++++++++++++- .../tests/system/test_client.py | 38 ++++++++++++++ .../tests/unit/test_standard_sql_types.py | 52 +++++++++++++++++++ 4 files changed, 127 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 55385363028d..d75037ad1e0b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -254,6 +254,7 @@ def _generate_next_value_(name, start, count, last_values): JSON = enum.auto() ARRAY = enum.auto() STRUCT = enum.auto() + RANGE = enum.auto() class EntityTypes(str, enum.Enum): @@ -292,6 +293,7 @@ class SqlTypeNames(str, enum.Enum): TIME = "TIME" DATETIME = "DATETIME" INTERVAL = "INTERVAL" # NOTE: not available in legacy types + RANGE = "RANGE" # NOTE: not available in legacy types class WriteDisposition(object): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/standard_sql.py b/packages/google-cloud-bigquery/google/cloud/bigquery/standard_sql.py index e0f22b2de0a9..68332eb807cc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/standard_sql.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/standard_sql.py @@ -43,6 +43,7 @@ class StandardSqlDataType: ] } } + RANGE: {type_kind="RANGE", range_element_type="DATETIME"} Args: type_kind: @@ -52,6 +53,8 @@ class StandardSqlDataType: The type of the array's elements, if type_kind is ARRAY. struct_type: The fields of this struct, in order, if type_kind is STRUCT. + range_element_type: + The type of the range's elements, if type_kind is RANGE. """ def __init__( @@ -61,12 +64,14 @@ def __init__( ] = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, array_element_type: Optional["StandardSqlDataType"] = None, struct_type: Optional["StandardSqlStructType"] = None, + range_element_type: Optional["StandardSqlDataType"] = None, ): self._properties: Dict[str, Any] = {} self.type_kind = type_kind self.array_element_type = array_element_type self.struct_type = struct_type + self.range_element_type = range_element_type @property def type_kind(self) -> Optional[StandardSqlTypeNames]: @@ -127,6 +132,28 @@ def struct_type(self, value: Optional["StandardSqlStructType"]): else: self._properties["structType"] = struct_type + @property + def range_element_type(self) -> Optional["StandardSqlDataType"]: + """The type of the range's elements, if type_kind = "RANGE". Must be + one of DATETIME, DATE, or TIMESTAMP.""" + range_element_info = self._properties.get("rangeElementType") + + if range_element_info is None: + return None + + result = StandardSqlDataType() + result._properties = range_element_info # We do not use a copy on purpose. + return result + + @range_element_type.setter + def range_element_type(self, value: Optional["StandardSqlDataType"]): + range_element_type = None if value is None else value.to_api_repr() + + if range_element_type is None: + self._properties.pop("rangeElementType", None) + else: + self._properties["rangeElementType"] = range_element_type + def to_api_repr(self) -> Dict[str, Any]: """Construct the API resource representation of this SQL data type.""" return copy.deepcopy(self._properties) @@ -155,7 +182,13 @@ def from_api_repr(cls, resource: Dict[str, Any]): if struct_info: struct_type = StandardSqlStructType.from_api_repr(struct_info) - return cls(type_kind, array_element_type, struct_type) + range_element_type = None + if type_kind == StandardSqlTypeNames.RANGE: + range_element_info = resource.get("rangeElementType") + if range_element_info: + range_element_type = cls.from_api_repr(range_element_info) + + return cls(type_kind, array_element_type, struct_type, range_element_type) def __eq__(self, other): if not isinstance(other, StandardSqlDataType): @@ -165,6 +198,7 @@ def __eq__(self, other): self.type_kind == other.type_kind and self.array_element_type == other.array_element_type and self.struct_type == other.struct_type + and self.range_element_type == other.range_element_type ) def __str__(self): diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 74c152cf220e..04740de8a09a 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -2193,6 +2193,44 @@ def test_create_routine(self): assert len(rows) == 1 assert rows[0].max_value == 100.0 + def test_create_routine_with_range(self): + routine_name = "routine_range" + dataset = self.temp_dataset(_make_dataset_id("routine_range")) + + routine = bigquery.Routine( + dataset.routine(routine_name), + type_="SCALAR_FUNCTION", + language="SQL", + body="RANGE_START(x)", + arguments=[ + bigquery.RoutineArgument( + name="x", + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.RANGE, + range_element_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.DATE + ), + ), + ) + ], + return_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.DATE + ), + ) + + query_string = ( + "SELECT `{}`(RANGE '[2016-08-12, UNBOUNDED)') as range_start;".format( + str(routine.reference) + ) + ) + + routine = helpers.retry_403(Config.CLIENT.create_routine)(routine) + query_job = helpers.retry_403(Config.CLIENT.query)(query_string) + rows = list(query_job.result()) + + assert len(rows) == 1 + assert rows[0].range_start == datetime.date(2016, 8, 12) + def test_create_tvf_routine(self): from google.cloud.bigquery import ( Routine, diff --git a/packages/google-cloud-bigquery/tests/unit/test_standard_sql_types.py b/packages/google-cloud-bigquery/tests/unit/test_standard_sql_types.py index 0ba0e0cfd721..3ed912b5a4a8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_standard_sql_types.py +++ b/packages/google-cloud-bigquery/tests/unit/test_standard_sql_types.py @@ -129,6 +129,28 @@ def test_to_api_repr_struct_type_w_field_types(self): } assert result == expected + def test_to_api_repr_range_type_element_type_missing(self): + instance = self._make_one( + bq.StandardSqlTypeNames.RANGE, range_element_type=None + ) + + result = instance.to_api_repr() + + assert result == {"typeKind": "RANGE"} + + def test_to_api_repr_range_type_w_element_type(self): + range_element_type = self._make_one(type_kind=bq.StandardSqlTypeNames.DATE) + instance = self._make_one( + bq.StandardSqlTypeNames.RANGE, range_element_type=range_element_type + ) + + result = instance.to_api_repr() + + assert result == { + "typeKind": "RANGE", + "rangeElementType": {"typeKind": "DATE"}, + } + def test_from_api_repr_empty_resource(self): klass = self._get_target_class() result = klass.from_api_repr(resource={}) @@ -276,6 +298,31 @@ def test_from_api_repr_struct_type_incomplete_field_info(self): ) assert result == expected + def test_from_api_repr_range_type_full(self): + klass = self._get_target_class() + resource = {"typeKind": "RANGE", "rangeElementType": {"typeKind": "DATE"}} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.RANGE, + range_element_type=klass(type_kind=bq.StandardSqlTypeNames.DATE), + ) + assert result == expected + + def test_from_api_repr_range_type_missing_element_type(self): + klass = self._get_target_class() + resource = {"typeKind": "RANGE"} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.RANGE, + range_element_type=None, + struct_type=None, + ) + assert result == expected + def test__eq__another_type(self): instance = self._make_one() @@ -321,6 +368,11 @@ def test__eq__similar_instance(self): bq.StandardSqlStructType(fields=[bq.StandardSqlField(name="foo")]), bq.StandardSqlStructType(fields=[bq.StandardSqlField(name="bar")]), ), + ( + "range_element_type", + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.DATE), + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.DATETIME), + ), ), ) def test__eq__attribute_differs(self, attr_name, value, value2): From 135a23d68bfa414ccf427ac52be5c7bf5a388aff Mon Sep 17 00:00:00 2001 From: shollyman Date: Wed, 6 Mar 2024 17:42:18 -0800 Subject: [PATCH 1741/2016] fix: supplementary fix to env-based universe resolution (#1844) * fix: supplementary fix to env-based universe resolution There's a corner case where conversion from dict to a ClientOptions will return a universe_domain value as None that wasn't covered by initial testing. This updates the resolution code and adds tests to exercise the new path. * formatting --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/_helpers.py | 11 +++++++---- .../tests/unit/test__helpers.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index ec4ac9970001..7198b60c2309 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -81,10 +81,13 @@ def _get_client_universe( if isinstance(client_options, dict): client_options = client_options_lib.from_dict(client_options) universe = _DEFAULT_UNIVERSE - if hasattr(client_options, "universe_domain"): - options_universe = getattr(client_options, "universe_domain") - if options_universe is not None and len(options_universe) > 0: - universe = options_universe + options_universe = getattr(client_options, "universe_domain", None) + if ( + options_universe + and isinstance(options_universe, str) + and len(options_universe) > 0 + ): + universe = options_universe else: env_universe = os.getenv(_UNIVERSE_DOMAIN_ENV) if isinstance(env_universe, str) and len(env_universe) > 0: diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 019d2e7bde71..7e8d815d2b90 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -60,6 +60,21 @@ def test_with_environ(self): self.assertEqual("foo.com", _get_client_universe(None)) + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"}) + def test_with_environ_and_dict(self): + from google.cloud.bigquery._helpers import _get_client_universe + + options = ({"credentials_file": "file.json"},) + self.assertEqual("foo.com", _get_client_universe(options)) + + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"}) + def test_with_environ_and_empty_options(self): + from google.cloud.bigquery._helpers import _get_client_universe + from google.api_core import client_options + + options = client_options.from_dict({}) + self.assertEqual("foo.com", _get_client_universe(options)) + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": ""}) def test_with_environ_empty(self): from google.cloud.bigquery._helpers import _get_client_universe From 601859804006fd5242af136166c7a905365bc759 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 7 Mar 2024 15:10:32 -0500 Subject: [PATCH 1742/2016] chore: use mock from unittest (#1823) * chore: use mock from unittest * correct user_credentials_test.py * add try except for Python 3.7 * fixes linting * adjustments to testing suite to account for dependencies * updates to mypy_samples.py * linting for noxfile.py --------- Co-authored-by: Chalmer Lowe --- packages/google-cloud-bigquery/noxfile.py | 8 ++++---- .../samples/desktopapp/user_credentials_test.py | 11 +++++++++-- .../samples/tests/conftest.py | 2 +- .../testing/constraints-3.7.txt | 13 +++++++++---- .../google-cloud-bigquery/tests/unit/conftest.py | 3 ++- .../google-cloud-bigquery/tests/unit/helpers.py | 7 ++++--- .../tests/unit/job/test_base.py | 2 +- .../tests/unit/job/test_copy.py | 2 +- .../tests/unit/job/test_extract.py | 2 +- .../tests/unit/job/test_load.py | 3 +-- .../tests/unit/job/test_query.py | 16 ++++++++-------- .../tests/unit/job/test_query_pandas.py | 2 +- .../tests/unit/test__helpers.py | 5 +++-- .../tests/unit/test__http.py | 2 +- .../tests/unit/test__pandas_helpers.py | 7 +++---- .../tests/unit/test__versions_helpers.py | 4 ++-- .../tests/unit/test_client.py | 13 ++++++------- .../tests/unit/test_create_dataset.py | 3 ++- .../tests/unit/test_dataset.py | 2 +- .../tests/unit/test_dbapi_connection.py | 3 +-- .../tests/unit/test_dbapi_cursor.py | 2 +- .../tests/unit/test_encryption_configuration.py | 2 +- .../tests/unit/test_job_retry.py | 2 +- .../tests/unit/test_list_datasets.py | 3 ++- .../tests/unit/test_list_jobs.py | 2 +- .../tests/unit/test_list_projects.py | 3 ++- .../tests/unit/test_magics.py | 6 +++--- .../tests/unit/test_opentelemetry_tracing.py | 3 +-- .../tests/unit/test_query.py | 3 +-- .../tests/unit/test_retry.py | 2 +- .../tests/unit/test_schema.py | 2 +- .../tests/unit/test_table.py | 2 +- 32 files changed, 77 insertions(+), 65 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index ae022232eb86..c31d098b8445 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -72,7 +72,6 @@ def default(session, install_extras=True): # Install all test dependencies, then install local packages in-place. session.install( - "mock", "pytest", "google-cloud-testutils", "pytest-cov", @@ -89,6 +88,8 @@ def default(session, install_extras=True): install_target = "." session.install("-e", install_target, "-c", constraints_path) + session.run("python", "-m", "pip", "freeze") + # Run py.test against the unit tests. session.run( "py.test", @@ -176,7 +177,7 @@ def system(session): # Install all test dependencies, then install local packages in place. session.install( - "mock", "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path + "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path ) if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true": # mTLS test requires pyopenssl and latest google-cloud-storage @@ -249,7 +250,7 @@ def snippets(session): ) # Install all test dependencies, then install local packages in place. - session.install("mock", "pytest", "google-cloud-testutils", "-c", constraints_path) + session.install("pytest", "google-cloud-testutils", "-c", constraints_path) session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) @@ -336,7 +337,6 @@ def prerelease_deps(session): "google-cloud-datacatalog", "google-cloud-storage", "google-cloud-testutils", - "mock", "psutil", "pytest", "pytest-cov", diff --git a/packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py b/packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py index baa9e33f1357..252b843c4ffa 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py +++ b/packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py @@ -13,17 +13,24 @@ # limitations under the License. import os +import sys from typing import Iterator, Union +from unittest import mock import google.auth -import mock import pytest from .user_credentials import main # type: ignore PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] -MockType = Union[mock.mock.MagicMock, mock.mock.AsyncMock] + +if sys.version_info >= (3, 8): + # Python 3.8+ has an AsyncMock attribute in unittest.mock, but 3.7 does not + MockType = Union[mock.MagicMock, mock.AsyncMock] +else: + # Other definitions and imports + MockType = Union[mock.MagicMock] @pytest.fixture diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index 2b5b89c43d07..91603bef2eb3 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -14,10 +14,10 @@ import datetime from typing import Iterator, List +from unittest import mock import uuid import google.auth -import mock import pytest from google.cloud import bigquery diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 9f71bf11ab6e..28787adb72d9 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -7,11 +7,16 @@ # Then this file should have foo==1.14.0 db-dtypes==0.3.0 geopandas==0.9.0 -google-api-core==1.31.5 -google-cloud-bigquery-storage==2.6.0 -google-cloud-core==1.6.0 -google-resumable-media==0.6.0 +google-api-core==2.17.1 +google-auth==2.28.1 +google-cloud-bigquery-storage==2.24.0 +google-cloud-core==2.4.1 +google-cloud-testutils==1.4.0 +google-crc32c==1.5.0 +google-resumable-media==2.7.0 +googleapis-common-protos==1.62.0 grpcio==1.47.0 +grpcio-status==1.47.0 ipywidgets==7.7.1 ipython==7.23.1 ipykernel==6.0.0 diff --git a/packages/google-cloud-bigquery/tests/unit/conftest.py b/packages/google-cloud-bigquery/tests/unit/conftest.py index c2ae78eaa07c..ebe2d2a7a659 100644 --- a/packages/google-cloud-bigquery/tests/unit/conftest.py +++ b/packages/google-cloud-bigquery/tests/unit/conftest.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import mock +from unittest import mock + import pytest from .helpers import make_client diff --git a/packages/google-cloud-bigquery/tests/unit/helpers.py b/packages/google-cloud-bigquery/tests/unit/helpers.py index bc92c0df6618..c5414138e8f5 100644 --- a/packages/google-cloud-bigquery/tests/unit/helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/helpers.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock + +import pytest + import google.cloud.bigquery.client import google.cloud.bigquery.dataset -import mock -import pytest def make_connection(*responses): import google.cloud.bigquery._http - import mock from google.cloud.exceptions import NotFound mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index a61fd319858e..18672952920a 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -15,11 +15,11 @@ import copy import http import unittest +from unittest import mock from google.api_core import exceptions import google.api_core.retry from google.api_core.future import polling -import mock import pytest from ..helpers import make_connection diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py index a3b5c70e37e5..e1bb20db24f5 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import mock +from unittest import mock from ..helpers import make_connection diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_extract.py b/packages/google-cloud-bigquery/tests/unit/job/test_extract.py index 8bada51af30c..76ee72f281da 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_extract.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_extract.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import mock +from unittest import mock from ..helpers import make_connection diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index c6bbaa2fb622..976fec914b9f 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -13,8 +13,7 @@ # limitations under the License. import copy - -import mock +from unittest import mock from ..helpers import make_connection diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 776234b5ba31..37ac7ba5ebb3 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -17,11 +17,11 @@ import http import textwrap import types +from unittest import mock import freezegun from google.api_core import exceptions import google.api_core.retry -import mock import requests from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS @@ -382,11 +382,11 @@ def test__done_or_raise_w_timeout(self): job._done_or_raise(timeout=42) fake_get_results.assert_called_once() - call_args = fake_get_results.call_args - self.assertEqual(call_args.kwargs.get("timeout"), 42) + call_args = fake_get_results.call_args[0][1] + self.assertEqual(call_args.timeout, 600.0) - call_args = fake_reload.call_args - self.assertEqual(call_args.kwargs.get("timeout"), 42) + call_args = fake_reload.call_args[1] + self.assertEqual(call_args["timeout"], 42) def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self): client = _make_client(project=self.PROJECT) @@ -404,11 +404,11 @@ def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self): expected_timeout = 5.5 fake_get_results.assert_called_once() - call_args = fake_get_results.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + call_args = fake_get_results.call_args[0][1] + self.assertAlmostEqual(call_args.timeout, 600.0) call_args = fake_reload.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + self.assertAlmostEqual(call_args[1].get("timeout"), expected_timeout) def test__done_or_raise_w_query_results_error_reload_ok(self): client = _make_client(project=self.PROJECT) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 6189830ffabe..1473ef283e09 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -15,8 +15,8 @@ import concurrent.futures import copy import json +from unittest import mock -import mock import pytest diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 7e8d815d2b90..320c57737250 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -16,11 +16,12 @@ import datetime import decimal import json -import unittest import os -import mock import pytest import packaging +import unittest +from unittest import mock + import google.api_core diff --git a/packages/google-cloud-bigquery/tests/unit/test__http.py b/packages/google-cloud-bigquery/tests/unit/test__http.py index 09f6d29d71b5..fd7ecdc428a2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__http.py +++ b/packages/google-cloud-bigquery/tests/unit/test__http.py @@ -13,8 +13,8 @@ # limitations under the License. import unittest +from unittest import mock -import mock import requests diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 7c83d3ec5fd0..abee39065cce 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -18,6 +18,7 @@ import functools import operator import queue +from unittest import mock import warnings try: @@ -25,8 +26,6 @@ except ImportError: import importlib_metadata as metadata -import mock - try: import pandas import pandas.api.types @@ -1200,7 +1199,7 @@ def test_dataframe_to_parquet_compression_method(module_under_test): call_args = fake_write_table.call_args assert call_args is not None - assert call_args.kwargs.get("compression") == "ZSTD" + assert call_args[1].get("compression") == "ZSTD" @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1635,7 +1634,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): schema.SchemaField("field01", "STRING", mode="REQUIRED"), schema.SchemaField("field02", "BOOL", mode="NULLABLE"), ] - schema_arg = fake_to_arrow.call_args.args[1] + schema_arg = fake_to_arrow.call_args[0][1] assert schema_arg == expected_schema_arg diff --git a/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py index afe170e7a0bd..8fa09962720d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest +from unittest import mock -import mock +import pytest try: import pyarrow # type: ignore diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 42581edc1aae..be8bef03cd7a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -24,9 +24,9 @@ import json import operator import unittest +from unittest import mock import warnings -import mock import requests import packaging import pytest @@ -8733,9 +8733,9 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): parquet_compression="LZ4", ) - call_args = fake_to_parquet.call_args + call_args = fake_to_parquet.call_args[1] assert call_args is not None - assert call_args.kwargs.get("parquet_compression") == "LZ4" + assert call_args.get("parquet_compression") == "LZ4" @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -9498,12 +9498,11 @@ def test__do_resumable_upload_custom_project(self): timeout=mock.ANY, ) - # Check the project ID used in the call to initiate resumable upload. initiation_url = next( ( - call.args[1] + call[0][1] for call in transport.request.call_args_list - if call.args[0] == "POST" and "uploadType=resumable" in call.args[1] + if call[0][0] == "POST" and "uploadType=resumable" in call[0][1] ), None, ) # pragma: NO COVER @@ -9525,7 +9524,7 @@ def test__do_resumable_upload_custom_timeout(self): # The timeout should be applied to all underlying calls. for call_args in transport.request.call_args_list: - assert call_args.kwargs.get("timeout") == 3.14 + assert call_args[1].get("timeout") == 3.14 def test__do_multipart_upload(self): transport = self._make_transport([self._make_response(http.client.OK)]) diff --git a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py index 8374e6e0ad60..a2491a8121ca 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock + from google.cloud.bigquery.dataset import Dataset, DatasetReference from .helpers import make_connection, dataset_polymorphic, make_client import google.cloud.bigquery.dataset from google.cloud.bigquery.retry import DEFAULT_TIMEOUT -import mock import pytest diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 423349a51aae..c0164bc738a4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -13,8 +13,8 @@ # limitations under the License. import unittest +from unittest import mock -import mock from google.cloud.bigquery.routine.routine import Routine, RoutineReference import pytest from google.cloud.bigquery.dataset import ( diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 67777f923868..88378ec9818e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -14,8 +14,7 @@ import gc import unittest - -import mock +from unittest import mock try: from google.cloud import bigquery_storage diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 69d33fe17de1..e9fd2e3dd3c8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -13,9 +13,9 @@ # limitations under the License. import functools -import mock import operator as op import unittest +from unittest import mock import pytest diff --git a/packages/google-cloud-bigquery/tests/unit/test_encryption_configuration.py b/packages/google-cloud-bigquery/tests/unit/test_encryption_configuration.py index f432a903b4cc..cdd944a8fe1a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_encryption_configuration.py +++ b/packages/google-cloud-bigquery/tests/unit/test_encryption_configuration.py @@ -13,7 +13,7 @@ # limitations under the License. import unittest -import mock +from unittest import mock class TestEncryptionConfiguration(unittest.TestCase): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py index 0e984c8fcf38..d7049c5ca2e6 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -14,8 +14,8 @@ import datetime import re +from unittest import mock -import mock import pytest import google.api_core.exceptions diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_datasets.py b/packages/google-cloud-bigquery/tests/unit/test_list_datasets.py index 6f0b55c5e5b0..4ef99fd865a1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_datasets.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_datasets.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import mock +from unittest import mock + import pytest from google.cloud.bigquery.retry import DEFAULT_TIMEOUT diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py b/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py index 1db6b5668b1a..edb85af0aa9d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_jobs.py @@ -13,8 +13,8 @@ # limitations under the License. import datetime +from unittest import mock -import mock import pytest from google.cloud.bigquery.retry import DEFAULT_TIMEOUT diff --git a/packages/google-cloud-bigquery/tests/unit/test_list_projects.py b/packages/google-cloud-bigquery/tests/unit/test_list_projects.py index 190612b44588..5260e5246474 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_list_projects.py +++ b/packages/google-cloud-bigquery/tests/unit/test_list_projects.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import mock +from unittest import mock + import pytest from google.cloud.bigquery.retry import DEFAULT_TIMEOUT diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 1511cba9c9d0..4b1aaf14d5a9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -15,11 +15,11 @@ import copy import re from concurrent import futures +from unittest import mock import warnings from google.api_core import exceptions import google.auth.credentials -import mock import pytest from tests.unit.helpers import make_connection from test_utils.imports import maybe_fail_import @@ -513,7 +513,7 @@ def test_bigquery_magic_default_connection_user_agent(): with conn_patch as conn, run_query_patch, default_patch: ip.run_cell_magic("bigquery", "", "SELECT 17 as num") - client_info_arg = conn.call_args.kwargs.get("client_info") + client_info_arg = conn.call_args[1].get("client_info") assert client_info_arg is not None assert client_info_arg.user_agent == "ipython-" + IPython.__version__ @@ -663,7 +663,7 @@ def warning_match(warning): assert len(expected_warnings) == 1 assert len(bqstorage_mock.call_args_list) == 1 - kwargs = bqstorage_mock.call_args_list[0].kwargs + kwargs = bqstorage_mock.call_args_list[0][1] assert kwargs.get("credentials") is mock_credentials client_info = kwargs.get("client_info") assert client_info is not None diff --git a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py index 4cc58713c83f..e96e18c6b3a4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py @@ -15,8 +15,7 @@ import datetime import importlib import sys - -import mock +from unittest import mock try: import opentelemetry diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index f511bf28d985..7c36eb75bebb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -15,8 +15,7 @@ import datetime import decimal import unittest - -import mock +from unittest import mock class Test_UDFResource(unittest.TestCase): diff --git a/packages/google-cloud-bigquery/tests/unit/test_retry.py b/packages/google-cloud-bigquery/tests/unit/test_retry.py index 2fcb84e21ded..6e533c8497cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_retry.py @@ -13,8 +13,8 @@ # limitations under the License. import unittest +from unittest import mock -import mock import requests.exceptions diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 26ec0dfefef4..b17cd028116d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -16,8 +16,8 @@ from google.cloud.bigquery.standard_sql import StandardSqlStructType from google.cloud.bigquery.schema import PolicyTagList import unittest +from unittest import mock -import mock import pytest diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 00a7f06e68ed..0d549120fb44 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -19,9 +19,9 @@ import time import types import unittest +from unittest import mock import warnings -import mock import pytest try: From e19bf0e1d3fed38c3feeac84d792f262164637dc Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 7 Mar 2024 14:30:13 -0800 Subject: [PATCH 1743/2016] testing: update CI configurations (#1846) * testing: remove unnecessary prerelease targets This PR does two things: * remove unneeded prerelease-deps configs for removed and nonexisting CI targets * fixes the continuous prerelease-deps-3.12 config --- .../{prerelease-deps-3.11.cfg => prerelease-deps-3.12.cfg} | 2 +- .../.kokoro/continuous/prerelease-deps-3.8.cfg | 7 ------- .../.kokoro/continuous/prerelease-deps.cfg | 7 ------- .../.kokoro/presubmit/prerelease-deps-3.8.cfg | 7 ------- .../.kokoro/presubmit/prerelease-deps.cfg | 7 ------- 5 files changed, 1 insertion(+), 29 deletions(-) rename packages/google-cloud-bigquery/.kokoro/continuous/{prerelease-deps-3.11.cfg => prerelease-deps-3.12.cfg} (77%) delete mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.11.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.12.cfg similarity index 77% rename from packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.11.cfg rename to packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.12.cfg index 1e19f1239870..ece962a1703e 100644 --- a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.11.cfg +++ b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.12.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps-3.11" + value: "prerelease_deps-3.12" } diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg deleted file mode 100644 index fabe3e347f7b..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.8.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.8" -} diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg deleted file mode 100644 index 3595fb43f5c0..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps" -} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg deleted file mode 100644 index fabe3e347f7b..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.8.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.8" -} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg deleted file mode 100644 index 3595fb43f5c0..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps" -} From 1a7d458dae9b950b61af5201208256e9a5ec8d40 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 8 Mar 2024 08:34:40 -0500 Subject: [PATCH 1744/2016] fix: add google-auth as a direct dependency (#1809) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: add google-auth as a direct dependency * update constraints * fix(deps): Require `google-api-core>=1.34.1, >=2.11.0` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Lingqing Gan Co-authored-by: Chalmer Lowe Co-authored-by: Owl Bot --- .../.kokoro/continuous/prerelease-deps.cfg | 7 +++++++ .../.kokoro/presubmit/prerelease-deps.cfg | 7 +++++++ packages/google-cloud-bigquery/setup.py | 3 ++- 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg new file mode 100644 index 000000000000..3595fb43f5c0 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg new file mode 100644 index 000000000000..3595fb43f5c0 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 9fbc91ecbf8c..5a35f4136e8a 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -32,7 +32,8 @@ # NOTE: Maintainers, please do not require google-api-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 - "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", + "google-api-core[grpc] >= 1.34.1, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,!=2.10.*", + "google-auth >= 2.14.1, <3.0.0dev", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 From 2cb529482643a941470fe8a15750a3b787a590cb Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 8 Mar 2024 15:43:02 -0800 Subject: [PATCH 1745/2016] fix: supplementary fix to env-based universe resolution (#1847) * fix: promote env-based universe into client option parsing * lint * add client test * import --------- Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/client.py | 25 +++++++++---------- .../tests/unit/test_client.py | 12 +++++++++ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index cb4daa897bcb..408e7e49c3c3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -249,20 +249,19 @@ def __init__( bq_host = _get_bigquery_host() kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None client_universe = None - if client_options: - if isinstance(client_options, dict): - client_options = google.api_core.client_options.from_dict( - client_options + if client_options is None: + client_options = {} + if isinstance(client_options, dict): + client_options = google.api_core.client_options.from_dict(client_options) + if client_options.api_endpoint: + api_endpoint = client_options.api_endpoint + kw_args["api_endpoint"] = api_endpoint + else: + client_universe = _get_client_universe(client_options) + if client_universe != _DEFAULT_UNIVERSE: + kw_args["api_endpoint"] = _DEFAULT_HOST_TEMPLATE.replace( + "{UNIVERSE_DOMAIN}", client_universe ) - if client_options.api_endpoint: - api_endpoint = client_options.api_endpoint - kw_args["api_endpoint"] = api_endpoint - else: - client_universe = _get_client_universe(client_options) - if client_universe != _DEFAULT_UNIVERSE: - kw_args["api_endpoint"] = _DEFAULT_HOST_TEMPLATE.replace( - "{UNIVERSE_DOMAIN}", client_universe - ) # Ensure credentials and universe are not in conflict. if hasattr(self, "_credentials") and client_universe is not None: _validate_universe(client_universe, self._credentials) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index be8bef03cd7a..d20712a8a5f1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -23,6 +23,7 @@ import itertools import json import operator +import os import unittest from unittest import mock import warnings @@ -171,6 +172,17 @@ def test_ctor_w_empty_client_options(self): client._connection.API_BASE_URL, client._connection.DEFAULT_API_ENDPOINT ) + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"}) + def test_ctor_w_only_env_universe(self): + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + ) + self.assertEqual(client._connection.API_BASE_URL, "https://bigquery.foo.com") + def test_ctor_w_client_options_dict(self): creds = _make_credentials() http = object() From eccc1050b87346fc8c316bcc069b1a4b20776a4c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 11 Mar 2024 16:56:52 +0100 Subject: [PATCH 1746/2016] chore(deps): update all dependencies (#1835) * chore(deps): update all dependencies * Update samples/magics/requirements-test.txt * Update samples/notebooks/requirements.txt * Update samples/magics/requirements.txt update. * Update samples/desktopapp/requirements-test.txt * Update samples/geography/requirements-test.txt * Update samples/notebooks/requirements-test.txt * Update samples/snippets/requirements-test.txt --------- Co-authored-by: Chalmer Lowe --- .../samples/desktopapp/requirements-test.txt | 3 +- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements-test.txt | 3 +- .../samples/geography/requirements.txt | 34 +++++++++---------- .../samples/magics/requirements-test.txt | 3 +- .../samples/magics/requirements.txt | 4 +-- .../samples/notebooks/requirements-test.txt | 3 +- .../samples/notebooks/requirements.txt | 6 ++-- .../samples/snippets/requirements-test.txt | 3 +- .../samples/snippets/requirements.txt | 2 +- 10 files changed, 34 insertions(+), 29 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index fc926cd7cb08..99d27b06aabf 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.3 +pytest==7.4.4; python_version == '3.7' +pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 8d82d4930e48..78074bbcaaf2 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.14.1 +google-cloud-bigquery==3.18.0 google-auth-oauthlib==1.2.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 7749d1f9499c..a91fa2d551c7 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,3 @@ -pytest==7.4.3 +pytest==7.4.4; python_version == '3.7' +pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 47e7cc56ee43..c85bf06d00ca 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ -attrs==23.1.0 -certifi==2023.11.17 +attrs==23.2.0 +certifi==2024.2.2 cffi===1.15.1; python_version == '3.7' cffi==1.16.0; python_version >= '3.8' charset-normalizer==3.3.2 @@ -8,43 +8,43 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.2.0 -Fiona==1.9.5 +Fiona==1.9.6 geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' -geopandas==0.14.1; python_version >= '3.9' -google-api-core==2.15.0 -google-auth==2.25.2 -google-cloud-bigquery==3.14.1 +geopandas==0.14.3; python_version >= '3.9' +google-api-core==2.17.1 +google-auth==2.28.2 +google-cloud-bigquery==3.18.0 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 -googleapis-common-protos==1.62.0 -grpcio==1.60.0 +googleapis-common-protos==1.63.0 +grpcio==1.62.1 idna==3.6 munch==4.0.0 mypy-extensions==1.0.0 -packaging==23.2 +packaging==24.0 pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.1.0; python_version >= '3.9' +pandas==2.2.1; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==14.0.1; python_version >= '3.8' +pyarrow==15.0.1; python_version >= '3.8' pyasn1==0.5.1 pyasn1-modules==0.3.0 pycparser==2.21 -pyparsing==3.1.1 -python-dateutil==2.8.2 -pytz==2023.3.post1 +pyparsing==3.1.2 +python-dateutil==2.9.0.post0 +pytz==2024.1 PyYAML==6.0.1 requests==2.31.0 rsa==4.9 -Shapely==2.0.2 +Shapely==2.0.3 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.9.0; python_version >= '3.8' +typing-extensions==4.10.0; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index fc926cd7cb08..99d27b06aabf 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.3 +pytest==7.4.4; python_version == '3.7' +pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 869d3b4d5580..ea987358f7fa 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,9 +1,9 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.14.1 +google.cloud.bigquery==3.18.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.1.0; python_version >= '3.9' +pandas==2.2.1; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index fc926cd7cb08..99d27b06aabf 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.3 +pytest==7.4.4; python_version == '3.7' +pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index e8839e1fed9e..5ce95818e199 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,12 +1,12 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.14.1 +google-cloud-bigquery==3.18.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.8.2; python_version >= '3.9' +matplotlib==3.8.3; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.1.0; python_version >= '3.9' +pandas==2.2.1; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index fc926cd7cb08..99d27b06aabf 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.3 +pytest==7.4.4; python_version == '3.7' +pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 365d584c7bca..fc0a2ef360ce 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-bigquery==3.14.1 \ No newline at end of file +google-cloud-bigquery==3.18.0 \ No newline at end of file From 364d17cae1760d812a387165aa9cd331344fdcda Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 11 Mar 2024 12:30:59 -0400 Subject: [PATCH 1747/2016] chore(main): release 3.19.0 (#1840) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 17 +++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 350787512595..4cb0e1d20bdf 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,23 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.19.0](https://github.com/googleapis/python-bigquery/compare/v3.18.0...v3.19.0) (2024-03-11) + + +### Features + +* Support RANGE query parameters ([#1827](https://github.com/googleapis/python-bigquery/issues/1827)) ([b359a9a](https://github.com/googleapis/python-bigquery/commit/b359a9a55936a759a36aa69c5e5b014685e1fca6)) +* Support range sql ([#1807](https://github.com/googleapis/python-bigquery/issues/1807)) ([86a45c9](https://github.com/googleapis/python-bigquery/commit/86a45c989836b34dca456bac014352e55d6f86c0)) + + +### Bug Fixes + +* Add google-auth as a direct dependency ([713ce2c](https://github.com/googleapis/python-bigquery/commit/713ce2c2f6ce9931f67cbbcd63ad436ad336ad26)) +* Augment universe_domain handling ([#1837](https://github.com/googleapis/python-bigquery/issues/1837)) ([53c2cbf](https://github.com/googleapis/python-bigquery/commit/53c2cbf98d2961f553747514de273bcd5c117f0e)) +* **deps:** Require google-api-core>=1.34.1, >=2.11.0 ([713ce2c](https://github.com/googleapis/python-bigquery/commit/713ce2c2f6ce9931f67cbbcd63ad436ad336ad26)) +* Supplementary fix to env-based universe resolution ([#1844](https://github.com/googleapis/python-bigquery/issues/1844)) ([b818992](https://github.com/googleapis/python-bigquery/commit/b8189929b6008f7780214822062f8ed05d8d2a01)) +* Supplementary fix to env-based universe resolution ([#1847](https://github.com/googleapis/python-bigquery/issues/1847)) ([6dff50f](https://github.com/googleapis/python-bigquery/commit/6dff50f4fbc5aeb644383a4050dd5ffc05015ffe)) + ## [3.18.0](https://github.com/googleapis/python-bigquery/compare/v3.17.2...v3.18.0) (2024-02-29) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 89024cc08a0c..27f24bd196b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.18.0" +__version__ = "3.19.0" From 769b64bfcffb6348af6f1d35202c07917bc5611b Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 11 Mar 2024 10:04:17 -0700 Subject: [PATCH 1748/2016] fix: correct type checking (#1848) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct the way we check whether `self._done_timeout` is an instance of `object` class or not. Fixes #1838 🦕 --- .../google-cloud-bigquery/google/cloud/bigquery/job/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index e45a46894ce4..83d2751ce17f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1409,9 +1409,9 @@ def _reload_query_results( # Python_API_core, as part of a major rewrite of the deadline, timeout, # retry process sets the timeout value as a Python object(). # Our system does not natively handle that and instead expects - # either none or a numeric value. If passed a Python object, convert to + # either None or a numeric value. If passed a Python object, convert to # None. - if isinstance(self._done_timeout, object): # pragma: NO COVER + if type(self._done_timeout) is object: # pragma: NO COVER self._done_timeout = None if self._done_timeout is not None: # pragma: NO COVER From 4031ee209e5ff98116883a9d3b3fdee02de40272 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 12 Mar 2024 13:33:12 +0100 Subject: [PATCH 1749/2016] chore(deps): update all dependencies (#1849) * chore(deps): update all dependencies * Update samples/desktopapp/requirements-test.txt * Update samples/geography/requirements-test.txt * Update samples/magics/requirements-test.txt * Update samples/magics/requirements.txt * Update samples/notebooks/requirements-test.txt * Update samples/notebooks/requirements.txt * Update samples/snippets/requirements-test.txt --------- Co-authored-by: Chalmer Lowe --- .../samples/desktopapp/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../samples/magics/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../samples/notebooks/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 4 ++-- .../samples/snippets/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 99d27b06aabf..9142d4905434 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.4; python_version == '3.7' +pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 78074bbcaaf2..8561934dcca5 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.18.0 +google-cloud-bigquery==3.19.0 google-auth-oauthlib==1.2.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index a91fa2d551c7..f052969d31b8 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==7.4.4; python_version == '3.7' +pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index c85bf06d00ca..b474e252c128 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -15,7 +15,7 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.3; python_version >= '3.9' google-api-core==2.17.1 google-auth==2.28.2 -google-cloud-bigquery==3.18.0 +google-cloud-bigquery==3.19.0 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 99d27b06aabf..9142d4905434 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.4; python_version == '3.7' +pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index ea987358f7fa..9179db067f66 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,9 +1,9 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.18.0 +google.cloud.bigquery==3.19.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.18.1; python_version >= '3.9' +ipython===8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.1; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 99d27b06aabf..9142d4905434 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.4; python_version == '3.7' +pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 5ce95818e199..8f2e9362045a 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.18.0 +google-cloud-bigquery==3.19.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.18.1; python_version >= '3.9' +ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' matplotlib==3.8.3; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 99d27b06aabf..9142d4905434 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.4; python_version == '3.7' +pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index fc0a2ef360ce..b3347499f35f 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-bigquery==3.18.0 \ No newline at end of file +google-cloud-bigquery==3.19.0 \ No newline at end of file From 68291ff84b48d89e1a3778d6293b0a6bc7842898 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 12 Mar 2024 09:01:49 -0700 Subject: [PATCH 1750/2016] testing: unhook prerelease-deps-3.12 from presubmit (#1851) Testing for prerelease-deps is done within continuous. Co-authored-by: Chalmer Lowe --- .../.kokoro/presubmit/prerelease-deps-3.12.cfg | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.12.cfg diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.12.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.12.cfg deleted file mode 100644 index ece962a1703e..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps-3.12.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.12" -} From cfe81368e2e1292feb70e77aeeae4f45601e000b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 15 Mar 2024 10:32:03 -0700 Subject: [PATCH 1751/2016] chore(python): add requirements for docs build (#1858) Source-Link: https://github.com/googleapis/synthtool/commit/85c23b6bc4352c1b0674848eaeb4e48645aeda6b Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:3741fd1f5f5150378563c76afa06bcc12777b5fe54c5ee01115218f83872134f Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 4 +- .../google-cloud-bigquery/.kokoro/build.sh | 7 ---- .../.kokoro/docker/docs/Dockerfile | 4 ++ .../.kokoro/docker/docs/requirements.in | 1 + .../.kokoro/docker/docs/requirements.txt | 38 +++++++++++++++++++ 5 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in create mode 100644 packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index e4e943e0259a..5d9542b1cb21 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:98f3afd11308259de6e828e37376d18867fd321aba07826e29e4f8d9cab56bad -# created: 2024-02-27T15:56:18.442440378Z + digest: sha256:3741fd1f5f5150378563c76afa06bcc12777b5fe54c5ee01115218f83872134f +# created: 2024-03-15T16:26:15.743347415Z diff --git a/packages/google-cloud-bigquery/.kokoro/build.sh b/packages/google-cloud-bigquery/.kokoro/build.sh index 0cb0d0dd0add..f38bda804393 100755 --- a/packages/google-cloud-bigquery/.kokoro/build.sh +++ b/packages/google-cloud-bigquery/.kokoro/build.sh @@ -33,13 +33,6 @@ export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json # Setup project id. export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") -# Remove old nox -python3 -m pip uninstall --yes --quiet nox-automation - -# Install nox -python3 -m pip install --upgrade --quiet nox -python3 -m nox --version - # If this is a continuous build, send the test log to the FlakyBot. # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile index 8e39a2cc438d..bdaf39fe22d0 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile @@ -80,4 +80,8 @@ RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ # Test pip RUN python3 -m pip +# Install build requirements +COPY requirements.txt /requirements.txt +RUN python3 -m pip install --require-hashes -r requirements.txt + CMD ["python3.8"] diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in new file mode 100644 index 000000000000..816817c672a1 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in @@ -0,0 +1 @@ +nox diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt new file mode 100644 index 000000000000..0e5d70f20f83 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt @@ -0,0 +1,38 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --allow-unsafe --generate-hashes requirements.in +# +argcomplete==3.2.3 \ + --hash=sha256:bf7900329262e481be5a15f56f19736b376df6f82ed27576fa893652c5de6c23 \ + --hash=sha256:c12355e0494c76a2a7b73e3a59b09024ca0ba1e279fb9ed6c1b82d5b74b6a70c + # via nox +colorlog==6.8.2 \ + --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ + --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 + # via nox +distlib==0.3.8 \ + --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ + --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 + # via virtualenv +filelock==3.13.1 \ + --hash=sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e \ + --hash=sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c + # via virtualenv +nox==2024.3.2 \ + --hash=sha256:e53514173ac0b98dd47585096a55572fe504fecede58ced708979184d05440be \ + --hash=sha256:f521ae08a15adbf5e11f16cb34e8d0e6ea521e0b92868f684e91677deb974553 + # via -r requirements.in +packaging==24.0 \ + --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ + --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 + # via nox +platformdirs==4.2.0 \ + --hash=sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068 \ + --hash=sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768 + # via virtualenv +virtualenv==20.25.1 \ + --hash=sha256:961c026ac520bac5f69acb8ea063e8a4f071bcc9457b9c1f28f6b085c511583a \ + --hash=sha256:e08e13ecdca7a0bd53798f356d5831434afa5b07b93f0abdf0797b7a06ffe197 + # via nox From 504117fbd74d215e7d52b64ad27dbc08e0cb86e2 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 15 Mar 2024 15:07:31 -0700 Subject: [PATCH 1752/2016] testing: reduce python versions in unit testing (#1857) * testing: evaluate reducing versions under unit test * align unit and system versions under test * opt 3.7 back in * widen range of versions --------- Co-authored-by: Lingqing Gan --- packages/google-cloud-bigquery/noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index c31d098b8445..9445f4f7476d 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -38,7 +38,7 @@ DEFAULT_PYTHON_VERSION = "3.8" SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.11", "3.12"] -UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.12"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() # 'docfx' is excluded since it only needs to run in 'docs-presubmit' From 44f073f3cfe54f8b130b071c88a1c4d164eb0e9f Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 15 Mar 2024 16:12:39 -0700 Subject: [PATCH 1753/2016] chore(python): update dependencies in /.kokoro (#1859) Source-Link: https://github.com/googleapis/synthtool/commit/db94845da69ccdfefd7ce55c84e6cfa74829747e Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:a8a80fc6456e433df53fc2a0d72ca0345db0ddefb409f1b75b118dfd1babd952 Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/requirements.in | 3 +- .../.kokoro/requirements.txt | 114 ++++++++---------- 3 files changed, 56 insertions(+), 65 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 5d9542b1cb21..dc9c56e9dcab 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3741fd1f5f5150378563c76afa06bcc12777b5fe54c5ee01115218f83872134f -# created: 2024-03-15T16:26:15.743347415Z + digest: sha256:a8a80fc6456e433df53fc2a0d72ca0345db0ddefb409f1b75b118dfd1babd952 +# created: 2024-03-15T16:25:47.905264637Z \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.in b/packages/google-cloud-bigquery/.kokoro/requirements.in index ec867d9fd65a..fff4d9ce0d0a 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.in +++ b/packages/google-cloud-bigquery/.kokoro/requirements.in @@ -1,5 +1,5 @@ gcp-docuploader -gcp-releasetool>=1.10.5 # required for compatibility with cryptography>=39.x +gcp-releasetool>=2 # required for compatibility with cryptography>=42.x importlib-metadata typing-extensions twine @@ -8,3 +8,4 @@ setuptools nox>=2022.11.21 # required to remove dependency on py charset-normalizer<3 click<8.1.0 +cryptography>=42.0.5 diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index bda8e38c4f31..dd61f5f32018 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -93,40 +93,41 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==42.0.4 \ - --hash=sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b \ - --hash=sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce \ - --hash=sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88 \ - --hash=sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7 \ - --hash=sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20 \ - --hash=sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9 \ - --hash=sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff \ - --hash=sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1 \ - --hash=sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764 \ - --hash=sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b \ - --hash=sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298 \ - --hash=sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1 \ - --hash=sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824 \ - --hash=sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257 \ - --hash=sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a \ - --hash=sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129 \ - --hash=sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb \ - --hash=sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929 \ - --hash=sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854 \ - --hash=sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52 \ - --hash=sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923 \ - --hash=sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885 \ - --hash=sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0 \ - --hash=sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd \ - --hash=sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2 \ - --hash=sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18 \ - --hash=sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b \ - --hash=sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992 \ - --hash=sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74 \ - --hash=sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660 \ - --hash=sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925 \ - --hash=sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449 +cryptography==42.0.5 \ + --hash=sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee \ + --hash=sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576 \ + --hash=sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d \ + --hash=sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30 \ + --hash=sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413 \ + --hash=sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb \ + --hash=sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da \ + --hash=sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4 \ + --hash=sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd \ + --hash=sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc \ + --hash=sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8 \ + --hash=sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1 \ + --hash=sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc \ + --hash=sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e \ + --hash=sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8 \ + --hash=sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940 \ + --hash=sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400 \ + --hash=sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7 \ + --hash=sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16 \ + --hash=sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278 \ + --hash=sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74 \ + --hash=sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec \ + --hash=sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1 \ + --hash=sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2 \ + --hash=sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c \ + --hash=sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922 \ + --hash=sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a \ + --hash=sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6 \ + --hash=sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1 \ + --hash=sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e \ + --hash=sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac \ + --hash=sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7 # via + # -r requirements.in # gcp-releasetool # secretstorage distlib==0.3.7 \ @@ -145,9 +146,9 @@ gcp-docuploader==0.6.5 \ --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea # via -r requirements.in -gcp-releasetool==1.16.0 \ - --hash=sha256:27bf19d2e87aaa884096ff941aa3c592c482be3d6a2bfe6f06afafa6af2353e3 \ - --hash=sha256:a316b197a543fd036209d0caba7a8eb4d236d8e65381c80cbc6d7efaa7606d63 +gcp-releasetool==2.0.0 \ + --hash=sha256:3d73480b50ba243f22d7c7ec08b115a30e1c7817c4899781840c26f9c55b8277 \ + --hash=sha256:7aa9fd935ec61e581eb8458ad00823786d91756c25e492f372b2b30962f3c28f # via -r requirements.in google-api-core==2.12.0 \ --hash=sha256:c22e01b1e3c4dcd90998494879612c38d0a3411d1f7b679eb89e2abe3ce1f553 \ @@ -392,29 +393,18 @@ platformdirs==3.11.0 \ --hash=sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3 \ --hash=sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e # via virtualenv -protobuf==3.20.3 \ - --hash=sha256:03038ac1cfbc41aa21f6afcbcd357281d7521b4157926f30ebecc8d4ea59dcb7 \ - --hash=sha256:28545383d61f55b57cf4df63eebd9827754fd2dc25f80c5253f9184235db242c \ - --hash=sha256:2e3427429c9cffebf259491be0af70189607f365c2f41c7c3764af6f337105f2 \ - --hash=sha256:398a9e0c3eaceb34ec1aee71894ca3299605fa8e761544934378bbc6c97de23b \ - --hash=sha256:44246bab5dd4b7fbd3c0c80b6f16686808fab0e4aca819ade6e8d294a29c7050 \ - --hash=sha256:447d43819997825d4e71bf5769d869b968ce96848b6479397e29fc24c4a5dfe9 \ - --hash=sha256:67a3598f0a2dcbc58d02dd1928544e7d88f764b47d4a286202913f0b2801c2e7 \ - --hash=sha256:74480f79a023f90dc6e18febbf7b8bac7508420f2006fabd512013c0c238f454 \ - --hash=sha256:819559cafa1a373b7096a482b504ae8a857c89593cf3a25af743ac9ecbd23480 \ - --hash=sha256:899dc660cd599d7352d6f10d83c95df430a38b410c1b66b407a6b29265d66469 \ - --hash=sha256:8c0c984a1b8fef4086329ff8dd19ac77576b384079247c770f29cc8ce3afa06c \ - --hash=sha256:9aae4406ea63d825636cc11ffb34ad3379335803216ee3a856787bcf5ccc751e \ - --hash=sha256:a7ca6d488aa8ff7f329d4c545b2dbad8ac31464f1d8b1c87ad1346717731e4db \ - --hash=sha256:b6cc7ba72a8850621bfec987cb72623e703b7fe2b9127a161ce61e61558ad905 \ - --hash=sha256:bf01b5720be110540be4286e791db73f84a2b721072a3711efff6c324cdf074b \ - --hash=sha256:c02ce36ec760252242a33967d51c289fd0e1c0e6e5cc9397e2279177716add86 \ - --hash=sha256:d9e4432ff660d67d775c66ac42a67cf2453c27cb4d738fc22cb53b5d84c135d4 \ - --hash=sha256:daa564862dd0d39c00f8086f88700fdbe8bc717e993a21e90711acfed02f2402 \ - --hash=sha256:de78575669dddf6099a8a0f46a27e82a1783c557ccc38ee620ed8cc96d3be7d7 \ - --hash=sha256:e64857f395505ebf3d2569935506ae0dfc4a15cb80dc25261176c784662cdcc4 \ - --hash=sha256:f4bd856d702e5b0d96a00ec6b307b0f51c1982c2bf9c0052cf9019e9a544ba99 \ - --hash=sha256:f4c42102bc82a51108e449cbb32b19b180022941c727bac0cfd50170341f16ee +protobuf==4.25.3 \ + --hash=sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4 \ + --hash=sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8 \ + --hash=sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c \ + --hash=sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d \ + --hash=sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4 \ + --hash=sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa \ + --hash=sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c \ + --hash=sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019 \ + --hash=sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9 \ + --hash=sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c \ + --hash=sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2 # via # gcp-docuploader # gcp-releasetool @@ -518,7 +508,7 @@ zipp==3.17.0 \ # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==68.2.2 \ - --hash=sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87 \ - --hash=sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a +setuptools==69.2.0 \ + --hash=sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e \ + --hash=sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c # via -r requirements.in From 74031a23a2981d22de62c51f6ce459d9284684c5 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 19 Mar 2024 00:56:11 +0100 Subject: [PATCH 1754/2016] chore(deps): update dependency pyarrow to v15.0.2 (#1861) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index b474e252c128..c7a79335853e 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -31,7 +31,7 @@ pandas===2.0.3; python_version == '3.8' pandas==2.2.1; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==15.0.1; python_version >= '3.8' +pyarrow==15.0.2; python_version >= '3.8' pyasn1==0.5.1 pyasn1-modules==0.3.0 pycparser==2.21 From 2ab03258cf146d5bf12dbe80ccbc5a8c1f82a40e Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 19 Mar 2024 09:51:49 -0400 Subject: [PATCH 1755/2016] fix: update error logging when converting to pyarrow column fails (#1836) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: update error logging when converting to pyarrow column fails * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * resolve merge conflict * resolve missing dependency * more tweaks to constraints and requirements re pyarrow * even more tweaks to constraints and requirements re pyarrow * a few more tweaks to constraints and requirements re pyarrow * resolves issue of pyarrow not installing * fix linting issue * update linting and conditionals * update linting and mypy comments * quick tags on several coverage issues related to imports * adds pragma to exception * updates test suite with new test and makes msg explicit * temporarily adding timing code * additional timing test mods * add pragmas to account for several tests * cleaned up some test code * cleaned up some test code * Update a test to include column datatype * update to pytest.raises command * Update tests/unit/test__pandas_helpers.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removed unused variable 'e' --------- Co-authored-by: Owl Bot --- .../google/cloud/bigquery/_pandas_helpers.py | 20 ++++++++++++------- .../google/cloud/bigquery/_pyarrow_helpers.py | 2 +- packages/google-cloud-bigquery/noxfile.py | 15 ++++++++++---- .../samples/desktopapp/requirements-test.txt | 1 + .../samples/snippets/requirements-test.txt | 1 + .../testing/constraints-3.11.txt | 1 + .../testing/constraints-3.12.txt | 1 + .../testing/constraints-3.7.txt | 2 +- .../tests/unit/test__pandas_helpers.py | 17 ++++++++++++++-- .../tests/unit/test_table.py | 4 ++-- 10 files changed, 47 insertions(+), 17 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index e97dda7e5777..9f8dcfde4009 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -49,10 +49,11 @@ db_dtypes_import_exception = exc date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype -pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) +from pyarrow import ArrowTypeError # type: ignore # noqa: E402 _BIGNUMERIC_SUPPORT = False -if pyarrow is not None: +if pyarrow is not None: # pragma: NO COVER _BIGNUMERIC_SUPPORT = True try: @@ -302,11 +303,16 @@ def bq_to_arrow_array(series, bq_field): field_type_upper = bq_field.field_type.upper() if bq_field.field_type else "" - if bq_field.mode.upper() == "REPEATED": - return pyarrow.ListArray.from_pandas(series, type=arrow_type) - if field_type_upper in schema._STRUCT_TYPES: - return pyarrow.StructArray.from_pandas(series, type=arrow_type) - return pyarrow.Array.from_pandas(series, type=arrow_type) + try: + if bq_field.mode.upper() == "REPEATED": + return pyarrow.ListArray.from_pandas(series, type=arrow_type) + if field_type_upper in schema._STRUCT_TYPES: + return pyarrow.StructArray.from_pandas(series, type=arrow_type) + return pyarrow.Array.from_pandas(series, type=arrow_type) + except ArrowTypeError: # pragma: NO COVER + msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray""" + _LOGGER.error(msg) + raise ArrowTypeError(msg) def get_column_or_index(dataframe, name): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py index 946743eafbb3..06509cc934c7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py @@ -49,7 +49,7 @@ def pyarrow_timestamp(): _BQ_TO_ARROW_SCALARS = {} _ARROW_SCALAR_IDS_TO_BQ = {} -if pyarrow: +if pyarrow: # pragma: NO COVER # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py # When modifying it be sure to update it there as well. # Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 9445f4f7476d..548690afa7b0 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -18,7 +18,6 @@ import os import re import shutil - import nox @@ -66,6 +65,7 @@ def default(session, install_extras=True): Python corresponding to the ``nox`` binary the ``PATH`` can run the tests. """ + constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) @@ -86,8 +86,7 @@ def default(session, install_extras=True): install_target = ".[all]" else: install_target = "." - session.install("-e", install_target, "-c", constraints_path) - + session.install("-e", install_target) session.run("python", "-m", "pip", "freeze") # Run py.test against the unit tests. @@ -108,6 +107,7 @@ def default(session, install_extras=True): @nox.session(python=UNIT_TEST_PYTHON_VERSIONS) def unit(session): """Run the unit test suite.""" + default(session) @@ -118,8 +118,11 @@ def unit_noextras(session): # Install optional dependencies that are out-of-date. # https://github.com/googleapis/python-bigquery/issues/933 # There is no pyarrow 1.0.0 package for Python 3.9. + if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - session.install("pyarrow==1.0.0") + session.install("pyarrow>=3.0.0") + elif session.python == UNIT_TEST_PYTHON_VERSIONS[-1]: + session.install("pyarrow") default(session, install_extras=False) @@ -127,6 +130,7 @@ def unit_noextras(session): @nox.session(python=DEFAULT_PYTHON_VERSION) def mypy(session): """Run type checks with mypy.""" + session.install("-e", ".[all]") session.install(MYPY_VERSION) @@ -147,6 +151,7 @@ def pytype(session): # An indirect dependecy attrs==21.1.0 breaks the check, and installing a less # recent version avoids the error until a possibly better fix is found. # https://github.com/googleapis/python-bigquery/issues/655 + session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) @@ -206,6 +211,7 @@ def system(session): @nox.session(python=DEFAULT_PYTHON_VERSION) def mypy_samples(session): """Run type checks with mypy.""" + session.install("pytest") for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"): session.install("-r", str(requirements_path)) @@ -283,6 +289,7 @@ def cover(session): This outputs the coverage report aggregating coverage from the unit test runs (not system test runs), and then erases coverage data. """ + session.install("coverage", "pytest-cov") session.run("coverage", "report", "--show-missing", "--fail-under=100") session.run("coverage", "erase") diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 9142d4905434..413a7fd4809e 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -2,3 +2,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 +pyarrow>=3.0.0 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 9142d4905434..413a7fd4809e 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -2,3 +2,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 +pyarrow>=3.0.0 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/testing/constraints-3.11.txt b/packages/google-cloud-bigquery/testing/constraints-3.11.txt index e69de29bb2d1..e80ca0ccfd79 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.11.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.11.txt @@ -0,0 +1 @@ +pyarrow>=3.0.0 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/testing/constraints-3.12.txt b/packages/google-cloud-bigquery/testing/constraints-3.12.txt index e69de29bb2d1..e80ca0ccfd79 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.12.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.12.txt @@ -0,0 +1 @@ +pyarrow>=3.0.0 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 28787adb72d9..1fc7c6838c0e 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -27,7 +27,7 @@ packaging==20.0.0 pandas==1.1.0 proto-plus==1.22.0 protobuf==3.19.5 -pyarrow==3.0.0 +pyarrow>=3.0.0 python-dateutil==2.7.3 requests==2.21.0 Shapely==1.8.4 diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index abee39065cce..24438462013c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -53,6 +53,7 @@ if pyarrow: import pyarrow.parquet import pyarrow.types + from pyarrow import ArrowTypeError # type: ignore # noqa: E402 else: # pragma: NO COVER # Mock out pyarrow when missing, because methods from pyarrow.types are # used in test parameterization. @@ -557,13 +558,25 @@ def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): rows = [[1, 2, 3], [], [4, 5, 6]] - series = pandas.Series(rows, dtype="object") + series = pandas.Series(rows, name="test_col", dtype="object") bq_field = schema.SchemaField("field_name", "INTEGER", mode="REPEATED") arrow_array = module_under_test.bq_to_arrow_array(series, bq_field) roundtrip = arrow_array.to_pylist() assert rows == roundtrip +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_conversion_fail(module_under_test): # pragma: NO COVER + rows = [[1, 2, 3], [], [4, 5, 6]] + series = pandas.Series(rows, name="test_col", dtype="object") + bq_field = schema.SchemaField("field_name", "STRING", mode="REPEATED") + exc_msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray""" + with pytest.raises(ArrowTypeError, match=exc_msg): + module_under_test.bq_to_arrow_array(series, bq_field) + raise ArrowTypeError(exc_msg) + + @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") @@ -573,7 +586,7 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): None, {"int_col": 456, "string_col": "def"}, ] - series = pandas.Series(rows, dtype="object") + series = pandas.Series(rows, name="test_col", dtype="object") bq_field = schema.SchemaField( "field_name", bq_type, diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 0d549120fb44..a8107ee970ba 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -49,7 +49,7 @@ pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() -if pyarrow: +if pyarrow: # pragma: NO COVER import pyarrow.types try: @@ -3743,7 +3743,7 @@ def test_to_dataframe_w_dtypes_mapper(self): if hasattr(pandas, "Float64Dtype"): self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) self.assertEqual(df.miles.dtype.name, "Float64") - else: + else: # pragma: NO COVER self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"]) self.assertEqual(df.miles.dtype.name, "string") From 311c76c59ef0a975e97c1f8f48c7c70802a9ea59 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 21 Mar 2024 13:16:00 +0100 Subject: [PATCH 1756/2016] chore(deps): update dependency google-auth to v2.29.0 (#1865) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index c7a79335853e..1cb20b102792 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==0.14.3; python_version >= '3.9' google-api-core==2.17.1 -google-auth==2.28.2 +google-auth==2.29.0 google-cloud-bigquery==3.19.0 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 From 1ae6e964f5fb968559be22f1a6164d419a946526 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 22 Mar 2024 11:42:22 +0100 Subject: [PATCH 1757/2016] chore(deps): update dependency google-api-core to v2.18.0 (#1866) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 1cb20b102792..6fa7ffc7ea09 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==0.14.3; python_version >= '3.9' -google-api-core==2.17.1 +google-api-core==2.18.0 google-auth==2.29.0 google-cloud-bigquery==3.19.0 google-cloud-bigquery-storage==2.24.0 From 0dc8f8bfa7c607eed584de0ee5f98380b2e581a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 27 Mar 2024 09:58:18 -0500 Subject: [PATCH 1758/2016] fix: use an allowlist instead of denylist to determine when `query_and_wait` uses `jobs.query` API (#1869) --- .../google/cloud/bigquery/_job_helpers.py | 53 +++++++++++++------ .../tests/unit/test__job_helpers.py | 15 +++++- 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 0692c9b655de..602a49eba2f4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -400,9 +400,13 @@ def query_and_wait( :class:`~google.cloud.bigquery.job.QueryJobConfig` class. """ + request_body = _to_query_request( + query=query, job_config=job_config, location=location, timeout=api_timeout + ) + # Some API parameters aren't supported by the jobs.query API. In these # cases, fallback to a jobs.insert call. - if not _supported_by_jobs_query(job_config): + if not _supported_by_jobs_query(request_body): return _wait_or_cancel( query_jobs_insert( client=client, @@ -424,9 +428,6 @@ def query_and_wait( ) path = _to_query_path(project) - request_body = _to_query_request( - query=query, job_config=job_config, location=location, timeout=api_timeout - ) if page_size is not None and max_results is not None: request_body["maxResults"] = min(page_size, max_results) @@ -506,20 +507,38 @@ def do_query(): return do_query() -def _supported_by_jobs_query(job_config: Optional[job.QueryJobConfig]) -> bool: +def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool: """True if jobs.query can be used. False if jobs.insert is needed.""" - if job_config is None: - return True - - return ( - # These features aren't supported by jobs.query. - job_config.clustering_fields is None - and job_config.destination is None - and job_config.destination_encryption_configuration is None - and job_config.range_partitioning is None - and job_config.table_definitions is None - and job_config.time_partitioning is None - ) + request_keys = frozenset(request_body.keys()) + + # Per issue: https://github.com/googleapis/python-bigquery/issues/1867 + # use an allowlist here instead of a denylist because the backend API allows + # unsupported parameters without any warning or failure. Instead, keep this + # set in sync with those in QueryRequest: + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#QueryRequest + keys_allowlist = { + "kind", + "query", + "maxResults", + "defaultDataset", + "timeoutMs", + "dryRun", + "preserveNulls", + "useQueryCache", + "useLegacySql", + "parameterMode", + "queryParameters", + "location", + "formatOptions", + "connectionProperties", + "labels", + "maximumBytesBilled", + "requestId", + "createSession", + } + + unsupported_keys = request_keys - keys_allowlist + return len(unsupported_keys) == 0 def _wait_or_cancel( diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py index c30964c576f8..671b829f7c99 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py @@ -22,6 +22,7 @@ import pytest from google.cloud.bigquery.client import Client +from google.cloud.bigquery import enums from google.cloud.bigquery import _job_helpers from google.cloud.bigquery.job import copy_ as job_copy from google.cloud.bigquery.job import extract as job_extract @@ -1141,12 +1142,22 @@ def test_make_job_id_w_job_id_overrides_prefix(): False, id="destination_encryption_configuration", ), + # priority="BATCH" is not supported. See: + # https://github.com/googleapis/python-bigquery/issues/1867 + pytest.param( + job_query.QueryJobConfig( + priority=enums.QueryPriority.BATCH, + ), + False, + id="priority=BATCH", + ), ), ) -def test_supported_by_jobs_query( +def test_supported_by_jobs_query_from_queryjobconfig( job_config: Optional[job_query.QueryJobConfig], expected: bool ): - assert _job_helpers._supported_by_jobs_query(job_config) == expected + request_body = _job_helpers._to_query_request(job_config, query="SELECT 1") + assert _job_helpers._supported_by_jobs_query(request_body) == expected def test_wait_or_cancel_no_exception(): From 8ee7ab7ae46a1b2eb538b6a92c046fe37582172b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 27 Mar 2024 10:28:27 -0500 Subject: [PATCH 1759/2016] feat: add `fields` parameter to `set_iam_policy` for consistency with update methods (#1872) --- .../google/cloud/bigquery/client.py | 79 ++++++++++++++++++- .../snippets/create_iam_policy_test.py | 44 +++++++++++ .../tests/system/test_client.py | 28 ------- .../tests/unit/test_client.py | 67 ++++++++++++++++ 4 files changed, 188 insertions(+), 30 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/snippets/create_iam_policy_test.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 408e7e49c3c3..5521e2e1e19d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -882,6 +882,35 @@ def get_iam_policy( retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Policy: + """Return the access control policy for a table resource. + + Args: + table (Union[ \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ + str, \ + ]): + The table to get the access control policy for. + If a string is passed in, this method attempts to create a + table reference from a string using + :func:`~google.cloud.bigquery.table.TableReference.from_string`. + requested_policy_version (int): + Optional. The maximum policy version that will be used to format the policy. + + Only version ``1`` is currently supported. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/GetPolicyOptions + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + + Returns: + google.api_core.iam.Policy: + The access control policy. + """ table = _table_arg_to_table_ref(table, default_project=self.project) if requested_policy_version != 1: @@ -910,7 +939,53 @@ def set_iam_policy( updateMask: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + *, + fields: Sequence[str] = (), ) -> Policy: + """Return the access control policy for a table resource. + + Args: + table (Union[ \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ + str, \ + ]): + The table to get the access control policy for. + If a string is passed in, this method attempts to create a + table reference from a string using + :func:`~google.cloud.bigquery.table.TableReference.from_string`. + policy (google.api_core.iam.Policy): + The access control policy to set. + updateMask (Optional[str]): + Mask as defined by + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask + + Incompatible with ``fields``. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + fields (Sequence[str]): + Which properties to set on the policy. See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask + + Incompatible with ``updateMask``. + + Returns: + google.api_core.iam.Policy: + The updated access control policy. + """ + if updateMask is not None and not fields: + update_mask = updateMask + elif updateMask is not None and fields: + raise ValueError("Cannot set both fields and updateMask") + elif fields: + update_mask = ",".join(fields) + else: + update_mask = None + table = _table_arg_to_table_ref(table, default_project=self.project) if not isinstance(policy, (Policy)): @@ -918,8 +993,8 @@ def set_iam_policy( body = {"policy": policy.to_api_repr()} - if updateMask is not None: - body["updateMask"] = updateMask + if update_mask is not None: + body["updateMask"] = update_mask path = "{}:setIamPolicy".format(table.path) span_attributes = {"path": path} diff --git a/packages/google-cloud-bigquery/samples/snippets/create_iam_policy_test.py b/packages/google-cloud-bigquery/samples/snippets/create_iam_policy_test.py new file mode 100644 index 000000000000..c41ced2cd36a --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/create_iam_policy_test.py @@ -0,0 +1,44 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def test_create_iam_policy(table_id: str): + your_table_id = table_id + + # [START bigquery_create_iam_policy] + from google.cloud import bigquery + + bqclient = bigquery.Client() + + policy = bqclient.get_iam_policy( + your_table_id, # e.g. "project.dataset.table" + ) + + analyst_email = "example-analyst-group@google.com" + binding = { + "role": "roles/bigquery.dataViewer", + "members": {f"group:{analyst_email}"}, + } + policy.bindings.append(binding) + + updated_policy = bqclient.set_iam_policy( + your_table_id, # e.g. "project.dataset.table" + policy, + ) + + for binding in updated_policy.bindings: + print(repr(binding)) + # [END bigquery_create_iam_policy] + + assert binding in updated_policy.bindings diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 04740de8a09a..414239323e6d 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -36,7 +36,6 @@ from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests -from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference @@ -1485,33 +1484,6 @@ def test_copy_table(self): got_rows = self._fetch_single_page(dest_table) self.assertTrue(len(got_rows) > 0) - def test_get_set_iam_policy(self): - from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE - - dataset = self.temp_dataset(_make_dataset_id("create_table")) - table_id = "test_table" - table_ref = Table(dataset.table(table_id)) - self.assertFalse(_table_exists(table_ref)) - - table = helpers.retry_403(Config.CLIENT.create_table)(table_ref) - self.to_delete.insert(0, table) - - self.assertTrue(_table_exists(table)) - - member = "serviceAccount:{}".format(Config.CLIENT.get_service_account_email()) - BINDING = { - "role": BIGQUERY_DATA_VIEWER_ROLE, - "members": {member}, - } - - policy = Config.CLIENT.get_iam_policy(table) - self.assertIsInstance(policy, Policy) - self.assertEqual(policy.bindings, []) - - policy.bindings.append(BINDING) - returned_policy = Config.CLIENT.set_iam_policy(table, policy) - self.assertEqual(returned_policy.bindings, policy.bindings) - def test_test_iam_permissions(self): dataset = self.temp_dataset(_make_dataset_id("create_table")) table_id = "test_table" diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index d20712a8a5f1..60dcab85ebd1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -1782,6 +1782,60 @@ def test_set_iam_policy(self): from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE from google.api_core.iam import Policy + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + ETAG = "foo" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + BINDINGS = [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ] + FIELDS = ("bindings", "etag") + RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS} + + policy = Policy() + for binding in BINDINGS: + policy[binding["role"]] = binding["members"] + + BODY = {"policy": policy.to_api_repr(), "updateMask": "bindings,etag"} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + returned_policy = client.set_iam_policy( + self.TABLE_REF, policy, fields=FIELDS, timeout=7.5 + ) + + final_attributes.assert_called_once_with({"path": PATH}, client, None) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + self.assertEqual(returned_policy.etag, ETAG) + self.assertEqual(returned_policy.version, VERSION) + self.assertEqual(dict(returned_policy), dict(policy)) + + def test_set_iam_policy_updateMask(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( self.PROJECT, self.DS_ID, @@ -1858,6 +1912,19 @@ def test_set_iam_policy_no_mask(self): method="POST", path=PATH, data=BODY, timeout=7.5 ) + def test_set_ia_policy_updateMask_and_fields(self): + from google.api_core.iam import Policy + + policy = Policy() + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with pytest.raises(ValueError, match="updateMask"): + client.set_iam_policy( + self.TABLE_REF, policy, updateMask="bindings", fields=("bindings",) + ) + def test_set_iam_policy_invalid_policy(self): from google.api_core.iam import Policy From b702d9b01c971f45487ed4398133b0459f57c1d7 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 27 Mar 2024 12:18:17 -0400 Subject: [PATCH 1760/2016] fix: updates a number of optional dependencies (#1864) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fix updates a number of optional dependencies. We use a different module import process (pytest.importorskip versus unittest.skipif). This first major commit gets the ball rolling, there are gonna be a few additional commits to cover other files. Fixes # 🦕 --- .../google/cloud/bigquery/_tqdm_helpers.py | 2 +- .../google/cloud/bigquery/client.py | 2 +- packages/google-cloud-bigquery/setup.py | 5 +- .../tests/system/test_client.py | 28 +- .../tests/unit/test_client.py | 172 ++++--- .../tests/unit/test_dbapi__helpers.py | 10 +- .../tests/unit/test_dbapi_connection.py | 28 +- .../tests/unit/test_dbapi_cursor.py | 33 +- .../tests/unit/test_table.py | 436 ++++++++---------- 9 files changed, 311 insertions(+), 405 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py index 456ca2530051..cb81bd8f69c0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py @@ -67,7 +67,7 @@ def get_progress_bar(progress_bar_type, description, total, unit): ) elif progress_bar_type == "tqdm_gui": return tqdm.tqdm_gui(desc=description, total=total, unit=unit) - except (KeyError, TypeError): + except (KeyError, TypeError): # pragma: NO COVER # Protect ourselves from any tqdm errors. In case of # unexpected tqdm behavior, just fall back to showing # no progress bar. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 5521e2e1e19d..891a54e5c4c2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -593,7 +593,7 @@ def _ensure_bqstorage_client( ) return None - if bqstorage_client is None: + if bqstorage_client is None: # pragma: NO COVER bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=self._credentials, client_options=client_options, diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 5a35f4136e8a..ed9a6351bf61 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -45,8 +45,9 @@ ] pyarrow_dependency = "pyarrow >= 3.0.0" extras = { - # Keep the no-op bqstorage extra for backward compatibility. - # See: https://github.com/googleapis/python-bigquery/issues/757 + # bqstorage had a period where it was a required dependency, and has been + # moved back to optional due to bloat. See + # https://github.com/googleapis/python-bigquery/issues/1196 for more background. "bqstorage": [ "google-cloud-bigquery-storage >= 2.6.0, <3.0.0dev", # Due to an issue in pip's dependency resolver, the `grpc` extra is not diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 414239323e6d..862ef3245bb5 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -54,16 +54,6 @@ from . import helpers -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - pyarrow = None JOB_TIMEOUT = 120 # 2 minutes DATA_PATH = pathlib.Path(__file__).parent.parent / "data" @@ -1772,11 +1762,10 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pyarrow") + bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials ) @@ -1834,10 +1823,8 @@ def test_dbapi_dry_run_query(self): self.assertEqual(list(rows), []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_dbapi_connection_does_not_leak_sockets(self): + pytest.importorskip("google.cloud.bigquery_storage") current_process = psutil.Process() conn_count_start = len(current_process.connections()) @@ -2382,11 +2369,10 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_nested_table_to_arrow(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + pyarrow = pytest.importorskip("pyarrow") + pyarrow.types = pytest.importorskip("pyarrow.types") from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 60dcab85ebd1..e9e74b06b43a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -32,15 +32,6 @@ import packaging import pytest -try: - import importlib.metadata as metadata -except ImportError: - import importlib_metadata as metadata - -try: - import pandas -except (ImportError, AttributeError): # pragma: NO COVER - pandas = None try: import opentelemetry @@ -59,11 +50,6 @@ msg = "Error importing from opentelemetry, is the installed version compatible?" raise ImportError(msg) from exc -try: - import pyarrow -except (ImportError, AttributeError): # pragma: NO COVER - pyarrow = None - import google.api_core.exceptions from google.api_core import client_info import google.cloud._helpers @@ -75,18 +61,9 @@ from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import google.cloud.bigquery.table -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection -if pandas is not None: - PANDAS_INSTALLED_VERSION = metadata.version("pandas") -else: - PANDAS_INSTALLED_VERSION = "0.0.0" - def _make_credentials(): import google.auth.credentials @@ -800,10 +777,9 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_creating_new_instance(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() mock_client.return_value = mock_client_instance @@ -849,10 +825,8 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): ] assert matching_warnings, "Missing dependency warning not raised." - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_obsolete_dependency(self): + pytest.importorskip("google.cloud.bigquery_storage") creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -869,10 +843,8 @@ def test_ensure_bqstorage_client_obsolete_dependency(self): ] assert matching_warnings, "Obsolete dependency warning not raised." - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_existing_client_check_passes(self): + pytest.importorskip("google.cloud.bigquery_storage") creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) mock_storage_client = mock.sentinel.mock_storage_client @@ -883,10 +855,23 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): self.assertIs(bqstorage_client, mock_storage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) + def test_ensure_bqstorage_client_is_none(self): + pytest.importorskip("google.cloud.bigquery_storage") + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + bqstorage_client = None + + assert bqstorage_client is None + bqstorage_client = client._ensure_bqstorage_client( + bqstorage_client=bqstorage_client, + ) + + assert isinstance( + bqstorage_client, google.cloud.bigquery_storage_v1.BigQueryReadClient + ) + def test_ensure_bqstorage_client_existing_client_check_fails(self): + pytest.importorskip("google.cloud.bigquery_storage") creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) mock_storage_client = mock.sentinel.mock_storage_client @@ -972,8 +957,8 @@ def test_create_routine_w_conflict(self): timeout=DEFAULT_TIMEOUT, ) - @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") def test_span_status_is_set(self): + pytest.importorskip("opentelemetry") from google.cloud.bigquery.routine import Routine tracer_provider = TracerProvider() @@ -6039,8 +6024,8 @@ def test_insert_rows_w_numeric(self): timeout=DEFAULT_TIMEOUT, ) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -6126,8 +6111,8 @@ def test_insert_rows_from_dataframe(self): ) assert call == expected_call - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe_nan(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -6194,8 +6179,8 @@ def test_insert_rows_from_dataframe_nan(self): ) assert call == expected_call - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe_many_columns(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -6247,8 +6232,8 @@ def test_insert_rows_from_dataframe_many_columns(self): assert len(actual_calls) == 1 assert actual_calls[0] == expected_call - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -7569,9 +7554,9 @@ def test_load_table_from_file_w_default_load_config(self): project=self.PROJECT, ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import PolicyTagList, SchemaField @@ -7665,9 +7650,9 @@ def test_load_table_from_dataframe(self): # (not passed in via job_config) assert "description" not in field - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_client_location(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7710,9 +7695,9 @@ def test_load_table_from_dataframe_w_client_location(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7765,9 +7750,9 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7821,9 +7806,9 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_parquet_options_none(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7873,9 +7858,9 @@ def test_load_table_from_dataframe_w_parquet_options_none(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.parquet_options.enable_list_inference is True - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_none(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7933,9 +7918,9 @@ def test_load_table_from_dataframe_w_list_inference_none(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_explicit_job_config_override(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7994,9 +7979,9 @@ def test_load_table_from_dataframe_w_explicit_job_config_override(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_default_load_config(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8044,9 +8029,9 @@ def test_load_table_from_dataframe_w_default_load_config(self): assert sent_config.write_disposition == job.WriteDisposition.WRITE_TRUNCATE assert sent_config.source_format == job.SourceFormat.PARQUET - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_false(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8105,9 +8090,9 @@ def test_load_table_from_dataframe_w_list_inference_false(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery import job client = self._make_client() @@ -8125,9 +8110,9 @@ def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(sel assert "Got unexpected source_format:" in str(exc.value) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8226,9 +8211,9 @@ def test_load_table_from_dataframe_w_automatic_schema(self): SchemaField("time_col", "TIME"), ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -8286,9 +8271,9 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.schema is None - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_index_and_auto_schema(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8348,9 +8333,9 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): ] assert sent_schema == expected_sent_schema - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_unknown_table(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES client = self._make_client() @@ -8384,9 +8369,9 @@ def test_load_table_from_dataframe_unknown_table(self): timeout=DEFAULT_TIMEOUT, ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8429,9 +8414,8 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): SchemaField("x", "INT64", "NULLABLE", None), ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - # @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8474,9 +8458,9 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se SchemaField("x", "INT64", "NULLABLE", None), ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8534,13 +8518,13 @@ def test_load_table_from_dataframe_struct_fields(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.schema == schema - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields(self): """Test that a DataFrame with array columns can be uploaded correctly. See: https://github.com/googleapis/python-bigquery/issues/19 """ + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8599,13 +8583,13 @@ def test_load_table_from_dataframe_array_fields(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.schema == schema - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields_w_auto_schema(self): """Test that a DataFrame with array columns can be uploaded correctly. See: https://github.com/googleapis/python-bigquery/issues/19 """ + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8662,9 +8646,9 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.schema == expected_schema - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8746,9 +8730,9 @@ def test_load_table_from_dataframe_w_partial_schema(self): SchemaField("bytes_col", "BYTES"), ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema_extra_types(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8783,9 +8767,9 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "bq_schema contains fields not present in dataframe" in message assert "unknown_col" in message - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8816,9 +8800,9 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): assert call_args is not None assert call_args.get("parquet_compression") == "LZ4" - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") client = self._make_client() records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) @@ -8846,8 +8830,8 @@ def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): ) def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): - pytest.importorskip("pandas", reason="Requires `pandas`") - pytest.importorskip("pyarrow", reason="Requires `pyarrow`") + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") client = self._make_client() records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] @@ -8874,14 +8858,14 @@ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): location=self.LOCATION, ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): """Test that a DataFrame with null columns can be uploaded if a BigQuery schema is specified. See: https://github.com/googleapis/google-cloud-python/issues/7370 """ + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -8919,8 +8903,8 @@ def test_load_table_from_dataframe_w_nulls(self): assert sent_config.schema == schema assert sent_config.source_format == job.SourceFormat.PARQUET - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_load_table_from_dataframe_w_invaild_job_config(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery import job client = self._make_client() @@ -8937,8 +8921,8 @@ def test_load_table_from_dataframe_w_invaild_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_load_table_from_dataframe_with_csv_source_format(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8987,9 +8971,9 @@ def test_load_table_from_dataframe_with_csv_source_format(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.CSV - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 542f923d26b2..7e1da0034309 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -21,16 +21,10 @@ import pytest -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - import google.cloud._helpers from google.cloud.bigquery import query, table from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions -from tests.unit.helpers import _to_pyarrow class TestQueryParameters(unittest.TestCase): @@ -215,8 +209,10 @@ def test_empty_iterable(self): result = _helpers.to_bq_table_rows(rows_iterable) self.assertEqual(list(result), []) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_non_empty_iterable(self): + pytest.importorskip("pyarrow") + from tests.unit.helpers import _to_pyarrow + rows_iterable = [ dict( one=_to_pyarrow(1.1), diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 88378ec9818e..4071e57e0998 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -13,14 +13,10 @@ # limitations under the License. import gc +import pytest import unittest from unittest import mock -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - class TestConnection(unittest.TestCase): @staticmethod @@ -41,6 +37,8 @@ def _mock_client(self): def _mock_bqstorage_client(self): # Assumption: bigquery_storage exists. It's the test's responisbility to # not use this helper or skip itself if bqstorage is not installed. + from google.cloud import bigquery_storage + mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client._transport = mock.Mock(spec=["channel"]) mock_client._transport.grpc_channel = mock.Mock(spec=["close"]) @@ -57,10 +55,8 @@ def test_ctor_wo_bqstorage_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, None) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ctor_w_bqstorage_client(self): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.dbapi import Connection mock_client = self._mock_client() @@ -89,10 +85,8 @@ def test_connect_wo_client(self, mock_client): self.assertIsNotNone(connection._client) self.assertIsNotNone(connection._bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_client(self): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -107,10 +101,8 @@ def test_connect_w_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_both_clients(self): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -143,10 +135,8 @@ def test_raises_error_if_closed(self): ): getattr(connection, method)() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_closes_all_created_bigquery_clients(self): + pytest.importorskip("google.cloud.bigquery_storage") client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() @@ -168,10 +158,8 @@ def test_close_closes_all_created_bigquery_clients(self): self.assertTrue(client.close.called) self.assertTrue(bqstorage_client._transport.grpc_channel.close.called) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): + pytest.importorskip("google.cloud.bigquery_storage") client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() connection = self._make_one(client=client, bqstorage_client=bqstorage_client) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index e9fd2e3dd3c8..6fca4cec0724 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -21,18 +21,8 @@ import google.cloud.bigquery.table as bq_table -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - from google.api_core import exceptions -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - from tests.unit.helpers import _to_pyarrow @@ -97,6 +87,8 @@ def _mock_client( return mock_client def _mock_bqstorage_client(self, rows=None, stream_count=0): + from google.cloud import bigquery_storage + if rows is None: rows = [] @@ -320,11 +312,9 @@ def test_fetchall_w_row(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pyarrow") from google.cloud.bigquery import dbapi # use unordered data to also test any non-determenistic key order in dicts @@ -380,10 +370,8 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): self.assertEqual(sorted_row_data, expected_row_data) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import dbapi mock_client = self._mock_client( @@ -410,10 +398,8 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): # check the data returned self.assertEqual(rows, []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import dbapi row_data = [bq_table.Row([1.1, 1.2], {"foo": 0, "bar": 1})] @@ -448,11 +434,10 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): # the default client was not used mock_client.list_rows.assert_not_called() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_no_arrow_compression(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pyarrow") + from google.cloud import bigquery_storage from google.cloud.bigquery import dbapi # Use unordered data to also test any non-determenistic key order in dicts. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a8107ee970ba..dbc5948b8cf0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -24,11 +24,6 @@ import pytest -try: - import importlib.metadata as metadata -except ImportError: - import importlib_metadata as metadata - import google.api_core.exceptions from test_utils.imports import maybe_fail_import @@ -37,48 +32,6 @@ from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference -try: - from google.cloud import bigquery_storage - from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( - grpc as big_query_read_grpc_transport, - ) -except ImportError: # pragma: NO COVER - bigquery_storage = None - big_query_read_grpc_transport = None - - -pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() - -if pyarrow: # pragma: NO COVER - import pyarrow.types - -try: - import pandas -except (ImportError, AttributeError): # pragma: NO COVER - pandas = None - -try: - import db_dtypes # type: ignore -except ImportError: # pragma: NO COVER - db_dtypes = None - -try: - import geopandas -except (ImportError, AttributeError): # pragma: NO COVER - geopandas = None - -try: - import tqdm - from tqdm.std import TqdmDeprecationWarning - -except (ImportError, AttributeError): # pragma: NO COVER - tqdm = None - -if pandas is not None: - PANDAS_INSTALLED_VERSION = metadata.version("pandas") -else: - PANDAS_INSTALLED_VERSION = "0.0.0" - def _mock_client(): from google.cloud.bigquery import client @@ -1948,6 +1901,8 @@ def test_row(self): class Test_EmptyRowIterator(unittest.TestCase): + PYARROW_MINIMUM_VERSION = str(_versions_helpers._MIN_PYARROW_VERSION) + def _make_one(self): from google.cloud.bigquery.table import _EmptyRowIterator @@ -1963,15 +1918,17 @@ def test_to_arrow_error_if_pyarrow_is_none(self): with self.assertRaises(ValueError): row_iterator.to_arrow() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): + pyarrow = pytest.importorskip("pyarrow") row_iterator = self._make_one() tbl = row_iterator.to_arrow() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 0) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) row_iterator = self._make_one() arrow_iter = row_iterator.to_arrow_iterable() @@ -1989,8 +1946,8 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): + pandas = pytest.importorskip("pandas") row_iterator = self._make_one() df = row_iterator.to_dataframe(create_bqstorage_client=False) self.assertIsInstance(df, pandas.DataFrame) @@ -2002,8 +1959,8 @@ def test_to_dataframe_iterable_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe_iterable() - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable(self): + pandas = pytest.importorskip("pandas") row_iterator = self._make_one() df_iter = row_iterator.to_dataframe_iterable() @@ -2027,8 +1984,8 @@ def test_to_geodataframe_if_geopandas_is_none(self): ): row_iterator.to_geodataframe(create_bqstorage_client=False) - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe(self): + geopandas = pytest.importorskip("geopandas") row_iterator = self._make_one() df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) @@ -2040,6 +1997,8 @@ def test_to_geodataframe(self): class TestRowIterator(unittest.TestCase): + PYARROW_MINIMUM_VERSION = str(_versions_helpers._MIN_PYARROW_VERSION) + def _class_under_test(self): from google.cloud.bigquery.table import RowIterator @@ -2367,10 +2326,8 @@ def test__should_use_bqstorage_returns_false_when_completely_cached(self): ) ) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test__should_use_bqstorage_returns_true_if_no_cached_results(self): + pytest.importorskip("google.cloud.bigquery_storage") iterator = self._make_one(first_page_response=None) # not cached result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True @@ -2413,10 +2370,8 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): self.assertFalse(result) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test__should_use_bqstorage_returns_false_w_warning_if_obsolete_version(self): + pytest.importorskip("google.cloud.bigquery_storage") iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( @@ -2435,8 +2390,10 @@ def test__should_use_bqstorage_returns_false_w_warning_if_obsolete_version(self) ] assert matching_warnings, "Obsolete dependency warning not raised." - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2536,14 +2493,17 @@ def test_to_arrow_iterable(self): [[{"name": "Bepples Phlyntstone", "age": 0}, {"name": "Dino", "age": 4}]], ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_iterable_w_bqstorage(self): + pyarrow = pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage_v1 import reader + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) + from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - from google.cloud.bigquery_storage_v1 import reader bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_client._transport = mock.create_autospec( @@ -2615,8 +2575,10 @@ def test_to_arrow_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2697,8 +2659,11 @@ def test_to_arrow(self): ], ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_nulls(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) + import pyarrow.types from google.cloud.bigquery.schema import SchemaField schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] @@ -2730,8 +2695,10 @@ def test_to_arrow_w_nulls(self): self.assertEqual(names, ["Donkey", "Diddy", "Dixie", None]) self.assertEqual(ages, [32, 29, None, 111]) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_unknown_type(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2773,8 +2740,10 @@ def test_to_arrow_w_unknown_type(self): warning = warned[0] self.assertTrue("sport" in str(warning)) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2812,11 +2781,9 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): + pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2856,11 +2823,9 @@ def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): ) mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): + pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2896,14 +2861,16 @@ def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage(self): + pyarrow = pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage from google.cloud.bigquery_storage_v1 import reader + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_client._transport = mock.create_autospec( @@ -2977,13 +2944,15 @@ def test_to_arrow_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_creates_client(self): + pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) mock_client = _mock_client() bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -3008,8 +2977,10 @@ def test_to_arrow_w_bqstorage_creates_client(self): mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3039,13 +3010,12 @@ def mock_verify_version(raise_if_error: bool = False): self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_no_streams(self): + pyarrow = pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() @@ -3079,12 +3049,10 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[1].name, "colC") self.assertEqual(actual_table.schema[2].name, "colB") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm_gui") - @mock.patch("tqdm.notebook.tqdm") - @mock.patch("tqdm.tqdm") - def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock): + def test_to_arrow_progress_bar(self): + pytest.importorskip("pyarrow") + pytest.importorskip("tqdm") + pytest.importorskip("tqdm.notebook") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3101,12 +3069,13 @@ def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_moc api_request = mock.Mock(return_value={"rows": rows}) progress_bars = ( - ("tqdm", tqdm_mock), - ("tqdm_notebook", tqdm_notebook_mock), - ("tqdm_gui", tqdm_gui_mock), + ("tqdm", mock.patch("tqdm.tqdm")), + ("tqdm_notebook", mock.patch("tqdm.notebook.tqdm")), + ("tqdm_gui", mock.patch("tqdm.tqdm_gui")), ) - for progress_bar_type, progress_bar_mock in progress_bars: + for progress_bar_type, bar_patch in progress_bars: + progress_bar_mock = bar_patch.start() row_iterator = self._make_one(_mock_client(), api_request, path, schema) tbl = row_iterator.to_arrow( progress_bar_type=progress_bar_type, @@ -3129,8 +3098,8 @@ def test_to_arrow_w_pyarrow_none(self): with self.assertRaises(ValueError): row_iterator.to_arrow() - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3171,8 +3140,8 @@ def test_to_dataframe_iterable(self): self.assertEqual(df_2["name"][0], "Sven") self.assertEqual(df_2["age"][0], 33) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable_with_dtypes(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3213,15 +3182,17 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["name"][0], "Sven") self.assertEqual(df_2["age"][0], 33) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): + pandas = pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage from google.cloud.bigquery_storage_v1 import reader + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) arrow_fields = [ pyarrow.field("colA", pyarrow.int64()), @@ -3285,13 +3256,12 @@ def test_to_dataframe_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -3358,8 +3328,8 @@ def test_to_dataframe_iterable_error_if_pandas_is_none(self): with pytest.raises(ValueError, match="pandas"): row_iterator.to_dataframe_iterable() - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3384,9 +3354,9 @@ def test_to_dataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "Int64") - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.schema import SchemaField schema = [SchemaField("some_timestamp", "TIMESTAMP")] @@ -3412,9 +3382,9 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): ], ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.schema import SchemaField schema = [SchemaField("some_datetime", "DATETIME")] @@ -3436,14 +3406,10 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): [datetime.datetime(4567, 1, 1), datetime.datetime(9999, 12, 31)], ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm_gui") - @mock.patch("tqdm.notebook.tqdm") - @mock.patch("tqdm.tqdm") - def test_to_dataframe_progress_bar( - self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock - ): + def test_to_dataframe_progress_bar(self): + pytest.importorskip("pandas") + pytest.importorskip("tqdm") + from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3460,12 +3426,13 @@ def test_to_dataframe_progress_bar( api_request = mock.Mock(return_value={"rows": rows}) progress_bars = ( - ("tqdm", tqdm_mock), - ("tqdm_notebook", tqdm_notebook_mock), - ("tqdm_gui", tqdm_gui_mock), + ("tqdm", mock.patch("tqdm.tqdm")), + ("tqdm_notebook", mock.patch("tqdm.notebook.tqdm")), + ("tqdm_gui", mock.patch("tqdm.tqdm_gui")), ) - for progress_bar_type, progress_bar_mock in progress_bars: + for progress_bar_type, bar_patch in progress_bars: + progress_bar_mock = bar_patch.start() row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe( progress_bar_type=progress_bar_type, @@ -3477,9 +3444,9 @@ def test_to_dataframe_progress_bar( progress_bar_mock().close.assert_called_once() self.assertEqual(len(df), 4) - @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): + pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3505,9 +3472,9 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): self.assertEqual(len(user_warnings), 0) self.assertEqual(len(df), 4) - @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) def test_to_dataframe_no_tqdm(self): + pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3539,12 +3506,12 @@ def test_to_dataframe_no_tqdm(self): # should still work. self.assertEqual(len(df), 4) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm_gui", new=None) # will raise TypeError on call - @mock.patch("tqdm.notebook.tqdm", new=None) # will raise TypeError on call - @mock.patch("tqdm.tqdm", new=None) # will raise TypeError on call def test_to_dataframe_tqdm_error(self): + pytest.importorskip("pandas") + pytest.importorskip("tqdm") + mock.patch("tqdm.tqdm_gui", new=None) + mock.patch("tqdm.notebook.tqdm", new=None) + mock.patch("tqdm.tqdm", new=None) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3573,14 +3540,14 @@ def test_to_dataframe_tqdm_error(self): # Warn that a progress bar was requested, but creating the tqdm # progress bar failed. - for warning in warned: + for warning in warned: # pragma: NO COVER self.assertIn( warning.category, - [UserWarning, DeprecationWarning, TqdmDeprecationWarning], + [UserWarning, DeprecationWarning], ) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3596,8 +3563,8 @@ def test_to_dataframe_w_empty_results(self): self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): + pandas = pytest.importorskip("pandas") import datetime from google.cloud.bigquery.schema import SchemaField @@ -3637,8 +3604,9 @@ def test_to_dataframe_w_various_types_nullable(self): self.assertIsInstance(row.complete, bool) self.assertIsInstance(row.date, datetime.date) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_dtypes_mapper(self): + pandas = pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3832,9 +3800,11 @@ def test_to_dataframe_w_dtypes_mapper(self): ) self.assertEqual(df.timestamp.dtype.name, "object") - @unittest.skipIf(pandas is None, "Requires `pandas`") - @pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") def test_to_dataframe_w_none_dtypes_mapper(self): + pandas = pytest.importorskip("pandas") + pandas_major_version = pandas.__version__[0:2] + if pandas_major_version not in ["0.", "1."]: + pytest.skip(reason="Requires a version of pandas less than 2.0") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3888,8 +3858,8 @@ def test_to_dataframe_w_none_dtypes_mapper(self): self.assertEqual(df.time.dtype.name, "object") self.assertEqual(df.timestamp.dtype.name, "datetime64[ns, UTC]") - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_unsupported_dtypes_mapper(self): + pytest.importorskip("pandas") import numpy from google.cloud.bigquery.schema import SchemaField @@ -3945,9 +3915,11 @@ def test_to_dataframe_w_unsupported_dtypes_mapper(self): timestamp_dtype=numpy.dtype("datetime64[us]"), ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") def test_to_dataframe_column_dtypes(self): + pandas = pytest.importorskip("pandas") + pandas_major_version = pandas.__version__[0:2] + if pandas_major_version not in ["0.", "1."]: + pytest.skip("Requires a version of pandas less than 2.0") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3960,9 +3932,9 @@ def test_to_dataframe_column_dtypes(self): SchemaField("date", "DATE"), ] row_data = [ - ["1433836800000000", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"], + ["1433836800000", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"], [ - "1387811700000000", + "1387811700000", "2580", "17.7", "28.5", @@ -3970,7 +3942,7 @@ def test_to_dataframe_column_dtypes(self): "false", "1953-06-14", ], - ["1385565300000000", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], + ["1385565300000", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3995,13 +3967,12 @@ def test_to_dataframe_column_dtypes(self): self.assertEqual(df.complete.dtype.name, "boolean") self.assertEqual(df.date.dtype.name, "dbdate") - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_datetime_objects(self): # When converting date or timestamp values to nanosecond # precision, the result can be out of pyarrow bounds. To avoid # the error when converting to Pandas, we use object type if # necessary. - + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -4044,9 +4015,10 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() - @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.shapely", new=None) def test_to_dataframe_error_if_shapely_is_none(self): + pytest.importorskip("pandas") + with self.assertRaisesRegex( ValueError, re.escape( @@ -4056,8 +4028,9 @@ def test_to_dataframe_error_if_shapely_is_none(self): ): self._make_one_from_data().to_dataframe(geography_as_object=True) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_bqstorage_warning(self): + pytest.importorskip("pandas") + from google.cloud.bigquery.schema import SchemaField schema = [ @@ -4092,8 +4065,8 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self): ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self): + pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -4133,8 +4106,8 @@ def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self): ) mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): + pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -4170,13 +4143,15 @@ def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_creates_client(self): + pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) mock_client = _mock_client() bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -4201,13 +4176,12 @@ def test_to_dataframe_w_bqstorage_creates_client(self): mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_no_streams(self): + pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() @@ -4230,13 +4204,12 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.table import Table + from google.cloud import bigquery_storage bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() @@ -4255,12 +4228,11 @@ def test_to_dataframe_w_bqstorage_logs_session(self): "with BQ Storage API session 'projects/test-proj/locations/us/sessions/SOMESESSION'." ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") + from google.cloud import bigquery_storage from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader @@ -4310,15 +4282,17 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage from google.cloud.bigquery_storage_v1 import reader + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) arrow_fields = [ pyarrow.field("colA", pyarrow.int64()), @@ -4390,12 +4364,10 @@ def test_to_dataframe_w_bqstorage_nonempty(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader @@ -4444,14 +4416,11 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertEqual(len(got.index), total_rows) self.assertTrue(got.index.is_unique) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm") - def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): + def test_to_dataframe_w_bqstorage_updates_progress_bar(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") + pytest.importorskip("tqdm") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader @@ -4507,28 +4476,27 @@ def blocking_to_arrow(*args, **kwargs): selected_fields=schema, ) - row_iterator.to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type="tqdm" - ) + with mock.patch("tqdm.tqdm") as tqdm_mock: + row_iterator.to_dataframe( + bqstorage_client=bqstorage_client, progress_bar_type="tqdm" + ) + + # Make sure that this test updated the progress bar once per page from + # each stream. + total_pages = len(streams) * len(mock_pages) + expected_total_rows = total_pages * len(page_items) + progress_updates = [ + args[0] for args, kwargs in tqdm_mock().update.call_args_list + ] + # Should have sent >1 update due to delay in blocking_to_arrow. + self.assertGreater(len(progress_updates), 1) + self.assertEqual(sum(progress_updates), expected_total_rows) + tqdm_mock().close.assert_called_once() - # Make sure that this test updated the progress bar once per page from - # each stream. - total_pages = len(streams) * len(mock_pages) - expected_total_rows = total_pages * len(page_items) - progress_updates = [ - args[0] for args, kwargs in tqdm_mock().update.call_args_list - ] - # Should have sent >1 update due to delay in blocking_to_arrow. - self.assertGreater(len(progress_updates), 1) - self.assertEqual(sum(progress_updates), expected_total_rows) - tqdm_mock().close.assert_called_once() - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader @@ -4611,8 +4579,8 @@ def blocking_to_arrow(*args, **kwargs): # should have been set. self.assertLessEqual(mock_page.to_dataframe.call_count, 2) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4643,11 +4611,10 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): self.assertEqual(df.name.dtype.name, "object") self.assertTrue(df.index.is_unique) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + from google.cloud import bigquery_storage from google.cloud.bigquery import table as mut bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -4665,10 +4632,8 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): with pytest.raises(google.api_core.exceptions.Forbidden): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_partition(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4685,10 +4650,8 @@ def test_to_dataframe_w_bqstorage_partition(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_snapshot(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4705,15 +4668,17 @@ def test_to_dataframe_w_bqstorage_snapshot(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): + pytest.importorskip("google.cloud.bigquery_storage") + pandas = pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") + from google.cloud import bigquery_storage from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) arrow_fields = [ # Not alphabetical to test column order. @@ -4818,8 +4783,9 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_dataframe_geography_as_object(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("geopandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY")), ( @@ -4853,8 +4819,8 @@ def test_to_geodataframe_error_if_geopandas_is_none(self): ): self._make_one_from_data().to_geodataframe() - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe(self): + geopandas = pytest.importorskip("geopandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY")), ( @@ -4883,8 +4849,8 @@ def test_to_geodataframe(self): self.assertEqual(df.geog.crs.srs, "EPSG:4326") self.assertEqual(df.geog.crs.name, "WGS 84") - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_ambiguous_geog(self): + pytest.importorskip("geopandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () ) @@ -4898,8 +4864,8 @@ def test_to_geodataframe_ambiguous_geog(self): ): row_iterator.to_geodataframe(create_bqstorage_client=False) - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_bad_geography_column(self): + pytest.importorskip("geopandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () ) @@ -4914,8 +4880,8 @@ def test_to_geodataframe_bad_geography_column(self): create_bqstorage_client=False, geography_column="xxx" ) - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_no_geog(self): + pytest.importorskip("geopandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "STRING")), () ) @@ -4928,8 +4894,9 @@ def test_to_geodataframe_no_geog(self): ): row_iterator.to_geodataframe(create_bqstorage_client=False) - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_w_geography_column(self): + geopandas = pytest.importorskip("geopandas") + pandas = pytest.importorskip("pandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), ( @@ -4974,7 +4941,6 @@ def test_to_geodataframe_w_geography_column(self): ["0.0", "0.0", "0.0"], ) - @unittest.skipIf(geopandas is None, "Requires `geopandas`") @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): """ @@ -4983,6 +4949,8 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): This test just demonstrates that. We don't need to test all the variations, which are tested for to_dataframe. """ + pandas = pytest.importorskip("pandas") + geopandas = pytest.importorskip("geopandas") import numpy from shapely import wkt @@ -5676,9 +5644,6 @@ def test_from_api_repr_only_foreign_keys_resource(self): self.assertIsNotNone(instance.foreign_keys) -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.parametrize( "table_path", ( @@ -5689,6 +5654,7 @@ def test_from_api_repr_only_foreign_keys_resource(self): ), ) def test_table_reference_to_bqstorage_v1_stable(table_path): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import table as mut expected = "projects/my-project/datasets/my_dataset/tables/my_table" From a797e9af2e0f751135a6c69af93dd58b65f336e6 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 27 Mar 2024 18:26:43 +0100 Subject: [PATCH 1761/2016] chore(deps): update all dependencies (#1873) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt * Update samples/geography/requirements.txt * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .../samples/geography/requirements.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 6fa7ffc7ea09..bdaead5b1934 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -32,8 +32,10 @@ pandas==2.2.1; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==15.0.2; python_version >= '3.8' -pyasn1==0.5.1 -pyasn1-modules==0.3.0 +pyasn1==0.5.1; python_version == '3.7' +pyasn1==0.6.0; python_version >= '3.8' +pyasn1-modules==0.3.0; python_version == '3.7' +pyasn1-modules==0.4.0; python_version >= '3.8' pycparser==2.21 pyparsing==3.1.2 python-dateutil==2.9.0.post0 From 6d3ed9df9c736c1c1995f5800f5dba2827f225fb Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 27 Mar 2024 14:39:38 -0700 Subject: [PATCH 1762/2016] chore(main): release 3.20.0 (#1850) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Sweña (Swast) --- packages/google-cloud-bigquery/CHANGELOG.md | 15 +++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 4cb0e1d20bdf..578df101f167 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.20.0](https://github.com/googleapis/python-bigquery/compare/v3.19.0...v3.20.0) (2024-03-27) + + +### Features + +* Add `fields` parameter to `set_iam_policy` for consistency with update methods ([#1872](https://github.com/googleapis/python-bigquery/issues/1872)) ([08b1e6f](https://github.com/googleapis/python-bigquery/commit/08b1e6f9c41121907c345daedbae40ece18e8b6a)) + + +### Bug Fixes + +* Correct type checking ([#1848](https://github.com/googleapis/python-bigquery/issues/1848)) ([2660dbd](https://github.com/googleapis/python-bigquery/commit/2660dbd4821a89a1e20e3e1541504a409f1979aa)) +* Update error logging when converting to pyarrow column fails ([#1836](https://github.com/googleapis/python-bigquery/issues/1836)) ([0ac6e9b](https://github.com/googleapis/python-bigquery/commit/0ac6e9bf186945832f5dcdf5a4d95667b4da223e)) +* Updates a number of optional dependencies ([#1864](https://github.com/googleapis/python-bigquery/issues/1864)) ([c2496a1](https://github.com/googleapis/python-bigquery/commit/c2496a1014a7d99e805b3d0a66e4517165bd7e01)) +* Use an allowlist instead of denylist to determine when `query_and_wait` uses `jobs.query` API ([#1869](https://github.com/googleapis/python-bigquery/issues/1869)) ([e265db6](https://github.com/googleapis/python-bigquery/commit/e265db6a6a37d13056dcaac240c2cf3975dfd644)) + ## [3.19.0](https://github.com/googleapis/python-bigquery/compare/v3.18.0...v3.19.0) (2024-03-11) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 27f24bd196b6..4537b82501c8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.19.0" +__version__ = "3.20.0" From dc1fc735a1b777f0b9f17027ddfad71b5cce8cd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 28 Mar 2024 15:15:14 -0500 Subject: [PATCH 1763/2016] fix: make `pyarrow` an optional dependency post-3.20.0 yanked release (#1879) * fix: make `pyarrow` an optional dependency again * install older version of pyarrow * fix for older tqdm * remove many pragma: NO COVERs --- .../google/cloud/bigquery/_pandas_helpers.py | 18 ++++----- .../google/cloud/bigquery/_pyarrow_helpers.py | 4 +- .../google/cloud/bigquery/_tqdm_helpers.py | 13 +++--- .../cloud/bigquery/_versions_helpers.py | 4 +- .../google/cloud/bigquery/job/query.py | 7 +--- .../google/cloud/bigquery/magics/magics.py | 2 +- .../google/cloud/bigquery/table.py | 6 +-- packages/google-cloud-bigquery/noxfile.py | 15 +++---- .../samples/desktopapp/requirements-test.txt | 1 - .../samples/snippets/requirements-test.txt | 2 +- .../samples/snippets/requirements.txt | 3 +- .../testing/constraints-3.11.txt | 1 - .../testing/constraints-3.12.txt | 1 - .../testing/constraints-3.7.txt | 4 +- .../tests/unit/job/test_query_pandas.py | 40 +++++++------------ .../tests/unit/test__pandas_helpers.py | 15 +++---- .../tests/unit/test__versions_helpers.py | 33 +++++++++++---- .../tests/unit/test_legacy_types.py | 2 +- .../tests/unit/test_opentelemetry_tracing.py | 2 +- .../tests/unit/test_table.py | 38 +++++++++++++++++- .../tests/unit/test_table_pandas.py | 15 ++----- 21 files changed, 126 insertions(+), 100 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 9f8dcfde4009..3b58d3736dcf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -32,7 +32,7 @@ import pandas # type: ignore pandas_import_exception = None -except ImportError as exc: # pragma: NO COVER +except ImportError as exc: pandas = None pandas_import_exception = exc else: @@ -44,25 +44,21 @@ date_dtype_name = db_dtypes.DateDtype.name time_dtype_name = db_dtypes.TimeDtype.name db_dtypes_import_exception = None -except ImportError as exc: # pragma: NO COVER +except ImportError as exc: db_dtypes = None db_dtypes_import_exception = exc date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype -pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) -from pyarrow import ArrowTypeError # type: ignore # noqa: E402 - -_BIGNUMERIC_SUPPORT = False -if pyarrow is not None: # pragma: NO COVER - _BIGNUMERIC_SUPPORT = True +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() try: # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` from shapely.geometry.base import BaseGeometry as _BaseGeometry # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: # No shapely, use NoneType for _BaseGeometry as a placeholder. _BaseGeometry = type(None) else: + # We don't have any unit test sessions that install shapely but not pandas. if pandas is not None: # pragma: NO COVER def _to_wkb(): @@ -309,10 +305,10 @@ def bq_to_arrow_array(series, bq_field): if field_type_upper in schema._STRUCT_TYPES: return pyarrow.StructArray.from_pandas(series, type=arrow_type) return pyarrow.Array.from_pandas(series, type=arrow_type) - except ArrowTypeError: # pragma: NO COVER + except pyarrow.ArrowTypeError: msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray""" _LOGGER.error(msg) - raise ArrowTypeError(msg) + raise pyarrow.ArrowTypeError(msg) def get_column_or_index(dataframe, name): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py index 06509cc934c7..3c745a611bdb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py @@ -20,7 +20,7 @@ try: import pyarrow # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pyarrow = None @@ -49,7 +49,7 @@ def pyarrow_timestamp(): _BQ_TO_ARROW_SCALARS = {} _ARROW_SCALAR_IDS_TO_BQ = {} -if pyarrow: # pragma: NO COVER +if pyarrow: # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py # When modifying it be sure to update it there as well. # Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py index cb81bd8f69c0..22ccee971733 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py @@ -23,11 +23,14 @@ try: import tqdm # type: ignore - import tqdm.notebook as notebook # type: ignore - -except ImportError: # pragma: NO COVER +except ImportError: tqdm = None +try: + import tqdm.notebook as tqdm_notebook # type: ignore +except ImportError: + tqdm_notebook = None + if typing.TYPE_CHECKING: # pragma: NO COVER from google.cloud.bigquery import QueryJob from google.cloud.bigquery.table import RowIterator @@ -42,7 +45,7 @@ def get_progress_bar(progress_bar_type, description, total, unit): """Construct a tqdm progress bar object, if tqdm is installed.""" - if tqdm is None: + if tqdm is None or tqdm_notebook is None and progress_bar_type == "tqdm_notebook": if progress_bar_type is not None: warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) return None @@ -58,7 +61,7 @@ def get_progress_bar(progress_bar_type, description, total, unit): unit=unit, ) elif progress_bar_type == "tqdm_notebook": - return notebook.tqdm( + return tqdm_notebook.tqdm( bar_format="{l_bar}{bar}|", desc=description, file=sys.stdout, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py index 4ff4b9700766..50d5961b3515 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py @@ -73,7 +73,7 @@ def try_import(self, raise_if_error: bool = False) -> Any: """ try: import pyarrow - except ImportError as exc: # pragma: NO COVER + except ImportError as exc: if raise_if_error: raise exceptions.LegacyPyarrowError( "pyarrow package not found. Install pyarrow version >=" @@ -212,7 +212,7 @@ def try_import(self, raise_if_error: bool = False) -> Any: """ try: import pandas - except ImportError as exc: # pragma: NO COVER + except ImportError as exc: if raise_if_error: raise exceptions.LegacyPandasError( "pandas package not found. Install pandas version >=" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 83d2751ce17f..e92e9cb9eee5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -56,14 +56,9 @@ try: import pandas # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pandas = None -try: - import db_dtypes # type: ignore -except ImportError: # pragma: NO COVER - db_dtypes = None - if typing.TYPE_CHECKING: # pragma: NO COVER # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 8464c87929a6..6e6b21965eb7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -95,7 +95,7 @@ import IPython # type: ignore from IPython import display # type: ignore from IPython.core import magic_arguments # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: raise ImportError("This module can only be loaded in IPython.") from google.api_core import client_info diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index b3be4ff904b6..c002822fed7f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -26,17 +26,17 @@ try: import pandas # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pandas = None try: import pyarrow # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pyarrow = None try: import db_dtypes # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: db_dtypes = None try: diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 548690afa7b0..3adb4ba702a7 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -86,7 +86,7 @@ def default(session, install_extras=True): install_target = ".[all]" else: install_target = "." - session.install("-e", install_target) + session.install("-e", install_target, "-c", constraints_path) session.run("python", "-m", "pip", "freeze") # Run py.test against the unit tests. @@ -115,14 +115,15 @@ def unit(session): def unit_noextras(session): """Run the unit test suite.""" - # Install optional dependencies that are out-of-date. + # Install optional dependencies that are out-of-date to see that + # we fail gracefully. # https://github.com/googleapis/python-bigquery/issues/933 - # There is no pyarrow 1.0.0 package for Python 3.9. - + # + # We only install this extra package on one of the two Python versions + # so that it continues to be an optional dependency. + # https://github.com/googleapis/python-bigquery/issues/1877 if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - session.install("pyarrow>=3.0.0") - elif session.python == UNIT_TEST_PYTHON_VERSIONS[-1]: - session.install("pyarrow") + session.install("pyarrow==1.0.0") default(session, install_extras=False) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 413a7fd4809e..9142d4905434 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -2,4 +2,3 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 -pyarrow>=3.0.0 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 413a7fd4809e..0343ab89a838 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ +# samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 -pyarrow>=3.0.0 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index b3347499f35f..af9436c51e15 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1 +1,2 @@ -google-cloud-bigquery==3.19.0 \ No newline at end of file +# samples/snippets should be runnable with no "extras" +google-cloud-bigquery==3.19.0 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.11.txt b/packages/google-cloud-bigquery/testing/constraints-3.11.txt index e80ca0ccfd79..e69de29bb2d1 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.11.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.11.txt @@ -1 +0,0 @@ -pyarrow>=3.0.0 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/testing/constraints-3.12.txt b/packages/google-cloud-bigquery/testing/constraints-3.12.txt index e80ca0ccfd79..e69de29bb2d1 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.12.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.12.txt @@ -1 +0,0 @@ -pyarrow>=3.0.0 \ No newline at end of file diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index 1fc7c6838c0e..d64e06cc3953 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -27,9 +27,9 @@ packaging==20.0.0 pandas==1.1.0 proto-plus==1.22.0 protobuf==3.19.5 -pyarrow>=3.0.0 +pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.21.0 Shapely==1.8.4 six==1.13.0 -tqdm==4.7.4 \ No newline at end of file +tqdm==4.7.4 diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 1473ef283e09..3a5d92dbd761 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -19,53 +19,38 @@ import pytest +from ..helpers import make_connection +from .helpers import _make_client +from .helpers import _make_job_resource try: from google.cloud import bigquery_storage import google.cloud.bigquery_storage_v1.reader import google.cloud.bigquery_storage_v1.services.big_query_read.client -except (ImportError, AttributeError): # pragma: NO COVER +except (ImportError, AttributeError): bigquery_storage = None -try: - import pandas -except (ImportError, AttributeError): # pragma: NO COVER - pandas = None try: import shapely -except (ImportError, AttributeError): # pragma: NO COVER +except (ImportError, AttributeError): shapely = None try: import geopandas -except (ImportError, AttributeError): # pragma: NO COVER +except (ImportError, AttributeError): geopandas = None try: import tqdm -except (ImportError, AttributeError): # pragma: NO COVER +except (ImportError, AttributeError): tqdm = None -try: - import importlib.metadata as metadata -except ImportError: - import importlib_metadata as metadata - -from ..helpers import make_connection -from .helpers import _make_client -from .helpers import _make_job_resource - -if pandas is not None: - PANDAS_INSTALLED_VERSION = metadata.version("pandas") -else: - PANDAS_INSTALLED_VERSION = "0.0.0" - -pandas = pytest.importorskip("pandas") - try: import pyarrow import pyarrow.types -except ImportError: # pragma: NO COVER +except ImportError: pyarrow = None +pandas = pytest.importorskip("pandas") + @pytest.fixture def table_read_options_kwarg(): @@ -660,7 +645,10 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) -@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") +@pytest.mark.skipif( + pandas.__version__.startswith("2."), + reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", +) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 24438462013c..5c13669f30d4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -30,12 +30,12 @@ import pandas import pandas.api.types import pandas.testing -except ImportError: # pragma: NO COVER +except ImportError: pandas = None try: import geopandas -except ImportError: # pragma: NO COVER +except ImportError: geopandas = None import pytest @@ -46,18 +46,19 @@ from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() if pyarrow: import pyarrow.parquet import pyarrow.types - from pyarrow import ArrowTypeError # type: ignore # noqa: E402 -else: # pragma: NO COVER + + _BIGNUMERIC_SUPPORT = True +else: # Mock out pyarrow when missing, because methods from pyarrow.types are # used in test parameterization. pyarrow = mock.Mock() + _BIGNUMERIC_SUPPORT = False bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import() @@ -572,9 +573,9 @@ def test_bq_to_arrow_array_w_conversion_fail(module_under_test): # pragma: NO C series = pandas.Series(rows, name="test_col", dtype="object") bq_field = schema.SchemaField("field_name", "STRING", mode="REPEATED") exc_msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray""" - with pytest.raises(ArrowTypeError, match=exc_msg): + with pytest.raises(pyarrow.ArrowTypeError, match=exc_msg): module_under_test.bq_to_arrow_array(series, bq_field) - raise ArrowTypeError(exc_msg) + raise pyarrow.ArrowTypeError(exc_msg) @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) diff --git a/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py index 8fa09962720d..b1d0ef1acc0c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py @@ -18,17 +18,17 @@ try: import pyarrow # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pyarrow = None try: from google.cloud import bigquery_storage # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: bigquery_storage = None try: import pandas # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pandas = None from google.cloud.bigquery import _versions_helpers @@ -39,11 +39,8 @@ def test_try_import_raises_no_error_w_recent_pyarrow(): versions = _versions_helpers.PyarrowVersions() with mock.patch("pyarrow.__version__", new="5.0.0"): - try: - pyarrow = versions.try_import(raise_if_error=True) - assert pyarrow is not None - except exceptions.LegacyPyarrowError: # pragma: NO COVER - raise ("Legacy error raised with a non-legacy dependency version.") + pyarrow = versions.try_import(raise_if_error=True) + assert pyarrow is not None @pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") @@ -62,6 +59,16 @@ def test_try_import_raises_error_w_legacy_pyarrow(): versions.try_import(raise_if_error=True) +@pytest.mark.skipif( + pyarrow is not None, + reason="pyarrow is installed, but this test needs it not to be", +) +def test_try_import_raises_error_w_no_pyarrow(): + versions = _versions_helpers.PyarrowVersions() + with pytest.raises(exceptions.LegacyPyarrowError): + versions.try_import(raise_if_error=True) + + @pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") def test_installed_pyarrow_version_returns_cached(): versions = _versions_helpers.PyarrowVersions() @@ -208,6 +215,16 @@ def test_try_import_raises_error_w_legacy_pandas(): versions.try_import(raise_if_error=True) +@pytest.mark.skipif( + pandas is not None, + reason="pandas is installed, but this test needs it not to be", +) +def test_try_import_raises_error_w_no_pandas(): + versions = _versions_helpers.PandasVersions() + with pytest.raises(exceptions.LegacyPandasError): + versions.try_import(raise_if_error=True) + + @pytest.mark.skipif(pandas is None, reason="pandas is not installed") def test_installed_pandas_version_returns_cached(): versions = _versions_helpers.PandasVersions() diff --git a/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py b/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py index 3431074fd83f..809be1855ced 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py +++ b/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py @@ -19,7 +19,7 @@ try: import proto # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: proto = None diff --git a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py index e96e18c6b3a4..579d7b1b7fce 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py @@ -19,7 +19,7 @@ try: import opentelemetry -except ImportError: # pragma: NO COVER +except ImportError: opentelemetry = None if opentelemetry is not None: diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index dbc5948b8cf0..3953170fdb76 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -3408,6 +3408,7 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): def test_to_dataframe_progress_bar(self): pytest.importorskip("pandas") + pytest.importorskip("pyarrow") pytest.importorskip("tqdm") from google.cloud.bigquery.schema import SchemaField @@ -3427,7 +3428,6 @@ def test_to_dataframe_progress_bar(self): progress_bars = ( ("tqdm", mock.patch("tqdm.tqdm")), - ("tqdm_notebook", mock.patch("tqdm.notebook.tqdm")), ("tqdm_gui", mock.patch("tqdm.tqdm_gui")), ) @@ -3444,9 +3444,43 @@ def test_to_dataframe_progress_bar(self): progress_bar_mock().close.assert_called_once() self.assertEqual(len(df), 4) + def test_to_dataframe_progress_bar_notebook(self): + pytest.importorskip("pandas") + pytest.importorskip("pyarrow") + pytest.importorskip("tqdm") + pytest.importorskip("tqdm.notebook") + + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + + with mock.patch("tqdm.notebook.tqdm") as progress_bar_mock: + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + df = row_iterator.to_dataframe( + progress_bar_type="tqdm_notebook", + create_bqstorage_client=False, + ) + + progress_bar_mock.assert_called() + progress_bar_mock().update.assert_called() + progress_bar_mock().close.assert_called_once() + self.assertEqual(len(df), 4) + @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3711,7 +3745,7 @@ def test_to_dataframe_w_dtypes_mapper(self): if hasattr(pandas, "Float64Dtype"): self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) self.assertEqual(df.miles.dtype.name, "Float64") - else: # pragma: NO COVER + else: self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"]) self.assertEqual(df.miles.dtype.name, "string") diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py index b38568561be9..02a7a6a79795 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py @@ -16,11 +16,6 @@ import decimal from unittest import mock -try: - import importlib.metadata as metadata -except ImportError: - import importlib_metadata as metadata - import pytest from google.cloud import bigquery @@ -31,11 +26,6 @@ TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data" -if pandas is not None: # pragma: NO COVER - PANDAS_INSTALLED_VERSION = metadata.version("pandas") -else: # pragma: NO COVER - PANDAS_INSTALLED_VERSION = "0.0.0" - @pytest.fixture def class_under_test(): @@ -44,7 +34,10 @@ def class_under_test(): return RowIterator -@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") +@pytest.mark.skipif( + pandas.__version__.startswith("2."), + reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", +) def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): # See tests/system/test_arrow.py for the actual types we get from the API. arrow_schema = pyarrow.schema( From 245b6e6463265f0b54ed8c8c1a0398fade6d6f67 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 1 Apr 2024 14:07:17 +0200 Subject: [PATCH 1764/2016] chore(deps): update all dependencies (#1875) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index bdaead5b1934..1c7bfa5b37c1 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -32,9 +32,9 @@ pandas==2.2.1; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==15.0.2; python_version >= '3.8' -pyasn1==0.5.1; python_version == '3.7' +pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' -pyasn1-modules==0.3.0; python_version == '3.7' +pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.0; python_version >= '3.8' pycparser==2.21 pyparsing==3.1.2 From 1383795cdd7ad2dd192cc1bb09e1d8860c658acf Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 07:42:46 -0500 Subject: [PATCH 1765/2016] chore(main): release 3.20.1 (#1880) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 578df101f167..95af2d213693 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.20.1](https://github.com/googleapis/python-bigquery/compare/v3.20.0...v3.20.1) (2024-04-01) + + +### Bug Fixes + +* Make `pyarrow` an optional dependency post-3.20.0 yanked release ([#1879](https://github.com/googleapis/python-bigquery/issues/1879)) ([21714e1](https://github.com/googleapis/python-bigquery/commit/21714e18bad8d8d89ed5642dbdb61d14e97d5f33)) + ## [3.20.0](https://github.com/googleapis/python-bigquery/compare/v3.19.0...v3.20.0) (2024-03-27) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 4537b82501c8..55093e39059e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.20.0" +__version__ = "3.20.1" From 158b66ab235d2f2a3af9a5dd0133fb615576b6dd Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 3 Apr 2024 07:09:21 -0400 Subject: [PATCH 1766/2016] fix: creates linting-typing.cfg in presubmit (#1881) * creates linting-typing.cfg in presubmit * attempt to filter out linting and typing tests from presubmit * lints and blackens this commit * revise environmental variables * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update .kokoro/presubmit/linting-typing.cfg * Update .kokoro/presubmit/linting-typing.cfg * Update .kokoro/presubmit/linting-typing.cfg * Update .kokoro/presubmit/presubmit.cfg * Update .kokoro/presubmit/presubmit.cfg --- .../.kokoro/presubmit/linting-typing.cfg | 7 ++++++ .../.kokoro/presubmit/presubmit.cfg | 4 ++++ packages/google-cloud-bigquery/noxfile.py | 24 +++++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/linting-typing.cfg diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/linting-typing.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/linting-typing.cfg new file mode 100644 index 000000000000..b1a7406c2a29 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/linting-typing.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run these nox sessions. +env_vars: { + key: "NOX_SESSION" + value: "lint lint_setup_py blacken mypy mypy_samples pytype" +} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg index 17d071cae8b0..fa39b11184d7 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg @@ -9,3 +9,7 @@ env_vars: { key: "RUN_SNIPPETS_TESTS" value: "false" } +env_vars: { + key: "RUN_LINTING_TYPING_TESTS" + value: "false" +} diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 3adb4ba702a7..034bb843aecb 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -132,6 +132,10 @@ def unit_noextras(session): def mypy(session): """Run type checks with mypy.""" + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("-e", ".[all]") session.install(MYPY_VERSION) @@ -153,6 +157,10 @@ def pytype(session): # recent version avoids the error until a possibly better fix is found. # https://github.com/googleapis/python-bigquery/issues/655 + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) @@ -213,6 +221,10 @@ def system(session): def mypy_samples(session): """Run type checks with mypy.""" + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("pytest") for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"): session.install("-r", str(requirements_path)) @@ -394,6 +406,10 @@ def lint(session): serious code quality issues. """ + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("flake8", BLACK_VERSION) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) @@ -408,6 +424,10 @@ def lint(session): def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("docutils", "Pygments") session.run("python", "setup.py", "check", "--restructuredtext", "--strict") @@ -418,6 +438,10 @@ def blacken(session): Format code to uniform standard. """ + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install(BLACK_VERSION) session.run("black", *BLACK_PATHS) From 59594712821ddabb3ab3540cb76a086058afb981 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 8 Apr 2024 21:16:14 +0200 Subject: [PATCH 1767/2016] chore(deps): update all dependencies (#1882) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * pin pycparser==2.21 for python 3.7 --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 1c7bfa5b37c1..76b1a7b6ba21 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -36,7 +36,8 @@ pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.0; python_version >= '3.8' -pycparser==2.21 +pycparser==2.21; python_version == '3.7' +pycparser==2.22; python_version >= '3.8' pyparsing==3.1.2 python-dateutil==2.9.0.post0 pytz==2024.1 From 410ec1dc7978beb8e4fb177c173fc4869d49572f Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 10 Apr 2024 13:58:29 -0700 Subject: [PATCH 1768/2016] feat: support RANGE in queries Part 1: JSON (#1884) * feat: support range in queries as dict * fix sys tests * lint * fix typo --- .../google/cloud/bigquery/_helpers.py | 41 +++++++ .../tests/system/helpers.py | 5 + .../tests/system/test_query.py | 6 +- .../tests/unit/test__helpers.py | 105 +++++++++++++++++- 4 files changed, 153 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 7198b60c2309..0572867d756f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -309,6 +309,46 @@ def _json_from_json(value, field): return None +def _range_element_from_json(value, field): + """Coerce 'value' to a range element value, if set or not nullable.""" + if value == "UNBOUNDED": + return None + elif field.element_type == "DATE": + return _date_from_json(value, None) + elif field.element_type == "DATETIME": + return _datetime_from_json(value, None) + elif field.element_type == "TIMESTAMP": + return _timestamp_from_json(value, None) + else: + raise ValueError(f"Unsupported range field type: {value}") + + +def _range_from_json(value, field): + """Coerce 'value' to a range, if set or not nullable. + + Args: + value (str): The literal representation of the range. + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. + + Returns: + Optional[dict]: + The parsed range object from ``value`` if the ``field`` is not + null (otherwise it is :data:`None`). + """ + range_literal = re.compile(r"\[.*, .*\)") + if _not_null(value, field): + if range_literal.match(value): + start, end = value[1:-1].split(", ") + start = _range_element_from_json(start, field.range_element_type) + end = _range_element_from_json(end, field.range_element_type) + return {"start": start, "end": end} + else: + raise ValueError(f"Unknown range format: {value}") + else: + return None + + # Parse BigQuery API response JSON into a Python representation. _CELLDATA_FROM_JSON = { "INTEGER": _int_from_json, @@ -329,6 +369,7 @@ def _json_from_json(value, field): "TIME": _time_from_json, "RECORD": _record_from_json, "JSON": _json_from_json, + "RANGE": _range_from_json, } _QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) diff --git a/packages/google-cloud-bigquery/tests/system/helpers.py b/packages/google-cloud-bigquery/tests/system/helpers.py index 721f55040349..7fd344eeb071 100644 --- a/packages/google-cloud-bigquery/tests/system/helpers.py +++ b/packages/google-cloud-bigquery/tests/system/helpers.py @@ -25,6 +25,7 @@ _naive = datetime.datetime(2016, 12, 5, 12, 41, 9) _naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000) _stamp = "%s %s" % (_naive.date().isoformat(), _naive.time().isoformat()) +_date = _naive.date().isoformat() _stamp_microseconds = _stamp + ".250000" _zoned = _naive.replace(tzinfo=UTC) _zoned_microseconds = _naive_microseconds.replace(tzinfo=UTC) @@ -78,6 +79,10 @@ ), ("SELECT ARRAY(SELECT STRUCT([1, 2]))", [{"_field_1": [1, 2]}]), ("SELECT ST_GeogPoint(1, 2)", "POINT(1 2)"), + ( + "SELECT RANGE '[UNBOUNDED, %s)'" % _date, + {"start": None, "end": _naive.date()}, + ), ] diff --git a/packages/google-cloud-bigquery/tests/system/test_query.py b/packages/google-cloud-bigquery/tests/system/test_query.py index 0494272d9f9f..d94a117e362c 100644 --- a/packages/google-cloud-bigquery/tests/system/test_query.py +++ b/packages/google-cloud-bigquery/tests/system/test_query.py @@ -425,7 +425,7 @@ def test_query_statistics(bigquery_client, query_api_method): ), ( "SELECT @range_date", - "[2016-12-05, UNBOUNDED)", + {"end": None, "start": datetime.date(2016, 12, 5)}, [ RangeQueryParameter( name="range_date", @@ -436,7 +436,7 @@ def test_query_statistics(bigquery_client, query_api_method): ), ( "SELECT @range_datetime", - "[2016-12-05T00:00:00, UNBOUNDED)", + {"end": None, "start": datetime.datetime(2016, 12, 5, 0, 0)}, [ RangeQueryParameter( name="range_datetime", @@ -447,7 +447,7 @@ def test_query_statistics(bigquery_client, query_api_method): ), ( "SELECT @range_unbounded", - "[UNBOUNDED, UNBOUNDED)", + {"end": None, "start": None}, [ RangeQueryParameter( name="range_unbounded", diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 320c57737250..a50625e2ab39 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -452,6 +452,99 @@ def test_w_bogus_string_value(self): self._call_fut("12:12:27.123", object()) +class Test_range_from_json(unittest.TestCase): + def _call_fut(self, value, field): + from google.cloud.bigquery._helpers import _range_from_json + + return _range_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._call_fut(None, _Field("REQUIRED")) + + def test_w_wrong_format(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATE"), + ) + with self.assertRaises(ValueError): + self._call_fut("[2009-06-172019-06-17)", range_field) + + def test_w_wrong_element_type(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="TIME"), + ) + with self.assertRaises(ValueError): + self._call_fut("[15:31:38, 15:50:38)", range_field) + + def test_w_unbounded_value(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATE"), + ) + coerced = self._call_fut("[UNBOUNDED, 2019-06-17)", range_field) + self.assertEqual( + coerced, + {"start": None, "end": datetime.date(2019, 6, 17)}, + ) + + def test_w_date_value(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATE"), + ) + coerced = self._call_fut("[2009-06-17, 2019-06-17)", range_field) + self.assertEqual( + coerced, + { + "start": datetime.date(2009, 6, 17), + "end": datetime.date(2019, 6, 17), + }, + ) + + def test_w_datetime_value(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATETIME"), + ) + coerced = self._call_fut( + "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", range_field + ) + self.assertEqual( + coerced, + { + "start": datetime.datetime(2009, 6, 17, 13, 45, 30), + "end": datetime.datetime(2019, 6, 17, 13, 45, 30), + }, + ) + + def test_w_timestamp_value(self): + from google.cloud._helpers import _EPOCH + + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="TIMESTAMP"), + ) + coerced = self._call_fut("[1234567, 1234789)", range_field) + self.assertEqual( + coerced, + { + "start": _EPOCH + datetime.timedelta(seconds=1, microseconds=234567), + "end": _EPOCH + datetime.timedelta(seconds=1, microseconds=234789), + }, + ) + + class Test_record_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _record_from_json @@ -1323,11 +1416,21 @@ def test_w_str(self): class _Field(object): - def __init__(self, mode, name="unknown", field_type="UNKNOWN", fields=()): + def __init__( + self, + mode, + name="unknown", + field_type="UNKNOWN", + fields=(), + range_element_type=None, + element_type=None, + ): self.mode = mode self.name = name self.field_type = field_type self.fields = fields + self.range_element_type = range_element_type + self.element_type = element_type def _field_isinstance_patcher(): From 3390591b89a0e51e83f0098f8080e606415c7f67 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 11 Apr 2024 13:49:15 -0400 Subject: [PATCH 1769/2016] feat: adds billing to opentel (#1889) --- .../google/cloud/bigquery/opentelemetry_tracing.py | 8 ++++++++ .../tests/unit/test_opentelemetry_tracing.py | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py index e2a05e4d0f82..b5f6bf9912ed 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/opentelemetry_tracing.py @@ -153,4 +153,12 @@ def _set_job_attributes(job_ref): if job_ref.num_child_jobs is not None: job_attributes["num_child_jobs"] = job_ref.num_child_jobs + total_bytes_billed = getattr(job_ref, "total_bytes_billed", None) + if total_bytes_billed is not None: + job_attributes["total_bytes_billed"] = total_bytes_billed + + total_bytes_processed = getattr(job_ref, "total_bytes_processed", None) + if total_bytes_processed is not None: + job_attributes["total_bytes_processed"] = total_bytes_processed + return job_attributes diff --git a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py index 579d7b1b7fce..546cc02bd75e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py @@ -142,6 +142,8 @@ def test_default_job_attributes(setup): "timeEnded": ended_time.isoformat(), "hasErrors": True, "state": "some_job_state", + "total_bytes_billed": 42, + "total_bytes_processed": 13, } with mock.patch("google.cloud.bigquery.job._AsyncJob") as test_job_ref: test_job_ref.job_id = "test_job_id" @@ -154,6 +156,8 @@ def test_default_job_attributes(setup): test_job_ref.ended = ended_time test_job_ref.error_result = error_result test_job_ref.state = "some_job_state" + test_job_ref.total_bytes_billed = 42 + test_job_ref.total_bytes_processed = 13 with opentelemetry_tracing.create_span( TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job_ref @@ -180,6 +184,8 @@ def test_optional_job_attributes(setup): test_job_ref.state = "some_job_state" test_job_ref.num_child_jobs = None test_job_ref.parent_job_id = None + test_job_ref.total_bytes_billed = None + test_job_ref.total_bytes_processed = None with opentelemetry_tracing.create_span( TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job_ref From 73553ac6ec70bd5b3ed7ae1a47608582e25e8969 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 11 Apr 2024 21:17:23 +0200 Subject: [PATCH 1770/2016] chore(deps): update all dependencies (#1891) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * revert pinned requirement version and add triple equal "===" prevents dependabot from attempting to upgrade it in the future --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../samples/notebooks/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 8561934dcca5..fee6806b7283 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 google-auth-oauthlib==1.2.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 76b1a7b6ba21..e11fa09cf1e1 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -15,7 +15,7 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.3; python_version >= '3.9' google-api-core==2.18.0 google-auth==2.29.0 -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 @@ -36,7 +36,7 @@ pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.0; python_version >= '3.8' -pycparser==2.21; python_version == '3.7' +pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' pyparsing==3.1.2 python-dateutil==2.9.0.post0 @@ -47,7 +47,7 @@ rsa==4.9 Shapely==2.0.3 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.10.0; python_version >= '3.8' +typing-extensions==4.11.0; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 9179db067f66..05fd1907bed3 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.19.0 +google.cloud.bigquery==3.20.1 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 8f2e9362045a..40fba4b878c2 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,12 +1,12 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.8.3; python_version >= '3.9' +matplotlib==3.8.4; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.1; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index af9436c51e15..95f915364696 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 From 5201461b490083fc7ed40f773b29b71eb1b06d48 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 11 Apr 2024 15:33:30 -0400 Subject: [PATCH 1771/2016] feat: Add compression option ZSTD. (#1890) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add ZSTD to compression types * feat: adds tests re Compression types * revise datatype from Enum to object * adds license text and docstring * change object back to enum datatype * updates compression object comparison * updates Compression class * jsonify and sort the input and output for testing * Update tests/unit/job/test_extract.py * moved json import statement * removed enums test and file --------- Co-authored-by: Ethan Steinberg Co-authored-by: Tim Sweña (Swast) --- .../google/cloud/bigquery/enums.py | 5 ++++- .../tests/unit/job/test_extract.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index d75037ad1e0b..1abe28381c40 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -22,7 +22,7 @@ class AutoRowIDs(enum.Enum): GENERATE_UUID = enum.auto() -class Compression(object): +class Compression(str, enum.Enum): """The compression type to use for exported files. The default value is :attr:`NONE`. @@ -39,6 +39,9 @@ class Compression(object): SNAPPY = "SNAPPY" """Specifies SNAPPY format.""" + ZSTD = "ZSTD" + """Specifies ZSTD format.""" + NONE = "NONE" """Specifies no compression.""" diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_extract.py b/packages/google-cloud-bigquery/tests/unit/job/test_extract.py index 76ee72f281da..ee0d67d68dc3 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_extract.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_extract.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json from unittest import mock from ..helpers import make_connection @@ -45,9 +46,8 @@ def test_to_api_repr(self): config.print_header = False config._properties["extract"]["someNewField"] = "some-value" config.use_avro_logical_types = True - resource = config.to_api_repr() - self.assertEqual( - resource, + resource = json.dumps(config.to_api_repr(), sort_keys=True) + expected = json.dumps( { "extract": { "compression": "SNAPPY", @@ -58,6 +58,12 @@ def test_to_api_repr(self): "useAvroLogicalTypes": True, } }, + sort_keys=True, + ) + + self.assertEqual( + resource, + expected, ) def test_from_api_repr(self): From 4d61af6cd0fcc81d5b18dbc17264fa0a561b682c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 12 Apr 2024 17:18:51 +0200 Subject: [PATCH 1772/2016] chore(deps): update dependency idna to v3.7 [security] (#1896) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e11fa09cf1e1..e4b63cdaa589 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -22,7 +22,7 @@ google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 grpcio==1.62.1 -idna==3.6 +idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 packaging==24.0 From 5f4e0956185f23e85f61fde62ce2307422cea13e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 12 Apr 2024 18:49:08 +0200 Subject: [PATCH 1773/2016] chore(deps): update all dependencies (#1893) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e4b63cdaa589..b3d9bc841fb5 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -28,7 +28,7 @@ mypy-extensions==1.0.0 packaging==24.0 pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.1; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==15.0.2; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 05fd1907bed3..61471a348608 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -6,4 +6,4 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.1; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 40fba4b878c2..3960f47b9803 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -9,4 +9,4 @@ matplotlib===3.7.4; python_version == '3.8' matplotlib==3.8.4; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.1; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' From 6a682760adf28663ccc75afc710b08001661fa83 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:41:37 -0400 Subject: [PATCH 1774/2016] chore(python): bump idna from 3.4 to 3.7 in .kokoro (#1897) * chore(python): bump idna from 3.4 to 3.7 in .kokoro Source-Link: https://github.com/googleapis/synthtool/commit/d50980e704793a2d3310bfb3664f3a82f24b5796 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:5a4c19d17e597b92d786e569be101e636c9c2817731f80a5adec56b2aa8fe070 * Apply changes from googleapis/synthtool#1950 --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe Co-authored-by: Anthonios Partheniou --- .../.github/.OwlBot.lock.yaml | 4 ++-- .../.github/auto-label.yaml | 5 +++++ .../.github/blunderbuss.yml | 17 ++++++++++++++ .../.kokoro/requirements.txt | 6 ++--- packages/google-cloud-bigquery/docs/index.rst | 5 +++++ .../docs/summary_overview.md | 22 +++++++++++++++++++ 6 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 packages/google-cloud-bigquery/.github/blunderbuss.yml create mode 100644 packages/google-cloud-bigquery/docs/summary_overview.md diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index dc9c56e9dcab..81f87c56917d 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:a8a80fc6456e433df53fc2a0d72ca0345db0ddefb409f1b75b118dfd1babd952 -# created: 2024-03-15T16:25:47.905264637Z \ No newline at end of file + digest: sha256:5a4c19d17e597b92d786e569be101e636c9c2817731f80a5adec56b2aa8fe070 +# created: 2024-04-12T11:35:58.922854369Z diff --git a/packages/google-cloud-bigquery/.github/auto-label.yaml b/packages/google-cloud-bigquery/.github/auto-label.yaml index b2016d119b40..8b37ee89711f 100644 --- a/packages/google-cloud-bigquery/.github/auto-label.yaml +++ b/packages/google-cloud-bigquery/.github/auto-label.yaml @@ -13,3 +13,8 @@ # limitations under the License. requestsize: enabled: true + +path: + pullrequest: true + paths: + samples: "samples" diff --git a/packages/google-cloud-bigquery/.github/blunderbuss.yml b/packages/google-cloud-bigquery/.github/blunderbuss.yml new file mode 100644 index 000000000000..5b7383dc7665 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/blunderbuss.yml @@ -0,0 +1,17 @@ +# Blunderbuss config +# +# This file controls who is assigned for pull requests and issues. +# Note: This file is autogenerated. To make changes to the assignee +# team, please update `codeowner_team` in `.repo-metadata.json`. +assign_issues: + - googleapis/api-bigquery + +assign_issues_by: + - labels: + - "samples" + to: + - googleapis/python-samples-reviewers + - googleapis/api-bigquery + +assign_prs: + - googleapis/api-bigquery diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index dd61f5f32018..51f92b8e12f1 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -252,9 +252,9 @@ googleapis-common-protos==1.61.0 \ --hash=sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0 \ --hash=sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b # via google-api-core -idna==3.4 \ - --hash=sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4 \ - --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 +idna==3.7 \ + --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ + --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 # via requests importlib-metadata==6.8.0 \ --hash=sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb \ diff --git a/packages/google-cloud-bigquery/docs/index.rst b/packages/google-cloud-bigquery/docs/index.rst index 500c67a7fb4c..6d6ed63f6cd4 100644 --- a/packages/google-cloud-bigquery/docs/index.rst +++ b/packages/google-cloud-bigquery/docs/index.rst @@ -48,3 +48,8 @@ For a list of all ``google-cloud-bigquery`` releases: :maxdepth: 2 changelog + +.. toctree:: + :hidden: + + summary_overview.md diff --git a/packages/google-cloud-bigquery/docs/summary_overview.md b/packages/google-cloud-bigquery/docs/summary_overview.md new file mode 100644 index 000000000000..6dd228e13a92 --- /dev/null +++ b/packages/google-cloud-bigquery/docs/summary_overview.md @@ -0,0 +1,22 @@ +[ +This is a templated file. Adding content to this file may result in it being +reverted. Instead, if you want to place additional content, create an +"overview_content.md" file in `docs/` directory. The Sphinx tool will +pick up on the content and merge the content. +]: # + +# Google Cloud BigQuery API + +Overview of the APIs available for Google Cloud BigQuery API. + +## All entries + +Classes, methods and properties & attributes for +Google Cloud BigQuery API. + +[classes](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_class.html) + +[methods](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_method.html) + +[properties and +attributes](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_property.html) From abb033fa14b8c5a4385d898d50d50adab266f819 Mon Sep 17 00:00:00 2001 From: Toran Sahu Date: Fri, 12 Apr 2024 23:40:17 +0530 Subject: [PATCH 1775/2016] =?UTF-8?q?fix:=20Remove=20duplicate=20key=20tim?= =?UTF-8?q?e=5Fpartitioning=20from=20Table.=5FPROPERTY=5FTO=5FA=E2=80=A6?= =?UTF-8?q?=20(#1898)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …PI_FIELD Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index c002822fed7f..73e755e9ea61 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -385,7 +385,6 @@ class Table(_TableBase): "clone_definition": "cloneDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", - "time_partitioning": "timePartitioning", "type": "type", "view_use_legacy_sql": "view", "view_query": "view", From a2c7652df3710d796c0a5b2ea6ef40fb4d7dc2f8 Mon Sep 17 00:00:00 2001 From: kserruys Date: Fri, 12 Apr 2024 20:42:29 +0200 Subject: [PATCH 1776/2016] fix: add types to DatasetReference constructor (#1601) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: add types to DatasetReference constructor * fix: add types to DatasetReference constructor * fix: DatasetReference.from_string test coverage --------- Co-authored-by: Karel Serruys Co-authored-by: Chalmer Lowe Co-authored-by: meredithslota Co-authored-by: Tim Sweña (Swast) --- .../google/cloud/bigquery/dataset.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index c313045ce3ad..c49a52faf217 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -92,7 +92,7 @@ class DatasetReference(object): ValueError: If either argument is not of type ``str``. """ - def __init__(self, project, dataset_id): + def __init__(self, project: str, dataset_id: str): if not isinstance(project, str): raise ValueError("Pass a string for project") if not isinstance(dataset_id, str): @@ -166,22 +166,24 @@ def from_string( standard SQL format. """ output_dataset_id = dataset_id - output_project_id = default_project parts = _helpers._split_id(dataset_id) - if len(parts) == 1 and not default_project: - raise ValueError( - "When default_project is not set, dataset_id must be a " - "fully-qualified dataset ID in standard SQL format, " - 'e.g., "project.dataset_id" got {}'.format(dataset_id) - ) + if len(parts) == 1: + if default_project is not None: + output_project_id = default_project + else: + raise ValueError( + "When default_project is not set, dataset_id must be a " + "fully-qualified dataset ID in standard SQL format, " + 'e.g., "project.dataset_id" got {}'.format(dataset_id) + ) elif len(parts) == 2: output_project_id, output_dataset_id = parts - elif len(parts) > 2: + else: raise ValueError( "Too many parts in dataset_id. Expected a fully-qualified " - "dataset ID in standard SQL format. e.g. " - '"project.dataset_id", got {}'.format(dataset_id) + "dataset ID in standard SQL format, " + 'e.g. "project.dataset_id", got {}'.format(dataset_id) ) return cls(output_project_id, output_dataset_id) From 5c2afd981c71e2abc71946ae04ff44a2929caff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 18 Apr 2024 09:31:40 -0500 Subject: [PATCH 1777/2016] fix: avoid unnecessary API call in QueryJob.result() when job is already finished (#1900) fix: retry query job after ambiguous failures Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/_job_helpers.py | 9 +- .../google/cloud/bigquery/job/query.py | 172 +++++---- .../google/cloud/bigquery/retry.py | 52 ++- .../tests/unit/job/test_query.py | 334 ++++++++++-------- .../tests/unit/test__job_helpers.py | 38 +- .../tests/unit/test_job_retry.py | 172 ++++++++- 6 files changed, 547 insertions(+), 230 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 602a49eba2f4..2904393944c1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -258,15 +258,16 @@ def _to_query_job( errors = query_response["errors"] query_job._properties["status"]["errors"] = errors - # Transform job state so that QueryJob doesn't try to restart the query. + # Avoid an extra call to `getQueryResults` if the query has finished. job_complete = query_response.get("jobComplete") if job_complete: - query_job._properties["status"]["state"] = "DONE" query_job._query_results = google.cloud.bigquery.query._QueryResults( query_response ) - else: - query_job._properties["status"]["state"] = "PENDING" + + # We want job.result() to refresh the job state, so the conversion is + # always "PENDING", even if the job is finished. + query_job._properties["status"]["state"] = "PENDING" return query_job diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index e92e9cb9eee5..7436b6013269 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -17,11 +17,11 @@ import concurrent.futures import copy import re +import time import typing from typing import Any, Dict, Iterable, List, Optional, Union from google.api_core import exceptions -from google.api_core.future import polling as polling_future from google.api_core import retry as retries import requests @@ -1383,7 +1383,7 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): def _reload_query_results( self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None ): - """Refresh the cached query results. + """Refresh the cached query results unless already cached and complete. Args: retry (Optional[google.api_core.retry.Retry]): @@ -1392,6 +1392,8 @@ def _reload_query_results( The number of seconds to wait for the underlying HTTP transport before using ``retry``. """ + # Optimization: avoid a call to jobs.getQueryResults if it's already + # been fetched, e.g. from jobs.query first page of results. if self._query_results and self._query_results.complete: return @@ -1430,40 +1432,6 @@ def _reload_query_results( timeout=transport_timeout, ) - def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): - """Check if the query has finished running and raise if it's not. - - If the query has finished, also reload the job itself. - """ - # If an explicit timeout is not given, fall back to the transport timeout - # stored in _blocking_poll() in the process of polling for job completion. - transport_timeout = timeout if timeout is not None else self._transport_timeout - - try: - self._reload_query_results(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - # Reloading also updates error details on self, thus no need for an - # explicit self.set_exception() call if reloading succeeds. - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError: - # Use the query results reload exception, as it generally contains - # much more useful error information. - self.set_exception(exc) - finally: - return - - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if not self._query_results.complete: - raise polling_future._OperationNotComplete() - else: - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - self.set_exception(exc) - def result( # type: ignore # (incompatible with supertype) self, page_size: Optional[int] = None, @@ -1528,6 +1496,10 @@ def result( # type: ignore # (incompatible with supertype) If Non-``None`` and non-default ``job_retry`` is provided and the job is not retryable. """ + # Note: Since waiting for a query job to finish is more complex than + # refreshing the job state in a loop, we avoid calling the superclass + # in this method. + if self.dry_run: return _EmptyRowIterator( project=self.project, @@ -1548,46 +1520,124 @@ def result( # type: ignore # (incompatible with supertype) " provided to the query that created this job." ) - first = True + restart_query_job = False + + def is_job_done(): + nonlocal restart_query_job - def do_get_result(): - nonlocal first + if restart_query_job: + restart_query_job = False - if first: - first = False - else: + # The original job has failed. Create a new one. + # # Note that we won't get here if retry_do_query is # None, because we won't use a retry. - - # The orinal job is failed. Create a new one. job = retry_do_query() - # If it's already failed, we might as well stop: - if job.done() and job.exception() is not None: - raise job.exception() - # Become the new job: self.__dict__.clear() self.__dict__.update(job.__dict__) - # This shouldn't be necessary, because once we have a good - # job, it should stay good,and we shouldn't have to retry. - # But let's be paranoid. :) + # It's possible the job fails again and we'll have to + # retry that too. self._retry_do_query = retry_do_query self._job_retry = job_retry - super(QueryJob, self).result(retry=retry, timeout=timeout) - - # Since the job could already be "done" (e.g. got a finished job - # via client.get_job), the superclass call to done() might not - # set the self._query_results cache. - if self._query_results is None or not self._query_results.complete: - self._reload_query_results(retry=retry, timeout=timeout) + # Refresh the job status with jobs.get because some of the + # exceptions thrown by jobs.getQueryResults like timeout and + # rateLimitExceeded errors are ambiguous. We want to know if + # the query job failed and not just the call to + # jobs.getQueryResults. + if self.done(retry=retry, timeout=timeout): + # If it's already failed, we might as well stop. + job_failed_exception = self.exception() + if job_failed_exception is not None: + # Only try to restart the query job if the job failed for + # a retriable reason. For example, don't restart the query + # if the call to reload the job metadata within self.done() + # timed out. + # + # The `restart_query_job` must only be called after a + # successful call to the `jobs.get` REST API and we + # determine that the job has failed. + # + # The `jobs.get` REST API + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get) + # is called via `self.done()` which calls + # `self.reload()`. + # + # To determine if the job failed, the `self.exception()` + # is set from `self.reload()` via + # `self._set_properties()`, which translates the + # `Job.status.errorResult` field + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.error_result) + # into an exception that can be processed by the + # `job_retry` predicate. + restart_query_job = True + raise job_failed_exception + else: + # Make sure that the _query_results are cached so we + # can return a complete RowIterator. + # + # Note: As an optimization, _reload_query_results + # doesn't make any API calls if the query results are + # already cached and have jobComplete=True in the + # response from the REST API. This ensures we aren't + # making any extra API calls if the previous loop + # iteration fetched the finished job. + self._reload_query_results(retry=retry, timeout=timeout) + return True + + # Call jobs.getQueryResults with max results set to 0 just to + # wait for the query to finish. Unlike most methods, + # jobs.getQueryResults hangs as long as it can to ensure we + # know when the query has finished as soon as possible. + self._reload_query_results(retry=retry, timeout=timeout) + + # Even if the query is finished now according to + # jobs.getQueryResults, we'll want to reload the job status if + # it's not already DONE. + return False if retry_do_query is not None and job_retry is not None: - do_get_result = job_retry(do_get_result) - - do_get_result() + is_job_done = job_retry(is_job_done) + + # timeout can be a number of seconds, `None`, or a + # `google.api_core.future.polling.PollingFuture._DEFAULT_VALUE` + # sentinel object indicating a default timeout if we choose to add + # one some day. This value can come from our PollingFuture + # superclass and was introduced in + # https://github.com/googleapis/python-api-core/pull/462. + if isinstance(timeout, (float, int)): + remaining_timeout = timeout + else: + # Note: we may need to handle _DEFAULT_VALUE as a separate + # case someday, but even then the best we can do for queries + # is 72+ hours for hyperparameter tuning jobs: + # https://cloud.google.com/bigquery/quotas#query_jobs + # + # The timeout for a multi-statement query is 24+ hours. See: + # https://cloud.google.com/bigquery/quotas#multi_statement_query_limits + remaining_timeout = None + + if remaining_timeout is None: + # Since is_job_done() calls jobs.getQueryResults, which is a + # long-running API, don't delay the next request at all. + while not is_job_done(): + pass + else: + # Use a monotonic clock since we don't actually care about + # daylight savings or similar, just the elapsed time. + previous_time = time.monotonic() + + while not is_job_done(): + current_time = time.monotonic() + elapsed_time = current_time - previous_time + remaining_timeout = remaining_timeout - elapsed_time + previous_time = current_time + + if remaining_timeout < 0: + raise concurrent.futures.TimeoutError() except exceptions.GoogleAPICallError as exc: exc.message = _EXCEPTION_FOOTER_TEMPLATE.format( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 01b12797270e..c9898287f04e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -36,10 +36,25 @@ _DEFAULT_RETRY_DEADLINE = 10.0 * 60.0 # 10 minutes -# Allow for a few retries after the API request times out. This relevant for -# rateLimitExceeded errors, which can be raised either by the Google load -# balancer or the BigQuery job server. -_DEFAULT_JOB_DEADLINE = 3.0 * _DEFAULT_RETRY_DEADLINE +# Ambiguous errors (e.g. internalError, backendError, rateLimitExceeded) retry +# until the full `_DEFAULT_RETRY_DEADLINE`. This is because the +# `jobs.getQueryResults` REST API translates a job failure into an HTTP error. +# +# TODO(https://github.com/googleapis/python-bigquery/issues/1903): Investigate +# if we can fail early for ambiguous errors in `QueryJob.result()`'s call to +# the `jobs.getQueryResult` API. +# +# We need `_DEFAULT_JOB_DEADLINE` to be some multiple of +# `_DEFAULT_RETRY_DEADLINE` to allow for a few retries after the retry +# timeout is reached. +# +# Note: This multiple should actually be a multiple of +# (2 * _DEFAULT_RETRY_DEADLINE). After an ambiguous exception, the first +# call from `job_retry()` refreshes the job state without actually restarting +# the query. The second `job_retry()` actually restarts the query. For a more +# detailed explanation, see the comments where we set `restart_query_job = True` +# in `QueryJob.result()`'s inner `is_job_done()` function. +_DEFAULT_JOB_DEADLINE = 2.0 * (2.0 * _DEFAULT_RETRY_DEADLINE) def _should_retry(exc): @@ -66,6 +81,11 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ +# Note: Take care when updating DEFAULT_TIMEOUT to anything but None. We +# briefly had a default timeout, but even setting it at more than twice the +# theoretical server-side default timeout of 2 minutes was not enough for +# complex queries. See: +# https://github.com/googleapis/python-bigquery/issues/970#issuecomment-921934647 DEFAULT_TIMEOUT = None """The default API timeout. @@ -73,10 +93,32 @@ def _should_retry(exc): deadline on the retry object. """ -job_retry_reasons = "rateLimitExceeded", "backendError", "jobRateLimitExceeded" +job_retry_reasons = ( + "rateLimitExceeded", + "backendError", + "internalError", + "jobRateLimitExceeded", +) def _job_should_retry(exc): + # Sometimes we have ambiguous errors, such as 'backendError' which could + # be due to an API problem or a job problem. For these, make sure we retry + # our is_job_done() function. + # + # Note: This won't restart the job unless we know for sure it's because of + # the job status and set restart_query_job = True in that loop. This means + # that we might end up calling this predicate twice for the same job + # but from different paths: (1) from jobs.getQueryResults RetryError and + # (2) from translating the job error from the body of a jobs.get response. + # + # Note: If we start retrying job types other than queries where we don't + # call the problematic getQueryResults API to check the status, we need + # to provide a different predicate, as there shouldn't be ambiguous + # errors in those cases. + if isinstance(exc, exceptions.RetryError): + exc = exc.cause + if not hasattr(exc, "errors") or len(exc.errors) == 0: return False diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 37ac7ba5ebb3..0fee053e346a 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -13,6 +13,7 @@ # limitations under the License. import concurrent +import concurrent.futures import copy import http import textwrap @@ -371,100 +372,6 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) - def test__done_or_raise_w_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job._done_or_raise(timeout=42) - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args[0][1] - self.assertEqual(call_args.timeout, 600.0) - - call_args = fake_reload.call_args[1] - self.assertEqual(call_args["timeout"], 42) - - def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._done_timeout = 8.8 - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job._done_or_raise(timeout=5.5) - - # The expected timeout used is simply the given timeout, as the latter - # is shorter than the job's internal done timeout. - expected_timeout = 5.5 - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args[0][1] - self.assertAlmostEqual(call_args.timeout, 600.0) - - call_args = fake_reload.call_args - self.assertAlmostEqual(call_args[1].get("timeout"), expected_timeout) - - def test__done_or_raise_w_query_results_error_reload_ok(self): - client = _make_client(project=self.PROJECT) - bad_request_error = exceptions.BadRequest("Error in query") - client._get_query_results = mock.Mock(side_effect=bad_request_error) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._exception = None - - def fake_reload(self, *args, **kwargs): - self._properties["status"]["state"] = "DONE" - self.set_exception(copy.copy(bad_request_error)) - - fake_reload_method = types.MethodType(fake_reload, job) - - with mock.patch.object(job, "reload", new=fake_reload_method): - job._done_or_raise() - - assert isinstance(job._exception, exceptions.BadRequest) - - def test__done_or_raise_w_query_results_error_reload_error(self): - client = _make_client(project=self.PROJECT) - bad_request_error = exceptions.BadRequest("Error in query") - client._get_query_results = mock.Mock(side_effect=bad_request_error) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - reload_error = exceptions.DataLoss("Oops, sorry!") - job.reload = mock.Mock(side_effect=reload_error) - job._exception = None - - job._done_or_raise() - - assert job._exception is bad_request_error - - def test__done_or_raise_w_job_query_results_ok_reload_error(self): - client = _make_client(project=self.PROJECT) - query_results = google.cloud.bigquery.query._QueryResults( - properties={ - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": "12345"}, - } - ) - client._get_query_results = mock.Mock(return_value=query_results) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) - job.reload = mock.Mock(side_effect=retry_error) - job._exception = None - - job._done_or_raise() - - assert job._exception is retry_error - def test_query_plan(self): from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.job import QueryPlanEntry @@ -933,7 +840,12 @@ def test_search_stats(self): assert isinstance(job.search_stats, SearchStats) assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED" - def test_result(self): + def test_result_reloads_job_state_until_done(self): + """Verify that result() doesn't return until state == 'DONE'. + + This test verifies correctness for a possible sequence of API responses + that might cause internal customer issue b/332850329. + """ from google.cloud.bigquery.table import RowIterator query_resource = { @@ -970,7 +882,54 @@ def test_result(self): "rows": [{"f": [{"v": "abc"}]}], } conn = make_connection( - query_resource, query_resource_done, job_resource_done, query_page_resource + # QueryJob.result() makes a pair of jobs.get & jobs.getQueryResults + # REST API calls each iteration to determine if the job has finished + # or not. + # + # jobs.get (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get) + # is necessary to make sure the job has really finished via + # `Job.status.state == "DONE"` and to get necessary properties for + # `RowIterator` like the destination table. + # + # jobs.getQueryResults + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults) + # with maxResults == 0 is technically optional, + # but it hangs up to 10 seconds until the job has finished. This + # makes sure we can know when the query has finished as close as + # possible to when the query finishes. It also gets properties + # necessary for `RowIterator` that isn't available on the job + # resource such as the schema + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults#body.GetQueryResultsResponse.FIELDS.schema) + # of the results. + job_resource, + query_resource, + # The query wasn't finished in the last call to jobs.get, so try + # again with a call to both jobs.get & jobs.getQueryResults. + job_resource, + query_resource_done, + # Even though, the previous jobs.getQueryResults response says + # the job is complete, we haven't downloaded the full job status + # yet. + # + # Important: per internal issue 332850329, this reponse has + # `Job.status.state = "RUNNING"`. This ensures we are protected + # against possible eventual consistency issues where + # `jobs.getQueryResults` says jobComplete == True, but our next + # call to `jobs.get` still doesn't have + # `Job.status.state == "DONE"`. + job_resource, + # Try again until `Job.status.state == "DONE"`. + # + # Note: the call to `jobs.getQueryResults` is missing here as + # an optimization. We already received a "completed" response, so + # we won't learn anything new by calling that API again. + job_resource, + job_resource_done, + # When we iterate over the `RowIterator` we return from + # `QueryJob.result()`, we make additional calls to + # `jobs.getQueryResults` but this time allowing the actual rows + # to be returned as well. + query_page_resource, ) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1013,8 +972,32 @@ def test_result(self): }, timeout=None, ) + # Ensure that we actually made the expected API calls in the sequence + # we thought above at the make_connection() call above. + # + # Note: The responses from jobs.get and jobs.getQueryResults can be + # deceptively similar, so this check ensures we actually made the + # requests we expected. conn.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call, query_page_call] + [ + # jobs.get & jobs.getQueryResults because the job just started. + reload_call, + query_results_call, + # jobs.get & jobs.getQueryResults because the query is still + # running. + reload_call, + query_results_call, + # We got a jobComplete response from the most recent call to + # jobs.getQueryResults, so now call jobs.get until we get + # `Jobs.status.state == "DONE"`. This tests a fix for internal + # issue b/332850329. + reload_call, + reload_call, + reload_call, + # jobs.getQueryResults without `maxResults` set to download + # the rows as we iterate over the `RowIterator`. + query_page_call, + ] ) def test_result_dry_run(self): @@ -1069,7 +1052,7 @@ def test_result_with_done_job_calls_get_query_results(self): method="GET", path=query_results_path, query_params={"maxResults": 0, "location": "EU"}, - timeout=None, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, ) query_results_page_call = mock.call( method="GET", @@ -1107,7 +1090,10 @@ def test_result_with_done_jobs_query_response_doesnt_call_get_query_results(self request_config=None, query_response=query_resource_done, ) - assert job.state == "DONE" + + # We want job.result() to refresh the job state, so the conversion is + # always "PENDING", even if the job is finished. + assert job.state == "PENDING" result = job.result() @@ -1156,7 +1142,9 @@ def test_result_with_done_jobs_query_response_and_page_size_invalidates_cache(se request_config=None, query_response=query_resource_done, ) - assert job.state == "DONE" + # We want job.result() to refresh the job state, so the conversion is + # always "PENDING", even if the job is finished. + assert job.state == "PENDING" # Act result = job.result(page_size=3) @@ -1230,7 +1218,7 @@ def test_result_with_max_results(self): query_page_request[1]["query_params"]["maxResults"], max_results ) - def test_result_w_retry(self): + def test_result_w_custom_retry(self): from google.cloud.bigquery.table import RowIterator query_resource = { @@ -1254,12 +1242,24 @@ def test_result_w_retry(self): } connection = make_connection( + # Also, for each API request, raise an exception that we know can + # be retried. Because of this, for each iteration we do: + # jobs.get (x2) & jobs.getQueryResults (x2) + exceptions.NotFound("not normally retriable"), + job_resource, exceptions.NotFound("not normally retriable"), query_resource, + # Query still not done, repeat both. exceptions.NotFound("not normally retriable"), - query_resource_done, + job_resource, exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + # Query still not done, repeat both. job_resource_done, + exceptions.NotFound("not normally retriable"), + query_resource_done, + # Query finished! ) client = _make_client(self.PROJECT, connection=connection) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1279,7 +1279,10 @@ def test_result_w_retry(self): method="GET", path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", query_params={"maxResults": 0, "location": "asia-northeast1"}, - timeout=None, + # TODO(tswast): Why do we end up setting timeout to + # google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT in + # some cases but not others? + timeout=mock.ANY, ) reload_call = mock.call( method="GET", @@ -1289,7 +1292,26 @@ def test_result_w_retry(self): ) connection.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call] + [ + # See make_connection() call above for explanation of the + # expected API calls. + # + # Query not done. + reload_call, + reload_call, + query_results_call, + query_results_call, + # Query still not done. + reload_call, + reload_call, + query_results_call, + query_results_call, + # Query done! + reload_call, + reload_call, + query_results_call, + query_results_call, + ] ) def test_result_w_empty_schema(self): @@ -1316,41 +1338,60 @@ def test_result_w_empty_schema(self): self.assertEqual(result.location, "asia-northeast1") self.assertEqual(result.query_id, "xyz-abc") - def test_result_invokes_begins(self): + def test_result_w_timeout_doesnt_raise(self): + import google.cloud.bigquery.client + begun_resource = self._make_resource() - incomplete_resource = { - "jobComplete": False, + query_resource = { + "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, } - query_resource = copy.deepcopy(incomplete_resource) - query_resource["jobComplete"] = True done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = make_connection( - begun_resource, - incomplete_resource, - query_resource, - done_resource, - query_resource, - ) + connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["jobReference"]["location"] = "US" - job.result() + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): + job.result( + # Test that fractional seconds are supported, but use a timeout + # that is representable as a floating point without rounding + # errors since it can be represented exactly in base 2. In this + # case 1.125 is 9 / 8, which is a fraction with a power of 2 in + # the denominator. + timeout=1.125, + ) - self.assertEqual(len(connection.api_request.call_args_list), 4) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[2] - reload_request = connection.api_request.call_args_list[3] - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual(reload_request[1]["method"], "GET") + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={"location": "US"}, + timeout=1.125, + ) + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + reload_call, + ] + ) - def test_result_w_timeout(self): + def test_result_w_timeout_raises_concurrent_futures_timeout(self): import google.cloud.bigquery.client begun_resource = self._make_resource() + begun_resource["jobReference"]["location"] = "US" query_resource = { "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, @@ -1361,26 +1402,35 @@ def test_result_w_timeout(self): connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["jobReference"]["location"] = "US" - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): - job.result(timeout=1.0) - - self.assertEqual(len(connection.api_request.call_args_list), 3) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[1] - reload_request = connection.api_request.call_args_list[2] - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual( - query_request[1]["path"], - "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), + with freezegun.freeze_time( + "1970-01-01 00:00:00", auto_tick_seconds=1.0 + ), self.assertRaises(concurrent.futures.TimeoutError): + job.result(timeout=1.125) + + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={"location": "US"}, + timeout=1.125, ) - self.assertEqual(query_request[1]["timeout"], 120) - self.assertEqual( - query_request[1]["timeout"], - google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + # Timeout before we can reload with the final job state. + ] ) - self.assertEqual(reload_request[1]["method"], "GET") def test_result_w_page_size(self): # Arrange diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py index 671b829f7c99..9f661dca7492 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py @@ -246,7 +246,9 @@ def test__to_query_job_dry_run(): @pytest.mark.parametrize( ("completed", "expected_state"), ( - (True, "DONE"), + # Always pending so that we refresh the job state to get the + # destination table or job stats in case it's needed. + (True, "PENDING"), (False, "PENDING"), ), ) @@ -843,6 +845,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): "jobId": "response-job-id", "location": "response-location", }, + "status": {"state": "DONE"}, }, { "rows": [ @@ -896,18 +899,10 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): timeout=None, ) - # TODO(swast): Fetching job metadata isn't necessary in this case. - jobs_get_path = "/projects/response-project/jobs/response-job-id" - client._call_api.assert_any_call( - None, # retry - span_name="BigQuery.job.reload", - span_attributes={"path": jobs_get_path}, - job_ref=mock.ANY, - method="GET", - path=jobs_get_path, - query_params={"location": "response-location"}, - timeout=None, - ) + # Note: There is no get call to + # "/projects/response-project/jobs/response-job-id", because fetching job + # metadata isn't necessary in this case. The job already completed in + # jobs.query and we don't need the full job metadata in query_and_wait. # Fetch the remaining two pages. jobs_get_query_results_path = "/projects/response-project/queries/response-job-id" @@ -944,6 +939,7 @@ def test_query_and_wait_incomplete_query(): Client._list_rows_from_query_results, client ) client._call_api.side_effect = ( + # jobs.query { "jobReference": { "projectId": "response-project", @@ -952,6 +948,16 @@ def test_query_and_wait_incomplete_query(): }, "jobComplete": False, }, + # jobs.get + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "status": {"state": "RUNNING"}, + }, + # jobs.getQueryResults with max_results=0 { "jobReference": { "projectId": "response-project", @@ -968,13 +974,18 @@ def test_query_and_wait_incomplete_query(): ], }, }, + # jobs.get { "jobReference": { "projectId": "response-project", "jobId": "response-job-id", "location": "response-location", }, + "status": {"state": "DONE"}, }, + # jobs.getQueryResults + # Note: No more jobs.getQueryResults with max_results=0 because the + # previous call to jobs.getQueryResults returned with jobComplete=True. { "rows": [ {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, @@ -987,6 +998,7 @@ def test_query_and_wait_incomplete_query(): "totalRows": 2, "pageToken": "page-2", }, + # jobs.getQueryResults { "rows": [ {"f": [{"v": "Pearl Slaghoople"}, {"v": "53"}]}, diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py index d7049c5ca2e6..43ddae1dc7c3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -24,7 +24,7 @@ from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers -from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY +import google.cloud.bigquery.retry from .helpers import make_connection @@ -126,6 +126,168 @@ def api_request(method, path, query_params=None, data=None, **kw): assert job.job_id == orig_job_id +def test_query_retry_with_default_retry_and_ambiguous_errors_only_retries_with_failed_job( + client, monkeypatch +): + """ + Some errors like 'rateLimitExceeded' can be ambiguous. Make sure we only + retry the job when we know for sure that the job has failed for a retriable + reason. We can only be sure after a "successful" call to jobs.get to fetch + the failed job status. + """ + job_counter = 0 + + def make_job_id(*args, **kwargs): + nonlocal job_counter + job_counter += 1 + return f"{job_counter}" + + monkeypatch.setattr(_job_helpers, "make_job_id", make_job_id) + + project = client.project + job_reference_1 = {"projectId": project, "jobId": "1", "location": "test-loc"} + job_reference_2 = {"projectId": project, "jobId": "2", "location": "test-loc"} + NUM_API_RETRIES = 2 + + # This error is modeled after a real customer exception in + # https://github.com/googleapis/python-bigquery/issues/707. + internal_error = google.api_core.exceptions.InternalServerError( + "Job failed just because...", + errors=[ + {"reason": "internalError"}, + ], + ) + responses = [ + # jobs.insert + {"jobReference": job_reference_1, "status": {"state": "PENDING"}}, + # jobs.get + {"jobReference": job_reference_1, "status": {"state": "RUNNING"}}, + # jobs.getQueryResults x2 + # + # Note: internalError is ambiguous in jobs.getQueryResults. The + # problem could be at the Google Frontend level or it could be because + # the job has failed due to some transient issues and the BigQuery + # REST API is translating the job failed status into failure HTTP + # codes. + # + # TODO(GH#1903): We shouldn't retry nearly this many times when we get + # ambiguous errors from jobs.getQueryResults. + # See: https://github.com/googleapis/python-bigquery/issues/1903 + internal_error, + internal_error, + # jobs.get -- the job has failed + { + "jobReference": job_reference_1, + "status": {"state": "DONE", "errorResult": {"reason": "internalError"}}, + }, + # jobs.insert + {"jobReference": job_reference_2, "status": {"state": "PENDING"}}, + # jobs.get + {"jobReference": job_reference_2, "status": {"state": "RUNNING"}}, + # jobs.getQueryResults + {"jobReference": job_reference_2, "jobComplete": True}, + # jobs.get + {"jobReference": job_reference_2, "status": {"state": "DONE"}}, + ] + + conn = client._connection = make_connection() + conn.api_request.side_effect = responses + + with freezegun.freeze_time( + # Note: because of exponential backoff and a bit of jitter, + # NUM_API_RETRIES will get less accurate the greater the value. + # We add 1 because we know there will be at least some additional + # calls to fetch the time / sleep before the retry deadline is hit. + auto_tick_seconds=( + google.cloud.bigquery.retry._DEFAULT_RETRY_DEADLINE / NUM_API_RETRIES + ) + + 1, + ): + job = client.query("select 1") + job.result() + + conn.api_request.assert_has_calls( + [ + # jobs.insert + mock.call( + method="POST", + path="/projects/PROJECT/jobs", + data={ + "jobReference": {"jobId": "1", "projectId": "PROJECT"}, + "configuration": { + "query": {"useLegacySql": False, "query": "select 1"} + }, + }, + timeout=None, + ), + # jobs.get + mock.call( + method="GET", + path="/projects/PROJECT/jobs/1", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.getQueryResults x2 + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + # jobs.get -- verify that the job has failed + mock.call( + method="GET", + path="/projects/PROJECT/jobs/1", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.insert + mock.call( + method="POST", + path="/projects/PROJECT/jobs", + data={ + "jobReference": { + # Make sure that we generated a new job ID. + "jobId": "2", + "projectId": "PROJECT", + }, + "configuration": { + "query": {"useLegacySql": False, "query": "select 1"} + }, + }, + timeout=None, + ), + # jobs.get + mock.call( + method="GET", + path="/projects/PROJECT/jobs/2", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.getQueryResults + mock.call( + method="GET", + path="/projects/PROJECT/queries/2", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + # jobs.get + mock.call( + method="GET", + path="/projects/PROJECT/jobs/2", + query_params={"location": "test-loc"}, + timeout=None, + ), + ] + ) + + # With job_retry_on_query, we're testing 4 scenarios: # - Pass None retry to `query`. # - Pass None retry to `result`. @@ -187,8 +349,8 @@ def api_request(method, path, query_params=None, data=None, **kw): with pytest.raises(google.api_core.exceptions.RetryError): job.result() - # We never got a successful job, so the job id never changed: - assert job.job_id == orig_job_id + # We retried the job at least once, so we should have generated a new job ID. + assert job.job_id != orig_job_id # We failed because we couldn't succeed after 120 seconds. # But we can try again: @@ -301,8 +463,8 @@ def test_query_and_wait_retries_job_for_DDL_queries(): job_config=None, page_size=None, max_results=None, - retry=DEFAULT_JOB_RETRY, - job_retry=DEFAULT_JOB_RETRY, + retry=google.cloud.bigquery.retry.DEFAULT_RETRY, + job_retry=google.cloud.bigquery.retry.DEFAULT_JOB_RETRY, ) assert len(list(rows)) == 4 From da365f6f5d226f77b889816d3fef58ce2e63e148 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 12:24:03 -0500 Subject: [PATCH 1778/2016] chore(main): release 3.21.0 (#1883) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 22 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 95af2d213693..0fc77f7c5c79 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,28 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.21.0](https://github.com/googleapis/python-bigquery/compare/v3.20.1...v3.21.0) (2024-04-18) + + +### Features + +* Add compression option ZSTD. ([#1890](https://github.com/googleapis/python-bigquery/issues/1890)) ([5ed9cce](https://github.com/googleapis/python-bigquery/commit/5ed9ccee204b7cf8e96cb0e050f6830c05f3b4fd)) +* Adds billing to opentel ([#1889](https://github.com/googleapis/python-bigquery/issues/1889)) ([38697fb](https://github.com/googleapis/python-bigquery/commit/38697fb942516fc2f6f5e21e19a11811fbaeb1f4)) +* Support RANGE in queries Part 1: JSON ([#1884](https://github.com/googleapis/python-bigquery/issues/1884)) ([3634405](https://github.com/googleapis/python-bigquery/commit/3634405fa1b40ae5f69b06d7c7f8de4e3d246d92)) + + +### Bug Fixes + +* Add types to DatasetReference constructor ([#1601](https://github.com/googleapis/python-bigquery/issues/1601)) ([bf8861c](https://github.com/googleapis/python-bigquery/commit/bf8861c3473a1af978db7a06463ddc0bad86f326)) +* Creates linting-typing.cfg in presubmit ([#1881](https://github.com/googleapis/python-bigquery/issues/1881)) ([c852c15](https://github.com/googleapis/python-bigquery/commit/c852c153c55025ba1187d61e313ead2308616c55)) +* Remove duplicate key time_partitioning from Table._PROPERTY_TO_A… ([#1898](https://github.com/googleapis/python-bigquery/issues/1898)) ([82ae908](https://github.com/googleapis/python-bigquery/commit/82ae908fbf3b2361343fff1859d3533383dc50ec)) +* Retry query jobs that fail even with ambiguous `jobs.getQueryResults` REST errors ([#1903](https://github.com/googleapis/python-bigquery/issues/1903), [#1900](https://github.com/googleapis/python-bigquery/issues/1900)) ([1367b58](https://github.com/googleapis/python-bigquery/commit/1367b584b68d917ec325ce4383a0e9a36205b894)) + + +### Performance Improvements + +* Avoid unnecessary API call in `QueryJob.result()` when job is already finished ([#1900](https://github.com/googleapis/python-bigquery/issues/1900)) ([1367b58](https://github.com/googleapis/python-bigquery/commit/1367b584b68d917ec325ce4383a0e9a36205b894)) + ## [3.20.1](https://github.com/googleapis/python-bigquery/compare/v3.20.0...v3.20.1) (2024-04-01) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 55093e39059e..29c08b51f294 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.20.1" +__version__ = "3.21.0" From 5e8015cd6f1fa3e0c6acf7d487e80774669a6ad8 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 18 Apr 2024 13:14:56 -0700 Subject: [PATCH 1779/2016] feat: support RANGE in queries Part 2: Arrow (#1868) * feat: support range in queries as dict * fix sys tests * lint * add arrow support * fix python 3.7 test error * print dependencies in sys test * add unit test and docs * fix unit test * add func docs * add sys test for tabledata.list in arrow * add sys test for tabledata.list as iterator * lint * fix docs error * fix docstring * fix docstring * fix docstring * docs * docs * docs * move dtypes mapping code * address comment * address comment * fix pytest error * Revert "move dtypes mapping code" This reverts commit c46c65c822b3c8295d5d6650b1c9c97d35d2ba5b. * remove commented out assertions * typo and formats * add None-check for range_element_type and add unit tests * change test skip condition * fix test error * change test skip condition * change test skip condition * change decorator order * use a different way to construct test data * fix error message and add warning number check * add warning number check and comments --- .../google/cloud/bigquery/_helpers.py | 16 +- .../google/cloud/bigquery/_pandas_helpers.py | 33 +++++ .../google/cloud/bigquery/dbapi/_helpers.py | 14 +- .../google/cloud/bigquery/enums.py | 9 ++ .../google/cloud/bigquery/job/query.py | 67 +++++++++ .../google/cloud/bigquery/query.py | 11 +- .../google/cloud/bigquery/table.py | 137 ++++++++++++++++++ packages/google-cloud-bigquery/noxfile.py | 3 + .../tests/data/scalars.csv | 2 + .../tests/data/scalars_schema_csv.json | 10 ++ .../tests/system/conftest.py | 22 ++- .../tests/system/test_arrow.py | 27 ++++ .../tests/system/test_list_rows.py | 14 ++ .../tests/unit/test__pandas_helpers.py | 61 ++++++++ .../tests/unit/test_table.py | 115 ++++++++++++++- 15 files changed, 516 insertions(+), 25 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/data/scalars.csv create mode 100644 packages/google-cloud-bigquery/tests/data/scalars_schema_csv.json diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 0572867d756f..083eb9f9d6f8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -66,6 +66,8 @@ _UNIVERSE_DOMAIN_ENV = "GOOGLE_CLOUD_UNIVERSE_DOMAIN" """Environment variable for setting universe domain.""" +_SUPPORTED_RANGE_ELEMENTS = {"TIMESTAMP", "DATETIME", "DATE"} + def _get_client_universe( client_options: Optional[Union[client_options_lib.ClientOptions, dict]] @@ -310,17 +312,13 @@ def _json_from_json(value, field): def _range_element_from_json(value, field): - """Coerce 'value' to a range element value, if set or not nullable.""" + """Coerce 'value' to a range element value.""" if value == "UNBOUNDED": return None - elif field.element_type == "DATE": - return _date_from_json(value, None) - elif field.element_type == "DATETIME": - return _datetime_from_json(value, None) - elif field.element_type == "TIMESTAMP": - return _timestamp_from_json(value, None) + if field.element_type in _SUPPORTED_RANGE_ELEMENTS: + return _CELLDATA_FROM_JSON[field.element_type](value, field.element_type) else: - raise ValueError(f"Unsupported range field type: {value}") + raise ValueError(f"Unsupported range element type: {field.element_type}") def _range_from_json(value, field): @@ -344,7 +342,7 @@ def _range_from_json(value, field): end = _range_element_from_json(end, field.range_element_type) return {"start": start, "end": end} else: - raise ValueError(f"Unknown range format: {value}") + raise ValueError(f"Unknown format for range value: {value}") else: return None diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 3b58d3736dcf..8395478fb15b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -142,6 +142,17 @@ def bq_to_arrow_struct_data_type(field): return pyarrow.struct(arrow_fields) +def bq_to_arrow_range_data_type(field): + if field is None: + raise ValueError( + "Range element type cannot be None, must be one of " + "DATE, DATETIME, or TIMESTAMP" + ) + element_type = field.element_type.upper() + arrow_element_type = _pyarrow_helpers.bq_to_arrow_scalars(element_type)() + return pyarrow.struct([("start", arrow_element_type), ("end", arrow_element_type)]) + + def bq_to_arrow_data_type(field): """Return the Arrow data type, corresponding to a given BigQuery column. @@ -160,6 +171,9 @@ def bq_to_arrow_data_type(field): if field_type_upper in schema._STRUCT_TYPES: return bq_to_arrow_struct_data_type(field) + if field_type_upper == "RANGE": + return bq_to_arrow_range_data_type(field.range_element_type) + data_type_constructor = _pyarrow_helpers.bq_to_arrow_scalars(field_type_upper) if data_type_constructor is None: return None @@ -220,6 +234,9 @@ def default_types_mapper( datetime_dtype: Union[Any, None] = None, time_dtype: Union[Any, None] = None, timestamp_dtype: Union[Any, None] = None, + range_date_dtype: Union[Any, None] = None, + range_datetime_dtype: Union[Any, None] = None, + range_timestamp_dtype: Union[Any, None] = None, ): """Create a mapping from pyarrow types to pandas types. @@ -274,6 +291,22 @@ def types_mapper(arrow_data_type): elif time_dtype is not None and pyarrow.types.is_time(arrow_data_type): return time_dtype + elif pyarrow.types.is_struct(arrow_data_type): + if range_datetime_dtype is not None and arrow_data_type.equals( + range_datetime_dtype.pyarrow_dtype + ): + return range_datetime_dtype + + elif range_date_dtype is not None and arrow_data_type.equals( + range_date_dtype.pyarrow_dtype + ): + return range_date_dtype + + elif range_timestamp_dtype is not None and arrow_data_type.equals( + range_timestamp_dtype.pyarrow_dtype + ): + return range_timestamp_dtype + return types_mapper diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py index 117fa8ae70b4..a4ab05ce82a3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -277,12 +277,14 @@ def complex_query_parameter( param = query.ArrayQueryParameter( name, sub_type, - value - if isinstance(sub_type, query.ScalarQueryParameterType) - else [ - complex_query_parameter(None, v, sub_type._complex__src, base) - for v in value - ], + ( + value + if isinstance(sub_type, query.ScalarQueryParameterType) + else [ + complex_query_parameter(None, v, sub_type._complex__src, base) + for v in value + ] + ), ) elif type_type == STRUCT: if not isinstance(value, collections_abc.Mapping): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 1abe28381c40..d8cbe99691b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -99,6 +99,15 @@ class DefaultPandasDTypes(enum.Enum): TIME_DTYPE = object() """Specifies default time dtype""" + RANGE_DATE_DTYPE = object() + """Specifies default range date dtype""" + + RANGE_DATETIME_DTYPE = object() + """Specifies default range datetime dtype""" + + RANGE_TIMESTAMP_DTYPE = object() + """Specifies default range timestamp dtype""" + class DestinationFormat(object): """The exported file format. The default value is :attr:`CSV`. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 7436b6013269..09a69e11cc93 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1784,6 +1784,13 @@ def to_dataframe( datetime_dtype: Union[Any, None] = None, time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, timestamp_dtype: Union[Any, None] = None, + range_date_dtype: Union[Any, None] = DefaultPandasDTypes.RANGE_DATE_DTYPE, + range_datetime_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_DATETIME_DTYPE, + range_timestamp_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1919,6 +1926,63 @@ def to_dataframe( .. versionadded:: 3.10.0 + range_date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + )) + + to convert BigQuery RANGE type, instead of relying + on the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data @@ -1949,6 +2013,9 @@ def to_dataframe( datetime_dtype=datetime_dtype, time_dtype=time_dtype, timestamp_dtype=timestamp_dtype, + range_date_dtype=range_date_dtype, + range_datetime_dtype=range_datetime_dtype, + range_timestamp_dtype=range_timestamp_dtype, ) # If changing the signature of this method, make sure to apply the same diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 9c9402b7413c..9c59056fd6f7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -24,14 +24,13 @@ from google.cloud.bigquery._helpers import _rows_from_json from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM +from google.cloud.bigquery._helpers import _SUPPORTED_RANGE_ELEMENTS _SCALAR_VALUE_TYPE = Optional[ Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date] ] -_RANGE_ELEMENT_TYPE_STR = {"TIMESTAMP", "DATETIME", "DATE"} - class ConnectionProperty: """A connection-level property to customize query behavior. @@ -388,14 +387,14 @@ def _parse_range_element_type(self, type_): google.cloud.bigquery.query.ScalarQueryParameterType: Instance """ if isinstance(type_, str): - if type_ not in _RANGE_ELEMENT_TYPE_STR: + if type_ not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a string, range element type must be one of " "'TIMESTAMP', 'DATE', or 'DATETIME'." ) return ScalarQueryParameterType(type_) elif isinstance(type_, ScalarQueryParameterType): - if type_._type not in _RANGE_ELEMENT_TYPE_STR: + if type_._type not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a ScalarQueryParameter object, range element " "type must be one of 'TIMESTAMP', 'DATE', or 'DATETIME' " @@ -960,14 +959,14 @@ class RangeQueryParameter(_AbstractQueryParameter): @classmethod def _parse_range_element_type(self, range_element_type): if isinstance(range_element_type, str): - if range_element_type not in _RANGE_ELEMENT_TYPE_STR: + if range_element_type not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a string, range_element_type must be one of " f"'TIMESTAMP', 'DATE', or 'DATETIME'. Got {range_element_type}." ) return RangeQueryParameterType(range_element_type) elif isinstance(range_element_type, RangeQueryParameterType): - if range_element_type.type_._type not in _RANGE_ELEMENT_TYPE_STR: + if range_element_type.type_._type not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a RangeQueryParameterType object, " "range_element_type must be one of 'TIMESTAMP', 'DATE', " diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 73e755e9ea61..2f07bcc783a3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -2044,6 +2044,13 @@ def to_dataframe( datetime_dtype: Union[Any, None] = None, time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, timestamp_dtype: Union[Any, None] = None, + range_date_dtype: Union[Any, None] = DefaultPandasDTypes.RANGE_DATE_DTYPE, + range_datetime_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_DATETIME_DTYPE, + range_timestamp_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -2183,6 +2190,63 @@ def to_dataframe( .. versionadded:: 3.10.0 + range_date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + )) + + to convert BigQuery RANGE type, instead of relying + on the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -2214,6 +2278,69 @@ def to_dataframe( if time_dtype is DefaultPandasDTypes.TIME_DTYPE: time_dtype = db_dtypes.TimeDtype() + if range_date_dtype is DefaultPandasDTypes.RANGE_DATE_DTYPE: + try: + range_date_dtype = pandas.ArrowDtype( + pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + ) + ) + except AttributeError: + # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 + # only supports upto pandas 1.3. If pandas.ArrowDtype is not + # present, we raise a warning and set range_date_dtype to None. + msg = ( + "Unable to find class ArrowDtype in pandas, setting " + "range_date_dtype to be None. To use ArrowDtype, please " + "use pandas >= 1.5 and python >= 3.8." + ) + warnings.warn(msg) + range_date_dtype = None + + if range_datetime_dtype is DefaultPandasDTypes.RANGE_DATETIME_DTYPE: + try: + range_datetime_dtype = pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + ) + ) + except AttributeError: + # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 + # only supports upto pandas 1.3. If pandas.ArrowDtype is not + # present, we raise a warning and set range_datetime_dtype to None. + msg = ( + "Unable to find class ArrowDtype in pandas, setting " + "range_datetime_dtype to be None. To use ArrowDtype, " + "please use pandas >= 1.5 and python >= 3.8." + ) + warnings.warn(msg) + range_datetime_dtype = None + + if range_timestamp_dtype is DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE: + try: + range_timestamp_dtype = pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + ) + ) + except AttributeError: + # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 + # only supports upto pandas 1.3. If pandas.ArrowDtype is not + # present, we raise a warning and set range_timestamp_dtype to None. + msg = ( + "Unable to find class ArrowDtype in pandas, setting " + "range_timestamp_dtype to be None. To use ArrowDtype, " + "please use pandas >= 1.5 and python >= 3.8." + ) + warnings.warn(msg) + range_timestamp_dtype = None + if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE) @@ -2298,6 +2425,9 @@ def to_dataframe( datetime_dtype=datetime_dtype, time_dtype=time_dtype, timestamp_dtype=timestamp_dtype, + range_date_dtype=range_date_dtype, + range_datetime_dtype=range_datetime_dtype, + range_timestamp_dtype=range_timestamp_dtype, ), ) else: @@ -2502,6 +2632,9 @@ def to_dataframe( datetime_dtype=None, time_dtype=None, timestamp_dtype=None, + range_date_dtype=None, + range_datetime_dtype=None, + range_timestamp_dtype=None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2519,6 +2652,9 @@ def to_dataframe( datetime_dtype (Any): Ignored. Added for compatibility with RowIterator. time_dtype (Any): Ignored. Added for compatibility with RowIterator. timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator. + range_date_dtype (Any): Ignored. Added for compatibility with RowIterator. + range_datetime_dtype (Any): Ignored. Added for compatibility with RowIterator. + range_timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. @@ -2541,6 +2677,7 @@ def to_geodataframe( dtypes (Any): Ignored. Added for compatibility with RowIterator. progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + geography_column (str): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 034bb843aecb..78a9ab5b6710 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -208,6 +208,9 @@ def system(session): extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) + # print versions of all dependencies + session.run("python", "-m", "pip", "freeze") + # Run py.test against the system tests. session.run( "py.test", diff --git a/packages/google-cloud-bigquery/tests/data/scalars.csv b/packages/google-cloud-bigquery/tests/data/scalars.csv new file mode 100644 index 000000000000..7af97583fa34 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/data/scalars.csv @@ -0,0 +1,2 @@ +"[2020-01-01, 2020-02-01)" + diff --git a/packages/google-cloud-bigquery/tests/data/scalars_schema_csv.json b/packages/google-cloud-bigquery/tests/data/scalars_schema_csv.json new file mode 100644 index 000000000000..82b878d95fcc --- /dev/null +++ b/packages/google-cloud-bigquery/tests/data/scalars_schema_csv.json @@ -0,0 +1,10 @@ +[ + { + "mode" : "NULLABLE", + "name" : "range_date", + "type" : "RANGE", + "rangeElementType": { + "type": "DATE" + } + } + ] \ No newline at end of file diff --git a/packages/google-cloud-bigquery/tests/system/conftest.py b/packages/google-cloud-bigquery/tests/system/conftest.py index 784a1dd5c097..8efa042af7ec 100644 --- a/packages/google-cloud-bigquery/tests/system/conftest.py +++ b/packages/google-cloud-bigquery/tests/system/conftest.py @@ -96,12 +96,14 @@ def load_scalars_table( project_id: str, dataset_id: str, data_path: str = "scalars.jsonl", + source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON, + schema_source="scalars_schema.json", ) -> str: - schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + schema = bigquery_client.schema_from_json(DATA_DIR / schema_source) table_id = data_path.replace(".", "_") + hex(random.randrange(1000000)) job_config = bigquery.LoadJobConfig() job_config.schema = schema - job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + job_config.source_format = source_format full_table_id = f"{project_id}.{dataset_id}.{table_id}" with open(DATA_DIR / data_path, "rb") as data_file: job = bigquery_client.load_table_from_file( @@ -151,6 +153,22 @@ def scalars_table_multi_location( return request.param, full_table_id +@pytest.fixture(scope="session") +def scalars_table_csv( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +): + full_table_id = load_scalars_table( + bigquery_client, + project_id, + dataset_id, + data_path="scalars.csv", + source_format=enums.SourceFormat.CSV, + schema_source="scalars_schema_csv.json", + ) + yield full_table_id + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + @pytest.fixture def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub): return replace_non_anum("_", request.node.name) diff --git a/packages/google-cloud-bigquery/tests/system/test_arrow.py b/packages/google-cloud-bigquery/tests/system/test_arrow.py index 8b88b6844d74..82cf11f859d3 100644 --- a/packages/google-cloud-bigquery/tests/system/test_arrow.py +++ b/packages/google-cloud-bigquery/tests/system/test_arrow.py @@ -167,3 +167,30 @@ def test_arrow_extension_types_same_for_storage_and_REST_APIs_894( b"ARROW:extension:name": b"google:sqlType:geography", b"ARROW:extension:metadata": b'{"encoding": "WKT"}', } + + +def test_list_rows_range_csv( + bigquery_client: bigquery.Client, + scalars_table_csv: str, +): + table_id = scalars_table_csv + + schema = [ + bigquery.SchemaField( + "range_date", enums.SqlTypeNames.RANGE, range_element_type="DATE" + ), + ] + + arrow_table = bigquery_client.list_rows( + table_id, + selected_fields=schema, + ).to_arrow() + + schema = arrow_table.schema + + expected_type = pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + ) + + range_type = schema.field("range_date").type + assert range_type == expected_type diff --git a/packages/google-cloud-bigquery/tests/system/test_list_rows.py b/packages/google-cloud-bigquery/tests/system/test_list_rows.py index 4c08958c37ac..108b842cec8f 100644 --- a/packages/google-cloud-bigquery/tests/system/test_list_rows.py +++ b/packages/google-cloud-bigquery/tests/system/test_list_rows.py @@ -118,3 +118,17 @@ def test_list_rows_scalars_extreme( assert value == 4 else: assert value is None + + +def test_list_rows_range(bigquery_client: bigquery.Client, scalars_table_csv: str): + rows = bigquery_client.list_rows(scalars_table_csv) + rows = list(rows) + row = rows[0] + expected_range = { + "start": datetime.date(2020, 1, 1), + "end": datetime.date(2020, 2, 1), + } + assert row["range_date"] == expected_range + + row_null = rows[1] + assert row_null["range_date"] is None diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 5c13669f30d4..58d2b73b3f7e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -670,6 +670,67 @@ def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): assert array.to_pylist() == list(series) +@pytest.mark.parametrize( + "bq_schema,expected", + [ + ( + schema.SchemaField( + "field1", + "RANGE", + range_element_type=schema.FieldElementType("DATE"), + mode="NULLABLE", + ), + pyarrow.struct( + [ + ("start", pyarrow.date32()), + ("end", pyarrow.date32()), + ] + ), + ), + ( + schema.SchemaField( + "field2", + "RANGE", + range_element_type=schema.FieldElementType("DATETIME"), + mode="NULLABLE", + ), + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz=None)), + ("end", pyarrow.timestamp("us", tz=None)), + ] + ), + ), + ( + schema.SchemaField( + "field3", + "RANGE", + range_element_type=schema.FieldElementType("TIMESTAMP"), + mode="NULLABLE", + ), + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + ), + ), + ], +) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bq_to_arrow_data_type_w_range(module_under_test, bq_schema, expected): + actual = module_under_test.bq_to_arrow_data_type(bq_schema) + assert actual.equals(expected) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bq_to_arrow_data_type_w_range_no_element(module_under_test): + field = schema.SchemaField("field1", "RANGE", mode="NULLABLE") + with pytest.raises(ValueError, match="Range element type cannot be None"): + module_under_test.bq_to_arrow_data_type(field) + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 3953170fdb76..099529f95a35 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -3503,7 +3503,11 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - self.assertEqual(len(user_warnings), 0) + # With Python 3.7 and 3.8, len(user_warnings) = 3. With pandas < 1.5, + # pandas.ArrowDtype is not supported. We raise warnings because + # range columns have to be converted to object. + # With higher Python versions and noextra tests, len(user_warnings) = 0 + self.assertIn(len(user_warnings), [0, 3]) self.assertEqual(len(df), 4) @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) @@ -3534,7 +3538,11 @@ def test_to_dataframe_no_tqdm(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - self.assertEqual(len(user_warnings), 1) + # With Python 3.7 and 3.8, len(user_warnings) = 4. With pandas < 1.5, + # pandas.ArrowDtype is not supported. We raise warnings because + # range columns have to be converted to object. + # With higher Python versions and noextra tests, len(user_warnings) = 1 + self.assertIn(len(user_warnings), [1, 4]) # Even though the progress bar won't show, downloading the dataframe # should still work. @@ -3653,6 +3661,9 @@ def test_to_dataframe_w_dtypes_mapper(self): SchemaField("datetime", "DATETIME"), SchemaField("time", "TIME"), SchemaField("timestamp", "TIMESTAMP"), + SchemaField("range_timestamp", "RANGE", range_element_type="TIMESTAMP"), + SchemaField("range_datetime", "RANGE", range_element_type="DATETIME"), + SchemaField("range_date", "RANGE", range_element_type="DATE"), ] row_data = [ [ @@ -3665,6 +3676,9 @@ def test_to_dataframe_w_dtypes_mapper(self): "1999-12-31T00:00:00.000000", "00:00:00.000000", "1433836800000000", + "[1433836800000000, 1433999900000000)", + "[2009-06-17T13:45:30, 2019-07-17T13:45:30)", + "[2020-10-01, 2021-10-02)", ], [ "Bharney Rhubble", @@ -3676,6 +3690,9 @@ def test_to_dataframe_w_dtypes_mapper(self): "4567-12-31T00:00:00.000000", "12:00:00.232413", "81953424000000000", + "[1433836800000000, UNBOUNDED)", + "[2009-06-17T13:45:30, UNBOUNDED)", + "[2020-10-01, UNBOUNDED)", ], [ "Wylma Phlyntstone", @@ -3687,6 +3704,9 @@ def test_to_dataframe_w_dtypes_mapper(self): "9999-12-31T23:59:59.999999", "23:59:59.999999", "253402261199999999", + "[UNBOUNDED, UNBOUNDED)", + "[UNBOUNDED, UNBOUNDED)", + "[UNBOUNDED, UNBOUNDED)", ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] @@ -3724,6 +3744,39 @@ def test_to_dataframe_w_dtypes_mapper(self): if hasattr(pandas, "ArrowDtype") else None ), + range_date_dtype=( + pandas.ArrowDtype( + pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + ) + ) + if hasattr(pandas, "ArrowDtype") + else None + ), + range_datetime_dtype=( + pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + ) + ) + if hasattr(pandas, "ArrowDtype") + else None + ), + range_timestamp_dtype=( + pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + ) + ) + if hasattr(pandas, "ArrowDtype") + else None + ), ) self.assertIsInstance(df, pandas.DataFrame) @@ -3791,6 +3844,52 @@ def test_to_dataframe_w_dtypes_mapper(self): ], ) self.assertEqual(df.timestamp.dtype.name, "timestamp[us, tz=UTC][pyarrow]") + + self.assertEqual( + list(df.range_timestamp), + [ + { + "start": datetime.datetime( + 2015, 6, 9, 8, 0, 0, tzinfo=datetime.timezone.utc + ), + "end": datetime.datetime( + 2015, 6, 11, 5, 18, 20, tzinfo=datetime.timezone.utc + ), + }, + { + "start": datetime.datetime( + 2015, 6, 9, 8, 0, 0, tzinfo=datetime.timezone.utc + ), + "end": None, + }, + {"start": None, "end": None}, + ], + ) + + self.assertEqual( + list(df.range_datetime), + [ + { + "start": datetime.datetime(2009, 6, 17, 13, 45, 30), + "end": datetime.datetime(2019, 7, 17, 13, 45, 30), + }, + {"start": datetime.datetime(2009, 6, 17, 13, 45, 30), "end": None}, + {"start": None, "end": None}, + ], + ) + + self.assertEqual( + list(df.range_date), + [ + { + "start": datetime.date(2020, 10, 1), + "end": datetime.date(2021, 10, 2), + }, + {"start": datetime.date(2020, 10, 1), "end": None}, + {"start": None, "end": None}, + ], + ) + else: self.assertEqual( list(df.date), @@ -3851,6 +3950,9 @@ def test_to_dataframe_w_none_dtypes_mapper(self): SchemaField("datetime", "DATETIME"), SchemaField("time", "TIME"), SchemaField("timestamp", "TIMESTAMP"), + SchemaField("range_timestamp", "RANGE", range_element_type="TIMESTAMP"), + SchemaField("range_datetime", "RANGE", range_element_type="DATETIME"), + SchemaField("range_date", "RANGE", range_element_type="DATE"), ] row_data = [ [ @@ -3863,6 +3965,9 @@ def test_to_dataframe_w_none_dtypes_mapper(self): "1999-12-31T00:00:00.000000", "23:59:59.999999", "1433836800000000", + "[1433836800000000, 1433999900000000)", + "[2009-06-17T13:45:30, 2019-07-17T13:45:30)", + "[2020-10-01, 2021-10-02)", ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] @@ -3880,6 +3985,9 @@ def test_to_dataframe_w_none_dtypes_mapper(self): datetime_dtype=None, time_dtype=None, timestamp_dtype=None, + range_timestamp_dtype=None, + range_datetime_dtype=None, + range_date_dtype=None, ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(df.complete.dtype.name, "bool") @@ -3891,6 +3999,9 @@ def test_to_dataframe_w_none_dtypes_mapper(self): self.assertEqual(df.datetime.dtype.name, "datetime64[ns]") self.assertEqual(df.time.dtype.name, "object") self.assertEqual(df.timestamp.dtype.name, "datetime64[ns, UTC]") + self.assertEqual(df.range_timestamp.dtype.name, "object") + self.assertEqual(df.range_datetime.dtype.name, "object") + self.assertEqual(df.range_date.dtype.name, "object") def test_to_dataframe_w_unsupported_dtypes_mapper(self): pytest.importorskip("pandas") From 1362bdc587996d9115fa50fe0f32ea00bb39d615 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 19 Apr 2024 15:06:02 +0200 Subject: [PATCH 1780/2016] chore(deps): update all dependencies (#1904) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../samples/notebooks/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index fee6806b7283..3e9e59430159 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.20.1 +google-cloud-bigquery==3.21.0 google-auth-oauthlib==1.2.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index b3d9bc841fb5..6502ba146a74 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -15,13 +15,13 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.3; python_version >= '3.9' google-api-core==2.18.0 google-auth==2.29.0 -google-cloud-bigquery==3.20.1 +google-cloud-bigquery==3.21.0 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 -grpcio==1.62.1 +grpcio==1.62.2 idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 @@ -44,7 +44,7 @@ pytz==2024.1 PyYAML==6.0.1 requests==2.31.0 rsa==4.9 -Shapely==2.0.3 +Shapely==2.0.4 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.11.0; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 61471a348608..a431f466f9e2 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.20.1 +google.cloud.bigquery==3.21.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 3960f47b9803..dcce1e3ec69b 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.20.1 +google-cloud-bigquery==3.21.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 95f915364696..fee0ce65abe8 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.20.1 +google-cloud-bigquery==3.21.0 From 54f5c1879b1fb2da4d6aea14b8043ebee2f9baad Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 6 May 2024 10:16:17 -0400 Subject: [PATCH 1781/2016] chore(main): release 3.22.0 (#1905) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 0fc77f7c5c79..a201ef851bb7 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.22.0](https://github.com/googleapis/python-bigquery/compare/v3.21.0...v3.22.0) (2024-04-19) + + +### Features + +* Support RANGE in queries Part 2: Arrow ([#1868](https://github.com/googleapis/python-bigquery/issues/1868)) ([5251b5d](https://github.com/googleapis/python-bigquery/commit/5251b5dbb254732ea730bab664ad319bd5be47e7)) + ## [3.21.0](https://github.com/googleapis/python-bigquery/compare/v3.20.1...v3.21.0) (2024-04-18) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 29c08b51f294..b6c082ffcd88 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.21.0" +__version__ = "3.22.0" From 03a13406a00ee76178dd6bed8ef2d3d50eba417d Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 6 May 2024 16:46:27 -0700 Subject: [PATCH 1782/2016] feat: support insertAll for range (#1909) * feat: support insertAll for range * revert INTERVAL regex * lint * add unit test * lint --- .../google/cloud/bigquery/_helpers.py | 52 +++++++- .../tests/unit/test__helpers.py | 114 +++++++++++++++++- 2 files changed, 162 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 083eb9f9d6f8..668b4ca3da84 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -50,6 +50,7 @@ r"(?P-?\d+) " r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) +_RANGE_PATTERN = re.compile(r"\[.*, .*\)") BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" """Environment variable defining host for emulator.""" @@ -334,9 +335,8 @@ def _range_from_json(value, field): The parsed range object from ``value`` if the ``field`` is not null (otherwise it is :data:`None`). """ - range_literal = re.compile(r"\[.*, .*\)") if _not_null(value, field): - if range_literal.match(value): + if _RANGE_PATTERN.match(value): start, end = value[1:-1].split(", ") start = _range_element_from_json(start, field.range_element_type) end = _range_element_from_json(end, field.range_element_type) @@ -531,6 +531,52 @@ def _time_to_json(value): return value +def _range_element_to_json(value, element_type=None): + """Coerce 'value' to an JSON-compatible representation.""" + if value is None: + return None + elif isinstance(value, str): + if value.upper() in ("UNBOUNDED", "NULL"): + return None + else: + # We do not enforce range element value to be valid to reduce + # redundancy with backend. + return value + elif ( + element_type and element_type.element_type.upper() in _SUPPORTED_RANGE_ELEMENTS + ): + converter = _SCALAR_VALUE_TO_JSON_ROW.get(element_type.element_type.upper()) + return converter(value) + else: + raise ValueError( + f"Unsupported RANGE element type {element_type}, or " + "element type is empty. Must be DATE, DATETIME, or " + "TIMESTAMP" + ) + + +def _range_field_to_json(range_element_type, value): + """Coerce 'value' to an JSON-compatible representation.""" + if isinstance(value, str): + # string literal + if _RANGE_PATTERN.match(value): + start, end = value[1:-1].split(", ") + else: + raise ValueError(f"RANGE literal {value} has incorrect format") + elif isinstance(value, dict): + # dictionary + start = value.get("start") + end = value.get("end") + else: + raise ValueError( + f"Unsupported type of RANGE value {value}, must be " "string or dict" + ) + + start = _range_element_to_json(start, range_element_type) + end = _range_element_to_json(end, range_element_type) + return {"start": start, "end": end} + + # Converters used for scalar values marshalled to the BigQuery API, such as in # query parameters or the tabledata.insert API. _SCALAR_VALUE_TO_JSON_ROW = { @@ -676,6 +722,8 @@ def _single_field_to_json(field, row_value): if field.field_type == "RECORD": return _record_field_to_json(field.fields, row_value) + if field.field_type == "RANGE": + return _range_field_to_json(field.range_element_type, row_value) return _scalar_field_to_json(field, row_value) diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index a50625e2ab39..1bf21479fc24 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -1049,10 +1049,22 @@ def test_w_datetime(self): self.assertEqual(self._call_fut(when), "12:13:41") -def _make_field(field_type, mode="NULLABLE", name="testing", fields=()): +def _make_field( + field_type, + mode="NULLABLE", + name="testing", + fields=(), + range_element_type=None, +): from google.cloud.bigquery.schema import SchemaField - return SchemaField(name=name, field_type=field_type, mode=mode, fields=fields) + return SchemaField( + name=name, + field_type=field_type, + mode=mode, + fields=fields, + range_element_type=range_element_type, + ) class Test_scalar_field_to_json(unittest.TestCase): @@ -1251,6 +1263,98 @@ def test_w_dict_unknown_fields(self): ) +class Test_range_field_to_json(unittest.TestCase): + def _call_fut(self, field, value): + from google.cloud.bigquery._helpers import _range_field_to_json + + return _range_field_to_json(field, value) + + def test_w_date(self): + field = _make_field("RANGE", range_element_type="DATE") + start = datetime.date(2016, 12, 3) + original = {"start": start} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03", "end": None} + self.assertEqual(converted, expected) + + def test_w_date_string(self): + field = _make_field("RANGE", range_element_type="DATE") + original = {"start": "2016-12-03"} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03", "end": None} + self.assertEqual(converted, expected) + + def test_w_datetime(self): + field = _make_field("RANGE", range_element_type="DATETIME") + start = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456) + original = {"start": start} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456", "end": None} + self.assertEqual(converted, expected) + + def test_w_datetime_string(self): + field = _make_field("RANGE", range_element_type="DATETIME") + original = {"start": "2016-12-03T14:11:27.123456"} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456", "end": None} + self.assertEqual(converted, expected) + + def test_w_timestamp(self): + from google.cloud._helpers import UTC + + field = _make_field("RANGE", range_element_type="TIMESTAMP") + start = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC) + original = {"start": start} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456Z", "end": None} + self.assertEqual(converted, expected) + + def test_w_timestamp_string(self): + field = _make_field("RANGE", range_element_type="TIMESTAMP") + original = {"start": "2016-12-03T14:11:27.123456Z"} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456Z", "end": None} + self.assertEqual(converted, expected) + + def test_w_timestamp_float(self): + field = _make_field("RANGE", range_element_type="TIMESTAMP") + original = {"start": 12.34567} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": 12.34567, "end": None} + self.assertEqual(converted, expected) + + def test_w_string_literal(self): + field = _make_field("RANGE", range_element_type="DATE") + original = "[2016-12-03, UNBOUNDED)" + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03", "end": None} + self.assertEqual(converted, expected) + + def test_w_unsupported_range_element_type(self): + field = _make_field("RANGE", range_element_type="TIME") + with self.assertRaises(ValueError): + self._call_fut( + field.range_element_type, + {"start": datetime.time(12, 13, 41)}, + ) + + def test_w_no_range_element_type(self): + field = _make_field("RANGE") + with self.assertRaises(ValueError): + self._call_fut(field.range_element_type, "2016-12-03") + + def test_w_incorrect_literal_format(self): + field = _make_field("RANGE", range_element_type="DATE") + original = "[2016-12-03, UNBOUNDED]" + with self.assertRaises(ValueError): + self._call_fut(field.range_element_type, original) + + def test_w_unsupported_representation(self): + field = _make_field("RANGE", range_element_type="DATE") + with self.assertRaises(ValueError): + self._call_fut(field.range_element_type, object()) + + class Test_field_to_json(unittest.TestCase): def _call_fut(self, field, value): from google.cloud.bigquery._helpers import _field_to_json @@ -1285,6 +1389,12 @@ def test_w_scalar(self): converted = self._call_fut(field, original) self.assertEqual(converted, str(original)) + def test_w_range(self): + field = _make_field("RANGE", range_element_type="DATE") + original = {"start": "2016-12-03", "end": "2024-12-03"} + converted = self._call_fut(field, original) + self.assertEqual(converted, original) + class Test_snake_to_camel_case(unittest.TestCase): def _call_fut(self, value): From 41419d75c7f362a9ff1816fe2f4e6fb6e1ab6996 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 9 May 2024 11:42:19 -0700 Subject: [PATCH 1783/2016] fix: add pyarrow version check for range support (#1914) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: add pyarrow version check for range support * add comment why we are making a separate constant --------- Co-authored-by: Tim Sweña (Swast) --- .../cloud/bigquery/_versions_helpers.py | 14 ++++++ .../google/cloud/bigquery/table.py | 48 ++++++------------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py index 50d5961b3515..72d4c921de78 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py @@ -26,6 +26,9 @@ _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") _MIN_PANDAS_VERSION = packaging.version.Version("1.1.0") +_MIN_PANDAS_VERSION_RANGE = packaging.version.Version("1.5.0") +_MIN_PYARROW_VERSION_RANGE = packaging.version.Version("10.0.1") + class PyarrowVersions: """Version comparisons for pyarrow package.""" @@ -234,3 +237,14 @@ def try_import(self, raise_if_error: bool = False) -> Any: PANDAS_VERSIONS = PandasVersions() + +# Since RANGE support in pandas requires specific versions +# of both pyarrow and pandas, we make this a separate +# constant instead of as a property of PANDAS_VERSIONS +# or PYARROW_VERSIONS. +SUPPORTS_RANGE_PYARROW = ( + PANDAS_VERSIONS.try_import() is not None + and PANDAS_VERSIONS.installed_version >= _MIN_PANDAS_VERSION_RANGE + and PYARROW_VERSIONS.try_import() is not None + and PYARROW_VERSIONS.installed_version >= _MIN_PYARROW_VERSION_RANGE +) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 2f07bcc783a3..ad12531956b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -100,6 +100,12 @@ "because the necessary `__from_arrow__` attribute is missing." ) +_RANGE_PYARROW_WARNING = ( + "Unable to represent RANGE schema as struct using pandas ArrowDtype. Using " + "`object` instead. To use ArrowDtype, use pandas >= 1.5 and " + "pyarrow >= 10.0.1." +) + # How many of the total rows need to be downloaded already for us to skip # calling the BQ Storage API? ALMOST_COMPLETELY_CACHED_RATIO = 0.333 @@ -2279,26 +2285,18 @@ def to_dataframe( time_dtype = db_dtypes.TimeDtype() if range_date_dtype is DefaultPandasDTypes.RANGE_DATE_DTYPE: - try: + if _versions_helpers.SUPPORTS_RANGE_PYARROW: range_date_dtype = pandas.ArrowDtype( pyarrow.struct( [("start", pyarrow.date32()), ("end", pyarrow.date32())] ) ) - except AttributeError: - # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 - # only supports upto pandas 1.3. If pandas.ArrowDtype is not - # present, we raise a warning and set range_date_dtype to None. - msg = ( - "Unable to find class ArrowDtype in pandas, setting " - "range_date_dtype to be None. To use ArrowDtype, please " - "use pandas >= 1.5 and python >= 3.8." - ) - warnings.warn(msg) + else: + warnings.warn(_RANGE_PYARROW_WARNING) range_date_dtype = None if range_datetime_dtype is DefaultPandasDTypes.RANGE_DATETIME_DTYPE: - try: + if _versions_helpers.SUPPORTS_RANGE_PYARROW: range_datetime_dtype = pandas.ArrowDtype( pyarrow.struct( [ @@ -2307,20 +2305,12 @@ def to_dataframe( ] ) ) - except AttributeError: - # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 - # only supports upto pandas 1.3. If pandas.ArrowDtype is not - # present, we raise a warning and set range_datetime_dtype to None. - msg = ( - "Unable to find class ArrowDtype in pandas, setting " - "range_datetime_dtype to be None. To use ArrowDtype, " - "please use pandas >= 1.5 and python >= 3.8." - ) - warnings.warn(msg) + else: + warnings.warn(_RANGE_PYARROW_WARNING) range_datetime_dtype = None if range_timestamp_dtype is DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE: - try: + if _versions_helpers.SUPPORTS_RANGE_PYARROW: range_timestamp_dtype = pandas.ArrowDtype( pyarrow.struct( [ @@ -2329,16 +2319,8 @@ def to_dataframe( ] ) ) - except AttributeError: - # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 - # only supports upto pandas 1.3. If pandas.ArrowDtype is not - # present, we raise a warning and set range_timestamp_dtype to None. - msg = ( - "Unable to find class ArrowDtype in pandas, setting " - "range_timestamp_dtype to be None. To use ArrowDtype, " - "please use pandas >= 1.5 and python >= 3.8." - ) - warnings.warn(msg) + else: + warnings.warn(_RANGE_PYARROW_WARNING) range_timestamp_dtype = None if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): From 1698f212b55ec3d5ecdd6de890a4eaa41734c4ea Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 15 May 2024 07:54:36 -0400 Subject: [PATCH 1784/2016] fix: edit presubmit for to simplify configuration (#1915) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add new presubmit for test purposes * add additional sessions * Update .kokoro/presubmit/presubmit-2.cfg * Update .kokoro/presubmit/presubmit-2.cfg * added timer to nox sessions * Update .kokoro/presubmit/presubmit-2.cfg * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removes references to most environment variables * testing the use of base names for the nox sessions * removes references to unneeded linting and typing env variables * change file name and update env_vars in presubmit-2 * remove timed decorators * revert several files * Update noxfile.py * remove test, remove unneeded vars, etc --------- Co-authored-by: Owl Bot --- .../.kokoro/presubmit/presubmit.cfg | 12 ++----- packages/google-cloud-bigquery/noxfile.py | 32 ------------------- 2 files changed, 2 insertions(+), 42 deletions(-) diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg index fa39b11184d7..ce39531209f7 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg @@ -2,14 +2,6 @@ # Disable system tests. env_vars: { - key: "RUN_SYSTEM_TESTS" - value: "false" -} -env_vars: { - key: "RUN_SNIPPETS_TESTS" - value: "false" -} -env_vars: { - key: "RUN_LINTING_TYPING_TESTS" - value: "false" + key: "NOX_SESSION" + value: "unit_noextras unit cover docs" } diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 78a9ab5b6710..02655a7b7d6e 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -132,10 +132,6 @@ def unit_noextras(session): def mypy(session): """Run type checks with mypy.""" - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install("-e", ".[all]") session.install(MYPY_VERSION) @@ -157,10 +153,6 @@ def pytype(session): # recent version avoids the error until a possibly better fix is found. # https://github.com/googleapis/python-bigquery/issues/655 - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) @@ -176,10 +168,6 @@ def system(session): CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) - # Check the value of `RUN_SYSTEM_TESTS` env var. It defaults to true. - if os.environ.get("RUN_SYSTEM_TESTS", "true") == "false": - session.skip("RUN_SYSTEM_TESTS is set to false, skipping") - # Sanity check: Only run system tests if the environment variable is set. if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable.") @@ -224,10 +212,6 @@ def system(session): def mypy_samples(session): """Run type checks with mypy.""" - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install("pytest") for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"): session.install("-r", str(requirements_path)) @@ -263,10 +247,6 @@ def mypy_samples(session): def snippets(session): """Run the snippets test suite.""" - # Check the value of `RUN_SNIPPETS_TESTS` env var. It defaults to true. - if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false": - session.skip("RUN_SNIPPETS_TESTS is set to false, skipping") - constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) @@ -409,10 +389,6 @@ def lint(session): serious code quality issues. """ - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install("flake8", BLACK_VERSION) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) @@ -427,10 +403,6 @@ def lint(session): def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install("docutils", "Pygments") session.run("python", "setup.py", "check", "--restructuredtext", "--strict") @@ -441,10 +413,6 @@ def blacken(session): Format code to uniform standard. """ - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install(BLACK_VERSION) session.run("black", *BLACK_PATHS) From 900235873901ba8b3e090baf7bad968f964c4a73 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 16 May 2024 08:02:36 -0400 Subject: [PATCH 1785/2016] feat: adds timer decorator to facilitate debugging (#1917) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: adds timer decorator to sessions * updates _calculate_duration function * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/noxfile.py | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 02655a7b7d6e..5f88e46a0215 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -14,11 +14,13 @@ from __future__ import absolute_import +from functools import wraps import pathlib import os import re import shutil import nox +import time MYPY_VERSION = "mypy==1.6.1" @@ -40,6 +42,27 @@ UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.12"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() + +def _calculate_duration(func): + """This decorator prints the execution time for the decorated function.""" + + @wraps(func) + def wrapper(*args, **kwargs): + start = time.monotonic() + result = func(*args, **kwargs) + end = time.monotonic() + total_seconds = round(end - start) + hours = total_seconds // 3600 # Integer division to get hours + remaining_seconds = total_seconds % 3600 # Modulo to find remaining seconds + minutes = remaining_seconds // 60 + seconds = remaining_seconds % 60 + human_time = f"{hours:}:{minutes:0>2}:{seconds:0>2}" + print(f"Session ran in {total_seconds} seconds ({human_time})") + return result + + return wrapper + + # 'docfx' is excluded since it only needs to run in 'docs-presubmit' nox.options.sessions = [ "unit_noextras", @@ -105,6 +128,7 @@ def default(session, install_extras=True): @nox.session(python=UNIT_TEST_PYTHON_VERSIONS) +@_calculate_duration def unit(session): """Run the unit test suite.""" @@ -112,6 +136,7 @@ def unit(session): @nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]]) +@_calculate_duration def unit_noextras(session): """Run the unit test suite.""" @@ -129,6 +154,7 @@ def unit_noextras(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def mypy(session): """Run type checks with mypy.""" @@ -147,6 +173,7 @@ def mypy(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def pytype(session): """Run type checks with pytype.""" # An indirect dependecy attrs==21.1.0 breaks the check, and installing a less @@ -161,6 +188,7 @@ def pytype(session): @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) +@_calculate_duration def system(session): """Run the system test suite.""" @@ -209,6 +237,7 @@ def system(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def mypy_samples(session): """Run type checks with mypy.""" @@ -244,6 +273,7 @@ def mypy_samples(session): @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) +@_calculate_duration def snippets(session): """Run the snippets test suite.""" @@ -279,6 +309,7 @@ def snippets(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def cover(session): """Run the final coverage report. @@ -292,6 +323,7 @@ def cover(session): @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) +@_calculate_duration def prerelease_deps(session): """Run all tests with prerelease versions of dependencies installed. @@ -382,6 +414,7 @@ def prerelease_deps(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def lint(session): """Run linters. @@ -400,6 +433,7 @@ def lint(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" @@ -408,6 +442,7 @@ def lint_setup_py(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def blacken(session): """Run black. Format code to uniform standard. @@ -418,6 +453,7 @@ def blacken(session): @nox.session(python="3.9") +@_calculate_duration def docs(session): """Build the docs.""" @@ -454,6 +490,7 @@ def docs(session): @nox.session(python="3.10") +@_calculate_duration def docfx(session): """Build the docfx yaml files for this library.""" From 99232289307b47e55e276de24fadcac0bc563361 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 16 May 2024 17:29:02 +0200 Subject: [PATCH 1786/2016] chore(deps): update all dependencies (#1916) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .../samples/desktopapp/requirements-test.txt | 2 +- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 13 +++++++------ .../samples/magics/requirements-test.txt | 2 +- .../samples/magics/requirements.txt | 4 ++-- .../samples/notebooks/requirements-test.txt | 2 +- .../samples/notebooks/requirements.txt | 4 ++-- .../samples/snippets/requirements-test.txt | 2 +- .../samples/snippets/requirements.txt | 2 +- 10 files changed, 18 insertions(+), 17 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 9142d4905434..4487e2ef3e77 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.1.1; python_version >= '3.8' +pytest==8.2.0; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 3e9e59430159..716f088ac5f3 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.21.0 +google-cloud-bigquery==3.22.0 google-auth-oauthlib==1.2.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index f052969d31b8..3689fda4ec63 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.1.1; python_version >= '3.8' +pytest==8.2.0; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 6502ba146a74..8c268759eff0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,16 +12,17 @@ Fiona==1.9.6 geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' -geopandas==0.14.3; python_version >= '3.9' -google-api-core==2.18.0 +geopandas==0.14.4; python_version >= '3.9' +google-api-core==2.19.0 google-auth==2.29.0 -google-cloud-bigquery==3.21.0 -google-cloud-bigquery-storage==2.24.0 +google-cloud-bigquery==3.22.0 +google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 -grpcio==1.62.2 +grpcio==1.62.2; python_version == '3.7' +grpcio==1.63.0; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 @@ -31,7 +32,7 @@ pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==15.0.2; python_version >= '3.8' +pyarrow==16.0.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 9142d4905434..4487e2ef3e77 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.1.1; python_version >= '3.8' +pytest==8.2.0; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index a431f466f9e2..67be479e1d07 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.21.0 -google-cloud-bigquery-storage==2.24.0 +google.cloud.bigquery==3.22.0 +google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 9142d4905434..4487e2ef3e77 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.1.1; python_version >= '3.8' +pytest==8.2.0; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index dcce1e3ec69b..a60175de5d15 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.21.0 -google-cloud-bigquery-storage==2.24.0 +google-cloud-bigquery==3.22.0 +google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 0343ab89a838..3c8fcc27d0ff 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.1.1; python_version >= '3.8' +pytest==8.2.0; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index fee0ce65abe8..a5e90118f861 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.21.0 +google-cloud-bigquery==3.22.0 From 205e95b121f35dccf44ad946524753db0ae80c78 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 16 May 2024 13:04:08 -0400 Subject: [PATCH 1787/2016] chore(main): release 3.23.0 (#1911) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Chalmer Lowe --- packages/google-cloud-bigquery/CHANGELOG.md | 14 ++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index a201ef851bb7..804c0ae1c3d0 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.23.0](https://github.com/googleapis/python-bigquery/compare/v3.22.0...v3.23.0) (2024-05-16) + + +### Features + +* Adds timer decorator to facilitate debugging ([#1917](https://github.com/googleapis/python-bigquery/issues/1917)) ([ea750e0](https://github.com/googleapis/python-bigquery/commit/ea750e0248473b6207b8517aa7ea1cf4e19bccf2)) +* Support insertAll for range ([#1909](https://github.com/googleapis/python-bigquery/issues/1909)) ([74e75e8](https://github.com/googleapis/python-bigquery/commit/74e75e89ce3a5ac18112b2c1c33248445ff072e4)) + + +### Bug Fixes + +* Add pyarrow version check for range support ([#1914](https://github.com/googleapis/python-bigquery/issues/1914)) ([a86d7b9](https://github.com/googleapis/python-bigquery/commit/a86d7b96813f67fea28b46c5252416222edca9a6)) +* Edit presubmit for to simplify configuration ([#1915](https://github.com/googleapis/python-bigquery/issues/1915)) ([b739596](https://github.com/googleapis/python-bigquery/commit/b739596f37b8c00b375cc811c316b618097d761a)) + ## [3.22.0](https://github.com/googleapis/python-bigquery/compare/v3.21.0...v3.22.0) (2024-04-19) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index b6c082ffcd88..0938c08f62a3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.22.0" +__version__ = "3.23.0" From ce1719c3205178c030d9fc40de03b84d8f258ddf Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 21 May 2024 19:08:25 +0200 Subject: [PATCH 1788/2016] chore(deps): update all dependencies (#1921) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * pin grpcio===1.62.2 for python 3.7 support of python 3.7 is dropped starting 1.63 --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 8c268759eff0..ff614977bf2e 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -21,7 +21,7 @@ google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 -grpcio==1.62.2; python_version == '3.7' +grpcio===1.62.2; python_version == '3.7' grpcio==1.63.0; python_version >= '3.8' idna==3.7 munch==4.0.0 @@ -32,7 +32,7 @@ pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==16.0.0; python_version >= '3.8' +pyarrow==16.1.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index a60175de5d15..3407323ee080 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -6,7 +6,7 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.8.4; python_version >= '3.9' +matplotlib==3.9.0; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' From 65b3393feb8941817e5dc42e1d7a539cf2daea92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 21 May 2024 15:39:43 -0500 Subject: [PATCH 1789/2016] perf: decrease the threshold in which we use the BQ Storage Read API (#1925) * perf: decrease the threshold in which we use the BQ Storage Read API * fix unit test * update comment --- .../google/cloud/bigquery/table.py | 12 +++++++++++- .../google-cloud-bigquery/tests/unit/test_table.py | 10 +++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index ad12531956b6..6ebb0709a46e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -108,7 +108,17 @@ # How many of the total rows need to be downloaded already for us to skip # calling the BQ Storage API? -ALMOST_COMPLETELY_CACHED_RATIO = 0.333 +# +# In microbenchmarks on 2024-05-21, I (tswast@) measure that at about 2 MB of +# remaining results, it's faster to use the BQ Storage Read API to download +# the results than use jobs.getQueryResults. Since we don't have a good way to +# know the remaining bytes, we estimate by remaining number of rows. +# +# Except when rows themselves are larger, I observe that the a single page of +# results will be around 10 MB. Therefore, the proportion of rows already +# downloaded should be 10 (first page) / 12 (all results) or less for it to be +# worth it to make a call to jobs.getQueryResults. +ALMOST_COMPLETELY_CACHED_RATIO = 0.833333 def _reference_getter(table): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 099529f95a35..fcbba03aae52 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2307,9 +2307,17 @@ def test__is_almost_completely_cached_returns_true_with_some_rows_remaining(self rows = [ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Pebbles Phlyntstone"}, {"v": "4"}]}, + {"f": [{"v": "Bamm-Bamm Rhubble"}, {"v": "5"}]}, + {"f": [{"v": "Joseph Rockhead"}, {"v": "32"}]}, + {"f": [{"v": "Perry Masonry"}, {"v": "33"}]}, ] first_page = {"pageToken": "next-page", "rows": rows} - iterator = self._make_one(first_page_response=first_page, total_rows=6) + iterator = self._make_one( + first_page_response=first_page, total_rows=len(rows) + 1 + ) self.assertTrue(iterator._is_almost_completely_cached()) def test__is_almost_completely_cached_returns_true_with_no_rows_remaining(self): From 0c4419552f54b99bff7f675596e26de4fc136a81 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 21 May 2024 13:52:55 -0700 Subject: [PATCH 1790/2016] chore(main): release 3.23.1 (#1927) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 804c0ae1c3d0..b9a2c31496d4 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.23.1](https://github.com/googleapis/python-bigquery/compare/v3.23.0...v3.23.1) (2024-05-21) + + +### Performance Improvements + +* Decrease the threshold in which we use the BQ Storage Read API ([#1925](https://github.com/googleapis/python-bigquery/issues/1925)) ([eaa1a52](https://github.com/googleapis/python-bigquery/commit/eaa1a52b360646909c14ca7194b8c6b17fefdd79)) + ## [3.23.0](https://github.com/googleapis/python-bigquery/compare/v3.22.0...v3.23.0) (2024-05-16) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 0938c08f62a3..a62f73ed407e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.23.0" +__version__ = "3.23.1" From f2f3f8e25c5e2c305b34d875e979279de037db8e Mon Sep 17 00:00:00 2001 From: sclmn <128747290+sclmn@users.noreply.github.com> Date: Tue, 21 May 2024 14:44:45 -0700 Subject: [PATCH 1791/2016] feat: add support for map target type in Parquet options (#1919) * Update format_options.py to include the newly added map target type. The map target type creates a schema without the added key_value repeated field. * Added tests * add unit test * lint --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/format_options.py | 15 +++++++++++++++ .../tests/unit/test_format_options.py | 8 +++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py b/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py index 1208565a9565..ad5591b1c9b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py @@ -105,6 +105,21 @@ def enable_list_inference(self) -> bool: def enable_list_inference(self, value: bool) -> None: self._properties["enableListInference"] = value + @property + def map_target_type(self) -> str: + """Indicates whether to simplify the representation of parquet maps to only show keys and values.""" + + return self._properties.get("mapTargetType") + + @map_target_type.setter + def map_target_type(self, value: str) -> None: + """Sets the map target type. + + Args: + value: The map target type (eg ARRAY_OF_STRUCT). + """ + self._properties["mapTargetType"] = value + @classmethod def from_api_repr(cls, resource: Dict[str, bool]) -> "ParquetOptions": """Factory: construct an instance from a resource dict. diff --git a/packages/google-cloud-bigquery/tests/unit/test_format_options.py b/packages/google-cloud-bigquery/tests/unit/test_format_options.py index c8fecbfa6692..94a01570fed8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_format_options.py +++ b/packages/google-cloud-bigquery/tests/unit/test_format_options.py @@ -54,11 +54,17 @@ def test_from_api_repr(self): ) assert not config.enum_as_string assert config.enable_list_inference + assert config.map_target_type is None def test_to_api_repr(self): config = self._get_target_class()() config.enum_as_string = True config.enable_list_inference = False + config.map_target_type = "ARRAY_OF_STRUCT" result = config.to_api_repr() - assert result == {"enumAsString": True, "enableListInference": False} + assert result == { + "enumAsString": True, + "enableListInference": False, + "mapTargetType": "ARRAY_OF_STRUCT", + } From a408ef1ebfa11f0849ed64743f6cd6dd6e58b7a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 23 May 2024 14:48:36 -0500 Subject: [PATCH 1792/2016] fix: retry `is_job_done` on `ConnectionError` (#1930) --- .../google/cloud/bigquery/retry.py | 8 ++ .../tests/unit/test_job_retry.py | 117 ++++++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index c9898287f04e..1110345195d1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -119,6 +119,14 @@ def _job_should_retry(exc): if isinstance(exc, exceptions.RetryError): exc = exc.cause + # Per https://github.com/googleapis/python-bigquery/issues/1929, sometimes + # retriable errors make their way here. Because of the separate + # `restart_query_job` logic to make sure we aren't restarting non-failed + # jobs, it should be safe to continue and not totally fail our attempt at + # waiting for the query to complete. + if _should_retry(exc): + return True + if not hasattr(exc, "errors") or len(exc.errors) == 0: return False diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py index 43ddae1dc7c3..2dcc5878d8df 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -21,6 +21,7 @@ import google.api_core.exceptions import google.api_core.retry import freezegun +import requests.exceptions from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers @@ -126,6 +127,122 @@ def api_request(method, path, query_params=None, data=None, **kw): assert job.job_id == orig_job_id +def test_retry_connection_error_with_default_retries_and_successful_first_job( + monkeypatch, client +): + """ + Make sure ConnectionError can be retried at `is_job_done` level, even if + retries are exhaused by API-level retry. + + Note: Because restart_query_job is set to True only in the case of a + confirmed job failure, this should be safe to do even when a job is not + idempotent. + + Regression test for issue + https://github.com/googleapis/python-bigquery/issues/1929 + """ + job_counter = 0 + + def make_job_id(*args, **kwargs): + nonlocal job_counter + job_counter += 1 + return f"{job_counter}" + + monkeypatch.setattr(_job_helpers, "make_job_id", make_job_id) + conn = client._connection = make_connection() + project = client.project + job_reference_1 = {"projectId": project, "jobId": "1", "location": "test-loc"} + NUM_API_RETRIES = 2 + + with freezegun.freeze_time( + "2024-01-01 00:00:00", + # Note: because of exponential backoff and a bit of jitter, + # NUM_API_RETRIES will get less accurate the greater the value. + # We add 1 because we know there will be at least some additional + # calls to fetch the time / sleep before the retry deadline is hit. + auto_tick_seconds=( + google.cloud.bigquery.retry._DEFAULT_RETRY_DEADLINE / NUM_API_RETRIES + ) + + 1, + ): + conn.api_request.side_effect = [ + # jobs.insert + {"jobReference": job_reference_1, "status": {"state": "PENDING"}}, + # jobs.get + {"jobReference": job_reference_1, "status": {"state": "RUNNING"}}, + # jobs.getQueryResults x2 + requests.exceptions.ConnectionError(), + requests.exceptions.ConnectionError(), + # jobs.get + # Job actually succeeeded, so we shouldn't be restarting the job, + # even though we are retrying at the `is_job_done` level. + {"jobReference": job_reference_1, "status": {"state": "DONE"}}, + # jobs.getQueryResults + {"jobReference": job_reference_1, "jobComplete": True}, + ] + + job = client.query("select 1") + rows_iter = job.result() + + assert job.done() # Shouldn't make any additional API calls. + assert rows_iter is not None + + # Should only have created one job, even though we did call job_retry. + assert job_counter == 1 + + # Double-check that we made the API calls we expected to make. + conn.api_request.assert_has_calls( + [ + # jobs.insert + mock.call( + method="POST", + path="/projects/PROJECT/jobs", + data={ + "jobReference": {"jobId": "1", "projectId": "PROJECT"}, + "configuration": { + "query": {"useLegacySql": False, "query": "select 1"} + }, + }, + timeout=None, + ), + # jobs.get + mock.call( + method="GET", + path="/projects/PROJECT/jobs/1", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.getQueryResults x2 + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + # jobs.get -- is_job_done checking again + mock.call( + method="GET", + path="/projects/PROJECT/jobs/1", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.getQueryResults + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=120, + ), + ], + ) + + def test_query_retry_with_default_retry_and_ambiguous_errors_only_retries_with_failed_job( client, monkeypatch ): From afce5bd60a18d35c106f1e1123537b52fb71d142 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 28 May 2024 17:05:21 +0200 Subject: [PATCH 1793/2016] chore(deps): update all dependencies (#1926) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Co-authored-by: Chalmer Lowe --- .../samples/desktopapp/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../samples/magics/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../samples/notebooks/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 4487e2ef3e77..a6c397822790 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.0; python_version >= '3.8' +pytest==8.2.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 716f088ac5f3..fcfd8f842591 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.22.0 +google-cloud-bigquery==3.23.0 google-auth-oauthlib==1.2.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 3689fda4ec63..64d436dcfd69 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.2.0; python_version >= '3.8' +pytest==8.2.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index ff614977bf2e..a40f2fc1f1df 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -15,14 +15,14 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.4; python_version >= '3.9' google-api-core==2.19.0 google-auth==2.29.0 -google-cloud-bigquery==3.22.0 +google-cloud-bigquery==3.23.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.63.0; python_version >= '3.8' +grpcio==1.64.0; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 4487e2ef3e77..a6c397822790 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.0; python_version >= '3.8' +pytest==8.2.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 67be479e1d07..afa69b3c0eba 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.22.0 +google.cloud.bigquery==3.23.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 4487e2ef3e77..a6c397822790 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.0; python_version >= '3.8' +pytest==8.2.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 3407323ee080..67baedb0d084 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.22.0 +google-cloud-bigquery==3.23.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 3c8fcc27d0ff..bd1ba50284da 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.0; python_version >= '3.8' +pytest==8.2.1; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index a5e90118f861..7601e0772e5d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.22.0 +google-cloud-bigquery==3.23.0 From d06e813e687707637c711c39abfdd630b0577292 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 29 May 2024 17:36:50 +0200 Subject: [PATCH 1794/2016] chore(deps): update all dependencies (#1934) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../google-cloud-bigquery/samples/desktopapp/requirements.txt | 2 +- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index fcfd8f842591..b35cc414c1e5 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.23.0 +google-cloud-bigquery==3.23.1 google-auth-oauthlib==1.2.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index a40f2fc1f1df..ec58831e893d 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -15,7 +15,7 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.4; python_version >= '3.9' google-api-core==2.19.0 google-auth==2.29.0 -google-cloud-bigquery==3.23.0 +google-cloud-bigquery==3.23.1 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 @@ -48,7 +48,7 @@ rsa==4.9 Shapely==2.0.4 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.11.0; python_version >= '3.8' +typing-extensions==4.12.0; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index afa69b3c0eba..e3a225b79130 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.23.0 +google.cloud.bigquery==3.23.1 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 67baedb0d084..f774ea183e71 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.23.0 +google-cloud-bigquery==3.23.1 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 7601e0772e5d..89fe1638783b 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.23.0 +google-cloud-bigquery==3.23.1 From 91912644f2aa4099daff54f01f1a4fa4c49300f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 29 May 2024 14:43:41 -0500 Subject: [PATCH 1795/2016] test: verify `Client._connection.extra_headers` functionality (#1932) --- .../tests/unit/test_client.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index e9e74b06b43a..a5434019b23b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -271,6 +271,30 @@ def test_ctor_w_load_job_config(self): self.assertIsInstance(client._default_load_job_config, LoadJobConfig) self.assertTrue(client._default_load_job_config.create_session) + def test__call_api_extra_headers(self): + # Note: We test at a lower layer to ensure that extra headers are + # populated when we actually make the call in requests. + # Arrange + http = mock.create_autospec(requests.Session, instance=True) + http.is_mtls = False + response = mock.create_autospec(requests.Response, instance=True) + response.status_code = 200 + http.request.return_value = response + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + # Act + client._connection.extra_headers = {"x-goog-request-reason": "because-friday"} + client._call_api( + retry=None, method="GET", path="/bigquery/v2/projects/my-proj/jobs/my-job" + ) + + # Assert + http.request.assert_called_once() + _, kwargs = http.request.call_args + headers = kwargs["headers"] + assert headers["x-goog-request-reason"] == "because-friday" + def test__call_api_applying_custom_retry_on_timeout(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY From 82f05ffde0d8927b8ae316b0bb8f7b93ae50c1e5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 29 May 2024 15:07:21 -0700 Subject: [PATCH 1796/2016] chore(deps): bump requests from 2.31.0 to 2.32.2 in /samples/geography (#1933) * chore(deps): bump requests from 2.31.0 to 2.32.2 in /samples/geography Bumps [requests](https://github.com/psf/requests) from 2.31.0 to 2.32.2. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.31.0...v2.32.2) --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... Signed-off-by: dependabot[bot] * pin requests==2.31.0 for python 3.7 --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index ec58831e893d..7db2fa855104 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -43,7 +43,8 @@ pyparsing==3.1.2 python-dateutil==2.9.0.post0 pytz==2024.1 PyYAML==6.0.1 -requests==2.31.0 +requests==2.31.0; python_version == '3.7' +requests==2.32.2; python_version >= '3.8' rsa==4.9 Shapely==2.0.4 six==1.16.0 From 8f58bf0a89602c3b06dfae7296f948659549d705 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 30 May 2024 12:48:36 -0700 Subject: [PATCH 1797/2016] chore: add warning if storage module not found (#1937) * chore: add warning if storage module not found * Update tests/unit/test_table.py Co-authored-by: Chalmer Lowe * Update tests/unit/test_table.py Co-authored-by: Chalmer Lowe --------- Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/table.py | 4 ++++ .../google-cloud-bigquery/tests/unit/test_table.py | 14 ++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 6ebb0709a46e..57fc0d2bec3b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1724,6 +1724,10 @@ def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): try: _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True) except bq_exceptions.BigQueryStorageNotFoundError: + warnings.warn( + "BigQuery Storage module not found, fetch data with the REST " + "endpoint instead." + ) return False except bq_exceptions.LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index fcbba03aae52..2a49b06323a9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2360,24 +2360,30 @@ def test__should_use_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) - def test__should_use_bqstorage_returns_false_if_missing_dependency(self): + def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached - + def fail_bqstorage_import(name, globals, locals, fromlist, level): + """Returns True if bigquery_storage has been imported.""" # NOTE: *very* simplified, assuming a straightforward absolute import return "bigquery_storage" in name or ( fromlist is not None and "bigquery_storage" in fromlist ) - + # maybe_fail_import() returns ImportError if the predicate is True no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) - with no_bqstorage: + with no_bqstorage, warnings.catch_warnings(record=True) as warned: result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) self.assertFalse(result) + matching_warnings = [ + warning for warning in warned if "Storage module not found" in str(warning) + ] + assert matching_warnings, "Dependency not found warning not raised." + def test__should_use_bqstorage_returns_false_w_warning_if_obsolete_version(self): pytest.importorskip("google.cloud.bigquery_storage") iterator = self._make_one(first_page_response=None) # not cached From 059e9a14111264830b2eaa72d5b6ea65298815e0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 31 May 2024 16:13:29 +0200 Subject: [PATCH 1798/2016] chore(deps): update all dependencies (#1936) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- packages/google-cloud-bigquery/tests/unit/test_table.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 2a49b06323a9..7a97c7b78da4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2362,13 +2362,14 @@ def test__should_use_bqstorage_returns_false_if_max_results_set(self): def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached - + def fail_bqstorage_import(name, globals, locals, fromlist, level): """Returns True if bigquery_storage has been imported.""" # NOTE: *very* simplified, assuming a straightforward absolute import return "bigquery_storage" in name or ( fromlist is not None and "bigquery_storage" in fromlist ) + # maybe_fail_import() returns ImportError if the predicate is True no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) From c222b0a58bd1b32f4bd8165d51cac966977eb8ec Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 31 May 2024 18:53:28 +0200 Subject: [PATCH 1799/2016] chore(deps): update all dependencies (#1938) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 7db2fa855104..becaaf50ac44 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -44,7 +44,7 @@ python-dateutil==2.9.0.post0 pytz==2024.1 PyYAML==6.0.1 requests==2.31.0; python_version == '3.7' -requests==2.32.2; python_version >= '3.8' +requests==2.32.3; python_version >= '3.8' rsa==4.9 Shapely==2.0.4 six==1.16.0 From 91eda540791c5b90f58d75041c793a82c2850a52 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Fri, 31 May 2024 14:41:45 -0700 Subject: [PATCH 1800/2016] feat: add default timeout for Client.get_job() (#1935) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add default timeout for Client.get_job() * change timeout type detection * lint * fix unit test and coverage * add type hint * fix type hint * change import style and add comments * remove sentinel value in client * type hint * typo * add sentinel for query_and_wait() * add unit tests * fix unit test * Update google/cloud/bigquery/job/query.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/job/query.py Co-authored-by: Tim Sweña (Swast) * address comments * typo * type hint * typos --------- Co-authored-by: Tim Sweña (Swast) --- .../google/cloud/bigquery/_helpers.py | 2 + .../google/cloud/bigquery/_job_helpers.py | 13 +- .../google/cloud/bigquery/client.py | 5 +- .../google/cloud/bigquery/job/base.py | 29 ++- .../google/cloud/bigquery/job/query.py | 34 +++- .../google/cloud/bigquery/retry.py | 11 ++ .../tests/unit/job/test_base.py | 86 +++++++-- .../tests/unit/job/test_copy.py | 34 +++- .../tests/unit/job/test_extract.py | 32 +++- .../tests/unit/job/test_load.py | 47 ++++- .../tests/unit/job/test_query.py | 91 +++++++-- .../tests/unit/test__job_helpers.py | 60 ++---- .../tests/unit/test_client.py | 6 +- .../tests/unit/test_job_retry.py | 180 +++++++++--------- 14 files changed, 421 insertions(+), 209 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 668b4ca3da84..5ee5e1850cf6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -33,6 +33,8 @@ from google.auth import credentials as ga_credentials # type: ignore from google.api_core import client_options as client_options_lib +TimeoutType = Union[float, None] + _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" _TIMEONLY_W_MICROS = "%H:%M:%S.%f" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 2904393944c1..e66ab2763b31 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -39,7 +39,7 @@ import functools import os import uuid -from typing import Any, Dict, TYPE_CHECKING, Optional +from typing import Any, Dict, Optional, TYPE_CHECKING, Union import google.api_core.exceptions as core_exceptions from google.api_core import retry as retries @@ -47,6 +47,7 @@ from google.cloud.bigquery import job import google.cloud.bigquery.query from google.cloud.bigquery import table +from google.cloud.bigquery.retry import POLLING_DEFAULT_VALUE # Avoid circular imports if TYPE_CHECKING: # pragma: NO COVER @@ -328,7 +329,7 @@ def query_and_wait( location: Optional[str], project: str, api_timeout: Optional[float] = None, - wait_timeout: Optional[float] = None, + wait_timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE, retry: Optional[retries.Retry], job_retry: Optional[retries.Retry], page_size: Optional[int] = None, @@ -364,10 +365,12 @@ def query_and_wait( api_timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - wait_timeout (Optional[float]): + wait_timeout (Optional[Union[float, object]]): The number of seconds to wait for the query to finish. If the query doesn't finish before this timeout, the client attempts - to cancel the query. + to cancel the query. If unset, the underlying Client.get_job() API + call has timeout, but we still wait indefinitely for the job to + finish. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. This only applies to making RPC calls. It isn't used to retry failed jobs. This has @@ -545,7 +548,7 @@ def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool: def _wait_or_cancel( job: job.QueryJob, api_timeout: Optional[float], - wait_timeout: Optional[float], + wait_timeout: Optional[Union[object, float]], retry: Optional[retries.Retry], page_size: Optional[int], max_results: Optional[int], diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 891a54e5c4c2..4234767fe307 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -82,6 +82,7 @@ from google.cloud.bigquery._helpers import _DEFAULT_UNIVERSE from google.cloud.bigquery._helpers import _validate_universe from google.cloud.bigquery._helpers import _get_client_universe +from google.cloud.bigquery._helpers import TimeoutType from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem @@ -107,6 +108,7 @@ DEFAULT_JOB_RETRY, DEFAULT_RETRY, DEFAULT_TIMEOUT, + DEFAULT_GET_JOB_TIMEOUT, ) from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference @@ -123,7 +125,6 @@ _versions_helpers.PANDAS_VERSIONS.try_import() ) # mypy check fails because pandas import is outside module, there are type: ignore comments related to this -TimeoutType = Union[float, None] ResumableTimeoutType = Union[ None, float, Tuple[float, float] ] # for resumable media methods @@ -2139,7 +2140,7 @@ def get_job( project: Optional[str] = None, location: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: TimeoutType = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_GET_JOB_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: """Fetch a job for the project associated with this client. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 2641afea8df0..6f9726181bd8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -26,8 +26,11 @@ import google.api_core.future.polling from google.cloud.bigquery import _helpers -from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery._helpers import _int_or_none +from google.cloud.bigquery.retry import ( + DEFAULT_GET_JOB_TIMEOUT, + DEFAULT_RETRY, +) _DONE_STATE = "DONE" @@ -801,7 +804,7 @@ def reload( self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, - timeout: Optional[float] = None, + timeout: Optional[float] = DEFAULT_GET_JOB_TIMEOUT, ): """API call: refresh job properties via a GET request. @@ -820,22 +823,14 @@ def reload( """ client = self._require_client(client) - extra_params = {} - if self.location: - extra_params["location"] = self.location - span_attributes = {"path": self.path} - - api_response = client._call_api( - retry, - span_name="BigQuery.job.reload", - span_attributes=span_attributes, - job_ref=self, - method="GET", - path=self.path, - query_params=extra_params, + got_job = client.get_job( + self, + project=self.project, + location=self.location, + retry=retry, timeout=timeout, ) - self._set_properties(api_response) + self._set_properties(got_job._properties) def cancel( self, @@ -913,7 +908,7 @@ def _set_future_result(self): def done( self, retry: "retries.Retry" = DEFAULT_RETRY, - timeout: Optional[float] = None, + timeout: Optional[float] = DEFAULT_GET_JOB_TIMEOUT, reload: bool = True, ) -> bool: """Checks if the job is complete. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 09a69e11cc93..25b89c3d7d27 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -40,7 +40,11 @@ StructQueryParameter, UDFResource, ) -from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY +from google.cloud.bigquery.retry import ( + DEFAULT_RETRY, + DEFAULT_JOB_RETRY, + POLLING_DEFAULT_VALUE, +) from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _EmptyRowIterator @@ -1437,7 +1441,7 @@ def result( # type: ignore # (incompatible with supertype) page_size: Optional[int] = None, max_results: Optional[int] = None, retry: Optional[retries.Retry] = DEFAULT_RETRY, - timeout: Optional[float] = None, + timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE, start_index: Optional[int] = None, job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: @@ -1457,11 +1461,14 @@ def result( # type: ignore # (incompatible with supertype) is ``DONE``, retrying is aborted early even if the results are not available, as this will not change anymore. - timeout (Optional[float]): + timeout (Optional[Union[float, \ + google.api_core.future.polling.PollingFuture._DEFAULT_VALUE, \ + ]]): The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - If multiple requests are made under the hood, ``timeout`` - applies to each individual request. + before using ``retry``. If ``None``, wait indefinitely + unless an error is returned. If unset, only the + underlying API calls have their default timeouts, but we still + wait indefinitely for the job to finish. start_index (Optional[int]): The zero-based index of the starting row to read. job_retry (Optional[google.api_core.retry.Retry]): @@ -1507,6 +1514,13 @@ def result( # type: ignore # (incompatible with supertype) # Intentionally omit job_id and query_id since this doesn't # actually correspond to a finished query job. ) + + # When timeout has default sentinel value ``object()``, do not pass + # anything to invoke default timeouts in subsequent calls. + kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {} + if type(timeout) is not object: + kwargs["timeout"] = timeout + try: retry_do_query = getattr(self, "_retry_do_query", None) if retry_do_query is not None: @@ -1548,7 +1562,7 @@ def is_job_done(): # rateLimitExceeded errors are ambiguous. We want to know if # the query job failed and not just the call to # jobs.getQueryResults. - if self.done(retry=retry, timeout=timeout): + if self.done(retry=retry, **kwargs): # If it's already failed, we might as well stop. job_failed_exception = self.exception() if job_failed_exception is not None: @@ -1585,14 +1599,14 @@ def is_job_done(): # response from the REST API. This ensures we aren't # making any extra API calls if the previous loop # iteration fetched the finished job. - self._reload_query_results(retry=retry, timeout=timeout) + self._reload_query_results(retry=retry, **kwargs) return True # Call jobs.getQueryResults with max results set to 0 just to # wait for the query to finish. Unlike most methods, # jobs.getQueryResults hangs as long as it can to ensure we # know when the query has finished as soon as possible. - self._reload_query_results(retry=retry, timeout=timeout) + self._reload_query_results(retry=retry, **kwargs) # Even if the query is finished now according to # jobs.getQueryResults, we'll want to reload the job status if @@ -1682,10 +1696,10 @@ def is_job_done(): max_results=max_results, start_index=start_index, retry=retry, - timeout=timeout, query_id=self.query_id, first_page_response=first_page_response, num_dml_affected_rows=self._query_results.num_dml_affected_rows, + **kwargs, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 1110345195d1..10958980dc7a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -14,6 +14,7 @@ from google.api_core import exceptions from google.api_core import retry +import google.api_core.future.polling from google.auth import exceptions as auth_exceptions # type: ignore import requests.exceptions @@ -140,3 +141,13 @@ def _job_should_retry(exc): """ The default job retry object. """ + +DEFAULT_GET_JOB_TIMEOUT = 128 +""" +Default timeout for Client.get_job(). +""" + +POLLING_DEFAULT_VALUE = google.api_core.future.polling.PollingFuture._DEFAULT_VALUE +""" +Default value defined in google.api_core.future.polling.PollingFuture. +""" diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 18672952920a..a7337afd2316 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -22,6 +22,8 @@ from google.api_core.future import polling import pytest +from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + from ..helpers import make_connection from .helpers import _make_client @@ -709,7 +711,7 @@ def test_exists_w_timeout(self): ) def test_reload_defaults(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY + from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_GET_JOB_TIMEOUT resource = { "jobReference": { @@ -729,15 +731,19 @@ def test_reload_defaults(self): call_api.assert_called_once_with( DEFAULT_RETRY, - span_name="BigQuery.job.reload", + span_name="BigQuery.getJob", span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + "job_id": "job-id", + "location": "us-central", }, - job_ref=job, method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={"location": self.LOCATION}, - timeout=None, + query_params={ + "projection": "full", + "location": "us-central", + }, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self.assertEqual(job._properties, expected) @@ -764,18 +770,43 @@ def test_reload_explicit(self): call_api.assert_called_once_with( retry, - span_name="BigQuery.job.reload", + span_name="BigQuery.getJob", span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + "job_id": "job-id", + "location": None, }, - job_ref=job, method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={}, + query_params={"projection": "full"}, timeout=4.2, ) self.assertEqual(job._properties, expected) + def test_reload_none_timeout(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + client = _make_client(project=self.PROJECT) + conn = client._connection = make_connection(resource) + job = self._set_properties_job() + retry = DEFAULT_RETRY.with_deadline(1) + job.reload(client=client, retry=retry, timeout=None) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"projection": "full"}, + timeout=None, + ) + def test_cancel_defaults(self): resource = { "jobReference": { @@ -952,7 +983,10 @@ def test_done_defaults_wo_state(self): self.assertFalse(job.done()) - reload_.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) + reload_.assert_called_once_with( + retry=DEFAULT_RETRY, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) def test_done_explicit_wo_state(self): from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -966,6 +1000,18 @@ def test_done_explicit_wo_state(self): reload_.assert_called_once_with(retry=retry, timeout=7.5) + def test_done_with_none_timeout(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + reload_ = job.reload = mock.Mock() + retry = DEFAULT_RETRY.with_deadline(1) + + self.assertFalse(job.done(retry=retry, timeout=None)) + + reload_.assert_called_once_with(retry=retry, timeout=None) + def test_done_already(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -974,6 +1020,8 @@ def test_done_already(self): self.assertTrue(job.done()) def test_result_default_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + begun_job_resource = _make_job_resource( job_id=self.JOB_ID, project_id=self.PROJECT, location="US", started=True ) @@ -1003,12 +1051,17 @@ def test_result_default_wo_state(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "US"}, - timeout=None, + query_params={ + "projection": "full", + "location": "US", + }, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) conn.api_request.assert_has_calls([begin_call, begin_call, reload_call]) def test_result_w_retry_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + begun_job_resource = _make_job_resource( job_id=self.JOB_ID, project_id=self.PROJECT, location="EU", started=True ) @@ -1054,8 +1107,11 @@ def test_result_w_retry_wo_state(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "EU"}, - timeout=None, + query_params={ + "projection": "full", + "location": "EU", + }, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) conn.api_request.assert_has_calls( [begin_call, begin_call, reload_call, reload_call] diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py index e1bb20db24f5..4b0945310970 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py @@ -477,6 +477,8 @@ def test_exists_hit_w_alternate_client(self): ) def test_reload_w_bound_client(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn = make_connection(RESOURCE) @@ -489,14 +491,27 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, client, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client, + None, + ) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn1 = make_connection() @@ -511,10 +526,21 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, client2, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client2, + None, + ) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_extract.py b/packages/google-cloud-bigquery/tests/unit/job/test_extract.py index ee0d67d68dc3..ebf9f09e6991 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_extract.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_extract.py @@ -399,6 +399,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() @@ -412,14 +413,26 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, client, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client, + None, + ) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() @@ -435,10 +448,21 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, client2, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client2, + None, + ) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index 976fec914b9f..0fb044696506 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -714,6 +714,8 @@ def test_exists_miss_w_job_reference(self): ) def test_reload_w_bound_client(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn = make_connection(RESOURCE) @@ -724,14 +726,27 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, client, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client, + None, + ) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn1 = make_connection() @@ -744,16 +759,28 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, client2, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client2, + None, + ) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_job_reference(self): from google.cloud.bigquery import job + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT resource = self._make_resource(ended=True) resource["jobReference"]["projectId"] = "alternative-project" @@ -768,16 +795,20 @@ def test_reload_w_job_reference(self): load_job.reload() final_attributes.assert_called_with( - {"path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID)}, + { + "path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID), + "job_id": self.JOB_ID, + "location": "US", + }, client, - load_job, + None, ) conn.api_request.assert_called_once_with( method="GET", path="/projects/alternative-project/jobs/{}".format(self.JOB_ID), - query_params={"location": "US"}, - timeout=None, + query_params={"projection": "full", "location": "US"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) def test_cancel_w_bound_client(self): diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 0fee053e346a..c7b2c5f9c05b 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -28,6 +28,7 @@ from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery._job_helpers import google.cloud.bigquery.query +from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT from google.cloud.bigquery.table import _EmptyRowIterator from ..helpers import make_connection @@ -959,8 +960,8 @@ def test_result_reloads_job_state_until_done(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "EU"}, - timeout=None, + query_params={"projection": "full", "location": "EU"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) query_page_call = mock.call( method="GET", @@ -1104,7 +1105,37 @@ def test_result_with_done_jobs_query_response_doesnt_call_get_query_results(self conn.api_request.assert_called_once_with( method="GET", path=job_path, - query_params={}, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) + + def test_result_with_none_timeout(self): + # Verifies that with an intentional None timeout, get job uses None + # instead of the default timeout. + job_resource = self._make_resource(started=True, ended=True, location="EU") + conn = make_connection(job_resource) + client = _make_client(self.PROJECT, connection=conn) + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "rows": [{"f": [{"v": "abc"}]}], + "totalRows": "1", + } + job = google.cloud.bigquery._job_helpers._to_query_job( + client, + "SELECT 'abc' AS col1", + request_config=None, + query_response=query_resource_done, + ) + + job.result(timeout=None) + + job_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}" + conn.api_request.assert_called_once_with( + method="GET", + path=job_path, + query_params={"projection": "full"}, timeout=None, ) @@ -1287,8 +1318,8 @@ def test_result_w_custom_retry(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "asia-northeast1"}, - timeout=None, + query_params={"projection": "full", "location": "asia-northeast1"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) connection.api_request.assert_has_calls( @@ -1367,7 +1398,7 @@ def test_result_w_timeout_doesnt_raise(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "US"}, + query_params={"projection": "full", "location": "US"}, timeout=1.125, ) get_query_results_call = mock.call( @@ -1412,7 +1443,7 @@ def test_result_w_timeout_raises_concurrent_futures_timeout(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "US"}, + query_params={"projection": "full", "location": "US"}, timeout=1.125, ) get_query_results_call = mock.call( @@ -2160,12 +2191,23 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, client, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client, + None, + ) self.assertNotEqual(job.destination, table_ref) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) @@ -2190,11 +2232,22 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, client2, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client2, + None, + ) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) @@ -2217,13 +2270,23 @@ def test_reload_w_timeout(self): "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: job.reload(timeout=4.2) - - final_attributes.assert_called_with({"path": PATH}, client, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client, + None, + ) self.assertNotEqual(job.destination, table_ref) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=4.2 + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=4.2, ) def test_iter(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py index 9f661dca7492..96914d9f9680 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import functools from typing import Any, Dict, Optional from unittest import mock @@ -21,15 +20,18 @@ from google.api_core import retry as retries import pytest -from google.cloud.bigquery.client import Client -from google.cloud.bigquery import enums from google.cloud.bigquery import _job_helpers +from google.cloud.bigquery import enums +from google.cloud.bigquery import retry +from google.cloud.bigquery.client import Client from google.cloud.bigquery.job import copy_ as job_copy from google.cloud.bigquery.job import extract as job_extract from google.cloud.bigquery.job import load as job_load from google.cloud.bigquery.job import query as job_query from google.cloud.bigquery.query import ConnectionProperty, ScalarQueryParameter +from .helpers import make_client, make_connection + def make_query_request(additional_properties: Optional[Dict[str, Any]] = None): request = {"useLegacySql": False, "formatOptions": {"useInt64Timestamp": True}} @@ -806,11 +808,8 @@ def test_query_and_wait_caches_completed_query_results_one_page_no_rows(): def test_query_and_wait_caches_completed_query_results_more_pages(): - client = mock.create_autospec(Client) - client._list_rows_from_query_results = functools.partial( - Client._list_rows_from_query_results, client - ) - client._call_api.side_effect = ( + client = make_client() + conn = client._connection = make_connection( { "jobReference": { "projectId": "response-project", @@ -882,10 +881,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): # Start the query. jobs_query_path = "/projects/request-project/queries" - client._call_api.assert_any_call( - None, # retry - span_name="BigQuery.query", - span_attributes={"path": jobs_query_path}, + conn.api_request.assert_any_call( method="POST", path=jobs_query_path, data={ @@ -906,8 +902,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): # Fetch the remaining two pages. jobs_get_query_results_path = "/projects/response-project/queries/response-job-id" - client._call_api.assert_any_call( - None, # retry + conn.api_request.assert_any_call( timeout=None, method="GET", path=jobs_get_query_results_path, @@ -918,8 +913,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): "formatOptions.useInt64Timestamp": True, }, ) - client._call_api.assert_any_call( - None, # retry + conn.api_request.assert_any_call( timeout=None, method="GET", path=jobs_get_query_results_path, @@ -933,12 +927,8 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): def test_query_and_wait_incomplete_query(): - client = mock.create_autospec(Client) - client._get_query_results = functools.partial(Client._get_query_results, client) - client._list_rows_from_query_results = functools.partial( - Client._list_rows_from_query_results, client - ) - client._call_api.side_effect = ( + client = make_client() + conn = client._connection = make_connection( # jobs.query { "jobReference": { @@ -1022,10 +1012,7 @@ def test_query_and_wait_incomplete_query(): # Start the query. jobs_query_path = "/projects/request-project/queries" - client._call_api.assert_any_call( - None, # retry - span_name="BigQuery.query", - span_attributes={"path": jobs_query_path}, + conn.api_request.assert_any_call( method="POST", path=jobs_query_path, data={ @@ -1041,10 +1028,7 @@ def test_query_and_wait_incomplete_query(): # Wait for the query to finish. jobs_get_query_results_path = "/projects/response-project/queries/response-job-id" - client._call_api.assert_any_call( - None, # retry - span_name="BigQuery.getQueryResults", - span_attributes={"path": jobs_get_query_results_path}, + conn.api_request.assert_any_call( method="GET", path=jobs_get_query_results_path, query_params={ @@ -1063,20 +1047,15 @@ def test_query_and_wait_incomplete_query(): # Fetch the job metadata in case the RowIterator needs the destination table. jobs_get_path = "/projects/response-project/jobs/response-job-id" - client._call_api.assert_any_call( - None, # retry - span_name="BigQuery.job.reload", - span_attributes={"path": jobs_get_path}, - job_ref=mock.ANY, + conn.api_request.assert_any_call( method="GET", path=jobs_get_path, - query_params={"location": "response-location"}, - timeout=None, + query_params={"projection": "full", "location": "response-location"}, + timeout=retry.DEFAULT_GET_JOB_TIMEOUT, ) # Fetch the remaining two pages. - client._call_api.assert_any_call( - None, # retry + conn.api_request.assert_any_call( timeout=None, method="GET", path=jobs_get_query_results_path, @@ -1086,8 +1065,7 @@ def test_query_and_wait_incomplete_query(): "formatOptions.useInt64Timestamp": True, }, ) - client._call_api.assert_any_call( - None, # retry + conn.api_request.assert_any_call( timeout=None, method="GET", path=jobs_get_query_results_path, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index a5434019b23b..ed5575f6c7fa 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -3167,6 +3167,7 @@ def test_job_from_resource_unknown_type(self): def test_get_job_miss_w_explict_project(self): from google.cloud.exceptions import NotFound + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT OTHER_PROJECT = "OTHER_PROJECT" JOB_ID = "NONESUCH" @@ -3181,11 +3182,12 @@ def test_get_job_miss_w_explict_project(self): method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", query_params={"projection": "full"}, - timeout=DEFAULT_TIMEOUT, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) def test_get_job_miss_w_client_location(self): from google.cloud.exceptions import NotFound + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT JOB_ID = "NONESUCH" creds = _make_credentials() @@ -3199,7 +3201,7 @@ def test_get_job_miss_w_client_location(self): method="GET", path="/projects/client-proj/jobs/NONESUCH", query_params={"projection": "full", "location": "client-loc"}, - timeout=DEFAULT_TIMEOUT, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) def test_get_job_hit_w_timeout(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py index 2dcc5878d8df..46eb1d6b3fb5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -23,85 +23,93 @@ import freezegun import requests.exceptions -from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers import google.cloud.bigquery.retry -from .helpers import make_connection +from .helpers import make_client, make_connection -# With job_retry_on_query, we're testing 4 scenarios: +_RETRY_NOT_FOUND = { + "job_retry": google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.NotFound, + ), + ), +} +_RETRY_BAD_REQUEST = { + "job_retry": google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.BadRequest, + ), + ), +} + + +# Test retry of job failures, instead of API-invocation failures. 4 scenarios: # - No `job_retry` passed, retry on default rateLimitExceeded. # - Pass NotFound retry to `query`. # - Pass NotFound retry to `result`. # - Pass BadRequest retry to query, with the value passed to `result` overriding. -@pytest.mark.parametrize("job_retry_on_query", [None, "Query", "Result", "Both"]) @mock.patch("time.sleep") -def test_retry_failed_jobs(sleep, client, job_retry_on_query): - """ - Test retry of job failures, as opposed to API-invocation failures. - """ - - retry_notfound = google.api_core.retry.Retry( - predicate=google.api_core.retry.if_exception_type( - google.api_core.exceptions.NotFound - ) - ) - retry_badrequest = google.api_core.retry.Retry( - predicate=google.api_core.retry.if_exception_type( - google.api_core.exceptions.BadRequest - ) - ) - - if job_retry_on_query is None: - reason = "rateLimitExceeded" - else: - reason = "notFound" - +@pytest.mark.parametrize( + "reason, job_retry, result_retry", + [ + pytest.param( + "rateLimitExceeded", + {}, + {}, + id="no job_retry", + ), + pytest.param( + "notFound", + _RETRY_NOT_FOUND, + {}, + id="Query NotFound", + ), + pytest.param( + "notFound", + _RETRY_NOT_FOUND, + _RETRY_NOT_FOUND, + id="Result NotFound", + ), + pytest.param( + "notFound", + _RETRY_BAD_REQUEST, + _RETRY_NOT_FOUND, + id="BadRequest", + ), + ], +) +def test_retry_failed_jobs(sleep, reason, job_retry, result_retry): + client = make_client() err = dict(reason=reason) - responses = [ - dict(status=dict(state="DONE", errors=[err], errorResult=err)), - dict(status=dict(state="DONE", errors=[err], errorResult=err)), - dict(status=dict(state="DONE", errors=[err], errorResult=err)), - dict(status=dict(state="DONE")), + conn = client._connection = make_connection( + dict( + status=dict(state="DONE", errors=[err], errorResult=err), + jobReference={"jobId": "id_1"}, + ), + dict( + status=dict(state="DONE", errors=[err], errorResult=err), + jobReference={"jobId": "id_1"}, + ), + dict( + status=dict(state="DONE", errors=[err], errorResult=err), + jobReference={"jobId": "id_1"}, + ), + dict(status=dict(state="DONE"), jobReference={"jobId": "id_2"}), dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), - ] - - def api_request(method, path, query_params=None, data=None, **kw): - response = responses.pop(0) - if data: - response["jobReference"] = data["jobReference"] - else: - response["jobReference"] = dict( - jobId=path.split("/")[-1], projectId="PROJECT" - ) - return response - - conn = client._connection = make_connection() - conn.api_request.side_effect = api_request + ) - if job_retry_on_query == "Query": - job_retry = dict(job_retry=retry_notfound) - elif job_retry_on_query == "Both": - # This will be overridden in `result` - job_retry = dict(job_retry=retry_badrequest) - else: - job_retry = {} job = client.query("select 1", **job_retry) + result = job.result(**result_retry) - orig_job_id = job.job_id - job_retry = ( - dict(job_retry=retry_notfound) - if job_retry_on_query in ("Result", "Both") - else {} - ) - result = job.result(**job_retry) assert result.total_rows == 1 - assert not responses # We made all the calls we expected to. + + # We made all the calls we expected to. + assert conn.api_request.call_count == 5 # The job adjusts it's job id based on the id of the last attempt. - assert job.job_id != orig_job_id - assert job.job_id == conn.mock_calls[3][2]["data"]["jobReference"]["jobId"] + assert job.job_id == "id_2" # We had to sleep three times assert len(sleep.mock_calls) == 3 @@ -114,17 +122,19 @@ def api_request(method, path, query_params=None, data=None, **kw): assert max(c[1][0] for c in sleep.mock_calls) <= 8 # We can ask for the result again: - responses = [ + conn = client._connection = make_connection( dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), - ] - orig_job_id = job.job_id + ) result = job.result() + assert result.total_rows == 1 - assert not responses # We made all the calls we expected to. + + # We made all the calls we expected to. + assert conn.api_request.call_count == 1 # We wouldn't (and didn't) fail, because we're dealing with a successful job. # So the job id hasn't changed. - assert job.job_id == orig_job_id + assert job.job_id == "id_2" def test_retry_connection_error_with_default_retries_and_successful_first_job( @@ -209,8 +219,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/1", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), # jobs.getQueryResults x2 mock.call( @@ -229,8 +239,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/1", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), # jobs.getQueryResults mock.call( @@ -307,8 +317,7 @@ def make_job_id(*args, **kwargs): {"jobReference": job_reference_2, "status": {"state": "DONE"}}, ] - conn = client._connection = make_connection() - conn.api_request.side_effect = responses + conn = client._connection = make_connection(*responses) with freezegun.freeze_time( # Note: because of exponential backoff and a bit of jitter, @@ -341,8 +350,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/1", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), # jobs.getQueryResults x2 mock.call( @@ -361,8 +370,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/1", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), # jobs.insert mock.call( @@ -384,8 +393,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/2", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), # jobs.getQueryResults mock.call( @@ -398,8 +407,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/2", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), ] ) @@ -531,12 +540,9 @@ def test_query_and_wait_retries_job_for_DDL_queries(): https://github.com/googleapis/python-bigquery/issues/1790 """ freezegun.freeze_time(auto_tick_seconds=1) - client = mock.create_autospec(Client) - client._call_api.__name__ = "_call_api" - client._call_api.__qualname__ = "Client._call_api" - client._call_api.__annotations__ = {} - client._call_api.__type_params__ = () - client._call_api.side_effect = ( + + client = make_client() + conn = client._connection = make_connection( { "jobReference": { "projectId": "response-project", @@ -589,7 +595,7 @@ def test_query_and_wait_retries_job_for_DDL_queries(): # and https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults query_request_path = "/projects/request-project/queries" - calls = client._call_api.call_args_list + calls = conn.api_request.call_args_list _, kwargs = calls[0] assert kwargs["method"] == "POST" assert kwargs["path"] == query_request_path From 5343a442a53ab3b70d01a6577e000b3b5a1495c0 Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 3 Jun 2024 10:23:37 -0700 Subject: [PATCH 1801/2016] testing: update BQML training option (#1943) This updates tests to use `max_iterations` rather than `max_iteration` which was an alpha option. Related: b/344469351 --- packages/google-cloud-bigquery/samples/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/tests/conftest.py b/packages/google-cloud-bigquery/samples/tests/conftest.py index 91603bef2eb3..cdf52b38835b 100644 --- a/packages/google-cloud-bigquery/samples/tests/conftest.py +++ b/packages/google-cloud-bigquery/samples/tests/conftest.py @@ -162,7 +162,7 @@ def model_id(client: bigquery.Client, dataset_id: str) -> str: CREATE MODEL `{}` OPTIONS ( model_type='linear_reg', - max_iteration=1, + max_iterations=1, learn_rate=0.4, learn_rate_strategy='constant' ) AS ( From 7ec7a8630a8be59c1bb5c2ac2c12264b14399665 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 3 Jun 2024 16:22:16 -0400 Subject: [PATCH 1802/2016] chore: updates continuous CI/CD tests against specific versions of Python (#1941) Updates the regular continuous CI/CD checks to test against specific versions of Python (versions that aren't our most recent supported version and aren't our oldest supported version). Also removes a CI/CD check that is superceded by a more recent version of check (prerelease-deps >>> replaced by prerelease-deps-3.12). Modifies owlbot to avoid it adding prerelease-deps back into the mix since that file is a default in synthtool. --- .../.kokoro/continuous/prerelease-deps.cfg | 7 ------- .../.kokoro/continuous/unit-tests-misc.cfg | 9 +++++++++ packages/google-cloud-bigquery/owlbot.py | 1 + 3 files changed, 10 insertions(+), 7 deletions(-) delete mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/unit-tests-misc.cfg diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg deleted file mode 100644 index 3595fb43f5c0..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps" -} diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/unit-tests-misc.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/unit-tests-misc.cfg new file mode 100644 index 000000000000..6598baee77e1 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/continuous/unit-tests-misc.cfg @@ -0,0 +1,9 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run these nox sessions. +# A subset based on Python versions that are neither our newest OR oldest +# supported versions of Python +env_vars: { + key: "NOX_SESSION" + value: "unit_noextras-3.9 unit_noextras-3.10 unit_noextras-3.11 unit-3.9 unit-3.10 unit-3.11" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index c2de31042267..778cc3e53d16 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -70,6 +70,7 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", + ".kokoro/continuous/prerelease-deps.cfg", ".github/workflows", # exclude gh actions as credentials are needed for tests "README.rst", ], From 653b8f42af035ed48c6f41b04cb14a8318d637e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 3 Jun 2024 18:08:31 -0500 Subject: [PATCH 1803/2016] perf: if `page_size` or `max_results` is set on `QueryJob.result()`, use to download first page of results (#1942) * perf: if `page_size` or `max_results` is set on `QueryJob.result()`, use to download first page of results * add unit tests for query_and_wait * populate maxResults on page 2 * fix maxResults * fix coverage --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/client.py | 23 +- .../google/cloud/bigquery/job/query.py | 51 +++- .../google/cloud/bigquery/table.py | 2 +- .../tests/unit/job/test_query.py | 225 ++++++++++-------- .../tests/unit/test_client.py | 122 ++++++++++ .../tests/unit/test_job_retry.py | 2 +- 6 files changed, 300 insertions(+), 125 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4234767fe307..1c222f2ddb7c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -109,6 +109,7 @@ DEFAULT_RETRY, DEFAULT_TIMEOUT, DEFAULT_GET_JOB_TIMEOUT, + POLLING_DEFAULT_VALUE, ) from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference @@ -1963,6 +1964,7 @@ def _get_query_results( timeout_ms: Optional[int] = None, location: Optional[str] = None, timeout: TimeoutType = DEFAULT_TIMEOUT, + page_size: int = 0, ) -> _QueryResults: """Get the query results object for a query job. @@ -1981,13 +1983,16 @@ def _get_query_results( before using ``retry``. If set, this connection timeout may be increased to a minimum value. This prevents retries on what would otherwise be a successful response. + page_size (int): + Maximum number of rows in a single response. See maxResults in + the jobs.getQueryResults REST API. Returns: google.cloud.bigquery.query._QueryResults: A new ``_QueryResults`` instance. """ - extra_params: Dict[str, Any] = {"maxResults": 0} + extra_params: Dict[str, Any] = {"maxResults": page_size} if timeout is not None: if not isinstance(timeout, (int, float)): @@ -1995,6 +2000,9 @@ def _get_query_results( else: timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) + if page_size > 0: + extra_params["formatOptions.useInt64Timestamp"] = True + if project is None: project = self.project @@ -3504,7 +3512,7 @@ def query_and_wait( location: Optional[str] = None, project: Optional[str] = None, api_timeout: TimeoutType = DEFAULT_TIMEOUT, - wait_timeout: TimeoutType = None, + wait_timeout: Union[Optional[float], object] = POLLING_DEFAULT_VALUE, retry: retries.Retry = DEFAULT_RETRY, job_retry: retries.Retry = DEFAULT_JOB_RETRY, page_size: Optional[int] = None, @@ -3538,10 +3546,12 @@ def query_and_wait( api_timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - wait_timeout (Optional[float]): + wait_timeout (Optional[Union[float, object]]): The number of seconds to wait for the query to finish. If the query doesn't finish before this timeout, the client attempts - to cancel the query. + to cancel the query. If unset, the underlying REST API calls + have timeouts, but we still wait indefinitely for the job to + finish. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. This only applies to making RPC calls. It isn't used to retry failed jobs. This has @@ -4128,11 +4138,6 @@ def _list_rows_from_query_results( if start_index is not None: params["startIndex"] = start_index - # We don't call jobs.query with a page size, so if the user explicitly - # requests a certain size, invalidate the cache. - if page_size is not None: - first_page_response = None - params["formatOptions.useInt64Timestamp"] = True row_iterator = RowIterator( client=self, diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 25b89c3d7d27..a8530271aa33 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1385,7 +1385,10 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): raise def _reload_query_results( - self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None + self, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, + page_size: int = 0, ): """Refresh the cached query results unless already cached and complete. @@ -1395,6 +1398,9 @@ def _reload_query_results( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (int): + Maximum number of rows in a single response. See maxResults in + the jobs.getQueryResults REST API. """ # Optimization: avoid a call to jobs.getQueryResults if it's already # been fetched, e.g. from jobs.query first page of results. @@ -1425,7 +1431,14 @@ def _reload_query_results( # If an explicit timeout is not given, fall back to the transport timeout # stored in _blocking_poll() in the process of polling for job completion. - transport_timeout = timeout if timeout is not None else self._transport_timeout + if timeout is not None: + transport_timeout = timeout + else: + transport_timeout = self._transport_timeout + + # Handle PollingJob._DEFAULT_VALUE. + if not isinstance(transport_timeout, (float, int)): + transport_timeout = None self._query_results = self._client._get_query_results( self.job_id, @@ -1434,6 +1447,7 @@ def _reload_query_results( timeout_ms=timeout_ms, location=self.location, timeout=transport_timeout, + page_size=page_size, ) def result( # type: ignore # (incompatible with supertype) @@ -1515,11 +1529,25 @@ def result( # type: ignore # (incompatible with supertype) # actually correspond to a finished query job. ) + # Setting max_results should be equivalent to setting page_size with + # regards to allowing the user to tune how many results to download + # while we wait for the query to finish. See internal issue: + # 344008814. + if page_size is None and max_results is not None: + page_size = max_results + # When timeout has default sentinel value ``object()``, do not pass # anything to invoke default timeouts in subsequent calls. - kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {} + done_kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {} + reload_query_results_kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {} + list_rows_kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {} if type(timeout) is not object: - kwargs["timeout"] = timeout + done_kwargs["timeout"] = timeout + list_rows_kwargs["timeout"] = timeout + reload_query_results_kwargs["timeout"] = timeout + + if page_size is not None: + reload_query_results_kwargs["page_size"] = page_size try: retry_do_query = getattr(self, "_retry_do_query", None) @@ -1562,7 +1590,7 @@ def is_job_done(): # rateLimitExceeded errors are ambiguous. We want to know if # the query job failed and not just the call to # jobs.getQueryResults. - if self.done(retry=retry, **kwargs): + if self.done(retry=retry, **done_kwargs): # If it's already failed, we might as well stop. job_failed_exception = self.exception() if job_failed_exception is not None: @@ -1599,14 +1627,16 @@ def is_job_done(): # response from the REST API. This ensures we aren't # making any extra API calls if the previous loop # iteration fetched the finished job. - self._reload_query_results(retry=retry, **kwargs) + self._reload_query_results( + retry=retry, **reload_query_results_kwargs + ) return True # Call jobs.getQueryResults with max results set to 0 just to # wait for the query to finish. Unlike most methods, # jobs.getQueryResults hangs as long as it can to ensure we # know when the query has finished as soon as possible. - self._reload_query_results(retry=retry, **kwargs) + self._reload_query_results(retry=retry, **reload_query_results_kwargs) # Even if the query is finished now according to # jobs.getQueryResults, we'll want to reload the job status if @@ -1679,8 +1709,9 @@ def is_job_done(): # We know that there's at least 1 row, so only treat the response from # jobs.getQueryResults / jobs.query as the first page of the # RowIterator response if there are any rows in it. This prevents us - # from stopping the iteration early because we're missing rows and - # there's no next page token. + # from stopping the iteration early in the cases where we set + # maxResults=0. In that case, we're missing rows and there's no next + # page token. first_page_response = self._query_results._properties if "rows" not in first_page_response: first_page_response = None @@ -1699,7 +1730,7 @@ def is_job_done(): query_id=self.query_id, first_page_response=first_page_response, num_dml_affected_rows=self._query_results.num_dml_affected_rows, - **kwargs, + **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 57fc0d2bec3b..faf827be4d3a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1759,7 +1759,7 @@ def _get_next_page_response(self): if self._page_size is not None: if self.page_number and "startIndex" in params: del params["startIndex"] - params["maxResults"] = self._page_size + return self.api_request( method=self._HTTP_METHOD, path=self.path, query_params=params ) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index c7b2c5f9c05b..66055dee14fc 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -28,6 +28,7 @@ from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery._job_helpers import google.cloud.bigquery.query +import google.cloud.bigquery.retry from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT from google.cloud.bigquery.table import _EmptyRowIterator @@ -841,6 +842,22 @@ def test_search_stats(self): assert isinstance(job.search_stats, SearchStats) assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED" + def test_reload_query_results_uses_transport_timeout(self): + conn = make_connection({}) + client = _make_client(self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, self.QUERY, client) + job._transport_timeout = 123 + + job._reload_query_results() + + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" + conn.api_request.assert_called_once_with( + method="GET", + path=query_results_path, + query_params={"maxResults": 0}, + timeout=123, + ) + def test_result_reloads_job_state_until_done(self): """Verify that result() doesn't return until state == 'DONE'. @@ -1053,7 +1070,7 @@ def test_result_with_done_job_calls_get_query_results(self): method="GET", path=query_results_path, query_params={"maxResults": 0, "location": "EU"}, - timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + timeout=None, ) query_results_page_call = mock.call( method="GET", @@ -1139,114 +1156,92 @@ def test_result_with_none_timeout(self): timeout=None, ) - def test_result_with_done_jobs_query_response_and_page_size_invalidates_cache(self): - """We don't call jobs.query with a page size, so if the user explicitly - requests a certain size, invalidate the cache. - """ - # Arrange - job_resource = self._make_resource( - started=True, ended=True, location="asia-northeast1" - ) - query_resource_done = { + def test_result_with_max_results(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "rows": [{"f": [{"v": "abc"}]}], - "pageToken": "initial-page-token-shouldnt-be-used", - "totalRows": "4", + "totalRows": "10", + "pageToken": "first-page-token", + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + {"f": [{"v": "jkl"}]}, + {"f": [{"v": "mno"}]}, + {"f": [{"v": "pqr"}]}, + # Pretend these are very large rows, so the API doesn't return + # all of the rows we asked for in the first response. + ], } query_page_resource = { - "totalRows": 4, - "pageToken": "some-page-token", + "totalRows": "10", + "pageToken": None, "rows": [ - {"f": [{"v": "row1"}]}, - {"f": [{"v": "row2"}]}, - {"f": [{"v": "row3"}]}, + {"f": [{"v": "stu"}]}, + {"f": [{"v": "vwx"}]}, + {"f": [{"v": "yz0"}]}, ], } - query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} - conn = make_connection(job_resource, query_page_resource, query_page_resource_2) - client = _make_client(self.PROJECT, connection=conn) - job = google.cloud.bigquery._job_helpers._to_query_job( - client, - "SELECT col1 FROM table", - request_config=None, - query_response=query_resource_done, + job_resource_running = self._make_resource( + started=True, ended=False, location="US" ) - # We want job.result() to refresh the job state, so the conversion is - # always "PENDING", even if the job is finished. - assert job.state == "PENDING" + job_resource_done = self._make_resource(started=True, ended=True, location="US") + conn = make_connection(job_resource_done, query_resource, query_page_resource) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource_running, client) - # Act - result = job.result(page_size=3) + max_results = 9 + result = job.result(max_results=max_results) - # Assert - actual_rows = list(result) - self.assertEqual(len(actual_rows), 4) + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 10) + + rows = list(result) + self.assertEqual(len(rows), 9) + jobs_get_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}" + jobs_get_call = mock.call( + method="GET", + path=jobs_get_path, + query_params={"projection": "full", "location": "US"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" - query_page_1_call = mock.call( + query_page_waiting_call = mock.call( method="GET", path=query_results_path, query_params={ - "maxResults": 3, - "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, - "location": "asia-northeast1", + # Waiting for the results should set maxResults and cache the + # first page if page_size is set. This allows customers to + # more finely tune when we fallback to the BQ Storage API. + # See internal issue: 344008814. + "maxResults": max_results, "formatOptions.useInt64Timestamp": True, + "location": "US", }, timeout=None, ) query_page_2_call = mock.call( + timeout=None, method="GET", path=query_results_path, query_params={ - "pageToken": "some-page-token", + "pageToken": "first-page-token", "maxResults": 3, "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, - "location": "asia-northeast1", + "location": "US", "formatOptions.useInt64Timestamp": True, }, - timeout=None, ) - conn.api_request.assert_has_calls([query_page_1_call, query_page_2_call]) - - def test_result_with_max_results(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "5", - } - query_page_resource = { - "totalRows": "5", - "pageToken": None, - "rows": [ - {"f": [{"v": "abc"}]}, - {"f": [{"v": "def"}]}, - {"f": [{"v": "ghi"}]}, - ], - } - connection = make_connection(query_resource, query_page_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - max_results = 3 - - result = job.result(max_results=max_results) - - self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 5) - - rows = list(result) - - self.assertEqual(len(rows), 3) - self.assertEqual(len(connection.api_request.call_args_list), 2) - query_page_request = connection.api_request.call_args_list[1] - self.assertEqual( - query_page_request[1]["query_params"]["maxResults"], max_results + # Waiting for the results should set maxResults and cache the + # first page if max_results is set. This allows customers to + # more finely tune when we fallback to the BQ Storage API. + # See internal issue: 344008814. + conn.api_request.assert_has_calls( + [jobs_get_call, query_page_waiting_call, query_page_2_call] ) def test_result_w_custom_retry(self): @@ -1469,63 +1464,85 @@ def test_result_w_page_size(self): "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "4", - } - job_resource = self._make_resource(started=True, ended=True, location="US") - q_config = job_resource["configuration"]["query"] - q_config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - } - query_page_resource = { - "totalRows": 4, - "pageToken": "some-page-token", + "totalRows": "10", "rows": [ {"f": [{"v": "row1"}]}, {"f": [{"v": "row2"}]}, {"f": [{"v": "row3"}]}, + {"f": [{"v": "row4"}]}, + {"f": [{"v": "row5"}]}, + {"f": [{"v": "row6"}]}, + {"f": [{"v": "row7"}]}, + {"f": [{"v": "row8"}]}, + {"f": [{"v": "row9"}]}, ], + "pageToken": "first-page-token", } - query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} + job_resource_running = self._make_resource( + started=True, ended=False, location="US" + ) + job_resource_done = self._make_resource(started=True, ended=True, location="US") + destination_table = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + } + q_config = job_resource_done["configuration"]["query"] + q_config["destinationTable"] = destination_table + query_page_resource_2 = {"totalRows": 10, "rows": [{"f": [{"v": "row10"}]}]} conn = make_connection( - query_results_resource, query_page_resource, query_page_resource_2 + job_resource_running, + query_results_resource, + job_resource_done, + query_page_resource_2, ) client = _make_client(self.PROJECT, connection=conn) - job = self._get_target_class().from_api_repr(job_resource, client) + job = self._get_target_class().from_api_repr(job_resource_running, client) # Act - result = job.result(page_size=3) + result = job.result(page_size=9) # Assert actual_rows = list(result) - self.assertEqual(len(actual_rows), 4) + self.assertEqual(len(actual_rows), 10) + jobs_get_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}" + jobs_get_call = mock.call( + method="GET", + path=jobs_get_path, + query_params={"projection": "full", "location": "US"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" - query_page_1_call = mock.call( + query_page_waiting_call = mock.call( method="GET", path=query_results_path, query_params={ - "maxResults": 3, - "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + # Waiting for the results should set maxResults and cache the + # first page if page_size is set. This allows customers to + # more finely tune when we fallback to the BQ Storage API. + # See internal issue: 344008814. + "maxResults": 9, "location": "US", "formatOptions.useInt64Timestamp": True, }, timeout=None, ) query_page_2_call = mock.call( + timeout=None, method="GET", path=query_results_path, query_params={ - "pageToken": "some-page-token", - "maxResults": 3, + "pageToken": "first-page-token", + "maxResults": 9, "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "US", "formatOptions.useInt64Timestamp": True, }, - timeout=None, ) - conn.api_request.assert_has_calls([query_page_1_call, query_page_2_call]) + conn.api_request.assert_has_calls( + [jobs_get_call, query_page_waiting_call, jobs_get_call, query_page_2_call] + ) def test_result_with_start_index(self): from google.cloud.bigquery.table import RowIterator diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ed5575f6c7fa..cd336b73fa64 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -58,6 +58,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions +import google.cloud.bigquery.retry from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import google.cloud.bigquery.table @@ -5444,6 +5445,127 @@ def test_query_and_wait_w_location(self): sent = req["data"] self.assertEqual(sent["location"], "not-the-client-location") + def test_query_and_wait_w_max_results(self): + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait(query, max_results=11) + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries") + sent = req["data"] + self.assertTrue(sent["formatOptions"]["useInt64Timestamp"]) + self.assertTrue(sent["maxResults"], 11) + + def test_query_and_wait_w_page_size(self): + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait(query, page_size=11) + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries") + sent = req["data"] + self.assertTrue(sent["formatOptions"]["useInt64Timestamp"]) + self.assertTrue(sent["maxResults"], 11) + + def test_query_and_wait_w_page_size_multiple_requests(self): + """ + For queries that last longer than the intial (about 10s) call to + jobs.query, we should still pass through the page size to the + subsequent calls to jobs.getQueryResults. + + See internal issue 344008814. + """ + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + job_reference = { + "projectId": "my-jobs-project", + "location": "my-jobs-location", + "jobId": "my-jobs-id", + } + jobs_query_response = { + "jobComplete": False, + "jobReference": job_reference, + } + jobs_get_response = { + "jobReference": job_reference, + "status": {"state": "DONE"}, + } + get_query_results_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection( + jobs_query_response, + jobs_get_response, + get_query_results_response, + ) + + _ = client.query_and_wait(query, page_size=11) + + conn.api_request.assert_has_calls( + [ + # Verify the request we send is to jobs.query. + mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/queries", + data={ + "useLegacySql": False, + "query": query, + "formatOptions": {"useInt64Timestamp": True}, + "maxResults": 11, + "requestId": mock.ANY, + }, + timeout=None, + ), + # jobs.get: Check if the job has finished. + mock.call( + method="GET", + path="/projects/my-jobs-project/jobs/my-jobs-id", + query_params={ + "projection": "full", + "location": "my-jobs-location", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ), + # jobs.getQueryResults: wait for the query / fetch first page + mock.call( + method="GET", + path="/projects/my-jobs-project/queries/my-jobs-id", + query_params={ + # We should still pass through the page size to the + # subsequent calls to jobs.getQueryResults. + # + # See internal issue 344008814. + "maxResults": 11, + "formatOptions.useInt64Timestamp": True, + "location": "my-jobs-location", + }, + timeout=None, + ), + ] + ) + def test_query_and_wait_w_project(self): query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" jobs_query_response = { diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py index 46eb1d6b3fb5..298ab9a56594 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -247,7 +247,7 @@ def make_job_id(*args, **kwargs): method="GET", path="/projects/PROJECT/queries/1", query_params={"maxResults": 0, "location": "test-loc"}, - timeout=120, + timeout=None, ), ], ) From db8a98b59cf288ded3d07ffcc014e5c202d9a238 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Tue, 4 Jun 2024 07:25:55 -0700 Subject: [PATCH 1804/2016] fix: create query job in job.result() if doesn't exist (#1944) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: create query job in job.result() if doesn't exist * Apply suggestions from code review --------- Co-authored-by: Tim Sweña (Swast) --- .../google/cloud/bigquery/job/query.py | 5 ++ .../tests/unit/job/test_query.py | 83 +++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index a8530271aa33..8049b748e0a1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1585,6 +1585,11 @@ def is_job_done(): self._retry_do_query = retry_do_query self._job_retry = job_retry + # If the job hasn't been created, create it now. Related: + # https://github.com/googleapis/python-bigquery/issues/1940 + if self.state is None: + self._begin(retry=retry, **done_kwargs) + # Refresh the job status with jobs.get because some of the # exceptions thrown by jobs.getQueryResults like timeout and # rateLimitExceeded errors are ambiguous. We want to know if diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 66055dee14fc..5b69c98cf7f2 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -1037,6 +1037,86 @@ def test_result_dry_run(self): self.assertIsNone(result.job_id) self.assertIsNone(result.query_id) + # If the job doesn't exist, create the job first. Issue: + # https://github.com/googleapis/python-bigquery/issues/1940 + def test_result_begin_job_if_not_exist(self): + begun_resource = self._make_resource() + query_running_resource = { + "jobComplete": True, + "jobReference": { + "projectId": self.PROJECT, + "jobId": self.JOB_ID, + "location": "US", + }, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "status": {"state": "RUNNING"}, + } + query_done_resource = { + "jobComplete": True, + "jobReference": { + "projectId": self.PROJECT, + "jobId": self.JOB_ID, + "location": "US", + }, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "status": {"state": "DONE"}, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = make_connection( + begun_resource, + query_running_resource, + query_done_resource, + done_resource, + ) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["jobReference"]["location"] = "US" + + job.result() + + create_job_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={ + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": "US", + }, + "configuration": { + "query": {"useLegacySql": False, "query": self.QUERY}, + }, + }, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={"projection": "full", "location": "US"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=None, + ) + + connection.api_request.assert_has_calls( + [ + # Make sure we start a job that hasn't started yet. See: + # https://github.com/googleapis/python-bigquery/issues/1940 + create_job_call, + reload_call, + get_query_results_call, + reload_call, + ] + ) + def test_result_with_done_job_calls_get_query_results(self): query_resource_done = { "jobComplete": True, @@ -1379,6 +1459,7 @@ def test_result_w_timeout_doesnt_raise(self): client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) job._properties["jobReference"]["location"] = "US" + job._properties["status"] = {"state": "RUNNING"} with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): job.result( @@ -1429,6 +1510,7 @@ def test_result_w_timeout_raises_concurrent_futures_timeout(self): client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) job._properties["jobReference"]["location"] = "US" + job._properties["status"] = {"state": "RUNNING"} with freezegun.freeze_time( "1970-01-01 00:00:00", auto_tick_seconds=1.0 @@ -2319,5 +2401,6 @@ def test_iter(self): connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["status"] = {"state": "RUNNING"} self.assertIsInstance(iter(job), types.GeneratorType) From a5104ba5921035a0c1e4ed82c3373237f7eca2f4 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 13:27:44 -0700 Subject: [PATCH 1805/2016] chore(main): release 3.24.0 (#1928) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 19 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index b9a2c31496d4..c24725befbaa 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.24.0](https://github.com/googleapis/python-bigquery/compare/v3.23.1...v3.24.0) (2024-06-04) + + +### Features + +* Add default timeout for Client.get_job() ([#1935](https://github.com/googleapis/python-bigquery/issues/1935)) ([9fbad76](https://github.com/googleapis/python-bigquery/commit/9fbad767cc228e02040436742d0cb6743d370b90)) +* Add support for map target type in Parquet options ([#1919](https://github.com/googleapis/python-bigquery/issues/1919)) ([c3f7b23](https://github.com/googleapis/python-bigquery/commit/c3f7b237383d4705ed6e720544728c4db61f6c83)) + + +### Bug Fixes + +* Create query job in job.result() if doesn't exist ([#1944](https://github.com/googleapis/python-bigquery/issues/1944)) ([8f5b4b7](https://github.com/googleapis/python-bigquery/commit/8f5b4b70423c277ffd559d2034bc0b2b5fb93169)) +* Retry `is_job_done` on `ConnectionError` ([#1930](https://github.com/googleapis/python-bigquery/issues/1930)) ([4f72723](https://github.com/googleapis/python-bigquery/commit/4f72723f539d35977bc52c5950f6e00889b5c7be)) + + +### Performance Improvements + +* If `page_size` or `max_results` is set on `QueryJob.result()`, use to download first page of results ([#1942](https://github.com/googleapis/python-bigquery/issues/1942)) ([3e7a48d](https://github.com/googleapis/python-bigquery/commit/3e7a48d36e3c7bf6abe1b5550097178f6ca6e174)) + ## [3.23.1](https://github.com/googleapis/python-bigquery/compare/v3.23.0...v3.23.1) (2024-05-21) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index a62f73ed407e..79c15cf239bd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.23.1" +__version__ = "3.24.0" From d96bb1f80c6b15696cacc3353b27faa988fd7069 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 10 Jun 2024 11:49:02 -0700 Subject: [PATCH 1806/2016] feat: add prefer_bqstorage_client option for Connection (#1945) --- .../google/cloud/bigquery/dbapi/connection.py | 30 ++++++++++++------- .../tests/unit/test_dbapi_connection.py | 20 +++++++++++++ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py index 66dee7dfb8c3..a1a69b8fec90 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dbapi/connection.py @@ -35,12 +35,18 @@ class Connection(object): A client that uses the faster BigQuery Storage API to fetch rows from BigQuery. If not passed, it is created using the same credentials as ``client`` (provided that BigQuery Storage dependencies are installed). - - If both clients are available, ``bqstorage_client`` is used for - fetching query results. + prefer_bqstorage_client (Optional[bool]): + Prefer the BigQuery Storage client over the REST client. If Storage + client isn't available, fall back to the REST client. Defaults to + ``True``. """ - def __init__(self, client=None, bqstorage_client=None): + def __init__( + self, + client=None, + bqstorage_client=None, + prefer_bqstorage_client=True, + ): if client is None: client = bigquery.Client() self._owns_client = True @@ -49,7 +55,10 @@ def __init__(self, client=None, bqstorage_client=None): # A warning is already raised by the BQ Storage client factory factory if # instantiation fails, or if the given BQ Storage client instance is outdated. - if bqstorage_client is None: + if not prefer_bqstorage_client: + bqstorage_client = None + self._owns_bqstorage_client = False + elif bqstorage_client is None: bqstorage_client = client._ensure_bqstorage_client() self._owns_bqstorage_client = bqstorage_client is not None else: @@ -95,7 +104,7 @@ def cursor(self): return new_cursor -def connect(client=None, bqstorage_client=None): +def connect(client=None, bqstorage_client=None, prefer_bqstorage_client=True): """Construct a DB-API connection to Google BigQuery. Args: @@ -108,11 +117,12 @@ def connect(client=None, bqstorage_client=None): A client that uses the faster BigQuery Storage API to fetch rows from BigQuery. If not passed, it is created using the same credentials as ``client`` (provided that BigQuery Storage dependencies are installed). - - If both clients are available, ``bqstorage_client`` is used for - fetching query results. + prefer_bqstorage_client (Optional[bool]): + Prefer the BigQuery Storage client over the REST client. If Storage + client isn't available, fall back to the REST client. Defaults to + ``True``. Returns: google.cloud.bigquery.dbapi.Connection: A new DB-API connection to BigQuery. """ - return Connection(client, bqstorage_client) + return Connection(client, bqstorage_client, prefer_bqstorage_client) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py index 4071e57e0998..f5c77c448eee 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_connection.py @@ -122,6 +122,26 @@ def test_connect_w_both_clients(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) + def test_connect_prefer_bqstorage_client_false(self): + pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud.bigquery.dbapi import connect + from google.cloud.bigquery.dbapi import Connection + + mock_client = self._mock_client() + mock_bqstorage_client = self._mock_bqstorage_client() + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client + + connection = connect( + client=mock_client, + bqstorage_client=mock_bqstorage_client, + prefer_bqstorage_client=False, + ) + + mock_client._ensure_bqstorage_client.assert_not_called() + self.assertIsInstance(connection, Connection) + self.assertIs(connection._client, mock_client) + self.assertIs(connection._bqstorage_client, None) + def test_raises_error_if_closed(self): from google.cloud.bigquery.dbapi.exceptions import ProgrammingError From bf48a2fd4ee08a9e15f96bd1a0bb7b8c1a5f0334 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 12 Jun 2024 19:55:43 -0400 Subject: [PATCH 1807/2016] test: update the results of test based on change to hacker news data (#1949) * test: update the results of test based on change to hacker news data * Update tests/system/test_client.py --------- Co-authored-by: Lingqing Gan --- .../tests/system/test_client.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 862ef3245bb5..95c679a149d6 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -1788,20 +1788,35 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): # in the sorted order. expected_data = [ + [ + ("by", "pg"), + ("id", 1), + ( + "timestamp", + datetime.datetime( + 2006, 10, 9, 18, 21, 51, tzinfo=datetime.timezone.utc + ), + ), + ], [ ("by", "phyllis"), ("id", 2), - ("timestamp", datetime.datetime(2006, 10, 9, 18, 30, 28, tzinfo=UTC)), + ( + "timestamp", + datetime.datetime( + 2006, 10, 9, 18, 30, 28, tzinfo=datetime.timezone.utc + ), + ), ], [ ("by", "phyllis"), ("id", 3), - ("timestamp", datetime.datetime(2006, 10, 9, 18, 40, 33, tzinfo=UTC)), - ], - [ - ("by", "onebeerdave"), - ("id", 4), - ("timestamp", datetime.datetime(2006, 10, 9, 18, 47, 42, tzinfo=UTC)), + ( + "timestamp", + datetime.datetime( + 2006, 10, 9, 18, 40, 33, tzinfo=datetime.timezone.utc + ), + ), ], ] From bd103111372f64dbaaf4f4d21d1cd39f04cd2ad3 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 13 Jun 2024 13:35:20 +0200 Subject: [PATCH 1808/2016] chore(deps): update all dependencies (#1946) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .../samples/desktopapp/requirements-test.txt | 2 +- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 12 ++++++------ .../samples/magics/requirements-test.txt | 2 +- .../samples/magics/requirements.txt | 2 +- .../samples/notebooks/requirements-test.txt | 2 +- .../samples/notebooks/requirements.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- .../samples/snippets/requirements.txt | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index a6c397822790..8f0bfaad4ca5 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index b35cc414c1e5..25ed0977b148 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.23.1 +google-cloud-bigquery==3.24.0 google-auth-oauthlib==1.2.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 64d436dcfd69..b35a54a76581 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index becaaf50ac44..e7c59ce4b5b5 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==23.2.0 -certifi==2024.2.2 +certifi==2024.6.2 cffi===1.15.1; python_version == '3.7' cffi==1.16.0; python_version >= '3.8' charset-normalizer==3.3.2 @@ -14,15 +14,15 @@ geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==0.14.4; python_version >= '3.9' google-api-core==2.19.0 -google-auth==2.29.0 -google-cloud-bigquery==3.23.1 +google-auth==2.30.0 +google-cloud-bigquery==3.24.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 -googleapis-common-protos==1.63.0 +googleapis-common-protos==1.63.1 grpcio===1.62.2; python_version == '3.7' -grpcio==1.64.0; python_version >= '3.8' +grpcio==1.64.1; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 @@ -49,7 +49,7 @@ rsa==4.9 Shapely==2.0.4 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.12.0; python_version >= '3.8' +typing-extensions==4.12.1; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index a6c397822790..8f0bfaad4ca5 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index e3a225b79130..00f0b15d0428 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.23.1 +google.cloud.bigquery==3.24.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index a6c397822790..8f0bfaad4ca5 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index f774ea183e71..91a4a87e63b6 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.23.1 +google-cloud-bigquery==3.24.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index bd1ba50284da..b65023b00fac 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 89fe1638783b..054fa2658a08 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.23.1 +google-cloud-bigquery==3.24.0 From 69892da9daadd4b18459ba22628c9013b4b0da8c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 13 Jun 2024 22:34:45 +0200 Subject: [PATCH 1809/2016] chore(deps): update all dependencies (#1954) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .../samples/geography/requirements.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e7c59ce4b5b5..2b3e4713e7d8 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -19,14 +19,15 @@ google-cloud-bigquery==3.24.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 -google-resumable-media==2.7.0 +google-resumable-media==2.7.1 googleapis-common-protos==1.63.1 grpcio===1.62.2; python_version == '3.7' grpcio==1.64.1; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 -packaging==24.0 +packaging===24.0; python_version == '3.7' +packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' @@ -49,7 +50,7 @@ rsa==4.9 Shapely==2.0.4 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.12.1; python_version >= '3.8' +typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' From 212668e3555466a1c9bcfe9e3bb6c3b75f894708 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Fri, 14 Jun 2024 10:22:49 -0700 Subject: [PATCH 1810/2016] feat: support load job option ColumnNameCharacterMap (#1952) * feat: support load job option ColumnNameCharacterMap * add unit test --- .../google/cloud/bigquery/job/load.py | 41 +++++++++++++++++++ .../tests/unit/job/test_load_config.py | 39 ++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index 1764354562bb..e56ce16f04a5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -32,6 +32,26 @@ from google.cloud.bigquery.query import ConnectionProperty +class ColumnNameCharacterMap: + """Indicates the character map used for column names. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap + """ + + COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED = "COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED" + """Unspecified column name character map.""" + + STRICT = "STRICT" + """Support flexible column name and reject invalid column names.""" + + V1 = "V1" + """ Support alphanumeric + underscore characters and names must start with + a letter or underscore. Invalid column names will be normalized.""" + + V2 = "V2" + """Support flexible column name. Invalid column names will be normalized.""" + + class LoadJobConfig(_JobConfig): """Configuration options for load jobs. @@ -597,6 +617,27 @@ def parquet_options(self, value): else: self._del_sub_prop("parquetOptions") + @property + def column_name_character_map(self) -> str: + """Optional[google.cloud.bigquery.job.ColumnNameCharacterMap]: + Character map supported for column names in CSV/Parquet loads. Defaults + to STRICT and can be overridden by Project Config Service. Using this + option with unsupported load formats will result in an error. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.column_name_character_map + """ + return self._get_sub_prop( + "columnNameCharacterMap", + ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED, + ) + + @column_name_character_map.setter + def column_name_character_map(self, value: Optional[str]): + if value is None: + value = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED + self._set_sub_prop("columnNameCharacterMap", value) + class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index e1fa2641fe0f..becf3e959b79 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -843,3 +843,42 @@ def test_parquet_options_setter_clearing(self): config.parquet_options = None self.assertNotIn("parquetOptions", config._properties["load"]) + + def test_column_name_character_map_missing(self): + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = self._get_target_class()() + self.assertEqual( + config.column_name_character_map, + ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED, + ) + + def test_column_name_character_map_hit(self): + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = self._get_target_class()() + config._properties["load"]["columnNameCharacterMap"] = "STRICT" + self.assertEqual( + config.column_name_character_map, + ColumnNameCharacterMap.STRICT, + ) + + def test_column_name_character_map_setter(self): + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = self._get_target_class()() + config.column_name_character_map = "V1" + self.assertEqual( + config._properties["load"]["columnNameCharacterMap"], + ColumnNameCharacterMap.V1, + ) + + def test_column_name_character_map_none(self): + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = self._get_target_class()() + config.column_name_character_map = None + self.assertEqual( + config._properties["load"]["columnNameCharacterMap"], + ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED, + ) From 25b054cfdb357f392f9fee7540aefe370485915a Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 17 Jun 2024 10:20:56 -0700 Subject: [PATCH 1811/2016] fix: do not overwrite page_size with max_results when start_index is set (#1956) * fix: do not overwrite page_size with max_results when start_index is set * update test --- .../google/cloud/bigquery/job/query.py | 5 +++-- .../tests/unit/job/test_query.py | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 8049b748e0a1..4ea5687e0bb2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1532,8 +1532,9 @@ def result( # type: ignore # (incompatible with supertype) # Setting max_results should be equivalent to setting page_size with # regards to allowing the user to tune how many results to download # while we wait for the query to finish. See internal issue: - # 344008814. - if page_size is None and max_results is not None: + # 344008814. But if start_index is set, user is trying to access a + # specific page, so we don't need to set page_size. See issue #1950. + if page_size is None and max_results is not None and start_index is None: page_size = max_results # When timeout has default sentinel value ``object()``, do not pass diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 5b69c98cf7f2..4bbd31c7307a 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -1652,7 +1652,17 @@ def test_result_with_start_index(self): start_index = 1 - result = job.result(start_index=start_index) + # Verifies that page_size isn't overwritten by max_results when + # start_index is not None. See + # https://github.com/googleapis/python-bigquery/issues/1950 + page_size = 10 + max_results = 100 + + result = job.result( + page_size=page_size, + max_results=max_results, + start_index=start_index, + ) self.assertIsInstance(result, RowIterator) self.assertEqual(result.total_rows, 5) @@ -1665,6 +1675,9 @@ def test_result_with_start_index(self): self.assertEqual( tabledata_list_request[1]["query_params"]["startIndex"], start_index ) + self.assertEqual( + tabledata_list_request[1]["query_params"]["maxResults"], page_size + ) def test_result_error(self): from google.cloud import exceptions From e6812c3cfa4697e34dce01d19ee0b9560e1493cc Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 10:05:28 -0700 Subject: [PATCH 1812/2016] chore(main): release 3.25.0 (#1947) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 13 +++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index c24725befbaa..4a089b8b482f 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.25.0](https://github.com/googleapis/python-bigquery/compare/v3.24.0...v3.25.0) (2024-06-17) + + +### Features + +* Add prefer_bqstorage_client option for Connection ([#1945](https://github.com/googleapis/python-bigquery/issues/1945)) ([bfdeb3f](https://github.com/googleapis/python-bigquery/commit/bfdeb3fdbc1d5b26fcd3d1433abfb0be49d12018)) +* Support load job option ColumnNameCharacterMap ([#1952](https://github.com/googleapis/python-bigquery/issues/1952)) ([7e522ee](https://github.com/googleapis/python-bigquery/commit/7e522eea776cd9a74f8078c4236f63d5ff11f20e)) + + +### Bug Fixes + +* Do not overwrite page_size with max_results when start_index is set ([#1956](https://github.com/googleapis/python-bigquery/issues/1956)) ([7d0fcee](https://github.com/googleapis/python-bigquery/commit/7d0fceefdf28278c1f2cdaab571de9b235320998)) + ## [3.24.0](https://github.com/googleapis/python-bigquery/compare/v3.23.1...v3.24.0) (2024-06-04) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 79c15cf239bd..fed077e267bf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.24.0" +__version__ = "3.25.0" From e4e656217fa2a8bef836782f77162630f8c49236 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 20 Jun 2024 20:18:34 +0200 Subject: [PATCH 1813/2016] chore(deps): update dependency urllib3 to v2.2.2 [security] (#1959) Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 2b3e4713e7d8..b6cc8912eac1 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -53,4 +53,4 @@ typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' -urllib3==2.2.1; python_version >= '3.8' +urllib3==2.2.2; python_version >= '3.8' From 15af5f23e2492e90d2d38634d219bbef11ceb2b0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Sat, 22 Jun 2024 01:47:26 +0200 Subject: [PATCH 1814/2016] chore(deps): update all dependencies (#1955) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../google-cloud-bigquery/samples/desktopapp/requirements.txt | 2 +- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 25ed0977b148..6a3d17c6f6a2 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.24.0 +google-cloud-bigquery==3.25.0 google-auth-oauthlib==1.2.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index b6cc8912eac1..fd943cc83f4f 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -15,7 +15,7 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.4; python_version >= '3.9' google-api-core==2.19.0 google-auth==2.30.0 -google-cloud-bigquery==3.24.0 +google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 @@ -31,7 +31,7 @@ packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' -proto-plus==1.23.0 +proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==16.1.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 00f0b15d0428..a5b4e2aaafdd 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.24.0 +google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 91a4a87e63b6..78945d28b8dc 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.24.0 +google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 054fa2658a08..9e181d9632f3 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.24.0 +google-cloud-bigquery==3.25.0 From eda36e3c01f10224004df93b66d60d5df8afcdf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 9 Jul 2024 15:07:44 -0500 Subject: [PATCH 1815/2016] chore: remove references to conda (#1971) --- packages/google-cloud-bigquery/noxfile.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 5f88e46a0215..a2df2e094715 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -339,14 +339,6 @@ def prerelease_deps(session): "--upgrade", "pyarrow", ) - session.install( - "--extra-index-url", - "https://pypi.anaconda.org/scipy-wheels-nightly/simple", - "--prefer-binary", - "--pre", - "--upgrade", - "pandas", - ) session.install( "--pre", "--upgrade", @@ -355,6 +347,7 @@ def prerelease_deps(session): "ipywidgets", "tqdm", "git+https://github.com/pypa/packaging.git", + "pandas", ) session.install( From 1378326e9b378f483cdc796804aea8b6e212f6f4 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 9 Jul 2024 22:49:22 +0200 Subject: [PATCH 1816/2016] chore(deps): update dependency certifi to v2024.7.4 [security] (#1968) Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index fd943cc83f4f..add61f286362 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==23.2.0 -certifi==2024.6.2 +certifi==2024.7.4 cffi===1.15.1; python_version == '3.7' cffi==1.16.0; python_version >= '3.8' charset-normalizer==3.3.2 From bb75006f78ae682b0ebc8eb109b0ceba3a4c9c07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 10 Jul 2024 13:20:06 -0500 Subject: [PATCH 1817/2016] deps: bump min version of google-api-core and google-cloud-core to 2.x (#1972) Fixes constraints file to match setup.py --- packages/google-cloud-bigquery/setup.py | 16 +++++----------- .../testing/constraints-3.7.txt | 6 +++--- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index ed9a6351bf61..db8e061134da 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -29,18 +29,12 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - # NOTE: Maintainers, please do not require google-api-core>=2.x.x - # Until this issue is closed - # https://github.com/googleapis/google-cloud-python/issues/10566 - "google-api-core[grpc] >= 1.34.1, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,!=2.10.*", + "google-api-core[grpc] >= 2.11.1, <3.0.0dev", "google-auth >= 2.14.1, <3.0.0dev", - # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x - # Until this issue is closed - # https://github.com/googleapis/google-cloud-python/issues/10566 - "google-cloud-core >= 1.6.0, <3.0.0dev", - "google-resumable-media >= 0.6.0, < 3.0dev", + "google-cloud-core >= 2.4.1, <3.0.0dev", + "google-resumable-media >= 2.0.0, < 3.0dev", "packaging >= 20.0.0", - "python-dateutil >= 2.7.2, <3.0dev", + "python-dateutil >= 2.7.3, <3.0dev", "requests >= 2.21.0, < 3.0.0dev", ] pyarrow_dependency = "pyarrow >= 3.0.0" @@ -82,7 +76,7 @@ "opentelemetry-instrumentation >= 0.20b0", ], "bigquery_v2": [ - "proto-plus >= 1.15.0, <2.0.0dev", + "proto-plus >= 1.22.0, <2.0.0dev", "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. ], } diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index d64e06cc3953..c09978d5dc7f 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -7,13 +7,13 @@ # Then this file should have foo==1.14.0 db-dtypes==0.3.0 geopandas==0.9.0 -google-api-core==2.17.1 -google-auth==2.28.1 +google-api-core==2.11.1 +google-auth==2.14.1 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 google-cloud-testutils==1.4.0 google-crc32c==1.5.0 -google-resumable-media==2.7.0 +google-resumable-media==2.0.0 googleapis-common-protos==1.62.0 grpcio==1.47.0 grpcio-status==1.47.0 From 80aa9e3c41ac03c050ca42eabddb4a7a8a1ff3d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 10 Jul 2024 14:02:08 -0500 Subject: [PATCH 1818/2016] feat: use `bigquery-magics` package for the `%%bigquery` magic (#1965) * feat: use `bigquery-magics` package for the `%%bigquery` magic * ignore types on bigquery-magics package * Update samples/magics/noxfile_config.py Co-authored-by: Chalmer Lowe --------- Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/__init__.py | 22 +- .../google/cloud/bigquery/magics/magics.py | 81 +---- .../samples/magics/noxfile_config.py | 37 +++ .../samples/magics/query.py | 2 +- .../samples/magics/query_params_scalars.py | 2 +- .../samples/magics/requirements.txt | 1 + .../notebooks/jupyter_tutorial_test.py | 2 +- .../samples/notebooks/requirements.txt | 1 + packages/google-cloud-bigquery/setup.py | 3 +- .../testing/constraints-3.7.txt | 1 + .../tests/system/test_magics.py | 5 +- .../tests/unit/test_magics.py | 291 +++++++++++------- 12 files changed, 262 insertions(+), 186 deletions(-) create mode 100644 packages/google-cloud-bigquery/samples/magics/noxfile_config.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index caf81d9aa467..e80907ec93e4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -27,6 +27,7 @@ - :class:`~google.cloud.bigquery.table.Table` represents a single "relation". """ +import warnings from google.cloud.bigquery import version as bigquery_version @@ -114,6 +115,11 @@ from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +try: + import bigquery_magics # type: ignore +except ImportError: + bigquery_magics = None + __all__ = [ "__version__", "Client", @@ -214,8 +220,16 @@ def load_ipython_extension(ipython): """Called by IPython when this module is loaded as an IPython extension.""" - from google.cloud.bigquery.magics.magics import _cell_magic - - ipython.register_magic_function( - _cell_magic, magic_kind="cell", magic_name="bigquery" + warnings.warn( + "%load_ext google.cloud.bigquery is deprecated. Install bigquery-magics package and use `%load_ext bigquery_magics`, instead.", + category=FutureWarning, ) + + if bigquery_magics is not None: + bigquery_magics.load_ipython_extension(ipython) + else: + from google.cloud.bigquery.magics.magics import _cell_magic + + ipython.register_magic_function( + _cell_magic, magic_kind="cell", magic_name="bigquery" + ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index 6e6b21965eb7..b153d959a0bd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -14,70 +14,11 @@ """IPython Magics -.. function:: %%bigquery - - IPython cell magic to run a query and display the result as a DataFrame - - .. code-block:: python - - %%bigquery [] [--project ] [--use_legacy_sql] - [--verbose] [--params ] - - - Parameters: - - * ```` (Optional[line argument]): - variable to store the query results. The results are not displayed if - this parameter is used. If an error occurs during the query execution, - the corresponding ``QueryJob`` instance (if available) is stored in - the variable instead. - * ``--destination_table`` (Optional[line argument]): - A dataset and table to store the query results. If table does not exists, - it will be created. If table already exists, its data will be overwritten. - Variable should be in a format .. - * ``--no_query_cache`` (Optional[line argument]): - Do not use cached query results. - * ``--project `` (Optional[line argument]): - Project to use for running the query. Defaults to the context - :attr:`~google.cloud.bigquery.magics.Context.project`. - * ``--use_bqstorage_api`` (Optional[line argument]): - [Deprecated] Not used anymore, as BigQuery Storage API is used by default. - * ``--use_rest_api`` (Optional[line argument]): - Use the BigQuery REST API instead of the Storage API. - * ``--use_legacy_sql`` (Optional[line argument]): - Runs the query using Legacy SQL syntax. Defaults to Standard SQL if - this argument not used. - * ``--verbose`` (Optional[line argument]): - If this flag is used, information including the query job ID and the - amount of time for the query to complete will not be cleared after the - query is finished. By default, this information will be displayed but - will be cleared after the query is finished. - * ``--params `` (Optional[line argument]): - If present, the argument following the ``--params`` flag must be - either: - - * :class:`str` - A JSON string representation of a dictionary in the - format ``{"param_name": "param_value"}`` (ex. ``{"num": 17}``). Use - of the parameter in the query should be indicated with - ``@param_name``. See ``In[5]`` in the Examples section below. - - * :class:`dict` reference - A reference to a ``dict`` in the format - ``{"param_name": "param_value"}``, where the value types must be JSON - serializable. The variable reference is indicated by a ``$`` before - the variable name (ex. ``$my_dict_var``). See ``In[6]`` and ``In[7]`` - in the Examples section below. - - * ```` (required, cell argument): - SQL query to run. If the query does not contain any whitespace (aside - from leading and trailing whitespace), it is assumed to represent a - fully-qualified table ID, and the latter's data will be fetched. +Install ``bigquery-magics`` and call ``%load_ext bigquery_magics`` to use the +``%%bigquery`` cell magic. - Returns: - A :class:`pandas.DataFrame` with the query results. - - .. note:: - All queries run using this magic will run using the context - :attr:`~google.cloud.bigquery.magics.Context.credentials`. +See the `BigQuery Magics reference documentation +`_. """ from __future__ import print_function @@ -109,6 +50,11 @@ from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.magics import line_arg_parser as lap +try: + import bigquery_magics # type: ignore +except ImportError: + bigquery_magics = None + IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) @@ -280,7 +226,14 @@ def progress_bar_type(self, value): self._progress_bar_type = value -context = Context() +# If bigquery_magics is available, we load that extension rather than this one. +# Ensure google.cloud.bigquery.magics.context setters are on the correct magics +# implementation in case the user has installed the package but hasn't updated +# their code. +if bigquery_magics is not None: + context = bigquery_magics.context +else: + context = Context() def _handle_error(error, destination_var=None): diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile_config.py b/packages/google-cloud-bigquery/samples/magics/noxfile_config.py new file mode 100644 index 000000000000..982751b8bfd1 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/magics/noxfile_config.py @@ -0,0 +1,37 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be inported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [ + "2.7", + ], + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/packages/google-cloud-bigquery/samples/magics/query.py b/packages/google-cloud-bigquery/samples/magics/query.py index 4d3b4418bb5a..0ac947db0d09 100644 --- a/packages/google-cloud-bigquery/samples/magics/query.py +++ b/packages/google-cloud-bigquery/samples/magics/query.py @@ -24,7 +24,7 @@ def query() -> "pandas.DataFrame": ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + ip.extension_manager.load_extension("bigquery_magics") sample = """ # [START bigquery_jupyter_query] diff --git a/packages/google-cloud-bigquery/samples/magics/query_params_scalars.py b/packages/google-cloud-bigquery/samples/magics/query_params_scalars.py index e833ef93b2f6..74f665acbb8b 100644 --- a/packages/google-cloud-bigquery/samples/magics/query_params_scalars.py +++ b/packages/google-cloud-bigquery/samples/magics/query_params_scalars.py @@ -24,7 +24,7 @@ def query_with_parameters() -> "pandas.DataFrame": ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + ip.extension_manager.load_extension("bigquery_magics") sample = """ # [START bigquery_jupyter_query_params_scalars] diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index a5b4e2aaafdd..a1044c2310e8 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,3 +1,4 @@ +bigquery_magics==0.1.0 db-dtypes==1.2.0 google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py b/packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py index 9d42a4eda7b6..2c2cf9390f87 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py @@ -60,7 +60,7 @@ def _strip_region_tags(sample_text: str) -> str: def test_jupyter_tutorial(ipython: "TerminalInteractiveShell") -> None: matplotlib.use("agg") ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + ip.extension_manager.load_extension("bigquery_magics") sample = """ # [START bigquery_jupyter_magic_gender_by_year] diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 78945d28b8dc..3896a2aeca35 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,3 +1,4 @@ +bigquery-magics==0.1.0 db-dtypes==1.2.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index db8e061134da..9641fe695b97 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -66,8 +66,7 @@ ], "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <3.0.0dev"], "ipython": [ - "ipython>=7.23.1,!=8.1.0", - "ipykernel>=6.0.0", + "bigquery-magics >= 0.1.0", ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index c09978d5dc7f..fda7ce9512e4 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 +bigquery-magics==0.1.0 db-dtypes==0.3.0 geopandas==0.9.0 google-api-core==2.11.1 diff --git a/packages/google-cloud-bigquery/tests/system/test_magics.py b/packages/google-cloud-bigquery/tests/system/test_magics.py index 3d761cd351ef..72d358a74221 100644 --- a/packages/google-cloud-bigquery/tests/system/test_magics.py +++ b/packages/google-cloud-bigquery/tests/system/test_magics.py @@ -50,7 +50,10 @@ def test_bigquery_magic(ipython_interactive): current_process = psutil.Process() conn_count_start = len(current_process.connections()) - ip.extension_manager.load_extension("google.cloud.bigquery") + # Deprecated, but should still work in google-cloud-bigquery 3.x. + with pytest.warns(FutureWarning, match="bigquery_magics"): + ip.extension_manager.load_extension("google.cloud.bigquery") + sql = """ SELECT CONCAT( diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 4b1aaf14d5a9..73b29df6b69e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -155,9 +155,10 @@ def test_context_with_default_credentials(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_context_with_default_connection(): +def test_context_with_default_connection(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._credentials = None magics.context._project = None magics.context._connection = None @@ -218,9 +219,10 @@ def test_context_credentials_and_project_can_be_set_explicitly(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_context_with_custom_connection(): +def test_context_with_custom_connection(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None magics.context._credentials = None context_conn = magics.context._connection = make_connection( @@ -439,11 +441,9 @@ def test__create_dataset_if_necessary_not_exist(): @pytest.mark.usefixtures("ipython_interactive") def test_extension_load(): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") - # verify that the magic is registered and has the correct source - magic = ip.magics_manager.magics["cell"].get("bigquery") - assert magic.__module__ == "google.cloud.bigquery.magics.magics" + with pytest.warns(FutureWarning, match="bigquery_magics"): + bigquery.load_ipython_extension(ip) @pytest.mark.usefixtures("ipython_interactive") @@ -453,7 +453,8 @@ def test_extension_load(): ) def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) mock_credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -494,9 +495,10 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_default_connection_user_agent(): +def test_bigquery_magic_default_connection_user_agent(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._connection = None credentials_mock = mock.create_autospec( @@ -519,9 +521,10 @@ def test_bigquery_magic_default_connection_user_agent(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_legacy_sql(): +def test_bigquery_magic_with_legacy_sql(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -538,9 +541,10 @@ def test_bigquery_magic_with_legacy_sql(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_result_saved_to_variable(ipython_ns_cleanup): +def test_bigquery_magic_with_result_saved_to_variable(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -571,9 +575,10 @@ def test_bigquery_magic_with_result_saved_to_variable(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): +def test_bigquery_magic_does_not_clear_display_in_verbose_mode(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -592,9 +597,10 @@ def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_clears_display_in_non_verbose_mode(): +def test_bigquery_magic_clears_display_in_non_verbose_mode(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -618,7 +624,8 @@ def test_bigquery_magic_clears_display_in_non_verbose_mode(): ) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) mock_credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -686,7 +693,8 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): pandas = pytest.importorskip("pandas") ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) mock_credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -726,9 +734,10 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_max_results_invalid(): +def test_bigquery_magic_w_max_results_invalid(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -748,9 +757,10 @@ def test_bigquery_magic_w_max_results_invalid(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): +def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -782,9 +792,10 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_max_results_query_job_results_fails(): +def test_bigquery_magic_w_max_results_query_job_results_fails(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -819,9 +830,10 @@ def test_bigquery_magic_w_max_results_query_job_results_fails(): assert close_transports.called -def test_bigquery_magic_w_table_id_invalid(): +def test_bigquery_magic_w_table_id_invalid(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -848,9 +860,10 @@ def test_bigquery_magic_w_table_id_invalid(): assert "Traceback (most recent call last)" not in output -def test_bigquery_magic_w_missing_query(): +def test_bigquery_magic_w_missing_query(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -873,9 +886,10 @@ def test_bigquery_magic_w_missing_query(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): +def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None ipython_ns_cleanup.append((ip, "df")) @@ -915,9 +929,10 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_w_table_id_and_bqstorage_client(): +def test_bigquery_magic_w_table_id_and_bqstorage_client(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -959,9 +974,10 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_dryrun_option_sets_job_config(): +def test_bigquery_magic_dryrun_option_sets_job_config(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -980,9 +996,10 @@ def test_bigquery_magic_dryrun_option_sets_job_config(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_dryrun_option_returns_query_job(): +def test_bigquery_magic_dryrun_option_returns_query_job(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1004,9 +1021,12 @@ def test_bigquery_magic_dryrun_option_returns_query_job(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_dryrun_option_variable_error_message(ipython_ns_cleanup): +def test_bigquery_magic_dryrun_option_variable_error_message( + ipython_ns_cleanup, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1031,9 +1051,12 @@ def test_bigquery_magic_dryrun_option_variable_error_message(ipython_ns_cleanup) @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(ipython_ns_cleanup): +def test_bigquery_magic_dryrun_option_saves_query_job_to_variable( + ipython_ns_cleanup, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1061,9 +1084,12 @@ def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(ipython_ns_cle @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_saves_query_job_to_variable_on_error(ipython_ns_cleanup): +def test_bigquery_magic_saves_query_job_to_variable_on_error( + ipython_ns_cleanup, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1094,9 +1120,10 @@ def test_bigquery_magic_saves_query_job_to_variable_on_error(ipython_ns_cleanup) @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_maximum_bytes_billed_invalid(): +def test_bigquery_magic_w_maximum_bytes_billed_invalid(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -1118,9 +1145,12 @@ def test_bigquery_magic_w_maximum_bytes_billed_invalid(): ) @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, expected): +def test_bigquery_magic_w_maximum_bytes_billed_overrides_context( + param_value, expected, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None # Set the default maximum bytes billed, so we know it's overridable by the param. @@ -1158,9 +1188,10 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): +def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None magics.context.default_query_job_config.maximum_bytes_billed = 1337 @@ -1195,9 +1226,10 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): +def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None magics.context.default_query_job_config = job.QueryJobConfig( @@ -1236,7 +1268,8 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_with_no_query_cache(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) conn = make_connection() monkeypatch.setattr(magics.context, "_connection", conn) monkeypatch.setattr(magics.context, "project", "project-from-context") @@ -1266,7 +1299,8 @@ def test_bigquery_magic_with_no_query_cache(monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_context_with_no_query_cache_from_context(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) conn = make_connection() monkeypatch.setattr(magics.context, "_connection", conn) monkeypatch.setattr(magics.context, "project", "project-from-context") @@ -1294,7 +1328,8 @@ def test_context_with_no_query_cache_from_context(monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None magics.context.progress_bar_type = "tqdm_gui" @@ -1338,9 +1373,10 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_progress_bar_type(): +def test_bigquery_magic_with_progress_bar_type(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.progress_bar_type = None run_query_patch = mock.patch( @@ -1358,9 +1394,10 @@ def test_bigquery_magic_with_progress_bar_type(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_project(): +def test_bigquery_magic_with_project(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -1382,9 +1419,10 @@ def test_bigquery_magic_with_project(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_bigquery_api_endpoint(ipython_ns_cleanup): +def test_bigquery_magic_with_bigquery_api_endpoint(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._connection = None run_query_patch = mock.patch( @@ -1404,9 +1442,10 @@ def test_bigquery_magic_with_bigquery_api_endpoint(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_bigquery_api_endpoint_context_dict(): +def test_bigquery_magic_with_bigquery_api_endpoint_context_dict(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._connection = None magics.context.bigquery_client_options = {} @@ -1427,9 +1466,10 @@ def test_bigquery_magic_with_bigquery_api_endpoint_context_dict(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_bqstorage_api_endpoint(ipython_ns_cleanup): +def test_bigquery_magic_with_bqstorage_api_endpoint(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._connection = None run_query_patch = mock.patch( @@ -1449,9 +1489,10 @@ def test_bigquery_magic_with_bqstorage_api_endpoint(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_bqstorage_api_endpoint_context_dict(): +def test_bigquery_magic_with_bqstorage_api_endpoint_context_dict(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._connection = None magics.context.bqstorage_client_options = {} @@ -1472,9 +1513,10 @@ def test_bigquery_magic_with_bqstorage_api_endpoint_context_dict(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_multiple_options(): +def test_bigquery_magic_with_multiple_options(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -1504,9 +1546,10 @@ def test_bigquery_magic_with_multiple_options(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_string_params(ipython_ns_cleanup): +def test_bigquery_magic_with_string_params(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1541,9 +1584,10 @@ def test_bigquery_magic_with_string_params(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params(ipython_ns_cleanup): +def test_bigquery_magic_with_dict_params(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1585,9 +1629,10 @@ def test_bigquery_magic_with_dict_params(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_nonexisting(): +def test_bigquery_magic_with_dict_params_nonexisting(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1600,9 +1645,10 @@ def test_bigquery_magic_with_dict_params_nonexisting(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_incorrect_syntax(): +def test_bigquery_magic_with_dict_params_incorrect_syntax(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1616,9 +1662,10 @@ def test_bigquery_magic_with_dict_params_incorrect_syntax(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_duplicate(): +def test_bigquery_magic_with_dict_params_duplicate(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1634,9 +1681,10 @@ def test_bigquery_magic_with_dict_params_duplicate(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_option_value_incorrect(): +def test_bigquery_magic_with_option_value_incorrect(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1650,9 +1698,12 @@ def test_bigquery_magic_with_option_value_incorrect(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_negative_value(ipython_ns_cleanup): +def test_bigquery_magic_with_dict_params_negative_value( + ipython_ns_cleanup, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1690,9 +1741,10 @@ def test_bigquery_magic_with_dict_params_negative_value(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup): +def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1730,9 +1782,10 @@ def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup): +def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1770,9 +1823,10 @@ def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_improperly_formatted_params(): +def test_bigquery_magic_with_improperly_formatted_params(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1788,9 +1842,12 @@ def test_bigquery_magic_with_improperly_formatted_params(): ) @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_valid_query_in_existing_variable(ipython_ns_cleanup, raw_sql): +def test_bigquery_magic_valid_query_in_existing_variable( + ipython_ns_cleanup, raw_sql, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1827,9 +1884,10 @@ def test_bigquery_magic_valid_query_in_existing_variable(ipython_ns_cleanup, raw @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_nonexisting_query_variable(): +def test_bigquery_magic_nonexisting_query_variable(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1851,9 +1909,10 @@ def test_bigquery_magic_nonexisting_query_variable(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_empty_query_variable_name(): +def test_bigquery_magic_empty_query_variable_name(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1873,9 +1932,10 @@ def test_bigquery_magic_empty_query_variable_name(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup): +def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1899,9 +1959,10 @@ def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_query_variable_not_identifier(): +def test_bigquery_magic_query_variable_not_identifier(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1922,9 +1983,10 @@ def test_bigquery_magic_query_variable_not_identifier(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_invalid_multiple_option_values(): +def test_bigquery_magic_with_invalid_multiple_option_values(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1939,9 +2001,10 @@ def test_bigquery_magic_with_invalid_multiple_option_values(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_omits_tracebacks_from_error_message(): +def test_bigquery_magic_omits_tracebacks_from_error_message(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -1966,9 +2029,10 @@ def test_bigquery_magic_omits_tracebacks_from_error_message(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_destination_table_invalid_format(): +def test_bigquery_magic_w_destination_table_invalid_format(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -1994,9 +2058,10 @@ def test_bigquery_magic_w_destination_table_invalid_format(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_destination_table(): +def test_bigquery_magic_w_destination_table(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -2026,9 +2091,10 @@ def test_bigquery_magic_w_destination_table(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_create_dataset_fails(): +def test_bigquery_magic_create_dataset_fails(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -2056,9 +2122,10 @@ def test_bigquery_magic_create_dataset_fails(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_location(): +def test_bigquery_magic_with_location(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) From a77dd432f5ab3ada2fd6ff0e5c43d9bb2fabae62 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 17 Jul 2024 17:21:49 -0400 Subject: [PATCH 1819/2016] chore: update templated files (#1975) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: update templated files * remove obsolete code in owlbot.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.flake8 | 2 +- .../.github/.OwlBot.lock.yaml | 3 +- .../.github/auto-label.yaml | 2 +- .../google-cloud-bigquery/.kokoro/build.sh | 2 +- .../.kokoro/docker/docs/Dockerfile | 23 +- .../.kokoro/docker/docs/requirements.txt | 40 +- .../.kokoro/populate-secrets.sh | 2 +- .../.kokoro/publish-docs.sh | 2 +- .../google-cloud-bigquery/.kokoro/release.sh | 2 +- .../.kokoro/requirements.txt | 509 +++++++++--------- .../.kokoro/test-samples-against-head.sh | 2 +- .../.kokoro/test-samples-impl.sh | 2 +- .../.kokoro/test-samples.sh | 2 +- .../.kokoro/trampoline.sh | 2 +- .../.kokoro/trampoline_v2.sh | 2 +- .../.pre-commit-config.yaml | 2 +- packages/google-cloud-bigquery/.trampolinerc | 2 +- packages/google-cloud-bigquery/MANIFEST.in | 2 +- packages/google-cloud-bigquery/docs/conf.py | 2 +- packages/google-cloud-bigquery/owlbot.py | 23 - .../scripts/decrypt-secrets.sh | 2 +- .../scripts/readme-gen/readme_gen.py | 2 +- 22 files changed, 319 insertions(+), 313 deletions(-) diff --git a/packages/google-cloud-bigquery/.flake8 b/packages/google-cloud-bigquery/.flake8 index 87f6e408c47d..32986c79287a 100644 --- a/packages/google-cloud-bigquery/.flake8 +++ b/packages/google-cloud-bigquery/.flake8 @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 81f87c56917d..001b1b1cabec 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5a4c19d17e597b92d786e569be101e636c9c2817731f80a5adec56b2aa8fe070 -# created: 2024-04-12T11:35:58.922854369Z + digest: sha256:52210e0e0559f5ea8c52be148b33504022e1faef4e95fbe4b32d68022af2fa7e diff --git a/packages/google-cloud-bigquery/.github/auto-label.yaml b/packages/google-cloud-bigquery/.github/auto-label.yaml index 8b37ee89711f..21786a4eb085 100644 --- a/packages/google-cloud-bigquery/.github/auto-label.yaml +++ b/packages/google-cloud-bigquery/.github/auto-label.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/build.sh b/packages/google-cloud-bigquery/.kokoro/build.sh index f38bda804393..e4da2e2a76bc 100755 --- a/packages/google-cloud-bigquery/.kokoro/build.sh +++ b/packages/google-cloud-bigquery/.kokoro/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile index bdaf39fe22d0..5205308b334d 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ubuntu:22.04 +from ubuntu:24.04 ENV DEBIAN_FRONTEND noninteractive @@ -40,7 +40,6 @@ RUN apt-get update \ libssl-dev \ libsqlite3-dev \ portaudio19-dev \ - python3-distutils \ redis-server \ software-properties-common \ ssh \ @@ -60,18 +59,22 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb -###################### Install python 3.9.13 -# Download python 3.9.13 -RUN wget https://www.python.org/ftp/python/3.9.13/Python-3.9.13.tgz +###################### Install python 3.10.14 for docs/docfx session + +# Download python 3.10.14 +RUN wget https://www.python.org/ftp/python/3.10.14/Python-3.10.14.tgz # Extract files -RUN tar -xvf Python-3.9.13.tgz +RUN tar -xvf Python-3.10.14.tgz -# Install python 3.9.13 -RUN ./Python-3.9.13/configure --enable-optimizations +# Install python 3.10.14 +RUN ./Python-3.10.14/configure --enable-optimizations RUN make altinstall +RUN python3.10 -m venv /venv +ENV PATH /venv/bin:$PATH + ###################### Install pip RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ && python3 /tmp/get-pip.py \ @@ -84,4 +87,4 @@ RUN python3 -m pip COPY requirements.txt /requirements.txt RUN python3 -m pip install --require-hashes -r requirements.txt -CMD ["python3.8"] +CMD ["python3.10"] diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt index 0e5d70f20f83..7129c7715594 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt @@ -4,9 +4,9 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.2.3 \ - --hash=sha256:bf7900329262e481be5a15f56f19736b376df6f82ed27576fa893652c5de6c23 \ - --hash=sha256:c12355e0494c76a2a7b73e3a59b09024ca0ba1e279fb9ed6c1b82d5b74b6a70c +argcomplete==3.4.0 \ + --hash=sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5 \ + --hash=sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f # via nox colorlog==6.8.2 \ --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ @@ -16,23 +16,27 @@ distlib==0.3.8 \ --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 # via virtualenv -filelock==3.13.1 \ - --hash=sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e \ - --hash=sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c +filelock==3.15.4 \ + --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ + --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 # via virtualenv -nox==2024.3.2 \ - --hash=sha256:e53514173ac0b98dd47585096a55572fe504fecede58ced708979184d05440be \ - --hash=sha256:f521ae08a15adbf5e11f16cb34e8d0e6ea521e0b92868f684e91677deb974553 +nox==2024.4.15 \ + --hash=sha256:6492236efa15a460ecb98e7b67562a28b70da006ab0be164e8821177577c0565 \ + --hash=sha256:ecf6700199cdfa9e5ea0a41ff5e6ef4641d09508eda6edb89d9987864115817f # via -r requirements.in -packaging==24.0 \ - --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ - --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 +packaging==24.1 \ + --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ + --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 # via nox -platformdirs==4.2.0 \ - --hash=sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068 \ - --hash=sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768 +platformdirs==4.2.2 \ + --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ + --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 # via virtualenv -virtualenv==20.25.1 \ - --hash=sha256:961c026ac520bac5f69acb8ea063e8a4f071bcc9457b9c1f28f6b085c511583a \ - --hash=sha256:e08e13ecdca7a0bd53798f356d5831434afa5b07b93f0abdf0797b7a06ffe197 +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via nox +virtualenv==20.26.3 \ + --hash=sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a \ + --hash=sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589 # via nox diff --git a/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh b/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh index 6f3972140e80..c435402f473e 100755 --- a/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh +++ b/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC. +# Copyright 2024 Google LLC. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh index 9eafe0be3bba..38f083f05aa0 100755 --- a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh +++ b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/release.sh b/packages/google-cloud-bigquery/.kokoro/release.sh index 078fc1c20f1b..81cee716e943 100755 --- a/packages/google-cloud-bigquery/.kokoro/release.sh +++ b/packages/google-cloud-bigquery/.kokoro/release.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 51f92b8e12f1..9622baf0ba38 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -4,21 +4,25 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.1.4 \ - --hash=sha256:72558ba729e4c468572609817226fb0a6e7e9a0a7d477b882be168c0b4a62b94 \ - --hash=sha256:fbe56f8cda08aa9a04b307d8482ea703e96a6a801611acb4be9bf3942017989f +argcomplete==3.4.0 \ + --hash=sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5 \ + --hash=sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f # via nox -attrs==23.1.0 \ - --hash=sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04 \ - --hash=sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015 +attrs==23.2.0 \ + --hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \ + --hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1 # via gcp-releasetool -cachetools==5.3.2 \ - --hash=sha256:086ee420196f7b2ab9ca2db2520aca326318b68fe5ba8bc4d49cca91add450f2 \ - --hash=sha256:861f35a13a451f94e301ce2bec7cac63e881232ccce7ed67fab9b5df4d3beaa1 +backports-tarfile==1.2.0 \ + --hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \ + --hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991 + # via jaraco-context +cachetools==5.3.3 \ + --hash=sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945 \ + --hash=sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105 # via google-auth -certifi==2023.7.22 \ - --hash=sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082 \ - --hash=sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9 +certifi==2024.7.4 \ + --hash=sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b \ + --hash=sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90 # via requests cffi==1.16.0 \ --hash=sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc \ @@ -87,90 +91,90 @@ click==8.0.4 \ # -r requirements.in # gcp-docuploader # gcp-releasetool -colorlog==6.7.0 \ - --hash=sha256:0d33ca236784a1ba3ff9c532d4964126d8a2c44f1f0cb1d2b0728196f512f662 \ - --hash=sha256:bd94bd21c1e13fac7bd3153f4bc3a7dc0eb0974b8bc2fdf1a989e474f6e582e5 +colorlog==6.8.2 \ + --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ + --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 # via # gcp-docuploader # nox -cryptography==42.0.5 \ - --hash=sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee \ - --hash=sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576 \ - --hash=sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d \ - --hash=sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30 \ - --hash=sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413 \ - --hash=sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb \ - --hash=sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da \ - --hash=sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4 \ - --hash=sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd \ - --hash=sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc \ - --hash=sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8 \ - --hash=sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1 \ - --hash=sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc \ - --hash=sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e \ - --hash=sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8 \ - --hash=sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940 \ - --hash=sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400 \ - --hash=sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7 \ - --hash=sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16 \ - --hash=sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278 \ - --hash=sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74 \ - --hash=sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec \ - --hash=sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1 \ - --hash=sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2 \ - --hash=sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c \ - --hash=sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922 \ - --hash=sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a \ - --hash=sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6 \ - --hash=sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1 \ - --hash=sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e \ - --hash=sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac \ - --hash=sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7 +cryptography==42.0.8 \ + --hash=sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad \ + --hash=sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583 \ + --hash=sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b \ + --hash=sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c \ + --hash=sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1 \ + --hash=sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648 \ + --hash=sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949 \ + --hash=sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba \ + --hash=sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c \ + --hash=sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9 \ + --hash=sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d \ + --hash=sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c \ + --hash=sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e \ + --hash=sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2 \ + --hash=sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d \ + --hash=sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7 \ + --hash=sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70 \ + --hash=sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2 \ + --hash=sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7 \ + --hash=sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14 \ + --hash=sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe \ + --hash=sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e \ + --hash=sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71 \ + --hash=sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961 \ + --hash=sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7 \ + --hash=sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c \ + --hash=sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28 \ + --hash=sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842 \ + --hash=sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902 \ + --hash=sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801 \ + --hash=sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a \ + --hash=sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e # via # -r requirements.in # gcp-releasetool # secretstorage -distlib==0.3.7 \ - --hash=sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057 \ - --hash=sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8 +distlib==0.3.8 \ + --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ + --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 # via virtualenv -docutils==0.20.1 \ - --hash=sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6 \ - --hash=sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b +docutils==0.21.2 \ + --hash=sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f \ + --hash=sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 # via readme-renderer -filelock==3.13.1 \ - --hash=sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e \ - --hash=sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c +filelock==3.15.4 \ + --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ + --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 # via virtualenv gcp-docuploader==0.6.5 \ --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea # via -r requirements.in -gcp-releasetool==2.0.0 \ - --hash=sha256:3d73480b50ba243f22d7c7ec08b115a30e1c7817c4899781840c26f9c55b8277 \ - --hash=sha256:7aa9fd935ec61e581eb8458ad00823786d91756c25e492f372b2b30962f3c28f +gcp-releasetool==2.0.1 \ + --hash=sha256:34314a910c08e8911d9c965bd44f8f2185c4f556e737d719c33a41f6a610de96 \ + --hash=sha256:b0d5863c6a070702b10883d37c4bdfd74bf930fe417f36c0c965d3b7c779ae62 # via -r requirements.in -google-api-core==2.12.0 \ - --hash=sha256:c22e01b1e3c4dcd90998494879612c38d0a3411d1f7b679eb89e2abe3ce1f553 \ - --hash=sha256:ec6054f7d64ad13b41e43d96f735acbd763b0f3b695dabaa2d579673f6a6e160 +google-api-core==2.19.1 \ + --hash=sha256:f12a9b8309b5e21d92483bbd47ce2c445861ec7d269ef6784ecc0ea8c1fa6125 \ + --hash=sha256:f4695f1e3650b316a795108a76a1c416e6afb036199d1c1f1f110916df479ffd # via # google-cloud-core # google-cloud-storage -google-auth==2.23.4 \ - --hash=sha256:79905d6b1652187def79d491d6e23d0cbb3a21d3c7ba0dbaa9c8a01906b13ff3 \ - --hash=sha256:d4bbc92fe4b8bfd2f3e8d88e5ba7085935da208ee38a134fc280e7ce682a05f2 +google-auth==2.31.0 \ + --hash=sha256:042c4702efa9f7d3c48d3a69341c209381b125faa6dbf3ebe56bc7e40ae05c23 \ + --hash=sha256:87805c36970047247c8afe614d4e3af8eceafc1ebba0c679fe75ddd1d575e871 # via # gcp-releasetool # google-api-core # google-cloud-core # google-cloud-storage -google-cloud-core==2.3.3 \ - --hash=sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb \ - --hash=sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863 +google-cloud-core==2.4.1 \ + --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ + --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 # via google-cloud-storage -google-cloud-storage==2.13.0 \ - --hash=sha256:ab0bf2e1780a1b74cf17fccb13788070b729f50c252f0c94ada2aae0ca95437d \ - --hash=sha256:f62dc4c7b6cd4360d072e3deb28035fbdad491ac3d9b0b1815a12daea10f37c7 +google-cloud-storage==2.17.0 \ + --hash=sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388 \ + --hash=sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1 # via gcp-docuploader google-crc32c==1.5.0 \ --hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \ @@ -244,28 +248,36 @@ google-crc32c==1.5.0 \ # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.6.0 \ - --hash=sha256:972852f6c65f933e15a4a210c2b96930763b47197cdf4aa5f5bea435efb626e7 \ - --hash=sha256:fc03d344381970f79eebb632a3c18bb1828593a2dc5572b5f90115ef7d11e81b +google-resumable-media==2.7.1 \ + --hash=sha256:103ebc4ba331ab1bfdac0250f8033627a2cd7cde09e7ccff9181e31ba4315b2c \ + --hash=sha256:eae451a7b2e2cdbaaa0fd2eb00cc8a1ee5e95e16b55597359cbc3d27d7d90e33 # via google-cloud-storage -googleapis-common-protos==1.61.0 \ - --hash=sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0 \ - --hash=sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b +googleapis-common-protos==1.63.2 \ + --hash=sha256:27a2499c7e8aff199665b22741997e485eccc8645aa9176c7c988e6fae507945 \ + --hash=sha256:27c5abdffc4911f28101e635de1533fb4cfd2c37fbaa9174587c799fac90aa87 # via google-api-core idna==3.7 \ --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 # via requests -importlib-metadata==6.8.0 \ - --hash=sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb \ - --hash=sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743 +importlib-metadata==8.0.0 \ + --hash=sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f \ + --hash=sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812 # via # -r requirements.in # keyring # twine -jaraco-classes==3.3.0 \ - --hash=sha256:10afa92b6743f25c0cf5f37c6bb6e18e2c5bb84a16527ccfc0040ea377e7aaeb \ - --hash=sha256:c063dd08e89217cee02c8d5e5ec560f2c8ce6cdc2fcdc2e68f7b2e5547ed3621 +jaraco-classes==3.4.0 \ + --hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \ + --hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790 + # via keyring +jaraco-context==5.3.0 \ + --hash=sha256:3e16388f7da43d384a1a7cd3452e72e14732ac9fe459678773a3608a812bf266 \ + --hash=sha256:c2f67165ce1f9be20f32f650f25d8edfc1646a8aeee48ae06fb35f90763576d2 + # via keyring +jaraco-functools==4.0.1 \ + --hash=sha256:3b24ccb921d6b593bdceb56ce14799204f473976e2a9d4b15b04d0f2c2326664 \ + --hash=sha256:d33fa765374c0611b52f8b3a795f8900869aa88c84769d4d1746cd68fb28c3e8 # via keyring jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ @@ -273,13 +285,13 @@ jeepney==0.8.0 \ # via # keyring # secretstorage -jinja2==3.1.3 \ - --hash=sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa \ - --hash=sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90 +jinja2==3.1.4 \ + --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ + --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d # via gcp-releasetool -keyring==24.2.0 \ - --hash=sha256:4901caaf597bfd3bbd78c9a0c7c4c29fcd8310dab2cffefe749e916b6527acd6 \ - --hash=sha256:ca0746a19ec421219f4d713f848fa297a661a8a8c1504867e55bfb5e09091509 +keyring==25.2.1 \ + --hash=sha256:2458681cdefc0dbc0b7eb6cf75d0b98e59f9ad9b2d4edd319d18f68bdca95e50 \ + --hash=sha256:daaffd42dbda25ddafb1ad5fec4024e5bbcfe424597ca1ca452b299861e49f1b # via # gcp-releasetool # twine @@ -287,146 +299,153 @@ markdown-it-py==3.0.0 \ --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb # via rich -markupsafe==2.1.3 \ - --hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \ - --hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \ - --hash=sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431 \ - --hash=sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686 \ - --hash=sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c \ - --hash=sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559 \ - --hash=sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc \ - --hash=sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb \ - --hash=sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939 \ - --hash=sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c \ - --hash=sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0 \ - --hash=sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4 \ - --hash=sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9 \ - --hash=sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575 \ - --hash=sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba \ - --hash=sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d \ - --hash=sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd \ - --hash=sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3 \ - --hash=sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00 \ - --hash=sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155 \ - --hash=sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac \ - --hash=sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52 \ - --hash=sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f \ - --hash=sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8 \ - --hash=sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b \ - --hash=sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007 \ - --hash=sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24 \ - --hash=sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea \ - --hash=sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198 \ - --hash=sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0 \ - --hash=sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee \ - --hash=sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be \ - --hash=sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2 \ - --hash=sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1 \ - --hash=sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707 \ - --hash=sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6 \ - --hash=sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c \ - --hash=sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58 \ - --hash=sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823 \ - --hash=sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779 \ - --hash=sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636 \ - --hash=sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c \ - --hash=sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad \ - --hash=sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee \ - --hash=sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc \ - --hash=sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2 \ - --hash=sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48 \ - --hash=sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7 \ - --hash=sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e \ - --hash=sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b \ - --hash=sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa \ - --hash=sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5 \ - --hash=sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e \ - --hash=sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb \ - --hash=sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9 \ - --hash=sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57 \ - --hash=sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc \ - --hash=sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc \ - --hash=sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2 \ - --hash=sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11 +markupsafe==2.1.5 \ + --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \ + --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \ + --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \ + --hash=sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 \ + --hash=sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532 \ + --hash=sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f \ + --hash=sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617 \ + --hash=sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df \ + --hash=sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4 \ + --hash=sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906 \ + --hash=sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f \ + --hash=sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4 \ + --hash=sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8 \ + --hash=sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371 \ + --hash=sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2 \ + --hash=sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465 \ + --hash=sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52 \ + --hash=sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6 \ + --hash=sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169 \ + --hash=sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad \ + --hash=sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2 \ + --hash=sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0 \ + --hash=sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029 \ + --hash=sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f \ + --hash=sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a \ + --hash=sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced \ + --hash=sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5 \ + --hash=sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c \ + --hash=sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf \ + --hash=sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9 \ + --hash=sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb \ + --hash=sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad \ + --hash=sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3 \ + --hash=sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1 \ + --hash=sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46 \ + --hash=sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc \ + --hash=sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a \ + --hash=sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee \ + --hash=sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900 \ + --hash=sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5 \ + --hash=sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea \ + --hash=sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f \ + --hash=sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5 \ + --hash=sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e \ + --hash=sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a \ + --hash=sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f \ + --hash=sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50 \ + --hash=sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a \ + --hash=sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b \ + --hash=sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4 \ + --hash=sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff \ + --hash=sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2 \ + --hash=sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46 \ + --hash=sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b \ + --hash=sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf \ + --hash=sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 \ + --hash=sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5 \ + --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \ + --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \ + --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68 # via jinja2 mdurl==0.1.2 \ --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba # via markdown-it-py -more-itertools==10.1.0 \ - --hash=sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a \ - --hash=sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6 - # via jaraco-classes -nh3==0.2.14 \ - --hash=sha256:116c9515937f94f0057ef50ebcbcc10600860065953ba56f14473ff706371873 \ - --hash=sha256:18415df36db9b001f71a42a3a5395db79cf23d556996090d293764436e98e8ad \ - --hash=sha256:203cac86e313cf6486704d0ec620a992c8bc164c86d3a4fd3d761dd552d839b5 \ - --hash=sha256:2b0be5c792bd43d0abef8ca39dd8acb3c0611052ce466d0401d51ea0d9aa7525 \ - --hash=sha256:377aaf6a9e7c63962f367158d808c6a1344e2b4f83d071c43fbd631b75c4f0b2 \ - --hash=sha256:525846c56c2bcd376f5eaee76063ebf33cf1e620c1498b2a40107f60cfc6054e \ - --hash=sha256:5529a3bf99402c34056576d80ae5547123f1078da76aa99e8ed79e44fa67282d \ - --hash=sha256:7771d43222b639a4cd9e341f870cee336b9d886de1ad9bec8dddab22fe1de450 \ - --hash=sha256:88c753efbcdfc2644a5012938c6b9753f1c64a5723a67f0301ca43e7b85dcf0e \ - --hash=sha256:93a943cfd3e33bd03f77b97baa11990148687877b74193bf777956b67054dcc6 \ - --hash=sha256:9be2f68fb9a40d8440cbf34cbf40758aa7f6093160bfc7fb018cce8e424f0c3a \ - --hash=sha256:a0c509894fd4dccdff557068e5074999ae3b75f4c5a2d6fb5415e782e25679c4 \ - --hash=sha256:ac8056e937f264995a82bf0053ca898a1cb1c9efc7cd68fa07fe0060734df7e4 \ - --hash=sha256:aed56a86daa43966dd790ba86d4b810b219f75b4bb737461b6886ce2bde38fd6 \ - --hash=sha256:e8986f1dd3221d1e741fda0a12eaa4a273f1d80a35e31a1ffe579e7c621d069e \ - --hash=sha256:f99212a81c62b5f22f9e7c3e347aa00491114a5647e1f13bbebd79c3e5f08d75 +more-itertools==10.3.0 \ + --hash=sha256:e5d93ef411224fbcef366a6e8ddc4c5781bc6359d43412a65dd5964e46111463 \ + --hash=sha256:ea6a02e24a9161e51faad17a8782b92a0df82c12c1c8886fec7f0c3fa1a1b320 + # via + # jaraco-classes + # jaraco-functools +nh3==0.2.18 \ + --hash=sha256:0411beb0589eacb6734f28d5497ca2ed379eafab8ad8c84b31bb5c34072b7164 \ + --hash=sha256:14c5a72e9fe82aea5fe3072116ad4661af5cf8e8ff8fc5ad3450f123e4925e86 \ + --hash=sha256:19aaba96e0f795bd0a6c56291495ff59364f4300d4a39b29a0abc9cb3774a84b \ + --hash=sha256:34c03fa78e328c691f982b7c03d4423bdfd7da69cd707fe572f544cf74ac23ad \ + --hash=sha256:36c95d4b70530b320b365659bb5034341316e6a9b30f0b25fa9c9eff4c27a204 \ + --hash=sha256:3a157ab149e591bb638a55c8c6bcb8cdb559c8b12c13a8affaba6cedfe51713a \ + --hash=sha256:42c64511469005058cd17cc1537578eac40ae9f7200bedcfd1fc1a05f4f8c200 \ + --hash=sha256:5f36b271dae35c465ef5e9090e1fdaba4a60a56f0bb0ba03e0932a66f28b9189 \ + --hash=sha256:6955369e4d9f48f41e3f238a9e60f9410645db7e07435e62c6a9ea6135a4907f \ + --hash=sha256:7b7c2a3c9eb1a827d42539aa64091640bd275b81e097cd1d8d82ef91ffa2e811 \ + --hash=sha256:8ce0f819d2f1933953fca255db2471ad58184a60508f03e6285e5114b6254844 \ + --hash=sha256:94a166927e53972a9698af9542ace4e38b9de50c34352b962f4d9a7d4c927af4 \ + --hash=sha256:a7f1b5b2c15866f2db413a3649a8fe4fd7b428ae58be2c0f6bca5eefd53ca2be \ + --hash=sha256:c8b3a1cebcba9b3669ed1a84cc65bf005728d2f0bc1ed2a6594a992e817f3a50 \ + --hash=sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307 \ + --hash=sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe # via readme-renderer -nox==2023.4.22 \ - --hash=sha256:0b1adc619c58ab4fa57d6ab2e7823fe47a32e70202f287d78474adcc7bda1891 \ - --hash=sha256:46c0560b0dc609d7d967dc99e22cb463d3c4caf54a5fda735d6c11b5177e3a9f +nox==2024.4.15 \ + --hash=sha256:6492236efa15a460ecb98e7b67562a28b70da006ab0be164e8821177577c0565 \ + --hash=sha256:ecf6700199cdfa9e5ea0a41ff5e6ef4641d09508eda6edb89d9987864115817f # via -r requirements.in -packaging==23.2 \ - --hash=sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5 \ - --hash=sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7 +packaging==24.1 \ + --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ + --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 # via # gcp-releasetool # nox -pkginfo==1.9.6 \ - --hash=sha256:4b7a555a6d5a22169fcc9cf7bfd78d296b0361adad412a346c1226849af5e546 \ - --hash=sha256:8fd5896e8718a4372f0ea9cc9d96f6417c9b986e23a4d116dda26b62cc29d046 +pkginfo==1.10.0 \ + --hash=sha256:5df73835398d10db79f8eecd5cd86b1f6d29317589ea70796994d49399af6297 \ + --hash=sha256:889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097 # via twine -platformdirs==3.11.0 \ - --hash=sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3 \ - --hash=sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e +platformdirs==4.2.2 \ + --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ + --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 # via virtualenv -protobuf==4.25.3 \ - --hash=sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4 \ - --hash=sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8 \ - --hash=sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c \ - --hash=sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d \ - --hash=sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4 \ - --hash=sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa \ - --hash=sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c \ - --hash=sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019 \ - --hash=sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9 \ - --hash=sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c \ - --hash=sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2 +proto-plus==1.24.0 \ + --hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \ + --hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12 + # via google-api-core +protobuf==5.27.2 \ + --hash=sha256:0e341109c609749d501986b835f667c6e1e24531096cff9d34ae411595e26505 \ + --hash=sha256:176c12b1f1c880bf7a76d9f7c75822b6a2bc3db2d28baa4d300e8ce4cde7409b \ + --hash=sha256:354d84fac2b0d76062e9b3221f4abbbacdfd2a4d8af36bab0474f3a0bb30ab38 \ + --hash=sha256:4fadd8d83e1992eed0248bc50a4a6361dc31bcccc84388c54c86e530b7f58863 \ + --hash=sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470 \ + --hash=sha256:610e700f02469c4a997e58e328cac6f305f649826853813177e6290416e846c6 \ + --hash=sha256:7fc3add9e6003e026da5fc9e59b131b8f22b428b991ccd53e2af8071687b4fce \ + --hash=sha256:9e8f199bf7f97bd7ecebffcae45ebf9527603549b2b562df0fbc6d4d688f14ca \ + --hash=sha256:a109916aaac42bff84702fb5187f3edadbc7c97fc2c99c5ff81dd15dcce0d1e5 \ + --hash=sha256:b848dbe1d57ed7c191dfc4ea64b8b004a3f9ece4bf4d0d80a367b76df20bf36e \ + --hash=sha256:f3ecdef226b9af856075f28227ff2c90ce3a594d092c39bee5513573f25e2714 # via # gcp-docuploader # gcp-releasetool # google-api-core # googleapis-common-protos -pyasn1==0.5.0 \ - --hash=sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57 \ - --hash=sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde + # proto-plus +pyasn1==0.6.0 \ + --hash=sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c \ + --hash=sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473 # via # pyasn1-modules # rsa -pyasn1-modules==0.3.0 \ - --hash=sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c \ - --hash=sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d +pyasn1-modules==0.4.0 \ + --hash=sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6 \ + --hash=sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b # via google-auth -pycparser==2.21 \ - --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \ - --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206 +pycparser==2.22 \ + --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ + --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc # via cffi -pygments==2.16.1 \ - --hash=sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692 \ - --hash=sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29 +pygments==2.18.0 \ + --hash=sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199 \ + --hash=sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a # via # readme-renderer # rich @@ -434,20 +453,20 @@ pyjwt==2.8.0 \ --hash=sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de \ --hash=sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320 # via gcp-releasetool -pyperclip==1.8.2 \ - --hash=sha256:105254a8b04934f0bc84e9c24eb360a591aaf6535c9def5f29d92af107a9bf57 +pyperclip==1.9.0 \ + --hash=sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310 # via gcp-releasetool -python-dateutil==2.8.2 \ - --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ - --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 +python-dateutil==2.9.0.post0 \ + --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ + --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 # via gcp-releasetool -readme-renderer==42.0 \ - --hash=sha256:13d039515c1f24de668e2c93f2e877b9dbe6c6c32328b90a40a49d8b2b85f36d \ - --hash=sha256:2d55489f83be4992fe4454939d1a051c33edbab778e82761d060c9fc6b308cd1 +readme-renderer==44.0 \ + --hash=sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151 \ + --hash=sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1 # via twine -requests==2.31.0 \ - --hash=sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f \ - --hash=sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1 +requests==2.32.3 \ + --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ + --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 # via # gcp-releasetool # google-api-core @@ -462,9 +481,9 @@ rfc3986==2.0.0 \ --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c # via twine -rich==13.6.0 \ - --hash=sha256:2b38e2fe9ca72c9a00170a1a2d20c63c790d0e10ef1fe35eba76e1e7b1d7d245 \ - --hash=sha256:5c14d22737e6d5084ef4771b62d5d4363165b403455a30a1c8ca39dc7b644bef +rich==13.7.1 \ + --hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \ + --hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432 # via twine rsa==4.9 \ --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ @@ -480,35 +499,39 @@ six==1.16.0 \ # via # gcp-docuploader # python-dateutil -twine==4.0.2 \ - --hash=sha256:929bc3c280033347a00f847236564d1c52a3e61b1ac2516c97c48f3ceab756d8 \ - --hash=sha256:9e102ef5fdd5a20661eb88fad46338806c3bd32cf1db729603fe3697b1bc83c8 +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via nox +twine==5.1.1 \ + --hash=sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997 \ + --hash=sha256:9aa0825139c02b3434d913545c7b847a21c835e11597f5255842d457da2322db # via -r requirements.in -typing-extensions==4.8.0 \ - --hash=sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0 \ - --hash=sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef +typing-extensions==4.12.2 \ + --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ + --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 # via -r requirements.in -urllib3==2.0.7 \ - --hash=sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84 \ - --hash=sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e +urllib3==2.2.2 \ + --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \ + --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168 # via # requests # twine -virtualenv==20.24.6 \ - --hash=sha256:02ece4f56fbf939dbbc33c0715159951d6bf14aaf5457b092e4548e1382455af \ - --hash=sha256:520d056652454c5098a00c0f073611ccbea4c79089331f60bf9d7ba247bb7381 +virtualenv==20.26.3 \ + --hash=sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a \ + --hash=sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589 # via nox -wheel==0.41.3 \ - --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \ - --hash=sha256:4d4987ce51a49370ea65c0bfd2234e8ce80a12780820d9dc462597a6e60d0841 +wheel==0.43.0 \ + --hash=sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85 \ + --hash=sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81 # via -r requirements.in -zipp==3.17.0 \ - --hash=sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31 \ - --hash=sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0 +zipp==3.19.2 \ + --hash=sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19 \ + --hash=sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==69.2.0 \ - --hash=sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e \ - --hash=sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c +setuptools==70.2.0 \ + --hash=sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05 \ + --hash=sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1 # via -r requirements.in diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh index 63ac41dfae1d..e9d8bd79a644 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh index 5a0f5fab6a89..55910c8ba178 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples.sh b/packages/google-cloud-bigquery/.kokoro/test-samples.sh index 50b35a48c190..7933d820149a 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/trampoline.sh b/packages/google-cloud-bigquery/.kokoro/trampoline.sh index d85b1f267693..48f79699706e 100755 --- a/packages/google-cloud-bigquery/.kokoro/trampoline.sh +++ b/packages/google-cloud-bigquery/.kokoro/trampoline.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh b/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh index 59a7cf3a9373..35fa529231dc 100755 --- a/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh +++ b/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.pre-commit-config.yaml b/packages/google-cloud-bigquery/.pre-commit-config.yaml index 6a8e16950664..1d74695f70b6 100644 --- a/packages/google-cloud-bigquery/.pre-commit-config.yaml +++ b/packages/google-cloud-bigquery/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/.trampolinerc b/packages/google-cloud-bigquery/.trampolinerc index a7dfeb42c6d0..0080152373d5 100644 --- a/packages/google-cloud-bigquery/.trampolinerc +++ b/packages/google-cloud-bigquery/.trampolinerc @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/MANIFEST.in b/packages/google-cloud-bigquery/MANIFEST.in index e0a66705318e..d6814cd60037 100644 --- a/packages/google-cloud-bigquery/MANIFEST.in +++ b/packages/google-cloud-bigquery/MANIFEST.in @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index d0468e25af6e..826298090d45 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 778cc3e53d16..07805d11a7eb 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -101,29 +101,6 @@ # Add .pytype to .gitignore s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") -# Add pytype config to setup.cfg -s.replace( - "setup.cfg", - r"universal = 1", - textwrap.dedent( - """ \\g<0> - - [pytype] - python_version = 3.8 - inputs = - google/cloud/ - exclude = - tests/ - google/cloud/bigquery_v2/ # Legacy proto-based types. - output = .pytype/ - disable = - # There's some issue with finding some pyi files, thus disabling. - # The issue https://github.com/google/pytype/issues/150 is closed, but the - # error still occurs for some reason. - pyi-error""" - ), -) - s.shell.run(["nox", "-s", "blacken"], hide_output=False) for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) diff --git a/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh b/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh index 0018b421ddf8..120b0ddc4364 100755 --- a/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh +++ b/packages/google-cloud-bigquery/scripts/decrypt-secrets.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2023 Google LLC All rights reserved. +# Copyright 2024 Google LLC All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py b/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py index 1acc119835b5..8f5e248a0da1 100644 --- a/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py +++ b/packages/google-cloud-bigquery/scripts/readme-gen/readme_gen.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 14739a31cb69bfdcc633df779da7861cdd84f44b Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 17 Jul 2024 17:35:07 -0400 Subject: [PATCH 1820/2016] fix: Allow protobuf 5.x; require protobuf >=3.20.2; proto-plus >=1.22.3 (#1976) * fix: Allow protobuf 5.x; require protobuf >=3.20.2; proto-plus >=1.22.3 * Update constraints --------- Co-authored-by: Lingqing Gan --- packages/google-cloud-bigquery/setup.py | 4 ++-- packages/google-cloud-bigquery/testing/constraints-3.7.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 9641fe695b97..617685543ca9 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -75,8 +75,8 @@ "opentelemetry-instrumentation >= 0.20b0", ], "bigquery_v2": [ - "proto-plus >= 1.22.0, <2.0.0dev", - "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. + "proto-plus >= 1.22.3, <2.0.0dev", + "protobuf>=3.20.2,<6.0.0dev,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. ], } diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt index fda7ce9512e4..55e63449f98b 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.7.txt @@ -26,8 +26,8 @@ opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 packaging==20.0.0 pandas==1.1.0 -proto-plus==1.22.0 -protobuf==3.19.5 +proto-plus==1.22.3 +protobuf==3.20.2 pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.21.0 From ea5d6acde8c84e5fb8c676a7ed947779b372c90b Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 19 Jul 2024 13:13:40 -0700 Subject: [PATCH 1821/2016] docs: add short mode query sample & test (#1978) * docs: add short mode query sample & test --- .../samples/client_query_shortmode.py | 53 +++++++++++++++++++ .../tests/test_client_query_shortmode.py | 26 +++++++++ 2 files changed, 79 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/client_query_shortmode.py create mode 100644 packages/google-cloud-bigquery/samples/tests/test_client_query_shortmode.py diff --git a/packages/google-cloud-bigquery/samples/client_query_shortmode.py b/packages/google-cloud-bigquery/samples/client_query_shortmode.py new file mode 100644 index 000000000000..50446dc48a71 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/client_query_shortmode.py @@ -0,0 +1,53 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_shortmode() -> None: + # [START bigquery_query_shortquery] + # This example demonstrates issuing a query that may be run in short query mode. + # + # To enable the short query mode preview feature, the QUERY_PREVIEW_ENABLED + # environmental variable should be set to `TRUE`. + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + query = """ + SELECT + name, + gender, + SUM(number) AS total + FROM + bigquery-public-data.usa_names.usa_1910_2013 + GROUP BY + name, gender + ORDER BY + total DESC + LIMIT 10 + """ + # Run the query. The returned `rows` iterator can return information about + # how the query was executed as well as the result data. + rows = client.query_and_wait(query) + + if rows.job_id is not None: + print("Query was run with job state. Job ID: {}".format(rows.job_id)) + else: + print("Query was run in short mode. Query ID: {}".format(rows.query_id)) + + print("The query data:") + for row in rows: + # Row values can be accessed by field name or index. + print("name={}, gender={}, total={}".format(row[0], row[1], row["total"])) + # [END bigquery_query_shortquery] diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_shortmode.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_shortmode.py new file mode 100644 index 000000000000..41132f24cbea --- /dev/null +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_shortmode.py @@ -0,0 +1,26 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from .. import client_query_shortmode + +if typing.TYPE_CHECKING: + import pytest + + +def test_client_query_shortmode(capsys: "pytest.CaptureFixture[str]") -> None: + client_query_shortmode.client_query_shortmode() + out, err = capsys.readouterr() + assert "Query was run" in out From 9af92945a86a120754ea15202d48c67517b28b40 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 25 Jul 2024 20:16:55 +0200 Subject: [PATCH 1822/2016] chore(deps): update all dependencies (#1982) --- .../samples/desktopapp/requirements-test.txt | 2 +- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 14 +++++++------- .../samples/magics/requirements-test.txt | 2 +- .../samples/notebooks/requirements-test.txt | 2 +- .../samples/notebooks/requirements.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 8f0bfaad4ca5..68f9039cc6ba 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.2; python_version >= '3.8' +pytest==8.3.2; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 6a3d17c6f6a2..dafb60b2a2cb 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ google-cloud-bigquery==3.25.0 -google-auth-oauthlib==1.2.0 +google-auth-oauthlib==1.2.1 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index b35a54a76581..335236a1439a 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.2.2; python_version >= '3.8' +pytest==8.3.2; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index add61f286362..ccd9f4f2ea65 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,17 +12,17 @@ Fiona==1.9.6 geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' -geopandas==0.14.4; python_version >= '3.9' -google-api-core==2.19.0 -google-auth==2.30.0 +geopandas==1.0.1; python_version >= '3.9' +google-api-core==2.19.1 +google-auth==2.32.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.1 -googleapis-common-protos==1.63.1 +googleapis-common-protos==1.63.2 grpcio===1.62.2; python_version == '3.7' -grpcio==1.64.1; python_version >= '3.8' +grpcio==1.65.1; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 @@ -33,7 +33,7 @@ pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==16.1.0; python_version >= '3.8' +pyarrow==17.0.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' @@ -47,7 +47,7 @@ PyYAML==6.0.1 requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 -Shapely==2.0.4 +Shapely==2.0.5 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 8f0bfaad4ca5..68f9039cc6ba 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.2; python_version >= '3.8' +pytest==8.3.2; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 8f0bfaad4ca5..68f9039cc6ba 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.2; python_version >= '3.8' +pytest==8.3.2; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 3896a2aeca35..81fa3782c2b8 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -7,7 +7,7 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.9.0; python_version >= '3.9' +matplotlib==3.9.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index b65023b00fac..083b2027154a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.2; python_version >= '3.8' +pytest==8.3.2; python_version >= '3.8' mock==5.1.0 From bf716ceb4f918d47c25bc878a5cbb3ff8d784653 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 9 Aug 2024 14:47:03 -0500 Subject: [PATCH 1823/2016] chore(python): fix docs build (#1984) Source-Link: https://github.com/googleapis/synthtool/commit/bef813d194de29ddf3576eda60148b6b3dcc93d9 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:94bb690db96e6242b2567a4860a94d48fa48696d092e51b0884a1a2c0a79a407 Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 3 ++- .../.kokoro/docker/docs/Dockerfile | 9 ++++----- .../.kokoro/publish-docs.sh | 20 +++++++++---------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 001b1b1cabec..6d064ddb9b06 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,4 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:52210e0e0559f5ea8c52be148b33504022e1faef4e95fbe4b32d68022af2fa7e + digest: sha256:94bb690db96e6242b2567a4860a94d48fa48696d092e51b0884a1a2c0a79a407 +# created: 2024-07-31T14:52:44.926548819Z diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile index 5205308b334d..e5410e296bd8 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile @@ -72,19 +72,18 @@ RUN tar -xvf Python-3.10.14.tgz RUN ./Python-3.10.14/configure --enable-optimizations RUN make altinstall -RUN python3.10 -m venv /venv -ENV PATH /venv/bin:$PATH +ENV PATH /usr/local/bin/python3.10:$PATH ###################### Install pip RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3 /tmp/get-pip.py \ + && python3.10 /tmp/get-pip.py \ && rm /tmp/get-pip.py # Test pip -RUN python3 -m pip +RUN python3.10 -m pip # Install build requirements COPY requirements.txt /requirements.txt -RUN python3 -m pip install --require-hashes -r requirements.txt +RUN python3.10 -m pip install --require-hashes -r requirements.txt CMD ["python3.10"] diff --git a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh index 38f083f05aa0..233205d580e9 100755 --- a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh +++ b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh @@ -21,18 +21,18 @@ export PYTHONUNBUFFERED=1 export PATH="${HOME}/.local/bin:${PATH}" # Install nox -python3 -m pip install --require-hashes -r .kokoro/requirements.txt -python3 -m nox --version +python3.10 -m pip install --require-hashes -r .kokoro/requirements.txt +python3.10 -m nox --version # build docs nox -s docs # create metadata -python3 -m docuploader create-metadata \ +python3.10 -m docuploader create-metadata \ --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3 setup.py --version) \ + --version=$(python3.10 setup.py --version) \ --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3 setup.py --name) \ + --distribution-name=$(python3.10 setup.py --name) \ --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) @@ -40,18 +40,18 @@ python3 -m docuploader create-metadata \ cat docs.metadata # upload docs -python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" +python3.10 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" # docfx yaml files nox -s docfx # create metadata. -python3 -m docuploader create-metadata \ +python3.10 -m docuploader create-metadata \ --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3 setup.py --version) \ + --version=$(python3.10 setup.py --version) \ --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3 setup.py --name) \ + --distribution-name=$(python3.10 setup.py --name) \ --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) @@ -59,4 +59,4 @@ python3 -m docuploader create-metadata \ cat docs.metadata # upload docs -python3 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" +python3.10 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" From c5a3edb76d618fba7ee2e2c49102a99e447f37a4 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 13 Aug 2024 21:47:33 +0200 Subject: [PATCH 1824/2016] chore(deps): update all dependencies (#1983) Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index ccd9f4f2ea65..1a1cf4b04cdd 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,4 +1,4 @@ -attrs==23.2.0 +attrs==24.1.0 certifi==2024.7.4 cffi===1.15.1; python_version == '3.7' cffi==1.16.0; python_version >= '3.8' @@ -22,7 +22,7 @@ google-crc32c==1.5.0 google-resumable-media==2.7.1 googleapis-common-protos==1.63.2 grpcio===1.62.2; python_version == '3.7' -grpcio==1.65.1; python_version >= '3.8' +grpcio==1.65.4; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 From 765086a6e587e635b6a61b5d35bb98a0afdbf586 Mon Sep 17 00:00:00 2001 From: Suzy Mueller Date: Tue, 13 Aug 2024 15:24:24 -0700 Subject: [PATCH 1825/2016] fix: add warning when encountering unknown field types (#1989) * fix: add warning when encountering unknown field types The types returned for currently unsupported field types may change in the future, when support is added. Warn users that the types they are using are not yet supported. * fix: add warning for unknown subfield types as well * fix: remove unused warnings * fix: remove leftover debugging code * move test case closer to related test * add comments * fix formatting * fix test_table and use warnings.warn instead of pytest.warn * add explicit warning about behavior subject to change in the future add warning for write and warn about future behavior changes * add default converter for _SCALAR_VALUE_TO_JSON_PARAM * factor out shared warning * fix test case and make coverage happy * add unit test to StructQueryParameter class --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/_helpers.py | 38 +++++++++--- .../google/cloud/bigquery/_pandas_helpers.py | 4 +- .../google/cloud/bigquery/query.py | 20 +++--- .../tests/unit/test__helpers.py | 62 ++++++++++++++++++- .../tests/unit/test_query.py | 19 ++++++ .../tests/unit/test_table.py | 6 +- 6 files changed, 123 insertions(+), 26 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 5ee5e1850cf6..1eda80712969 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -21,6 +21,7 @@ import math import re import os +import warnings from typing import Optional, Union from dateutil import relativedelta @@ -297,12 +298,7 @@ def _record_from_json(value, field): record = {} record_iter = zip(field.fields, value["f"]) for subfield, cell in record_iter: - converter = _CELLDATA_FROM_JSON[subfield.field_type] - if subfield.mode == "REPEATED": - value = [converter(item["v"], subfield) for item in cell["v"]] - else: - value = converter(cell["v"], subfield) - record[subfield.name] = value + record[subfield.name] = _field_from_json(cell["v"], subfield) return record @@ -382,7 +378,11 @@ def _field_to_index_mapping(schema): def _field_from_json(resource, field): - converter = _CELLDATA_FROM_JSON.get(field.field_type, lambda value, _: value) + def default_converter(value, field): + _warn_unknown_field_type(field) + return value + + converter = _CELLDATA_FROM_JSON.get(field.field_type, default_converter) if field.mode == "REPEATED": return [converter(item["v"], field) for item in resource] else: @@ -484,6 +484,11 @@ def _json_to_json(value): return json.dumps(value) +def _string_to_json(value): + """NOOP string -> string coercion""" + return value + + def _timestamp_to_json_parameter(value): """Coerce 'value' to an JSON-compatible representation. @@ -596,6 +601,7 @@ def _range_field_to_json(range_element_type, value): "DATE": _date_to_json, "TIME": _time_to_json, "JSON": _json_to_json, + "STRING": _string_to_json, # Make sure DECIMAL and BIGDECIMAL are handled, even though # requests for them should be converted to NUMERIC. Better safe # than sorry. @@ -609,6 +615,15 @@ def _range_field_to_json(range_element_type, value): _SCALAR_VALUE_TO_JSON_PARAM["TIMESTAMP"] = _timestamp_to_json_parameter +def _warn_unknown_field_type(field): + warnings.warn( + "Unknown type '{}' for field '{}'. Behavior reading and writing this type is not officially supported and may change in the future.".format( + field.field_type, field.name + ), + FutureWarning, + ) + + def _scalar_field_to_json(field, row_value): """Maps a field and value to a JSON-safe value. @@ -621,9 +636,12 @@ def _scalar_field_to_json(field, row_value): Returns: Any: A JSON-serializable object. """ - converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) - if converter is None: # STRING doesn't need converting - return row_value + + def default_converter(value): + _warn_unknown_field_type(field) + return value + + converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type, default_converter) return converter(row_value) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 8395478fb15b..c21a02569ba9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -204,7 +204,9 @@ def bq_to_arrow_field(bq_field, array_type=None): metadata=metadata, ) - warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name)) + warnings.warn( + "Unable to determine Arrow type for field '{}'.".format(bq_field.name) + ) return None diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 9c59056fd6f7..f1090a7dcc8b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -591,9 +591,8 @@ def to_api_repr(self) -> dict: Dict: JSON mapping """ value = self.value - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) - if converter is not None: - value = converter(value) # type: ignore + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_, lambda value: value) + value = converter(value) # type: ignore resource: Dict[str, Any] = { "parameterType": {"type": self.type_}, "parameterValue": {"value": value}, @@ -748,9 +747,10 @@ def to_api_repr(self) -> dict: else: a_type = self.array_type.to_api_repr() - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"]) - if converter is not None: - values = [converter(value) for value in values] # type: ignore + converter = _SCALAR_VALUE_TO_JSON_PARAM.get( + a_type["type"], lambda value: value + ) + values = [converter(value) for value in values] # type: ignore a_values = [{"value": value} for value in values] resource = { @@ -792,7 +792,7 @@ def __repr__(self): class StructQueryParameter(_AbstractQueryParameter): - """Named / positional query parameters for struct values. + """Name / positional query parameters for struct values. Args: name (Optional[str]): @@ -897,10 +897,8 @@ def to_api_repr(self) -> dict: values[name] = repr_["parameterValue"] else: s_types[name] = {"name": name, "type": {"type": type_}} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) - if converter is not None: - value = converter(value) # type: ignore - values[name] = {"value": value} + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_, lambda value: value) + values[name] = {"value": converter(value)} resource = { "parameterType": { diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 1bf21479fc24..0a307498f674 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -17,6 +17,7 @@ import decimal import json import os +import warnings import pytest import packaging import unittest @@ -640,6 +641,17 @@ def test_w_single_scalar_column(self): row = {"f": [{"v": "1"}]} self.assertEqual(self._call_fut(row, schema=[col]), (1,)) + def test_w_unknown_type(self): + # SELECT 1 AS col + col = _Field("REQUIRED", "col", "UNKNOWN") + row = {"f": [{"v": "1"}]} + with warnings.catch_warnings(record=True) as warned: + self.assertEqual(self._call_fut(row, schema=[col]), ("1",)) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("UNKNOWN" in str(warning)) + self.assertTrue("col" in str(warning)) + def test_w_single_scalar_geography_column(self): # SELECT 1 AS col col = _Field("REQUIRED", "geo", "GEOGRAPHY") @@ -660,6 +672,17 @@ def test_w_single_array_column(self): row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]} self.assertEqual(self._call_fut(row, schema=[col]), ([1, 2, 3],)) + def test_w_unknown_type_repeated(self): + # SELECT 1 AS col + col = _Field("REPEATED", "col", "UNKNOWN") + row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]} + with warnings.catch_warnings(record=True) as warned: + self.assertEqual(self._call_fut(row, schema=[col]), (["1", "2", "3"],)) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("UNKNOWN" in str(warning)) + self.assertTrue("col" in str(warning)) + def test_w_struct_w_nested_array_column(self): # SELECT ([1, 2], 3, [4, 5]) as col first = _Field("REPEATED", "first", "INTEGER") @@ -684,6 +707,39 @@ def test_w_struct_w_nested_array_column(self): ({"first": [1, 2], "second": 3, "third": [4, 5]},), ) + def test_w_unknown_type_subfield(self): + # SELECT [(1, 2, 3), (4, 5, 6)] as col + first = _Field("REPEATED", "first", "UNKNOWN1") + second = _Field("REQUIRED", "second", "UNKNOWN2") + third = _Field("REPEATED", "third", "INTEGER") + col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third]) + row = { + "f": [ + { + "v": { + "f": [ + {"v": [{"v": "1"}, {"v": "2"}]}, + {"v": "3"}, + {"v": [{"v": "4"}, {"v": "5"}]}, + ] + } + } + ] + } + with warnings.catch_warnings(record=True) as warned: + self.assertEqual( + self._call_fut(row, schema=[col]), + ({"first": ["1", "2"], "second": "3", "third": [4, 5]},), + ) + self.assertEqual(len(warned), 2) # 1 warning per unknown field. + warned = [str(warning) for warning in warned] + self.assertTrue( + any(["first" in warning and "UNKNOWN1" in warning for warning in warned]) + ) + self.assertTrue( + any(["second" in warning and "UNKNOWN2" in warning for warning in warned]) + ) + def test_w_array_of_struct(self): # SELECT [(1, 2, 3), (4, 5, 6)] as col first = _Field("REQUIRED", "first", "INTEGER") @@ -1076,8 +1132,12 @@ def _call_fut(self, field, value): def test_w_unknown_field_type(self): field = _make_field("UNKNOWN") original = object() - converted = self._call_fut(field, original) + with warnings.catch_warnings(record=True) as warned: + converted = self._call_fut(field, original) self.assertIs(converted, original) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("UNKNOWN" in str(warning)) def test_w_known_field_type(self): field = _make_field("INT64") diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 7c36eb75bebb..40ef080f7543 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -1780,6 +1780,25 @@ def test_to_api_repr_w_nested_struct(self): param = self._make_one("foo", scalar_1, sub) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_unknown_type(self): + EXPECTED = { + "name": "foo", + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "bar", "type": {"type": "INT64"}}, + {"name": "baz", "type": {"type": "UNKNOWN_TYPE"}}, + ], + }, + "parameterValue": { + "structValues": {"bar": {"value": "123"}, "baz": {"value": "abc"}} + }, + } + sub_1 = _make_subparam("bar", "INT64", 123) + sub_2 = _make_subparam("baz", "UNKNOWN_TYPE", "abc") + param = self._make_one("foo", sub_1, sub_2) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test___eq___wrong_type(self): field = self._make_one("test", _make_subparam("bar", "STRING", "abc")) other = object() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 7a97c7b78da4..d6febcfb1753 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2751,9 +2751,9 @@ def test_to_arrow_w_unknown_type(self): self.assertEqual(ages, [33, 29]) self.assertEqual(sports, ["volleyball", "basketball"]) - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue("sport" in str(warning)) + # Expect warning from both the arrow conversion, and the json deserialization. + self.assertEqual(len(warned), 2) + self.assertTrue(all("sport" in str(warning) for warning in warned)) def test_to_arrow_w_empty_table(self): pyarrow = pytest.importorskip( From d0b96330c7d8378fc62d590718e5d8bc18a93aed Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 14 Aug 2024 12:30:51 -0400 Subject: [PATCH 1826/2016] Bug: revises Exception type (#1994) * revises Exception type * updates error choices --- .../google-cloud-bigquery/google/cloud/bigquery/job/base.py | 2 +- packages/google-cloud-bigquery/tests/unit/test_job_retry.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 6f9726181bd8..f165fd036aef 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -49,7 +49,7 @@ "notImplemented": http.client.NOT_IMPLEMENTED, "policyViolation": http.client.FORBIDDEN, "quotaExceeded": http.client.FORBIDDEN, - "rateLimitExceeded": http.client.FORBIDDEN, + "rateLimitExceeded": http.client.TOO_MANY_REQUESTS, "resourceInUse": http.client.BAD_REQUEST, "resourcesExceeded": http.client.BAD_REQUEST, "responseTooLarge": http.client.FORBIDDEN, diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py index 298ab9a56594..958986052adb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -442,7 +442,7 @@ def api_request(method, path, query_params=None, data=None, **kw): orig_job_id = job.job_id job_retry = dict(job_retry=None) if job_retry_on_query == "Result" else {} - with pytest.raises(google.api_core.exceptions.Forbidden): + with pytest.raises(google.api_core.exceptions.TooManyRequests): job.result(**job_retry) assert job.job_id == orig_job_id From 0aab4c18067c8876bb1d0766ccb84d08d6844633 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 14 Aug 2024 14:15:35 -0400 Subject: [PATCH 1827/2016] fix: add docfx to the presubmit configuration and delete docs-presubmit (#1995) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * adjusts location of checks related to docfx/docs * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg index ce39531209f7..ac4cc5847eeb 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg @@ -3,5 +3,5 @@ # Disable system tests. env_vars: { key: "NOX_SESSION" - value: "unit_noextras unit cover docs" + value: "unit_noextras unit cover docs docfx" } From 6078e3653ee689f072d33a441f08b39cbb386784 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 16 Aug 2024 21:24:33 +0200 Subject: [PATCH 1828/2016] chore(deps): update all dependencies (#1993) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * pin PyYAML===6.0.1 for python 3.7 * fix the version pinning --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .../samples/geography/requirements.txt | 13 +++++++------ .../samples/magics/requirements.txt | 2 +- .../samples/notebooks/requirements.txt | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 1a1cf4b04cdd..f388c82487f9 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,25 +1,25 @@ -attrs==24.1.0 +attrs==24.2.0 certifi==2024.7.4 cffi===1.15.1; python_version == '3.7' -cffi==1.16.0; python_version >= '3.8' +cffi==1.17.0; python_version >= '3.8' charset-normalizer==3.3.2 click==8.1.7 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.2.0 +db-dtypes==1.3.0 Fiona==1.9.6 geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' google-api-core==2.19.1 -google-auth==2.32.0 +google-auth==2.33.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 -google-resumable-media==2.7.1 +google-resumable-media==2.7.2 googleapis-common-protos==1.63.2 grpcio===1.62.2; python_version == '3.7' grpcio==1.65.4; python_version >= '3.8' @@ -43,7 +43,8 @@ pycparser==2.22; python_version >= '3.8' pyparsing==3.1.2 python-dateutil==2.9.0.post0 pytz==2024.1 -PyYAML==6.0.1 +PyYAML===6.0.1; python_version == '3.7' +PyYAML==6.0.2; python_version >= '3.8' requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index a1044c2310e8..956b168ddcc3 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ bigquery_magics==0.1.0 -db-dtypes==1.2.0 +db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 81fa3782c2b8..42b1243ebc5c 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ bigquery-magics==0.1.0 -db-dtypes==1.2.0 +db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' @@ -7,7 +7,7 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.9.1; python_version >= '3.9' +matplotlib==3.9.2; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' From e1ec1b5a56ec80254db76ea5040bf4e28fd78e02 Mon Sep 17 00:00:00 2001 From: Misha Behersky Date: Mon, 19 Aug 2024 21:15:10 +0300 Subject: [PATCH 1829/2016] fix: do not set job timeout extra property if None (#1987) Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/job/base.py | 7 +++++-- .../tests/unit/job/test_base.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index f165fd036aef..e5f68c8437d9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -218,8 +218,11 @@ def job_timeout_ms(self, value): err.__traceback__ ) - """ Docs indicate a string is expected by the API """ - self._properties["jobTimeoutMs"] = str(value) + if value is not None: + # docs indicate a string is expected by the API + self._properties["jobTimeoutMs"] = str(value) + else: + self._properties.pop("jobTimeoutMs", None) @property def labels(self): diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index a7337afd2316..2d2f0c13c1ec 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -1320,3 +1320,21 @@ def test_job_timeout_ms(self): # Confirm that integers get converted to strings. job_config.job_timeout_ms = 5000 assert job_config.job_timeout_ms == "5000" # int is converted to string + + def test_job_timeout_is_none_when_set_none(self): + job_config = self._make_one() + job_config.job_timeout_ms = None + # Confirm value is None and not literal string 'None' + assert job_config.job_timeout_ms is None + + def test_job_timeout_properties(self): + # Make sure any value stored in properties is erased + # when setting job_timeout to None. + job_config = self._make_one() + job_config.job_timeout_ms = 4200 + assert job_config.job_timeout_ms == "4200" + assert job_config._properties.get("jobTimeoutMs") == "4200" + + job_config.job_timeout_ms = None + assert job_config.job_timeout_ms is None + assert "jobTimeoutMs" not in job_config._properties From 8ba4bbb33982bc5377daacb1a5800fdb46478b67 Mon Sep 17 00:00:00 2001 From: Yilin Xu Date: Tue, 20 Aug 2024 06:21:01 -0700 Subject: [PATCH 1830/2016] fix: set pyarrow field nullable to False for a BigQuery field in REPEATED mode (#1999) Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/_pandas_helpers.py | 2 +- .../tests/unit/test__pandas_helpers.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index c21a02569ba9..210ab48758f0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -200,7 +200,7 @@ def bq_to_arrow_field(bq_field, array_type=None): # local NULL values. Arrow will gladly interpret these NULL values # as non-NULL and give you an arbitrary value. See: # https://github.com/googleapis/python-bigquery/issues/1692 - nullable=True, + nullable=False if bq_field.mode.upper() == "REPEATED" else True, metadata=metadata, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 58d2b73b3f7e..203cc1d1cec1 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -2002,6 +2002,23 @@ def test_bq_to_arrow_field_type_override(module_under_test): ) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_field_set_repeated_nullable_false(module_under_test): + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("name", "STRING", mode="REPEATED") + ).nullable + is False + ) + + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("name", "STRING", mode="NULLABLE") + ).nullable + is True + ) + + @pytest.mark.parametrize( "field_type, metadata", [ From dbc07fb677ef07e56a80a5c42a264af1ee44bbf6 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 27 Aug 2024 20:17:04 +0200 Subject: [PATCH 1831/2016] chore(deps): update all dependencies (#2002) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../samples/geography/requirements.txt | 8 ++++---- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../samples/notebooks/requirements.txt | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index f388c82487f9..892c1524e6b1 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' google-api-core==2.19.1 -google-auth==2.33.0 +google-auth==2.34.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 @@ -22,8 +22,8 @@ google-crc32c==1.5.0 google-resumable-media==2.7.2 googleapis-common-protos==1.63.2 grpcio===1.62.2; python_version == '3.7' -grpcio==1.65.4; python_version >= '3.8' -idna==3.7 +grpcio==1.66.0; python_version >= '3.8' +idna==3.8 munch==4.0.0 mypy-extensions==1.0.0 packaging===24.0; python_version == '3.7' @@ -48,7 +48,7 @@ PyYAML==6.0.2; python_version >= '3.8' requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 -Shapely==2.0.5 +Shapely==2.0.6 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 956b168ddcc3..b08ecad7e058 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.1.0 +bigquery_magics==0.1.1 db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 42b1243ebc5c..0467676fcac5 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.1.0 +bigquery-magics==0.1.1 db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 From ca0c4e43d29b9f706853bde488580ae748624853 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 28 Aug 2024 01:08:51 +0200 Subject: [PATCH 1832/2016] chore(deps): update all dependencies (#2004) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 892c1524e6b1..25dd4b319fa0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -20,7 +20,7 @@ google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.2 -googleapis-common-protos==1.63.2 +googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' grpcio==1.66.0; python_version >= '3.8' idna==3.8 @@ -40,7 +40,7 @@ pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.0; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' -pyparsing==3.1.2 +pyparsing==3.1.4 python-dateutil==2.9.0.post0 pytz==2024.1 PyYAML===6.0.1; python_version == '3.7' From 3f3aed40a81cdca69b2052e15dd45490cff0f3c1 Mon Sep 17 00:00:00 2001 From: shollyman Date: Wed, 4 Sep 2024 12:09:39 -0700 Subject: [PATCH 1833/2016] testing: remove testing identity override (#2011) * testing: remove testing identity override This PR removes a stale reference to a membership group in samples tests. --- packages/google-cloud-bigquery/samples/snippets/view.py | 2 +- packages/google-cloud-bigquery/samples/snippets/view_test.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/snippets/view.py b/packages/google-cloud-bigquery/samples/snippets/view.py index 94f406890900..30e719c794b6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/view.py +++ b/packages/google-cloud-bigquery/samples/snippets/view.py @@ -147,7 +147,7 @@ def grant_access( # Make an API request to get the view dataset ACLs. view_dataset = client.get_dataset(view_dataset_id) - analyst_group_email = "data_analysts@example.com" + analyst_group_email = "example-analyst-group@google.com" # [END bigquery_grant_view_access] # To facilitate testing, we replace values with alternatives # provided by the testing harness. diff --git a/packages/google-cloud-bigquery/samples/snippets/view_test.py b/packages/google-cloud-bigquery/samples/snippets/view_test.py index dfa1cdeee69e..d46595695cd6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/view_test.py +++ b/packages/google-cloud-bigquery/samples/snippets/view_test.py @@ -114,7 +114,6 @@ def test_view( project_id, dataset_id, table_id = view_id.split(".") overrides: view.OverridesDict = { - "analyst_group_email": "cloud-dpes-bigquery@google.com", "view_dataset_id": view_dataset_id, "source_dataset_id": source_dataset_id, "view_reference": { @@ -127,5 +126,5 @@ def test_view( assert len(view_dataset.access_entries) != 0 assert len(source_dataset.access_entries) != 0 out, _ = capsys.readouterr() - assert "cloud-dpes-bigquery@google.com" in out + assert "example-analyst-group@google.com" in out assert table_id in out From 91993c2a4fa4054ec6cf3184b79826ccee4d6e37 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 5 Sep 2024 11:28:49 -0700 Subject: [PATCH 1834/2016] feat: include LegacyPandasError in init imports (#2014) --- packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index e80907ec93e4..26d03286f34b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -44,6 +44,7 @@ from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlTypeNames from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery.exceptions import LegacyPandasError from google.cloud.bigquery.exceptions import LegacyPyarrowError from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions From 3e4473087098d9124cdac21cec4df2226add5d49 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 10 Sep 2024 21:13:15 +0200 Subject: [PATCH 1835/2016] chore(deps): update all dependencies (#2005) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * add version constraint for google-crc32c --------- Co-authored-by: Owl Bot Co-authored-by: Leah Cole --- .../samples/geography/requirements.txt | 13 +++++++------ .../samples/magics/requirements.txt | 4 ++-- .../samples/notebooks/requirements.txt | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 25dd4b319fa0..cfb27cca988f 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==24.2.0 -certifi==2024.7.4 +certifi==2024.8.30 cffi===1.15.1; python_version == '3.7' -cffi==1.17.0; python_version >= '3.8' +cffi==1.17.1; python_version >= '3.8' charset-normalizer==3.3.2 click==8.1.7 click-plugins==1.1.1 @@ -13,16 +13,17 @@ geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.19.1 +google-api-core==2.19.2 google-auth==2.34.0 google-cloud-bigquery==3.25.0 -google-cloud-bigquery-storage==2.25.0 +google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 -google-crc32c==1.5.0 +google-crc32c==1.5.0; python_version < '3.9' +google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.66.0; python_version >= '3.8' +grpcio==1.66.1; python_version >= '3.8' idna==3.8 munch==4.0.0 mypy-extensions==1.0.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index b08ecad7e058..f18db407e8b3 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,7 +1,7 @@ -bigquery_magics==0.1.1 +bigquery_magics==0.2.0 db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 -google-cloud-bigquery-storage==2.25.0 +google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 0467676fcac5..0b906c4ead2e 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ -bigquery-magics==0.1.1 +bigquery-magics==0.2.0 db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 -google-cloud-bigquery-storage==2.25.0 +google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' From c58f8868ba99fbd402859520ae75a32ace084432 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 11 Sep 2024 18:11:59 +0200 Subject: [PATCH 1836/2016] chore(deps): update all dependencies (#2017) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .../samples/desktopapp/requirements-test.txt | 2 +- .../samples/geography/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements-test.txt | 2 +- .../samples/notebooks/requirements-test.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 68f9039cc6ba..1640e1a9502a 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 335236a1439a..1ccebd9cd20e 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 68f9039cc6ba..1640e1a9502a 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 68f9039cc6ba..1640e1a9502a 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 083b2027154a..bb0b2a6bff9d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 From 1a6dc3e28bdea27e8a7f15ce525bf191cdda8340 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 11 Sep 2024 11:35:42 -0700 Subject: [PATCH 1837/2016] docs: improve QueryJobConfig.destination docstring (#2016) * docs: improve QueryJobConfig.destination docstring * add space --- .../google-cloud-bigquery/google/cloud/bigquery/job/query.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 4ea5687e0bb2..ca2448eaacd8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -476,6 +476,11 @@ def destination(self): ID, each separated by ``.``. For example: ``your-project.your_dataset.your_table``. + .. note:: + + Only table ID is passed to the backend, so any configuration + in `~google.cloud.bigquery.table.Table` is discarded. + See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table """ From 26bc716f04b9ff17c9bcc2d269a522d4a97d6e9c Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 11 Sep 2024 17:16:06 -0400 Subject: [PATCH 1838/2016] chore: adds Python 3.7/3.8 EOL pending deprecation warning (#2007) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * adds pending deprecation warning * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * revise code to put version function in version helpers * Update noxfile.py * Update google/cloud/bigquery/__init__.py --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/__init__.py | 13 +++++++++++ .../cloud/bigquery/_versions_helpers.py | 14 +++++++++++ packages/google-cloud-bigquery/noxfile.py | 23 ++++++++++++++++--- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index 26d03286f34b..caf75333aa17 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -115,12 +115,25 @@ from google.cloud.bigquery.table import TimePartitioningType from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery import _versions_helpers try: import bigquery_magics # type: ignore except ImportError: bigquery_magics = None +sys_major, sys_minor, sys_micro = _versions_helpers.extract_runtime_version() + +if sys_major == 3 and sys_minor in (7, 8): + warnings.warn( + "The python-bigquery library will stop supporting Python 3.7 " + "and Python 3.8 in a future major release expected in Q4 2024. " + f"Your Python version is {sys_major}.{sys_minor}.{sys_micro}. We " + "recommend that you update soon to ensure ongoing support. For " + "more details, see: [Google Cloud Client Libraries Supported Python Versions policy](https://cloud.google.com/python/docs/supported-python-versions)", + PendingDeprecationWarning, + ) + __all__ = [ "__version__", "Client", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py index 72d4c921de78..cfbf70a8edbe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_versions_helpers.py @@ -14,6 +14,7 @@ """Shared helper functions for verifying versions of installed modules.""" +import sys from typing import Any import packaging.version @@ -248,3 +249,16 @@ def try_import(self, raise_if_error: bool = False) -> Any: and PYARROW_VERSIONS.try_import() is not None and PYARROW_VERSIONS.installed_version >= _MIN_PYARROW_VERSION_RANGE ) + + +def extract_runtime_version(): + # Retrieve the version information + version_info = sys.version_info + + # Extract the major, minor, and micro components + major = version_info.major + minor = version_info.minor + micro = version_info.micro + + # Display the version number in a clear format + return major, minor, micro diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index a2df2e094715..2376309ff7c7 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -116,6 +116,7 @@ def default(session, install_extras=True): session.run( "py.test", "--quiet", + "-W default::PendingDeprecationWarning", "--cov=google/cloud/bigquery", "--cov=tests/unit", "--cov-append", @@ -231,6 +232,7 @@ def system(session): session.run( "py.test", "--quiet", + "-W default::PendingDeprecationWarning", os.path.join("tests", "system"), *session.posargs, ) @@ -299,6 +301,7 @@ def snippets(session): session.run( "py.test", "samples", + "-W default::PendingDeprecationWarning", "--ignore=samples/desktopapp", "--ignore=samples/magics", "--ignore=samples/geography", @@ -401,9 +404,23 @@ def prerelease_deps(session): session.run("python", "-m", "pip", "freeze") # Run all tests, except a few samples tests which require extra dependencies. - session.run("py.test", "tests/unit") - session.run("py.test", "tests/system") - session.run("py.test", "samples/tests") + session.run( + "py.test", + "tests/unit", + "-W default::PendingDeprecationWarning", + ) + + session.run( + "py.test", + "tests/system", + "-W default::PendingDeprecationWarning", + ) + + session.run( + "py.test", + "samples/tests", + "-W default::PendingDeprecationWarning", + ) @nox.session(python=DEFAULT_PYTHON_VERSION) From 38f15c7be4089fcb80614bdfb2521c540880d354 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 12 Sep 2024 16:44:54 +0200 Subject: [PATCH 1839/2016] chore(deps): update all dependencies (#2018) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- .../samples/geography/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index cfb27cca988f..350419781eed 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -18,7 +18,7 @@ google-auth==2.34.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 -google-crc32c==1.5.0; python_version < '3.9' +google-crc32c===1.5.0; python_version < '3.9' google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 @@ -36,14 +36,14 @@ proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' -pyasn1==0.6.0; python_version >= '3.8' +pyasn1==0.6.1; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' -pyasn1-modules==0.4.0; python_version >= '3.8' +pyasn1-modules==0.4.1; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' pyparsing==3.1.4 python-dateutil==2.9.0.post0 -pytz==2024.1 +pytz==2024.2 PyYAML===6.0.1; python_version == '3.7' PyYAML==6.0.2; python_version >= '3.8' requests==2.31.0; python_version == '3.7' From 3a32c5fe321cdf7e33392c8e41823c484805930e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 17 Sep 2024 11:18:05 -0400 Subject: [PATCH 1840/2016] build(python): release script update (#2024) Source-Link: https://github.com/googleapis/synthtool/commit/71a72973dddbc66ea64073b53eda49f0d22e0942 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:e8dcfd7cbfd8beac3a3ff8d3f3185287ea0625d859168cc80faccfc9a7a00455 Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- packages/google-cloud-bigquery/.kokoro/release.sh | 2 +- packages/google-cloud-bigquery/.kokoro/release/common.cfg | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 6d064ddb9b06..597e0c3261ca 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:94bb690db96e6242b2567a4860a94d48fa48696d092e51b0884a1a2c0a79a407 -# created: 2024-07-31T14:52:44.926548819Z + digest: sha256:e8dcfd7cbfd8beac3a3ff8d3f3185287ea0625d859168cc80faccfc9a7a00455 +# created: 2024-09-16T21:04:09.091105552Z diff --git a/packages/google-cloud-bigquery/.kokoro/release.sh b/packages/google-cloud-bigquery/.kokoro/release.sh index 81cee716e943..453d6f702c28 100755 --- a/packages/google-cloud-bigquery/.kokoro/release.sh +++ b/packages/google-cloud-bigquery/.kokoro/release.sh @@ -23,7 +23,7 @@ python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source / export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-1") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-2") cd github/python-bigquery python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/packages/google-cloud-bigquery/.kokoro/release/common.cfg b/packages/google-cloud-bigquery/.kokoro/release/common.cfg index cb8bbaa2e55d..43b5a1f2785e 100644 --- a/packages/google-cloud-bigquery/.kokoro/release/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/release/common.cfg @@ -28,7 +28,7 @@ before_action { fetch_keystore { keystore_resource { keystore_config_id: 73713 - keyname: "google-cloud-pypi-token-keystore-1" + keyname: "google-cloud-pypi-token-keystore-2" } } } From e1482d4df4b09a1aa9bff46ce0ea948333f82469 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 20 Sep 2024 23:21:58 +0200 Subject: [PATCH 1841/2016] chore(deps): update all dependencies (#2025) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 350419781eed..aa2ccfc285a1 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -24,7 +24,7 @@ google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' grpcio==1.66.1; python_version >= '3.8' -idna==3.8 +idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 packaging===24.0; python_version == '3.7' From 54c30edb4e76f41065653b3f34f5645890cf4cca Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 24 Sep 2024 19:46:21 +0200 Subject: [PATCH 1842/2016] chore(deps): update all dependencies (#2029) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../samples/notebooks/requirements.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index aa2ccfc285a1..e51d3d8c9ab2 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,8 +13,8 @@ geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.19.2 -google-auth==2.34.0 +google-api-core==2.20.0 +google-auth==2.35.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 @@ -31,7 +31,7 @@ packaging===24.0; python_version == '3.7' packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index f18db407e8b3..c1aac4bace73 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -7,4 +7,4 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 0b906c4ead2e..c25253e96461 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -10,4 +10,4 @@ matplotlib===3.7.4; python_version == '3.8' matplotlib==3.9.2; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' From 3e6c71c6691ac913c112eaefe5d4031a6adf6bf5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:47:57 -0400 Subject: [PATCH 1843/2016] chore(deps): bump fiona from 1.9.6 to 1.10.0 in /samples/geography (#2027) * chore(deps): bump fiona from 1.9.6 to 1.10.0 in /samples/geography Bumps [fiona](https://github.com/Toblerity/Fiona) from 1.9.6 to 1.10.0. - [Release notes](https://github.com/Toblerity/Fiona/releases) - [Changelog](https://github.com/Toblerity/Fiona/blob/main/CHANGES.txt) - [Commits](https://github.com/Toblerity/Fiona/compare/1.9.6...1.10.0) --- updated-dependencies: - dependency-name: fiona dependency-type: direct:production ... Signed-off-by: dependabot[bot] * pin fiona to 1.9.6 for python 3.7 --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- .../samples/geography/requirements.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e51d3d8c9ab2..cc0f3ad17569 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -8,13 +8,14 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.0 -Fiona==1.9.6 +Fiona===1.9.6; python_version == '3.7' +Fiona==1.10.0; python_version >= '3.8' geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.20.0 -google-auth==2.35.0 +google-api-core==2.19.2 +google-auth==2.34.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 @@ -31,7 +32,7 @@ packaging===24.0; python_version == '3.7' packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' From 4dbcbbefc7e4fea86556128912f562926b5325ff Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 26 Sep 2024 11:48:45 -0700 Subject: [PATCH 1844/2016] chore(main): release 3.26.0 (#1973) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 28 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 4a089b8b482f..5de99a6ca2b7 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,34 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.26.0](https://github.com/googleapis/python-bigquery/compare/v3.25.0...v3.26.0) (2024-09-25) + + +### Features + +* Include LegacyPandasError in init imports ([#2014](https://github.com/googleapis/python-bigquery/issues/2014)) ([3ab5e95](https://github.com/googleapis/python-bigquery/commit/3ab5e95984ad521027a4e1efd9f16767403e668d)) +* Use `bigquery-magics` package for the `%%bigquery` magic ([#1965](https://github.com/googleapis/python-bigquery/issues/1965)) ([60128a5](https://github.com/googleapis/python-bigquery/commit/60128a522375823422f238312521a2ce356d9177)) + + +### Bug Fixes + +* Add docfx to the presubmit configuration and delete docs-presubmit ([#1995](https://github.com/googleapis/python-bigquery/issues/1995)) ([bd83cfd](https://github.com/googleapis/python-bigquery/commit/bd83cfd2eb25cec58d59af8048f5188d748b083d)) +* Add warning when encountering unknown field types ([#1989](https://github.com/googleapis/python-bigquery/issues/1989)) ([8f5a41d](https://github.com/googleapis/python-bigquery/commit/8f5a41d283a965ca161019588d3a3b2947b04b5b)) +* Allow protobuf 5.x; require protobuf >=3.20.2; proto-plus >=1.22.3 ([#1976](https://github.com/googleapis/python-bigquery/issues/1976)) ([57bf873](https://github.com/googleapis/python-bigquery/commit/57bf873474382cc2cb34243b704bc928fa1b64c6)) +* Do not set job timeout extra property if None ([#1987](https://github.com/googleapis/python-bigquery/issues/1987)) ([edcb79c](https://github.com/googleapis/python-bigquery/commit/edcb79ca69dba30d8102abebb9d53bc76e4882ee)) +* Set pyarrow field nullable to False for a BigQuery field in REPEATED mode ([#1999](https://github.com/googleapis/python-bigquery/issues/1999)) ([5352870](https://github.com/googleapis/python-bigquery/commit/5352870283ca7d4652aefc73f12645bcf6e1363c)) + + +### Dependencies + +* Bump min version of google-api-core and google-cloud-core to 2.x ([#1972](https://github.com/googleapis/python-bigquery/issues/1972)) ([a958732](https://github.com/googleapis/python-bigquery/commit/a958732aed7d9bd51ffde3dc0e6cae9ad7455b54)) + + +### Documentation + +* Add short mode query sample & test ([#1978](https://github.com/googleapis/python-bigquery/issues/1978)) ([ba61a8a](https://github.com/googleapis/python-bigquery/commit/ba61a8ab0da541ba1940211875d7ea2e9e17dfa8)) +* Improve QueryJobConfig.destination docstring ([#2016](https://github.com/googleapis/python-bigquery/issues/2016)) ([1b4cca0](https://github.com/googleapis/python-bigquery/commit/1b4cca0a3cc788a4570705572d5f04172f6b4b24)) + ## [3.25.0](https://github.com/googleapis/python-bigquery/compare/v3.24.0...v3.25.0) (2024-06-17) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index fed077e267bf..ebc9112530b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.25.0" +__version__ = "3.26.0" From c694a2cd31c15fc29b4c0822d127da9aee9705a1 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 26 Sep 2024 22:08:28 +0200 Subject: [PATCH 1845/2016] chore(deps): update all dependencies (#2031) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index c1aac4bace73..4652fcdf23e2 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.2.0 +bigquery_magics==0.3.0 db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index c25253e96461..c4b75f3dbf65 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.2.0 +bigquery-magics==0.3.0 db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 From f5129648dc0ebd3dd383acebaa235b0bbda8dd49 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 1 Oct 2024 23:02:18 +0200 Subject: [PATCH 1846/2016] chore(deps): update all dependencies (#2033) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements.txt | 12 ++++++------ .../samples/magics/requirements.txt | 4 ++-- .../samples/notebooks/requirements.txt | 4 ++-- .../samples/snippets/requirements.txt | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index dafb60b2a2cb..383829d7d756 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.25.0 +google-cloud-bigquery==3.26.0 google-auth-oauthlib==1.2.1 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index cc0f3ad17569..1089dc1957ab 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -9,14 +9,14 @@ cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.0 Fiona===1.9.6; python_version == '3.7' -Fiona==1.10.0; python_version >= '3.8' +Fiona==1.10.1; python_version >= '3.8' geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.19.2 -google-auth==2.34.0 -google-cloud-bigquery==3.25.0 +google-api-core==2.20.0 +google-auth==2.35.0 +google-cloud-bigquery==3.26.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 google-crc32c===1.5.0; python_version < '3.9' @@ -32,7 +32,7 @@ packaging===24.0; python_version == '3.7' packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' @@ -56,4 +56,4 @@ typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' -urllib3==2.2.2; python_version >= '3.8' +urllib3==2.2.3; python_version >= '3.8' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 4652fcdf23e2..6386fb6d2462 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.3.0 +bigquery_magics==0.4.0 db-dtypes==1.3.0 -google.cloud.bigquery==3.25.0 +google.cloud.bigquery==3.26.0 google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index c4b75f3dbf65..7463e1afced6 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ -bigquery-magics==0.3.0 +bigquery-magics==0.4.0 db-dtypes==1.3.0 -google-cloud-bigquery==3.25.0 +google-cloud-bigquery==3.26.0 google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 9e181d9632f3..65ce0be9fa50 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.25.0 +google-cloud-bigquery==3.26.0 From 3e0a34f978a056bcb0a1bd4e8dce44c73af1bd55 Mon Sep 17 00:00:00 2001 From: Jeff Quinlan-Galper Date: Wed, 9 Oct 2024 03:25:31 -0700 Subject: [PATCH 1847/2016] Fix typo in legacy docs (#2037) uspported -> supported Co-authored-by: Chalmer Lowe --- .../google-cloud-bigquery/docs/bigquery/legacy_proto_types.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/docs/bigquery/legacy_proto_types.rst b/packages/google-cloud-bigquery/docs/bigquery/legacy_proto_types.rst index bc1e9371597b..36e9984b9e2b 100644 --- a/packages/google-cloud-bigquery/docs/bigquery/legacy_proto_types.rst +++ b/packages/google-cloud-bigquery/docs/bigquery/legacy_proto_types.rst @@ -3,7 +3,7 @@ Legacy proto-based Types for Google Cloud Bigquery v2 API .. warning:: These types are provided for backward compatibility only, and are not maintained - anymore. They might also differ from the types uspported on the backend. It is + anymore. They might also differ from the types supported on the backend. It is therefore strongly advised to migrate to the types found in :doc:`standard_sql`. Also see the :doc:`3.0.0 Migration Guide<../UPGRADING>` for more information. From afecb93c903c4871bdefd022f218d5bfe1e658c2 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 10 Oct 2024 05:32:25 -0400 Subject: [PATCH 1848/2016] feat: updates to allow users to set max_stream_count (#2039) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a function `determine_requested_streams()` to compare `preserve_order` and the new argument `max_stream_count` to determine how many streams to request. ``` preserve_order (bool): Whether to preserve the order of streams. If True, this limits the number of streams to one (more than one cannot guarantee order). max_stream_count (Union[int, None]]): The maximum number of streams allowed. Must be a non-negative number or None, where None indicates the value is unset. If `max_stream_count` is set, it overrides `preserve_order`. ``` Fixes #2030 🦕 --- .../google/cloud/bigquery/_pandas_helpers.py | 118 +++++++++++++++--- .../tests/unit/test__pandas_helpers.py | 31 +++++ 2 files changed, 130 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 210ab48758f0..bf7d10c0f229 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -21,13 +21,14 @@ import logging import queue import warnings -from typing import Any, Union +from typing import Any, Union, Optional, Callable, Generator, List from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema + try: import pandas # type: ignore @@ -75,7 +76,7 @@ def _to_wkb(v): _to_wkb = _to_wkb() try: - from google.cloud.bigquery_storage import ArrowSerializationOptions + from google.cloud.bigquery_storage_v1.types import ArrowSerializationOptions except ImportError: _ARROW_COMPRESSION_SUPPORT = False else: @@ -816,18 +817,54 @@ def _nowait(futures): def _download_table_bqstorage( - project_id, - table, - bqstorage_client, - preserve_order=False, - selected_fields=None, - page_to_item=None, - max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, -): - """Use (faster, but billable) BQ Storage API to construct DataFrame.""" + project_id: str, + table: Any, + bqstorage_client: Any, + preserve_order: bool = False, + selected_fields: Optional[List[Any]] = None, + page_to_item: Optional[Callable] = None, + max_queue_size: Any = _MAX_QUEUE_SIZE_DEFAULT, + max_stream_count: Optional[int] = None, +) -> Generator[Any, None, None]: + """Downloads a BigQuery table using the BigQuery Storage API. + + This method uses the faster, but potentially more expensive, BigQuery + Storage API to download a table as a Pandas DataFrame. It supports + parallel downloads and optional data transformations. + + Args: + project_id (str): The ID of the Google Cloud project containing + the table. + table (Any): The BigQuery table to download. + bqstorage_client (Any): An + authenticated BigQuery Storage API client. + preserve_order (bool, optional): Whether to preserve the order + of the rows as they are read from BigQuery. If True this limits + the number of streams to one and overrides `max_stream_count`. + Defaults to False. + selected_fields (Optional[List[SchemaField]]): + A list of BigQuery schema fields to select for download. If None, + all fields are downloaded. Defaults to None. + page_to_item (Optional[Callable]): An optional callable + function that takes a page of data from the BigQuery Storage API + max_stream_count (Optional[int]): The maximum number of + concurrent streams to use for downloading data. If `preserve_order` + is True, the requested streams are limited to 1 regardless of the + `max_stream_count` value. If 0 or None, then the number of + requested streams will be unbounded. Defaults to None. + + Yields: + pandas.DataFrame: Pandas DataFrames, one for each chunk of data + downloaded from BigQuery. + + Raises: + ValueError: If attempting to read from a specific partition or snapshot. + + Note: + This method requires the `google-cloud-bigquery-storage` library + to be installed. + """ - # Passing a BQ Storage client in implies that the BigQuery Storage library - # is available and can be imported. from google.cloud import bigquery_storage if "$" in table.table_id: @@ -837,10 +874,11 @@ def _download_table_bqstorage( if "@" in table.table_id: raise ValueError("Reading from a specific snapshot is not currently supported.") - requested_streams = 1 if preserve_order else 0 + requested_streams = determine_requested_streams(preserve_order, max_stream_count) - requested_session = bigquery_storage.types.ReadSession( - table=table.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW + requested_session = bigquery_storage.types.stream.ReadSession( + table=table.to_bqstorage(), + data_format=bigquery_storage.types.stream.DataFormat.ARROW, ) if selected_fields is not None: for field in selected_fields: @@ -848,7 +886,8 @@ def _download_table_bqstorage( if _ARROW_COMPRESSION_SUPPORT: requested_session.read_options.arrow_serialization_options.buffer_compression = ( - ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + # CompressionCodec(1) -> LZ4_FRAME + ArrowSerializationOptions.CompressionCodec(1) ) session = bqstorage_client.create_read_session( @@ -884,7 +923,7 @@ def _download_table_bqstorage( elif max_queue_size is None: max_queue_size = 0 # unbounded - worker_queue = queue.Queue(maxsize=max_queue_size) + worker_queue: queue.Queue[int] = queue.Queue(maxsize=max_queue_size) with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool: try: @@ -910,7 +949,7 @@ def _download_table_bqstorage( # we want to block on the queue's get method, instead. This # prevents the queue from filling up, because the main thread # has smaller gaps in time between calls to the queue's get - # method. For a detailed explaination, see: + # method. For a detailed explanation, see: # https://friendliness.dev/2019/06/18/python-nowait/ done, not_done = _nowait(not_done) for future in done: @@ -949,6 +988,7 @@ def download_arrow_bqstorage( preserve_order=False, selected_fields=None, max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, + max_stream_count=None, ): return _download_table_bqstorage( project_id, @@ -958,6 +998,7 @@ def download_arrow_bqstorage( selected_fields=selected_fields, page_to_item=_bqstorage_page_to_arrow, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) @@ -970,6 +1011,7 @@ def download_dataframe_bqstorage( preserve_order=False, selected_fields=None, max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, + max_stream_count=None, ): page_to_item = functools.partial(_bqstorage_page_to_dataframe, column_names, dtypes) return _download_table_bqstorage( @@ -980,6 +1022,7 @@ def download_dataframe_bqstorage( selected_fields=selected_fields, page_to_item=page_to_item, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) @@ -1024,3 +1067,40 @@ def verify_pandas_imports(): raise ValueError(_NO_PANDAS_ERROR) from pandas_import_exception if db_dtypes is None: raise ValueError(_NO_DB_TYPES_ERROR) from db_dtypes_import_exception + + +def determine_requested_streams( + preserve_order: bool, + max_stream_count: Union[int, None], +) -> int: + """Determines the value of requested_streams based on the values of + `preserve_order` and `max_stream_count`. + + Args: + preserve_order (bool): Whether to preserve the order of streams. If True, + this limits the number of streams to one. `preserve_order` takes + precedence over `max_stream_count`. + max_stream_count (Union[int, None]]): The maximum number of streams + allowed. Must be a non-negative number or None, where None indicates + the value is unset. NOTE: if `preserve_order` is also set, it takes + precedence over `max_stream_count`, thus to ensure that `max_stream_count` + is used, ensure that `preserve_order` is None. + + Returns: + (int) The appropriate value for requested_streams. + """ + + if preserve_order: + # If preserve order is set, it takes precendence. + # Limit the requested streams to 1, to ensure that order + # is preserved) + return 1 + + elif max_stream_count is not None: + # If preserve_order is not set, only then do we consider max_stream_count + if max_stream_count <= -1: + raise ValueError("max_stream_count must be non-negative OR None") + return max_stream_count + + # Default to zero requested streams (unbounded). + return 0 diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 203cc1d1cec1..3a5fddacc48d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -18,6 +18,7 @@ import functools import operator import queue +from typing import Union from unittest import mock import warnings @@ -46,6 +47,7 @@ from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema +from google.cloud.bigquery._pandas_helpers import determine_requested_streams pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() @@ -2053,3 +2055,32 @@ def test_verify_pandas_imports_no_db_dtypes(module_under_test, monkeypatch): monkeypatch.setattr(module_under_test, "db_dtypes", None) with pytest.raises(ValueError, match="Please install the 'db-dtypes' package"): module_under_test.verify_pandas_imports() + + +@pytest.mark.parametrize( + "preserve_order, max_stream_count, expected_requested_streams", + [ + # If preserve_order is set/True, it takes precedence: + (True, 10, 1), # use 1 + (True, None, 1), # use 1 + # If preserve_order is not set check max_stream_count: + (False, 10, 10), # max_stream_count (X) takes precedence + (False, None, 0), # Unbounded (0) when both are unset + ], +) +def test_determine_requested_streams( + preserve_order: bool, + max_stream_count: Union[int, None], + expected_requested_streams: int, +): + """Tests various combinations of preserve_order and max_stream_count.""" + actual_requested_streams = determine_requested_streams( + preserve_order, max_stream_count + ) + assert actual_requested_streams == expected_requested_streams + + +def test_determine_requested_streams_invalid_max_stream_count(): + """Tests that a ValueError is raised if max_stream_count is negative.""" + with pytest.raises(ValueError): + determine_requested_streams(preserve_order=False, max_stream_count=-1) From c6cbb8f6f1c4f55bb2eb62781a81e791def84e04 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 1 Nov 2024 10:18:55 -0400 Subject: [PATCH 1849/2016] build: use multiScm for Kokoro release builds (#2049) Source-Link: https://github.com/googleapis/synthtool/commit/0da16589204e7f61911f64fcb30ac2d3b6e59b31 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:5cddfe2fb5019bbf78335bc55f15bc13e18354a56b3ff46e1834f8e540807f05 Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 4 +- .../.github/release-trigger.yml | 1 + .../.kokoro/docker/docs/requirements.txt | 42 +- .../.kokoro/docs/common.cfg | 2 +- .../google-cloud-bigquery/.kokoro/release.sh | 2 +- .../.kokoro/release/common.cfg | 8 +- .../.kokoro/requirements.txt | 610 +++++++++--------- .../.kokoro/samples/python3.13/common.cfg | 40 ++ .../.kokoro/samples/python3.13/continuous.cfg | 6 + .../samples/python3.13/periodic-head.cfg | 11 + .../.kokoro/samples/python3.13/periodic.cfg | 6 + .../.kokoro/samples/python3.13/presubmit.cfg | 6 + .../.kokoro/test-samples-impl.sh | 3 +- .../google-cloud-bigquery/CONTRIBUTING.rst | 6 +- .../samples/desktopapp/noxfile.py | 2 +- .../samples/geography/noxfile.py | 2 +- .../samples/magics/noxfile.py | 2 +- .../samples/notebooks/noxfile.py | 2 +- .../samples/snippets/noxfile.py | 2 +- 19 files changed, 398 insertions(+), 359 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.13/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.13/continuous.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic-head.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.13/presubmit.cfg diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 597e0c3261ca..7672b49b6307 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:e8dcfd7cbfd8beac3a3ff8d3f3185287ea0625d859168cc80faccfc9a7a00455 -# created: 2024-09-16T21:04:09.091105552Z + digest: sha256:5cddfe2fb5019bbf78335bc55f15bc13e18354a56b3ff46e1834f8e540807f05 +# created: 2024-10-31T01:41:07.349286254Z diff --git a/packages/google-cloud-bigquery/.github/release-trigger.yml b/packages/google-cloud-bigquery/.github/release-trigger.yml index d4ca94189e16..4bb79e58eadf 100644 --- a/packages/google-cloud-bigquery/.github/release-trigger.yml +++ b/packages/google-cloud-bigquery/.github/release-trigger.yml @@ -1 +1,2 @@ enabled: true +multiScmName: diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt index 7129c7715594..66eacc82f041 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt @@ -4,39 +4,39 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.4.0 \ - --hash=sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5 \ - --hash=sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f +argcomplete==3.5.1 \ + --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ + --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 # via nox colorlog==6.8.2 \ --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 # via nox -distlib==0.3.8 \ - --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ - --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 +distlib==0.3.9 \ + --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ + --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 # via virtualenv -filelock==3.15.4 \ - --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ - --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 +filelock==3.16.1 \ + --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ + --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 # via virtualenv -nox==2024.4.15 \ - --hash=sha256:6492236efa15a460ecb98e7b67562a28b70da006ab0be164e8821177577c0565 \ - --hash=sha256:ecf6700199cdfa9e5ea0a41ff5e6ef4641d09508eda6edb89d9987864115817f +nox==2024.10.9 \ + --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ + --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 # via -r requirements.in packaging==24.1 \ --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 # via nox -platformdirs==4.2.2 \ - --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ - --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 +platformdirs==4.3.6 \ + --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ + --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv -tomli==2.0.1 \ - --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ - --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f +tomli==2.0.2 \ + --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ + --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed # via nox -virtualenv==20.26.3 \ - --hash=sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a \ - --hash=sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589 +virtualenv==20.26.6 \ + --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ + --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 # via nox diff --git a/packages/google-cloud-bigquery/.kokoro/docs/common.cfg b/packages/google-cloud-bigquery/.kokoro/docs/common.cfg index 41b86fc29f15..76ae5f13bec1 100644 --- a/packages/google-cloud-bigquery/.kokoro/docs/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/docs/common.cfg @@ -63,4 +63,4 @@ before_action { keyname: "docuploader_service_account" } } -} \ No newline at end of file +} diff --git a/packages/google-cloud-bigquery/.kokoro/release.sh b/packages/google-cloud-bigquery/.kokoro/release.sh index 453d6f702c28..65deb5ed31c1 100755 --- a/packages/google-cloud-bigquery/.kokoro/release.sh +++ b/packages/google-cloud-bigquery/.kokoro/release.sh @@ -23,7 +23,7 @@ python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source / export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-2") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-3") cd github/python-bigquery python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/packages/google-cloud-bigquery/.kokoro/release/common.cfg b/packages/google-cloud-bigquery/.kokoro/release/common.cfg index 43b5a1f2785e..6f57163f5036 100644 --- a/packages/google-cloud-bigquery/.kokoro/release/common.cfg +++ b/packages/google-cloud-bigquery/.kokoro/release/common.cfg @@ -28,17 +28,11 @@ before_action { fetch_keystore { keystore_resource { keystore_config_id: 73713 - keyname: "google-cloud-pypi-token-keystore-2" + keyname: "google-cloud-pypi-token-keystore-3" } } } -# Tokens needed to report release status back to GitHub -env_vars: { - key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" -} - # Store the packages we uploaded to PyPI. That way, we have a record of exactly # what we published, which we can use to generate SBOMs and attestations. action { diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 9622baf0ba38..006d8ef931bf 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -4,79 +4,94 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.4.0 \ - --hash=sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5 \ - --hash=sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f +argcomplete==3.5.1 \ + --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ + --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 # via nox -attrs==23.2.0 \ - --hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \ - --hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1 +attrs==24.2.0 \ + --hash=sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346 \ + --hash=sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2 # via gcp-releasetool backports-tarfile==1.2.0 \ --hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \ --hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991 # via jaraco-context -cachetools==5.3.3 \ - --hash=sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945 \ - --hash=sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105 +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a # via google-auth -certifi==2024.7.4 \ - --hash=sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b \ - --hash=sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90 +certifi==2024.8.30 \ + --hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \ + --hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9 # via requests -cffi==1.16.0 \ - --hash=sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc \ - --hash=sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a \ - --hash=sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417 \ - --hash=sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab \ - --hash=sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520 \ - --hash=sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36 \ - --hash=sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743 \ - --hash=sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8 \ - --hash=sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed \ - --hash=sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684 \ - --hash=sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56 \ - --hash=sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324 \ - --hash=sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d \ - --hash=sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235 \ - --hash=sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e \ - --hash=sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088 \ - --hash=sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000 \ - --hash=sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7 \ - --hash=sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e \ - --hash=sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673 \ - --hash=sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c \ - --hash=sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe \ - --hash=sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2 \ - --hash=sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098 \ - --hash=sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8 \ - --hash=sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a \ - --hash=sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0 \ - --hash=sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b \ - --hash=sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896 \ - --hash=sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e \ - --hash=sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9 \ - --hash=sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2 \ - --hash=sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b \ - --hash=sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6 \ - --hash=sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404 \ - --hash=sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f \ - --hash=sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0 \ - --hash=sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4 \ - --hash=sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc \ - --hash=sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936 \ - --hash=sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba \ - --hash=sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872 \ - --hash=sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb \ - --hash=sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614 \ - --hash=sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1 \ - --hash=sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d \ - --hash=sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969 \ - --hash=sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b \ - --hash=sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4 \ - --hash=sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627 \ - --hash=sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956 \ - --hash=sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357 +cffi==1.17.1 \ + --hash=sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8 \ + --hash=sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2 \ + --hash=sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1 \ + --hash=sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15 \ + --hash=sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36 \ + --hash=sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824 \ + --hash=sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8 \ + --hash=sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36 \ + --hash=sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17 \ + --hash=sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf \ + --hash=sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc \ + --hash=sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3 \ + --hash=sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed \ + --hash=sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702 \ + --hash=sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1 \ + --hash=sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8 \ + --hash=sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903 \ + --hash=sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6 \ + --hash=sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d \ + --hash=sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b \ + --hash=sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e \ + --hash=sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be \ + --hash=sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c \ + --hash=sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683 \ + --hash=sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9 \ + --hash=sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c \ + --hash=sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8 \ + --hash=sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1 \ + --hash=sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4 \ + --hash=sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655 \ + --hash=sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67 \ + --hash=sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595 \ + --hash=sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0 \ + --hash=sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65 \ + --hash=sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41 \ + --hash=sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6 \ + --hash=sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401 \ + --hash=sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6 \ + --hash=sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3 \ + --hash=sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16 \ + --hash=sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93 \ + --hash=sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e \ + --hash=sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4 \ + --hash=sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964 \ + --hash=sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c \ + --hash=sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576 \ + --hash=sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0 \ + --hash=sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3 \ + --hash=sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662 \ + --hash=sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3 \ + --hash=sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff \ + --hash=sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5 \ + --hash=sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd \ + --hash=sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f \ + --hash=sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5 \ + --hash=sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14 \ + --hash=sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d \ + --hash=sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9 \ + --hash=sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7 \ + --hash=sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382 \ + --hash=sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a \ + --hash=sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e \ + --hash=sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a \ + --hash=sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4 \ + --hash=sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99 \ + --hash=sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87 \ + --hash=sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b # via cryptography charset-normalizer==2.1.1 \ --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ @@ -97,72 +112,67 @@ colorlog==6.8.2 \ # via # gcp-docuploader # nox -cryptography==42.0.8 \ - --hash=sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad \ - --hash=sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583 \ - --hash=sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b \ - --hash=sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c \ - --hash=sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1 \ - --hash=sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648 \ - --hash=sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949 \ - --hash=sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba \ - --hash=sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c \ - --hash=sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9 \ - --hash=sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d \ - --hash=sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c \ - --hash=sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e \ - --hash=sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2 \ - --hash=sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d \ - --hash=sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7 \ - --hash=sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70 \ - --hash=sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2 \ - --hash=sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7 \ - --hash=sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14 \ - --hash=sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe \ - --hash=sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e \ - --hash=sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71 \ - --hash=sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961 \ - --hash=sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7 \ - --hash=sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c \ - --hash=sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28 \ - --hash=sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842 \ - --hash=sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902 \ - --hash=sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801 \ - --hash=sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a \ - --hash=sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e +cryptography==43.0.1 \ + --hash=sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494 \ + --hash=sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806 \ + --hash=sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d \ + --hash=sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062 \ + --hash=sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2 \ + --hash=sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4 \ + --hash=sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1 \ + --hash=sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85 \ + --hash=sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84 \ + --hash=sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042 \ + --hash=sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d \ + --hash=sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962 \ + --hash=sha256:7c05650fe8023c5ed0d46793d4b7d7e6cd9c04e68eabe5b0aeea836e37bdcec2 \ + --hash=sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa \ + --hash=sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d \ + --hash=sha256:88cce104c36870d70c49c7c8fd22885875d950d9ee6ab54df2745f83ba0dc365 \ + --hash=sha256:9d3cdb25fa98afdd3d0892d132b8d7139e2c087da1712041f6b762e4f807cc96 \ + --hash=sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47 \ + --hash=sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d \ + --hash=sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d \ + --hash=sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c \ + --hash=sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb \ + --hash=sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277 \ + --hash=sha256:e710bf40870f4db63c3d7d929aa9e09e4e7ee219e703f949ec4073b4294f6172 \ + --hash=sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034 \ + --hash=sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a \ + --hash=sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289 # via # -r requirements.in # gcp-releasetool # secretstorage -distlib==0.3.8 \ - --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ - --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 +distlib==0.3.9 \ + --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ + --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 # via virtualenv docutils==0.21.2 \ --hash=sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f \ --hash=sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 # via readme-renderer -filelock==3.15.4 \ - --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ - --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 +filelock==3.16.1 \ + --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ + --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 # via virtualenv gcp-docuploader==0.6.5 \ --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea # via -r requirements.in -gcp-releasetool==2.0.1 \ - --hash=sha256:34314a910c08e8911d9c965bd44f8f2185c4f556e737d719c33a41f6a610de96 \ - --hash=sha256:b0d5863c6a070702b10883d37c4bdfd74bf930fe417f36c0c965d3b7c779ae62 +gcp-releasetool==2.1.1 \ + --hash=sha256:25639269f4eae510094f9dbed9894977e1966933211eb155a451deebc3fc0b30 \ + --hash=sha256:845f4ded3d9bfe8cc7fdaad789e83f4ea014affa77785259a7ddac4b243e099e # via -r requirements.in -google-api-core==2.19.1 \ - --hash=sha256:f12a9b8309b5e21d92483bbd47ce2c445861ec7d269ef6784ecc0ea8c1fa6125 \ - --hash=sha256:f4695f1e3650b316a795108a76a1c416e6afb036199d1c1f1f110916df479ffd +google-api-core==2.21.0 \ + --hash=sha256:4a152fd11a9f774ea606388d423b68aa7e6d6a0ffe4c8266f74979613ec09f81 \ + --hash=sha256:6869eacb2a37720380ba5898312af79a4d30b8bca1548fb4093e0697dc4bdf5d # via # google-cloud-core # google-cloud-storage -google-auth==2.31.0 \ - --hash=sha256:042c4702efa9f7d3c48d3a69341c209381b125faa6dbf3ebe56bc7e40ae05c23 \ - --hash=sha256:87805c36970047247c8afe614d4e3af8eceafc1ebba0c679fe75ddd1d575e871 +google-auth==2.35.0 \ + --hash=sha256:25df55f327ef021de8be50bad0dfd4a916ad0de96da86cd05661c9297723ad3f \ + --hash=sha256:f4c64ed4e01e8e8b646ef34c018f8bf3338df0c8e37d8b3bba40e7f574a3278a # via # gcp-releasetool # google-api-core @@ -172,97 +182,56 @@ google-cloud-core==2.4.1 \ --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 # via google-cloud-storage -google-cloud-storage==2.17.0 \ - --hash=sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388 \ - --hash=sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1 +google-cloud-storage==2.18.2 \ + --hash=sha256:97a4d45c368b7d401ed48c4fdfe86e1e1cb96401c9e199e419d289e2c0370166 \ + --hash=sha256:aaf7acd70cdad9f274d29332673fcab98708d0e1f4dceb5a5356aaef06af4d99 # via gcp-docuploader -google-crc32c==1.5.0 \ - --hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \ - --hash=sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876 \ - --hash=sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c \ - --hash=sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289 \ - --hash=sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298 \ - --hash=sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02 \ - --hash=sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f \ - --hash=sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2 \ - --hash=sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a \ - --hash=sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb \ - --hash=sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210 \ - --hash=sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5 \ - --hash=sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee \ - --hash=sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c \ - --hash=sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a \ - --hash=sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314 \ - --hash=sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd \ - --hash=sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65 \ - --hash=sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37 \ - --hash=sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4 \ - --hash=sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13 \ - --hash=sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894 \ - --hash=sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31 \ - --hash=sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e \ - --hash=sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709 \ - --hash=sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740 \ - --hash=sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc \ - --hash=sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d \ - --hash=sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c \ - --hash=sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c \ - --hash=sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d \ - --hash=sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906 \ - --hash=sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61 \ - --hash=sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57 \ - --hash=sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c \ - --hash=sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a \ - --hash=sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438 \ - --hash=sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946 \ - --hash=sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7 \ - --hash=sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96 \ - --hash=sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091 \ - --hash=sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae \ - --hash=sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d \ - --hash=sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88 \ - --hash=sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2 \ - --hash=sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd \ - --hash=sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541 \ - --hash=sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728 \ - --hash=sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178 \ - --hash=sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968 \ - --hash=sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346 \ - --hash=sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8 \ - --hash=sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93 \ - --hash=sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7 \ - --hash=sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273 \ - --hash=sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462 \ - --hash=sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94 \ - --hash=sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd \ - --hash=sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e \ - --hash=sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57 \ - --hash=sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b \ - --hash=sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9 \ - --hash=sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a \ - --hash=sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100 \ - --hash=sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325 \ - --hash=sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183 \ - --hash=sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556 \ - --hash=sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4 +google-crc32c==1.6.0 \ + --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ + --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ + --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ + --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ + --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ + --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ + --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ + --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ + --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ + --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ + --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ + --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ + --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ + --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ + --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ + --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ + --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ + --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ + --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ + --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ + --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ + --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ + --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ + --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ + --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ + --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ + --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.7.1 \ - --hash=sha256:103ebc4ba331ab1bfdac0250f8033627a2cd7cde09e7ccff9181e31ba4315b2c \ - --hash=sha256:eae451a7b2e2cdbaaa0fd2eb00cc8a1ee5e95e16b55597359cbc3d27d7d90e33 +google-resumable-media==2.7.2 \ + --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ + --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 # via google-cloud-storage -googleapis-common-protos==1.63.2 \ - --hash=sha256:27a2499c7e8aff199665b22741997e485eccc8645aa9176c7c988e6fae507945 \ - --hash=sha256:27c5abdffc4911f28101e635de1533fb4cfd2c37fbaa9174587c799fac90aa87 +googleapis-common-protos==1.65.0 \ + --hash=sha256:2972e6c496f435b92590fd54045060867f3fe9be2c82ab148fc8885035479a63 \ + --hash=sha256:334a29d07cddc3aa01dee4988f9afd9b2916ee2ff49d6b757155dc0d197852c0 # via google-api-core -idna==3.7 \ - --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ - --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 +idna==3.10 \ + --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ + --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 # via requests -importlib-metadata==8.0.0 \ - --hash=sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f \ - --hash=sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812 +importlib-metadata==8.5.0 \ + --hash=sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b \ + --hash=sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7 # via # -r requirements.in # keyring @@ -271,13 +240,13 @@ jaraco-classes==3.4.0 \ --hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \ --hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790 # via keyring -jaraco-context==5.3.0 \ - --hash=sha256:3e16388f7da43d384a1a7cd3452e72e14732ac9fe459678773a3608a812bf266 \ - --hash=sha256:c2f67165ce1f9be20f32f650f25d8edfc1646a8aeee48ae06fb35f90763576d2 +jaraco-context==6.0.1 \ + --hash=sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3 \ + --hash=sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4 # via keyring -jaraco-functools==4.0.1 \ - --hash=sha256:3b24ccb921d6b593bdceb56ce14799204f473976e2a9d4b15b04d0f2c2326664 \ - --hash=sha256:d33fa765374c0611b52f8b3a795f8900869aa88c84769d4d1746cd68fb28c3e8 +jaraco-functools==4.1.0 \ + --hash=sha256:70f7e0e2ae076498e212562325e805204fc092d7b4c17e0e86c959e249701a9d \ + --hash=sha256:ad159f13428bc4acbf5541ad6dec511f91573b90fba04df61dafa2a1231cf649 # via keyring jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ @@ -289,9 +258,9 @@ jinja2==3.1.4 \ --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d # via gcp-releasetool -keyring==25.2.1 \ - --hash=sha256:2458681cdefc0dbc0b7eb6cf75d0b98e59f9ad9b2d4edd319d18f68bdca95e50 \ - --hash=sha256:daaffd42dbda25ddafb1ad5fec4024e5bbcfe424597ca1ca452b299861e49f1b +keyring==25.4.1 \ + --hash=sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf \ + --hash=sha256:b07ebc55f3e8ed86ac81dd31ef14e81ace9dd9c3d4b5d77a6e9a2016d0d71a1b # via # gcp-releasetool # twine @@ -299,75 +268,76 @@ markdown-it-py==3.0.0 \ --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb # via rich -markupsafe==2.1.5 \ - --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \ - --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \ - --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \ - --hash=sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 \ - --hash=sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532 \ - --hash=sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f \ - --hash=sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617 \ - --hash=sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df \ - --hash=sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4 \ - --hash=sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906 \ - --hash=sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f \ - --hash=sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4 \ - --hash=sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8 \ - --hash=sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371 \ - --hash=sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2 \ - --hash=sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465 \ - --hash=sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52 \ - --hash=sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6 \ - --hash=sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169 \ - --hash=sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad \ - --hash=sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2 \ - --hash=sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0 \ - --hash=sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029 \ - --hash=sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f \ - --hash=sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a \ - --hash=sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced \ - --hash=sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5 \ - --hash=sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c \ - --hash=sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf \ - --hash=sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9 \ - --hash=sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb \ - --hash=sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad \ - --hash=sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3 \ - --hash=sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1 \ - --hash=sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46 \ - --hash=sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc \ - --hash=sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a \ - --hash=sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee \ - --hash=sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900 \ - --hash=sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5 \ - --hash=sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea \ - --hash=sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f \ - --hash=sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5 \ - --hash=sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e \ - --hash=sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a \ - --hash=sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f \ - --hash=sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50 \ - --hash=sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a \ - --hash=sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b \ - --hash=sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4 \ - --hash=sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff \ - --hash=sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2 \ - --hash=sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46 \ - --hash=sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b \ - --hash=sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf \ - --hash=sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 \ - --hash=sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5 \ - --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \ - --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \ - --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68 +markupsafe==3.0.1 \ + --hash=sha256:0778de17cff1acaeccc3ff30cd99a3fd5c50fc58ad3d6c0e0c4c58092b859396 \ + --hash=sha256:0f84af7e813784feb4d5e4ff7db633aba6c8ca64a833f61d8e4eade234ef0c38 \ + --hash=sha256:17b2aea42a7280db02ac644db1d634ad47dcc96faf38ab304fe26ba2680d359a \ + --hash=sha256:242d6860f1fd9191aef5fae22b51c5c19767f93fb9ead4d21924e0bcb17619d8 \ + --hash=sha256:244dbe463d5fb6d7ce161301a03a6fe744dac9072328ba9fc82289238582697b \ + --hash=sha256:26627785a54a947f6d7336ce5963569b5d75614619e75193bdb4e06e21d447ad \ + --hash=sha256:2a4b34a8d14649315c4bc26bbfa352663eb51d146e35eef231dd739d54a5430a \ + --hash=sha256:2ae99f31f47d849758a687102afdd05bd3d3ff7dbab0a8f1587981b58a76152a \ + --hash=sha256:312387403cd40699ab91d50735ea7a507b788091c416dd007eac54434aee51da \ + --hash=sha256:3341c043c37d78cc5ae6e3e305e988532b072329639007fd408a476642a89fd6 \ + --hash=sha256:33d1c36b90e570ba7785dacd1faaf091203d9942bc036118fab8110a401eb1a8 \ + --hash=sha256:3e683ee4f5d0fa2dde4db77ed8dd8a876686e3fc417655c2ece9a90576905344 \ + --hash=sha256:3ffb4a8e7d46ed96ae48805746755fadd0909fea2306f93d5d8233ba23dda12a \ + --hash=sha256:40621d60d0e58aa573b68ac5e2d6b20d44392878e0bfc159012a5787c4e35bc8 \ + --hash=sha256:40f1e10d51c92859765522cbd79c5c8989f40f0419614bcdc5015e7b6bf97fc5 \ + --hash=sha256:45d42d132cff577c92bfba536aefcfea7e26efb975bd455db4e6602f5c9f45e7 \ + --hash=sha256:48488d999ed50ba8d38c581d67e496f955821dc183883550a6fbc7f1aefdc170 \ + --hash=sha256:4935dd7883f1d50e2ffecca0aa33dc1946a94c8f3fdafb8df5c330e48f71b132 \ + --hash=sha256:4c2d64fdba74ad16138300815cfdc6ab2f4647e23ced81f59e940d7d4a1469d9 \ + --hash=sha256:4c8817557d0de9349109acb38b9dd570b03cc5014e8aabf1cbddc6e81005becd \ + --hash=sha256:4ffaaac913c3f7345579db4f33b0020db693f302ca5137f106060316761beea9 \ + --hash=sha256:5a4cb365cb49b750bdb60b846b0c0bc49ed62e59a76635095a179d440540c346 \ + --hash=sha256:62fada2c942702ef8952754abfc1a9f7658a4d5460fabe95ac7ec2cbe0d02abc \ + --hash=sha256:67c519635a4f64e495c50e3107d9b4075aec33634272b5db1cde839e07367589 \ + --hash=sha256:6a54c43d3ec4cf2a39f4387ad044221c66a376e58c0d0e971d47c475ba79c6b5 \ + --hash=sha256:7044312a928a66a4c2a22644147bc61a199c1709712069a344a3fb5cfcf16915 \ + --hash=sha256:730d86af59e0e43ce277bb83970530dd223bf7f2a838e086b50affa6ec5f9295 \ + --hash=sha256:800100d45176652ded796134277ecb13640c1a537cad3b8b53da45aa96330453 \ + --hash=sha256:80fcbf3add8790caddfab6764bde258b5d09aefbe9169c183f88a7410f0f6dea \ + --hash=sha256:82b5dba6eb1bcc29cc305a18a3c5365d2af06ee71b123216416f7e20d2a84e5b \ + --hash=sha256:852dc840f6d7c985603e60b5deaae1d89c56cb038b577f6b5b8c808c97580f1d \ + --hash=sha256:8ad4ad1429cd4f315f32ef263c1342166695fad76c100c5d979c45d5570ed58b \ + --hash=sha256:8ae369e84466aa70f3154ee23c1451fda10a8ee1b63923ce76667e3077f2b0c4 \ + --hash=sha256:93e8248d650e7e9d49e8251f883eed60ecbc0e8ffd6349e18550925e31bd029b \ + --hash=sha256:973a371a55ce9ed333a3a0f8e0bcfae9e0d637711534bcb11e130af2ab9334e7 \ + --hash=sha256:9ba25a71ebf05b9bb0e2ae99f8bc08a07ee8e98c612175087112656ca0f5c8bf \ + --hash=sha256:a10860e00ded1dd0a65b83e717af28845bb7bd16d8ace40fe5531491de76b79f \ + --hash=sha256:a4792d3b3a6dfafefdf8e937f14906a51bd27025a36f4b188728a73382231d91 \ + --hash=sha256:a7420ceda262dbb4b8d839a4ec63d61c261e4e77677ed7c66c99f4e7cb5030dd \ + --hash=sha256:ad91738f14eb8da0ff82f2acd0098b6257621410dcbd4df20aaa5b4233d75a50 \ + --hash=sha256:b6a387d61fe41cdf7ea95b38e9af11cfb1a63499af2759444b99185c4ab33f5b \ + --hash=sha256:b954093679d5750495725ea6f88409946d69cfb25ea7b4c846eef5044194f583 \ + --hash=sha256:bbde71a705f8e9e4c3e9e33db69341d040c827c7afa6789b14c6e16776074f5a \ + --hash=sha256:beeebf760a9c1f4c07ef6a53465e8cfa776ea6a2021eda0d0417ec41043fe984 \ + --hash=sha256:c91b394f7601438ff79a4b93d16be92f216adb57d813a78be4446fe0f6bc2d8c \ + --hash=sha256:c97ff7fedf56d86bae92fa0a646ce1a0ec7509a7578e1ed238731ba13aabcd1c \ + --hash=sha256:cb53e2a99df28eee3b5f4fea166020d3ef9116fdc5764bc5117486e6d1211b25 \ + --hash=sha256:cbf445eb5628981a80f54087f9acdbf84f9b7d862756110d172993b9a5ae81aa \ + --hash=sha256:d06b24c686a34c86c8c1fba923181eae6b10565e4d80bdd7bc1c8e2f11247aa4 \ + --hash=sha256:d98e66a24497637dd31ccab090b34392dddb1f2f811c4b4cd80c230205c074a3 \ + --hash=sha256:db15ce28e1e127a0013dfb8ac243a8e392db8c61eae113337536edb28bdc1f97 \ + --hash=sha256:db842712984e91707437461930e6011e60b39136c7331e971952bb30465bc1a1 \ + --hash=sha256:e24bfe89c6ac4c31792793ad9f861b8f6dc4546ac6dc8f1c9083c7c4f2b335cd \ + --hash=sha256:e81c52638315ff4ac1b533d427f50bc0afc746deb949210bc85f05d4f15fd772 \ + --hash=sha256:e9393357f19954248b00bed7c56f29a25c930593a77630c719653d51e7669c2a \ + --hash=sha256:ee3941769bd2522fe39222206f6dd97ae83c442a94c90f2b7a25d847d40f4729 \ + --hash=sha256:f31ae06f1328595d762c9a2bf29dafd8621c7d3adc130cbb46278079758779ca \ + --hash=sha256:f94190df587738280d544971500b9cafc9b950d32efcb1fba9ac10d84e6aa4e6 \ + --hash=sha256:fa7d686ed9883f3d664d39d5a8e74d3c5f63e603c2e3ff0abcba23eac6542635 \ + --hash=sha256:fb532dd9900381d2e8f48172ddc5a59db4c445a11b9fab40b3b786da40d3b56b \ + --hash=sha256:fe32482b37b4b00c7a52a07211b479653b7fe4f22b2e481b9a9b099d8a430f2f # via jinja2 mdurl==0.1.2 \ --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba # via markdown-it-py -more-itertools==10.3.0 \ - --hash=sha256:e5d93ef411224fbcef366a6e8ddc4c5781bc6359d43412a65dd5964e46111463 \ - --hash=sha256:ea6a02e24a9161e51faad17a8782b92a0df82c12c1c8886fec7f0c3fa1a1b320 +more-itertools==10.5.0 \ + --hash=sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef \ + --hash=sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6 # via # jaraco-classes # jaraco-functools @@ -389,9 +359,9 @@ nh3==0.2.18 \ --hash=sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307 \ --hash=sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe # via readme-renderer -nox==2024.4.15 \ - --hash=sha256:6492236efa15a460ecb98e7b67562a28b70da006ab0be164e8821177577c0565 \ - --hash=sha256:ecf6700199cdfa9e5ea0a41ff5e6ef4641d09508eda6edb89d9987864115817f +nox==2024.10.9 \ + --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ + --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 # via -r requirements.in packaging==24.1 \ --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ @@ -403,41 +373,41 @@ pkginfo==1.10.0 \ --hash=sha256:5df73835398d10db79f8eecd5cd86b1f6d29317589ea70796994d49399af6297 \ --hash=sha256:889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097 # via twine -platformdirs==4.2.2 \ - --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ - --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 +platformdirs==4.3.6 \ + --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ + --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv proto-plus==1.24.0 \ --hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \ --hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12 # via google-api-core -protobuf==5.27.2 \ - --hash=sha256:0e341109c609749d501986b835f667c6e1e24531096cff9d34ae411595e26505 \ - --hash=sha256:176c12b1f1c880bf7a76d9f7c75822b6a2bc3db2d28baa4d300e8ce4cde7409b \ - --hash=sha256:354d84fac2b0d76062e9b3221f4abbbacdfd2a4d8af36bab0474f3a0bb30ab38 \ - --hash=sha256:4fadd8d83e1992eed0248bc50a4a6361dc31bcccc84388c54c86e530b7f58863 \ - --hash=sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470 \ - --hash=sha256:610e700f02469c4a997e58e328cac6f305f649826853813177e6290416e846c6 \ - --hash=sha256:7fc3add9e6003e026da5fc9e59b131b8f22b428b991ccd53e2af8071687b4fce \ - --hash=sha256:9e8f199bf7f97bd7ecebffcae45ebf9527603549b2b562df0fbc6d4d688f14ca \ - --hash=sha256:a109916aaac42bff84702fb5187f3edadbc7c97fc2c99c5ff81dd15dcce0d1e5 \ - --hash=sha256:b848dbe1d57ed7c191dfc4ea64b8b004a3f9ece4bf4d0d80a367b76df20bf36e \ - --hash=sha256:f3ecdef226b9af856075f28227ff2c90ce3a594d092c39bee5513573f25e2714 +protobuf==5.28.2 \ + --hash=sha256:2c69461a7fcc8e24be697624c09a839976d82ae75062b11a0972e41fd2cd9132 \ + --hash=sha256:35cfcb15f213449af7ff6198d6eb5f739c37d7e4f1c09b5d0641babf2cc0c68f \ + --hash=sha256:52235802093bd8a2811abbe8bf0ab9c5f54cca0a751fdd3f6ac2a21438bffece \ + --hash=sha256:59379674ff119717404f7454647913787034f03fe7049cbef1d74a97bb4593f0 \ + --hash=sha256:5e8a95246d581eef20471b5d5ba010d55f66740942b95ba9b872d918c459452f \ + --hash=sha256:87317e9bcda04a32f2ee82089a204d3a2f0d3c8aeed16568c7daf4756e4f1fe0 \ + --hash=sha256:8ddc60bf374785fb7cb12510b267f59067fa10087325b8e1855b898a0d81d276 \ + --hash=sha256:a8b9403fc70764b08d2f593ce44f1d2920c5077bf7d311fefec999f8c40f78b7 \ + --hash=sha256:c0ea0123dac3399a2eeb1a1443d82b7afc9ff40241433296769f7da42d142ec3 \ + --hash=sha256:ca53faf29896c526863366a52a8f4d88e69cd04ec9571ed6082fa117fac3ab36 \ + --hash=sha256:eeea10f3dc0ac7e6b4933d32db20662902b4ab81bf28df12218aa389e9c2102d # via # gcp-docuploader # gcp-releasetool # google-api-core # googleapis-common-protos # proto-plus -pyasn1==0.6.0 \ - --hash=sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c \ - --hash=sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473 +pyasn1==0.6.1 \ + --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ + --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 # via # pyasn1-modules # rsa -pyasn1-modules==0.4.0 \ - --hash=sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6 \ - --hash=sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b +pyasn1-modules==0.4.1 \ + --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ + --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c # via google-auth pycparser==2.22 \ --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ @@ -449,9 +419,9 @@ pygments==2.18.0 \ # via # readme-renderer # rich -pyjwt==2.8.0 \ - --hash=sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de \ - --hash=sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320 +pyjwt==2.9.0 \ + --hash=sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850 \ + --hash=sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c # via gcp-releasetool pyperclip==1.9.0 \ --hash=sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310 @@ -481,9 +451,9 @@ rfc3986==2.0.0 \ --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c # via twine -rich==13.7.1 \ - --hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \ - --hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432 +rich==13.9.2 \ + --hash=sha256:51a2c62057461aaf7152b4d611168f93a9fc73068f8ded2790f29fe2b5366d0c \ + --hash=sha256:8c82a3d3f8dcfe9e734771313e606b39d8247bb6b826e196f4914b333b743cf1 # via twine rsa==4.9 \ --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ @@ -499,9 +469,9 @@ six==1.16.0 \ # via # gcp-docuploader # python-dateutil -tomli==2.0.1 \ - --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ - --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f +tomli==2.0.2 \ + --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ + --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed # via nox twine==5.1.1 \ --hash=sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997 \ @@ -510,28 +480,30 @@ twine==5.1.1 \ typing-extensions==4.12.2 \ --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 - # via -r requirements.in -urllib3==2.2.2 \ - --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \ - --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168 + # via + # -r requirements.in + # rich +urllib3==2.2.3 \ + --hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \ + --hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9 # via # requests # twine -virtualenv==20.26.3 \ - --hash=sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a \ - --hash=sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589 +virtualenv==20.26.6 \ + --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ + --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 # via nox -wheel==0.43.0 \ - --hash=sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85 \ - --hash=sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81 +wheel==0.44.0 \ + --hash=sha256:2376a90c98cc337d18623527a97c31797bd02bad0033d41547043a1cbfbe448f \ + --hash=sha256:a29c3f2817e95ab89aa4660681ad547c0e9547f20e75b0562fe7723c9a2a9d49 # via -r requirements.in -zipp==3.19.2 \ - --hash=sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19 \ - --hash=sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c +zipp==3.20.2 \ + --hash=sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350 \ + --hash=sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==70.2.0 \ - --hash=sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05 \ - --hash=sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1 +setuptools==75.1.0 \ + --hash=sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2 \ + --hash=sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538 # via -r requirements.in diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/common.cfg new file mode 100644 index 000000000000..ee96889957e5 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.13" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-313" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/continuous.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic-head.cfg new file mode 100644 index 000000000000..5aa01bab5bf3 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic.cfg new file mode 100644 index 000000000000..71cd1e597e38 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/presubmit.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh index 55910c8ba178..53e365bc4e79 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh @@ -33,7 +33,8 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Install nox -python3.9 -m pip install --upgrade --quiet nox +# `virtualenv==20.26.6` is added for Python 3.7 compatibility +python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6 # Use secrets acessor service account to get secrets if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index 7be61e6b61a5..1900c5e36d1f 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9, 3.10, 3.11 and 3.12 on both UNIX and Windows. + 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.12 -- -k + $ nox -s unit-3.13 -- -k .. note:: @@ -227,6 +227,7 @@ We support: - `Python 3.10`_ - `Python 3.11`_ - `Python 3.12`_ +- `Python 3.13`_ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ @@ -234,6 +235,7 @@ We support: .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ .. _Python 3.12: https://docs.python.org/3.12/ +.. _Python 3.13: https://docs.python.org/3.13/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py b/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py index 3b7135946fd5..c9a3d1ecbf2a 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py +++ b/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index 3b7135946fd5..c9a3d1ecbf2a 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index 3b7135946fd5..c9a3d1ecbf2a 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/notebooks/noxfile.py b/packages/google-cloud-bigquery/samples/notebooks/noxfile.py index 3b7135946fd5..c9a3d1ecbf2a 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/noxfile.py +++ b/packages/google-cloud-bigquery/samples/notebooks/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index 3b7135946fd5..c9a3d1ecbf2a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] From 1a678b93c68953aa60864d7a7d3da533640f9f90 Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Fri, 1 Nov 2024 12:53:52 -0400 Subject: [PATCH 1850/2016] chore: two fixit fixes (#2050) --- .../google/cloud/bigquery/client.py | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 1c222f2ddb7c..52c5084e366c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -328,6 +328,15 @@ def get_service_account_email( ) -> str: """Get the email address of the project's BigQuery service account + Example: + + .. code-block:: python + + from google.cloud import bigquery + client = bigquery.Client() + client.get_service_account_email() + # returns an email similar to: my_service_account@my-project.iam.gserviceaccount.com + Note: This is the service account that BigQuery uses to manage tables encrypted by a key in KMS. @@ -345,13 +354,6 @@ def get_service_account_email( str: service account email address - Example: - - >>> from google.cloud import bigquery - >>> client = bigquery.Client() - >>> client.get_service_account_email() - my_service_account@my-project.iam.gserviceaccount.com - """ if project is None: project = self.project @@ -629,9 +631,19 @@ def create_dataset( ) -> Dataset: """API call: create the dataset via a POST request. + See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert + Example: + + .. code-block:: python + + from google.cloud import bigquery + client = bigquery.Client() + dataset = bigquery.Dataset('my_project.my_dataset') + dataset = client.create_dataset(dataset) + Args: dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ @@ -658,14 +670,6 @@ def create_dataset( Raises: google.cloud.exceptions.Conflict: If the dataset already exists. - - Example: - - >>> from google.cloud import bigquery - >>> client = bigquery.Client() - >>> dataset = bigquery.Dataset('my_project.my_dataset') - >>> dataset = client.create_dataset(dataset) - """ dataset = self._dataset_from_arg(dataset) if isinstance(dataset, DatasetReference): From 8f2abe381c140c3e609e187ead02ec1821ed172c Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 7 Nov 2024 10:17:28 -0500 Subject: [PATCH 1851/2016] chore(main): release 3.27.0 (#2040) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Chalmer Lowe --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 5de99a6ca2b7..989b7f020faa 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.27.0](https://github.com/googleapis/python-bigquery/compare/v3.26.0...v3.27.0) (2024-11-01) + + +### Features + +* Updates to allow users to set max_stream_count ([#2039](https://github.com/googleapis/python-bigquery/issues/2039)) ([7372ad6](https://github.com/googleapis/python-bigquery/commit/7372ad659fd3316a602e90f224e9a3304d4c1419)) + ## [3.26.0](https://github.com/googleapis/python-bigquery/compare/v3.25.0...v3.26.0) (2024-09-25) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index ebc9112530b1..8f4418777399 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.26.0" +__version__ = "3.27.0" From a65d5230eda03cbc8a27e01d99ddeb2f9741fbc4 Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Thu, 7 Nov 2024 15:56:57 -0500 Subject: [PATCH 1852/2016] docs: render fields correctly for update calls (#2055) --- .../google/cloud/bigquery/client.py | 82 +++++++++---------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 52c5084e366c..97f239f7a9e0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1184,6 +1184,19 @@ def update_dataset( must be provided. If a field is listed in ``fields`` and is ``None`` in ``dataset``, it will be deleted. + For example, to update the default expiration times, specify + both properties in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_dataset( + dataset, + [ + "default_partition_expiration_ms", + "default_table_expiration_ms", + ] + ) + If ``dataset.etag`` is not ``None``, the update will only succeed if the dataset on the server has the same ETag. Thus reading a dataset with ``get_dataset``, changing its fields, @@ -1198,19 +1211,6 @@ def update_dataset( The properties of ``dataset`` to change. These are strings corresponding to the properties of :class:`~google.cloud.bigquery.dataset.Dataset`. - - For example, to update the default expiration times, specify - both properties in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_dataset( - dataset, - [ - "default_partition_expiration_ms", - "default_table_expiration_ms", - ] - ) retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -1254,6 +1254,15 @@ def update_model( must be provided. If a field is listed in ``fields`` and is ``None`` in ``model``, the field value will be deleted. + For example, to update the descriptive properties of the model, + specify them in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_model( + model, ["description", "friendly_name"] + ) + If ``model.etag`` is not ``None``, the update will only succeed if the model on the server has the same ETag. Thus reading a model with ``get_model``, changing its fields, and then passing it to @@ -1266,15 +1275,6 @@ def update_model( The properties of ``model`` to change. These are strings corresponding to the properties of :class:`~google.cloud.bigquery.model.Model`. - - For example, to update the descriptive properties of the model, - specify them in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_model( - model, ["description", "friendly_name"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1318,6 +1318,15 @@ def update_routine( must be provided. If a field is listed in ``fields`` and is ``None`` in ``routine``, the field value will be deleted. + For example, to update the description property of the routine, + specify it in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_routine( + routine, ["description"] + ) + .. warning:: During beta, partial updates are not supported. You must provide all fields in the resource. @@ -1336,15 +1345,6 @@ def update_routine( fields (Sequence[str]): The fields of ``routine`` to change, spelled as the :class:`~google.cloud.bigquery.routine.Routine` properties. - - For example, to update the description property of the routine, - specify it in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_routine( - routine, ["description"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1392,6 +1392,16 @@ def update_table( must be provided. If a field is listed in ``fields`` and is ``None`` in ``table``, the field value will be deleted. + For example, to update the descriptive properties of the table, + specify them in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_table( + table, + ["description", "friendly_name"] + ) + If ``table.etag`` is not ``None``, the update will only succeed if the table on the server has the same ETag. Thus reading a table with ``get_table``, changing its fields, and then passing it to @@ -1403,16 +1413,6 @@ def update_table( fields (Sequence[str]): The fields of ``table`` to change, spelled as the :class:`~google.cloud.bigquery.table.Table` properties. - - For example, to update the descriptive properties of the table, - specify them in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_table( - table, - ["description", "friendly_name"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): From 698cee82d2ce2070d3bfb8ea72d44f5d0ed2542e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 11 Nov 2024 11:35:36 -0800 Subject: [PATCH 1853/2016] chore(python): remove obsolete release scripts and config files (#2057) Source-Link: https://github.com/googleapis/synthtool/commit/635751753776b1a7cabd4dcaa48013a96274372d Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:91d0075c6f2fd6a073a06168feee19fa2a8507692f2519a1dc7de3366d157e99 Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- packages/google-cloud-bigquery/.github/release-trigger.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 7672b49b6307..b2770d4e0379 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5cddfe2fb5019bbf78335bc55f15bc13e18354a56b3ff46e1834f8e540807f05 -# created: 2024-10-31T01:41:07.349286254Z + digest: sha256:91d0075c6f2fd6a073a06168feee19fa2a8507692f2519a1dc7de3366d157e99 +# created: 2024-11-11T16:13:09.302418532Z diff --git a/packages/google-cloud-bigquery/.github/release-trigger.yml b/packages/google-cloud-bigquery/.github/release-trigger.yml index 4bb79e58eadf..b975c190db1b 100644 --- a/packages/google-cloud-bigquery/.github/release-trigger.yml +++ b/packages/google-cloud-bigquery/.github/release-trigger.yml @@ -1,2 +1,2 @@ enabled: true -multiScmName: +multiScmName: python-bigquery From 75b22cbe456041012fdfa87c5c0e296acc25392d Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Tue, 12 Nov 2024 09:48:26 -0500 Subject: [PATCH 1854/2016] build: Use python 3.10 for docs session (#2058) Co-authored-by: Lingqing Gan --- packages/google-cloud-bigquery/noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 2376309ff7c7..750a6b459068 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -462,7 +462,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python="3.9") +@nox.session(python="3.10") @_calculate_duration def docs(session): """Build the docs.""" From 07d5a23e35125657b27d23edb649ab383a885e8b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 12 Nov 2024 14:05:24 -0500 Subject: [PATCH 1855/2016] chore(python): update dependencies in .kokoro/docker/docs (#2060) Source-Link: https://github.com/googleapis/synthtool/commit/59171c8f83f3522ce186e4d110d27e772da4ba7a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562 Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .../.github/.OwlBot.lock.yaml | 4 ++-- .../.kokoro/docker/docs/requirements.txt | 20 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index b2770d4e0379..6301519a9a05 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:91d0075c6f2fd6a073a06168feee19fa2a8507692f2519a1dc7de3366d157e99 -# created: 2024-11-11T16:13:09.302418532Z + digest: sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562 +# created: 2024-11-12T12:09:45.821174897Z diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt index 66eacc82f041..8bb0764594b1 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile --allow-unsafe --generate-hashes requirements.in @@ -8,9 +8,9 @@ argcomplete==3.5.1 \ --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 # via nox -colorlog==6.8.2 \ - --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ - --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 +colorlog==6.9.0 \ + --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ + --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 # via nox distlib==0.3.9 \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ @@ -24,9 +24,9 @@ nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 # via -r requirements.in -packaging==24.1 \ - --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ - --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 +packaging==24.2 \ + --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ + --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f # via nox platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ @@ -36,7 +36,7 @@ tomli==2.0.2 \ --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed # via nox -virtualenv==20.26.6 \ - --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ - --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 +virtualenv==20.27.1 \ + --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \ + --hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4 # via nox From 2704a07e569a73bdd3dc6c9efcfe9736057779fc Mon Sep 17 00:00:00 2001 From: Rin Arakaki Date: Wed, 13 Nov 2024 22:20:57 +0900 Subject: [PATCH 1856/2016] feat: migrate to pyproject.toml (#2041) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Migrate to pyproject.toml * Update * Add copyright notice * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update pyproject.toml --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- packages/google-cloud-bigquery/pyproject.toml | 104 ++++++++++++++ packages/google-cloud-bigquery/setup.py | 128 +----------------- .../tests/unit/test_table.py | 2 +- 3 files changed, 107 insertions(+), 127 deletions(-) create mode 100644 packages/google-cloud-bigquery/pyproject.toml diff --git a/packages/google-cloud-bigquery/pyproject.toml b/packages/google-cloud-bigquery/pyproject.toml new file mode 100644 index 000000000000..44a958323be0 --- /dev/null +++ b/packages/google-cloud-bigquery/pyproject.toml @@ -0,0 +1,104 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "google-cloud-bigquery" +authors = [{ name = "Google LLC", email = "googleapis-packages@google.com" }] +license = { text = "Apache 2.0" } +requires-python = ">=3.7" +description = "Google BigQuery API client library" +readme = "README.rst" +classifiers = [ + # Should be one of: + # "Development Status :: 3 - Alpha" + # "Development Status :: 4 - Beta" + # "Development Status :: 5 - Production/Stable" + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Topic :: Internet", +] +dependencies = [ + "google-api-core[grpc] >= 2.11.1, < 3.0.0dev", + "google-auth >= 2.14.1, < 3.0.0dev", + "google-cloud-core >= 2.4.1, < 3.0.0dev", + "google-resumable-media >= 2.0.0, < 3.0dev", + "packaging >= 20.0.0", + "python-dateutil >= 2.7.3, < 3.0dev", + "requests >= 2.21.0, < 3.0.0dev", +] +dynamic = ["version"] + +[project.urls] +Repository = "https://github.com/googleapis/python-bigquery" + +[project.optional-dependencies] +# bqstorage had a period where it was a required dependency, and has been +# moved back to optional due to bloat. See +# https://github.com/googleapis/python-bigquery/issues/1196 for more background. +bqstorage = [ + "google-cloud-bigquery-storage >= 2.6.0, < 3.0.0dev", + # Due to an issue in pip's dependency resolver, the `grpc` extra is not + # installed, even though `google-cloud-bigquery-storage` specifies it + # as `google-api-core[grpc]`. We thus need to explicitly specify it here. + # See: https://github.com/googleapis/python-bigquery/issues/83 The + # grpc.Channel.close() method isn't added until 1.32.0. + # https://github.com/grpc/grpc/pull/15254 + "grpcio >= 1.47.0, < 2.0dev", + "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", + "pyarrow >= 3.0.0", +] +pandas = [ + "pandas >= 1.1.0", + "pyarrow >= 3.0.0", + "db-dtypes >= 0.3.0, < 2.0.0dev", + "importlib_metadata >= 1.0.0; python_version < '3.8'", +] +ipywidgets = ["ipywidgets >= 7.7.0", "ipykernel >= 6.0.0"] +geopandas = ["geopandas >= 0.9.0, < 1.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] +ipython = ["bigquery-magics >= 0.1.0"] +tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] +opentelemetry = [ + "opentelemetry-api >= 1.1.0", + "opentelemetry-sdk >= 1.1.0", + "opentelemetry-instrumentation >= 0.20b0", +] +bigquery_v2 = [ + "proto-plus >= 1.22.3, < 2.0.0dev", + "protobuf >= 3.20.2, < 6.0.0dev, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. +] +all = [ + "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,tqdm,opentelemetry,bigquery_v2]", +] + +[tool.setuptools.dynamic] +version = { attr = "google.cloud.bigquery.version.__version__" } + +[tool.setuptools.packages.find] +# Only include packages under the 'google' namespace. Do not include tests, +# benchmarks, etc. +include = ["google*"] diff --git a/packages/google-cloud-bigquery/setup.py b/packages/google-cloud-bigquery/setup.py index 617685543ca9..2ad29ecbfed6 100644 --- a/packages/google-cloud-bigquery/setup.py +++ b/packages/google-cloud-bigquery/setup.py @@ -12,131 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import io -import os +import setuptools # type: ignore -import setuptools - -# Package metadata. - -name = "google-cloud-bigquery" -description = "Google BigQuery API client library" - -# Should be one of: -# 'Development Status :: 3 - Alpha' -# 'Development Status :: 4 - Beta' -# 'Development Status :: 5 - Production/Stable' -release_status = "Development Status :: 5 - Production/Stable" -dependencies = [ - "google-api-core[grpc] >= 2.11.1, <3.0.0dev", - "google-auth >= 2.14.1, <3.0.0dev", - "google-cloud-core >= 2.4.1, <3.0.0dev", - "google-resumable-media >= 2.0.0, < 3.0dev", - "packaging >= 20.0.0", - "python-dateutil >= 2.7.3, <3.0dev", - "requests >= 2.21.0, < 3.0.0dev", -] -pyarrow_dependency = "pyarrow >= 3.0.0" -extras = { - # bqstorage had a period where it was a required dependency, and has been - # moved back to optional due to bloat. See - # https://github.com/googleapis/python-bigquery/issues/1196 for more background. - "bqstorage": [ - "google-cloud-bigquery-storage >= 2.6.0, <3.0.0dev", - # Due to an issue in pip's dependency resolver, the `grpc` extra is not - # installed, even though `google-cloud-bigquery-storage` specifies it - # as `google-api-core[grpc]`. We thus need to explicitly specify it here. - # See: https://github.com/googleapis/python-bigquery/issues/83 The - # grpc.Channel.close() method isn't added until 1.32.0. - # https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.47.0, < 2.0dev", - "grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'", - pyarrow_dependency, - ], - "pandas": [ - "pandas>=1.1.0", - pyarrow_dependency, - "db-dtypes>=0.3.0,<2.0.0dev", - "importlib_metadata>=1.0.0; python_version<'3.8'", - ], - "ipywidgets": [ - "ipywidgets>=7.7.0", - "ipykernel>=6.0.0", - ], - "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <3.0.0dev"], - "ipython": [ - "bigquery-magics >= 0.1.0", - ], - "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], - "opentelemetry": [ - "opentelemetry-api >= 1.1.0", - "opentelemetry-sdk >= 1.1.0", - "opentelemetry-instrumentation >= 0.20b0", - ], - "bigquery_v2": [ - "proto-plus >= 1.22.3, <2.0.0dev", - "protobuf>=3.20.2,<6.0.0dev,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. - ], -} - -all_extras = [] - -for extra in extras: - all_extras.extend(extras[extra]) - -extras["all"] = all_extras - -# Setup boilerplate below this line. - -package_root = os.path.abspath(os.path.dirname(__file__)) - -readme_filename = os.path.join(package_root, "README.rst") -with io.open(readme_filename, encoding="utf-8") as readme_file: - readme = readme_file.read() - -version = {} -with open(os.path.join(package_root, "google/cloud/bigquery/version.py")) as fp: - exec(fp.read(), version) -version = version["__version__"] - -# Only include packages under the 'google' namespace. Do not include tests, -# benchmarks, etc. -packages = [ - package - for package in setuptools.find_namespace_packages() - if package.startswith("google") -] - -setuptools.setup( - name=name, - version=version, - description=description, - long_description=readme, - author="Google LLC", - author_email="googleapis-packages@google.com", - license="Apache 2.0", - url="https://github.com/googleapis/python-bigquery", - classifiers=[ - release_status, - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Operating System :: OS Independent", - "Topic :: Internet", - ], - platforms="Posix; MacOS X; Windows", - packages=packages, - install_requires=dependencies, - extras_require=extras, - python_requires=">=3.7", - include_package_data=True, - zip_safe=False, -) +setuptools.setup() diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index d6febcfb1753..018a096df153 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2011,7 +2011,7 @@ def _make_one( path=None, schema=None, table=None, - **kwargs + **kwargs, ): from google.cloud.bigquery.table import TableReference From 23ef8b4c138296f6e4edccf1af10b8bb5b330b44 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 15 Nov 2024 19:17:17 +0100 Subject: [PATCH 1857/2016] chore(deps): update all dependencies (#2038) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 1089dc1957ab..30b4a54a1ca3 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -24,7 +24,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.66.1; python_version >= '3.8' +grpcio==1.66.2; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 From 3a66cefcda910197cb68ef0c0921723cd071a53d Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 15 Nov 2024 13:58:58 -0500 Subject: [PATCH 1858/2016] fix: Allow geopandas 1.x (#2065) Expand range to avoid diamond dependency issues See https://pypi.org/project/geopandas/1.0.1/ --- packages/google-cloud-bigquery/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/pyproject.toml b/packages/google-cloud-bigquery/pyproject.toml index 44a958323be0..ecf21d922356 100644 --- a/packages/google-cloud-bigquery/pyproject.toml +++ b/packages/google-cloud-bigquery/pyproject.toml @@ -79,7 +79,7 @@ pandas = [ "importlib_metadata >= 1.0.0; python_version < '3.8'", ] ipywidgets = ["ipywidgets >= 7.7.0", "ipykernel >= 6.0.0"] -geopandas = ["geopandas >= 0.9.0, < 1.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] +geopandas = ["geopandas >= 0.9.0, < 2.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] ipython = ["bigquery-magics >= 0.1.0"] tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] opentelemetry = [ From 936b97e0a0d41c86c239666012c4e717db4d23da Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 15 Nov 2024 21:29:06 +0100 Subject: [PATCH 1859/2016] chore(deps): update all dependencies (#2064) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * revert * Pin pyparsing for Python 3.7/3.8 * revert * Pin pyarrow for Python 3.8 --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou Co-authored-by: Chalmer Lowe --- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements.txt | 26 ++++++++++--------- .../samples/magics/requirements.txt | 6 ++--- .../samples/notebooks/requirements.txt | 6 ++--- .../samples/snippets/requirements.txt | 2 +- 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 383829d7d756..1658007414e1 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.26.0 +google-cloud-bigquery==3.27.0 google-auth-oauthlib==1.2.1 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 30b4a54a1ca3..42f2b5a86c5e 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -2,47 +2,49 @@ attrs==24.2.0 certifi==2024.8.30 cffi===1.15.1; python_version == '3.7' cffi==1.17.1; python_version >= '3.8' -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 click==8.1.7 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.3.0 +db-dtypes==1.3.1 Fiona===1.9.6; python_version == '3.7' Fiona==1.10.1; python_version >= '3.8' geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.20.0 -google-auth==2.35.0 -google-cloud-bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +google-api-core==2.23.0 +google-auth==2.36.0 +google-cloud-bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 google-cloud-core==2.4.1 google-crc32c===1.5.0; python_version < '3.9' google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 -googleapis-common-protos==1.65.0 +googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.66.2; python_version >= '3.8' +grpcio==1.67.1; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 packaging===24.0; python_version == '3.7' -packaging==24.1; python_version >= '3.8' +packaging==24.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.3; python_version >= '3.9' -proto-plus==1.24.0 +proto-plus==1.25.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==17.0.0; python_version >= '3.8' +pyarrow==17.0.0; python_version == '3.8' +pyarrow==18.0.0; python_version >= '3.9' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.1; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.1; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' -pyparsing==3.1.4 +pyparsing===3.1.4; python_version < '3.9' +pyparsing==3.2.0; python_version >= '3.9' python-dateutil==2.9.0.post0 pytz==2024.2 PyYAML===6.0.1; python_version == '3.7' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 6386fb6d2462..543d9a5121ec 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,7 +1,7 @@ bigquery_magics==0.4.0 -db-dtypes==1.3.0 -google.cloud.bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +db-dtypes==1.3.1 +google.cloud.bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 7463e1afced6..ca8a0a13e9ab 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ bigquery-magics==0.4.0 -db-dtypes==1.3.0 -google-cloud-bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +db-dtypes==1.3.1 +google-cloud-bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 65ce0be9fa50..307ebac24fb0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.26.0 +google-cloud-bigquery==3.27.0 From 65298d70eb8f5ad265c6919b1877178f168b4f75 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 15 Nov 2024 22:01:53 +0100 Subject: [PATCH 1860/2016] chore(deps): update all dependencies (#2068) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * pin pyarrow --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 42f2b5a86c5e..d08bad2585e6 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -34,8 +34,8 @@ pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.3; python_version >= '3.9' proto-plus==1.25.0 -pyarrow==12.0.1; python_version == '3.7' -pyarrow==17.0.0; python_version == '3.8' +pyarrow===12.0.1; python_version == '3.7' +pyarrow===17.0.0; python_version == '3.8' pyarrow==18.0.0; python_version >= '3.9' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.1; python_version >= '3.8' From 9d82724112d01289fe07b101615af9712ff912a6 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 18 Nov 2024 17:28:26 +0100 Subject: [PATCH 1861/2016] chore(deps): update all dependencies (#2070) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index d08bad2585e6..438018f88cf4 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -24,7 +24,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.67.1; python_version >= '3.8' +grpcio==1.68.0; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 From b59cbb8b7f742afd541f35f177c9e3381f72f641 Mon Sep 17 00:00:00 2001 From: Kien Truong Date: Sat, 23 Nov 2024 04:35:54 +0700 Subject: [PATCH 1862/2016] feat: support setting max_stream_count when fetching query result (#2051) * feat: support setting max_stream_count when fetching query result Allow user to set max_stream_count when fetching result using BigQuery Storage API with RowIterator's incremental methods: * to_arrow_iterable * to_dataframe_iterable * docs: update docs about max_stream_count for ordered query * fix: add max_stream_count params to _EmptyRowIterator's methods * test: add tests for RowIterator's max_stream_count parameter * docs: add notes on valid max_stream_count range in docstring * use a different way to iterate result --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/table.py | 44 ++++++++++++ .../tests/unit/test_table.py | 70 +++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index faf827be4d3a..dcaf377e3f43 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1812,6 +1812,7 @@ def to_arrow_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore + max_stream_count: Optional[int] = None, ) -> Iterator["pyarrow.RecordBatch"]: """[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream. @@ -1836,6 +1837,22 @@ def to_arrow_iterable( created by the server. If ``max_queue_size`` is :data:`None`, the queue size is infinite. + max_stream_count (Optional[int]): + The maximum number of parallel download streams when + using BigQuery Storage API. Ignored if + BigQuery Storage API is not used. + + This setting also has no effect if the query result + is deterministically ordered with ORDER BY, + in which case, the number of download stream is always 1. + + If set to 0 or None (the default), the number of download + streams is determined by BigQuery the server. However, this behaviour + can require a lot of memory to store temporary download result, + especially with very large queries. In that case, + setting this parameter value to a value > 0 can help + reduce system resource consumption. + Returns: pyarrow.RecordBatch: A generator of :class:`~pyarrow.RecordBatch`. @@ -1852,6 +1869,7 @@ def to_arrow_iterable( preserve_order=self._preserve_order, selected_fields=self._selected_fields, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) tabledata_list_download = functools.partial( _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema @@ -1978,6 +1996,7 @@ def to_dataframe_iterable( bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Optional[Dict[str, Any]] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore + max_stream_count: Optional[int] = None, ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2008,6 +2027,22 @@ def to_dataframe_iterable( .. versionadded:: 2.14.0 + max_stream_count (Optional[int]): + The maximum number of parallel download streams when + using BigQuery Storage API. Ignored if + BigQuery Storage API is not used. + + This setting also has no effect if the query result + is deterministically ordered with ORDER BY, + in which case, the number of download stream is always 1. + + If set to 0 or None (the default), the number of download + streams is determined by BigQuery the server. However, this behaviour + can require a lot of memory to store temporary download result, + especially with very large queries. In that case, + setting this parameter value to a value > 0 can help + reduce system resource consumption. + Returns: pandas.DataFrame: A generator of :class:`~pandas.DataFrame`. @@ -2034,6 +2069,7 @@ def to_dataframe_iterable( preserve_order=self._preserve_order, selected_fields=self._selected_fields, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) tabledata_list_download = functools.partial( _pandas_helpers.download_dataframe_row_iterator, @@ -2690,6 +2726,7 @@ def to_dataframe_iterable( bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Optional[Dict[str, Any]] = None, max_queue_size: Optional[int] = None, + max_stream_count: Optional[int] = None, ) -> Iterator["pandas.DataFrame"]: """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2705,6 +2742,9 @@ def to_dataframe_iterable( max_queue_size: Ignored. Added for compatibility with RowIterator. + max_stream_count: + Ignored. Added for compatibility with RowIterator. + Returns: An iterator yielding a single empty :class:`~pandas.DataFrame`. @@ -2719,6 +2759,7 @@ def to_arrow_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: Optional[int] = None, + max_stream_count: Optional[int] = None, ) -> Iterator["pyarrow.RecordBatch"]: """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2731,6 +2772,9 @@ def to_arrow_iterable( max_queue_size: Ignored. Added for compatibility with RowIterator. + max_stream_count: + Ignored. Added for compatibility with RowIterator. + Returns: An iterator yielding a single empty :class:`~pyarrow.RecordBatch`. """ diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 018a096df153..d81ad2dca368 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -5822,3 +5822,73 @@ def test_table_reference_to_bqstorage_v1_stable(table_path): for klass in (mut.TableReference, mut.Table, mut.TableListItem): got = klass.from_string(table_path).to_bqstorage() assert got == expected + + +@pytest.mark.parametrize("preserve_order", [True, False]) +def test_to_arrow_iterable_w_bqstorage_max_stream_count(preserve_order): + pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + bqstorage_client.create_read_session.return_value = session + + row_iterator = mut.RowIterator( + _mock_client(), + api_request=None, + path=None, + schema=[ + schema.SchemaField("colA", "INTEGER"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + row_iterator._preserve_order = preserve_order + + max_stream_count = 132 + result_iterable = row_iterator.to_arrow_iterable( + bqstorage_client=bqstorage_client, max_stream_count=max_stream_count + ) + list(result_iterable) + bqstorage_client.create_read_session.assert_called_once_with( + parent=mock.ANY, + read_session=mock.ANY, + max_stream_count=max_stream_count if not preserve_order else 1, + ) + + +@pytest.mark.parametrize("preserve_order", [True, False]) +def test_to_dataframe_iterable_w_bqstorage_max_stream_count(preserve_order): + pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + bqstorage_client.create_read_session.return_value = session + + row_iterator = mut.RowIterator( + _mock_client(), + api_request=None, + path=None, + schema=[ + schema.SchemaField("colA", "INTEGER"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + row_iterator._preserve_order = preserve_order + + max_stream_count = 132 + result_iterable = row_iterator.to_dataframe_iterable( + bqstorage_client=bqstorage_client, max_stream_count=max_stream_count + ) + list(result_iterable) + bqstorage_client.create_read_session.assert_called_once_with( + parent=mock.ANY, + read_session=mock.ANY, + max_stream_count=max_stream_count if not preserve_order else 1, + ) From 1289d4413d6be522ca72c822a607e2595732c413 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 26 Nov 2024 19:16:56 +0100 Subject: [PATCH 1863/2016] chore(deps): update all dependencies (#2078) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 438018f88cf4..edf5a24b67c2 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -36,7 +36,7 @@ pandas==2.2.3; python_version >= '3.9' proto-plus==1.25.0 pyarrow===12.0.1; python_version == '3.7' pyarrow===17.0.0; python_version == '3.8' -pyarrow==18.0.0; python_version >= '3.9' +pyarrow==18.1.0; python_version >= '3.9' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.1; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' From 7a3f01969783cce8cf8501e6cde6910f30264823 Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Wed, 4 Dec 2024 22:24:15 +0900 Subject: [PATCH 1864/2016] feat: add property for `allowNonIncrementalDefinition` for materialized view (#2084) * feat: property for `allowNonIncrementalDefinition` materialized view Signed-off-by: Yu Ishikawa format Signed-off-by: Yu Ishikawa * Update tests/unit/test_table.py Co-authored-by: Chalmer Lowe * Update google/cloud/bigquery/table.py Co-authored-by: Chalmer Lowe --------- Signed-off-by: Yu Ishikawa Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/table.py | 23 +++++++++++++++++++ .../tests/unit/test_table.py | 10 ++++++++ 2 files changed, 33 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index dcaf377e3f43..38542023b908 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -390,6 +390,7 @@ class Table(_TableBase): "mview_last_refresh_time": ["materializedView", "lastRefreshTime"], "mview_query": "materializedView", "mview_refresh_interval": "materializedView", + "mview_allow_non_incremental_definition": "materializedView", "num_bytes": "numBytes", "num_rows": "numRows", "partition_expiration": "timePartitioning", @@ -928,6 +929,28 @@ def mview_refresh_interval(self, value): refresh_interval_ms, ) + @property + def mview_allow_non_incremental_definition(self): + """Optional[bool]: This option declares the intention to construct a + materialized view that isn't refreshed incrementally. + The default value is :data:`False`. + """ + api_field = self._PROPERTY_TO_API_FIELD[ + "mview_allow_non_incremental_definition" + ] + return _helpers._get_sub_prop( + self._properties, [api_field, "allowNonIncrementalDefinition"] + ) + + @mview_allow_non_incremental_definition.setter + def mview_allow_non_incremental_definition(self, value): + api_field = self._PROPERTY_TO_API_FIELD[ + "mview_allow_non_incremental_definition" + ] + _helpers._set_sub_prop( + self._properties, [api_field, "allowNonIncrementalDefinition"], value + ) + @property def streaming_buffer(self): """google.cloud.bigquery.StreamingBuffer: Information about a table's diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index d81ad2dca368..ff0593470fa7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1050,6 +1050,16 @@ def test_mview_refresh_interval(self): table.mview_refresh_interval = None self.assertIsNone(table.mview_refresh_interval) + def test_mview_allow_non_incremental_definition(self): + table = self._make_one() + self.assertIsNone(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = True + self.assertTrue(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = False + self.assertFalse(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = None + self.assertIsNone(table.mview_allow_non_incremental_definition) + def test_from_string(self): cls = self._get_target_class() got = cls.from_string("string-project.string_dataset.string_table") From 639430f50d0ceb3e9812bacae9a12d22f4a1c5fc Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Sat, 7 Dec 2024 05:56:34 +0900 Subject: [PATCH 1865/2016] feat: add property for maxStaleness in table definitions (#2087) * feat: add property for maxStaleness in table definitions Signed-off-by: Yu Ishikawa * Update google/cloud/bigquery/table.py --------- Signed-off-by: Yu Ishikawa Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/table.py | 35 +++++++++++++++ .../tests/unit/test_table.py | 43 +++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 38542023b908..80ab330ba7af 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -407,6 +407,7 @@ class Table(_TableBase): "view_query": "view", "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", + "max_staleness": "maxStaleness", } def __init__(self, table_ref, schema=None) -> None: @@ -1115,6 +1116,40 @@ def __repr__(self): def __str__(self): return f"{self.project}.{self.dataset_id}.{self.table_id}" + @property + def max_staleness(self): + """Union[str, None]: The maximum staleness of data that could be returned when the table is queried. + + Staleness encoded as a string encoding of sql IntervalValue type. + This property is optional and defaults to None. + + According to the BigQuery API documentation, maxStaleness specifies the maximum time + interval for which stale data can be returned when querying the table. + It helps control data freshness in scenarios like metadata-cached external tables. + + Returns: + Optional[str]: A string representing the maximum staleness interval + (e.g., '1h', '30m', '15s' for hours, minutes, seconds respectively). + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["max_staleness"]) + + @max_staleness.setter + def max_staleness(self, value): + """Set the maximum staleness for the table. + + Args: + value (Optional[str]): A string representing the maximum staleness interval. + Must be a valid time interval string. + Examples include '1h' (1 hour), '30m' (30 minutes), '15s' (15 seconds). + + Raises: + ValueError: If the value is not None and not a string. + """ + if value is not None and not isinstance(value, str): + raise ValueError("max_staleness must be a string or None") + + self._properties[self._PROPERTY_TO_API_FIELD["max_staleness"]] = value + class TableListItem(_TableBase): """A read-only table resource from a list operation. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index ff0593470fa7..3824da2266b2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1475,6 +1475,49 @@ def test___str__(self): table1 = self._make_one(TableReference(dataset, "table1")) self.assertEqual(str(table1), "project1.dataset1.table1") + def test_max_staleness_getter(self): + """Test getting max_staleness property.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Initially None + self.assertIsNone(table.max_staleness) + # Set max_staleness using setter + table.max_staleness = "1h" + self.assertEqual(table.max_staleness, "1h") + + def test_max_staleness_setter(self): + """Test setting max_staleness property.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Set valid max_staleness + table.max_staleness = "30m" + self.assertEqual(table.max_staleness, "30m") + # Set to None + table.max_staleness = None + self.assertIsNone(table.max_staleness) + + def test_max_staleness_setter_invalid_type(self): + """Test setting max_staleness with an invalid type raises ValueError.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Try setting invalid type + with self.assertRaises(ValueError): + table.max_staleness = 123 # Not a string + + def test_max_staleness_to_api_repr(self): + """Test max_staleness is correctly represented in API representation.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Set max_staleness + table.max_staleness = "1h" + # Convert to API representation + resource = table.to_api_repr() + self.assertEqual(resource.get("maxStaleness"), "1h") + class Test_row_from_mapping(unittest.TestCase, _SchemaBase): PROJECT = "prahj-ekt" From 23b477b6ffce3e9ac2d7ac31dd96548a8b81b913 Mon Sep 17 00:00:00 2001 From: Rin Arakaki Date: Wed, 11 Dec 2024 03:15:11 +0900 Subject: [PATCH 1866/2016] feat: add type hints to Client (#2044) * add type hints * Update client.py Moves import from being used solely during specific checks to being more universally available. * Update google/cloud/bigquery/client.py * Update client.py testing some minor changes to deal with mypy quirks * Update google/cloud/bigquery/client.py --------- Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/client.py | 36 +++++++++++-------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 97f239f7a9e0..03ded93b1b8e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -44,6 +44,8 @@ import uuid import warnings +import requests + from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload @@ -65,6 +67,7 @@ DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore +from google.auth.credentials import Credentials from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _job_helpers from google.cloud.bigquery import _pandas_helpers @@ -126,6 +129,7 @@ _versions_helpers.PANDAS_VERSIONS.try_import() ) # mypy check fails because pandas import is outside module, there are type: ignore comments related to this + ResumableTimeoutType = Union[ None, float, Tuple[float, float] ] # for resumable media methods @@ -133,8 +137,6 @@ if typing.TYPE_CHECKING: # pragma: NO COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] - import requests # required by api-core - _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 @@ -231,15 +233,23 @@ class Client(ClientWithProject): def __init__( self, - project=None, - credentials=None, - _http=None, - location=None, - default_query_job_config=None, - default_load_job_config=None, - client_info=None, - client_options=None, + project: Optional[str] = None, + credentials: Optional[Credentials] = None, + _http: Optional[requests.Session] = None, + location: Optional[str] = None, + default_query_job_config: Optional[QueryJobConfig] = None, + default_load_job_config: Optional[LoadJobConfig] = None, + client_info: Optional[google.api_core.client_info.ClientInfo] = None, + client_options: Optional[ + Union[google.api_core.client_options.ClientOptions, Dict[str, Any]] + ] = None, ) -> None: + if client_options is None: + client_options = {} + if isinstance(client_options, dict): + client_options = google.api_core.client_options.from_dict(client_options) + # assert isinstance(client_options, google.api_core.client_options.ClientOptions) + super(Client, self).__init__( project=project, credentials=credentials, @@ -247,14 +257,10 @@ def __init__( _http=_http, ) - kw_args = {"client_info": client_info} + kw_args: Dict[str, Any] = {"client_info": client_info} bq_host = _get_bigquery_host() kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None client_universe = None - if client_options is None: - client_options = {} - if isinstance(client_options, dict): - client_options = google.api_core.client_options.from_dict(client_options) if client_options.api_endpoint: api_endpoint = client_options.api_endpoint kw_args["api_endpoint"] = api_endpoint From a458c9fc6b01b0c220a26e9b0fe8a17dd64a2b58 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 18 Dec 2024 02:40:17 +0800 Subject: [PATCH 1867/2016] chore(python): update dependencies in .kokoro/docker/docs (#2088) Source-Link: https://github.com/googleapis/synthtool/commit/e808c98e1ab7eec3df2a95a05331619f7001daef Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737 Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/docker/docs/requirements.txt | 52 +++++++++++++++---- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 6301519a9a05..26306af66f81 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562 -# created: 2024-11-12T12:09:45.821174897Z + digest: sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737 +# created: 2024-12-17T00:59:58.625514486Z diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt index 8bb0764594b1..f99a5c4aac7f 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt @@ -2,11 +2,11 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes requirements.in +# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.1 \ - --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ - --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 +argcomplete==3.5.2 \ + --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ + --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb # via nox colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ @@ -23,7 +23,7 @@ filelock==3.16.1 \ nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in + # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in packaging==24.2 \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f @@ -32,11 +32,41 @@ platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv -tomli==2.0.2 \ - --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ - --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed +tomli==2.2.1 \ + --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ + --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ + --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ + --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \ + --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \ + --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \ + --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \ + --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \ + --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \ + --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \ + --hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \ + --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \ + --hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \ + --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \ + --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \ + --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \ + --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \ + --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \ + --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \ + --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \ + --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \ + --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \ + --hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \ + --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \ + --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \ + --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \ + --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \ + --hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \ + --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \ + --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ + --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ + --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.27.1 \ - --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \ - --hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4 +virtualenv==20.28.0 \ + --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ + --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa # via nox From d8ce8c1df785b08106d0a080636001d9c0a06819 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Dec 2024 08:58:58 -0800 Subject: [PATCH 1868/2016] chore(deps): bump jinja2 from 3.1.4 to 3.1.5 in /.kokoro (#2094) Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.4 to 3.1.5. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.4...3.1.5) --- updated-dependencies: - dependency-name: jinja2 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/.kokoro/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 006d8ef931bf..16db448c16bf 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -254,9 +254,9 @@ jeepney==0.8.0 \ # via # keyring # secretstorage -jinja2==3.1.4 \ - --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ - --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d +jinja2==3.1.5 \ + --hash=sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb \ + --hash=sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb # via gcp-releasetool keyring==25.4.1 \ --hash=sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf \ From 6847728fefe2013778a905c2446f464ca4eabc00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 27 Dec 2024 16:55:01 -0600 Subject: [PATCH 1869/2016] feat: preserve unknown fields from the REST API representation in `SchemaField` (#2097) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: preserve unknown fields from the REST API representaton in `SchemaField` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove unnecessary variable * remove unused private method * fix pytype --------- Co-authored-by: Owl Bot --- .../google/cloud/bigquery/schema.py | 82 ++++++------------- .../tests/unit/job/test_load_config.py | 29 +++++-- .../tests/unit/test_schema.py | 37 +++++++-- .../tests/unit/test_table.py | 32 ++++++-- 4 files changed, 105 insertions(+), 75 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index f5b03cbef400..b062396cf7bd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -16,8 +16,9 @@ import collections import enum -from typing import Any, Dict, Iterable, Optional, Union, cast +from typing import Any, cast, Dict, Iterable, Optional, Union +from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -203,15 +204,8 @@ def __init__( self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): self._properties["rangeElementType"] = range_element_type.to_api_repr() - - self._fields = tuple(fields) - - @staticmethod - def __get_int(api_repr, name): - v = api_repr.get(name, _DEFAULT_VALUE) - if v is not _DEFAULT_VALUE: - v = int(v) - return v + if fields: # Don't set the property if it's not set. + self._properties["fields"] = [field.to_api_repr() for field in fields] @classmethod def from_api_repr(cls, api_repr: dict) -> "SchemaField": @@ -225,43 +219,19 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": Returns: google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ - field_type = api_repr["type"].upper() - - # Handle optional properties with default values - mode = api_repr.get("mode", "NULLABLE") - description = api_repr.get("description", _DEFAULT_VALUE) - fields = api_repr.get("fields", ()) - policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) + placeholder = cls("this_will_be_replaced", "PLACEHOLDER") - default_value_expression = api_repr.get("defaultValueExpression", None) + # Note: we don't make a copy of api_repr because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. See https://github.com/googleapis/python-bigquery/issues/6 + placeholder._properties = api_repr - if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: - policy_tags = PolicyTagList.from_api_repr(policy_tags) - - if api_repr.get("rangeElementType"): - range_element_type = cast(dict, api_repr.get("rangeElementType")) - element_type = range_element_type.get("type") - else: - element_type = None - - return cls( - field_type=field_type, - fields=[cls.from_api_repr(f) for f in fields], - mode=mode.upper(), - default_value_expression=default_value_expression, - description=description, - name=api_repr["name"], - policy_tags=policy_tags, - precision=cls.__get_int(api_repr, "precision"), - scale=cls.__get_int(api_repr, "scale"), - max_length=cls.__get_int(api_repr, "maxLength"), - range_element_type=element_type, - ) + return placeholder @property def name(self): """str: The name of the field.""" - return self._properties["name"] + return self._properties.get("name", "") @property def field_type(self): @@ -270,7 +240,10 @@ def field_type(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ - return self._properties["type"] + type_ = self._properties.get("type") + if type_ is None: # Shouldn't happen, but some unit tests do this. + return None + return cast(str, type_).upper() @property def mode(self): @@ -279,7 +252,7 @@ def mode(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode """ - return self._properties.get("mode") + return cast(str, self._properties.get("mode", "NULLABLE")).upper() @property def is_nullable(self): @@ -299,17 +272,17 @@ def description(self): @property def precision(self): """Optional[int]: Precision (number of digits) for the NUMERIC field.""" - return self._properties.get("precision") + return _helpers._int_or_none(self._properties.get("precision")) @property def scale(self): """Optional[int]: Scale (digits after decimal) for the NUMERIC field.""" - return self._properties.get("scale") + return _helpers._int_or_none(self._properties.get("scale")) @property def max_length(self): """Optional[int]: Maximum length for the STRING or BYTES field.""" - return self._properties.get("maxLength") + return _helpers._int_or_none(self._properties.get("maxLength")) @property def range_element_type(self): @@ -329,7 +302,7 @@ def fields(self): Must be empty unset if ``field_type`` is not 'RECORD'. """ - return self._fields + return tuple(_to_schema_fields(self._properties.get("fields", []))) @property def policy_tags(self): @@ -345,15 +318,10 @@ def to_api_repr(self) -> dict: Returns: Dict: A dictionary representing the SchemaField in a serialized form. """ - answer = self._properties.copy() - - # If this is a RECORD type, then sub-fields are also included, - # add this to the serialized representation. - if self.field_type.upper() in _STRUCT_TYPES: - answer["fields"] = [f.to_api_repr() for f in self.fields] - - # Done; return the serialized dictionary. - return answer + # Note: we don't make a copy of _properties because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. See https://github.com/googleapis/python-bigquery/issues/6 + return self._properties def _key(self): """A tuple key that uniquely describes this field. @@ -389,7 +357,7 @@ def _key(self): self.mode.upper(), # pytype: disable=attribute-error self.default_value_expression, self.description, - self._fields, + self.fields, policy_tags, ) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index becf3e959b79..3a681c476228 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import warnings import pytest @@ -571,16 +572,34 @@ def test_schema_setter_valid_mappings_list(self): config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} ) - def test_schema_setter_invalid_mappings_list(self): + def test_schema_setter_allows_unknown_properties(self): config = self._get_target_class()() schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, ] - with self.assertRaises(Exception): - config.schema = schema + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + config.schema = schema + + # _properties should include all fields, including unknown ones. + assert config._properties["load"]["schema"]["fields"] == expected_schema def test_schema_setter_unsetting_schema(self): from google.cloud.bigquery.schema import SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index b17cd028116d..4b0b28158075 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.cloud import bigquery -from google.cloud.bigquery.standard_sql import StandardSqlStructType -from google.cloud.bigquery.schema import PolicyTagList +import copy import unittest from unittest import mock import pytest +from google.cloud import bigquery +from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery.schema import PolicyTagList + class TestSchemaField(unittest.TestCase): @staticmethod @@ -821,13 +823,32 @@ def test_schema_fields_sequence(self): result = self._call_fut(schema) self.assertEqual(result, schema) - def test_invalid_mapping_representation(self): + def test_unknown_properties(self): schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "address", "typeooo": "STRING", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, ] - with self.assertRaises(Exception): - self._call_fut(schema) + + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + result = self._call_fut(schema) + + for api_repr, field in zip(expected_schema, result): + assert field.to_api_repr() == api_repr def test_valid_mapping_representation(self): from google.cloud.bigquery.schema import SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 3824da2266b2..e9d461e9d9c7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import datetime import logging import re @@ -711,14 +712,35 @@ def test_schema_setter_valid_fields(self): table.schema = [full_name, age] self.assertEqual(table.schema, [full_name, age]) - def test_schema_setter_invalid_mapping_representation(self): + def test_schema_setter_allows_unknown_properties(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"} - invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"} - with self.assertRaises(Exception): - table.schema = [full_name, invalid_field] + schema = [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, + ] + + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + table.schema = schema + + # _properties should include all fields, including unknown ones. + assert table._properties["schema"]["fields"] == expected_schema def test_schema_setter_valid_mapping_representation(self): from google.cloud.bigquery.schema import SchemaField From 87fe2289beed93d8d36e3797e676e5c57daae8dd Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 2 Jan 2025 23:46:50 +0100 Subject: [PATCH 1870/2016] chore(deps): update all dependencies (#2096) * chore(deps): update all dependencies * pin attrs===24.2.0 for python 3.7 * pin urllib3===2.2.3 for python 3.8 * pin matplotlib===3.9.2 for python 3.9 --------- Co-authored-by: Lingqing Gan --- .../.kokoro/docker/docs/requirements.txt | 6 ++--- .../samples/desktopapp/requirements-test.txt | 4 ++-- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 24 ++++++++++--------- .../samples/magics/requirements-test.txt | 4 ++-- .../samples/magics/requirements.txt | 2 +- .../samples/notebooks/requirements-test.txt | 4 ++-- .../samples/notebooks/requirements.txt | 5 ++-- .../samples/snippets/requirements-test.txt | 4 ++-- 9 files changed, 29 insertions(+), 26 deletions(-) diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt index f99a5c4aac7f..fb6ffa272574 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt @@ -4,9 +4,9 @@ # # pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.2 \ - --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ - --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb +argcomplete==3.5.3 \ + --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ + --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 # via nox colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 1640e1a9502a..827b02dcfb36 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 1ccebd9cd20e..ef38acb4f650 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index edf5a24b67c2..ab73dbe878a1 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,21 +1,22 @@ -attrs==24.2.0 -certifi==2024.8.30 +attrs===24.2.0; python_version == '3.7' +attrs==24.3.0; python_version >= '3.8' +certifi==2024.12.14 cffi===1.15.1; python_version == '3.7' cffi==1.17.1; python_version >= '3.8' -charset-normalizer==3.4.0 -click==8.1.7 +charset-normalizer==3.4.1 +click==8.1.8 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.1 Fiona===1.9.6; python_version == '3.7' Fiona==1.10.1; python_version >= '3.8' -geojson==3.1.0 +geojson==3.2.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.23.0 -google-auth==2.36.0 +google-api-core==2.24.0 +google-auth==2.37.0 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 google-cloud-core==2.4.1 @@ -24,7 +25,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.68.0; python_version >= '3.8' +grpcio==1.68.1; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 @@ -44,7 +45,7 @@ pyasn1-modules==0.4.1; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' pyparsing===3.1.4; python_version < '3.9' -pyparsing==3.2.0; python_version >= '3.9' +pyparsing==3.2.1; python_version >= '3.9' python-dateutil==2.9.0.post0 pytz==2024.2 PyYAML===6.0.1; python_version == '3.7' @@ -53,9 +54,10 @@ requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 Shapely==2.0.6 -six==1.16.0 +six==1.17.0 typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' -urllib3==2.2.3; python_version >= '3.8' +urllib3===2.2.3; python_version == '3.8' +urllib3==2.3.0; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 1640e1a9502a..827b02dcfb36 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 543d9a5121ec..87efa3dec888 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.4.0 +bigquery_magics==0.5.0 db-dtypes==1.3.1 google.cloud.bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 1640e1a9502a..827b02dcfb36 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index ca8a0a13e9ab..77103a338a77 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.4.0 +bigquery-magics==0.5.0 db-dtypes==1.3.1 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 @@ -7,7 +7,8 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.9.2; python_version >= '3.9' +matplotlib===3.9.2; python_version == '3.9' +matplotlib==3.10.0; python_version >= '3.10' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.3; python_version >= '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index bb0b2a6bff9d..077e465cfce3 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 From e72ebefd6e7db1539bd862561b7c5bfeb8a0f37d Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 3 Jan 2025 23:01:48 +0100 Subject: [PATCH 1871/2016] chore(deps): update dependency virtualenv to v20.28.1 (#2101) --- .../.kokoro/docker/docs/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt index fb6ffa272574..48ace5de9164 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt @@ -66,7 +66,7 @@ tomli==2.2.1 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.0 \ - --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ - --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa +virtualenv==20.28.1 \ + --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ + --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 # via nox From bba9e190eb1de4b1cfbe70b20d19a4093f1e6027 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 7 Jan 2025 00:17:03 +0100 Subject: [PATCH 1872/2016] chore(deps): update dependency grpcio to v1.69.0 (#2102) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index ab73dbe878a1..71579867fbc8 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -25,7 +25,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.68.1; python_version >= '3.8' +grpcio==1.69.0; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 From d0f90af524ce02d428b0767cc7110e2809f63d0d Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:00:45 -0500 Subject: [PATCH 1873/2016] chore(python): exclude .github/workflows/unittest.yml in renovate config (#2103) Source-Link: https://github.com/googleapis/synthtool/commit/106d292bd234e5d9977231dcfbc4831e34eba13a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a Co-authored-by: Owl Bot --- .../google-cloud-bigquery/.github/.OwlBot.lock.yaml | 6 +++--- .../.kokoro/docker/docs/requirements.txt | 12 ++++++------ packages/google-cloud-bigquery/renovate.json | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 26306af66f81..10cf433a8b00 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737 -# created: 2024-12-17T00:59:58.625514486Z + digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a +# created: 2025-01-09T12:01:16.422459506Z diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt index 48ace5de9164..f99a5c4aac7f 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt @@ -4,9 +4,9 @@ # # pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.3 \ - --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ - --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 +argcomplete==3.5.2 \ + --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ + --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb # via nox colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ @@ -66,7 +66,7 @@ tomli==2.2.1 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.1 \ - --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ - --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 +virtualenv==20.28.0 \ + --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ + --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa # via nox diff --git a/packages/google-cloud-bigquery/renovate.json b/packages/google-cloud-bigquery/renovate.json index 39b2a0ec9296..c7875c469bd5 100644 --- a/packages/google-cloud-bigquery/renovate.json +++ b/packages/google-cloud-bigquery/renovate.json @@ -5,7 +5,7 @@ ":preserveSemverRanges", ":disableDependencyDashboard" ], - "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py"], + "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] } From 06fb81eba21582db72ac4a196e5c65fd870d1352 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 9 Jan 2025 13:42:37 -0500 Subject: [PATCH 1874/2016] feat: adds new input validation function similar to isinstance. (#2107) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: adds new function similar to isinstance. * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../google/cloud/bigquery/_helpers.py | 32 ++++++++++++++++++- .../tests/unit/test__helpers.py | 32 +++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 1eda80712969..ea47af28d994 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -22,7 +22,7 @@ import re import os import warnings -from typing import Optional, Union +from typing import Optional, Union, Any, Tuple, Type from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -1004,3 +1004,33 @@ def _verify_job_config_type(job_config, expected_type, param_name="job_config"): job_config=job_config, ) ) + + +def _isinstance_or_raise( + value: Any, + dtype: Union[Type, Tuple[Type, ...]], + none_allowed: Optional[bool] = False, +) -> Any: + """Determine whether a value type matches a given datatype or None. + Args: + value (Any): Value to be checked. + dtype (type): Expected data type or tuple of data types. + none_allowed Optional(bool): whether value is allowed to be None. Default + is False. + Returns: + Any: Returns the input value if the type check is successful. + Raises: + TypeError: If the input value's type does not match the expected data type(s). + """ + if none_allowed and value is None: + return value + + if isinstance(value, dtype): + return value + + or_none = "" + if none_allowed: + or_none = " (or None)" + + msg = f"Pass {value} as a '{dtype}'{or_none}. Got {type(value)}." + raise TypeError(msg) diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index 0a307498f674..adba6327c2de 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -24,6 +24,7 @@ from unittest import mock import google.api_core +from google.cloud.bigquery._helpers import _isinstance_or_raise @pytest.mark.skipif( @@ -1661,3 +1662,34 @@ def test_w_env_var(self): host = self._call_fut() self.assertEqual(host, HOST) + + +class Test__isinstance_or_raise: + @pytest.mark.parametrize( + "value,dtype,none_allowed,expected", + [ + (None, str, True, None), + ("hello world.uri", str, True, "hello world.uri"), + ("hello world.uri", str, False, "hello world.uri"), + (None, (str, float), True, None), + ("hello world.uri", (str, float), True, "hello world.uri"), + ("hello world.uri", (str, float), False, "hello world.uri"), + ], + ) + def test__valid_isinstance_or_raise(self, value, dtype, none_allowed, expected): + result = _isinstance_or_raise(value, dtype, none_allowed=none_allowed) + assert result == expected + + @pytest.mark.parametrize( + "value,dtype,none_allowed,expected", + [ + (None, str, False, pytest.raises(TypeError)), + ({"key": "value"}, str, True, pytest.raises(TypeError)), + ({"key": "value"}, str, False, pytest.raises(TypeError)), + ({"key": "value"}, (str, float), True, pytest.raises(TypeError)), + ({"key": "value"}, (str, float), False, pytest.raises(TypeError)), + ], + ) + def test__invalid_isinstance_or_raise(self, value, dtype, none_allowed, expected): + with expected: + _isinstance_or_raise(value, dtype, none_allowed=none_allowed) From ac9a801df238def61696b205853f016ba7ba3d1d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 10 Jan 2025 13:22:06 -0500 Subject: [PATCH 1875/2016] feat: adds the SerDeInfo class and tests (#2108) * feat: adds SerDeInfo class and tests * cleans up type hints and some minor tweaks --- .../google/cloud/bigquery/schema.py | 88 ++++++++++++++++++ .../tests/unit/test_schema.py | 92 ++++++++++++++++++- 2 files changed, 176 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index b062396cf7bd..f93877d45da7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -14,8 +14,10 @@ """Schemas for BigQuery tables / queries.""" +from __future__ import annotations import collections import enum +import typing from typing import Any, cast, Dict, Iterable, Optional, Union from google.cloud.bigquery import _helpers @@ -556,3 +558,89 @@ def to_api_repr(self) -> dict: """ answer = {"names": list(self.names)} return answer + + +class SerDeInfo: + """Serializer and deserializer information. + + Args: + serialization_library (str): Required. Specifies a fully-qualified class + name of the serialization library that is responsible for the + translation of data between table representation and the underlying + low-level input and output format structures. The maximum length is + 256 characters. + name (Optional[str]): Name of the SerDe. The maximum length is 256 + characters. + parameters: (Optional[dict[str, str]]): Key-value pairs that define the initialization + parameters for the serialization library. Maximum size 10 Kib. + """ + + def __init__( + self, + serialization_library: str, + name: Optional[str] = None, + parameters: Optional[dict[str, str]] = None, + ): + self._properties: Dict[str, Any] = {} + self.serialization_library = serialization_library + self.name = name + self.parameters = parameters + + @property + def serialization_library(self) -> str: + """Required. Specifies a fully-qualified class name of the serialization + library that is responsible for the translation of data between table + representation and the underlying low-level input and output format + structures. The maximum length is 256 characters.""" + + return typing.cast(str, self._properties.get("serializationLibrary")) + + @serialization_library.setter + def serialization_library(self, value: str): + value = _helpers._isinstance_or_raise(value, str, none_allowed=False) + self._properties["serializationLibrary"] = value + + @property + def name(self) -> Optional[str]: + """Optional. Name of the SerDe. The maximum length is 256 characters.""" + + return self._properties.get("name") + + @name.setter + def name(self, value: Optional[str] = None): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["name"] = value + + @property + def parameters(self) -> Optional[dict[str, str]]: + """Optional. Key-value pairs that define the initialization parameters + for the serialization library. Maximum size 10 Kib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Optional[dict[str, str]] = None): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> SerDeInfo: + """Factory: constructs an instance of the class (cls) + given its API representation. + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls("PLACEHOLDER") + config._properties = api_repr + return config diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 4b0b28158075..380067dc8ff5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -20,6 +20,7 @@ from google.cloud import bigquery from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery import schema from google.cloud.bigquery.schema import PolicyTagList @@ -130,8 +131,6 @@ def test_constructor_range_str(self): self.assertEqual(field.range_element_type.element_type, "DATETIME") def test_to_api_repr(self): - from google.cloud.bigquery.schema import PolicyTagList - policy = PolicyTagList(names=("foo", "bar")) self.assertEqual( policy.to_api_repr(), @@ -886,8 +885,6 @@ def test_valid_mapping_representation(self): class TestPolicyTags(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery.schema import PolicyTagList - return PolicyTagList def _make_one(self, *args, **kw): @@ -1129,3 +1126,90 @@ def test_to_api_repr_parameterized(field, api): from google.cloud.bigquery.schema import SchemaField assert SchemaField(**field).to_api_repr() == api + + +class TestSerDeInfo: + """Tests for the SerDeInfo class.""" + + @staticmethod + def _get_target_class(): + return schema.SerDeInfo + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + ("testpath.to.LazySimpleSerDe", None, None), + ("testpath.to.LazySimpleSerDe", "serde_name", None), + ("testpath.to.LazySimpleSerDe", None, {"key": "value"}), + ("testpath.to.LazySimpleSerDe", "serde_name", {"key": "value"}), + ], + ) + def test_ctor_valid_input(self, serialization_library, name, parameters): + serde_info = self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + assert serde_info.serialization_library == serialization_library + assert serde_info.name == name + assert serde_info.parameters == parameters + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + (123, None, None), + ("testpath.to.LazySimpleSerDe", 123, None), + ("testpath.to.LazySimpleSerDe", None, ["test", "list"]), + ("testpath.to.LazySimpleSerDe", None, 123), + ], + ) + def test_ctor_invalid_input(self, serialization_library, name, parameters): + with pytest.raises(TypeError) as e: + self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + serde_info = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + expected_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + assert serde_info.to_api_repr() == expected_repr + + def test_from_api_repr(self): + """GIVEN an api representation of a SerDeInfo object (i.e. resource) + WHEN converted into a SerDeInfo object using from_api_repr() + THEN it will have the representation in dict format as a SerDeInfo + object made directly (via _make_one()) and represented in dict format. + """ + api_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + + expected = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + + klass = self._get_target_class() + result = klass.from_api_repr(api_repr) + + # We convert both to dict format because these classes do not have a + # __eq__() method to facilitate direct equality comparisons. + assert result.to_api_repr() == expected.to_api_repr() From 7c01e891a26fd540162a471af057f753d22663a3 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 14 Jan 2025 15:48:57 -0500 Subject: [PATCH 1876/2016] feat: adds StorageDescriptor and tests (#2109) * feat: adds StorageDescriptor and tests * updates attr names, corrects type hinting --- .../google/cloud/bigquery/schema.py | 118 ++++++++++++++++ .../tests/unit/test_schema.py | 128 ++++++++++++++++++ 2 files changed, 246 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index f93877d45da7..8d62b2b5b864 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -644,3 +644,121 @@ def from_api_repr(cls, api_repr: dict) -> SerDeInfo: config = cls("PLACEHOLDER") config._properties = api_repr return config + + +class StorageDescriptor: + """Contains information about how a table's data is stored and accessed by open + source query engines. + + Args: + input_format (Optional[str]): Specifies the fully qualified class name of + the InputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters. + location_uri (Optional[str]): The physical location of the table (e.g. + 'gs://spark-dataproc-data/pangea-data/case_sensitive/' or + 'gs://spark-dataproc-data/pangea-data/'). The maximum length is + 2056 bytes. + output_format (Optional[str]): Specifies the fully qualified class name + of the OutputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). The maximum + length is 128 characters. + serde_info (Union[SerDeInfo, dict, None]): Serializer and deserializer information. + """ + + def __init__( + self, + input_format: Optional[str] = None, + location_uri: Optional[str] = None, + output_format: Optional[str] = None, + serde_info: Union[SerDeInfo, dict, None] = None, + ): + self._properties: Dict[str, Any] = {} + self.input_format = input_format + self.location_uri = location_uri + self.output_format = output_format + # Using typing.cast() because mypy cannot wrap it's head around the fact that: + # the setter can accept Union[SerDeInfo, dict, None] + # but the getter will only ever return Optional[SerDeInfo]. + self.serde_info = typing.cast(Optional[SerDeInfo], serde_info) + + @property + def input_format(self) -> Optional[str]: + """Optional. Specifies the fully qualified class name of the InputFormat + (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters.""" + + return self._properties.get("inputFormat") + + @input_format.setter + def input_format(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["inputFormat"] = value + + @property + def location_uri(self) -> Optional[str]: + """Optional. The physical location of the table (e.g. 'gs://spark- + dataproc-data/pangea-data/case_sensitive/' or 'gs://spark-dataproc- + data/pangea-data/'). The maximum length is 2056 bytes.""" + + return self._properties.get("locationUri") + + @location_uri.setter + def location_uri(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["locationUri"] = value + + @property + def output_format(self) -> Optional[str]: + """Optional. Specifies the fully qualified class name of the + OutputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). + The maximum length is 128 characters.""" + + return self._properties.get("outputFormat") + + @output_format.setter + def output_format(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["outputFormat"] = value + + @property + def serde_info(self) -> Optional[SerDeInfo]: + """Optional. Serializer and deserializer information.""" + + prop = _helpers._get_sub_prop(self._properties, ["serDeInfo"]) + if prop is not None: + return typing.cast(SerDeInfo, SerDeInfo.from_api_repr(prop)) + return None + + @serde_info.setter + def serde_info(self, value: Union[SerDeInfo, dict, None]): + value = _helpers._isinstance_or_raise( + value, (SerDeInfo, dict), none_allowed=True + ) + + if isinstance(value, SerDeInfo): + self._properties["serDeInfo"] = value.to_api_repr() + else: + self._properties["serDeInfo"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, resource: dict) -> StorageDescriptor: + """Factory: constructs an instance of the class (cls) + given its API representation. + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls() + config._properties = resource + return config diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 380067dc8ff5..7e84dd63f2d8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -1213,3 +1213,131 @@ def test_from_api_repr(self): # We convert both to dict format because these classes do not have a # __eq__() method to facilitate direct equality comparisons. assert result.to_api_repr() == expected.to_api_repr() + + +class TestStorageDescriptor: + """Tests for the StorageDescriptor class.""" + + @staticmethod + def _get_target_class(): + return schema.StorageDescriptor + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + serdeinfo_resource = { + "serialization_library": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + } + + SERDEINFO = schema.SerDeInfo("PLACEHOLDER").from_api_repr(serdeinfo_resource) + + STORAGEDESCRIPTOR = { + "inputFormat": "testpath.to.OrcInputFormat", + "locationUri": "gs://test/path/", + "outputFormat": "testpath.to.OrcOutputFormat", + "serDeInfo": SERDEINFO.to_api_repr(), + } + + @pytest.mark.parametrize( + "input_format,location_uri,output_format,serde_info", + [ + (None, None, None, None), + ("testpath.to.OrcInputFormat", None, None, None), + (None, "gs://test/path/", None, None), + (None, None, "testpath.to.OrcOutputFormat", None), + (None, None, None, SERDEINFO), + ( + "testpath.to.OrcInputFormat", + "gs://test/path/", + "testpath.to.OrcOutputFormat", + SERDEINFO, # uses SERDEINFO class format + ), + ( + "testpath.to.OrcInputFormat", + "gs://test/path/", + "testpath.to.OrcOutputFormat", + serdeinfo_resource, # uses api resource format (dict) + ), + ], + ) + def test_ctor_valid_input( + self, input_format, location_uri, output_format, serde_info + ): + storage_descriptor = self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) + assert storage_descriptor.input_format == input_format + assert storage_descriptor.location_uri == location_uri + assert storage_descriptor.output_format == output_format + if isinstance(serde_info, schema.SerDeInfo): + assert ( + storage_descriptor.serde_info.to_api_repr() == serde_info.to_api_repr() + ) + elif isinstance(serde_info, dict): + assert storage_descriptor.serde_info.to_api_repr() == serde_info + else: + assert storage_descriptor.serde_info is None + + @pytest.mark.parametrize( + "input_format,location_uri,output_format,serde_info", + [ + (123, None, None, None), + (None, 123, None, None), + (None, None, 123, None), + (None, None, None, 123), + ], + ) + def test_ctor_invalid_input( + self, input_format, location_uri, output_format, serde_info + ): + with pytest.raises(TypeError) as e: + self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + storage_descriptor = self._make_one( + input_format="input_format", + location_uri="location_uri", + output_format="output_format", + serde_info=self.SERDEINFO, + ) + expected_repr = { + "inputFormat": "input_format", + "locationUri": "location_uri", + "outputFormat": "output_format", + "serDeInfo": self.SERDEINFO.to_api_repr(), + } + assert storage_descriptor.to_api_repr() == expected_repr + + def test_from_api_repr(self): + """GIVEN an api representation of a StorageDescriptor (i.e. STORAGEDESCRIPTOR) + WHEN converted into a StorageDescriptor using from_api_repr() and + displayed as a dict + THEN it will have the same representation a StorageDescriptor created + directly (via the _make_one() func) and displayed as a dict. + """ + + # generate via STORAGEDESCRIPTOR + resource = self.STORAGEDESCRIPTOR + result = self._get_target_class().from_api_repr(resource) + # result = klass.from_api_repr(resource) + + expected = self._make_one( + input_format="testpath.to.OrcInputFormat", + location_uri="gs://test/path/", + output_format="testpath.to.OrcOutputFormat", + serde_info=self.SERDEINFO, + ) + assert result.to_api_repr() == expected.to_api_repr() From 73294eb25396647fc773b20ce597afdc61da5cab Mon Sep 17 00:00:00 2001 From: Keunsoo Park <43742836+keunsoopark@users.noreply.github.com> Date: Tue, 14 Jan 2025 22:17:13 +0100 Subject: [PATCH 1877/2016] feat: resource tags in dataset (#2090) * feat: resource tags in dataset * fix: fix unittets * Delete dataset/pyvenv.cfg * Update google/cloud/bigquery/dataset.py Co-authored-by: Lingqing Gan * Update google/cloud/bigquery/dataset.py Co-authored-by: Lingqing Gan * added system tests & fix unittest for none * add missing assert * remove venv * include resourcemanager in noxfile.py * fix fixture for tag keys * register tags before using in tests * handle alreadyexist error * fix: tag keys & values creation & deletion * fix comment * make tag keys unique * remove unused import --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/dataset.py | 23 +++++ packages/google-cloud-bigquery/noxfile.py | 4 + .../tests/system/test_client.py | 90 ++++++++++++++++++- .../tests/unit/test_client.py | 6 ++ .../tests/unit/test_create_dataset.py | 5 ++ .../tests/unit/test_dataset.py | 22 +++++ 6 files changed, 148 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index c49a52faf217..4d06d729d072 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -530,6 +530,7 @@ class Dataset(object): "storage_billing_model": "storageBillingModel", "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", + "resource_tags": "resourceTags", } def __init__(self, dataset_ref) -> None: @@ -801,6 +802,28 @@ def labels(self, value): raise ValueError("Pass a dict") self._properties["labels"] = value + @property + def resource_tags(self): + """Dict[str, str]: Resource tags of the dataset. + + Optional. The tags attached to this dataset. Tag keys are globally + unique. Tag key is expected to be in the namespaced format, for + example "123456789012/environment" where 123456789012 is + the ID of the parent organization or project resource for this tag + key. Tag value is expected to be the short name, for example + "Production". + + Raises: + ValueError: for invalid value types. + """ + return self._properties.setdefault("resourceTags", {}) + + @resource_tags.setter + def resource_tags(self, value): + if not isinstance(value, dict) and value is not None: + raise ValueError("Pass a dict") + self._properties["resourceTags"] = value + @property def default_encryption_configuration(self): """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 750a6b459068..e08956b114c1 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -219,6 +219,9 @@ def system(session): # Data Catalog needed for the column ACL test with a real Policy Tag. session.install("google-cloud-datacatalog", "-c", constraints_path) + # Resource Manager needed for test with a real Resource Tag. + session.install("google-cloud-resource-manager", "-c", constraints_path) + if session.python in ["3.11", "3.12"]: extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: @@ -366,6 +369,7 @@ def prerelease_deps(session): session.install( "freezegun", "google-cloud-datacatalog", + "google-cloud-resource-manager", "google-cloud-storage", "google-cloud-testutils", "psutil", diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 95c679a149d6..c0dd83b12089 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -25,6 +25,8 @@ import time import unittest import uuid +import random +import string from typing import Optional from google.api_core.exceptions import PreconditionFailed @@ -45,6 +47,8 @@ from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient +from google.cloud.resourcemanager_v3 import types as resourcemanager_types +from google.cloud.resourcemanager_v3 import TagKeysClient, TagValuesClient import psutil import pytest from test_utils.retry import RetryErrors @@ -156,9 +160,12 @@ def setUpModule(): class TestBigQuery(unittest.TestCase): def setUp(self): self.to_delete = [] + self.to_delete_tag_keys_values = [] def tearDown(self): policy_tag_client = PolicyTagManagerClient() + tag_keys_client = TagKeysClient() + tag_values_client = TagValuesClient() def _still_in_use(bad_request): return any( @@ -181,6 +188,18 @@ def _still_in_use(bad_request): else: doomed.delete() + # The TagKey cannot be deleted if it has any child TagValues. + for key_values in self.to_delete_tag_keys_values: + tag_key = key_values.pop() + + # Delete tag values first + [ + tag_values_client.delete_tag_value(name=tag_value.name).result() + for tag_value in key_values + ] + + tag_keys_client.delete_tag_key(name=tag_key.name).result() + def test_get_service_account_email(self): client = Config.CLIENT @@ -278,24 +297,74 @@ def test_create_dataset_with_default_rounding_mode(self): self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.default_rounding_mode, "ROUND_HALF_EVEN") + def _create_resource_tag_key_and_values(self, key, values): + tag_key_client = TagKeysClient() + tag_value_client = TagValuesClient() + + tag_key_parent = f"projects/{Config.CLIENT.project}" + new_tag_key = resourcemanager_types.TagKey( + short_name=key, parent=tag_key_parent + ) + tag_key = tag_key_client.create_tag_key(tag_key=new_tag_key).result() + self.to_delete_tag_keys_values.insert(0, [tag_key]) + + for value in values: + new_tag_value = resourcemanager_types.TagValue( + short_name=value, parent=tag_key.name + ) + tag_value = tag_value_client.create_tag_value( + tag_value=new_tag_value + ).result() + self.to_delete_tag_keys_values[0].insert(0, tag_value) + def test_update_dataset(self): dataset = self.temp_dataset(_make_dataset_id("update_dataset")) self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) self.assertEqual(dataset.labels, {}) + self.assertEqual(dataset.resource_tags, {}) self.assertIs(dataset.is_case_insensitive, False) + # This creates unique tag keys for each of test runnings for different Python versions + tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4)) + tag_1 = f"env_{tag_postfix}" + tag_2 = f"component_{tag_postfix}" + tag_3 = f"project_{tag_postfix}" + + # Tags need to be created before they can be used in a dataset. + self._create_resource_tag_key_and_values(tag_1, ["prod", "dev"]) + self._create_resource_tag_key_and_values(tag_2, ["batch"]) + self._create_resource_tag_key_and_values(tag_3, ["atlas"]) + dataset.friendly_name = "Friendly" dataset.description = "Description" dataset.labels = {"priority": "high", "color": "blue"} + dataset.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "prod", + f"{Config.CLIENT.project}/{tag_2}": "batch", + } dataset.is_case_insensitive = True ds2 = Config.CLIENT.update_dataset( - dataset, ("friendly_name", "description", "labels", "is_case_insensitive") + dataset, + ( + "friendly_name", + "description", + "labels", + "resource_tags", + "is_case_insensitive", + ), ) self.assertEqual(ds2.friendly_name, "Friendly") self.assertEqual(ds2.description, "Description") self.assertEqual(ds2.labels, {"priority": "high", "color": "blue"}) + self.assertEqual( + ds2.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "prod", + f"{Config.CLIENT.project}/{tag_2}": "batch", + }, + ) self.assertIs(ds2.is_case_insensitive, True) ds2.labels = { @@ -303,8 +372,25 @@ def test_update_dataset(self): "shape": "circle", # add "priority": None, # delete } - ds3 = Config.CLIENT.update_dataset(ds2, ["labels"]) + ds2.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "dev", # change + f"{Config.CLIENT.project}/{tag_3}": "atlas", # add + f"{Config.CLIENT.project}/{tag_2}": None, # delete + } + ds3 = Config.CLIENT.update_dataset(ds2, ["labels", "resource_tags"]) self.assertEqual(ds3.labels, {"color": "green", "shape": "circle"}) + self.assertEqual( + ds3.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "dev", + f"{Config.CLIENT.project}/{tag_3}": "atlas", + }, + ) + + # Remove all tags + ds3.resource_tags = None + ds4 = Config.CLIENT.update_dataset(ds3, ["resource_tags"]) + self.assertEqual(ds4.resource_tags, {}) # If we try to update using d2 again, it will fail because the # previous update changed the ETag. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index cd336b73fa64..14089b031c4b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2028,6 +2028,7 @@ def test_update_dataset(self): LABELS = {"priority": "high"} ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] EXP = 17 + RESOURCE_TAGS = {"123456789012/key": "value"} RESOURCE = { "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, "etag": "etag", @@ -2037,6 +2038,7 @@ def test_update_dataset(self): "defaultTableExpirationMs": EXP, "labels": LABELS, "access": ACCESS, + "resourceTags": RESOURCE_TAGS, } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -2048,12 +2050,14 @@ def test_update_dataset(self): ds.default_table_expiration_ms = EXP ds.labels = LABELS ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] + ds.resource_tags = RESOURCE_TAGS fields = [ "description", "friendly_name", "location", "labels", "access_entries", + "resource_tags", ] with mock.patch( @@ -2077,6 +2081,7 @@ def test_update_dataset(self): "location": LOCATION, "labels": LABELS, "access": ACCESS, + "resourceTags": RESOURCE_TAGS, }, path="/" + PATH, timeout=7.5, @@ -2086,6 +2091,7 @@ def test_update_dataset(self): self.assertEqual(ds2.location, ds.location) self.assertEqual(ds2.labels, ds.labels) self.assertEqual(ds2.access_entries, ds.access_entries) + self.assertEqual(ds2.resource_tags, ds.resource_tags) # ETag becomes If-Match header. ds._properties["etag"] = "etag" diff --git a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py index a2491a8121ca..bd7c6a8f8810 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py @@ -65,6 +65,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "tableId": "northern-hemisphere", } DEFAULT_ROUNDING_MODE = "ROUND_HALF_EVEN" + RESOURCE_TAGS = {"123456789012/foo": "bar"} RESOURCE = { "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, "etag": "etag", @@ -76,6 +77,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "labels": LABELS, "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], "defaultRoundingMode": DEFAULT_ROUNDING_MODE, + "resourceTags": RESOURCE_TAGS, } conn = client._connection = make_connection(RESOURCE) entries = [ @@ -91,6 +93,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): before.default_table_expiration_ms = 3600 before.location = LOCATION before.labels = LABELS + before.resource_tags = RESOURCE_TAGS before.default_rounding_mode = DEFAULT_ROUNDING_MODE after = client.create_dataset(before) assert after.dataset_id == DS_ID @@ -103,6 +106,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): assert after.default_table_expiration_ms == 3600 assert after.labels == LABELS assert after.default_rounding_mode == DEFAULT_ROUNDING_MODE + assert after.resource_tags == RESOURCE_TAGS conn.api_request.assert_called_once_with( method="POST", @@ -119,6 +123,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): {"view": VIEW, "role": None}, ], "labels": LABELS, + "resourceTags": RESOURCE_TAGS, }, timeout=DEFAULT_TIMEOUT, ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index c0164bc738a4..46bcd6611358 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -894,6 +894,28 @@ def test_location_setter(self): dataset.location = "LOCATION" self.assertEqual(dataset.location, "LOCATION") + def test_resource_tags_update_in_place(self): + dataset = self._make_one(self.DS_REF) + tags = dataset.resource_tags + tags["123456789012/foo"] = "bar" # update in place + self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"}) + + def test_resource_tags_setter(self): + dataset = self._make_one(self.DS_REF) + dataset.resource_tags = {"123456789012/foo": "bar"} + self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"}) + + def test_resource_tags_setter_bad_value(self): + dataset = self._make_one(self.DS_REF) + with self.assertRaises(ValueError): + dataset.resource_tags = "invalid" + with self.assertRaises(ValueError): + dataset.resource_tags = 123 + + def test_resource_tags_getter_missing_value(self): + dataset = self._make_one(self.DS_REF) + self.assertEqual(dataset.resource_tags, {}) + def test_labels_update_in_place(self): dataset = self._make_one(self.DS_REF) del dataset._properties["labels"] # don't start w/ existing dict From 5d75578df8e525486574ae56c7d2424db3574e1b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 14 Jan 2025 23:14:02 -0500 Subject: [PATCH 1878/2016] feat: Adds ForeignTypeInfo class and tests (#2110) * Adds ForeignTypeInfo class and tests * Tweak to docstring * minor adjustment in test to enhance code coverage * Updates spacing in docstrings * More updates to spacing in docstrings. --- .../google/cloud/bigquery/schema.py | 64 ++++++++++++++++- .../tests/unit/test_schema.py | 71 ++++++++++++++++++- 2 files changed, 131 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 8d62b2b5b864..b278b686a21c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -560,6 +560,63 @@ def to_api_repr(self) -> dict: return answer +class ForeignTypeInfo: + """Metadata about the foreign data type definition such as the system in which the + type is defined. + + Args: + type_system (str): Required. Specifies the system which defines the + foreign data type. + + TypeSystem enum currently includes: + * "TYPE_SYSTEM_UNSPECIFIED" + * "HIVE" + """ + + def __init__(self, type_system: Optional[str] = None): + self._properties: Dict[str, Any] = {} + self.type_system = type_system + + @property + def type_system(self) -> Optional[str]: + """Required. Specifies the system which defines the foreign data + type.""" + + return self._properties.get("typeSystem") + + @type_system.setter + def type_system(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["typeSystem"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignTypeInfo": + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'api_repr'. + """ + + config = cls() + config._properties = api_repr + return config + + class SerDeInfo: """Serializer and deserializer information. @@ -625,6 +682,7 @@ def parameters(self, value: Optional[dict[str, str]] = None): def to_api_repr(self) -> dict: """Build an API representation of this object. + Returns: Dict[str, Any]: A dictionary in the format used by the BigQuery API. @@ -635,11 +693,13 @@ def to_api_repr(self) -> dict: def from_api_repr(cls, api_repr: dict) -> SerDeInfo: """Factory: constructs an instance of the class (cls) given its API representation. + Args: - resource (Dict[str, Any]): + api_repr (Dict[str, Any]): API representation of the object to be instantiated. + Returns: - An instance of the class initialized with data from 'resource'. + An instance of the class initialized with data from 'api_repr'. """ config = cls("PLACEHOLDER") config._properties = api_repr diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 7e84dd63f2d8..efbc5d26fdcb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -1128,6 +1128,73 @@ def test_to_api_repr_parameterized(field, api): assert SchemaField(**field).to_api_repr() == api +class TestForeignTypeInfo: + """Tests for ForeignTypeInfo objects.""" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.schema import ForeignTypeInfo + + return ForeignTypeInfo + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + @pytest.mark.parametrize( + "type_system,expected", + [ + (None, None), + ("TYPE_SYSTEM_UNSPECIFIED", "TYPE_SYSTEM_UNSPECIFIED"), + ("HIVE", "HIVE"), + ], + ) + def test_ctor_valid_input(self, type_system, expected): + result = self._make_one(type_system=type_system) + + assert result.type_system == expected + + def test_ctor_invalid_input(self): + with pytest.raises(TypeError) as e: + self._make_one(type_system=123) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + @pytest.mark.parametrize( + "type_system,expected", + [ + ("TYPE_SYSTEM_UNSPECIFIED", {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}), + ("HIVE", {"typeSystem": "HIVE"}), + (None, {"typeSystem": None}), + ], + ) + def test_to_api_repr(self, type_system, expected): + result = self._make_one(type_system=type_system) + + assert result.to_api_repr() == expected + + def test_from_api_repr(self): + """GIVEN an api representation of a ForeignTypeInfo object (i.e. api_repr) + WHEN converted into a ForeignTypeInfo object using from_api_repr() + THEN it will have the same representation in dict format as a ForeignTypeInfo + object made directly (via _make_one()) and represented in dict format. + """ + api_repr = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + + expected = self._make_one( + type_system="TYPE_SYSTEM_UNSPECIFIED", + ) + + klass = self._get_target_class() + result = klass.from_api_repr(api_repr) + + # We convert both to dict format because these classes do not have a + # __eq__() method to facilitate direct equality comparisons. + assert result.to_api_repr() == expected.to_api_repr() + + class TestSerDeInfo: """Tests for the SerDeInfo class.""" @@ -1190,9 +1257,9 @@ def test_to_api_repr(self): assert serde_info.to_api_repr() == expected_repr def test_from_api_repr(self): - """GIVEN an api representation of a SerDeInfo object (i.e. resource) + """GIVEN an api representation of a SerDeInfo object (i.e. api_repr) WHEN converted into a SerDeInfo object using from_api_repr() - THEN it will have the representation in dict format as a SerDeInfo + THEN it will have the same representation in dict format as a SerDeInfo object made directly (via _make_one()) and represented in dict format. """ api_repr = { From 32ea052f43f8292075699b42134567974334f1fc Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 15 Jan 2025 12:44:27 -0500 Subject: [PATCH 1879/2016] feat: adds ExternalCatalogDatasetOptions and tests (#2111) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: adds ExternalCatalogDatasetOptions and tests * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Sweña (Swast) * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Tim Sweña (Swast) Co-authored-by: Owl Bot --- .../google/cloud/bigquery/dataset.py | 25 ++++++ .../google/cloud/bigquery/external_config.py | 76 +++++++++++++++- .../tests/unit/test_dataset.py | 84 +++++++++++++++++ .../tests/unit/test_external_config.py | 89 +++++++++++++++++++ 4 files changed, 273 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 4d06d729d072..15a11fb40a69 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -27,6 +27,7 @@ from google.cloud.bigquery.routine import Routine, RoutineReference from google.cloud.bigquery.table import Table, TableReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery import external_config from typing import Optional, List, Dict, Any, Union @@ -531,6 +532,7 @@ class Dataset(object): "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", "resource_tags": "resourceTags", + "external_catalog_dataset_options": "externalCatalogDatasetOptions", } def __init__(self, dataset_ref) -> None: @@ -898,6 +900,29 @@ def storage_billing_model(self, value): ) self._properties["storageBillingModel"] = value + @property + def external_catalog_dataset_options(self): + """Options defining open source compatible datasets living in the + BigQuery catalog. Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + prop = _helpers._get_sub_prop( + self._properties, ["externalCatalogDatasetOptions"] + ) + + if prop is not None: + prop = external_config.ExternalCatalogDatasetOptions.from_api_repr(prop) + return prop + + @external_catalog_dataset_options.setter + def external_catalog_dataset_options(self, value): + value = _helpers._isinstance_or_raise( + value, external_config.ExternalCatalogDatasetOptions, none_allowed=True + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] + ] = (value.to_api_repr() if value is not None else None) + @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index a891bc2327ab..7f2b58f2b80b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -18,7 +18,7 @@ Job.configuration.query.tableDefinitions. """ -from __future__ import absolute_import +from __future__ import absolute_import, annotations import base64 import copy @@ -28,6 +28,7 @@ from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery import _helpers from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -1003,3 +1004,76 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig": config = cls(resource["sourceFormat"]) config._properties = copy.deepcopy(resource) return config + + +class ExternalCatalogDatasetOptions: + """Options defining open source compatible datasets living in the BigQuery catalog. + Contains metadata of open source database, schema or namespace represented + by the current dataset. + + Args: + default_storage_location_uri (Optional[str]): The storage location URI for all + tables in the dataset. Equivalent to hive metastore's database + locationUri. Maximum length of 1024 characters. (str) + parameters (Optional[dict[str, Any]]): A map of key value pairs defining the parameters + and properties of the open source schema. Maximum size of 2Mib. + """ + + def __init__( + self, + default_storage_location_uri: Optional[str] = None, + parameters: Optional[Dict[str, Any]] = None, + ): + self._properties: Dict[str, Any] = {} + self.default_storage_location_uri = default_storage_location_uri + self.parameters = parameters + + @property + def default_storage_location_uri(self) -> Optional[str]: + """Optional. The storage location URI for all tables in the dataset. + Equivalent to hive metastore's database locationUri. Maximum length of + 1024 characters.""" + + return self._properties.get("defaultStorageLocationUri") + + @default_storage_location_uri.setter + def default_storage_location_uri(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["defaultStorageLocationUri"] = value + + @property + def parameters(self) -> Optional[Dict[str, Any]]: + """Optional. A map of key value pairs defining the parameters and + properties of the open source schema. Maximum size of 2Mib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Optional[Dict[str, Any]]): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls() + config._properties = api_repr + return config diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 46bcd6611358..8ab8dffec16f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -650,6 +650,16 @@ class TestDataset(unittest.TestCase): DS_ID = "dataset-id" DS_REF = DatasetReference(PROJECT, DS_ID) KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" + DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path" + PARAMETERS = {"key": "value"} + API_REPR = { + "datasetReference": {"projectId": "project", "datasetId": "dataset-id"}, + "labels": {}, + "externalCatalogDatasetOptions": { + "defaultStorageLocationUri": DEFAULT_STORAGE_LOCATION_URI, + "parameters": PARAMETERS, + }, + } @staticmethod def _get_target_class(): @@ -1067,6 +1077,80 @@ def test___repr__(self): expected = "Dataset(DatasetReference('project1', 'dataset1'))" self.assertEqual(repr(dataset), expected) + def test_external_catalog_dataset_options_setter(self): + # GIVEN the parameters DEFAULT_STORAGE_LOCATION_URI and PARAMETERS + # WHEN an ExternalCatalogDatasetOptions obj is created + # and added to a dataset. + # THEN the api representation of the dataset will match API_REPR + + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + dataset = self._make_one(self.DS_REF) + + ecdo_obj = ExternalCatalogDatasetOptions( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + dataset.external_catalog_dataset_options = ecdo_obj + + result = dataset.to_api_repr() + expected = self.API_REPR + assert result == expected + + def test_external_catalog_dataset_options_getter_prop_exists(self): + # GIVEN default dataset PLUS an ExternalCatalogDatasetOptions + # THEN confirm that the api_repr of the ExternalCatalogDatasetsOptions + # matches the api_repr of the external_catalog_dataset_options attribute. + + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + dataset = self._make_one(self.DS_REF) + ecdo_obj = ExternalCatalogDatasetOptions( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + dataset.external_catalog_dataset_options = ecdo_obj + result = dataset.external_catalog_dataset_options.to_api_repr() + expected = ecdo_obj.to_api_repr() + assert result == expected + + def test_external_catalog_dataset_options_getter_prop_is_none(self): + # GIVEN only a default dataset + # THEN confirm that external_catalog_dataset_options is None + + dataset = self._make_one(self.DS_REF) + expected = None + result = dataset.external_catalog_dataset_options + assert result == expected + + def test_external_catalog_dataset_options_from_api_repr(self): + # GIVEN default dataset including an ExternalCatalogDatasetOptions + # THEN confirm that the api_repr of the ExternalCatalogDatasetsOptions + # on a dataset object created via from_api_repr matches the api_repr + # of the "externalCatalogDatasetOptions" key. + + api_repr = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(api_repr) + + result = dataset.external_catalog_dataset_options.to_api_repr() + expected = api_repr["externalCatalogDatasetOptions"] + assert result == expected + + def test_external_catalog_dataset_options_to_api_repr(self): + # GIVEN a dataset api_repr including an ExternalCatalogDatasetOptions key + # THEN confirm that the api_repr of that key from a dataset object created + # via the to_api_repr() method matches the value of the key + # used to create the dataset object + + api_repr = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(api_repr) + + result = dataset.to_api_repr()["externalCatalogDatasetOptions"] + expected = api_repr["externalCatalogDatasetOptions"] + assert result == expected + class TestDatasetListItem(unittest.TestCase): @staticmethod diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 9fd16e69967c..0c27d8e56190 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -19,6 +19,8 @@ from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +import pytest + class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] @@ -890,3 +892,90 @@ def _copy_and_update(d, u): d = copy.deepcopy(d) d.update(u) return d + + +class TestExternalCatalogDatasetOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + return ExternalCatalogDatasetOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path" + PARAMETERS = {"key": "value"} + + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + (DEFAULT_STORAGE_LOCATION_URI, PARAMETERS), # set all params + (DEFAULT_STORAGE_LOCATION_URI, None), # set only one argument at a time + (None, PARAMETERS), + (None, None), # use default parameters + ], + ) + def test_ctor_initialization( + self, + default_storage_location_uri, + parameters, + ): + """Test ExternalCatalogDatasetOptions constructor with explicit values.""" + + instance = self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + assert instance.default_storage_location_uri == default_storage_location_uri + assert instance.parameters == parameters + + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + (123, None), # does not accept integers + (None, 123), + ], + ) + def test_ctor_invalid_input(self, default_storage_location_uri, parameters): + """Test ExternalCatalogDatasetOptions constructor with invalid input.""" + + with pytest.raises(TypeError) as e: + self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + """Test ExternalCatalogDatasetOptions.to_api_repr method.""" + + instance = self._make_one( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + resource = instance.to_api_repr() + assert ( + resource["defaultStorageLocationUri"] == self.DEFAULT_STORAGE_LOCATION_URI + ) + assert resource["parameters"] == self.PARAMETERS + + def test_from_api_repr(self): + """GIVEN an api representation of an ExternalCatalogDatasetOptions object (i.e. api_repr) + WHEN converted into an ExternalCatalogDatasetOptions object using from_api_repr() + THEN it will have the representation in dict format as an ExternalCatalogDatasetOptions + object made directly (via _make_one()) and represented in dict format. + """ + + instance = self._make_one() + api_repr = { + "defaultStorageLocationUri": self.DEFAULT_STORAGE_LOCATION_URI, + "parameters": self.PARAMETERS, + } + result = instance.from_api_repr(api_repr) + + assert isinstance(result, external_config.ExternalCatalogDatasetOptions) + assert result._properties == api_repr From 5dd99fc1b5cba35efff90a46bfeb8be36afd0d2c Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 15 Jan 2025 10:22:48 -0800 Subject: [PATCH 1880/2016] chore(main): release 3.28.0 (#2056) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 28 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 989b7f020faa..6a7ff5641cf5 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,34 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.28.0](https://github.com/googleapis/python-bigquery/compare/v3.27.0...v3.28.0) (2025-01-15) + + +### Features + +* Add property for `allowNonIncrementalDefinition` for materialized view ([#2084](https://github.com/googleapis/python-bigquery/issues/2084)) ([3359ef3](https://github.com/googleapis/python-bigquery/commit/3359ef37b90243bea2d9e68bb996fe5d736f304c)) +* Add property for maxStaleness in table definitions ([#2087](https://github.com/googleapis/python-bigquery/issues/2087)) ([729322c](https://github.com/googleapis/python-bigquery/commit/729322c2288a30464f2f135ba18b9c4aa7d2f0da)) +* Add type hints to Client ([#2044](https://github.com/googleapis/python-bigquery/issues/2044)) ([40529de](https://github.com/googleapis/python-bigquery/commit/40529de923e25c41c6728c121b9c82a042967ada)) +* Adds ExternalCatalogDatasetOptions and tests ([#2111](https://github.com/googleapis/python-bigquery/issues/2111)) ([b929a90](https://github.com/googleapis/python-bigquery/commit/b929a900d49e2c15897134209ed9de5fc7f238cd)) +* Adds ForeignTypeInfo class and tests ([#2110](https://github.com/googleapis/python-bigquery/issues/2110)) ([55ca63c](https://github.com/googleapis/python-bigquery/commit/55ca63c23fcb56573e2de67e4f7899939628c4a1)) +* Adds new input validation function similar to isinstance. ([#2107](https://github.com/googleapis/python-bigquery/issues/2107)) ([a2bebb9](https://github.com/googleapis/python-bigquery/commit/a2bebb95c5ef32ac7c7cbe19c3e7a9412cbee60d)) +* Adds StorageDescriptor and tests ([#2109](https://github.com/googleapis/python-bigquery/issues/2109)) ([6be0272](https://github.com/googleapis/python-bigquery/commit/6be0272ff25dac97a38ae4ee5aa02016dc82a0d8)) +* Adds the SerDeInfo class and tests ([#2108](https://github.com/googleapis/python-bigquery/issues/2108)) ([62960f2](https://github.com/googleapis/python-bigquery/commit/62960f255d05b15940a8d2cdc595592175fada11)) +* Migrate to pyproject.toml ([#2041](https://github.com/googleapis/python-bigquery/issues/2041)) ([1061611](https://github.com/googleapis/python-bigquery/commit/106161180ead01aca1ead909cf06ca559f68666d)) +* Preserve unknown fields from the REST API representation in `SchemaField` ([#2097](https://github.com/googleapis/python-bigquery/issues/2097)) ([aaf1eb8](https://github.com/googleapis/python-bigquery/commit/aaf1eb85ada95ab866be0199812ea7f5c7f50766)) +* Resource tags in dataset ([#2090](https://github.com/googleapis/python-bigquery/issues/2090)) ([3e13016](https://github.com/googleapis/python-bigquery/commit/3e130166f43dcc06704fe90edf9068dfd44842a6)) +* Support setting max_stream_count when fetching query result ([#2051](https://github.com/googleapis/python-bigquery/issues/2051)) ([d461297](https://github.com/googleapis/python-bigquery/commit/d4612979b812d2a835e47200f27a87a66bcb856a)) + + +### Bug Fixes + +* Allow geopandas 1.x ([#2065](https://github.com/googleapis/python-bigquery/issues/2065)) ([f2ab8cb](https://github.com/googleapis/python-bigquery/commit/f2ab8cbfe00d442ad3b40683ecfec320e53b4688)) + + +### Documentation + +* Render fields correctly for update calls ([#2055](https://github.com/googleapis/python-bigquery/issues/2055)) ([a4d9534](https://github.com/googleapis/python-bigquery/commit/a4d9534a900f13ae7355904cda05097d781f27e3)) + ## [3.27.0](https://github.com/googleapis/python-bigquery/compare/v3.26.0...v3.27.0) (2024-11-01) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 8f4418777399..7da2c534f2b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.27.0" +__version__ = "3.28.0" From 2ab96025dd42618ff4a25392dd5c8becf9b12a39 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Fri, 17 Jan 2025 10:24:06 -0800 Subject: [PATCH 1881/2016] fix: add default value in SchemaField.from_api_repr() (#2115) --- .../google-cloud-bigquery/google/cloud/bigquery/schema.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index b278b686a21c..42dfbfca81ef 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -228,6 +228,12 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": # fields. See https://github.com/googleapis/python-bigquery/issues/6 placeholder._properties = api_repr + # Add the field `mode` with default value if it does not exist. Fixes + # an incompatibility issue with pandas-gbq: + # https://github.com/googleapis/python-bigquery-pandas/issues/854 + if "mode" not in placeholder._properties: + placeholder._properties["mode"] = "NULLABLE" + return placeholder @property From e78a1cb29d659c10a7f59519d40b036e9d6ceb27 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 21 Jan 2025 06:04:34 -0500 Subject: [PATCH 1882/2016] feat: add ExternalCatalogTableOptions class and tests (#2116) * Updates most of external_catalog_table_options * Adds ExternalCatalogTableOptions and tests --- .../google/cloud/bigquery/external_config.py | 107 ++++++++++++++ .../google/cloud/bigquery/magics/magics.py | 2 +- .../google/cloud/bigquery/table.py | 35 +++++ .../tests/unit/test_external_config.py | 137 ++++++++++++++++++ .../tests/unit/test_table.py | 87 +++++++++++ 5 files changed, 367 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 7f2b58f2b80b..73c4acabf446 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -30,6 +30,7 @@ from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery import _helpers from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions +from google.cloud.bigquery import schema from google.cloud.bigquery.schema import SchemaField @@ -1077,3 +1078,109 @@ def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions: config = cls() config._properties = api_repr return config + + +class ExternalCatalogTableOptions: + """Metadata about open source compatible table. The fields contained in these + options correspond to hive metastore's table level properties. + + Args: + connection_id (Optional[str]): The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form `..` or + `projects//locations//connections/`. + parameters (Union[Dict[str, Any], None]): A map of key value pairs defining the parameters + and properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4Mib. + storage_descriptor (Optional[StorageDescriptor]): A storage descriptor containing information + about the physical storage of this table. + """ + + def __init__( + self, + connection_id: Optional[str] = None, + parameters: Union[Dict[str, Any], None] = None, + storage_descriptor: Optional[schema.StorageDescriptor] = None, + ): + self._properties: Dict[str, Any] = {} + self.connection_id = connection_id + self.parameters = parameters + self.storage_descriptor = storage_descriptor + + @property + def connection_id(self) -> Optional[str]: + """Optional. The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form `..` or + `projects//locations//connections/`. + """ + + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["connectionId"] = value + + @property + def parameters(self) -> Union[Dict[str, Any], None]: + """Optional. A map of key value pairs defining the parameters and + properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4Mib. + """ + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Union[Dict[str, Any], None]): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + @property + def storage_descriptor(self) -> Any: + """Optional. A storage descriptor containing information about the + physical storage of this table.""" + + prop = _helpers._get_sub_prop(self._properties, ["storageDescriptor"]) + + if prop is not None: + return schema.StorageDescriptor.from_api_repr(prop) + return None + + @storage_descriptor.setter + def storage_descriptor(self, value: Union[schema.StorageDescriptor, dict, None]): + value = _helpers._isinstance_or_raise( + value, (schema.StorageDescriptor, dict), none_allowed=True + ) + if isinstance(value, schema.StorageDescriptor): + self._properties["storageDescriptor"] = value.to_api_repr() + else: + self._properties["storageDescriptor"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> ExternalCatalogTableOptions: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'api_repr'. + """ + config = cls() + config._properties = api_repr + return config diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index b153d959a0bd..a5be95185f74 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -56,7 +56,7 @@ bigquery_magics = None -IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) +IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) # type: ignore class Context(object): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 80ab330ba7af..fa8d81962190 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -69,6 +69,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery import external_config if typing.TYPE_CHECKING: # pragma: NO COVER # Unconditionally import optional dependencies again to tell pytype that @@ -408,6 +409,7 @@ class Table(_TableBase): "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", "max_staleness": "maxStaleness", + "external_catalog_table_options": "externalCatalogTableOptions", } def __init__(self, table_ref, schema=None) -> None: @@ -1023,6 +1025,39 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @property + def external_catalog_table_options( + self, + ) -> Optional[external_config.ExternalCatalogTableOptions]: + """Options defining open source compatible datasets living in the + BigQuery catalog. Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ) + if prop is not None: + return external_config.ExternalCatalogTableOptions.from_api_repr(prop) + return None + + @external_catalog_table_options.setter + def external_catalog_table_options( + self, value: Union[external_config.ExternalCatalogTableOptions, dict, None] + ): + value = _helpers._isinstance_or_raise( + value, + (external_config.ExternalCatalogTableOptions, dict), + none_allowed=True, + ) + if isinstance(value, external_config.ExternalCatalogTableOptions): + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ] = value.to_api_repr() + else: + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ] = value + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 0c27d8e56190..7f84a9f5bef2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -14,6 +14,7 @@ import base64 import copy +from typing import Any, Dict, Optional import unittest from google.cloud.bigquery import external_config @@ -979,3 +980,139 @@ def test_from_api_repr(self): assert isinstance(result, external_config.ExternalCatalogDatasetOptions) assert result._properties == api_repr + + +class TestExternalCatalogTableOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogTableOptions + + return ExternalCatalogTableOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + storage_descriptor_repr = { + "inputFormat": "testpath.to.OrcInputFormat", + "locationUri": "gs://test/path/", + "outputFormat": "testpath.to.OrcOutputFormat", + "serDeInfo": { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + }, + } + + CONNECTIONID = "connection123" + PARAMETERS = {"key": "value"} + STORAGEDESCRIPTOR = schema.StorageDescriptor.from_api_repr(storage_descriptor_repr) + EXTERNALCATALOGTABLEOPTIONS = { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": STORAGEDESCRIPTOR.to_api_repr(), + } + + @pytest.mark.parametrize( + "connection_id,parameters,storage_descriptor", + [ + ( + CONNECTIONID, + PARAMETERS, + STORAGEDESCRIPTOR, + ), # set all parameters at once + (CONNECTIONID, None, None), # set only one parameter at a time + (None, PARAMETERS, None), + (None, None, STORAGEDESCRIPTOR), # set storage descriptor using obj + (None, None, storage_descriptor_repr), # set storage descriptor using dict + (None, None, None), # use default parameters + ], + ) + def test_ctor_initialization( + self, + connection_id, + parameters, + storage_descriptor, + ): + instance = self._make_one( + connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + + assert instance.connection_id == connection_id + assert instance.parameters == parameters + + if isinstance(storage_descriptor, schema.StorageDescriptor): + assert ( + instance.storage_descriptor.to_api_repr() + == storage_descriptor.to_api_repr() + ) + elif isinstance(storage_descriptor, dict): + assert instance.storage_descriptor.to_api_repr() == storage_descriptor + else: + assert instance.storage_descriptor is None + + @pytest.mark.parametrize( + "connection_id,parameters,storage_descriptor", + [ + pytest.param( + 123, + PARAMETERS, + STORAGEDESCRIPTOR, + id="connection_id-invalid-type", + ), + pytest.param( + CONNECTIONID, + 123, + STORAGEDESCRIPTOR, + id="parameters-invalid-type", + ), + pytest.param( + CONNECTIONID, + PARAMETERS, + 123, + id="storage_descriptor-invalid-type", + ), + ], + ) + def test_ctor_invalid_input( + self, + connection_id: str, + parameters: Dict[str, Any], + storage_descriptor: Optional[schema.StorageDescriptor], + ): + with pytest.raises(TypeError) as e: + external_config.ExternalCatalogTableOptions( + connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + instance = self._make_one( + connection_id=self.CONNECTIONID, + parameters=self.PARAMETERS, + storage_descriptor=self.STORAGEDESCRIPTOR, + ) + + result = instance.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + + assert result == expected + + def test_from_api_repr(self): + result = self._make_one( + connection_id=self.CONNECTIONID, + parameters=self.PARAMETERS, + storage_descriptor=self.STORAGEDESCRIPTOR, + ) + + instance = self._make_one() + api_repr = self.EXTERNALCATALOGTABLEOPTIONS + result = instance.from_api_repr(api_repr) + + assert isinstance(result, external_config.ExternalCatalogTableOptions) + assert result._properties == api_repr diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index e9d461e9d9c7..de8b331f50ba 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -30,6 +30,7 @@ from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions +from google.cloud.bigquery import external_config from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -5879,6 +5880,92 @@ def test_from_api_repr_only_foreign_keys_resource(self): self.assertIsNotNone(instance.foreign_keys) +class TestExternalCatalogTableOptions: + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + DATASET = DatasetReference(PROJECT, DATASET_ID) + TABLEREF = DATASET.table(TABLE_ID) + + @staticmethod + def _get_target_class(self): + from google.cloud.bigquery.table import Table + + return Table + + def _make_one(self, *args, **kw): + return self._get_target_class(self)(*args, **kw) + + EXTERNALCATALOGTABLEOPTIONS = { + "connection_id": "connection123", + "parameters": {"key": "value"}, + "storage_descriptor": { + "input_format": "testpath.to.OrcInputFormat", + "location_uri": "gs://test/path/", + "output_format": "testpath.to.OrcOutputFormat", + "serde_info": { + "serialization_library": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + }, + }, + } + + def test_external_catalog_table_options_default_initialization(self): + table = self._make_one(self.TABLEREF) + + assert table.external_catalog_table_options is None + + def test_external_catalog_table_options_valid_inputs(self): + table = self._make_one(self.TABLEREF) + + # supplied in api_repr format + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + # supplied in obj format + ecto = external_config.ExternalCatalogTableOptions.from_api_repr( + self.EXTERNALCATALOGTABLEOPTIONS + ) + assert isinstance(ecto, external_config.ExternalCatalogTableOptions) + + table.external_catalog_table_options = ecto + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + def test_external_catalog_table_options_invalid_input(self): + table = self._make_one(self.TABLEREF) + + # invalid on the whole + with pytest.raises(TypeError) as e: + table.external_catalog_table_options = 123 + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_external_catalog_table_options_to_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + def test_external_catalog_table_options_from_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + ecto = external_config.ExternalCatalogTableOptions.from_api_repr( + self.EXTERNALCATALOGTABLEOPTIONS + ) + result = ecto.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + @pytest.mark.parametrize( "table_path", ( From 8632ff20f713c347fb56077ac6fbf713f4433480 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 21 Jan 2025 10:08:46 -0800 Subject: [PATCH 1883/2016] chore(main): release 3.29.0 (#2117) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 12 ++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 6a7ff5641cf5..45c39e19c8fe 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.29.0](https://github.com/googleapis/python-bigquery/compare/v3.28.0...v3.29.0) (2025-01-21) + + +### Features + +* Add ExternalCatalogTableOptions class and tests ([#2116](https://github.com/googleapis/python-bigquery/issues/2116)) ([cdc1a6e](https://github.com/googleapis/python-bigquery/commit/cdc1a6e1623b8305c6a6a1a481b3365e866a073d)) + + +### Bug Fixes + +* Add default value in SchemaField.from_api_repr() ([#2115](https://github.com/googleapis/python-bigquery/issues/2115)) ([7de6822](https://github.com/googleapis/python-bigquery/commit/7de6822e1c556a68cb8d50e90664c094697cca1d)) + ## [3.28.0](https://github.com/googleapis/python-bigquery/compare/v3.27.0...v3.28.0) (2025-01-15) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 7da2c534f2b1..3d852b8a3f57 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.28.0" +__version__ = "3.29.0" From 85ab7b4d3ffefa80e987623545cb0a4ae6995856 Mon Sep 17 00:00:00 2001 From: "Hiroki.H (mahiro)" <56078795+hrkh@users.noreply.github.com> Date: Wed, 22 Jan 2025 04:31:33 +0900 Subject: [PATCH 1884/2016] feat: support resource_tags for table (#2093) * feat: support resource_tags for table * fix: system test for resource tags * fix: typo * fix: unit test * Update tests/unit/test_client.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * fix: append random string suffix to resource tags to prevent test conflicts * Update google/cloud/bigquery/table.py --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/table.py | 17 +++++++ .../tests/system/test_client.py | 44 ++++++++++++++++++- .../tests/unit/test_client.py | 6 ++- .../tests/unit/test_table.py | 27 ++++++++++++ 4 files changed, 91 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index fa8d81962190..934a28cfc289 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -409,6 +409,7 @@ class Table(_TableBase): "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", "max_staleness": "maxStaleness", + "resource_tags": "resourceTags", "external_catalog_table_options": "externalCatalogTableOptions", } @@ -1025,6 +1026,22 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @property + def resource_tags(self): + """Dict[str, str]: Resource tags for the table. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.resource_tags + """ + return self._properties.setdefault( + self._PROPERTY_TO_API_FIELD["resource_tags"], {} + ) + + @resource_tags.setter + def resource_tags(self, value): + if not isinstance(value, dict) and value is not None: + raise ValueError("resource_tags must be a dict or None") + self._properties[self._PROPERTY_TO_API_FIELD["resource_tags"]] = value + @property def external_catalog_table_options( self, diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index c0dd83b12089..30e9f94a30ab 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -732,6 +732,16 @@ def test_list_tables(self): def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) + # This creates unique tag keys for each of test runnings for different Python versions + tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4)) + tag_1 = f"owner_{tag_postfix}" + tag_2 = f"classification_{tag_postfix}" + tag_3 = f"env_{tag_postfix}" + + self._create_resource_tag_key_and_values(tag_1, ["Alice", "Bob"]) + self._create_resource_tag_key_and_values(tag_2, ["public"]) + self._create_resource_tag_key_and_values(tag_3, ["dev"]) + TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) @@ -744,14 +754,25 @@ def test_update_table(self): table.friendly_name = "Friendly" table.description = "Description" table.labels = {"priority": "high", "color": "blue"} + table.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "Alice", + f"{Config.CLIENT.project}/{tag_3}": "dev", + } table2 = Config.CLIENT.update_table( - table, ["friendly_name", "description", "labels"] + table, ["friendly_name", "description", "labels", "resource_tags"] ) self.assertEqual(table2.friendly_name, "Friendly") self.assertEqual(table2.description, "Description") self.assertEqual(table2.labels, {"priority": "high", "color": "blue"}) + self.assertEqual( + table2.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "Alice", + f"{Config.CLIENT.project}/{tag_3}": "dev", + }, + ) table2.description = None table2.labels = { @@ -759,9 +780,28 @@ def test_update_table(self): "shape": "circle", # add "priority": None, # delete } - table3 = Config.CLIENT.update_table(table2, ["description", "labels"]) + table2.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "Bob", # change + f"{Config.CLIENT.project}/{tag_2}": "public", # add + f"{Config.CLIENT.project}/{tag_3}": None, # delete + } + table3 = Config.CLIENT.update_table( + table2, ["description", "labels", "resource_tags"] + ) self.assertIsNone(table3.description) self.assertEqual(table3.labels, {"color": "green", "shape": "circle"}) + self.assertEqual( + table3.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "Bob", + f"{Config.CLIENT.project}/{tag_2}": "public", + }, + ) + + # Delete resource tag bindings. + table3.resource_tags = None + table4 = Config.CLIENT.update_table(table3, ["resource_tags"]) + self.assertEqual(table4.resource_tags, {}) # If we try to update using table2 again, it will fail because the # previous update changed the ETag. diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 14089b031c4b..462a70bbef96 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2320,6 +2320,7 @@ def test_update_table(self): "description": description, "friendlyName": title, "labels": {"x": "y"}, + "resourceTags": {"123456789012/key": "value"}, } ) schema = [ @@ -2343,7 +2344,8 @@ def test_update_table(self): table.description = description table.friendly_name = title table.labels = {"x": "y"} - fields = ["schema", "description", "friendly_name", "labels"] + table.resource_tags = {"123456789012/key": "value"} + fields = ["schema", "description", "friendly_name", "labels", "resource_tags"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -2375,6 +2377,7 @@ def test_update_table(self): "description": description, "friendlyName": title, "labels": {"x": "y"}, + "resourceTags": {"123456789012/key": "value"}, } conn.api_request.assert_called_once_with( method="PATCH", data=sent, path="/" + path, timeout=7.5 @@ -2383,6 +2386,7 @@ def test_update_table(self): self.assertEqual(updated_table.friendly_name, table.friendly_name) self.assertEqual(updated_table.schema, table.schema) self.assertEqual(updated_table.labels, table.labels) + self.assertEqual(updated_table.resource_tags, table.resource_tags) # ETag becomes If-Match header. table._properties["etag"] = "etag" diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index de8b331f50ba..5154f01d819d 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -1481,6 +1481,33 @@ def test_encryption_configuration_setter(self): table.encryption_configuration = None self.assertIsNone(table.encryption_configuration) + def test_resource_tags_getter_empty(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + self.assertEqual(table.resource_tags, {}) + + def test_resource_tags_update_in_place(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.resource_tags["123456789012/key"] = "value" + self.assertEqual(table.resource_tags, {"123456789012/key": "value"}) + + def test_resource_tags_setter(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.resource_tags = {"123456789012/key": "value"} + self.assertEqual(table.resource_tags, {"123456789012/key": "value"}) + + def test_resource_tags_setter_bad_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + with self.assertRaises(ValueError): + table.resource_tags = 12345 + def test___repr__(self): from google.cloud.bigquery.table import TableReference From fb0f79c0d789f692233c552436531754767f1325 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 12:13:24 -0800 Subject: [PATCH 1885/2016] chore(python): fix docs publish build (#2113) Source-Link: https://github.com/googleapis/synthtool/commit/bd9ede2fea1b640b7e90d5a1d110e6b300a2b43f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:04c35dc5f49f0f503a306397d6d043685f8d2bb822ab515818c4208d7fb2db3a Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/docker/docs/requirements.in | 1 + .../.kokoro/docker/docs/requirements.txt | 243 +++++++++++++++++- .../.kokoro/publish-docs.sh | 4 - 4 files changed, 237 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 10cf433a8b00..4c0027ff1c61 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a -# created: 2025-01-09T12:01:16.422459506Z + digest: sha256:04c35dc5f49f0f503a306397d6d043685f8d2bb822ab515818c4208d7fb2db3a +# created: 2025-01-16T15:24:11.364245182Z diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in index 816817c672a1..586bd07037ae 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in @@ -1 +1,2 @@ nox +gcp-docuploader diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt index f99a5c4aac7f..a9360a25b707 100644 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt @@ -2,16 +2,124 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in +# pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.5.2 \ - --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ - --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb +argcomplete==3.5.3 \ + --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ + --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 # via nox +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a + # via google-auth +certifi==2024.12.14 \ + --hash=sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56 \ + --hash=sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db + # via requests +charset-normalizer==3.4.1 \ + --hash=sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537 \ + --hash=sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa \ + --hash=sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a \ + --hash=sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294 \ + --hash=sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b \ + --hash=sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd \ + --hash=sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601 \ + --hash=sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd \ + --hash=sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4 \ + --hash=sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d \ + --hash=sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2 \ + --hash=sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313 \ + --hash=sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd \ + --hash=sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa \ + --hash=sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8 \ + --hash=sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1 \ + --hash=sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2 \ + --hash=sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496 \ + --hash=sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d \ + --hash=sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b \ + --hash=sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e \ + --hash=sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a \ + --hash=sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4 \ + --hash=sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca \ + --hash=sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78 \ + --hash=sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408 \ + --hash=sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5 \ + --hash=sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3 \ + --hash=sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f \ + --hash=sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a \ + --hash=sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765 \ + --hash=sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6 \ + --hash=sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146 \ + --hash=sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6 \ + --hash=sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9 \ + --hash=sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd \ + --hash=sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c \ + --hash=sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f \ + --hash=sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545 \ + --hash=sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176 \ + --hash=sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770 \ + --hash=sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824 \ + --hash=sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f \ + --hash=sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf \ + --hash=sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487 \ + --hash=sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d \ + --hash=sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd \ + --hash=sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b \ + --hash=sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534 \ + --hash=sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f \ + --hash=sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b \ + --hash=sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9 \ + --hash=sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd \ + --hash=sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125 \ + --hash=sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9 \ + --hash=sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de \ + --hash=sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11 \ + --hash=sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d \ + --hash=sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35 \ + --hash=sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f \ + --hash=sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda \ + --hash=sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7 \ + --hash=sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a \ + --hash=sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971 \ + --hash=sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8 \ + --hash=sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41 \ + --hash=sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d \ + --hash=sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f \ + --hash=sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757 \ + --hash=sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a \ + --hash=sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886 \ + --hash=sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77 \ + --hash=sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76 \ + --hash=sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247 \ + --hash=sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85 \ + --hash=sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb \ + --hash=sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7 \ + --hash=sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e \ + --hash=sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6 \ + --hash=sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037 \ + --hash=sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1 \ + --hash=sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e \ + --hash=sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807 \ + --hash=sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407 \ + --hash=sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c \ + --hash=sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12 \ + --hash=sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3 \ + --hash=sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089 \ + --hash=sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd \ + --hash=sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e \ + --hash=sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00 \ + --hash=sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616 + # via requests +click==8.1.8 \ + --hash=sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2 \ + --hash=sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a + # via gcp-docuploader colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 - # via nox + # via + # gcp-docuploader + # nox distlib==0.3.9 \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 @@ -20,10 +128,78 @@ filelock==3.16.1 \ --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 # via virtualenv +gcp-docuploader==0.6.5 \ + --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ + --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea + # via -r requirements.in +google-api-core==2.24.0 \ + --hash=sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9 \ + --hash=sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf + # via + # google-cloud-core + # google-cloud-storage +google-auth==2.37.0 \ + --hash=sha256:0054623abf1f9c83492c63d3f47e77f0a544caa3d40b2d98e099a611c2dd5d00 \ + --hash=sha256:42664f18290a6be591be5329a96fe30184be1a1badb7292a7f686a9659de9ca0 + # via + # google-api-core + # google-cloud-core + # google-cloud-storage +google-cloud-core==2.4.1 \ + --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ + --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 + # via google-cloud-storage +google-cloud-storage==2.19.0 \ + --hash=sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba \ + --hash=sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2 + # via gcp-docuploader +google-crc32c==1.6.0 \ + --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ + --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ + --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ + --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ + --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ + --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ + --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ + --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ + --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ + --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ + --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ + --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ + --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ + --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ + --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ + --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ + --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ + --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ + --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ + --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ + --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ + --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ + --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ + --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ + --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ + --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ + --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.7.2 \ + --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ + --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 + # via google-cloud-storage +googleapis-common-protos==1.66.0 \ + --hash=sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c \ + --hash=sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed + # via google-api-core +idna==3.10 \ + --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ + --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 + # via requests nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in + # via -r requirements.in packaging==24.2 \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f @@ -32,6 +208,51 @@ platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv +proto-plus==1.25.0 \ + --hash=sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961 \ + --hash=sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91 + # via google-api-core +protobuf==5.29.3 \ + --hash=sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f \ + --hash=sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7 \ + --hash=sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888 \ + --hash=sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620 \ + --hash=sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da \ + --hash=sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252 \ + --hash=sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a \ + --hash=sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e \ + --hash=sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107 \ + --hash=sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f \ + --hash=sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84 + # via + # gcp-docuploader + # google-api-core + # googleapis-common-protos + # proto-plus +pyasn1==0.6.1 \ + --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ + --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 \ + --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ + --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c + # via google-auth +requests==2.32.3 \ + --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ + --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 + # via + # google-api-core + # google-cloud-storage +rsa==4.9 \ + --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ + --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 + # via google-auth +six==1.17.0 \ + --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ + --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 + # via gcp-docuploader tomli==2.2.1 \ --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ @@ -66,7 +287,11 @@ tomli==2.2.1 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.0 \ - --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ - --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa +urllib3==2.3.0 \ + --hash=sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df \ + --hash=sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d + # via requests +virtualenv==20.28.1 \ + --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ + --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 # via nox diff --git a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh index 233205d580e9..4ed4aaf1346f 100755 --- a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh +++ b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh @@ -20,10 +20,6 @@ export PYTHONUNBUFFERED=1 export PATH="${HOME}/.local/bin:${PATH}" -# Install nox -python3.10 -m pip install --require-hashes -r .kokoro/requirements.txt -python3.10 -m nox --version - # build docs nox -s docs From dac874039acaa53d33d88bf8115b4e04acc259ae Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 31 Jan 2025 13:59:10 -0500 Subject: [PATCH 1886/2016] feat: add roundingmode enum, wiring, and tests (#2121) * feat: adds roundingmode and entity types * Adds rounding_mode to schema file and tests * tweaks RoundingMode docstring and roundingmode logic * Updates tests to apply better coverage for rounding_mode * Modifies docstring * Removes client-side validation, simplifies some code * Updates foreign_type_definition processing --- .../google/cloud/bigquery/enums.py | 45 +++++++++++++- .../google/cloud/bigquery/schema.py | 59 ++++++++++++++++++- .../tests/unit/test_schema.py | 56 +++++++++++++++++- 3 files changed, 156 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index d8cbe99691b1..5519bc989630 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -246,6 +246,11 @@ class KeyResultStatementKind: class StandardSqlTypeNames(str, enum.Enum): + """Enum of allowed SQL type names in schema.SchemaField. + + Datatype used in GoogleSQL. + """ + def _generate_next_value_(name, start, count, last_values): return name @@ -267,6 +272,9 @@ def _generate_next_value_(name, start, count, last_values): ARRAY = enum.auto() STRUCT = enum.auto() RANGE = enum.auto() + # NOTE: FOREIGN acts as a wrapper for data types + # not natively understood by BigQuery unless translated + FOREIGN = enum.auto() class EntityTypes(str, enum.Enum): @@ -285,7 +293,10 @@ class EntityTypes(str, enum.Enum): # See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types # and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types class SqlTypeNames(str, enum.Enum): - """Enum of allowed SQL type names in schema.SchemaField.""" + """Enum of allowed SQL type names in schema.SchemaField. + + Datatype used in Legacy SQL. + """ STRING = "STRING" BYTES = "BYTES" @@ -306,6 +317,9 @@ class SqlTypeNames(str, enum.Enum): DATETIME = "DATETIME" INTERVAL = "INTERVAL" # NOTE: not available in legacy types RANGE = "RANGE" # NOTE: not available in legacy types + # NOTE: FOREIGN acts as a wrapper for data types + # not natively understood by BigQuery unless translated + FOREIGN = "FOREIGN" class WriteDisposition(object): @@ -344,3 +358,32 @@ class DeterminismLevel: NOT_DETERMINISTIC = "NOT_DETERMINISTIC" """The UDF is not deterministic.""" + + +class RoundingMode(str, enum.Enum): + """Rounding mode options that can be used when storing NUMERIC or BIGNUMERIC + values. + + ROUNDING_MODE_UNSPECIFIED: will default to using ROUND_HALF_AWAY_FROM_ZERO. + + ROUND_HALF_AWAY_FROM_ZERO: rounds half values away from zero when applying + precision and scale upon writing of NUMERIC and BIGNUMERIC values. + For Scale: 0 + * 1.1, 1.2, 1.3, 1.4 => 1 + * 1.5, 1.6, 1.7, 1.8, 1.9 => 2 + + ROUND_HALF_EVEN: rounds half values to the nearest even value when applying + precision and scale upon writing of NUMERIC and BIGNUMERIC values. + For Scale: 0 + * 1.1, 1.2, 1.3, 1.4 => 1 + * 1.5 => 2 + * 1.6, 1.7, 1.8, 1.9 => 2 + * 2.5 => 2 + """ + + def _generate_next_value_(name, start, count, last_values): + return name + + ROUNDING_MODE_UNSPECIFIED = enum.auto() + ROUND_HALF_AWAY_FROM_ZERO = enum.auto() + ROUND_HALF_EVEN = enum.auto() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 42dfbfca81ef..0f011a2755ee 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -22,14 +22,15 @@ from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql +from google.cloud.bigquery import enums from google.cloud.bigquery.enums import StandardSqlTypeNames _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: -# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types -# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types +# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types +# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types LEGACY_TO_STANDARD_TYPES = { "STRING": StandardSqlTypeNames.STRING, "BYTES": StandardSqlTypeNames.BYTES, @@ -48,6 +49,7 @@ "DATE": StandardSqlTypeNames.DATE, "TIME": StandardSqlTypeNames.TIME, "DATETIME": StandardSqlTypeNames.DATETIME, + "FOREIGN": StandardSqlTypeNames.FOREIGN, # no direct conversion from ARRAY, the latter is represented by mode="REPEATED" } """String names of the legacy SQL types to integer codes of Standard SQL standard_sql.""" @@ -166,6 +168,35 @@ class SchemaField(object): the type is RANGE, this field is required. Possible values for the field element type of a RANGE include `DATE`, `DATETIME` and `TIMESTAMP`. + + rounding_mode: Union[enums.RoundingMode, str, None] + Specifies the rounding mode to be used when storing values of + NUMERIC and BIGNUMERIC type. + + Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO. + ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero + when applying precision and scale upon writing of NUMERIC and BIGNUMERIC + values. + + For Scale: 0 + 1.1, 1.2, 1.3, 1.4 => 1 + 1.5, 1.6, 1.7, 1.8, 1.9 => 2 + + ROUND_HALF_EVEN rounds half values to the nearest even value + when applying precision and scale upon writing of NUMERIC and BIGNUMERIC + values. + + For Scale: 0 + 1.1, 1.2, 1.3, 1.4 => 1 + 1.5 => 2 + 1.6, 1.7, 1.8, 1.9 => 2 + 2.5 => 2 + + foreign_type_definition: Optional[str] + Definition of the foreign data type. + + Only valid for top-level schema fields (not nested fields). + If the type is FOREIGN, this field is required. """ def __init__( @@ -181,11 +212,14 @@ def __init__( scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, range_element_type: Union[FieldElementType, str, None] = None, + rounding_mode: Union[enums.RoundingMode, str, None] = None, + foreign_type_definition: Optional[str] = None, ): self._properties: Dict[str, Any] = { "name": name, "type": field_type, } + self._properties["name"] = name if mode is not None: self._properties["mode"] = mode.upper() if description is not _DEFAULT_VALUE: @@ -206,6 +240,11 @@ def __init__( self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): self._properties["rangeElementType"] = range_element_type.to_api_repr() + if rounding_mode is not None: + self._properties["roundingMode"] = rounding_mode + if foreign_type_definition is not None: + self._properties["foreignTypeDefinition"] = foreign_type_definition + if fields: # Don't set the property if it's not set. self._properties["fields"] = [field.to_api_repr() for field in fields] @@ -304,6 +343,22 @@ def range_element_type(self): ret = self._properties.get("rangeElementType") return FieldElementType.from_api_repr(ret) + @property + def rounding_mode(self): + """Enum that specifies the rounding mode to be used when storing values of + NUMERIC and BIGNUMERIC type. + """ + return self._properties.get("roundingMode") + + @property + def foreign_type_definition(self): + """Definition of the foreign data type. + + Only valid for top-level schema fields (not nested fields). + If the type is FOREIGN, this field is required. + """ + return self._properties.get("foreignTypeDefinition") + @property def fields(self): """Optional[tuple]: Subfields contained in this field. diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index efbc5d26fdcb..467f1e1de12a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -19,6 +19,7 @@ import pytest from google.cloud import bigquery +from google.cloud.bigquery import enums from google.cloud.bigquery.standard_sql import StandardSqlStructType from google.cloud.bigquery import schema from google.cloud.bigquery.schema import PolicyTagList @@ -49,6 +50,8 @@ def test_constructor_defaults(self): self.assertEqual(field.fields, ()) self.assertIsNone(field.policy_tags) self.assertIsNone(field.default_value_expression) + self.assertEqual(field.rounding_mode, None) + self.assertEqual(field.foreign_type_definition, None) def test_constructor_explicit(self): FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field" @@ -64,6 +67,8 @@ def test_constructor_explicit(self): ) ), default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION, + rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED, + foreign_type_definition="INTEGER", ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") @@ -80,6 +85,8 @@ def test_constructor_explicit(self): ) ), ) + self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") + self.assertEqual(field.foreign_type_definition, "INTEGER") def test_constructor_explicit_none(self): field = self._make_one("test", "STRING", description=None, policy_tags=None) @@ -137,8 +144,16 @@ def test_to_api_repr(self): {"names": ["foo", "bar"]}, ) + ROUNDINGMODE = enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED + field = self._make_one( - "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy + "foo", + "INTEGER", + "NULLABLE", + description="hello world", + policy_tags=policy, + rounding_mode=ROUNDINGMODE, + foreign_type_definition=None, ) self.assertEqual( field.to_api_repr(), @@ -148,6 +163,7 @@ def test_to_api_repr(self): "type": "INTEGER", "description": "hello world", "policyTags": {"names": ["foo", "bar"]}, + "roundingMode": "ROUNDING_MODE_UNSPECIFIED", }, ) @@ -181,6 +197,7 @@ def test_from_api_repr(self): "description": "test_description", "name": "foo", "type": "record", + "roundingMode": "ROUNDING_MODE_UNSPECIFIED", } ) self.assertEqual(field.name, "foo") @@ -192,6 +209,7 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].field_type, "INTEGER") self.assertEqual(field.fields[0].mode, "NULLABLE") self.assertEqual(field.range_element_type, None) + self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") def test_from_api_repr_policy(self): field = self._get_target_class().from_api_repr( @@ -283,6 +301,28 @@ def test_fields_property(self): schema_field = self._make_one("boat", "RECORD", fields=fields) self.assertEqual(schema_field.fields, fields) + def test_roundingmode_property_str(self): + ROUNDINGMODE = "ROUND_HALF_AWAY_FROM_ZERO" + schema_field = self._make_one("test", "STRING", rounding_mode=ROUNDINGMODE) + self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE) + + del schema_field + schema_field = self._make_one("test", "STRING") + schema_field._properties["roundingMode"] = ROUNDINGMODE + self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE) + + def test_foreign_type_definition_property_str(self): + FOREIGN_TYPE_DEFINITION = "INTEGER" + schema_field = self._make_one( + "test", "STRING", foreign_type_definition=FOREIGN_TYPE_DEFINITION + ) + self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + + del schema_field + schema_field = self._make_one("test", "STRING") + schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION + self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + def test_to_standard_sql_simple_type(self): examples = ( # a few legacy types @@ -457,6 +497,20 @@ def test_to_standard_sql_unknown_type(self): bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, ) + def test_to_standard_sql_foreign_type_valid(self): + legacy_type = "FOREIGN" + standard_type = bigquery.StandardSqlTypeNames.FOREIGN + foreign_type_definition = "INTEGER" + + field = self._make_one( + "some_field", + field_type=legacy_type, + foreign_type_definition=foreign_type_definition, + ) + standard_field = field.to_standard_sql() + self.assertEqual(standard_field.name, "some_field") + self.assertEqual(standard_field.type.type_kind, standard_type) + def test___eq___wrong_type(self): field = self._make_one("test", "STRING") other = object() From 423d5d03a28176e730a4aac8aab808dc41e3bae5 Mon Sep 17 00:00:00 2001 From: Kien Truong Date: Tue, 4 Feb 2025 03:37:26 +0700 Subject: [PATCH 1887/2016] fix: avoid blocking in download thread when using BQ Storage API (#2034) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This prevents a deadlock between the main thead and download threads when the threadpool is shutdown prematurely. Co-authored-by: Chalmer Lowe Co-authored-by: Tim Sweña (Swast) --- .../google/cloud/bigquery/_pandas_helpers.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index bf7d10c0f229..0506725318d8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -796,10 +796,15 @@ def _download_table_bqstorage_stream( rowstream = reader.rows(session) for page in rowstream.pages: - if download_state.done: - return item = page_to_item(page) - worker_queue.put(item) + while True: + if download_state.done: + return + try: + worker_queue.put(item, timeout=_PROGRESS_INTERVAL) + break + except queue.Full: # pragma: NO COVER + continue def _nowait(futures): From 427febdd7e207b5fdf5fc43b83e4e811c4ee0986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 6 Feb 2025 12:43:32 -0600 Subject: [PATCH 1888/2016] test: add unit test covering the case where worker streams are stopped early (#2127) * test: add unit test covering the case where worker streams are stopped early * use older pyarrow.record_batch constructor * remove flakey log-based tests from snippets * add a gc.collect() call to make sure threads are supposed to be cleaned up --- .../google/cloud/bigquery/_pandas_helpers.py | 69 ++++++++++---- .../tests/test_download_public_data.py | 15 +-- .../test_download_public_data_sandbox.py | 17 +--- .../tests/unit/test__pandas_helpers.py | 93 +++++++++++++++++++ 4 files changed, 146 insertions(+), 48 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 0506725318d8..4f70f6c29f7d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -20,6 +20,7 @@ from itertools import islice import logging import queue +import threading import warnings from typing import Any, Union, Optional, Callable, Generator, List @@ -119,6 +120,21 @@ def __init__(self): # be an atomic operation in the Python language definition (enforced by # the global interpreter lock). self.done = False + # To assist with testing and understanding the behavior of the + # download, use this object as shared state to track how many worker + # threads have started and have gracefully shutdown. + self._started_workers_lock = threading.Lock() + self.started_workers = 0 + self._finished_workers_lock = threading.Lock() + self.finished_workers = 0 + + def start(self): + with self._started_workers_lock: + self.started_workers += 1 + + def finish(self): + with self._finished_workers_lock: + self.finished_workers += 1 BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { @@ -786,25 +802,35 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page): def _download_table_bqstorage_stream( download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): - reader = bqstorage_client.read_rows(stream.name) + download_state.start() + try: + reader = bqstorage_client.read_rows(stream.name) - # Avoid deprecation warnings for passing in unnecessary read session. - # https://github.com/googleapis/python-bigquery-storage/issues/229 - if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: - rowstream = reader.rows() - else: - rowstream = reader.rows(session) - - for page in rowstream.pages: - item = page_to_item(page) - while True: - if download_state.done: - return - try: - worker_queue.put(item, timeout=_PROGRESS_INTERVAL) - break - except queue.Full: # pragma: NO COVER - continue + # Avoid deprecation warnings for passing in unnecessary read session. + # https://github.com/googleapis/python-bigquery-storage/issues/229 + if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: + rowstream = reader.rows() + else: + rowstream = reader.rows(session) + + for page in rowstream.pages: + item = page_to_item(page) + + # Make sure we set a timeout on put() so that we give the worker + # thread opportunities to shutdown gracefully, for example if the + # parent thread shuts down or the parent generator object which + # collects rows from all workers goes out of scope. See: + # https://github.com/googleapis/python-bigquery/issues/2032 + while True: + if download_state.done: + return + try: + worker_queue.put(item, timeout=_PROGRESS_INTERVAL) + break + except queue.Full: + continue + finally: + download_state.finish() def _nowait(futures): @@ -830,6 +856,7 @@ def _download_table_bqstorage( page_to_item: Optional[Callable] = None, max_queue_size: Any = _MAX_QUEUE_SIZE_DEFAULT, max_stream_count: Optional[int] = None, + download_state: Optional[_DownloadState] = None, ) -> Generator[Any, None, None]: """Downloads a BigQuery table using the BigQuery Storage API. @@ -857,6 +884,9 @@ def _download_table_bqstorage( is True, the requested streams are limited to 1 regardless of the `max_stream_count` value. If 0 or None, then the number of requested streams will be unbounded. Defaults to None. + download_state (Optional[_DownloadState]): + A threadsafe state object which can be used to observe the + behavior of the worker threads created by this method. Yields: pandas.DataFrame: Pandas DataFrames, one for each chunk of data @@ -915,7 +945,8 @@ def _download_table_bqstorage( # Use _DownloadState to notify worker threads when to quit. # See: https://stackoverflow.com/a/29237343/101923 - download_state = _DownloadState() + if download_state is None: + download_state = _DownloadState() # Create a queue to collect frames as they are created in each thread. # diff --git a/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py b/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py index 02c2c6f9cbb5..4f6c02452e1c 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py +++ b/packages/google-cloud-bigquery/samples/tests/test_download_public_data.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging - import pytest from .. import download_public_data @@ -21,20 +19,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data( - caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] -) -> None: - # Enable debug-level logging to verify the BigQuery Storage API is used. - caplog.set_level(logging.DEBUG) - +def test_download_public_data(capsys: pytest.CaptureFixture[str]) -> None: download_public_data.download_public_data() out, _ = capsys.readouterr() assert "year" in out assert "gender" in out assert "name" in out - - assert any( - "Started reading table 'bigquery-public-data.usa_names.usa_1910_current' with BQ Storage API session" - in message - for message in caplog.messages - ) diff --git a/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py b/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py index e86f604add79..d3dd31a382e6 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py +++ b/packages/google-cloud-bigquery/samples/tests/test_download_public_data_sandbox.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging - import pytest from .. import download_public_data_sandbox @@ -21,20 +19,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data_sandbox( - caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] -) -> None: - # Enable debug-level logging to verify the BigQuery Storage API is used. - caplog.set_level(logging.DEBUG) - +def test_download_public_data_sandbox(capsys: pytest.CaptureFixture[str]) -> None: download_public_data_sandbox.download_public_data_sandbox() - out, err = capsys.readouterr() + out, _ = capsys.readouterr() assert "year" in out assert "gender" in out assert "name" in out - - assert any( - # An anonymous table is used because this sample reads from query results. - ("Started reading table" in message and "BQ Storage API session" in message) - for message in caplog.messages - ) diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 3a5fddacc48d..edfaadf692e4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -16,6 +16,7 @@ import datetime import decimal import functools +import gc import operator import queue from typing import Union @@ -1846,6 +1847,98 @@ def fake_download_stream( assert queue_used.maxsize == expected_maxsize +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test__download_table_bqstorage_shuts_down_workers( + monkeypatch, + module_under_test, +): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2032 + + Make sure that when the top-level iterator goes out of scope (is deleted), + the child threads are also stopped. + """ + from google.cloud.bigquery import dataset + from google.cloud.bigquery import table + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr( + _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None + ) + monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0") + + # Create a fake stream with a decent number of rows. + arrow_schema = pyarrow.schema( + [ + ("int_col", pyarrow.int64()), + ("str_col", pyarrow.string()), + ] + ) + arrow_rows = pyarrow.record_batch( + [ + pyarrow.array([0, 1, 2], type=pyarrow.int64()), + pyarrow.array(["a", "b", "c"], type=pyarrow.string()), + ], + schema=arrow_schema, + ) + session = google.cloud.bigquery_storage_v1.types.ReadSession() + session.data_format = "ARROW" + session.arrow_schema = {"serialized_schema": arrow_schema.serialize().to_pybytes()} + session.streams = [ + google.cloud.bigquery_storage_v1.types.ReadStream(name=name) + for name in ("stream/s0", "stream/s1", "stream/s2") + ] + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + reader.__iter__.return_value = [ + google.cloud.bigquery_storage_v1.types.ReadRowsResponse( + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, + arrow_record_batch={ + "serialized_record_batch": arrow_rows.serialize().to_pybytes() + }, + ) + for _ in range(100) + ] + reader.rows.return_value = google.cloud.bigquery_storage_v1.reader.ReadRowsIterable( + reader, read_session=session + ) + bqstorage_client.read_rows.return_value = reader + bqstorage_client.create_read_session.return_value = session + table_ref = table.TableReference( + dataset.DatasetReference("project-x", "dataset-y"), + "table-z", + ) + download_state = module_under_test._DownloadState() + assert download_state.started_workers == 0 + assert download_state.finished_workers == 0 + + result_gen = module_under_test._download_table_bqstorage( + "some-project", + table_ref, + bqstorage_client, + max_queue_size=1, + page_to_item=module_under_test._bqstorage_page_to_arrow, + download_state=download_state, + ) + + result_gen_iter = iter(result_gen) + next(result_gen_iter) + assert download_state.started_workers == 3 + assert download_state.finished_workers == 0 + + # Stop iteration early and simulate the variables going out of scope + # to be doubly sure that the worker threads are supposed to be cleaned up. + del result_gen, result_gen_iter + gc.collect() + + assert download_state.started_workers == 3 + assert download_state.finished_workers == 3 + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( From 8000128ff2b174777c934bc35887dfe933b4216f Mon Sep 17 00:00:00 2001 From: Alicia Williams Date: Fri, 14 Feb 2025 12:50:18 -0800 Subject: [PATCH 1889/2016] docs: update magics.rst (#2125) * Update magics.rst use bigquery-magics package for the %%bigquery magic * Update magics.rst add back space * update reference and link for bigquery magics --------- Co-authored-by: Lingqing Gan --- packages/google-cloud-bigquery/docs/magics.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/magics.rst b/packages/google-cloud-bigquery/docs/magics.rst index aa14c6bfa482..549d67f761fa 100644 --- a/packages/google-cloud-bigquery/docs/magics.rst +++ b/packages/google-cloud-bigquery/docs/magics.rst @@ -6,7 +6,7 @@ in a Jupyter notebook cell. .. code:: - %load_ext google.cloud.bigquery + %load_ext bigquery_magics This makes the ``%%bigquery`` magic available. @@ -27,8 +27,9 @@ Running a parameterized query: :start-after: [START bigquery_jupyter_query_params_scalars] :end-before: [END bigquery_jupyter_query_params_scalars] -API Reference -------------- +BigQuery Magics Reference +------------------------- -.. automodule:: google.cloud.bigquery.magics.magics - :members: +- `BigQuery Magics Documentation`_ + +.. _BigQuery Magics Documentation: https://googleapis.dev/python/bigquery-magics/latest From 9a8df9c9fd061416fac424fee59cac02e5a9958c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 14:11:49 -0800 Subject: [PATCH 1890/2016] chore(deps): bump cryptography from 43.0.1 to 44.0.1 in /.kokoro (#2130) Bumps [cryptography](https://github.com/pyca/cryptography) from 43.0.1 to 44.0.1. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/43.0.1...44.0.1) --- updated-dependencies: - dependency-name: cryptography dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- .../.kokoro/requirements.txt | 60 ++++++++++--------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt index 16db448c16bf..6ad95a04a419 100644 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ b/packages/google-cloud-bigquery/.kokoro/requirements.txt @@ -112,34 +112,38 @@ colorlog==6.8.2 \ # via # gcp-docuploader # nox -cryptography==43.0.1 \ - --hash=sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494 \ - --hash=sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806 \ - --hash=sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d \ - --hash=sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062 \ - --hash=sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2 \ - --hash=sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4 \ - --hash=sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1 \ - --hash=sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85 \ - --hash=sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84 \ - --hash=sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042 \ - --hash=sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d \ - --hash=sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962 \ - --hash=sha256:7c05650fe8023c5ed0d46793d4b7d7e6cd9c04e68eabe5b0aeea836e37bdcec2 \ - --hash=sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa \ - --hash=sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d \ - --hash=sha256:88cce104c36870d70c49c7c8fd22885875d950d9ee6ab54df2745f83ba0dc365 \ - --hash=sha256:9d3cdb25fa98afdd3d0892d132b8d7139e2c087da1712041f6b762e4f807cc96 \ - --hash=sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47 \ - --hash=sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d \ - --hash=sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d \ - --hash=sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c \ - --hash=sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb \ - --hash=sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277 \ - --hash=sha256:e710bf40870f4db63c3d7d929aa9e09e4e7ee219e703f949ec4073b4294f6172 \ - --hash=sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034 \ - --hash=sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a \ - --hash=sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289 +cryptography==44.0.1 \ + --hash=sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7 \ + --hash=sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3 \ + --hash=sha256:1f9a92144fa0c877117e9748c74501bea842f93d21ee00b0cf922846d9d0b183 \ + --hash=sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69 \ + --hash=sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a \ + --hash=sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62 \ + --hash=sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911 \ + --hash=sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7 \ + --hash=sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a \ + --hash=sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41 \ + --hash=sha256:5fed5cd6102bb4eb843e3315d2bf25fede494509bddadb81e03a859c1bc17b83 \ + --hash=sha256:610a83540765a8d8ce0f351ce42e26e53e1f774a6efb71eb1b41eb01d01c3d12 \ + --hash=sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864 \ + --hash=sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf \ + --hash=sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c \ + --hash=sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2 \ + --hash=sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b \ + --hash=sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0 \ + --hash=sha256:94f99f2b943b354a5b6307d7e8d19f5c423a794462bde2bf310c770ba052b1c4 \ + --hash=sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9 \ + --hash=sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008 \ + --hash=sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862 \ + --hash=sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009 \ + --hash=sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7 \ + --hash=sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f \ + --hash=sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026 \ + --hash=sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f \ + --hash=sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd \ + --hash=sha256:f4daefc971c2d1f82f03097dc6f216744a6cd2ac0f04c68fb935ea2ba2a0d420 \ + --hash=sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14 \ + --hash=sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00 # via # -r requirements.in # gcp-releasetool From dae6f426f2e70dd5df6895c26f1eed9ee62717d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 21 Feb 2025 11:44:59 -0600 Subject: [PATCH 1891/2016] deps: use pandas-gbq to determine schema in `load_table_from_dataframe` (#2095) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: use pandas-gbq to determine schema in `load_table_from_dataframe` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix some unit tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * bump minimum pandas-gbq to 0.26.1 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * drop pandas-gbq from python 3.7 extras * relax warning message text assertion * use consistent time zone presense/absense in time datetime system test * Update google/cloud/bigquery/_pandas_helpers.py * Update google/cloud/bigquery/_pandas_helpers.py Co-authored-by: Chalmer Lowe * remove pandas-gbq from at least 1 unit test and system test session --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/_pandas_helpers.py | 35 +++++++++- .../google/cloud/bigquery/_pyarrow_helpers.py | 7 +- packages/google-cloud-bigquery/noxfile.py | 15 +++++ packages/google-cloud-bigquery/pyproject.toml | 3 + .../testing/constraints-3.8.txt | 9 +++ .../tests/system/test_pandas.py | 2 +- .../tests/unit/test__pandas_helpers.py | 65 +++++++++++++++---- .../tests/unit/test_client.py | 33 +++++++--- 8 files changed, 147 insertions(+), 22 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 4f70f6c29f7d..0017d92cee59 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared helper functions for connecting BigQuery and pandas.""" +"""Shared helper functions for connecting BigQuery and pandas. + +NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, +instead. See: go/pandas-gbq-and-bigframes-redundancy and +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pandas_to_bigquery.py +""" import concurrent.futures from datetime import datetime @@ -40,6 +45,16 @@ else: import numpy + +try: + import pandas_gbq.schema.pandas_to_bigquery # type: ignore + + pandas_gbq_import_exception = None +except ImportError as exc: + pandas_gbq = None + pandas_gbq_import_exception = exc + + try: import db_dtypes # type: ignore @@ -445,6 +460,10 @@ def _first_array_valid(series): def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. + DEPRECATED: Use + pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(), + instead. See: go/pandas-gbq-and-bigframes-redundancy. + Args: dataframe (pandas.DataFrame): DataFrame for which the client determines the BigQuery schema. @@ -460,6 +479,20 @@ def dataframe_to_bq_schema(dataframe, bq_schema): The automatically determined schema. Returns None if the type of any column cannot be determined. """ + if pandas_gbq is None: + warnings.warn( + "Loading pandas DataFrame into BigQuery will require pandas-gbq " + "package version 0.26.1 or greater in the future. " + f"Tried to import pandas-gbq and got: {pandas_gbq_import_exception}", + category=FutureWarning, + ) + else: + return pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields( + dataframe, + override_bigquery_fields=bq_schema, + index=True, + ) + if bq_schema: bq_schema = schema._to_schema_fields(bq_schema) bq_schema_index = {field.name: field for field in bq_schema} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py index 3c745a611bdb..1b42cd5c79bf 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared helper functions for connecting BigQuery and pyarrow.""" +"""Shared helper functions for connecting BigQuery and pyarrow. + +NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, +instead. See: go/pandas-gbq-and-bigframes-redundancy and +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py +""" from typing import Any diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index e08956b114c1..87bd9a70cf25 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -110,6 +110,14 @@ def default(session, install_extras=True): else: install_target = "." session.install("-e", install_target, "-c", constraints_path) + + # Test with some broken "extras" in case the user didn't install the extra + # directly. For example, pandas-gbq is recommended for pandas features, but + # we want to test that we fallback to the previous behavior. For context, + # see internal document go/pandas-gbq-and-bigframes-redundancy. + if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: + session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y") + session.run("python", "-m", "pip", "freeze") # Run py.test against the unit tests. @@ -228,6 +236,13 @@ def system(session): extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) + # Test with some broken "extras" in case the user didn't install the extra + # directly. For example, pandas-gbq is recommended for pandas features, but + # we want to test that we fallback to the previous behavior. For context, + # see internal document go/pandas-gbq-and-bigframes-redundancy. + if session.python == SYSTEM_TEST_PYTHON_VERSIONS[0]: + session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y") + # print versions of all dependencies session.run("python", "-m", "pip", "freeze") diff --git a/packages/google-cloud-bigquery/pyproject.toml b/packages/google-cloud-bigquery/pyproject.toml index ecf21d922356..c4e5c2f0d12a 100644 --- a/packages/google-cloud-bigquery/pyproject.toml +++ b/packages/google-cloud-bigquery/pyproject.toml @@ -74,6 +74,9 @@ bqstorage = [ ] pandas = [ "pandas >= 1.1.0", + "pandas-gbq >= 0.26.1; python_version >= '3.8'", + "grpcio >= 1.47.0, < 2.0dev", + "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", "pyarrow >= 3.0.0", "db-dtypes >= 0.3.0, < 2.0.0dev", "importlib_metadata >= 1.0.0; python_version < '3.8'", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.8.txt b/packages/google-cloud-bigquery/testing/constraints-3.8.txt index e5e73c5c7e2d..9883fb8ccd8f 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.8.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.8.txt @@ -1,2 +1,11 @@ grpcio==1.47.0 pandas==1.2.0 + +# This constraints file is used to check that lower bounds +# are correct in setup.py +# +# Pin the version to the lower bound. +# +# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", +# Then this file should have foo==1.14.0 +pandas-gbq==0.26.1 diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 85c7b79e6442..a9e76d416089 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -1259,7 +1259,7 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id): df = pandas.DataFrame( dict( dt=[ - datetime.datetime(2020, 1, 8, 8, 0, 0), + datetime.datetime(2020, 1, 8, 8, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime( 2020, 1, diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index edfaadf692e4..fdd232a5c959 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -35,6 +35,11 @@ except ImportError: pandas = None +try: + import pandas_gbq.schema.pandas_to_bigquery +except ImportError: + pandas_gbq = None + try: import geopandas except ImportError: @@ -1281,7 +1286,21 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_named_index(module_under_test): +@pytest.mark.skipif(pandas_gbq is None, reason="Requires `pandas-gbq`") +def test_dataframe_to_bq_schema_returns_schema_with_pandas_gbq( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) + got = module_under_test.dataframe_to_bq_schema(dataframe, []) + # Don't assert beyond this, since pandas-gbq is now source of truth. + assert got is not None + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_named_index(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1292,7 +1311,8 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test): index = pandas.Index(["a", "b"], name="str_index") dataframe = pandas.DataFrame(df_data, index=index) - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) expected_schema = ( schema.SchemaField("str_index", "STRING", "NULLABLE"), @@ -1304,7 +1324,9 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_multiindex(module_under_test): +def test_dataframe_to_bq_schema_w_multiindex(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1321,7 +1343,8 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test): ) dataframe = pandas.DataFrame(df_data, index=index) - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) expected_schema = ( schema.SchemaField("str_index", "STRING", "NULLABLE"), @@ -1335,7 +1358,9 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): +def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1350,7 +1375,10 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, ] - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema( + dataframe, dict_schema + ) expected_schema = ( schema.SchemaField("str_column", "STRING", "NULLABLE"), @@ -1361,7 +1389,11 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): +def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)}, @@ -1389,7 +1421,11 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): +def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)}, @@ -1419,7 +1455,9 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): +def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"struct_field": {"one": 2}, "status": "FOO"}, @@ -1443,9 +1481,11 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): @pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") -def test_dataframe_to_bq_schema_geography(module_under_test): +def test_dataframe_to_bq_schema_geography(module_under_test, monkeypatch): from shapely import wkt + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df = geopandas.GeoDataFrame( pandas.DataFrame( dict( @@ -1456,7 +1496,10 @@ def test_dataframe_to_bq_schema_geography(module_under_test): ), geometry="geo1", ) - bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + + with pytest.warns(FutureWarning, match="pandas-gbq"): + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( schema.SchemaField("name", "STRING"), schema.SchemaField("geo1", "GEOGRAPHY"), diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 462a70bbef96..a5af37b6b43e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -8391,8 +8391,12 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): autospec=True, side_effect=google.api_core.exceptions.NotFound("Table not found"), ) + pandas_gbq_patch = mock.patch( + "google.cloud.bigquery._pandas_helpers.pandas_gbq", + new=None, + ) - with load_patch as load_table_from_file, get_table_patch: + with load_patch as load_table_from_file, get_table_patch, pandas_gbq_patch: with warnings.catch_warnings(record=True) as warned: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION @@ -8448,7 +8452,6 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, @@ -8460,6 +8463,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): ] ), ) + with load_patch as load_table_from_file, get_table_patch: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION @@ -8580,10 +8584,10 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se client = self._make_client() dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64") + load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, @@ -8612,8 +8616,11 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - assert tuple(sent_config.schema) == ( - SchemaField("x", "INT64", "NULLABLE", None), + assert ( + # Accept either the GoogleSQL or legacy SQL type name from pandas-gbq. + tuple(sent_config.schema) == (SchemaField("x", "INT64", "NULLABLE", None),) + or tuple(sent_config.schema) + == (SchemaField("x", "INTEGER", "NULLABLE", None),) ) def test_load_table_from_dataframe_struct_fields(self): @@ -8759,7 +8766,7 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): data=records, columns=["float_column", "array_column"] ) - expected_schema = [ + expected_schema_googlesql = [ SchemaField("float_column", "FLOAT"), SchemaField( "array_column", @@ -8767,6 +8774,14 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): mode="REPEATED", ), ] + expected_schema_legacy_sql = [ + SchemaField("float_column", "FLOAT"), + SchemaField( + "array_column", + "INTEGER", + mode="REPEATED", + ), + ] load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True @@ -8802,7 +8817,10 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema == expected_schema + assert ( + sent_config.schema == expected_schema_googlesql + or sent_config.schema == expected_schema_legacy_sql + ) def test_load_table_from_dataframe_w_partial_schema(self): pandas = pytest.importorskip("pandas") @@ -8922,7 +8940,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): load_table_from_file.assert_not_called() message = str(exc_context.value) - assert "bq_schema contains fields not present in dataframe" in message assert "unknown_col" in message def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): From 28b868231d7bca7f9444b23e438d5ed237b31712 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 21 Feb 2025 12:45:15 -0500 Subject: [PATCH 1892/2016] Feat: Adds foreign_type_info attribute to table class and adds unit tests. (#2126) * adds foreign_type_info attribute to table * feat: Adds foreign_type_info attribute and tests * updates docstrings for foreign_type_info * Updates property handling, especially as regards set/get_sub_prop * Removes extraneous comments and debug expressions * Refactors build_resource_from_properties w get/set_sub_prop * updates to foreign_type_info, tests and wiring * Adds logic to detect non-Sequence schema.fields value * updates assorted tests and logic --- .../google/cloud/bigquery/_helpers.py | 4 +- .../google/cloud/bigquery/schema.py | 57 +++--- .../google/cloud/bigquery/table.py | 75 ++++++- .../tests/unit/job/test_load.py | 2 +- .../tests/unit/test_client.py | 8 +- .../tests/unit/test_schema.py | 183 +++++++++++------- .../tests/unit/test_table.py | 173 ++++++++++++++++- 7 files changed, 398 insertions(+), 104 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index ea47af28d994..d40217c4d6ee 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -978,11 +978,11 @@ def _build_resource_from_properties(obj, filter_fields): """ partial = {} for filter_field in filter_fields: - api_field = obj._PROPERTY_TO_API_FIELD.get(filter_field) + api_field = _get_sub_prop(obj._PROPERTY_TO_API_FIELD, filter_field) if api_field is None and filter_field not in obj._properties: raise ValueError("No property %s" % filter_field) elif api_field is not None: - partial[api_field] = obj._properties.get(api_field) + _set_sub_prop(partial, api_field, _get_sub_prop(obj._properties, api_field)) else: # allows properties that are not defined in the library # and properties that have the same name as API resource key diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 0f011a2755ee..03cde830e7b7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -15,10 +15,9 @@ """Schemas for BigQuery tables / queries.""" from __future__ import annotations -import collections import enum import typing -from typing import Any, cast, Dict, Iterable, Optional, Union +from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql @@ -489,6 +488,8 @@ def _parse_schema_resource(info): Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]: A list of parsed fields, or ``None`` if no "fields" key found. """ + if isinstance(info, list): + return [SchemaField.from_api_repr(f) for f in info] return [SchemaField.from_api_repr(f) for f in info.get("fields", ())] @@ -501,40 +502,46 @@ def _build_schema_resource(fields): Returns: Sequence[Dict]: Mappings describing the schema of the supplied fields. """ - return [field.to_api_repr() for field in fields] + if isinstance(fields, Sequence): + # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields + return [field.to_api_repr() for field in fields] + + else: + raise TypeError("Schema must be a Sequence (e.g. a list) or None.") def _to_schema_fields(schema): - """Coerce `schema` to a list of schema field instances. + """Coerces schema to a list of SchemaField instances while + preserving the original structure as much as possible. Args: - schema(Sequence[Union[ \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - Mapping[str, Any] \ - ]]): - Table schema to convert. If some items are passed as mappings, - their content must be compatible with - :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. + schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ] + ] + ):: + Table schema to convert. Can be a list of SchemaField + objects or mappings. Returns: - Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`] + A list of SchemaField objects. Raises: - Exception: If ``schema`` is not a sequence, or if any item in the - sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` - instance or a compatible mapping representation of the field. + TypeError: If schema is not a Sequence. """ - for field in schema: - if not isinstance(field, (SchemaField, collections.abc.Mapping)): - raise ValueError( - "Schema items must either be fields or compatible " - "mapping representations." - ) - return [ - field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) - for field in schema - ] + if isinstance(schema, Sequence): + # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields + return [ + field + if isinstance(field, SchemaField) + else SchemaField.from_api_repr(field) + for field in schema + ] + + else: + raise TypeError("Schema must be a Sequence (e.g. a list) or None.") class PolicyTagList(object): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 934a28cfc289..c70a0ebea98d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -21,7 +21,8 @@ import functools import operator import typing -from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, Sequence + import warnings try: @@ -66,6 +67,7 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.enums import DefaultPandasDTypes from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery import schema as _schema from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -398,7 +400,7 @@ class Table(_TableBase): "partitioning_type": "timePartitioning", "range_partitioning": "rangePartitioning", "time_partitioning": "timePartitioning", - "schema": "schema", + "schema": ["schema", "fields"], "snapshot_definition": "snapshotDefinition", "clone_definition": "cloneDefinition", "streaming_buffer": "streamingBuffer", @@ -411,6 +413,7 @@ class Table(_TableBase): "max_staleness": "maxStaleness", "resource_tags": "resourceTags", "external_catalog_table_options": "externalCatalogTableOptions", + "foreign_type_info": ["schema", "foreignTypeInfo"], } def __init__(self, table_ref, schema=None) -> None: @@ -451,8 +454,20 @@ def schema(self): If ``schema`` is not a sequence, or if any item in the sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` instance or a compatible mapping representation of the field. + + .. Note:: + If you are referencing a schema for an external catalog table such + as a Hive table, it will also be necessary to populate the foreign_type_info + attribute. This is not necessary if defining the schema for a BigQuery table. + + For details, see: + https://cloud.google.com/bigquery/docs/external-tables + https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets + """ - prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"]) + prop = _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["schema"] + ) if not prop: return [] else: @@ -463,10 +478,21 @@ def schema(self, value): api_field = self._PROPERTY_TO_API_FIELD["schema"] if value is None: - self._properties[api_field] = None - else: + _helpers._set_sub_prop( + self._properties, + api_field, + None, + ) + elif isinstance(value, Sequence): value = _to_schema_fields(value) - self._properties[api_field] = {"fields": _build_schema_resource(value)} + value = _build_schema_resource(value) + _helpers._set_sub_prop( + self._properties, + api_field, + value, + ) + else: + raise TypeError("Schema must be a Sequence (e.g. a list) or None.") @property def labels(self): @@ -1075,6 +1101,43 @@ def external_catalog_table_options( self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] ] = value + @property + def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]: + """Optional. Specifies metadata of the foreign data type definition in + field schema (TableFieldSchema.foreign_type_definition). + + Returns: + Optional[schema.ForeignTypeInfo]: + Foreign type information, or :data:`None` if not set. + + .. Note:: + foreign_type_info is only required if you are referencing an + external catalog such as a Hive table. + For details, see: + https://cloud.google.com/bigquery/docs/external-tables + https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets + """ + + prop = _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"] + ) + if prop is not None: + return _schema.ForeignTypeInfo.from_api_repr(prop) + return None + + @foreign_type_info.setter + def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]): + value = _helpers._isinstance_or_raise( + value, + (_schema.ForeignTypeInfo, dict), + none_allowed=True, + ) + if isinstance(value, _schema.ForeignTypeInfo): + value = value.to_api_repr() + _helpers._set_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"], value + ) + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index 0fb044696506..10df46fb37e6 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -272,7 +272,7 @@ def test_schema_setter_invalid_field(self): config = LoadJobConfig() full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): config.schema = [full_name, object()] def test_schema_setter(self): diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index a5af37b6b43e..6897c25528ed 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2051,7 +2051,7 @@ def test_update_dataset(self): ds.labels = LABELS ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] ds.resource_tags = RESOURCE_TAGS - fields = [ + filter_fields = [ "description", "friendly_name", "location", @@ -2065,12 +2065,12 @@ def test_update_dataset(self): ) as final_attributes: ds2 = client.update_dataset( ds, - fields=fields, + fields=filter_fields, timeout=7.5, ) final_attributes.assert_called_once_with( - {"path": "/%s" % PATH, "fields": fields}, client, None + {"path": "/%s" % PATH, "fields": filter_fields}, client, None ) conn.api_request.assert_called_once_with( @@ -2615,7 +2615,7 @@ def test_update_table_w_schema_None(self): self.assertEqual(len(conn.api_request.call_args_list), 2) req = conn.api_request.call_args_list[1] self.assertEqual(req[1]["method"], "PATCH") - sent = {"schema": None} + sent = {"schema": {"fields": None}} self.assertEqual(req[1]["data"], sent) self.assertEqual(req[1]["path"], "/%s" % path) self.assertEqual(len(updated_table.schema), 0) diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 467f1e1de12a..3f2304a70734 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -765,27 +765,62 @@ def test__parse_schema_resource_fields_without_mode(self): self._verifySchema(schema, RESOURCE) -class Test_build_schema_resource(unittest.TestCase, _SchemaBase): +class Test_build_schema_resource: + """Tests for the _build_schema_resource function.""" + def _call_fut(self, resource): - from google.cloud.bigquery.schema import _build_schema_resource + return schema._build_schema_resource(resource) + + FULL_NAME = schema.SchemaField( + name="full_name", field_type="STRING", mode="REQUIRED" + ) + AGE = schema.SchemaField(name="age", field_type="INTEGER", mode="REQUIRED") + LIST_RESOURCE = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + FOREIGN_TYPE_INFO = schema.ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED") + FOREIGN_TYPE_INFO_RESOURCE = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"} + + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param([], [], id="empty list"), + pytest.param([FULL_NAME, AGE], LIST_RESOURCE, id="list"), + ], + ) + def test_ctor_valid_input(self, schema, expected): + result = self._call_fut(schema) + + assert result == expected - return _build_schema_resource(resource) + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param(123, TypeError, id="invalid type"), + ], + ) + def test_ctor_invalid_input(self, schema, expected): + with pytest.raises(TypeError) as e: + self._call_fut(schema) + + # Looking for the first phrase from the string "Schema must be a ..." + assert "Schema must be a " in str(e.value) def test_defaults(self): from google.cloud.bigquery.schema import SchemaField full_name = SchemaField("full_name", "STRING", mode="REQUIRED") age = SchemaField("age", "INTEGER", mode="REQUIRED") + # test with simple list resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + } + assert resource[1] == {"name": "age", "type": "INTEGER", "mode": "REQUIRED"} def test_w_description(self): from google.cloud.bigquery.schema import SchemaField @@ -802,25 +837,20 @@ def test_w_description(self): description=None, ) resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": DESCRIPTION, - }, - ) - self.assertEqual( - resource[1], - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": DESCRIPTION, + } + + assert resource[1] == { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + } def test_w_subfields(self): from google.cloud.bigquery.schema import SchemaField @@ -832,49 +862,72 @@ def test_w_subfields(self): "phone", "RECORD", mode="REPEATED", fields=[ph_type, ph_num] ) resource = self._call_fut([full_name, phone]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - { - "name": "phone", - "type": "RECORD", - "mode": "REPEATED", - "fields": [ - {"name": "type", "type": "STRING", "mode": "REQUIRED"}, - {"name": "number", "type": "STRING", "mode": "REQUIRED"}, - ], - }, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + } + assert resource[1] == { + "name": "phone", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + ], + } + +class Test_to_schema_fields: + """Tests for the _to_schema_fields function.""" -class Test_to_schema_fields(unittest.TestCase): @staticmethod def _call_fut(schema): from google.cloud.bigquery.schema import _to_schema_fields return _to_schema_fields(schema) - def test_invalid_type(self): - schema = [ - ("full_name", "STRING", "REQUIRED"), - ("address", "STRING", "REQUIRED"), - ] - with self.assertRaises(ValueError): - self._call_fut(schema) - - def test_schema_fields_sequence(self): - from google.cloud.bigquery.schema import SchemaField + FULL_NAME = schema.SchemaField( + name="full_name", field_type="STRING", mode="REQUIRED" + ) + AGE = schema.SchemaField(name="age", field_type="INTEGER", mode="REQUIRED") + LIST_RESOURCE = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + FOREIGN_TYPE_INFO = schema.ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED") + FOREIGN_TYPE_INFO_RESOURCE = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"} - schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INT64", mode="NULLABLE"), - ] + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param([], [], id="empty list"), + pytest.param((), [], id="empty tuple"), + pytest.param(LIST_RESOURCE, [FULL_NAME, AGE], id="list"), + ], + ) + def test_ctor_valid_input(self, schema, expected): result = self._call_fut(schema) - self.assertEqual(result, schema) + + assert result == expected + + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param(123, TypeError, id="invalid schema type"), + pytest.param([123, 123], TypeError, id="invalid SchemaField type"), + pytest.param({"fields": 123}, TypeError, id="invalid type, dict"), + pytest.param( + {"fields": 123, "foreignTypeInfo": 123}, + TypeError, + id="invalid type, dict", + ), + ], + ) + def test_ctor_invalid_input(self, schema, expected): + with pytest.raises(expected): + self._call_fut(schema) def test_unknown_properties(self): schema = [ @@ -933,7 +986,7 @@ def test_valid_mapping_representation(self): ] result = self._call_fut(schema) - self.assertEqual(result, expected_schema) + assert result == expected_schema class TestPolicyTags(unittest.TestCase): diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 5154f01d819d..1a3d7ec0febd 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -31,6 +31,7 @@ from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions from google.cloud.bigquery import external_config +from google.cloud.bigquery import schema from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -699,7 +700,7 @@ def test_schema_setter_invalid_field(self): table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): table.schema = [full_name, object()] def test_schema_setter_valid_fields(self): @@ -1213,6 +1214,83 @@ def test_to_api_repr_w_unsetting_expiration(self): } self.assertEqual(resource, exp_resource) + def test_to_api_repr_w_schema_and_foreign_type_info(self): + """Tests to ensure that to_api_repr works correctly with + both schema and foreign_type_info fields + """ + + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + FOREIGNTYPEINFO = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + SCHEMA = { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreignTypeInfo": FOREIGNTYPEINFO, + } + + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": SCHEMA, + } + + table = self._get_target_class().from_api_repr(API_REPR) + assert table._properties == table.to_api_repr() + + # update schema (i.e. the fields), ensure foreign_type_info is unchanged + table.schema = [] + expected = { + "fields": [], + "foreignTypeInfo": {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}, + } + assert table.to_api_repr()["schema"] == expected + + # update foreign_type_info, ensure schema (i.e. the fields), is unchanged + table.foreign_type_info = {"typeSystem": "SCHEMA_SHOULD_NOT_CHANGE"} + expected = { + "fields": [], + "foreignTypeInfo": {"typeSystem": "SCHEMA_SHOULD_NOT_CHANGE"}, + } + assert table.to_api_repr()["schema"] == expected + + def test_from_api_repr_w_schema_and_foreign_type_info(self): + """Tests to ensure that to_api_repr works correctly with + both schema and foreign_type_info fields + """ + + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + FOREIGNTYPEINFO = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + SCHEMA = { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreignTypeInfo": FOREIGNTYPEINFO, + } + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": SCHEMA, + } + + table = self._get_target_class().from_api_repr(API_REPR) + assert table._properties == API_REPR + def test__build_resource_w_custom_field(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -5993,6 +6071,99 @@ def test_external_catalog_table_options_from_api_repr(self): assert result == expected +class TestForeignTypeInfo: + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + DATASET = DatasetReference(PROJECT, DATASET_ID) + TABLEREF = DATASET.table(TABLE_ID) + FOREIGNTYPEINFO = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreign_info_type": FOREIGNTYPEINFO, + }, + } + + from google.cloud.bigquery.schema import ForeignTypeInfo + + @staticmethod + def _get_target_class(self): + from google.cloud.bigquery.table import Table + + return Table + + def _make_one(self, *args, **kw): + return self._get_target_class(self)(*args, **kw) + + def test_foreign_type_info_default_initialization(self): + table = self._make_one(self.TABLEREF) + assert table.foreign_type_info is None + + @pytest.mark.parametrize( + "foreign_type_info, expected", + [ + ( + {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}, + "TYPE_SYSTEM_UNSPECIFIED", + ), + (None, None), + ( + ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED"), + "TYPE_SYSTEM_UNSPECIFIED", + ), + ], + ) + def test_foreign_type_info_valid_inputs(self, foreign_type_info, expected): + table = self._make_one(self.TABLEREF) + + table.foreign_type_info = foreign_type_info + + if foreign_type_info is None: + result = table.foreign_type_info + else: + result = table.foreign_type_info.type_system + assert result == expected + + def test_foreign_type_info_invalid_inputs(self): + table = self._make_one(self.TABLEREF) + + # invalid on the whole + with pytest.raises(TypeError, match="Pass .*"): + table.foreign_type_info = 123 + + def test_foreign_type_info_to_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.foreign_type_info = self.ForeignTypeInfo( + type_system="TYPE_SYSTEM_UNSPECIFIED", + ) + + result = table.to_api_repr()["schema"]["foreignTypeInfo"] + expected = self.FOREIGNTYPEINFO + assert result == expected + + def test_foreign_type_info_from_api_repr(self): + table = self._make_one(self.TABLEREF) + table.foreign_type_info = self.FOREIGNTYPEINFO + + fti = schema.ForeignTypeInfo.from_api_repr(self.FOREIGNTYPEINFO) + + result = fti.to_api_repr() + expected = self.FOREIGNTYPEINFO + assert result == expected + + @pytest.mark.parametrize( "table_path", ( From f93819bc5918f43c9be125fdc8cc9a6bf609d042 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 21 Feb 2025 14:16:20 -0500 Subject: [PATCH 1893/2016] deps: updates required checks list in github (#2136) * deps: updates required checks list in github * deps: updates snippet and system checks in github to remove 3.9 * changes the order of two items in the list. --- .../.github/sync-repo-settings.yaml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml index 6543d52850a9..188c44bbd444 100644 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -10,14 +10,10 @@ branchProtectionRules: requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - - 'Kokoro snippets-3.8' - - 'Kokoro snippets-3.12' - - 'Kokoro system-3.8' - 'Kokoro system-3.12' + - 'Kokoro snippets-3.12' - 'cla/google' - 'Samples - Lint' - - 'Samples - Python 3.7' - - 'Samples - Python 3.8' - 'Samples - Python 3.9' - 'Samples - Python 3.10' - 'Samples - Python 3.11' @@ -28,8 +24,7 @@ branchProtectionRules: requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - - 'Kokoro snippets-3.8' - 'cla/google' - 'Samples - Lint' - - 'Samples - Python 3.7' - - 'Samples - Python 3.8' + - 'Samples - Python 3.9' + - 'Samples - Python 3.10' From 95b6c9c8ef68134a3d3208ed5d2bc9c28ba97a22 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:04:23 -0800 Subject: [PATCH 1894/2016] chore(python): conditionally load credentials in .kokoro/build.sh (#2137) Source-Link: https://github.com/googleapis/synthtool/commit/aa69fb74717c8f4c58c60f8cc101d3f4b2c07b09 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:f016446d6e520e5fb552c45b110cba3f217bffdd3d06bdddd076e9e6d13266cf Co-authored-by: Owl Bot --- .../.github/.OwlBot.lock.yaml | 4 ++-- .../google-cloud-bigquery/.kokoro/build.sh | 20 +++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 4c0027ff1c61..3f7634f25f8e 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:04c35dc5f49f0f503a306397d6d043685f8d2bb822ab515818c4208d7fb2db3a -# created: 2025-01-16T15:24:11.364245182Z + digest: sha256:f016446d6e520e5fb552c45b110cba3f217bffdd3d06bdddd076e9e6d13266cf +# created: 2025-02-21T19:32:52.01306189Z diff --git a/packages/google-cloud-bigquery/.kokoro/build.sh b/packages/google-cloud-bigquery/.kokoro/build.sh index e4da2e2a76bc..d41b45aa1dd0 100755 --- a/packages/google-cloud-bigquery/.kokoro/build.sh +++ b/packages/google-cloud-bigquery/.kokoro/build.sh @@ -15,11 +15,13 @@ set -eo pipefail +CURRENT_DIR=$(dirname "${BASH_SOURCE[0]}") + if [[ -z "${PROJECT_ROOT:-}" ]]; then - PROJECT_ROOT="github/python-bigquery" + PROJECT_ROOT=$(realpath "${CURRENT_DIR}/..") fi -cd "${PROJECT_ROOT}" +pushd "${PROJECT_ROOT}" # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 @@ -28,10 +30,16 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Setup service account credentials. -export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json +if [[ -f "${KOKORO_GFILE_DIR}/service-account.json" ]] +then + export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json +fi # Setup project id. -export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") +if [[ -f "${KOKORO_GFILE_DIR}/project-id.json" ]] +then + export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") +fi # If this is a continuous build, send the test log to the FlakyBot. # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. @@ -46,7 +54,7 @@ fi # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. if [[ -n "${NOX_SESSION:-}" ]]; then - python3 -m nox -s ${NOX_SESSION:-} + python3 -m nox -s ${NOX_SESSION:-} else - python3 -m nox + python3 -m nox fi From ab53af16ad854ca3f64f72be400c93388666ec9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 25 Feb 2025 20:24:36 -0600 Subject: [PATCH 1895/2016] fix: retry 404 errors in `Client.query(...)` (#2135) * fix: retry 404 errors in `Client.query(...)` * retry on 404 * only retry notfound on jobs.insert * try to improve code coverage * disambiguate job not found from dataset/table not found * remove use of private attributes * fix unit tests * fix cover for retry.py --- .../google/cloud/bigquery/_job_helpers.py | 27 ++- .../google/cloud/bigquery/retry.py | 54 ++++++ .../tests/unit/test_client.py | 174 +++++++++++++++++- 3 files changed, 252 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index e66ab2763b31..b028cd35794a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -47,6 +47,7 @@ from google.cloud.bigquery import job import google.cloud.bigquery.query from google.cloud.bigquery import table +import google.cloud.bigquery.retry from google.cloud.bigquery.retry import POLLING_DEFAULT_VALUE # Avoid circular imports @@ -142,12 +143,28 @@ def do_query(): raise create_exc try: + # Sometimes we get a 404 after a Conflict. In this case, we + # have pretty high confidence that by retrying the 404, we'll + # (hopefully) eventually recover the job. + # https://github.com/googleapis/python-bigquery/issues/2134 + # + # Allow users who want to completely disable retries to + # continue to do so by setting retry to None. + get_job_retry = retry + if retry is not None: + # TODO(tswast): Amend the user's retry object with allowing + # 404 to retry when there's a public way to do so. + # https://github.com/googleapis/python-api-core/issues/796 + get_job_retry = ( + google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY + ) + query_job = client.get_job( job_id, project=project, location=location, - retry=retry, - timeout=timeout, + retry=get_job_retry, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ) except core_exceptions.GoogleAPIError: # (includes RetryError) raise @@ -156,7 +173,13 @@ def do_query(): else: return query_job + # Allow users who want to completely disable retries to + # continue to do so by setting job_retry to None. + if job_retry is not None: + do_query = google.cloud.bigquery.retry._DEFAULT_QUERY_JOB_INSERT_RETRY(do_query) + future = do_query() + # The future might be in a failed state now, but if it's # unrecoverable, we'll find out when we ask for it's result, at which # point, we may retry. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 10958980dc7a..999d0e851ee5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -82,6 +82,32 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ + +def _should_retry_get_job_conflict(exc): + """Predicate for determining when to retry a jobs.get call after a conflict error. + + Sometimes we get a 404 after a Conflict. In this case, we + have pretty high confidence that by retrying the 404, we'll + (hopefully) eventually recover the job. + https://github.com/googleapis/python-bigquery/issues/2134 + + Note: we may be able to extend this to user-specified predicates + after https://github.com/googleapis/python-api-core/issues/796 + to tweak existing Retry object predicates. + """ + return isinstance(exc, exceptions.NotFound) or _should_retry(exc) + + +# Pick a deadline smaller than our other deadlines since we want to timeout +# before those expire. +_DEFAULT_GET_JOB_CONFLICT_DEADLINE = _DEFAULT_RETRY_DEADLINE / 3.0 +_DEFAULT_GET_JOB_CONFLICT_RETRY = retry.Retry( + predicate=_should_retry_get_job_conflict, + deadline=_DEFAULT_GET_JOB_CONFLICT_DEADLINE, +) +"""Private, may be removed in future.""" + + # Note: Take care when updating DEFAULT_TIMEOUT to anything but None. We # briefly had a default timeout, but even setting it at more than twice the # theoretical server-side default timeout of 2 minutes was not enough for @@ -142,6 +168,34 @@ def _job_should_retry(exc): The default job retry object. """ + +def _query_job_insert_should_retry(exc): + # Per https://github.com/googleapis/python-bigquery/issues/2134, sometimes + # we get a 404 error. In this case, if we get this far, assume that the job + # doesn't actually exist and try again. We can't add 404 to the default + # job_retry because that happens for errors like "this table does not + # exist", which probably won't resolve with a retry. + if isinstance(exc, exceptions.RetryError): + exc = exc.cause + + if isinstance(exc, exceptions.NotFound): + message = exc.message + # Don't try to retry table/dataset not found, just job not found. + # The URL contains jobs, so use whitespace to disambiguate. + return message is not None and " job" in message.lower() + + return _job_should_retry(exc) + + +_DEFAULT_QUERY_JOB_INSERT_RETRY = retry.Retry( + predicate=_query_job_insert_should_retry, + # jobs.insert doesn't wait for the job to complete, so we don't need the + # long _DEFAULT_JOB_DEADLINE for this part. + deadline=_DEFAULT_RETRY_DEADLINE, +) +"""Private, may be removed in future.""" + + DEFAULT_GET_JOB_TIMEOUT = 128 """ Default timeout for Client.get_job(). diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 6897c25528ed..4f13d6eccb02 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -28,9 +28,12 @@ from unittest import mock import warnings -import requests +import freezegun import packaging import pytest +import requests + +import google.api try: @@ -55,6 +58,8 @@ import google.cloud._helpers from google.cloud import bigquery +from google.cloud.bigquery import job as bqjob +import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions @@ -5308,6 +5313,173 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): with pytest.raises(DataLoss, match="we lost your job, sorry"): client.query("SELECT 1;", job_id=None) + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails_no_retries(self): + from google.api_core.exceptions import Conflict + from google.api_core.exceptions import DataLoss + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", side_effect=DataLoss("we lost your job, sorry") + ) + + with job_begin_patcher, get_job_patcher: + # If get job request fails but supposedly there does exist a job + # with this ID already, raise the exception explaining why we + # couldn't recover the job. + with pytest.raises(DataLoss, match="we lost your job, sorry"): + client.query( + "SELECT 1;", + job_id=None, + # Explicitly test with no retries to make sure those branches are covered. + retry=None, + job_retry=None, + ) + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404(self): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404, but we know + because of the conflict that really the job does exist. Retry until we + get the job status (or timeout). + """ + job_id = "abc123" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection( + # We're mocking QueryJob._begin, so this is only going to be + # jobs.get requests and responses. + google.api_core.exceptions.TooManyRequests("this is retriable by default"), + google.api_core.exceptions.NotFound("we lost your job"), + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": self.PROJECT, + "location": "TESTLOC", + "jobId": job_id, + } + }, + ) + + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + return_value=job_id, + ) + + with job_begin_patcher, job_id_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path] + * 4, + ) + assert result.job_id == job_id + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert( + self, + ): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404. If it keeps + failing with a 404, assume that the job actually doesn't exist. + """ + job_id_1 = "abc123" + job_id_2 = "xyz789" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + # We're mocking QueryJob._begin, so that the connection should only get + # jobs.get requests. + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": self.PROJECT, + "location": "TESTLOC", + "jobId": job_id_2, + } + }, + ) + + # Choose a small deadline so the 404 retries give up. + retry = ( + google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1) + ) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + side_effect=[job_id_1, job_id_2], + ) + retry_patcher = mock.patch.object( + google.cloud.bigquery.retry, + "_DEFAULT_GET_JOB_CONFLICT_RETRY", + retry, + ) + + with freezegun.freeze_time( + "2025-01-01 00:00:00", + # 10x the retry deadline to guarantee a timeout. + auto_tick_seconds=10, + ), job_begin_patcher, job_id_patcher, retry_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path_1 = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id_1}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + jobs_get_path_2 = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id_2}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path_1, jobs_get_path_2], + ) + assert result.job_id == job_id_2 + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): from google.api_core.exceptions import Conflict from google.cloud.bigquery.job import QueryJob From 7e55e2137c90dfeb141f2a75150eeacc977cb9f0 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 27 Feb 2025 10:42:38 -0800 Subject: [PATCH 1896/2016] chore(main): release 3.30.0 (#2119) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 26 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 45c39e19c8fe..91d0a362dd05 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,32 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.30.0](https://github.com/googleapis/python-bigquery/compare/v3.29.0...v3.30.0) (2025-02-26) + + +### Features + +* Add roundingmode enum, wiring, and tests ([#2121](https://github.com/googleapis/python-bigquery/issues/2121)) ([3a48948](https://github.com/googleapis/python-bigquery/commit/3a4894827f6e73a4a88cb22933c2004697dabcc7)) +* Adds foreign_type_info attribute to table class and adds unit tests. ([#2126](https://github.com/googleapis/python-bigquery/issues/2126)) ([2c19681](https://github.com/googleapis/python-bigquery/commit/2c1968115bef8e1dc84e0125615f551b9b011a4b)) +* Support resource_tags for table ([#2093](https://github.com/googleapis/python-bigquery/issues/2093)) ([d4070ca](https://github.com/googleapis/python-bigquery/commit/d4070ca21b5797e900a9e87b966837ee1c278217)) + + +### Bug Fixes + +* Avoid blocking in download thread when using BQ Storage API ([#2034](https://github.com/googleapis/python-bigquery/issues/2034)) ([54c8d07](https://github.com/googleapis/python-bigquery/commit/54c8d07f06a8ae460c9e0fb1614e1fbc21efb5df)) +* Retry 404 errors in `Client.query(...)` ([#2135](https://github.com/googleapis/python-bigquery/issues/2135)) ([c6d5f8a](https://github.com/googleapis/python-bigquery/commit/c6d5f8aaec21ab8f17436407aded4bc2316323fd)) + + +### Dependencies + +* Updates required checks list in github ([#2136](https://github.com/googleapis/python-bigquery/issues/2136)) ([fea49ff](https://github.com/googleapis/python-bigquery/commit/fea49ffbf8aa1d53451864ceb7fd73189b6661cb)) +* Use pandas-gbq to determine schema in `load_table_from_dataframe` ([#2095](https://github.com/googleapis/python-bigquery/issues/2095)) ([7603bd7](https://github.com/googleapis/python-bigquery/commit/7603bd71d60592ef2a551d9eea09987b218edc73)) + + +### Documentation + +* Update magics.rst ([#2125](https://github.com/googleapis/python-bigquery/issues/2125)) ([b5bcfb3](https://github.com/googleapis/python-bigquery/commit/b5bcfb303d27015b747a3b0747ecd7f7ed0ed557)) + ## [3.29.0](https://github.com/googleapis/python-bigquery/compare/v3.28.0...v3.29.0) (2025-01-21) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 3d852b8a3f57..01c4c51ca6d5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.29.0" +__version__ = "3.30.0" From 780b46ce1f3d3c4a785c9ade378536f1592e837d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 28 Feb 2025 13:14:05 -0500 Subject: [PATCH 1897/2016] deps: Remove Python 3.7 and 3.8 as supported runtimes (#2133) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Initial batch of changes to remove 3.7 and 3.8 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * more updates to remove 3.7 and 3.8 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates samples/geography/reqs * updates samples/magics/reqs * updates samples/notebooks/reqs * updates linting * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates conf due to linting issue * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates reqs.txt, fix mypy, lint, and debug in noxfile * Updates owlbot to correct spacing issue in conf.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates owlbot imports * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removes kokoro samples configs for 3.7 & 3.8 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removes owlbots attempt to restore kokoro samples configs * removes kokoro system-3.8.cfg * edits repo sync settings * updates assorted noxfiles for samples and pyproject.toml * update test-samples-impl.sh * updates install_deps template * Edits to the contributing documentation * deps: use pandas-gbq to determine schema in `load_table_from_dataframe` (#2095) * feat: use pandas-gbq to determine schema in `load_table_from_dataframe` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix some unit tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * bump minimum pandas-gbq to 0.26.1 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * drop pandas-gbq from python 3.7 extras * relax warning message text assertion * use consistent time zone presense/absense in time datetime system test * Update google/cloud/bigquery/_pandas_helpers.py * Update google/cloud/bigquery/_pandas_helpers.py Co-authored-by: Chalmer Lowe * remove pandas-gbq from at least 1 unit test and system test session --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe * Feat: Adds foreign_type_info attribute to table class and adds unit tests. (#2126) * adds foreign_type_info attribute to table * feat: Adds foreign_type_info attribute and tests * updates docstrings for foreign_type_info * Updates property handling, especially as regards set/get_sub_prop * Removes extraneous comments and debug expressions * Refactors build_resource_from_properties w get/set_sub_prop * updates to foreign_type_info, tests and wiring * Adds logic to detect non-Sequence schema.fields value * updates assorted tests and logic * deps: updates required checks list in github (#2136) * deps: updates required checks list in github * deps: updates snippet and system checks in github to remove 3.9 * changes the order of two items in the list. * updates linting * reverts pandas back to 1.1.0 * Revert changes related to pandas <1.5 * Revert noxfile.py changes related to pandas <1.5 * Revert constraints-3.9 changes related to pandas <1.5 * Revert test_query_pandas.py changes related to pandas <1.5 * Revert test__pandas_helpers.py changes related to pandas <1.5 * Revert test__versions_helpers.py changes related to pandas <1.5 * Revert tnoxfile.py changes related to pandas <1.5 * Revert test__versions_helpers.py changes related to pandas <1.5 * Revert test_table.py changes related to pandas <1.5 * Update noxfile changes related to pandas <1.5 * Update pyproject.toml changes related to pandas <1.5 * Update constraints-3.9.txt changes related to pandas <1.5 * Update test_legacy_types.py changes related to pandas <1.5 * Updates magics.py as part of reverting from pandas 1.5 * Updates noxfile.py in reverting from pandas 1.5 * Updates pyproject.toml in reverting from pandas 1.5 * Updates constraints.txt in reverting from pandas 1.5 * Updates test_magics in reverting from pandas 1.5 * Updates test_table in reverting from pandas 1.5 * Updates in tests re: reverting from pandas 1.5 * Updates pyproject to match constraints.txt * updates pyproject.toml to mirror constraints * remove limit on virtualenv * updates owlbot.py for test-samples-impl.sh * updates to owlbot.py * updates to test-samples-impl.sh * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * further updates to owlbot.py * removes unneeded files * adds presubmit.cfg back in --------- Co-authored-by: Owl Bot Co-authored-by: Tim Sweña (Swast) --- packages/google-cloud-bigquery/.coveragerc | 2 +- .../{snippets-3.8.cfg => snippets-3.9.cfg} | 2 +- .../{system-3.8.cfg => system-3.9.cfg} | 4 +- .../.kokoro/samples/python3.7/common.cfg | 40 ----------- .../.kokoro/samples/python3.7/continuous.cfg | 6 -- .../samples/python3.7/periodic-head.cfg | 11 ---- .../.kokoro/samples/python3.7/periodic.cfg | 6 -- .../.kokoro/samples/python3.7/presubmit.cfg | 6 -- .../.kokoro/samples/python3.8/common.cfg | 40 ----------- .../.kokoro/samples/python3.8/continuous.cfg | 6 -- .../samples/python3.8/periodic-head.cfg | 11 ---- .../.kokoro/samples/python3.8/periodic.cfg | 6 -- .../.kokoro/samples/python3.8/presubmit.cfg | 6 -- .../.kokoro/test-samples-impl.sh | 3 +- .../google-cloud-bigquery/CONTRIBUTING.rst | 15 ++--- packages/google-cloud-bigquery/README.rst | 4 +- .../google/cloud/bigquery/__init__.py | 6 +- .../google/cloud/bigquery/_pandas_helpers.py | 9 ++- .../google/cloud/bigquery/_pyarrow_helpers.py | 14 ++-- .../google/cloud/bigquery/external_config.py | 6 +- .../google/cloud/bigquery/format_options.py | 4 +- .../google/cloud/bigquery/job/base.py | 4 +- .../google/cloud/bigquery/magics/magics.py | 1 - .../google/cloud/bigquery/model.py | 2 +- .../google/cloud/bigquery/routine/routine.py | 16 +++-- .../google/cloud/bigquery/schema.py | 6 +- .../google/cloud/bigquery/table.py | 14 ++-- packages/google-cloud-bigquery/noxfile.py | 43 ++++++++---- packages/google-cloud-bigquery/owlbot.py | 66 ++++++++++++++++++- packages/google-cloud-bigquery/pyproject.toml | 21 +++--- .../samples/desktopapp/noxfile.py | 4 +- .../samples/desktopapp/requirements-test.txt | 3 +- .../desktopapp/user_credentials_test.py | 9 +-- .../samples/geography/noxfile.py | 4 +- .../samples/geography/requirements-test.txt | 3 +- .../samples/geography/requirements.txt | 56 +++++----------- .../samples/magics/conftest.py | 6 +- .../samples/magics/noxfile.py | 4 +- .../samples/magics/requirements-test.txt | 3 +- .../samples/magics/requirements.txt | 8 +-- .../notebooks/jupyter_tutorial_test.py | 4 +- .../samples/notebooks/noxfile.py | 4 +- .../samples/notebooks/requirements-test.txt | 3 +- .../samples/notebooks/requirements.txt | 10 +-- .../samples/snippets/noxfile.py | 4 +- .../samples/snippets/requirements-test.txt | 3 +- .../templates/install_deps.tmpl.rst | 2 +- .../testing/constraints-3.7.txt | 36 ---------- .../testing/constraints-3.8.txt | 11 ---- .../testing/constraints-3.9.txt | 26 +++++++- .../tests/system/test_pandas.py | 7 +- .../tests/unit/job/test_copy.py | 1 - .../tests/unit/job/test_query_pandas.py | 2 + .../tests/unit/test__pandas_helpers.py | 5 +- .../tests/unit/test__versions_helpers.py | 9 ++- .../tests/unit/test_legacy_types.py | 4 +- .../tests/unit/test_magics.py | 1 + .../tests/unit/test_table.py | 27 ++++---- .../tests/unit/test_table_pandas.py | 12 ++-- 59 files changed, 274 insertions(+), 377 deletions(-) rename packages/google-cloud-bigquery/.kokoro/presubmit/{snippets-3.8.cfg => snippets-3.9.cfg} (82%) rename packages/google-cloud-bigquery/.kokoro/presubmit/{system-3.8.cfg => system-3.9.cfg} (81%) delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.7/continuous.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.7/presubmit.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.8/continuous.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.8/presubmit.cfg delete mode 100644 packages/google-cloud-bigquery/testing/constraints-3.7.txt delete mode 100644 packages/google-cloud-bigquery/testing/constraints-3.8.txt diff --git a/packages/google-cloud-bigquery/.coveragerc b/packages/google-cloud-bigquery/.coveragerc index 04092257a47f..e78e7a931e09 100644 --- a/packages/google-cloud-bigquery/.coveragerc +++ b/packages/google-cloud-bigquery/.coveragerc @@ -9,6 +9,6 @@ omit = google/cloud/bigquery_v2/* # Legacy proto-based types. exclude_lines = # Re-enable the standard pragma - pragma: NO COVER + pragma: (no cover|NO COVER) # Ignore debug-only repr def __repr__ diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.8.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.9.cfg similarity index 82% rename from packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.8.cfg rename to packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.9.cfg index 840d9e716650..d1de209a2f2f 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.8.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.9.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "snippets-3.8" + value: "snippets-3.9" } diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.8.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.9.cfg similarity index 81% rename from packages/google-cloud-bigquery/.kokoro/presubmit/system-3.8.cfg rename to packages/google-cloud-bigquery/.kokoro/presubmit/system-3.9.cfg index f4bcee3db0f0..bd1fb514b2b5 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.8.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.9.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "system-3.8" -} \ No newline at end of file + value: "system-3.9" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg deleted file mode 100644 index d30dc6018ebc..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.7" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py37" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/continuous.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg deleted file mode 100644 index 5aa01bab5bf3..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg deleted file mode 100644 index 71cd1e597e38..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.7/presubmit.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.7/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg deleted file mode 100644 index 46759c6d61bf..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.8" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py38" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/continuous.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg deleted file mode 100644 index 5aa01bab5bf3..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg deleted file mode 100644 index 71cd1e597e38..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.8/presubmit.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.8/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh index 53e365bc4e79..40e24882277e 100755 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh +++ b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh @@ -33,8 +33,7 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Install nox -# `virtualenv==20.26.6` is added for Python 3.7 compatibility -python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6 +python3.9 -m pip install --upgrade --quiet nox virtualenv # Use secrets acessor service account to get secrets if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index 1900c5e36d1f..8f4d54bce265 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. + 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -143,13 +143,12 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system-3.8 -- -k + $ nox -s system-3.9 -- -k .. note:: - System tests are only configured to run under Python 3.8. - For expediency, we do not run them in older versions of Python 3. + System tests are configured to run under Python 3.9, 3.11, 3.12. This alone will not run the tests. You'll need to change some local auth settings and change some configuration in your project to @@ -195,11 +194,11 @@ configure them just like the System Tests. # Run all tests in a folder $ cd samples/snippets - $ nox -s py-3.8 + $ nox -s py-3.9 # Run a single sample test $ cd samples/snippets - $ nox -s py-3.8 -- -k + $ nox -s py-3.9 -- -k ******************************************** Note About ``README`` as it pertains to PyPI @@ -221,16 +220,12 @@ Supported Python Versions We support: -- `Python 3.7`_ -- `Python 3.8`_ - `Python 3.9`_ - `Python 3.10`_ - `Python 3.11`_ - `Python 3.12`_ - `Python 3.13`_ -.. _Python 3.7: https://docs.python.org/3.7/ -.. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index f81adc4b90d9..29e15e067052 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -52,11 +52,11 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.7 +Python >= 3.9 Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Python == 2.7, Python == 3.5, Python == 3.6. +Python == 2.7, Python == 3.5, Python == 3.6, Python == 3.7, and Python == 3.8. The last version of this library compatible with Python 2.7 and 3.5 is `google-cloud-bigquery==1.28.0`. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index caf75333aa17..d39c71641c85 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -126,12 +126,12 @@ if sys_major == 3 and sys_minor in (7, 8): warnings.warn( - "The python-bigquery library will stop supporting Python 3.7 " - "and Python 3.8 in a future major release expected in Q4 2024. " + "The python-bigquery library no longer supports Python 3.7 " + "and Python 3.8. " f"Your Python version is {sys_major}.{sys_minor}.{sys_micro}. We " "recommend that you update soon to ensure ongoing support. For " "more details, see: [Google Cloud Client Libraries Supported Python Versions policy](https://cloud.google.com/python/docs/supported-python-versions)", - PendingDeprecationWarning, + FutureWarning, ) __all__ = [ diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 0017d92cee59..feb6b3adbb81 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -336,8 +336,13 @@ def types_mapper(arrow_data_type): ): return range_date_dtype - elif range_timestamp_dtype is not None and arrow_data_type.equals( - range_timestamp_dtype.pyarrow_dtype + # TODO: this section does not have a test yet OR at least not one that is + # recognized by coverage, hence the pragma. See Issue: #2132 + elif ( + range_timestamp_dtype is not None + and arrow_data_type.equals( # pragma: NO COVER + range_timestamp_dtype.pyarrow_dtype + ) ): return range_timestamp_dtype diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py index 1b42cd5c79bf..034e020eef14 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py @@ -21,8 +21,6 @@ from typing import Any -from packaging import version - try: import pyarrow # type: ignore except ImportError: @@ -101,14 +99,10 @@ def pyarrow_timestamp(): pyarrow.decimal128(38, scale=9).id: "NUMERIC", } - # Adds bignumeric support only if pyarrow version >= 3.0.0 - # Decimal256 support was added to arrow 3.0.0 - # https://arrow.apache.org/blog/2021/01/25/3.0.0-release/ - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" def bq_to_arrow_scalars(bq_scalar: str): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 73c4acabf446..6e943adf357e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -835,8 +835,10 @@ def schema(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema """ - prop = self._properties.get("schema", {}) - return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + prop = self._properties.get("schema", {}) # type: ignore + return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] # type: ignore @schema.setter def schema(self, value): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py b/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py index ad5591b1c9b6..e26b7a74f1fc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/format_options.py @@ -13,7 +13,7 @@ # limitations under the License. import copy -from typing import Dict, Optional +from typing import Dict, Optional, Union class AvroOptions: @@ -106,7 +106,7 @@ def enable_list_inference(self, value: bool) -> None: self._properties["enableListInference"] = value @property - def map_target_type(self) -> str: + def map_target_type(self) -> Optional[Union[bool, str]]: """Indicates whether to simplify the representation of parquet maps to only show keys and values.""" return self._properties.get("mapTargetType") diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index e5f68c8437d9..eaa9d34605da 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -415,7 +415,9 @@ def __init__(self, job_id, client): @property def configuration(self) -> _JobConfig: """Job-type specific configurtion.""" - configuration = self._CONFIG_CLASS() + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + configuration = self._CONFIG_CLASS() # pytype: disable=not-callable configuration._properties = self._properties.setdefault("configuration", {}) return configuration diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py index a5be95185f74..1f892b595222 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/magics/magics.py @@ -55,7 +55,6 @@ except ImportError: bigquery_magics = None - IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) # type: ignore diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py index 45a88ab221e0..16581be5a492 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/model.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/model.py @@ -58,7 +58,7 @@ def __init__(self, model_ref: Union["ModelReference", str, None]): # semantics. The BigQuery API makes a distinction between an unset # value, a null value, and a default value (0 or ""), but the protocol # buffer classes do not. - self._properties = {} + self._properties: Dict[str, Any] = {} if isinstance(model_ref, str): model_ref = ModelReference.from_string(model_ref) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py index 83cb6362d950..7e079781ddff 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py @@ -16,7 +16,7 @@ """Define resources for the BigQuery Routines API.""" -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers @@ -216,7 +216,7 @@ def return_type(self, value: StandardSqlDataType): self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource @property - def return_table_type(self) -> Optional[StandardSqlTableType]: + def return_table_type(self) -> Union[StandardSqlTableType, Any, None]: """The return type of a Table Valued Function (TVF) routine. .. versionadded:: 2.22.0 @@ -518,17 +518,23 @@ def __init__(self): @property def project(self): """str: ID of the project containing the routine.""" - return self._properties["projectId"] # pytype: disable=key-error + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + return self._properties["projectId"] # pytype: disable=typed-dict-error @property def dataset_id(self): """str: ID of dataset containing the routine.""" - return self._properties["datasetId"] # pytype: disable=key-error + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + return self._properties["datasetId"] # pytype: disable=typed-dict-error @property def routine_id(self): """str: The routine ID.""" - return self._properties["routineId"] # pytype: disable=key-error + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + return self._properties["routineId"] # pytype: disable=typed-dict-error @property def path(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 03cde830e7b7..749b0a00e471 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -232,8 +232,12 @@ def __init__( if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length if policy_tags is not _DEFAULT_VALUE: + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 self._properties["policyTags"] = ( - policy_tags.to_api_repr() if policy_tags is not None else None + policy_tags.to_api_repr() # pytype: disable=attribute-error + if policy_tags is not None + else None ) if isinstance(range_element_type, str): self._properties["rangeElementType"] = {"type": range_element_type} diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index c70a0ebea98d..e7f3c9a36b90 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -137,6 +137,8 @@ def _reference_getter(table): return TableReference(dataset_ref, table.table_id) +# TODO: The typehinting for this needs work. Setting this pragma to temporarily +# manage a pytype issue that came up in another PR. See Issue: #2132 def _view_use_legacy_sql_getter(table): """bool: Specifies whether to execute the view with Legacy or Standard SQL. @@ -148,10 +150,11 @@ def _view_use_legacy_sql_getter(table): Raises: ValueError: For invalid value types. """ - view = table._properties.get("view") + + view = table._properties.get("view") # type: ignore if view is not None: # The server-side default for useLegacySql is True. - return view.get("useLegacySql", True) + return view.get("useLegacySql", True) # type: ignore # In some cases, such as in a table list no view object is present, but the # resource still represents a view. Use the type as a fallback. if table.table_type == "VIEW": @@ -375,7 +378,7 @@ class Table(_TableBase): :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. """ - _PROPERTY_TO_API_FIELD = { + _PROPERTY_TO_API_FIELD: Dict[str, Any] = { **_TableBase._PROPERTY_TO_API_FIELD, "clustering_fields": "clustering", "created": "creationTime", @@ -418,7 +421,10 @@ class Table(_TableBase): def __init__(self, table_ref, schema=None) -> None: table_ref = _table_arg_to_table_ref(table_ref) - self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}} + self._properties: Dict[str, Any] = { + "tableReference": table_ref.to_api_repr(), + "labels": {}, + } # Let the @property do validation. if schema is not None: self.schema = schema diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 87bd9a70cf25..f069f8d37128 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -24,7 +24,7 @@ MYPY_VERSION = "mypy==1.6.1" -PYTYPE_VERSION = "pytype==2021.4.9" +PYTYPE_VERSION = "pytype==2024.9.13" BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ( "benchmark", @@ -37,9 +37,9 @@ "setup.py", ) -DEFAULT_PYTHON_VERSION = "3.8" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.11", "3.12"] -UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.12"] +DEFAULT_PYTHON_VERSION = "3.9" +SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] +UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() @@ -102,10 +102,16 @@ def default(session, install_extras=True): "-c", constraints_path, ) - - if install_extras and session.python in ["3.11", "3.12"]: - install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" - elif install_extras: + # We have logic in the magics.py file that checks for whether 'bigquery_magics' + # is imported OR not. If yes, we use a context object from that library. + # If no, we use our own context object from magics.py. In order to exercise + # that logic (and the associated tests) we avoid installing the [ipython] extra + # which has a downstream effect of then avoiding installing bigquery_magics. + if install_extras and session.python == UNIT_TEST_PYTHON_VERSIONS[0]: + install_target = ( + ".[bqstorage,pandas,ipywidgets,geopandas,tqdm,opentelemetry,bigquery_v2]" + ) + elif install_extras: # run against all other UNIT_TEST_PYTHON_VERSIONS install_target = ".[all]" else: install_target = "." @@ -157,7 +163,7 @@ def unit_noextras(session): # so that it continues to be an optional dependency. # https://github.com/googleapis/python-bigquery/issues/1877 if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - session.install("pyarrow==1.0.0") + session.install("pyarrow==4.0.0") default(session, install_extras=False) @@ -178,6 +184,7 @@ def mypy(session): "types-requests", "types-setuptools", ) + session.run("python", "-m", "pip", "freeze") session.run("mypy", "-p", "google", "--show-traceback") @@ -192,6 +199,7 @@ def pytype(session): session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) + session.run("python", "-m", "pip", "freeze") # See https://github.com/google/pytype/issues/464 session.run("pytype", "-P", ".", "google/cloud/bigquery") @@ -281,7 +289,7 @@ def mypy_samples(session): "types-setuptools", ) - session.install("typing-extensions") # for TypedDict in pre-3.8 Python versions + session.run("python", "-m", "pip", "freeze") session.run( "mypy", @@ -307,10 +315,13 @@ def snippets(session): session.install("grpcio", "-c", constraints_path) if session.python in ["3.11", "3.12"]: - extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" + extras = ( + "[bqstorage,pandas,ipywidgets,geopandas,tqdm,opentelemetry,bigquery_v2]" + ) else: extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) + session.run("python", "-m", "pip", "freeze") # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session @@ -339,6 +350,7 @@ def cover(session): """ session.install("coverage", "pytest-cov") + session.run("python", "-m", "pip", "freeze") session.run("coverage", "report", "--show-missing", "--fail-under=100") session.run("coverage", "erase") @@ -378,6 +390,7 @@ def prerelease_deps(session): "google-cloud-bigquery-storage", "google-cloud-core", "google-resumable-media", + "db-dtypes", # Exclude version 1.49.0rc1 which has a known issue. See https://github.com/grpc/grpc/pull/30642 "grpcio!=1.49.0rc1", ) @@ -417,9 +430,6 @@ def prerelease_deps(session): session.install("--no-deps", "-e", ".[all]") # Print out prerelease package versions. - session.run("python", "-c", "import grpc; print(grpc.__version__)") - session.run("python", "-c", "import pandas; print(pandas.__version__)") - session.run("python", "-c", "import pyarrow; print(pyarrow.__version__)") session.run("python", "-m", "pip", "freeze") # Run all tests, except a few samples tests which require extra dependencies. @@ -453,6 +463,7 @@ def lint(session): session.install("flake8", BLACK_VERSION) session.install("-e", ".") + session.run("python", "-m", "pip", "freeze") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") session.run("flake8", os.path.join("docs", "samples")) @@ -467,6 +478,7 @@ def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" session.install("docutils", "Pygments") + session.run("python", "-m", "pip", "freeze") session.run("python", "setup.py", "check", "--restructuredtext", "--strict") @@ -478,6 +490,7 @@ def blacken(session): """ session.install(BLACK_VERSION) + session.run("python", "-m", "pip", "freeze") session.run("black", *BLACK_PATHS) @@ -504,6 +517,7 @@ def docs(session): session.install("-e", ".[all]") shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run("python", "-m", "pip", "freeze") session.run( "sphinx-build", "-W", # warnings as errors @@ -540,6 +554,7 @@ def docfx(session): ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run("python", "-m", "pip", "freeze") session.run( "sphinx-build", "-T", # show full traceback on exception diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 07805d11a7eb..c8efaa98d442 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -14,7 +14,6 @@ """This script is used to synthesize generated parts of this library.""" from pathlib import Path -import textwrap import synthtool as s from synthtool import gcp @@ -70,13 +69,65 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", + ".kokoro/presubmit/system-3.8.cfg", ".kokoro/continuous/prerelease-deps.cfg", + ".kokoro/samples/python3.7/**", + ".kokoro/samples/python3.8/**", ".github/workflows", # exclude gh actions as credentials are needed for tests - "README.rst", + "README.rst", ], ) python.configure_previous_major_version_branches() + +s.replace( + ".kokoro/test-samples-impl.sh", + """# `virtualenv==20.26.6` is added for Python 3.7 compatibility +python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6""", + "python3.9 -m pip install --upgrade --quiet nox virtualenv", +) +s.replace( + "CONTRIBUTING.rst", + "3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", + "3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", +) +s.replace( + "CONTRIBUTING.rst", + r" \$ nox -s system-3.8 -- -k ", + r" $ nox -s system-3.9 -- -k ", +) +s.replace( + "CONTRIBUTING.rst", + r"""System tests are only configured to run under Python 3.8. + For expediency, we do not run them in older versions of Python 3.""", + r"System tests are configured to run under Python 3.9, 3.11, 3.12.", +) +s.replace( + "CONTRIBUTING.rst", + r"\$ nox -s py-3.8", + r"$ nox -s py-3.9", +) +s.replace( + "CONTRIBUTING.rst", + r"""- `Python 3.7`_ +- `Python 3.8`_ +""", + r"", +) +s.replace( + "CONTRIBUTING.rst", + r""".. _Python 3.7: https://docs.python.org/3.7/ +.. _Python 3.8: https://docs.python.org/3.8/ +""", + r"", +) +s.replace( + "scripts/readme-gen/templates/install_deps.tmpl.rst", + r"Samples are compatible with Python 3.7", + r"Samples are compatible with Python 3.9", +) + + # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- @@ -93,6 +144,17 @@ r"exclude_patterns = \[", '\\g<0>\n "google/cloud/bigquery_v2/**", # Legacy proto-based types.', ) +s.replace( + "samples/**/noxfile.py", + 'BLACK_VERSION = "black==22.3.0"', + 'BLACK_VERSION = "black==23.7.0"', +) +s.replace( + "samples/**/noxfile.py", + r'ALL_VERSIONS = \["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"\]', + 'ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]', +) + # ---------------------------------------------------------------------------- # pytype-related changes diff --git a/packages/google-cloud-bigquery/pyproject.toml b/packages/google-cloud-bigquery/pyproject.toml index c4e5c2f0d12a..8822fc57dfc8 100644 --- a/packages/google-cloud-bigquery/pyproject.toml +++ b/packages/google-cloud-bigquery/pyproject.toml @@ -20,7 +20,7 @@ build-backend = "setuptools.build_meta" name = "google-cloud-bigquery" authors = [{ name = "Google LLC", email = "googleapis-packages@google.com" }] license = { text = "Apache 2.0" } -requires-python = ">=3.7" +requires-python = ">=3.9" description = "Google BigQuery API client library" readme = "README.rst" classifiers = [ @@ -33,8 +33,6 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -47,8 +45,8 @@ dependencies = [ "google-auth >= 2.14.1, < 3.0.0dev", "google-cloud-core >= 2.4.1, < 3.0.0dev", "google-resumable-media >= 2.0.0, < 3.0dev", - "packaging >= 20.0.0", - "python-dateutil >= 2.7.3, < 3.0dev", + "packaging >= 24.2.0", + "python-dateutil >= 2.8.2, < 3.0dev", "requests >= 2.21.0, < 3.0.0dev", ] dynamic = ["version"] @@ -61,7 +59,7 @@ Repository = "https://github.com/googleapis/python-bigquery" # moved back to optional due to bloat. See # https://github.com/googleapis/python-bigquery/issues/1196 for more background. bqstorage = [ - "google-cloud-bigquery-storage >= 2.6.0, < 3.0.0dev", + "google-cloud-bigquery-storage >= 2.18.0, < 3.0.0dev", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. @@ -70,20 +68,19 @@ bqstorage = [ # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.47.0, < 2.0dev", "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", - "pyarrow >= 3.0.0", + "pyarrow >= 4.0.0", ] pandas = [ - "pandas >= 1.1.0", + "pandas >= 1.1.4", "pandas-gbq >= 0.26.1; python_version >= '3.8'", "grpcio >= 1.47.0, < 2.0dev", "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", "pyarrow >= 3.0.0", - "db-dtypes >= 0.3.0, < 2.0.0dev", - "importlib_metadata >= 1.0.0; python_version < '3.8'", + "db-dtypes >= 1.0.4, < 2.0.0dev", ] -ipywidgets = ["ipywidgets >= 7.7.0", "ipykernel >= 6.0.0"] +ipywidgets = ["ipywidgets >= 7.7.1", "ipykernel >= 6.2.0"] geopandas = ["geopandas >= 0.9.0, < 2.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] -ipython = ["bigquery-magics >= 0.1.0"] +ipython = ["ipython >= 7.23.1", "bigquery-magics >= 0.6.0"] tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] opentelemetry = [ "opentelemetry-api >= 1.1.0", diff --git a/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py b/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py index c9a3d1ecbf2a..a8659038261c 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py +++ b/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 827b02dcfb36..cf215e2fd928 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,3 @@ google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py b/packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py index 252b843c4ffa..d14798d9b99f 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py +++ b/packages/google-cloud-bigquery/samples/desktopapp/user_credentials_test.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import sys from typing import Iterator, Union from unittest import mock @@ -24,13 +23,7 @@ PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] - -if sys.version_info >= (3, 8): - # Python 3.8+ has an AsyncMock attribute in unittest.mock, but 3.7 does not - MockType = Union[mock.MagicMock, mock.AsyncMock] -else: - # Other definitions and imports - MockType = Union[mock.MagicMock] +MockType = Union[mock.MagicMock, mock.AsyncMock] @pytest.fixture diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index c9a3d1ecbf2a..a8659038261c 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index ef38acb4f650..4ad1bd0285c4 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,2 @@ -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 71579867fbc8..0ad2154a44d5 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,63 +1,41 @@ -attrs===24.2.0; python_version == '3.7' -attrs==24.3.0; python_version >= '3.8' +attrs==24.3.0 certifi==2024.12.14 -cffi===1.15.1; python_version == '3.7' -cffi==1.17.1; python_version >= '3.8' +cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 click-plugins==1.1.1 cligj==0.7.2 -dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.1 -Fiona===1.9.6; python_version == '3.7' -Fiona==1.10.1; python_version >= '3.8' +Fiona==1.10.1 geojson==3.2.0 -geopandas===0.10.2; python_version == '3.7' -geopandas===0.13.2; python_version == '3.8' -geopandas==1.0.1; python_version >= '3.9' +geopandas==1.0.1 google-api-core==2.24.0 google-auth==2.37.0 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 google-cloud-core==2.4.1 -google-crc32c===1.5.0; python_version < '3.9' -google-crc32c==1.6.0; python_version >= '3.9' +google-crc32c==1.6.0 google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 -grpcio===1.62.2; python_version == '3.7' -grpcio==1.69.0; python_version >= '3.8' +grpcio==1.69.0 idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 -packaging===24.0; python_version == '3.7' -packaging==24.2; python_version >= '3.8' -pandas===1.3.5; python_version == '3.7' -pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +packaging==24.2 +pandas==2.2.3 proto-plus==1.25.0 -pyarrow===12.0.1; python_version == '3.7' -pyarrow===17.0.0; python_version == '3.8' -pyarrow==18.1.0; python_version >= '3.9' -pyasn1===0.5.1; python_version == '3.7' -pyasn1==0.6.1; python_version >= '3.8' -pyasn1-modules===0.3.0; python_version == '3.7' -pyasn1-modules==0.4.1; python_version >= '3.8' -pycparser===2.21; python_version == '3.7' -pycparser==2.22; python_version >= '3.8' -pyparsing===3.1.4; python_version < '3.9' -pyparsing==3.2.1; python_version >= '3.9' +pyarrow==18.1.0 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 +pycparser==2.22 +pyparsing==3.2.1 python-dateutil==2.9.0.post0 pytz==2024.2 -PyYAML===6.0.1; python_version == '3.7' -PyYAML==6.0.2; python_version >= '3.8' -requests==2.31.0; python_version == '3.7' -requests==2.32.3; python_version >= '3.8' +PyYAML==6.0.2 +requests==2.32.3 rsa==4.9 Shapely==2.0.6 six==1.17.0 -typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.12.2; python_version >= '3.8' +typing-extensions==4.12.2 typing-inspect==0.9.0 -urllib3===1.26.18; python_version == '3.7' -urllib3===2.2.3; python_version == '3.8' -urllib3==2.3.0; python_version >= '3.9' +urllib3==2.3.0 diff --git a/packages/google-cloud-bigquery/samples/magics/conftest.py b/packages/google-cloud-bigquery/samples/magics/conftest.py index 55ea30f90d70..0943c535a414 100644 --- a/packages/google-cloud-bigquery/samples/magics/conftest.py +++ b/packages/google-cloud-bigquery/samples/magics/conftest.py @@ -18,7 +18,7 @@ import pytest if typing.TYPE_CHECKING: - from IPython.core.interactiveshell import TerminalInteractiveShell + from IPython.terminal.interactiveshell import TerminalInteractiveShell interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") tools = pytest.importorskip("IPython.testing.tools") @@ -40,5 +40,7 @@ def ipython_interactive( for the duration of the test scope. """ - with ipython.builtin_trap: + + trap = typing.cast(typing.ContextManager, ipython.builtin_trap) + with trap: yield ipython diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index c9a3d1ecbf2a..a8659038261c 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 827b02dcfb36..cf215e2fd928 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,3 @@ google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 87efa3dec888..4b81fe0ad87b 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -2,9 +2,5 @@ bigquery_magics==0.5.0 db-dtypes==1.3.1 google.cloud.bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 -ipython===7.31.1; python_version == '3.7' -ipython===8.0.1; python_version == '3.8' -ipython===8.18.1; python_version >= '3.9' -pandas===1.3.5; python_version == '3.7' -pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +ipython===8.18.1 +pandas==2.2.3 diff --git a/packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py b/packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py index 2c2cf9390f87..1861a822f2ae 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py +++ b/packages/google-cloud-bigquery/samples/notebooks/jupyter_tutorial_test.py @@ -45,7 +45,9 @@ def ipython_interactive( for the duration of the test scope. """ - with ipython.builtin_trap: + + trap = typing.cast(typing.ContextManager, ipython.builtin_trap) + with trap: yield ipython diff --git a/packages/google-cloud-bigquery/samples/notebooks/noxfile.py b/packages/google-cloud-bigquery/samples/notebooks/noxfile.py index c9a3d1ecbf2a..a8659038261c 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/noxfile.py +++ b/packages/google-cloud-bigquery/samples/notebooks/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 827b02dcfb36..cf215e2fd928 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,3 @@ google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 77103a338a77..e92d084a4d43 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -2,13 +2,7 @@ bigquery-magics==0.5.0 db-dtypes==1.3.1 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 -ipython===7.31.1; python_version == '3.7' -ipython===8.0.1; python_version == '3.8' -ipython===8.18.1; python_version >= '3.9' -matplotlib===3.5.3; python_version == '3.7' -matplotlib===3.7.4; python_version == '3.8' +ipython==8.18.1 matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.0; python_version >= '3.10' -pandas===1.3.5; python_version == '3.7' -pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +pandas==2.2.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index c9a3d1ecbf2a..a8659038261c 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 077e465cfce3..52ccc8ab26c6 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,4 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst b/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst index 6f069c6c87a5..f21db80c4d0f 100644 --- a/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst +++ b/packages/google-cloud-bigquery/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -12,7 +12,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 3.7+. +#. Create a virtualenv. Samples are compatible with Python 3.9+. .. code-block:: bash diff --git a/packages/google-cloud-bigquery/testing/constraints-3.7.txt b/packages/google-cloud-bigquery/testing/constraints-3.7.txt deleted file mode 100644 index 55e63449f98b..000000000000 --- a/packages/google-cloud-bigquery/testing/constraints-3.7.txt +++ /dev/null @@ -1,36 +0,0 @@ -# This constraints file is used to check that lower bounds -# are correct in setup.py -# List *all* library dependencies and extras in this file. -# Pin the version to the lower bound. -# -# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", -# Then this file should have foo==1.14.0 -bigquery-magics==0.1.0 -db-dtypes==0.3.0 -geopandas==0.9.0 -google-api-core==2.11.1 -google-auth==2.14.1 -google-cloud-bigquery-storage==2.24.0 -google-cloud-core==2.4.1 -google-cloud-testutils==1.4.0 -google-crc32c==1.5.0 -google-resumable-media==2.0.0 -googleapis-common-protos==1.62.0 -grpcio==1.47.0 -grpcio-status==1.47.0 -ipywidgets==7.7.1 -ipython==7.23.1 -ipykernel==6.0.0 -opentelemetry-api==1.1.0 -opentelemetry-instrumentation==0.20b0 -opentelemetry-sdk==1.1.0 -packaging==20.0.0 -pandas==1.1.0 -proto-plus==1.22.3 -protobuf==3.20.2 -pyarrow==3.0.0 -python-dateutil==2.7.3 -requests==2.21.0 -Shapely==1.8.4 -six==1.13.0 -tqdm==4.7.4 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.8.txt b/packages/google-cloud-bigquery/testing/constraints-3.8.txt deleted file mode 100644 index 9883fb8ccd8f..000000000000 --- a/packages/google-cloud-bigquery/testing/constraints-3.8.txt +++ /dev/null @@ -1,11 +0,0 @@ -grpcio==1.47.0 -pandas==1.2.0 - -# This constraints file is used to check that lower bounds -# are correct in setup.py -# -# Pin the version to the lower bound. -# -# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", -# Then this file should have foo==1.14.0 -pandas-gbq==0.26.1 diff --git a/packages/google-cloud-bigquery/testing/constraints-3.9.txt b/packages/google-cloud-bigquery/testing/constraints-3.9.txt index d4c302867578..63b5d8bf6bf5 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.9.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.9.txt @@ -4,5 +4,29 @@ # # NOTE: Not comprehensive yet, will eventually be maintained semi-automatically by # the renovate bot. +bigquery-magics==0.6.0 +db-dtypes==1.0.4 +geopandas==0.9.0 +google-api-core==2.11.1 +google-auth==2.14.1 +google-cloud-bigquery-storage==2.18.0 +google-cloud-core==2.4.1 +google-resumable-media==2.0.0 grpcio==1.47.0 -pyarrow>=4.0.0 +grpcio==1.49.1; python_version >= '3.11' +ipywidgets==7.7.1 +ipython==7.23.1 +ipykernel==6.2.0 +opentelemetry-api==1.1.0 +opentelemetry-instrumentation==0.20b0 +opentelemetry-sdk==1.1.0 +packaging==24.2.0 +pandas==1.1.4 +pandas-gbq==0.26.1 +proto-plus==1.22.3 +protobuf==3.20.2 +pyarrow==4.0.0 +python-dateutil==2.8.2 +requests==2.21.0 +Shapely==1.8.4 +tqdm==4.7.4 diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index a9e76d416089..e65fca27ea21 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -1222,7 +1222,12 @@ def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype( # These pandas dtypes are handled by the custom dtypes. assert df.dtypes["bool_col"].name == "boolean" - assert df.dtypes["float64_col"].name == "Float64" + # Result is dependent upon which version of pandas is being used. + # Float64 was not introduced until pandas version 1.4. + if PANDAS_INSTALLED_VERSION >= "1.4": + assert df.dtypes["float64_col"].name == "Float64" + else: + assert df.dtypes["float64_col"].name == "string" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["string_col"].name == "string" diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py index 4b0945310970..8e2845316d13 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_copy.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_copy.py @@ -147,7 +147,6 @@ def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) config = resource.get("configuration", {}).get("copy") - table_ref = config["destinationTable"] self.assertEqual(job.destination.project, table_ref["projectId"]) self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 3a5d92dbd761..2cda59bd1335 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -645,6 +645,8 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) +# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 +# pragma added due to issues with coverage. @pytest.mark.skipif( pandas.__version__.startswith("2."), reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index fdd232a5c959..48c085c1dad8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -23,10 +23,7 @@ from unittest import mock import warnings -try: - import importlib.metadata as metadata -except ImportError: - import importlib_metadata as metadata +import importlib.metadata as metadata try: import pandas diff --git a/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py index b1d0ef1acc0c..8379c87c18e0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__versions_helpers.py @@ -188,14 +188,19 @@ def test_bqstorage_is_read_session_optional_false(): @pytest.mark.skipif(pandas is None, reason="pandas is not installed") -@pytest.mark.parametrize("version", ["1.5.0", "2.0.0", "2.1.0"]) +@pytest.mark.parametrize("version", ["1.1.5", "2.0.0", "2.1.0"]) def test_try_import_raises_no_error_w_recent_pandas(version): + # Comparing against the minimum allowed pandas version. + # As long as the installed version is greater than that, no + # error is raised. versions = _versions_helpers.PandasVersions() with mock.patch("pandas.__version__", new=version): try: pandas = versions.try_import(raise_if_error=True) assert pandas is not None - except exceptions.LegacyPandasError: # pragma: NO COVER + # this exception should not fire unless there is something broken + # hence the pragma. + except exceptions.LegacyPandasError: # pragma: no cover raise ("Legacy error raised with a non-legacy dependency version.") diff --git a/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py b/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py index 809be1855ced..75f3e77d785f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py +++ b/packages/google-cloud-bigquery/tests/unit/test_legacy_types.py @@ -18,9 +18,9 @@ import warnings try: - import proto # type: ignore + import proto except ImportError: - proto = None + proto = None # type: ignore @pytest.mark.skipif(proto is None, reason="proto is not installed") diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 73b29df6b69e..0f1e030cb0c0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -36,6 +36,7 @@ except ImportError: magics = None + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") IPython = pytest.importorskip("IPython") interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 1a3d7ec0febd..7644186f33c5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -16,7 +16,6 @@ import datetime import logging import re -from sys import version_info import time import types import unittest @@ -2171,10 +2170,11 @@ def test_to_geodataframe(self): df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 0) # verify the number of rows - if version_info.major == 3 and version_info.minor > 7: - assert not hasattr(df, "crs") # used with Python > 3.7 + + if geopandas.__version__ == "0.9.0": + assert hasattr(df, "crs") else: - self.assertIsNone(df.crs) # used with Python == 3.7 + assert not hasattr(df, "crs") class TestRowIterator(unittest.TestCase): @@ -3699,10 +3699,10 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - # With Python 3.7 and 3.8, len(user_warnings) = 3. With pandas < 1.5, - # pandas.ArrowDtype is not supported. We raise warnings because - # range columns have to be converted to object. - # With higher Python versions and noextra tests, len(user_warnings) = 0 + # With pandas < 1.5, pandas.ArrowDtype is not supported + # and len(user_warnings) = 3. + # We raise warnings because range columns have to be converted to object. + # With higher pandas versions and noextra tests, len(user_warnings) = 0 self.assertIn(len(user_warnings), [0, 3]) self.assertEqual(len(df), 4) @@ -3734,10 +3734,10 @@ def test_to_dataframe_no_tqdm(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - # With Python 3.7 and 3.8, len(user_warnings) = 4. With pandas < 1.5, - # pandas.ArrowDtype is not supported. We raise warnings because - # range columns have to be converted to object. - # With higher Python versions and noextra tests, len(user_warnings) = 1 + # With pandas < 1.5, pandas.ArrowDtype is not supported + # and len(user_warnings) = 4. + # We raise warnings because range columns have to be converted to object. + # With higher pandas versions and noextra tests, len(user_warnings) = 1 self.assertIn(len(user_warnings), [1, 4]) # Even though the progress bar won't show, downloading the dataframe @@ -3991,6 +3991,8 @@ def test_to_dataframe_w_dtypes_mapper(self): ) self.assertEqual(df.name.dtype.name, "string") + # While pyproject.toml lists pandas 1.1 as the lowest supported version of + # pandas, the pip resolver is not able to resolve pandas 1.1 and numpy if hasattr(pandas, "Float64Dtype"): self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) self.assertEqual(df.miles.dtype.name, "Float64") @@ -4085,7 +4087,6 @@ def test_to_dataframe_w_dtypes_mapper(self): {"start": None, "end": None}, ], ) - else: self.assertEqual( list(df.date), diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py index 02a7a6a79795..9e42fb737c7c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock import datetime import decimal -from unittest import mock import pytest @@ -34,12 +34,16 @@ def class_under_test(): return RowIterator +# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 +# pragma added due to issues with coverage. @pytest.mark.skipif( pandas.__version__.startswith("2."), reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", ) -def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): - # See tests/system/test_arrow.py for the actual types we get from the API. +def test_to_dataframe_nullable_scalars( + monkeypatch, class_under_test +): # pragma: NO COVER + """See tests/system/test_arrow.py for the actual types we get from the API.""" arrow_schema = pyarrow.schema( [ pyarrow.field("bignumeric_col", pyarrow.decimal256(76, scale=38)), @@ -129,12 +133,10 @@ def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): assert df["int64_col"][0] == -7 assert df["numeric_col"][0] == decimal.Decimal("-123.456789") assert df["string_col"][0] == "abcdefg" - # Pandas timedelta64 might be a better choice for pandas time columns. Then # they can more easily be combined with date columns to form datetimes. # https://github.com/googleapis/python-bigquery/issues/862 assert df["time_col"][0] == datetime.time(14, 21, 17, 123456) - assert df["timestamp_col"][0] == pandas.to_datetime("2021-08-09 13:30:44.123456Z") From 1b80c9259d128ed9be3742bfe325ec07c4554897 Mon Sep 17 00:00:00 2001 From: Lkhagvadorj Sukhtsoodol Date: Tue, 4 Mar 2025 18:50:05 +0000 Subject: [PATCH 1898/2016] fix: adding property setter for table constraints, #1990 (#2092) * fix: adding property setter for table constraints, #1990 * fix: adding unit test for to_api_repr() method * fix: adding system test for bigquery table update * fix: adding more test case for table constraints property * fix: adding more test case for table constraints property * fix: linting code * fix: adding unit tests for test table, table constraint and foreign key * fix: linting based on noxfile * fix: linting based on noxfile * fix: adding unit tests and system test * fix: clearing lint error * fix: adding table constraint eq unit tests * fix: adding type to to_api_repr resource --- .../google/cloud/bigquery/table.py | 45 +- .../tests/system/test_client.py | 77 +++ .../tests/unit/test_table.py | 442 ++++++++++++++++++ 3 files changed, 562 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index e7f3c9a36b90..9950b1a539b2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1058,6 +1058,17 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @table_constraints.setter + def table_constraints(self, value): + """Tables Primary Key and Foreign Key information.""" + api_repr = value + if not isinstance(value, TableConstraints) and value is not None: + raise ValueError( + "value must be google.cloud.bigquery.table.TableConstraints or None" + ) + api_repr = value.to_api_repr() if value else None + self._properties[self._PROPERTY_TO_API_FIELD["table_constraints"]] = api_repr + @property def resource_tags(self): """Dict[str, str]: Resource tags for the table. @@ -1111,11 +1122,9 @@ def external_catalog_table_options( def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]: """Optional. Specifies metadata of the foreign data type definition in field schema (TableFieldSchema.foreign_type_definition). - Returns: Optional[schema.ForeignTypeInfo]: Foreign type information, or :data:`None` if not set. - .. Note:: foreign_type_info is only required if you are referencing an external catalog such as a Hive table. @@ -3404,6 +3413,20 @@ def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignKey": ], ) + def to_api_repr(self) -> Dict[str, Any]: + """Return a dictionary representing this object.""" + return { + "name": self.name, + "referencedTable": self.referenced_table.to_api_repr(), + "columnReferences": [ + { + "referencingColumn": column_reference.referencing_column, + "referencedColumn": column_reference.referenced_column, + } + for column_reference in self.column_references + ], + } + class TableConstraints: """The TableConstraints defines the primary key and foreign key. @@ -3425,6 +3448,13 @@ def __init__( self.primary_key = primary_key self.foreign_keys = foreign_keys + def __eq__(self, other): + if not isinstance(other, TableConstraints) and other is not None: + raise TypeError("The value provided is not a BigQuery TableConstraints.") + return ( + self.primary_key == other.primary_key if other.primary_key else None + ) and (self.foreign_keys == other.foreign_keys if other.foreign_keys else None) + @classmethod def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints": """Create an instance from API representation.""" @@ -3440,6 +3470,17 @@ def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints": ] return cls(primary_key, foreign_keys) + def to_api_repr(self) -> Dict[str, Any]: + """Return a dictionary representing this object.""" + resource: Dict[str, Any] = {} + if self.primary_key: + resource["primaryKey"] = {"columns": self.primary_key.columns} + if self.foreign_keys: + resource["foreignKeys"] = [ + foreign_key.to_api_repr() for foreign_key in self.foreign_keys + ] + return resource + def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 30e9f94a30ab..9df572b14c10 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -97,6 +97,10 @@ ], ), ] +TABLE_CONSTRAINTS_SCHEMA = [ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("fk_id", "STRING", mode="REQUIRED"), +] SOURCE_URIS_AVRO = [ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", @@ -901,6 +905,79 @@ def test_update_table_clustering_configuration(self): table3 = Config.CLIENT.update_table(table2, ["clustering_fields"]) self.assertIsNone(table3.clustering_fields, None) + def test_update_table_constraints(self): + from google.cloud.bigquery.table import TableConstraints + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + dataset = self.temp_dataset(_make_dataset_id("update_table")) + + TABLE_NAME = "test_table" + table_arg = Table(dataset.table(TABLE_NAME), schema=TABLE_CONSTRAINTS_SCHEMA) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + self.assertTrue(_table_exists(table)) + + REFERENCE_TABLE_NAME = "test_table2" + reference_table_arg = Table( + dataset.table(REFERENCE_TABLE_NAME), + schema=[ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + ], + ) + reference_table = helpers.retry_403(Config.CLIENT.create_table)( + reference_table_arg + ) + self.to_delete.insert(0, reference_table) + self.assertTrue(_table_exists(reference_table)) + + reference_table.table_constraints = TableConstraints( + primary_key=PrimaryKey(columns=["id"]), foreign_keys=None + ) + reference_table2 = Config.CLIENT.update_table( + reference_table, ["table_constraints"] + ) + self.assertEqual( + reference_table2.table_constraints.primary_key, + reference_table.table_constraints.primary_key, + ) + + table_constraints = TableConstraints( + primary_key=PrimaryKey(columns=["id"]), + foreign_keys=[ + ForeignKey( + name="fk_id", + referenced_table=TableReference(dataset, "test_table2"), + column_references=[ + ColumnReference(referencing_column="id", referenced_column="id") + ], + ), + ], + ) + + table.table_constraints = table_constraints + table2 = Config.CLIENT.update_table(table, ["table_constraints"]) + self.assertEqual( + table2.table_constraints, + table_constraints, + ) + + table2.table_constraints = None + table3 = Config.CLIENT.update_table(table2, ["table_constraints"]) + self.assertIsNone(table3.table_constraints, None) + + reference_table2.table_constraints = None + reference_table3 = Config.CLIENT.update_table( + reference_table2, ["table_constraints"] + ) + self.assertIsNone(reference_table3.table_constraints, None) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 7644186f33c5..b846036ab2d4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -893,6 +893,158 @@ def test_table_constraints_property_getter(self): assert isinstance(table_constraints, TableConstraints) assert table_constraints.primary_key == PrimaryKey(columns=["id"]) + def test_table_constraints_property_setter(self): + from google.cloud.bigquery.table import ( + ColumnReference, + ForeignKey, + PrimaryKey, + TableConstraints, + TableReference, + ) + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + primary_key = PrimaryKey(columns=["id"]) + foreign_keys = [ + ForeignKey( + name="fk_name", + referenced_table=TableReference.from_string( + "my_project.my_dataset.table" + ), + column_references=[ + ColumnReference( + referenced_column="product_id", referencing_column="id" + ) + ], + ) + ] + table_constraints = TableConstraints( + primary_key=primary_key, foreign_keys=foreign_keys + ) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == { + "primaryKey": {"columns": ["id"]}, + "foreignKeys": [ + { + "name": "fk_name", + "referencedTable": { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "table", + }, + "columnReferences": [ + {"referencedColumn": "product_id", "referencingColumn": "id"} + ], + } + ], + } + + def test_table_constraints_property_setter_empty_value(self): + from google.cloud.bigquery.table import TableConstraints + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table.table_constraints = TableConstraints(primary_key=None, foreign_keys=None) + assert table._properties["tableConstraints"] == {} + + def test_table_constraints_property_setter_invalid_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + with pytest.raises( + ValueError, + match="value must be google.cloud.bigquery.table.TableConstraints or None", + ): + table.table_constraints = "invalid_value" + + def test_table_constraints_property_setter_none_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table.table_constraints = None + assert table._properties["tableConstraints"] is None + + def test_table_constraints_property_setter_only_primary_key_set(self): + from google.cloud.bigquery.table import PrimaryKey, TableConstraints + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + primary_key = PrimaryKey(columns=["id"]) + + table_constraints = TableConstraints(primary_key=primary_key, foreign_keys=None) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == { + "primaryKey": {"columns": ["id"]} + } + + def test_table_constraints_property_setter_only_foriegn_keys(self): + from google.cloud.bigquery.table import ( + ColumnReference, + ForeignKey, + TableConstraints, + TableReference, + ) + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + foreign_keys = [ + ForeignKey( + name="fk_name", + referenced_table=TableReference.from_string( + "my_project.my_dataset.table" + ), + column_references=[ + ColumnReference( + referenced_column="product_id", referencing_column="id" + ) + ], + ) + ] + table_constraints = TableConstraints( + primary_key=None, foreign_keys=foreign_keys + ) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == { + "foreignKeys": [ + { + "name": "fk_name", + "referencedTable": { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "table", + }, + "columnReferences": [ + {"referencedColumn": "product_id", "referencingColumn": "id"} + ], + } + ] + } + + def test_table_constraints_property_setter_empty_constraints(self): + from google.cloud.bigquery.table import TableConstraints + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table_constraints = TableConstraints(primary_key=None, foreign_keys=None) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == {} + def test_description_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -5889,6 +6041,48 @@ def test__eq__other_type(self): with self.assertRaises(TypeError): foreign_key == "This is not a Foreign Key" + def test_to_api_repr(self): + from google.cloud.bigquery.table import ColumnReference, TableReference + + name = "my_fk" + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + column_references = [ + ColumnReference(referencing_column="product_id", referenced_column="id") + ] + foreign_key = self._make_one(name, referenced_table, column_references) + + expected = { + "name": name, + "referencedTable": { + "projectId": "my-project", + "datasetId": "mydataset", + "tableId": "mytable", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"} + ], + } + self.assertEqual(foreign_key.to_api_repr(), expected) + + def test_to_api_repr_empty_column_references(self): + from google.cloud.bigquery.table import TableReference + + name = "my_fk" + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + column_references = [] + foreign_key = self._make_one(name, referenced_table, column_references) + + expected = { + "name": name, + "referencedTable": { + "projectId": "my-project", + "datasetId": "mydataset", + "tableId": "mytable", + }, + "columnReferences": [], + } + self.assertEqual(foreign_key.to_api_repr(), expected) + class TestTableConstraint(unittest.TestCase): @staticmethod @@ -5906,6 +6100,144 @@ def test_constructor_defaults(self): self.assertIsNone(instance.primary_key) self.assertIsNone(instance.foreign_keys) + def test_constructor_explicit(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + primary_key = PrimaryKey(columns=["my_pk_id"]) + foriegn_keys = [ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference(referencing_column="id", referenced_column="id"), + ], + ), + ] + + table_constraint = self._make_one( + primary_key=primary_key, + foreign_keys=foriegn_keys, + ) + + self.assertEqual(table_constraint.primary_key, primary_key) + self.assertEqual(table_constraint.foreign_keys, foriegn_keys) + + def test_constructor_explicit_with_none(self): + table_constraint = self._make_one(primary_key=None, foreign_keys=None) + + self.assertIsNone(table_constraint.primary_key) + self.assertIsNone(table_constraint.foreign_keys) + + def test__eq__primary_key_mismatch(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + foriegn_keys = [ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference(referencing_column="id", referenced_column="id"), + ], + ), + ] + + table_constraint = self._make_one( + primary_key=PrimaryKey(columns=["my_pk_id"]), + foreign_keys=foriegn_keys, + ) + other_table_constraint = self._make_one( + primary_key=PrimaryKey(columns=["my_other_pk_id"]), + foreign_keys=foriegn_keys, + ) + + self.assertNotEqual(table_constraint, other_table_constraint) + + def test__eq__foreign_keys_mismatch(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + primary_key = PrimaryKey(columns=["my_pk_id"]) + + table_constraint = self._make_one( + primary_key=primary_key, + foreign_keys=[ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference( + referencing_column="id", referenced_column="id" + ), + ], + ), + ], + ) + other_table_constraint = self._make_one( + primary_key=primary_key, + foreign_keys=[ + ForeignKey( + name="my_other_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-other-table" + ), + column_references=[ + ColumnReference( + referencing_column="other_id", referenced_column="other_id" + ), + ], + ), + ], + ) + + self.assertNotEqual(table_constraint, other_table_constraint) + + def test__eq__other_type(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + table_constraint = self._make_one( + primary_key=PrimaryKey(columns=["my_pk_id"]), + foreign_keys=[ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference( + referencing_column="id", referenced_column="id" + ), + ], + ), + ], + ) + with self.assertRaises(TypeError): + table_constraint == "This is not a Table Constraint" + def test_from_api_repr_full_resource(self): from google.cloud.bigquery.table import ( ColumnReference, @@ -5985,6 +6317,116 @@ def test_from_api_repr_only_foreign_keys_resource(self): self.assertIsNone(instance.primary_key) self.assertIsNotNone(instance.foreign_keys) + def test_to_api_repr(self): + from google.cloud.bigquery.table import ColumnReference, ForeignKey, PrimaryKey + + primary_key = PrimaryKey(columns=["id", "product_id"]) + foreign_keys = [ + ForeignKey( + name="my_fk_name", + referenced_table=TableReference.from_string( + "my-project.my-dataset.products" + ), + column_references=[ + ColumnReference( + referencing_column="product_id", referenced_column="id" + ), + ], + ) + ] + instance = self._make_one(primary_key=primary_key, foreign_keys=foreign_keys) + + expected = { + "primaryKey": { + "columns": ["id", "product_id"], + }, + "foreignKeys": [ + { + "name": "my_fk_name", + "referencedTable": { + "projectId": "my-project", + "datasetId": "my-dataset", + "tableId": "products", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"}, + ], + } + ], + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_only_primary_key(self): + from google.cloud.bigquery.table import PrimaryKey + + primary_key = PrimaryKey(columns=["id", "product_id"]) + instance = self._make_one(primary_key=primary_key, foreign_keys=None) + expected = { + "primaryKey": { + "columns": ["id", "product_id"], + }, + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_empty_primary_key(self): + from google.cloud.bigquery.table import PrimaryKey + + primary_key = PrimaryKey(columns=[]) + instance = self._make_one(primary_key=primary_key, foreign_keys=None) + + expected = { + "primaryKey": { + "columns": [], + }, + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_only_foreign_keys(self): + from google.cloud.bigquery.table import ColumnReference, ForeignKey + + foreign_keys = [ + ForeignKey( + name="my_fk_name", + referenced_table=TableReference.from_string( + "my-project.my-dataset.products" + ), + column_references=[ + ColumnReference( + referencing_column="product_id", referenced_column="id" + ), + ], + ) + ] + instance = self._make_one(primary_key=None, foreign_keys=foreign_keys) + expected = { + "foreignKeys": [ + { + "name": "my_fk_name", + "referencedTable": { + "projectId": "my-project", + "datasetId": "my-dataset", + "tableId": "products", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"}, + ], + } + ], + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_empty_foreign_keys(self): + foreign_keys = [] + instance = self._make_one(primary_key=None, foreign_keys=foreign_keys) + + expected = {} + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_empty_constraints(self): + instance = self._make_one(primary_key=None, foreign_keys=None) + expected = {} + self.assertEqual(instance.to_api_repr(), expected) + class TestExternalCatalogTableOptions: PROJECT = "test-project" From ecb6245b889545f8e3eaa6a6e2cb259a97aa34dd Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Fri, 7 Mar 2025 13:58:40 -0800 Subject: [PATCH 1899/2016] feat: add query text and total bytes processed to RowIterator (#2140) --- .../google/cloud/bigquery/_job_helpers.py | 2 ++ .../google/cloud/bigquery/client.py | 8 ++++++++ .../google/cloud/bigquery/job/query.py | 2 ++ .../google/cloud/bigquery/table.py | 18 ++++++++++++++++++ .../tests/unit/job/test_query.py | 3 +++ .../tests/unit/test_client.py | 3 +++ 6 files changed, 36 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index b028cd35794a..a8373c356761 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -526,6 +526,8 @@ def do_query(): query_id=query_results.query_id, project=query_results.project, num_dml_affected_rows=query_results.num_dml_affected_rows, + query=query, + total_bytes_processed=query_results.total_bytes_processed, ) if job_retry is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 03ded93b1b8e..8bbdd6c3291f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -4081,6 +4081,8 @@ def _list_rows_from_query_results( query_id: Optional[str] = None, first_page_response: Optional[Dict[str, Any]] = None, num_dml_affected_rows: Optional[int] = None, + query: Optional[str] = None, + total_bytes_processed: Optional[int] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -4128,6 +4130,10 @@ def _list_rows_from_query_results( num_dml_affected_rows (Optional[int]): If this RowIterator is the result of a DML query, the number of rows that were affected. + query (Optional[str]): + The query text used. + total_bytes_processed (Optinal[int]): + total bytes processed from job statistics, if present. Returns: google.cloud.bigquery.table.RowIterator: @@ -4165,6 +4171,8 @@ def _list_rows_from_query_results( query_id=query_id, first_page_response=first_page_response, num_dml_affected_rows=num_dml_affected_rows, + query=query, + total_bytes_processed=total_bytes_processed, ) return row_iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index ca2448eaacd8..a27c1053069e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1741,6 +1741,8 @@ def is_job_done(): query_id=self.query_id, first_page_response=first_page_response, num_dml_affected_rows=self._query_results.num_dml_affected_rows, + query=self.query, + total_bytes_processed=self.total_bytes_processed, **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 9950b1a539b2..4d79d60dab39 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1760,6 +1760,10 @@ class RowIterator(HTTPIterator): first_page_response (Optional[dict]): API response for the first page of results. These are returned when the first page is requested. + query (Optional[str]): + The query text used. + total_bytes_processed (Optinal[int]): + total bytes processed from job statistics, if present. """ def __init__( @@ -1781,6 +1785,8 @@ def __init__( query_id: Optional[str] = None, project: Optional[str] = None, num_dml_affected_rows: Optional[int] = None, + query: Optional[str] = None, + total_bytes_processed: Optional[int] = None, ): super(RowIterator, self).__init__( client, @@ -1808,6 +1814,8 @@ def __init__( self._query_id = query_id self._project = project self._num_dml_affected_rows = num_dml_affected_rows + self._query = query + self._total_bytes_processed = total_bytes_processed @property def _billing_project(self) -> Optional[str]: @@ -1855,6 +1863,16 @@ def query_id(self) -> Optional[str]: """ return self._query_id + @property + def query(self) -> Optional[str]: + """The query text used.""" + return self._query + + @property + def total_bytes_processed(self) -> Optional[int]: + """total bytes processed from job statistics, if present.""" + return self._total_bytes_processed + def _is_almost_completely_cached(self): """Check if all results are completely cached. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 4bbd31c7307a..1df65279d788 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -887,6 +887,7 @@ def test_result_reloads_job_state_until_done(self): } job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") + job_resource_done["statistics"]["query"]["totalBytesProcessed"] = str(1234) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -966,6 +967,8 @@ def test_result_reloads_job_state_until_done(self): # Test that the total_rows property has changed during iteration, based # on the response from tabledata.list. self.assertEqual(result.total_rows, 1) + self.assertEqual(result.query, job.query) + self.assertEqual(result.total_bytes_processed, 1234) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 4f13d6eccb02..34ef680dd92a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5517,6 +5517,7 @@ def test_query_and_wait_defaults(self): "totalRows": "1", "rows": [{"f": [{"v": "5552452"}]}], "queryId": "job_abcDEF_", + "totalBytesProcessed": 1234, } creds = _make_credentials() http = object() @@ -5532,6 +5533,8 @@ def test_query_and_wait_defaults(self): self.assertIsNone(rows.job_id) self.assertIsNone(rows.project) self.assertIsNone(rows.location) + self.assertEqual(rows.query, query) + self.assertEqual(rows.total_bytes_processed, 1234) # Verify the request we send is to jobs.query. conn.api_request.assert_called_once() From 29dc76328c61d7a8ba78ca832f152304253ada86 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 10 Mar 2025 11:02:52 -0400 Subject: [PATCH 1900/2016] chore: remove unused files (#2141) --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/docker/docs/Dockerfile | 89 --- .../.kokoro/docker/docs/fetch_gpg_keys.sh | 45 -- .../.kokoro/docker/docs/requirements.in | 2 - .../.kokoro/docker/docs/requirements.txt | 297 ---------- .../.kokoro/docs/common.cfg | 66 --- .../.kokoro/docs/docs-presubmit.cfg | 28 - .../.kokoro/docs/docs.cfg | 1 - .../.kokoro/publish-docs.sh | 58 -- .../google-cloud-bigquery/.kokoro/release.sh | 29 - .../.kokoro/release/common.cfg | 43 -- .../.kokoro/release/release.cfg | 1 - .../.kokoro/requirements.in | 11 - .../.kokoro/requirements.txt | 513 ------------------ 14 files changed, 2 insertions(+), 1185 deletions(-) delete mode 100644 packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile delete mode 100755 packages/google-cloud-bigquery/.kokoro/docker/docs/fetch_gpg_keys.sh delete mode 100644 packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in delete mode 100644 packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt delete mode 100644 packages/google-cloud-bigquery/.kokoro/docs/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/docs/docs.cfg delete mode 100755 packages/google-cloud-bigquery/.kokoro/publish-docs.sh delete mode 100755 packages/google-cloud-bigquery/.kokoro/release.sh delete mode 100644 packages/google-cloud-bigquery/.kokoro/release/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/release/release.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/requirements.in delete mode 100644 packages/google-cloud-bigquery/.kokoro/requirements.txt diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 3f7634f25f8e..9d743afe8a83 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:f016446d6e520e5fb552c45b110cba3f217bffdd3d06bdddd076e9e6d13266cf -# created: 2025-02-21T19:32:52.01306189Z + digest: sha256:5581906b957284864632cde4e9c51d1cc66b0094990b27e689132fe5cd036046 +# created: 2025-03-07 diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile b/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile deleted file mode 100644 index e5410e296bd8..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/Dockerfile +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ubuntu:24.04 - -ENV DEBIAN_FRONTEND noninteractive - -# Ensure local Python is preferred over distribution Python. -ENV PATH /usr/local/bin:$PATH - -# Install dependencies. -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - apt-transport-https \ - build-essential \ - ca-certificates \ - curl \ - dirmngr \ - git \ - gpg-agent \ - graphviz \ - libbz2-dev \ - libdb5.3-dev \ - libexpat1-dev \ - libffi-dev \ - liblzma-dev \ - libreadline-dev \ - libsnappy-dev \ - libssl-dev \ - libsqlite3-dev \ - portaudio19-dev \ - redis-server \ - software-properties-common \ - ssh \ - sudo \ - tcl \ - tcl-dev \ - tk \ - tk-dev \ - uuid-dev \ - wget \ - zlib1g-dev \ - && add-apt-repository universe \ - && apt-get update \ - && apt-get -y install jq \ - && apt-get clean autoclean \ - && apt-get autoremove -y \ - && rm -rf /var/lib/apt/lists/* \ - && rm -f /var/cache/apt/archives/*.deb - - -###################### Install python 3.10.14 for docs/docfx session - -# Download python 3.10.14 -RUN wget https://www.python.org/ftp/python/3.10.14/Python-3.10.14.tgz - -# Extract files -RUN tar -xvf Python-3.10.14.tgz - -# Install python 3.10.14 -RUN ./Python-3.10.14/configure --enable-optimizations -RUN make altinstall - -ENV PATH /usr/local/bin/python3.10:$PATH - -###################### Install pip -RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3.10 /tmp/get-pip.py \ - && rm /tmp/get-pip.py - -# Test pip -RUN python3.10 -m pip - -# Install build requirements -COPY requirements.txt /requirements.txt -RUN python3.10 -m pip install --require-hashes -r requirements.txt - -CMD ["python3.10"] diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/fetch_gpg_keys.sh b/packages/google-cloud-bigquery/.kokoro/docker/docs/fetch_gpg_keys.sh deleted file mode 100755 index d653dd868e4b..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/fetch_gpg_keys.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# A script to fetch gpg keys with retry. -# Avoid jinja parsing the file. -# - -function retry { - if [[ "${#}" -le 1 ]]; then - echo "Usage: ${0} retry_count commands.." - exit 1 - fi - local retries=${1} - local command="${@:2}" - until [[ "${retries}" -le 0 ]]; do - $command && return 0 - if [[ $? -ne 0 ]]; then - echo "command failed, retrying" - ((retries--)) - fi - done - return 1 -} - -# 3.6.9, 3.7.5 (Ned Deily) -retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ - 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D - -# 3.8.0 (Łukasz Langa) -retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ - E3FF2839C048B25C084DEBE9B26995E310250568 - -# diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in deleted file mode 100644 index 586bd07037ae..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.in +++ /dev/null @@ -1,2 +0,0 @@ -nox -gcp-docuploader diff --git a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt b/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt deleted file mode 100644 index a9360a25b707..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/docker/docs/requirements.txt +++ /dev/null @@ -1,297 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --allow-unsafe --generate-hashes requirements.in -# -argcomplete==3.5.3 \ - --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ - --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 - # via nox -cachetools==5.5.0 \ - --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ - --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a - # via google-auth -certifi==2024.12.14 \ - --hash=sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56 \ - --hash=sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db - # via requests -charset-normalizer==3.4.1 \ - --hash=sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537 \ - --hash=sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa \ - --hash=sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a \ - --hash=sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294 \ - --hash=sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b \ - --hash=sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd \ - --hash=sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601 \ - --hash=sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd \ - --hash=sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4 \ - --hash=sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d \ - --hash=sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2 \ - --hash=sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313 \ - --hash=sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd \ - --hash=sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa \ - --hash=sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8 \ - --hash=sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1 \ - --hash=sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2 \ - --hash=sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496 \ - --hash=sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d \ - --hash=sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b \ - --hash=sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e \ - --hash=sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a \ - --hash=sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4 \ - --hash=sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca \ - --hash=sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78 \ - --hash=sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408 \ - --hash=sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5 \ - --hash=sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3 \ - --hash=sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f \ - --hash=sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a \ - --hash=sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765 \ - --hash=sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6 \ - --hash=sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146 \ - --hash=sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6 \ - --hash=sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9 \ - --hash=sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd \ - --hash=sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c \ - --hash=sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f \ - --hash=sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545 \ - --hash=sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176 \ - --hash=sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770 \ - --hash=sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824 \ - --hash=sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f \ - --hash=sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf \ - --hash=sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487 \ - --hash=sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d \ - --hash=sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd \ - --hash=sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b \ - --hash=sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534 \ - --hash=sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f \ - --hash=sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b \ - --hash=sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9 \ - --hash=sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd \ - --hash=sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125 \ - --hash=sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9 \ - --hash=sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de \ - --hash=sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11 \ - --hash=sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d \ - --hash=sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35 \ - --hash=sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f \ - --hash=sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda \ - --hash=sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7 \ - --hash=sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a \ - --hash=sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971 \ - --hash=sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8 \ - --hash=sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41 \ - --hash=sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d \ - --hash=sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f \ - --hash=sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757 \ - --hash=sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a \ - --hash=sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886 \ - --hash=sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77 \ - --hash=sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76 \ - --hash=sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247 \ - --hash=sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85 \ - --hash=sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb \ - --hash=sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7 \ - --hash=sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e \ - --hash=sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6 \ - --hash=sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037 \ - --hash=sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1 \ - --hash=sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e \ - --hash=sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807 \ - --hash=sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407 \ - --hash=sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c \ - --hash=sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12 \ - --hash=sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3 \ - --hash=sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089 \ - --hash=sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd \ - --hash=sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e \ - --hash=sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00 \ - --hash=sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616 - # via requests -click==8.1.8 \ - --hash=sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2 \ - --hash=sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a - # via gcp-docuploader -colorlog==6.9.0 \ - --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ - --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 - # via - # gcp-docuploader - # nox -distlib==0.3.9 \ - --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ - --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 - # via virtualenv -filelock==3.16.1 \ - --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ - --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 - # via virtualenv -gcp-docuploader==0.6.5 \ - --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ - --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea - # via -r requirements.in -google-api-core==2.24.0 \ - --hash=sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9 \ - --hash=sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf - # via - # google-cloud-core - # google-cloud-storage -google-auth==2.37.0 \ - --hash=sha256:0054623abf1f9c83492c63d3f47e77f0a544caa3d40b2d98e099a611c2dd5d00 \ - --hash=sha256:42664f18290a6be591be5329a96fe30184be1a1badb7292a7f686a9659de9ca0 - # via - # google-api-core - # google-cloud-core - # google-cloud-storage -google-cloud-core==2.4.1 \ - --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ - --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 - # via google-cloud-storage -google-cloud-storage==2.19.0 \ - --hash=sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba \ - --hash=sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2 - # via gcp-docuploader -google-crc32c==1.6.0 \ - --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ - --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ - --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ - --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ - --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ - --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ - --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ - --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ - --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ - --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ - --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ - --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ - --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ - --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ - --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ - --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ - --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ - --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ - --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ - --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ - --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ - --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ - --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ - --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ - --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ - --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ - --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 - # via - # google-cloud-storage - # google-resumable-media -google-resumable-media==2.7.2 \ - --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ - --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 - # via google-cloud-storage -googleapis-common-protos==1.66.0 \ - --hash=sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c \ - --hash=sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed - # via google-api-core -idna==3.10 \ - --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ - --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 - # via requests -nox==2024.10.9 \ - --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ - --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in -packaging==24.2 \ - --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ - --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f - # via nox -platformdirs==4.3.6 \ - --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ - --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb - # via virtualenv -proto-plus==1.25.0 \ - --hash=sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961 \ - --hash=sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91 - # via google-api-core -protobuf==5.29.3 \ - --hash=sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f \ - --hash=sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7 \ - --hash=sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888 \ - --hash=sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620 \ - --hash=sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da \ - --hash=sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252 \ - --hash=sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a \ - --hash=sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e \ - --hash=sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107 \ - --hash=sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f \ - --hash=sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84 - # via - # gcp-docuploader - # google-api-core - # googleapis-common-protos - # proto-plus -pyasn1==0.6.1 \ - --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ - --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 \ - --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ - --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c - # via google-auth -requests==2.32.3 \ - --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ - --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 - # via - # google-api-core - # google-cloud-storage -rsa==4.9 \ - --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ - --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 - # via google-auth -six==1.17.0 \ - --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ - --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 - # via gcp-docuploader -tomli==2.2.1 \ - --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ - --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ - --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ - --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \ - --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \ - --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \ - --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \ - --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \ - --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \ - --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \ - --hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \ - --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \ - --hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \ - --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \ - --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \ - --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \ - --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \ - --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \ - --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \ - --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \ - --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \ - --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \ - --hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \ - --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \ - --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \ - --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \ - --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \ - --hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \ - --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \ - --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ - --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ - --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 - # via nox -urllib3==2.3.0 \ - --hash=sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df \ - --hash=sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d - # via requests -virtualenv==20.28.1 \ - --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ - --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 - # via nox diff --git a/packages/google-cloud-bigquery/.kokoro/docs/common.cfg b/packages/google-cloud-bigquery/.kokoro/docs/common.cfg deleted file mode 100644 index 76ae5f13bec1..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/docs/common.cfg +++ /dev/null @@ -1,66 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-lib-docs" -} -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/publish-docs.sh" -} - -env_vars: { - key: "STAGING_BUCKET" - value: "docs-staging" -} - -env_vars: { - key: "V2_STAGING_BUCKET" - # Push google cloud library docs to the Cloud RAD bucket `docs-staging-v2` - value: "docs-staging-v2" -} - -# It will upload the docker image after successful builds. -env_vars: { - key: "TRAMPOLINE_IMAGE_UPLOAD" - value: "true" -} - -# It will always build the docker image. -env_vars: { - key: "TRAMPOLINE_DOCKERFILE" - value: ".kokoro/docker/docs/Dockerfile" -} - -# Fetch the token needed for reporting release status to GitHub -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "yoshi-automation-github-key" - } - } -} - -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "docuploader_service_account" - } - } -} diff --git a/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg deleted file mode 100644 index 08adb2e28baf..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/docs/docs-presubmit.cfg +++ /dev/null @@ -1,28 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "STAGING_BUCKET" - value: "gcloud-python-test" -} - -env_vars: { - key: "V2_STAGING_BUCKET" - value: "gcloud-python-test" -} - -# We only upload the image in the main `docs` build. -env_vars: { - key: "TRAMPOLINE_IMAGE_UPLOAD" - value: "false" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/build.sh" -} - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "docs docfx" -} diff --git a/packages/google-cloud-bigquery/.kokoro/docs/docs.cfg b/packages/google-cloud-bigquery/.kokoro/docs/docs.cfg deleted file mode 100644 index 8f43917d92fe..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/docs/docs.cfg +++ /dev/null @@ -1 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh b/packages/google-cloud-bigquery/.kokoro/publish-docs.sh deleted file mode 100755 index 4ed4aaf1346f..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/publish-docs.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -eo pipefail - -# Disable buffering, so that the logs stream through. -export PYTHONUNBUFFERED=1 - -export PATH="${HOME}/.local/bin:${PATH}" - -# build docs -nox -s docs - -# create metadata -python3.10 -m docuploader create-metadata \ - --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3.10 setup.py --version) \ - --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3.10 setup.py --name) \ - --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ - --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ - --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) - -cat docs.metadata - -# upload docs -python3.10 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" - - -# docfx yaml files -nox -s docfx - -# create metadata. -python3.10 -m docuploader create-metadata \ - --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3.10 setup.py --version) \ - --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3.10 setup.py --name) \ - --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ - --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ - --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) - -cat docs.metadata - -# upload docs -python3.10 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" diff --git a/packages/google-cloud-bigquery/.kokoro/release.sh b/packages/google-cloud-bigquery/.kokoro/release.sh deleted file mode 100755 index 65deb5ed31c1..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/release.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -eo pipefail - -# Start the releasetool reporter -python3 -m pip install --require-hashes -r github/python-bigquery/.kokoro/requirements.txt -python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source /tmp/publisher-script - -# Disable buffering, so that the logs stream through. -export PYTHONUNBUFFERED=1 - -# Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-3") -cd github/python-bigquery -python3 setup.py sdist bdist_wheel -twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/packages/google-cloud-bigquery/.kokoro/release/common.cfg b/packages/google-cloud-bigquery/.kokoro/release/common.cfg deleted file mode 100644 index 6f57163f5036..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/release/common.cfg +++ /dev/null @@ -1,43 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" -} -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/release.sh" -} - -# Fetch PyPI password -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "google-cloud-pypi-token-keystore-3" - } - } -} - -# Store the packages we uploaded to PyPI. That way, we have a record of exactly -# what we published, which we can use to generate SBOMs and attestations. -action { - define_artifacts { - regex: "github/python-bigquery/**/*.tar.gz" - strip_prefix: "github/python-bigquery" - } -} diff --git a/packages/google-cloud-bigquery/.kokoro/release/release.cfg b/packages/google-cloud-bigquery/.kokoro/release/release.cfg deleted file mode 100644 index 8f43917d92fe..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/release/release.cfg +++ /dev/null @@ -1 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.in b/packages/google-cloud-bigquery/.kokoro/requirements.in deleted file mode 100644 index fff4d9ce0d0a..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/requirements.in +++ /dev/null @@ -1,11 +0,0 @@ -gcp-docuploader -gcp-releasetool>=2 # required for compatibility with cryptography>=42.x -importlib-metadata -typing-extensions -twine -wheel -setuptools -nox>=2022.11.21 # required to remove dependency on py -charset-normalizer<3 -click<8.1.0 -cryptography>=42.0.5 diff --git a/packages/google-cloud-bigquery/.kokoro/requirements.txt b/packages/google-cloud-bigquery/.kokoro/requirements.txt deleted file mode 100644 index 6ad95a04a419..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/requirements.txt +++ /dev/null @@ -1,513 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --allow-unsafe --generate-hashes requirements.in -# -argcomplete==3.5.1 \ - --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ - --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 - # via nox -attrs==24.2.0 \ - --hash=sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346 \ - --hash=sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2 - # via gcp-releasetool -backports-tarfile==1.2.0 \ - --hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \ - --hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991 - # via jaraco-context -cachetools==5.5.0 \ - --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ - --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a - # via google-auth -certifi==2024.8.30 \ - --hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \ - --hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9 - # via requests -cffi==1.17.1 \ - --hash=sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8 \ - --hash=sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2 \ - --hash=sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1 \ - --hash=sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15 \ - --hash=sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36 \ - --hash=sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824 \ - --hash=sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8 \ - --hash=sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36 \ - --hash=sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17 \ - --hash=sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf \ - --hash=sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc \ - --hash=sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3 \ - --hash=sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed \ - --hash=sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702 \ - --hash=sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1 \ - --hash=sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8 \ - --hash=sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903 \ - --hash=sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6 \ - --hash=sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d \ - --hash=sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b \ - --hash=sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e \ - --hash=sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be \ - --hash=sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c \ - --hash=sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683 \ - --hash=sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9 \ - --hash=sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c \ - --hash=sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8 \ - --hash=sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1 \ - --hash=sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4 \ - --hash=sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655 \ - --hash=sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67 \ - --hash=sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595 \ - --hash=sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0 \ - --hash=sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65 \ - --hash=sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41 \ - --hash=sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6 \ - --hash=sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401 \ - --hash=sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6 \ - --hash=sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3 \ - --hash=sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16 \ - --hash=sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93 \ - --hash=sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e \ - --hash=sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4 \ - --hash=sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964 \ - --hash=sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c \ - --hash=sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576 \ - --hash=sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0 \ - --hash=sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3 \ - --hash=sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662 \ - --hash=sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3 \ - --hash=sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff \ - --hash=sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5 \ - --hash=sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd \ - --hash=sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f \ - --hash=sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5 \ - --hash=sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14 \ - --hash=sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d \ - --hash=sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9 \ - --hash=sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7 \ - --hash=sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382 \ - --hash=sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a \ - --hash=sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e \ - --hash=sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a \ - --hash=sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4 \ - --hash=sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99 \ - --hash=sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87 \ - --hash=sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b - # via cryptography -charset-normalizer==2.1.1 \ - --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ - --hash=sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f - # via - # -r requirements.in - # requests -click==8.0.4 \ - --hash=sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1 \ - --hash=sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb - # via - # -r requirements.in - # gcp-docuploader - # gcp-releasetool -colorlog==6.8.2 \ - --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ - --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 - # via - # gcp-docuploader - # nox -cryptography==44.0.1 \ - --hash=sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7 \ - --hash=sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3 \ - --hash=sha256:1f9a92144fa0c877117e9748c74501bea842f93d21ee00b0cf922846d9d0b183 \ - --hash=sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69 \ - --hash=sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a \ - --hash=sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62 \ - --hash=sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911 \ - --hash=sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7 \ - --hash=sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a \ - --hash=sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41 \ - --hash=sha256:5fed5cd6102bb4eb843e3315d2bf25fede494509bddadb81e03a859c1bc17b83 \ - --hash=sha256:610a83540765a8d8ce0f351ce42e26e53e1f774a6efb71eb1b41eb01d01c3d12 \ - --hash=sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864 \ - --hash=sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf \ - --hash=sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c \ - --hash=sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2 \ - --hash=sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b \ - --hash=sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0 \ - --hash=sha256:94f99f2b943b354a5b6307d7e8d19f5c423a794462bde2bf310c770ba052b1c4 \ - --hash=sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9 \ - --hash=sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008 \ - --hash=sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862 \ - --hash=sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009 \ - --hash=sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7 \ - --hash=sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f \ - --hash=sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026 \ - --hash=sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f \ - --hash=sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd \ - --hash=sha256:f4daefc971c2d1f82f03097dc6f216744a6cd2ac0f04c68fb935ea2ba2a0d420 \ - --hash=sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14 \ - --hash=sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00 - # via - # -r requirements.in - # gcp-releasetool - # secretstorage -distlib==0.3.9 \ - --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ - --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 - # via virtualenv -docutils==0.21.2 \ - --hash=sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f \ - --hash=sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 - # via readme-renderer -filelock==3.16.1 \ - --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ - --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 - # via virtualenv -gcp-docuploader==0.6.5 \ - --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ - --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea - # via -r requirements.in -gcp-releasetool==2.1.1 \ - --hash=sha256:25639269f4eae510094f9dbed9894977e1966933211eb155a451deebc3fc0b30 \ - --hash=sha256:845f4ded3d9bfe8cc7fdaad789e83f4ea014affa77785259a7ddac4b243e099e - # via -r requirements.in -google-api-core==2.21.0 \ - --hash=sha256:4a152fd11a9f774ea606388d423b68aa7e6d6a0ffe4c8266f74979613ec09f81 \ - --hash=sha256:6869eacb2a37720380ba5898312af79a4d30b8bca1548fb4093e0697dc4bdf5d - # via - # google-cloud-core - # google-cloud-storage -google-auth==2.35.0 \ - --hash=sha256:25df55f327ef021de8be50bad0dfd4a916ad0de96da86cd05661c9297723ad3f \ - --hash=sha256:f4c64ed4e01e8e8b646ef34c018f8bf3338df0c8e37d8b3bba40e7f574a3278a - # via - # gcp-releasetool - # google-api-core - # google-cloud-core - # google-cloud-storage -google-cloud-core==2.4.1 \ - --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ - --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 - # via google-cloud-storage -google-cloud-storage==2.18.2 \ - --hash=sha256:97a4d45c368b7d401ed48c4fdfe86e1e1cb96401c9e199e419d289e2c0370166 \ - --hash=sha256:aaf7acd70cdad9f274d29332673fcab98708d0e1f4dceb5a5356aaef06af4d99 - # via gcp-docuploader -google-crc32c==1.6.0 \ - --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ - --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ - --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ - --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ - --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ - --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ - --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ - --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ - --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ - --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ - --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ - --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ - --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ - --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ - --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ - --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ - --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ - --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ - --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ - --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ - --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ - --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ - --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ - --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ - --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ - --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ - --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 - # via - # google-cloud-storage - # google-resumable-media -google-resumable-media==2.7.2 \ - --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ - --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 - # via google-cloud-storage -googleapis-common-protos==1.65.0 \ - --hash=sha256:2972e6c496f435b92590fd54045060867f3fe9be2c82ab148fc8885035479a63 \ - --hash=sha256:334a29d07cddc3aa01dee4988f9afd9b2916ee2ff49d6b757155dc0d197852c0 - # via google-api-core -idna==3.10 \ - --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ - --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 - # via requests -importlib-metadata==8.5.0 \ - --hash=sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b \ - --hash=sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7 - # via - # -r requirements.in - # keyring - # twine -jaraco-classes==3.4.0 \ - --hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \ - --hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790 - # via keyring -jaraco-context==6.0.1 \ - --hash=sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3 \ - --hash=sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4 - # via keyring -jaraco-functools==4.1.0 \ - --hash=sha256:70f7e0e2ae076498e212562325e805204fc092d7b4c17e0e86c959e249701a9d \ - --hash=sha256:ad159f13428bc4acbf5541ad6dec511f91573b90fba04df61dafa2a1231cf649 - # via keyring -jeepney==0.8.0 \ - --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ - --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755 - # via - # keyring - # secretstorage -jinja2==3.1.5 \ - --hash=sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb \ - --hash=sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb - # via gcp-releasetool -keyring==25.4.1 \ - --hash=sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf \ - --hash=sha256:b07ebc55f3e8ed86ac81dd31ef14e81ace9dd9c3d4b5d77a6e9a2016d0d71a1b - # via - # gcp-releasetool - # twine -markdown-it-py==3.0.0 \ - --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ - --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb - # via rich -markupsafe==3.0.1 \ - --hash=sha256:0778de17cff1acaeccc3ff30cd99a3fd5c50fc58ad3d6c0e0c4c58092b859396 \ - --hash=sha256:0f84af7e813784feb4d5e4ff7db633aba6c8ca64a833f61d8e4eade234ef0c38 \ - --hash=sha256:17b2aea42a7280db02ac644db1d634ad47dcc96faf38ab304fe26ba2680d359a \ - --hash=sha256:242d6860f1fd9191aef5fae22b51c5c19767f93fb9ead4d21924e0bcb17619d8 \ - --hash=sha256:244dbe463d5fb6d7ce161301a03a6fe744dac9072328ba9fc82289238582697b \ - --hash=sha256:26627785a54a947f6d7336ce5963569b5d75614619e75193bdb4e06e21d447ad \ - --hash=sha256:2a4b34a8d14649315c4bc26bbfa352663eb51d146e35eef231dd739d54a5430a \ - --hash=sha256:2ae99f31f47d849758a687102afdd05bd3d3ff7dbab0a8f1587981b58a76152a \ - --hash=sha256:312387403cd40699ab91d50735ea7a507b788091c416dd007eac54434aee51da \ - --hash=sha256:3341c043c37d78cc5ae6e3e305e988532b072329639007fd408a476642a89fd6 \ - --hash=sha256:33d1c36b90e570ba7785dacd1faaf091203d9942bc036118fab8110a401eb1a8 \ - --hash=sha256:3e683ee4f5d0fa2dde4db77ed8dd8a876686e3fc417655c2ece9a90576905344 \ - --hash=sha256:3ffb4a8e7d46ed96ae48805746755fadd0909fea2306f93d5d8233ba23dda12a \ - --hash=sha256:40621d60d0e58aa573b68ac5e2d6b20d44392878e0bfc159012a5787c4e35bc8 \ - --hash=sha256:40f1e10d51c92859765522cbd79c5c8989f40f0419614bcdc5015e7b6bf97fc5 \ - --hash=sha256:45d42d132cff577c92bfba536aefcfea7e26efb975bd455db4e6602f5c9f45e7 \ - --hash=sha256:48488d999ed50ba8d38c581d67e496f955821dc183883550a6fbc7f1aefdc170 \ - --hash=sha256:4935dd7883f1d50e2ffecca0aa33dc1946a94c8f3fdafb8df5c330e48f71b132 \ - --hash=sha256:4c2d64fdba74ad16138300815cfdc6ab2f4647e23ced81f59e940d7d4a1469d9 \ - --hash=sha256:4c8817557d0de9349109acb38b9dd570b03cc5014e8aabf1cbddc6e81005becd \ - --hash=sha256:4ffaaac913c3f7345579db4f33b0020db693f302ca5137f106060316761beea9 \ - --hash=sha256:5a4cb365cb49b750bdb60b846b0c0bc49ed62e59a76635095a179d440540c346 \ - --hash=sha256:62fada2c942702ef8952754abfc1a9f7658a4d5460fabe95ac7ec2cbe0d02abc \ - --hash=sha256:67c519635a4f64e495c50e3107d9b4075aec33634272b5db1cde839e07367589 \ - --hash=sha256:6a54c43d3ec4cf2a39f4387ad044221c66a376e58c0d0e971d47c475ba79c6b5 \ - --hash=sha256:7044312a928a66a4c2a22644147bc61a199c1709712069a344a3fb5cfcf16915 \ - --hash=sha256:730d86af59e0e43ce277bb83970530dd223bf7f2a838e086b50affa6ec5f9295 \ - --hash=sha256:800100d45176652ded796134277ecb13640c1a537cad3b8b53da45aa96330453 \ - --hash=sha256:80fcbf3add8790caddfab6764bde258b5d09aefbe9169c183f88a7410f0f6dea \ - --hash=sha256:82b5dba6eb1bcc29cc305a18a3c5365d2af06ee71b123216416f7e20d2a84e5b \ - --hash=sha256:852dc840f6d7c985603e60b5deaae1d89c56cb038b577f6b5b8c808c97580f1d \ - --hash=sha256:8ad4ad1429cd4f315f32ef263c1342166695fad76c100c5d979c45d5570ed58b \ - --hash=sha256:8ae369e84466aa70f3154ee23c1451fda10a8ee1b63923ce76667e3077f2b0c4 \ - --hash=sha256:93e8248d650e7e9d49e8251f883eed60ecbc0e8ffd6349e18550925e31bd029b \ - --hash=sha256:973a371a55ce9ed333a3a0f8e0bcfae9e0d637711534bcb11e130af2ab9334e7 \ - --hash=sha256:9ba25a71ebf05b9bb0e2ae99f8bc08a07ee8e98c612175087112656ca0f5c8bf \ - --hash=sha256:a10860e00ded1dd0a65b83e717af28845bb7bd16d8ace40fe5531491de76b79f \ - --hash=sha256:a4792d3b3a6dfafefdf8e937f14906a51bd27025a36f4b188728a73382231d91 \ - --hash=sha256:a7420ceda262dbb4b8d839a4ec63d61c261e4e77677ed7c66c99f4e7cb5030dd \ - --hash=sha256:ad91738f14eb8da0ff82f2acd0098b6257621410dcbd4df20aaa5b4233d75a50 \ - --hash=sha256:b6a387d61fe41cdf7ea95b38e9af11cfb1a63499af2759444b99185c4ab33f5b \ - --hash=sha256:b954093679d5750495725ea6f88409946d69cfb25ea7b4c846eef5044194f583 \ - --hash=sha256:bbde71a705f8e9e4c3e9e33db69341d040c827c7afa6789b14c6e16776074f5a \ - --hash=sha256:beeebf760a9c1f4c07ef6a53465e8cfa776ea6a2021eda0d0417ec41043fe984 \ - --hash=sha256:c91b394f7601438ff79a4b93d16be92f216adb57d813a78be4446fe0f6bc2d8c \ - --hash=sha256:c97ff7fedf56d86bae92fa0a646ce1a0ec7509a7578e1ed238731ba13aabcd1c \ - --hash=sha256:cb53e2a99df28eee3b5f4fea166020d3ef9116fdc5764bc5117486e6d1211b25 \ - --hash=sha256:cbf445eb5628981a80f54087f9acdbf84f9b7d862756110d172993b9a5ae81aa \ - --hash=sha256:d06b24c686a34c86c8c1fba923181eae6b10565e4d80bdd7bc1c8e2f11247aa4 \ - --hash=sha256:d98e66a24497637dd31ccab090b34392dddb1f2f811c4b4cd80c230205c074a3 \ - --hash=sha256:db15ce28e1e127a0013dfb8ac243a8e392db8c61eae113337536edb28bdc1f97 \ - --hash=sha256:db842712984e91707437461930e6011e60b39136c7331e971952bb30465bc1a1 \ - --hash=sha256:e24bfe89c6ac4c31792793ad9f861b8f6dc4546ac6dc8f1c9083c7c4f2b335cd \ - --hash=sha256:e81c52638315ff4ac1b533d427f50bc0afc746deb949210bc85f05d4f15fd772 \ - --hash=sha256:e9393357f19954248b00bed7c56f29a25c930593a77630c719653d51e7669c2a \ - --hash=sha256:ee3941769bd2522fe39222206f6dd97ae83c442a94c90f2b7a25d847d40f4729 \ - --hash=sha256:f31ae06f1328595d762c9a2bf29dafd8621c7d3adc130cbb46278079758779ca \ - --hash=sha256:f94190df587738280d544971500b9cafc9b950d32efcb1fba9ac10d84e6aa4e6 \ - --hash=sha256:fa7d686ed9883f3d664d39d5a8e74d3c5f63e603c2e3ff0abcba23eac6542635 \ - --hash=sha256:fb532dd9900381d2e8f48172ddc5a59db4c445a11b9fab40b3b786da40d3b56b \ - --hash=sha256:fe32482b37b4b00c7a52a07211b479653b7fe4f22b2e481b9a9b099d8a430f2f - # via jinja2 -mdurl==0.1.2 \ - --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ - --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba - # via markdown-it-py -more-itertools==10.5.0 \ - --hash=sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef \ - --hash=sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6 - # via - # jaraco-classes - # jaraco-functools -nh3==0.2.18 \ - --hash=sha256:0411beb0589eacb6734f28d5497ca2ed379eafab8ad8c84b31bb5c34072b7164 \ - --hash=sha256:14c5a72e9fe82aea5fe3072116ad4661af5cf8e8ff8fc5ad3450f123e4925e86 \ - --hash=sha256:19aaba96e0f795bd0a6c56291495ff59364f4300d4a39b29a0abc9cb3774a84b \ - --hash=sha256:34c03fa78e328c691f982b7c03d4423bdfd7da69cd707fe572f544cf74ac23ad \ - --hash=sha256:36c95d4b70530b320b365659bb5034341316e6a9b30f0b25fa9c9eff4c27a204 \ - --hash=sha256:3a157ab149e591bb638a55c8c6bcb8cdb559c8b12c13a8affaba6cedfe51713a \ - --hash=sha256:42c64511469005058cd17cc1537578eac40ae9f7200bedcfd1fc1a05f4f8c200 \ - --hash=sha256:5f36b271dae35c465ef5e9090e1fdaba4a60a56f0bb0ba03e0932a66f28b9189 \ - --hash=sha256:6955369e4d9f48f41e3f238a9e60f9410645db7e07435e62c6a9ea6135a4907f \ - --hash=sha256:7b7c2a3c9eb1a827d42539aa64091640bd275b81e097cd1d8d82ef91ffa2e811 \ - --hash=sha256:8ce0f819d2f1933953fca255db2471ad58184a60508f03e6285e5114b6254844 \ - --hash=sha256:94a166927e53972a9698af9542ace4e38b9de50c34352b962f4d9a7d4c927af4 \ - --hash=sha256:a7f1b5b2c15866f2db413a3649a8fe4fd7b428ae58be2c0f6bca5eefd53ca2be \ - --hash=sha256:c8b3a1cebcba9b3669ed1a84cc65bf005728d2f0bc1ed2a6594a992e817f3a50 \ - --hash=sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307 \ - --hash=sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe - # via readme-renderer -nox==2024.10.9 \ - --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ - --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in -packaging==24.1 \ - --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ - --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 - # via - # gcp-releasetool - # nox -pkginfo==1.10.0 \ - --hash=sha256:5df73835398d10db79f8eecd5cd86b1f6d29317589ea70796994d49399af6297 \ - --hash=sha256:889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097 - # via twine -platformdirs==4.3.6 \ - --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ - --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb - # via virtualenv -proto-plus==1.24.0 \ - --hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \ - --hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12 - # via google-api-core -protobuf==5.28.2 \ - --hash=sha256:2c69461a7fcc8e24be697624c09a839976d82ae75062b11a0972e41fd2cd9132 \ - --hash=sha256:35cfcb15f213449af7ff6198d6eb5f739c37d7e4f1c09b5d0641babf2cc0c68f \ - --hash=sha256:52235802093bd8a2811abbe8bf0ab9c5f54cca0a751fdd3f6ac2a21438bffece \ - --hash=sha256:59379674ff119717404f7454647913787034f03fe7049cbef1d74a97bb4593f0 \ - --hash=sha256:5e8a95246d581eef20471b5d5ba010d55f66740942b95ba9b872d918c459452f \ - --hash=sha256:87317e9bcda04a32f2ee82089a204d3a2f0d3c8aeed16568c7daf4756e4f1fe0 \ - --hash=sha256:8ddc60bf374785fb7cb12510b267f59067fa10087325b8e1855b898a0d81d276 \ - --hash=sha256:a8b9403fc70764b08d2f593ce44f1d2920c5077bf7d311fefec999f8c40f78b7 \ - --hash=sha256:c0ea0123dac3399a2eeb1a1443d82b7afc9ff40241433296769f7da42d142ec3 \ - --hash=sha256:ca53faf29896c526863366a52a8f4d88e69cd04ec9571ed6082fa117fac3ab36 \ - --hash=sha256:eeea10f3dc0ac7e6b4933d32db20662902b4ab81bf28df12218aa389e9c2102d - # via - # gcp-docuploader - # gcp-releasetool - # google-api-core - # googleapis-common-protos - # proto-plus -pyasn1==0.6.1 \ - --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ - --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 \ - --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ - --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c - # via google-auth -pycparser==2.22 \ - --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ - --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc - # via cffi -pygments==2.18.0 \ - --hash=sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199 \ - --hash=sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a - # via - # readme-renderer - # rich -pyjwt==2.9.0 \ - --hash=sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850 \ - --hash=sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c - # via gcp-releasetool -pyperclip==1.9.0 \ - --hash=sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310 - # via gcp-releasetool -python-dateutil==2.9.0.post0 \ - --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ - --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 - # via gcp-releasetool -readme-renderer==44.0 \ - --hash=sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151 \ - --hash=sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1 - # via twine -requests==2.32.3 \ - --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ - --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 - # via - # gcp-releasetool - # google-api-core - # google-cloud-storage - # requests-toolbelt - # twine -requests-toolbelt==1.0.0 \ - --hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \ - --hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06 - # via twine -rfc3986==2.0.0 \ - --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ - --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c - # via twine -rich==13.9.2 \ - --hash=sha256:51a2c62057461aaf7152b4d611168f93a9fc73068f8ded2790f29fe2b5366d0c \ - --hash=sha256:8c82a3d3f8dcfe9e734771313e606b39d8247bb6b826e196f4914b333b743cf1 - # via twine -rsa==4.9 \ - --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ - --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 - # via google-auth -secretstorage==3.3.3 \ - --hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \ - --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99 - # via keyring -six==1.16.0 \ - --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ - --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 - # via - # gcp-docuploader - # python-dateutil -tomli==2.0.2 \ - --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ - --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed - # via nox -twine==5.1.1 \ - --hash=sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997 \ - --hash=sha256:9aa0825139c02b3434d913545c7b847a21c835e11597f5255842d457da2322db - # via -r requirements.in -typing-extensions==4.12.2 \ - --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ - --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 - # via - # -r requirements.in - # rich -urllib3==2.2.3 \ - --hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \ - --hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9 - # via - # requests - # twine -virtualenv==20.26.6 \ - --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ - --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 - # via nox -wheel==0.44.0 \ - --hash=sha256:2376a90c98cc337d18623527a97c31797bd02bad0033d41547043a1cbfbe448f \ - --hash=sha256:a29c3f2817e95ab89aa4660681ad547c0e9547f20e75b0562fe7723c9a2a9d49 - # via -r requirements.in -zipp==3.20.2 \ - --hash=sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350 \ - --hash=sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29 - # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -setuptools==75.1.0 \ - --hash=sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2 \ - --hash=sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538 - # via -r requirements.in From e823422e2ffd5e5e18a79d69be0b8efa6e445cdf Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 14 Mar 2025 09:36:26 -0400 Subject: [PATCH 1901/2016] fix: remove setup.cfg configuration for creating universal wheels (#2146) `setup.cfg` contains a setting to create a `Universal Wheel` which is only needed if libraries support both Python 2 and Python 3. This library only supports Python 3 so this setting is no longer needed. See https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#wheels. See similar PR https://togithub.com/googleapis/google-cloud-python/pull/13659 which includes this stack trace ``` running bdist_wheel /tmp/pip-build-env-9o_3w17v/overlay/lib/python3.13/site-packages/setuptools/_distutils/cmd.py:135: SetuptoolsDeprecationWarning: bdist_wheel.universal is deprecated !! ******************************************************************************** With Python 2.7 end-of-life, support for building universal wheels (i.e., wheels that support both Python 2 and Python 3) is being obviated. Please discontinue using this option, or if you still need it, file an issue with pypa/setuptools describing your use case. By 2025-Aug-30, you need to update your project and remove deprecated calls or your builds will no longer be supported. ******************************************************************************** !! ``` --- packages/google-cloud-bigquery/setup.cfg | 4 ---- 1 file changed, 4 deletions(-) diff --git a/packages/google-cloud-bigquery/setup.cfg b/packages/google-cloud-bigquery/setup.cfg index 37b63aa49e3a..d5e734f0f022 100644 --- a/packages/google-cloud-bigquery/setup.cfg +++ b/packages/google-cloud-bigquery/setup.cfg @@ -14,10 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Generated by synthtool. DO NOT EDIT! -[bdist_wheel] -universal = 1 - [pytype] python_version = 3.8 inputs = From 4b4cf2d909dcf7cb6129a7203155772b2a96b43c Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Tue, 18 Mar 2025 15:30:46 -0400 Subject: [PATCH 1902/2016] fix: Allow protobuf 6.x (#2142) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Allow protobuf 6.x * update prerelease_deps nox session so that prerelease versions of protobuf are installed * ensure python-bigquery is installed from source * feat: add support for Python 3.13 * restore replacement * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix typo --------- Co-authored-by: Owl Bot --- .../.github/sync-repo-settings.yaml | 5 +- ...deps-3.12.cfg => prerelease-deps-3.13.cfg} | 2 +- .../{snippets-3.12.cfg => snippets-3.13.cfg} | 2 +- .../{system-3.12.cfg => system-3.13.cfg} | 4 +- .../.kokoro/presubmit/system-3.9.cfg | 2 +- .../google-cloud-bigquery/CONTRIBUTING.rst | 7 +- packages/google-cloud-bigquery/noxfile.py | 78 ++++++++++--------- packages/google-cloud-bigquery/owlbot.py | 35 ++------- packages/google-cloud-bigquery/pyproject.toml | 35 +++++---- .../testing/constraints-3.13.txt | 0 10 files changed, 76 insertions(+), 94 deletions(-) rename packages/google-cloud-bigquery/.kokoro/continuous/{prerelease-deps-3.12.cfg => prerelease-deps-3.13.cfg} (77%) rename packages/google-cloud-bigquery/.kokoro/presubmit/{snippets-3.12.cfg => snippets-3.13.cfg} (81%) rename packages/google-cloud-bigquery/.kokoro/presubmit/{system-3.12.cfg => system-3.13.cfg} (81%) create mode 100644 packages/google-cloud-bigquery/testing/constraints-3.13.txt diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml index 188c44bbd444..1e61b4d6587f 100644 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -10,14 +10,15 @@ branchProtectionRules: requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - - 'Kokoro system-3.12' - - 'Kokoro snippets-3.12' + - 'Kokoro system-3.13' + - 'Kokoro snippets-3.13' - 'cla/google' - 'Samples - Lint' - 'Samples - Python 3.9' - 'Samples - Python 3.10' - 'Samples - Python 3.11' - 'Samples - Python 3.12' + - 'Samples - Python 3.13' - pattern: v2 requiresLinearHistory: true requiresCodeOwnerReviews: true diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.12.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.13.cfg similarity index 77% rename from packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.12.cfg rename to packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.13.cfg index ece962a1703e..99a1e7150b1e 100644 --- a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.12.cfg +++ b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.13.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps-3.12" + value: "prerelease_deps-3.13" } diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.12.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.13.cfg similarity index 81% rename from packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.12.cfg rename to packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.13.cfg index 1381e832349b..0b89f08630bb 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.12.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.13.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "snippets-3.12" + value: "snippets-3.13" } diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.12.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.13.cfg similarity index 81% rename from packages/google-cloud-bigquery/.kokoro/presubmit/system-3.12.cfg rename to packages/google-cloud-bigquery/.kokoro/presubmit/system-3.13.cfg index 789455bd6973..a0e9a010884b 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.12.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.13.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "system-3.12" -} + value: "system-3.13" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.9.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.9.cfg index bd1fb514b2b5..b8ae66b376ff 100644 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.9.cfg +++ b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.9.cfg @@ -4,4 +4,4 @@ env_vars: { key: "NOX_SESSION" value: "system-3.9" -} +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index 8f4d54bce265..b2993768bc6f 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -143,12 +143,13 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system-3.9 -- -k + $ nox -s system-3.13 -- -k .. note:: - System tests are configured to run under Python 3.9, 3.11, 3.12. + System tests are only configured to run under Python 3.9 and 3.13. + For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local auth settings and change some configuration in your project to @@ -238,7 +239,7 @@ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery/blob/main/noxfile.py -We also explicitly decided to support Python 3 beginning with version 3.7. +We also explicitly decided to support Python 3 beginning with version 3.9. Reasons for this include: - Encouraging use of newest versions of Python 3 diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index f069f8d37128..1b118836bd6b 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -38,8 +38,8 @@ ) DEFAULT_PYTHON_VERSION = "3.9" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] -UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] +UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() @@ -362,6 +362,40 @@ def prerelease_deps(session): https://github.com/googleapis/python-bigquery/issues/95 """ + # Because we test minimum dependency versions on the minimum Python + # version, the first version we test with in the unit tests sessions has a + # constraints file containing all dependencies and extras. + with open( + CURRENT_DIRECTORY + / "testing" + / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", + encoding="utf-8", + ) as constraints_file: + constraints_text = constraints_file.read() + + # Ignore leading whitespace and comment lines. + deps = [ + match.group(1) + for match in re.finditer( + r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE + ) + ] + + session.install(*deps) + + session.install( + "--pre", + "--upgrade", + "freezegun", + "google-cloud-datacatalog", + "google-cloud-resource-manager", + "google-cloud-storage", + "google-cloud-testutils", + "psutil", + "pytest", + "pytest-cov", + ) + # PyArrow prerelease packages are published to an alternative PyPI host. # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages session.install( @@ -386,48 +420,18 @@ def prerelease_deps(session): session.install( "--pre", "--upgrade", + "--no-deps", "google-api-core", "google-cloud-bigquery-storage", "google-cloud-core", "google-resumable-media", "db-dtypes", - # Exclude version 1.49.0rc1 which has a known issue. See https://github.com/grpc/grpc/pull/30642 - "grpcio!=1.49.0rc1", - ) - session.install( - "freezegun", - "google-cloud-datacatalog", - "google-cloud-resource-manager", - "google-cloud-storage", - "google-cloud-testutils", - "psutil", - "pytest", - "pytest-cov", + "grpcio", + "protobuf", ) - # Because we test minimum dependency versions on the minimum Python - # version, the first version we test with in the unit tests sessions has a - # constraints file containing all dependencies and extras. - with open( - CURRENT_DIRECTORY - / "testing" - / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", - encoding="utf-8", - ) as constraints_file: - constraints_text = constraints_file.read() - - # Ignore leading whitespace and comment lines. - deps = [ - match.group(1) - for match in re.finditer( - r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE - ) - ] - - # We use --no-deps to ensure that pre-release versions aren't overwritten - # by the version ranges in setup.py. - session.install(*deps) - session.install("--no-deps", "-e", ".[all]") + # Ensure that this library is installed from source + session.install("-e", ".", "--no-deps") # Print out prerelease package versions. session.run("python", "-m", "pip", "freeze") diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index c8efaa98d442..fceeaa1b6d5c 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -55,6 +55,9 @@ "geopandas": "https://geopandas.org/", "pandas": "https://pandas.pydata.org/pandas-docs/stable/", }, + system_test_python_versions=["3.9", "3.13"], + unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13"], + default_python_version="3.9", ) # BigQuery has a custom multiprocessing note @@ -86,41 +89,13 @@ python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6""", "python3.9 -m pip install --upgrade --quiet nox virtualenv", ) -s.replace( - "CONTRIBUTING.rst", - "3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", - "3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", -) -s.replace( - "CONTRIBUTING.rst", - r" \$ nox -s system-3.8 -- -k ", - r" $ nox -s system-3.9 -- -k ", -) -s.replace( - "CONTRIBUTING.rst", - r"""System tests are only configured to run under Python 3.8. - For expediency, we do not run them in older versions of Python 3.""", - r"System tests are configured to run under Python 3.9, 3.11, 3.12.", -) + s.replace( "CONTRIBUTING.rst", r"\$ nox -s py-3.8", r"$ nox -s py-3.9", ) -s.replace( - "CONTRIBUTING.rst", - r"""- `Python 3.7`_ -- `Python 3.8`_ -""", - r"", -) -s.replace( - "CONTRIBUTING.rst", - r""".. _Python 3.7: https://docs.python.org/3.7/ -.. _Python 3.8: https://docs.python.org/3.8/ -""", - r"", -) + s.replace( "scripts/readme-gen/templates/install_deps.tmpl.rst", r"Samples are compatible with Python 3.7", diff --git a/packages/google-cloud-bigquery/pyproject.toml b/packages/google-cloud-bigquery/pyproject.toml index 8822fc57dfc8..17bf4fd202c1 100644 --- a/packages/google-cloud-bigquery/pyproject.toml +++ b/packages/google-cloud-bigquery/pyproject.toml @@ -37,17 +37,18 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", "Topic :: Internet", ] dependencies = [ - "google-api-core[grpc] >= 2.11.1, < 3.0.0dev", - "google-auth >= 2.14.1, < 3.0.0dev", - "google-cloud-core >= 2.4.1, < 3.0.0dev", - "google-resumable-media >= 2.0.0, < 3.0dev", + "google-api-core[grpc] >= 2.11.1, < 3.0.0", + "google-auth >= 2.14.1, < 3.0.0", + "google-cloud-core >= 2.4.1, < 3.0.0", + "google-resumable-media >= 2.0.0, < 3.0.0", "packaging >= 24.2.0", - "python-dateutil >= 2.8.2, < 3.0dev", - "requests >= 2.21.0, < 3.0.0dev", + "python-dateutil >= 2.8.2, < 3.0.0", + "requests >= 2.21.0, < 3.0.0", ] dynamic = ["version"] @@ -59,37 +60,37 @@ Repository = "https://github.com/googleapis/python-bigquery" # moved back to optional due to bloat. See # https://github.com/googleapis/python-bigquery/issues/1196 for more background. bqstorage = [ - "google-cloud-bigquery-storage >= 2.18.0, < 3.0.0dev", + "google-cloud-bigquery-storage >= 2.18.0, < 3.0.0", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. # See: https://github.com/googleapis/python-bigquery/issues/83 The # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.47.0, < 2.0dev", - "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", + "grpcio >= 1.47.0, < 2.0.0", + "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", "pyarrow >= 4.0.0", ] pandas = [ "pandas >= 1.1.4", - "pandas-gbq >= 0.26.1; python_version >= '3.8'", - "grpcio >= 1.47.0, < 2.0dev", - "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", + "pandas-gbq >= 0.26.1", + "grpcio >= 1.47.0, < 2.0.0", + "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", "pyarrow >= 3.0.0", - "db-dtypes >= 1.0.4, < 2.0.0dev", + "db-dtypes >= 1.0.4, < 2.0.0", ] ipywidgets = ["ipywidgets >= 7.7.1", "ipykernel >= 6.2.0"] -geopandas = ["geopandas >= 0.9.0, < 2.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] +geopandas = ["geopandas >= 0.9.0, < 2.0.0", "Shapely >= 1.8.4, < 3.0.0"] ipython = ["ipython >= 7.23.1", "bigquery-magics >= 0.6.0"] -tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] +tqdm = ["tqdm >= 4.7.4, < 5.0.0"] opentelemetry = [ "opentelemetry-api >= 1.1.0", "opentelemetry-sdk >= 1.1.0", "opentelemetry-instrumentation >= 0.20b0", ] bigquery_v2 = [ - "proto-plus >= 1.22.3, < 2.0.0dev", - "protobuf >= 3.20.2, < 6.0.0dev, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. + "proto-plus >= 1.22.3, < 2.0.0", + "protobuf >= 3.20.2, < 7.0.0, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. ] all = [ "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,tqdm,opentelemetry,bigquery_v2]", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.13.txt b/packages/google-cloud-bigquery/testing/constraints-3.13.txt new file mode 100644 index 000000000000..e69de29bb2d1 From 937438a6d7d91710eb008a2a5c5131a324204789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 19 Mar 2025 10:17:38 -0500 Subject: [PATCH 1903/2016] chore: refactor cell data parsing to use classes for easier overrides (#2144) * chore: refactor cell data parsing to use classes for easier overrides * improve error messages with urls for valueerrors * Update tests/unit/_helpers/test_cell_data_parser.py * remove unreachable code --- .../google/cloud/bigquery/_helpers.py | 499 ++++++++++-------- .../google/cloud/bigquery/query.py | 27 +- .../google/cloud/bigquery/table.py | 2 +- .../unit/_helpers/test_cell_data_parser.py | 467 ++++++++++++++++ .../tests/unit/_helpers/test_from_json.py | 157 ------ .../test_scalar_query_param_parser.py | 93 ++++ .../tests/unit/test__helpers.py | 479 ----------------- 7 files changed, 858 insertions(+), 866 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/_helpers/test_cell_data_parser.py delete mode 100644 packages/google-cloud-bigquery/tests/unit/_helpers/test_from_json.py create mode 100644 packages/google-cloud-bigquery/tests/unit/_helpers/test_scalar_query_param_parser.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index d40217c4d6ee..4ba3ccf9348f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -21,8 +21,9 @@ import math import re import os +import textwrap import warnings -from typing import Optional, Union, Any, Tuple, Type +from typing import Any, Optional, Tuple, Type, Union from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -133,243 +134,305 @@ def _not_null(value, field): return value is not None or (field is not None and field.mode != "NULLABLE") -def _int_from_json(value, field): - """Coerce 'value' to an int, if set or not nullable.""" - if _not_null(value, field): - return int(value) - - -def _interval_from_json( - value: Optional[str], field -) -> Optional[relativedelta.relativedelta]: - """Coerce 'value' to an interval, if set or not nullable.""" - if not _not_null(value, field): - return None - if value is None: - raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") - - parsed = _INTERVAL_PATTERN.match(value) - if parsed is None: - raise ValueError(f"got interval: '{value}' with unexpected format") - - calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1 - years = calendar_sign * int(parsed.group("years")) - months = calendar_sign * int(parsed.group("months")) - days = int(parsed.group("days")) - time_sign = -1 if parsed.group("time_sign") == "-" else 1 - hours = time_sign * int(parsed.group("hours")) - minutes = time_sign * int(parsed.group("minutes")) - seconds = time_sign * int(parsed.group("seconds")) - fraction = parsed.group("fraction") - microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0 - - return relativedelta.relativedelta( - years=years, - months=months, - days=days, - hours=hours, - minutes=minutes, - seconds=seconds, - microseconds=microseconds, - ) - - -def _float_from_json(value, field): - """Coerce 'value' to a float, if set or not nullable.""" - if _not_null(value, field): - return float(value) - - -def _decimal_from_json(value, field): - """Coerce 'value' to a Decimal, if set or not nullable.""" - if _not_null(value, field): - return decimal.Decimal(value) - - -def _bool_from_json(value, field): - """Coerce 'value' to a bool, if set or not nullable.""" - if _not_null(value, field): - return value.lower() in ["t", "true", "1"] - - -def _string_from_json(value, _): - """NOOP string -> string coercion""" - return value - - -def _bytes_from_json(value, field): - """Base64-decode value""" - if _not_null(value, field): - return base64.standard_b64decode(_to_bytes(value)) - - -def _timestamp_from_json(value, field): - """Coerce 'value' to a datetime, if set or not nullable.""" - if _not_null(value, field): - # value will be a integer in seconds, to microsecond precision, in UTC. - return _datetime_from_microseconds(int(value)) +class CellDataParser: + """Converter from BigQuery REST resource to Python value for RowIterator and similar classes. + See: "rows" field of + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list and + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults. + """ -def _timestamp_query_param_from_json(value, field): - """Coerce 'value' to a datetime, if set or not nullable. - - Args: - value (str): The timestamp. - - field (google.cloud.bigquery.schema.SchemaField): - The field corresponding to the value. + def to_py(self, resource, field): + def default_converter(value, field): + _warn_unknown_field_type(field) + return value - Returns: - Optional[datetime.datetime]: - The parsed datetime object from - ``value`` if the ``field`` is not null (otherwise it is - :data:`None`). - """ - if _not_null(value, field): - # Canonical formats for timestamps in BigQuery are flexible. See: - # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type - # The separator between the date and time can be 'T' or ' '. - value = value.replace(" ", "T", 1) - # The UTC timezone may be formatted as Z or +00:00. - value = value.replace("Z", "") - value = value.replace("+00:00", "") - - if "." in value: - # YYYY-MM-DDTHH:MM:SS.ffffff - return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU).replace( - tzinfo=UTC - ) + converter = getattr( + self, f"{field.field_type.lower()}_to_py", default_converter + ) + if field.mode == "REPEATED": + return [converter(item["v"], field) for item in resource] else: - # YYYY-MM-DDTHH:MM:SS - return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION).replace( - tzinfo=UTC + return converter(resource, field) + + def bool_to_py(self, value, field): + """Coerce 'value' to a bool, if set or not nullable.""" + if _not_null(value, field): + # TODO(tswast): Why does _not_null care if the field is NULLABLE or + # REQUIRED? Do we actually need such client-side validation? + if value is None: + raise TypeError(f"got None for required boolean field {field}") + return value.lower() in ("t", "true", "1") + + def boolean_to_py(self, value, field): + """Coerce 'value' to a bool, if set or not nullable.""" + return self.bool_to_py(value, field) + + def integer_to_py(self, value, field): + """Coerce 'value' to an int, if set or not nullable.""" + if _not_null(value, field): + return int(value) + + def int64_to_py(self, value, field): + """Coerce 'value' to an int, if set or not nullable.""" + return self.integer_to_py(value, field) + + def interval_to_py( + self, value: Optional[str], field + ) -> Optional[relativedelta.relativedelta]: + """Coerce 'value' to an interval, if set or not nullable.""" + if not _not_null(value, field): + return None + if value is None: + raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") + + parsed = _INTERVAL_PATTERN.match(value) + if parsed is None: + raise ValueError( + textwrap.dedent( + f""" + Got interval: '{value}' with unexpected format. + Expected interval in canonical format of "[sign]Y-M [sign]D [sign]H:M:S[.F]". + See: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type + for more information. + """ + ), ) - else: - return None + calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1 + years = calendar_sign * int(parsed.group("years")) + months = calendar_sign * int(parsed.group("months")) + days = int(parsed.group("days")) + time_sign = -1 if parsed.group("time_sign") == "-" else 1 + hours = time_sign * int(parsed.group("hours")) + minutes = time_sign * int(parsed.group("minutes")) + seconds = time_sign * int(parsed.group("seconds")) + fraction = parsed.group("fraction") + microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0 + + return relativedelta.relativedelta( + years=years, + months=months, + days=days, + hours=hours, + minutes=minutes, + seconds=seconds, + microseconds=microseconds, + ) -def _datetime_from_json(value, field): - """Coerce 'value' to a datetime, if set or not nullable. + def float_to_py(self, value, field): + """Coerce 'value' to a float, if set or not nullable.""" + if _not_null(value, field): + return float(value) - Args: - value (str): The timestamp. - field (google.cloud.bigquery.schema.SchemaField): - The field corresponding to the value. + def float64_to_py(self, value, field): + """Coerce 'value' to a float, if set or not nullable.""" + return self.float_to_py(value, field) - Returns: - Optional[datetime.datetime]: - The parsed datetime object from - ``value`` if the ``field`` is not null (otherwise it is - :data:`None`). - """ - if _not_null(value, field): - if "." in value: - # YYYY-MM-DDTHH:MM:SS.ffffff - return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU) - else: - # YYYY-MM-DDTHH:MM:SS - return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION) - else: - return None + def numeric_to_py(self, value, field): + """Coerce 'value' to a Decimal, if set or not nullable.""" + if _not_null(value, field): + return decimal.Decimal(value) + def bignumeric_to_py(self, value, field): + """Coerce 'value' to a Decimal, if set or not nullable.""" + return self.numeric_to_py(value, field) -def _date_from_json(value, field): - """Coerce 'value' to a datetime date, if set or not nullable""" - if _not_null(value, field): - # value will be a string, in YYYY-MM-DD form. - return _date_from_iso8601_date(value) + def string_to_py(self, value, _): + """NOOP string -> string coercion""" + return value + def geography_to_py(self, value, _): + """NOOP string -> string coercion""" + return value -def _time_from_json(value, field): - """Coerce 'value' to a datetime date, if set or not nullable""" - if _not_null(value, field): - if len(value) == 8: # HH:MM:SS - fmt = _TIMEONLY_WO_MICROS - elif len(value) == 15: # HH:MM:SS.micros - fmt = _TIMEONLY_W_MICROS + def bytes_to_py(self, value, field): + """Base64-decode value""" + if _not_null(value, field): + return base64.standard_b64decode(_to_bytes(value)) + + def timestamp_to_py(self, value, field): + """Coerce 'value' to a datetime, if set or not nullable.""" + if _not_null(value, field): + # value will be a integer in seconds, to microsecond precision, in UTC. + return _datetime_from_microseconds(int(value)) + + def datetime_to_py(self, value, field): + """Coerce 'value' to a datetime, if set or not nullable. + + Args: + value (str): The timestamp. + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. + + Returns: + Optional[datetime.datetime]: + The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). + """ + if _not_null(value, field): + if "." in value: + # YYYY-MM-DDTHH:MM:SS.ffffff + return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU) + else: + # YYYY-MM-DDTHH:MM:SS + return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION) else: - raise ValueError("Unknown time format: {}".format(value)) - return datetime.datetime.strptime(value, fmt).time() + return None + def date_to_py(self, value, field): + """Coerce 'value' to a datetime date, if set or not nullable""" + if _not_null(value, field): + # value will be a string, in YYYY-MM-DD form. + return _date_from_iso8601_date(value) + + def time_to_py(self, value, field): + """Coerce 'value' to a datetime date, if set or not nullable""" + if _not_null(value, field): + if len(value) == 8: # HH:MM:SS + fmt = _TIMEONLY_WO_MICROS + elif len(value) == 15: # HH:MM:SS.micros + fmt = _TIMEONLY_W_MICROS + else: + raise ValueError( + textwrap.dedent( + f""" + Got {repr(value)} with unknown time format. + Expected HH:MM:SS or HH:MM:SS.micros. See + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type + for more information. + """ + ), + ) + return datetime.datetime.strptime(value, fmt).time() + + def record_to_py(self, value, field): + """Coerce 'value' to a mapping, if set or not nullable.""" + if _not_null(value, field): + record = {} + record_iter = zip(field.fields, value["f"]) + for subfield, cell in record_iter: + record[subfield.name] = self.to_py(cell["v"], subfield) + return record + + def struct_to_py(self, value, field): + """Coerce 'value' to a mapping, if set or not nullable.""" + return self.record_to_py(value, field) + + def json_to_py(self, value, field): + """Coerce 'value' to a Pythonic JSON representation.""" + if _not_null(value, field): + return json.loads(value) + else: + return None -def _record_from_json(value, field): - """Coerce 'value' to a mapping, if set or not nullable.""" - if _not_null(value, field): - record = {} - record_iter = zip(field.fields, value["f"]) - for subfield, cell in record_iter: - record[subfield.name] = _field_from_json(cell["v"], subfield) - return record + def _range_element_to_py(self, value, field_element_type): + """Coerce 'value' to a range element value.""" + # Avoid circular imports by importing here. + from google.cloud.bigquery import schema + if value == "UNBOUNDED": + return None + if field_element_type.element_type in _SUPPORTED_RANGE_ELEMENTS: + return self.to_py( + value, + schema.SchemaField("placeholder", field_element_type.element_type), + ) + else: + raise ValueError( + textwrap.dedent( + f""" + Got unsupported range element type: {field_element_type.element_type}. + Exptected one of {repr(_SUPPORTED_RANGE_ELEMENTS)}. See: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declare_a_range_type + for more information. + """ + ), + ) -def _json_from_json(value, field): - """Coerce 'value' to a Pythonic JSON representation.""" - if _not_null(value, field): - return json.loads(value) - else: - return None + def range_to_py(self, value, field): + """Coerce 'value' to a range, if set or not nullable. + + Args: + value (str): The literal representation of the range. + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. + + Returns: + Optional[dict]: + The parsed range object from ``value`` if the ``field`` is not + null (otherwise it is :data:`None`). + """ + if _not_null(value, field): + if _RANGE_PATTERN.match(value): + start, end = value[1:-1].split(", ") + start = self._range_element_to_py(start, field.range_element_type) + end = self._range_element_to_py(end, field.range_element_type) + return {"start": start, "end": end} + else: + raise ValueError( + textwrap.dedent( + f""" + Got unknown format for range value: {value}. + Expected format '[lower_bound, upper_bound)'. See: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_with_literal + for more information. + """ + ), + ) -def _range_element_from_json(value, field): - """Coerce 'value' to a range element value.""" - if value == "UNBOUNDED": - return None - if field.element_type in _SUPPORTED_RANGE_ELEMENTS: - return _CELLDATA_FROM_JSON[field.element_type](value, field.element_type) - else: - raise ValueError(f"Unsupported range element type: {field.element_type}") +CELL_DATA_PARSER = CellDataParser() -def _range_from_json(value, field): - """Coerce 'value' to a range, if set or not nullable. +class ScalarQueryParamParser(CellDataParser): + """Override of CellDataParser to handle the differences in the response from query params. - Args: - value (str): The literal representation of the range. - field (google.cloud.bigquery.schema.SchemaField): - The field corresponding to the value. - - Returns: - Optional[dict]: - The parsed range object from ``value`` if the ``field`` is not - null (otherwise it is :data:`None`). + See: "value" field of + https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter#QueryParameterValue """ - if _not_null(value, field): - if _RANGE_PATTERN.match(value): - start, end = value[1:-1].split(", ") - start = _range_element_from_json(start, field.range_element_type) - end = _range_element_from_json(end, field.range_element_type) - return {"start": start, "end": end} - else: - raise ValueError(f"Unknown format for range value: {value}") - else: - return None + def timestamp_to_py(self, value, field): + """Coerce 'value' to a datetime, if set or not nullable. + + Args: + value (str): The timestamp. + + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. + + Returns: + Optional[datetime.datetime]: + The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). + """ + if _not_null(value, field): + # Canonical formats for timestamps in BigQuery are flexible. See: + # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type + # The separator between the date and time can be 'T' or ' '. + value = value.replace(" ", "T", 1) + # The UTC timezone may be formatted as Z or +00:00. + value = value.replace("Z", "") + value = value.replace("+00:00", "") + + if "." in value: + # YYYY-MM-DDTHH:MM:SS.ffffff + return datetime.datetime.strptime( + value, _RFC3339_MICROS_NO_ZULU + ).replace(tzinfo=UTC) + else: + # YYYY-MM-DDTHH:MM:SS + return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION).replace( + tzinfo=UTC + ) + else: + return None -# Parse BigQuery API response JSON into a Python representation. -_CELLDATA_FROM_JSON = { - "INTEGER": _int_from_json, - "INT64": _int_from_json, - "INTERVAL": _interval_from_json, - "FLOAT": _float_from_json, - "FLOAT64": _float_from_json, - "NUMERIC": _decimal_from_json, - "BIGNUMERIC": _decimal_from_json, - "BOOLEAN": _bool_from_json, - "BOOL": _bool_from_json, - "STRING": _string_from_json, - "GEOGRAPHY": _string_from_json, - "BYTES": _bytes_from_json, - "TIMESTAMP": _timestamp_from_json, - "DATETIME": _datetime_from_json, - "DATE": _date_from_json, - "TIME": _time_from_json, - "RECORD": _record_from_json, - "JSON": _json_from_json, - "RANGE": _range_from_json, -} -_QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) -_QUERY_PARAMS_FROM_JSON["TIMESTAMP"] = _timestamp_query_param_from_json +SCALAR_QUERY_PARAM_PARSER = ScalarQueryParamParser() def _field_to_index_mapping(schema): @@ -377,18 +440,6 @@ def _field_to_index_mapping(schema): return {f.name: i for i, f in enumerate(schema)} -def _field_from_json(resource, field): - def default_converter(value, field): - _warn_unknown_field_type(field) - return value - - converter = _CELLDATA_FROM_JSON.get(field.field_type, default_converter) - if field.mode == "REPEATED": - return [converter(item["v"], field) for item in resource] - else: - return converter(resource, field) - - def _row_tuple_from_json(row, schema): """Convert JSON row data to row with appropriate types. @@ -410,7 +461,7 @@ def _row_tuple_from_json(row, schema): row_data = [] for field, cell in zip(schema, row["f"]): - row_data.append(_field_from_json(cell["v"], field)) + row_data.append(CELL_DATA_PARSER.to_py(cell["v"], field)) return tuple(row_data) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index f1090a7dcc8b..8745c09f5fb1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -18,11 +18,11 @@ import copy import datetime import decimal -from typing import Any, Optional, Dict, Union +from typing import Any, cast, Optional, Dict, Union from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery import _helpers from google.cloud.bigquery._helpers import _rows_from_json -from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM from google.cloud.bigquery._helpers import _SUPPORTED_RANGE_ELEMENTS @@ -571,6 +571,9 @@ def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": Returns: google.cloud.bigquery.query.ScalarQueryParameter: Instance """ + # Import here to avoid circular imports. + from google.cloud.bigquery import schema + name = resource.get("name") type_ = resource["parameterType"]["type"] @@ -578,7 +581,9 @@ def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": # from the back-end - the latter omits it for None values. value = resource.get("parameterValue", {}).get("value") if value is not None: - converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + converted = _helpers.SCALAR_QUERY_PARAM_PARSER.to_py( + value, schema.SchemaField(cast(str, name), type_) + ) else: converted = None @@ -693,13 +698,20 @@ def _from_api_repr_struct(cls, resource): @classmethod def _from_api_repr_scalar(cls, resource): + """Converts REST resource into a list of scalar values.""" + # Import here to avoid circular imports. + from google.cloud.bigquery import schema + name = resource.get("name") array_type = resource["parameterType"]["arrayType"]["type"] parameter_value = resource.get("parameterValue", {}) array_values = parameter_value.get("arrayValues", ()) values = [value["value"] for value in array_values] converted = [ - _QUERY_PARAMS_FROM_JSON[array_type](value, None) for value in values + _helpers.SCALAR_QUERY_PARAM_PARSER.to_py( + value, schema.SchemaField(name, array_type) + ) + for value in values ] return cls(name, array_type, converted) @@ -850,6 +862,9 @@ def from_api_repr(cls, resource: dict) -> "StructQueryParameter": Returns: google.cloud.bigquery.query.StructQueryParameter: Instance """ + # Import here to avoid circular imports. + from google.cloud.bigquery import schema + name = resource.get("name") instance = cls(name) type_resources = {} @@ -877,7 +892,9 @@ def from_api_repr(cls, resource: dict) -> "StructQueryParameter": converted = ArrayQueryParameter.from_api_repr(struct_resource) else: value = value["value"] - converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + converted = _helpers.SCALAR_QUERY_PARAM_PARSER.to_py( + value, schema.SchemaField(cast(str, name), type_) + ) instance.struct_values[key] = converted return instance diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 4d79d60dab39..f139e44ade25 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -3533,7 +3533,7 @@ def _row_iterator_page_columns(schema, response): def get_column_data(field_index, field): for row in rows: - yield _helpers._field_from_json(row["f"][field_index]["v"], field) + yield _helpers.CELL_DATA_PARSER.to_py(row["f"][field_index]["v"], field) for field_index, field in enumerate(schema): columns.append(get_column_data(field_index, field)) diff --git a/packages/google-cloud-bigquery/tests/unit/_helpers/test_cell_data_parser.py b/packages/google-cloud-bigquery/tests/unit/_helpers/test_cell_data_parser.py new file mode 100644 index 000000000000..14721a26c160 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/_helpers/test_cell_data_parser.py @@ -0,0 +1,467 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import datetime +import decimal +import json + +from dateutil.relativedelta import relativedelta +import pytest + +import google.cloud.bigquery.schema + + +def create_field(mode="NULLABLE", type_="IGNORED", name="test_field", **kwargs): + return google.cloud.bigquery.schema.SchemaField(name, type_, mode=mode, **kwargs) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +@pytest.fixture +def object_under_test(mut): + return mut.CELL_DATA_PARSER + + +ALL_TYPES = { + "BOOL", + "BOOLEAN", + "BYTES", + "INTEGER", + "INT64", + "INTERVAL", + "FLOAT", + "FLOAT64", + "NUMERIC", + "BIGNUMERIC", + "STRING", + "GEOGRAPHY", + "TIMESTAMP", + "DATETIME", + "DATE", + "TIME", + "RECORD", + "STRUCT", + "JSON", + "RANGE", +} + +TYPES_WITH_CLIENT_SIDE_NULL_VALIDATION = ALL_TYPES - { + "STRING", + "GEOGRAPHY", +} + + +@pytest.mark.parametrize( + "type_", + list(sorted(ALL_TYPES)), +) +def test_to_py_w_none_nullable(object_under_test, type_): + assert object_under_test.to_py(None, create_field("NULLABLE", type_)) is None + + +@pytest.mark.parametrize("type_", list(sorted(TYPES_WITH_CLIENT_SIDE_NULL_VALIDATION))) +def test_to_py_w_none_required(object_under_test, type_): + with pytest.raises(TypeError): + object_under_test.to_py(None, create_field("REQUIRED", type_)) + + +def test_interval_to_py_w_invalid_format(object_under_test): + with pytest.raises(ValueError, match="NOT_AN_INTERVAL"): + object_under_test.interval_to_py("NOT_AN_INTERVAL", create_field()) + + +@pytest.mark.parametrize( + ("value", "expected"), + ( + ("0-0 0 0:0:0", relativedelta()), + # SELECT INTERVAL X YEAR + ("-10000-0 0 0:0:0", relativedelta(years=-10000)), + ("-1-0 0 0:0:0", relativedelta(years=-1)), + ("1-0 0 0:0:0", relativedelta(years=1)), + ("10000-0 0 0:0:0", relativedelta(years=10000)), + # SELECT INTERVAL X MONTH + ("-0-11 0 0:0:0", relativedelta(months=-11)), + ("-0-1 0 0:0:0", relativedelta(months=-1)), + ("0-1 0 0:0:0", relativedelta(months=1)), + ("0-11 0 0:0:0", relativedelta(months=11)), + # SELECT INTERVAL X DAY + ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)), + ("0-0 -1 0:0:0", relativedelta(days=-1)), + ("0-0 1 0:0:0", relativedelta(days=1)), + ("0-0 3660000 0:0:0", relativedelta(days=3660000)), + # SELECT INTERVAL X HOUR + ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)), + ("0-0 0 -1:0:0", relativedelta(hours=-1)), + ("0-0 0 1:0:0", relativedelta(hours=1)), + ("0-0 0 87840000:0:0", relativedelta(hours=87840000)), + # SELECT INTERVAL X MINUTE + ("0-0 0 -0:59:0", relativedelta(minutes=-59)), + ("0-0 0 -0:1:0", relativedelta(minutes=-1)), + ("0-0 0 0:1:0", relativedelta(minutes=1)), + ("0-0 0 0:59:0", relativedelta(minutes=59)), + # SELECT INTERVAL X SECOND + ("0-0 0 -0:0:59", relativedelta(seconds=-59)), + ("0-0 0 -0:0:1", relativedelta(seconds=-1)), + ("0-0 0 0:0:1", relativedelta(seconds=1)), + ("0-0 0 0:0:59", relativedelta(seconds=59)), + # SELECT (INTERVAL -1 SECOND) / 1000000 + ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)), + ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)), + ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)), + ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)), + ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)), + # Test with multiple digits in each section. + ( + "32-11 45 67:16:23.987654", + relativedelta( + years=32, + months=11, + days=45, + hours=67, + minutes=16, + seconds=23, + microseconds=987654, + ), + ), + ( + "-32-11 -45 -67:16:23.987654", + relativedelta( + years=-32, + months=-11, + days=-45, + hours=-67, + minutes=-16, + seconds=-23, + microseconds=-987654, + ), + ), + # Test with mixed +/- sections. + ( + "9999-9 -999999 9999999:59:59.999999", + relativedelta( + years=9999, + months=9, + days=-999999, + hours=9999999, + minutes=59, + seconds=59, + microseconds=999999, + ), + ), + # Test with fraction that is not microseconds. + ("0-0 0 0:0:42.", relativedelta(seconds=42)), + ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)), + ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)), + ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)), + ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)), + # Fractional seconds can cause rounding problems if cast to float. See: + # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 + ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)), + ( + "0-0 0 01:01:01.010101", + relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101), + ), + ( + "0-0 0 09:09:09.090909", + relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909), + ), + ( + "0-0 0 11:11:11.111111", + relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111), + ), + ( + "0-0 0 19:16:23.987654", + relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654), + ), + # Nanoseconds are not expected, but should not cause error. + ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)), + ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)), + ), +) +def test_interval_to_py_w_string_values(object_under_test, value, expected): + got = object_under_test.interval_to_py(value, create_field()) + assert got == expected + + +def test_integer_to_py_w_string_value(object_under_test): + coerced = object_under_test.integer_to_py("42", object()) + assert coerced == 42 + + +def test_integer_to_py_w_float_value(object_under_test): + coerced = object_under_test.integer_to_py(42.0, object()) + assert coerced == 42 + + +def test_json_to_py_w_json_field(object_under_test): + data_field = create_field("REQUIRED", "data", "JSON") + + value = json.dumps( + {"v": {"key": "value"}}, + ) + + expected_output = {"v": {"key": "value"}} + coerced_output = object_under_test.json_to_py(value, data_field) + assert coerced_output == expected_output + + +def test_json_to_py_w_string_value(object_under_test): + coerced = object_under_test.json_to_py('"foo"', create_field()) + assert coerced == "foo" + + +def test_float_to_py_w_string_value(object_under_test): + coerced = object_under_test.float_to_py("3.1415", object()) + assert coerced == 3.1415 + + +def test_float_to_py_w_float_value(object_under_test): + coerced = object_under_test.float_to_py(3.1415, object()) + assert coerced == 3.1415 + + +def test_numeric_to_py_w_string_value(object_under_test): + coerced = object_under_test.numeric_to_py("3.1415", object()) + assert coerced == decimal.Decimal("3.1415") + + +def test_numeric_to_py_w_float_value(object_under_test): + coerced = object_under_test.numeric_to_py(3.1415, object()) + # There is no exact float representation of 3.1415. + assert coerced == decimal.Decimal(3.1415) + + +def test_bool_to_py_w_value_t(object_under_test): + coerced = object_under_test.bool_to_py("T", object()) + assert coerced is True + + +def test_bool_to_py_w_value_true(object_under_test): + coerced = object_under_test.bool_to_py("True", object()) + assert coerced is True + + +def test_bool_to_py_w_value_1(object_under_test): + coerced = object_under_test.bool_to_py("1", object()) + assert coerced is True + + +def test_bool_to_py_w_value_other(object_under_test): + coerced = object_under_test.bool_to_py("f", object()) + assert coerced is False + + +def test_string_to_py_w_string_value(object_under_test): + coerced = object_under_test.string_to_py("Wonderful!", object()) + assert coerced == "Wonderful!" + + +def test_bytes_to_py_w_base64_encoded_bytes(object_under_test): + expected = b"Wonderful!" + encoded = base64.standard_b64encode(expected) + coerced = object_under_test.bytes_to_py(encoded, object()) + assert coerced == expected + + +def test_bytes_to_py_w_base64_encoded_text(object_under_test): + expected = b"Wonderful!" + encoded = base64.standard_b64encode(expected).decode("ascii") + coerced = object_under_test.bytes_to_py(encoded, object()) + assert coerced == expected + + +def test_timestamp_to_py_w_string_int_value(object_under_test): + from google.cloud._helpers import _EPOCH + + coerced = object_under_test.timestamp_to_py("1234567", object()) + assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) + + +def test_timestamp_to_py_w_int_value(object_under_test): + from google.cloud._helpers import _EPOCH + + coerced = object_under_test.timestamp_to_py(1234567, object()) + assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) + + +def test_datetime_to_py_w_string_value(object_under_test): + coerced = object_under_test.datetime_to_py("2016-12-02T18:51:33", object()) + assert coerced == datetime.datetime(2016, 12, 2, 18, 51, 33) + + +def test_datetime_to_py_w_microseconds(object_under_test): + coerced = object_under_test.datetime_to_py("2015-05-22T10:11:12.987654", object()) + assert coerced == datetime.datetime(2015, 5, 22, 10, 11, 12, 987654) + + +def test_date_to_py_w_string_value(object_under_test): + coerced = object_under_test.date_to_py("1987-09-22", object()) + assert coerced == datetime.date(1987, 9, 22) + + +def test_time_to_py_w_string_value(object_under_test): + coerced = object_under_test.time_to_py("12:12:27", object()) + assert coerced == datetime.time(12, 12, 27) + + +def test_time_to_py_w_subsecond_string_value(object_under_test): + coerced = object_under_test.time_to_py("12:12:27.123456", object()) + assert coerced == datetime.time(12, 12, 27, 123456) + + +def test_time_to_py_w_bogus_string_value(object_under_test): + with pytest.raises(ValueError): + object_under_test.time_to_py("12:12:27.123", object()) + + +def test_range_to_py_w_wrong_format(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type="DATE", + ) + with pytest.raises(ValueError): + object_under_test.range_to_py("[2009-06-172019-06-17)", range_field) + + +def test_range_to_py_w_wrong_element_type(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type=google.cloud.bigquery.schema.FieldElementType( + element_type="TIME" + ), + ) + with pytest.raises(ValueError): + object_under_test.range_to_py("[15:31:38, 15:50:38)", range_field) + + +def test_range_to_py_w_unbounded_value(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type="DATE", + ) + coerced = object_under_test.range_to_py("[UNBOUNDED, 2019-06-17)", range_field) + assert coerced == {"start": None, "end": datetime.date(2019, 6, 17)} + + +def test_range_to_py_w_date_value(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type="DATE", + ) + coerced = object_under_test.range_to_py("[2009-06-17, 2019-06-17)", range_field) + assert coerced == { + "start": datetime.date(2009, 6, 17), + "end": datetime.date(2019, 6, 17), + } + + +def test_range_to_py_w_datetime_value(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type=google.cloud.bigquery.schema.FieldElementType( + element_type="DATETIME" + ), + ) + coerced = object_under_test.range_to_py( + "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", range_field + ) + assert coerced == { + "start": datetime.datetime(2009, 6, 17, 13, 45, 30), + "end": datetime.datetime(2019, 6, 17, 13, 45, 30), + } + + +def test_range_to_py_w_timestamp_value(object_under_test): + from google.cloud._helpers import _EPOCH + + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type=google.cloud.bigquery.schema.FieldElementType( + element_type="TIMESTAMP" + ), + ) + coerced = object_under_test.range_to_py("[1234567, 1234789)", range_field) + assert coerced == { + "start": _EPOCH + datetime.timedelta(seconds=1, microseconds=234567), + "end": _EPOCH + datetime.timedelta(seconds=1, microseconds=234789), + } + + +def test_record_to_py_w_nullable_subfield_none(object_under_test): + subfield = create_field("NULLABLE", "INTEGER", name="age") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": None}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"age": None} + + +def test_record_to_py_w_scalar_subfield(object_under_test): + subfield = create_field("REQUIRED", "INTEGER", name="age") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": 42}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"age": 42} + + +def test_record_to_py_w_scalar_subfield_geography(object_under_test): + subfield = create_field("REQUIRED", "GEOGRAPHY", name="geo") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": "POINT(1, 2)"}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"geo": "POINT(1, 2)"} + + +def test_record_to_py_w_repeated_subfield(object_under_test): + subfield = create_field("REPEATED", "STRING", name="color") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": [{"v": "red"}, {"v": "yellow"}, {"v": "blue"}]}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"color": ["red", "yellow", "blue"]} + + +def test_record_to_py_w_record_subfield(object_under_test): + full_name = create_field("REQUIRED", "STRING", name="full_name") + area_code = create_field("REQUIRED", "STRING", name="area_code") + local_number = create_field("REQUIRED", "STRING", name="local_number") + rank = create_field("REQUIRED", "INTEGER", name="rank") + phone = create_field( + "NULLABLE", "RECORD", name="phone", fields=[area_code, local_number, rank] + ) + person = create_field( + "REQUIRED", "RECORD", name="person", fields=[full_name, phone] + ) + value = { + "f": [ + {"v": "Phred Phlyntstone"}, + {"v": {"f": [{"v": "800"}, {"v": "555-1212"}, {"v": 1}]}}, + ] + } + expected = { + "full_name": "Phred Phlyntstone", + "phone": {"area_code": "800", "local_number": "555-1212", "rank": 1}, + } + coerced = object_under_test.record_to_py(value, person) + assert coerced == expected diff --git a/packages/google-cloud-bigquery/tests/unit/_helpers/test_from_json.py b/packages/google-cloud-bigquery/tests/unit/_helpers/test_from_json.py deleted file mode 100644 index 65b054f446bb..000000000000 --- a/packages/google-cloud-bigquery/tests/unit/_helpers/test_from_json.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dateutil.relativedelta import relativedelta -import pytest - -from google.cloud.bigquery.schema import SchemaField - - -def create_field(mode="NULLABLE", type_="IGNORED"): - return SchemaField("test_field", type_, mode=mode) - - -@pytest.fixture -def mut(): - from google.cloud.bigquery import _helpers - - return _helpers - - -def test_interval_from_json_w_none_nullable(mut): - got = mut._interval_from_json(None, create_field()) - assert got is None - - -def test_interval_from_json_w_none_required(mut): - with pytest.raises(TypeError): - mut._interval_from_json(None, create_field(mode="REQUIRED")) - - -def test_interval_from_json_w_invalid_format(mut): - with pytest.raises(ValueError, match="NOT_AN_INTERVAL"): - mut._interval_from_json("NOT_AN_INTERVAL", create_field()) - - -@pytest.mark.parametrize( - ("value", "expected"), - ( - ("0-0 0 0:0:0", relativedelta()), - # SELECT INTERVAL X YEAR - ("-10000-0 0 0:0:0", relativedelta(years=-10000)), - ("-1-0 0 0:0:0", relativedelta(years=-1)), - ("1-0 0 0:0:0", relativedelta(years=1)), - ("10000-0 0 0:0:0", relativedelta(years=10000)), - # SELECT INTERVAL X MONTH - ("-0-11 0 0:0:0", relativedelta(months=-11)), - ("-0-1 0 0:0:0", relativedelta(months=-1)), - ("0-1 0 0:0:0", relativedelta(months=1)), - ("0-11 0 0:0:0", relativedelta(months=11)), - # SELECT INTERVAL X DAY - ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)), - ("0-0 -1 0:0:0", relativedelta(days=-1)), - ("0-0 1 0:0:0", relativedelta(days=1)), - ("0-0 3660000 0:0:0", relativedelta(days=3660000)), - # SELECT INTERVAL X HOUR - ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)), - ("0-0 0 -1:0:0", relativedelta(hours=-1)), - ("0-0 0 1:0:0", relativedelta(hours=1)), - ("0-0 0 87840000:0:0", relativedelta(hours=87840000)), - # SELECT INTERVAL X MINUTE - ("0-0 0 -0:59:0", relativedelta(minutes=-59)), - ("0-0 0 -0:1:0", relativedelta(minutes=-1)), - ("0-0 0 0:1:0", relativedelta(minutes=1)), - ("0-0 0 0:59:0", relativedelta(minutes=59)), - # SELECT INTERVAL X SECOND - ("0-0 0 -0:0:59", relativedelta(seconds=-59)), - ("0-0 0 -0:0:1", relativedelta(seconds=-1)), - ("0-0 0 0:0:1", relativedelta(seconds=1)), - ("0-0 0 0:0:59", relativedelta(seconds=59)), - # SELECT (INTERVAL -1 SECOND) / 1000000 - ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)), - ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)), - ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)), - ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)), - ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)), - # Test with multiple digits in each section. - ( - "32-11 45 67:16:23.987654", - relativedelta( - years=32, - months=11, - days=45, - hours=67, - minutes=16, - seconds=23, - microseconds=987654, - ), - ), - ( - "-32-11 -45 -67:16:23.987654", - relativedelta( - years=-32, - months=-11, - days=-45, - hours=-67, - minutes=-16, - seconds=-23, - microseconds=-987654, - ), - ), - # Test with mixed +/- sections. - ( - "9999-9 -999999 9999999:59:59.999999", - relativedelta( - years=9999, - months=9, - days=-999999, - hours=9999999, - minutes=59, - seconds=59, - microseconds=999999, - ), - ), - # Test with fraction that is not microseconds. - ("0-0 0 0:0:42.", relativedelta(seconds=42)), - ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)), - ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)), - ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)), - ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)), - # Fractional seconds can cause rounding problems if cast to float. See: - # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 - ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)), - ( - "0-0 0 01:01:01.010101", - relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101), - ), - ( - "0-0 0 09:09:09.090909", - relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909), - ), - ( - "0-0 0 11:11:11.111111", - relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111), - ), - ( - "0-0 0 19:16:23.987654", - relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654), - ), - # Nanoseconds are not expected, but should not cause error. - ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)), - ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)), - ), -) -def test_w_string_values(mut, value, expected): - got = mut._interval_from_json(value, create_field()) - assert got == expected diff --git a/packages/google-cloud-bigquery/tests/unit/_helpers/test_scalar_query_param_parser.py b/packages/google-cloud-bigquery/tests/unit/_helpers/test_scalar_query_param_parser.py new file mode 100644 index 000000000000..8e0d2a34e9c6 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/_helpers/test_scalar_query_param_parser.py @@ -0,0 +1,93 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import pytest + +import google.cloud.bigquery.schema + + +def create_field(mode="NULLABLE", type_="IGNORED"): + return google.cloud.bigquery.schema.SchemaField("test_field", type_, mode=mode) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +@pytest.fixture +def object_under_test(mut): + return mut.SCALAR_QUERY_PARAM_PARSER + + +def test_timestamp_to_py_w_none_nullable(object_under_test): + assert object_under_test.timestamp_to_py(None, create_field()) is None + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ( + "2016-12-20 15:58:27.339328+00:00", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20 15:58:27+00:00", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ( + "2016-12-20T15:58:27.339328+00:00", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20T15:58:27+00:00", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ( + "2016-12-20 15:58:27.339328Z", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20 15:58:27Z", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ( + "2016-12-20T15:58:27.339328Z", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20T15:58:27Z", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ], +) +def test_timestamp_to_py_w_timestamp_valid(object_under_test, value, expected): + assert object_under_test.timestamp_to_py(value, create_field()) == expected + + +def test_timestamp_to_py_w_timestamp_invalid(object_under_test): + with pytest.raises(ValueError): + object_under_test.timestamp_to_py("definitely-not-a-timestamp", create_field()) diff --git a/packages/google-cloud-bigquery/tests/unit/test__helpers.py b/packages/google-cloud-bigquery/tests/unit/test__helpers.py index adba6327c2de..4e53236e39e7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__helpers.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import base64 import datetime import decimal import json @@ -133,484 +132,6 @@ def test_w_value(self): self.assertTrue(self._call_fut(object(), object())) -class Test_int_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _int_from_json - - return _int_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("42", object()) - self.assertEqual(coerced, 42) - - def test_w_float_value(self): - coerced = self._call_fut(42, object()) - self.assertEqual(coerced, 42) - - -class Test_json_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _json_from_json - - return _json_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_json_field(self): - data_field = _Field("REQUIRED", "data", "JSON") - - value = json.dumps( - {"v": {"key": "value"}}, - ) - - expected_output = {"v": {"key": "value"}} - coerced_output = self._call_fut(value, data_field) - self.assertEqual(coerced_output, expected_output) - - def test_w_string_value(self): - coerced = self._call_fut('"foo"', object()) - self.assertEqual(coerced, "foo") - - -class Test_float_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _float_from_json - - return _float_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("3.1415", object()) - self.assertEqual(coerced, 3.1415) - - def test_w_float_value(self): - coerced = self._call_fut(3.1415, object()) - self.assertEqual(coerced, 3.1415) - - -class Test_decimal_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _decimal_from_json - - return _decimal_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("3.1415", object()) - self.assertEqual(coerced, decimal.Decimal("3.1415")) - - def test_w_float_value(self): - coerced = self._call_fut(3.1415, object()) - # There is no exact float representation of 3.1415. - self.assertEqual(coerced, decimal.Decimal(3.1415)) - - -class Test_bool_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _bool_from_json - - return _bool_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(AttributeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_value_t(self): - coerced = self._call_fut("T", object()) - self.assertTrue(coerced) - - def test_w_value_true(self): - coerced = self._call_fut("True", object()) - self.assertTrue(coerced) - - def test_w_value_1(self): - coerced = self._call_fut("1", object()) - self.assertTrue(coerced) - - def test_w_value_other(self): - coerced = self._call_fut("f", object()) - self.assertFalse(coerced) - - -class Test_string_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _string_from_json - - return _string_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - self.assertIsNone(self._call_fut(None, _Field("REQUIRED"))) - - def test_w_string_value(self): - coerced = self._call_fut("Wonderful!", object()) - self.assertEqual(coerced, "Wonderful!") - - -class Test_bytes_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _bytes_from_json - - return _bytes_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_base64_encoded_bytes(self): - expected = b"Wonderful!" - encoded = base64.standard_b64encode(expected) - coerced = self._call_fut(encoded, object()) - self.assertEqual(coerced, expected) - - def test_w_base64_encoded_text(self): - expected = b"Wonderful!" - encoded = base64.standard_b64encode(expected).decode("ascii") - coerced = self._call_fut(encoded, object()) - self.assertEqual(coerced, expected) - - -class Test_timestamp_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _timestamp_from_json - - return _timestamp_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_int_value(self): - from google.cloud._helpers import _EPOCH - - coerced = self._call_fut("1234567", object()) - self.assertEqual( - coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) - ) - - def test_w_int_value(self): - from google.cloud._helpers import _EPOCH - - coerced = self._call_fut(1234567, object()) - self.assertEqual( - coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) - ) - - -class Test_timestamp_query_param_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery import _helpers - - return _helpers._timestamp_query_param_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_timestamp_valid(self): - from google.cloud._helpers import UTC - - samples = [ - ( - "2016-12-20 15:58:27.339328+00:00", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20 15:58:27+00:00", - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27.339328+00:00", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27+00:00", - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ( - "2016-12-20 15:58:27.339328Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20 15:58:27Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27.339328Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ] - for timestamp_str, expected_result in samples: - self.assertEqual( - self._call_fut(timestamp_str, _Field("NULLABLE")), expected_result - ) - - def test_w_timestamp_invalid(self): - with self.assertRaises(ValueError): - self._call_fut("definitely-not-a-timestamp", _Field("NULLABLE")) - - -class Test_datetime_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _datetime_from_json - - return _datetime_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("2016-12-02T18:51:33", object()) - self.assertEqual(coerced, datetime.datetime(2016, 12, 2, 18, 51, 33)) - - def test_w_microseconds(self): - coerced = self._call_fut("2015-05-22T10:11:12.987654", object()) - self.assertEqual(coerced, datetime.datetime(2015, 5, 22, 10, 11, 12, 987654)) - - -class Test_date_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _date_from_json - - return _date_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("1987-09-22", object()) - self.assertEqual(coerced, datetime.date(1987, 9, 22)) - - -class Test_time_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _time_from_json - - return _time_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("12:12:27", object()) - self.assertEqual(coerced, datetime.time(12, 12, 27)) - - def test_w_subsecond_string_value(self): - coerced = self._call_fut("12:12:27.123456", object()) - self.assertEqual(coerced, datetime.time(12, 12, 27, 123456)) - - def test_w_bogus_string_value(self): - with self.assertRaises(ValueError): - self._call_fut("12:12:27.123", object()) - - -class Test_range_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _range_from_json - - return _range_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_wrong_format(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="DATE"), - ) - with self.assertRaises(ValueError): - self._call_fut("[2009-06-172019-06-17)", range_field) - - def test_w_wrong_element_type(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="TIME"), - ) - with self.assertRaises(ValueError): - self._call_fut("[15:31:38, 15:50:38)", range_field) - - def test_w_unbounded_value(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="DATE"), - ) - coerced = self._call_fut("[UNBOUNDED, 2019-06-17)", range_field) - self.assertEqual( - coerced, - {"start": None, "end": datetime.date(2019, 6, 17)}, - ) - - def test_w_date_value(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="DATE"), - ) - coerced = self._call_fut("[2009-06-17, 2019-06-17)", range_field) - self.assertEqual( - coerced, - { - "start": datetime.date(2009, 6, 17), - "end": datetime.date(2019, 6, 17), - }, - ) - - def test_w_datetime_value(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="DATETIME"), - ) - coerced = self._call_fut( - "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", range_field - ) - self.assertEqual( - coerced, - { - "start": datetime.datetime(2009, 6, 17, 13, 45, 30), - "end": datetime.datetime(2019, 6, 17, 13, 45, 30), - }, - ) - - def test_w_timestamp_value(self): - from google.cloud._helpers import _EPOCH - - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="TIMESTAMP"), - ) - coerced = self._call_fut("[1234567, 1234789)", range_field) - self.assertEqual( - coerced, - { - "start": _EPOCH + datetime.timedelta(seconds=1, microseconds=234567), - "end": _EPOCH + datetime.timedelta(seconds=1, microseconds=234789), - }, - ) - - -class Test_record_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _record_from_json - - return _record_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_nullable_subfield_none(self): - subfield = _Field("NULLABLE", "age", "INTEGER") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": None}]} - coerced = self._call_fut(value, field) - self.assertEqual(coerced, {"age": None}) - - def test_w_scalar_subfield(self): - subfield = _Field("REQUIRED", "age", "INTEGER") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": 42}]} - coerced = self._call_fut(value, field) - self.assertEqual(coerced, {"age": 42}) - - def test_w_scalar_subfield_geography(self): - subfield = _Field("REQUIRED", "geo", "GEOGRAPHY") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": "POINT(1, 2)"}]} - coerced = self._call_fut(value, field) - self.assertEqual(coerced, {"geo": "POINT(1, 2)"}) - - def test_w_repeated_subfield(self): - subfield = _Field("REPEATED", "color", "STRING") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": [{"v": "red"}, {"v": "yellow"}, {"v": "blue"}]}]} - coerced = self._call_fut(value, field) - self.assertEqual(coerced, {"color": ["red", "yellow", "blue"]}) - - def test_w_record_subfield(self): - full_name = _Field("REQUIRED", "full_name", "STRING") - area_code = _Field("REQUIRED", "area_code", "STRING") - local_number = _Field("REQUIRED", "local_number", "STRING") - rank = _Field("REQUIRED", "rank", "INTEGER") - phone = _Field( - "NULLABLE", "phone", "RECORD", fields=[area_code, local_number, rank] - ) - person = _Field("REQUIRED", "person", "RECORD", fields=[full_name, phone]) - value = { - "f": [ - {"v": "Phred Phlyntstone"}, - {"v": {"f": [{"v": "800"}, {"v": "555-1212"}, {"v": 1}]}}, - ] - } - expected = { - "full_name": "Phred Phlyntstone", - "phone": {"area_code": "800", "local_number": "555-1212", "rank": 1}, - } - coerced = self._call_fut(value, person) - self.assertEqual(coerced, expected) - - class Test_field_to_index_mapping(unittest.TestCase): def _call_fut(self, schema): from google.cloud.bigquery._helpers import _field_to_index_mapping From b3f7595d0d63f5800ef73b68dc6c9af511c306dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 20 Mar 2025 11:08:48 -0500 Subject: [PATCH 1904/2016] fix: avoid "Unable to determine type" warning with JSON columns in `to_dataframe` (#1876) * add regression tests for empty dataframe * fix arrow test to be compatible with old pyarrow --- .../google/cloud/bigquery/_helpers.py | 15 ++++ .../google/cloud/bigquery/_pandas_helpers.py | 1 + .../google/cloud/bigquery/_pyarrow_helpers.py | 26 ++++++- .../google/cloud/bigquery/table.py | 4 +- .../tests/system/test_arrow.py | 29 ++++++++ .../tests/system/test_pandas.py | 26 +++++++ .../test_data_frame_cell_data_parser.py | 71 +++++++++++++++++++ .../tests/unit/test__pyarrow_helpers.py | 12 +++- .../tests/unit/test_table_arrow.py | 66 +++++++++++------ .../tests/unit/test_table_pandas.py | 4 ++ 10 files changed, 230 insertions(+), 24 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/_helpers/test_data_frame_cell_data_parser.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 4ba3ccf9348f..76c4f1fbdfd3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -387,6 +387,21 @@ def range_to_py(self, value, field): CELL_DATA_PARSER = CellDataParser() +class DataFrameCellDataParser(CellDataParser): + """Override of CellDataParser to handle differences in expection of values in DataFrame-like outputs. + + This is used to turn the output of the REST API into a pyarrow Table, + emulating the serialized arrow from the BigQuery Storage Read API. + """ + + def json_to_py(self, value, _): + """No-op because DataFrame expects string for JSON output.""" + return value + + +DATA_FRAME_CELL_DATA_PARSER = DataFrameCellDataParser() + + class ScalarQueryParamParser(CellDataParser): """Override of CellDataParser to handle the differences in the response from query params. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index feb6b3adbb81..457eb9078938 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -158,6 +158,7 @@ def finish(self): b"ARROW:extension:metadata": b'{"encoding": "WKT"}', }, "DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"}, + "JSON": {b"ARROW:extension:name": b"google:sqlType:json"}, } diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py index 034e020eef14..03c70bf63b2b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pyarrow_helpers.py @@ -15,7 +15,9 @@ """Shared helper functions for connecting BigQuery and pyarrow. NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, -instead. See: go/pandas-gbq-and-bigframes-redundancy and +instead. See: go/pandas-gbq-and-bigframes-redundancy, +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/bigquery_to_pyarrow.py +and https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py """ @@ -26,6 +28,14 @@ except ImportError: pyarrow = None +try: + import db_dtypes # type: ignore + + db_dtypes_import_exception = None +except ImportError as exc: + db_dtypes = None + db_dtypes_import_exception = exc + def pyarrow_datetime(): return pyarrow.timestamp("us", tz=None) @@ -67,12 +77,18 @@ def pyarrow_timestamp(): "GEOGRAPHY": pyarrow.string, "INT64": pyarrow.int64, "INTEGER": pyarrow.int64, + # Normally, we'd prefer JSON type built-in to pyarrow (added in 19.0.0), + # but we'd like this to map as closely to the BQ Storage API as + # possible, which uses the string() dtype, as JSON support in Arrow + # predates JSON support in BigQuery by several years. + "JSON": pyarrow.string, "NUMERIC": pyarrow_numeric, "STRING": pyarrow.string, "TIME": pyarrow_time, "TIMESTAMP": pyarrow_timestamp, } + # DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead. _ARROW_SCALAR_IDS_TO_BQ = { # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes pyarrow.bool_().id: "BOOL", @@ -97,6 +113,9 @@ def pyarrow_timestamp(): pyarrow.large_string().id: "STRING", # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id: "NUMERIC", + # NOTE: all extension types (e.g. json_, uuid, db_dtypes.JSONArrowType) + # have the same id (31 as of version 19.0.1), so these should not be + # matched by id. } _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric @@ -107,6 +126,9 @@ def pyarrow_timestamp(): def bq_to_arrow_scalars(bq_scalar: str): """ + DEPRECATED: update pandas_gbq.schema.bigquery_to_pyarrow, instead, which is + to be added in https://github.com/googleapis/python-bigquery-pandas/pull/893. + Returns: The Arrow scalar type that the input BigQuery scalar type maps to. If it cannot find the BigQuery scalar, return None. @@ -116,6 +138,8 @@ def bq_to_arrow_scalars(bq_scalar: str): def arrow_scalar_ids_to_bq(arrow_scalar: Any): """ + DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead. + Returns: The BigQuery scalar type that the input arrow scalar type maps to. If it cannot find the arrow scalar, return None. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f139e44ade25..238ff6bebd98 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -3533,7 +3533,9 @@ def _row_iterator_page_columns(schema, response): def get_column_data(field_index, field): for row in rows: - yield _helpers.CELL_DATA_PARSER.to_py(row["f"][field_index]["v"], field) + yield _helpers.DATA_FRAME_CELL_DATA_PARSER.to_py( + row["f"][field_index]["v"], field + ) for field_index, field in enumerate(schema): columns.append(get_column_data(field_index, field)) diff --git a/packages/google-cloud-bigquery/tests/system/test_arrow.py b/packages/google-cloud-bigquery/tests/system/test_arrow.py index 82cf11f859d3..f2aed656c30d 100644 --- a/packages/google-cloud-bigquery/tests/system/test_arrow.py +++ b/packages/google-cloud-bigquery/tests/system/test_arrow.py @@ -194,3 +194,32 @@ def test_list_rows_range_csv( range_type = schema.field("range_date").type assert range_type == expected_type + + +def test_to_arrow_query_with_empty_results(bigquery_client): + """ + JSON regression test for https://github.com/googleapis/python-bigquery/issues/1580. + """ + job = bigquery_client.query( + """ + select + 123 as int_col, + '' as string_col, + to_json('{}') as json_col, + struct(to_json('[]') as json_field, -1 as int_field) as struct_col, + [to_json('null')] as json_array_col, + from unnest([]) + """ + ) + table = job.to_arrow() + assert list(table.column_names) == [ + "int_col", + "string_col", + "json_col", + "struct_col", + "json_array_col", + ] + assert table.shape == (0, 5) + struct_type = table.field("struct_col").type + assert struct_type.get_field_index("json_field") == 0 + assert struct_type.get_field_index("int_field") == 1 diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index e65fca27ea21..01f552435f86 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -1304,6 +1304,32 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id): ] +def test_to_dataframe_query_with_empty_results(bigquery_client): + """ + JSON regression test for https://github.com/googleapis/python-bigquery/issues/1580. + """ + job = bigquery_client.query( + """ + select + 123 as int_col, + '' as string_col, + to_json('{}') as json_col, + struct(to_json('[]') as json_field, -1 as int_field) as struct_col, + [to_json('null')] as json_array_col, + from unnest([]) + """ + ) + df = job.to_dataframe() + assert list(df.columns) == [ + "int_col", + "string_col", + "json_col", + "struct_col", + "json_array_col", + ] + assert len(df.index) == 0 + + def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): wkt = pytest.importorskip("shapely.wkt") bigquery_client.query( diff --git a/packages/google-cloud-bigquery/tests/unit/_helpers/test_data_frame_cell_data_parser.py b/packages/google-cloud-bigquery/tests/unit/_helpers/test_data_frame_cell_data_parser.py new file mode 100644 index 000000000000..c3332dc895ca --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/_helpers/test_data_frame_cell_data_parser.py @@ -0,0 +1,71 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import google.cloud.bigquery.schema + + +def create_field(mode="NULLABLE", type_="IGNORED", name="test_field", **kwargs): + return google.cloud.bigquery.schema.SchemaField(name, type_, mode=mode, **kwargs) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +@pytest.fixture +def object_under_test(mut): + return mut.DATA_FRAME_CELL_DATA_PARSER + + +def test_json_to_py_doesnt_parse_json(object_under_test): + coerced = object_under_test.json_to_py('{"key":"value"}', create_field()) + assert coerced == '{"key":"value"}' + + +def test_json_to_py_repeated_doesnt_parse_json(object_under_test): + coerced = object_under_test.json_to_py('{"key":"value"}', create_field("REPEATED")) + assert coerced == '{"key":"value"}' + + +def test_record_to_py_doesnt_parse_json(object_under_test): + subfield = create_field(type_="JSON", name="json") + field = create_field(fields=[subfield]) + value = {"f": [{"v": '{"key":"value"}'}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"json": '{"key":"value"}'} + + +def test_record_to_py_doesnt_parse_repeated_json(object_under_test): + subfield = create_field("REPEATED", "JSON", name="json") + field = create_field("REQUIRED", fields=[subfield]) + value = { + "f": [ + { + "v": [ + {"v": '{"key":"value0"}'}, + {"v": '{"key":"value1"}'}, + {"v": '{"key":"value2"}'}, + ] + } + ] + } + coerced = object_under_test.record_to_py(value, field) + assert coerced == { + "json": ['{"key":"value0"}', '{"key":"value1"}', '{"key":"value2"}'] + } diff --git a/packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py index f0a872c88414..06fc2eb856a8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py @@ -27,8 +27,16 @@ def module_under_test(): def test_bq_to_arrow_scalars(module_under_test): assert ( - module_under_test.bq_to_arrow_scalars("BIGNUMERIC") - == module_under_test.pyarrow_bignumeric + module_under_test.bq_to_arrow_scalars("BIGNUMERIC")() + == module_under_test.pyarrow_bignumeric() + ) + assert ( + # Normally, we'd prefer JSON type built-in to pyarrow (added in 19.0.0), + # but we'd like this to map as closely to the BQ Storage API as + # possible, which uses the string() dtype, as JSON support in Arrow + # predates JSON support in BigQuery by several years. + module_under_test.bq_to_arrow_scalars("JSON")() + == pyarrow.string() ) assert module_under_test.bq_to_arrow_scalars("UNKNOWN_TYPE") is None diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_arrow.py b/packages/google-cloud-bigquery/tests/unit/test_table_arrow.py index 6f1e6f76a39b..830c4ceb7eab 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table_arrow.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table_arrow.py @@ -28,6 +28,7 @@ def test_to_arrow_with_jobs_query_response(): "fields": [ {"name": "name", "type": "STRING", "mode": "NULLABLE"}, {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + {"name": "json", "type": "JSON", "mode": "NULLABLE"}, ] }, "jobReference": { @@ -37,15 +38,21 @@ def test_to_arrow_with_jobs_query_response(): }, "totalRows": "9", "rows": [ - {"f": [{"v": "Tiarra"}, {"v": "6"}]}, - {"f": [{"v": "Timothy"}, {"v": "325"}]}, - {"f": [{"v": "Tina"}, {"v": "26"}]}, - {"f": [{"v": "Tierra"}, {"v": "10"}]}, - {"f": [{"v": "Tia"}, {"v": "17"}]}, - {"f": [{"v": "Tiara"}, {"v": "22"}]}, - {"f": [{"v": "Tiana"}, {"v": "6"}]}, - {"f": [{"v": "Tiffany"}, {"v": "229"}]}, - {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + {"f": [{"v": "Tiarra"}, {"v": "6"}, {"v": "123"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}, {"v": '{"key":"value"}'}]}, + {"f": [{"v": "Tina"}, {"v": "26"}, {"v": "[1,2,3]"}]}, + { + "f": [ + {"v": "Tierra"}, + {"v": "10"}, + {"v": '{"aKey": {"bKey": {"cKey": -123}}}'}, + ] + }, + {"f": [{"v": "Tia"}, {"v": "17"}, {"v": None}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}, {"v": '"some-json-string"'}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}, {"v": '{"nullKey":null}'}]}, + {"f": [{"v": "Tiffany"}, {"v": "229"}, {"v": '""'}]}, + {"f": [{"v": "Tiffani"}, {"v": "8"}, {"v": "[]"}]}, ], "totalBytesProcessed": "154775150", "jobComplete": True, @@ -65,7 +72,7 @@ def test_to_arrow_with_jobs_query_response(): ) records = rows.to_arrow() - assert records.column_names == ["name", "number"] + assert records.column_names == ["name", "number", "json"] assert records["name"].to_pylist() == [ "Tiarra", "Timothy", @@ -78,6 +85,17 @@ def test_to_arrow_with_jobs_query_response(): "Tiffani", ] assert records["number"].to_pylist() == [6, 325, 26, 10, 17, 22, 6, 229, 8] + assert records["json"].to_pylist() == [ + "123", + '{"key":"value"}', + "[1,2,3]", + '{"aKey": {"bKey": {"cKey": -123}}}', + None, + '"some-json-string"', + '{"nullKey":null}', + '""', + "[]", + ] def test_to_arrow_with_jobs_query_response_and_max_results(): @@ -87,6 +105,7 @@ def test_to_arrow_with_jobs_query_response_and_max_results(): "fields": [ {"name": "name", "type": "STRING", "mode": "NULLABLE"}, {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + {"name": "json", "type": "JSON", "mode": "NULLABLE"}, ] }, "jobReference": { @@ -96,15 +115,21 @@ def test_to_arrow_with_jobs_query_response_and_max_results(): }, "totalRows": "9", "rows": [ - {"f": [{"v": "Tiarra"}, {"v": "6"}]}, - {"f": [{"v": "Timothy"}, {"v": "325"}]}, - {"f": [{"v": "Tina"}, {"v": "26"}]}, - {"f": [{"v": "Tierra"}, {"v": "10"}]}, - {"f": [{"v": "Tia"}, {"v": "17"}]}, - {"f": [{"v": "Tiara"}, {"v": "22"}]}, - {"f": [{"v": "Tiana"}, {"v": "6"}]}, - {"f": [{"v": "Tiffany"}, {"v": "229"}]}, - {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + {"f": [{"v": "Tiarra"}, {"v": "6"}, {"v": "123"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}, {"v": '{"key":"value"}'}]}, + {"f": [{"v": "Tina"}, {"v": "26"}, {"v": "[1,2,3]"}]}, + { + "f": [ + {"v": "Tierra"}, + {"v": "10"}, + {"v": '{"aKey": {"bKey": {"cKey": -123}}}'}, + ] + }, + {"f": [{"v": "Tia"}, {"v": "17"}, {"v": None}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}, {"v": '"some-json-string"'}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}, {"v": '{"nullKey":null}'}]}, + {"f": [{"v": "Tiffany"}, {"v": "229"}, {"v": '""'}]}, + {"f": [{"v": "Tiffani"}, {"v": "8"}, {"v": "[]"}]}, ], "totalBytesProcessed": "154775150", "jobComplete": True, @@ -125,10 +150,11 @@ def test_to_arrow_with_jobs_query_response_and_max_results(): ) records = rows.to_arrow() - assert records.column_names == ["name", "number"] + assert records.column_names == ["name", "number", "json"] assert records["name"].to_pylist() == [ "Tiarra", "Timothy", "Tina", ] assert records["number"].to_pylist() == [6, 325, 26] + assert records["json"].to_pylist() == ["123", '{"key":"value"}', "[1,2,3]"] diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py index 9e42fb737c7c..94737732b390 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py @@ -59,6 +59,7 @@ def test_to_dataframe_nullable_scalars( pyarrow.field( "timestamp_col", pyarrow.timestamp("us", tz=datetime.timezone.utc) ), + pyarrow.field("json_col", pyarrow.string()), ] ) arrow_table = pyarrow.Table.from_pydict( @@ -78,6 +79,7 @@ def test_to_dataframe_nullable_scalars( 2021, 8, 9, 13, 30, 44, 123456, tzinfo=datetime.timezone.utc ) ], + "json_col": ["{}"], }, schema=arrow_schema, ) @@ -94,6 +96,7 @@ def test_to_dataframe_nullable_scalars( bigquery.SchemaField("string_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("timestamp_col", "TIMESTAMP"), + bigquery.SchemaField("json_col", "JSON"), ] mock_client = mock.create_autospec(bigquery.Client) mock_client.project = "test-proj" @@ -117,6 +120,7 @@ def test_to_dataframe_nullable_scalars( assert df.dtypes["string_col"].name == "object" assert df.dtypes["time_col"].name == "dbtime" assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" + assert df.dtypes["json_col"].name == "object" # Check for expected values. assert df["bignumeric_col"][0] == decimal.Decimal("123.456789101112131415") From 143999f4505348347b0e682d5c0c07c8925731ac Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 25 Mar 2025 13:45:20 -0500 Subject: [PATCH 1905/2016] chore(main): release 3.31.0 (#2139) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 21 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 91d0a362dd05..4b115464cd31 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,27 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.31.0](https://github.com/googleapis/python-bigquery/compare/v3.30.0...v3.31.0) (2025-03-20) + + +### Features + +* Add query text and total bytes processed to RowIterator ([#2140](https://github.com/googleapis/python-bigquery/issues/2140)) ([2d5f932](https://github.com/googleapis/python-bigquery/commit/2d5f9320d7103bc64c7ba496ba54bb0ef52b5605)) +* Add support for Python 3.13 ([0842aa1](https://github.com/googleapis/python-bigquery/commit/0842aa10967b1d8395cfb43e52c8ea091b381870)) + + +### Bug Fixes + +* Adding property setter for table constraints, [#1990](https://github.com/googleapis/python-bigquery/issues/1990) ([#2092](https://github.com/googleapis/python-bigquery/issues/2092)) ([f8572dd](https://github.com/googleapis/python-bigquery/commit/f8572dd86595361bae82c3232b2c0d159690a7b7)) +* Allow protobuf 6.x ([0842aa1](https://github.com/googleapis/python-bigquery/commit/0842aa10967b1d8395cfb43e52c8ea091b381870)) +* Avoid "Unable to determine type" warning with JSON columns in `to_dataframe` ([#1876](https://github.com/googleapis/python-bigquery/issues/1876)) ([968020d](https://github.com/googleapis/python-bigquery/commit/968020d5be9d2a30b90d046eaf52f91bb2c70911)) +* Remove setup.cfg configuration for creating universal wheels ([#2146](https://github.com/googleapis/python-bigquery/issues/2146)) ([d7f7685](https://github.com/googleapis/python-bigquery/commit/d7f76853d598c354bfd2e65f5dde28dae97da0ec)) + + +### Dependencies + +* Remove Python 3.7 and 3.8 as supported runtimes ([#2133](https://github.com/googleapis/python-bigquery/issues/2133)) ([fb7de39](https://github.com/googleapis/python-bigquery/commit/fb7de398cb2ad000b80a8a702d1f6539dc03d8e0)) + ## [3.30.0](https://github.com/googleapis/python-bigquery/compare/v3.29.0...v3.30.0) (2025-02-26) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 01c4c51ca6d5..c0f7a96d69ea 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.30.0" +__version__ = "3.31.0" From 9543847837a46c46a49522c617d775d8434e8cae Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 27 Mar 2025 19:00:31 +0100 Subject: [PATCH 1906/2016] chore(deps): update all dependencies (#2143) * chore(deps): update all dependencies * pin ipython===8.18.1 for python 3.9 --------- Co-authored-by: Lingqing Gan --- .../samples/desktopapp/requirements-test.txt | 6 ++-- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements-test.txt | 4 +-- .../samples/geography/requirements.txt | 34 +++++++++---------- .../samples/magics/requirements-test.txt | 6 ++-- .../samples/magics/requirements.txt | 8 ++--- .../samples/notebooks/requirements-test.txt | 6 ++-- .../samples/notebooks/requirements.txt | 13 +++---- .../samples/snippets/requirements-test.txt | 6 ++-- .../samples/snippets/requirements.txt | 2 +- 10 files changed, 44 insertions(+), 43 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index cf215e2fd928..c8290d33f988 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 1658007414e1..fa349e0d33c1 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.27.0 +google-cloud-bigquery==3.31.0 google-auth-oauthlib==1.2.1 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 4ad1bd0285c4..5d20a45547e0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==8.3.4 -mock==5.1.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 0ad2154a44d5..3fa11ce7cf94 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,41 +1,41 @@ -attrs==24.3.0 -certifi==2024.12.14 +attrs==25.3.0 +certifi==2025.1.31 cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 click-plugins==1.1.1 cligj==0.7.2 -db-dtypes==1.3.1 +db-dtypes==1.4.2 Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 -google-api-core==2.24.0 -google-auth==2.37.0 -google-cloud-bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 -google-cloud-core==2.4.1 -google-crc32c==1.6.0 +google-api-core==2.24.2 +google-auth==2.38.0 +google-cloud-bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 +google-cloud-core==2.4.3 +google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.66.0 -grpcio==1.69.0 +googleapis-common-protos==1.69.2 +grpcio==1.71.0 idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 packaging==24.2 pandas==2.2.3 -proto-plus==1.25.0 -pyarrow==18.1.0 +proto-plus==1.26.1 +pyarrow==19.0.1 pyasn1==0.6.1 pyasn1-modules==0.4.1 pycparser==2.22 -pyparsing==3.2.1 +pyparsing==3.2.3 python-dateutil==2.9.0.post0 -pytz==2024.2 +pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 rsa==4.9 -Shapely==2.0.6 +Shapely==2.0.7 six==1.17.0 -typing-extensions==4.12.2 +typing-extensions==4.13.0 typing-inspect==0.9.0 urllib3==2.3.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index cf215e2fd928..c8290d33f988 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 4b81fe0ad87b..3ab2159512eb 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.5.0 -db-dtypes==1.3.1 -google.cloud.bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 +bigquery_magics==0.9.0 +db-dtypes==1.4.2 +google.cloud.bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 ipython===8.18.1 pandas==2.2.3 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index cf215e2fd928..c8290d33f988 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index e92d084a4d43..ca5505a2eb12 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,8 +1,9 @@ -bigquery-magics==0.5.0 -db-dtypes==1.3.1 -google-cloud-bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 -ipython==8.18.1 +bigquery-magics==0.9.0 +db-dtypes==1.4.2 +google-cloud-bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 +ipython===8.18.1; python_version == '3.9' +ipython==9.0.2; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.0; python_version >= '3.10' +matplotlib==3.10.1; python_version >= '3.10' pandas==2.2.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 52ccc8ab26c6..197b891874d3 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,4 +1,4 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 307ebac24fb0..4b88c6b702fb 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.27.0 +google-cloud-bigquery==3.31.0 From 113fd15e024e4d80d5fcaf9dc2cce1c6ceb6b9de Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 1 Apr 2025 00:40:19 +0200 Subject: [PATCH 1907/2016] chore(deps): update dependency pyasn1-modules to v0.4.2 (#2150) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 3fa11ce7cf94..514e19d2c4cd 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -26,7 +26,7 @@ pandas==2.2.3 proto-plus==1.26.1 pyarrow==19.0.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 +pyasn1-modules==0.4.2 pycparser==2.22 pyparsing==3.2.3 python-dateutil==2.9.0.post0 From e8459b1aea6f9680c362005e8ae9120daf0733b3 Mon Sep 17 00:00:00 2001 From: yokomotod Date: Wed, 2 Apr 2025 05:16:41 +0900 Subject: [PATCH 1908/2016] fix: empty record dtypes (#2147) * fix: empty record dtypes * update pandas minimum version * fix coverage * fix test_pandas --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/table.py | 42 ++++++++----------- packages/google-cloud-bigquery/pyproject.toml | 2 +- .../testing/constraints-3.9.txt | 2 +- .../tests/system/test_pandas.py | 7 +--- .../tests/unit/test_table.py | 10 +---- 5 files changed, 23 insertions(+), 40 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 238ff6bebd98..099f7fd694e3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -2648,31 +2648,25 @@ def to_dataframe( if pyarrow.types.is_timestamp(col.type) ) - if len(record_batch) > 0: - df = record_batch.to_pandas( + df = record_batch.to_pandas( + date_as_object=date_as_object, + timestamp_as_object=timestamp_as_object, + integer_object_nulls=True, + types_mapper=_pandas_helpers.default_types_mapper( date_as_object=date_as_object, - timestamp_as_object=timestamp_as_object, - integer_object_nulls=True, - types_mapper=_pandas_helpers.default_types_mapper( - date_as_object=date_as_object, - bool_dtype=bool_dtype, - int_dtype=int_dtype, - float_dtype=float_dtype, - string_dtype=string_dtype, - date_dtype=date_dtype, - datetime_dtype=datetime_dtype, - time_dtype=time_dtype, - timestamp_dtype=timestamp_dtype, - range_date_dtype=range_date_dtype, - range_datetime_dtype=range_datetime_dtype, - range_timestamp_dtype=range_timestamp_dtype, - ), - ) - else: - # Avoid "ValueError: need at least one array to concatenate" on - # older versions of pandas when converting empty RecordBatch to - # DataFrame. See: https://github.com/pandas-dev/pandas/issues/41241 - df = pandas.DataFrame([], columns=record_batch.schema.names) + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, + date_dtype=date_dtype, + datetime_dtype=datetime_dtype, + time_dtype=time_dtype, + timestamp_dtype=timestamp_dtype, + range_date_dtype=range_date_dtype, + range_datetime_dtype=range_datetime_dtype, + range_timestamp_dtype=range_timestamp_dtype, + ), + ) for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False) diff --git a/packages/google-cloud-bigquery/pyproject.toml b/packages/google-cloud-bigquery/pyproject.toml index 17bf4fd202c1..38d74cdd05f2 100644 --- a/packages/google-cloud-bigquery/pyproject.toml +++ b/packages/google-cloud-bigquery/pyproject.toml @@ -72,7 +72,7 @@ bqstorage = [ "pyarrow >= 4.0.0", ] pandas = [ - "pandas >= 1.1.4", + "pandas >= 1.3.0", "pandas-gbq >= 0.26.1", "grpcio >= 1.47.0, < 2.0.0", "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", diff --git a/packages/google-cloud-bigquery/testing/constraints-3.9.txt b/packages/google-cloud-bigquery/testing/constraints-3.9.txt index 63b5d8bf6bf5..cb6c29f3b585 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.9.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.9.txt @@ -21,7 +21,7 @@ opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 packaging==24.2.0 -pandas==1.1.4 +pandas==1.3.0 pandas-gbq==0.26.1 proto-plus==1.22.3 protobuf==3.20.2 diff --git a/packages/google-cloud-bigquery/tests/system/test_pandas.py b/packages/google-cloud-bigquery/tests/system/test_pandas.py index 01f552435f86..1fe7ff2cdeb2 100644 --- a/packages/google-cloud-bigquery/tests/system/test_pandas.py +++ b/packages/google-cloud-bigquery/tests/system/test_pandas.py @@ -1222,12 +1222,7 @@ def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype( # These pandas dtypes are handled by the custom dtypes. assert df.dtypes["bool_col"].name == "boolean" - # Result is dependent upon which version of pandas is being used. - # Float64 was not introduced until pandas version 1.4. - if PANDAS_INSTALLED_VERSION >= "1.4": - assert df.dtypes["float64_col"].name == "Float64" - else: - assert df.dtypes["float64_col"].name == "string" + assert df.dtypes["float64_col"].name == "Float64" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["string_col"].name == "string" diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index b846036ab2d4..3588cfba6b66 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -4143,14 +4143,8 @@ def test_to_dataframe_w_dtypes_mapper(self): ) self.assertEqual(df.name.dtype.name, "string") - # While pyproject.toml lists pandas 1.1 as the lowest supported version of - # pandas, the pip resolver is not able to resolve pandas 1.1 and numpy - if hasattr(pandas, "Float64Dtype"): - self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) - self.assertEqual(df.miles.dtype.name, "Float64") - else: - self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"]) - self.assertEqual(df.miles.dtype.name, "string") + self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) + self.assertEqual(df.miles.dtype.name, "Float64") if hasattr(pandas, "ArrowDtype"): self.assertEqual( From 5a42ace96c92c34477df12f6a624646aec770b6e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 3 Apr 2025 19:50:49 +0200 Subject: [PATCH 1909/2016] chore(deps): update dependency shapely to v2.1.0 (#2155) * chore(deps): update dependency shapely to v2.1.0 * pin Shapely===2.0.7 for python 3.9 --------- Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 514e19d2c4cd..5fe9005cc6bb 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -34,7 +34,8 @@ pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 rsa==4.9 -Shapely==2.0.7 +Shapely===2.0.7; python_version == '3.9' +Shapely==2.1.0; python_version >= '3.10' six==1.17.0 typing-extensions==4.13.0 typing-inspect==0.9.0 From 24ac7a8d07d7ff780dcc4f661f05d37513cd8233 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 3 Apr 2025 11:45:08 -0700 Subject: [PATCH 1910/2016] fix: table iterator should not use bqstorage when page_size is not None (#2154) * fix: table iterator should not use bqstorage when page_size is not None * fix dbapi cursor tests --- .../google/cloud/bigquery/table.py | 11 +++++++++-- .../tests/unit/test_dbapi_cursor.py | 1 + .../google-cloud-bigquery/tests/unit/test_table.py | 7 +++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 099f7fd694e3..8a3b6151a697 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1873,6 +1873,11 @@ def total_bytes_processed(self) -> Optional[int]: """total bytes processed from job statistics, if present.""" return self._total_bytes_processed + @property + def page_size(self) -> Optional[int]: + """The maximum number of rows in each page of results from this request, if present.""" + return self._page_size + def _is_almost_completely_cached(self): """Check if all results are completely cached. @@ -1924,7 +1929,7 @@ def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): if self._is_almost_completely_cached(): return False - if self.max_results is not None: + if self.max_results is not None or self.page_size is not None: return False try: @@ -1994,7 +1999,9 @@ def _maybe_warn_max_results( bqstorage_client: The BigQuery Storage client intended to use for downloading result rows. """ - if bqstorage_client is not None and self.max_results is not None: + if bqstorage_client is not None and ( + self.max_results is not None or self.page_size is not None + ): warnings.warn( "Cannot use bqstorage_client if max_results is set, " "reverting to fetching data with the REST endpoint.", diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index 6fca4cec0724..cba9030de0b5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -161,6 +161,7 @@ def _mock_rows( mock_rows, ) mock_rows.max_results = None + mock_rows.page_size = None type(mock_rows).job_id = mock.PropertyMock(return_value="test-job-id") type(mock_rows).location = mock.PropertyMock(return_value="test-location") type(mock_rows).num_dml_affected_rows = mock.PropertyMock( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 3588cfba6b66..a9966f1ced3c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2693,6 +2693,13 @@ def test__should_use_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) + def test__should_use_bqstorage_returns_false_if_page_size_set(self): + iterator = self._make_one(page_size=10, first_page_response=None) # not cached + result = iterator._should_use_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + self.assertFalse(result) + def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached From b18d00cb5a40c993fffbdca48cb4d4b7764c175a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 3 Apr 2025 23:20:01 +0200 Subject: [PATCH 1911/2016] chore(deps): update dependency typing-extensions to v4.13.1 (#2156) Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 5fe9005cc6bb..37bcdf687e45 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -37,6 +37,6 @@ rsa==4.9 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.0; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.0 +typing-extensions==4.13.1 typing-inspect==0.9.0 urllib3==2.3.0 From 16ff50551fbd8b0df99a0bfffb60af9b140664fe Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 10 Apr 2025 10:13:17 -0700 Subject: [PATCH 1912/2016] feat: add preview support for incremental results (#2145) * feat: add preview support for incremental results Plumbs support to enable incremental results. * fastpath allow * add fastquery test * lint * lint * blacken --- .../google/cloud/bigquery/_job_helpers.py | 1 + .../google/cloud/bigquery/job/query.py | 15 +++++++++++++++ .../tests/unit/job/test_query_config.py | 5 +++++ .../tests/unit/test__job_helpers.py | 12 ++++++++++++ 4 files changed, 33 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index a8373c356761..9193f818482f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -564,6 +564,7 @@ def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool: "maximumBytesBilled", "requestId", "createSession", + "writeIncrementalResults", } unsupported_keys = request_keys - keys_allowlist diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index a27c1053069e..f14039bc0b43 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -674,6 +674,21 @@ def write_disposition(self): def write_disposition(self, value): self._set_sub_prop("writeDisposition", value) + @property + def write_incremental_results(self) -> Optional[bool]: + """This is only supported for a SELECT query using a temporary table. + + If set, the query is allowed to write results incrementally to the temporary result + table. This may incur a performance penalty. This option cannot be used with Legacy SQL. + + This feature is not generally available. + """ + return self._get_sub_prop("writeIncrementalResults") + + @write_incremental_results.setter + def write_incremental_results(self, value): + self._set_sub_prop("writeIncrementalResults", value) + @property def table_definitions(self): """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py index 7818236f4846..e0878d067b67 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py @@ -167,6 +167,11 @@ def test_connection_properties(self): self.assertEqual(config.connection_properties[1].key, "time_zone") self.assertEqual(config.connection_properties[1].value, "America/Chicago") + def test_incremental_results(self): + config = self._get_target_class()() + config.write_incremental_results = True + self.assertEqual(config.write_incremental_results, True) + def test_create_session(self): config = self._get_target_class()() self.assertIsNone(config.create_session) diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py index 96914d9f9680..4fa093c695c3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py @@ -194,6 +194,13 @@ def make_query_response( make_query_request({"maximumBytesBilled": "987654"}), id="job_config-with-maximum_bytes_billed", ), + pytest.param( + job_query.QueryJobConfig( + write_incremental_results=True, + ), + make_query_request({"writeIncrementalResults": True}), + id="job_config-with-incremental-results", + ), ), ) def test__to_query_request(job_config, expected): @@ -1141,6 +1148,11 @@ def test_make_job_id_w_job_id_overrides_prefix(): False, id="priority=BATCH", ), + pytest.param( + job_query.QueryJobConfig(write_incremental_results=True), + True, + id="write_incremental_results", + ), ), ) def test_supported_by_jobs_query_from_queryjobconfig( From bbcea973e4ff71bebc7cad04e7593c4b1d7fba7a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 10 Apr 2025 10:37:54 -0700 Subject: [PATCH 1913/2016] chore(python): remove .gitignore from templates (#2160) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(python): remove .gitignore from templates Source-Link: https://github.com/googleapis/synthtool/commit/419d94cdddd0d859ac6743ffebd177693c8a027f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove replacement in owlbot.py --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- packages/google-cloud-bigquery/owlbot.py | 8 -------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 9d743afe8a83..51b21a62b7b8 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5581906b957284864632cde4e9c51d1cc66b0094990b27e689132fe5cd036046 -# created: 2025-03-07 + digest: sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb +# created: 2025-04-10T17:00:10.042601326Z diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index fceeaa1b6d5c..8cfa2b097238 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -130,14 +130,6 @@ 'ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]', ) - -# ---------------------------------------------------------------------------- -# pytype-related changes -# ---------------------------------------------------------------------------- - -# Add .pytype to .gitignore -s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") - s.shell.run(["nox", "-s", "blacken"], hide_output=False) for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) From 61d104a8c042c4bb6ad90cad280dc6db0acee4fb Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 11 Apr 2025 15:23:01 -0400 Subject: [PATCH 1914/2016] test: adds pytest-xdist to speed up processing of CI/CD checks (#2153) * experimentation using pytest-xdist * adds pytest-xdist to nox system session for experimentation * adds pytest-xdist install AND -n=auto argument * updates sample noxfiles * updates pytest version in requirements-test.txt files * Update samples/notebooks/requirements-test.txt * Update samples/notebooks/requirements-test.txt --- packages/google-cloud-bigquery/noxfile.py | 23 ++++++++++++++++--- .../samples/desktopapp/requirements-test.txt | 1 + .../samples/geography/requirements-test.txt | 1 + .../samples/magics/requirements-test.txt | 1 + .../samples/notebooks/requirements-test.txt | 1 + .../samples/snippets/requirements-test.txt | 1 + 6 files changed, 25 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 1b118836bd6b..c2b4bbb50355 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -98,6 +98,7 @@ def default(session, install_extras=True): "pytest", "google-cloud-testutils", "pytest-cov", + "pytest-xdist", "freezegun", "-c", constraints_path, @@ -129,6 +130,7 @@ def default(session, install_extras=True): # Run py.test against the unit tests. session.run( "py.test", + "-n=auto", "--quiet", "-W default::PendingDeprecationWarning", "--cov=google/cloud/bigquery", @@ -224,7 +226,12 @@ def system(session): # Install all test dependencies, then install local packages in place. session.install( - "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path + "pytest", + "psutil", + "pytest-xdist", + "google-cloud-testutils", + "-c", + constraints_path, ) if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true": # mTLS test requires pyopenssl and latest google-cloud-storage @@ -257,6 +264,7 @@ def system(session): # Run py.test against the system tests. session.run( "py.test", + "-n=auto", "--quiet", "-W default::PendingDeprecationWarning", os.path.join("tests", "system"), @@ -310,7 +318,9 @@ def snippets(session): ) # Install all test dependencies, then install local packages in place. - session.install("pytest", "google-cloud-testutils", "-c", constraints_path) + session.install( + "pytest", "pytest-xdist", "google-cloud-testutils", "-c", constraints_path + ) session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) @@ -326,9 +336,12 @@ def snippets(session): # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session # using the nox config from that directory. - session.run("py.test", os.path.join("docs", "snippets.py"), *session.posargs) + session.run( + "py.test", "-n=auto", os.path.join("docs", "snippets.py"), *session.posargs + ) session.run( "py.test", + "-n=auto", "samples", "-W default::PendingDeprecationWarning", "--ignore=samples/desktopapp", @@ -393,6 +406,7 @@ def prerelease_deps(session): "google-cloud-testutils", "psutil", "pytest", + "pytest-xdist", "pytest-cov", ) @@ -439,18 +453,21 @@ def prerelease_deps(session): # Run all tests, except a few samples tests which require extra dependencies. session.run( "py.test", + "-n=auto", "tests/unit", "-W default::PendingDeprecationWarning", ) session.run( "py.test", + "-n=auto", "tests/system", "-W default::PendingDeprecationWarning", ) session.run( "py.test", + "-n=auto", "samples/tests", "-W default::PendingDeprecationWarning", ) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index c8290d33f988..183230cf4c03 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 5d20a45547e0..7b01ce8acb7f 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,2 +1,3 @@ pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index c8290d33f988..183230cf4c03 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index c8290d33f988..183230cf4c03 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 197b891874d3..0cf0bb6b4f42 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -2,3 +2,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 From 0138b18d3422b4c90eee16ea9917fe60cb9cfe1b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 16 Apr 2025 05:20:30 -0400 Subject: [PATCH 1915/2016] feat: adds condition class and assoc. unit tests (#2159) * feat: adds condition class and assoc. unit tests * Updates two test cases for empty string --- .../google/cloud/bigquery/dataset.py | 93 ++++++++++- .../tests/unit/test_dataset.py | 155 ++++++++++++++++++ 2 files changed, 246 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 15a11fb40a69..cc14598fe867 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -19,6 +19,7 @@ import copy import typing +from typing import Optional, List, Dict, Any, Union import google.cloud._helpers # type: ignore @@ -29,8 +30,6 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery import external_config -from typing import Optional, List, Dict, Any, Union - def _get_table_reference(self, table_id: str) -> TableReference: """Constructs a TableReference. @@ -1074,3 +1073,93 @@ def reference(self): model = _get_model_reference routine = _get_routine_reference + + +class Condition(object): + """Represents a textual expression in the Common Expression Language (CEL) syntax. + + Typically used for filtering or policy rules, such as in IAM Conditions + or BigQuery row/column access policies. + + See: + https://cloud.google.com/iam/docs/reference/rest/Shared.Types/Expr + https://github.com/google/cel-spec + + Args: + expression (str): + The condition expression string using CEL syntax. This is required. + Example: ``resource.type == "compute.googleapis.com/Instance"`` + title (Optional[str]): + An optional title for the condition, providing a short summary. + Example: ``"Request is for a GCE instance"`` + description (Optional[str]): + An optional description of the condition, providing a detailed explanation. + Example: ``"This condition checks whether the resource is a GCE instance."`` + """ + + def __init__( + self, + expression: str, + title: Optional[str] = None, + description: Optional[str] = None, + ): + self._properties: Dict[str, Any] = {} + # Use setters to initialize properties, which also handle validation + self.expression = expression + self.title = title + self.description = description + + @property + def title(self) -> Optional[str]: + """Optional[str]: The title for the condition.""" + return self._properties.get("title") + + @title.setter + def title(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("Pass a string for title, or None") + self._properties["title"] = value + + @property + def description(self) -> Optional[str]: + """Optional[str]: The description for the condition.""" + return self._properties.get("description") + + @description.setter + def description(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("Pass a string for description, or None") + self._properties["description"] = value + + @property + def expression(self) -> str: + """str: The expression string for the condition.""" + + # Cast assumes expression is always set due to __init__ validation + return typing.cast(str, self._properties.get("expression")) + + @expression.setter + def expression(self, value: str): + if not isinstance(value, str): + raise ValueError("Pass a non-empty string for expression") + if not value: + raise ValueError("expression cannot be an empty string") + self._properties["expression"] = value + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this Condition.""" + return self._properties + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition": + """Factory: construct a Condition instance given its API representation.""" + + # Ensure required fields are present in the resource if necessary + if "expression" not in resource: + raise ValueError("API representation missing required 'expression' field.") + + return cls( + expression=resource["expression"], + title=resource.get("title"), + description=resource.get("description"), + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 8ab8dffec16f..036e22458326 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -19,6 +19,7 @@ import pytest from google.cloud.bigquery.dataset import ( AccessEntry, + Condition, Dataset, DatasetReference, Table, @@ -1228,3 +1229,157 @@ def test_table(self): self.assertEqual(table.table_id, "table_id") self.assertEqual(table.dataset_id, dataset_id) self.assertEqual(table.project, project) + + +class TestCondition: + EXPRESSION = 'resource.name.startsWith("projects/my-project/instances/")' + TITLE = "Instance Access" + DESCRIPTION = "Access to instances in my-project" + + @pytest.fixture + def condition_instance(self): + """Provides a Condition instance for tests.""" + return Condition( + expression=self.EXPRESSION, + title=self.TITLE, + description=self.DESCRIPTION, + ) + + @pytest.fixture + def condition_api_repr(self): + """Provides the API representation for the test Condition.""" + return { + "expression": self.EXPRESSION, + "title": self.TITLE, + "description": self.DESCRIPTION, + } + + # --- Basic Functionality Tests --- + + def test_constructor_and_getters_full(self, condition_instance): + """Test initialization with all arguments and subsequent attribute access.""" + assert condition_instance.expression == self.EXPRESSION + assert condition_instance.title == self.TITLE + assert condition_instance.description == self.DESCRIPTION + + def test_constructor_and_getters_minimal(self): + """Test initialization with only the required expression.""" + condition = Condition(expression=self.EXPRESSION) + assert condition.expression == self.EXPRESSION + assert condition.title is None + assert condition.description is None + + def test_setters(self, condition_instance): + """Test setting attributes after initialization.""" + new_title = "New Title" + new_desc = "New Description" + new_expr = "request.time < timestamp('2024-01-01T00:00:00Z')" + + condition_instance.title = new_title + assert condition_instance.title == new_title + + condition_instance.description = new_desc + assert condition_instance.description == new_desc + + condition_instance.expression = new_expr + assert condition_instance.expression == new_expr + + # Test setting title and description to empty strings + condition_instance.title = "" + assert condition_instance.title == "" + + condition_instance.description = "" + assert condition_instance.description == "" + + # Test setting optional fields back to None + condition_instance.title = None + assert condition_instance.title is None + condition_instance.description = None + assert condition_instance.description is None + + # --- API Representation Tests --- + + def test_to_api_repr_full(self, condition_instance, condition_api_repr): + """Test converting a fully populated Condition to API representation.""" + api_repr = condition_instance.to_api_repr() + assert api_repr == condition_api_repr + + def test_to_api_repr_minimal(self): + """Test converting a minimally populated Condition to API representation.""" + condition = Condition(expression=self.EXPRESSION) + expected_api_repr = { + "expression": self.EXPRESSION, + "title": None, + "description": None, + } + api_repr = condition.to_api_repr() + assert api_repr == expected_api_repr + + def test_from_api_repr_full(self, condition_api_repr): + """Test creating a Condition from a full API representation.""" + condition = Condition.from_api_repr(condition_api_repr) + assert condition.expression == self.EXPRESSION + assert condition.title == self.TITLE + assert condition.description == self.DESCRIPTION + + def test_from_api_repr_minimal(self): + """Test creating a Condition from a minimal API representation.""" + minimal_repr = {"expression": self.EXPRESSION} + condition = Condition.from_api_repr(minimal_repr) + assert condition.expression == self.EXPRESSION + assert condition.title is None + assert condition.description is None + + def test_from_api_repr_with_extra_fields(self): + """Test creating a Condition from an API repr with unexpected fields.""" + api_repr = { + "expression": self.EXPRESSION, + "title": self.TITLE, + "unexpected_field": "some_value", + } + condition = Condition.from_api_repr(api_repr) + assert condition.expression == self.EXPRESSION + assert condition.title == self.TITLE + assert condition.description is None + # Check that the extra field didn't get added to internal properties + assert "unexpected_field" not in condition._properties + + # # --- Validation Tests --- + + @pytest.mark.parametrize( + "kwargs, error_msg", + [ + ({"expression": None}, "Pass a non-empty string for expression"), # type: ignore + ({"expression": ""}, "expression cannot be an empty string"), + ({"expression": 123}, "Pass a non-empty string for expression"), # type: ignore + ({"expression": EXPRESSION, "title": 123}, "Pass a string for title, or None"), # type: ignore + ({"expression": EXPRESSION, "description": False}, "Pass a string for description, or None"), # type: ignore + ], + ) + def test_validation_init(self, kwargs, error_msg): + """Test validation during __init__.""" + with pytest.raises(ValueError, match=error_msg): + Condition(**kwargs) + + @pytest.mark.parametrize( + "attribute, value, error_msg", + [ + ("expression", None, "Pass a non-empty string for expression"), # type: ignore + ("expression", "", "expression cannot be an empty string"), + ("expression", 123, "Pass a non-empty string for expression"), # type: ignore + ("title", 123, "Pass a string for title, or None"), # type: ignore + ("description", [], "Pass a string for description, or None"), # type: ignore + ], + ) + def test_validation_setters(self, condition_instance, attribute, value, error_msg): + """Test validation via setters.""" + with pytest.raises(ValueError, match=error_msg): + setattr(condition_instance, attribute, value) + + def test_validation_expression_required_from_api(self): + """Test ValueError is raised if expression is missing in from_api_repr.""" + api_repr = {"title": self.TITLE} + with pytest.raises( + ValueError, match="API representation missing required 'expression' field." + ): + Condition.from_api_repr(api_repr) From 5c560b9c398fe0d64397651e3a42d356b40a0f16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 24 Apr 2025 15:46:59 -0500 Subject: [PATCH 1916/2016] fix: `query()` now warns when `job_id` is set and the default `job_retry` is ignored (#2167) * fix: `query()` now warns when `job_id` is set and the default `job_retry` is ignored * Update google/cloud/bigquery/client.py * allow None for job_retry in code path that calls jobs.query from client.query * allow None for job_retry in code path that calls jobs.query from client.query * Update tests/unit/test_job_retry.py --- .../google/cloud/bigquery/_job_helpers.py | 42 ++++++++++++++++++- .../google/cloud/bigquery/client.py | 15 ++----- .../tests/unit/test_job_retry.py | 18 +++++--- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 9193f818482f..4a884ada5f42 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -39,7 +39,9 @@ import functools import os import uuid +import textwrap from typing import Any, Dict, Optional, TYPE_CHECKING, Union +import warnings import google.api_core.exceptions as core_exceptions from google.api_core import retry as retries @@ -198,6 +200,44 @@ def _validate_job_config(request_body: Dict[str, Any], invalid_key: str): raise ValueError(f"got unexpected key {repr(invalid_key)} in job_config") +def validate_job_retry(job_id: Optional[str], job_retry: Optional[retries.Retry]): + """Catch common mistakes, such as setting a job_id and job_retry at the same + time. + """ + if job_id is not None and job_retry is not None: + # TODO(tswast): To avoid breaking changes but still allow a default + # query job retry, we currently only raise if they explicitly set a + # job_retry other than the default. In a future version, we may want to + # avoid this check for DEFAULT_JOB_RETRY and always raise. + if job_retry is not google.cloud.bigquery.retry.DEFAULT_JOB_RETRY: + raise TypeError( + textwrap.dedent( + """ + `job_retry` was provided, but the returned job is + not retryable, because a custom `job_id` was + provided. To customize the job ID and allow for job + retries, set job_id_prefix, instead. + """ + ).strip() + ) + else: + warnings.warn( + textwrap.dedent( + """ + job_retry must be explicitly set to None if job_id is set. + BigQuery cannot retry a failed job by using the exact + same ID. Setting job_id without explicitly disabling + job_retry will raise an error in the future. To avoid this + warning, either use job_id_prefix instead (preferred) or + set job_retry=None. + """ + ).strip(), + category=FutureWarning, + # user code -> client.query / client.query_and_wait -> validate_job_retry + stacklevel=3, + ) + + def _to_query_request( job_config: Optional[job.QueryJobConfig] = None, *, @@ -308,7 +348,7 @@ def query_jobs_query( project: str, retry: retries.Retry, timeout: Optional[float], - job_retry: retries.Retry, + job_retry: Optional[retries.Retry], ) -> job.QueryJob: """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 8bbdd6c3291f..e7cafc47e869 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3388,7 +3388,7 @@ def query( project: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - job_retry: retries.Retry = DEFAULT_JOB_RETRY, + job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY, api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT, ) -> job.QueryJob: """Run a SQL query. @@ -3455,18 +3455,9 @@ def query( class, or if both ``job_id`` and non-``None`` non-default ``job_retry`` are provided. """ - job_id_given = job_id is not None - if ( - job_id_given - and job_retry is not None - and job_retry is not DEFAULT_JOB_RETRY - ): - raise TypeError( - "`job_retry` was provided, but the returned job is" - " not retryable, because a custom `job_id` was" - " provided." - ) + _job_helpers.validate_job_retry(job_id, job_retry) + job_id_given = job_id is not None if job_id_given and api_method == enums.QueryApiMethod.QUERY: raise TypeError( "`job_id` was provided, but the 'QUERY' `api_method` was requested." diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py index 958986052adb..7144c640bf3a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -511,26 +511,34 @@ def api_request(method, path, query_params=None, data=None, **kw): def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): with pytest.raises( TypeError, - match=re.escape( + match=( "`job_retry` was provided, but the returned job is" " not retryable, because a custom `job_id` was" " provided." - ), + ).replace(" ", r"\s"), ): client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): client._connection = make_connection({}) - job = client.query("select 42", job_id=42) + + with pytest.warns( + FutureWarning, + match=re.escape("job_retry must be explicitly set to None if job_id is set."), + ): + # Implicitly providing a job_retry is a warning and will be an error in the future. + job = client.query("select 42", job_id=42) + with pytest.raises( TypeError, - match=re.escape( + match=( "`job_retry` was provided, but this job is" " not retryable, because a custom `job_id` was" " provided to the query that created this job." - ), + ).replace(" ", r"\s"), ): + # Explicitly providing a job_retry is an error. job.result(job_retry=google.api_core.retry.Retry()) From ebebdb0f588906a167fe9a1bf83122de0433346c Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 25 Apr 2025 10:29:54 -0700 Subject: [PATCH 1917/2016] feat: support BigLakeConfiguration (managed Iceberg tables) (#2162) * feat: support BigLakeConfiguration (managed Iceberg tables) This PR adds the BigLakeConfiguration class to tables, and the necessary property mappings from Table. It also adds some utility enums (BigLakeFileFormat, BigLakeTableFormat) to more easily communicate available values for configuraiton. --- .../google/cloud/bigquery/enums.py | 16 ++ .../google/cloud/bigquery/table.py | 150 ++++++++++++++++ .../tests/unit/test_table.py | 160 ++++++++++++++++++ 3 files changed, 326 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 5519bc989630..b32fc82009de 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -387,3 +387,19 @@ def _generate_next_value_(name, start, count, last_values): ROUNDING_MODE_UNSPECIFIED = enum.auto() ROUND_HALF_AWAY_FROM_ZERO = enum.auto() ROUND_HALF_EVEN = enum.auto() + + +class BigLakeFileFormat(object): + FILE_FORMAT_UNSPECIFIED = "FILE_FORMAT_UNSPECIFIED" + """The default unspecified value.""" + + PARQUET = "PARQUET" + """Apache Parquet format.""" + + +class BigLakeTableFormat(object): + TABLE_FORMAT_UNSPECIFIED = "TABLE_FORMAT_UNSPECIFIED" + """The default unspecified value.""" + + ICEBERG = "ICEBERG" + """Apache Iceberg format.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 8a3b6151a697..503ca4e71ce8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -380,6 +380,7 @@ class Table(_TableBase): _PROPERTY_TO_API_FIELD: Dict[str, Any] = { **_TableBase._PROPERTY_TO_API_FIELD, + "biglake_configuration": "biglakeConfiguration", "clustering_fields": "clustering", "created": "creationTime", "description": "description", @@ -431,6 +432,29 @@ def __init__(self, table_ref, schema=None) -> None: reference = property(_reference_getter) + @property + def biglake_configuration(self): + """google.cloud.bigquery.table.BigLakeConfiguration: Configuration + for managed tables for Apache Iceberg. + + See https://cloud.google.com/bigquery/docs/iceberg-tables for more information. + """ + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["biglake_configuration"] + ) + if prop is not None: + prop = BigLakeConfiguration.from_api_repr(prop) + return prop + + @biglake_configuration.setter + def biglake_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._properties[ + self._PROPERTY_TO_API_FIELD["biglake_configuration"] + ] = api_repr + @property def require_partition_filter(self): """bool: If set to true, queries over the partitioned table require a @@ -3501,6 +3525,132 @@ def to_api_repr(self) -> Dict[str, Any]: return resource +class BigLakeConfiguration(object): + """Configuration for managed tables for Apache Iceberg, formerly + known as BigLake. + + Args: + connection_id (Optional[str]): + The connection specifying the credentials to be used to read and write to external + storage, such as Cloud Storage. The connection_id can have the form + ``{project}.{location}.{connection_id}`` or + ``projects/{project}/locations/{location}/connections/{connection_id}``. + storage_uri (Optional[str]): + The fully qualified location prefix of the external folder where table data is + stored. The '*' wildcard character is not allowed. The URI should be in the + format ``gs://bucket/path_to_table/``. + file_format (Optional[str]): + The file format the table data is stored in. See BigLakeFileFormat for available + values. + table_format (Optional[str]): + The table format the metadata only snapshots are stored in. See BigLakeTableFormat + for available values. + _properties (Optional[dict]): + Private. Used to construct object from API resource. + """ + + def __init__( + self, + connection_id: Optional[str] = None, + storage_uri: Optional[str] = None, + file_format: Optional[str] = None, + table_format: Optional[str] = None, + _properties: Optional[dict] = None, + ) -> None: + if _properties is None: + _properties = {} + self._properties = _properties + if connection_id is not None: + self.connection_id = connection_id + if storage_uri is not None: + self.storage_uri = storage_uri + if file_format is not None: + self.file_format = file_format + if table_format is not None: + self.table_format = table_format + + @property + def connection_id(self) -> Optional[str]: + """str: The connection specifying the credentials to be used to read and write to external + storage, such as Cloud Storage.""" + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value: Optional[str]): + self._properties["connectionId"] = value + + @property + def storage_uri(self) -> Optional[str]: + """str: The fully qualified location prefix of the external folder where table data is + stored.""" + return self._properties.get("storageUri") + + @storage_uri.setter + def storage_uri(self, value: Optional[str]): + self._properties["storageUri"] = value + + @property + def file_format(self) -> Optional[str]: + """str: The file format the table data is stored in. See BigLakeFileFormat for available + values.""" + return self._properties.get("fileFormat") + + @file_format.setter + def file_format(self, value: Optional[str]): + self._properties["fileFormat"] = value + + @property + def table_format(self) -> Optional[str]: + """str: The table format the metadata only snapshots are stored in. See BigLakeTableFormat + for available values.""" + return self._properties.get("tableFormat") + + @table_format.setter + def table_format(self, value: Optional[str]): + self._properties["tableFormat"] = value + + def _key(self): + return tuple(sorted(self._properties.items())) + + def __eq__(self, other): + if not isinstance(other, BigLakeConfiguration): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + key_vals = ["{}={}".format(key, val) for key, val in self._key()] + return "BigLakeConfiguration({})".format(",".join(key_vals)) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "BigLakeConfiguration": + """Factory: construct a BigLakeConfiguration given its API representation. + + Args: + resource: + BigLakeConfiguration representation returned from the API + + Returns: + BigLakeConfiguration parsed from ``resource``. + """ + ref = cls() + ref._properties = resource + return ref + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this BigLakeConfiguration. + + Returns: + BigLakeConfiguration represented as an API resource. + """ + return copy.deepcopy(self._properties) + + def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index a9966f1ced3c..2530065478f2 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -435,6 +435,12 @@ def _make_resource(self): "sourceFormat": "CSV", "csvOptions": {"allowJaggedRows": True, "encoding": "encoding"}, }, + "biglakeConfiguration": { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + }, "labels": {"x": "y"}, } @@ -521,6 +527,15 @@ def _verifyResourceProperties(self, table, resource): else: self.assertIsNone(table.encryption_configuration) + if "biglakeConfiguration" in resource: + self.assertIsNotNone(table.biglake_configuration) + self.assertEqual(table.biglake_configuration.connection_id, "connection") + self.assertEqual(table.biglake_configuration.storage_uri, "uri") + self.assertEqual(table.biglake_configuration.file_format, "PARQUET") + self.assertEqual(table.biglake_configuration.table_format, "ICEBERG") + else: + self.assertIsNone(table.biglake_configuration) + def test_ctor(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -893,6 +908,60 @@ def test_table_constraints_property_getter(self): assert isinstance(table_constraints, TableConstraints) assert table_constraints.primary_key == PrimaryKey(columns=["id"]) + def test_biglake_configuration_not_set(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + assert table.biglake_configuration is None + + def test_biglake_configuration_set(self): + from google.cloud.bigquery.table import BigLakeConfiguration + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table._properties["biglakeConfiguration"] = { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + } + + config = table.biglake_configuration + + assert isinstance(config, BigLakeConfiguration) + assert config.connection_id == "connection" + assert config.storage_uri == "uri" + assert config.file_format == "PARQUET" + assert config.table_format == "ICEBERG" + + def test_biglake_configuration_property_setter(self): + from google.cloud.bigquery.table import BigLakeConfiguration + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + config = BigLakeConfiguration( + connection_id="connection", + storage_uri="uri", + file_format="PARQUET", + table_format="ICEBERG", + ) + table.biglake_configuration = config + + assert table._properties["biglakeConfiguration"] == { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + } + + table.biglake_configuration = None + assert table.biglake_configuration is None + def test_table_constraints_property_setter(self): from google.cloud.bigquery.table import ( ColumnReference, @@ -2166,6 +2235,97 @@ def test_ctor_full_resource(self): assert instance.snapshot_time == expected_time +class TestBigLakeConfiguration(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import BigLakeConfiguration + + return BigLakeConfiguration + + @classmethod + def _make_one(cls, *args, **kwargs): + klass = cls._get_target_class() + return klass(*args, **kwargs) + + def test_ctor_empty_resource(self): + instance = self._make_one() + self.assertIsNone(instance.connection_id) + self.assertIsNone(instance.storage_uri) + self.assertIsNone(instance.file_format) + self.assertIsNone(instance.table_format) + + def test_ctor_kwargs(self): + instance = self._make_one( + connection_id="conn", + storage_uri="uri", + file_format="FILE", + table_format="TABLE", + ) + self.assertEqual(instance.connection_id, "conn") + self.assertEqual(instance.storage_uri, "uri") + self.assertEqual(instance.file_format, "FILE") + self.assertEqual(instance.table_format, "TABLE") + + def test_ctor_full_resource(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + instance = self._make_one(_properties=resource) + self.assertEqual(instance.connection_id, "conn") + self.assertEqual(instance.storage_uri, "uri") + self.assertEqual(instance.file_format, "FILE") + self.assertEqual(instance.table_format, "TABLE") + + def test_to_api_repr(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + instance = self._make_one(_properties=resource) + self.assertEqual(instance.to_api_repr(), resource) + + def test_from_api_repr_partial(self): + klass = self._get_target_class() + api_repr = {"fileFormat": "FILE"} + instance = klass.from_api_repr(api_repr) + + self.assertIsNone(instance.connection_id) + self.assertIsNone(instance.storage_uri) + self.assertEqual(instance.file_format, "FILE") + self.assertIsNone(instance.table_format) + + def test_comparisons(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + + first = self._make_one(_properties=resource) + second = self._make_one(_properties=copy.deepcopy(resource)) + # Exercise comparator overloads. + # first and second should be equivalent. + self.assertNotEqual(first, resource) + self.assertEqual(first, second) + self.assertEqual(hash(first), hash(second)) + + # Update second to ensure that first and second are no longer equivalent. + second.connection_id = "foo" + self.assertNotEqual(first, second) + self.assertNotEqual(hash(first), hash(second)) + + # Update first with the same change, restoring equivalence. + first.connection_id = "foo" + self.assertEqual(first, second) + self.assertEqual(hash(first), hash(second)) + + class TestCloneDefinition: @staticmethod def _get_target_class(): From dadc113dcad061e0a8c1bc94024fd5db93f8702c Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 29 Apr 2025 09:16:36 -0400 Subject: [PATCH 1918/2016] feat: Update the AccessEntry class with a new condition attribute and unit tests (#2163) * feat: adds condition class and assoc. unit tests * Updates AccessEntry with condition setter/getter * Adds condition attr to AccessEntry and unit tests * adds tests for Condition dunder methods to ensure coverage * moves the entity_type logic out of _from_api_repr to entity_type setter * Updates logic in entity_type getter * updates several AccessEntry related tests * Updates AccessEntry condition setter test to use a dict * udpates entity_id handling * Updates _entity_type access * tweaks type hinting * Update tests/unit/test_dataset.py * Update tests/unit/test_dataset.py * Updates DatasetReference in test and __eq__ check * remove debug print statement --- .../google/cloud/bigquery/dataset.py | 126 ++++++- .../tests/unit/test_dataset.py | 336 +++++++++++++++++- 2 files changed, 432 insertions(+), 30 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index cc14598fe867..670fe127c59d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -298,12 +298,15 @@ def __init__( role: Optional[str] = None, entity_type: Optional[str] = None, entity_id: Optional[Union[Dict[str, Any], str]] = None, + **kwargs, ): - self._properties = {} + self._properties: Dict[str, Any] = {} if entity_type is not None: self._properties[entity_type] = entity_id self._properties["role"] = role - self._entity_type = entity_type + self._entity_type: Optional[str] = entity_type + for prop, val in kwargs.items(): + setattr(self, prop, val) @property def role(self) -> Optional[str]: @@ -330,6 +333,9 @@ def dataset(self, value): if isinstance(value, str): value = DatasetReference.from_string(value).to_api_repr() + if isinstance(value, DatasetReference): + value = value.to_api_repr() + if isinstance(value, (Dataset, DatasetListItem)): value = value.reference.to_api_repr() @@ -437,15 +443,65 @@ def special_group(self) -> Optional[str]: def special_group(self, value): self._properties["specialGroup"] = value + @property + def condition(self) -> Optional["Condition"]: + """Optional[Condition]: The IAM condition associated with this entry.""" + value = typing.cast(Dict[str, Any], self._properties.get("condition")) + return Condition.from_api_repr(value) if value else None + + @condition.setter + def condition(self, value: Union["Condition", dict, None]): + """Set the IAM condition for this entry.""" + if value is None: + self._properties["condition"] = None + elif isinstance(value, Condition): + self._properties["condition"] = value.to_api_repr() + elif isinstance(value, dict): + self._properties["condition"] = value + else: + raise TypeError("condition must be a Condition object, dict, or None") + @property def entity_type(self) -> Optional[str]: """The entity_type of the entry.""" + + # The api_repr for an AccessEntry object is expected to be a dict with + # only a few keys. Two keys that may be present are role and condition. + # Any additional key is going to have one of ~eight different names: + # userByEmail, groupByEmail, domain, dataset, specialGroup, view, + # routine, iamMember + + # if self._entity_type is None, see if it needs setting + # i.e. is there a key: value pair that should be associated with + # entity_type and entity_id? + if self._entity_type is None: + resource = self._properties.copy() + # we are empyting the dict to get to the last `key: value`` pair + # so we don't keep these first entries + _ = resource.pop("role", None) + _ = resource.pop("condition", None) + + try: + # we only need entity_type, because entity_id gets set elsewhere. + entity_type, _ = resource.popitem() + except KeyError: + entity_type = None + + self._entity_type = entity_type + return self._entity_type @property def entity_id(self) -> Optional[Union[Dict[str, Any], str]]: """The entity_id of the entry.""" - return self._properties.get(self._entity_type) if self._entity_type else None + if self.entity_type: + entity_type = self.entity_type + else: + return None + return typing.cast( + Optional[Union[Dict[str, Any], str]], + self._properties.get(entity_type, None), + ) def __eq__(self, other): if not isinstance(other, AccessEntry): @@ -464,7 +520,16 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`. """ + properties = self._properties.copy() + + # Dicts are not hashable. + # Convert condition to a hashable datatype(s) + condition = properties.get("condition") + if isinstance(condition, dict): + condition_key = tuple(sorted(condition.items())) + properties["condition"] = condition_key + prop_tup = tuple(sorted(properties.items())) return (self.role, self._entity_type, self.entity_id, prop_tup) @@ -491,19 +556,11 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry": Returns: google.cloud.bigquery.dataset.AccessEntry: Access entry parsed from ``resource``. - - Raises: - ValueError: - If the resource has more keys than ``role`` and one additional - key. """ - entry = resource.copy() - role = entry.pop("role", None) - entity_type, entity_id = entry.popitem() - if len(entry) != 0: - raise ValueError("Entry has unexpected keys remaining.", entry) - return cls(role, entity_type, entity_id) + access_entry = cls() + access_entry._properties = resource.copy() + return access_entry class Dataset(object): @@ -1160,6 +1217,43 @@ def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition": return cls( expression=resource["expression"], - title=resource.get("title"), - description=resource.get("description"), + title=resource.get("title", None), + description=resource.get("description", None), ) + + def __eq__(self, other: object) -> bool: + """Check for equality based on expression, title, and description.""" + if not isinstance(other, Condition): + return NotImplemented + return self._key() == other._key() + + def _key(self): + """A tuple key that uniquely describes this field. + Used to compute this instance's hashcode and evaluate equality. + Returns: + Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`. + """ + + properties = self._properties.copy() + + # Dicts are not hashable. + # Convert object to a hashable datatype(s) + prop_tup = tuple(sorted(properties.items())) + return prop_tup + + def __ne__(self, other: object) -> bool: + """Check for inequality.""" + return not self == other + + def __hash__(self) -> int: + """Generate a hash based on expression, title, and description.""" + return hash(self._key()) + + def __repr__(self) -> str: + """Return a string representation of the Condition object.""" + parts = [f"expression={self.expression!r}"] + if self.title is not None: + parts.append(f"title={self.title!r}") + if self.description is not None: + parts.append(f"description={self.description!r}") + return f"Condition({', '.join(parts)})" diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 036e22458326..51f1809bfe73 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -167,7 +167,10 @@ def test_from_api_repr_wo_role(self): entity_type="view", entity_id=resource["view"], ) - self.assertEqual(entry, exp_entry) + + assert entry.entity_type == exp_entry.entity_type + assert entry.entity_id == exp_entry.entity_id + assert entry.role is None def test_to_api_repr_w_extra_properties(self): resource = { @@ -179,15 +182,6 @@ def test_to_api_repr_w_extra_properties(self): exp_resource = entry.to_api_repr() self.assertEqual(resource, exp_resource) - def test_from_api_repr_entries_w_extra_keys(self): - resource = { - "role": "READER", - "specialGroup": "projectReaders", - "userByEmail": "salmon@example.com", - } - with self.assertRaises(ValueError): - self._get_target_class().from_api_repr(resource) - def test_view_getter_setter(self): view = { "projectId": "my_project", @@ -307,7 +301,10 @@ def test_dataset_getter_setter_dataset_ref(self): entry.dataset = dataset_ref resource = entry.to_api_repr() exp_resource = { - "dataset": {"dataset": dataset_ref, "targetTypes": None}, + "dataset": { + "dataset": {"datasetId": "my_dataset", "projectId": "my-project"}, + "targetTypes": None, + }, "role": None, } self.assertEqual(resource, exp_resource) @@ -494,6 +491,262 @@ def test_dataset_target_types_getter_setter_w_dataset(self): self.assertEqual(entry.dataset_target_types, target_types) +# --- Tests for AccessEntry when using Condition --- + +EXPRESSION = "request.time < timestamp('2026-01-01T00:00:00Z')" +TITLE = "Expires end 2025" +DESCRIPTION = "Access expires at the start of 2026." + + +@pytest.fixture +def condition_1(): + """Provides a sample Condition object.""" + return Condition( + expression=EXPRESSION, + title=TITLE, + description=DESCRIPTION, + ) + + +@pytest.fixture +def condition_1_api_repr(): + """Provides the API representation for condition_1.""" + # Use the actual to_api_repr method + return Condition( + expression=EXPRESSION, + title=TITLE, + description=DESCRIPTION, + ).to_api_repr() + + +@pytest.fixture +def condition_2(): + """Provides a second, different Condition object.""" + return Condition( + expression="resource.name.startsWith('projects/_/buckets/restricted/')", + title="Restricted Buckets", + ) + + +@pytest.fixture +def condition_2_api_repr(): + """Provides the API representation for condition2.""" + # Use the actual to_api_repr method + return Condition( + expression="resource.name.startsWith('projects/_/buckets/restricted/')", + title="Restricted Buckets", + ).to_api_repr() + + +class TestAccessEntryAndCondition: + @staticmethod + def _get_target_class(): + return AccessEntry + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + # Test __init__ without condition + def test_init_without_condition(self): + entry = AccessEntry("READER", "userByEmail", "test@example.com") + assert entry.role == "READER" + assert entry.entity_type == "userByEmail" + assert entry.entity_id == "test@example.com" + assert entry.condition is None + # Accessing _properties is for internal verification in tests + assert "condition" not in entry._properties + + # Test __init__ with condition object + def test_init_with_condition_object(self, condition_1, condition_1_api_repr): + entry = AccessEntry( + "READER", "userByEmail", "test@example.com", condition=condition_1 + ) + assert entry.condition == condition_1 + assert entry._properties.get("condition") == condition_1_api_repr + + # Test __init__ with condition=None + def test_init_with_condition_none(self): + entry = AccessEntry("READER", "userByEmail", "test@example.com", condition=None) + assert entry.condition is None + + # Test condition getter/setter + def test_condition_getter_setter( + self, condition_1, condition_1_api_repr, condition_2, condition_2_api_repr + ): + entry = AccessEntry("WRITER", "group", "admins@example.com") + assert entry.condition is None + + # Set condition 1 + entry.condition = condition_1 + assert entry.condition.to_api_repr() == condition_1_api_repr + assert entry._properties.get("condition") == condition_1_api_repr + + # Set condition 2 + entry.condition = condition_2 + assert entry.condition.to_api_repr() == condition_2_api_repr + assert entry._properties.get("condition") != condition_1_api_repr + assert entry._properties.get("condition") == condition_2.to_api_repr() + + # Set back to None + entry.condition = None + assert entry.condition is None + + # Set condition using a dict + entry.condition = condition_1_api_repr + assert entry._properties.get("condition") == condition_1_api_repr + + # Test setter validation + def test_condition_setter_invalid_type(self): + entry = AccessEntry("READER", "domain", "example.com") + with pytest.raises( + TypeError, match="condition must be a Condition object, dict, or None" + ): + entry.condition = 123 # type: ignore + + # Test equality/hash without condition + def test_equality_and_hash_without_condition(self): + entry1 = AccessEntry("OWNER", "specialGroup", "projectOwners") + entry2 = AccessEntry("OWNER", "specialGroup", "projectOwners") + entry3 = AccessEntry("WRITER", "specialGroup", "projectOwners") + assert entry1 == entry2 + assert entry1 != entry3 + assert hash(entry1) == hash(entry2) + assert hash(entry1) != hash(entry3) # Usually true + + def test_equality_and_hash_with_condition(self, condition_1, condition_2): + cond1a = Condition( + condition_1.expression, condition_1.title, condition_1.description + ) + cond1b = Condition( + condition_1.expression, condition_1.title, condition_1.description + ) # Same values, different object + + entry1a = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=cond1a + ) + entry1b = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=cond1b + ) # Different Condition instance + entry2 = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=condition_2 + ) + entry3 = AccessEntry("READER", "userByEmail", "a@example.com") # No condition + entry4 = AccessEntry( + "WRITER", "userByEmail", "a@example.com", condition=cond1a + ) # Different role + + assert entry1a == entry1b + assert entry1a != entry2 + assert entry1a != entry3 + assert entry1a != entry4 + assert entry2 != entry3 + + assert hash(entry1a) == hash(entry1b) + assert hash(entry1a) != hash(entry2) # Usually true + assert hash(entry1a) != hash(entry3) # Usually true + assert hash(entry1a) != hash(entry4) # Usually true + + # Test to_api_repr with condition + def test_to_api_repr_with_condition(self, condition_1, condition_1_api_repr): + entry = AccessEntry( + "WRITER", "groupByEmail", "editors@example.com", condition=condition_1 + ) + expected_repr = { + "role": "WRITER", + "groupByEmail": "editors@example.com", + "condition": condition_1_api_repr, + } + assert entry.to_api_repr() == expected_repr + + def test_view_property_with_condition(self, condition_1): + """Test setting/getting view property when condition is present.""" + entry = AccessEntry(role=None, entity_type="view", condition=condition_1) + view_ref = TableReference(DatasetReference("proj", "dset"), "view_tbl") + entry.view = view_ref # Use the setter + assert entry.view == view_ref + assert entry.condition == condition_1 # Condition should persist + assert entry.role is None + assert entry.entity_type == "view" + + # Check internal representation + assert "view" in entry._properties + assert "condition" in entry._properties + + def test_user_by_email_property_with_condition(self, condition_1): + """Test setting/getting user_by_email property when condition is present.""" + entry = AccessEntry( + role="READER", entity_type="userByEmail", condition=condition_1 + ) + email = "test@example.com" + entry.user_by_email = email # Use the setter + assert entry.user_by_email == email + assert entry.condition == condition_1 # Condition should persist + assert entry.role == "READER" + assert entry.entity_type == "userByEmail" + + # Check internal representation + assert "userByEmail" in entry._properties + assert "condition" in entry._properties + + # Test from_api_repr without condition + def test_from_api_repr_without_condition(self): + api_repr = {"role": "OWNER", "userByEmail": "owner@example.com"} + entry = AccessEntry.from_api_repr(api_repr) + assert entry.role == "OWNER" + assert entry.entity_type == "userByEmail" + assert entry.entity_id == "owner@example.com" + assert entry.condition is None + + # Test from_api_repr with condition + def test_from_api_repr_with_condition(self, condition_1, condition_1_api_repr): + api_repr = { + "role": "READER", + "view": {"projectId": "p", "datasetId": "d", "tableId": "v"}, + "condition": condition_1_api_repr, + } + entry = AccessEntry.from_api_repr(api_repr) + assert entry.role == "READER" + assert entry.entity_type == "view" + # The entity_id for view/routine/dataset is the dict itself + assert entry.entity_id == {"projectId": "p", "datasetId": "d", "tableId": "v"} + assert entry.condition == condition_1 + + # Test from_api_repr edge case + def test_from_api_repr_no_entity(self, condition_1, condition_1_api_repr): + api_repr = {"role": "READER", "condition": condition_1_api_repr} + entry = AccessEntry.from_api_repr(api_repr) + assert entry.role == "READER" + assert entry.entity_type is None + assert entry.entity_id is None + assert entry.condition == condition_1 + + def test_dataset_property_with_condition(self, condition_1): + project = "my-project" + dataset_id = "my_dataset" + dataset_ref = DatasetReference(project, dataset_id) + entry = self._make_one(None) + entry.dataset = dataset_ref + entry.condition = condition_1 + + resource = entry.to_api_repr() + exp_resource = { + "role": None, + "dataset": { + "dataset": {"datasetId": "my_dataset", "projectId": "my-project"}, + "targetTypes": None, + }, + "condition": { + "expression": "request.time < timestamp('2026-01-01T00:00:00Z')", + "title": "Expires end 2025", + "description": "Access expires at the start of 2026.", + }, + } + assert resource == exp_resource + # Check internal representation + assert "dataset" in entry._properties + assert "condition" in entry._properties + + class TestDatasetReference(unittest.TestCase): @staticmethod def _get_target_class(): @@ -821,7 +1074,15 @@ def test_ctor_explicit(self): self.assertEqual( dataset.path, "/projects/%s/datasets/%s" % (OTHER_PROJECT, self.DS_ID) ) - self.assertEqual(dataset.access_entries, entries) + # creating a list of entries relies on AccessEntry.from_api_repr + # which does not create an object in exactly the same way as calling the + # class directly. We rely on calls to .entity_type and .entity_id to + # finalize the settings on each class. + entry_pairs = zip(dataset.access_entries, entries) + for pair in entry_pairs: + assert pair[0].role == pair[1].role + assert pair[0].entity_type == pair[1].entity_type + assert pair[0].entity_id == pair[1].entity_id self.assertIsNone(dataset.created) self.assertIsNone(dataset.full_dataset_id) @@ -854,8 +1115,18 @@ def test_access_entries_setter(self): dataset = self._make_one(self.DS_REF) phred = AccessEntry("OWNER", "userByEmail", "phred@example.com") bharney = AccessEntry("OWNER", "userByEmail", "bharney@example.com") - dataset.access_entries = [phred, bharney] - self.assertEqual(dataset.access_entries, [phred, bharney]) + entries = [phred, bharney] + dataset.access_entries = entries + + # creating a list of entries relies on AccessEntry.from_api_repr + # which does not create an object in exactly the same way as calling the + # class directly. We rely on calls to .entity_type and .entity_id to + # finalize the settings on each class. + entry_pairs = zip(dataset.access_entries, entries) + for pair in entry_pairs: + assert pair[0].role == pair[1].role + assert pair[0].entity_type == pair[1].entity_type + assert pair[0].entity_id == pair[1].entity_id def test_default_partition_expiration_ms(self): dataset = self._make_one("proj.dset") @@ -1383,3 +1654,40 @@ def test_validation_expression_required_from_api(self): ValueError, match="API representation missing required 'expression' field." ): Condition.from_api_repr(api_repr) + + def test___eq___equality(self, condition_1): + result = condition_1 + expected = condition_1 + assert result == expected + + def test___eq___equality_not_condition(self, condition_1): + result = condition_1 + other = "not a condition" + expected = result.__eq__(other) + assert expected is NotImplemented + + def test__ne__not_equality(self): + result = condition_1 + expected = condition_2 + assert result != expected + + def test__hash__function(self, condition_2): + cond1 = Condition( + expression=self.EXPRESSION, title=self.TITLE, description=self.DESCRIPTION + ) + cond2 = cond1 + cond_not_equal = condition_2 + assert cond1 == cond2 + assert cond1 is cond2 + assert hash(cond1) == hash(cond2) + assert hash(cond1) is not None + assert cond_not_equal != cond1 + assert hash(cond_not_equal) != hash(cond1) + + def test__hash__with_minimal_inputs(self): + cond1 = Condition( + expression="example", + title=None, + description=None, + ) + assert hash(cond1) is not None From af0121c756f938e79e2d25aa8d67d3ecac06b034 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 1 May 2025 17:20:21 -0400 Subject: [PATCH 1919/2016] feat: add dataset access policy version attribute (#2169) * feat: adds condition class and assoc. unit tests * Updates two test cases for empty string * Updates tests for clarity * Updates access_policy_version setter and unittest --- .../google/cloud/bigquery/dataset.py | 15 ++++++++-- .../tests/unit/test_dataset.py | 30 +++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index 670fe127c59d..d225b710657f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -589,6 +589,7 @@ class Dataset(object): "default_rounding_mode": "defaultRoundingMode", "resource_tags": "resourceTags", "external_catalog_dataset_options": "externalCatalogDatasetOptions", + "access_policy_version": "accessPolicyVersion", } def __init__(self, dataset_ref) -> None: @@ -979,6 +980,16 @@ def external_catalog_dataset_options(self, value): self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] ] = (value.to_api_repr() if value is not None else None) + @property + def access_policy_version(self): + return self._properties.get("accessPolicyVersion") + + @access_policy_version.setter + def access_policy_version(self, value): + if not isinstance(value, int) and value is not None: + raise ValueError("Pass an integer, or None") + self._properties["accessPolicyVersion"] = value + @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. @@ -1217,8 +1228,8 @@ def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition": return cls( expression=resource["expression"], - title=resource.get("title", None), - description=resource.get("description", None), + title=resource.get("title"), + description=resource.get("description"), ) def __eq__(self, other: object) -> bool: diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 51f1809bfe73..9414308276b5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -1049,6 +1049,7 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) self.assertEqual(dataset.is_case_insensitive, False) + self.assertIsNone(dataset.access_policy_version) def test_ctor_string(self): dataset = self._make_one("some-project.some_dset") @@ -1423,6 +1424,35 @@ def test_external_catalog_dataset_options_to_api_repr(self): expected = api_repr["externalCatalogDatasetOptions"] assert result == expected + def test_access_policy_version_valid_input(self): + dataset = self._make_one(self.DS_REF) + # Valid inputs for access_policy_version are currently + # ints 1, 2, 3, and None + # We rely upon the BQ backend to validate acceptable integer + # values, rather than perform that validation in the client. + for expected in [1, 2, 3, None]: + # set property using setter and integer + dataset.access_policy_version = expected + + # check getter and _properties dict + assert ( + dataset.access_policy_version == expected + ), f"Expected {expected} but got {dataset.access_policy_version}" + assert dataset._properties["accessPolicyVersion"] == expected + + def test_access_policy_version_invalid_input(self): + dataset = self._make_one(self.DS_REF) + # Valid inputs for access_policy_version are currently + # ints 1, 2, 3, and None + + with pytest.raises(ValueError): + invalid_value = "a string" + dataset.access_policy_version = invalid_value + + with pytest.raises(ValueError): + invalid_value = 42.0 + dataset.access_policy_version = invalid_value + class TestDatasetListItem(unittest.TestCase): @staticmethod From 7694b2b5494cd946deacd8f27325206d0a1460e2 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 1 May 2025 14:52:26 -0700 Subject: [PATCH 1920/2016] feat: add WRITE_TRUNCATE_DATA enum (#2166) This PR documents the new WRITE_TRUNCATE_DATA write disposition by adding the enum value. internal issue: b/406848221 --- packages/google-cloud-bigquery/google/cloud/bigquery/enums.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index b32fc82009de..203ea3c7b4a4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -338,6 +338,10 @@ class WriteDisposition(object): WRITE_TRUNCATE = "WRITE_TRUNCATE" """If the table already exists, BigQuery overwrites the table data.""" + WRITE_TRUNCATE_DATA = "WRITE_TRUNCATE_DATA" + """For existing tables, truncate data but preserve existing schema + and constraints.""" + WRITE_EMPTY = "WRITE_EMPTY" """If the table already exists and contains data, a 'duplicate' error is returned in the job result.""" From 5462a3485ea8c9829c7d1566b24880af0c7f53c9 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 12 May 2025 18:34:55 +0200 Subject: [PATCH 1921/2016] chore(deps): update all dependencies (#2158) * chore(deps): update all dependencies * Update samples/geography/requirements.txt --------- Co-authored-by: Chalmer Lowe --- .../samples/desktopapp/requirements-test.txt | 2 +- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements.txt | 25 ++++++++++--------- .../samples/magics/requirements-test.txt | 2 +- .../samples/magics/requirements.txt | 2 +- .../samples/notebooks/requirements-test.txt | 2 +- .../samples/notebooks/requirements.txt | 6 ++--- .../samples/snippets/requirements-test.txt | 2 +- 8 files changed, 22 insertions(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 183230cf4c03..6abea3b4d344 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index fa349e0d33c1..b98f4ace983e 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ google-cloud-bigquery==3.31.0 -google-auth-oauthlib==1.2.1 +google-auth-oauthlib==1.2.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 37bcdf687e45..2b5a71c8c1e0 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,8 +1,9 @@ attrs==25.3.0 -certifi==2025.1.31 +certifi==2025.4.26 cffi==1.17.1 -charset-normalizer==3.4.1 -click==8.1.8 +charset-normalizer==3.4.2 +click===8.1.8; python_version == '3.9' +click==8.2.0; python_version >= '3.10' click-plugins==1.1.1 cligj==0.7.2 db-dtypes==1.4.2 @@ -10,21 +11,21 @@ Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 -google-auth==2.38.0 +google-auth==2.40.1 google-cloud-bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.69.2 +googleapis-common-protos==1.70.0 grpcio==1.71.0 idna==3.10 munch==4.0.0 -mypy-extensions==1.0.0 -packaging==24.2 +mypy-extensions==1.1.0 +packaging==25.0 pandas==2.2.3 proto-plus==1.26.1 -pyarrow==19.0.1 +pyarrow==20.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.22 @@ -33,10 +34,10 @@ python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 -rsa==4.9 +rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.0; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.1 +typing-extensions==4.13.2 typing-inspect==0.9.0 -urllib3==2.3.0 +urllib3==2.4.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 183230cf4c03..6abea3b4d344 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 3ab2159512eb..2c9e158c0c70 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.9.0 db-dtypes==1.4.2 google.cloud.bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 pandas==2.2.3 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 183230cf4c03..6abea3b4d344 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index ca5505a2eb12..d1e2f39fb588 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ bigquery-magics==0.9.0 db-dtypes==1.4.2 google-cloud-bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' -ipython==9.0.2; python_version >= '3.10' +ipython==9.2.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.1; python_version >= '3.10' +matplotlib==3.10.3; python_version >= '3.10' pandas==2.2.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 0cf0bb6b4f42..6760e1228d13 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 From dbfc74ee6fb548b7c346683bb6bf305417ca583e Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 13:04:01 -0400 Subject: [PATCH 1922/2016] chore(main): release 3.32.0 (#2152) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 19 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 4b115464cd31..ff1bd7acc020 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.32.0](https://github.com/googleapis/python-bigquery/compare/v3.31.0...v3.32.0) (2025-05-12) + + +### Features + +* Add dataset access policy version attribute ([#2169](https://github.com/googleapis/python-bigquery/issues/2169)) ([b7656b9](https://github.com/googleapis/python-bigquery/commit/b7656b97c1bd6c204d0508b1851d114719686655)) +* Add preview support for incremental results ([#2145](https://github.com/googleapis/python-bigquery/issues/2145)) ([22b80bb](https://github.com/googleapis/python-bigquery/commit/22b80bba9d0bed319fd3102e567906c9b458dd02)) +* Add WRITE_TRUNCATE_DATA enum ([#2166](https://github.com/googleapis/python-bigquery/issues/2166)) ([4692747](https://github.com/googleapis/python-bigquery/commit/46927479085f13fd326e3f2388f60dfdd37f7f69)) +* Adds condition class and assoc. unit tests ([#2159](https://github.com/googleapis/python-bigquery/issues/2159)) ([a69d6b7](https://github.com/googleapis/python-bigquery/commit/a69d6b796d2edb6ba453980c9553bc9b206c5a6e)) +* Support BigLakeConfiguration (managed Iceberg tables) ([#2162](https://github.com/googleapis/python-bigquery/issues/2162)) ([a1c8e9a](https://github.com/googleapis/python-bigquery/commit/a1c8e9aaf60986924868d54a0ab0334e77002a39)) +* Update the AccessEntry class with a new condition attribute and unit tests ([#2163](https://github.com/googleapis/python-bigquery/issues/2163)) ([7301667](https://github.com/googleapis/python-bigquery/commit/7301667272dfbdd04b1a831418a9ad2d037171fb)) + + +### Bug Fixes + +* `query()` now warns when `job_id` is set and the default `job_retry` is ignored ([#2167](https://github.com/googleapis/python-bigquery/issues/2167)) ([ca1798a](https://github.com/googleapis/python-bigquery/commit/ca1798aaee2d5905fe688d3097f8ee5c989da333)) +* Empty record dtypes ([#2147](https://github.com/googleapis/python-bigquery/issues/2147)) ([77d7173](https://github.com/googleapis/python-bigquery/commit/77d71736fcc006d3ab8f8ba17955ad5f06e21876)) +* Table iterator should not use bqstorage when page_size is not None ([#2154](https://github.com/googleapis/python-bigquery/issues/2154)) ([e89a707](https://github.com/googleapis/python-bigquery/commit/e89a707b162182ededbf94cc9a0f7594bc2be475)) + ## [3.31.0](https://github.com/googleapis/python-bigquery/compare/v3.30.0...v3.31.0) (2025-03-20) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index c0f7a96d69ea..fe13d2477ea6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.31.0" +__version__ = "3.32.0" From 9e300e28eb26c2855b79323b5ae9434699baa48e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 12 May 2025 20:15:23 +0200 Subject: [PATCH 1923/2016] chore(deps): update dependency db-dtypes to v1.4.3 (#2178) Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 2b5a71c8c1e0..3ff1b294491e 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -6,7 +6,7 @@ click===8.1.8; python_version == '3.9' click==8.2.0; python_version >= '3.10' click-plugins==1.1.1 cligj==0.7.2 -db-dtypes==1.4.2 +db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 2c9e158c0c70..b000aa50c494 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,5 +1,5 @@ bigquery_magics==0.9.0 -db-dtypes==1.4.2 +db-dtypes==1.4.3 google.cloud.bigquery==3.31.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index d1e2f39fb588..d80ffcd09742 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ bigquery-magics==0.9.0 -db-dtypes==1.4.2 +db-dtypes==1.4.3 google-cloud-bigquery==3.31.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' From d462e2d63dfc2c3a11c972b6ea91e67e4c03f2f3 Mon Sep 17 00:00:00 2001 From: Brian Hulette Date: Mon, 12 May 2025 13:10:11 -0700 Subject: [PATCH 1924/2016] feat: add ability to set autodetect_schema query param in update_table (#2171) * Add ability to set autodetect_schema query_param * fixup! Add ability to set autodetect_schema query_param * fixup! Add ability to set autodetect_schema query_param * fixup! Add ability to set autodetect_schema query_param --------- Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/client.py | 11 +++++ .../tests/system/test_client.py | 47 +++++++++++++++++++ .../tests/unit/test_client.py | 12 +++-- 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index e7cafc47e869..8ad1586f40d3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -1389,6 +1389,7 @@ def update_table( self, table: Table, fields: Sequence[str], + autodetect_schema: bool = False, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Table: @@ -1419,6 +1420,10 @@ def update_table( fields (Sequence[str]): The fields of ``table`` to change, spelled as the :class:`~google.cloud.bigquery.table.Table` properties. + autodetect_schema (bool): + Specifies if the schema of the table should be autodetected when + updating the table from the underlying source. Only applicable + for external tables. retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1438,12 +1443,18 @@ def update_table( path = table.path span_attributes = {"path": path, "fields": fields} + if autodetect_schema: + query_params = {"autodetect_schema": True} + else: + query_params = {} + api_response = self._call_api( retry, span_name="BigQuery.updateTable", span_attributes=span_attributes, method="PATCH", path=path, + query_params=query_params, data=partial, headers=headers, timeout=timeout, diff --git a/packages/google-cloud-bigquery/tests/system/test_client.py b/packages/google-cloud-bigquery/tests/system/test_client.py index 9df572b14c10..6584ca03c3d0 100644 --- a/packages/google-cloud-bigquery/tests/system/test_client.py +++ b/packages/google-cloud-bigquery/tests/system/test_client.py @@ -978,6 +978,53 @@ def test_update_table_constraints(self): ) self.assertIsNone(reference_table3.table_constraints, None) + def test_update_table_autodetect_schema(self): + dataset = self.temp_dataset(_make_dataset_id("bq_update_table_test")) + + # Create an external table, restrict schema to one field + TABLE_NAME = "test_table" + set_schema = [bigquery.SchemaField("username", "STRING", mode="NULLABLE")] + table_arg = Table(dataset.table(TABLE_NAME)) + + # Create an external_config and include it in the table arguments + external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO) + external_config.source_uris = SOURCE_URIS_AVRO + external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO + external_config.schema = set_schema + table_arg.external_data_configuration = external_config + + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + self.assertTrue(_table_exists(table)) + + self.assertEqual(table.schema, set_schema) + + # Update table with schema autodetection + updated_table_arg = Table(dataset.table(TABLE_NAME)) + + # Update the external_config and include it in the table arguments + updated_external_config = copy.deepcopy(external_config) + updated_external_config.autodetect = True + updated_external_config.schema = None + updated_table_arg.external_data_configuration = updated_external_config + + # PATCH call with autodetect_schema=True to trigger schema inference + updated_table = Config.CLIENT.update_table( + updated_table_arg, ["external_data_configuration"], autodetect_schema=True + ) + + # The updated table should have a schema inferred from the reference + # file, which has all four fields. + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + self.assertEqual(updated_table.schema, expected_schema) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 34ef680dd92a..b8140df6600a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -2385,7 +2385,7 @@ def test_update_table(self): "resourceTags": {"123456789012/key": "value"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, timeout=7.5 + method="PATCH", data=sent, path="/" + path, timeout=7.5, query_params={} ) self.assertEqual(updated_table.description, table.description) self.assertEqual(updated_table.friendly_name, table.friendly_name) @@ -2439,6 +2439,7 @@ def test_update_table_w_custom_property(self): path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, timeout=DEFAULT_TIMEOUT, + query_params={}, ) self.assertEqual( updated_table._properties["newAlphaProperty"], "unreleased property" @@ -2475,6 +2476,7 @@ def test_update_table_only_use_legacy_sql(self): path="/%s" % path, data={"view": {"useLegacySql": True}}, timeout=DEFAULT_TIMEOUT, + query_params={}, ) self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -2567,9 +2569,10 @@ def test_update_table_w_query(self): "schema": schema_resource, }, timeout=DEFAULT_TIMEOUT, + query_params={}, ) - def test_update_table_w_schema_None(self): + def test_update_table_w_schema_None_autodetect_schema(self): # Simulate deleting schema: not sure if back-end will actually # allow this operation, but the spec says it is optional. path = "projects/%s/datasets/%s/tables/%s" % ( @@ -2611,7 +2614,9 @@ def test_update_table_w_schema_None(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["schema"]) + updated_table = client.update_table( + table, ["schema"], autodetect_schema=True + ) final_attributes.assert_called_once_with( {"path": "/%s" % path, "fields": ["schema"]}, client, None @@ -2623,6 +2628,7 @@ def test_update_table_w_schema_None(self): sent = {"schema": {"fields": None}} self.assertEqual(req[1]["data"], sent) self.assertEqual(req[1]["path"], "/%s" % path) + self.assertEqual(req[1]["query_params"], {"autodetect_schema": True}) self.assertEqual(len(updated_table.schema), 0) def test_update_table_delete_property(self): From 9af1f757d8291a62543500b49e315c1c97ec5228 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 07:53:27 -0400 Subject: [PATCH 1925/2016] chore(python): remove docs from templates (#2164) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(python): remove docs from templates Source-Link: https://github.com/googleapis/synthtool/commit/3fca64a4bb1772258f8cc939a9192b17dbbbf335 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:3b3a31be60853477bc39ed8d9bac162cac3ba083724cecaad54eb81d4e4dae9c * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove replacement in owlbot.py --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou Co-authored-by: Chalmer Lowe --- .../google-cloud-bigquery/.github/.OwlBot.lock.yaml | 4 ++-- packages/google-cloud-bigquery/owlbot.py | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index 51b21a62b7b8..cea9eb68f368 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb -# created: 2025-04-10T17:00:10.042601326Z + digest: sha256:3b3a31be60853477bc39ed8d9bac162cac3ba083724cecaad54eb81d4e4dae9c +# created: 2025-04-16T22:40:03.123475241Z diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 8cfa2b097238..60759adbe733 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -109,16 +109,6 @@ python.py_samples() -s.replace( - "docs/conf.py", - r'\{"members": True\}', - '{"members": True, "inherited-members": True}', -) -s.replace( - "docs/conf.py", - r"exclude_patterns = \[", - '\\g<0>\n "google/cloud/bigquery_v2/**", # Legacy proto-based types.', -) s.replace( "samples/**/noxfile.py", 'BLACK_VERSION = "black==22.3.0"', From 95db0a4db130b36c803de387d8b2f60348329646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 14 May 2025 04:36:37 -0500 Subject: [PATCH 1926/2016] fix: ensure AccessEntry equality and repr uses the correct `entity_type` (#2182) * fix: ensure AccessEntry equality and repr uses the correct `entity_type` * add a test for access_entries --- .../google/cloud/bigquery/dataset.py | 4 +- .../tests/unit/test_dataset.py | 44 +++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index d225b710657f..f788275cd01e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -512,7 +512,7 @@ def __ne__(self, other): return not self == other def __repr__(self): - return f"" + return f"" def _key(self): """A tuple key that uniquely describes this field. @@ -531,7 +531,7 @@ def _key(self): properties["condition"] = condition_key prop_tup = tuple(sorted(properties.items())) - return (self.role, self._entity_type, self.entity_id, prop_tup) + return (self.role, self.entity_type, self.entity_id, prop_tup) def __hash__(self): return hash(self._key()) diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 9414308276b5..5cce2a9a7f03 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -613,6 +613,15 @@ def test_equality_and_hash_without_condition(self): assert hash(entry1) == hash(entry2) assert hash(entry1) != hash(entry3) # Usually true + def test_equality_and_hash_from_api_repr(self): + """Compare equal entries where one was created via from_api_repr.""" + entry1 = AccessEntry("OWNER", "specialGroup", "projectOwners") + entry2 = AccessEntry.from_api_repr( + {"role": "OWNER", "specialGroup": "projectOwners"} + ) + assert entry1 == entry2 + assert hash(entry1) == hash(entry2) + def test_equality_and_hash_with_condition(self, condition_1, condition_2): cond1a = Condition( condition_1.expression, condition_1.title, condition_1.description @@ -746,6 +755,13 @@ def test_dataset_property_with_condition(self, condition_1): assert "dataset" in entry._properties assert "condition" in entry._properties + def test_repr_from_api_repr(self): + """Check that repr() includes the correct entity_type when the object is initialized from a dictionary.""" + api_repr = {"role": "OWNER", "userByEmail": "owner@example.com"} + entry = AccessEntry.from_api_repr(api_repr) + entry_str = repr(entry) + assert entry_str == "" + class TestDatasetReference(unittest.TestCase): @staticmethod @@ -1097,6 +1113,34 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.location) self.assertEqual(dataset.is_case_insensitive, False) + def test_access_entries_getter_from_api_repr(self): + """Check that `in` works correctly when Dataset is made via from_api_repr().""" + from google.cloud.bigquery.dataset import AccessEntry + + dataset = self._get_target_class().from_api_repr( + { + "datasetReference": {"projectId": "my-proj", "datasetId": "my_dset"}, + "access": [ + { + "role": "OWNER", + "userByEmail": "uilma@example.com", + }, + { + "role": "READER", + "groupByEmail": "rhubbles@example.com", + }, + ], + } + ) + assert ( + AccessEntry("OWNER", "userByEmail", "uilma@example.com") + in dataset.access_entries + ) + assert ( + AccessEntry("READER", "groupByEmail", "rhubbles@example.com") + in dataset.access_entries + ) + def test_access_entries_setter_non_list(self): dataset = self._make_one(self.DS_REF) with self.assertRaises(TypeError): From 55772d8bf0cbbc52db461f590431fab0f0fff209 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 14 May 2025 13:34:26 -0400 Subject: [PATCH 1927/2016] feat: Add dtype parameters to to_geodataframe functions (#2176) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Add dtype parameters to to_geodataframe This change adds support for `bool_dtype`, `int_dtype`, `float_dtype`, and `string_dtype` parameters to the `to_geodataframe` method in `RowIterator` and `QueryJob`. These parameters allow you to specify the desired pandas dtypes for boolean, integer, float, and string columns when converting BigQuery results to GeoDataFrames. The changes include: - Updating `RowIterator.to_geodataframe` to accept and pass these dtype parameters to the underlying `to_dataframe` method. - Updating `QueryJob.to_geodataframe` to accept and pass these dtype parameters to the underlying `RowIterator.to_geodataframe` method. - Adding unit tests to verify the correct handling of these parameters. * updates to several tests re geopandas as well as imports * updates to enum import * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update pyproject.toml Co-authored-by: Tim Sweña (Swast) * Update testing/constraints-3.9.txt Co-authored-by: Tim Sweña (Swast) --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: Owl Bot Co-authored-by: Tim Sweña (Swast) --- packages/google-cloud-bigquery/docs/conf.py | 3 +- .../google/cloud/bigquery/job/query.py | 36 ++++++ .../google/cloud/bigquery/table.py | 44 ++++++++ packages/google-cloud-bigquery/noxfile.py | 4 +- packages/google-cloud-bigquery/pyproject.toml | 8 +- .../testing/constraints-3.9.txt | 2 +- .../tests/unit/job/test_query_pandas.py | 6 + .../tests/unit/test_table.py | 9 +- .../tests/unit/test_table_pandas.py | 103 ++++++++++++++++++ 9 files changed, 205 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/docs/conf.py b/packages/google-cloud-bigquery/docs/conf.py index 826298090d45..df1c18b68e31 100644 --- a/packages/google-cloud-bigquery/docs/conf.py +++ b/packages/google-cloud-bigquery/docs/conf.py @@ -61,7 +61,7 @@ # autodoc/autosummary flags autoclass_content = "both" -autodoc_default_options = {"members": True, "inherited-members": True} +autodoc_default_options = {"members": True} autosummary_generate = True @@ -109,7 +109,6 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [ - "google/cloud/bigquery_v2/**", # Legacy proto-based types. "_build", "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index f14039bc0b43..f9b99b7fb8d5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -2102,6 +2102,10 @@ def to_geodataframe( create_bqstorage_client: bool = True, max_results: Optional[int] = None, geography_column: Optional[str] = None, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "geopandas.GeoDataFrame": """Return a GeoPandas GeoDataFrame from a QueryJob @@ -2152,6 +2156,34 @@ def to_geodataframe( identifies which one to use to construct a GeoPandas GeoDataFrame. This option can be ommitted if there's only one GEOGRAPHY column. + bool_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) + to convert BigQuery Boolean type, instead of relying on the default + ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type + int_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) + to convert BigQuery Integer types, instead of relying on the default + ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("int64")``. A list of BigQuery + Integer types can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + float_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) + to convert BigQuery Float type, instead of relying on the default + ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("float64")``. BigQuery Float + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + string_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to + convert BigQuery String type, instead of relying on the default + ``numpy.dtype("object")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("object")``. BigQuery String + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type Returns: geopandas.GeoDataFrame: @@ -2175,6 +2207,10 @@ def to_geodataframe( progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, geography_column=geography_column, + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, ) def __iter__(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 503ca4e71ce8..e084468f6bf9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -2727,6 +2727,10 @@ def to_geodataframe( progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_column: Optional[str] = None, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "geopandas.GeoDataFrame": """Create a GeoPandas GeoDataFrame by loading all pages of a query. @@ -2778,6 +2782,34 @@ def to_geodataframe( identifies which one to use to construct a geopandas GeoDataFrame. This option can be ommitted if there's only one GEOGRAPHY column. + bool_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) + to convert BigQuery Boolean type, instead of relying on the default + ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type + int_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) + to convert BigQuery Integer types, instead of relying on the default + ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("int64")``. A list of BigQuery + Integer types can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + float_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) + to convert BigQuery Float type, instead of relying on the default + ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("float64")``. BigQuery Float + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + string_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to + convert BigQuery String type, instead of relying on the default + ``numpy.dtype("object")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("object")``. BigQuery String + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type Returns: geopandas.GeoDataFrame: @@ -2829,6 +2861,10 @@ def to_geodataframe( progress_bar_type, create_bqstorage_client, geography_as_object=True, + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, ) return geopandas.GeoDataFrame( @@ -2932,6 +2968,10 @@ def to_geodataframe( progress_bar_type=None, create_bqstorage_client=True, geography_column: Optional[str] = None, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2941,6 +2981,10 @@ def to_geodataframe( progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. geography_column (str): Ignored. Added for compatibility with RowIterator. + bool_dtype (Any): Ignored. Added for compatibility with RowIterator. + int_dtype (Any): Ignored. Added for compatibility with RowIterator. + float_dtype (Any): Ignored. Added for compatibility with RowIterator. + string_dtype (Any): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index c2b4bbb50355..1922a68a5df6 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -109,9 +109,7 @@ def default(session, install_extras=True): # that logic (and the associated tests) we avoid installing the [ipython] extra # which has a downstream effect of then avoiding installing bigquery_magics. if install_extras and session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - install_target = ( - ".[bqstorage,pandas,ipywidgets,geopandas,tqdm,opentelemetry,bigquery_v2]" - ) + install_target = ".[bqstorage,pandas,ipywidgets,geopandas,matplotlib,tqdm,opentelemetry,bigquery_v2]" elif install_extras: # run against all other UNIT_TEST_PYTHON_VERSIONS install_target = ".[all]" else: diff --git a/packages/google-cloud-bigquery/pyproject.toml b/packages/google-cloud-bigquery/pyproject.toml index 38d74cdd05f2..9c91a2fc8513 100644 --- a/packages/google-cloud-bigquery/pyproject.toml +++ b/packages/google-cloud-bigquery/pyproject.toml @@ -82,7 +82,11 @@ pandas = [ ipywidgets = ["ipywidgets >= 7.7.1", "ipykernel >= 6.2.0"] geopandas = ["geopandas >= 0.9.0, < 2.0.0", "Shapely >= 1.8.4, < 3.0.0"] ipython = ["ipython >= 7.23.1", "bigquery-magics >= 0.6.0"] -tqdm = ["tqdm >= 4.7.4, < 5.0.0"] +matplotlib = [ + "matplotlib >= 3.7.1, <= 3.9.2; python_version == '3.9'", + "matplotlib >= 3.10.3; python_version >= '3.10'", +] +tqdm = ["tqdm >= 4.23.4, < 5.0.0"] opentelemetry = [ "opentelemetry-api >= 1.1.0", "opentelemetry-sdk >= 1.1.0", @@ -93,7 +97,7 @@ bigquery_v2 = [ "protobuf >= 3.20.2, < 7.0.0, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. ] all = [ - "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,tqdm,opentelemetry,bigquery_v2]", + "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,matplotlib,tqdm,opentelemetry,bigquery_v2]", ] [tool.setuptools.dynamic] diff --git a/packages/google-cloud-bigquery/testing/constraints-3.9.txt b/packages/google-cloud-bigquery/testing/constraints-3.9.txt index cb6c29f3b585..60a155f0dba8 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.9.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.9.txt @@ -29,4 +29,4 @@ pyarrow==4.0.0 python-dateutil==2.8.2 requests==2.21.0 Shapely==1.8.4 -tqdm==4.7.4 +matplotlib==3.7.1 diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index 2cda59bd1335..d82f0dfe3116 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -22,6 +22,7 @@ from ..helpers import make_connection from .helpers import _make_client from .helpers import _make_job_resource +from google.cloud.bigquery.enums import DefaultPandasDTypes try: from google.cloud import bigquery_storage @@ -30,6 +31,7 @@ except (ImportError, AttributeError): bigquery_storage = None + try: import shapely except (ImportError, AttributeError): @@ -1019,5 +1021,9 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, geography_column=geography_column, + bool_dtype=DefaultPandasDTypes.BOOL_DTYPE, + int_dtype=DefaultPandasDTypes.INT_DTYPE, + float_dtype=None, + string_dtype=None, ) assert df is row_iterator.to_geodataframe.return_value diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 2530065478f2..8daa4ce43bbb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -31,6 +31,7 @@ from google.cloud.bigquery import exceptions from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +from google.cloud.bigquery.enums import DefaultPandasDTypes from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -4065,7 +4066,7 @@ def test_to_dataframe_no_tqdm(self): def test_to_dataframe_tqdm_error(self): pytest.importorskip("pandas") - pytest.importorskip("tqdm") + tqdm = pytest.importorskip("tqdm") mock.patch("tqdm.tqdm_gui", new=None) mock.patch("tqdm.notebook.tqdm", new=None) mock.patch("tqdm.tqdm", new=None) @@ -4100,7 +4101,7 @@ def test_to_dataframe_tqdm_error(self): for warning in warned: # pragma: NO COVER self.assertIn( warning.category, - [UserWarning, DeprecationWarning], + [UserWarning, DeprecationWarning, tqdm.TqdmExperimentalWarning], ) def test_to_dataframe_w_empty_results(self): @@ -5639,6 +5640,10 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): progress_bar_type, create_bqstorage_client, geography_as_object=True, + bool_dtype=DefaultPandasDTypes.BOOL_DTYPE, + int_dtype=DefaultPandasDTypes.INT_DTYPE, + float_dtype=None, + string_dtype=None, ) self.assertIsInstance(df, geopandas.GeoDataFrame) diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py index 94737732b390..43d64d77dde0 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py @@ -261,3 +261,106 @@ def test_to_dataframe_with_jobs_query_response(class_under_test): "Tiffani", ] assert list(df["number"]) == [6, 325, 26, 10, 17, 22, 6, 229, 8] + + +@mock.patch("google.cloud.bigquery.table.geopandas") +def test_rowiterator_to_geodataframe_with_default_dtypes( + mock_geopandas, monkeypatch, class_under_test +): + mock_geopandas.GeoDataFrame = mock.Mock(spec=True) + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + schema = [ + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("string_col", "STRING"), + ] + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, schema) + + mock_df = pandas.DataFrame( + { + "geo_col": ["POINT (1 2)"], + "bool_col": [True], + "int_col": [123], + "float_col": [1.23], + "string_col": ["abc"], + } + ) + rows.to_dataframe = mock.Mock(return_value=mock_df) + + rows.to_geodataframe(geography_column="geo_col") + + rows.to_dataframe.assert_called_once_with( + None, # bqstorage_client + None, # dtypes + None, # progress_bar_type + True, # create_bqstorage_client + geography_as_object=True, + bool_dtype=bigquery.enums.DefaultPandasDTypes.BOOL_DTYPE, + int_dtype=bigquery.enums.DefaultPandasDTypes.INT_DTYPE, + float_dtype=None, + string_dtype=None, + ) + mock_geopandas.GeoDataFrame.assert_called_once_with( + mock_df, crs="EPSG:4326", geometry="geo_col" + ) + + +@mock.patch("google.cloud.bigquery.table.geopandas") +def test_rowiterator_to_geodataframe_with_custom_dtypes( + mock_geopandas, monkeypatch, class_under_test +): + mock_geopandas.GeoDataFrame = mock.Mock(spec=True) + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + schema = [ + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("string_col", "STRING"), + ] + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, schema) + + mock_df = pandas.DataFrame( + { + "geo_col": ["POINT (3 4)"], + "bool_col": [False], + "int_col": [456], + "float_col": [4.56], + "string_col": ["def"], + } + ) + rows.to_dataframe = mock.Mock(return_value=mock_df) + + custom_bool_dtype = "bool" + custom_int_dtype = "int32" + custom_float_dtype = "float32" + custom_string_dtype = "string" + + rows.to_geodataframe( + geography_column="geo_col", + bool_dtype=custom_bool_dtype, + int_dtype=custom_int_dtype, + float_dtype=custom_float_dtype, + string_dtype=custom_string_dtype, + ) + + rows.to_dataframe.assert_called_once_with( + None, # bqstorage_client + None, # dtypes + None, # progress_bar_type + True, # create_bqstorage_client + geography_as_object=True, + bool_dtype=custom_bool_dtype, + int_dtype=custom_int_dtype, + float_dtype=custom_float_dtype, + string_dtype=custom_string_dtype, + ) + mock_geopandas.GeoDataFrame.assert_called_once_with( + mock_df, crs="EPSG:4326", geometry="geo_col" + ) From 5a73ab4539ca7195e66243cbf4dc9c93047249ca Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 14 May 2025 13:59:44 -0400 Subject: [PATCH 1928/2016] chore: Fix two types of warnings in unit tests (#2183) * Fix two types of warnings in unit tests This commit addresses two warnings that appear when running unit tests: 1. `PytestRemovedIn9Warning` in `tests/unit/test_opentelemetry_tracing.py`: Removed a `@pytest.mark.skipif` decorator from a fixture. The skip condition is already present on the test methods using the fixture. 2. `FutureWarning` in `tests/unit/test_client.py`: Updated calls to `client.query()` to include `job_retry=None` when `job_id` is also specified. This is to avoid ambiguity as BigQuery cannot retry a failed job with the exact same ID. * Update tests/unit/test_client.py * Update tests/unit/test_client.py * Update linting * adds more examples of functions where job_retry is needed --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .../tests/unit/test_client.py | 52 ++++++++++++++----- .../tests/unit/test_opentelemetry_tracing.py | 1 - 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index b8140df6600a..a35338698c56 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -4719,7 +4719,7 @@ def test_query_w_api_method_query_and_job_id_fails(self): client._connection = make_connection({}) with self.assertRaises(TypeError) as exc: - client.query(query, job_id="abcd", api_method="QUERY") + client.query(query, job_id="abcd", api_method="QUERY", job_retry=None) self.assertIn( "`job_id` was provided, but the 'QUERY' `api_method` was requested", exc.exception.args[0], @@ -4774,7 +4774,11 @@ def test_query_w_explicit_project(self): conn = client._connection = make_connection(resource) client.query( - query, job_id=job_id, project="other-project", location=self.LOCATION + query, + job_id=job_id, + project="other-project", + location=self.LOCATION, + job_retry=None, ) # Check that query actually starts the job. @@ -4833,7 +4837,11 @@ def test_query_w_explicit_job_config(self): original_config_copy = copy.deepcopy(job_config) client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) # Check that query actually starts the job. @@ -4884,7 +4892,11 @@ def test_query_preserving_explicit_job_config(self): original_config_copy = copy.deepcopy(job_config) client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) # Check that query actually starts the job. @@ -4940,7 +4952,13 @@ def test_query_preserving_explicit_default_job_config(self): ) conn = client._connection = make_connection(resource) - client.query(query, job_id=job_id, location=self.LOCATION, job_config=None) + client.query( + query, + job_id=job_id, + location=self.LOCATION, + job_config=None, + job_retry=None, + ) # Check that query actually starts the job. conn.api_request.assert_called_once_with( @@ -4978,7 +4996,11 @@ def test_query_w_invalid_job_config(self): with self.assertRaises(TypeError) as exc: client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) self.assertIn("Expected an instance of QueryJobConfig", exc.exception.args[0]) @@ -5027,7 +5049,11 @@ def test_query_w_explicit_job_config_override(self): job_config.default_dataset = None client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) # Check that query actually starts the job. @@ -5072,7 +5098,7 @@ def test_query_w_client_default_config_no_incoming(self): ) conn = client._connection = make_connection(resource) - client.query(query, job_id=job_id, location=self.LOCATION) + client.query(query, job_id=job_id, location=self.LOCATION, job_retry=None) # Check that query actually starts the job. conn.api_request.assert_called_once_with( @@ -5114,7 +5140,7 @@ def test_query_w_client_location(self): ) conn = client._connection = make_connection(resource) - client.query(query, job_id=job_id, project="other-project") + client.query(query, job_id=job_id, project="other-project", job_retry=None) # Check that query actually starts the job. conn.api_request.assert_called_once_with( @@ -5178,7 +5204,7 @@ def test_query_w_udf_resources(self): config.udf_resources = udf_resources config.use_legacy_sql = True - job = client.query(QUERY, job_config=config, job_id=JOB) + job = client.query(QUERY, job_config=config, job_id=JOB, job_retry=None) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) @@ -5234,7 +5260,7 @@ def test_query_w_query_parameters(self): config = QueryJobConfig() config.query_parameters = query_parameters - job = client.query(QUERY, job_config=config, job_id=JOB) + job = client.query(QUERY, job_config=config, job_id=JOB, job_retry=None) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) @@ -5277,7 +5303,7 @@ def test_query_job_rpc_fail_w_random_error(self): ) with job_begin_patcher: with pytest.raises(Unknown, match="Not sure what went wrong."): - client.query("SELECT 1;", job_id="123") + client.query("SELECT 1;", job_id="123", job_retry=None) def test_query_job_rpc_fail_w_conflict_job_id_given(self): from google.api_core.exceptions import Conflict @@ -5293,7 +5319,7 @@ def test_query_job_rpc_fail_w_conflict_job_id_given(self): ) with job_begin_patcher: with pytest.raises(Conflict, match="Job already exists."): - client.query("SELECT 1;", job_id="123") + client.query("SELECT 1;", job_id="123", job_retry=None) def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): from google.api_core.exceptions import Conflict diff --git a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py index 546cc02bd75e..57132a1b99b5 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py +++ b/packages/google-cloud-bigquery/tests/unit/test_opentelemetry_tracing.py @@ -42,7 +42,6 @@ TEST_SPAN_ATTRIBUTES = {"foo": "baz"} -@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") @pytest.fixture def setup(): importlib.reload(opentelemetry_tracing) From 7063c09646cb99f5f172d73fa0a4bc9c8e28240a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 14 May 2025 20:42:29 +0200 Subject: [PATCH 1929/2016] chore(deps): update all dependencies to v3.32.0 (#2179) Co-authored-by: Chalmer Lowe --- .../google-cloud-bigquery/samples/desktopapp/requirements.txt | 2 +- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index b98f4ace983e..743d0fe3597d 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 google-auth-oauthlib==1.2.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 3ff1b294491e..434a594cb5f3 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 google-auth==2.40.1 -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index b000aa50c494..bb60f2a67e2b 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.9.0 db-dtypes==1.4.3 -google.cloud.bigquery==3.31.0 +google.cloud.bigquery==3.32.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 pandas==2.2.3 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index d80ffcd09742..17f43bf78ec9 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ bigquery-magics==0.9.0 db-dtypes==1.4.3 -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' ipython==9.2.0; python_version >= '3.10' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 4b88c6b702fb..c31815d69171 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 From 2ecfe1a6825fd66cd1e41b2ac447e8fb36b8ee4e Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 15 May 2025 05:11:00 -0400 Subject: [PATCH 1930/2016] refactor: Fix DeprecationWarnings for datetime methods in job tests (#2185) * Fix DeprecationWarnings for datetime methods in job tests Replaced calls to deprecated `datetime.datetime.utcnow()` with `datetime.datetime.now(datetime.UTC)` in `tests/unit/job/test_base.py`. Replaced calls to deprecated `datetime.datetime.utcfromtimestamp()` with `datetime.datetime.fromtimestamp(timestamp, datetime.UTC)` in `tests/unit/job/helpers.py`. These changes address the specific warnings identified in the issue for these two files. * Update tests/unit/job/test_base.py * Update tests/unit/job/test_base.py * Updates datetime code related to UTC --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .../tests/unit/job/helpers.py | 4 +++- .../tests/unit/job/test_base.py | 2 +- .../tests/unit/test__pandas_helpers.py | 2 +- .../tests/unit/test_client.py | 17 ++++++++--------- .../tests/unit/test_dataset.py | 4 +++- .../tests/unit/test_query.py | 17 +++++++++-------- .../tests/unit/test_table.py | 8 ++++++-- 7 files changed, 31 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/job/helpers.py b/packages/google-cloud-bigquery/tests/unit/job/helpers.py index 3642c7229647..24ba2fa997c0 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/job/helpers.py @@ -106,7 +106,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.ETAG = "ETAG" self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format( diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 2d2f0c13c1ec..22a0fa45000b 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -331,7 +331,7 @@ def _datetime_and_millis(): import datetime from google.cloud._helpers import _millis - now = datetime.datetime.utcnow().replace( + now = datetime.datetime.now(datetime.timezone.utc).replace( microsecond=123000, tzinfo=datetime.timezone.utc, # stats timestamps have ms precision ) diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index 48c085c1dad8..d6ea5df7ed88 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -886,7 +886,7 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_json_generator(module_under_test): - utcnow = datetime.datetime.utcnow() + utcnow = datetime.datetime.now(datetime.timezone.utc) dataframe = pandas.DataFrame( { "a_series": [1, 2, 3, 4], diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index a35338698c56..4680683217ad 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5853,7 +5853,7 @@ def test_insert_rows_w_schema(self): from google.cloud.bigquery.schema import SchemaField WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) + WHEN = datetime.datetime.fromtimestamp(WHEN_TS, UTC).replace(tzinfo=UTC) PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( self.PROJECT, self.DS_ID, @@ -5914,7 +5914,7 @@ def test_insert_rows_w_list_of_dictionaries(self): from google.cloud.bigquery.table import Table WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) + WHEN = datetime.datetime.fromtimestamp(WHEN_TS, UTC).replace(tzinfo=UTC) PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( self.PROJECT, self.DS_ID, @@ -6097,6 +6097,7 @@ def _row_data(row): ) def test_insert_rows_w_repeated_fields(self): + from google.cloud._helpers import UTC from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -6126,12 +6127,8 @@ def test_insert_rows_w_repeated_fields(self): ( 12, [ - datetime.datetime( - 2018, 12, 1, 12, 0, 0, tzinfo=datetime.timezone.utc - ), - datetime.datetime( - 2018, 12, 1, 13, 0, 0, tzinfo=datetime.timezone.utc - ), + datetime.datetime(2018, 12, 1, 12, 0, 0, tzinfo=UTC), + datetime.datetime(2018, 12, 1, 13, 0, 0, tzinfo=UTC), ], [1.25, 2.5], ), @@ -6966,7 +6963,9 @@ def test_list_rows(self): ) WHEN_TS = 1437767599006000 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS / 1e6).replace(tzinfo=UTC) + WHEN = datetime.datetime.fromtimestamp( + WHEN_TS / 1e6, datetime.timezone.utc + ).replace(tzinfo=UTC) WHEN_1 = WHEN + datetime.timedelta(microseconds=1) WHEN_2 = WHEN + datetime.timedelta(microseconds=2) ROWS = 1234 diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 5cce2a9a7f03..3fd2579af955 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -945,7 +945,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.ETAG = "ETAG" self.DS_FULL_ID = "%s:%s" % (self.PROJECT, self.DS_ID) self.RESOURCE_URL = "http://example.com/path/to/resource" diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 40ef080f7543..0d967bdb89ac 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -637,9 +637,9 @@ def test_to_api_repr_w_timestamp_datetime(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_timestamp_micros(self): - from google.cloud._helpers import _microseconds_from_datetime + from google.cloud._helpers import _microseconds_from_datetime, UTC - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) seconds = _microseconds_from_datetime(now) / 1.0e6 EXPECTED = { "parameterType": {"type": "TIMESTAMP"}, @@ -650,9 +650,9 @@ def test_to_api_repr_w_timestamp_micros(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_datetime(self): - from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import _datetime_to_rfc3339, UTC - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) EXPECTED = { "parameterType": {"type": "DATETIME"}, "parameterValue": { @@ -664,9 +664,9 @@ def test_to_api_repr_w_datetime_datetime(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_string(self): - from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import _datetime_to_rfc3339, UTC - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) now_str = _datetime_to_rfc3339(now) EXPECTED = { "parameterType": {"type": "DATETIME"}, @@ -1047,9 +1047,10 @@ def test_to_api_repr_w_datetime_str(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_datetime(self): + from google.cloud._helpers import UTC # type: ignore from google.cloud.bigquery._helpers import _RFC3339_MICROS_NO_ZULU - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) now_str = now.strftime(_RFC3339_MICROS_NO_ZULU) EXPECTED = { "parameterType": { @@ -1089,7 +1090,7 @@ def test_to_api_repr_w_timestamp_str(self): def test_to_api_repr_w_timestamp_timestamp(self): from google.cloud._helpers import UTC # type: ignore - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) now = now.astimezone(UTC) now_str = str(now) EXPECTED = { diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 8daa4ce43bbb..92fa0e2ec351 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -395,7 +395,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.ETAG = "ETAG" self.TABLE_FULL_ID = "%s:%s.%s" % (self.PROJECT, self.DS_ID, self.TABLE_NAME) self.RESOURCE_URL = "http://example.com/path/to/resource" @@ -1952,7 +1954,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.125 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) def test_ctor(self): From fe0dd77bd7d804f3e2856f085df62d33e672eed7 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 19 May 2025 10:52:16 -0700 Subject: [PATCH 1931/2016] fix: ensure SchemaField.field_dtype returns a string (#2188) * fix: ensure SchemaField.field_dtype returns a string * fix cover tests * fix unit 3.9 --- .../google/cloud/bigquery/_pandas_helpers.py | 154 ++++++++---------- .../google/cloud/bigquery/schema.py | 28 ++-- .../tests/unit/test__pandas_helpers.py | 113 ++++--------- .../tests/unit/test_schema.py | 5 - 4 files changed, 113 insertions(+), 187 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 457eb9078938..6691e7ef6817 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -508,31 +508,37 @@ def dataframe_to_bq_schema(dataframe, bq_schema): bq_schema_unused = set() bq_schema_out = [] - unknown_type_fields = [] - + unknown_type_columns = [] + dataframe_reset_index = dataframe.reset_index() for column, dtype in list_columns_and_indexes(dataframe): - # Use provided type from schema, if present. + # Step 1: use provided type from schema, if present. bq_field = bq_schema_index.get(column) if bq_field: bq_schema_out.append(bq_field) bq_schema_unused.discard(bq_field.name) continue - # Otherwise, try to automatically determine the type based on the + # Step 2: try to automatically determine the type based on the # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) if bq_type is None: - sample_data = _first_valid(dataframe.reset_index()[column]) + sample_data = _first_valid(dataframe_reset_index[column]) if ( isinstance(sample_data, _BaseGeometry) and sample_data is not None # Paranoia ): bq_type = "GEOGRAPHY" - bq_field = schema.SchemaField(column, bq_type) - bq_schema_out.append(bq_field) + if bq_type is not None: + bq_schema_out.append(schema.SchemaField(column, bq_type)) + continue + + # Step 3: try with pyarrow if available + bq_field = _get_schema_by_pyarrow(column, dataframe_reset_index[column]) + if bq_field is not None: + bq_schema_out.append(bq_field) + continue - if bq_field.field_type is None: - unknown_type_fields.append(bq_field) + unknown_type_columns.append(column) # Catch any schema mismatch. The developer explicitly asked to serialize a # column, but it was not found. @@ -543,98 +549,70 @@ def dataframe_to_bq_schema(dataframe, bq_schema): ) ) - # If schema detection was not successful for all columns, also try with - # pyarrow, if available. - if unknown_type_fields: - if not pyarrow: - msg = "Could not determine the type of columns: {}".format( - ", ".join(field.name for field in unknown_type_fields) - ) - warnings.warn(msg) - return None # We cannot detect the schema in full. - - # The augment_schema() helper itself will also issue unknown type - # warnings if detection still fails for any of the fields. - bq_schema_out = augment_schema(dataframe, bq_schema_out) + if unknown_type_columns != []: + msg = "Could not determine the type of columns: {}".format( + ", ".join(unknown_type_columns) + ) + warnings.warn(msg) + return None # We cannot detect the schema in full. - return tuple(bq_schema_out) if bq_schema_out else None + return tuple(bq_schema_out) -def augment_schema(dataframe, current_bq_schema): - """Try to deduce the unknown field types and return an improved schema. +def _get_schema_by_pyarrow(name, series): + """Attempt to detect the type of the given series by leveraging PyArrow's + type detection capabilities. - This function requires ``pyarrow`` to run. If all the missing types still - cannot be detected, ``None`` is returned. If all types are already known, - a shallow copy of the given schema is returned. + This function requires the ``pyarrow`` library to be installed and + available. If the series type cannot be determined or ``pyarrow`` is not + available, ``None`` is returned. Args: - dataframe (pandas.DataFrame): - DataFrame for which some of the field types are still unknown. - current_bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): - A BigQuery schema for ``dataframe``. The types of some or all of - the fields may be ``None``. + name (str): + the column name of the SchemaField. + series (pandas.Series): + The Series data for which to detect the data type. Returns: - Optional[Sequence[google.cloud.bigquery.schema.SchemaField]] + Optional[google.cloud.bigquery.schema.SchemaField]: + A tuple containing the BigQuery-compatible type string (e.g., + "STRING", "INTEGER", "TIMESTAMP", "DATETIME", "NUMERIC", "BIGNUMERIC") + and the mode string ("NULLABLE", "REPEATED"). + Returns ``None`` if the type cannot be determined or ``pyarrow`` + is not imported. """ - # pytype: disable=attribute-error - augmented_schema = [] - unknown_type_fields = [] - for field in current_bq_schema: - if field.field_type is not None: - augmented_schema.append(field) - continue - - arrow_table = pyarrow.array(dataframe.reset_index()[field.name]) - - if pyarrow.types.is_list(arrow_table.type): - # `pyarrow.ListType` - detected_mode = "REPEATED" - detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq( - arrow_table.values.type.id - ) - - # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds - # it to such datetimes, causing them to be recognized as TIMESTAMP type. - # We thus additionally check the actual data to see if we need to overrule - # that and choose DATETIME instead. - # Note that this should only be needed for datetime values inside a list, - # since scalar datetime values have a proper Pandas dtype that allows - # distinguishing between timezone-naive and timezone-aware values before - # even requiring the additional schema augment logic in this method. - if detected_type == "TIMESTAMP": - valid_item = _first_array_valid(dataframe[field.name]) - if isinstance(valid_item, datetime) and valid_item.tzinfo is None: - detected_type = "DATETIME" - else: - detected_mode = field.mode - detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id) - if detected_type == "NUMERIC" and arrow_table.type.scale > 9: - detected_type = "BIGNUMERIC" - if detected_type is None: - unknown_type_fields.append(field) - continue + if not pyarrow: + return None - new_field = schema.SchemaField( - name=field.name, - field_type=detected_type, - mode=detected_mode, - description=field.description, - fields=field.fields, - ) - augmented_schema.append(new_field) + arrow_table = pyarrow.array(series) + if pyarrow.types.is_list(arrow_table.type): + # `pyarrow.ListType` + mode = "REPEATED" + type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.values.type.id) + + # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds + # it to such datetimes, causing them to be recognized as TIMESTAMP type. + # We thus additionally check the actual data to see if we need to overrule + # that and choose DATETIME instead. + # Note that this should only be needed for datetime values inside a list, + # since scalar datetime values have a proper Pandas dtype that allows + # distinguishing between timezone-naive and timezone-aware values before + # even requiring the additional schema augment logic in this method. + if type == "TIMESTAMP": + valid_item = _first_array_valid(series) + if isinstance(valid_item, datetime) and valid_item.tzinfo is None: + type = "DATETIME" + else: + mode = "NULLABLE" # default mode + type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id) + if type == "NUMERIC" and arrow_table.type.scale > 9: + type = "BIGNUMERIC" - if unknown_type_fields: - warnings.warn( - "Pyarrow could not determine the type of columns: {}.".format( - ", ".join(field.name for field in unknown_type_fields) - ) - ) + if type is not None: + return schema.SchemaField(name, type, mode) + else: return None - return augmented_schema - # pytype: enable=attribute-error - def dataframe_to_arrow(dataframe, bq_schema): """Convert pandas dataframe to Arrow table, using BigQuery schema. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 749b0a00e471..1f1aab7a4b0b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -284,15 +284,13 @@ def name(self): return self._properties.get("name", "") @property - def field_type(self): + def field_type(self) -> str: """str: The type of the field. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ type_ = self._properties.get("type") - if type_ is None: # Shouldn't happen, but some unit tests do this. - return None return cast(str, type_).upper() @property @@ -397,20 +395,16 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ - field_type = self.field_type.upper() if self.field_type is not None else None - - # Type can temporarily be set to None if the code needs a SchemaField instance, - # but has not determined the exact type of the field yet. - if field_type is not None: - if field_type == "STRING" or field_type == "BYTES": - if self.max_length is not None: - field_type = f"{field_type}({self.max_length})" - elif field_type.endswith("NUMERIC"): - if self.precision is not None: - if self.scale is not None: - field_type = f"{field_type}({self.precision}, {self.scale})" - else: - field_type = f"{field_type}({self.precision})" + field_type = self.field_type + if field_type == "STRING" or field_type == "BYTES": + if self.max_length is not None: + field_type = f"{field_type}({self.max_length})" + elif field_type.endswith("NUMERIC"): + if self.precision is not None: + if self.scale is not None: + field_type = f"{field_type}({self.precision}, {self.scale})" + else: + field_type = f"{field_type}({self.precision})" policy_tags = ( None if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index d6ea5df7ed88..d87c65581d6f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -1568,31 +1568,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): # set to "datetime64[ns]", and pyarrow converts that to pyarrow.TimestampArray. # We thus cannot expect to get a DATETIME date when converting back to the # BigQuery type. - - current_schema = ( - schema.SchemaField("bool_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("int_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("float_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("time_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("timestamp_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("date_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), - ) - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - - # there should be no relevant warnings - unwanted_warnings = [ - warning for warning in warned if "Pyarrow could not" in str(warning) - ] - assert not unwanted_warnings - - # the augmented schema must match the expected - expected_schema = ( + expected_schemas = ( schema.SchemaField("bool_field", field_type="BOOL", mode="NULLABLE"), schema.SchemaField("int_field", field_type="INT64", mode="NULLABLE"), schema.SchemaField("float_field", field_type="FLOAT64", mode="NULLABLE"), @@ -1607,8 +1583,13 @@ def test_augment_schema_type_detection_succeeds(module_under_test): ), ) - by_name = operator.attrgetter("name") - assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) + for col_name, expected_schema in zip(dataframe, expected_schemas): + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field == expected_schema @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1639,30 +1620,20 @@ def test_augment_schema_repeated_fields(module_under_test): ] ) - current_schema = ( - schema.SchemaField("string_array", field_type=None, mode="NULLABLE"), - schema.SchemaField("timestamp_array", field_type=None, mode="NULLABLE"), - schema.SchemaField("datetime_array", field_type=None, mode="NULLABLE"), - ) - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - - # there should be no relevant warnings - unwanted_warnings = [ - warning for warning in warned if "Pyarrow could not" in str(warning) - ] - assert not unwanted_warnings - # the augmented schema must match the expected - expected_schema = ( + expected_schemas = ( schema.SchemaField("string_array", field_type="STRING", mode="REPEATED"), schema.SchemaField("timestamp_array", field_type="TIMESTAMP", mode="REPEATED"), schema.SchemaField("datetime_array", field_type="DATETIME", mode="REPEATED"), ) - by_name = operator.attrgetter("name") - assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) + for col_name, expected_schema in zip(dataframe, expected_schemas): + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field == expected_schema @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1681,24 +1652,21 @@ def test_augment_schema_type_detection_fails(module_under_test): }, ] ) - current_schema = [ - schema.SchemaField("status", field_type="STRING", mode="NULLABLE"), - schema.SchemaField("struct_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("struct_field_2", field_type=None, mode="NULLABLE"), - ] - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - assert augmented_schema is None + expected_schemas = ( + schema.SchemaField("status", field_type="STRING", mode="NULLABLE"), + # Could not determine the type of these columns + None, + None, + ) - expected_warnings = [ - warning for warning in warned if "could not determine" in str(warning) - ] - assert len(expected_warnings) == 1 - warning_msg = str(expected_warnings[0]) - assert "pyarrow" in warning_msg.lower() - assert "struct_field" in warning_msg and "struct_field_2" in warning_msg + for col_name, expected_schema in zip(dataframe, expected_schemas): + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field == expected_schema @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1706,23 +1674,14 @@ def test_augment_schema_type_detection_fails_array_data(module_under_test): dataframe = pandas.DataFrame( data=[{"all_none_array": [None, float("NaN")], "empty_array": []}] ) - current_schema = [ - schema.SchemaField("all_none_array", field_type=None, mode="NULLABLE"), - schema.SchemaField("empty_array", field_type=None, mode="NULLABLE"), - ] - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - assert augmented_schema is None - - expected_warnings = [ - warning for warning in warned if "could not determine" in str(warning) - ] - assert len(expected_warnings) == 1 - warning_msg = str(expected_warnings[0]) - assert "pyarrow" in warning_msg.lower() - assert "all_none_array" in warning_msg and "empty_array" in warning_msg + for col_name in dataframe: + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field is None @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") diff --git a/packages/google-cloud-bigquery/tests/unit/test_schema.py b/packages/google-cloud-bigquery/tests/unit/test_schema.py index 3f2304a70734..c63a8312c3fb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_schema.py +++ b/packages/google-cloud-bigquery/tests/unit/test_schema.py @@ -640,11 +640,6 @@ def test___repr__(self): expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)" self.assertEqual(repr(field1), expected) - def test___repr__type_not_set(self): - field1 = self._make_one("field1", field_type=None) - expected = "SchemaField('field1', None, 'NULLABLE', None, None, (), None)" - self.assertEqual(repr(field1), expected) - def test___repr__evaluable_no_policy_tags(self): field = self._make_one("field1", "STRING", "REQUIRED", "Description") field_repr = repr(field) From 58975bcbc948fe5d1274bfe6c2650ac81ab825f0 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 19 May 2025 12:10:12 -0700 Subject: [PATCH 1932/2016] feat: support job reservation (#2186) * feat: support job reservation * Update google/cloud/bigquery/job/base.py Co-authored-by: Chalmer Lowe --------- Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/job/base.py | 32 ++++++++++++++ .../tests/unit/job/test_base.py | 42 +++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index eaa9d34605da..5eb700ce7131 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -224,6 +224,26 @@ def job_timeout_ms(self, value): else: self._properties.pop("jobTimeoutMs", None) + @property + def reservation(self): + """str: Optional. The reservation that job would use. + + User can specify a reservation to execute the job. If reservation is + not set, reservation is determined based on the rules defined by the + reservation assignments. The expected format is + projects/{project}/locations/{location}/reservations/{reservation}. + + Raises: + ValueError: If ``value`` type is not None or of string type. + """ + return self._properties.setdefault("reservation", None) + + @reservation.setter + def reservation(self, value): + if value and not isinstance(value, str): + raise ValueError("Reservation must be None or a string.") + self._properties["reservation"] = value + @property def labels(self): """Dict[str, str]: Labels for the job. @@ -488,6 +508,18 @@ def location(self): """str: Location where the job runs.""" return _helpers._get_sub_prop(self._properties, ["jobReference", "location"]) + @property + def reservation_id(self): + """str: Name of the primary reservation assigned to this job. + + Note that this could be different than reservations reported in + the reservation field if parent reservations were used to execute + this job. + """ + return _helpers._get_sub_prop( + self._properties, ["statistics", "reservation_id"] + ) + def _require_client(self, client): """Check client or verify over-ride. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index 22a0fa45000b..aa3d49ce380d 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -443,6 +443,16 @@ def test_state(self): status["state"] = state self.assertEqual(job.state, state) + def test_reservation_id(self): + reservation_id = "RESERVATION-ID" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.reservation_id) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.reservation_id) + stats["reservation_id"] = reservation_id + self.assertEqual(job.reservation_id, reservation_id) + def _set_properties_job(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -1188,15 +1198,18 @@ def test_fill_query_job_config_from_default(self): job_config = QueryJobConfig() job_config.dry_run = True job_config.maximum_bytes_billed = 1000 + job_config.reservation = "reservation_1" default_job_config = QueryJobConfig() default_job_config.use_query_cache = True default_job_config.maximum_bytes_billed = 2000 + default_job_config.reservation = "reservation_2" final_job_config = job_config._fill_from_default(default_job_config) self.assertTrue(final_job_config.dry_run) self.assertTrue(final_job_config.use_query_cache) self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + self.assertEqual(final_job_config.reservation, "reservation_1") def test_fill_load_job_from_default(self): from google.cloud.bigquery import LoadJobConfig @@ -1204,15 +1217,18 @@ def test_fill_load_job_from_default(self): job_config = LoadJobConfig() job_config.create_session = True job_config.encoding = "UTF-8" + job_config.reservation = "reservation_1" default_job_config = LoadJobConfig() default_job_config.ignore_unknown_values = True default_job_config.encoding = "ISO-8859-1" + default_job_config.reservation = "reservation_2" final_job_config = job_config._fill_from_default(default_job_config) self.assertTrue(final_job_config.create_session) self.assertTrue(final_job_config.ignore_unknown_values) self.assertEqual(final_job_config.encoding, "UTF-8") + self.assertEqual(final_job_config.reservation, "reservation_1") def test_fill_from_default_conflict(self): from google.cloud.bigquery import QueryJobConfig @@ -1232,10 +1248,12 @@ def test_fill_from_empty_default_conflict(self): job_config = QueryJobConfig() job_config.dry_run = True job_config.maximum_bytes_billed = 1000 + job_config.reservation = "reservation_1" final_job_config = job_config._fill_from_default(default_job_config=None) self.assertTrue(final_job_config.dry_run) self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + self.assertEqual(final_job_config.reservation, "reservation_1") @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") def test__get_sub_prop_wo_default(self, _get_sub_prop): @@ -1338,3 +1356,27 @@ def test_job_timeout_properties(self): job_config.job_timeout_ms = None assert job_config.job_timeout_ms is None assert "jobTimeoutMs" not in job_config._properties + + def test_reservation_miss(self): + job_config = self._make_one() + self.assertEqual(job_config.reservation, None) + + def test_reservation_hit(self): + job_config = self._make_one() + job_config._properties["reservation"] = "foo" + self.assertEqual(job_config.reservation, "foo") + + def test_reservation_update_in_place(self): + job_config = self._make_one() + job_config.reservation = "bar" # update in place + self.assertEqual(job_config.reservation, "bar") + + def test_reservation_setter_invalid(self): + job_config = self._make_one() + with self.assertRaises(ValueError): + job_config.reservation = object() + + def test_reservation_setter(self): + job_config = self._make_one() + job_config.reservation = "foo" + self.assertEqual(job_config._properties["reservation"], "foo") From 6872a0a30ec7e363e4c71a328de3fc1152e16d6a Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 19 May 2025 16:54:24 -0400 Subject: [PATCH 1933/2016] ci: Update to the CI/CD pipeline via github workflow to help cut turn-around time (#2189) Update to the CI/CD pipeline via github workflow to help cut turn-around time. * added github workflow * changed the number of pytest-xdist workers from "auto" to "8" (based on local tests and discussion with Tim, choosing auto sometimes takes longer to run than choosing a smaller number. I suspect this is partly because for small or short tests the overhead needed to setup a worker exceeds the time savings of having extra workers). * modified numerous tests to explicitly include a project path to avoid an attempt to find the project by making an external call via the pydata-google-auth workflow (which opens an input and waits for response from the user that never comes). --- .../.github/workflows/unittest.yml | 89 +++++++++++++++++++ packages/google-cloud-bigquery/noxfile.py | 2 +- .../tests/unit/test_magics.py | 30 ++++++- 3 files changed, 118 insertions(+), 3 deletions(-) create mode 100644 packages/google-cloud-bigquery/.github/workflows/unittest.yml diff --git a/packages/google-cloud-bigquery/.github/workflows/unittest.yml b/packages/google-cloud-bigquery/.github/workflows/unittest.yml new file mode 100644 index 000000000000..24c9ddbafed1 --- /dev/null +++ b/packages/google-cloud-bigquery/.github/workflows/unittest.yml @@ -0,0 +1,89 @@ +on: + pull_request: + branches: + - main +name: unittest +jobs: + unit: + # Use `ubuntu-latest` runner. + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.9', '3.11', '3.12', '3.13'] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run unit tests + env: + COVERAGE_FILE: .coverage-${{ matrix.python }} + run: | + nox -s unit-${{ matrix.python }} + - name: Upload coverage results + uses: actions/upload-artifact@v4 + with: + name: coverage-artifact-${{ matrix.python }} + path: .coverage-${{ matrix.python }} + include-hidden-files: true + + unit_noextras: + # Use `ubuntu-latest` runner. + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.9', '3.13'] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run unit_noextras tests + env: + COVERAGE_FILE: .coverage-unit-noextras-${{ matrix.python }} + run: | + nox -s unit_noextras-${{ matrix.python }} + - name: Upload coverage results + uses: actions/upload-artifact@v4 + with: + name: coverage-artifact-unit-noextras-${{ matrix.python }} + path: .coverage-unit-noextras-${{ matrix.python }} + include-hidden-files: true + + cover: + runs-on: ubuntu-latest + needs: + - unit + - unit_noextras + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.9" + - name: Install coverage + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install coverage + - name: Download coverage results + uses: actions/download-artifact@v4 + with: + path: .coverage-results/ + - name: Report coverage results + run: | + find .coverage-results -type f -name '*.zip' -exec unzip {} \; + coverage combine .coverage-results/**/.coverage* + coverage report --show-missing --fail-under=100 diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 1922a68a5df6..575bbb1000b4 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -128,7 +128,7 @@ def default(session, install_extras=True): # Run py.test against the unit tests. session.run( "py.test", - "-n=auto", + "-n=8", "--quiet", "-W default::PendingDeprecationWarning", "--cov=google/cloud/bigquery", diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 0f1e030cb0c0..a9a12283bb08 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -480,6 +480,7 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -831,6 +832,7 @@ def test_bigquery_magic_w_max_results_query_job_results_fails(monkeypatch): assert close_transports.called +@pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_w_table_id_invalid(monkeypatch): ip = IPython.get_ipython() monkeypatch.setattr(bigquery, "bigquery_magics", None) @@ -861,6 +863,7 @@ def test_bigquery_magic_w_table_id_invalid(monkeypatch): assert "Traceback (most recent call last)" not in output +@pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_w_missing_query(monkeypatch): ip = IPython.get_ipython() monkeypatch.setattr(bigquery, "bigquery_magics", None) @@ -1354,6 +1357,8 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1383,6 +1388,8 @@ def test_bigquery_magic_with_progress_bar_type(monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + with run_query_patch as run_query_mock: ip.run_cell_magic( "bigquery", "--progress_bar_type=tqdm_gui", "SELECT 17 as num" @@ -1565,6 +1572,8 @@ def test_bigquery_magic_with_string_params(ipython_ns_cleanup, monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1605,6 +1614,8 @@ def test_bigquery_magic_with_dict_params(ipython_ns_cleanup, monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1689,6 +1700,7 @@ def test_bigquery_magic_with_option_value_incorrect(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" sql = "SELECT @foo AS foo" @@ -1719,6 +1731,8 @@ def test_bigquery_magic_with_dict_params_negative_value( run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1760,6 +1774,8 @@ def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup, monkeyp run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1801,6 +1817,8 @@ def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup, monkeyp run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1852,6 +1870,7 @@ def test_bigquery_magic_valid_query_in_existing_variable( magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" ipython_ns_cleanup.append((ip, "custom_query")) ipython_ns_cleanup.append((ip, "query_results_df")) @@ -1892,6 +1911,7 @@ def test_bigquery_magic_nonexisting_query_variable(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True @@ -1917,7 +1937,7 @@ def test_bigquery_magic_empty_query_variable_name(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - + magics.context.project = "unit-test-project" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) @@ -1940,6 +1960,7 @@ def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup, monkeypatc magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True @@ -1968,9 +1989,14 @@ def test_bigquery_magic_query_variable_not_identifier(monkeypatch): google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" cell_body = "$123foo" # 123foo is not valid Python identifier - with io.capture_output() as captured_io: + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + + with run_query_patch, io.capture_output() as captured_io: ip.run_cell_magic("bigquery", "", cell_body) # If "$" prefixes a string that is not a Python identifier, we do not treat such From 0f68bec05aa21ea4aef8317033e68329874baa82 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 19 May 2025 16:02:21 -0700 Subject: [PATCH 1934/2016] Revert "fix: table iterator should not use bqstorage when page_size is not None (#2154)" (#2191) This reverts commit e89a707b162182ededbf94cc9a0f7594bc2be475. --- .../google/cloud/bigquery/table.py | 11 ++--------- .../tests/unit/test_dbapi_cursor.py | 1 - .../google-cloud-bigquery/tests/unit/test_table.py | 7 ------- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index e084468f6bf9..3f472c49090a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1897,11 +1897,6 @@ def total_bytes_processed(self) -> Optional[int]: """total bytes processed from job statistics, if present.""" return self._total_bytes_processed - @property - def page_size(self) -> Optional[int]: - """The maximum number of rows in each page of results from this request, if present.""" - return self._page_size - def _is_almost_completely_cached(self): """Check if all results are completely cached. @@ -1953,7 +1948,7 @@ def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): if self._is_almost_completely_cached(): return False - if self.max_results is not None or self.page_size is not None: + if self.max_results is not None: return False try: @@ -2023,9 +2018,7 @@ def _maybe_warn_max_results( bqstorage_client: The BigQuery Storage client intended to use for downloading result rows. """ - if bqstorage_client is not None and ( - self.max_results is not None or self.page_size is not None - ): + if bqstorage_client is not None and self.max_results is not None: warnings.warn( "Cannot use bqstorage_client if max_results is set, " "reverting to fetching data with the REST endpoint.", diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py index cba9030de0b5..6fca4cec0724 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi_cursor.py @@ -161,7 +161,6 @@ def _mock_rows( mock_rows, ) mock_rows.max_results = None - mock_rows.page_size = None type(mock_rows).job_id = mock.PropertyMock(return_value="test-job-id") type(mock_rows).location = mock.PropertyMock(return_value="test-location") type(mock_rows).num_dml_affected_rows = mock.PropertyMock( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 92fa0e2ec351..4791c6511e5f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2858,13 +2858,6 @@ def test__should_use_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) - def test__should_use_bqstorage_returns_false_if_page_size_set(self): - iterator = self._make_one(page_size=10, first_page_response=None) # not cached - result = iterator._should_use_bqstorage( - bqstorage_client=None, create_bqstorage_client=True - ) - self.assertFalse(result) - def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached From 2d8f13f9eb58e2763879f331da4f7d5e4c6a1fee Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 16:27:29 -0700 Subject: [PATCH 1935/2016] chore(main): release 3.33.0 (#2180) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- packages/google-cloud-bigquery/CHANGELOG.md | 15 +++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index ff1bd7acc020..2f7166d44664 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.33.0](https://github.com/googleapis/python-bigquery/compare/v3.32.0...v3.33.0) (2025-05-19) + + +### Features + +* Add ability to set autodetect_schema query param in update_table ([#2171](https://github.com/googleapis/python-bigquery/issues/2171)) ([57f940d](https://github.com/googleapis/python-bigquery/commit/57f940d957613b4d80fb81ea40a1177b73856189)) +* Add dtype parameters to to_geodataframe functions ([#2176](https://github.com/googleapis/python-bigquery/issues/2176)) ([ebfd0a8](https://github.com/googleapis/python-bigquery/commit/ebfd0a83d43bcb96f65f5669437220aa6138b766)) +* Support job reservation ([#2186](https://github.com/googleapis/python-bigquery/issues/2186)) ([cb646ce](https://github.com/googleapis/python-bigquery/commit/cb646ceea172bf199f366ae0592546dff2d3bcb2)) + + +### Bug Fixes + +* Ensure AccessEntry equality and repr uses the correct `entity_type` ([#2182](https://github.com/googleapis/python-bigquery/issues/2182)) ([0217637](https://github.com/googleapis/python-bigquery/commit/02176377d5e2fc25b5cd4f46aa6ebfb1b6a960a6)) +* Ensure SchemaField.field_dtype returns a string ([#2188](https://github.com/googleapis/python-bigquery/issues/2188)) ([7ec2848](https://github.com/googleapis/python-bigquery/commit/7ec2848379d5743bbcb36700a1153540c451e0e0)) + ## [3.32.0](https://github.com/googleapis/python-bigquery/compare/v3.31.0...v3.32.0) (2025-05-12) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index fe13d2477ea6..8304ac025dcb 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.32.0" +__version__ = "3.33.0" From ddc04676d0c0a53d75b1a030cbebe9250a8f5d51 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 20 May 2025 10:21:22 -0400 Subject: [PATCH 1936/2016] ci: Import numpy before pyarrow in tests to resolve import warning (#2187) * Fix: Import numpy before pyarrow in tests to resolve import warning A `PytestDeprecationWarning` was occurring in several test files because `pyarrow`, when imported by `pytest.importorskip`, would fail to import `numpy.core.multiarray`. This change addresses the warning by explicitly importing `numpy` before `pytest.importorskip("pyarrow", ...)` in the affected test files. This ensures that numpy is fully initialized before pyarrow attempts to use it, resolving the underlying import error. I also updated the test execution to use `nox -s unit`, which correctly sets up the test environment and dependencies, allowing the tests to pass and confirm the warning is resolved. Pre-existing failures in `tests/unit/test_magics.py` are unrelated to this change. * Update tests/unit/test__pyarrow_helpers.py * revisions to numpy handling * adds import or skip commands to accompany pyarrow import or skips * Update tests/unit/test__pandas_helpers.py * updates an import step and restores gc import * Updates magics.context and removes unneeded? reference to numpy/pyarrow --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/noxfile.py | 3 +- .../testing/constraints-3.9.txt | 1 + .../tests/unit/test__pandas_helpers.py | 1 + .../tests/unit/test__pyarrow_helpers.py | 2 +- .../tests/unit/test_dbapi__helpers.py | 1 + .../tests/unit/test_magics.py | 5 ++++ .../tests/unit/test_table.py | 28 +++++++++++++++++-- .../tests/unit/test_table_arrow.py | 3 +- 8 files changed, 38 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 575bbb1000b4..6807b7ee476e 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -163,8 +163,7 @@ def unit_noextras(session): # so that it continues to be an optional dependency. # https://github.com/googleapis/python-bigquery/issues/1877 if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - session.install("pyarrow==4.0.0") - + session.install("pyarrow==4.0.0", "numpy==1.20.2") default(session, install_extras=False) diff --git a/packages/google-cloud-bigquery/testing/constraints-3.9.txt b/packages/google-cloud-bigquery/testing/constraints-3.9.txt index 60a155f0dba8..f61c0cf09bb0 100644 --- a/packages/google-cloud-bigquery/testing/constraints-3.9.txt +++ b/packages/google-cloud-bigquery/testing/constraints-3.9.txt @@ -20,6 +20,7 @@ ipykernel==6.2.0 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 +numpy==1.20.2 packaging==24.2.0 pandas==1.3.0 pandas-gbq==0.26.1 diff --git a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py index d87c65581d6f..bc94f5f54979 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pandas_helpers.py @@ -1856,6 +1856,7 @@ def test__download_table_bqstorage_shuts_down_workers( Make sure that when the top-level iterator goes out of scope (is deleted), the child threads are also stopped. """ + pytest.importorskip("google.cloud.bigquery_storage_v1") from google.cloud.bigquery import dataset from google.cloud.bigquery import table import google.cloud.bigquery_storage_v1.reader diff --git a/packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py index 06fc2eb856a8..c12a526de5d3 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__pyarrow_helpers.py @@ -14,7 +14,7 @@ import pytest - +numpy = pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") diff --git a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py index 7e1da0034309..9907df97b275 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dbapi__helpers.py @@ -210,6 +210,7 @@ def test_empty_iterable(self): self.assertEqual(list(result), []) def test_non_empty_iterable(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") from tests.unit.helpers import _to_pyarrow diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index a9a12283bb08..814150693b21 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -1276,6 +1276,11 @@ def test_bigquery_magic_with_no_query_cache(monkeypatch): bigquery.load_ipython_extension(ip) conn = make_connection() monkeypatch.setattr(magics.context, "_connection", conn) + monkeypatch.setattr( + magics.context, + "credentials", + mock.create_autospec(google.auth.credentials.Credentials, instance=True), + ) monkeypatch.setattr(magics.context, "project", "project-from-context") # --no_query_cache option should override context. diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index 4791c6511e5f..eb2c8d9ec6a4 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -2416,6 +2416,7 @@ def test_to_arrow_error_if_pyarrow_is_none(self): row_iterator.to_arrow() def test_to_arrow(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow") row_iterator = self._make_one() tbl = row_iterator.to_arrow() @@ -2423,6 +2424,7 @@ def test_to_arrow(self): self.assertEqual(tbl.num_rows, 0) def test_to_arrow_iterable(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3089,6 +3091,7 @@ def test_to_arrow_iterable_w_bqstorage(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_arrow(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3173,6 +3176,7 @@ def test_to_arrow(self): ) def test_to_arrow_w_nulls(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3209,6 +3213,7 @@ def test_to_arrow_w_nulls(self): self.assertEqual(ages, [32, 29, None, 111]) def test_to_arrow_w_unknown_type(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3254,6 +3259,7 @@ def test_to_arrow_w_unknown_type(self): self.assertTrue(all("sport" in str(warning) for warning in warned)) def test_to_arrow_w_empty_table(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3295,6 +3301,7 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[1].name, "age") def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.schema import SchemaField @@ -3337,6 +3344,7 @@ def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.schema import SchemaField @@ -3375,6 +3383,7 @@ def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() def test_to_arrow_w_bqstorage(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -3458,6 +3467,7 @@ def test_to_arrow_w_bqstorage(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_arrow_w_bqstorage_creates_client(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -3491,6 +3501,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): bqstorage_client._transport.grpc_channel.close.assert_called_once() def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3524,6 +3535,7 @@ def mock_verify_version(raise_if_error: bool = False): self.assertEqual(tbl.num_rows, 2) def test_to_arrow_w_bqstorage_no_streams(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -3563,6 +3575,7 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[2].name, "colB") def test_to_arrow_progress_bar(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("tqdm") pytest.importorskip("tqdm.notebook") @@ -3696,6 +3709,7 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["age"][0], 33) def test_to_dataframe_iterable_w_bqstorage(self): + pytest.importorskip("numpy") pandas = pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") @@ -3770,6 +3784,7 @@ def test_to_dataframe_iterable_w_bqstorage(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): + pytest.importorskip("numpy") pandas = pytest.importorskip("pandas") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -4513,7 +4528,7 @@ def test_to_dataframe_w_none_dtypes_mapper(self): def test_to_dataframe_w_unsupported_dtypes_mapper(self): pytest.importorskip("pandas") - import numpy + numpy = pytest.importorskip("numpy") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -4797,6 +4812,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() def test_to_dataframe_w_bqstorage_creates_client(self): + pytest.importorskip("numpy") pytest.importorskip("pandas") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -4830,6 +4846,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport.grpc_channel.close.assert_called_once() def test_to_dataframe_w_bqstorage_no_streams(self): + pytest.importorskip("numpy") pytest.importorskip("pandas") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -4858,6 +4875,7 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertTrue(got.empty) def test_to_dataframe_w_bqstorage_logs_session(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pytest.importorskip("pyarrow") @@ -4882,6 +4900,7 @@ def test_to_dataframe_w_bqstorage_logs_session(self): ) def test_to_dataframe_w_bqstorage_empty_streams(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -4936,6 +4955,7 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertTrue(got.empty) def test_to_dataframe_w_bqstorage_nonempty(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5018,6 +5038,7 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): + pytest.importorskip("numpy") bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5070,6 +5091,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertTrue(got.index.is_unique) def test_to_dataframe_w_bqstorage_updates_progress_bar(self): + pytest.importorskip("numpy") bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5147,6 +5169,7 @@ def blocking_to_arrow(*args, **kwargs): tqdm_mock().close.assert_called_once() def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): + pytest.importorskip("numpy") bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5322,6 +5345,7 @@ def test_to_dataframe_w_bqstorage_snapshot(self): row_iterator.to_dataframe(bqstorage_client) def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") pandas = pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5604,7 +5628,7 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): """ pandas = pytest.importorskip("pandas") geopandas = pytest.importorskip("geopandas") - import numpy + numpy = pytest.importorskip("numpy") from shapely import wkt row_iterator = self._make_one_from_data( diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_arrow.py b/packages/google-cloud-bigquery/tests/unit/test_table_arrow.py index 830c4ceb7eab..fdd1b7b78c60 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table_arrow.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table_arrow.py @@ -18,7 +18,8 @@ import google.cloud.bigquery.table -pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") +pytest.importorskip("numpy") +pytest.importorskip("pyarrow", minversion="3.0.0") def test_to_arrow_with_jobs_query_response(): From b228102bd200139d915cfa95964c6f0a834df4b5 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Tue, 20 May 2025 10:46:23 -0400 Subject: [PATCH 1937/2016] docs: update query.py (#2192) Co-authored-by: Chalmer Lowe --- .../google-cloud-bigquery/google/cloud/bigquery/job/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index f9b99b7fb8d5..954a4696312d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1529,7 +1529,7 @@ def result( # type: ignore # (incompatible with supertype) a DDL query, an ``_EmptyRowIterator`` instance is returned. Raises: - google.cloud.exceptions.GoogleAPICallError: + google.api_core.exceptions.GoogleAPICallError: If the job failed and retries aren't successful. concurrent.futures.TimeoutError: If the job did not complete in the given timeout. From f0f0976cb7b329fb7529fd859216ea1cde27bd44 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 20 May 2025 13:56:30 -0400 Subject: [PATCH 1938/2016] ci: adds new github workflow focused on documentation in prep to deprecate kokoro presubmit (#2194) * I've created a new workflow for the docs and docsfx nox sessions. This involves a new GitHub workflow located in `.github/workflows/docs.yml`. This new workflow will now handle running the `docs` and `docsfx` nox sessions, which were previously managed by the `.kokoro/presubmit/presubmit.cfg` workflow. Here's how the new workflow operates: - It activates when you make pull requests to the `main` branch. - It executes two jobs: `docs` and `docsfx`. - Both of these jobs utilize Python 3.10. - Each job installs nox and then runs its corresponding nox session (`docs-3.10` or `docsfx-3.10`). This adjustment is a step towards phasing out and removing the `.kokoro/presubmit/presubmit.cfg` file. * Update .github/workflows/docs.yml * Update .github/workflows/docs.yml --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .../.github/workflows/docs.yml | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 packages/google-cloud-bigquery/.github/workflows/docs.yml diff --git a/packages/google-cloud-bigquery/.github/workflows/docs.yml b/packages/google-cloud-bigquery/.github/workflows/docs.yml new file mode 100644 index 000000000000..9372faac20cd --- /dev/null +++ b/packages/google-cloud-bigquery/.github/workflows/docs.yml @@ -0,0 +1,39 @@ +on: + pull_request: + branches: + - main +name: docs +jobs: + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run docs session + run: | + nox -s docs-3.10 + + docfx: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run docfx session + run: | + nox -s docfx-3.10 From 2bd98a97c064f41d04f4b63a2d119751de21e366 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 21 May 2025 16:20:17 +0200 Subject: [PATCH 1939/2016] fix(deps): update all dependencies (#2184) * fix(deps): update all dependencies * Update pyproject.toml * Update .github/workflows/docs.yml * Update .github/workflows/docs.yml --------- Co-authored-by: Chalmer Lowe --- .../samples/desktopapp/requirements-test.txt | 2 +- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements.txt | 6 +++--- .../samples/magics/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../samples/notebooks/requirements-test.txt | 2 +- .../samples/notebooks/requirements.txt | 4 ++-- .../samples/snippets/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 6abea3b4d344..cc71ee426e78 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 743d0fe3597d..4a5b75346cc8 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 google-auth-oauthlib==1.2.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 434a594cb5f3..3b1a3ef54c92 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -3,7 +3,7 @@ certifi==2025.4.26 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' -click==8.2.0; python_version >= '3.10' +click==8.2.1; python_version >= '3.10' click-plugins==1.1.1 cligj==0.7.2 db-dtypes==1.4.3 @@ -12,7 +12,7 @@ geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 google-auth==2.40.1 -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 @@ -36,7 +36,7 @@ PyYAML==6.0.2 requests==2.32.3 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' -Shapely==2.1.0; python_version >= '3.10' +Shapely==2.1.1; python_version >= '3.10' six==1.17.0 typing-extensions==4.13.2 typing-inspect==0.9.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 6abea3b4d344..cc71ee426e78 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index bb60f2a67e2b..7d0c91e3dc3c 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.9.0 +bigquery_magics==0.10.0 db-dtypes==1.4.3 -google.cloud.bigquery==3.32.0 +google.cloud.bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 pandas==2.2.3 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 6abea3b4d344..cc71ee426e78 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 17f43bf78ec9..9f131e5b87e3 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ -bigquery-magics==0.9.0 +bigquery-magics==0.10.0 db-dtypes==1.4.3 -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' ipython==9.2.0; python_version >= '3.10' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 6760e1228d13..503324cb0f7d 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index c31815d69171..dae43eff307a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 From 6b0ac34953313855317b520bcc73eb44bfd0fc66 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 22 May 2025 13:01:13 -0400 Subject: [PATCH 1940/2016] ddocs: fixes several typos and updates a try except block (#2197) --- .../google-cloud-bigquery/google/cloud/bigquery/_helpers.py | 2 +- .../google/cloud/bigquery/_pandas_helpers.py | 2 +- .../google-cloud-bigquery/google/cloud/bigquery/client.py | 2 +- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py index 76c4f1fbdfd3..c7d7705e0d01 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_helpers.py @@ -388,7 +388,7 @@ def range_to_py(self, value, field): class DataFrameCellDataParser(CellDataParser): - """Override of CellDataParser to handle differences in expection of values in DataFrame-like outputs. + """Override of CellDataParser to handle differences in expression of values in DataFrame-like outputs. This is used to turn the output of the REST API into a pyarrow Table, emulating the serialized arrow from the BigQuery Storage Read API. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 6691e7ef6817..10a5c59bb21a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -1144,7 +1144,7 @@ def determine_requested_streams( """ if preserve_order: - # If preserve order is set, it takes precendence. + # If preserve order is set, it takes precedence. # Limit the requested streams to 1, to ensure that order # is preserved) return 1 diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 8ad1586f40d3..067b389a5973 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -4134,7 +4134,7 @@ def _list_rows_from_query_results( rows that were affected. query (Optional[str]): The query text used. - total_bytes_processed (Optinal[int]): + total_bytes_processed (Optional[int]): total bytes processed from job statistics, if present. Returns: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 3f472c49090a..3b1334bd30af 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -44,7 +44,7 @@ import geopandas # type: ignore except ImportError: geopandas = None -else: +finally: _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" try: @@ -1786,7 +1786,7 @@ class RowIterator(HTTPIterator): the first page is requested. query (Optional[str]): The query text used. - total_bytes_processed (Optinal[int]): + total_bytes_processed (Optional[int]): total bytes processed from job statistics, if present. """ From cff8382e56573d613baabd625d2cd1fa4f16ab4d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 22 May 2025 16:20:44 -0400 Subject: [PATCH 1941/2016] ci: Remove unit tests and doc tests from kokoro presubmit. (#2195) * The message is a commit message, not a message to a user from an AI agent. Therefore, it should be output as is. Output: Remove Kokoro presubmit for unit, docs, and coverage. This commit removes the Kokoro presubmit configuration that runs `unit_noextras`, `unit`, `cover`, `docs`, and `docfx` nox sessions. These checks are already performed by GitHub Actions, making the Kokoro configuration redundant. The change involves removing the `NOX_SESSION` environment variable definition from `.kokoro/presubmit/presubmit.cfg`. * Update presubmit.cfg * Delete .kokoro/presubmit/presubmit.cfg --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .../google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg deleted file mode 100644 index ac4cc5847eeb..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/presubmit.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Disable system tests. -env_vars: { - key: "NOX_SESSION" - value: "unit_noextras unit cover docs docfx" -} From 426601759e058dc63fa4e3c6c7ec45906b4602a0 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 23 May 2025 10:25:33 -0400 Subject: [PATCH 1942/2016] ci: Configure Renovate to keep Python at 3.10 for docs workflow (#2199) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Configure Renovate to keep Python at 3.10 for docs workflow This change adds a packageRule to `renovate.json` to prevent Renovate from updating the Python version used in the `.github/workflows/docs.yml` GitHub Actions workflow. The rule specifically targets the `python-version` input of the `actions/setup-python` step and restricts allowed versions to `<3.11`, effectively pinning it to `3.10` for now. * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update renovate.json * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * adds files to excludes lists * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update owlbot.py * adds packageRule about pyproject.toml --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: Owl Bot --- packages/google-cloud-bigquery/owlbot.py | 3 ++- packages/google-cloud-bigquery/renovate.json | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 60759adbe733..80cf9d6e3bb9 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -65,6 +65,7 @@ templated_files, excludes=[ "noxfile.py", + "renovate.json", "docs/multiprocessing.rst", "docs/index.rst", ".coveragerc", @@ -76,7 +77,7 @@ ".kokoro/continuous/prerelease-deps.cfg", ".kokoro/samples/python3.7/**", ".kokoro/samples/python3.8/**", - ".github/workflows", # exclude gh actions as credentials are needed for tests + ".github/workflows/**", # exclude gh actions as credentials are needed for tests "README.rst", ], ) diff --git a/packages/google-cloud-bigquery/renovate.json b/packages/google-cloud-bigquery/renovate.json index c7875c469bd5..51eb51d6ec79 100644 --- a/packages/google-cloud-bigquery/renovate.json +++ b/packages/google-cloud-bigquery/renovate.json @@ -8,5 +8,12 @@ "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] - } + }, + "packageRules": [ + { + "matchFileNames": ["pyproject.toml"], + "matchStrings": ["matplotlib (.*); python_version == '3.9'"], + "allowedVersions": ">= 3.7.1, <= 3.9.2" + } + ] } From c473d86fd496878186a51f6ff0adb0846ed3d7cb Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 23 May 2025 10:57:55 -0400 Subject: [PATCH 1943/2016] ci: updates renovate.json to ignore docs.yml (#2200) * updates renovate to ignore docs.yml * Update renovate.json --- packages/google-cloud-bigquery/renovate.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/renovate.json b/packages/google-cloud-bigquery/renovate.json index 51eb51d6ec79..3ea143d4c0a9 100644 --- a/packages/google-cloud-bigquery/renovate.json +++ b/packages/google-cloud-bigquery/renovate.json @@ -5,7 +5,7 @@ ":preserveSemverRanges", ":disableDependencyDashboard" ], - "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"], + "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml", ".github/workflows/docs.yml"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] }, From ebeaa46f697d6ca09c8a55bc55dbc1d8937b2e44 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 23 May 2025 17:32:50 +0200 Subject: [PATCH 1944/2016] chore(deps): update dependency google-auth to v2.40.2 (#2196) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 3b1a3ef54c92..7a0946faee71 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 -google-auth==2.40.1 +google-auth==2.40.2 google-cloud-bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 From 797e1b066a56172e568b56979c509d7fcc78c17b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 27 May 2025 04:38:22 -0500 Subject: [PATCH 1945/2016] docs: use query_and_wait in the array parameters sample (#2202) --- .../samples/client_query_w_array_params.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/client_query_w_array_params.py b/packages/google-cloud-bigquery/samples/client_query_w_array_params.py index 25592a94a30a..e9d759f61790 100644 --- a/packages/google-cloud-bigquery/samples/client_query_w_array_params.py +++ b/packages/google-cloud-bigquery/samples/client_query_w_array_params.py @@ -35,8 +35,8 @@ def client_query_w_array_params() -> None: bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]), ] ) - query_job = client.query(query, job_config=job_config) # Make an API request. + rows = client.query_and_wait(query, job_config=job_config) # Make an API request. - for row in query_job: + for row in rows: print("{}: \t{}".format(row.name, row.count)) # [END bigquery_query_params_arrays] From 3ad0ab0dd54c5c753d03c6a79766e6aef83b063e Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 27 May 2025 10:53:49 -0700 Subject: [PATCH 1946/2016] feat: job creation mode GA (#2190) * feat: job creation mode GA This PR makes the underlying functionality related to how queries can optionally avoid job creation a GA feature. It does the following: * no longer uses the preview QUERY_PREVIEW_ENABLED environment variable to control job creation * adds a new argument to Client instantiation to control job creation mode * adds a property/setter to Client to control job creation mode This PR also updates/renames the sample demonstrating how to leverage job creation mode with Client.query_and_wait. --- .../google/cloud/bigquery/_job_helpers.py | 12 ++-------- .../google/cloud/bigquery/client.py | 22 +++++++++++------ .../google/cloud/bigquery/enums.py | 19 +++++++++++++++ ...rtmode.py => client_query_job_optional.py} | 24 +++++++++++-------- ...e.py => test_client_query_job_optional.py} | 6 ++--- .../tests/unit/test__job_helpers.py | 12 +++++----- .../tests/unit/test_client.py | 11 +++++++++ 7 files changed, 70 insertions(+), 36 deletions(-) rename packages/google-cloud-bigquery/samples/{client_query_shortmode.py => client_query_job_optional.py} (69%) rename packages/google-cloud-bigquery/samples/tests/{test_client_query_shortmode.py => test_client_query_job_optional.py} (85%) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 4a884ada5f42..888dc1e7312c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -37,7 +37,6 @@ import copy import functools -import os import uuid import textwrap from typing import Any, Dict, Optional, TYPE_CHECKING, Union @@ -400,12 +399,6 @@ def query_and_wait( ) -> table.RowIterator: """Run the query, wait for it to finish, and return the results. - While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the - ``jobs.query`` REST API, use the default ``jobCreationMode`` unless - the environment variable ``QUERY_PREVIEW_ENABLED=true``. After - ``jobCreationMode`` is GA, this method will always use - ``jobCreationMode=JOB_CREATION_OPTIONAL``. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query Args: client: @@ -500,9 +493,8 @@ def query_and_wait( request_body["maxResults"] = min(page_size, max_results) elif page_size is not None or max_results is not None: request_body["maxResults"] = page_size or max_results - - if os.getenv("QUERY_PREVIEW_ENABLED", "").casefold() == "true": - request_body["jobCreationMode"] = "JOB_CREATION_OPTIONAL" + if client.default_job_creation_mode: + request_body["jobCreationMode"] = client.default_job_creation_mode def do_query(): request_body["requestId"] = make_job_id() diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 067b389a5973..c6873545b0dc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -221,6 +221,10 @@ class Client(ClientWithProject): client_options (Optional[Union[google.api_core.client_options.ClientOptions, Dict]]): Client options used to set user options on the client. API Endpoint should be set through client_options. + default_job_creation_mode (Optional[str]): + Sets the default job creation mode used by query methods such as + query_and_wait(). For lightweight queries, JOB_CREATION_OPTIONAL is + generally recommended. Raises: google.auth.exceptions.DefaultCredentialsError: @@ -243,6 +247,7 @@ def __init__( client_options: Optional[ Union[google.api_core.client_options.ClientOptions, Dict[str, Any]] ] = None, + default_job_creation_mode: Optional[str] = None, ) -> None: if client_options is None: client_options = {} @@ -277,6 +282,7 @@ def __init__( self._connection = Connection(self, **kw_args) self._location = location self._default_load_job_config = copy.deepcopy(default_load_job_config) + self.default_job_creation_mode = default_job_creation_mode # Use property setter so validation can run. self.default_query_job_config = default_query_job_config @@ -286,6 +292,15 @@ def location(self): """Default location for jobs / datasets / tables.""" return self._location + @property + def default_job_creation_mode(self): + """Default job creation mode used for query execution.""" + return self._default_job_creation_mode + + @default_job_creation_mode.setter + def default_job_creation_mode(self, value: Optional[str]): + self._default_job_creation_mode = value + @property def default_query_job_config(self) -> Optional[QueryJobConfig]: """Default ``QueryJobConfig`` or ``None``. @@ -3532,13 +3547,6 @@ def query_and_wait( ) -> RowIterator: """Run the query, wait for it to finish, and return the results. - While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the - ``jobs.query`` REST API, use the default ``jobCreationMode`` unless - the environment variable ``QUERY_PREVIEW_ENABLED=true``. After - ``jobCreationMode`` is GA, this method will always use - ``jobCreationMode=JOB_CREATION_OPTIONAL``. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query - Args: query (str): SQL query to be executed. Defaults to the standard SQL diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 203ea3c7b4a4..4cb7a056dfc4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -407,3 +407,22 @@ class BigLakeTableFormat(object): ICEBERG = "ICEBERG" """Apache Iceberg format.""" + + +class JobCreationMode(object): + """Documented values for Job Creation Mode.""" + + JOB_CREATION_MODE_UNSPECIFIED = "JOB_CREATION_MODE_UNSPECIFIED" + """Job creation mode is unspecified.""" + + JOB_CREATION_REQUIRED = "JOB_CREATION_REQUIRED" + """Job creation is always required.""" + + JOB_CREATION_OPTIONAL = "JOB_CREATION_OPTIONAL" + """Job creation is optional. + + Returning immediate results is prioritized. + BigQuery will automatically determine if a Job needs to be created. + The conditions under which BigQuery can decide to not create a Job are + subject to change. + """ diff --git a/packages/google-cloud-bigquery/samples/client_query_shortmode.py b/packages/google-cloud-bigquery/samples/client_query_job_optional.py similarity index 69% rename from packages/google-cloud-bigquery/samples/client_query_shortmode.py rename to packages/google-cloud-bigquery/samples/client_query_job_optional.py index 50446dc48a71..6321aea35101 100644 --- a/packages/google-cloud-bigquery/samples/client_query_shortmode.py +++ b/packages/google-cloud-bigquery/samples/client_query_job_optional.py @@ -13,16 +13,18 @@ # limitations under the License. -def client_query_shortmode() -> None: - # [START bigquery_query_shortquery] - # This example demonstrates issuing a query that may be run in short query mode. - # - # To enable the short query mode preview feature, the QUERY_PREVIEW_ENABLED - # environmental variable should be set to `TRUE`. +def client_query_job_optional() -> None: + # [START bigquery_query_job_optional] + # This example demonstrates executing a query without requiring an associated + # job. from google.cloud import bigquery + from google.cloud.bigquery.enums import JobCreationMode - # Construct a BigQuery client object. - client = bigquery.Client() + # Construct a BigQuery client object, specifying that the library should + # avoid creating jobs when possible. + client = bigquery.Client( + default_job_creation_mode=JobCreationMode.JOB_CREATION_OPTIONAL + ) query = """ SELECT @@ -44,10 +46,12 @@ def client_query_shortmode() -> None: if rows.job_id is not None: print("Query was run with job state. Job ID: {}".format(rows.job_id)) else: - print("Query was run in short mode. Query ID: {}".format(rows.query_id)) + print( + "Query was run without creating a job. Query ID: {}".format(rows.query_id) + ) print("The query data:") for row in rows: # Row values can be accessed by field name or index. print("name={}, gender={}, total={}".format(row[0], row[1], row["total"])) - # [END bigquery_query_shortquery] + # [END bigquery_query_job_optional] diff --git a/packages/google-cloud-bigquery/samples/tests/test_client_query_shortmode.py b/packages/google-cloud-bigquery/samples/tests/test_client_query_job_optional.py similarity index 85% rename from packages/google-cloud-bigquery/samples/tests/test_client_query_shortmode.py rename to packages/google-cloud-bigquery/samples/tests/test_client_query_job_optional.py index 41132f24cbea..0e0b2cf1916a 100644 --- a/packages/google-cloud-bigquery/samples/tests/test_client_query_shortmode.py +++ b/packages/google-cloud-bigquery/samples/tests/test_client_query_job_optional.py @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,13 +14,13 @@ import typing -from .. import client_query_shortmode +from .. import client_query_job_optional if typing.TYPE_CHECKING: import pytest def test_client_query_shortmode(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_shortmode.client_query_shortmode() + client_query_job_optional.client_query_job_optional() out, err = capsys.readouterr() assert "Query was run" in out diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py index 4fa093c695c3..417f911b808a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py @@ -554,13 +554,9 @@ def test_query_and_wait_retries_job_times_out(): ) -def test_query_and_wait_sets_job_creation_mode(monkeypatch: pytest.MonkeyPatch): - monkeypatch.setenv( - "QUERY_PREVIEW_ENABLED", - # The comparison should be case insensitive. - "TrUe", - ) +def test_query_and_wait_sets_job_creation_mode(): client = mock.create_autospec(Client) + client.default_job_creation_mode = "JOB_CREATION_OPTIONAL" client._call_api.return_value = { "jobReference": { "projectId": "response-project", @@ -642,6 +638,7 @@ def test_query_and_wait_sets_location(): "useInt64Timestamp": True, }, "requestId": mock.ANY, + "jobCreationMode": mock.ANY, }, timeout=None, ) @@ -658,6 +655,7 @@ def test_query_and_wait_sets_location(): ) def test_query_and_wait_sets_max_results(max_results, page_size, expected): client = mock.create_autospec(Client) + client.default_job_creation_mode = None client._call_api.return_value = { "jobReference": { "projectId": "response-project", @@ -703,6 +701,7 @@ def test_query_and_wait_sets_max_results(max_results, page_size, expected): def test_query_and_wait_caches_completed_query_results_one_page(): client = mock.create_autospec(Client) + client.default_job_creation_mode = None client._call_api.return_value = { "jobReference": { "projectId": "response-project", @@ -768,6 +767,7 @@ def test_query_and_wait_caches_completed_query_results_one_page(): def test_query_and_wait_caches_completed_query_results_one_page_no_rows(): client = mock.create_autospec(Client) + client.default_job_creation_mode = None client._call_api.return_value = { "jobReference": { "projectId": "response-project", diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 4680683217ad..8ce8d2cbd8af 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -213,6 +213,17 @@ def test_ctor_w_client_options_universe(self): ) self.assertEqual(client._connection.API_BASE_URL, "https://bigquery.foo.com") + def test_ctor_w_job_creation_mode(self): + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_job_creation_mode="foo", + ) + self.assertEqual(client.default_job_creation_mode, "foo") + def test_ctor_w_location(self): from google.cloud.bigquery._http import Connection From 0203dd5fb8c6687426f54c2d356eab98f643dac9 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 27 May 2025 20:46:02 +0200 Subject: [PATCH 1947/2016] chore(deps): update dependency pytest-xdist to v3.7.0 (#2203) Co-authored-by: Chalmer Lowe Co-authored-by: Lingqing Gan --- .../samples/desktopapp/requirements-test.txt | 2 +- .../samples/geography/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements-test.txt | 2 +- .../samples/notebooks/requirements-test.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index cc71ee426e78..2ad35b4186b5 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 7b01ce8acb7f..3ca365401e7c 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index cc71ee426e78..2ad35b4186b5 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index cc71ee426e78..2ad35b4186b5 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 503324cb0f7d..767f71fb1751 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -2,4 +2,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 From 89f0eb54e38cdfec819b730d93664bd9901eb658 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 27 May 2025 13:34:28 -0700 Subject: [PATCH 1948/2016] chore(main): release 3.34.0 (#2193) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: shollyman --- packages/google-cloud-bigquery/CHANGELOG.md | 18 ++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 2f7166d44664..3b29a6a41dd7 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.34.0](https://github.com/googleapis/python-bigquery/compare/v3.33.0...v3.34.0) (2025-05-27) + + +### Features + +* Job creation mode GA ([#2190](https://github.com/googleapis/python-bigquery/issues/2190)) ([64cd39f](https://github.com/googleapis/python-bigquery/commit/64cd39fb395c4a03ef6d2ec8261e1709477b2186)) + + +### Bug Fixes + +* **deps:** Update all dependencies ([#2184](https://github.com/googleapis/python-bigquery/issues/2184)) ([12490f2](https://github.com/googleapis/python-bigquery/commit/12490f2f03681516465fc34217dcdf57000f6fdd)) + + +### Documentation + +* Update query.py ([#2192](https://github.com/googleapis/python-bigquery/issues/2192)) ([9b5ee78](https://github.com/googleapis/python-bigquery/commit/9b5ee78f046d9ca3f758eeca6244b8485fe35875)) +* Use query_and_wait in the array parameters sample ([#2202](https://github.com/googleapis/python-bigquery/issues/2202)) ([28a9994](https://github.com/googleapis/python-bigquery/commit/28a9994792ec90a6a4d16835faf2137c09c0fb02)) + ## [3.33.0](https://github.com/googleapis/python-bigquery/compare/v3.32.0...v3.33.0) (2025-05-19) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 8304ac025dcb..9e139385419b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.33.0" +__version__ = "3.34.0" From 23f2f352bcd520295de867bf9abab68327e3f64f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 30 May 2025 11:16:38 +0200 Subject: [PATCH 1949/2016] chore(deps): update all dependencies (#2205) --- .../google-cloud-bigquery/samples/desktopapp/requirements.txt | 2 +- .../google-cloud-bigquery/samples/geography/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 4 ++-- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 4a5b75346cc8..a512dbd3a7b5 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.33.0 +google-cloud-bigquery==3.34.0 google-auth-oauthlib==1.2.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 7a0946faee71..049e8823782d 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,8 +12,8 @@ geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 google-auth==2.40.2 -google-cloud-bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google-cloud-bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 7d0c91e3dc3c..960eb6db457c 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.0 db-dtypes==1.4.3 -google.cloud.bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google.cloud.bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 pandas==2.2.3 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 9f131e5b87e3..27eb7459ae4a 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ bigquery-magics==0.10.0 db-dtypes==1.4.3 -google-cloud-bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google-cloud-bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.2.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index dae43eff307a..fd8bd672bcdc 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.33.0 +google-cloud-bigquery==3.34.0 From 8aad0323ef11c8d59e6d046b6e280f0060095d3e Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 30 May 2025 12:36:07 -0400 Subject: [PATCH 1950/2016] Fix: Update type hints for various BigQuery files (#2206) * Fix: Update type hints for various BigQuery files This commit addresses Issue #2132 by updating type hints in the following files: - google/cloud/bigquery/external_config.py - google/cloud/bigquery/job/base.py - google/cloud/bigquery/routine/routine.py - google/cloud/bigquery/schema.py - google/cloud/bigquery/table.py These changes improve code clarity and maintainability by providing more accurate type information. * updates type hints across the board --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .../google/cloud/bigquery/external_config.py | 9 +++++---- .../google/cloud/bigquery/job/base.py | 4 +--- .../google/cloud/bigquery/routine/routine.py | 12 +++--------- .../google/cloud/bigquery/schema.py | 6 ++---- .../google/cloud/bigquery/table.py | 11 ++++++----- 5 files changed, 17 insertions(+), 25 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 6e943adf357e..cb8141cd09e3 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -22,6 +22,7 @@ import base64 import copy +import typing from typing import Any, Dict, FrozenSet, Iterable, Optional, Union from google.cloud.bigquery._helpers import _to_bytes @@ -835,10 +836,10 @@ def schema(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema """ - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - prop = self._properties.get("schema", {}) # type: ignore - return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] # type: ignore + prop: Dict[str, Any] = typing.cast( + Dict[str, Any], self._properties.get("schema", {}) + ) + return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] @schema.setter def schema(self, value): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 5eb700ce7131..f007b93413ef 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -435,9 +435,7 @@ def __init__(self, job_id, client): @property def configuration(self) -> _JobConfig: """Job-type specific configurtion.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - configuration = self._CONFIG_CLASS() # pytype: disable=not-callable + configuration: _JobConfig = self._CONFIG_CLASS() # pytype: disable=not-callable configuration._properties = self._properties.setdefault("configuration", {}) return configuration diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py index 7e079781ddff..e933fa137a7f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py @@ -518,23 +518,17 @@ def __init__(self): @property def project(self): """str: ID of the project containing the routine.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - return self._properties["projectId"] # pytype: disable=typed-dict-error + return self._properties.get("projectId", "") @property def dataset_id(self): """str: ID of dataset containing the routine.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - return self._properties["datasetId"] # pytype: disable=typed-dict-error + return self._properties.get("datasetId", "") @property def routine_id(self): """str: The routine ID.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - return self._properties["routineId"] # pytype: disable=typed-dict-error + return self._properties.get("routineId", "") @property def path(self): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py index 1f1aab7a4b0b..456730b00c8a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/schema.py @@ -232,11 +232,9 @@ def __init__( if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length if policy_tags is not _DEFAULT_VALUE: - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 self._properties["policyTags"] = ( - policy_tags.to_api_repr() # pytype: disable=attribute-error - if policy_tags is not None + policy_tags.to_api_repr() + if isinstance(policy_tags, PolicyTagList) else None ) if isinstance(range_element_type, str): diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 3b1334bd30af..3ffd5ca56b0f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -137,9 +137,9 @@ def _reference_getter(table): return TableReference(dataset_ref, table.table_id) -# TODO: The typehinting for this needs work. Setting this pragma to temporarily -# manage a pytype issue that came up in another PR. See Issue: #2132 -def _view_use_legacy_sql_getter(table): +def _view_use_legacy_sql_getter( + table: Union["Table", "TableListItem"] +) -> Optional[bool]: """bool: Specifies whether to execute the view with Legacy or Standard SQL. This boolean specifies whether to execute the view with Legacy SQL @@ -151,15 +151,16 @@ def _view_use_legacy_sql_getter(table): ValueError: For invalid value types. """ - view = table._properties.get("view") # type: ignore + view: Optional[Dict[str, Any]] = table._properties.get("view") if view is not None: # The server-side default for useLegacySql is True. - return view.get("useLegacySql", True) # type: ignore + return view.get("useLegacySql", True) if view is not None else True # In some cases, such as in a table list no view object is present, but the # resource still represents a view. Use the type as a fallback. if table.table_type == "VIEW": # The server-side default for useLegacySql is True. return True + return None # explicit return statement to appease mypy class _TableBase: From 92bd647e24866efa6ba6da89a2309b59632e2921 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 30 May 2025 17:58:02 -0400 Subject: [PATCH 1951/2016] feat: Add UpdateMode to update_dataset (#2204) * feat: Add UpdateMode to update_dataset This commit introduces the `UpdateMode` enum and integrates it into the `update_dataset` method in the BigQuery client. The `UpdateMode` enum allows you to specify which parts of a dataset should be updated (metadata, ACL, or full update). The following changes were made: - Defined the `UpdateMode` enum in `google/cloud/bigquery/enums.py` with values: `UPDATE_MODE_UNSPECIFIED`, `UPDATE_METADATA`, `UPDATE_ACL`, and `UPDATE_FULL`. - Modified the `update_dataset` method in `google/cloud/bigquery/client.py` to accept an optional `update_mode` parameter. This parameter is added to the query parameters if provided. - Added unit tests in `tests/unit/test_client.py` to verify the correct handling of the `update_mode` parameter, including testing all enum values and the default case where it's not provided. * updates enums, client, and tests --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .../google/cloud/bigquery/client.py | 22 +++++ .../google/cloud/bigquery/enums.py | 18 ++++ .../tests/unit/test_client.py | 93 ++++++++++++++++++- 3 files changed, 132 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index c6873545b0dc..cc3b3eb2a504 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -91,6 +91,7 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.enums import AutoRowIDs +from google.cloud.bigquery.enums import UpdateMode from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, @@ -1198,6 +1199,7 @@ def update_dataset( fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + update_mode: Optional[UpdateMode] = None, ) -> Dataset: """Change some fields of a dataset. @@ -1237,6 +1239,20 @@ def update_dataset( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + update_mode (Optional[google.cloud.bigquery.enums.UpdateMode]): + Specifies the kind of information to update in a dataset. + By default, dataset metadata (e.g. friendlyName, description, + labels, etc) and ACL information are updated. This argument can + take on the following possible enum values. + + * :attr:`~google.cloud.bigquery.enums.UPDATE_MODE_UNSPECIFIED`: + The default value. Behavior defaults to UPDATE_FULL. + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_METADATA`: + Includes metadata information for the dataset, such as friendlyName, description, labels, etc. + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_ACL`: + Includes ACL information for the dataset, which defines dataset access for one or more entities. + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_FULL`: + Includes both dataset metadata and ACL information. Returns: google.cloud.bigquery.dataset.Dataset: @@ -1250,6 +1266,11 @@ def update_dataset( path = dataset.path span_attributes = {"path": path, "fields": fields} + if update_mode: + query_params = {"updateMode": update_mode.value} + else: + query_params = {} + api_response = self._call_api( retry, span_name="BigQuery.updateDataset", @@ -1259,6 +1280,7 @@ def update_dataset( data=partial, headers=headers, timeout=timeout, + query_params=query_params, ) return Dataset.from_api_repr(api_response) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 4cb7a056dfc4..e9cd911d0dc2 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -409,6 +409,24 @@ class BigLakeTableFormat(object): """Apache Iceberg format.""" +class UpdateMode(enum.Enum): + """Specifies the kind of information to update in a dataset.""" + + UPDATE_MODE_UNSPECIFIED = "UPDATE_MODE_UNSPECIFIED" + """The default value. Behavior defaults to UPDATE_FULL.""" + + UPDATE_METADATA = "UPDATE_METADATA" + """Includes metadata information for the dataset, such as friendlyName, + description, labels, etc.""" + + UPDATE_ACL = "UPDATE_ACL" + """Includes ACL information for the dataset, which defines dataset access + for one or more entities.""" + + UPDATE_FULL = "UPDATE_FULL" + """Includes both dataset metadata and ACL information.""" + + class JobCreationMode(object): """Documented values for Job Creation Mode.""" diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 8ce8d2cbd8af..ed092bcdb8b7 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -60,7 +60,8 @@ from google.cloud.bigquery import job as bqjob import google.cloud.bigquery._job_helpers -from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.dataset import DatasetReference, Dataset +from google.cloud.bigquery.enums import UpdateMode from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry @@ -2101,6 +2102,7 @@ def test_update_dataset(self): }, path="/" + PATH, timeout=7.5, + query_params={}, ) self.assertEqual(ds2.description, ds.description) self.assertEqual(ds2.friendly_name, ds.friendly_name) @@ -2114,6 +2116,94 @@ def test_update_dataset(self): client.update_dataset(ds, []) req = conn.api_request.call_args self.assertEqual(req[1]["headers"]["If-Match"], "etag") + self.assertEqual(req[1].get("query_params"), {}) + + def test_update_dataset_w_update_mode(self): + PATH = f"projects/{self.PROJECT}/datasets/{self.DS_ID}" + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + DESCRIPTION = "DESCRIPTION" + RESOURCE = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "description": DESCRIPTION, + } + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + orig_dataset = Dataset(dataset_ref) + orig_dataset.description = DESCRIPTION + filter_fields = ["description"] + + test_cases = [ + (None, None), + (UpdateMode.UPDATE_MODE_UNSPECIFIED, "UPDATE_MODE_UNSPECIFIED"), + (UpdateMode.UPDATE_METADATA, "UPDATE_METADATA"), + (UpdateMode.UPDATE_ACL, "UPDATE_ACL"), + (UpdateMode.UPDATE_FULL, "UPDATE_FULL"), + ] + + for update_mode_arg, expected_param_value in test_cases: + with self.subTest( + update_mode_arg=update_mode_arg, + expected_param_value=expected_param_value, + ): + conn = client._connection = make_connection(RESOURCE, RESOURCE) + + new_dataset = client.update_dataset( + orig_dataset, + fields=filter_fields, + update_mode=update_mode_arg, + ) + self.assertEqual(orig_dataset.description, new_dataset.description) + + if expected_param_value: + expected_query_params = {"updateMode": expected_param_value} + else: + expected_query_params = {} + + conn.api_request.assert_called_once_with( + method="PATCH", + path="/" + PATH, + data={"description": DESCRIPTION}, + timeout=DEFAULT_TIMEOUT, + query_params=expected_query_params if expected_query_params else {}, + ) + + def test_update_dataset_w_invalid_update_mode(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + DESCRIPTION = "DESCRIPTION" + resource = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + } + + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + orig_dataset = Dataset(dataset_ref) + orig_dataset.description = DESCRIPTION + filter_fields = ["description"] # A non-empty list of fields is required + + # Mock the connection to prevent actual API calls + # and to provide a minimal valid response if the call were to proceed. + conn = client._connection = make_connection(resource) + + test_cases = [ + "INVALID_STRING", + 123, + 123.45, + object(), + ] + + for invalid_update_mode in test_cases: + with self.subTest(invalid_update_mode=invalid_update_mode): + conn.api_request.reset_mock() # Reset mock for each sub-test + with self.assertRaises(AttributeError): + client.update_dataset( + orig_dataset, + fields=filter_fields, + update_mode=invalid_update_mode, + ) def test_update_dataset_w_custom_property(self): # The library should handle sending properties to the API that are not @@ -2145,6 +2235,7 @@ def test_update_dataset_w_custom_property(self): data={"newAlphaProperty": "unreleased property"}, path=path, timeout=DEFAULT_TIMEOUT, + query_params={}, ) self.assertEqual(dataset.dataset_id, self.DS_ID) From 72fff05a2a36415748073ca45d626800cddc523d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 2 Jun 2025 05:42:08 -0400 Subject: [PATCH 1952/2016] feat: Adds dataset_view parameter to get_dataset method (#2198) * feat: Add dataset_view parameter to get_dataset method This commit introduces a new `dataset_view` parameter to the `get_dataset` method in the BigQuery client. This allows you to specify the level of detail (METADATA, ACL, FULL) returned when fetching a dataset. The `DatasetView` enum has been added to `enums.py`. Unit tests have been added to verify: - Correct query parameter (`view`) formation for each enum value. - Correct behavior when `dataset_view` is None. - AttributeError is raised for invalid `dataset_view` types. * test edits, linting, etc. * Fixes docstring * updates docstrings * update parameter name to align with discovery doc * Update google/cloud/bigquery/client.py --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .../google/cloud/bigquery/client.py | 26 ++++++- .../google/cloud/bigquery/enums.py | 18 +++++ .../tests/unit/test_client.py | 70 ++++++++++++++++++- .../tests/unit/test_create_dataset.py | 7 +- 4 files changed, 116 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index cc3b3eb2a504..bb4d80c7303e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -90,8 +90,8 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.enums import UpdateMode + +from google.cloud.bigquery.enums import AutoRowIDs, DatasetView, UpdateMode from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, @@ -865,6 +865,7 @@ def get_dataset( dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + dataset_view: Optional[DatasetView] = None, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -882,7 +883,21 @@ def get_dataset( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + dataset_view (Optional[google.cloud.bigquery.enums.DatasetView]): + Specifies the view that determines which dataset information is + returned. By default, dataset metadata (e.g. friendlyName, description, + labels, etc) and ACL information are returned. This argument can + take on the following possible enum values. + * :attr:`~google.cloud.bigquery.enums.DatasetView.ACL`: + Includes dataset metadata and the ACL. + * :attr:`~google.cloud.bigquery.enums.DatasetView.FULL`: + Includes all dataset metadata, including the ACL and table metadata. + This view is not supported by the `datasets.list` API method. + * :attr:`~google.cloud.bigquery.enums.DatasetView.METADATA`: + Includes basic dataset metadata, but not the ACL. + * :attr:`~google.cloud.bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED`: + The server will decide which view to use. Currently defaults to FULL. Returns: google.cloud.bigquery.dataset.Dataset: A ``Dataset`` instance. @@ -892,6 +907,12 @@ def get_dataset( dataset_ref, default_project=self.project ) path = dataset_ref.path + + if dataset_view: + query_params = {"datasetView": dataset_view.value} + else: + query_params = {} + span_attributes = {"path": path} api_response = self._call_api( retry, @@ -900,6 +921,7 @@ def get_dataset( method="GET", path=path, timeout=timeout, + query_params=query_params, ) return Dataset.from_api_repr(api_response) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index e9cd911d0dc2..9a1e4880cff5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -80,6 +80,24 @@ class CreateDisposition(object): returned in the job result.""" +class DatasetView(enum.Enum): + """DatasetView specifies which dataset information is returned.""" + + DATASET_VIEW_UNSPECIFIED = "DATASET_VIEW_UNSPECIFIED" + """The default value. Currently maps to the FULL view.""" + + METADATA = "METADATA" + """View metadata information for the dataset, such as friendlyName, + description, labels, etc.""" + + ACL = "ACL" + """View ACL information for the dataset, which defines dataset access + for one or more entities.""" + + FULL = "FULL" + """View both dataset metadata and ACL information.""" + + class DefaultPandasDTypes(enum.Enum): """Default Pandas DataFrem DTypes to convert BigQuery data. These Sentinel values are used instead of None to maintain backward compatibility, diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index ed092bcdb8b7..42bfc84b9c5b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -61,7 +61,7 @@ from google.cloud.bigquery import job as bqjob import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference, Dataset -from google.cloud.bigquery.enums import UpdateMode +from google.cloud.bigquery.enums import UpdateMode, DatasetView from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry @@ -753,7 +753,7 @@ def test_get_dataset(self): final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=7.5 + method="GET", path="/%s" % path, timeout=7.5, query_params={} ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -819,6 +819,72 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) + def test_get_dataset_with_dataset_view(self): + path = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + } + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + + test_cases = [ + (None, None), + (DatasetView.DATASET_VIEW_UNSPECIFIED, "DATASET_VIEW_UNSPECIFIED"), + (DatasetView.METADATA, "METADATA"), + (DatasetView.ACL, "ACL"), + (DatasetView.FULL, "FULL"), + ] + + for dataset_view_arg, expected_param_value in test_cases: + with self.subTest( + dataset_view_arg=dataset_view_arg, + expected_param_value=expected_param_value, + ): + # Re-initialize the connection mock for each sub-test to reset side_effect + conn = client._connection = make_connection(resource) + + dataset = client.get_dataset(dataset_ref, dataset_view=dataset_view_arg) + + self.assertEqual(dataset.dataset_id, self.DS_ID) + + if expected_param_value: + expected_query_params = {"datasetView": expected_param_value} + else: + expected_query_params = {} + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % path, + timeout=DEFAULT_TIMEOUT, + query_params=expected_query_params if expected_query_params else {}, + ) + + def test_get_dataset_with_invalid_dataset_view(self): + invalid_view_values = [ + "INVALID_STRING", + 123, + 123.45, + object(), + ] + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + } + conn = client._connection = make_connection(resource) + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + + for invalid_view_value in invalid_view_values: + with self.subTest(invalid_view_value=invalid_view_value): + conn.api_request.reset_mock() # Reset mock for each sub-test + with self.assertRaises(AttributeError): + client.get_dataset(dataset_ref, dataset_view=invalid_view_value) + def test_ensure_bqstorage_client_creating_new_instance(self): bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") diff --git a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py index bd7c6a8f8810..b144471ca503 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_create_dataset.py @@ -372,7 +372,12 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) }, timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), + mock.call( + method="GET", + path=get_path, + timeout=DEFAULT_TIMEOUT, + query_params={}, + ), ] ) From 8a5f3b14dd295390b2b7621f1addfa048008a75a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 5 Jun 2025 00:19:15 +0200 Subject: [PATCH 1953/2016] chore(deps): update all dependencies (#2209) * chore(deps): update all dependencies * pin geopandas===1.0.1 for python <= 3.9 --------- Co-authored-by: Lingqing Gan --- .../samples/desktopapp/requirements-test.txt | 2 +- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 9 +++++---- .../samples/magics/requirements-test.txt | 2 +- .../samples/notebooks/requirements-test.txt | 2 +- .../samples/notebooks/requirements.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 2ad35b4186b5..4b9c515a706d 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 3ca365401e7c..824a1df4a197 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 049e8823782d..5ff1c0c025b8 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -9,8 +9,9 @@ cligj==0.7.2 db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 -geopandas==1.0.1 -google-api-core==2.24.2 +geopandas===1.0.1; python_version <= '3.9' +geopandas==1.1.0; python_version >= '3.10' +google-api-core==2.25.0 google-auth==2.40.2 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 @@ -18,7 +19,7 @@ google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.71.0 +grpcio==1.72.1 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 @@ -38,6 +39,6 @@ rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.2 +typing-extensions==4.14.0 typing-inspect==0.9.0 urllib3==2.4.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 2ad35b4186b5..4b9c515a706d 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 2ad35b4186b5..4b9c515a706d 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 27eb7459ae4a..c3feffb35c9c 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -3,7 +3,7 @@ db-dtypes==1.4.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' -ipython==9.2.0; python_version >= '3.10' +ipython==9.3.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' pandas==2.2.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index 767f71fb1751..d311187ec161 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 From e2435768db2ac8d22b0b58f68abd7f98ae77d57f Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 5 Jun 2025 11:01:02 -0700 Subject: [PATCH 1954/2016] fix: fix rows returned when both start_index and page_size are provided (#2181) * fix: fix total rows returned when both start_index and page_size are provided * use shallow copy and add comments * add docstring * add unit test * lint * add comment --------- Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/client.py | 9 ++- .../google/cloud/bigquery/job/query.py | 8 +++ .../google/cloud/bigquery/table.py | 11 ++- .../tests/unit/job/test_query.py | 72 +++++++++++++++++++ 4 files changed, 97 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index bb4d80c7303e..811e9ef03273 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -2045,6 +2045,7 @@ def _get_query_results( location: Optional[str] = None, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = 0, + start_index: Optional[int] = None, ) -> _QueryResults: """Get the query results object for a query job. @@ -2063,9 +2064,12 @@ def _get_query_results( before using ``retry``. If set, this connection timeout may be increased to a minimum value. This prevents retries on what would otherwise be a successful response. - page_size (int): + page_size (Optional[int]): Maximum number of rows in a single response. See maxResults in the jobs.getQueryResults REST API. + start_index (Optional[int]): + Zero-based index of the starting row. See startIndex in the + jobs.getQueryResults REST API. Returns: google.cloud.bigquery.query._QueryResults: @@ -2095,6 +2099,9 @@ def _get_query_results( if location is not None: extra_params["location"] = location + if start_index is not None: + extra_params["startIndex"] = start_index + path = "/projects/{}/queries/{}".format(project, job_id) # This call is typically made in a polling loop that checks whether the diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 954a4696312d..4d95f0e7189b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1409,6 +1409,7 @@ def _reload_query_results( retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None, page_size: int = 0, + start_index: Optional[int] = None, ): """Refresh the cached query results unless already cached and complete. @@ -1421,6 +1422,9 @@ def _reload_query_results( page_size (int): Maximum number of rows in a single response. See maxResults in the jobs.getQueryResults REST API. + start_index (Optional[int]): + Zero-based index of the starting row. See startIndex in the + jobs.getQueryResults REST API. """ # Optimization: avoid a call to jobs.getQueryResults if it's already # been fetched, e.g. from jobs.query first page of results. @@ -1468,6 +1472,7 @@ def _reload_query_results( location=self.location, timeout=transport_timeout, page_size=page_size, + start_index=start_index, ) def result( # type: ignore # (incompatible with supertype) @@ -1570,6 +1575,9 @@ def result( # type: ignore # (incompatible with supertype) if page_size is not None: reload_query_results_kwargs["page_size"] = page_size + if start_index is not None: + reload_query_results_kwargs["start_index"] = start_index + try: retry_do_query = getattr(self, "_retry_do_query", None) if retry_do_query is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 3ffd5ca56b0f..861f806b47e5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1987,12 +1987,19 @@ def _get_next_page_response(self): return response params = self._get_query_params() + + # If the user has provided page_size and start_index, we need to pass + # start_index for the first page, but for all subsequent pages, we + # should not pass start_index. We make a shallow copy of params and do + # not alter the original, so if the user iterates the results again, + # start_index is preserved. + params_copy = copy.copy(params) if self._page_size is not None: if self.page_number and "startIndex" in params: - del params["startIndex"] + del params_copy["startIndex"] return self.api_request( - method=self._HTTP_METHOD, path=self.path, query_params=params + method=self._HTTP_METHOD, path=self.path, query_params=params_copy ) @property diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 1df65279d788..46b802aa3bbf 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -1682,6 +1682,78 @@ def test_result_with_start_index(self): tabledata_list_request[1]["query_params"]["maxResults"], page_size ) + def test_result_with_start_index_multi_page(self): + # When there are multiple pages of response and the user has set + # start_index, we should supply start_index to the server in the first + # request. However, in the subsequent requests, we will pass only + # page_token but not start_index, because the server only allows one + # of them. + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "7", + } + + # Although the result has 7 rows, the response only returns 6, because + # start_index is 1. + tabledata_resource_1 = { + "totalRows": "7", + "pageToken": "page_token_1", + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + ], + } + tabledata_resource_2 = { + "totalRows": "7", + "pageToken": None, + "rows": [ + {"f": [{"v": "jkl"}]}, + {"f": [{"v": "mno"}]}, + {"f": [{"v": "pqe"}]}, + ], + } + + connection = make_connection( + query_resource, tabledata_resource_1, tabledata_resource_2 + ) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + start_index = 1 + page_size = 3 + + result = job.result(page_size=page_size, start_index=start_index) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 7) + + rows = list(result) + + self.assertEqual(len(rows), 6) + self.assertEqual(len(connection.api_request.call_args_list), 3) + + # First call has both startIndex and maxResults. + tabledata_list_request_1 = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request_1[1]["query_params"]["startIndex"], start_index + ) + self.assertEqual( + tabledata_list_request_1[1]["query_params"]["maxResults"], page_size + ) + + # Second call only has maxResults. + tabledata_list_request_2 = connection.api_request.call_args_list[2] + self.assertFalse("startIndex" in tabledata_list_request_2[1]["query_params"]) + self.assertEqual( + tabledata_list_request_2[1]["query_params"]["maxResults"], page_size + ) + def test_result_error(self): from google.cloud import exceptions From db9f5d9bf739c2b915e6429244e0d38bf4bff85c Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 5 Jun 2025 12:01:06 -0700 Subject: [PATCH 1955/2016] docs: Improve clarity of "Output Only" fields in Dataset class (#2201) fixes b/407210727 --- .../google/cloud/bigquery/dataset.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index f788275cd01e..ec4098511094 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -574,6 +574,10 @@ class Dataset(object): A pointer to a dataset. If ``dataset_ref`` is a string, it must include both the project ID and the dataset ID, separated by ``.``. + + Note: + Fields marked as "Output Only" are populated by the server and will only be + available after calling :meth:`google.cloud.bigquery.client.Client.get_dataset`. """ _PROPERTY_TO_API_FIELD = { @@ -692,7 +696,7 @@ def access_entries(self, value): @property def created(self): - """Union[datetime.datetime, None]: Datetime at which the dataset was + """Union[datetime.datetime, None]: Output only. Datetime at which the dataset was created (:data:`None` until set from the server). """ creation_time = self._properties.get("creationTime") @@ -709,8 +713,8 @@ def dataset_id(self): @property def full_dataset_id(self): - """Union[str, None]: ID for the dataset resource (:data:`None` until - set from the server) + """Union[str, None]: Output only. ID for the dataset resource + (:data:`None` until set from the server). In the format ``project_id:dataset_id``. """ @@ -725,14 +729,14 @@ def reference(self): @property def etag(self): - """Union[str, None]: ETag for the dataset resource (:data:`None` until - set from the server). + """Union[str, None]: Output only. ETag for the dataset resource + (:data:`None` until set from the server). """ return self._properties.get("etag") @property def modified(self): - """Union[datetime.datetime, None]: Datetime at which the dataset was + """Union[datetime.datetime, None]: Output only. Datetime at which the dataset was last modified (:data:`None` until set from the server). """ modified_time = self._properties.get("lastModifiedTime") @@ -744,8 +748,8 @@ def modified(self): @property def self_link(self): - """Union[str, None]: URL for the dataset resource (:data:`None` until - set from the server). + """Union[str, None]: Output only. URL for the dataset resource + (:data:`None` until set from the server). """ return self._properties.get("selfLink") From b9e053d8908db3211e41a56e0ac5f71cf614c620 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 9 Jun 2025 12:31:37 -0400 Subject: [PATCH 1956/2016] test: remove pragma (#2212) * test: remove pragma * test: remove comment about pragma * updates to conditionals related to pandas 2.0+ tests --- .../google/cloud/bigquery/_pandas_helpers.py | 9 ++------- .../tests/unit/job/test_query_pandas.py | 12 +++++------- .../tests/unit/test_table_pandas.py | 14 ++++++-------- 3 files changed, 13 insertions(+), 22 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py index 10a5c59bb21a..2dab03a06840 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -337,13 +337,8 @@ def types_mapper(arrow_data_type): ): return range_date_dtype - # TODO: this section does not have a test yet OR at least not one that is - # recognized by coverage, hence the pragma. See Issue: #2132 - elif ( - range_timestamp_dtype is not None - and arrow_data_type.equals( # pragma: NO COVER - range_timestamp_dtype.pyarrow_dtype - ) + elif range_timestamp_dtype is not None and arrow_data_type.equals( + range_timestamp_dtype.pyarrow_dtype ): return range_timestamp_dtype diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py index d82f0dfe3116..a6c59b1582e6 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_pandas.py @@ -647,12 +647,6 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) -# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 -# pragma added due to issues with coverage. -@pytest.mark.skipif( - pandas.__version__.startswith("2."), - reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", -) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class @@ -704,7 +698,6 @@ def test_to_dataframe_column_dtypes(): exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] assert list(df) == exp_columns # verify the column names - assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" assert df.seconds.dtype.name == "Int64" assert df.miles.dtype.name == "float64" assert df.km.dtype.name == "float16" @@ -712,6 +705,11 @@ def test_to_dataframe_column_dtypes(): assert df.complete.dtype.name == "boolean" assert df.date.dtype.name == "dbdate" + if pandas.__version__.startswith("2."): + assert df.start_timestamp.dtype.name == "datetime64[us, UTC]" + else: + assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" + def test_to_dataframe_column_date_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py index 43d64d77dde0..a4fa3fa3910e 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table_pandas.py @@ -34,12 +34,6 @@ def class_under_test(): return RowIterator -# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 -# pragma added due to issues with coverage. -@pytest.mark.skipif( - pandas.__version__.startswith("2."), - reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", -) def test_to_dataframe_nullable_scalars( monkeypatch, class_under_test ): # pragma: NO COVER @@ -113,14 +107,18 @@ def test_to_dataframe_nullable_scalars( assert df.dtypes["bool_col"].name == "boolean" assert df.dtypes["bytes_col"].name == "object" assert df.dtypes["date_col"].name == "dbdate" - assert df.dtypes["datetime_col"].name == "datetime64[ns]" assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["numeric_col"].name == "object" assert df.dtypes["string_col"].name == "object" assert df.dtypes["time_col"].name == "dbtime" - assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" assert df.dtypes["json_col"].name == "object" + if pandas.__version__.startswith("2."): + assert df.dtypes["datetime_col"].name == "datetime64[us]" + assert df.dtypes["timestamp_col"].name == "datetime64[us, UTC]" + else: + assert df.dtypes["datetime_col"].name == "datetime64[ns]" + assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" # Check for expected values. assert df["bignumeric_col"][0] == decimal.Decimal("123.456789101112131415") From 776c810682108c3916c416eca7a1d608893bfe38 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jun 2025 18:58:40 +0200 Subject: [PATCH 1957/2016] chore(deps): update all dependencies (#2211) Co-authored-by: Chalmer Lowe --- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../samples/notebooks/requirements.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 5ff1c0c025b8..4ebff482d484 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,19 +12,19 @@ geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.0; python_version >= '3.10' google-api-core==2.25.0 -google-auth==2.40.2 +google-auth==2.40.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.72.1 +grpcio==1.73.0 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.2.3 +pandas==2.3.0 proto-plus==1.26.1 pyarrow==20.0.0 pyasn1==0.6.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 960eb6db457c..d2456fc5a7e5 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -3,4 +3,4 @@ db-dtypes==1.4.3 google.cloud.bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 -pandas==2.2.3 +pandas==2.3.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index c3feffb35c9c..66409e49d376 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -6,4 +6,4 @@ ipython===8.18.1; python_version == '3.9' ipython==9.3.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' -pandas==2.2.3 +pandas==2.3.0 From 450a05095c64bebdf6d5438204247399312958fd Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jun 2025 20:53:46 +0200 Subject: [PATCH 1958/2016] chore(deps): update dependency requests to v2.32.4 (#2213) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 4ebff482d484..75a196eeb622 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -34,7 +34,7 @@ pyparsing==3.2.3 python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 -requests==2.32.3 +requests==2.32.4 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' From 2e440bd696e2de3e3927a6585e3410ef720250fc Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 13 Jun 2025 19:49:19 +0200 Subject: [PATCH 1959/2016] chore(deps): update dependency google-api-core to v2.25.1 (#2215) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 75a196eeb622..daaf67b9e9cf 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.0; python_version >= '3.10' -google-api-core==2.25.0 +google-api-core==2.25.1 google-auth==2.40.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 From e81aee2b96b9dc42dbca32f01f7d3a72188411d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 20 Jun 2025 14:36:25 -0700 Subject: [PATCH 1960/2016] chore(deps): bump urllib3 from 2.4.0 to 2.5.0 in /samples/geography (#2220) Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.4.0 to 2.5.0. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.4.0...2.5.0) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.5.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index daaf67b9e9cf..379d682b4e08 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -41,4 +41,4 @@ Shapely==2.1.1; python_version >= '3.10' six==1.17.0 typing-extensions==4.14.0 typing-inspect==0.9.0 -urllib3==2.4.0 +urllib3==2.5.0 From 60b2038dadd09fac44246ad56f2d61b6c1e2a09c Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Mon, 23 Jun 2025 11:50:33 -0400 Subject: [PATCH 1961/2016] chore: add label job sample (#2219) * chore: add label job sample * lint * remove unnecessary api call * Apply suggestions from code review Co-authored-by: Chalmer Lowe --------- Co-authored-by: Chalmer Lowe --- .../samples/snippets/label_job.py | 36 +++++++++++++++++++ .../samples/snippets/label_job_test.py | 31 ++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 packages/google-cloud-bigquery/samples/snippets/label_job.py create mode 100644 packages/google-cloud-bigquery/samples/snippets/label_job_test.py diff --git a/packages/google-cloud-bigquery/samples/snippets/label_job.py b/packages/google-cloud-bigquery/samples/snippets/label_job.py new file mode 100644 index 000000000000..cfd06d189b9e --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/label_job.py @@ -0,0 +1,36 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def label_job() -> None: + # [START bigquery_label_job] + from google.cloud import bigquery + + client = bigquery.Client() + + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + labels = {"color": "green"} + + config = bigquery.QueryJobConfig() + config.labels = labels + location = "us" + job = client.query(sql, location=location, job_config=config) + job_id = job.job_id + + print(f"Added {job.labels} to {job_id}.") + # [END bigquery_label_job] diff --git a/packages/google-cloud-bigquery/samples/snippets/label_job_test.py b/packages/google-cloud-bigquery/samples/snippets/label_job_test.py new file mode 100644 index 000000000000..0780db61a446 --- /dev/null +++ b/packages/google-cloud-bigquery/samples/snippets/label_job_test.py @@ -0,0 +1,31 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import label_job # type: ignore + + +if typing.TYPE_CHECKING: + import pytest + + +def test_label_job( + capsys: "pytest.CaptureFixture[str]", +) -> None: + label_job.label_job() + + out, _ = capsys.readouterr() + assert "color" in out + assert "green" in out From 9e81f4fdcc35eb980d00c19b28b0c17831a76e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 23 Jun 2025 20:32:50 +0200 Subject: [PATCH 1962/2016] chore: update PyPI URL for official nightly pyarrow repository (#2223) Co-authored-by: Lingqing Gan --- packages/google-cloud-bigquery/noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 6807b7ee476e..eb79c238da03 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -408,10 +408,10 @@ def prerelease_deps(session): ) # PyArrow prerelease packages are published to an alternative PyPI host. - # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages + # https://arrow.apache.org/docs/developers/python.html#installing-nightly-packages session.install( "--extra-index-url", - "https://pypi.fury.io/arrow-nightlies/", + "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple", "--prefer-binary", "--pre", "--upgrade", From a247049f82a162ab1c42419d1cc07684652526b4 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 25 Jun 2025 19:08:09 +0200 Subject: [PATCH 1963/2016] chore(deps): update all dependencies (#2216) Co-authored-by: Lingqing Gan --- .../samples/desktopapp/requirements-test.txt | 2 +- .../samples/geography/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements-test.txt | 2 +- .../samples/notebooks/requirements-test.txt | 2 +- .../samples/snippets/requirements-test.txt | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 4b9c515a706d..b3046227c192 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index 824a1df4a197..ee895a4f49a1 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 379d682b4e08..f8f79a9706ed 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.4.26 +certifi==2025.6.15 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 4b9c515a706d..b3046227c192 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 4b9c515a706d..b3046227c192 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index d311187ec161..d71018b3ff13 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 From 9be5110b6dc9d68563fb9c41e73a1c61a1ae6f4c Mon Sep 17 00:00:00 2001 From: Prabakar <86585391+drokeye@users.noreply.github.com> Date: Thu, 26 Jun 2025 23:28:53 +0530 Subject: [PATCH 1964/2016] fix: make AccessEntry equality consistent with from_api_repr (#2218) * fix: make AccessEntry equality consistent for view entity type * fix: make AccessEntry equality consistent for view entity type * fix: use json.dumps() for normalizaiton of entity_id * remove trailing whitespace and add test assertions * revert back to the original code * fix linting in `dataset.py` * fix linting in `test_dataset.py` --------- Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/dataset.py | 17 ++++- .../tests/unit/test_dataset.py | 67 +++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py index ec4098511094..878b77d4186c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/dataset.py @@ -17,6 +17,7 @@ from __future__ import absolute_import import copy +import json import typing from typing import Optional, List, Dict, Any, Union @@ -506,7 +507,20 @@ def entity_id(self) -> Optional[Union[Dict[str, Any], str]]: def __eq__(self, other): if not isinstance(other, AccessEntry): return NotImplemented - return self._key() == other._key() + return ( + self.role == other.role + and self.entity_type == other.entity_type + and self._normalize_entity_id(self.entity_id) + == self._normalize_entity_id(other.entity_id) + and self.condition == other.condition + ) + + @staticmethod + def _normalize_entity_id(value): + """Ensure consistent equality for dicts like 'view'.""" + if isinstance(value, dict): + return json.dumps(value, sort_keys=True) + return value def __ne__(self, other): return not self == other @@ -557,7 +571,6 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry": google.cloud.bigquery.dataset.AccessEntry: Access entry parsed from ``resource``. """ - access_entry = cls() access_entry._properties = resource.copy() return access_entry diff --git a/packages/google-cloud-bigquery/tests/unit/test_dataset.py b/packages/google-cloud-bigquery/tests/unit/test_dataset.py index 3fd2579af955..604e5ed2e4bf 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_dataset.py +++ b/packages/google-cloud-bigquery/tests/unit/test_dataset.py @@ -1767,3 +1767,70 @@ def test__hash__with_minimal_inputs(self): description=None, ) assert hash(cond1) is not None + + def test_access_entry_view_equality(self): + from google.cloud import bigquery + + entry1 = bigquery.dataset.AccessEntry( + entity_type="view", + entity_id={ + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "my_table", + }, + ) + entry2 = bigquery.dataset.AccessEntry.from_api_repr( + { + "view": { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "my_table", + } + } + ) + + entry3 = bigquery.dataset.AccessEntry( + entity_type="routine", + entity_id={ + "projectId": "my_project", + "datasetId": "my_dataset", + "routineId": "my_routine", + }, + ) + + entry4 = bigquery.dataset.AccessEntry.from_api_repr( + { + "routine": { + "projectId": "my_project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + } + ) + + entry5 = bigquery.dataset.AccessEntry( + entity_type="dataset", + entity_id={ + "dataset": { + "projectId": "my_project", + "datasetId": "my_dataset", + }, + "target_types": "VIEWS", + }, + ) + + entry6 = bigquery.dataset.AccessEntry.from_api_repr( + { + "dataset": { + "dataset": { + "projectId": "my_project", + "datasetId": "my_dataset", + }, + "target_types": "VIEWS", + } + } + ) + + assert entry1 == entry2 + assert entry3 == entry4 + assert entry5 == entry6 From 137fb62e8b62f358289d7db986ef27ceae22e61c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 27 Jun 2025 21:00:19 +0200 Subject: [PATCH 1965/2016] chore(deps): update all dependencies (#2224) --- .../samples/geography/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index f8f79a9706ed..68f6c16626dd 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -4,13 +4,13 @@ cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' click==8.2.1; python_version >= '3.10' -click-plugins==1.1.1 +click-plugins==1.1.1.2 cligj==0.7.2 db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' -geopandas==1.1.0; python_version >= '3.10' +geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 google-cloud-bigquery==3.34.0 @@ -19,7 +19,7 @@ google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.73.0 +grpcio==1.73.1 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 From 08c1b67b118aa540257c710368a29bba13267b5a Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 2 Jul 2025 17:00:39 -0400 Subject: [PATCH 1966/2016] =?UTF-8?q?fix:=20adds=20magics.context.project?= =?UTF-8?q?=20to=20eliminate=20issues=20with=20unit=20tests=20=E2=80=A6=20?= =?UTF-8?q?(#2228)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `magics.context.project` to eliminate issues with unit tests in an upcoming PR. Several magics unit tests fail with an error message. If the test does not have knowledge of the project, it attempts to initiate a login sequence to be able to get the project identifier. The login cannot complete because the process is running in an ipython interpreter and pytest does not capture any input. This change provides an explicit reference to a project to avoid that process. ``` Please visit this URL to authorize this application: [REDACTED DUE TO SPACE REASONS] self = <_pytest.capture.DontReadFromInput object at 0x7f55d6821bd0>, size = -1 def read(self, size: int = -1) -> str: > raise OSError( "pytest: reading from stdin while output is captured! Consider using `-s`.") E OSError: pytest: reading from stdin while output is captured! Consider using `-s`. .nox/unit-3-11/lib/python3.11/site-packages/_pytest/capture.py:229: OSError ``` --- packages/google-cloud-bigquery/tests/unit/test_magics.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/google-cloud-bigquery/tests/unit/test_magics.py b/packages/google-cloud-bigquery/tests/unit/test_magics.py index 814150693b21..c79e923f85cb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_magics.py +++ b/packages/google-cloud-bigquery/tests/unit/test_magics.py @@ -986,6 +986,7 @@ def test_bigquery_magic_dryrun_option_sets_job_config(monkeypatch): google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) @@ -1007,6 +1008,7 @@ def test_bigquery_magic_dryrun_option_returns_query_job(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1035,6 +1037,7 @@ def test_bigquery_magic_dryrun_option_variable_error_message( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" ipython_ns_cleanup.append((ip, "q_job")) run_query_patch = mock.patch( @@ -1064,6 +1067,7 @@ def test_bigquery_magic_dryrun_option_saves_query_job_to_variable( magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1098,6 +1102,7 @@ def test_bigquery_magic_saves_query_job_to_variable_on_error( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" ipython_ns_cleanup.append((ip, "result")) client_query_patch = mock.patch( From 7dad037b8faeebf0865f071e271dca966701af6f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 7 Jul 2025 22:52:24 +0200 Subject: [PATCH 1967/2016] chore(deps): update all dependencies (#2226) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Change | Age | Confidence | |---|---|---|---| | [bigquery-magics](https://redirect.github.com/googleapis/python-bigquery-magics) | `==0.10.0` -> `==0.10.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/bigquery-magics/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/bigquery-magics/0.10.0/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [bigquery_magics](https://redirect.github.com/googleapis/python-bigquery-magics) | `==0.10.0` -> `==0.10.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/bigquery-magics/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/bigquery-magics/0.10.0/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [ipython](https://redirect.github.com/ipython/ipython) | `==9.3.0` -> `==9.4.0` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/ipython/9.4.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/ipython/9.3.0/9.4.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [pandas](https://redirect.github.com/pandas-dev/pandas) | `==2.3.0` -> `==2.3.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/pandas/2.3.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/pandas/2.3.0/2.3.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [pytest-xdist](https://redirect.github.com/pytest-dev/pytest-xdist) ([changelog](https://pytest-xdist.readthedocs.io/en/latest/changelog.html)) | `==3.7.0` -> `==3.8.0` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/pytest-xdist/3.8.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/pytest-xdist/3.7.0/3.8.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [typing-extensions](https://redirect.github.com/python/typing_extensions) ([changelog](https://redirect.github.com/python/typing_extensions/blob/main/CHANGELOG.md)) | `==4.14.0` -> `==4.14.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/typing-extensions/4.14.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/typing-extensions/4.14.0/4.14.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-magics (bigquery-magics) ### [`v0.10.1`](https://redirect.github.com/googleapis/python-bigquery-magics/blob/HEAD/CHANGELOG.md#0101-2025-07-07) [Compare Source](https://redirect.github.com/googleapis/python-bigquery-magics/compare/v0.10.0...v0.10.1) ##### Dependencies - Move spanner-graph-notebook back to version 1.1.6 ([#​126](https://redirect.github.com/googleapis/python-bigquery-magics/issues/126)) ([17ee695](https://redirect.github.com/googleapis/python-bigquery-magics/commit/17ee6956c8fec740440836609a9106e900b63074))
ipython/ipython (ipython) ### [`v9.4.0`](https://redirect.github.com/ipython/ipython/compare/9.3.0...9.4.0) [Compare Source](https://redirect.github.com/ipython/ipython/compare/9.3.0...9.4.0)
pandas-dev/pandas (pandas) ### [`v2.3.1`](https://redirect.github.com/pandas-dev/pandas/releases/tag/v2.3.1): Pandas 2.3.1 [Compare Source](https://redirect.github.com/pandas-dev/pandas/compare/v2.3.0...v2.3.1) We are pleased to announce the release of pandas 2.3.1. This release includes some improvements and fixes to the future string data type (preview feature for the upcoming pandas 3.0). We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/2.3.1/whatsnew/v2.3.1.html) for a list of all the changes. Pandas 2.3.1 supports Python 3.9 and higher. The release will be available on the conda-forge channel: ``` conda install pandas --channel conda-forge ``` Or via PyPI: ``` python3 -m pip install --upgrade pandas ``` Please report any issues with the release on the [pandas issue tracker](https://redirect.github.com/pandas-dev/pandas/issues). Thanks to all the contributors who made this release possible.
pytest-dev/pytest-xdist (pytest-xdist) ### [`v3.8.0`](https://redirect.github.com/pytest-dev/pytest-xdist/blob/HEAD/CHANGELOG.rst#pytest-xdist-380-2025-06-30) [Compare Source](https://redirect.github.com/pytest-dev/pytest-xdist/compare/v3.7.0...v3.8.0) \=============================== ## Features - `#​1083 `\_: Add `--no-loadscope-reorder` and `--loadscope-reorder` option to control whether to automatically reorder tests in loadscope for tests where relative ordering matters. This only applies when using `loadscope`. For example, \[test\_file\_1, test\_file\_2, ..., test\_file\_n] are given as input test files, if `--no-loadscope-reorder` is used, for either worker, the `test_file_a` will be executed before `test_file_b` only if `a < b`. The default behavior is to reorder the tests to maximize the number of tests that can be executed in parallel.
python/typing_extensions (typing-extensions) ### [`v4.14.1`](https://redirect.github.com/python/typing_extensions/blob/HEAD/CHANGELOG.md#Release-4141-July-4-2025) [Compare Source](https://redirect.github.com/python/typing_extensions/compare/4.14.0...4.14.1) - Fix usage of `typing_extensions.TypedDict` nested inside other types (e.g., `typing.Type[typing_extensions.TypedDict]`). This is not allowed by the type system but worked on older versions, so we maintain support.
--- ### Configuration 📅 **Schedule**: Branch creation - At any time (no schedule defined), Automerge - At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://redirect.github.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR was generated by [Mend Renovate](https://mend.io/renovate/). View the [repository job log](https://developer.mend.io/github/googleapis/python-bigquery). --- .../samples/desktopapp/requirements-test.txt | 2 +- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 4 ++-- .../samples/magics/requirements-test.txt | 2 +- .../google-cloud-bigquery/samples/magics/requirements.txt | 4 ++-- .../samples/notebooks/requirements-test.txt | 2 +- .../samples/notebooks/requirements.txt | 6 +++--- .../samples/snippets/requirements-test.txt | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index b3046227c192..3bf52c85d823 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index ee895a4f49a1..d449b373b688 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 68f6c16626dd..5b342fe5c086 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -24,7 +24,7 @@ idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.3.0 +pandas==2.3.1 proto-plus==1.26.1 pyarrow==20.0.0 pyasn1==0.6.1 @@ -39,6 +39,6 @@ rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' six==1.17.0 -typing-extensions==4.14.0 +typing-extensions==4.14.1 typing-inspect==0.9.0 urllib3==2.5.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index b3046227c192..3bf52c85d823 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index d2456fc5a7e5..b53a35982dc8 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.10.0 +bigquery_magics==0.10.1 db-dtypes==1.4.3 google.cloud.bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 -pandas==2.3.0 +pandas==2.3.1 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index b3046227c192..3bf52c85d823 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 66409e49d376..4b134ac9dd3f 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ -bigquery-magics==0.10.0 +bigquery-magics==0.10.1 db-dtypes==1.4.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' -ipython==9.3.0; python_version >= '3.10' +ipython==9.4.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' -pandas==2.3.0 +pandas==2.3.1 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index d71018b3ff13..cef3450e15bd 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -2,4 +2,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 From c67534bdbc3a64f177dff95374d9d291eac88569 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 9 Jul 2025 19:12:23 -0400 Subject: [PATCH 1968/2016] feat: adds time_zone to external config and load job (#2229) This commit introduces new configuration options for BigQuery load jobs and external table definitions, aligning with recent updates to the underlying protos. New options added: `time_zone`: Time zone used when parsing timestamp values that do not have specific time zone information. (Applies to `LoadJobConfig`, `LoadJob`, and `ExternalConfig`) Changes include: Added corresponding properties (getters/setters) to `LoadJobConfig`, `LoadJob`, and `ExternalConfig`. Updated docstrings and type hints for all new attributes. Updated unit tests to cover the new options, ensuring they are correctly handled during object initialization, serialization to API representation, and deserialization from API responses. --- .../google/cloud/bigquery/external_config.py | 17 +++ .../google/cloud/bigquery/job/load.py | 21 +++ .../tests/unit/job/test_load.py | 32 +++++ .../tests/unit/job/test_load_config.py | 127 ++++++++++++++++++ .../tests/unit/test_external_config.py | 7 + 5 files changed, 204 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index cb8141cd09e3..fcfcaca2006f 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -848,6 +848,23 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def time_zone(self) -> Optional[str]: + """Optional[str]: Time zone used when parsing timestamp values that do not + have specific time zone information (e.g. 2024-04-20 12:34:56). The expected + format is an IANA timezone string (e.g. America/Los_Angeles). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone + """ + + result = self._properties.get("timeZone") + return typing.cast(str, result) + + @time_zone.setter + def time_zone(self, value: Optional[str]): + self._properties["timeZone"] = value + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index e56ce16f04a5..5d49aef189e7 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -548,6 +548,20 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def time_zone(self) -> Optional[str]: + """Optional[str]: Default time zone that will apply when parsing timestamp + values that have no specific time zone. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_zone + """ + return self._get_sub_prop("timeZone") + + @time_zone.setter + def time_zone(self, value: Optional[str]): + self._set_sub_prop("timeZone", value) + @property def time_partitioning(self): """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based @@ -889,6 +903,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def time_zone(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`. + """ + return self.configuration.time_zone + @property def schema_update_options(self): """See diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index 10df46fb37e6..81d8e44b41d3 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -38,10 +38,14 @@ def _setUpConstants(self): self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" + self.TIME_ZONE = "UTC" + def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] + + config["timeZone"] = self.TIME_ZONE config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, @@ -152,6 +156,10 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "timeZone" in config: + self.assertEqual(job.time_zone, config["timeZone"]) + else: + self.assertIsNone(job.time_zone) def test_ctor(self): client = _make_client(project=self.PROJECT) @@ -195,6 +203,8 @@ def test_ctor(self): self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) + self.assertIsNone(job.time_zone) + def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.job import LoadJobConfig @@ -431,6 +441,24 @@ def test_from_api_repr_w_properties(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_to_api_repr(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource(ended=False) + + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client) + api_repr = job.to_api_repr() + + # as per the documentation in load.py -> LoadJob.to_api_repr(), + # the return value from to_api_repr should not include statistics + expected = { + "jobReference": RESOURCE["jobReference"], + "configuration": RESOURCE["configuration"], + } + + self.assertEqual(api_repr, expected) + def test_begin_w_already_running(self): conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) @@ -571,6 +599,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "timeZone": self.TIME_ZONE, } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION conn1 = make_connection() @@ -599,6 +628,9 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" + + config.time_zone = self.TIME_ZONE + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index 3a681c476228..6424f7e68b10 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -828,6 +828,22 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_time_zone_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.time_zone) + + def test_time_zone_hit(self): + time_zone = "UTC" + config = self._get_target_class()() + config._properties["load"]["timeZone"] = time_zone + self.assertEqual(config.time_zone, time_zone) + + def test_time_zone_setter(self): + time_zone = "America/New_York" + config = self._get_target_class()() + config.time_zone = time_zone + self.assertEqual(config._properties["load"]["timeZone"], time_zone) + def test_parquet_options_missing(self): config = self._get_target_class()() self.assertIsNone(config.parquet_options) @@ -901,3 +917,114 @@ def test_column_name_character_map_none(self): config._properties["load"]["columnNameCharacterMap"], ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED, ) + + RESOURCE = { + "load": { + "allowJaggedRows": True, + "createDisposition": "CREATE_NEVER", + "encoding": "UTF-8", + "fieldDelimiter": ",", + "ignoreUnknownValues": True, + "maxBadRecords": 10, + "nullMarker": "\\N", + "quote": '"', + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "skipLeadingRows": "1", + "sourceFormat": "CSV", + "timePartitioning": { + "type": "DAY", + "field": "transaction_date", + }, + "useAvroLogicalTypes": True, + "writeDisposition": "WRITE_TRUNCATE", + "timeZone": "America/New_York", + "parquetOptions": {"enableListInference": True}, + "columnNameCharacterMap": "V2", + "someNewField": "some-value", + } + } + + def test_from_api_repr(self): + from google.cloud.bigquery.job import ( + CreateDisposition, + LoadJobConfig, + SourceFormat, + WriteDisposition, + ) + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType + + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = LoadJobConfig.from_api_repr(self.RESOURCE) + + self.assertTrue(config.allow_jagged_rows) + self.assertEqual(config.create_disposition, CreateDisposition.CREATE_NEVER) + self.assertEqual(config.encoding, "UTF-8") + self.assertEqual(config.field_delimiter, ",") + self.assertTrue(config.ignore_unknown_values) + self.assertEqual(config.max_bad_records, 10) + self.assertEqual(config.null_marker, "\\N") + self.assertEqual(config.quote_character, '"') + self.assertEqual( + config.schema, + [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")], + ) + self.assertEqual(config.skip_leading_rows, 1) + self.assertEqual(config.source_format, SourceFormat.CSV) + self.assertEqual( + config.time_partitioning, + TimePartitioning(type_=TimePartitioningType.DAY, field="transaction_date"), + ) + self.assertTrue(config.use_avro_logical_types) + self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE) + self.assertEqual(config.time_zone, "America/New_York") + self.assertTrue(config.parquet_options.enable_list_inference) + self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2) + self.assertEqual(config._properties["load"]["someNewField"], "some-value") + + def test_to_api_repr(self): + from google.cloud.bigquery.job import ( + CreateDisposition, + LoadJobConfig, + SourceFormat, + WriteDisposition, + ) + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType + from google.cloud.bigquery.format_options import ParquetOptions + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = LoadJobConfig() + config.allow_jagged_rows = True + config.create_disposition = CreateDisposition.CREATE_NEVER + config.encoding = "UTF-8" + config.field_delimiter = "," + config.ignore_unknown_values = True + config.max_bad_records = 10 + config.null_marker = r"\N" + config.quote_character = '"' + config.schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] + config.skip_leading_rows = 1 + config.source_format = SourceFormat.CSV + config.time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, field="transaction_date" + ) + config.use_avro_logical_types = True + config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.time_zone = "America/New_York" + parquet_options = ParquetOptions() + parquet_options.enable_list_inference = True + config.parquet_options = parquet_options + config.column_name_character_map = ColumnNameCharacterMap.V2 + config._properties["load"]["someNewField"] = "some-value" + + api_repr = config.to_api_repr() + + expected = self.RESOURCE + self.assertEqual(api_repr, expected) diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 7f84a9f5bef2..a89b7a1fb78a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -26,6 +26,8 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] + TIME_ZONE = "America/Los_Angeles" + BASE_RESOURCE = { "sourceFormat": "", "sourceUris": SOURCE_URIS, @@ -33,6 +35,7 @@ class TestExternalConfig(unittest.TestCase): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "timeZone": TIME_ZONE, } def test_from_api_repr_base(self): @@ -79,6 +82,7 @@ def test_to_api_repr_base(self): ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] + ec.time_zone = self.TIME_ZONE exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } @@ -92,6 +96,7 @@ def test_to_api_repr_base(self): "compression": "compression", "connectionId": "path/to/connection", "schema": exp_schema, + "timeZone": self.TIME_ZONE, } self.assertEqual(got_resource, exp_resource) @@ -128,6 +133,8 @@ def _verify_base(self, ec): self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) + self.assertEqual(ec.time_zone, self.TIME_ZONE) + def test_to_api_repr_source_format(self): ec = external_config.ExternalConfig("CSV") got = ec.to_api_repr() From f29d0de492abcf0e77efb98cc81e95103ee15ea3 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 10 Jul 2025 19:44:58 +0200 Subject: [PATCH 1969/2016] chore(deps): update dependency certifi to v2025.7.9 (#2232) Co-authored-by: Chalmer Lowe --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 5b342fe5c086..447e92c81811 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.6.15 +certifi==2025.7.9 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' From ae08ade3024d75d5e7e9a0f0391190f317ed765d Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Fri, 11 Jul 2025 10:10:58 -0700 Subject: [PATCH 1970/2016] feat: add total slot ms to RowIterator (#2233) * feat: add total slot ms to RowIterator * format fix --------- Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/_job_helpers.py | 1 + .../google/cloud/bigquery/client.py | 4 ++++ .../google/cloud/bigquery/job/query.py | 1 + .../google/cloud/bigquery/query.py | 14 ++++++++++++++ .../google/cloud/bigquery/table.py | 7 +++++++ .../tests/unit/job/test_query.py | 2 ++ .../tests/unit/test_client.py | 2 ++ .../tests/unit/test_query.py | 16 ++++++++++++++++ 8 files changed, 47 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 888dc1e7312c..73d4f6e7bc27 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -560,6 +560,7 @@ def do_query(): num_dml_affected_rows=query_results.num_dml_affected_rows, query=query, total_bytes_processed=query_results.total_bytes_processed, + slot_millis=query_results.slot_millis, ) if job_retry is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 811e9ef03273..804f77ea2871 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -4144,6 +4144,7 @@ def _list_rows_from_query_results( num_dml_affected_rows: Optional[int] = None, query: Optional[str] = None, total_bytes_processed: Optional[int] = None, + slot_millis: Optional[int] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -4195,6 +4196,8 @@ def _list_rows_from_query_results( The query text used. total_bytes_processed (Optional[int]): total bytes processed from job statistics, if present. + slot_millis (Optional[int]): + Number of slot ms the user is actually billed for. Returns: google.cloud.bigquery.table.RowIterator: @@ -4234,6 +4237,7 @@ def _list_rows_from_query_results( num_dml_affected_rows=num_dml_affected_rows, query=query, total_bytes_processed=total_bytes_processed, + slot_millis=slot_millis, ) return row_iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 4d95f0e7189b..ec9379ea9c47 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1766,6 +1766,7 @@ def is_job_done(): num_dml_affected_rows=self._query_results.num_dml_affected_rows, query=self.query, total_bytes_processed=self.total_bytes_processed, + slot_millis=self.slot_millis, **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 8745c09f5fb1..4a006d621285 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -1282,6 +1282,20 @@ def total_bytes_processed(self): if total_bytes_processed is not None: return int(total_bytes_processed) + @property + def slot_millis(self): + """Total number of slot ms the user is actually billed for. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.slot_millis + + Returns: + Optional[int]: Count generated on the server (None until set by the server). + """ + slot_millis = self._properties.get("totalSlotMs") + if slot_millis is not None: + return int(slot_millis) + @property def num_dml_affected_rows(self): """Total number of rows affected by a DML query. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 861f806b47e5..dbdde36d1292 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1812,6 +1812,7 @@ def __init__( num_dml_affected_rows: Optional[int] = None, query: Optional[str] = None, total_bytes_processed: Optional[int] = None, + slot_millis: Optional[int] = None, ): super(RowIterator, self).__init__( client, @@ -1841,6 +1842,7 @@ def __init__( self._num_dml_affected_rows = num_dml_affected_rows self._query = query self._total_bytes_processed = total_bytes_processed + self._slot_millis = slot_millis @property def _billing_project(self) -> Optional[str]: @@ -1898,6 +1900,11 @@ def total_bytes_processed(self) -> Optional[int]: """total bytes processed from job statistics, if present.""" return self._total_bytes_processed + @property + def slot_millis(self) -> Optional[int]: + """Number of slot ms the user is actually billed for.""" + return self._slot_millis + def _is_almost_completely_cached(self): """Check if all results are completely cached. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 46b802aa3bbf..7201adb55e83 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -888,6 +888,7 @@ def test_result_reloads_job_state_until_done(self): job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") job_resource_done["statistics"]["query"]["totalBytesProcessed"] = str(1234) + job_resource_done["statistics"]["query"]["totalSlotMs"] = str(5678) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -969,6 +970,7 @@ def test_result_reloads_job_state_until_done(self): self.assertEqual(result.total_rows, 1) self.assertEqual(result.query, job.query) self.assertEqual(result.total_bytes_processed, 1234) + self.assertEqual(result.slot_millis, 5678) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index 42bfc84b9c5b..bb86ccc3cf31 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5718,6 +5718,7 @@ def test_query_and_wait_defaults(self): "rows": [{"f": [{"v": "5552452"}]}], "queryId": "job_abcDEF_", "totalBytesProcessed": 1234, + "totalSlotMs": 5678, } creds = _make_credentials() http = object() @@ -5735,6 +5736,7 @@ def test_query_and_wait_defaults(self): self.assertIsNone(rows.location) self.assertEqual(rows.query, query) self.assertEqual(rows.total_bytes_processed, 1234) + self.assertEqual(rows.slot_millis, 5678) # Verify the request we send is to jobs.query. conn.api_request.assert_called_once() diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 0d967bdb89ac..2b704d3c9d2b 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -2000,6 +2000,22 @@ def test_total_bytes_processed_present_string(self): query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) + def test_slot_millis_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.slot_millis) + + def test_slot_millis_present_integer(self): + resource = self._make_resource() + resource["totalSlotMs"] = 123456 + query = self._make_one(resource) + self.assertEqual(query.slot_millis, 123456) + + def test_slot_millis_present_string(self): + resource = self._make_resource() + resource["totalSlotMs"] = "123456" + query = self._make_one(resource) + self.assertEqual(query.slot_millis, 123456) + def test_num_dml_affected_rows_missing(self): query = self._make_one(self._make_resource()) self.assertIsNone(query.num_dml_affected_rows) From a1ed15fc3f5f82656487b24f026505fa7090890b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 11 Jul 2025 20:29:28 -0400 Subject: [PATCH 1971/2016] feat: adds date_format to load job and external config (#2231) * feat: adds date_format to load job and external config * adds date_format to new to/from_api_repr tests --- .../google/cloud/bigquery/external_config.py | 14 +++++++++++++ .../google/cloud/bigquery/job/load.py | 20 +++++++++++++++++++ .../tests/unit/job/test_load.py | 14 ++++++++----- .../tests/unit/job/test_load_config.py | 19 ++++++++++++++++++ .../tests/unit/test_external_config.py | 7 +++++-- 5 files changed, 67 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index fcfcaca2006f..54b7bf396768 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -848,6 +848,20 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Format used to parse DATE values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format + """ + result = self._properties.get("dateFormat") + return typing.cast(str, result) + + @date_format.setter + def date_format(self, value: Optional[str]): + self._properties["dateFormat"] = value + @property def time_zone(self) -> Optional[str]: """Optional[str]: Time zone used when parsing timestamp values that do not diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index 5d49aef189e7..277478d81d31 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -548,6 +548,19 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATE values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.date_format + """ + return self._get_sub_prop("dateFormat") + + @date_format.setter + def date_format(self, value: Optional[str]): + self._set_sub_prop("dateFormat", value) + @property def time_zone(self) -> Optional[str]: """Optional[str]: Default time zone that will apply when parsing timestamp @@ -903,6 +916,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def date_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.date_format`. + """ + return self.configuration.date_format + @property def time_zone(self): """See diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index 81d8e44b41d3..82baa03c704d 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -37,14 +37,14 @@ def _setUpConstants(self): self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" - + self.DATE_FORMAT = "%Y-%m-%d" self.TIME_ZONE = "UTC" def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] - + config["dateFormat"] = self.DATE_FORMAT config["timeZone"] = self.TIME_ZONE config["destinationTable"] = { "projectId": self.PROJECT, @@ -147,7 +147,6 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.reference_file_schema_uri) - if "destinationEncryptionConfiguration" in config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( @@ -156,6 +155,10 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "dateFormat" in config: + self.assertEqual(job.date_format, config["dateFormat"]) + else: + self.assertIsNone(job.date_format) if "timeZone" in config: self.assertEqual(job.time_zone, config["timeZone"]) else: @@ -202,7 +205,7 @@ def test_ctor(self): self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) - + self.assertIsNone(job.date_format) self.assertIsNone(job.time_zone) def test_ctor_w_config(self): @@ -599,6 +602,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION @@ -628,7 +632,7 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" - + config.date_format = self.DATE_FORMAT config.time_zone = self.TIME_ZONE with mock.patch( diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index 6424f7e68b10..5b7f8175be7b 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -828,6 +828,22 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_date_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.date_format) + + def test_date_format_hit(self): + date_format = "%Y-%m-%d" + config = self._get_target_class()() + config._properties["load"]["dateFormat"] = date_format + self.assertEqual(config.date_format, date_format) + + def test_date_format_setter(self): + date_format = "YYYY/MM/DD" + config = self._get_target_class()() + config.date_format = date_format + self.assertEqual(config._properties["load"]["dateFormat"], date_format) + def test_time_zone_missing(self): config = self._get_target_class()() self.assertIsNone(config.time_zone) @@ -942,6 +958,7 @@ def test_column_name_character_map_none(self): }, "useAvroLogicalTypes": True, "writeDisposition": "WRITE_TRUNCATE", + "dateFormat": "%Y-%m-%d", "timeZone": "America/New_York", "parquetOptions": {"enableListInference": True}, "columnNameCharacterMap": "V2", @@ -983,6 +1000,7 @@ def test_from_api_repr(self): ) self.assertTrue(config.use_avro_logical_types) self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE) + self.assertEqual(config.date_format, "%Y-%m-%d") self.assertEqual(config.time_zone, "America/New_York") self.assertTrue(config.parquet_options.enable_list_inference) self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2) @@ -1017,6 +1035,7 @@ def test_to_api_repr(self): ) config.use_avro_logical_types = True config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.date_format = "%Y-%m-%d" config.time_zone = "America/New_York" parquet_options = ParquetOptions() parquet_options.enable_list_inference = True diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index a89b7a1fb78a..0f5d09504d0a 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -25,7 +25,7 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] - + DATE_FORMAT = "MM/DD/YYYY" TIME_ZONE = "America/Los_Angeles" BASE_RESOURCE = { @@ -35,6 +35,7 @@ class TestExternalConfig(unittest.TestCase): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "dateFormat": DATE_FORMAT, "timeZone": TIME_ZONE, } @@ -82,6 +83,7 @@ def test_to_api_repr_base(self): ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] + ec.date_format = self.DATE_FORMAT ec.time_zone = self.TIME_ZONE exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] @@ -96,6 +98,7 @@ def test_to_api_repr_base(self): "compression": "compression", "connectionId": "path/to/connection", "schema": exp_schema, + "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, } self.assertEqual(got_resource, exp_resource) @@ -132,7 +135,7 @@ def _verify_base(self, ec): self.assertEqual(ec.ignore_unknown_values, False) self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) - + self.assertEqual(ec.date_format, self.DATE_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) def test_to_api_repr_source_format(self): From 5df3348edddc88c90be7deff3af0234e3aabf6fe Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 14 Jul 2025 12:14:58 -0400 Subject: [PATCH 1972/2016] feat: adds time_format and timestamp_format and associated tests (#2238) --- .../google/cloud/bigquery/external_config.py | 28 +++++++++++++ .../google/cloud/bigquery/job/load.py | 40 +++++++++++++++++++ .../tests/unit/job/test_load.py | 20 ++++++++++ .../tests/unit/job/test_load_config.py | 34 ++++++++++++++++ .../tests/unit/test_external_config.py | 11 +++++ 5 files changed, 133 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 54b7bf396768..370f62c0aadd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -879,6 +879,34 @@ def time_zone(self) -> Optional[str]: def time_zone(self, value: Optional[str]): self._properties["timeZone"] = value + @property + def time_format(self) -> Optional[str]: + """Optional[str]: Format used to parse TIME values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_format + """ + result = self._properties.get("timeFormat") + return typing.cast(str, result) + + @time_format.setter + def time_format(self, value: Optional[str]): + self._properties["timeFormat"] = value + + @property + def timestamp_format(self) -> Optional[str]: + """Optional[str]: Format used to parse TIMESTAMP values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.timestamp_format + """ + result = self._properties.get("timestampFormat") + return typing.cast(str, result) + + @timestamp_format.setter + def timestamp_format(self, value: Optional[str]): + self._properties["timestampFormat"] = value + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index 277478d81d31..2e5a9a9bb32c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -575,6 +575,32 @@ def time_zone(self) -> Optional[str]: def time_zone(self, value: Optional[str]): self._set_sub_prop("timeZone", value) + @property + def time_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIME values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_format + """ + return self._get_sub_prop("timeFormat") + + @time_format.setter + def time_format(self, value: Optional[str]): + self._set_sub_prop("timeFormat", value) + + @property + def timestamp_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIMESTAMP values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.timestamp_format + """ + return self._get_sub_prop("timestampFormat") + + @timestamp_format.setter + def timestamp_format(self, value: Optional[str]): + self._set_sub_prop("timestampFormat", value) + @property def time_partitioning(self): """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based @@ -930,6 +956,20 @@ def time_zone(self): """ return self.configuration.time_zone + @property + def time_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_format`. + """ + return self.configuration.time_format + + @property + def timestamp_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.timestamp_format`. + """ + return self.configuration.timestamp_format + @property def schema_update_options(self): """See diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index 82baa03c704d..77adf0cc84cc 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -39,6 +39,8 @@ def _setUpConstants(self): self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" self.DATE_FORMAT = "%Y-%m-%d" self.TIME_ZONE = "UTC" + self.TIME_FORMAT = "%H:%M:%S" + self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) @@ -46,6 +48,9 @@ def _make_resource(self, started=False, ended=False): config["sourceUris"] = [self.SOURCE1] config["dateFormat"] = self.DATE_FORMAT config["timeZone"] = self.TIME_ZONE + config["timeFormat"] = self.TIME_FORMAT + config["timestampFormat"] = self.TIMESTAMP_FORMAT + config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, @@ -163,6 +168,14 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.time_zone, config["timeZone"]) else: self.assertIsNone(job.time_zone) + if "timeFormat" in config: + self.assertEqual(job.time_format, config["timeFormat"]) + else: + self.assertIsNone(job.time_format) + if "timestampFormat" in config: + self.assertEqual(job.timestamp_format, config["timestampFormat"]) + else: + self.assertIsNone(job.timestamp_format) def test_ctor(self): client = _make_client(project=self.PROJECT) @@ -207,6 +220,8 @@ def test_ctor(self): self.assertIsNone(job.reference_file_schema_uri) self.assertIsNone(job.date_format) self.assertIsNone(job.time_zone) + self.assertIsNone(job.time_format) + self.assertIsNone(job.timestamp_format) def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField @@ -604,7 +619,10 @@ def test_begin_w_alternate_client(self): "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, + "timeFormat": self.TIME_FORMAT, + "timestampFormat": self.TIMESTAMP_FORMAT, } + RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) @@ -634,6 +652,8 @@ def test_begin_w_alternate_client(self): config.reference_file_schema_uri = "gs://path/to/reference" config.date_format = self.DATE_FORMAT config.time_zone = self.TIME_ZONE + config.time_format = self.TIME_FORMAT + config.timestamp_format = self.TIMESTAMP_FORMAT with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index 5b7f8175be7b..b733bdda07c3 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -860,6 +860,40 @@ def test_time_zone_setter(self): config.time_zone = time_zone self.assertEqual(config._properties["load"]["timeZone"], time_zone) + def test_time_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.time_format) + + def test_time_format_hit(self): + time_format = "%H:%M:%S" + config = self._get_target_class()() + config._properties["load"]["timeFormat"] = time_format + self.assertEqual(config.time_format, time_format) + + def test_time_format_setter(self): + time_format = "HH24:MI:SS" + config = self._get_target_class()() + config.time_format = time_format + self.assertEqual(config._properties["load"]["timeFormat"], time_format) + + def test_timestamp_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.timestamp_format) + + def test_timestamp_format_hit(self): + timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ" + config = self._get_target_class()() + config._properties["load"]["timestampFormat"] = timestamp_format + self.assertEqual(config.timestamp_format, timestamp_format) + + def test_timestamp_format_setter(self): + timestamp_format = "YYYY/MM/DD HH24:MI:SS.FF6 TZR" + config = self._get_target_class()() + config.timestamp_format = timestamp_format + self.assertEqual( + config._properties["load"]["timestampFormat"], timestamp_format + ) + def test_parquet_options_missing(self): config = self._get_target_class()() self.assertIsNone(config.parquet_options) diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 0f5d09504d0a..8b41cd8e3240 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -27,6 +27,8 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] DATE_FORMAT = "MM/DD/YYYY" TIME_ZONE = "America/Los_Angeles" + TIME_FORMAT = "HH24:MI:SS" + TIMESTAMP_FORMAT = "MM/DD/YYYY HH24:MI:SS.FF6 TZR" BASE_RESOURCE = { "sourceFormat": "", @@ -37,6 +39,8 @@ class TestExternalConfig(unittest.TestCase): "compression": "compression", "dateFormat": DATE_FORMAT, "timeZone": TIME_ZONE, + "timeFormat": TIME_FORMAT, + "timestampFormat": TIMESTAMP_FORMAT, } def test_from_api_repr_base(self): @@ -85,6 +89,9 @@ def test_to_api_repr_base(self): ec.date_format = self.DATE_FORMAT ec.time_zone = self.TIME_ZONE + ec.time_format = self.TIME_FORMAT + ec.timestamp_format = self.TIMESTAMP_FORMAT + exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } @@ -100,6 +107,8 @@ def test_to_api_repr_base(self): "schema": exp_schema, "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, + "timeFormat": self.TIME_FORMAT, + "timestampFormat": self.TIMESTAMP_FORMAT, } self.assertEqual(got_resource, exp_resource) @@ -137,6 +146,8 @@ def _verify_base(self, ec): self.assertEqual(ec.source_uris, self.SOURCE_URIS) self.assertEqual(ec.date_format, self.DATE_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) + self.assertEqual(ec.time_format, self.TIME_FORMAT) + self.assertEqual(ec.timestamp_format, self.TIMESTAMP_FORMAT) def test_to_api_repr_source_format(self): ec = external_config.ExternalConfig("CSV") From e7cb33b5969ed46aee87249d96c3746281f52df0 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 05:22:44 -0400 Subject: [PATCH 1973/2016] feat: adds datetime_format as an option (#2236) * feat: adds datetime_format as an option * updates docstrings --- .../google/cloud/bigquery/external_config.py | 15 ++++++++++++++ .../google/cloud/bigquery/job/load.py | 20 +++++++++++++++++++ .../tests/unit/job/test_load.py | 9 +++++++++ .../tests/unit/job/test_load_config.py | 16 +++++++++++++++ .../tests/unit/test_external_config.py | 5 +++++ 5 files changed, 65 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 370f62c0aadd..82c6a9e754d8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -862,6 +862,21 @@ def date_format(self) -> Optional[str]: def date_format(self, value: Optional[str]): self._properties["dateFormat"] = value + @property + def datetime_format(self) -> Optional[str]: + """Optional[str]: Format used to parse DATETIME values. Supports C-style + and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.datetime_format + """ + result = self._properties.get("datetimeFormat") + return typing.cast(str, result) + + @datetime_format.setter + def datetime_format(self, value: Optional[str]): + self._properties["datetimeFormat"] = value + @property def time_zone(self) -> Optional[str]: """Optional[str]: Time zone used when parsing timestamp values that do not diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index 2e5a9a9bb32c..3be914f43bb1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -561,6 +561,19 @@ def date_format(self) -> Optional[str]: def date_format(self, value: Optional[str]): self._set_sub_prop("dateFormat", value) + @property + def datetime_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATETIME values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.datetime_format + """ + return self._get_sub_prop("datetimeFormat") + + @datetime_format.setter + def datetime_format(self, value: Optional[str]): + self._set_sub_prop("datetimeFormat", value) + @property def time_zone(self) -> Optional[str]: """Optional[str]: Default time zone that will apply when parsing timestamp @@ -949,6 +962,13 @@ def date_format(self): """ return self.configuration.date_format + @property + def datetime_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.datetime_format`. + """ + return self.configuration.datetime_format + @property def time_zone(self): """See diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index 77adf0cc84cc..7afe9cba69d8 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -38,6 +38,7 @@ def _setUpConstants(self): self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" self.DATE_FORMAT = "%Y-%m-%d" + self.DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" self.TIME_ZONE = "UTC" self.TIME_FORMAT = "%H:%M:%S" self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" @@ -47,6 +48,7 @@ def _make_resource(self, started=False, ended=False): config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] config["dateFormat"] = self.DATE_FORMAT + config["datetimeFormat"] = self.DATETIME_FORMAT config["timeZone"] = self.TIME_ZONE config["timeFormat"] = self.TIME_FORMAT config["timestampFormat"] = self.TIMESTAMP_FORMAT @@ -164,6 +166,10 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.date_format, config["dateFormat"]) else: self.assertIsNone(job.date_format) + if "datetimeFormat" in config: + self.assertEqual(job.datetime_format, config["datetimeFormat"]) + else: + self.assertIsNone(job.datetime_format) if "timeZone" in config: self.assertEqual(job.time_zone, config["timeZone"]) else: @@ -219,6 +225,7 @@ def test_ctor(self): self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) self.assertIsNone(job.date_format) + self.assertIsNone(job.datetime_format) self.assertIsNone(job.time_zone) self.assertIsNone(job.time_format) self.assertIsNone(job.timestamp_format) @@ -618,6 +625,7 @@ def test_begin_w_alternate_client(self): }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], "dateFormat": self.DATE_FORMAT, + "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, "timeFormat": self.TIME_FORMAT, "timestampFormat": self.TIMESTAMP_FORMAT, @@ -651,6 +659,7 @@ def test_begin_w_alternate_client(self): config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" config.date_format = self.DATE_FORMAT + config.datetime_format = self.DATETIME_FORMAT config.time_zone = self.TIME_ZONE config.time_format = self.TIME_FORMAT config.timestamp_format = self.TIMESTAMP_FORMAT diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index b733bdda07c3..dbb062486080 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -844,6 +844,22 @@ def test_date_format_setter(self): config.date_format = date_format self.assertEqual(config._properties["load"]["dateFormat"], date_format) + def test_datetime_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.datetime_format) + + def test_datetime_format_hit(self): + datetime_format = "%Y-%m-%dT%H:%M:%S" + config = self._get_target_class()() + config._properties["load"]["datetimeFormat"] = datetime_format + self.assertEqual(config.datetime_format, datetime_format) + + def test_datetime_format_setter(self): + datetime_format = "YYYY/MM/DD HH24:MI:SS" + config = self._get_target_class()() + config.datetime_format = datetime_format + self.assertEqual(config._properties["load"]["datetimeFormat"], datetime_format) + def test_time_zone_missing(self): config = self._get_target_class()() self.assertIsNone(config.time_zone) diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 8b41cd8e3240..3a441d1f50fc 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -26,6 +26,7 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] DATE_FORMAT = "MM/DD/YYYY" + DATETIME_FORMAT = "MM/DD/YYYY HH24:MI:SS" TIME_ZONE = "America/Los_Angeles" TIME_FORMAT = "HH24:MI:SS" TIMESTAMP_FORMAT = "MM/DD/YYYY HH24:MI:SS.FF6 TZR" @@ -38,6 +39,7 @@ class TestExternalConfig(unittest.TestCase): "ignoreUnknownValues": False, "compression": "compression", "dateFormat": DATE_FORMAT, + "datetimeFormat": DATETIME_FORMAT, "timeZone": TIME_ZONE, "timeFormat": TIME_FORMAT, "timestampFormat": TIMESTAMP_FORMAT, @@ -88,6 +90,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] ec.date_format = self.DATE_FORMAT + ec.datetime_format = self.DATETIME_FORMAT ec.time_zone = self.TIME_ZONE ec.time_format = self.TIME_FORMAT ec.timestamp_format = self.TIMESTAMP_FORMAT @@ -106,6 +109,7 @@ def test_to_api_repr_base(self): "connectionId": "path/to/connection", "schema": exp_schema, "dateFormat": self.DATE_FORMAT, + "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, "timeFormat": self.TIME_FORMAT, "timestampFormat": self.TIMESTAMP_FORMAT, @@ -145,6 +149,7 @@ def _verify_base(self, ec): self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) self.assertEqual(ec.date_format, self.DATE_FORMAT) + self.assertEqual(ec.datetime_format, self.DATETIME_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) self.assertEqual(ec.time_format, self.TIME_FORMAT) self.assertEqual(ec.timestamp_format, self.TIMESTAMP_FORMAT) From 4f154e01803db2cff60680aac2743d39b85502ed Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 15 Jul 2025 11:43:28 +0200 Subject: [PATCH 1974/2016] chore(deps): update dependency certifi to v2025.7.14 (#2237) Co-authored-by: Chalmer Lowe --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 447e92c81811..e932625b8e3b 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.7.9 +certifi==2025.7.14 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' From 03858e9fc35abfaf8a962e77d96e6f53ce299d84 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 14:32:47 -0400 Subject: [PATCH 1975/2016] feat: Add null_markers property to LoadJobConfig and CSVOptions (#2239) * feat: Add null_markers property to LoadJobConfig and CSVOptions * feat: adds null_markers as a load and external_config option --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .../google/cloud/bigquery/external_config.py | 21 ++++++++++++++ .../google/cloud/bigquery/job/load.py | 28 +++++++++++++++++++ .../tests/unit/job/test_load.py | 7 +++++ .../tests/unit/job/test_load_config.py | 16 +++++++++++ .../tests/unit/test_external_config.py | 4 +++ 5 files changed, 76 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 82c6a9e754d8..69ed72bc9402 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -474,6 +474,27 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) + @property + def null_markers(self) -> Optional[Iterable[str]]: + """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file. + + .. note:: + null_marker and null_markers can't be set at the same time. + If null_marker is set, null_markers has to be not set. + If null_markers is set, null_marker has to be not set. + If both null_marker and null_markers are set at the same time, a user error would be thrown. + Any strings listed in null_markers, including empty string would be interpreted as SQL NULL. + This applies to all column types. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers + """ + return self._properties.get("nullMarkers") + + @null_markers.setter + def null_markers(self, value: Optional[Iterable[str]]): + self._properties["nullMarkers"] = value + def to_api_repr(self) -> dict: """Build an API representation of this object. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index 3be914f43bb1..eabc12cfcde4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -386,6 +386,27 @@ def null_marker(self): def null_marker(self, value): self._set_sub_prop("nullMarker", value) + @property + def null_markers(self) -> Optional[List[str]]: + """Optional[List[str]]: A list of strings represented as SQL NULL values in a CSV file. + + .. note:: + null_marker and null_markers can't be set at the same time. + If null_marker is set, null_markers has to be not set. + If null_markers is set, null_marker has to be not set. + If both null_marker and null_markers are set at the same time, a user error would be thrown. + Any strings listed in null_markers, including empty string would be interpreted as SQL NULL. + This applies to all column types. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_markers + """ + return self._get_sub_prop("nullMarkers") + + @null_markers.setter + def null_markers(self, value: Optional[List[str]]): + self._set_sub_prop("nullMarkers", value) + @property def preserve_ascii_control_characters(self): """Optional[bool]: Preserves the embedded ASCII control characters when sourceFormat is set to CSV. @@ -854,6 +875,13 @@ def null_marker(self): """ return self.configuration.null_marker + @property + def null_markers(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.null_markers`. + """ + return self.configuration.null_markers + @property def quote_character(self): """See diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index 7afe9cba69d8..5d52401c91cf 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -42,6 +42,7 @@ def _setUpConstants(self): self.TIME_ZONE = "UTC" self.TIME_FORMAT = "%H:%M:%S" self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" + self.NULL_MARKERS = ["", "NA"] def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) @@ -52,6 +53,7 @@ def _make_resource(self, started=False, ended=False): config["timeZone"] = self.TIME_ZONE config["timeFormat"] = self.TIME_FORMAT config["timestampFormat"] = self.TIMESTAMP_FORMAT + config["nullMarkers"] = self.NULL_MARKERS config["destinationTable"] = { "projectId": self.PROJECT, @@ -140,6 +142,10 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.null_marker, config["nullMarker"]) else: self.assertIsNone(job.null_marker) + if "nullMarkers" in config: + self.assertEqual(job.null_markers, config["nullMarkers"]) + else: + self.assertIsNone(job.null_markers) if "quote" in config: self.assertEqual(job.quote_character, config["quote"]) else: @@ -211,6 +217,7 @@ def test_ctor(self): self.assertIsNone(job.ignore_unknown_values) self.assertIsNone(job.max_bad_records) self.assertIsNone(job.null_marker) + self.assertIsNone(job.null_markers) self.assertIsNone(job.quote_character) self.assertIsNone(job.skip_leading_rows) self.assertIsNone(job.source_format) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index dbb062486080..8ff9244d2e28 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -469,6 +469,22 @@ def test_null_marker_setter(self): config.null_marker = null_marker self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + def test_null_markers_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.null_markers) + + def test_null_markers_hit(self): + null_markers = ["", "NA"] + config = self._get_target_class()() + config._properties["load"]["nullMarkers"] = null_markers + self.assertEqual(config.null_markers, null_markers) + + def test_null_markers_setter(self): + null_markers = ["", "NA"] + config = self._get_target_class()() + config.null_markers = null_markers + self.assertEqual(config._properties["load"]["nullMarkers"], null_markers) + def test_preserve_ascii_control_characters_missing(self): config = self._get_target_class()() self.assertIsNone(config.preserve_ascii_control_characters) diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 3a441d1f50fc..61532b4b8341 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -277,6 +277,7 @@ def test_from_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "nullMarkers": ["", "NA"], }, }, ) @@ -293,6 +294,7 @@ def test_from_api_repr_csv(self): self.assertEqual(ec.options.allow_jagged_rows, False) self.assertEqual(ec.options.encoding, "encoding") self.assertEqual(ec.options.preserve_ascii_control_characters, False) + self.assertEqual(ec.options.null_markers, ["", "NA"]) got_resource = ec.to_api_repr() @@ -314,6 +316,7 @@ def test_to_api_repr_csv(self): options.skip_leading_rows = 123 options.allow_jagged_rows = False options.preserve_ascii_control_characters = False + options.null_markers = ["", "NA"] ec.csv_options = options exp_resource = { @@ -326,6 +329,7 @@ def test_to_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "nullMarkers": ["", "NA"], }, } From 08970ad5774a2160a53723ddcbe4c16a19254e8d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 18:04:24 -0400 Subject: [PATCH 1976/2016] feat: Adds source_column_match and associated tests (#2227) This commit introduces new configuration options for BigQuery load jobs and external table definitions, aligning with recent updates to the underlying protos. New options added: - `source_column_name_match_option`: Controls how source columns are matched to the schema. (Applies to LoadJobConfig, ExternalConfig, LoadJob) Changes include: - Added corresponding properties (getters/setters) to `LoadJobConfig`, `LoadJob`, `ExternalConfig`, and `CSVOptions`. - Updated docstrings and type hints for all new attributes. - Updated unit tests to cover the new options, ensuring they are correctly handled during object initialization, serialization to API representation, and deserialization from API responses. --- .../google/cloud/bigquery/enums.py | 18 ++++++++ .../google/cloud/bigquery/external_config.py | 34 +++++++++++++++ .../google/cloud/bigquery/job/load.py | 43 ++++++++++++++++++- .../tests/unit/job/test_load.py | 15 +++++++ .../tests/unit/job/test_load_config.py | 32 ++++++++++++++ .../tests/unit/test_external_config.py | 43 ++++++++++++++++++- 6 files changed, 183 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py index 9a1e4880cff5..1b1eb241aee1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/enums.py @@ -462,3 +462,21 @@ class JobCreationMode(object): The conditions under which BigQuery can decide to not create a Job are subject to change. """ + + +class SourceColumnMatch(str, enum.Enum): + """Uses sensible defaults based on how the schema is provided. + If autodetect is used, then columns are matched by name. Otherwise, columns + are matched by position. This is done to keep the behavior backward-compatible. + """ + + SOURCE_COLUMN_MATCH_UNSPECIFIED = "SOURCE_COLUMN_MATCH_UNSPECIFIED" + """Unspecified column name match option.""" + + POSITION = "POSITION" + """Matches by position. This assumes that the columns are ordered the same + way as the schema.""" + + NAME = "NAME" + """Matches by name. This reads the header row as column names and reorders + columns to match the field names in the schema.""" diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index 69ed72bc9402..dc7a33e6a2b8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -30,6 +30,7 @@ from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery import _helpers +from google.cloud.bigquery.enums import SourceColumnMatch from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery import schema from google.cloud.bigquery.schema import SchemaField @@ -474,6 +475,39 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the + strategy used to match loaded columns to the schema. If not set, a sensible + default is chosen based on how the schema is provided. If autodetect is + used, then columns are matched by name. Otherwise, columns are matched by + position. This is done to keep the behavior backward-compatible. + + Acceptable values are: + + SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option. + POSITION: matches by position. This assumes that the columns are ordered + the same way as the schema. + NAME: matches by name. This reads the header row as column names and + reorders columns to match the field names in the schema. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.source_column_match + """ + + value = self._properties.get("sourceColumnMatch") + return SourceColumnMatch(value) if value is not None else None + + @source_column_match.setter + def source_column_match(self, value: Union[SourceColumnMatch, str, None]): + if value is not None and not isinstance(value, (SourceColumnMatch, str)): + raise TypeError( + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None" + ) + if isinstance(value, SourceColumnMatch): + value = value.value + self._properties["sourceColumnMatch"] = value if value else None + @property def null_markers(self) -> Optional[Iterable[str]]: """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py index eabc12cfcde4..8cdb779ac33e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/load.py @@ -15,9 +15,10 @@ """Classes for load jobs.""" import typing -from typing import FrozenSet, List, Iterable, Optional +from typing import FrozenSet, List, Iterable, Optional, Union from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.enums import SourceColumnMatch from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery import _helpers @@ -569,6 +570,39 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the + strategy used to match loaded columns to the schema. If not set, a sensible + default is chosen based on how the schema is provided. If autodetect is + used, then columns are matched by name. Otherwise, columns are matched by + position. This is done to keep the behavior backward-compatible. + + Acceptable values are: + + SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option. + POSITION: matches by position. This assumes that the columns are ordered + the same way as the schema. + NAME: matches by name. This reads the header row as column names and + reorders columns to match the field names in the schema. + + See: + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_column_match + """ + value = self._get_sub_prop("sourceColumnMatch") + return SourceColumnMatch(value) if value is not None else None + + @source_column_match.setter + def source_column_match(self, value: Union[SourceColumnMatch, str, None]): + if value is not None and not isinstance(value, (SourceColumnMatch, str)): + raise TypeError( + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None" + ) + if isinstance(value, SourceColumnMatch): + value = value.value + self._set_sub_prop("sourceColumnMatch", value if value else None) + @property def date_format(self) -> Optional[str]: """Optional[str]: Date format used for parsing DATE values. @@ -983,6 +1017,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_column_match`. + """ + return self.configuration.source_column_match + @property def date_format(self): """See diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load.py b/packages/google-cloud-bigquery/tests/unit/job/test_load.py index 5d52401c91cf..b551d52dd327 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load.py @@ -19,6 +19,7 @@ from .helpers import _Base from .helpers import _make_client +from google.cloud.bigquery.enums import SourceColumnMatch class TestLoadJob(_Base): @@ -37,6 +38,7 @@ def _setUpConstants(self): self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" + self.SOURCE_COLUMN_MATCH = "NAME" self.DATE_FORMAT = "%Y-%m-%d" self.DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" self.TIME_ZONE = "UTC" @@ -48,6 +50,7 @@ def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] + config["sourceColumnMatch"] = self.SOURCE_COLUMN_MATCH config["dateFormat"] = self.DATE_FORMAT config["datetimeFormat"] = self.DATETIME_FORMAT config["timeZone"] = self.TIME_ZONE @@ -189,6 +192,15 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.timestamp_format) + if "sourceColumnMatch" in config: + # job.source_column_match will be an Enum, config[...] is a string + self.assertEqual( + job.source_column_match.value, + config["sourceColumnMatch"], + ) + else: + self.assertIsNone(job.source_column_match) + def test_ctor(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) @@ -231,6 +243,7 @@ def test_ctor(self): self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) + self.assertIsNone(job.source_column_match) self.assertIsNone(job.date_format) self.assertIsNone(job.datetime_format) self.assertIsNone(job.time_zone) @@ -631,6 +644,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "dateFormat": self.DATE_FORMAT, "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, @@ -665,6 +679,7 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" + config.source_column_match = SourceColumnMatch(self.SOURCE_COLUMN_MATCH) config.date_format = self.DATE_FORMAT config.datetime_format = self.DATETIME_FORMAT config.time_zone = self.TIME_ZONE diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py index 8ff9244d2e28..27d3cead1f4e 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_load_config.py @@ -844,6 +844,38 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_source_column_match_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.source_column_match) + + def test_source_column_match_hit(self): + from google.cloud.bigquery.enums import SourceColumnMatch + + option_enum = SourceColumnMatch.NAME + config = self._get_target_class()() + # Assume API stores the string value of the enum + config._properties["load"]["sourceColumnMatch"] = option_enum.value + self.assertEqual(config.source_column_match, option_enum) + + def test_source_column_match_setter(self): + from google.cloud.bigquery.enums import SourceColumnMatch + + option_enum = SourceColumnMatch.POSITION + config = self._get_target_class()() + config.source_column_match = option_enum + # Assert that the string value of the enum is stored + self.assertEqual( + config._properties["load"]["sourceColumnMatch"], option_enum.value + ) + option_str = "NAME" + config.source_column_match = option_str + self.assertEqual(config._properties["load"]["sourceColumnMatch"], option_str) + + def test_source_column_match_setter_invalid_type(self): + config = self._get_target_class()() + with self.assertRaises(TypeError): + config.source_column_match = 3.14 + def test_date_format_missing(self): config = self._get_target_class()() self.assertIsNone(config.date_format) diff --git a/packages/google-cloud-bigquery/tests/unit/test_external_config.py b/packages/google-cloud-bigquery/tests/unit/test_external_config.py index 61532b4b8341..ea827a560eec 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_external_config.py +++ b/packages/google-cloud-bigquery/tests/unit/test_external_config.py @@ -19,12 +19,14 @@ from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +from google.cloud.bigquery.enums import SourceColumnMatch import pytest class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] + SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME DATE_FORMAT = "MM/DD/YYYY" DATETIME_FORMAT = "MM/DD/YYYY HH24:MI:SS" TIME_ZONE = "America/Los_Angeles" @@ -277,6 +279,7 @@ def test_from_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "nullMarkers": ["", "NA"], }, }, @@ -294,6 +297,10 @@ def test_from_api_repr_csv(self): self.assertEqual(ec.options.allow_jagged_rows, False) self.assertEqual(ec.options.encoding, "encoding") self.assertEqual(ec.options.preserve_ascii_control_characters, False) + self.assertEqual( + ec.options.source_column_match, + self.SOURCE_COLUMN_MATCH, + ) self.assertEqual(ec.options.null_markers, ["", "NA"]) got_resource = ec.to_api_repr() @@ -316,7 +323,9 @@ def test_to_api_repr_csv(self): options.skip_leading_rows = 123 options.allow_jagged_rows = False options.preserve_ascii_control_characters = False + options.source_column_match = self.SOURCE_COLUMN_MATCH options.null_markers = ["", "NA"] + ec.csv_options = options exp_resource = { @@ -329,6 +338,7 @@ def test_to_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "nullMarkers": ["", "NA"], }, } @@ -881,7 +891,9 @@ def test_to_api_repr(self): ) -class CSVOptions(unittest.TestCase): +class TestCSVOptions(unittest.TestCase): + SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME + def test_to_api_repr(self): options = external_config.CSVOptions() options.field_delimiter = "\t" @@ -891,6 +903,7 @@ def test_to_api_repr(self): options.allow_jagged_rows = False options.encoding = "UTF-8" options.preserve_ascii_control_characters = False + options.source_column_match = self.SOURCE_COLUMN_MATCH resource = options.to_api_repr() @@ -904,9 +917,37 @@ def test_to_api_repr(self): "allowJaggedRows": False, "encoding": "UTF-8", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, }, ) + def test_source_column_match_None(self): + ec = external_config.CSVOptions() + ec.source_column_match = None + expected = None + result = ec.source_column_match + self.assertEqual(expected, result) + + def test_source_column_match_valid_input(self): + ec = external_config.CSVOptions() + ec.source_column_match = SourceColumnMatch.NAME + expected = "NAME" + result = ec.source_column_match + self.assertEqual(expected, result) + + ec.source_column_match = "POSITION" + expected = "POSITION" + result = ec.source_column_match + self.assertEqual(expected, result) + + def test_source_column_match_invalid_input(self): + ec = external_config.CSVOptions() + with self.assertRaisesRegex( + TypeError, + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None", + ): + ec.source_column_match = 3.14 + class TestGoogleSheetsOptions(unittest.TestCase): def test_to_api_repr(self): From 9203863fbd9881203d9347df6198297b99419bd9 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 15 Jul 2025 20:29:42 -0400 Subject: [PATCH 1977/2016] chore(main): release 3.35.0 (#2207) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 28 +++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 3b29a6a41dd7..e4574aa7ab83 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,34 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.35.0](https://github.com/googleapis/python-bigquery/compare/v3.34.0...v3.35.0) (2025-07-15) + + +### Features + +* Add null_markers property to LoadJobConfig and CSVOptions ([#2239](https://github.com/googleapis/python-bigquery/issues/2239)) ([289446d](https://github.com/googleapis/python-bigquery/commit/289446dd8c356d11a0b63b8e6275629b1ae5dc08)) +* Add total slot ms to RowIterator ([#2233](https://github.com/googleapis/python-bigquery/issues/2233)) ([d44bf02](https://github.com/googleapis/python-bigquery/commit/d44bf0231e6e96369e4e03667a3f96618fb664e2)) +* Add UpdateMode to update_dataset ([#2204](https://github.com/googleapis/python-bigquery/issues/2204)) ([eb9c2af](https://github.com/googleapis/python-bigquery/commit/eb9c2aff242c5107f968bbd8b6a9d30cecc877f6)) +* Adds dataset_view parameter to get_dataset method ([#2198](https://github.com/googleapis/python-bigquery/issues/2198)) ([28a5750](https://github.com/googleapis/python-bigquery/commit/28a5750d455f0381548df6f9b1f7661823837d81)) +* Adds date_format to load job and external config ([#2231](https://github.com/googleapis/python-bigquery/issues/2231)) ([7d31828](https://github.com/googleapis/python-bigquery/commit/7d3182802deccfceb0646b87fc8d12275d0a569b)) +* Adds datetime_format as an option ([#2236](https://github.com/googleapis/python-bigquery/issues/2236)) ([54d3dc6](https://github.com/googleapis/python-bigquery/commit/54d3dc66244d50a031e3c80d43d372d2743ecbc3)) +* Adds source_column_match and associated tests ([#2227](https://github.com/googleapis/python-bigquery/issues/2227)) ([6d5d236](https://github.com/googleapis/python-bigquery/commit/6d5d23685cd457d85955356705c1101e9ec3cdcd)) +* Adds time_format and timestamp_format and associated tests ([#2238](https://github.com/googleapis/python-bigquery/issues/2238)) ([371ad29](https://github.com/googleapis/python-bigquery/commit/371ad292df537278767dba71d81822ed57dd8e7d)) +* Adds time_zone to external config and load job ([#2229](https://github.com/googleapis/python-bigquery/issues/2229)) ([b2300d0](https://github.com/googleapis/python-bigquery/commit/b2300d032843512b7e4a5703377632fe60ef3f8d)) + + +### Bug Fixes + +* Adds magics.context.project to eliminate issues with unit tests … ([#2228](https://github.com/googleapis/python-bigquery/issues/2228)) ([27ff3a8](https://github.com/googleapis/python-bigquery/commit/27ff3a89a5f97305fa3ff673aa9183baa7df200f)) +* Fix rows returned when both start_index and page_size are provided ([#2181](https://github.com/googleapis/python-bigquery/issues/2181)) ([45643a2](https://github.com/googleapis/python-bigquery/commit/45643a2e20ce5d503118522dd195aeca00dec3bc)) +* Make AccessEntry equality consistent with from_api_repr ([#2218](https://github.com/googleapis/python-bigquery/issues/2218)) ([4941de4](https://github.com/googleapis/python-bigquery/commit/4941de441cb32cabeb55ec0320f305fb62551155)) +* Update type hints for various BigQuery files ([#2206](https://github.com/googleapis/python-bigquery/issues/2206)) ([b863291](https://github.com/googleapis/python-bigquery/commit/b86329188ba35e61871db82ae1d95d2a576eed1b)) + + +### Documentation + +* Improve clarity of "Output Only" fields in Dataset class ([#2201](https://github.com/googleapis/python-bigquery/issues/2201)) ([bd5aba8](https://github.com/googleapis/python-bigquery/commit/bd5aba8ba40c2f35fb672a68eed11d6baedb304f)) + ## [3.34.0](https://github.com/googleapis/python-bigquery/compare/v3.33.0...v3.34.0) (2025-05-27) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 9e139385419b..0107ae309b8c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.34.0" +__version__ = "3.35.0" From 5f9721bda14d2c8588da31a52fed21ac5ec73e11 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 16 Jul 2025 19:22:36 +0200 Subject: [PATCH 1978/2016] chore(deps): update all dependencies to v3.35.0 (#2242) --- .../google-cloud-bigquery/samples/desktopapp/requirements.txt | 2 +- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index a512dbd3a7b5..54b708ca82df 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 google-auth-oauthlib==1.2.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index e932625b8e3b..5b85a9bfe0b3 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index b53a35982dc8..5c48d707fe41 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.1 db-dtypes==1.4.3 -google.cloud.bigquery==3.34.0 +google.cloud.bigquery==3.35.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 pandas==2.3.1 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 4b134ac9dd3f..88f725bb4ab7 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ bigquery-magics==0.10.1 db-dtypes==1.4.3 -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.4.0; python_version >= '3.10' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index fd8bd672bcdc..e43cb04e9ec3 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 From dcfeae1513ed46bdb6174e7d69c960570dab920b Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 21 Jul 2025 17:34:50 -0500 Subject: [PATCH 1979/2016] docs: specify the inherited-members directive for job classes (#2244) It seems that versions of python earlier than 3.10 may have had issues processing inherited members annotations, and accidentally include inherited members by default. As we recently worked to excise older versions of python in this repo, it seems we're now correctly processing sphinx directives, which means we no longer emit docstrings for inherited members. This PR adds a minor sphinx directive to include inherited members for the job classes, and I've confirmed locally by running the `docsfx` nox job that the inherited members do now get included in the docfx_yaml output. --- packages/google-cloud-bigquery/docs/reference.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/google-cloud-bigquery/docs/reference.rst b/packages/google-cloud-bigquery/docs/reference.rst index 6c00df0771c2..d24a735965ab 100644 --- a/packages/google-cloud-bigquery/docs/reference.rst +++ b/packages/google-cloud-bigquery/docs/reference.rst @@ -22,6 +22,7 @@ Job === .. automodule:: google.cloud.bigquery.job + :inherited-members: .. toctree:: :maxdepth: 2 From 50f54f4a36363b60b9346427c30f8ad786b96e1c Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 24 Jul 2025 10:57:43 -0400 Subject: [PATCH 1980/2016] chore(main): release 3.35.1 (#2245) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index e4574aa7ab83..374448a5e91f 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.35.1](https://github.com/googleapis/python-bigquery/compare/v3.35.0...v3.35.1) (2025-07-21) + + +### Documentation + +* Specify the inherited-members directive for job classes ([#2244](https://github.com/googleapis/python-bigquery/issues/2244)) ([d207f65](https://github.com/googleapis/python-bigquery/commit/d207f6539b7a4c248a5de5719d7f384abbe20abe)) + ## [3.35.0](https://github.com/googleapis/python-bigquery/compare/v3.34.0...v3.35.0) (2025-07-15) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index 0107ae309b8c..d565bc46e0fd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.35.0" +__version__ = "3.35.1" From f98bde72ce69c5a4a1d9a061452530780fc278a9 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Sat, 26 Jul 2025 00:09:47 +0200 Subject: [PATCH 1981/2016] chore(deps): update all dependencies (#2243) --- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements.txt | 6 +++--- .../google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../samples/notebooks/requirements.txt | 2 +- .../google-cloud-bigquery/samples/snippets/requirements.txt | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 54b708ca82df..e7a02eca5dd7 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 google-auth-oauthlib==1.2.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 5b85a9bfe0b3..4f2c0aff44f7 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -13,20 +13,20 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.73.1 +grpcio==1.74.0 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 pandas==2.3.1 proto-plus==1.26.1 -pyarrow==20.0.0 +pyarrow==21.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.22 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 5c48d707fe41..e72d2822ecfd 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.1 db-dtypes==1.4.3 -google.cloud.bigquery==3.35.0 +google.cloud.bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 pandas==2.3.1 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 88f725bb4ab7..c9aed9e58f77 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ bigquery-magics==0.10.1 db-dtypes==1.4.3 -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.4.0; python_version >= '3.10' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index e43cb04e9ec3..afa62b6b8e6a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 From 532f19e3a981547c95d3a20ce89c20c0086ee03f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 31 Jul 2025 23:54:06 +0200 Subject: [PATCH 1982/2016] chore(deps): update dependency matplotlib to v3.10.5 (#2251) --- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index c9aed9e58f77..a2f0cb44ae01 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -5,5 +5,5 @@ google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.4.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.3; python_version >= '3.10' +matplotlib==3.10.5; python_version >= '3.10' pandas==2.3.1 From 13324c09d9032c7c6fa5709172c500ddc7ab4966 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Tue, 5 Aug 2025 04:41:40 -0400 Subject: [PATCH 1983/2016] docs: update README to break infinite redirect loop (#2254) --- packages/google-cloud-bigquery/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/README.rst b/packages/google-cloud-bigquery/README.rst index 29e15e067052..23ed9257ddc4 100644 --- a/packages/google-cloud-bigquery/README.rst +++ b/packages/google-cloud-bigquery/README.rst @@ -18,7 +18,7 @@ processing power of Google's infrastructure. .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery -.. _Client Library Documentation: https://googleapis.dev/python/bigquery/latest +.. _Client Library Documentation: https://cloud.google.com/python/docs/reference/bigquery/latest/summary_overview .. _Product Documentation: https://cloud.google.com/bigquery/docs/reference/v2/ Quick Start From 020041c5cad1ed819de9e309ad6f6d7cba020d83 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 5 Aug 2025 14:45:23 +0200 Subject: [PATCH 1984/2016] chore(deps): update all dependencies (#2253) Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- packages/google-cloud-bigquery/samples/magics/requirements.txt | 2 +- .../google-cloud-bigquery/samples/notebooks/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 4f2c0aff44f7..fa54cc2297ee 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.7.14 +certifi==2025.8.3 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index e72d2822ecfd..e7230053c622 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.10.1 +bigquery_magics==0.10.2 db-dtypes==1.4.3 google.cloud.bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index a2f0cb44ae01..829f08f472e4 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.10.1 +bigquery-magics==0.10.2 db-dtypes==1.4.3 google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 From 8386be61d9a9a254a9be2d9d49632a43c2c6865e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 6 Aug 2025 04:50:30 -0500 Subject: [PATCH 1985/2016] feat: retry query jobs if `jobBackendError` or `jobInternalError` are encountered (#2256) * feat: retry query jobs if `jobBackendError` or `jobInternalError` are encountered * Update google/cloud/bigquery/retry.py --- packages/google-cloud-bigquery/google/cloud/bigquery/retry.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 999d0e851ee5..8f469f2d33ae 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -124,6 +124,8 @@ def _should_retry_get_job_conflict(exc): "rateLimitExceeded", "backendError", "internalError", + "jobBackendError", + "jobInternalError", "jobRateLimitExceeded", ) From 2f4a20fc7ce6cb9edaa2821fb2101e0e8f6fb9ae Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Thu, 14 Aug 2025 15:28:56 -0700 Subject: [PATCH 1986/2016] feat: add created/started/ended properties to RowIterator. (#2260) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add created/started/ended attribute to RowIterator. * fix annotation * links update * mypy fix * Update google/cloud/bigquery/query.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/table.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/table.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/query.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/query.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/job/query.py Co-authored-by: Tim Sweña (Swast) * fix unit test --------- Co-authored-by: Tim Sweña (Swast) Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/_job_helpers.py | 3 ++ .../google/cloud/bigquery/client.py | 12 +++++ .../google/cloud/bigquery/job/query.py | 3 ++ .../google/cloud/bigquery/query.py | 52 ++++++++++++++++++- .../google/cloud/bigquery/table.py | 31 ++++++++++- .../tests/unit/job/test_query.py | 6 +++ .../tests/unit/test_client.py | 6 +++ .../tests/unit/test_query.py | 48 +++++++++++++++++ 8 files changed, 159 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 73d4f6e7bc27..aa0b115d9618 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -561,6 +561,9 @@ def do_query(): query=query, total_bytes_processed=query_results.total_bytes_processed, slot_millis=query_results.slot_millis, + created=query_results.created, + started=query_results.started, + ended=query_results.ended, ) if job_retry is not None: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 804f77ea2871..8048452db6b6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -4145,6 +4145,9 @@ def _list_rows_from_query_results( query: Optional[str] = None, total_bytes_processed: Optional[int] = None, slot_millis: Optional[int] = None, + created: Optional[datetime.datetime] = None, + started: Optional[datetime.datetime] = None, + ended: Optional[datetime.datetime] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -4198,6 +4201,12 @@ def _list_rows_from_query_results( total bytes processed from job statistics, if present. slot_millis (Optional[int]): Number of slot ms the user is actually billed for. + created (Optional[datetime.datetime]): + Datetime at which the job was created. + started (Optional[datetime.datetime]): + Datetime at which the job was started. + ended (Optional[datetime.datetime]): + Datetime at which the job finished. Returns: google.cloud.bigquery.table.RowIterator: @@ -4238,6 +4247,9 @@ def _list_rows_from_query_results( query=query, total_bytes_processed=total_bytes_processed, slot_millis=slot_millis, + created=created, + started=started, + ended=ended, ) return row_iterator diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index ec9379ea9c47..44d8a92e6c4b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1767,6 +1767,9 @@ def is_job_done(): query=self.query, total_bytes_processed=self.total_bytes_processed, slot_millis=self.slot_millis, + created=self.created, + started=self.started, + ended=self.ended, **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 4a006d621285..58372f1e6a74 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -1287,7 +1287,7 @@ def slot_millis(self): """Total number of slot ms the user is actually billed for. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.slot_millis + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_slot_ms Returns: Optional[int]: Count generated on the server (None until set by the server). @@ -1310,6 +1310,56 @@ def num_dml_affected_rows(self): if num_dml_affected_rows is not None: return int(num_dml_affected_rows) + @property + def created(self): + """Creation time of this query. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.creation_time + + Returns: + Optional[datetime.datetime]: + the creation time (None until set from the server). + """ + millis = self._properties.get("creationTime") + if millis is not None: + return _helpers._datetime_from_microseconds(int(millis) * 1000.0) + + @property + def started(self): + """Start time of this query. + + This field will be present when the query transitions from the + PENDING state to either RUNNING or DONE. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.start_time + + Returns: + Optional[datetime.datetime]: + the start time (None until set from the server). + """ + millis = self._properties.get("startTime") + if millis is not None: + return _helpers._datetime_from_microseconds(int(millis) * 1000.0) + + @property + def ended(self): + """End time of this query. + + This field will be present whenever a query is in the DONE state. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.end_time + + Returns: + Optional[datetime.datetime]: + the end time (None until set from the server). + """ + millis = self._properties.get("endTime") + if millis is not None: + return _helpers._datetime_from_microseconds(int(millis) * 1000.0) + @property def rows(self): """Query results. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index dbdde36d1292..a0986c44efd8 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1788,7 +1788,15 @@ class RowIterator(HTTPIterator): query (Optional[str]): The query text used. total_bytes_processed (Optional[int]): - total bytes processed from job statistics, if present. + If representing query results, the total bytes processed by the associated query. + slot_millis (Optional[int]): + If representing query results, the number of slot ms billed for the associated query. + created (Optional[datetime.datetime]): + If representing query results, the creation time of the associated query. + started (Optional[datetime.datetime]): + If representing query results, the start time of the associated query. + ended (Optional[datetime.datetime]): + If representing query results, the end time of the associated query. """ def __init__( @@ -1813,6 +1821,9 @@ def __init__( query: Optional[str] = None, total_bytes_processed: Optional[int] = None, slot_millis: Optional[int] = None, + created: Optional[datetime.datetime] = None, + started: Optional[datetime.datetime] = None, + ended: Optional[datetime.datetime] = None, ): super(RowIterator, self).__init__( client, @@ -1843,6 +1854,9 @@ def __init__( self._query = query self._total_bytes_processed = total_bytes_processed self._slot_millis = slot_millis + self._job_created = created + self._job_started = started + self._job_ended = ended @property def _billing_project(self) -> Optional[str]: @@ -1905,6 +1919,21 @@ def slot_millis(self) -> Optional[int]: """Number of slot ms the user is actually billed for.""" return self._slot_millis + @property + def created(self) -> Optional[datetime.datetime]: + """If representing query results, the creation time of the associated query.""" + return self._job_created + + @property + def started(self) -> Optional[datetime.datetime]: + """If representing query results, the start time of the associated query.""" + return self._job_started + + @property + def ended(self) -> Optional[datetime.datetime]: + """If representing query results, the end time of the associated query.""" + return self._job_ended + def _is_almost_completely_cached(self): """Check if all results are completely cached. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 7201adb55e83..8f684c3e9172 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -889,6 +889,9 @@ def test_result_reloads_job_state_until_done(self): job_resource_done = self._make_resource(started=True, ended=True, location="EU") job_resource_done["statistics"]["query"]["totalBytesProcessed"] = str(1234) job_resource_done["statistics"]["query"]["totalSlotMs"] = str(5678) + job_resource_done["statistics"]["creationTime"] = str(11) + job_resource_done["statistics"]["startTime"] = str(22) + job_resource_done["statistics"]["endTime"] = str(33) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -971,6 +974,9 @@ def test_result_reloads_job_state_until_done(self): self.assertEqual(result.query, job.query) self.assertEqual(result.total_bytes_processed, 1234) self.assertEqual(result.slot_millis, 5678) + self.assertEqual(result.created.timestamp() * 1000, 11) + self.assertEqual(result.started.timestamp() * 1000, 22) + self.assertEqual(result.ended.timestamp() * 1000, 33) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index bb86ccc3cf31..c3cf33279dfa 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -5719,6 +5719,9 @@ def test_query_and_wait_defaults(self): "queryId": "job_abcDEF_", "totalBytesProcessed": 1234, "totalSlotMs": 5678, + "creationTime": "1437767599006", + "startTime": "1437767600007", + "endTime": "1437767601008", } creds = _make_credentials() http = object() @@ -5737,6 +5740,9 @@ def test_query_and_wait_defaults(self): self.assertEqual(rows.query, query) self.assertEqual(rows.total_bytes_processed, 1234) self.assertEqual(rows.slot_millis, 5678) + self.assertEqual(rows.created.timestamp() * 1000, 1437767599006) + self.assertEqual(rows.started.timestamp() * 1000, 1437767600007) + self.assertEqual(rows.ended.timestamp() * 1000, 1437767601008) # Verify the request we send is to jobs.query. conn.api_request.assert_called_once() diff --git a/packages/google-cloud-bigquery/tests/unit/test_query.py b/packages/google-cloud-bigquery/tests/unit/test_query.py index 2b704d3c9d2b..adb43bcd9b4f 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/test_query.py @@ -2016,6 +2016,54 @@ def test_slot_millis_present_string(self): query = self._make_one(resource) self.assertEqual(query.slot_millis, 123456) + def test_created_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.created) + + def test_created_present_integer(self): + resource = self._make_resource() + resource["creationTime"] = 1437767599006 + query = self._make_one(resource) + self.assertEqual(query.created.timestamp() * 1000, 1437767599006) + + def test_created_present_string(self): + resource = self._make_resource() + resource["creationTime"] = "1437767599006" + query = self._make_one(resource) + self.assertEqual(query.created.timestamp() * 1000, 1437767599006) + + def test_started_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.started) + + def test_started_present_integer(self): + resource = self._make_resource() + resource["startTime"] = 1437767599006 + query = self._make_one(resource) + self.assertEqual(query.started.timestamp() * 1000, 1437767599006) + + def test_started_present_string(self): + resource = self._make_resource() + resource["startTime"] = "1437767599006" + query = self._make_one(resource) + self.assertEqual(query.started.timestamp() * 1000, 1437767599006) + + def test_ended_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.ended) + + def test_ended_present_integer(self): + resource = self._make_resource() + resource["endTime"] = 1437767599006 + query = self._make_one(resource) + self.assertEqual(query.ended.timestamp() * 1000, 1437767599006) + + def test_ended_present_string(self): + resource = self._make_resource() + resource["endTime"] = "1437767599006" + query = self._make_one(resource) + self.assertEqual(query.ended.timestamp() * 1000, 1437767599006) + def test_num_dml_affected_rows_missing(self): query = self._make_one(self._make_resource()) self.assertIsNone(query.num_dml_affected_rows) From 14857e240c4f8dde0108035e91349be56d9d8da1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 18 Aug 2025 10:39:54 -0500 Subject: [PATCH 1987/2016] chore: add private `_query_and_wait_bigframes` method (#2250) * chore: add private `_query_and_wait_bigframes` method Towards internal issue b/409104302 * fix unit tests * revert type hints * lint * Apply suggestions from code review Co-authored-by: Chalmer Lowe * populate created, started, ended --------- Co-authored-by: Chalmer Lowe --- .../google/cloud/bigquery/_job_helpers.py | 161 ++++++- .../google/cloud/bigquery/client.py | 40 +- .../google/cloud/bigquery/job/query.py | 6 + .../google/cloud/bigquery/query.py | 9 +- .../google/cloud/bigquery/table.py | 3 +- .../tests/unit/test_client_bigframes.py | 411 ++++++++++++++++++ 6 files changed, 619 insertions(+), 11 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/test_client_bigframes.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index aa0b115d9618..6fd561f8c3a0 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -35,17 +35,22 @@ predicates where it is safe to generate a new query ID. """ +from __future__ import annotations + import copy +import dataclasses +import datetime import functools import uuid import textwrap -from typing import Any, Dict, Optional, TYPE_CHECKING, Union +from typing import Any, Callable, Dict, Optional, TYPE_CHECKING, Union import warnings import google.api_core.exceptions as core_exceptions from google.api_core import retry as retries from google.cloud.bigquery import job +import google.cloud.bigquery.job.query import google.cloud.bigquery.query from google.cloud.bigquery import table import google.cloud.bigquery.retry @@ -116,14 +121,21 @@ def query_jobs_insert( retry: Optional[retries.Retry], timeout: Optional[float], job_retry: Optional[retries.Retry], + *, + callback: Callable = lambda _: None, ) -> job.QueryJob: """Initiate a query using jobs.insert. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + + Args: + callback (Callable): + A callback function used by bigframes to report query progress. """ job_id_given = job_id is not None job_id_save = job_id job_config_save = job_config + query_sent_factory = QuerySentEventFactory() def do_query(): # Make a copy now, so that original doesn't get changed by the process @@ -136,6 +148,16 @@ def do_query(): try: query_job._begin(retry=retry, timeout=timeout) + if job_config is not None and not job_config.dry_run: + callback( + query_sent_factory( + query=query, + billing_project=query_job.project, + location=query_job.location, + job_id=query_job.job_id, + request_id=None, + ) + ) except core_exceptions.Conflict as create_exc: # The thought is if someone is providing their own job IDs and they get # their job ID generation wrong, this could end up returning results for @@ -396,6 +418,7 @@ def query_and_wait( job_retry: Optional[retries.Retry], page_size: Optional[int] = None, max_results: Optional[int] = None, + callback: Callable = lambda _: None, ) -> table.RowIterator: """Run the query, wait for it to finish, and return the results. @@ -415,9 +438,8 @@ def query_and_wait( location (Optional[str]): Location where to run the job. Must match the location of the table used in the query as well as the destination table. - project (Optional[str]): - Project ID of the project of where to run the job. Defaults - to the client's project. + project (str): + Project ID of the project of where to run the job. api_timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -441,6 +463,8 @@ def query_and_wait( request. Non-positive values are ignored. max_results (Optional[int]): The maximum total number of rows from this request. + callback (Callable): + A callback function used by bigframes to report query progress. Returns: google.cloud.bigquery.table.RowIterator: @@ -479,12 +503,14 @@ def query_and_wait( retry=retry, timeout=api_timeout, job_retry=job_retry, + callback=callback, ), api_timeout=api_timeout, wait_timeout=wait_timeout, retry=retry, page_size=page_size, max_results=max_results, + callback=callback, ) path = _to_query_path(project) @@ -496,10 +522,24 @@ def query_and_wait( if client.default_job_creation_mode: request_body["jobCreationMode"] = client.default_job_creation_mode + query_sent_factory = QuerySentEventFactory() + def do_query(): - request_body["requestId"] = make_job_id() + request_id = make_job_id() + request_body["requestId"] = request_id span_attributes = {"path": path} + if "dryRun" not in request_body: + callback( + query_sent_factory( + query=query, + billing_project=project, + location=location, + job_id=None, + request_id=request_id, + ) + ) + # For easier testing, handle the retries ourselves. if retry is not None: response = retry(client._call_api)( @@ -542,8 +582,25 @@ def do_query(): retry=retry, page_size=page_size, max_results=max_results, + callback=callback, ) + if "dryRun" not in request_body: + callback( + QueryFinishedEvent( + billing_project=project, + location=query_results.location, + query_id=query_results.query_id, + job_id=query_results.job_id, + total_rows=query_results.total_rows, + total_bytes_processed=query_results.total_bytes_processed, + slot_millis=query_results.slot_millis, + destination=None, + created=query_results.created, + started=query_results.started, + ended=query_results.ended, + ) + ) return table.RowIterator( client=client, api_request=functools.partial(client._call_api, retry, timeout=api_timeout), @@ -614,6 +671,8 @@ def _wait_or_cancel( retry: Optional[retries.Retry], page_size: Optional[int], max_results: Optional[int], + *, + callback: Callable = lambda _: None, ) -> table.RowIterator: """Wait for a job to complete and return the results. @@ -621,12 +680,43 @@ def _wait_or_cancel( the job. """ try: - return job.result( + if not job.dry_run: + callback( + QueryReceivedEvent( + billing_project=job.project, + location=job.location, + job_id=job.job_id, + statement_type=job.statement_type, + state=job.state, + query_plan=job.query_plan, + created=job.created, + started=job.started, + ended=job.ended, + ) + ) + query_results = job.result( page_size=page_size, max_results=max_results, retry=retry, timeout=wait_timeout, ) + if not job.dry_run: + callback( + QueryFinishedEvent( + billing_project=job.project, + location=query_results.location, + query_id=query_results.query_id, + job_id=query_results.job_id, + total_rows=query_results.total_rows, + total_bytes_processed=query_results.total_bytes_processed, + slot_millis=query_results.slot_millis, + destination=job.destination, + created=job.created, + started=job.started, + ended=job.ended, + ) + ) + return query_results except Exception: # Attempt to cancel the job since we can't return the results. try: @@ -635,3 +725,62 @@ def _wait_or_cancel( # Don't eat the original exception if cancel fails. pass raise + + +@dataclasses.dataclass(frozen=True) +class QueryFinishedEvent: + """Query finished successfully.""" + + billing_project: Optional[str] + location: Optional[str] + query_id: Optional[str] + job_id: Optional[str] + destination: Optional[table.TableReference] + total_rows: Optional[int] + total_bytes_processed: Optional[int] + slot_millis: Optional[int] + created: Optional[datetime.datetime] + started: Optional[datetime.datetime] + ended: Optional[datetime.datetime] + + +@dataclasses.dataclass(frozen=True) +class QueryReceivedEvent: + """Query received and acknowledged by the BigQuery API.""" + + billing_project: Optional[str] + location: Optional[str] + job_id: Optional[str] + statement_type: Optional[str] + state: Optional[str] + query_plan: Optional[list[google.cloud.bigquery.job.query.QueryPlanEntry]] + created: Optional[datetime.datetime] + started: Optional[datetime.datetime] + ended: Optional[datetime.datetime] + + +@dataclasses.dataclass(frozen=True) +class QuerySentEvent: + """Query sent to BigQuery.""" + + query: str + billing_project: Optional[str] + location: Optional[str] + job_id: Optional[str] + request_id: Optional[str] + + +class QueryRetryEvent(QuerySentEvent): + """Query sent another time because the previous attempt failed.""" + + +class QuerySentEventFactory: + """Creates a QuerySentEvent first, then QueryRetryEvent after that.""" + + def __init__(self): + self._event_constructor = QuerySentEvent + + def __call__(self, **kwargs): + result = self._event_constructor(**kwargs) + self._event_constructor = QueryRetryEvent + return result diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 8048452db6b6..4ca2cb4283dc 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -15,6 +15,7 @@ """Client for interacting with the Google BigQuery API.""" from __future__ import absolute_import +from __future__ import annotations from __future__ import division from collections import abc as collections_abc @@ -31,6 +32,7 @@ import typing from typing import ( Any, + Callable, Dict, IO, Iterable, @@ -3633,8 +3635,8 @@ def query_and_wait( rate-limit-exceeded errors. Passing ``None`` disables job retry. Not all jobs can be retried. page_size (Optional[int]): - The maximum number of rows in each page of results from this - request. Non-positive values are ignored. + The maximum number of rows in each page of results from the + initial jobs.query request. Non-positive values are ignored. max_results (Optional[int]): The maximum total number of rows from this request. @@ -3656,6 +3658,39 @@ def query_and_wait( :class:`~google.cloud.bigquery.job.QueryJobConfig` class. """ + return self._query_and_wait_bigframes( + query, + job_config=job_config, + location=location, + project=project, + api_timeout=api_timeout, + wait_timeout=wait_timeout, + retry=retry, + job_retry=job_retry, + page_size=page_size, + max_results=max_results, + ) + + def _query_and_wait_bigframes( + self, + query, + *, + job_config: Optional[QueryJobConfig] = None, + location: Optional[str] = None, + project: Optional[str] = None, + api_timeout: TimeoutType = DEFAULT_TIMEOUT, + wait_timeout: Union[Optional[float], object] = POLLING_DEFAULT_VALUE, + retry: retries.Retry = DEFAULT_RETRY, + job_retry: retries.Retry = DEFAULT_JOB_RETRY, + page_size: Optional[int] = None, + max_results: Optional[int] = None, + callback: Callable = lambda _: None, + ) -> RowIterator: + """See query_and_wait. + + This method has an extra callback parameter, which is used by bigframes + to create better progress bars. + """ if project is None: project = self.project @@ -3681,6 +3716,7 @@ def query_and_wait( job_retry=job_retry, page_size=page_size, max_results=max_results, + callback=callback, ) def insert_rows( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index 44d8a92e6c4b..b377f979dadd 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1550,6 +1550,8 @@ def result( # type: ignore # (incompatible with supertype) return _EmptyRowIterator( project=self.project, location=self.location, + schema=self.schema, + total_bytes_processed=self.total_bytes_processed, # Intentionally omit job_id and query_id since this doesn't # actually correspond to a finished query job. ) @@ -1737,7 +1739,11 @@ def is_job_done(): project=self.project, job_id=self.job_id, query_id=self.query_id, + schema=self.schema, num_dml_affected_rows=self._query_results.num_dml_affected_rows, + query=self.query, + total_bytes_processed=self.total_bytes_processed, + slot_millis=self.slot_millis, ) # We know that there's at least 1 row, so only treat the response from diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 58372f1e6a74..7f70f6a2a87a 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -1228,11 +1228,18 @@ def location(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.job_reference + or https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.location Returns: str: Job ID of the query job. """ - return self._properties.get("jobReference", {}).get("location") + location = self._properties.get("jobReference", {}).get("location") + + # Sometimes there's no job, but we still want to get the location + # information. Prefer the value from job for backwards compatibilitity. + if not location: + location = self._properties.get("location") + return location @property def query_id(self) -> Optional[str]: diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index a0986c44efd8..219b314678c6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -1837,7 +1837,7 @@ def __init__( page_start=_rows_page_start, next_token="pageToken", ) - schema = _to_schema_fields(schema) + schema = _to_schema_fields(schema) if schema else () self._field_to_index = _helpers._field_to_index_mapping(schema) self._page_size = page_size self._preserve_order = False @@ -2917,7 +2917,6 @@ class _EmptyRowIterator(RowIterator): statements. """ - schema = () pages = () total_rows = 0 diff --git a/packages/google-cloud-bigquery/tests/unit/test_client_bigframes.py b/packages/google-cloud-bigquery/tests/unit/test_client_bigframes.py new file mode 100644 index 000000000000..0fcc31e40d3b --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_client_bigframes.py @@ -0,0 +1,411 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Client features enabling the bigframes integration.""" + +from __future__ import annotations + +import datetime +from unittest import mock + +import pytest + +import google.auth.credentials +from google.api_core import exceptions +from google.cloud import bigquery +import google.cloud.bigquery.client +from google.cloud.bigquery import _job_helpers + + +PROJECT = "test-project" +LOCATION = "test-location" + + +def make_response(body, *, status_code: int = 200): + response = mock.Mock() + type(response).status_code = mock.PropertyMock(return_value=status_code) + response.json.return_value = body + return response + + +@pytest.fixture +def client(): + """A real client object with mocked API requests.""" + credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + http_session = mock.Mock() + return google.cloud.bigquery.client.Client( + project=PROJECT, + credentials=credentials, + _http=http_session, + location=LOCATION, + ) + + +def test_query_and_wait_bigframes_dry_run_no_callback(client): + client._http.request.side_effect = [ + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + "location": LOCATION, + "queryId": "abcdefg", + "totalBytesProcessed": "123", + "jobComplete": True, + } + ), + ] + callback = mock.Mock() + job_config = bigquery.QueryJobConfig(dry_run=True) + response = client._query_and_wait_bigframes( + query="SELECT 1", job_config=job_config, callback=callback + ) + callback.assert_not_called() + assert response.total_bytes_processed == 123 + assert response.query_id == "abcdefg" + + +def test_query_and_wait_bigframes_callback(client): + created = datetime.datetime( + 2025, 8, 18, 10, 11, 12, 345000, tzinfo=datetime.timezone.utc + ) + started = datetime.datetime( + 2025, 8, 18, 10, 11, 13, 456000, tzinfo=datetime.timezone.utc + ) + ended = datetime.datetime( + 2025, 8, 18, 10, 11, 14, 567000, tzinfo=datetime.timezone.utc + ) + client._http.request.side_effect = [ + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + "location": LOCATION, + "queryId": "abcdefg", + "totalRows": "100", + "totalBytesProcessed": "123", + "totalSlotMs": "987", + "jobComplete": True, + "creationTime": _to_millis(created), + "startTime": _to_millis(started), + "endTime": _to_millis(ended), + } + ), + ] + callback = mock.Mock() + client._query_and_wait_bigframes(query="SELECT 1", callback=callback) + callback.assert_has_calls( + [ + mock.call( + _job_helpers.QuerySentEvent( + query="SELECT 1", + billing_project=PROJECT, + location=LOCATION, + # No job ID, because a basic query is eligible for jobs.query. + job_id=None, + request_id=mock.ANY, + ) + ), + mock.call( + _job_helpers.QueryFinishedEvent( + billing_project=PROJECT, + location=LOCATION, + query_id="abcdefg", + total_rows=100, + total_bytes_processed=123, + slot_millis=987, + created=created, + started=started, + ended=ended, + # No job ID or destination, because a basic query is eligible for jobs.query. + job_id=None, + destination=None, + ), + ), + ] + ) + + +def _to_millis(dt: datetime.datetime) -> str: + return str( + int( + (dt - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)) + / datetime.timedelta(milliseconds=1) + ) + ) + + +def test_query_and_wait_bigframes_with_jobs_insert_callback_empty_results(client): + client._http.request.side_effect = [ + # jobs.insert because destination table present in job_config + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "statistics": { + "creationTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ) + ), + "query": { + "statementType": "SELECT", + # "queryPlan": [{"name": "part1"}, {"name": "part2"}], + }, + }, + "status": { + "state": "PENDING", + }, + } + ), + # jobs.get waiting for query to finish + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "status": { + "state": "PENDING", + }, + } + ), + # jobs.getQueryResults with max_results=0 + make_response( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "jobComplete": True, + # totalRows is intentionally missing so we end up in the _EmptyRowIterator code path. + } + ), + # jobs.get + make_response( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "statistics": { + "creationTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ) + ), + "startTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 32, 123000, tzinfo=datetime.timezone.utc + ) + ), + "endTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 33, 123000, tzinfo=datetime.timezone.utc + ) + ), + "query": { + "statementType": "SELECT", + "totalBytesProcessed": 123, + "totalSlotMs": 987, + }, + }, + "status": {"state": "DONE"}, + } + ), + ] + callback = mock.Mock() + config = bigquery.QueryJobConfig() + config.destination = "proj.dset.table" + client._query_and_wait_bigframes( + query="SELECT 1", job_config=config, callback=callback + ) + callback.assert_has_calls( + [ + mock.call( + _job_helpers.QuerySentEvent( + query="SELECT 1", + billing_project="response-project", + location="response-location", + job_id="response-job-id", + # We use jobs.insert not jobs.query because destination is + # present on job_config. + request_id=None, + ) + ), + mock.call( + _job_helpers.QueryReceivedEvent( + billing_project="response-project", + location="response-location", + job_id="response-job-id", + statement_type="SELECT", + state="PENDING", + query_plan=[], + created=datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ), + started=None, + ended=None, + ) + ), + mock.call( + _job_helpers.QueryFinishedEvent( + billing_project="response-project", + location="response-location", + job_id="response-job-id", + query_id=None, + total_rows=0, + total_bytes_processed=123, + slot_millis=987, + created=datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ), + started=datetime.datetime( + 2025, 8, 13, 13, 7, 32, 123000, tzinfo=datetime.timezone.utc + ), + ended=datetime.datetime( + 2025, 8, 13, 13, 7, 33, 123000, tzinfo=datetime.timezone.utc + ), + destination=None, + ), + ), + ] + ) + + +def test_query_and_wait_bigframes_with_jobs_insert_dry_run_no_callback(client): + client._http.request.side_effect = [ + # jobs.insert because destination table present in job_config + make_response( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "statistics": { + "creationTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ) + ), + "query": { + "statementType": "SELECT", + "totalBytesProcessed": 123, + "schema": { + "fields": [ + {"name": "_f0", "type": "INTEGER"}, + ], + }, + }, + }, + "configuration": { + "dryRun": True, + }, + "status": {"state": "DONE"}, + } + ), + ] + callback = mock.Mock() + config = bigquery.QueryJobConfig() + config.destination = "proj.dset.table" + config.dry_run = True + result = client._query_and_wait_bigframes( + query="SELECT 1", job_config=config, callback=callback + ) + callback.assert_not_called() + assert result.total_bytes_processed == 123 + assert result.schema == [bigquery.SchemaField("_f0", "INTEGER")] + + +def test_query_and_wait_bigframes_with_query_retry_callbacks(client): + created = datetime.datetime( + 2025, 8, 18, 10, 11, 12, 345000, tzinfo=datetime.timezone.utc + ) + started = datetime.datetime( + 2025, 8, 18, 10, 11, 13, 456000, tzinfo=datetime.timezone.utc + ) + ended = datetime.datetime( + 2025, 8, 18, 10, 11, 14, 567000, tzinfo=datetime.timezone.utc + ) + client._http.request.side_effect = [ + exceptions.InternalServerError( + "first try", errors=({"reason": "jobInternalError"},) + ), + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + "location": LOCATION, + "queryId": "abcdefg", + "totalRows": "100", + "totalBytesProcessed": "123", + "totalSlotMs": "987", + "jobComplete": True, + "creationTime": _to_millis(created), + "startTime": _to_millis(started), + "endTime": _to_millis(ended), + } + ), + ] + callback = mock.Mock() + client._query_and_wait_bigframes(query="SELECT 1", callback=callback) + callback.assert_has_calls( + [ + mock.call( + _job_helpers.QuerySentEvent( + query="SELECT 1", + billing_project=PROJECT, + location=LOCATION, + # No job ID, because a basic query is eligible for jobs.query. + job_id=None, + request_id=mock.ANY, + ) + ), + mock.call( + _job_helpers.QueryRetryEvent( + query="SELECT 1", + billing_project=PROJECT, + location=LOCATION, + # No job ID, because a basic query is eligible for jobs.query. + job_id=None, + request_id=mock.ANY, + ) + ), + mock.call( + _job_helpers.QueryFinishedEvent( + billing_project=PROJECT, + location=LOCATION, + query_id=mock.ANY, + total_rows=100, + total_bytes_processed=123, + slot_millis=987, + created=created, + started=started, + ended=ended, + # No job ID or destination, because a basic query is eligible for jobs.query. + job_id=None, + destination=None, + ), + ), + ] + ) From 450e6186ceb0d44b2a7476ffb9f5327245513daa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 18 Aug 2025 13:02:13 -0500 Subject: [PATCH 1988/2016] docs: add a TROUBLESHOOTING.md file with tips for logging (#2262) * docs: add a TROUBLESHOOTING.md file with tips for logging * typo * finish my sentence --------- Co-authored-by: Lingqing Gan --- .../google-cloud-bigquery/TROUBLESHOOTING.md | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 packages/google-cloud-bigquery/TROUBLESHOOTING.md diff --git a/packages/google-cloud-bigquery/TROUBLESHOOTING.md b/packages/google-cloud-bigquery/TROUBLESHOOTING.md new file mode 100644 index 000000000000..7da12c44018a --- /dev/null +++ b/packages/google-cloud-bigquery/TROUBLESHOOTING.md @@ -0,0 +1,34 @@ +# Troubleshooting steps + +## Enable logging of BQ Storage Read API session creation + +It can be helpful to get the BQ Storage Read API session to allow the BigQuery +backend team to debug cases of API instability. The logs that share the session +creation are in a module-specific logger. To enable the logs, refer to the +following code sample: + +```python +import logging +import google.cloud.bigquery + +# Configure the basic logging to show DEBUG level messages +log_formatter = logging.Formatter( + '%(asctime)s - %(levelname)s - %(message)s' +) +handler = logging.StreamHandler() +handler.setFormatter(log_formatter) +default_logger = logging.getLogger() +default_logger.setLevel(logging.DEBUG) +default_logger.addHandler(handler) +to_dataframe_logger = logging.getLogger("google.cloud.bigquery._pandas_helpers") +to_dataframe_logger.setLevel(logging.DEBUG) +to_dataframe_logger.addHandler(handler) + +# Example code that touches the BQ Storage Read API. +bqclient = google.cloud.bigquery.Client() +results = bqclient.query_and_wait("SELECT * FROM `bigquery-public-data.usa_names.usa_1910_2013`") +print(results.to_dataframe().head()) +``` + +In particular, watch for the text "with BQ Storage API session" in the logs +to get the streaming API session ID to share with your support person. From 1dfea750c91808706cb1ff69d4b7f12d3d257523 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 20 Aug 2025 14:36:03 -0500 Subject: [PATCH 1989/2016] chore: execute retry tests serially, since they depend on global time (#2265) * chore: migrate test_base retry tests * migrate job_helpers test * migrate more tests * fix initiate resumable upload tests * fix failing tests * remove dead test code --- .../tests/unit/conftest.py | 13 + .../tests/unit/job/test_async_job_retry.py | 139 +++++ .../tests/unit/job/test_base.py | 104 ---- .../tests/unit/job/test_query.py | 196 ------ .../tests/unit/job/test_query_job_retry.py | 229 +++++++ .../tests/unit/test__job_helpers.py | 105 ---- .../tests/unit/test__job_helpers_retry.py | 122 ++++ .../tests/unit/test_client.py | 587 ------------------ .../tests/unit/test_client_bigframes.py | 2 +- .../test_client_resumable_media_upload.py | 433 +++++++++++++ .../tests/unit/test_client_retry.py | 279 +++++++++ .../tests/unit/test_job_retry.py | 18 +- 12 files changed, 1226 insertions(+), 1001 deletions(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py create mode 100644 packages/google-cloud-bigquery/tests/unit/job/test_query_job_retry.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test__job_helpers_retry.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_client_resumable_media_upload.py create mode 100644 packages/google-cloud-bigquery/tests/unit/test_client_retry.py diff --git a/packages/google-cloud-bigquery/tests/unit/conftest.py b/packages/google-cloud-bigquery/tests/unit/conftest.py index ebe2d2a7a659..5070a199bc58 100644 --- a/packages/google-cloud-bigquery/tests/unit/conftest.py +++ b/packages/google-cloud-bigquery/tests/unit/conftest.py @@ -13,6 +13,7 @@ # limitations under the License. from unittest import mock +import threading import pytest @@ -24,6 +25,18 @@ def client(): yield make_client() +time_lock = threading.Lock() + + +@pytest.fixture +def global_time_lock(): + """Fixture to run tests serially that depend on the global time state, + such as tests of retry behavior. + """ + with time_lock: + yield + + @pytest.fixture def PROJECT(): yield "PROJECT" diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py b/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py new file mode 100644 index 000000000000..35041aa1b965 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py @@ -0,0 +1,139 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import google.api_core.retry +from google.api_core import exceptions + +from . import helpers +import google.cloud.bigquery.job + + +PROJECT = "test-project" +JOB_ID = "test-job-id" + + +def test_cancel_w_custom_retry(global_time_lock): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + api_path = "/projects/{}/jobs/{}/cancel".format(PROJECT, JOB_ID) + resource = { + "jobReference": { + "jobId": JOB_ID, + "projectId": PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + expected = resource.copy() + expected["statistics"] = {} + response = {"job": resource} + conn = helpers.make_connection( + ValueError, + response, + ) + client = helpers._make_client(project=PROJECT, connection=conn) + job = google.cloud.bigquery.job._AsyncJob( + google.cloud.bigquery.job._JobReference(JOB_ID, PROJECT, "EU"), client + ) + + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, ValueError) + ) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + result = job.cancel(retry=retry, timeout=7.5) + + final_attributes.assert_called() + + assert result is True + assert job._properties == expected + conn.api_request.assert_has_calls( + [ + mock.call( + method="POST", + path=api_path, + query_params={"location": "EU"}, + timeout=7.5, + ), + mock.call( + method="POST", + path=api_path, + query_params={"location": "EU"}, + timeout=7.5, + ), # was retried once + ], + ) + + +def test_result_w_retry_wo_state(global_time_lock): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + + begun_job_resource = helpers._make_job_resource( + job_id=JOB_ID, project_id=PROJECT, location="EU", started=True + ) + done_job_resource = helpers._make_job_resource( + job_id=JOB_ID, + project_id=PROJECT, + location="EU", + started=True, + ended=True, + ) + conn = helpers.make_connection( + exceptions.NotFound("not normally retriable"), + begun_job_resource, + exceptions.NotFound("not normally retriable"), + done_job_resource, + ) + client = helpers._make_client(project=PROJECT, connection=conn) + job = google.cloud.bigquery.job._AsyncJob( + google.cloud.bigquery.job._JobReference(JOB_ID, PROJECT, "EU"), client + ) + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry( + predicate=custom_predicate, + initial=0.001, + maximum=0.001, + deadline=0.1, + ) + assert job.result(retry=custom_retry) is job + + begin_call = mock.call( + method="POST", + path=f"/projects/{PROJECT}/jobs", + data={ + "jobReference": { + "jobId": JOB_ID, + "projectId": PROJECT, + "location": "EU", + } + }, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={ + "projection": "full", + "location": "EU", + }, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index aa3d49ce380d..f5861f645a1b 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -17,8 +17,6 @@ import unittest from unittest import mock -from google.api_core import exceptions -import google.api_core.retry from google.api_core.future import polling import pytest @@ -882,50 +880,6 @@ def test_cancel_explicit(self): ) self.assertEqual(job._properties, expected) - def test_cancel_w_custom_retry(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - api_path = "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID) - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - expected = resource.copy() - expected["statistics"] = {} - response = {"job": resource} - job = self._set_properties_job() - - api_request_patcher = mock.patch.object( - job._client._connection, "api_request", side_effect=[ValueError, response] - ) - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, ValueError) - ) - - with api_request_patcher as fake_api_request: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - result = job.cancel(retry=retry, timeout=7.5) - - final_attributes.assert_called() - - self.assertTrue(result) - self.assertEqual(job._properties, expected) - self.assertEqual( - fake_api_request.call_args_list, - [ - mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), - mock.call( - method="POST", path=api_path, query_params={}, timeout=7.5 - ), # was retried once - ], - ) - def test__set_future_result_wo_done(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -1069,64 +1023,6 @@ def test_result_default_wo_state(self): ) conn.api_request.assert_has_calls([begin_call, begin_call, reload_call]) - def test_result_w_retry_wo_state(self): - from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT - - begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, location="EU", started=True - ) - done_job_resource = _make_job_resource( - job_id=self.JOB_ID, - project_id=self.PROJECT, - location="EU", - started=True, - ended=True, - ) - conn = make_connection( - exceptions.NotFound("not normally retriable"), - begun_job_resource, - exceptions.NotFound("not normally retriable"), - done_job_resource, - ) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one( - self._job_reference(self.JOB_ID, self.PROJECT, "EU"), client - ) - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry( - predicate=custom_predicate, - initial=0.001, - maximum=0.001, - deadline=0.1, - ) - self.assertIs(job.result(retry=custom_retry), job) - - begin_call = mock.call( - method="POST", - path=f"/projects/{self.PROJECT}/jobs", - data={ - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": "EU", - } - }, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={ - "projection": "full", - "location": "EU", - }, - timeout=DEFAULT_GET_JOB_TIMEOUT, - ) - conn.api_request.assert_has_calls( - [begin_call, begin_call, reload_call, reload_call] - ) - def test_result_explicit_w_state(self): conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 8f684c3e9172..ef64295989e8 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -20,15 +20,11 @@ import types from unittest import mock -import freezegun -from google.api_core import exceptions -import google.api_core.retry import requests from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery._job_helpers import google.cloud.bigquery.query -import google.cloud.bigquery.retry from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT from google.cloud.bigquery.table import _EmptyRowIterator @@ -1335,102 +1331,6 @@ def test_result_with_max_results(self): [jobs_get_call, query_page_waiting_call, query_page_2_call] ) - def test_result_w_custom_retry(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - } - job_resource = self._make_resource(started=True, location="asia-northeast1") - job_resource_done = self._make_resource( - started=True, ended=True, location="asia-northeast1" - ) - job_resource_done["configuration"]["query"]["destinationTable"] = { - "projectId": "dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - - connection = make_connection( - # Also, for each API request, raise an exception that we know can - # be retried. Because of this, for each iteration we do: - # jobs.get (x2) & jobs.getQueryResults (x2) - exceptions.NotFound("not normally retriable"), - job_resource, - exceptions.NotFound("not normally retriable"), - query_resource, - # Query still not done, repeat both. - exceptions.NotFound("not normally retriable"), - job_resource, - exceptions.NotFound("not normally retriable"), - query_resource, - exceptions.NotFound("not normally retriable"), - # Query still not done, repeat both. - job_resource_done, - exceptions.NotFound("not normally retriable"), - query_resource_done, - # Query finished! - ) - client = _make_client(self.PROJECT, connection=connection) - job = self._get_target_class().from_api_repr(job_resource, client) - - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry( - initial=0.001, - maximum=0.001, - multiplier=1.0, - deadline=0.1, - predicate=custom_predicate, - ) - - self.assertIsInstance(job.result(retry=custom_retry), RowIterator) - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0, "location": "asia-northeast1"}, - # TODO(tswast): Why do we end up setting timeout to - # google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT in - # some cases but not others? - timeout=mock.ANY, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"projection": "full", "location": "asia-northeast1"}, - timeout=DEFAULT_GET_JOB_TIMEOUT, - ) - - connection.api_request.assert_has_calls( - [ - # See make_connection() call above for explanation of the - # expected API calls. - # - # Query not done. - reload_call, - reload_call, - query_results_call, - query_results_call, - # Query still not done. - reload_call, - reload_call, - query_results_call, - query_results_call, - # Query done! - reload_call, - reload_call, - query_results_call, - query_results_call, - ] - ) - def test_result_w_empty_schema(self): from google.cloud.bigquery.table import _EmptyRowIterator @@ -1455,102 +1355,6 @@ def test_result_w_empty_schema(self): self.assertEqual(result.location, "asia-northeast1") self.assertEqual(result.query_id, "xyz-abc") - def test_result_w_timeout_doesnt_raise(self): - import google.cloud.bigquery.client - - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - job._properties["jobReference"]["location"] = "US" - job._properties["status"] = {"state": "RUNNING"} - - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): - job.result( - # Test that fractional seconds are supported, but use a timeout - # that is representable as a floating point without rounding - # errors since it can be represented exactly in base 2. In this - # case 1.125 is 9 / 8, which is a fraction with a power of 2 in - # the denominator. - timeout=1.125, - ) - - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"projection": "full", "location": "US"}, - timeout=1.125, - ) - get_query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={ - "maxResults": 0, - "location": "US", - }, - timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, - ) - connection.api_request.assert_has_calls( - [ - reload_call, - get_query_results_call, - reload_call, - ] - ) - - def test_result_w_timeout_raises_concurrent_futures_timeout(self): - import google.cloud.bigquery.client - - begun_resource = self._make_resource() - begun_resource["jobReference"]["location"] = "US" - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - job._properties["jobReference"]["location"] = "US" - job._properties["status"] = {"state": "RUNNING"} - - with freezegun.freeze_time( - "1970-01-01 00:00:00", auto_tick_seconds=1.0 - ), self.assertRaises(concurrent.futures.TimeoutError): - job.result(timeout=1.125) - - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"projection": "full", "location": "US"}, - timeout=1.125, - ) - get_query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={ - "maxResults": 0, - "location": "US", - }, - timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, - ) - connection.api_request.assert_has_calls( - [ - reload_call, - get_query_results_call, - # Timeout before we can reload with the final job state. - ] - ) - def test_result_w_page_size(self): # Arrange query_results_resource = { diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_job_retry.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_job_retry.py new file mode 100644 index 000000000000..c8355b68832c --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_job_retry.py @@ -0,0 +1,229 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from unittest import mock + +import concurrent.futures +import freezegun +from google.api_core import exceptions +import google.api_core.retry +import pytest + +from google.cloud.bigquery.client import _MIN_GET_QUERY_RESULTS_TIMEOUT +from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT +from google.cloud.bigquery.table import RowIterator + +from ..helpers import make_connection +from .helpers import _make_client + + +PROJECT = "test-project" +JOB_ID = "test-job-id" +QUERY = "select count(*) from persons" + + +def _make_resource(started=False, ended=False, location="US"): + resource = { + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID, "location": location}, + "status": {"state": "PENDING"}, + "configuration": { + "query": {"query": QUERY}, + "job_type": "query", + }, + "statistics": {"creationTime": "1"}, + } + + if started: + resource["status"]["state"] = "RUNNING" + resource["statistics"]["startTime"] = "2" + + if ended: + resource["status"]["state"] = "DONE" + resource["statistics"]["endTime"] = "3" + + return resource + + +def test_result_w_custom_retry(global_time_lock): + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = _make_resource(started=True, location="asia-northeast1") + job_resource_done = _make_resource( + started=True, ended=True, location="asia-northeast1" + ) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + + connection = make_connection( + # Also, for each API request, raise an exception that we know can + # be retried. Because of this, for each iteration we do: + # jobs.get (x2) & jobs.getQueryResults (x2) + exceptions.NotFound("not normally retriable"), + job_resource, + exceptions.NotFound("not normally retriable"), + query_resource, + # Query still not done, repeat both. + exceptions.NotFound("not normally retriable"), + job_resource, + exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + # Query still not done, repeat both. + job_resource_done, + exceptions.NotFound("not normally retriable"), + query_resource_done, + # Query finished! + ) + client = _make_client(PROJECT, connection=connection) + job = QueryJob.from_api_repr(job_resource, client) + + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry( + initial=0.001, + maximum=0.001, + multiplier=1.0, + deadline=0.1, + predicate=custom_predicate, + ) + + assert isinstance(job.result(retry=custom_retry), RowIterator) + query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/queries/{JOB_ID}", + query_params={"maxResults": 0, "location": "asia-northeast1"}, + timeout=mock.ANY, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={"projection": "full", "location": "asia-northeast1"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) + + connection.api_request.assert_has_calls( + [ + reload_call, + reload_call, + query_results_call, + query_results_call, + reload_call, + reload_call, + query_results_call, + query_results_call, + reload_call, + reload_call, + query_results_call, + query_results_call, + ] + ) + + +def test_result_w_timeout_doesnt_raise(global_time_lock): + begun_resource = _make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = begun_resource.copy() + done_resource["status"] = {"state": "DONE"} + connection = make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=PROJECT, connection=connection) + job = QueryJob(JOB_ID, QUERY, client) + job._properties["jobReference"]["location"] = "US" + job._properties["status"] = {"state": "RUNNING"} + + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): + job.result( + timeout=1.125, + ) + + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={"projection": "full", "location": "US"}, + timeout=1.125, + ) + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/queries/{JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=_MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + reload_call, + ] + ) + + +def test_result_w_timeout_raises_concurrent_futures_timeout(global_time_lock): + begun_resource = _make_resource() + begun_resource["jobReference"]["location"] = "US" + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = begun_resource.copy() + done_resource["status"] = {"state": "DONE"} + connection = make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=PROJECT, connection=connection) + job = QueryJob(JOB_ID, QUERY, client) + job._properties["jobReference"]["location"] = "US" + job._properties["status"] = {"state": "RUNNING"} + + with freezegun.freeze_time( + "1970-01-01 00:00:00", auto_tick_seconds=1.0 + ), pytest.raises(concurrent.futures.TimeoutError): + job.result(timeout=1.125) + + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={"projection": "full", "location": "US"}, + timeout=1.125, + ) + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/queries/{JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=_MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + ] + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py index 417f911b808a..1f543f0330b9 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py @@ -15,7 +15,6 @@ from typing import Any, Dict, Optional from unittest import mock -import freezegun import google.api_core.exceptions from google.api_core import retry as retries import pytest @@ -450,110 +449,6 @@ def test_query_and_wait_uses_jobs_insert(): ) -def test_query_and_wait_retries_job(): - freezegun.freeze_time(auto_tick_seconds=100) - client = mock.create_autospec(Client) - client._call_api.__name__ = "_call_api" - client._call_api.__qualname__ = "Client._call_api" - client._call_api.__annotations__ = {} - client._call_api.__type_params__ = () - client._call_api.side_effect = ( - google.api_core.exceptions.BadGateway("retry me"), - google.api_core.exceptions.InternalServerError("job_retry me"), - google.api_core.exceptions.BadGateway("retry me"), - { - "jobReference": { - "projectId": "response-project", - "jobId": "abc", - "location": "response-location", - }, - "jobComplete": True, - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INT64", "mode": "NULLABLE"}, - ], - }, - "rows": [ - {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, - {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - ], - }, - ) - rows = _job_helpers.query_and_wait( - client, - query="SELECT 1", - location="request-location", - project="request-project", - job_config=None, - page_size=None, - max_results=None, - retry=retries.Retry( - lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), - multiplier=1.0, - ).with_deadline( - 200.0 - ), # Since auto_tick_seconds is 100, we should get at least 1 retry. - job_retry=retries.Retry( - lambda exc: isinstance(exc, google.api_core.exceptions.InternalServerError), - multiplier=1.0, - ).with_deadline(600.0), - ) - assert len(list(rows)) == 4 - - # For this code path, where the query has finished immediately, we should - # only be calling the jobs.query API and no other request path. - request_path = "/projects/request-project/queries" - for call in client._call_api.call_args_list: - _, kwargs = call - assert kwargs["method"] == "POST" - assert kwargs["path"] == request_path - - -@freezegun.freeze_time(auto_tick_seconds=100) -def test_query_and_wait_retries_job_times_out(): - client = mock.create_autospec(Client) - client._call_api.__name__ = "_call_api" - client._call_api.__qualname__ = "Client._call_api" - client._call_api.__annotations__ = {} - client._call_api.__type_params__ = () - client._call_api.side_effect = ( - google.api_core.exceptions.BadGateway("retry me"), - google.api_core.exceptions.InternalServerError("job_retry me"), - google.api_core.exceptions.BadGateway("retry me"), - google.api_core.exceptions.InternalServerError("job_retry me"), - ) - - with pytest.raises(google.api_core.exceptions.RetryError) as exc_info: - _job_helpers.query_and_wait( - client, - query="SELECT 1", - location="request-location", - project="request-project", - job_config=None, - page_size=None, - max_results=None, - retry=retries.Retry( - lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), - multiplier=1.0, - ).with_deadline( - 200.0 - ), # Since auto_tick_seconds is 100, we should get at least 1 retry. - job_retry=retries.Retry( - lambda exc: isinstance( - exc, google.api_core.exceptions.InternalServerError - ), - multiplier=1.0, - ).with_deadline(400.0), - ) - - assert isinstance( - exc_info.value.cause, google.api_core.exceptions.InternalServerError - ) - - def test_query_and_wait_sets_job_creation_mode(): client = mock.create_autospec(Client) client.default_job_creation_mode = "JOB_CREATION_OPTIONAL" diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers_retry.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers_retry.py new file mode 100644 index 000000000000..3ea4b1aae6c7 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers_retry.py @@ -0,0 +1,122 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import freezegun +import google.api_core.exceptions +from google.api_core import retry as retries +import pytest + +from google.cloud.bigquery import _job_helpers + +from . import helpers + + +def test_query_and_wait_retries_job(global_time_lock): + with freezegun.freeze_time(auto_tick_seconds=100): + conn = helpers.make_connection( + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + google.api_core.exceptions.BadGateway("retry me"), + { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + }, + ) + client = helpers.make_client(project="client-project") + client._connection = conn + rows = _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), + multiplier=1.0, + ).with_deadline( + 200.0 + ), # Since auto_tick_seconds is 100, we should get at least 1 retry. + job_retry=retries.Retry( + lambda exc: isinstance( + exc, google.api_core.exceptions.InternalServerError + ), + multiplier=1.0, + ).with_deadline(600.0), + ) + assert len(list(rows)) == 4 + + # For this code path, where the query has finished immediately, we should + # only be calling the jobs.query API and no other request path. + request_path = "/projects/request-project/queries" + for call in client._connection.api_request.call_args_list: + _, kwargs = call + assert kwargs["method"] == "POST" + assert kwargs["path"] == request_path + + +def test_query_and_wait_retries_job_times_out(global_time_lock): + with freezegun.freeze_time(auto_tick_seconds=100): + conn = helpers.make_connection( + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + ) + client = helpers.make_client(project="client-project") + client._connection = conn + + with pytest.raises(google.api_core.exceptions.RetryError) as exc_info: + _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), + multiplier=1.0, + ).with_deadline( + 200.0 + ), # Since auto_tick_seconds is 100, we should get at least 1 retry. + job_retry=retries.Retry( + lambda exc: isinstance( + exc, google.api_core.exceptions.InternalServerError + ), + multiplier=1.0, + ).with_deadline(400.0), + ) + + assert isinstance( + exc_info.value.cause, google.api_core.exceptions.InternalServerError + ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client.py b/packages/google-cloud-bigquery/tests/unit/test_client.py index c3cf33279dfa..213f382dc8ee 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client.py @@ -16,7 +16,6 @@ import collections import datetime import decimal -import email import gzip import http.client import io @@ -28,13 +27,10 @@ from unittest import mock import warnings -import freezegun import packaging import pytest import requests -import google.api - try: import opentelemetry @@ -58,8 +54,6 @@ import google.cloud._helpers from google.cloud import bigquery -from google.cloud.bigquery import job as bqjob -import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference, Dataset from google.cloud.bigquery.enums import UpdateMode, DatasetView from google.cloud.bigquery import exceptions @@ -313,31 +307,6 @@ def test__call_api_extra_headers(self): headers = kwargs["headers"] assert headers["x-goog-request-reason"] == "because-friday" - def test__call_api_applying_custom_retry_on_timeout(self): - from concurrent.futures import TimeoutError - from google.cloud.bigquery.retry import DEFAULT_RETRY - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - - api_request_patcher = mock.patch.object( - client._connection, - "api_request", - side_effect=[TimeoutError, "result"], - ) - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, TimeoutError) - ) - - with api_request_patcher as fake_api_request: - result = client._call_api(retry, foo="bar") - - self.assertEqual(result, "result") - self.assertEqual( - fake_api_request.call_args_list, - [mock.call(foo="bar"), mock.call(foo="bar")], # was retried once - ) - def test__call_api_span_creator_not_called(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -644,48 +613,6 @@ def test_get_service_account_email_w_alternate_project(self): ) self.assertEqual(service_account_email, email) - def test_get_service_account_email_w_custom_retry(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - api_path = "/projects/{}/serviceAccount".format(self.PROJECT) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - resource = { - "kind": "bigquery#getServiceAccountResponse", - "email": "bq-123@bigquery-encryption.iam.gserviceaccount.com", - } - api_request_patcher = mock.patch.object( - client._connection, - "api_request", - side_effect=[ValueError, resource], - ) - - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, ValueError) - ) - - with api_request_patcher as fake_api_request: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - service_account_email = client.get_service_account_email( - retry=retry, timeout=7.5 - ) - - final_attributes.assert_called_once_with({"path": api_path}, client, None) - self.assertEqual( - service_account_email, "bq-123@bigquery-encryption.iam.gserviceaccount.com" - ) - self.assertEqual( - fake_api_request.call_args_list, - [ - mock.call(method="GET", path=api_path, timeout=7.5), - mock.call(method="GET", path=api_path, timeout=7.5), # was retried once - ], - ) - def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference @@ -3848,176 +3775,6 @@ def test_load_table_from_uri_w_default_load_config(self): timeout=DEFAULT_TIMEOUT, ) - @staticmethod - def _mock_requests_response(status_code, headers, content=b""): - return mock.Mock( - content=content, - headers=headers, - status_code=status_code, - spec=["content", "headers", "status_code"], - ) - - def _mock_transport(self, status_code, headers, content=b""): - fake_transport = mock.Mock(spec=["request"]) - fake_response = self._mock_requests_response( - status_code, headers, content=content - ) - fake_transport.request.return_value = fake_response - return fake_transport - - def _initiate_resumable_upload_helper(self, num_retries=None, mtls=False): - from google.resumable_media.requests import ResumableUpload - from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE - from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE - from google.cloud.bigquery.client import _get_upload_headers - from google.cloud.bigquery.job import LoadJob - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.job import SourceFormat - - # Create mocks to be checked for doing transport. - resumable_url = "http://test.invalid?upload_id=hey-you" - response_headers = {"location": resumable_url} - fake_transport = self._mock_transport(http.client.OK, response_headers) - client = self._make_one(project=self.PROJECT, _http=fake_transport) - conn = client._connection = make_connection() - if mtls: - conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") - - # Create some mock arguments and call the method under test. - data = b"goodbye gudbi gootbee" - stream = io.BytesIO(data) - config = LoadJobConfig() - config.source_format = SourceFormat.CSV - job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) - metadata = job.to_api_repr() - upload, transport = client._initiate_resumable_upload( - stream, metadata, num_retries, None - ) - - # Check the returned values. - self.assertIsInstance(upload, ResumableUpload) - - host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" - upload_url = ( - f"{host_name}/upload/bigquery/v2/projects/{self.PROJECT}" - "/jobs?uploadType=resumable" - ) - self.assertEqual(upload.upload_url, upload_url) - expected_headers = _get_upload_headers(conn.user_agent) - self.assertEqual(upload._headers, expected_headers) - self.assertFalse(upload.finished) - self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE) - self.assertIs(upload._stream, stream) - self.assertIsNone(upload._total_bytes) - self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE) - self.assertEqual(upload.resumable_url, resumable_url) - - retry_strategy = upload._retry_strategy - self.assertEqual(retry_strategy.max_sleep, 64.0) - if num_retries is None: - self.assertEqual(retry_strategy.max_cumulative_retry, 600.0) - self.assertIsNone(retry_strategy.max_retries) - else: - self.assertIsNone(retry_strategy.max_cumulative_retry) - self.assertEqual(retry_strategy.max_retries, num_retries) - self.assertIs(transport, fake_transport) - # Make sure we never read from the stream. - self.assertEqual(stream.tell(), 0) - - # Check the mocks. - request_headers = expected_headers.copy() - request_headers["x-upload-content-type"] = _GENERIC_CONTENT_TYPE - fake_transport.request.assert_called_once_with( - "POST", - upload_url, - data=json.dumps(metadata).encode("utf-8"), - headers=request_headers, - timeout=mock.ANY, - ) - - def test__initiate_resumable_upload(self): - self._initiate_resumable_upload_helper() - - def test__initiate_resumable_upload_mtls(self): - self._initiate_resumable_upload_helper(mtls=True) - - def test__initiate_resumable_upload_with_retry(self): - self._initiate_resumable_upload_helper(num_retries=11) - - def _do_multipart_upload_success_helper( - self, get_boundary, num_retries=None, project=None, mtls=False - ): - from google.cloud.bigquery.client import _get_upload_headers - from google.cloud.bigquery.job import LoadJob - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.job import SourceFormat - - fake_transport = self._mock_transport(http.client.OK, {}) - client = self._make_one(project=self.PROJECT, _http=fake_transport) - conn = client._connection = make_connection() - if mtls: - conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") - - if project is None: - project = self.PROJECT - - # Create some mock arguments. - data = b"Bzzzz-zap \x00\x01\xf4" - stream = io.BytesIO(data) - config = LoadJobConfig() - config.source_format = SourceFormat.CSV - job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) - metadata = job.to_api_repr() - size = len(data) - - response = client._do_multipart_upload( - stream, metadata, size, num_retries, None, project=project - ) - - # Check the mocks and the returned value. - self.assertIs(response, fake_transport.request.return_value) - self.assertEqual(stream.tell(), size) - get_boundary.assert_called_once_with() - - host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" - upload_url = ( - f"{host_name}/upload/bigquery/v2/projects/{project}" - "/jobs?uploadType=multipart" - ) - payload = ( - b"--==0==\r\n" - b"content-type: application/json; charset=UTF-8\r\n\r\n" - b"%(json_metadata)s" - b"\r\n" - b"--==0==\r\n" - b"content-type: */*\r\n\r\n" - b"%(data)s" - b"\r\n" - b"--==0==--" - ) % {b"json_metadata": json.dumps(metadata).encode("utf-8"), b"data": data} - - headers = _get_upload_headers(conn.user_agent) - headers["content-type"] = b'multipart/related; boundary="==0=="' - fake_transport.request.assert_called_once_with( - "POST", upload_url, data=payload, headers=headers, timeout=mock.ANY - ) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload_mtls(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, mtls=True) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload_with_retry(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, num_retries=8) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload_with_custom_project(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, project="custom-project") - def test_copy_table(self): from google.cloud.bigquery.job import CopyJob @@ -5543,143 +5300,6 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails_no_retries(self job_retry=None, ) - def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404(self): - """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 - - Sometimes after a Conflict, the fetch fails with a 404, but we know - because of the conflict that really the job does exist. Retry until we - get the job status (or timeout). - """ - job_id = "abc123" - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection( - # We're mocking QueryJob._begin, so this is only going to be - # jobs.get requests and responses. - google.api_core.exceptions.TooManyRequests("this is retriable by default"), - google.api_core.exceptions.NotFound("we lost your job"), - google.api_core.exceptions.NotFound("we lost your job again, sorry"), - { - "jobReference": { - "projectId": self.PROJECT, - "location": "TESTLOC", - "jobId": job_id, - } - }, - ) - - job_create_error = google.api_core.exceptions.Conflict("Job already exists.") - job_begin_patcher = mock.patch.object( - bqjob.QueryJob, "_begin", side_effect=job_create_error - ) - job_id_patcher = mock.patch.object( - google.cloud.bigquery._job_helpers, - "make_job_id", - return_value=job_id, - ) - - with job_begin_patcher, job_id_patcher: - # If get job request fails there does exist a job - # with this ID already, retry 404 until we get it (or fails for a - # non-retriable reason, see other tests). - result = client.query("SELECT 1;", job_id=None) - - jobs_get_path = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{job_id}", - query_params={ - "projection": "full", - }, - timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, - ) - conn.api_request.assert_has_calls( - # Double-check that it was jobs.get that was called for each of our - # mocked responses. - [jobs_get_path] - * 4, - ) - assert result.job_id == job_id - - def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert( - self, - ): - """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 - - Sometimes after a Conflict, the fetch fails with a 404. If it keeps - failing with a 404, assume that the job actually doesn't exist. - """ - job_id_1 = "abc123" - job_id_2 = "xyz789" - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - # We're mocking QueryJob._begin, so that the connection should only get - # jobs.get requests. - job_create_error = google.api_core.exceptions.Conflict("Job already exists.") - job_begin_patcher = mock.patch.object( - bqjob.QueryJob, "_begin", side_effect=job_create_error - ) - conn = client._connection = make_connection( - google.api_core.exceptions.NotFound("we lost your job again, sorry"), - { - "jobReference": { - "projectId": self.PROJECT, - "location": "TESTLOC", - "jobId": job_id_2, - } - }, - ) - - # Choose a small deadline so the 404 retries give up. - retry = ( - google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1) - ) - job_id_patcher = mock.patch.object( - google.cloud.bigquery._job_helpers, - "make_job_id", - side_effect=[job_id_1, job_id_2], - ) - retry_patcher = mock.patch.object( - google.cloud.bigquery.retry, - "_DEFAULT_GET_JOB_CONFLICT_RETRY", - retry, - ) - - with freezegun.freeze_time( - "2025-01-01 00:00:00", - # 10x the retry deadline to guarantee a timeout. - auto_tick_seconds=10, - ), job_begin_patcher, job_id_patcher, retry_patcher: - # If get job request fails there does exist a job - # with this ID already, retry 404 until we get it (or fails for a - # non-retriable reason, see other tests). - result = client.query("SELECT 1;", job_id=None) - - jobs_get_path_1 = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{job_id_1}", - query_params={ - "projection": "full", - }, - timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, - ) - jobs_get_path_2 = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{job_id_2}", - query_params={ - "projection": "full", - }, - timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, - ) - conn.api_request.assert_has_calls( - # Double-check that it was jobs.get that was called for each of our - # mocked responses. - [jobs_get_path_1, jobs_get_path_2], - ) - assert result.job_id == job_id_2 - def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): from google.api_core.exceptions import Conflict from google.cloud.bigquery.job import QueryJob @@ -10032,213 +9652,6 @@ def test_load_table_from_json_unicode_emoji_data_case(self): assert sent_data_file.getvalue() == expected_bytes # Low-level tests - - @classmethod - def _make_resumable_upload_responses(cls, size): - """Make a series of responses for a successful resumable upload.""" - from google import resumable_media - - resumable_url = "http://test.invalid?upload_id=and-then-there-was-1" - initial_response = cls._make_response( - http.client.OK, "", {"location": resumable_url} - ) - data_response = cls._make_response( - resumable_media.PERMANENT_REDIRECT, - "", - {"range": "bytes=0-{:d}".format(size - 1)}, - ) - final_response = cls._make_response( - http.client.OK, - json.dumps({"size": size}), - {"Content-Type": "application/json"}, - ) - return [initial_response, data_response, final_response] - - @staticmethod - def _make_transport(responses=None): - import google.auth.transport.requests - - transport = mock.create_autospec( - google.auth.transport.requests.AuthorizedSession, instance=True - ) - transport.request.side_effect = responses - return transport - - def test__do_resumable_upload(self): - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len) - ) - client = self._make_client(transport) - - result = client._do_resumable_upload( - file_obj, self.EXPECTED_CONFIGURATION, None, None - ) - - content = result.content.decode("utf-8") - assert json.loads(content) == {"size": file_obj_len} - - # Verify that configuration data was passed in with the initial - # request. - transport.request.assert_any_call( - "POST", - mock.ANY, - data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"), - headers=mock.ANY, - timeout=mock.ANY, - ) - - def test__do_resumable_upload_custom_project(self): - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len) - ) - client = self._make_client(transport) - - result = client._do_resumable_upload( - file_obj, - self.EXPECTED_CONFIGURATION, - None, - None, - project="custom-project", - ) - - content = result.content.decode("utf-8") - assert json.loads(content) == {"size": file_obj_len} - - # Verify that configuration data was passed in with the initial - # request. - transport.request.assert_any_call( - "POST", - mock.ANY, - data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"), - headers=mock.ANY, - timeout=mock.ANY, - ) - - initiation_url = next( - ( - call[0][1] - for call in transport.request.call_args_list - if call[0][0] == "POST" and "uploadType=resumable" in call[0][1] - ), - None, - ) # pragma: NO COVER - - assert initiation_url is not None - assert "projects/custom-project" in initiation_url - - def test__do_resumable_upload_custom_timeout(self): - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len) - ) - client = self._make_client(transport) - - client._do_resumable_upload( - file_obj, self.EXPECTED_CONFIGURATION, num_retries=0, timeout=3.14 - ) - - # The timeout should be applied to all underlying calls. - for call_args in transport.request.call_args_list: - assert call_args[1].get("timeout") == 3.14 - - def test__do_multipart_upload(self): - transport = self._make_transport([self._make_response(http.client.OK)]) - client = self._make_client(transport) - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - client._do_multipart_upload( - file_obj, self.EXPECTED_CONFIGURATION, file_obj_len, None, None - ) - - # Verify that configuration data was passed in with the initial - # request. - request_args = transport.request.mock_calls[0][2] - request_data = request_args["data"].decode("utf-8") - request_headers = request_args["headers"] - - request_content = email.message_from_string( - "Content-Type: {}\r\n{}".format( - request_headers["content-type"].decode("utf-8"), request_data - ) - ) - - # There should be two payloads: the configuration and the binary daya. - configuration_data = request_content.get_payload(0).get_payload() - binary_data = request_content.get_payload(1).get_payload() - - assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION - assert binary_data.encode("utf-8") == file_obj.getvalue() - - def test__do_multipart_upload_wrong_size(self): - client = self._make_client() - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - with pytest.raises(ValueError): - client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None, None) - - def test_schema_from_json_with_file_path(self): - from google.cloud.bigquery.schema import SchemaField - - file_content = """[ - { - "description": "quarter", - "mode": "REQUIRED", - "name": "qtr", - "type": "STRING" - }, - { - "description": "sales representative", - "mode": "NULLABLE", - "name": "rep", - "type": "STRING" - }, - { - "description": "total sales", - "mode": "NULLABLE", - "name": "sales", - "type": "FLOAT" - } - ]""" - - expected = [ - SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), - SchemaField( - "rep", - "STRING", - "NULLABLE", - description="sales representative", - ), - SchemaField( - "sales", - "FLOAT", - "NULLABLE", - description="total sales", - ), - ] - - client = self._make_client() - mock_file_path = "/mocked/file.json" - - open_patch = mock.patch( - "builtins.open", new=mock.mock_open(read_data=file_content) - ) - - with open_patch as _mock_file: - actual = client.schema_from_json(mock_file_path) - _mock_file.assert_called_once_with(mock_file_path) - # This assert is to make sure __exit__ is called in the context - # manager that opens the file in the function - _mock_file().__exit__.assert_called_once() - - assert expected == actual - def test_schema_from_json_with_file_object(self): from google.cloud.bigquery.schema import SchemaField diff --git a/packages/google-cloud-bigquery/tests/unit/test_client_bigframes.py b/packages/google-cloud-bigquery/tests/unit/test_client_bigframes.py index 0fcc31e40d3b..0260da5e4bc8 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_client_bigframes.py +++ b/packages/google-cloud-bigquery/tests/unit/test_client_bigframes.py @@ -338,7 +338,7 @@ def test_query_and_wait_bigframes_with_jobs_insert_dry_run_no_callback(client): assert result.schema == [bigquery.SchemaField("_f0", "INTEGER")] -def test_query_and_wait_bigframes_with_query_retry_callbacks(client): +def test_query_and_wait_bigframes_with_query_retry_callbacks(client, global_time_lock): created = datetime.datetime( 2025, 8, 18, 10, 11, 12, 345000, tzinfo=datetime.timezone.utc ) diff --git a/packages/google-cloud-bigquery/tests/unit/test_client_resumable_media_upload.py b/packages/google-cloud-bigquery/tests/unit/test_client_resumable_media_upload.py new file mode 100644 index 000000000000..642c18d15c11 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_client_resumable_media_upload.py @@ -0,0 +1,433 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock +import email +import http.client +import io +import json + +import pytest + +from google.cloud.bigquery.table import TableReference + +from .helpers import make_connection + + +PROJECT = "test-project" +TABLE_REF = TableReference.from_string(f"{PROJECT}.test_dataset.test_table") +EXPECTED_CONFIGURATION = { + "load": { + "destinationTable": { + "projectId": PROJECT, + "datasetId": "test_dataset", + "tableId": "test_table", + }, + "sourceFormat": "CSV", + } +} + + +@pytest.fixture(autouse=True) +def mock_sleep(monkeypatch): + sleep = mock.Mock() + monkeypatch.setattr("time.sleep", sleep) + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(*args, **kw): + from google.cloud.bigquery.client import Client + + kw["credentials"] = _make_credentials() + kw["project"] = PROJECT + return Client(*args, **kw) + + +def _make_file_obj(contents=b"some data"): + return io.BytesIO(contents) + + +def _make_response(status_code, content=b"", headers=None): + response = mock.Mock(spec=["status_code", "content", "request", "headers"]) + response.status_code = status_code + response.content = content + response.headers = headers or {} + response.request = mock.Mock(spec=["headers"]) + return response + + +def _make_resumable_upload_responses(num_bytes): + # In a real scenario, the upload URL is returned in a 'Location' + # header. + return [ + _make_response( + http.client.OK, + headers={"location": "http://test.invalid/upload-id"}, + ), + _make_response( + http.client.OK, content=json.dumps({"size": num_bytes}).encode("utf-8") + ), + ] + + +def _make_transport(responses=None): + import google.auth.transport.requests + + transport = mock.create_autospec( + google.auth.transport.requests.AuthorizedSession, instance=True + ) + transport.request.side_effect = responses + return transport + + +def _mock_requests_response(status_code, headers, content=b""): + return mock.Mock( + content=content, + headers=headers, + status_code=status_code, + spec=["content", "headers", "status_code"], + ) + + +def _mock_transport(status_code, headers, content=b""): + fake_transport = mock.Mock(spec=["request"]) + fake_response = _mock_requests_response(status_code, headers, content=content) + fake_transport.request.return_value = fake_response + return fake_transport + + +def _initiate_resumable_upload_helper(num_retries=None, mtls=False): + from google.resumable_media.requests import ResumableUpload + from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE + from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SourceFormat + + # Create mocks to be checked for doing transport. + resumable_url = "http://test.invalid?upload_id=hey-you" + response_headers = {"location": resumable_url} + fake_transport = _mock_transport(http.client.OK, response_headers) + client = _make_client(_http=fake_transport) + conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") + + # Create some mock arguments and call the method under test. + data = b"goodbye gudbi gootbee" + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = SourceFormat.CSV + job = LoadJob(None, None, TABLE_REF, client, job_config=config) + metadata = job.to_api_repr() + upload, transport_out = client._initiate_resumable_upload( + stream, metadata, num_retries, None + ) + + # Check the returned values. + assert isinstance(upload, ResumableUpload) + + host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" + upload_url = ( + f"{host_name}/upload/bigquery/v2/projects/{PROJECT}/jobs?uploadType=resumable" + ) + assert upload.upload_url == upload_url + expected_headers = _get_upload_headers(conn.user_agent) + assert upload._headers == expected_headers + assert not upload.finished + assert upload._chunk_size == _DEFAULT_CHUNKSIZE + assert upload._stream is stream + assert upload._total_bytes is None + assert upload._content_type == _GENERIC_CONTENT_TYPE + assert upload.resumable_url == resumable_url + + retry_strategy = upload._retry_strategy + assert retry_strategy.max_sleep == 64.0 + if num_retries is None: + assert retry_strategy.max_cumulative_retry == 600.0 + assert retry_strategy.max_retries is None + else: + assert retry_strategy.max_cumulative_retry is None + assert retry_strategy.max_retries == num_retries + assert transport_out is fake_transport + # Make sure we never read from the stream. + assert stream.tell() == 0 + + # Check the mocks. + request_headers = expected_headers.copy() + request_headers["x-upload-content-type"] = _GENERIC_CONTENT_TYPE + fake_transport.request.assert_called_once_with( + "POST", + upload_url, + data=json.dumps(metadata).encode("utf-8"), + headers=request_headers, + timeout=mock.ANY, + ) + + +def test__initiate_resumable_upload(): + _initiate_resumable_upload_helper() + + +def test__initiate_resumable_upload_mtls(): + _initiate_resumable_upload_helper(mtls=True) + + +def test_initiate_resumable_upload_with_retry(): + _initiate_resumable_upload_helper(num_retries=11) + + +def _do_multipart_upload_success_helper( + get_boundary, num_retries=None, project=None, mtls=False +): + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SourceFormat + + fake_transport = _mock_transport(http.client.OK, {}) + client = _make_client(_http=fake_transport) + conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") + + if project is None: + project = PROJECT + + # Create some mock arguments. + data = b"Bzzzz-zap \x00\x01\xf4" + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = SourceFormat.CSV + job = LoadJob(None, None, TABLE_REF, client, job_config=config) + metadata = job.to_api_repr() + size = len(data) + + response = client._do_multipart_upload( + stream, metadata, size, num_retries, None, project=project + ) + + # Check the mocks and the returned value. + assert response is fake_transport.request.return_value + assert stream.tell() == size + get_boundary.assert_called_once_with() + + host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" + upload_url = ( + f"{host_name}/upload/bigquery/v2/projects/{project}/jobs?uploadType=multipart" + ) + payload = ( + b"--==0==\r\n" + b"content-type: application/json; charset=UTF-8\r\n\r\n" + b"%(json_metadata)s" + b"\r\n" + b"--==0==\r\n" + b"content-type: */*\r\n\r\n" + b"%(data)s" + b"\r\n" + b"--==0==--" + ) % {b"json_metadata": json.dumps(metadata).encode("utf-8"), b"data": data} + + headers = _get_upload_headers(conn.user_agent) + headers["content-type"] = b'multipart/related; boundary="==0=="' + fake_transport.request.assert_called_once_with( + "POST", upload_url, data=payload, headers=headers, timeout=mock.ANY + ) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test__do_multipart_upload(get_boundary): + _do_multipart_upload_success_helper(get_boundary) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test__do_multipart_upload_mtls(get_boundary): + _do_multipart_upload_success_helper(get_boundary, mtls=True) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test_do_multipart_upload_with_retry(get_boundary): + _do_multipart_upload_success_helper(get_boundary, num_retries=8) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test__do_multipart_upload_with_custom_project(get_boundary): + _do_multipart_upload_success_helper(get_boundary, project="custom-project") + + +def test__do_resumable_upload(): + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = _make_transport(_make_resumable_upload_responses(file_obj_len)) + client = _make_client(_http=transport) + + result = client._do_resumable_upload(file_obj, EXPECTED_CONFIGURATION, None, None) + + content = result.content.decode("utf-8") + assert json.loads(content) == {"size": file_obj_len} + + transport.request.assert_any_call( + "POST", + mock.ANY, + data=json.dumps(EXPECTED_CONFIGURATION).encode("utf-8"), + headers=mock.ANY, + timeout=mock.ANY, + ) + + +def test__do_resumable_upload_custom_project(): + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = _make_transport(_make_resumable_upload_responses(file_obj_len)) + client = _make_client(_http=transport) + + result = client._do_resumable_upload( + file_obj, + EXPECTED_CONFIGURATION, + None, + None, + project="custom-project", + ) + + content = result.content.decode("utf-8") + assert json.loads(content) == {"size": file_obj_len} + + transport.request.assert_any_call( + "POST", + mock.ANY, + data=json.dumps(EXPECTED_CONFIGURATION).encode("utf-8"), + headers=mock.ANY, + timeout=mock.ANY, + ) + + initiation_url = next( + ( + call[0][1] + for call in transport.request.call_args_list + if call[0][0] == "POST" and "uploadType=resumable" in call[0][1] + ), + None, + ) + assert initiation_url is not None + assert "projects/custom-project" in initiation_url + + +def test__do_resumable_upload_custom_timeout(): + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = _make_transport(_make_resumable_upload_responses(file_obj_len)) + client = _make_client(_http=transport) + + client._do_resumable_upload( + file_obj, EXPECTED_CONFIGURATION, num_retries=0, timeout=3.14 + ) + + for call_args in transport.request.call_args_list: + assert call_args[1].get("timeout") == 3.14 + + +def test__do_multipart_upload_request_body(): + transport = _make_transport([_make_response(http.client.OK)]) + client = _make_client(_http=transport) + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + client._do_multipart_upload( + file_obj, EXPECTED_CONFIGURATION, file_obj_len, None, None + ) + + request_args = transport.request.mock_calls[0][2] + request_data = request_args["data"].decode("utf-8") + request_headers = request_args["headers"] + + request_content = email.message_from_string( + "Content-Type: {}\n{}".format( + request_headers["content-type"].decode("utf-8"), request_data + ) + ) + + configuration_data = request_content.get_payload(0).get_payload() + binary_data = request_content.get_payload(1).get_payload() + + assert json.loads(configuration_data) == EXPECTED_CONFIGURATION + assert binary_data.encode("utf-8") == file_obj.getvalue() + + +def test__do_multipart_upload_wrong_size(): + client = _make_client() + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + with pytest.raises(ValueError): + client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None, None) + + +def test_schema_from_json_with_file_path(): + from google.cloud.bigquery.schema import SchemaField + + file_content = """ + [ + { + "description": "quarter", + "mode": "REQUIRED", + "name": "qtr", + "type": "STRING" + }, + { + "description": "sales representative", + "mode": "NULLABLE", + "name": "rep", + "type": "STRING" + }, + { + "description": "total sales", + "mode": "NULLABLE", + "name": "sales", + "type": "FLOAT" + } + ]""" + + expected = [ + SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), + SchemaField( + "rep", + "STRING", + "NULLABLE", + description="sales representative", + ), + SchemaField( + "sales", + "FLOAT", + "NULLABLE", + description="total sales", + ), + ] + + client = _make_client() + mock_file_path = "/mocked/file.json" + + open_patch = mock.patch("builtins.open", new=mock.mock_open(read_data=file_content)) + + with open_patch as _mock_file: + actual = client.schema_from_json(mock_file_path) + _mock_file.assert_called_once_with(mock_file_path) + _mock_file.return_value.read.assert_called_once() + + assert expected == actual diff --git a/packages/google-cloud-bigquery/tests/unit/test_client_retry.py b/packages/google-cloud-bigquery/tests/unit/test_client_retry.py new file mode 100644 index 000000000000..6e49cc46400a --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/test_client_retry.py @@ -0,0 +1,279 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import freezegun +import google.api_core.exceptions +from google.cloud.bigquery import job as bqjob +from google.cloud.bigquery.retry import DEFAULT_RETRY +from .helpers import make_connection + + +PROJECT = "test-project" + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(*args, **kw): + from google.cloud.bigquery.client import Client + + return Client(*args, **kw) + + +def test_get_service_account_email_w_custom_retry(global_time_lock): + api_path = f"/projects/{PROJECT}/serviceAccount" + creds = _make_credentials() + http = object() + client = _make_client(project=PROJECT, credentials=creds, _http=http) + + resource = { + "kind": "bigquery#getServiceAccountResponse", + "email": "bq-123@bigquery-encryption.iam.gserviceaccount.com", + } + api_request_patcher = mock.patch.object( + client._connection, + "api_request", + side_effect=[ValueError, resource], + ) + + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, ValueError) + ) + + with api_request_patcher as fake_api_request: + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + service_account_email = client.get_service_account_email( + retry=retry, timeout=7.5 + ) + + final_attributes.assert_called_once_with({"path": api_path}, client, None) + assert service_account_email == "bq-123@bigquery-encryption.iam.gserviceaccount.com" + assert fake_api_request.call_args_list == [ + mock.call(method="GET", path=api_path, timeout=7.5), + mock.call(method="GET", path=api_path, timeout=7.5), # was retried once + ] + + +def test_call_api_applying_custom_retry_on_timeout(global_time_lock): + from concurrent.futures import TimeoutError + + creds = _make_credentials() + client = _make_client(project=PROJECT, credentials=creds) + + api_request_patcher = mock.patch.object( + client._connection, + "api_request", + side_effect=[TimeoutError, "result"], + ) + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, TimeoutError) + ) + + with api_request_patcher as fake_api_request: + result = client._call_api(retry, foo="bar") + + assert result == "result" + assert fake_api_request.call_args_list == [ + mock.call(foo="bar"), + mock.call(foo="bar"), + ] + + +def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404( + global_time_lock, +): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404, but we know + because of the conflict that really the job does exist. Retry until we + get the job status (or timeout). + """ + job_id = "abc123" + creds = _make_credentials() + http = object() + client = _make_client(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection( + # We're mocking QueryJob._begin, so this is only going to be + # jobs.get requests and responses. + google.api_core.exceptions.TooManyRequests("this is retriable by default"), + google.api_core.exceptions.NotFound("we lost your job"), + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": PROJECT, + "location": "TESTLOC", + "jobId": job_id, + } + }, + ) + + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + return_value=job_id, + ) + + with job_begin_patcher, job_id_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{job_id}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path] + * 4, + ) + assert result.job_id == job_id + + +def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert( + global_time_lock, +): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404. If it keeps + failing with a 404, assume that the job actually doesn't exist. + """ + job_id_1 = "abc123" + job_id_2 = "xyz789" + creds = _make_credentials() + http = object() + client = _make_client(project=PROJECT, credentials=creds, _http=http) + + # We're mocking QueryJob._begin, so that the connection should only get + # jobs.get requests. + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": PROJECT, + "location": "TESTLOC", + "jobId": job_id_2, + } + }, + ) + + # Choose a small deadline so the 404 retries give up. + retry = google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + side_effect=[job_id_1, job_id_2], + ) + retry_patcher = mock.patch.object( + google.cloud.bigquery.retry, + "_DEFAULT_GET_JOB_CONFLICT_RETRY", + retry, + ) + + with freezegun.freeze_time( + "2025-01-01 00:00:00", + # 10x the retry deadline to guarantee a timeout. + auto_tick_seconds=10, + ), job_begin_patcher, job_id_patcher, retry_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path_1 = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{job_id_1}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + jobs_get_path_2 = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{job_id_2}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path_1, jobs_get_path_2], + ) + assert result.job_id == job_id_2 + + +def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retry(global_time_lock): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + If we get a 409 conflict on jobs.insert, and we are using a random + job ID, we should retry by getting the job by ID. This test ensures that + if the get job by ID fails, we retry the whole sequence. + """ + from google.cloud.bigquery import job + + client = _make_client(project=PROJECT, credentials=_make_credentials()) + job_id = "some-random-job-id" + query_text = "SELECT 1" + job_config = job.QueryJobConfig() + job_config.use_legacy_sql = False + + job_resource = { + "jobReference": {"projectId": PROJECT, "jobId": job_id}, + "configuration": {"query": {"query": query_text}}, + "status": {"state": "DONE"}, + } + + conn = make_connection( + # First attempt at jobs.insert fails with a 409 + google.api_core.exceptions.Conflict("Job already exists."), + # First attempt at jobs.get fails with a 500 + google.api_core.exceptions.InternalServerError("get job failed"), + # Second attempt at jobs.insert succeeds + job_resource, + ) + client._connection = conn + + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + return_value=job_id, + ) + + with job_id_patcher: + query_job = client.query(query_text, job_config=job_config, job_id=None) + + assert query_job.job_id == job_id diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py index 7144c640bf3a..7343fed3daec 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -80,7 +80,7 @@ ), ], ) -def test_retry_failed_jobs(sleep, reason, job_retry, result_retry): +def test_retry_failed_jobs(sleep, reason, job_retry, result_retry, global_time_lock): client = make_client() err = dict(reason=reason) conn = client._connection = make_connection( @@ -138,7 +138,7 @@ def test_retry_failed_jobs(sleep, reason, job_retry, result_retry): def test_retry_connection_error_with_default_retries_and_successful_first_job( - monkeypatch, client + monkeypatch, client, global_time_lock ): """ Make sure ConnectionError can be retried at `is_job_done` level, even if @@ -254,7 +254,7 @@ def make_job_id(*args, **kwargs): def test_query_retry_with_default_retry_and_ambiguous_errors_only_retries_with_failed_job( - client, monkeypatch + client, monkeypatch, global_time_lock ): """ Some errors like 'rateLimitExceeded' can be ambiguous. Make sure we only @@ -419,7 +419,7 @@ def make_job_id(*args, **kwargs): # - Pass None retry to `result`. @pytest.mark.parametrize("job_retry_on_query", ["Query", "Result"]) @mock.patch("time.sleep") -def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query): +def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query, global_time_lock): """ Test retry of job failures, as opposed to API-invocation failures. """ @@ -450,7 +450,7 @@ def api_request(method, path, query_params=None, data=None, **kw): @mock.patch("time.sleep") -def test_retry_failed_jobs_after_retry_failed(sleep, client): +def test_retry_failed_jobs_after_retry_failed(sleep, client, global_time_lock): """ If at first you don't succeed, maybe you will later. :) """ @@ -508,7 +508,7 @@ def api_request(method, path, query_params=None, data=None, **kw): assert job.job_id != orig_job_id -def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): +def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client, global_time_lock): with pytest.raises( TypeError, match=( @@ -520,7 +520,9 @@ def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) -def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): +def test_raises_on_job_retry_on_result_with_non_retryable_jobs( + client, global_time_lock +): client._connection = make_connection({}) with pytest.warns( @@ -542,7 +544,7 @@ def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): job.result(job_retry=google.api_core.retry.Retry()) -def test_query_and_wait_retries_job_for_DDL_queries(): +def test_query_and_wait_retries_job_for_DDL_queries(global_time_lock): """ Specific test for retrying DDL queries with "jobRateLimitExceeded" error: https://github.com/googleapis/python-bigquery/issues/1790 From f6ee54bfa5d8351ff83390c932f2b2c2e818dde3 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 15:03:23 -0500 Subject: [PATCH 1990/2016] chore(main): release 3.36.0 (#2255) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 14 ++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 374448a5e91f..62352c344bb6 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.36.0](https://github.com/googleapis/python-bigquery/compare/v3.35.1...v3.36.0) (2025-08-20) + + +### Features + +* Add created/started/ended properties to RowIterator. ([#2260](https://github.com/googleapis/python-bigquery/issues/2260)) ([0a95b24](https://github.com/googleapis/python-bigquery/commit/0a95b24192395cc3ccf801aa9bc318999873a2bf)) +* Retry query jobs if `jobBackendError` or `jobInternalError` are encountered ([#2256](https://github.com/googleapis/python-bigquery/issues/2256)) ([3deff1d](https://github.com/googleapis/python-bigquery/commit/3deff1d963980800e8b79fa3aaf5b712d4fd5062)) + + +### Documentation + +* Add a TROUBLESHOOTING.md file with tips for logging ([#2262](https://github.com/googleapis/python-bigquery/issues/2262)) ([b684832](https://github.com/googleapis/python-bigquery/commit/b68483227693ea68f6b12eacca2be1803cffb1d1)) +* Update README to break infinite redirect loop ([#2254](https://github.com/googleapis/python-bigquery/issues/2254)) ([8f03166](https://github.com/googleapis/python-bigquery/commit/8f031666114a826da2ad965f8ecd4727466cb480)) + ## [3.35.1](https://github.com/googleapis/python-bigquery/compare/v3.35.0...v3.35.1) (2025-07-21) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index d565bc46e0fd..a8f4c8e14972 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.35.1" +__version__ = "3.36.0" From ec967c543e2213e27e1a3a7b843051d478756fa6 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 26 Aug 2025 10:52:30 -0500 Subject: [PATCH 1991/2016] feat: updates to fastpath query execution (#2268) This PR updates query handling to allow base config properties like job timeout, reservation, and a preview max slots field to leverage the faster path (e.g. using jobs.query rather than jobs.insert). --- .../google/cloud/bigquery/_job_helpers.py | 3 ++ .../google/cloud/bigquery/job/base.py | 31 ++++++++++++++ .../tests/unit/job/test_base.py | 41 +++++++++++++++++++ .../tests/unit/job/test_query_config.py | 5 +++ .../tests/unit/test__job_helpers.py | 28 +++++++++++++ 5 files changed, 108 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py index 6fd561f8c3a0..27e90246fdfe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/_job_helpers.py @@ -658,6 +658,9 @@ def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool: "requestId", "createSession", "writeIncrementalResults", + "jobTimeoutMs", + "reservation", + "maxSlots", } unsupported_keys = request_keys - keys_allowlist diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index f007b93413ef..1344082be77d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -224,6 +224,37 @@ def job_timeout_ms(self, value): else: self._properties.pop("jobTimeoutMs", None) + @property + def max_slots(self) -> Optional[int]: + """The maximum rate of slot consumption to allow for this job. + + If set, the number of slots used to execute the job will be throttled + to try and keep its slot consumption below the requested rate. + This feature is not generally available. + """ + + max_slots = self._properties.get("maxSlots") + if max_slots is not None: + if isinstance(max_slots, str): + return int(max_slots) + if isinstance(max_slots, int): + return max_slots + return None + + @max_slots.setter + def max_slots(self, value): + try: + value = _int_or_none(value) + except ValueError as err: + raise ValueError("Pass an int for max slots, e.g. 100").with_traceback( + err.__traceback__ + ) + + if value is not None: + self._properties["maxSlots"] = str(value) + else: + self._properties.pop("maxSlots", None) + @property def reservation(self): """str: Optional. The reservation that job would use. diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_base.py b/packages/google-cloud-bigquery/tests/unit/job/test_base.py index f5861f645a1b..4209048205a5 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_base.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_base.py @@ -1276,3 +1276,44 @@ def test_reservation_setter(self): job_config = self._make_one() job_config.reservation = "foo" self.assertEqual(job_config._properties["reservation"], "foo") + + def test_max_slots_miss(self): + job_config = self._make_one() + self.assertEqual(job_config.max_slots, None) + + def test_max_slots_set_and_clear(self): + job_config = self._make_one() + job_config.max_slots = 14 + self.assertEqual(job_config.max_slots, 14) + job_config.max_slots = None + self.assertEqual(job_config.max_slots, None) + + def test_max_slots_hit_str(self): + job_config = self._make_one() + job_config._properties["maxSlots"] = "4" + self.assertEqual(job_config.max_slots, 4) + + def test_max_slots_hit_int(self): + job_config = self._make_one() + job_config._properties["maxSlots"] = int(3) + self.assertEqual(job_config.max_slots, 3) + + def test_max_slots_hit_invalid(self): + job_config = self._make_one() + job_config._properties["maxSlots"] = object() + self.assertEqual(job_config.max_slots, None) + + def test_max_slots_update_in_place(self): + job_config = self._make_one() + job_config.max_slots = 45 # update in place + self.assertEqual(job_config.max_slots, 45) + + def test_max_slots_setter_invalid(self): + job_config = self._make_one() + with self.assertRaises(ValueError): + job_config.max_slots = "foo" + + def test_max_slots_setter(self): + job_config = self._make_one() + job_config.max_slots = 123 + self.assertEqual(job_config._properties["maxSlots"], "123") diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py index e0878d067b67..a63a14b73f46 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_config.py @@ -172,6 +172,11 @@ def test_incremental_results(self): config.write_incremental_results = True self.assertEqual(config.write_incremental_results, True) + def test_max_slots(self): + config = self._get_target_class()() + config.max_slots = 99 + self.assertEqual(config.max_slots, 99) + def test_create_session(self): config = self._get_target_class()() self.assertIsNone(config.create_session) diff --git a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py index 1f543f0330b9..10cbefe1371c 100644 --- a/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py +++ b/packages/google-cloud-bigquery/tests/unit/test__job_helpers.py @@ -200,6 +200,19 @@ def make_query_response( make_query_request({"writeIncrementalResults": True}), id="job_config-with-incremental-results", ), + pytest.param( + job_query.QueryJobConfig( + reservation="foo", + max_slots=100, + ), + make_query_request( + { + "maxSlots": "100", + "reservation": "foo", + } + ), + id="job_config-with-reservation-and-slots", + ), ), ) def test__to_query_request(job_config, expected): @@ -1048,6 +1061,21 @@ def test_make_job_id_w_job_id_overrides_prefix(): True, id="write_incremental_results", ), + pytest.param( + job_query.QueryJobConfig(job_timeout_ms=1000), + True, + id="job_timeout_ms", + ), + pytest.param( + job_query.QueryJobConfig(reservation="foo"), + True, + id="reservation", + ), + pytest.param( + job_query.QueryJobConfig(max_slots=20), + True, + id="max_slots", + ), ), ) def test_supported_by_jobs_query_from_queryjobconfig( From 415242e2766ec086d212f64a7b8e871d1a215b0a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 3 Sep 2025 18:27:17 +0200 Subject: [PATCH 1992/2016] chore(deps): update all dependencies (#2259) --- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements.txt | 10 +++++----- .../samples/magics/requirements.txt | 6 +++--- .../samples/notebooks/requirements.txt | 10 +++++----- .../samples/snippets/requirements.txt | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index e7a02eca5dd7..f86e57e5c265 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 google-auth-oauthlib==1.2.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index fa54cc2297ee..c2bd74bedb89 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==25.3.0 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' click==8.2.1; python_version >= '3.10' click-plugins==1.1.1.2 @@ -13,7 +13,7 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 @@ -24,7 +24,7 @@ idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.3.1 +pandas==2.3.2 proto-plus==1.26.1 pyarrow==21.0.0 pyasn1==0.6.1 @@ -34,11 +34,11 @@ pyparsing==3.2.3 python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 -requests==2.32.4 +requests==2.32.5 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' six==1.17.0 -typing-extensions==4.14.1 +typing-extensions==4.15.0 typing-inspect==0.9.0 urllib3==2.5.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index e7230053c622..7b4f84e8e06d 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.10.2 +bigquery_magics==0.10.3 db-dtypes==1.4.3 -google.cloud.bigquery==3.35.1 +google.cloud.bigquery==3.36.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 -pandas==2.3.1 +pandas==2.3.2 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index 829f08f472e4..dc22903c7f36 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ -bigquery-magics==0.10.2 +bigquery-magics==0.10.3 db-dtypes==1.4.3 -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' -ipython==9.4.0; python_version >= '3.10' +ipython==9.5.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.5; python_version >= '3.10' -pandas==2.3.1 +matplotlib==3.10.6; python_version >= '3.10' +pandas==2.3.2 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index afa62b6b8e6a..23da68d60478 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 From 27b1f9f578bafc3fa6e78608c47216e4007d4cf3 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 3 Sep 2025 14:30:49 -0400 Subject: [PATCH 1993/2016] bug: updates `__eq__` comparison on TableConstraint (#2274) * bug: updates __eq__ comparison on TableConstraint * updates tests * moves test out of class to accommodate pytest params --- .../google/cloud/bigquery/table.py | 6 +- .../tests/unit/test_table.py | 144 +++++++++--------- 2 files changed, 71 insertions(+), 79 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 219b314678c6..5efcb19588c6 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -3574,9 +3574,9 @@ def __init__( def __eq__(self, other): if not isinstance(other, TableConstraints) and other is not None: raise TypeError("The value provided is not a BigQuery TableConstraints.") - return ( - self.primary_key == other.primary_key if other.primary_key else None - ) and (self.foreign_keys == other.foreign_keys if other.foreign_keys else None) + return self.primary_key == ( + other.primary_key if other.primary_key else None + ) and self.foreign_keys == (other.foreign_keys if other.foreign_keys else None) @classmethod def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints": diff --git a/packages/google-cloud-bigquery/tests/unit/test_table.py b/packages/google-cloud-bigquery/tests/unit/test_table.py index eb2c8d9ec6a4..af31d116b7bb 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_table.py +++ b/packages/google-cloud-bigquery/tests/unit/test_table.py @@ -6322,82 +6322,6 @@ def test_constructor_explicit_with_none(self): self.assertIsNone(table_constraint.primary_key) self.assertIsNone(table_constraint.foreign_keys) - def test__eq__primary_key_mismatch(self): - from google.cloud.bigquery.table import ( - PrimaryKey, - ForeignKey, - TableReference, - ColumnReference, - ) - - foriegn_keys = [ - ForeignKey( - name="my_fk_id", - referenced_table=TableReference.from_string( - "my-project.my-dataset.my-table" - ), - column_references=[ - ColumnReference(referencing_column="id", referenced_column="id"), - ], - ), - ] - - table_constraint = self._make_one( - primary_key=PrimaryKey(columns=["my_pk_id"]), - foreign_keys=foriegn_keys, - ) - other_table_constraint = self._make_one( - primary_key=PrimaryKey(columns=["my_other_pk_id"]), - foreign_keys=foriegn_keys, - ) - - self.assertNotEqual(table_constraint, other_table_constraint) - - def test__eq__foreign_keys_mismatch(self): - from google.cloud.bigquery.table import ( - PrimaryKey, - ForeignKey, - TableReference, - ColumnReference, - ) - - primary_key = PrimaryKey(columns=["my_pk_id"]) - - table_constraint = self._make_one( - primary_key=primary_key, - foreign_keys=[ - ForeignKey( - name="my_fk_id", - referenced_table=TableReference.from_string( - "my-project.my-dataset.my-table" - ), - column_references=[ - ColumnReference( - referencing_column="id", referenced_column="id" - ), - ], - ), - ], - ) - other_table_constraint = self._make_one( - primary_key=primary_key, - foreign_keys=[ - ForeignKey( - name="my_other_fk_id", - referenced_table=TableReference.from_string( - "my-project.my-dataset.my-other-table" - ), - column_references=[ - ColumnReference( - referencing_column="other_id", referenced_column="other_id" - ), - ], - ), - ], - ) - - self.assertNotEqual(table_constraint, other_table_constraint) - def test__eq__other_type(self): from google.cloud.bigquery.table import ( PrimaryKey, @@ -6615,6 +6539,74 @@ def test_to_api_repr_empty_constraints(self): self.assertEqual(instance.to_api_repr(), expected) +@pytest.mark.parametrize( + "self_pk_name,self_fk_name,other_pk_name,other_fk_name,expected_equal", + [ + (None, None, None, None, True), + ("pkey", None, "pkey", None, True), + ("pkey", "fkey", "pkey", "fkey", True), + (None, "fkey", None, "fkey", True), + ("pkey", None, "pkey_no_match", None, False), + ("pkey", "fkey", "pkey_no_match", "fkey_no_match", False), + (None, "fkey", None, "fkey_no_match", False), + ("pkey", "fkey", "pkey_no_match", "fkey", False), + ("pkey", "fkey", "pkey", "fkey_no_match", False), + ], +) +def test_table_constraint_eq_parametrized( + self_pk_name, self_fk_name, other_pk_name, other_fk_name, expected_equal +): + # Imports are placed here to ensure they are self-contained for this example. + # In a real test file, they would likely be at the top of the file. + from google.cloud.bigquery.table import ( + ColumnReference, + ForeignKey, + PrimaryKey, + TableReference, + TableConstraints, + ) + + # Helper function to create a PrimaryKey object or None + def _create_primary_key(name): + if name is None: + return None + return PrimaryKey(columns=[name]) + + # Helper function to create a list of ForeignKey objects or None + def _create_foreign_keys(name): + if name is None: + return None + # Using a generic referenced_table and column_references for simplicity + # The 'name' parameter ensures different ForeignKey objects for different names + return [ + ForeignKey( + name=name, + referenced_table=TableReference.from_string( + f"my-project.my-dataset.{name}_referenced_table" + ), + column_references=[ + ColumnReference( + referencing_column=f"{name}_ref_col", + referenced_column=f"{name}_pk_col", + ) + ], + ) + ] + + # Create the two TableConstraints instances for comparison + tc1 = TableConstraints( + primary_key=_create_primary_key(self_pk_name), + foreign_keys=_create_foreign_keys(self_fk_name), + ) + tc2 = TableConstraints( + primary_key=_create_primary_key(other_pk_name), + foreign_keys=_create_foreign_keys(other_fk_name), + ) + + # Assert the equality based on the expected outcome + assert (tc1 == tc2) == expected_equal + + class TestExternalCatalogTableOptions: PROJECT = "test-project" DATASET_ID = "test_dataset" From 8f4e68350d832f05f4f1e0c4aa3d8b9dd06c7366 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 4 Sep 2025 13:25:49 -0500 Subject: [PATCH 1994/2016] docs: clarify the api_method arg for client.query() (#2277) * docs: clarify the api_method arg for client.query() --- packages/google-cloud-bigquery/google/cloud/bigquery/client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index 4ca2cb4283dc..ea592852a1a5 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -3519,7 +3519,8 @@ def query( specified here becomes the default ``job_retry`` for ``result()``, where it can also be specified. api_method (Union[str, enums.QueryApiMethod]): - Method with which to start the query job. + Method with which to start the query job. By default, + the jobs.insert API is used for starting a query. See :class:`google.cloud.bigquery.enums.QueryApiMethod` for details on the difference between the query start methods. From 0600edc4183125939f87f24efa295a1edf88891d Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Sat, 6 Sep 2025 10:16:44 -0700 Subject: [PATCH 1995/2016] fix: remove deepcopy while setting properties for _QueryResults (#2280) --- packages/google-cloud-bigquery/google/cloud/bigquery/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py index 7f70f6a2a87a..170ed2976849 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/query.py @@ -1400,7 +1400,7 @@ def _set_properties(self, api_response): api_response (Dict): Response returned from an API call """ self._properties.clear() - self._properties.update(copy.deepcopy(api_response)) + self._properties.update(api_response) def _query_param_from_api_repr(resource): From 0ee271fa87ca1ac9ba572e85ee5725d2a562c2ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 8 Sep 2025 09:58:27 -0500 Subject: [PATCH 1996/2016] docs: clarify that the presence of `XyzJob.errors` doesn't necessarily mean that the job has not completed or was unsuccessful (#2278) Internal issue b/440349994 --- .../google/cloud/bigquery/job/base.py | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 1344082be77d..9b7ddb82da21 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -693,7 +693,12 @@ def transaction_info(self) -> Optional[TransactionInfo]: @property def error_result(self): - """Error information about the job as a whole. + """Output only. Final error result of the job. + + If present, indicates that the job has completed and was unsuccessful. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.error_result Returns: Optional[Mapping]: the error information (None until set from the server). @@ -704,7 +709,13 @@ def error_result(self): @property def errors(self): - """Information about individual errors generated by the job. + """Output only. The first errors encountered during the running of the job. + + The final message includes the number of errors that caused the process to stop. + Errors here do not necessarily mean that the job has not completed or was unsuccessful. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.errors Returns: Optional[List[Mapping]]: @@ -716,7 +727,12 @@ def errors(self): @property def state(self): - """Status of the job. + """Output only. Running state of the job. + + Valid states include 'PENDING', 'RUNNING', and 'DONE'. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.state Returns: Optional[str]: From 1e86b76873bf73833b408b0db9fe8a1a87bd8986 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 9 Sep 2025 10:01:33 -0700 Subject: [PATCH 1997/2016] chore(main): release 3.37.0 (#2269) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 18 ++++++++++++++++++ .../google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index 62352c344bb6..fe721dfde2fc 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.37.0](https://github.com/googleapis/python-bigquery/compare/v3.36.0...v3.37.0) (2025-09-08) + + +### Features + +* Updates to fastpath query execution ([#2268](https://github.com/googleapis/python-bigquery/issues/2268)) ([ef2740a](https://github.com/googleapis/python-bigquery/commit/ef2740a158199633b5543a7b6eb19587580792cd)) + + +### Bug Fixes + +* Remove deepcopy while setting properties for _QueryResults ([#2280](https://github.com/googleapis/python-bigquery/issues/2280)) ([33ea296](https://github.com/googleapis/python-bigquery/commit/33ea29616c06a2e2a106a785d216e784737ae386)) + + +### Documentation + +* Clarify that the presence of `XyzJob.errors` doesn't necessarily mean that the job has not completed or was unsuccessful ([#2278](https://github.com/googleapis/python-bigquery/issues/2278)) ([6e88d7d](https://github.com/googleapis/python-bigquery/commit/6e88d7dbe42ebfc35986da665d656b49ac481db4)) +* Clarify the api_method arg for client.query() ([#2277](https://github.com/googleapis/python-bigquery/issues/2277)) ([8a13c12](https://github.com/googleapis/python-bigquery/commit/8a13c12905ffcb3dbb6086a61df37556f0c2cd31)) + ## [3.36.0](https://github.com/googleapis/python-bigquery/compare/v3.35.1...v3.36.0) (2025-08-20) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index a8f4c8e14972..aa24ae04e80d 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.36.0" +__version__ = "3.37.0" From 8541da0ff28d84df510be3b59d5577f060b93ae6 Mon Sep 17 00:00:00 2001 From: shollyman Date: Sat, 13 Sep 2025 08:02:19 -0700 Subject: [PATCH 1998/2016] feat: add additional query stats (#2270) * feat: add additional query stats This PR adds support for incremental query stats. --- .../google/cloud/bigquery/job/__init__.py | 2 + .../google/cloud/bigquery/job/query.py | 67 +++++++++++++++++++ .../tests/unit/job/test_query.py | 17 +++++ .../tests/unit/job/test_query_stats.py | 61 +++++++++++++++++ 4 files changed, 147 insertions(+) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py index f51311b0bb01..4cda6596516c 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/__init__.py @@ -39,6 +39,7 @@ from google.cloud.bigquery.job.query import QueryPlanEntryStep from google.cloud.bigquery.job.query import ScriptOptions from google.cloud.bigquery.job.query import TimelineEntry +from google.cloud.bigquery.job.query import IncrementalResultStats from google.cloud.bigquery.enums import Compression from google.cloud.bigquery.enums import CreateDisposition from google.cloud.bigquery.enums import DestinationFormat @@ -84,4 +85,5 @@ "SourceFormat", "TransactionInfo", "WriteDisposition", + "IncrementalResultStats", ] diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index b377f979dadd..38b8a71488ca 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -197,6 +197,66 @@ def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats": return cls(*args) +class IncrementalResultStats: + """IncrementalResultStats provides information about incremental query execution.""" + + def __init__(self): + self._properties = {} + + @classmethod + def from_api_repr(cls, resource) -> "IncrementalResultStats": + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: object]): + IncrementalResultStats representation returned from API. + + Returns: + google.cloud.bigquery.job.IncrementalResultStats: + stats parsed from ``resource``. + """ + entry = cls() + entry._properties = resource + return entry + + @property + def disabled_reason(self): + """Optional[string]: Reason why incremental results were not + written by the query. + """ + return _helpers._str_or_none(self._properties.get("disabledReason")) + + @property + def result_set_last_replace_time(self): + """Optional[datetime]: The time at which the result table's contents + were completely replaced. May be absent if no results have been written + or the query has completed.""" + from google.cloud._helpers import _rfc3339_nanos_to_datetime + + value = self._properties.get("resultSetLastReplaceTime") + if value: + try: + return _rfc3339_nanos_to_datetime(value) + except ValueError: + pass + return None + + @property + def result_set_last_modify_time(self): + """Optional[datetime]: The time at which the result table's contents + were modified. May be absent if no results have been written or the + query has completed.""" + from google.cloud._helpers import _rfc3339_nanos_to_datetime + + value = self._properties.get("resultSetLastModifyTime") + if value: + try: + return _rfc3339_nanos_to_datetime(value) + except ValueError: + pass + return None + + class IndexUnusedReason(typing.NamedTuple): """Reason about why no search index was used in the search query (or sub-query). @@ -1339,6 +1399,13 @@ def bi_engine_stats(self) -> Optional[BiEngineStats]: else: return BiEngineStats.from_api_repr(stats) + @property + def incremental_result_stats(self) -> Optional[IncrementalResultStats]: + stats = self._job_statistics().get("incrementalResultStats") + if stats is None: + return None + return IncrementalResultStats.from_api_repr(stats) + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index ef64295989e8..4a6771c4639b 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -838,6 +838,23 @@ def test_search_stats(self): assert isinstance(job.search_stats, SearchStats) assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED" + def test_incremental_result_stats(self): + from google.cloud.bigquery.job.query import IncrementalResultStats + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + assert job.incremental_result_stats is None + + statistics = job._properties["statistics"] = {} + assert job.incremental_result_stats is None + + query_stats = statistics["query"] = {} + assert job.incremental_result_stats is None + + query_stats["incrementalResultStats"] = {"disabledReason": "BAZ"} + assert isinstance(job.incremental_result_stats, IncrementalResultStats) + assert job.incremental_result_stats.disabled_reason == "BAZ" + def test_reload_query_results_uses_transport_timeout(self): conn = make_connection({}) client = _make_client(self.PROJECT, connection=conn) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py index 61b278d43f88..c7c7a31e05be 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query_stats.py @@ -13,6 +13,7 @@ # limitations under the License. from .helpers import _Base +import datetime class TestBiEngineStats: @@ -520,3 +521,63 @@ def test_from_api_repr_normal(self): self.assertEqual(entry.pending_units, self.PENDING_UNITS) self.assertEqual(entry.completed_units, self.COMPLETED_UNITS) self.assertEqual(entry.slot_millis, self.SLOT_MILLIS) + + +class TestIncrementalResultStats: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import IncrementalResultStats + + return IncrementalResultStats + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + stats = self._make_one() + assert stats.disabled_reason is None + assert stats.result_set_last_replace_time is None + assert stats.result_set_last_modify_time is None + + def test_from_api_repr_partial_stats(self): + klass = self._get_target_class() + stats = klass.from_api_repr({"disabledReason": "FOO"}) + + assert isinstance(stats, klass) + assert stats.disabled_reason == "FOO" + assert stats.result_set_last_replace_time is None + assert stats.result_set_last_modify_time is None + + def test_from_api_repr_full_stats(self): + klass = self._get_target_class() + stats = klass.from_api_repr( + { + "disabledReason": "BAR", + "resultSetLastReplaceTime": "2025-01-02T03:04:05.06Z", + "resultSetLastModifyTime": "2025-02-02T02:02:02.02Z", + } + ) + + assert isinstance(stats, klass) + assert stats.disabled_reason == "BAR" + assert stats.result_set_last_replace_time == datetime.datetime( + 2025, 1, 2, 3, 4, 5, 60000, tzinfo=datetime.timezone.utc + ) + assert stats.result_set_last_modify_time == datetime.datetime( + 2025, 2, 2, 2, 2, 2, 20000, tzinfo=datetime.timezone.utc + ) + + def test_from_api_repr_invalid_stats(self): + klass = self._get_target_class() + stats = klass.from_api_repr( + { + "disabledReason": "BAR", + "resultSetLastReplaceTime": "xxx", + "resultSetLastModifyTime": "yyy", + } + ) + + assert isinstance(stats, klass) + assert stats.disabled_reason == "BAR" + assert stats.result_set_last_replace_time is None + assert stats.result_set_last_modify_time is None From e78fe178c9eccd9619ff159fc5bed96fa65ed404 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 15 Sep 2025 14:21:21 +0200 Subject: [PATCH 1999/2016] chore(deps): update all dependencies (#2275) --- .../samples/desktopapp/requirements-test.txt | 2 +- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements-test.txt | 2 +- .../samples/geography/requirements.txt | 10 +++++----- .../samples/magics/requirements-test.txt | 2 +- .../samples/magics/requirements.txt | 4 ++-- .../samples/notebooks/requirements-test.txt | 2 +- .../samples/notebooks/requirements.txt | 4 ++-- .../samples/snippets/requirements-test.txt | 2 +- .../samples/snippets/requirements.txt | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt index 3bf52c85d823..31b836790f7c 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index f86e57e5c265..21ccef2fdc8f 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.36.0 +google-cloud-bigquery==3.37.0 google-auth-oauthlib==1.2.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt index d449b373b688..6fb9ba31062d 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index c2bd74bedb89..c8a93a35ee52 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,6 +1,6 @@ attrs==25.3.0 certifi==2025.8.3 -cffi==1.17.1 +cffi==2.0.0 charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' click==8.2.1; python_version >= '3.10' @@ -13,8 +13,8 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.36.0 -google-cloud-bigquery-storage==2.32.0 +google-cloud-bigquery==3.37.0 +google-cloud-bigquery-storage==2.33.1 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 @@ -29,8 +29,8 @@ proto-plus==1.26.1 pyarrow==21.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 -pycparser==2.22 -pyparsing==3.2.3 +pycparser==2.23 +pyparsing==3.2.4 python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt index 3bf52c85d823..31b836790f7c 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index 7b4f84e8e06d..d10d53c24480 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.3 db-dtypes==1.4.3 -google.cloud.bigquery==3.36.0 -google-cloud-bigquery-storage==2.32.0 +google.cloud.bigquery==3.37.0 +google-cloud-bigquery-storage==2.33.1 ipython===8.18.1 pandas==2.3.2 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt index 3bf52c85d823..31b836790f7c 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index dc22903c7f36..f65008baa667 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ bigquery-magics==0.10.3 db-dtypes==1.4.3 -google-cloud-bigquery==3.36.0 -google-cloud-bigquery-storage==2.32.0 +google-cloud-bigquery==3.37.0 +google-cloud-bigquery-storage==2.33.1 ipython===8.18.1; python_version == '3.9' ipython==9.5.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt index cef3450e15bd..901f1df1a9d0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 23da68d60478..1fed246f3d3a 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.36.0 +google-cloud-bigquery==3.37.0 From 7207f9f5189bd9093e314177432367b9e8a66b50 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 17 Sep 2025 13:23:31 -0700 Subject: [PATCH 2000/2016] chore(main): release 3.38.0 (#2289) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- packages/google-cloud-bigquery/CHANGELOG.md | 7 +++++++ .../google-cloud-bigquery/google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/CHANGELOG.md b/packages/google-cloud-bigquery/CHANGELOG.md index fe721dfde2fc..95db5735c18e 100644 --- a/packages/google-cloud-bigquery/CHANGELOG.md +++ b/packages/google-cloud-bigquery/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.38.0](https://github.com/googleapis/python-bigquery/compare/v3.37.0...v3.38.0) (2025-09-15) + + +### Features + +* Add additional query stats ([#2270](https://github.com/googleapis/python-bigquery/issues/2270)) ([7b1b718](https://github.com/googleapis/python-bigquery/commit/7b1b718123afd80c0f68212946e4179bcd6db67f)) + ## [3.37.0](https://github.com/googleapis/python-bigquery/compare/v3.36.0...v3.37.0) (2025-09-08) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py index aa24ae04e80d..22550a8f1da4 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/version.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.37.0" +__version__ = "3.38.0" From aabedf955fcb7f579f9457719e3308948aeec453 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 19 Sep 2025 10:19:42 -0700 Subject: [PATCH 2001/2016] docs: remove experimental annotations from GA features (#2303) * docs: remove experimental annotations from GA features Corrects some documentation drift. --- .../google/cloud/bigquery/external_config.py | 20 +++---------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py index dc7a33e6a2b8..7e76f93b57fe 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/external_config.py @@ -637,11 +637,7 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": class HivePartitioningOptions(object): - """[Beta] Options that configure hive partitioning. - - .. note:: - **Experimental**. This feature is experimental and might change or - have limited support. + """Options that configure hive partitioning. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions @@ -808,13 +804,9 @@ def decimal_target_types(self, value: Optional[Iterable[str]]): @property def hive_partitioning(self): - """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ + """Optional[:class:`~.external_config.HivePartitioningOptions`]: When set, \ it configures hive partitioning support. - .. note:: - **Experimental**. This feature is experimental and might change or - have limited support. - See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options """ @@ -979,14 +971,8 @@ def timestamp_format(self, value: Optional[str]): @property def connection_id(self): - """Optional[str]: [Experimental] ID of a BigQuery Connection API + """Optional[str]: ID of a BigQuery Connection API resource. - - .. WARNING:: - - This feature is experimental. Pre-GA features may have limited - support, and changes to pre-GA features may not be compatible with - other pre-GA versions. """ return self._properties.get("connectionId") From 58dc210a3535fe7333d18d86294e1422380341e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 24 Sep 2025 14:29:28 -0500 Subject: [PATCH 2002/2016] fix: remove ambiguous error codes from query retries (#2308) Context: internal issue b/445984807 comment 10. --- packages/google-cloud-bigquery/google/cloud/bigquery/retry.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py index 8f469f2d33ae..19012efd65ca 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/retry.py @@ -121,9 +121,6 @@ def _should_retry_get_job_conflict(exc): """ job_retry_reasons = ( - "rateLimitExceeded", - "backendError", - "internalError", "jobBackendError", "jobInternalError", "jobRateLimitExceeded", From 1172f3e722b2d2f30c112d7e104f7a43f24c9307 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 30 Sep 2025 17:05:06 +0100 Subject: [PATCH 2003/2016] chore(deps): update all dependencies (#2300) --- .../samples/desktopapp/requirements.txt | 2 +- .../samples/geography/requirements.txt | 16 ++++++++-------- .../samples/magics/requirements.txt | 4 ++-- .../samples/notebooks/requirements.txt | 6 +++--- .../samples/snippets/requirements.txt | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt index 21ccef2fdc8f..56696f8689ca 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt +++ b/packages/google-cloud-bigquery/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.37.0 +google-cloud-bigquery==3.38.0 google-auth-oauthlib==1.2.2 diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index c8a93a35ee52..9fdca241ae42 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -3,7 +3,7 @@ certifi==2025.8.3 cffi==2.0.0 charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' -click==8.2.1; python_version >= '3.10' +click==8.3.0; python_version >= '3.10' click-plugins==1.1.1.2 cligj==0.7.2 db-dtypes==1.4.3 @@ -12,32 +12,32 @@ geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 -google-auth==2.40.3 -google-cloud-bigquery==3.37.0 +google-auth==2.41.0 +google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.74.0 +grpcio==1.75.1 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.3.2 +pandas==2.3.3 proto-plus==1.26.1 pyarrow==21.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.23 -pyparsing==3.2.4 +pyparsing==3.2.5 python-dateutil==2.9.0.post0 pytz==2025.2 -PyYAML==6.0.2 +PyYAML==6.0.3 requests==2.32.5 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' -Shapely==2.1.1; python_version >= '3.10' +Shapely==2.1.2; python_version >= '3.10' six==1.17.0 typing-extensions==4.15.0 typing-inspect==0.9.0 diff --git a/packages/google-cloud-bigquery/samples/magics/requirements.txt b/packages/google-cloud-bigquery/samples/magics/requirements.txt index d10d53c24480..331e910e2adf 100644 --- a/packages/google-cloud-bigquery/samples/magics/requirements.txt +++ b/packages/google-cloud-bigquery/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.3 db-dtypes==1.4.3 -google.cloud.bigquery==3.37.0 +google.cloud.bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 ipython===8.18.1 -pandas==2.3.2 +pandas==2.3.3 diff --git a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt index f65008baa667..ef509734a908 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/requirements.txt +++ b/packages/google-cloud-bigquery/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ bigquery-magics==0.10.3 db-dtypes==1.4.3 -google-cloud-bigquery==3.37.0 +google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 ipython===8.18.1; python_version == '3.9' -ipython==9.5.0; python_version >= '3.10' +ipython==9.6.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.6; python_version >= '3.10' -pandas==2.3.2 +pandas==2.3.3 diff --git a/packages/google-cloud-bigquery/samples/snippets/requirements.txt b/packages/google-cloud-bigquery/samples/snippets/requirements.txt index 1fed246f3d3a..441385536ce0 100644 --- a/packages/google-cloud-bigquery/samples/snippets/requirements.txt +++ b/packages/google-cloud-bigquery/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.37.0 +google-cloud-bigquery==3.38.0 From 3dc80bcbd4bb051f380fb5e94dd7f4c5507c100c Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 30 Sep 2025 13:54:00 -0400 Subject: [PATCH 2004/2016] fix: honor custom `retry` in `job.result()` (#2302) * fix(job): honor custom retry in job.result() The `_AsyncJob.result()` method was not correctly passing the `retry` argument to the superclass's `result()` method when the `retry` object was the same as the default retry object. This caused the default retry settings to be ignored in some cases. This change modifies the `result()` method to always pass the `retry` argument to the superclass, ensuring that the provided retry settings are always honored. A new test case is added to verify that `job.result()` correctly handles both the default retry and a custom retry object. * Update tests/unit/test_job_retry.py * Update tests/unit/test_job_retry.py * blacken and lint * udpates retry handling and testing of retry handling * Update tests/unit/test_job_retry.py * Update tests/unit/test_job_retry.py * Update tests/unit/test_job_retry.py --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .../google/cloud/bigquery/job/base.py | 3 +- .../tests/unit/test_job_retry.py | 77 +++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py index 9b7ddb82da21..7576fc9aa64e 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/base.py @@ -1044,8 +1044,7 @@ def result( # type: ignore # (incompatible with supertype) if self.state is None: self._begin(retry=retry, timeout=timeout) - kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry} - return super(_AsyncJob, self).result(timeout=timeout, **kwargs) + return super(_AsyncJob, self).result(timeout=timeout, retry=retry) def cancelled(self): """Check if the job has been cancelled. diff --git a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py index 7343fed3daec..fa55e8f6a864 100644 --- a/packages/google-cloud-bigquery/tests/unit/test_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/test_job_retry.py @@ -615,3 +615,80 @@ def test_query_and_wait_retries_job_for_DDL_queries(global_time_lock): _, kwargs = calls[3] assert kwargs["method"] == "POST" assert kwargs["path"] == query_request_path + + +@pytest.mark.parametrize( + "result_retry_param", + [ + pytest.param( + {}, + id="default retry {}", + ), + pytest.param( + { + "retry": google.cloud.bigquery.retry.DEFAULT_RETRY.with_timeout( + timeout=10.0 + ) + }, + id="custom retry object with timeout 10.0", + ), + ], +) +def test_retry_load_job_result(result_retry_param, PROJECT, DS_ID): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job.load import LoadJob + import google.cloud.bigquery.retry + + client = make_client() + conn = client._connection = make_connection( + dict( + status=dict(state="RUNNING"), + jobReference={"jobId": "id_1"}, + ), + google.api_core.exceptions.ServiceUnavailable("retry me"), + dict( + status=dict(state="DONE"), + jobReference={"jobId": "id_1"}, + statistics={"load": {"outputRows": 1}}, + ), + ) + + table_ref = DatasetReference(project=PROJECT, dataset_id=DS_ID).table("new_table") + job = LoadJob("id_1", source_uris=None, destination=table_ref, client=client) + + with mock.patch.object( + client, "_call_api", wraps=client._call_api + ) as wrapped_call_api: + result = job.result(**result_retry_param) + + assert job.state == "DONE" + assert result.output_rows == 1 + + # Check that _call_api was called multiple times due to retry + assert wrapped_call_api.call_count > 1 + + # Verify the retry object used in the calls to _call_api + expected_retry = result_retry_param.get( + "retry", google.cloud.bigquery.retry.DEFAULT_RETRY + ) + + for call in wrapped_call_api.mock_calls: + name, args, kwargs = call + # The retry object is the first positional argument to _call_api + called_retry = args[0] + + # We only care about the calls made during the job.result() polling + if kwargs.get("method") == "GET" and "jobs/id_1" in kwargs.get("path", ""): + assert called_retry._predicate == expected_retry._predicate + assert called_retry._initial == expected_retry._initial + assert called_retry._maximum == expected_retry._maximum + assert called_retry._multiplier == expected_retry._multiplier + assert called_retry._deadline == expected_retry._deadline + if "retry" in result_retry_param: + # Specifically check the timeout for the custom retry case + assert called_retry._timeout == 10.0 + else: + assert called_retry._timeout == expected_retry._timeout + + # The number of api_request calls should still be 3 + assert conn.api_request.call_count == 3 From 0542216f30c080327c0b48c6763613e5ee5c6332 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 1 Oct 2025 21:59:07 +0100 Subject: [PATCH 2005/2016] chore(deps): update dependency google-auth to v2.41.1 (#2312) --- .../google-cloud-bigquery/samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 9fdca241ae42..0a5d18d6d187 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 -google-auth==2.41.0 +google-auth==2.41.1 google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 google-cloud-core==2.4.3 From 157e5d4a1a75aa4a7bed48838a612d4cd6446ebc Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 7 Oct 2025 00:16:21 +0100 Subject: [PATCH 2006/2016] chore(deps): update all dependencies (#2314) --- .../samples/geography/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-bigquery/samples/geography/requirements.txt b/packages/google-cloud-bigquery/samples/geography/requirements.txt index 0a5d18d6d187..8955e0cfd669 100644 --- a/packages/google-cloud-bigquery/samples/geography/requirements.txt +++ b/packages/google-cloud-bigquery/samples/geography/requirements.txt @@ -1,5 +1,5 @@ -attrs==25.3.0 -certifi==2025.8.3 +attrs==25.4.0 +certifi==2025.10.5 cffi==2.0.0 charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' @@ -11,7 +11,7 @@ Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' -google-api-core==2.25.1 +google-api-core==2.25.2 google-auth==2.41.1 google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 From 6a7221c5f9332046ebfa59dbcf2873c5019548d2 Mon Sep 17 00:00:00 2001 From: Wabio Date: Mon, 13 Oct 2025 18:57:19 +0200 Subject: [PATCH 2007/2016] build: update `pyproject.toml` to follow PEP 639 (#2309) * update pyproject.toml to follow PEP 639 * Update pyproject.toml PEP 639 Thanks for the feedback, I've removed the version number completely as requested. * Update pyproject.toml --------- Co-authored-by: Chalmer Lowe --- packages/google-cloud-bigquery/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/pyproject.toml b/packages/google-cloud-bigquery/pyproject.toml index 9c91a2fc8513..1c6ec1f777a9 100644 --- a/packages/google-cloud-bigquery/pyproject.toml +++ b/packages/google-cloud-bigquery/pyproject.toml @@ -19,7 +19,8 @@ build-backend = "setuptools.build_meta" [project] name = "google-cloud-bigquery" authors = [{ name = "Google LLC", email = "googleapis-packages@google.com" }] -license = { text = "Apache 2.0" } +license = "Apache-2.0" +license-files = ["LICENSE"] requires-python = ">=3.9" description = "Google BigQuery API client library" readme = "README.rst" @@ -30,7 +31,6 @@ classifiers = [ # "Development Status :: 5 - Production/Stable" "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", From eebaf645d465de78db21dd53cc8306673a3ec7dd Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 14 Oct 2025 07:23:10 -0400 Subject: [PATCH 2008/2016] chore(python): Add Python 3.14 to python post processor image (#2317) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(python): Add Python 3.14 to python post processor image Source-Link: https://github.com/googleapis/synthtool/commit/16790a32126759493ba20781e04edd165825ff82 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:543e209e7c1c1ffe720eb4db1a3f045a75099304fb19aa11a47dc717b8aae2a9 * Update samples/snippets/noxfile.py * Update samples/notebooks/noxfile.py * Update samples/magics/noxfile.py * Update samples/geography/noxfile.py * Update samples/desktopapp/noxfile.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .../.github/.OwlBot.lock.yaml | 4 +- .../.kokoro/samples/python3.14/common.cfg | 40 +++++++++++++++++++ .../.kokoro/samples/python3.14/continuous.cfg | 6 +++ .../samples/python3.14/periodic-head.cfg | 11 +++++ .../.kokoro/samples/python3.14/periodic.cfg | 6 +++ .../.kokoro/samples/python3.14/presubmit.cfg | 6 +++ .../samples/desktopapp/noxfile.py | 2 +- .../samples/geography/noxfile.py | 2 +- .../samples/magics/noxfile.py | 2 +- .../samples/notebooks/noxfile.py | 2 +- .../samples/snippets/noxfile.py | 2 +- 11 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.14/common.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.14/continuous.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic-head.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic.cfg create mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.14/presubmit.cfg diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml index cea9eb68f368..4a311db0294c 100644 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3b3a31be60853477bc39ed8d9bac162cac3ba083724cecaad54eb81d4e4dae9c -# created: 2025-04-16T22:40:03.123475241Z + digest: sha256:543e209e7c1c1ffe720eb4db1a3f045a75099304fb19aa11a47dc717b8aae2a9 +# created: 2025-10-09T14:48:42.914384887Z diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/common.cfg new file mode 100644 index 000000000000..d2fcee553b25 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.14" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-314" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/continuous.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic-head.cfg new file mode 100644 index 000000000000..5aa01bab5bf3 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic.cfg new file mode 100644 index 000000000000..71cd1e597e38 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/presubmit.cfg new file mode 100644 index 000000000000..a1c8d9759c88 --- /dev/null +++ b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py b/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py index a8659038261c..db2333e5a511 100644 --- a/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py +++ b/packages/google-cloud-bigquery/samples/desktopapp/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/geography/noxfile.py b/packages/google-cloud-bigquery/samples/geography/noxfile.py index a8659038261c..db2333e5a511 100644 --- a/packages/google-cloud-bigquery/samples/geography/noxfile.py +++ b/packages/google-cloud-bigquery/samples/geography/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/magics/noxfile.py b/packages/google-cloud-bigquery/samples/magics/noxfile.py index a8659038261c..db2333e5a511 100644 --- a/packages/google-cloud-bigquery/samples/magics/noxfile.py +++ b/packages/google-cloud-bigquery/samples/magics/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/notebooks/noxfile.py b/packages/google-cloud-bigquery/samples/notebooks/noxfile.py index a8659038261c..db2333e5a511 100644 --- a/packages/google-cloud-bigquery/samples/notebooks/noxfile.py +++ b/packages/google-cloud-bigquery/samples/notebooks/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/packages/google-cloud-bigquery/samples/snippets/noxfile.py b/packages/google-cloud-bigquery/samples/snippets/noxfile.py index a8659038261c..db2333e5a511 100644 --- a/packages/google-cloud-bigquery/samples/snippets/noxfile.py +++ b/packages/google-cloud-bigquery/samples/snippets/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] From f29b6eed74943af68f21bb4aa8f8c1d4851c9696 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 15 Oct 2025 03:31:50 -0700 Subject: [PATCH 2009/2016] feat: Add ExternalRuntimeOptions to BigQuery routine (#2311) * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. * Update google/cloud/bigquery/routine/routine.py * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. - Added additional tests to improve code coverage based on feedback. * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. - Added additional tests to improve code coverage based on feedback. - Addressed PyType errors by using helper functions for type conversion. * Update tests/unit/routine/test_external_runtime_options.py * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. - Added additional tests to improve code coverage based on feedback. - Addressed PyType errors by using helper functions for type conversion. - Addressed formatting nits from code review. --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: Chalmer Lowe Co-authored-by: Lingqing Gan --- .../google/cloud/bigquery/__init__.py | 2 + .../google/cloud/bigquery/routine/__init__.py | 2 + .../google/cloud/bigquery/routine/routine.py | 185 ++++++++++++++++- .../routine/test_external_runtime_options.py | 191 ++++++++++++++++++ .../tests/unit/routine/test_routine.py | 42 ++++ 5 files changed, 421 insertions(+), 1 deletion(-) create mode 100644 packages/google-cloud-bigquery/tests/unit/routine/test_external_runtime_options.py diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py index d39c71641c85..904bea3d4f90 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/__init__.py @@ -98,6 +98,7 @@ from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType from google.cloud.bigquery.routine import RemoteFunctionOptions +from google.cloud.bigquery.routine import ExternalRuntimeOptions from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import FieldElementType @@ -181,6 +182,7 @@ "RoutineArgument", "RoutineReference", "RemoteFunctionOptions", + "ExternalRuntimeOptions", # Shared helpers "SchemaField", "FieldElementType", diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py index e576b0d49c0f..0251039579ca 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/__init__.py @@ -21,6 +21,7 @@ from google.cloud.bigquery.routine.routine import RoutineReference from google.cloud.bigquery.routine.routine import RoutineType from google.cloud.bigquery.routine.routine import RemoteFunctionOptions +from google.cloud.bigquery.routine.routine import ExternalRuntimeOptions __all__ = ( @@ -30,4 +31,5 @@ "RoutineReference", "RoutineType", "RemoteFunctionOptions", + "ExternalRuntimeOptions", ) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py index e933fa137a7f..c5aa8750ea70 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/routine/routine.py @@ -15,7 +15,7 @@ # limitations under the License. """Define resources for the BigQuery Routines API.""" - +import typing from typing import Any, Dict, Optional, Union import google.cloud._helpers # type: ignore @@ -69,6 +69,7 @@ class Routine(object): "determinism_level": "determinismLevel", "remote_function_options": "remoteFunctionOptions", "data_governance_type": "dataGovernanceType", + "external_runtime_options": "externalRuntimeOptions", } def __init__(self, routine_ref, **kwargs) -> None: @@ -349,6 +350,37 @@ def data_governance_type(self, value): ) self._properties[self._PROPERTY_TO_API_FIELD["data_governance_type"]] = value + @property + def external_runtime_options(self): + """Optional[google.cloud.bigquery.routine.ExternalRuntimeOptions]: + Configures the external runtime options for a routine. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.routine.ExternalRuntimeOptions` or + :data:`None`. + """ + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_runtime_options"] + ) + if prop is not None: + return ExternalRuntimeOptions.from_api_repr(prop) + + @external_runtime_options.setter + def external_runtime_options(self, value): + api_repr = value + if isinstance(value, ExternalRuntimeOptions): + api_repr = value.to_api_repr() + elif value is not None: + raise ValueError( + "value must be google.cloud.bigquery.routine.ExternalRuntimeOptions " + "or None" + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_runtime_options"] + ] = api_repr + @classmethod def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. @@ -736,3 +768,154 @@ def __repr__(self): for property_name in sorted(self._PROPERTY_TO_API_FIELD) ] return "RemoteFunctionOptions({})".format(", ".join(all_properties)) + + +class ExternalRuntimeOptions(object): + """Options for the runtime of the external system. + + Args: + container_memory (str): + Optional. Amount of memory provisioned for a Python UDF container + instance. Format: {number}{unit} where unit is one of "M", "G", "Mi" + and "Gi" (e.g. 1G, 512Mi). If not specified, the default value is + 512Mi. For more information, see `Configure container limits for + Python UDFs `_ + container_cpu (int): + Optional. Amount of CPU provisioned for a Python UDF container + instance. For more information, see `Configure container limits + for Python UDFs `_ + runtime_connection (str): + Optional. Fully qualified name of the connection whose service account + will be used to execute the code in the container. Format: + "projects/{projectId}/locations/{locationId}/connections/{connectionId}" + max_batching_rows (int): + Optional. Maximum number of rows in each batch sent to the external + runtime. If absent or if 0, BigQuery dynamically decides the number of + rows in a batch. + runtime_version (str): + Optional. Language runtime version. Example: python-3.11. + """ + + _PROPERTY_TO_API_FIELD = { + "container_memory": "containerMemory", + "container_cpu": "containerCpu", + "runtime_connection": "runtimeConnection", + "max_batching_rows": "maxBatchingRows", + "runtime_version": "runtimeVersion", + } + + def __init__( + self, + container_memory: Optional[str] = None, + container_cpu: Optional[int] = None, + runtime_connection: Optional[str] = None, + max_batching_rows: Optional[int] = None, + runtime_version: Optional[str] = None, + _properties: Optional[Dict] = None, + ) -> None: + if _properties is None: + _properties = {} + self._properties = _properties + + if container_memory is not None: + self.container_memory = container_memory + if container_cpu is not None: + self.container_cpu = container_cpu + if runtime_connection is not None: + self.runtime_connection = runtime_connection + if max_batching_rows is not None: + self.max_batching_rows = max_batching_rows + if runtime_version is not None: + self.runtime_version = runtime_version + + @property + def container_memory(self) -> Optional[str]: + """Optional. Amount of memory provisioned for a Python UDF container instance.""" + return _helpers._str_or_none(self._properties.get("containerMemory")) + + @container_memory.setter + def container_memory(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("container_memory must be a string or None.") + self._properties["containerMemory"] = value + + @property + def container_cpu(self) -> Optional[int]: + """Optional. Amount of CPU provisioned for a Python UDF container instance.""" + return _helpers._int_or_none(self._properties.get("containerCpu")) + + @container_cpu.setter + def container_cpu(self, value: Optional[int]): + if value is not None and not isinstance(value, int): + raise ValueError("container_cpu must be an integer or None.") + self._properties["containerCpu"] = value + + @property + def runtime_connection(self) -> Optional[str]: + """Optional. Fully qualified name of the connection.""" + return _helpers._str_or_none(self._properties.get("runtimeConnection")) + + @runtime_connection.setter + def runtime_connection(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("runtime_connection must be a string or None.") + self._properties["runtimeConnection"] = value + + @property + def max_batching_rows(self) -> Optional[int]: + """Optional. Maximum number of rows in each batch sent to the external runtime.""" + return typing.cast( + int, _helpers._int_or_none(self._properties.get("maxBatchingRows")) + ) + + @max_batching_rows.setter + def max_batching_rows(self, value: Optional[int]): + if value is not None and not isinstance(value, int): + raise ValueError("max_batching_rows must be an integer or None.") + self._properties["maxBatchingRows"] = _helpers._str_or_none(value) + + @property + def runtime_version(self) -> Optional[str]: + """Optional. Language runtime version.""" + return _helpers._str_or_none(self._properties.get("runtimeVersion")) + + @runtime_version.setter + def runtime_version(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("runtime_version must be a string or None.") + self._properties["runtimeVersion"] = value + + @classmethod + def from_api_repr(cls, resource: dict) -> "ExternalRuntimeOptions": + """Factory: construct external runtime options given its API representation. + Args: + resource (Dict[str, object]): Resource, as returned from the API. + Returns: + google.cloud.bigquery.routine.ExternalRuntimeOptions: + Python object, as parsed from ``resource``. + """ + ref = cls() + ref._properties = resource + return ref + + def to_api_repr(self) -> dict: + """Construct the API resource representation of this ExternalRuntimeOptions. + Returns: + Dict[str, object]: External runtime options represented as an API resource. + """ + return self._properties + + def __eq__(self, other): + if not isinstance(other, ExternalRuntimeOptions): + return NotImplemented + return self._properties == other._properties + + def __ne__(self, other): + return not self == other + + def __repr__(self): + all_properties = [ + "{}={}".format(property_name, repr(getattr(self, property_name))) + for property_name in sorted(self._PROPERTY_TO_API_FIELD) + ] + return "ExternalRuntimeOptions({})".format(", ".join(all_properties)) diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_external_runtime_options.py b/packages/google-cloud-bigquery/tests/unit/routine/test_external_runtime_options.py new file mode 100644 index 000000000000..d4edaae9a976 --- /dev/null +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_external_runtime_options.py @@ -0,0 +1,191 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +@pytest.fixture +def target_class(): + from google.cloud.bigquery.routine.routine import ExternalRuntimeOptions + + return ExternalRuntimeOptions + + +@pytest.fixture +def object_under_test(target_class): + return target_class() + + +def test_ctor(target_class): + container_memory = "1G" + container_cpu = 1 + runtime_connection = ( + "projects/my-project/locations/us-central1/connections/my-connection" + ) + max_batching_rows = 100 + runtime_version = "python-3.11" + + instance = target_class( + container_memory=container_memory, + container_cpu=container_cpu, + runtime_connection=runtime_connection, + max_batching_rows=max_batching_rows, + runtime_version=runtime_version, + ) + + assert instance.container_memory == container_memory + assert instance.container_cpu == container_cpu + assert instance.runtime_connection == runtime_connection + assert instance.max_batching_rows == max_batching_rows + assert instance.runtime_version == runtime_version + + +def test_container_memory(object_under_test): + container_memory = "512Mi" + object_under_test.container_memory = container_memory + assert object_under_test.container_memory == container_memory + + +def test_container_cpu(object_under_test): + container_cpu = 1 + object_under_test.container_cpu = container_cpu + assert object_under_test.container_cpu == container_cpu + + +def test_runtime_connection(object_under_test): + runtime_connection = ( + "projects/my-project/locations/us-central1/connections/my-connection" + ) + object_under_test.runtime_connection = runtime_connection + assert object_under_test.runtime_connection == runtime_connection + + +def test_max_batching_rows(object_under_test): + max_batching_rows = 100 + object_under_test.max_batching_rows = max_batching_rows + assert object_under_test.max_batching_rows == max_batching_rows + + +def test_runtime_version(object_under_test): + runtime_version = "python-3.11" + object_under_test.runtime_version = runtime_version + assert object_under_test.runtime_version == runtime_version + + +def test_ctor_w_properties(target_class): + properties = { + "containerMemory": "1G", + "containerCpu": 1, + } + instance = target_class(_properties=properties) + assert instance._properties == properties + + +def test_ne(target_class): + instance1 = target_class(container_memory="1G") + instance2 = target_class(container_memory="2G") + assert instance1 != instance2 + + +def test_ne_false(target_class): + instance1 = target_class(container_memory="1G") + instance2 = target_class(container_memory="1G") + assert not (instance1 != instance2) + + +def test_eq_not_implemented(object_under_test): + assert not (object_under_test == object()) + assert object_under_test != object() + + +def test_from_api_repr(target_class): + resource = { + "containerMemory": "1G", + "containerCpu": 1, + "runtimeConnection": "projects/my-project/locations/us-central1/connections/my-connection", + "maxBatchingRows": "100", + "runtimeVersion": "python-3.11", + } + instance = target_class.from_api_repr(resource) + + assert instance.container_memory == "1G" + assert instance.container_cpu == 1 + assert ( + instance.runtime_connection + == "projects/my-project/locations/us-central1/connections/my-connection" + ) + assert instance.max_batching_rows == 100 + assert instance.runtime_version == "python-3.11" + + +def test_to_api_repr(target_class): + instance = target_class( + container_memory="1G", + container_cpu=1, + runtime_connection="projects/my-project/locations/us-central1/connections/my-connection", + max_batching_rows=100, + runtime_version="python-3.11", + ) + resource = instance.to_api_repr() + + assert resource == { + "containerMemory": "1G", + "containerCpu": 1, + "runtimeConnection": "projects/my-project/locations/us-central1/connections/my-connection", + "maxBatchingRows": "100", + "runtimeVersion": "python-3.11", + } + + +def test_repr(target_class): + instance = target_class( + container_memory="1G", + container_cpu=1, + ) + expected_repr = ( + "ExternalRuntimeOptions(container_cpu=1, container_memory='1G', " + "max_batching_rows=None, runtime_connection=None, runtime_version=None)" + ) + assert repr(instance) == expected_repr + + +def test_invalid_container_memory(object_under_test): + with pytest.raises(ValueError, match="container_memory must be a string or None."): + object_under_test.container_memory = 123 + + +def test_invalid_container_cpu(object_under_test): + with pytest.raises(ValueError, match="container_cpu must be an integer or None."): + object_under_test.container_cpu = "1" + + +def test_invalid_runtime_connection(object_under_test): + with pytest.raises( + ValueError, match="runtime_connection must be a string or None." + ): + object_under_test.runtime_connection = 123 + + +def test_invalid_max_batching_rows(object_under_test): + with pytest.raises( + ValueError, match="max_batching_rows must be an integer or None." + ): + object_under_test.max_batching_rows = "100" + + +def test_invalid_runtime_version(object_under_test): + with pytest.raises(ValueError, match="runtime_version must be a string or None."): + object_under_test.runtime_version = 123 diff --git a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py index acd3bc40e2ff..965c6b2eb882 100644 --- a/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py +++ b/packages/google-cloud-bigquery/tests/unit/routine/test_routine.py @@ -81,6 +81,13 @@ def test_ctor_w_properties(target_class): max_batching_rows=99, user_defined_context={"foo": "bar"}, ) + external_runtime_options = bigquery.ExternalRuntimeOptions( + container_memory="1G", + container_cpu=1, + runtime_connection="projects/p/locations/l/connections/c", + max_batching_rows=100, + runtime_version="python-3.11", + ) actual_routine = target_class( routine_id, @@ -92,6 +99,7 @@ def test_ctor_w_properties(target_class): description=description, determinism_level=determinism_level, remote_function_options=options, + external_runtime_options=external_runtime_options, ) ref = RoutineReference.from_string(routine_id) @@ -106,6 +114,7 @@ def test_ctor_w_properties(target_class): actual_routine.determinism_level == bigquery.DeterminismLevel.NOT_DETERMINISTIC ) assert actual_routine.remote_function_options == options + assert actual_routine.external_runtime_options == external_runtime_options def test_ctor_invalid_remote_function_options(target_class): @@ -119,6 +128,17 @@ def test_ctor_invalid_remote_function_options(target_class): ) +def test_ctor_invalid_external_runtime_options(target_class): + with pytest.raises( + ValueError, + match=".*must be google.cloud.bigquery.routine.ExternalRuntimeOptions.*", + ): + target_class( + "my-proj.my_dset.my_routine", + external_runtime_options=object(), + ) + + def test_from_api_repr(target_class): from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference @@ -155,6 +175,13 @@ def test_from_api_repr(target_class): }, }, "dataGovernanceType": "DATA_MASKING", + "externalRuntimeOptions": { + "containerMemory": "1G", + "containerCpu": 1, + "runtimeConnection": "projects/p/locations/l/connections/c", + "maxBatchingRows": 100, + "runtimeVersion": "python-3.11", + }, } actual_routine = target_class.from_api_repr(resource) @@ -194,6 +221,14 @@ def test_from_api_repr(target_class): assert actual_routine.remote_function_options.max_batching_rows == 50 assert actual_routine.remote_function_options.user_defined_context == {"foo": "bar"} assert actual_routine.data_governance_type == "DATA_MASKING" + assert actual_routine.external_runtime_options.container_memory == "1G" + assert actual_routine.external_runtime_options.container_cpu == 1 + assert ( + actual_routine.external_runtime_options.runtime_connection + == "projects/p/locations/l/connections/c" + ) + assert actual_routine.external_runtime_options.max_batching_rows == 100 + assert actual_routine.external_runtime_options.runtime_version == "python-3.11" def test_from_api_repr_tvf_function(target_class): @@ -297,6 +332,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.determinism_level is None assert actual_routine.remote_function_options is None assert actual_routine.data_governance_type is None + assert actual_routine.external_runtime_options is None def test_from_api_repr_w_unknown_fields(target_class): @@ -571,6 +607,12 @@ def test_set_remote_function_options_w_none(object_under_test): assert object_under_test._properties["remoteFunctionOptions"] is None +def test_set_external_runtime_options_w_none(object_under_test): + object_under_test.external_runtime_options = None + assert object_under_test.external_runtime_options is None + assert object_under_test._properties["externalRuntimeOptions"] is None + + def test_set_data_governance_type_w_none(object_under_test): object_under_test.data_governance_type = None assert object_under_test.data_governance_type is None From bf524b5c1818a39f01587abe44be6386d5776e2d Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Wed, 15 Oct 2025 14:30:12 -0700 Subject: [PATCH 2010/2016] fix: include `io.Base` in the `PathType` (#2323) --- packages/google-cloud-bigquery/google/cloud/bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py index ea592852a1a5..c50e7c2d7d62 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/client.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/client.py @@ -139,7 +139,7 @@ if typing.TYPE_CHECKING: # pragma: NO COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. - PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] + PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes], io.IOBase] _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 From c5b88728d68ef50fd720515d53d6fc5c2599b1ae Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 20 Oct 2025 15:06:52 -0400 Subject: [PATCH 2011/2016] feat: adds support for Python runtime 3.14 (#2322) * feat: adds support for Python runtime 3.14 * adds step to install gdal * adds files required by pyarrow * adds repo required by pyarrow * corrects url to repo required by pyarrow * testing a theory with a conditional * testing a theory with a conditional version of ubuntu * testing a new approach to installing arrow * testing a new approach to dearmoring the key * back to the basics * trying a conditional again. * adds explanatory comment resets ubuntu version to latest * Apply suggestion from @chalmerlowe * Apply suggestion from @chalmerlowe * Apply suggestion from @chalmerlowe * Apply suggestion from @chalmerlowe --- .../.github/sync-repo-settings.yaml | 1 + .../.github/workflows/unittest.yml | 11 ++++++++--- packages/google-cloud-bigquery/CONTRIBUTING.rst | 4 +++- packages/google-cloud-bigquery/noxfile.py | 2 +- packages/google-cloud-bigquery/owlbot.py | 2 +- packages/google-cloud-bigquery/pyproject.toml | 3 +++ .../testing/constraints-3.14.txt | 2 ++ 7 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 packages/google-cloud-bigquery/testing/constraints-3.14.txt diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml index 1e61b4d6587f..ac91806eb5e2 100644 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml @@ -19,6 +19,7 @@ branchProtectionRules: - 'Samples - Python 3.11' - 'Samples - Python 3.12' - 'Samples - Python 3.13' + - 'Samples - Python 3.14' - pattern: v2 requiresLinearHistory: true requiresCodeOwnerReviews: true diff --git a/packages/google-cloud-bigquery/.github/workflows/unittest.yml b/packages/google-cloud-bigquery/.github/workflows/unittest.yml index 24c9ddbafed1..f6b92547e36d 100644 --- a/packages/google-cloud-bigquery/.github/workflows/unittest.yml +++ b/packages/google-cloud-bigquery/.github/workflows/unittest.yml @@ -5,11 +5,10 @@ on: name: unittest jobs: unit: - # Use `ubuntu-latest` runner. runs-on: ubuntu-latest strategy: matrix: - python: ['3.9', '3.11', '3.12', '3.13'] + python: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] steps: - name: Checkout uses: actions/checkout@v4 @@ -22,6 +21,12 @@ jobs: python -m pip install --upgrade setuptools pip wheel python -m pip install nox - name: Run unit tests + + # TODO (https://b.corp.google.com/issues/450370502) 3.14 is not yet supported by pyarrow. See + # https://github.com/googleapis/google-cloud-python/issues/14686 + # https://github.com/apache/arrow/issues/47438 + # Reinstate running tests with 3.14 once this bug is fixed + if: matrix.python != '3.14' env: COVERAGE_FILE: .coverage-${{ matrix.python }} run: | @@ -38,7 +43,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ['3.9', '3.13'] + python: ['3.9', '3.14'] steps: - name: Checkout uses: actions/checkout@v4 diff --git a/packages/google-cloud-bigquery/CONTRIBUTING.rst b/packages/google-cloud-bigquery/CONTRIBUTING.rst index b2993768bc6f..3f8653f4bc9b 100644 --- a/packages/google-cloud-bigquery/CONTRIBUTING.rst +++ b/packages/google-cloud-bigquery/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. + 3.9, 3.10, 3.11, 3.12, 3.13 and 3.14 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -226,12 +226,14 @@ We support: - `Python 3.11`_ - `Python 3.12`_ - `Python 3.13`_ +- `Python 3.14`_ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ .. _Python 3.12: https://docs.python.org/3.12/ .. _Python 3.13: https://docs.python.org/3.13/ +.. _Python 3.14: https://docs.python.org/3.14/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index eb79c238da03..2457382fbe4e 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -39,7 +39,7 @@ DEFAULT_PYTHON_VERSION = "3.9" SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] -UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] +UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py index 80cf9d6e3bb9..bd694180fbe1 100644 --- a/packages/google-cloud-bigquery/owlbot.py +++ b/packages/google-cloud-bigquery/owlbot.py @@ -56,7 +56,7 @@ "pandas": "https://pandas.pydata.org/pandas-docs/stable/", }, system_test_python_versions=["3.9", "3.13"], - unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13"], + unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"], default_python_version="3.9", ) diff --git a/packages/google-cloud-bigquery/pyproject.toml b/packages/google-cloud-bigquery/pyproject.toml index 1c6ec1f777a9..a0e356b3430b 100644 --- a/packages/google-cloud-bigquery/pyproject.toml +++ b/packages/google-cloud-bigquery/pyproject.toml @@ -38,6 +38,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Operating System :: OS Independent", "Topic :: Internet", ] @@ -69,6 +70,7 @@ bqstorage = [ # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.47.0, < 2.0.0", "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", + "grpcio >= 1.75.1, < 2.0.0; python_version >= '3.14'", "pyarrow >= 4.0.0", ] pandas = [ @@ -76,6 +78,7 @@ pandas = [ "pandas-gbq >= 0.26.1", "grpcio >= 1.47.0, < 2.0.0", "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", + "grpcio >= 1.75.1, < 2.0.0; python_version >= '3.14'", "pyarrow >= 3.0.0", "db-dtypes >= 1.0.4, < 2.0.0", ] diff --git a/packages/google-cloud-bigquery/testing/constraints-3.14.txt b/packages/google-cloud-bigquery/testing/constraints-3.14.txt new file mode 100644 index 000000000000..6bd20f5fbf8a --- /dev/null +++ b/packages/google-cloud-bigquery/testing/constraints-3.14.txt @@ -0,0 +1,2 @@ +# Constraints for Python 3.14 +grpcio >= 1.75.1 From 7e8083a2d8ac6ea6478dae7118649fcdd10cf8ec Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 7 Nov 2025 13:29:40 -0500 Subject: [PATCH 2012/2016] chore(librarian): onboard to librarian (#2326) Towards https://github.com/googleapis/librarian/issues/2456 Files removed which is no longer used - Owlbot config files, including owlbot.py - Sync repo settings config file - Release please config files --- .../.github/.OwlBot.lock.yaml | 17 --- .../.github/.OwlBot.yaml | 22 --- .../.github/auto-approve.yml | 3 - .../.github/release-please.yml | 14 -- .../.github/release-trigger.yml | 2 - .../.github/sync-repo-settings.yaml | 32 ----- .../.librarian/state.yaml | 10 ++ packages/google-cloud-bigquery/owlbot.py | 126 ------------------ 8 files changed, 10 insertions(+), 216 deletions(-) delete mode 100644 packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml delete mode 100644 packages/google-cloud-bigquery/.github/.OwlBot.yaml delete mode 100644 packages/google-cloud-bigquery/.github/auto-approve.yml delete mode 100644 packages/google-cloud-bigquery/.github/release-please.yml delete mode 100644 packages/google-cloud-bigquery/.github/release-trigger.yml delete mode 100644 packages/google-cloud-bigquery/.github/sync-repo-settings.yaml create mode 100644 packages/google-cloud-bigquery/.librarian/state.yaml delete mode 100644 packages/google-cloud-bigquery/owlbot.py diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml deleted file mode 100644 index 4a311db0294c..000000000000 --- a/packages/google-cloud-bigquery/.github/.OwlBot.lock.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -docker: - image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:543e209e7c1c1ffe720eb4db1a3f045a75099304fb19aa11a47dc717b8aae2a9 -# created: 2025-10-09T14:48:42.914384887Z diff --git a/packages/google-cloud-bigquery/.github/.OwlBot.yaml b/packages/google-cloud-bigquery/.github/.OwlBot.yaml deleted file mode 100644 index 8b142686cf89..000000000000 --- a/packages/google-cloud-bigquery/.github/.OwlBot.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -docker: - image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - -deep-remove-regex: - - /owl-bot-staging - -begin-after-commit-hash: f2de93abafa306b2ebadf1d10d947db8bcf2bf15 - diff --git a/packages/google-cloud-bigquery/.github/auto-approve.yml b/packages/google-cloud-bigquery/.github/auto-approve.yml deleted file mode 100644 index 311ebbb853a9..000000000000 --- a/packages/google-cloud-bigquery/.github/auto-approve.yml +++ /dev/null @@ -1,3 +0,0 @@ -# https://github.com/googleapis/repo-automation-bots/tree/main/packages/auto-approve -processes: - - "OwlBotTemplateChanges" diff --git a/packages/google-cloud-bigquery/.github/release-please.yml b/packages/google-cloud-bigquery/.github/release-please.yml deleted file mode 100644 index 5161ab347cdf..000000000000 --- a/packages/google-cloud-bigquery/.github/release-please.yml +++ /dev/null @@ -1,14 +0,0 @@ -releaseType: python -handleGHRelease: true -# NOTE: this section is generated by synthtool.languages.python -# See https://github.com/googleapis/synthtool/blob/master/synthtool/languages/python.py -branches: -- branch: v2 - handleGHRelease: true - releaseType: python -- branch: v1 - handleGHRelease: true - releaseType: python -- branch: v0 - handleGHRelease: true - releaseType: python diff --git a/packages/google-cloud-bigquery/.github/release-trigger.yml b/packages/google-cloud-bigquery/.github/release-trigger.yml deleted file mode 100644 index b975c190db1b..000000000000 --- a/packages/google-cloud-bigquery/.github/release-trigger.yml +++ /dev/null @@ -1,2 +0,0 @@ -enabled: true -multiScmName: python-bigquery diff --git a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml b/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml deleted file mode 100644 index ac91806eb5e2..000000000000 --- a/packages/google-cloud-bigquery/.github/sync-repo-settings.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings -mergeCommitAllowed: false -# Rules for main branch protection -branchProtectionRules: -# Identifies the protection rule pattern. Name of the branch to be protected. -# Defaults to `main` -- pattern: main - requiresLinearHistory: true - requiresCodeOwnerReviews: true - requiresStrictStatusChecks: true - requiredStatusCheckContexts: - - 'Kokoro' - - 'Kokoro system-3.13' - - 'Kokoro snippets-3.13' - - 'cla/google' - - 'Samples - Lint' - - 'Samples - Python 3.9' - - 'Samples - Python 3.10' - - 'Samples - Python 3.11' - - 'Samples - Python 3.12' - - 'Samples - Python 3.13' - - 'Samples - Python 3.14' -- pattern: v2 - requiresLinearHistory: true - requiresCodeOwnerReviews: true - requiresStrictStatusChecks: true - requiredStatusCheckContexts: - - 'Kokoro' - - 'cla/google' - - 'Samples - Lint' - - 'Samples - Python 3.9' - - 'Samples - Python 3.10' diff --git a/packages/google-cloud-bigquery/.librarian/state.yaml b/packages/google-cloud-bigquery/.librarian/state.yaml new file mode 100644 index 000000000000..1834779bc9e6 --- /dev/null +++ b/packages/google-cloud-bigquery/.librarian/state.yaml @@ -0,0 +1,10 @@ +image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:39628f6e89c9cad27973b9a39a50f7052bec0435ee58c7027b4fa6b655943e31 +libraries: + - id: google-cloud-bigquery + version: 3.38.0 + apis: [] + source_roots: + - . + preserve_regex: [] + remove_regex: [] + tag_format: v{version} diff --git a/packages/google-cloud-bigquery/owlbot.py b/packages/google-cloud-bigquery/owlbot.py deleted file mode 100644 index bd694180fbe1..000000000000 --- a/packages/google-cloud-bigquery/owlbot.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""This script is used to synthesize generated parts of this library.""" -from pathlib import Path - -import synthtool as s -from synthtool import gcp -from synthtool.languages import python - -REPO_ROOT = Path(__file__).parent.absolute() - -default_version = "v2" - -for library in s.get_staging_dirs(default_version): - # Avoid breaking change due to change in field renames. - # https://github.com/googleapis/python-bigquery/issues/319 - s.replace( - library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", - r"type_ ", - "type ", - ) - # Patch docs issue - s.replace( - library / f"google/cloud/bigquery_{library.name}/types/model.py", - r"""\"predicted_\"""", - """`predicted_`""", - ) - s.move(library / f"google/cloud/bigquery_{library.name}/types") -s.remove_staging_dirs() - -common = gcp.CommonTemplates() - -# ---------------------------------------------------------------------------- -# Add templated files -# ---------------------------------------------------------------------------- -templated_files = common.py_library( - cov_level=100, - samples=True, - microgenerator=True, - split_system_tests=True, - intersphinx_dependencies={ - "dateutil": "https://dateutil.readthedocs.io/en/latest/", - "geopandas": "https://geopandas.org/", - "pandas": "https://pandas.pydata.org/pandas-docs/stable/", - }, - system_test_python_versions=["3.9", "3.13"], - unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"], - default_python_version="3.9", -) - -# BigQuery has a custom multiprocessing note -s.move( - templated_files, - excludes=[ - "noxfile.py", - "renovate.json", - "docs/multiprocessing.rst", - "docs/index.rst", - ".coveragerc", - ".github/CODEOWNERS", - # Include custom SNIPPETS_TESTS job for performance. - # https://github.com/googleapis/python-bigquery/issues/191 - ".kokoro/presubmit/presubmit.cfg", - ".kokoro/presubmit/system-3.8.cfg", - ".kokoro/continuous/prerelease-deps.cfg", - ".kokoro/samples/python3.7/**", - ".kokoro/samples/python3.8/**", - ".github/workflows/**", # exclude gh actions as credentials are needed for tests - "README.rst", - ], -) - -python.configure_previous_major_version_branches() - -s.replace( - ".kokoro/test-samples-impl.sh", - """# `virtualenv==20.26.6` is added for Python 3.7 compatibility -python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6""", - "python3.9 -m pip install --upgrade --quiet nox virtualenv", -) - -s.replace( - "CONTRIBUTING.rst", - r"\$ nox -s py-3.8", - r"$ nox -s py-3.9", -) - -s.replace( - "scripts/readme-gen/templates/install_deps.tmpl.rst", - r"Samples are compatible with Python 3.7", - r"Samples are compatible with Python 3.9", -) - - -# ---------------------------------------------------------------------------- -# Samples templates -# ---------------------------------------------------------------------------- - -python.py_samples() - -s.replace( - "samples/**/noxfile.py", - 'BLACK_VERSION = "black==22.3.0"', - 'BLACK_VERSION = "black==23.7.0"', -) -s.replace( - "samples/**/noxfile.py", - r'ALL_VERSIONS = \["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"\]', - 'ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]', -) - -s.shell.run(["nox", "-s", "blacken"], hide_output=False) -for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): - s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) From c2f1ba0027b964ad628471e461ce5172c7dd6995 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 21 Nov 2025 11:19:22 -0500 Subject: [PATCH 2013/2016] tests: temporarily pin pytest (#2334) Temporarily pin `pytest < 9` to resolve the following issue ``` for invalid_view_value in invalid_view_values: > with self.subTest(invalid_view_value=invalid_view_value): tests/unit/test_client.py:810: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /opt/hostedtoolcache/Python/3.11.14/x64/lib/python3.11/contextlib.py:144: in __exit__ next(self.gen) /opt/hostedtoolcache/Python/3.11.14/x64/lib/python3.11/contextlib.py:144: in __exit__ next(self.gen) .nox/unit-3-11/lib/python3.11/site-packages/_pytest/unittest.py:438: in addSubTest self.ihook.pytest_runtest_logreport(report=sub_report) .nox/unit-3-11/lib/python3.11/site-packages/pluggy/_hooks.py:512: in __call__ return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/pluggy/_manager.py:120: in _hookexec return self._inner_hookexec(hook_name, methods, kwargs, firstresult) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/xdist/remote.py:289: in pytest_runtest_logreport self.sendevent("testreport", data=data) .nox/unit-3-11/lib/python3.11/site-packages/xdist/remote.py:126: in sendevent self.channel.send((name, kwargs)) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:912: in send self.gateway._send(Message.CHANNEL_DATA, self.id, dumps_internal(item)) ^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1629: in dumps_internal return _Serializer().save(obj) # type: ignore[return-value] ^^^^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1647: in save self._save(obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1744: in save_tuple self._save(item) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = obj = def _save(self, obj: object) -> None: tp = type(obj) try: dispatch = self._dispatch[tp] except KeyError: methodname = "save_" + tp.__name__ meth: Callable[[_Serializer, object], None] | None = getattr( self.__class__, methodname, None ) if meth is None: > raise DumpError(f"can't serialize {tp}") from None E execnet.gateway_base.DumpError: can't serialize ``` The upstream issue is tracked in https://github.com/pytest-dev/pytest-xdist/issues/1273 --- packages/google-cloud-bigquery/noxfile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index 2457382fbe4e..194e7ce8f388 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -95,7 +95,8 @@ def default(session, install_extras=True): # Install all test dependencies, then install local packages in-place. session.install( - "pytest", + # TODO(https://github.com/pytest-dev/pytest-xdist/issues/1273): Remove once this bug is fixed + "pytest<9", "google-cloud-testutils", "pytest-cov", "pytest-xdist", From dd2c7973de73aafcdd0a1927f2f1e3335b3ea609 Mon Sep 17 00:00:00 2001 From: ohmayr Date: Fri, 21 Nov 2025 08:42:31 -0800 Subject: [PATCH 2014/2016] chore: update librarian sha (#2329) This PR updates the librarian sha to support v1.0.0 --- packages/google-cloud-bigquery/.librarian/state.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/.librarian/state.yaml b/packages/google-cloud-bigquery/.librarian/state.yaml index 1834779bc9e6..b5d3126e4a71 100644 --- a/packages/google-cloud-bigquery/.librarian/state.yaml +++ b/packages/google-cloud-bigquery/.librarian/state.yaml @@ -1,4 +1,4 @@ -image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:39628f6e89c9cad27973b9a39a50f7052bec0435ee58c7027b4fa6b655943e31 +image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620 libraries: - id: google-cloud-bigquery version: 3.38.0 From 7aa1bf6f34bf2a1ec627012f35f612bc3550b033 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 24 Nov 2025 22:24:28 +0000 Subject: [PATCH 2015/2016] Trigger owlbot post-processor --- .../google-cloud-bigquery/google-cloud-bigquery.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 owl-bot-staging/google-cloud-bigquery/google-cloud-bigquery/google-cloud-bigquery.txt diff --git a/owl-bot-staging/google-cloud-bigquery/google-cloud-bigquery/google-cloud-bigquery.txt b/owl-bot-staging/google-cloud-bigquery/google-cloud-bigquery/google-cloud-bigquery.txt new file mode 100644 index 000000000000..e69de29bb2d1 From 91fecb508d935c5afc9615554ae66e779ef5c268 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 24 Nov 2025 22:24:42 +0000 Subject: [PATCH 2016/2016] build: google-cloud-bigquery migration: adjust owlbot-related files --- .../google-cloud-bigquery/.github/CODEOWNERS | 11 - .../.github/CONTRIBUTING.md | 28 - .../.github/ISSUE_TEMPLATE/bug_report.md | 43 -- .../.github/ISSUE_TEMPLATE/feature_request.md | 18 - .../.github/ISSUE_TEMPLATE/support_request.md | 7 - .../.github/PULL_REQUEST_TEMPLATE.md | 7 - .../.github/auto-label.yaml | 20 - .../.github/blunderbuss.yml | 17 - .../.github/header-checker-lint.yml | 15 - .../.github/snippet-bot.yml | 0 .../.github/workflows/docs.yml | 39 -- .../.github/workflows/unittest.yml | 94 ---- .../google-cloud-bigquery/.kokoro/build.sh | 60 --- .../.kokoro/continuous/common.cfg | 27 - .../.kokoro/continuous/continuous.cfg | 1 - .../continuous/prerelease-deps-3.13.cfg | 7 - .../.kokoro/continuous/unit-tests-misc.cfg | 9 - .../.kokoro/populate-secrets.sh | 43 -- .../.kokoro/presubmit/common.cfg | 27 - .../.kokoro/presubmit/linting-typing.cfg | 7 - .../.kokoro/presubmit/prerelease-deps.cfg | 7 - .../.kokoro/presubmit/snippets-3.13.cfg | 7 - .../.kokoro/presubmit/snippets-3.9.cfg | 7 - .../.kokoro/presubmit/system-3.13.cfg | 7 - .../.kokoro/presubmit/system-3.9.cfg | 7 - .../.kokoro/samples/lint/common.cfg | 34 -- .../.kokoro/samples/lint/continuous.cfg | 6 - .../.kokoro/samples/lint/periodic.cfg | 6 - .../.kokoro/samples/lint/presubmit.cfg | 6 - .../.kokoro/samples/python3.10/common.cfg | 40 -- .../.kokoro/samples/python3.10/continuous.cfg | 6 - .../samples/python3.10/periodic-head.cfg | 11 - .../.kokoro/samples/python3.10/periodic.cfg | 6 - .../.kokoro/samples/python3.10/presubmit.cfg | 6 - .../.kokoro/samples/python3.11/common.cfg | 40 -- .../.kokoro/samples/python3.11/continuous.cfg | 6 - .../samples/python3.11/periodic-head.cfg | 11 - .../.kokoro/samples/python3.11/periodic.cfg | 6 - .../.kokoro/samples/python3.11/presubmit.cfg | 6 - .../.kokoro/samples/python3.12/common.cfg | 40 -- .../.kokoro/samples/python3.12/continuous.cfg | 6 - .../samples/python3.12/periodic-head.cfg | 11 - .../.kokoro/samples/python3.12/periodic.cfg | 6 - .../.kokoro/samples/python3.12/presubmit.cfg | 6 - .../.kokoro/samples/python3.13/common.cfg | 40 -- .../.kokoro/samples/python3.13/continuous.cfg | 6 - .../samples/python3.13/periodic-head.cfg | 11 - .../.kokoro/samples/python3.13/periodic.cfg | 6 - .../.kokoro/samples/python3.13/presubmit.cfg | 6 - .../.kokoro/samples/python3.14/common.cfg | 40 -- .../.kokoro/samples/python3.14/continuous.cfg | 6 - .../samples/python3.14/periodic-head.cfg | 11 - .../.kokoro/samples/python3.14/periodic.cfg | 6 - .../.kokoro/samples/python3.14/presubmit.cfg | 6 - .../.kokoro/samples/python3.9/common.cfg | 40 -- .../.kokoro/samples/python3.9/continuous.cfg | 6 - .../samples/python3.9/periodic-head.cfg | 11 - .../.kokoro/samples/python3.9/periodic.cfg | 6 - .../.kokoro/samples/python3.9/presubmit.cfg | 6 - .../.kokoro/test-samples-against-head.sh | 26 - .../.kokoro/test-samples-impl.sh | 102 ---- .../.kokoro/test-samples.sh | 44 -- .../.kokoro/trampoline.sh | 28 - .../.kokoro/trampoline_v2.sh | 487 ------------------ packages/google-cloud-bigquery/.trampolinerc | 61 --- .../google-cloud-bigquery/docs/changelog.md | 1 - 66 files changed, 1730 deletions(-) delete mode 100644 packages/google-cloud-bigquery/.github/CODEOWNERS delete mode 100644 packages/google-cloud-bigquery/.github/CONTRIBUTING.md delete mode 100644 packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md delete mode 100644 packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/feature_request.md delete mode 100644 packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/support_request.md delete mode 100644 packages/google-cloud-bigquery/.github/PULL_REQUEST_TEMPLATE.md delete mode 100644 packages/google-cloud-bigquery/.github/auto-label.yaml delete mode 100644 packages/google-cloud-bigquery/.github/blunderbuss.yml delete mode 100644 packages/google-cloud-bigquery/.github/header-checker-lint.yml delete mode 100644 packages/google-cloud-bigquery/.github/snippet-bot.yml delete mode 100644 packages/google-cloud-bigquery/.github/workflows/docs.yml delete mode 100644 packages/google-cloud-bigquery/.github/workflows/unittest.yml delete mode 100755 packages/google-cloud-bigquery/.kokoro/build.sh delete mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/continuous.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.13.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/continuous/unit-tests-misc.cfg delete mode 100755 packages/google-cloud-bigquery/.kokoro/populate-secrets.sh delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/linting-typing.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.13.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.9.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/system-3.13.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/presubmit/system-3.9.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/lint/continuous.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/lint/periodic.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/lint/presubmit.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.10/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.10/continuous.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic-head.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.10/presubmit.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.11/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.11/continuous.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic-head.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.11/presubmit.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.12/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.12/continuous.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic-head.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.12/presubmit.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.13/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.13/continuous.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic-head.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.13/presubmit.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.14/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.14/continuous.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic-head.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.14/presubmit.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.9/continuous.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg delete mode 100644 packages/google-cloud-bigquery/.kokoro/samples/python3.9/presubmit.cfg delete mode 100755 packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh delete mode 100755 packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh delete mode 100755 packages/google-cloud-bigquery/.kokoro/test-samples.sh delete mode 100755 packages/google-cloud-bigquery/.kokoro/trampoline.sh delete mode 100755 packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh delete mode 100644 packages/google-cloud-bigquery/.trampolinerc delete mode 120000 packages/google-cloud-bigquery/docs/changelog.md diff --git a/packages/google-cloud-bigquery/.github/CODEOWNERS b/packages/google-cloud-bigquery/.github/CODEOWNERS deleted file mode 100644 index 6763f258cdb5..000000000000 --- a/packages/google-cloud-bigquery/.github/CODEOWNERS +++ /dev/null @@ -1,11 +0,0 @@ -# Code owners file. -# This file controls who is tagged for review for any given pull request. -# -# For syntax help see: -# https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax - -# The @googleapis/api-bigquery is the default owner for changes in this repo -* @googleapis/api-bigquery @googleapis/yoshi-python - -# The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners @googleapis/yoshi-python diff --git a/packages/google-cloud-bigquery/.github/CONTRIBUTING.md b/packages/google-cloud-bigquery/.github/CONTRIBUTING.md deleted file mode 100644 index 939e5341e74d..000000000000 --- a/packages/google-cloud-bigquery/.github/CONTRIBUTING.md +++ /dev/null @@ -1,28 +0,0 @@ -# How to Contribute - -We'd love to accept your patches and contributions to this project. There are -just a few small guidelines you need to follow. - -## Contributor License Agreement - -Contributions to this project must be accompanied by a Contributor License -Agreement. You (or your employer) retain the copyright to your contribution; -this simply gives us permission to use and redistribute your contributions as -part of the project. Head over to to see -your current agreements on file or to sign a new one. - -You generally only need to submit a CLA once, so if you've already submitted one -(even if it was for a different project), you probably don't need to do it -again. - -## Code reviews - -All submissions, including submissions by project members, require review. We -use GitHub pull requests for this purpose. Consult -[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more -information on using pull requests. - -## Community Guidelines - -This project follows [Google's Open Source Community -Guidelines](https://opensource.google.com/conduct/). diff --git a/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md b/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 5b5339350a60..000000000000 --- a/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve - ---- - -Thanks for stopping by to let us know something could be better! - -**PLEASE READ**: If you have a support contract with Google, please create an issue in the [support console](https://cloud.google.com/support/) instead of filing on GitHub. This will ensure a timely response. - -Please run down the following list and make sure you've tried the usual "quick fixes": - - - Search the issues already opened: https://github.com/googleapis/python-bigquery/issues - - Search StackOverflow: https://stackoverflow.com/questions/tagged/google-cloud-platform+python - -If you are still having issues, please be sure to include as much information as possible: - -#### Environment details - - - OS type and version: - - Python version: `python --version` - - pip version: `pip --version` - - `google-cloud-bigquery` version: `pip show google-cloud-bigquery` - -#### Steps to reproduce - - 1. ? - 2. ? - -#### Code example - -```python -# example -``` - -#### Stack trace -``` -# example -``` - -Making sure to follow these steps will guarantee the quickest resolution possible. - -Thanks! diff --git a/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/feature_request.md b/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 6365857f33c6..000000000000 --- a/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,18 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for this library - ---- - -Thanks for stopping by to let us know something could be better! - -**PLEASE READ**: If you have a support contract with Google, please create an issue in the [support console](https://cloud.google.com/support/) instead of filing on GitHub. This will ensure a timely response. - - **Is your feature request related to a problem? Please describe.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] - **Describe the solution you'd like** -A clear and concise description of what you want to happen. - **Describe alternatives you've considered** -A clear and concise description of any alternative solutions or features you've considered. - **Additional context** -Add any other context or screenshots about the feature request here. diff --git a/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/support_request.md b/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/support_request.md deleted file mode 100644 index 995869032125..000000000000 --- a/packages/google-cloud-bigquery/.github/ISSUE_TEMPLATE/support_request.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -name: Support request -about: If you have a support contract with Google, please create an issue in the Google Cloud Support console. - ---- - -**PLEASE READ**: If you have a support contract with Google, please create an issue in the [support console](https://cloud.google.com/support/) instead of filing on GitHub. This will ensure a timely response. diff --git a/packages/google-cloud-bigquery/.github/PULL_REQUEST_TEMPLATE.md b/packages/google-cloud-bigquery/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index 65ceeeb5e490..000000000000 --- a/packages/google-cloud-bigquery/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,7 +0,0 @@ -Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: -- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea -- [ ] Ensure the tests and linter pass -- [ ] Code coverage does not decrease (if any source code was changed) -- [ ] Appropriate docs were updated (if necessary) - -Fixes # 🦕 diff --git a/packages/google-cloud-bigquery/.github/auto-label.yaml b/packages/google-cloud-bigquery/.github/auto-label.yaml deleted file mode 100644 index 21786a4eb085..000000000000 --- a/packages/google-cloud-bigquery/.github/auto-label.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -requestsize: - enabled: true - -path: - pullrequest: true - paths: - samples: "samples" diff --git a/packages/google-cloud-bigquery/.github/blunderbuss.yml b/packages/google-cloud-bigquery/.github/blunderbuss.yml deleted file mode 100644 index 5b7383dc7665..000000000000 --- a/packages/google-cloud-bigquery/.github/blunderbuss.yml +++ /dev/null @@ -1,17 +0,0 @@ -# Blunderbuss config -# -# This file controls who is assigned for pull requests and issues. -# Note: This file is autogenerated. To make changes to the assignee -# team, please update `codeowner_team` in `.repo-metadata.json`. -assign_issues: - - googleapis/api-bigquery - -assign_issues_by: - - labels: - - "samples" - to: - - googleapis/python-samples-reviewers - - googleapis/api-bigquery - -assign_prs: - - googleapis/api-bigquery diff --git a/packages/google-cloud-bigquery/.github/header-checker-lint.yml b/packages/google-cloud-bigquery/.github/header-checker-lint.yml deleted file mode 100644 index 6fe78aa7987a..000000000000 --- a/packages/google-cloud-bigquery/.github/header-checker-lint.yml +++ /dev/null @@ -1,15 +0,0 @@ -{"allowedCopyrightHolders": ["Google LLC"], - "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"], - "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt", "**/__init__.py", "samples/**/constraints.txt", "samples/**/constraints-test.txt"], - "sourceFileExtensions": [ - "ts", - "js", - "java", - "sh", - "Dockerfile", - "yaml", - "py", - "html", - "txt" - ] -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.github/snippet-bot.yml b/packages/google-cloud-bigquery/.github/snippet-bot.yml deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/packages/google-cloud-bigquery/.github/workflows/docs.yml b/packages/google-cloud-bigquery/.github/workflows/docs.yml deleted file mode 100644 index 9372faac20cd..000000000000 --- a/packages/google-cloud-bigquery/.github/workflows/docs.yml +++ /dev/null @@ -1,39 +0,0 @@ -on: - pull_request: - branches: - - main -name: docs -jobs: - docs: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Install nox - run: | - python -m pip install --upgrade setuptools pip wheel - python -m pip install nox - - name: Run docs session - run: | - nox -s docs-3.10 - - docfx: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Install nox - run: | - python -m pip install --upgrade setuptools pip wheel - python -m pip install nox - - name: Run docfx session - run: | - nox -s docfx-3.10 diff --git a/packages/google-cloud-bigquery/.github/workflows/unittest.yml b/packages/google-cloud-bigquery/.github/workflows/unittest.yml deleted file mode 100644 index f6b92547e36d..000000000000 --- a/packages/google-cloud-bigquery/.github/workflows/unittest.yml +++ /dev/null @@ -1,94 +0,0 @@ -on: - pull_request: - branches: - - main -name: unittest -jobs: - unit: - runs-on: ubuntu-latest - strategy: - matrix: - python: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python }} - - name: Install nox - run: | - python -m pip install --upgrade setuptools pip wheel - python -m pip install nox - - name: Run unit tests - - # TODO (https://b.corp.google.com/issues/450370502) 3.14 is not yet supported by pyarrow. See - # https://github.com/googleapis/google-cloud-python/issues/14686 - # https://github.com/apache/arrow/issues/47438 - # Reinstate running tests with 3.14 once this bug is fixed - if: matrix.python != '3.14' - env: - COVERAGE_FILE: .coverage-${{ matrix.python }} - run: | - nox -s unit-${{ matrix.python }} - - name: Upload coverage results - uses: actions/upload-artifact@v4 - with: - name: coverage-artifact-${{ matrix.python }} - path: .coverage-${{ matrix.python }} - include-hidden-files: true - - unit_noextras: - # Use `ubuntu-latest` runner. - runs-on: ubuntu-latest - strategy: - matrix: - python: ['3.9', '3.14'] - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python }} - - name: Install nox - run: | - python -m pip install --upgrade setuptools pip wheel - python -m pip install nox - - name: Run unit_noextras tests - env: - COVERAGE_FILE: .coverage-unit-noextras-${{ matrix.python }} - run: | - nox -s unit_noextras-${{ matrix.python }} - - name: Upload coverage results - uses: actions/upload-artifact@v4 - with: - name: coverage-artifact-unit-noextras-${{ matrix.python }} - path: .coverage-unit-noextras-${{ matrix.python }} - include-hidden-files: true - - cover: - runs-on: ubuntu-latest - needs: - - unit - - unit_noextras - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: "3.9" - - name: Install coverage - run: | - python -m pip install --upgrade setuptools pip wheel - python -m pip install coverage - - name: Download coverage results - uses: actions/download-artifact@v4 - with: - path: .coverage-results/ - - name: Report coverage results - run: | - find .coverage-results -type f -name '*.zip' -exec unzip {} \; - coverage combine .coverage-results/**/.coverage* - coverage report --show-missing --fail-under=100 diff --git a/packages/google-cloud-bigquery/.kokoro/build.sh b/packages/google-cloud-bigquery/.kokoro/build.sh deleted file mode 100755 index d41b45aa1dd0..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/build.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -eo pipefail - -CURRENT_DIR=$(dirname "${BASH_SOURCE[0]}") - -if [[ -z "${PROJECT_ROOT:-}" ]]; then - PROJECT_ROOT=$(realpath "${CURRENT_DIR}/..") -fi - -pushd "${PROJECT_ROOT}" - -# Disable buffering, so that the logs stream through. -export PYTHONUNBUFFERED=1 - -# Debug: show build environment -env | grep KOKORO - -# Setup service account credentials. -if [[ -f "${KOKORO_GFILE_DIR}/service-account.json" ]] -then - export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json -fi - -# Setup project id. -if [[ -f "${KOKORO_GFILE_DIR}/project-id.json" ]] -then - export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") -fi - -# If this is a continuous build, send the test log to the FlakyBot. -# See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. -if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then - cleanup() { - chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot - $KOKORO_GFILE_DIR/linux_amd64/flakybot - } - trap cleanup EXIT HUP -fi - -# If NOX_SESSION is set, it only runs the specified session, -# otherwise run all the sessions. -if [[ -n "${NOX_SESSION:-}" ]]; then - python3 -m nox -s ${NOX_SESSION:-} -else - python3 -m nox -fi diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/common.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/common.cfg deleted file mode 100644 index 1f46f62708d9..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/continuous/common.cfg +++ /dev/null @@ -1,27 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Download resources for system tests (service account key, etc.) -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/google-cloud-python" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" -} -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/build.sh" -} diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/continuous.cfg deleted file mode 100644 index 8f43917d92fe..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/continuous/continuous.cfg +++ /dev/null @@ -1 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.13.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.13.cfg deleted file mode 100644 index 99a1e7150b1e..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/continuous/prerelease-deps-3.13.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.13" -} diff --git a/packages/google-cloud-bigquery/.kokoro/continuous/unit-tests-misc.cfg b/packages/google-cloud-bigquery/.kokoro/continuous/unit-tests-misc.cfg deleted file mode 100644 index 6598baee77e1..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/continuous/unit-tests-misc.cfg +++ /dev/null @@ -1,9 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run these nox sessions. -# A subset based on Python versions that are neither our newest OR oldest -# supported versions of Python -env_vars: { - key: "NOX_SESSION" - value: "unit_noextras-3.9 unit_noextras-3.10 unit_noextras-3.11 unit-3.9 unit-3.10 unit-3.11" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh b/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh deleted file mode 100755 index c435402f473e..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/populate-secrets.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -eo pipefail - -function now { date +"%Y-%m-%d %H:%M:%S" | tr -d '\n' ;} -function msg { println "$*" >&2 ;} -function println { printf '%s\n' "$(now) $*" ;} - - -# Populates requested secrets set in SECRET_MANAGER_KEYS from service account: -# kokoro-trampoline@cloud-devrel-kokoro-resources.iam.gserviceaccount.com -SECRET_LOCATION="${KOKORO_GFILE_DIR}/secret_manager" -msg "Creating folder on disk for secrets: ${SECRET_LOCATION}" -mkdir -p ${SECRET_LOCATION} -for key in $(echo ${SECRET_MANAGER_KEYS} | sed "s/,/ /g") -do - msg "Retrieving secret ${key}" - docker run --entrypoint=gcloud \ - --volume=${KOKORO_GFILE_DIR}:${KOKORO_GFILE_DIR} \ - gcr.io/google.com/cloudsdktool/cloud-sdk \ - secrets versions access latest \ - --project cloud-devrel-kokoro-resources \ - --secret ${key} > \ - "${SECRET_LOCATION}/${key}" - if [[ $? == 0 ]]; then - msg "Secret written to ${SECRET_LOCATION}/${key}" - else - msg "Error retrieving secret ${key}" - fi -done diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/common.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/common.cfg deleted file mode 100644 index 1f46f62708d9..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/common.cfg +++ /dev/null @@ -1,27 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Download resources for system tests (service account key, etc.) -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/google-cloud-python" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" -} -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/build.sh" -} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/linting-typing.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/linting-typing.cfg deleted file mode 100644 index b1a7406c2a29..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/linting-typing.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run these nox sessions. -env_vars: { - key: "NOX_SESSION" - value: "lint lint_setup_py blacken mypy mypy_samples pytype" -} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg deleted file mode 100644 index 3595fb43f5c0..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/prerelease-deps.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps" -} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.13.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.13.cfg deleted file mode 100644 index 0b89f08630bb..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.13.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "snippets-3.13" -} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.9.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.9.cfg deleted file mode 100644 index d1de209a2f2f..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/snippets-3.9.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "snippets-3.9" -} diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.13.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.13.cfg deleted file mode 100644 index a0e9a010884b..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.13.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "system-3.13" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.9.cfg b/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.9.cfg deleted file mode 100644 index b8ae66b376ff..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/presubmit/system-3.9.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "system-3.9" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg deleted file mode 100644 index 153746cccae7..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/lint/common.cfg +++ /dev/null @@ -1,34 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "lint" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/lint/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/lint/continuous.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/lint/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/lint/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/lint/periodic.cfg deleted file mode 100644 index 50fec9649732..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/lint/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/lint/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/lint/presubmit.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/lint/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/common.cfg deleted file mode 100644 index da4003d76d91..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.10" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-310" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/continuous.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic-head.cfg deleted file mode 100644 index 5aa01bab5bf3..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic.cfg deleted file mode 100644 index 71cd1e597e38..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.10/presubmit.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.10/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/common.cfg deleted file mode 100644 index f5adc870378f..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.11" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-311" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/continuous.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic-head.cfg deleted file mode 100644 index 5aa01bab5bf3..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic.cfg deleted file mode 100644 index 71cd1e597e38..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.11/presubmit.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.11/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/common.cfg deleted file mode 100644 index 6eb699edd456..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.12" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-312" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/continuous.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic-head.cfg deleted file mode 100644 index 5aa01bab5bf3..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic.cfg deleted file mode 100644 index 71cd1e597e38..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.12/presubmit.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.12/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/common.cfg deleted file mode 100644 index ee96889957e5..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.13" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-313" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/continuous.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic-head.cfg deleted file mode 100644 index 5aa01bab5bf3..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic.cfg deleted file mode 100644 index 71cd1e597e38..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.13/presubmit.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.13/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/common.cfg deleted file mode 100644 index d2fcee553b25..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.14" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-314" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/continuous.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic-head.cfg deleted file mode 100644 index 5aa01bab5bf3..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic.cfg deleted file mode 100644 index 71cd1e597e38..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.14/presubmit.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.14/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg deleted file mode 100644 index 58d56ce743c9..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.9" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py39" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/continuous.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/continuous.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg deleted file mode 100644 index 5aa01bab5bf3..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg deleted file mode 100644 index 71cd1e597e38..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/presubmit.cfg b/packages/google-cloud-bigquery/.kokoro/samples/python3.9/presubmit.cfg deleted file mode 100644 index a1c8d9759c88..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/samples/python3.9/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh deleted file mode 100755 index e9d8bd79a644..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-against-head.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# A customized test runner for samples. -# -# For periodic builds, you can specify this file for testing against head. - -# `-e` enables the script to automatically fail when a command fails -# `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero -set -eo pipefail -# Enables `**` to include files nested inside sub-folders -shopt -s globstar - -exec .kokoro/test-samples-impl.sh diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh b/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh deleted file mode 100755 index 40e24882277e..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/test-samples-impl.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# `-e` enables the script to automatically fail when a command fails -# `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero -set -eo pipefail -# Enables `**` to include files nested inside sub-folders -shopt -s globstar - -# Exit early if samples don't exist -if ! find samples -name 'requirements.txt' | grep -q .; then - echo "No tests run. './samples/**/requirements.txt' not found" - exit 0 -fi - -# Disable buffering, so that the logs stream through. -export PYTHONUNBUFFERED=1 - -# Debug: show build environment -env | grep KOKORO - -# Install nox -python3.9 -m pip install --upgrade --quiet nox virtualenv - -# Use secrets acessor service account to get secrets -if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then - gcloud auth activate-service-account \ - --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \ - --project="cloud-devrel-kokoro-resources" -fi - -# This script will create 3 files: -# - testing/test-env.sh -# - testing/service-account.json -# - testing/client-secrets.json -./scripts/decrypt-secrets.sh - -source ./testing/test-env.sh -export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json - -# For cloud-run session, we activate the service account for gcloud sdk. -gcloud auth activate-service-account \ - --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" - -export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json - -echo -e "\n******************** TESTING PROJECTS ********************" - -# Switch to 'fail at end' to allow all tests to complete before exiting. -set +e -# Use RTN to return a non-zero value if the test fails. -RTN=0 -ROOT=$(pwd) -# Find all requirements.txt in the samples directory (may break on whitespace). -for file in samples/**/requirements.txt; do - cd "$ROOT" - # Navigate to the project folder. - file=$(dirname "$file") - cd "$file" - - echo "------------------------------------------------------------" - echo "- testing $file" - echo "------------------------------------------------------------" - - # Use nox to execute the tests for the project. - python3.9 -m nox -s "$RUN_TESTS_SESSION" - EXIT=$? - - # If this is a periodic build, send the test log to the FlakyBot. - # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. - if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then - chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot - $KOKORO_GFILE_DIR/linux_amd64/flakybot - fi - - if [[ $EXIT -ne 0 ]]; then - RTN=1 - echo -e "\n Testing failed: Nox returned a non-zero exit code. \n" - else - echo -e "\n Testing completed.\n" - fi - -done -cd "$ROOT" - -# Workaround for Kokoro permissions issue: delete secrets -rm testing/{test-env.sh,client-secrets.json,service-account.json} - -exit "$RTN" diff --git a/packages/google-cloud-bigquery/.kokoro/test-samples.sh b/packages/google-cloud-bigquery/.kokoro/test-samples.sh deleted file mode 100755 index 7933d820149a..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/test-samples.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# The default test runner for samples. -# -# For periodic builds, we rewinds the repo to the latest release, and -# run test-samples-impl.sh. - -# `-e` enables the script to automatically fail when a command fails -# `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero -set -eo pipefail -# Enables `**` to include files nested inside sub-folders -shopt -s globstar - -# Run periodic samples tests at latest release -if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then - # preserving the test runner implementation. - cp .kokoro/test-samples-impl.sh "${TMPDIR}/test-samples-impl.sh" - echo "--- IMPORTANT IMPORTANT IMPORTANT ---" - echo "Now we rewind the repo back to the latest release..." - LATEST_RELEASE=$(git describe --abbrev=0 --tags) - git checkout $LATEST_RELEASE - echo "The current head is: " - echo $(git rev-parse --verify HEAD) - echo "--- IMPORTANT IMPORTANT IMPORTANT ---" - # move back the test runner implementation if there's no file. - if [ ! -f .kokoro/test-samples-impl.sh ]; then - cp "${TMPDIR}/test-samples-impl.sh" .kokoro/test-samples-impl.sh - fi -fi - -exec .kokoro/test-samples-impl.sh diff --git a/packages/google-cloud-bigquery/.kokoro/trampoline.sh b/packages/google-cloud-bigquery/.kokoro/trampoline.sh deleted file mode 100755 index 48f79699706e..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/trampoline.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -eo pipefail - -# Always run the cleanup script, regardless of the success of bouncing into -# the container. -function cleanup() { - chmod +x ${KOKORO_GFILE_DIR}/trampoline_cleanup.sh - ${KOKORO_GFILE_DIR}/trampoline_cleanup.sh - echo "cleanup"; -} -trap cleanup EXIT - -$(dirname $0)/populate-secrets.sh # Secret Manager secrets. -python3 "${KOKORO_GFILE_DIR}/trampoline_v1.py" \ No newline at end of file diff --git a/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh b/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh deleted file mode 100755 index 35fa529231dc..000000000000 --- a/packages/google-cloud-bigquery/.kokoro/trampoline_v2.sh +++ /dev/null @@ -1,487 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# trampoline_v2.sh -# -# This script does 3 things. -# -# 1. Prepare the Docker image for the test -# 2. Run the Docker with appropriate flags to run the test -# 3. Upload the newly built Docker image -# -# in a way that is somewhat compatible with trampoline_v1. -# -# To run this script, first download few files from gcs to /dev/shm. -# (/dev/shm is passed into the container as KOKORO_GFILE_DIR). -# -# gsutil cp gs://cloud-devrel-kokoro-resources/python-docs-samples/secrets_viewer_service_account.json /dev/shm -# gsutil cp gs://cloud-devrel-kokoro-resources/python-docs-samples/automl_secrets.txt /dev/shm -# -# Then run the script. -# .kokoro/trampoline_v2.sh -# -# These environment variables are required: -# TRAMPOLINE_IMAGE: The docker image to use. -# TRAMPOLINE_DOCKERFILE: The location of the Dockerfile. -# -# You can optionally change these environment variables: -# TRAMPOLINE_IMAGE_UPLOAD: -# (true|false): Whether to upload the Docker image after the -# successful builds. -# TRAMPOLINE_BUILD_FILE: The script to run in the docker container. -# TRAMPOLINE_WORKSPACE: The workspace path in the docker container. -# Defaults to /workspace. -# Potentially there are some repo specific envvars in .trampolinerc in -# the project root. - - -set -euo pipefail - -TRAMPOLINE_VERSION="2.0.5" - -if command -v tput >/dev/null && [[ -n "${TERM:-}" ]]; then - readonly IO_COLOR_RED="$(tput setaf 1)" - readonly IO_COLOR_GREEN="$(tput setaf 2)" - readonly IO_COLOR_YELLOW="$(tput setaf 3)" - readonly IO_COLOR_RESET="$(tput sgr0)" -else - readonly IO_COLOR_RED="" - readonly IO_COLOR_GREEN="" - readonly IO_COLOR_YELLOW="" - readonly IO_COLOR_RESET="" -fi - -function function_exists { - [ $(LC_ALL=C type -t $1)"" == "function" ] -} - -# Logs a message using the given color. The first argument must be one -# of the IO_COLOR_* variables defined above, such as -# "${IO_COLOR_YELLOW}". The remaining arguments will be logged in the -# given color. The log message will also have an RFC-3339 timestamp -# prepended (in UTC). You can disable the color output by setting -# TERM=vt100. -function log_impl() { - local color="$1" - shift - local timestamp="$(date -u "+%Y-%m-%dT%H:%M:%SZ")" - echo "================================================================" - echo "${color}${timestamp}:" "$@" "${IO_COLOR_RESET}" - echo "================================================================" -} - -# Logs the given message with normal coloring and a timestamp. -function log() { - log_impl "${IO_COLOR_RESET}" "$@" -} - -# Logs the given message in green with a timestamp. -function log_green() { - log_impl "${IO_COLOR_GREEN}" "$@" -} - -# Logs the given message in yellow with a timestamp. -function log_yellow() { - log_impl "${IO_COLOR_YELLOW}" "$@" -} - -# Logs the given message in red with a timestamp. -function log_red() { - log_impl "${IO_COLOR_RED}" "$@" -} - -readonly tmpdir=$(mktemp -d -t ci-XXXXXXXX) -readonly tmphome="${tmpdir}/h" -mkdir -p "${tmphome}" - -function cleanup() { - rm -rf "${tmpdir}" -} -trap cleanup EXIT - -RUNNING_IN_CI="${RUNNING_IN_CI:-false}" - -# The workspace in the container, defaults to /workspace. -TRAMPOLINE_WORKSPACE="${TRAMPOLINE_WORKSPACE:-/workspace}" - -pass_down_envvars=( - # TRAMPOLINE_V2 variables. - # Tells scripts whether they are running as part of CI or not. - "RUNNING_IN_CI" - # Indicates which CI system we're in. - "TRAMPOLINE_CI" - # Indicates the version of the script. - "TRAMPOLINE_VERSION" -) - -log_yellow "Building with Trampoline ${TRAMPOLINE_VERSION}" - -# Detect which CI systems we're in. If we're in any of the CI systems -# we support, `RUNNING_IN_CI` will be true and `TRAMPOLINE_CI` will be -# the name of the CI system. Both envvars will be passing down to the -# container for telling which CI system we're in. -if [[ -n "${KOKORO_BUILD_ID:-}" ]]; then - # descriptive env var for indicating it's on CI. - RUNNING_IN_CI="true" - TRAMPOLINE_CI="kokoro" - if [[ "${TRAMPOLINE_USE_LEGACY_SERVICE_ACCOUNT:-}" == "true" ]]; then - if [[ ! -f "${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json" ]]; then - log_red "${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json does not exist. Did you forget to mount cloud-devrel-kokoro-resources/trampoline? Aborting." - exit 1 - fi - # This service account will be activated later. - TRAMPOLINE_SERVICE_ACCOUNT="${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json" - else - if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then - gcloud auth list - fi - log_yellow "Configuring Container Registry access" - gcloud auth configure-docker --quiet - fi - pass_down_envvars+=( - # KOKORO dynamic variables. - "KOKORO_BUILD_NUMBER" - "KOKORO_BUILD_ID" - "KOKORO_JOB_NAME" - "KOKORO_GIT_COMMIT" - "KOKORO_GITHUB_COMMIT" - "KOKORO_GITHUB_PULL_REQUEST_NUMBER" - "KOKORO_GITHUB_PULL_REQUEST_COMMIT" - # For FlakyBot - "KOKORO_GITHUB_COMMIT_URL" - "KOKORO_GITHUB_PULL_REQUEST_URL" - ) -elif [[ "${TRAVIS:-}" == "true" ]]; then - RUNNING_IN_CI="true" - TRAMPOLINE_CI="travis" - pass_down_envvars+=( - "TRAVIS_BRANCH" - "TRAVIS_BUILD_ID" - "TRAVIS_BUILD_NUMBER" - "TRAVIS_BUILD_WEB_URL" - "TRAVIS_COMMIT" - "TRAVIS_COMMIT_MESSAGE" - "TRAVIS_COMMIT_RANGE" - "TRAVIS_JOB_NAME" - "TRAVIS_JOB_NUMBER" - "TRAVIS_JOB_WEB_URL" - "TRAVIS_PULL_REQUEST" - "TRAVIS_PULL_REQUEST_BRANCH" - "TRAVIS_PULL_REQUEST_SHA" - "TRAVIS_PULL_REQUEST_SLUG" - "TRAVIS_REPO_SLUG" - "TRAVIS_SECURE_ENV_VARS" - "TRAVIS_TAG" - ) -elif [[ -n "${GITHUB_RUN_ID:-}" ]]; then - RUNNING_IN_CI="true" - TRAMPOLINE_CI="github-workflow" - pass_down_envvars+=( - "GITHUB_WORKFLOW" - "GITHUB_RUN_ID" - "GITHUB_RUN_NUMBER" - "GITHUB_ACTION" - "GITHUB_ACTIONS" - "GITHUB_ACTOR" - "GITHUB_REPOSITORY" - "GITHUB_EVENT_NAME" - "GITHUB_EVENT_PATH" - "GITHUB_SHA" - "GITHUB_REF" - "GITHUB_HEAD_REF" - "GITHUB_BASE_REF" - ) -elif [[ "${CIRCLECI:-}" == "true" ]]; then - RUNNING_IN_CI="true" - TRAMPOLINE_CI="circleci" - pass_down_envvars+=( - "CIRCLE_BRANCH" - "CIRCLE_BUILD_NUM" - "CIRCLE_BUILD_URL" - "CIRCLE_COMPARE_URL" - "CIRCLE_JOB" - "CIRCLE_NODE_INDEX" - "CIRCLE_NODE_TOTAL" - "CIRCLE_PREVIOUS_BUILD_NUM" - "CIRCLE_PROJECT_REPONAME" - "CIRCLE_PROJECT_USERNAME" - "CIRCLE_REPOSITORY_URL" - "CIRCLE_SHA1" - "CIRCLE_STAGE" - "CIRCLE_USERNAME" - "CIRCLE_WORKFLOW_ID" - "CIRCLE_WORKFLOW_JOB_ID" - "CIRCLE_WORKFLOW_UPSTREAM_JOB_IDS" - "CIRCLE_WORKFLOW_WORKSPACE_ID" - ) -fi - -# Configure the service account for pulling the docker image. -function repo_root() { - local dir="$1" - while [[ ! -d "${dir}/.git" ]]; do - dir="$(dirname "$dir")" - done - echo "${dir}" -} - -# Detect the project root. In CI builds, we assume the script is in -# the git tree and traverse from there, otherwise, traverse from `pwd` -# to find `.git` directory. -if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then - PROGRAM_PATH="$(realpath "$0")" - PROGRAM_DIR="$(dirname "${PROGRAM_PATH}")" - PROJECT_ROOT="$(repo_root "${PROGRAM_DIR}")" -else - PROJECT_ROOT="$(repo_root $(pwd))" -fi - -log_yellow "Changing to the project root: ${PROJECT_ROOT}." -cd "${PROJECT_ROOT}" - -# To support relative path for `TRAMPOLINE_SERVICE_ACCOUNT`, we need -# to use this environment variable in `PROJECT_ROOT`. -if [[ -n "${TRAMPOLINE_SERVICE_ACCOUNT:-}" ]]; then - - mkdir -p "${tmpdir}/gcloud" - gcloud_config_dir="${tmpdir}/gcloud" - - log_yellow "Using isolated gcloud config: ${gcloud_config_dir}." - export CLOUDSDK_CONFIG="${gcloud_config_dir}" - - log_yellow "Using ${TRAMPOLINE_SERVICE_ACCOUNT} for authentication." - gcloud auth activate-service-account \ - --key-file "${TRAMPOLINE_SERVICE_ACCOUNT}" - log_yellow "Configuring Container Registry access" - gcloud auth configure-docker --quiet -fi - -required_envvars=( - # The basic trampoline configurations. - "TRAMPOLINE_IMAGE" - "TRAMPOLINE_BUILD_FILE" -) - -if [[ -f "${PROJECT_ROOT}/.trampolinerc" ]]; then - source "${PROJECT_ROOT}/.trampolinerc" -fi - -log_yellow "Checking environment variables." -for e in "${required_envvars[@]}" -do - if [[ -z "${!e:-}" ]]; then - log "Missing ${e} env var. Aborting." - exit 1 - fi -done - -# We want to support legacy style TRAMPOLINE_BUILD_FILE used with V1 -# script: e.g. "github/repo-name/.kokoro/run_tests.sh" -TRAMPOLINE_BUILD_FILE="${TRAMPOLINE_BUILD_FILE#github/*/}" -log_yellow "Using TRAMPOLINE_BUILD_FILE: ${TRAMPOLINE_BUILD_FILE}" - -# ignore error on docker operations and test execution -set +e - -log_yellow "Preparing Docker image." -# We only download the docker image in CI builds. -if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then - # Download the docker image specified by `TRAMPOLINE_IMAGE` - - # We may want to add --max-concurrent-downloads flag. - - log_yellow "Start pulling the Docker image: ${TRAMPOLINE_IMAGE}." - if docker pull "${TRAMPOLINE_IMAGE}"; then - log_green "Finished pulling the Docker image: ${TRAMPOLINE_IMAGE}." - has_image="true" - else - log_red "Failed pulling the Docker image: ${TRAMPOLINE_IMAGE}." - has_image="false" - fi -else - # For local run, check if we have the image. - if docker images "${TRAMPOLINE_IMAGE}:latest" | grep "${TRAMPOLINE_IMAGE}"; then - has_image="true" - else - has_image="false" - fi -fi - - -# The default user for a Docker container has uid 0 (root). To avoid -# creating root-owned files in the build directory we tell docker to -# use the current user ID. -user_uid="$(id -u)" -user_gid="$(id -g)" -user_name="$(id -un)" - -# To allow docker in docker, we add the user to the docker group in -# the host os. -docker_gid=$(cut -d: -f3 < <(getent group docker)) - -update_cache="false" -if [[ "${TRAMPOLINE_DOCKERFILE:-none}" != "none" ]]; then - # Build the Docker image from the source. - context_dir=$(dirname "${TRAMPOLINE_DOCKERFILE}") - docker_build_flags=( - "-f" "${TRAMPOLINE_DOCKERFILE}" - "-t" "${TRAMPOLINE_IMAGE}" - "--build-arg" "UID=${user_uid}" - "--build-arg" "USERNAME=${user_name}" - ) - if [[ "${has_image}" == "true" ]]; then - docker_build_flags+=("--cache-from" "${TRAMPOLINE_IMAGE}") - fi - - log_yellow "Start building the docker image." - if [[ "${TRAMPOLINE_VERBOSE:-false}" == "true" ]]; then - echo "docker build" "${docker_build_flags[@]}" "${context_dir}" - fi - - # ON CI systems, we want to suppress docker build logs, only - # output the logs when it fails. - if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then - if docker build "${docker_build_flags[@]}" "${context_dir}" \ - > "${tmpdir}/docker_build.log" 2>&1; then - if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then - cat "${tmpdir}/docker_build.log" - fi - - log_green "Finished building the docker image." - update_cache="true" - else - log_red "Failed to build the Docker image, aborting." - log_yellow "Dumping the build logs:" - cat "${tmpdir}/docker_build.log" - exit 1 - fi - else - if docker build "${docker_build_flags[@]}" "${context_dir}"; then - log_green "Finished building the docker image." - update_cache="true" - else - log_red "Failed to build the Docker image, aborting." - exit 1 - fi - fi -else - if [[ "${has_image}" != "true" ]]; then - log_red "We do not have ${TRAMPOLINE_IMAGE} locally, aborting." - exit 1 - fi -fi - -# We use an array for the flags so they are easier to document. -docker_flags=( - # Remove the container after it exists. - "--rm" - - # Use the host network. - "--network=host" - - # Run in priviledged mode. We are not using docker for sandboxing or - # isolation, just for packaging our dev tools. - "--privileged" - - # Run the docker script with the user id. Because the docker image gets to - # write in ${PWD} you typically want this to be your user id. - # To allow docker in docker, we need to use docker gid on the host. - "--user" "${user_uid}:${docker_gid}" - - # Pass down the USER. - "--env" "USER=${user_name}" - - # Mount the project directory inside the Docker container. - "--volume" "${PROJECT_ROOT}:${TRAMPOLINE_WORKSPACE}" - "--workdir" "${TRAMPOLINE_WORKSPACE}" - "--env" "PROJECT_ROOT=${TRAMPOLINE_WORKSPACE}" - - # Mount the temporary home directory. - "--volume" "${tmphome}:/h" - "--env" "HOME=/h" - - # Allow docker in docker. - "--volume" "/var/run/docker.sock:/var/run/docker.sock" - - # Mount the /tmp so that docker in docker can mount the files - # there correctly. - "--volume" "/tmp:/tmp" - # Pass down the KOKORO_GFILE_DIR and KOKORO_KEYSTORE_DIR - # TODO(tmatsuo): This part is not portable. - "--env" "TRAMPOLINE_SECRET_DIR=/secrets" - "--volume" "${KOKORO_GFILE_DIR:-/dev/shm}:/secrets/gfile" - "--env" "KOKORO_GFILE_DIR=/secrets/gfile" - "--volume" "${KOKORO_KEYSTORE_DIR:-/dev/shm}:/secrets/keystore" - "--env" "KOKORO_KEYSTORE_DIR=/secrets/keystore" -) - -# Add an option for nicer output if the build gets a tty. -if [[ -t 0 ]]; then - docker_flags+=("-it") -fi - -# Passing down env vars -for e in "${pass_down_envvars[@]}" -do - if [[ -n "${!e:-}" ]]; then - docker_flags+=("--env" "${e}=${!e}") - fi -done - -# If arguments are given, all arguments will become the commands run -# in the container, otherwise run TRAMPOLINE_BUILD_FILE. -if [[ $# -ge 1 ]]; then - log_yellow "Running the given commands '" "${@:1}" "' in the container." - readonly commands=("${@:1}") - if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then - echo docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" "${commands[@]}" - fi - docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" "${commands[@]}" -else - log_yellow "Running the tests in a Docker container." - docker_flags+=("--entrypoint=${TRAMPOLINE_BUILD_FILE}") - if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then - echo docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" - fi - docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" -fi - - -test_retval=$? - -if [[ ${test_retval} -eq 0 ]]; then - log_green "Build finished with ${test_retval}" -else - log_red "Build finished with ${test_retval}" -fi - -# Only upload it when the test passes. -if [[ "${update_cache}" == "true" ]] && \ - [[ $test_retval == 0 ]] && \ - [[ "${TRAMPOLINE_IMAGE_UPLOAD:-false}" == "true" ]]; then - log_yellow "Uploading the Docker image." - if docker push "${TRAMPOLINE_IMAGE}"; then - log_green "Finished uploading the Docker image." - else - log_red "Failed uploading the Docker image." - fi - # Call trampoline_after_upload_hook if it's defined. - if function_exists trampoline_after_upload_hook; then - trampoline_after_upload_hook - fi - -fi - -exit "${test_retval}" diff --git a/packages/google-cloud-bigquery/.trampolinerc b/packages/google-cloud-bigquery/.trampolinerc deleted file mode 100644 index 0080152373d5..000000000000 --- a/packages/google-cloud-bigquery/.trampolinerc +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Add required env vars here. -required_envvars+=( -) - -# Add env vars which are passed down into the container here. -pass_down_envvars+=( - "NOX_SESSION" - ############### - # Docs builds - ############### - "STAGING_BUCKET" - "V2_STAGING_BUCKET" - ################## - # Samples builds - ################## - "INSTALL_LIBRARY_FROM_SOURCE" - "RUN_TESTS_SESSION" - "BUILD_SPECIFIC_GCLOUD_PROJECT" - # Target directories. - "RUN_TESTS_DIRS" - # The nox session to run. - "RUN_TESTS_SESSION" -) - -# Prevent unintentional override on the default image. -if [[ "${TRAMPOLINE_IMAGE_UPLOAD:-false}" == "true" ]] && \ - [[ -z "${TRAMPOLINE_IMAGE:-}" ]]; then - echo "Please set TRAMPOLINE_IMAGE if you want to upload the Docker image." - exit 1 -fi - -# Define the default value if it makes sense. -if [[ -z "${TRAMPOLINE_IMAGE_UPLOAD:-}" ]]; then - TRAMPOLINE_IMAGE_UPLOAD="" -fi - -if [[ -z "${TRAMPOLINE_IMAGE:-}" ]]; then - TRAMPOLINE_IMAGE="" -fi - -if [[ -z "${TRAMPOLINE_DOCKERFILE:-}" ]]; then - TRAMPOLINE_DOCKERFILE="" -fi - -if [[ -z "${TRAMPOLINE_BUILD_FILE:-}" ]]; then - TRAMPOLINE_BUILD_FILE="" -fi diff --git a/packages/google-cloud-bigquery/docs/changelog.md b/packages/google-cloud-bigquery/docs/changelog.md deleted file mode 120000 index 04c99a55caae..000000000000 --- a/packages/google-cloud-bigquery/docs/changelog.md +++ /dev/null @@ -1 +0,0 @@ -../CHANGELOG.md \ No newline at end of file